From 17972791577022b163d3e7dd62ee2f994f017637 Mon Sep 17 00:00:00 2001
From: ZMZ <zmz@yanhuangdata.com>
Date: Mon, 12 Apr 2021 15:11:27 +0800
Subject: [PATCH 001/719] support null data type in gdv

---
 cpp/src/gandiva/CMakeLists.txt             |   2 +
 cpp/src/gandiva/annotator.cc               |  18 ++-
 cpp/src/gandiva/dex.h                      |   8 ++
 cpp/src/gandiva/dex_visitor.h              |   3 +
 cpp/src/gandiva/exported_funcs.h           |   6 +
 cpp/src/gandiva/expr_decomposer.cc         |   7 ++
 cpp/src/gandiva/expr_decomposer.h          |   1 +
 cpp/src/gandiva/expr_validator.cc          |  22 +++-
 cpp/src/gandiva/expr_validator.h           |   1 +
 cpp/src/gandiva/function_registry.cc       |   4 +
 cpp/src/gandiva/function_registry_common.h |   1 +
 cpp/src/gandiva/function_registry_null.h   |  40 +++++++
 cpp/src/gandiva/llvm_generator.cc          |  28 ++++-
 cpp/src/gandiva/llvm_generator.h           |   1 +
 cpp/src/gandiva/node.h                     |  15 ++-
 cpp/src/gandiva/node_visitor.h             |   2 +
 cpp/src/gandiva/null_ops.cc                |  50 ++++++++
 cpp/src/gandiva/null_ops.h                 |  30 +++++
 cpp/src/gandiva/null_ops_test.cc           |  30 +++++
 cpp/src/gandiva/precompiled/types.h        |   1 +
 cpp/src/gandiva/projector.cc               |  13 ++-
 cpp/src/gandiva/tests/CMakeLists.txt       |   1 +
 cpp/src/gandiva/tests/null_test.cc         | 130 +++++++++++++++++++++
 cpp/src/gandiva/tree_expr_builder.cc       |   2 +
 24 files changed, 399 insertions(+), 17 deletions(-)
 create mode 100644 cpp/src/gandiva/function_registry_null.h
 create mode 100644 cpp/src/gandiva/null_ops.cc
 create mode 100644 cpp/src/gandiva/null_ops.h
 create mode 100644 cpp/src/gandiva/null_ops_test.cc
 create mode 100644 cpp/src/gandiva/tests/null_test.cc

diff --git a/cpp/src/gandiva/CMakeLists.txt b/cpp/src/gandiva/CMakeLists.txt
index fcdaf97d526..e9bbc19c5b0 100644
--- a/cpp/src/gandiva/CMakeLists.txt
+++ b/cpp/src/gandiva/CMakeLists.txt
@@ -69,6 +69,7 @@ set(SRC_FILES
     expression_registry.cc
     exported_funcs_registry.cc
     filter.cc
+    null_ops.cc
     function_ir_builder.cc
     function_registry.cc
     function_registry_arithmetic.cc
@@ -233,6 +234,7 @@ add_gandiva_test(internals-test
                  random_generator_holder_test.cc
                  hash_utils_test.cc
                  gdv_function_stubs_test.cc
+                 null_ops_test.cc
                  EXTRA_DEPENDENCIES
                  LLVM::LLVM_INTERFACE
                  ${GANDIVA_OPENSSL_LIBS}
diff --git a/cpp/src/gandiva/annotator.cc b/cpp/src/gandiva/annotator.cc
index f6acaff1804..8d0eb145e17 100644
--- a/cpp/src/gandiva/annotator.cc
+++ b/cpp/src/gandiva/annotator.cc
@@ -77,13 +77,21 @@ void Annotator::PrepareBuffersForField(const FieldDescriptor& desc,
     ++buffer_idx;
   }
 
-  uint8_t* data_buf = const_cast<uint8_t*>(array_data.buffers[buffer_idx]->data());
-  eval_batch->SetBuffer(desc.data_idx(), data_buf, array_data.offset);
+  if (array_data.type->id() == arrow::Type::NA) {
+    eval_batch->SetBuffer(desc.data_idx(), nullptr, array_data.offset);
+  } else {
+    uint8_t* data_buf = const_cast<uint8_t*>(array_data.buffers[buffer_idx]->data());
+    eval_batch->SetBuffer(desc.data_idx(), data_buf, array_data.offset);
+  }
   if (is_output) {
     // pass in the Buffer object for output data buffers. Can be used for resizing.
-    uint8_t* data_buf_ptr =
-        reinterpret_cast<uint8_t*>(array_data.buffers[buffer_idx].get());
-    eval_batch->SetBuffer(desc.data_buffer_ptr_idx(), data_buf_ptr, array_data.offset);
+    if (array_data.type->id() == arrow::Type::NA) {
+      eval_batch->SetBuffer(desc.data_buffer_ptr_idx(), nullptr, array_data.offset);
+    } else {
+      uint8_t* data_buf_ptr =
+          reinterpret_cast<uint8_t*>(array_data.buffers[buffer_idx].get());
+      eval_batch->SetBuffer(desc.data_buffer_ptr_idx(), data_buf_ptr, array_data.offset);
+    }
   }
 }
 
diff --git a/cpp/src/gandiva/dex.h b/cpp/src/gandiva/dex.h
index 3920f82f1d7..c4b3a81ca2e 100644
--- a/cpp/src/gandiva/dex.h
+++ b/cpp/src/gandiva/dex.h
@@ -205,6 +205,14 @@ class GANDIVA_EXPORT LiteralDex : public Dex {
   LiteralHolder holder_;
 };
 
+/// decomposed expression for a null literal.
+class GANDIVA_EXPORT NullLiteralDex : public Dex {
+ public:
+  NullLiteralDex() {}
+
+  void Accept(DexVisitor& visitor) override { visitor.Visit(*this); }
+};
+
 /// decomposed if-else expression.
 class GANDIVA_EXPORT IfDex : public Dex {
  public:
diff --git a/cpp/src/gandiva/dex_visitor.h b/cpp/src/gandiva/dex_visitor.h
index ba5de970dda..07277286b53 100644
--- a/cpp/src/gandiva/dex_visitor.h
+++ b/cpp/src/gandiva/dex_visitor.h
@@ -30,6 +30,7 @@ class VectorReadFixedLenValueDex;
 class VectorReadVarLenValueDex;
 class LocalBitMapValidityDex;
 class LiteralDex;
+class NullLiteralDex;
 class TrueDex;
 class FalseDex;
 class NonNullableFuncDex;
@@ -53,6 +54,7 @@ class GANDIVA_EXPORT DexVisitor {
   virtual void Visit(const TrueDex& dex) = 0;
   virtual void Visit(const FalseDex& dex) = 0;
   virtual void Visit(const LiteralDex& dex) = 0;
+  virtual void Visit(const NullLiteralDex& dex) = 0;
   virtual void Visit(const NonNullableFuncDex& dex) = 0;
   virtual void Visit(const NullableNeverFuncDex& dex) = 0;
   virtual void Visit(const NullableInternalFuncDex& dex) = 0;
@@ -77,6 +79,7 @@ class GANDIVA_EXPORT DexDefaultVisitor : public DexVisitor {
   VISIT_DCHECK(TrueDex)
   VISIT_DCHECK(FalseDex)
   VISIT_DCHECK(LiteralDex)
+  VISIT_DCHECK(NullLiteralDex)
   VISIT_DCHECK(NonNullableFuncDex)
   VISIT_DCHECK(NullableNeverFuncDex)
   VISIT_DCHECK(NullableInternalFuncDex)
diff --git a/cpp/src/gandiva/exported_funcs.h b/cpp/src/gandiva/exported_funcs.h
index 58205266094..1dc1f57f770 100644
--- a/cpp/src/gandiva/exported_funcs.h
+++ b/cpp/src/gandiva/exported_funcs.h
@@ -32,6 +32,12 @@ class ExportedFuncsBase {
   virtual void AddMappings(Engine* engine) const = 0;
 };
 
+// Class for exporting Null functions
+class ExportedNullFunctions : public ExportedFuncsBase {
+  void AddMappings(Engine* engine) const override;
+};
+REGISTER_EXPORTED_FUNCS(ExportedNullFunctions);
+
 // Class for exporting Stub functions
 class ExportedStubFunctions : public ExportedFuncsBase {
   void AddMappings(Engine* engine) const override;
diff --git a/cpp/src/gandiva/expr_decomposer.cc b/cpp/src/gandiva/expr_decomposer.cc
index 07252b42fd2..834a7211e89 100644
--- a/cpp/src/gandiva/expr_decomposer.cc
+++ b/cpp/src/gandiva/expr_decomposer.cc
@@ -223,6 +223,13 @@ Status ExprDecomposer::Visit(const LiteralNode& node) {
   return Status::OK();
 }
 
+Status ExprDecomposer::Visit(const NullLiteralNode& node) {
+  auto value_dex = std::make_shared<NullLiteralDex>();
+  auto validity_dex = std::make_shared<FalseDex>();
+  result_ = std::make_shared<ValueValidityPair>(validity_dex, value_dex);
+  return Status::OK();
+}
+
 // The bolow functions use a stack to detect :
 // a. nested if-else expressions.
 //    In such cases,  the local bitmap can be re-used.
diff --git a/cpp/src/gandiva/expr_decomposer.h b/cpp/src/gandiva/expr_decomposer.h
index 3e8e67de255..ebc64e227db 100644
--- a/cpp/src/gandiva/expr_decomposer.h
+++ b/cpp/src/gandiva/expr_decomposer.h
@@ -63,6 +63,7 @@ class GANDIVA_EXPORT ExprDecomposer : public NodeVisitor {
   Status Visit(const FunctionNode& node) override;
   Status Visit(const IfNode& node) override;
   Status Visit(const LiteralNode& node) override;
+  Status Visit(const NullLiteralNode& node) override;
   Status Visit(const BooleanNode& node) override;
   Status Visit(const InExpressionNode<int32_t>& node) override;
   Status Visit(const InExpressionNode<int64_t>& node) override;
diff --git a/cpp/src/gandiva/expr_validator.cc b/cpp/src/gandiva/expr_validator.cc
index fd46c2894b9..47e11f3a836 100644
--- a/cpp/src/gandiva/expr_validator.cc
+++ b/cpp/src/gandiva/expr_validator.cc
@@ -42,11 +42,14 @@ Status ExprValidator::Validate(const ExpressionPtr& expr) {
 }
 
 Status ExprValidator::Visit(const FieldNode& node) {
-  auto llvm_type = types_->IRType(node.return_type()->id());
-  ARROW_RETURN_IF(llvm_type == nullptr,
-                  Status::ExpressionValidationError("Field ", node.field()->name(),
-                                                    " has unsupported data type ",
-                                                    node.return_type()->name()));
+  auto return_type = node.return_type();
+  if (return_type->id() != arrow::Type::NA) {
+    auto llvm_type = types_->DataVecType(node.return_type());
+    ARROW_RETURN_IF(llvm_type == nullptr,
+                    Status::ExpressionValidationError("Field ", node.field()->name(),
+                                                      " has unsupported data type ",
+                                                      node.return_type()->name()));
+  }
 
   // Ensure that field is found in schema
   auto field_in_schema_entry = field_map_.find(node.field()->name());
@@ -120,6 +123,15 @@ Status ExprValidator::Visit(const LiteralNode& node) {
   return Status::OK();
 }
 
+Status ExprValidator::Visit(const NullLiteralNode& node) {
+  auto llvm_type = types_->DataVecType(node.return_type());
+  ARROW_RETURN_IF(llvm_type != nullptr,
+                  Status::ExpressionValidationError("Should be data type ",
+                                                    node.return_type()->name()));
+
+  return Status::OK();
+}
+
 Status ExprValidator::Visit(const BooleanNode& node) {
   ARROW_RETURN_IF(
       node.children().size() < 2,
diff --git a/cpp/src/gandiva/expr_validator.h b/cpp/src/gandiva/expr_validator.h
index e25afe5e7e8..b3399ff517c 100644
--- a/cpp/src/gandiva/expr_validator.h
+++ b/cpp/src/gandiva/expr_validator.h
@@ -57,6 +57,7 @@ class ExprValidator : public NodeVisitor {
   Status Visit(const FunctionNode& node) override;
   Status Visit(const IfNode& node) override;
   Status Visit(const LiteralNode& node) override;
+  Status Visit(const NullLiteralNode& node) override;
   Status Visit(const BooleanNode& node) override;
   Status Visit(const InExpressionNode<int32_t>& node) override;
   Status Visit(const InExpressionNode<int64_t>& node) override;
diff --git a/cpp/src/gandiva/function_registry.cc b/cpp/src/gandiva/function_registry.cc
index d5d015c10b4..2d622124102 100644
--- a/cpp/src/gandiva/function_registry.cc
+++ b/cpp/src/gandiva/function_registry.cc
@@ -20,6 +20,7 @@
 #include "gandiva/function_registry_datetime.h"
 #include "gandiva/function_registry_hash.h"
 #include "gandiva/function_registry_math_ops.h"
+#include "gandiva/function_registry_null.h"
 #include "gandiva/function_registry_string.h"
 #include "gandiva/function_registry_timestamp_arithmetic.h"
 
@@ -65,6 +66,9 @@ SignatureMap FunctionRegistry::InitPCMap() {
   auto v6 = GetDateTimeArithmeticFunctionRegistry();
   pc_registry_.insert(std::end(pc_registry_), v6.begin(), v6.end());
 
+  auto v8 = GetNullFunctionRegistry();
+  pc_registry_.insert(std::end(pc_registry_), v8.begin(), v8.end());
+
   for (auto& elem : pc_registry_) {
     for (auto& func_signature : elem.signatures()) {
       map.insert(std::make_pair(&(func_signature), &elem));
diff --git a/cpp/src/gandiva/function_registry_common.h b/cpp/src/gandiva/function_registry_common.h
index d1555fba3ce..1ccba270c03 100644
--- a/cpp/src/gandiva/function_registry_common.h
+++ b/cpp/src/gandiva/function_registry_common.h
@@ -43,6 +43,7 @@ using arrow::int16;
 using arrow::int32;
 using arrow::int64;
 using arrow::int8;
+using arrow::null;
 using arrow::uint16;
 using arrow::uint32;
 using arrow::uint64;
diff --git a/cpp/src/gandiva/function_registry_null.h b/cpp/src/gandiva/function_registry_null.h
new file mode 100644
index 00000000000..a01cbef6fc1
--- /dev/null
+++ b/cpp/src/gandiva/function_registry_null.h
@@ -0,0 +1,40 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <vector>
+
+#include "gandiva/native_function.h"
+
+namespace gandiva {
+
+std::vector<NativeFunction> GetNullFunctionRegistry() {
+  static std::vector<NativeFunction> null_fn_registry_ = {
+      NativeFunction("equal",
+                     {"not_equal", "less_than", "less_than_or_equal_to", "greater_than",
+                      "greater_than_or_equal_to"},
+                     DataTypeVector{null(), null()}, boolean(), kResultNullNever,
+                     "compare_null_null"),
+      NativeFunction("isnull", {}, DataTypeVector{null()}, boolean(), kResultNullNever,
+                     "isnull_null"),
+      NativeFunction("isnotnull", {}, DataTypeVector{null()}, boolean(), kResultNullNever,
+                     "isnotnull_null")};
+  return null_fn_registry_;
+}
+
+}  // namespace gandiva
diff --git a/cpp/src/gandiva/llvm_generator.cc b/cpp/src/gandiva/llvm_generator.cc
index 1a80f1e7586..4ab96eb6999 100644
--- a/cpp/src/gandiva/llvm_generator.cc
+++ b/cpp/src/gandiva/llvm_generator.cc
@@ -170,6 +170,9 @@ llvm::Value* LLVMGenerator::GetDataReference(llvm::Value* arg_addrs, int idx,
   llvm::Value* load = LoadVectorAtIndex(arg_addrs, idx, name);
   llvm::Type* base_type = types()->DataVecType(field->type());
   llvm::Value* ret;
+  if (base_type == nullptr) {
+    return nullptr;
+  }
   if (base_type->isPointerTy()) {
     ret = ir_builder()->CreateIntToPtr(load, base_type, name + "_darray");
   } else {
@@ -363,6 +366,8 @@ Status LLVMGenerator::CodeGenExprValue(DexPtr value_expr, int buffer_count,
     AddFunctionCall("gdv_fn_populate_varlen_vector", types()->i32_type(),
                     {arg_context_ptr, output_buffer_ptr_ref, output_offset_ref, loop_var,
                      output_value->data(), output_value->length()});
+  } else if (output_type_id == arrow::Type::NA) {
+    // Do nothing when data type is null
   } else {
     return Status::NotImplemented("output type ", output->Type()->ToString(),
                                   " not supported");
@@ -452,6 +457,10 @@ void LLVMGenerator::ComputeBitMapsForExpr(const CompiledExpr& compiled_expr,
   // Extract the destination bitmap address.
   int out_idx = compiled_expr.output()->validity_idx();
   uint8_t* dst_bitmap = eval_batch.GetBuffer(out_idx);
+  if (dst_bitmap == nullptr) {
+    // Return when dst_bitmap is null meaning data type is null
+    return;
+  }
   // Compute the destination bitmap.
   if (selection_vector == nullptr) {
     accumulator.ComputeResult(dst_bitmap);
@@ -556,6 +565,9 @@ void LLVMGenerator::Visitor::Visit(const VectorReadFixedLenValueDex& dex) {
       break;
     }
 
+    case arrow::Type::NA:
+      break;
+
     default: {
       auto slot_offset = builder->CreateGEP(slot_ref, slot_index);
       slot_value = builder->CreateLoad(slot_offset, dex.FieldName());
@@ -720,6 +732,13 @@ void LLVMGenerator::Visitor::Visit(const LiteralDex& dex) {
   result_.reset(new LValue(value, len));
 }
 
+void LLVMGenerator::Visitor::Visit(const NullLiteralDex& dex) {
+  llvm::Value* value = nullptr;
+  llvm::Value* len = nullptr;
+  ADD_VISITOR_TRACE("visit Literal null");
+  result_.reset(new LValue(value, len));
+}
+
 void LLVMGenerator::Visitor::Visit(const NonNullableFuncDex& dex) {
   const std::string& function_name = dex.func_descriptor()->name();
   ADD_VISITOR_TRACE("visit NonNullableFunc base function " + function_name);
@@ -1240,10 +1259,11 @@ std::vector<llvm::Value*> LLVMGenerator::Visitor::BuildParams(
     // build value.
     DexPtr value_expr = pair->value_expr();
     value_expr->Accept(*this);
-    LValue& result_ref = *result();
-
-    // append all the parameters corresponding to this LValue.
-    result_ref.AppendFunctionParams(&params);
+    if (auto result_ptr = result()) {
+      LValue& result_ref = *result_ptr;
+      // append all the parameters corresponding to this LValue.
+      result_ref.AppendFunctionParams(&params);
+    }
 
     // build validity.
     if (with_validity) {
diff --git a/cpp/src/gandiva/llvm_generator.h b/cpp/src/gandiva/llvm_generator.h
index 8ff9711c0f9..a6fa1bb0339 100644
--- a/cpp/src/gandiva/llvm_generator.h
+++ b/cpp/src/gandiva/llvm_generator.h
@@ -100,6 +100,7 @@ class GANDIVA_EXPORT LLVMGenerator {
     void Visit(const TrueDex& dex) override;
     void Visit(const FalseDex& dex) override;
     void Visit(const LiteralDex& dex) override;
+    void Visit(const NullLiteralDex& dex) override;
     void Visit(const NonNullableFuncDex& dex) override;
     void Visit(const NullableNeverFuncDex& dex) override;
     void Visit(const NullableInternalFuncDex& dex) override;
diff --git a/cpp/src/gandiva/node.h b/cpp/src/gandiva/node.h
index 20807d4a0cb..6e4c22e93b1 100644
--- a/cpp/src/gandiva/node.h
+++ b/cpp/src/gandiva/node.h
@@ -23,7 +23,6 @@
 #include <vector>
 
 #include "arrow/status.h"
-
 #include "gandiva/arrow.h"
 #include "gandiva/func_descriptor.h"
 #include "gandiva/gandiva_aliases.h"
@@ -94,6 +93,20 @@ class GANDIVA_EXPORT LiteralNode : public Node {
   bool is_null_;
 };
 
+/// \brief Node in the expression tree, representing a NullLiteralNode.
+class GANDIVA_EXPORT NullLiteralNode : public Node {
+ public:
+  NullLiteralNode() : Node(arrow::null()) {}
+
+  Status Accept(NodeVisitor& visitor) const override { return visitor.Visit(*this); }
+
+  std::string ToString() const override {
+    std::stringstream ss;
+    ss << "(const " << return_type()->ToString() << ") null";
+    return ss.str();
+  }
+};
+
 /// \brief Node in the expression tree, representing an arrow field.
 class GANDIVA_EXPORT FieldNode : public Node {
  public:
diff --git a/cpp/src/gandiva/node_visitor.h b/cpp/src/gandiva/node_visitor.h
index b118e496383..c8516907788 100644
--- a/cpp/src/gandiva/node_visitor.h
+++ b/cpp/src/gandiva/node_visitor.h
@@ -30,6 +30,7 @@ class FieldNode;
 class FunctionNode;
 class IfNode;
 class LiteralNode;
+class NullLiteralNode;
 class BooleanNode;
 template <typename Type>
 class InExpressionNode;
@@ -43,6 +44,7 @@ class GANDIVA_EXPORT NodeVisitor {
   virtual Status Visit(const FunctionNode& node) = 0;
   virtual Status Visit(const IfNode& node) = 0;
   virtual Status Visit(const LiteralNode& node) = 0;
+  virtual Status Visit(const NullLiteralNode& node) = 0;
   virtual Status Visit(const BooleanNode& node) = 0;
   virtual Status Visit(const InExpressionNode<int32_t>& node) = 0;
   virtual Status Visit(const InExpressionNode<int64_t>& node) = 0;
diff --git a/cpp/src/gandiva/null_ops.cc b/cpp/src/gandiva/null_ops.cc
new file mode 100644
index 00000000000..79d21ae6c9a
--- /dev/null
+++ b/cpp/src/gandiva/null_ops.cc
@@ -0,0 +1,50 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#include "gandiva/null_ops.h"
+
+#include "gandiva/engine.h"
+#include "gandiva/exported_funcs.h"
+#include "gandiva/gdv_function_stubs.h"
+
+/// Stub functions that can be accessed from LLVM or the pre-compiled library.
+
+extern "C" {
+bool compare_null_null() { return false; }
+
+bool isnull_null() { return true; }
+
+bool isnotnull_null() { return false; }
+}
+
+namespace gandiva {
+void ExportedNullFunctions::AddMappings(Engine* engine) const {
+  std::vector<llvm::Type*> args;
+  auto types = engine->types();
+
+  args = {types->i1_type(), types->i1_type()};
+  engine->AddGlobalMappingForFunc("compare_null_null", types->i1_type() /*return_type*/,
+                                  args, reinterpret_cast<void*>(compare_null_null));
+
+  args = {types->i1_type()};
+  engine->AddGlobalMappingForFunc("isnull_null", types->i1_type() /*return_type*/, args,
+                                  reinterpret_cast<void*>(isnull_null));
+
+  args = {types->i1_type()};
+  engine->AddGlobalMappingForFunc("isnotnull_null", types->i1_type() /*return_type*/,
+                                  args, reinterpret_cast<void*>(isnotnull_null));
+}
+}  // namespace gandiva
diff --git a/cpp/src/gandiva/null_ops.h b/cpp/src/gandiva/null_ops.h
new file mode 100644
index 00000000000..65bce6fe149
--- /dev/null
+++ b/cpp/src/gandiva/null_ops.h
@@ -0,0 +1,30 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+
+/// Stub functions that can be accessed from LLVM.
+extern "C" {
+
+bool compare_null_null();
+
+bool isnull_null();
+
+bool isnotnull_null();
+}
\ No newline at end of file
diff --git a/cpp/src/gandiva/null_ops_test.cc b/cpp/src/gandiva/null_ops_test.cc
new file mode 100644
index 00000000000..3ef351cb773
--- /dev/null
+++ b/cpp/src/gandiva/null_ops_test.cc
@@ -0,0 +1,30 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+#include "gandiva/precompiled/types.h"
+
+namespace gandiva {
+
+TEST(TestNullOps, Test) {
+  EXPECT_FALSE(compare_null_null());
+  EXPECT_TRUE(isnull_null());
+  EXPECT_FALSE(isnotnull_null());
+}
+}  // namespace gandiva
diff --git a/cpp/src/gandiva/precompiled/types.h b/cpp/src/gandiva/precompiled/types.h
index 1b0f96e0ab7..2c0bbd47f1e 100644
--- a/cpp/src/gandiva/precompiled/types.h
+++ b/cpp/src/gandiva/precompiled/types.h
@@ -19,6 +19,7 @@
 
 #include <cstdint>
 #include "gandiva/gdv_function_stubs.h"
+#include "gandiva/null_ops.h"
 
 // Use the same names as in arrow data types. Makes it easy to write pre-processor macros.
 using gdv_boolean = bool;
diff --git a/cpp/src/gandiva/projector.cc b/cpp/src/gandiva/projector.cc
index 734720c64c9..50440dd5e0a 100644
--- a/cpp/src/gandiva/projector.cc
+++ b/cpp/src/gandiva/projector.cc
@@ -24,7 +24,6 @@
 
 #include "arrow/util/hash_util.h"
 #include "arrow/util/logging.h"
-
 #include "gandiva/cache.h"
 #include "gandiva/expr_validator.h"
 #include "gandiva/llvm_generator.h"
@@ -289,6 +288,8 @@ Status Projector::AllocArrayData(const DataTypePtr& type, int64_t num_records,
   } else if (arrow::is_binary_like(type_id)) {
     // we don't know the expected size for varlen output vectors.
     data_len = 0;
+  } else if (type_id == arrow::Type::NA) {
+    data_len = 0;
   } else {
     return Status::Invalid("Unsupported output data type " + type->ToString());
   }
@@ -301,7 +302,11 @@ Status Projector::AllocArrayData(const DataTypePtr& type, int64_t num_records,
   }
   buffers.push_back(std::move(data_buffer));
 
-  *array_data = arrow::ArrayData::Make(type, num_records, std::move(buffers));
+  if (type_id == arrow::Type::NA) {
+    *array_data = arrow::ArrayData::Make(type, num_records, {nullptr});
+  } else {
+    *array_data = arrow::ArrayData::Make(type, num_records, std::move(buffers));
+  }
   return Status::OK();
 }
 
@@ -350,6 +355,10 @@ Status Projector::ValidateArrayDataCapacity(const arrow::ArrayData& array_data,
     int64_t data_len = array_data.buffers[1]->capacity();
     ARROW_RETURN_IF(data_len < min_data_len,
                     Status::Invalid("Data buffer too small for ", field.name()));
+  } else if (type_id == arrow::Type::NA) {
+    ARROW_RETURN_IF(array_data.buffers.size() == 1 && array_data.buffers[0] == nullptr,
+                    Status::Invalid("Data buffer should be nullptr for null typed field",
+                                    field.name()));
   } else {
     return Status::Invalid("Unsupported output data type " + field.type()->ToString());
   }
diff --git a/cpp/src/gandiva/tests/CMakeLists.txt b/cpp/src/gandiva/tests/CMakeLists.txt
index 5fa2da16c63..a57085c589e 100644
--- a/cpp/src/gandiva/tests/CMakeLists.txt
+++ b/cpp/src/gandiva/tests/CMakeLists.txt
@@ -25,6 +25,7 @@ add_gandiva_test(binary_test)
 add_gandiva_test(date_time_test)
 add_gandiva_test(to_string_test)
 add_gandiva_test(utf8_test)
+add_gandiva_test(null_test)
 add_gandiva_test(hash_test)
 add_gandiva_test(in_expr_test)
 add_gandiva_test(null_validity_test)
diff --git a/cpp/src/gandiva/tests/null_test.cc b/cpp/src/gandiva/tests/null_test.cc
new file mode 100644
index 00000000000..a3ff18baa32
--- /dev/null
+++ b/cpp/src/gandiva/tests/null_test.cc
@@ -0,0 +1,130 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include "arrow/memory_pool.h"
+#include "arrow/status.h"
+#include "gandiva/projector.h"
+#include "gandiva/tests/test_util.h"
+#include "gandiva/tree_expr_builder.h"
+
+namespace gandiva {
+
+using arrow::boolean;
+using arrow::null;
+
+class TestNull : public ::testing::Test {
+ public:
+  void SetUp() { pool_ = arrow::default_memory_pool(); }
+
+ protected:
+  arrow::MemoryPool* pool_;
+};
+
+TEST_F(TestNull, TestSimple) {
+  // schema for input fields
+  auto field_null = field("field_null", null());
+  auto schema = arrow::schema({field_null});
+
+  auto literal_null = TreeExprBuilder::MakeNull(arrow::null());
+  auto node_field_null = TreeExprBuilder::MakeField(field_null);
+
+  // output fields
+  auto res_1 = field("res1", null());
+  auto res_2 = field("res2", null());
+  auto expr_1 = TreeExprBuilder::MakeExpression(literal_null, res_1);
+  auto expr_2 = TreeExprBuilder::MakeExpression(node_field_null, res_2);
+
+  // Build a projector for the expressions.
+  std::shared_ptr<Projector> projector;
+  auto status =
+      Projector::Make(schema, {expr_1, expr_2}, TestConfiguration(), &projector);
+  EXPECT_TRUE(status.ok()) << status.message();
+
+  arrow::ArrayVector outputs;
+  auto nb = std::make_shared<arrow::NullBuilder>();
+  auto _ = nb->AppendNulls(4);
+  std::shared_ptr<arrow::NullArray> null_array;
+  _ = nb->Finish(&null_array);
+  auto in_batch = arrow::RecordBatch::Make(schema, 4, {null_array});
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  EXPECT_TRUE(status.ok());
+
+  // Validate results
+  EXPECT_ARROW_ARRAY_EQUALS(null_array, outputs.at(0));
+  EXPECT_ARROW_ARRAY_EQUALS(null_array, outputs.at(1));
+}
+
+TEST_F(TestNull, TestOps) {
+  // schema for input fields
+  auto field_null = field("field_null", null());
+  auto schema = arrow::schema({field_null});
+
+  // output fields
+  auto res_1 = field("res1", boolean());
+  auto res_2 = field("res2", boolean());
+  auto res_3 = field("res3", boolean());
+  auto res_4 = field("res4", boolean());
+  auto res_5 = field("res5", boolean());
+  auto res_6 = field("res6", boolean());
+  auto res_7 = field("res7", boolean());
+  auto res_8 = field("res8", boolean());
+  auto expr_1 = TreeExprBuilder::MakeExpression("equal", {field_null, field_null}, res_1);
+  auto expr_2 =
+      TreeExprBuilder::MakeExpression("not_equal", {field_null, field_null}, res_2);
+  auto expr_3 =
+      TreeExprBuilder::MakeExpression("less_than", {field_null, field_null}, res_3);
+  auto expr_4 = TreeExprBuilder::MakeExpression("less_than_or_equal_to",
+                                                {field_null, field_null}, res_4);
+  auto expr_5 =
+      TreeExprBuilder::MakeExpression("greater_than", {field_null, field_null}, res_5);
+  auto expr_6 = TreeExprBuilder::MakeExpression("greater_than_or_equal_to",
+                                                {field_null, field_null}, res_6);
+  auto expr_7 = TreeExprBuilder::MakeExpression("isnull", {field_null}, res_7);
+  auto expr_8 = TreeExprBuilder::MakeExpression("isnotnull", {field_null}, res_8);
+
+  // Build a projector for the expressions.
+  std::shared_ptr<Projector> projector;
+  auto status = Projector::Make(
+      schema, {expr_1, expr_2, expr_3, expr_4, expr_5, expr_6, expr_7, expr_8},
+      TestConfiguration(), &projector);
+  EXPECT_TRUE(status.ok()) << status.message();
+
+  arrow::ArrayVector outputs;
+  auto nb = std::make_shared<arrow::NullBuilder>();
+  auto _ = nb->AppendNulls(4);
+  std::shared_ptr<arrow::NullArray> null_array;
+  _ = nb->Finish(&null_array);
+  auto in_batch = arrow::RecordBatch::Make(schema, 4, {null_array});
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  EXPECT_TRUE(status.ok());
+
+  // Validate results
+  auto exp_true = MakeArrowArrayBool({true, true, true, true}, {true, true, true, true});
+  auto exp_false =
+      MakeArrowArrayBool({false, false, false, false}, {true, true, true, true});
+  for (int i = 0; i < 8; i++) {
+    if (i == 6) {
+      EXPECT_ARROW_ARRAY_EQUALS(exp_true, outputs.at(i));
+    } else {
+      EXPECT_ARROW_ARRAY_EQUALS(exp_false, outputs.at(i));
+    }
+  }
+}
+
+}  // namespace gandiva
diff --git a/cpp/src/gandiva/tree_expr_builder.cc b/cpp/src/gandiva/tree_expr_builder.cc
index b27b92010e8..7a66cb0a49c 100644
--- a/cpp/src/gandiva/tree_expr_builder.cc
+++ b/cpp/src/gandiva/tree_expr_builder.cc
@@ -105,6 +105,8 @@ NodePtr TreeExprBuilder::MakeNull(DataTypePtr data_type) {
       DecimalScalar128 literal(decimal_type->precision(), decimal_type->scale());
       return std::make_shared<LiteralNode>(data_type, LiteralHolder(literal), true);
     }
+    case arrow::Type::NA:
+      return std::make_shared<NullLiteralNode>();
     default:
       return nullptr;
   }

From e493e940ce56de7b268ad9b9f22798126616335c Mon Sep 17 00:00:00 2001
From: ZMZ <zmz@yanhuangdata.com>
Date: Tue, 13 Apr 2021 09:39:09 +0800
Subject: [PATCH 002/719] update compare function return type

---
 cpp/src/gandiva/function_registry_null.h |  2 +-
 cpp/src/gandiva/llvm_generator.cc        |  2 +-
 cpp/src/gandiva/null_ops.cc              |  8 ++++----
 cpp/src/gandiva/null_ops.h               |  6 +++---
 cpp/src/gandiva/null_ops_test.cc         |  6 +++---
 cpp/src/gandiva/tests/null_test.cc       | 22 ++++++++++------------
 6 files changed, 22 insertions(+), 24 deletions(-)

diff --git a/cpp/src/gandiva/function_registry_null.h b/cpp/src/gandiva/function_registry_null.h
index a01cbef6fc1..ab45e6f4e41 100644
--- a/cpp/src/gandiva/function_registry_null.h
+++ b/cpp/src/gandiva/function_registry_null.h
@@ -28,7 +28,7 @@ std::vector<NativeFunction> GetNullFunctionRegistry() {
       NativeFunction("equal",
                      {"not_equal", "less_than", "less_than_or_equal_to", "greater_than",
                       "greater_than_or_equal_to"},
-                     DataTypeVector{null(), null()}, boolean(), kResultNullNever,
+                     DataTypeVector{null(), null()}, null(), kResultNullNever,
                      "compare_null_null"),
       NativeFunction("isnull", {}, DataTypeVector{null()}, boolean(), kResultNullNever,
                      "isnull_null"),
diff --git a/cpp/src/gandiva/llvm_generator.cc b/cpp/src/gandiva/llvm_generator.cc
index 4ab96eb6999..f7f1d464474 100644
--- a/cpp/src/gandiva/llvm_generator.cc
+++ b/cpp/src/gandiva/llvm_generator.cc
@@ -500,7 +500,7 @@ llvm::Value* LLVMGenerator::AddFunctionCall(const std::string& full_name,
 
   // build a call to the llvm function.
   llvm::Value* value;
-  if (ret_type->isVoidTy()) {
+  if (ret_type == nullptr || ret_type->isVoidTy()) {
     // void functions can't have a name for the call.
     value = ir_builder()->CreateCall(fn, args);
   } else {
diff --git a/cpp/src/gandiva/null_ops.cc b/cpp/src/gandiva/null_ops.cc
index 79d21ae6c9a..b7179a8e8be 100644
--- a/cpp/src/gandiva/null_ops.cc
+++ b/cpp/src/gandiva/null_ops.cc
@@ -23,11 +23,11 @@
 /// Stub functions that can be accessed from LLVM or the pre-compiled library.
 
 extern "C" {
-bool compare_null_null() { return false; }
+void compare_null_null(bool in1_valid, bool in2_valid) {}
 
-bool isnull_null() { return true; }
+bool isnull_null(bool in_valid) { return true; }
 
-bool isnotnull_null() { return false; }
+bool isnotnull_null(bool in_valid) { return false; }
 }
 
 namespace gandiva {
@@ -36,7 +36,7 @@ void ExportedNullFunctions::AddMappings(Engine* engine) const {
   auto types = engine->types();
 
   args = {types->i1_type(), types->i1_type()};
-  engine->AddGlobalMappingForFunc("compare_null_null", types->i1_type() /*return_type*/,
+  engine->AddGlobalMappingForFunc("compare_null_null", types->void_type() /*return_type*/,
                                   args, reinterpret_cast<void*>(compare_null_null));
 
   args = {types->i1_type()};
diff --git a/cpp/src/gandiva/null_ops.h b/cpp/src/gandiva/null_ops.h
index 65bce6fe149..492eb6033cd 100644
--- a/cpp/src/gandiva/null_ops.h
+++ b/cpp/src/gandiva/null_ops.h
@@ -22,9 +22,9 @@
 /// Stub functions that can be accessed from LLVM.
 extern "C" {
 
-bool compare_null_null();
+void compare_null_null(bool in1_valid, bool in2_valid);
 
-bool isnull_null();
+bool isnull_null(bool in_valid);
 
-bool isnotnull_null();
+bool isnotnull_null(bool in_valid);
 }
\ No newline at end of file
diff --git a/cpp/src/gandiva/null_ops_test.cc b/cpp/src/gandiva/null_ops_test.cc
index 3ef351cb773..a979b82a771 100644
--- a/cpp/src/gandiva/null_ops_test.cc
+++ b/cpp/src/gandiva/null_ops_test.cc
@@ -23,8 +23,8 @@
 namespace gandiva {
 
 TEST(TestNullOps, Test) {
-  EXPECT_FALSE(compare_null_null());
-  EXPECT_TRUE(isnull_null());
-  EXPECT_FALSE(isnotnull_null());
+  compare_null_null(true, true);
+  EXPECT_TRUE(isnull_null(true));
+  EXPECT_FALSE(isnotnull_null(true));
 }
 }  // namespace gandiva
diff --git a/cpp/src/gandiva/tests/null_test.cc b/cpp/src/gandiva/tests/null_test.cc
index a3ff18baa32..e018ab0dff4 100644
--- a/cpp/src/gandiva/tests/null_test.cc
+++ b/cpp/src/gandiva/tests/null_test.cc
@@ -76,12 +76,12 @@ TEST_F(TestNull, TestOps) {
   auto schema = arrow::schema({field_null});
 
   // output fields
-  auto res_1 = field("res1", boolean());
-  auto res_2 = field("res2", boolean());
-  auto res_3 = field("res3", boolean());
-  auto res_4 = field("res4", boolean());
-  auto res_5 = field("res5", boolean());
-  auto res_6 = field("res6", boolean());
+  auto res_1 = field("res1", null());
+  auto res_2 = field("res2", null());
+  auto res_3 = field("res3", null());
+  auto res_4 = field("res4", null());
+  auto res_5 = field("res5", null());
+  auto res_6 = field("res6", null());
   auto res_7 = field("res7", boolean());
   auto res_8 = field("res8", boolean());
   auto expr_1 = TreeExprBuilder::MakeExpression("equal", {field_null, field_null}, res_1);
@@ -118,13 +118,11 @@ TEST_F(TestNull, TestOps) {
   auto exp_true = MakeArrowArrayBool({true, true, true, true}, {true, true, true, true});
   auto exp_false =
       MakeArrowArrayBool({false, false, false, false}, {true, true, true, true});
-  for (int i = 0; i < 8; i++) {
-    if (i == 6) {
-      EXPECT_ARROW_ARRAY_EQUALS(exp_true, outputs.at(i));
-    } else {
-      EXPECT_ARROW_ARRAY_EQUALS(exp_false, outputs.at(i));
-    }
+  for (int i = 0; i < 6; i++) {
+    EXPECT_EQ(outputs.at(i)->null_count(), 4);
   }
+  EXPECT_ARROW_ARRAY_EQUALS(exp_true, outputs.at(6));
+  EXPECT_ARROW_ARRAY_EQUALS(exp_false, outputs.at(7));
 }
 
 }  // namespace gandiva

From c51c19e2ebfd4cce95c156c3cd63c14113dbde2f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Tue, 13 Apr 2021 12:31:23 +0200
Subject: [PATCH 003/719] ARROW-12342: [Packaging] Fix tabulation in crossbow
 templates for submitting nightly builds
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The generated templates can be checked using `archery crossbow render <task-name> --arrow-branch master`

Closes #10000 from kszucs/crossbow-template-tabulation

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 dev/archery/archery/crossbow/core.py            | 3 ++-
 dev/tasks/linux-packages/travis.linux.arm64.yml | 3 +--
 dev/tasks/macros.jinja                          | 8 ++++----
 dev/tasks/tasks.yml                             | 2 +-
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/dev/archery/archery/crossbow/core.py b/dev/archery/archery/crossbow/core.py
index d4d3d5183b5..9d3074a21d5 100644
--- a/dev/archery/archery/crossbow/core.py
+++ b/dev/archery/archery/crossbow/core.py
@@ -121,7 +121,8 @@ def format_all(items, pattern):
 
     loader = jinja2.FileSystemLoader(searchpath)
     env = jinja2.Environment(loader=loader, trim_blocks=True,
-                             lstrip_blocks=True)
+                             lstrip_blocks=True,
+                             undefined=jinja2.StrictUndefined)
     env.filters['format_all'] = format_all
     template = env.get_template(template)
     return template.render(**params)
diff --git a/dev/tasks/linux-packages/travis.linux.arm64.yml b/dev/tasks/linux-packages/travis.linux.arm64.yml
index e9457d6a337..aba604161d8 100644
--- a/dev/tasks/linux-packages/travis.linux.arm64.yml
+++ b/dev/tasks/linux-packages/travis.linux.arm64.yml
@@ -144,5 +144,4 @@ script:
   - popd
 
 after_success:
-  {% set patterns = upload_extensions | format_all("arrow/python/repaired_wheels/*.whl") %}
-  {{ macros.github_upload_releases(patterns) }}
+  {{ macros.travis_upload_releases(upload_extensions) }}
diff --git a/dev/tasks/macros.jinja b/dev/tasks/macros.jinja
index e0552b11bcf..db1b64cd649 100644
--- a/dev/tasks/macros.jinja
+++ b/dev/tasks/macros.jinja
@@ -91,11 +91,11 @@ on:
 {% endmacro %}
 
 {%- macro github_upload_gemfury(pattern) -%}
-  {% if arrow.branch == 'master' %}
+  {%- if arrow.branch == 'master' -%}
   - name: Upload package to Gemfury
     shell: bash
     run: |
-      path=$(ls {{ patter }})
+      path=$(ls {{ pattern }})
       curl -F "package=@${path}" https://${CROSSBOW_GEMFURY_TOKEN}@push.fury.io/${CROSSBOW_GEMFURY_ORG}/
     env:
       CROSSBOW_GEMFURY_TOKEN: {{ '${{ secrets.CROSSBOW_GEMFURY_TOKEN }}' }}
@@ -138,7 +138,7 @@ on:
 {% endmacro %}
 
 {%- macro azure_upload_anaconda(pattern) -%}
-  {% if arrow.branch == 'master' %}
+  {%- if arrow.branch == 'master' -%}
   - task: CondaEnvironment@1
     inputs:
       packageSpecs: 'anaconda-client'
@@ -188,7 +188,7 @@ on:
 {% endmacro %}
 
 {%- macro travis_upload_gemfury(pattern) -%}
-  {% if arrow.branch == 'master' %}
+  {%- if arrow.branch == 'master' -%}
   - |
     WHEEL_PATH=$(echo arrow/python/repaired_wheels/*.whl)
     curl \
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index 5a04c98f640..dcf49414fcc 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -372,7 +372,7 @@ tasks:
 # enable S3 support from macOS 10.13 so we don't need to bundle curl, crypt and ssl
 {% for macos_version, macos_codename, arrow_s3 in [("10.9", "mavericks", "OFF"),
                                                    ("10.13", "high-sierra", "ON")] %}
-  {% set platform_tag = "macosx_{}_{}".format(macos_version.replace('.', '_'), arch_alias) %}
+  {% set platform_tag = "macosx_{}_x86_64".format(macos_version.replace('.', '_')) %}
 
   wheel-osx-{{ macos_codename }}-{{ python_tag }}:
     ci: github

From 62f8c20306f366a848f1392eba97665ef155a2b2 Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Tue, 13 Apr 2021 12:49:54 +0200
Subject: [PATCH 004/719] ARROW-12326: [C++] Avoid needless c-ares detection
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If we use system gRPC, we don't need to detect c-ares.

This change also simplifies gRPC detection. System gRPC detection
requires CMake config or pkg-config. System gRPC detection by
gRPC_ROOT is removed because we can't maintain Abseil dependencies.

Closes #9977 from kou/cpp-avoid-needless-c-ares-detection

Authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 cpp/cmake_modules/Find-c-aresAlt.cmake      |  71 ++++++
 cpp/cmake_modules/FindgRPCAlt.cmake         | 247 ++++----------------
 cpp/cmake_modules/ThirdpartyToolchain.cmake |  77 +++---
 cpp/src/arrow/flight/CMakeLists.txt         |  15 +-
 4 files changed, 151 insertions(+), 259 deletions(-)
 create mode 100644 cpp/cmake_modules/Find-c-aresAlt.cmake

diff --git a/cpp/cmake_modules/Find-c-aresAlt.cmake b/cpp/cmake_modules/Find-c-aresAlt.cmake
new file mode 100644
index 00000000000..dd16393cad2
--- /dev/null
+++ b/cpp/cmake_modules/Find-c-aresAlt.cmake
@@ -0,0 +1,71 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set(find_package_args)
+if(c-aresAlt_FIND_VERSION)
+  list(APPEND find_package_args ${c-aresAlt_FIND_VERSION})
+endif()
+if(c-aresAlt_FIND_QUIETLY)
+  list(APPEND find_package_args QUIET)
+endif()
+find_package(c-ares ${find_package_args})
+if(c-ares_FOUND)
+  set(c-aresAlt_FOUND TRUE)
+  return()
+endif()
+
+find_package(PkgConfig QUIET)
+pkg_check_modules(c-ares_PC libcares)
+if(c-ares_PC_FOUND)
+  set(c-ares_INCLUDE_DIR "${c-ares_PC_INCLUDEDIR}")
+
+  list(APPEND c-ares_PC_LIBRARY_DIRS "${c-ares_PC_LIBDIR}")
+  find_library(c-ares_LIB cares
+               PATHS ${c-ares_PC_LIBRARY_DIRS}
+               PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES}
+               NO_DEFAULT_PATH)
+elseif(c-ares_ROOT)
+  find_library(c-ares_LIB
+               NAMES cares
+                     "${CMAKE_SHARED_LIBRARY_PREFIX}cares${CMAKE_SHARED_LIBRARY_SUFFIX}"
+               PATHS ${c-ares_ROOT}
+               PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES}
+               NO_DEFAULT_PATH)
+  find_path(c-ares_INCLUDE_DIR
+            NAMES ares.h
+            PATHS ${c-ares_ROOT}
+            NO_DEFAULT_PATH
+            PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES})
+else()
+  find_library(c-ares_LIB
+               NAMES cares
+                     "${CMAKE_SHARED_LIBRARY_PREFIX}cares${CMAKE_SHARED_LIBRARY_SUFFIX}"
+               PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES})
+  find_path(c-ares_INCLUDE_DIR NAMES ares.h PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES})
+endif()
+
+find_package_handle_standard_args(c-aresAlt REQUIRED_VARS c-ares_LIB c-ares_INCLUDE_DIR)
+
+if(c-aresAlt_FOUND)
+  if(NOT TARGET c-ares::cares)
+    add_library(c-ares::cares UNKNOWN IMPORTED)
+    set_target_properties(
+      c-ares::cares
+      PROPERTIES IMPORTED_LOCATION "${c-ares_LIB}" INTERFACE_INCLUDE_DIRECTORIES
+                 "${c-ares_INCLUDE_DIR}")
+  endif()
+endif()
diff --git a/cpp/cmake_modules/FindgRPCAlt.cmake b/cpp/cmake_modules/FindgRPCAlt.cmake
index 79fe01744d3..841b3b61b83 100644
--- a/cpp/cmake_modules/FindgRPCAlt.cmake
+++ b/cpp/cmake_modules/FindgRPCAlt.cmake
@@ -24,222 +24,57 @@ if(gRPC_FOUND)
   return()
 endif()
 
-unset(GRPC_ALT_VERSION)
-
-if(ARROW_GRPC_USE_SHARED)
-  set(GRPC_GPR_LIB_NAMES)
-  set(GRPC_GRPC_LIB_NAMES)
-  set(GRPC_GRPCPP_LIB_NAMES)
-  set(GRPC_ADDRESS_SORTING_LIB_NAMES)
-  set(GRPC_UPB_LIB_NAMES)
-  if(CMAKE_IMPORT_LIBRARY_SUFFIX)
-    list(APPEND GRPC_GPR_LIB_NAMES
-                "${CMAKE_IMPORT_LIBRARY_PREFIX}gpr${CMAKE_IMPORT_LIBRARY_SUFFIX}")
-    list(APPEND GRPC_GRPC_LIB_NAMES
-                "${CMAKE_IMPORT_LIBRARY_PREFIX}grpc${CMAKE_IMPORT_LIBRARY_SUFFIX}")
-    list(APPEND GRPC_GRPCPP_LIB_NAMES
-                "${CMAKE_IMPORT_LIBRARY_PREFIX}grpc++${CMAKE_IMPORT_LIBRARY_SUFFIX}")
-    list(
-      APPEND GRPC_ADDRESS_SORTING_LIB_NAMES
-             "${CMAKE_IMPORT_LIBRARY_PREFIX}address_sorting${CMAKE_IMPORT_LIBRARY_SUFFIX}"
-      )
-    list(APPEND GRPC_UPB_LIB_NAMES
-                "${CMAKE_IMPORT_LIBRARY_PREFIX}upb${CMAKE_IMPORT_LIBRARY_SUFFIX}")
+find_package(PkgConfig QUIET)
+pkg_check_modules(GRPCPP_PC grpc++)
+if(GRPCPP_PC_FOUND)
+  set(gRPCAlt_VERSION "${GRPCPP_PC_VERSION}")
+  set(GRPCPP_INCLUDE_DIRECTORIES ${GRPCPP_PC_INCLUDEDIR})
+  if(ARROW_GRPC_USE_SHARED)
+    set(GRPCPP_LINK_LIBRARIES ${GRPCPP_PC_LINK_LIBRARIES})
+    set(GRPCPP_LINK_OPTIONS ${GRPCPP_PC_LDFLAGS_OTHER})
+    set(GRPCPP_COMPILE_OPTIONS ${GRPCPP_PC_CFLAGS_OTHER})
+  else()
+    set(GRPCPP_LINK_LIBRARIES)
+    foreach(GRPCPP_LIBRARY_NAME ${GRPCPP_PC_STATIC_LIBRARIES})
+      find_library(
+        GRPCPP_LIBRARY_${GRPCPP_LIBRARY_NAME}
+        NAMES
+          "${CMAKE_STATIC_LIBRARY_PREFIX}${GRPCPP_LIBRARY_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}"
+        HINTS ${GRPCPP_PC_STATIC_LIBRARY_DIRS})
+      list(APPEND GRPCPP_LINK_LIBRARIES "${GRPCPP_LIBRARY_${GRPCPP_LIBRARY_NAME}}")
+    endforeach()
+    set(GRPCPP_LINK_OPTIONS ${GRPCPP_PC_STATIC_LDFLAGS_OTHER})
+    set(GRPCPP_COMPILE_OPTIONS ${GRPCPP_PC_STATIC_CFLAGS_OTHER})
   endif()
-  list(APPEND GRPC_GPR_LIB_NAMES
-              "${CMAKE_SHARED_LIBRARY_PREFIX}gpr${CMAKE_SHARED_LIBRARY_SUFFIX}")
-  list(APPEND GRPC_GRPC_LIB_NAMES
-              "${CMAKE_SHARED_LIBRARY_PREFIX}grpc${CMAKE_SHARED_LIBRARY_SUFFIX}")
-  list(APPEND GRPC_GRPCPP_LIB_NAMES
-              "${CMAKE_SHARED_LIBRARY_PREFIX}grpc++${CMAKE_SHARED_LIBRARY_SUFFIX}")
-  list(
-    APPEND GRPC_ADDRESS_SORTING_LIB_NAMES
-           "${CMAKE_SHARED_LIBRARY_PREFIX}address_sorting${CMAKE_SHARED_LIBRARY_SUFFIX}")
-  list(APPEND GRPC_UPB_LIB_NAMES
-              "${CMAKE_SHARED_LIBRARY_PREFIX}upb${CMAKE_SHARED_LIBRARY_SUFFIX}")
-else()
-  set(GRPC_GPR_LIB_NAMES
-      "${CMAKE_STATIC_LIBRARY_PREFIX}gpr${CMAKE_STATIC_LIBRARY_SUFFIX}")
-  set(GRPC_GRPC_LIB_NAMES
-      "${CMAKE_STATIC_LIBRARY_PREFIX}grpc${CMAKE_STATIC_LIBRARY_SUFFIX}")
-  set(GRPC_GRPCPP_LIB_NAMES
-      "${CMAKE_STATIC_LIBRARY_PREFIX}grpc++${CMAKE_STATIC_LIBRARY_SUFFIX}")
-  set(GRPC_ADDRESS_SORTING_LIB_NAMES
-      "${CMAKE_STATIC_LIBRARY_PREFIX}address_sorting${CMAKE_STATIC_LIBRARY_SUFFIX}")
-  set(GRPC_UPB_LIB_NAMES
-      "${CMAKE_STATIC_LIBRARY_PREFIX}upb${CMAKE_STATIC_LIBRARY_SUFFIX}")
-endif()
-
-if(gRPC_ROOT)
-  find_library(GRPC_GPR_LIB
-               NAMES ${GRPC_GPR_LIB_NAMES}
-               PATHS ${gRPC_ROOT}
-               PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES}
-               NO_DEFAULT_PATH)
-  find_library(GRPC_GRPC_LIB
-               NAMES ${GRPC_GRPC_LIB_NAMES}
-               PATHS ${gRPC_ROOT}
-               PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES}
-               NO_DEFAULT_PATH)
-  find_library(GRPC_GRPCPP_LIB
-               NAMES ${GRPC_GRPCPP_LIB_NAMES}
-               PATHS ${gRPC_ROOT}
-               PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES}
-               NO_DEFAULT_PATH)
-  find_library(GRPC_ADDRESS_SORTING_LIB
-               NAMES ${GRPC_ADDRESS_SORTING_LIB_NAMES}
-               PATHS ${gRPC_ROOT}
-               PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES}
-               NO_DEFAULT_PATH)
-  find_library(GRPC_UPB_LIB
-               NAMES ${GRPC_UPB_LIB_NAMES}
-               PATHS ${gRPC_ROOT}
-               PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES}
-               NO_DEFAULT_PATH)
-  find_program(GRPC_CPP_PLUGIN grpc_cpp_plugin NO_DEFAULT_PATH
-               PATHS ${gRPC_ROOT}
+  list(GET GRPCPP_LINK_LIBRARIES 0 GRPCPP_IMPORTED_LOCATION)
+  list(REMOVE_AT GRPCPP_LINK_LIBRARIES 0)
+  find_program(GRPC_CPP_PLUGIN grpc_cpp_plugin
+               HINTS ${GRPCPP_PC_PREFIX}
+               NO_DEFAULT_PATH
                PATH_SUFFIXES "bin")
-  find_path(GRPC_INCLUDE_DIR
-            NAMES grpc/grpc.h
-            PATHS ${gRPC_ROOT}
-            NO_DEFAULT_PATH
-            PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES})
-else()
-  find_package(PkgConfig QUIET)
-  pkg_check_modules(GRPC_PC grpc++)
-  if(GRPC_PC_FOUND)
-    set(GRPC_ALT_VERSION "${GRPC_PC_VERSION}")
-    set(GRPC_INCLUDE_DIR "${GRPC_PC_INCLUDEDIR}")
-    list(APPEND GRPC_PC_LIBRARY_DIRS "${GRPC_PC_LIBDIR}")
-    message(STATUS "${GRPC_PC_LIBRARY_DIRS}")
-
-    find_library(GRPC_GPR_LIB
-                 NAMES ${GRPC_GPR_LIB_NAMES}
-                 PATHS ${GRPC_PC_LIBRARY_DIRS}
-                 PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES}
-                 NO_DEFAULT_PATH)
-    find_library(GRPC_GRPC_LIB
-                 NAMES ${GRPC_GRPC_LIB_NAMES}
-                 PATHS ${GRPC_PC_LIBRARY_DIRS}
-                 PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES}
-                 NO_DEFAULT_PATH)
-    find_library(GRPC_GRPCPP_LIB
-                 NAMES ${GRPC_GRPCPP_LIB_NAMES}
-                 PATHS ${GRPC_PC_LIBRARY_DIRS}
-                 PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES}
-                 NO_DEFAULT_PATH)
-    find_library(GRPC_ADDRESS_SORTING_LIB
-                 NAMES ${GRPC_ADDRESS_SORTING_LIB_NAMES}
-                 PATHS ${GRPC_PC_LIBRARY_DIRS}
-                 PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES}
-                 NO_DEFAULT_PATH)
-    find_library(GRPC_UPB_LIB
-                 NAMES ${GRPC_UPB_LIB_NAMES}
-                 PATHS ${GRPC_PC_LIBRARY_DIRS}
-                 PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES}
-                 NO_DEFAULT_PATH)
-    find_program(GRPC_CPP_PLUGIN grpc_cpp_plugin
-                 HINTS ${GRPC_PC_PREFIX}
-                 NO_DEFAULT_PATH
-                 PATH_SUFFIXES "bin")
-  else()
-    find_library(GRPC_GPR_LIB
-                 NAMES ${GRPC_GPR_LIB_NAMES}
-                 PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES})
-    find_library(GRPC_GRPC_LIB
-                 NAMES ${GRPC_GRPC_LIB_NAMES}
-                 PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES})
-    find_library(GRPC_GRPCPP_LIB
-                 NAMES ${GRPC_GRPCPP_LIB_NAMES}
-                 PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES})
-    find_library(GRPC_ADDRESS_SORTING_LIB
-                 NAMES ${GRPC_ADDRESS_SORTING_LIB_NAMES}
-                 PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES})
-    find_library(GRPC_UPB_LIB
-                 NAMES ${GRPC_UPB_LIB_NAMES}
-                 PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES})
-    find_program(GRPC_CPP_PLUGIN grpc_cpp_plugin PATH_SUFFIXES "bin")
-    find_path(GRPC_INCLUDE_DIR
-              NAMES grpc/grpc.h
-              PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES})
+  set(gRPCAlt_FIND_PACKAGE_ARGS gRPCAlt REQUIRED_VARS GRPCPP_IMPORTED_LOCATION
+                                GRPC_CPP_PLUGIN)
+  if(gRPCAlt_VERSION)
+    list(APPEND gRPCAlt_FIND_PACKAGE_ARGS VERSION_VAR gRPCAlt_VERSION)
   endif()
+  find_package_handle_standard_args(${gRPCAlt_FIND_PACKAGE_ARGS})
+else()
+  set(gRPCAlt_FOUND FALSE)
 endif()
 
-set(GRPC_ALT_FIND_PACKAGE_ARGS
-    gRPCAlt
-    REQUIRED_VARS
-    GRPC_INCLUDE_DIR
-    GRPC_GPR_LIB
-    GRPC_GRPC_LIB
-    GRPC_GRPCPP_LIB
-    GRPC_CPP_PLUGIN)
-if(GRPC_ALT_VERSION)
-  list(APPEND GRPC_ALT_FIND_PACKAGE_ARGS VERSION_VAR GRPC_ALT_VERSION)
-endif()
-find_package_handle_standard_args(${GRPC_ALT_FIND_PACKAGE_ARGS})
-
 if(gRPCAlt_FOUND)
-  add_library(gRPC::gpr UNKNOWN IMPORTED)
-  set_target_properties(gRPC::gpr
-                        PROPERTIES IMPORTED_LOCATION "${GRPC_GPR_LIB}"
-                                   INTERFACE_INCLUDE_DIRECTORIES "${GRPC_INCLUDE_DIR}")
-
-  add_library(gRPC::grpc UNKNOWN IMPORTED)
-  set_target_properties(
-    gRPC::grpc
-    PROPERTIES IMPORTED_LOCATION
-               "${GRPC_GRPC_LIB}"
-               INTERFACE_INCLUDE_DIRECTORIES
-               "${GRPC_INCLUDE_DIR}"
-               INTERFACE_LINK_LIBRARIES
-               "OpenSSL::SSL;OpenSSL::Crypto;ZLIB::ZLIB;c-ares::cares")
-
-  set(_GRPCPP_LINK_LIBRARIES "gRPC::grpc;gRPC::gpr")
-
-  if(GRPC_ADDRESS_SORTING_LIB)
-    # Address sorting is optional and not always required.
-    add_library(gRPC::address_sorting UNKNOWN IMPORTED)
-    set_target_properties(gRPC::address_sorting
-                          PROPERTIES IMPORTED_LOCATION "${GRPC_ADDRESS_SORTING_LIB}"
-                                     INTERFACE_INCLUDE_DIRECTORIES "${GRPC_INCLUDE_DIR}")
-    set(_GRPCPP_LINK_LIBRARIES "${_GRPCPP_LINK_LIBRARIES};gRPC::address_sorting")
-  endif()
-
-  if(GRPC_UPB_LIB)
-    # upb is used by recent gRPC versions
-    add_library(gRPC::upb UNKNOWN IMPORTED)
-    set_target_properties(gRPC::upb
-                          PROPERTIES IMPORTED_LOCATION "${GRPC_UPB_LIB}"
-                                     INTERFACE_INCLUDE_DIRECTORIES "${GRPC_INCLUDE_DIR}")
-    set(_GRPCPP_LINK_LIBRARIES "${_GRPCPP_LINK_LIBRARIES};gRPC::upb")
-  endif()
-
-  find_package(absl CONFIG)
-  if(absl_FOUND)
-    # Abseil libraries that recent gRPC versions depend on
-    set(_ABSL_LIBS
-        bad_optional_access
-        int128
-        raw_logging_internal
-        str_format_internal
-        strings
-        throw_delegate
-        time
-        time_zone)
-
-    foreach(_ABSL_LIB ${_ABSL_LIBS})
-      set(_GRPCPP_LINK_LIBRARIES "${_GRPCPP_LINK_LIBRARIES};absl::${_ABSL_LIB}")
-    endforeach()
-  endif()
-
   add_library(gRPC::grpc++ UNKNOWN IMPORTED)
   set_target_properties(gRPC::grpc++
                         PROPERTIES IMPORTED_LOCATION
-                                   "${GRPC_GRPCPP_LIB}"
-                                   INTERFACE_LINK_LIBRARIES
-                                   "${_GRPCPP_LINK_LIBRARIES}"
+                                   "${GRPCPP_IMPORTED_LOCATION}"
+                                   INTERFACE_COMPILE_OPTIONS
+                                   "${GRPCPP_COMPILE_OPTIONS}"
                                    INTERFACE_INCLUDE_DIRECTORIES
-                                   "${GRPC_INCLUDE_DIR}")
+                                   "${GRPCPP_INCLUDE_DIRECTORIES}"
+                                   INTERFACE_LINK_LIBRARIES
+                                   "${GRPCPP_LINK_LIBRARIES}"
+                                   INTERFACE_LINK_OPTIONS
+                                   "${GRPCPP_LINK_OPTIONS}")
 
   add_executable(gRPC::grpc_cpp_plugin IMPORTED)
   set_target_properties(gRPC::grpc_cpp_plugin
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index 9f240e448f6..3f686346bb1 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -2338,29 +2338,15 @@ macro(build_cares)
   list(APPEND ARROW_BUNDLED_STATIC_LIBS c-ares::cares)
 endmacro()
 
-if(ARROW_WITH_GRPC)
-  if(c-ares_SOURCE STREQUAL "AUTO")
-    find_package(c-ares QUIET CONFIG)
-    if(c-ares_FOUND)
-      set(CARES_INCLUDE_DIR ${c-ares_INCLUDE_DIR})
-    else()
-      build_cares()
-    endif()
-  elseif(c-ares_SOURCE STREQUAL "BUNDLED")
-    build_cares()
-  elseif(c-ares_SOURCE STREQUAL "SYSTEM")
-    find_package(c-ares REQUIRED CONFIG)
-    set(CARES_INCLUDE_DIR ${c-ares_INCLUDE_DIR})
-  endif()
-
-  # TODO: Don't use global includes but rather target_include_directories
-  include_directories(SYSTEM ${CARES_INCLUDE_DIR})
-endif()
-
 # ----------------------------------------------------------------------
 # Dependencies for Arrow Flight RPC
 
 macro(build_grpc)
+  resolve_dependency(c-ares HAVE_ALT TRUE)
+  # TODO: Don't use global includes but rather target_include_directories
+  get_target_property(c-ares_INCLUDE_DIR c-ares::cares INTERFACE_INCLUDE_DIRECTORIES)
+  include_directories(SYSTEM ${c-ares_INCLUDE_DIR})
+
   message(STATUS "Building gRPC from source")
 
   # First need to build Abseil
@@ -2548,27 +2534,38 @@ macro(build_grpc)
                         PROPERTIES IMPORTED_LOCATION "${GRPC_STATIC_LIBRARY_GPR}"
                                    INTERFACE_INCLUDE_DIRECTORIES "${GRPC_INCLUDE_DIR}")
 
-  add_library(gRPC::grpc STATIC IMPORTED)
-  set_target_properties(gRPC::grpc
-                        PROPERTIES IMPORTED_LOCATION "${GRPC_STATIC_LIBRARY_GRPC}"
-                                   INTERFACE_INCLUDE_DIRECTORIES "${GRPC_INCLUDE_DIR}")
-
   add_library(gRPC::address_sorting STATIC IMPORTED)
   set_target_properties(gRPC::address_sorting
                         PROPERTIES IMPORTED_LOCATION
                                    "${GRPC_STATIC_LIBRARY_ADDRESS_SORTING}"
                                    INTERFACE_INCLUDE_DIRECTORIES "${GRPC_INCLUDE_DIR}")
 
+  add_library(gRPC::grpc STATIC IMPORTED)
+  set(GRPC_LINK_LIBRARIES
+      gRPC::gpr
+      gRPC::upb
+      gRPC::address_sorting
+      ${ABSL_LIBRARIES}
+      c-ares::cares
+      ZLIB::ZLIB
+      Threads::Threads)
+  set_target_properties(gRPC::grpc
+                        PROPERTIES IMPORTED_LOCATION
+                                   "${GRPC_STATIC_LIBRARY_GRPC}"
+                                   INTERFACE_INCLUDE_DIRECTORIES
+                                   "${GRPC_INCLUDE_DIR}"
+                                   INTERFACE_LINK_LIBRARIES
+                                   "${GRPC_LINK_LIBRARIES}")
+
   add_library(gRPC::grpc++ STATIC IMPORTED)
-  set_target_properties(
-    gRPC::grpc++
-    PROPERTIES
-      IMPORTED_LOCATION
-      "${GRPC_STATIC_LIBRARY_GRPCPP}"
-      INTERFACE_LINK_LIBRARIES
-      "gRPC::grpc;gRPC::gpr;gRPC::upb;gRPC::address_sorting;${ABSL_LIBRARIES};Threads::Threads"
-      INTERFACE_INCLUDE_DIRECTORIES
-      "${GRPC_INCLUDE_DIR}")
+  set(GRPCPP_LINK_LIBRARIES gRPC::grpc ${ARROW_PROTOBUF_LIBPROTOBUF})
+  set_target_properties(gRPC::grpc++
+                        PROPERTIES IMPORTED_LOCATION
+                                   "${GRPC_STATIC_LIBRARY_GRPCPP}"
+                                   INTERFACE_INCLUDE_DIRECTORIES
+                                   "${GRPC_INCLUDE_DIR}"
+                                   INTERFACE_LINK_LIBRARIES
+                                   "${GRPCPP_LINK_LIBRARIES}")
 
   add_executable(gRPC::grpc_cpp_plugin IMPORTED)
   set_target_properties(gRPC::grpc_cpp_plugin
@@ -2607,11 +2604,11 @@ macro(build_grpc)
 
   list(APPEND ARROW_BUNDLED_STATIC_LIBS
               ${ABSL_LIBRARIES}
-              gRPC::upb
+              gRPC::address_sorting
               gRPC::gpr
               gRPC::grpc
-              gRPC::address_sorting
-              gRPC::grpcpp_for_bundling)
+              gRPC::grpcpp_for_bundling
+              gRPC::upb)
 endmacro()
 
 if(ARROW_WITH_GRPC)
@@ -2622,14 +2619,8 @@ if(ARROW_WITH_GRPC)
                      REQUIRED_VERSION
                      ${ARROW_GRPC_REQUIRED_VERSION})
 
-  if(TARGET gRPC::address_sorting)
-    set(GRPC_HAS_ADDRESS_SORTING TRUE)
-  else()
-    set(GRPC_HAS_ADDRESS_SORTING FALSE)
-  endif()
-
   # TODO: Don't use global includes but rather target_include_directories
-  get_target_property(GRPC_INCLUDE_DIR gRPC::grpc INTERFACE_INCLUDE_DIRECTORIES)
+  get_target_property(GRPC_INCLUDE_DIR gRPC::grpc++ INTERFACE_INCLUDE_DIRECTORIES)
   include_directories(SYSTEM ${GRPC_INCLUDE_DIR})
 
   if(GRPC_VENDORED)
diff --git a/cpp/src/arrow/flight/CMakeLists.txt b/cpp/src/arrow/flight/CMakeLists.txt
index bc91d7e8c22..e1176ff0ac0 100644
--- a/cpp/src/arrow/flight/CMakeLists.txt
+++ b/cpp/src/arrow/flight/CMakeLists.txt
@@ -19,15 +19,10 @@ add_custom_target(arrow_flight)
 
 arrow_install_all_headers("arrow/flight")
 
-set(ARROW_FLIGHT_STATIC_LINK_LIBS
-    gRPC::grpc++
-    ${ABSL_LIBRARIES}
-    ${ARROW_PROTOBUF_LIBPROTOBUF}
-    c-ares::cares
-    ZLIB::ZLIB)
+set(ARROW_FLIGHT_LINK_LIBS gRPC::grpc++)
 
 if(WIN32)
-  list(APPEND ARROW_FLIGHT_STATIC_LINK_LIBS ws2_32.lib)
+  list(APPEND ARROW_FLIGHT_LINK_LIBS ws2_32.lib)
 endif()
 
 if(ARROW_TEST_LINKAGE STREQUAL "static")
@@ -83,7 +78,7 @@ function(test_grpc_version DST_VAR DETECT_VERSION TEST_FILE)
     try_compile(HAS_GRPC_VERSION ${CMAKE_CURRENT_BINARY_DIR}/try_compile SOURCES
                 "${CMAKE_CURRENT_SOURCE_DIR}/try_compile/${TEST_FILE}"
                 CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${CURRENT_INCLUDE_DIRECTORIES}"
-                LINK_LIBRARIES gRPC::grpc gRPC::grpc++
+                LINK_LIBRARIES gRPC::grpc++
                 OUTPUT_VARIABLE TLS_CREDENTIALS_OPTIONS_CHECK_OUTPUT CXX_STANDARD 11)
     if(HAS_GRPC_VERSION)
       set(${DST_VAR}
@@ -177,10 +172,10 @@ add_arrow_lib(arrow_flight
               ${ARROW_VERSION_SCRIPT_FLAGS} # Defined in cpp/arrow/CMakeLists.txt
               SHARED_LINK_LIBS
               arrow_shared
-              ${ARROW_FLIGHT_STATIC_LINK_LIBS}
+              ${ARROW_FLIGHT_LINK_LIBS}
               STATIC_LINK_LIBS
               arrow_static
-              ${ARROW_FLIGHT_STATIC_LINK_LIBS})
+              ${ARROW_FLIGHT_LINK_LIBS})
 
 foreach(LIB_TARGET ${ARROW_FLIGHT_LIBRARIES})
   target_compile_definitions(${LIB_TARGET} PRIVATE ARROW_FLIGHT_EXPORTING)

From 2c77f3b00a9c8d4fa089a33174a55bfa88b25e69 Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Tue, 13 Apr 2021 12:57:25 +0200
Subject: [PATCH 005/719] ARROW-12352: [CI][R][Windows] Remove needless
 workaround for MSYS2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

repo.msys2.org is alive. sf.net may be fragile than repo.msys2.org.

See also ARROW-10202: https://issues.apache.org/jira/browse/ARROW-10202

Closes #10004 from kou/ci-r-remove-needless-msys2-workaround

Authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 ci/scripts/r_windows_build.sh | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/ci/scripts/r_windows_build.sh b/ci/scripts/r_windows_build.sh
index be03b75f5ad..9988dfb6494 100755
--- a/ci/scripts/r_windows_build.sh
+++ b/ci/scripts/r_windows_build.sh
@@ -28,13 +28,8 @@ if [ "$RTOOLS_VERSION" = "35" ]; then
   curl https://raw.githubusercontent.com/r-windows/rtools-backports/master/pacman.conf > /etc/pacman.conf
   # Update keys: https://www.msys2.org/news/#2020-06-29-new-packagers
   msys2_repo_base_url=https://repo.msys2.org/msys
-  # Mirror
-  msys2_repo_base_url=https://sourceforge.net/projects/msys2/files/REPOS/MSYS2
   curl -OSsL "${msys2_repo_base_url}/x86_64/msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz"
   pacman -U --noconfirm msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz && rm msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz
-  # Use sf.net instead of http://repo.msys2.org/ temporary.
-  sed -i -e "s,^Server = http://repo\.msys2\.org/msys,Server = ${msys2_repo_base_url},g" \
-    /etc/pacman.conf
   pacman --noconfirm -Scc
   pacman --noconfirm -Syy
   # lib-4.9.3 is for libraries compiled with gcc 4.9 (Rtools 3.5)

From 72249203be90b45a315cf8028536fd72a7f9427b Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Tue, 13 Apr 2021 13:02:11 +0200
Subject: [PATCH 006/719] ARROW-11752: [R] Replace usage of
 testthat::expect_is()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`testthat::expect_is` is now deprecated - this PR replaces uses of it with alternative functions.  When updating `expect_dplyr_error`, I fixed an issue with its usage which led to one of the tests failing as it no longer gives the expected error, so have set this test to skip.

Closes #9909 from thisisnic/arrow-11752

Lead-authored-by: Nic Crane <thisisnic@gmail.com>
Co-authored-by: Nic <thisisnic@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 r/tests/testthat/helper-expectation.R       | 32 +++++++++-
 r/tests/testthat/test-Array.R               |  6 +-
 r/tests/testthat/test-RecordBatch.R         |  4 +-
 r/tests/testthat/test-Table.R               |  6 +-
 r/tests/testthat/test-arrow-info.R          |  2 +-
 r/tests/testthat/test-buffer-reader.R       |  8 +--
 r/tests/testthat/test-buffer.R              | 10 ++--
 r/tests/testthat/test-chunked-array.R       |  6 +-
 r/tests/testthat/test-compute-aggregate.R   | 28 ++++-----
 r/tests/testthat/test-compute-vector.R      |  2 +-
 r/tests/testthat/test-data-type.R           |  6 +-
 r/tests/testthat/test-dataset.R             | 66 ++++++++++-----------
 r/tests/testthat/test-dplyr-filter.R        | 12 ++--
 r/tests/testthat/test-dplyr-mutate.R        |  2 +-
 r/tests/testthat/test-dplyr.R               | 20 +++----
 r/tests/testthat/test-expression.R          | 44 +++++++-------
 r/tests/testthat/test-feather.R             | 16 ++---
 r/tests/testthat/test-filesystem.R          | 16 ++---
 r/tests/testthat/test-json.R                |  6 +-
 r/tests/testthat/test-memory-pool.R         |  4 +-
 r/tests/testthat/test-message-reader.R      | 32 +++++-----
 r/tests/testthat/test-message.R             | 12 ++--
 r/tests/testthat/test-metadata.R            |  2 +-
 r/tests/testthat/test-python.R              | 16 ++---
 r/tests/testthat/test-read-record-batch.R   |  2 +-
 r/tests/testthat/test-read-write.R          |  2 +-
 r/tests/testthat/test-record-batch-reader.R | 26 ++++----
 r/tests/testthat/test-s3-minio.R            |  2 +-
 r/tests/testthat/test-scalar.R              |  2 +-
 r/tests/testthat/test-schema.R              | 14 ++---
 30 files changed, 219 insertions(+), 187 deletions(-)

diff --git a/r/tests/testthat/helper-expectation.R b/r/tests/testthat/helper-expectation.R
index 39cc9e0597a..2ebd44f7bba 100644
--- a/r/tests/testthat/helper-expectation.R
+++ b/r/tests/testthat/helper-expectation.R
@@ -23,6 +23,11 @@ expect_data_frame <- function(x, y, ...) {
   expect_equal(as.data.frame(x), y, ...)
 }
 
+expect_r6_class <- function(object, class){
+  expect_s3_class(object, class)
+  expect_s3_class(object, "R6")
+}
+
 expect_equivalent <- function(object, expected, ...) {
   # HACK: dplyr includes an all.equal.tbl_df method that is causing failures.
   # They look spurious, like:
@@ -98,12 +103,35 @@ expect_dplyr_equal <- function(expr, # A dplyr pipeline with `input` as its star
 expect_dplyr_error <- function(expr, # A dplyr pipeline with `input` as its start
                                tbl,  # A tbl/df as reference, will make RB/Table with
                                ...) {
+  # ensure we have supplied tbl
+  force(tbl)
+  
   expr <- rlang::enquo(expr)
   msg <- tryCatch(
     rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(input = tbl))),
-    error = function (e) conditionMessage(e)
+    error = function (e) {
+      msg <- conditionMessage(e)
+
+      # The error here is of the form:
+      #
+      # Problem with `filter()` input `..1`.
+      # x object 'b_var' not found
+      # ℹ Input `..1` is `chr == b_var`.
+      #
+      # but what we really care about is the `x` block
+      # so (temporarily) let's pull those blocks out when we find them
+      pattern <- i18ize_error_messages()
+      
+      if (grepl(pattern, msg)) {
+        msg <- sub(paste0("^.*(", pattern, ").*$"), "\\1", msg)
+      }
+      msg
+    }
   )
-  expect_is(msg, "character", label = "dplyr on data.frame did not error")
+  # make sure msg is a character object (i.e. there has been an error)
+  # If it did not error, we would get a data.frame or whatever
+  # This expectation will tell us "dplyr on data.frame errored is not TRUE"
+  expect_true(identical(typeof(msg), "character"), label = "dplyr on data.frame errored")
 
   expect_error(
     rlang::eval_tidy(
diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R
index 35ae357f703..b4fa8296d3a 100644
--- a/r/tests/testthat/test-Array.R
+++ b/r/tests/testthat/test-Array.R
@@ -496,7 +496,7 @@ test_that("Array$create() supports tibble with no columns (ARROW-8354)", {
 
 test_that("Array$create() handles vector -> list arrays (ARROW-7662)", {
   # Should be able to create an empty list with a type hint.
-  expect_is(Array$create(list(), list_of(bool())), "ListArray")
+  expect_r6_class(Array$create(list(), list_of(bool())), "ListArray")
 
   # logical
   expect_array_roundtrip(list(NA), list_of(bool()))
@@ -542,7 +542,7 @@ test_that("Array$create() handles vector -> list arrays (ARROW-7662)", {
 
 test_that("Array$create() handles vector -> large list arrays", {
   # Should be able to create an empty list with a type hint.
-  expect_is(Array$create(list(), type = large_list_of(bool())), "LargeListArray")
+  expect_r6_class(Array$create(list(), type = large_list_of(bool())), "LargeListArray")
 
   # logical
   expect_array_roundtrip(list(NA), large_list_of(bool()), as = large_list_of(bool()))
@@ -587,7 +587,7 @@ test_that("Array$create() handles vector -> large list arrays", {
 
 test_that("Array$create() handles vector -> fixed size list arrays", {
   # Should be able to create an empty list with a type hint.
-  expect_is(Array$create(list(), type = fixed_size_list_of(bool(), 20)), "FixedSizeListArray")
+  expect_r6_class(Array$create(list(), type = fixed_size_list_of(bool(), 20)), "FixedSizeListArray")
 
   # logical
   expect_array_roundtrip(list(NA), fixed_size_list_of(bool(), 1L), as = fixed_size_list_of(bool(), 1L))
diff --git a/r/tests/testthat/test-RecordBatch.R b/r/tests/testthat/test-RecordBatch.R
index b71c07b78c2..ff7f17eca6e 100644
--- a/r/tests/testthat/test-RecordBatch.R
+++ b/r/tests/testthat/test-RecordBatch.R
@@ -438,8 +438,8 @@ test_that("RecordBatch$Equals(check_metadata)", {
   rb1 <- record_batch(df)
   rb2 <- record_batch(df, schema = rb1$schema$WithMetadata(list(some="metadata")))
 
-  expect_is(rb1, "RecordBatch")
-  expect_is(rb2, "RecordBatch")
+  expect_r6_class(rb1, "RecordBatch")
+  expect_r6_class(rb2, "RecordBatch")
   expect_false(rb1$schema$HasMetadata)
   expect_true(rb2$schema$HasMetadata)
   expect_identical(rb2$schema$metadata, list(some = "metadata"))
diff --git a/r/tests/testthat/test-Table.R b/r/tests/testthat/test-Table.R
index 7a0b8bd6c02..86bda393e2d 100644
--- a/r/tests/testthat/test-Table.R
+++ b/r/tests/testthat/test-Table.R
@@ -299,7 +299,7 @@ test_that("table active bindings", {
   tab <- Table$create(tbl)
 
   expect_identical(dim(tbl), dim(tab))
-  expect_is(tab$columns, "list")
+  expect_type(tab$columns, "list")
   expect_equal(tab$columns[[1]], tab[[1]])
 })
 
@@ -400,8 +400,8 @@ test_that("Table$Equals(check_metadata)", {
   tab2 <- Table$create(x = 1:2, y = c("a", "b"),
                        schema = tab1$schema$WithMetadata(list(some="metadata")))
 
-  expect_is(tab1, "Table")
-  expect_is(tab2, "Table")
+  expect_r6_class(tab1, "Table")
+  expect_r6_class(tab2, "Table")
   expect_false(tab1$schema$HasMetadata)
   expect_true(tab2$schema$HasMetadata)
   expect_identical(tab2$schema$metadata, list(some = "metadata"))
diff --git a/r/tests/testthat/test-arrow-info.R b/r/tests/testthat/test-arrow-info.R
index 2a7af3aac67..3fac3f422e8 100644
--- a/r/tests/testthat/test-arrow-info.R
+++ b/r/tests/testthat/test-arrow-info.R
@@ -16,7 +16,7 @@
 # under the License.
 
 test_that("arrow_info()", {
-  expect_is(arrow_info(), "arrow_info")
+  expect_s3_class(arrow_info(), "arrow_info")
   expect_output(print(arrow_info()), "Arrow package version")
   options(arrow.foo=FALSE)
   expect_output(print(arrow_info()), "arrow.foo")
diff --git a/r/tests/testthat/test-buffer-reader.R b/r/tests/testthat/test-buffer-reader.R
index 94be16ad569..3236a3a477d 100644
--- a/r/tests/testthat/test-buffer-reader.R
+++ b/r/tests/testthat/test-buffer-reader.R
@@ -22,9 +22,9 @@ test_that("BufferReader can be created from R objects", {
   int <- BufferReader$create(integer(13))
   raw <- BufferReader$create(raw(16))
 
-  expect_is(num, "BufferReader")
-  expect_is(int, "BufferReader")
-  expect_is(raw, "BufferReader")
+  expect_r6_class(num, "BufferReader")
+  expect_r6_class(int, "BufferReader")
+  expect_r6_class(raw, "BufferReader")
 
   expect_equal(num$GetSize(), 13*8)
   expect_equal(int$GetSize(), 13*4)
@@ -35,6 +35,6 @@ test_that("BufferReader can be created from Buffer", {
   buf <- buffer(raw(76))
   reader <- BufferReader$create(buf)
 
-  expect_is(reader, "BufferReader")
+  expect_r6_class(reader, "BufferReader")
   expect_equal(reader$GetSize(), 76)
 })
diff --git a/r/tests/testthat/test-buffer.R b/r/tests/testthat/test-buffer.R
index c19f61196ee..1b3ea09cb92 100644
--- a/r/tests/testthat/test-buffer.R
+++ b/r/tests/testthat/test-buffer.R
@@ -20,33 +20,33 @@ context("Buffer")
 test_that("Buffer can be created from raw vector", {
   vec <- raw(123)
   buf <- buffer(vec)
-  expect_is(buf, "Buffer")
+  expect_r6_class(buf, "Buffer")
   expect_equal(buf$size, 123)
 })
 
 test_that("Buffer can be created from integer vector", {
   vec <- integer(17)
   buf <- buffer(vec)
-  expect_is(buf, "Buffer")
+  expect_r6_class(buf, "Buffer")
   expect_equal(buf$size, 17 * 4)
 })
 
 test_that("Buffer can be created from numeric vector", {
   vec <- numeric(17)
   buf <- buffer(vec)
-  expect_is(buf, "Buffer")
+  expect_r6_class(buf, "Buffer")
   expect_equal(buf$size, 17 * 8)
 })
 
 test_that("Buffer can be created from complex vector", {
   vec <- complex(3)
   buf <- buffer(vec)
-  expect_is(buf, "Buffer")
+  expect_r6_class(buf, "Buffer")
   expect_equal(buf$size, 3 * 16)
 })
 
 test_that("buffer buffer buffers buffers", {
-  expect_is(buffer(buffer(42)), "Buffer")
+  expect_r6_class(buffer(buffer(42)), "Buffer")
 })
 
 test_that("Other types can't be converted to Buffers", {
diff --git a/r/tests/testthat/test-chunked-array.R b/r/tests/testthat/test-chunked-array.R
index 17a82de810f..e72067a6d5f 100644
--- a/r/tests/testthat/test-chunked-array.R
+++ b/r/tests/testthat/test-chunked-array.R
@@ -177,7 +177,7 @@ test_that("ChunkedArray supports integer64 (ARROW-3716)", {
   expect_type_equal(zero, int64())
   ca <- ChunkedArray$create(zero, x)
   expect_type_equal(ca, int64())
-  expect_is(as.vector(ca), "integer64")
+  expect_s3_class(as.vector(ca), "integer64")
   expect_identical(as.vector(ca), c(bit64::as.integer64(0L), x))
 })
 
@@ -199,12 +199,12 @@ test_that("integer types casts for ChunkedArray (ARROW-3741)", {
   a <- chunked_array(1:10, 1:10)
   for (type in c(int_types, uint_types)) {
     casted <- a$cast(type)
-    expect_is(casted, "ChunkedArray")
+    expect_r6_class(casted, "ChunkedArray")
     expect_type_equal(casted$type, type)
   }
   # Also test casting to double(), not actually a type, a base R function but should be alias for float64
   dbl <- a$cast(double())
-  expect_is(dbl, "ChunkedArray")
+  expect_r6_class(dbl, "ChunkedArray")
   expect_type_equal(dbl$type, float64())
 })
 
diff --git a/r/tests/testthat/test-compute-aggregate.R b/r/tests/testthat/test-compute-aggregate.R
index 2208f581de9..77010579d78 100644
--- a/r/tests/testthat/test-compute-aggregate.R
+++ b/r/tests/testthat/test-compute-aggregate.R
@@ -28,7 +28,7 @@ test_that("list_compute_functions", {
 test_that("sum.Array", {
   ints <- 1:5
   a <- Array$create(ints)
-  expect_is(sum(a), "Scalar")
+  expect_r6_class(sum(a), "Scalar")
   expect_identical(as.integer(sum(a)), sum(ints))
 
   floats <- c(1.3, 2.4, 3)
@@ -38,7 +38,7 @@ test_that("sum.Array", {
   floats <- c(floats, NA)
   na <- Array$create(floats)
   expect_identical(as.numeric(sum(na)), sum(floats))
-  expect_is(sum(na, na.rm = TRUE), "Scalar")
+  expect_r6_class(sum(na, na.rm = TRUE), "Scalar")
   expect_identical(as.numeric(sum(na, na.rm = TRUE)), sum(floats, na.rm = TRUE))
 
   bools <- c(TRUE, NA, TRUE, FALSE)
@@ -49,7 +49,7 @@ test_that("sum.Array", {
 
 test_that("sum.ChunkedArray", {
   a <- ChunkedArray$create(1:4, c(1:4, NA), 1:5)
-  expect_is(sum(a), "Scalar")
+  expect_r6_class(sum(a), "Scalar")
   expect_true(is.na(as.vector(sum(a))))
   expect_identical(as.numeric(sum(a, na.rm = TRUE)), 35)
 })
@@ -69,7 +69,7 @@ test_that("sum.Scalar", {
 test_that("mean.Array", {
   ints <- 1:4
   a <- Array$create(ints)
-  expect_is(mean(a), "Scalar")
+  expect_r6_class(mean(a), "Scalar")
   expect_identical(as.vector(mean(a)), mean(ints))
 
   floats <- c(1.3, 2.4, 3)
@@ -79,7 +79,7 @@ test_that("mean.Array", {
   floats <- c(floats, NA)
   na <- Array$create(floats)
   expect_identical(as.vector(mean(na)), mean(floats))
-  expect_is(mean(na, na.rm = TRUE), "Scalar")
+  expect_r6_class(mean(na, na.rm = TRUE), "Scalar")
   expect_identical(as.vector(mean(na, na.rm = TRUE)), mean(floats, na.rm = TRUE))
 
   bools <- c(TRUE, NA, TRUE, FALSE)
@@ -90,7 +90,7 @@ test_that("mean.Array", {
 
 test_that("mean.ChunkedArray", {
   a <- ChunkedArray$create(1:4, c(1:4, NA), 1:5)
-  expect_is(mean(a), "Scalar")
+  expect_r6_class(mean(a), "Scalar")
   expect_true(is.na(as.vector(mean(a))))
   expect_identical(as.vector(mean(a, na.rm = TRUE)), 35/13)
 })
@@ -111,7 +111,7 @@ test_that("Bad input handling of call_function", {
 test_that("min.Array", {
   ints <- 1:4
   a <- Array$create(ints)
-  expect_is(min(a), "Scalar")
+  expect_r6_class(min(a), "Scalar")
   expect_identical(as.vector(min(a)), min(ints))
 
   floats <- c(1.3, 3, 2.4)
@@ -121,7 +121,7 @@ test_that("min.Array", {
   floats <- c(floats, NA)
   na <- Array$create(floats)
   expect_identical(as.vector(min(na)), min(floats))
-  expect_is(min(na, na.rm = TRUE), "Scalar")
+  expect_r6_class(min(na, na.rm = TRUE), "Scalar")
   expect_identical(as.vector(min(na, na.rm = TRUE)), min(floats, na.rm = TRUE))
 
   bools <- c(TRUE, TRUE, FALSE)
@@ -133,7 +133,7 @@ test_that("min.Array", {
 test_that("max.Array", {
   ints <- 1:4
   a <- Array$create(ints)
-  expect_is(max(a), "Scalar")
+  expect_r6_class(max(a), "Scalar")
   expect_identical(as.vector(max(a)), max(ints))
 
   floats <- c(1.3, 3, 2.4)
@@ -143,7 +143,7 @@ test_that("max.Array", {
   floats <- c(floats, NA)
   na <- Array$create(floats)
   expect_identical(as.vector(max(na)), max(floats))
-  expect_is(max(na, na.rm = TRUE), "Scalar")
+  expect_r6_class(max(na, na.rm = TRUE), "Scalar")
   expect_identical(as.vector(max(na, na.rm = TRUE)), max(floats, na.rm = TRUE))
 
   bools <- c(TRUE, TRUE, FALSE)
@@ -155,7 +155,7 @@ test_that("max.Array", {
 test_that("min.ChunkedArray", {
   ints <- 1:4
   a <- ChunkedArray$create(ints)
-  expect_is(min(a), "Scalar")
+  expect_r6_class(min(a), "Scalar")
   expect_identical(as.vector(min(a)), min(ints))
 
   floats <- c(1.3, 3, 2.4)
@@ -165,7 +165,7 @@ test_that("min.ChunkedArray", {
   floats <- c(floats, NA)
   na <- ChunkedArray$create(floats)
   expect_identical(as.vector(min(na)), min(floats))
-  expect_is(min(na, na.rm = TRUE), "Scalar")
+  expect_r6_class(min(na, na.rm = TRUE), "Scalar")
   expect_identical(as.vector(min(na, na.rm = TRUE)), min(floats, na.rm = TRUE))
 
   bools <- c(TRUE, TRUE, FALSE)
@@ -177,7 +177,7 @@ test_that("min.ChunkedArray", {
 test_that("max.ChunkedArray", {
   ints <- 1:4
   a <- ChunkedArray$create(ints)
-  expect_is(max(a), "Scalar")
+  expect_r6_class(max(a), "Scalar")
   expect_identical(as.vector(max(a)), max(ints))
 
   floats <- c(1.3, 3, 2.4)
@@ -187,7 +187,7 @@ test_that("max.ChunkedArray", {
   floats <- c(floats, NA)
   na <- ChunkedArray$create(floats)
   expect_identical(as.vector(max(na)), max(floats))
-  expect_is(max(na, na.rm = TRUE), "Scalar")
+  expect_r6_class(max(na, na.rm = TRUE), "Scalar")
   expect_identical(as.vector(max(na, na.rm = TRUE)), max(floats, na.rm = TRUE))
 
   bools <- c(TRUE, TRUE, FALSE)
diff --git a/r/tests/testthat/test-compute-vector.R b/r/tests/testthat/test-compute-vector.R
index 0b184889bee..95e93634934 100644
--- a/r/tests/testthat/test-compute-vector.R
+++ b/r/tests/testthat/test-compute-vector.R
@@ -18,7 +18,7 @@
 expect_bool_function_equal <- function(array_exp, r_exp) {
   # Assert that the Array operation returns a boolean array
   # and that its contents are equal to expected
-  expect_is(array_exp, "ArrowDatum")
+  expect_r6_class(array_exp, "ArrowDatum")
   expect_type_equal(array_exp, bool())
   expect_identical(as.vector(array_exp), r_exp)
 }
diff --git a/r/tests/testthat/test-data-type.R b/r/tests/testthat/test-data-type.R
index a5ecb41de64..5c0a31191a1 100644
--- a/r/tests/testthat/test-data-type.R
+++ b/r/tests/testthat/test-data-type.R
@@ -392,17 +392,17 @@ test_that("decimal type and validation", {
   expect_error(decimal(100, 2), "Invalid: Decimal precision out of range: 100")
   expect_error(decimal(4, NA), '"scale" must be an integer')
 
-  expect_is(decimal(4, 2), "Decimal128Type")
+  expect_r6_class(decimal(4, 2), "Decimal128Type")
 
 })
 
 test_that("Binary", {
-  expect_is(binary(), "Binary")
+  expect_r6_class(binary(), "Binary")
   expect_equal(binary()$ToString(), "binary")
 })
 
 test_that("FixedSizeBinary", {
-  expect_is(fixed_size_binary(4), "FixedSizeBinary")
+  expect_r6_class(fixed_size_binary(4), "FixedSizeBinary")
   expect_equal(fixed_size_binary(4)$ToString(), "fixed_size_binary[4]")
 
   # input validation
diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R
index 932c568cd38..192b4b4220d 100644
--- a/r/tests/testthat/test-dataset.R
+++ b/r/tests/testthat/test-dataset.R
@@ -100,9 +100,9 @@ if(arrow_with_parquet()) {
 test_that("Simple interface for datasets", {
   skip_if_not_available("parquet")
   ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8()))
-  expect_is(ds$format, "ParquetFileFormat")
-  expect_is(ds$filesystem, "LocalFileSystem")
-  expect_is(ds, "Dataset")
+  expect_r6_class(ds$format, "ParquetFileFormat")
+  expect_r6_class(ds$filesystem, "LocalFileSystem")
+  expect_r6_class(ds, "Dataset")
   expect_equivalent(
     ds %>%
       select(chr, dbl) %>%
@@ -208,7 +208,7 @@ test_that("dataset from directory URI", {
   skip_if_not_available("parquet")
   uri <- paste0("file://", dataset_dir)
   ds <- open_dataset(uri, partitioning = schema(part = uint8()))
-  expect_is(ds, "Dataset")
+  expect_r6_class(ds, "Dataset")
   expect_equivalent(
     ds %>%
       select(chr, dbl) %>%
@@ -276,7 +276,7 @@ test_that("Simple interface for datasets (custom ParquetFileFormat)", {
 test_that("Hive partitioning", {
   skip_if_not_available("parquet")
   ds <- open_dataset(hive_dir, partitioning = hive_partition(other = utf8(), group = uint8()))
-  expect_is(ds, "Dataset")
+  expect_r6_class(ds, "Dataset")
   expect_equivalent(
     ds %>%
       filter(group == 2) %>%
@@ -327,8 +327,8 @@ test_that("Partitioning inference", {
 
 test_that("IPC/Feather format data", {
   ds <- open_dataset(ipc_dir, partitioning = "part", format = "feather")
-  expect_is(ds$format, "IpcFileFormat")
-  expect_is(ds$filesystem, "LocalFileSystem")
+  expect_r6_class(ds$format, "IpcFileFormat")
+  expect_r6_class(ds$filesystem, "LocalFileSystem")
   expect_identical(names(ds), c(names(df1), "part"))
   expect_warning(
     expect_identical(dim(ds), c(NA, 7L))
@@ -356,8 +356,8 @@ test_that("IPC/Feather format data", {
 test_that("CSV dataset", {
   skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-12181
   ds <- open_dataset(csv_dir, partitioning = "part", format = "csv")
-  expect_is(ds$format, "CsvFileFormat")
-  expect_is(ds$filesystem, "LocalFileSystem")
+  expect_r6_class(ds$format, "CsvFileFormat")
+  expect_r6_class(ds$filesystem, "LocalFileSystem")
   expect_identical(names(ds), c(names(df1), "part"))
   expect_warning(
     expect_identical(dim(ds), c(NA, 7L))
@@ -426,8 +426,8 @@ test_that("compressed CSV dataset", {
   write.csv(df1, gzfile(dst_file), row.names = FALSE, quote = FALSE)
   format <- FileFormat$create("csv")
   ds <- open_dataset(dst_dir, format = format)
-  expect_is(ds$format, "CsvFileFormat")
-  expect_is(ds$filesystem, "LocalFileSystem")
+  expect_r6_class(ds$format, "CsvFileFormat")
+  expect_r6_class(ds$filesystem, "LocalFileSystem")
 
   expect_equivalent(
     ds %>%
@@ -590,7 +590,7 @@ test_that("Creating UnionDataset", {
   ds1 <- open_dataset(file.path(dataset_dir, 1))
   ds2 <- open_dataset(file.path(dataset_dir, 2))
   union1 <- open_dataset(list(ds1, ds2))
-  expect_is(union1, "UnionDataset")
+  expect_r6_class(union1, "UnionDataset")
   expect_equivalent(
     union1 %>%
       select(chr, dbl) %>%
@@ -605,7 +605,7 @@ test_that("Creating UnionDataset", {
 
   # Now with the c() method
   union2 <- c(ds1, ds2)
-  expect_is(union2, "UnionDataset")
+  expect_r6_class(union2, "UnionDataset")
   expect_equivalent(
     union2 %>%
       select(chr, dbl) %>%
@@ -624,7 +624,7 @@ test_that("Creating UnionDataset", {
 
 test_that("InMemoryDataset", {
   ds <- InMemoryDataset$create(rbind(df1, df2))
-  expect_is(ds, "InMemoryDataset")
+  expect_r6_class(ds, "InMemoryDataset")
   expect_equivalent(
     ds %>%
       select(chr, dbl) %>%
@@ -861,9 +861,9 @@ test_that("filter() on date32 columns", {
 test_that("filter() with expressions", {
   skip_if_not_available("parquet")
   ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8()))
-  expect_is(ds$format, "ParquetFileFormat")
-  expect_is(ds$filesystem, "LocalFileSystem")
-  expect_is(ds, "Dataset")
+  expect_r6_class(ds$format, "ParquetFileFormat")
+  expect_r6_class(ds$filesystem, "LocalFileSystem")
+  expect_r6_class(ds, "Dataset")
   expect_equivalent(
     ds %>%
       select(chr, dbl) %>%
@@ -1314,7 +1314,7 @@ test_that("Dataset and query print methods", {
     ),
     fixed = TRUE
   )
-  expect_is(ds$metadata, "list")
+  expect_type(ds$metadata, "list")
   q <- select(ds, string = chr, lgl, integer = int)
   expect_output(
     print(q),
@@ -1348,16 +1348,16 @@ test_that("Dataset and query print methods", {
 
 expect_scan_result <- function(ds, schm) {
   sb <- ds$NewScan()
-  expect_is(sb, "ScannerBuilder")
+  expect_r6_class(sb, "ScannerBuilder")
   expect_equal(sb$schema, schm)
 
   sb$Project(c("chr", "lgl"))
   sb$Filter(Expression$field_ref("dbl") == 8)
   scn <- sb$Finish()
-  expect_is(scn, "Scanner")
+  expect_r6_class(scn, "Scanner")
 
   tab <- scn$ToTable()
-  expect_is(tab, "Table")
+  expect_r6_class(tab, "Table")
 
   expect_equivalent(
     as.data.frame(tab),
@@ -1373,19 +1373,19 @@ test_that("Assembling a Dataset manually and getting a Table", {
 
   fmt <- FileFormat$create("parquet")
   factory <- FileSystemDatasetFactory$create(fs, selector, NULL, fmt, partitioning = partitioning)
-  expect_is(factory, "FileSystemDatasetFactory")
-
+  expect_r6_class(factory, "FileSystemDatasetFactory")
+  
   schm <- factory$Inspect()
-  expect_is(schm, "Schema")
+  expect_r6_class(schm, "Schema")
 
   phys_schm <- ParquetFileReader$create(files[1])$GetSchema()
   expect_equal(names(phys_schm), names(df1))
   expect_equal(names(schm), c(names(phys_schm), "part"))
 
   child <- factory$Finish(schm)
-  expect_is(child, "FileSystemDataset")
-  expect_is(child$schema, "Schema")
-  expect_is(child$format, "ParquetFileFormat")
+  expect_r6_class(child, "FileSystemDataset")
+  expect_r6_class(child$schema, "Schema")
+  expect_r6_class(child$format, "ParquetFileFormat")
   expect_equal(names(schm), names(child$schema))
   expect_equivalent(child$files, files)
 
@@ -1396,22 +1396,22 @@ test_that("Assembling a Dataset manually and getting a Table", {
 test_that("Assembling multiple DatasetFactories with DatasetFactory", {
   skip_if_not_available("parquet")
   factory1 <- dataset_factory(file.path(dataset_dir, 1), format = "parquet")
-  expect_is(factory1, "FileSystemDatasetFactory")
+  expect_r6_class(factory1, "FileSystemDatasetFactory")
   factory2 <- dataset_factory(file.path(dataset_dir, 2), format = "parquet")
-  expect_is(factory2, "FileSystemDatasetFactory")
+  expect_r6_class(factory2, "FileSystemDatasetFactory")
 
   factory <- DatasetFactory$create(list(factory1, factory2))
-  expect_is(factory, "DatasetFactory")
+  expect_r6_class(factory, "DatasetFactory")
 
   schm <- factory$Inspect()
-  expect_is(schm, "Schema")
+  expect_r6_class(schm, "Schema")
 
   phys_schm <- ParquetFileReader$create(files[1])$GetSchema()
   expect_equal(names(phys_schm), names(df1))
 
   ds <- factory$Finish(schm)
-  expect_is(ds, "UnionDataset")
-  expect_is(ds$schema, "Schema")
+  expect_r6_class(ds, "UnionDataset")
+  expect_r6_class(ds$schema, "Schema")
   expect_equal(names(schm), names(ds$schema))
   expect_equivalent(map(ds$children, ~.$files), files)
 
diff --git a/r/tests/testthat/test-dplyr-filter.R b/r/tests/testthat/test-dplyr-filter.R
index bac64297c5a..c4ab042380f 100644
--- a/r/tests/testthat/test-dplyr-filter.R
+++ b/r/tests/testthat/test-dplyr-filter.R
@@ -272,7 +272,7 @@ test_that("filter() with string ops", {
 
 test_that("filter environment scope", {
   # "object 'b_var' not found"
-  expect_dplyr_error(input %>% filter(batch, chr == b_var))
+  expect_dplyr_error(input %>% filter(chr == b_var), tbl)
 
   b_var <- "b"
   expect_dplyr_equal(
@@ -283,7 +283,8 @@ test_that("filter environment scope", {
   )
   # Also for functions
   # 'could not find function "isEqualTo"' because we haven't defined it yet
-  expect_dplyr_error(filter(batch, isEqualTo(int, 4)))
+  expect_dplyr_error(input %>% filter(isEqualTo(int, 4)), tbl)
+  
 
   skip("Need to substitute in user defined function too")
   # TODO: fix this: this isEqualTo function is eagerly evaluating; it should
@@ -389,11 +390,14 @@ test_that("filter() with .data pronoun", {
     tbl
   )
 
+  skip("test now faulty - code no longer gives error & outputs a empty tibble")
   # but there is an error if we don't override the masking with `.env`
   expect_dplyr_error(
-    tbl %>%
+    input %>%
       filter(.data$dbl > chr) %>%
       select(.data$chr, .data$int, .data$lgl) %>%
-      collect()
+      collect(),
+    tbl
   )
+  
 })
diff --git a/r/tests/testthat/test-dplyr-mutate.R b/r/tests/testthat/test-dplyr-mutate.R
index 662f6d7478a..4f202fa5958 100644
--- a/r/tests/testthat/test-dplyr-mutate.R
+++ b/r/tests/testthat/test-dplyr-mutate.R
@@ -26,7 +26,7 @@ tbl$verses <- verses[[1]]
 tbl$padded_strings <- stringr::str_pad(letters[1:10], width = 2*(1:10)+1, side = "both")
 
 test_that("mutate() is lazy", {
-  expect_is(
+  expect_s3_class(
     tbl %>% record_batch() %>% mutate(int = int + 6L),
     "arrow_dplyr_query"
   )
diff --git a/r/tests/testthat/test-dplyr.R b/r/tests/testthat/test-dplyr.R
index def7886a0bf..a02b00f3d95 100644
--- a/r/tests/testthat/test-dplyr.R
+++ b/r/tests/testthat/test-dplyr.R
@@ -32,7 +32,7 @@ test_that("basic select/filter/collect", {
     select(int, chr) %>%
     filter(int > 5)
 
-  expect_is(b2, "arrow_dplyr_query")
+  expect_s3_class(b2, "arrow_dplyr_query")
   t2 <- collect(b2)
   expect_equal(t2, tbl[tbl$int > 5 & !is.na(tbl$int), c("int", "chr")])
   # Test that the original object is not affected
@@ -187,7 +187,7 @@ test_that("collect(as_data_frame=FALSE)", {
     filter(int > 5) %>%
     collect(as_data_frame = FALSE)
 
-  expect_is(b2, "RecordBatch")
+  expect_r6_class(b2, "RecordBatch")
   expected <- tbl[tbl$int > 5 & !is.na(tbl$int), c("int", "chr")]
   expect_equal(as.data.frame(b2), expected)
 
@@ -195,7 +195,7 @@ test_that("collect(as_data_frame=FALSE)", {
     select(int, strng = chr) %>%
     filter(int > 5) %>%
     collect(as_data_frame = FALSE)
-  expect_is(b3, "RecordBatch")
+  expect_r6_class(b3, "RecordBatch")
   expect_equal(as.data.frame(b3), set_names(expected, c("int", "strng")))
 
   b4 <- batch %>%
@@ -203,7 +203,7 @@ test_that("collect(as_data_frame=FALSE)", {
     filter(int > 5) %>%
     group_by(int) %>%
     collect(as_data_frame = FALSE)
-  expect_is(b4, "arrow_dplyr_query")
+  expect_s3_class(b4, "arrow_dplyr_query")
   expect_equal(
     as.data.frame(b4),
     expected %>%
@@ -257,7 +257,7 @@ test_that("head", {
     filter(int > 5) %>%
     head(2)
 
-  expect_is(b2, "RecordBatch")
+  expect_r6_class(b2, "RecordBatch")
   expected <- tbl[tbl$int > 5 & !is.na(tbl$int), c("int", "chr")][1:2, ]
   expect_equal(as.data.frame(b2), expected)
 
@@ -265,7 +265,7 @@ test_that("head", {
     select(int, strng = chr) %>%
     filter(int > 5) %>%
     head(2)
-  expect_is(b3, "RecordBatch")
+  expect_r6_class(b3, "RecordBatch")
   expect_equal(as.data.frame(b3), set_names(expected, c("int", "strng")))
 
   b4 <- batch %>%
@@ -273,7 +273,7 @@ test_that("head", {
     filter(int > 5) %>%
     group_by(int) %>%
     head(2)
-  expect_is(b4, "arrow_dplyr_query")
+  expect_s3_class(b4, "arrow_dplyr_query")
   expect_equal(
     as.data.frame(b4),
     expected %>%
@@ -290,7 +290,7 @@ test_that("tail", {
     filter(int > 5) %>%
     tail(2)
 
-  expect_is(b2, "RecordBatch")
+  expect_r6_class(b2, "RecordBatch")
   expected <- tail(tbl[tbl$int > 5 & !is.na(tbl$int), c("int", "chr")], 2)
   expect_equal(as.data.frame(b2), expected)
 
@@ -298,7 +298,7 @@ test_that("tail", {
     select(int, strng = chr) %>%
     filter(int > 5) %>%
     tail(2)
-  expect_is(b3, "RecordBatch")
+  expect_r6_class(b3, "RecordBatch")
   expect_equal(as.data.frame(b3), set_names(expected, c("int", "strng")))
 
   b4 <- batch %>%
@@ -306,7 +306,7 @@ test_that("tail", {
     filter(int > 5) %>%
     group_by(int) %>%
     tail(2)
-  expect_is(b4, "arrow_dplyr_query")
+  expect_s3_class(b4, "arrow_dplyr_query")
   expect_equal(
     as.data.frame(b4),
     expected %>%
diff --git a/r/tests/testthat/test-expression.R b/r/tests/testthat/test-expression.R
index d7eb6df63e3..dd61b5e3ca2 100644
--- a/r/tests/testthat/test-expression.R
+++ b/r/tests/testthat/test-expression.R
@@ -18,7 +18,7 @@
 context("Expressions")
 
 test_that("Can create an expression", {
-  expect_is(build_array_expression(">", Array$create(1:5), 4), "array_expression")
+  expect_s3_class(build_array_expression(">", Array$create(1:5), 4), "array_expression")
 })
 
 test_that("as.vector(array_expression)", {
@@ -37,11 +37,11 @@ test_that("array_expression print method", {
 test_that("array_refs", {
   tab <- Table$create(a = 1:5)
   ex <- build_array_expression(">", array_expression("array_ref", field_name = "a"), 4)
-  expect_is(ex, "array_expression")
+  expect_s3_class(ex, "array_expression")
   expect_identical(ex$args[[1]]$args$field_name, "a")
   expect_identical(find_array_refs(ex), "a")
   out <- eval_array_expression(ex, tab)
-  expect_is(out, "ChunkedArray")
+  expect_r6_class(out, "ChunkedArray")
   expect_equal(as.vector(out), c(FALSE, FALSE, FALSE, FALSE, TRUE))
 })
 
@@ -55,45 +55,45 @@ test_that("C++ expressions", {
   i64 <- Expression$scalar(bit64::as.integer64(42))
   time <- Expression$scalar(hms::hms(56, 34, 12))
 
-  expect_is(f == g, "Expression")
-  expect_is(f == 4, "Expression")
-  expect_is(f == "", "Expression")
-  expect_is(f == NULL, "Expression")
-  expect_is(f == date, "Expression")
-  expect_is(f == i64, "Expression")
-  expect_is(f == time, "Expression")
+  expect_r6_class(f == g, "Expression")
+  expect_r6_class(f == 4, "Expression")
+  expect_r6_class(f == "", "Expression")
+  expect_r6_class(f == NULL, "Expression")
+  expect_r6_class(f == date, "Expression")
+  expect_r6_class(f == i64, "Expression")
+  expect_r6_class(f == time, "Expression")
   # can't seem to make this work right now because of R Ops.method dispatch
-  # expect_is(f == as.Date("2020-01-15"), "Expression")
-  expect_is(f == ts, "Expression")
-  expect_is(f <= 2L, "Expression")
-  expect_is(f != FALSE, "Expression")
-  expect_is(f > 4, "Expression")
-  expect_is(f < 4 & f > 2, "Expression")
-  expect_is(f < 4 | f > 2, "Expression")
-  expect_is(!(f < 4), "Expression")
+  # expect_r6_class(f == as.Date("2020-01-15"), "Expression")
+  expect_r6_class(f == ts, "Expression")
+  expect_r6_class(f <= 2L, "Expression")
+  expect_r6_class(f != FALSE, "Expression")
+  expect_r6_class(f > 4, "Expression")
+  expect_r6_class(f < 4 & f > 2, "Expression")
+  expect_r6_class(f < 4 | f > 2, "Expression")
+  expect_r6_class(!(f < 4), "Expression")
   expect_output(
     print(f > 4),
     'Expression\n(f > 4)',
     fixed = TRUE
   )
   # Interprets that as a list type
-  expect_is(f == c(1L, 2L), "Expression")
+  expect_r6_class(f == c(1L, 2L), "Expression")
 })
 
 test_that("Can create an expression", {
   a <- Array$create(as.numeric(1:5))
   expr <- array_expression("cast", a, options = list(to_type = int32()))
-  expect_is(expr, "array_expression")
+  expect_s3_class(expr, "array_expression")
   expect_equal(eval_array_expression(expr), Array$create(1:5))
 
   b <- Array$create(0.5:4.5)
   bad_expr <- array_expression("cast", b, options = list(to_type = int32()))
-  expect_is(bad_expr, "array_expression")
+  expect_s3_class(bad_expr, "array_expression")
   expect_error(
     eval_array_expression(bad_expr),
     "Invalid: Float value .* was truncated converting"
   )
   expr <- array_expression("cast", b, options = list(to_type = int32(), allow_float_truncate = TRUE))
-  expect_is(expr, "array_expression")
+  expect_s3_class(expr, "array_expression")
   expect_equal(eval_array_expression(expr), Array$create(0:4))
 })
diff --git a/r/tests/testthat/test-feather.R b/r/tests/testthat/test-feather.R
index abaae2c7195..d5d82a73e12 100644
--- a/r/tests/testthat/test-feather.R
+++ b/r/tests/testthat/test-feather.R
@@ -44,18 +44,18 @@ expect_feather_roundtrip <- function(write_fun) {
 
   # Read both back
   tab2 <- read_feather(tf2)
-  expect_is(tab2, "data.frame")
+  expect_s3_class(tab2, "data.frame")
 
   tab3 <- read_feather(tf3)
-  expect_is(tab3, "data.frame")
+  expect_s3_class(tab3, "data.frame")
 
   # reading directly from arrow::io::MemoryMappedFile
   tab4 <- read_feather(mmap_open(tf3))
-  expect_is(tab4, "data.frame")
+  expect_s3_class(tab4, "data.frame")
 
   # reading directly from arrow::io::ReadableFile
   tab5 <- read_feather(ReadableFile$create(tf3))
-  expect_is(tab5, "data.frame")
+  expect_s3_class(tab5, "data.frame")
 
   expect_equal(tib, tab2)
   expect_equal(tib, tab3)
@@ -105,7 +105,7 @@ test_that("write_feather option error handling", {
 
 test_that("read_feather supports col_select = <names>", {
   tab1 <- read_feather(feather_file, col_select = c("x", "y"))
-  expect_is(tab1, "data.frame")
+  expect_s3_class(tab1, "data.frame")
 
   expect_equal(tib$x, tab1$x)
   expect_equal(tib$y, tab1$y)
@@ -113,7 +113,7 @@ test_that("read_feather supports col_select = <names>", {
 
 test_that("feather handles col_select = <integer>", {
   tab1 <- read_feather(feather_file, col_select = 1:2)
-  expect_is(tab1, "data.frame")
+  expect_s3_class(tab1, "data.frame")
 
   expect_equal(tib$x, tab1$x)
   expect_equal(tib$y, tab1$y)
@@ -135,7 +135,7 @@ test_that("feather handles col_select = <tidyselect helper>", {
 
 test_that("feather read/write round trip", {
   tab1 <- read_feather(feather_file, as_data_frame = FALSE)
-  expect_is(tab1, "Table")
+  expect_r6_class(tab1, "Table")
 
   expect_equal(tib, as.data.frame(tab1))
 })
@@ -143,7 +143,7 @@ test_that("feather read/write round trip", {
 test_that("Read feather from raw vector", {
   test_raw <- readBin(feather_file, what = "raw", n = 5000)
   df <- read_feather(test_raw)
-  expect_is(df, "data.frame")
+  expect_s3_class(df, "data.frame")
 })
 
 test_that("FeatherReader", {
diff --git a/r/tests/testthat/test-filesystem.R b/r/tests/testthat/test-filesystem.R
index 918c495ec04..344865c077a 100644
--- a/r/tests/testthat/test-filesystem.R
+++ b/r/tests/testthat/test-filesystem.R
@@ -81,9 +81,9 @@ test_that("SubTreeFilesystem", {
   file.copy(DESCRIPTION, file.path(td, "DESCRIPTION"))
 
   st_fs <- SubTreeFileSystem$create(td)
-  expect_is(st_fs, "SubTreeFileSystem")
-  expect_is(st_fs, "FileSystem")
-  expect_is(st_fs$base_fs, "LocalFileSystem")
+  expect_r6_class(st_fs, "SubTreeFileSystem")
+  expect_r6_class(st_fs, "FileSystem")
+  expect_r6_class(st_fs$base_fs, "LocalFileSystem")
   expect_identical(
     capture.output(print(st_fs)),
     paste0("SubTreeFileSystem: ", "file://", st_fs$base_path)
@@ -137,7 +137,7 @@ test_that("FileSystem$from_uri", {
   skip_on_cran()
   skip_if_not_available("s3")
   fs_and_path <- FileSystem$from_uri("s3://ursa-labs-taxi-data")
-  expect_is(fs_and_path$fs, "S3FileSystem")
+  expect_r6_class(fs_and_path$fs, "S3FileSystem")
   expect_identical(fs_and_path$fs$region, "us-east-2")
 })
 
@@ -145,7 +145,7 @@ test_that("SubTreeFileSystem$create() with URI", {
   skip_on_cran()
   skip_if_not_available("s3")
   fs <- SubTreeFileSystem$create("s3://ursa-labs-taxi-data")
-  expect_is(fs, "SubTreeFileSystem")
+  expect_r6_class(fs, "SubTreeFileSystem")
   expect_identical(
     capture.output(print(fs)),
     "SubTreeFileSystem: s3://ursa-labs-taxi-data/"
@@ -156,15 +156,15 @@ test_that("S3FileSystem", {
   skip_on_cran()
   skip_if_not_available("s3")
   s3fs <- S3FileSystem$create()
-  expect_is(s3fs, "S3FileSystem")
+  expect_r6_class(s3fs, "S3FileSystem")
 })
 
 test_that("s3_bucket", {
   skip_on_cran()
   skip_if_not_available("s3")
   bucket <- s3_bucket("ursa-labs-r-test")
-  expect_is(bucket, "SubTreeFileSystem")
-  expect_is(bucket$base_fs, "S3FileSystem")
+  expect_r6_class(bucket, "SubTreeFileSystem")
+  expect_r6_class(bucket$base_fs, "S3FileSystem")
   expect_identical(bucket$region, "us-west-2")
   expect_identical(
     capture.output(print(bucket)),
diff --git a/r/tests/testthat/test-json.R b/r/tests/testthat/test-json.R
index a35a465bf0b..b0b508bbc4b 100644
--- a/r/tests/testthat/test-json.R
+++ b/r/tests/testthat/test-json.R
@@ -58,9 +58,9 @@ test_that("read_json_arrow() converts to tibble", {
   tab2 <- read_json_arrow(mmap_open(tf))
   tab3 <- read_json_arrow(ReadableFile$create(tf))
 
-  expect_is(tab1, "tbl_df")
-  expect_is(tab2, "tbl_df")
-  expect_is(tab3, "tbl_df")
+  expect_s3_class(tab1, "tbl_df")
+  expect_s3_class(tab2, "tbl_df")
+  expect_s3_class(tab3, "tbl_df")
 
   expect_equal(tab1, tab2)
   expect_equal(tab1, tab3)
diff --git a/r/tests/testthat/test-memory-pool.R b/r/tests/testthat/test-memory-pool.R
index ab38cc71ffd..0aa18aadc20 100644
--- a/r/tests/testthat/test-memory-pool.R
+++ b/r/tests/testthat/test-memory-pool.R
@@ -18,8 +18,8 @@
 test_that("default_memory_pool and its attributes", {
   pool <- default_memory_pool()
   # Not integer bc can be >2gb, so we cast to double
-  expect_is(pool$bytes_allocated, "numeric")
-  expect_is(pool$max_memory, "numeric")
+  expect_type(pool$bytes_allocated, "double")
+  expect_type(pool$max_memory, "double")
   expect_true(pool$backend_name %in% c("system", "jemalloc", "mimalloc"))
 
   expect_true(all(supported_memory_backends() %in% c("system", "jemalloc", "mimalloc")))
diff --git a/r/tests/testthat/test-message-reader.R b/r/tests/testthat/test-message-reader.R
index 0bd6d66c544..340a3e3ed1e 100644
--- a/r/tests/testthat/test-message-reader.R
+++ b/r/tests/testthat/test-message-reader.R
@@ -24,10 +24,10 @@ test_that("MessageReader can be created from raw vectors", {
   reader <- MessageReader$create(bytes)
 
   message <- reader$ReadNextMessage()
-  expect_is(message, "Message")
+  expect_r6_class(message, "Message")
   expect_equal(message$type, MessageType$RECORD_BATCH)
-  expect_is(message$body, "Buffer")
-  expect_is(message$metadata, "Buffer")
+  expect_r6_class(message$body, "Buffer")
+  expect_r6_class(message$metadata, "Buffer")
 
   message <- reader$ReadNextMessage()
   expect_null(message)
@@ -38,10 +38,10 @@ test_that("MessageReader can be created from raw vectors", {
   reader <- MessageReader$create(bytes)
 
   message <- reader$ReadNextMessage()
-  expect_is(message, "Message")
+  expect_r6_class(message, "Message")
   expect_equal(message$type, MessageType$SCHEMA)
-  expect_is(message$body, "Buffer")
-  expect_is(message$metadata, "Buffer")
+  expect_r6_class(message$body, "Buffer")
+  expect_r6_class(message$metadata, "Buffer")
 
   message <- reader$ReadNextMessage()
   expect_null(message)
@@ -52,16 +52,16 @@ test_that("MessageReader can be created from input stream", {
   bytes <- batch$serialize()
 
   stream <- BufferReader$create(bytes)
-  expect_is(stream, "BufferReader")
+  expect_r6_class(stream, "BufferReader")
 
   reader <- MessageReader$create(stream)
-  expect_is(reader, "MessageReader")
+  expect_r6_class(reader, "MessageReader")
 
   message <- reader$ReadNextMessage()
-  expect_is(message, "Message")
+  expect_r6_class(message, "Message")
   expect_equal(message$type, MessageType$RECORD_BATCH)
-  expect_is(message$body, "Buffer")
-  expect_is(message$metadata, "Buffer")
+  expect_r6_class(message$body, "Buffer")
+  expect_r6_class(message$metadata, "Buffer")
 
   message <- reader$ReadNextMessage()
   expect_null(message)
@@ -70,16 +70,16 @@ test_that("MessageReader can be created from input stream", {
   bytes <- schema$serialize()
 
   stream <- BufferReader$create(bytes)
-  expect_is(stream, "BufferReader")
+  expect_r6_class(stream, "BufferReader")
 
   reader <- MessageReader$create(stream)
-  expect_is(reader, "MessageReader")
+  expect_r6_class(reader, "MessageReader")
 
   message <- reader$ReadNextMessage()
-  expect_is(message, "Message")
+  expect_r6_class(message, "Message")
   expect_equal(message$type, MessageType$SCHEMA)
-  expect_is(message$body, "Buffer")
-  expect_is(message$metadata, "Buffer")
+  expect_r6_class(message$body, "Buffer")
+  expect_r6_class(message$metadata, "Buffer")
 
   message <- reader$ReadNextMessage()
   expect_null(message)
diff --git a/r/tests/testthat/test-message.R b/r/tests/testthat/test-message.R
index c6cd9fe4b09..b9fb3a162a7 100644
--- a/r/tests/testthat/test-message.R
+++ b/r/tests/testthat/test-message.R
@@ -23,10 +23,10 @@ test_that("read_message can read from input stream", {
   stream <- BufferReader$create(bytes)
 
   message <- read_message(stream)
-  expect_is(message, "Message")
+  expect_r6_class(message, "Message")
   expect_equal(message$type, MessageType$RECORD_BATCH)
-  expect_is(message$body, "Buffer")
-  expect_is(message$metadata, "Buffer")
+  expect_r6_class(message$body, "Buffer")
+  expect_r6_class(message$metadata, "Buffer")
 
   message <- read_message(stream)
   expect_null(read_message(stream))
@@ -37,10 +37,10 @@ test_that("read_message() can read Schema messages", {
   stream <- BufferReader$create(bytes)
   message <- read_message(stream)
 
-  expect_is(message, "Message")
+  expect_r6_class(message, "Message")
   expect_equal(message$type, MessageType$SCHEMA)
-  expect_is(message$body, "Buffer")
-  expect_is(message$metadata, "Buffer")
+  expect_r6_class(message$body, "Buffer")
+  expect_r6_class(message$metadata, "Buffer")
 
   message <- read_message(stream)
   expect_null(read_message(stream))
diff --git a/r/tests/testthat/test-metadata.R b/r/tests/testthat/test-metadata.R
index 4e1895e82ec..afce1c2244c 100644
--- a/r/tests/testthat/test-metadata.R
+++ b/r/tests/testthat/test-metadata.R
@@ -167,7 +167,7 @@ test_that("haven types roundtrip via feather", {
 
 test_that("Date/time type roundtrip", {
   rb <- record_batch(example_with_times)
-  expect_is(rb$schema$posixlt$type, "StructType")
+  expect_r6_class(rb$schema$posixlt$type, "StructType")
   expect_identical(as.data.frame(rb), example_with_times)
 })
 
diff --git a/r/tests/testthat/test-python.R b/r/tests/testthat/test-python.R
index b564bfee950..885274846e1 100644
--- a/r/tests/testthat/test-python.R
+++ b/r/tests/testthat/test-python.R
@@ -43,7 +43,7 @@ test_that("Array to Python", {
   pa <- reticulate::import("pyarrow", convert = FALSE)
   r <- Array$create(c(1, 2, 3))
   py <- pa$concat_arrays(list(r))
-  expect_is(py, "pyarrow.lib.Array")
+  expect_s3_class(py, "pyarrow.lib.Array")
   expect_equal(reticulate::py_to_r(py), r)
 })
 
@@ -52,7 +52,7 @@ test_that("RecordBatch to/from Python", {
   pa <- reticulate::import("pyarrow", convert = FALSE)
   batch <- record_batch(col1 = c(1, 2, 3), col2 = letters[1:3])
   py <- reticulate::r_to_py(batch)
-  expect_is(py, "pyarrow.lib.RecordBatch")
+  expect_s3_class(py, "pyarrow.lib.RecordBatch")
   expect_equal(reticulate::py_to_r(py), batch)
 })
 
@@ -63,8 +63,8 @@ test_that("Table and ChunkedArray from Python", {
   tab <- Table$create(batch, batch)
   pybatch <- reticulate::r_to_py(batch)
   pytab <- pa$Table$from_batches(list(pybatch, pybatch))
-  expect_is(pytab, "pyarrow.lib.Table")
-  expect_is(pytab[0], "pyarrow.lib.ChunkedArray")
+  expect_s3_class(pytab, "pyarrow.lib.Table")
+  expect_s3_class(pytab[0], "pyarrow.lib.ChunkedArray")
   expect_equal(reticulate::py_to_r(pytab[0]), tab$col1)
   expect_equal(reticulate::py_to_r(pytab), tab)
 })
@@ -75,11 +75,11 @@ test_that("Table and ChunkedArray to Python", {
   tab <- Table$create(batch, batch)
 
   pychunked <- reticulate::r_to_py(tab$col1)
-  expect_is(pychunked, "pyarrow.lib.ChunkedArray")
+  expect_s3_class(pychunked, "pyarrow.lib.ChunkedArray")
   expect_equal(reticulate::py_to_r(pychunked), tab$col1)
 
   pytab <- reticulate::r_to_py(tab)
-  expect_is(pytab, "pyarrow.lib.Table")
+  expect_s3_class(pytab, "pyarrow.lib.Table")
   expect_equal(reticulate::py_to_r(pytab), tab)
 })
 
@@ -87,7 +87,7 @@ test_that("RecordBatch with metadata roundtrip", {
   skip_if_no_pyarrow()
   batch <- RecordBatch$create(example_with_times)
   pybatch <- reticulate::r_to_py(batch)
-  expect_is(pybatch, "pyarrow.lib.RecordBatch")
+  expect_s3_class(pybatch, "pyarrow.lib.RecordBatch")
   expect_equal(reticulate::py_to_r(pybatch), batch)
   expect_identical(as.data.frame(reticulate::py_to_r(pybatch)), example_with_times)
 })
@@ -96,7 +96,7 @@ test_that("Table with metadata roundtrip", {
   skip_if_no_pyarrow()
   tab <- Table$create(example_with_times)
   pytab <- reticulate::r_to_py(tab)
-  expect_is(pytab, "pyarrow.lib.Table")
+  expect_s3_class(pytab, "pyarrow.lib.Table")
   expect_equal(reticulate::py_to_r(pytab), tab)
   expect_identical(as.data.frame(reticulate::py_to_r(pytab)), example_with_times)
 })
diff --git a/r/tests/testthat/test-read-record-batch.R b/r/tests/testthat/test-read-record-batch.R
index 9383c476588..56f4e8e6e00 100644
--- a/r/tests/testthat/test-read-record-batch.R
+++ b/r/tests/testthat/test-read-record-batch.R
@@ -34,7 +34,7 @@ test_that("RecordBatchFileWriter / RecordBatchFileReader roundtrips", {
 
   stream <- FileOutputStream$create(tf)
   writer <- RecordBatchFileWriter$create(stream, tab$schema)
-  expect_is(writer, "RecordBatchWriter")
+  expect_r6_class(writer, "RecordBatchWriter")
   writer$write_table(tab)
   writer$close()
   stream$close()
diff --git a/r/tests/testthat/test-read-write.R b/r/tests/testthat/test-read-write.R
index a9ce5f12809..ea3aa34a424 100644
--- a/r/tests/testthat/test-read-write.R
+++ b/r/tests/testthat/test-read-write.R
@@ -119,7 +119,7 @@ test_that("reading/writing a raw vector (sparklyr integration)", {
     as.data.frame(RecordBatchStreamReader$create(x)$read_next_batch())
   }
   bytes <- write_to_raw(example_data)
-  expect_is(bytes, "raw")
+  expect_type(bytes, "raw")
   expect_identical(read_from_raw_test(bytes), example_data)
   # this could just be `read_ipc_stream(x)`; propose that
   expect_identical(read_ipc_stream(bytes), example_data)
diff --git a/r/tests/testthat/test-record-batch-reader.R b/r/tests/testthat/test-record-batch-reader.R
index 9a5e4dd4cc0..483588ab4bb 100644
--- a/r/tests/testthat/test-record-batch-reader.R
+++ b/r/tests/testthat/test-record-batch-reader.R
@@ -28,7 +28,7 @@ test_that("RecordBatchStreamReader / Writer", {
   sink <- BufferOutputStream$create()
   expect_equal(sink$tell(), 0)
   writer <- RecordBatchStreamWriter$create(sink, batch$schema)
-  expect_is(writer, "RecordBatchWriter")
+  expect_r6_class(writer, "RecordBatchWriter")
   writer$write(batch)
   writer$write(tab)
   writer$write(tbl)
@@ -36,19 +36,19 @@ test_that("RecordBatchStreamReader / Writer", {
   writer$close()
 
   buf <- sink$finish()
-  expect_is(buf, "Buffer")
+  expect_r6_class(buf, "Buffer")
 
   reader <- RecordBatchStreamReader$create(buf)
-  expect_is(reader, "RecordBatchStreamReader")
+  expect_r6_class(reader, "RecordBatchStreamReader")
 
   batch1 <- reader$read_next_batch()
-  expect_is(batch1, "RecordBatch")
+  expect_r6_class(batch1, "RecordBatch")
   expect_equal(batch, batch1)
   batch2 <- reader$read_next_batch()
-  expect_is(batch2, "RecordBatch")
+  expect_r6_class(batch2, "RecordBatch")
   expect_equal(batch, batch2)
   batch3 <- reader$read_next_batch()
-  expect_is(batch3, "RecordBatch")
+  expect_r6_class(batch3, "RecordBatch")
   expect_equal(batch, batch3)
   expect_null(reader$read_next_batch())
 })
@@ -56,20 +56,20 @@ test_that("RecordBatchStreamReader / Writer", {
 test_that("RecordBatchFileReader / Writer", {
   sink <- BufferOutputStream$create()
   writer <- RecordBatchFileWriter$create(sink, batch$schema)
-  expect_is(writer, "RecordBatchWriter")
+  expect_r6_class(writer, "RecordBatchWriter")
   writer$write(batch)
   writer$write(tab)
   writer$write(tbl)
   writer$close()
 
   buf <- sink$finish()
-  expect_is(buf, "Buffer")
+  expect_r6_class(buf, "Buffer")
 
   reader <- RecordBatchFileReader$create(buf)
-  expect_is(reader, "RecordBatchFileReader")
+  expect_r6_class(reader, "RecordBatchFileReader")
 
   batch1 <- reader$get_batch(0)
-  expect_is(batch1, "RecordBatch")
+  expect_r6_class(batch1, "RecordBatch")
   expect_equal(batch, batch1)
 
   expect_equal(reader$num_record_batches, 3)
@@ -78,7 +78,7 @@ test_that("RecordBatchFileReader / Writer", {
 test_that("StreamReader read_table", {
   sink <- BufferOutputStream$create()
   writer <- RecordBatchStreamWriter$create(sink, batch$schema)
-  expect_is(writer, "RecordBatchWriter")
+  expect_r6_class(writer, "RecordBatchWriter")
   writer$write(batch)
   writer$write(tab)
   writer$write(tbl)
@@ -93,7 +93,7 @@ test_that("StreamReader read_table", {
 test_that("FileReader read_table", {
   sink <- BufferOutputStream$create()
   writer <- RecordBatchFileWriter$create(sink, batch$schema)
-  expect_is(writer, "RecordBatchWriter")
+  expect_r6_class(writer, "RecordBatchWriter")
   writer$write(batch)
   writer$write(tab)
   writer$write(tbl)
@@ -137,6 +137,6 @@ test_that("reader with 0 batches", {
 
   reader <- RecordBatchStreamReader$create(buf)
   tab <- reader$read_table()
-  expect_is(tab, "Table")
+  expect_r6_class(tab, "Table")
   expect_identical(dim(tab), c(0L, 1L))
 })
diff --git a/r/tests/testthat/test-s3-minio.R b/r/tests/testthat/test-s3-minio.R
index d3493f8110a..8cb0dafdfe4 100644
--- a/r/tests/testthat/test-s3-minio.R
+++ b/r/tests/testthat/test-s3-minio.R
@@ -38,7 +38,7 @@ if (arrow_with_s3() && process_is_running("minio server")) {
       scheme = "http",
       endpoint_override = paste0("localhost:", minio_port)
     )
-    expect_is(fs, "S3FileSystem")
+    expect_r6_class(fs, "S3FileSystem")
     now <- as.character(as.numeric(Sys.time()))
     # If minio isn't running, this will hang for a few seconds and fail with a
     # curl timeout, causing `run_these` to be set to FALSE and skipping the tests
diff --git a/r/tests/testthat/test-scalar.R b/r/tests/testthat/test-scalar.R
index 501298a8021..21b2836496e 100644
--- a/r/tests/testthat/test-scalar.R
+++ b/r/tests/testthat/test-scalar.R
@@ -19,7 +19,7 @@ context("Scalar")
 
 expect_scalar_roundtrip <- function(x, type) {
   s <- Scalar$create(x)
-  expect_is(s, "Scalar")
+  expect_r6_class(s, "Scalar")
   expect_type_equal(s$type, type)
   expect_identical(length(s), 1L)
   if (inherits(type, "NestedType")) {
diff --git a/r/tests/testthat/test-schema.R b/r/tests/testthat/test-schema.R
index ac888d94101..87dad175e2b 100644
--- a/r/tests/testthat/test-schema.R
+++ b/r/tests/testthat/test-schema.R
@@ -120,27 +120,27 @@ test_that("reading schema from Buffer", {
   # TODO: this uses the streaming format, i.e. from RecordBatchStreamWriter
   #       maybe there is an easier way to serialize a schema
   batch <- record_batch(x = 1:10)
-  expect_is(batch, "RecordBatch")
+  expect_r6_class(batch, "RecordBatch")
 
   stream <- BufferOutputStream$create()
   writer <- RecordBatchStreamWriter$create(stream, batch$schema)
-  expect_is(writer, "RecordBatchWriter")
+  expect_r6_class(writer, "RecordBatchWriter")
   writer$close()
 
   buffer <- stream$finish()
-  expect_is(buffer, "Buffer")
+  expect_r6_class(buffer, "Buffer")
 
   reader <- MessageReader$create(buffer)
-  expect_is(reader, "MessageReader")
+  expect_r6_class(reader, "MessageReader")
 
   message <- reader$ReadNextMessage()
-  expect_is(message, "Message")
+  expect_r6_class(message, "Message")
   expect_equal(message$type, MessageType$SCHEMA)
 
   stream <- BufferReader$create(buffer)
-  expect_is(stream, "BufferReader")
+  expect_r6_class(stream, "BufferReader")
   message <- read_message(stream)
-  expect_is(message, "Message")
+  expect_r6_class(message, "Message")
   expect_equal(message$type, MessageType$SCHEMA)
 })
 

From 1ed681912be7246695cdd938ea632e1751403f67 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Tue, 13 Apr 2021 07:14:45 -0400
Subject: [PATCH 007/719] ARROW-12277: [Rust][DataFusion] Implement
 Sum/Count/Min/Max aggregates for Timestamp(_,_)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

# Rationale:
If you try and aggregate (via SUM, for example) a column of a timestamp type, DataFusion generates an error:
```
Coercion from [Timestamp(Nanosecond, None)] to the signature Uniform(1, [Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, Float32, Float64]) failed.
```

For example, from IOx

```
> show columns from t;
+---------------+--------------+------------+-------------+-----------------------------+-------------+
| table_catalog | table_schema | table_name | column_name | data_type                   | is_nullable |
+---------------+--------------+------------+-------------+-----------------------------+-------------+
| datafusion    | public       | t          | a           | Utf8                        | NO          |
| datafusion    | public       | t          | b           | Timestamp(Nanosecond, None) | NO          |
+---------------+--------------+------------+-------------+-----------------------------+-------------+
2 row in set. Query took 0 seconds.
> select sum(b) from t;
Plan("Coercion from [Timestamp(Nanosecond, None)] to the signature Uniform(1, [Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, Float32, Float64]) failed.")
```

# Changes:
Add support for aggregating timestamp types and tests for same

# Notes
Note this is follow on / more fleshing out of the work done in #9773 by @velvia (👋  thanks for adding Timestamps to `ScalarValue`)

Supporting AVG on timestamps is tracked by https://issues.apache.org/jira/browse/ARROW-12318. It is more involved (as currently Avg assumes the output type is always F64), and not important for myuse case at the moment.

Closes #9970 from alamb/alamb/ARROW-12277-aggregate-timestamps

Authored-by: Andrew Lamb <andrew@nerdnetworks.org>
Signed-off-by: Andrew Lamb <andrew@nerdnetworks.org>
---
 rust/datafusion/src/execution/context.rs      | 115 +++++++++++++++
 .../src/physical_plan/aggregates.rs           |  19 ++-
 .../src/physical_plan/datetime_expressions.rs |   4 +-
 .../src/physical_plan/expressions/min_max.rs  |  49 ++++++-
 .../src/physical_plan/group_scalar.rs         |  24 +++-
 rust/datafusion/src/scalar.rs                 | 132 ++++++++++++++----
 rust/datafusion/src/test/mod.rs               |  92 +++++++++++-
 7 files changed, 393 insertions(+), 42 deletions(-)

diff --git a/rust/datafusion/src/execution/context.rs b/rust/datafusion/src/execution/context.rs
index ce0ea6d0050..07d5b629e1b 100644
--- a/rust/datafusion/src/execution/context.rs
+++ b/rust/datafusion/src/execution/context.rs
@@ -1403,6 +1403,121 @@ mod tests {
         Ok(())
     }
 
+    #[tokio::test]
+    async fn aggregate_timestamps_sum() -> Result<()> {
+        let tmp_dir = TempDir::new()?;
+        let mut ctx = create_ctx(&tmp_dir, 1)?;
+        ctx.register_table("t", test::table_with_timestamps())
+            .unwrap();
+
+        let results = plan_and_collect(
+            &mut ctx,
+            "SELECT sum(nanos), sum(micros), sum(millis), sum(secs) FROM t",
+        )
+        .await
+        .unwrap_err();
+
+        assert_eq!(results.to_string(), "Error during planning: Coercion from [Timestamp(Nanosecond, None)] to the signature Uniform(1, [Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, Float32, Float64]) failed.");
+
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn aggregate_timestamps_count() -> Result<()> {
+        let tmp_dir = TempDir::new()?;
+        let mut ctx = create_ctx(&tmp_dir, 1)?;
+        ctx.register_table("t", test::table_with_timestamps())
+            .unwrap();
+
+        let results = plan_and_collect(
+            &mut ctx,
+            "SELECT count(nanos), count(micros), count(millis), count(secs) FROM t",
+        )
+        .await
+        .unwrap();
+
+        let expected = vec![
+            "+--------------+---------------+---------------+-------------+",
+            "| COUNT(nanos) | COUNT(micros) | COUNT(millis) | COUNT(secs) |",
+            "+--------------+---------------+---------------+-------------+",
+            "| 3            | 3             | 3             | 3           |",
+            "+--------------+---------------+---------------+-------------+",
+        ];
+        assert_batches_sorted_eq!(expected, &results);
+
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn aggregate_timestamps_min() -> Result<()> {
+        let tmp_dir = TempDir::new()?;
+        let mut ctx = create_ctx(&tmp_dir, 1)?;
+        ctx.register_table("t", test::table_with_timestamps())
+            .unwrap();
+
+        let results = plan_and_collect(
+            &mut ctx,
+            "SELECT min(nanos), min(micros), min(millis), min(secs) FROM t",
+        )
+        .await
+        .unwrap();
+
+        let expected = vec![
+            "+----------------------------+----------------------------+-------------------------+---------------------+",
+            "| MIN(nanos)                 | MIN(micros)                | MIN(millis)             | MIN(secs)           |",
+            "+----------------------------+----------------------------+-------------------------+---------------------+",
+            "| 2011-12-13 11:13:10.123450 | 2011-12-13 11:13:10.123450 | 2011-12-13 11:13:10.123 | 2011-12-13 11:13:10 |",
+            "+----------------------------+----------------------------+-------------------------+---------------------+",
+        ];
+        assert_batches_sorted_eq!(expected, &results);
+
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn aggregate_timestamps_max() -> Result<()> {
+        let tmp_dir = TempDir::new()?;
+        let mut ctx = create_ctx(&tmp_dir, 1)?;
+        ctx.register_table("t", test::table_with_timestamps())
+            .unwrap();
+
+        let results = plan_and_collect(
+            &mut ctx,
+            "SELECT max(nanos), max(micros), max(millis), max(secs) FROM t",
+        )
+        .await
+        .unwrap();
+
+        let expected = vec![
+            "+-------------------------+-------------------------+-------------------------+---------------------+",
+            "| MAX(nanos)              | MAX(micros)             | MAX(millis)             | MAX(secs)           |",
+            "+-------------------------+-------------------------+-------------------------+---------------------+",
+            "| 2021-01-01 05:11:10.432 | 2021-01-01 05:11:10.432 | 2021-01-01 05:11:10.432 | 2021-01-01 05:11:10 |",
+            "+-------------------------+-------------------------+-------------------------+---------------------+",
+];
+        assert_batches_sorted_eq!(expected, &results);
+
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn aggregate_timestamps_avg() -> Result<()> {
+        let tmp_dir = TempDir::new()?;
+        let mut ctx = create_ctx(&tmp_dir, 1)?;
+        ctx.register_table("t", test::table_with_timestamps())
+            .unwrap();
+
+        let results = plan_and_collect(
+            &mut ctx,
+            "SELECT avg(nanos), avg(micros), avg(millis), avg(secs) FROM t",
+        )
+        .await
+        .unwrap_err();
+
+        assert_eq!(results.to_string(), "Error during planning: Coercion from [Timestamp(Nanosecond, None)] to the signature Uniform(1, [Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, Float32, Float64]) failed.");
+        Ok(())
+    }
+
     #[tokio::test]
     async fn join_partitioned() -> Result<()> {
         // self join on partition id (workaround for duplicate column name)
diff --git a/rust/datafusion/src/physical_plan/aggregates.rs b/rust/datafusion/src/physical_plan/aggregates.rs
index be90daa954d..9417c7c8f05 100644
--- a/rust/datafusion/src/physical_plan/aggregates.rs
+++ b/rust/datafusion/src/physical_plan/aggregates.rs
@@ -34,7 +34,7 @@ use super::{
 use crate::error::{DataFusionError, Result};
 use crate::physical_plan::distinct_expressions;
 use crate::physical_plan::expressions;
-use arrow::datatypes::{DataType, Schema};
+use arrow::datatypes::{DataType, Schema, TimeUnit};
 use expressions::{avg_return_type, sum_return_type};
 use std::{fmt, str::FromStr, sync::Arc};
 
@@ -160,6 +160,8 @@ pub fn create_aggregate_expr(
     })
 }
 
+static STRINGS: &[DataType] = &[DataType::Utf8, DataType::LargeUtf8];
+
 static NUMERICS: &[DataType] = &[
     DataType::Int8,
     DataType::Int16,
@@ -173,14 +175,25 @@ static NUMERICS: &[DataType] = &[
     DataType::Float64,
 ];
 
+static TIMESTAMPS: &[DataType] = &[
+    DataType::Timestamp(TimeUnit::Second, None),
+    DataType::Timestamp(TimeUnit::Millisecond, None),
+    DataType::Timestamp(TimeUnit::Microsecond, None),
+    DataType::Timestamp(TimeUnit::Nanosecond, None),
+];
+
 /// the signatures supported by the function `fun`.
 fn signature(fun: &AggregateFunction) -> Signature {
     // note: the physical expression must accept the type returned by this function or the execution panics.
     match fun {
         AggregateFunction::Count => Signature::Any(1),
         AggregateFunction::Min | AggregateFunction::Max => {
-            let mut valid = vec![DataType::Utf8, DataType::LargeUtf8];
-            valid.extend_from_slice(NUMERICS);
+            let valid = STRINGS
+                .iter()
+                .chain(NUMERICS.iter())
+                .chain(TIMESTAMPS.iter())
+                .cloned()
+                .collect::<Vec<_>>();
             Signature::Uniform(1, valid)
         }
         AggregateFunction::Avg | AggregateFunction::Sum => {
diff --git a/rust/datafusion/src/physical_plan/datetime_expressions.rs b/rust/datafusion/src/physical_plan/datetime_expressions.rs
index 3d363ce97d2..7b5816186f2 100644
--- a/rust/datafusion/src/physical_plan/datetime_expressions.rs
+++ b/rust/datafusion/src/physical_plan/datetime_expressions.rs
@@ -324,8 +324,8 @@ pub fn date_trunc(args: &[ColumnarValue]) -> Result<ColumnarValue> {
 
     Ok(match array {
         ColumnarValue::Scalar(scalar) => {
-            if let ScalarValue::TimeNanosecond(v) = scalar {
-                ColumnarValue::Scalar(ScalarValue::TimeNanosecond((f)(*v)?))
+            if let ScalarValue::TimestampNanosecond(v) = scalar {
+                ColumnarValue::Scalar(ScalarValue::TimestampNanosecond((f)(*v)?))
             } else {
                 return Err(DataFusionError::Execution(
                     "array of `date_trunc` must be non-null scalar Utf8".to_string(),
diff --git a/rust/datafusion/src/physical_plan/expressions/min_max.rs b/rust/datafusion/src/physical_plan/expressions/min_max.rs
index 2fd84a6cc70..5ed14610ada 100644
--- a/rust/datafusion/src/physical_plan/expressions/min_max.rs
+++ b/rust/datafusion/src/physical_plan/expressions/min_max.rs
@@ -25,12 +25,13 @@ use crate::error::{DataFusionError, Result};
 use crate::physical_plan::{Accumulator, AggregateExpr, PhysicalExpr};
 use crate::scalar::ScalarValue;
 use arrow::compute;
-use arrow::datatypes::DataType;
+use arrow::datatypes::{DataType, TimeUnit};
 use arrow::{
     array::{
         ArrayRef, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array,
-        Int8Array, LargeStringArray, StringArray, UInt16Array, UInt32Array, UInt64Array,
-        UInt8Array,
+        Int8Array, LargeStringArray, StringArray, TimestampMicrosecondArray,
+        TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray,
+        UInt16Array, UInt32Array, UInt64Array, UInt8Array,
     },
     datatypes::Field,
 };
@@ -128,6 +129,27 @@ macro_rules! min_max_batch {
             DataType::UInt32 => typed_min_max_batch!($VALUES, UInt32Array, UInt32, $OP),
             DataType::UInt16 => typed_min_max_batch!($VALUES, UInt16Array, UInt16, $OP),
             DataType::UInt8 => typed_min_max_batch!($VALUES, UInt8Array, UInt8, $OP),
+            DataType::Timestamp(TimeUnit::Second, _) => {
+                typed_min_max_batch!($VALUES, TimestampSecondArray, TimestampSecond, $OP)
+            }
+            DataType::Timestamp(TimeUnit::Millisecond, _) => typed_min_max_batch!(
+                $VALUES,
+                TimestampMillisecondArray,
+                TimestampMillisecond,
+                $OP
+            ),
+            DataType::Timestamp(TimeUnit::Microsecond, _) => typed_min_max_batch!(
+                $VALUES,
+                TimestampMicrosecondArray,
+                TimestampMicrosecond,
+                $OP
+            ),
+            DataType::Timestamp(TimeUnit::Nanosecond, _) => typed_min_max_batch!(
+                $VALUES,
+                TimestampNanosecondArray,
+                TimestampNanosecond,
+                $OP
+            ),
             other => {
                 // This should have been handled before
                 return Err(DataFusionError::Internal(format!(
@@ -229,6 +251,27 @@ macro_rules! min_max {
             (ScalarValue::LargeUtf8(lhs), ScalarValue::LargeUtf8(rhs)) => {
                 typed_min_max_string!(lhs, rhs, LargeUtf8, $OP)
             }
+            (ScalarValue::TimestampSecond(lhs), ScalarValue::TimestampSecond(rhs)) => {
+                typed_min_max!(lhs, rhs, TimestampSecond, $OP)
+            }
+            (
+                ScalarValue::TimestampMillisecond(lhs),
+                ScalarValue::TimestampMillisecond(rhs),
+            ) => {
+                typed_min_max!(lhs, rhs, TimestampMillisecond, $OP)
+            }
+            (
+                ScalarValue::TimestampMicrosecond(lhs),
+                ScalarValue::TimestampMicrosecond(rhs),
+            ) => {
+                typed_min_max!(lhs, rhs, TimestampMicrosecond, $OP)
+            }
+            (
+                ScalarValue::TimestampNanosecond(lhs),
+                ScalarValue::TimestampNanosecond(rhs),
+            ) => {
+                typed_min_max!(lhs, rhs, TimestampNanosecond, $OP)
+            }
             e => {
                 return Err(DataFusionError::Internal(format!(
                     "MIN/MAX is not expected to receive a scalar {:?}",
diff --git a/rust/datafusion/src/physical_plan/group_scalar.rs b/rust/datafusion/src/physical_plan/group_scalar.rs
index a55e1d7a9a3..f4987ae3a7d 100644
--- a/rust/datafusion/src/physical_plan/group_scalar.rs
+++ b/rust/datafusion/src/physical_plan/group_scalar.rs
@@ -64,9 +64,15 @@ impl TryFrom<&ScalarValue> for GroupByScalar {
             ScalarValue::UInt16(Some(v)) => GroupByScalar::UInt16(*v),
             ScalarValue::UInt32(Some(v)) => GroupByScalar::UInt32(*v),
             ScalarValue::UInt64(Some(v)) => GroupByScalar::UInt64(*v),
-            ScalarValue::TimeMillisecond(Some(v)) => GroupByScalar::TimeMillisecond(*v),
-            ScalarValue::TimeMicrosecond(Some(v)) => GroupByScalar::TimeMicrosecond(*v),
-            ScalarValue::TimeNanosecond(Some(v)) => GroupByScalar::TimeNanosecond(*v),
+            ScalarValue::TimestampMillisecond(Some(v)) => {
+                GroupByScalar::TimeMillisecond(*v)
+            }
+            ScalarValue::TimestampMicrosecond(Some(v)) => {
+                GroupByScalar::TimeMicrosecond(*v)
+            }
+            ScalarValue::TimestampNanosecond(Some(v)) => {
+                GroupByScalar::TimeNanosecond(*v)
+            }
             ScalarValue::Utf8(Some(v)) => GroupByScalar::Utf8(Box::new(v.clone())),
             ScalarValue::Float32(None)
             | ScalarValue::Float64(None)
@@ -110,9 +116,15 @@ impl From<&GroupByScalar> for ScalarValue {
             GroupByScalar::UInt32(v) => ScalarValue::UInt32(Some(*v)),
             GroupByScalar::UInt64(v) => ScalarValue::UInt64(Some(*v)),
             GroupByScalar::Utf8(v) => ScalarValue::Utf8(Some(v.to_string())),
-            GroupByScalar::TimeMillisecond(v) => ScalarValue::TimeMillisecond(Some(*v)),
-            GroupByScalar::TimeMicrosecond(v) => ScalarValue::TimeMicrosecond(Some(*v)),
-            GroupByScalar::TimeNanosecond(v) => ScalarValue::TimeNanosecond(Some(*v)),
+            GroupByScalar::TimeMillisecond(v) => {
+                ScalarValue::TimestampMillisecond(Some(*v))
+            }
+            GroupByScalar::TimeMicrosecond(v) => {
+                ScalarValue::TimestampMicrosecond(Some(*v))
+            }
+            GroupByScalar::TimeNanosecond(v) => {
+                ScalarValue::TimestampNanosecond(Some(*v))
+            }
             GroupByScalar::Date32(v) => ScalarValue::Date32(Some(*v)),
         }
     }
diff --git a/rust/datafusion/src/scalar.rs b/rust/datafusion/src/scalar.rs
index b2367758493..833f707e971 100644
--- a/rust/datafusion/src/scalar.rs
+++ b/rust/datafusion/src/scalar.rs
@@ -19,16 +19,21 @@
 
 use std::{convert::TryFrom, fmt, iter::repeat, sync::Arc};
 
-use arrow::array::{
-    ArrayRef, Int16Builder, Int32Builder, Int64Builder, Int8Builder, ListBuilder,
-    TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
-    UInt16Builder, UInt32Builder, UInt64Builder, UInt8Builder,
-};
 use arrow::datatypes::{DataType, Field, IntervalUnit, TimeUnit};
 use arrow::{
     array::*,
     datatypes::{ArrowNativeType, Float32Type, TimestampNanosecondType},
 };
+use arrow::{
+    array::{
+        ArrayRef, Int16Builder, Int32Builder, Int64Builder, Int8Builder, ListBuilder,
+        TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
+        UInt16Builder, UInt32Builder, UInt64Builder, UInt8Builder,
+    },
+    datatypes::{
+        TimestampMicrosecondType, TimestampMillisecondType, TimestampSecondType,
+    },
+};
 
 use crate::error::{DataFusionError, Result};
 
@@ -72,12 +77,14 @@ pub enum ScalarValue {
     Date32(Option<i32>),
     /// Date stored as a signed 64bit int
     Date64(Option<i64>),
+    /// Timestamp Second
+    TimestampSecond(Option<i64>),
     /// Timestamp Milliseconds
-    TimeMillisecond(Option<i64>),
+    TimestampMillisecond(Option<i64>),
     /// Timestamp Microseconds
-    TimeMicrosecond(Option<i64>),
+    TimestampMicrosecond(Option<i64>),
     /// Timestamp Nanoseconds
-    TimeNanosecond(Option<i64>),
+    TimestampNanosecond(Option<i64>),
     /// Interval with YearMonth unit
     IntervalYearMonth(Option<i32>),
     /// Interval with DayTime unit
@@ -145,15 +152,18 @@ impl ScalarValue {
             ScalarValue::Int16(_) => DataType::Int16,
             ScalarValue::Int32(_) => DataType::Int32,
             ScalarValue::Int64(_) => DataType::Int64,
-            ScalarValue::TimeMicrosecond(_) => {
+            ScalarValue::TimestampSecond(_) => {
+                DataType::Timestamp(TimeUnit::Second, None)
+            }
+            ScalarValue::TimestampMillisecond(_) => {
+                DataType::Timestamp(TimeUnit::Millisecond, None)
+            }
+            ScalarValue::TimestampMicrosecond(_) => {
                 DataType::Timestamp(TimeUnit::Microsecond, None)
             }
-            ScalarValue::TimeNanosecond(_) => {
+            ScalarValue::TimestampNanosecond(_) => {
                 DataType::Timestamp(TimeUnit::Nanosecond, None)
             }
-            ScalarValue::TimeMillisecond(_) => {
-                DataType::Timestamp(TimeUnit::Millisecond, None)
-            }
             ScalarValue::Float32(_) => DataType::Float32,
             ScalarValue::Float64(_) => DataType::Float64,
             ScalarValue::Utf8(_) => DataType::Utf8,
@@ -209,9 +219,9 @@ impl ScalarValue {
                 | ScalarValue::Utf8(None)
                 | ScalarValue::LargeUtf8(None)
                 | ScalarValue::List(None, _)
-                | ScalarValue::TimeMillisecond(None)
-                | ScalarValue::TimeMicrosecond(None)
-                | ScalarValue::TimeNanosecond(None)
+                | ScalarValue::TimestampMillisecond(None)
+                | ScalarValue::TimestampMicrosecond(None)
+                | ScalarValue::TimestampNanosecond(None)
         )
     }
 
@@ -266,7 +276,15 @@ impl ScalarValue {
                 Some(value) => Arc::new(UInt64Array::from_value(*value, size)),
                 None => new_null_array(&DataType::UInt64, size),
             },
-            ScalarValue::TimeMillisecond(e) => match e {
+            ScalarValue::TimestampSecond(e) => match e {
+                Some(value) => Arc::new(TimestampSecondArray::from_iter_values(
+                    repeat(*value).take(size),
+                )),
+                None => {
+                    new_null_array(&DataType::Timestamp(TimeUnit::Second, None), size)
+                }
+            },
+            ScalarValue::TimestampMillisecond(e) => match e {
                 Some(value) => Arc::new(TimestampMillisecondArray::from_iter_values(
                     repeat(*value).take(size),
                 )),
@@ -275,7 +293,7 @@ impl ScalarValue {
                     size,
                 ),
             },
-            ScalarValue::TimeMicrosecond(e) => match e {
+            ScalarValue::TimestampMicrosecond(e) => match e {
                 Some(value) => {
                     Arc::new(TimestampMicrosecondArray::from_value(*value, size))
                 }
@@ -284,7 +302,7 @@ impl ScalarValue {
                     size,
                 ),
             },
-            ScalarValue::TimeNanosecond(e) => match e {
+            ScalarValue::TimestampNanosecond(e) => match e {
                 Some(value) => {
                     Arc::new(TimestampNanosecondArray::from_value(*value, size))
                 }
@@ -403,6 +421,28 @@ impl ScalarValue {
             DataType::Date64 => {
                 typed_cast!(array, index, Date64Array, Date64)
             }
+            DataType::Timestamp(TimeUnit::Second, _) => {
+                typed_cast!(array, index, TimestampSecondArray, TimestampSecond)
+            }
+            DataType::Timestamp(TimeUnit::Millisecond, _) => {
+                typed_cast!(
+                    array,
+                    index,
+                    TimestampMillisecondArray,
+                    TimestampMillisecond
+                )
+            }
+            DataType::Timestamp(TimeUnit::Microsecond, _) => {
+                typed_cast!(
+                    array,
+                    index,
+                    TimestampMicrosecondArray,
+                    TimestampMicrosecond
+                )
+            }
+            DataType::Timestamp(TimeUnit::Nanosecond, _) => {
+                typed_cast!(array, index, TimestampNanosecondArray, TimestampNanosecond)
+            }
             other => {
                 return Err(DataFusionError::NotImplemented(format!(
                     "Can't create a scalar of array of type \"{:?}\"",
@@ -525,7 +565,7 @@ impl TryFrom<ScalarValue> for i64 {
     fn try_from(value: ScalarValue) -> Result<Self> {
         match value {
             ScalarValue::Int64(Some(inner_value))
-            | ScalarValue::TimeNanosecond(Some(inner_value)) => Ok(inner_value),
+            | ScalarValue::TimestampNanosecond(Some(inner_value)) => Ok(inner_value),
             _ => Err(DataFusionError::Internal(format!(
                 "Cannot convert {:?} to {}",
                 value,
@@ -561,6 +601,18 @@ impl TryFrom<&DataType> for ScalarValue {
             DataType::UInt64 => ScalarValue::UInt64(None),
             DataType::Utf8 => ScalarValue::Utf8(None),
             DataType::LargeUtf8 => ScalarValue::LargeUtf8(None),
+            DataType::Timestamp(TimeUnit::Second, _) => {
+                ScalarValue::TimestampSecond(None)
+            }
+            DataType::Timestamp(TimeUnit::Millisecond, _) => {
+                ScalarValue::TimestampMillisecond(None)
+            }
+            DataType::Timestamp(TimeUnit::Microsecond, _) => {
+                ScalarValue::TimestampMicrosecond(None)
+            }
+            DataType::Timestamp(TimeUnit::Nanosecond, _) => {
+                ScalarValue::TimestampNanosecond(None)
+            }
             DataType::List(ref nested_type) => {
                 ScalarValue::List(None, nested_type.data_type().clone())
             }
@@ -597,9 +649,10 @@ impl fmt::Display for ScalarValue {
             ScalarValue::UInt16(e) => format_option!(f, e)?,
             ScalarValue::UInt32(e) => format_option!(f, e)?,
             ScalarValue::UInt64(e) => format_option!(f, e)?,
-            ScalarValue::TimeMillisecond(e) => format_option!(f, e)?,
-            ScalarValue::TimeMicrosecond(e) => format_option!(f, e)?,
-            ScalarValue::TimeNanosecond(e) => format_option!(f, e)?,
+            ScalarValue::TimestampSecond(e) => format_option!(f, e)?,
+            ScalarValue::TimestampMillisecond(e) => format_option!(f, e)?,
+            ScalarValue::TimestampMicrosecond(e) => format_option!(f, e)?,
+            ScalarValue::TimestampNanosecond(e) => format_option!(f, e)?,
             ScalarValue::Utf8(e) => format_option!(f, e)?,
             ScalarValue::LargeUtf8(e) => format_option!(f, e)?,
             ScalarValue::Binary(e) => match e {
@@ -658,9 +711,16 @@ impl fmt::Debug for ScalarValue {
             ScalarValue::UInt16(_) => write!(f, "UInt16({})", self),
             ScalarValue::UInt32(_) => write!(f, "UInt32({})", self),
             ScalarValue::UInt64(_) => write!(f, "UInt64({})", self),
-            ScalarValue::TimeMillisecond(_) => write!(f, "TimeMillisecond({})", self),
-            ScalarValue::TimeMicrosecond(_) => write!(f, "TimeMicrosecond({})", self),
-            ScalarValue::TimeNanosecond(_) => write!(f, "TimeNanosecond({})", self),
+            ScalarValue::TimestampSecond(_) => write!(f, "TimestampSecond({})", self),
+            ScalarValue::TimestampMillisecond(_) => {
+                write!(f, "TimestampMillisecond({})", self)
+            }
+            ScalarValue::TimestampMicrosecond(_) => {
+                write!(f, "TimestampMicrosecond({})", self)
+            }
+            ScalarValue::TimestampNanosecond(_) => {
+                write!(f, "TimestampNanosecond({})", self)
+            }
             ScalarValue::Utf8(None) => write!(f, "Utf8({})", self),
             ScalarValue::Utf8(Some(_)) => write!(f, "Utf8(\"{}\")", self),
             ScalarValue::LargeUtf8(None) => write!(f, "LargeUtf8({})", self),
@@ -694,9 +754,27 @@ impl ScalarType<f32> for Float32Type {
     }
 }
 
+impl ScalarType<i64> for TimestampSecondType {
+    fn scalar(r: Option<i64>) -> ScalarValue {
+        ScalarValue::TimestampSecond(r)
+    }
+}
+
+impl ScalarType<i64> for TimestampMillisecondType {
+    fn scalar(r: Option<i64>) -> ScalarValue {
+        ScalarValue::TimestampMillisecond(r)
+    }
+}
+
+impl ScalarType<i64> for TimestampMicrosecondType {
+    fn scalar(r: Option<i64>) -> ScalarValue {
+        ScalarValue::TimestampMicrosecond(r)
+    }
+}
+
 impl ScalarType<i64> for TimestampNanosecondType {
     fn scalar(r: Option<i64>) -> ScalarValue {
-        ScalarValue::TimeNanosecond(r)
+        ScalarValue::TimestampNanosecond(r)
     }
 }
 
diff --git a/rust/datafusion/src/test/mod.rs b/rust/datafusion/src/test/mod.rs
index 57736189481..926a6922616 100644
--- a/rust/datafusion/src/test/mod.rs
+++ b/rust/datafusion/src/test/mod.rs
@@ -20,7 +20,10 @@
 use crate::datasource::{MemTable, TableProvider};
 use crate::error::Result;
 use crate::logical_plan::{LogicalPlan, LogicalPlanBuilder};
-use array::ArrayRef;
+use array::{
+    Array, ArrayRef, StringArray, TimestampMicrosecondArray, TimestampMillisecondArray,
+    TimestampNanosecondArray, TimestampSecondArray,
+};
 use arrow::array::{self, Int32Array};
 use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
 use arrow::record_batch::RecordBatch;
@@ -182,6 +185,93 @@ pub fn make_partition(sz: i32) -> RecordBatch {
     RecordBatch::try_new(schema, vec![arr]).unwrap()
 }
 
+/// Return a new table provider containing all of the supported timestamp types
+pub fn table_with_timestamps() -> Arc<dyn TableProvider> {
+    let batch = make_timestamps();
+    let schema = batch.schema();
+    let partitions = vec![vec![batch]];
+    Arc::new(MemTable::try_new(schema, partitions).unwrap())
+}
+
+/// Return  record batch with all of the supported timestamp types
+/// values
+///
+/// Columns are named:
+/// "nanos" --> TimestampNanosecondArray
+/// "micros" --> TimestampMicrosecondArray
+/// "millis" --> TimestampMillisecondArray
+/// "secs" --> TimestampSecondArray
+/// "names" --> StringArray
+pub fn make_timestamps() -> RecordBatch {
+    let ts_strings = vec![
+        Some("2018-11-13T17:11:10.011375885995"),
+        Some("2011-12-13T11:13:10.12345"),
+        None,
+        Some("2021-1-1T05:11:10.432"),
+    ];
+
+    let ts_nanos = ts_strings
+        .into_iter()
+        .map(|t| {
+            t.map(|t| {
+                t.parse::<chrono::NaiveDateTime>()
+                    .unwrap()
+                    .timestamp_nanos()
+            })
+        })
+        .collect::<Vec<_>>();
+
+    let ts_micros = ts_nanos
+        .iter()
+        .map(|t| t.as_ref().map(|ts_nanos| ts_nanos / 1000))
+        .collect::<Vec<_>>();
+
+    let ts_millis = ts_nanos
+        .iter()
+        .map(|t| t.as_ref().map(|ts_nanos| ts_nanos / 1000000))
+        .collect::<Vec<_>>();
+
+    let ts_secs = ts_nanos
+        .iter()
+        .map(|t| t.as_ref().map(|ts_nanos| ts_nanos / 1000000000))
+        .collect::<Vec<_>>();
+
+    let names = ts_nanos
+        .iter()
+        .enumerate()
+        .map(|(i, _)| format!("Row {}", i))
+        .collect::<Vec<_>>();
+
+    let arr_nanos = TimestampNanosecondArray::from_opt_vec(ts_nanos, None);
+    let arr_micros = TimestampMicrosecondArray::from_opt_vec(ts_micros, None);
+    let arr_millis = TimestampMillisecondArray::from_opt_vec(ts_millis, None);
+    let arr_secs = TimestampSecondArray::from_opt_vec(ts_secs, None);
+
+    let names = names.iter().map(|s| s.as_str()).collect::<Vec<_>>();
+    let arr_names = StringArray::from(names);
+
+    let schema = Schema::new(vec![
+        Field::new("nanos", arr_nanos.data_type().clone(), false),
+        Field::new("micros", arr_micros.data_type().clone(), false),
+        Field::new("millis", arr_millis.data_type().clone(), false),
+        Field::new("secs", arr_secs.data_type().clone(), false),
+        Field::new("name", arr_names.data_type().clone(), false),
+    ]);
+    let schema = Arc::new(schema);
+
+    RecordBatch::try_new(
+        schema,
+        vec![
+            Arc::new(arr_nanos),
+            Arc::new(arr_micros),
+            Arc::new(arr_millis),
+            Arc::new(arr_secs),
+            Arc::new(arr_names),
+        ],
+    )
+    .unwrap()
+}
+
 pub mod exec;
 pub mod user_defined;
 pub mod variable;

From a102ba2f8b0054871eb441bbf6dc007a9b448ee7 Mon Sep 17 00:00:00 2001
From: Weston Pace <weston.pace@gmail.com>
Date: Tue, 13 Apr 2021 08:51:51 -0400
Subject: [PATCH 008/719] ARROW-12288: [C++] Create Scanner interface

To prepare for the AsyncScanner this PR creates a Scanner interface and, along the way, simplifies the current Scanner API so that the new scanner won't need to match.

## What is removed:

* `Scanner::GetFragments` was only used in `FileSystemDataset::Write`.  The correct source of truth for fragments is the `Dataset`.  Note: The python implementation exposed this method but it was not documented or used in any unit test.  I think it can be safely removed and we need not worry about deprecation.
* `Scanner::schema` is redundant and ambiguous.  There are two schemas at the scan level.  The dataset schema (the unified master schema that we expect all fragment schemas to be a subset of) and the projection schema (a combination of the dataset schema and the projection expression).  Both of these are available on the scan options object and there is an accessor for these options so the caller might as well get them from there.  This schema function was exposed via R and used internally there but I think any uses can be easily changed to using the options.
* `FileFormat::splittable` and `Fragment::splittable`.  These were intended to advertise that batch readahead was available on the given fragment/format.  However, there is no need to advertise this.  They are not used by the `SyncScanner` and the `AsyncScanner` will just assume that the format/fragment's will utilize readahead if they can (respecting the readahead options in `ScanOptions`)
* Direct instantiation of `Scanner`.  All `Scanner` creation should go through `ScannerBuilder` now.  This allows the `ScannerBuilder` to determine what implementation to use.  This was mostly the way things were implemented already.  Only a few tests instantiated a `Scanner` directly.

## What is deprecated

* `Scanner::Scan` is going to be deprecated (ARROW-11797).  It will not be implemented by `AsyncScanner`.  I do not actually deprecate it in this PR as I reserve that for ARROW-11797.  Unfortunately, this method was exposed via python & R and likely was used so deprecation is recommended over outright removal.

## What is new

* `Scanner::ScanBatches` and `Scanner::ScanBatchesUnordered` have been added.  These functions will be the new preferred "scan" method going forward.  This allows the parallelization (batch readahead, file readahead, etc.) to be handled by C++ and simplifies the user's life.
* `ScanOptions::batch_readahead` and `ScanOptions::fragment_readahead` options allow more fine grained control over how to perform readahead.  One technicality is that these options will not be respected well by the `SyncScanner` (although I think the current ARROW-11797 utilizes batch readahead) so they are more placeholders for when we implement `AsyncScanner`.
* `ScanOptions::cpu_executor` and `ScanOptions::io_context` are added and should be fairly self explanatory.
* `ScanOptions::use_async` will toggle which scanner to use.

Closes #9947 from westonpace/feature/arrow-12288

Authored-by: Weston Pace <weston.pace@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/src/arrow/dataset/dataset.h        |   5 -
 cpp/src/arrow/dataset/dataset_test.cc  |   4 +-
 cpp/src/arrow/dataset/file_base.cc     |  27 +---
 cpp/src/arrow/dataset/file_base.h      |   4 -
 cpp/src/arrow/dataset/file_csv.cc      |   2 +-
 cpp/src/arrow/dataset/file_ipc.h       |   2 -
 cpp/src/arrow/dataset/file_ipc_test.cc |   9 +-
 cpp/src/arrow/dataset/file_parquet.h   |   2 -
 cpp/src/arrow/dataset/scanner.cc       | 128 +++++++++++++++--
 cpp/src/arrow/dataset/scanner.h        | 190 +++++++++++++++++++++----
 cpp/src/arrow/dataset/scanner_test.cc  |  44 +++++-
 cpp/src/arrow/dataset/test_util.h      |  53 ++++++-
 cpp/src/jni/dataset/jni_wrapper.cc     |   3 +-
 python/pyarrow/_dataset.pyx            |   8 --
 r/src/dataset.cpp                      |   2 +-
 15 files changed, 396 insertions(+), 87 deletions(-)

diff --git a/cpp/src/arrow/dataset/dataset.h b/cpp/src/arrow/dataset/dataset.h
index 6be83059fc1..5d818b23938 100644
--- a/cpp/src/arrow/dataset/dataset.h
+++ b/cpp/src/arrow/dataset/dataset.h
@@ -64,9 +64,6 @@ class ARROW_DS_EXPORT Fragment : public std::enable_shared_from_this<Fragment> {
   /// To receive a record batch stream which is fully filtered and projected, use Scanner.
   virtual Result<ScanTaskIterator> Scan(std::shared_ptr<ScanOptions> options) = 0;
 
-  /// \brief Return true if the fragment can benefit from parallel scanning.
-  virtual bool splittable() const = 0;
-
   virtual std::string type_name() const = 0;
   virtual std::string ToString() const { return type_name(); }
 
@@ -111,8 +108,6 @@ class ARROW_DS_EXPORT InMemoryFragment : public Fragment {
 
   Result<ScanTaskIterator> Scan(std::shared_ptr<ScanOptions> options) override;
 
-  bool splittable() const override { return false; }
-
   std::string type_name() const override { return "in-memory"; }
 
  protected:
diff --git a/cpp/src/arrow/dataset/dataset_test.cc b/cpp/src/arrow/dataset/dataset_test.cc
index 1db96b8b5c3..7aa0e1a2413 100644
--- a/cpp/src/arrow/dataset/dataset_test.cc
+++ b/cpp/src/arrow/dataset/dataset_test.cc
@@ -442,7 +442,7 @@ TEST_F(TestEndToEnd, EndToEndSingleDataset) {
   // In the simplest case, consumption is simply conversion to a Table.
   ASSERT_OK_AND_ASSIGN(auto table, scanner->ToTable());
 
-  auto expected = TableFromJSON(scanner->schema(), {R"([
+  auto expected = TableFromJSON(scanner_builder->projected_schema(), {R"([
     {"sales": 152.25, "model": "3", "country": "CA"},
     {"sales": 273.5, "model": "3", "country": "US"}
   ])"});
@@ -547,7 +547,7 @@ class TestSchemaUnification : public TestUnionDataset {
   void AssertScanEquals(std::shared_ptr<Scanner> scanner,
                         const std::vector<TupleType>& expected_rows) {
     std::vector<std::string> columns;
-    for (const auto& field : scanner->schema()->fields()) {
+    for (const auto& field : scanner->options()->projected_schema->fields()) {
       columns.push_back(field->name());
     }
 
diff --git a/cpp/src/arrow/dataset/file_base.cc b/cpp/src/arrow/dataset/file_base.cc
index ad19bd2041e..7b2f42055b3 100644
--- a/cpp/src/arrow/dataset/file_base.cc
+++ b/cpp/src/arrow/dataset/file_base.cc
@@ -369,7 +369,7 @@ struct WriteState {
   std::unordered_map<std::string, std::unique_ptr<WriteQueue>> queues;
 };
 
-Status WriteNextBatch(WriteState& state, const std::shared_ptr<ScanTask>& scan_task,
+Status WriteNextBatch(WriteState& state, const std::shared_ptr<Fragment>& fragment,
                       std::shared_ptr<RecordBatch> batch) {
   ARROW_ASSIGN_OR_RAISE(auto groups, state.write_options.partitioning->Partition(batch));
   batch.reset();  // drop to hopefully conserve memory
@@ -382,8 +382,8 @@ Status WriteNextBatch(WriteState& state, const std::shared_ptr<ScanTask>& scan_t
 
   std::unordered_set<WriteQueue*> need_flushed;
   for (size_t i = 0; i < groups.batches.size(); ++i) {
-    auto partition_expression = and_(std::move(groups.expressions[i]),
-                                     scan_task->fragment()->partition_expression());
+    auto partition_expression =
+        and_(std::move(groups.expressions[i]), fragment->partition_expression());
     auto batch = std::move(groups.batches[i]);
 
     ARROW_ASSIGN_OR_RAISE(auto part,
@@ -432,7 +432,7 @@ Future<> WriteInternal(const ScanOptions& scan_options, WriteState& state,
       ARROW_ASSIGN_OR_RAISE(auto batches_gen, scan_task->ExecuteAsync(cpu_executor));
       std::function<Status(std::shared_ptr<RecordBatch> batch)> batch_visitor =
           [&, scan_task](std::shared_ptr<RecordBatch> batch) {
-            return WriteNextBatch(state, scan_task, std::move(batch));
+            return WriteNextBatch(state, scan_task->fragment(), std::move(batch));
           };
       scan_futs.push_back(VisitAsyncGenerator(batches_gen, batch_visitor));
     } else {
@@ -441,7 +441,7 @@ Future<> WriteInternal(const ScanOptions& scan_options, WriteState& state,
 
         for (auto maybe_batch : batches) {
           ARROW_ASSIGN_OR_RAISE(auto batch, maybe_batch);
-          RETURN_NOT_OK(WriteNextBatch(state, scan_task, std::move(batch)));
+          RETURN_NOT_OK(WriteNextBatch(state, scan_task->fragment(), std::move(batch)));
         }
 
         return Status::OK();
@@ -469,21 +469,8 @@ Status FileSystemDataset::Write(const FileSystemDatasetWriteOptions& write_optio
   //
   // NB: neither of these will have any impact whatsoever on the common case of writing
   //     an in-memory table to disk.
-  ARROW_ASSIGN_OR_RAISE(auto fragment_it, scanner->GetFragments());
-  ARROW_ASSIGN_OR_RAISE(FragmentVector fragments, fragment_it.ToVector());
-  ScanTaskVector scan_tasks;
-
-  for (const auto& fragment : fragments) {
-    auto options = std::make_shared<ScanOptions>(*scanner->options());
-    // Avoid contention with multithreaded readers
-    options->use_threads = false;
-    ARROW_ASSIGN_OR_RAISE(auto scan_task_it,
-                          Scanner(fragment, std::move(options)).Scan());
-    for (auto maybe_scan_task : scan_task_it) {
-      ARROW_ASSIGN_OR_RAISE(auto scan_task, maybe_scan_task);
-      scan_tasks.push_back(std::move(scan_task));
-    }
-  }
+  ARROW_ASSIGN_OR_RAISE(auto scan_task_it, scanner->Scan());
+  ARROW_ASSIGN_OR_RAISE(ScanTaskVector scan_tasks, scan_task_it.ToVector());
 
   WriteState state(write_options);
   auto res = internal::RunSynchronously<arrow::detail::Empty>(
diff --git a/cpp/src/arrow/dataset/file_base.h b/cpp/src/arrow/dataset/file_base.h
index e4e7167aa75..ccc3d54709b 100644
--- a/cpp/src/arrow/dataset/file_base.h
+++ b/cpp/src/arrow/dataset/file_base.h
@@ -134,9 +134,6 @@ class ARROW_DS_EXPORT FileFormat : public std::enable_shared_from_this<FileForma
   /// \brief The name identifying the kind of file format
   virtual std::string type_name() const = 0;
 
-  /// \brief Return true if fragments of this format can benefit from parallel scanning.
-  virtual bool splittable() const { return false; }
-
   virtual bool Equals(const FileFormat& other) const = 0;
 
   /// \brief Indicate if the FileSource is supported/readable by this format.
@@ -176,7 +173,6 @@ class ARROW_DS_EXPORT FileFragment : public Fragment {
 
   std::string type_name() const override { return format_->type_name(); }
   std::string ToString() const override { return source_.path(); };
-  bool splittable() const override { return format_->splittable(); }
 
   const FileSource& source() const { return source_; }
   const std::shared_ptr<FileFormat>& format() const { return format_; }
diff --git a/cpp/src/arrow/dataset/file_csv.cc b/cpp/src/arrow/dataset/file_csv.cc
index 677d1be05b7..9a7a9d2de4c 100644
--- a/cpp/src/arrow/dataset/file_csv.cc
+++ b/cpp/src/arrow/dataset/file_csv.cc
@@ -45,7 +45,7 @@ using internal::checked_cast;
 using internal::checked_pointer_cast;
 using internal::Executor;
 using internal::SerialExecutor;
-using RecordBatchGenerator = AsyncGenerator<std::shared_ptr<RecordBatch>>;
+using RecordBatchGenerator = std::function<Future<std::shared_ptr<RecordBatch>>()>;
 
 Result<std::unordered_set<std::string>> GetColumnNames(
     const csv::ParseOptions& parse_options, util::string_view first_block,
diff --git a/cpp/src/arrow/dataset/file_ipc.h b/cpp/src/arrow/dataset/file_ipc.h
index a7bcd04a9d2..621eef80635 100644
--- a/cpp/src/arrow/dataset/file_ipc.h
+++ b/cpp/src/arrow/dataset/file_ipc.h
@@ -42,8 +42,6 @@ class ARROW_DS_EXPORT IpcFileFormat : public FileFormat {
     return type_name() == other.type_name();
   }
 
-  bool splittable() const override { return true; }
-
   Result<bool> IsSupported(const FileSource& source) const override;
 
   /// \brief Return the schema of the file if possible.
diff --git a/cpp/src/arrow/dataset/file_ipc_test.cc b/cpp/src/arrow/dataset/file_ipc_test.cc
index 502b61ca645..ef0c0f62108 100644
--- a/cpp/src/arrow/dataset/file_ipc_test.cc
+++ b/cpp/src/arrow/dataset/file_ipc_test.cc
@@ -234,6 +234,13 @@ class TestIpcFileSystemDataset : public testing::Test,
     format_ = ipc_format;
     SetWriteOptions(ipc_format->DefaultWriteOptions());
   }
+
+  std::shared_ptr<Scanner> MakeScanner(const std::shared_ptr<Dataset>& dataset,
+                                       const std::shared_ptr<ScanOptions>& scan_options) {
+    ScannerBuilder builder(dataset, scan_options);
+    EXPECT_OK_AND_ASSIGN(auto scanner, builder.Finish());
+    return scanner;
+  }
 };
 
 TEST_F(TestIpcFileSystemDataset, WriteWithIdenticalPartitioningSchema) {
@@ -259,7 +266,7 @@ TEST_F(TestIpcFileSystemDataset, WriteExceedsMaxPartitions) {
   // require that no batch be grouped into more than 2 written batches:
   write_options_.max_partitions = 2;
 
-  auto scanner = std::make_shared<Scanner>(dataset_, scan_options_);
+  auto scanner = MakeScanner(dataset_, scan_options_);
   EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, testing::HasSubstr("This exceeds the maximum"),
                                   FileSystemDataset::Write(write_options_, scanner));
 }
diff --git a/cpp/src/arrow/dataset/file_parquet.h b/cpp/src/arrow/dataset/file_parquet.h
index fa0d7dea843..ac8a746481a 100644
--- a/cpp/src/arrow/dataset/file_parquet.h
+++ b/cpp/src/arrow/dataset/file_parquet.h
@@ -70,8 +70,6 @@ class ARROW_DS_EXPORT ParquetFileFormat : public FileFormat {
 
   std::string type_name() const override { return kParquetTypeName; }
 
-  bool splittable() const override { return true; }
-
   bool Equals(const FileFormat& other) const override;
 
   struct ReaderOptions {
diff --git a/cpp/src/arrow/dataset/scanner.cc b/cpp/src/arrow/dataset/scanner.cc
index a8ac24b7799..738c9fc0f62 100644
--- a/cpp/src/arrow/dataset/scanner.cc
+++ b/cpp/src/arrow/dataset/scanner.cc
@@ -70,7 +70,108 @@ Result<RecordBatchGenerator> ScanTask::ExecuteAsync(internal::Executor*) {
 
 bool ScanTask::supports_async() const { return false; }
 
-Result<FragmentIterator> Scanner::GetFragments() {
+Result<ScanTaskIterator> Scanner::Scan() {
+  // TODO(ARROW-12289) This is overridden in SyncScanner and will never be implemented in
+  // AsyncScanner.  It is deprecated and will eventually go away.
+  return Status::NotImplemented("This scanner does not support the legacy Scan() method");
+}
+
+Result<EnumeratedRecordBatchIterator> Scanner::ScanBatchesUnordered() {
+  // If a scanner doesn't support unordered scanning (i.e. SyncScanner) then we just
+  // fall back to an ordered scan and assign the appropriate tagging
+  ARROW_ASSIGN_OR_RAISE(auto ordered_scan, ScanBatches());
+  return AddPositioningToInOrderScan(std::move(ordered_scan));
+}
+
+Result<EnumeratedRecordBatchIterator> Scanner::AddPositioningToInOrderScan(
+    TaggedRecordBatchIterator scan) {
+  ARROW_ASSIGN_OR_RAISE(auto first, scan.Next());
+  if (IsIterationEnd(first)) {
+    return MakeEmptyIterator<EnumeratedRecordBatch>();
+  }
+  struct State {
+    State(TaggedRecordBatchIterator source, TaggedRecordBatch first)
+        : source(std::move(source)),
+          batch_index(0),
+          fragment_index(0),
+          finished(false),
+          prev_batch(std::move(first)) {}
+    TaggedRecordBatchIterator source;
+    int batch_index;
+    int fragment_index;
+    bool finished;
+    TaggedRecordBatch prev_batch;
+  };
+  struct EnumeratingIterator {
+    Result<EnumeratedRecordBatch> Next() {
+      if (state->finished) {
+        return IterationEnd<EnumeratedRecordBatch>();
+      }
+      ARROW_ASSIGN_OR_RAISE(auto next, state->source.Next());
+      if (IsIterationEnd<TaggedRecordBatch>(next)) {
+        state->finished = true;
+        return EnumeratedRecordBatch{
+            {std::move(state->prev_batch.record_batch), state->batch_index, true},
+            {std::move(state->prev_batch.fragment), state->fragment_index, true}};
+      }
+      auto prev = std::move(state->prev_batch);
+      bool prev_is_last_batch = false;
+      auto prev_batch_index = state->batch_index;
+      auto prev_fragment_index = state->fragment_index;
+      // Reference equality here seems risky but a dataset should have a constant set of
+      // fragments which should be consistent for the lifetime of a scan
+      if (prev.fragment.get() != next.fragment.get()) {
+        state->batch_index = 0;
+        state->fragment_index++;
+        prev_is_last_batch = true;
+      } else {
+        state->batch_index++;
+      }
+      state->prev_batch = std::move(next);
+      return EnumeratedRecordBatch{
+          {std::move(prev.record_batch), prev_batch_index, prev_is_last_batch},
+          {std::move(prev.fragment), prev_fragment_index, false}};
+    }
+    std::shared_ptr<State> state;
+  };
+  return EnumeratedRecordBatchIterator(
+      EnumeratingIterator{std::make_shared<State>(std::move(scan), std::move(first))});
+}
+
+Result<TaggedRecordBatchIterator> SyncScanner::ScanBatches() {
+  // TODO(ARROW-11797) Provide a better implementation that does readahead.  Also, add
+  // unit testing
+  ARROW_ASSIGN_OR_RAISE(auto scan_task_it, Scan());
+  struct BatchIter {
+    explicit BatchIter(ScanTaskIterator scan_task_it)
+        : scan_task_it(std::move(scan_task_it)) {}
+
+    Result<TaggedRecordBatch> Next() {
+      while (true) {
+        if (current_task == nullptr) {
+          ARROW_ASSIGN_OR_RAISE(current_task, scan_task_it.Next());
+          if (IsIterationEnd<std::shared_ptr<ScanTask>>(current_task)) {
+            return IterationEnd<TaggedRecordBatch>();
+          }
+          ARROW_ASSIGN_OR_RAISE(batch_it, current_task->Execute());
+        }
+        ARROW_ASSIGN_OR_RAISE(auto next, batch_it.Next());
+        if (IsIterationEnd<std::shared_ptr<RecordBatch>>(next)) {
+          current_task = nullptr;
+        } else {
+          return TaggedRecordBatch{next, current_task->fragment()};
+        }
+      }
+    }
+
+    ScanTaskIterator scan_task_it;
+    RecordBatchIterator batch_it;
+    std::shared_ptr<ScanTask> current_task;
+  };
+  return TaggedRecordBatchIterator(BatchIter(std::move(scan_task_it)));
+}
+
+Result<FragmentIterator> SyncScanner::GetFragments() {
   if (fragment_ != nullptr) {
     return MakeVectorIterator(FragmentVector{fragment_});
   }
@@ -81,7 +182,7 @@ Result<FragmentIterator> Scanner::GetFragments() {
   return GetFragmentsFromDatasets({dataset_}, scan_options_->filter);
 }
 
-Result<ScanTaskIterator> Scanner::Scan() {
+Result<ScanTaskIterator> SyncScanner::Scan() {
   // Transforms Iterator<Fragment> into a unified
   // Iterator<ScanTask>. The first Iterator::Next invocation is going to do
   // all the work of unwinding the chained iterators.
@@ -110,7 +211,7 @@ ScannerBuilder::ScannerBuilder(std::shared_ptr<Dataset> dataset,
       fragment_(nullptr),
       scan_options_(std::move(scan_options)) {
   scan_options_->dataset_schema = dataset_->schema();
-  DCHECK_OK(Filter(literal(true)));
+  DCHECK_OK(Filter(scan_options_->filter));
 }
 
 ScannerBuilder::ScannerBuilder(std::shared_ptr<Schema> schema,
@@ -120,13 +221,17 @@ ScannerBuilder::ScannerBuilder(std::shared_ptr<Schema> schema,
       fragment_(std::move(fragment)),
       scan_options_(std::move(scan_options)) {
   scan_options_->dataset_schema = std::move(schema);
-  DCHECK_OK(Filter(literal(true)));
+  DCHECK_OK(Filter(scan_options_->filter));
 }
 
 const std::shared_ptr<Schema>& ScannerBuilder::schema() const {
   return scan_options_->dataset_schema;
 }
 
+const std::shared_ptr<Schema>& ScannerBuilder::projected_schema() const {
+  return scan_options_->projected_schema;
+}
+
 Status ScannerBuilder::Project(std::vector<std::string> columns) {
   return SetProjection(scan_options_.get(), std::move(columns));
 }
@@ -170,9 +275,15 @@ Result<std::shared_ptr<Scanner>> ScannerBuilder::Finish() {
   }
 
   if (dataset_ == nullptr) {
-    return std::make_shared<Scanner>(fragment_, scan_options_);
+    // AsyncScanner does not support this method of running.  It may in the future
+    return std::make_shared<SyncScanner>(fragment_, scan_options_);
+  }
+  if (scan_options_->use_async) {
+    // TODO(ARROW-12289)
+    return Status::NotImplemented("The asynchronous scanner is not yet available");
+  } else {
+    return std::make_shared<SyncScanner>(dataset_, scan_options_);
   }
-  return std::make_shared<Scanner>(dataset_, scan_options_);
 }
 
 static inline RecordBatchVector FlattenRecordBatchVector(
@@ -202,13 +313,13 @@ struct TableAssemblyState {
   }
 };
 
-Result<std::shared_ptr<Table>> Scanner::ToTable() {
+Result<std::shared_ptr<Table>> SyncScanner::ToTable() {
   return internal::RunSynchronously<std::shared_ptr<Table>>(
       [this](Executor* executor) { return ToTableInternal(executor); },
       scan_options_->use_threads);
 }
 
-Future<std::shared_ptr<Table>> Scanner::ToTableInternal(
+Future<std::shared_ptr<Table>> SyncScanner::ToTableInternal(
     internal::Executor* cpu_executor) {
   ARROW_ASSIGN_OR_RAISE(auto scan_task_it, Scan());
   auto task_group = scan_options_->TaskGroup();
@@ -218,6 +329,7 @@ Future<std::shared_ptr<Table>> Scanner::ToTableInternal(
   /// and the mutex/batches fail out of scope.
   auto state = std::make_shared<TableAssemblyState>();
 
+  // TODO (ARROW-11797) Migrate to using ScanBatches()
   size_t scan_task_id = 0;
   std::vector<Future<>> scan_futures;
   for (auto maybe_scan_task : scan_task_it) {
diff --git a/cpp/src/arrow/dataset/scanner.h b/cpp/src/arrow/dataset/scanner.h
index 9bd4b10847b..ddd86674d39 100644
--- a/cpp/src/arrow/dataset/scanner.h
+++ b/cpp/src/arrow/dataset/scanner.h
@@ -30,8 +30,11 @@
 #include "arrow/dataset/projector.h"
 #include "arrow/dataset/type_fwd.h"
 #include "arrow/dataset/visibility.h"
+#include "arrow/io/interfaces.h"
 #include "arrow/memory_pool.h"
 #include "arrow/type_fwd.h"
+#include "arrow/util/iterator.h"
+#include "arrow/util/thread_pool.h"
 #include "arrow/util/type_fwd.h"
 
 namespace arrow {
@@ -41,6 +44,8 @@ using RecordBatchGenerator = std::function<Future<std::shared_ptr<RecordBatch>>(
 namespace dataset {
 
 constexpr int64_t kDefaultBatchSize = 1 << 20;
+constexpr int32_t kDefaultBatchReadahead = 32;
+constexpr int32_t kDefaultFragmentReadahead = 8;
 
 struct ARROW_DS_EXPORT ScanOptions {
   // Filter and projection
@@ -67,12 +72,48 @@ struct ARROW_DS_EXPORT ScanOptions {
   // Maximum row count for scanned batches.
   int64_t batch_size = kDefaultBatchSize;
 
+  /// How many batches to read ahead within a file
+  ///
+  /// Set to 0 to disable batch readahead
+  ///
+  /// Note: May not be supported by all formats
+  /// Note: May not be supported by all scanners
+  /// Note: Will be ignored if use_threads is set to false
+  int32_t batch_readahead = kDefaultBatchReadahead;
+
+  /// How many files to read ahead
+  ///
+  /// Set to 0 to disable fragment readahead
+  ///
+  /// Note: May not be enforced by all scanners
+  /// Note: Will be ignored if use_threads is set to false
+  int32_t fragment_readahead = kDefaultFragmentReadahead;
+
   /// A pool from which materialized and scanned arrays will be allocated.
   MemoryPool* pool = arrow::default_memory_pool();
 
-  /// Indicate if the Scanner should make use of a ThreadPool.
+  /// Executor on which to run any CPU tasks
+  ///
+  /// Note: Will be ignored if use_threads is set to false
+  internal::Executor* cpu_executor = internal::GetCpuThreadPool();
+
+  /// IOContext for any IO tasks
+  ///
+  /// Note: The IOContext executor will be ignored if use_threads is set to false
+  io::IOContext io_context;
+
+  /// If true the scanner will scan in parallel
+  ///
+  /// Note: If true, this will use threads from both the cpu_executor and the
+  /// io_context.executor
+  /// Note: This  must be true in order for any readahead to happen
   bool use_threads = false;
 
+  /// If true then an asycnhronous implementation of the scanner will be used.
+  /// This implementation is newer and generally performs better.  However, it
+  /// makes extensive use of threading and is still considered experimental
+  bool use_async = false;
+
   /// Fragment-specific scan options.
   std::shared_ptr<FragmentScanOptions> fragment_scan_options;
 
@@ -140,49 +181,148 @@ ARROW_DS_EXPORT Result<ScanTaskIterator> ScanTaskIteratorFromRecordBatch(
     std::vector<std::shared_ptr<RecordBatch>> batches,
     std::shared_ptr<ScanOptions> options);
 
-/// \brief Scanner is a materialized scan operation with context and options
-/// bound. A scanner is the class that glues ScanTask, Fragment,
-/// and Dataset. In python pseudo code, it performs the following:
+template <typename T>
+struct Enumerated {
+  T value;
+  int index;
+  bool last;
+};
+
+/// \brief Combines a record batch with the fragment that the record batch originated
+/// from
 ///
-///  def Scan():
-///    for fragment in self.dataset.GetFragments(this.options.filter):
-///      for scan_task in fragment.Scan(this.options):
-///        yield scan_task
+/// Knowing the source fragment can be useful for debugging & understanding loaded data
+struct TaggedRecordBatch {
+  std::shared_ptr<RecordBatch> record_batch;
+  std::shared_ptr<Fragment> fragment;
+};
+using TaggedRecordBatchGenerator = std::function<Future<TaggedRecordBatch>()>;
+using TaggedRecordBatchIterator = Iterator<TaggedRecordBatch>;
+
+/// \brief Combines a tagged batch with positional information
+///
+/// This is returned when scanning batches in an unordered fashion.  This information is
+/// needed if you ever want to reassemble the batches in order
+struct EnumeratedRecordBatch {
+  Enumerated<std::shared_ptr<RecordBatch>> record_batch;
+  Enumerated<std::shared_ptr<Fragment>> fragment;
+};
+using EnumeratedRecordBatchGenerator = std::function<Future<EnumeratedRecordBatch>()>;
+using EnumeratedRecordBatchIterator = Iterator<EnumeratedRecordBatch>;
+
+}  // namespace dataset
+
+template <>
+struct IterationTraits<dataset::TaggedRecordBatch> {
+  static dataset::TaggedRecordBatch End() {
+    return dataset::TaggedRecordBatch{NULL, NULL};
+  }
+  static bool IsEnd(const dataset::TaggedRecordBatch& val) {
+    return val.record_batch == NULL;
+  }
+};
+
+template <>
+struct IterationTraits<dataset::EnumeratedRecordBatch> {
+  static dataset::EnumeratedRecordBatch End() {
+    return dataset::EnumeratedRecordBatch{{NULL, -1, false}, {NULL, -1, false}};
+  }
+  static bool IsEnd(const dataset::EnumeratedRecordBatch& val) {
+    return val.fragment.value == NULL;
+  }
+};
+
+namespace dataset {
+/// \brief A scanner glues together several dataset classes to load in data.
+/// The dataset contains a collection of fragments and partitioning rules.
+///
+/// The fragments identify independently loadable units of data (i.e. each fragment has
+/// a potentially unique schema and possibly even format.  It should be possible to read
+/// fragments in parallel if desired).
+///
+/// The fragment's format contains the logic necessary to actually create a task to load
+/// the fragment into memory.  That task may or may not support parallel execution of
+/// its own.
+///
+/// The scanner is then responsible for creating scan tasks from every fragment in the
+/// dataset and (potentially) sequencing the loaded record batches together.
+///
+/// The scanner should not buffer the entire dataset in memory (unless asked) instead
+/// yielding record batches as soon as they are ready to scan.  Various readahead
+/// properties control how much data is allowed to be scanned before pausing to let a
+/// slow consumer catchup.
+///
+/// Today the scanner also handles projection & filtering although that may change in
+/// the future.
 class ARROW_DS_EXPORT Scanner {
  public:
-  Scanner(std::shared_ptr<Dataset> dataset, std::shared_ptr<ScanOptions> scan_options)
-      : dataset_(std::move(dataset)), scan_options_(std::move(scan_options)) {}
-
-  Scanner(std::shared_ptr<Fragment> fragment, std::shared_ptr<ScanOptions> scan_options)
-      : fragment_(std::move(fragment)), scan_options_(std::move(scan_options)) {}
+  virtual ~Scanner() = default;
 
   /// \brief The Scan operator returns a stream of ScanTask. The caller is
   /// responsible to dispatch/schedule said tasks. Tasks should be safe to run
   /// in a concurrent fashion and outlive the iterator.
-  Result<ScanTaskIterator> Scan();
-
+  ///
+  /// Note: Not supported by the async scanner
+  /// TODO(ARROW-11797) Deprecate Scan()
+  virtual Result<ScanTaskIterator> Scan();
   /// \brief Convert a Scanner into a Table.
   ///
   /// Use this convenience utility with care. This will serially materialize the
   /// Scan result in memory before creating the Table.
-  Result<std::shared_ptr<Table>> ToTable();
+  virtual Result<std::shared_ptr<Table>> ToTable() = 0;
+  /// \brief Scan the dataset into a stream of record batches.  Each batch is tagged
+  /// with the fragment it originated from.  The batches will arrive in order.  The
+  /// order of fragments is determined by the dataset.
+  ///
+  /// Note: The scanner will perform some readahead but will avoid materializing too
+  /// much in memory (this is goverended by the readahead options and use_threads option).
+  /// If the readahead queue fills up then I/O will pause until the calling thread catches
+  /// up.
+  virtual Result<TaggedRecordBatchIterator> ScanBatches() = 0;
+  /// \brief Scan the dataset into a stream of record batches.  Unlike ScanBatches this
+  /// method may allow record batches to be returned out of order.  This allows for more
+  /// efficient scanning: some fragments may be accessed more quickly than others (e.g.
+  /// may be cached in RAM or just happen to get scheduled earlier by the I/O)
+  ///
+  /// To make up for the out-of-order iteration each batch is further tagged with
+  /// positional information.
+  virtual Result<EnumeratedRecordBatchIterator> ScanBatchesUnordered();
 
-  /// \brief GetFragments returns an iterator over all Fragments in this scan.
-  Result<FragmentIterator> GetFragments();
+  const std::shared_ptr<ScanOptions>& options() const { return scan_options_; }
 
-  const std::shared_ptr<Schema>& schema() const {
-    return scan_options_->projected_schema;
-  }
+ protected:
+  explicit Scanner(std::shared_ptr<ScanOptions> scan_options)
+      : scan_options_(std::move(scan_options)) {}
 
-  const std::shared_ptr<ScanOptions>& options() const { return scan_options_; }
+  Result<EnumeratedRecordBatchIterator> AddPositioningToInOrderScan(
+      TaggedRecordBatchIterator scan);
+
+  const std::shared_ptr<ScanOptions> scan_options_;
+};
+
+class ARROW_DS_EXPORT SyncScanner : public Scanner {
+ public:
+  SyncScanner(std::shared_ptr<Dataset> dataset, std::shared_ptr<ScanOptions> scan_options)
+      : Scanner(std::move(scan_options)), dataset_(std::move(dataset)) {}
+
+  SyncScanner(std::shared_ptr<Fragment> fragment,
+              std::shared_ptr<ScanOptions> scan_options)
+      : Scanner(std::move(scan_options)), fragment_(std::move(fragment)) {}
+
+  Result<TaggedRecordBatchIterator> ScanBatches() override;
+
+  Result<ScanTaskIterator> Scan() override;
+
+  Result<std::shared_ptr<Table>> ToTable() override;
 
  protected:
+  /// \brief GetFragments returns an iterator over all Fragments in this scan.
+  Result<FragmentIterator> GetFragments();
   Future<std::shared_ptr<Table>> ToTableInternal(internal::Executor* cpu_executor);
 
   std::shared_ptr<Dataset> dataset_;
   // TODO(ARROW-8065) remove fragment_ after a Dataset is constuctible from fragments
   std::shared_ptr<Fragment> fragment_;
-  std::shared_ptr<ScanOptions> scan_options_;
 };
 
 /// \brief ScannerBuilder is a factory class to construct a Scanner. It is used
@@ -209,7 +349,8 @@ class ARROW_DS_EXPORT ScannerBuilder {
   ///         Schema.
   Status Project(std::vector<std::string> columns);
 
-  /// \brief Set expressions which will be evaluated to produce the materialized columns.
+  /// \brief Set expressions which will be evaluated to produce the materialized
+  /// columns.
   ///
   /// Columns which are not referenced may not be read from fragments.
   ///
@@ -255,6 +396,7 @@ class ARROW_DS_EXPORT ScannerBuilder {
   Result<std::shared_ptr<Scanner>> Finish();
 
   const std::shared_ptr<Schema>& schema() const;
+  const std::shared_ptr<Schema>& projected_schema() const;
 
  private:
   std::shared_ptr<Dataset> dataset_;
diff --git a/cpp/src/arrow/dataset/scanner_test.cc b/cpp/src/arrow/dataset/scanner_test.cc
index eec8ed21668..ccae126da47 100644
--- a/cpp/src/arrow/dataset/scanner_test.cc
+++ b/cpp/src/arrow/dataset/scanner_test.cc
@@ -38,7 +38,7 @@ constexpr int64_t kBatchSize = 1024;
 
 class TestScanner : public DatasetFixtureMixin {
  protected:
-  Scanner MakeScanner(std::shared_ptr<RecordBatch> batch) {
+  std::shared_ptr<Scanner> MakeScanner(std::shared_ptr<RecordBatch> batch) {
     std::vector<std::shared_ptr<RecordBatch>> batches{static_cast<size_t>(kNumberBatches),
                                                       batch};
 
@@ -47,17 +47,35 @@ class TestScanner : public DatasetFixtureMixin {
 
     EXPECT_OK_AND_ASSIGN(auto dataset, UnionDataset::Make(batch->schema(), children));
 
-    return Scanner{dataset, options_};
+    ScannerBuilder builder(dataset, options_);
+    EXPECT_OK_AND_ASSIGN(auto scanner, builder.Finish());
+    return scanner;
   }
 
   void AssertScannerEqualsRepetitionsOf(
-      Scanner scanner, std::shared_ptr<RecordBatch> batch,
+      std::shared_ptr<Scanner> scanner, std::shared_ptr<RecordBatch> batch,
       const int64_t total_batches = kNumberChildDatasets * kNumberBatches) {
     auto expected = ConstantArrayGenerator::Repeat(total_batches, batch);
 
     // Verifies that the unified BatchReader is equivalent to flattening all the
     // structures of the scanner, i.e. Scanner[Dataset[ScanTask[RecordBatch]]]
-    AssertScannerEquals(expected.get(), &scanner);
+    AssertScannerEquals(expected.get(), scanner.get());
+  }
+
+  void AssertScanBatchesEqualRepetitionsOf(
+      std::shared_ptr<Scanner> scanner, std::shared_ptr<RecordBatch> batch,
+      const int64_t total_batches = kNumberChildDatasets * kNumberBatches) {
+    auto expected = ConstantArrayGenerator::Repeat(total_batches, batch);
+
+    AssertScanBatchesEquals(expected.get(), scanner.get());
+  }
+
+  void AssertScanBatchesUnorderedEqualRepetitionsOf(
+      std::shared_ptr<Scanner> scanner, std::shared_ptr<RecordBatch> batch,
+      const int64_t total_batches = kNumberChildDatasets * kNumberBatches) {
+    auto expected = ConstantArrayGenerator::Repeat(total_batches, batch);
+
+    AssertScanBatchesUnorderedEquals(expected.get(), scanner.get());
   }
 };
 
@@ -67,6 +85,18 @@ TEST_F(TestScanner, Scan) {
   AssertScannerEqualsRepetitionsOf(MakeScanner(batch), batch);
 }
 
+TEST_F(TestScanner, ScanBatches) {
+  SetSchema({field("i32", int32()), field("f64", float64())});
+  auto batch = ConstantArrayGenerator::Zeroes(kBatchSize, schema_);
+  AssertScanBatchesEqualRepetitionsOf(MakeScanner(batch), batch);
+}
+
+TEST_F(TestScanner, ScanBatchesUnordered) {
+  SetSchema({field("i32", int32()), field("f64", float64())});
+  auto batch = ConstantArrayGenerator::Zeroes(kBatchSize, schema_);
+  AssertScanBatchesUnorderedEqualRepetitionsOf(MakeScanner(batch), batch);
+}
+
 TEST_F(TestScanner, ScanWithCappedBatchSize) {
   SetSchema({field("i32", int32()), field("f64", float64())});
   auto batch = ConstantArrayGenerator::Zeroes(kBatchSize, schema_);
@@ -126,7 +156,7 @@ TEST_F(TestScanner, MaterializeMissingColumn) {
   ScannerBuilder builder{schema_, fragment_missing_f64, options_};
   ASSERT_OK_AND_ASSIGN(auto scanner, builder.Finish());
 
-  AssertScannerEqualsRepetitionsOf(*scanner, batch_with_f64);
+  AssertScannerEqualsRepetitionsOf(scanner, batch_with_f64);
 }
 
 TEST_F(TestScanner, ToTable) {
@@ -141,13 +171,13 @@ TEST_F(TestScanner, ToTable) {
   std::shared_ptr<Table> actual;
 
   options_->use_threads = false;
-  ASSERT_OK_AND_ASSIGN(actual, scanner.ToTable());
+  ASSERT_OK_AND_ASSIGN(actual, scanner->ToTable());
   AssertTablesEqual(*expected, *actual);
 
   // There is no guarantee on the ordering when using multiple threads, but
   // since the RecordBatch is always the same it will pass.
   options_->use_threads = true;
-  ASSERT_OK_AND_ASSIGN(actual, scanner.ToTable());
+  ASSERT_OK_AND_ASSIGN(actual, scanner->ToTable());
   AssertTablesEqual(*expected, *actual);
 }
 
diff --git a/cpp/src/arrow/dataset/test_util.h b/cpp/src/arrow/dataset/test_util.h
index 72cde368013..826e8b7901a 100644
--- a/cpp/src/arrow/dataset/test_util.h
+++ b/cpp/src/arrow/dataset/test_util.h
@@ -43,10 +43,12 @@
 #include "arrow/testing/generator.h"
 #include "arrow/testing/gtest_util.h"
 #include "arrow/testing/random.h"
+#include "arrow/util/async_generator.h"
 #include "arrow/util/io_util.h"
 #include "arrow/util/iterator.h"
 #include "arrow/util/logging.h"
 #include "arrow/util/make_unique.h"
+#include "arrow/util/thread_pool.h"
 
 namespace arrow {
 namespace dataset {
@@ -137,6 +139,14 @@ class DatasetFixtureMixin : public ::testing::Test {
     }
   }
 
+  /// \brief Assert the value of the next batch yielded by the reader
+  void AssertBatchEquals(RecordBatchReader* expected, const RecordBatch& batch) {
+    std::shared_ptr<RecordBatch> lhs;
+    ASSERT_OK(expected->ReadNext(&lhs));
+    EXPECT_NE(lhs, nullptr);
+    AssertBatchesEqual(*lhs, batch);
+  }
+
   /// \brief Ensure that record batches found in reader are equals to the
   /// record batches yielded by the data fragment.
   void AssertFragmentEquals(RecordBatchReader* expected, Fragment* fragment,
@@ -186,6 +196,46 @@ class DatasetFixtureMixin : public ::testing::Test {
     }
   }
 
+  /// \brief Ensure that record batches found in reader are equals to the
+  /// record batches yielded by a scanner.
+  void AssertScanBatchesEquals(RecordBatchReader* expected, Scanner* scanner,
+                               bool ensure_drained = true) {
+    ASSERT_OK_AND_ASSIGN(auto it, scanner->ScanBatches());
+
+    ARROW_EXPECT_OK(it.Visit([&](TaggedRecordBatch batch) -> Status {
+      AssertBatchEquals(expected, *batch.record_batch);
+      return Status::OK();
+    }));
+
+    if (ensure_drained) {
+      EnsureRecordBatchReaderDrained(expected);
+    }
+  }
+
+  /// \brief Ensure that record batches found in reader are equals to the
+  /// record batches yielded by a scanner.  Each fragment in the scanner is
+  /// expected to have a single batch.
+  void AssertScanBatchesUnorderedEquals(RecordBatchReader* expected, Scanner* scanner,
+                                        bool ensure_drained = true) {
+    ASSERT_OK_AND_ASSIGN(auto it, scanner->ScanBatchesUnordered());
+
+    int fragment_counter = 0;
+    bool saw_last_fragment = false;
+    ARROW_EXPECT_OK(it.Visit([&](EnumeratedRecordBatch batch) -> Status {
+      EXPECT_EQ(0, batch.record_batch.index);
+      EXPECT_EQ(true, batch.record_batch.last);
+      EXPECT_EQ(fragment_counter++, batch.fragment.index);
+      EXPECT_FALSE(saw_last_fragment);
+      saw_last_fragment = batch.fragment.last;
+      AssertBatchEquals(expected, *batch.record_batch.value);
+      return Status::OK();
+    }));
+
+    if (ensure_drained) {
+      EnsureRecordBatchReaderDrained(expected);
+    }
+  }
+
   /// \brief Ensure that record batches found in reader are equals to the
   /// record batches yielded by a dataset.
   void AssertDatasetEquals(RecordBatchReader* expected, Dataset* dataset,
@@ -584,7 +634,8 @@ class WriteFileSystemDatasetMixin : public MakeFileSystemDatasetMixin {
 
   void DoWrite(std::shared_ptr<Partitioning> desired_partitioning) {
     write_options_.partitioning = desired_partitioning;
-    auto scanner = std::make_shared<Scanner>(dataset_, scan_options_);
+    auto scanner_builder = ScannerBuilder(dataset_, scan_options_);
+    ASSERT_OK_AND_ASSIGN(auto scanner, scanner_builder.Finish());
     ASSERT_OK(FileSystemDataset::Write(write_options_, scanner));
 
     // re-discover the written dataset
diff --git a/cpp/src/jni/dataset/jni_wrapper.cc b/cpp/src/jni/dataset/jni_wrapper.cc
index fe09dc44eca..196bf2b5c05 100644
--- a/cpp/src/jni/dataset/jni_wrapper.cc
+++ b/cpp/src/jni/dataset/jni_wrapper.cc
@@ -475,7 +475,8 @@ Java_org_apache_arrow_dataset_jni_JniWrapper_getSchemaFromScanner(JNIEnv* env, j
   std::shared_ptr<arrow::Schema> schema =
       RetrieveNativeInstance<DisposableScannerAdaptor>(scanner_id)
           ->GetScanner()
-          ->schema();
+          ->options()
+          ->projected_schema;
   return JniGetOrThrow(ToSchemaByteArray(env, schema));
   JNI_METHOD_END(nullptr)
 }
diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx
index 3320b472e1b..a6cfd711558 100644
--- a/python/pyarrow/_dataset.pyx
+++ b/python/pyarrow/_dataset.pyx
@@ -2791,14 +2791,6 @@ cdef class Scanner(_Weakrefable):
 
         return pyarrow_wrap_table(GetResultValue(result))
 
-    def get_fragments(self):
-        """Returns an iterator over the fragments in this scan.
-        """
-        cdef CFragmentIterator c_fragments = move(GetResultValue(
-            self.scanner.GetFragments()))
-        for maybe_fragment in c_fragments:
-            yield Fragment.wrap(GetResultValue(move(maybe_fragment)))
-
 
 def _get_partition_keys(Expression partition_expression):
     """
diff --git a/r/src/dataset.cpp b/r/src/dataset.cpp
index c8fdb7ae311..dc7ccd693a2 100644
--- a/r/src/dataset.cpp
+++ b/r/src/dataset.cpp
@@ -457,7 +457,7 @@ cpp11::list dataset___Scanner__Scan(const std::shared_ptr<ds::Scanner>& scanner)
 // [[dataset::export]]
 std::shared_ptr<arrow::Schema> dataset___Scanner__schema(
     const std::shared_ptr<ds::Scanner>& sc) {
-  return sc->schema();
+  return sc->options()->projected_schema;
 }
 
 // [[dataset::export]]

From 57d430e5f7bb0782bcfdbfda6d45068672982b8a Mon Sep 17 00:00:00 2001
From: Sathis Kumar <sathis.kumar@udemy.com>
Date: Tue, 13 Apr 2021 07:08:08 -0600
Subject: [PATCH 009/719] ARROW-12332: [Rust] [Ballista] Add simple api server
 in scheduler

Implements GET /executors. We can additional endpoints going forward.

Closes #9987 from msathis/master

Authored-by: Sathis Kumar <sathis.kumar@udemy.com>
Signed-off-by: Andy Grove <andygrove73@gmail.com>
---
 rust/ballista/rust/core/Cargo.toml            |  1 +
 .../rust/core/src/serde/scheduler/mod.rs      |  3 +-
 rust/ballista/rust/scheduler/Cargo.toml       |  7 +-
 rust/ballista/rust/scheduler/README.md        | 11 +++
 .../rust/scheduler/src/api/handlers.rs        | 40 +++++++++
 rust/ballista/rust/scheduler/src/api/mod.rs   | 85 +++++++++++++++++++
 rust/ballista/rust/scheduler/src/lib.rs       |  2 +
 rust/ballista/rust/scheduler/src/main.rs      | 43 ++++++++--
 8 files changed, 183 insertions(+), 9 deletions(-)
 create mode 100644 rust/ballista/rust/scheduler/src/api/handlers.rs
 create mode 100644 rust/ballista/rust/scheduler/src/api/mod.rs

diff --git a/rust/ballista/rust/core/Cargo.toml b/rust/ballista/rust/core/Cargo.toml
index b6301918a1f..f5f6f8574b3 100644
--- a/rust/ballista/rust/core/Cargo.toml
+++ b/rust/ballista/rust/core/Cargo.toml
@@ -34,6 +34,7 @@ async-trait = "0.1.36"
 futures = "0.3"
 log = "0.4"
 prost = "0.7"
+serde = {version = "1", features = ["derive"]}
 sqlparser = "0.8"
 tokio = "1.0"
 tonic = "0.4"
diff --git a/rust/ballista/rust/core/src/serde/scheduler/mod.rs b/rust/ballista/rust/core/src/serde/scheduler/mod.rs
index efee82dbdf3..81d8722d7f4 100644
--- a/rust/ballista/rust/core/src/serde/scheduler/mod.rs
+++ b/rust/ballista/rust/core/src/serde/scheduler/mod.rs
@@ -23,6 +23,7 @@ use arrow::array::{
 use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
 use datafusion::logical_plan::LogicalPlan;
 use datafusion::physical_plan::ExecutionPlan;
+use serde::Serialize;
 use uuid::Uuid;
 
 use super::protobuf;
@@ -67,7 +68,7 @@ pub struct PartitionLocation {
 }
 
 /// Meta-data for an executor, used when fetching shuffle partitions from other executors
-#[derive(Debug, Clone, PartialEq, Eq)]
+#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
 pub struct ExecutorMeta {
     pub id: String,
     pub host: String,
diff --git a/rust/ballista/rust/scheduler/Cargo.toml b/rust/ballista/rust/scheduler/Cargo.toml
index 525e28a63cc..b0213d37bda 100644
--- a/rust/ballista/rust/scheduler/Cargo.toml
+++ b/rust/ballista/rust/scheduler/Cargo.toml
@@ -38,14 +38,19 @@ configure_me = "0.4.0"
 env_logger = "0.8"
 etcd-client = { version = "0.6", optional = true }
 futures = "0.3"
+http = "0.2"
+http-body = "0.4"
+hyper = "0.14.4"
 log = "0.4"
 parse_arg = "0.1.3"
 prost = "0.7"
 rand = "0.8"
 serde = {version = "1", features = ["derive"]}
 sled_package = { package = "sled", version = "0.34", optional = true }
-tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread"] }
+tokio = { version = "1.0", features = ["full"] }
 tonic = "0.4"
+tower = { version = "0.4" }
+warp = "0.3"
 
 arrow = { git = "https://github.com/apache/arrow", rev="46161d2" }
 datafusion = { git = "https://github.com/apache/arrow", rev="46161d2" }
diff --git a/rust/ballista/rust/scheduler/README.md b/rust/ballista/rust/scheduler/README.md
index facc6d17698..c2cc090bd67 100644
--- a/rust/ballista/rust/scheduler/README.md
+++ b/rust/ballista/rust/scheduler/README.md
@@ -30,3 +30,14 @@ $ RUST_LOG=info cargo run --release
 ```
 
 By default, the scheduler will bind to `localhost` and listen on port `50051`.
+
+## Connecting to Scheduler
+Scheduler supports REST model also using content negotiation. 
+For e.x if you want to get list of executors connected to the scheduler, 
+you can do (assuming you use default config)
+
+```bash
+curl --request GET \
+  --url http://localhost:50050/executors \
+  --header 'Accept: application/json'
+```
diff --git a/rust/ballista/rust/scheduler/src/api/handlers.rs b/rust/ballista/rust/scheduler/src/api/handlers.rs
new file mode 100644
index 00000000000..c3450215007
--- /dev/null
+++ b/rust/ballista/rust/scheduler/src/api/handlers.rs
@@ -0,0 +1,40 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use crate::SchedulerServer;
+use ballista_core::serde::protobuf::{
+    scheduler_grpc_server::SchedulerGrpc, ExecutorMetadata, GetExecutorMetadataParams,
+    GetExecutorMetadataResult,
+};
+use ballista_core::serde::scheduler::ExecutorMeta;
+use tonic::{Request, Response};
+use warp::Rejection;
+
+pub(crate) async fn list_executors_data(
+    data_server: SchedulerServer,
+) -> Result<impl warp::Reply, Rejection> {
+    let data: Result<Response<GetExecutorMetadataResult>, tonic::Status> = data_server
+        .get_executors_metadata(Request::new(GetExecutorMetadataParams {}))
+        .await;
+    let result = data.unwrap();
+    let res: &GetExecutorMetadataResult = result.get_ref();
+    let vec: &Vec<ExecutorMetadata> = &res.metadata;
+    let metadata: Vec<ExecutorMeta> = vec
+        .iter()
+        .map(|v: &ExecutorMetadata| ExecutorMeta {
+            host: v.host.clone(),
+            port: v.port as u16,
+            id: v.id.clone(),
+        })
+        .collect();
+    Ok(warp::reply::json(&metadata))
+}
diff --git a/rust/ballista/rust/scheduler/src/api/mod.rs b/rust/ballista/rust/scheduler/src/api/mod.rs
new file mode 100644
index 00000000000..29c5cb1af67
--- /dev/null
+++ b/rust/ballista/rust/scheduler/src/api/mod.rs
@@ -0,0 +1,85 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+mod handlers;
+
+use crate::SchedulerServer;
+use anyhow::Result;
+use std::{
+    pin::Pin,
+    task::{Context as TaskContext, Poll},
+};
+use warp::filters::BoxedFilter;
+use warp::{Buf, Filter, Reply};
+
+pub enum EitherBody<A, B> {
+    Left(A),
+    Right(B),
+}
+
+pub type Error = Box<dyn std::error::Error + Send + Sync + 'static>;
+pub type HttpBody = dyn http_body::Body<Data = dyn Buf, Error = Error> + 'static;
+
+impl<A, B> http_body::Body for EitherBody<A, B>
+    where
+        A: http_body::Body + Send + Unpin,
+        B: http_body::Body<Data = A::Data> + Send + Unpin,
+        A::Error: Into<Error>,
+        B::Error: Into<Error>,
+{
+    type Data = A::Data;
+    type Error = Error;
+
+    fn poll_data(
+        self: Pin<&mut Self>,
+        cx: &mut TaskContext<'_>,
+    ) -> Poll<Option<Result<Self::Data, Self::Error>>> {
+        match self.get_mut() {
+            EitherBody::Left(b) => Pin::new(b).poll_data(cx).map(map_option_err),
+            EitherBody::Right(b) => Pin::new(b).poll_data(cx).map(map_option_err),
+        }
+    }
+
+    fn poll_trailers(
+        self: Pin<&mut Self>,
+        cx: &mut TaskContext<'_>,
+    ) -> Poll<Result<Option<http::HeaderMap>, Self::Error>> {
+        match self.get_mut() {
+            EitherBody::Left(b) => Pin::new(b).poll_trailers(cx).map_err(Into::into),
+            EitherBody::Right(b) => Pin::new(b).poll_trailers(cx).map_err(Into::into),
+        }
+    }
+
+    fn is_end_stream(&self) -> bool {
+        match self {
+            EitherBody::Left(b) => b.is_end_stream(),
+            EitherBody::Right(b) => b.is_end_stream(),
+        }
+    }
+}
+
+fn map_option_err<T, U: Into<Error>>(err: Option<Result<T, U>>) -> Option<Result<T, Error>> {
+    err.map(|e| e.map_err(Into::into))
+}
+
+fn with_data_server(
+    db: SchedulerServer,
+) -> impl Filter<Extract = (SchedulerServer,), Error = std::convert::Infallible> + Clone {
+    warp::any().map(move || db.clone())
+}
+
+pub fn get_routes(scheduler_server: SchedulerServer) -> BoxedFilter<(impl Reply,)> {
+    let routes = warp::path("executors")
+        .and(with_data_server(scheduler_server))
+        .and_then(handlers::list_executors_data);
+    routes.boxed()
+}
diff --git a/rust/ballista/rust/scheduler/src/lib.rs b/rust/ballista/rust/scheduler/src/lib.rs
index 8ad2cc7a448..6df6c9ac57c 100644
--- a/rust/ballista/rust/scheduler/src/lib.rs
+++ b/rust/ballista/rust/scheduler/src/lib.rs
@@ -17,6 +17,7 @@
 
 //! Support for distributed schedulers, such as Kubernetes
 
+pub mod api;
 pub mod planner;
 pub mod state;
 
@@ -68,6 +69,7 @@ use self::state::{ConfigBackendClient, SchedulerState};
 use datafusion::physical_plan::parquet::ParquetExec;
 use std::time::Instant;
 
+#[derive(Clone)]
 pub struct SchedulerServer {
     state: SchedulerState,
     namespace: String,
diff --git a/rust/ballista/rust/scheduler/src/main.rs b/rust/ballista/rust/scheduler/src/main.rs
index 785ffb47b17..c166fdc388d 100644
--- a/rust/ballista/rust/scheduler/src/main.rs
+++ b/rust/ballista/rust/scheduler/src/main.rs
@@ -17,9 +17,14 @@
 
 //! Ballista Rust scheduler binary.
 
+use anyhow::{Context, Result};
+use futures::future::{self, Either, TryFutureExt};
+use hyper::{service::make_service_fn, Server};
+use std::convert::Infallible;
 use std::{net::SocketAddr, sync::Arc};
+use tonic::transport::Server as TonicServer;
+use tower::Service;
 
-use anyhow::{Context, Result};
 use ballista_core::BALLISTA_VERSION;
 use ballista_core::{
     print_version, serde::protobuf::scheduler_grpc_server::SchedulerGrpcServer,
@@ -29,9 +34,9 @@ use ballista_scheduler::state::EtcdClient;
 #[cfg(feature = "sled")]
 use ballista_scheduler::state::StandaloneClient;
 use ballista_scheduler::{state::ConfigBackendClient, ConfigBackend, SchedulerServer};
+use ballista_scheduler::api::{get_routes, EitherBody, Error};
 
 use log::info;
-use tonic::transport::Server;
 
 #[macro_use]
 extern crate configure_me;
@@ -56,11 +61,35 @@ async fn start_server(
         "Ballista v{} Scheduler listening on {:?}",
         BALLISTA_VERSION, addr
     );
-    let server =
-        SchedulerGrpcServer::new(SchedulerServer::new(config_backend, namespace));
-    Ok(Server::builder()
-        .add_service(server)
-        .serve(addr)
+    Ok(Server::bind(&addr)
+        .serve(make_service_fn(move |_| {
+            let scheduler_server = SchedulerServer::new(config_backend.clone(), namespace.clone());
+            let scheduler_grpc_server = SchedulerGrpcServer::new(scheduler_server.clone());
+
+            let mut tonic = TonicServer::builder()
+                .add_service(scheduler_grpc_server)
+                .into_service();
+            let mut warp = warp::service(get_routes(scheduler_server));
+
+            future::ok::<_, Infallible>(tower::service_fn(
+                move |req: hyper::Request<hyper::Body>| {
+                    let header = req.headers().get(hyper::header::ACCEPT);
+                    if header.is_some() && header.unwrap().eq("application/json") {
+                        return Either::Left(
+                            warp.call(req)
+                                .map_ok(|res| res.map(EitherBody::Left))
+                                .map_err(Error::from),
+                        );
+                    }
+                    Either::Right(
+                        tonic
+                            .call(req)
+                            .map_ok(|res| res.map(EitherBody::Right))
+                            .map_err(Error::from),
+                    )
+                },
+            ))
+        }))
         .await
         .context("Could not start grpc server")?)
 }

From a49d6e235f7e5b7178a985330320980659033f36 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Tue, 13 Apr 2021 11:50:39 -0400
Subject: [PATCH 010/719] ARROW-12248: [C++] Avoid looking up
 ARROW_DEFAULT_MEMORY_POOL environment variable too late

In some situations (e.g. R bindings), default_memory_pool() may be called before the Arrow library's global variables were fully initialized.

Closes #9930 from pitrou/ARROW-12248-memory-pool-env-lookup

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/src/arrow/memory_pool.cc | 74 ++++++++++++++++++++----------------
 1 file changed, 41 insertions(+), 33 deletions(-)

diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc
index 58a80232fdf..2d19b3d1962 100644
--- a/cpp/src/arrow/memory_pool.cc
+++ b/cpp/src/arrow/memory_pool.cc
@@ -105,8 +105,14 @@ struct SupportedBackend {
   MemoryPoolBackend backend;
 };
 
-std::vector<SupportedBackend> SupportedBackends() {
-  std::vector<SupportedBackend> backends = {
+// See ARROW-12248 for why we use static in-function singletons rather than
+// global constants below (in SupportedBackends() and UserSelectedBackend()).
+// In some contexts (especially R bindings) `default_memory_pool()` may be
+// called before all globals are initialized, and then the ARROW_DEFAULT_MEMORY_POOL
+// environment variable would be ignored.
+
+const std::vector<SupportedBackend>& SupportedBackends() {
+  static std::vector<SupportedBackend> backends = {
 #ifdef ARROW_JEMALLOC
       {"jemalloc", MemoryPoolBackend::Jemalloc},
 #endif
@@ -117,42 +123,44 @@ std::vector<SupportedBackend> SupportedBackends() {
   return backends;
 }
 
-const std::vector<SupportedBackend> supported_backends = SupportedBackends();
-
+// Return the MemoryPoolBackend selected by the user through the
+// ARROW_DEFAULT_MEMORY_POOL environment variable, if any.
 util::optional<MemoryPoolBackend> UserSelectedBackend() {
-  auto unsupported_backend = [](const std::string& name) {
-    std::vector<std::string> supported;
-    for (const auto backend : supported_backends) {
-      supported.push_back(std::string("'") + backend.name + "'");
+  static auto user_selected_backend = []() -> util::optional<MemoryPoolBackend> {
+    auto unsupported_backend = [](const std::string& name) {
+      std::vector<std::string> supported;
+      for (const auto backend : SupportedBackends()) {
+        supported.push_back(std::string("'") + backend.name + "'");
+      }
+      ARROW_LOG(WARNING) << "Unsupported backend '" << name << "' specified in "
+                         << kDefaultBackendEnvVar << " (supported backends are "
+                         << internal::JoinStrings(supported, ", ") << ")";
+    };
+
+    auto maybe_name = internal::GetEnvVar(kDefaultBackendEnvVar);
+    if (!maybe_name.ok()) {
+      return {};
     }
-    ARROW_LOG(WARNING) << "Unsupported backend '" << name << "' specified in "
-                       << kDefaultBackendEnvVar << " (supported backends are "
-                       << internal::JoinStrings(supported, ", ") << ")";
-  };
-
-  auto maybe_name = internal::GetEnvVar(kDefaultBackendEnvVar);
-  if (!maybe_name.ok()) {
-    return {};
-  }
-  const auto name = *std::move(maybe_name);
-  if (name.empty()) {
-    // An empty environment variable is considered missing
+    const auto name = *std::move(maybe_name);
+    if (name.empty()) {
+      // An empty environment variable is considered missing
+      return {};
+    }
+    const auto found = std::find_if(
+        SupportedBackends().begin(), SupportedBackends().end(),
+        [&](const SupportedBackend& backend) { return name == backend.name; });
+    if (found != SupportedBackends().end()) {
+      return found->backend;
+    }
+    unsupported_backend(name);
     return {};
-  }
-  const auto found =
-      std::find_if(supported_backends.begin(), supported_backends.end(),
-                   [&](const SupportedBackend& backend) { return name == backend.name; });
-  if (found != supported_backends.end()) {
-    return found->backend;
-  }
-  unsupported_backend(name);
-  return {};
-}
+  }();
 
-const util::optional<MemoryPoolBackend> user_selected_backend = UserSelectedBackend();
+  return user_selected_backend;
+}
 
 MemoryPoolBackend DefaultBackend() {
-  auto backend = user_selected_backend;
+  auto backend = UserSelectedBackend();
   if (backend.has_value()) {
     return backend.value();
   }
@@ -634,7 +642,7 @@ std::string ProxyMemoryPool::backend_name() const { return impl_->backend_name()
 
 std::vector<std::string> SupportedMemoryBackendNames() {
   std::vector<std::string> supported;
-  for (const auto backend : supported_backends) {
+  for (const auto backend : SupportedBackends()) {
     supported.push_back(backend.name);
   }
   return supported;

From d7558bff24993ec69a8c7810734425e821c601b7 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Tue, 13 Apr 2021 18:14:32 +0200
Subject: [PATCH 011/719] ARROW-11839: [C++] Use xsimd for generation of
 accelerated bit-unpacking

The custom per-ISA code generation scripts (AVX2, AVX512) are replaced with a single code generation script that outputs xsimd code for any SIMD bit-width, in an ISA-agnostic way.

Also add a Neon optimized version of bit-unpacking that leverages the generated code for 128-bit SIMD.

Closes #9614 from pitrou/ARROW-11839-xsimd-bpacking

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/cmake_modules/SetupCxxFlags.cmake         |    1 +
 cpp/src/arrow/CMakeLists.txt                  |    3 +
 cpp/src/arrow/util/bpacking.cc                |    7 +
 cpp/src/arrow/util/bpacking_avx2.cc           |  114 +-
 cpp/src/arrow/util/bpacking_avx2_codegen.py   |  203 --
 cpp/src/arrow/util/bpacking_avx512.cc         |  114 +-
 cpp/src/arrow/util/bpacking_avx512_codegen.py |  186 --
 cpp/src/arrow/util/bpacking_neon.cc           |   31 +
 cpp/src/arrow/util/bpacking_neon.h            |   28 +
 .../arrow/util/bpacking_simd128_generated.h   | 2138 +++++++++++++++++
 .../arrow/util/bpacking_simd256_generated.h   | 1270 ++++++++++
 .../arrow/util/bpacking_simd512_generated.h   |  836 +++++++
 cpp/src/arrow/util/bpacking_simd_codegen.py   |  209 ++
 cpp/src/arrow/util/bpacking_simd_internal.h   |  138 ++
 cpp/thirdparty/versions.txt                   |    3 +-
 15 files changed, 4670 insertions(+), 611 deletions(-)
 delete mode 100644 cpp/src/arrow/util/bpacking_avx2_codegen.py
 delete mode 100644 cpp/src/arrow/util/bpacking_avx512_codegen.py
 create mode 100644 cpp/src/arrow/util/bpacking_neon.cc
 create mode 100644 cpp/src/arrow/util/bpacking_neon.h
 create mode 100644 cpp/src/arrow/util/bpacking_simd128_generated.h
 create mode 100644 cpp/src/arrow/util/bpacking_simd256_generated.h
 create mode 100644 cpp/src/arrow/util/bpacking_simd512_generated.h
 create mode 100644 cpp/src/arrow/util/bpacking_simd_codegen.py
 create mode 100644 cpp/src/arrow/util/bpacking_simd_internal.h

diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake
index 9f68c560472..6e259559e42 100644
--- a/cpp/cmake_modules/SetupCxxFlags.cmake
+++ b/cpp/cmake_modules/SetupCxxFlags.cmake
@@ -452,6 +452,7 @@ if(ARROW_CPU_FLAG STREQUAL "armv8")
   set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} ${ARROW_ARMV8_ARCH_FLAG}")
 
   if(NOT ARROW_SIMD_LEVEL STREQUAL "NONE")
+    set(ARROW_HAVE_NEON ON)
     add_definitions(-DARROW_HAVE_NEON)
   endif()
 
diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index df72dcc5b6b..3623283f355 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -228,6 +228,9 @@ if(ARROW_HAVE_RUNTIME_AVX512)
   set_source_files_properties(util/bpacking_avx512.cc PROPERTIES COMPILE_FLAGS
                               ${ARROW_AVX512_FLAG})
 endif()
+if(ARROW_HAVE_NEON)
+  list(APPEND ARROW_SRCS util/bpacking_neon.cc)
+endif()
 
 if(APPLE)
   list(APPEND ARROW_SRCS vendored/datetime/ios.mm)
diff --git a/cpp/src/arrow/util/bpacking.cc b/cpp/src/arrow/util/bpacking.cc
index 02634755bd0..2e658fd108e 100644
--- a/cpp/src/arrow/util/bpacking.cc
+++ b/cpp/src/arrow/util/bpacking.cc
@@ -27,6 +27,9 @@
 #if defined(ARROW_HAVE_RUNTIME_AVX512)
 #include "arrow/util/bpacking_avx512.h"
 #endif
+#if defined(ARROW_HAVE_NEON)
+#include "arrow/util/bpacking_neon.h"
+#endif
 
 namespace arrow {
 namespace internal {
@@ -163,8 +166,12 @@ struct Unpack32DynamicFunction {
 }  // namespace
 
 int unpack32(const uint32_t* in, uint32_t* out, int batch_size, int num_bits) {
+#if defined(ARROW_HAVE_NEON)
+  return unpack32_neon(in, out, batch_size, num_bits);
+#else
   static DynamicDispatch<Unpack32DynamicFunction> dispatch;
   return dispatch.func(in, out, batch_size, num_bits);
+#endif
 }
 
 }  // namespace internal
diff --git a/cpp/src/arrow/util/bpacking_avx2.cc b/cpp/src/arrow/util/bpacking_avx2.cc
index 63b914b578a..5a3a7bad3d3 100644
--- a/cpp/src/arrow/util/bpacking_avx2.cc
+++ b/cpp/src/arrow/util/bpacking_avx2.cc
@@ -16,121 +16,15 @@
 // under the License.
 
 #include "arrow/util/bpacking_avx2.h"
-#include "arrow/util/bpacking_avx2_generated.h"
-#include "arrow/util/logging.h"
+#include "arrow/util/bpacking_simd256_generated.h"
+#include "arrow/util/bpacking_simd_internal.h"
 
 namespace arrow {
 namespace internal {
 
 int unpack32_avx2(const uint32_t* in, uint32_t* out, int batch_size, int num_bits) {
-  batch_size = batch_size / 32 * 32;
-  int num_loops = batch_size / 32;
-
-  switch (num_bits) {
-    case 0:
-      for (int i = 0; i < num_loops; ++i) in = unpack0_32_avx2(in, out + i * 32);
-      break;
-    case 1:
-      for (int i = 0; i < num_loops; ++i) in = unpack1_32_avx2(in, out + i * 32);
-      break;
-    case 2:
-      for (int i = 0; i < num_loops; ++i) in = unpack2_32_avx2(in, out + i * 32);
-      break;
-    case 3:
-      for (int i = 0; i < num_loops; ++i) in = unpack3_32_avx2(in, out + i * 32);
-      break;
-    case 4:
-      for (int i = 0; i < num_loops; ++i) in = unpack4_32_avx2(in, out + i * 32);
-      break;
-    case 5:
-      for (int i = 0; i < num_loops; ++i) in = unpack5_32_avx2(in, out + i * 32);
-      break;
-    case 6:
-      for (int i = 0; i < num_loops; ++i) in = unpack6_32_avx2(in, out + i * 32);
-      break;
-    case 7:
-      for (int i = 0; i < num_loops; ++i) in = unpack7_32_avx2(in, out + i * 32);
-      break;
-    case 8:
-      for (int i = 0; i < num_loops; ++i) in = unpack8_32_avx2(in, out + i * 32);
-      break;
-    case 9:
-      for (int i = 0; i < num_loops; ++i) in = unpack9_32_avx2(in, out + i * 32);
-      break;
-    case 10:
-      for (int i = 0; i < num_loops; ++i) in = unpack10_32_avx2(in, out + i * 32);
-      break;
-    case 11:
-      for (int i = 0; i < num_loops; ++i) in = unpack11_32_avx2(in, out + i * 32);
-      break;
-    case 12:
-      for (int i = 0; i < num_loops; ++i) in = unpack12_32_avx2(in, out + i * 32);
-      break;
-    case 13:
-      for (int i = 0; i < num_loops; ++i) in = unpack13_32_avx2(in, out + i * 32);
-      break;
-    case 14:
-      for (int i = 0; i < num_loops; ++i) in = unpack14_32_avx2(in, out + i * 32);
-      break;
-    case 15:
-      for (int i = 0; i < num_loops; ++i) in = unpack15_32_avx2(in, out + i * 32);
-      break;
-    case 16:
-      for (int i = 0; i < num_loops; ++i) in = unpack16_32_avx2(in, out + i * 32);
-      break;
-    case 17:
-      for (int i = 0; i < num_loops; ++i) in = unpack17_32_avx2(in, out + i * 32);
-      break;
-    case 18:
-      for (int i = 0; i < num_loops; ++i) in = unpack18_32_avx2(in, out + i * 32);
-      break;
-    case 19:
-      for (int i = 0; i < num_loops; ++i) in = unpack19_32_avx2(in, out + i * 32);
-      break;
-    case 20:
-      for (int i = 0; i < num_loops; ++i) in = unpack20_32_avx2(in, out + i * 32);
-      break;
-    case 21:
-      for (int i = 0; i < num_loops; ++i) in = unpack21_32_avx2(in, out + i * 32);
-      break;
-    case 22:
-      for (int i = 0; i < num_loops; ++i) in = unpack22_32_avx2(in, out + i * 32);
-      break;
-    case 23:
-      for (int i = 0; i < num_loops; ++i) in = unpack23_32_avx2(in, out + i * 32);
-      break;
-    case 24:
-      for (int i = 0; i < num_loops; ++i) in = unpack24_32_avx2(in, out + i * 32);
-      break;
-    case 25:
-      for (int i = 0; i < num_loops; ++i) in = unpack25_32_avx2(in, out + i * 32);
-      break;
-    case 26:
-      for (int i = 0; i < num_loops; ++i) in = unpack26_32_avx2(in, out + i * 32);
-      break;
-    case 27:
-      for (int i = 0; i < num_loops; ++i) in = unpack27_32_avx2(in, out + i * 32);
-      break;
-    case 28:
-      for (int i = 0; i < num_loops; ++i) in = unpack28_32_avx2(in, out + i * 32);
-      break;
-    case 29:
-      for (int i = 0; i < num_loops; ++i) in = unpack29_32_avx2(in, out + i * 32);
-      break;
-    case 30:
-      for (int i = 0; i < num_loops; ++i) in = unpack30_32_avx2(in, out + i * 32);
-      break;
-    case 31:
-      for (int i = 0; i < num_loops; ++i) in = unpack31_32_avx2(in, out + i * 32);
-      break;
-    case 32:
-      for (int i = 0; i < num_loops; ++i) in = unpack32_32_avx2(in, out + i * 32);
-      break;
-    default:
-      DCHECK(false) << "Unsupported num_bits";
-  }
-
-  return batch_size;
+  return unpack32_specialized<UnpackBits256<DispatchLevel::AVX2>>(in, out, batch_size,
+                                                                  num_bits);
 }
 
 }  // namespace internal
diff --git a/cpp/src/arrow/util/bpacking_avx2_codegen.py b/cpp/src/arrow/util/bpacking_avx2_codegen.py
deleted file mode 100644
index e60aed86a29..00000000000
--- a/cpp/src/arrow/util/bpacking_avx2_codegen.py
+++ /dev/null
@@ -1,203 +0,0 @@
-#!/bin/python
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# Usage: python bpacking_avx2_codegen.py > bpacking_avx2_generated.h
-
-
-def print_unpack_bit_func(bit):
-    shift = 0
-    shifts = []
-    in_index = 0
-    inls = []
-    mask = (1 << bit) - 1
-    bracket = "{"
-
-    print(
-        f"inline static const uint32_t* unpack{bit}_32_avx2(const uint32_t* in, uint32_t* out) {bracket}")
-    print("  using ::arrow::util::SafeLoad;")
-    print("  uint32_t mask = 0x%x;" % mask)
-    print("  __m256i reg_shifts, reg_inls, reg_masks;")
-    print("  __m256i results;")
-
-    print("")
-    for i in range(32):
-        if shift + bit == 32:
-            shifts.append(shift)
-            inls.append(f"SafeLoad(in + {in_index})")
-            in_index += 1
-            shift = 0
-        elif shift + bit > 32:  # cross the boundary
-            inls.append(
-                f"SafeLoad(in + {in_index}) >> {shift} | SafeLoad(in + {in_index + 1}) << {32 - shift}")
-            in_index += 1
-            shift = bit - (32 - shift)
-            shifts.append(0)  # zero shift
-        else:
-            shifts.append(shift)
-            inls.append(f"SafeLoad(in + {in_index})")
-            shift += bit
-
-    print("  reg_masks = _mm256_set1_epi32(mask);")
-    print("")
-
-    print("  // shift the first 8 outs")
-    print(
-        f"  reg_shifts = _mm256_set_epi32({shifts[7]}, {shifts[6]}, {shifts[5]}, {shifts[4]},")
-    print(
-        f"                                {shifts[3]}, {shifts[2]}, {shifts[1]}, {shifts[0]});")
-    print(f"  reg_inls = _mm256_set_epi32({inls[7]}, {inls[6]},")
-    print(f"                              {inls[5]}, {inls[4]},")
-    print(f"                              {inls[3]}, {inls[2]},")
-    print(f"                              {inls[1]}, {inls[0]});")
-    print(
-        "  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);")
-    print("  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);")
-    print("  out += 8;")
-    print("")
-
-    print("  // shift the second 8 outs")
-    print(
-        f"  reg_shifts = _mm256_set_epi32({shifts[15]}, {shifts[14]}, {shifts[13]}, {shifts[12]},")
-    print(
-        f"                                {shifts[11]}, {shifts[10]}, {shifts[9]}, {shifts[8]});")
-    print(f"  reg_inls = _mm256_set_epi32({inls[15]}, {inls[14]},")
-    print(f"                              {inls[13]}, {inls[12]},")
-    print(f"                              {inls[11]}, {inls[10]},")
-    print(f"                              {inls[9]}, {inls[8]});")
-    print(
-        "  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);")
-    print("  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);")
-    print("  out += 8;")
-    print("")
-
-    print("  // shift the third 8 outs")
-    print(
-        f"  reg_shifts = _mm256_set_epi32({shifts[23]}, {shifts[22]}, {shifts[21]}, {shifts[20]},")
-    print(
-        f"                                {shifts[19]}, {shifts[18]}, {shifts[17]}, {shifts[16]});")
-    print(f"  reg_inls = _mm256_set_epi32({inls[23]}, {inls[22]},")
-    print(f"                              {inls[21]}, {inls[20]},")
-    print(f"                              {inls[19]}, {inls[18]},")
-    print(f"                              {inls[17]}, {inls[16]});")
-    print(
-        "  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);")
-    print("  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);")
-    print("  out += 8;")
-    print("")
-
-    print("  // shift the last 8 outs")
-    print(
-        f"  reg_shifts = _mm256_set_epi32({shifts[31]}, {shifts[30]}, {shifts[29]}, {shifts[28]},")
-    print(
-        f"                                {shifts[27]}, {shifts[26]}, {shifts[25]}, {shifts[24]});")
-    print(f"  reg_inls = _mm256_set_epi32({inls[31]}, {inls[30]},")
-    print(f"                              {inls[29]}, {inls[28]},")
-    print(f"                              {inls[27]}, {inls[26]},")
-    print(f"                              {inls[25]}, {inls[24]});")
-    print(
-        "  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);")
-    print("  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);")
-    print("  out += 8;")
-
-    print("")
-    print(f"  in += {bit};")
-    print("")
-    print("  return in;")
-    print("}")
-
-
-def print_unpack_bit0_func():
-    print(
-        "inline static const uint32_t* unpack0_32_avx2(const uint32_t* in, uint32_t* out) {")
-    print("  memset(out, 0x0, 32 * sizeof(*out));")
-    print("  out += 32;")
-    print("")
-    print("  return in;")
-    print("}")
-
-
-def print_unpack_bit32_func():
-    print(
-        "inline static const uint32_t* unpack32_32_avx2(const uint32_t* in, uint32_t* out) {")
-    print("  memcpy(out, in, 32 * sizeof(*out));")
-    print("  in += 32;")
-    print("  out += 32;")
-    print("")
-    print("  return in;")
-    print("}")
-
-
-def print_copyright():
-    print(
-        """// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.""")
-
-
-def print_note():
-    print("//")
-    print("// Automatically generated file; DO NOT EDIT.")
-
-
-def main():
-    print_copyright()
-    print_note()
-    print("")
-    print("#pragma once")
-    print("")
-    print("#include <stdint.h>")
-    print("#include <string.h>")
-    print("")
-    print("#ifdef _MSC_VER")
-    print("#include <intrin.h>")
-    print("#else")
-    print("#include <immintrin.h>")
-    print("#endif")
-    print("")
-    print('#include "arrow/util/ubsan.h"')
-    print("")
-    print("namespace arrow {")
-    print("namespace internal {")
-    print("")
-    print_unpack_bit0_func()
-    print("")
-    for i in range(1, 32):
-        print_unpack_bit_func(i)
-        print("")
-    print_unpack_bit32_func()
-    print("")
-    print("}  // namespace internal")
-    print("}  // namespace arrow")
-
-
-if __name__ == '__main__':
-    main()
diff --git a/cpp/src/arrow/util/bpacking_avx512.cc b/cpp/src/arrow/util/bpacking_avx512.cc
index 98eb4d325af..08ccd3fcd4d 100644
--- a/cpp/src/arrow/util/bpacking_avx512.cc
+++ b/cpp/src/arrow/util/bpacking_avx512.cc
@@ -16,121 +16,15 @@
 // under the License.
 
 #include "arrow/util/bpacking_avx512.h"
-#include "arrow/util/bpacking_avx512_generated.h"
-#include "arrow/util/logging.h"
+#include "arrow/util/bpacking_simd512_generated.h"
+#include "arrow/util/bpacking_simd_internal.h"
 
 namespace arrow {
 namespace internal {
 
 int unpack32_avx512(const uint32_t* in, uint32_t* out, int batch_size, int num_bits) {
-  batch_size = batch_size / 32 * 32;
-  int num_loops = batch_size / 32;
-
-  switch (num_bits) {
-    case 0:
-      for (int i = 0; i < num_loops; ++i) in = unpack0_32_avx512(in, out + i * 32);
-      break;
-    case 1:
-      for (int i = 0; i < num_loops; ++i) in = unpack1_32_avx512(in, out + i * 32);
-      break;
-    case 2:
-      for (int i = 0; i < num_loops; ++i) in = unpack2_32_avx512(in, out + i * 32);
-      break;
-    case 3:
-      for (int i = 0; i < num_loops; ++i) in = unpack3_32_avx512(in, out + i * 32);
-      break;
-    case 4:
-      for (int i = 0; i < num_loops; ++i) in = unpack4_32_avx512(in, out + i * 32);
-      break;
-    case 5:
-      for (int i = 0; i < num_loops; ++i) in = unpack5_32_avx512(in, out + i * 32);
-      break;
-    case 6:
-      for (int i = 0; i < num_loops; ++i) in = unpack6_32_avx512(in, out + i * 32);
-      break;
-    case 7:
-      for (int i = 0; i < num_loops; ++i) in = unpack7_32_avx512(in, out + i * 32);
-      break;
-    case 8:
-      for (int i = 0; i < num_loops; ++i) in = unpack8_32_avx512(in, out + i * 32);
-      break;
-    case 9:
-      for (int i = 0; i < num_loops; ++i) in = unpack9_32_avx512(in, out + i * 32);
-      break;
-    case 10:
-      for (int i = 0; i < num_loops; ++i) in = unpack10_32_avx512(in, out + i * 32);
-      break;
-    case 11:
-      for (int i = 0; i < num_loops; ++i) in = unpack11_32_avx512(in, out + i * 32);
-      break;
-    case 12:
-      for (int i = 0; i < num_loops; ++i) in = unpack12_32_avx512(in, out + i * 32);
-      break;
-    case 13:
-      for (int i = 0; i < num_loops; ++i) in = unpack13_32_avx512(in, out + i * 32);
-      break;
-    case 14:
-      for (int i = 0; i < num_loops; ++i) in = unpack14_32_avx512(in, out + i * 32);
-      break;
-    case 15:
-      for (int i = 0; i < num_loops; ++i) in = unpack15_32_avx512(in, out + i * 32);
-      break;
-    case 16:
-      for (int i = 0; i < num_loops; ++i) in = unpack16_32_avx512(in, out + i * 32);
-      break;
-    case 17:
-      for (int i = 0; i < num_loops; ++i) in = unpack17_32_avx512(in, out + i * 32);
-      break;
-    case 18:
-      for (int i = 0; i < num_loops; ++i) in = unpack18_32_avx512(in, out + i * 32);
-      break;
-    case 19:
-      for (int i = 0; i < num_loops; ++i) in = unpack19_32_avx512(in, out + i * 32);
-      break;
-    case 20:
-      for (int i = 0; i < num_loops; ++i) in = unpack20_32_avx512(in, out + i * 32);
-      break;
-    case 21:
-      for (int i = 0; i < num_loops; ++i) in = unpack21_32_avx512(in, out + i * 32);
-      break;
-    case 22:
-      for (int i = 0; i < num_loops; ++i) in = unpack22_32_avx512(in, out + i * 32);
-      break;
-    case 23:
-      for (int i = 0; i < num_loops; ++i) in = unpack23_32_avx512(in, out + i * 32);
-      break;
-    case 24:
-      for (int i = 0; i < num_loops; ++i) in = unpack24_32_avx512(in, out + i * 32);
-      break;
-    case 25:
-      for (int i = 0; i < num_loops; ++i) in = unpack25_32_avx512(in, out + i * 32);
-      break;
-    case 26:
-      for (int i = 0; i < num_loops; ++i) in = unpack26_32_avx512(in, out + i * 32);
-      break;
-    case 27:
-      for (int i = 0; i < num_loops; ++i) in = unpack27_32_avx512(in, out + i * 32);
-      break;
-    case 28:
-      for (int i = 0; i < num_loops; ++i) in = unpack28_32_avx512(in, out + i * 32);
-      break;
-    case 29:
-      for (int i = 0; i < num_loops; ++i) in = unpack29_32_avx512(in, out + i * 32);
-      break;
-    case 30:
-      for (int i = 0; i < num_loops; ++i) in = unpack30_32_avx512(in, out + i * 32);
-      break;
-    case 31:
-      for (int i = 0; i < num_loops; ++i) in = unpack31_32_avx512(in, out + i * 32);
-      break;
-    case 32:
-      for (int i = 0; i < num_loops; ++i) in = unpack32_32_avx512(in, out + i * 32);
-      break;
-    default:
-      DCHECK(false) << "Unsupported num_bits";
-  }
-
-  return batch_size;
+  return unpack32_specialized<UnpackBits512<DispatchLevel::AVX512>>(in, out, batch_size,
+                                                                    num_bits);
 }
 
 }  // namespace internal
diff --git a/cpp/src/arrow/util/bpacking_avx512_codegen.py b/cpp/src/arrow/util/bpacking_avx512_codegen.py
deleted file mode 100644
index df4d7d750da..00000000000
--- a/cpp/src/arrow/util/bpacking_avx512_codegen.py
+++ /dev/null
@@ -1,186 +0,0 @@
-#!/bin/python
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# Usage: python bpacking_avx512_codegen.py > bpacking_avx512_generated.h
-
-
-def print_unpack_bit_func(bit):
-    shift = 0
-    shifts = []
-    in_index = 0
-    inls = []
-    mask = (1 << bit) - 1
-    bracket = "{"
-
-    print(
-        f"inline static const uint32_t* unpack{bit}_32_avx512(const uint32_t* in, uint32_t* out) {bracket}")
-    print("  using ::arrow::util::SafeLoad;")
-    print("  uint32_t mask = 0x%x;" % mask)
-    print("  __m512i reg_shifts, reg_inls, reg_masks;")
-    print("  __m512i results;")
-
-    print("")
-    for i in range(32):
-        if shift + bit == 32:
-            shifts.append(shift)
-            inls.append(f"SafeLoad(in + {in_index})")
-            in_index += 1
-            shift = 0
-        elif shift + bit > 32:  # cross the boundary
-            inls.append(
-                f"SafeLoad(in + {in_index}) >> {shift} | SafeLoad(in + {in_index + 1}) << {32 - shift}")
-            in_index += 1
-            shift = bit - (32 - shift)
-            shifts.append(0)  # zero shift
-        else:
-            shifts.append(shift)
-            inls.append(f"SafeLoad(in + {in_index})")
-            shift += bit
-
-    print("  reg_masks = _mm512_set1_epi32(mask);")
-    print("")
-    print("  // shift the first 16 outs")
-    print(
-        f"  reg_shifts = _mm512_set_epi32({shifts[15]}, {shifts[14]}, {shifts[13]}, {shifts[12]},")
-    print(
-        f"                                {shifts[11]}, {shifts[10]}, {shifts[9]}, {shifts[8]},")
-    print(
-        f"                                {shifts[7]}, {shifts[6]}, {shifts[5]}, {shifts[4]},")
-    print(
-        f"                                {shifts[3]}, {shifts[2]}, {shifts[1]}, {shifts[0]});")
-    print(f"  reg_inls = _mm512_set_epi32({inls[15]}, {inls[14]},")
-    print(f"                              {inls[13]}, {inls[12]},")
-    print(f"                              {inls[11]}, {inls[10]},")
-    print(f"                              {inls[9]}, {inls[8]},")
-    print(f"                              {inls[7]}, {inls[6]},")
-    print(f"                              {inls[5]}, {inls[4]},")
-    print(f"                              {inls[3]}, {inls[2]},")
-    print(f"                              {inls[1]}, {inls[0]});")
-    print(
-        "  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);")
-    print("  _mm512_storeu_si512(out, results);")
-    print("  out += 16;")
-    print("")
-    print("  // shift the second 16 outs")
-    print(
-        f"  reg_shifts = _mm512_set_epi32({shifts[31]}, {shifts[30]}, {shifts[29]}, {shifts[28]},")
-    print(
-        f"                                {shifts[27]}, {shifts[26]}, {shifts[25]}, {shifts[24]},")
-    print(
-        f"                                {shifts[23]}, {shifts[22]}, {shifts[21]}, {shifts[20]},")
-    print(
-        f"                                {shifts[19]}, {shifts[18]}, {shifts[17]}, {shifts[16]});")
-    print(f"  reg_inls = _mm512_set_epi32({inls[31]}, {inls[30]},")
-    print(f"                              {inls[29]}, {inls[28]},")
-    print(f"                              {inls[27]}, {inls[26]},")
-    print(f"                              {inls[25]}, {inls[24]},")
-    print(f"                              {inls[23]}, {inls[22]},")
-    print(f"                              {inls[21]}, {inls[20]},")
-    print(f"                              {inls[19]}, {inls[18]},")
-    print(f"                              {inls[17]}, {inls[16]});")
-    print(
-        "  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);")
-    print("  _mm512_storeu_si512(out, results);")
-    print("  out += 16;")
-    print("")
-    print(f"  in += {bit};")
-    print("")
-    print("  return in;")
-    print("}")
-
-
-def print_unpack_bit0_func():
-    print(
-        "inline static const uint32_t* unpack0_32_avx512(const uint32_t* in, uint32_t* out) {")
-    print("  memset(out, 0x0, 32 * sizeof(*out));")
-    print("  out += 32;")
-    print("")
-    print("  return in;")
-    print("}")
-
-
-def print_unpack_bit32_func():
-    print(
-        "inline static const uint32_t* unpack32_32_avx512(const uint32_t* in, uint32_t* out) {")
-    print("  memcpy(out, in, 32 * sizeof(*out));")
-    print("  in += 32;")
-    print("  out += 32;")
-    print("")
-    print("  return in;")
-    print("}")
-
-
-def print_copyright():
-    print(
-        """// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.""")
-
-
-def print_note():
-    print("//")
-    print("// Automatically generated file; DO NOT EDIT.")
-
-
-def main():
-    print_copyright()
-    print_note()
-    print("")
-    print("#pragma once")
-    print("")
-    print("#include <stdint.h>")
-    print("#include <string.h>")
-    print("")
-    print("#ifdef _MSC_VER")
-    print("#include <intrin.h>")
-    print("#else")
-    print("#include <immintrin.h>")
-    print("#endif")
-    print("")
-    print('#include "arrow/util/ubsan.h"')
-    print("")
-    print("namespace arrow {")
-    print("namespace internal {")
-    print("")
-    print_unpack_bit0_func()
-    print("")
-    for i in range(1, 32):
-        print_unpack_bit_func(i)
-        print("")
-    print_unpack_bit32_func()
-    print("")
-    print("}  // namespace internal")
-    print("}  // namespace arrow")
-
-
-if __name__ == '__main__':
-    main()
diff --git a/cpp/src/arrow/util/bpacking_neon.cc b/cpp/src/arrow/util/bpacking_neon.cc
new file mode 100644
index 00000000000..a0bb5dc7a9e
--- /dev/null
+++ b/cpp/src/arrow/util/bpacking_neon.cc
@@ -0,0 +1,31 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/util/bpacking_neon.h"
+#include "arrow/util/bpacking_simd128_generated.h"
+#include "arrow/util/bpacking_simd_internal.h"
+
+namespace arrow {
+namespace internal {
+
+int unpack32_neon(const uint32_t* in, uint32_t* out, int batch_size, int num_bits) {
+  return unpack32_specialized<UnpackBits128<DispatchLevel::NEON>>(in, out, batch_size,
+                                                                  num_bits);
+}
+
+}  // namespace internal
+}  // namespace arrow
diff --git a/cpp/src/arrow/util/bpacking_neon.h b/cpp/src/arrow/util/bpacking_neon.h
new file mode 100644
index 00000000000..9d02cd568ac
--- /dev/null
+++ b/cpp/src/arrow/util/bpacking_neon.h
@@ -0,0 +1,28 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <stdint.h>
+
+namespace arrow {
+namespace internal {
+
+int unpack32_neon(const uint32_t* in, uint32_t* out, int batch_size, int num_bits);
+
+}  // namespace internal
+}  // namespace arrow
diff --git a/cpp/src/arrow/util/bpacking_simd128_generated.h b/cpp/src/arrow/util/bpacking_simd128_generated.h
new file mode 100644
index 00000000000..f7700fd0e76
--- /dev/null
+++ b/cpp/src/arrow/util/bpacking_simd128_generated.h
@@ -0,0 +1,2138 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Automatically generated file; DO NOT EDIT.
+
+#pragma once
+
+#include <cstdint>
+#include <cstring>
+
+#include <xsimd/xsimd.hpp>
+
+#include "arrow/util/dispatch.h"
+#include "arrow/util/ubsan.h"
+
+namespace arrow {
+namespace internal {
+namespace {
+
+using ::arrow::util::SafeLoad;
+
+template <DispatchLevel level>
+struct UnpackBits128 {
+
+using simd_batch = xsimd::batch<uint32_t, 4>;
+
+inline static const uint32_t* unpack0_32(const uint32_t* in, uint32_t* out) {
+  memset(out, 0x0, 32 * sizeof(*out));
+  out += 32;
+
+  return in;
+}
+
+inline static const uint32_t* unpack1_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x1;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 1-bit bundles 0 to 3
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) };
+  shifts = simd_batch{ 0, 1, 2, 3 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 1-bit bundles 4 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) };
+  shifts = simd_batch{ 4, 5, 6, 7 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 1-bit bundles 8 to 11
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) };
+  shifts = simd_batch{ 8, 9, 10, 11 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 1-bit bundles 12 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) };
+  shifts = simd_batch{ 12, 13, 14, 15 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 1-bit bundles 16 to 19
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) };
+  shifts = simd_batch{ 16, 17, 18, 19 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 1-bit bundles 20 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) };
+  shifts = simd_batch{ 20, 21, 22, 23 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 1-bit bundles 24 to 27
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) };
+  shifts = simd_batch{ 24, 25, 26, 27 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 1-bit bundles 28 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) };
+  shifts = simd_batch{ 28, 29, 30, 31 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  in += 1;
+  return in;
+}
+
+inline static const uint32_t* unpack2_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x3;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 2-bit bundles 0 to 3
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) };
+  shifts = simd_batch{ 0, 2, 4, 6 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 2-bit bundles 4 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) };
+  shifts = simd_batch{ 8, 10, 12, 14 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 2-bit bundles 8 to 11
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) };
+  shifts = simd_batch{ 16, 18, 20, 22 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 2-bit bundles 12 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) };
+  shifts = simd_batch{ 24, 26, 28, 30 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 2-bit bundles 16 to 19
+  words = simd_batch{ SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) };
+  shifts = simd_batch{ 0, 2, 4, 6 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 2-bit bundles 20 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) };
+  shifts = simd_batch{ 8, 10, 12, 14 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 2-bit bundles 24 to 27
+  words = simd_batch{ SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) };
+  shifts = simd_batch{ 16, 18, 20, 22 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 2-bit bundles 28 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) };
+  shifts = simd_batch{ 24, 26, 28, 30 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  in += 2;
+  return in;
+}
+
+inline static const uint32_t* unpack3_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x7;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 3-bit bundles 0 to 3
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) };
+  shifts = simd_batch{ 0, 3, 6, 9 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 3-bit bundles 4 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) };
+  shifts = simd_batch{ 12, 15, 18, 21 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 3-bit bundles 8 to 11
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 30 | SafeLoad<uint32_t>(in + 1) << 2, SafeLoad<uint32_t>(in + 1) };
+  shifts = simd_batch{ 24, 27, 0, 1 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 3-bit bundles 12 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) };
+  shifts = simd_batch{ 4, 7, 10, 13 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 3-bit bundles 16 to 19
+  words = simd_batch{ SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) };
+  shifts = simd_batch{ 16, 19, 22, 25 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 3-bit bundles 20 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 31 | SafeLoad<uint32_t>(in + 2) << 1, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) };
+  shifts = simd_batch{ 28, 0, 2, 5 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 3-bit bundles 24 to 27
+  words = simd_batch{ SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) };
+  shifts = simd_batch{ 8, 11, 14, 17 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 3-bit bundles 28 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) };
+  shifts = simd_batch{ 20, 23, 26, 29 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  in += 3;
+  return in;
+}
+
+inline static const uint32_t* unpack4_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0xf;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 4-bit bundles 0 to 3
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) };
+  shifts = simd_batch{ 0, 4, 8, 12 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 4-bit bundles 4 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) };
+  shifts = simd_batch{ 16, 20, 24, 28 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 4-bit bundles 8 to 11
+  words = simd_batch{ SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) };
+  shifts = simd_batch{ 0, 4, 8, 12 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 4-bit bundles 12 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) };
+  shifts = simd_batch{ 16, 20, 24, 28 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 4-bit bundles 16 to 19
+  words = simd_batch{ SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) };
+  shifts = simd_batch{ 0, 4, 8, 12 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 4-bit bundles 20 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) };
+  shifts = simd_batch{ 16, 20, 24, 28 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 4-bit bundles 24 to 27
+  words = simd_batch{ SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) };
+  shifts = simd_batch{ 0, 4, 8, 12 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 4-bit bundles 28 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) };
+  shifts = simd_batch{ 16, 20, 24, 28 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  in += 4;
+  return in;
+}
+
+inline static const uint32_t* unpack5_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x1f;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 5-bit bundles 0 to 3
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) };
+  shifts = simd_batch{ 0, 5, 10, 15 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 5-bit bundles 4 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 30 | SafeLoad<uint32_t>(in + 1) << 2, SafeLoad<uint32_t>(in + 1) };
+  shifts = simd_batch{ 20, 25, 0, 3 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 5-bit bundles 8 to 11
+  words = simd_batch{ SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) };
+  shifts = simd_batch{ 8, 13, 18, 23 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 5-bit bundles 12 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 1) >> 28 | SafeLoad<uint32_t>(in + 2) << 4, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) };
+  shifts = simd_batch{ 0, 1, 6, 11 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 5-bit bundles 16 to 19
+  words = simd_batch{ SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 31 | SafeLoad<uint32_t>(in + 3) << 1 };
+  shifts = simd_batch{ 16, 21, 26, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 5-bit bundles 20 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) };
+  shifts = simd_batch{ 4, 9, 14, 19 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 5-bit bundles 24 to 27
+  words = simd_batch{ SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 29 | SafeLoad<uint32_t>(in + 4) << 3, SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) };
+  shifts = simd_batch{ 24, 0, 2, 7 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 5-bit bundles 28 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) };
+  shifts = simd_batch{ 12, 17, 22, 27 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  in += 5;
+  return in;
+}
+
+inline static const uint32_t* unpack6_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x3f;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 6-bit bundles 0 to 3
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) };
+  shifts = simd_batch{ 0, 6, 12, 18 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 6-bit bundles 4 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 30 | SafeLoad<uint32_t>(in + 1) << 2, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) };
+  shifts = simd_batch{ 24, 0, 4, 10 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 6-bit bundles 8 to 11
+  words = simd_batch{ SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 28 | SafeLoad<uint32_t>(in + 2) << 4, SafeLoad<uint32_t>(in + 2) };
+  shifts = simd_batch{ 16, 22, 0, 2 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 6-bit bundles 12 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) };
+  shifts = simd_batch{ 8, 14, 20, 26 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 6-bit bundles 16 to 19
+  words = simd_batch{ SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) };
+  shifts = simd_batch{ 0, 6, 12, 18 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 6-bit bundles 20 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 30 | SafeLoad<uint32_t>(in + 4) << 2, SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) };
+  shifts = simd_batch{ 24, 0, 4, 10 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 6-bit bundles 24 to 27
+  words = simd_batch{ SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) >> 28 | SafeLoad<uint32_t>(in + 5) << 4, SafeLoad<uint32_t>(in + 5) };
+  shifts = simd_batch{ 16, 22, 0, 2 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 6-bit bundles 28 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) };
+  shifts = simd_batch{ 8, 14, 20, 26 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  in += 6;
+  return in;
+}
+
+inline static const uint32_t* unpack7_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x7f;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 7-bit bundles 0 to 3
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) };
+  shifts = simd_batch{ 0, 7, 14, 21 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 7-bit bundles 4 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0) >> 28 | SafeLoad<uint32_t>(in + 1) << 4, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) };
+  shifts = simd_batch{ 0, 3, 10, 17 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 7-bit bundles 8 to 11
+  words = simd_batch{ SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 31 | SafeLoad<uint32_t>(in + 2) << 1, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) };
+  shifts = simd_batch{ 24, 0, 6, 13 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 7-bit bundles 12 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 27 | SafeLoad<uint32_t>(in + 3) << 5, SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) };
+  shifts = simd_batch{ 20, 0, 2, 9 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 7-bit bundles 16 to 19
+  words = simd_batch{ SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 30 | SafeLoad<uint32_t>(in + 4) << 2, SafeLoad<uint32_t>(in + 4) };
+  shifts = simd_batch{ 16, 23, 0, 5 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 7-bit bundles 20 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) >> 26 | SafeLoad<uint32_t>(in + 5) << 6, SafeLoad<uint32_t>(in + 5) };
+  shifts = simd_batch{ 12, 19, 0, 1 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 7-bit bundles 24 to 27
+  words = simd_batch{ SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 29 | SafeLoad<uint32_t>(in + 6) << 3 };
+  shifts = simd_batch{ 8, 15, 22, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 7-bit bundles 28 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) };
+  shifts = simd_batch{ 4, 11, 18, 25 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  in += 7;
+  return in;
+}
+
+inline static const uint32_t* unpack8_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0xff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 8-bit bundles 0 to 3
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) };
+  shifts = simd_batch{ 0, 8, 16, 24 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 8-bit bundles 4 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) };
+  shifts = simd_batch{ 0, 8, 16, 24 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 8-bit bundles 8 to 11
+  words = simd_batch{ SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) };
+  shifts = simd_batch{ 0, 8, 16, 24 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 8-bit bundles 12 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) };
+  shifts = simd_batch{ 0, 8, 16, 24 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 8-bit bundles 16 to 19
+  words = simd_batch{ SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) };
+  shifts = simd_batch{ 0, 8, 16, 24 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 8-bit bundles 20 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) };
+  shifts = simd_batch{ 0, 8, 16, 24 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 8-bit bundles 24 to 27
+  words = simd_batch{ SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) };
+  shifts = simd_batch{ 0, 8, 16, 24 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 8-bit bundles 28 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) };
+  shifts = simd_batch{ 0, 8, 16, 24 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  in += 8;
+  return in;
+}
+
+inline static const uint32_t* unpack9_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x1ff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 9-bit bundles 0 to 3
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 27 | SafeLoad<uint32_t>(in + 1) << 5 };
+  shifts = simd_batch{ 0, 9, 18, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 9-bit bundles 4 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 31 | SafeLoad<uint32_t>(in + 2) << 1 };
+  shifts = simd_batch{ 4, 13, 22, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 9-bit bundles 8 to 11
+  words = simd_batch{ SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 26 | SafeLoad<uint32_t>(in + 3) << 6, SafeLoad<uint32_t>(in + 3) };
+  shifts = simd_batch{ 8, 17, 0, 3 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 9-bit bundles 12 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 30 | SafeLoad<uint32_t>(in + 4) << 2, SafeLoad<uint32_t>(in + 4) };
+  shifts = simd_batch{ 12, 21, 0, 7 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 9-bit bundles 16 to 19
+  words = simd_batch{ SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) >> 25 | SafeLoad<uint32_t>(in + 5) << 7, SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) };
+  shifts = simd_batch{ 16, 0, 2, 11 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 9-bit bundles 20 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 29 | SafeLoad<uint32_t>(in + 6) << 3, SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) };
+  shifts = simd_batch{ 20, 0, 6, 15 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 9-bit bundles 24 to 27
+  words = simd_batch{ SafeLoad<uint32_t>(in + 6) >> 24 | SafeLoad<uint32_t>(in + 7) << 8, SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) };
+  shifts = simd_batch{ 0, 1, 10, 19 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 9-bit bundles 28 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 7) >> 28 | SafeLoad<uint32_t>(in + 8) << 4, SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8) };
+  shifts = simd_batch{ 0, 5, 14, 23 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  in += 9;
+  return in;
+}
+
+inline static const uint32_t* unpack10_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x3ff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 10-bit bundles 0 to 3
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 30 | SafeLoad<uint32_t>(in + 1) << 2 };
+  shifts = simd_batch{ 0, 10, 20, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 10-bit bundles 4 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 28 | SafeLoad<uint32_t>(in + 2) << 4, SafeLoad<uint32_t>(in + 2) };
+  shifts = simd_batch{ 8, 18, 0, 6 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 10-bit bundles 8 to 11
+  words = simd_batch{ SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 26 | SafeLoad<uint32_t>(in + 3) << 6, SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) };
+  shifts = simd_batch{ 16, 0, 4, 14 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 10-bit bundles 12 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 3) >> 24 | SafeLoad<uint32_t>(in + 4) << 8, SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) };
+  shifts = simd_batch{ 0, 2, 12, 22 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 10-bit bundles 16 to 19
+  words = simd_batch{ SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 30 | SafeLoad<uint32_t>(in + 6) << 2 };
+  shifts = simd_batch{ 0, 10, 20, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 10-bit bundles 20 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) >> 28 | SafeLoad<uint32_t>(in + 7) << 4, SafeLoad<uint32_t>(in + 7) };
+  shifts = simd_batch{ 8, 18, 0, 6 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 10-bit bundles 24 to 27
+  words = simd_batch{ SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 26 | SafeLoad<uint32_t>(in + 8) << 6, SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8) };
+  shifts = simd_batch{ 16, 0, 4, 14 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 10-bit bundles 28 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 8) >> 24 | SafeLoad<uint32_t>(in + 9) << 8, SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9) };
+  shifts = simd_batch{ 0, 2, 12, 22 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  in += 10;
+  return in;
+}
+
+inline static const uint32_t* unpack11_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x7ff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 11-bit bundles 0 to 3
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 22 | SafeLoad<uint32_t>(in + 1) << 10, SafeLoad<uint32_t>(in + 1) };
+  shifts = simd_batch{ 0, 11, 0, 1 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 11-bit bundles 4 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 23 | SafeLoad<uint32_t>(in + 2) << 9, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) };
+  shifts = simd_batch{ 12, 0, 2, 13 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 11-bit bundles 8 to 11
+  words = simd_batch{ SafeLoad<uint32_t>(in + 2) >> 24 | SafeLoad<uint32_t>(in + 3) << 8, SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 25 | SafeLoad<uint32_t>(in + 4) << 7 };
+  shifts = simd_batch{ 0, 3, 14, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 11-bit bundles 12 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) >> 26 | SafeLoad<uint32_t>(in + 5) << 6, SafeLoad<uint32_t>(in + 5) };
+  shifts = simd_batch{ 4, 15, 0, 5 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 11-bit bundles 16 to 19
+  words = simd_batch{ SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 27 | SafeLoad<uint32_t>(in + 6) << 5, SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) };
+  shifts = simd_batch{ 16, 0, 6, 17 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 11-bit bundles 20 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 6) >> 28 | SafeLoad<uint32_t>(in + 7) << 4, SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 29 | SafeLoad<uint32_t>(in + 8) << 3 };
+  shifts = simd_batch{ 0, 7, 18, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 11-bit bundles 24 to 27
+  words = simd_batch{ SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8) >> 30 | SafeLoad<uint32_t>(in + 9) << 2, SafeLoad<uint32_t>(in + 9) };
+  shifts = simd_batch{ 8, 19, 0, 9 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 11-bit bundles 28 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9) >> 31 | SafeLoad<uint32_t>(in + 10) << 1, SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10) };
+  shifts = simd_batch{ 20, 0, 10, 21 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  in += 11;
+  return in;
+}
+
+inline static const uint32_t* unpack12_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0xfff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 12-bit bundles 0 to 3
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 24 | SafeLoad<uint32_t>(in + 1) << 8, SafeLoad<uint32_t>(in + 1) };
+  shifts = simd_batch{ 0, 12, 0, 4 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 12-bit bundles 4 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 28 | SafeLoad<uint32_t>(in + 2) << 4, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) };
+  shifts = simd_batch{ 16, 0, 8, 20 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 12-bit bundles 8 to 11
+  words = simd_batch{ SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 24 | SafeLoad<uint32_t>(in + 4) << 8, SafeLoad<uint32_t>(in + 4) };
+  shifts = simd_batch{ 0, 12, 0, 4 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 12-bit bundles 12 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) >> 28 | SafeLoad<uint32_t>(in + 5) << 4, SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) };
+  shifts = simd_batch{ 16, 0, 8, 20 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 12-bit bundles 16 to 19
+  words = simd_batch{ SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) >> 24 | SafeLoad<uint32_t>(in + 7) << 8, SafeLoad<uint32_t>(in + 7) };
+  shifts = simd_batch{ 0, 12, 0, 4 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 12-bit bundles 20 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 28 | SafeLoad<uint32_t>(in + 8) << 4, SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8) };
+  shifts = simd_batch{ 16, 0, 8, 20 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 12-bit bundles 24 to 27
+  words = simd_batch{ SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9) >> 24 | SafeLoad<uint32_t>(in + 10) << 8, SafeLoad<uint32_t>(in + 10) };
+  shifts = simd_batch{ 0, 12, 0, 4 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 12-bit bundles 28 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10) >> 28 | SafeLoad<uint32_t>(in + 11) << 4, SafeLoad<uint32_t>(in + 11), SafeLoad<uint32_t>(in + 11) };
+  shifts = simd_batch{ 16, 0, 8, 20 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  in += 12;
+  return in;
+}
+
+inline static const uint32_t* unpack13_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x1fff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 13-bit bundles 0 to 3
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 26 | SafeLoad<uint32_t>(in + 1) << 6, SafeLoad<uint32_t>(in + 1) };
+  shifts = simd_batch{ 0, 13, 0, 7 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 13-bit bundles 4 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 1) >> 20 | SafeLoad<uint32_t>(in + 2) << 12, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 27 | SafeLoad<uint32_t>(in + 3) << 5 };
+  shifts = simd_batch{ 0, 1, 14, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 13-bit bundles 8 to 11
+  words = simd_batch{ SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 21 | SafeLoad<uint32_t>(in + 4) << 11, SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) };
+  shifts = simd_batch{ 8, 0, 2, 15 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 13-bit bundles 12 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 4) >> 28 | SafeLoad<uint32_t>(in + 5) << 4, SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 22 | SafeLoad<uint32_t>(in + 6) << 10, SafeLoad<uint32_t>(in + 6) };
+  shifts = simd_batch{ 0, 9, 0, 3 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 13-bit bundles 16 to 19
+  words = simd_batch{ SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) >> 29 | SafeLoad<uint32_t>(in + 7) << 3, SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 23 | SafeLoad<uint32_t>(in + 8) << 9 };
+  shifts = simd_batch{ 16, 0, 10, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 13-bit bundles 20 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8) >> 30 | SafeLoad<uint32_t>(in + 9) << 2, SafeLoad<uint32_t>(in + 9) };
+  shifts = simd_batch{ 4, 17, 0, 11 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 13-bit bundles 24 to 27
+  words = simd_batch{ SafeLoad<uint32_t>(in + 9) >> 24 | SafeLoad<uint32_t>(in + 10) << 8, SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10) >> 31 | SafeLoad<uint32_t>(in + 11) << 1 };
+  shifts = simd_batch{ 0, 5, 18, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 13-bit bundles 28 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 11), SafeLoad<uint32_t>(in + 11) >> 25 | SafeLoad<uint32_t>(in + 12) << 7, SafeLoad<uint32_t>(in + 12), SafeLoad<uint32_t>(in + 12) };
+  shifts = simd_batch{ 12, 0, 6, 19 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  in += 13;
+  return in;
+}
+
+inline static const uint32_t* unpack14_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x3fff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 14-bit bundles 0 to 3
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 28 | SafeLoad<uint32_t>(in + 1) << 4, SafeLoad<uint32_t>(in + 1) };
+  shifts = simd_batch{ 0, 14, 0, 10 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 14-bit bundles 4 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 1) >> 24 | SafeLoad<uint32_t>(in + 2) << 8, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 20 | SafeLoad<uint32_t>(in + 3) << 12, SafeLoad<uint32_t>(in + 3) };
+  shifts = simd_batch{ 0, 6, 0, 2 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 14-bit bundles 8 to 11
+  words = simd_batch{ SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 30 | SafeLoad<uint32_t>(in + 4) << 2, SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) >> 26 | SafeLoad<uint32_t>(in + 5) << 6 };
+  shifts = simd_batch{ 16, 0, 12, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 14-bit bundles 12 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 22 | SafeLoad<uint32_t>(in + 6) << 10, SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) };
+  shifts = simd_batch{ 8, 0, 4, 18 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 14-bit bundles 16 to 19
+  words = simd_batch{ SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 28 | SafeLoad<uint32_t>(in + 8) << 4, SafeLoad<uint32_t>(in + 8) };
+  shifts = simd_batch{ 0, 14, 0, 10 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 14-bit bundles 20 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 8) >> 24 | SafeLoad<uint32_t>(in + 9) << 8, SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9) >> 20 | SafeLoad<uint32_t>(in + 10) << 12, SafeLoad<uint32_t>(in + 10) };
+  shifts = simd_batch{ 0, 6, 0, 2 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 14-bit bundles 24 to 27
+  words = simd_batch{ SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10) >> 30 | SafeLoad<uint32_t>(in + 11) << 2, SafeLoad<uint32_t>(in + 11), SafeLoad<uint32_t>(in + 11) >> 26 | SafeLoad<uint32_t>(in + 12) << 6 };
+  shifts = simd_batch{ 16, 0, 12, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 14-bit bundles 28 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 12), SafeLoad<uint32_t>(in + 12) >> 22 | SafeLoad<uint32_t>(in + 13) << 10, SafeLoad<uint32_t>(in + 13), SafeLoad<uint32_t>(in + 13) };
+  shifts = simd_batch{ 8, 0, 4, 18 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  in += 14;
+  return in;
+}
+
+inline static const uint32_t* unpack15_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x7fff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 15-bit bundles 0 to 3
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 30 | SafeLoad<uint32_t>(in + 1) << 2, SafeLoad<uint32_t>(in + 1) };
+  shifts = simd_batch{ 0, 15, 0, 13 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 15-bit bundles 4 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 1) >> 28 | SafeLoad<uint32_t>(in + 2) << 4, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 26 | SafeLoad<uint32_t>(in + 3) << 6, SafeLoad<uint32_t>(in + 3) };
+  shifts = simd_batch{ 0, 11, 0, 9 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 15-bit bundles 8 to 11
+  words = simd_batch{ SafeLoad<uint32_t>(in + 3) >> 24 | SafeLoad<uint32_t>(in + 4) << 8, SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) >> 22 | SafeLoad<uint32_t>(in + 5) << 10, SafeLoad<uint32_t>(in + 5) };
+  shifts = simd_batch{ 0, 7, 0, 5 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 15-bit bundles 12 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 5) >> 20 | SafeLoad<uint32_t>(in + 6) << 12, SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) >> 18 | SafeLoad<uint32_t>(in + 7) << 14, SafeLoad<uint32_t>(in + 7) };
+  shifts = simd_batch{ 0, 3, 0, 1 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 15-bit bundles 16 to 19
+  words = simd_batch{ SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 31 | SafeLoad<uint32_t>(in + 8) << 1, SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8) >> 29 | SafeLoad<uint32_t>(in + 9) << 3 };
+  shifts = simd_batch{ 16, 0, 14, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 15-bit bundles 20 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9) >> 27 | SafeLoad<uint32_t>(in + 10) << 5, SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10) >> 25 | SafeLoad<uint32_t>(in + 11) << 7 };
+  shifts = simd_batch{ 12, 0, 10, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 15-bit bundles 24 to 27
+  words = simd_batch{ SafeLoad<uint32_t>(in + 11), SafeLoad<uint32_t>(in + 11) >> 23 | SafeLoad<uint32_t>(in + 12) << 9, SafeLoad<uint32_t>(in + 12), SafeLoad<uint32_t>(in + 12) >> 21 | SafeLoad<uint32_t>(in + 13) << 11 };
+  shifts = simd_batch{ 8, 0, 6, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 15-bit bundles 28 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 13), SafeLoad<uint32_t>(in + 13) >> 19 | SafeLoad<uint32_t>(in + 14) << 13, SafeLoad<uint32_t>(in + 14), SafeLoad<uint32_t>(in + 14) };
+  shifts = simd_batch{ 4, 0, 2, 17 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  in += 15;
+  return in;
+}
+
+inline static const uint32_t* unpack16_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0xffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 16-bit bundles 0 to 3
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) };
+  shifts = simd_batch{ 0, 16, 0, 16 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 16-bit bundles 4 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) };
+  shifts = simd_batch{ 0, 16, 0, 16 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 16-bit bundles 8 to 11
+  words = simd_batch{ SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) };
+  shifts = simd_batch{ 0, 16, 0, 16 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 16-bit bundles 12 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) };
+  shifts = simd_batch{ 0, 16, 0, 16 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 16-bit bundles 16 to 19
+  words = simd_batch{ SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9) };
+  shifts = simd_batch{ 0, 16, 0, 16 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 16-bit bundles 20 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 11), SafeLoad<uint32_t>(in + 11) };
+  shifts = simd_batch{ 0, 16, 0, 16 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 16-bit bundles 24 to 27
+  words = simd_batch{ SafeLoad<uint32_t>(in + 12), SafeLoad<uint32_t>(in + 12), SafeLoad<uint32_t>(in + 13), SafeLoad<uint32_t>(in + 13) };
+  shifts = simd_batch{ 0, 16, 0, 16 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 16-bit bundles 28 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 14), SafeLoad<uint32_t>(in + 14), SafeLoad<uint32_t>(in + 15), SafeLoad<uint32_t>(in + 15) };
+  shifts = simd_batch{ 0, 16, 0, 16 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  in += 16;
+  return in;
+}
+
+inline static const uint32_t* unpack17_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x1ffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 17-bit bundles 0 to 3
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 17 | SafeLoad<uint32_t>(in + 1) << 15, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 19 | SafeLoad<uint32_t>(in + 2) << 13 };
+  shifts = simd_batch{ 0, 0, 2, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 17-bit bundles 4 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 21 | SafeLoad<uint32_t>(in + 3) << 11, SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 23 | SafeLoad<uint32_t>(in + 4) << 9 };
+  shifts = simd_batch{ 4, 0, 6, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 17-bit bundles 8 to 11
+  words = simd_batch{ SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) >> 25 | SafeLoad<uint32_t>(in + 5) << 7, SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 27 | SafeLoad<uint32_t>(in + 6) << 5 };
+  shifts = simd_batch{ 8, 0, 10, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 17-bit bundles 12 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) >> 29 | SafeLoad<uint32_t>(in + 7) << 3, SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 31 | SafeLoad<uint32_t>(in + 8) << 1 };
+  shifts = simd_batch{ 12, 0, 14, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 17-bit bundles 16 to 19
+  words = simd_batch{ SafeLoad<uint32_t>(in + 8) >> 16 | SafeLoad<uint32_t>(in + 9) << 16, SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9) >> 18 | SafeLoad<uint32_t>(in + 10) << 14, SafeLoad<uint32_t>(in + 10) };
+  shifts = simd_batch{ 0, 1, 0, 3 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 17-bit bundles 20 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 10) >> 20 | SafeLoad<uint32_t>(in + 11) << 12, SafeLoad<uint32_t>(in + 11), SafeLoad<uint32_t>(in + 11) >> 22 | SafeLoad<uint32_t>(in + 12) << 10, SafeLoad<uint32_t>(in + 12) };
+  shifts = simd_batch{ 0, 5, 0, 7 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 17-bit bundles 24 to 27
+  words = simd_batch{ SafeLoad<uint32_t>(in + 12) >> 24 | SafeLoad<uint32_t>(in + 13) << 8, SafeLoad<uint32_t>(in + 13), SafeLoad<uint32_t>(in + 13) >> 26 | SafeLoad<uint32_t>(in + 14) << 6, SafeLoad<uint32_t>(in + 14) };
+  shifts = simd_batch{ 0, 9, 0, 11 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 17-bit bundles 28 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 14) >> 28 | SafeLoad<uint32_t>(in + 15) << 4, SafeLoad<uint32_t>(in + 15), SafeLoad<uint32_t>(in + 15) >> 30 | SafeLoad<uint32_t>(in + 16) << 2, SafeLoad<uint32_t>(in + 16) };
+  shifts = simd_batch{ 0, 13, 0, 15 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  in += 17;
+  return in;
+}
+
+inline static const uint32_t* unpack18_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x3ffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 18-bit bundles 0 to 3
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 18 | SafeLoad<uint32_t>(in + 1) << 14, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 22 | SafeLoad<uint32_t>(in + 2) << 10 };
+  shifts = simd_batch{ 0, 0, 4, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 18-bit bundles 4 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 26 | SafeLoad<uint32_t>(in + 3) << 6, SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 30 | SafeLoad<uint32_t>(in + 4) << 2 };
+  shifts = simd_batch{ 8, 0, 12, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 18-bit bundles 8 to 11
+  words = simd_batch{ SafeLoad<uint32_t>(in + 4) >> 16 | SafeLoad<uint32_t>(in + 5) << 16, SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 20 | SafeLoad<uint32_t>(in + 6) << 12, SafeLoad<uint32_t>(in + 6) };
+  shifts = simd_batch{ 0, 2, 0, 6 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 18-bit bundles 12 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 6) >> 24 | SafeLoad<uint32_t>(in + 7) << 8, SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 28 | SafeLoad<uint32_t>(in + 8) << 4, SafeLoad<uint32_t>(in + 8) };
+  shifts = simd_batch{ 0, 10, 0, 14 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 18-bit bundles 16 to 19
+  words = simd_batch{ SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9) >> 18 | SafeLoad<uint32_t>(in + 10) << 14, SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10) >> 22 | SafeLoad<uint32_t>(in + 11) << 10 };
+  shifts = simd_batch{ 0, 0, 4, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 18-bit bundles 20 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 11), SafeLoad<uint32_t>(in + 11) >> 26 | SafeLoad<uint32_t>(in + 12) << 6, SafeLoad<uint32_t>(in + 12), SafeLoad<uint32_t>(in + 12) >> 30 | SafeLoad<uint32_t>(in + 13) << 2 };
+  shifts = simd_batch{ 8, 0, 12, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 18-bit bundles 24 to 27
+  words = simd_batch{ SafeLoad<uint32_t>(in + 13) >> 16 | SafeLoad<uint32_t>(in + 14) << 16, SafeLoad<uint32_t>(in + 14), SafeLoad<uint32_t>(in + 14) >> 20 | SafeLoad<uint32_t>(in + 15) << 12, SafeLoad<uint32_t>(in + 15) };
+  shifts = simd_batch{ 0, 2, 0, 6 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 18-bit bundles 28 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 15) >> 24 | SafeLoad<uint32_t>(in + 16) << 8, SafeLoad<uint32_t>(in + 16), SafeLoad<uint32_t>(in + 16) >> 28 | SafeLoad<uint32_t>(in + 17) << 4, SafeLoad<uint32_t>(in + 17) };
+  shifts = simd_batch{ 0, 10, 0, 14 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  in += 18;
+  return in;
+}
+
+inline static const uint32_t* unpack19_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x7ffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 19-bit bundles 0 to 3
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 19 | SafeLoad<uint32_t>(in + 1) << 13, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 25 | SafeLoad<uint32_t>(in + 2) << 7 };
+  shifts = simd_batch{ 0, 0, 6, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 19-bit bundles 4 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 31 | SafeLoad<uint32_t>(in + 3) << 1, SafeLoad<uint32_t>(in + 3) >> 18 | SafeLoad<uint32_t>(in + 4) << 14, SafeLoad<uint32_t>(in + 4) };
+  shifts = simd_batch{ 12, 0, 0, 5 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 19-bit bundles 8 to 11
+  words = simd_batch{ SafeLoad<uint32_t>(in + 4) >> 24 | SafeLoad<uint32_t>(in + 5) << 8, SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 30 | SafeLoad<uint32_t>(in + 6) << 2, SafeLoad<uint32_t>(in + 6) >> 17 | SafeLoad<uint32_t>(in + 7) << 15 };
+  shifts = simd_batch{ 0, 11, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 19-bit bundles 12 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 23 | SafeLoad<uint32_t>(in + 8) << 9, SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8) >> 29 | SafeLoad<uint32_t>(in + 9) << 3 };
+  shifts = simd_batch{ 4, 0, 10, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 19-bit bundles 16 to 19
+  words = simd_batch{ SafeLoad<uint32_t>(in + 9) >> 16 | SafeLoad<uint32_t>(in + 10) << 16, SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10) >> 22 | SafeLoad<uint32_t>(in + 11) << 10, SafeLoad<uint32_t>(in + 11) };
+  shifts = simd_batch{ 0, 3, 0, 9 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 19-bit bundles 20 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 11) >> 28 | SafeLoad<uint32_t>(in + 12) << 4, SafeLoad<uint32_t>(in + 12) >> 15 | SafeLoad<uint32_t>(in + 13) << 17, SafeLoad<uint32_t>(in + 13), SafeLoad<uint32_t>(in + 13) >> 21 | SafeLoad<uint32_t>(in + 14) << 11 };
+  shifts = simd_batch{ 0, 0, 2, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 19-bit bundles 24 to 27
+  words = simd_batch{ SafeLoad<uint32_t>(in + 14), SafeLoad<uint32_t>(in + 14) >> 27 | SafeLoad<uint32_t>(in + 15) << 5, SafeLoad<uint32_t>(in + 15) >> 14 | SafeLoad<uint32_t>(in + 16) << 18, SafeLoad<uint32_t>(in + 16) };
+  shifts = simd_batch{ 8, 0, 0, 1 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 19-bit bundles 28 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 16) >> 20 | SafeLoad<uint32_t>(in + 17) << 12, SafeLoad<uint32_t>(in + 17), SafeLoad<uint32_t>(in + 17) >> 26 | SafeLoad<uint32_t>(in + 18) << 6, SafeLoad<uint32_t>(in + 18) };
+  shifts = simd_batch{ 0, 7, 0, 13 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  in += 19;
+  return in;
+}
+
+inline static const uint32_t* unpack20_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0xfffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 20-bit bundles 0 to 3
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 20 | SafeLoad<uint32_t>(in + 1) << 12, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 28 | SafeLoad<uint32_t>(in + 2) << 4 };
+  shifts = simd_batch{ 0, 0, 8, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 20-bit bundles 4 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 2) >> 16 | SafeLoad<uint32_t>(in + 3) << 16, SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 24 | SafeLoad<uint32_t>(in + 4) << 8, SafeLoad<uint32_t>(in + 4) };
+  shifts = simd_batch{ 0, 4, 0, 12 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 20-bit bundles 8 to 11
+  words = simd_batch{ SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 20 | SafeLoad<uint32_t>(in + 6) << 12, SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) >> 28 | SafeLoad<uint32_t>(in + 7) << 4 };
+  shifts = simd_batch{ 0, 0, 8, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 20-bit bundles 12 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 7) >> 16 | SafeLoad<uint32_t>(in + 8) << 16, SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8) >> 24 | SafeLoad<uint32_t>(in + 9) << 8, SafeLoad<uint32_t>(in + 9) };
+  shifts = simd_batch{ 0, 4, 0, 12 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 20-bit bundles 16 to 19
+  words = simd_batch{ SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10) >> 20 | SafeLoad<uint32_t>(in + 11) << 12, SafeLoad<uint32_t>(in + 11), SafeLoad<uint32_t>(in + 11) >> 28 | SafeLoad<uint32_t>(in + 12) << 4 };
+  shifts = simd_batch{ 0, 0, 8, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 20-bit bundles 20 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 12) >> 16 | SafeLoad<uint32_t>(in + 13) << 16, SafeLoad<uint32_t>(in + 13), SafeLoad<uint32_t>(in + 13) >> 24 | SafeLoad<uint32_t>(in + 14) << 8, SafeLoad<uint32_t>(in + 14) };
+  shifts = simd_batch{ 0, 4, 0, 12 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 20-bit bundles 24 to 27
+  words = simd_batch{ SafeLoad<uint32_t>(in + 15), SafeLoad<uint32_t>(in + 15) >> 20 | SafeLoad<uint32_t>(in + 16) << 12, SafeLoad<uint32_t>(in + 16), SafeLoad<uint32_t>(in + 16) >> 28 | SafeLoad<uint32_t>(in + 17) << 4 };
+  shifts = simd_batch{ 0, 0, 8, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 20-bit bundles 28 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 17) >> 16 | SafeLoad<uint32_t>(in + 18) << 16, SafeLoad<uint32_t>(in + 18), SafeLoad<uint32_t>(in + 18) >> 24 | SafeLoad<uint32_t>(in + 19) << 8, SafeLoad<uint32_t>(in + 19) };
+  shifts = simd_batch{ 0, 4, 0, 12 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  in += 20;
+  return in;
+}
+
+inline static const uint32_t* unpack21_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x1fffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 21-bit bundles 0 to 3
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 21 | SafeLoad<uint32_t>(in + 1) << 11, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 31 | SafeLoad<uint32_t>(in + 2) << 1 };
+  shifts = simd_batch{ 0, 0, 10, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 21-bit bundles 4 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 2) >> 20 | SafeLoad<uint32_t>(in + 3) << 12, SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 30 | SafeLoad<uint32_t>(in + 4) << 2, SafeLoad<uint32_t>(in + 4) >> 19 | SafeLoad<uint32_t>(in + 5) << 13 };
+  shifts = simd_batch{ 0, 9, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 21-bit bundles 8 to 11
+  words = simd_batch{ SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 29 | SafeLoad<uint32_t>(in + 6) << 3, SafeLoad<uint32_t>(in + 6) >> 18 | SafeLoad<uint32_t>(in + 7) << 14, SafeLoad<uint32_t>(in + 7) };
+  shifts = simd_batch{ 8, 0, 0, 7 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 21-bit bundles 12 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 7) >> 28 | SafeLoad<uint32_t>(in + 8) << 4, SafeLoad<uint32_t>(in + 8) >> 17 | SafeLoad<uint32_t>(in + 9) << 15, SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9) >> 27 | SafeLoad<uint32_t>(in + 10) << 5 };
+  shifts = simd_batch{ 0, 0, 6, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 21-bit bundles 16 to 19
+  words = simd_batch{ SafeLoad<uint32_t>(in + 10) >> 16 | SafeLoad<uint32_t>(in + 11) << 16, SafeLoad<uint32_t>(in + 11), SafeLoad<uint32_t>(in + 11) >> 26 | SafeLoad<uint32_t>(in + 12) << 6, SafeLoad<uint32_t>(in + 12) >> 15 | SafeLoad<uint32_t>(in + 13) << 17 };
+  shifts = simd_batch{ 0, 5, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 21-bit bundles 20 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 13), SafeLoad<uint32_t>(in + 13) >> 25 | SafeLoad<uint32_t>(in + 14) << 7, SafeLoad<uint32_t>(in + 14) >> 14 | SafeLoad<uint32_t>(in + 15) << 18, SafeLoad<uint32_t>(in + 15) };
+  shifts = simd_batch{ 4, 0, 0, 3 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 21-bit bundles 24 to 27
+  words = simd_batch{ SafeLoad<uint32_t>(in + 15) >> 24 | SafeLoad<uint32_t>(in + 16) << 8, SafeLoad<uint32_t>(in + 16) >> 13 | SafeLoad<uint32_t>(in + 17) << 19, SafeLoad<uint32_t>(in + 17), SafeLoad<uint32_t>(in + 17) >> 23 | SafeLoad<uint32_t>(in + 18) << 9 };
+  shifts = simd_batch{ 0, 0, 2, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 21-bit bundles 28 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 18) >> 12 | SafeLoad<uint32_t>(in + 19) << 20, SafeLoad<uint32_t>(in + 19), SafeLoad<uint32_t>(in + 19) >> 22 | SafeLoad<uint32_t>(in + 20) << 10, SafeLoad<uint32_t>(in + 20) };
+  shifts = simd_batch{ 0, 1, 0, 11 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  in += 21;
+  return in;
+}
+
+inline static const uint32_t* unpack22_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x3fffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 22-bit bundles 0 to 3
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 22 | SafeLoad<uint32_t>(in + 1) << 10, SafeLoad<uint32_t>(in + 1) >> 12 | SafeLoad<uint32_t>(in + 2) << 20, SafeLoad<uint32_t>(in + 2) };
+  shifts = simd_batch{ 0, 0, 0, 2 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 22-bit bundles 4 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 2) >> 24 | SafeLoad<uint32_t>(in + 3) << 8, SafeLoad<uint32_t>(in + 3) >> 14 | SafeLoad<uint32_t>(in + 4) << 18, SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) >> 26 | SafeLoad<uint32_t>(in + 5) << 6 };
+  shifts = simd_batch{ 0, 0, 4, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 22-bit bundles 8 to 11
+  words = simd_batch{ SafeLoad<uint32_t>(in + 5) >> 16 | SafeLoad<uint32_t>(in + 6) << 16, SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) >> 28 | SafeLoad<uint32_t>(in + 7) << 4, SafeLoad<uint32_t>(in + 7) >> 18 | SafeLoad<uint32_t>(in + 8) << 14 };
+  shifts = simd_batch{ 0, 6, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 22-bit bundles 12 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8) >> 30 | SafeLoad<uint32_t>(in + 9) << 2, SafeLoad<uint32_t>(in + 9) >> 20 | SafeLoad<uint32_t>(in + 10) << 12, SafeLoad<uint32_t>(in + 10) };
+  shifts = simd_batch{ 8, 0, 0, 10 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 22-bit bundles 16 to 19
+  words = simd_batch{ SafeLoad<uint32_t>(in + 11), SafeLoad<uint32_t>(in + 11) >> 22 | SafeLoad<uint32_t>(in + 12) << 10, SafeLoad<uint32_t>(in + 12) >> 12 | SafeLoad<uint32_t>(in + 13) << 20, SafeLoad<uint32_t>(in + 13) };
+  shifts = simd_batch{ 0, 0, 0, 2 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 22-bit bundles 20 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 13) >> 24 | SafeLoad<uint32_t>(in + 14) << 8, SafeLoad<uint32_t>(in + 14) >> 14 | SafeLoad<uint32_t>(in + 15) << 18, SafeLoad<uint32_t>(in + 15), SafeLoad<uint32_t>(in + 15) >> 26 | SafeLoad<uint32_t>(in + 16) << 6 };
+  shifts = simd_batch{ 0, 0, 4, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 22-bit bundles 24 to 27
+  words = simd_batch{ SafeLoad<uint32_t>(in + 16) >> 16 | SafeLoad<uint32_t>(in + 17) << 16, SafeLoad<uint32_t>(in + 17), SafeLoad<uint32_t>(in + 17) >> 28 | SafeLoad<uint32_t>(in + 18) << 4, SafeLoad<uint32_t>(in + 18) >> 18 | SafeLoad<uint32_t>(in + 19) << 14 };
+  shifts = simd_batch{ 0, 6, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 22-bit bundles 28 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 19), SafeLoad<uint32_t>(in + 19) >> 30 | SafeLoad<uint32_t>(in + 20) << 2, SafeLoad<uint32_t>(in + 20) >> 20 | SafeLoad<uint32_t>(in + 21) << 12, SafeLoad<uint32_t>(in + 21) };
+  shifts = simd_batch{ 8, 0, 0, 10 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  in += 22;
+  return in;
+}
+
+inline static const uint32_t* unpack23_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x7fffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 23-bit bundles 0 to 3
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 23 | SafeLoad<uint32_t>(in + 1) << 9, SafeLoad<uint32_t>(in + 1) >> 14 | SafeLoad<uint32_t>(in + 2) << 18, SafeLoad<uint32_t>(in + 2) };
+  shifts = simd_batch{ 0, 0, 0, 5 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 23-bit bundles 4 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 2) >> 28 | SafeLoad<uint32_t>(in + 3) << 4, SafeLoad<uint32_t>(in + 3) >> 19 | SafeLoad<uint32_t>(in + 4) << 13, SafeLoad<uint32_t>(in + 4) >> 10 | SafeLoad<uint32_t>(in + 5) << 22, SafeLoad<uint32_t>(in + 5) };
+  shifts = simd_batch{ 0, 0, 0, 1 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 23-bit bundles 8 to 11
+  words = simd_batch{ SafeLoad<uint32_t>(in + 5) >> 24 | SafeLoad<uint32_t>(in + 6) << 8, SafeLoad<uint32_t>(in + 6) >> 15 | SafeLoad<uint32_t>(in + 7) << 17, SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 29 | SafeLoad<uint32_t>(in + 8) << 3 };
+  shifts = simd_batch{ 0, 0, 6, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 23-bit bundles 12 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 8) >> 20 | SafeLoad<uint32_t>(in + 9) << 12, SafeLoad<uint32_t>(in + 9) >> 11 | SafeLoad<uint32_t>(in + 10) << 21, SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10) >> 25 | SafeLoad<uint32_t>(in + 11) << 7 };
+  shifts = simd_batch{ 0, 0, 2, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 23-bit bundles 16 to 19
+  words = simd_batch{ SafeLoad<uint32_t>(in + 11) >> 16 | SafeLoad<uint32_t>(in + 12) << 16, SafeLoad<uint32_t>(in + 12), SafeLoad<uint32_t>(in + 12) >> 30 | SafeLoad<uint32_t>(in + 13) << 2, SafeLoad<uint32_t>(in + 13) >> 21 | SafeLoad<uint32_t>(in + 14) << 11 };
+  shifts = simd_batch{ 0, 7, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 23-bit bundles 20 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 14) >> 12 | SafeLoad<uint32_t>(in + 15) << 20, SafeLoad<uint32_t>(in + 15), SafeLoad<uint32_t>(in + 15) >> 26 | SafeLoad<uint32_t>(in + 16) << 6, SafeLoad<uint32_t>(in + 16) >> 17 | SafeLoad<uint32_t>(in + 17) << 15 };
+  shifts = simd_batch{ 0, 3, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 23-bit bundles 24 to 27
+  words = simd_batch{ SafeLoad<uint32_t>(in + 17), SafeLoad<uint32_t>(in + 17) >> 31 | SafeLoad<uint32_t>(in + 18) << 1, SafeLoad<uint32_t>(in + 18) >> 22 | SafeLoad<uint32_t>(in + 19) << 10, SafeLoad<uint32_t>(in + 19) >> 13 | SafeLoad<uint32_t>(in + 20) << 19 };
+  shifts = simd_batch{ 8, 0, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 23-bit bundles 28 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 20), SafeLoad<uint32_t>(in + 20) >> 27 | SafeLoad<uint32_t>(in + 21) << 5, SafeLoad<uint32_t>(in + 21) >> 18 | SafeLoad<uint32_t>(in + 22) << 14, SafeLoad<uint32_t>(in + 22) };
+  shifts = simd_batch{ 4, 0, 0, 9 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  in += 23;
+  return in;
+}
+
+inline static const uint32_t* unpack24_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0xffffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 24-bit bundles 0 to 3
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 24 | SafeLoad<uint32_t>(in + 1) << 8, SafeLoad<uint32_t>(in + 1) >> 16 | SafeLoad<uint32_t>(in + 2) << 16, SafeLoad<uint32_t>(in + 2) };
+  shifts = simd_batch{ 0, 0, 0, 8 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 24-bit bundles 4 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 24 | SafeLoad<uint32_t>(in + 4) << 8, SafeLoad<uint32_t>(in + 4) >> 16 | SafeLoad<uint32_t>(in + 5) << 16, SafeLoad<uint32_t>(in + 5) };
+  shifts = simd_batch{ 0, 0, 0, 8 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 24-bit bundles 8 to 11
+  words = simd_batch{ SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) >> 24 | SafeLoad<uint32_t>(in + 7) << 8, SafeLoad<uint32_t>(in + 7) >> 16 | SafeLoad<uint32_t>(in + 8) << 16, SafeLoad<uint32_t>(in + 8) };
+  shifts = simd_batch{ 0, 0, 0, 8 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 24-bit bundles 12 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9) >> 24 | SafeLoad<uint32_t>(in + 10) << 8, SafeLoad<uint32_t>(in + 10) >> 16 | SafeLoad<uint32_t>(in + 11) << 16, SafeLoad<uint32_t>(in + 11) };
+  shifts = simd_batch{ 0, 0, 0, 8 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 24-bit bundles 16 to 19
+  words = simd_batch{ SafeLoad<uint32_t>(in + 12), SafeLoad<uint32_t>(in + 12) >> 24 | SafeLoad<uint32_t>(in + 13) << 8, SafeLoad<uint32_t>(in + 13) >> 16 | SafeLoad<uint32_t>(in + 14) << 16, SafeLoad<uint32_t>(in + 14) };
+  shifts = simd_batch{ 0, 0, 0, 8 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 24-bit bundles 20 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 15), SafeLoad<uint32_t>(in + 15) >> 24 | SafeLoad<uint32_t>(in + 16) << 8, SafeLoad<uint32_t>(in + 16) >> 16 | SafeLoad<uint32_t>(in + 17) << 16, SafeLoad<uint32_t>(in + 17) };
+  shifts = simd_batch{ 0, 0, 0, 8 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 24-bit bundles 24 to 27
+  words = simd_batch{ SafeLoad<uint32_t>(in + 18), SafeLoad<uint32_t>(in + 18) >> 24 | SafeLoad<uint32_t>(in + 19) << 8, SafeLoad<uint32_t>(in + 19) >> 16 | SafeLoad<uint32_t>(in + 20) << 16, SafeLoad<uint32_t>(in + 20) };
+  shifts = simd_batch{ 0, 0, 0, 8 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 24-bit bundles 28 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 21), SafeLoad<uint32_t>(in + 21) >> 24 | SafeLoad<uint32_t>(in + 22) << 8, SafeLoad<uint32_t>(in + 22) >> 16 | SafeLoad<uint32_t>(in + 23) << 16, SafeLoad<uint32_t>(in + 23) };
+  shifts = simd_batch{ 0, 0, 0, 8 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  in += 24;
+  return in;
+}
+
+inline static const uint32_t* unpack25_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x1ffffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 25-bit bundles 0 to 3
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 25 | SafeLoad<uint32_t>(in + 1) << 7, SafeLoad<uint32_t>(in + 1) >> 18 | SafeLoad<uint32_t>(in + 2) << 14, SafeLoad<uint32_t>(in + 2) >> 11 | SafeLoad<uint32_t>(in + 3) << 21 };
+  shifts = simd_batch{ 0, 0, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 25-bit bundles 4 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 29 | SafeLoad<uint32_t>(in + 4) << 3, SafeLoad<uint32_t>(in + 4) >> 22 | SafeLoad<uint32_t>(in + 5) << 10, SafeLoad<uint32_t>(in + 5) >> 15 | SafeLoad<uint32_t>(in + 6) << 17 };
+  shifts = simd_batch{ 4, 0, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 25-bit bundles 8 to 11
+  words = simd_batch{ SafeLoad<uint32_t>(in + 6) >> 8 | SafeLoad<uint32_t>(in + 7) << 24, SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 26 | SafeLoad<uint32_t>(in + 8) << 6, SafeLoad<uint32_t>(in + 8) >> 19 | SafeLoad<uint32_t>(in + 9) << 13 };
+  shifts = simd_batch{ 0, 1, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 25-bit bundles 12 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 9) >> 12 | SafeLoad<uint32_t>(in + 10) << 20, SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10) >> 30 | SafeLoad<uint32_t>(in + 11) << 2, SafeLoad<uint32_t>(in + 11) >> 23 | SafeLoad<uint32_t>(in + 12) << 9 };
+  shifts = simd_batch{ 0, 5, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 25-bit bundles 16 to 19
+  words = simd_batch{ SafeLoad<uint32_t>(in + 12) >> 16 | SafeLoad<uint32_t>(in + 13) << 16, SafeLoad<uint32_t>(in + 13) >> 9 | SafeLoad<uint32_t>(in + 14) << 23, SafeLoad<uint32_t>(in + 14), SafeLoad<uint32_t>(in + 14) >> 27 | SafeLoad<uint32_t>(in + 15) << 5 };
+  shifts = simd_batch{ 0, 0, 2, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 25-bit bundles 20 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 15) >> 20 | SafeLoad<uint32_t>(in + 16) << 12, SafeLoad<uint32_t>(in + 16) >> 13 | SafeLoad<uint32_t>(in + 17) << 19, SafeLoad<uint32_t>(in + 17), SafeLoad<uint32_t>(in + 17) >> 31 | SafeLoad<uint32_t>(in + 18) << 1 };
+  shifts = simd_batch{ 0, 0, 6, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 25-bit bundles 24 to 27
+  words = simd_batch{ SafeLoad<uint32_t>(in + 18) >> 24 | SafeLoad<uint32_t>(in + 19) << 8, SafeLoad<uint32_t>(in + 19) >> 17 | SafeLoad<uint32_t>(in + 20) << 15, SafeLoad<uint32_t>(in + 20) >> 10 | SafeLoad<uint32_t>(in + 21) << 22, SafeLoad<uint32_t>(in + 21) };
+  shifts = simd_batch{ 0, 0, 0, 3 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 25-bit bundles 28 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 21) >> 28 | SafeLoad<uint32_t>(in + 22) << 4, SafeLoad<uint32_t>(in + 22) >> 21 | SafeLoad<uint32_t>(in + 23) << 11, SafeLoad<uint32_t>(in + 23) >> 14 | SafeLoad<uint32_t>(in + 24) << 18, SafeLoad<uint32_t>(in + 24) };
+  shifts = simd_batch{ 0, 0, 0, 7 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  in += 25;
+  return in;
+}
+
+inline static const uint32_t* unpack26_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x3ffffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 26-bit bundles 0 to 3
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 26 | SafeLoad<uint32_t>(in + 1) << 6, SafeLoad<uint32_t>(in + 1) >> 20 | SafeLoad<uint32_t>(in + 2) << 12, SafeLoad<uint32_t>(in + 2) >> 14 | SafeLoad<uint32_t>(in + 3) << 18 };
+  shifts = simd_batch{ 0, 0, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 26-bit bundles 4 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 3) >> 8 | SafeLoad<uint32_t>(in + 4) << 24, SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) >> 28 | SafeLoad<uint32_t>(in + 5) << 4, SafeLoad<uint32_t>(in + 5) >> 22 | SafeLoad<uint32_t>(in + 6) << 10 };
+  shifts = simd_batch{ 0, 2, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 26-bit bundles 8 to 11
+  words = simd_batch{ SafeLoad<uint32_t>(in + 6) >> 16 | SafeLoad<uint32_t>(in + 7) << 16, SafeLoad<uint32_t>(in + 7) >> 10 | SafeLoad<uint32_t>(in + 8) << 22, SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8) >> 30 | SafeLoad<uint32_t>(in + 9) << 2 };
+  shifts = simd_batch{ 0, 0, 4, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 26-bit bundles 12 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 9) >> 24 | SafeLoad<uint32_t>(in + 10) << 8, SafeLoad<uint32_t>(in + 10) >> 18 | SafeLoad<uint32_t>(in + 11) << 14, SafeLoad<uint32_t>(in + 11) >> 12 | SafeLoad<uint32_t>(in + 12) << 20, SafeLoad<uint32_t>(in + 12) };
+  shifts = simd_batch{ 0, 0, 0, 6 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 26-bit bundles 16 to 19
+  words = simd_batch{ SafeLoad<uint32_t>(in + 13), SafeLoad<uint32_t>(in + 13) >> 26 | SafeLoad<uint32_t>(in + 14) << 6, SafeLoad<uint32_t>(in + 14) >> 20 | SafeLoad<uint32_t>(in + 15) << 12, SafeLoad<uint32_t>(in + 15) >> 14 | SafeLoad<uint32_t>(in + 16) << 18 };
+  shifts = simd_batch{ 0, 0, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 26-bit bundles 20 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 16) >> 8 | SafeLoad<uint32_t>(in + 17) << 24, SafeLoad<uint32_t>(in + 17), SafeLoad<uint32_t>(in + 17) >> 28 | SafeLoad<uint32_t>(in + 18) << 4, SafeLoad<uint32_t>(in + 18) >> 22 | SafeLoad<uint32_t>(in + 19) << 10 };
+  shifts = simd_batch{ 0, 2, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 26-bit bundles 24 to 27
+  words = simd_batch{ SafeLoad<uint32_t>(in + 19) >> 16 | SafeLoad<uint32_t>(in + 20) << 16, SafeLoad<uint32_t>(in + 20) >> 10 | SafeLoad<uint32_t>(in + 21) << 22, SafeLoad<uint32_t>(in + 21), SafeLoad<uint32_t>(in + 21) >> 30 | SafeLoad<uint32_t>(in + 22) << 2 };
+  shifts = simd_batch{ 0, 0, 4, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 26-bit bundles 28 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 22) >> 24 | SafeLoad<uint32_t>(in + 23) << 8, SafeLoad<uint32_t>(in + 23) >> 18 | SafeLoad<uint32_t>(in + 24) << 14, SafeLoad<uint32_t>(in + 24) >> 12 | SafeLoad<uint32_t>(in + 25) << 20, SafeLoad<uint32_t>(in + 25) };
+  shifts = simd_batch{ 0, 0, 0, 6 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  in += 26;
+  return in;
+}
+
+inline static const uint32_t* unpack27_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x7ffffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 27-bit bundles 0 to 3
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 27 | SafeLoad<uint32_t>(in + 1) << 5, SafeLoad<uint32_t>(in + 1) >> 22 | SafeLoad<uint32_t>(in + 2) << 10, SafeLoad<uint32_t>(in + 2) >> 17 | SafeLoad<uint32_t>(in + 3) << 15 };
+  shifts = simd_batch{ 0, 0, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 27-bit bundles 4 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 3) >> 12 | SafeLoad<uint32_t>(in + 4) << 20, SafeLoad<uint32_t>(in + 4) >> 7 | SafeLoad<uint32_t>(in + 5) << 25, SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 29 | SafeLoad<uint32_t>(in + 6) << 3 };
+  shifts = simd_batch{ 0, 0, 2, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 27-bit bundles 8 to 11
+  words = simd_batch{ SafeLoad<uint32_t>(in + 6) >> 24 | SafeLoad<uint32_t>(in + 7) << 8, SafeLoad<uint32_t>(in + 7) >> 19 | SafeLoad<uint32_t>(in + 8) << 13, SafeLoad<uint32_t>(in + 8) >> 14 | SafeLoad<uint32_t>(in + 9) << 18, SafeLoad<uint32_t>(in + 9) >> 9 | SafeLoad<uint32_t>(in + 10) << 23 };
+  shifts = simd_batch{ 0, 0, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 27-bit bundles 12 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10) >> 31 | SafeLoad<uint32_t>(in + 11) << 1, SafeLoad<uint32_t>(in + 11) >> 26 | SafeLoad<uint32_t>(in + 12) << 6, SafeLoad<uint32_t>(in + 12) >> 21 | SafeLoad<uint32_t>(in + 13) << 11 };
+  shifts = simd_batch{ 4, 0, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 27-bit bundles 16 to 19
+  words = simd_batch{ SafeLoad<uint32_t>(in + 13) >> 16 | SafeLoad<uint32_t>(in + 14) << 16, SafeLoad<uint32_t>(in + 14) >> 11 | SafeLoad<uint32_t>(in + 15) << 21, SafeLoad<uint32_t>(in + 15) >> 6 | SafeLoad<uint32_t>(in + 16) << 26, SafeLoad<uint32_t>(in + 16) };
+  shifts = simd_batch{ 0, 0, 0, 1 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 27-bit bundles 20 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 16) >> 28 | SafeLoad<uint32_t>(in + 17) << 4, SafeLoad<uint32_t>(in + 17) >> 23 | SafeLoad<uint32_t>(in + 18) << 9, SafeLoad<uint32_t>(in + 18) >> 18 | SafeLoad<uint32_t>(in + 19) << 14, SafeLoad<uint32_t>(in + 19) >> 13 | SafeLoad<uint32_t>(in + 20) << 19 };
+  shifts = simd_batch{ 0, 0, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 27-bit bundles 24 to 27
+  words = simd_batch{ SafeLoad<uint32_t>(in + 20) >> 8 | SafeLoad<uint32_t>(in + 21) << 24, SafeLoad<uint32_t>(in + 21), SafeLoad<uint32_t>(in + 21) >> 30 | SafeLoad<uint32_t>(in + 22) << 2, SafeLoad<uint32_t>(in + 22) >> 25 | SafeLoad<uint32_t>(in + 23) << 7 };
+  shifts = simd_batch{ 0, 3, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 27-bit bundles 28 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 23) >> 20 | SafeLoad<uint32_t>(in + 24) << 12, SafeLoad<uint32_t>(in + 24) >> 15 | SafeLoad<uint32_t>(in + 25) << 17, SafeLoad<uint32_t>(in + 25) >> 10 | SafeLoad<uint32_t>(in + 26) << 22, SafeLoad<uint32_t>(in + 26) };
+  shifts = simd_batch{ 0, 0, 0, 5 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  in += 27;
+  return in;
+}
+
+inline static const uint32_t* unpack28_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0xfffffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 28-bit bundles 0 to 3
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 28 | SafeLoad<uint32_t>(in + 1) << 4, SafeLoad<uint32_t>(in + 1) >> 24 | SafeLoad<uint32_t>(in + 2) << 8, SafeLoad<uint32_t>(in + 2) >> 20 | SafeLoad<uint32_t>(in + 3) << 12 };
+  shifts = simd_batch{ 0, 0, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 28-bit bundles 4 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 3) >> 16 | SafeLoad<uint32_t>(in + 4) << 16, SafeLoad<uint32_t>(in + 4) >> 12 | SafeLoad<uint32_t>(in + 5) << 20, SafeLoad<uint32_t>(in + 5) >> 8 | SafeLoad<uint32_t>(in + 6) << 24, SafeLoad<uint32_t>(in + 6) };
+  shifts = simd_batch{ 0, 0, 0, 4 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 28-bit bundles 8 to 11
+  words = simd_batch{ SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 28 | SafeLoad<uint32_t>(in + 8) << 4, SafeLoad<uint32_t>(in + 8) >> 24 | SafeLoad<uint32_t>(in + 9) << 8, SafeLoad<uint32_t>(in + 9) >> 20 | SafeLoad<uint32_t>(in + 10) << 12 };
+  shifts = simd_batch{ 0, 0, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 28-bit bundles 12 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 10) >> 16 | SafeLoad<uint32_t>(in + 11) << 16, SafeLoad<uint32_t>(in + 11) >> 12 | SafeLoad<uint32_t>(in + 12) << 20, SafeLoad<uint32_t>(in + 12) >> 8 | SafeLoad<uint32_t>(in + 13) << 24, SafeLoad<uint32_t>(in + 13) };
+  shifts = simd_batch{ 0, 0, 0, 4 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 28-bit bundles 16 to 19
+  words = simd_batch{ SafeLoad<uint32_t>(in + 14), SafeLoad<uint32_t>(in + 14) >> 28 | SafeLoad<uint32_t>(in + 15) << 4, SafeLoad<uint32_t>(in + 15) >> 24 | SafeLoad<uint32_t>(in + 16) << 8, SafeLoad<uint32_t>(in + 16) >> 20 | SafeLoad<uint32_t>(in + 17) << 12 };
+  shifts = simd_batch{ 0, 0, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 28-bit bundles 20 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 17) >> 16 | SafeLoad<uint32_t>(in + 18) << 16, SafeLoad<uint32_t>(in + 18) >> 12 | SafeLoad<uint32_t>(in + 19) << 20, SafeLoad<uint32_t>(in + 19) >> 8 | SafeLoad<uint32_t>(in + 20) << 24, SafeLoad<uint32_t>(in + 20) };
+  shifts = simd_batch{ 0, 0, 0, 4 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 28-bit bundles 24 to 27
+  words = simd_batch{ SafeLoad<uint32_t>(in + 21), SafeLoad<uint32_t>(in + 21) >> 28 | SafeLoad<uint32_t>(in + 22) << 4, SafeLoad<uint32_t>(in + 22) >> 24 | SafeLoad<uint32_t>(in + 23) << 8, SafeLoad<uint32_t>(in + 23) >> 20 | SafeLoad<uint32_t>(in + 24) << 12 };
+  shifts = simd_batch{ 0, 0, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 28-bit bundles 28 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 24) >> 16 | SafeLoad<uint32_t>(in + 25) << 16, SafeLoad<uint32_t>(in + 25) >> 12 | SafeLoad<uint32_t>(in + 26) << 20, SafeLoad<uint32_t>(in + 26) >> 8 | SafeLoad<uint32_t>(in + 27) << 24, SafeLoad<uint32_t>(in + 27) };
+  shifts = simd_batch{ 0, 0, 0, 4 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  in += 28;
+  return in;
+}
+
+inline static const uint32_t* unpack29_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x1fffffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 29-bit bundles 0 to 3
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 29 | SafeLoad<uint32_t>(in + 1) << 3, SafeLoad<uint32_t>(in + 1) >> 26 | SafeLoad<uint32_t>(in + 2) << 6, SafeLoad<uint32_t>(in + 2) >> 23 | SafeLoad<uint32_t>(in + 3) << 9 };
+  shifts = simd_batch{ 0, 0, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 29-bit bundles 4 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 3) >> 20 | SafeLoad<uint32_t>(in + 4) << 12, SafeLoad<uint32_t>(in + 4) >> 17 | SafeLoad<uint32_t>(in + 5) << 15, SafeLoad<uint32_t>(in + 5) >> 14 | SafeLoad<uint32_t>(in + 6) << 18, SafeLoad<uint32_t>(in + 6) >> 11 | SafeLoad<uint32_t>(in + 7) << 21 };
+  shifts = simd_batch{ 0, 0, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 29-bit bundles 8 to 11
+  words = simd_batch{ SafeLoad<uint32_t>(in + 7) >> 8 | SafeLoad<uint32_t>(in + 8) << 24, SafeLoad<uint32_t>(in + 8) >> 5 | SafeLoad<uint32_t>(in + 9) << 27, SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9) >> 31 | SafeLoad<uint32_t>(in + 10) << 1 };
+  shifts = simd_batch{ 0, 0, 2, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 29-bit bundles 12 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 10) >> 28 | SafeLoad<uint32_t>(in + 11) << 4, SafeLoad<uint32_t>(in + 11) >> 25 | SafeLoad<uint32_t>(in + 12) << 7, SafeLoad<uint32_t>(in + 12) >> 22 | SafeLoad<uint32_t>(in + 13) << 10, SafeLoad<uint32_t>(in + 13) >> 19 | SafeLoad<uint32_t>(in + 14) << 13 };
+  shifts = simd_batch{ 0, 0, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 29-bit bundles 16 to 19
+  words = simd_batch{ SafeLoad<uint32_t>(in + 14) >> 16 | SafeLoad<uint32_t>(in + 15) << 16, SafeLoad<uint32_t>(in + 15) >> 13 | SafeLoad<uint32_t>(in + 16) << 19, SafeLoad<uint32_t>(in + 16) >> 10 | SafeLoad<uint32_t>(in + 17) << 22, SafeLoad<uint32_t>(in + 17) >> 7 | SafeLoad<uint32_t>(in + 18) << 25 };
+  shifts = simd_batch{ 0, 0, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 29-bit bundles 20 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 18) >> 4 | SafeLoad<uint32_t>(in + 19) << 28, SafeLoad<uint32_t>(in + 19), SafeLoad<uint32_t>(in + 19) >> 30 | SafeLoad<uint32_t>(in + 20) << 2, SafeLoad<uint32_t>(in + 20) >> 27 | SafeLoad<uint32_t>(in + 21) << 5 };
+  shifts = simd_batch{ 0, 1, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 29-bit bundles 24 to 27
+  words = simd_batch{ SafeLoad<uint32_t>(in + 21) >> 24 | SafeLoad<uint32_t>(in + 22) << 8, SafeLoad<uint32_t>(in + 22) >> 21 | SafeLoad<uint32_t>(in + 23) << 11, SafeLoad<uint32_t>(in + 23) >> 18 | SafeLoad<uint32_t>(in + 24) << 14, SafeLoad<uint32_t>(in + 24) >> 15 | SafeLoad<uint32_t>(in + 25) << 17 };
+  shifts = simd_batch{ 0, 0, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 29-bit bundles 28 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 25) >> 12 | SafeLoad<uint32_t>(in + 26) << 20, SafeLoad<uint32_t>(in + 26) >> 9 | SafeLoad<uint32_t>(in + 27) << 23, SafeLoad<uint32_t>(in + 27) >> 6 | SafeLoad<uint32_t>(in + 28) << 26, SafeLoad<uint32_t>(in + 28) };
+  shifts = simd_batch{ 0, 0, 0, 3 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  in += 29;
+  return in;
+}
+
+inline static const uint32_t* unpack30_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x3fffffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 30-bit bundles 0 to 3
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 30 | SafeLoad<uint32_t>(in + 1) << 2, SafeLoad<uint32_t>(in + 1) >> 28 | SafeLoad<uint32_t>(in + 2) << 4, SafeLoad<uint32_t>(in + 2) >> 26 | SafeLoad<uint32_t>(in + 3) << 6 };
+  shifts = simd_batch{ 0, 0, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 30-bit bundles 4 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 3) >> 24 | SafeLoad<uint32_t>(in + 4) << 8, SafeLoad<uint32_t>(in + 4) >> 22 | SafeLoad<uint32_t>(in + 5) << 10, SafeLoad<uint32_t>(in + 5) >> 20 | SafeLoad<uint32_t>(in + 6) << 12, SafeLoad<uint32_t>(in + 6) >> 18 | SafeLoad<uint32_t>(in + 7) << 14 };
+  shifts = simd_batch{ 0, 0, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 30-bit bundles 8 to 11
+  words = simd_batch{ SafeLoad<uint32_t>(in + 7) >> 16 | SafeLoad<uint32_t>(in + 8) << 16, SafeLoad<uint32_t>(in + 8) >> 14 | SafeLoad<uint32_t>(in + 9) << 18, SafeLoad<uint32_t>(in + 9) >> 12 | SafeLoad<uint32_t>(in + 10) << 20, SafeLoad<uint32_t>(in + 10) >> 10 | SafeLoad<uint32_t>(in + 11) << 22 };
+  shifts = simd_batch{ 0, 0, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 30-bit bundles 12 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 11) >> 8 | SafeLoad<uint32_t>(in + 12) << 24, SafeLoad<uint32_t>(in + 12) >> 6 | SafeLoad<uint32_t>(in + 13) << 26, SafeLoad<uint32_t>(in + 13) >> 4 | SafeLoad<uint32_t>(in + 14) << 28, SafeLoad<uint32_t>(in + 14) };
+  shifts = simd_batch{ 0, 0, 0, 2 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 30-bit bundles 16 to 19
+  words = simd_batch{ SafeLoad<uint32_t>(in + 15), SafeLoad<uint32_t>(in + 15) >> 30 | SafeLoad<uint32_t>(in + 16) << 2, SafeLoad<uint32_t>(in + 16) >> 28 | SafeLoad<uint32_t>(in + 17) << 4, SafeLoad<uint32_t>(in + 17) >> 26 | SafeLoad<uint32_t>(in + 18) << 6 };
+  shifts = simd_batch{ 0, 0, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 30-bit bundles 20 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 18) >> 24 | SafeLoad<uint32_t>(in + 19) << 8, SafeLoad<uint32_t>(in + 19) >> 22 | SafeLoad<uint32_t>(in + 20) << 10, SafeLoad<uint32_t>(in + 20) >> 20 | SafeLoad<uint32_t>(in + 21) << 12, SafeLoad<uint32_t>(in + 21) >> 18 | SafeLoad<uint32_t>(in + 22) << 14 };
+  shifts = simd_batch{ 0, 0, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 30-bit bundles 24 to 27
+  words = simd_batch{ SafeLoad<uint32_t>(in + 22) >> 16 | SafeLoad<uint32_t>(in + 23) << 16, SafeLoad<uint32_t>(in + 23) >> 14 | SafeLoad<uint32_t>(in + 24) << 18, SafeLoad<uint32_t>(in + 24) >> 12 | SafeLoad<uint32_t>(in + 25) << 20, SafeLoad<uint32_t>(in + 25) >> 10 | SafeLoad<uint32_t>(in + 26) << 22 };
+  shifts = simd_batch{ 0, 0, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 30-bit bundles 28 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 26) >> 8 | SafeLoad<uint32_t>(in + 27) << 24, SafeLoad<uint32_t>(in + 27) >> 6 | SafeLoad<uint32_t>(in + 28) << 26, SafeLoad<uint32_t>(in + 28) >> 4 | SafeLoad<uint32_t>(in + 29) << 28, SafeLoad<uint32_t>(in + 29) };
+  shifts = simd_batch{ 0, 0, 0, 2 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  in += 30;
+  return in;
+}
+
+inline static const uint32_t* unpack31_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x7fffffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 31-bit bundles 0 to 3
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 31 | SafeLoad<uint32_t>(in + 1) << 1, SafeLoad<uint32_t>(in + 1) >> 30 | SafeLoad<uint32_t>(in + 2) << 2, SafeLoad<uint32_t>(in + 2) >> 29 | SafeLoad<uint32_t>(in + 3) << 3 };
+  shifts = simd_batch{ 0, 0, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 31-bit bundles 4 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 3) >> 28 | SafeLoad<uint32_t>(in + 4) << 4, SafeLoad<uint32_t>(in + 4) >> 27 | SafeLoad<uint32_t>(in + 5) << 5, SafeLoad<uint32_t>(in + 5) >> 26 | SafeLoad<uint32_t>(in + 6) << 6, SafeLoad<uint32_t>(in + 6) >> 25 | SafeLoad<uint32_t>(in + 7) << 7 };
+  shifts = simd_batch{ 0, 0, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 31-bit bundles 8 to 11
+  words = simd_batch{ SafeLoad<uint32_t>(in + 7) >> 24 | SafeLoad<uint32_t>(in + 8) << 8, SafeLoad<uint32_t>(in + 8) >> 23 | SafeLoad<uint32_t>(in + 9) << 9, SafeLoad<uint32_t>(in + 9) >> 22 | SafeLoad<uint32_t>(in + 10) << 10, SafeLoad<uint32_t>(in + 10) >> 21 | SafeLoad<uint32_t>(in + 11) << 11 };
+  shifts = simd_batch{ 0, 0, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 31-bit bundles 12 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 11) >> 20 | SafeLoad<uint32_t>(in + 12) << 12, SafeLoad<uint32_t>(in + 12) >> 19 | SafeLoad<uint32_t>(in + 13) << 13, SafeLoad<uint32_t>(in + 13) >> 18 | SafeLoad<uint32_t>(in + 14) << 14, SafeLoad<uint32_t>(in + 14) >> 17 | SafeLoad<uint32_t>(in + 15) << 15 };
+  shifts = simd_batch{ 0, 0, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 31-bit bundles 16 to 19
+  words = simd_batch{ SafeLoad<uint32_t>(in + 15) >> 16 | SafeLoad<uint32_t>(in + 16) << 16, SafeLoad<uint32_t>(in + 16) >> 15 | SafeLoad<uint32_t>(in + 17) << 17, SafeLoad<uint32_t>(in + 17) >> 14 | SafeLoad<uint32_t>(in + 18) << 18, SafeLoad<uint32_t>(in + 18) >> 13 | SafeLoad<uint32_t>(in + 19) << 19 };
+  shifts = simd_batch{ 0, 0, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 31-bit bundles 20 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 19) >> 12 | SafeLoad<uint32_t>(in + 20) << 20, SafeLoad<uint32_t>(in + 20) >> 11 | SafeLoad<uint32_t>(in + 21) << 21, SafeLoad<uint32_t>(in + 21) >> 10 | SafeLoad<uint32_t>(in + 22) << 22, SafeLoad<uint32_t>(in + 22) >> 9 | SafeLoad<uint32_t>(in + 23) << 23 };
+  shifts = simd_batch{ 0, 0, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 31-bit bundles 24 to 27
+  words = simd_batch{ SafeLoad<uint32_t>(in + 23) >> 8 | SafeLoad<uint32_t>(in + 24) << 24, SafeLoad<uint32_t>(in + 24) >> 7 | SafeLoad<uint32_t>(in + 25) << 25, SafeLoad<uint32_t>(in + 25) >> 6 | SafeLoad<uint32_t>(in + 26) << 26, SafeLoad<uint32_t>(in + 26) >> 5 | SafeLoad<uint32_t>(in + 27) << 27 };
+  shifts = simd_batch{ 0, 0, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  // extract 31-bit bundles 28 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 27) >> 4 | SafeLoad<uint32_t>(in + 28) << 28, SafeLoad<uint32_t>(in + 28) >> 3 | SafeLoad<uint32_t>(in + 29) << 29, SafeLoad<uint32_t>(in + 29) >> 2 | SafeLoad<uint32_t>(in + 30) << 30, SafeLoad<uint32_t>(in + 30) };
+  shifts = simd_batch{ 0, 0, 0, 1 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 4;
+
+  in += 31;
+  return in;
+}
+
+inline static const uint32_t* unpack32_32(const uint32_t* in, uint32_t* out) {
+  memcpy(out, in, 32 * sizeof(*out));
+  in += 32;
+  out += 32;
+
+  return in;
+}
+
+};  // struct UnpackBits128
+
+}  // namespace
+}  // namespace internal
+}  // namespace arrow
+
diff --git a/cpp/src/arrow/util/bpacking_simd256_generated.h b/cpp/src/arrow/util/bpacking_simd256_generated.h
new file mode 100644
index 00000000000..a73bafe17e5
--- /dev/null
+++ b/cpp/src/arrow/util/bpacking_simd256_generated.h
@@ -0,0 +1,1270 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Automatically generated file; DO NOT EDIT.
+
+#pragma once
+
+#include <cstdint>
+#include <cstring>
+
+#include <xsimd/xsimd.hpp>
+
+#include "arrow/util/dispatch.h"
+#include "arrow/util/ubsan.h"
+
+namespace arrow {
+namespace internal {
+namespace {
+
+using ::arrow::util::SafeLoad;
+
+template <DispatchLevel level>
+struct UnpackBits256 {
+
+using simd_batch = xsimd::batch<uint32_t, 8>;
+
+inline static const uint32_t* unpack0_32(const uint32_t* in, uint32_t* out) {
+  memset(out, 0x0, 32 * sizeof(*out));
+  out += 32;
+
+  return in;
+}
+
+inline static const uint32_t* unpack1_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x1;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 1-bit bundles 0 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) };
+  shifts = simd_batch{ 0, 1, 2, 3, 4, 5, 6, 7 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 1-bit bundles 8 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) };
+  shifts = simd_batch{ 8, 9, 10, 11, 12, 13, 14, 15 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 1-bit bundles 16 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) };
+  shifts = simd_batch{ 16, 17, 18, 19, 20, 21, 22, 23 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 1-bit bundles 24 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) };
+  shifts = simd_batch{ 24, 25, 26, 27, 28, 29, 30, 31 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  in += 1;
+  return in;
+}
+
+inline static const uint32_t* unpack2_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x3;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 2-bit bundles 0 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) };
+  shifts = simd_batch{ 0, 2, 4, 6, 8, 10, 12, 14 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 2-bit bundles 8 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) };
+  shifts = simd_batch{ 16, 18, 20, 22, 24, 26, 28, 30 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 2-bit bundles 16 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) };
+  shifts = simd_batch{ 0, 2, 4, 6, 8, 10, 12, 14 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 2-bit bundles 24 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) };
+  shifts = simd_batch{ 16, 18, 20, 22, 24, 26, 28, 30 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  in += 2;
+  return in;
+}
+
+inline static const uint32_t* unpack3_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x7;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 3-bit bundles 0 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) };
+  shifts = simd_batch{ 0, 3, 6, 9, 12, 15, 18, 21 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 3-bit bundles 8 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 30 | SafeLoad<uint32_t>(in + 1) << 2, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) };
+  shifts = simd_batch{ 24, 27, 0, 1, 4, 7, 10, 13 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 3-bit bundles 16 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 31 | SafeLoad<uint32_t>(in + 2) << 1, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) };
+  shifts = simd_batch{ 16, 19, 22, 25, 28, 0, 2, 5 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 3-bit bundles 24 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) };
+  shifts = simd_batch{ 8, 11, 14, 17, 20, 23, 26, 29 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  in += 3;
+  return in;
+}
+
+inline static const uint32_t* unpack4_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0xf;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 4-bit bundles 0 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) };
+  shifts = simd_batch{ 0, 4, 8, 12, 16, 20, 24, 28 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 4-bit bundles 8 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) };
+  shifts = simd_batch{ 0, 4, 8, 12, 16, 20, 24, 28 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 4-bit bundles 16 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) };
+  shifts = simd_batch{ 0, 4, 8, 12, 16, 20, 24, 28 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 4-bit bundles 24 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) };
+  shifts = simd_batch{ 0, 4, 8, 12, 16, 20, 24, 28 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  in += 4;
+  return in;
+}
+
+inline static const uint32_t* unpack5_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x1f;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 5-bit bundles 0 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 30 | SafeLoad<uint32_t>(in + 1) << 2, SafeLoad<uint32_t>(in + 1) };
+  shifts = simd_batch{ 0, 5, 10, 15, 20, 25, 0, 3 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 5-bit bundles 8 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 28 | SafeLoad<uint32_t>(in + 2) << 4, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) };
+  shifts = simd_batch{ 8, 13, 18, 23, 0, 1, 6, 11 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 5-bit bundles 16 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 31 | SafeLoad<uint32_t>(in + 3) << 1, SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) };
+  shifts = simd_batch{ 16, 21, 26, 0, 4, 9, 14, 19 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 5-bit bundles 24 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 29 | SafeLoad<uint32_t>(in + 4) << 3, SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) };
+  shifts = simd_batch{ 24, 0, 2, 7, 12, 17, 22, 27 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  in += 5;
+  return in;
+}
+
+inline static const uint32_t* unpack6_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x3f;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 6-bit bundles 0 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 30 | SafeLoad<uint32_t>(in + 1) << 2, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) };
+  shifts = simd_batch{ 0, 6, 12, 18, 24, 0, 4, 10 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 6-bit bundles 8 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 28 | SafeLoad<uint32_t>(in + 2) << 4, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) };
+  shifts = simd_batch{ 16, 22, 0, 2, 8, 14, 20, 26 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 6-bit bundles 16 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 30 | SafeLoad<uint32_t>(in + 4) << 2, SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) };
+  shifts = simd_batch{ 0, 6, 12, 18, 24, 0, 4, 10 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 6-bit bundles 24 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) >> 28 | SafeLoad<uint32_t>(in + 5) << 4, SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) };
+  shifts = simd_batch{ 16, 22, 0, 2, 8, 14, 20, 26 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  in += 6;
+  return in;
+}
+
+inline static const uint32_t* unpack7_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x7f;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 7-bit bundles 0 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 28 | SafeLoad<uint32_t>(in + 1) << 4, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) };
+  shifts = simd_batch{ 0, 7, 14, 21, 0, 3, 10, 17 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 7-bit bundles 8 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 31 | SafeLoad<uint32_t>(in + 2) << 1, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 27 | SafeLoad<uint32_t>(in + 3) << 5, SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) };
+  shifts = simd_batch{ 24, 0, 6, 13, 20, 0, 2, 9 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 7-bit bundles 16 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 30 | SafeLoad<uint32_t>(in + 4) << 2, SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) >> 26 | SafeLoad<uint32_t>(in + 5) << 6, SafeLoad<uint32_t>(in + 5) };
+  shifts = simd_batch{ 16, 23, 0, 5, 12, 19, 0, 1 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 7-bit bundles 24 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 29 | SafeLoad<uint32_t>(in + 6) << 3, SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) };
+  shifts = simd_batch{ 8, 15, 22, 0, 4, 11, 18, 25 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  in += 7;
+  return in;
+}
+
+inline static const uint32_t* unpack8_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0xff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 8-bit bundles 0 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) };
+  shifts = simd_batch{ 0, 8, 16, 24, 0, 8, 16, 24 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 8-bit bundles 8 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) };
+  shifts = simd_batch{ 0, 8, 16, 24, 0, 8, 16, 24 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 8-bit bundles 16 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) };
+  shifts = simd_batch{ 0, 8, 16, 24, 0, 8, 16, 24 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 8-bit bundles 24 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) };
+  shifts = simd_batch{ 0, 8, 16, 24, 0, 8, 16, 24 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  in += 8;
+  return in;
+}
+
+inline static const uint32_t* unpack9_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x1ff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 9-bit bundles 0 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 27 | SafeLoad<uint32_t>(in + 1) << 5, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 31 | SafeLoad<uint32_t>(in + 2) << 1 };
+  shifts = simd_batch{ 0, 9, 18, 0, 4, 13, 22, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 9-bit bundles 8 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 26 | SafeLoad<uint32_t>(in + 3) << 6, SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 30 | SafeLoad<uint32_t>(in + 4) << 2, SafeLoad<uint32_t>(in + 4) };
+  shifts = simd_batch{ 8, 17, 0, 3, 12, 21, 0, 7 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 9-bit bundles 16 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) >> 25 | SafeLoad<uint32_t>(in + 5) << 7, SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 29 | SafeLoad<uint32_t>(in + 6) << 3, SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) };
+  shifts = simd_batch{ 16, 0, 2, 11, 20, 0, 6, 15 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 9-bit bundles 24 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 6) >> 24 | SafeLoad<uint32_t>(in + 7) << 8, SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 28 | SafeLoad<uint32_t>(in + 8) << 4, SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8) };
+  shifts = simd_batch{ 0, 1, 10, 19, 0, 5, 14, 23 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  in += 9;
+  return in;
+}
+
+inline static const uint32_t* unpack10_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x3ff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 10-bit bundles 0 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 30 | SafeLoad<uint32_t>(in + 1) << 2, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 28 | SafeLoad<uint32_t>(in + 2) << 4, SafeLoad<uint32_t>(in + 2) };
+  shifts = simd_batch{ 0, 10, 20, 0, 8, 18, 0, 6 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 10-bit bundles 8 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 26 | SafeLoad<uint32_t>(in + 3) << 6, SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 24 | SafeLoad<uint32_t>(in + 4) << 8, SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) };
+  shifts = simd_batch{ 16, 0, 4, 14, 0, 2, 12, 22 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 10-bit bundles 16 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 30 | SafeLoad<uint32_t>(in + 6) << 2, SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) >> 28 | SafeLoad<uint32_t>(in + 7) << 4, SafeLoad<uint32_t>(in + 7) };
+  shifts = simd_batch{ 0, 10, 20, 0, 8, 18, 0, 6 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 10-bit bundles 24 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 26 | SafeLoad<uint32_t>(in + 8) << 6, SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8) >> 24 | SafeLoad<uint32_t>(in + 9) << 8, SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9) };
+  shifts = simd_batch{ 16, 0, 4, 14, 0, 2, 12, 22 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  in += 10;
+  return in;
+}
+
+inline static const uint32_t* unpack11_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x7ff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 11-bit bundles 0 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 22 | SafeLoad<uint32_t>(in + 1) << 10, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 23 | SafeLoad<uint32_t>(in + 2) << 9, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) };
+  shifts = simd_batch{ 0, 11, 0, 1, 12, 0, 2, 13 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 11-bit bundles 8 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 2) >> 24 | SafeLoad<uint32_t>(in + 3) << 8, SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 25 | SafeLoad<uint32_t>(in + 4) << 7, SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) >> 26 | SafeLoad<uint32_t>(in + 5) << 6, SafeLoad<uint32_t>(in + 5) };
+  shifts = simd_batch{ 0, 3, 14, 0, 4, 15, 0, 5 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 11-bit bundles 16 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 27 | SafeLoad<uint32_t>(in + 6) << 5, SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) >> 28 | SafeLoad<uint32_t>(in + 7) << 4, SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 29 | SafeLoad<uint32_t>(in + 8) << 3 };
+  shifts = simd_batch{ 16, 0, 6, 17, 0, 7, 18, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 11-bit bundles 24 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8) >> 30 | SafeLoad<uint32_t>(in + 9) << 2, SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9) >> 31 | SafeLoad<uint32_t>(in + 10) << 1, SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10) };
+  shifts = simd_batch{ 8, 19, 0, 9, 20, 0, 10, 21 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  in += 11;
+  return in;
+}
+
+inline static const uint32_t* unpack12_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0xfff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 12-bit bundles 0 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 24 | SafeLoad<uint32_t>(in + 1) << 8, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 28 | SafeLoad<uint32_t>(in + 2) << 4, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) };
+  shifts = simd_batch{ 0, 12, 0, 4, 16, 0, 8, 20 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 12-bit bundles 8 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 24 | SafeLoad<uint32_t>(in + 4) << 8, SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) >> 28 | SafeLoad<uint32_t>(in + 5) << 4, SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) };
+  shifts = simd_batch{ 0, 12, 0, 4, 16, 0, 8, 20 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 12-bit bundles 16 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) >> 24 | SafeLoad<uint32_t>(in + 7) << 8, SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 28 | SafeLoad<uint32_t>(in + 8) << 4, SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8) };
+  shifts = simd_batch{ 0, 12, 0, 4, 16, 0, 8, 20 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 12-bit bundles 24 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9) >> 24 | SafeLoad<uint32_t>(in + 10) << 8, SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10) >> 28 | SafeLoad<uint32_t>(in + 11) << 4, SafeLoad<uint32_t>(in + 11), SafeLoad<uint32_t>(in + 11) };
+  shifts = simd_batch{ 0, 12, 0, 4, 16, 0, 8, 20 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  in += 12;
+  return in;
+}
+
+inline static const uint32_t* unpack13_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x1fff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 13-bit bundles 0 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 26 | SafeLoad<uint32_t>(in + 1) << 6, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 20 | SafeLoad<uint32_t>(in + 2) << 12, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 27 | SafeLoad<uint32_t>(in + 3) << 5 };
+  shifts = simd_batch{ 0, 13, 0, 7, 0, 1, 14, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 13-bit bundles 8 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 21 | SafeLoad<uint32_t>(in + 4) << 11, SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) >> 28 | SafeLoad<uint32_t>(in + 5) << 4, SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 22 | SafeLoad<uint32_t>(in + 6) << 10, SafeLoad<uint32_t>(in + 6) };
+  shifts = simd_batch{ 8, 0, 2, 15, 0, 9, 0, 3 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 13-bit bundles 16 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) >> 29 | SafeLoad<uint32_t>(in + 7) << 3, SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 23 | SafeLoad<uint32_t>(in + 8) << 9, SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8) >> 30 | SafeLoad<uint32_t>(in + 9) << 2, SafeLoad<uint32_t>(in + 9) };
+  shifts = simd_batch{ 16, 0, 10, 0, 4, 17, 0, 11 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 13-bit bundles 24 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 9) >> 24 | SafeLoad<uint32_t>(in + 10) << 8, SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10) >> 31 | SafeLoad<uint32_t>(in + 11) << 1, SafeLoad<uint32_t>(in + 11), SafeLoad<uint32_t>(in + 11) >> 25 | SafeLoad<uint32_t>(in + 12) << 7, SafeLoad<uint32_t>(in + 12), SafeLoad<uint32_t>(in + 12) };
+  shifts = simd_batch{ 0, 5, 18, 0, 12, 0, 6, 19 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  in += 13;
+  return in;
+}
+
+inline static const uint32_t* unpack14_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x3fff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 14-bit bundles 0 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 28 | SafeLoad<uint32_t>(in + 1) << 4, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 24 | SafeLoad<uint32_t>(in + 2) << 8, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 20 | SafeLoad<uint32_t>(in + 3) << 12, SafeLoad<uint32_t>(in + 3) };
+  shifts = simd_batch{ 0, 14, 0, 10, 0, 6, 0, 2 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 14-bit bundles 8 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 30 | SafeLoad<uint32_t>(in + 4) << 2, SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) >> 26 | SafeLoad<uint32_t>(in + 5) << 6, SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 22 | SafeLoad<uint32_t>(in + 6) << 10, SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) };
+  shifts = simd_batch{ 16, 0, 12, 0, 8, 0, 4, 18 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 14-bit bundles 16 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 28 | SafeLoad<uint32_t>(in + 8) << 4, SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8) >> 24 | SafeLoad<uint32_t>(in + 9) << 8, SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9) >> 20 | SafeLoad<uint32_t>(in + 10) << 12, SafeLoad<uint32_t>(in + 10) };
+  shifts = simd_batch{ 0, 14, 0, 10, 0, 6, 0, 2 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 14-bit bundles 24 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10) >> 30 | SafeLoad<uint32_t>(in + 11) << 2, SafeLoad<uint32_t>(in + 11), SafeLoad<uint32_t>(in + 11) >> 26 | SafeLoad<uint32_t>(in + 12) << 6, SafeLoad<uint32_t>(in + 12), SafeLoad<uint32_t>(in + 12) >> 22 | SafeLoad<uint32_t>(in + 13) << 10, SafeLoad<uint32_t>(in + 13), SafeLoad<uint32_t>(in + 13) };
+  shifts = simd_batch{ 16, 0, 12, 0, 8, 0, 4, 18 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  in += 14;
+  return in;
+}
+
+inline static const uint32_t* unpack15_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x7fff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 15-bit bundles 0 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 30 | SafeLoad<uint32_t>(in + 1) << 2, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 28 | SafeLoad<uint32_t>(in + 2) << 4, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 26 | SafeLoad<uint32_t>(in + 3) << 6, SafeLoad<uint32_t>(in + 3) };
+  shifts = simd_batch{ 0, 15, 0, 13, 0, 11, 0, 9 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 15-bit bundles 8 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 3) >> 24 | SafeLoad<uint32_t>(in + 4) << 8, SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) >> 22 | SafeLoad<uint32_t>(in + 5) << 10, SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 20 | SafeLoad<uint32_t>(in + 6) << 12, SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) >> 18 | SafeLoad<uint32_t>(in + 7) << 14, SafeLoad<uint32_t>(in + 7) };
+  shifts = simd_batch{ 0, 7, 0, 5, 0, 3, 0, 1 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 15-bit bundles 16 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 31 | SafeLoad<uint32_t>(in + 8) << 1, SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8) >> 29 | SafeLoad<uint32_t>(in + 9) << 3, SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9) >> 27 | SafeLoad<uint32_t>(in + 10) << 5, SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10) >> 25 | SafeLoad<uint32_t>(in + 11) << 7 };
+  shifts = simd_batch{ 16, 0, 14, 0, 12, 0, 10, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 15-bit bundles 24 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 11), SafeLoad<uint32_t>(in + 11) >> 23 | SafeLoad<uint32_t>(in + 12) << 9, SafeLoad<uint32_t>(in + 12), SafeLoad<uint32_t>(in + 12) >> 21 | SafeLoad<uint32_t>(in + 13) << 11, SafeLoad<uint32_t>(in + 13), SafeLoad<uint32_t>(in + 13) >> 19 | SafeLoad<uint32_t>(in + 14) << 13, SafeLoad<uint32_t>(in + 14), SafeLoad<uint32_t>(in + 14) };
+  shifts = simd_batch{ 8, 0, 6, 0, 4, 0, 2, 17 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  in += 15;
+  return in;
+}
+
+inline static const uint32_t* unpack16_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0xffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 16-bit bundles 0 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) };
+  shifts = simd_batch{ 0, 16, 0, 16, 0, 16, 0, 16 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 16-bit bundles 8 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) };
+  shifts = simd_batch{ 0, 16, 0, 16, 0, 16, 0, 16 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 16-bit bundles 16 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 11), SafeLoad<uint32_t>(in + 11) };
+  shifts = simd_batch{ 0, 16, 0, 16, 0, 16, 0, 16 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 16-bit bundles 24 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 12), SafeLoad<uint32_t>(in + 12), SafeLoad<uint32_t>(in + 13), SafeLoad<uint32_t>(in + 13), SafeLoad<uint32_t>(in + 14), SafeLoad<uint32_t>(in + 14), SafeLoad<uint32_t>(in + 15), SafeLoad<uint32_t>(in + 15) };
+  shifts = simd_batch{ 0, 16, 0, 16, 0, 16, 0, 16 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  in += 16;
+  return in;
+}
+
+inline static const uint32_t* unpack17_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x1ffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 17-bit bundles 0 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 17 | SafeLoad<uint32_t>(in + 1) << 15, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 19 | SafeLoad<uint32_t>(in + 2) << 13, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 21 | SafeLoad<uint32_t>(in + 3) << 11, SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 23 | SafeLoad<uint32_t>(in + 4) << 9 };
+  shifts = simd_batch{ 0, 0, 2, 0, 4, 0, 6, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 17-bit bundles 8 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) >> 25 | SafeLoad<uint32_t>(in + 5) << 7, SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 27 | SafeLoad<uint32_t>(in + 6) << 5, SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) >> 29 | SafeLoad<uint32_t>(in + 7) << 3, SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 31 | SafeLoad<uint32_t>(in + 8) << 1 };
+  shifts = simd_batch{ 8, 0, 10, 0, 12, 0, 14, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 17-bit bundles 16 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 8) >> 16 | SafeLoad<uint32_t>(in + 9) << 16, SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9) >> 18 | SafeLoad<uint32_t>(in + 10) << 14, SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10) >> 20 | SafeLoad<uint32_t>(in + 11) << 12, SafeLoad<uint32_t>(in + 11), SafeLoad<uint32_t>(in + 11) >> 22 | SafeLoad<uint32_t>(in + 12) << 10, SafeLoad<uint32_t>(in + 12) };
+  shifts = simd_batch{ 0, 1, 0, 3, 0, 5, 0, 7 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 17-bit bundles 24 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 12) >> 24 | SafeLoad<uint32_t>(in + 13) << 8, SafeLoad<uint32_t>(in + 13), SafeLoad<uint32_t>(in + 13) >> 26 | SafeLoad<uint32_t>(in + 14) << 6, SafeLoad<uint32_t>(in + 14), SafeLoad<uint32_t>(in + 14) >> 28 | SafeLoad<uint32_t>(in + 15) << 4, SafeLoad<uint32_t>(in + 15), SafeLoad<uint32_t>(in + 15) >> 30 | SafeLoad<uint32_t>(in + 16) << 2, SafeLoad<uint32_t>(in + 16) };
+  shifts = simd_batch{ 0, 9, 0, 11, 0, 13, 0, 15 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  in += 17;
+  return in;
+}
+
+inline static const uint32_t* unpack18_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x3ffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 18-bit bundles 0 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 18 | SafeLoad<uint32_t>(in + 1) << 14, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 22 | SafeLoad<uint32_t>(in + 2) << 10, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 26 | SafeLoad<uint32_t>(in + 3) << 6, SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 30 | SafeLoad<uint32_t>(in + 4) << 2 };
+  shifts = simd_batch{ 0, 0, 4, 0, 8, 0, 12, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 18-bit bundles 8 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 4) >> 16 | SafeLoad<uint32_t>(in + 5) << 16, SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 20 | SafeLoad<uint32_t>(in + 6) << 12, SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) >> 24 | SafeLoad<uint32_t>(in + 7) << 8, SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 28 | SafeLoad<uint32_t>(in + 8) << 4, SafeLoad<uint32_t>(in + 8) };
+  shifts = simd_batch{ 0, 2, 0, 6, 0, 10, 0, 14 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 18-bit bundles 16 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9) >> 18 | SafeLoad<uint32_t>(in + 10) << 14, SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10) >> 22 | SafeLoad<uint32_t>(in + 11) << 10, SafeLoad<uint32_t>(in + 11), SafeLoad<uint32_t>(in + 11) >> 26 | SafeLoad<uint32_t>(in + 12) << 6, SafeLoad<uint32_t>(in + 12), SafeLoad<uint32_t>(in + 12) >> 30 | SafeLoad<uint32_t>(in + 13) << 2 };
+  shifts = simd_batch{ 0, 0, 4, 0, 8, 0, 12, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 18-bit bundles 24 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 13) >> 16 | SafeLoad<uint32_t>(in + 14) << 16, SafeLoad<uint32_t>(in + 14), SafeLoad<uint32_t>(in + 14) >> 20 | SafeLoad<uint32_t>(in + 15) << 12, SafeLoad<uint32_t>(in + 15), SafeLoad<uint32_t>(in + 15) >> 24 | SafeLoad<uint32_t>(in + 16) << 8, SafeLoad<uint32_t>(in + 16), SafeLoad<uint32_t>(in + 16) >> 28 | SafeLoad<uint32_t>(in + 17) << 4, SafeLoad<uint32_t>(in + 17) };
+  shifts = simd_batch{ 0, 2, 0, 6, 0, 10, 0, 14 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  in += 18;
+  return in;
+}
+
+inline static const uint32_t* unpack19_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x7ffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 19-bit bundles 0 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 19 | SafeLoad<uint32_t>(in + 1) << 13, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 25 | SafeLoad<uint32_t>(in + 2) << 7, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 31 | SafeLoad<uint32_t>(in + 3) << 1, SafeLoad<uint32_t>(in + 3) >> 18 | SafeLoad<uint32_t>(in + 4) << 14, SafeLoad<uint32_t>(in + 4) };
+  shifts = simd_batch{ 0, 0, 6, 0, 12, 0, 0, 5 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 19-bit bundles 8 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 4) >> 24 | SafeLoad<uint32_t>(in + 5) << 8, SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 30 | SafeLoad<uint32_t>(in + 6) << 2, SafeLoad<uint32_t>(in + 6) >> 17 | SafeLoad<uint32_t>(in + 7) << 15, SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 23 | SafeLoad<uint32_t>(in + 8) << 9, SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8) >> 29 | SafeLoad<uint32_t>(in + 9) << 3 };
+  shifts = simd_batch{ 0, 11, 0, 0, 4, 0, 10, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 19-bit bundles 16 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 9) >> 16 | SafeLoad<uint32_t>(in + 10) << 16, SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10) >> 22 | SafeLoad<uint32_t>(in + 11) << 10, SafeLoad<uint32_t>(in + 11), SafeLoad<uint32_t>(in + 11) >> 28 | SafeLoad<uint32_t>(in + 12) << 4, SafeLoad<uint32_t>(in + 12) >> 15 | SafeLoad<uint32_t>(in + 13) << 17, SafeLoad<uint32_t>(in + 13), SafeLoad<uint32_t>(in + 13) >> 21 | SafeLoad<uint32_t>(in + 14) << 11 };
+  shifts = simd_batch{ 0, 3, 0, 9, 0, 0, 2, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 19-bit bundles 24 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 14), SafeLoad<uint32_t>(in + 14) >> 27 | SafeLoad<uint32_t>(in + 15) << 5, SafeLoad<uint32_t>(in + 15) >> 14 | SafeLoad<uint32_t>(in + 16) << 18, SafeLoad<uint32_t>(in + 16), SafeLoad<uint32_t>(in + 16) >> 20 | SafeLoad<uint32_t>(in + 17) << 12, SafeLoad<uint32_t>(in + 17), SafeLoad<uint32_t>(in + 17) >> 26 | SafeLoad<uint32_t>(in + 18) << 6, SafeLoad<uint32_t>(in + 18) };
+  shifts = simd_batch{ 8, 0, 0, 1, 0, 7, 0, 13 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  in += 19;
+  return in;
+}
+
+inline static const uint32_t* unpack20_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0xfffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 20-bit bundles 0 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 20 | SafeLoad<uint32_t>(in + 1) << 12, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 28 | SafeLoad<uint32_t>(in + 2) << 4, SafeLoad<uint32_t>(in + 2) >> 16 | SafeLoad<uint32_t>(in + 3) << 16, SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 24 | SafeLoad<uint32_t>(in + 4) << 8, SafeLoad<uint32_t>(in + 4) };
+  shifts = simd_batch{ 0, 0, 8, 0, 0, 4, 0, 12 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 20-bit bundles 8 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 20 | SafeLoad<uint32_t>(in + 6) << 12, SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) >> 28 | SafeLoad<uint32_t>(in + 7) << 4, SafeLoad<uint32_t>(in + 7) >> 16 | SafeLoad<uint32_t>(in + 8) << 16, SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8) >> 24 | SafeLoad<uint32_t>(in + 9) << 8, SafeLoad<uint32_t>(in + 9) };
+  shifts = simd_batch{ 0, 0, 8, 0, 0, 4, 0, 12 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 20-bit bundles 16 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10) >> 20 | SafeLoad<uint32_t>(in + 11) << 12, SafeLoad<uint32_t>(in + 11), SafeLoad<uint32_t>(in + 11) >> 28 | SafeLoad<uint32_t>(in + 12) << 4, SafeLoad<uint32_t>(in + 12) >> 16 | SafeLoad<uint32_t>(in + 13) << 16, SafeLoad<uint32_t>(in + 13), SafeLoad<uint32_t>(in + 13) >> 24 | SafeLoad<uint32_t>(in + 14) << 8, SafeLoad<uint32_t>(in + 14) };
+  shifts = simd_batch{ 0, 0, 8, 0, 0, 4, 0, 12 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 20-bit bundles 24 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 15), SafeLoad<uint32_t>(in + 15) >> 20 | SafeLoad<uint32_t>(in + 16) << 12, SafeLoad<uint32_t>(in + 16), SafeLoad<uint32_t>(in + 16) >> 28 | SafeLoad<uint32_t>(in + 17) << 4, SafeLoad<uint32_t>(in + 17) >> 16 | SafeLoad<uint32_t>(in + 18) << 16, SafeLoad<uint32_t>(in + 18), SafeLoad<uint32_t>(in + 18) >> 24 | SafeLoad<uint32_t>(in + 19) << 8, SafeLoad<uint32_t>(in + 19) };
+  shifts = simd_batch{ 0, 0, 8, 0, 0, 4, 0, 12 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  in += 20;
+  return in;
+}
+
+inline static const uint32_t* unpack21_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x1fffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 21-bit bundles 0 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 21 | SafeLoad<uint32_t>(in + 1) << 11, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 31 | SafeLoad<uint32_t>(in + 2) << 1, SafeLoad<uint32_t>(in + 2) >> 20 | SafeLoad<uint32_t>(in + 3) << 12, SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 30 | SafeLoad<uint32_t>(in + 4) << 2, SafeLoad<uint32_t>(in + 4) >> 19 | SafeLoad<uint32_t>(in + 5) << 13 };
+  shifts = simd_batch{ 0, 0, 10, 0, 0, 9, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 21-bit bundles 8 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 29 | SafeLoad<uint32_t>(in + 6) << 3, SafeLoad<uint32_t>(in + 6) >> 18 | SafeLoad<uint32_t>(in + 7) << 14, SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 28 | SafeLoad<uint32_t>(in + 8) << 4, SafeLoad<uint32_t>(in + 8) >> 17 | SafeLoad<uint32_t>(in + 9) << 15, SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9) >> 27 | SafeLoad<uint32_t>(in + 10) << 5 };
+  shifts = simd_batch{ 8, 0, 0, 7, 0, 0, 6, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 21-bit bundles 16 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 10) >> 16 | SafeLoad<uint32_t>(in + 11) << 16, SafeLoad<uint32_t>(in + 11), SafeLoad<uint32_t>(in + 11) >> 26 | SafeLoad<uint32_t>(in + 12) << 6, SafeLoad<uint32_t>(in + 12) >> 15 | SafeLoad<uint32_t>(in + 13) << 17, SafeLoad<uint32_t>(in + 13), SafeLoad<uint32_t>(in + 13) >> 25 | SafeLoad<uint32_t>(in + 14) << 7, SafeLoad<uint32_t>(in + 14) >> 14 | SafeLoad<uint32_t>(in + 15) << 18, SafeLoad<uint32_t>(in + 15) };
+  shifts = simd_batch{ 0, 5, 0, 0, 4, 0, 0, 3 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 21-bit bundles 24 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 15) >> 24 | SafeLoad<uint32_t>(in + 16) << 8, SafeLoad<uint32_t>(in + 16) >> 13 | SafeLoad<uint32_t>(in + 17) << 19, SafeLoad<uint32_t>(in + 17), SafeLoad<uint32_t>(in + 17) >> 23 | SafeLoad<uint32_t>(in + 18) << 9, SafeLoad<uint32_t>(in + 18) >> 12 | SafeLoad<uint32_t>(in + 19) << 20, SafeLoad<uint32_t>(in + 19), SafeLoad<uint32_t>(in + 19) >> 22 | SafeLoad<uint32_t>(in + 20) << 10, SafeLoad<uint32_t>(in + 20) };
+  shifts = simd_batch{ 0, 0, 2, 0, 0, 1, 0, 11 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  in += 21;
+  return in;
+}
+
+inline static const uint32_t* unpack22_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x3fffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 22-bit bundles 0 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 22 | SafeLoad<uint32_t>(in + 1) << 10, SafeLoad<uint32_t>(in + 1) >> 12 | SafeLoad<uint32_t>(in + 2) << 20, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 24 | SafeLoad<uint32_t>(in + 3) << 8, SafeLoad<uint32_t>(in + 3) >> 14 | SafeLoad<uint32_t>(in + 4) << 18, SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) >> 26 | SafeLoad<uint32_t>(in + 5) << 6 };
+  shifts = simd_batch{ 0, 0, 0, 2, 0, 0, 4, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 22-bit bundles 8 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 5) >> 16 | SafeLoad<uint32_t>(in + 6) << 16, SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) >> 28 | SafeLoad<uint32_t>(in + 7) << 4, SafeLoad<uint32_t>(in + 7) >> 18 | SafeLoad<uint32_t>(in + 8) << 14, SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8) >> 30 | SafeLoad<uint32_t>(in + 9) << 2, SafeLoad<uint32_t>(in + 9) >> 20 | SafeLoad<uint32_t>(in + 10) << 12, SafeLoad<uint32_t>(in + 10) };
+  shifts = simd_batch{ 0, 6, 0, 0, 8, 0, 0, 10 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 22-bit bundles 16 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 11), SafeLoad<uint32_t>(in + 11) >> 22 | SafeLoad<uint32_t>(in + 12) << 10, SafeLoad<uint32_t>(in + 12) >> 12 | SafeLoad<uint32_t>(in + 13) << 20, SafeLoad<uint32_t>(in + 13), SafeLoad<uint32_t>(in + 13) >> 24 | SafeLoad<uint32_t>(in + 14) << 8, SafeLoad<uint32_t>(in + 14) >> 14 | SafeLoad<uint32_t>(in + 15) << 18, SafeLoad<uint32_t>(in + 15), SafeLoad<uint32_t>(in + 15) >> 26 | SafeLoad<uint32_t>(in + 16) << 6 };
+  shifts = simd_batch{ 0, 0, 0, 2, 0, 0, 4, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 22-bit bundles 24 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 16) >> 16 | SafeLoad<uint32_t>(in + 17) << 16, SafeLoad<uint32_t>(in + 17), SafeLoad<uint32_t>(in + 17) >> 28 | SafeLoad<uint32_t>(in + 18) << 4, SafeLoad<uint32_t>(in + 18) >> 18 | SafeLoad<uint32_t>(in + 19) << 14, SafeLoad<uint32_t>(in + 19), SafeLoad<uint32_t>(in + 19) >> 30 | SafeLoad<uint32_t>(in + 20) << 2, SafeLoad<uint32_t>(in + 20) >> 20 | SafeLoad<uint32_t>(in + 21) << 12, SafeLoad<uint32_t>(in + 21) };
+  shifts = simd_batch{ 0, 6, 0, 0, 8, 0, 0, 10 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  in += 22;
+  return in;
+}
+
+inline static const uint32_t* unpack23_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x7fffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 23-bit bundles 0 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 23 | SafeLoad<uint32_t>(in + 1) << 9, SafeLoad<uint32_t>(in + 1) >> 14 | SafeLoad<uint32_t>(in + 2) << 18, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 28 | SafeLoad<uint32_t>(in + 3) << 4, SafeLoad<uint32_t>(in + 3) >> 19 | SafeLoad<uint32_t>(in + 4) << 13, SafeLoad<uint32_t>(in + 4) >> 10 | SafeLoad<uint32_t>(in + 5) << 22, SafeLoad<uint32_t>(in + 5) };
+  shifts = simd_batch{ 0, 0, 0, 5, 0, 0, 0, 1 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 23-bit bundles 8 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 5) >> 24 | SafeLoad<uint32_t>(in + 6) << 8, SafeLoad<uint32_t>(in + 6) >> 15 | SafeLoad<uint32_t>(in + 7) << 17, SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 29 | SafeLoad<uint32_t>(in + 8) << 3, SafeLoad<uint32_t>(in + 8) >> 20 | SafeLoad<uint32_t>(in + 9) << 12, SafeLoad<uint32_t>(in + 9) >> 11 | SafeLoad<uint32_t>(in + 10) << 21, SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10) >> 25 | SafeLoad<uint32_t>(in + 11) << 7 };
+  shifts = simd_batch{ 0, 0, 6, 0, 0, 0, 2, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 23-bit bundles 16 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 11) >> 16 | SafeLoad<uint32_t>(in + 12) << 16, SafeLoad<uint32_t>(in + 12), SafeLoad<uint32_t>(in + 12) >> 30 | SafeLoad<uint32_t>(in + 13) << 2, SafeLoad<uint32_t>(in + 13) >> 21 | SafeLoad<uint32_t>(in + 14) << 11, SafeLoad<uint32_t>(in + 14) >> 12 | SafeLoad<uint32_t>(in + 15) << 20, SafeLoad<uint32_t>(in + 15), SafeLoad<uint32_t>(in + 15) >> 26 | SafeLoad<uint32_t>(in + 16) << 6, SafeLoad<uint32_t>(in + 16) >> 17 | SafeLoad<uint32_t>(in + 17) << 15 };
+  shifts = simd_batch{ 0, 7, 0, 0, 0, 3, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 23-bit bundles 24 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 17), SafeLoad<uint32_t>(in + 17) >> 31 | SafeLoad<uint32_t>(in + 18) << 1, SafeLoad<uint32_t>(in + 18) >> 22 | SafeLoad<uint32_t>(in + 19) << 10, SafeLoad<uint32_t>(in + 19) >> 13 | SafeLoad<uint32_t>(in + 20) << 19, SafeLoad<uint32_t>(in + 20), SafeLoad<uint32_t>(in + 20) >> 27 | SafeLoad<uint32_t>(in + 21) << 5, SafeLoad<uint32_t>(in + 21) >> 18 | SafeLoad<uint32_t>(in + 22) << 14, SafeLoad<uint32_t>(in + 22) };
+  shifts = simd_batch{ 8, 0, 0, 0, 4, 0, 0, 9 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  in += 23;
+  return in;
+}
+
+inline static const uint32_t* unpack24_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0xffffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 24-bit bundles 0 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 24 | SafeLoad<uint32_t>(in + 1) << 8, SafeLoad<uint32_t>(in + 1) >> 16 | SafeLoad<uint32_t>(in + 2) << 16, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 24 | SafeLoad<uint32_t>(in + 4) << 8, SafeLoad<uint32_t>(in + 4) >> 16 | SafeLoad<uint32_t>(in + 5) << 16, SafeLoad<uint32_t>(in + 5) };
+  shifts = simd_batch{ 0, 0, 0, 8, 0, 0, 0, 8 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 24-bit bundles 8 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) >> 24 | SafeLoad<uint32_t>(in + 7) << 8, SafeLoad<uint32_t>(in + 7) >> 16 | SafeLoad<uint32_t>(in + 8) << 16, SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9) >> 24 | SafeLoad<uint32_t>(in + 10) << 8, SafeLoad<uint32_t>(in + 10) >> 16 | SafeLoad<uint32_t>(in + 11) << 16, SafeLoad<uint32_t>(in + 11) };
+  shifts = simd_batch{ 0, 0, 0, 8, 0, 0, 0, 8 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 24-bit bundles 16 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 12), SafeLoad<uint32_t>(in + 12) >> 24 | SafeLoad<uint32_t>(in + 13) << 8, SafeLoad<uint32_t>(in + 13) >> 16 | SafeLoad<uint32_t>(in + 14) << 16, SafeLoad<uint32_t>(in + 14), SafeLoad<uint32_t>(in + 15), SafeLoad<uint32_t>(in + 15) >> 24 | SafeLoad<uint32_t>(in + 16) << 8, SafeLoad<uint32_t>(in + 16) >> 16 | SafeLoad<uint32_t>(in + 17) << 16, SafeLoad<uint32_t>(in + 17) };
+  shifts = simd_batch{ 0, 0, 0, 8, 0, 0, 0, 8 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 24-bit bundles 24 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 18), SafeLoad<uint32_t>(in + 18) >> 24 | SafeLoad<uint32_t>(in + 19) << 8, SafeLoad<uint32_t>(in + 19) >> 16 | SafeLoad<uint32_t>(in + 20) << 16, SafeLoad<uint32_t>(in + 20), SafeLoad<uint32_t>(in + 21), SafeLoad<uint32_t>(in + 21) >> 24 | SafeLoad<uint32_t>(in + 22) << 8, SafeLoad<uint32_t>(in + 22) >> 16 | SafeLoad<uint32_t>(in + 23) << 16, SafeLoad<uint32_t>(in + 23) };
+  shifts = simd_batch{ 0, 0, 0, 8, 0, 0, 0, 8 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  in += 24;
+  return in;
+}
+
+inline static const uint32_t* unpack25_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x1ffffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 25-bit bundles 0 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 25 | SafeLoad<uint32_t>(in + 1) << 7, SafeLoad<uint32_t>(in + 1) >> 18 | SafeLoad<uint32_t>(in + 2) << 14, SafeLoad<uint32_t>(in + 2) >> 11 | SafeLoad<uint32_t>(in + 3) << 21, SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 29 | SafeLoad<uint32_t>(in + 4) << 3, SafeLoad<uint32_t>(in + 4) >> 22 | SafeLoad<uint32_t>(in + 5) << 10, SafeLoad<uint32_t>(in + 5) >> 15 | SafeLoad<uint32_t>(in + 6) << 17 };
+  shifts = simd_batch{ 0, 0, 0, 0, 4, 0, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 25-bit bundles 8 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 6) >> 8 | SafeLoad<uint32_t>(in + 7) << 24, SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 26 | SafeLoad<uint32_t>(in + 8) << 6, SafeLoad<uint32_t>(in + 8) >> 19 | SafeLoad<uint32_t>(in + 9) << 13, SafeLoad<uint32_t>(in + 9) >> 12 | SafeLoad<uint32_t>(in + 10) << 20, SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10) >> 30 | SafeLoad<uint32_t>(in + 11) << 2, SafeLoad<uint32_t>(in + 11) >> 23 | SafeLoad<uint32_t>(in + 12) << 9 };
+  shifts = simd_batch{ 0, 1, 0, 0, 0, 5, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 25-bit bundles 16 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 12) >> 16 | SafeLoad<uint32_t>(in + 13) << 16, SafeLoad<uint32_t>(in + 13) >> 9 | SafeLoad<uint32_t>(in + 14) << 23, SafeLoad<uint32_t>(in + 14), SafeLoad<uint32_t>(in + 14) >> 27 | SafeLoad<uint32_t>(in + 15) << 5, SafeLoad<uint32_t>(in + 15) >> 20 | SafeLoad<uint32_t>(in + 16) << 12, SafeLoad<uint32_t>(in + 16) >> 13 | SafeLoad<uint32_t>(in + 17) << 19, SafeLoad<uint32_t>(in + 17), SafeLoad<uint32_t>(in + 17) >> 31 | SafeLoad<uint32_t>(in + 18) << 1 };
+  shifts = simd_batch{ 0, 0, 2, 0, 0, 0, 6, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 25-bit bundles 24 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 18) >> 24 | SafeLoad<uint32_t>(in + 19) << 8, SafeLoad<uint32_t>(in + 19) >> 17 | SafeLoad<uint32_t>(in + 20) << 15, SafeLoad<uint32_t>(in + 20) >> 10 | SafeLoad<uint32_t>(in + 21) << 22, SafeLoad<uint32_t>(in + 21), SafeLoad<uint32_t>(in + 21) >> 28 | SafeLoad<uint32_t>(in + 22) << 4, SafeLoad<uint32_t>(in + 22) >> 21 | SafeLoad<uint32_t>(in + 23) << 11, SafeLoad<uint32_t>(in + 23) >> 14 | SafeLoad<uint32_t>(in + 24) << 18, SafeLoad<uint32_t>(in + 24) };
+  shifts = simd_batch{ 0, 0, 0, 3, 0, 0, 0, 7 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  in += 25;
+  return in;
+}
+
+inline static const uint32_t* unpack26_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x3ffffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 26-bit bundles 0 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 26 | SafeLoad<uint32_t>(in + 1) << 6, SafeLoad<uint32_t>(in + 1) >> 20 | SafeLoad<uint32_t>(in + 2) << 12, SafeLoad<uint32_t>(in + 2) >> 14 | SafeLoad<uint32_t>(in + 3) << 18, SafeLoad<uint32_t>(in + 3) >> 8 | SafeLoad<uint32_t>(in + 4) << 24, SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) >> 28 | SafeLoad<uint32_t>(in + 5) << 4, SafeLoad<uint32_t>(in + 5) >> 22 | SafeLoad<uint32_t>(in + 6) << 10 };
+  shifts = simd_batch{ 0, 0, 0, 0, 0, 2, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 26-bit bundles 8 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 6) >> 16 | SafeLoad<uint32_t>(in + 7) << 16, SafeLoad<uint32_t>(in + 7) >> 10 | SafeLoad<uint32_t>(in + 8) << 22, SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8) >> 30 | SafeLoad<uint32_t>(in + 9) << 2, SafeLoad<uint32_t>(in + 9) >> 24 | SafeLoad<uint32_t>(in + 10) << 8, SafeLoad<uint32_t>(in + 10) >> 18 | SafeLoad<uint32_t>(in + 11) << 14, SafeLoad<uint32_t>(in + 11) >> 12 | SafeLoad<uint32_t>(in + 12) << 20, SafeLoad<uint32_t>(in + 12) };
+  shifts = simd_batch{ 0, 0, 4, 0, 0, 0, 0, 6 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 26-bit bundles 16 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 13), SafeLoad<uint32_t>(in + 13) >> 26 | SafeLoad<uint32_t>(in + 14) << 6, SafeLoad<uint32_t>(in + 14) >> 20 | SafeLoad<uint32_t>(in + 15) << 12, SafeLoad<uint32_t>(in + 15) >> 14 | SafeLoad<uint32_t>(in + 16) << 18, SafeLoad<uint32_t>(in + 16) >> 8 | SafeLoad<uint32_t>(in + 17) << 24, SafeLoad<uint32_t>(in + 17), SafeLoad<uint32_t>(in + 17) >> 28 | SafeLoad<uint32_t>(in + 18) << 4, SafeLoad<uint32_t>(in + 18) >> 22 | SafeLoad<uint32_t>(in + 19) << 10 };
+  shifts = simd_batch{ 0, 0, 0, 0, 0, 2, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 26-bit bundles 24 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 19) >> 16 | SafeLoad<uint32_t>(in + 20) << 16, SafeLoad<uint32_t>(in + 20) >> 10 | SafeLoad<uint32_t>(in + 21) << 22, SafeLoad<uint32_t>(in + 21), SafeLoad<uint32_t>(in + 21) >> 30 | SafeLoad<uint32_t>(in + 22) << 2, SafeLoad<uint32_t>(in + 22) >> 24 | SafeLoad<uint32_t>(in + 23) << 8, SafeLoad<uint32_t>(in + 23) >> 18 | SafeLoad<uint32_t>(in + 24) << 14, SafeLoad<uint32_t>(in + 24) >> 12 | SafeLoad<uint32_t>(in + 25) << 20, SafeLoad<uint32_t>(in + 25) };
+  shifts = simd_batch{ 0, 0, 4, 0, 0, 0, 0, 6 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  in += 26;
+  return in;
+}
+
+inline static const uint32_t* unpack27_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x7ffffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 27-bit bundles 0 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 27 | SafeLoad<uint32_t>(in + 1) << 5, SafeLoad<uint32_t>(in + 1) >> 22 | SafeLoad<uint32_t>(in + 2) << 10, SafeLoad<uint32_t>(in + 2) >> 17 | SafeLoad<uint32_t>(in + 3) << 15, SafeLoad<uint32_t>(in + 3) >> 12 | SafeLoad<uint32_t>(in + 4) << 20, SafeLoad<uint32_t>(in + 4) >> 7 | SafeLoad<uint32_t>(in + 5) << 25, SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 29 | SafeLoad<uint32_t>(in + 6) << 3 };
+  shifts = simd_batch{ 0, 0, 0, 0, 0, 0, 2, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 27-bit bundles 8 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 6) >> 24 | SafeLoad<uint32_t>(in + 7) << 8, SafeLoad<uint32_t>(in + 7) >> 19 | SafeLoad<uint32_t>(in + 8) << 13, SafeLoad<uint32_t>(in + 8) >> 14 | SafeLoad<uint32_t>(in + 9) << 18, SafeLoad<uint32_t>(in + 9) >> 9 | SafeLoad<uint32_t>(in + 10) << 23, SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10) >> 31 | SafeLoad<uint32_t>(in + 11) << 1, SafeLoad<uint32_t>(in + 11) >> 26 | SafeLoad<uint32_t>(in + 12) << 6, SafeLoad<uint32_t>(in + 12) >> 21 | SafeLoad<uint32_t>(in + 13) << 11 };
+  shifts = simd_batch{ 0, 0, 0, 0, 4, 0, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 27-bit bundles 16 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 13) >> 16 | SafeLoad<uint32_t>(in + 14) << 16, SafeLoad<uint32_t>(in + 14) >> 11 | SafeLoad<uint32_t>(in + 15) << 21, SafeLoad<uint32_t>(in + 15) >> 6 | SafeLoad<uint32_t>(in + 16) << 26, SafeLoad<uint32_t>(in + 16), SafeLoad<uint32_t>(in + 16) >> 28 | SafeLoad<uint32_t>(in + 17) << 4, SafeLoad<uint32_t>(in + 17) >> 23 | SafeLoad<uint32_t>(in + 18) << 9, SafeLoad<uint32_t>(in + 18) >> 18 | SafeLoad<uint32_t>(in + 19) << 14, SafeLoad<uint32_t>(in + 19) >> 13 | SafeLoad<uint32_t>(in + 20) << 19 };
+  shifts = simd_batch{ 0, 0, 0, 1, 0, 0, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 27-bit bundles 24 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 20) >> 8 | SafeLoad<uint32_t>(in + 21) << 24, SafeLoad<uint32_t>(in + 21), SafeLoad<uint32_t>(in + 21) >> 30 | SafeLoad<uint32_t>(in + 22) << 2, SafeLoad<uint32_t>(in + 22) >> 25 | SafeLoad<uint32_t>(in + 23) << 7, SafeLoad<uint32_t>(in + 23) >> 20 | SafeLoad<uint32_t>(in + 24) << 12, SafeLoad<uint32_t>(in + 24) >> 15 | SafeLoad<uint32_t>(in + 25) << 17, SafeLoad<uint32_t>(in + 25) >> 10 | SafeLoad<uint32_t>(in + 26) << 22, SafeLoad<uint32_t>(in + 26) };
+  shifts = simd_batch{ 0, 3, 0, 0, 0, 0, 0, 5 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  in += 27;
+  return in;
+}
+
+inline static const uint32_t* unpack28_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0xfffffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 28-bit bundles 0 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 28 | SafeLoad<uint32_t>(in + 1) << 4, SafeLoad<uint32_t>(in + 1) >> 24 | SafeLoad<uint32_t>(in + 2) << 8, SafeLoad<uint32_t>(in + 2) >> 20 | SafeLoad<uint32_t>(in + 3) << 12, SafeLoad<uint32_t>(in + 3) >> 16 | SafeLoad<uint32_t>(in + 4) << 16, SafeLoad<uint32_t>(in + 4) >> 12 | SafeLoad<uint32_t>(in + 5) << 20, SafeLoad<uint32_t>(in + 5) >> 8 | SafeLoad<uint32_t>(in + 6) << 24, SafeLoad<uint32_t>(in + 6) };
+  shifts = simd_batch{ 0, 0, 0, 0, 0, 0, 0, 4 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 28-bit bundles 8 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 28 | SafeLoad<uint32_t>(in + 8) << 4, SafeLoad<uint32_t>(in + 8) >> 24 | SafeLoad<uint32_t>(in + 9) << 8, SafeLoad<uint32_t>(in + 9) >> 20 | SafeLoad<uint32_t>(in + 10) << 12, SafeLoad<uint32_t>(in + 10) >> 16 | SafeLoad<uint32_t>(in + 11) << 16, SafeLoad<uint32_t>(in + 11) >> 12 | SafeLoad<uint32_t>(in + 12) << 20, SafeLoad<uint32_t>(in + 12) >> 8 | SafeLoad<uint32_t>(in + 13) << 24, SafeLoad<uint32_t>(in + 13) };
+  shifts = simd_batch{ 0, 0, 0, 0, 0, 0, 0, 4 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 28-bit bundles 16 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 14), SafeLoad<uint32_t>(in + 14) >> 28 | SafeLoad<uint32_t>(in + 15) << 4, SafeLoad<uint32_t>(in + 15) >> 24 | SafeLoad<uint32_t>(in + 16) << 8, SafeLoad<uint32_t>(in + 16) >> 20 | SafeLoad<uint32_t>(in + 17) << 12, SafeLoad<uint32_t>(in + 17) >> 16 | SafeLoad<uint32_t>(in + 18) << 16, SafeLoad<uint32_t>(in + 18) >> 12 | SafeLoad<uint32_t>(in + 19) << 20, SafeLoad<uint32_t>(in + 19) >> 8 | SafeLoad<uint32_t>(in + 20) << 24, SafeLoad<uint32_t>(in + 20) };
+  shifts = simd_batch{ 0, 0, 0, 0, 0, 0, 0, 4 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 28-bit bundles 24 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 21), SafeLoad<uint32_t>(in + 21) >> 28 | SafeLoad<uint32_t>(in + 22) << 4, SafeLoad<uint32_t>(in + 22) >> 24 | SafeLoad<uint32_t>(in + 23) << 8, SafeLoad<uint32_t>(in + 23) >> 20 | SafeLoad<uint32_t>(in + 24) << 12, SafeLoad<uint32_t>(in + 24) >> 16 | SafeLoad<uint32_t>(in + 25) << 16, SafeLoad<uint32_t>(in + 25) >> 12 | SafeLoad<uint32_t>(in + 26) << 20, SafeLoad<uint32_t>(in + 26) >> 8 | SafeLoad<uint32_t>(in + 27) << 24, SafeLoad<uint32_t>(in + 27) };
+  shifts = simd_batch{ 0, 0, 0, 0, 0, 0, 0, 4 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  in += 28;
+  return in;
+}
+
+inline static const uint32_t* unpack29_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x1fffffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 29-bit bundles 0 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 29 | SafeLoad<uint32_t>(in + 1) << 3, SafeLoad<uint32_t>(in + 1) >> 26 | SafeLoad<uint32_t>(in + 2) << 6, SafeLoad<uint32_t>(in + 2) >> 23 | SafeLoad<uint32_t>(in + 3) << 9, SafeLoad<uint32_t>(in + 3) >> 20 | SafeLoad<uint32_t>(in + 4) << 12, SafeLoad<uint32_t>(in + 4) >> 17 | SafeLoad<uint32_t>(in + 5) << 15, SafeLoad<uint32_t>(in + 5) >> 14 | SafeLoad<uint32_t>(in + 6) << 18, SafeLoad<uint32_t>(in + 6) >> 11 | SafeLoad<uint32_t>(in + 7) << 21 };
+  shifts = simd_batch{ 0, 0, 0, 0, 0, 0, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 29-bit bundles 8 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 7) >> 8 | SafeLoad<uint32_t>(in + 8) << 24, SafeLoad<uint32_t>(in + 8) >> 5 | SafeLoad<uint32_t>(in + 9) << 27, SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9) >> 31 | SafeLoad<uint32_t>(in + 10) << 1, SafeLoad<uint32_t>(in + 10) >> 28 | SafeLoad<uint32_t>(in + 11) << 4, SafeLoad<uint32_t>(in + 11) >> 25 | SafeLoad<uint32_t>(in + 12) << 7, SafeLoad<uint32_t>(in + 12) >> 22 | SafeLoad<uint32_t>(in + 13) << 10, SafeLoad<uint32_t>(in + 13) >> 19 | SafeLoad<uint32_t>(in + 14) << 13 };
+  shifts = simd_batch{ 0, 0, 2, 0, 0, 0, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 29-bit bundles 16 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 14) >> 16 | SafeLoad<uint32_t>(in + 15) << 16, SafeLoad<uint32_t>(in + 15) >> 13 | SafeLoad<uint32_t>(in + 16) << 19, SafeLoad<uint32_t>(in + 16) >> 10 | SafeLoad<uint32_t>(in + 17) << 22, SafeLoad<uint32_t>(in + 17) >> 7 | SafeLoad<uint32_t>(in + 18) << 25, SafeLoad<uint32_t>(in + 18) >> 4 | SafeLoad<uint32_t>(in + 19) << 28, SafeLoad<uint32_t>(in + 19), SafeLoad<uint32_t>(in + 19) >> 30 | SafeLoad<uint32_t>(in + 20) << 2, SafeLoad<uint32_t>(in + 20) >> 27 | SafeLoad<uint32_t>(in + 21) << 5 };
+  shifts = simd_batch{ 0, 0, 0, 0, 0, 1, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 29-bit bundles 24 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 21) >> 24 | SafeLoad<uint32_t>(in + 22) << 8, SafeLoad<uint32_t>(in + 22) >> 21 | SafeLoad<uint32_t>(in + 23) << 11, SafeLoad<uint32_t>(in + 23) >> 18 | SafeLoad<uint32_t>(in + 24) << 14, SafeLoad<uint32_t>(in + 24) >> 15 | SafeLoad<uint32_t>(in + 25) << 17, SafeLoad<uint32_t>(in + 25) >> 12 | SafeLoad<uint32_t>(in + 26) << 20, SafeLoad<uint32_t>(in + 26) >> 9 | SafeLoad<uint32_t>(in + 27) << 23, SafeLoad<uint32_t>(in + 27) >> 6 | SafeLoad<uint32_t>(in + 28) << 26, SafeLoad<uint32_t>(in + 28) };
+  shifts = simd_batch{ 0, 0, 0, 0, 0, 0, 0, 3 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  in += 29;
+  return in;
+}
+
+inline static const uint32_t* unpack30_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x3fffffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 30-bit bundles 0 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 30 | SafeLoad<uint32_t>(in + 1) << 2, SafeLoad<uint32_t>(in + 1) >> 28 | SafeLoad<uint32_t>(in + 2) << 4, SafeLoad<uint32_t>(in + 2) >> 26 | SafeLoad<uint32_t>(in + 3) << 6, SafeLoad<uint32_t>(in + 3) >> 24 | SafeLoad<uint32_t>(in + 4) << 8, SafeLoad<uint32_t>(in + 4) >> 22 | SafeLoad<uint32_t>(in + 5) << 10, SafeLoad<uint32_t>(in + 5) >> 20 | SafeLoad<uint32_t>(in + 6) << 12, SafeLoad<uint32_t>(in + 6) >> 18 | SafeLoad<uint32_t>(in + 7) << 14 };
+  shifts = simd_batch{ 0, 0, 0, 0, 0, 0, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 30-bit bundles 8 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 7) >> 16 | SafeLoad<uint32_t>(in + 8) << 16, SafeLoad<uint32_t>(in + 8) >> 14 | SafeLoad<uint32_t>(in + 9) << 18, SafeLoad<uint32_t>(in + 9) >> 12 | SafeLoad<uint32_t>(in + 10) << 20, SafeLoad<uint32_t>(in + 10) >> 10 | SafeLoad<uint32_t>(in + 11) << 22, SafeLoad<uint32_t>(in + 11) >> 8 | SafeLoad<uint32_t>(in + 12) << 24, SafeLoad<uint32_t>(in + 12) >> 6 | SafeLoad<uint32_t>(in + 13) << 26, SafeLoad<uint32_t>(in + 13) >> 4 | SafeLoad<uint32_t>(in + 14) << 28, SafeLoad<uint32_t>(in + 14) };
+  shifts = simd_batch{ 0, 0, 0, 0, 0, 0, 0, 2 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 30-bit bundles 16 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 15), SafeLoad<uint32_t>(in + 15) >> 30 | SafeLoad<uint32_t>(in + 16) << 2, SafeLoad<uint32_t>(in + 16) >> 28 | SafeLoad<uint32_t>(in + 17) << 4, SafeLoad<uint32_t>(in + 17) >> 26 | SafeLoad<uint32_t>(in + 18) << 6, SafeLoad<uint32_t>(in + 18) >> 24 | SafeLoad<uint32_t>(in + 19) << 8, SafeLoad<uint32_t>(in + 19) >> 22 | SafeLoad<uint32_t>(in + 20) << 10, SafeLoad<uint32_t>(in + 20) >> 20 | SafeLoad<uint32_t>(in + 21) << 12, SafeLoad<uint32_t>(in + 21) >> 18 | SafeLoad<uint32_t>(in + 22) << 14 };
+  shifts = simd_batch{ 0, 0, 0, 0, 0, 0, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 30-bit bundles 24 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 22) >> 16 | SafeLoad<uint32_t>(in + 23) << 16, SafeLoad<uint32_t>(in + 23) >> 14 | SafeLoad<uint32_t>(in + 24) << 18, SafeLoad<uint32_t>(in + 24) >> 12 | SafeLoad<uint32_t>(in + 25) << 20, SafeLoad<uint32_t>(in + 25) >> 10 | SafeLoad<uint32_t>(in + 26) << 22, SafeLoad<uint32_t>(in + 26) >> 8 | SafeLoad<uint32_t>(in + 27) << 24, SafeLoad<uint32_t>(in + 27) >> 6 | SafeLoad<uint32_t>(in + 28) << 26, SafeLoad<uint32_t>(in + 28) >> 4 | SafeLoad<uint32_t>(in + 29) << 28, SafeLoad<uint32_t>(in + 29) };
+  shifts = simd_batch{ 0, 0, 0, 0, 0, 0, 0, 2 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  in += 30;
+  return in;
+}
+
+inline static const uint32_t* unpack31_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x7fffffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 31-bit bundles 0 to 7
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 31 | SafeLoad<uint32_t>(in + 1) << 1, SafeLoad<uint32_t>(in + 1) >> 30 | SafeLoad<uint32_t>(in + 2) << 2, SafeLoad<uint32_t>(in + 2) >> 29 | SafeLoad<uint32_t>(in + 3) << 3, SafeLoad<uint32_t>(in + 3) >> 28 | SafeLoad<uint32_t>(in + 4) << 4, SafeLoad<uint32_t>(in + 4) >> 27 | SafeLoad<uint32_t>(in + 5) << 5, SafeLoad<uint32_t>(in + 5) >> 26 | SafeLoad<uint32_t>(in + 6) << 6, SafeLoad<uint32_t>(in + 6) >> 25 | SafeLoad<uint32_t>(in + 7) << 7 };
+  shifts = simd_batch{ 0, 0, 0, 0, 0, 0, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 31-bit bundles 8 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 7) >> 24 | SafeLoad<uint32_t>(in + 8) << 8, SafeLoad<uint32_t>(in + 8) >> 23 | SafeLoad<uint32_t>(in + 9) << 9, SafeLoad<uint32_t>(in + 9) >> 22 | SafeLoad<uint32_t>(in + 10) << 10, SafeLoad<uint32_t>(in + 10) >> 21 | SafeLoad<uint32_t>(in + 11) << 11, SafeLoad<uint32_t>(in + 11) >> 20 | SafeLoad<uint32_t>(in + 12) << 12, SafeLoad<uint32_t>(in + 12) >> 19 | SafeLoad<uint32_t>(in + 13) << 13, SafeLoad<uint32_t>(in + 13) >> 18 | SafeLoad<uint32_t>(in + 14) << 14, SafeLoad<uint32_t>(in + 14) >> 17 | SafeLoad<uint32_t>(in + 15) << 15 };
+  shifts = simd_batch{ 0, 0, 0, 0, 0, 0, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 31-bit bundles 16 to 23
+  words = simd_batch{ SafeLoad<uint32_t>(in + 15) >> 16 | SafeLoad<uint32_t>(in + 16) << 16, SafeLoad<uint32_t>(in + 16) >> 15 | SafeLoad<uint32_t>(in + 17) << 17, SafeLoad<uint32_t>(in + 17) >> 14 | SafeLoad<uint32_t>(in + 18) << 18, SafeLoad<uint32_t>(in + 18) >> 13 | SafeLoad<uint32_t>(in + 19) << 19, SafeLoad<uint32_t>(in + 19) >> 12 | SafeLoad<uint32_t>(in + 20) << 20, SafeLoad<uint32_t>(in + 20) >> 11 | SafeLoad<uint32_t>(in + 21) << 21, SafeLoad<uint32_t>(in + 21) >> 10 | SafeLoad<uint32_t>(in + 22) << 22, SafeLoad<uint32_t>(in + 22) >> 9 | SafeLoad<uint32_t>(in + 23) << 23 };
+  shifts = simd_batch{ 0, 0, 0, 0, 0, 0, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  // extract 31-bit bundles 24 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 23) >> 8 | SafeLoad<uint32_t>(in + 24) << 24, SafeLoad<uint32_t>(in + 24) >> 7 | SafeLoad<uint32_t>(in + 25) << 25, SafeLoad<uint32_t>(in + 25) >> 6 | SafeLoad<uint32_t>(in + 26) << 26, SafeLoad<uint32_t>(in + 26) >> 5 | SafeLoad<uint32_t>(in + 27) << 27, SafeLoad<uint32_t>(in + 27) >> 4 | SafeLoad<uint32_t>(in + 28) << 28, SafeLoad<uint32_t>(in + 28) >> 3 | SafeLoad<uint32_t>(in + 29) << 29, SafeLoad<uint32_t>(in + 29) >> 2 | SafeLoad<uint32_t>(in + 30) << 30, SafeLoad<uint32_t>(in + 30) };
+  shifts = simd_batch{ 0, 0, 0, 0, 0, 0, 0, 1 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 8;
+
+  in += 31;
+  return in;
+}
+
+inline static const uint32_t* unpack32_32(const uint32_t* in, uint32_t* out) {
+  memcpy(out, in, 32 * sizeof(*out));
+  in += 32;
+  out += 32;
+
+  return in;
+}
+
+};  // struct UnpackBits256
+
+}  // namespace
+}  // namespace internal
+}  // namespace arrow
+
diff --git a/cpp/src/arrow/util/bpacking_simd512_generated.h b/cpp/src/arrow/util/bpacking_simd512_generated.h
new file mode 100644
index 00000000000..2a62c962cd0
--- /dev/null
+++ b/cpp/src/arrow/util/bpacking_simd512_generated.h
@@ -0,0 +1,836 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Automatically generated file; DO NOT EDIT.
+
+#pragma once
+
+#include <cstdint>
+#include <cstring>
+
+#include <xsimd/xsimd.hpp>
+
+#include "arrow/util/dispatch.h"
+#include "arrow/util/ubsan.h"
+
+namespace arrow {
+namespace internal {
+namespace {
+
+using ::arrow::util::SafeLoad;
+
+template <DispatchLevel level>
+struct UnpackBits512 {
+
+using simd_batch = xsimd::batch<uint32_t, 16>;
+
+inline static const uint32_t* unpack0_32(const uint32_t* in, uint32_t* out) {
+  memset(out, 0x0, 32 * sizeof(*out));
+  out += 32;
+
+  return in;
+}
+
+inline static const uint32_t* unpack1_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x1;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 1-bit bundles 0 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) };
+  shifts = simd_batch{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  // extract 1-bit bundles 16 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) };
+  shifts = simd_batch{ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  in += 1;
+  return in;
+}
+
+inline static const uint32_t* unpack2_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x3;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 2-bit bundles 0 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) };
+  shifts = simd_batch{ 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  // extract 2-bit bundles 16 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) };
+  shifts = simd_batch{ 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  in += 2;
+  return in;
+}
+
+inline static const uint32_t* unpack3_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x7;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 3-bit bundles 0 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 30 | SafeLoad<uint32_t>(in + 1) << 2, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) };
+  shifts = simd_batch{ 0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 0, 1, 4, 7, 10, 13 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  // extract 3-bit bundles 16 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 31 | SafeLoad<uint32_t>(in + 2) << 1, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) };
+  shifts = simd_batch{ 16, 19, 22, 25, 28, 0, 2, 5, 8, 11, 14, 17, 20, 23, 26, 29 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  in += 3;
+  return in;
+}
+
+inline static const uint32_t* unpack4_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0xf;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 4-bit bundles 0 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) };
+  shifts = simd_batch{ 0, 4, 8, 12, 16, 20, 24, 28, 0, 4, 8, 12, 16, 20, 24, 28 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  // extract 4-bit bundles 16 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) };
+  shifts = simd_batch{ 0, 4, 8, 12, 16, 20, 24, 28, 0, 4, 8, 12, 16, 20, 24, 28 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  in += 4;
+  return in;
+}
+
+inline static const uint32_t* unpack5_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x1f;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 5-bit bundles 0 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 30 | SafeLoad<uint32_t>(in + 1) << 2, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 28 | SafeLoad<uint32_t>(in + 2) << 4, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) };
+  shifts = simd_batch{ 0, 5, 10, 15, 20, 25, 0, 3, 8, 13, 18, 23, 0, 1, 6, 11 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  // extract 5-bit bundles 16 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 31 | SafeLoad<uint32_t>(in + 3) << 1, SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 29 | SafeLoad<uint32_t>(in + 4) << 3, SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) };
+  shifts = simd_batch{ 16, 21, 26, 0, 4, 9, 14, 19, 24, 0, 2, 7, 12, 17, 22, 27 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  in += 5;
+  return in;
+}
+
+inline static const uint32_t* unpack6_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x3f;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 6-bit bundles 0 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 30 | SafeLoad<uint32_t>(in + 1) << 2, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 28 | SafeLoad<uint32_t>(in + 2) << 4, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) };
+  shifts = simd_batch{ 0, 6, 12, 18, 24, 0, 4, 10, 16, 22, 0, 2, 8, 14, 20, 26 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  // extract 6-bit bundles 16 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 30 | SafeLoad<uint32_t>(in + 4) << 2, SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) >> 28 | SafeLoad<uint32_t>(in + 5) << 4, SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) };
+  shifts = simd_batch{ 0, 6, 12, 18, 24, 0, 4, 10, 16, 22, 0, 2, 8, 14, 20, 26 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  in += 6;
+  return in;
+}
+
+inline static const uint32_t* unpack7_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x7f;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 7-bit bundles 0 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 28 | SafeLoad<uint32_t>(in + 1) << 4, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 31 | SafeLoad<uint32_t>(in + 2) << 1, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 27 | SafeLoad<uint32_t>(in + 3) << 5, SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) };
+  shifts = simd_batch{ 0, 7, 14, 21, 0, 3, 10, 17, 24, 0, 6, 13, 20, 0, 2, 9 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  // extract 7-bit bundles 16 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 30 | SafeLoad<uint32_t>(in + 4) << 2, SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) >> 26 | SafeLoad<uint32_t>(in + 5) << 6, SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 29 | SafeLoad<uint32_t>(in + 6) << 3, SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) };
+  shifts = simd_batch{ 16, 23, 0, 5, 12, 19, 0, 1, 8, 15, 22, 0, 4, 11, 18, 25 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  in += 7;
+  return in;
+}
+
+inline static const uint32_t* unpack8_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0xff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 8-bit bundles 0 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) };
+  shifts = simd_batch{ 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  // extract 8-bit bundles 16 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) };
+  shifts = simd_batch{ 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  in += 8;
+  return in;
+}
+
+inline static const uint32_t* unpack9_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x1ff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 9-bit bundles 0 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 27 | SafeLoad<uint32_t>(in + 1) << 5, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 31 | SafeLoad<uint32_t>(in + 2) << 1, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 26 | SafeLoad<uint32_t>(in + 3) << 6, SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 30 | SafeLoad<uint32_t>(in + 4) << 2, SafeLoad<uint32_t>(in + 4) };
+  shifts = simd_batch{ 0, 9, 18, 0, 4, 13, 22, 0, 8, 17, 0, 3, 12, 21, 0, 7 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  // extract 9-bit bundles 16 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) >> 25 | SafeLoad<uint32_t>(in + 5) << 7, SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 29 | SafeLoad<uint32_t>(in + 6) << 3, SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) >> 24 | SafeLoad<uint32_t>(in + 7) << 8, SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 28 | SafeLoad<uint32_t>(in + 8) << 4, SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8) };
+  shifts = simd_batch{ 16, 0, 2, 11, 20, 0, 6, 15, 0, 1, 10, 19, 0, 5, 14, 23 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  in += 9;
+  return in;
+}
+
+inline static const uint32_t* unpack10_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x3ff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 10-bit bundles 0 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 30 | SafeLoad<uint32_t>(in + 1) << 2, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 28 | SafeLoad<uint32_t>(in + 2) << 4, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 26 | SafeLoad<uint32_t>(in + 3) << 6, SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 24 | SafeLoad<uint32_t>(in + 4) << 8, SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) };
+  shifts = simd_batch{ 0, 10, 20, 0, 8, 18, 0, 6, 16, 0, 4, 14, 0, 2, 12, 22 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  // extract 10-bit bundles 16 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 30 | SafeLoad<uint32_t>(in + 6) << 2, SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) >> 28 | SafeLoad<uint32_t>(in + 7) << 4, SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 26 | SafeLoad<uint32_t>(in + 8) << 6, SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8) >> 24 | SafeLoad<uint32_t>(in + 9) << 8, SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9) };
+  shifts = simd_batch{ 0, 10, 20, 0, 8, 18, 0, 6, 16, 0, 4, 14, 0, 2, 12, 22 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  in += 10;
+  return in;
+}
+
+inline static const uint32_t* unpack11_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x7ff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 11-bit bundles 0 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 22 | SafeLoad<uint32_t>(in + 1) << 10, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 23 | SafeLoad<uint32_t>(in + 2) << 9, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 24 | SafeLoad<uint32_t>(in + 3) << 8, SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 25 | SafeLoad<uint32_t>(in + 4) << 7, SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) >> 26 | SafeLoad<uint32_t>(in + 5) << 6, SafeLoad<uint32_t>(in + 5) };
+  shifts = simd_batch{ 0, 11, 0, 1, 12, 0, 2, 13, 0, 3, 14, 0, 4, 15, 0, 5 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  // extract 11-bit bundles 16 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 27 | SafeLoad<uint32_t>(in + 6) << 5, SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) >> 28 | SafeLoad<uint32_t>(in + 7) << 4, SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 29 | SafeLoad<uint32_t>(in + 8) << 3, SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8) >> 30 | SafeLoad<uint32_t>(in + 9) << 2, SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9) >> 31 | SafeLoad<uint32_t>(in + 10) << 1, SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10) };
+  shifts = simd_batch{ 16, 0, 6, 17, 0, 7, 18, 0, 8, 19, 0, 9, 20, 0, 10, 21 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  in += 11;
+  return in;
+}
+
+inline static const uint32_t* unpack12_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0xfff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 12-bit bundles 0 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 24 | SafeLoad<uint32_t>(in + 1) << 8, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 28 | SafeLoad<uint32_t>(in + 2) << 4, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 24 | SafeLoad<uint32_t>(in + 4) << 8, SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) >> 28 | SafeLoad<uint32_t>(in + 5) << 4, SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) };
+  shifts = simd_batch{ 0, 12, 0, 4, 16, 0, 8, 20, 0, 12, 0, 4, 16, 0, 8, 20 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  // extract 12-bit bundles 16 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) >> 24 | SafeLoad<uint32_t>(in + 7) << 8, SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 28 | SafeLoad<uint32_t>(in + 8) << 4, SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9) >> 24 | SafeLoad<uint32_t>(in + 10) << 8, SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10) >> 28 | SafeLoad<uint32_t>(in + 11) << 4, SafeLoad<uint32_t>(in + 11), SafeLoad<uint32_t>(in + 11) };
+  shifts = simd_batch{ 0, 12, 0, 4, 16, 0, 8, 20, 0, 12, 0, 4, 16, 0, 8, 20 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  in += 12;
+  return in;
+}
+
+inline static const uint32_t* unpack13_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x1fff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 13-bit bundles 0 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 26 | SafeLoad<uint32_t>(in + 1) << 6, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 20 | SafeLoad<uint32_t>(in + 2) << 12, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 27 | SafeLoad<uint32_t>(in + 3) << 5, SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 21 | SafeLoad<uint32_t>(in + 4) << 11, SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) >> 28 | SafeLoad<uint32_t>(in + 5) << 4, SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 22 | SafeLoad<uint32_t>(in + 6) << 10, SafeLoad<uint32_t>(in + 6) };
+  shifts = simd_batch{ 0, 13, 0, 7, 0, 1, 14, 0, 8, 0, 2, 15, 0, 9, 0, 3 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  // extract 13-bit bundles 16 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) >> 29 | SafeLoad<uint32_t>(in + 7) << 3, SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 23 | SafeLoad<uint32_t>(in + 8) << 9, SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8) >> 30 | SafeLoad<uint32_t>(in + 9) << 2, SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9) >> 24 | SafeLoad<uint32_t>(in + 10) << 8, SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10) >> 31 | SafeLoad<uint32_t>(in + 11) << 1, SafeLoad<uint32_t>(in + 11), SafeLoad<uint32_t>(in + 11) >> 25 | SafeLoad<uint32_t>(in + 12) << 7, SafeLoad<uint32_t>(in + 12), SafeLoad<uint32_t>(in + 12) };
+  shifts = simd_batch{ 16, 0, 10, 0, 4, 17, 0, 11, 0, 5, 18, 0, 12, 0, 6, 19 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  in += 13;
+  return in;
+}
+
+inline static const uint32_t* unpack14_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x3fff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 14-bit bundles 0 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 28 | SafeLoad<uint32_t>(in + 1) << 4, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 24 | SafeLoad<uint32_t>(in + 2) << 8, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 20 | SafeLoad<uint32_t>(in + 3) << 12, SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 30 | SafeLoad<uint32_t>(in + 4) << 2, SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) >> 26 | SafeLoad<uint32_t>(in + 5) << 6, SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 22 | SafeLoad<uint32_t>(in + 6) << 10, SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) };
+  shifts = simd_batch{ 0, 14, 0, 10, 0, 6, 0, 2, 16, 0, 12, 0, 8, 0, 4, 18 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  // extract 14-bit bundles 16 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 28 | SafeLoad<uint32_t>(in + 8) << 4, SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8) >> 24 | SafeLoad<uint32_t>(in + 9) << 8, SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9) >> 20 | SafeLoad<uint32_t>(in + 10) << 12, SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10) >> 30 | SafeLoad<uint32_t>(in + 11) << 2, SafeLoad<uint32_t>(in + 11), SafeLoad<uint32_t>(in + 11) >> 26 | SafeLoad<uint32_t>(in + 12) << 6, SafeLoad<uint32_t>(in + 12), SafeLoad<uint32_t>(in + 12) >> 22 | SafeLoad<uint32_t>(in + 13) << 10, SafeLoad<uint32_t>(in + 13), SafeLoad<uint32_t>(in + 13) };
+  shifts = simd_batch{ 0, 14, 0, 10, 0, 6, 0, 2, 16, 0, 12, 0, 8, 0, 4, 18 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  in += 14;
+  return in;
+}
+
+inline static const uint32_t* unpack15_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x7fff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 15-bit bundles 0 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 30 | SafeLoad<uint32_t>(in + 1) << 2, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 28 | SafeLoad<uint32_t>(in + 2) << 4, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 26 | SafeLoad<uint32_t>(in + 3) << 6, SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 24 | SafeLoad<uint32_t>(in + 4) << 8, SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) >> 22 | SafeLoad<uint32_t>(in + 5) << 10, SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 20 | SafeLoad<uint32_t>(in + 6) << 12, SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) >> 18 | SafeLoad<uint32_t>(in + 7) << 14, SafeLoad<uint32_t>(in + 7) };
+  shifts = simd_batch{ 0, 15, 0, 13, 0, 11, 0, 9, 0, 7, 0, 5, 0, 3, 0, 1 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  // extract 15-bit bundles 16 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 31 | SafeLoad<uint32_t>(in + 8) << 1, SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8) >> 29 | SafeLoad<uint32_t>(in + 9) << 3, SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9) >> 27 | SafeLoad<uint32_t>(in + 10) << 5, SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10) >> 25 | SafeLoad<uint32_t>(in + 11) << 7, SafeLoad<uint32_t>(in + 11), SafeLoad<uint32_t>(in + 11) >> 23 | SafeLoad<uint32_t>(in + 12) << 9, SafeLoad<uint32_t>(in + 12), SafeLoad<uint32_t>(in + 12) >> 21 | SafeLoad<uint32_t>(in + 13) << 11, SafeLoad<uint32_t>(in + 13), SafeLoad<uint32_t>(in + 13) >> 19 | SafeLoad<uint32_t>(in + 14) << 13, SafeLoad<uint32_t>(in + 14), SafeLoad<uint32_t>(in + 14) };
+  shifts = simd_batch{ 16, 0, 14, 0, 12, 0, 10, 0, 8, 0, 6, 0, 4, 0, 2, 17 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  in += 15;
+  return in;
+}
+
+inline static const uint32_t* unpack16_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0xffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 16-bit bundles 0 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) };
+  shifts = simd_batch{ 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  // extract 16-bit bundles 16 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 11), SafeLoad<uint32_t>(in + 11), SafeLoad<uint32_t>(in + 12), SafeLoad<uint32_t>(in + 12), SafeLoad<uint32_t>(in + 13), SafeLoad<uint32_t>(in + 13), SafeLoad<uint32_t>(in + 14), SafeLoad<uint32_t>(in + 14), SafeLoad<uint32_t>(in + 15), SafeLoad<uint32_t>(in + 15) };
+  shifts = simd_batch{ 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  in += 16;
+  return in;
+}
+
+inline static const uint32_t* unpack17_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x1ffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 17-bit bundles 0 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 17 | SafeLoad<uint32_t>(in + 1) << 15, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 19 | SafeLoad<uint32_t>(in + 2) << 13, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 21 | SafeLoad<uint32_t>(in + 3) << 11, SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 23 | SafeLoad<uint32_t>(in + 4) << 9, SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) >> 25 | SafeLoad<uint32_t>(in + 5) << 7, SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 27 | SafeLoad<uint32_t>(in + 6) << 5, SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) >> 29 | SafeLoad<uint32_t>(in + 7) << 3, SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 31 | SafeLoad<uint32_t>(in + 8) << 1 };
+  shifts = simd_batch{ 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  // extract 17-bit bundles 16 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 8) >> 16 | SafeLoad<uint32_t>(in + 9) << 16, SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9) >> 18 | SafeLoad<uint32_t>(in + 10) << 14, SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10) >> 20 | SafeLoad<uint32_t>(in + 11) << 12, SafeLoad<uint32_t>(in + 11), SafeLoad<uint32_t>(in + 11) >> 22 | SafeLoad<uint32_t>(in + 12) << 10, SafeLoad<uint32_t>(in + 12), SafeLoad<uint32_t>(in + 12) >> 24 | SafeLoad<uint32_t>(in + 13) << 8, SafeLoad<uint32_t>(in + 13), SafeLoad<uint32_t>(in + 13) >> 26 | SafeLoad<uint32_t>(in + 14) << 6, SafeLoad<uint32_t>(in + 14), SafeLoad<uint32_t>(in + 14) >> 28 | SafeLoad<uint32_t>(in + 15) << 4, SafeLoad<uint32_t>(in + 15), SafeLoad<uint32_t>(in + 15) >> 30 | SafeLoad<uint32_t>(in + 16) << 2, SafeLoad<uint32_t>(in + 16) };
+  shifts = simd_batch{ 0, 1, 0, 3, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  in += 17;
+  return in;
+}
+
+inline static const uint32_t* unpack18_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x3ffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 18-bit bundles 0 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 18 | SafeLoad<uint32_t>(in + 1) << 14, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 22 | SafeLoad<uint32_t>(in + 2) << 10, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 26 | SafeLoad<uint32_t>(in + 3) << 6, SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 30 | SafeLoad<uint32_t>(in + 4) << 2, SafeLoad<uint32_t>(in + 4) >> 16 | SafeLoad<uint32_t>(in + 5) << 16, SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 20 | SafeLoad<uint32_t>(in + 6) << 12, SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) >> 24 | SafeLoad<uint32_t>(in + 7) << 8, SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 28 | SafeLoad<uint32_t>(in + 8) << 4, SafeLoad<uint32_t>(in + 8) };
+  shifts = simd_batch{ 0, 0, 4, 0, 8, 0, 12, 0, 0, 2, 0, 6, 0, 10, 0, 14 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  // extract 18-bit bundles 16 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9) >> 18 | SafeLoad<uint32_t>(in + 10) << 14, SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10) >> 22 | SafeLoad<uint32_t>(in + 11) << 10, SafeLoad<uint32_t>(in + 11), SafeLoad<uint32_t>(in + 11) >> 26 | SafeLoad<uint32_t>(in + 12) << 6, SafeLoad<uint32_t>(in + 12), SafeLoad<uint32_t>(in + 12) >> 30 | SafeLoad<uint32_t>(in + 13) << 2, SafeLoad<uint32_t>(in + 13) >> 16 | SafeLoad<uint32_t>(in + 14) << 16, SafeLoad<uint32_t>(in + 14), SafeLoad<uint32_t>(in + 14) >> 20 | SafeLoad<uint32_t>(in + 15) << 12, SafeLoad<uint32_t>(in + 15), SafeLoad<uint32_t>(in + 15) >> 24 | SafeLoad<uint32_t>(in + 16) << 8, SafeLoad<uint32_t>(in + 16), SafeLoad<uint32_t>(in + 16) >> 28 | SafeLoad<uint32_t>(in + 17) << 4, SafeLoad<uint32_t>(in + 17) };
+  shifts = simd_batch{ 0, 0, 4, 0, 8, 0, 12, 0, 0, 2, 0, 6, 0, 10, 0, 14 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  in += 18;
+  return in;
+}
+
+inline static const uint32_t* unpack19_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x7ffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 19-bit bundles 0 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 19 | SafeLoad<uint32_t>(in + 1) << 13, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 25 | SafeLoad<uint32_t>(in + 2) << 7, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 31 | SafeLoad<uint32_t>(in + 3) << 1, SafeLoad<uint32_t>(in + 3) >> 18 | SafeLoad<uint32_t>(in + 4) << 14, SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) >> 24 | SafeLoad<uint32_t>(in + 5) << 8, SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 30 | SafeLoad<uint32_t>(in + 6) << 2, SafeLoad<uint32_t>(in + 6) >> 17 | SafeLoad<uint32_t>(in + 7) << 15, SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 23 | SafeLoad<uint32_t>(in + 8) << 9, SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8) >> 29 | SafeLoad<uint32_t>(in + 9) << 3 };
+  shifts = simd_batch{ 0, 0, 6, 0, 12, 0, 0, 5, 0, 11, 0, 0, 4, 0, 10, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  // extract 19-bit bundles 16 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 9) >> 16 | SafeLoad<uint32_t>(in + 10) << 16, SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10) >> 22 | SafeLoad<uint32_t>(in + 11) << 10, SafeLoad<uint32_t>(in + 11), SafeLoad<uint32_t>(in + 11) >> 28 | SafeLoad<uint32_t>(in + 12) << 4, SafeLoad<uint32_t>(in + 12) >> 15 | SafeLoad<uint32_t>(in + 13) << 17, SafeLoad<uint32_t>(in + 13), SafeLoad<uint32_t>(in + 13) >> 21 | SafeLoad<uint32_t>(in + 14) << 11, SafeLoad<uint32_t>(in + 14), SafeLoad<uint32_t>(in + 14) >> 27 | SafeLoad<uint32_t>(in + 15) << 5, SafeLoad<uint32_t>(in + 15) >> 14 | SafeLoad<uint32_t>(in + 16) << 18, SafeLoad<uint32_t>(in + 16), SafeLoad<uint32_t>(in + 16) >> 20 | SafeLoad<uint32_t>(in + 17) << 12, SafeLoad<uint32_t>(in + 17), SafeLoad<uint32_t>(in + 17) >> 26 | SafeLoad<uint32_t>(in + 18) << 6, SafeLoad<uint32_t>(in + 18) };
+  shifts = simd_batch{ 0, 3, 0, 9, 0, 0, 2, 0, 8, 0, 0, 1, 0, 7, 0, 13 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  in += 19;
+  return in;
+}
+
+inline static const uint32_t* unpack20_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0xfffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 20-bit bundles 0 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 20 | SafeLoad<uint32_t>(in + 1) << 12, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 28 | SafeLoad<uint32_t>(in + 2) << 4, SafeLoad<uint32_t>(in + 2) >> 16 | SafeLoad<uint32_t>(in + 3) << 16, SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 24 | SafeLoad<uint32_t>(in + 4) << 8, SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 20 | SafeLoad<uint32_t>(in + 6) << 12, SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) >> 28 | SafeLoad<uint32_t>(in + 7) << 4, SafeLoad<uint32_t>(in + 7) >> 16 | SafeLoad<uint32_t>(in + 8) << 16, SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8) >> 24 | SafeLoad<uint32_t>(in + 9) << 8, SafeLoad<uint32_t>(in + 9) };
+  shifts = simd_batch{ 0, 0, 8, 0, 0, 4, 0, 12, 0, 0, 8, 0, 0, 4, 0, 12 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  // extract 20-bit bundles 16 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10) >> 20 | SafeLoad<uint32_t>(in + 11) << 12, SafeLoad<uint32_t>(in + 11), SafeLoad<uint32_t>(in + 11) >> 28 | SafeLoad<uint32_t>(in + 12) << 4, SafeLoad<uint32_t>(in + 12) >> 16 | SafeLoad<uint32_t>(in + 13) << 16, SafeLoad<uint32_t>(in + 13), SafeLoad<uint32_t>(in + 13) >> 24 | SafeLoad<uint32_t>(in + 14) << 8, SafeLoad<uint32_t>(in + 14), SafeLoad<uint32_t>(in + 15), SafeLoad<uint32_t>(in + 15) >> 20 | SafeLoad<uint32_t>(in + 16) << 12, SafeLoad<uint32_t>(in + 16), SafeLoad<uint32_t>(in + 16) >> 28 | SafeLoad<uint32_t>(in + 17) << 4, SafeLoad<uint32_t>(in + 17) >> 16 | SafeLoad<uint32_t>(in + 18) << 16, SafeLoad<uint32_t>(in + 18), SafeLoad<uint32_t>(in + 18) >> 24 | SafeLoad<uint32_t>(in + 19) << 8, SafeLoad<uint32_t>(in + 19) };
+  shifts = simd_batch{ 0, 0, 8, 0, 0, 4, 0, 12, 0, 0, 8, 0, 0, 4, 0, 12 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  in += 20;
+  return in;
+}
+
+inline static const uint32_t* unpack21_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x1fffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 21-bit bundles 0 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 21 | SafeLoad<uint32_t>(in + 1) << 11, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 31 | SafeLoad<uint32_t>(in + 2) << 1, SafeLoad<uint32_t>(in + 2) >> 20 | SafeLoad<uint32_t>(in + 3) << 12, SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 30 | SafeLoad<uint32_t>(in + 4) << 2, SafeLoad<uint32_t>(in + 4) >> 19 | SafeLoad<uint32_t>(in + 5) << 13, SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 29 | SafeLoad<uint32_t>(in + 6) << 3, SafeLoad<uint32_t>(in + 6) >> 18 | SafeLoad<uint32_t>(in + 7) << 14, SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 28 | SafeLoad<uint32_t>(in + 8) << 4, SafeLoad<uint32_t>(in + 8) >> 17 | SafeLoad<uint32_t>(in + 9) << 15, SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9) >> 27 | SafeLoad<uint32_t>(in + 10) << 5 };
+  shifts = simd_batch{ 0, 0, 10, 0, 0, 9, 0, 0, 8, 0, 0, 7, 0, 0, 6, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  // extract 21-bit bundles 16 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 10) >> 16 | SafeLoad<uint32_t>(in + 11) << 16, SafeLoad<uint32_t>(in + 11), SafeLoad<uint32_t>(in + 11) >> 26 | SafeLoad<uint32_t>(in + 12) << 6, SafeLoad<uint32_t>(in + 12) >> 15 | SafeLoad<uint32_t>(in + 13) << 17, SafeLoad<uint32_t>(in + 13), SafeLoad<uint32_t>(in + 13) >> 25 | SafeLoad<uint32_t>(in + 14) << 7, SafeLoad<uint32_t>(in + 14) >> 14 | SafeLoad<uint32_t>(in + 15) << 18, SafeLoad<uint32_t>(in + 15), SafeLoad<uint32_t>(in + 15) >> 24 | SafeLoad<uint32_t>(in + 16) << 8, SafeLoad<uint32_t>(in + 16) >> 13 | SafeLoad<uint32_t>(in + 17) << 19, SafeLoad<uint32_t>(in + 17), SafeLoad<uint32_t>(in + 17) >> 23 | SafeLoad<uint32_t>(in + 18) << 9, SafeLoad<uint32_t>(in + 18) >> 12 | SafeLoad<uint32_t>(in + 19) << 20, SafeLoad<uint32_t>(in + 19), SafeLoad<uint32_t>(in + 19) >> 22 | SafeLoad<uint32_t>(in + 20) << 10, SafeLoad<uint32_t>(in + 20) };
+  shifts = simd_batch{ 0, 5, 0, 0, 4, 0, 0, 3, 0, 0, 2, 0, 0, 1, 0, 11 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  in += 21;
+  return in;
+}
+
+inline static const uint32_t* unpack22_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x3fffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 22-bit bundles 0 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 22 | SafeLoad<uint32_t>(in + 1) << 10, SafeLoad<uint32_t>(in + 1) >> 12 | SafeLoad<uint32_t>(in + 2) << 20, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 24 | SafeLoad<uint32_t>(in + 3) << 8, SafeLoad<uint32_t>(in + 3) >> 14 | SafeLoad<uint32_t>(in + 4) << 18, SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) >> 26 | SafeLoad<uint32_t>(in + 5) << 6, SafeLoad<uint32_t>(in + 5) >> 16 | SafeLoad<uint32_t>(in + 6) << 16, SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) >> 28 | SafeLoad<uint32_t>(in + 7) << 4, SafeLoad<uint32_t>(in + 7) >> 18 | SafeLoad<uint32_t>(in + 8) << 14, SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8) >> 30 | SafeLoad<uint32_t>(in + 9) << 2, SafeLoad<uint32_t>(in + 9) >> 20 | SafeLoad<uint32_t>(in + 10) << 12, SafeLoad<uint32_t>(in + 10) };
+  shifts = simd_batch{ 0, 0, 0, 2, 0, 0, 4, 0, 0, 6, 0, 0, 8, 0, 0, 10 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  // extract 22-bit bundles 16 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 11), SafeLoad<uint32_t>(in + 11) >> 22 | SafeLoad<uint32_t>(in + 12) << 10, SafeLoad<uint32_t>(in + 12) >> 12 | SafeLoad<uint32_t>(in + 13) << 20, SafeLoad<uint32_t>(in + 13), SafeLoad<uint32_t>(in + 13) >> 24 | SafeLoad<uint32_t>(in + 14) << 8, SafeLoad<uint32_t>(in + 14) >> 14 | SafeLoad<uint32_t>(in + 15) << 18, SafeLoad<uint32_t>(in + 15), SafeLoad<uint32_t>(in + 15) >> 26 | SafeLoad<uint32_t>(in + 16) << 6, SafeLoad<uint32_t>(in + 16) >> 16 | SafeLoad<uint32_t>(in + 17) << 16, SafeLoad<uint32_t>(in + 17), SafeLoad<uint32_t>(in + 17) >> 28 | SafeLoad<uint32_t>(in + 18) << 4, SafeLoad<uint32_t>(in + 18) >> 18 | SafeLoad<uint32_t>(in + 19) << 14, SafeLoad<uint32_t>(in + 19), SafeLoad<uint32_t>(in + 19) >> 30 | SafeLoad<uint32_t>(in + 20) << 2, SafeLoad<uint32_t>(in + 20) >> 20 | SafeLoad<uint32_t>(in + 21) << 12, SafeLoad<uint32_t>(in + 21) };
+  shifts = simd_batch{ 0, 0, 0, 2, 0, 0, 4, 0, 0, 6, 0, 0, 8, 0, 0, 10 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  in += 22;
+  return in;
+}
+
+inline static const uint32_t* unpack23_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x7fffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 23-bit bundles 0 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 23 | SafeLoad<uint32_t>(in + 1) << 9, SafeLoad<uint32_t>(in + 1) >> 14 | SafeLoad<uint32_t>(in + 2) << 18, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 28 | SafeLoad<uint32_t>(in + 3) << 4, SafeLoad<uint32_t>(in + 3) >> 19 | SafeLoad<uint32_t>(in + 4) << 13, SafeLoad<uint32_t>(in + 4) >> 10 | SafeLoad<uint32_t>(in + 5) << 22, SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 24 | SafeLoad<uint32_t>(in + 6) << 8, SafeLoad<uint32_t>(in + 6) >> 15 | SafeLoad<uint32_t>(in + 7) << 17, SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 29 | SafeLoad<uint32_t>(in + 8) << 3, SafeLoad<uint32_t>(in + 8) >> 20 | SafeLoad<uint32_t>(in + 9) << 12, SafeLoad<uint32_t>(in + 9) >> 11 | SafeLoad<uint32_t>(in + 10) << 21, SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10) >> 25 | SafeLoad<uint32_t>(in + 11) << 7 };
+  shifts = simd_batch{ 0, 0, 0, 5, 0, 0, 0, 1, 0, 0, 6, 0, 0, 0, 2, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  // extract 23-bit bundles 16 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 11) >> 16 | SafeLoad<uint32_t>(in + 12) << 16, SafeLoad<uint32_t>(in + 12), SafeLoad<uint32_t>(in + 12) >> 30 | SafeLoad<uint32_t>(in + 13) << 2, SafeLoad<uint32_t>(in + 13) >> 21 | SafeLoad<uint32_t>(in + 14) << 11, SafeLoad<uint32_t>(in + 14) >> 12 | SafeLoad<uint32_t>(in + 15) << 20, SafeLoad<uint32_t>(in + 15), SafeLoad<uint32_t>(in + 15) >> 26 | SafeLoad<uint32_t>(in + 16) << 6, SafeLoad<uint32_t>(in + 16) >> 17 | SafeLoad<uint32_t>(in + 17) << 15, SafeLoad<uint32_t>(in + 17), SafeLoad<uint32_t>(in + 17) >> 31 | SafeLoad<uint32_t>(in + 18) << 1, SafeLoad<uint32_t>(in + 18) >> 22 | SafeLoad<uint32_t>(in + 19) << 10, SafeLoad<uint32_t>(in + 19) >> 13 | SafeLoad<uint32_t>(in + 20) << 19, SafeLoad<uint32_t>(in + 20), SafeLoad<uint32_t>(in + 20) >> 27 | SafeLoad<uint32_t>(in + 21) << 5, SafeLoad<uint32_t>(in + 21) >> 18 | SafeLoad<uint32_t>(in + 22) << 14, SafeLoad<uint32_t>(in + 22) };
+  shifts = simd_batch{ 0, 7, 0, 0, 0, 3, 0, 0, 8, 0, 0, 0, 4, 0, 0, 9 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  in += 23;
+  return in;
+}
+
+inline static const uint32_t* unpack24_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0xffffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 24-bit bundles 0 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 24 | SafeLoad<uint32_t>(in + 1) << 8, SafeLoad<uint32_t>(in + 1) >> 16 | SafeLoad<uint32_t>(in + 2) << 16, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 24 | SafeLoad<uint32_t>(in + 4) << 8, SafeLoad<uint32_t>(in + 4) >> 16 | SafeLoad<uint32_t>(in + 5) << 16, SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) >> 24 | SafeLoad<uint32_t>(in + 7) << 8, SafeLoad<uint32_t>(in + 7) >> 16 | SafeLoad<uint32_t>(in + 8) << 16, SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9) >> 24 | SafeLoad<uint32_t>(in + 10) << 8, SafeLoad<uint32_t>(in + 10) >> 16 | SafeLoad<uint32_t>(in + 11) << 16, SafeLoad<uint32_t>(in + 11) };
+  shifts = simd_batch{ 0, 0, 0, 8, 0, 0, 0, 8, 0, 0, 0, 8, 0, 0, 0, 8 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  // extract 24-bit bundles 16 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 12), SafeLoad<uint32_t>(in + 12) >> 24 | SafeLoad<uint32_t>(in + 13) << 8, SafeLoad<uint32_t>(in + 13) >> 16 | SafeLoad<uint32_t>(in + 14) << 16, SafeLoad<uint32_t>(in + 14), SafeLoad<uint32_t>(in + 15), SafeLoad<uint32_t>(in + 15) >> 24 | SafeLoad<uint32_t>(in + 16) << 8, SafeLoad<uint32_t>(in + 16) >> 16 | SafeLoad<uint32_t>(in + 17) << 16, SafeLoad<uint32_t>(in + 17), SafeLoad<uint32_t>(in + 18), SafeLoad<uint32_t>(in + 18) >> 24 | SafeLoad<uint32_t>(in + 19) << 8, SafeLoad<uint32_t>(in + 19) >> 16 | SafeLoad<uint32_t>(in + 20) << 16, SafeLoad<uint32_t>(in + 20), SafeLoad<uint32_t>(in + 21), SafeLoad<uint32_t>(in + 21) >> 24 | SafeLoad<uint32_t>(in + 22) << 8, SafeLoad<uint32_t>(in + 22) >> 16 | SafeLoad<uint32_t>(in + 23) << 16, SafeLoad<uint32_t>(in + 23) };
+  shifts = simd_batch{ 0, 0, 0, 8, 0, 0, 0, 8, 0, 0, 0, 8, 0, 0, 0, 8 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  in += 24;
+  return in;
+}
+
+inline static const uint32_t* unpack25_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x1ffffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 25-bit bundles 0 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 25 | SafeLoad<uint32_t>(in + 1) << 7, SafeLoad<uint32_t>(in + 1) >> 18 | SafeLoad<uint32_t>(in + 2) << 14, SafeLoad<uint32_t>(in + 2) >> 11 | SafeLoad<uint32_t>(in + 3) << 21, SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 29 | SafeLoad<uint32_t>(in + 4) << 3, SafeLoad<uint32_t>(in + 4) >> 22 | SafeLoad<uint32_t>(in + 5) << 10, SafeLoad<uint32_t>(in + 5) >> 15 | SafeLoad<uint32_t>(in + 6) << 17, SafeLoad<uint32_t>(in + 6) >> 8 | SafeLoad<uint32_t>(in + 7) << 24, SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 26 | SafeLoad<uint32_t>(in + 8) << 6, SafeLoad<uint32_t>(in + 8) >> 19 | SafeLoad<uint32_t>(in + 9) << 13, SafeLoad<uint32_t>(in + 9) >> 12 | SafeLoad<uint32_t>(in + 10) << 20, SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10) >> 30 | SafeLoad<uint32_t>(in + 11) << 2, SafeLoad<uint32_t>(in + 11) >> 23 | SafeLoad<uint32_t>(in + 12) << 9 };
+  shifts = simd_batch{ 0, 0, 0, 0, 4, 0, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  // extract 25-bit bundles 16 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 12) >> 16 | SafeLoad<uint32_t>(in + 13) << 16, SafeLoad<uint32_t>(in + 13) >> 9 | SafeLoad<uint32_t>(in + 14) << 23, SafeLoad<uint32_t>(in + 14), SafeLoad<uint32_t>(in + 14) >> 27 | SafeLoad<uint32_t>(in + 15) << 5, SafeLoad<uint32_t>(in + 15) >> 20 | SafeLoad<uint32_t>(in + 16) << 12, SafeLoad<uint32_t>(in + 16) >> 13 | SafeLoad<uint32_t>(in + 17) << 19, SafeLoad<uint32_t>(in + 17), SafeLoad<uint32_t>(in + 17) >> 31 | SafeLoad<uint32_t>(in + 18) << 1, SafeLoad<uint32_t>(in + 18) >> 24 | SafeLoad<uint32_t>(in + 19) << 8, SafeLoad<uint32_t>(in + 19) >> 17 | SafeLoad<uint32_t>(in + 20) << 15, SafeLoad<uint32_t>(in + 20) >> 10 | SafeLoad<uint32_t>(in + 21) << 22, SafeLoad<uint32_t>(in + 21), SafeLoad<uint32_t>(in + 21) >> 28 | SafeLoad<uint32_t>(in + 22) << 4, SafeLoad<uint32_t>(in + 22) >> 21 | SafeLoad<uint32_t>(in + 23) << 11, SafeLoad<uint32_t>(in + 23) >> 14 | SafeLoad<uint32_t>(in + 24) << 18, SafeLoad<uint32_t>(in + 24) };
+  shifts = simd_batch{ 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 0, 3, 0, 0, 0, 7 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  in += 25;
+  return in;
+}
+
+inline static const uint32_t* unpack26_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x3ffffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 26-bit bundles 0 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 26 | SafeLoad<uint32_t>(in + 1) << 6, SafeLoad<uint32_t>(in + 1) >> 20 | SafeLoad<uint32_t>(in + 2) << 12, SafeLoad<uint32_t>(in + 2) >> 14 | SafeLoad<uint32_t>(in + 3) << 18, SafeLoad<uint32_t>(in + 3) >> 8 | SafeLoad<uint32_t>(in + 4) << 24, SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) >> 28 | SafeLoad<uint32_t>(in + 5) << 4, SafeLoad<uint32_t>(in + 5) >> 22 | SafeLoad<uint32_t>(in + 6) << 10, SafeLoad<uint32_t>(in + 6) >> 16 | SafeLoad<uint32_t>(in + 7) << 16, SafeLoad<uint32_t>(in + 7) >> 10 | SafeLoad<uint32_t>(in + 8) << 22, SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8) >> 30 | SafeLoad<uint32_t>(in + 9) << 2, SafeLoad<uint32_t>(in + 9) >> 24 | SafeLoad<uint32_t>(in + 10) << 8, SafeLoad<uint32_t>(in + 10) >> 18 | SafeLoad<uint32_t>(in + 11) << 14, SafeLoad<uint32_t>(in + 11) >> 12 | SafeLoad<uint32_t>(in + 12) << 20, SafeLoad<uint32_t>(in + 12) };
+  shifts = simd_batch{ 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 4, 0, 0, 0, 0, 6 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  // extract 26-bit bundles 16 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 13), SafeLoad<uint32_t>(in + 13) >> 26 | SafeLoad<uint32_t>(in + 14) << 6, SafeLoad<uint32_t>(in + 14) >> 20 | SafeLoad<uint32_t>(in + 15) << 12, SafeLoad<uint32_t>(in + 15) >> 14 | SafeLoad<uint32_t>(in + 16) << 18, SafeLoad<uint32_t>(in + 16) >> 8 | SafeLoad<uint32_t>(in + 17) << 24, SafeLoad<uint32_t>(in + 17), SafeLoad<uint32_t>(in + 17) >> 28 | SafeLoad<uint32_t>(in + 18) << 4, SafeLoad<uint32_t>(in + 18) >> 22 | SafeLoad<uint32_t>(in + 19) << 10, SafeLoad<uint32_t>(in + 19) >> 16 | SafeLoad<uint32_t>(in + 20) << 16, SafeLoad<uint32_t>(in + 20) >> 10 | SafeLoad<uint32_t>(in + 21) << 22, SafeLoad<uint32_t>(in + 21), SafeLoad<uint32_t>(in + 21) >> 30 | SafeLoad<uint32_t>(in + 22) << 2, SafeLoad<uint32_t>(in + 22) >> 24 | SafeLoad<uint32_t>(in + 23) << 8, SafeLoad<uint32_t>(in + 23) >> 18 | SafeLoad<uint32_t>(in + 24) << 14, SafeLoad<uint32_t>(in + 24) >> 12 | SafeLoad<uint32_t>(in + 25) << 20, SafeLoad<uint32_t>(in + 25) };
+  shifts = simd_batch{ 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 4, 0, 0, 0, 0, 6 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  in += 26;
+  return in;
+}
+
+inline static const uint32_t* unpack27_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x7ffffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 27-bit bundles 0 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 27 | SafeLoad<uint32_t>(in + 1) << 5, SafeLoad<uint32_t>(in + 1) >> 22 | SafeLoad<uint32_t>(in + 2) << 10, SafeLoad<uint32_t>(in + 2) >> 17 | SafeLoad<uint32_t>(in + 3) << 15, SafeLoad<uint32_t>(in + 3) >> 12 | SafeLoad<uint32_t>(in + 4) << 20, SafeLoad<uint32_t>(in + 4) >> 7 | SafeLoad<uint32_t>(in + 5) << 25, SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 29 | SafeLoad<uint32_t>(in + 6) << 3, SafeLoad<uint32_t>(in + 6) >> 24 | SafeLoad<uint32_t>(in + 7) << 8, SafeLoad<uint32_t>(in + 7) >> 19 | SafeLoad<uint32_t>(in + 8) << 13, SafeLoad<uint32_t>(in + 8) >> 14 | SafeLoad<uint32_t>(in + 9) << 18, SafeLoad<uint32_t>(in + 9) >> 9 | SafeLoad<uint32_t>(in + 10) << 23, SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10) >> 31 | SafeLoad<uint32_t>(in + 11) << 1, SafeLoad<uint32_t>(in + 11) >> 26 | SafeLoad<uint32_t>(in + 12) << 6, SafeLoad<uint32_t>(in + 12) >> 21 | SafeLoad<uint32_t>(in + 13) << 11 };
+  shifts = simd_batch{ 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 4, 0, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  // extract 27-bit bundles 16 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 13) >> 16 | SafeLoad<uint32_t>(in + 14) << 16, SafeLoad<uint32_t>(in + 14) >> 11 | SafeLoad<uint32_t>(in + 15) << 21, SafeLoad<uint32_t>(in + 15) >> 6 | SafeLoad<uint32_t>(in + 16) << 26, SafeLoad<uint32_t>(in + 16), SafeLoad<uint32_t>(in + 16) >> 28 | SafeLoad<uint32_t>(in + 17) << 4, SafeLoad<uint32_t>(in + 17) >> 23 | SafeLoad<uint32_t>(in + 18) << 9, SafeLoad<uint32_t>(in + 18) >> 18 | SafeLoad<uint32_t>(in + 19) << 14, SafeLoad<uint32_t>(in + 19) >> 13 | SafeLoad<uint32_t>(in + 20) << 19, SafeLoad<uint32_t>(in + 20) >> 8 | SafeLoad<uint32_t>(in + 21) << 24, SafeLoad<uint32_t>(in + 21), SafeLoad<uint32_t>(in + 21) >> 30 | SafeLoad<uint32_t>(in + 22) << 2, SafeLoad<uint32_t>(in + 22) >> 25 | SafeLoad<uint32_t>(in + 23) << 7, SafeLoad<uint32_t>(in + 23) >> 20 | SafeLoad<uint32_t>(in + 24) << 12, SafeLoad<uint32_t>(in + 24) >> 15 | SafeLoad<uint32_t>(in + 25) << 17, SafeLoad<uint32_t>(in + 25) >> 10 | SafeLoad<uint32_t>(in + 26) << 22, SafeLoad<uint32_t>(in + 26) };
+  shifts = simd_batch{ 0, 0, 0, 1, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 5 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  in += 27;
+  return in;
+}
+
+inline static const uint32_t* unpack28_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0xfffffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 28-bit bundles 0 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 28 | SafeLoad<uint32_t>(in + 1) << 4, SafeLoad<uint32_t>(in + 1) >> 24 | SafeLoad<uint32_t>(in + 2) << 8, SafeLoad<uint32_t>(in + 2) >> 20 | SafeLoad<uint32_t>(in + 3) << 12, SafeLoad<uint32_t>(in + 3) >> 16 | SafeLoad<uint32_t>(in + 4) << 16, SafeLoad<uint32_t>(in + 4) >> 12 | SafeLoad<uint32_t>(in + 5) << 20, SafeLoad<uint32_t>(in + 5) >> 8 | SafeLoad<uint32_t>(in + 6) << 24, SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 28 | SafeLoad<uint32_t>(in + 8) << 4, SafeLoad<uint32_t>(in + 8) >> 24 | SafeLoad<uint32_t>(in + 9) << 8, SafeLoad<uint32_t>(in + 9) >> 20 | SafeLoad<uint32_t>(in + 10) << 12, SafeLoad<uint32_t>(in + 10) >> 16 | SafeLoad<uint32_t>(in + 11) << 16, SafeLoad<uint32_t>(in + 11) >> 12 | SafeLoad<uint32_t>(in + 12) << 20, SafeLoad<uint32_t>(in + 12) >> 8 | SafeLoad<uint32_t>(in + 13) << 24, SafeLoad<uint32_t>(in + 13) };
+  shifts = simd_batch{ 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 4 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  // extract 28-bit bundles 16 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 14), SafeLoad<uint32_t>(in + 14) >> 28 | SafeLoad<uint32_t>(in + 15) << 4, SafeLoad<uint32_t>(in + 15) >> 24 | SafeLoad<uint32_t>(in + 16) << 8, SafeLoad<uint32_t>(in + 16) >> 20 | SafeLoad<uint32_t>(in + 17) << 12, SafeLoad<uint32_t>(in + 17) >> 16 | SafeLoad<uint32_t>(in + 18) << 16, SafeLoad<uint32_t>(in + 18) >> 12 | SafeLoad<uint32_t>(in + 19) << 20, SafeLoad<uint32_t>(in + 19) >> 8 | SafeLoad<uint32_t>(in + 20) << 24, SafeLoad<uint32_t>(in + 20), SafeLoad<uint32_t>(in + 21), SafeLoad<uint32_t>(in + 21) >> 28 | SafeLoad<uint32_t>(in + 22) << 4, SafeLoad<uint32_t>(in + 22) >> 24 | SafeLoad<uint32_t>(in + 23) << 8, SafeLoad<uint32_t>(in + 23) >> 20 | SafeLoad<uint32_t>(in + 24) << 12, SafeLoad<uint32_t>(in + 24) >> 16 | SafeLoad<uint32_t>(in + 25) << 16, SafeLoad<uint32_t>(in + 25) >> 12 | SafeLoad<uint32_t>(in + 26) << 20, SafeLoad<uint32_t>(in + 26) >> 8 | SafeLoad<uint32_t>(in + 27) << 24, SafeLoad<uint32_t>(in + 27) };
+  shifts = simd_batch{ 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 4 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  in += 28;
+  return in;
+}
+
+inline static const uint32_t* unpack29_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x1fffffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 29-bit bundles 0 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 29 | SafeLoad<uint32_t>(in + 1) << 3, SafeLoad<uint32_t>(in + 1) >> 26 | SafeLoad<uint32_t>(in + 2) << 6, SafeLoad<uint32_t>(in + 2) >> 23 | SafeLoad<uint32_t>(in + 3) << 9, SafeLoad<uint32_t>(in + 3) >> 20 | SafeLoad<uint32_t>(in + 4) << 12, SafeLoad<uint32_t>(in + 4) >> 17 | SafeLoad<uint32_t>(in + 5) << 15, SafeLoad<uint32_t>(in + 5) >> 14 | SafeLoad<uint32_t>(in + 6) << 18, SafeLoad<uint32_t>(in + 6) >> 11 | SafeLoad<uint32_t>(in + 7) << 21, SafeLoad<uint32_t>(in + 7) >> 8 | SafeLoad<uint32_t>(in + 8) << 24, SafeLoad<uint32_t>(in + 8) >> 5 | SafeLoad<uint32_t>(in + 9) << 27, SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9) >> 31 | SafeLoad<uint32_t>(in + 10) << 1, SafeLoad<uint32_t>(in + 10) >> 28 | SafeLoad<uint32_t>(in + 11) << 4, SafeLoad<uint32_t>(in + 11) >> 25 | SafeLoad<uint32_t>(in + 12) << 7, SafeLoad<uint32_t>(in + 12) >> 22 | SafeLoad<uint32_t>(in + 13) << 10, SafeLoad<uint32_t>(in + 13) >> 19 | SafeLoad<uint32_t>(in + 14) << 13 };
+  shifts = simd_batch{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  // extract 29-bit bundles 16 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 14) >> 16 | SafeLoad<uint32_t>(in + 15) << 16, SafeLoad<uint32_t>(in + 15) >> 13 | SafeLoad<uint32_t>(in + 16) << 19, SafeLoad<uint32_t>(in + 16) >> 10 | SafeLoad<uint32_t>(in + 17) << 22, SafeLoad<uint32_t>(in + 17) >> 7 | SafeLoad<uint32_t>(in + 18) << 25, SafeLoad<uint32_t>(in + 18) >> 4 | SafeLoad<uint32_t>(in + 19) << 28, SafeLoad<uint32_t>(in + 19), SafeLoad<uint32_t>(in + 19) >> 30 | SafeLoad<uint32_t>(in + 20) << 2, SafeLoad<uint32_t>(in + 20) >> 27 | SafeLoad<uint32_t>(in + 21) << 5, SafeLoad<uint32_t>(in + 21) >> 24 | SafeLoad<uint32_t>(in + 22) << 8, SafeLoad<uint32_t>(in + 22) >> 21 | SafeLoad<uint32_t>(in + 23) << 11, SafeLoad<uint32_t>(in + 23) >> 18 | SafeLoad<uint32_t>(in + 24) << 14, SafeLoad<uint32_t>(in + 24) >> 15 | SafeLoad<uint32_t>(in + 25) << 17, SafeLoad<uint32_t>(in + 25) >> 12 | SafeLoad<uint32_t>(in + 26) << 20, SafeLoad<uint32_t>(in + 26) >> 9 | SafeLoad<uint32_t>(in + 27) << 23, SafeLoad<uint32_t>(in + 27) >> 6 | SafeLoad<uint32_t>(in + 28) << 26, SafeLoad<uint32_t>(in + 28) };
+  shifts = simd_batch{ 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  in += 29;
+  return in;
+}
+
+inline static const uint32_t* unpack30_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x3fffffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 30-bit bundles 0 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 30 | SafeLoad<uint32_t>(in + 1) << 2, SafeLoad<uint32_t>(in + 1) >> 28 | SafeLoad<uint32_t>(in + 2) << 4, SafeLoad<uint32_t>(in + 2) >> 26 | SafeLoad<uint32_t>(in + 3) << 6, SafeLoad<uint32_t>(in + 3) >> 24 | SafeLoad<uint32_t>(in + 4) << 8, SafeLoad<uint32_t>(in + 4) >> 22 | SafeLoad<uint32_t>(in + 5) << 10, SafeLoad<uint32_t>(in + 5) >> 20 | SafeLoad<uint32_t>(in + 6) << 12, SafeLoad<uint32_t>(in + 6) >> 18 | SafeLoad<uint32_t>(in + 7) << 14, SafeLoad<uint32_t>(in + 7) >> 16 | SafeLoad<uint32_t>(in + 8) << 16, SafeLoad<uint32_t>(in + 8) >> 14 | SafeLoad<uint32_t>(in + 9) << 18, SafeLoad<uint32_t>(in + 9) >> 12 | SafeLoad<uint32_t>(in + 10) << 20, SafeLoad<uint32_t>(in + 10) >> 10 | SafeLoad<uint32_t>(in + 11) << 22, SafeLoad<uint32_t>(in + 11) >> 8 | SafeLoad<uint32_t>(in + 12) << 24, SafeLoad<uint32_t>(in + 12) >> 6 | SafeLoad<uint32_t>(in + 13) << 26, SafeLoad<uint32_t>(in + 13) >> 4 | SafeLoad<uint32_t>(in + 14) << 28, SafeLoad<uint32_t>(in + 14) };
+  shifts = simd_batch{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  // extract 30-bit bundles 16 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 15), SafeLoad<uint32_t>(in + 15) >> 30 | SafeLoad<uint32_t>(in + 16) << 2, SafeLoad<uint32_t>(in + 16) >> 28 | SafeLoad<uint32_t>(in + 17) << 4, SafeLoad<uint32_t>(in + 17) >> 26 | SafeLoad<uint32_t>(in + 18) << 6, SafeLoad<uint32_t>(in + 18) >> 24 | SafeLoad<uint32_t>(in + 19) << 8, SafeLoad<uint32_t>(in + 19) >> 22 | SafeLoad<uint32_t>(in + 20) << 10, SafeLoad<uint32_t>(in + 20) >> 20 | SafeLoad<uint32_t>(in + 21) << 12, SafeLoad<uint32_t>(in + 21) >> 18 | SafeLoad<uint32_t>(in + 22) << 14, SafeLoad<uint32_t>(in + 22) >> 16 | SafeLoad<uint32_t>(in + 23) << 16, SafeLoad<uint32_t>(in + 23) >> 14 | SafeLoad<uint32_t>(in + 24) << 18, SafeLoad<uint32_t>(in + 24) >> 12 | SafeLoad<uint32_t>(in + 25) << 20, SafeLoad<uint32_t>(in + 25) >> 10 | SafeLoad<uint32_t>(in + 26) << 22, SafeLoad<uint32_t>(in + 26) >> 8 | SafeLoad<uint32_t>(in + 27) << 24, SafeLoad<uint32_t>(in + 27) >> 6 | SafeLoad<uint32_t>(in + 28) << 26, SafeLoad<uint32_t>(in + 28) >> 4 | SafeLoad<uint32_t>(in + 29) << 28, SafeLoad<uint32_t>(in + 29) };
+  shifts = simd_batch{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  in += 30;
+  return in;
+}
+
+inline static const uint32_t* unpack31_32(const uint32_t* in, uint32_t* out) {
+  uint32_t mask = 0x7fffffff;
+
+  simd_batch masks(mask);
+  simd_batch words, shifts;
+  simd_batch results;
+
+  // extract 31-bit bundles 0 to 15
+  words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 31 | SafeLoad<uint32_t>(in + 1) << 1, SafeLoad<uint32_t>(in + 1) >> 30 | SafeLoad<uint32_t>(in + 2) << 2, SafeLoad<uint32_t>(in + 2) >> 29 | SafeLoad<uint32_t>(in + 3) << 3, SafeLoad<uint32_t>(in + 3) >> 28 | SafeLoad<uint32_t>(in + 4) << 4, SafeLoad<uint32_t>(in + 4) >> 27 | SafeLoad<uint32_t>(in + 5) << 5, SafeLoad<uint32_t>(in + 5) >> 26 | SafeLoad<uint32_t>(in + 6) << 6, SafeLoad<uint32_t>(in + 6) >> 25 | SafeLoad<uint32_t>(in + 7) << 7, SafeLoad<uint32_t>(in + 7) >> 24 | SafeLoad<uint32_t>(in + 8) << 8, SafeLoad<uint32_t>(in + 8) >> 23 | SafeLoad<uint32_t>(in + 9) << 9, SafeLoad<uint32_t>(in + 9) >> 22 | SafeLoad<uint32_t>(in + 10) << 10, SafeLoad<uint32_t>(in + 10) >> 21 | SafeLoad<uint32_t>(in + 11) << 11, SafeLoad<uint32_t>(in + 11) >> 20 | SafeLoad<uint32_t>(in + 12) << 12, SafeLoad<uint32_t>(in + 12) >> 19 | SafeLoad<uint32_t>(in + 13) << 13, SafeLoad<uint32_t>(in + 13) >> 18 | SafeLoad<uint32_t>(in + 14) << 14, SafeLoad<uint32_t>(in + 14) >> 17 | SafeLoad<uint32_t>(in + 15) << 15 };
+  shifts = simd_batch{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  // extract 31-bit bundles 16 to 31
+  words = simd_batch{ SafeLoad<uint32_t>(in + 15) >> 16 | SafeLoad<uint32_t>(in + 16) << 16, SafeLoad<uint32_t>(in + 16) >> 15 | SafeLoad<uint32_t>(in + 17) << 17, SafeLoad<uint32_t>(in + 17) >> 14 | SafeLoad<uint32_t>(in + 18) << 18, SafeLoad<uint32_t>(in + 18) >> 13 | SafeLoad<uint32_t>(in + 19) << 19, SafeLoad<uint32_t>(in + 19) >> 12 | SafeLoad<uint32_t>(in + 20) << 20, SafeLoad<uint32_t>(in + 20) >> 11 | SafeLoad<uint32_t>(in + 21) << 21, SafeLoad<uint32_t>(in + 21) >> 10 | SafeLoad<uint32_t>(in + 22) << 22, SafeLoad<uint32_t>(in + 22) >> 9 | SafeLoad<uint32_t>(in + 23) << 23, SafeLoad<uint32_t>(in + 23) >> 8 | SafeLoad<uint32_t>(in + 24) << 24, SafeLoad<uint32_t>(in + 24) >> 7 | SafeLoad<uint32_t>(in + 25) << 25, SafeLoad<uint32_t>(in + 25) >> 6 | SafeLoad<uint32_t>(in + 26) << 26, SafeLoad<uint32_t>(in + 26) >> 5 | SafeLoad<uint32_t>(in + 27) << 27, SafeLoad<uint32_t>(in + 27) >> 4 | SafeLoad<uint32_t>(in + 28) << 28, SafeLoad<uint32_t>(in + 28) >> 3 | SafeLoad<uint32_t>(in + 29) << 29, SafeLoad<uint32_t>(in + 29) >> 2 | SafeLoad<uint32_t>(in + 30) << 30, SafeLoad<uint32_t>(in + 30) };
+  shifts = simd_batch{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 };
+  results = (words >> shifts) & masks;
+  results.store_unaligned(out);
+  out += 16;
+
+  in += 31;
+  return in;
+}
+
+inline static const uint32_t* unpack32_32(const uint32_t* in, uint32_t* out) {
+  memcpy(out, in, 32 * sizeof(*out));
+  in += 32;
+  out += 32;
+
+  return in;
+}
+
+};  // struct UnpackBits512
+
+}  // namespace
+}  // namespace internal
+}  // namespace arrow
+
diff --git a/cpp/src/arrow/util/bpacking_simd_codegen.py b/cpp/src/arrow/util/bpacking_simd_codegen.py
new file mode 100644
index 00000000000..d033394df97
--- /dev/null
+++ b/cpp/src/arrow/util/bpacking_simd_codegen.py
@@ -0,0 +1,209 @@
+#!/bin/python
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Usage:
+#   python bpacking_simd_codegen.py 128 > bpacking_simd128_generated.h
+#   python bpacking_simd_codegen.py 256 > bpacking_simd256_generated.h
+#   python bpacking_simd_codegen.py 512 > bpacking_simd512_generated.h
+
+from functools import partial
+import sys
+from textwrap import dedent, indent
+
+
+class UnpackGenerator:
+
+    def __init__(self, simd_width):
+        self.simd_width = simd_width
+        if simd_width % 32 != 0:
+            raise("SIMD bit width should be a multiple of 32")
+        self.simd_byte_width = simd_width // 8
+
+    def print_unpack_bit0_func(self):
+        print(
+            "inline static const uint32_t* unpack0_32(const uint32_t* in, uint32_t* out) {")
+        print("  memset(out, 0x0, 32 * sizeof(*out));")
+        print("  out += 32;")
+        print("")
+        print("  return in;")
+        print("}")
+
+
+    def print_unpack_bit32_func(self):
+        print(
+            "inline static const uint32_t* unpack32_32(const uint32_t* in, uint32_t* out) {")
+        print("  memcpy(out, in, 32 * sizeof(*out));")
+        print("  in += 32;")
+        print("  out += 32;")
+        print("")
+        print("  return in;")
+        print("}")
+
+    def print_unpack_bit_func(self, bit):
+        def p(code):
+            print(indent(code, prefix='  '))
+
+        shift = 0
+        shifts = []
+        in_index = 0
+        inls = []
+        mask = (1 << bit) - 1
+        bracket = "{"
+
+        print(f"inline static const uint32_t* unpack{bit}_32(const uint32_t* in, uint32_t* out) {{")
+        p(dedent(f"""\
+            uint32_t mask = 0x{mask:0x};
+
+            simd_batch masks(mask);
+            simd_batch words, shifts;
+            simd_batch results;
+            """))
+
+        def safe_load(index):
+            return f"SafeLoad<uint32_t>(in + {index})"
+
+        for i in range(32):
+            if shift + bit == 32:
+                shifts.append(shift)
+                inls.append(safe_load(in_index))
+                in_index += 1
+                shift = 0
+            elif shift + bit > 32:  # cross the boundary
+                inls.append(
+                    f"{safe_load(in_index)} >> {shift} | {safe_load(in_index + 1)} << {32 - shift}")
+                in_index += 1
+                shift = bit - (32 - shift)
+                shifts.append(0)  # zero shift
+            else:
+                shifts.append(shift)
+                inls.append(safe_load(in_index))
+                shift += bit
+
+        bytes_per_batch = self.simd_byte_width
+        words_per_batch = bytes_per_batch // 4
+
+        one_word_template = dedent("""\
+            words = simd_batch{{ {words} }};
+            shifts = simd_batch{{ {shifts} }};
+            results = (words >> shifts) & masks;
+            results.store_unaligned(out);
+            out += {words_per_batch};
+            """)
+
+        for start in range(0, 32, words_per_batch):
+            stop = start + words_per_batch;
+            p(f"""// extract {bit}-bit bundles {start} to {stop - 1}""")
+            p(one_word_template.format(
+                words=", ".join(inls[start:stop]),
+                shifts=", ".join(map(str, shifts[start:stop])),
+                words_per_batch=words_per_batch))
+
+        p(dedent(f"""\
+            in += {bit};
+            return in;"""))
+        print("}")
+
+
+def print_copyright():
+    print(dedent("""\
+        // Licensed to the Apache Software Foundation (ASF) under one
+        // or more contributor license agreements.  See the NOTICE file
+        // distributed with this work for additional information
+        // regarding copyright ownership.  The ASF licenses this file
+        // to you under the Apache License, Version 2.0 (the
+        // "License"); you may not use this file except in compliance
+        // with the License.  You may obtain a copy of the License at
+        //
+        //   http://www.apache.org/licenses/LICENSE-2.0
+        //
+        // Unless required by applicable law or agreed to in writing,
+        // software distributed under the License is distributed on an
+        // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+        // KIND, either express or implied.  See the License for the
+        // specific language governing permissions and limitations
+        // under the License.
+        """))
+
+
+def print_note():
+    print("// Automatically generated file; DO NOT EDIT.")
+    print()
+
+
+def main(simd_width):
+    print_copyright()
+    print_note()
+
+    struct_name = f"UnpackBits{simd_width}"
+
+    # NOTE: templating the UnpackBits struct on the dispatch level avoids
+    # potential name collisions if there are several UnpackBits generations
+    # with the same SIMD width on a given architecture.
+
+    print(dedent(f"""\
+        #pragma once
+
+        #include <cstdint>
+        #include <cstring>
+
+        #include <xsimd/xsimd.hpp>
+
+        #include "arrow/util/dispatch.h"
+        #include "arrow/util/ubsan.h"
+
+        namespace arrow {{
+        namespace internal {{
+        namespace {{
+
+        using ::arrow::util::SafeLoad;
+
+        template <DispatchLevel level>
+        struct {struct_name} {{
+
+        using simd_batch = xsimd::batch<uint32_t, {simd_width // 32}>;
+        """))
+
+    gen = UnpackGenerator(simd_width)
+    gen.print_unpack_bit0_func()
+    print()
+    for i in range(1, 32):
+        gen.print_unpack_bit_func(i)
+        print()
+    gen.print_unpack_bit32_func()
+    print()
+
+    print(dedent(f"""\
+        }};  // struct {struct_name}
+
+        }}  // namespace
+        }}  // namespace internal
+        }}  // namespace arrow
+        """))
+
+
+if __name__ == '__main__':
+    usage = f"""Usage: {__file__} <SIMD bit-width>"""
+    if len(sys.argv) != 2:
+        raise ValueError(usage)
+    try:
+        simd_width = int(sys.argv[1])
+    except ValueError:
+        raise ValueError(usage)
+
+    main(simd_width)
diff --git a/cpp/src/arrow/util/bpacking_simd_internal.h b/cpp/src/arrow/util/bpacking_simd_internal.h
new file mode 100644
index 00000000000..72d23f2d38c
--- /dev/null
+++ b/cpp/src/arrow/util/bpacking_simd_internal.h
@@ -0,0 +1,138 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/util/dispatch.h"
+#include "arrow/util/logging.h"
+
+namespace arrow {
+namespace internal {
+
+template <typename UnpackBits>
+static int unpack32_specialized(const uint32_t* in, uint32_t* out, int batch_size,
+                                int num_bits) {
+  batch_size = batch_size / 32 * 32;
+  int num_loops = batch_size / 32;
+
+  switch (num_bits) {
+    case 0:
+      for (int i = 0; i < num_loops; ++i) in = UnpackBits::unpack0_32(in, out + i * 32);
+      break;
+    case 1:
+      for (int i = 0; i < num_loops; ++i) in = UnpackBits::unpack1_32(in, out + i * 32);
+      break;
+    case 2:
+      for (int i = 0; i < num_loops; ++i) in = UnpackBits::unpack2_32(in, out + i * 32);
+      break;
+    case 3:
+      for (int i = 0; i < num_loops; ++i) in = UnpackBits::unpack3_32(in, out + i * 32);
+      break;
+    case 4:
+      for (int i = 0; i < num_loops; ++i) in = UnpackBits::unpack4_32(in, out + i * 32);
+      break;
+    case 5:
+      for (int i = 0; i < num_loops; ++i) in = UnpackBits::unpack5_32(in, out + i * 32);
+      break;
+    case 6:
+      for (int i = 0; i < num_loops; ++i) in = UnpackBits::unpack6_32(in, out + i * 32);
+      break;
+    case 7:
+      for (int i = 0; i < num_loops; ++i) in = UnpackBits::unpack7_32(in, out + i * 32);
+      break;
+    case 8:
+      for (int i = 0; i < num_loops; ++i) in = UnpackBits::unpack8_32(in, out + i * 32);
+      break;
+    case 9:
+      for (int i = 0; i < num_loops; ++i) in = UnpackBits::unpack9_32(in, out + i * 32);
+      break;
+    case 10:
+      for (int i = 0; i < num_loops; ++i) in = UnpackBits::unpack10_32(in, out + i * 32);
+      break;
+    case 11:
+      for (int i = 0; i < num_loops; ++i) in = UnpackBits::unpack11_32(in, out + i * 32);
+      break;
+    case 12:
+      for (int i = 0; i < num_loops; ++i) in = UnpackBits::unpack12_32(in, out + i * 32);
+      break;
+    case 13:
+      for (int i = 0; i < num_loops; ++i) in = UnpackBits::unpack13_32(in, out + i * 32);
+      break;
+    case 14:
+      for (int i = 0; i < num_loops; ++i) in = UnpackBits::unpack14_32(in, out + i * 32);
+      break;
+    case 15:
+      for (int i = 0; i < num_loops; ++i) in = UnpackBits::unpack15_32(in, out + i * 32);
+      break;
+    case 16:
+      for (int i = 0; i < num_loops; ++i) in = UnpackBits::unpack16_32(in, out + i * 32);
+      break;
+    case 17:
+      for (int i = 0; i < num_loops; ++i) in = UnpackBits::unpack17_32(in, out + i * 32);
+      break;
+    case 18:
+      for (int i = 0; i < num_loops; ++i) in = UnpackBits::unpack18_32(in, out + i * 32);
+      break;
+    case 19:
+      for (int i = 0; i < num_loops; ++i) in = UnpackBits::unpack19_32(in, out + i * 32);
+      break;
+    case 20:
+      for (int i = 0; i < num_loops; ++i) in = UnpackBits::unpack20_32(in, out + i * 32);
+      break;
+    case 21:
+      for (int i = 0; i < num_loops; ++i) in = UnpackBits::unpack21_32(in, out + i * 32);
+      break;
+    case 22:
+      for (int i = 0; i < num_loops; ++i) in = UnpackBits::unpack22_32(in, out + i * 32);
+      break;
+    case 23:
+      for (int i = 0; i < num_loops; ++i) in = UnpackBits::unpack23_32(in, out + i * 32);
+      break;
+    case 24:
+      for (int i = 0; i < num_loops; ++i) in = UnpackBits::unpack24_32(in, out + i * 32);
+      break;
+    case 25:
+      for (int i = 0; i < num_loops; ++i) in = UnpackBits::unpack25_32(in, out + i * 32);
+      break;
+    case 26:
+      for (int i = 0; i < num_loops; ++i) in = UnpackBits::unpack26_32(in, out + i * 32);
+      break;
+    case 27:
+      for (int i = 0; i < num_loops; ++i) in = UnpackBits::unpack27_32(in, out + i * 32);
+      break;
+    case 28:
+      for (int i = 0; i < num_loops; ++i) in = UnpackBits::unpack28_32(in, out + i * 32);
+      break;
+    case 29:
+      for (int i = 0; i < num_loops; ++i) in = UnpackBits::unpack29_32(in, out + i * 32);
+      break;
+    case 30:
+      for (int i = 0; i < num_loops; ++i) in = UnpackBits::unpack30_32(in, out + i * 32);
+      break;
+    case 31:
+      for (int i = 0; i < num_loops; ++i) in = UnpackBits::unpack31_32(in, out + i * 32);
+      break;
+    case 32:
+      for (int i = 0; i < num_loops; ++i) in = UnpackBits::unpack32_32(in, out + i * 32);
+      break;
+    default:
+      DCHECK(false) << "Unsupported num_bits";
+  }
+
+  return batch_size;
+}
+
+}  // namespace internal
+}  // namespace arrow
diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt
index 01b658d6d47..e5ab78c3822 100644
--- a/cpp/thirdparty/versions.txt
+++ b/cpp/thirdparty/versions.txt
@@ -52,8 +52,7 @@ ARROW_SNAPPY_BUILD_VERSION=1.1.8
 ARROW_THRIFT_BUILD_VERSION=0.13.0
 ARROW_THRIFT_BUILD_MD5_CHECKSUM=38a27d391a2b03214b444cb13d5664f1
 ARROW_UTF8PROC_BUILD_VERSION=v2.6.1
-# For https://github.com/xtensor-stack/xsimd/pull/419
-ARROW_XSIMD_BUILD_VERSION=e916f3ab1bc513328b627df702226a1d1e2ae3a9
+ARROW_XSIMD_BUILD_VERSION=e9234cd6e6f4428fc260073b2c34ffe86fda1f34
 ARROW_ZLIB_BUILD_VERSION=1.2.11
 ARROW_ZSTD_BUILD_VERSION=v1.4.8
 

From 00a443629c00079ea03c0b9f415d74669d2759a7 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Tue, 13 Apr 2021 18:46:24 +0200
Subject: [PATCH 012/719] ARROW-12357: [Archery] Bump Jinja2 version
 requirement
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Jinja2 < 2.11 doesn't support passing Path objects for filesystem paths.

Closes #10011 from pitrou/ARROW-12357-archery-jinja-req

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 dev/archery/setup.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/dev/archery/setup.py b/dev/archery/setup.py
index 892e6b2a8bd..0537e8b4d31 100755
--- a/dev/archery/setup.py
+++ b/dev/archery/setup.py
@@ -24,11 +24,14 @@
 if sys.version_info < (3, 6):
     sys.exit('Python < 3.6 is not supported')
 
+# For pathlib.Path compatibility
+jinja_req = 'jinja2>=2.11'
+
 extras = {
     'benchmark': ['pandas'],
     'docker': ['ruamel.yaml', 'python-dotenv'],
-    'release': ['jinja2', 'jira', 'semver', 'gitpython'],
-    'crossbow': ['github3.py', 'jinja2', 'pygit2', 'ruamel.yaml',
+    'release': [jinja_req, 'jira', 'semver', 'gitpython'],
+    'crossbow': ['github3.py', jinja_req, 'pygit2', 'ruamel.yaml',
                  'setuptools_scm'],
 }
 extras['bot'] = extras['crossbow'] + ['pygithub', 'jira']

From a5f3b35168980eb35d5daf77edb2a1611dd71f7d Mon Sep 17 00:00:00 2001
From: Rok <rok@mihevc.org>
Date: Tue, 13 Apr 2021 12:46:08 -0700
Subject: [PATCH 013/719] ARROW-11070: [C++][Compute] Implement power kernel

This is to resolve [ARROW-11070](https://issues.apache.org/jira/projects/ARROW/issues/ARROW-11070).

Closes #9841 from rok/ARROW-11070

Lead-authored-by: Rok <rok@mihevc.org>
Co-authored-by: Yibo Cai <yibo.cai@arm.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 cpp/src/arrow/compute/api_scalar.cc           |   1 +
 cpp/src/arrow/compute/api_scalar.h            |  14 +++
 .../compute/kernels/scalar_arithmetic.cc      |  87 ++++++++++++++
 .../compute/kernels/scalar_arithmetic_test.cc | 110 +++++++++++++++++-
 docs/source/cpp/compute.rst                   |   4 +
 docs/source/python/api/compute.rst            |   2 +
 6 files changed, 217 insertions(+), 1 deletion(-)

diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc
index f4696fbe02a..d169fd2ebde 100644
--- a/cpp/src/arrow/compute/api_scalar.cc
+++ b/cpp/src/arrow/compute/api_scalar.cc
@@ -52,6 +52,7 @@ SCALAR_ARITHMETIC_BINARY(Add, "add", "add_checked")
 SCALAR_ARITHMETIC_BINARY(Subtract, "subtract", "subtract_checked")
 SCALAR_ARITHMETIC_BINARY(Multiply, "multiply", "multiply_checked")
 SCALAR_ARITHMETIC_BINARY(Divide, "divide", "divide_checked")
+SCALAR_ARITHMETIC_BINARY(Power, "power", "power_checked")
 
 // ----------------------------------------------------------------------
 // Set-related operations
diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h
index f59426d8f1b..6032f656c4a 100644
--- a/cpp/src/arrow/compute/api_scalar.h
+++ b/cpp/src/arrow/compute/api_scalar.h
@@ -204,6 +204,20 @@ Result<Datum> Divide(const Datum& left, const Datum& right,
                      ArithmeticOptions options = ArithmeticOptions(),
                      ExecContext* ctx = NULLPTR);
 
+/// \brief Raise the values of base array to the power of the exponent array values.
+/// Array values must be the same length. If either base or exponent is null the result
+/// will be null.
+///
+/// \param[in] left the base
+/// \param[in] right the exponent
+/// \param[in] options arithmetic options (enable/disable overflow checking), optional
+/// \param[in] ctx the function execution context, optional
+/// \return the elementwise base value raised to the power of exponent
+ARROW_EXPORT
+Result<Datum> Power(const Datum& left, const Datum& right,
+                    ArithmeticOptions options = ArithmeticOptions(),
+                    ExecContext* ctx = NULLPTR);
+
 /// \brief Compare a numeric array with a scalar.
 ///
 /// \param[in] left datum to compare, must be an Array
diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
index 7abaa1c1a59..260721b08d9 100644
--- a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include <cmath>
+
 #include "arrow/compute/kernels/common.h"
 #include "arrow/util/int_util_internal.h"
 #include "arrow/util/macros.h"
@@ -233,6 +235,70 @@ struct DivideChecked {
   }
 };
 
+struct Power {
+  ARROW_NOINLINE
+  static uint64_t IntegerPower(uint64_t base, uint64_t exp) {
+    // right to left O(logn) power
+    uint64_t pow = 1;
+    while (exp) {
+      pow *= (exp & 1) ? base : 1;
+      base *= base;
+      exp >>= 1;
+    }
+    return pow;
+  }
+
+  template <typename T>
+  static enable_if_integer<T> Call(KernelContext* ctx, T base, T exp) {
+    if (exp < 0) {
+      ctx->SetStatus(
+          Status::Invalid("integers to negative integer powers are not allowed"));
+      return 0;
+    }
+    return static_cast<T>(IntegerPower(base, exp));
+  }
+
+  template <typename T>
+  static enable_if_floating_point<T> Call(KernelContext* ctx, T base, T exp) {
+    return std::pow(base, exp);
+  }
+};
+
+struct PowerChecked {
+  template <typename T, typename Arg0, typename Arg1>
+  static enable_if_integer<T> Call(KernelContext* ctx, Arg0 base, Arg1 exp) {
+    if (exp < 0) {
+      ctx->SetStatus(
+          Status::Invalid("integers to negative integer powers are not allowed"));
+      return 0;
+    } else if (exp == 0) {
+      return 1;
+    }
+    // left to right O(logn) power with overflow checks
+    bool overflow = false;
+    uint64_t bitmask =
+        1ULL << (63 - BitUtil::CountLeadingZeros(static_cast<uint64_t>(exp)));
+    T pow = 1;
+    while (bitmask) {
+      overflow |= MultiplyWithOverflow(pow, pow, &pow);
+      if (exp & bitmask) {
+        overflow |= MultiplyWithOverflow(pow, base, &pow);
+      }
+      bitmask >>= 1;
+    }
+    if (overflow) {
+      ctx->SetStatus(Status::Invalid("overflow"));
+    }
+    return pow;
+  }
+
+  template <typename T, typename Arg0, typename Arg1>
+  static enable_if_floating_point<T> Call(KernelContext* ctx, Arg0 base, Arg1 exp) {
+    static_assert(std::is_same<T, Arg0>::value && std::is_same<T, Arg1>::value, "");
+    return std::pow(base, exp);
+  }
+};
+
 // Generate a kernel given an arithmetic functor
 template <template <typename... Args> class KernelGenerator, typename Op>
 ArrayKernelExec NumericEqualTypesBinary(detail::GetTypeId get_id) {
@@ -359,6 +425,18 @@ const FunctionDoc div_checked_doc{
      "integer overflow is encountered."),
     {"dividend", "divisor"}};
 
+const FunctionDoc pow_doc{
+    "Raise arguments to power element-wise",
+    ("Integer to negative integer power returns an error. However, integer overflow\n"
+     "wraps around. If either base or exponent is null the result will be null."),
+    {"base", "exponent"}};
+
+const FunctionDoc pow_checked_doc{
+    "Raise arguments to power element-wise",
+    ("An error is returned when integer to negative integer power is encountered,\n"
+     "or integer overflow is encountered."),
+    {"base", "exponent"}};
+
 }  // namespace
 
 void RegisterScalarArithmetic(FunctionRegistry* registry) {
@@ -407,6 +485,15 @@ void RegisterScalarArithmetic(FunctionRegistry* registry) {
   auto divide_checked =
       MakeArithmeticFunctionNotNull<DivideChecked>("divide_checked", &div_checked_doc);
   DCHECK_OK(registry->AddFunction(std::move(divide_checked)));
+
+  // ----------------------------------------------------------------------
+  auto power = MakeArithmeticFunction<Power>("power", &pow_doc);
+  DCHECK_OK(registry->AddFunction(std::move(power)));
+
+  // ----------------------------------------------------------------------
+  auto power_checked =
+      MakeArithmeticFunctionNotNull<PowerChecked>("power_checked", &pow_checked_doc);
+  DCHECK_OK(registry->AddFunction(std::move(power_checked)));
 }
 
 }  // namespace internal
diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
index 4d4f14e1154..cd5f298ae51 100644
--- a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
@@ -590,6 +590,114 @@ TYPED_TEST(TestBinaryArithmeticSigned, DivideOverflowRaises) {
   this->AssertBinop(Divide, MakeArray(min), MakeArray(-1), "[0]");
 }
 
+TYPED_TEST(TestBinaryArithmeticFloating, Power) {
+  using CType = typename TestFixture::CType;
+  auto max = std::numeric_limits<CType>::max();
+  this->SetNansEqual(true);
+
+  for (auto check_overflow : {false, true}) {
+    this->SetOverflowCheck(check_overflow);
+
+    // Empty arrays
+    this->AssertBinop(Power, "[]", "[]", "[]");
+    // Ordinary arrays
+    this->AssertBinop(Power, "[3.4, 16, 0.64, 1.2, 0]", "[1, 0.5, 2, 4, 0]",
+                      "[3.4, 4, 0.4096, 2.0736, 1]");
+    // Array with nulls
+    this->AssertBinop(Power, "[null, 1, 3.3, null, 2]", "[1, 4, 2, 5, 0.1]",
+                      "[null, 1, 10.89, null, 1.07177346]");
+    // Scalar exponentiated by array
+    this->AssertBinop(Power, 10.0F, "[null, 1, 2.5, null, 2, 5]",
+                      "[null, 10, 316.227766017, null, 100, 100000]");
+    // Array exponentiated by scalar
+    this->AssertBinop(Power, "[null, 1, 2.5, null, 2, 5]", 10.0F,
+                      "[null, 1, 9536.74316406, null, 1024, 9765625]");
+    // Array with infinity
+    this->AssertBinop(Power, "[3.4, Inf, -Inf, 1.1, 100000]", "[1, 2, 3, Inf, 100000]",
+                      "[3.4, Inf, -Inf, Inf, Inf]");
+    // Array with NaN
+    this->AssertBinop(Power, "[3.4, NaN, 2.0]", "[1, 2, 2.0]", "[3.4, NaN, 4.0]");
+    // Scalar exponentiated by scalar
+    this->AssertBinop(Power, 21.0F, 3.0F, 9261.0F);
+    // Divide by zero
+    this->AssertBinop(Power, "[0.0, 0.0]", "[-1.0, -3.0]", "[Inf, Inf]");
+    // Check overflow behaviour
+    this->AssertBinop(Power, max, 10, INFINITY);
+  }
+
+  // Edge cases - removing NaNs
+  this->AssertBinop(Power, "[1, NaN, 0, null, 1.2, -Inf, Inf, 1.1, 1, 0, 1, 0]",
+                    "[NaN, 0, NaN, 1, null, 1, 2, -Inf, Inf, 0, 0, 42]",
+                    "[1, 1, NaN, null, null, -Inf, Inf, 0, 1, 1, 1, 0]");
+}
+
+TYPED_TEST(TestBinaryArithmeticIntegral, Power) {
+  using CType = typename TestFixture::CType;
+  auto max = std::numeric_limits<CType>::max();
+
+  for (auto check_overflow : {false, true}) {
+    this->SetOverflowCheck(check_overflow);
+
+    // Empty arrays
+    this->AssertBinop(Power, "[]", "[]", "[]");
+    // Ordinary arrays
+    this->AssertBinop(Power, "[3, 2, 6, 2]", "[1, 1, 2, 0]", "[3, 2, 36, 1]");
+    // Array with nulls
+    this->AssertBinop(Power, "[null, 2, 3, null, 20]", "[1, 6, 2, 5, 1]",
+                      "[null, 64, 9, null, 20]");
+    // Scalar exponentiated by array
+    this->AssertBinop(Power, 3, "[null, 3, 4, null, 2]", "[null, 27, 81, null, 9]");
+    // Array exponentiated by scalar
+    this->AssertBinop(Power, "[null, 10, 3, null, 2]", 2, "[null, 100, 9, null, 4]");
+    // Scalar exponentiated by scalar
+    this->AssertBinop(Power, 4, 3, 64);
+    // Edge cases
+    this->AssertBinop(Power, "[0, 1, 0]", "[0, 0, 42]", "[1, 1, 0]");
+  }
+
+  // Overflow raises
+  this->SetOverflowCheck(true);
+  this->AssertBinopRaises(Power, MakeArray(max), MakeArray(10), "overflow");
+  // Disable overflow check
+  this->SetOverflowCheck(false);
+  this->AssertBinop(Power, max, 10, 1);
+}
+
+TYPED_TEST(TestBinaryArithmeticSigned, Power) {
+  using CType = typename TestFixture::CType;
+  auto max = std::numeric_limits<CType>::max();
+
+  for (auto check_overflow : {false, true}) {
+    this->SetOverflowCheck(check_overflow);
+
+    // Empty arrays
+    this->AssertBinop(Power, "[]", "[]", "[]");
+    // Ordinary arrays
+    this->AssertBinop(Power, "[-3, 2, -6, 2]", "[3, 1, 2, 0]", "[-27, 2, 36, 1]");
+    // Array with nulls
+    this->AssertBinop(Power, "[null, 10, 127, null, -20]", "[1, 2, 1, 5, 1]",
+                      "[null, 100, 127, null, -20]");
+    // Scalar exponentiated by array
+    this->AssertBinop(Power, 11, "[null, 1, null, 2]", "[null, 11, null, 121]");
+    // Array exponentiated by scalar
+    this->AssertBinop(Power, "[null, 1, 3, null, 2]", 3, "[null, 1, 27, null, 8]");
+    // Scalar exponentiated by scalar
+    this->AssertBinop(Power, 16, 1, 16);
+    // Edge cases
+    this->AssertBinop(Power, "[1, 0, -1, 2]", "[0, 42, 0, 1]", "[1, 0, 1, 2]");
+    // Divide by zero raises
+    this->AssertBinopRaises(Power, MakeArray(0), MakeArray(-1),
+                            "integers to negative integer powers are not allowed");
+  }
+
+  // Overflow raises
+  this->SetOverflowCheck(true);
+  this->AssertBinopRaises(Power, MakeArray(max), MakeArray(10), "overflow");
+  // Disable overflow check
+  this->SetOverflowCheck(false);
+  this->AssertBinop(Power, max, 10, 1);
+}
+
 TYPED_TEST(TestBinaryArithmeticFloating, Sub) {
   this->AssertBinop(Subtract, "[]", "[]", "[]");
 
@@ -638,7 +746,7 @@ TYPED_TEST(TestBinaryArithmeticFloating, Mul) {
 }
 
 TEST(TestBinaryArithmetic, DispatchBest) {
-  for (std::string name : {"add", "subtract", "multiply", "divide"}) {
+  for (std::string name : {"add", "subtract", "multiply", "divide", "power"}) {
     for (std::string suffix : {"", "_checked"}) {
       name += suffix;
 
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index 715d5036964..b2ecb3b2ceb 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -272,6 +272,10 @@ an ``Invalid`` :class:`Status` when overflow is detected.
 +--------------------------+------------+--------------------+---------------------+
 | divide_checked           | Binary     | Numeric            | Numeric             |
 +--------------------------+------------+--------------------+---------------------+
+| power                    | Binary     | Numeric            | Numeric             |
++--------------------------+------------+--------------------+---------------------+
+| power_checked            | Binary     | Numeric            | Numeric             |
++--------------------------+------------+--------------------+---------------------+
 | multiply                 | Binary     | Numeric            | Numeric             |
 +--------------------------+------------+--------------------+---------------------+
 | multiply_checked         | Binary     | Numeric            | Numeric             |
diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst
index d6efc6a5fea..da16ccdfa29 100644
--- a/docs/source/python/api/compute.rst
+++ b/docs/source/python/api/compute.rst
@@ -53,6 +53,8 @@ throws an ``ArrowInvalid`` exception when overflow is detected.
    multiply_checked
    subtract
    subtract_checked
+   power
+   power_checked
 
 Comparisons
 -----------

From 5db4092dc1afac5870715f339538eb206ee71fb0 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Tue, 13 Apr 2021 12:54:56 -0700
Subject: [PATCH 014/719] ARROW-12316: [C++] Prefer mimalloc on Apple

mimalloc gives better results than jemalloc on macro-benchmarks on macOS.

Closes #10015 from pitrou/ARROW-12316-apple-mimalloc

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 cpp/src/arrow/memory_pool.cc | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc
index 2d19b3d1962..f402ccb4172 100644
--- a/cpp/src/arrow/memory_pool.cc
+++ b/cpp/src/arrow/memory_pool.cc
@@ -113,13 +113,19 @@ struct SupportedBackend {
 
 const std::vector<SupportedBackend>& SupportedBackends() {
   static std::vector<SupportedBackend> backends = {
-#ifdef ARROW_JEMALLOC
-      {"jemalloc", MemoryPoolBackend::Jemalloc},
+  // ARROW-12316: Apple => mimalloc first, then jemalloc
+  //              non-Apple => jemalloc first, then mimalloc
+#if defined(ARROW_JEMALLOC) && !defined(__APPLE__)
+    {"jemalloc", MemoryPoolBackend::Jemalloc},
 #endif
 #ifdef ARROW_MIMALLOC
-      {"mimalloc", MemoryPoolBackend::Mimalloc},
+    {"mimalloc", MemoryPoolBackend::Mimalloc},
+#endif
+#if defined(ARROW_JEMALLOC) && defined(__APPLE__)
+    {"jemalloc", MemoryPoolBackend::Jemalloc},
 #endif
-      {"system", MemoryPoolBackend::System}};
+    {"system", MemoryPoolBackend::System}
+  };
   return backends;
 }
 

From ab3a08c16997e16b7dae34728c139e36aba3129a Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Tue, 13 Apr 2021 15:20:58 -0700
Subject: [PATCH 015/719] ARROW-12304: [R] Update news and polish docs for 4.0

Closes #10001 from nealrichardson/news-4.0

Authored-by: Neal Richardson <neal.p.richardson@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 r/DESCRIPTION |  4 +++-
 r/NEWS.md     | 37 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/r/DESCRIPTION b/r/DESCRIPTION
index a355e790a2d..ab598942a09 100644
--- a/r/DESCRIPTION
+++ b/r/DESCRIPTION
@@ -2,9 +2,11 @@ Package: arrow
 Title: Integration to 'Apache' 'Arrow'
 Version: 3.0.0.9000
 Authors@R: c(
+    person("Neal", "Richardson", email = "neal@ursalabs.org", role = c("aut", "cre")),
+    person("Ian", "Cook", email = "ianmcook@gmail.com", role = c("aut")),
+    person("Jonathan", "Keane", email = "jkeane@gmail.com", role = c("aut")),
     person("Romain", "Fran\u00e7ois", email = "romain@rstudio.com", role = c("aut"), comment = c(ORCID = "0000-0002-2444-4226")),
     person("Jeroen", "Ooms", email = "jeroen@berkeley.edu", role = c("aut")),
-    person("Neal", "Richardson", email = "neal@ursalabs.org", role = c("aut", "cre")),
     person("Javier", "Luraschi", email = "javier@rstudio.com", role = c("ctb")),
     person("Jeffrey", "Wong", email = "jeffreyw@netflix.com", role = c("ctb")),
     person("Apache Arrow", email = "dev@arrow.apache.org", role = c("aut", "cph"))
diff --git a/r/NEWS.md b/r/NEWS.md
index 1f1acb89805..664649537e7 100644
--- a/r/NEWS.md
+++ b/r/NEWS.md
@@ -21,14 +21,49 @@
 
 ## dplyr methods
 
-* `dplyr::mutate()` is now supported in Arrow for many applications. For queries on `Table` and `RecordBatch` that are not yet supported in Arrow, the implementation falls back to pulling data into an R `data.frame` first, as in the previous release. For queries on `Dataset`, it raises an error if the feature is not implemented.
+Many more `dplyr` verbs are supported on Arrow objects:
+
+* `dplyr::mutate()` is now supported in Arrow for many applications. For queries on `Table` and `RecordBatch` that are not yet supported in Arrow, the implementation falls back to pulling data into an in-memory R `data.frame` first, as in the previous release. For queries on `Dataset` (which can be larger than memory), it raises an error if the function is not implemented. The main `mutate()` features that cannot yet be called on Arrow objects are (1) `mutate()` after `group_by()` (which is typically used in combination with aggregation) and (2) queries that use `dplyr::across()`.
+* `dplyr::transmute()` (which calls `mutate()`)
+* `dplyr::group_by()` now preserves the `.drop()` argument and supports on-the-fly definition of columns
+* `dplyr::relocate()` to reorder columns
+* `dplyr::arrange()` to sort rows
+* `dplyr::compute()` to evaluate the lazy expressions and return an Arrow Table. This is equivalent to `dplyr::collect(as_data_frame = FALSE)`, which was added in 2.0.0.
+
+Over 100 functions can now be called on Arrow objects inside a `dplyr` verb:
+
 * String functions `nchar()`, `tolower()`, and `toupper()`, along with their `stringr` spellings `str_length()`, `str_to_lower()`, and `str_to_upper()`, are supported in Arrow `dplyr` calls. `str_trim()` is also supported.
+* Regular expression functions `sub()`, `gsub()`, and `grepl()`, along with `str_replace()`, `str_replace_all()`, and `str_detect()`, are supported.
+* `cast(x, type)` and `dictionary_encode()` allow changing the type of columns in Arrow objects; `as.numeric()`, `as.character()`, etc. are exposed as similar type-altering conveniences
+* `dplyr::between()`; the Arrow version also allows the `left` and `right` arguments to be columns in the data and not just scalars
+* Additionally, any Arrow C++ compute function can be called inside a `dplyr` verb. This enables you to access Arrow functions that don't have a direct R mapping. See `list_compute_functions()` for all available functions, which are available in `dplyr` prefixed by `arrow_`.
+
+## Datasets
+
+* `open_dataset()` now accepts a vector of file paths (or even a single file path). Among other things, this enables you to open a single very large file and use `write_dataset()` to partition it without having to read the whole file into memory.
+* Datasets can now detect and read a directory of compressed CSVs
+* `write_dataset()` now defaults to `format = "parquet"` and better validates the `format` argument
+* Invalid input for `schema` in `open_dataset()` is now correctly handled
+* Collecting 0 columns from a Dataset now no longer returns all of the columns
 
 ## Other improvements
 
 * `value_counts()` to tabulate values in an `Array` or `ChunkedArray`, similar to `base::table()`.
 * `StructArray` objects gain data.frame-like methods, including `names()`, `$`, `[[`, and `dim()`.
 * RecordBatch columns can now be added, replaced, or removed by assigning (`<-`) with either `$` or `[[`
+* Similarly, `Schema` can now be edited by assigning in new types. This enables using the CSV reader to detect the schema of a file, modify the `Schema` object for any columns that you want to read in as a different type, and then use that `Schema` to read the data.
+* Better validation when creating a `Table` with a schema, with columns of different lengths, and with scalar value recycling
+* Reading Parquet files in Japanese or other multi-byte locales on Windows no longer hangs (workaround for a [bug in libstdc++](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98723); thanks @yutannihilation for the persistence in discovering this!)
+* If you attempt to read string data that has embedded nul (`\0`) characters, the error message now informs you that you can set `options(arrow.skip_nul = TRUE)` to strip them out. It is not recommended to set this option by default since this code path is sigificantly slower, and most string data does not contain nuls.
+
+## Installation and configuration
+
+* The R package can now support working with an Arrow C++ library that has additional features (such as dataset, parquet, string libraries) disabled, and the bundled build script enables setting environment variables to disable them. See `vignette("install", package = "arrow")` for details. This allows a faster, smaller package build in cases where that is useful, and it enables a minimal, functioning R package build on Solaris.
+* On macOS, it is now possible to use the same bundled C++ build that is used by default on Linux, along with all of its customization parameters, by setting the environment variable `FORCE_BUNDLED_BUILD=true`.
+* `arrow` now uses the `mimalloc` memory allocator by default on macOS, if available (as it is in CRAN binaries), instead of `jemalloc`. There are [configuration issues](https://issues.apache.org/jira/browse/ARROW-6994) with `jemalloc` on macOS, and [benchmark analysis](https://ursalabs.org/blog/2021-r-benchmarks-part-1/) shows that this has negative effects on performance, especially on memory-intensive workflows. `jemalloc` remains the default on Linux; `mimalloc` is default on Windows.
+* Setting the `ARROW_DEFAULT_MEMORY_POOL` environment variable to switch memory allocators now works correctly when the Arrow C++ library has been statically linked (as is usually the case when installing from CRAN).
+* The `arrow_info()` function now reports on the additional optional features, as well as the detected SIMD level. If key features or compression libraries are not enabled in the build, `arrow_info()` will refer to the installation vignette for guidance on how to install a more complete build, if desired.
+* If you attempt to read a file that was compressed with a codec that your Arrow build does not contain support for, the error message now will tell you how to reinstall Arrow with that feature enabled.
 
 # arrow 3.0.0
 

From 7cec72d17bd3f6ae46275d434d7025de18265335 Mon Sep 17 00:00:00 2001
From: Dominik Moritz <domoritz@gmail.com>
Date: Wed, 14 Apr 2021 15:39:04 +0900
Subject: [PATCH 016/719] ARROW-12309: [JS] Make es2015 bundles the default

Closes #9961 from domoritz/es2015

Authored-by: Dominik Moritz <domoritz@gmail.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 js/README.md               | 2 +-
 js/bin/integration.js      | 2 +-
 js/examples/read_file.html | 2 +-
 js/gulp/argv.js            | 4 ----
 js/gulp/arrow-task.js      | 8 ++++----
 js/gulp/package-task.js    | 8 ++++----
 js/gulp/test-task.js       | 6 +++---
 js/gulp/util.js            | 4 ++--
 js/gulpfile.js             | 4 ++--
 9 files changed, 18 insertions(+), 22 deletions(-)

diff --git a/js/README.md b/js/README.md
index 4eebd008a09..e799362d966 100644
--- a/js/README.md
+++ b/js/README.md
@@ -224,7 +224,7 @@ The base `apache-arrow` package includes all the compilation targets for conveni
 The targets are also published under the `@apache-arrow` namespace:
 
 ```sh
-npm install apache-arrow # <-- combined es5/UMD, esnext/CommonJS/ESModules/UMD, and TypeScript package
+npm install apache-arrow # <-- combined es2015/UMD + esnext/CommonJS/ESModules/UMD
 npm install @apache-arrow/ts # standalone TypeScript package
 npm install @apache-arrow/es5-cjs # standalone es5/CommonJS package
 npm install @apache-arrow/es5-esm # standalone es5/ESModules package
diff --git a/js/bin/integration.js b/js/bin/integration.js
index c6f6cd7a24e..2e5f16bdf80 100755
--- a/js/bin/integration.js
+++ b/js/bin/integration.js
@@ -30,7 +30,7 @@ const {
     Table,
     RecordBatchReader,
     util: { createElementComparator }
-} = require('../targets/apache-arrow/Arrow.es5.min');
+} = require('../targets/apache-arrow/');
 
 const exists = async (p) => {
     try {
diff --git a/js/examples/read_file.html b/js/examples/read_file.html
index ec96d0e4755..1013fbe79ef 100644
--- a/js/examples/read_file.html
+++ b/js/examples/read_file.html
@@ -86,6 +86,6 @@
       <tbody id="tbody">
       </tbody>
     </table>
-    <script type="text/javascript" src="../targets/es5/umd/Arrow.js"></script>
+    <script type="text/javascript" src="../targets/es2015/umd/Arrow.js"></script>
   </body>
 </html>
diff --git a/js/gulp/argv.js b/js/gulp/argv.js
index 3a028f813f9..0acdad7d5e1 100644
--- a/js/gulp/argv.js
+++ b/js/gulp/argv.js
@@ -15,10 +15,6 @@
 // specific language governing permissions and limitations
 // under the License.
 
-const fs = require('fs');
-const glob = require('glob');
-const path = require('path');
-
 const argv = require(`command-line-args`)([
     { name: `all`, type: Boolean },
     { name: 'verbose', alias: `v`, type: Boolean },
diff --git a/js/gulp/arrow-task.js b/js/gulp/arrow-task.js
index 35880e006ff..93e9475e936 100644
--- a/js/gulp/arrow-task.js
+++ b/js/gulp/arrow-task.js
@@ -32,21 +32,21 @@ const arrowTask = ((cache) => memoizeTask(cache, function copyMain(target) {
     const dtsGlob = `${targetDir(`esnext`, `cjs`)}/**/*.ts`;
     const cjsGlob = `${targetDir(`esnext`, `cjs`)}/**/*.js`;
     const esmGlob = `${targetDir(`esnext`, `esm`)}/**/*.js`;
-    const es5UmdGlob = `${targetDir(`es5`, `umd`)}/*.js`;
+    const es2015UmdGlob = `${targetDir(`es2015`, `umd`)}/*.js`;
     const esnextUmdGlob = `${targetDir(`esnext`, `umd`)}/*.js`;
     const cjsSourceMapsGlob = `${targetDir(`esnext`, `cjs`)}/**/*.map`;
     const esmSourceMapsGlob = `${targetDir(`esnext`, `esm`)}/**/*.map`;
-    const es5UmdSourceMapsGlob = `${targetDir(`es5`, `umd`)}/*.map`;
+    const es2015UmdSourceMapsGlob = `${targetDir(`es2015`, `umd`)}/*.map`;
     const esnextUmdSourceMapsGlob = `${targetDir(`esnext`, `umd`)}/*.map`;
     return Observable.forkJoin(
         observableFromStreams(gulp.src(dtsGlob),                 gulp.dest(out)), // copy d.ts files
         observableFromStreams(gulp.src(cjsGlob),                 gulp.dest(out)), // copy esnext cjs files
         observableFromStreams(gulp.src(cjsSourceMapsGlob),       gulp.dest(out)), // copy esnext cjs sourcemaps
         observableFromStreams(gulp.src(esmSourceMapsGlob),       gulp.dest(out)), // copy esnext esm sourcemaps
-        observableFromStreams(gulp.src(es5UmdSourceMapsGlob),    gulp.dest(out)), // copy es5 umd sourcemap files, but don't rename
+        observableFromStreams(gulp.src(es2015UmdSourceMapsGlob), gulp.dest(out)), // copy es2015 umd sourcemap files, but don't rename
         observableFromStreams(gulp.src(esnextUmdSourceMapsGlob), gulp.dest(out)), // copy esnext umd sourcemap files, but don't rename
         observableFromStreams(gulp.src(esmGlob),       gulpRename((p) => { p.extname = '.mjs'; }),          gulp.dest(out)), // copy esnext esm files and rename to `.mjs`
-        observableFromStreams(gulp.src(es5UmdGlob),    gulpRename((p) => { p.basename += `.es5.min`; }),    gulp.dest(out)), // copy es5 umd files and add `.min`
+        observableFromStreams(gulp.src(es2015UmdGlob), gulpRename((p) => { p.basename += `.es2015.min`; }), gulp.dest(out)), // copy es2015 umd files and add `.min`
         observableFromStreams(gulp.src(esnextUmdGlob), gulpRename((p) => { p.basename += `.esnext.min`; }), gulp.dest(out)), // copy esnext umd files and add `.esnext.min`
     ).publish(new ReplaySubject()).refCount();
 }))({});
diff --git a/js/gulp/package-task.js b/js/gulp/package-task.js
index 440a90ea6bf..cb1d97c82dd 100644
--- a/js/gulp/package-task.js
+++ b/js/gulp/package-task.js
@@ -50,12 +50,12 @@ const createMainPackageJson = (target, format) => (orig) => ({
     browser: `${mainExport}.dom`,
     module: `${mainExport}.dom.mjs`,
     types: `${mainExport}.node.d.ts`,
-    unpkg: `${mainExport}.es5.min.js`,
-    jsdelivr: `${mainExport}.es5.min.js`,
+    unpkg: `${mainExport}.es2015.min.js`,
+    jsdelivr: `${mainExport}.es2015.min.js`,
     sideEffects: false,
     esm: { mode: `all`, sourceMap: true },
 });
-  
+
 const createTypeScriptPackageJson = (target, format) => (orig) => ({
     ...createScopedPackageJSON(target, format)(orig),
     bin: undefined,
@@ -69,7 +69,7 @@ const createTypeScriptPackageJson = (target, format) => (orig) => ({
         ...orig.dependencies
     }
 });
-  
+
 const createScopedPackageJSON = (target, format) => (({ name, ...orig }) =>
     packageJSONFields.reduce(
         (xs, key) => ({ ...xs, [key]: xs[key] || orig[key] }),
diff --git a/js/gulp/test-task.js b/js/gulp/test-task.js
index 149a58976b5..8c1eab1e3b8 100644
--- a/js/gulp/test-task.js
+++ b/js/gulp/test-task.js
@@ -146,7 +146,7 @@ async function createTestData() {
             { maxBuffer: Math.pow(2, 53) - 1 }
         );
     }
-    
+
     async function generateCPPStream(filePath, streamPath) {
         await del(streamPath);
         return await exec(
@@ -154,7 +154,7 @@ async function createTestData() {
             { maxBuffer: Math.pow(2, 53) - 1 }
         );
     }
-    
+
     async function generateJavaFile(jsonPath, filePath) {
         await del(filePath);
         return await exec(
@@ -164,7 +164,7 @@ async function createTestData() {
             { maxBuffer: Math.pow(2, 53) - 1 }
         );
     }
-    
+
     async function generateJavaStream(filePath, streamPath) {
         await del(streamPath);
         return await exec(
diff --git a/js/gulp/util.js b/js/gulp/util.js
index fd7a3775ae0..c07f5f3062f 100644
--- a/js/gulp/util.js
+++ b/js/gulp/util.js
@@ -67,8 +67,8 @@ const gCCLanguageNames = {
 const UMDSourceTargets = {
     es5: `es5`,
  es2015: `es2015`,
- es2016: `es2015`,
- es2017: `es2015`,
+ es2016: `es2016`,
+ es2017: `es2017`,
  esnext: `esnext`
 };
 
diff --git a/js/gulpfile.js b/js/gulpfile.js
index cf840ad307b..271bd3426d8 100644
--- a/js/gulpfile.js
+++ b/js/gulpfile.js
@@ -56,13 +56,13 @@ knownTargets.forEach((target) => {
     ));
 });
 
-// The main "apache-arrow" module builds the es5/umd, esnext/cjs,
+// The main "apache-arrow" module builds the es2015/umd, esnext/cjs,
 // esnext/esm, and esnext/umd targets, then copies and renames the
 // compiled output into the apache-arrow folder
 gulp.task(`build:${npmPkgName}`,
     gulp.series(
         gulp.parallel(
-            `build:${taskName(`es5`, `umd`)}`,
+            `build:${taskName(`es2015`, `umd`)}`,
             `build:${taskName(`esnext`, `cjs`)}`,
             `build:${taskName(`esnext`, `esm`)}`,
             `build:${taskName(`esnext`, `umd`)}`

From c0445d69088a6fbd9c026ecac6a99c6cd4df4865 Mon Sep 17 00:00:00 2001
From: Dominik Moritz <domoritz@gmail.com>
Date: Wed, 14 Apr 2021 15:41:38 +0900
Subject: [PATCH 017/719] ARROW-12269: [JS] Move to eslint

Closes #9992 from domoritz/eslint

Authored-by: Dominik Moritz <domoritz@gmail.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 dev/release/rat_exclude_files.txt             |   1 +
 js/.eslintignore                              |   1 +
 js/.eslintrc.js                               |  96 ++++
 js/DEVELOP.md                                 |   2 -
 js/gulp/closure-task.js                       |   4 +-
 js/jest.coverage.config.js                    |  14 +-
 js/package.json                               |  13 +-
 js/perf/index.js                              |  10 +-
 js/perf/table_config.js                       |   8 +-
 js/src/bin/arrow2csv.ts                       |  10 +-
 js/src/builder.ts                             |  21 +-
 js/src/builder/buffer.ts                      |   6 +-
 js/src/builder/map.ts                         |   2 +-
 js/src/builder/run.ts                         |   3 +-
 js/src/builder/union.ts                       |   2 -
 js/src/compute/dataframe.ts                   |   6 +-
 js/src/compute/predicate.ts                   |   6 +-
 js/src/data.ts                                |  12 +-
 js/src/fb/.eslintrc.js                        |  23 +
 js/src/fb/Schema.ts                           |   2 -
 js/src/io/adapters.ts                         |  10 +-
 js/src/io/file.ts                             |   7 +-
 js/src/io/interfaces.ts                       |  13 +-
 js/src/io/node/reader.ts                      |   4 +-
 js/src/io/node/writer.ts                      |   2 +-
 js/src/io/stream.ts                           |  10 +-
 js/src/io/whatwg/builder.ts                   |   4 +-
 js/src/io/whatwg/writer.ts                    |   2 +-
 js/src/ipc/metadata/file.ts                   |   8 +-
 js/src/ipc/metadata/json.ts                   |   3 +-
 js/src/ipc/metadata/message.ts                |   7 +-
 js/src/ipc/reader.ts                          |  14 +-
 js/src/ipc/writer.ts                          |   9 +-
 js/src/recordbatch.ts                         |   2 +-
 js/src/schema.ts                              |   4 +-
 js/src/table.ts                               |   4 +-
 js/src/type.ts                                |  81 ++-
 js/src/util/bn.ts                             |   4 +-
 js/src/util/compat.ts                         |   1 +
 js/src/util/recordbatch.ts                    |   2 +-
 js/src/vector/chunked.ts                      |   2 +-
 js/src/vector/index.ts                        |   4 +-
 js/src/vector/int.ts                          |   6 +-
 js/src/vector/map.ts                          |   8 +-
 js/src/vector/row.ts                          |   6 +-
 js/src/vector/struct.ts                       |   3 +-
 js/src/visitor.ts                             |  12 +-
 js/src/visitor/jsontypeassembler.ts           |   4 +-
 js/src/visitor/jsonvectorassembler.ts         |  16 +-
 js/src/visitor/set.ts                         |   3 +-
 js/src/visitor/vectorloader.ts                |   4 +-
 js/test/.eslintrc.js                          |  31 ++
 js/test/Arrow.ts                              |   2 -
 js/test/generate-test-data.ts                 |  11 +-
 js/test/inference/column.ts                   |   2 +
 js/test/inference/nested.ts                   |   4 +-
 js/test/jest-extensions.ts                    |   4 +-
 js/test/unit/builders/date-tests.ts           |   4 +-
 js/test/unit/builders/utils.ts                |   9 +-
 js/test/unit/generated-data-validators.ts     |   6 +-
 js/test/unit/ipc/helpers.ts                   |   3 -
 .../unit/ipc/reader/from-inference-tests.ts   |   2 +-
 js/test/unit/ipc/reader/json-reader-tests.ts  |   1 -
 js/test/unit/ipc/reader/streams-dom-tests.ts  |   2 -
 js/test/unit/ipc/reader/streams-node-tests.ts |   3 -
 js/test/unit/ipc/writer/json-writer-tests.ts  |   1 -
 .../unit/ipc/writer/stream-writer-tests.ts    |   2 +-
 js/test/unit/ipc/writer/streams-dom-tests.ts  |   5 +-
 js/test/unit/ipc/writer/streams-node-tests.ts |   5 +-
 .../unit/recordbatch/record-batch-tests.ts    |  24 +-
 js/test/unit/table-tests.ts                   |  50 +-
 js/test/unit/table/assign-tests.ts            |   4 +
 js/test/unit/table/serialize-tests.ts         |  20 +-
 js/test/unit/utils.ts                         |   2 +-
 js/test/unit/vector/numeric-vector-tests.ts   |   2 +
 js/test/unit/visitor-tests.ts                 |   4 +-
 js/tsconfig.json                              |   3 +-
 js/tslint.json                                |  39 --
 js/yarn.lock                                  | 467 ++++++++++++++++--
 79 files changed, 820 insertions(+), 393 deletions(-)
 create mode 100644 js/.eslintignore
 create mode 100644 js/.eslintrc.js
 create mode 100644 js/src/fb/.eslintrc.js
 create mode 100644 js/test/.eslintrc.js
 delete mode 100644 js/tslint.json

diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt
index 716a1b98ccc..cee925ce79e 100644
--- a/dev/release/rat_exclude_files.txt
+++ b/dev/release/rat_exclude_files.txt
@@ -130,6 +130,7 @@ js/.npmignore
 js/closure-compiler-scripts/*
 js/src/fb/*.ts
 js/yarn.lock
+js/.eslintignore
 python/cmake_modules
 python/cmake_modules/FindPythonLibsNew.cmake
 python/cmake_modules/SnappyCMakeLists.txt
diff --git a/js/.eslintignore b/js/.eslintignore
new file mode 100644
index 00000000000..a9ba028ceea
--- /dev/null
+++ b/js/.eslintignore
@@ -0,0 +1 @@
+.eslintrc.js
diff --git a/js/.eslintrc.js b/js/.eslintrc.js
new file mode 100644
index 00000000000..7141f1b02d0
--- /dev/null
+++ b/js/.eslintrc.js
@@ -0,0 +1,96 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+module.exports = {
+    env: {
+        browser: true,
+        es6: true,
+        node: true,
+    },
+    parser: "@typescript-eslint/parser",
+    parserOptions: {
+        project: "tsconfig.json",
+        sourceType: "module",
+        ecmaVersion: 2020,
+    },
+    plugins: ["@typescript-eslint", "jest"],
+    extends: [
+        "eslint:recommended",
+        "plugin:jest/recommended",
+        "plugin:jest/style",
+        "plugin:@typescript-eslint/recommended",
+    ],
+    rules: {
+        "@typescript-eslint/indent": "off",
+        "@typescript-eslint/member-delimiter-style": [
+            "error",
+            {
+                multiline: {
+                    delimiter: "semi",
+                    requireLast: true,
+                },
+                singleline: {
+                    delimiter: "semi",
+                    requireLast: false,
+                },
+            },
+        ],
+        "@typescript-eslint/no-namespace": ["error", { "allowDeclarations": true }],
+        "@typescript-eslint/no-empty-function": "off",
+        "@typescript-eslint/no-unused-expressions": "off",
+        "@typescript-eslint/no-use-before-define": "off",
+        "@typescript-eslint/no-require-imports": "error",
+        "@typescript-eslint/no-var-requires": "off",  // handled by rule above
+        "@typescript-eslint/quotes": [
+            "error",
+            "single",
+            {
+                avoidEscape: true,
+                allowTemplateLiterals: true
+            },
+        ],
+        "@typescript-eslint/semi": ["error", "always"],
+        "@typescript-eslint/type-annotation-spacing": "error",
+        "@typescript-eslint/explicit-module-boundary-types": "off",
+        "@typescript-eslint/no-explicit-any": "off",
+        "@typescript-eslint/no-misused-new": "off",
+        "@typescript-eslint/ban-ts-comment": "off",
+        "@typescript-eslint/no-non-null-assertion": "off",
+        "@typescript-eslint/no-unused-vars": "off",  // ts already takes care of this
+
+        "brace-style": "off",
+        "curly": ["error", "multi-line"],
+        "eol-last": "error",
+        "no-empty": "off",
+        "no-multiple-empty-lines": "error",
+        "no-trailing-spaces": "error",
+        "no-var": "error",
+
+        "no-cond-assign": "off",
+
+        // rules for later:
+
+        "prefer-const": ["off"],
+        // "prefer-const": ["error", {
+        //     "destructuring": "all"
+        // }],
+
+        // "one-var": ["error", "never"],
+
+        // "brace-style": ["error", "1tbs", { "allowSingleLine": true }],
+    },
+};
diff --git a/js/DEVELOP.md b/js/DEVELOP.md
index 566c473c30f..952a5f25155 100644
--- a/js/DEVELOP.md
+++ b/js/DEVELOP.md
@@ -102,8 +102,6 @@ Uses [lerna](https://github.com/lerna/lerna) to publish each build target to npm
     sed -i '+s+./flatbuffers+flatbuffers+ig' *_generated.ts
     # Fix the Union createTypeIdsVector typings
     sed -i -r '+s+static createTypeIdsVector\(builder: flatbuffers.Builder, data: number\[\] \| Uint8Array+static createTypeIdsVector\(builder: flatbuffers.Builder, data: number\[\] \| Int32Array+ig' Schema_generated.ts
-    # Add `/* tslint:disable:class-name */` to the top of `Schema.ts`
-    echo -e '/* tslint:disable:class-name */\n' | cat - Schema_generated.ts > Schema1.ts && mv Schema1.ts Schema_generated.ts
     # Remove "_generated" suffix from TS files
     mv File{_generated,}.ts && mv Schema{_generated,}.ts && mv Message{_generated,}.ts
     ```
diff --git a/js/gulp/closure-task.js b/js/gulp/closure-task.js
index bbb48cebcd3..f525cd25cd0 100644
--- a/js/gulp/closure-task.js
+++ b/js/gulp/closure-task.js
@@ -148,7 +148,7 @@ function externBody({ exportName, staticNames, instanceNames }) {
 function externsHeader() {
     return (`${apacheHeader()}
 // @ts-nocheck
-/* tslint:disable */
+/* eslint-disable */
 /**
  * @fileoverview Closure Compiler externs for Arrow
  * @externs
@@ -209,5 +209,5 @@ function apacheHeader() {
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
-// under the License.`
+// under the License.`;
 }
diff --git a/js/jest.coverage.config.js b/js/jest.coverage.config.js
index 72ddd3c9345..ac98794f800 100644
--- a/js/jest.coverage.config.js
+++ b/js/jest.coverage.config.js
@@ -17,14 +17,14 @@
 
 module.exports = {
     ...require('./jest.config'),
-    "reporters": undefined,
-    "coverageReporters": [
-        "lcov", "json"
+    reporters: undefined,
+    coverageReporters: [
+        lcov, 'json'
     ],
-    "globals": {
-        "ts-jest": {
-            "diagnostics": false,
-            "tsConfig": "test/tsconfig.coverage.json"
+    globals: {
+        'ts-jest': {
+            diagnostics: false,
+            tsConfig: 'test/tsconfig.coverage.json'
         }
     }
 };
diff --git a/js/package.json b/js/package.json
index 0e41e2e007d..84d5ba195d8 100644
--- a/js/package.json
+++ b/js/package.json
@@ -20,12 +20,8 @@
     "create:testdata": "gulp create:testdata",
     "test:coverage": "gulp test -t src --coverage",
     "doc": "del-cli ./doc && typedoc --options typedoc.js",
-    "lint": "run-p lint:src lint:test",
-    "lint:ci": "run-p lint:src:ci lint:test:ci",
-    "lint:src": "tslint --fix --project -p tsconfig.json -c tslint.json \"src/**/*.ts\"",
-    "lint:test": "tslint --fix --project -p test/tsconfig.json -c tslint.json \"test/**/*.ts\"",
-    "lint:src:ci": "tslint --project -p tsconfig.json -c tslint.json \"src/**/*.ts\"",
-    "lint:test:ci": "tslint --project -p test/tsconfig.json -c tslint.json \"test/**/*.ts\"",
+    "lint": "eslint src test --fix",
+    "lint:ci": "eslint src test",
     "prepublishOnly": "echo \"Error: do 'yarn release' instead of 'npm publish'\" && exit 1",
     "version": "yarn && yarn clean:all"
   },
@@ -71,11 +67,15 @@
   "devDependencies": {
     "@types/glob": "7.1.1",
     "@types/jest": "25.2.2",
+    "@typescript-eslint/eslint-plugin": "^4.21.0",
+    "@typescript-eslint/parser": "^4.21.0",
     "async-done": "1.3.1",
     "benchmark": "2.1.4",
     "coveralls": "3.0.3",
     "cpy": "^8.1.2",
     "del-cli": "3.0.1",
+    "eslint": "^7.24.0",
+    "eslint-plugin-jest": "^24.3.5",
     "esm": "3.2.25",
     "glob": "7.1.4",
     "google-closure-compiler": "20200830.0.0",
@@ -99,7 +99,6 @@
     "terser-webpack-plugin": "4.2.2",
     "ts-jest": "26.3.0",
     "ts-node": "9.0.0",
-    "tslint": "6.1.3",
     "typedoc": "0.20.19",
     "typescript": "4.0.2",
     "web-stream-tools": "0.0.1",
diff --git a/js/perf/index.js b/js/perf/index.js
index e332af208a4..7535c9fe729 100644
--- a/js/perf/index.js
+++ b/js/perf/index.js
@@ -62,12 +62,12 @@ console.log('Running apache-arrow performance tests...\n');
 run();
 
 function run() {
-    var suite = suites.shift();
+    const suite = suites.shift();
     suite && suite.on('complete', function() {
         console.log(suite.name + ':\n' + this.map(function(x) {
-            var str = x.toString();
-            var meanMsPerOp = Math.round(x.stats.mean * 100000)/100;
-            var sliceOf60FPS = Math.round((meanMsPerOp / (1000/60)) * 100000)/1000;
+            const str = x.toString();
+            const meanMsPerOp = Math.round(x.stats.mean * 100000)/100;
+            const sliceOf60FPS = Math.round((meanMsPerOp / (1000/60)) * 100000)/1000;
             return `${str}\n   avg: ${meanMsPerOp}ms\n   ${sliceOf60FPS}% of a frame @ 60FPS ${x.suffix || ''}`;
         }).join('\n') + '\n');
         if (suites.length > 0) {
@@ -149,7 +149,7 @@ function createTableIterateTest(table) {
 }
 
 function createDataFrameDirectCountTest(table, column, test, value) {
-    let sum, colidx = table.schema.fields.findIndex((c)=>c.name === column);
+    let sum, colidx = table.schema.fields.findIndex((c)=>c.name === column), op;
 
     if (test == 'gt') {
         op = () => {
diff --git a/js/perf/table_config.js b/js/perf/table_config.js
index 190908bc328..2946b5ab26a 100644
--- a/js/perf/table_config.js
+++ b/js/perf/table_config.js
@@ -22,11 +22,11 @@ const glob = require('glob');
 const config = [];
 const filenames = glob.sync(path.resolve(__dirname, `../test/data/tables/`, `*.arrow`));
 
-countBys = {
-    "tracks": ['origin', 'destination']
+const countBys = {
+    tracks: ['origin', 'destination']
 }
-counts = {
-    "tracks": [
+const counts = {
+    tracks: [
         {col: 'lat',    test: 'gt', value: 0        },
         {col: 'lng',    test: 'gt', value: 0        },
         {col: 'origin', test: 'eq', value: 'Seattle'},
diff --git a/js/src/bin/arrow2csv.ts b/js/src/bin/arrow2csv.ts
index b774844b65e..dd7236eb923 100644
--- a/js/src/bin/arrow2csv.ts
+++ b/js/src/bin/arrow2csv.ts
@@ -17,13 +17,13 @@
 // specific language governing permissions and limitations
 // under the License.
 
-/* tslint:disable */
-
 import * as fs from 'fs';
 import * as stream from 'stream';
 import { valueToString } from '../util/pretty';
 import { Schema, RecordBatch, RecordBatchReader, AsyncByteQueue } from '../Arrow.node';
 
+/* eslint-disable @typescript-eslint/no-require-imports */
+
 const padLeft = require('pad-left');
 const bignumJSONParse = require('json-bignum').parse;
 const argv = require(`command-line-args`)(cliOpts(), { partial: true });
@@ -78,7 +78,7 @@ function pipeTo(source: NodeJS.ReadableStream, sink: NodeJS.WritableStream, opts
         source.on('end', onEnd).pipe(sink, opts).on('error', onErr);
 
         function onEnd() { done(undefined, resolve); }
-        function onErr(err:any) { done(err, reject); }
+        function onErr(err: any) { done(err, reject); }
         function done(e: any, cb: (e?: any) => void) {
             source.removeListener('end', onEnd);
             sink.removeListener('error', onErr);
@@ -199,7 +199,7 @@ function formatMetadata(metadata: Map<string, string>) {
         `  ${key}: ${formatMetadataValue(val)}`
     ).join(',  \n');
 
-    function formatMetadataValue(value: string = '') {
+    function formatMetadataValue(value = '') {
         let parsed = value;
         try {
             parsed = JSON.stringify(JSON.parse(value), null, 2);
@@ -256,7 +256,7 @@ const typedArrayElementWidths = (() => {
         [Float32Array, maxElementWidth(Float32Array)],
         [Float64Array, maxElementWidth(Float64Array)],
         [Uint8ClampedArray, maxElementWidth(Uint8ClampedArray)]
-    ])
+    ]);
 })();
 
 function cliOpts() {
diff --git a/js/src/builder.ts b/js/src/builder.ts
index 6065711dd79..5b7da80a7a8 100644
--- a/js/src/builder.ts
+++ b/js/src/builder.ts
@@ -35,7 +35,7 @@ import {
 export interface BuilderOptions<T extends DataType = any, TNull = any> {
     type: T;
     nullValues?: TNull[] | ReadonlyArray<TNull> | null;
-    children?: { [key: string]: BuilderOptions; } | BuilderOptions[];
+    children?: { [key: string]: BuilderOptions } | BuilderOptions[];
 }
 
 /**
@@ -279,25 +279,20 @@ export abstract class Builder<T extends DataType = any, TNull = any> {
         return this.children.reduce((size, child) => size + child.reservedByteLength, size);
     }
 
-    // @ts-ignore
-    protected _offsets: DataBufferBuilder<Int32Array>;
+    protected _offsets!: DataBufferBuilder<Int32Array>;
     public get valueOffsets() { return this._offsets ? this._offsets.buffer : null; }
 
-    // @ts-ignore
-    protected _values: BufferBuilder<T['TArray'], any>;
+    protected _values!: BufferBuilder<T['TArray'], any>;
     public get values() { return this._values ? this._values.buffer : null; }
 
     protected _nulls: BitmapBufferBuilder;
     public get nullBitmap() { return this._nulls ? this._nulls.buffer : null; }
 
-    // @ts-ignore
-    protected _typeIds: DataBufferBuilder<Int8Array>;
+    protected _typeIds!: DataBufferBuilder<Int8Array>;
     public get typeIds() { return this._typeIds ? this._typeIds.buffer : null; }
 
-    // @ts-ignore
-    protected _isValid: (value: T['TValue'] | TNull) => boolean;
-    // @ts-ignore
-    protected _setValue: (inst: Builder<T>, index: number, value: T['TValue']) => void;
+    protected _isValid!: (value: T['TValue'] | TNull) => boolean;
+    protected _setValue!: (inst: Builder<T>, index: number, value: T['TValue']) => void;
 
     /**
      * Appends a value (or null) to this `Builder`.
@@ -310,7 +305,6 @@ export abstract class Builder<T extends DataType = any, TNull = any> {
      * Validates whether a value is valid (true), or null (false)
      * @param {T['TValue'] | TNull } value The value to compare against null the value representations
      */
-    // @ts-ignore
     public isValid(value: T['TValue'] | TNull): boolean { return this._isValid(value); }
 
     /**
@@ -336,7 +330,6 @@ export abstract class Builder<T extends DataType = any, TNull = any> {
      * @param {number} index
      * @param {T['TValue'] | TNull } value
      */
-    // @ts-ignore
     public setValue(index: number, value: T['TValue']) { this._setValue(this, index, value); }
     public setValid(index: number, valid: boolean) {
         this.length = this._nulls.set(index, +valid).length;
@@ -442,7 +435,7 @@ export abstract class FixedWidthBuilder<T extends Int | Float | FixedSizeBinary
 
 /** @ignore */
 export abstract class VariableWidthBuilder<T extends Binary | Utf8 | List | Map_, TNull = any> extends Builder<T, TNull> {
-    protected _pendingLength: number = 0;
+    protected _pendingLength = 0;
     protected _offsets: OffsetsBufferBuilder;
     protected _pending: Map<number, any> | undefined;
     constructor(opts: BuilderOptions<T, TNull>) {
diff --git a/js/src/builder/buffer.ts b/js/src/builder/buffer.ts
index 7aa336a2a80..3c20cc001b3 100644
--- a/js/src/builder/buffer.ts
+++ b/js/src/builder/buffer.ts
@@ -157,10 +157,8 @@ export class OffsetsBufferBuilder extends DataBufferBuilder<Int32Array> {
 
 /** @ignore */
 export class WideBufferBuilder<T extends TypedArray, R extends BigIntArray> extends BufferBuilder<T, DataValue<T>> {
-    // @ts-ignore
-    public buffer64: R;
-    // @ts-ignore
-    protected _ArrayType64: BigIntArrayConstructor<R>;
+    public buffer64!: R;
+    protected _ArrayType64!: BigIntArrayConstructor<R>;
     public get ArrayType64() {
         return this._ArrayType64 || (this._ArrayType64 = <BigIntArrayConstructor<R>> (this.buffer instanceof Int32Array ? BigInt64Array : BigUint64Array));
     }
diff --git a/js/src/builder/map.ts b/js/src/builder/map.ts
index 806fbc00da9..25affef2c77 100644
--- a/js/src/builder/map.ts
+++ b/js/src/builder/map.ts
@@ -40,7 +40,7 @@ export class MapBuilder<K extends DataType = any, V extends DataType = any, TNul
         pending.set(index, value);
     }
 
-    public addChild(child: Builder<Struct<{ key: K, value: V }>>, name = `${this.numChildren}`) {
+    public addChild(child: Builder<Struct<{ key: K; value: V }>>, name = `${this.numChildren}`) {
         if (this.numChildren > 0) {
             throw new Error('ListBuilder can only have one child.');
         }
diff --git a/js/src/builder/run.ts b/js/src/builder/run.ts
index c4ab84f2a94..5239f51f293 100644
--- a/js/src/builder/run.ts
+++ b/js/src/builder/run.ts
@@ -20,8 +20,7 @@ import { DataType } from '../type';
 
 /** @ignore */
 export class Run<T extends DataType = any, TNull = any> {
-    // @ts-ignore
-    protected _values: ArrayLike<T['TValue'] | TNull>;
+    protected _values!: ArrayLike<T['TValue'] | TNull>;
     public get length() { return this._values.length; }
     public get(index: number) { return this._values[index]; }
     public clear() { this._values = <any> null; return this; }
diff --git a/js/src/builder/union.ts b/js/src/builder/union.ts
index af75702b7fc..18ac05bf69e 100644
--- a/js/src/builder/union.ts
+++ b/js/src/builder/union.ts
@@ -53,13 +53,11 @@ export abstract class UnionBuilder<T extends Union, TNull = any> extends Builder
         return this;
     }
 
-    // @ts-ignore
     public setValue(index: number, value: T['TValue'], childTypeId?: number) {
         this._typeIds.set(index, childTypeId!);
         super.setValue(index, value);
     }
 
-    // @ts-ignore
     public addChild(child: Builder, name = `${this.children.length}`) {
         const childTypeId = this.children.push(child);
         const { type: { children, mode, typeIds } } = this;
diff --git a/js/src/compute/dataframe.ts b/js/src/compute/dataframe.ts
index af9c8c7a38b..4120e386159 100644
--- a/js/src/compute/dataframe.ts
+++ b/js/src/compute/dataframe.ts
@@ -95,16 +95,16 @@ export class DataFrame<T extends { [key: string]: DataType } = any> extends Tabl
 }
 
 /** @ignore */
-export class CountByResult<T extends DataType = any, TCount extends Int = Int> extends Table<{ values: T,  counts: TCount }> {
+export class CountByResult<T extends DataType = any, TCount extends Int = Int> extends Table<{ values: T;  counts: TCount }> {
     constructor(values: Vector<T>, counts: V<TCount>) {
-        type R = { values: T, counts: TCount };
+        type R = { values: T; counts: TCount };
         const schema = new Schema<R>([
             new Field('values', values.type),
             new Field('counts', counts.type)
         ]);
         super(new RecordBatch<R>(schema, counts.length, [values, counts]));
     }
-    public toJSON(): Object {
+    public toJSON(): Record<string, unknown> {
         const values = this.getColumnAt(0)!;
         const counts = this.getColumnAt(1)!;
         const result = {} as { [k: string]: number | null };
diff --git a/js/src/compute/predicate.ts b/js/src/compute/predicate.ts
index fe0cd8e3f53..52030763dc3 100644
--- a/js/src/compute/predicate.ts
+++ b/js/src/compute/predicate.ts
@@ -56,10 +56,8 @@ export class Literal<T= any> extends Value<T> {
 
 /** @ignore */
 export class Col<T= any> extends Value<T> {
-    // @ts-ignore
-    public vector: Vector;
-    // @ts-ignore
-    public colidx: number;
+    public vector!: Vector;
+    public colidx!: number;
 
     constructor(public name: string) { super(); }
     bind(batch: RecordBatch): (idx: number, batch?: RecordBatch) => any {
diff --git a/js/src/data.ts b/js/src/data.ts
index 47f644c0a4e..097a39012c9 100644
--- a/js/src/data.ts
+++ b/js/src/data.ts
@@ -70,14 +70,10 @@ export class Data<T extends DataType = DataType> {
      */
     public dictionary?: Vector;
 
-    // @ts-ignore
-    public readonly values: Buffers<T>[BufferType.DATA];
-    // @ts-ignore
-    public readonly typeIds: Buffers<T>[BufferType.TYPE];
-    // @ts-ignore
-    public readonly nullBitmap: Buffers<T>[BufferType.VALIDITY];
-    // @ts-ignore
-    public readonly valueOffsets: Buffers<T>[BufferType.OFFSET];
+    public readonly values!: Buffers<T>[BufferType.DATA];
+    public readonly typeIds!: Buffers<T>[BufferType.TYPE];
+    public readonly nullBitmap!: Buffers<T>[BufferType.VALIDITY];
+    public readonly valueOffsets!: Buffers<T>[BufferType.OFFSET];
 
     public get typeId(): T['TType'] { return this.type.typeId; }
     public get ArrayType(): T['ArrayType'] { return this.type.ArrayType; }
diff --git a/js/src/fb/.eslintrc.js b/js/src/fb/.eslintrc.js
new file mode 100644
index 00000000000..d448540e4af
--- /dev/null
+++ b/js/src/fb/.eslintrc.js
@@ -0,0 +1,23 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+module.exports = {
+    rules: {
+        "@typescript-eslint/no-require-imports": "off",
+        "@typescript-eslint/no-inferrable-types": "off"
+    },
+};
\ No newline at end of file
diff --git a/js/src/fb/Schema.ts b/js/src/fb/Schema.ts
index f2c7fb10dd3..64014c9e356 100644
--- a/js/src/fb/Schema.ts
+++ b/js/src/fb/Schema.ts
@@ -1,5 +1,3 @@
-/* tslint:disable:class-name */
-
 // automatically generated by the FlatBuffers compiler, do not modify
 
 import { flatbuffers } from 'flatbuffers';
diff --git a/js/src/io/adapters.ts b/js/src/io/adapters.ts
index d14dbe7982f..df0c632ba81 100644
--- a/js/src/io/adapters.ts
+++ b/js/src/io/adapters.ts
@@ -25,9 +25,9 @@ import {
 
 import { ReadableDOMStreamOptions } from './interfaces';
 
-interface ReadableStreamReadResult<T> { done: boolean; value: T; }
-type Uint8ArrayGenerator = Generator<Uint8Array, null, { cmd: 'peek' | 'read', size: number }>;
-type AsyncUint8ArrayGenerator = AsyncGenerator<Uint8Array, null, { cmd: 'peek' | 'read', size: number }>;
+interface ReadableStreamReadResult<T> { done: boolean; value: T }
+type Uint8ArrayGenerator = Generator<Uint8Array, null, { cmd: 'peek' | 'read'; size: number }>;
+type AsyncUint8ArrayGenerator = AsyncGenerator<Uint8Array, null, { cmd: 'peek' | 'read'; size: number }>;
 
 /** @ignore */
 export default {
@@ -212,7 +212,7 @@ class AdaptiveByteReader<T extends ArrayBufferViewInput> {
         try {
             this.supportsBYOB = !!(this.reader = this.getBYOBReader());
         } catch (e) {
-            this.supportsBYOB = !!!(this.reader = this.getDefaultReader());
+            this.supportsBYOB = !(this.reader = this.getDefaultReader());
         }
     }
 
@@ -379,7 +379,7 @@ async function* fromNodeStream(stream: NodeJS.ReadableStream): AsyncUint8ArrayGe
 
     function cleanup<T extends Error | null | void>(events: Event[], err?: T) {
         buffer = buffers = <any> null;
-        return new Promise<T>(async (resolve, reject) => {
+        return new Promise<T>((resolve, reject) => {
             for (const [evt, fn] of events) {
                 stream['off'](evt, fn);
             }
diff --git a/js/src/io/file.ts b/js/src/io/file.ts
index 59f4094be57..1c0661c105b 100644
--- a/js/src/io/file.ts
+++ b/js/src/io/file.ts
@@ -22,7 +22,7 @@ import { ArrayBufferViewInput, toUint8Array } from '../util/buffer';
 /** @ignore */
 export class RandomAccessFile extends ByteStream {
     public size: number;
-    public position: number = 0;
+    public position = 0;
     protected buffer: Uint8Array | null;
     constructor(buffer: ArrayBufferViewInput, byteLength?: number) {
         super();
@@ -59,9 +59,8 @@ export class RandomAccessFile extends ByteStream {
 
 /** @ignore */
 export class AsyncRandomAccessFile extends AsyncByteStream {
-    // @ts-ignore
-    public size: number;
-    public position: number = 0;
+    public size!: number;
+    public position = 0;
     public _pending?: Promise<void>;
     protected _handle: FileHandle | null;
     constructor(file: FileHandle, byteLength?: number) {
diff --git a/js/src/io/interfaces.ts b/js/src/io/interfaces.ts
index e057c2d6d57..febd0ea997e 100644
--- a/js/src/io/interfaces.ts
+++ b/js/src/io/interfaces.ts
@@ -23,13 +23,12 @@ export const ITERATOR_DONE: any = Object.freeze({ done: true, value: void (0) })
 /** @ignore */
 export type FileHandle = import('fs').promises.FileHandle;
 /** @ignore */
-export type ArrowJSONLike = { schema: any; batches?: any[]; dictionaries?: any[]; };
+export type ArrowJSONLike = { schema: any; batches?: any[]; dictionaries?: any[] };
 /** @ignore */
-export type ReadableDOMStreamOptions = { type: 'bytes' | undefined, autoAllocateChunkSize?: number, highWaterMark?: number };
+export type ReadableDOMStreamOptions = { type: 'bytes' | undefined; autoAllocateChunkSize?: number; highWaterMark?: number };
 
 /** @ignore */
 export class ArrowJSON {
-    // @ts-ignore
     constructor(private _json: ArrowJSONLike) {}
     public get schema(): any { return this._json['schema']; }
     public get batches(): any[] { return (this._json['batches'] || []) as any[]; }
@@ -73,11 +72,11 @@ export abstract class ReadableInterop<T> {
     public tee(): [ReadableStream<T>, ReadableStream<T>] {
         return this._getDOMStream().tee();
     }
-    public pipe<R extends NodeJS.WritableStream>(writable: R, options?: { end?: boolean; }) {
+    public pipe<R extends NodeJS.WritableStream>(writable: R, options?: { end?: boolean }) {
         return this._getNodeStream().pipe(writable, options);
     }
     public pipeTo(writable: WritableStream<T>, options?: PipeOptions) { return this._getDOMStream().pipeTo(writable, options); }
-    public pipeThrough<R extends ReadableStream<any>>(duplex: { writable: WritableStream<T>, readable: R }, options?: PipeOptions) {
+    public pipeThrough<R extends ReadableStream<any>>(duplex: { writable: WritableStream<T>; readable: R }, options?: PipeOptions) {
         return this._getDOMStream().pipeThrough(duplex, options);
     }
 
@@ -93,14 +92,14 @@ export abstract class ReadableInterop<T> {
 }
 
 /** @ignore */
-type Resolution<T> = { resolve: (value?: T | PromiseLike<T>) => void; reject: (reason?: any) => void; };
+type Resolution<T> = { resolve: (value?: T | PromiseLike<T>) => void; reject: (reason?: any) => void };
 
 /** @ignore */
 export class AsyncQueue<TReadable = Uint8Array, TWritable = TReadable> extends ReadableInterop<TReadable>
     implements AsyncIterableIterator<TReadable>, ReadableWritable<TReadable, TWritable> {
 
     protected _values: TWritable[] = [];
-    protected _error?: { error: any; };
+    protected _error?: { error: any };
     protected _closedPromise: Promise<void>;
     protected _closedPromiseResolve?: (value?: any) => void;
     protected resolvers: Resolution<IteratorResult<TReadable>>[] = [];
diff --git a/js/src/io/node/reader.ts b/js/src/io/node/reader.ts
index 7705b4634ad..498fe6a7f68 100644
--- a/js/src/io/node/reader.ts
+++ b/js/src/io/node/reader.ts
@@ -31,8 +31,8 @@ type CB = (error?: Error | null | undefined) => void;
 
 /** @ignore */
 class RecordBatchReaderDuplex<T extends { [key: string]: DataType } = any> extends Duplex {
-    private _pulling: boolean = false;
-    private _autoDestroy: boolean = true;
+    private _pulling = false;
+    private _autoDestroy = true;
     private _reader: RecordBatchReader | null;
     private _asyncQueue: AsyncByteQueue | null;
     constructor(options?: DuplexOptions & { autoDestroy: boolean }) {
diff --git a/js/src/io/node/writer.ts b/js/src/io/node/writer.ts
index c5fc80926d8..b7e02782880 100644
--- a/js/src/io/node/writer.ts
+++ b/js/src/io/node/writer.ts
@@ -30,7 +30,7 @@ type CB = (error?: Error | null | undefined) => void;
 
 /** @ignore */
 class RecordBatchWriterDuplex<T extends { [key: string]: DataType } = any> extends Duplex {
-    private _pulling: boolean = false;
+    private _pulling = false;
     private _reader: AsyncByteStream | null;
     private _writer: RecordBatchWriter | null;
     constructor(writer: RecordBatchWriter<T>, options?: DuplexOptions) {
diff --git a/js/src/io/stream.ts b/js/src/io/stream.ts
index c714925100c..e25f27895c6 100644
--- a/js/src/io/stream.ts
+++ b/js/src/io/stream.ts
@@ -61,8 +61,7 @@ export class AsyncByteQueue<T extends ArrayBufferViewInput = Uint8Array> extends
 
 /** @ignore */
 export class ByteStream implements IterableIterator<Uint8Array> {
-    // @ts-ignore
-    private source: ByteStreamSource<Uint8Array>;
+    private source!: ByteStreamSource<Uint8Array>;
     constructor(source?: Iterable<ArrayBufferViewInput> | ArrayBufferViewInput) {
         if (source) {
             this.source = new ByteStreamSource(streamAdapters.fromIterable(source));
@@ -78,8 +77,7 @@ export class ByteStream implements IterableIterator<Uint8Array> {
 
 /** @ignore */
 export class AsyncByteStream implements Readable<Uint8Array>, AsyncIterableIterator<Uint8Array> {
-    // @ts-ignore
-    private source: AsyncByteStreamSource<Uint8Array>;
+    private source!: AsyncByteStreamSource<Uint8Array>;
     constructor(source?: PromiseLike<ArrayBufferViewInput> | Response | ReadableStream<ArrayBufferViewInput> | NodeJS.ReadableStream | AsyncIterable<ArrayBufferViewInput> | Iterable<ArrayBufferViewInput>) {
         if (source instanceof AsyncByteStream) {
             this.source = (source as AsyncByteStream).source;
@@ -110,9 +108,9 @@ export class AsyncByteStream implements Readable<Uint8Array>, AsyncIterableItera
 }
 
 /** @ignore */
-type ByteStreamSourceIterator<T> = Generator<T, null, { cmd: 'peek' | 'read', size?: number | null }>;
+type ByteStreamSourceIterator<T> = Generator<T, null, { cmd: 'peek' | 'read'; size?: number | null }>;
 /** @ignore */
-type AsyncByteStreamSourceIterator<T> = AsyncGenerator<T, null, { cmd: 'peek' | 'read', size?: number | null }>;
+type AsyncByteStreamSourceIterator<T> = AsyncGenerator<T, null, { cmd: 'peek' | 'read'; size?: number | null }>;
 
 /** @ignore */
 class ByteStreamSource<T> {
diff --git a/js/src/io/whatwg/builder.ts b/js/src/io/whatwg/builder.ts
index 15a4333c5c9..c65511844b9 100644
--- a/js/src/io/whatwg/builder.ts
+++ b/js/src/io/whatwg/builder.ts
@@ -24,8 +24,8 @@ import { Builder, BuilderOptions } from '../../builder/index';
 export interface BuilderTransformOptions<T extends DataType = any, TNull = any> extends BuilderOptions<T, TNull> {
     queueingStrategy?: 'bytes' | 'count';
     dictionaryHashFunction?: (value: any) => string | number;
-    readableStrategy?: { highWaterMark?: number, size?: any, type?: 'bytes'; };
-    writableStrategy?: { highWaterMark?: number, size?: any, type?: 'bytes'; };
+    readableStrategy?: { highWaterMark?: number; size?: any; type?: 'bytes' };
+    writableStrategy?: { highWaterMark?: number; size?: any; type?: 'bytes' };
     valueToChildTypeId?: (builder: Builder<T, TNull>, value: any, offset: number) => number;
 }
 
diff --git a/js/src/io/whatwg/writer.ts b/js/src/io/whatwg/writer.ts
index de3b3f1d247..49789bdd33a 100644
--- a/js/src/io/whatwg/writer.ts
+++ b/js/src/io/whatwg/writer.ts
@@ -24,7 +24,7 @@ import { RecordBatchWriter } from '../../ipc/writer';
 export function recordBatchWriterThroughDOMStream<T extends { [key: string]: DataType } = any>(
     this: typeof RecordBatchWriter,
     writableStrategy?: QueuingStrategy<RecordBatch<T>> & { autoDestroy: boolean },
-    readableStrategy?: { highWaterMark?: number, size?: any }
+    readableStrategy?: { highWaterMark?: number; size?: any }
 ) {
 
     const writer = new this<T>(writableStrategy);
diff --git a/js/src/ipc/metadata/file.ts b/js/src/ipc/metadata/file.ts
index 10bb342f67a..5a1be844e15 100644
--- a/js/src/ipc/metadata/file.ts
+++ b/js/src/ipc/metadata/file.ts
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-/* tslint:disable:class-name */
+/* eslint-disable @typescript-eslint/naming-convention */
 
 import {
     Block as _Block,
@@ -69,10 +69,8 @@ class Footer_ {
         return b.asUint8Array();
     }
 
-    // @ts-ignore
-    protected _recordBatches: FileBlock[];
-    // @ts-ignore
-    protected _dictionaryBatches: FileBlock[];
+    protected _recordBatches!: FileBlock[];
+    protected _dictionaryBatches!: FileBlock[];
     public get numRecordBatches() { return this._recordBatches.length; }
     public get numDictionaries() { return this._dictionaryBatches.length; }
 
diff --git a/js/src/ipc/metadata/json.ts b/js/src/ipc/metadata/json.ts
index 983fa7ed5ea..b8d7d35c40e 100644
--- a/js/src/ipc/metadata/json.ts
+++ b/js/src/ipc/metadata/json.ts
@@ -107,7 +107,6 @@ export function fieldFromJSON(_field: any, dictionaries?: Map<number, DataType>)
         type = typeFromJSON(_field, fieldChildrenFromJSON(_field, dictionaries));
         field = new Field(_field['name'], type, _field['nullable'], customMetadataFromJSON(_field['customMetadata']));
     }
-    // tslint:disable
     // If dictionary encoded and the first time we've seen this dictionary id, decode
     // the data type and child fields, then wrap in a Dictionary type and insert the
     // data type into the dictionary types map.
@@ -130,7 +129,7 @@ export function fieldFromJSON(_field: any, dictionaries?: Map<number, DataType>)
 }
 
 /** @ignore */
-function customMetadataFromJSON(_metadata?: object) {
+function customMetadataFromJSON(_metadata?: Record<string, string>) {
     return new Map<string, string>(Object.entries(_metadata || {}));
 }
 
diff --git a/js/src/ipc/metadata/message.ts b/js/src/ipc/metadata/message.ts
index 484d68afbf7..e6117716107 100644
--- a/js/src/ipc/metadata/message.ts
+++ b/js/src/ipc/metadata/message.ts
@@ -121,7 +121,6 @@ export class Message<T extends MessageHeader = any> {
         throw new Error(`Unrecognized Message header: ${header}`);
     }
 
-    // @ts-ignore
     public body: Uint8Array;
     protected _headerType: T;
     protected _bodyLength: number;
@@ -130,8 +129,7 @@ export class Message<T extends MessageHeader = any> {
     public get version() { return this._version; }
     public get headerType() { return this._headerType; }
     public get bodyLength() { return this._bodyLength; }
-    // @ts-ignore
-    protected _createHeader: MessageHeaderDecoder;
+    protected _createHeader!: MessageHeaderDecoder;
     public header() { return this._createHeader<T>(); }
     public isSchema(): this is Message<MessageHeader.Schema> { return this.headerType === MessageHeader.Schema; }
     public isRecordBatch(): this is Message<MessageHeader.RecordBatch> { return this.headerType === MessageHeader.RecordBatch; }
@@ -180,7 +178,7 @@ export class DictionaryBatch {
     public get nodes(): FieldNode[] { return this.data.nodes; }
     public get buffers(): BufferRegion[] { return this.data.buffers; }
 
-    constructor(data: RecordBatch, id: Long | number, isDelta: boolean = false) {
+    constructor(data: RecordBatch, id: Long | number, isDelta = false) {
         this._data = data;
         this._isDelta = isDelta;
         this._id = typeof id === 'number' ? id : id.low;
@@ -384,7 +382,6 @@ function decodeField(f: _Field, dictionaries?: Map<number, DataType>) {
         type = decodeFieldType(f, decodeFieldChildren(f, dictionaries));
         field = new Field(f.name()!, type, f.nullable(), decodeCustomMetadata(f));
     }
-    // tslint:disable
     // If dictionary encoded and the first time we've seen this dictionary id, decode
     // the data type and child fields, then wrap in a Dictionary type and insert the
     // data type into the dictionary types map.
diff --git a/js/src/ipc/reader.ts b/js/src/ipc/reader.ts
index e44361a7b9b..6b1abb0b22d 100644
--- a/js/src/ipc/reader.ts
+++ b/js/src/ipc/reader.ts
@@ -54,7 +54,7 @@ import {
 /** @ignore */ export type FromArg5 = FileHandle | PromiseLike<FileHandle> | PromiseLike<FromArg4>;
 /** @ignore */ export type FromArgs = FromArg0 | FromArg1 | FromArg2 | FromArg3 | FromArg4 | FromArg5;
 
-/** @ignore */ type OpenOptions = { autoDestroy?: boolean; };
+/** @ignore */ type OpenOptions = { autoDestroy?: boolean };
 /** @ignore */ type RecordBatchReaders<T extends { [key: string]: DataType } = any> = RecordBatchFileReader<T> | RecordBatchStreamReader<T>;
 /** @ignore */ type AsyncRecordBatchReaders<T extends { [key: string]: DataType } = any> = AsyncRecordBatchFileReader<T> | AsyncRecordBatchStreamReader<T>;
 /** @ignore */ type RecordBatchFileReaders<T extends { [key: string]: DataType } = any> = RecordBatchFileReader<T> | AsyncRecordBatchFileReader<T>;
@@ -137,7 +137,7 @@ export class RecordBatchReader<T extends { [key: string]: DataType } = any> exte
         writableStrategy?: ByteLengthQueuingStrategy,
         // @ts-ignore
         readableStrategy?: { autoDestroy: boolean }
-    ): { writable: WritableStream<Uint8Array>, readable: ReadableStream<RecordBatch<T>> } {
+    ): { writable: WritableStream<Uint8Array>; readable: ReadableStream<RecordBatch<T>> } {
         throw new Error(`"throughDOM" not available in this environment`);
     }
 
@@ -318,8 +318,7 @@ interface AsyncRecordBatchFileReaderImpl<T extends { [key: string]: DataType } =
 /** @ignore */
 abstract class RecordBatchReaderImpl<T extends { [key: string]: DataType } = any> implements RecordBatchReaderImpl<T> {
 
-    // @ts-ignore
-    public schema: Schema;
+    public schema!: Schema<T>;
     public closed = false;
     public autoDestroy = true;
     public dictionaries: Map<number, Vector>;
@@ -520,10 +519,8 @@ class AsyncRecordBatchStreamReaderImpl<T extends { [key: string]: DataType } = a
 /** @ignore */
 class RecordBatchFileReaderImpl<T extends { [key: string]: DataType } = any> extends RecordBatchStreamReaderImpl<T> {
 
-    // @ts-ignore
     protected _footer?: Footer;
-    // @ts-ignore
-    protected _handle: RandomAccessFile;
+    protected _handle!: RandomAccessFile;
     public get footer() { return this._footer!; }
     public get numDictionaries() { return this._footer ? this._footer.numDictionaries : 0; }
     public get numRecordBatches() { return this._footer ? this._footer.numRecordBatches : 0; }
@@ -593,8 +590,7 @@ class AsyncRecordBatchFileReaderImpl<T extends { [key: string]: DataType } = any
     implements AsyncRecordBatchFileReaderImpl<T> {
 
     protected _footer?: Footer;
-    // @ts-ignore
-    protected _handle: AsyncRandomAccessFile;
+    protected _handle!: AsyncRandomAccessFile;
     public get footer() { return this._footer!; }
     public get numDictionaries() { return this._footer ? this._footer.numDictionaries : 0; }
     public get numRecordBatches() { return this._footer ? this._footer.numRecordBatches : 0; }
diff --git a/js/src/ipc/writer.ts b/js/src/ipc/writer.ts
index 1416b5c86d3..83703c15526 100644
--- a/js/src/ipc/writer.ts
+++ b/js/src/ipc/writer.ts
@@ -61,8 +61,8 @@ export class RecordBatchWriter<T extends { [key: string]: DataType } = any> exte
         // @ts-ignore
         writableStrategy?: QueuingStrategy<RecordBatch<T>> & { autoDestroy: boolean },
         // @ts-ignore
-        readableStrategy?: { highWaterMark?: number, size?: any }
-    ): { writable: WritableStream<Table<T> | RecordBatch<T>>, readable: ReadableStream<Uint8Array> } {
+        readableStrategy?: { highWaterMark?: number; size?: any }
+    ): { writable: WritableStream<Table<T> | RecordBatch<T>>; readable: ReadableStream<Uint8Array> } {
         throw new Error(`"throughDOM" not available in this environment`);
     }
 
@@ -124,7 +124,6 @@ export class RecordBatchWriter<T extends { [key: string]: DataType } = any> exte
         return this;
     }
     public reset(sink: WritableSink<ArrayBufferViewInput> = this._sink, schema: Schema<T> | null = null) {
-
         if ((sink === this._sink) || (sink instanceof AsyncByteQueue)) {
             this._sink = sink as AsyncByteQueue;
         } else {
@@ -160,7 +159,6 @@ export class RecordBatchWriter<T extends { [key: string]: DataType } = any> exte
     }
 
     public write(payload?: Table<T> | RecordBatch<T> | Iterable<RecordBatch<T>> | null) {
-
         let schema: Schema<T> | null = null;
 
         if (!this._sink) {
@@ -192,7 +190,6 @@ export class RecordBatchWriter<T extends { [key: string]: DataType } = any> exte
     }
 
     protected _writeMessage<T extends MessageHeader>(message: Message<T>, alignment = 8) {
-
         const a = alignment - 1;
         const buffer = Message.encode(message);
         const flatbufferSize = buffer.byteLength;
@@ -458,7 +455,7 @@ async function writeAllAsync<T extends { [key: string]: DataType } = any>(writer
 }
 
 /** @ignore */
-function fieldToJSON({ name, type, nullable }: Field): object {
+function fieldToJSON({ name, type, nullable }: Field): Record<string, unknown> {
     const assembler = new JSONTypeAssembler();
     return {
         'name': name, 'nullable': nullable,
diff --git a/js/src/recordbatch.ts b/js/src/recordbatch.ts
index bde561dc84c..78b2f6671f1 100644
--- a/js/src/recordbatch.ts
+++ b/js/src/recordbatch.ts
@@ -117,7 +117,7 @@ export class RecordBatch<T extends { [key: string]: DataType } = any>
  * @ignore
  * @private
  */
-/* tslint:disable:class-name */
+/* eslint-disable @typescript-eslint/naming-convention */
 export class _InternalEmptyPlaceholderRecordBatch<T extends { [key: string]: DataType } = any> extends RecordBatch<T> {
     constructor(schema: Schema<T>) {
         super(schema, 0, schema.fields.map((f) => Data.new(f.type, 0, 0, 0)));
diff --git a/js/src/schema.ts b/js/src/schema.ts
index c0de4420ee6..437ffa228ec 100644
--- a/js/src/schema.ts
+++ b/js/src/schema.ts
@@ -76,7 +76,7 @@ export class Schema<T extends { [key: string]: DataType } = any> {
 
 export class Field<T extends DataType = any> {
 
-    public static new<T extends DataType = any>(props: { name: string | number, type: T, nullable?: boolean, metadata?: Map<string, string> | null }): Field<T>;
+    public static new<T extends DataType = any>(props: { name: string | number; type: T; nullable?: boolean; metadata?: Map<string, string> | null }): Field<T>;
     public static new<T extends DataType = any>(name: string | number | Field<T>, type: T, nullable?: boolean, metadata?: Map<string, string> | null): Field<T>;
     /** @nocollapse */
     public static new<T extends DataType = any>(...args: any[]) {
@@ -105,7 +105,7 @@ export class Field<T extends DataType = any> {
     public get typeId() { return this.type.typeId; }
     public get [Symbol.toStringTag]() { return 'Field'; }
     public toString() { return `${this.name}: ${this.type}`; }
-    public clone<R extends DataType = T>(props: { name?: string | number, type?: R, nullable?: boolean, metadata?: Map<string, string> | null }): Field<R>;
+    public clone<R extends DataType = T>(props: { name?: string | number; type?: R; nullable?: boolean; metadata?: Map<string, string> | null }): Field<R>;
     public clone<R extends DataType = T>(name?: string | number | Field<T>, type?: R, nullable?: boolean, metadata?: Map<string, string> | null): Field<R>;
     public clone<R extends DataType = T>(...args: any[]) {
         let [name, type, nullable, metadata] = args;
diff --git a/js/src/table.ts b/js/src/table.ts
index 5c41e14a9f5..8862fd652d5 100644
--- a/js/src/table.ts
+++ b/js/src/table.ts
@@ -57,9 +57,9 @@ export class Table<T extends { [key: string]: DataType } = any>
                Applicative<Struct<T>, Table<T>> {
 
     /** @nocollapse */
-    public static empty<T extends { [key: string]: DataType } = {}>(schema = new Schema<T>([])) { return new Table<T>(schema, []); }
+    public static empty<T extends { [key: string]: DataType } = Record<string, never>>(schema = new Schema<T>([])) { return new Table<T>(schema, []); }
 
-    public static from(): Table<{}>;
+    public static from(): Table<Record<string, never>>;
     public static from<T extends { [key: string]: DataType } = any>(source: RecordBatchReader<T>): Table<T>;
     public static from<T extends { [key: string]: DataType } = any>(source: import('./ipc/reader').FromArg0): Table<T>;
     public static from<T extends { [key: string]: DataType } = any>(source: import('./ipc/reader').FromArg2): Table<T>;
diff --git a/js/src/type.ts b/js/src/type.ts
index e09fb8a8c0d..3920cf2303d 100644
--- a/js/src/type.ts
+++ b/js/src/type.ts
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-/* tslint:disable:class-name */
+/* eslint-disable @typescript-eslint/naming-convention */
 
 import { Field } from './schema';
 import { flatbuffers } from 'flatbuffers';
@@ -39,8 +39,8 @@ export type IsSigned = { 'true': true; 'false': false };
 export type RowLike<T extends { [key: string]: DataType }> =
       ( Iterable<[string, T[keyof T]['TValue'] | null]> )
     & { [P in keyof T]: T[P]['TValue'] | null }
-    & { get<K extends keyof T>(key: K): T[K]['TValue'] | null; }
-    & { set<K extends keyof T>(key: K, val: T[K]['TValue'] | null): void; }
+    & { get<K extends keyof T>(key: K): T[K]['TValue'] | null }
+    & { set<K extends keyof T>(key: K, val: T[K]['TValue'] | null): void }
     ;
 
 /** @ignore */
@@ -63,7 +63,6 @@ export interface DataType<TType extends Type = Type, TChildren extends { [key: s
  */
 export abstract class DataType<TType extends Type = Type, TChildren extends { [key: string]: DataType } = any> {
 
-    // @ts-ignore
     public [Symbol.toStringTag]: string;
 
     /** @nocollapse */ static            isNull (x: any): x is Null            { return x && x.typeId === Type.Null;            }
@@ -95,7 +94,7 @@ export abstract class DataType<TType extends Type = Type, TChildren extends { [k
 }
 
 /** @ignore */
-export interface Null extends DataType<Type.Null> { TArray: void; TValue: null; }
+export interface Null extends DataType<Type.Null> { TArray: void; TValue: null }
 /** @ignore */
 export class Null extends DataType<Type.Null> {
     public toString() { return `Null`; }
@@ -109,19 +108,19 @@ export class Null extends DataType<Type.Null> {
 type Ints = Type.Int | Type.Int8 | Type.Int16 | Type.Int32 | Type.Int64 | Type.Uint8 | Type.Uint16 | Type.Uint32 | Type.Uint64;
 /** @ignore */
 type IType = {
-    [Type.Int   ]: { bitWidth: IntBitWidth; isSigned: true | false; TArray: IntArray;    TValue: number | bigint | Int32Array | Uint32Array; };
-    [Type.Int8  ]: { bitWidth:           8; isSigned: true;         TArray: Int8Array;   TValue: number; };
-    [Type.Int16 ]: { bitWidth:          16; isSigned: true;         TArray: Int16Array;  TValue: number; };
-    [Type.Int32 ]: { bitWidth:          32; isSigned: true;         TArray: Int32Array;  TValue: number; };
-    [Type.Int64 ]: { bitWidth:          64; isSigned: true;         TArray: Int32Array;  TValue: bigint | Int32Array | Uint32Array; };
-    [Type.Uint8 ]: { bitWidth:           8; isSigned: false;        TArray: Uint8Array;  TValue: number; };
-    [Type.Uint16]: { bitWidth:          16; isSigned: false;        TArray: Uint16Array; TValue: number; };
-    [Type.Uint32]: { bitWidth:          32; isSigned: false;        TArray: Uint32Array; TValue: number; };
-    [Type.Uint64]: { bitWidth:          64; isSigned: false;        TArray: Uint32Array; TValue: bigint | Int32Array | Uint32Array; };
+    [Type.Int   ]: { bitWidth: IntBitWidth; isSigned: true | false; TArray: IntArray;    TValue: number | bigint | Int32Array | Uint32Array };
+    [Type.Int8  ]: { bitWidth:           8; isSigned: true;         TArray: Int8Array;   TValue: number };
+    [Type.Int16 ]: { bitWidth:          16; isSigned: true;         TArray: Int16Array;  TValue: number };
+    [Type.Int32 ]: { bitWidth:          32; isSigned: true;         TArray: Int32Array;  TValue: number };
+    [Type.Int64 ]: { bitWidth:          64; isSigned: true;         TArray: Int32Array;  TValue: bigint | Int32Array | Uint32Array };
+    [Type.Uint8 ]: { bitWidth:           8; isSigned: false;        TArray: Uint8Array;  TValue: number };
+    [Type.Uint16]: { bitWidth:          16; isSigned: false;        TArray: Uint16Array; TValue: number };
+    [Type.Uint32]: { bitWidth:          32; isSigned: false;        TArray: Uint32Array; TValue: number };
+    [Type.Uint64]: { bitWidth:          64; isSigned: false;        TArray: Uint32Array; TValue: bigint | Int32Array | Uint32Array };
 };
 
 /** @ignore */
-interface Int_<T extends Ints = Ints> extends DataType<T> { TArray: IType[T]['TArray']; TValue: IType[T]['TValue']; }
+interface Int_<T extends Ints = Ints> extends DataType<T> { TArray: IType[T]['TArray']; TValue: IType[T]['TValue'] }
 /** @ignore */
 class Int_<T extends Ints = Ints> extends DataType<T> {
     constructor(public readonly isSigned: IType[T]['isSigned'],
@@ -178,14 +177,14 @@ Object.defineProperty(Uint64.prototype, 'ArrayType', { value: Uint32Array });
 type Floats = Type.Float | Type.Float16 | Type.Float32 | Type.Float64;
 /** @ignore */
 type FType = {
-    [Type.Float  ]: { precision: Precision;        TArray: FloatArray;    TValue: number; };
-    [Type.Float16]: { precision: Precision.HALF;   TArray: Uint16Array;   TValue: number; };
-    [Type.Float32]: { precision: Precision.SINGLE; TArray: Float32Array;  TValue: number; };
-    [Type.Float64]: { precision: Precision.DOUBLE; TArray: Float64Array;  TValue: number; };
+    [Type.Float  ]: { precision: Precision;        TArray: FloatArray;    TValue: number };
+    [Type.Float16]: { precision: Precision.HALF;   TArray: Uint16Array;   TValue: number };
+    [Type.Float32]: { precision: Precision.SINGLE; TArray: Float32Array;  TValue: number };
+    [Type.Float64]: { precision: Precision.DOUBLE; TArray: Float64Array;  TValue: number };
 };
 
 /** @ignore */
-export interface Float<T extends Floats = Floats> extends DataType<T> { TArray: FType[T]['TArray']; TValue: number; }
+export interface Float<T extends Floats = Floats> extends DataType<T> { TArray: FType[T]['TArray']; TValue: number }
 /** @ignore */
 export class Float<T extends Floats = Floats> extends DataType<T> {
     constructor(public readonly precision: Precision) {
@@ -220,7 +219,7 @@ Object.defineProperty(Float32.prototype, 'ArrayType', { value: Float32Array });
 Object.defineProperty(Float64.prototype, 'ArrayType', { value: Float64Array });
 
 /** @ignore */
-export interface Binary extends DataType<Type.Binary> { TArray: Uint8Array; TValue: Uint8Array; ArrayType: TypedArrayConstructor<Uint8Array>; }
+export interface Binary extends DataType<Type.Binary> { TArray: Uint8Array; TValue: Uint8Array; ArrayType: TypedArrayConstructor<Uint8Array> }
 /** @ignore */
 export class Binary extends DataType<Type.Binary> {
     constructor() {
@@ -235,7 +234,7 @@ export class Binary extends DataType<Type.Binary> {
 }
 
 /** @ignore */
-export interface Utf8 extends DataType<Type.Utf8> { TArray: Uint8Array; TValue: string; ArrayType: TypedArrayConstructor<Uint8Array>; }
+export interface Utf8 extends DataType<Type.Utf8> { TArray: Uint8Array; TValue: string; ArrayType: TypedArrayConstructor<Uint8Array> }
 /** @ignore */
 export class Utf8 extends DataType<Type.Utf8> {
     constructor() {
@@ -250,7 +249,7 @@ export class Utf8 extends DataType<Type.Utf8> {
 }
 
 /** @ignore */
-export interface Bool extends DataType<Type.Bool> { TArray: Uint8Array; TValue: boolean; ArrayType: TypedArrayConstructor<Uint8Array>; }
+export interface Bool extends DataType<Type.Bool> { TArray: Uint8Array; TValue: boolean; ArrayType: TypedArrayConstructor<Uint8Array> }
 /** @ignore */
 export class Bool extends DataType<Type.Bool> {
     constructor() {
@@ -265,7 +264,7 @@ export class Bool extends DataType<Type.Bool> {
 }
 
 /** @ignore */
-export interface Decimal extends DataType<Type.Decimal> { TArray: Uint32Array; TValue: Uint32Array; ArrayType: TypedArrayConstructor<Uint32Array>; }
+export interface Decimal extends DataType<Type.Decimal> { TArray: Uint32Array; TValue: Uint32Array; ArrayType: TypedArrayConstructor<Uint32Array> }
 /** @ignore */
 export class Decimal extends DataType<Type.Decimal> {
     constructor(public readonly scale: number,
@@ -285,7 +284,7 @@ export class Decimal extends DataType<Type.Decimal> {
 /** @ignore */
 export type Dates = Type.Date | Type.DateDay | Type.DateMillisecond;
 /** @ignore */
-export interface Date_<T extends Dates = Dates> extends DataType<T> { TArray: Int32Array; TValue: Date; ArrayType: TypedArrayConstructor<Int32Array>; }
+export interface Date_<T extends Dates = Dates> extends DataType<T> { TArray: Int32Array; TValue: Date; ArrayType: TypedArrayConstructor<Int32Array> }
 /** @ignore */
 export class Date_<T extends Dates = Dates> extends DataType<T> {
     constructor(public readonly unit: DateUnit) {
@@ -310,14 +309,14 @@ type Times = Type.Time | Type.TimeSecond | Type.TimeMillisecond | Type.TimeMicro
 /** @ignore */
 type TimesType = {
     [Type.Time           ]: { unit: TimeUnit;             TValue: number | Int32Array };
-    [Type.TimeSecond     ]: { unit: TimeUnit.SECOND;      TValue: number;             };
-    [Type.TimeMillisecond]: { unit: TimeUnit.MILLISECOND; TValue: number;             };
-    [Type.TimeMicrosecond]: { unit: TimeUnit.MICROSECOND; TValue: Int32Array;         };
-    [Type.TimeNanosecond ]: { unit: TimeUnit.NANOSECOND;  TValue: Int32Array;         };
+    [Type.TimeSecond     ]: { unit: TimeUnit.SECOND;      TValue: number             };
+    [Type.TimeMillisecond]: { unit: TimeUnit.MILLISECOND; TValue: number             };
+    [Type.TimeMicrosecond]: { unit: TimeUnit.MICROSECOND; TValue: Int32Array         };
+    [Type.TimeNanosecond ]: { unit: TimeUnit.NANOSECOND;  TValue: Int32Array         };
 };
 
 /** @ignore */
-interface Time_<T extends Times = Times> extends DataType<T> { TArray: Int32Array; TValue: TimesType[T]['TValue']; ArrayType: TypedArrayConstructor<Int32Array>; }
+interface Time_<T extends Times = Times> extends DataType<T> { TArray: Int32Array; TValue: TimesType[T]['TValue']; ArrayType: TypedArrayConstructor<Int32Array> }
 /** @ignore */
 class Time_<T extends Times = Times> extends DataType<T> {
     constructor(public readonly unit: TimesType[T]['unit'],
@@ -348,7 +347,7 @@ export class TimeNanosecond extends Time_<Type.TimeNanosecond> { constructor() {
 /** @ignore */
 type Timestamps = Type.Timestamp | Type.TimestampSecond | Type.TimestampMillisecond | Type.TimestampMicrosecond | Type.TimestampNanosecond;
 /** @ignore */
-interface Timestamp_<T extends Timestamps = Timestamps> extends DataType<T> { TArray: Int32Array; TValue: number; ArrayType: TypedArrayConstructor<Int32Array>; }
+interface Timestamp_<T extends Timestamps = Timestamps> extends DataType<T> { TArray: Int32Array; TValue: number; ArrayType: TypedArrayConstructor<Int32Array> }
 /** @ignore */
 class Timestamp_<T extends Timestamps = Timestamps> extends DataType<T> {
     constructor(public readonly unit: TimeUnit,
@@ -379,7 +378,7 @@ export class TimestampNanosecond extends Timestamp_<Type.TimestampNanosecond> {
 /** @ignore */
 type Intervals = Type.Interval | Type.IntervalDayTime | Type.IntervalYearMonth;
 /** @ignore */
-interface Interval_<T extends Intervals = Intervals> extends DataType<T> { TArray: Int32Array; TValue: Int32Array; ArrayType: TypedArrayConstructor<Int32Array>; }
+interface Interval_<T extends Intervals = Intervals> extends DataType<T> { TArray: Int32Array; TValue: Int32Array; ArrayType: TypedArrayConstructor<Int32Array> }
 /** @ignore */
 class Interval_<T extends Intervals = Intervals> extends DataType<T> {
     constructor(public readonly unit: IntervalUnit) {
@@ -402,7 +401,7 @@ export class IntervalDayTime extends Interval_<Type.IntervalDayTime> { construct
 export class IntervalYearMonth extends Interval_<Type.IntervalYearMonth> { constructor() { super(IntervalUnit.YEAR_MONTH); } }
 
 /** @ignore */
-export interface List<T extends DataType = any> extends DataType<Type.List, { [0]: T }>  { TArray: IterableArrayLike<T>; TValue: V<T>; }
+export interface List<T extends DataType = any> extends DataType<Type.List, { [0]: T }>  { TArray: IterableArrayLike<T>; TValue: V<T> }
 /** @ignore */
 export class List<T extends DataType = any> extends DataType<Type.List, { [0]: T }> {
     constructor(child: Field<T>) {
@@ -422,7 +421,7 @@ export class List<T extends DataType = any> extends DataType<Type.List, { [0]: T
 }
 
 /** @ignore */
-export interface Struct<T extends { [key: string]: DataType } = any> extends DataType<Type.Struct> { TArray: IterableArrayLike<RowLike<T>>; TValue: RowLike<T>; dataTypes: T; }
+export interface Struct<T extends { [key: string]: DataType } = any> extends DataType<Type.Struct> { TArray: IterableArrayLike<RowLike<T>>; TValue: RowLike<T>; dataTypes: T }
 /** @ignore */
 export class Struct<T extends { [key: string]: DataType } = any> extends DataType<Type.Struct, T> {
     public readonly children: Field<T[keyof T]>[];
@@ -441,7 +440,7 @@ export class Struct<T extends { [key: string]: DataType } = any> extends DataTyp
 /** @ignore */
 type Unions = Type.Union | Type.DenseUnion | Type.SparseUnion;
 /** @ignore */
-interface Union_<T extends Unions = Unions> extends DataType<T> { TArray: Int8Array; TValue: any; ArrayType: TypedArrayConstructor<Int8Array>; }
+interface Union_<T extends Unions = Unions> extends DataType<T> { TArray: Int8Array; TValue: any; ArrayType: TypedArrayConstructor<Int8Array> }
 /** @ignore */
 class Union_<T extends Unions = Unions> extends DataType<T> {
     public readonly mode: UnionMode;
@@ -490,7 +489,7 @@ export class SparseUnion extends Union_<Type.SparseUnion> {
 }
 
 /** @ignore */
-export interface FixedSizeBinary extends DataType<Type.FixedSizeBinary> { TArray: Uint8Array; TValue: Uint8Array; ArrayType: TypedArrayConstructor<Uint8Array>; }
+export interface FixedSizeBinary extends DataType<Type.FixedSizeBinary> { TArray: Uint8Array; TValue: Uint8Array; ArrayType: TypedArrayConstructor<Uint8Array> }
 /** @ignore */
 export class FixedSizeBinary extends DataType<Type.FixedSizeBinary> {
     constructor(public readonly byteWidth: number) {
@@ -506,7 +505,7 @@ export class FixedSizeBinary extends DataType<Type.FixedSizeBinary> {
 }
 
 /** @ignore */
-export interface FixedSizeList<T extends DataType = any> extends DataType<Type.FixedSizeList> { TArray: IterableArrayLike<T['TArray']>; TValue: V<T>; }
+export interface FixedSizeList<T extends DataType = any> extends DataType<Type.FixedSizeList> { TArray: IterableArrayLike<T['TArray']>; TValue: V<T> }
 /** @ignore */
 export class FixedSizeList<T extends DataType = any> extends DataType<Type.FixedSizeList, { [0]: T }> {
     public readonly children: Field<T>[];
@@ -529,19 +528,19 @@ export class FixedSizeList<T extends DataType = any> extends DataType<Type.Fixed
 /** @ignore */
 export interface Map_<TKey extends DataType = any, TValue extends DataType = any> extends DataType<Type.Map> {
     TArray: IterableArrayLike<Map<TKey['TValue'], TValue['TValue'] | null>>;
-    TChild: Struct<{ key: TKey, value: TValue }>;
+    TChild: Struct<{ key: TKey; value: TValue }>;
     TValue: MapLike<TKey, TValue>;
 }
 
 /** @ignore */
 export class Map_<TKey extends DataType = any, TValue extends DataType = any> extends DataType<Type.Map> {
-    constructor(child: Field<Struct<{ key: TKey, value: TValue }>>, keysSorted = false) {
+    constructor(child: Field<Struct<{ key: TKey; value: TValue }>>, keysSorted = false) {
         super();
         this.children = [child];
         this.keysSorted = keysSorted;
     }
     public readonly keysSorted: boolean;
-    public readonly children: Field<Struct<{ key: TKey, value: TValue }>>[];
+    public readonly children: Field<Struct<{ key: TKey; value: TValue }>>[];
     public get typeId() { return Type.Map as Type.Map; }
     public get keyType(): TKey { return this.children[0].type.children[0].type as TKey; }
     public get valueType(): TValue { return this.children[0].type.children[1].type as TValue; }
@@ -560,7 +559,7 @@ const getId = ((atomicDictionaryId) => () => ++atomicDictionaryId)(-1);
 export type TKeys = Int8 | Int16 | Int32 | Uint8 | Uint16 | Uint32;
 
 /** @ignore */
-export interface Dictionary<T extends DataType = any, TKey extends TKeys = TKeys> extends DataType<Type.Dictionary> { TArray: TKey['TArray']; TValue: T['TValue']; }
+export interface Dictionary<T extends DataType = any, TKey extends TKeys = TKeys> extends DataType<Type.Dictionary> { TArray: TKey['TArray']; TValue: T['TValue'] }
 /** @ignore */
 export class Dictionary<T extends DataType = any, TKey extends TKeys = TKeys> extends DataType<Type.Dictionary> {
     public readonly id: number;
diff --git a/js/src/util/bn.ts b/js/src/util/bn.ts
index 7e93dbbdca6..cb85cd41f71 100644
--- a/js/src/util/bn.ts
+++ b/js/src/util/bn.ts
@@ -84,9 +84,9 @@ function bignumToNumber<T extends BN<BigNumArray>>(bn: T) {
 }
 
 /** @ignore */
-export let bignumToString: { <T extends BN<BigNumArray>>(a: T): string; };
+export let bignumToString: { <T extends BN<BigNumArray>>(a: T): string };
 /** @ignore */
-export let bignumToBigInt: { <T extends BN<BigNumArray>>(a: T): bigint; };
+export let bignumToBigInt: { <T extends BN<BigNumArray>>(a: T): bigint };
 
 if (!BigIntAvailable) {
     bignumToString = decimalToString;
diff --git a/js/src/util/compat.ts b/js/src/util/compat.ts
index 2e51ee2ba6d..62fcb772e43 100644
--- a/js/src/util/compat.ts
+++ b/js/src/util/compat.ts
@@ -81,6 +81,7 @@ export { BigUint64ArrayCtor as BigUint64Array, BigUint64ArrayAvailable };
 /** @ignore */ const isBoolean = (x: any) => typeof x === 'boolean';
 /** @ignore */ const isFunction = (x: any) => typeof x === 'function';
 /** @ignore */
+// eslint-disable-next-line @typescript-eslint/ban-types
 export const isObject = (x: any): x is Object => x != null && Object(x) === x;
 
 /** @ignore */
diff --git a/js/src/util/recordbatch.ts b/js/src/util/recordbatch.ts
index 2828a6eb734..97dd42ed160 100644
--- a/js/src/util/recordbatch.ts
+++ b/js/src/util/recordbatch.ts
@@ -70,7 +70,7 @@ function uniformlyDistributeChunksAcrossRecordBatches<T extends { [key: string]:
     const memo = { numBatches: columns.reduce((n, c) => Math.max(n, c.length), 0) };
 
     let numBatches = 0, batchLength = 0;
-    let i: number = -1, numColumns = columns.length;
+    let i = -1, numColumns = columns.length;
     let child: Data<T[keyof T]>, childData: Data<T[keyof T]>[] = [];
 
     while (memo.numBatches-- > 0) {
diff --git a/js/src/vector/chunked.ts b/js/src/vector/chunked.ts
index a752831c9a7..e3cbf547191 100644
--- a/js/src/vector/chunked.ts
+++ b/js/src/vector/chunked.ts
@@ -91,7 +91,7 @@ export class Chunked<T extends DataType = any>
     protected _chunks: Vector<T>[];
     protected _numChildren: number;
     protected _children?: Chunked[];
-    protected _nullCount: number = -1;
+    protected _nullCount = -1;
     protected _chunkOffsets: Uint32Array;
 
     constructor(type: T, chunks: Vector<T>[] = [], offsets = calculateOffsets(chunks)) {
diff --git a/js/src/vector/index.ts b/js/src/vector/index.ts
index 4711a6be930..af001931995 100644
--- a/js/src/vector/index.ts
+++ b/js/src/vector/index.ts
@@ -90,9 +90,9 @@ function newVector<T extends DataType>(data: Data<T>, ...args: VectorCtorArgs<V<
 }
 
 /** @ignore */
-export interface VectorBuilderOptions<T extends DataType, TNull = any> extends IterableBuilderOptions<T, TNull> { values: Iterable<T['TValue'] | TNull>; }
+export interface VectorBuilderOptions<T extends DataType, TNull = any> extends IterableBuilderOptions<T, TNull> { values: Iterable<T['TValue'] | TNull> }
 /** @ignore */
-export interface VectorBuilderOptionsAsync<T extends DataType, TNull = any> extends IterableBuilderOptions<T, TNull> { values: AsyncIterable<T['TValue'] | TNull>; }
+export interface VectorBuilderOptionsAsync<T extends DataType, TNull = any> extends IterableBuilderOptions<T, TNull> { values: AsyncIterable<T['TValue'] | TNull> }
 
 /** @ignore */
 export function vectorFromValuesWithType<T extends DataType, TNull = any>(newDataType: () => T, input: Iterable<T['TValue'] | TNull> | AsyncIterable<T['TValue'] | TNull> | VectorBuilderOptions<T, TNull> | VectorBuilderOptionsAsync<T, TNull>) {
diff --git a/js/src/vector/int.ts b/js/src/vector/int.ts
index 74c284e11c4..216efd59ab6 100644
--- a/js/src/vector/int.ts
+++ b/js/src/vector/int.ts
@@ -136,8 +136,7 @@ export class Int64Vector extends IntVector<Int64> {
     public toBigInt64Array() {
         return toBigInt64Array(this.values);
     }
-    // @ts-ignore
-    private _values64: BigInt64Array;
+    private _values64!: BigInt64Array;
     public get values64(): BigInt64Array {
         return this._values64 || (this._values64 = this.toBigInt64Array());
     }
@@ -154,8 +153,7 @@ export class Uint64Vector extends IntVector<Uint64> {
     public toBigUint64Array() {
         return toBigUint64Array(this.values);
     }
-    // @ts-ignore
-    private _values64: BigUint64Array;
+    private _values64!: BigUint64Array;
     public get values64(): BigUint64Array {
         return this._values64 || (this._values64 = this.toBigUint64Array());
     }
diff --git a/js/src/vector/map.ts b/js/src/vector/map.ts
index db7726a2cc3..9975919f7c6 100644
--- a/js/src/vector/map.ts
+++ b/js/src/vector/map.ts
@@ -24,12 +24,12 @@ import { DataType, Map_, Struct, List } from '../type';
 /** @ignore */
 export class MapVector<K extends DataType = any, V extends DataType = any> extends BaseVector<Map_<K, V>> {
     public asList() {
-        const child = this.type.children[0] as Field<Struct<{ key: K, value: V }>>;
-        return Vector.new(this.data.clone(new List<Struct<{ key: K, value: V }>>(child)));
+        const child = this.type.children[0] as Field<Struct<{ key: K; value: V }>>;
+        return Vector.new(this.data.clone(new List<Struct<{ key: K; value: V }>>(child)));
     }
     public bind(index: number): Map_<K, V>['TValue'] {
-        const child = this.getChildAt<Struct<{ key: K, value: V }>>(0);
+        const child = this.getChildAt<Struct<{ key: K; value: V }>>(0)!;
         const { [index]: begin, [index + 1]: end } = this.valueOffsets;
-        return new MapRow(child!.slice(begin, end));
+        return new MapRow(child.slice(begin, end));
     }
 }
diff --git a/js/src/vector/row.ts b/js/src/vector/row.ts
index 7305627fbad..42b75019490 100644
--- a/js/src/vector/row.ts
+++ b/js/src/vector/row.ts
@@ -158,7 +158,7 @@ abstract class Row<K extends PropertyKey = any, V = any> implements Map<K, V> {
 }
 
 export class MapRow<K extends DataType = any, V extends DataType = any> extends Row<K['TValue'], V['TValue'] | null> {
-    constructor(slice: Vector<Struct<{ key: K, value: V }>>) {
+    constructor(slice: Vector<Struct<{ key: K; value: V }>>) {
         super(slice, slice.length);
         return createRowProxy(this);
     }
@@ -224,8 +224,8 @@ const defineRowProxyProperties = (() => {
             ktoi.set(key, ++idx);
             desc.get = getter(key);
             desc.set = setter(key);
-            row.hasOwnProperty(key) || (desc.enumerable = true, Object.defineProperty(row, key, desc));
-            row.hasOwnProperty(idx) || (desc.enumerable = false, Object.defineProperty(row, idx, desc));
+            Object.prototype.hasOwnProperty.call(row, key) || (desc.enumerable = true, Object.defineProperty(row, key, desc));
+            Object.prototype.hasOwnProperty.call(row, idx) || (desc.enumerable = false, Object.defineProperty(row, idx, desc));
         }
         desc.get = desc.set = null;
         return row;
diff --git a/js/src/vector/struct.ts b/js/src/vector/struct.ts
index 00af4aeb380..b825f092e4f 100644
--- a/js/src/vector/struct.ts
+++ b/js/src/vector/struct.ts
@@ -22,8 +22,7 @@ import { DataType, Struct } from '../type';
 /** @ignore */ const kRowIndex = Symbol.for('rowIndex');
 /** @ignore */
 export class StructVector<T extends { [key: string]: DataType } = any> extends BaseVector<Struct<T>> {
-    // @ts-ignore
-    private _row: StructRow<T>;
+    private _row!: StructRow<T>;
     public bind(index: number): Struct<T>['TValue'] {
         const proto = this._row || (this._row = new StructRow<T>(this));
         const bound = Object.create(proto);
diff --git a/js/src/visitor.ts b/js/src/visitor.ts
index 9877a55b7b0..3a63c93f963 100644
--- a/js/src/visitor.ts
+++ b/js/src/visitor.ts
@@ -54,11 +54,10 @@ export abstract class Visitor {
 function getVisitFn<T extends DataType>(visitor: Visitor, node: any, throwIfNotFound = true) {
     let fn: any = null;
     let dtype: T['TType'] = Type.NONE;
-    // tslint:disable
-    if      (node instanceof Data    ) { dtype = inferDType(node.type as T); }
-    else if (node instanceof Vector  ) { dtype = inferDType(node.type as T); }
-    else if (node instanceof DataType) { dtype = inferDType(node      as T); }
-    else if (typeof (dtype = node) !== 'number') { dtype = Type[node] as any as T['TType']; }
+    if      (node instanceof Data    ) dtype = inferDType(node.type as T);
+    else if (node instanceof Vector  ) dtype = inferDType(node.type as T);
+    else if (node instanceof DataType) dtype = inferDType(node      as T);
+    else if (typeof (dtype = node) !== 'number') dtype = Type[node] as any as T['TType'];
 
     switch (dtype) {
         case Type.Null:                 fn = visitor.visitNull; break;
@@ -114,7 +113,7 @@ function getVisitFn<T extends DataType>(visitor: Visitor, node: any, throwIfNotF
 function inferDType<T extends DataType>(type: T): Type {
     switch (type.typeId) {
         case Type.Null: return Type.Null;
-        case Type.Int:
+        case Type.Int: {
             const { bitWidth, isSigned } = (type as any as Int);
             switch (bitWidth) {
                 case  8: return isSigned ? Type.Int8  : Type.Uint8 ;
@@ -124,6 +123,7 @@ function inferDType<T extends DataType>(type: T): Type {
             }
             // @ts-ignore
             return Type.Int;
+        }
         case Type.Float:
             switch((type as any as Float).precision) {
                 case Precision.HALF: return Type.Float16;
diff --git a/js/src/visitor/jsontypeassembler.ts b/js/src/visitor/jsontypeassembler.ts
index 55acba4f1af..54f046f648a 100644
--- a/js/src/visitor/jsontypeassembler.ts
+++ b/js/src/visitor/jsontypeassembler.ts
@@ -22,12 +22,12 @@ import { Precision, DateUnit, TimeUnit, IntervalUnit, UnionMode } from '../enum'
 
 /** @ignore */
 export interface JSONTypeAssembler extends Visitor {
-    visit<T extends type.DataType>(node: T): object | undefined;
+    visit<T extends type.DataType>(node: T): Record<string, unknown> | undefined;
 }
 
 /** @ignore */
 export class JSONTypeAssembler extends Visitor {
-    public visit<T extends type.DataType>(node: T): object | undefined {
+    public visit<T extends type.DataType>(node: T): Record<string, unknown> | undefined {
         return node == null ? undefined : super.visit(node);
     }
     public visitNull<T extends type.Null>({ typeId }: T) {
diff --git a/js/src/visitor/jsonvectorassembler.ts b/js/src/visitor/jsonvectorassembler.ts
index 8d5f324cd5e..f3c013344fc 100644
--- a/js/src/visitor/jsonvectorassembler.ts
+++ b/js/src/visitor/jsonvectorassembler.ts
@@ -34,24 +34,24 @@ import {
 /** @ignore */
 export interface JSONVectorAssembler extends Visitor {
 
-    visit     <T extends Column>  (node: T  ): object;
-    visitMany <T extends Column>  (cols: T[]): object[];
-    getVisitFn<T extends DataType>(node: Column<T>): (column: Column<T>) => { name: string, count: number, VALIDITY: (0 | 1)[], DATA?: any[], OFFSET?: number[], TYPE?: number[], children?: any[] };
+    visit     <T extends Column>  (node: T  ): Record<string, unknown>;
+    visitMany <T extends Column>  (cols: T[]): Record<string, unknown>[];
+    getVisitFn<T extends DataType>(node: Column<T>): (column: Column<T>) => { name: string; count: number; VALIDITY: (0 | 1)[]; DATA?: any[]; OFFSET?: number[]; TYPE?: number[]; children?: any[] };
 
-    visitNull                 <T extends Null>            (vector: V<T>): { };
+    visitNull                 <T extends Null>            (vector: V<T>): Record<string, never>;
     visitBool                 <T extends Bool>            (vector: V<T>): { DATA: boolean[] };
     visitInt                  <T extends Int>             (vector: V<T>): { DATA: (number | string)[]  };
     visitFloat                <T extends Float>           (vector: V<T>): { DATA: number[]  };
-    visitUtf8                 <T extends Utf8>            (vector: V<T>): { DATA: string[], OFFSET: number[] };
-    visitBinary               <T extends Binary>          (vector: V<T>): { DATA: string[], OFFSET: number[] };
+    visitUtf8                 <T extends Utf8>            (vector: V<T>): { DATA: string[]; OFFSET: number[] };
+    visitBinary               <T extends Binary>          (vector: V<T>): { DATA: string[]; OFFSET: number[] };
     visitFixedSizeBinary      <T extends FixedSizeBinary> (vector: V<T>): { DATA: string[]  };
     visitDate                 <T extends Date_>           (vector: V<T>): { DATA: number[]  };
     visitTimestamp            <T extends Timestamp>       (vector: V<T>): { DATA: string[]  };
     visitTime                 <T extends Time>            (vector: V<T>): { DATA: number[]  };
     visitDecimal              <T extends Decimal>         (vector: V<T>): { DATA: string[]  };
-    visitList                 <T extends List>            (vector: V<T>): { children: any[], OFFSET: number[] };
+    visitList                 <T extends List>            (vector: V<T>): { children: any[]; OFFSET: number[] };
     visitStruct               <T extends Struct>          (vector: V<T>): { children: any[] };
-    visitUnion                <T extends Union>           (vector: V<T>): { children: any[], TYPE: number[],  };
+    visitUnion                <T extends Union>           (vector: V<T>): { children: any[]; TYPE: number[]  };
     visitInterval             <T extends Interval>        (vector: V<T>): { DATA: number[]  };
     visitFixedSizeList        <T extends FixedSizeList>   (vector: V<T>): { children: any[] };
     visitMap                  <T extends Map_>            (vector: V<T>): { children: any[] };
diff --git a/js/src/visitor/set.ts b/js/src/visitor/set.ts
index 307c839da78..0c3cea3e619 100644
--- a/js/src/visitor/set.ts
+++ b/js/src/visitor/set.ts
@@ -140,11 +140,12 @@ const setNumericX2       = <T extends Numeric2X>      (vector: VectorType<T>, in
     switch (typeof value) {
         case 'bigint': vector.values64[index] = value; break;
         case 'number': vector.values[index * vector.stride] = value; break;
-        default:
+        default: {
             const val = value as T['TArray'];
             const { stride, ArrayType } = vector;
             const long = toArrayBufferView<T['TArray']>(ArrayType, val);
             vector.values.set(long.subarray(0, stride), stride * index);
+        }
     }
 };
 /** @ignore */
diff --git a/js/src/visitor/vectorloader.ts b/js/src/visitor/vectorloader.ts
index 91e72c9dbf4..0a7bb41d811 100644
--- a/js/src/visitor/vectorloader.ts
+++ b/js/src/visitor/vectorloader.ts
@@ -38,9 +38,9 @@ export interface VectorLoader extends Visitor {
 export class VectorLoader extends Visitor {
     private bytes: Uint8Array;
     private nodes: FieldNode[];
-    private nodesIndex: number = -1;
+    private nodesIndex = -1;
     private buffers: BufferRegion[];
-    private buffersIndex: number = -1;
+    private buffersIndex = -1;
     private dictionaries: Map<number, Vector<any>>;
     constructor(bytes: Uint8Array, nodes: FieldNode[], buffers: BufferRegion[], dictionaries: Map<number, Vector<any>>) {
         super();
diff --git a/js/test/.eslintrc.js b/js/test/.eslintrc.js
new file mode 100644
index 00000000000..311a356e294
--- /dev/null
+++ b/js/test/.eslintrc.js
@@ -0,0 +1,31 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+module.exports = {
+    rules: {
+        "@typescript-eslint/no-require-imports": "off",
+        "@typescript-eslint/no-inferrable-types": "off",
+        "@typescript-eslint/naming-convention": "off",
+        "prefer-const": "off",
+        "max-len": "off",
+
+        "jest/no-export": "off",
+        "jest/valid-title": "off",
+        "jest/expect-expect": "off",
+        "jest/no-conditional-expect": "off",
+    },
+};
diff --git a/js/test/Arrow.ts b/js/test/Arrow.ts
index e81c07f5e6d..43c8c167bd7 100644
--- a/js/test/Arrow.ts
+++ b/js/test/Arrow.ts
@@ -15,12 +15,10 @@
 // specific language governing permissions and limitations
 // under the License.
 
-/* tslint:disable */
 // Dynamically load an Arrow target build based on command line arguments
 
 import 'web-streams-polyfill';
 
-/* tslint:disable */
 // import this before assigning window global since it does a `typeof window` check
 require('web-stream-tools');
 
diff --git a/js/test/generate-test-data.ts b/js/test/generate-test-data.ts
index 35c040269d2..d12bb88a098 100644
--- a/js/test/generate-test-data.ts
+++ b/js/test/generate-test-data.ts
@@ -15,7 +15,6 @@
 // specific language governing permissions and limitations
 // under the License.
 
-/* tslint:disable */
 const randomatic = require('randomatic');
 import { TextEncoder } from 'text-encoding-utf-8';
 import { VectorType as V } from '../src/interfaces';
@@ -127,7 +126,7 @@ const defaultStructChildren = () => [
 ];
 
 const defaultMapChild = () => [
-    new Field('', new Struct<{ key: Utf8, value: Float32 }>([
+    new Field('', new Struct<{ key: Utf8; value: Float32 }>([
         new Field('key', new Utf8()),
         new Field('value', new Float32())
     ]))
@@ -228,7 +227,7 @@ export const dictionary = <T extends DataType = Utf8, TKey extends TKeys = Int32
 export const intervalDayTime = (length = 100, nullCount = length * 0.2 | 0) => vectorGenerator.visit(new IntervalDayTime(), length, nullCount);
 export const intervalYearMonth = (length = 100, nullCount = length * 0.2 | 0) => vectorGenerator.visit(new IntervalYearMonth(), length, nullCount);
 export const fixedSizeList = (length = 100, nullCount = length * 0.2 | 0, listSize = 2, child = defaultListChild) => vectorGenerator.visit(new FixedSizeList(listSize, child), length, nullCount);
-export const map = <TKey extends DataType = any, TValue extends DataType = any>(length = 100, nullCount = length * 0.2 | 0, child: Field<Struct<{key: TKey, value: TValue}>> = <any> defaultMapChild()) => vectorGenerator.visit(new Map_<TKey, TValue>(child), length, nullCount);
+export const map = <TKey extends DataType = any, TValue extends DataType = any>(length = 100, nullCount = length * 0.2 | 0, child: Field<Struct<{key: TKey; value: TValue}>> = <any> defaultMapChild()) => vectorGenerator.visit(new Map_<TKey, TValue>(child), length, nullCount);
 
 export const vecs = {
     null_, bool, int8, int16, int32, int64, uint8, uint16, uint32, uint64, float16, float32, float64, utf8, binary, fixedSizeBinary, dateDay, dateMillisecond, timestampSecond, timestampMillisecond, timestampMicrosecond, timestampNanosecond, timeSecond, timeMillisecond, timeMicrosecond, timeNanosecond, decimal, list, struct, denseUnion, sparseUnion, dictionary, intervalDayTime, intervalYearMonth, fixedSizeList, map
@@ -399,7 +398,7 @@ function generateInterval<T extends Interval>(this: TestDataVectorGenerator, typ
         return values;
     });
     iterateBitmap(length, nullBitmap, (i: number, valid: boolean) => {
-        !valid && data.set(new Int32Array(stride), i * stride)
+        !valid && data.set(new Int32Array(stride), i * stride);
     });
     return { values, vector: Vector.new(Data.Interval(type, 0, length, nullCount, nullBitmap, data)) };
 }
@@ -484,7 +483,7 @@ function generateUnion<T extends Union>(this: TestDataVectorGenerator, type: T,
     const nullBitmap = createBitmap(length, nullCount);
     const typeIdToChildIndex = typeIds.reduce((typeIdToChildIndex, typeId, idx) => {
         return (typeIdToChildIndex[typeId] = idx) && typeIdToChildIndex || typeIdToChildIndex;
-    }, Object.create(null) as { [key: number]: number })
+    }, Object.create(null) as { [key: number]: number });
 
     if (type.mode === UnionMode.Sparse) {
         const values = memoize(() => {
@@ -556,7 +555,7 @@ function generateMap<T extends Map_>(this: TestDataVectorGenerator,
     const stride = childVec.length / (length - nullCount);
     const offsets = createVariableWidthOffsets(length, nullBitmap, childVec.length, stride);
     const values = memoize(() => {
-        const childValues = child.values() as { key: K; value: V; }[];
+        const childValues = child.values() as { key: K; value: V }[];
         const values: (T['TValue'] | null)[] = [...offsets.slice(1)]
             .map((offset, i) => isValid(nullBitmap, i) ? offset : null)
             .map((o, i) => o == null ? null : (() => {
diff --git a/js/test/inference/column.ts b/js/test/inference/column.ts
index 442dd8c8749..03837612425 100644
--- a/js/test/inference/column.ts
+++ b/js/test/inference/column.ts
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+/* eslint-disable jest/no-standalone-expect */
+
 import { Data } from '../../src/data';
 import { Field } from '../../src/schema';
 import { Column } from '../../src/column';
diff --git a/js/test/inference/nested.ts b/js/test/inference/nested.ts
index 5a811b576b3..510da89e9f6 100644
--- a/js/test/inference/nested.ts
+++ b/js/test/inference/nested.ts
@@ -21,8 +21,8 @@ import { DataType } from '../../src/type';
 import { Vector, BoolVector } from '../../src/vector/index';
 import { Bool, Int8, Utf8, List, Dictionary, Struct } from '../../src/type';
 
-type NamedSchema = { a: Int8, b: Utf8, c: Dictionary<List<Bool>>; [idx: string]: DataType; };
-type IndexSchema = { 0: Int8, 1: Utf8, 2: Dictionary<List<Bool>>; [idx: number]: DataType; };
+type NamedSchema = { a: Int8; b: Utf8; c: Dictionary<List<Bool>>; [idx: string]: DataType };
+type IndexSchema = { 0: Int8; 1: Utf8; 2: Dictionary<List<Bool>>; [idx: number]: DataType };
 
 checkIndexTypes({ 0: new Int8(), 1: new Utf8(), 2: new Dictionary<List<Bool>>(null!, null!) } as IndexSchema);
 checkNamedTypes({ a: new Int8(), b: new Utf8(), c: new Dictionary<List<Bool>>(null!, null!) } as NamedSchema);
diff --git a/js/test/jest-extensions.ts b/js/test/jest-extensions.ts
index 5a5524bfa52..78937e40d90 100644
--- a/js/test/jest-extensions.ts
+++ b/js/test/jest-extensions.ts
@@ -53,7 +53,7 @@ function toArrowCompare(this: jest.MatcherUtils, actual: any, expected: any) {
 
 function toEqualTable(this: jest.MatcherUtils, actual: Table, expected: Table) {
     const failures = [] as string[];
-    try { expect(actual.length).toEqual(expected.length); } catch (e) { failures.push(`${e}`); }
+    try { expect(actual).toHaveLength(expected.length); } catch (e) { failures.push(`${e}`); }
     try { expect(actual.numCols).toEqual(expected.numCols); } catch (e) { failures.push(`${e}`); }
     try { expect(actual.schema.metadata).toEqual(expected.schema.metadata); } catch (e) { failures.push(`${e}`); }
     (() => {
@@ -74,7 +74,7 @@ function toEqualTable(this: jest.MatcherUtils, actual: Table, expected: Table) {
 
 function toEqualRecordBatch(this: jest.MatcherUtils, actual: RecordBatch, expected: RecordBatch) {
     const failures = [] as string[];
-    try { expect(actual.length).toEqual(expected.length); } catch (e) { failures.push(`${e}`); }
+    try { expect(actual).toHaveLength(expected.length); } catch (e) { failures.push(`${e}`); }
     try { expect(actual.numCols).toEqual(expected.numCols); } catch (e) { failures.push(`${e}`); }
     (() => {
         for (let i = -1, n = actual.numCols; ++i < n;) {
diff --git a/js/test/unit/builders/date-tests.ts b/js/test/unit/builders/date-tests.ts
index a03dc8dc09a..812383494c1 100644
--- a/js/test/unit/builders/date-tests.ts
+++ b/js/test/unit/builders/date-tests.ts
@@ -32,7 +32,6 @@ const testDOMStreams = process.env.TEST_DOM_STREAMS === 'true';
 const testNodeStreams = process.env.TEST_NODE_STREAMS === 'true';
 
 describe('DateDayBuilder', () => {
-
     runTestsWithEncoder('encodeAll', encodeAll(() => new DateDay()));
     runTestsWithEncoder('encodeEach: 5', encodeEach(() => new DateDay(), 5));
     runTestsWithEncoder('encodeEach: 25', encodeEach(() => new DateDay(), 25));
@@ -55,7 +54,6 @@ describe('DateDayBuilder', () => {
 });
 
 describe('DateMillisecondBuilder', () => {
-
     runTestsWithEncoder('encodeAll', encodeAll(() => new DateMillisecond()));
     runTestsWithEncoder('encodeEach: 5', encodeEach(() => new DateMillisecond(), 5));
     runTestsWithEncoder('encodeEach: 25', encodeEach(() => new DateMillisecond(), 25));
@@ -77,7 +75,7 @@ describe('DateMillisecondBuilder', () => {
     }
 });
 
-describe('DateMillisecondBuilder', () => {
+describe('DateMillisecondBuilder with nulls', () => {
     const encode = encodeAll(() => new DateMillisecond());
     const dates = [
         null,
diff --git a/js/test/unit/builders/utils.ts b/js/test/unit/builders/utils.ts
index 2e68b783f26..975828c1075 100644
--- a/js/test/unit/builders/utils.ts
+++ b/js/test/unit/builders/utils.ts
@@ -22,7 +22,6 @@ import { Builder } from '../../Arrow';
 import { DataType, Vector, Chunked } from '../../Arrow';
 
 const rand = Math.random.bind(Math);
-/* tslint:disable */
 const randstr = require('randomatic');
 const randnulls = <T, TNull = null>(values: T[], n: TNull = <any> null) => values.map((x) => Math.random() > 0.25 ? x : n) as (T | TNull)[];
 
@@ -59,7 +58,7 @@ export const int64sNoNulls = (length = 20) => Array.from({ length }, (_, i) => {
         // Int32Array (util.BN is-a Int32Array)
         case 0: return bn;
         // BigInt
-        case 1: return bn[Symbol.toPrimitive]()
+        case 1: return bn[Symbol.toPrimitive]();
         // number
         case 2:
         default: return bn[0];
@@ -77,7 +76,7 @@ export const uint64sNoNulls = (length = 20) => Array.from({ length }, (_, i) =>
         // UInt32Array (util.BN is-a Uint32Array)
         case 0: return bn;
         // BigInt
-        case 1: return bn[Symbol.toPrimitive]()
+        case 1: return bn[Symbol.toPrimitive]();
         // number
         case 2:
         default: return bn[0];
@@ -181,9 +180,9 @@ export function validateVector<T extends DataType>(vals: (T['TValue'] | null)[],
     try {
         for (x of vec) {
             if (nulls.has(y = vals[i])) {
-                expect(x).toEqual(null);
+                expect(x).toBeNull();
             } else if (isInt64Null(nulls, y)) {
-                expect(x).toEqual(null);
+                expect(x).toBeNull();
             } else {
                 expect(x).toArrowCompare(y);
             }
diff --git a/js/test/unit/generated-data-validators.ts b/js/test/unit/generated-data-validators.ts
index 03155459dba..6bcc340e8ff 100644
--- a/js/test/unit/generated-data-validators.ts
+++ b/js/test/unit/generated-data-validators.ts
@@ -25,7 +25,7 @@ import {
 import { util } from '../Arrow';
 const { createElementComparator: compare } = util;
 
-type DeferredTest = { description: string, tests?: DeferredTest[], run: (...args: any[]) => any };
+type DeferredTest = { description: string; tests?: DeferredTest[]; run: (...args: any[]) => any };
 
 function deferTest(description: string, run: (...args: any[]) => any) {
     return { description, run: () => test(description, run) } as DeferredTest;
@@ -73,7 +73,7 @@ export function validateVector({ values: createTestValues, vector, keys }: Gener
     const suites = [
         deferDescribe(`Validate ${vector.type} (sliced=${sliced})`, [
             deferTest(`length is correct`, () => {
-                expect(vector.length).toBe(values.length);
+                expect(vector).toHaveLength(values.length);
             }),
             deferTest(`gets expected values`, () => {
                 expect.hasAssertions();
@@ -94,7 +94,7 @@ export function validateVector({ values: createTestValues, vector, keys }: Gener
                     while (++i < n) {
                         indices.isValid(i)
                             ? expect(indices.get(i)).toBe(keys[i])
-                            : expect(indices.get(i)).toBe(null);
+                            : expect(indices.get(i)).toBeNull();
                     }
                 } catch (e) { throw new Error(`${indices}[${i}]: ${e}`); }
             }) || null as any as DeferredTest,
diff --git a/js/test/unit/ipc/helpers.ts b/js/test/unit/ipc/helpers.ts
index f8ae1609f3f..eebf56f70a0 100644
--- a/js/test/unit/ipc/helpers.ts
+++ b/js/test/unit/ipc/helpers.ts
@@ -29,7 +29,6 @@ import * as fs from 'fs';
 import { fs as memfs } from 'memfs';
 import { Readable, PassThrough } from 'stream';
 
-/* tslint:disable */
 const randomatic = require('randomatic');
 
 export abstract class ArrowIOTestHelper {
@@ -173,8 +172,6 @@ export async function* readableDOMStreamToAsyncIterator<T>(stream: ReadableStrea
             // Else yield the chunk
             yield value as T;
         }
-    } catch (e) {
-        throw e;
     } finally {
         try { stream.locked && reader.releaseLock(); } catch (e) {}
     }
diff --git a/js/test/unit/ipc/reader/from-inference-tests.ts b/js/test/unit/ipc/reader/from-inference-tests.ts
index a901990ceef..01d15fa8003 100644
--- a/js/test/unit/ipc/reader/from-inference-tests.ts
+++ b/js/test/unit/ipc/reader/from-inference-tests.ts
@@ -29,11 +29,11 @@ import {
     AsyncRecordBatchStreamReader
 } from '../../../Arrow';
 
-/* tslint:disable */
 const { parse: bignumJSONParse } = require('json-bignum');
 
 for (const table of generateRandomTables([10, 20, 30])) {
     const name = `[\n ${table.schema.fields.join(',\n ')}\n]`;
+    // eslint-disable-next-line jest/valid-describe
     describe('RecordBatchReader.from', ((table, name) => () => {
         testFromFile(ArrowIOTestHelper.file(table), name);
         testFromJSON(ArrowIOTestHelper.json(table), name);
diff --git a/js/test/unit/ipc/reader/json-reader-tests.ts b/js/test/unit/ipc/reader/json-reader-tests.ts
index 7a223f03fa5..b41106ecfb7 100644
--- a/js/test/unit/ipc/reader/json-reader-tests.ts
+++ b/js/test/unit/ipc/reader/json-reader-tests.ts
@@ -24,7 +24,6 @@ import { ArrowIOTestHelper } from '../helpers';
 import { RecordBatchReader } from '../../../Arrow';
 import { validateRecordBatchReader } from '../validate';
 
-/* tslint:disable */
 const { parse: bignumJSONParse } = require('json-bignum');
 
 for (const table of generateRandomTables([10, 20, 30])) {
diff --git a/js/test/unit/ipc/reader/streams-dom-tests.ts b/js/test/unit/ipc/reader/streams-dom-tests.ts
index 27aaee917cd..a338ed77e55 100644
--- a/js/test/unit/ipc/reader/streams-dom-tests.ts
+++ b/js/test/unit/ipc/reader/streams-dom-tests.ts
@@ -35,9 +35,7 @@ import { ArrowIOTestHelper, readableDOMStreamToAsyncIterator } from '../helpers'
         return test('not testing DOM streams because process.env.TEST_DOM_STREAMS !== "true"', () => {});
     }
 
-    /* tslint:disable */
     const { parse: bignumJSONParse } = require('json-bignum');
-    /* tslint:disable */
     const { concatStream } = require('web-stream-tools').default;
 
     for (const table of generateRandomTables([10, 20, 30])) {
diff --git a/js/test/unit/ipc/reader/streams-node-tests.ts b/js/test/unit/ipc/reader/streams-node-tests.ts
index fe0795e9ffa..609c03a47fe 100644
--- a/js/test/unit/ipc/reader/streams-node-tests.ts
+++ b/js/test/unit/ipc/reader/streams-node-tests.ts
@@ -35,11 +35,8 @@ import { validateRecordBatchAsyncIterator } from '../validate';
         return test('not testing node streams because process.env.TEST_NODE_STREAMS !== "true"', () => {});
     }
 
-    /* tslint:disable */
     const { Readable, PassThrough } = require('stream');
-    /* tslint:disable */
     const { parse: bignumJSONParse } = require('json-bignum');
-    /* tslint:disable */
     const concatStream = ((multistream) => (...xs: any[]) =>
         new Readable().wrap(multistream(...xs))
     )(require('multistream'));
diff --git a/js/test/unit/ipc/writer/json-writer-tests.ts b/js/test/unit/ipc/writer/json-writer-tests.ts
index d18cd914117..b461d0f76ce 100644
--- a/js/test/unit/ipc/writer/json-writer-tests.ts
+++ b/js/test/unit/ipc/writer/json-writer-tests.ts
@@ -23,7 +23,6 @@ import {
 import { validateRecordBatchIterator } from '../validate';
 import { Table, RecordBatchJSONWriter } from '../../../Arrow';
 
-/* tslint:disable */
 const { parse: bignumJSONParse } = require('json-bignum');
 
 describe('RecordBatchJSONWriter', () => {
diff --git a/js/test/unit/ipc/writer/stream-writer-tests.ts b/js/test/unit/ipc/writer/stream-writer-tests.ts
index 8f572bf3ee7..3c5cd3c06fe 100644
--- a/js/test/unit/ipc/writer/stream-writer-tests.ts
+++ b/js/test/unit/ipc/writer/stream-writer-tests.ts
@@ -101,7 +101,7 @@ describe('RecordBatchStreamWriter', () => {
 
         expect(resultTable).toEqualTable(sourceTable);
         expect((dictionary as Chunked)).toBeInstanceOf(Chunked);
-        expect((dictionary as Chunked).chunks.length).toBe(20);
+        expect((dictionary as Chunked).chunks).toHaveLength(20);
     });
 });
 
diff --git a/js/test/unit/ipc/writer/streams-dom-tests.ts b/js/test/unit/ipc/writer/streams-dom-tests.ts
index 79348e4039a..8a8060b6fb3 100644
--- a/js/test/unit/ipc/writer/streams-dom-tests.ts
+++ b/js/test/unit/ipc/writer/streams-dom-tests.ts
@@ -49,7 +49,6 @@ import {
         return test('not testing DOM streams because process.env.TEST_DOM_STREAMS !== "true"', () => {});
     }
 
-    /* tslint:disable */
     const { parse: bignumJSONParse } = require('json-bignum');
 
     for (const table of generateRandomTables([10, 20, 30])) {
@@ -246,7 +245,7 @@ import {
                 expect(streamTable).toEqualTable(sourceTable);
             }
 
-            expect(tables.length).toBe(0);
+            expect(tables).toHaveLength(0);
             expect(stream.locked).toBe(false);
         });
 
@@ -266,7 +265,7 @@ import {
                 expect(streamTable).toEqualTable(sourceTable);
             }
 
-            expect(tables.length).toBe(0);
+            expect(tables).toHaveLength(0);
             expect(stream.locked).toBe(false);
         });
     });
diff --git a/js/test/unit/ipc/writer/streams-node-tests.ts b/js/test/unit/ipc/writer/streams-node-tests.ts
index ca619b1310e..d341e829496 100644
--- a/js/test/unit/ipc/writer/streams-node-tests.ts
+++ b/js/test/unit/ipc/writer/streams-node-tests.ts
@@ -48,7 +48,6 @@ import {
         return test('not testing node streams because process.env.TEST_NODE_STREAMS !== "true"', () => {});
     }
 
-    /* tslint:disable */
     const { parse: bignumJSONParse } = require('json-bignum');
 
     for (const table of generateRandomTables([10, 20, 30])) {
@@ -246,7 +245,7 @@ import {
                 expect(streamTable).toEqualTable(sourceTable);
             }
 
-            expect(tables.length).toBe(0);
+            expect(tables).toHaveLength(0);
             expect(writer.readable).toBe(false);
             expect((writer as any).destroyed).toBe(true);
         });
@@ -268,7 +267,7 @@ import {
                 expect(streamTable).toEqualTable(sourceTable);
             }
 
-            expect(tables.length).toBe(0);
+            expect(tables).toHaveLength(0);
             expect(writer.readable).toBe(false);
             expect((writer as any).destroyed).toBe(true);
         });
diff --git a/js/test/unit/recordbatch/record-batch-tests.ts b/js/test/unit/recordbatch/record-batch-tests.ts
index 7dd064a7416..de3090a5af7 100644
--- a/js/test/unit/recordbatch/record-batch-tests.ts
+++ b/js/test/unit/recordbatch/record-batch-tests.ts
@@ -37,14 +37,14 @@ describe(`RecordBatch`, () => {
             const i32s = new Int32Array(arange(new Array<number>(10)));
 
             let i32 = Vector.new(Data.Int(new Int32(), 0, i32s.length, 0, null, i32s));
-            expect(i32.length).toBe(i32s.length);
+            expect(i32).toHaveLength(i32s.length);
             expect(i32.nullCount).toBe(0);
 
             const batch = RecordBatch.new([i32], ['i32']);
             i32 = batch.getChildAt(0) as Int32Vector;
 
             expect(batch.schema.fields[0].name).toBe('i32');
-            expect(i32.length).toBe(i32s.length);
+            expect(i32).toHaveLength(i32s.length);
             expect(i32.nullCount).toBe(0);
 
             expect(i32).toEqualVector(Int32Vector.from(i32s));
@@ -57,8 +57,8 @@ describe(`RecordBatch`, () => {
 
             let i32 = Vector.new(Data.Int(new Int32(), 0, i32s.length, 0, null, i32s));
             let f32 = Vector.new(Data.Float(new Float32(), 0, f32s.length, 0, null, f32s));
-            expect(i32.length).toBe(i32s.length);
-            expect(f32.length).toBe(f32s.length);
+            expect(i32).toHaveLength(i32s.length);
+            expect(f32).toHaveLength(f32s.length);
             expect(i32.nullCount).toBe(0);
             expect(f32.nullCount).toBe(0);
 
@@ -68,8 +68,8 @@ describe(`RecordBatch`, () => {
 
             expect(batch.schema.fields[0].name).toBe('i32');
             expect(batch.schema.fields[1].name).toBe('f32');
-            expect(i32.length).toBe(i32s.length);
-            expect(f32.length).toBe(f32s.length);
+            expect(i32).toHaveLength(i32s.length);
+            expect(f32).toHaveLength(f32s.length);
             expect(i32.nullCount).toBe(0);
             expect(f32.nullCount).toBe(0);
 
@@ -85,8 +85,8 @@ describe(`RecordBatch`, () => {
             let i32 = Int32Vector.from(i32s);
             let f32 = Float32Vector.from(f32s);
 
-            expect(i32.length).toBe(i32s.length);
-            expect(f32.length).toBe(f32s.length);
+            expect(i32).toHaveLength(i32s.length);
+            expect(f32).toHaveLength(f32s.length);
             expect(i32.nullCount).toBe(0);
             expect(f32.nullCount).toBe(0);
 
@@ -96,8 +96,8 @@ describe(`RecordBatch`, () => {
 
             expect(batch.schema.fields[0].name).toBe('0');
             expect(batch.schema.fields[1].name).toBe('1');
-            expect(i32.length).toBe(i32s.length);
-            expect(f32.length).toBe(i32s.length); // new length should be the same as the longest sibling
+            expect(i32).toHaveLength(i32s.length);
+            expect(f32).toHaveLength(i32s.length); // new length should be the same as the longest sibling
             expect(i32.nullCount).toBe(0);
             expect(f32.nullCount).toBe(i32s.length - f32s.length);
 
@@ -116,7 +116,7 @@ describe(`RecordBatch`, () => {
             const batch = numsRecordBatch(32, 27);
             const i32sBatch = batch.select('i32');
             expect(i32sBatch.numCols).toBe(1);
-            expect(i32sBatch.length).toBe(32);
+            expect(i32sBatch).toHaveLength(32);
         });
     });
     describe(`selectAt()`, () => {
@@ -124,7 +124,7 @@ describe(`RecordBatch`, () => {
             const batch = numsRecordBatch(32, 45);
             const f32sBatch = batch.selectAt(1);
             expect(f32sBatch.numCols).toBe(1);
-            expect(f32sBatch.length).toBe(45);
+            expect(f32sBatch).toHaveLength(45);
         });
     });
 });
diff --git a/js/test/unit/table-tests.ts b/js/test/unit/table-tests.ts
index 2740ecccea8..32b635f3bab 100644
--- a/js/test/unit/table-tests.ts
+++ b/js/test/unit/table-tests.ts
@@ -73,7 +73,7 @@ const test_data = [
 ];
 
 function compareBatchAndTable(source: Table, offset: number, batch: RecordBatch, table: Table) {
-    expect(batch.length).toEqual(table.length);
+    expect(batch).toHaveLength(table.length);
     expect(table.numCols).toEqual(source.numCols);
     expect(batch.numCols).toEqual(source.numCols);
     for (let i = -1, n = source.numCols; ++i < n;) {
@@ -88,13 +88,13 @@ function compareBatchAndTable(source: Table, offset: number, batch: RecordBatch,
 
 describe(`Table`, () => {
     test(`can create an empty table`, () => {
-        expect(Table.empty().length).toEqual(0);
+        expect(Table.empty()).toHaveLength(0);
     });
     test(`Table.from([]) creates an empty table`, () => {
-        expect(Table.from([]).length).toEqual(0);
+        expect(Table.from([])).toHaveLength(0);
     });
     test(`Table.from() creates an empty table`, () => {
-        expect(Table.from().length).toEqual(0);
+        expect(Table.from()).toHaveLength(0);
     });
 
     describe(`new()`, () => {
@@ -104,9 +104,9 @@ describe(`Table`, () => {
             const table = Table.new(i32, f32);
             i32 = table.getColumn('i32')!;
             f32 = table.getColumn('f32')!;
-            expect(table.length).toBe(0);
-            expect(i32.length).toBe(0);
-            expect(f32.length).toBe(0);
+            expect(table).toHaveLength(0);
+            expect(i32).toHaveLength(0);
+            expect(f32).toHaveLength(0);
             expect(i32.toArray()).toBeInstanceOf(Int32Array);
             expect(f32.toArray()).toBeInstanceOf(Float32Array);
         });
@@ -117,7 +117,7 @@ describe(`Table`, () => {
 
             let i32 = Column.new('i32', Data.Int(new Int32(), 0, i32s.length, 0, null, i32s));
             expect(i32.name).toBe('i32');
-            expect(i32.length).toBe(i32s.length);
+            expect(i32).toHaveLength(i32s.length);
             expect(i32.nullable).toBe(true);
             expect(i32.nullCount).toBe(0);
 
@@ -125,7 +125,7 @@ describe(`Table`, () => {
             i32 = table.getColumnAt(0)!;
 
             expect(i32.name).toBe('i32');
-            expect(i32.length).toBe(i32s.length);
+            expect(i32).toHaveLength(i32s.length);
             expect(i32.nullable).toBe(true);
             expect(i32.nullCount).toBe(0);
 
@@ -141,8 +141,8 @@ describe(`Table`, () => {
             let f32 = Column.new('f32', Data.Float(new Float32(), 0, f32s.length, 0, null, f32s));
             expect(i32.name).toBe('i32');
             expect(f32.name).toBe('f32');
-            expect(i32.length).toBe(i32s.length);
-            expect(f32.length).toBe(f32s.length);
+            expect(i32).toHaveLength(i32s.length);
+            expect(f32).toHaveLength(f32s.length);
             expect(i32.nullable).toBe(true);
             expect(f32.nullable).toBe(true);
             expect(i32.nullCount).toBe(0);
@@ -154,8 +154,8 @@ describe(`Table`, () => {
 
             expect(i32.name).toBe('i32');
             expect(f32.name).toBe('f32');
-            expect(i32.length).toBe(i32s.length);
-            expect(f32.length).toBe(f32s.length);
+            expect(i32).toHaveLength(i32s.length);
+            expect(f32).toHaveLength(f32s.length);
             expect(i32.nullable).toBe(true);
             expect(f32.nullable).toBe(true);
             expect(i32.nullCount).toBe(0);
@@ -175,8 +175,8 @@ describe(`Table`, () => {
 
             expect(i32.name).toBe('i32');
             expect(f32.name).toBe('f32');
-            expect(i32.length).toBe(i32s.length);
-            expect(f32.length).toBe(f32s.length);
+            expect(i32).toHaveLength(i32s.length);
+            expect(f32).toHaveLength(f32s.length);
             expect(i32.nullable).toBe(true);
             expect(f32.nullable).toBe(true);
             expect(i32.nullCount).toBe(0);
@@ -188,8 +188,8 @@ describe(`Table`, () => {
 
             expect(i32.name).toBe('i32');
             expect(f32.name).toBe('f32');
-            expect(i32.length).toBe(i32s.length);
-            expect(f32.length).toBe(i32s.length); // new length should be the same as the longest sibling
+            expect(i32).toHaveLength(i32s.length);
+            expect(f32).toHaveLength(i32s.length); // new length should be the same as the longest sibling
             expect(i32.nullable).toBe(true);
             expect(f32.nullable).toBe(true); // true, with 12 additional nulls
             expect(i32.nullCount).toBe(0);
@@ -214,8 +214,8 @@ describe(`Table`, () => {
 
             expect(i32.name).toBe('i32');
             expect(f32.name).toBe('f32');
-            expect(i32.length).toBe(i32s.length);
-            expect(f32.length).toBe(f32s.length);
+            expect(i32).toHaveLength(i32s.length);
+            expect(f32).toHaveLength(f32s.length);
             expect(i32.nullable).toBe(true);
             expect(f32.nullable).toBe(true);
             expect(i32.nullCount).toBe(0);
@@ -227,8 +227,8 @@ describe(`Table`, () => {
 
             expect(i32.name).toBe('i32Renamed');
             expect(f32.name).toBe('f32Renamed');
-            expect(i32.length).toBe(i32s.length);
-            expect(f32.length).toBe(i32s.length); // new length should be the same as the longest sibling
+            expect(i32).toHaveLength(i32s.length);
+            expect(f32).toHaveLength(i32s.length); // new length should be the same as the longest sibling
             expect(i32.nullable).toBe(true);
             expect(f32.nullable).toBe(true); // true, with 4 additional nulls
             expect(i32.nullCount).toBe(0);
@@ -273,7 +273,7 @@ describe(`Table`, () => {
             test(`has the correct length`, () => {
                 const table = datum.table();
                 const values = datum.values();
-                expect(table.length).toEqual(values.length);
+                expect(table).toHaveLength(values.length);
             });
             test(`gets expected values`, () => {
                 const table = datum.table();
@@ -497,11 +497,11 @@ describe(`Table`, () => {
             });
             test(`table.select() basic tests`, () => {
                 let selected = table.select('f32', 'dictionary');
-                expect(selected.schema.fields.length).toEqual(2);
+                expect(selected.schema.fields).toHaveLength(2);
                 expect(selected.schema.fields[0]).toEqual(table.schema.fields[0]);
                 expect(selected.schema.fields[1]).toEqual(table.schema.fields[2]);
 
-                expect(selected.length).toEqual(values.length);
+                expect(selected).toHaveLength(values.length);
                 let idx = 0, expected_row;
                 for (let row of selected) {
                     expected_row = values[idx++];
@@ -567,7 +567,7 @@ describe(`Predicate`, () => {
 //     return (new Array(n + 1).join(fill) + str).slice(-1 * n);
 // }
 
-type TestDataSchema = { f32: Float32; i32: Int32; dictionary: Dictionary<Utf8, Int8>; };
+type TestDataSchema = { f32: Float32; i32: Int32; dictionary: Dictionary<Utf8, Int8> };
 
 function getTestVectors(f32Values: number[], i32Values: number[], dictIndices: number[]) {
 
diff --git a/js/test/unit/table/assign-tests.ts b/js/test/unit/table/assign-tests.ts
index 84d8a8582cf..a9f76dde190 100644
--- a/js/test/unit/table/assign-tests.ts
+++ b/js/test/unit/table/assign-tests.ts
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+/* eslint-disable jest/no-standalone-expect */
+
 import '../../jest-extensions';
 import { zip } from 'ix/iterable';
 import * as generate from '../../generate-test-data';
@@ -43,6 +45,7 @@ describe('Table.assign()', () => {
         const table = lhs.table.assign(rhs.table);
         const f = assignGeneratedTables(lhs, rhs);
         expect(table.schema.fields.map((f) => f.name)).toEqual(['a', 'b', 'c', 'f']);
+        // eslint-disable-next-line no-sparse-arrays
         validateTable({ ...f([ , , 2], [0,1,3]), table }).run();
     });
     describe(`should assign completely-overlapping fields`, () => {
@@ -51,6 +54,7 @@ describe('Table.assign()', () => {
         const table = lhs.table.assign(rhs.table);
         const f = assignGeneratedTables(lhs, rhs);
         expect(table.schema.fields.map((f) => f.name)).toEqual(['d', 'e', 'f']);
+        // eslint-disable-next-line no-sparse-arrays
         validateTable({ ...f([ , , ], [0,1,2]), table }).run();
     });
 });
diff --git a/js/test/unit/table/serialize-tests.ts b/js/test/unit/table/serialize-tests.ts
index 9dce2f5c62a..961f71476a2 100644
--- a/js/test/unit/table/serialize-tests.ts
+++ b/js/test/unit/table/serialize-tests.ts
@@ -48,7 +48,7 @@ describe('Table#serialize()', () => {
     test(`Table#empty round-trips through serialization`, () => {
         const source = Table.empty();
         source.schema.metadata.set('foo', 'bar');
-        expect(source.length).toBe(0);
+        expect(source).toHaveLength(0);
         expect(source.numCols).toBe(0);
         const result = Table.from(source.serialize());
         expect(result).toEqualTable(source);
@@ -57,7 +57,7 @@ describe('Table#serialize()', () => {
 
     test(`Schema metadata round-trips through serialization`, () => {
         const source = createTable(schema1, [20]);
-        expect(source.length).toBe(20);
+        expect(source).toHaveLength(20);
         expect(source.numCols).toBe(3);
         const result = Table.from(source.serialize());
         expect(result).toEqualTable(source);
@@ -68,7 +68,7 @@ describe('Table#serialize()', () => {
         const table1 = new Table(nullSchema);
         const table2 = Table.empty();
         const source = table1.assign(table2);
-        expect(source.length).toBe(0);
+        expect(source).toHaveLength(0);
         expect(source.numCols).toBe(1);
         const result = Table.from(source.serialize());
         expect(result).toEqualTable(source);
@@ -102,7 +102,7 @@ describe('Table#serialize()', () => {
             const table1 = table(schema1);
             const source = table1.assign(Table.empty());
             expect(source.numCols).toBe(table1.numCols);
-            expect(source.length).toBe(table1.length);
+            expect(source).toHaveLength(table1.length);
             const result = Table.from(source.serialize());
             expect(result).toEqualTable(source);
             expect(result.schema.metadata.get('foo')).toEqual('bar');
@@ -111,7 +111,7 @@ describe('Table#serialize()', () => {
             const table1 = new Table(nullSchema);
             const table2 = table(schema1);
             const source = table1.assign(table2);
-            expect(source.length).toBe(table2.length);
+            expect(source).toHaveLength(table2.length);
             expect(source.numCols).toBe(4);
             const result = Table.from(source.serialize());
             expect(result).toEqualTable(source);
@@ -122,7 +122,7 @@ describe('Table#serialize()', () => {
             const table2 = createTable(schema2, [102, 4, 10, 97, 10, 2, 4]);
             const source = table1.assign(table2);
             expect(source.numCols).toBe(6);
-            expect(source.length).toBe(Math.max(table1.length, table2.length));
+            expect(source).toHaveLength(Math.max(table1.length, table2.length));
             const result = Table.from(source.serialize());
             expect(result).toEqualTable(source);
             expect(result.schema.metadata.get('foo')).toEqual('bar');
@@ -142,7 +142,7 @@ describe('Table#serialize()', () => {
             const [begin, end] = [length * .25, length * .75].map((x) => x | 0);
             const source = table1.slice(begin, end);
             expect(source.numCols).toBe(3);
-            expect(source.length).toBe(end - begin);
+            expect(source).toHaveLength(end - begin);
             const result = Table.from(source.serialize());
             expect(result).toEqualTable(source);
             expect(result.schema.metadata.get('foo')).toEqual('bar');
@@ -155,9 +155,9 @@ describe('Table#serialize()', () => {
             const slice1 = table1.slice(begin1, end1);
             const slice2 = table1.slice(begin2, end2);
             const source = slice1.concat(slice2);
-            expect(slice1.length).toBe(end1 - begin1);
-            expect(slice2.length).toBe(end2 - begin2);
-            expect(source.length).toBe((end1 - begin1) + (end2 - begin2));
+            expect(slice1).toHaveLength(end1 - begin1);
+            expect(slice2).toHaveLength(end2 - begin2);
+            expect(source).toHaveLength((end1 - begin1) + (end2 - begin2));
             [slice1, slice2, source].forEach((x) => expect(x.numCols).toBe(3));
             const result = Table.from(source.serialize());
             expect(result).toEqualTable(source);
diff --git a/js/test/unit/utils.ts b/js/test/unit/utils.ts
index 7338f712609..c57de487f9e 100644
--- a/js/test/unit/utils.ts
+++ b/js/test/unit/utils.ts
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-export function arange<T extends { length: number; [n: number]: number; }>(arr: T, n = arr.length) {
+export function arange<T extends { length: number; [n: number]: number }>(arr: T, n = arr.length) {
     for (let i = -1; ++i < n; arr[i] = i) { }
     return arr;
 }
diff --git a/js/test/unit/vector/numeric-vector-tests.ts b/js/test/unit/vector/numeric-vector-tests.ts
index 432efaadffa..4c3ad3a46fe 100644
--- a/js/test/unit/vector/numeric-vector-tests.ts
+++ b/js/test/unit/vector/numeric-vector-tests.ts
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+/* eslint-disable jest/no-identical-title */
+
 import {
     util,
     Data, Vector,
diff --git a/js/test/unit/visitor-tests.ts b/js/test/unit/visitor-tests.ts
index 9a6bf26ffd9..e3339b01f9a 100644
--- a/js/test/unit/visitor-tests.ts
+++ b/js/test/unit/visitor-tests.ts
@@ -111,7 +111,7 @@ describe('Visitor', () => {
         test(`visits Dictionary types`, () => validateBasicVisitor(new Dictionary(null as any, null as any)));
         test(`visits Interval types`, () => validateBasicVisitor(new Interval(0)));
         test(`visits FixedSizeList types`, () => validateBasicVisitor(new FixedSizeList(2, null as any)));
-        test(`visits Map types`, () => validateBasicVisitor(new Map_(new Field('', new Struct<{ key: Int, value: Int }>([] as any[])))));
+        test(`visits Map types`, () => validateBasicVisitor(new Map_(new Field('', new Struct<{ key: Int; value: Int }>([] as any[])))));
         function validateBasicVisitor<T extends DataType>(type: T) {
             const visitor = new BasicVisitor();
             const result = visitor.visit(type);
@@ -157,7 +157,7 @@ describe('Visitor', () => {
         test(`visits IntervalDayTime types`, () => validateFeatureVisitor(new IntervalDayTime()));
         test(`visits IntervalYearMonth types`, () => validateFeatureVisitor(new IntervalYearMonth()));
         test(`visits FixedSizeList types`, () => validateFeatureVisitor(new FixedSizeList(2, null as any)));
-        test(`visits Map types`, () => validateFeatureVisitor(new Map_(new Field('', new Struct<{ key: Int, value: Int }>([] as any[])))));
+        test(`visits Map types`, () => validateFeatureVisitor(new Map_(new Field('', new Struct<{ key: Int; value: Int }>([] as any[])))));
 
         function validateFeatureVisitor<T extends DataType>(type: T) {
             const visitor = new FeatureVisitor();
diff --git a/js/tsconfig.json b/js/tsconfig.json
index 8542ebfc3c4..20163756487 100644
--- a/js/tsconfig.json
+++ b/js/tsconfig.json
@@ -8,5 +8,6 @@
     "target": "ESNEXT",
     "module": "commonjs",
     "noEmit": true
-  }
+  },
+  "include": ["src/**/*.ts", "test/**/*.ts"]
 }
diff --git a/js/tslint.json b/js/tslint.json
deleted file mode 100644
index 705ef8cb744..00000000000
--- a/js/tslint.json
+++ /dev/null
@@ -1,39 +0,0 @@
-{
-  "rules": {
-    "curly": true,
-    "eofline": false,
-    "align": [true, "parameters"],
-    "class-name": true,
-    "indent": [true, "spaces"],
-    "max-line-length": [false, 150],
-    "no-consecutive-blank-lines": [true],
-    "no-trailing-whitespace": true,
-    "no-duplicate-variable": true,
-    "no-var-keyword": true,
-    "no-empty": false,
-    "no-unused-expression": false,
-    "no-use-before-declare": false,
-    "no-var-requires": true,
-    "no-require-imports": true,
-    "one-line": [true,
-      "check-else",
-      "check-whitespace",
-      "check-open-brace"],
-    "quotemark": [true,
-      "single",
-      "avoid-escape"],
-    "semicolon": [true, "always"],
-    "typedef-whitespace": [true, {
-      "call-signature": "nospace",
-      "index-signature": "nospace",
-      "parameter": "nospace",
-      "property-declaration": "nospace",
-      "variable-declaration": "nospace"
-    }],
-    "whitespace": [true,
-      "check-branch",
-      "check-decl",
-      "check-operator",
-      "check-type"]
-  }
-}
\ No newline at end of file
diff --git a/js/yarn.lock b/js/yarn.lock
index d41dafde930..8de721bb6bd 100644
--- a/js/yarn.lock
+++ b/js/yarn.lock
@@ -2,6 +2,13 @@
 # yarn lockfile v1
 
 
+"@babel/code-frame@7.12.11":
+  version "7.12.11"
+  resolved "https://registry.yarnpkg.com/@babel/code-frame/-/code-frame-7.12.11.tgz#f4ad435aa263db935b8f10f2c552d23fb716a63f"
+  integrity sha512-Zt1yodBx1UcyiePMSkWnU4hPqhwq7hGi2nFL1LeA3EUl+q2LQx16MISgJ0+z7dnmgvP9QtIleuETGOiOH1RcIw==
+  dependencies:
+    "@babel/highlight" "^7.10.4"
+
 "@babel/code-frame@^7.0.0", "@babel/code-frame@^7.12.13":
   version "7.12.13"
   resolved "https://registry.yarnpkg.com/@babel/code-frame/-/code-frame-7.12.13.tgz#dcfc826beef65e75c50e21d3837d7d95798dd658"
@@ -153,7 +160,7 @@
     "@babel/traverse" "^7.13.0"
     "@babel/types" "^7.13.0"
 
-"@babel/highlight@^7.12.13":
+"@babel/highlight@^7.10.4", "@babel/highlight@^7.12.13":
   version "7.13.10"
   resolved "https://registry.yarnpkg.com/@babel/highlight/-/highlight-7.13.10.tgz#a8b2a66148f5b27d666b15d81774347a731d52d1"
   integrity sha512-5aPpe5XQPzflQrFwL1/QoeHkP2MsA4JCntcXHRhEsdsfPVkvPi2w7Qix4iV7t5S/oC9OodGrggd8aco1g3SZFg==
@@ -296,6 +303,21 @@
     exec-sh "^0.3.2"
     minimist "^1.2.0"
 
+"@eslint/eslintrc@^0.4.0":
+  version "0.4.0"
+  resolved "https://registry.yarnpkg.com/@eslint/eslintrc/-/eslintrc-0.4.0.tgz#99cc0a0584d72f1df38b900fb062ba995f395547"
+  integrity sha512-2ZPCc+uNbjV5ERJr+aKSPRwZgKd2z11x0EgLvb1PURmUrn9QNRXFqje0Ldq454PfAVyaJYyrDvvIKSFP4NnBog==
+  dependencies:
+    ajv "^6.12.4"
+    debug "^4.1.1"
+    espree "^7.3.0"
+    globals "^12.1.0"
+    ignore "^4.0.6"
+    import-fresh "^3.2.1"
+    js-yaml "^3.13.1"
+    minimatch "^3.0.4"
+    strip-json-comments "^3.1.1"
+
 "@evocateur/libnpmaccess@^3.1.2":
   version "3.1.2"
   resolved "https://registry.yarnpkg.com/@evocateur/libnpmaccess/-/libnpmaccess-3.1.2.tgz#ecf7f6ce6b004e9f942b098d92200be4a4b1c845"
@@ -1596,7 +1618,7 @@
     jest-diff "^26.0.0"
     pretty-format "^26.0.0"
 
-"@types/json-schema@^7.0.5":
+"@types/json-schema@^7.0.3", "@types/json-schema@^7.0.5":
   version "7.0.7"
   resolved "https://registry.yarnpkg.com/@types/json-schema/-/json-schema-7.0.7.tgz#98a993516c859eb0d5c4c8f098317a9ea68db9ad"
   integrity sha512-cxWFQVseBm6O9Gbw1IWb8r6OS4OhSt3hPZLkFApLjM8TEXROBuQGLAH2i2gZpcXdLBIrpXuTDhH7Vbm1iXmNGA==
@@ -1670,6 +1692,76 @@
   dependencies:
     "@types/yargs-parser" "*"
 
+"@typescript-eslint/eslint-plugin@^4.21.0":
+  version "4.21.0"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/eslint-plugin/-/eslint-plugin-4.21.0.tgz#3fce2bfa76d95c00ac4f33dff369cb593aab8878"
+  integrity sha512-FPUyCPKZbVGexmbCFI3EQHzCZdy2/5f+jv6k2EDljGdXSRc0cKvbndd2nHZkSLqCNOPk0jB6lGzwIkglXcYVsQ==
+  dependencies:
+    "@typescript-eslint/experimental-utils" "4.21.0"
+    "@typescript-eslint/scope-manager" "4.21.0"
+    debug "^4.1.1"
+    functional-red-black-tree "^1.0.1"
+    lodash "^4.17.15"
+    regexpp "^3.0.0"
+    semver "^7.3.2"
+    tsutils "^3.17.1"
+
+"@typescript-eslint/experimental-utils@4.21.0", "@typescript-eslint/experimental-utils@^4.0.1":
+  version "4.21.0"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/experimental-utils/-/experimental-utils-4.21.0.tgz#0b0bb7c15d379140a660c003bdbafa71ae9134b6"
+  integrity sha512-cEbgosW/tUFvKmkg3cU7LBoZhvUs+ZPVM9alb25XvR0dal4qHL3SiUqHNrzoWSxaXA9gsifrYrS1xdDV6w/gIA==
+  dependencies:
+    "@types/json-schema" "^7.0.3"
+    "@typescript-eslint/scope-manager" "4.21.0"
+    "@typescript-eslint/types" "4.21.0"
+    "@typescript-eslint/typescript-estree" "4.21.0"
+    eslint-scope "^5.0.0"
+    eslint-utils "^2.0.0"
+
+"@typescript-eslint/parser@^4.21.0":
+  version "4.21.0"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/parser/-/parser-4.21.0.tgz#a227fc2af4001668c3e3f7415d4feee5093894c1"
+  integrity sha512-eyNf7QmE5O/l1smaQgN0Lj2M/1jOuNg2NrBm1dqqQN0sVngTLyw8tdCbih96ixlhbF1oINoN8fDCyEH9SjLeIA==
+  dependencies:
+    "@typescript-eslint/scope-manager" "4.21.0"
+    "@typescript-eslint/types" "4.21.0"
+    "@typescript-eslint/typescript-estree" "4.21.0"
+    debug "^4.1.1"
+
+"@typescript-eslint/scope-manager@4.21.0":
+  version "4.21.0"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/scope-manager/-/scope-manager-4.21.0.tgz#c81b661c4b8af1ec0c010d847a8f9ab76ab95b4d"
+  integrity sha512-kfOjF0w1Ix7+a5T1knOw00f7uAP9Gx44+OEsNQi0PvvTPLYeXJlsCJ4tYnDj5PQEYfpcgOH5yBlw7K+UEI9Agw==
+  dependencies:
+    "@typescript-eslint/types" "4.21.0"
+    "@typescript-eslint/visitor-keys" "4.21.0"
+
+"@typescript-eslint/types@4.21.0":
+  version "4.21.0"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/types/-/types-4.21.0.tgz#abdc3463bda5d31156984fa5bc316789c960edef"
+  integrity sha512-+OQaupjGVVc8iXbt6M1oZMwyKQNehAfLYJJ3SdvnofK2qcjfor9pEM62rVjBknhowTkh+2HF+/KdRAc/wGBN2w==
+
+"@typescript-eslint/typescript-estree@4.21.0":
+  version "4.21.0"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/typescript-estree/-/typescript-estree-4.21.0.tgz#3817bd91857beeaeff90f69f1f112ea58d350b0a"
+  integrity sha512-ZD3M7yLaVGVYLw4nkkoGKumb7Rog7QID9YOWobFDMQKNl+vPxqVIW/uDk+MDeGc+OHcoG2nJ2HphwiPNajKw3w==
+  dependencies:
+    "@typescript-eslint/types" "4.21.0"
+    "@typescript-eslint/visitor-keys" "4.21.0"
+    debug "^4.1.1"
+    globby "^11.0.1"
+    is-glob "^4.0.1"
+    semver "^7.3.2"
+    tsutils "^3.17.1"
+
+"@typescript-eslint/visitor-keys@4.21.0":
+  version "4.21.0"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/visitor-keys/-/visitor-keys-4.21.0.tgz#990a9acdc124331f5863c2cf21c88ba65233cd8d"
+  integrity sha512-dH22dROWGi5Z6p+Igc8bLVLmwy7vEe8r+8c+raPQU0LxgogPUrRAtRGtvBWmlr9waTu3n+QLt/qrS/hWzk1x5w==
+  dependencies:
+    "@typescript-eslint/types" "4.21.0"
+    eslint-visitor-keys "^2.0.0"
+
 "@webassemblyjs/ast@1.7.11":
   version "1.7.11"
   resolved "https://registry.yarnpkg.com/@webassemblyjs/ast/-/ast-1.7.11.tgz#b988582cafbb2b095e8b556526f30c90d057cace"
@@ -1863,6 +1955,11 @@ acorn-globals@^6.0.0:
     acorn "^7.1.1"
     acorn-walk "^7.1.1"
 
+acorn-jsx@^5.3.1:
+  version "5.3.1"
+  resolved "https://registry.yarnpkg.com/acorn-jsx/-/acorn-jsx-5.3.1.tgz#fc8661e11b7ac1539c47dbfea2e72b3af34d267b"
+  integrity sha512-K0Ptm/47OKfQRpNQ2J/oIN/3QYiK6FwW+eJbILhsdxh2WTLdl+30o8aGdTbm5JbffpFFAg/g+zi1E+jvJha5ng==
+
 acorn-walk@^7.1.1:
   version "7.2.0"
   resolved "https://registry.yarnpkg.com/acorn-walk/-/acorn-walk-7.2.0.tgz#0de889a601203909b0fbe07b8938dc21d2e967bc"
@@ -1878,7 +1975,7 @@ acorn@^6.0.5:
   resolved "https://registry.yarnpkg.com/acorn/-/acorn-6.4.2.tgz#35866fd710528e92de10cf06016498e47e39e1e6"
   integrity sha512-XtGIhXwF8YM8bJhGxG5kXgjkEuNGLTkoYqVE+KMR+aspr4KGYmKYg7yUe3KghyQ9yheNwLnjmzh/7+gfDBmHCQ==
 
-acorn@^7.1.1:
+acorn@^7.1.1, acorn@^7.4.0:
   version "7.4.1"
   resolved "https://registry.yarnpkg.com/acorn/-/acorn-7.4.1.tgz#feaed255973d2e77555b83dbc08851a6c63520fa"
   integrity sha512-nQyp0o1/mNdbTO1PO6kHkwSrmgZ0MT/jCCpNiwbUjGoRN4dlBhqJtoQuCnEOKzgTVwg0ZWiCoQy6SxMebQVh8A==
@@ -1927,7 +2024,7 @@ ajv-keywords@^3.1.0, ajv-keywords@^3.5.2:
   resolved "https://registry.yarnpkg.com/ajv-keywords/-/ajv-keywords-3.5.2.tgz#31f29da5ab6e00d1c2d329acf7b5929614d5014d"
   integrity sha512-5p6WTN0DdTGVQk6VjcEju19IgaHudalcfabD7yhDGeA6bcQnmL+CpveLJq/3hvfwd1aof6L386Ougkx6RfyMIQ==
 
-ajv@^6.1.0, ajv@^6.12.3, ajv@^6.12.4:
+ajv@^6.1.0, ajv@^6.10.0, ajv@^6.12.3, ajv@^6.12.4:
   version "6.12.6"
   resolved "https://registry.yarnpkg.com/ajv/-/ajv-6.12.6.tgz#baf5a62e802b07d977034586f8c3baf5adf26df4"
   integrity sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==
@@ -1937,6 +2034,16 @@ ajv@^6.1.0, ajv@^6.12.3, ajv@^6.12.4:
     json-schema-traverse "^0.4.1"
     uri-js "^4.2.2"
 
+ajv@^8.0.1:
+  version "8.1.0"
+  resolved "https://registry.yarnpkg.com/ajv/-/ajv-8.1.0.tgz#45d5d3d36c7cdd808930cc3e603cf6200dbeb736"
+  integrity sha512-B/Sk2Ix7A36fs/ZkuGLIR86EdjbgR6fsAcbx9lOP/QBSXujDNbVmIS/U4Itz5k8fPFDeVZl/zQ/gJW4Jrq6XjQ==
+  dependencies:
+    fast-deep-equal "^3.1.1"
+    json-schema-traverse "^1.0.0"
+    require-from-string "^2.0.2"
+    uri-js "^4.2.2"
+
 ansi-colors@^1.0.1:
   version "1.1.0"
   resolved "https://registry.yarnpkg.com/ansi-colors/-/ansi-colors-1.1.0.tgz#6374b4dd5d4718ff3ce27a671a3b1cad077132a9"
@@ -1949,6 +2056,11 @@ ansi-colors@^3.0.5:
   resolved "https://registry.yarnpkg.com/ansi-colors/-/ansi-colors-3.2.4.tgz#e3a3da4bfbae6c86a9c285625de124a234026fbf"
   integrity sha512-hHUXGagefjN2iRrID63xckIvotOXOojhQKWIPUZ4mNUZ9nLZW+7FMNoE1lOkEhNWYsx/7ysGIuJYCiMAA9FnrA==
 
+ansi-colors@^4.1.1:
+  version "4.1.1"
+  resolved "https://registry.yarnpkg.com/ansi-colors/-/ansi-colors-4.1.1.tgz#cbb9ae256bf750af1eab344f229aa27fe94ba348"
+  integrity sha512-JoX0apGbHaUJBNl6yF+p6JAFYZ666/hhCGKN5t9QFjbJQKUU/g8MNbFDbvfrgKXvI1QpZplPOnwIo99lX/AAmA==
+
 ansi-escapes@^3.2.0:
   version "3.2.0"
   resolved "https://registry.yarnpkg.com/ansi-escapes/-/ansi-escapes-3.2.0.tgz#8780b98ff9dbf5638152d1f1fe5c1d7b4442976b"
@@ -2235,6 +2347,11 @@ assign-symbols@^1.0.0:
   resolved "https://registry.yarnpkg.com/assign-symbols/-/assign-symbols-1.0.0.tgz#59667f41fadd4f20ccbc2bb96b8d4f7f78ec0367"
   integrity sha1-WWZ/QfrdTyDMvCu5a41Pf3jsA2c=
 
+astral-regex@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/astral-regex/-/astral-regex-2.0.0.tgz#483143c567aeed4785759c0865786dc77d7d2e31"
+  integrity sha512-Z7tMw1ytTXt5jqMcOP+OQteU1VuNK9Y02uuJtKQ1Sv69jXQKKg5cibLwGJow8yzZP+eAc18EmLGPal0bp36rvQ==
+
 async-done@1.3.1:
   version "1.3.1"
   resolved "https://registry.yarnpkg.com/async-done/-/async-done-1.3.1.tgz#14b7b73667b864c8f02b5b253fc9c6eddb777f3e"
@@ -2616,11 +2733,6 @@ buffer@^4.3.0:
     ieee754 "^1.1.4"
     isarray "^1.0.0"
 
-builtin-modules@^1.1.1:
-  version "1.1.1"
-  resolved "https://registry.yarnpkg.com/builtin-modules/-/builtin-modules-1.1.1.tgz#270f076c5a72c02f5b65a47df94c5fe3a278892f"
-  integrity sha1-Jw8HbFpywC9bZaR9+Uxf46J4iS8=
-
 builtin-status-codes@^3.0.0:
   version "3.0.0"
   resolved "https://registry.yarnpkg.com/builtin-status-codes/-/builtin-status-codes-3.0.0.tgz#85982878e21b98e1c66425e03d0174788f569ee8"
@@ -2805,7 +2917,7 @@ caseless@~0.12.0:
   resolved "https://registry.yarnpkg.com/caseless/-/caseless-0.12.0.tgz#1b681c21ff84033c826543090689420d187151dc"
   integrity sha1-G2gcIf+EAzyCZUMJBolCDRhxUdw=
 
-chalk@2.x, chalk@^2.0.0, chalk@^2.0.1, chalk@^2.3.0, chalk@^2.3.1, chalk@^2.4.1, chalk@^2.4.2:
+chalk@2.x, chalk@^2.0.0, chalk@^2.0.1, chalk@^2.3.1, chalk@^2.4.1, chalk@^2.4.2:
   version "2.4.2"
   resolved "https://registry.yarnpkg.com/chalk/-/chalk-2.4.2.tgz#cd42541677a54333cf541a49108c1432b44c9424"
   integrity sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==
@@ -3106,7 +3218,7 @@ command-line-usage@5.0.5:
     table-layout "^0.4.3"
     typical "^2.6.1"
 
-commander@^2.12.1, commander@^2.20.0:
+commander@^2.20.0:
   version "2.20.3"
   resolved "https://registry.yarnpkg.com/commander/-/commander-2.20.3.tgz#fd485e84c03eb4881c20722ba48035e8531aeb33"
   integrity sha512-GpVkmM8vF2vQUkj2LvZmD35JxeJOLCwJ9cUkugyk2nuhbv3+mJvpLYYt+0+USMxE+oj+ey/lJEnhZw75x/OMcQ==
@@ -3386,7 +3498,7 @@ cross-spawn@^6.0.0, cross-spawn@^6.0.5:
     shebang-command "^1.2.0"
     which "^1.2.9"
 
-cross-spawn@^7.0.0:
+cross-spawn@^7.0.0, cross-spawn@^7.0.2:
   version "7.0.3"
   resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-7.0.3.tgz#f73a85b9d5d41d045551c177e2882d4ac85728a6"
   integrity sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==
@@ -3517,7 +3629,7 @@ debug@^2.2.0, debug@^2.3.3:
   dependencies:
     ms "2.0.0"
 
-debug@^4.1.0, debug@^4.1.1:
+debug@^4.0.1, debug@^4.1.0, debug@^4.1.1:
   version "4.3.1"
   resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.1.tgz#f0d229c505e0c6d8c49ac553d1b13dc183f6b2ee"
   integrity sha512-doEwdvm4PCeK4K3RQN2ZC2BYUBaxwLARCqZmMjtF8a51J2Rb0xpVloFRnCODwqjpwnAoao4pelN8l3RJdv3gRQ==
@@ -3562,7 +3674,7 @@ deep-extend@~0.6.0:
   resolved "https://registry.yarnpkg.com/deep-extend/-/deep-extend-0.6.0.tgz#c4fa7c95404a17a9c3e8ca7e1537312b736330ac"
   integrity sha512-LOHxIOaPYdHlJRtCQfDIVZtfw/ufM8+rVj649RIHzcm/vGwQRXFt6OPqIFWsm2XEMrNIEtWR64sY1LEKD2vAOA==
 
-deep-is@~0.1.3:
+deep-is@^0.1.3, deep-is@~0.1.3:
   version "0.1.3"
   resolved "https://registry.yarnpkg.com/deep-is/-/deep-is-0.1.3.tgz#b369d6fb5dbc13eecf524f91b070feedc357cf34"
   integrity sha1-s2nW+128E+7PUk+RsHD+7cNXzzQ=
@@ -3731,6 +3843,13 @@ dir-glob@^3.0.1:
   dependencies:
     path-type "^4.0.0"
 
+doctrine@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/doctrine/-/doctrine-3.0.0.tgz#addebead72a6574db783639dc87a121773973961"
+  integrity sha512-yS+Q5i3hBf7GBkd4KG8a7eBNNWNGLTaEwwYWUijIYM7zrlYDM0BFXHjjPWlWZ1Rg7UaddZeIDmi9jF3HmqiQ2w==
+  dependencies:
+    esutils "^2.0.2"
+
 domain-browser@^1.1.1:
   version "1.2.0"
   resolved "https://registry.yarnpkg.com/domain-browser/-/domain-browser-1.2.0.tgz#3d31f50191a6749dd1375a7f522e823d42e54eda"
@@ -3849,6 +3968,13 @@ enhanced-resolve@^4.1.0:
     memory-fs "^0.5.0"
     tapable "^1.0.0"
 
+enquirer@^2.3.5:
+  version "2.3.6"
+  resolved "https://registry.yarnpkg.com/enquirer/-/enquirer-2.3.6.tgz#2a7fe5dd634a1e4125a975ec994ff5456dc3734d"
+  integrity sha512-yjNnPr315/FjS4zIsUxYguYUPP2e1NK4d7E7ZOLiyYCcbFBiTMyID+2wvm2w6+pZ/odMA7cRkjhsPbltwBOrLg==
+  dependencies:
+    ansi-colors "^4.1.1"
+
 env-paths@^2.2.0:
   version "2.2.1"
   resolved "https://registry.yarnpkg.com/env-paths/-/env-paths-2.2.1.tgz#420399d416ce1fbe9bc0a07c62fa68d67fd0f8f2"
@@ -3984,6 +4110,13 @@ escodegen@^2.0.0:
   optionalDependencies:
     source-map "~0.6.1"
 
+eslint-plugin-jest@^24.3.5:
+  version "24.3.5"
+  resolved "https://registry.yarnpkg.com/eslint-plugin-jest/-/eslint-plugin-jest-24.3.5.tgz#71f0b580f87915695c286c3f0eb88cf23664d044"
+  integrity sha512-XG4rtxYDuJykuqhsOqokYIR84/C8pRihRtEpVskYLbIIKGwPNW2ySxdctuVzETZE+MbF/e7wmsnbNVpzM0rDug==
+  dependencies:
+    "@typescript-eslint/experimental-utils" "^4.0.1"
+
 eslint-scope@^4.0.0:
   version "4.0.3"
   resolved "https://registry.yarnpkg.com/eslint-scope/-/eslint-scope-4.0.3.tgz#ca03833310f6889a3264781aa82e63eb9cfe7848"
@@ -3992,17 +4125,101 @@ eslint-scope@^4.0.0:
     esrecurse "^4.1.0"
     estraverse "^4.1.1"
 
+eslint-scope@^5.0.0, eslint-scope@^5.1.1:
+  version "5.1.1"
+  resolved "https://registry.yarnpkg.com/eslint-scope/-/eslint-scope-5.1.1.tgz#e786e59a66cb92b3f6c1fb0d508aab174848f48c"
+  integrity sha512-2NxwbF/hZ0KpepYN0cNbo+FN6XoK7GaHlQhgx/hIZl6Va0bF45RQOOwhLIy8lQDbuCiadSLCBnH2CFYquit5bw==
+  dependencies:
+    esrecurse "^4.3.0"
+    estraverse "^4.1.1"
+
+eslint-utils@^2.0.0, eslint-utils@^2.1.0:
+  version "2.1.0"
+  resolved "https://registry.yarnpkg.com/eslint-utils/-/eslint-utils-2.1.0.tgz#d2de5e03424e707dc10c74068ddedae708741b27"
+  integrity sha512-w94dQYoauyvlDc43XnGB8lU3Zt713vNChgt4EWwhXAP2XkBvndfxF0AgIqKOOasjPIPzj9JqgwkwbCYD0/V3Zg==
+  dependencies:
+    eslint-visitor-keys "^1.1.0"
+
+eslint-visitor-keys@^1.1.0, eslint-visitor-keys@^1.3.0:
+  version "1.3.0"
+  resolved "https://registry.yarnpkg.com/eslint-visitor-keys/-/eslint-visitor-keys-1.3.0.tgz#30ebd1ef7c2fdff01c3a4f151044af25fab0523e"
+  integrity sha512-6J72N8UNa462wa/KFODt/PJ3IU60SDpC3QXC1Hjc1BXXpfL2C9R5+AU7jhe0F6GREqVMh4Juu+NY7xn+6dipUQ==
+
+eslint-visitor-keys@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/eslint-visitor-keys/-/eslint-visitor-keys-2.0.0.tgz#21fdc8fbcd9c795cc0321f0563702095751511a8"
+  integrity sha512-QudtT6av5WXels9WjIM7qz1XD1cWGvX4gGXvp/zBn9nXG02D0utdU3Em2m/QjTnrsk6bBjmCygl3rmj118msQQ==
+
+eslint@^7.24.0:
+  version "7.24.0"
+  resolved "https://registry.yarnpkg.com/eslint/-/eslint-7.24.0.tgz#2e44fa62d93892bfdb100521f17345ba54b8513a"
+  integrity sha512-k9gaHeHiFmGCDQ2rEfvULlSLruz6tgfA8DEn+rY9/oYPFFTlz55mM/Q/Rij1b2Y42jwZiK3lXvNTw6w6TXzcKQ==
+  dependencies:
+    "@babel/code-frame" "7.12.11"
+    "@eslint/eslintrc" "^0.4.0"
+    ajv "^6.10.0"
+    chalk "^4.0.0"
+    cross-spawn "^7.0.2"
+    debug "^4.0.1"
+    doctrine "^3.0.0"
+    enquirer "^2.3.5"
+    eslint-scope "^5.1.1"
+    eslint-utils "^2.1.0"
+    eslint-visitor-keys "^2.0.0"
+    espree "^7.3.1"
+    esquery "^1.4.0"
+    esutils "^2.0.2"
+    file-entry-cache "^6.0.1"
+    functional-red-black-tree "^1.0.1"
+    glob-parent "^5.0.0"
+    globals "^13.6.0"
+    ignore "^4.0.6"
+    import-fresh "^3.0.0"
+    imurmurhash "^0.1.4"
+    is-glob "^4.0.0"
+    js-yaml "^3.13.1"
+    json-stable-stringify-without-jsonify "^1.0.1"
+    levn "^0.4.1"
+    lodash "^4.17.21"
+    minimatch "^3.0.4"
+    natural-compare "^1.4.0"
+    optionator "^0.9.1"
+    progress "^2.0.0"
+    regexpp "^3.1.0"
+    semver "^7.2.1"
+    strip-ansi "^6.0.0"
+    strip-json-comments "^3.1.0"
+    table "^6.0.4"
+    text-table "^0.2.0"
+    v8-compile-cache "^2.0.3"
+
 esm@3.2.25:
   version "3.2.25"
   resolved "https://registry.yarnpkg.com/esm/-/esm-3.2.25.tgz#342c18c29d56157688ba5ce31f8431fbb795cc10"
   integrity sha512-U1suiZ2oDVWv4zPO56S0NcR5QriEahGtdN2OR6FiOG4WJvcjBVFB0qI4+eKoWFH483PKGuLuu6V8Z4T5g63UVA==
 
+espree@^7.3.0, espree@^7.3.1:
+  version "7.3.1"
+  resolved "https://registry.yarnpkg.com/espree/-/espree-7.3.1.tgz#f2df330b752c6f55019f8bd89b7660039c1bbbb6"
+  integrity sha512-v3JCNCE64umkFpmkFGqzVKsOT0tN1Zr+ueqLZfpV1Ob8e+CEgPWa+OxCoGH3tnhimMKIaBm4m/vaRpJ/krRz2g==
+  dependencies:
+    acorn "^7.4.0"
+    acorn-jsx "^5.3.1"
+    eslint-visitor-keys "^1.3.0"
+
 esprima@^4.0.0, esprima@^4.0.1:
   version "4.0.1"
   resolved "https://registry.yarnpkg.com/esprima/-/esprima-4.0.1.tgz#13b04cdb3e6c5d19df91ab6987a8695619b0aa71"
   integrity sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==
 
-esrecurse@^4.1.0:
+esquery@^1.4.0:
+  version "1.4.0"
+  resolved "https://registry.yarnpkg.com/esquery/-/esquery-1.4.0.tgz#2148ffc38b82e8c7057dfed48425b3e61f0f24a5"
+  integrity sha512-cCDispWt5vHHtwMY2YrAQ4ibFkAL8RbH5YGBnZBc90MolvvfkkQcJro/aZiAQUlQ3qgrYS6D6v8Gc5G5CQsc9w==
+  dependencies:
+    estraverse "^5.1.0"
+
+esrecurse@^4.1.0, esrecurse@^4.3.0:
   version "4.3.0"
   resolved "https://registry.yarnpkg.com/esrecurse/-/esrecurse-4.3.0.tgz#7ad7964d679abb28bee72cec63758b1c5d2c9921"
   integrity sha512-KmfKL3b6G+RXvP8N1vr3Tq1kL/oCFgn2NYXEtqP8/L3pKapUA4G8cFVaoF3SU323CD4XypR/ffioHmkti6/Tag==
@@ -4014,7 +4231,7 @@ estraverse@^4.1.1:
   resolved "https://registry.yarnpkg.com/estraverse/-/estraverse-4.3.0.tgz#398ad3f3c5a24948be7725e83d11a7de28cdbd1d"
   integrity sha512-39nnKffWz8xN1BU/2c79n9nB9HDzo0niYUqx6xyqUnyoAnQyyWpOTdZEeiCch8BBu515t4wp9ZmgVfVhn9EBpw==
 
-estraverse@^5.2.0:
+estraverse@^5.1.0, estraverse@^5.2.0:
   version "5.2.0"
   resolved "https://registry.yarnpkg.com/estraverse/-/estraverse-5.2.0.tgz#307df42547e6cc7324d3cf03c155d5cdb8c53880"
   integrity sha512-BxbNGGNm0RyRYvUdHpIwv9IWzeM9XClbOxwoATuFdOE7ZE6wHL+HQ5T8hoPM+zHvmKzzsEqhgy0GrQ5X13afiQ==
@@ -4212,7 +4429,7 @@ fast-glob@^2.2.6:
     merge2 "^1.2.3"
     micromatch "^3.1.10"
 
-fast-glob@^3.0.3:
+fast-glob@^3.0.3, fast-glob@^3.1.1:
   version "3.2.5"
   resolved "https://registry.yarnpkg.com/fast-glob/-/fast-glob-3.2.5.tgz#7939af2a656de79a4f1901903ee8adcaa7cb9661"
   integrity sha512-2DtFcgT68wiTTiwZ2hNdJfcHNke9XOfnwmBRWXhmeKM8rF0TGwmC/Qto3S7RoZKp5cilZbxzO5iTNTQsJ+EeDg==
@@ -4234,7 +4451,7 @@ fast-levenshtein@^1.0.0:
   resolved "https://registry.yarnpkg.com/fast-levenshtein/-/fast-levenshtein-1.1.4.tgz#e6a754cc8f15e58987aa9cbd27af66fd6f4e5af9"
   integrity sha1-5qdUzI8V5YmHqpy9J69m/W9OWvk=
 
-fast-levenshtein@~2.0.6:
+fast-levenshtein@^2.0.6, fast-levenshtein@~2.0.6:
   version "2.0.6"
   resolved "https://registry.yarnpkg.com/fast-levenshtein/-/fast-levenshtein-2.0.6.tgz#3d8a5c66883a16a30ca8643e851f19baa7797917"
   integrity sha1-PYpcZog6FqMMqGQ+hR8Zuqd5eRc=
@@ -4265,6 +4482,13 @@ figures@^2.0.0:
   dependencies:
     escape-string-regexp "^1.0.5"
 
+file-entry-cache@^6.0.1:
+  version "6.0.1"
+  resolved "https://registry.yarnpkg.com/file-entry-cache/-/file-entry-cache-6.0.1.tgz#211b2dd9659cb0394b073e7323ac3c933d522027"
+  integrity sha512-7Gps/XWymbLk2QLYK4NzpMOrYjMhdIxXuIvy2QBsLE6ljuodKvdkWs/cpyJJ3CVIVpH0Oi1Hvg1ovbMzLdFBBg==
+  dependencies:
+    flat-cache "^3.0.4"
+
 file-uri-to-path@1.0.0:
   version "1.0.0"
   resolved "https://registry.yarnpkg.com/file-uri-to-path/-/file-uri-to-path-1.0.0.tgz#553a7b8446ff6f684359c445f1e37a05dacc33dd"
@@ -4384,11 +4608,24 @@ flagged-respawn@^1.0.0:
   resolved "https://registry.yarnpkg.com/flagged-respawn/-/flagged-respawn-1.0.1.tgz#e7de6f1279ddd9ca9aac8a5971d618606b3aab41"
   integrity sha512-lNaHNVymajmk0OJMBn8fVUAU1BtDeKIqKoVhk4xAALB57aALg6b4W0MfJ/cUE0g9YBXy5XhSlPIpYIJ7HaY/3Q==
 
+flat-cache@^3.0.4:
+  version "3.0.4"
+  resolved "https://registry.yarnpkg.com/flat-cache/-/flat-cache-3.0.4.tgz#61b0338302b2fe9f957dcc32fc2a87f1c3048b11"
+  integrity sha512-dm9s5Pw7Jc0GvMYbshN6zchCA9RgQlzzEZX3vylR9IqFfS8XciblUXOKfW6SiuJ0e13eDYZoZV5wdrev7P3Nwg==
+  dependencies:
+    flatted "^3.1.0"
+    rimraf "^3.0.2"
+
 flatbuffers@1.12.0:
   version "1.12.0"
   resolved "https://registry.yarnpkg.com/flatbuffers/-/flatbuffers-1.12.0.tgz#72e87d1726cb1b216e839ef02658aa87dcef68aa"
   integrity sha512-c7CZADjRcl6j0PlvFy0ZqXQ67qSEZfrVPynmnL+2zPc+NtMvrF8Y0QceMo7QqnSPc7+uWjUIAbvCQ5WIKlMVdQ==
 
+flatted@^3.1.0:
+  version "3.1.1"
+  resolved "https://registry.yarnpkg.com/flatted/-/flatted-3.1.1.tgz#c4b489e80096d9df1dfc97c79871aea7c617c469"
+  integrity sha512-zAoAQiudy+r5SvnSw3KJy5os/oRJYHzrzja/tBDqrZtNhUw8bt6y8OBzMWcjWr+8liV8Eb6yOhw8WZ7VFZ5ZzA==
+
 flush-write-stream@^1.0.0, flush-write-stream@^1.0.2:
   version "1.1.1"
   resolved "https://registry.yarnpkg.com/flush-write-stream/-/flush-write-stream-1.1.1.tgz#8dd7d873a1babc207d94ead0c2e0e44276ebf2e8"
@@ -4517,6 +4754,11 @@ function-bind@^1.1.1:
   resolved "https://registry.yarnpkg.com/function-bind/-/function-bind-1.1.1.tgz#a56899d3ea3c9bab874bb9773b7c5ede92f4895d"
   integrity sha512-yIovAzMX49sF8Yl58fSCWJ5svSLuaibPxXQJFLmBObTuCr0Mf1KiPopGM9NiFjiYBCbfaa2Fh6breQ6ANVTI0A==
 
+functional-red-black-tree@^1.0.1:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/functional-red-black-tree/-/functional-red-black-tree-1.0.1.tgz#1b0ab3bd553b2a0d6399d29c0e3ea0b252078327"
+  integrity sha1-GwqzvVU7Kg1jmdKcDj6gslIHgyc=
+
 gauge@~2.7.3:
   version "2.7.4"
   resolved "https://registry.yarnpkg.com/gauge/-/gauge-2.7.4.tgz#2c03405c7538c39d7eb37b317022e325fb018bf7"
@@ -4759,6 +5001,20 @@ globals@^11.1.0:
   resolved "https://registry.yarnpkg.com/globals/-/globals-11.12.0.tgz#ab8795338868a0babd8525758018c2a7eb95c42e"
   integrity sha512-WOBp/EEGUiIsJSp7wcv/y6MO+lV9UoncWqxuFfm8eBwzWNgyfBd6Gz+IeKQ9jCmyhoH99g15M3T+QaVHFjizVA==
 
+globals@^12.1.0:
+  version "12.4.0"
+  resolved "https://registry.yarnpkg.com/globals/-/globals-12.4.0.tgz#a18813576a41b00a24a97e7f815918c2e19925f8"
+  integrity sha512-BWICuzzDvDoH54NHKCseDanAhE3CeDorgDL5MT6LMXXj2WCnd9UC2szdk4AWLfjdgNBCXLUanXYcpBBKOSWGwg==
+  dependencies:
+    type-fest "^0.8.1"
+
+globals@^13.6.0:
+  version "13.8.0"
+  resolved "https://registry.yarnpkg.com/globals/-/globals-13.8.0.tgz#3e20f504810ce87a8d72e55aecf8435b50f4c1b3"
+  integrity sha512-rHtdA6+PDBIjeEvA91rpqzEvk/k3/i7EeNQiryiWuJH0Hw9cpyJMAt2jtbAwUaRdhD+573X4vWw6IcjKPasi9Q==
+  dependencies:
+    type-fest "^0.20.2"
+
 globby@^10.0.1:
   version "10.0.2"
   resolved "https://registry.yarnpkg.com/globby/-/globby-10.0.2.tgz#277593e745acaa4646c3ab411289ec47a0392543"
@@ -4773,6 +5029,18 @@ globby@^10.0.1:
     merge2 "^1.2.3"
     slash "^3.0.0"
 
+globby@^11.0.1:
+  version "11.0.3"
+  resolved "https://registry.yarnpkg.com/globby/-/globby-11.0.3.tgz#9b1f0cb523e171dd1ad8c7b2a9fb4b644b9593cb"
+  integrity sha512-ffdmosjA807y7+lA1NM0jELARVmYul/715xiILEjo3hBLPTcirgQNnXECn5g3mtR8TOLCVbkfua1Hpen25/Xcg==
+  dependencies:
+    array-union "^2.1.0"
+    dir-glob "^3.0.1"
+    fast-glob "^3.1.1"
+    ignore "^5.1.4"
+    merge2 "^1.3.0"
+    slash "^3.0.0"
+
 globby@^9.2.0:
   version "9.2.0"
   resolved "https://registry.yarnpkg.com/globby/-/globby-9.2.0.tgz#fd029a706c703d29bdd170f4b6db3a3f7a7cb63d"
@@ -5166,12 +5434,12 @@ ignore-walk@^3.0.1:
   dependencies:
     minimatch "^3.0.4"
 
-ignore@^4.0.3:
+ignore@^4.0.3, ignore@^4.0.6:
   version "4.0.6"
   resolved "https://registry.yarnpkg.com/ignore/-/ignore-4.0.6.tgz#750e3db5862087b4737ebac8207ffd1ef27b25fc"
   integrity sha512-cyFDKrqc/YdcWFniJhzI42+AzS+gNwmUzOSFcRCQYwySuBBBy/KjuxWLZ/FHEH6Moq1NizMOBWyTcv8O4OZIMg==
 
-ignore@^5.1.1:
+ignore@^5.1.1, ignore@^5.1.4:
   version "5.1.8"
   resolved "https://registry.yarnpkg.com/ignore/-/ignore-5.1.8.tgz#f150a8b50a34289b33e22f5889abd4d8016f0e57"
   integrity sha512-BMpfD7PpiETpBl/A6S498BaIJ6Y/ABT93ETbby2fP00v4EbvPBXWEoaR1UBPKs3iR53pJY7EtZk5KACI57i1Uw==
@@ -5184,6 +5452,14 @@ import-fresh@^2.0.0:
     caller-path "^2.0.0"
     resolve-from "^3.0.0"
 
+import-fresh@^3.0.0, import-fresh@^3.2.1:
+  version "3.3.0"
+  resolved "https://registry.yarnpkg.com/import-fresh/-/import-fresh-3.3.0.tgz#37162c25fcb9ebaa2e6e53d5b4d88ce17d9e0c2b"
+  integrity sha512-veYYhQa+D1QBKznvhUHxb8faxlrwUnxseDAbAp457E0wLNio2bOSKnjYDhMj+YiAq61xrMGhQk9iXVk5FzgQMw==
+  dependencies:
+    parent-module "^1.0.0"
+    resolve-from "^4.0.0"
+
 import-local@^2.0.0:
   version "2.0.0"
   resolved "https://registry.yarnpkg.com/import-local/-/import-local-2.0.0.tgz#55070be38a5993cf18ef6db7e961f5bee5c5a09d"
@@ -6240,6 +6516,11 @@ json-schema-traverse@^0.4.1:
   resolved "https://registry.yarnpkg.com/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz#69f6a87d9513ab8bb8fe63bdb0979c448e684660"
   integrity sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==
 
+json-schema-traverse@^1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz#ae7bcb3656ab77a73ba5c49bf654f38e6b6860e2"
+  integrity sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==
+
 json-schema@0.2.3:
   version "0.2.3"
   resolved "https://registry.yarnpkg.com/json-schema/-/json-schema-0.2.3.tgz#b480c892e59a2f05954ce727bd3f2a4e882f9e13"
@@ -6407,6 +6688,14 @@ leven@^3.1.0:
   resolved "https://registry.yarnpkg.com/leven/-/leven-3.1.0.tgz#77891de834064cccba82ae7842bb6b14a13ed7f2"
   integrity sha512-qsda+H8jTaUaN/x5vzW2rzc+8Rw4TAQ/4KjB46IwK5VH+IlVeeeje/EoZRpiXvIqjFgK84QffqPztGI3VBLG1A==
 
+levn@^0.4.1:
+  version "0.4.1"
+  resolved "https://registry.yarnpkg.com/levn/-/levn-0.4.1.tgz#ae4562c007473b932a6200d403268dd2fffc6ade"
+  integrity sha512-+bT2uH4E5LGE7h/n3evcS/sQlJXCpIp6ym8OWJ5eV6+67Dsql/LaaT7qJBAt2rzfoa/5QBGBhxDix1dMt2kQKQ==
+  dependencies:
+    prelude-ls "^1.2.1"
+    type-check "~0.4.0"
+
 levn@~0.3.0:
   version "0.3.0"
   resolved "https://registry.yarnpkg.com/levn/-/levn-0.3.0.tgz#3b09924edf9f083c0490fdd4c0bc4421e04764ee"
@@ -6518,6 +6807,11 @@ lodash.clonedeep@^4.5.0:
   resolved "https://registry.yarnpkg.com/lodash.clonedeep/-/lodash.clonedeep-4.5.0.tgz#e23f3f9c4f8fbdde872529c1071857a086e5ccef"
   integrity sha1-4j8/nE+Pvd6HJSnBBxhXoIblzO8=
 
+lodash.flatten@^4.4.0:
+  version "4.4.0"
+  resolved "https://registry.yarnpkg.com/lodash.flatten/-/lodash.flatten-4.4.0.tgz#f31c22225a9632d2bbf8e4addbef240aa765a61f"
+  integrity sha1-8xwiIlqWMtK7+OSt2+8kCqdlph8=
+
 lodash.get@^4.4.2:
   version "4.4.2"
   resolved "https://registry.yarnpkg.com/lodash.get/-/lodash.get-4.4.2.tgz#2d177f652fa31e939b4438d5341499dfa3825e99"
@@ -6563,12 +6857,17 @@ lodash.templatesettings@^4.0.0:
   dependencies:
     lodash._reinterpolate "^3.0.0"
 
+lodash.truncate@^4.4.2:
+  version "4.4.2"
+  resolved "https://registry.yarnpkg.com/lodash.truncate/-/lodash.truncate-4.4.2.tgz#5a350da0b1113b837ecfffd5812cbe58d6eae193"
+  integrity sha1-WjUNoLERO4N+z//VgSy+WNbq4ZM=
+
 lodash.uniq@^4.5.0:
   version "4.5.0"
   resolved "https://registry.yarnpkg.com/lodash.uniq/-/lodash.uniq-4.5.0.tgz#d0225373aeb652adc1bc82e4945339a842754773"
   integrity sha1-0CJTc662Uq3BvILklFM5qEJ1R3M=
 
-lodash@^4.17.12, lodash@^4.17.14, lodash@^4.17.15, lodash@^4.17.19, lodash@^4.17.20, lodash@^4.17.4, lodash@^4.2.1, lodash@^4.7.0:
+lodash@^4.17.12, lodash@^4.17.14, lodash@^4.17.15, lodash@^4.17.19, lodash@^4.17.20, lodash@^4.17.21, lodash@^4.17.4, lodash@^4.2.1, lodash@^4.7.0:
   version "4.17.21"
   resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.21.tgz#679591c564c3bffaae8454cf0b3df370c3d6911c"
   integrity sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==
@@ -7037,7 +7336,7 @@ mkdirp@*, mkdirp@1.0.4, mkdirp@1.x, mkdirp@^1.0.3, mkdirp@^1.0.4:
   resolved "https://registry.yarnpkg.com/mkdirp/-/mkdirp-1.0.4.tgz#3eb5ed62622756d79a5f0e2a221dfebad75c2f7e"
   integrity sha512-vVqVZQyf3WLx2Shd0qJ9xuvqgAyKPLAiqITEtqW0oIUjzo3PePDd6fW9iFz30ef7Ysp/oiWqbhszeGWW2T6Gzw==
 
-mkdirp@^0.5.0, mkdirp@^0.5.1, mkdirp@^0.5.3, mkdirp@~0.5.0:
+mkdirp@^0.5.0, mkdirp@^0.5.1, mkdirp@~0.5.0:
   version "0.5.5"
   resolved "https://registry.yarnpkg.com/mkdirp/-/mkdirp-0.5.5.tgz#d91cefd62d1436ca0f41620e251288d420099def"
   integrity sha512-NKmAlESf6jMGym1++R0Ra7wvhV+wFW63FaSOFPwRahvea0gMUcGUhVeAg/0BC0wiv9ih5NYPB1Wn1UEI1/L+xQ==
@@ -7545,6 +7844,18 @@ optionator@^0.8.1:
     type-check "~0.3.2"
     word-wrap "~1.2.3"
 
+optionator@^0.9.1:
+  version "0.9.1"
+  resolved "https://registry.yarnpkg.com/optionator/-/optionator-0.9.1.tgz#4f236a6373dae0566a6d43e1326674f50c291499"
+  integrity sha512-74RlY5FCnhq4jRxVUPKDaRwrVNXMqsGsiW6AJw4XK8hmtm10wC0ypZBLw5IIp85NZMr91+qd1RvvENwg7jjRFw==
+  dependencies:
+    deep-is "^0.1.3"
+    fast-levenshtein "^2.0.6"
+    levn "^0.4.1"
+    prelude-ls "^1.2.1"
+    type-check "^0.4.0"
+    word-wrap "^1.2.3"
+
 ordered-read-streams@^1.0.0:
   version "1.0.1"
   resolved "https://registry.yarnpkg.com/ordered-read-streams/-/ordered-read-streams-1.0.1.tgz#77c0cb37c41525d64166d990ffad7ec6a0e1363e"
@@ -7751,6 +8062,13 @@ parallel-transform@^1.1.0:
     inherits "^2.0.3"
     readable-stream "^2.1.5"
 
+parent-module@^1.0.0:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/parent-module/-/parent-module-1.0.1.tgz#691d2709e78c79fae3a156622452d00762caaaa2"
+  integrity sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g==
+  dependencies:
+    callsites "^3.0.0"
+
 parse-asn1@^5.0.0, parse-asn1@^5.1.5:
   version "5.1.6"
   resolved "https://registry.yarnpkg.com/parse-asn1/-/parse-asn1-5.1.6.tgz#385080a3ec13cb62a62d39409cb3e88844cdaed4"
@@ -8015,6 +8333,11 @@ posix-character-classes@^0.1.0:
   resolved "https://registry.yarnpkg.com/posix-character-classes/-/posix-character-classes-0.1.1.tgz#01eac0fe3b5af71a2a6c02feabb8c1fef7e00eab"
   integrity sha1-AerA/jta9xoqbAL+q7jB/vfgDqs=
 
+prelude-ls@^1.2.1:
+  version "1.2.1"
+  resolved "https://registry.yarnpkg.com/prelude-ls/-/prelude-ls-1.2.1.tgz#debc6489d7a6e6b0e7611888cec880337d316396"
+  integrity sha512-vkcDPrRZo1QZLbn5RLGPpg/WmIQ65qoWWhcGKf/b5eplkkarX0m9z8ppCat4mlOqUsWpyNuYgO3VRyrYHSzX5g==
+
 prelude-ls@~1.1.2:
   version "1.1.2"
   resolved "https://registry.yarnpkg.com/prelude-ls/-/prelude-ls-1.1.2.tgz#21932a549f5e52ffd9a827f570e04be62a97da54"
@@ -8060,7 +8383,7 @@ process@^0.11.10:
   resolved "https://registry.yarnpkg.com/process/-/process-0.11.10.tgz#7332300e840161bda3e69a1d1d91a7d4bc16f182"
   integrity sha1-czIwDoQBYb2j5podHZGn1LwW8YI=
 
-progress@^2.0.3:
+progress@^2.0.0, progress@^2.0.3:
   version "2.0.3"
   resolved "https://registry.yarnpkg.com/progress/-/progress-2.0.3.tgz#7e8cf8d8f5b8f239c1bc68beb4eb78567d572ef8"
   integrity sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==
@@ -8443,6 +8766,11 @@ regex-not@^1.0.0, regex-not@^1.0.2:
     extend-shallow "^3.0.2"
     safe-regex "^1.1.0"
 
+regexpp@^3.0.0, regexpp@^3.1.0:
+  version "3.1.0"
+  resolved "https://registry.yarnpkg.com/regexpp/-/regexpp-3.1.0.tgz#206d0ad0a5648cffbdb8ae46438f3dc51c9f78e2"
+  integrity sha512-ZOIzd8yVsQQA7j8GCSlPGXwg5PfmA1mrq0JP4nGhh54LaKN3xdai/vHUDu74pKwV8OxseMS65u2NImosQcSD0Q==
+
 remove-bom-buffer@^3.0.0:
   version "3.0.0"
   resolved "https://registry.yarnpkg.com/remove-bom-buffer/-/remove-bom-buffer-3.0.0.tgz#c2bf1e377520d324f623892e33c10cac2c252b53"
@@ -8543,6 +8871,11 @@ require-directory@^2.1.1:
   resolved "https://registry.yarnpkg.com/require-directory/-/require-directory-2.1.1.tgz#8c64ad5fd30dab1c976e2344ffe7f792a6a6df42"
   integrity sha1-jGStX9MNqxyXbiNE/+f3kqam30I=
 
+require-from-string@^2.0.2:
+  version "2.0.2"
+  resolved "https://registry.yarnpkg.com/require-from-string/-/require-from-string-2.0.2.tgz#89a7fdd938261267318eafe14f9c32e598c36909"
+  integrity sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==
+
 require-main-filename@^1.0.1:
   version "1.0.1"
   resolved "https://registry.yarnpkg.com/require-main-filename/-/require-main-filename-1.0.1.tgz#97f717b69d48784f5f526a6c5aa8ffdda055a4d1"
@@ -8602,7 +8935,7 @@ resolve-url@^0.2.1:
   resolved "https://registry.yarnpkg.com/resolve-url/-/resolve-url-0.2.1.tgz#2c637fe77c893afd2a663fe21aa9080068e2052a"
   integrity sha1-LGN/53yJOv0qZj/iGqkIAGjiBSo=
 
-resolve@^1.1.6, resolve@^1.1.7, resolve@^1.10.0, resolve@^1.18.1, resolve@^1.20.0, resolve@^1.3.2, resolve@^1.4.0:
+resolve@^1.1.6, resolve@^1.1.7, resolve@^1.10.0, resolve@^1.18.1, resolve@^1.20.0, resolve@^1.4.0:
   version "1.20.0"
   resolved "https://registry.yarnpkg.com/resolve/-/resolve-1.20.0.tgz#629a013fb3f70755d6f0b7935cc1c2c5378b1975"
   integrity sha512-wENBPt4ySzg4ybFQW2TT1zMQucPK95HSh/nq2CFTZVOGut2+pQvSsgtda4d26YrYcr067wjbmzOG8byDPBX63A==
@@ -8775,12 +9108,12 @@ semver-greatest-satisfied-range@^1.1.0:
   dependencies:
     sver-compat "^1.5.0"
 
-"semver@2 || 3 || 4 || 5", "semver@2.x || 3.x || 4 || 5", semver@^5.3.0, semver@^5.4.1, semver@^5.5.0, semver@^5.5.1, semver@^5.6.0, semver@^5.7.0, semver@^5.7.1:
+"semver@2 || 3 || 4 || 5", "semver@2.x || 3.x || 4 || 5", semver@^5.4.1, semver@^5.5.0, semver@^5.5.1, semver@^5.6.0, semver@^5.7.0, semver@^5.7.1:
   version "5.7.1"
   resolved "https://registry.yarnpkg.com/semver/-/semver-5.7.1.tgz#a954f931aeba508d307bbf069eff0c01c96116f7"
   integrity sha512-sauaDf/PZdVgrLTNYHRtpXa1iRiKcaebiKQ1BJdpQlWH2lCvexQdX55snPFyK7QzpudqbCI0qXFfOasHdyNDGQ==
 
-semver@7.x, semver@^7.3.2, semver@^7.3.4:
+semver@7.x, semver@^7.2.1, semver@^7.3.2, semver@^7.3.4:
   version "7.3.5"
   resolved "https://registry.yarnpkg.com/semver/-/semver-7.3.5.tgz#0b621c879348d8998e4b0e4be94b3f12e6018ef7"
   integrity sha512-PoeGJYh8HK4BTO/a9Tf6ZG3veo/A7ZVsYrSA6J8ny9nb3B1VrpkuN+z9OE5wfE5p6H4LchYZsegiQgbJD94ZFQ==
@@ -8938,6 +9271,15 @@ slash@^3.0.0:
   resolved "https://registry.yarnpkg.com/slash/-/slash-3.0.0.tgz#6539be870c165adbd5240220dbe361f1bc4d4634"
   integrity sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q==
 
+slice-ansi@^4.0.0:
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/slice-ansi/-/slice-ansi-4.0.0.tgz#500e8dd0fd55b05815086255b3195adf2a45fe6b"
+  integrity sha512-qMCMfhY040cVHT43K9BFygqYbUPFZKHOg7K73mtTWJRb8pyP3fzf4Ixd5SzdEJQ6MRUg/WBnOLxghZtKKurENQ==
+  dependencies:
+    ansi-styles "^4.0.0"
+    astral-regex "^2.0.0"
+    is-fullwidth-code-point "^3.0.0"
+
 slide@^1.1.6:
   version "1.1.6"
   resolved "https://registry.yarnpkg.com/slide/-/slide-1.1.6.tgz#56eb027d65b4d2dce6cb2e2d32c4d4afc9e1d707"
@@ -9386,6 +9728,11 @@ strip-indent@^3.0.0:
   dependencies:
     min-indent "^1.0.0"
 
+strip-json-comments@^3.1.0, strip-json-comments@^3.1.1:
+  version "3.1.1"
+  resolved "https://registry.yarnpkg.com/strip-json-comments/-/strip-json-comments-3.1.1.tgz#31f1281b3832630434831c310c01cccda8cbe006"
+  integrity sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==
+
 strong-log-transformer@^2.0.0:
   version "2.1.0"
   resolved "https://registry.yarnpkg.com/strong-log-transformer/-/strong-log-transformer-2.1.0.tgz#0f5ed78d325e0421ac6f90f7f10e691d6ae3ae10"
@@ -9446,6 +9793,21 @@ table-layout@^0.4.3:
     typical "^2.6.1"
     wordwrapjs "^3.0.0"
 
+table@^6.0.4:
+  version "6.0.9"
+  resolved "https://registry.yarnpkg.com/table/-/table-6.0.9.tgz#790a12bf1e09b87b30e60419bafd6a1fd85536fb"
+  integrity sha512-F3cLs9a3hL1Z7N4+EkSscsel3z55XT950AvB05bwayrNg5T1/gykXtigioTAjbltvbMSJvvhFCbnf6mX+ntnJQ==
+  dependencies:
+    ajv "^8.0.1"
+    is-boolean-object "^1.1.0"
+    is-number-object "^1.0.4"
+    is-string "^1.0.5"
+    lodash.clonedeep "^4.5.0"
+    lodash.flatten "^4.4.0"
+    lodash.truncate "^4.4.2"
+    slice-ansi "^4.0.0"
+    string-width "^4.2.0"
+
 tapable@^1.0.0, tapable@^1.1.0:
   version "1.1.3"
   resolved "https://registry.yarnpkg.com/tapable/-/tapable-1.1.3.tgz#a1fccc06b58db61fd7a45da2da44f5f3a3e67ba2"
@@ -9576,6 +9938,11 @@ text-extensions@^1.0.0:
   resolved "https://registry.yarnpkg.com/text-extensions/-/text-extensions-1.9.0.tgz#1853e45fee39c945ce6f6c36b2d659b5aabc2a26"
   integrity sha512-wiBrwC1EhBelW12Zy26JeOUkQ5mRu+5o8rpsJk5+2t+Y5vE7e842qtZDQ2g1NpX/29HdyFeJ4nSIhI47ENSxlQ==
 
+text-table@^0.2.0:
+  version "0.2.0"
+  resolved "https://registry.yarnpkg.com/text-table/-/text-table-0.2.0.tgz#7f5ee823ae805207c00af2df4a84ec3fcfa570b4"
+  integrity sha1-f17oI66AUgfACvLfSoTsP8+lcLQ=
+
 thenify-all@^1.0.0:
   version "1.6.0"
   resolved "https://registry.yarnpkg.com/thenify-all/-/thenify-all-1.6.0.tgz#1a1918d402d8fc3f98fbf234db0bcc8cc10e9726"
@@ -9799,34 +10166,15 @@ ts-node@9.0.0:
     source-map-support "^0.5.17"
     yn "3.1.1"
 
-tslib@^1.12.0, tslib@^1.13.0, tslib@^1.8.1, tslib@^1.9.0, tslib@^1.9.3:
+tslib@^1.12.0, tslib@^1.8.1, tslib@^1.9.0, tslib@^1.9.3:
   version "1.14.1"
   resolved "https://registry.yarnpkg.com/tslib/-/tslib-1.14.1.tgz#cf2d38bdc34a134bcaf1091c41f6619e2f672d00"
   integrity sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg==
 
-tslint@6.1.3:
-  version "6.1.3"
-  resolved "https://registry.yarnpkg.com/tslint/-/tslint-6.1.3.tgz#5c23b2eccc32487d5523bd3a470e9aa31789d904"
-  integrity sha512-IbR4nkT96EQOvKE2PW/djGz8iGNeJ4rF2mBfiYaR/nvUWYKJhLwimoJKgjIFEIDibBtOevj7BqCRL4oHeWWUCg==
-  dependencies:
-    "@babel/code-frame" "^7.0.0"
-    builtin-modules "^1.1.1"
-    chalk "^2.3.0"
-    commander "^2.12.1"
-    diff "^4.0.1"
-    glob "^7.1.1"
-    js-yaml "^3.13.1"
-    minimatch "^3.0.4"
-    mkdirp "^0.5.3"
-    resolve "^1.3.2"
-    semver "^5.3.0"
-    tslib "^1.13.0"
-    tsutils "^2.29.0"
-
-tsutils@^2.29.0:
-  version "2.29.0"
-  resolved "https://registry.yarnpkg.com/tsutils/-/tsutils-2.29.0.tgz#32b488501467acbedd4b85498673a0812aca0b99"
-  integrity sha512-g5JVHCIJwzfISaXpXE1qvNalca5Jwob6FjI4AoPlqMusJ6ftFE7IkkFoMhVLRgK+4Kx3gkzb8UZK5t5yTTvEmA==
+tsutils@^3.17.1:
+  version "3.21.0"
+  resolved "https://registry.yarnpkg.com/tsutils/-/tsutils-3.21.0.tgz#b48717d394cea6c1e096983eed58e9d61715b623"
+  integrity sha512-mHKK3iUXL+3UF6xL5k0PEhKRUBKPBCv/+RkEOpjRWxxx27KKRBmmA60A9pgOUvMi8GKhRMPEmjBRPzs2W7O1OA==
   dependencies:
     tslib "^1.8.1"
 
@@ -9847,6 +10195,13 @@ tweetnacl@^0.14.3, tweetnacl@~0.14.0:
   resolved "https://registry.yarnpkg.com/tweetnacl/-/tweetnacl-0.14.5.tgz#5ae68177f192d4456269d108afa93ff8743f4f64"
   integrity sha1-WuaBd/GS1EViadEIr6k/+HQ/T2Q=
 
+type-check@^0.4.0, type-check@~0.4.0:
+  version "0.4.0"
+  resolved "https://registry.yarnpkg.com/type-check/-/type-check-0.4.0.tgz#07b8203bfa7056c0657050e3ccd2c37730bab8f1"
+  integrity sha512-XleUoc9uwGXqjWwXaUTZAmzMcFZ5858QA2vvx1Ur5xIcixXIP+8LnFDgRplU30us6teqdlskFfu+ae4K79Ooew==
+  dependencies:
+    prelude-ls "^1.2.1"
+
 type-check@~0.3.2:
   version "0.3.2"
   resolved "https://registry.yarnpkg.com/type-check/-/type-check-0.3.2.tgz#5884cab512cf1d355e3fb784f30804b2b520db72"
@@ -9869,6 +10224,11 @@ type-fest@^0.18.0:
   resolved "https://registry.yarnpkg.com/type-fest/-/type-fest-0.18.1.tgz#db4bc151a4a2cf4eebf9add5db75508db6cc841f"
   integrity sha512-OIAYXk8+ISY+qTOwkHtKqzAuxchoMiD9Udx+FSGQDuiRR+PJKJHc2NJAXlbhkGwTt/4/nKZxELY1w3ReWOL8mw==
 
+type-fest@^0.20.2:
+  version "0.20.2"
+  resolved "https://registry.yarnpkg.com/type-fest/-/type-fest-0.20.2.tgz#1bf207f4b28f91583666cb5fbd327887301cd5f4"
+  integrity sha512-Ne+eE4r0/iWnpAxD852z3A+N0Bt5RN//NjJwRd2VFHEmrywxf5vsZlh4R6lixl6B+wz/8d+maTSAkN1FIkI3LQ==
+
 type-fest@^0.21.3:
   version "0.21.3"
   resolved "https://registry.yarnpkg.com/type-fest/-/type-fest-0.21.3.tgz#d260a24b0198436e133fa26a524a6d65fa3b2e37"
@@ -10122,6 +10482,11 @@ uuid@^8.3.0:
   resolved "https://registry.yarnpkg.com/uuid/-/uuid-8.3.2.tgz#80d5b5ced271bb9af6c445f21a1a04c606cefbe2"
   integrity sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg==
 
+v8-compile-cache@^2.0.3:
+  version "2.3.0"
+  resolved "https://registry.yarnpkg.com/v8-compile-cache/-/v8-compile-cache-2.3.0.tgz#2de19618c66dc247dcfb6f99338035d8245a2cee"
+  integrity sha512-l8lCEmLcLYZh4nbunNZvQCJc5pv7+RCwa8q/LdUx8u7lsWvPDKmpodJAJNwkAhJC//dFY48KuIEmjtd4RViDrA==
+
 v8-to-istanbul@^7.0.0:
   version "7.1.1"
   resolved "https://registry.yarnpkg.com/v8-to-istanbul/-/v8-to-istanbul-7.1.1.tgz#04bfd1026ba4577de5472df4f5e89af49de5edda"
@@ -10420,7 +10785,7 @@ windows-release@^3.1.0:
   dependencies:
     execa "^1.0.0"
 
-word-wrap@~1.2.3:
+word-wrap@^1.2.3, word-wrap@~1.2.3:
   version "1.2.3"
   resolved "https://registry.yarnpkg.com/word-wrap/-/word-wrap-1.2.3.tgz#610636f6b1f703891bd34771ccb17fb93b47079c"
   integrity sha512-Hz/mrNwitNRh/HUAtM/VT/5VH+ygD6DV7mYKZAtHOrbs8U7lvPS6xf7EJKMF0uW1KJCl0H701g3ZGus+muE5vQ==

From 9fc589ff71a0bd93d942eb6c440bbbd8d82d4f87 Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Wed, 14 Apr 2021 16:10:34 +0900
Subject: [PATCH 018/719] ARROW-12354: [Packaging][RPM] Use
 apache.jfrog.io/artifactory/ instead of apache.bintray.com/

Closes #10007 from kou/packaging-linux-yum-artifactory

Authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 dev/release/verify-yum.sh                     | 23 +++++--------------
 .../yum/Apache-Arrow.repo                     |  6 ++---
 2 files changed, 9 insertions(+), 20 deletions(-)

diff --git a/dev/release/verify-yum.sh b/dev/release/verify-yum.sh
index 0bde804c3c9..b9c46c43898 100755
--- a/dev/release/verify-yum.sh
+++ b/dev/release/verify-yum.sh
@@ -21,26 +21,22 @@ set -exu
 
 if [ $# -lt 2 ]; then
   echo "Usage: $0 VERSION rc"
-  echo "       $0 VERSION rc BINTRAY_REPOSITORY"
   echo "       $0 VERSION release"
-  echo "       $0 VERSION release BINTRAY_REPOSITORY"
   echo "       $0 VERSION local"
   echo " e.g.: $0 0.13.0 rc           # Verify 0.13.0 RC"
   echo " e.g.: $0 0.13.0 release      # Verify 0.13.0"
-  echo " e.g.: $0 0.13.0 rc kou/arrow # Verify 0.13.0 RC at https://bintray.com/kou/arrow"
   echo " e.g.: $0 0.13.0-dev20210203 local # Verify 0.13.0-dev20210203 on local"
   exit 1
 fi
 
 VERSION="$1"
 TYPE="$2"
-BINTRAY_REPOSITORY="${3:-apache/arrow}"
 
 local_prefix="/arrow/dev/tasks/linux-packages"
 
-bintray_base_url="https://dl.bintray.com/${BINTRAY_REPOSITORY}/centos"
+artifactory_base_url="https://apache.jfrog.io/artifactory/arrow/centos"
 if [ "${TYPE}" = "rc" ]; then
-  bintray_base_url="${bintray_base_url}-rc"
+  artifactory_base_url+="-rc"
 fi
 
 distribution=$(. /etc/os-release && echo "${ID}")
@@ -87,30 +83,23 @@ if [ "${TYPE}" = "local" ]; then
 else
   package_version="${VERSION}"
   ${install_command} \
-    ${bintray_base_url}/${distribution_version}/apache-arrow-release-latest.rpm
+    ${artifactory_base_url}/${distribution_version}/apache-arrow-release-latest.rpm
 fi
 
 if [ "${TYPE}" = "local" ]; then
   sed \
     -i"" \
-    -e "s,baseurl=https://apache.bintray.com/arrow/,baseurl=file://${local_prefix}/yum/repositories/,g" \
+    -e "s,baseurl=https://apache\.jfrog\.io/artifactory/arrow/,baseurl=file://${local_prefix}/yum/repositories/,g" \
     /etc/yum.repos.d/Apache-Arrow.repo
   keys="${local_prefix}/KEYS"
   if [ -f "${keys}" ]; then
     cp "${keys}" /etc/pki/rpm-gpg/RPM-GPG-KEY-Apache-Arrow
   fi
 else
-  if [ "${BINTRAY_REPOSITORY}" = "apache/arrow" ]; then
-    if [ "${TYPE}" = "rc" ]; then
-      sed \
-        -i"" \
-        -e "s,/centos/,/centos-rc/,g" \
-        /etc/yum.repos.d/Apache-Arrow.repo
-    fi
-  else
+  if [ "${TYPE}" = "rc" ]; then
     sed \
       -i"" \
-      -e "s,baseurl=https://apache.bintray.com/arrow/centos,baseurl=${bintray_base_url},g" \
+      -e "s,/centos/,/centos-rc/,g" \
       /etc/yum.repos.d/Apache-Arrow.repo
   fi
 fi
diff --git a/dev/tasks/linux-packages/apache-arrow-release/yum/Apache-Arrow.repo b/dev/tasks/linux-packages/apache-arrow-release/yum/Apache-Arrow.repo
index 16a6d742fb7..fd77306e6f5 100644
--- a/dev/tasks/linux-packages/apache-arrow-release/yum/Apache-Arrow.repo
+++ b/dev/tasks/linux-packages/apache-arrow-release/yum/Apache-Arrow.repo
@@ -17,21 +17,21 @@
 
 [apache-arrow-amazon-linux]
 name=Apache Arrow for Amazon Linux 2 - $basearch
-baseurl=https://apache.bintray.com/arrow/centos/7/$basearch/
+baseurl=https://apache.jfrog.io/artifactory/arrow/centos/7/$basearch/
 gpgcheck=1
 enabled=1
 gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-Apache-Arrow
 
 [apache-arrow-centos]
 name=Apache Arrow for CentOS $releasever - $basearch
-baseurl=https://apache.bintray.com/arrow/centos/$releasever/$basearch/
+baseurl=https://apache.jfrog.io/artifactory/arrow/centos/$releasever/$basearch/
 gpgcheck=1
 enabled=1
 gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-Apache-Arrow
 
 [apache-arrow-rhel]
 name=Apache Arrow for RHEL $releasever - $basearch
-baseurl=https://apache.bintray.com/arrow/centos/$releasever/$basearch/
+baseurl=https://apache.jfrog.io/artifactory/arrow/centos/$releasever/$basearch/
 gpgcheck=1
 enabled=1
 gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-Apache-Arrow

From 798a9b46a28de68c18594e7eeb29e698aa3e9e9e Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Wed, 14 Apr 2021 17:12:34 +0900
Subject: [PATCH 019/719] ARROW-12351: [CI][Ruby] Use ruby/setup-ruby instead
 of actions/setup-ruby
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Because actions/setup-ruby is deprecated:

    Please note: This action is deprecated and should no longer be
    used. The team at GitHub has ceased making and accepting code
    contributions or maintaining issues tracker. Please, migrate your
    workflows to the ruby/setup-ruby, which is being actively
    maintained by the official Ruby organization.

https://github.com/actions/setup-ruby#setup-ruby

Closes #10003 from kou/ci-ruby-setup

Lead-authored-by: Sutou Kouhei <kou@clear-code.com>
Co-authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 .github/workflows/dev.yml                       | 2 +-
 .github/workflows/ruby.yml                      | 2 +-
 dev/tasks/linux-packages/github.linux.amd64.yml | 4 +++-
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml
index 37016efcbfe..d1b01848004 100644
--- a/.github/workflows/dev.yml
+++ b/.github/workflows/dev.yml
@@ -79,7 +79,7 @@ jobs:
         with:
           python-version: '3.6'
       - name: Install Ruby
-        uses: actions/setup-ruby@v1
+        uses: ruby/setup-ruby@v1
         with:
           ruby-version: '2.6'
       - name: Install Dependencies
diff --git a/.github/workflows/ruby.yml b/.github/workflows/ruby.yml
index d9430f536b2..2b99cddf8da 100644
--- a/.github/workflows/ruby.yml
+++ b/.github/workflows/ruby.yml
@@ -223,7 +223,7 @@ jobs:
         shell: bash
         run: ci/scripts/util_checkout.sh
       - name: Setup Ruby
-        uses: actions/setup-ruby@v1
+        uses: ruby/setup-ruby@v1
         with:
           ruby-version: ${{ matrix.ruby-version }}
       - name: Upgrade MSYS2
diff --git a/dev/tasks/linux-packages/github.linux.amd64.yml b/dev/tasks/linux-packages/github.linux.amd64.yml
index 380f025afaa..4fa056c18c7 100644
--- a/dev/tasks/linux-packages/github.linux.amd64.yml
+++ b/dev/tasks/linux-packages/github.linux.amd64.yml
@@ -28,7 +28,9 @@ jobs:
       {{ macros.github_login_dockerhub()|indent }}
 
       - name: Set up Ruby
-        uses: actions/setup-ruby@v1
+        uses: ruby/setup-ruby@v1
+        with:
+          ruby-version: '2.6'
       - name: Free Up Disk Space
         shell: bash
         run: arrow/ci/scripts/util_cleanup.sh

From 2863fdd0e8828605c17b565dcd18672f2e49ce1f Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Wed, 14 Apr 2021 12:04:27 +0200
Subject: [PATCH 020/719] ARROW-11924: [C++] Add streaming version of
 FileSystem::GetFileInfo

Closes #9995 from pitrou/ARROW-11924-get-file-info-generator

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/filesystem/filesystem.cc      |  66 +++++-
 cpp/src/arrow/filesystem/filesystem.h       |  46 +++-
 cpp/src/arrow/filesystem/filesystem_test.cc |  11 +-
 cpp/src/arrow/filesystem/mockfs.cc          |  20 +-
 cpp/src/arrow/filesystem/mockfs.h           |  11 +
 cpp/src/arrow/filesystem/s3fs.cc            | 232 +++++++++++++++-----
 cpp/src/arrow/filesystem/s3fs.h             |   4 +-
 cpp/src/arrow/filesystem/s3fs_test.cc       |  31 +++
 cpp/src/arrow/filesystem/test_util.cc       |  23 +-
 cpp/src/arrow/filesystem/test_util.h        |  11 +-
 cpp/src/arrow/io/interfaces.cc              |   5 +-
 cpp/src/arrow/io/util_internal.h            |  11 +
 cpp/src/arrow/testing/future_util.h         |  14 +-
 cpp/src/arrow/type_fwd.h                    |   2 +
 cpp/src/arrow/util/async_generator.h        |  44 +++-
 cpp/src/arrow/util/async_generator_test.cc  |  22 ++
 16 files changed, 461 insertions(+), 92 deletions(-)

diff --git a/cpp/src/arrow/filesystem/filesystem.cc b/cpp/src/arrow/filesystem/filesystem.cc
index 7cfe266cdf0..98dc05731b9 100644
--- a/cpp/src/arrow/filesystem/filesystem.cc
+++ b/cpp/src/arrow/filesystem/filesystem.cc
@@ -32,19 +32,24 @@
 #include "arrow/filesystem/path_util.h"
 #include "arrow/filesystem/util_internal.h"
 #include "arrow/io/slow.h"
+#include "arrow/io/util_internal.h"
 #include "arrow/result.h"
 #include "arrow/status.h"
+#include "arrow/util/async_generator.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/logging.h"
 #include "arrow/util/macros.h"
 #include "arrow/util/parallel.h"
 #include "arrow/util/uri.h"
+#include "arrow/util/vector.h"
 #include "arrow/util/windows_fixup.h"
 
 namespace arrow {
 
+using internal::checked_pointer_cast;
 using internal::TaskHints;
 using internal::Uri;
+using io::internal::SubmitIO;
 
 namespace fs {
 
@@ -143,11 +148,8 @@ auto FileSystemDefer(FileSystem* fs, bool synchronous, DeferredFunc&& func)
   if (synchronous) {
     return std::forward<DeferredFunc>(func)(std::move(self));
   }
-  TaskHints hints;
-  hints.external_id = fs->io_context().external_id();
-  // TODO pass StopToken
-  return DeferNotOk(fs->io_context().executor()->Submit(
-      hints, std::forward<DeferredFunc>(func), std::move(self)));
+  return DeferNotOk(io::internal::SubmitIO(
+      fs->io_context(), std::forward<DeferredFunc>(func), std::move(self)));
 }
 
 }  // namespace
@@ -159,10 +161,11 @@ Future<std::vector<FileInfo>> FileSystem::GetFileInfoAsync(
       [paths](std::shared_ptr<FileSystem> self) { return self->GetFileInfo(paths); });
 }
 
-Future<std::vector<FileInfo>> FileSystem::GetFileInfoAsync(const FileSelector& select) {
-  return FileSystemDefer(
+FileInfoGenerator FileSystem::GetFileInfoGenerator(const FileSelector& select) {
+  auto fut = FileSystemDefer(
       this, default_async_is_sync_,
       [select](std::shared_ptr<FileSystem> self) { return self->GetFileInfo(select); });
+  return MakeSingleFutureGenerator(std::move(fut));
 }
 
 Status FileSystem::DeleteFiles(const std::vector<std::string>& paths) {
@@ -312,6 +315,23 @@ Result<std::vector<FileInfo>> SubTreeFileSystem::GetFileInfo(const FileSelector&
   return infos;
 }
 
+FileInfoGenerator SubTreeFileSystem::GetFileInfoGenerator(const FileSelector& select) {
+  auto selector = select;
+  selector.base_dir = PrependBase(selector.base_dir);
+  auto gen = base_fs_->GetFileInfoGenerator(selector);
+
+  auto self = checked_pointer_cast<SubTreeFileSystem>(shared_from_this());
+
+  std::function<Result<std::vector<FileInfo>>(const std::vector<FileInfo>& infos)>
+      fix_infos = [self](std::vector<FileInfo> infos) -> Result<std::vector<FileInfo>> {
+    for (auto& info : infos) {
+      RETURN_NOT_OK(self->FixInfo(&info));
+    }
+    return infos;
+  };
+  return MakeMappedGenerator(gen, fix_infos);
+}
+
 Status SubTreeFileSystem::CreateDir(const std::string& path, bool recursive) {
   auto s = path;
   RETURN_NOT_OK(PrependBaseNonEmpty(&s));
@@ -378,6 +398,22 @@ Result<std::shared_ptr<io::InputStream>> SubTreeFileSystem::OpenInputStream(
   return base_fs_->OpenInputStream(new_info);
 }
 
+Future<std::shared_ptr<io::InputStream>> SubTreeFileSystem::OpenInputStreamAsync(
+    const std::string& path) {
+  auto s = path;
+  RETURN_NOT_OK(PrependBaseNonEmpty(&s));
+  return base_fs_->OpenInputStreamAsync(s);
+}
+
+Future<std::shared_ptr<io::InputStream>> SubTreeFileSystem::OpenInputStreamAsync(
+    const FileInfo& info) {
+  auto s = info.path();
+  RETURN_NOT_OK(PrependBaseNonEmpty(&s));
+  FileInfo new_info(info);
+  new_info.set_path(std::move(s));
+  return base_fs_->OpenInputStreamAsync(new_info);
+}
+
 Result<std::shared_ptr<io::RandomAccessFile>> SubTreeFileSystem::OpenInputFile(
     const std::string& path) {
   auto s = path;
@@ -394,6 +430,22 @@ Result<std::shared_ptr<io::RandomAccessFile>> SubTreeFileSystem::OpenInputFile(
   return base_fs_->OpenInputFile(new_info);
 }
 
+Future<std::shared_ptr<io::RandomAccessFile>> SubTreeFileSystem::OpenInputFileAsync(
+    const std::string& path) {
+  auto s = path;
+  RETURN_NOT_OK(PrependBaseNonEmpty(&s));
+  return base_fs_->OpenInputFileAsync(s);
+}
+
+Future<std::shared_ptr<io::RandomAccessFile>> SubTreeFileSystem::OpenInputFileAsync(
+    const FileInfo& info) {
+  auto s = info.path();
+  RETURN_NOT_OK(PrependBaseNonEmpty(&s));
+  FileInfo new_info(info);
+  new_info.set_path(std::move(s));
+  return base_fs_->OpenInputFileAsync(new_info);
+}
+
 Result<std::shared_ptr<io::OutputStream>> SubTreeFileSystem::OpenOutputStream(
     const std::string& path) {
   auto s = path;
diff --git a/cpp/src/arrow/filesystem/filesystem.h b/cpp/src/arrow/filesystem/filesystem.h
index f779dd8a13c..2fc58364407 100644
--- a/cpp/src/arrow/filesystem/filesystem.h
+++ b/cpp/src/arrow/filesystem/filesystem.h
@@ -19,6 +19,7 @@
 
 #include <chrono>
 #include <cstdint>
+#include <functional>
 #include <iosfwd>
 #include <memory>
 #include <string>
@@ -141,6 +142,19 @@ struct ARROW_EXPORT FileLocator {
   std::string path;
 };
 
+using FileInfoVector = std::vector<FileInfo>;
+using FileInfoGenerator = std::function<Future<FileInfoVector>()>;
+
+}  // namespace fs
+
+template <>
+struct IterationTraits<fs::FileInfoVector> {
+  static fs::FileInfoVector End() { return {}; }
+  static bool IsEnd(const fs::FileInfoVector& val) { return val.empty(); }
+};
+
+namespace fs {
+
 /// \brief Abstract file system API
 class ARROW_EXPORT FileSystem : public std::enable_shared_from_this<FileSystem> {
  public:
@@ -171,20 +185,22 @@ class ARROW_EXPORT FileSystem : public std::enable_shared_from_this<FileSystem>
   /// a truly exceptional condition (low-level I/O error, etc.).
   virtual Result<FileInfo> GetFileInfo(const std::string& path) = 0;
   /// Same, for many targets at once.
-  virtual Result<std::vector<FileInfo>> GetFileInfo(
-      const std::vector<std::string>& paths);
+  virtual Result<FileInfoVector> GetFileInfo(const std::vector<std::string>& paths);
   /// Same, according to a selector.
   ///
   /// The selector's base directory will not be part of the results, even if
   /// it exists.
   /// If it doesn't exist, see `FileSelector::allow_not_found`.
-  virtual Result<std::vector<FileInfo>> GetFileInfo(const FileSelector& select) = 0;
+  virtual Result<FileInfoVector> GetFileInfo(const FileSelector& select) = 0;
 
   /// EXPERIMENTAL: async version of GetFileInfo
-  virtual Future<std::vector<FileInfo>> GetFileInfoAsync(
-      const std::vector<std::string>& paths);
-  /// EXPERIMENTAL: async version of GetFileInfo
-  virtual Future<std::vector<FileInfo>> GetFileInfoAsync(const FileSelector& select);
+  virtual Future<FileInfoVector> GetFileInfoAsync(const std::vector<std::string>& paths);
+
+  /// EXPERIMENTAL: streaming async version of GetFileInfo
+  ///
+  /// The returned generator is not async-reentrant, i.e. you need to wait for
+  /// the returned future to complete before calling the generator again.
+  virtual FileInfoGenerator GetFileInfoGenerator(const FileSelector& select);
 
   /// Create a directory and subdirectories.
   ///
@@ -314,7 +330,9 @@ class ARROW_EXPORT SubTreeFileSystem : public FileSystem {
   using FileSystem::GetFileInfo;
   /// \endcond
   Result<FileInfo> GetFileInfo(const std::string& path) override;
-  Result<std::vector<FileInfo>> GetFileInfo(const FileSelector& select) override;
+  Result<FileInfoVector> GetFileInfo(const FileSelector& select) override;
+
+  FileInfoGenerator GetFileInfoGenerator(const FileSelector& select) override;
 
   Status CreateDir(const std::string& path, bool recursive = true) override;
 
@@ -335,6 +353,16 @@ class ARROW_EXPORT SubTreeFileSystem : public FileSystem {
       const std::string& path) override;
   Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
       const FileInfo& info) override;
+
+  Future<std::shared_ptr<io::InputStream>> OpenInputStreamAsync(
+      const std::string& path) override;
+  Future<std::shared_ptr<io::InputStream>> OpenInputStreamAsync(
+      const FileInfo& info) override;
+  Future<std::shared_ptr<io::RandomAccessFile>> OpenInputFileAsync(
+      const std::string& path) override;
+  Future<std::shared_ptr<io::RandomAccessFile>> OpenInputFileAsync(
+      const FileInfo& info) override;
+
   Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(
       const std::string& path) override;
   Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(
@@ -370,7 +398,7 @@ class ARROW_EXPORT SlowFileSystem : public FileSystem {
 
   using FileSystem::GetFileInfo;
   Result<FileInfo> GetFileInfo(const std::string& path) override;
-  Result<std::vector<FileInfo>> GetFileInfo(const FileSelector& select) override;
+  Result<FileInfoVector> GetFileInfo(const FileSelector& select) override;
 
   Status CreateDir(const std::string& path, bool recursive = true) override;
 
diff --git a/cpp/src/arrow/filesystem/filesystem_test.cc b/cpp/src/arrow/filesystem/filesystem_test.cc
index f3b561fc581..8df84ff91e6 100644
--- a/cpp/src/arrow/filesystem/filesystem_test.cc
+++ b/cpp/src/arrow/filesystem/filesystem_test.cc
@@ -267,21 +267,26 @@ TEST(PathUtil, ToSlashes) {
 ////////////////////////////////////////////////////////////////////////////
 // Generic MockFileSystem tests
 
+template <typename MockFileSystemType>
 class TestMockFSGeneric : public ::testing::Test, public GenericFileSystemTest {
  public:
   void SetUp() override {
     time_ = TimePoint(TimePoint::duration(42));
-    fs_ = std::make_shared<MockFileSystem>(time_);
+    fs_ = std::make_shared<MockFileSystemType>(time_);
   }
 
  protected:
   std::shared_ptr<FileSystem> GetEmptyFileSystem() override { return fs_; }
 
   TimePoint time_;
-  std::shared_ptr<MockFileSystem> fs_;
+  std::shared_ptr<FileSystem> fs_;
 };
 
-GENERIC_FS_TEST_FUNCTIONS(TestMockFSGeneric);
+using MockFileSystemTypes = ::testing::Types<MockFileSystem, MockAsyncFileSystem>;
+
+TYPED_TEST_SUITE(TestMockFSGeneric, MockFileSystemTypes);
+
+GENERIC_FS_TYPED_TEST_FUNCTIONS(TestMockFSGeneric);
 
 ////////////////////////////////////////////////////////////////////////////
 // Concrete MockFileSystem tests
diff --git a/cpp/src/arrow/filesystem/mockfs.cc b/cpp/src/arrow/filesystem/mockfs.cc
index 294cc85531a..e1ac05ced54 100644
--- a/cpp/src/arrow/filesystem/mockfs.cc
+++ b/cpp/src/arrow/filesystem/mockfs.cc
@@ -31,6 +31,8 @@
 #include "arrow/filesystem/util_internal.h"
 #include "arrow/io/interfaces.h"
 #include "arrow/io/memory.h"
+#include "arrow/util/async_generator.h"
+#include "arrow/util/future.h"
 #include "arrow/util/logging.h"
 #include "arrow/util/string_view.h"
 #include "arrow/util/variant.h"
@@ -536,13 +538,13 @@ Result<FileInfo> MockFileSystem::GetFileInfo(const std::string& path) {
   return info;
 }
 
-Result<std::vector<FileInfo>> MockFileSystem::GetFileInfo(const FileSelector& selector) {
+Result<FileInfoVector> MockFileSystem::GetFileInfo(const FileSelector& selector) {
   auto parts = SplitAbstractPath(selector.base_dir);
   RETURN_NOT_OK(ValidateAbstractPathParts(parts));
 
   auto guard = impl_->lock_guard();
 
-  std::vector<FileInfo> results;
+  FileInfoVector results;
 
   Entry* base_dir = impl_->FindEntry(parts);
   if (base_dir == nullptr) {
@@ -746,6 +748,20 @@ Result<std::shared_ptr<FileSystem>> MockFileSystem::Make(
   return fs;
 }
 
+FileInfoGenerator MockAsyncFileSystem::GetFileInfoGenerator(const FileSelector& select) {
+  auto maybe_infos = GetFileInfo(select);
+  if (maybe_infos.ok()) {
+    // Return the FileInfo entries one by one
+    const auto& infos = *maybe_infos;
+    std::vector<FileInfoVector> chunks(infos.size());
+    std::transform(infos.begin(), infos.end(), chunks.begin(),
+                   [](const FileInfo& info) { return FileInfoVector{info}; });
+    return MakeVectorGenerator(std::move(chunks));
+  } else {
+    return MakeFailingGenerator(maybe_infos);
+  }
+}
+
 }  // namespace internal
 }  // namespace fs
 }  // namespace arrow
diff --git a/cpp/src/arrow/filesystem/mockfs.h b/cpp/src/arrow/filesystem/mockfs.h
index 212caf6d7fe..af0a327e263 100644
--- a/cpp/src/arrow/filesystem/mockfs.h
+++ b/cpp/src/arrow/filesystem/mockfs.h
@@ -114,6 +114,17 @@ class ARROW_EXPORT MockFileSystem : public FileSystem {
   std::unique_ptr<Impl> impl_;
 };
 
+class ARROW_EXPORT MockAsyncFileSystem : public MockFileSystem {
+ public:
+  explicit MockAsyncFileSystem(TimePoint current_time,
+                               const io::IOContext& io_context = io::default_io_context())
+      : MockFileSystem(current_time, io_context) {
+    default_async_is_sync_ = false;
+  }
+
+  FileInfoGenerator GetFileInfoGenerator(const FileSelector& select) override;
+};
+
 }  // namespace internal
 }  // namespace fs
 }  // namespace arrow
diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc
index 400442d2156..75b1e71cc94 100644
--- a/cpp/src/arrow/filesystem/s3fs.cc
+++ b/cpp/src/arrow/filesystem/s3fs.cc
@@ -74,6 +74,7 @@
 #include "arrow/io/util_internal.h"
 #include "arrow/result.h"
 #include "arrow/status.h"
+#include "arrow/util/async_generator.h"
 #include "arrow/util/atomic_shared_ptr.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/future.h"
@@ -87,6 +88,7 @@ namespace arrow {
 
 using internal::TaskGroup;
 using internal::Uri;
+using io::internal::SubmitIO;
 
 namespace fs {
 
@@ -994,10 +996,9 @@ class ObjectOutputStream final : public io::OutputStream {
         ++upload_state_->parts_in_progress;
       }
       auto client = client_;
-      ARROW_ASSIGN_OR_RAISE(auto fut, io_context_.executor()->Submit(
-                                          io_context_.stop_token(), [client, req]() {
-                                            return client->UploadPart(req);
-                                          }));
+      ARROW_ASSIGN_OR_RAISE(auto fut, SubmitIO(io_context_, [client, req]() {
+                              return client->UploadPart(req);
+                            }));
       // The closure keeps the buffer and the upload state alive
       auto state = upload_state_;
       auto part_number = part_number_;
@@ -1126,6 +1127,11 @@ struct TreeWalker : public std::enable_shared_from_this<TreeWalker> {
 
   template <typename... Args>
   static Status Walk(Args&&... args) {
+    return WalkAsync(std::forward<Args>(args)...).status();
+  }
+
+  template <typename... Args>
+  static Future<> WalkAsync(Args&&... args) {
     auto self = std::make_shared<TreeWalker>(std::forward<Args>(args)...);
     return self->DoWalk();
   }
@@ -1147,12 +1153,12 @@ struct TreeWalker : public std::enable_shared_from_this<TreeWalker> {
   std::shared_ptr<TaskGroup> task_group_;
   std::mutex mutex_;
 
-  Status DoWalk() {
+  Future<> DoWalk() {
     task_group_ =
         TaskGroup::MakeThreaded(io_context_.executor(), io_context_.stop_token());
     WalkChild(base_dir_, /*nesting_depth=*/0);
     // When this returns, ListObjectsV2 tasks either have finished or will exit early
-    return task_group_->Finish();
+    return task_group_->FinishAsync();
   }
 
   bool ok() const { return task_group_->ok(); }
@@ -1249,7 +1255,7 @@ struct TreeWalker : public std::enable_shared_from_this<TreeWalker> {
 // -----------------------------------------------------------------------
 // S3 filesystem implementation
 
-class S3FileSystem::Impl {
+class S3FileSystem::Impl : public std::enable_shared_from_this<S3FileSystem::Impl> {
  public:
   ClientBuilder builder_;
   io::IOContext io_context_;
@@ -1404,32 +1410,20 @@ class S3FileSystem::Impl {
     return Status::OK();
   }
 
-  // Workhorse for GetTargetStats(FileSelector...)
-  Status Walk(const FileSelector& select, const std::string& bucket,
-              const std::string& key, std::vector<FileInfo>* out) {
-    bool is_empty = true;
-
-    auto handle_error = [&](const AWSError<S3Errors>& error) -> Status {
-      if (select.allow_not_found && IsNotFound(error)) {
-        return Status::OK();
-      }
-      return ErrorToStatus(std::forward_as_tuple("When listing objects under key '", key,
-                                                 "' in bucket '", bucket, "': "),
-                           error);
-    };
-
-    auto handle_recursion = [&](int32_t nesting_depth) -> Result<bool> {
-      RETURN_NOT_OK(CheckNestingDepth(nesting_depth));
-      return select.recursive && nesting_depth <= select.max_recursion;
-    };
+  // A helper class for Walk and WalkAsync
+  struct FileInfoCollector {
+    FileInfoCollector(std::string bucket, std::string key, const FileSelector& select)
+        : bucket(std::move(bucket)),
+          key(std::move(key)),
+          allow_not_found(select.allow_not_found) {}
 
-    auto handle_results = [&](const std::string& prefix,
-                              const S3Model::ListObjectsV2Result& result) -> Status {
+    Status Collect(const std::string& prefix, const S3Model::ListObjectsV2Result& result,
+                   std::vector<FileInfo>* out) {
       // Walk "directories"
-      for (const auto& prefix : result.GetCommonPrefixes()) {
+      for (const auto& child_prefix : result.GetCommonPrefixes()) {
         is_empty = false;
         const auto child_key =
-            internal::RemoveTrailingSlash(FromAwsString(prefix.GetPrefix()));
+            internal::RemoveTrailingSlash(FromAwsString(child_prefix.GetPrefix()));
         std::stringstream child_path;
         child_path << bucket << kSep << child_key;
         FileInfo info;
@@ -1453,6 +1447,49 @@ class S3FileSystem::Impl {
         out->push_back(std::move(info));
       }
       return Status::OK();
+    }
+
+    Status Finish(Impl* impl) {
+      // If no contents were found, perhaps it's an empty "directory",
+      // or perhaps it's a nonexistent entry.  Check.
+      if (is_empty && !allow_not_found) {
+        bool is_actually_empty;
+        RETURN_NOT_OK(impl->IsEmptyDirectory(bucket, key, &is_actually_empty));
+        if (!is_actually_empty) {
+          return PathNotFound(bucket, key);
+        }
+      }
+      return Status::OK();
+    }
+
+    std::string bucket;
+    std::string key;
+    bool allow_not_found;
+    bool is_empty = true;
+  };
+
+  // Workhorse for GetFileInfo(FileSelector...)
+  Status Walk(const FileSelector& select, const std::string& bucket,
+              const std::string& key, std::vector<FileInfo>* out) {
+    FileInfoCollector collector(bucket, key, select);
+
+    auto handle_error = [&](const AWSError<S3Errors>& error) -> Status {
+      if (select.allow_not_found && IsNotFound(error)) {
+        return Status::OK();
+      }
+      return ErrorToStatus(std::forward_as_tuple("When listing objects under key '", key,
+                                                 "' in bucket '", bucket, "': "),
+                           error);
+    };
+
+    auto handle_recursion = [&](int32_t nesting_depth) -> Result<bool> {
+      RETURN_NOT_OK(CheckNestingDepth(nesting_depth));
+      return select.recursive && nesting_depth <= select.max_recursion;
+    };
+
+    auto handle_results = [&](const std::string& prefix,
+                              const S3Model::ListObjectsV2Result& result) -> Status {
+      return collector.Collect(prefix, result, out);
     };
 
     RETURN_NOT_OK(TreeWalker::Walk(client_, io_context_, bucket, key, kListObjectsMaxKeys,
@@ -1460,17 +1497,59 @@ class S3FileSystem::Impl {
 
     // If no contents were found, perhaps it's an empty "directory",
     // or perhaps it's a nonexistent entry.  Check.
-    if (is_empty && !select.allow_not_found) {
-      RETURN_NOT_OK(IsEmptyDirectory(bucket, key, &is_empty));
-      if (!is_empty) {
-        return PathNotFound(bucket, key);
-      }
-    }
+    RETURN_NOT_OK(collector.Finish(this));
     // Sort results for convenience, since they can come massively out of order
     std::sort(out->begin(), out->end(), FileInfo::ByPath{});
     return Status::OK();
   }
 
+  // Workhorse for GetFileInfoGenerator(FileSelector...)
+  FileInfoGenerator WalkAsync(const FileSelector& select, const std::string& bucket,
+                              const std::string& key) {
+    PushGenerator<std::vector<FileInfo>> gen;
+    auto producer = gen.producer();
+    auto collector = std::make_shared<FileInfoCollector>(bucket, key, select);
+    auto self = shared_from_this();
+
+    auto handle_error = [select, bucket, key](const AWSError<S3Errors>& error) -> Status {
+      if (select.allow_not_found && IsNotFound(error)) {
+        return Status::OK();
+      }
+      return ErrorToStatus(std::forward_as_tuple("When listing objects under key '", key,
+                                                 "' in bucket '", bucket, "': "),
+                           error);
+    };
+
+    auto handle_recursion = [select, self](int32_t nesting_depth) -> Result<bool> {
+      RETURN_NOT_OK(self->CheckNestingDepth(nesting_depth));
+      return select.recursive && nesting_depth <= select.max_recursion;
+    };
+
+    auto handle_results =
+        [collector, producer](
+            const std::string& prefix,
+            const S3Model::ListObjectsV2Result& result) mutable -> Status {
+      std::vector<FileInfo> out;
+      RETURN_NOT_OK(collector->Collect(prefix, result, &out));
+      if (!out.empty()) {
+        producer.Push(std::move(out));
+      }
+      return Status::OK();
+    };
+
+    TreeWalker::WalkAsync(client_, io_context_, bucket, key, kListObjectsMaxKeys,
+                          handle_results, handle_error, handle_recursion)
+        .AddCallback([collector, producer,
+                      self](const Result<::arrow::detail::Empty>& res) mutable {
+          auto st = collector->Finish(self.get());
+          if (!st.ok()) {
+            producer.Push(st);
+          }
+          producer.Close();
+        });
+    return gen;
+  }
+
   Status WalkForDeleteDir(const std::string& bucket, const std::string& key,
                           std::vector<std::string>* file_keys,
                           std::vector<std::string>* dir_keys) {
@@ -1550,10 +1629,9 @@ class S3FileSystem::Impl {
       }
       req.SetBucket(ToAwsString(bucket));
       req.SetDelete(std::move(del));
-      ARROW_ASSIGN_OR_RAISE(auto fut, io_context_.executor()->Submit(
-                                          io_context_.stop_token(), [client, req]() {
-                                            return client->DeleteObjects(req);
-                                          }));
+      ARROW_ASSIGN_OR_RAISE(auto fut, SubmitIO(io_context_, [client, req]() {
+                              return client->DeleteObjects(req);
+                            }));
       futures.push_back(std::move(fut).Then(delete_cb));
     }
 
@@ -1598,17 +1676,29 @@ class S3FileSystem::Impl {
     return Status::OK();
   }
 
-  Status ListBuckets(std::vector<std::string>* out) {
-    out->clear();
-    auto outcome = client_->ListBuckets();
+  static Result<std::vector<std::string>> ProcessListBuckets(
+      const Aws::S3::Model::ListBucketsOutcome& outcome) {
     if (!outcome.IsSuccess()) {
       return ErrorToStatus(std::forward_as_tuple("When listing buckets: "),
                            outcome.GetError());
     }
+    std::vector<std::string> buckets;
+    buckets.reserve(outcome.GetResult().GetBuckets().size());
     for (const auto& bucket : outcome.GetResult().GetBuckets()) {
-      out->emplace_back(FromAwsString(bucket.GetName()));
+      buckets.emplace_back(FromAwsString(bucket.GetName()));
     }
-    return Status::OK();
+    return buckets;
+  }
+
+  Result<std::vector<std::string>> ListBuckets() {
+    auto outcome = client_->ListBuckets();
+    return ProcessListBuckets(outcome);
+  }
+
+  Future<std::vector<std::string>> ListBucketsAsync(io::IOContext ctx) {
+    auto self = shared_from_this();
+    return DeferNotOk(SubmitIO(ctx, [self]() { return self->client_->ListBuckets(); }))
+        .Then(Impl::ProcessListBuckets);
   }
 
   Result<std::shared_ptr<ObjectInputFile>> OpenInputFile(const std::string& s,
@@ -1641,7 +1731,7 @@ class S3FileSystem::Impl {
 };
 
 S3FileSystem::S3FileSystem(const S3Options& options, const io::IOContext& io_context)
-    : FileSystem(io_context), impl_(new Impl{options, io_context}) {
+    : FileSystem(io_context), impl_(std::make_shared<Impl>(options, io_context)) {
   default_async_is_sync_ = false;
 }
 
@@ -1736,15 +1826,14 @@ Result<FileInfo> S3FileSystem::GetFileInfo(const std::string& s) {
   }
 }
 
-Result<std::vector<FileInfo>> S3FileSystem::GetFileInfo(const FileSelector& select) {
+Result<FileInfoVector> S3FileSystem::GetFileInfo(const FileSelector& select) {
   ARROW_ASSIGN_OR_RAISE(auto base_path, S3Path::FromString(select.base_dir));
 
-  std::vector<FileInfo> results;
+  FileInfoVector results;
 
   if (base_path.empty()) {
     // List all buckets
-    std::vector<std::string> buckets;
-    RETURN_NOT_OK(impl_->ListBuckets(&buckets));
+    ARROW_ASSIGN_OR_RAISE(auto buckets, impl_->ListBuckets());
     for (const auto& bucket : buckets) {
       FileInfo info;
       info.set_path(bucket);
@@ -1762,6 +1851,51 @@ Result<std::vector<FileInfo>> S3FileSystem::GetFileInfo(const FileSelector& sele
   return results;
 }
 
+FileInfoGenerator S3FileSystem::GetFileInfoGenerator(const FileSelector& select) {
+  auto maybe_base_path = S3Path::FromString(select.base_dir);
+  if (!maybe_base_path.ok()) {
+    return MakeFailingGenerator<FileInfoVector>(maybe_base_path.status());
+  }
+  auto base_path = *std::move(maybe_base_path);
+
+  if (base_path.empty()) {
+    // List all buckets, then possibly recurse
+    PushGenerator<AsyncGenerator<FileInfoVector>> gen;
+    auto producer = gen.producer();
+
+    auto fut = impl_->ListBucketsAsync(io_context());
+    auto impl = impl_->shared_from_this();
+    fut.AddCallback(
+        [producer, select, impl](const Result<std::vector<std::string>>& res) mutable {
+          if (!res.ok()) {
+            producer.Push(res.status());
+            producer.Close();
+            return;
+          }
+          FileInfoVector buckets;
+          for (const auto& bucket : *res) {
+            buckets.push_back(FileInfo{bucket, FileType::Directory});
+          }
+          // Generate all bucket infos
+          auto buckets_fut = Future<FileInfoVector>::MakeFinished(std::move(buckets));
+          producer.Push(MakeSingleFutureGenerator(buckets_fut));
+          if (select.recursive) {
+            // Generate recursive walk for each bucket in turn
+            for (const auto& bucket : *buckets_fut.result()) {
+              producer.Push(impl->WalkAsync(select, bucket.path(), ""));
+            }
+          }
+          producer.Close();
+        });
+
+    return MakeConcatenatedGenerator(
+        AsyncGenerator<AsyncGenerator<FileInfoVector>>{std::move(gen)});
+  }
+
+  // Nominal case -> walk a single bucket
+  return impl_->WalkAsync(select, base_path.bucket, base_path.key);
+}
+
 Status S3FileSystem::CreateDir(const std::string& s, bool recursive) {
   ARROW_ASSIGN_OR_RAISE(auto path, S3Path::FromString(s));
 
diff --git a/cpp/src/arrow/filesystem/s3fs.h b/cpp/src/arrow/filesystem/s3fs.h
index ac384fcba71..a7f72fb1a1f 100644
--- a/cpp/src/arrow/filesystem/s3fs.h
+++ b/cpp/src/arrow/filesystem/s3fs.h
@@ -149,6 +149,8 @@ class ARROW_EXPORT S3FileSystem : public FileSystem {
   Result<FileInfo> GetFileInfo(const std::string& path) override;
   Result<std::vector<FileInfo>> GetFileInfo(const FileSelector& select) override;
 
+  FileInfoGenerator GetFileInfoGenerator(const FileSelector& select) override;
+
   Status CreateDir(const std::string& path, bool recursive = true) override;
 
   Status DeleteDir(const std::string& path) override;
@@ -206,7 +208,7 @@ class ARROW_EXPORT S3FileSystem : public FileSystem {
   explicit S3FileSystem(const S3Options& options, const io::IOContext&);
 
   class Impl;
-  std::unique_ptr<Impl> impl_;
+  std::shared_ptr<Impl> impl_;
 };
 
 enum class S3LogLevel : int8_t { Off, Fatal, Error, Warn, Info, Debug, Trace };
diff --git a/cpp/src/arrow/filesystem/s3fs_test.cc b/cpp/src/arrow/filesystem/s3fs_test.cc
index c79d9f715be..f5efcda5120 100644
--- a/cpp/src/arrow/filesystem/s3fs_test.cc
+++ b/cpp/src/arrow/filesystem/s3fs_test.cc
@@ -70,9 +70,12 @@
 #include "arrow/filesystem/test_util.h"
 #include "arrow/result.h"
 #include "arrow/status.h"
+#include "arrow/testing/future_util.h"
 #include "arrow/testing/gtest_util.h"
 #include "arrow/testing/util.h"
+#include "arrow/util/async_generator.h"
 #include "arrow/util/checked_cast.h"
+#include "arrow/util/future.h"
 #include "arrow/util/io_util.h"
 #include "arrow/util/logging.h"
 #include "arrow/util/macros.h"
@@ -641,6 +644,34 @@ TEST_F(TestS3FS, GetFileInfoSelectorRecursive) {
   AssertFileInfo(infos[1], "bucket/somedir/subdir/subfile", FileType::File, 8);
 }
 
+TEST_F(TestS3FS, GetFileInfoGenerator) {
+  FileSelector select;
+  FileInfoVector infos;
+
+  // Root dir
+  select.base_dir = "";
+  CollectFileInfoGenerator(fs_->GetFileInfoGenerator(select), &infos);
+  ASSERT_EQ(infos.size(), 2);
+  SortInfos(&infos);
+  AssertFileInfo(infos[0], "bucket", FileType::Directory);
+  AssertFileInfo(infos[1], "empty-bucket", FileType::Directory);
+
+  // Root dir, recursive
+  select.recursive = true;
+  CollectFileInfoGenerator(fs_->GetFileInfoGenerator(select), &infos);
+  ASSERT_EQ(infos.size(), 7);
+  SortInfos(&infos);
+  AssertFileInfo(infos[0], "bucket", FileType::Directory);
+  AssertFileInfo(infos[1], "bucket/emptydir", FileType::Directory);
+  AssertFileInfo(infos[2], "bucket/somedir", FileType::Directory);
+  AssertFileInfo(infos[3], "bucket/somedir/subdir", FileType::Directory);
+  AssertFileInfo(infos[4], "bucket/somedir/subdir/subfile", FileType::File, 8);
+  AssertFileInfo(infos[5], "bucket/somefile", FileType::File, 9);
+  AssertFileInfo(infos[6], "empty-bucket", FileType::Directory);
+
+  // Non-root dir case is tested by generic tests
+}
+
 TEST_F(TestS3FS, CreateDir) {
   FileInfo st;
 
diff --git a/cpp/src/arrow/filesystem/test_util.cc b/cpp/src/arrow/filesystem/test_util.cc
index 93d84c06b88..466b8826aef 100644
--- a/cpp/src/arrow/filesystem/test_util.cc
+++ b/cpp/src/arrow/filesystem/test_util.cc
@@ -30,7 +30,9 @@
 #include "arrow/status.h"
 #include "arrow/testing/future_util.h"
 #include "arrow/testing/gtest_util.h"
+#include "arrow/util/async_generator.h"
 #include "arrow/util/future.h"
+#include "arrow/util/vector.h"
 
 using ::testing::ElementsAre;
 
@@ -111,6 +113,12 @@ void SortInfos(std::vector<FileInfo>* infos) {
   std::sort(infos->begin(), infos->end(), FileInfo::ByPath{});
 }
 
+void CollectFileInfoGenerator(FileInfoGenerator gen, FileInfoVector* out_infos) {
+  auto fut = CollectAsyncGenerator(gen);
+  ASSERT_FINISHES_OK_AND_ASSIGN(auto nested_infos, fut);
+  *out_infos = ::arrow::internal::FlattenVectors(nested_infos);
+}
+
 void AssertFileInfo(const FileInfo& info, const std::string& path, FileType type) {
   ASSERT_EQ(info.path(), path);
   ASSERT_EQ(info.type(), type) << "For path '" << info.path() << "'";
@@ -681,7 +689,7 @@ void GenericFileSystemTest::TestGetFileInfoSelector(FileSystem* fs) {
   ASSERT_RAISES(IOError, fs->GetFileInfo(s));
 }
 
-void GenericFileSystemTest::TestGetFileInfoSelectorAsync(FileSystem* fs) {
+void GenericFileSystemTest::TestGetFileInfoGenerator(FileSystem* fs) {
   ASSERT_OK(fs->CreateDir("AB/CD"));
   CreateFile(fs, "abc", "data");
   CreateFile(fs, "AB/def", "some data");
@@ -691,9 +699,11 @@ void GenericFileSystemTest::TestGetFileInfoSelectorAsync(FileSystem* fs) {
   FileSelector s;
   s.base_dir = "";
   std::vector<FileInfo> infos;
+  std::vector<std::vector<FileInfo>> nested_infos;
 
   // Non-recursive
-  ASSERT_FINISHES_OK_AND_ASSIGN(infos, fs->GetFileInfoAsync(s));
+  auto gen = fs->GetFileInfoGenerator(s);
+  CollectFileInfoGenerator(std::move(gen), &infos);
   SortInfos(&infos);
   ASSERT_EQ(infos.size(), 2);
   AssertFileInfo(infos[0], "AB", FileType::Directory);
@@ -702,7 +712,7 @@ void GenericFileSystemTest::TestGetFileInfoSelectorAsync(FileSystem* fs) {
   // Recursive
   s.base_dir = "AB";
   s.recursive = true;
-  ASSERT_FINISHES_OK_AND_ASSIGN(infos, fs->GetFileInfoAsync(s));
+  CollectFileInfoGenerator(fs->GetFileInfoGenerator(s), &infos);
   SortInfos(&infos);
   ASSERT_EQ(infos.size(), 4);
   AssertFileInfo(infos[0], "AB/CD", FileType::Directory);
@@ -712,9 +722,10 @@ void GenericFileSystemTest::TestGetFileInfoSelectorAsync(FileSystem* fs) {
 
   // Doesn't exist
   s.base_dir = "XX";
-  ASSERT_RAISES(IOError, fs->GetFileInfoAsync(s).result());
+  auto fut = CollectAsyncGenerator(fs->GetFileInfoGenerator(s));
+  ASSERT_FINISHES_AND_RAISES(IOError, fut);
   s.allow_not_found = true;
-  ASSERT_FINISHES_OK_AND_ASSIGN(infos, fs->GetFileInfoAsync(s));
+  CollectFileInfoGenerator(fs->GetFileInfoGenerator(s), &infos);
   ASSERT_EQ(infos.size(), 0);
 }
 
@@ -1025,7 +1036,7 @@ GENERIC_FS_TEST_DEFINE(TestGetFileInfoVector)
 GENERIC_FS_TEST_DEFINE(TestGetFileInfoSelector)
 GENERIC_FS_TEST_DEFINE(TestGetFileInfoSelectorWithRecursion)
 GENERIC_FS_TEST_DEFINE(TestGetFileInfoAsync)
-GENERIC_FS_TEST_DEFINE(TestGetFileInfoSelectorAsync)
+GENERIC_FS_TEST_DEFINE(TestGetFileInfoGenerator)
 GENERIC_FS_TEST_DEFINE(TestOpenOutputStream)
 GENERIC_FS_TEST_DEFINE(TestOpenAppendStream)
 GENERIC_FS_TEST_DEFINE(TestOpenInputStream)
diff --git a/cpp/src/arrow/filesystem/test_util.h b/cpp/src/arrow/filesystem/test_util.h
index 232d06f9ff9..79417561277 100644
--- a/cpp/src/arrow/filesystem/test_util.h
+++ b/cpp/src/arrow/filesystem/test_util.h
@@ -43,7 +43,10 @@ void CreateFile(FileSystem* fs, const std::string& path, const std::string& data
 
 // Sort a vector of FileInfo by lexicographic path order
 ARROW_TESTING_EXPORT
-void SortInfos(std::vector<FileInfo>* infos);
+void SortInfos(FileInfoVector* infos);
+
+ARROW_TESTING_EXPORT
+void CollectFileInfoGenerator(FileInfoGenerator gen, FileInfoVector* out_infos);
 
 ARROW_TESTING_EXPORT
 void AssertFileInfo(const FileInfo& info, const std::string& path, FileType type);
@@ -109,7 +112,7 @@ class ARROW_TESTING_EXPORT GenericFileSystemTest {
   void TestGetFileInfoSelector();
   void TestGetFileInfoSelectorWithRecursion();
   void TestGetFileInfoAsync();
-  void TestGetFileInfoSelectorAsync();
+  void TestGetFileInfoGenerator();
   void TestOpenOutputStream();
   void TestOpenAppendStream();
   void TestOpenInputStream();
@@ -154,7 +157,7 @@ class ARROW_TESTING_EXPORT GenericFileSystemTest {
   void TestGetFileInfoSelector(FileSystem* fs);
   void TestGetFileInfoSelectorWithRecursion(FileSystem* fs);
   void TestGetFileInfoAsync(FileSystem* fs);
-  void TestGetFileInfoSelectorAsync(FileSystem* fs);
+  void TestGetFileInfoGenerator(FileSystem* fs);
   void TestOpenOutputStream(FileSystem* fs);
   void TestOpenAppendStream(FileSystem* fs);
   void TestOpenInputStream(FileSystem* fs);
@@ -185,7 +188,7 @@ class ARROW_TESTING_EXPORT GenericFileSystemTest {
   GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, GetFileInfoSelector)              \
   GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, GetFileInfoSelectorWithRecursion) \
   GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, GetFileInfoAsync)                 \
-  GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, GetFileInfoSelectorAsync)         \
+  GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, GetFileInfoGenerator)             \
   GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, OpenOutputStream)                 \
   GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, OpenAppendStream)                 \
   GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, OpenInputStream)                  \
diff --git a/cpp/src/arrow/io/interfaces.cc b/cpp/src/arrow/io/interfaces.cc
index dc2112ebddd..d052c016837 100644
--- a/cpp/src/arrow/io/interfaces.cc
+++ b/cpp/src/arrow/io/interfaces.cc
@@ -136,9 +136,8 @@ Future<std::shared_ptr<Buffer>> RandomAccessFile::ReadAsync(const IOContext& ctx
   TaskHints hints;
   hints.io_size = nbytes;
   hints.external_id = ctx.external_id();
-  return DeferNotOk(ctx.executor()->Submit(std::move(hints), [self, position, nbytes] {
-    return self->ReadAt(position, nbytes);
-  }));
+  return DeferNotOk(internal::SubmitIO(
+      ctx, [self, position, nbytes] { return self->ReadAt(position, nbytes); }));
 }
 
 Future<std::shared_ptr<Buffer>> RandomAccessFile::ReadAsync(int64_t position,
diff --git a/cpp/src/arrow/io/util_internal.h b/cpp/src/arrow/io/util_internal.h
index f7112277bb6..b1d75d1d0bd 100644
--- a/cpp/src/arrow/io/util_internal.h
+++ b/cpp/src/arrow/io/util_internal.h
@@ -18,9 +18,11 @@
 #pragma once
 
 #include <memory>
+#include <utility>
 #include <vector>
 
 #include "arrow/io/interfaces.h"
+#include "arrow/util/thread_pool.h"
 #include "arrow/util/type_fwd.h"
 #include "arrow/util/visibility.h"
 
@@ -50,6 +52,15 @@ std::vector<ReadRange> CoalesceReadRanges(std::vector<ReadRange> ranges,
 ARROW_EXPORT
 ::arrow::internal::ThreadPool* GetIOThreadPool();
 
+template <typename... SubmitArgs>
+auto SubmitIO(IOContext io_context, SubmitArgs&&... submit_args)
+    -> decltype(std::declval<::arrow::internal::Executor*>()->Submit(submit_args...)) {
+  ::arrow::internal::TaskHints hints;
+  hints.external_id = io_context.external_id();
+  return io_context.executor()->Submit(hints, io_context.stop_token(),
+                                       std::forward<SubmitArgs>(submit_args)...);
+}
+
 }  // namespace internal
 }  // namespace io
 }  // namespace arrow
diff --git a/cpp/src/arrow/testing/future_util.h b/cpp/src/arrow/testing/future_util.h
index 44fa78c375c..a61a9894fe3 100644
--- a/cpp/src/arrow/testing/future_util.h
+++ b/cpp/src/arrow/testing/future_util.h
@@ -47,15 +47,15 @@
 
 #define ASSERT_FINISHES_AND_RAISES(ENUM, expr) \
   do {                                         \
-    auto&& fut = (expr);                       \
-    ASSERT_FINISHES_IMPL(fut);                 \
-    ASSERT_RAISES(ENUM, fut.status());         \
+    auto&& _fut = (expr);                      \
+    ASSERT_FINISHES_IMPL(_fut);                \
+    ASSERT_RAISES(ENUM, _fut.status());        \
   } while (false)
 
-#define ASSERT_FINISHES_OK_AND_ASSIGN_IMPL(lhs, rexpr, future_name) \
-  auto future_name = (rexpr);                                       \
-  ASSERT_FINISHES_IMPL(future_name);                                \
-  ASSERT_OK_AND_ASSIGN(lhs, future_name.result());
+#define ASSERT_FINISHES_OK_AND_ASSIGN_IMPL(lhs, rexpr, _future_name) \
+  auto _future_name = (rexpr);                                       \
+  ASSERT_FINISHES_IMPL(_future_name);                                \
+  ASSERT_OK_AND_ASSIGN(lhs, _future_name.result());
 
 #define ASSERT_FINISHES_OK_AND_ASSIGN(lhs, rexpr) \
   ASSERT_FINISHES_OK_AND_ASSIGN_IMPL(lhs, rexpr,  \
diff --git a/cpp/src/arrow/type_fwd.h b/cpp/src/arrow/type_fwd.h
index 46018ef13be..7eb318c8b41 100644
--- a/cpp/src/arrow/type_fwd.h
+++ b/cpp/src/arrow/type_fwd.h
@@ -29,6 +29,8 @@ namespace arrow {
 
 template <typename T>
 class Iterator;
+template <typename T>
+struct IterationTraits;
 
 template <typename T>
 class Result;
diff --git a/cpp/src/arrow/util/async_generator.h b/cpp/src/arrow/util/async_generator.h
index f034cea9983..06e823abf28 100644
--- a/cpp/src/arrow/util/async_generator.h
+++ b/cpp/src/arrow/util/async_generator.h
@@ -44,7 +44,7 @@ namespace arrow {
 // the utilities Visit/Collect/Await take care to do this).
 //
 // Asynchronous reentrancy on the other hand means the function is called again before the
-// future returned by the function is marekd finished (but after the call to get the
+// future returned by the function is marked finished (but after the call to get the
 // future returns).  Some of these generators are async-reentrant while others (e.g.
 // those that depend on ordered processing like decompression) are not.  Read the MakeXYZ
 // function comments to determine which generators support async reentrancy.
@@ -1332,4 +1332,46 @@ Result<Iterator<T>> MakeReadaheadIterator(Iterator<T> it, int readahead_queue_si
   return MakeGeneratorIterator(std::move(owned_bg_generator));
 }
 
+/// \brief Make a generator that returns a single pre-generated future
+///
+/// This generator is async-reentrant.
+template <typename T>
+std::function<Future<T>()> MakeSingleFutureGenerator(Future<T> future) {
+  assert(future.is_valid());
+  auto state = std::make_shared<Future<T>>(std::move(future));
+  return [state]() -> Future<T> {
+    auto fut = std::move(*state);
+    if (fut.is_valid()) {
+      return fut;
+    } else {
+      return AsyncGeneratorEnd<T>();
+    }
+  };
+}
+
+/// \brief Make a generator that always fails with a given error
+///
+/// This generator is async-reentrant.
+template <typename T>
+AsyncGenerator<T> MakeFailingGenerator(Status st) {
+  assert(!st.ok());
+  auto state = std::make_shared<Status>(std::move(st));
+  return [state]() -> Future<T> {
+    auto st = std::move(*state);
+    if (!st.ok()) {
+      return std::move(st);
+    } else {
+      return AsyncGeneratorEnd<T>();
+    }
+  };
+}
+
+/// \brief Make a generator that always fails with a given error
+///
+/// This overload allows inferring the return type from the argument.
+template <typename T>
+AsyncGenerator<T> MakeFailingGenerator(const Result<T>& result) {
+  return MakeFailingGenerator<T>(result.status());
+}
+
 }  // namespace arrow
diff --git a/cpp/src/arrow/util/async_generator_test.cc b/cpp/src/arrow/util/async_generator_test.cc
index 51e4f948d38..36d06297049 100644
--- a/cpp/src/arrow/util/async_generator_test.cc
+++ b/cpp/src/arrow/util/async_generator_test.cc
@@ -1299,4 +1299,26 @@ TEST(PushGenerator, Stress) {
   }
 }
 
+TEST(SingleFutureGenerator, Basics) {
+  auto fut = Future<TestInt>::Make();
+  auto gen = MakeSingleFutureGenerator(fut);
+  auto collect_fut = CollectAsyncGenerator(gen);
+  AssertNotFinished(collect_fut);
+  fut.MarkFinished(TestInt{42});
+  ASSERT_FINISHES_OK_AND_ASSIGN(auto collected, collect_fut);
+  ASSERT_EQ(collected, std::vector<TestInt>{42});
+  // Generator exhausted
+  collect_fut = CollectAsyncGenerator(gen);
+  ASSERT_FINISHES_OK_AND_EQ(std::vector<TestInt>{}, collect_fut);
+}
+
+TEST(FailingGenerator, Basics) {
+  auto gen = MakeFailingGenerator<TestInt>(Status::IOError("zzz"));
+  auto collect_fut = CollectAsyncGenerator(gen);
+  ASSERT_FINISHES_AND_RAISES(IOError, collect_fut);
+  // Generator exhausted
+  collect_fut = CollectAsyncGenerator(gen);
+  ASSERT_FINISHES_OK_AND_EQ(std::vector<TestInt>{}, collect_fut);
+}
+
 }  // namespace arrow

From fe83dcad19d1ee82280a7c9cb75363975531e9e9 Mon Sep 17 00:00:00 2001
From: Andy Grove <andygrove73@gmail.com>
Date: Wed, 14 Apr 2021 07:07:04 -0400
Subject: [PATCH 021/719] ARROW-12361: [Rust] [DataFusion] Allow users to
 override physical optimization rules

Closes #10013 from andygrove/df-replace-phys-opt-rules

Authored-by: Andy Grove <andygrove73@gmail.com>
Signed-off-by: Andrew Lamb <andrew@nerdnetworks.org>
---
 rust/datafusion/src/execution/context.rs | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/rust/datafusion/src/execution/context.rs b/rust/datafusion/src/execution/context.rs
index 07d5b629e1b..833d7b66281 100644
--- a/rust/datafusion/src/execution/context.rs
+++ b/rust/datafusion/src/execution/context.rs
@@ -680,6 +680,15 @@ impl ExecutionConfig {
         self
     }
 
+    /// Replace the physical optimizer rules
+    pub fn with_physical_optimizer_rules(
+        mut self,
+        physical_optimizers: Vec<Arc<dyn PhysicalOptimizerRule + Send + Sync>>,
+    ) -> Self {
+        self.physical_optimizers = physical_optimizers;
+        self
+    }
+
     /// Adds a new [`OptimizerRule`]
     pub fn add_optimizer_rule(
         mut self,

From c3f55a45dff96f269134665755d999c7e098601e Mon Sep 17 00:00:00 2001
From: Christoph Schulze <christoph.schulze@signavio.com>
Date: Wed, 14 Apr 2021 07:08:00 -0400
Subject: [PATCH 022/719] ARROW-12294: [Rust] Fix boolean kleene kernels with
 no remainder

[PR 9772](https://github.com/apache/arrow/pull/9772) introduced a bug. The boolean kleene kernel would not iterate over the bit chunks of the batch, if there is no validity bitmap on the left or the right input. It will only process the bits of the remainder word. The inital unit test didn't pick a large enough batch size to test this scenario, which is why this was not detected by the tests earlier.

Closes #9965 from ch-sc/ARROW-12294-boolean-kleene-kernels-no-remainder

Authored-by: Christoph Schulze <christoph.schulze@signavio.com>
Signed-off-by: Andrew Lamb <andrew@nerdnetworks.org>
---
 rust/arrow/src/compute/kernels/boolean.rs | 143 +++++++++++++---------
 1 file changed, 85 insertions(+), 58 deletions(-)

diff --git a/rust/arrow/src/compute/kernels/boolean.rs b/rust/arrow/src/compute/kernels/boolean.rs
index b835d60637f..e1d5592d423 100644
--- a/rust/arrow/src/compute/kernels/boolean.rs
+++ b/rust/arrow/src/compute/kernels/boolean.rs
@@ -34,7 +34,6 @@ use crate::error::{ArrowError, Result};
 use crate::util::bit_util::{ceil, round_upto_multiple_of_64};
 use core::iter;
 use lexical_core::Integer;
-use std::iter::FromIterator;
 
 fn binary_boolean_kleene_kernel<F>(
     left: &BooleanArray,
@@ -52,37 +51,19 @@ where
 
     // length and offset of boolean array is measured in bits
     let len = left.len();
-    let left_offset = left.offset();
-    let right_offset = right.offset();
-
-    let left_buffer = left.values();
-    let right_buffer = right.values();
-
-    // If we do not have a validity bitmap, we just use an empty buffer
-    let (left_validity, left_validity_len) = left.data_ref().null_buffer().map_or_else(
-        || (Buffer::from_iter(iter::empty::<bool>()), 0),
-        |buffer| (buffer.clone(), len),
-    );
-    let (right_validity, right_validity_len) =
-        right.data_ref().null_buffer().map_or_else(
-            || (Buffer::from_iter(iter::empty::<bool>()), 0),
-            |buffer| (buffer.clone(), len),
-        );
-
-    let left_chunks = left_buffer.bit_chunks(left_offset, len);
-    let left_valid_chunks = left_validity.bit_chunks(left_offset, left_validity_len);
-    let right_chunks = right_buffer.bit_chunks(right_offset, len);
-    let right_valid_chunks = right_validity.bit_chunks(right_offset, right_validity_len);
 
     // result length measured in bytes (incl. remainder)
     let mut result_len = round_upto_multiple_of_64(len) / 8;
-    // if remainder is absent, the kleene_op code would always resize the result buffers,
-    // which is both unnecessary and expensive. We can prevent the resizing by always
-    // adding 8 additional bytes to the length of both buffers. All bits of these 8 bytes
-    // will always be 0 though.
-    if left_chunks.remainder_len().is_zero() {
+    // The iterator that applies the kleene_op closure always chains an additional iteration
+    // for the remainder chunk, even without a remainder. If the remainder is absent
+    // (length % 64 == 0), kleene_op would resize the result buffers (value_buffer and
+    // valid_buffer) to store 8 additional bytes, because result_len wouldn't include a remainder
+    // chunk. The resizing is unnecessary and expensive. We can prevent it by adding 8 bytes to
+    // result_len here. Nonetheless, all bits of these 8 bytes will be 0.
+    if len % 64 == 0 {
         result_len += 8;
     }
+
     let mut value_buffer = MutableBuffer::new(result_len);
     let mut valid_buffer = MutableBuffer::new(result_len);
 
@@ -102,41 +83,77 @@ where
         valid_buffer.extend_from_slice(&[valid]);
     };
 
-    // To get rid off the additional remainder logic we would need an iterator
-    // which contains a possible remainder word.
-    let remainder = (
-        (
-            left_chunks.remainder_bits(),
-            left_valid_chunks.remainder_bits(),
-        ),
-        (
-            right_chunks.remainder_bits(),
-            right_valid_chunks.remainder_bits(),
-        ),
-    );
+    let left_offset = left.offset();
+    let right_offset = right.offset();
 
-    let base_iter = left_chunks
-        .iter()
-        .zip(left_valid_chunks.iter())
-        .zip(right_chunks.iter().zip(right_valid_chunks.iter()))
-        .chain(iter::once(remainder));
+    let left_buffer = left.values();
+    let right_buffer = right.values();
+
+    let left_chunks = left_buffer.bit_chunks(left_offset, len);
+    let right_chunks = right_buffer.bit_chunks(right_offset, len);
+
+    let left_rem = left_chunks.remainder_bits();
+    let right_rem = right_chunks.remainder_bits();
+
+    let opt_left_valid_chunks_and_rem = left
+        .data_ref()
+        .null_buffer()
+        .map(|b| b.bit_chunks(left_offset, len))
+        .map(|chunks| (chunks.iter(), chunks.remainder_bits()));
+    let opt_right_valid_chunks_and_rem = right
+        .data_ref()
+        .null_buffer()
+        .map(|b| b.bit_chunks(right_offset, len))
+        .map(|chunks| (chunks.iter(), chunks.remainder_bits()));
 
     match (
-        left.data_ref().null_buffer().is_some(),
-        right.data_ref().null_buffer().is_some(),
+        opt_left_valid_chunks_and_rem,
+        opt_right_valid_chunks_and_rem,
     ) {
-        (true, true) => base_iter.for_each(kleene_op),
-        (true, false) => base_iter
-            .map(|(left, (right_data, _))| (left, (right_data, u64::MAX)))
-            .for_each(kleene_op),
-        (false, true) => base_iter
-            .map(|((left_data, _), right)| ((left_data, u64::MAX), right))
-            .for_each(kleene_op),
-        (false, false) => base_iter
-            .map(|((left_data, _), (right_data, _))| {
-                ((left_data, u64::MAX), (right_data, u64::MAX))
-            })
-            .for_each(kleene_op),
+        (
+            Some((left_valid_chunks, left_valid_rem)),
+            Some((right_valid_chunks, right_valid_rem)),
+        ) => {
+            left_chunks
+                .iter()
+                .zip(left_valid_chunks)
+                .zip(right_chunks.iter().zip(right_valid_chunks))
+                .chain(iter::once((
+                    (left_rem, left_valid_rem),
+                    (right_rem, right_valid_rem),
+                )))
+                .for_each(kleene_op);
+        }
+        (Some((left_valid_chunks, left_valid_rem)), None) => {
+            left_chunks
+                .iter()
+                .zip(left_valid_chunks)
+                .zip(right_chunks.iter().zip(iter::repeat(u64::MAX)))
+                .chain(iter::once((
+                    (left_rem, left_valid_rem),
+                    (right_rem, u64::MAX),
+                )))
+                .for_each(kleene_op);
+        }
+        (None, Some((right_valid_chunks, right_valid_rem))) => {
+            left_chunks
+                .iter()
+                .zip(iter::repeat(u64::MAX))
+                .zip(right_chunks.iter().zip(right_valid_chunks))
+                .chain(iter::once((
+                    (left_rem, u64::MAX),
+                    (right_rem, right_valid_rem),
+                )))
+                .for_each(kleene_op);
+        }
+        (None, None) => {
+            left_chunks
+                .iter()
+                .zip(iter::repeat(u64::MAX))
+                .zip(right_chunks.iter().zip(iter::repeat(u64::MAX)))
+                .chain(iter::once(((left_rem, u64::MAX), (right_rem, u64::MAX))))
+                .for_each(kleene_op);
+        }
     };
 
     let bool_buffer: Buffer = value_buffer.into();
@@ -628,6 +645,16 @@ mod tests {
         }
     }
 
+    #[test]
+    fn test_boolean_array_kleene_no_remainder() {
+        let n = 1024;
+        let a = BooleanArray::from(vec![true; n]);
+        let b = BooleanArray::from(vec![None; n]);
+        let result = or_kleene(&a, &b).unwrap();
+
+        assert_eq!(result, a);
+    }
+
     #[test]
     fn test_bool_array_and_kleene_nulls() {
         let a = BooleanArray::from(vec![

From 635ae7725ad3991912e1812ff907b2d81535cf3d Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Wed, 14 Apr 2021 14:06:24 +0200
Subject: [PATCH 023/719] ARROW-12374: [CI][C++][cron] Use Ubuntu 20.04 instead
 of 16.04
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Because we dropped support for Ubuntu 16.04.

Closes #10021 from kou/ci-cron-remove-ubuntu-16.04

Authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 .github/workflows/cpp_cron.yml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/cpp_cron.yml b/.github/workflows/cpp_cron.yml
index c229ad93be3..9e4f3cf388b 100644
--- a/.github/workflows/cpp_cron.yml
+++ b/.github/workflows/cpp_cron.yml
@@ -46,8 +46,8 @@ jobs:
         name:
           - amd64-debian-10-cpp
           - amd64-fedora-33-cpp
-          - amd64-ubuntu-16.04-cpp
           - amd64-ubuntu-18.04-cpp
+          - amd64-ubuntu-20.04-cpp
         include:
           - name: amd64-debian-10-cpp
             image: debian-cpp
@@ -57,14 +57,14 @@ jobs:
             image: fedora-cpp
             title: AMD64 Fedora 33 C++
             fedora: 33
-          - name: amd64-ubuntu-16.04-cpp
-            image: ubuntu-cpp
-            title: AMD64 Ubuntu 16.04 C++
-            ubuntu: 16.04
           - name: amd64-ubuntu-18.04-cpp
             image: ubuntu-cpp
             title: AMD64 Ubuntu 18.04 C++
             ubuntu: 18.04
+          - name: amd64-ubuntu-20.04-cpp
+            image: ubuntu-cpp
+            title: AMD64 Ubuntu 20.04 C++
+            ubuntu: 20.04
     env:
       # the defaults here should correspond to the values in .env
       ARCH: 'amd64'

From fb892c639d5881b3e2014c3e232a5e78a79ab3cd Mon Sep 17 00:00:00 2001
From: Kazuaki Ishizaki <ishizaki@jp.ibm.com>
Date: Wed, 14 Apr 2021 14:13:38 +0200
Subject: [PATCH 024/719] ARROW-12330: [Developer] Restore values at counters
 column of Archery benchmark
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This PR restores values at `counters` column of Archery benchmark. #9140 always suppressed values at `counters` column regardless `--no-counter`. In addition, this PR stores `counters` value into json file.

Before
```
% archery benchmark diff --benchmark-filter="SetBitsTo" --output=head2.json HEAD HEAD~1
...
---------------------------------------------------------------------------
Benchmark                 Time             CPU   Iterations UserCounters...
---------------------------------------------------------------------------
SetBitsTo/2            8.15 ns         8.15 ns     81991087 bytes_per_second=234.044M/s
SetBitsTo/16           7.78 ns         7.78 ns     89928878 bytes_per_second=1.91429G/s
SetBitsTo/1024         13.9 ns         13.9 ns     50372172 bytes_per_second=68.6182G/s
SetBitsTo/131072       3508 ns         3508 ns       199335 bytes_per_second=34.7944G/s
----------------------------------------------------------------------
Non-regressions: (4)
----------------------------------------------------------------------
        benchmark         baseline        contender  change % counters
     SetBitsTo/16    1.877 GiB/sec    1.914 GiB/sec     1.975       {}
      SetBitsTo/2  230.566 MiB/sec  234.044 MiB/sec     1.509       {}
 SetBitsTo/131072   34.722 GiB/sec   34.794 GiB/sec     0.207       {}
   SetBitsTo/1024   68.593 GiB/sec   68.618 GiB/sec     0.037       {}
```

After
```
---------------------------------------------------------------------------
Benchmark                 Time             CPU   Iterations UserCounters...
---------------------------------------------------------------------------
SetBitsTo/2            8.39 ns         8.39 ns     81980047 bytes_per_second=227.438M/s
SetBitsTo/16           7.88 ns         7.88 ns     84936624 bytes_per_second=1.89105G/s
SetBitsTo/1024         13.9 ns         13.9 ns     50376587 bytes_per_second=68.6064G/s
SetBitsTo/131072       3513 ns         3513 ns       200598 bytes_per_second=34.7447G/s
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Non-regressions: (4)
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
        benchmark         baseline        contender  change %                                                                                                       counters
      SetBitsTo/2  227.438 MiB/sec  235.984 MiB/sec     3.757     {'run_name': 'SetBitsTo/2', 'repetitions': 0, 'repetition_index': 0, 'threads': 1, 'iterations': 81980047}
     SetBitsTo/16    1.891 GiB/sec    1.913 GiB/sec     1.137    {'run_name': 'SetBitsTo/16', 'repetitions': 0, 'repetition_index': 0, 'threads': 1, 'iterations': 84936624}
 SetBitsTo/131072   34.745 GiB/sec   34.771 GiB/sec     0.075  {'run_name': 'SetBitsTo/131072', 'repetitions': 0, 'repetition_index': 0, 'threads': 1, 'iterations': 200598}
   SetBitsTo/1024   68.606 GiB/sec   68.624 GiB/sec     0.026  {'run_name': 'SetBitsTo/1024', 'repetitions': 0, 'repetition_index': 0, 'threads': 1, 'iterations': 50376587}
```

Closes #9985 from kiszk/ARROW-12330

Authored-by: Kazuaki Ishizaki <ishizaki@jp.ibm.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 dev/archery/archery/benchmark/codec.py       |  1 +
 dev/archery/archery/benchmark/google.py      |  5 +--
 dev/archery/archery/tests/test_benchmarks.py | 37 ++++++++++++++++++++
 3 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/dev/archery/archery/benchmark/codec.py b/dev/archery/archery/benchmark/codec.py
index 359dea9b9f3..4157890d13d 100644
--- a/dev/archery/archery/benchmark/codec.py
+++ b/dev/archery/archery/benchmark/codec.py
@@ -50,6 +50,7 @@ def encode(b):
             "values": b.values,
             "time_unit": b.time_unit,
             "times": b.times,
+            "counters": b.counters,
         }
 
     @staticmethod
diff --git a/dev/archery/archery/benchmark/google.py b/dev/archery/archery/benchmark/google.py
index c1644dcbd9c..ebcc5263645 100644
--- a/dev/archery/archery/benchmark/google.py
+++ b/dev/archery/archery/benchmark/google.py
@@ -157,8 +157,9 @@ def __init__(self, name, runs):
         values = [b.value for b in self.runs]
         times = [b.real_time for b in self.runs]
         # Slight kludge to extract the UserCounters for each benchmark
-        self.counters = self.runs[0].counters
-        super().__init__(name, unit, less_is_better, values, time_unit, times)
+        counters = self.runs[0].counters
+        super().__init__(name, unit, less_is_better, values, time_unit, times,
+                         counters)
 
     def __repr__(self):
         return "GoogleBenchmark[name={},runs={}]".format(self.names, self.runs)
diff --git a/dev/archery/archery/tests/test_benchmarks.py b/dev/archery/archery/tests/test_benchmarks.py
index dffe698d41d..fab1e8d4432 100644
--- a/dev/archery/archery/tests/test_benchmarks.py
+++ b/dev/archery/archery/tests/test_benchmarks.py
@@ -152,6 +152,12 @@ def test_items_per_second():
         "time_unit": "ns",
     }
     archery_result = {
+        "counters": {"iterations": 5964,
+                     "null_percent": 0.0,
+                     "repetition_index": 0,
+                     "repetitions": 0,
+                     "run_name": name,
+                     "threads": 1},
         "name": name,
         "unit": "items_per_second",
         "less_is_better": False,
@@ -180,6 +186,11 @@ def test_bytes_per_second():
         "time_unit": "ns",
     }
     archery_result = {
+        "counters": {"iterations": 47,
+                     "repetition_index": 1,
+                     "repetitions": 0,
+                     "run_name": name,
+                     "threads": 1},
         "name": name,
         "unit": "bytes_per_second",
         "less_is_better": False,
@@ -212,6 +223,12 @@ def test_both_items_and_bytes_per_second():
     }
     # Note that bytes_per_second trumps items_per_second
     archery_result = {
+        "counters": {"iterations": 5964,
+                     "null_percent": 0.0,
+                     "repetition_index": 0,
+                     "repetitions": 0,
+                     "run_name": name,
+                     "threads": 1},
         "name": name,
         "unit": "bytes_per_second",
         "less_is_better": False,
@@ -239,6 +256,11 @@ def test_neither_items_nor_bytes_per_second():
         "time_unit": "ns",
     }
     archery_result = {
+        "counters": {"iterations": 352765,
+                     "repetition_index": 0,
+                     "repetitions": 0,
+                     "run_name": name,
+                     "threads": 1},
         "name": name,
         "unit": "ns",
         "less_is_better": True,
@@ -266,6 +288,11 @@ def test_prefer_real_time():
         "time_unit": "ns",
     }
     archery_result = {
+        "counters": {"iterations": 352765,
+                     "repetition_index": 0,
+                     "repetitions": 0,
+                     "run_name": name,
+                     "threads": 1},
         "name": name,
         "unit": "ns",
         "less_is_better": True,
@@ -292,6 +319,11 @@ def test_prefer_cpu_time():
         "time_unit": "ns",
     }
     archery_result = {
+        "counters": {"iterations": 352765,
+                     "repetition_index": 0,
+                     "repetitions": 0,
+                     "run_name": name,
+                     "threads": 1},
         "name": name,
         "unit": "ns",
         "less_is_better": True,
@@ -330,6 +362,11 @@ def test_omits_aggregates():
         "time_unit": "ns",
     }
     archery_result = {
+        "counters": {"iterations": 352765,
+                     "repetition_index": 0,
+                     "repetitions": 0,
+                     "run_name": name,
+                     "threads": 1},
         "name": name,
         "unit": "ns",
         "less_is_better": True,

From 9c85e5465a5738f5ba9a2455d1b566948f89d0f3 Mon Sep 17 00:00:00 2001
From: Weston Pace <weston.pace@gmail.com>
Date: Wed, 14 Apr 2021 08:30:43 -0400
Subject: [PATCH 025/719] ARROW-12287: [C++] Create enumerating generator

Adds an enumerating generator which tags items with their index as well as whether or not they were the last item in the sequence.  This is needed for reassembly potentially out of order record batches during scan.

Closes #9945 from westonpace/feature/arrow-12287

Lead-authored-by: Weston Pace <weston.pace@gmail.com>
Co-authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/src/arrow/util/async_generator.h       | 74 ++++++++++++++++++++++
 cpp/src/arrow/util/async_generator_test.cc | 46 ++++++++++++++
 2 files changed, 120 insertions(+)

diff --git a/cpp/src/arrow/util/async_generator.h b/cpp/src/arrow/util/async_generator.h
index 06e823abf28..5f42037017f 100644
--- a/cpp/src/arrow/util/async_generator.h
+++ b/cpp/src/arrow/util/async_generator.h
@@ -1070,6 +1070,80 @@ AsyncGenerator<T> MakeConcatenatedGenerator(AsyncGenerator<AsyncGenerator<T>> so
   return MergedGenerator<T>(std::move(source), 1);
 }
 
+template <typename T>
+struct Enumerated {
+  T value;
+  int index;
+  bool last;
+};
+
+template <typename T>
+struct IterationTraits<Enumerated<T>> {
+  static Enumerated<T> End() { return Enumerated<T>{IterationEnd<T>(), -1, false}; }
+  static bool IsEnd(const Enumerated<T>& val) { return val.index < 0; }
+};
+
+/// \see MakeEnumeratedGenerator
+template <typename T>
+class EnumeratingGenerator {
+ public:
+  EnumeratingGenerator(AsyncGenerator<T> source, T initial_value)
+      : state_(std::make_shared<State>(std::move(source), std::move(initial_value))) {}
+
+  Future<Enumerated<T>> operator()() {
+    if (state_->finished) {
+      return AsyncGeneratorEnd<Enumerated<T>>();
+    } else {
+      auto state = state_;
+      return state->source().Then([state](const T& next) {
+        auto finished = IsIterationEnd<T>(next);
+        auto prev = Enumerated<T>{state->prev_value, state->prev_index, finished};
+        state->prev_value = next;
+        state->prev_index++;
+        state->finished = finished;
+        return prev;
+      });
+    }
+  }
+
+ private:
+  struct State {
+    State(AsyncGenerator<T> source, T initial_value)
+        : source(std::move(source)), prev_value(std::move(initial_value)), prev_index(0) {
+      finished = IsIterationEnd<T>(prev_value);
+    }
+
+    AsyncGenerator<T> source;
+    T prev_value;
+    int prev_index;
+    bool finished;
+  };
+
+  std::shared_ptr<State> state_;
+};
+
+/// Wraps items from a source generator with positional information
+///
+/// When used with MakeMergedGenerator and MakeSequencingGenerator this allows items to be
+/// processed in a "first-available" fashion and later resequenced which can reduce the
+/// impact of sources with erratic performance (e.g. a filesystem where some items may
+/// take longer to read than others).
+///
+/// TODO(ARROW-12371) Would require this generator be async-reentrant
+///
+/// \see MakeSequencingGenerator for an example of putting items back in order
+///
+/// This generator is not async-reentrant
+///
+/// This generator buffers one item (so it knows which item is the last item)
+template <typename T>
+AsyncGenerator<Enumerated<T>> MakeEnumeratedGenerator(AsyncGenerator<T> source) {
+  return FutureFirstGenerator<Enumerated<T>>(
+      source().Then([source](const T& initial_value) -> AsyncGenerator<Enumerated<T>> {
+        return EnumeratingGenerator<T>(std::move(source), initial_value);
+      }));
+}
+
 /// \see MakeTransferredGenerator
 template <typename T>
 class TransferringGenerator {
diff --git a/cpp/src/arrow/util/async_generator_test.cc b/cpp/src/arrow/util/async_generator_test.cc
index 36d06297049..be39261640e 100644
--- a/cpp/src/arrow/util/async_generator_test.cc
+++ b/cpp/src/arrow/util/async_generator_test.cc
@@ -228,6 +228,8 @@ class GeneratorTestFixture : public ::testing::TestWithParam<bool> {
     return gen;
   }
 
+  AsyncGenerator<TestInt> MakeEmptySource() { return MakeSource({}); }
+
   AsyncGenerator<TestInt> MakeFailingSource() {
     AsyncGenerator<TestInt> gen = [] {
       return Future<TestInt>::MakeFinished(Status::Invalid("XYZ"));
@@ -1040,6 +1042,50 @@ TEST(TestAsyncUtil, ReadaheadFailed) {
   ASSERT_TRUE(IsIterationEnd(definitely_last));
 }
 
+class EnumeratorTestFixture : public GeneratorTestFixture {
+ protected:
+  void AssertEnumeratedCorrectly(AsyncGenerator<Enumerated<TestInt>>& gen,
+                                 int num_items) {
+    auto collected = CollectAsyncGenerator(gen);
+    ASSERT_FINISHES_OK_AND_ASSIGN(auto items, collected);
+    EXPECT_EQ(num_items, items.size());
+
+    for (const auto& item : items) {
+      ASSERT_EQ(item.index, item.value.value);
+      bool last = item.index == num_items - 1;
+      ASSERT_EQ(last, item.last);
+    }
+    AssertGeneratorExhausted(gen);
+  }
+};
+
+TEST_P(EnumeratorTestFixture, Basic) {
+  constexpr int NITEMS = 100;
+
+  auto source = MakeSource(RangeVector(NITEMS));
+  auto enumerated = MakeEnumeratedGenerator(std::move(source));
+
+  AssertEnumeratedCorrectly(enumerated, NITEMS);
+}
+
+TEST_P(EnumeratorTestFixture, Empty) {
+  auto source = MakeEmptySource();
+  auto enumerated = MakeEnumeratedGenerator(std::move(source));
+  AssertGeneratorExhausted(enumerated);
+}
+
+TEST_P(EnumeratorTestFixture, Error) {
+  auto source = FailsAt(MakeSource({1, 2, 3}), 1);
+  auto enumerated = MakeEnumeratedGenerator(std::move(source));
+
+  // Even though the first item finishes ok the enumerator buffers it.  The error then
+  // takes priority over the buffered result.
+  ASSERT_FINISHES_AND_RAISES(Invalid, enumerated());
+}
+
+INSTANTIATE_TEST_SUITE_P(EnumeratedTests, EnumeratorTestFixture,
+                         ::testing::Values(false, true));
+
 class SequencerTestFixture : public GeneratorTestFixture {
  protected:
   void RandomShuffle(std::vector<TestInt>& values) {

From 31c8c750513f8c75a5e76a267354cb897c56468e Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Wed, 14 Apr 2021 15:06:57 +0200
Subject: [PATCH 026/719] ARROW-12262: [Doc] Enable S3 and Flight in docs build

Also use Ubuntu 20.04 by default, instead of 18.04.

Closes #9933 from pitrou/ARROW-12262-ubuntu-docs

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 .env                                          |   2 +-
 .github/workflows/cpp.yml                     |   2 +-
 ci/docker/linux-apt-docs.dockerfile           |  14 +-
 ci/docker/ubuntu-20.10-cpp.dockerfile         | 137 ++++++++++++++++++
 ci/scripts/docs_build.sh                      |   6 +-
 dev/release/post-09-docs.sh                   |   1 +
 dev/tasks/tasks.yml                           |   8 +-
 docker-compose.yml                            |   1 +
 .../cpp/examples/row_columnar_conversion.rst  |   2 +-
 docs/source/status.rst                        |   2 +-
 10 files changed, 162 insertions(+), 13 deletions(-)
 create mode 100644 ci/docker/ubuntu-20.10-cpp.dockerfile

diff --git a/.env b/.env
index cd6b57e004a..c4eb6d2de43 100644
--- a/.env
+++ b/.env
@@ -42,7 +42,7 @@ ULIMIT_CORE=-1
 REPO=apache/arrow-dev
 CUDA=9.1
 DEBIAN=10
-UBUNTU=18.04
+UBUNTU=20.04
 FEDORA=33
 PYTHON=3.6
 LLVM=11
diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml
index 5f25deb4512..0bcf3460ad4 100644
--- a/.github/workflows/cpp.yml
+++ b/.github/workflows/cpp.yml
@@ -59,7 +59,7 @@ jobs:
           - image: conda-cpp
             title: AMD64 Conda C++
           - image: ubuntu-cpp-sanitizer
-            title: AMD64 Ubuntu 18.04 C++ ASAN UBSAN
+            title: AMD64 Ubuntu 20.04 C++ ASAN UBSAN
     steps:
       - name: Checkout Arrow
         uses: actions/checkout@v2
diff --git a/ci/docker/linux-apt-docs.dockerfile b/ci/docker/linux-apt-docs.dockerfile
index 604a05afb07..46c31bbd480 100644
--- a/ci/docker/linux-apt-docs.dockerfile
+++ b/ci/docker/linux-apt-docs.dockerfile
@@ -18,9 +18,10 @@
 ARG base
 FROM ${base}
 
-ARG r=3.6
+ARG r=4.0
 ARG jdk=8
 
+# See R install instructions at https://cloud.r-project.org/bin/linux/ubuntu/
 RUN apt-get update -y && \
     apt-get install -y \
         dirmngr \
@@ -29,8 +30,8 @@ RUN apt-get update -y && \
     apt-key adv \
         --keyserver keyserver.ubuntu.com \
         --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 && \
-    add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu '$(lsb_release -cs)'-cran35/' && \
-    apt-get install -y \
+    add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu '$(lsb_release -cs)'-cran40/' && \
+    apt-get install -y --no-install-recommends \
         autoconf-archive \
         automake \
         curl \
@@ -43,6 +44,7 @@ RUN apt-get update -y && \
         libgirepository1.0-dev \
         libglib2.0-doc \
         libharfbuzz-dev \
+        libtiff-dev \
         libtool \
         libxml2-dev \
         ninja-build \
@@ -72,7 +74,6 @@ RUN wget -q -O - https://deb.nodesource.com/setup_${node}.x | bash - && \
     rm -rf /var/lib/apt/lists/* && \
     npm install -g yarn
 
-# Sphinx is pinned because of ARROW-9693
 RUN pip install \
         meson \
         breathe \
@@ -96,8 +97,11 @@ COPY r/DESCRIPTION /arrow/r/
 RUN /arrow/ci/scripts/r_deps.sh /arrow && \
     R -e "install.packages('pkgdown')"
 
-ENV ARROW_PYTHON=ON \
+ENV ARROW_FLIGHT=ON \
+    ARROW_PYTHON=ON \
+    ARROW_S3=ON \
     ARROW_BUILD_STATIC=OFF \
     ARROW_BUILD_TESTS=OFF \
     ARROW_BUILD_UTILITIES=OFF \
     ARROW_USE_GLOG=OFF \
+    CMAKE_UNITY_BUILD=ON \
diff --git a/ci/docker/ubuntu-20.10-cpp.dockerfile b/ci/docker/ubuntu-20.10-cpp.dockerfile
new file mode 100644
index 00000000000..80eb072e7ed
--- /dev/null
+++ b/ci/docker/ubuntu-20.10-cpp.dockerfile
@@ -0,0 +1,137 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG base=amd64/ubuntu:20.10
+FROM ${base}
+ARG arch
+
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+
+RUN echo "debconf debconf/frontend select Noninteractive" | \
+        debconf-set-selections
+
+# Installs LLVM toolchain, for Gandiva and testing other compilers
+#
+# Note that this is installed before the base packages to improve iteration
+# while debugging package list with docker build.
+ARG clang_tools
+ARG llvm
+RUN if [ "${llvm}" -gt "10" ]; then \
+      apt-get update -y -q && \
+      apt-get install -y -q --no-install-recommends \
+          apt-transport-https \
+          ca-certificates \
+          gnupg \
+          wget && \
+      wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \
+      echo "deb https://apt.llvm.org/groovy/ llvm-toolchain-groovy-${llvm} main" > \
+         /etc/apt/sources.list.d/llvm.list && \
+      if [ "${clang_tools}" != "${llvm}" -a "${clang_tools}" -gt 10 ]; then \
+        echo "deb https://apt.llvm.org/groovy/ llvm-toolchain-groovy-${clang_tools} main" > \
+           /etc/apt/sources.list.d/clang-tools.list; \
+      fi \
+    fi && \
+    apt-get update -y -q && \
+    apt-get install -y -q --no-install-recommends \
+        clang-${clang_tools} \
+        clang-${llvm} \
+        clang-format-${clang_tools} \
+        clang-tidy-${clang_tools} \
+        llvm-${llvm}-dev && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists*
+
+# Installs C++ toolchain and dependencies
+RUN apt-get update -y -q && \
+    apt-get install -y -q --no-install-recommends \
+        autoconf \
+        ca-certificates \
+        ccache \
+        cmake \
+        g++ \
+        gcc \
+        gdb \
+        git \
+        libbenchmark-dev \
+        libboost-filesystem-dev \
+        libboost-system-dev \
+        libbrotli-dev \
+        libbz2-dev \
+        libgflags-dev \
+        libcurl4-openssl-dev \
+        libgoogle-glog-dev \
+        libgrpc++-dev \
+        liblz4-dev \
+        libprotobuf-dev \
+        libprotoc-dev \
+        libre2-dev \
+        libsnappy-dev \
+        libssl-dev \
+        libthrift-dev \
+        libutf8proc-dev \
+        libzstd-dev \
+        make \
+        ninja-build \
+        pkg-config \
+        protobuf-compiler \
+        protobuf-compiler-grpc \
+        rapidjson-dev \
+        tzdata \
+        wget && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists*
+
+COPY ci/scripts/install_minio.sh \
+     /arrow/ci/scripts/
+RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local
+
+# Prioritize system packages and local installation
+# The following dependencies will be downloaded due to missing/invalid packages
+# provided by the distribution:
+# - libc-ares-dev does not install CMake config files
+# - flatbuffer is not packaged
+# - libgtest-dev only provide sources
+# - libprotobuf-dev only provide sources
+ENV ARROW_BUILD_TESTS=ON \
+    ARROW_DEPENDENCY_SOURCE=SYSTEM \
+    ARROW_DATASET=ON \
+    ARROW_FLIGHT=OFF \
+    ARROW_GANDIVA=ON \
+    ARROW_HDFS=ON \
+    ARROW_HOME=/usr/local \
+    ARROW_INSTALL_NAME_RPATH=OFF \
+    ARROW_NO_DEPRECATED_API=ON \
+    ARROW_ORC=ON \
+    ARROW_PARQUET=ON \
+    ARROW_PLASMA=ON \
+    ARROW_S3=ON \
+    ARROW_USE_ASAN=OFF \
+    ARROW_USE_CCACHE=ON \
+    ARROW_USE_UBSAN=OFF \
+    ARROW_WITH_BROTLI=ON \
+    ARROW_WITH_BZ2=ON \
+    ARROW_WITH_LZ4=ON \
+    ARROW_WITH_SNAPPY=ON \
+    ARROW_WITH_ZLIB=ON \
+    ARROW_WITH_ZSTD=ON \
+    AWSSDK_SOURCE=BUNDLED \
+    GTest_SOURCE=BUNDLED \
+    ORC_SOURCE=BUNDLED \
+    PARQUET_BUILD_EXAMPLES=ON \
+    PARQUET_BUILD_EXECUTABLES=ON \
+    PATH=/usr/lib/ccache/:$PATH \
+    PYTHON=python3
diff --git a/ci/scripts/docs_build.sh b/ci/scripts/docs_build.sh
index a0d926a335e..e6ee768ee87 100755
--- a/ci/scripts/docs_build.sh
+++ b/ci/scripts/docs_build.sh
@@ -27,8 +27,10 @@ export GI_TYPELIB_PATH=${ARROW_HOME}/lib/girepository-1.0
 export CFLAGS="-DARROW_NO_DEPRECATED_API"
 export CXXFLAGS="-DARROW_NO_DEPRECATED_API"
 
-# Prose and Python
-sphinx-build -b html ${arrow_dir}/docs/source ${build_dir}
+ncpus=$(python3 -c "import os; print(os.cpu_count())")
+
+# Sphinx docs
+sphinx-build -b html -j ${ncpus} ${arrow_dir}/docs/source ${build_dir}
 
 # C++ - original doxygen
 # rsync -a ${arrow_dir}/cpp/apidoc/ ${build_dir}/cpp
diff --git a/dev/release/post-09-docs.sh b/dev/release/post-09-docs.sh
index 51e74541eb7..c9f75b48b2c 100755
--- a/dev/release/post-09-docs.sh
+++ b/dev/release/post-09-docs.sh
@@ -46,6 +46,7 @@ git checkout "${release_tag}"
 archery docker run \
   -v "${ARROW_SITE_DIR}/docs:/build/docs" \
   -e ARROW_DOCS_VERSION="${version}" \
+  -e UBUNTU=20.10 \
   ubuntu-docs
 
 : ${PUSH:=1}
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index dcf49414fcc..0080b387663 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -674,6 +674,8 @@ tasks:
     params:
       target: "ubuntu-bionic"
       task_namespace: "apt"
+      env:
+        UBUNTU: 18.04
       upload_extensions:
         - .ddeb
         - .deb
@@ -1474,6 +1476,8 @@ tasks:
     ci: azure
     template: docker-tests/azure.linux.yml
     params:
+      env:
+        UBUNTU: 18.04
       run: ubuntu-r-sanitizer
 
   test-debian-10-go-1.15:
@@ -1484,12 +1488,12 @@ tasks:
         GO: 1.15
       run: debian-go
 
-  test-ubuntu-18.04-docs:
+  test-ubuntu-20.10-docs:
     ci: azure
     template: docker-tests/azure.linux.yml
     params:
       env:
-        UBUNTU: 18.04
+        UBUNTU: "20.10"
       run: ubuntu-docs
 
   ############################## vcpkg tests ##################################
diff --git a/docker-compose.yml b/docker-compose.yml
index 539d5adcb97..b22b1f6b536 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -366,6 +366,7 @@ services:
       ARROW_FUZZING: "ON"  # Check fuzz regressions
       ARROW_JEMALLOC: "OFF"
       ARROW_ORC: "OFF"
+      ARROW_S3: "OFF"
       ARROW_USE_ASAN: "ON"
       ARROW_USE_UBSAN: "ON"
       # utf8proc 2.1.0 in Ubuntu Bionic has test failures
diff --git a/docs/source/cpp/examples/row_columnar_conversion.rst b/docs/source/cpp/examples/row_columnar_conversion.rst
index 02fd61b50c7..3f45864c228 100644
--- a/docs/source/cpp/examples/row_columnar_conversion.rst
+++ b/docs/source/cpp/examples/row_columnar_conversion.rst
@@ -24,4 +24,4 @@ Row to columnar conversion
 The following example converts an array of structs to a :class:`arrow::Table`
 instance, and then converts it back to the original array of structs.
 
-.. literalinclude:: ../../../../cpp/examples/arrow/row-wise-conversion-example.cc
+.. literalinclude:: ../../../../cpp/examples/arrow/row_wise_conversion_example.cc
diff --git a/docs/source/status.rst b/docs/source/status.rst
index acf5af90d52..176d35eb12b 100644
--- a/docs/source/status.rst
+++ b/docs/source/status.rst
@@ -126,7 +126,7 @@ IPC Format
 +-----------------------------+-------+-------+-------+------------+-------+-------+-------+
 | Sparse tensors              | ✓     |       |       |            |       |       |       |
 +-----------------------------+-------+-------+-------+------------+-------+-------+-------+
-| Buffer compression          | ✓     | ✓ (3) | ✓    |            |       |       | ✓     |
+| Buffer compression          | ✓     | ✓ (3) | ✓     |            |       |       | ✓     |
 +-----------------------------+-------+-------+-------+------------+-------+-------+-------+
 | Endianness conversion       | ✓ (2) |       |       |            |       |       |       |
 +-----------------------------+-------+-------+-------+------------+-------+-------+-------+

From 6f2e05d7e2bdd8f58adb96251bd07d744973e2c0 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Wed, 14 Apr 2021 09:30:32 -0400
Subject: [PATCH 027/719] ARROW-11677: [C++][Docs] Add basic C++ datasets
 documentation

This is mostly based on the Python documentation. This also adds a new C++ example to accompany the documentation.

Closes #9810 from lidavidm/arrow-11677

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/apidoc/Doxyfile                           |   1 +
 cpp/examples/arrow/CMakeLists.txt             |   5 +
 .../arrow/dataset_documentation_example.cc    | 355 +++++++++++++++
 cpp/src/arrow/dataset/dataset.h               |  18 +-
 cpp/src/arrow/dataset/discovery.h             |  88 ++--
 cpp/src/arrow/dataset/file_base.h             |  16 +
 cpp/src/arrow/dataset/file_csv.h              |   6 +
 cpp/src/arrow/dataset/file_ipc.h              |   6 +
 cpp/src/arrow/dataset/file_parquet.h          |  69 +--
 cpp/src/arrow/dataset/partition.h             |  26 +-
 cpp/src/arrow/dataset/scanner.h               |  86 ++--
 docs/source/cpp/api.rst                       |   1 +
 docs/source/cpp/api/dataset.rst               |  75 ++++
 docs/source/cpp/dataset.rst                   | 403 ++++++++++++++++++
 .../dataset_documentation_example.rst         |  27 ++
 docs/source/cpp/examples/index.rst            |   1 +
 docs/source/cpp/getting_started.rst           |   1 +
 docs/source/cpp/io.rst                        |   2 +
 docs/source/python/dataset.rst                |  38 +-
 19 files changed, 1097 insertions(+), 127 deletions(-)
 create mode 100644 cpp/examples/arrow/dataset_documentation_example.cc
 create mode 100644 docs/source/cpp/api/dataset.rst
 create mode 100644 docs/source/cpp/dataset.rst
 create mode 100644 docs/source/cpp/examples/dataset_documentation_example.rst

diff --git a/cpp/apidoc/Doxyfile b/cpp/apidoc/Doxyfile
index f6b782276e3..d8b0928ed3c 100644
--- a/cpp/apidoc/Doxyfile
+++ b/cpp/apidoc/Doxyfile
@@ -2170,6 +2170,7 @@ PREDEFINED             = __attribute__(x)= \
                          __declspec(x)= \
                          PARQUET_EXPORT= \
                          ARROW_EXPORT= \
+                         ARROW_DS_EXPORT= \
                          ARROW_FLIGHT_EXPORT= \
                          ARROW_EXTERN_TEMPLATE= \
                          ARROW_DEPRECATED(x)=
diff --git a/cpp/examples/arrow/CMakeLists.txt b/cpp/examples/arrow/CMakeLists.txt
index 1abbf52ac3e..aad68744fe6 100644
--- a/cpp/examples/arrow/CMakeLists.txt
+++ b/cpp/examples/arrow/CMakeLists.txt
@@ -28,4 +28,9 @@ if (ARROW_PARQUET AND ARROW_DATASET)
     EXTRA_LINK_LIBS
     ${DATASET_EXAMPLES_LINK_LIBS})
   add_dependencies(dataset_parquet_scan_example parquet)
+
+  ADD_ARROW_EXAMPLE(dataset_documentation_example
+    EXTRA_LINK_LIBS
+    ${DATASET_EXAMPLES_LINK_LIBS})
+  add_dependencies(dataset_documentation_example parquet)
 endif()
diff --git a/cpp/examples/arrow/dataset_documentation_example.cc b/cpp/examples/arrow/dataset_documentation_example.cc
new file mode 100644
index 00000000000..6954460d413
--- /dev/null
+++ b/cpp/examples/arrow/dataset_documentation_example.cc
@@ -0,0 +1,355 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// This example showcases various ways to work with Datasets. It's
+// intended to be paired with the documentation.
+
+#include <arrow/api.h>
+#include <arrow/compute/cast.h>
+#include <arrow/dataset/dataset.h>
+#include <arrow/dataset/discovery.h>
+#include <arrow/dataset/expression.h>
+#include <arrow/dataset/file_base.h>
+#include <arrow/dataset/file_ipc.h>
+#include <arrow/dataset/file_parquet.h>
+#include <arrow/dataset/scanner.h>
+#include <arrow/filesystem/filesystem.h>
+#include <arrow/ipc/writer.h>
+#include <arrow/util/iterator.h>
+#include <parquet/arrow/writer.h>
+
+#include <iostream>
+#include <vector>
+
+namespace ds = arrow::dataset;
+namespace fs = arrow::fs;
+
+#define ABORT_ON_FAILURE(expr)                     \
+  do {                                             \
+    arrow::Status status_ = (expr);                \
+    if (!status_.ok()) {                           \
+      std::cerr << status_.message() << std::endl; \
+      abort();                                     \
+    }                                              \
+  } while (0);
+
+// Generate some data for the rest of this example.
+std::shared_ptr<arrow::Table> CreateTable() {
+  auto schema =
+      arrow::schema({arrow::field("a", arrow::int64()), arrow::field("b", arrow::int64()),
+                     arrow::field("c", arrow::int64())});
+  std::shared_ptr<arrow::Array> array_a;
+  std::shared_ptr<arrow::Array> array_b;
+  std::shared_ptr<arrow::Array> array_c;
+  arrow::NumericBuilder<arrow::Int64Type> builder;
+  ABORT_ON_FAILURE(builder.AppendValues({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}));
+  ABORT_ON_FAILURE(builder.Finish(&array_a));
+  builder.Reset();
+  ABORT_ON_FAILURE(builder.AppendValues({9, 8, 7, 6, 5, 4, 3, 2, 1, 0}));
+  ABORT_ON_FAILURE(builder.Finish(&array_b));
+  builder.Reset();
+  ABORT_ON_FAILURE(builder.AppendValues({1, 2, 1, 2, 1, 2, 1, 2, 1, 2}));
+  ABORT_ON_FAILURE(builder.Finish(&array_c));
+  return arrow::Table::Make(schema, {array_a, array_b, array_c});
+}
+
+// Set up a dataset by writing two Parquet files.
+std::string CreateExampleParquetDataset(const std::shared_ptr<fs::FileSystem>& filesystem,
+                                        const std::string& root_path) {
+  auto base_path = root_path + "/parquet_dataset";
+  ABORT_ON_FAILURE(filesystem->CreateDir(base_path));
+  // Create an Arrow Table
+  auto table = CreateTable();
+  // Write it into two Parquet files
+  auto output = filesystem->OpenOutputStream(base_path + "/data1.parquet").ValueOrDie();
+  ABORT_ON_FAILURE(parquet::arrow::WriteTable(
+      *table->Slice(0, 5), arrow::default_memory_pool(), output, /*chunk_size=*/2048));
+  output = filesystem->OpenOutputStream(base_path + "/data2.parquet").ValueOrDie();
+  ABORT_ON_FAILURE(parquet::arrow::WriteTable(
+      *table->Slice(5), arrow::default_memory_pool(), output, /*chunk_size=*/2048));
+  return base_path;
+}
+
+// Set up a dataset by writing two Feather files.
+std::string CreateExampleFeatherDataset(const std::shared_ptr<fs::FileSystem>& filesystem,
+                                        const std::string& root_path) {
+  auto base_path = root_path + "/feather_dataset";
+  ABORT_ON_FAILURE(filesystem->CreateDir(base_path));
+  // Create an Arrow Table
+  auto table = CreateTable();
+  // Write it into two Feather files
+  auto output = filesystem->OpenOutputStream(base_path + "/data1.feather").ValueOrDie();
+  auto writer = arrow::ipc::MakeFileWriter(output.get(), table->schema()).ValueOrDie();
+  ABORT_ON_FAILURE(writer->WriteTable(*table->Slice(0, 5)));
+  ABORT_ON_FAILURE(writer->Close());
+  output = filesystem->OpenOutputStream(base_path + "/data2.feather").ValueOrDie();
+  writer = arrow::ipc::MakeFileWriter(output.get(), table->schema()).ValueOrDie();
+  ABORT_ON_FAILURE(writer->WriteTable(*table->Slice(5)));
+  ABORT_ON_FAILURE(writer->Close());
+  return base_path;
+}
+
+// Set up a dataset by writing files with partitioning
+std::string CreateExampleParquetHivePartitionedDataset(
+    const std::shared_ptr<fs::FileSystem>& filesystem, const std::string& root_path) {
+  auto base_path = root_path + "/parquet_dataset";
+  ABORT_ON_FAILURE(filesystem->CreateDir(base_path));
+  // Create an Arrow Table
+  auto schema = arrow::schema(
+      {arrow::field("a", arrow::int64()), arrow::field("b", arrow::int64()),
+       arrow::field("c", arrow::int64()), arrow::field("part", arrow::utf8())});
+  std::vector<std::shared_ptr<arrow::Array>> arrays(4);
+  arrow::NumericBuilder<arrow::Int64Type> builder;
+  ABORT_ON_FAILURE(builder.AppendValues({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}));
+  ABORT_ON_FAILURE(builder.Finish(&arrays[0]));
+  builder.Reset();
+  ABORT_ON_FAILURE(builder.AppendValues({9, 8, 7, 6, 5, 4, 3, 2, 1, 0}));
+  ABORT_ON_FAILURE(builder.Finish(&arrays[1]));
+  builder.Reset();
+  ABORT_ON_FAILURE(builder.AppendValues({1, 2, 1, 2, 1, 2, 1, 2, 1, 2}));
+  ABORT_ON_FAILURE(builder.Finish(&arrays[2]));
+  arrow::StringBuilder string_builder;
+  ABORT_ON_FAILURE(
+      string_builder.AppendValues({"a", "a", "a", "a", "a", "b", "b", "b", "b", "b"}));
+  ABORT_ON_FAILURE(string_builder.Finish(&arrays[3]));
+  auto table = arrow::Table::Make(schema, arrays);
+  // Write it using Datasets
+  auto dataset = std::make_shared<ds::InMemoryDataset>(table);
+  auto scanner_builder = dataset->NewScan().ValueOrDie();
+  auto scanner = scanner_builder->Finish().ValueOrDie();
+
+  // The partition schema determines which fields are part of the partitioning.
+  auto partition_schema = arrow::schema({arrow::field("part", arrow::utf8())});
+  // We'll use Hive-style partitioning, which creates directories with "key=value" pairs.
+  auto partitioning = std::make_shared<ds::HivePartitioning>(partition_schema);
+  // We'll write Parquet files.
+  auto format = std::make_shared<ds::ParquetFileFormat>();
+  ds::FileSystemDatasetWriteOptions write_options;
+  write_options.file_write_options = format->DefaultWriteOptions();
+  write_options.filesystem = filesystem;
+  write_options.base_dir = base_path;
+  write_options.partitioning = partitioning;
+  write_options.basename_template = "part{i}.parquet";
+  ABORT_ON_FAILURE(ds::FileSystemDataset::Write(write_options, scanner));
+  return base_path;
+}
+
+// Read the whole dataset with the given format, without partitioning.
+std::shared_ptr<arrow::Table> ScanWholeDataset(
+    const std::shared_ptr<fs::FileSystem>& filesystem,
+    const std::shared_ptr<ds::FileFormat>& format, const std::string& base_dir) {
+  // Create a dataset by scanning the filesystem for files
+  fs::FileSelector selector;
+  selector.base_dir = base_dir;
+  auto factory = ds::FileSystemDatasetFactory::Make(filesystem, selector, format,
+                                                    ds::FileSystemFactoryOptions())
+                     .ValueOrDie();
+  auto dataset = factory->Finish().ValueOrDie();
+  // Print out the fragments
+  for (const auto& fragment : dataset->GetFragments().ValueOrDie()) {
+    std::cout << "Found fragment: " << (*fragment)->ToString() << std::endl;
+  }
+  // Read the entire dataset as a Table
+  auto scan_builder = dataset->NewScan().ValueOrDie();
+  auto scanner = scan_builder->Finish().ValueOrDie();
+  return scanner->ToTable().ValueOrDie();
+}
+
+// Read a dataset, but select only column "b" and only rows where b < 4.
+//
+// This is useful when you only want a few columns from a dataset. Where possible,
+// Datasets will push down the column selection such that less work is done.
+std::shared_ptr<arrow::Table> FilterAndSelectDataset(
+    const std::shared_ptr<fs::FileSystem>& filesystem,
+    const std::shared_ptr<ds::FileFormat>& format, const std::string& base_dir) {
+  fs::FileSelector selector;
+  selector.base_dir = base_dir;
+  auto factory = ds::FileSystemDatasetFactory::Make(filesystem, selector, format,
+                                                    ds::FileSystemFactoryOptions())
+                     .ValueOrDie();
+  auto dataset = factory->Finish().ValueOrDie();
+  // Read specified columns with a row filter
+  auto scan_builder = dataset->NewScan().ValueOrDie();
+  ABORT_ON_FAILURE(scan_builder->Project({"b"}));
+  ABORT_ON_FAILURE(scan_builder->Filter(ds::less(ds::field_ref("b"), ds::literal(4))));
+  auto scanner = scan_builder->Finish().ValueOrDie();
+  return scanner->ToTable().ValueOrDie();
+}
+
+// Read a dataset, but with column projection.
+//
+// This is useful to derive new columns from existing data. For example, here we
+// demonstrate casting a column to a different type, and turning a numeric column into a
+// boolean column based on a predicate. You could also rename columns or perform
+// computations involving multiple columns.
+std::shared_ptr<arrow::Table> ProjectDataset(
+    const std::shared_ptr<fs::FileSystem>& filesystem,
+    const std::shared_ptr<ds::FileFormat>& format, const std::string& base_dir) {
+  fs::FileSelector selector;
+  selector.base_dir = base_dir;
+  auto factory = ds::FileSystemDatasetFactory::Make(filesystem, selector, format,
+                                                    ds::FileSystemFactoryOptions())
+                     .ValueOrDie();
+  auto dataset = factory->Finish().ValueOrDie();
+  // Read specified columns with a row filter
+  auto scan_builder = dataset->NewScan().ValueOrDie();
+  ABORT_ON_FAILURE(scan_builder->Project(
+      {
+          // Leave column "a" as-is.
+          ds::field_ref("a"),
+          // Cast column "b" to float32.
+          ds::call("cast", {ds::field_ref("b")},
+                   arrow::compute::CastOptions::Safe(arrow::float32())),
+          // Derive a boolean column from "c".
+          ds::equal(ds::field_ref("c"), ds::literal(1)),
+      },
+      {"a_renamed", "b_as_float32", "c_1"}));
+  auto scanner = scan_builder->Finish().ValueOrDie();
+  return scanner->ToTable().ValueOrDie();
+}
+
+// Read a dataset, but with column projection.
+//
+// This time, we read all original columns plus one derived column. This simply combines
+// the previous two examples: selecting a subset of columns by name, and deriving new
+// columns with an expression.
+std::shared_ptr<arrow::Table> SelectAndProjectDataset(
+    const std::shared_ptr<fs::FileSystem>& filesystem,
+    const std::shared_ptr<ds::FileFormat>& format, const std::string& base_dir) {
+  fs::FileSelector selector;
+  selector.base_dir = base_dir;
+  auto factory = ds::FileSystemDatasetFactory::Make(filesystem, selector, format,
+                                                    ds::FileSystemFactoryOptions())
+                     .ValueOrDie();
+  auto dataset = factory->Finish().ValueOrDie();
+  // Read specified columns with a row filter
+  auto scan_builder = dataset->NewScan().ValueOrDie();
+  std::vector<std::string> names;
+  std::vector<ds::Expression> exprs;
+  // Read all the original columns.
+  for (const auto& field : dataset->schema()->fields()) {
+    names.push_back(field->name());
+    exprs.push_back(ds::field_ref(field->name()));
+  }
+  // Also derive a new column.
+  names.push_back("b_large");
+  exprs.push_back(ds::greater(ds::field_ref("b"), ds::literal(1)));
+  ABORT_ON_FAILURE(scan_builder->Project(exprs, names));
+  auto scanner = scan_builder->Finish().ValueOrDie();
+  return scanner->ToTable().ValueOrDie();
+}
+
+// Read an entire dataset, but with partitioning information.
+std::shared_ptr<arrow::Table> ScanPartitionedDataset(
+    const std::shared_ptr<fs::FileSystem>& filesystem,
+    const std::shared_ptr<ds::FileFormat>& format, const std::string& base_dir) {
+  fs::FileSelector selector;
+  selector.base_dir = base_dir;
+  selector.recursive = true;  // Make sure to search subdirectories
+  ds::FileSystemFactoryOptions options;
+  // We'll use Hive-style partitioning. We'll let Arrow Datasets infer the partition
+  // schema.
+  options.partitioning = ds::HivePartitioning::MakeFactory();
+  auto factory = ds::FileSystemDatasetFactory::Make(filesystem, selector, format, options)
+                     .ValueOrDie();
+  auto dataset = factory->Finish().ValueOrDie();
+  // Print out the fragments
+  for (const auto& fragment : dataset->GetFragments().ValueOrDie()) {
+    std::cout << "Found fragment: " << (*fragment)->ToString() << std::endl;
+    std::cout << "Partition expression: "
+              << (*fragment)->partition_expression().ToString() << std::endl;
+  }
+  auto scan_builder = dataset->NewScan().ValueOrDie();
+  auto scanner = scan_builder->Finish().ValueOrDie();
+  return scanner->ToTable().ValueOrDie();
+}
+
+// Read an entire dataset, but with partitioning information. Also, filter the dataset on
+// the partition values.
+std::shared_ptr<arrow::Table> FilterPartitionedDataset(
+    const std::shared_ptr<fs::FileSystem>& filesystem,
+    const std::shared_ptr<ds::FileFormat>& format, const std::string& base_dir) {
+  fs::FileSelector selector;
+  selector.base_dir = base_dir;
+  selector.recursive = true;
+  ds::FileSystemFactoryOptions options;
+  options.partitioning = ds::HivePartitioning::MakeFactory();
+  auto factory = ds::FileSystemDatasetFactory::Make(filesystem, selector, format, options)
+                     .ValueOrDie();
+  auto dataset = factory->Finish().ValueOrDie();
+  auto scan_builder = dataset->NewScan().ValueOrDie();
+  // Filter based on the partition values. This will mean that we won't even read the
+  // files whose partition expressions don't match the filter.
+  ABORT_ON_FAILURE(
+      scan_builder->Filter(ds::equal(ds::field_ref("part"), ds::literal("b"))));
+  auto scanner = scan_builder->Finish().ValueOrDie();
+  return scanner->ToTable().ValueOrDie();
+}
+
+int main(int argc, char** argv) {
+  if (argc < 3) {
+    // Fake success for CI purposes.
+    return EXIT_SUCCESS;
+  }
+
+  std::string uri = argv[1];
+  std::string format_name = argv[2];
+  std::string mode = argc > 3 ? argv[3] : "no_filter";
+  std::string root_path;
+  auto fs = fs::FileSystemFromUri(uri, &root_path).ValueOrDie();
+
+  std::string base_path;
+  std::shared_ptr<ds::FileFormat> format;
+  if (format_name == "feather") {
+    format = std::make_shared<ds::IpcFileFormat>();
+    base_path = CreateExampleFeatherDataset(fs, root_path);
+  } else if (format_name == "parquet") {
+    format = std::make_shared<ds::ParquetFileFormat>();
+    base_path = CreateExampleParquetDataset(fs, root_path);
+  } else if (format_name == "parquet_hive") {
+    format = std::make_shared<ds::ParquetFileFormat>();
+    base_path = CreateExampleParquetHivePartitionedDataset(fs, root_path);
+  } else {
+    std::cerr << "Unknown format: " << format_name << std::endl;
+    std::cerr << "Supported formats: feather, parquet, parquet_hive" << std::endl;
+    return EXIT_FAILURE;
+  }
+
+  std::shared_ptr<arrow::Table> table;
+  if (mode == "no_filter") {
+    table = ScanWholeDataset(fs, format, base_path);
+  } else if (mode == "filter") {
+    table = FilterAndSelectDataset(fs, format, base_path);
+  } else if (mode == "project") {
+    table = ProjectDataset(fs, format, base_path);
+  } else if (mode == "select_project") {
+    table = SelectAndProjectDataset(fs, format, base_path);
+  } else if (mode == "partitioned") {
+    table = ScanPartitionedDataset(fs, format, base_path);
+  } else if (mode == "filter_partitioned") {
+    table = FilterPartitionedDataset(fs, format, base_path);
+  } else {
+    std::cerr << "Unknown mode: " << mode << std::endl;
+    std::cerr
+        << "Supported modes: no_filter, filter, project, select_project, partitioned"
+        << std::endl;
+    return EXIT_FAILURE;
+  }
+  std::cout << "Read " << table->num_rows() << " rows" << std::endl;
+  std::cout << table->ToString() << std::endl;
+  return EXIT_SUCCESS;
+}
diff --git a/cpp/src/arrow/dataset/dataset.h b/cpp/src/arrow/dataset/dataset.h
index 5d818b23938..12c199dc210 100644
--- a/cpp/src/arrow/dataset/dataset.h
+++ b/cpp/src/arrow/dataset/dataset.h
@@ -91,6 +91,8 @@ class ARROW_DS_EXPORT Fragment : public std::enable_shared_from_this<Fragment> {
 /// the results of a scan. These are options which make sense to change between
 /// repeated reads of the same dataset, such as format-specific conversion options
 /// (that do not affect the schema).
+///
+/// \ingroup dataset-scanning
 class ARROW_DS_EXPORT FragmentScanOptions {
  public:
   virtual std::string type_name() const = 0;
@@ -98,6 +100,10 @@ class ARROW_DS_EXPORT FragmentScanOptions {
   virtual ~FragmentScanOptions() = default;
 };
 
+/// \defgroup dataset-implementations Concrete implementations
+///
+/// @{
+
 /// \brief A trivial Fragment that yields ScanTask out of a fixed set of
 /// RecordBatch.
 class ARROW_DS_EXPORT InMemoryFragment : public Fragment {
@@ -116,6 +122,8 @@ class ARROW_DS_EXPORT InMemoryFragment : public Fragment {
   RecordBatchVector record_batches_;
 };
 
+/// @}
+
 /// \brief A container of zero or more Fragments.
 ///
 /// A Dataset acts as a union of Fragments, e.g. files deeply nested in a
@@ -160,6 +168,10 @@ class ARROW_DS_EXPORT Dataset : public std::enable_shared_from_this<Dataset> {
   Expression partition_expression_ = literal(true);
 };
 
+/// \addtogroup dataset-implementations
+///
+/// @{
+
 /// \brief A Source which yields fragments wrapping a stream of record batches.
 ///
 /// The record batches must match the schema provided to the source at construction.
@@ -171,13 +183,15 @@ class ARROW_DS_EXPORT InMemoryDataset : public Dataset {
     virtual RecordBatchIterator Get() const = 0;
   };
 
+  /// Construct a dataset from a schema and a factory of record batch iterators.
   InMemoryDataset(std::shared_ptr<Schema> schema,
                   std::shared_ptr<RecordBatchGenerator> get_batches)
       : Dataset(std::move(schema)), get_batches_(std::move(get_batches)) {}
 
-  // Convenience constructor taking a fixed list of batches
+  /// Convenience constructor taking a fixed list of batches
   InMemoryDataset(std::shared_ptr<Schema> schema, RecordBatchVector batches);
 
+  /// Convenience constructor taking a Table
   explicit InMemoryDataset(std::shared_ptr<Table> table);
   explicit InMemoryDataset(std::shared_ptr<RecordBatchReader> reader);
 
@@ -221,5 +235,7 @@ class ARROW_DS_EXPORT UnionDataset : public Dataset {
   friend class UnionDatasetFactory;
 };
 
+/// @}
+
 }  // namespace dataset
 }  // namespace arrow
diff --git a/cpp/src/arrow/dataset/discovery.h b/cpp/src/arrow/dataset/discovery.h
index 94c49ff0b85..cfe741a5d17 100644
--- a/cpp/src/arrow/dataset/discovery.h
+++ b/cpp/src/arrow/dataset/discovery.h
@@ -38,6 +38,10 @@
 namespace arrow {
 namespace dataset {
 
+/// \defgroup dataset-discovery Discovery API
+///
+/// @{
+
 struct InspectOptions {
   /// See `fragments` property.
   static constexpr int kInspectAllFragments = -1;
@@ -85,11 +89,14 @@ class ARROW_DS_EXPORT DatasetFactory {
 
   /// \brief Create a Dataset
   Result<std::shared_ptr<Dataset>> Finish();
+  /// \brief Create a Dataset with the given schema (see \a InspectOptions::schema)
   Result<std::shared_ptr<Dataset>> Finish(std::shared_ptr<Schema> schema);
+  /// \brief Create a Dataset with the given options
   virtual Result<std::shared_ptr<Dataset>> Finish(FinishOptions options) = 0;
 
   /// \brief Optional root partition for the resulting Dataset.
   const Expression& root_partition() const { return root_partition_; }
+  /// \brief Set the root partition for the resulting Dataset.
   Status SetRootPartition(Expression partition) {
     root_partition_ = std::move(partition);
     return Status::OK();
@@ -103,8 +110,11 @@ class ARROW_DS_EXPORT DatasetFactory {
   Expression root_partition_;
 };
 
+/// @}
+
 /// \brief DatasetFactory provides a way to inspect/discover a Dataset's
 /// expected schema before materialization.
+/// \ingroup dataset-implementations
 class ARROW_DS_EXPORT UnionDatasetFactory : public DatasetFactory {
  public:
   static Result<std::shared_ptr<DatasetFactory>> Make(
@@ -132,51 +142,52 @@ class ARROW_DS_EXPORT UnionDatasetFactory : public DatasetFactory {
   std::vector<std::shared_ptr<DatasetFactory>> factories_;
 };
 
+/// \ingroup dataset-filesystem
 struct FileSystemFactoryOptions {
-  // Either an explicit Partitioning or a PartitioningFactory to discover one.
-  //
-  // If a factory is provided, it will be used to infer a schema for partition fields
-  // based on file and directory paths then construct a Partitioning. The default
-  // is a Partitioning which will yield no partition information.
-  //
-  // The (explicit or discovered) partitioning will be applied to discovered files
-  // and the resulting partition information embedded in the Dataset.
+  /// Either an explicit Partitioning or a PartitioningFactory to discover one.
+  ///
+  /// If a factory is provided, it will be used to infer a schema for partition fields
+  /// based on file and directory paths then construct a Partitioning. The default
+  /// is a Partitioning which will yield no partition information.
+  ///
+  /// The (explicit or discovered) partitioning will be applied to discovered files
+  /// and the resulting partition information embedded in the Dataset.
   PartitioningOrFactory partitioning{Partitioning::Default()};
 
-  // For the purposes of applying the partitioning, paths will be stripped
-  // of the partition_base_dir. Files not matching the partition_base_dir
-  // prefix will be skipped for partition discovery. The ignored files will still
-  // be part of the Dataset, but will not have partition information.
-  //
-  // Example:
-  // partition_base_dir = "/dataset";
-  //
-  // - "/dataset/US/sales.csv" -> "US/sales.csv" will be given to the partitioning
-  //
-  // - "/home/john/late_sales.csv" -> Will be ignored for partition discovery.
-  //
-  // This is useful for partitioning which parses directory when ordering
-  // is important, e.g. DirectoryPartitioning.
+  /// For the purposes of applying the partitioning, paths will be stripped
+  /// of the partition_base_dir. Files not matching the partition_base_dir
+  /// prefix will be skipped for partition discovery. The ignored files will still
+  /// be part of the Dataset, but will not have partition information.
+  ///
+  /// Example:
+  /// partition_base_dir = "/dataset";
+  ///
+  /// - "/dataset/US/sales.csv" -> "US/sales.csv" will be given to the partitioning
+  ///
+  /// - "/home/john/late_sales.csv" -> Will be ignored for partition discovery.
+  ///
+  /// This is useful for partitioning which parses directory when ordering
+  /// is important, e.g. DirectoryPartitioning.
   std::string partition_base_dir;
 
-  // Invalid files (via selector or explicitly) will be excluded by checking
-  // with the FileFormat::IsSupported method.  This will incur IO for each files
-  // in a serial and single threaded fashion. Disabling this feature will skip the
-  // IO, but unsupported files may be present in the Dataset
-  // (resulting in an error at scan time).
+  /// Invalid files (via selector or explicitly) will be excluded by checking
+  /// with the FileFormat::IsSupported method.  This will incur IO for each files
+  /// in a serial and single threaded fashion. Disabling this feature will skip the
+  /// IO, but unsupported files may be present in the Dataset
+  /// (resulting in an error at scan time).
   bool exclude_invalid_files = false;
 
-  // When discovering from a Selector (and not from an explicit file list), ignore
-  // files and directories matching any of these prefixes.
-  //
-  // Example (with selector = "/dataset/**"):
-  // selector_ignore_prefixes = {"_", ".DS_STORE" };
-  //
-  // - "/dataset/data.csv" -> not ignored
-  // - "/dataset/_metadata" -> ignored
-  // - "/dataset/.DS_STORE" -> ignored
-  // - "/dataset/_hidden/dat" -> ignored
-  // - "/dataset/nested/.DS_STORE" -> ignored
+  /// When discovering from a Selector (and not from an explicit file list), ignore
+  /// files and directories matching any of these prefixes.
+  ///
+  /// Example (with selector = "/dataset/**"):
+  /// selector_ignore_prefixes = {"_", ".DS_STORE" };
+  ///
+  /// - "/dataset/data.csv" -> not ignored
+  /// - "/dataset/_metadata" -> ignored
+  /// - "/dataset/.DS_STORE" -> ignored
+  /// - "/dataset/_hidden/dat" -> ignored
+  /// - "/dataset/nested/.DS_STORE" -> ignored
   std::vector<std::string> selector_ignore_prefixes = {
       ".",
       "_",
@@ -185,6 +196,7 @@ struct FileSystemFactoryOptions {
 
 /// \brief FileSystemDatasetFactory creates a Dataset from a vector of
 /// fs::FileInfo or a fs::FileSelector.
+/// \ingroup dataset-filesystem
 class ARROW_DS_EXPORT FileSystemDatasetFactory : public DatasetFactory {
  public:
   /// \brief Build a FileSystemDatasetFactory from an explicit list of
diff --git a/cpp/src/arrow/dataset/file_base.h b/cpp/src/arrow/dataset/file_base.h
index ccc3d54709b..c4c70d65d2f 100644
--- a/cpp/src/arrow/dataset/file_base.h
+++ b/cpp/src/arrow/dataset/file_base.h
@@ -39,6 +39,10 @@ namespace arrow {
 
 namespace dataset {
 
+/// \defgroup dataset-filesystem File system datasets
+///
+/// @{
+
 /// \brief The path and filesystem where an actual file is located or a buffer which can
 /// be read like a file
 class ARROW_DS_EXPORT FileSource {
@@ -153,16 +157,20 @@ class ARROW_DS_EXPORT FileFormat : public std::enable_shared_from_this<FileForma
       FileSource source, Expression partition_expression,
       std::shared_ptr<Schema> physical_schema);
 
+  /// \brief Create a FileFragment for a FileSource.
   Result<std::shared_ptr<FileFragment>> MakeFragment(FileSource source,
                                                      Expression partition_expression);
 
+  /// \brief Create a FileFragment for a FileSource.
   Result<std::shared_ptr<FileFragment>> MakeFragment(
       FileSource source, std::shared_ptr<Schema> physical_schema = NULLPTR);
 
+  /// \brief Create a writer for this format.
   virtual Result<std::shared_ptr<FileWriter>> MakeWriter(
       std::shared_ptr<io::OutputStream> destination, std::shared_ptr<Schema> schema,
       std::shared_ptr<FileWriteOptions> options) const = 0;
 
+  /// \brief Get default write options for this format.
   virtual std::shared_ptr<FileWriteOptions> DefaultWriteOptions() = 0;
 };
 
@@ -258,6 +266,7 @@ class ARROW_DS_EXPORT FileSystemDataset : public Dataset {
   std::shared_ptr<FragmentSubtrees> subtrees_;
 };
 
+/// \brief Options for writing a file of this format.
 class ARROW_DS_EXPORT FileWriteOptions {
  public:
   virtual ~FileWriteOptions() = default;
@@ -273,14 +282,18 @@ class ARROW_DS_EXPORT FileWriteOptions {
   std::shared_ptr<FileFormat> format_;
 };
 
+/// \brief A writer for this format.
 class ARROW_DS_EXPORT FileWriter {
  public:
   virtual ~FileWriter() = default;
 
+  /// \brief Write the given batch.
   virtual Status Write(const std::shared_ptr<RecordBatch>& batch) = 0;
 
+  /// \brief Write all batches from the reader.
   Status Write(RecordBatchReader* batches);
 
+  /// \brief Indicate that writing is done.
   virtual Status Finish();
 
   const std::shared_ptr<FileFormat>& format() const { return options_->format(); }
@@ -301,6 +314,7 @@ class ARROW_DS_EXPORT FileWriter {
   std::shared_ptr<io::OutputStream> destination_;
 };
 
+/// \brief Options for writing a dataset.
 struct ARROW_DS_EXPORT FileSystemDatasetWriteOptions {
   /// Options for individual fragment writing.
   std::shared_ptr<FileWriteOptions> file_write_options;
@@ -326,5 +340,7 @@ struct ARROW_DS_EXPORT FileSystemDatasetWriteOptions {
   }
 };
 
+/// @}
+
 }  // namespace dataset
 }  // namespace arrow
diff --git a/cpp/src/arrow/dataset/file_csv.h b/cpp/src/arrow/dataset/file_csv.h
index b235195c5e3..7232f37658c 100644
--- a/cpp/src/arrow/dataset/file_csv.h
+++ b/cpp/src/arrow/dataset/file_csv.h
@@ -33,6 +33,10 @@ namespace dataset {
 
 constexpr char kCsvTypeName[] = "csv";
 
+/// \addtogroup dataset-file-formats
+///
+/// @{
+
 /// \brief A FileFormat implementation that reads from and writes to Csv files
 class ARROW_DS_EXPORT CsvFileFormat : public FileFormat {
  public:
@@ -75,5 +79,7 @@ struct ARROW_DS_EXPORT CsvFragmentScanOptions : public FragmentScanOptions {
   csv::ReadOptions read_options = csv::ReadOptions::Defaults();
 };
 
+/// @}
+
 }  // namespace dataset
 }  // namespace arrow
diff --git a/cpp/src/arrow/dataset/file_ipc.h b/cpp/src/arrow/dataset/file_ipc.h
index 621eef80635..aa3444eefa4 100644
--- a/cpp/src/arrow/dataset/file_ipc.h
+++ b/cpp/src/arrow/dataset/file_ipc.h
@@ -31,6 +31,10 @@
 namespace arrow {
 namespace dataset {
 
+/// \addtogroup dataset-file-formats
+///
+/// @{
+
 constexpr char kIpcTypeName[] = "ipc";
 
 /// \brief A FileFormat implementation that reads from and writes to Ipc files
@@ -101,5 +105,7 @@ class ARROW_DS_EXPORT IpcFileWriter : public FileWriter {
   friend class IpcFileFormat;
 };
 
+/// @}
+
 }  // namespace dataset
 }  // namespace arrow
diff --git a/cpp/src/arrow/dataset/file_parquet.h b/cpp/src/arrow/dataset/file_parquet.h
index ac8a746481a..734917e6384 100644
--- a/cpp/src/arrow/dataset/file_parquet.h
+++ b/cpp/src/arrow/dataset/file_parquet.h
@@ -57,6 +57,10 @@ struct SchemaManifest;
 namespace arrow {
 namespace dataset {
 
+/// \addtogroup dataset-file-formats
+///
+/// @{
+
 constexpr char kParquetTypeName[] = "parquet";
 
 /// \brief A FileFormat implementation that reads from Parquet files
@@ -166,13 +170,13 @@ class ARROW_DS_EXPORT ParquetFileFragment : public FileFragment {
     return physical_schema_;
   }
 
-  // Return a filtered subset of row group indices.
+  /// Return a filtered subset of row group indices.
   Result<std::vector<int>> FilterRowGroups(Expression predicate);
 
   ParquetFileFormat& parquet_format_;
 
-  // Indices of row groups selected by this fragment,
-  // or util::nullopt if all row groups are selected.
+  /// Indices of row groups selected by this fragment,
+  /// or util::nullopt if all row groups are selected.
   util::optional<std::vector<int>> row_groups_;
 
   std::vector<Expression> statistics_expressions_;
@@ -207,8 +211,10 @@ class ARROW_DS_EXPORT ParquetFragmentScanOptions : public FragmentScanOptions {
 
 class ARROW_DS_EXPORT ParquetFileWriteOptions : public FileWriteOptions {
  public:
+  /// \brief Parquet writer properties.
   std::shared_ptr<parquet::WriterProperties> writer_properties;
 
+  /// \brief Parquet Arrow writer properties.
   std::shared_ptr<parquet::ArrowWriterProperties> arrow_writer_properties;
 
  protected:
@@ -237,38 +243,39 @@ class ARROW_DS_EXPORT ParquetFileWriter : public FileWriter {
   friend class ParquetFileFormat;
 };
 
+/// \brief Options for making a FileSystemDataset from a Parquet _metadata file.
 struct ParquetFactoryOptions {
-  // Either an explicit Partitioning or a PartitioningFactory to discover one.
-  //
-  // If a factory is provided, it will be used to infer a schema for partition fields
-  // based on file and directory paths then construct a Partitioning. The default
-  // is a Partitioning which will yield no partition information.
-  //
-  // The (explicit or discovered) partitioning will be applied to discovered files
-  // and the resulting partition information embedded in the Dataset.
+  /// Either an explicit Partitioning or a PartitioningFactory to discover one.
+  ///
+  /// If a factory is provided, it will be used to infer a schema for partition fields
+  /// based on file and directory paths then construct a Partitioning. The default
+  /// is a Partitioning which will yield no partition information.
+  ///
+  /// The (explicit or discovered) partitioning will be applied to discovered files
+  /// and the resulting partition information embedded in the Dataset.
   PartitioningOrFactory partitioning{Partitioning::Default()};
 
-  // For the purposes of applying the partitioning, paths will be stripped
-  // of the partition_base_dir. Files not matching the partition_base_dir
-  // prefix will be skipped for partition discovery. The ignored files will still
-  // be part of the Dataset, but will not have partition information.
-  //
-  // Example:
-  // partition_base_dir = "/dataset";
-  //
-  // - "/dataset/US/sales.csv" -> "US/sales.csv" will be given to the partitioning
-  //
-  // - "/home/john/late_sales.csv" -> Will be ignored for partition discovery.
-  //
-  // This is useful for partitioning which parses directory when ordering
-  // is important, e.g. DirectoryPartitioning.
+  /// For the purposes of applying the partitioning, paths will be stripped
+  /// of the partition_base_dir. Files not matching the partition_base_dir
+  /// prefix will be skipped for partition discovery. The ignored files will still
+  /// be part of the Dataset, but will not have partition information.
+  ///
+  /// Example:
+  /// partition_base_dir = "/dataset";
+  ///
+  /// - "/dataset/US/sales.csv" -> "US/sales.csv" will be given to the partitioning
+  ///
+  /// - "/home/john/late_sales.csv" -> Will be ignored for partition discovery.
+  ///
+  /// This is useful for partitioning which parses directory when ordering
+  /// is important, e.g. DirectoryPartitioning.
   std::string partition_base_dir;
 
-  // Assert that all ColumnChunk paths are consistent. The parquet spec allows for
-  // ColumnChunk data to be stored in multiple files, but ParquetDatasetFactory
-  // supports only a single file with all ColumnChunk data. If this flag is set
-  // construction of a ParquetDatasetFactory will raise an error if ColumnChunk
-  // data is not resident in a single file.
+  /// Assert that all ColumnChunk paths are consistent. The parquet spec allows for
+  /// ColumnChunk data to be stored in multiple files, but ParquetDatasetFactory
+  /// supports only a single file with all ColumnChunk data. If this flag is set
+  /// construction of a ParquetDatasetFactory will raise an error if ColumnChunk
+  /// data is not resident in a single file.
   bool validate_column_chunk_paths = false;
 };
 
@@ -351,5 +358,7 @@ class ARROW_DS_EXPORT ParquetDatasetFactory : public DatasetFactory {
   Result<std::shared_ptr<Schema>> PartitionSchema();
 };
 
+/// @}
+
 }  // namespace dataset
 }  // namespace arrow
diff --git a/cpp/src/arrow/dataset/partition.h b/cpp/src/arrow/dataset/partition.h
index 74e6c607106..6330725b7a6 100644
--- a/cpp/src/arrow/dataset/partition.h
+++ b/cpp/src/arrow/dataset/partition.h
@@ -38,6 +38,10 @@ namespace dataset {
 // ----------------------------------------------------------------------
 // Partitioning
 
+/// \defgroup dataset-partitioning Partitioning API
+///
+/// @{
+
 /// \brief Interface for parsing partition expressions from string partition
 /// identifiers.
 ///
@@ -76,6 +80,7 @@ class ARROW_DS_EXPORT Partitioning {
   /// \brief A default Partitioning which always yields scalar(true)
   static std::shared_ptr<Partitioning> Default();
 
+  /// \brief The partition schema.
   const std::shared_ptr<Schema>& schema() { return schema_; }
 
  protected:
@@ -84,6 +89,7 @@ class ARROW_DS_EXPORT Partitioning {
   std::shared_ptr<Schema> schema_;
 };
 
+/// \brief Options for inferring a partitioning.
 struct PartitioningFactoryOptions {
   /// When inferring a schema for partition fields, yield dictionary encoded types
   /// instead of plain. This can be more efficient when materializing virtual
@@ -96,6 +102,7 @@ struct PartitioningFactoryOptions {
   std::shared_ptr<Schema> schema;
 };
 
+/// \brief Options for inferring a hive-style partitioning.
 struct HivePartitioningFactoryOptions : PartitioningFactoryOptions {
   /// The hive partitioning scheme maps null to a hard coded fallback string.
   std::string null_fallback;
@@ -165,14 +172,18 @@ class ARROW_DS_EXPORT KeyValuePartitioning : public Partitioning {
 /// parsed to ("year"_ == 2009 and "month"_ == 11)
 class ARROW_DS_EXPORT DirectoryPartitioning : public KeyValuePartitioning {
  public:
-  // If a field in schema is of dictionary type, the corresponding element of dictionaries
-  // must be contain the dictionary of values for that field.
+  /// If a field in schema is of dictionary type, the corresponding element of
+  /// dictionaries must be contain the dictionary of values for that field.
   explicit DirectoryPartitioning(std::shared_ptr<Schema> schema,
                                  ArrayVector dictionaries = {})
       : KeyValuePartitioning(std::move(schema), std::move(dictionaries)) {}
 
   std::string type_name() const override { return "schema"; }
 
+  /// \brief Create a factory for a directory partitioning.
+  ///
+  /// \param[in] field_names The names for the partition fields. Types will be
+  ///     inferred.
   static std::shared_ptr<PartitioningFactory> MakeFactory(
       std::vector<std::string> field_names, PartitioningFactoryOptions = {});
 
@@ -182,6 +193,7 @@ class ARROW_DS_EXPORT DirectoryPartitioning : public KeyValuePartitioning {
   Result<std::string> FormatValues(const ScalarVector& values) const override;
 };
 
+/// \brief The default fallback used for null values in a Hive-style partitioning.
 static constexpr char kDefaultHiveNullFallback[] = "__HIVE_DEFAULT_PARTITION__";
 
 /// \brief Multi-level, directory based partitioning
@@ -195,8 +207,8 @@ static constexpr char kDefaultHiveNullFallback[] = "__HIVE_DEFAULT_PARTITION__";
 /// "/day=321/ignored=3.4/year=2009" parses to ("year"_ == 2009 and "day"_ == 321)
 class ARROW_DS_EXPORT HivePartitioning : public KeyValuePartitioning {
  public:
-  // If a field in schema is of dictionary type, the corresponding element of dictionaries
-  // must be contain the dictionary of values for that field.
+  /// If a field in schema is of dictionary type, the corresponding element of
+  /// dictionaries must be contain the dictionary of values for that field.
   explicit HivePartitioning(std::shared_ptr<Schema> schema, ArrayVector dictionaries = {},
                             std::string null_fallback = kDefaultHiveNullFallback)
       : KeyValuePartitioning(std::move(schema), std::move(dictionaries)),
@@ -208,6 +220,7 @@ class ARROW_DS_EXPORT HivePartitioning : public KeyValuePartitioning {
   static util::optional<Key> ParseKey(const std::string& segment,
                                       const std::string& null_fallback);
 
+  /// \brief Create a factory for a hive partitioning.
   static std::shared_ptr<PartitioningFactory> MakeFactory(
       HivePartitioningFactoryOptions = {});
 
@@ -288,10 +301,13 @@ class ARROW_DS_EXPORT PartitioningOrFactory {
     return *this = PartitioningOrFactory(std::move(factory));
   }
 
+  /// \brief The partitioning (if given).
   const std::shared_ptr<Partitioning>& partitioning() const { return partitioning_; }
 
+  /// \brief The partition factory (if given).
   const std::shared_ptr<PartitioningFactory>& factory() const { return factory_; }
 
+  /// \brief Get the partition schema, inferring it with the given factory if needed.
   Result<std::shared_ptr<Schema>> GetOrInferSchema(const std::vector<std::string>& paths);
 
  private:
@@ -299,5 +315,7 @@ class ARROW_DS_EXPORT PartitioningOrFactory {
   std::shared_ptr<Partitioning> partitioning_;
 };
 
+/// @}
+
 }  // namespace dataset
 }  // namespace arrow
diff --git a/cpp/src/arrow/dataset/scanner.h b/cpp/src/arrow/dataset/scanner.h
index ddd86674d39..37765c10488 100644
--- a/cpp/src/arrow/dataset/scanner.h
+++ b/cpp/src/arrow/dataset/scanner.h
@@ -43,33 +43,39 @@ using RecordBatchGenerator = std::function<Future<std::shared_ptr<RecordBatch>>(
 
 namespace dataset {
 
+/// \defgroup dataset-scanning Scanning API
+///
+/// @{
+
 constexpr int64_t kDefaultBatchSize = 1 << 20;
 constexpr int32_t kDefaultBatchReadahead = 32;
 constexpr int32_t kDefaultFragmentReadahead = 8;
 
+/// Scan-specific options, which can be changed between scans of the same dataset.
 struct ARROW_DS_EXPORT ScanOptions {
-  // Filter and projection
+  /// A row filter (which will be pushed down to partitioning/reading if supported).
   Expression filter = literal(true);
+  /// A projection expression (which can add/remove/rename columns).
   Expression projection;
 
-  // Schema with which batches will be read from fragments. This is also known as the
-  // "reader schema" it will be used (for example) in constructing CSV file readers to
-  // identify column types for parsing. Usually only a subset of its fields (see
-  // MaterializedFields) will be materialized during a scan.
+  /// Schema with which batches will be read from fragments. This is also known as the
+  /// "reader schema" it will be used (for example) in constructing CSV file readers to
+  /// identify column types for parsing. Usually only a subset of its fields (see
+  /// MaterializedFields) will be materialized during a scan.
   std::shared_ptr<Schema> dataset_schema;
 
-  // Schema of projected record batches. This is independent of dataset_schema as its
-  // fields are derived from the projection. For example, let
-  //
-  //   dataset_schema = {"a": int32, "b": int32, "id": utf8}
-  //   projection = project({equal(field_ref("a"), field_ref("b"))}, {"a_plus_b"})
-  //
-  // (no filter specified). In this case, the projected_schema would be
-  //
-  //   {"a_plus_b": int32}
+  /// Schema of projected record batches. This is independent of dataset_schema as its
+  /// fields are derived from the projection. For example, let
+  ///
+  ///   dataset_schema = {"a": int32, "b": int32, "id": utf8}
+  ///   projection = project({equal(field_ref("a"), field_ref("b"))}, {"a_plus_b"})
+  ///
+  /// (no filter specified). In this case, the projected_schema would be
+  ///
+  ///   {"a_plus_b": int32}
   std::shared_ptr<Schema> projected_schema;
 
-  // Maximum row count for scanned batches.
+  /// Maximum row count for scanned batches.
   int64_t batch_size = kDefaultBatchSize;
 
   /// How many batches to read ahead within a file
@@ -133,7 +139,7 @@ struct ARROW_DS_EXPORT ScanOptions {
   // sub-selection optimization.
   std::vector<std::string> MaterializedFields() const;
 
-  /// Return a threaded or serial TaskGroup according to use_threads.
+  // Return a threaded or serial TaskGroup according to use_threads.
   std::shared_ptr<internal::TaskGroup> TaskGroup() const;
 };
 
@@ -162,25 +168,6 @@ class ARROW_DS_EXPORT ScanTask {
   std::shared_ptr<Fragment> fragment_;
 };
 
-/// \brief A trivial ScanTask that yields the RecordBatch of an array.
-class ARROW_DS_EXPORT InMemoryScanTask : public ScanTask {
- public:
-  InMemoryScanTask(std::vector<std::shared_ptr<RecordBatch>> record_batches,
-                   std::shared_ptr<ScanOptions> options,
-                   std::shared_ptr<Fragment> fragment)
-      : ScanTask(std::move(options), std::move(fragment)),
-        record_batches_(std::move(record_batches)) {}
-
-  Result<RecordBatchIterator> Execute() override;
-
- protected:
-  std::vector<std::shared_ptr<RecordBatch>> record_batches_;
-};
-
-ARROW_DS_EXPORT Result<ScanTaskIterator> ScanTaskIteratorFromRecordBatch(
-    std::vector<std::shared_ptr<RecordBatch>> batches,
-    std::shared_ptr<ScanOptions> options);
-
 template <typename T>
 struct Enumerated {
   T value;
@@ -210,6 +197,8 @@ struct EnumeratedRecordBatch {
 using EnumeratedRecordBatchGenerator = std::function<Future<EnumeratedRecordBatch>()>;
 using EnumeratedRecordBatchIterator = Iterator<EnumeratedRecordBatch>;
 
+/// @}
+
 }  // namespace dataset
 
 template <>
@@ -233,6 +222,11 @@ struct IterationTraits<dataset::EnumeratedRecordBatch> {
 };
 
 namespace dataset {
+
+/// \defgroup dataset-scanning Scanning API
+///
+/// @{
+
 /// \brief A scanner glues together several dataset classes to load in data.
 /// The dataset contains a collection of fragments and partitioning rules.
 ///
@@ -288,6 +282,7 @@ class ARROW_DS_EXPORT Scanner {
   /// positional information.
   virtual Result<EnumeratedRecordBatchIterator> ScanBatchesUnordered();
 
+  /// \brief Get the options for this scan.
   const std::shared_ptr<ScanOptions>& options() const { return scan_options_; }
 
  protected:
@@ -404,5 +399,26 @@ class ARROW_DS_EXPORT ScannerBuilder {
   std::shared_ptr<ScanOptions> scan_options_;
 };
 
+/// @}
+
+/// \brief A trivial ScanTask that yields the RecordBatch of an array.
+class ARROW_DS_EXPORT InMemoryScanTask : public ScanTask {
+ public:
+  InMemoryScanTask(std::vector<std::shared_ptr<RecordBatch>> record_batches,
+                   std::shared_ptr<ScanOptions> options,
+                   std::shared_ptr<Fragment> fragment)
+      : ScanTask(std::move(options), std::move(fragment)),
+        record_batches_(std::move(record_batches)) {}
+
+  Result<RecordBatchIterator> Execute() override;
+
+ protected:
+  std::vector<std::shared_ptr<RecordBatch>> record_batches_;
+};
+
+ARROW_DS_EXPORT Result<ScanTaskIterator> ScanTaskIteratorFromRecordBatch(
+    std::vector<std::shared_ptr<RecordBatch>> batches,
+    std::shared_ptr<ScanOptions> options);
+
 }  // namespace dataset
 }  // namespace arrow
diff --git a/docs/source/cpp/api.rst b/docs/source/cpp/api.rst
index 80adb17b630..3df16a178bb 100644
--- a/docs/source/cpp/api.rst
+++ b/docs/source/cpp/api.rst
@@ -39,3 +39,4 @@ API Reference
    api/cuda
    api/flight
    api/filesystem
+   api/dataset
diff --git a/docs/source/cpp/api/dataset.rst b/docs/source/cpp/api/dataset.rst
new file mode 100644
index 00000000000..f285f3633be
--- /dev/null
+++ b/docs/source/cpp/api/dataset.rst
@@ -0,0 +1,75 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements.  See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership.  The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License.  You may obtain a copy of the License at
+
+..   http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied.  See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+=======
+Dataset
+=======
+
+.. sidebar:: Contents
+
+   .. contents:: :local:
+
+Interface
+=========
+
+.. doxygenclass:: arrow::dataset::Fragment
+   :members:
+
+.. doxygenclass:: arrow::dataset::Dataset
+   :members:
+
+Partitioning
+============
+
+.. doxygengroup:: dataset-partitioning
+   :content-only:
+   :members:
+
+Dataset discovery/factories
+===========================
+
+.. doxygengroup:: dataset-discovery
+   :content-only:
+   :members:
+
+Scanning
+========
+
+.. doxygengroup:: dataset-scanning
+   :content-only:
+   :members:
+
+Concrete implementations
+========================
+
+.. doxygengroup:: dataset-implementations
+   :content-only:
+   :members:
+
+File System Datasets
+--------------------
+
+.. doxygengroup:: dataset-filesystem
+   :content-only:
+   :members:
+
+File Formats
+------------
+
+.. doxygengroup:: dataset-file-formats
+   :content-only:
+   :members:
diff --git a/docs/source/cpp/dataset.rst b/docs/source/cpp/dataset.rst
new file mode 100644
index 00000000000..be33e892c2f
--- /dev/null
+++ b/docs/source/cpp/dataset.rst
@@ -0,0 +1,403 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements.  See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership.  The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License.  You may obtain a copy of the License at
+
+..   http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied.  See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+.. default-domain:: cpp
+.. highlight:: cpp
+
+================
+Tabular Datasets
+================
+
+.. seealso::
+   :doc:`Dataset API reference <api/dataset>`
+
+.. warning::
+
+    The ``arrow::dataset`` namespace is experimental, and a stable API
+    is not yet guaranteed.
+
+The Arrow Datasets library provides functionality to efficiently work with
+tabular, potentially larger than memory, and multi-file datasets. This includes:
+
+* A unified interface that supports different sources and file formats (currently,
+  Parquet, Feather / Arrow IPC, and CSV files) and different file systems (local,
+  cloud).
+* Discovery of sources (crawling directories, handling partitioned datasets with
+  various partitioning schemes, basic schema normalization, ...)
+* Optimized reading with predicate pushdown (filtering rows), projection
+  (selecting and deriving columns), and optionally parallel reading.
+
+The goal is to expand support to other file formats and data sources
+(e.g. database connections) in the future.
+
+Reading Datasets
+----------------
+
+For the examples below, let's create a small dataset consisting
+of a directory with two parquet files:
+
+.. literalinclude:: ../../../cpp/examples/arrow/dataset_documentation_example.cc
+   :language: cpp
+   :lines: 50-85
+   :linenos:
+   :lineno-match:
+
+(See the full example at bottom: :ref:`cpp-dataset-full-example`.)
+
+Dataset discovery
+~~~~~~~~~~~~~~~~~
+
+A :class:`arrow::dataset::Dataset` object can be created using the various
+:class:`arrow::dataset::DatasetFactory` objects. Here, we'll use the
+:class:`arrow::dataset::FileSystemDatasetFactory`, which can create a dataset
+given a base directory path:
+
+.. literalinclude:: ../../../cpp/examples/arrow/dataset_documentation_example.cc
+   :language: cpp
+   :lines: 151-165
+   :emphasize-lines: 6-11
+   :linenos:
+   :lineno-match:
+
+We're also passing the filesystem to use and the file format to use for reading.
+This lets us choose between (for example) reading local files or files in Amazon
+S3, or between Parquet and CSV.
+
+In addition to searching a base directory, we can list file paths manually.
+
+Creating a :class:`arrow::dataset::Dataset` does not begin reading the data
+itself. It only crawls the directory to find all the files (if needed), which can
+be retrieved with :func:`arrow::dataset::FileSystemDataset::files`:
+
+.. code-block:: cpp
+
+   // Print out the files crawled (only for FileSystemDataset)
+   for (const auto& filename : dataset->files()) {
+     std::cout << filename << std::endl;
+   }
+
+…and infers the dataset's schema (by default from the first file):
+
+.. code-block:: cpp
+
+   std::cout << dataset->schema()->ToString() << std::endl;
+
+Using the :func:`arrow::dataset::Dataset::NewScan` method, we can build a
+:class:`arrow::dataset::Scanner` and read the dataset (or a portion of it) into
+a :class:`arrow::Table` with the :func:`arrow::dataset::Scanner::ToTable`
+method:
+
+.. literalinclude:: ../../../cpp/examples/arrow/dataset_documentation_example.cc
+   :language: cpp
+   :lines: 151-170
+   :emphasize-lines: 16-19
+   :linenos:
+   :lineno-match:
+
+.. TODO: iterative loading not documented pending API changes
+.. note:: Depending on the size of your dataset, this can require a lot of
+          memory; see :ref:`cpp-dataset-filtering-data` below on
+          filtering/projecting.
+
+Reading different file formats
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The above examples use Parquet files on local disk, but the Dataset API
+provides a consistent interface across multiple file formats and filesystems.
+(See :ref:`cpp-dataset-cloud-storage` for more information on the latter.)
+Currently, Parquet, Feather / Arrow IPC, and CSV file formats are supported;
+more formats are planned in the future.
+
+If we save the table as Feather files instead of Parquet files:
+
+.. literalinclude:: ../../../cpp/examples/arrow/dataset_documentation_example.cc
+   :language: cpp
+   :lines: 87-104
+   :linenos:
+   :lineno-match:
+
+…then we can read the Feather file by passing an :class:`arrow::dataset::IpcFileFormat`:
+
+.. literalinclude:: ../../../cpp/examples/arrow/dataset_documentation_example.cc
+   :language: cpp
+   :lines: 318,334
+   :linenos:
+
+Customizing file formats
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+:class:`arrow::dataset::FileFormat` objects have properties that control how
+files are read. For example::
+
+  auto format = std::make_shared<ds::ParquetFileFormat>();
+  format->reader_options.dict_columns.insert("a");
+
+Will configure column ``"a"`` to be dictionary-encoded when read. Similarly,
+setting :member:`arrow::dataset::CsvFileFormat::parse_options` lets us change
+things like reading comma-separated or tab-separated data.
+
+Additionally, passing an :class:`arrow::dataset::FragmentScanOptions` to
+:func:`arrow::dataset::ScannerBuilder::FragmentScanOptions` offers fine-grained
+control over data scanning. For example, for CSV files, we can change what values
+are converted into Boolean true and false at scan time.
+
+.. _cpp-dataset-filtering-data:
+
+Filtering data
+--------------
+
+So far, we've been reading the entire dataset, but if we need only a subset of the
+data, this can waste time or memory reading data we don't need. The
+:class:`arrow::dataset::Scanner` offers control over what data to read.
+
+In this snippet, we use :func:`arrow::dataset::ScannerBuilder::Project` to select
+which columns to read:
+
+.. literalinclude:: ../../../cpp/examples/arrow/dataset_documentation_example.cc
+   :language: cpp
+   :lines: 172-191
+   :emphasize-lines: 16
+   :linenos:
+   :lineno-match:
+
+Some formats, such as Parquet, can reduce I/O costs here by reading only the
+specified columns from the filesystem.
+
+A filter can be provided with :func:`arrow::dataset::ScannerBuilder::Filter`, so
+that rows which do not match the filter predicate will not be included in the
+returned table. Again, some formats, such as Parquet, can use this filter to
+reduce the amount of I/O needed.
+
+.. literalinclude:: ../../../cpp/examples/arrow/dataset_documentation_example.cc
+   :language: cpp
+   :lines: 172-191
+   :emphasize-lines: 17
+   :linenos:
+   :lineno-match:
+
+.. TODO Expressions not documented pending renamespacing
+
+Projecting columns
+------------------
+
+In addition to selecting columns, :func:`arrow::dataset::ScannerBuilder::Project`
+can also be used for more complex projections, such as renaming columns, casting
+them to other types, and even deriving new columns based on evaluating
+expressions.
+
+In this case, we pass a vector of expressions used to construct column values
+and a vector of names for the columns:
+
+.. literalinclude:: ../../../cpp/examples/arrow/dataset_documentation_example.cc
+   :language: cpp
+   :lines: 193-223
+   :emphasize-lines: 18-28
+   :linenos:
+   :lineno-match:
+
+This also determines the column selection; only the given columns will be
+present in the resulting table. If you want to include a derived column in
+*addition* to the existing columns, you can build up the expressions from the
+dataset schema:
+
+.. literalinclude:: ../../../cpp/examples/arrow/dataset_documentation_example.cc
+   :language: cpp
+   :lines: 225-254
+   :emphasize-lines: 17-27
+   :linenos:
+   :lineno-match:
+
+.. note:: When combining filters and projections, Arrow will determine all
+          necessary columns to read. For instance, if you filter on a column that
+          isn't ultimately selected, Arrow will still read the column to evaluate
+          the filter.
+
+Reading and writing partitioned data
+------------------------------------
+
+So far, we've been working with datasets consisting of flat directories with
+files. Oftentimes, a dataset will have one or more columns that are frequently
+filtered on. Instead of having to read and then filter the data, by organizing the
+files into a nested directory structure, we can define a partitioned dataset,
+where sub-directory names hold information about which subset of the data is
+stored in that directory. Then, we can more efficiently filter data by using that
+information to avoid loading files that don't match the filter.
+
+For example, a dataset partitioned by year and month may have the following layout:
+
+.. code-block:: text
+
+   dataset_name/
+     year=2007/
+       month=01/
+          data0.parquet
+          data1.parquet
+          ...
+       month=02/
+          data0.parquet
+          data1.parquet
+          ...
+       month=03/
+       ...
+     year=2008/
+       month=01/
+       ...
+     ...
+
+The above partitioning scheme is using "/key=value/" directory names, as found in
+Apache Hive. Under this convention, the file at
+``dataset_name/year=2007/month=01/data0.parquet`` contains only data for which
+``year == 2007`` and ``month == 01``.
+
+Let's create a small partitioned dataset. For this, we'll use Arrow's dataset
+writing functionality.
+
+.. literalinclude:: ../../../cpp/examples/arrow/dataset_documentation_example.cc
+   :language: cpp
+   :lines: 106-149
+   :emphasize-lines: 25-42
+   :linenos:
+   :lineno-match:
+
+The above created a directory with two subdirectories ("part=a" and "part=b"),
+and the Parquet files written in those directories no longer include the "part"
+column.
+
+Reading this dataset, we now specify that the dataset should use a Hive-like
+partitioning scheme:
+
+.. literalinclude:: ../../../cpp/examples/arrow/dataset_documentation_example.cc
+   :language: cpp
+   :lines: 256-279
+   :emphasize-lines: 7,9-11
+   :linenos:
+   :lineno-match:
+
+Although the partition fields are not included in the actual Parquet files,
+they will be added back to the resulting table when scanning this dataset:
+
+.. code-block:: text
+
+   $ ./debug/dataset_documentation_example file:///tmp parquet_hive partitioned
+   Found fragment: /tmp/parquet_dataset/part=a/part0.parquet
+   Partition expression: (part == "a")
+   Found fragment: /tmp/parquet_dataset/part=b/part1.parquet
+   Partition expression: (part == "b")
+   Read 20 rows
+   a: int64
+     -- field metadata --
+     PARQUET:field_id: '1'
+   b: double
+     -- field metadata --
+     PARQUET:field_id: '2'
+   c: int64
+     -- field metadata --
+     PARQUET:field_id: '3'
+   part: string
+   ----
+   # snip...
+
+We can now filter on the partition keys, which avoids loading files
+altogether if they do not match the filter:
+
+.. literalinclude:: ../../../cpp/examples/arrow/dataset_documentation_example.cc
+   :language: cpp
+   :lines: 281-301
+   :emphasize-lines: 15-18
+   :linenos:
+   :lineno-match:
+
+Different partitioning schemes
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The above example uses a Hive-like directory scheme, such as "/year=2009/month=11/day=15".
+We specified this by passing the Hive partitioning factory. In this case, the types of
+the partition keys are inferred from the file paths.
+
+It is also possible to directly construct the partitioning and explicitly define
+the schema of the partition keys. For example:
+
+.. code-block:: cpp
+
+    auto part = std::make_shared<ds::HivePartitioning>(arrow::schema({
+        arrow::field("year", arrow::int16()),
+        arrow::field("month", arrow::int8()),
+        arrow::field("day", arrow::int32())
+    }));
+
+Arrow supports another partitioning scheme, "directory partitioning", where the
+segments in the file path represent the values of the partition keys without
+including the name (the field names are implicit in the segment's index). For
+example, given field names "year", "month", and "day", one path might be
+"/2019/11/15".
+
+Since the names are not included in the file paths, these must be specified
+when constructing a directory partitioning:
+
+.. code-block:: cpp
+
+    auto part = ds::DirectoryPartitioning::MakeFactory({"year", "month", "day"});
+
+Directory partitioning also supports providing a full schema rather than inferring
+types from file paths.
+
+Reading from other data sources
+-------------------------------
+
+Reading in-memory data
+~~~~~~~~~~~~~~~~~~~~~~
+
+If you already have data in memory that you'd like to use with the Datasets API
+(e.g. to filter/project data, or to write it out to a filesystem), you can wrap it
+in an :class:`arrow::dataset::InMemoryDataset`:
+
+.. code-block:: cpp
+
+   auto table = arrow::Table::FromRecordBatches(...);
+   auto dataset = std::make_shared<arrow::dataset::InMemoryDataset>(std::move(table));
+   // Scan the dataset, filter, it, etc.
+   auto scanner_builder = dataset->NewScan();
+
+In the example, we used the InMemoryDataset to write our example data to local
+disk which was used in the rest of the example:
+
+.. literalinclude:: ../../../cpp/examples/arrow/dataset_documentation_example.cc
+   :language: cpp
+   :lines: 106-149
+   :emphasize-lines: 24-28
+   :linenos:
+   :lineno-match:
+
+.. _cpp-dataset-cloud-storage:
+
+Reading from cloud storage
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+In addition to local files, Arrow Datasets also support reading from cloud
+storage systems, such as Amazon S3, by passing a different filesystem.
+
+See the :ref:`filesystem <cpp-filesystems>` docs for more details on the available
+filesystems.
+
+.. _cpp-dataset-full-example:
+
+Full Example
+------------
+
+.. literalinclude:: ../../../cpp/examples/arrow/dataset_documentation_example.cc
+   :language: cpp
+   :linenos:
diff --git a/docs/source/cpp/examples/dataset_documentation_example.rst b/docs/source/cpp/examples/dataset_documentation_example.rst
new file mode 100644
index 00000000000..2bc993f246b
--- /dev/null
+++ b/docs/source/cpp/examples/dataset_documentation_example.rst
@@ -0,0 +1,27 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements.  See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership.  The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License.  You may obtain a copy of the License at
+
+..   http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied.  See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+.. default-domain:: cpp
+.. highlight:: cpp
+
+Arrow Datasets example
+=========================
+
+The file ``cpp/examples/arrow/dataset_documentation_example.cc``
+located inside the source tree contains an example of using Arrow
+Datasets to read, write, select, and filter data. :doc:`../dataset`
+has a full walkthrough of the example.
diff --git a/docs/source/cpp/examples/index.rst b/docs/source/cpp/examples/index.rst
index 2bdfcc315bc..d365960a00d 100644
--- a/docs/source/cpp/examples/index.rst
+++ b/docs/source/cpp/examples/index.rst
@@ -22,5 +22,6 @@ Examples
    :maxdepth: 1
 
    cmake_minimal_build
+   dataset_documentation_example
    row_columnar_conversion
    std::tuple-like ranges to Arrow <tuple_range_conversion>
diff --git a/docs/source/cpp/getting_started.rst b/docs/source/cpp/getting_started.rst
index 033d299d159..d6cfb177044 100644
--- a/docs/source/cpp/getting_started.rst
+++ b/docs/source/cpp/getting_started.rst
@@ -36,4 +36,5 @@ User Guide
    parquet
    csv
    json
+   dataset
    flight
diff --git a/docs/source/cpp/io.rst b/docs/source/cpp/io.rst
index 501998b73a4..6e1d261c008 100644
--- a/docs/source/cpp/io.rst
+++ b/docs/source/cpp/io.rst
@@ -64,6 +64,8 @@ Concrete implementations are available for :class:`in-memory writes <BufferOutpu
 
 .. cpp:namespace:: arrow::fs
 
+.. _cpp-filesystems:
+
 Filesystems
 ===========
 
diff --git a/docs/source/python/dataset.rst b/docs/source/python/dataset.rst
index 614c2bf2a25..3753a0bdec4 100644
--- a/docs/source/python/dataset.rst
+++ b/docs/source/python/dataset.rst
@@ -28,19 +28,19 @@ Tabular Datasets
     and a stable API is not yet guaranteed.
 
 The ``pyarrow.dataset`` module provides functionality to efficiently work with
-tabular, potentially larger than memory and multi-file datasets:
+tabular, potentially larger than memory, and multi-file datasets. This includes:
 
-* A unified interface for different sources: supporting different sources and
-  file formats (Parquet, Feather files) and different file systems (local,
-  cloud).
+* A unified interface that supports different sources and file formats
+  (Parquet, Feather / Arrow IPC, and CSV files) and different file systems
+  (local, cloud).
 * Discovery of sources (crawling directories, handle directory-based partitioned
   datasets, basic schema normalization, ..)
 * Optimized reading with predicate pushdown (filtering rows), projection
-  (selecting columns), parallel reading or fine-grained managing of tasks.
+  (selecting and deriving columns), and optionally parallel reading.
 
-Currently, only Parquet and Feather / Arrow IPC files are supported. The goal
-is to expand this in the future to other file formats and data sources (e.g.
-database connections).
+Currently, only Parquet, Feather / Arrow IPC, and CSV files are supported. The
+goal is to expand this in the future to other file formats and data sources
+(e.g. database connections).
 
 For those familiar with the existing :class:`pyarrow.parquet.ParquetDataset` for
 reading Parquet datasets: ``pyarrow.dataset``'s goal is similar but not specific
@@ -87,11 +87,11 @@ can pass it the path to the directory containing the data files:
     dataset = ds.dataset(base / "parquet_dataset", format="parquet")
     dataset
 
-In addition to a base directory path, :func:`dataset` accepts a path to a single
-file or a list of file paths.
+In addition to searching a base directory, :func:`dataset` accepts a path to a
+single file or a list of file paths.
 
-Creating a :class:`Dataset` object loads nothing into memory, it only crawls the
-directory to find all the files:
+Creating a :class:`Dataset` object does not begin reading the data itself. If
+needed, it only crawls the directory to find all the files:
 
 .. ipython:: python
 
@@ -117,11 +117,11 @@ Reading different file formats
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 The above examples use Parquet files as dataset source but the Dataset API
-provides a consistent interface across multiple file formats and sources.
-Currently, Parquet and Feather / Arrow IPC file format are supported; more
-formats are planned in the future.
+provides a consistent interface across multiple file formats and filesystems.
+Currently, Parquet, Feather / Arrow IPC, and CSV file formats are supported;
+more formats are planned in the future.
 
-If we save the table as a Feather file instead of Parquet files:
+If we save the table as Feather files instead of Parquet files:
 
 .. ipython:: python
 
@@ -129,7 +129,7 @@ If we save the table as a Feather file instead of Parquet files:
 
     feather.write_feather(table, base / "data.feather")
 
-then we can read the Feather file using the same functions, but with specifying
+…then we can read the Feather file using the same functions, but with specifying
 ``format="feather"``:
 
 .. ipython:: python
@@ -272,7 +272,7 @@ and the Parquet files written in those directories no longer include the "part"
 column.
 
 Reading this dataset with :func:`dataset`, we now specify that the dataset
-uses a hive-like partitioning scheme with the `partitioning` keyword:
+should use a hive-like partitioning scheme with the `partitioning` keyword:
 
 .. ipython:: python
 
@@ -288,7 +288,7 @@ they will be added back to the resulting table when scanning this dataset:
     dataset.to_table().to_pandas().head(3)
 
 We can now filter on the partition keys, which avoids loading files
-altogether if they do not match the predicate:
+altogether if they do not match the filter:
 
 .. ipython:: python
 

From e99e70d97e8d37d7d026987cccef48fe19902eac Mon Sep 17 00:00:00 2001
From: Weston Pace <weston.pace@gmail.com>
Date: Wed, 14 Apr 2021 10:00:39 -0400
Subject: [PATCH 028/719] ARROW-12220: [C++][CI] Thread sanitizer failure

The background generator kept reading from the source even after the downstream had given up on it.  Other than the obvious memory / resource usage problems this also meant that callback handlers could reference deleted state downstream.  Now we block the destructor until the background thread is finished and we stop the background thread early if all consumer references are lost.

Closes #9941 from westonpace/bugfix/arrow-12220

Authored-by: Weston Pace <weston.pace@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/src/arrow/util/async_generator.h       | 215 ++++++++++++++-------
 cpp/src/arrow/util/async_generator_test.cc |  59 ++++++
 cpp/src/arrow/util/thread_pool.cc          |   6 +
 cpp/src/arrow/util/thread_pool.h           |   3 +
 4 files changed, 218 insertions(+), 65 deletions(-)

diff --git a/cpp/src/arrow/util/async_generator.h b/cpp/src/arrow/util/async_generator.h
index 5f42037017f..304a50c1408 100644
--- a/cpp/src/arrow/util/async_generator.h
+++ b/cpp/src/arrow/util/async_generator.h
@@ -1213,9 +1213,8 @@ class BackgroundGenerator {
  public:
   explicit BackgroundGenerator(Iterator<T> it, internal::Executor* io_executor, int max_q,
                                int q_restart)
-      : state_(std::make_shared<State>(io_executor, std::move(it), max_q, q_restart)) {}
-
-  ~BackgroundGenerator() {}
+      : state_(std::make_shared<State>(io_executor, std::move(it), max_q, q_restart)),
+        cleanup_(std::make_shared<Cleanup>(state_.get())) {}
 
   Future<T> operator()() {
     auto guard = state_->mutex.Lock();
@@ -1230,16 +1229,14 @@ class BackgroundGenerator {
     } else {
       auto next = Future<T>::MakeFinished(std::move(state_->queue.front()));
       state_->queue.pop();
-      if (!state_->running &&
-          static_cast<int>(state_->queue.size()) <= state_->q_restart) {
-        state_->RestartTask(state_, std::move(guard));
+      if (state_->NeedsRestart()) {
+        return state_->RestartTask(state_, std::move(guard), std::move(next));
       }
       return next;
     }
-    if (!state_->running) {
-      // This branch should only be needed to start the background thread on the first
-      // call
-      state_->RestartTask(state_, std::move(guard));
+    // This should only trigger the very first time this method is called
+    if (state_->NeedsRestart()) {
+      return state_->RestartTask(state_, std::move(guard), std::move(waiting_future));
     }
     return waiting_future;
   }
@@ -1248,11 +1245,12 @@ class BackgroundGenerator {
   struct State {
     State(internal::Executor* io_executor, Iterator<T> it, int max_q, int q_restart)
         : io_executor(io_executor),
+          max_q(max_q),
+          q_restart(q_restart),
           it(std::move(it)),
-          running(false),
+          reading(false),
           finished(false),
-          max_q(max_q),
-          q_restart(q_restart) {}
+          should_shutdown(false) {}
 
     void ClearQueue() {
       while (!queue.empty()) {
@@ -1260,78 +1258,165 @@ class BackgroundGenerator {
       }
     }
 
-    void RestartTask(std::shared_ptr<State> state, util::Mutex::Guard guard) {
-      if (!finished) {
-        running = true;
-        auto spawn_status = io_executor->Spawn([state]() { Task()(std::move(state)); });
-        if (!spawn_status.ok()) {
-          running = false;
-          finished = true;
-          if (waiting_future.has_value()) {
-            auto to_deliver = std::move(waiting_future.value());
-            waiting_future.reset();
-            guard.Unlock();
-            to_deliver.MarkFinished(spawn_status);
-          } else {
-            ClearQueue();
-            queue.push(spawn_status);
-          }
+    bool TaskIsRunning() const { return task_finished.is_valid(); }
+
+    bool NeedsRestart() const {
+      return !finished && !reading && static_cast<int>(queue.size()) <= q_restart;
+    }
+
+    void DoRestartTask(std::shared_ptr<State> state, util::Mutex::Guard guard) {
+      // If we get here we are actually going to start a new task so let's create a
+      // task_finished future for it
+      state->task_finished = Future<>::Make();
+      state->reading = true;
+      auto spawn_status = io_executor->Spawn(
+          [state]() { BackgroundGenerator::WorkerTask(std::move(state)); });
+      if (!spawn_status.ok()) {
+        // If we can't spawn a new task then send an error to the consumer (either via a
+        // waiting future or the queue) and mark ourselves finished
+        state->finished = true;
+        state->task_finished = Future<>();
+        if (waiting_future.has_value()) {
+          auto to_deliver = std::move(waiting_future.value());
+          waiting_future.reset();
+          guard.Unlock();
+          to_deliver.MarkFinished(spawn_status);
+        } else {
+          ClearQueue();
+          queue.push(spawn_status);
         }
       }
     }
 
+    Future<T> RestartTask(std::shared_ptr<State> state, util::Mutex::Guard guard,
+                          Future<T> next) {
+      if (TaskIsRunning()) {
+        // If the task is still cleaning up we need to wait for it to finish before
+        // restarting.  We also want to block the consumer until we've restarted the
+        // reader to avoid multiple restarts
+        return task_finished.Then([state, next](...) {
+          // This may appear dangerous (recursive mutex) but we should be guaranteed the
+          // outer guard has been released by this point.  We know...
+          // * task_finished is not already finished (it would be invalid in that case)
+          // * task_finished will not be marked complete until we've given up the mutex
+          auto guard_ = state->mutex.Lock();
+          state->DoRestartTask(state, std::move(guard_));
+          return next;
+        });
+      }
+      // Otherwise we can restart immediately
+      DoRestartTask(std::move(state), std::move(guard));
+      return next;
+    }
+
     internal::Executor* io_executor;
+    const int max_q;
+    const int q_restart;
     Iterator<T> it;
-    bool running;
+
+    // If true, the task is actively pumping items from the queue and does not need a
+    // restart
+    bool reading;
+    // Set to true when a terminal item arrives
     bool finished;
-    int max_q;
-    int q_restart;
+    // Signal to the background task to end early because consumers have given up on it
+    bool should_shutdown;
+    // If the queue is empty then the consumer will create a waiting future and wait for
+    // it
     std::queue<Result<T>> queue;
     util::optional<Future<T>> waiting_future;
+    // Every background task is given a future to complete when it is entirely finished
+    // processing and ready for the next task to start or for State to be destroyed
+    Future<> task_finished;
     util::Mutex mutex;
   };
 
-  class Task {
-   public:
-    void operator()(std::shared_ptr<State> state) {
-      // while condition can't be based on state_ because it is run outside the mutex
-      bool running = true;
-      while (running) {
-        auto next = state->it.Next();
-        // Need to capture state->waiting_future inside the mutex to mark finished outside
-        Future<T> waiting_future;
-        {
-          auto guard = state->mutex.Lock();
+  // Cleanup task that will be run when all consumer references to the generator are lost
+  struct Cleanup {
+    explicit Cleanup(State* state) : state(state) {}
+    ~Cleanup() {
+      Future<> finish_fut;
+      {
+        auto lock = state->mutex.Lock();
+        if (!state->TaskIsRunning()) {
+          return;
+        }
+        // Signal the current task to stop and wait for it to finish
+        state->should_shutdown = true;
+        finish_fut = state->task_finished;
+      }
+      // Using future as a condition variable here
+      Status st = finish_fut.status();
+      ARROW_UNUSED(st);
+    }
+    State* state;
+  };
 
-          if (!next.ok() || IsIterationEnd<T>(*next)) {
-            state->finished = true;
-            state->running = false;
-            if (!next.ok()) {
-              state->ClearQueue();
-            }
-          }
-          if (state->waiting_future.has_value()) {
-            waiting_future = std::move(state->waiting_future.value());
-            state->waiting_future.reset();
-          } else {
-            state->queue.push(std::move(next));
-            if (static_cast<int>(state->queue.size()) >= state->max_q) {
-              state->running = false;
-            }
+  static void WorkerTask(std::shared_ptr<State> state) {
+    // We need to capture the state to read while outside the mutex
+    bool reading = true;
+    while (reading) {
+      auto next = state->it.Next();
+      // Need to capture state->waiting_future inside the mutex to mark finished outside
+      Future<T> waiting_future;
+      {
+        auto guard = state->mutex.Lock();
+
+        if (state->should_shutdown) {
+          state->finished = true;
+          break;
+        }
+
+        if (!next.ok() || IsIterationEnd<T>(*next)) {
+          // Terminal item.  Mark finished to true, send this last item, and quit
+          state->finished = true;
+          if (!next.ok()) {
+            state->ClearQueue();
           }
-          running = state->running;
         }
-        // This must happen outside the task.  Although presumably there is a transferring
-        // generator on the other end that will quickly transfer any callbacks off of this
-        // thread so we can continue looping.  Still, best not to rely on that
-        if (waiting_future.is_valid()) {
-          waiting_future.MarkFinished(next);
+        // At this point we are going to send an item.  Either we will add it to the
+        // queue or deliver it to a waiting future.
+        if (state->waiting_future.has_value()) {
+          waiting_future = std::move(state->waiting_future.value());
+          state->waiting_future.reset();
+        } else {
+          state->queue.push(std::move(next));
+          // We just filled up the queue so it is time to quit.  We may need to notify
+          // a cleanup task so we transition to Quitting
+          if (static_cast<int>(state->queue.size()) >= state->max_q) {
+            state->reading = false;
+          }
         }
+        reading = state->reading && !state->finished;
+      }
+      // This should happen outside the mutex.  Presumably there is a
+      // transferring generator on the other end that will quickly transfer any
+      // callbacks off of this thread so we can continue looping.  Still, best not to
+      // rely on that
+      if (waiting_future.is_valid()) {
+        waiting_future.MarkFinished(next);
       }
     }
-  };
+    // Once we've sent our last item we can notify any waiters that we are done and so
+    // either state can be cleaned up or a new background task can be started
+    Future<> task_finished;
+    {
+      auto guard = state->mutex.Lock();
+      // After we give up the mutex state can be safely deleted.  We will no longer
+      // reference it.  We can safely transition to idle now.
+      task_finished = state->task_finished;
+      state->task_finished = Future<>();
+    }
+    task_finished.MarkFinished();
+  }
 
   std::shared_ptr<State> state_;
+  // state_ is held by both the generator and the background thread so it won't be cleaned
+  // up when all consumer references are relinquished.  cleanup_ is only held by the
+  // generator so it will be destructed when the last consumer reference is gone.  We use
+  // this to cleanup / stop the background generator in case the consuming end stops
+  // listening (e.g. due to a downstream error)
+  std::shared_ptr<Cleanup> cleanup_;
 };
 
 constexpr int kDefaultBackgroundMaxQ = 32;
diff --git a/cpp/src/arrow/util/async_generator_test.cc b/cpp/src/arrow/util/async_generator_test.cc
index be39261640e..3aae6087c1d 100644
--- a/cpp/src/arrow/util/async_generator_test.cc
+++ b/cpp/src/arrow/util/async_generator_test.cc
@@ -816,6 +816,65 @@ TEST_P(BackgroundGeneratorTestFixture, StopAndRestart) {
   AssertGeneratorExhausted(generator);
 }
 
+struct TrackingIterator {
+  explicit TrackingIterator(bool slow)
+      : token(std::make_shared<bool>(false)), slow(slow) {}
+
+  Result<TestInt> Next() {
+    if (slow) {
+      SleepABit();
+    }
+    return TestInt(0);
+  }
+  std::weak_ptr<bool> GetWeakTargetRef() { return std::weak_ptr<bool>(token); }
+
+  std::shared_ptr<bool> token;
+  bool slow;
+};
+
+TEST_P(BackgroundGeneratorTestFixture, AbortReading) {
+  // If there is an error downstream then it is likely the chain will abort and the
+  // background generator will lose all references and should abandon reading
+  TrackingIterator source(IsSlow());
+  auto tracker = source.GetWeakTargetRef();
+  auto iter = Iterator<TestInt>(std::move(source));
+  std::shared_ptr<AsyncGenerator<TestInt>> generator;
+  {
+    ASSERT_OK_AND_ASSIGN(
+        auto gen, MakeBackgroundGenerator(std::move(iter), internal::GetCpuThreadPool()));
+    generator = std::make_shared<AsyncGenerator<TestInt>>(gen);
+  }
+
+  // Poll one item to start it up
+  ASSERT_FINISHES_OK_AND_EQ(TestInt(0), (*generator)());
+  ASSERT_FALSE(tracker.expired());
+  // Remove last reference to generator, should trigger and wait for cleanup
+  generator.reset();
+  // Cleanup should have ensured no more reference to the source.  It may take a moment
+  // to expire because the background thread has to destruct itself
+  BusyWait(10, [&tracker] { return tracker.expired(); });
+}
+
+TEST_P(BackgroundGeneratorTestFixture, AbortOnIdleBackground) {
+  // Tests what happens when the downstream aborts while the background thread is idle
+  ASSERT_OK_AND_ASSIGN(auto thread_pool, internal::ThreadPool::Make(1));
+
+  auto source = PossiblySlowVectorIt(RangeVector(100), IsSlow());
+  std::shared_ptr<AsyncGenerator<TestInt>> generator;
+  {
+    ASSERT_OK_AND_ASSIGN(auto gen,
+                         MakeBackgroundGenerator(std::move(source), thread_pool.get()));
+    generator = std::make_shared<AsyncGenerator<TestInt>>(gen);
+  }
+  ASSERT_FINISHES_OK_AND_EQ(TestInt(0), (*generator)());
+
+  // The generator should pretty quickly fill up the queue and idle
+  BusyWait(10, [&thread_pool] { return thread_pool->GetNumTasks() == 0; });
+
+  // Now delete the generator and hope we don't deadlock
+  generator.reset();
+}
+
 struct SlowEmptyIterator {
   Result<TestInt> Next() {
     if (called_) {
diff --git a/cpp/src/arrow/util/thread_pool.cc b/cpp/src/arrow/util/thread_pool.cc
index 873b9335e74..cd523609d27 100644
--- a/cpp/src/arrow/util/thread_pool.cc
+++ b/cpp/src/arrow/util/thread_pool.cc
@@ -272,6 +272,12 @@ int ThreadPool::GetCapacity() {
   return state_->desired_capacity_;
 }
 
+int ThreadPool::GetNumTasks() {
+  ProtectAgainstFork();
+  std::unique_lock<std::mutex> lock(state_->mutex_);
+  return state_->tasks_queued_or_running_;
+}
+
 int ThreadPool::GetActualCapacity() {
   ProtectAgainstFork();
   std::unique_lock<std::mutex> lock(state_->mutex_);
diff --git a/cpp/src/arrow/util/thread_pool.h b/cpp/src/arrow/util/thread_pool.h
index c4d4d1869c6..cd964385c6e 100644
--- a/cpp/src/arrow/util/thread_pool.h
+++ b/cpp/src/arrow/util/thread_pool.h
@@ -264,6 +264,9 @@ class ARROW_EXPORT ThreadPool : public Executor {
   // match this value.
   int GetCapacity() override;
 
+  // Return the number of tasks either running or in the queue.
+  int GetNumTasks();
+
   // Dynamically change the number of worker threads.
   //
   // This function always returns immediately.

From cb7a6248d9ea912d0166cc437d2c04cdef4e1ed1 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Wed, 14 Apr 2021 12:21:10 -0400
Subject: [PATCH 029/719] ARROW-12379: [C++] Fix ThreadSanitizer failure in
 SerialExecutor

Closes #10025 from pitrou/ARROW-12379-tsan

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/src/arrow/util/thread_pool.cc      | 43 ++++++++++++---------
 cpp/src/arrow/util/thread_pool.h       |  2 +-
 cpp/src/arrow/util/thread_pool_test.cc | 53 +++++++++++++++++---------
 3 files changed, 61 insertions(+), 37 deletions(-)

diff --git a/cpp/src/arrow/util/thread_pool.cc b/cpp/src/arrow/util/thread_pool.cc
index cd523609d27..6465ebbc6fc 100644
--- a/cpp/src/arrow/util/thread_pool.cc
+++ b/cpp/src/arrow/util/thread_pool.cc
@@ -22,7 +22,6 @@
 #include <deque>
 #include <list>
 #include <mutex>
-#include <queue>
 #include <string>
 #include <thread>
 #include <vector>
@@ -46,44 +45,54 @@ struct Task {
 }  // namespace
 
 struct SerialExecutor::State {
-  std::queue<Task> task_queue;
+  std::deque<Task> task_queue;
   std::mutex mutex;
   std::condition_variable wait_for_tasks;
-  bool finished;
+  bool finished{false};
 };
 
-SerialExecutor::SerialExecutor() : state_(new State()) {}
-SerialExecutor::~SerialExecutor() {}
+SerialExecutor::SerialExecutor() : state_(std::make_shared<State>()) {}
+
+SerialExecutor::~SerialExecutor() = default;
 
 Status SerialExecutor::SpawnReal(TaskHints hints, FnOnce<void()> task,
                                  StopToken stop_token, StopCallback&& stop_callback) {
-  // The serial task queue is truly serial (no mutex needed) but SpawnReal may be called
-  // from external threads (e.g. when transferring back from blocking I/O threads) so a
-  // mutex is needed
+  // While the SerialExecutor runs tasks synchronously on its main thread,
+  // SpawnReal may be called from external threads (e.g. when transferring back
+  // from blocking I/O threads), so we need to keep the state alive *and* to
+  // lock its contents.
+  //
+  // Note that holding the lock while notifying the condition variable may
+  // not be sufficient, as some exit paths in the main thread are unlocked.
+  auto state = state_;
   {
-    std::lock_guard<std::mutex> lg(state_->mutex);
-    state_->task_queue.push(
+    std::lock_guard<std::mutex> lk(state->mutex);
+    state->task_queue.push_back(
         Task{std::move(task), std::move(stop_token), std::move(stop_callback)});
   }
-  state_->wait_for_tasks.notify_one();
+  state->wait_for_tasks.notify_one();
   return Status::OK();
 }
 
 void SerialExecutor::MarkFinished() {
-  std::lock_guard<std::mutex> lk(state_->mutex);
-  state_->finished = true;
-  // Keep the lock when notifying to avoid situations where the SerialExecutor
-  // would start being destroyed while the notify_one() call is still ongoing.
-  state_->wait_for_tasks.notify_one();
+  // Same comment as SpawnReal above
+  auto state = state_;
+  {
+    std::lock_guard<std::mutex> lk(state->mutex);
+    state->finished = true;
+  }
+  state->wait_for_tasks.notify_one();
 }
 
 void SerialExecutor::RunLoop() {
+  // This is called from the SerialExecutor's main thread, so the
+  // state is guaranteed to be kept alive.
   std::unique_lock<std::mutex> lk(state_->mutex);
 
   while (!state_->finished) {
     while (!state_->task_queue.empty()) {
       Task task = std::move(state_->task_queue.front());
-      state_->task_queue.pop();
+      state_->task_queue.pop_front();
       lk.unlock();
       if (!task.stop_token.IsStopRequested()) {
         std::move(task.callable)();
diff --git a/cpp/src/arrow/util/thread_pool.h b/cpp/src/arrow/util/thread_pool.h
index cd964385c6e..c388680befc 100644
--- a/cpp/src/arrow/util/thread_pool.h
+++ b/cpp/src/arrow/util/thread_pool.h
@@ -225,7 +225,7 @@ class ARROW_EXPORT SerialExecutor : public Executor {
 
   // State uses mutex
   struct State;
-  std::unique_ptr<State> state_;
+  std::shared_ptr<State> state_;
 
   template <typename T>
   Result<T> Run(TopLevelTask<T> initial_task) {
diff --git a/cpp/src/arrow/util/thread_pool_test.cc b/cpp/src/arrow/util/thread_pool_test.cc
index 2390f8c1a41..9926ac1a7a4 100644
--- a/cpp/src/arrow/util/thread_pool_test.cc
+++ b/cpp/src/arrow/util/thread_pool_test.cc
@@ -135,6 +135,30 @@ class TestRunSynchronously : public testing::TestWithParam<bool> {
   Status RunVoid(FnOnce<Future<>(Executor*)> top_level_task) {
     return RunSynchronouslyVoid(std::move(top_level_task), UseThreads());
   }
+
+  void TestContinueAfterExternal(bool transfer_to_main_thread) {
+    bool continuation_ran = false;
+    EXPECT_OK_AND_ASSIGN(auto external_pool, ThreadPool::Make(1));
+    auto top_level_task = [&](Executor* executor) {
+      struct Callback {
+        Status operator()(...) {
+          *continuation_ran = true;
+          return Status::OK();
+        }
+        bool* continuation_ran;
+      };
+      auto fut = DeferNotOk(external_pool->Submit([&] {
+        SleepABit();
+        return Status::OK();
+      }));
+      if (transfer_to_main_thread) {
+        fut = executor->Transfer(fut);
+      }
+      return fut.Then(Callback{&continuation_ran});
+    };
+    ASSERT_OK(RunVoid(std::move(top_level_task)));
+    EXPECT_TRUE(continuation_ran);
+  }
 };
 
 TEST_P(TestRunSynchronously, SimpleRun) {
@@ -209,25 +233,16 @@ TEST_P(TestRunSynchronously, StopTokenSubmit) {
 }
 
 TEST_P(TestRunSynchronously, ContinueAfterExternal) {
-  bool continuation_ran = false;
-  EXPECT_OK_AND_ASSIGN(auto mock_io_pool, ThreadPool::Make(1));
-  auto top_level_task = [&](Executor* executor) {
-    struct Callback {
-      Status operator()(...) {
-        continuation_ran = true;
-        return Status::OK();
-      }
-      bool& continuation_ran;
-    };
-    return executor
-        ->Transfer(DeferNotOk(mock_io_pool->Submit([&] {
-          SleepABit();
-          return Status::OK();
-        })))
-        .Then(Callback{continuation_ran});
-  };
-  ASSERT_OK(RunVoid(std::move(top_level_task)));
-  EXPECT_TRUE(continuation_ran);
+  // The future returned by the top-level task completes on another thread.
+  // This can trigger delicate race conditions in the SerialExecutor code,
+  // especially destruction.
+  this->TestContinueAfterExternal(/*transfer_to_main_thread=*/false);
+}
+
+TEST_P(TestRunSynchronously, ContinueAfterExternalTransferred) {
+  // Like above, but the future is transferred back to the serial executor
+  // after completion on an external thread.
+  this->TestContinueAfterExternal(/*transfer_to_main_thread=*/true);
 }
 
 TEST_P(TestRunSynchronously, SchedulerAbort) {

From 8f35024f54b2580edf84a49337d2c9e1600685ba Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Wed, 14 Apr 2021 13:50:28 -0400
Subject: [PATCH 030/719] ARROW-12385: [R] [CI] fix cran picking in CI

wrap in local, update OSes in RSPM

Closes #10031 from jonkeane/ARROW-12385

Authored-by: Jonathan Keane <jkeane@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 ci/etc/rprofile | 90 +++++++++++++++++++++++++------------------------
 1 file changed, 46 insertions(+), 44 deletions(-)

diff --git a/ci/etc/rprofile b/ci/etc/rprofile
index 524eb50aee0..229a0101a25 100644
--- a/ci/etc/rprofile
+++ b/ci/etc/rprofile
@@ -1,51 +1,53 @@
-.pick_cran <- function() {
-  # Return a CRAN repo URL, preferring RSPM binaries if available for this OS
-  rspm_template <- "https://packagemanager.rstudio.com/cran/__linux__/%s/latest"
-  supported_os <- c("xenial", "bionic", "centos7", "opensuse42", "opensuse15")
-
-  if (nzchar(Sys.which("lsb_release"))) {
-    os <- tolower(system("lsb_release -cs", intern = TRUE))
-    if (os %in% supported_os) {
-      return(sprintf(rspm_template, os))
+ local({
+  .pick_cran <- function() {
+    # Return a CRAN repo URL, preferring RSPM binaries if available for this OS
+    rspm_template <- "https://packagemanager.rstudio.com/cran/__linux__/%s/latest"
+    supported_os <- c("focal", "xenial", "bionic", "centos7", "centos8", "opensuse42", "opensuse15", "opensuse152")
+  
+    if (nzchar(Sys.which("lsb_release"))) {
+      os <- tolower(system("lsb_release -cs", intern = TRUE))
+      if (os %in% supported_os) {
+        return(sprintf(rspm_template, os))
+      }
     }
-  }
-  if (file.exists("/etc/os-release")) {
-    os_release <- readLines("/etc/os-release")
-    vals <- sub("^.*=(.*)$", "\\1", os_release)
-    os <- intersect(vals, supported_os)
-    if (length(os)) {
-      # e.g. "bionic"
-      return(sprintf(rspm_template, os))
-    } else {
-      names(vals) <- sub("^(.*)=.*$", "\\1", os_release)
-      if (vals["ID"] == "opensuse") {
-        version <- sub('^"?([0-9]+).*"?.*$', "\\1", vals["VERSION_ID"])
-        os <- paste0("opensuse", version)
-        if (os %in% supported_os) {
-          return(sprintf(rspm_template, os))
+    if (file.exists("/etc/os-release")) {
+      os_release <- readLines("/etc/os-release")
+      vals <- sub("^.*=(.*)$", "\\1", os_release)
+      os <- intersect(vals, supported_os)
+      if (length(os)) {
+        # e.g. "bionic"
+        return(sprintf(rspm_template, os))
+      } else {
+        names(vals) <- sub("^(.*)=.*$", "\\1", os_release)
+        if (vals["ID"] == "opensuse") {
+          version <- sub('^"?([0-9]+).*"?.*$', "\\1", vals["VERSION_ID"])
+          os <- paste0("opensuse", version)
+          if (os %in% supported_os) {
+            return(sprintf(rspm_template, os))
+          }
         }
       }
     }
-  }
-  if (file.exists("/etc/system-release")) {
-    # Something like "CentOS Linux release 7.7.1908 (Core)"
-    system_release <- tolower(utils::head(readLines("/etc/system-release"), 1))
-    # Extract from that the distro and the major version number
-    os <- sub("^([a-z]+) .* ([0-9]+).*$", "\\1\\2", system_release)
-    if (os %in% supported_os) {
-      return(sprintf(rspm_template, os))
+    if (file.exists("/etc/system-release")) {
+      # Something like "CentOS Linux release 7.7.1908 (Core)"
+      system_release <- tolower(utils::head(readLines("/etc/system-release"), 1))
+      # Extract from that the distro and the major version number
+      os <- sub("^([a-z]+) .* ([0-9]+).*$", "\\1\\2", system_release)
+      if (os %in% supported_os) {
+        return(sprintf(rspm_template, os))
+      }
     }
+  
+    return("https://cloud.r-project.org")
   }
-
-  return("https://cloud.r-project.org")
-}
-
-options(
-  Ncpus = parallel::detectCores(),
-  repos = tryCatch(.pick_cran(), error = function(e) "https://cloud.r-project.org"),
-  HTTPUserAgent = sprintf(
-    'R/%s R (%s)',
-    getRversion(),
-    paste(getRversion(), R.version$platform, R.version$arch, R.version$os)
+  
+  options(
+    Ncpus = parallel::detectCores(),
+    repos = tryCatch(.pick_cran(), error = function(e) "https://cloud.r-project.org"),
+    HTTPUserAgent = sprintf(
+      'R/%s R (%s)',
+      getRversion(),
+      paste(getRversion(), R.version$platform, R.version$arch, R.version$os)
+    )
   )
-)
+})

From a5356c36b4f1d1822578f8619e4b770d2145198e Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Thu, 15 Apr 2021 04:22:44 +0900
Subject: [PATCH 031/719] ARROW-12353: [Packaging][deb] Rename -archive-keyring
 to -apt-source

Because lintian recommends that a package that puts files to
/etc/apt/sources.list.d/ uses -apt-source suffix.

See also: https://lintian.debian.net/tags/package-installs-apt-sources

This also changes repository URL to
https://apache.jfrog.io/artifactory/ from https://apache.bintray.com/ .

Closes #10006 from kou/packaging-linux-apt-source

Authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 dev/release/00-prepare-test.rb                |  4 +-
 dev/release/rat_exclude_files.txt             | 10 ++---
 dev/release/verify-apt.sh                     | 39 +++++++------------
 dev/tasks/linux-packages/Rakefile             |  2 +-
 .../Rakefile                                  |  6 +--
 .../apt/debian-bullseye/Dockerfile            |  0
 .../apt/debian-buster/Dockerfile              |  0
 .../apt/ubuntu-bionic/Dockerfile              |  0
 .../apt/ubuntu-focal/Dockerfile               |  0
 .../apt/ubuntu-groovy/Dockerfile              |  0
 .../apt/ubuntu-xenial/Dockerfile              |  0
 .../debian/apache-arrow-apt-source.install}   |  0
 .../apache-arrow-apt-source/debian/changelog  |  0
 .../debian/compat                             |  0
 .../debian/control                            |  6 ++-
 .../debian/copyright                          |  0
 .../debian/rules                              |  8 ++--
 .../debian/source/format                      |  0
 .../debian/changelog                          | 29 --------------
 .../linux-packages/travis.linux.arm64.yml     |  2 +-
 20 files changed, 34 insertions(+), 72 deletions(-)
 rename dev/tasks/linux-packages/{apache-arrow-archive-keyring => apache-arrow-apt-source}/Rakefile (92%)
 rename dev/tasks/linux-packages/{apache-arrow-archive-keyring => apache-arrow-apt-source}/apt/debian-bullseye/Dockerfile (100%)
 rename dev/tasks/linux-packages/{apache-arrow-archive-keyring => apache-arrow-apt-source}/apt/debian-buster/Dockerfile (100%)
 rename dev/tasks/linux-packages/{apache-arrow-archive-keyring => apache-arrow-apt-source}/apt/ubuntu-bionic/Dockerfile (100%)
 rename dev/tasks/linux-packages/{apache-arrow-archive-keyring => apache-arrow-apt-source}/apt/ubuntu-focal/Dockerfile (100%)
 rename dev/tasks/linux-packages/{apache-arrow-archive-keyring => apache-arrow-apt-source}/apt/ubuntu-groovy/Dockerfile (100%)
 rename dev/tasks/linux-packages/{apache-arrow-archive-keyring => apache-arrow-apt-source}/apt/ubuntu-xenial/Dockerfile (100%)
 rename dev/tasks/linux-packages/{apache-arrow-archive-keyring/debian/apache-arrow-archive-keyring.install => apache-arrow-apt-source/debian/apache-arrow-apt-source.install} (100%)
 create mode 100644 dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog
 rename dev/tasks/linux-packages/{apache-arrow-archive-keyring => apache-arrow-apt-source}/debian/compat (100%)
 rename dev/tasks/linux-packages/{apache-arrow-archive-keyring => apache-arrow-apt-source}/debian/control (77%)
 rename dev/tasks/linux-packages/{apache-arrow-archive-keyring => apache-arrow-apt-source}/debian/copyright (100%)
 rename dev/tasks/linux-packages/{apache-arrow-archive-keyring => apache-arrow-apt-source}/debian/rules (75%)
 rename dev/tasks/linux-packages/{apache-arrow-archive-keyring => apache-arrow-apt-source}/debian/source/format (100%)
 delete mode 100644 dev/tasks/linux-packages/apache-arrow-archive-keyring/debian/changelog

diff --git a/dev/release/00-prepare-test.rb b/dev/release/00-prepare-test.rb
index 53bd5e89bf2..9e2a798e221 100644
--- a/dev/release/00-prepare-test.rb
+++ b/dev/release/00-prepare-test.rb
@@ -97,9 +97,9 @@ def test_linux_packages
     today = Time.now.utc.strftime("%a %b %d %Y")
     expected_changes = [
       {
-        path: "#{base_dir}/apache-arrow-archive-keyring/debian/changelog",
+        path: "#{base_dir}/apache-arrow-apt-source/debian/changelog",
         sampled_hunks: [
-          "+apache-arrow-archive-keyring (#{@release_version}-1) " +
+          "+apache-arrow-apt-source (#{@release_version}-1) " +
           "unstable; urgency=low",
         ],
       },
diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt
index cee925ce79e..ce32044c902 100644
--- a/dev/release/rat_exclude_files.txt
+++ b/dev/release/rat_exclude_files.txt
@@ -50,11 +50,11 @@ dev/archery/archery/tests/fixtures/*
 dev/archery/archery/crossbow/tests/fixtures/*
 dev/release/rat_exclude_files.txt
 dev/tasks/homebrew-formulae/apache-arrow.rb
-dev/tasks/linux-packages/apache-arrow-archive-keyring/debian/apache-arrow-archive-keyring.install
-dev/tasks/linux-packages/apache-arrow-archive-keyring/debian/compat
-dev/tasks/linux-packages/apache-arrow-archive-keyring/debian/control
-dev/tasks/linux-packages/apache-arrow-archive-keyring/debian/rules
-dev/tasks/linux-packages/apache-arrow-archive-keyring/debian/source/format
+dev/tasks/linux-packages/apache-arrow-apt-source/debian/apache-arrow-apt-source.install
+dev/tasks/linux-packages/apache-arrow-apt-source/debian/compat
+dev/tasks/linux-packages/apache-arrow-apt-source/debian/control
+dev/tasks/linux-packages/apache-arrow-apt-source/debian/rules
+dev/tasks/linux-packages/apache-arrow-apt-source/debian/source/format
 dev/tasks/linux-packages/apache-arrow/debian/compat
 dev/tasks/linux-packages/apache-arrow/debian/control.in
 dev/tasks/linux-packages/apache-arrow/debian/gir1.2-arrow-1.0.install
diff --git a/dev/release/verify-apt.sh b/dev/release/verify-apt.sh
index 57f44fa212b..e7b87a3a4da 100755
--- a/dev/release/verify-apt.sh
+++ b/dev/release/verify-apt.sh
@@ -21,20 +21,16 @@ set -exu
 
 if [ $# -lt 2 ]; then
   echo "Usage: $0 VERSION rc"
-  echo "       $0 VERSION rc BINTRAY_REPOSITORY"
   echo "       $0 VERSION release"
-  echo "       $0 VERSION release BINTRAY_REPOSITORY"
   echo "       $0 VERSION local"
   echo " e.g.: $0 0.13.0 rc           # Verify 0.13.0 RC"
   echo " e.g.: $0 0.13.0 release      # Verify 0.13.0"
-  echo " e.g.: $0 0.13.0 rc kou/arrow # Verify 0.13.0 RC at https://bintray.com/kou/arrow"
   echo " e.g.: $0 0.13.0-dev20210203 local # Verify 0.13.0-dev20210203 on local"
   exit 1
 fi
 
 VERSION="$1"
 TYPE="$2"
-BINTRAY_REPOSITORY="${3:-apache/arrow}"
 
 local_prefix="/arrow/dev/tasks/linux-packages"
 
@@ -47,9 +43,9 @@ apt install -y -V \
 
 code_name="$(lsb_release --codename --short)"
 distribution="$(lsb_release --id --short | tr 'A-Z' 'a-z')"
-bintray_base_url="https://dl.bintray.com/${BINTRAY_REPOSITORY}/${distribution}"
+artifactory_base_url="https://apache.jfrog.io/artifactory/arrow/${distribution}"
 if [ "${TYPE}" = "rc" ]; then
-  bintray_base_url="${bintray_base_url}-rc"
+  artifactory_base_url+="-rc"
 fi
 
 have_flight=yes
@@ -80,18 +76,18 @@ if [ "${TYPE}" = "local" ]; then
       ;;
   esac
   package_version+="-1"
-  keyring_archive_path="${local_prefix}/apt/repositories"
-  keyring_archive_path+="/${distribution}/pool/${code_name}/main"
-  keyring_archive_path+="/a/apache-arrow-archive-keyring"
-  keyring_archive_path+="/apache-arrow-archive-keyring_${package_version}_all.deb"
-  apt install -y -V "${keyring_archive_path}"
+  apt_source_path="${local_prefix}/apt/repositories"
+  apt_source_path+="/${distribution}/pool/${code_name}/main"
+  apt_source_path+="/a/apache-arrow-apt-source"
+  apt_source_path+="/apache-arrow-apt-source_${package_version}_all.deb"
+  apt install -y -V "${apt_source_path}"
 else
   package_version="${VERSION}-1"
-  keyring_archive_base_name="apache-arrow-archive-keyring-latest-${code_name}.deb"
+  apt_source_base_name="apache-arrow-apt-source-latest-${code_name}.deb"
   curl \
-    --output "${keyring_archive_base_name}" \
-    "${bintray_base_url}/${keyring_archive_base_name}"
-  apt install -y -V "./${keyring_archive_base_name}"
+    --output "${apt_source_base_name}" \
+    "${artifactory_base_url}/${apt_source_base_name}"
+  apt install -y -V "./${apt_source_base_name}"
 fi
 
 if [ "${TYPE}" = "local" ]; then
@@ -103,21 +99,14 @@ if [ "${TYPE}" = "local" ]; then
   if [ -f "${keys}" ]; then
     gpg \
       --no-default-keyring \
-      --keyring /usr/share/keyrings/apache-arrow-archive-keyring.gpg \
+      --keyring /usr/share/keyrings/apache-arrow-apt-source.gpg \
       --import "${keys}"
   fi
 else
-  if [ "${BINTRAY_REPOSITORY}" = "apache/arrow" ]; then
-    if [ "${TYPE}" = "rc" ]; then
-      sed \
-        -i"" \
-        -e "s,^URIs: \\(.*\\)/,URIs: \\1-rc/,g" \
-        /etc/apt/sources.list.d/apache-arrow.sources
-    fi
-  else
+  if [ "${TYPE}" = "rc" ]; then
     sed \
       -i"" \
-      -e "s,^URIs: .*,URIs: ${bintray_base_url}/,g" \
+      -e "s,^URIs: \\(.*\\)/,URIs: \\1-rc/,g" \
       /etc/apt/sources.list.d/apache-arrow.sources
   fi
 fi
diff --git a/dev/tasks/linux-packages/Rakefile b/dev/tasks/linux-packages/Rakefile
index e45a56c8bb1..a84a43ae517 100644
--- a/dev/tasks/linux-packages/Rakefile
+++ b/dev/tasks/linux-packages/Rakefile
@@ -24,7 +24,7 @@ require_relative "helper"
 
 packages = [
   "apache-arrow",
-  "apache-arrow-archive-keyring",
+  "apache-arrow-apt-source",
   "apache-arrow-release",
 ]
 
diff --git a/dev/tasks/linux-packages/apache-arrow-archive-keyring/Rakefile b/dev/tasks/linux-packages/apache-arrow-apt-source/Rakefile
similarity index 92%
rename from dev/tasks/linux-packages/apache-arrow-archive-keyring/Rakefile
rename to dev/tasks/linux-packages/apache-arrow-apt-source/Rakefile
index 0f91e0a5eb1..210fa951ee4 100644
--- a/dev/tasks/linux-packages/apache-arrow-archive-keyring/Rakefile
+++ b/dev/tasks/linux-packages/apache-arrow-apt-source/Rakefile
@@ -20,12 +20,12 @@
 require_relative "../helper"
 require_relative "../package-task"
 
-class ApacheArrowArchiveKeyringPackageTask < PackageTask
+class ApacheArrowAptSourcePackageTask < PackageTask
   include Helper::ApacheArrow
 
   def initialize
     release_time = detect_release_time
-    super("apache-arrow-archive-keyring",
+    super("apache-arrow-apt-source",
           detect_version(release_time),
           release_time,
           :rc_build_type => :release)
@@ -60,5 +60,5 @@ class ApacheArrowArchiveKeyringPackageTask < PackageTask
   end
 end
 
-task = ApacheArrowArchiveKeyringPackageTask.new
+task = ApacheArrowAptSourcePackageTask.new
 task.define
diff --git a/dev/tasks/linux-packages/apache-arrow-archive-keyring/apt/debian-bullseye/Dockerfile b/dev/tasks/linux-packages/apache-arrow-apt-source/apt/debian-bullseye/Dockerfile
similarity index 100%
rename from dev/tasks/linux-packages/apache-arrow-archive-keyring/apt/debian-bullseye/Dockerfile
rename to dev/tasks/linux-packages/apache-arrow-apt-source/apt/debian-bullseye/Dockerfile
diff --git a/dev/tasks/linux-packages/apache-arrow-archive-keyring/apt/debian-buster/Dockerfile b/dev/tasks/linux-packages/apache-arrow-apt-source/apt/debian-buster/Dockerfile
similarity index 100%
rename from dev/tasks/linux-packages/apache-arrow-archive-keyring/apt/debian-buster/Dockerfile
rename to dev/tasks/linux-packages/apache-arrow-apt-source/apt/debian-buster/Dockerfile
diff --git a/dev/tasks/linux-packages/apache-arrow-archive-keyring/apt/ubuntu-bionic/Dockerfile b/dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-bionic/Dockerfile
similarity index 100%
rename from dev/tasks/linux-packages/apache-arrow-archive-keyring/apt/ubuntu-bionic/Dockerfile
rename to dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-bionic/Dockerfile
diff --git a/dev/tasks/linux-packages/apache-arrow-archive-keyring/apt/ubuntu-focal/Dockerfile b/dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-focal/Dockerfile
similarity index 100%
rename from dev/tasks/linux-packages/apache-arrow-archive-keyring/apt/ubuntu-focal/Dockerfile
rename to dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-focal/Dockerfile
diff --git a/dev/tasks/linux-packages/apache-arrow-archive-keyring/apt/ubuntu-groovy/Dockerfile b/dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-groovy/Dockerfile
similarity index 100%
rename from dev/tasks/linux-packages/apache-arrow-archive-keyring/apt/ubuntu-groovy/Dockerfile
rename to dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-groovy/Dockerfile
diff --git a/dev/tasks/linux-packages/apache-arrow-archive-keyring/apt/ubuntu-xenial/Dockerfile b/dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-xenial/Dockerfile
similarity index 100%
rename from dev/tasks/linux-packages/apache-arrow-archive-keyring/apt/ubuntu-xenial/Dockerfile
rename to dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-xenial/Dockerfile
diff --git a/dev/tasks/linux-packages/apache-arrow-archive-keyring/debian/apache-arrow-archive-keyring.install b/dev/tasks/linux-packages/apache-arrow-apt-source/debian/apache-arrow-apt-source.install
similarity index 100%
rename from dev/tasks/linux-packages/apache-arrow-archive-keyring/debian/apache-arrow-archive-keyring.install
rename to dev/tasks/linux-packages/apache-arrow-apt-source/debian/apache-arrow-apt-source.install
diff --git a/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog b/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/dev/tasks/linux-packages/apache-arrow-archive-keyring/debian/compat b/dev/tasks/linux-packages/apache-arrow-apt-source/debian/compat
similarity index 100%
rename from dev/tasks/linux-packages/apache-arrow-archive-keyring/debian/compat
rename to dev/tasks/linux-packages/apache-arrow-apt-source/debian/compat
diff --git a/dev/tasks/linux-packages/apache-arrow-archive-keyring/debian/control b/dev/tasks/linux-packages/apache-arrow-apt-source/debian/control
similarity index 77%
rename from dev/tasks/linux-packages/apache-arrow-archive-keyring/debian/control
rename to dev/tasks/linux-packages/apache-arrow-apt-source/debian/control
index 3855234a1bb..f54d52f98a2 100644
--- a/dev/tasks/linux-packages/apache-arrow-archive-keyring/debian/control
+++ b/dev/tasks/linux-packages/apache-arrow-apt-source/debian/control
@@ -1,4 +1,4 @@
-Source: apache-arrow-archive-keyring
+Source: apache-arrow-apt-source
 Section: misc
 Priority: important
 Maintainer: Apache Arrow Developers <dev@arrow.apache.org>
@@ -9,9 +9,11 @@ Build-Depends:
 Standards-Version: 3.9.7
 Homepage: https://arrow.apache.org/
 
-Package: apache-arrow-archive-keyring
+Package: apache-arrow-apt-source
 Section: misc
 Architecture: all
+Replaces: apache-arrow-archive-keyring
+Breaks: apache-arrow-archive-keyring
 Depends:
   ${misc:Depends},
   apt-transport-https,
diff --git a/dev/tasks/linux-packages/apache-arrow-archive-keyring/debian/copyright b/dev/tasks/linux-packages/apache-arrow-apt-source/debian/copyright
similarity index 100%
rename from dev/tasks/linux-packages/apache-arrow-archive-keyring/debian/copyright
rename to dev/tasks/linux-packages/apache-arrow-apt-source/debian/copyright
diff --git a/dev/tasks/linux-packages/apache-arrow-archive-keyring/debian/rules b/dev/tasks/linux-packages/apache-arrow-apt-source/debian/rules
similarity index 75%
rename from dev/tasks/linux-packages/apache-arrow-archive-keyring/debian/rules
rename to dev/tasks/linux-packages/apache-arrow-apt-source/debian/rules
index ec0e386b130..bf7a85c8c8b 100755
--- a/dev/tasks/linux-packages/apache-arrow-archive-keyring/debian/rules
+++ b/dev/tasks/linux-packages/apache-arrow-apt-source/debian/rules
@@ -12,22 +12,22 @@ export DH_OPTIONS
 override_dh_auto_build:
 	gpg \
 	  --no-default-keyring \
-	  --keyring ./apache-arrow-archive-keyring.gpg \
+	  --keyring ./apache-arrow-apt-source.gpg \
 	  --import KEYS
 
 	( \
 	  distribution=$$(lsb_release --id --short | tr 'A-Z' 'a-z'); \
 	  code_name=$$(lsb_release --codename --short); \
 	  echo "Types: deb deb-src"; \
-	  echo "URIs: https://apache.bintray.com/arrow/$${distribution}/"; \
+	  echo "URIs: https://apache.jfrog.io/artifactory/arrow/$${distribution}/"; \
 	  echo "Suites: $${code_name}"; \
 	  echo "Components: main"; \
-	  echo "Signed-By: /usr/share/keyrings/apache-arrow-archive-keyring.gpg"; \
+	  echo "Signed-By: /usr/share/keyrings/apache-arrow-apt-source.gpg"; \
 	) > apache-arrow.sources
 
 override_dh_install:
 	install -d debian/tmp/usr/share/keyrings/
-	install -m 0644 apache-arrow-archive-keyring.gpg \
+	install -m 0644 apache-arrow-apt-source.gpg \
 	  debian/tmp/usr/share/keyrings/
 
 	install -d debian/tmp/etc/apt/sources.list.d/
diff --git a/dev/tasks/linux-packages/apache-arrow-archive-keyring/debian/source/format b/dev/tasks/linux-packages/apache-arrow-apt-source/debian/source/format
similarity index 100%
rename from dev/tasks/linux-packages/apache-arrow-archive-keyring/debian/source/format
rename to dev/tasks/linux-packages/apache-arrow-apt-source/debian/source/format
diff --git a/dev/tasks/linux-packages/apache-arrow-archive-keyring/debian/changelog b/dev/tasks/linux-packages/apache-arrow-archive-keyring/debian/changelog
deleted file mode 100644
index 22fba76301e..00000000000
--- a/dev/tasks/linux-packages/apache-arrow-archive-keyring/debian/changelog
+++ /dev/null
@@ -1,29 +0,0 @@
-apache-arrow-archive-keyring (3.0.0-1) unstable; urgency=low
-
-  * New upstream release.
-
- -- Krisztián Szűcs <szucs.krisztian@gmail.com>  Mon, 18 Jan 2021 21:33:18 -0000
-
-apache-arrow-archive-keyring (2.0.0-1) unstable; urgency=low
-
-  * New upstream release.
-
- -- Krisztián Szűcs <szucs.krisztian@gmail.com>  Mon, 12 Oct 2020 23:38:01 -0000
-
-apache-arrow-archive-keyring (1.0.0-1) unstable; urgency=low
-
-  * New upstream release.
-
- -- Krisztián Szűcs <szucs.krisztian@gmail.com>  Mon, 20 Jul 2020 20:41:07 -0000
-
-apache-arrow-archive-keyring (0.17.0-1) unstable; urgency=low
-
-  * New upstream release.
-
- -- Krisztián Szűcs <szucs.krisztian@gmail.com>  Thu, 16 Apr 2020 12:05:43 -0000
-
-apache-arrow-archive-keyring (0.16.0-1) unstable; urgency=low
-
-  * New upstream release.
-
- -- Krisztián Szűcs <szucs.krisztian@gmail.com>  Thu, 30 Jan 2020 20:21:44 -0000
diff --git a/dev/tasks/linux-packages/travis.linux.arm64.yml b/dev/tasks/linux-packages/travis.linux.arm64.yml
index aba604161d8..cf8bed3f91f 100644
--- a/dev/tasks/linux-packages/travis.linux.arm64.yml
+++ b/dev/tasks/linux-packages/travis.linux.arm64.yml
@@ -95,7 +95,7 @@ script:
   - rake version:update
   - |
       rake docker:pull || :
-  - pushd apache-arrow-archive-keyring/apt
+  - pushd apache-arrow-apt-source/apt
   - |
       for target in debian-* ubuntu-*; do
         cp -a ${target} ${target}-arm64

From a865dc9f7a58528ca54938c228a954cdef41832e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Thu, 15 Apr 2021 04:33:18 +0900
Subject: [PATCH 032/719] ARROW-12375: [Release] Remove rebase post-release
 scripts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We're going to release from a maintenance branch from now on, so we won't rebase neither the master branch nor the pull requests again.

Closes #10022 from kszucs/ARROW-12375

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 dev/release/generate_force_push_script.py | 61 -----------------------
 dev/release/post-00-rebase.sh             | 42 ----------------
 2 files changed, 103 deletions(-)
 delete mode 100755 dev/release/generate_force_push_script.py
 delete mode 100755 dev/release/post-00-rebase.sh

diff --git a/dev/release/generate_force_push_script.py b/dev/release/generate_force_push_script.py
deleted file mode 100755
index b6cd760bc60..00000000000
--- a/dev/release/generate_force_push_script.py
+++ /dev/null
@@ -1,61 +0,0 @@
-#!/usr/bin/python
-##############################################################################
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-##############################################################################
-
-# This script generates a series of shell commands
-# to rebase all open pull requests off of master
-# and force push the updates.
-
-from http.client import HTTPSConnection
-import json
-from collections import defaultdict
-
-client = HTTPSConnection('api.github.com')
-client.request('GET',
-               '/repos/apache/arrow/pulls?state=open&per_page=100',
-               headers={'User-Agent': 'ApacheArrowRebaser'})
-response = client.getresponse()
-json_content = response.read()
-if response.status != 200:
-    error_msg = 'GitHub connection error:{}'.format(json_content)
-    raise Exception(error_msg)
-
-parsed_content = json.loads(json_content)
-if len(parsed_content) == 100:
-    print("# WARNING: Only the most recent 100 PRs will be processed")
-
-repos = defaultdict(list)
-for pr in parsed_content:
-    head = pr['head']
-    repos[head['repo']['full_name']].append(head['label'])
-
-for repo, labels in repos.items():
-    print('git clone git@github.com:{}.git'.format(repo))
-    print('cd arrow')
-    print('git remote add upstream https://github.com/apache/arrow.git')
-    print('git fetch --all --prune --tags --force')
-    for label in labels:
-        # Labels are in the form 'user:branch'
-        owner, branch = label.split(':')
-        print('git checkout {}'.format(branch))
-        print('(git rebase upstream/master && git push --force) || ' +
-              '(echo "Rebase failed for {}" && '.format(label) +
-              'git rebase --abort)')
-    print('cd ..')
-    print('rm -rf arrow')
diff --git a/dev/release/post-00-rebase.sh b/dev/release/post-00-rebase.sh
deleted file mode 100755
index c80ce24c2e3..00000000000
--- a/dev/release/post-00-rebase.sh
+++ /dev/null
@@ -1,42 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-set -e
-set -u
-
-if [ "$#" -ne 1 ]; then
-  echo "Usage: $0 <local-release-branch>"
-  exit
-fi
-
-local_release_branch=$1
-
-echo "Fetch the latest commits"
-git fetch --all --prune
-echo "Checkout the master branch"
-git checkout master
-echo "Apply the latest commits on the master branch"
-git rebase apache/master
-echo "Apply the unpushed commits on the local release branch"
-git rebase ${local_release_branch}
-echo "Push the rebased branch to master"
-git push --force apache master
-
-echo "Success! The rebased commits are available here:"
-echo "  https://github.com/apache/arrow/commits/master"

From ccdbbe3b765d4df9b2b91270a69ad78a086f2d77 Mon Sep 17 00:00:00 2001
From: Dominik Moritz <domoritz@gmail.com>
Date: Thu, 15 Apr 2021 04:54:28 +0900
Subject: [PATCH 033/719] ARROW-12384: [JS] Use let/const and clean up eslint
 rules

Closes #10030 from domoritz/fix-style

Authored-by: Dominik Moritz <domoritz@gmail.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 js/.eslintrc.js                |  29 +++----
 js/src/bin/arrow2csv.ts        |   6 +-
 js/src/builder.ts              |   4 +-
 js/src/builder/dictionary.ts   |   4 +-
 js/src/builder/valid.ts        |   2 +-
 js/src/column.ts               |   2 +-
 js/src/compute/dataframe.ts    |   4 +-
 js/src/data.ts                 |   5 +-
 js/src/fb/File.ts              |  18 ++---
 js/src/fb/Message.ts           |  42 +++++------
 js/src/fb/Schema.ts            | 134 ++++++++++++++++-----------------
 js/src/io/adapters.ts          |  10 +--
 js/src/io/file.ts              |   4 +-
 js/src/io/node/iterable.ts     |   6 +-
 js/src/io/stream.ts            |   3 +-
 js/src/ipc/metadata/json.ts    |   2 +
 js/src/ipc/metadata/message.ts |   7 +-
 js/src/ipc/reader.ts           |   6 +-
 js/src/recordbatch.ts          |   2 +-
 js/src/table.ts                |   6 +-
 js/src/type.ts                 |   8 +-
 js/src/util/args.ts            |  20 +++--
 js/src/util/bit.ts             |   4 +-
 js/src/util/bn.ts              |  15 ++--
 js/src/util/buffer.ts          |  12 +--
 js/src/util/int.ts             |  24 +++---
 js/src/util/math.ts            |   8 +-
 js/src/util/recordbatch.ts     |   9 ++-
 js/src/util/vector.ts          |   6 +-
 js/src/vector/chunked.ts       |  22 +++---
 js/src/vector/float.ts         |   6 +-
 js/src/vector/int.ts           |   6 +-
 js/src/vector/row.ts           |   3 +-
 33 files changed, 231 insertions(+), 208 deletions(-)

diff --git a/js/.eslintrc.js b/js/.eslintrc.js
index 7141f1b02d0..6d5020db10b 100644
--- a/js/.eslintrc.js
+++ b/js/.eslintrc.js
@@ -35,7 +35,6 @@ module.exports = {
         "plugin:@typescript-eslint/recommended",
     ],
     rules: {
-        "@typescript-eslint/indent": "off",
         "@typescript-eslint/member-delimiter-style": [
             "error",
             {
@@ -50,9 +49,6 @@ module.exports = {
             },
         ],
         "@typescript-eslint/no-namespace": ["error", { "allowDeclarations": true }],
-        "@typescript-eslint/no-empty-function": "off",
-        "@typescript-eslint/no-unused-expressions": "off",
-        "@typescript-eslint/no-use-before-define": "off",
         "@typescript-eslint/no-require-imports": "error",
         "@typescript-eslint/no-var-requires": "off",  // handled by rule above
         "@typescript-eslint/quotes": [
@@ -65,6 +61,10 @@ module.exports = {
         ],
         "@typescript-eslint/semi": ["error", "always"],
         "@typescript-eslint/type-annotation-spacing": "error",
+        "@typescript-eslint/indent": "off",
+        "@typescript-eslint/no-empty-function": "off",
+        "@typescript-eslint/no-unused-expressions": "off",
+        "@typescript-eslint/no-use-before-define": "off",
         "@typescript-eslint/explicit-module-boundary-types": "off",
         "@typescript-eslint/no-explicit-any": "off",
         "@typescript-eslint/no-misused-new": "off",
@@ -72,25 +72,16 @@ module.exports = {
         "@typescript-eslint/no-non-null-assertion": "off",
         "@typescript-eslint/no-unused-vars": "off",  // ts already takes care of this
 
-        "brace-style": "off",
+        "prefer-const": ["error", {
+            "destructuring": "all"
+        }],
         "curly": ["error", "multi-line"],
+        "brace-style": ["error", "1tbs", { "allowSingleLine": true }],
         "eol-last": "error",
-        "no-empty": "off",
         "no-multiple-empty-lines": "error",
         "no-trailing-spaces": "error",
         "no-var": "error",
-
-        "no-cond-assign": "off",
-
-        // rules for later:
-
-        "prefer-const": ["off"],
-        // "prefer-const": ["error", {
-        //     "destructuring": "all"
-        // }],
-
-        // "one-var": ["error", "never"],
-
-        // "brace-style": ["error", "1tbs", { "allowSingleLine": true }],
+        "no-empty": "off",
+        "no-cond-assign": "off"
     },
 };
diff --git a/js/src/bin/arrow2csv.ts b/js/src/bin/arrow2csv.ts
index dd7236eb923..064b6ee5934 100644
--- a/js/src/bin/arrow2csv.ts
+++ b/js/src/bin/arrow2csv.ts
@@ -89,9 +89,9 @@ function pipeTo(source: NodeJS.ReadableStream, sink: NodeJS.WritableStream, opts
 
 async function *recordBatchReaders(createSourceStream: () => NodeJS.ReadableStream) {
 
-    let json = new AsyncByteQueue();
-    let stream = new AsyncByteQueue();
-    let source = createSourceStream();
+    const json = new AsyncByteQueue();
+    const stream = new AsyncByteQueue();
+    const source = createSourceStream();
     let reader: RecordBatchReader | null = null;
     let readers: AsyncIterable<RecordBatchReader> | null = null;
     // tee the input source, just in case it's JSON
diff --git a/js/src/builder.ts b/js/src/builder.ts
index 5b7da80a7a8..86db953065a 100644
--- a/js/src/builder.ts
+++ b/js/src/builder.ts
@@ -492,7 +492,7 @@ function throughIterable<T extends DataType = any, TNull = any>(options: Iterabl
     const sizeProperty: 'length' | 'byteLength' = queueingStrategy !== 'bytes' ? 'length' : 'byteLength';
     return function*(source: Iterable<T['TValue'] | TNull>) {
         let numChunks = 0;
-        let builder = Builder.new(options);
+        const builder = Builder.new(options);
         for (const value of source) {
             if (builder.append(value)[sizeProperty] >= highWaterMark) {
                 ++numChunks && (yield builder.toVector());
@@ -514,7 +514,7 @@ function throughAsyncIterable<T extends DataType = any, TNull = any>(options: It
     const sizeProperty: 'length' | 'byteLength' = queueingStrategy !== 'bytes' ? 'length' : 'byteLength';
     return async function* (source: Iterable<T['TValue'] | TNull> | AsyncIterable<T['TValue'] | TNull>) {
         let numChunks = 0;
-        let builder = Builder.new(options);
+        const builder = Builder.new(options);
         for await (const value of source) {
             if (builder.append(value)[sizeProperty] >= highWaterMark) {
                 ++numChunks && (yield builder.toVector());
diff --git a/js/src/builder/dictionary.ts b/js/src/builder/dictionary.ts
index dda2df2c11a..6602825dd16 100644
--- a/js/src/builder/dictionary.ts
+++ b/js/src/builder/dictionary.ts
@@ -61,8 +61,8 @@ export class DictionaryBuilder<T extends Dictionary, TNull = any> extends Builde
         return valid;
     }
     public setValue(index: number, value: T['TValue']) {
-        let keysToIndices = this._keysToIndices;
-        let key = this.valueToKey(value);
+        const keysToIndices = this._keysToIndices;
+        const key = this.valueToKey(value);
         let idx = keysToIndices[key];
         if (idx === undefined) {
             keysToIndices[key] = idx = this._dictionaryOffset + this.dictionary.append(value).length - 1;
diff --git a/js/src/builder/valid.ts b/js/src/builder/valid.ts
index c07144610db..ae5b799fb06 100644
--- a/js/src/builder/valid.ts
+++ b/js/src/builder/valid.ts
@@ -47,7 +47,7 @@ export function createIsValidFunction<T extends DataType = any, TNull = any>(nul
     }
 
     let fnBody = '';
-    let noNaNs = nullValues.filter((x) => x === x);
+    const noNaNs = nullValues.filter((x) => x === x);
 
     if (noNaNs.length > 0) {
         fnBody = `
diff --git a/js/src/column.ts b/js/src/column.ts
index 0336e884c7a..48b40e5a1b3 100644
--- a/js/src/column.ts
+++ b/js/src/column.ts
@@ -92,7 +92,7 @@ export class Column<T extends DataType = any>
 
         if (index < 0 || index >= this.numChildren) { return null; }
 
-        let columns = this._children || (this._children = []);
+        const columns = this._children || (this._children = []);
         let column: Column<R>, field: Field<R>, chunks: Vector<R>[];
 
         if (column = columns[index]) { return column; }
diff --git a/js/src/compute/dataframe.ts b/js/src/compute/dataframe.ts
index 4120e386159..ecebce09394 100644
--- a/js/src/compute/dataframe.ts
+++ b/js/src/compute/dataframe.ts
@@ -86,7 +86,7 @@ export class DataFrame<T extends { [key: string]: DataType } = any> extends Tabl
             const keys = (count_by.vector as V<Dictionary>).indices;
             // yield all indices
             for (let index = -1, numRows = batch.length; ++index < numRows;) {
-                let key = keys.get(index);
+                const key = keys.get(index);
                 if (key !== null) { counts[key]++; }
             }
         }
@@ -274,7 +274,7 @@ export class FilteredDataFrame<T extends { [key: string]: DataType } = any> exte
             const keys = (count_by.vector as V<Dictionary>).indices;
             // yield all indices
             for (let index = -1, numRows = batch.length; ++index < numRows;) {
-                let key = keys.get(index);
+                const key = keys.get(index);
                 if (key !== null && predicate(index, batch)) { counts[key]++; }
             }
         }
diff --git a/js/src/data.ts b/js/src/data.ts
index 097a39012c9..2a549088c65 100644
--- a/js/src/data.ts
+++ b/js/src/data.ts
@@ -82,7 +82,7 @@ export class Data<T extends DataType = DataType> {
     }
     public get byteLength(): number {
         let byteLength = 0;
-        let { valueOffsets, values, nullBitmap, typeIds } = this;
+        const { valueOffsets, values, nullBitmap, typeIds } = this;
         valueOffsets && (byteLength += valueOffsets.byteLength);
         values       && (byteLength += values.byteLength);
         nullBitmap   && (byteLength += nullBitmap.byteLength);
@@ -162,7 +162,8 @@ export class Data<T extends DataType = DataType> {
     }
 
     protected _sliceBuffers(offset: number, length: number, stride: number, typeId: T['TType']): Buffers<T> {
-        let arr: any, { buffers } = this;
+        let arr: any;
+        const { buffers } = this;
         // If typeIds exist, slice the typeIds buffer
         (arr = buffers[BufferType.TYPE]) && (buffers[BufferType.TYPE] = arr.subarray(offset, offset + length));
         // If offsets exist, only slice the offsets buffer
diff --git a/js/src/fb/File.ts b/js/src/fb/File.ts
index a82437b187e..5746dd183a5 100644
--- a/js/src/fb/File.ts
+++ b/js/src/fb/File.ts
@@ -47,7 +47,7 @@ export class Footer {
      * @returns MetadataVersion
      */
     version(): NS13596923344997147894.MetadataVersion {
-        let offset = this.bb!.__offset(this.bb_pos, 4);
+        const offset = this.bb!.__offset(this.bb_pos, 4);
         return offset ? /**  */ (this.bb!.readInt16(this.bb_pos + offset)) : NS13596923344997147894.MetadataVersion.V1;
     }
 
@@ -56,7 +56,7 @@ export class Footer {
      * @returns Schema|null
      */
     schema(obj?: NS13596923344997147894.Schema): NS13596923344997147894.Schema | null {
-        let offset = this.bb!.__offset(this.bb_pos, 6);
+        const offset = this.bb!.__offset(this.bb_pos, 6);
         return offset ? (obj || new NS13596923344997147894.Schema()).__init(this.bb!.__indirect(this.bb_pos + offset), this.bb!) : null;
     }
 
@@ -66,7 +66,7 @@ export class Footer {
      * @returns Block
      */
     dictionaries(index: number, obj?: Block): Block | null {
-        let offset = this.bb!.__offset(this.bb_pos, 8);
+        const offset = this.bb!.__offset(this.bb_pos, 8);
         return offset ? (obj || new Block()).__init(this.bb!.__vector(this.bb_pos + offset) + index * 24, this.bb!) : null;
     }
 
@@ -74,7 +74,7 @@ export class Footer {
      * @returns number
      */
     dictionariesLength(): number {
-        let offset = this.bb!.__offset(this.bb_pos, 8);
+        const offset = this.bb!.__offset(this.bb_pos, 8);
         return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0;
     }
 
@@ -84,7 +84,7 @@ export class Footer {
      * @returns Block
      */
     recordBatches(index: number, obj?: Block): Block | null {
-        let offset = this.bb!.__offset(this.bb_pos, 10);
+        const offset = this.bb!.__offset(this.bb_pos, 10);
         return offset ? (obj || new Block()).__init(this.bb!.__vector(this.bb_pos + offset) + index * 24, this.bb!) : null;
     }
 
@@ -92,7 +92,7 @@ export class Footer {
      * @returns number
      */
     recordBatchesLength(): number {
-        let offset = this.bb!.__offset(this.bb_pos, 10);
+        const offset = this.bb!.__offset(this.bb_pos, 10);
         return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0;
     }
 
@@ -104,7 +104,7 @@ export class Footer {
      * @returns KeyValue
      */
     customMetadata(index: number, obj?: NS13596923344997147894.KeyValue): NS13596923344997147894.KeyValue | null {
-        let offset = this.bb!.__offset(this.bb_pos, 12);
+        const offset = this.bb!.__offset(this.bb_pos, 12);
         return offset ? (obj || new NS13596923344997147894.KeyValue()).__init(this.bb!.__indirect(this.bb!.__vector(this.bb_pos + offset) + index * 4), this.bb!) : null;
     }
 
@@ -112,7 +112,7 @@ export class Footer {
      * @returns number
      */
     customMetadataLength(): number {
-        let offset = this.bb!.__offset(this.bb_pos, 12);
+        const offset = this.bb!.__offset(this.bb_pos, 12);
         return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0;
     }
 
@@ -205,7 +205,7 @@ export class Footer {
      * @returns flatbuffers.Offset
      */
     static endFooter(builder: flatbuffers.Builder): flatbuffers.Offset {
-        let offset = builder.endObject();
+        const offset = builder.endObject();
         return offset;
     }
 
diff --git a/js/src/fb/Message.ts b/js/src/fb/Message.ts
index da240d96bf3..973eb042534 100644
--- a/js/src/fb/Message.ts
+++ b/js/src/fb/Message.ts
@@ -161,7 +161,7 @@ export class BodyCompression {
      * @returns CompressionType
      */
     codec(): CompressionType {
-        let offset = this.bb!.__offset(this.bb_pos, 4);
+        const offset = this.bb!.__offset(this.bb_pos, 4);
         return offset ? /**  */ (this.bb!.readInt8(this.bb_pos + offset)) : CompressionType.LZ4_FRAME;
     }
 
@@ -171,7 +171,7 @@ export class BodyCompression {
      * @returns BodyCompressionMethod
      */
     method(): BodyCompressionMethod {
-        let offset = this.bb!.__offset(this.bb_pos, 6);
+        const offset = this.bb!.__offset(this.bb_pos, 6);
         return offset ? /**  */ (this.bb!.readInt8(this.bb_pos + offset)) : BodyCompressionMethod.BUFFER;
     }
 
@@ -203,7 +203,7 @@ export class BodyCompression {
      * @returns flatbuffers.Offset
      */
     static endBodyCompression(builder: flatbuffers.Builder): flatbuffers.Offset {
-        let offset = builder.endObject();
+        const offset = builder.endObject();
         return offset;
     }
 
@@ -262,7 +262,7 @@ export class RecordBatch {
      * @returns flatbuffers.Long
      */
     length(): flatbuffers.Long {
-        let offset = this.bb!.__offset(this.bb_pos, 4);
+        const offset = this.bb!.__offset(this.bb_pos, 4);
         return offset ? this.bb!.readInt64(this.bb_pos + offset) : this.bb!.createLong(0, 0);
     }
 
@@ -274,7 +274,7 @@ export class RecordBatch {
      * @returns FieldNode
      */
     nodes(index: number, obj?: FieldNode): FieldNode | null {
-        let offset = this.bb!.__offset(this.bb_pos, 6);
+        const offset = this.bb!.__offset(this.bb_pos, 6);
         return offset ? (obj || new FieldNode()).__init(this.bb!.__vector(this.bb_pos + offset) + index * 16, this.bb!) : null;
     }
 
@@ -282,7 +282,7 @@ export class RecordBatch {
      * @returns number
      */
     nodesLength(): number {
-        let offset = this.bb!.__offset(this.bb_pos, 6);
+        const offset = this.bb!.__offset(this.bb_pos, 6);
         return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0;
     }
 
@@ -299,7 +299,7 @@ export class RecordBatch {
      * @returns Buffer
      */
     buffers(index: number, obj?: NS13596923344997147894.Buffer): NS13596923344997147894.Buffer | null {
-        let offset = this.bb!.__offset(this.bb_pos, 8);
+        const offset = this.bb!.__offset(this.bb_pos, 8);
         return offset ? (obj || new NS13596923344997147894.Buffer()).__init(this.bb!.__vector(this.bb_pos + offset) + index * 16, this.bb!) : null;
     }
 
@@ -307,7 +307,7 @@ export class RecordBatch {
      * @returns number
      */
     buffersLength(): number {
-        let offset = this.bb!.__offset(this.bb_pos, 8);
+        const offset = this.bb!.__offset(this.bb_pos, 8);
         return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0;
     }
 
@@ -318,7 +318,7 @@ export class RecordBatch {
      * @returns BodyCompression|null
      */
     compression(obj?: BodyCompression): BodyCompression | null {
-        let offset = this.bb!.__offset(this.bb_pos, 10);
+        const offset = this.bb!.__offset(this.bb_pos, 10);
         return offset ? (obj || new BodyCompression()).__init(this.bb!.__indirect(this.bb_pos + offset), this.bb!) : null;
     }
 
@@ -382,7 +382,7 @@ export class RecordBatch {
      * @returns flatbuffers.Offset
      */
     static endRecordBatch(builder: flatbuffers.Builder): flatbuffers.Offset {
-        let offset = builder.endObject();
+        const offset = builder.endObject();
         return offset;
     }
 
@@ -443,7 +443,7 @@ export class DictionaryBatch {
      * @returns flatbuffers.Long
      */
     id(): flatbuffers.Long {
-        let offset = this.bb!.__offset(this.bb_pos, 4);
+        const offset = this.bb!.__offset(this.bb_pos, 4);
         return offset ? this.bb!.readInt64(this.bb_pos + offset) : this.bb!.createLong(0, 0);
     }
 
@@ -452,7 +452,7 @@ export class DictionaryBatch {
      * @returns RecordBatch|null
      */
     data(obj?: RecordBatch): RecordBatch | null {
-        let offset = this.bb!.__offset(this.bb_pos, 6);
+        const offset = this.bb!.__offset(this.bb_pos, 6);
         return offset ? (obj || new RecordBatch()).__init(this.bb!.__indirect(this.bb_pos + offset), this.bb!) : null;
     }
 
@@ -464,7 +464,7 @@ export class DictionaryBatch {
      * @returns boolean
      */
     isDelta(): boolean {
-        let offset = this.bb!.__offset(this.bb_pos, 8);
+        const offset = this.bb!.__offset(this.bb_pos, 8);
         return offset ? !!this.bb!.readInt8(this.bb_pos + offset) : false;
     }
 
@@ -504,7 +504,7 @@ export class DictionaryBatch {
      * @returns flatbuffers.Offset
      */
     static endDictionaryBatch(builder: flatbuffers.Builder): flatbuffers.Offset {
-        let offset = builder.endObject();
+        const offset = builder.endObject();
         return offset;
     }
 
@@ -557,7 +557,7 @@ export class Message {
      * @returns MetadataVersion
      */
     version(): NS13596923344997147894.MetadataVersion {
-        let offset = this.bb!.__offset(this.bb_pos, 4);
+        const offset = this.bb!.__offset(this.bb_pos, 4);
         return offset ? /**  */ (this.bb!.readInt16(this.bb_pos + offset)) : NS13596923344997147894.MetadataVersion.V1;
     }
 
@@ -565,7 +565,7 @@ export class Message {
      * @returns MessageHeader
      */
     headerType(): MessageHeader {
-        let offset = this.bb!.__offset(this.bb_pos, 6);
+        const offset = this.bb!.__offset(this.bb_pos, 6);
         return offset ? /**  */ (this.bb!.readUint8(this.bb_pos + offset)) : MessageHeader.NONE;
     }
 
@@ -574,7 +574,7 @@ export class Message {
      * @returns ?flatbuffers.Table
      */
     header<T extends flatbuffers.Table>(obj: T): T | null {
-        let offset = this.bb!.__offset(this.bb_pos, 8);
+        const offset = this.bb!.__offset(this.bb_pos, 8);
         return offset ? this.bb!.__union(obj, this.bb_pos + offset) : null;
     }
 
@@ -582,7 +582,7 @@ export class Message {
      * @returns flatbuffers.Long
      */
     bodyLength(): flatbuffers.Long {
-        let offset = this.bb!.__offset(this.bb_pos, 10);
+        const offset = this.bb!.__offset(this.bb_pos, 10);
         return offset ? this.bb!.readInt64(this.bb_pos + offset) : this.bb!.createLong(0, 0);
     }
 
@@ -592,7 +592,7 @@ export class Message {
      * @returns KeyValue
      */
     customMetadata(index: number, obj?: NS13596923344997147894.KeyValue): NS13596923344997147894.KeyValue | null {
-        let offset = this.bb!.__offset(this.bb_pos, 12);
+        const offset = this.bb!.__offset(this.bb_pos, 12);
         return offset ? (obj || new NS13596923344997147894.KeyValue()).__init(this.bb!.__indirect(this.bb!.__vector(this.bb_pos + offset) + index * 4), this.bb!) : null;
     }
 
@@ -600,7 +600,7 @@ export class Message {
      * @returns number
      */
     customMetadataLength(): number {
-        let offset = this.bb!.__offset(this.bb_pos, 12);
+        const offset = this.bb!.__offset(this.bb_pos, 12);
         return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0;
     }
 
@@ -677,7 +677,7 @@ export class Message {
      * @returns flatbuffers.Offset
      */
     static endMessage(builder: flatbuffers.Builder): flatbuffers.Offset {
-        let offset = builder.endObject();
+        const offset = builder.endObject();
         return offset;
     }
 
diff --git a/js/src/fb/Schema.ts b/js/src/fb/Schema.ts
index 64014c9e356..f675bc2a062 100644
--- a/js/src/fb/Schema.ts
+++ b/js/src/fb/Schema.ts
@@ -230,7 +230,7 @@ export class Null {
      * @returns flatbuffers.Offset
      */
     static endNull(builder: flatbuffers.Builder): flatbuffers.Offset {
-        let offset = builder.endObject();
+        const offset = builder.endObject();
         return offset;
     }
 
@@ -292,7 +292,7 @@ export class Struct_ {
      * @returns flatbuffers.Offset
      */
     static endStruct_(builder: flatbuffers.Builder): flatbuffers.Offset {
-        let offset = builder.endObject();
+        const offset = builder.endObject();
         return offset;
     }
 
@@ -350,7 +350,7 @@ export class List {
      * @returns flatbuffers.Offset
      */
     static endList(builder: flatbuffers.Builder): flatbuffers.Offset {
-        let offset = builder.endObject();
+        const offset = builder.endObject();
         return offset;
     }
 
@@ -411,7 +411,7 @@ export class LargeList {
      * @returns flatbuffers.Offset
      */
     static endLargeList(builder: flatbuffers.Builder): flatbuffers.Offset {
-        let offset = builder.endObject();
+        const offset = builder.endObject();
         return offset;
     }
 
@@ -463,7 +463,7 @@ export class FixedSizeList {
      * @returns number
      */
     listSize(): number {
-        let offset = this.bb!.__offset(this.bb_pos, 4);
+        const offset = this.bb!.__offset(this.bb_pos, 4);
         return offset ? this.bb!.readInt32(this.bb_pos + offset) : 0;
     }
 
@@ -487,7 +487,7 @@ export class FixedSizeList {
      * @returns flatbuffers.Offset
      */
     static endFixedSizeList(builder: flatbuffers.Builder): flatbuffers.Offset {
-        let offset = builder.endObject();
+        const offset = builder.endObject();
         return offset;
     }
 
@@ -565,7 +565,7 @@ export class Map {
      * @returns boolean
      */
     keysSorted(): boolean {
-        let offset = this.bb!.__offset(this.bb_pos, 4);
+        const offset = this.bb!.__offset(this.bb_pos, 4);
         return offset ? !!this.bb!.readInt8(this.bb_pos + offset) : false;
     }
 
@@ -589,7 +589,7 @@ export class Map {
      * @returns flatbuffers.Offset
      */
     static endMap(builder: flatbuffers.Builder): flatbuffers.Offset {
-        let offset = builder.endObject();
+        const offset = builder.endObject();
         return offset;
     }
 
@@ -645,7 +645,7 @@ export class Union {
      * @returns UnionMode
      */
     mode(): UnionMode {
-        let offset = this.bb!.__offset(this.bb_pos, 4);
+        const offset = this.bb!.__offset(this.bb_pos, 4);
         return offset ? /**  */ (this.bb!.readInt16(this.bb_pos + offset)) : UnionMode.Sparse;
     }
 
@@ -654,7 +654,7 @@ export class Union {
      * @returns number
      */
     typeIds(index: number): number | null {
-        let offset = this.bb!.__offset(this.bb_pos, 6);
+        const offset = this.bb!.__offset(this.bb_pos, 6);
         return offset ? this.bb!.readInt32(this.bb!.__vector(this.bb_pos + offset) + index * 4) : 0;
     }
 
@@ -662,7 +662,7 @@ export class Union {
      * @returns number
      */
     typeIdsLength(): number {
-        let offset = this.bb!.__offset(this.bb_pos, 6);
+        const offset = this.bb!.__offset(this.bb_pos, 6);
         return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0;
     }
 
@@ -670,7 +670,7 @@ export class Union {
      * @returns Int32Array
      */
     typeIdsArray(): Int32Array | null {
-        let offset = this.bb!.__offset(this.bb_pos, 6);
+        const offset = this.bb!.__offset(this.bb_pos, 6);
         return offset ? new Int32Array(this.bb!.bytes().buffer, this.bb!.bytes().byteOffset + this.bb!.__vector(this.bb_pos + offset), this.bb!.__vector_len(this.bb_pos + offset)) : null;
     }
 
@@ -723,7 +723,7 @@ export class Union {
      * @returns flatbuffers.Offset
      */
     static endUnion(builder: flatbuffers.Builder): flatbuffers.Offset {
-        let offset = builder.endObject();
+        const offset = builder.endObject();
         return offset;
     }
 
@@ -775,7 +775,7 @@ export class Int {
      * @returns number
      */
     bitWidth(): number {
-        let offset = this.bb!.__offset(this.bb_pos, 4);
+        const offset = this.bb!.__offset(this.bb_pos, 4);
         return offset ? this.bb!.readInt32(this.bb_pos + offset) : 0;
     }
 
@@ -783,7 +783,7 @@ export class Int {
      * @returns boolean
      */
     isSigned(): boolean {
-        let offset = this.bb!.__offset(this.bb_pos, 6);
+        const offset = this.bb!.__offset(this.bb_pos, 6);
         return offset ? !!this.bb!.readInt8(this.bb_pos + offset) : false;
     }
 
@@ -815,7 +815,7 @@ export class Int {
      * @returns flatbuffers.Offset
      */
     static endInt(builder: flatbuffers.Builder): flatbuffers.Offset {
-        let offset = builder.endObject();
+        const offset = builder.endObject();
         return offset;
     }
 
@@ -867,7 +867,7 @@ export class FloatingPoint {
      * @returns Precision
      */
     precision(): Precision {
-        let offset = this.bb!.__offset(this.bb_pos, 4);
+        const offset = this.bb!.__offset(this.bb_pos, 4);
         return offset ? /**  */ (this.bb!.readInt16(this.bb_pos + offset)) : Precision.HALF;
     }
 
@@ -891,7 +891,7 @@ export class FloatingPoint {
      * @returns flatbuffers.Offset
      */
     static endFloatingPoint(builder: flatbuffers.Builder): flatbuffers.Offset {
-        let offset = builder.endObject();
+        const offset = builder.endObject();
         return offset;
     }
 
@@ -952,7 +952,7 @@ export class Utf8 {
      * @returns flatbuffers.Offset
      */
     static endUtf8(builder: flatbuffers.Builder): flatbuffers.Offset {
-        let offset = builder.endObject();
+        const offset = builder.endObject();
         return offset;
     }
 
@@ -1012,7 +1012,7 @@ export class Binary {
      * @returns flatbuffers.Offset
      */
     static endBinary(builder: flatbuffers.Builder): flatbuffers.Offset {
-        let offset = builder.endObject();
+        const offset = builder.endObject();
         return offset;
     }
 
@@ -1073,7 +1073,7 @@ export class LargeUtf8 {
      * @returns flatbuffers.Offset
      */
     static endLargeUtf8(builder: flatbuffers.Builder): flatbuffers.Offset {
-        let offset = builder.endObject();
+        const offset = builder.endObject();
         return offset;
     }
 
@@ -1134,7 +1134,7 @@ export class LargeBinary {
      * @returns flatbuffers.Offset
      */
     static endLargeBinary(builder: flatbuffers.Builder): flatbuffers.Offset {
-        let offset = builder.endObject();
+        const offset = builder.endObject();
         return offset;
     }
 
@@ -1186,7 +1186,7 @@ export class FixedSizeBinary {
      * @returns number
      */
     byteWidth(): number {
-        let offset = this.bb!.__offset(this.bb_pos, 4);
+        const offset = this.bb!.__offset(this.bb_pos, 4);
         return offset ? this.bb!.readInt32(this.bb_pos + offset) : 0;
     }
 
@@ -1210,7 +1210,7 @@ export class FixedSizeBinary {
      * @returns flatbuffers.Offset
      */
     static endFixedSizeBinary(builder: flatbuffers.Builder): flatbuffers.Offset {
-        let offset = builder.endObject();
+        const offset = builder.endObject();
         return offset;
     }
 
@@ -1269,7 +1269,7 @@ export class Bool {
      * @returns flatbuffers.Offset
      */
     static endBool(builder: flatbuffers.Builder): flatbuffers.Offset {
-        let offset = builder.endObject();
+        const offset = builder.endObject();
         return offset;
     }
 
@@ -1326,7 +1326,7 @@ export class Decimal {
      * @returns number
      */
     precision(): number {
-        let offset = this.bb!.__offset(this.bb_pos, 4);
+        const offset = this.bb!.__offset(this.bb_pos, 4);
         return offset ? this.bb!.readInt32(this.bb_pos + offset) : 0;
     }
 
@@ -1336,7 +1336,7 @@ export class Decimal {
      * @returns number
      */
     scale(): number {
-        let offset = this.bb!.__offset(this.bb_pos, 6);
+        const offset = this.bb!.__offset(this.bb_pos, 6);
         return offset ? this.bb!.readInt32(this.bb_pos + offset) : 0;
     }
 
@@ -1347,7 +1347,7 @@ export class Decimal {
      * @returns number
      */
     bitWidth(): number {
-        let offset = this.bb!.__offset(this.bb_pos, 8);
+        const offset = this.bb!.__offset(this.bb_pos, 8);
         return offset ? this.bb!.readInt32(this.bb_pos + offset) : 128;
     }
 
@@ -1387,7 +1387,7 @@ export class Decimal {
      * @returns flatbuffers.Offset
      */
     static endDecimal(builder: flatbuffers.Builder): flatbuffers.Offset {
-        let offset = builder.endObject();
+        const offset = builder.endObject();
         return offset;
     }
 
@@ -1447,7 +1447,7 @@ export class Date {
      * @returns DateUnit
      */
     unit(): DateUnit {
-        let offset = this.bb!.__offset(this.bb_pos, 4);
+        const offset = this.bb!.__offset(this.bb_pos, 4);
         return offset ? /**  */ (this.bb!.readInt16(this.bb_pos + offset)) : DateUnit.MILLISECOND;
     }
 
@@ -1471,7 +1471,7 @@ export class Date {
      * @returns flatbuffers.Offset
      */
     static endDate(builder: flatbuffers.Builder): flatbuffers.Offset {
-        let offset = builder.endObject();
+        const offset = builder.endObject();
         return offset;
     }
 
@@ -1526,7 +1526,7 @@ export class Time {
      * @returns TimeUnit
      */
     unit(): TimeUnit {
-        let offset = this.bb!.__offset(this.bb_pos, 4);
+        const offset = this.bb!.__offset(this.bb_pos, 4);
         return offset ? /**  */ (this.bb!.readInt16(this.bb_pos + offset)) : TimeUnit.MILLISECOND;
     }
 
@@ -1534,7 +1534,7 @@ export class Time {
      * @returns number
      */
     bitWidth(): number {
-        let offset = this.bb!.__offset(this.bb_pos, 6);
+        const offset = this.bb!.__offset(this.bb_pos, 6);
         return offset ? this.bb!.readInt32(this.bb_pos + offset) : 32;
     }
 
@@ -1566,7 +1566,7 @@ export class Time {
      * @returns flatbuffers.Offset
      */
     static endTime(builder: flatbuffers.Builder): flatbuffers.Offset {
-        let offset = builder.endObject();
+        const offset = builder.endObject();
         return offset;
     }
 
@@ -1625,7 +1625,7 @@ export class Timestamp {
      * @returns TimeUnit
      */
     unit(): TimeUnit {
-        let offset = this.bb!.__offset(this.bb_pos, 4);
+        const offset = this.bb!.__offset(this.bb_pos, 4);
         return offset ? /**  */ (this.bb!.readInt16(this.bb_pos + offset)) : TimeUnit.SECOND;
     }
 
@@ -1657,7 +1657,7 @@ export class Timestamp {
     timezone(): string | null;
     timezone(optionalEncoding: flatbuffers.Encoding): string | Uint8Array | null;
     timezone(optionalEncoding?: any): string | Uint8Array | null {
-        let offset = this.bb!.__offset(this.bb_pos, 6);
+        const offset = this.bb!.__offset(this.bb_pos, 6);
         return offset ? this.bb!.__string(this.bb_pos + offset, optionalEncoding) : null;
     }
 
@@ -1689,7 +1689,7 @@ export class Timestamp {
      * @returns flatbuffers.Offset
      */
     static endTimestamp(builder: flatbuffers.Builder): flatbuffers.Offset {
-        let offset = builder.endObject();
+        const offset = builder.endObject();
         return offset;
     }
 
@@ -1741,7 +1741,7 @@ export class Interval {
      * @returns IntervalUnit
      */
     unit(): IntervalUnit {
-        let offset = this.bb!.__offset(this.bb_pos, 4);
+        const offset = this.bb!.__offset(this.bb_pos, 4);
         return offset ? /**  */ (this.bb!.readInt16(this.bb_pos + offset)) : IntervalUnit.YEAR_MONTH;
     }
 
@@ -1765,7 +1765,7 @@ export class Interval {
      * @returns flatbuffers.Offset
      */
     static endInterval(builder: flatbuffers.Builder): flatbuffers.Offset {
-        let offset = builder.endObject();
+        const offset = builder.endObject();
         return offset;
     }
 
@@ -1816,7 +1816,7 @@ export class Duration {
      * @returns TimeUnit
      */
     unit(): TimeUnit {
-        let offset = this.bb!.__offset(this.bb_pos, 4);
+        const offset = this.bb!.__offset(this.bb_pos, 4);
         return offset ? /**  */ (this.bb!.readInt16(this.bb_pos + offset)) : TimeUnit.MILLISECOND;
     }
 
@@ -1840,7 +1840,7 @@ export class Duration {
      * @returns flatbuffers.Offset
      */
     static endDuration(builder: flatbuffers.Builder): flatbuffers.Offset {
-        let offset = builder.endObject();
+        const offset = builder.endObject();
         return offset;
     }
 
@@ -1898,7 +1898,7 @@ export class KeyValue {
     key(): string | null;
     key(optionalEncoding: flatbuffers.Encoding): string | Uint8Array | null;
     key(optionalEncoding?: any): string | Uint8Array | null {
-        let offset = this.bb!.__offset(this.bb_pos, 4);
+        const offset = this.bb!.__offset(this.bb_pos, 4);
         return offset ? this.bb!.__string(this.bb_pos + offset, optionalEncoding) : null;
     }
 
@@ -1909,7 +1909,7 @@ export class KeyValue {
     value(): string | null;
     value(optionalEncoding: flatbuffers.Encoding): string | Uint8Array | null;
     value(optionalEncoding?: any): string | Uint8Array | null {
-        let offset = this.bb!.__offset(this.bb_pos, 6);
+        const offset = this.bb!.__offset(this.bb_pos, 6);
         return offset ? this.bb!.__string(this.bb_pos + offset, optionalEncoding) : null;
     }
 
@@ -1941,7 +1941,7 @@ export class KeyValue {
      * @returns flatbuffers.Offset
      */
     static endKeyValue(builder: flatbuffers.Builder): flatbuffers.Offset {
-        let offset = builder.endObject();
+        const offset = builder.endObject();
         return offset;
     }
 
@@ -1997,7 +1997,7 @@ export class DictionaryEncoding {
      * @returns flatbuffers.Long
      */
     id(): flatbuffers.Long {
-        let offset = this.bb!.__offset(this.bb_pos, 4);
+        const offset = this.bb!.__offset(this.bb_pos, 4);
         return offset ? this.bb!.readInt64(this.bb_pos + offset) : this.bb!.createLong(0, 0);
     }
 
@@ -2012,7 +2012,7 @@ export class DictionaryEncoding {
      * @returns Int|null
      */
     indexType(obj?: Int): Int | null {
-        let offset = this.bb!.__offset(this.bb_pos, 6);
+        const offset = this.bb!.__offset(this.bb_pos, 6);
         return offset ? (obj || new Int()).__init(this.bb!.__indirect(this.bb_pos + offset), this.bb!) : null;
     }
 
@@ -2025,7 +2025,7 @@ export class DictionaryEncoding {
      * @returns boolean
      */
     isOrdered(): boolean {
-        let offset = this.bb!.__offset(this.bb_pos, 8);
+        const offset = this.bb!.__offset(this.bb_pos, 8);
         return offset ? !!this.bb!.readInt8(this.bb_pos + offset) : false;
     }
 
@@ -2033,7 +2033,7 @@ export class DictionaryEncoding {
      * @returns DictionaryKind
      */
     dictionaryKind(): DictionaryKind {
-        let offset = this.bb!.__offset(this.bb_pos, 10);
+        const offset = this.bb!.__offset(this.bb_pos, 10);
         return offset ? /**  */ (this.bb!.readInt16(this.bb_pos + offset)) : DictionaryKind.DenseArray;
     }
 
@@ -2081,7 +2081,7 @@ export class DictionaryEncoding {
      * @returns flatbuffers.Offset
      */
     static endDictionaryEncoding(builder: flatbuffers.Builder): flatbuffers.Offset {
-        let offset = builder.endObject();
+        const offset = builder.endObject();
         return offset;
     }
 
@@ -2144,7 +2144,7 @@ export class Field {
     name(): string | null;
     name(optionalEncoding: flatbuffers.Encoding): string | Uint8Array | null;
     name(optionalEncoding?: any): string | Uint8Array | null {
-        let offset = this.bb!.__offset(this.bb_pos, 4);
+        const offset = this.bb!.__offset(this.bb_pos, 4);
         return offset ? this.bb!.__string(this.bb_pos + offset, optionalEncoding) : null;
     }
 
@@ -2154,7 +2154,7 @@ export class Field {
      * @returns boolean
      */
     nullable(): boolean {
-        let offset = this.bb!.__offset(this.bb_pos, 6);
+        const offset = this.bb!.__offset(this.bb_pos, 6);
         return offset ? !!this.bb!.readInt8(this.bb_pos + offset) : false;
     }
 
@@ -2162,7 +2162,7 @@ export class Field {
      * @returns Type
      */
     typeType(): Type {
-        let offset = this.bb!.__offset(this.bb_pos, 8);
+        const offset = this.bb!.__offset(this.bb_pos, 8);
         return offset ? /**  */ (this.bb!.readUint8(this.bb_pos + offset)) : Type.NONE;
     }
 
@@ -2173,7 +2173,7 @@ export class Field {
      * @returns ?flatbuffers.Table
      */
     type<T extends flatbuffers.Table>(obj: T): T | null {
-        let offset = this.bb!.__offset(this.bb_pos, 10);
+        const offset = this.bb!.__offset(this.bb_pos, 10);
         return offset ? this.bb!.__union(obj, this.bb_pos + offset) : null;
     }
 
@@ -2184,7 +2184,7 @@ export class Field {
      * @returns DictionaryEncoding|null
      */
     dictionary(obj?: DictionaryEncoding): DictionaryEncoding | null {
-        let offset = this.bb!.__offset(this.bb_pos, 12);
+        const offset = this.bb!.__offset(this.bb_pos, 12);
         return offset ? (obj || new DictionaryEncoding()).__init(this.bb!.__indirect(this.bb_pos + offset), this.bb!) : null;
     }
 
@@ -2197,7 +2197,7 @@ export class Field {
      * @returns Field
      */
     children(index: number, obj?: Field): Field | null {
-        let offset = this.bb!.__offset(this.bb_pos, 14);
+        const offset = this.bb!.__offset(this.bb_pos, 14);
         return offset ? (obj || new Field()).__init(this.bb!.__indirect(this.bb!.__vector(this.bb_pos + offset) + index * 4), this.bb!) : null;
     }
 
@@ -2205,7 +2205,7 @@ export class Field {
      * @returns number
      */
     childrenLength(): number {
-        let offset = this.bb!.__offset(this.bb_pos, 14);
+        const offset = this.bb!.__offset(this.bb_pos, 14);
         return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0;
     }
 
@@ -2217,7 +2217,7 @@ export class Field {
      * @returns KeyValue
      */
     customMetadata(index: number, obj?: KeyValue): KeyValue | null {
-        let offset = this.bb!.__offset(this.bb_pos, 16);
+        const offset = this.bb!.__offset(this.bb_pos, 16);
         return offset ? (obj || new KeyValue()).__init(this.bb!.__indirect(this.bb!.__vector(this.bb_pos + offset) + index * 4), this.bb!) : null;
     }
 
@@ -2225,7 +2225,7 @@ export class Field {
      * @returns number
      */
     customMetadataLength(): number {
-        let offset = this.bb!.__offset(this.bb_pos, 16);
+        const offset = this.bb!.__offset(this.bb_pos, 16);
         return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0;
     }
 
@@ -2339,7 +2339,7 @@ export class Field {
      * @returns flatbuffers.Offset
      */
     static endField(builder: flatbuffers.Builder): flatbuffers.Offset {
-        let offset = builder.endObject();
+        const offset = builder.endObject();
         return offset;
     }
 
@@ -2461,7 +2461,7 @@ export class Schema {
      * @returns Endianness
      */
     endianness(): Endianness {
-        let offset = this.bb!.__offset(this.bb_pos, 4);
+        const offset = this.bb!.__offset(this.bb_pos, 4);
         return offset ? /**  */ (this.bb!.readInt16(this.bb_pos + offset)) : Endianness.Little;
     }
 
@@ -2471,7 +2471,7 @@ export class Schema {
      * @returns Field
      */
     fields(index: number, obj?: Field): Field | null {
-        let offset = this.bb!.__offset(this.bb_pos, 6);
+        const offset = this.bb!.__offset(this.bb_pos, 6);
         return offset ? (obj || new Field()).__init(this.bb!.__indirect(this.bb!.__vector(this.bb_pos + offset) + index * 4), this.bb!) : null;
     }
 
@@ -2479,7 +2479,7 @@ export class Schema {
      * @returns number
      */
     fieldsLength(): number {
-        let offset = this.bb!.__offset(this.bb_pos, 6);
+        const offset = this.bb!.__offset(this.bb_pos, 6);
         return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0;
     }
 
@@ -2489,7 +2489,7 @@ export class Schema {
      * @returns KeyValue
      */
     customMetadata(index: number, obj?: KeyValue): KeyValue | null {
-        let offset = this.bb!.__offset(this.bb_pos, 8);
+        const offset = this.bb!.__offset(this.bb_pos, 8);
         return offset ? (obj || new KeyValue()).__init(this.bb!.__indirect(this.bb!.__vector(this.bb_pos + offset) + index * 4), this.bb!) : null;
     }
 
@@ -2497,7 +2497,7 @@ export class Schema {
      * @returns number
      */
     customMetadataLength(): number {
-        let offset = this.bb!.__offset(this.bb_pos, 8);
+        const offset = this.bb!.__offset(this.bb_pos, 8);
         return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0;
     }
 
@@ -2508,7 +2508,7 @@ export class Schema {
      * @returns flatbuffers.Long
      */
     features(index: number): flatbuffers.Long | null {
-        let offset = this.bb!.__offset(this.bb_pos, 10);
+        const offset = this.bb!.__offset(this.bb_pos, 10);
         return offset ? /**  */ (this.bb!.readInt64(this.bb!.__vector(this.bb_pos + offset) + index * 8)) : this.bb!.createLong(0, 0);
     }
 
@@ -2516,7 +2516,7 @@ export class Schema {
      * @returns number
      */
     featuresLength(): number {
-        let offset = this.bb!.__offset(this.bb_pos, 10);
+        const offset = this.bb!.__offset(this.bb_pos, 10);
         return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0;
     }
 
@@ -2627,7 +2627,7 @@ export class Schema {
      * @returns flatbuffers.Offset
      */
     static endSchema(builder: flatbuffers.Builder): flatbuffers.Offset {
-        let offset = builder.endObject();
+        const offset = builder.endObject();
         return offset;
     }
 
diff --git a/js/src/io/adapters.ts b/js/src/io/adapters.ts
index df0c632ba81..a83346ef74f 100644
--- a/js/src/io/adapters.ts
+++ b/js/src/io/adapters.ts
@@ -75,7 +75,7 @@ function* fromIterable<T extends ArrayBufferViewInput>(source: Iterable<T> | T):
     ({ cmd, size } = yield <any> null);
 
     // initialize the iterator
-    let it = toUint8ArrayIterator(source)[Symbol.iterator]();
+    const it = toUint8ArrayIterator(source)[Symbol.iterator]();
 
     try {
         do {
@@ -121,7 +121,7 @@ async function* fromAsyncIterable<T extends ArrayBufferViewInput>(source: AsyncI
     ({ cmd, size } = (yield <any> null)!);
 
     // initialize the iterator
-    let it = toUint8ArrayAsyncIterator(source)[Symbol.asyncIterator]();
+    const it = toUint8ArrayAsyncIterator(source)[Symbol.asyncIterator]();
 
     try {
         do {
@@ -171,7 +171,7 @@ async function* fromDOMStream<T extends ArrayBufferViewInput>(source: ReadableSt
     ({ cmd, size } = yield <any> null);
 
     // initialize the reader and lock the stream
-    let it = new AdaptiveByteReader(source);
+    const it = new AdaptiveByteReader(source);
 
     try {
         do {
@@ -297,7 +297,7 @@ type EventName = 'end' | 'error' | 'readable';
 type Event = [EventName, (_: any) => void, Promise<[EventName, Error | null]>];
 /** @ignore */
 const onEvent = <T extends string>(stream: NodeJS.ReadableStream, event: T) => {
-    let handler = (_: any) => resolve([event, _]);
+    const handler = (_: any) => resolve([event, _]);
     let resolve: (value?: [T, any] | PromiseLike<[T, any]>) => void;
     return [event, handler, new Promise<[T, any]>(
         (r) => (resolve = r) && stream['once'](event, handler)
@@ -307,7 +307,7 @@ const onEvent = <T extends string>(stream: NodeJS.ReadableStream, event: T) => {
 /** @ignore */
 async function* fromNodeStream(stream: NodeJS.ReadableStream): AsyncUint8ArrayGenerator {
 
-    let events: Event[] = [];
+    const events: Event[] = [];
     let event: EventName = 'error';
     let done = false, err: Error | null = null;
     let cmd: 'peek' | 'read', size: number, bufferLength = 0;
diff --git a/js/src/io/file.ts b/js/src/io/file.ts
index 1c0661c105b..20b7dbf02df 100644
--- a/js/src/io/file.ts
+++ b/js/src/io/file.ts
@@ -90,8 +90,8 @@ export class AsyncRandomAccessFile extends AsyncByteStream {
         if (file && position < size) {
             if (typeof nBytes !== 'number') { nBytes = Infinity; }
             let pos = position, offset = 0, bytesRead = 0;
-            let end = Math.min(size, pos + Math.min(size - pos, nBytes));
-            let buffer = new Uint8Array(Math.max(0, (this.position = end) - pos));
+            const end = Math.min(size, pos + Math.min(size - pos, nBytes));
+            const buffer = new Uint8Array(Math.max(0, (this.position = end) - pos));
             while ((pos += bytesRead) < end && (offset += bytesRead) < buffer.byteLength) {
                 ({ bytesRead } = await file.read(buffer, offset, buffer.byteLength - offset, pos));
             }
diff --git a/js/src/io/node/iterable.ts b/js/src/io/node/iterable.ts
index b174d1eed44..8bf5ad72a0c 100644
--- a/js/src/io/node/iterable.ts
+++ b/js/src/io/node/iterable.ts
@@ -51,7 +51,8 @@ class IterableReadable<T extends Uint8Array | any> extends Readable {
         }
     }
     _destroy(e: Error | null, cb: (e: Error | null) => void) {
-        let it = this._iterator, fn: any;
+        const it = this._iterator;
+        let fn: any;
         it && (fn = e != null && it.throw || it.return);
         fn && fn.call(it, e);
         cb && cb(null);
@@ -90,7 +91,8 @@ class AsyncIterableReadable<T extends Uint8Array | any> extends Readable {
         }
     }
     _destroy(e: Error | null, cb: (e: Error | null) => void) {
-        let it = this._iterator, fn: any;
+        const it = this._iterator;
+        let fn: any;
         it && (fn = e != null && it.throw || it.return);
         fn && fn.call(it, e).then(() => cb && cb(null)) || (cb && cb(null));
     }
diff --git a/js/src/io/stream.ts b/js/src/io/stream.ts
index e25f27895c6..2384ab0b96f 100644
--- a/js/src/io/stream.ts
+++ b/js/src/io/stream.ts
@@ -49,7 +49,8 @@ export class AsyncByteQueue<T extends ArrayBufferViewInput = Uint8Array> extends
     public toUint8Array(sync?: false): Promise<Uint8Array>;
     public toUint8Array(sync = false) {
         return sync ? joinUint8Arrays(this._values as any[])[0] : (async () => {
-            let buffers = [], byteLength = 0;
+            const buffers = [];
+            let byteLength = 0;
             for await (const chunk of this) {
                 buffers.push(chunk);
                 byteLength += chunk.byteLength;
diff --git a/js/src/ipc/metadata/json.ts b/js/src/ipc/metadata/json.ts
index b8d7d35c40e..399615c31d4 100644
--- a/js/src/ipc/metadata/json.ts
+++ b/js/src/ipc/metadata/json.ts
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+/* eslint-disable brace-style */
+
 import { Schema, Field } from '../../schema';
 import {
     DataType, Dictionary, TimeBitWidth,
diff --git a/js/src/ipc/metadata/message.ts b/js/src/ipc/metadata/message.ts
index e6117716107..2ebb73e4c0f 100644
--- a/js/src/ipc/metadata/message.ts
+++ b/js/src/ipc/metadata/message.ts
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+/* eslint-disable brace-style */
+
 import { flatbuffers } from 'flatbuffers';
 
 import {
@@ -90,7 +92,8 @@ export class Message<T extends MessageHeader = any> {
 
     /** @nocollapse */
     public static encode<T extends MessageHeader>(message: Message<T>) {
-        let b = new Builder(), headerOffset = -1;
+        const b = new Builder();
+        let headerOffset = -1;
         if (message.isSchema()) {
             headerOffset = Schema.encode(b, message.header() as Schema);
         } else if (message.isRecordBatch()) {
@@ -520,7 +523,7 @@ function encodeField(b: Builder, field: Field) {
     let typeOffset = -1;
     let dictionaryOffset = -1;
 
-    let type = field.type;
+    const type = field.type;
     let typeId: Type = <any> field.typeId;
 
     if (!DataType.isDictionary(type)) {
diff --git a/js/src/ipc/reader.ts b/js/src/ipc/reader.ts
index 6b1abb0b22d..1ed634c12ec 100644
--- a/js/src/ipc/reader.ts
+++ b/js/src/ipc/reader.ts
@@ -413,7 +413,8 @@ class RecordBatchStreamReaderImpl<T extends { [key: string]: DataType } = any> e
     }
     public next(): IteratorResult<RecordBatch<T>> {
         if (this.closed) { return ITERATOR_DONE; }
-        let message: Message | null, { _reader: reader } = this;
+        let message: Message | null;
+        const { _reader: reader } = this;
         while (message = this._readNextMessageAndValidate()) {
             if (message.isSchema()) {
                 this.reset(message.header());
@@ -487,7 +488,8 @@ class AsyncRecordBatchStreamReaderImpl<T extends { [key: string]: DataType } = a
     }
     public async next() {
         if (this.closed) { return ITERATOR_DONE; }
-        let message: Message | null, { _reader: reader } = this;
+        let message: Message | null;
+        const { _reader: reader } = this;
         while (message = await this._readNextMessageAndValidate()) {
             if (message.isSchema()) {
                 await this.reset(message.header());
diff --git a/js/src/recordbatch.ts b/js/src/recordbatch.ts
index 78b2f6671f1..5463a387fae 100644
--- a/js/src/recordbatch.ts
+++ b/js/src/recordbatch.ts
@@ -70,7 +70,7 @@ export class RecordBatch<T extends { [key: string]: DataType } = any>
     constructor(schema: Schema<T>, data: Data<Struct<T>>, children?: Vector[]);
     constructor(...args: any[]) {
         let data: Data<Struct<T>>;
-        let schema = args[0] as Schema<T>;
+        const schema = args[0] as Schema<T>;
         let children: Vector[] | undefined;
         if (args[1] instanceof Data) {
             [, data, children] = (args as [any, Data<Struct<T>>, Vector<T[keyof T]>[]?]);
diff --git a/js/src/table.ts b/js/src/table.ts
index 8862fd652d5..23f02b0a207 100644
--- a/js/src/table.ts
+++ b/js/src/table.ts
@@ -76,7 +76,7 @@ export class Table<T extends { [key: string]: DataType } = any>
         if (!input) { return Table.empty(); }
 
         if (typeof input === 'object') {
-            let table = isIterable(input['values']) ? tableFromIterable<T, TNull>(input)
+            const table = isIterable(input['values']) ? tableFromIterable<T, TNull>(input)
                  : isAsyncIterable(input['values']) ? tableFromAsyncIterable<T, TNull>(input)
                                                     : null;
             if (table !== null) { return table; }
@@ -95,7 +95,7 @@ export class Table<T extends { [key: string]: DataType } = any>
             const schema = reader.schema;
             const batches: RecordBatch[] = [];
             if (schema) {
-                for await (let batch of reader) {
+                for await (const batch of reader) {
                     batches.push(batch);
                 }
                 return new Table<T>(schema, batches);
@@ -182,7 +182,7 @@ export class Table<T extends { [key: string]: DataType } = any>
 
         if (args[0] instanceof Schema) { schema = args.shift(); }
 
-        let chunks = selectArgs<RecordBatch<T>>(RecordBatch, args);
+        const chunks = selectArgs<RecordBatch<T>>(RecordBatch, args);
 
         if (!schema && !(schema = chunks[0] && chunks[0].schema)) {
             throw new TypeError('Table must be initialized with a Schema or at least one RecordBatch');
diff --git a/js/src/type.ts b/js/src/type.ts
index 3920cf2303d..782b44a279c 100644
--- a/js/src/type.ts
+++ b/js/src/type.ts
@@ -459,9 +459,11 @@ class Union_<T extends Unions = Unions> extends DataType<T> {
         }, Object.create(null) as { [key: number]: number });
     }
     public get typeId() { return Type.Union as T; }
-    public toString() { return `${this[Symbol.toStringTag]}<${
+    public toString() {
+ return `${this[Symbol.toStringTag]}<${
         this.children.map((x) => `${x.type}`).join(` | `)
-    }>`; }
+    }>`;
+}
     protected static [Symbol.toStringTag] = ((proto: Union_) => {
         (<any> proto).mode = null;
         (<any> proto).typeIds = null;
@@ -596,7 +598,7 @@ export type IntArray = Int8Array | Int16Array | Int32Array | Uint8Array | Uint16
 
 /** @ignore */
 export function strideForType(type: DataType) {
-    let t: any = type;
+    const t: any = type;
     switch (type.typeId) {
         case Type.Decimal: return 4;
         case Type.Timestamp: return 2;
diff --git a/js/src/util/args.ts b/js/src/util/args.ts
index ca6f6381a59..c9c9d111193 100644
--- a/js/src/util/args.ts
+++ b/js/src/util/args.ts
@@ -49,7 +49,8 @@ export const selectColumnChildrenArgs = <T extends Column>(Ctor: RecordBatchCtor
 /** @ignore */
 function _selectArgs<T>(Ctor: any, vals: any[], res: T[], idx: number) {
     let value: any, j = idx;
-    let i = -1, n = vals.length;
+    let i = -1;
+    const n = vals.length;
     while (++i < n) {
         if (isArray(value = vals[i])) {
             j = _selectArgs(Ctor, value, res, j).length;
@@ -61,7 +62,8 @@ function _selectArgs<T>(Ctor: any, vals: any[], res: T[], idx: number) {
 /** @ignore */
 function _selectChunkArgs<T>(Ctor: any, vals: any[], res: T[], idx: number) {
     let value: any, j = idx;
-    let i = -1, n = vals.length;
+    let i = -1;
+    const n = vals.length;
     while (++i < n) {
         if (isArray(value = vals[i])) {
             j = _selectChunkArgs(Ctor, value, res, j).length;
@@ -75,7 +77,8 @@ function _selectChunkArgs<T>(Ctor: any, vals: any[], res: T[], idx: number) {
 /** @ignore */
 function _selectVectorChildrenArgs<T extends Vector>(Ctor: RecordBatchCtor, vals: any[], res: T[], idx: number) {
     let value: any, j = idx;
-    let i = -1, n = vals.length;
+    let i = -1;
+    const n = vals.length;
     while (++i < n) {
         if (isArray(value = vals[i])) {
             j = _selectVectorChildrenArgs(Ctor, value, res, j).length;
@@ -89,7 +92,8 @@ function _selectVectorChildrenArgs<T extends Vector>(Ctor: RecordBatchCtor, vals
 /** @ignore */
 function _selectColumnChildrenArgs<T extends Column>(Ctor: RecordBatchCtor, vals: any[], res: T[], idx: number) {
     let value: any, j = idx;
-    let i = -1, n = vals.length;
+    let i = -1;
+    const n = vals.length;
     while (++i < n) {
         if (isArray(value = vals[i])) {
             j = _selectColumnChildrenArgs(Ctor, value, res, j).length;
@@ -105,7 +109,8 @@ const toKeysAndValues = (xs: [any[], any[]], [k, v]: [any, any], i: number) => (
 
 /** @ignore */
 function _selectFieldArgs<T extends { [key: string]: DataType }>(vals: any[], ret: [Field<T[keyof T]>[], Vector<T[keyof T]>[]]): [Field<T[keyof T]>[], (T[keyof T] | Vector<T[keyof T]>)[]] {
-    let keys: any[], n: number;
+    let keys: any[];
+    let n: number;
     switch (n = vals.length) {
         case 0: return ret;
         case 1:
@@ -124,10 +129,11 @@ function _selectFieldArgs<T extends { [key: string]: DataType }>(vals: any[], re
 
     let fieldIndex = -1;
     let valueIndex = -1;
-    let idx = -1, len = vals.length;
+    let idx = -1;
+    const len = vals.length;
     let field: number | string | Field<T[keyof T]>;
     let val: Vector<T[keyof T]> | Data<T[keyof T]>;
-    let [fields, values] = ret as [Field<T[keyof T]>[], any[]];
+    const [fields, values] = ret as [Field<T[keyof T]>[], any[]];
 
     while (++idx < len) {
         val = vals[idx];
diff --git a/js/src/util/bit.ts b/js/src/util/bit.ts
index 4b0a0cd1e80..e4c3d267ecf 100644
--- a/js/src/util/bit.ts
+++ b/js/src/util/bit.ts
@@ -48,7 +48,7 @@ export function truncateBitmap(offset: number, length: number, bitmap: Uint8Arra
 
 /** @ignore */
 export function packBools(values: Iterable<any>) {
-    let xs: number[] = [];
+    const xs: number[] = [];
     let i = 0, bit = 0, byte = 0;
     for (const value of values) {
         value && (byte |= 1 << bit);
@@ -58,7 +58,7 @@ export function packBools(values: Iterable<any>) {
         }
     }
     if (i === 0 || bit > 0) { xs[i++] = byte; }
-    let b = new Uint8Array((xs.length + 7) & ~7);
+    const b = new Uint8Array((xs.length + 7) & ~7);
     b.set(xs);
     return b;
 }
diff --git a/js/src/util/bn.ts b/js/src/util/bn.ts
index cb85cd41f71..7c71969a419 100644
--- a/js/src/util/bn.ts
+++ b/js/src/util/bn.ts
@@ -71,9 +71,11 @@ Object.assign(DecimalBigNum.prototype,  BigNum.prototype, { 'constructor': Decim
 
 /** @ignore */
 function bignumToNumber<T extends BN<BigNumArray>>(bn: T) {
-    let { buffer, byteOffset, length, 'signed': signed } = bn;
-    let words = new Int32Array(buffer, byteOffset, length);
-    let number = 0, i = 0, n = words.length, hi, lo;
+    const { buffer, byteOffset, length, 'signed': signed } = bn;
+    const words = new Int32Array(buffer, byteOffset, length);
+    let number = 0, i = 0;
+    const n = words.length;
+    let hi, lo;
     while (i < n) {
         lo = words[i++];
         hi = words[i++];
@@ -99,10 +101,11 @@ if (!BigIntAvailable) {
 /** @ignore */
 function decimalToString<T extends BN<BigNumArray>>(a: T) {
     let digits = '';
-    let base64 = new Uint32Array(2);
+    const base64 = new Uint32Array(2);
     let base32 = new Uint16Array(a.buffer, a.byteOffset, a.byteLength / 2);
-    let checks = new Uint32Array((base32 = new Uint16Array(base32).reverse()).buffer);
-    let i = -1, n = base32.length - 1;
+    const checks = new Uint32Array((base32 = new Uint16Array(base32).reverse()).buffer);
+    let i = -1;
+    const n = base32.length - 1;
     do {
         for (base64[0] = base32[i = 0]; i < n;) {
             base32[i++] = base64[1] = base64[0] / 10;
diff --git a/js/src/util/buffer.ts b/js/src/util/buffer.ts
index dfdfefc5f2e..dde131eb5e2 100644
--- a/js/src/util/buffer.ts
+++ b/js/src/util/buffer.ts
@@ -27,7 +27,7 @@ const SharedArrayBuf = (typeof SharedArrayBuffer !== 'undefined' ? SharedArrayBu
 
 /** @ignore */
 function collapseContiguousByteRanges(chunks: Uint8Array[]) {
-    let result = chunks[0] ? [chunks[0]] : [];
+    const result = chunks[0] ? [chunks[0]] : [];
     let xOffset: number, yOffset: number, xLen: number, yLen: number;
     for (let x, y, i = 0, j = 0, n = chunks.length; ++i < n;) {
         x = result[j];
@@ -63,10 +63,11 @@ export function joinUint8Arrays(chunks: Uint8Array[], size?: number | null): [Ui
     // collapse chunks that share the same underlying ArrayBuffer and whose byte ranges overlap,
     // to avoid unnecessarily copying the bytes to do this buffer join. This is a common case during
     // streaming, where we may be reading partial byte ranges out of the same underlying ArrayBuffer
-    let result = collapseContiguousByteRanges(chunks);
-    let byteLength = result.reduce((x, b) => x + b.byteLength, 0);
+    const result = collapseContiguousByteRanges(chunks);
+    const byteLength = result.reduce((x, b) => x + b.byteLength, 0);
     let source: Uint8Array, sliced: Uint8Array, buffer: Uint8Array | void;
-    let offset = 0, index = -1, length = Math.min(size || Infinity, byteLength);
+    let offset = 0, index = -1;
+    const length = Math.min(size || Infinity, byteLength);
     for (let n = result.length; ++index < n;) {
         source = result[index];
         sliced = source.subarray(0, Math.min(source.length, length - offset));
@@ -224,7 +225,8 @@ export function rebaseValueOffsets(offset: number, length: number, valueOffsets:
 
 /** @ignore */
 export function compareArrayLike<T extends ArrayLike<any>>(a: T, b: T) {
-    let i = 0, n = a.length;
+    let i = 0;
+    const n = a.length;
     if (n !== b.length) { return false; }
     if (n > 0) {
         do { if (a[i] !== b[i]) { return false; } } while (++i < n);
diff --git a/js/src/util/int.ts b/js/src/util/int.ts
index 48aabb07005..147106dbb30 100644
--- a/js/src/util/int.ts
+++ b/js/src/util/int.ts
@@ -147,7 +147,7 @@ export class Uint64 extends BaseInt64 {
     public static fromString(str: string, out_buffer = new Uint32Array(2)): Uint64 {
         const length = str.length;
 
-        let out = new Uint64(out_buffer);
+        const out = new Uint64(out_buffer);
         for (let posn = 0; posn < length;) {
             const group = kInt32DecimalDigits < length - posn ?
                           kInt32DecimalDigits : length - posn;
@@ -174,13 +174,13 @@ export class Uint64 extends BaseInt64 {
 
     /** @nocollapse */
     public static multiply(left: Uint64, right: Uint64): Uint64 {
-        let rtrn = new Uint64(new Uint32Array(left.buffer));
+        const rtrn = new Uint64(new Uint32Array(left.buffer));
         return rtrn.times(right);
     }
 
     /** @nocollapse */
     public static add(left: Uint64, right: Uint64): Uint64 {
-        let rtrn = new Uint64(new Uint32Array(left.buffer));
+        const rtrn = new Uint64(new Uint32Array(left.buffer));
         return rtrn.plus(right);
     }
 }
@@ -238,7 +238,7 @@ export class Int64 extends BaseInt64 {
         const negate = str.startsWith('-');
         const length = str.length;
 
-        let out = new Int64(out_buffer);
+        const out = new Int64(out_buffer);
         for (let posn = negate ? 1 : 0; posn < length;) {
             const group = kInt32DecimalDigits < length - posn ?
                           kInt32DecimalDigits : length - posn;
@@ -264,13 +264,13 @@ export class Int64 extends BaseInt64 {
 
     /** @nocollapse */
     public static multiply(left: Int64, right: Int64): Int64 {
-        let rtrn = new Int64(new Uint32Array(left.buffer));
+        const rtrn = new Int64(new Uint32Array(left.buffer));
         return rtrn.times(right);
     }
 
     /** @nocollapse */
     public static add(left: Int64, right: Int64): Int64 {
-        let rtrn = new Int64(new Uint32Array(left.buffer));
+        const rtrn = new Int64(new Uint32Array(left.buffer));
         return rtrn.plus(right);
     }
 }
@@ -320,7 +320,7 @@ export class Int128 {
         let product = Uint64.multiply(L3, R3);
         this.buffer[0] = product.low();
 
-        let sum = new Uint64(new Uint32Array([product.high(), 0]));
+        const sum = new Uint64(new Uint32Array([product.high(), 0]));
 
         product = Uint64.multiply(L2, R3);
         sum.plus(product);
@@ -333,7 +333,7 @@ export class Int128 {
         this.buffer[3] = (sum.lessThan(product) ? 1 : 0);
 
         this.buffer[2] = sum.high();
-        let high = new Uint64(new Uint32Array(this.buffer.buffer, this.buffer.byteOffset + 8, 2));
+        const high = new Uint64(new Uint32Array(this.buffer.buffer, this.buffer.byteOffset + 8, 2));
 
         high.plus(Uint64.multiply(L1, R3))
             .plus(Uint64.multiply(L2, R2))
@@ -347,7 +347,7 @@ export class Int128 {
     }
 
     public plus(other: Int128): Int128 {
-        let sums = new Uint32Array(4);
+        const sums = new Uint32Array(4);
         sums[3] = (this.buffer[3] + other.buffer[3]) >>> 0;
         sums[2] = (this.buffer[2] + other.buffer[2]) >>> 0;
         sums[1] = (this.buffer[1] + other.buffer[1]) >>> 0;
@@ -377,13 +377,13 @@ export class Int128 {
 
     /** @nocollapse */
     public static multiply(left: Int128, right: Int128): Int128 {
-        let rtrn = new Int128(new Uint32Array(left.buffer));
+        const rtrn = new Int128(new Uint32Array(left.buffer));
         return rtrn.times(right);
     }
 
     /** @nocollapse */
     public static add(left: Int128, right: Int128): Int128 {
-        let rtrn = new Int128(new Uint32Array(left.buffer));
+        const rtrn = new Int128(new Uint32Array(left.buffer));
         return rtrn.plus(right);
     }
 
@@ -412,7 +412,7 @@ export class Int128 {
         const negate = str.startsWith('-');
         const length = str.length;
 
-        let out = new Int128(out_buffer);
+        const out = new Int128(out_buffer);
         for (let posn = negate ? 1 : 0; posn < length;) {
             const group = kInt32DecimalDigits < length - posn ?
                           kInt32DecimalDigits : length - posn;
diff --git a/js/src/util/math.ts b/js/src/util/math.ts
index e9b600aadf3..47678e1a961 100644
--- a/js/src/util/math.ts
+++ b/js/src/util/math.ts
@@ -26,9 +26,9 @@ const u32 = new Uint32Array(f64.buffer);
  * @ignore
  */
 export function uint16ToFloat64(h: number) {
-    let expo = (h & 0x7C00) >> 10;
-    let sigf = (h & 0x03FF) / 1024;
-    let sign = (-1) ** ((h & 0x8000) >> 15);
+    const expo = (h & 0x7C00) >> 10;
+    const sigf = (h & 0x03FF) / 1024;
+    const sign = (-1) ** ((h & 0x8000) >> 15);
     switch (expo) {
         case 0x1F: return sign * (sigf ? NaN : 1 / 0);
         case 0x00: return sign * (sigf ? 6.103515625e-5 * sigf : 0);
@@ -54,7 +54,7 @@ export function float64ToUint16(d: number) {
     // 0x7ff00000 = 01111111 11110000 00000000 00000000 -- masks the 21st-31st bits
     // 0x000fffff = 00000000 00001111 11111111 11111111 -- masks the 1st-20th bit
 
-    let sign = (u32[1] & 0x80000000) >> 16 & 0xFFFF;
+    const sign = (u32[1] & 0x80000000) >> 16 & 0xFFFF;
     let expo = (u32[1] & 0x7ff00000), sigf = 0x0000;
 
     if (expo >= 0x40f00000) {
diff --git a/js/src/util/recordbatch.ts b/js/src/util/recordbatch.ts
index 97dd42ed160..37a630858d9 100644
--- a/js/src/util/recordbatch.ts
+++ b/js/src/util/recordbatch.ts
@@ -36,7 +36,8 @@ export function ensureSameLengthData<T extends { [key: string]: DataType } = any
 ) {
     let data: Data<T[keyof T]>;
     let field: Field<T[keyof T]>;
-    let i = -1, n = chunks.length;
+    let i = -1;
+    const n = chunks.length;
     const fields = [...schema.fields];
     const batchData = [] as Data<T[keyof T]>[];
     const bitmapLength = ((batchLength + 63) & ~63) >> 3;
@@ -70,7 +71,8 @@ function uniformlyDistributeChunksAcrossRecordBatches<T extends { [key: string]:
     const memo = { numBatches: columns.reduce((n, c) => Math.max(n, c.length), 0) };
 
     let numBatches = 0, batchLength = 0;
-    let i = -1, numColumns = columns.length;
+    let i = -1;
+    const numColumns = columns.length;
     let child: Data<T[keyof T]>, childData: Data<T[keyof T]>[] = [];
 
     while (memo.numBatches-- > 0) {
@@ -97,7 +99,8 @@ function uniformlyDistributeChunksAcrossRecordBatches<T extends { [key: string]:
 function distributeChildData<T extends { [key: string]: DataType } = any>(fields: Field<T[keyof T]>[], batchLength: number, childData: Data<T[keyof T]>[], columns: Data<T[keyof T]>[][], memo: { numBatches: number }) {
     let data: Data<T[keyof T]>;
     let field: Field<T[keyof T]>;
-    let length = 0, i = -1, n = columns.length;
+    let length = 0, i = -1;
+    const n = columns.length;
     const bitmapLength = ((batchLength + 63) & ~63) >> 3;
     while (++i < n) {
         if ((data = childData[i]) && ((length = data.length) >= batchLength)) {
diff --git a/js/src/util/vector.ts b/js/src/util/vector.ts
index 4a465936d67..a6cfd0373f1 100644
--- a/js/src/util/vector.ts
+++ b/js/src/util/vector.ts
@@ -46,7 +46,7 @@ export function clampRange<T extends RangeLike, N extends ClampRangeThen<T> = Cl
     // Adjust args similar to Array.prototype.slice. Normalize begin/end to
     // clamp between 0 and length, and wrap around on negative indices, e.g.
     // slice(-1, 5) or slice(5, -1)
-    let { length: len = 0 } = source;
+    const { length: len = 0 } = source;
     let lhs = typeof begin !== 'number' ? 0 : begin;
     let rhs = typeof end !== 'number' ? len : end;
     // wrap around on negative start/end positions
@@ -65,7 +65,7 @@ const isNaNFast = (value: any) => value !== value;
 
 /** @ignore */
 export function createElementComparator(search: any) {
-    let typeofSearch = typeof search;
+    const typeofSearch = typeof search;
     // Compare primitives
     if (typeofSearch !== 'object' || search === null) {
         // Compare NaN
@@ -177,7 +177,7 @@ function compareObject(comparators: ((x: any) => boolean)[], obj: Map<any, any>,
     const rValItr = obj instanceof Map ? obj.values() : Object.values(obj)[Symbol.iterator]();
 
     let i = 0;
-    let n = comparators.length;
+    const n = comparators.length;
     let rVal = rValItr.next();
     let lKey = lKeyItr.next();
     let rKey = rKeyItr.next();
diff --git a/js/src/vector/chunked.ts b/js/src/vector/chunked.ts
index e3cbf547191..656c4a1b6c7 100644
--- a/js/src/vector/chunked.ts
+++ b/js/src/vector/chunked.ts
@@ -166,7 +166,7 @@ export class Chunked<T extends DataType = any>
 
         if (index < 0 || index >= this._numChildren) { return null; }
 
-        let columns = this._children || (this._children = []);
+        const columns = this._children || (this._children = []);
         let child: Chunked<R>, field: Field<R>, chunks: Vector<R>[];
 
         if (child = columns[index]) { return child; }
@@ -185,9 +185,10 @@ export class Chunked<T extends DataType = any>
     public search(index: number): [number, number] | null;
     public search<N extends SearchContinuation<Chunked<T>>>(index: number, then?: N): ReturnType<N>;
     public search<N extends SearchContinuation<Chunked<T>>>(index: number, then?: N) {
-        let idx = index;
+        const idx = index;
         // binary search to find the child vector and value indices
-        let offsets = this._chunkOffsets, rhs = offsets.length - 1;
+        const offsets = this._chunkOffsets;
+        let rhs = offsets.length - 1;
         // return early if out of bounds, or if there's just one child
         if (idx < 0            ) { return null; }
         if (idx >= offsets[rhs]) { return null; }
@@ -228,15 +229,16 @@ export class Chunked<T extends DataType = any>
         let ArrayType: any = this._type.ArrayType;
         if (n <= 0) { return new ArrayType(0); }
         if (n <= 1) { return chunks[0].toArray(); }
-        let len = 0, src = new Array(n);
+        let len = 0;
+        const src = new Array(n);
         for (let i = -1; ++i < n;) {
             len += (src[i] = chunks[i].toArray()).length;
         }
         if (ArrayType !== src[0].constructor) {
             ArrayType = src[0].constructor;
         }
-        let dst = new ArrayType(len);
-        let set: any = ArrayType === Array ? arraySet : typedSet;
+        const dst = new ArrayType(len);
+        const set: any = ArrayType === Array ? arraySet : typedSet;
         for (let i = -1, idx = 0; ++i < n;) {
             idx = set(src[i], dst, idx);
         }
@@ -246,7 +248,8 @@ export class Chunked<T extends DataType = any>
     protected getInternal({ _chunks }: Chunked<T>, i: number, j: number) { return _chunks[i].get(j); }
     protected isValidInternal({ _chunks }: Chunked<T>, i: number, j: number) { return _chunks[i].isValid(j); }
     protected indexOfInternal({ _chunks }: Chunked<T>, chunkIndex: number, fromIndex: number, element: T['TValue']) {
-        let i = chunkIndex - 1, n = _chunks.length;
+        let i = chunkIndex - 1;
+        const n = _chunks.length;
         let start = fromIndex, offset = 0, found = -1;
         while (++i < n) {
             if (~(found = _chunks[i].indexOf(element, start))) {
@@ -285,8 +288,9 @@ export class Chunked<T extends DataType = any>
 
 /** @ignore */
 function calculateOffsets<T extends DataType>(vectors: Vector<T>[]) {
-    let offsets = new Uint32Array((vectors || []).length + 1);
-    let offset = offsets[0] = 0, length = offsets.length;
+    const offsets = new Uint32Array((vectors || []).length + 1);
+    let offset = offsets[0] = 0;
+    const length = offsets.length;
     for (let index = 0; ++index < length;) {
         offsets[index] = (offset += vectors[index - 1].length);
     }
diff --git a/js/src/vector/float.ts b/js/src/vector/float.ts
index cb15d154415..2e3151d9077 100644
--- a/js/src/vector/float.ts
+++ b/js/src/vector/float.ts
@@ -68,7 +68,7 @@ export class FloatVector<T extends Float = Float> extends BaseVector<T> {
         let ArrowType = vectorTypeToDataType(this);
 
         if ((input instanceof ArrayBuffer) || ArrayBuffer.isView(input)) {
-            let InputType = arrayTypeToDataType(input.constructor as FloatArrayCtor) || ArrowType;
+            const InputType = arrayTypeToDataType(input.constructor as FloatArrayCtor) || ArrowType;
             // Special case, infer the Arrow DataType from the input if calling the base
             // FloatVector.from with a TypedArray, e.g. `FloatVector.from(new Float32Array())`
             if (ArrowType === null) {
@@ -77,8 +77,8 @@ export class FloatVector<T extends Float = Float> extends BaseVector<T> {
             // If the DataType inferred from the Vector constructor matches the
             // DataType inferred from the input arguments, return zero-copy view
             if (ArrowType && ArrowType === InputType) {
-                let type = new ArrowType();
-                let length = input.byteLength / type.ArrayType.BYTES_PER_ELEMENT;
+                const type = new ArrowType();
+                const length = input.byteLength / type.ArrayType.BYTES_PER_ELEMENT;
                 // If the ArrowType is Float16 but the input type isn't a Uint16Array,
                 // let the Float16Builder handle casting the input values to Uint16s.
                 if (!convertTo16Bit(ArrowType, input.constructor)) {
diff --git a/js/src/vector/int.ts b/js/src/vector/int.ts
index 216efd59ab6..c12863297e6 100644
--- a/js/src/vector/int.ts
+++ b/js/src/vector/int.ts
@@ -87,11 +87,11 @@ export class IntVector<T extends Int = Int> extends BaseVector<T> {
     /** @nocollapse */
     public static from<T extends Int, TNull = any>(this: IntVectorConstructors, ...args: FromArgs<T, TNull>) {
 
-        let [input, is64bit = false] = args;
+        const [input, is64bit = false] = args;
         let ArrowType = vectorTypeToDataType(this, is64bit);
 
         if ((input instanceof ArrayBuffer) || ArrayBuffer.isView(input)) {
-            let InputType = arrayTypeToDataType(input.constructor as IntArrayCtor, is64bit) || ArrowType;
+            const InputType = arrayTypeToDataType(input.constructor as IntArrayCtor, is64bit) || ArrowType;
             // Special case, infer the Arrow DataType from the input if calling the base
             // IntVector.from with a TypedArray, e.g. `IntVector.from(new Int32Array())`
             if (ArrowType === null) {
@@ -100,7 +100,7 @@ export class IntVector<T extends Int = Int> extends BaseVector<T> {
             // If the DataType inferred from the Vector constructor matches the
             // DataType inferred from the input arguments, return zero-copy view
             if (ArrowType && ArrowType === InputType) {
-                let type = new ArrowType();
+                const type = new ArrowType();
                 let length = input.byteLength / type.ArrayType.BYTES_PER_ELEMENT;
                 // If the ArrowType is 64bit but the input type is 32bit pairs, update the logical length
                 if (convert32To64Bit(ArrowType, input.constructor)) {
diff --git a/js/src/vector/row.ts b/js/src/vector/row.ts
index 42b75019490..a759a8096e8 100644
--- a/js/src/vector/row.ts
+++ b/js/src/vector/row.ts
@@ -217,7 +217,8 @@ Object.setPrototypeOf(Row.prototype, Map.prototype);
 const defineRowProxyProperties = (() => {
     const desc = { enumerable: true, configurable: false, get: null as any, set: null as any };
     return <T extends Row>(row: T) => {
-        let idx = -1, ktoi = row[kKeyToIdx] || (row[kKeyToIdx] = new Map());
+        let idx = -1;
+        const ktoi = row[kKeyToIdx] || (row[kKeyToIdx] = new Map());
         const getter = (key: any) => function(this: T) { return this.get(key); };
         const setter = (key: any) => function(this: T, val: any) { return this.set(key, val); };
         for (const key of row.keys()) {

From 568046a5c97322d8cce4c4c8b901678eb9986f81 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Wed, 14 Apr 2021 15:59:14 -0400
Subject: [PATCH 034/719] ARROW-11475: [C++] Revert "Upgrade mimalloc"

This reverts commit 8780ca4b12d30a0118c3114edcad27da907d8e7c in order to avoid https://github.com/microsoft/mimalloc/issues/363 as discovered in #10019.

Closes #10024 from lidavidm/arrow-11475

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/cmake_modules/ThirdpartyToolchain.cmake | 4 ++--
 cpp/thirdparty/versions.txt                 | 4 +++-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index 3f686346bb1..923e18bafb9 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -1561,10 +1561,10 @@ if(ARROW_MIMALLOC)
   endif()
 
   set(MIMALLOC_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/mimalloc_ep/src/mimalloc_ep")
-  set(MIMALLOC_INCLUDE_DIR "${MIMALLOC_PREFIX}/lib/mimalloc-2.0/include")
+  set(MIMALLOC_INCLUDE_DIR "${MIMALLOC_PREFIX}/lib/mimalloc-1.6/include")
   set(
     MIMALLOC_STATIC_LIB
-    "${MIMALLOC_PREFIX}/lib/mimalloc-2.0/${CMAKE_STATIC_LIBRARY_PREFIX}${MIMALLOC_LIB_BASE_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}"
+    "${MIMALLOC_PREFIX}/lib/mimalloc-1.6/${CMAKE_STATIC_LIBRARY_PREFIX}${MIMALLOC_LIB_BASE_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}"
     )
 
   set(MIMALLOC_CMAKE_ARGS
diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt
index e5ab78c3822..637435d19b4 100644
--- a/cpp/thirdparty/versions.txt
+++ b/cpp/thirdparty/versions.txt
@@ -40,7 +40,9 @@ ARROW_GRPC_BUILD_VERSION=v1.35.0
 ARROW_GTEST_BUILD_VERSION=1.10.0
 ARROW_JEMALLOC_BUILD_VERSION=5.2.1
 ARROW_LZ4_BUILD_VERSION=v1.9.3
-ARROW_MIMALLOC_BUILD_VERSION=v2.0.0
+# mimalloc 1.6.7 didn't build on Visual Studio 2015
+# https://github.com/microsoft/mimalloc/issues/353
+ARROW_MIMALLOC_BUILD_VERSION=v1.6.4
 ARROW_ORC_BUILD_VERSION=1.6.6
 ARROW_PROTOBUF_BUILD_VERSION=v3.14.0
 # Because of https://github.com/Tencent/rapidjson/pull/1323, we require

From 05ec4386b4f9d7743aeb89ae33b5f9520e0928e7 Mon Sep 17 00:00:00 2001
From: Weston Pace <weston.pace@gmail.com>
Date: Wed, 14 Apr 2021 17:58:26 -0400
Subject: [PATCH 035/719] ARROW-12161: [C++][Dataset] Revert async CSV reader
 in datasets

Reverts the streaming CSV reader and the async workaround introduced for it.  It will be reintroduced, more cleanly, in ARROW-12355

Closes #10019 from westonpace/feature/revert-arrow-12161

Authored-by: Weston Pace <weston.pace@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/src/arrow/csv/reader.cc              | 223 ++++++++---------------
 cpp/src/arrow/csv/reader.h               |  11 --
 cpp/src/arrow/csv/reader_test.cc         |  15 +-
 cpp/src/arrow/dataset/file_base.cc       |  25 +--
 cpp/src/arrow/dataset/file_csv.cc        |  75 +++-----
 cpp/src/arrow/dataset/file_test.cc       |  29 ---
 cpp/src/arrow/dataset/scanner.cc         |  39 +---
 cpp/src/arrow/dataset/scanner.h          |   2 -
 cpp/src/arrow/dataset/scanner_internal.h |  90 +--------
 cpp/src/arrow/dataset/scanner_test.cc    |  15 --
 cpp/src/arrow/dataset/test_util.h        | 151 ---------------
 cpp/src/arrow/record_batch.h             |   9 -
 cpp/src/arrow/util/future.h              |   2 +-
 cpp/src/arrow/util/thread_pool.h         |   5 +-
 r/tests/testthat/test-dataset.R          |   1 -
 15 files changed, 123 insertions(+), 569 deletions(-)

diff --git a/cpp/src/arrow/csv/reader.cc b/cpp/src/arrow/csv/reader.cc
index c4352360e6b..5b6e11efdaf 100644
--- a/cpp/src/arrow/csv/reader.cc
+++ b/cpp/src/arrow/csv/reader.cc
@@ -199,19 +199,6 @@ class SerialBlockReader : public BlockReader {
     return MakeTransformedIterator(std::move(buffer_iterator), block_reader_fn);
   }
 
-  static AsyncGenerator<CSVBlock> MakeAsyncIterator(
-      AsyncGenerator<std::shared_ptr<Buffer>> buffer_generator,
-      std::unique_ptr<Chunker> chunker, std::shared_ptr<Buffer> first_buffer) {
-    auto block_reader =
-        std::make_shared<SerialBlockReader>(std::move(chunker), first_buffer);
-    // Wrap shared pointer in callable
-    Transformer<std::shared_ptr<Buffer>, CSVBlock> block_reader_fn =
-        [block_reader](std::shared_ptr<Buffer> next) {
-          return (*block_reader)(std::move(next));
-        };
-    return MakeTransformedGenerator(std::move(buffer_generator), block_reader_fn);
-  }
-
   Result<TransformFlow<CSVBlock>> operator()(std::shared_ptr<Buffer> next_buffer) {
     if (buffer_ == nullptr) {
       return TransformFinish();
@@ -585,25 +572,22 @@ class BaseTableReader : public ReaderMixin, public csv::TableReader {
 
 class BaseStreamingReader : public ReaderMixin, public csv::StreamingReader {
  public:
-  BaseStreamingReader(io::IOContext io_context, Executor* cpu_executor,
-                      std::shared_ptr<io::InputStream> input,
-                      const ReadOptions& read_options, const ParseOptions& parse_options,
-                      const ConvertOptions& convert_options)
-      : ReaderMixin(io_context, std::move(input), read_options, parse_options,
-                    convert_options),
-        cpu_executor_(cpu_executor) {}
+  using ReaderMixin::ReaderMixin;
 
-  virtual Future<std::shared_ptr<csv::StreamingReader>> Init() = 0;
+  virtual Status Init() = 0;
 
   std::shared_ptr<Schema> schema() const override { return schema_; }
 
   Status ReadNext(std::shared_ptr<RecordBatch>* batch) override {
-    auto next_fut = ReadNextAsync();
-    auto next_result = next_fut.result();
-    return std::move(next_result).Value(batch);
+    do {
+      RETURN_NOT_OK(ReadNext().Value(batch));
+    } while (*batch != nullptr && (*batch)->num_rows() == 0);
+    return Status::OK();
   }
 
  protected:
+  virtual Result<std::shared_ptr<RecordBatch>> ReadNext() = 0;
+
   // Make column decoders from conversion schema
   Status MakeColumnDecoders() {
     for (const auto& column : conversion_schema_.columns) {
@@ -686,141 +670,101 @@ class BaseStreamingReader : public ReaderMixin, public csv::StreamingReader {
   std::vector<std::shared_ptr<ColumnDecoder>> column_decoders_;
   std::shared_ptr<Schema> schema_;
   std::shared_ptr<RecordBatch> pending_batch_;
-  AsyncGenerator<std::shared_ptr<Buffer>> buffer_generator_;
-  Executor* cpu_executor_;
+  Iterator<std::shared_ptr<Buffer>> buffer_iterator_;
   bool eof_ = false;
 };
 
 /////////////////////////////////////////////////////////////////////////
 // Serial StreamingReader implementation
 
-class SerialStreamingReader : public BaseStreamingReader,
-                              public std::enable_shared_from_this<SerialStreamingReader> {
+class SerialStreamingReader : public BaseStreamingReader {
  public:
   using BaseStreamingReader::BaseStreamingReader;
 
-  Future<std::shared_ptr<csv::StreamingReader>> Init() override {
+  Status Init() override {
     ARROW_ASSIGN_OR_RAISE(auto istream_it,
                           io::MakeInputStreamIterator(input_, read_options_.block_size));
 
-    // TODO Consider exposing readahead as a read option (ARROW-12090)
-    ARROW_ASSIGN_OR_RAISE(auto bg_it, MakeBackgroundGenerator(std::move(istream_it),
-                                                              io_context_.executor()));
-
-    auto transferred_it = MakeTransferredGenerator(bg_it, cpu_executor_);
-
-    buffer_generator_ = CSVBufferIterator::MakeAsync(std::move(transferred_it));
+    // Since we're converting serially, no need to readahead more than one block
+    int32_t block_queue_size = 1;
+    ARROW_ASSIGN_OR_RAISE(auto rh_it,
+                          MakeReadaheadIterator(std::move(istream_it), block_queue_size));
+    buffer_iterator_ = CSVBufferIterator::Make(std::move(rh_it));
     task_group_ = internal::TaskGroup::MakeSerial(io_context_.stop_token());
 
-    auto self = shared_from_this();
     // Read schema from first batch
-    return ReadNextAsync().Then([self](const std::shared_ptr<RecordBatch>& first_batch)
-                                    -> Result<std::shared_ptr<csv::StreamingReader>> {
-      self->pending_batch_ = first_batch;
-      DCHECK_NE(self->schema_, nullptr);
-      return self;
-    });
+    ARROW_ASSIGN_OR_RAISE(pending_batch_, ReadNext());
+    DCHECK_NE(schema_, nullptr);
+    return Status::OK();
   }
 
-  Result<std::shared_ptr<RecordBatch>> DecodeBatchAndUpdateSchema() {
-    auto maybe_batch = DecodeNextBatch();
-    if (schema_ == nullptr && maybe_batch.ok()) {
-      schema_ = (*maybe_batch)->schema();
+ protected:
+  Result<std::shared_ptr<RecordBatch>> ReadNext() override {
+    if (eof_) {
+      return nullptr;
+    }
+    if (io_context_.stop_token().IsStopRequested()) {
+      eof_ = true;
+      return io_context_.stop_token().Poll();
+    }
+    if (!block_iterator_) {
+      Status st = SetupReader();
+      if (!st.ok()) {
+        // Can't setup reader => bail out
+        eof_ = true;
+        return st;
+      }
     }
-    return maybe_batch;
-  }
-
-  Future<std::shared_ptr<RecordBatch>> DoReadNext(
-      std::shared_ptr<SerialStreamingReader> self) {
     auto batch = std::move(pending_batch_);
     if (batch != nullptr) {
-      return Future<std::shared_ptr<RecordBatch>>::MakeFinished(batch);
+      return batch;
     }
 
     if (!source_eof_) {
-      return block_generator_()
-          .Then([self](const CSVBlock& maybe_block) -> Status {
-            if (!IsIterationEnd(maybe_block)) {
-              self->last_block_index_ = maybe_block.block_index;
-              auto maybe_parsed = self->ParseAndInsert(
-                  maybe_block.partial, maybe_block.completion, maybe_block.buffer,
-                  maybe_block.block_index, maybe_block.is_final);
-              if (!maybe_parsed.ok()) {
-                // Parse error => bail out
-                self->eof_ = true;
-                return maybe_parsed.status();
-              }
-              RETURN_NOT_OK(maybe_block.consume_bytes(*maybe_parsed));
-            } else {
-              self->source_eof_ = true;
-              for (auto& decoder : self->column_decoders_) {
-                decoder->SetEOF(self->last_block_index_ + 1);
-              }
-            }
-            return Status::OK();
-          })
-          .Then([self](const ::arrow::detail::Empty& st)
-                    -> Result<std::shared_ptr<RecordBatch>> {
-            return self->DecodeBatchAndUpdateSchema();
-          });
-    }
-    return Future<std::shared_ptr<RecordBatch>>::MakeFinished(
-        DecodeBatchAndUpdateSchema());
-  }
-
-  Future<std::shared_ptr<RecordBatch>> ReadNextSkippingEmpty(
-      std::shared_ptr<SerialStreamingReader> self) {
-    return DoReadNext(self).Then([self](const std::shared_ptr<RecordBatch>& batch) {
-      if (batch != nullptr && batch->num_rows() == 0) {
-        return self->ReadNextSkippingEmpty(self);
+      ARROW_ASSIGN_OR_RAISE(auto maybe_block, block_iterator_.Next());
+      if (!IsIterationEnd(maybe_block)) {
+        last_block_index_ = maybe_block.block_index;
+        auto maybe_parsed = ParseAndInsert(maybe_block.partial, maybe_block.completion,
+                                           maybe_block.buffer, maybe_block.block_index,
+                                           maybe_block.is_final);
+        if (!maybe_parsed.ok()) {
+          // Parse error => bail out
+          eof_ = true;
+          return maybe_parsed.status();
+        }
+        RETURN_NOT_OK(maybe_block.consume_bytes(*maybe_parsed));
+      } else {
+        source_eof_ = true;
+        for (auto& decoder : column_decoders_) {
+          decoder->SetEOF(last_block_index_ + 1);
+        }
       }
-      return Future<std::shared_ptr<RecordBatch>>::MakeFinished(batch);
-    });
-  }
-
-  Future<std::shared_ptr<RecordBatch>> ReadNextAsync() override {
-    if (eof_) {
-      return Future<std::shared_ptr<RecordBatch>>::MakeFinished(nullptr);
     }
-    if (io_context_.stop_token().IsStopRequested()) {
-      eof_ = true;
-      return io_context_.stop_token().Poll();
-    }
-    auto self = shared_from_this();
-    if (!block_generator_) {
-      return SetupReader(self).Then([self](const Result<::arrow::detail::Empty>& res)
-                                        -> Future<std::shared_ptr<RecordBatch>> {
-        if (!res.ok()) {
-          self->eof_ = true;
-          return res.status();
-        }
-        return self->ReadNextSkippingEmpty(self);
-      });
-    } else {
-      return self->ReadNextSkippingEmpty(self);
+
+    auto maybe_batch = DecodeNextBatch();
+    if (schema_ == nullptr && maybe_batch.ok()) {
+      schema_ = (*maybe_batch)->schema();
     }
+    return maybe_batch;
   };
 
- protected:
-  Future<> SetupReader(std::shared_ptr<SerialStreamingReader> self) {
-    return buffer_generator_().Then([self](const std::shared_ptr<Buffer>& first_buffer) {
-      if (first_buffer == nullptr) {
-        return Status::Invalid("Empty CSV file");
-      }
-      auto own_first_buffer = first_buffer;
-      RETURN_NOT_OK(self->ProcessHeader(own_first_buffer, &own_first_buffer));
-      RETURN_NOT_OK(self->MakeColumnDecoders());
+  Status SetupReader() {
+    ARROW_ASSIGN_OR_RAISE(auto first_buffer, buffer_iterator_.Next());
+    if (first_buffer == nullptr) {
+      return Status::Invalid("Empty CSV file");
+    }
+    RETURN_NOT_OK(ProcessHeader(first_buffer, &first_buffer));
+    RETURN_NOT_OK(MakeColumnDecoders());
 
-      self->block_generator_ = SerialBlockReader::MakeAsyncIterator(
-          std::move(self->buffer_generator_), MakeChunker(self->parse_options_),
-          std::move(own_first_buffer));
-      return Status::OK();
-    });
+    block_iterator_ = SerialBlockReader::MakeIterator(std::move(buffer_iterator_),
+                                                      MakeChunker(parse_options_),
+                                                      std::move(first_buffer));
+    return Status::OK();
   }
 
   bool source_eof_ = false;
   int64_t last_block_index_ = 0;
-  AsyncGenerator<CSVBlock> block_generator_;
+  Iterator<CSVBlock> block_iterator_;
 };
 
 /////////////////////////////////////////////////////////////////////////
@@ -999,14 +943,15 @@ Result<std::shared_ptr<TableReader>> MakeTableReader(
   return reader;
 }
 
-Future<std::shared_ptr<StreamingReader>> MakeStreamingReader(
+Result<std::shared_ptr<StreamingReader>> MakeStreamingReader(
     io::IOContext io_context, std::shared_ptr<io::InputStream> input,
     internal::Executor* cpu_executor, const ReadOptions& read_options,
     const ParseOptions& parse_options, const ConvertOptions& convert_options) {
   std::shared_ptr<BaseStreamingReader> reader;
-  reader = std::make_shared<SerialStreamingReader>(
-      io_context, cpu_executor, input, read_options, parse_options, convert_options);
-  return reader->Init();
+  reader = std::make_shared<SerialStreamingReader>(io_context, input, read_options,
+                                                   parse_options, convert_options);
+  RETURN_NOT_OK(reader->Init());
+  return reader;
 }
 
 }  // namespace
@@ -1036,11 +981,8 @@ Result<std::shared_ptr<StreamingReader>> StreamingReader::Make(
     const ConvertOptions& convert_options) {
   auto io_context = io::IOContext(pool);
   auto cpu_executor = internal::GetCpuThreadPool();
-  auto reader_fut = MakeStreamingReader(io_context, std::move(input), cpu_executor,
-                                        read_options, parse_options, convert_options);
-  auto reader_result = reader_fut.result();
-  ARROW_ASSIGN_OR_RAISE(auto reader, reader_result);
-  return reader;
+  return MakeStreamingReader(io_context, std::move(input), cpu_executor, read_options,
+                             parse_options, convert_options);
 }
 
 Result<std::shared_ptr<StreamingReader>> StreamingReader::Make(
@@ -1048,17 +990,6 @@ Result<std::shared_ptr<StreamingReader>> StreamingReader::Make(
     const ReadOptions& read_options, const ParseOptions& parse_options,
     const ConvertOptions& convert_options) {
   auto cpu_executor = internal::GetCpuThreadPool();
-  auto reader_fut = MakeStreamingReader(io_context, std::move(input), cpu_executor,
-                                        read_options, parse_options, convert_options);
-  auto reader_result = reader_fut.result();
-  ARROW_ASSIGN_OR_RAISE(auto reader, reader_result);
-  return reader;
-}
-
-Future<std::shared_ptr<StreamingReader>> StreamingReader::MakeAsync(
-    io::IOContext io_context, std::shared_ptr<io::InputStream> input,
-    internal::Executor* cpu_executor, const ReadOptions& read_options,
-    const ParseOptions& parse_options, const ConvertOptions& convert_options) {
   return MakeStreamingReader(io_context, std::move(input), cpu_executor, read_options,
                              parse_options, convert_options);
 }
diff --git a/cpp/src/arrow/csv/reader.h b/cpp/src/arrow/csv/reader.h
index 72f1375cc3c..8e56824a0ac 100644
--- a/cpp/src/arrow/csv/reader.h
+++ b/cpp/src/arrow/csv/reader.h
@@ -65,17 +65,6 @@ class ARROW_EXPORT StreamingReader : public RecordBatchReader {
   virtual ~StreamingReader() = default;
 
   /// Create a StreamingReader instance
-  ///
-  /// This involves some I/O as the first batch must be loaded during the creation process
-  /// so it is returned as a future
-  ///
-  /// Currently, the StreamingReader is not async-reentrant and does not do any fan-out
-  /// parsing (see ARROW-11889)
-  static Future<std::shared_ptr<StreamingReader>> MakeAsync(
-      io::IOContext io_context, std::shared_ptr<io::InputStream> input,
-      internal::Executor* cpu_executor, const ReadOptions&, const ParseOptions&,
-      const ConvertOptions&);
-
   static Result<std::shared_ptr<StreamingReader>> Make(
       io::IOContext io_context, std::shared_ptr<io::InputStream> input,
       const ReadOptions&, const ParseOptions&, const ConvertOptions&);
diff --git a/cpp/src/arrow/csv/reader_test.cc b/cpp/src/arrow/csv/reader_test.cc
index 228ab71301a..dbe6b1d4f20 100644
--- a/cpp/src/arrow/csv/reader_test.cc
+++ b/cpp/src/arrow/csv/reader_test.cc
@@ -32,14 +32,10 @@
 #include "arrow/table.h"
 #include "arrow/testing/future_util.h"
 #include "arrow/testing/gtest_util.h"
-#include "arrow/util/async_generator.h"
 #include "arrow/util/future.h"
 #include "arrow/util/thread_pool.h"
 
 namespace arrow {
-
-using RecordBatchGenerator = AsyncGenerator<std::shared_ptr<RecordBatch>>;
-
 namespace csv {
 
 // Allows the streaming reader to be used in tests that expect a table reader
@@ -49,17 +45,12 @@ class StreamingReaderAsTableReader : public TableReader {
       : reader_(std::move(reader)) {}
   virtual ~StreamingReaderAsTableReader() = default;
   virtual Result<std::shared_ptr<Table>> Read() {
-    auto table_fut = ReadAsync();
-    auto table_res = table_fut.result();
-    ARROW_ASSIGN_OR_RAISE(auto table, table_res);
+    std::shared_ptr<Table> table;
+    RETURN_NOT_OK(reader_->ReadAll(&table));
     return table;
   }
   virtual Future<std::shared_ptr<Table>> ReadAsync() {
-    auto reader = reader_;
-    RecordBatchGenerator rb_generator = [reader]() { return reader->ReadNextAsync(); };
-    return CollectAsyncGenerator(rb_generator).Then([](const RecordBatchVector& rbs) {
-      return Table::FromRecordBatches(rbs);
-    });
+    return Future<std::shared_ptr<Table>>::MakeFinished(Read());
   }
 
  private:
diff --git a/cpp/src/arrow/dataset/file_base.cc b/cpp/src/arrow/dataset/file_base.cc
index 7b2f42055b3..f4a3a0bc9f8 100644
--- a/cpp/src/arrow/dataset/file_base.cc
+++ b/cpp/src/arrow/dataset/file_base.cc
@@ -428,25 +428,16 @@ Future<> WriteInternal(const ScanOptions& scan_options, WriteState& state,
   auto task_group = scan_options.TaskGroup();
 
   for (const auto& scan_task : scan_tasks) {
-    if (scan_task->supports_async()) {
-      ARROW_ASSIGN_OR_RAISE(auto batches_gen, scan_task->ExecuteAsync(cpu_executor));
-      std::function<Status(std::shared_ptr<RecordBatch> batch)> batch_visitor =
-          [&, scan_task](std::shared_ptr<RecordBatch> batch) {
-            return WriteNextBatch(state, scan_task->fragment(), std::move(batch));
-          };
-      scan_futs.push_back(VisitAsyncGenerator(batches_gen, batch_visitor));
-    } else {
-      task_group->Append([&, scan_task] {
-        ARROW_ASSIGN_OR_RAISE(auto batches, scan_task->Execute());
+    task_group->Append([&, scan_task] {
+      ARROW_ASSIGN_OR_RAISE(auto batches, scan_task->Execute());
 
-        for (auto maybe_batch : batches) {
-          ARROW_ASSIGN_OR_RAISE(auto batch, maybe_batch);
-          RETURN_NOT_OK(WriteNextBatch(state, scan_task->fragment(), std::move(batch)));
-        }
+      for (auto maybe_batch : batches) {
+        ARROW_ASSIGN_OR_RAISE(auto batch, maybe_batch);
+        RETURN_NOT_OK(WriteNextBatch(state, scan_task->fragment(), std::move(batch)));
+      }
 
-        return Status::OK();
-      });
-    }
+      return Status::OK();
+    });
   }
   scan_futs.push_back(task_group->FinishAsync());
   return AllComplete(scan_futs);
diff --git a/cpp/src/arrow/dataset/file_csv.cc b/cpp/src/arrow/dataset/file_csv.cc
index 9a7a9d2de4c..8ba6505524c 100644
--- a/cpp/src/arrow/dataset/file_csv.cc
+++ b/cpp/src/arrow/dataset/file_csv.cc
@@ -34,7 +34,6 @@
 #include "arrow/io/compressed.h"
 #include "arrow/result.h"
 #include "arrow/type.h"
-#include "arrow/util/async_generator.h"
 #include "arrow/util/iterator.h"
 #include "arrow/util/logging.h"
 
@@ -114,53 +113,34 @@ static inline Result<csv::ReadOptions> GetReadOptions(
   return read_options;
 }
 
-static inline Future<std::shared_ptr<csv::StreamingReader>> OpenReaderAsync(
+static inline Result<std::shared_ptr<csv::StreamingReader>> OpenReader(
     const FileSource& source, const CsvFileFormat& format,
-    internal::Executor* cpu_executor,
     const std::shared_ptr<ScanOptions>& scan_options = nullptr,
     MemoryPool* pool = default_memory_pool()) {
   ARROW_ASSIGN_OR_RAISE(auto reader_options, GetReadOptions(format, scan_options));
 
+  util::string_view first_block;
   ARROW_ASSIGN_OR_RAISE(auto input, source.OpenCompressed());
   ARROW_ASSIGN_OR_RAISE(
       input, io::BufferedInputStream::Create(reader_options.block_size,
                                              default_memory_pool(), std::move(input)));
+  ARROW_ASSIGN_OR_RAISE(first_block, input->Peek(reader_options.block_size));
 
-  auto peek_fut = DeferNotOk(input->io_context().executor()->Submit(
-      [input, reader_options] { return input->Peek(reader_options.block_size); }));
-
-  return peek_fut.Then([=](const util::string_view& first_block)
-                           -> Future<std::shared_ptr<csv::StreamingReader>> {
-    const auto& parse_options = format.parse_options;
-    auto convert_options = csv::ConvertOptions::Defaults();
-    if (scan_options != nullptr) {
-      ARROW_ASSIGN_OR_RAISE(convert_options,
-                            GetConvertOptions(format, scan_options, first_block, pool));
-    }
-
-    return csv::StreamingReader::MakeAsync(io::default_io_context(), std::move(input),
-                                           cpu_executor, reader_options, parse_options,
-                                           convert_options)
-        .Then(
-            [](const std::shared_ptr<csv::StreamingReader>& maybe_reader)
-                -> Result<std::shared_ptr<csv::StreamingReader>> { return maybe_reader; },
-            [source](const Status& err) -> Result<std::shared_ptr<csv::StreamingReader>> {
-              return err.WithMessage("Could not open CSV input source '", source.path(),
-                                     "': ", err);
-            });
-  });
-}
+  const auto& parse_options = format.parse_options;
+  auto convert_options = csv::ConvertOptions::Defaults();
+  if (scan_options != nullptr) {
+    ARROW_ASSIGN_OR_RAISE(convert_options,
+                          GetConvertOptions(format, scan_options, first_block, pool));
+  }
 
-static inline Result<std::shared_ptr<csv::StreamingReader>> OpenReader(
-    const FileSource& source, const CsvFileFormat& format,
-    const std::shared_ptr<ScanOptions>& scan_options = nullptr,
-    MemoryPool* pool = default_memory_pool()) {
-  bool use_threads = (scan_options != nullptr && scan_options->use_threads);
-  return internal::RunSynchronously<std::shared_ptr<csv::StreamingReader>>(
-      [&](Executor* executor) {
-        return OpenReaderAsync(source, format, executor, scan_options, pool);
-      },
-      use_threads);
+  auto maybe_reader =
+      csv::StreamingReader::Make(io::IOContext(pool), std::move(input), reader_options,
+                                 parse_options, convert_options);
+  if (!maybe_reader.ok()) {
+    return maybe_reader.status().WithMessage("Could not open CSV input source '",
+                                             source.path(), "': ", maybe_reader.status());
+  }
+  return maybe_reader;
 }
 
 /// \brief A ScanTask backed by an Csv file.
@@ -174,20 +154,9 @@ class CsvScanTask : public ScanTask {
         source_(fragment->source()) {}
 
   Result<RecordBatchIterator> Execute() override {
-    ARROW_ASSIGN_OR_RAISE(auto gen, ExecuteAsync(internal::GetCpuThreadPool()));
-    return MakeGeneratorIterator(std::move(gen));
-  }
-
-  bool supports_async() const override { return true; }
-
-  Result<RecordBatchGenerator> ExecuteAsync(internal::Executor* cpu_executor) override {
-    auto reader_fut =
-        OpenReaderAsync(source_, *format_, cpu_executor, options(), options()->pool);
-    auto generator_fut = reader_fut.Then(
-        [](const std::shared_ptr<csv::StreamingReader>& reader) -> RecordBatchGenerator {
-          return [reader]() { return reader->ReadNextAsync(); };
-        });
-    return MakeFromFuture(generator_fut);
+    ARROW_ASSIGN_OR_RAISE(auto reader,
+                          OpenReader(source_, *format_, options(), options()->pool));
+    return IteratorFromReader(std::move(reader));
   }
 
  private:
@@ -225,8 +194,8 @@ Result<ScanTaskIterator> CsvFileFormat::ScanFile(
     std::shared_ptr<ScanOptions> options,
     const std::shared_ptr<FileFragment>& fragment) const {
   auto this_ = checked_pointer_cast<const CsvFileFormat>(shared_from_this());
-  auto task = std::make_shared<CsvScanTask>(std::move(this_), std::move(options),
-                                            std::move(fragment));
+  auto task =
+      std::make_shared<CsvScanTask>(std::move(this_), std::move(options), fragment);
 
   return MakeVectorIterator<std::shared_ptr<ScanTask>>({std::move(task)});
 }
diff --git a/cpp/src/arrow/dataset/file_test.cc b/cpp/src/arrow/dataset/file_test.cc
index fdbb4512758..c7ce5154d0a 100644
--- a/cpp/src/arrow/dataset/file_test.cc
+++ b/cpp/src/arrow/dataset/file_test.cc
@@ -249,35 +249,6 @@ TEST_F(TestFileSystemDataset, FragmentPartitions) {
                 });
 }
 
-class TestFilesystemDatasetNestedParallelism : public NestedParallelismMixin {};
-
-TEST_F(TestFilesystemDatasetNestedParallelism, Write) {
-  constexpr int NUM_BATCHES = 32;
-  RecordBatchVector batches;
-  for (int i = 0; i < NUM_BATCHES; i++) {
-    batches.push_back(ConstantArrayGenerator::Zeroes(/*size=*/1, schema_));
-  }
-  auto dataset = std::make_shared<NestedParallelismDataset>(schema_, std::move(batches));
-  ScannerBuilder builder{dataset, options_};
-  ASSERT_OK_AND_ASSIGN(auto scanner, builder.Finish());
-
-  ASSERT_OK_AND_ASSIGN(auto output_dir, TemporaryDir::Make("nested-parallel-dataset"));
-
-  auto format = std::make_shared<DiscardingRowCountingFormat>();
-  auto rows_written = std::make_shared<std::atomic<int>>(0);
-  std::shared_ptr<FileWriteOptions> file_write_options =
-      std::make_shared<DiscardingRowCountingFileWriteOptions>(rows_written);
-  FileSystemDatasetWriteOptions dataset_write_options;
-  dataset_write_options.file_write_options = file_write_options;
-  dataset_write_options.basename_template = "{i}";
-  dataset_write_options.partitioning = std::make_shared<HivePartitioning>(schema({}));
-  dataset_write_options.base_dir = output_dir->path().ToString();
-  dataset_write_options.filesystem = std::make_shared<fs::LocalFileSystem>();
-
-  ASSERT_OK(FileSystemDataset::Write(dataset_write_options, scanner));
-  ASSERT_EQ(NUM_BATCHES, rows_written->load());
-}
-
 // Tests of subtree pruning
 
 struct TestPathTree {
diff --git a/cpp/src/arrow/dataset/scanner.cc b/cpp/src/arrow/dataset/scanner.cc
index 738c9fc0f62..52eebfeb29e 100644
--- a/cpp/src/arrow/dataset/scanner.cc
+++ b/cpp/src/arrow/dataset/scanner.cc
@@ -64,12 +64,6 @@ Result<RecordBatchIterator> InMemoryScanTask::Execute() {
   return MakeVectorIterator(record_batches_);
 }
 
-Result<RecordBatchGenerator> ScanTask::ExecuteAsync(internal::Executor*) {
-  return Status::NotImplemented("Async is not implemented for this scan task yet");
-}
-
-bool ScanTask::supports_async() const { return false; }
-
 Result<ScanTaskIterator> Scanner::Scan() {
   // TODO(ARROW-12289) This is overridden in SyncScanner and will never be implemented in
   // AsyncScanner.  It is deprecated and will eventually go away.
@@ -331,37 +325,22 @@ Future<std::shared_ptr<Table>> SyncScanner::ToTableInternal(
 
   // TODO (ARROW-11797) Migrate to using ScanBatches()
   size_t scan_task_id = 0;
-  std::vector<Future<>> scan_futures;
   for (auto maybe_scan_task : scan_task_it) {
     ARROW_ASSIGN_OR_RAISE(auto scan_task, maybe_scan_task);
 
     auto id = scan_task_id++;
-    if (scan_task->supports_async()) {
-      ARROW_ASSIGN_OR_RAISE(auto scan_gen, scan_task->ExecuteAsync(cpu_executor));
-      auto scan_fut = CollectAsyncGenerator(std::move(scan_gen))
-                          .Then([state, id](const RecordBatchVector& rbs) {
-                            state->Emplace(rbs, id);
-                          });
-      scan_futures.push_back(std::move(scan_fut));
-    } else {
-      task_group->Append([state, id, scan_task] {
-        ARROW_ASSIGN_OR_RAISE(auto batch_it, scan_task->Execute());
-        ARROW_ASSIGN_OR_RAISE(auto local, batch_it.ToVector());
-        state->Emplace(std::move(local), id);
-        return Status::OK();
-      });
-    }
+    task_group->Append([state, id, scan_task] {
+      ARROW_ASSIGN_OR_RAISE(auto batch_it, scan_task->Execute());
+      ARROW_ASSIGN_OR_RAISE(auto local, batch_it.ToVector());
+      state->Emplace(std::move(local), id);
+      return Status::OK();
+    });
   }
   auto scan_options = scan_options_;
-  scan_futures.push_back(task_group->FinishAsync());
   // Wait for all tasks to complete, or the first error
-  return AllComplete(scan_futures)
-      .Then(
-          [scan_options, state](const detail::Empty&) -> Result<std::shared_ptr<Table>> {
-            return Table::FromRecordBatches(
-                scan_options->projected_schema,
-                FlattenRecordBatchVector(std::move(state->batches)));
-          });
+  RETURN_NOT_OK(task_group->Finish());
+  return Table::FromRecordBatches(scan_options->projected_schema,
+                                  FlattenRecordBatchVector(std::move(state->batches)));
 }
 
 }  // namespace dataset
diff --git a/cpp/src/arrow/dataset/scanner.h b/cpp/src/arrow/dataset/scanner.h
index 37765c10488..c4da6da7b80 100644
--- a/cpp/src/arrow/dataset/scanner.h
+++ b/cpp/src/arrow/dataset/scanner.h
@@ -152,8 +152,6 @@ class ARROW_DS_EXPORT ScanTask {
   /// resulting from the Scan. Execution semantics are encapsulated in the
   /// particular ScanTask implementation
   virtual Result<RecordBatchIterator> Execute() = 0;
-  virtual Result<RecordBatchGenerator> ExecuteAsync(internal::Executor* cpu_executor);
-  virtual bool supports_async() const;
 
   virtual ~ScanTask() = default;
 
diff --git a/cpp/src/arrow/dataset/scanner_internal.h b/cpp/src/arrow/dataset/scanner_internal.h
index d334c094d31..292ea6ce372 100644
--- a/cpp/src/arrow/dataset/scanner_internal.h
+++ b/cpp/src/arrow/dataset/scanner_internal.h
@@ -38,8 +38,6 @@ using internal::Executor;
 
 namespace dataset {
 
-// TODO(ARROW-7001) This synchronous version is no longer needed, can use async version
-// regardless of sync/async of source
 inline RecordBatchIterator FilterRecordBatch(RecordBatchIterator it, Expression filter,
                                              MemoryPool* pool) {
   return MakeMaybeMapIterator(
@@ -64,38 +62,6 @@ inline RecordBatchIterator FilterRecordBatch(RecordBatchIterator it, Expression
       std::move(it));
 }
 
-inline Result<std::shared_ptr<RecordBatch>> DoFilterRecordBatch(
-    const Expression& filter, MemoryPool* pool, const std::shared_ptr<RecordBatch>& in) {
-  compute::ExecContext exec_context{pool};
-  ARROW_ASSIGN_OR_RAISE(Datum mask,
-                        ExecuteScalarExpression(filter, Datum(in), &exec_context));
-
-  if (mask.is_scalar()) {
-    const auto& mask_scalar = mask.scalar_as<BooleanScalar>();
-    if (mask_scalar.is_valid && mask_scalar.value) {
-      return std::move(in);
-    }
-    return in->Slice(0, 0);
-  }
-
-  ARROW_ASSIGN_OR_RAISE(
-      Datum filtered,
-      compute::Filter(in, mask, compute::FilterOptions::Defaults(), &exec_context));
-  return filtered.record_batch();
-}
-
-inline RecordBatchGenerator FilterRecordBatch(RecordBatchGenerator rbs, Expression filter,
-                                              MemoryPool* pool) {
-  // TODO(ARROW-7001) This changes to auto
-  std::function<Result<std::shared_ptr<RecordBatch>>(const std::shared_ptr<RecordBatch>&)>
-      mapper = [=](const std::shared_ptr<RecordBatch>& in) {
-        return DoFilterRecordBatch(filter, pool, in);
-      };
-  return MakeMappedGenerator(std::move(rbs), mapper);
-}
-
-// TODO(ARROW-7001) This synchronous version is no longer needed, all branches use async
-// version
 inline RecordBatchIterator ProjectRecordBatch(RecordBatchIterator it,
                                               Expression projection, MemoryPool* pool) {
   return MakeMaybeMapIterator(
@@ -119,35 +85,6 @@ inline RecordBatchIterator ProjectRecordBatch(RecordBatchIterator it,
       std::move(it));
 }
 
-inline Result<std::shared_ptr<RecordBatch>> DoProjectRecordBatch(
-    const Expression& projection, MemoryPool* pool,
-    const std::shared_ptr<RecordBatch>& in) {
-  compute::ExecContext exec_context{pool};
-  ARROW_ASSIGN_OR_RAISE(Datum projected,
-                        ExecuteScalarExpression(projection, Datum(in), &exec_context));
-  DCHECK_EQ(projected.type()->id(), Type::STRUCT);
-  if (projected.shape() == ValueDescr::SCALAR) {
-    // Only virtual columns are projected. Broadcast to an array
-    ARROW_ASSIGN_OR_RAISE(projected,
-                          MakeArrayFromScalar(*projected.scalar(), in->num_rows(), pool));
-  }
-
-  ARROW_ASSIGN_OR_RAISE(auto out,
-                        RecordBatch::FromStructArray(projected.array_as<StructArray>()));
-
-  return out->ReplaceSchemaMetadata(in->schema()->metadata());
-}
-
-inline RecordBatchGenerator ProjectRecordBatch(RecordBatchGenerator rbs,
-                                               Expression projection, MemoryPool* pool) {
-  // TODO(ARROW-7001) This changes to auto
-  std::function<Result<std::shared_ptr<RecordBatch>>(const std::shared_ptr<RecordBatch>&)>
-      mapper = [=](const std::shared_ptr<RecordBatch>& in) {
-        return DoProjectRecordBatch(projection, pool, in);
-      };
-  return MakeMappedGenerator(std::move(rbs), mapper);
-}
-
 class FilterAndProjectScanTask : public ScanTask {
  public:
   explicit FilterAndProjectScanTask(std::shared_ptr<ScanTask> task, Expression partition)
@@ -155,9 +92,7 @@ class FilterAndProjectScanTask : public ScanTask {
         task_(std::move(task)),
         partition_(std::move(partition)) {}
 
-  bool supports_async() const override { return task_->supports_async(); }
-
-  Result<RecordBatchIterator> ExecuteSync() {
+  Result<RecordBatchIterator> Execute() override {
     ARROW_ASSIGN_OR_RAISE(auto it, task_->Execute());
 
     ARROW_ASSIGN_OR_RAISE(Expression simplified_filter,
@@ -173,29 +108,6 @@ class FilterAndProjectScanTask : public ScanTask {
                               options_->pool);
   }
 
-  Result<RecordBatchIterator> Execute() override { return ExecuteSync(); }
-
-  Result<RecordBatchGenerator> ExecuteAsync(Executor* cpu_executor) override {
-    if (!task_->supports_async()) {
-      return Status::Invalid(
-          "ExecuteAsync should not have been called on FilterAndProjectScanTask if the "
-          "source task did not support async");
-    }
-    ARROW_ASSIGN_OR_RAISE(auto gen, task_->ExecuteAsync(cpu_executor));
-
-    ARROW_ASSIGN_OR_RAISE(Expression simplified_filter,
-                          SimplifyWithGuarantee(options()->filter, partition_));
-
-    ARROW_ASSIGN_OR_RAISE(Expression simplified_projection,
-                          SimplifyWithGuarantee(options()->projection, partition_));
-
-    RecordBatchGenerator filter_gen =
-        FilterRecordBatch(std::move(gen), simplified_filter, options_->pool);
-
-    return ProjectRecordBatch(std::move(filter_gen), simplified_projection,
-                              options_->pool);
-  }
-
  private:
   std::shared_ptr<ScanTask> task_;
   Expression partition_;
diff --git a/cpp/src/arrow/dataset/scanner_test.cc b/cpp/src/arrow/dataset/scanner_test.cc
index ccae126da47..d1d0e45b827 100644
--- a/cpp/src/arrow/dataset/scanner_test.cc
+++ b/cpp/src/arrow/dataset/scanner_test.cc
@@ -181,21 +181,6 @@ TEST_F(TestScanner, ToTable) {
   AssertTablesEqual(*expected, *actual);
 }
 
-class TestScannerNestedParallelism : public NestedParallelismMixin {};
-
-TEST_F(TestScannerNestedParallelism, Scan) {
-  constexpr int NUM_BATCHES = 32;
-  RecordBatchVector batches;
-  for (int i = 0; i < NUM_BATCHES; i++) {
-    batches.push_back(ConstantArrayGenerator::Zeroes(/*size=*/1, schema_));
-  }
-  auto dataset = std::make_shared<NestedParallelismDataset>(schema_, std::move(batches));
-  ScannerBuilder builder{dataset, options_};
-  ASSERT_OK_AND_ASSIGN(auto scanner, builder.Finish());
-  ASSERT_OK_AND_ASSIGN(auto table, scanner->ToTable());
-  ASSERT_EQ(table->num_rows(), NUM_BATCHES);
-}
-
 class TestScannerBuilder : public ::testing::Test {
   void SetUp() override {
     DatasetVector sources;
diff --git a/cpp/src/arrow/dataset/test_util.h b/cpp/src/arrow/dataset/test_util.h
index 826e8b7901a..ea4c41e63c5 100644
--- a/cpp/src/arrow/dataset/test_util.h
+++ b/cpp/src/arrow/dataset/test_util.h
@@ -832,156 +832,5 @@ class WriteFileSystemDatasetMixin : public MakeFileSystemDatasetMixin {
   std::shared_ptr<ScanOptions> scan_options_;
 };
 
-// These test cases will run on a thread pool with 1 thread.  Any illegal (non-async)
-// nested parallelism should deadlock the test
-class NestedParallelismMixin : public ::testing::Test {
- protected:
-  static void SetUpTestSuite() {}
-
-  void TearDown() override {
-    if (old_capacity_ > 0) {
-      ASSERT_OK(internal::GetCpuThreadPool()->SetCapacity(old_capacity_));
-    }
-  }
-
-  void SetUp() override {
-    old_capacity_ = internal::GetCpuThreadPool()->GetCapacity();
-    ASSERT_OK(internal::GetCpuThreadPool()->SetCapacity(1));
-    schema_ = schema({field("i32", int32())});
-    options_ = std::make_shared<ScanOptions>();
-    options_->dataset_schema = schema_;
-    options_->use_threads = true;
-  }
-
-  class NestedParallelismScanTask : public ScanTask {
-   public:
-    explicit NestedParallelismScanTask(std::shared_ptr<ScanTask> target)
-        : ScanTask(target->options(), target->fragment()), target_(std::move(target)) {}
-    virtual ~NestedParallelismScanTask() = default;
-
-    Result<RecordBatchIterator> Execute() override {
-      // We could just return an invalid status here but this way it is easy to verify the
-      // test is checking what it is supposed to be checking by just changing
-      // supports_async() to false (will deadlock)
-      ADD_FAILURE() << "NestedParallelismScanTask::Execute should never be called.  You "
-                       "should be deadlocked right now";
-      ARROW_ASSIGN_OR_RAISE(auto batch_gen, ExecuteAsync(internal::GetCpuThreadPool()));
-      return MakeGeneratorIterator(std::move(batch_gen));
-    }
-
-    Result<RecordBatchGenerator> ExecuteAsync(internal::Executor* cpu_executor) override {
-      ARROW_ASSIGN_OR_RAISE(auto batches_it, target_->Execute());
-      ARROW_ASSIGN_OR_RAISE(auto batches, batches_it.ToVector());
-      auto generator_fut = DeferNotOk(
-          cpu_executor->Submit([batches] { return MakeVectorGenerator(batches); }));
-      return MakeFromFuture(generator_fut);
-    }
-
-    bool supports_async() const override { return true; }
-
-   private:
-    std::shared_ptr<ScanTask> target_;
-  };
-
-  class NestedParallelismFragment : public InMemoryFragment {
-   public:
-    explicit NestedParallelismFragment(RecordBatchVector record_batches,
-                                       Expression expr = literal(true))
-        : InMemoryFragment(std::move(record_batches), std::move(expr)) {}
-
-    Result<ScanTaskIterator> Scan(std::shared_ptr<ScanOptions> options) override {
-      ARROW_ASSIGN_OR_RAISE(auto scan_task_it, InMemoryFragment::Scan(options));
-      return MakeMaybeMapIterator(
-          [](std::shared_ptr<ScanTask> task) -> Result<std::shared_ptr<ScanTask>> {
-            return std::make_shared<NestedParallelismScanTask>(std::move(task));
-          },
-          std::move(scan_task_it));
-    }
-  };
-
-  class NestedParallelismDataset : public InMemoryDataset {
-   public:
-    NestedParallelismDataset(std::shared_ptr<Schema> sch, RecordBatchVector batches)
-        : InMemoryDataset(std::move(sch), std::move(batches)) {}
-
-   protected:
-    Result<FragmentIterator> GetFragmentsImpl(Expression) override {
-      auto schema = this->schema();
-
-      auto create_fragment =
-          [schema](
-              std::shared_ptr<RecordBatch> batch) -> Result<std::shared_ptr<Fragment>> {
-        RecordBatchVector batches{batch};
-        return std::make_shared<NestedParallelismFragment>(std::move(batches));
-      };
-
-      return MakeMaybeMapIterator(std::move(create_fragment), get_batches_->Get());
-    }
-  };
-
-  class DiscardingRowCountingFileWriteOptions : public FileWriteOptions {
-   public:
-    explicit DiscardingRowCountingFileWriteOptions(
-        std::shared_ptr<std::atomic<int>> row_counter)
-        : FileWriteOptions(
-              std::make_shared<DiscardingRowCountingFormat>(std::move(row_counter))) {}
-  };
-
-  class DiscardingRowCountingFileWriter : public FileWriter {
-   public:
-    explicit DiscardingRowCountingFileWriter(std::shared_ptr<std::atomic<int>> row_count)
-        : FileWriter(NULL, NULL, NULL), row_count_(std::move(row_count)) {}
-    virtual ~DiscardingRowCountingFileWriter() = default;
-
-    Status Write(const std::shared_ptr<RecordBatch>& batch) override {
-      row_count_->fetch_add(static_cast<int>(batch->num_rows()));
-      return Status::OK();
-    }
-    Status Finish() override { return Status::OK(); };
-
-   protected:
-    Status FinishInternal() override { return Status::OK(); };
-
-   private:
-    std::shared_ptr<std::atomic<int>> row_count_;
-  };
-
-  class DiscardingRowCountingFormat : public FileFormat {
-   public:
-    DiscardingRowCountingFormat() : row_count_(std::make_shared<std::atomic<int>>(0)) {}
-    explicit DiscardingRowCountingFormat(std::shared_ptr<std::atomic<int>> row_count)
-        : row_count_(std::move(row_count)) {}
-    virtual ~DiscardingRowCountingFormat() = default;
-
-    std::string type_name() const override { return "discarding-row-counting"; }
-    bool Equals(const FileFormat& other) const override { return true; }
-    Result<bool> IsSupported(const FileSource& source) const override {
-      return Status::NotImplemented("Should not be called");
-    }
-    Result<std::shared_ptr<Schema>> Inspect(const FileSource& source) const override {
-      return Status::NotImplemented("Should not be called");
-    }
-    Result<ScanTaskIterator> ScanFile(
-        std::shared_ptr<ScanOptions> options,
-        const std::shared_ptr<FileFragment>& file) const override {
-      return Status::NotImplemented("Should not be called");
-    }
-    Result<std::shared_ptr<FileWriter>> MakeWriter(
-        std::shared_ptr<io::OutputStream> destination, std::shared_ptr<Schema> schema,
-        std::shared_ptr<FileWriteOptions> options) const override {
-      return std::make_shared<DiscardingRowCountingFileWriter>(row_count_);
-    }
-    std::shared_ptr<FileWriteOptions> DefaultWriteOptions() override { return NULLPTR; }
-
-   private:
-    std::shared_ptr<std::atomic<int>> row_count_;
-  };
-
- protected:
-  int old_capacity_ = 0;
-  std::shared_ptr<Schema> schema_;
-  std::shared_ptr<ScanOptions> options_;
-};
-
 }  // namespace dataset
 }  // namespace arrow
diff --git a/cpp/src/arrow/record_batch.h b/cpp/src/arrow/record_batch.h
index e45f598019d..4650e806360 100644
--- a/cpp/src/arrow/record_batch.h
+++ b/cpp/src/arrow/record_batch.h
@@ -25,7 +25,6 @@
 #include "arrow/result.h"
 #include "arrow/status.h"
 #include "arrow/type_fwd.h"
-#include "arrow/util/future.h"
 #include "arrow/util/macros.h"
 #include "arrow/util/visibility.h"
 
@@ -208,14 +207,6 @@ class ARROW_EXPORT RecordBatchReader {
   /// \return Status
   virtual Status ReadNext(std::shared_ptr<RecordBatch>* batch) = 0;
 
-  // Fallback to sync implementation until all other readers are converted(ARROW-11770)
-  // and then this could become pure virtual with ReadNext falling back to async impl.
-  virtual Future<std::shared_ptr<RecordBatch>> ReadNextAsync() {
-    std::shared_ptr<RecordBatch> batch;
-    ARROW_RETURN_NOT_OK(ReadNext(&batch));
-    return Future<std::shared_ptr<RecordBatch>>::MakeFinished(std::move(batch));
-  }
-
   /// \brief Iterator interface
   Result<std::shared_ptr<RecordBatch>> Next() {
     std::shared_ptr<RecordBatch> batch;
diff --git a/cpp/src/arrow/util/future.h b/cpp/src/arrow/util/future.h
index 21754ec073a..4c8de912f81 100644
--- a/cpp/src/arrow/util/future.h
+++ b/cpp/src/arrow/util/future.h
@@ -718,7 +718,7 @@ Future<BreakValueType> Loop(Iterate iterate) {
         return true;
       }
       if (control_res->has_value()) {
-        break_fut.MarkFinished(*std::move(*control_res));
+        break_fut.MarkFinished(**control_res);
         return true;
       }
       return false;
diff --git a/cpp/src/arrow/util/thread_pool.h b/cpp/src/arrow/util/thread_pool.h
index c388680befc..fc7dc85b15e 100644
--- a/cpp/src/arrow/util/thread_pool.h
+++ b/cpp/src/arrow/util/thread_pool.h
@@ -106,9 +106,8 @@ class ARROW_EXPORT Executor {
   Future<T> Transfer(Future<T> future) {
     auto transferred = Future<T>::Make();
     auto callback = [this, transferred](const Result<T>& result) mutable {
-      auto spawn_status = Spawn([transferred, result]() mutable {
-        transferred.MarkFinished(std::move(result));
-      });
+      auto spawn_status =
+          Spawn([transferred, result]() mutable { transferred.MarkFinished(result); });
       if (!spawn_status.ok()) {
         transferred.MarkFinished(spawn_status);
       }
diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R
index 192b4b4220d..9943292bf91 100644
--- a/r/tests/testthat/test-dataset.R
+++ b/r/tests/testthat/test-dataset.R
@@ -354,7 +354,6 @@ test_that("IPC/Feather format data", {
 })
 
 test_that("CSV dataset", {
-  skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-12181
   ds <- open_dataset(csv_dir, partitioning = "part", format = "csv")
   expect_r6_class(ds$format, "CsvFileFormat")
   expect_r6_class(ds$filesystem, "LocalFileSystem")

From b5045ed833aaff35e6c8064ac7d908c19a5f48fa Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Wed, 14 Apr 2021 18:29:48 -0400
Subject: [PATCH 036/719] ARROW-12382: [C++] Bundle xsimd if runtime SIMD level
 is set

This should fix nightly Conda builds (or at least let them progress).

Closes #10029 from lidavidm/arrow-12382

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/cmake_modules/ThirdpartyToolchain.cmake | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index 923e18bafb9..83ea3aa9b96 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -1938,8 +1938,8 @@ macro(build_xsimd)
   set(XSIMD_VENDORED TRUE)
 endmacro()
 
-# For now xsimd is always bundled from upstream
-if(NOT ARROW_SIMD_LEVEL STREQUAL "NONE")
+if((NOT ARROW_SIMD_LEVEL STREQUAL "NONE")
+   OR (NOT ARROW_RUNTIME_SIMD_LEVEL STREQUAL "NONE"))
   set(xsimd_SOURCE "BUNDLED")
   resolve_dependency(xsimd)
   # TODO: Don't use global includes but rather target_include_directories

From 894fab074ca7cf10af251d4b4217d428596626a2 Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Wed, 14 Apr 2021 16:32:44 -0700
Subject: [PATCH 037/719] ARROW-11468: [R] Allow user to pass schema to
 read_json_arrow()

A couple of things I wanted to check are expected behaviour:

1. If I specify in the schema that a numeric column should be a string column, I get the error `Error: Invalid: JSON parse error: Column(/third_col) changed from string to number in row 0`
 (e.g. if I run the following)
```
tf <- tempfile()
writeLines('
    { "hello": 3.5, "world": 2, "third_col": 99}
    { "hello": 3.25, "world": 5, "third_col": 98}
    { "hello": 3.125, "world": 8, "third_col": 97 }
    { "hello": 0.0, "world": 10, "third_col": 96}
', tf)
read_json_arrow(tf, schema = schema(third_col = utf8(), world = float64()))
```
2. As can be seen in the tests output (will delete the `print` statements before this is merged), table columns are returned in the order specified in the schema and then the columns not mentioned in the schema.

Closes #9950 from thisisnic/ARROW-11468

Authored-by: Nic Crane <thisisnic@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 r/R/arrowExports.R           |  8 +++-
 r/R/json.R                   | 16 +++++--
 r/man/read_json_arrow.Rd     | 10 +++-
 r/src/arrowExports.cpp       | 29 +++++++++---
 r/src/json.cpp               | 12 ++++-
 r/tests/testthat/test-json.R | 88 ++++++++++++++++++++++++++++++++++++
 6 files changed, 149 insertions(+), 14 deletions(-)

diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R
index 9811dc9f8d3..a33cf222fdc 100644
--- a/r/R/arrowExports.R
+++ b/r/R/arrowExports.R
@@ -1052,8 +1052,12 @@ json___ReadOptions__initialize <- function(use_threads, block_size){
     .Call(`_arrow_json___ReadOptions__initialize`, use_threads, block_size)
 }
 
-json___ParseOptions__initialize <- function(newlines_in_values){
-    .Call(`_arrow_json___ParseOptions__initialize`, newlines_in_values)
+json___ParseOptions__initialize1 <- function(newlines_in_values){
+    .Call(`_arrow_json___ParseOptions__initialize1`, newlines_in_values)
+}
+
+json___ParseOptions__initialize2 <- function(newlines_in_values, explicit_schema){
+    .Call(`_arrow_json___ParseOptions__initialize2`, newlines_in_values, explicit_schema)
 }
 
 json___TableReader__Make <- function(input, read_options, parse_options){
diff --git a/r/R/json.R b/r/R/json.R
index cc16774050b..89595a5b0ae 100644
--- a/r/R/json.R
+++ b/r/R/json.R
@@ -20,6 +20,7 @@
 #' Using [JsonTableReader]
 #'
 #' @inheritParams read_delim_arrow
+#' @param schema [Schema] that describes the table.
 #' @param ... Additional options passed to `JsonTableReader$create()`
 #'
 #' @return A `data.frame`, or a Table if `as_data_frame = FALSE`.
@@ -38,12 +39,13 @@
 read_json_arrow <- function(file,
                             col_select = NULL,
                             as_data_frame = TRUE,
+                            schema = NULL,
                             ...) {
   if (!inherits(file, "InputStream")) {
     file <- make_readable_file(file)
     on.exit(file$close())
   }
-  tab <- JsonTableReader$create(file, ...)$Read()
+  tab <- JsonTableReader$create(file, schema = schema, ...)$Read()
 
   col_select <- enquo(col_select)
   if (!quo_is_null(col_select)) {
@@ -69,7 +71,8 @@ JsonTableReader <- R6Class("JsonTableReader", inherit = ArrowObject,
 )
 JsonTableReader$create <- function(file,
                                    read_options = JsonReadOptions$create(),
-                                   parse_options = JsonParseOptions$create(),
+                                   parse_options = JsonParseOptions$create(schema = schema),
+                                   schema = NULL,
                                    ...) {
   assert_is(file, "InputStream")
   json___TableReader__Make(file, read_options, parse_options)
@@ -91,6 +94,11 @@ JsonReadOptions$create <- function(use_threads = option_use_threads(), block_siz
 #' @docType class
 #' @export
 JsonParseOptions <- R6Class("JsonParseOptions", inherit = ArrowObject)
-JsonParseOptions$create <- function(newlines_in_values = FALSE) {
-  json___ParseOptions__initialize(newlines_in_values)
+JsonParseOptions$create <- function(newlines_in_values = FALSE, schema = NULL) {
+  if (is.null(schema)) {
+    json___ParseOptions__initialize1(newlines_in_values)
+  } else {
+    json___ParseOptions__initialize2(newlines_in_values, schema)
+  }
+  
 }
diff --git a/r/man/read_json_arrow.Rd b/r/man/read_json_arrow.Rd
index 81118914849..83765b2c51a 100644
--- a/r/man/read_json_arrow.Rd
+++ b/r/man/read_json_arrow.Rd
@@ -4,7 +4,13 @@
 \alias{read_json_arrow}
 \title{Read a JSON file}
 \usage{
-read_json_arrow(file, col_select = NULL, as_data_frame = TRUE, ...)
+read_json_arrow(
+  file,
+  col_select = NULL,
+  as_data_frame = TRUE,
+  schema = NULL,
+  ...
+)
 }
 \arguments{
 \item{file}{A character file name or URI, \code{raw} vector, an Arrow input stream,
@@ -22,6 +28,8 @@ of columns, as used in \code{dplyr::select()}.}
 \item{as_data_frame}{Should the function return a \code{data.frame} (default) or
 an Arrow \link{Table}?}
 
+\item{schema}{\link{Schema} that describes the table.}
+
 \item{...}{Additional options passed to \code{JsonTableReader$create()}}
 }
 \value{
diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp
index 4c2ebed670e..6dc50c3af4c 100644
--- a/r/src/arrowExports.cpp
+++ b/r/src/arrowExports.cpp
@@ -4115,16 +4115,32 @@ extern "C" SEXP _arrow_json___ReadOptions__initialize(SEXP use_threads_sexp, SEX
 
 // json.cpp
 #if defined(ARROW_R_WITH_ARROW)
-std::shared_ptr<arrow::json::ParseOptions> json___ParseOptions__initialize(bool newlines_in_values);
-extern "C" SEXP _arrow_json___ParseOptions__initialize(SEXP newlines_in_values_sexp){
+std::shared_ptr<arrow::json::ParseOptions> json___ParseOptions__initialize1(bool newlines_in_values);
+extern "C" SEXP _arrow_json___ParseOptions__initialize1(SEXP newlines_in_values_sexp){
 BEGIN_CPP11
 	arrow::r::Input<bool>::type newlines_in_values(newlines_in_values_sexp);
-	return cpp11::as_sexp(json___ParseOptions__initialize(newlines_in_values));
+	return cpp11::as_sexp(json___ParseOptions__initialize1(newlines_in_values));
 END_CPP11
 }
 #else
-extern "C" SEXP _arrow_json___ParseOptions__initialize(SEXP newlines_in_values_sexp){
-	Rf_error("Cannot call json___ParseOptions__initialize(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+extern "C" SEXP _arrow_json___ParseOptions__initialize1(SEXP newlines_in_values_sexp){
+	Rf_error("Cannot call json___ParseOptions__initialize1(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+}
+#endif
+
+// json.cpp
+#if defined(ARROW_R_WITH_ARROW)
+std::shared_ptr<arrow::json::ParseOptions> json___ParseOptions__initialize2(bool newlines_in_values, const std::shared_ptr<arrow::Schema>& explicit_schema);
+extern "C" SEXP _arrow_json___ParseOptions__initialize2(SEXP newlines_in_values_sexp, SEXP explicit_schema_sexp){
+BEGIN_CPP11
+	arrow::r::Input<bool>::type newlines_in_values(newlines_in_values_sexp);
+	arrow::r::Input<const std::shared_ptr<arrow::Schema>&>::type explicit_schema(explicit_schema_sexp);
+	return cpp11::as_sexp(json___ParseOptions__initialize2(newlines_in_values, explicit_schema));
+END_CPP11
+}
+#else
+extern "C" SEXP _arrow_json___ParseOptions__initialize2(SEXP newlines_in_values_sexp, SEXP explicit_schema_sexp){
+	Rf_error("Cannot call json___ParseOptions__initialize2(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
 }
 #endif
 
@@ -6835,7 +6851,8 @@ static const R_CallMethodDef CallEntries[] = {
 		{ "_arrow_io___BufferOutputStream__Tell", (DL_FUNC) &_arrow_io___BufferOutputStream__Tell, 1}, 
 		{ "_arrow_io___BufferOutputStream__Write", (DL_FUNC) &_arrow_io___BufferOutputStream__Write, 2}, 
 		{ "_arrow_json___ReadOptions__initialize", (DL_FUNC) &_arrow_json___ReadOptions__initialize, 2}, 
-		{ "_arrow_json___ParseOptions__initialize", (DL_FUNC) &_arrow_json___ParseOptions__initialize, 1}, 
+		{ "_arrow_json___ParseOptions__initialize1", (DL_FUNC) &_arrow_json___ParseOptions__initialize1, 1}, 
+		{ "_arrow_json___ParseOptions__initialize2", (DL_FUNC) &_arrow_json___ParseOptions__initialize2, 2}, 
 		{ "_arrow_json___TableReader__Make", (DL_FUNC) &_arrow_json___TableReader__Make, 3}, 
 		{ "_arrow_json___TableReader__Read", (DL_FUNC) &_arrow_json___TableReader__Read, 1}, 
 		{ "_arrow_MemoryPool__default", (DL_FUNC) &_arrow_MemoryPool__default, 0}, 
diff --git a/r/src/json.cpp b/r/src/json.cpp
index 87d40623f6b..edc5e075754 100644
--- a/r/src/json.cpp
+++ b/r/src/json.cpp
@@ -31,7 +31,7 @@ std::shared_ptr<arrow::json::ReadOptions> json___ReadOptions__initialize(bool us
 }
 
 // [[arrow::export]]
-std::shared_ptr<arrow::json::ParseOptions> json___ParseOptions__initialize(
+std::shared_ptr<arrow::json::ParseOptions> json___ParseOptions__initialize1(
     bool newlines_in_values) {
   auto res =
       std::make_shared<arrow::json::ParseOptions>(arrow::json::ParseOptions::Defaults());
@@ -39,6 +39,16 @@ std::shared_ptr<arrow::json::ParseOptions> json___ParseOptions__initialize(
   return res;
 }
 
+// [[arrow::export]]
+std::shared_ptr<arrow::json::ParseOptions> json___ParseOptions__initialize2(
+    bool newlines_in_values, const std::shared_ptr<arrow::Schema>& explicit_schema) {
+  auto res =
+      std::make_shared<arrow::json::ParseOptions>(arrow::json::ParseOptions::Defaults());
+  res->newlines_in_values = newlines_in_values;
+  res->explicit_schema = explicit_schema;
+  return res;
+}
+
 // [[arrow::export]]
 std::shared_ptr<arrow::json::TableReader> json___TableReader__Make(
     const std::shared_ptr<arrow::io::InputStream>& input,
diff --git a/r/tests/testthat/test-json.R b/r/tests/testthat/test-json.R
index b0b508bbc4b..ad5ff8a1260 100644
--- a/r/tests/testthat/test-json.R
+++ b/r/tests/testthat/test-json.R
@@ -86,6 +86,94 @@ test_that("read_json_arrow() supports col_select=", {
   expect_equal(names(tab2), c("hello", "world"))
 })
 
+test_that("read_json_arrow(schema=) with empty schema", {
+  tf <- tempfile()
+  writeLines('
+    { "hello": 3.5, "world": 2, "third_col": 99}
+    { "hello": 3.25, "world": 5, "third_col": 98}
+    { "hello": 3.125, "world": 8, "third_col": 97 }
+    { "hello": 0.0, "world": 10, "third_col": 96}
+  ', tf)
+  
+  tab1 <- read_json_arrow(tf, schema = schema())
+  
+  expect_identical(
+    tab1, 
+    tibble::tibble(
+      hello = c(3.5, 3.25, 3.125, 0),
+      world = c(2L, 5L, 8L, 10L),
+      third_col = c(99L,98L,97L,96L)
+    )               
+  )
+})
+
+test_that("read_json_arrow(schema=) with partial schema", {
+  tf <- tempfile()
+  writeLines('
+    { "hello": 3.5, "world": 2, "third_col": 99}
+    { "hello": 3.25, "world": 5, "third_col": 98}
+    { "hello": 3.125, "world": 8, "third_col": 97 }
+    { "hello": 0.0, "world": 10, "third_col": 96}
+  ', tf)
+  
+  tab1 <- read_json_arrow(tf, schema = schema(third_col = float64(), world = float64()))
+  
+  expect_identical(
+    tab1, 
+    tibble::tibble(
+      third_col = c(99,98,97,96),
+      world = c(2, 5, 8, 10),
+      hello = c(3.5, 3.25, 3.125, 0)
+    )               
+  )
+  
+  tf2 <- tempfile()
+  writeLines('
+    { "hello": 3.5, "world": 2, "third_col": "99"}
+    { "hello": 3.25, "world": 5, "third_col": "98"}
+    { "hello": 3.125, "world": 8, "third_col": "97"}
+  ', tf2)
+  
+  tab2 <- read_json_arrow(tf2, schema = schema(third_col = string(), world = float64()))
+  
+  expect_identical(
+    tab2, 
+    tibble::tibble(
+      third_col = c("99","98","97"),
+      world = c(2, 5, 8),
+      hello = c(3.5, 3.25, 3.125)
+    )               
+  )
+})
+
+test_that("read_json_arrow(schema=) with full schema", {
+  tf <- tempfile()
+  writeLines('
+    { "hello": 3.5, "world": 2, "third_col": 99}
+    { "hello": 3.25, "world": 5, "third_col": 98}
+    { "hello": 3.125, "world": 8, "third_col": 97}
+    { "hello": 0.0, "world": 10, "third_col": 96}
+  ', tf)
+  
+  tab1 <- read_json_arrow(
+    tf,
+    schema = schema(
+      hello = float64(),
+      third_col = float64(),
+      world = float64()
+    )
+  )
+  
+  expect_identical(
+    tab1, 
+    tibble::tibble(
+      hello = c(3.5, 3.25, 3.125, 0),
+      third_col = c(99,98,97,96),
+      world = c(2, 5, 8, 10)
+    )               
+  )
+})
+
 test_that("Can read json file with nested columns (ARROW-5503)", {
   tf <- tempfile()
   on.exit(unlink(tf))

From df20489faaf0272e185ac20f39f65c1cb3d01083 Mon Sep 17 00:00:00 2001
From: Mauricio Vargas <mvargas@dcc.uchile.cl>
Date: Wed, 14 Apr 2021 16:35:54 -0700
Subject: [PATCH 038/719] ARROW-12370: [R] Bindings for power kernel

Closes #10020 from pachamaltese/arrow12370

Lead-authored-by: Mauricio Vargas <mvargas@dcc.uchile.cl>
Co-authored-by: Neal Richardson <neal.p.richardson@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 r/R/expression.R                      |  2 +-
 r/tests/testthat/test-compute-arith.R | 27 +++++++++++++++++++++++++++
 r/tests/testthat/test-dplyr-filter.R  | 18 ++++++++++++++++--
 3 files changed, 44 insertions(+), 3 deletions(-)

diff --git a/r/R/expression.R b/r/R/expression.R
index 1974fc7f59b..b3fdd52a5d0 100644
--- a/r/R/expression.R
+++ b/r/R/expression.R
@@ -148,7 +148,7 @@ cast_array_expression <- function(x, to_type, safe = TRUE, ...) {
   # we don't actually use divide_checked with `%%`, rather it is rewritten to
   # use %/% above.
   "%%" = "divide_checked",
-  # TODO: "^"  (ARROW-11070)
+  "^" = "power_checked",
   "%in%" = "is_in_meta_binary"
 )
 
diff --git a/r/tests/testthat/test-compute-arith.R b/r/tests/testthat/test-compute-arith.R
index d3cd2eedf6d..9d146fd04e6 100644
--- a/r/tests/testthat/test-compute-arith.R
+++ b/r/tests/testthat/test-compute-arith.R
@@ -81,6 +81,33 @@ test_that("Division", {
   expect_equal(b %% 2, Array$create(c(1:4 %% 2, NA_real_)))
 })
 
+test_that("Power", {
+  a <- Array$create(c(1:4, NA_integer_))
+  b <- a$cast(float64())
+  c <- a$cast(int64())
+  d <- a$cast(uint64())
+
+  expect_equal(a^0, Array$create(c(1, 1, 1, 1, NA_real_)))
+  expect_equal(a^2, Array$create(c(1, 4, 9, 16, NA_real_)))
+  expect_equal(a^(-1), Array$create(c(1, 1/2, 1/3, 1/4, NA_real_)))
+  expect_equal(a^(.5), Array$create(c(1, sqrt(2), sqrt(3), sqrt(4), NA_real_)))
+
+  expect_equal(b^0, Array$create(c(1, 1, 1, 1, NA_real_)))
+  expect_equal(b^2, Array$create(c(1, 4, 9, 16, NA_real_)))
+  expect_equal(b^(-1), Array$create(c(1, 1/2, 1/3, 1/4, NA_real_)))
+  expect_equal(b^(.5), Array$create(c(1, sqrt(2), sqrt(3), sqrt(4), NA_real_)))
+
+  expect_equal(c^0, Array$create(c(1, 1, 1, 1, NA_real_)))
+  expect_equal(c^2, Array$create(c(1, 4, 9, 16, NA_real_)))
+  expect_equal(c^(-1), Array$create(c(1, 1/2, 1/3, 1/4, NA_real_)))
+  expect_equal(c^(.5), Array$create(c(1, sqrt(2), sqrt(3), sqrt(4), NA_real_)))
+
+  expect_equal(d^0, Array$create(c(1, 1, 1, 1, NA_real_)))
+  expect_equal(d^2, Array$create(c(1, 4, 9, 16, NA_real_)))
+  expect_equal(d^(-1), Array$create(c(1, 1/2, 1/3, 1/4, NA_real_)))
+  expect_equal(d^(.5), Array$create(c(1, sqrt(2), sqrt(3), sqrt(4), NA_real_)))
+})
+
 test_that("Dates casting", {
   a <- Array$create(c(Sys.Date() + 1:4, NA_integer_))
 
diff --git a/r/tests/testthat/test-dplyr-filter.R b/r/tests/testthat/test-dplyr-filter.R
index c4ab042380f..d1bd3cec607 100644
--- a/r/tests/testthat/test-dplyr-filter.R
+++ b/r/tests/testthat/test-dplyr-filter.R
@@ -115,6 +115,14 @@ test_that("filtering with arithmetic", {
       collect(),
     tbl
   )
+
+  expect_dplyr_equal(
+    input %>%
+      filter(dbl^2 > 3) %>%
+      select(string = chr, int, dbl) %>%
+      collect(),
+    tbl
+  )
 })
 
 test_that("filtering with expression + autocasting", {
@@ -133,6 +141,14 @@ test_that("filtering with expression + autocasting", {
       collect(),
     tbl
   )
+
+  expect_dplyr_equal(
+    input %>%
+      filter(int^2 > 3) %>%
+      select(string = chr, int, dbl) %>%
+      collect(),
+    tbl
+  )
 })
 
 test_that("More complex select/filter", {
@@ -284,7 +300,6 @@ test_that("filter environment scope", {
   # Also for functions
   # 'could not find function "isEqualTo"' because we haven't defined it yet
   expect_dplyr_error(input %>% filter(isEqualTo(int, 4)), tbl)
-  
 
   skip("Need to substitute in user defined function too")
   # TODO: fix this: this isEqualTo function is eagerly evaluating; it should
@@ -399,5 +414,4 @@ test_that("filter() with .data pronoun", {
       collect(),
     tbl
   )
-  
 })

From c7485b7f7f2e80a61da175a61d7f5d49b982a732 Mon Sep 17 00:00:00 2001
From: Ian Cook <ianmcook@gmail.com>
Date: Wed, 14 Apr 2021 16:42:17 -0700
Subject: [PATCH 039/719] ARROW-11477: [R][Doc] Reorganize and improve README
 and vignette content

Closes #10014 from ianmcook/ARROW-11477

Lead-authored-by: Ian Cook <ianmcook@gmail.com>
Co-authored-by: Neal Richardson <neal.p.richardson@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 r/README.md             | 405 +++++++++++++++++++++++-----------------
 r/vignettes/arrow.Rmd   |  39 ++++
 r/vignettes/dataset.Rmd | 159 ++++++++--------
 3 files changed, 354 insertions(+), 249 deletions(-)

diff --git a/r/README.md b/r/README.md
index c103000f5f6..b568a362c95 100644
--- a/r/README.md
+++ b/r/README.md
@@ -4,31 +4,57 @@
 [![CI](https://github.com/apache/arrow/workflows/R/badge.svg?event=push)](https://github.com/apache/arrow/actions?query=workflow%3AR+branch%3Amaster+event%3Apush)
 [![conda-forge](https://img.shields.io/conda/vn/conda-forge/r-arrow.svg)](https://anaconda.org/conda-forge/r-arrow)
 
-[Apache Arrow](https://arrow.apache.org/) is a cross-language
-development platform for in-memory data. It specifies a standardized
+**[Apache Arrow](https://arrow.apache.org/) is a cross-language
+development platform for in-memory data.** It specifies a standardized
 language-independent columnar memory format for flat and hierarchical
 data, organized for efficient analytic operations on modern hardware. It
 also provides computational libraries and zero-copy streaming messaging
 and interprocess communication.
 
-The `arrow` package exposes an interface to the Arrow C++ library to
-access many of its features in R. This includes support for analyzing
-large, multi-file datasets (`open_dataset()`), working with individual
-Parquet (`read_parquet()`, `write_parquet()`) and Feather
-(`read_feather()`, `write_feather()`) files, as well as lower-level
-access to Arrow memory and messages.
+**The `arrow` package exposes an interface to the Arrow C++ library,
+enabling access to many of its features in R.** It provides low-level
+access to the Arrow C++ library API and higher-level access through a
+`dplyr` backend and familiar R functions.
+
+## What can the `arrow` package do?
+
+-   Read and write **Parquet files** (`read_parquet()`,
+    `write_parquet()`), an efficient and widely used columnar format
+-   Read and write **Feather files** (`read_feather()`,
+    `write_feather()`), a format optimized for speed and
+    interoperability
+-   Analyze, process, and write **multi-file, larger-than-memory
+    datasets** (`open_dataset()`, `write_dataset()`)
+-   Read **large CSV and JSON files** with excellent **speed and
+    efficiency** (`read_csv_arrow()`, `read_json_arrow()`)
+-   Manipulate and analyze Arrow data with **`dplyr` verbs**
+-   Read and write files in **Amazon S3** buckets with no additional
+    function calls
+-   Exercise **fine control over column types** for seamless
+    interoperability with databases and data warehouse systems
+-   Use **compression codecs** including Snappy, gzip, Brotli,
+    Zstandard, LZ4, LZO, and bzip2 for reading and writing data
+-   Enable **zero-copy data sharing** between **R and Python**
+-   Connect to **Arrow Flight** RPC servers to send and receive large
+    datasets over networks
+-   Access and manipulate Arrow objects through **low-level bindings**
+    to the C++ library
+-   Provide a **toolkit for building connectors** to other applications
+    and services that use Arrow
 
 ## Installation
 
+### Installing the latest release version
+
 Install the latest release of `arrow` from CRAN with
 
-```r
+``` r
 install.packages("arrow")
 ```
 
 Conda users can install `arrow` from conda-forge with
 
-```
+``` shell
 conda install -c conda-forge --strict-channel-priority r-arrow
 ```
 
@@ -36,218 +62,245 @@ Installing a released version of the `arrow` package requires no
 additional system dependencies. For macOS and Windows, CRAN hosts binary
 packages that contain the Arrow C++ library. On Linux, source package
 installation will also build necessary C++ dependencies. For a faster,
-more complete installation, set the environment variable `NOT_CRAN=true`.
-See `vignette("install", package = "arrow")` for details.
+more complete installation, set the environment variable
+`NOT_CRAN=true`. See `vignette("install", package = "arrow")` for
+details.
 
-## Installing a development version
+### Installing a development version
 
-Development versions of the package (binary and source) are built daily and hosted at
-<https://arrow-r-nightly.s3.amazonaws.com>. To install from there:
+Development versions of the package (binary and source) are built
+nightly and hosted at <https://arrow-r-nightly.s3.amazonaws.com>. To
+install from there:
 
 ``` r
 install.packages("arrow", repos = "https://arrow-r-nightly.s3.amazonaws.com")
 ```
 
-Or
-
-```r
-arrow::install_arrow(nightly = TRUE)
-```
-
-Conda users can install `arrow` nightlies from our nightlies channel using:
+Conda users can install `arrow` nightly builds with
 
-```
+``` shell
 conda install -c arrow-nightlies -c conda-forge --strict-channel-priority r-arrow
 ```
 
-These daily package builds are not official Apache releases and are not
-recommended for production use. They may be useful for testing bug fixes
-and new features under active development.
+If you already have a version of `arrow` installed, you can switch to
+the latest nightly development version with
 
-## Developing
-
-Windows and macOS users who wish to contribute to the R package and
-don’t need to alter the Arrow C++ library may be able to obtain a
-recent version of the library without building from source. On macOS,
-you may install the C++ library using [Homebrew](https://brew.sh/):
-
-``` shell
-# For the released version:
-brew install apache-arrow
-# Or for a development version, you can try:
-brew install apache-arrow --HEAD
+``` r
+arrow::install_arrow(nightly = TRUE)
 ```
 
-On Windows, you can download a .zip file with the arrow dependencies from the
-[nightly repository](https://arrow-r-nightly.s3.amazonaws.com/libarrow/bin/windows/),
-and then set the `RWINLIB_LOCAL` environment variable to point to that
-zip file before installing the `arrow` R package. Version numbers in that
-repository correspond to dates, and you will likely want the most recent.
+These nightly package builds are not official Apache releases and are
+not recommended for production use. They may be useful for testing bug
+fixes and new features under active development.
 
-If you need to alter both the Arrow C++ library and the R package code,
-or if you can’t get a binary version of the latest C++ library
-elsewhere, you’ll need to build it from source too.
+## Usage
 
-First, install the C++ library. See the [developer
-guide](https://arrow.apache.org/docs/developers/cpp/building.html) for details.
-It's recommended to make a `build` directory inside of the `cpp` directory of
-the Arrow git repository (it is git-ignored). Assuming you are inside `cpp/build`,
-you'll first call `cmake` to configure the build and then `make install`.
-For the R package, you'll need to enable several features in the C++ library
-using `-D` flags:
+Among the many applications of the `arrow` package, two of the most accessible are:
 
-```
-cmake \
-  -DARROW_COMPUTE=ON \
-  -DARROW_CSV=ON \
-  -DARROW_DATASET=ON \
-  -DARROW_FILESYSTEM=ON \
-  -DARROW_JEMALLOC=ON \
-  -DARROW_JSON=ON \
-  -DARROW_PARQUET=ON \
-  -DCMAKE_BUILD_TYPE=release \
-  -DARROW_INSTALL_NAME_RPATH=OFF \
-  ..
-```
-
-where `..` is the path to the `cpp/` directory when you're in `cpp/build`.
+-   High-performance reading and writing of data files with multiple
+    file formats and compression codecs, including built-in support for
+    cloud storage
+-   Analyzing and manipulating bigger-than-memory data with `dplyr`
+    verbs
 
-To enable optional features including S3 support, an alternative memory allocator, and additional compression libraries, add some or all of these flags:
+The sections below describe these two uses and illustrate them with
+basic examples. The sections below mention two Arrow data structures:
 
-```
-  -DARROW_S3=ON \
-  -DARROW_MIMALLOC=ON \
-  -DARROW_WITH_BROTLI=ON \
-  -DARROW_WITH_BZ2=ON \
-  -DARROW_WITH_LZ4=ON \
-  -DARROW_WITH_SNAPPY=ON \
-  -DARROW_WITH_ZLIB=ON \
-  -DARROW_WITH_ZSTD=ON \
-```
+-   `Table`: a tabular, column-oriented data structure capable of
+    storing and processing large amounts of data more efficiently than
+    R’s built-in `data.frame` and with SQL-like column data types that
+    afford better interoperability with databases and data warehouse
+    systems
+-   `Dataset`: a data structure functionally similar to `Table` but with
+    the capability to work on larger-than-memory data partitioned across
+    multiple files
 
-Other flags that may be useful:
+### Reading and writing data files with `arrow`
 
-* `-DARROW_EXTRA_ERROR_CONTEXT=ON` makes errors coming from the C++ library point to files and line numbers
-* `-DBOOST_SOURCE=BUNDLED`, for example, or any other dependency `*_SOURCE`, if you have a system version of a C++ dependency that doesn't work correctly with Arrow. This tells the build to compile its own version of the dependency from source.
+The `arrow` package provides functions for reading single data files in
+several common formats. By default, calling any of these functions
+returns an R `data.frame`. To return an Arrow `Table`, set argument
+`as_data_frame = FALSE`.
 
-Note that after any change to the C++ library, you must reinstall it and
-run `make clean` or `git clean -fdx .` to remove any cached object code
-in the `r/src/` directory before reinstalling the R package. This is
-only necessary if you make changes to the C++ library source; you do not
-need to manually purge object files if you are only editing R or C++
-code inside `r/`.
+-   `read_parquet()`: read a file in Parquet format
+-   `read_feather()`: read a file in Feather format (the Apache Arrow
+    IPC format)
+-   `read_delim_arrow()`: read a delimited text file (default delimiter
+    is comma)
+-   `read_csv_arrow()`: read a comma-separated values (CSV) file
+-   `read_tsv_arrow()`: read a tab-separated values (TSV) file
+-   `read_json_arrow()`: read a JSON data file
 
-Once you’ve built the C++ library, you can install the R package and its
-dependencies, along with additional dev dependencies, from the git
-checkout:
+For writing data to single files, the `arrow` package provides the
+functions `write_parquet()` and `write_feather()`. These can be used
+with R `data.frame` and Arrow `Table` objects.
 
-``` shell
-cd ../../r
+For example, let’s write the Star Wars characters data that’s included
+in `dplyr` to a Parquet file, then read it back in. Parquet is a popular
+choice for storing analytic data; it is optimized for reduced file sizes
+and fast read performance, especially for column-based access patterns.
+Parquet is widely supported by many tools and platforms.
 
-Rscript -e '
-options(repos = "https://cloud.r-project.org/")
-if (!require("remotes")) install.packages("remotes")
-remotes::install_deps(dependencies = TRUE)
-'
+First load the `arrow` and `dplyr` packages:
 
-R CMD INSTALL .
+``` r
+library(arrow, warn.conflicts = FALSE)
+library(dplyr, warn.conflicts = FALSE)
 ```
 
-If you need to set any compilation flags while building the C++
-extensions, you can use the `ARROW_R_CXXFLAGS` environment variable. For
-example, if you are using `perf` to profile the R extensions, you may
-need to set
+Then write the `data.frame` named `starwars` to a Parquet file at
+`file_path`:
 
-``` shell
-export ARROW_R_CXXFLAGS=-fno-omit-frame-pointer
+``` r
+file_path <- tempfile()
+write_parquet(starwars, file_path)
 ```
 
-If the package fails to install/load with an error like this:
-
-    ** testing if installed package can be loaded from temporary location
-    Error: package or namespace load failed for 'arrow' in dyn.load(file, DLLpath = DLLpath, ...):
-    unable to load shared object '/Users/you/R/00LOCK-r/00new/arrow/libs/arrow.so':
-    dlopen(/Users/you/R/00LOCK-r/00new/arrow/libs/arrow.so, 6): Library not loaded: @rpath/libarrow.14.dylib
+Then read the Parquet file into an R `data.frame` named `sw`:
 
-ensure that `-DARROW_INSTALL_NAME_RPATH=OFF` was passed (this is important on
-macOS to prevent problems at link time and is a no-op on other platforms).
-Alternativelly, try setting the environment variable `R_LD_LIBRARY_PATH` to
-wherever Arrow C++ was put in `make install`, e.g. `export
-R_LD_LIBRARY_PATH=/usr/local/lib`, and retry installing the R package.
-
-When installing from source, if the R and C++ library versions do not
-match, installation may fail. If you’ve previously installed the
-libraries and want to upgrade the R package, you’ll need to update the
-Arrow C++ library first.
-
-For any other build/configuration challenges, see the [C++ developer
-guide](https://arrow.apache.org/docs/developers/cpp/building.html) and
-`vignette("install", package = "arrow")`.
-
-### Editing C++ code
+``` r
+sw <- read_parquet(file_path)
+```
 
-The `arrow` package uses some customized tools on top of `cpp11` to
-prepare its C++ code in `src/`. If you change C++ code in the R package,
-you will need to set the `ARROW_R_DEV` environment variable to `TRUE`
-(optionally, add it to your`~/.Renviron` file to persist across
-sessions) so that the `data-raw/codegen.R` file is used for code
-generation.
+R object attributes are preserved when writing data to Parquet or
+Feather files and when reading those files back into R. This enables
+round-trip writing and reading of `sf::sf` objects, R `data.frame`s with
+with `haven::labelled` columns, and `data.frame`s with other custom
+attributes.
 
-We use Google C++ style in our C++ code. Check for style errors with
+For reading and writing larger files or sets of multiple files, `arrow`
+defines `Dataset` objects and provides the functions `open_dataset()`
+and `write_dataset()`, which enable analysis and processing of
+bigger-than-memory data, including the ability to partition data into
+smaller chunks without loading the full data into memory. For examples
+of these functions, see `vignette("dataset", package = "arrow")`.
 
-    ./lint.sh
+All these functions can read and write files in the local filesystem or
+in Amazon S3 (by passing S3 URIs beginning with `s3://`). For more
+details, see `vignette("fs", package = "arrow")`
 
-Fix any style issues before committing with
+### Using `dplyr` with `arrow`
 
-    ./lint.sh --fix
+The `arrow` package provides a `dplyr` backend enabling manipulation of
+Arrow tabular data with `dplyr` verbs. To use it, first load both
+packages `arrow` and `dplyr`. Then load data into an Arrow `Table` or
+`Dataset` object. For example, read the Parquet file written in the
+previous example into an Arrow `Table` named `sw`:
 
-The lint script requires Python 3 and `clang-format-8`. If the command
-isn’t found, you can explicitly provide the path to it like
-`CLANG_FORMAT=$(which clang-format-8) ./lint.sh`. On macOS, you can get
-this by installing LLVM via Homebrew and running the script as
-`CLANG_FORMAT=$(brew --prefix llvm@8)/bin/clang-format ./lint.sh`
+``` r
+sw <- read_parquet(file_path, as_data_frame = FALSE)
+```
 
-### Running tests
+Next, pipe on `dplyr` verbs:
 
-Some tests are conditionally enabled based on the availability of certain
-features in the package build (S3 support, compression libraries, etc.).
-Others are generally skipped by default but can be enabled with environment
-variables or other settings:
+``` r
+result <- sw %>%
+  filter(homeworld == "Tatooine") %>%
+  rename(height_cm = height, mass_kg = mass) %>%
+  mutate(height_in = height_cm / 2.54, mass_lbs = mass_kg * 2.2046) %>%
+  arrange(desc(birth_year)) %>%
+  select(name, height_in, mass_lbs)
+```
 
-* All tests are skipped on Linux if the package builds without the C++ libarrow.
-  To make the build fail if libarrow is not available (as in, to test that
-  the C++ build was successful), set `TEST_R_WITH_ARROW=TRUE`
-* Some tests are disabled unless `ARROW_R_DEV=TRUE`
-* Tests that require allocating >2GB of memory to test Large types are disabled
-  unless `ARROW_LARGE_MEMORY_TESTS=TRUE`
-* Integration tests against a real S3 bucket are disabled unless credentials
-  are set in `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`; these are available
-  on request
-* S3 tests using [MinIO](https://min.io/) locally are enabled if the
-  `minio server` process is found running. If you're running MinIO with custom
-  settings, you can set `MINIO_ACCESS_KEY`, `MINIO_SECRET_KEY`, and
-  `MINIO_PORT` to override the defaults.
+The `arrow` package uses lazy evaluation to delay computation until the
+result is required. This speeds up processing by enabling the Arrow C++
+library to perform multiple computations in one operation. `result` is
+an object with class `arrow_dplyr_query` which represents all the
+computations to be performed:
 
-### Useful functions
+``` r
+result
+#> Table (query)
+#> name: string
+#> height_in: expr
+#> mass_lbs: expr
+#>
+#> * Filter: equal(homeworld, "Tatooine")
+#> * Sorted by birth_year [desc]
+#> See $.data for the source Arrow object
+```
 
-Within an R session, these can help with package development:
+To perform these computations and materialize the result, call
+`compute()` or `collect()`. `compute()` returns an Arrow `Table`,
+suitable for passing to other `arrow` or `dplyr` functions:
 
 ``` r
-devtools::load_all() # Load the dev package
-devtools::test(filter="^regexp$") # Run the test suite, optionally filtering file names
-devtools::document() # Update roxygen documentation
-pkgdown::build_site() # To preview the documentation website
-devtools::check() # All package checks; see also below
-covr::package_coverage() # See test coverage statistics
+result %>% compute()
+#> Table
+#> 10 rows x 3 columns
+#> $name <string>
+#> $height_in <double>
+#> $mass_lbs <double>
 ```
 
-Any of those can be run from the command line by wrapping them in `R -e
-'$COMMAND'`. There’s also a `Makefile` to help with some common tasks
-from the command line (`make test`, `make doc`, `make clean`, etc.)
-
-### Full package validation
+`collect()` returns an R `data.frame`, suitable for viewing or passing
+to other R functions for analysis or visualization:
 
-``` shell
-R CMD build .
-R CMD check arrow_*.tar.gz --as-cran
+``` r
+result %>% collect()
+#> # A tibble: 10 x 3
+#>    name               height_in mass_lbs
+#>    <chr>                  <dbl>    <dbl>
+#>  1 C-3PO                   65.7    165.
+#>  2 Cliegg Lars             72.0     NA  
+#>  3 Shmi Skywalker          64.2     NA  
+#>  4 Owen Lars               70.1    265.
+#>  5 Beru Whitesun lars      65.0    165.
+#>  6 Darth Vader             79.5    300.
+#>  7 Anakin Skywalker        74.0    185.
+#>  8 Biggs Darklighter       72.0    185.
+#>  9 Luke Skywalker          67.7    170.
+#> 10 R5-D4                   38.2     70.5
 ```
+
+The `arrow` package works with most single-table `dplyr` verbs except those that
+compute aggregates, such as `summarise()` and `mutate()` after
+`group_by()`. Inside `dplyr` verbs, Arrow offers support for many
+functions and operators, with common functions mapped to their base R and
+tidyverse equivalents. The
+[changelog](https://arrow.apache.org/docs/r/news/index.html) lists many of them.
+If there are additional functions you would
+like to see implemented, please file an issue as described in the
+[Getting help](#getting-help) section below.
+
+For `dplyr` queries on `Table` objects, if the `arrow` package detects
+an unimplemented function within a `dplyr` verb, it automatically calls
+`collect()` to return the data as an R `data.frame` before processing
+that `dplyr` verb. For queries on `Dataset` objects (which can be larger
+than memory), it raises an error if the function is unimplemented;
+you need to explicitly tell it to `collect()`.
+
+### Additional features
+
+Other applications of `arrow` are described in the following vignettes:
+
+-   `vignette("python", package = "arrow")`: use `arrow` and
+    `reticulate` to pass data between R and Python
+-   `vignette("flight", package = "arrow")`: connect to Arrow Flight RPC
+    servers to send and receive data
+-   `vignette("arrow", package = "arrow")`: access and manipulate Arrow
+    objects through low-level bindings to the C++ library
+
+## Getting help
+
+If you encounter a bug, please file an issue with a minimal reproducible
+example on the [Apache Jira issue
+tracker](https://issues.apache.org/jira/projects/ARROW/issues). Create
+an account or log in, then click **Create** to file an issue. Select the
+project **Apache Arrow (ARROW)**, select the component **R**, and begin
+the issue summary with **`[R]`** followed by a space. For more
+information, see the **Report bugs and propose features** section of the
+[Contributing to Apache
+Arrow](https://arrow.apache.org/docs/developers/contributing.html) page
+in the Arrow developer documentation.
+
+We welcome questions, discussion, and contributions from users of the
+`arrow` package. For information about mailing lists and other venues
+for engaging with the Arrow developer and user communities, please see
+the [Apache Arrow Community](https://arrow.apache.org/community/) page.
+
+------------------------------------------------------------------------
+
+All participation in the Apache Arrow project is governed by the Apache
+Software Foundation’s [code of
+conduct](https://www.apache.org/foundation/policies/conduct.html).
diff --git a/r/vignettes/arrow.Rmd b/r/vignettes/arrow.Rmd
index e38296828fb..21cbbe48d61 100644
--- a/r/vignettes/arrow.Rmd
+++ b/r/vignettes/arrow.Rmd
@@ -72,6 +72,45 @@ to other applications and services that use Arrow. One example is Spark: the
 move data to and from Spark, yielding [significant performance
 gains](http://arrow.apache.org/blog/2019/01/25/r-spark-improvements/).
 
+# Object hierarchy
+
+## Metadata objects
+
+Arrow defines the following classes for representing metadata:
+
+| Class      | Description                                        | How to create an instance        |
+| ---------- | -------------------------------------------------- | -------------------------------- |
+| `DataType` | attribute controlling how values are represented   | functions in `help("data-type")` |
+| `Field`    | a character string name and a `DataType`           | `field(name, type)`              |
+| `Schema`   | list of `Field`s                                   | `schema(...)`                    |
+
+## Data objects
+
+Arrow defines the following classes for representing zero-dimensional (scalar),
+one-dimensional (array/vector-like), and two-dimensional (tabular/data
+frame-like) data:
+
+| Dim | Class          | Description                               | How to create an instance                                                  |
+| --- | -------------- | ----------------------------------------- | -------------------------------------------------------------------------- |
+| 0   | `Scalar`       | single value and its `DataType`           | `Scalar$create(value, type)`                                               |
+| 1   | `Array`        | vector of values and its `DataType`       | `Array$create(vector, type)`                                               | 
+| 1   | `ChunkedArray` | vectors of values and their `DataType`    | `ChunkedArray$create(..., type)` or alias `chunked_array(..., type)`       |
+| 2   | `RecordBatch`  | list of `Array`s with a `Schema`          | `RecordBatch$create(...)` or alias `record_batch(...)`                     |
+| 2   | `Table`        | list of `ChunkedArray` with a `Schema`    | `Table$create(...)` or `arrow::read_*(file, as_data_frame = FALSE)`        |
+| 2   | `Dataset`      | list of `Table`s  with the same `Schema`  | `Dataset$create(sources, schema)` or alias `open_dataset(sources, schema)` |
+
+Each of these is defined as an `R6` class in the `arrow` R package and
+corresponds to a class of the same name in the Arrow C++ library. The `arrow`
+package provides a variety of `R6` and S3 methods for interacting with instances
+of these classes.
+
+For convenience, the `arrow package also defines several synthetic classes that
+do not exist in the C++ library, including:
+
+* `ArrowDatum`: inherited by `Scalar`, `Array`, and `ChunkedArray`
+* `ArrowTabular`: inherited by `RecordBatch` and `Table`
+* `ArrowObject`: inherited by all Arrow objects
+
 # Internals
 
 ## Mapping of R <--> Arrow types
diff --git a/r/vignettes/dataset.Rmd b/r/vignettes/dataset.Rmd
index 32389b95162..b5e17578b29 100644
--- a/r/vignettes/dataset.Rmd
+++ b/r/vignettes/dataset.Rmd
@@ -20,11 +20,11 @@ and what is on the immediate development roadmap.
 The [New York City taxi trip record data](https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page)
 is widely used in big data exercises and competitions.
 For demonstration purposes, we have hosted a Parquet-formatted version
-of about 10 years of the trip data in a public AWS S3 bucket.
+of about 10 years of the trip data in a public Amazon S3 bucket.
 
-The total file size is around 37 gigabytes, even in the efficient Parquet file format.
-That's bigger than memory on most people's computers,
-so we can't just read it all in and stack it into a single data frame.
+The total file size is around 37 gigabytes, even in the efficient Parquet file
+format. That's bigger than memory on most people's computers, so we can't just
+read it all in and stack it into a single data frame.
 
 In Windows and macOS binary packages, S3 support is included.
 On Linux when installing from source, S3 support is not enabled by default,
@@ -102,11 +102,11 @@ ds <- open_dataset("nyc-taxi", partitioning = c("year", "month"))
 
 The default file format for `open_dataset()` is Parquet; if we had a directory
 of Arrow format files, we could include `format = "arrow"` in the call.
-Other supported formats include: "feather" (an alias for "arrow", as Feather v2
-is the Arrow file format), "csv", "tsv" (for tab-delimited), and "text" for
-generic text-delimited files. For text files, you can pass any parsing options
-("delim", "quote", etc.) to `open_dataset()` that you would otherwise pass to
-`read_csv_arrow()`.
+Other supported formats include: `"feather"` (an alias for `"arrow"`, as Feather
+v2 is the Arrow file format), `"csv"`, `"tsv"` (for tab-delimited), and `"text"`
+for generic text-delimited files. For text files, you can pass any parsing
+options (`delim`, `quote`, etc.) to `open_dataset()` that you would otherwise
+pass to `read_csv_arrow()`.
 
 The `partitioning` argument lets us specify how the file paths provide information
 about how the dataset is chunked into different files. Our files in this example
@@ -119,12 +119,12 @@ have file paths like
 ```
 
 By providing a character vector to `partitioning`, we're saying that the first
-path segment gives the value for "year" and the second segment is "month".
-Every row in `2009/01/data.parquet` has a value of 2009 for "year"
-and 1 for "month", even though those columns may not actually be present in the file.
+path segment gives the value for `year` and the second segment is `month`.
+Every row in `2009/01/data.parquet` has a value of 2009 for `year`
+and 1 for `month`, even though those columns may not actually be present in the file.
 
 Indeed, when we look at the dataset, we see that in addition to the columns present
-in every file, there are also columns "year" and "month".
+in every file, there are also columns `year` and `month`.
 
 ```{r, eval = file.exists("nyc-taxi")}
 ds
@@ -139,7 +139,7 @@ passenger_count: int8
 trip_distance: float
 pickup_longitude: float
 pickup_latitude: float
-rate_code_id: string
+rate_code_id: null
 store_and_fwd_flag: string
 dropoff_longitude: float
 dropoff_latitude: float
@@ -150,10 +150,6 @@ mta_tax: float
 tip_amount: float
 tolls_amount: float
 total_amount: float
-improvement_surcharge: float
-pickup_location_id: int32
-dropoff_location_id: int32
-congestion_surcharge: float
 year: int32
 month: int32
 
@@ -182,16 +178,16 @@ files, we've parsed file paths to identify partitions, and we've read the
 headers of the Parquet files to inspect their schemas so that we can make sure
 they all line up.
 
-In the current release, `arrow` supports methods for selecting a window of data:
-`select()`, `rename()`, and `filter()`. Aggregation is not yet supported,
-nor is deriving or projecting new columns, so before you call `summarize()` or
-`mutate()`, you'll need to `collect()` the data first,
-which pulls your selected window of data into an in-memory R data frame.
-While we could have made those methods `collect()` the data they needed
-automatically and invisibly to the end user,
-we thought it best to make it explicit when you're pulling data into memory
-so that you can construct your queries most efficiently
-and not be surprised when some query consumes way more resources than expected.
+In the current release, `arrow` supports the dplyr verbs `mutate()`, 
+`transmute()`, `select()`, `rename()`, `relocate()`, `filter()`, and 
+`arrange()`. Aggregation is not yet supported, so before you call `summarise()`
+or other verbs with aggregate functions, use `collect()` to pull the selected
+subset of the data into an in-memory R data frame.
+
+If you attempt to call unsupported `dplyr` verbs or unimplemented functions in
+your query on an Arrow Dataset, the `arrow` package raises an error. However,
+for `dplyr` queries on `Table` objects (which are typically smaller in size) the
+package automatically calls `collect()` before processing that `dplyr` verb.
 
 Here's an example. Suppose I was curious about tipping behavior among the
 longest taxi rides. Let's find the median tip percentage for rides with
@@ -201,10 +197,11 @@ fares greater than $100 in 2015, broken down by the number of passengers:
 system.time(ds %>%
   filter(total_amount > 100, year == 2015) %>%
   select(tip_amount, total_amount, passenger_count) %>%
+  mutate(tip_pct = 100 * tip_amount / total_amount) %>%
   group_by(passenger_count) %>%
   collect() %>%
-  summarize(
-    tip_pct = median(100 * tip_amount / total_amount),
+  summarise(
+    median_tip_pct = median(tip_pct),
     n = n()
   ) %>%
   print())
@@ -213,34 +210,38 @@ system.time(ds %>%
 ```{r, echo = FALSE, eval = !file.exists("nyc-taxi")}
 cat("
 # A tibble: 10 x 3
-   passenger_count tip_pct      n
-             <int>   <dbl>  <int>
- 1               0    9.84    380
- 2               1   16.7  143087
- 3               2   16.6   34418
- 4               3   14.4    8922
- 5               4   11.4    4771
- 6               5   16.7    5806
- 7               6   16.7    3338
- 8               7   16.7      11
- 9               8   16.7      32
-10               9   16.7      42
+   passenger_count median_tip_pct      n
+             <int>          <dbl>  <int>
+ 1               0           9.84    380
+ 2               1          16.7  143087
+ 3               2          16.6   34418
+ 4               3          14.4    8922
+ 5               4          11.4    4771
+ 6               5          16.7    5806
+ 7               6          16.7    3338
+ 8               7          16.7      11
+ 9               8          16.7      32
+10               9          16.7      42
 
    user  system elapsed
   4.436   1.012   1.402
 ")
 ```
 
-We just selected a window out of a dataset with around 2 billion rows
-and aggregated on it in under 2 seconds on my laptop. How does this work?
+We just selected a subset out of a dataset with around 2 billion rows, computed
+a new column, and aggregated on it in under 2 seconds on my laptop. How does
+this work?
 
-First, `select()`/`rename()`, `filter()`, and `group_by()`
-record their actions but don't evaluate on the data until you run `collect()`.
+First, 
+`mutate()`/`transmute()`, `select()`/`rename()`/`relocate()`, `filter()`, 
+`group_by()`, and `arrange()` record their actions but don't evaluate on the
+data until you run `collect()`.
 
 ```{r, eval = file.exists("nyc-taxi")}
 ds %>%
   filter(total_amount > 100, year == 2015) %>%
   select(tip_amount, total_amount, passenger_count) %>%
+  mutate(tip_pct = 100 * tip_amount / total_amount) %>%
   group_by(passenger_count)
 ```
 
@@ -250,21 +251,22 @@ FileSystemDataset (query)
 tip_amount: float
 total_amount: float
 passenger_count: int8
+tip_pct: expr
 
-* Filter: ((total_amount > 100:double) and (year == 2015:double))
+* Filter: ((total_amount > 100) and (year == 2015))
 * Grouped by passenger_count
 See $.data for the source Arrow object
 ")
 ```
 
-This returns instantly and shows the window selection you've made, without
+This returns instantly and shows the manipulations you've made, without
 loading data from the files. Because the evaluation of these queries is deferred,
-you can build up a query that selects down to a small window without generating
+you can build up a query that selects down to a small subset without generating
 intermediate datasets that would potentially be large.
 
 Second, all work is pushed down to the individual data files,
 and depending on the file format, chunks of data within the files. As a result,
-we can select a window of data from a much larger dataset by collecting the
+we can select a subset of data from a much larger dataset by collecting the
 smaller slices from each file--we don't have to load the whole dataset in memory
 in order to slice from it.
 
@@ -278,9 +280,17 @@ avoid scanning because they have no rows where `total_amount > 100`.
 ## More dataset options
 
 There are a few ways you can control the Dataset creation to adapt to special use cases.
-For one, you can specify a `schema` argument to declare the columns and their data types.
-This is useful if you have data files that have different storage schema
-(for example, a column could be `int32` in one and `int8` in another)
+For one, if you are working with a single file or a set of files that are not
+all in the same directory, you can provide a file path or a vector of multiple
+file paths to `open_dataset()`. This is useful if, for example, you have a
+single CSV file that is too big to read into memory. You could pass the file
+path to `open_dataset()`, use `group_by()` to partition the Dataset into
+manageable chunks, then use `write_dataset()` to write each chunk to a separate
+Parquet file---all without needing to read the full CSV file into R.
+
+You can specify a `schema` argument to `open_dataset()` to declare the columns
+and their data types. This is useful if you have data files that have different
+storage schema (for example, a column could be `int32` in one and `int8` in another)
 and you want to ensure that the resulting Dataset has a specific type.
 To be clear, it's not necessary to specify a schema, even in this example of
 mixed integer types, because the Dataset constructor will reconcile differences like these.
@@ -289,7 +299,7 @@ The schema specification just lets you declare what you want the result to be.
 Similarly, you can provide a Schema in the `partitioning` argument of `open_dataset()`
 in order to declare the types of the virtual columns that define the partitions.
 This would be useful, in our taxi dataset example, if you wanted to keep
-"month" as a string instead of an integer for some reason.
+`month` as a string instead of an integer for some reason.
 
 Another feature of Datasets is that they can be composed of multiple data sources.
 That is, you may have a directory of partitioned Parquet files in one location,
@@ -322,9 +332,10 @@ by calling `write_dataset()` on it:
 write_dataset(ds, "nyc-taxi/feather", format = "feather")
 ```
 
-Next, let's imagine that the "payment_type" column is something we often filter on,
-so we want to partition the data by that variable. By doing so we ensure that a filter like
-`payment_type == 3` will touch only a subset of files where payment_type is always 3.
+Next, let's imagine that the `payment_type` column is something we often filter
+on, so we want to partition the data by that variable. By doing so we ensure
+that a filter like `payment_type == "Cash"` will touch only a subset of files
+where `payment_type` is always `"Cash"`.
 
 One natural way to express the columns you want to partition on is to use the
 `group_by()` method:
@@ -339,33 +350,35 @@ This will write files to a directory tree that looks like this:
 
 ```r
 system("tree nyc-taxi/feather")
+```
 
-# feather
-# ├── payment_type=1
-# │   └── part-5.feather
-# ├── payment_type=2
-# │   └── part-0.feather
-# ...
-# └── payment_type=5
-#     └── part-2.feather
-#
-# 5 directories, 25 files
+```
+## feather
+## ├── payment_type=1
+## │   └── part-18.feather
+## ├── payment_type=2
+## │   └── part-19.feather
+## ...
+## └── payment_type=UNK
+##     └── part-17.feather
+##
+## 18 directories, 23 files
 ```
 
-Note that the directory names are `payment_type=1` and similar:
+Note that the directory names are `payment_type=Cash` and similar:
 this is the Hive-style partitioning described above. This means that when
 we call `open_dataset()` on this directory, we don't have to declare what the
 partitions are because they can be read from the file paths.
-(To instead write bare values for partition segments,
-i.e. `1` rather than `payment_type=1`, call `write_dataset()` with `hive_style = FALSE`.)
+(To instead write bare values for partition segments, i.e. `Cash` rather than 
+`payment_type=Cash`, call `write_dataset()` with `hive_style = FALSE`.)
 
-Perhaps, though, `payment_type == 3` is the only data we ever care about,
+Perhaps, though, `payment_type == "Cash"` is the only data we ever care about,
 and we just want to drop the rest and have a smaller working set.
 For this, we can `filter()` them out when writing:
 
 ```r
 ds %>%
-  filter(payment_type == 3) %>%
+  filter(payment_type == "Cash") %>%
   write_dataset("nyc-taxi/feather", format = "feather")
 ```
 
@@ -381,4 +394,4 @@ ds %>%
 ```
 
 Note that while you can select a subset of columns,
-you cannot currently rename columns when writing.
+you cannot currently rename columns when writing a dataset.

From 579429002535f9948ce2d50e508e0f88f397ac65 Mon Sep 17 00:00:00 2001
From: Dominik Moritz <domoritz@gmail.com>
Date: Thu, 15 Apr 2021 09:17:10 +0900
Subject: [PATCH 040/719] ARROW-12273: [JS] [Rust] Remove coveralls

https://coveralls.io/github/apache/arrow is not being updated anymore.

Closes #10036 from domoritz/coveralls

Authored-by: Dominik Moritz <domoritz@gmail.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 js/README.md         |  1 -
 js/package.json      |  1 -
 js/yarn.lock         | 31 ++-----------------------------
 rust/arrow/README.md |  2 --
 4 files changed, 2 insertions(+), 33 deletions(-)

diff --git a/js/README.md b/js/README.md
index e799362d966..586eceaf7e7 100644
--- a/js/README.md
+++ b/js/README.md
@@ -21,7 +21,6 @@
 
 [![npm version](https://img.shields.io/npm/v/apache-arrow.svg)](https://www.npmjs.com/package/apache-arrow)
 [![Build Status](https://travis-ci.org/apache/arrow.svg?branch=master)](https://travis-ci.org/apache/arrow)
-[![Coverage Status](https://coveralls.io/repos/github/apache/arrow/badge.svg)](https://coveralls.io/github/apache/arrow)
 
 Arrow is a set of technologies that enable big data systems to process and transfer data quickly.
 
diff --git a/js/package.json b/js/package.json
index 84d5ba195d8..4cbdffd085a 100644
--- a/js/package.json
+++ b/js/package.json
@@ -71,7 +71,6 @@
     "@typescript-eslint/parser": "^4.21.0",
     "async-done": "1.3.1",
     "benchmark": "2.1.4",
-    "coveralls": "3.0.3",
     "cpy": "^8.1.2",
     "del-cli": "3.0.1",
     "eslint": "^7.24.0",
diff --git a/js/yarn.lock b/js/yarn.lock
index 8de721bb6bd..f889f9a5f62 100644
--- a/js/yarn.lock
+++ b/js/yarn.lock
@@ -3419,18 +3419,6 @@ cosmiconfig@^5.1.0:
     js-yaml "^3.13.1"
     parse-json "^4.0.0"
 
-coveralls@3.0.3:
-  version "3.0.3"
-  resolved "https://registry.yarnpkg.com/coveralls/-/coveralls-3.0.3.tgz#83b1c64aea1c6afa69beaf50b55ac1bc4d13e2b8"
-  integrity sha512-viNfeGlda2zJr8Gj1zqXpDMRjw9uM54p7wzZdvLRyOgnAfCe974Dq4veZkjJdxQXbmdppu6flEajFYseHYaUhg==
-  dependencies:
-    growl "~> 1.10.0"
-    js-yaml "^3.11.0"
-    lcov-parse "^0.0.10"
-    log-driver "^1.2.7"
-    minimist "^1.2.0"
-    request "^2.86.0"
-
 cp-file@^7.0.0:
   version "7.0.0"
   resolved "https://registry.yarnpkg.com/cp-file/-/cp-file-7.0.0.tgz#b9454cfd07fe3b974ab9ea0e5f29655791a9b8cd"
@@ -5102,11 +5090,6 @@ graceful-fs@4.X, graceful-fs@^4.0.0, graceful-fs@^4.1.11, graceful-fs@^4.1.15, g
   resolved "https://registry.yarnpkg.com/graceful-fs/-/graceful-fs-4.2.6.tgz#ff040b2b0853b23c3d31027523706f1885d76bee"
   integrity sha512-nTnJ528pbqxYanhpDYsi4Rd8MAeaBA67+RZ10CM1m3bTAVFEDcd5AuA4a6W5YkGZ1iNXHzZz8T6TBKLeBuNriQ==
 
-"growl@~> 1.10.0":
-  version "1.10.5"
-  resolved "https://registry.yarnpkg.com/growl/-/growl-1.10.5.tgz#f2735dc2283674fa67478b10181059355c369e5e"
-  integrity sha512-qBr4OuELkhPenW6goKVXiv47US3clb3/IbuWF9KNKEijAy9oeHxU9IgzjvJhHkUzhaj7rOUD7+YGWqUjLp5oSA==
-
 growly@^1.3.0:
   version "1.3.0"
   resolved "https://registry.yarnpkg.com/growly/-/growly-1.3.0.tgz#f10748cbe76af964b7c96c93c6bcc28af120c081"
@@ -6446,7 +6429,7 @@ js-tokens@^4.0.0:
   resolved "https://registry.yarnpkg.com/js-tokens/-/js-tokens-4.0.0.tgz#19203fb59991df98e3a287050d4647cdeaf32499"
   integrity sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==
 
-js-yaml@^3.11.0, js-yaml@^3.13.1:
+js-yaml@^3.13.1:
   version "3.14.1"
   resolved "https://registry.yarnpkg.com/js-yaml/-/js-yaml-3.14.1.tgz#dae812fdb3825fa306609a8717383c50c36a0537"
   integrity sha512-okMH7OXXJ7YrN9Ok3/SXrnu4iX9yOk+25nqX4imS2npuvTYDmo/QEZoqwZkYaIDk3jVvBOTOIEgEhaLOynBS9g==
@@ -6647,11 +6630,6 @@ lcid@^1.0.0:
   dependencies:
     invert-kv "^1.0.0"
 
-lcov-parse@^0.0.10:
-  version "0.0.10"
-  resolved "https://registry.yarnpkg.com/lcov-parse/-/lcov-parse-0.0.10.tgz#1b0b8ff9ac9c7889250582b70b71315d9da6d9a3"
-  integrity sha1-GwuP+ayceIklBYK3C3ExXZ2m2aM=
-
 lead@^1.0.0:
   version "1.0.0"
   resolved "https://registry.yarnpkg.com/lead/-/lead-1.0.0.tgz#6f14f99a37be3a9dd784f5495690e5903466ee42"
@@ -6872,11 +6850,6 @@ lodash@^4.17.12, lodash@^4.17.14, lodash@^4.17.15, lodash@^4.17.19, lodash@^4.17
   resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.21.tgz#679591c564c3bffaae8454cf0b3df370c3d6911c"
   integrity sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==
 
-log-driver@^1.2.7:
-  version "1.2.7"
-  resolved "https://registry.yarnpkg.com/log-driver/-/log-driver-1.2.7.tgz#63b95021f0702fedfa2c9bb0a24e7797d71871d8"
-  integrity sha512-U7KCmLdqsGHBLeWqYlFA0V0Sl6P08EE1ZrmA9cxjUE0WVqT9qnyVDPz1kzpFEP0jdJuFnasWIfSd7fsaNXkpbg==
-
 loud-rejection@^1.0.0:
   version "1.6.0"
   resolved "https://registry.yarnpkg.com/loud-rejection/-/loud-rejection-1.6.0.tgz#5b46f80147edee578870f086d04821cf998e551f"
@@ -8840,7 +8813,7 @@ request-promise-native@^1.0.9:
     stealthy-require "^1.1.1"
     tough-cookie "^2.3.3"
 
-request@^2.86.0, request@^2.88.0, request@^2.88.2:
+request@^2.88.0, request@^2.88.2:
   version "2.88.2"
   resolved "https://registry.yarnpkg.com/request/-/request-2.88.2.tgz#d73c918731cb5a87da047e207234146f664d12b3"
   integrity sha512-MsvtOrfG9ZcrOwAW+Qi+F6HbD0CWXEh9ou77uOb7FM2WPhwT7smM833PzanhJLsgXjN89Ir6V2PczXNnMpwKhw==
diff --git a/rust/arrow/README.md b/rust/arrow/README.md
index ad085246a9f..54f00a492b6 100644
--- a/rust/arrow/README.md
+++ b/rust/arrow/README.md
@@ -19,8 +19,6 @@
 
 # Native Rust implementation of Apache Arrow
 
-[![Coverage Status](https://coveralls.io/repos/github/apache/arrow/badge.svg)](https://coveralls.io/github/apache/arrow)
-
 This crate contains a native Rust implementation of the [Arrow columnar format](https://arrow.apache.org/docs/format/Columnar.html).
 
 ## Developer's guide

From 02cdeaba23850e4b973051fc9e31eba0d9e93917 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Thu, 15 Apr 2021 09:25:13 +0900
Subject: [PATCH 041/719] ARROW-12376: [Dev] Log traceback for unexpected
 exceptions in archery trigger-bot

Some earlier attempts at invoking Crossbow failed and the reason wasn't clear. This logs full tracebacks for unexpected exceptions to make it easier to tell what happened.

Example where we ran into a mysterious error: https://github.com/apache/arrow/pull/9941#issuecomment-819363464

Closes #10037 from lidavidm/arrow-12376

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 dev/archery/archery/bot.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dev/archery/archery/bot.py b/dev/archery/archery/bot.py
index 27cf2470907..c69cf9112da 100644
--- a/dev/archery/archery/bot.py
+++ b/dev/archery/archery/bot.py
@@ -146,7 +146,7 @@ def handle_issue_comment(self, command, payload):
             logger.error(e)
             pull.create_issue_comment("```\n{}\n```".format(e.message))
         except Exception as e:
-            logger.error(e)
+            logger.exception(e)
             comment.create_reaction('-1')
         else:
             comment.create_reaction('+1')

From d575858e8ff847e8d4e7b6c121e8041cbe2b93c2 Mon Sep 17 00:00:00 2001
From: Benjamin Kietzman <bengilgit@gmail.com>
Date: Wed, 14 Apr 2021 21:09:14 -0400
Subject: [PATCH 042/719] ARROW-11797: [C++][Dataset] Provide batch stream
 Scanner methods

Closes #9589 from bkietz/11797-Provide-Scanner-methods-t

Lead-authored-by: Benjamin Kietzman <bengilgit@gmail.com>
Co-authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/src/arrow/dataset/file_base.cc           |  16 ++
 cpp/src/arrow/dataset/file_csv_test.cc       |  16 +-
 cpp/src/arrow/dataset/scanner.cc             | 264 ++++++++++++++++---
 cpp/src/arrow/dataset/scanner.h              |  15 +-
 cpp/src/arrow/dataset/scanner_test.cc        | 148 ++++++++++-
 cpp/src/arrow/dataset/test_util.h            |   9 +-
 cpp/src/jni/dataset/jni_wrapper.cc           |  44 +---
 python/pyarrow/_dataset.pyx                  | 104 +++++++-
 python/pyarrow/dataset.py                    |   1 +
 python/pyarrow/includes/libarrow_dataset.pxd |   9 +
 python/pyarrow/tests/test_dataset.py         |  56 ++--
 r/R/arrowExports.R                           |   8 +
 r/R/dataset-scan.R                           |  24 +-
 r/R/dataset.R                                |  33 +--
 r/src/arrowExports.cpp                       |  33 +++
 r/src/dataset.cpp                            |  50 +++-
 r/tests/testthat/test-dataset.R              |  15 ++
 17 files changed, 675 insertions(+), 170 deletions(-)

diff --git a/cpp/src/arrow/dataset/file_base.cc b/cpp/src/arrow/dataset/file_base.cc
index f4a3a0bc9f8..c3b4433b6de 100644
--- a/cpp/src/arrow/dataset/file_base.cc
+++ b/cpp/src/arrow/dataset/file_base.cc
@@ -460,9 +460,25 @@ Status FileSystemDataset::Write(const FileSystemDatasetWriteOptions& write_optio
   //
   // NB: neither of these will have any impact whatsoever on the common case of writing
   //     an in-memory table to disk.
+
+#if defined(__GNUC__) || defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+#elif defined(_MSC_VER)
+#pragma warning(push)
+#pragma warning(disable : 4996)
+#endif
+
+  // TODO: (ARROW-11782/ARROW-12288) Remove calls to Scan()
   ARROW_ASSIGN_OR_RAISE(auto scan_task_it, scanner->Scan());
   ARROW_ASSIGN_OR_RAISE(ScanTaskVector scan_tasks, scan_task_it.ToVector());
 
+#if defined(__GNUC__) || defined(__clang__)
+#pragma GCC diagnostic pop
+#elif defined(_MSC_VER)
+#pragma warning(pop)
+#endif
+
   WriteState state(write_options);
   auto res = internal::RunSynchronously<arrow::detail::Empty>(
       [&](internal::Executor* cpu_executor) -> Future<> {
diff --git a/cpp/src/arrow/dataset/file_csv_test.cc b/cpp/src/arrow/dataset/file_csv_test.cc
index 99ca7cc0f42..0ae6fa532ca 100644
--- a/cpp/src/arrow/dataset/file_csv_test.cc
+++ b/cpp/src/arrow/dataset/file_csv_test.cc
@@ -270,17 +270,11 @@ N/A,bar
   ASSERT_OK(builder.Project({"str"}));
   ASSERT_OK_AND_ASSIGN(auto scanner, builder.Finish());
 
-  ASSERT_OK_AND_ASSIGN(auto scan_task_it, scanner->Scan());
-  for (auto maybe_scan_task : scan_task_it) {
-    ASSERT_OK_AND_ASSIGN(auto scan_task, maybe_scan_task);
-    ASSERT_OK_AND_ASSIGN(auto batch_it, scan_task->Execute());
-    for (auto maybe_batch : batch_it) {
-      ASSERT_OK_AND_ASSIGN(auto batch, maybe_batch);
-      // Run through the scan checking for errors to ensure that "f64" is read with the
-      // specified type and does not revert to the inferred type (if it reverts to
-      // inferring float64 then evaluation of the comparison expression should break)
-    }
-  }
+  ASSERT_OK_AND_ASSIGN(auto batch_it, scanner->ScanBatches());
+  // Run through the scan checking for errors to ensure that "f64" is read with the
+  // specified type and does not revert to the inferred type (if it reverts to
+  // inferring float64 then evaluation of the comparison expression should break)
+  ASSERT_OK(batch_it.Visit([](TaggedRecordBatch) { return Status::OK(); }));
 }
 
 INSTANTIATE_TEST_SUITE_P(TestUncompressedCsv, TestCsvFileFormat,
diff --git a/cpp/src/arrow/dataset/scanner.cc b/cpp/src/arrow/dataset/scanner.cc
index 52eebfeb29e..5095c2e8ad6 100644
--- a/cpp/src/arrow/dataset/scanner.cc
+++ b/cpp/src/arrow/dataset/scanner.cc
@@ -18,10 +18,15 @@
 #include "arrow/dataset/scanner.h"
 
 #include <algorithm>
+#include <condition_variable>
 #include <memory>
 #include <mutex>
+#include <sstream>
 
+#include "arrow/array/array_primitive.h"
 #include "arrow/compute/api_scalar.h"
+#include "arrow/compute/api_vector.h"
+#include "arrow/compute/cast.h"
 #include "arrow/dataset/dataset.h"
 #include "arrow/dataset/dataset_internal.h"
 #include "arrow/dataset/scanner_internal.h"
@@ -132,37 +137,124 @@ Result<EnumeratedRecordBatchIterator> Scanner::AddPositioningToInOrderScan(
       EnumeratingIterator{std::make_shared<State>(std::move(scan), std::move(first))});
 }
 
-Result<TaggedRecordBatchIterator> SyncScanner::ScanBatches() {
-  // TODO(ARROW-11797) Provide a better implementation that does readahead.  Also, add
-  // unit testing
-  ARROW_ASSIGN_OR_RAISE(auto scan_task_it, Scan());
-  struct BatchIter {
-    explicit BatchIter(ScanTaskIterator scan_task_it)
-        : scan_task_it(std::move(scan_task_it)) {}
-
-    Result<TaggedRecordBatch> Next() {
-      while (true) {
-        if (current_task == nullptr) {
-          ARROW_ASSIGN_OR_RAISE(current_task, scan_task_it.Next());
-          if (IsIterationEnd<std::shared_ptr<ScanTask>>(current_task)) {
-            return IterationEnd<TaggedRecordBatch>();
-          }
-          ARROW_ASSIGN_OR_RAISE(batch_it, current_task->Execute());
-        }
-        ARROW_ASSIGN_OR_RAISE(auto next, batch_it.Next());
-        if (IsIterationEnd<std::shared_ptr<RecordBatch>>(next)) {
-          current_task = nullptr;
-        } else {
-          return TaggedRecordBatch{next, current_task->fragment()};
-        }
+struct ScanBatchesState : public std::enable_shared_from_this<ScanBatchesState> {
+  explicit ScanBatchesState(ScanTaskIterator scan_task_it,
+                            std::shared_ptr<TaskGroup> task_group_)
+      : scan_tasks(std::move(scan_task_it)), task_group(std::move(task_group_)) {}
+
+  void ResizeBatches(size_t task_index) {
+    if (task_batches.size() <= task_index) {
+      task_batches.resize(task_index + 1);
+      task_drained.resize(task_index + 1);
+    }
+  }
+
+  void Push(TaggedRecordBatch batch, size_t task_index) {
+    {
+      std::lock_guard<std::mutex> lock(mutex);
+      ResizeBatches(task_index);
+      task_batches[task_index].push_back(std::move(batch));
+    }
+    ready.notify_one();
+  }
+
+  Status Finish(size_t task_index) {
+    {
+      std::lock_guard<std::mutex> lock(mutex);
+      ResizeBatches(task_index);
+      task_drained[task_index] = true;
+    }
+    ready.notify_one();
+    return Status::OK();
+  }
+
+  void PushScanTask() {
+    if (no_more_tasks) return;
+    std::unique_lock<std::mutex> lock(mutex);
+    auto maybe_task = scan_tasks.Next();
+    if (!maybe_task.ok()) {
+      no_more_tasks = true;
+      iteration_error = maybe_task.status();
+      return;
+    }
+    auto scan_task = maybe_task.ValueOrDie();
+    if (IsIterationEnd(scan_task)) {
+      no_more_tasks = true;
+      return;
+    }
+    auto state = shared_from_this();
+    auto id = next_scan_task_id++;
+    ResizeBatches(id);
+
+    lock.unlock();
+    task_group->Append([state, id, scan_task]() {
+      ARROW_ASSIGN_OR_RAISE(auto batch_it, scan_task->Execute());
+      for (auto maybe_batch : batch_it) {
+        ARROW_ASSIGN_OR_RAISE(auto batch, maybe_batch);
+        state->Push(TaggedRecordBatch{std::move(batch), scan_task->fragment()}, id);
       }
+      return state->Finish(id);
+    });
+  }
+
+  Result<TaggedRecordBatch> Pop() {
+    std::unique_lock<std::mutex> lock(mutex);
+    ready.wait(lock, [this, &lock] {
+      while (pop_cursor < task_batches.size()) {
+        // queue for current scan task contains at least one batch, pop that
+        if (!task_batches[pop_cursor].empty()) return true;
+        // queue is empty but will be appended to eventually, wait for that
+        if (!task_drained[pop_cursor]) return false;
+
+        // Finished draining current scan task, enqueue a new one
+        ++pop_cursor;
+        // Must unlock since serial task group will execute synchronously
+        lock.unlock();
+        PushScanTask();
+        lock.lock();
+      }
+      DCHECK(no_more_tasks);
+      // all scan tasks drained (or getting next task failed), terminate
+      return true;
+    });
+
+    if (pop_cursor == task_batches.size()) {
+      // Don't report an error until we yield up everything we can first
+      RETURN_NOT_OK(iteration_error);
+      return IterationEnd<TaggedRecordBatch>();
     }
 
-    ScanTaskIterator scan_task_it;
-    RecordBatchIterator batch_it;
-    std::shared_ptr<ScanTask> current_task;
-  };
-  return TaggedRecordBatchIterator(BatchIter(std::move(scan_task_it)));
+    auto batch = std::move(task_batches[pop_cursor].front());
+    task_batches[pop_cursor].pop_front();
+    return batch;
+  }
+
+  /// Protecting mutating accesses to batches
+  std::mutex mutex;
+  std::condition_variable ready;
+  ScanTaskIterator scan_tasks;
+  std::shared_ptr<TaskGroup> task_group;
+  int next_scan_task_id = 0;
+  bool no_more_tasks = false;
+  Status iteration_error;
+  std::vector<std::deque<TaggedRecordBatch>> task_batches;
+  std::vector<bool> task_drained;
+  size_t pop_cursor = 0;
+};
+
+Result<TaggedRecordBatchIterator> SyncScanner::ScanBatches() {
+  ARROW_ASSIGN_OR_RAISE(auto scan_task_it, ScanInternal());
+  auto task_group = scan_options_->TaskGroup();
+  auto state = std::make_shared<ScanBatchesState>(std::move(scan_task_it), task_group);
+  for (int i = 0; i < scan_options_->fragment_readahead; i++) {
+    state->PushScanTask();
+  }
+  return MakeFunctionIterator([task_group, state]() -> Result<TaggedRecordBatch> {
+    ARROW_ASSIGN_OR_RAISE(auto batch, state->Pop());
+    if (!IsIterationEnd(batch)) return batch;
+    RETURN_NOT_OK(task_group->Finish());
+    return IterationEnd<TaggedRecordBatch>();
+  });
 }
 
 Result<FragmentIterator> SyncScanner::GetFragments() {
@@ -176,7 +268,30 @@ Result<FragmentIterator> SyncScanner::GetFragments() {
   return GetFragmentsFromDatasets({dataset_}, scan_options_->filter);
 }
 
-Result<ScanTaskIterator> SyncScanner::Scan() {
+Result<ScanTaskIterator> SyncScanner::Scan() { return ScanInternal(); }
+
+Status SyncScanner::Scan(std::function<Status(TaggedRecordBatch)> visitor) {
+  ARROW_ASSIGN_OR_RAISE(auto scan_task_it, ScanInternal());
+
+  auto task_group = scan_options_->TaskGroup();
+
+  for (auto maybe_scan_task : scan_task_it) {
+    ARROW_ASSIGN_OR_RAISE(auto scan_task, maybe_scan_task);
+    task_group->Append([scan_task, visitor] {
+      ARROW_ASSIGN_OR_RAISE(auto batch_it, scan_task->Execute());
+      for (auto maybe_batch : batch_it) {
+        ARROW_ASSIGN_OR_RAISE(auto batch, maybe_batch);
+        RETURN_NOT_OK(
+            visitor(TaggedRecordBatch{std::move(batch), scan_task->fragment()}));
+      }
+      return Status::OK();
+    });
+  }
+
+  return task_group->Finish();
+}
+
+Result<ScanTaskIterator> SyncScanner::ScanInternal() {
   // Transforms Iterator<Fragment> into a unified
   // Iterator<ScanTask>. The first Iterator::Next invocation is going to do
   // all the work of unwinding the chained iterators.
@@ -315,7 +430,7 @@ Result<std::shared_ptr<Table>> SyncScanner::ToTable() {
 
 Future<std::shared_ptr<Table>> SyncScanner::ToTableInternal(
     internal::Executor* cpu_executor) {
-  ARROW_ASSIGN_OR_RAISE(auto scan_task_it, Scan());
+  ARROW_ASSIGN_OR_RAISE(auto scan_task_it, ScanInternal());
   auto task_group = scan_options_->TaskGroup();
 
   /// Wraps the state in a shared_ptr to ensure that failing ScanTasks don't
@@ -343,5 +458,94 @@ Future<std::shared_ptr<Table>> SyncScanner::ToTableInternal(
                                   FlattenRecordBatchVector(std::move(state->batches)));
 }
 
+Result<std::shared_ptr<Table>> Scanner::TakeRows(const Array& indices) {
+  if (indices.null_count() != 0) {
+    return Status::NotImplemented("null take indices");
+  }
+
+  compute::ExecContext ctx(scan_options_->pool);
+
+  const Array* original_indices;
+  // If we have to cast, this is the backing reference
+  std::shared_ptr<Array> original_indices_ptr;
+  if (indices.type_id() != Type::INT64) {
+    ARROW_ASSIGN_OR_RAISE(
+        original_indices_ptr,
+        compute::Cast(indices, int64(), compute::CastOptions::Safe(), &ctx));
+    original_indices = original_indices_ptr.get();
+  } else {
+    original_indices = &indices;
+  }
+
+  std::shared_ptr<Array> unsort_indices;
+  {
+    ARROW_ASSIGN_OR_RAISE(
+        auto sort_indices,
+        compute::SortIndices(*original_indices, compute::SortOrder::Ascending, &ctx));
+    ARROW_ASSIGN_OR_RAISE(original_indices_ptr,
+                          compute::Take(*original_indices, *sort_indices,
+                                        compute::TakeOptions::Defaults(), &ctx));
+    original_indices = original_indices_ptr.get();
+    ARROW_ASSIGN_OR_RAISE(
+        unsort_indices,
+        compute::SortIndices(*sort_indices, compute::SortOrder::Ascending, &ctx));
+  }
+
+  RecordBatchVector out_batches;
+
+  auto raw_indices = static_cast<const Int64Array&>(*original_indices).raw_values();
+  int64_t offset = 0, row_begin = 0;
+
+  ARROW_ASSIGN_OR_RAISE(auto batch_it, ScanBatches());
+  while (true) {
+    ARROW_ASSIGN_OR_RAISE(auto batch, batch_it.Next());
+    if (IsIterationEnd(batch)) break;
+    if (offset == original_indices->length()) break;
+    DCHECK_LT(offset, original_indices->length());
+
+    int64_t length = 0;
+    while (offset + length < original_indices->length()) {
+      auto rel_index = raw_indices[offset + length] - row_begin;
+      if (rel_index >= batch.record_batch->num_rows()) break;
+      ++length;
+    }
+    DCHECK_LE(offset + length, original_indices->length());
+    if (length == 0) {
+      row_begin += batch.record_batch->num_rows();
+      continue;
+    }
+
+    Datum rel_indices = original_indices->Slice(offset, length);
+    ARROW_ASSIGN_OR_RAISE(rel_indices,
+                          compute::Subtract(rel_indices, Datum(row_begin),
+                                            compute::ArithmeticOptions(), &ctx));
+
+    ARROW_ASSIGN_OR_RAISE(Datum out_batch,
+                          compute::Take(batch.record_batch, rel_indices,
+                                        compute::TakeOptions::Defaults(), &ctx));
+    out_batches.push_back(out_batch.record_batch());
+
+    offset += length;
+    row_begin += batch.record_batch->num_rows();
+  }
+
+  if (offset < original_indices->length()) {
+    std::stringstream error;
+    const int64_t max_values_shown = 3;
+    const int64_t num_remaining = original_indices->length() - offset;
+    for (int64_t i = 0; i < std::min<int64_t>(max_values_shown, num_remaining); i++) {
+      if (i > 0) error << ", ";
+      error << static_cast<const Int64Array*>(original_indices)->Value(offset + i);
+    }
+    if (num_remaining > max_values_shown) error << ", ...";
+    return Status::IndexError("Some indices were out of bounds: ", error.str());
+  }
+  ARROW_ASSIGN_OR_RAISE(Datum out, Table::FromRecordBatches(options()->projected_schema,
+                                                            std::move(out_batches)));
+  ARROW_ASSIGN_OR_RAISE(
+      out, compute::Take(out, unsort_indices, compute::TakeOptions::Defaults(), &ctx));
+  return out.table();
+}
+
 }  // namespace dataset
 }  // namespace arrow
diff --git a/cpp/src/arrow/dataset/scanner.h b/cpp/src/arrow/dataset/scanner.h
index c4da6da7b80..9720346b410 100644
--- a/cpp/src/arrow/dataset/scanner.h
+++ b/cpp/src/arrow/dataset/scanner.h
@@ -255,8 +255,14 @@ class ARROW_DS_EXPORT Scanner {
   /// in a concurrent fashion and outlive the iterator.
   ///
   /// Note: Not supported by the async scanner
-  /// TODO(ARROW-11797) Deprecate Scan()
+  /// Planned for removal from the public API in ARROW-11782.
+  ARROW_DEPRECATED("Deprecated in 4.0.0 for removal in 5.0.0. Use ScanBatches().")
   virtual Result<ScanTaskIterator> Scan();
+
+  /// \brief Apply a visitor to each RecordBatch as it is scanned. If multiple threads
+  /// are used (via use_threads), the visitor will be invoked from those threads and is
+  /// responsible for any synchronization.
+  virtual Status Scan(std::function<Status(TaggedRecordBatch)> visitor) = 0;
   /// \brief Convert a Scanner into a Table.
   ///
   /// Use this convenience utility with care. This will serially materialize the
@@ -279,6 +285,10 @@ class ARROW_DS_EXPORT Scanner {
   /// To make up for the out-of-order iteration each batch is further tagged with
   /// positional information.
   virtual Result<EnumeratedRecordBatchIterator> ScanBatchesUnordered();
+  /// \brief A convenience to synchronously load the given rows by index.
+  ///
+  /// Will only consume as many batches as needed from ScanBatches().
+  virtual Result<std::shared_ptr<Table>> TakeRows(const Array& indices);
 
   /// \brief Get the options for this scan.
   const std::shared_ptr<ScanOptions>& options() const { return scan_options_; }
@@ -306,12 +316,15 @@ class ARROW_DS_EXPORT SyncScanner : public Scanner {
 
   Result<ScanTaskIterator> Scan() override;
 
+  Status Scan(std::function<Status(TaggedRecordBatch)> visitor) override;
+
   Result<std::shared_ptr<Table>> ToTable() override;
 
  protected:
   /// \brief GetFragments returns an iterator over all Fragments in this scan.
   Result<FragmentIterator> GetFragments();
   Future<std::shared_ptr<Table>> ToTableInternal(internal::Executor* cpu_executor);
+  Result<ScanTaskIterator> ScanInternal();
 
   std::shared_ptr<Dataset> dataset_;
   // TODO(ARROW-8065) remove fragment_ after a Dataset is constuctible from fragments
diff --git a/cpp/src/arrow/dataset/scanner_test.cc b/cpp/src/arrow/dataset/scanner_test.cc
index d1d0e45b827..3a2d37f1ce1 100644
--- a/cpp/src/arrow/dataset/scanner_test.cc
+++ b/cpp/src/arrow/dataset/scanner_test.cc
@@ -19,12 +19,19 @@
 
 #include <memory>
 
+#include <gmock/gmock.h>
+
+#include "arrow/compute/api_scalar.h"
+#include "arrow/compute/api_vector.h"
+#include "arrow/compute/cast.h"
 #include "arrow/dataset/scanner_internal.h"
 #include "arrow/dataset/test_util.h"
 #include "arrow/record_batch.h"
 #include "arrow/table.h"
 #include "arrow/testing/generator.h"
+#include "arrow/testing/gtest_util.h"
 #include "arrow/testing/util.h"
+#include "arrow/util/range.h"
 
 using testing::ElementsAre;
 using testing::IsEmpty;
@@ -36,8 +43,11 @@ constexpr int64_t kNumberChildDatasets = 2;
 constexpr int64_t kNumberBatches = 16;
 constexpr int64_t kBatchSize = 1024;
 
-class TestScanner : public DatasetFixtureMixin {
+class TestScanner : public DatasetFixtureMixin,
+                    public ::testing::WithParamInterface<bool> {
  protected:
+  bool UseThreads() { return GetParam(); }
+
   std::shared_ptr<Scanner> MakeScanner(std::shared_ptr<RecordBatch> batch) {
     std::vector<std::shared_ptr<RecordBatch>> batches{static_cast<size_t>(kNumberBatches),
                                                       batch};
@@ -48,6 +58,7 @@ class TestScanner : public DatasetFixtureMixin {
     EXPECT_OK_AND_ASSIGN(auto dataset, UnionDataset::Make(batch->schema(), children));
 
     ScannerBuilder builder(dataset, options_);
+    ARROW_EXPECT_OK(builder.UseThreads(UseThreads()));
     EXPECT_OK_AND_ASSIGN(auto scanner, builder.Finish());
     return scanner;
   }
@@ -79,25 +90,25 @@ class TestScanner : public DatasetFixtureMixin {
   }
 };
 
-TEST_F(TestScanner, Scan) {
+TEST_P(TestScanner, Scan) {
   SetSchema({field("i32", int32()), field("f64", float64())});
   auto batch = ConstantArrayGenerator::Zeroes(kBatchSize, schema_);
   AssertScannerEqualsRepetitionsOf(MakeScanner(batch), batch);
 }
 
-TEST_F(TestScanner, ScanBatches) {
+TEST_P(TestScanner, ScanBatches) {
   SetSchema({field("i32", int32()), field("f64", float64())});
   auto batch = ConstantArrayGenerator::Zeroes(kBatchSize, schema_);
   AssertScanBatchesEqualRepetitionsOf(MakeScanner(batch), batch);
 }
 
-TEST_F(TestScanner, ScanBatchesUnordered) {
+TEST_P(TestScanner, ScanBatchesUnordered) {
   SetSchema({field("i32", int32()), field("f64", float64())});
   auto batch = ConstantArrayGenerator::Zeroes(kBatchSize, schema_);
   AssertScanBatchesUnorderedEqualRepetitionsOf(MakeScanner(batch), batch);
 }
 
-TEST_F(TestScanner, ScanWithCappedBatchSize) {
+TEST_P(TestScanner, ScanWithCappedBatchSize) {
   SetSchema({field("i32", int32()), field("f64", float64())});
   auto batch = ConstantArrayGenerator::Zeroes(kBatchSize, schema_);
   options_->batch_size = kBatchSize / 2;
@@ -106,7 +117,7 @@ TEST_F(TestScanner, ScanWithCappedBatchSize) {
                                    kNumberChildDatasets * kNumberBatches * 2);
 }
 
-TEST_F(TestScanner, FilteredScan) {
+TEST_P(TestScanner, FilteredScan) {
   SetSchema({field("f64", float64())});
 
   double value = 0.5;
@@ -136,7 +147,7 @@ TEST_F(TestScanner, FilteredScan) {
   AssertScannerEqualsRepetitionsOf(MakeScanner(batch), filtered_batch);
 }
 
-TEST_F(TestScanner, MaterializeMissingColumn) {
+TEST_P(TestScanner, MaterializeMissingColumn) {
   SetSchema({field("i32", int32()), field("f64", float64())});
   auto batch_missing_f64 =
       ConstantArrayGenerator::Zeroes(kBatchSize, schema({field("i32", int32())}));
@@ -159,7 +170,7 @@ TEST_F(TestScanner, MaterializeMissingColumn) {
   AssertScannerEqualsRepetitionsOf(scanner, batch_with_f64);
 }
 
-TEST_F(TestScanner, ToTable) {
+TEST_P(TestScanner, ToTable) {
   SetSchema({field("i32", int32()), field("f64", float64())});
   auto batch = ConstantArrayGenerator::Zeroes(kBatchSize, schema_);
   std::vector<std::shared_ptr<RecordBatch>> batches{kNumberBatches * kNumberChildDatasets,
@@ -170,17 +181,128 @@ TEST_F(TestScanner, ToTable) {
   auto scanner = MakeScanner(batch);
   std::shared_ptr<Table> actual;
 
-  options_->use_threads = false;
-  ASSERT_OK_AND_ASSIGN(actual, scanner->ToTable());
-  AssertTablesEqual(*expected, *actual);
-
   // There is no guarantee on the ordering when using multiple threads, but
   // since the RecordBatch is always the same it will pass.
-  options_->use_threads = true;
   ASSERT_OK_AND_ASSIGN(actual, scanner->ToTable());
   AssertTablesEqual(*expected, *actual);
 }
 
+TEST_P(TestScanner, ScanWithVisitor) {
+  SetSchema({field("i32", int32()), field("f64", float64())});
+  auto batch = ConstantArrayGenerator::Zeroes(kBatchSize, schema_);
+  auto scanner = MakeScanner(batch);
+  ASSERT_OK(scanner->Scan([batch](TaggedRecordBatch scanned_batch) {
+    AssertBatchesEqual(*batch, *scanned_batch.record_batch);
+    return Status::OK();
+  }));
+}
+
+TEST_P(TestScanner, TakeIndices) {
+  SetSchema({field("i32", int32()), field("f64", float64())});
+  ArrayVector arrays(2);
+  ArrayFromVector<Int32Type>(internal::Iota<int32_t>(kBatchSize), &arrays[0]);
+  ArrayFromVector<DoubleType>(internal::Iota<double>(static_cast<double>(kBatchSize)),
+                              &arrays[1]);
+  auto batch = RecordBatch::Make(schema_, kBatchSize, arrays);
+
+  auto scanner = MakeScanner(batch);
+
+  std::shared_ptr<Array> indices;
+  {
+    ArrayFromVector<Int64Type>(internal::Iota(kBatchSize), &indices);
+    ASSERT_OK_AND_ASSIGN(auto taken, scanner->TakeRows(*indices));
+    ASSERT_OK_AND_ASSIGN(auto expected, Table::FromRecordBatches({batch}));
+    ASSERT_EQ(expected->num_rows(), kBatchSize);
+    AssertTablesEqual(*expected, *taken);
+  }
+  {
+    ArrayFromVector<Int64Type>({7, 5, 3, 1}, &indices);
+    ASSERT_OK_AND_ASSIGN(auto taken, scanner->TakeRows(*indices));
+    ASSERT_OK_AND_ASSIGN(auto table, scanner->ToTable());
+    ASSERT_OK_AND_ASSIGN(auto expected, compute::Take(table, *indices));
+    ASSERT_EQ(expected.table()->num_rows(), 4);
+    AssertTablesEqual(*expected.table(), *taken);
+  }
+  {
+    ArrayFromVector<Int64Type>({kBatchSize + 2, kBatchSize + 1}, &indices);
+    ASSERT_OK_AND_ASSIGN(auto table, scanner->ToTable());
+    ASSERT_OK_AND_ASSIGN(auto taken, scanner->TakeRows(*indices));
+    ASSERT_OK_AND_ASSIGN(auto expected, compute::Take(table, *indices));
+    ASSERT_EQ(expected.table()->num_rows(), 2);
+    AssertTablesEqual(*expected.table(), *taken);
+  }
+  {
+    ArrayFromVector<Int64Type>({1, 3, 5, 7, kBatchSize + 1, 2 * kBatchSize + 2},
+                               &indices);
+    ASSERT_OK_AND_ASSIGN(auto taken, scanner->TakeRows(*indices));
+    ASSERT_OK_AND_ASSIGN(auto table, scanner->ToTable());
+    ASSERT_OK_AND_ASSIGN(auto expected, compute::Take(table, *indices));
+    ASSERT_EQ(expected.table()->num_rows(), 6);
+    AssertTablesEqual(*expected.table(), *taken);
+  }
+  {
+    auto base = kNumberChildDatasets * kNumberBatches * kBatchSize;
+    ArrayFromVector<Int64Type>({base + 1}, &indices);
+    EXPECT_RAISES_WITH_MESSAGE_THAT(
+        IndexError, ::testing::HasSubstr("Some indices were out of bounds: 32769"),
+        scanner->TakeRows(*indices));
+  }
+  {
+    auto base = kNumberChildDatasets * kNumberBatches * kBatchSize;
+    ArrayFromVector<Int64Type>(
+        {1, 2, base + 1, base + 2, base + 3, base + 4, base + 5, base + 6}, &indices);
+    EXPECT_RAISES_WITH_MESSAGE_THAT(
+        IndexError,
+        ::testing::HasSubstr("Some indices were out of bounds: 32769, 32770, 32771, ..."),
+        scanner->TakeRows(*indices));
+  }
+}
+
+class FailingFragment : public InMemoryFragment {
+ public:
+  using InMemoryFragment::InMemoryFragment;
+  Result<ScanTaskIterator> Scan(std::shared_ptr<ScanOptions> options) override {
+    int index = 0;
+    auto self = shared_from_this();
+    return MakeFunctionIterator([=]() mutable -> Result<std::shared_ptr<ScanTask>> {
+      if (index > 16) {
+        return Status::Invalid("Oh no, we failed!");
+      }
+      RecordBatchVector batches = {record_batches_[index++ % record_batches_.size()]};
+      return std::make_shared<InMemoryScanTask>(batches, options, self);
+    });
+  }
+};
+
+TEST_P(TestScanner, ScanBatchesFailure) {
+  SetSchema({field("i32", int32()), field("f64", float64())});
+  auto batch = ConstantArrayGenerator::Zeroes(kBatchSize, schema_);
+  RecordBatchVector batches = {batch, batch, batch, batch};
+
+  ScannerBuilder builder(schema_, std::make_shared<FailingFragment>(batches), options_);
+  ASSERT_OK(builder.UseThreads(UseThreads()));
+  ASSERT_OK_AND_ASSIGN(auto scanner, builder.Finish());
+
+  ASSERT_OK_AND_ASSIGN(auto batch_it, scanner->ScanBatches());
+
+  int counter = 0;
+  while (true) {
+    // Make sure we get all batches that were yielded before the failing scan task
+    auto maybe_batch = batch_it.Next();
+    if (counter++ <= 16) {
+      ASSERT_OK_AND_ASSIGN(auto scanned_batch, maybe_batch);
+      AssertBatchesEqual(*batch, *scanned_batch.record_batch);
+      ASSERT_NE(nullptr, scanned_batch.fragment);
+    } else {
+      EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, ::testing::HasSubstr("Oh no, we failed!"),
+                                      maybe_batch);
+      break;
+    }
+  }
+}
+
+INSTANTIATE_TEST_SUITE_P(TestScannerThreading, TestScanner, ::testing::Bool());
+
 class TestScannerBuilder : public ::testing::Test {
   void SetUp() override {
     DatasetVector sources;
diff --git a/cpp/src/arrow/dataset/test_util.h b/cpp/src/arrow/dataset/test_util.h
index ea4c41e63c5..1d1266de671 100644
--- a/cpp/src/arrow/dataset/test_util.h
+++ b/cpp/src/arrow/dataset/test_util.h
@@ -184,10 +184,13 @@ class DatasetFixtureMixin : public ::testing::Test {
   /// record batches yielded by a scanner.
   void AssertScannerEquals(RecordBatchReader* expected, Scanner* scanner,
                            bool ensure_drained = true) {
-    ASSERT_OK_AND_ASSIGN(auto it, scanner->Scan());
+    ASSERT_OK_AND_ASSIGN(auto it, scanner->ScanBatches());
 
-    ARROW_EXPECT_OK(it.Visit([&](std::shared_ptr<ScanTask> task) -> Status {
-      AssertScanTaskEquals(expected, task.get(), false);
+    ARROW_EXPECT_OK(it.Visit([&](TaggedRecordBatch batch) -> Status {
+      std::shared_ptr<RecordBatch> lhs;
+      RETURN_NOT_OK(expected->ReadNext(&lhs));
+      EXPECT_NE(lhs, nullptr);
+      AssertBatchesEqual(*lhs, *batch.record_batch);
       return Status::OK();
     }));
 
diff --git a/cpp/src/jni/dataset/jni_wrapper.cc b/cpp/src/jni/dataset/jni_wrapper.cc
index 196bf2b5c05..d61fb3f964e 100644
--- a/cpp/src/jni/dataset/jni_wrapper.cc
+++ b/cpp/src/jni/dataset/jni_wrapper.cc
@@ -140,55 +140,29 @@ class ReserveFromJava : public arrow::dataset::jni::ReservationListener {
 class DisposableScannerAdaptor {
  public:
   DisposableScannerAdaptor(std::shared_ptr<arrow::dataset::Scanner> scanner,
-                           arrow::dataset::ScanTaskIterator task_itr) {
-    this->scanner_ = std::move(scanner);
-    this->task_itr_ = std::move(task_itr);
-  }
+                           arrow::dataset::TaggedRecordBatchIterator batch_itr)
+      : scanner_(std::move(scanner)), batch_itr_(std::move(batch_itr)) {}
 
   static arrow::Result<std::shared_ptr<DisposableScannerAdaptor>> Create(
       std::shared_ptr<arrow::dataset::Scanner> scanner) {
-    ARROW_ASSIGN_OR_RAISE(arrow::dataset::ScanTaskIterator task_itr, scanner->Scan())
-    return std::make_shared<DisposableScannerAdaptor>(scanner, std::move(task_itr));
+    ARROW_ASSIGN_OR_RAISE(auto batch_itr, scanner->ScanBatches())
+    return std::make_shared<DisposableScannerAdaptor>(scanner, std::move(batch_itr));
   }
 
   arrow::Result<std::shared_ptr<arrow::RecordBatch>> Next() {
-    do {
-      ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::RecordBatch> batch, NextBatch())
-      if (batch != nullptr) {
-        return batch;
-      }
-      // batch is null, current task is fully consumed
-      ARROW_ASSIGN_OR_RAISE(bool has_next_task, NextTask())
-      if (!has_next_task) {
-        // no more tasks
-        return nullptr;
-      }
-      // new task appended, read again
-    } while (true);
+    ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::RecordBatch> batch, NextBatch());
+    return batch;
   }
 
   const std::shared_ptr<arrow::dataset::Scanner>& GetScanner() const { return scanner_; }
 
  private:
-  arrow::dataset::ScanTaskIterator task_itr_;
   std::shared_ptr<arrow::dataset::Scanner> scanner_;
-  std::shared_ptr<arrow::dataset::ScanTask> current_task_ = nullptr;
-  arrow::RecordBatchIterator current_batch_itr_ =
-      arrow::MakeEmptyIterator<std::shared_ptr<arrow::RecordBatch>>();
-
-  arrow::Result<bool> NextTask() {
-    ARROW_ASSIGN_OR_RAISE(current_task_, task_itr_.Next())
-    if (current_task_ == nullptr) {
-      return false;
-    }
-    ARROW_ASSIGN_OR_RAISE(current_batch_itr_, current_task_->Execute())
-    return true;
-  }
+  arrow::dataset::TaggedRecordBatchIterator batch_itr_;
 
   arrow::Result<std::shared_ptr<arrow::RecordBatch>> NextBatch() {
-    ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::RecordBatch> batch,
-                          current_batch_itr_.Next())
-    return batch;
+    ARROW_ASSIGN_OR_RAISE(auto batch, batch_itr_.Next())
+    return batch.record_batch;
   }
 };
 
diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx
index a6cfd711558..46f78d48d30 100644
--- a/python/pyarrow/_dataset.pyx
+++ b/python/pyarrow/_dataset.pyx
@@ -22,6 +22,7 @@
 from cpython.object cimport Py_LT, Py_EQ, Py_GT, Py_LE, Py_NE, Py_GE
 from cython.operator cimport dereference as deref
 
+import collections
 import os
 import warnings
 
@@ -369,6 +370,9 @@ cdef class Dataset(_Weakrefable):
         responsible to execute and dispatch the individual tasks, so custom
         local task scheduling can be implemented.
 
+        .. deprecated:: 4.0.0
+           Use `to_batches` instead.
+
         Parameters
         ----------
         columns : list of str, default None
@@ -914,6 +918,9 @@ cdef class Fragment(_Weakrefable):
         responsible to execute and dispatch the individual tasks, so custom
         local task scheduling can be implemented.
 
+        .. deprecated:: 4.0.0
+           Use `to_batches` instead.
+
         Parameters
         ----------
         schema : Schema
@@ -2569,7 +2576,8 @@ cdef class ScanTask(_Weakrefable):
 cdef class RecordBatchIterator(_Weakrefable):
     """An iterator over a sequence of record batches."""
     cdef:
-        ScanTask task
+        # An object that must be kept alive with the iterator.
+        object iterator_owner
         # Iterator is a non-POD type and Cython uses offsetof, leading
         # to a compiler warning unless wrapped like so
         shared_ptr[CRecordBatchIterator] iterator
@@ -2578,10 +2586,10 @@ cdef class RecordBatchIterator(_Weakrefable):
         _forbid_instantiation(self.__class__, subclasses_instead=False)
 
     @staticmethod
-    cdef wrap(ScanTask task, CRecordBatchIterator iterator):
+    cdef wrap(object owner, CRecordBatchIterator iterator):
         cdef RecordBatchIterator self = \
             RecordBatchIterator.__new__(RecordBatchIterator)
-        self.task = task
+        self.iterator_owner = owner
         self.iterator = make_shared[CRecordBatchIterator](move(iterator))
         return self
 
@@ -2597,6 +2605,43 @@ cdef class RecordBatchIterator(_Weakrefable):
         return pyarrow_wrap_batch(record_batch)
 
 
+class TaggedRecordBatch(collections.namedtuple(
+        "TaggedRecordBatch", ["record_batch", "fragment"])):
+    """A combination of a record batch and the fragment it came from."""
+
+
+cdef class TaggedRecordBatchIterator(_Weakrefable):
+    """An iterator over a sequence of record batches with fragments."""
+    cdef:
+        object iterator_owner
+        shared_ptr[CTaggedRecordBatchIterator] iterator
+
+    def __init__(self):
+        _forbid_instantiation(self.__class__, subclasses_instead=False)
+
+    @staticmethod
+    cdef wrap(object owner, CTaggedRecordBatchIterator iterator):
+        cdef TaggedRecordBatchIterator self = \
+            TaggedRecordBatchIterator.__new__(TaggedRecordBatchIterator)
+        self.iterator_owner = owner
+        self.iterator = make_shared[CTaggedRecordBatchIterator](
+            move(iterator))
+        return self
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        cdef CTaggedRecordBatch batch
+        with nogil:
+            batch = GetResultValue(move(self.iterator.get().Next()))
+        if batch.record_batch == NULL:
+            raise StopIteration
+        return TaggedRecordBatch(
+            record_batch=pyarrow_wrap_batch(batch.record_batch),
+            fragment=Fragment.wrap(batch.fragment))
+
+
 _DEFAULT_BATCH_SIZE = 2**20
 
 
@@ -2753,10 +2798,22 @@ cdef class Scanner(_Weakrefable):
         The caller is responsible to dispatch/schedule said tasks. Tasks should
         be safe to run in a concurrent fashion and outlive the iterator.
 
+        .. deprecated:: 4.0.0
+           Use `to_batches` instead.
+
         Returns
         -------
         scan_tasks : iterator of ScanTask
         """
+        import warnings
+        warnings.warn("Scanner.scan is deprecated as of 4.0.0, "
+                      "please use Scanner.to_batches instead.",
+                      DeprecationWarning)
+        # Planned for removal in ARROW-11782
+        # Make this method eager so the warning appears immediately
+        return self._scan()
+
+    def _scan(self):
         for maybe_task in GetResultValue(self.scanner.Scan()):
             yield ScanTask.wrap(GetResultValue(move(maybe_task)))
 
@@ -2770,9 +2827,27 @@ cdef class Scanner(_Weakrefable):
         -------
         record_batches : iterator of RecordBatch
         """
-        for task in self.scan():
-            for batch in task.execute():
-                yield batch
+        def _iterator(batch_iter):
+            for batch in batch_iter:
+                yield batch.record_batch
+        # Don't make ourselves a generator so errors are raised immediately
+        return _iterator(self.scan_batches())
+
+    def scan_batches(self):
+        """Consume a Scanner in record batches with corresponding fragments.
+
+        Sequentially executes the ScanTasks as the returned generator gets
+        consumed.
+
+        Returns
+        -------
+        record_batches : iterator of TaggedRecordBatch
+        """
+        cdef CTaggedRecordBatchIterator iterator
+        with nogil:
+            iterator = move(GetResultValue(self.scanner.ScanBatches()))
+        # Don't make ourselves a generator so errors are raised immediately
+        return TaggedRecordBatchIterator.wrap(self, move(iterator))
 
     def to_table(self):
         """Convert a Scanner into a Table.
@@ -2791,6 +2866,23 @@ cdef class Scanner(_Weakrefable):
 
         return pyarrow_wrap_table(GetResultValue(result))
 
+    def take(self, object indices):
+        """Select rows of data by index.
+
+        Will only consume as many batches of the underlying dataset as
+        needed. Otherwise, this is equivalent to
+        ``to_table().take(indices)``.
+
+        Returns
+        -------
+        table : Table
+        """
+        cdef CResult[shared_ptr[CTable]] result
+        cdef shared_ptr[CArray] c_indices = pyarrow_unwrap_array(indices)
+        with nogil:
+            result = self.scanner.TakeRows(deref(c_indices))
+        return pyarrow_wrap_table(GetResultValue(result))
+
 
 def _get_partition_keys(Expression partition_expression):
     """
diff --git a/python/pyarrow/dataset.py b/python/pyarrow/dataset.py
index 615cb2516dc..ea17507c23d 100644
--- a/python/pyarrow/dataset.py
+++ b/python/pyarrow/dataset.py
@@ -50,6 +50,7 @@
     RowGroupInfo,
     Scanner,
     ScanTask,
+    TaggedRecordBatch,
     UnionDataset,
     UnionDatasetFactory,
     _get_partition_keys,
diff --git a/python/pyarrow/includes/libarrow_dataset.pxd b/python/pyarrow/includes/libarrow_dataset.pxd
index 06ec69c8b80..4da29783b20 100644
--- a/python/pyarrow/includes/libarrow_dataset.pxd
+++ b/python/pyarrow/includes/libarrow_dataset.pxd
@@ -86,11 +86,20 @@ cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil:
         CInMemoryFragment(vector[shared_ptr[CRecordBatch]] record_batches,
                           CExpression partition_expression)
 
+    cdef cppclass CTaggedRecordBatch "arrow::dataset::TaggedRecordBatch":
+        shared_ptr[CRecordBatch] record_batch
+        shared_ptr[CFragment] fragment
+
+    ctypedef CIterator[CTaggedRecordBatch] CTaggedRecordBatchIterator \
+        "arrow::dataset::TaggedRecordBatchIterator"
+
     cdef cppclass CScanner "arrow::dataset::Scanner":
         CScanner(shared_ptr[CDataset], shared_ptr[CScanOptions])
         CScanner(shared_ptr[CFragment], shared_ptr[CScanOptions])
         CResult[CScanTaskIterator] Scan()
+        CResult[CTaggedRecordBatchIterator] ScanBatches()
         CResult[shared_ptr[CTable]] ToTable()
+        CResult[shared_ptr[CTable]] TakeRows(const CArray& indices)
         CResult[CFragmentIterator] GetFragments()
         const shared_ptr[CScanOptions]& options()
 
diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py
index 31f4e080461..26c14e14822 100644
--- a/python/pyarrow/tests/test_dataset.py
+++ b/python/pyarrow/tests/test_dataset.py
@@ -307,14 +307,17 @@ def test_dataset(dataset):
     # TODO(kszucs): test non-boolean Exprs for filter do raise
     expected_i64 = pa.array([0, 1, 2, 3, 4], type=pa.int64())
     expected_f64 = pa.array([0, 1, 2, 3, 4], type=pa.float64())
-    for task in dataset.scan():
-        assert isinstance(task, ds.ScanTask)
-        for batch in task.execute():
-            assert batch.column(0).equals(expected_i64)
-            assert batch.column(1).equals(expected_f64)
+    with pytest.deprecated_call():
+        dataset.scan()
 
-    batches = dataset.to_batches()
-    assert all(isinstance(batch, pa.RecordBatch) for batch in batches)
+    for batch in dataset.to_batches():
+        assert isinstance(batch, pa.RecordBatch)
+        assert batch.column(0).equals(expected_i64)
+        assert batch.column(1).equals(expected_f64)
+
+    for batch in ds.Scanner.from_dataset(dataset).scan_batches():
+        assert isinstance(batch, ds.TaggedRecordBatch)
+        assert isinstance(batch.fragment, ds.Fragment)
 
     table = dataset.to_table()
     assert isinstance(table, pa.Table)
@@ -334,7 +337,8 @@ def test_dataset_execute_iterator(dataset):
     # ARROW-11596: this would segfault due to Cython raising
     # StopIteration without holding the GIL. (Fixed on Cython master,
     # post 3.0a6)
-    tasks = dataset.scan()
+    with pytest.deprecated_call():
+        tasks = dataset.scan()
     task = next(tasks)
     iterator = task.execute()
     thread = threading.Thread(target=lambda: next(iterator))
@@ -348,7 +352,6 @@ def test_scanner(dataset):
     scanner = ds.Scanner.from_dataset(dataset,
                                       memory_pool=pa.default_memory_pool())
     assert isinstance(scanner, ds.Scanner)
-    assert len(list(scanner.scan())) == 2
 
     with pytest.raises(pa.ArrowInvalid):
         ds.Scanner.from_dataset(dataset, columns=['unknown'])
@@ -357,10 +360,15 @@ def test_scanner(dataset):
                                       memory_pool=pa.default_memory_pool())
 
     assert isinstance(scanner, ds.Scanner)
-    assert len(list(scanner.scan())) == 2
-    for task in scanner.scan():
-        for batch in task.execute():
-            assert batch.num_columns == 1
+    for batch in scanner.to_batches():
+        assert batch.num_columns == 1
+
+    table = scanner.to_table()
+    for i in range(table.num_rows):
+        indices = pa.array([i])
+        assert table.take(indices) == scanner.take(indices)
+    with pytest.raises(pa.ArrowIndexError):
+        scanner.take(pa.array([table.num_rows]))
 
 
 def test_abstract_classes():
@@ -640,7 +648,6 @@ def test_filesystem_factory(mockfs, paths_or_selector, pre_buffer):
 
     dataset = factory.finish()
     assert isinstance(dataset, ds.FileSystemDataset)
-    assert len(list(dataset.scan())) == 2
 
     scanner = ds.Scanner.from_dataset(dataset)
     expected_i64 = pa.array([0, 1, 2, 3, 4], type=pa.int64())
@@ -649,18 +656,20 @@ def test_filesystem_factory(mockfs, paths_or_selector, pre_buffer):
         pa.array([0, 1, 2, 3, 4], type=pa.int32()),
         pa.array("0 1 2 3 4".split(), type=pa.string())
     )
-    for task, group, key in zip(scanner.scan(), [1, 2], ['xxx', 'yyy']):
+    iterator = scanner.scan_batches()
+    for (batch, fragment), group, key in zip(iterator, [1, 2], ['xxx', 'yyy']):
         expected_group = pa.array([group] * 5, type=pa.int32())
         expected_key = pa.array([key] * 5, type=pa.string())
         expected_const = pa.array([group - 1] * 5, type=pa.int64())
-        for batch in task.execute():
-            assert batch.num_columns == 6
-            assert batch[0].equals(expected_i64)
-            assert batch[1].equals(expected_f64)
-            assert batch[2].equals(expected_str)
-            assert batch[3].equals(expected_const)
-            assert batch[4].equals(expected_group)
-            assert batch[5].equals(expected_key)
+        # Can't compare or really introspect expressions from Python
+        assert fragment.partition_expression is not None
+        assert batch.num_columns == 6
+        assert batch[0].equals(expected_i64)
+        assert batch[1].equals(expected_f64)
+        assert batch[2].equals(expected_str)
+        assert batch[3].equals(expected_const)
+        assert batch[4].equals(expected_group)
+        assert batch[5].equals(expected_key)
 
     table = dataset.to_table()
     assert isinstance(table, pa.Table)
@@ -1719,6 +1728,7 @@ def test_construct_in_memory():
         assert dataset.to_table() == table
         assert dataset.to_table() == table
         assert next(dataset.get_fragments()).to_table() == table
+        assert pa.Table.from_batches(list(dataset.to_batches())) == table
 
     # When constructed from readers/iterators, should be one-shot
     match = "InMemoryDataset was already consumed"
diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R
index a33cf222fdc..c432a135524 100644
--- a/r/R/arrowExports.R
+++ b/r/R/arrowExports.R
@@ -504,6 +504,10 @@ dataset___Scanner__ToTable <- function(scanner){
     .Call(`_arrow_dataset___Scanner__ToTable`, scanner)
 }
 
+dataset___Scanner__ScanBatches <- function(scanner){
+    .Call(`_arrow_dataset___Scanner__ScanBatches`, scanner)
+}
+
 dataset___Scanner__head <- function(scanner, n){
     .Call(`_arrow_dataset___Scanner__head`, scanner, n)
 }
@@ -524,6 +528,10 @@ dataset___Dataset__Write <- function(file_write_options, filesystem, base_dir, p
     invisible(.Call(`_arrow_dataset___Dataset__Write`, file_write_options, filesystem, base_dir, partitioning, basename_template, scanner))
 }
 
+dataset___Scanner__TakeRows <- function(scanner, indices){
+    .Call(`_arrow_dataset___Scanner__TakeRows`, scanner, indices)
+}
+
 Int8__initialize <- function(){
     .Call(`_arrow_Int8__initialize`)
 }
diff --git a/r/R/dataset-scan.R b/r/R/dataset-scan.R
index f7ede663c7f..8bec8978098 100644
--- a/r/R/dataset-scan.R
+++ b/r/R/dataset-scan.R
@@ -56,7 +56,12 @@
 Scanner <- R6Class("Scanner", inherit = ArrowObject,
   public = list(
     ToTable = function() dataset___Scanner__ToTable(self),
-    Scan = function() dataset___Scanner__Scan(self)
+    ScanBatches = function() dataset___Scanner__ScanBatches(self),
+    Scan = function() {
+        # Planned for removal in ARROW-11782
+        .Deprecated("ScanBatches")
+        dataset___Scanner__Scan(self)
+    }
   ),
   active = list(
     schema = function() dataset___Scanner__schema(self)
@@ -142,17 +147,12 @@ map_batches <- function(X, FUN, ..., .data.frame = TRUE) {
   scanner <- Scanner$create(ensure_group_vars(X))
   FUN <- as_mapper(FUN)
   # message("Making ScanTasks")
-  lapply(scanner$Scan(), function(scan_task) {
-    # This outer lapply could be parallelized
-    # message("Making Batches")
-    lapply(scan_task$Execute(), function(batch) {
-      # message("Processing Batch")
-      # This inner lapply cannot be parallelized
-      # TODO: wrap batch in arrow_dplyr_query with X$selected_columns,
-      # X$temp_columns, and X$group_by_vars
-      # if X is arrow_dplyr_query, if some other arg (.dplyr?) == TRUE
-      FUN(batch, ...)
-    })
+  lapply(scanner$ScanBatches(), function(batch) {
+    # message("Processing Batch")
+    # TODO: wrap batch in arrow_dplyr_query with X$selected_columns,
+    # X$temp_columns, and X$group_by_vars
+    # if X is arrow_dplyr_query, if some other arg (.dplyr?) == TRUE
+    FUN(batch, ...)
   })
 }
 
diff --git a/r/R/dataset.R b/r/R/dataset.R
index 007ffc95dc8..266633964b1 100644
--- a/r/R/dataset.R
+++ b/r/R/dataset.R
@@ -285,13 +285,10 @@ tail.Dataset <- function(x, n = 6L, ...) {
   result <- list()
   batch_num <- 0
   scanner <- Scanner$create(ensure_group_vars(x))
-  for (scan_task in rev(dataset___Scanner__Scan(scanner))) {
-    for (batch in rev(scan_task$Execute())) {
-      batch_num <- batch_num + 1
-      result[[batch_num]] <- tail(batch, n)
-      n <- n - nrow(batch)
-      if (n <= 0) break
-    }
+  for (batch in rev(dataset___Scanner__ScanBatches(scanner))) {
+    batch_num <- batch_num + 1
+    result[[batch_num]] <- tail(batch, n)
+    n <- n - nrow(batch)
     if (n <= 0) break
   }
   Table$create(!!!rev(result))
@@ -314,28 +311,10 @@ tail.Dataset <- function(x, n = 6L, ...) {
 }
 
 take_dataset_rows <- function(x, i) {
-  # TODO: move this to cpp
   if (!is.numeric(i) || any(i < 0)) {
     stop("Only slicing with positive indices is supported", call. = FALSE)
   }
-  result <- list()
-  result_order <- order(i)
-  i <- sort(i) - 1L
   scanner <- Scanner$create(ensure_group_vars(x))
-  for (scan_task in dataset___Scanner__Scan(scanner)) {
-    for (batch in scan_task$Execute()) {
-      # Take all rows that are in this batch
-      this_batch_nrows <- batch$num_rows
-      in_this_batch <- i > -1L & i < this_batch_nrows
-      if (any(in_this_batch)) {
-        result[[length(result) + 1L]] <- batch$Take(i[in_this_batch])
-      }
-      i <- i - this_batch_nrows
-      if (all(i < 0L)) break
-    }
-    if (all(i < 0L)) break
-  }
-  tab <- Table$create(!!!result)
-  # Now sort
-  tab$Take(result_order - 1L)
+  i <- Array$create(i - 1)
+  dataset___Scanner__TakeRows(scanner, i)
 }
diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp
index 6dc50c3af4c..42532e6c3c2 100644
--- a/r/src/arrowExports.cpp
+++ b/r/src/arrowExports.cpp
@@ -1984,6 +1984,21 @@ extern "C" SEXP _arrow_dataset___Scanner__ToTable(SEXP scanner_sexp){
 }
 #endif
 
+// dataset.cpp
+#if defined(ARROW_R_WITH_DATASET)
+cpp11::list dataset___Scanner__ScanBatches(const std::shared_ptr<ds::Scanner>& scanner);
+extern "C" SEXP _arrow_dataset___Scanner__ScanBatches(SEXP scanner_sexp){
+BEGIN_CPP11
+	arrow::r::Input<const std::shared_ptr<ds::Scanner>&>::type scanner(scanner_sexp);
+	return cpp11::as_sexp(dataset___Scanner__ScanBatches(scanner));
+END_CPP11
+}
+#else
+extern "C" SEXP _arrow_dataset___Scanner__ScanBatches(SEXP scanner_sexp){
+	Rf_error("Cannot call dataset___Scanner__ScanBatches(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+}
+#endif
+
 // dataset.cpp
 #if defined(ARROW_R_WITH_DATASET)
 std::shared_ptr<arrow::Table> dataset___Scanner__head(const std::shared_ptr<ds::Scanner>& scanner, int n);
@@ -2066,6 +2081,22 @@ extern "C" SEXP _arrow_dataset___Dataset__Write(SEXP file_write_options_sexp, SE
 }
 #endif
 
+// dataset.cpp
+#if defined(ARROW_R_WITH_ARROW)
+std::shared_ptr<arrow::Table> dataset___Scanner__TakeRows(const std::shared_ptr<ds::Scanner>& scanner, const std::shared_ptr<arrow::Array>& indices);
+extern "C" SEXP _arrow_dataset___Scanner__TakeRows(SEXP scanner_sexp, SEXP indices_sexp){
+BEGIN_CPP11
+	arrow::r::Input<const std::shared_ptr<ds::Scanner>&>::type scanner(scanner_sexp);
+	arrow::r::Input<const std::shared_ptr<arrow::Array>&>::type indices(indices_sexp);
+	return cpp11::as_sexp(dataset___Scanner__TakeRows(scanner, indices));
+END_CPP11
+}
+#else
+extern "C" SEXP _arrow_dataset___Scanner__TakeRows(SEXP scanner_sexp, SEXP indices_sexp){
+	Rf_error("Cannot call dataset___Scanner__TakeRows(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+}
+#endif
+
 // datatype.cpp
 #if defined(ARROW_R_WITH_ARROW)
 std::shared_ptr<arrow::DataType> Int8__initialize();
@@ -6714,11 +6745,13 @@ static const R_CallMethodDef CallEntries[] = {
 		{ "_arrow_dataset___ScannerBuilder__schema", (DL_FUNC) &_arrow_dataset___ScannerBuilder__schema, 1}, 
 		{ "_arrow_dataset___ScannerBuilder__Finish", (DL_FUNC) &_arrow_dataset___ScannerBuilder__Finish, 1}, 
 		{ "_arrow_dataset___Scanner__ToTable", (DL_FUNC) &_arrow_dataset___Scanner__ToTable, 1}, 
+		{ "_arrow_dataset___Scanner__ScanBatches", (DL_FUNC) &_arrow_dataset___Scanner__ScanBatches, 1}, 
 		{ "_arrow_dataset___Scanner__head", (DL_FUNC) &_arrow_dataset___Scanner__head, 2}, 
 		{ "_arrow_dataset___Scanner__Scan", (DL_FUNC) &_arrow_dataset___Scanner__Scan, 1}, 
 		{ "_arrow_dataset___Scanner__schema", (DL_FUNC) &_arrow_dataset___Scanner__schema, 1}, 
 		{ "_arrow_dataset___ScanTask__get_batches", (DL_FUNC) &_arrow_dataset___ScanTask__get_batches, 1}, 
 		{ "_arrow_dataset___Dataset__Write", (DL_FUNC) &_arrow_dataset___Dataset__Write, 6}, 
+		{ "_arrow_dataset___Scanner__TakeRows", (DL_FUNC) &_arrow_dataset___Scanner__TakeRows, 2}, 
 		{ "_arrow_Int8__initialize", (DL_FUNC) &_arrow_Int8__initialize, 0}, 
 		{ "_arrow_Int16__initialize", (DL_FUNC) &_arrow_Int16__initialize, 0}, 
 		{ "_arrow_Int32__initialize", (DL_FUNC) &_arrow_Int32__initialize, 0}, 
diff --git a/r/src/dataset.cpp b/r/src/dataset.cpp
index dc7ccd693a2..af321d75db6 100644
--- a/r/src/dataset.cpp
+++ b/r/src/dataset.cpp
@@ -19,6 +19,8 @@
 
 #if defined(ARROW_R_WITH_DATASET)
 
+#include <arrow/array.h>
+#include <arrow/compute/api.h>
 #include <arrow/dataset/api.h>
 #include <arrow/filesystem/filesystem.h>
 #include <arrow/ipc/writer.h>
@@ -421,25 +423,42 @@ std::shared_ptr<arrow::Table> dataset___Scanner__ToTable(
   return ValueOrStop(scanner->ToTable());
 }
 
+// [[dataset::export]]
+cpp11::list dataset___Scanner__ScanBatches(const std::shared_ptr<ds::Scanner>& scanner) {
+  auto it = ValueOrStop(scanner->ScanBatches());
+  arrow::RecordBatchVector batches;
+  StopIfNotOk(it.Visit([&](ds::TaggedRecordBatch tagged_batch) {
+    batches.push_back(std::move(tagged_batch.record_batch));
+    return arrow::Status::OK();
+  }));
+  return arrow::r::to_r_list(batches);
+}
+
 // [[dataset::export]]
 std::shared_ptr<arrow::Table> dataset___Scanner__head(
     const std::shared_ptr<ds::Scanner>& scanner, int n) {
   // TODO: make this a full Slice with offset > 0
+  auto it = ValueOrStop(scanner->ScanBatches());
   std::vector<std::shared_ptr<arrow::RecordBatch>> batches;
-  std::shared_ptr<arrow::RecordBatch> current_batch;
-
-  for (auto st : ValueOrStop(scanner->Scan())) {
-    for (auto b : ValueOrStop(ValueOrStop(st)->Execute())) {
-      current_batch = ValueOrStop(b);
-      batches.push_back(current_batch->Slice(0, n));
-      n -= current_batch->num_rows();
-      if (n < 0) break;
-    }
+  while (true) {
+    auto current_batch = ValueOrStop(it.Next());
+    if (arrow::IsIterationEnd(current_batch)) break;
+    batches.push_back(current_batch.record_batch->Slice(0, n));
+    n -= current_batch.record_batch->num_rows();
     if (n < 0) break;
   }
   return ValueOrStop(arrow::Table::FromRecordBatches(std::move(batches)));
 }
 
+// TODO (ARROW-11782) Remove calls to Scan()
+#if defined(__GNUC__) || defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+#elif defined(_MSC_VER)
+#pragma warning(push)
+#pragma warning(disable : 4996)
+#endif
+
 // [[dataset::export]]
 cpp11::list dataset___Scanner__Scan(const std::shared_ptr<ds::Scanner>& scanner) {
   auto it = ValueOrStop(scanner->Scan());
@@ -454,6 +473,12 @@ cpp11::list dataset___Scanner__Scan(const std::shared_ptr<ds::Scanner>& scanner)
   return arrow::r::to_r_list(out);
 }
 
+#if defined(__GNUC__) || defined(__clang__)
+#pragma GCC diagnostic pop
+#elif defined(_MSC_VER)
+#pragma warning(pop)
+#endif
+
 // [[dataset::export]]
 std::shared_ptr<arrow::Schema> dataset___Scanner__schema(
     const std::shared_ptr<ds::Scanner>& sc) {
@@ -489,4 +514,11 @@ void dataset___Dataset__Write(
   StopIfNotOk(ds::FileSystemDataset::Write(opts, scanner));
 }
 
+// [[arrow::export]]
+std::shared_ptr<arrow::Table> dataset___Scanner__TakeRows(
+    const std::shared_ptr<ds::Scanner>& scanner,
+    const std::shared_ptr<arrow::Array>& indices) {
+  return ValueOrStop(scanner->TakeRows(*indices));
+}
+
 #endif
diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R
index 9943292bf91..eb7408c982f 100644
--- a/r/tests/testthat/test-dataset.R
+++ b/r/tests/testthat/test-dataset.R
@@ -1345,6 +1345,21 @@ test_that("Dataset and query print methods", {
   )
 })
 
+test_that("Scanner$Scan is deprecated", {
+  ds <- open_dataset(ipc_dir, partitioning = "part", format = "feather")
+  expect_deprecated(
+    ds$NewScan()$Finish()$Scan(),
+    "ScanBatches"
+  )
+})
+
+test_that("Scanner$ScanBatches", {
+  ds <- open_dataset(ipc_dir, format = "feather")
+  batches <- ds$NewScan()$Finish()$ScanBatches()
+  table <- Table$create(!!!batches)
+  expect_equivalent(as.data.frame(table), rbind(df1, df2))
+})
+
 expect_scan_result <- function(ds, schm) {
   sb <- ds$NewScan()
   expect_r6_class(sb, "ScannerBuilder")

From 818c57c4fc758dc796d0df52657ba630532162a2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Thu, 15 Apr 2021 10:33:50 +0900
Subject: [PATCH 043/719] ARROW-12381: [Packaging][Python] macOS wheels are
 built with wrong package kind
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #10027 from kszucs/wheel-package-kind

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 ci/scripts/python_wheel_macos_build.sh     | 2 +-
 ci/scripts/python_wheel_manylinux_build.sh | 2 +-
 ci/scripts/python_wheel_windows_build.bat  | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/ci/scripts/python_wheel_macos_build.sh b/ci/scripts/python_wheel_macos_build.sh
index 7a021f70f74..93e4939af23 100755
--- a/ci/scripts/python_wheel_macos_build.sh
+++ b/ci/scripts/python_wheel_macos_build.sh
@@ -75,7 +75,7 @@ cmake \
     -DARROW_JEMALLOC=${ARROW_JEMALLOC} \
     -DARROW_MIMALLOC=${ARROW_MIMALLOC} \
     -DARROW_ORC=${ARROW_ORC} \
-    -DARROW_PACKAGE_KIND="manylinux${MANYLINUX_VERSION}" \
+    -DARROW_PACKAGE_KIND="python-wheel-macos" \
     -DARROW_PARQUET=${ARROW_PARQUET} \
     -DARROW_PLASMA=${ARROW_PLASMA} \
     -DARROW_PYTHON=ON \
diff --git a/ci/scripts/python_wheel_manylinux_build.sh b/ci/scripts/python_wheel_manylinux_build.sh
index 83aa623b49b..312e1c3b9b7 100755
--- a/ci/scripts/python_wheel_manylinux_build.sh
+++ b/ci/scripts/python_wheel_manylinux_build.sh
@@ -87,7 +87,7 @@ cmake \
     -DARROW_JEMALLOC=${ARROW_JEMALLOC} \
     -DARROW_MIMALLOC=${ARROW_MIMALLOC} \
     -DARROW_ORC=${ARROW_ORC} \
-    -DARROW_PACKAGE_KIND="manylinux${MANYLINUX_VERSION}" \
+    -DARROW_PACKAGE_KIND="python-wheel-manylinux${MANYLINUX_VERSION}" \
     -DARROW_PARQUET=${ARROW_PARQUET} \
     -DARROW_PLASMA=${ARROW_PLASMA} \
     -DARROW_PYTHON=ON \
diff --git a/ci/scripts/python_wheel_windows_build.bat b/ci/scripts/python_wheel_windows_build.bat
index 18c1b657b21..23be7f512d6 100644
--- a/ci/scripts/python_wheel_windows_build.bat
+++ b/ci/scripts/python_wheel_windows_build.bat
@@ -64,7 +64,7 @@ cmake ^
     -DARROW_HDFS=%ARROW_HDFS% ^
     -DARROW_MIMALLOC=%ARROW_MIMALLOC% ^
     -DARROW_ORC=%ARROW_ORC% ^
-    -DARROW_PACKAGE_KIND="wheel-windows" ^
+    -DARROW_PACKAGE_KIND="python-wheel-windows" ^
     -DARROW_PARQUET=%ARROW_PARQUET% ^
     -DARROW_PYTHON=ON ^
     -DARROW_S3=%ARROW_S3% ^

From 1c0641d3277db33cb3530248be3f31ee01fc0a8f Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Wed, 14 Apr 2021 19:32:37 -0700
Subject: [PATCH 044/719] ARROW-12017: [R] [Documentation] Make proper
 developing arrow docs

Closes #9898 from jonkeane/ARROW-12017-dev-docs

Lead-authored-by: Jonathan Keane <jkeane@gmail.com>
Co-authored-by: Neal Richardson <neal.p.richardson@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 dev/tasks/r/github.devdocs.yml |  92 ++++++
 dev/tasks/tasks.yml            |   4 +
 docker-compose.yml             |   2 +-
 r/NEWS.md                      |   5 +-
 r/_pkgdown.yml                 |  15 +
 r/configure                    |  10 +-
 r/pkgdown/extra.js             |  65 +++++
 r/tools/nixlibs.R              |   2 +-
 r/vignettes/developing.Rmd     | 520 +++++++++++++++++++++++++++++++++
 r/vignettes/install.Rmd        |  67 +----
 10 files changed, 724 insertions(+), 58 deletions(-)
 create mode 100644 dev/tasks/r/github.devdocs.yml
 create mode 100644 r/pkgdown/extra.js
 create mode 100644 r/vignettes/developing.Rmd

diff --git a/dev/tasks/r/github.devdocs.yml b/dev/tasks/r/github.devdocs.yml
new file mode 100644
index 00000000000..1224a2555c8
--- /dev/null
+++ b/dev/tasks/r/github.devdocs.yml
@@ -0,0 +1,92 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# NOTE: must set "Crossbow" as name to have the badge links working in the
+# github comment reports!
+name: Crossbow
+
+on:
+  push:
+    branches:
+      - "*-github-*"
+
+jobs:
+  devdocs:
+    name: 'R devdocs {{ "${{ matrix.os }}" }} system install: {{ "${{ matrix.system-install }}" }}'
+    runs-on: {{ "${{ matrix.os }}" }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [macOS-latest, ubuntu-20.04]
+        # should the install method install libarrow into a system directory
+        # or a temporary directory. old is the same as a temporary
+        # directory, but an old version of libarrow will be installed
+        # into a system directory first (to make sure we can link correctly when building)
+        system-install: [true, false]
+
+    steps:
+      - name: Checkout Arrow
+        run: |
+          git clone --no-checkout {{ arrow.remote }} arrow
+          git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
+          git -C arrow checkout FETCH_HEAD
+          git -C arrow submodule update --init --recursive
+      - uses: r-lib/actions/setup-r@v1
+      - uses: r-lib/actions/setup-pandoc@v1
+      - name: Install knitr, rmarkdown
+        run: |
+          install.packages(c("rmarkdown", "knitr", "sessioninfo"))
+        shell: Rscript {0}
+      - name: Session info
+        run: |
+          options(width = 100)
+          pkgs <- installed.packages()[, "Package"]
+          sessioninfo::session_info(pkgs, include_base = TRUE)
+        shell: Rscript {0}
+      - name: Write the install script
+        env:
+          RUN_DEVDOCS: TRUE
+          DEVDOCS_MACOS: {{ "${{contains(matrix.os, 'macOS')}}" }}
+          DEVDOCS_UBUNTU: {{ "${{contains(matrix.os, 'ubuntu')}}" }}
+          DEVDOCS_SYSTEM_INSTALL: {{ "${{contains(matrix.system-install, 'true')}}" }}
+          DEVDOCS_PRIOR_SYSTEM_INSTALL: {{ "${{contains(matrix.system-install, 'old')}}" }}
+        run: |
+          # This isn't actually rendering the docs, but will save arrow/r/vignettes/script.sh 
+          # which can be sourced to install arrow.
+          rmarkdown::render("arrow/r/vignettes/developing.Rmd")
+        shell: Rscript {0}
+      - name: Install from the devdocs
+        env:
+          LIBARROW_BINARY: FALSE
+          ARROW_R_DEV: TRUE
+        run: bash arrow/r/vignettes/script.sh
+        shell: bash
+      - name: Ensure that the Arrow package is loadable and we have the correct one
+        run: |
+          echo $LD_LIBRARY_PATH
+          R --no-save <<EOF
+          Sys.getenv("LD_LIBRARY_PATH")
+          library(arrow)
+          arrow_info()
+          EOF
+        shell: bash -l {0}
+      - name: Save the install script
+        uses: actions/upload-artifact@v2
+        with:
+          name: {{ "devdocs-script_os-${{ matrix.os }}_sysinstall-${{ matrix.system-install }}" }}
+          path: arrow/r/vignettes/script.sh
+        if: always()
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index 0080b387663..8014c4ba353 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -1389,6 +1389,10 @@ tasks:
     ci: github
     template: r/github.macos-linux.local.yml
 
+  test-r-devdocs:
+    ci: github
+    template: r/github.devdocs.yml
+
   test-r-rhub-ubuntu-gcc-release:
     ci: azure
     template: r/azure.linux.yml
diff --git a/docker-compose.yml b/docker-compose.yml
index b22b1f6b536..4a3092ec04d 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1016,7 +1016,7 @@ services:
     shm_size: *shm-size
     environment:
       LIBARROW_DOWNLOAD: "false"
-      ARROW_HOME: "/arrow"
+      ARROW_SOURCE_HOME: "/arrow"
       ARROW_R_DEV: ${ARROW_R_DEV}
       # To test for CRAN release, delete ^^ these two env vars so we download the Apache release
       ARROW_USE_PKG_CONFIG: "false"
diff --git a/r/NEWS.md b/r/NEWS.md
index 664649537e7..cd8c31fb8b0 100644
--- a/r/NEWS.md
+++ b/r/NEWS.md
@@ -54,7 +54,8 @@ Over 100 functions can now be called on Arrow objects inside a `dplyr` verb:
 * Similarly, `Schema` can now be edited by assigning in new types. This enables using the CSV reader to detect the schema of a file, modify the `Schema` object for any columns that you want to read in as a different type, and then use that `Schema` to read the data.
 * Better validation when creating a `Table` with a schema, with columns of different lengths, and with scalar value recycling
 * Reading Parquet files in Japanese or other multi-byte locales on Windows no longer hangs (workaround for a [bug in libstdc++](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98723); thanks @yutannihilation for the persistence in discovering this!)
-* If you attempt to read string data that has embedded nul (`\0`) characters, the error message now informs you that you can set `options(arrow.skip_nul = TRUE)` to strip them out. It is not recommended to set this option by default since this code path is sigificantly slower, and most string data does not contain nuls.
+* If you attempt to read string data that has embedded nul (`\0`) characters, the error message now informs you that you can set `options(arrow.skip_nul = TRUE)` to strip them out. It is not recommended to set this option by default since this code path is significantly slower, and most string data does not contain nuls.
+* `read_json_arrow()` now accepts a schema: `read_json_arrow("file.json", schema = schema(col_a = float64(), col_b = string()))`
 
 ## Installation and configuration
 
@@ -64,6 +65,8 @@ Over 100 functions can now be called on Arrow objects inside a `dplyr` verb:
 * Setting the `ARROW_DEFAULT_MEMORY_POOL` environment variable to switch memory allocators now works correctly when the Arrow C++ library has been statically linked (as is usually the case when installing from CRAN).
 * The `arrow_info()` function now reports on the additional optional features, as well as the detected SIMD level. If key features or compression libraries are not enabled in the build, `arrow_info()` will refer to the installation vignette for guidance on how to install a more complete build, if desired.
 * If you attempt to read a file that was compressed with a codec that your Arrow build does not contain support for, the error message now will tell you how to reinstall Arrow with that feature enabled.
+* A new vignette about developer environment setup `vignette("developing", package = "arrow")`.
+* When building from source, you can use the environment variable `ARROW_HOME` to point to a specific directory where the Arrow libraries are. This is similar to passing `INCLUDE_DIR` and `LIB_DIR`.
 
 # arrow 3.0.0
 
diff --git a/r/_pkgdown.yml b/r/_pkgdown.yml
index ab20769c3e6..bb77b416aab 100644
--- a/r/_pkgdown.yml
+++ b/r/_pkgdown.yml
@@ -57,6 +57,21 @@ navbar:
           href: https://arrow.apache.org/docs/python
         - text: R
           href: index.html
+    articles:
+      text: Articles
+      menu:
+        - text: Installing the Arrow Package on Linux
+          href: articles/install.html
+        - text: Working with Arrow Datasets and dplyr
+          href: articles/dataset.html
+        - text: Working with Cloud Storage (S3)
+          href: articles/fs.html
+        - text: Apache Arrow in Python and R with reticulate
+          href: articles/python.html
+        - text: Connecting to Flight RPC Servers
+          href: articles/flight.html
+        - text: Arrow R Developer Guide
+          href: articles/developing.html
 reference:
   - title: Multi-file datasets
     contents:
diff --git a/r/configure b/r/configure
index 3473fd5cdc0..eea7af6cf5b 100755
--- a/r/configure
+++ b/r/configure
@@ -66,8 +66,12 @@ if [ "$FORCE_AUTOBREW" = "true" ] || [ "$FORCE_BUNDLED_BUILD" = "true" ]; then
 fi
 
 # Note that cflags may be empty in case of success
-if [ "$INCLUDE_DIR" ] || [ "$LIB_DIR" ]; then
-  echo "*** Using INCLUDE_DIR/LIB_DIR"
+if [ "$ARROW_HOME" ]; then
+  echo "*** Using ARROW_HOME as the source of libarrow"
+  PKG_CFLAGS="-I$ARROW_HOME/include $PKG_CFLAGS"
+  PKG_DIRS="-L$ARROW_HOME/lib"
+elif [ "$INCLUDE_DIR" ] && [ "$LIB_DIR" ]; then
+  echo "*** Using INCLUDE_DIR/LIB_DIR as the source of libarrow"
   PKG_CFLAGS="-I$INCLUDE_DIR $PKG_CFLAGS"
   PKG_DIRS="-L$LIB_DIR"
 else
@@ -80,7 +84,7 @@ else
     # TODO: what about --libs-only-other?
   fi
 
-  if [ "$PKGCONFIG_CFLAGS" ] || [ "$PKGCONFIG_LIBS" ]; then
+  if [ "$PKGCONFIG_CFLAGS" ] && [ "$PKGCONFIG_LIBS" ]; then
     echo "*** Arrow C++ libraries found via pkg-config"
     PKG_CFLAGS="$PKGCONFIG_CFLAGS"
     PKG_LIBS=${PKGCONFIG_LIBS}
diff --git a/r/pkgdown/extra.js b/r/pkgdown/extra.js
new file mode 100644
index 00000000000..aca15c56625
--- /dev/null
+++ b/r/pkgdown/extra.js
@@ -0,0 +1,65 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+(function () {
+  // Load the rmarkdown tabset script
+  var script = document.createElement("script");
+  script.type = "text/javascript";
+  script.async = true;
+  script.src =
+    "https://cdn.jsdelivr.net/gh/rstudio/rmarkdown@47d837d3d9cd5e8e212b05767454f058db7d2789/inst/rmd/h/navigation-1.1/tabsets.js";
+  script.integrity = "sha256-Rs54TE1FCN1uLM4f7VQEMiRTl1Ia7TiQLkMruItwV+Q=";
+  script.crossOrigin = "anonymous";
+
+  // Run the processing as the onload callback
+  script.onload = () => {
+    // Monkey patch the .html method to use the .text method
+    $(document).ready(function () {
+      (function ($) {
+        $.fn.html = function (content) {
+          return this.text();
+        };
+      })(jQuery);
+
+      window.buildTabsets("toc");
+    });
+
+    $(document).ready(function () {
+      $(".tabset-dropdown > .nav-tabs > li").click(function () {
+        $(this).parent().toggleClass("nav-tabs-open");
+      });
+    });
+
+    $(document).ready(function () {
+      /**
+       * The tabset creation above sometimes relies on empty headers to stop the
+       * tabbing. Though they shouldn't be included in the TOC in the first place,
+       * this will remove empty headers from the TOC after it's created.
+       */
+
+      // find all the empty <a> elements and remove them (and their parents)
+      var empty_a = $("#toc").find("a").filter(":empty");
+      empty_a.parent().remove();
+
+      // now find any empty <ul>s and remove them too
+      var empty_ul = $("#toc").find("ul").filter(":empty");
+      empty_ul.remove();
+    });
+  };
+
+  document.head.appendChild(script);
+})();
diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R
index 8aacb3eb109..9daae4b67aa 100644
--- a/r/tools/nixlibs.R
+++ b/r/tools/nixlibs.R
@@ -261,7 +261,7 @@ apache_download <- function(destfile, n_mirrors = 3) {
   downloaded
 }
 
-find_local_source <- function(arrow_home = Sys.getenv("ARROW_HOME", "..")) {
+find_local_source <- function(arrow_home = Sys.getenv("ARROW_SOURCE_HOME", "..")) {
   if (file.exists(paste0(arrow_home, "/cpp/src/arrow/api.h"))) {
     # We're in a git checkout of arrow, so we can build it
     cat("*** Found local C++ source\n")
diff --git a/r/vignettes/developing.Rmd b/r/vignettes/developing.Rmd
new file mode 100644
index 00000000000..38027a9ad51
--- /dev/null
+++ b/r/vignettes/developing.Rmd
@@ -0,0 +1,520 @@
+---
+title: "Arrow R Developer Guide"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{Arrow R Developer Guide}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r setup options, include=FALSE}
+knitr::opts_chunk$set(error = TRUE, eval = FALSE)
+
+# Get environment variables describing what to evaluate
+run <- tolower(Sys.getenv("RUN_DEVDOCS", "false")) == "true"
+macos <- tolower(Sys.getenv("DEVDOCS_MACOS", "false")) == "true"
+ubuntu <- tolower(Sys.getenv("DEVDOCS_UBUNTU", "false")) == "true"
+sys_install <- tolower(Sys.getenv("DEVDOCS_SYSTEM_INSTALL", "false")) == "true"
+
+# Update the source knit_hook to save the chunk (if it is marked to be saved)
+knit_hooks_source <- knitr::knit_hooks$get("source")
+knitr::knit_hooks$set(source = function(x, options) {
+  # Extra paranoia about when this will write the chunks to the script, we will
+  # only save when:
+  #   * CI is true
+  #   * RUN_DEVDOCS is true
+  #   * options$save is TRUE (and a check that not NULL won't crash it)
+  if (as.logical(Sys.getenv("CI", FALSE)) && run && !is.null(options$save) && options$save)
+    cat(x, file = "script.sh", append = TRUE, sep = "\n")
+  # but hide the blocks we want hidden:
+  if (!is.null(options$hide) && options$hide) {
+    return(NULL)
+  }
+  knit_hooks_source(x, options)
+})
+```
+
+```{bash, save=run, hide=TRUE}
+# Stop on failure, echo input as we go
+set -e
+set -x
+```
+
+If you're looking to contribute to `arrow`, this document can help you set up a development environment that will enable you to write code and run tests locally. It outlines how to build the various components that make up the Arrow project and R package, as well as some common troubleshooting and workflows developers use. Many contributions can be accomplished with the instructions in [R-only development](#r-only-development). But if you're working on both the C++ library and the R package, the [Developer environment setup](#-developer-environment-setup) section will guide you through setting up a developer environment.
+
+This document is intended only for developers of Apache Arrow or the Arrow R package. Users of the package in R do not need to do any of this setup. If you're looking for how to install Arrow, see [the instructions in the readme](https://arrow.apache.org/docs/r/#installation); Linux users can find more details on building from source at `vignette("install", package = "arrow")`.
+
+This document is a work in progress and will grow + change as the Apache Arrow project grows and changes. We have tried to make these steps as robust as possible (in fact, we even test exactly these instructions on our nightly CI to ensure they don't become stale!), but certain custom configurations might conflict with these instructions and there are differences of opinion across developers about if and what the one true way to set up development environments like this is.  We also solicit any feedback you have about things that are confusing or additions you would like to see here. Please [report an issue](https://issues.apache.org/jira/projects/ARROW/issues) if there you see anything that is confusing, odd, or just plain wrong.
+
+## R-only development
+
+Windows and macOS users who wish to contribute to the R package and
+don’t need to alter the Arrow C++ library may be able to obtain a
+recent version of the library without building from source. On macOS,
+you may install the C++ library using [Homebrew](https://brew.sh/):
+
+``` shell
+# For the released version:
+brew install apache-arrow
+# Or for a development version, you can try:
+brew install apache-arrow --HEAD
+```
+
+On Windows and Linux, you can download a .zip file with the arrow dependencies from the
+nightly repository.
+Windows users then can set the `RWINLIB_LOCAL` environment variable to point to that
+zip file before installing the `arrow` R package. On Linux, you'll need to create a `libarrow` directory inside the R package directory and unzip that file into it. Version numbers in that
+repository correspond to dates, and you will likely want the most recent.
+
+To see what nightlies are available, you can use Arrow's (or any other S3 client's) S3 listing functionality to see what is in the bucket `s3://arrow-r-nightly/libarrow/bin`:
+
+```
+nightly <- s3_bucket("arrow-r-nightly")
+nightly$ls("libarrow/bin")
+```
+
+## Developer environment setup
+
+If you need to alter both the Arrow C++ library and the R package code, or if you can’t get a binary version of the latest C++ library elsewhere, you’ll need to build it from source too. This section discusses how to set up a C++ build configured to work with the R package. For more general resources, see the [Arrow C++ developer
+guide](https://arrow.apache.org/docs/developers/cpp/building.html).
+
+There are four major steps to the process — the first three are relevant to all Arrow developers, and the last one is specific to the R bindings:
+
+1. Configuring the Arrow library build (using `cmake`) — this specifies how you want the build to go, what features to include, etc.
+2. Building the Arrow library — this actually compiles the Arrow library
+3. Install the Arrow library — this organizes and moves the compiled Arrow library files into the location specified in the configuration
+4. Building the R package — this builds the C++ code in the R package, and installs the R package for you
+
+### Install dependencies {.tabset}
+
+The Arrow C++ library will by default use system dependencies if suitable versions are found; if they are not present, it will build them during its own build process. The only dependencies that one needs to install outside of the build process are `cmake` (for configuring the build) and `openssl` if you are building with S3 support.
+
+For a faster build, you may choose to install on the system more C++ library dependencies (such as `lz4`, `zstd`, etc.) so that they don't need to be built from source in the Arrow build. This is optional.
+
+#### macOS
+```{bash, save=run & macos}
+brew install cmake openssl
+```
+
+#### Ubuntu
+```{bash, save=run & ubuntu}
+sudo apt install -y cmake libcurl4-openssl-dev libssl-dev
+```
+
+### Configure the Arrow build {.tabset}
+
+You can choose to build and then install the Arrow library into a user-defined directory or into a system-level directory. You only need to do one of these two options.
+
+It is recommended that you install the arrow library to a user-level directory to be used in development. This is so that the development version you are using doesn't overwrite a released version of Arrow you may have installed. You are also able to have more than one version of the Arrow library to link to with this approach (by using different `ARROW_HOME` directories for the different versions). This approach also matches the recommendations for other Arrow bindings like [Python](http://arrow.apache.org/docs/developers/python.html). 
+
+#### Configure for installing to a user directory
+
+In this example we will install it to a directory called `dist` that has the same parent as our `arrow` checkout, but it could be named or located anywhere you would like. However, note that your installation of the Arrow R package will point to this directory and need it to remain intact for the package to continue to work. This is one reason we recommend *not* placing it inside of the arrow git checkout.
+
+```{bash, save=run & !sys_install}
+export ARROW_HOME=$(pwd)/dist
+mkdir $ARROW_HOME
+```
+
+_Special instructions on Linux:_ You will need to set `LD_LIBRARY_PATH` to the `lib` directory that will is under where we set `$ARROW_HOME`, before launching R and using Arrow. One way to do this is to add it to your profile (we use `~/.bash_profile` here, but you might need to put this in a different file depending on your setup, e.g. if you use a shell other than `bash`). On macOS we do not need to do this because the macOS shared library paths are hardcoded to their locations during build time.
+
+```{bash, save=run & ubuntu & !sys_install}
+export LD_LIBRARY_PATH=$ARROW_HOME/lib:$LD_LIBRARY_PATH
+echo "export LD_LIBRARY_PATH=$ARROW_HOME/lib:$LD_LIBRARY_PATH" >> ~/.bash_profile
+```
+
+Now we can move into the arrow repository to start the build process. You will need to create a directory into which the C++ build will put its contents. It is recommended to make a `build` directory inside of the `cpp` directory of the Arrow git repository (it is git-ignored, so you won't accidentally check it in). And then, change directories to be inside `cpp/build`:
+
+```{bash, save=run & !sys_install}
+pushd arrow
+mkdir -p cpp/build
+pushd cpp/build
+```
+
+You’ll first call `cmake` to configure the build and then `make install`. For the R package, you’ll need to enable several features in the C++ library using `-D` flags:
+
+```{bash, save=run & !sys_install}
+cmake \
+  -DCMAKE_INSTALL_PREFIX=$ARROW_HOME \
+  -DCMAKE_INSTALL_LIBDIR=lib \
+  -DARROW_COMPUTE=ON \
+  -DARROW_CSV=ON \
+  -DARROW_DATASET=ON \
+  -DARROW_FILESYSTEM=ON \
+  -DARROW_JEMALLOC=ON \
+  -DARROW_JSON=ON \
+  -DARROW_PARQUET=ON \
+  -DARROW_WITH_SNAPPY=ON \
+  -DARROW_WITH_ZLIB=ON \
+  -DARROW_INSTALL_NAME_RPATH=OFF \
+  -DARROW_EXTRA_ERROR_CONTEXT=ON \
+  ..
+```
+
+`..` refers to the C++ source directory: we're in `cpp/build`, and the source is in `cpp`.
+
+#### Configure to install to a system directory
+
+If you would like to install Arrow as a system library you can do that as well. This is in some respects simpler, but if you already have Arrow libraries installed there, it would disrupt them and possibly require `sudo` permissions.
+
+Now we can move into the arrow repository to start the build process. You will need to create a directory into which the C++ build will put its contents. It is recommended to make a `build` directory inside of the `cpp` directory of the Arrow git repository (it is git-ignored, so you won't accidentally check it in). And then, change directories to be inside `cpp/build`:
+
+```{bash, save=run & sys_install}
+pushd arrow
+mkdir -p cpp/build
+pushd cpp/build
+```
+
+You’ll first call `cmake` to configure the build and then `make install`. For the R package, you’ll need to enable several features in the C++ library using `-D` flags:
+
+```{bash, save=run & sys_install}
+cmake \
+  -DARROW_COMPUTE=ON \
+  -DARROW_CSV=ON \
+  -DARROW_DATASET=ON \
+  -DARROW_FILESYSTEM=ON \
+  -DARROW_JEMALLOC=ON \
+  -DARROW_JSON=ON \
+  -DARROW_PARQUET=ON \
+  -DARROW_WITH_SNAPPY=ON \
+  -DARROW_WITH_ZLIB=ON \
+  -DARROW_INSTALL_NAME_RPATH=OFF \
+  -DARROW_EXTRA_ERROR_CONTEXT=ON \
+  ..
+```
+
+`..` refers to the C++ source directory: we're in `cpp/build`, and the source is in `cpp`.
+
+### More Arrow features
+
+To enable optional features including: S3 support, an alternative memory allocator, and additional compression libraries, add some or all of these flags (the trailing `\` makes them easier to paste into a bash shell on a new line):
+
+``` shell
+  -DARROW_MIMALLOC=ON \
+  -DARROW_WITH_BROTLI=ON \
+  -DARROW_WITH_BZ2=ON \
+  -DARROW_WITH_LZ4=ON \
+  -DARROW_WITH_SNAPPY=ON \
+  -DARROW_WITH_ZSTD=ON \
+```
+
+Other flags that may be useful:
+
+* `-DBoost_SOURCE=BUNDLED` and `-DThrift_SOURCE=bundled`, for example, or any other dependency `*_SOURCE`, if you have a system version of a C++ dependency that doesn't work correctly with Arrow. This tells the build to compile its own version of the dependency from source.
+* `-DCMAKE_BUILD_TYPE=debug` or `-DCMAKE_BUILD_TYPE=relwithdebinfo` can be useful for debugging. You probably don't want to do this generally because a debug build is much slower at runtime than the default `release` build.
+
+_Note_ `cmake` is particularly sensitive to whitespacing, if you see errors, check that you don't have any errant whitespace around
+
+### Build Arrow
+
+You can add `-j#` between `make` and `install` here too to speed up compilation by running in parallel (where `#` is the number of cores you have available).
+
+```{bash, save=run & !(sys_install & ubuntu)}
+make -j8 install
+```
+
+If you are installing on linux, and you are installing to the system, you may
+need to use `sudo`:
+
+```{bash, save=run & sys_install & ubuntu}
+sudo make install
+```
+
+
+### Build the Arrow R package
+
+Once you’ve built the C++ library, you can install the R package and its
+dependencies, along with additional dev dependencies, from the git
+checkout:
+
+```{bash, save=run}
+popd # To go back to the root directory of the project, from cpp/build
+
+pushd r
+R -e 'install.packages("remotes"); remotes::install_deps(dependencies = TRUE)'
+
+R CMD INSTALL .
+```
+
+### Compilation flags
+
+If you need to set any compilation flags while building the C++
+extensions, you can use the `ARROW_R_CXXFLAGS` environment variable. For
+example, if you are using `perf` to profile the R extensions, you may
+need to set
+
+``` shell
+export ARROW_R_CXXFLAGS=-fno-omit-frame-pointer
+```
+
+### Developer Experience
+
+With the setups described here, you should not need to rebuild the Arrow library or even the C++ source in the R package as you iterated and work on the R package. The only time those should need to be rebuilt is if you have changed the C++ in the R package (and even then, `R CMD INSTALL .` should only need to recompile the files that have changed) _or_ if the Arrow library C++ has changed and there is a mismatch between the Arrow Library and the R package. If you find yourself rebuilding either or both each time you install the package or run tests, something is probably wrong with your set up.
+
+<details>
+<summary>For a full build: a `cmake` command with all of the R-relevant optional dependencies turned on</summary>
+<p>
+
+``` shell
+cmake \
+  -DCMAKE_INSTALL_PREFIX=$ARROW_HOME \
+  -DCMAKE_INSTALL_LIBDIR=lib \
+  -DARROW_COMPUTE=ON \
+  -DARROW_CSV=ON \
+  -DARROW_DATASET=ON \
+  -DARROW_FILESYSTEM=ON \
+  -DARROW_JEMALLOC=ON \
+  -DARROW_JSON=ON \
+  -DARROW_PARQUET=ON \
+  -DARROW_WITH_SNAPPY=ON \
+  -DARROW_WITH_ZLIB=ON \
+  -DARROW_INSTALL_NAME_RPATH=OFF \
+  -DARROW_EXTRA_ERROR_CONTEXT=ON \
+  -DARROW_MIMALLOC=ON \
+  -DARROW_WITH_BROTLI=ON \
+  -DARROW_WITH_BZ2=ON \
+  -DARROW_WITH_LZ4=ON \
+  -DARROW_WITH_SNAPPY=ON \
+  -DARROW_WITH_ZSTD=ON \
+  ..
+```
+</p>
+</details>  
+
+## Troublshooting
+
+Note that after any change to the C++ library, you must reinstall it and
+run `make clean` or `git clean -fdx .` to remove any cached object code
+in the `r/src/` directory before reinstalling the R package. This is
+only necessary if you make changes to the C++ library source; you do not
+need to manually purge object files if you are only editing R or C++
+code inside `r/`.
+
+### Arrow library-R package mismatches
+
+If the Arrow library and the R package have diverged, you will see errors like:
+
+```
+Error: package or namespace load failed for ‘arrow’ in dyn.load(file, DLLpath = DLLpath, ...):
+ unable to load shared object '/Library/Frameworks/R.framework/Versions/4.0/Resources/library/00LOCK-r/00new/arrow/libs/arrow.so':
+  dlopen(/Library/Frameworks/R.framework/Versions/4.0/Resources/library/00LOCK-r/00new/arrow/libs/arrow.so, 6): Symbol not found: __ZN5arrow2io16RandomAccessFile9ReadAsyncERKNS0_9IOContextExx
+  Referenced from: /Library/Frameworks/R.framework/Versions/4.0/Resources/library/00LOCK-r/00new/arrow/libs/arrow.so
+  Expected in: flat namespace
+ in /Library/Frameworks/R.framework/Versions/4.0/Resources/library/00LOCK-r/00new/arrow/libs/arrow.so
+Error: loading failed
+Execution halted
+ERROR: loading failed
+```
+
+To resolve this, try rebuilding the Arrow library from [Building Arrow above](#building-arrow).
+
+### Multiple versions of Arrow library
+
+If rebuilding the Arrow library doesn't work and you are [installing from a user-level directory](#installing-to-another-directory) and you already have a previous installation of libarrow in a system directory or you get you may get errors like the following when you install the R package:
+
+```
+Error: package or namespace load failed for ‘arrow’ in dyn.load(file, DLLpath = DLLpath, ...):
+ unable to load shared object '/Library/Frameworks/R.framework/Versions/4.0/Resources/library/00LOCK-r/00new/arrow/libs/arrow.so':
+  dlopen(/Library/Frameworks/R.framework/Versions/4.0/Resources/library/00LOCK-r/00new/arrow/libs/arrow.so, 6): Library not loaded: /usr/local/lib/libarrow.400.dylib
+  Referenced from: /usr/local/lib/libparquet.400.dylib
+  Reason: image not found
+```
+
+You need to make sure that you don't let R link to your system library when building arrow. You can do this a number of different ways:
+
+* Setting the `MAKEFLAGS` environment variable to `"LDFLAGS="` (see below for an example) this is the recommended way to accomplish this
+* Using {withr}'s `with_makevars(list(LDFLAGS = ""), ...)`
+* adding `LDFLAGS=` to your `~/.R/Makevars` file (the least recommended way, though it is a common debugging approach suggested online)
+
+```{bash, save=run & !sys_install & macos, hide=TRUE}
+# Setup troubleshooting section
+# install a system-level arrow on macOS
+brew install apache-arrow
+```
+
+
+```{bash, save=run & !sys_install & ubuntu, hide=TRUE}
+# Setup troubleshooting section
+# install a system-level arrow on macOS
+sudo apt update
+sudo apt install -y -V ca-certificates lsb-release wget
+wget https://apache.bintray.com/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-archive-keyring-latest-$(lsb_release --codename --short).deb
+sudo apt install -y -V ./apache-arrow-archive-keyring-latest-$(lsb_release --codename --short).deb
+sudo apt update
+sudo apt install -y -V libarrow-dev
+```
+
+```{bash, save=run & !sys_install & macos}
+MAKEFLAGS="LDFLAGS=" R CMD INSTALL .
+```
+
+
+### `rpath` issues
+
+If the package fails to install/load with an error like this:
+
+```
+  ** testing if installed package can be loaded from temporary location
+  Error: package or namespace load failed for 'arrow' in dyn.load(file, DLLpath = DLLpath, ...):
+  unable to load shared object '/Users/you/R/00LOCK-r/00new/arrow/libs/arrow.so':
+  dlopen(/Users/you/R/00LOCK-r/00new/arrow/libs/arrow.so, 6): Library not loaded: @rpath/libarrow.14.dylib
+```
+
+ensure that `-DARROW_INSTALL_NAME_RPATH=OFF` was passed (this is important on
+macOS to prevent problems at link time and is a no-op on other platforms).
+Alternatively, try setting the environment variable `R_LD_LIBRARY_PATH` to
+wherever Arrow C++ was put in `make install`, e.g. `export
+R_LD_LIBRARY_PATH=/usr/local/lib`, and retry installing the R package.
+
+When installing from source, if the R and C++ library versions do not
+match, installation may fail. If you’ve previously installed the
+libraries and want to upgrade the R package, you’ll need to update the
+Arrow C++ library first.
+
+For any other build/configuration challenges, see the [C++ developer
+guide](https://arrow.apache.org/docs/developers/cpp/building.html).
+
+
+## Using `remotes::install_github(...)`
+
+If you need an Arrow installation from a specific repository or at a specific ref,
+`remotes::install_github("apache/arrow/r", build = FALSE)`
+should work on most platforms (with the notable exception of Windows).
+The `build = FALSE` argument is important so that the installation can access the
+C++ source in the `cpp/` directory in `apache/arrow`.
+
+As with other installation methods, setting the environment variables `LIBARROW_MINIMAL=false` and `ARROW_R_DEV=true` will provide a more full-featured version of Arrow and provide more verbose output, respectively.
+
+For example, to install from the (fictional) branch `bugfix` from `apache/arrow` one could:
+
+```r
+Sys.setenv(LIBARROW_MINIMAL="false")
+remotes::install_github("apache/arrow/r@bugfix", build = FALSE)
+```
+
+Developers may wish to use this method of installing a specific commit
+separate from another Arrow development environment or system installation
+(e.g. we use this in [arrowbench](https://github.com/ursacomputing/arrowbench) to install development versions of arrow isolated from the system install). If you already have Arrow C++ libraries installed system-wide, you may need to set some additional variables in order to isolate this build from your system libraries:
+
+* Setting the environment variable `FORCE_BUNDLED_BUILD` to `true` will skip the `pkg-config` search for Arrow libraries and attempt to build from the same source at the repository+ref given.
+* You may also need to set the Makevars `CPPFLAGS` and `LDFLAGS` to `""` in order to prevent the installation process from attempting to link to already installed system versions of Arrow. One way to do this temporarily is wrapping your `remotes::install_github()` call like so: `withr::with_makevars(list(CPPFLAGS = "", LDFLAGS = ""), remotes::install_github(...))`.
+
+## What happens when you `R CMD INSTALL`?
+
+There are a number of scripts that are triggered when `R CMD INSTALL .`. For Arrow users, these should all just work without configuration and pull in the most complete pieces (e.g. official binaries that we host) so the installation process is easy. However knowing about these scripts can help troubleshoot if things go wrong in them or things go wrong in an install:
+
+* `configure` and `configure.win` These scripts are triggered during `R CMD INSTALL .` on non-Windows and Windows platforms, respectively. They handle finding the Arrow library, setting up the build variables necessary, and writing the package Makevars file that is used to compile the C++ code in the R package.
+* `tools/nixlibs.R` This script is sometimes called by `configure` on Linux (or on any non-windows OS with the environment variable `FORCE_BUNDLED_BUILD=true`). This sets up the build process for our bundled builds (which is the default on linux). The operative logic is at the end of the script, but it will do the following (and it will stop with the first one that succeeds and some of the steps are only checked if they are enabled via an environment variable):
+  * Check if there is an already built libarrow in `arrow/r/libarrow-{version}`, use that to link against if it exists.
+  * Check if a binary is available from our hosted unofficial builds.
+  * Download the Arrow source and build the Arrow Library from source.
+  * `*** Proceed without C++` dependencies (this is an error and the package will not work, but if you see this message you know the previous steps have not succeeded/were not enabled)
+* `inst/build_arrow_static.sh` this script builds Arrow for a bundled, static build. It is called by `tools/nixlibs.R` when the Arrow library is being built. (If you're looking at this script, and you've gotten this far, it should look _incredibly_ familiar: it's basically the contents of this guide in script form — with a few important changes)
+
+## Editing C++ code in the R package
+
+The `arrow` package uses some customized tools on top of `cpp11` to prepare its
+C++ code in `src/`. This is because we have some features that are only enabled
+and built conditionally during build time. If you change C++ code in the R
+package, you will need to set the `ARROW_R_DEV` environment variable to `true`
+(optionally, add it to your `~/.Renviron` file to persist across sessions) so
+that the `data-raw/codegen.R` file is used for code generation. The `Makefile` 
+commands also handles this automatically.
+
+We use Google C++ style in our C++ code. The easiest way to accomplish this is
+use an editors/IDE that formats your code for you. Many popular editors/IDEs 
+have support for running `clang-format` on C++ files when you save them. 
+Installing/enabling the appropriate plugin may save you much frustration.
+
+Check for style errors with
+
+``` shell
+./lint.sh
+```
+
+Fix any style issues before committing with
+
+``` shell
+./lint.sh --fix
+```
+
+The lint script requires Python 3 and `clang-format-8`. If the command
+isn’t found, you can explicitly provide the path to it like
+`CLANG_FORMAT=$(which clang-format-8) ./lint.sh`. On macOS, you can get
+this by installing LLVM via Homebrew and running the script as
+`CLANG_FORMAT=$(brew --prefix llvm@8)/bin/clang-format ./lint.sh`
+
+_Note_ that the lint script requires Python 3 and the Python dependencies 
+(note that `cmake_format is pinned to a specific version):
+
+* autopep8
+* flake8
+* cmake_format==0.5.2
+
+## Running tests
+
+Some tests are conditionally enabled based on the availability of certain
+features in the package build (S3 support, compression libraries, etc.).
+Others are generally skipped by default but can be enabled with environment
+variables or other settings:
+
+* All tests are skipped on Linux if the package builds without the C++ libarrow.
+  To make the build fail if libarrow is not available (as in, to test that
+  the C++ build was successful), set `TEST_R_WITH_ARROW=true`
+* Some tests are disabled unless `ARROW_R_DEV=true`
+* Tests that require allocating >2GB of memory to test Large types are disabled
+  unless `ARROW_LARGE_MEMORY_TESTS=true`
+* Integration tests against a real S3 bucket are disabled unless credentials
+  are set in `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`; these are available
+  on request
+* S3 tests using [MinIO](https://min.io/) locally are enabled if the
+  `minio server` process is found running. If you're running MinIO with custom
+  settings, you can set `MINIO_ACCESS_KEY`, `MINIO_SECRET_KEY`, and
+  `MINIO_PORT` to override the defaults.
+
+## Github workflows
+
+On a pull request, there are some actions you can trigger by commenting on the PR. We have additional CI checks that run nightly and can be requested on demand using an internal tool called [crosssbow](https://arrow.apache.org/docs/developers/crossbow.html). A few important GitHub comment commands include:
+
+* `@github-actions crossbow submit -g r` for all extended R CI tests
+* `@github-actions crossbow submit {task-name}` for running a specific task. See the `r:` group definition near the beginning of the [crossbow configuration](https://github.com/apache/arrow/blob/master/dev/tasks/tasks.yml) for a list of glob expression patterns that match names of items in the `tasks:` list below it.
+* `@github-actions autotune` will run and fix lint c++ linting errors + run R documentation (among other cleanup tasks) and commit them to the branch
+
+
+## Useful functions for Arrow developers
+
+Within an R session, these can help with package development:
+
+``` r
+# Load the dev package
+devtools::load_all()
+
+# Run the test suite, optionally filtering file names
+devtools::test(filter="^regexp$")
+# or the Makefile alternative from the arrow/r directory in a shell:
+make test file=regexp
+
+# Update roxygen documentation
+devtools::document()
+
+# To preview the documentation website
+pkgdown::build_site()
+
+# All package checks; see also below
+devtools::check()
+
+# See test coverage statistics
+covr::report()
+covr::package_coverage()
+```
+
+Any of those can be run from the command line by wrapping them in `R -e
+'$COMMAND'`. There’s also a `Makefile` to help with some common tasks
+from the command line (`make test`, `make doc`, `make clean`, etc.)
+
+### Full package validation
+
+``` shell
+R CMD build .
+R CMD check arrow_*.tar.gz --as-cran
+```
diff --git a/r/vignettes/install.Rmd b/r/vignettes/install.Rmd
index c68136911da..47ae8944b71 100644
--- a/r/vignettes/install.Rmd
+++ b/r/vignettes/install.Rmd
@@ -1,8 +1,8 @@
 ---
-title: "Installing the Arrow Package"
+title: "Installing the Arrow Package on Linux"
 output: rmarkdown::html_vignette
 vignette: >
-  %\VignetteIndexEntry{Installing the Arrow Package}
+  %\VignetteIndexEntry{Installing the Arrow Package on Linux}
   %\VignetteEngine{knitr::rmarkdown}
   %\VignetteEncoding{UTF-8}
 ---
@@ -168,31 +168,6 @@ which calls `tools/nixlibs.R`.
 If the C++ library is built from source, `inst/build_arrow_static.sh` is executed.
 This build script is also what is used to generate the prebuilt binaries.
 
-
-# Using `remotes::install_github(...)`
-
-If you need an Arrow installation from a specific repository or at a specific ref,
-`remotes::install_github("apache/arrow/r", build = FALSE)`
-should work on most platforms (with the notable exception of Windows).
-The `build = FALSE` argument is important so that the installation can access the
-C++ source in the `cpp/` directory in `apache/arrow`.
-
-As with other installation methods, setting the environment variables `LIBARROW_MINIMAL=false` and `ARROW_R_DEV=true` will provide a more full-featured version of Arrow and provide more verbose output, respectively.
-
-For example, to install from the (fictional) branch `bugfix` from `apache/arrow` one could:
-
-```r
-Sys.setenv(LIBARROW_MINIMAL="false")
-remotes::install_github("apache/arrow/r@bugfix", build = FALSE)
-```
-
-Developers may wish to use this method of installing a specific commit
-separate from another Arrow development environment or system installation
-(e.g. we use this in [arrowbench](https://github.com/ursacomputing/arrowbench) to install development versions of arrow isolated from the system install). If you already have Arrow C++ libraries installed system-wide, you may need to set some additional variables in order to isolate this build from your system libraries:
-
-* Setting the environment variable `FORCE_BUNDLED_BUILD` to `true` will skip the `pkg-config` search for Arrow libraries and attempt to build from the same source at the repository+ref given.
-* You may also need to set the Makevars `CPPFLAGS` and `LDFLAGS` to `""` in order to prevent the installation process from attempting to link to already installed system versions of Arrow. One way to do this temporarily is wrapping your `remotes::install_github()` call like so: `withr::with_makevars(list(CPPFLAGS = "", LDFLAGS = ""), remotes::install_github(...))`.
-
 # Troubleshooting
 
 The intent is that `install.packages("arrow")` will just work and handle all C++
@@ -244,30 +219,6 @@ Similarly, if you're using Arrow system libraries, running `update.packages()`
 after a new release of the `arrow` package will likely fail unless you first
 update the system packages.
 
-## Using a local Arrow C++ build
-
-If you've built the Arrow C++ libraries locally from source
-but haven't installed them where `pkg-config` will find them,
-there are a few options for telling the R package how to locate them.
-You can set `PKG_CONFIG_PATH` to `/path/to/your/installation/pkgconfig`
-(that is, `PKG_CONFIG_PATH=${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}/pkgconfig`,
-if you've set those variables).
-Alternatively, you can set the `INCLUDE_DIR` and `LIB_DIR` environment variables
-to point to their location.
-
-If the package fails to install/load with an error like this:
-
-```
-** testing if installed package can be loaded from temporary location
-Error: package or namespace load failed for 'arrow' in dyn.load(file, DLLpath = DLLpath, ...):
-unable to load shared object '/Users/you/R/00LOCK-r/00new/arrow/libs/arrow.so':
-dlopen(/Users/you/R/00LOCK-r/00new/arrow/libs/arrow.so, 6): Library not loaded: @rpath/libarrow.14.dylib
-```
-
-try setting the environment variable `R_LD_LIBRARY_PATH` to wherever Arrow C++
-was put in `make install`, e.g. `export R_LD_LIBRARY_PATH=/usr/local/lib`, and
-retry installing the R package.
-
 ## Using prebuilt binaries
 
 If the R package finds and downloads a prebuilt binary of the C++ library,
@@ -335,6 +286,17 @@ See discussion [here](https://issues.apache.org/jira/browse/ARROW-8556).
 
 ## Summary of build environment variables
 
+Some features are optional when you build Arrow from source. With the exception of `ARROW_S3`, these are all `ON` by default in the bundled C++ build, but you can set them to `OFF` to disable them.
+
+* `ARROW_S3`: If set to `ON` S3 support will be built as long as the 
+  dependencies are met; if they are not met, the build script will turn this `OFF` 
+* `ARROW_JEMALLOC` for the `jemalloc` memory allocator
+* `ARROW_PARQUET`
+* `ARROW_DATASET`
+* `ARROW_WITH_RE2` for the RE2 regular expression library, used in some string compute functions
+* `ARROW_WITH_UTF8PROC` for the UTF8Proc string library, used in many other string compute functions
+
+There are a number of other variables that affect the `configure` script and the bundled build script.
 By default, these are all unset. All boolean variables are case-insensitive.
 
 * `ARROW_USE_PKG_CONFIG`: If set to `false`, the configure script
@@ -379,7 +341,8 @@ By default, these are all unset. All boolean variables are case-insensitive.
   The directory will be created if it does not exist.
 * `CMAKE`: When building the C++ library from source, you can specify a
   `/path/to/cmake` to use a different version than whatever is found on the `$PATH`
-
+  
+  
 # Contributing
 
 As mentioned above, please [report an issue](https://issues.apache.org/jira/projects/ARROW/issues)

From 1e6819c04eadfdf334cf62fbf4618df98ae3d97f Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Thu, 15 Apr 2021 11:21:36 +0200
Subject: [PATCH 045/719] ARROW-12057: [Python] Remove direct usage of pandas'
 Block subclasses (partly)

Closes #10017 from jorisvandenbossche/ARROW-12057-pandas-block-classes

Authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Signed-off-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
---
 python/pyarrow/pandas_compat.py | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py
index 91b38b8426c..e4b13175fe1 100644
--- a/python/pyarrow/pandas_compat.py
+++ b/python/pyarrow/pandas_compat.py
@@ -647,7 +647,6 @@ def get_datetimetz_type(values, dtype, type_):
 
 
 def dataframe_to_serialized_dict(frame):
-    import pandas.core.internals as _int
     block_manager = frame._data
 
     blocks = []
@@ -657,11 +656,11 @@ def dataframe_to_serialized_dict(frame):
         values = block.values
         block_data = {}
 
-        if isinstance(block, _int.DatetimeTZBlock):
+        if _pandas_api.is_datetimetz(values.dtype):
             block_data['timezone'] = pa.lib.tzinfo_to_string(values.tz)
             if hasattr(values, 'values'):
                 values = values.values
-        elif isinstance(block, _int.CategoricalBlock):
+        elif _pandas_api.is_categorical(values):
             block_data.update(dictionary=values.categories,
                               ordered=values.ordered)
             values = values.codes
@@ -670,10 +669,8 @@ def dataframe_to_serialized_dict(frame):
             block=values
         )
 
-        # If we are dealing with an object array, pickle it instead. Note that
-        # we do not use isinstance here because _int.CategoricalBlock is a
-        # subclass of _int.ObjectBlock.
-        if type(block) == _int.ObjectBlock:
+        # If we are dealing with an object array, pickle it instead.
+        if values.dtype == np.dtype(object):
             block_data['object'] = None
             block_data['block'] = builtin_pickle.dumps(
                 values, protocol=builtin_pickle.HIGHEST_PROTOCOL)
@@ -731,8 +728,7 @@ def _reconstruct_block(item, columns=None, extension_columns=None):
         cat = _pandas_api.categorical_type.from_codes(
             block_arr, categories=item['dictionary'],
             ordered=item['ordered'])
-        block = _int.make_block(cat, placement=placement,
-                                klass=_int.CategoricalBlock)
+        block = _int.make_block(cat, placement=placement)
     elif 'timezone' in item:
         dtype = make_datetimetz(item['timezone'])
         block = _int.make_block(block_arr, placement=placement,
@@ -740,7 +736,7 @@ def _reconstruct_block(item, columns=None, extension_columns=None):
                                 dtype=dtype)
     elif 'object' in item:
         block = _int.make_block(builtin_pickle.loads(block_arr),
-                                placement=placement, klass=_int.ObjectBlock)
+                                placement=placement)
     elif 'py_array' in item:
         # create ExtensionBlock
         arr = item['py_array']
@@ -751,8 +747,7 @@ def _reconstruct_block(item, columns=None, extension_columns=None):
             raise ValueError("This column does not support to be converted "
                              "to a pandas ExtensionArray")
         pd_ext_arr = pandas_dtype.__from_arrow__(arr)
-        block = _int.make_block(pd_ext_arr, placement=placement,
-                                klass=_int.ExtensionBlock)
+        block = _int.make_block(pd_ext_arr, placement=placement)
     else:
         block = _int.make_block(block_arr, placement=placement)
 

From 15137e2dba2fcd97c0a17abddb3cfc9263a1d2f1 Mon Sep 17 00:00:00 2001
From: Anthony Louis <anthony@simbioseventures.com>
Date: Thu, 15 Apr 2021 15:47:36 +0530
Subject: [PATCH 046/719] ARROW-7215: [C++][Gandiva] Implement
 castVARCHAR(numeric_type) functions

This PR implements the castVARCHAR for numeric values inside the Gandiva.

It replaces the logic of the https://github.com/apache/arrow/pull/8158 PR  to change the function output to match the Java language patterns.

Closes #9816 from anthonylouisbsb/feature/fix-castvarchar-to-match-java-impl and squashes the following commits:

7df55a58d <Anthony Louis> Apply formatting changes
7a724c0fd <Anthony Louis> Remove unnecessary macros
4fb8a7f44 <Anthony Louis> Refactor if chain
e78705136 <Anthony Louis> Add test to infinity case
b62b856a0 <Anthony Louis> Add comments for changes
cec11bbf0 <Anthony Louis> Add tests to check Java compatibility
302139c78 <Anthony Louis> Add emit trailing point tests
efb94b901 <Anthony Louis> Add -0.0 inside cast test
523e60a56 <Anthony Louis> Add custom constructor inside the class
34f2f926d <Anthony Louis> Add class to print in formatted way
e244502b3 <Anthony Louis> Fix tests to consider java formatting
33bc5b2de <Projjal Chanda> added castvarchar(numeric_types) functions

Lead-authored-by: Anthony Louis <anthony@simbioseventures.com>
Co-authored-by: Projjal Chanda <iam@pchanda.com>
Signed-off-by: Praveen <praveen@dremio.com>
---
 cpp/src/arrow/util/formatting.cc              |  18 ++
 cpp/src/arrow/util/formatting.h               |  16 ++
 .../double-conversion/double-conversion.cc    |  20 +-
 .../double-conversion/double-conversion.h     |  11 +
 cpp/src/gandiva/formatting_utils.h            |  69 ++++++
 cpp/src/gandiva/function_registry_string.cc   |  16 ++
 cpp/src/gandiva/gdv_function_stubs.cc         | 119 +++++++++
 cpp/src/gandiva/gdv_function_stubs.h          |  13 +
 cpp/src/gandiva/gdv_function_stubs_test.cc    | 130 ++++++++++
 .../gandiva/evaluator/ProjectorTest.java      | 226 ++++++++++++++++--
 10 files changed, 618 insertions(+), 20 deletions(-)
 create mode 100644 cpp/src/gandiva/formatting_utils.h

diff --git a/cpp/src/arrow/util/formatting.cc b/cpp/src/arrow/util/formatting.cc
index 9e4d25c0e2b..c16d42ce5cf 100644
--- a/cpp/src/arrow/util/formatting.cc
+++ b/cpp/src/arrow/util/formatting.cc
@@ -43,11 +43,29 @@ struct FloatToStringFormatter::Impl {
       : converter_(DoubleToStringConverter::EMIT_POSITIVE_EXPONENT_SIGN, "inf", "nan",
                    'e', -6, 10, 6, 0) {}
 
+  Impl(int flags, const char* inf_symbol, const char* nan_symbol, char exp_character,
+       int decimal_in_shortest_low, int decimal_in_shortest_high,
+       int max_leading_padding_zeroes_in_precision_mode,
+       int max_trailing_padding_zeroes_in_precision_mode)
+      : converter_(flags, inf_symbol, nan_symbol, exp_character, decimal_in_shortest_low,
+                   decimal_in_shortest_high, max_leading_padding_zeroes_in_precision_mode,
+                   max_trailing_padding_zeroes_in_precision_mode) {}
+
   DoubleToStringConverter converter_;
 };
 
 FloatToStringFormatter::FloatToStringFormatter() : impl_(new Impl()) {}
 
+FloatToStringFormatter::FloatToStringFormatter(
+    int flags, const char* inf_symbol, const char* nan_symbol, char exp_character,
+    int decimal_in_shortest_low, int decimal_in_shortest_high,
+    int max_leading_padding_zeroes_in_precision_mode,
+    int max_trailing_padding_zeroes_in_precision_mode)
+    : impl_(new Impl(flags, inf_symbol, nan_symbol, exp_character,
+                     decimal_in_shortest_low, decimal_in_shortest_high,
+                     max_leading_padding_zeroes_in_precision_mode,
+                     max_trailing_padding_zeroes_in_precision_mode)) {}
+
 FloatToStringFormatter::~FloatToStringFormatter() {}
 
 int FloatToStringFormatter::FormatFloat(float v, char* out_buffer, int out_size) {
diff --git a/cpp/src/arrow/util/formatting.h b/cpp/src/arrow/util/formatting.h
index 5f4b251a38c..566c9795f83 100644
--- a/cpp/src/arrow/util/formatting.h
+++ b/cpp/src/arrow/util/formatting.h
@@ -31,6 +31,7 @@
 #include "arrow/status.h"
 #include "arrow/type.h"
 #include "arrow/type_traits.h"
+#include "arrow/util/double_conversion.h"
 #include "arrow/util/string_view.h"
 #include "arrow/util/time.h"
 #include "arrow/util/visibility.h"
@@ -219,6 +220,11 @@ class StringFormatter<UInt64Type> : public IntToStringFormatterMixin<UInt64Type>
 class ARROW_EXPORT FloatToStringFormatter {
  public:
   FloatToStringFormatter();
+  FloatToStringFormatter(int flags, const char* inf_symbol, const char* nan_symbol,
+                         char exp_character, int decimal_in_shortest_low,
+                         int decimal_in_shortest_high,
+                         int max_leading_padding_zeroes_in_precision_mode,
+                         int max_trailing_padding_zeroes_in_precision_mode);
   ~FloatToStringFormatter();
 
   // Returns the number of characters written
@@ -239,6 +245,16 @@ class FloatToStringFormatterMixin : public FloatToStringFormatter {
 
   explicit FloatToStringFormatterMixin(const std::shared_ptr<DataType>& = NULLPTR) {}
 
+  FloatToStringFormatterMixin(int flags, const char* inf_symbol, const char* nan_symbol,
+                              char exp_character, int decimal_in_shortest_low,
+                              int decimal_in_shortest_high,
+                              int max_leading_padding_zeroes_in_precision_mode,
+                              int max_trailing_padding_zeroes_in_precision_mode)
+      : FloatToStringFormatter(flags, inf_symbol, nan_symbol, exp_character,
+                               decimal_in_shortest_low, decimal_in_shortest_high,
+                               max_leading_padding_zeroes_in_precision_mode,
+                               max_trailing_padding_zeroes_in_precision_mode) {}
+
   template <typename Appender>
   Return<Appender> operator()(value_type value, Appender&& append) {
     char buffer[buffer_size];
diff --git a/cpp/src/arrow/vendored/double-conversion/double-conversion.cc b/cpp/src/arrow/vendored/double-conversion/double-conversion.cc
index 5d5d6f13116..27e70b4c90d 100644
--- a/cpp/src/arrow/vendored/double-conversion/double-conversion.cc
+++ b/cpp/src/arrow/vendored/double-conversion/double-conversion.cc
@@ -84,7 +84,25 @@ void DoubleToStringConverter::CreateExponentialRepresentation(
     StringBuilder* result_builder) const {
   ASSERT(length != 0);
   result_builder->AddCharacter(decimal_digits[0]);
-  if (length != 1) {
+
+  /* If the mantissa of the scientific notation representation is an integer number,
+   * the EMIT_TRAILING_DECIMAL_POINT flag will add a '.' character at the end of the
+   * representation:
+   * - With EMIT_TRAILING_DECIMAL_POINT enabled -> 0.0009 => 9.E-4
+   * - With EMIT_TRAILING_DECIMAL_POINT disabled -> 0.0009 => 9E-4
+   *
+   * If the mantissa is an integer and the EMIT_TRAILING_ZERO_AFTER_POINT flag is enabled
+   * it will add a '0' character at the end of the mantissa representation. Note that that
+   * flag depends on EMIT_TRAILING_DECIMAL_POINT flag be enabled.*/
+  if(length == 1){
+    if ((flags_ & EMIT_TRAILING_DECIMAL_POINT) != 0) {
+      result_builder->AddCharacter('.');
+
+      if ((flags_ & EMIT_TRAILING_ZERO_AFTER_POINT) != 0) {
+          result_builder->AddCharacter('0');
+      }
+    }
+  } else {
     result_builder->AddCharacter('.');
     result_builder->AddSubstring(&decimal_digits[1], length-1);
   }
diff --git a/cpp/src/arrow/vendored/double-conversion/double-conversion.h b/cpp/src/arrow/vendored/double-conversion/double-conversion.h
index 6dbc0997c61..9dc3ebd8dfd 100644
--- a/cpp/src/arrow/vendored/double-conversion/double-conversion.h
+++ b/cpp/src/arrow/vendored/double-conversion/double-conversion.h
@@ -104,6 +104,17 @@ class DoubleToStringConverter {
   //   ToPrecision(230.0, 2) -> "230"
   //   ToPrecision(230.0, 2) -> "230."  with EMIT_TRAILING_DECIMAL_POINT.
   //   ToPrecision(230.0, 2) -> "2.3e2" with EMIT_TRAILING_ZERO_AFTER_POINT.
+  //
+  // When converting numbers to scientific notation representation, if the mantissa of
+  // the representation is an integer number, the EMIT_TRAILING_DECIMAL_POINT flag will
+  // add a '.' character at the end of the representation:
+  // - With EMIT_TRAILING_DECIMAL_POINT enabled -> 0.0009 => 9.E-4
+  // - With EMIT_TRAILING_DECIMAL_POINT disabled -> 0.0009 => 9E-4
+  //
+  // If the mantissa is an integer and the EMIT_TRAILING_ZERO_AFTER_POINT flag is enabled
+  // it will add a '0' character at the end of the mantissa representation. Note that that
+  // flag depends on EMIT_TRAILING_DECIMAL_POINT flag be enabled.
+  // - With EMIT_TRAILING_ZERO_AFTER_POINT enabled -> 0.0009 => 9.0E-4
   DoubleToStringConverter(int flags,
                           const char* infinity_symbol,
                           const char* nan_symbol,
diff --git a/cpp/src/gandiva/formatting_utils.h b/cpp/src/gandiva/formatting_utils.h
new file mode 100644
index 00000000000..7bc6a49696a
--- /dev/null
+++ b/cpp/src/gandiva/formatting_utils.h
@@ -0,0 +1,69 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "arrow/type.h"
+#include "arrow/util/formatting.h"
+#include "arrow/vendored/double-conversion/double-conversion.h"
+
+namespace gandiva {
+
+/// \brief The entry point for conversion to strings.
+template <typename ARROW_TYPE, typename Enable = void>
+class GdvStringFormatter;
+
+using double_conversion::DoubleToStringConverter;
+
+template <typename ARROW_TYPE>
+class FloatToStringGdvMixin
+    : public arrow::internal::FloatToStringFormatterMixin<ARROW_TYPE> {
+ public:
+  using arrow::internal::FloatToStringFormatterMixin<
+      ARROW_TYPE>::FloatToStringFormatterMixin;
+
+  // The mixin is a modified version of the existent FloatToStringFormatterMixin, but
+  // it defines some specific parameters in the FloatToStringFormatterMixin to cast
+  // the float numbers to string using the same patterns like Java.
+  //
+  // The Java real numbers are represented in two ways following these rules:
+  //- If the number is greater or equals than 10^7 and less than 10^(-3)
+  //  it will be represented using scientific notation, e.g:
+  //      - 0.000012 -> 1.2E-5
+  //      - 10000002.3 -> 1.00000023E7
+  //- If the numbers are between that interval above, they are showed as is.
+  explicit FloatToStringGdvMixin(const std::shared_ptr<arrow::DataType>& = NULLPTR)
+      : arrow::internal::FloatToStringFormatterMixin<ARROW_TYPE>(
+            DoubleToStringConverter::EMIT_TRAILING_ZERO_AFTER_POINT |
+                DoubleToStringConverter::EMIT_TRAILING_DECIMAL_POINT,
+            "Infinity", "NaN", 'E', -3, 7, 3, 1) {}
+};
+
+template <>
+class GdvStringFormatter<arrow::FloatType>
+    : public FloatToStringGdvMixin<arrow::FloatType> {
+ public:
+  using FloatToStringGdvMixin::FloatToStringGdvMixin;
+};
+
+template <>
+class GdvStringFormatter<arrow::DoubleType>
+    : public FloatToStringGdvMixin<arrow::DoubleType> {
+ public:
+  using FloatToStringGdvMixin::FloatToStringGdvMixin;
+};
+}  // namespace gandiva
diff --git a/cpp/src/gandiva/function_registry_string.cc b/cpp/src/gandiva/function_registry_string.cc
index 3c0d714f164..d1f97cdb3e8 100644
--- a/cpp/src/gandiva/function_registry_string.cc
+++ b/cpp/src/gandiva/function_registry_string.cc
@@ -92,6 +92,22 @@ std::vector<NativeFunction> GetStringFunctionRegistry() {
                      kResultNullIfNull, "castVARCHAR_utf8_int64",
                      NativeFunction::kNeedsContext),
 
+      NativeFunction("castVARCHAR", {}, DataTypeVector{int32(), int64()}, utf8(),
+                     kResultNullIfNull, "gdv_fn_castVARCHAR_int32_int64",
+                     NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors),
+
+      NativeFunction("castVARCHAR", {}, DataTypeVector{int64(), int64()}, utf8(),
+                     kResultNullIfNull, "gdv_fn_castVARCHAR_int64_int64",
+                     NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors),
+
+      NativeFunction("castVARCHAR", {}, DataTypeVector{float32(), int64()}, utf8(),
+                     kResultNullIfNull, "gdv_fn_castVARCHAR_float32_int64",
+                     NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors),
+
+      NativeFunction("castVARCHAR", {}, DataTypeVector{float64(), int64()}, utf8(),
+                     kResultNullIfNull, "gdv_fn_castVARCHAR_float64_int64",
+                     NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors),
+
       NativeFunction("castVARCHAR", {}, DataTypeVector{decimal128(), int64()}, utf8(),
                      kResultNullIfNull, "castVARCHAR_decimal128_int64",
                      NativeFunction::kNeedsContext),
diff --git a/cpp/src/gandiva/gdv_function_stubs.cc b/cpp/src/gandiva/gdv_function_stubs.cc
index 2d0e1a7ce87..832eebcaa1a 100644
--- a/cpp/src/gandiva/gdv_function_stubs.cc
+++ b/cpp/src/gandiva/gdv_function_stubs.cc
@@ -20,12 +20,15 @@
 #include <string>
 #include <vector>
 
+#include "arrow/util/formatting.h"
 #include "arrow/util/value_parsing.h"
 #include "gandiva/engine.h"
 #include "gandiva/exported_funcs.h"
+#include "gandiva/formatting_utils.h"
 #include "gandiva/hash_utils.h"
 #include "gandiva/in_holder.h"
 #include "gandiva/like_holder.h"
+#include "gandiva/precompiled/types.h"
 #include "gandiva/random_generator_holder.h"
 #include "gandiva/to_date_holder.h"
 
@@ -303,6 +306,86 @@ CAST_NUMERIC_FROM_STRING(float, arrow::FloatType, FLOAT4)
 CAST_NUMERIC_FROM_STRING(double, arrow::DoubleType, FLOAT8)
 
 #undef CAST_NUMERIC_FROM_STRING
+
+#define GDV_FN_CAST_VARCHAR_INTEGER(IN_TYPE, ARROW_TYPE)                                 \
+  GANDIVA_EXPORT                                                                         \
+  const char* gdv_fn_castVARCHAR_##IN_TYPE##_int64(int64_t context, gdv_##IN_TYPE value, \
+                                                   int64_t len, int32_t * out_len) {     \
+    if (len < 0) {                                                                       \
+      gdv_fn_context_set_error_msg(context, "Buffer length can not be negative");        \
+      *out_len = 0;                                                                      \
+      return "";                                                                         \
+    }                                                                                    \
+    if (len == 0) {                                                                      \
+      *out_len = 0;                                                                      \
+      return "";                                                                         \
+    }                                                                                    \
+    arrow::internal::StringFormatter<arrow::ARROW_TYPE> formatter;                       \
+    char* ret = reinterpret_cast<char*>(                                                 \
+        gdv_fn_context_arena_malloc(context, static_cast<int32_t>(len)));                \
+    if (ret == nullptr) {                                                                \
+      gdv_fn_context_set_error_msg(context, "Could not allocate memory");                \
+      *out_len = 0;                                                                      \
+      return "";                                                                         \
+    }                                                                                    \
+    arrow::Status status = formatter(value, [&](arrow::util::string_view v) {            \
+      int64_t size = static_cast<int64_t>(v.size());                                     \
+      *out_len = static_cast<int32_t>(len < size ? len : size);                          \
+      memcpy(ret, v.data(), *out_len);                                                   \
+      return arrow::Status::OK();                                                        \
+    });                                                                                  \
+    if (!status.ok()) {                                                                  \
+      std::string err = "Could not cast " + std::to_string(value) + " to string";        \
+      gdv_fn_context_set_error_msg(context, err.c_str());                                \
+      *out_len = 0;                                                                      \
+      return "";                                                                         \
+    }                                                                                    \
+    return ret;                                                                          \
+  }
+
+#define GDV_FN_CAST_VARCHAR_REAL(IN_TYPE, ARROW_TYPE)                                    \
+  GANDIVA_EXPORT                                                                         \
+  const char* gdv_fn_castVARCHAR_##IN_TYPE##_int64(int64_t context, gdv_##IN_TYPE value, \
+                                                   int64_t len, int32_t * out_len) {     \
+    if (len < 0) {                                                                       \
+      gdv_fn_context_set_error_msg(context, "Buffer length can not be negative");        \
+      *out_len = 0;                                                                      \
+      return "";                                                                         \
+    }                                                                                    \
+    if (len == 0) {                                                                      \
+      *out_len = 0;                                                                      \
+      return "";                                                                         \
+    }                                                                                    \
+    gandiva::GdvStringFormatter<arrow::ARROW_TYPE> formatter;                            \
+    char* ret = reinterpret_cast<char*>(                                                 \
+        gdv_fn_context_arena_malloc(context, static_cast<int32_t>(len)));                \
+    if (ret == nullptr) {                                                                \
+      gdv_fn_context_set_error_msg(context, "Could not allocate memory");                \
+      *out_len = 0;                                                                      \
+      return "";                                                                         \
+    }                                                                                    \
+    arrow::Status status = formatter(value, [&](arrow::util::string_view v) {            \
+      int64_t size = static_cast<int64_t>(v.size());                                     \
+      *out_len = static_cast<int32_t>(len < size ? len : size);                          \
+      memcpy(ret, v.data(), *out_len);                                                   \
+      return arrow::Status::OK();                                                        \
+    });                                                                                  \
+    if (!status.ok()) {                                                                  \
+      std::string err = "Could not cast " + std::to_string(value) + " to string";        \
+      gdv_fn_context_set_error_msg(context, err.c_str());                                \
+      *out_len = 0;                                                                      \
+      return "";                                                                         \
+    }                                                                                    \
+    return ret;                                                                          \
+  }
+
+GDV_FN_CAST_VARCHAR_INTEGER(int32, Int32Type)
+GDV_FN_CAST_VARCHAR_INTEGER(int64, Int64Type)
+GDV_FN_CAST_VARCHAR_REAL(float32, FloatType)
+GDV_FN_CAST_VARCHAR_REAL(float64, DoubleType)
+
+#undef GDV_FN_CAST_VARCHAR_INTEGER
+#undef GDV_FN_CAST_VARCHAR_REAL
 }
 
 namespace gandiva {
@@ -471,6 +554,42 @@ void ExportedStubFunctions::AddMappings(Engine* engine) const {
   engine->AddGlobalMappingForFunc("gdv_fn_castFLOAT8_utf8", types->double_type(), args,
                                   reinterpret_cast<void*>(gdv_fn_castFLOAT8_utf8));
 
+  // gdv_fn_castVARCHAR_int32_int64
+  args = {types->i64_type(),       // int64_t execution_context
+          types->i32_type(),       // int32_t value
+          types->i64_type(),       // int64_t len
+          types->i32_ptr_type()};  // int32_t* out_len
+  engine->AddGlobalMappingForFunc(
+      "gdv_fn_castVARCHAR_int32_int64", types->i8_ptr_type() /*return_type*/, args,
+      reinterpret_cast<void*>(gdv_fn_castVARCHAR_int32_int64));
+
+  // gdv_fn_castVARCHAR_int64_int64
+  args = {types->i64_type(),       // int64_t execution_context
+          types->i64_type(),       // int64_t value
+          types->i64_type(),       // int64_t len
+          types->i32_ptr_type()};  // int32_t* out_len
+  engine->AddGlobalMappingForFunc(
+      "gdv_fn_castVARCHAR_int64_int64", types->i8_ptr_type() /*return_type*/, args,
+      reinterpret_cast<void*>(gdv_fn_castVARCHAR_int64_int64));
+
+  // gdv_fn_castVARCHAR_float32_int64
+  args = {types->i64_type(),       // int64_t execution_context
+          types->float_type(),     // float value
+          types->i64_type(),       // int64_t len
+          types->i32_ptr_type()};  // int32_t* out_len
+  engine->AddGlobalMappingForFunc(
+      "gdv_fn_castVARCHAR_float32_int64", types->i8_ptr_type() /*return_type*/, args,
+      reinterpret_cast<void*>(gdv_fn_castVARCHAR_float32_int64));
+
+  // gdv_fn_castVARCHAR_float64_int64
+  args = {types->i64_type(),       // int64_t execution_context
+          types->double_type(),    // double value
+          types->i64_type(),       // int64_t len
+          types->i32_ptr_type()};  // int32_t* out_len
+  engine->AddGlobalMappingForFunc(
+      "gdv_fn_castVARCHAR_float64_int64", types->i8_ptr_type() /*return_type*/, args,
+      reinterpret_cast<void*>(gdv_fn_castVARCHAR_float64_int64));
+
   // gdv_fn_sha1_int8
   args = {
       types->i64_type(),     // context
diff --git a/cpp/src/gandiva/gdv_function_stubs.h b/cpp/src/gandiva/gdv_function_stubs.h
index 255e9af367b..0a6cd70ca7c 100644
--- a/cpp/src/gandiva/gdv_function_stubs.h
+++ b/cpp/src/gandiva/gdv_function_stubs.h
@@ -95,4 +95,17 @@ float gdv_fn_castFLOAT4_utf8(int64_t context, const char* data, int32_t data_len
 
 GANDIVA_EXPORT
 double gdv_fn_castFLOAT8_utf8(int64_t context, const char* data, int32_t data_len);
+
+GANDIVA_EXPORT
+const char* gdv_fn_castVARCHAR_int32_int64(int64_t context, int32_t value, int64_t len,
+                                           int32_t* out_len);
+GANDIVA_EXPORT
+const char* gdv_fn_castVARCHAR_int64_int64(int64_t context, int64_t value, int64_t len,
+                                           int32_t* out_len);
+GANDIVA_EXPORT
+const char* gdv_fn_castVARCHAR_float32_int64(int64_t context, float value, int64_t len,
+                                             int32_t* out_len);
+GANDIVA_EXPORT
+const char* gdv_fn_castVARCHAR_float64_int64(int64_t context, double value, int64_t len,
+                                             int32_t* out_len);
 }
diff --git a/cpp/src/gandiva/gdv_function_stubs_test.cc b/cpp/src/gandiva/gdv_function_stubs_test.cc
index 90ac1dfa540..8f44ce27982 100644
--- a/cpp/src/gandiva/gdv_function_stubs_test.cc
+++ b/cpp/src/gandiva/gdv_function_stubs_test.cc
@@ -160,4 +160,134 @@ TEST(TestGdvFnStubs, TestCastFloat8) {
   ctx.Reset();
 }
 
+TEST(TestGdvFnStubs, TestCastVARCHARFromInt32) {
+  gandiva::ExecutionContext ctx;
+  uint64_t ctx_ptr = reinterpret_cast<int64_t>(&ctx);
+  int32_t out_len = 0;
+
+  const char* out_str = gdv_fn_castVARCHAR_int32_int64(ctx_ptr, -46, 100, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "-46");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_castVARCHAR_int32_int64(ctx_ptr, 2147483647, 100, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "2147483647");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_castVARCHAR_int32_int64(ctx_ptr, -2147483647 - 1, 100, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "-2147483648");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_castVARCHAR_int32_int64(ctx_ptr, 0, 100, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "0");
+  EXPECT_FALSE(ctx.has_error());
+
+  // test with required length less than actual buffer length
+  out_str = gdv_fn_castVARCHAR_int32_int64(ctx_ptr, 34567, 3, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "345");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_castVARCHAR_int32_int64(ctx_ptr, 347, 0, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_castVARCHAR_int32_int64(ctx_ptr, 347, -1, &out_len);
+  EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Buffer length can not be negative"));
+  ctx.Reset();
+}
+
+TEST(TestGdvFnStubs, TestCastVARCHARFromInt64) {
+  gandiva::ExecutionContext ctx;
+  uint64_t ctx_ptr = reinterpret_cast<int64_t>(&ctx);
+  int32_t out_len = 0;
+
+  const char* out_str =
+      gdv_fn_castVARCHAR_int64_int64(ctx_ptr, 9223372036854775807LL, 100, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "9223372036854775807");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str =
+      gdv_fn_castVARCHAR_int64_int64(ctx_ptr, -9223372036854775807LL - 1, 100, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "-9223372036854775808");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_castVARCHAR_int64_int64(ctx_ptr, 0, 100, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "0");
+  EXPECT_FALSE(ctx.has_error());
+
+  // test with required length less than actual buffer length
+  out_str = gdv_fn_castVARCHAR_int64_int64(ctx_ptr, 12345, 3, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "123");
+  EXPECT_FALSE(ctx.has_error());
+}
+
+TEST(TestGdvFnStubs, TestCastVARCHARFromFloat) {
+  gandiva::ExecutionContext ctx;
+  uint64_t ctx_ptr = reinterpret_cast<int64_t>(&ctx);
+  int32_t out_len = 0;
+
+  const char* out_str = gdv_fn_castVARCHAR_float32_int64(ctx_ptr, 4.567f, 100, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "4.567");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_castVARCHAR_float32_int64(ctx_ptr, -3.4567f, 100, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "-3.4567");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_castVARCHAR_float32_int64(ctx_ptr, 0.00001f, 100, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "1.0E-5");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_castVARCHAR_float32_int64(ctx_ptr, 0.00099999f, 100, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "9.9999E-4");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_castVARCHAR_float32_int64(ctx_ptr, 0.0f, 100, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "0.0");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_castVARCHAR_float32_int64(ctx_ptr, 10.00000f, 100, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "10.0");
+  EXPECT_FALSE(ctx.has_error());
+
+  // test with required length less than actual buffer length
+  out_str = gdv_fn_castVARCHAR_float32_int64(ctx_ptr, 1.2345f, 3, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "1.2");
+  EXPECT_FALSE(ctx.has_error());
+}
+
+TEST(TestGdvFnStubs, TestCastVARCHARFromDouble) {
+  gandiva::ExecutionContext ctx;
+  uint64_t ctx_ptr = reinterpret_cast<int64_t>(&ctx);
+  int32_t out_len = 0;
+
+  const char* out_str = gdv_fn_castVARCHAR_float64_int64(ctx_ptr, 4.567, 100, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "4.567");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_castVARCHAR_float64_int64(ctx_ptr, -3.4567, 100, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "-3.4567");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_castVARCHAR_float64_int64(ctx_ptr, 0.00001, 100, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "1.0E-5");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_castVARCHAR_float32_int64(ctx_ptr, 0.00099999f, 100, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "9.9999E-4");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_castVARCHAR_float64_int64(ctx_ptr, 0.0, 100, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "0.0");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_castVARCHAR_float64_int64(ctx_ptr, 10.0000000000, 100, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "10.0");
+  EXPECT_FALSE(ctx.has_error());
+
+  // test with required length less than actual buffer length
+  out_str = gdv_fn_castVARCHAR_float64_int64(ctx_ptr, 1.2345, 3, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "1.2");
+  EXPECT_FALSE(ctx.has_error());
+}
+
 }  // namespace gandiva
diff --git a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java
index 446efd12840..606c1a922e5 100644
--- a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java
+++ b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java
@@ -1193,7 +1193,7 @@ public void testInExpr() throws GandivaException, Exception {
     Field c1 = Field.nullable("c1", int32);
 
     TreeNode inExpr =
-            TreeBuilder.makeInExpressionInt32(TreeBuilder.makeField(c1), Sets.newHashSet(1, 2, 3, 4, 5, 15, 16));
+        TreeBuilder.makeInExpressionInt32(TreeBuilder.makeField(c1), Sets.newHashSet(1, 2, 3, 4, 5, 15, 16));
     ExpressionTree expr = TreeBuilder.makeExpression(inExpr, Field.nullable("result", boolType));
     Schema schema = new Schema(Lists.newArrayList(c1));
     Projector eval = Projector.make(schema, Lists.newArrayList(expr));
@@ -1208,10 +1208,10 @@ public void testInExpr() throws GandivaException, Exception {
 
     ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0);
     ArrowRecordBatch batch =
-            new ArrowRecordBatch(
-                    numRows,
-                    Lists.newArrayList(fieldNode, fieldNode),
-                    Lists.newArrayList(c1Validity, c1Data, c2Validity));
+        new ArrowRecordBatch(
+            numRows,
+            Lists.newArrayList(fieldNode, fieldNode),
+            Lists.newArrayList(c1Validity, c1Data, c2Validity));
 
     BitVector bitVector = new BitVector(EMPTY_SCHEMA_PATH, allocator);
     bitVector.allocateNew(numRows);
@@ -1297,7 +1297,7 @@ public void testInExprStrings() throws GandivaException, Exception {
     List<TreeNode> args = Lists.newArrayList(TreeBuilder.makeField(c1), l1, l2);
     TreeNode substr = TreeBuilder.makeFunction("substr", args, new ArrowType.Utf8());
     TreeNode inExpr =
-            TreeBuilder.makeInExpressionString(substr, Sets.newHashSet("one", "two", "thr", "fou"));
+        TreeBuilder.makeInExpressionString(substr, Sets.newHashSet("one", "two", "thr", "fou"));
     ExpressionTree expr = TreeBuilder.makeExpression(inExpr, Field.nullable("result", boolType));
     Schema schema = new Schema(Lists.newArrayList(c1));
     Projector eval = Projector.make(schema, Lists.newArrayList(expr));
@@ -1305,8 +1305,8 @@ public void testInExprStrings() throws GandivaException, Exception {
     int numRows = 16;
     byte[] validity = new byte[]{(byte) 255, 0};
     String[] c1Values = new String[]{"one", "two", "three", "four", "five", "six", "seven",
-      "eight", "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
-      "sixteen"};
+        "eight", "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
+        "sixteen"};
 
     ArrowBuf c1Validity = buf(validity);
     List<ArrowBuf> dataBufsX = stringBufs(c1Values);
@@ -1314,10 +1314,10 @@ public void testInExprStrings() throws GandivaException, Exception {
 
     ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0);
     ArrowRecordBatch batch =
-            new ArrowRecordBatch(
-                    numRows,
-                    Lists.newArrayList(fieldNode, fieldNode),
-                    Lists.newArrayList(c1Validity, dataBufsX.get(0), dataBufsX.get(1), c2Validity));
+        new ArrowRecordBatch(
+            numRows,
+            Lists.newArrayList(fieldNode, fieldNode),
+            Lists.newArrayList(c1Validity, dataBufsX.get(0), dataBufsX.get(1), c2Validity));
 
     BitVector bitVector = new BitVector(EMPTY_SCHEMA_PATH, allocator);
     bitVector.allocateNew(numRows);
@@ -1509,9 +1509,9 @@ public void testDateTrunc() throws Exception {
 
     Field resultField = Field.nullable("result", date64);
     List<ExpressionTree> exprs =
-            Lists.newArrayList(
-                    TreeBuilder.makeExpression(dateToYear, resultField),
-                    TreeBuilder.makeExpression(dateToMonth, resultField));
+        Lists.newArrayList(
+            TreeBuilder.makeExpression(dateToYear, resultField),
+            TreeBuilder.makeExpression(dateToMonth, resultField));
 
     Schema schema = new Schema(Lists.newArrayList(dateField));
     Projector eval = Projector.make(schema, exprs);
@@ -1544,10 +1544,10 @@ public void testDateTrunc() throws Exception {
 
     ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0);
     ArrowRecordBatch batch =
-            new ArrowRecordBatch(
-                    numRows,
-                    Lists.newArrayList(fieldNode),
-                    Lists.newArrayList(bufValidity, millisData));
+        new ArrowRecordBatch(
+            numRows,
+            Lists.newArrayList(fieldNode),
+            Lists.newArrayList(bufValidity, millisData));
 
     List<ValueVector> output = new ArrayList<ValueVector>();
     for (int i = 0; i < exprs.size(); i++) {
@@ -2044,6 +2044,194 @@ public void testEvaluateWithUnsetTargetHostCPU() throws Exception {
     releaseRecordBatch(batch);
     releaseValueVectors(output);
     eval.close();
+  }  
+
+  @Test
+  public void testCastVarcharFromInteger() throws Exception {
+    Field inField = Field.nullable("input", int32);
+    Field lenField = Field.nullable("outLength", int64);
+
+    TreeNode inNode = TreeBuilder.makeField(inField);
+    TreeNode lenNode = TreeBuilder.makeField(lenField);
+
+    TreeNode tsToString = TreeBuilder.makeFunction("castVARCHAR", Lists.newArrayList(inNode, lenNode),
+        new ArrowType.Utf8());
+
+    Field resultField = Field.nullable("result", new ArrowType.Utf8());
+    List<ExpressionTree> exprs =
+        Lists.newArrayList(
+            TreeBuilder.makeExpression(tsToString, resultField));
+
+    Schema schema = new Schema(Lists.newArrayList(inField, lenField));
+    Projector eval = Projector.make(schema, exprs);
+
+    int numRows = 5;
+    byte[] validity = new byte[] {(byte) 255};
+    int[] values =
+        new int[] {
+            2345,
+            2345,
+            2345,
+            2345,
+            -2345,
+        };
+    long[] lenValues =
+        new long[] {
+            0L, 4L, 2L, 6L, 5L
+        };
+
+    String[] expValues =
+        new String[] {
+            "",
+            Integer.toString(2345).substring(0, 4),
+            Integer.toString(2345).substring(0, 2),
+            Integer.toString(2345),
+            Integer.toString(-2345)
+        };
+
+    ArrowBuf bufValidity = buf(validity);
+    ArrowBuf bufData = intBuf(values);
+    ArrowBuf lenValidity = buf(validity);
+    ArrowBuf lenData = longBuf(lenValues);
+
+    ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0);
+    ArrowRecordBatch batch =
+        new ArrowRecordBatch(
+            numRows,
+            Lists.newArrayList(fieldNode, fieldNode),
+            Lists.newArrayList(bufValidity, bufData, lenValidity, lenData));
+
+    List<ValueVector> output = new ArrayList<>();
+    for (int i = 0; i < exprs.size(); i++) {
+      VarCharVector charVector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator);
+
+      charVector.allocateNew(numRows * 5, numRows);
+      output.add(charVector);
+    }
+    eval.evaluate(batch, output);
+    eval.close();
+
+    for (ValueVector valueVector : output) {
+      VarCharVector charVector = (VarCharVector) valueVector;
+
+      for (int j = 0; j < numRows; j++) {
+        assertFalse(charVector.isNull(j));
+        assertEquals(expValues[j], new String(charVector.get(j)));
+      }
+    }
+
+    releaseRecordBatch(batch);
+    releaseValueVectors(output);
+  }
+
+  @Test
+  public void testCastVarcharFromFloat() throws Exception {
+    Field inField = Field.nullable("input", float64);
+    Field lenField = Field.nullable("outLength", int64);
+
+    TreeNode inNode = TreeBuilder.makeField(inField);
+    TreeNode lenNode = TreeBuilder.makeField(lenField);
+
+    TreeNode tsToString = TreeBuilder.makeFunction("castVARCHAR", Lists.newArrayList(inNode, lenNode),
+        new ArrowType.Utf8());
+
+    Field resultField = Field.nullable("result", new ArrowType.Utf8());
+    List<ExpressionTree> exprs =
+        Lists.newArrayList(
+            TreeBuilder.makeExpression(tsToString, resultField));
+
+    Schema schema = new Schema(Lists.newArrayList(inField, lenField));
+    Projector eval = Projector.make(schema, exprs);
+
+    int numRows = 5;
+    byte[] validity = new byte[] {(byte) 255};
+    double[] values =
+        new double[] {
+            0.0,
+            -0.0,
+            1.0,
+            0.001,
+            0.0009,
+            0.00099893,
+            999999.9999,
+            10000000.0,
+            23943410000000.343434,
+            Double.POSITIVE_INFINITY,
+            Double.NEGATIVE_INFINITY,
+            Double.NaN,
+            23.45,
+            23.45,
+            -23.45,
+        };
+    long[] lenValues =
+        new long[] {
+            6L, 6L, 6L, 6L, 10L, 15L, 15L, 15L, 30L,
+            15L, 15L, 15L, 0L, 6L, 6L
+        };
+
+    /* The Java real numbers are represented in two ways and Gandiva must
+     * follow the same rules:
+     * - If the number is greater or equals than 10^7 and less than 10^(-3)
+     *   it will be represented using scientific notation, e.g:
+     *       - 0.000012 -> 1.2E-5
+     *       - 10000002.3 -> 1.00000023E7
+     * - If the numbers are between that interval above, they are showed as is.
+     *
+     * The test checks if the Gandiva function casts the number with the same notation of the
+     * Java.
+     * */
+    String[] expValues =
+        new String[] {
+            Double.toString(0.0), // must be cast to -> "0.0"
+            Double.toString(-0.0), // must be cast to -> "-0.0"
+            Double.toString(1.0), // must be cast to -> "1.0"
+            Double.toString(0.001), // must be cast to -> "0.001"
+            Double.toString(0.0009), // must be cast to -> "9E-4"
+            Double.toString(0.00099893), // must be cast to -> "9E-4"
+            Double.toString(999999.9999), // must be cast to -> "999999.9999"
+            Double.toString(10000000.0), // must be cast to 1E7
+            Double.toString(23943410000000.343434),
+            Double.toString(Double.POSITIVE_INFINITY),
+            Double.toString(Double.NEGATIVE_INFINITY),
+            Double.toString(Double.NaN),
+            "",
+            Double.toString(23.45),
+            Double.toString(-23.45)
+        };
+
+    ArrowBuf bufValidity = buf(validity);
+    ArrowBuf bufData = doubleBuf(values);
+    ArrowBuf lenValidity = buf(validity);
+    ArrowBuf lenData = longBuf(lenValues);
+
+    ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0);
+    ArrowRecordBatch batch =
+        new ArrowRecordBatch(
+            numRows,
+            Lists.newArrayList(fieldNode, fieldNode),
+            Lists.newArrayList(bufValidity, bufData, lenValidity, lenData));
+
+    List<ValueVector> output = new ArrayList<>();
+    for (int i = 0; i < exprs.size(); i++) {
+      VarCharVector charVector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator);
+
+      charVector.allocateNew(numRows * 5, numRows);
+      output.add(charVector);
+    }
+    eval.evaluate(batch, output);
+    eval.close();
+
+    for (ValueVector valueVector : output) {
+      VarCharVector charVector = (VarCharVector) valueVector;
+
+      for (int j = 0; j < numRows; j++) {
+        assertFalse(charVector.isNull(j));
+        assertEquals(expValues[j], new String(charVector.get(j)));
+      }
+    }
+
+    releaseRecordBatch(batch);
+    releaseValueVectors(output);
   }
 
 }

From 9178c134d16d83ca7b900a7cabf4142e2f84f0bd Mon Sep 17 00:00:00 2001
From: Ritchie Vink <ritchie46@gmail.com>
Date: Thu, 15 Apr 2021 06:39:55 -0400
Subject: [PATCH 047/719] ARROW-12337: [Rust] add DoubleEndedIterator and
 ExactSizeIterator traits

This PR implements the traits `DoubleEndedIterator` and `ExactSizeIterator` for the arrow array iterators. For the trait `ExactSizeIterator` this is an indication of the types system that their size is known, and `DoubleEndedIterator`  make them iterable in reverse order. Both include the improve of the iterators.

Regarding this, I notice that the iterators check bounds twice.

```rust
    fn next(&mut self) -> Option<Self::Item> {
        let i = self.current;
        if i >= self.current_end { // first bounds check
            None
        } else if self.array.is_null(i) {
            self.current += 1;
            Some(None)
        } else {
            self.current += 1;
            Some(Some(self.array.value(i)))  // second bounds check in `array.value`
        }
    }
```

In some implementations `self.array.value` includes a second bounds check. Shall I propose a PR that uses `self.array.value_unchecked`? This is safe as the bounds are already checked.

Closes #9994 from ritchie46/more_iterator_traits

Authored-by: Ritchie Vink <ritchie46@gmail.com>
Signed-off-by: Andrew Lamb <andrew@nerdnetworks.org>
---
 rust/arrow/src/array/iterator.rs | 161 +++++++++++++++++++++++++------
 1 file changed, 132 insertions(+), 29 deletions(-)

diff --git a/rust/arrow/src/array/iterator.rs b/rust/arrow/src/array/iterator.rs
index cd891ba7b5d..28dbe3d55c6 100644
--- a/rust/arrow/src/array/iterator.rs
+++ b/rust/arrow/src/array/iterator.rs
@@ -155,8 +155,8 @@ where
     T: StringOffsetSizeTrait,
 {
     array: &'a GenericStringArray<T>,
-    i: usize,
-    len: usize,
+    current: usize,
+    current_end: usize,
 }
 
 impl<'a, T: StringOffsetSizeTrait> GenericStringIter<'a, T> {
@@ -164,8 +164,8 @@ impl<'a, T: StringOffsetSizeTrait> GenericStringIter<'a, T> {
     pub fn new(array: &'a GenericStringArray<T>) -> Self {
         GenericStringIter::<T> {
             array,
-            i: 0,
-            len: array.len(),
+            current: 0,
+            current_end: array.len(),
         }
     }
 }
@@ -174,20 +174,40 @@ impl<'a, T: StringOffsetSizeTrait> std::iter::Iterator for GenericStringIter<'a,
     type Item = Option<&'a str>;
 
     fn next(&mut self) -> Option<Self::Item> {
-        let i = self.i;
-        if i >= self.len {
+        let i = self.current;
+        if i >= self.current_end {
             None
         } else if self.array.is_null(i) {
-            self.i += 1;
+            self.current += 1;
             Some(None)
         } else {
-            self.i += 1;
+            self.current += 1;
             Some(Some(self.array.value(i)))
         }
     }
 
     fn size_hint(&self) -> (usize, Option<usize>) {
-        (self.len - self.i, Some(self.len - self.i))
+        (
+            self.current_end - self.current,
+            Some(self.current_end - self.current),
+        )
+    }
+}
+
+impl<'a, T: StringOffsetSizeTrait> std::iter::DoubleEndedIterator
+    for GenericStringIter<'a, T>
+{
+    fn next_back(&mut self) -> Option<Self::Item> {
+        if self.current_end == self.current {
+            None
+        } else {
+            self.current_end -= 1;
+            Some(if self.array.is_null(self.current_end) {
+                None
+            } else {
+                Some(self.array.value(self.current_end))
+            })
+        }
     }
 }
 
@@ -204,8 +224,8 @@ where
     T: BinaryOffsetSizeTrait,
 {
     array: &'a GenericBinaryArray<T>,
-    i: usize,
-    len: usize,
+    current: usize,
+    current_end: usize,
 }
 
 impl<'a, T: BinaryOffsetSizeTrait> GenericBinaryIter<'a, T> {
@@ -213,8 +233,8 @@ impl<'a, T: BinaryOffsetSizeTrait> GenericBinaryIter<'a, T> {
     pub fn new(array: &'a GenericBinaryArray<T>) -> Self {
         GenericBinaryIter::<T> {
             array,
-            i: 0,
-            len: array.len(),
+            current: 0,
+            current_end: array.len(),
         }
     }
 }
@@ -223,39 +243,65 @@ impl<'a, T: BinaryOffsetSizeTrait> std::iter::Iterator for GenericBinaryIter<'a,
     type Item = Option<&'a [u8]>;
 
     fn next(&mut self) -> Option<Self::Item> {
-        let i = self.i;
-        if i >= self.len {
+        let i = self.current;
+        if i >= self.current_end {
             None
         } else if self.array.is_null(i) {
-            self.i += 1;
+            self.current += 1;
             Some(None)
         } else {
-            self.i += 1;
+            self.current += 1;
             Some(Some(self.array.value(i)))
         }
     }
 
     fn size_hint(&self) -> (usize, Option<usize>) {
-        (self.len - self.i, Some(self.len - self.i))
+        (
+            self.current_end - self.current,
+            Some(self.current_end - self.current),
+        )
     }
 }
 
+impl<'a, T: BinaryOffsetSizeTrait> std::iter::DoubleEndedIterator
+    for GenericBinaryIter<'a, T>
+{
+    fn next_back(&mut self) -> Option<Self::Item> {
+        if self.current_end == self.current {
+            None
+        } else {
+            self.current_end -= 1;
+            Some(if self.array.is_null(self.current_end) {
+                None
+            } else {
+                Some(self.array.value(self.current_end))
+            })
+        }
+    }
+}
+
+/// all arrays have known size.
+impl<'a, T: BinaryOffsetSizeTrait> std::iter::ExactSizeIterator
+    for GenericBinaryIter<'a, T>
+{
+}
+
 #[derive(Debug)]
 pub struct GenericListArrayIter<'a, S>
 where
     S: OffsetSizeTrait,
 {
     array: &'a GenericListArray<S>,
-    i: usize,
-    len: usize,
+    current: usize,
+    current_end: usize,
 }
 
 impl<'a, S: OffsetSizeTrait> GenericListArrayIter<'a, S> {
     pub fn new(array: &'a GenericListArray<S>) -> Self {
         GenericListArrayIter::<S> {
             array,
-            i: 0,
-            len: array.len(),
+            current: 0,
+            current_end: array.len(),
         }
     }
 }
@@ -264,26 +310,46 @@ impl<'a, S: OffsetSizeTrait> std::iter::Iterator for GenericListArrayIter<'a, S>
     type Item = Option<ArrayRef>;
 
     fn next(&mut self) -> Option<Self::Item> {
-        let i = self.i;
-        if i >= self.len {
+        let i = self.current;
+        if i >= self.current_end {
             None
         } else if self.array.is_null(i) {
-            self.i += 1;
+            self.current += 1;
             Some(None)
         } else {
-            self.i += 1;
+            self.current += 1;
             Some(Some(self.array.value(i)))
         }
     }
 
     fn size_hint(&self) -> (usize, Option<usize>) {
-        (self.len - self.i, Some(self.len - self.i))
+        (
+            self.current_end - self.current,
+            Some(self.current_end - self.current),
+        )
+    }
+}
+
+impl<'a, S: OffsetSizeTrait> std::iter::DoubleEndedIterator
+    for GenericListArrayIter<'a, S>
+{
+    fn next_back(&mut self) -> Option<Self::Item> {
+        if self.current_end == self.current {
+            None
+        } else {
+            self.current_end -= 1;
+            Some(if self.array.is_null(self.current_end) {
+                None
+            } else {
+                Some(self.array.value(self.current_end))
+            })
+        }
     }
 }
 
 /// all arrays have known size.
-impl<'a, T: BinaryOffsetSizeTrait> std::iter::ExactSizeIterator
-    for GenericBinaryIter<'a, T>
+impl<'a, S: OffsetSizeTrait> std::iter::ExactSizeIterator
+    for GenericListArrayIter<'a, S>
 {
 }
 
@@ -305,6 +371,13 @@ mod tests {
 
         let expected = Int32Array::from(vec![Some(1), None, Some(3), None, Some(5)]);
         assert_eq!(result, expected);
+
+        // check if DoubleEndedIterator is implemented
+        let result: Int32Array = array.iter().rev().collect();
+        let rev_array = Int32Array::from(vec![Some(4), None, Some(2), None, Some(0)]);
+        assert_eq!(result, rev_array);
+        // check if ExactSizeIterator is implemented
+        let _ = array.iter().rposition(|opt_b| opt_b == Some(1));
     }
 
     #[test]
@@ -344,6 +417,14 @@ mod tests {
         let expected =
             StringArray::from(vec![Some("ab"), None, Some("aaab"), None, Some("aaaaab")]);
         assert_eq!(result, expected);
+
+        // check if DoubleEndedIterator is implemented
+        let result: StringArray = array.iter().rev().collect();
+        let rev_array =
+            StringArray::from(vec![Some("aaaaa"), None, Some("aaa"), None, Some("a")]);
+        assert_eq!(result, rev_array);
+        // check if ExactSizeIterator is implemented
+        let _ = array.iter().rposition(|opt_b| opt_b == Some("a"));
     }
 
     #[test]
@@ -360,6 +441,20 @@ mod tests {
         let result: BinaryArray = array.iter().collect();
 
         assert_eq!(result, array);
+
+        // check if DoubleEndedIterator is implemented
+        let result: BinaryArray = array.iter().rev().collect();
+        let rev_array = BinaryArray::from(vec![
+            Some(b"aaaaa" as &[u8]),
+            None,
+            Some(b"aaa"),
+            None,
+            Some(b"a"),
+        ]);
+        assert_eq!(result, rev_array);
+
+        // check if ExactSizeIterator is implemented
+        let _ = array.iter().rposition(|opt_b| opt_b == Some(&[9]));
     }
 
     #[test]
@@ -370,5 +465,13 @@ mod tests {
         let result: BooleanArray = array.iter().collect();
 
         assert_eq!(result, array);
+
+        // check if DoubleEndedIterator is implemented
+        let result: BooleanArray = array.iter().rev().collect();
+        let rev_array = BooleanArray::from(vec![Some(false), None, Some(true)]);
+        assert_eq!(result, rev_array);
+
+        // check if ExactSizeIterator is implemented
+        let _ = array.iter().rposition(|opt_b| opt_b == Some(true));
     }
 }

From 61935aa37a7d24c03672fefd9af82ba13a2e7ae9 Mon Sep 17 00:00:00 2001
From: Neville Dipale <nevilledips@gmail.com>
Date: Thu, 15 Apr 2021 06:41:41 -0400
Subject: [PATCH 048/719] ARROW-12250: [Rust] [Parquet] Fix failing
 arrow_writer test

A copy-paste mistake when creating the FSB test.
The sporadic failure happens if two tests try to write to the same file.

Closes #10043 from nevi-me/ARROW-12250

Authored-by: Neville Dipale <nevilledips@gmail.com>
Signed-off-by: Andrew Lamb <andrew@nerdnetworks.org>
---
 rust/parquet/src/arrow/arrow_writer.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rust/parquet/src/arrow/arrow_writer.rs b/rust/parquet/src/arrow/arrow_writer.rs
index 5716aaeacb7..e8eaf334e2b 100644
--- a/rust/parquet/src/arrow/arrow_writer.rs
+++ b/rust/parquet/src/arrow/arrow_writer.rs
@@ -1237,7 +1237,7 @@ mod tests {
         builder.append_value(b"1112").unwrap();
         let array = Arc::new(builder.finish());
 
-        one_column_roundtrip("timestamp_millisecond_single_column", array, true);
+        one_column_roundtrip("fixed_size_binary_single_column", array, true);
     }
 
     #[test]

From 645c3f948628905957a01e2bbd10292987ab213f Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Thu, 15 Apr 2021 12:50:50 +0200
Subject: [PATCH 049/719] ARROW-11472: [Python][CI] Remove temporary pin of
 numpy in kartothek integration build

Follow-up on https://github.com/apache/arrow/pull/9396

Closes #10044 from jorisvandenbossche/ARROW-11472-remove-pin

Authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Signed-off-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
---
 ci/docker/conda-python-kartothek.dockerfile | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/ci/docker/conda-python-kartothek.dockerfile b/ci/docker/conda-python-kartothek.dockerfile
index b1c1ed860a9..d523161822c 100644
--- a/ci/docker/conda-python-kartothek.dockerfile
+++ b/ci/docker/conda-python-kartothek.dockerfile
@@ -38,9 +38,7 @@ RUN conda install -c conda-forge -q \
         storefact \
         toolz \
         urlquote \
-        zstandard \
-        # temporary pin for numpy (see https://issues.apache.org/jira/browse/ARROW-11472)
-        numpy=1.19 && \
+        zstandard && \
     conda clean --all
 
 ARG kartothek=latest

From 1251f5362119ef8d353146b98be56cd04fd93729 Mon Sep 17 00:00:00 2001
From: Qingping Hou <dave2008713@gmail.com>
Date: Thu, 15 Apr 2021 06:56:31 -0400
Subject: [PATCH 050/719] ARROW-12317: [Rust] JSON writer support for time,
 duration and date

This PR adds support for Time and Date date types in Rust arrow JSON writer module.

Closes #9993 from houqp/qp_datetime

Authored-by: Qingping Hou <dave2008713@gmail.com>
Signed-off-by: Andrew Lamb <andrew@nerdnetworks.org>
---
 rust/arrow/src/array/array_primitive.rs |  29 ++-
 rust/arrow/src/datatypes/types.rs       |   4 +
 rust/arrow/src/json/writer.rs           | 259 +++++++++++++++++++++++-
 rust/arrow/src/temporal_conversions.rs  |  26 ++-
 4 files changed, 305 insertions(+), 13 deletions(-)

diff --git a/rust/arrow/src/array/array_primitive.rs b/rust/arrow/src/array/array_primitive.rs
index 9fdc0be33d8..2280952f12c 100644
--- a/rust/arrow/src/array/array_primitive.rs
+++ b/rust/arrow/src/array/array_primitive.rs
@@ -22,7 +22,7 @@ use std::fmt;
 use std::iter::{FromIterator, IntoIterator};
 use std::mem;
 
-use chrono::prelude::*;
+use chrono::{prelude::*, Duration};
 
 use super::array::print_long_array;
 use super::raw_pointer::RawPtrBox;
@@ -202,6 +202,24 @@ fn as_time<T: ArrowPrimitiveType>(v: i64) -> Option<NaiveTime> {
     }
 }
 
+fn as_duration<T: ArrowPrimitiveType>(v: i64) -> Option<Duration> {
+    match T::DATA_TYPE {
+        DataType::Duration(unit) => match unit {
+            TimeUnit::Second => Some(temporal_conversions::duration_s_to_duration(v)),
+            TimeUnit::Millisecond => {
+                Some(temporal_conversions::duration_ms_to_duration(v))
+            }
+            TimeUnit::Microsecond => {
+                Some(temporal_conversions::duration_us_to_duration(v))
+            }
+            TimeUnit::Nanosecond => {
+                Some(temporal_conversions::duration_ns_to_duration(v))
+            }
+        },
+        _ => None,
+    }
+}
+
 impl<T: ArrowTemporalType + ArrowNumericType> PrimitiveArray<T>
 where
     i64: std::convert::From<T::Native>,
@@ -227,6 +245,13 @@ where
     pub fn value_as_time(&self, i: usize) -> Option<NaiveTime> {
         as_time::<T>(i64::from(self.value(i)))
     }
+
+    /// Returns a value as a chrono `Duration`
+    ///
+    /// If a data type cannot be converted to `Duration`, a `None` is returned
+    pub fn value_as_duration(&self, i: usize) -> Option<Duration> {
+        as_duration::<T>(i64::from(self.value(i)))
+    }
 }
 
 impl<T: ArrowPrimitiveType> fmt::Debug for PrimitiveArray<T> {
@@ -385,8 +410,10 @@ def_numeric_from_vec!(DurationSecondType);
 def_numeric_from_vec!(DurationMillisecondType);
 def_numeric_from_vec!(DurationMicrosecondType);
 def_numeric_from_vec!(DurationNanosecondType);
+def_numeric_from_vec!(TimestampSecondType);
 def_numeric_from_vec!(TimestampMillisecondType);
 def_numeric_from_vec!(TimestampMicrosecondType);
+def_numeric_from_vec!(TimestampNanosecondType);
 
 impl<T: ArrowTimestampType> PrimitiveArray<T> {
     /// Construct a timestamp array from a vec of i64 values and an optional timezone
diff --git a/rust/arrow/src/datatypes/types.rs b/rust/arrow/src/datatypes/types.rs
index 77a1783d191..30c9aae8956 100644
--- a/rust/arrow/src/datatypes/types.rs
+++ b/rust/arrow/src/datatypes/types.rs
@@ -152,6 +152,10 @@ impl ArrowTemporalType for Time64MicrosecondType {}
 impl ArrowTemporalType for Time64NanosecondType {}
 // impl ArrowTemporalType for IntervalYearMonthType {}
 // impl ArrowTemporalType for IntervalDayTimeType {}
+impl ArrowTemporalType for DurationSecondType {}
+impl ArrowTemporalType for DurationMillisecondType {}
+impl ArrowTemporalType for DurationMicrosecondType {}
+impl ArrowTemporalType for DurationNanosecondType {}
 
 /// A timestamp type allows us to create array builders that take a timestamp.
 pub trait ArrowTimestampType: ArrowTemporalType {
diff --git a/rust/arrow/src/json/writer.rs b/rust/arrow/src/json/writer.rs
index c872b727d09..27c1ff138aa 100644
--- a/rust/arrow/src/json/writer.rs
+++ b/rust/arrow/src/json/writer.rs
@@ -219,8 +219,8 @@ macro_rules! set_column_by_array_type {
     };
 }
 
-macro_rules! set_timestamp_column_by_array_type {
-    ($array_type:ident, $col_name:ident, $rows:ident, $array:ident, $row_count:ident) => {
+macro_rules! set_temporal_column_by_array_type {
+    ($array_type:ident, $col_name:ident, $rows:ident, $array:ident, $row_count:ident, $cast_fn:ident) => {
         let arr = $array.as_any().downcast_ref::<$array_type>().unwrap();
 
         $rows
@@ -229,7 +229,7 @@ macro_rules! set_timestamp_column_by_array_type {
             .take($row_count)
             .for_each(|(i, row)| {
                 if !arr.is_null(i) {
-                    if let Some(v) = arr.value_as_datetime(i) {
+                    if let Some(v) = arr.$cast_fn(i) {
                         row.insert($col_name.to_string(), v.to_string().into());
                     }
                 }
@@ -302,40 +302,144 @@ fn set_column_for_json_rows(
         DataType::Utf8 => {
             set_column_by_array_type!(as_string_array, col_name, rows, array, row_count);
         }
+        DataType::Date32 => {
+            set_temporal_column_by_array_type!(
+                Date32Array,
+                col_name,
+                rows,
+                array,
+                row_count,
+                value_as_date
+            );
+        }
+        DataType::Date64 => {
+            set_temporal_column_by_array_type!(
+                Date64Array,
+                col_name,
+                rows,
+                array,
+                row_count,
+                value_as_date
+            );
+        }
         DataType::Timestamp(TimeUnit::Second, _) => {
-            set_timestamp_column_by_array_type!(
+            set_temporal_column_by_array_type!(
                 TimestampSecondArray,
                 col_name,
                 rows,
                 array,
-                row_count
+                row_count,
+                value_as_datetime
             );
         }
         DataType::Timestamp(TimeUnit::Millisecond, _) => {
-            set_timestamp_column_by_array_type!(
+            set_temporal_column_by_array_type!(
                 TimestampMillisecondArray,
                 col_name,
                 rows,
                 array,
-                row_count
+                row_count,
+                value_as_datetime
             );
         }
         DataType::Timestamp(TimeUnit::Microsecond, _) => {
-            set_timestamp_column_by_array_type!(
+            set_temporal_column_by_array_type!(
                 TimestampMicrosecondArray,
                 col_name,
                 rows,
                 array,
-                row_count
+                row_count,
+                value_as_datetime
             );
         }
         DataType::Timestamp(TimeUnit::Nanosecond, _) => {
-            set_timestamp_column_by_array_type!(
+            set_temporal_column_by_array_type!(
                 TimestampNanosecondArray,
                 col_name,
                 rows,
                 array,
-                row_count
+                row_count,
+                value_as_datetime
+            );
+        }
+        DataType::Time32(TimeUnit::Second) => {
+            set_temporal_column_by_array_type!(
+                Time32SecondArray,
+                col_name,
+                rows,
+                array,
+                row_count,
+                value_as_time
+            );
+        }
+        DataType::Time32(TimeUnit::Millisecond) => {
+            set_temporal_column_by_array_type!(
+                Time32MillisecondArray,
+                col_name,
+                rows,
+                array,
+                row_count,
+                value_as_time
+            );
+        }
+        DataType::Time64(TimeUnit::Microsecond) => {
+            set_temporal_column_by_array_type!(
+                Time64MicrosecondArray,
+                col_name,
+                rows,
+                array,
+                row_count,
+                value_as_time
+            );
+        }
+        DataType::Time64(TimeUnit::Nanosecond) => {
+            set_temporal_column_by_array_type!(
+                Time64NanosecondArray,
+                col_name,
+                rows,
+                array,
+                row_count,
+                value_as_time
+            );
+        }
+        DataType::Duration(TimeUnit::Second) => {
+            set_temporal_column_by_array_type!(
+                DurationSecondArray,
+                col_name,
+                rows,
+                array,
+                row_count,
+                value_as_duration
+            );
+        }
+        DataType::Duration(TimeUnit::Millisecond) => {
+            set_temporal_column_by_array_type!(
+                DurationMillisecondArray,
+                col_name,
+                rows,
+                array,
+                row_count,
+                value_as_duration
+            );
+        }
+        DataType::Duration(TimeUnit::Microsecond) => {
+            set_temporal_column_by_array_type!(
+                DurationMicrosecondArray,
+                col_name,
+                rows,
+                array,
+                row_count,
+                value_as_duration
+            );
+        }
+        DataType::Duration(TimeUnit::Nanosecond) => {
+            set_temporal_column_by_array_type!(
+                DurationNanosecondArray,
+                col_name,
+                rows,
+                array,
+                row_count,
+                value_as_duration
             );
         }
         DataType::Struct(_) => {
@@ -563,6 +667,7 @@ where
 
 #[cfg(test)]
 mod tests {
+    use std::convert::TryFrom;
     use std::fs::{read_to_string, File};
     use std::sync::Arc;
 
@@ -660,6 +765,138 @@ mod tests {
         );
     }
 
+    #[test]
+    fn write_dates() {
+        let ts_string = "2018-11-13T17:11:10.011375885995";
+        let ts_millis = ts_string
+            .parse::<chrono::NaiveDateTime>()
+            .unwrap()
+            .timestamp_millis();
+
+        let arr_date32 = Date32Array::from(vec![
+            Some(i32::try_from(ts_millis / 1000 / (60 * 60 * 24)).unwrap()),
+            None,
+        ]);
+        let arr_date64 = Date64Array::from(vec![Some(ts_millis), None]);
+        let arr_names = StringArray::from(vec![Some("a"), Some("b")]);
+
+        let schema = Schema::new(vec![
+            Field::new("date32", arr_date32.data_type().clone(), false),
+            Field::new("date64", arr_date64.data_type().clone(), false),
+            Field::new("name", arr_names.data_type().clone(), false),
+        ]);
+        let schema = Arc::new(schema);
+
+        let batch = RecordBatch::try_new(
+            schema,
+            vec![
+                Arc::new(arr_date32),
+                Arc::new(arr_date64),
+                Arc::new(arr_names),
+            ],
+        )
+        .unwrap();
+
+        let mut buf = Vec::new();
+        {
+            let mut writer = LineDelimitedWriter::new(&mut buf);
+            writer.write_batches(&[batch]).unwrap();
+        }
+
+        assert_eq!(
+            String::from_utf8(buf).unwrap(),
+            r#"{"date32":"2018-11-13","date64":"2018-11-13","name":"a"}
+{"name":"b"}
+"#
+        );
+    }
+
+    #[test]
+    fn write_times() {
+        let arr_time32sec = Time32SecondArray::from(vec![Some(120), None]);
+        let arr_time32msec = Time32MillisecondArray::from(vec![Some(120), None]);
+        let arr_time64usec = Time64MicrosecondArray::from(vec![Some(120), None]);
+        let arr_time64nsec = Time64NanosecondArray::from(vec![Some(120), None]);
+        let arr_names = StringArray::from(vec![Some("a"), Some("b")]);
+
+        let schema = Schema::new(vec![
+            Field::new("time32sec", arr_time32sec.data_type().clone(), false),
+            Field::new("time32msec", arr_time32msec.data_type().clone(), false),
+            Field::new("time64usec", arr_time64usec.data_type().clone(), false),
+            Field::new("time64nsec", arr_time64nsec.data_type().clone(), false),
+            Field::new("name", arr_names.data_type().clone(), false),
+        ]);
+        let schema = Arc::new(schema);
+
+        let batch = RecordBatch::try_new(
+            schema,
+            vec![
+                Arc::new(arr_time32sec),
+                Arc::new(arr_time32msec),
+                Arc::new(arr_time64usec),
+                Arc::new(arr_time64nsec),
+                Arc::new(arr_names),
+            ],
+        )
+        .unwrap();
+
+        let mut buf = Vec::new();
+        {
+            let mut writer = LineDelimitedWriter::new(&mut buf);
+            writer.write_batches(&[batch]).unwrap();
+        }
+
+        assert_eq!(
+            String::from_utf8(buf).unwrap(),
+            r#"{"time32sec":"00:02:00","time32msec":"00:00:00.120","time64usec":"00:00:00.000120","time64nsec":"00:00:00.000000120","name":"a"}
+{"name":"b"}
+"#
+        );
+    }
+
+    #[test]
+    fn write_durations() {
+        let arr_durationsec = DurationSecondArray::from(vec![Some(120), None]);
+        let arr_durationmsec = DurationMillisecondArray::from(vec![Some(120), None]);
+        let arr_durationusec = DurationMicrosecondArray::from(vec![Some(120), None]);
+        let arr_durationnsec = DurationNanosecondArray::from(vec![Some(120), None]);
+        let arr_names = StringArray::from(vec![Some("a"), Some("b")]);
+
+        let schema = Schema::new(vec![
+            Field::new("duration_sec", arr_durationsec.data_type().clone(), false),
+            Field::new("duration_msec", arr_durationmsec.data_type().clone(), false),
+            Field::new("duration_usec", arr_durationusec.data_type().clone(), false),
+            Field::new("duration_nsec", arr_durationnsec.data_type().clone(), false),
+            Field::new("name", arr_names.data_type().clone(), false),
+        ]);
+        let schema = Arc::new(schema);
+
+        let batch = RecordBatch::try_new(
+            schema,
+            vec![
+                Arc::new(arr_durationsec),
+                Arc::new(arr_durationmsec),
+                Arc::new(arr_durationusec),
+                Arc::new(arr_durationnsec),
+                Arc::new(arr_names),
+            ],
+        )
+        .unwrap();
+
+        let mut buf = Vec::new();
+        {
+            let mut writer = LineDelimitedWriter::new(&mut buf);
+            writer.write_batches(&[batch]).unwrap();
+        }
+
+        assert_eq!(
+            String::from_utf8(buf).unwrap(),
+            r#"{"duration_sec":"PT120S","duration_msec":"PT0.120S","duration_usec":"PT0.000120S","duration_nsec":"PT0.000000120S","name":"a"}
+{"name":"b"}
+"#
+        );
+    }
+
     #[test]
     fn write_nested_structs() {
         let schema = Schema::new(vec![
diff --git a/rust/arrow/src/temporal_conversions.rs b/rust/arrow/src/temporal_conversions.rs
index 4033839e7d9..2d6d6776f59 100644
--- a/rust/arrow/src/temporal_conversions.rs
+++ b/rust/arrow/src/temporal_conversions.rs
@@ -17,7 +17,7 @@
 
 //! Conversion methods for dates and times.
 
-use chrono::{NaiveDateTime, NaiveTime};
+use chrono::{Duration, NaiveDateTime, NaiveTime};
 
 /// Number of seconds in a day
 const SECONDS_IN_DAY: i64 = 86_400;
@@ -125,3 +125,27 @@ pub fn timestamp_ns_to_datetime(v: i64) -> NaiveDateTime {
         (v % NANOSECONDS) as u32,
     )
 }
+
+/// converts a `i64` representing a `duration(s)` to [`Duration`]
+#[inline]
+pub fn duration_s_to_duration(v: i64) -> Duration {
+    Duration::seconds(v)
+}
+
+/// converts a `i64` representing a `duration(ms)` to [`Duration`]
+#[inline]
+pub fn duration_ms_to_duration(v: i64) -> Duration {
+    Duration::milliseconds(v)
+}
+
+/// converts a `i64` representing a `duration(us)` to [`Duration`]
+#[inline]
+pub fn duration_us_to_duration(v: i64) -> Duration {
+    Duration::microseconds(v)
+}
+
+/// converts a `i64` representing a `duration(ns)` to [`Duration`]
+#[inline]
+pub fn duration_ns_to_duration(v: i64) -> Duration {
+    Duration::nanoseconds(v)
+}

From 95b8f996fbe1467a76a3731349732ae646724ef3 Mon Sep 17 00:00:00 2001
From: witchard <witchard@hotmail.co.uk>
Date: Thu, 15 Apr 2021 07:02:18 -0400
Subject: [PATCH 051/719] ARROW-12397: [Rust] [DataFusion] Simplify readme
 example

Update readme to remove unnecessary additional lines.

Closes #10038 from witchard/datafusion_readme

Authored-by: witchard <witchard@hotmail.co.uk>
Signed-off-by: Andrew Lamb <andrew@nerdnetworks.org>
---
 rust/datafusion/README.md | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/rust/datafusion/README.md b/rust/datafusion/README.md
index d0bb7d38892..e5849b84ca7 100644
--- a/rust/datafusion/README.md
+++ b/rust/datafusion/README.md
@@ -69,10 +69,7 @@ use arrow::record_batch::RecordBatch;
 
 #[tokio::main]
 async fn main() -> datafusion::error::Result<()> {
-  // create the dataframe
-  let mut ctx = ExecutionContext::new();
-  let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
-
+  // register the table
   let mut ctx = ExecutionContext::new();
   ctx.register_csv("example", "tests/example.csv", CsvReadOptions::new())?;
 

From 26a5a049b46c6949eb99f09a41da56232f0564c8 Mon Sep 17 00:00:00 2001
From: "Heres, Daniel" <danielheres@gmail.com>
Date: Thu, 15 Apr 2021 07:03:49 -0400
Subject: [PATCH 052/719] ARROW-12390: [Rust] Inline from_trusted_len_iter,
 try_from_trusted_len_iter, extend_from_slice

This helps with further optimizing performance on quite some kernels:

```
length                  time:   [721.26 ns 725.59 ns 731.42 ns]
                        change: [-69.510% -69.125% -68.566%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 11 outliers among 100 measurements (11.00%)
  7 (7.00%) high mild
  4 (4.00%) high severe
take i32 512            time:   [376.15 ns 378.51 ns 381.09 ns]
                        change: [-18.681% -18.212% -17.542%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 5 outliers among 100 measurements (5.00%)
  3 (3.00%) high mild
  2 (2.00%) high severe

take i32 1024           time:   [640.71 ns 641.87 ns 643.19 ns]
                        change: [-28.880% -28.695% -28.516%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 3 outliers among 100 measurements (3.00%)
  2 (2.00%) high mild
  1 (1.00%) high severe

take i32 nulls 512      time:   [634.80 ns 635.63 ns 636.56 ns]
                        change: [-24.797% -23.403% -22.272%] (p = 0.00 < 0.05)
                        Performance has improved.

take i32 nulls 1024     time:   [1.0084 us 1.0093 us 1.0103 us]
                        change: [-44.482% -42.312% -40.209%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 8 outliers among 100 measurements (8.00%)
  1 (1.00%) low mild
  5 (5.00%) high mild
  2 (2.00%) high severe

take bool 512           time:   [1.4068 us 1.4188 us 1.4315 us]
                        change: [-49.139% -48.684% -48.208%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 2 outliers among 100 measurements (2.00%)
  2 (2.00%) high mild

take bool 1024          time:   [2.6662 us 2.6807 us 2.6973 us]
                        change: [-55.452% -55.133% -54.803%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 1 outliers among 100 measurements (1.00%)
  1 (1.00%) high mild

take bool nulls 512     time:   [1.3331 us 1.3452 us 1.3617 us]
                        change: [-31.198% -30.327% -29.414%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 3 outliers among 100 measurements (3.00%)
  1 (1.00%) high mild
  2 (2.00%) high severe

take bool nulls 1024    time:   [2.4789 us 2.4968 us 2.5229 us]
                        change: [-40.358% -39.969% -39.558%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 4 outliers among 100 measurements (4.00%)
  3 (3.00%) high mild
  1 (1.00%) high severe

```

FYI @jorgecarleitao

Closes #10039 from Dandandan/inline_from_trusted_len_iter

Authored-by: Heres, Daniel <danielheres@gmail.com>
Signed-off-by: Andrew Lamb <andrew@nerdnetworks.org>
---
 rust/arrow/src/buffer/immutable.rs | 2 ++
 rust/arrow/src/buffer/mutable.rs   | 4 ++++
 2 files changed, 6 insertions(+)

diff --git a/rust/arrow/src/buffer/immutable.rs b/rust/arrow/src/buffer/immutable.rs
index c09e4ddc48a..cd6a2a3c130 100644
--- a/rust/arrow/src/buffer/immutable.rs
+++ b/rust/arrow/src/buffer/immutable.rs
@@ -275,6 +275,7 @@ impl Buffer {
     // 1. there is no trait `TrustedLen` in stable rust and therefore
     //    we can't specialize `extend` for `TrustedLen` like `Vec` does.
     // 2. `from_trusted_len_iter` is faster.
+    #[inline]
     pub unsafe fn from_trusted_len_iter<T: ArrowNativeType, I: Iterator<Item = T>>(
         iterator: I,
     ) -> Self {
@@ -287,6 +288,7 @@ impl Buffer {
     /// # Safety
     /// This method assumes that the iterator's size is correct and is undefined behavior
     /// to use it on an iterator that reports an incorrect length.
+    #[inline]
     pub unsafe fn try_from_trusted_len_iter<
         E,
         T: ArrowNativeType,
diff --git a/rust/arrow/src/buffer/mutable.rs b/rust/arrow/src/buffer/mutable.rs
index 3351be7d73d..d7fd5b9d200 100644
--- a/rust/arrow/src/buffer/mutable.rs
+++ b/rust/arrow/src/buffer/mutable.rs
@@ -263,6 +263,7 @@ impl MutableBuffer {
     /// buffer.extend_from_slice(&[2u32, 0]);
     /// assert_eq!(buffer.len(), 8) // u32 has 4 bytes
     /// ```
+    #[inline]
     pub fn extend_from_slice<T: ToByteSlice>(&mut self, items: &[T]) {
         let len = items.len();
         let additional = len * std::mem::size_of::<T>();
@@ -391,6 +392,7 @@ impl MutableBuffer {
     // 1. there is no trait `TrustedLen` in stable rust and therefore
     //    we can't specialize `extend` for `TrustedLen` like `Vec` does.
     // 2. `from_trusted_len_iter` is faster.
+    #[inline]
     pub unsafe fn from_trusted_len_iter<T: ArrowNativeType, I: Iterator<Item = T>>(
         iterator: I,
     ) -> Self {
@@ -432,6 +434,7 @@ impl MutableBuffer {
     // 1. there is no trait `TrustedLen` in stable rust and therefore
     //    we can't specialize `extend` for `TrustedLen` like `Vec` does.
     // 2. `from_trusted_len_iter_bool` is faster.
+    #[inline]
     pub unsafe fn from_trusted_len_iter_bool<I: Iterator<Item = bool>>(
         mut iterator: I,
     ) -> Self {
@@ -476,6 +479,7 @@ impl MutableBuffer {
     /// # Safety
     /// This method assumes that the iterator's size is correct and is undefined behavior
     /// to use it on an iterator that reports an incorrect length.
+    #[inline]
     pub unsafe fn try_from_trusted_len_iter<
         E,
         T: ArrowNativeType,

From c394a20da5a11d220f979293ef4ed6601eea7346 Mon Sep 17 00:00:00 2001
From: "Uwe L. Korn" <uwe.korn@quantco.com>
Date: Thu, 15 Apr 2021 13:33:25 +0200
Subject: [PATCH 053/719] ARROW-12246: [CI] Sync conda recipes with upstream
 feedstock

(I am first trying with linux)

Closes #9923 from jorisvandenbossche/ARROW-12246-conda-recipes

Lead-authored-by: Uwe L. Korn <uwe.korn@quantco.com>
Co-authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Signed-off-by: Uwe L. Korn <uwe.korn@quantco.com>
---
 ...on10.2numpy1.17python3.6.____cpython.yaml} | 35 +++----
 ...on10.2numpy1.17python3.7.____cpython.yaml} | 35 +++----
 ...on10.2numpy1.17python3.8.____cpython.yaml} | 35 +++----
 ...on10.2numpy1.19python3.9.____cpython.yaml} | 33 ++++---
 ...onNonenumpy1.17python3.6.____cpython.yaml} | 33 ++++---
 ...onNonenumpy1.17python3.7.____cpython.yaml} | 33 ++++---
 ...onNonenumpy1.17python3.8.____cpython.yaml} | 33 ++++---
 ...onNonenumpy1.19python3.9.____cpython.yaml} | 31 +++---
 ...sx_64_numpy1.17python3.6.____cpython.yaml} | 28 +++---
 ...sx_64_numpy1.17python3.7.____cpython.yaml} | 28 +++---
 ...sx_64_numpy1.17python3.8.____cpython.yaml} | 29 +++---
 ...sx_64_numpy1.19python3.9.____cpython.yaml} | 26 +++--
 .../osx_arm64_python3.8.____cpython.yaml      | 65 +++++++++++++
 .../osx_arm64_python3.9.____cpython.yaml      | 65 +++++++++++++
 ...onNonenumpy1.17python3.6.____cpython.yaml} | 20 ++--
 ...onNonenumpy1.17python3.7.____cpython.yaml} | 20 ++--
 ...onNonenumpy1.17python3.8.____cpython.yaml} | 20 ++--
 ...onNonenumpy1.19python3.9.____cpython.yaml} | 18 ++--
 .../conda-recipes/.scripts/logging_utils.sh   | 30 ++++++
 .../conda-recipes/arrow-cpp/bld-arrow.bat     | 14 +++
 .../conda-recipes/arrow-cpp/bld-pyarrow.bat   | 14 ++-
 .../conda-recipes/arrow-cpp/build-arrow.sh    | 39 +++++---
 .../conda-recipes/arrow-cpp/build-pyarrow.sh  | 14 +--
 dev/tasks/conda-recipes/arrow-cpp/meta.yaml   | 97 ++++++++++---------
 dev/tasks/conda-recipes/azure.osx.yml         | 11 ++-
 dev/tasks/conda-recipes/build_steps.sh        |  4 +
 dev/tasks/conda-recipes/run_docker_build.sh   |  4 +
 dev/tasks/tasks.yml                           | 57 ++++++++---
 28 files changed, 557 insertions(+), 314 deletions(-)
 rename dev/tasks/conda-recipes/.ci_support/{linux_cuda_compiler_version9.2python3.6.____cpython.yaml => linux_64_cuda_compiler_version10.2numpy1.17python3.6.____cpython.yaml} (76%)
 rename dev/tasks/conda-recipes/.ci_support/{linux_cuda_compiler_version9.2python3.7.____cpython.yaml => linux_64_cuda_compiler_version10.2numpy1.17python3.7.____cpython.yaml} (76%)
 rename dev/tasks/conda-recipes/.ci_support/{linux_cuda_compiler_version9.2python3.8.____cpython.yaml => linux_64_cuda_compiler_version10.2numpy1.17python3.8.____cpython.yaml} (76%)
 rename dev/tasks/conda-recipes/.ci_support/{linux_cuda_compiler_version9.2python3.9.____cpython.yaml => linux_64_cuda_compiler_version10.2numpy1.19python3.9.____cpython.yaml} (77%)
 rename dev/tasks/conda-recipes/.ci_support/{linux_cuda_compiler_versionNonepython3.6.____cpython.yaml => linux_64_cuda_compiler_versionNonenumpy1.17python3.6.____cpython.yaml} (77%)
 rename dev/tasks/conda-recipes/.ci_support/{linux_cuda_compiler_versionNonepython3.7.____cpython.yaml => linux_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython.yaml} (77%)
 rename dev/tasks/conda-recipes/.ci_support/{linux_cuda_compiler_versionNonepython3.8.____cpython.yaml => linux_64_cuda_compiler_versionNonenumpy1.17python3.8.____cpython.yaml} (77%)
 rename dev/tasks/conda-recipes/.ci_support/{linux_cuda_compiler_versionNonepython3.9.____cpython.yaml => linux_64_cuda_compiler_versionNonenumpy1.19python3.9.____cpython.yaml} (78%)
 rename dev/tasks/conda-recipes/.ci_support/{osx_python3.6.____cpython.yaml => osx_64_numpy1.17python3.6.____cpython.yaml} (81%)
 rename dev/tasks/conda-recipes/.ci_support/{osx_python3.7.____cpython.yaml => osx_64_numpy1.17python3.7.____cpython.yaml} (81%)
 rename dev/tasks/conda-recipes/.ci_support/{osx_python3.8.____cpython.yaml => osx_64_numpy1.17python3.8.____cpython.yaml} (77%)
 rename dev/tasks/conda-recipes/.ci_support/{osx_python3.9.____cpython.yaml => osx_64_numpy1.19python3.9.____cpython.yaml} (82%)
 create mode 100644 dev/tasks/conda-recipes/.ci_support/osx_arm64_python3.8.____cpython.yaml
 create mode 100644 dev/tasks/conda-recipes/.ci_support/osx_arm64_python3.9.____cpython.yaml
 rename dev/tasks/conda-recipes/.ci_support/{win_python3.6.____cpython.yaml => win_64_cuda_compiler_versionNonenumpy1.17python3.6.____cpython.yaml} (82%)
 rename dev/tasks/conda-recipes/.ci_support/{win_python3.7.____cpython.yaml => win_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython.yaml} (82%)
 rename dev/tasks/conda-recipes/.ci_support/{win_python3.8.____cpython.yaml => win_64_cuda_compiler_versionNonenumpy1.17python3.8.____cpython.yaml} (82%)
 rename dev/tasks/conda-recipes/.ci_support/{win_python3.9.____cpython.yaml => win_64_cuda_compiler_versionNonenumpy1.19python3.9.____cpython.yaml} (83%)
 create mode 100644 dev/tasks/conda-recipes/.scripts/logging_utils.sh
 mode change 100755 => 100644 dev/tasks/conda-recipes/arrow-cpp/build-arrow.sh
 mode change 100755 => 100644 dev/tasks/conda-recipes/arrow-cpp/build-pyarrow.sh

diff --git a/dev/tasks/conda-recipes/.ci_support/linux_cuda_compiler_version9.2python3.6.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.6.____cpython.yaml
similarity index 76%
rename from dev/tasks/conda-recipes/.ci_support/linux_cuda_compiler_version9.2python3.6.____cpython.yaml
rename to dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.6.____cpython.yaml
index 4c5061d6ff2..dd4c04197c9 100644
--- a/dev/tasks/conda-recipes/.ci_support/linux_cuda_compiler_version9.2python3.6.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.6.____cpython.yaml
@@ -1,13 +1,13 @@
 aws_sdk_cpp:
-- 1.8.63
-boost_cpp:
-- 1.74.0
+- 1.8.151
 bzip2:
 - '1'
 c_compiler:
 - gcc
 c_compiler_version:
-- '7'
+- '9'
+cdt_name:
+- cos6
 channel_sources:
 - conda-forge,defaults
 channel_targets:
@@ -15,30 +15,28 @@ channel_targets:
 cuda_compiler:
 - nvcc
 cuda_compiler_version:
-- '9.2'
+- '10.2'
 cxx_compiler:
 - gxx
 cxx_compiler_version:
-- '7'
+- '9'
 docker_image:
-- condaforge/linux-anvil-cuda:9.2
+- quay.io/condaforge/linux-anvil-cuda:10.2
 gflags:
 - '2.2'
 glog:
 - 0.4.0
 grpc_cpp:
-- '1.32'
+- '1.36'
 libprotobuf:
-- '3.13'
+- '3.15'
 lz4_c:
-- 1.9.2
+- 1.9.3
 numpy:
-- '1.16'
+- '1.17'
 orc:
-- 1.6.5
+- 1.6.7
 pin_run_as_build:
-  boost-cpp:
-    max_pin: x.x.x
   bzip2:
     max_pin: x
   lz4-c:
@@ -51,18 +49,21 @@ pin_run_as_build:
 python:
 - 3.6.* *_cpython
 re2:
-- 2020.10.01
+- 2021.04.01
 snappy:
 - '1'
 target_platform:
 - linux-64
+thrift_cpp:
+- 0.14.1
 zip_keys:
 - - c_compiler_version
   - cxx_compiler_version
 - - cuda_compiler_version
+  - cdt_name
   - docker_image
-- - numpy
-  - python
+- - python
+  - numpy
 zlib:
 - '1.2'
 zstd:
diff --git a/dev/tasks/conda-recipes/.ci_support/linux_cuda_compiler_version9.2python3.7.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.7.____cpython.yaml
similarity index 76%
rename from dev/tasks/conda-recipes/.ci_support/linux_cuda_compiler_version9.2python3.7.____cpython.yaml
rename to dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.7.____cpython.yaml
index db820f3ff26..f0c43929b56 100644
--- a/dev/tasks/conda-recipes/.ci_support/linux_cuda_compiler_version9.2python3.7.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.7.____cpython.yaml
@@ -1,13 +1,13 @@
 aws_sdk_cpp:
-- 1.8.63
-boost_cpp:
-- 1.74.0
+- 1.8.151
 bzip2:
 - '1'
 c_compiler:
 - gcc
 c_compiler_version:
-- '7'
+- '9'
+cdt_name:
+- cos6
 channel_sources:
 - conda-forge,defaults
 channel_targets:
@@ -15,30 +15,28 @@ channel_targets:
 cuda_compiler:
 - nvcc
 cuda_compiler_version:
-- '9.2'
+- '10.2'
 cxx_compiler:
 - gxx
 cxx_compiler_version:
-- '7'
+- '9'
 docker_image:
-- condaforge/linux-anvil-cuda:9.2
+- quay.io/condaforge/linux-anvil-cuda:10.2
 gflags:
 - '2.2'
 glog:
 - 0.4.0
 grpc_cpp:
-- '1.32'
+- '1.36'
 libprotobuf:
-- '3.13'
+- '3.15'
 lz4_c:
-- 1.9.2
+- 1.9.3
 numpy:
-- '1.16'
+- '1.17'
 orc:
-- 1.6.5
+- 1.6.7
 pin_run_as_build:
-  boost-cpp:
-    max_pin: x.x.x
   bzip2:
     max_pin: x
   lz4-c:
@@ -51,18 +49,21 @@ pin_run_as_build:
 python:
 - 3.7.* *_cpython
 re2:
-- 2020.10.01
+- 2021.04.01
 snappy:
 - '1'
 target_platform:
 - linux-64
+thrift_cpp:
+- 0.14.1
 zip_keys:
 - - c_compiler_version
   - cxx_compiler_version
 - - cuda_compiler_version
+  - cdt_name
   - docker_image
-- - numpy
-  - python
+- - python
+  - numpy
 zlib:
 - '1.2'
 zstd:
diff --git a/dev/tasks/conda-recipes/.ci_support/linux_cuda_compiler_version9.2python3.8.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.8.____cpython.yaml
similarity index 76%
rename from dev/tasks/conda-recipes/.ci_support/linux_cuda_compiler_version9.2python3.8.____cpython.yaml
rename to dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.8.____cpython.yaml
index 24810f3c0f1..149e70f438b 100644
--- a/dev/tasks/conda-recipes/.ci_support/linux_cuda_compiler_version9.2python3.8.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.8.____cpython.yaml
@@ -1,13 +1,13 @@
 aws_sdk_cpp:
-- 1.8.63
-boost_cpp:
-- 1.74.0
+- 1.8.151
 bzip2:
 - '1'
 c_compiler:
 - gcc
 c_compiler_version:
-- '7'
+- '9'
+cdt_name:
+- cos6
 channel_sources:
 - conda-forge,defaults
 channel_targets:
@@ -15,30 +15,28 @@ channel_targets:
 cuda_compiler:
 - nvcc
 cuda_compiler_version:
-- '9.2'
+- '10.2'
 cxx_compiler:
 - gxx
 cxx_compiler_version:
-- '7'
+- '9'
 docker_image:
-- condaforge/linux-anvil-cuda:9.2
+- quay.io/condaforge/linux-anvil-cuda:10.2
 gflags:
 - '2.2'
 glog:
 - 0.4.0
 grpc_cpp:
-- '1.32'
+- '1.36'
 libprotobuf:
-- '3.13'
+- '3.15'
 lz4_c:
-- 1.9.2
+- 1.9.3
 numpy:
-- '1.16'
+- '1.17'
 orc:
-- 1.6.5
+- 1.6.7
 pin_run_as_build:
-  boost-cpp:
-    max_pin: x.x.x
   bzip2:
     max_pin: x
   lz4-c:
@@ -51,18 +49,21 @@ pin_run_as_build:
 python:
 - 3.8.* *_cpython
 re2:
-- 2020.10.01
+- 2021.04.01
 snappy:
 - '1'
 target_platform:
 - linux-64
+thrift_cpp:
+- 0.14.1
 zip_keys:
 - - c_compiler_version
   - cxx_compiler_version
 - - cuda_compiler_version
+  - cdt_name
   - docker_image
-- - numpy
-  - python
+- - python
+  - numpy
 zlib:
 - '1.2'
 zstd:
diff --git a/dev/tasks/conda-recipes/.ci_support/linux_cuda_compiler_version9.2python3.9.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.19python3.9.____cpython.yaml
similarity index 77%
rename from dev/tasks/conda-recipes/.ci_support/linux_cuda_compiler_version9.2python3.9.____cpython.yaml
rename to dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.19python3.9.____cpython.yaml
index ebe422be367..fb15d4e7156 100644
--- a/dev/tasks/conda-recipes/.ci_support/linux_cuda_compiler_version9.2python3.9.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.19python3.9.____cpython.yaml
@@ -1,13 +1,13 @@
 aws_sdk_cpp:
-- 1.8.63
-boost_cpp:
-- 1.74.0
+- 1.8.151
 bzip2:
 - '1'
 c_compiler:
 - gcc
 c_compiler_version:
-- '7'
+- '9'
+cdt_name:
+- cos6
 channel_sources:
 - conda-forge,defaults
 channel_targets:
@@ -15,30 +15,28 @@ channel_targets:
 cuda_compiler:
 - nvcc
 cuda_compiler_version:
-- '9.2'
+- '10.2'
 cxx_compiler:
 - gxx
 cxx_compiler_version:
-- '7'
+- '9'
 docker_image:
-- condaforge/linux-anvil-cuda:9.2
+- quay.io/condaforge/linux-anvil-cuda:10.2
 gflags:
 - '2.2'
 glog:
 - 0.4.0
 grpc_cpp:
-- '1.32'
+- '1.36'
 libprotobuf:
-- '3.13'
+- '3.15'
 lz4_c:
-- 1.9.2
+- 1.9.3
 numpy:
 - '1.19'
 orc:
-- 1.6.5
+- 1.6.7
 pin_run_as_build:
-  boost-cpp:
-    max_pin: x.x.x
   bzip2:
     max_pin: x
   lz4-c:
@@ -51,18 +49,21 @@ pin_run_as_build:
 python:
 - 3.9.* *_cpython
 re2:
-- 2020.10.01
+- 2021.04.01
 snappy:
 - '1'
 target_platform:
 - linux-64
+thrift_cpp:
+- 0.14.1
 zip_keys:
 - - c_compiler_version
   - cxx_compiler_version
 - - cuda_compiler_version
+  - cdt_name
   - docker_image
-- - numpy
-  - python
+- - python
+  - numpy
 zlib:
 - '1.2'
 zstd:
diff --git a/dev/tasks/conda-recipes/.ci_support/linux_cuda_compiler_versionNonepython3.6.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.6.____cpython.yaml
similarity index 77%
rename from dev/tasks/conda-recipes/.ci_support/linux_cuda_compiler_versionNonepython3.6.____cpython.yaml
rename to dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.6.____cpython.yaml
index 358814fb0da..d977f9e5779 100644
--- a/dev/tasks/conda-recipes/.ci_support/linux_cuda_compiler_versionNonepython3.6.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.6.____cpython.yaml
@@ -1,13 +1,13 @@
 aws_sdk_cpp:
-- 1.8.63
-boost_cpp:
-- 1.74.0
+- 1.8.151
 bzip2:
 - '1'
 c_compiler:
 - gcc
 c_compiler_version:
-- '7'
+- '9'
+cdt_name:
+- cos6
 channel_sources:
 - conda-forge,defaults
 channel_targets:
@@ -19,26 +19,24 @@ cuda_compiler_version:
 cxx_compiler:
 - gxx
 cxx_compiler_version:
-- '7'
+- '9'
 docker_image:
-- condaforge/linux-anvil-comp7
+- quay.io/condaforge/linux-anvil-comp7
 gflags:
 - '2.2'
 glog:
 - 0.4.0
 grpc_cpp:
-- '1.32'
+- '1.36'
 libprotobuf:
-- '3.13'
+- '3.15'
 lz4_c:
-- 1.9.2
+- 1.9.3
 numpy:
-- '1.16'
+- '1.17'
 orc:
-- 1.6.5
+- 1.6.7
 pin_run_as_build:
-  boost-cpp:
-    max_pin: x.x.x
   bzip2:
     max_pin: x
   lz4-c:
@@ -51,18 +49,21 @@ pin_run_as_build:
 python:
 - 3.6.* *_cpython
 re2:
-- 2020.10.01
+- 2021.04.01
 snappy:
 - '1'
 target_platform:
 - linux-64
+thrift_cpp:
+- 0.14.1
 zip_keys:
 - - c_compiler_version
   - cxx_compiler_version
 - - cuda_compiler_version
+  - cdt_name
   - docker_image
-- - numpy
-  - python
+- - python
+  - numpy
 zlib:
 - '1.2'
 zstd:
diff --git a/dev/tasks/conda-recipes/.ci_support/linux_cuda_compiler_versionNonepython3.7.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython.yaml
similarity index 77%
rename from dev/tasks/conda-recipes/.ci_support/linux_cuda_compiler_versionNonepython3.7.____cpython.yaml
rename to dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython.yaml
index 054a730ee50..6ffa87a5eb9 100644
--- a/dev/tasks/conda-recipes/.ci_support/linux_cuda_compiler_versionNonepython3.7.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython.yaml
@@ -1,13 +1,13 @@
 aws_sdk_cpp:
-- 1.8.63
-boost_cpp:
-- 1.74.0
+- 1.8.151
 bzip2:
 - '1'
 c_compiler:
 - gcc
 c_compiler_version:
-- '7'
+- '9'
+cdt_name:
+- cos6
 channel_sources:
 - conda-forge,defaults
 channel_targets:
@@ -19,26 +19,24 @@ cuda_compiler_version:
 cxx_compiler:
 - gxx
 cxx_compiler_version:
-- '7'
+- '9'
 docker_image:
-- condaforge/linux-anvil-comp7
+- quay.io/condaforge/linux-anvil-comp7
 gflags:
 - '2.2'
 glog:
 - 0.4.0
 grpc_cpp:
-- '1.32'
+- '1.36'
 libprotobuf:
-- '3.13'
+- '3.15'
 lz4_c:
-- 1.9.2
+- 1.9.3
 numpy:
-- '1.16'
+- '1.17'
 orc:
-- 1.6.5
+- 1.6.7
 pin_run_as_build:
-  boost-cpp:
-    max_pin: x.x.x
   bzip2:
     max_pin: x
   lz4-c:
@@ -51,18 +49,21 @@ pin_run_as_build:
 python:
 - 3.7.* *_cpython
 re2:
-- 2020.10.01
+- 2021.04.01
 snappy:
 - '1'
 target_platform:
 - linux-64
+thrift_cpp:
+- 0.14.1
 zip_keys:
 - - c_compiler_version
   - cxx_compiler_version
 - - cuda_compiler_version
+  - cdt_name
   - docker_image
-- - numpy
-  - python
+- - python
+  - numpy
 zlib:
 - '1.2'
 zstd:
diff --git a/dev/tasks/conda-recipes/.ci_support/linux_cuda_compiler_versionNonepython3.8.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.8.____cpython.yaml
similarity index 77%
rename from dev/tasks/conda-recipes/.ci_support/linux_cuda_compiler_versionNonepython3.8.____cpython.yaml
rename to dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.8.____cpython.yaml
index a3bfc58ea8f..7105f634953 100644
--- a/dev/tasks/conda-recipes/.ci_support/linux_cuda_compiler_versionNonepython3.8.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.8.____cpython.yaml
@@ -1,13 +1,13 @@
 aws_sdk_cpp:
-- 1.8.63
-boost_cpp:
-- 1.74.0
+- 1.8.151
 bzip2:
 - '1'
 c_compiler:
 - gcc
 c_compiler_version:
-- '7'
+- '9'
+cdt_name:
+- cos6
 channel_sources:
 - conda-forge,defaults
 channel_targets:
@@ -19,26 +19,24 @@ cuda_compiler_version:
 cxx_compiler:
 - gxx
 cxx_compiler_version:
-- '7'
+- '9'
 docker_image:
-- condaforge/linux-anvil-comp7
+- quay.io/condaforge/linux-anvil-comp7
 gflags:
 - '2.2'
 glog:
 - 0.4.0
 grpc_cpp:
-- '1.32'
+- '1.36'
 libprotobuf:
-- '3.13'
+- '3.15'
 lz4_c:
-- 1.9.2
+- 1.9.3
 numpy:
-- '1.16'
+- '1.17'
 orc:
-- 1.6.5
+- 1.6.7
 pin_run_as_build:
-  boost-cpp:
-    max_pin: x.x.x
   bzip2:
     max_pin: x
   lz4-c:
@@ -51,18 +49,21 @@ pin_run_as_build:
 python:
 - 3.8.* *_cpython
 re2:
-- 2020.10.01
+- 2021.04.01
 snappy:
 - '1'
 target_platform:
 - linux-64
+thrift_cpp:
+- 0.14.1
 zip_keys:
 - - c_compiler_version
   - cxx_compiler_version
 - - cuda_compiler_version
+  - cdt_name
   - docker_image
-- - numpy
-  - python
+- - python
+  - numpy
 zlib:
 - '1.2'
 zstd:
diff --git a/dev/tasks/conda-recipes/.ci_support/linux_cuda_compiler_versionNonepython3.9.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.19python3.9.____cpython.yaml
similarity index 78%
rename from dev/tasks/conda-recipes/.ci_support/linux_cuda_compiler_versionNonepython3.9.____cpython.yaml
rename to dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.19python3.9.____cpython.yaml
index 88541395052..efe0148cc81 100644
--- a/dev/tasks/conda-recipes/.ci_support/linux_cuda_compiler_versionNonepython3.9.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.19python3.9.____cpython.yaml
@@ -1,13 +1,13 @@
 aws_sdk_cpp:
-- 1.8.63
-boost_cpp:
-- 1.74.0
+- 1.8.151
 bzip2:
 - '1'
 c_compiler:
 - gcc
 c_compiler_version:
-- '7'
+- '9'
+cdt_name:
+- cos6
 channel_sources:
 - conda-forge,defaults
 channel_targets:
@@ -19,26 +19,24 @@ cuda_compiler_version:
 cxx_compiler:
 - gxx
 cxx_compiler_version:
-- '7'
+- '9'
 docker_image:
-- condaforge/linux-anvil-comp7
+- quay.io/condaforge/linux-anvil-comp7
 gflags:
 - '2.2'
 glog:
 - 0.4.0
 grpc_cpp:
-- '1.32'
+- '1.36'
 libprotobuf:
-- '3.13'
+- '3.15'
 lz4_c:
-- 1.9.2
+- 1.9.3
 numpy:
 - '1.19'
 orc:
-- 1.6.5
+- 1.6.7
 pin_run_as_build:
-  boost-cpp:
-    max_pin: x.x.x
   bzip2:
     max_pin: x
   lz4-c:
@@ -51,18 +49,21 @@ pin_run_as_build:
 python:
 - 3.9.* *_cpython
 re2:
-- 2020.10.01
+- 2021.04.01
 snappy:
 - '1'
 target_platform:
 - linux-64
+thrift_cpp:
+- 0.14.1
 zip_keys:
 - - c_compiler_version
   - cxx_compiler_version
 - - cuda_compiler_version
+  - cdt_name
   - docker_image
-- - numpy
-  - python
+- - python
+  - numpy
 zlib:
 - '1.2'
 zstd:
diff --git a/dev/tasks/conda-recipes/.ci_support/osx_python3.6.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.6.____cpython.yaml
similarity index 81%
rename from dev/tasks/conda-recipes/.ci_support/osx_python3.6.____cpython.yaml
rename to dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.6.____cpython.yaml
index c019508dce4..7b2dbb34d76 100644
--- a/dev/tasks/conda-recipes/.ci_support/osx_python3.6.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.6.____cpython.yaml
@@ -1,15 +1,13 @@
 MACOSX_DEPLOYMENT_TARGET:
 - '10.9'
 aws_sdk_cpp:
-- 1.8.63
-boost_cpp:
-- 1.74.0
+- 1.8.151
 bzip2:
 - '1'
 c_compiler:
 - clang
 c_compiler_version:
-- '10'
+- '11'
 channel_sources:
 - conda-forge,defaults
 channel_targets:
@@ -19,26 +17,24 @@ cuda_compiler_version:
 cxx_compiler:
 - clangxx
 cxx_compiler_version:
-- '10'
+- '11'
 gflags:
 - '2.2'
 glog:
 - 0.4.0
 grpc_cpp:
-- '1.32'
+- '1.36'
 libprotobuf:
-- '3.13'
+- '3.15'
 lz4_c:
-- 1.9.2
+- 1.9.3
 macos_machine:
 - x86_64-apple-darwin13.4.0
 numpy:
-- '1.16'
+- '1.17'
 orc:
-- 1.6.5
+- 1.6.7
 pin_run_as_build:
-  boost-cpp:
-    max_pin: x.x.x
   bzip2:
     max_pin: x
   lz4-c:
@@ -51,16 +47,18 @@ pin_run_as_build:
 python:
 - 3.6.* *_cpython
 re2:
-- 2020.10.01
+- 2021.04.01
 snappy:
 - '1'
 target_platform:
 - osx-64
+thrift_cpp:
+- 0.14.1
 zip_keys:
 - - c_compiler_version
   - cxx_compiler_version
-- - numpy
-  - python
+- - python
+  - numpy
 zlib:
 - '1.2'
 zstd:
diff --git a/dev/tasks/conda-recipes/.ci_support/osx_python3.7.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.7.____cpython.yaml
similarity index 81%
rename from dev/tasks/conda-recipes/.ci_support/osx_python3.7.____cpython.yaml
rename to dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.7.____cpython.yaml
index 888071766a5..8e3e828ab8a 100644
--- a/dev/tasks/conda-recipes/.ci_support/osx_python3.7.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.7.____cpython.yaml
@@ -1,15 +1,13 @@
 MACOSX_DEPLOYMENT_TARGET:
 - '10.9'
 aws_sdk_cpp:
-- 1.8.63
-boost_cpp:
-- 1.74.0
+- 1.8.151
 bzip2:
 - '1'
 c_compiler:
 - clang
 c_compiler_version:
-- '10'
+- '11'
 channel_sources:
 - conda-forge,defaults
 channel_targets:
@@ -19,26 +17,24 @@ cuda_compiler_version:
 cxx_compiler:
 - clangxx
 cxx_compiler_version:
-- '10'
+- '11'
 gflags:
 - '2.2'
 glog:
 - 0.4.0
 grpc_cpp:
-- '1.32'
+- '1.36'
 libprotobuf:
-- '3.13'
+- '3.15'
 lz4_c:
-- 1.9.2
+- 1.9.3
 macos_machine:
 - x86_64-apple-darwin13.4.0
 numpy:
-- '1.16'
+- '1.17'
 orc:
-- 1.6.5
+- 1.6.7
 pin_run_as_build:
-  boost-cpp:
-    max_pin: x.x.x
   bzip2:
     max_pin: x
   lz4-c:
@@ -51,16 +47,18 @@ pin_run_as_build:
 python:
 - 3.7.* *_cpython
 re2:
-- 2020.10.01
+- 2021.04.01
 snappy:
 - '1'
 target_platform:
 - osx-64
+thrift_cpp:
+- 0.14.1
 zip_keys:
 - - c_compiler_version
   - cxx_compiler_version
-- - numpy
-  - python
+- - python
+  - numpy
 zlib:
 - '1.2'
 zstd:
diff --git a/dev/tasks/conda-recipes/.ci_support/osx_python3.8.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.8.____cpython.yaml
similarity index 77%
rename from dev/tasks/conda-recipes/.ci_support/osx_python3.8.____cpython.yaml
rename to dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.8.____cpython.yaml
index ad449921253..cdd53c6006e 100644
--- a/dev/tasks/conda-recipes/.ci_support/osx_python3.8.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.8.____cpython.yaml
@@ -1,15 +1,13 @@
 MACOSX_DEPLOYMENT_TARGET:
 - '10.9'
 aws_sdk_cpp:
-- 1.8.54
-boost_cpp:
-- 1.74.0
+- 1.8.151
 bzip2:
 - '1'
 c_compiler:
 - clang
 c_compiler_version:
-- '10'
+- '11'
 channel_sources:
 - conda-forge,defaults
 channel_targets:
@@ -19,26 +17,24 @@ cuda_compiler_version:
 cxx_compiler:
 - clangxx
 cxx_compiler_version:
-- '10'
+- '11'
 gflags:
 - '2.2'
 glog:
 - 0.4.0
 grpc_cpp:
-- '1.30'
+- '1.36'
 libprotobuf:
-- '3.13'
+- '3.15'
 lz4_c:
-- 1.9.2
+- 1.9.3
 macos_machine:
 - x86_64-apple-darwin13.4.0
 numpy:
-- '1.17.3'
+- '1.17'
 orc:
-- 1.6.4
+- 1.6.7
 pin_run_as_build:
-  boost-cpp:
-    max_pin: x.x.x
   bzip2:
     max_pin: x
   lz4-c:
@@ -51,11 +47,18 @@ pin_run_as_build:
 python:
 - 3.8.* *_cpython
 re2:
-- 2020.08.01
+- 2021.04.01
 snappy:
 - '1'
 target_platform:
 - osx-64
+thrift_cpp:
+- 0.14.1
+zip_keys:
+- - c_compiler_version
+  - cxx_compiler_version
+- - python
+  - numpy
 zlib:
 - '1.2'
 zstd:
diff --git a/dev/tasks/conda-recipes/.ci_support/osx_python3.9.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.19python3.9.____cpython.yaml
similarity index 82%
rename from dev/tasks/conda-recipes/.ci_support/osx_python3.9.____cpython.yaml
rename to dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.19python3.9.____cpython.yaml
index c630217d111..37df6a9ec53 100644
--- a/dev/tasks/conda-recipes/.ci_support/osx_python3.9.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.19python3.9.____cpython.yaml
@@ -1,15 +1,13 @@
 MACOSX_DEPLOYMENT_TARGET:
 - '10.9'
 aws_sdk_cpp:
-- 1.8.63
-boost_cpp:
-- 1.74.0
+- 1.8.151
 bzip2:
 - '1'
 c_compiler:
 - clang
 c_compiler_version:
-- '10'
+- '11'
 channel_sources:
 - conda-forge,defaults
 channel_targets:
@@ -19,26 +17,24 @@ cuda_compiler_version:
 cxx_compiler:
 - clangxx
 cxx_compiler_version:
-- '10'
+- '11'
 gflags:
 - '2.2'
 glog:
 - 0.4.0
 grpc_cpp:
-- '1.32'
+- '1.36'
 libprotobuf:
-- '3.13'
+- '3.15'
 lz4_c:
-- 1.9.2
+- 1.9.3
 macos_machine:
 - x86_64-apple-darwin13.4.0
 numpy:
 - '1.19'
 orc:
-- 1.6.5
+- 1.6.7
 pin_run_as_build:
-  boost-cpp:
-    max_pin: x.x.x
   bzip2:
     max_pin: x
   lz4-c:
@@ -51,16 +47,18 @@ pin_run_as_build:
 python:
 - 3.9.* *_cpython
 re2:
-- 2020.10.01
+- 2021.04.01
 snappy:
 - '1'
 target_platform:
 - osx-64
+thrift_cpp:
+- 0.14.1
 zip_keys:
 - - c_compiler_version
   - cxx_compiler_version
-- - numpy
-  - python
+- - python
+  - numpy
 zlib:
 - '1.2'
 zstd:
diff --git a/dev/tasks/conda-recipes/.ci_support/osx_arm64_python3.8.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/osx_arm64_python3.8.____cpython.yaml
new file mode 100644
index 00000000000..5894b8ee70b
--- /dev/null
+++ b/dev/tasks/conda-recipes/.ci_support/osx_arm64_python3.8.____cpython.yaml
@@ -0,0 +1,65 @@
+MACOSX_DEPLOYMENT_TARGET:
+- '11.0'
+aws_sdk_cpp:
+- 1.8.151
+bzip2:
+- '1'
+c_compiler:
+- clang
+c_compiler_version:
+- '11'
+channel_sources:
+- conda-forge/label/rust_dev,conda-forge
+channel_targets:
+- conda-forge main
+cuda_compiler_version:
+- None
+cxx_compiler:
+- clangxx
+cxx_compiler_version:
+- '11'
+gflags:
+- '2.2'
+glog:
+- 0.4.0
+grpc_cpp:
+- '1.36'
+libprotobuf:
+- '3.15'
+lz4_c:
+- 1.9.3
+macos_machine:
+- arm64-apple-darwin20.0.0
+numpy:
+- '1.19'
+orc:
+- 1.6.7
+pin_run_as_build:
+  bzip2:
+    max_pin: x
+  lz4-c:
+    max_pin: x.x.x
+  python:
+    min_pin: x.x
+    max_pin: x.x
+  zlib:
+    max_pin: x.x
+python:
+- 3.8.* *_cpython
+re2:
+- 2021.04.01
+snappy:
+- '1'
+target_platform:
+- osx-arm64
+thrift_cpp:
+- 0.14.1
+zip_keys:
+- - c_compiler_version
+  - cxx_compiler_version
+- - python
+  - numpy
+zlib:
+- '1.2'
+zstd:
+- '1.4'
diff --git a/dev/tasks/conda-recipes/.ci_support/osx_arm64_python3.9.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/osx_arm64_python3.9.____cpython.yaml
new file mode 100644
index 00000000000..4e6014c5db8
--- /dev/null
+++ b/dev/tasks/conda-recipes/.ci_support/osx_arm64_python3.9.____cpython.yaml
@@ -0,0 +1,65 @@
+MACOSX_DEPLOYMENT_TARGET:
+- '11.0'
+aws_sdk_cpp:
+- 1.8.151
+bzip2:
+- '1'
+c_compiler:
+- clang
+c_compiler_version:
+- '11'
+channel_sources:
+- conda-forge/label/rust_dev,conda-forge
+channel_targets:
+- conda-forge main
+cuda_compiler_version:
+- None
+cxx_compiler:
+- clangxx
+cxx_compiler_version:
+- '11'
+gflags:
+- '2.2'
+glog:
+- 0.4.0
+grpc_cpp:
+- '1.36'
+libprotobuf:
+- '3.15'
+lz4_c:
+- 1.9.3
+macos_machine:
+- arm64-apple-darwin20.0.0
+numpy:
+- '1.19'
+orc:
+- 1.6.7
+pin_run_as_build:
+  bzip2:
+    max_pin: x
+  lz4-c:
+    max_pin: x.x.x
+  python:
+    min_pin: x.x
+    max_pin: x.x
+  zlib:
+    max_pin: x.x
+python:
+- 3.9.* *_cpython
+re2:
+- 2021.04.01
+snappy:
+- '1'
+target_platform:
+- osx-arm64
+thrift_cpp:
+- 0.14.1
+zip_keys:
+- - c_compiler_version
+  - cxx_compiler_version
+- - python
+  - numpy
+zlib:
+- '1.2'
+zstd:
+- '1.4'
diff --git a/dev/tasks/conda-recipes/.ci_support/win_python3.6.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.6.____cpython.yaml
similarity index 82%
rename from dev/tasks/conda-recipes/.ci_support/win_python3.6.____cpython.yaml
rename to dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.6.____cpython.yaml
index 40199c679de..8fbbb64af9c 100644
--- a/dev/tasks/conda-recipes/.ci_support/win_python3.6.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.6.____cpython.yaml
@@ -1,7 +1,5 @@
 aws_sdk_cpp:
-- 1.8.63
-boost_cpp:
-- 1.74.0
+- 1.8.151
 bzip2:
 - '1'
 c_compiler:
@@ -10,6 +8,8 @@ channel_sources:
 - conda-forge,defaults
 channel_targets:
 - conda-forge main
+cuda_compiler:
+- nvcc
 cuda_compiler_version:
 - None
 cxx_compiler:
@@ -19,16 +19,14 @@ gflags:
 glog:
 - 0.4.0
 grpc_cpp:
-- '1.32'
+- '1.36'
 libprotobuf:
-- '3.13'
+- '3.15'
 lz4_c:
-- 1.9.2
+- 1.9.3
 numpy:
-- '1.16'
+- '1.17'
 pin_run_as_build:
-  boost-cpp:
-    max_pin: x.x.x
   bzip2:
     max_pin: x
   lz4-c:
@@ -41,11 +39,13 @@ pin_run_as_build:
 python:
 - 3.6.* *_cpython
 re2:
-- 2020.10.01
+- 2021.04.01
 snappy:
 - '1'
 target_platform:
 - win-64
+thrift_cpp:
+- 0.14.1
 zip_keys:
 - - numpy
   - python
diff --git a/dev/tasks/conda-recipes/.ci_support/win_python3.7.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython.yaml
similarity index 82%
rename from dev/tasks/conda-recipes/.ci_support/win_python3.7.____cpython.yaml
rename to dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython.yaml
index 88d17108a98..4b702a38980 100644
--- a/dev/tasks/conda-recipes/.ci_support/win_python3.7.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython.yaml
@@ -1,7 +1,5 @@
 aws_sdk_cpp:
-- 1.8.63
-boost_cpp:
-- 1.74.0
+- 1.8.151
 bzip2:
 - '1'
 c_compiler:
@@ -10,6 +8,8 @@ channel_sources:
 - conda-forge,defaults
 channel_targets:
 - conda-forge main
+cuda_compiler:
+- nvcc
 cuda_compiler_version:
 - None
 cxx_compiler:
@@ -19,16 +19,14 @@ gflags:
 glog:
 - 0.4.0
 grpc_cpp:
-- '1.32'
+- '1.36'
 libprotobuf:
-- '3.13'
+- '3.15'
 lz4_c:
-- 1.9.2
+- 1.9.3
 numpy:
-- '1.16'
+- '1.17'
 pin_run_as_build:
-  boost-cpp:
-    max_pin: x.x.x
   bzip2:
     max_pin: x
   lz4-c:
@@ -41,11 +39,13 @@ pin_run_as_build:
 python:
 - 3.7.* *_cpython
 re2:
-- 2020.10.01
+- 2021.04.01
 snappy:
 - '1'
 target_platform:
 - win-64
+thrift_cpp:
+- 0.14.1
 zip_keys:
 - - numpy
   - python
diff --git a/dev/tasks/conda-recipes/.ci_support/win_python3.8.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.8.____cpython.yaml
similarity index 82%
rename from dev/tasks/conda-recipes/.ci_support/win_python3.8.____cpython.yaml
rename to dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.8.____cpython.yaml
index e18785de135..6ae6c2fde4f 100644
--- a/dev/tasks/conda-recipes/.ci_support/win_python3.8.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.8.____cpython.yaml
@@ -1,7 +1,5 @@
 aws_sdk_cpp:
-- 1.8.63
-boost_cpp:
-- 1.74.0
+- 1.8.151
 bzip2:
 - '1'
 c_compiler:
@@ -10,6 +8,8 @@ channel_sources:
 - conda-forge,defaults
 channel_targets:
 - conda-forge main
+cuda_compiler:
+- nvcc
 cuda_compiler_version:
 - None
 cxx_compiler:
@@ -19,16 +19,14 @@ gflags:
 glog:
 - 0.4.0
 grpc_cpp:
-- '1.32'
+- '1.36'
 libprotobuf:
-- '3.13'
+- '3.15'
 lz4_c:
-- 1.9.2
+- 1.9.3
 numpy:
-- '1.16'
+- '1.17'
 pin_run_as_build:
-  boost-cpp:
-    max_pin: x.x.x
   bzip2:
     max_pin: x
   lz4-c:
@@ -41,11 +39,13 @@ pin_run_as_build:
 python:
 - 3.8.* *_cpython
 re2:
-- 2020.10.01
+- 2021.04.01
 snappy:
 - '1'
 target_platform:
 - win-64
+thrift_cpp:
+- 0.14.1
 zip_keys:
 - - numpy
   - python
diff --git a/dev/tasks/conda-recipes/.ci_support/win_python3.9.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.19python3.9.____cpython.yaml
similarity index 83%
rename from dev/tasks/conda-recipes/.ci_support/win_python3.9.____cpython.yaml
rename to dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.19python3.9.____cpython.yaml
index 6177f96ce01..73a8b5099bb 100644
--- a/dev/tasks/conda-recipes/.ci_support/win_python3.9.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.19python3.9.____cpython.yaml
@@ -1,7 +1,5 @@
 aws_sdk_cpp:
-- 1.8.63
-boost_cpp:
-- 1.74.0
+- 1.8.151
 bzip2:
 - '1'
 c_compiler:
@@ -10,6 +8,8 @@ channel_sources:
 - conda-forge,defaults
 channel_targets:
 - conda-forge main
+cuda_compiler:
+- nvcc
 cuda_compiler_version:
 - None
 cxx_compiler:
@@ -19,16 +19,14 @@ gflags:
 glog:
 - 0.4.0
 grpc_cpp:
-- '1.32'
+- '1.36'
 libprotobuf:
-- '3.13'
+- '3.15'
 lz4_c:
-- 1.9.2
+- 1.9.3
 numpy:
 - '1.19'
 pin_run_as_build:
-  boost-cpp:
-    max_pin: x.x.x
   bzip2:
     max_pin: x
   lz4-c:
@@ -41,11 +39,13 @@ pin_run_as_build:
 python:
 - 3.9.* *_cpython
 re2:
-- 2020.10.01
+- 2021.04.01
 snappy:
 - '1'
 target_platform:
 - win-64
+thrift_cpp:
+- 0.14.1
 zip_keys:
 - - numpy
   - python
diff --git a/dev/tasks/conda-recipes/.scripts/logging_utils.sh b/dev/tasks/conda-recipes/.scripts/logging_utils.sh
new file mode 100644
index 00000000000..a53ef3f2c7a
--- /dev/null
+++ b/dev/tasks/conda-recipes/.scripts/logging_utils.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+# Provide a unified interface for the different logging
+# utilities CI providers offer. If unavailable, provide
+# a compatible fallback (e.g. bare `echo xxxxxx`).
+
+function startgroup {
+    # Start a foldable group of log lines
+    # Pass a single argument, quoted
+    case ${CI:-} in
+        azure )
+            echo "##[group]$1";;
+        travis )
+            echo "$1"
+            echo -en 'travis_fold:start:'"${1// /}"'\\r';;
+        * )
+            echo "$1";;
+    esac
+}
+
+function endgroup {
+    # End a foldable group of log lines
+    # Pass a single argument, quoted
+    case ${CI:-} in
+        azure )
+            echo "##[endgroup]";;
+        travis )
+            echo -en 'travis_fold:end:'"${1// /}"'\\r';;
+    esac
+}
diff --git a/dev/tasks/conda-recipes/arrow-cpp/bld-arrow.bat b/dev/tasks/conda-recipes/arrow-cpp/bld-arrow.bat
index cc2ed71fe3e..2cc6ed1ba3e 100644
--- a/dev/tasks/conda-recipes/arrow-cpp/bld-arrow.bat
+++ b/dev/tasks/conda-recipes/arrow-cpp/bld-arrow.bat
@@ -1,6 +1,19 @@
+@echo on
+
 mkdir "%SRC_DIR%"\cpp\build
 pushd "%SRC_DIR%"\cpp\build
 
+:: Enable CUDA support
+if "%cuda_compiler_version%"=="None" (
+    set "EXTRA_CMAKE_ARGS=-DARROW_CUDA=OFF"
+) else (
+    REM this should move to nvcc-feedstock
+    set "CUDA_PATH=%CUDA_PATH:\=/%"
+    set "CUDA_HOME=%CUDA_HOME:\=/%"
+
+    set "EXTRA_CMAKE_ARGS=-DARROW_CUDA=ON"
+)
+
 cmake -G "Ninja" ^
       -DBUILD_SHARED_LIBS=ON ^
       -DCMAKE_INSTALL_PREFIX="%LIBRARY_PREFIX%" ^
@@ -31,6 +44,7 @@ cmake -G "Ninja" ^
       -DARROW_S3:BOOL=ON ^
       -DBoost_NO_BOOST_CMAKE=ON ^
       -DCMAKE_UNITY_BUILD=ON ^
+      %EXTRA_CMAKE_ARGS% ^
       ..
 if errorlevel 1 exit 1
 
diff --git a/dev/tasks/conda-recipes/arrow-cpp/bld-pyarrow.bat b/dev/tasks/conda-recipes/arrow-cpp/bld-pyarrow.bat
index 8f3357748df..89cec3710c3 100644
--- a/dev/tasks/conda-recipes/arrow-cpp/bld-pyarrow.bat
+++ b/dev/tasks/conda-recipes/arrow-cpp/bld-pyarrow.bat
@@ -2,18 +2,17 @@
 pushd "%SRC_DIR%"\python
 
 @rem the symlinks for cmake modules don't work here
+@rem NOTE: In contrast to conda-forge, they work here as we clone from git.
 @rem del cmake_modules\BuildUtils.cmake
 @rem del cmake_modules\SetupCxxFlags.cmake
+@rem del cmake_modules\CompilerInfo.cmake
 @rem del cmake_modules\FindNumPy.cmake
 @rem del cmake_modules\FindPythonLibsNew.cmake
 @rem copy /Y "%SRC_DIR%\cpp\cmake_modules\BuildUtils.cmake" cmake_modules\
-@rem if errorlevel 1 exit 1
 @rem copy /Y "%SRC_DIR%\cpp\cmake_modules\SetupCxxFlags.cmake" cmake_modules\
-@rem if errorlevel 1 exit 1
+@rem copy /Y "%SRC_DIR%\cpp\cmake_modules\CompilerInfo.cmake" cmake_modules\
 @rem copy /Y "%SRC_DIR%\cpp\cmake_modules\FindNumPy.cmake" cmake_modules\
-@rem if errorlevel 1 exit 1
 @rem copy /Y "%SRC_DIR%\cpp\cmake_modules\FindPythonLibsNew.cmake" cmake_modules\
-@rem if errorlevel 1 exit 1
 
 SET ARROW_HOME=%LIBRARY_PREFIX%
 SET SETUPTOOLS_SCM_PRETEND_VERSION=%PKG_VERSION%
@@ -26,6 +25,13 @@ SET PYARROW_WITH_GANDIVA=1
 SET PYARROW_WITH_PARQUET=1
 SET PYARROW_CMAKE_GENERATOR=Ninja
 
+:: Enable CUDA support
+if "%cuda_compiler_version%"=="None" (
+    set "PYARROW_WITH_CUDA=0"
+) else (
+    set "PYARROW_WITH_CUDA=1"
+)
+
 %PYTHON%   setup.py ^
            build_ext ^
            install --single-version-externally-managed ^
diff --git a/dev/tasks/conda-recipes/arrow-cpp/build-arrow.sh b/dev/tasks/conda-recipes/arrow-cpp/build-arrow.sh
old mode 100755
new mode 100644
index e15fc92808c..f9c1d975ec3
--- a/dev/tasks/conda-recipes/arrow-cpp/build-arrow.sh
+++ b/dev/tasks/conda-recipes/arrow-cpp/build-arrow.sh
@@ -34,17 +34,26 @@ else
     EXTRA_CMAKE_ARGS=" ${EXTRA_CMAKE_ARGS} -DARROW_CUDA=OFF"
 fi
 
+if [[ "${target_platform}" == "osx-arm64" ]]; then
+    # We need llvm 11+ support in Arrow for this
+    EXTRA_CMAKE_ARGS=" ${EXTRA_CMAKE_ARGS} -DARROW_GANDIVA=OFF"
+    sed -ie "s;protoc-gen-grpc.*$;protoc-gen-grpc=${BUILD_PREFIX}/bin/grpc_cpp_plugin\";g" ../src/arrow/flight/CMakeLists.txt
+    sed -ie 's;"--with-jemalloc-prefix\=je_arrow_";"--with-jemalloc-prefix\=je_arrow_" "--with-lg-page\=14";g' ../cmake_modules/ThirdpartyToolchain.cmake
+else
+    EXTRA_CMAKE_ARGS=" ${EXTRA_CMAKE_ARGS} -DARROW_GANDIVA=ON"
+fi
+
 cmake \
-    -DBUILD_SHARED_LIBS=ON \
     -DARROW_BOOST_USE_SHARED=ON \
     -DARROW_BUILD_BENCHMARKS=OFF \
     -DARROW_BUILD_STATIC=OFF \
     -DARROW_BUILD_TESTS=OFF \
     -DARROW_BUILD_UTILITIES=OFF \
+    -DBUILD_SHARED_LIBS=ON \
     -DARROW_DATASET=ON \
     -DARROW_DEPENDENCY_SOURCE=SYSTEM \
     -DARROW_FLIGHT=ON \
-    -DARROW_GANDIVA=ON \
+    -DARROW_FLIGHT_REQUIRE_TLSCREDENTIALSOPTIONS=ON \
     -DARROW_HDFS=ON \
     -DARROW_JEMALLOC=ON \
     -DARROW_MIMALLOC=ON \
@@ -55,34 +64,32 @@ cmake \
     -DARROW_PYTHON=ON \
     -DARROW_S3=ON \
     -DARROW_SIMD_LEVEL=NONE \
+    -DARROW_USE_LD_GOLD=ON \
     -DARROW_WITH_BROTLI=ON \
     -DARROW_WITH_BZ2=ON \
     -DARROW_WITH_LZ4=ON \
     -DARROW_WITH_SNAPPY=ON \
     -DARROW_WITH_ZLIB=ON \
     -DARROW_WITH_ZSTD=ON \
-    -DARROW_USE_LD_GOLD=ON \
-    -DCMAKE_AR=${AR} \
     -DCMAKE_BUILD_TYPE=release \
     -DCMAKE_INSTALL_LIBDIR=lib \
     -DCMAKE_INSTALL_PREFIX=$PREFIX \
-    -DCMAKE_RANLIB=${RANLIB} \
     -DLLVM_TOOLS_BINARY_DIR=$PREFIX/bin \
-    -DCMAKE_UNITY_BUILD=ON \
+    -DPython3_EXECUTABLE=${PYTHON} \
+    -DProtobuf_PROTOC_EXECUTABLE=$BUILD_PREFIX/bin/protoc \
     -GNinja \
     ${EXTRA_CMAKE_ARGS} \
     ..
 
-# Decrease parallelism a bit as we will otherwise get out-of-memory problems
-# This is only necessary on Travis
-if [ "${TRAVIS}" = "true" ]; then
-# if [ "$(uname -m)" = "ppc64le" ]; then
-    echo "Using $(grep -c ^processor /proc/cpuinfo) CPUs"
-    CPU_COUNT=$(grep -c ^processor /proc/cpuinfo)
-    CPU_COUNT=$((CPU_COUNT / 4))
-    ninja install -j${CPU_COUNT}
-else
-    ninja install
+# Commented out until jemalloc and mimalloc are fixed upstream
+if [[ "${target_platform}" == "osx-arm64" ]]; then
+     ninja jemalloc_ep-prefix/src/jemalloc_ep-stamp/jemalloc_ep-patch mimalloc_ep-prefix/src/mimalloc_ep-stamp/mimalloc_ep-patch
+     cp $BUILD_PREFIX/share/gnuconfig/config.* jemalloc_ep-prefix/src/jemalloc_ep/build-aux/
+     sed -ie 's/list(APPEND mi_cflags -march=native)//g' mimalloc_ep-prefix/src/mimalloc_ep/CMakeLists.txt
+     # Use the correct register for thread-local storage
+     sed -ie 's/tpidr_el0/tpidrro_el0/g' mimalloc_ep-prefix/src/mimalloc_ep/include/mimalloc-internal.h
 fi
 
+ninja install
+
 popd
diff --git a/dev/tasks/conda-recipes/arrow-cpp/build-pyarrow.sh b/dev/tasks/conda-recipes/arrow-cpp/build-pyarrow.sh
old mode 100755
new mode 100644
index d0fb55de580..a394e999f7b
--- a/dev/tasks/conda-recipes/arrow-cpp/build-pyarrow.sh
+++ b/dev/tasks/conda-recipes/arrow-cpp/build-pyarrow.sh
@@ -9,10 +9,14 @@ export PARQUET_HOME=$PREFIX
 export SETUPTOOLS_SCM_PRETEND_VERSION=$PKG_VERSION
 export PYARROW_BUILD_TYPE=release
 export PYARROW_BUNDLE_ARROW_CPP_HEADERS=0
-export PYARROW_BUNDLE_PLASMA_EXECUTABLE=0
 export PYARROW_WITH_DATASET=1
 export PYARROW_WITH_FLIGHT=1
-export PYARROW_WITH_GANDIVA=1
+if [[ "${target_platform}" == "osx-arm64" ]]; then
+    # We need llvm 11+ support in Arrow for this
+    export PYARROW_WITH_GANDIVA=0
+else
+    export PYARROW_WITH_GANDIVA=1
+fi
 export PYARROW_WITH_HDFS=1
 export PYARROW_WITH_ORC=1
 export PYARROW_WITH_PARQUET=1
@@ -22,16 +26,14 @@ export PYARROW_CMAKE_GENERATOR=Ninja
 BUILD_EXT_FLAGS=""
 
 # Enable CUDA support
-if [[ ! -z "${cuda_compiler_version+x}" && "${cuda_compiler_version}" != "None" ]]
-then
+if [[ ! -z "${cuda_compiler_version+x}" && "${cuda_compiler_version}" != "None" ]]; then
     export PYARROW_WITH_CUDA=1
 else
     export PYARROW_WITH_CUDA=0
 fi
 
 # Resolve: Make Error at cmake_modules/SetupCxxFlags.cmake:338 (message): Unsupported arch flag: -march=.
-if [[ "$(uname -m)" = "aarch64" ]]
-then
+if [[ "${target_platform}" == "linux-aarch64" ]]; then
     export PYARROW_CMAKE_OPTIONS="-DARROW_ARMV8_ARCH=armv8-a"
 fi
 
diff --git a/dev/tasks/conda-recipes/arrow-cpp/meta.yaml b/dev/tasks/conda-recipes/arrow-cpp/meta.yaml
index cdbfc5d5b4d..48a8629866d 100644
--- a/dev/tasks/conda-recipes/arrow-cpp/meta.yaml
+++ b/dev/tasks/conda-recipes/arrow-cpp/meta.yaml
@@ -1,9 +1,9 @@
+# NOTE: In constrast to the conda-forge recipe, ARROW_VERSION is a templated variable here.
 {% set version = ARROW_VERSION %}
-{% set number = "0" %}
-{% set cuda_enabled = cuda_compiler_version is not undefined and cuda_compiler_version == '9.2' %}
-{% set build_ext_version = "1.0.1" %}
+{% set cuda_enabled = cuda_compiler_version != "None" %}
+{% set build_ext_version = ARROW_VERSION %}
 {% set build_ext = "cuda" if cuda_enabled else "cpu" %}
-{% set proc_build_number = "1" %}
+{% set proc_build_number = "0" %}
 
 package:
   name: arrow-cpp-ext
@@ -14,7 +14,10 @@ source:
 
 build:
   number: 0
-  skip: true  # [cuda_compiler_version not in (undefined, "None", "9.2")]
+  # for cuda on win/linux, building with 9.2 is enough to be compatible with all later versions,
+  # since arrow is only using libcuda, and not libcudart.
+  skip: true  # [(win or linux) and cuda_compiler_version not in ("None", "10.2")]
+  skip: true  # [osx and cuda_compiler_version != "None"]
   run_exports:
     - {{ pin_subpackage("arrow-cpp", max_pin="x.x.x") }}
 
@@ -40,27 +43,31 @@ outputs:
     version: {{ version }}
     build:
       string: py{{ CONDA_PY }}h{{ PKG_HASH }}_{{ PKG_BUILDNUM }}_{{ build_ext }}
-      skip: true  # [cuda_compiler_version not in (undefined, "None", "9.2")]
       run_exports:
         - {{ pin_subpackage("arrow-cpp", max_pin="x.x.x") }}
       ignore_run_exports:
         - cudatoolkit
-        # Gandiva only needs headers
-        - boost-cpp
       track_features:
         {{ "- arrow-cuda" if cuda_enabled else "" }}
     requirements:
       build:
-        - cmake 3.16.*
+        - python                                 # [build_platform != target_platform]
+        - cross-python_{{ target_platform }}     # [build_platform != target_platform]
+        - cython                                 # [build_platform != target_platform]
+        - numpy                                  # [build_platform != target_platform]
+        - gnuconfig                              # [osx and arm64]
+        - libprotobuf
+        - grpc-cpp
+        - cmake
         - autoconf  # [unix]
         - ninja
         - make  # [unix]
         - {{ compiler('c') }}
         - {{ compiler('cxx') }}
-        - {{ compiler("cuda") }}  # [cuda_compiler_version not in (undefined, "None")]
+        - {{ compiler("cuda") }}  # [cuda_compiler_version != "None"]
       host:
         - aws-sdk-cpp
-        - boost-cpp
+        - boost-cpp >=1.70
         - brotli
         - bzip2
         - c-ares
@@ -68,8 +75,8 @@ outputs:
         - glog
         - grpc-cpp
         - libprotobuf
-        - clangdev 11
-        - llvmdev 11
+        - clangdev 10  # [not (osx and arm64)]
+        - llvmdev 10   # [not (osx and arm64)]
         - libutf8proc
         - lz4-c
         - numpy
@@ -78,25 +85,15 @@ outputs:
         - rapidjson
         - re2
         - snappy
-        - thrift-cpp >=0.11
+        - thrift-cpp
         - zlib
         - zstd
       run:
         - {{ pin_compatible('numpy', lower_bound='1.16') }}
-        - aws-sdk-cpp
-        - brotli
-        - c-ares
-        - gflags
-        - glog
-        - grpc-cpp
-        - lz4-c
         - python
-        - re2
-        - zlib
-        - zstd
       run_constrained:
         - arrow-cpp-proc * {{ build_ext }}
-        - cudatoolkit >=9.2  # [cuda_compiler_version not in (undefined, "None")]
+        - cudatoolkit >=9.2  # [cuda_compiler_version != "None"]
 
     about:
       home: http://github.com/apache/arrow
@@ -111,7 +108,7 @@ outputs:
         - test -f $PREFIX/include/arrow/api.h              # [unix]
         - test -f $PREFIX/include/arrow/flight/types.h     # [unix]
         - test -f $PREFIX/include/plasma/client.h          # [unix]
-        - test -f $PREFIX/include/gandiva/engine.h         # [unix and not (aarch64 or ppc64le)]
+        - test -f $PREFIX/include/gandiva/engine.h         # [unix and not (osx and arm64)]
         - test -f $PREFIX/include/parquet/api/reader.h     # [unix]
         - if not exist %LIBRARY_INC%\\arrow\\api.h exit 1            # [win]
         - if not exist %LIBRARY_INC%\\gandiva\\engine.h exit 1       # [win]
@@ -123,15 +120,16 @@ outputs:
         - test -f $PREFIX/lib/libarrow_flight.so     # [linux]
         - test -f $PREFIX/lib/libarrow_python.so     # [linux]
         - test -f $PREFIX/lib/libparquet.so          # [linux]
-        - test -f $PREFIX/lib/libgandiva.so          # [linux and not (aarch64 or ppc64le)]
+        - test -f $PREFIX/lib/libgandiva.so          # [linux]
         - test -f $PREFIX/lib/libplasma.so           # [linux]
-        {{ "- test %s -f $PREFIX/lib/libarrow_cuda.so" % (['!', ''][cuda_enabled]) }}                         # [linux]
-        {{ "- test %s -f $PREFIX/lib/libarrow_cuda.dylib" % (['!', ''][cuda_enabled]) }}                      # [osx]
-        {{ "- if %s exist %%PREFIX%%\\Library\\bin\\arrow_cuda.dll exit 1" % (['', 'not'][cuda_enabled]) }}   # [win]
+        - test -f $PREFIX/lib/libarrow_cuda${SHLIB_EXT}               # [(cuda_compiler_version != "None") and unix]
+        - test ! -f $PREFIX/lib/libarrow_cuda${SHLIB_EXT}             # [(cuda_compiler_version == "None") and unix]
+        - if not exist %PREFIX%\\Library\\bin\\arrow_cuda.dll exit 1  # [(cuda_compiler_version != "None") and win]
+        - if exist %PREFIX%\\Library\\bin\\arrow_cuda.dll exit 1      # [(cuda_compiler_version == "None") and win]
         - test -f $PREFIX/lib/libarrow.dylib          # [osx]
         - test -f $PREFIX/lib/libarrow_dataset.dylib  # [osx]
         - test -f $PREFIX/lib/libarrow_python.dylib   # [osx]
-        - test -f $PREFIX/lib/libgandiva.dylib        # [osx]
+        - test -f $PREFIX/lib/libgandiva.dylib        # [osx and not arm64]
         - test -f $PREFIX/lib/libparquet.dylib        # [osx]
         - test -f $PREFIX/lib/libplasma.dylib         # [osx]
         - if not exist %PREFIX%\\Library\\bin\\arrow.dll exit 1          # [win]
@@ -162,20 +160,23 @@ outputs:
     version: {{ version }}
     build:
       string: py{{ CONDA_PY }}h{{ PKG_HASH }}_{{ PKG_BUILDNUM }}_{{ build_ext }}
-      skip: true  # [cuda_compiler_version not in (undefined, "None", "9.2")]
       ignore_run_exports:
         - cudatoolkit
       track_features:
         {{ "- arrow-cuda" if cuda_enabled else "" }}
     requirements:
       build:
-        - cmake 3.16.*
+        - python                                 # [build_platform != target_platform]
+        - cross-python_{{ target_platform }}     # [build_platform != target_platform]
+        - cython                                 # [build_platform != target_platform]
+        - numpy                                  # [build_platform != target_platform]
+        - cmake
         - ninja
         - make  # [unix]
         - {{ compiler('c') }}
         - {{ compiler('cxx') }}
         # pyarrow does not require nvcc but it needs to link against libraries in arrow-cpp=*=*cuda
-        - {{ compiler("cuda") }}  # [cuda_compiler_version not in (undefined, "None")]
+        - {{ compiler("cuda") }}  # [cuda_compiler_version != "None"]
       host:
         - {{ pin_subpackage('arrow-cpp', exact=True) }}
         - cython
@@ -192,7 +193,7 @@ outputs:
         - python
       run_constrained:
         - arrow-cpp-proc * {{ build_ext }}
-        - cudatoolkit >=9.2  # [cuda_compiler_version not in (undefined, "None")]
+        - cudatoolkit >=9.2  # [cuda_compiler_version != "None"]
 
     about:
       home: http://github.com/apache/arrow
@@ -206,7 +207,7 @@ outputs:
         - pyarrow
         - pyarrow.dataset
         - pyarrow.flight
-        - pyarrow.gandiva
+        - pyarrow.gandiva  # [not (osx and arm64)]
         - pyarrow.orc      # [unix]
         - pyarrow.parquet
         - pyarrow.plasma   # [unix]
@@ -215,31 +216,39 @@ outputs:
         - pyarrow._hdfs
         # We can only test importing cuda package but cannot run when a
         # CUDA device is not available, for instance, when building from CI.
-        - pyarrow.cuda     # [cuda_compiler_version not in (undefined, "None")]
+        # On Windows, we cannot even do that due to `nvcuda.dll` not being found, see
+        # https://conda-forge.org/docs/maintainer/knowledge_base.html#nvcuda-dll-cannot-be-found-on-windows
+        # However, we check below for (at least) the presence of a correctly-compiled module
+        - pyarrow.cuda     # [cuda_compiler_version != "None" and not win]
       commands:
-        - test ! -f ${SP_DIR}/pyarrow/plasma-store-server       # [unix]
-        - test ! -f ${SP_DIR}/pyarrow/tests/test_array.py       # [unix]
-        - if exist %SP_DIR%/pyarrow/tests/test_array.py exit 1  # [win]
+        - test ! -f ${SP_DIR}/pyarrow/tests/test_array.py                         # [unix]
+        - if exist %SP_DIR%/pyarrow/tests/test_array.py exit 1                    # [win]
+        # Need to remove dot from PY_VER; %MYVAR:x=y% replaces "x" in %MYVAR% with "y"
+        - if not exist %SP_DIR%/pyarrow/_cuda.cp%PY_VER:.=%-win_amd64.pyd exit 1  # [win and cuda_compiler_version != "None"]
+
   - name: pyarrow-tests
     script: build-pyarrow.sh  # [not win]
     script: bld-pyarrow.bat   # [win]
     version: {{ version }}
     build:
       string: py{{ CONDA_PY }}h{{ PKG_HASH }}_{{ PKG_BUILDNUM }}_{{ build_ext }}
-      skip: true  # [cuda_compiler_version not in (undefined, "None", "9.2")]
       ignore_run_exports:
         - cudatoolkit
       track_features:
         {{ "- arrow-cuda" if cuda_enabled else "" }}
     requirements:
       build:
-        - cmake 3.16.*
+        - python                                 # [build_platform != target_platform]
+        - cross-python_{{ target_platform }}     # [build_platform != target_platform]
+        - cython                                 # [build_platform != target_platform]
+        - numpy                                  # [build_platform != target_platform]
+        - cmake
         - ninja
         - make  # [unix]
         - {{ compiler('c') }}
         - {{ compiler('cxx') }}
         # pyarrow does not require nvcc but it needs to link against libraries in arrow-cpp=*=*cuda
-        - {{ compiler("cuda") }}  # [cuda_compiler_version not in (undefined, "None")]
+        - {{ compiler("cuda") }}  # [cuda_compiler_version != "None"]
       host:
         - {{ pin_subpackage('arrow-cpp', exact=True) }}
         - {{ pin_subpackage('pyarrow', exact=True) }}
@@ -254,7 +263,7 @@ outputs:
         - python
       run_constrained:
         - arrow-cpp-proc * {{ build_ext }}
-        - cudatoolkit >=9.2  # [cuda_compiler_version not in (undefined, "None")]
+        - cudatoolkit >=9.2  # [cuda_compiler_version != "None"]
 
     about:
       home: http://github.com/apache/arrow
diff --git a/dev/tasks/conda-recipes/azure.osx.yml b/dev/tasks/conda-recipes/azure.osx.yml
index 58afa8045cb..dbb1a68aca6 100755
--- a/dev/tasks/conda-recipes/azure.osx.yml
+++ b/dev/tasks/conda-recipes/azure.osx.yml
@@ -58,9 +58,14 @@ jobs:
 
   - script: |
       source activate base
-      conda build arrow-cpp parquet-cpp \
+      set +x
+      if [[ "${CONFIG}" == osx_arm* ]]; then
+        EXTRA_CB_OPTIONS="${EXTRA_CB_OPTIONS:-} --no-test"
+      fi
+      conda build arrow-cpp \
         -m ./.ci_support/${CONFIG}.yaml \
         --clobber-file ./.ci_support/clobber_${CONFIG}.yaml \
+        ${EXTRA_CB_OPTIONS:-} \
         --output-folder ./build_artifacts
 
       if [ ! -z "${R_CONFIG}" ]; then
@@ -71,5 +76,5 @@ jobs:
     workingDirectory: arrow/dev/tasks/conda-recipes
     displayName: Build recipes
 
-  {{ macros.azure_upload_releases("arrow/dev/tasks/conda-recipes/build_artifacts/osx-64/*.tar.bz2") }}
-  {{ macros.azure_upload_anaconda("arrow/dev/tasks/conda-recipes/build_artifacts/osx-64/*.tar.bz2") }}
+  {{ macros.azure_upload_releases("arrow/dev/tasks/conda-recipes/build_artifacts/osx-*/*.tar.bz2") }}
+  {{ macros.azure_upload_anaconda("arrow/dev/tasks/conda-recipes/build_artifacts/osx-*/*.tar.bz2") }}
diff --git a/dev/tasks/conda-recipes/build_steps.sh b/dev/tasks/conda-recipes/build_steps.sh
index 8f1743f5946..25864c08a70 100755
--- a/dev/tasks/conda-recipes/build_steps.sh
+++ b/dev/tasks/conda-recipes/build_steps.sh
@@ -1,5 +1,9 @@
 #!/usr/bin/env bash
 
+# NOTE: This script has been slightly adopted to suite the Apache Arrow / crossbow CI
+# 	setup. The next time this is updated to the current version on conda-forge,
+#       you will also make this additions afterwards.
+
 # PLEASE NOTE: This script has been automatically generated by conda-smithy. Any changes here
 # will be lost next time ``conda smithy rerender`` is run. If you would like to make permanent
 # changes to this script, consider a proposal to conda-smithy so that other feedstocks can also
diff --git a/dev/tasks/conda-recipes/run_docker_build.sh b/dev/tasks/conda-recipes/run_docker_build.sh
index 8a900379487..7645c43e2fa 100755
--- a/dev/tasks/conda-recipes/run_docker_build.sh
+++ b/dev/tasks/conda-recipes/run_docker_build.sh
@@ -1,5 +1,9 @@
 #!/usr/bin/env bash
 
+# NOTE: This script has been slightly adopted to suite the Apache Arrow / crossbow CI
+# 	setup. The next time this is updated to the current version on conda-forge,
+#       you will also make this additions afterwards.
+
 # PLEASE NOTE: This script has been automatically generated by conda-smithy. Any changes here
 # will be lost next time ``conda smithy rerender`` is run. If you would like to make permanent
 # changes to this script, consider a proposal to conda-smithy so that other feedstocks can also
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index 8014c4ba353..983475226a2 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -166,7 +166,7 @@ tasks:
     ci: azure
     template: conda-recipes/azure.linux.yml
     params:
-      config: linux_cuda_compiler_versionNonepython3.6.____cpython
+      config: linux_64_cuda_compiler_versionNonenumpy1.17python3.6.____cpython
       r_config: linux_64_r_base3.6
     artifacts:
       - arrow-cpp-{no_rc_version}-py36(h[a-z0-9]+)_0_cpu.tar.bz2
@@ -176,7 +176,7 @@ tasks:
     ci: azure
     template: conda-recipes/azure.linux.yml
     params:
-      config: linux_cuda_compiler_versionNonepython3.7.____cpython
+      config: linux_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython
       r_config: linux_64_r_base4.0
     artifacts:
       - arrow-cpp-{no_rc_version}-py37(h[a-z0-9]+)_0_cpu.tar.bz2
@@ -186,7 +186,7 @@ tasks:
     ci: azure
     template: conda-recipes/azure.linux.yml
     params:
-      config: linux_cuda_compiler_versionNonepython3.8.____cpython
+      config: linux_64_cuda_compiler_versionNonenumpy1.17python3.8.____cpython
     artifacts:
       - arrow-cpp-{no_rc_version}-py38(h[a-z0-9]+)_0_cpu.tar.bz2
       - pyarrow-{no_rc_version}-py38(h[a-z0-9]+)_0_cpu.tar.bz2
@@ -195,7 +195,7 @@ tasks:
     ci: azure
     template: conda-recipes/azure.linux.yml
     params:
-      config: linux_cuda_compiler_versionNonepython3.9.____cpython
+      config: linux_64_cuda_compiler_versionNonenumpy1.19python3.9.____cpython
     artifacts:
       - arrow-cpp-{no_rc_version}-py39(h[a-z0-9]+)_0_cpu.tar.bz2
       - pyarrow-{no_rc_version}-py39(h[a-z0-9]+)_0_cpu.tar.bz2
@@ -204,7 +204,7 @@ tasks:
     ci: azure
     template: conda-recipes/azure.linux.yml
     params:
-      config: linux_cuda_compiler_version9.2python3.6.____cpython
+      config: linux_64_cuda_compiler_version10.2numpy1.17python3.6.____cpython
     artifacts:
       - arrow-cpp-{no_rc_version}-py36(h[a-z0-9]+)_0_cuda.tar.bz2
       - pyarrow-{no_rc_version}-py36(h[a-z0-9]+)_0_cuda.tar.bz2
@@ -213,7 +213,7 @@ tasks:
     ci: azure
     template: conda-recipes/azure.linux.yml
     params:
-      config: linux_cuda_compiler_version9.2python3.7.____cpython
+      config: linux_64_cuda_compiler_version10.2numpy1.17python3.7.____cpython
     artifacts:
       - arrow-cpp-{no_rc_version}-py37(h[a-z0-9]+)_0_cuda.tar.bz2
       - pyarrow-{no_rc_version}-py37(h[a-z0-9]+)_0_cuda.tar.bz2
@@ -222,7 +222,7 @@ tasks:
     ci: azure
     template: conda-recipes/azure.linux.yml
     params:
-      config: linux_cuda_compiler_version9.2python3.8.____cpython
+      config: linux_64_cuda_compiler_version10.2numpy1.17python3.8.____cpython
     artifacts:
       - arrow-cpp-{no_rc_version}-py38(h[a-z0-9]+)_0_cuda.tar.bz2
       - pyarrow-{no_rc_version}-py38(h[a-z0-9]+)_0_cuda.tar.bz2
@@ -231,7 +231,7 @@ tasks:
     ci: azure
     template: conda-recipes/azure.linux.yml
     params:
-      config: linux_cuda_compiler_version9.2python3.9.____cpython
+      config: linux_64_cuda_compiler_version10.2numpy1.19python3.9.____cpython
     artifacts:
       - arrow-cpp-{no_rc_version}-py39(h[a-z0-9]+)_0_cuda.tar.bz2
       - pyarrow-{no_rc_version}-py39(h[a-z0-9]+)_0_cuda.tar.bz2
@@ -278,7 +278,7 @@ tasks:
     ci: azure
     template: conda-recipes/azure.osx.yml
     params:
-      config: osx_python3.6.____cpython
+      config: osx_64_numpy1.17python3.6.____cpython
       r_config: osx_64_r_base3.6
     artifacts:
       - arrow-cpp-{no_rc_version}-py36(h[a-z0-9]+)_0_cpu.tar.bz2
@@ -288,7 +288,7 @@ tasks:
     ci: azure
     template: conda-recipes/azure.osx.yml
     params:
-      config: osx_python3.7.____cpython
+      config: osx_64_numpy1.17python3.7.____cpython
       r_config: osx_64_r_base4.0
     artifacts:
       - arrow-cpp-{no_rc_version}-py37(h[a-z0-9]+)_0_cpu.tar.bz2
@@ -298,7 +298,7 @@ tasks:
     ci: azure
     template: conda-recipes/azure.osx.yml
     params:
-      config: osx_python3.8.____cpython
+      config: osx_64_numpy1.17python3.8.____cpython
     artifacts:
       - arrow-cpp-{no_rc_version}-py38(h[a-z0-9]+)_0_cpu.tar.bz2
       - pyarrow-{no_rc_version}-py38(h[a-z0-9]+)_0_cpu.tar.bz2
@@ -307,7 +307,25 @@ tasks:
     ci: azure
     template: conda-recipes/azure.osx.yml
     params:
-      config: osx_python3.9.____cpython
+      config: osx_64_numpy1.19python3.9.____cpython
+    artifacts:
+      - arrow-cpp-{no_rc_version}-py39(h[a-z0-9]+)_0_cpu.tar.bz2
+      - pyarrow-{no_rc_version}-py39(h[a-z0-9]+)_0_cpu.tar.bz2
+
+  conda-osx-arm64-clang-py38:
+    ci: azure
+    template: conda-recipes/azure.osx.yml
+    params:
+      config: osx_arm64_python3.8.____cpython
+    artifacts:
+      - arrow-cpp-{no_rc_version}-py38(h[a-z0-9]+)_0_cpu.tar.bz2
+      - pyarrow-{no_rc_version}-py38(h[a-z0-9]+)_0_cpu.tar.bz2
+
+  conda-osx-arm64-clang-py39:
+    ci: azure
+    template: conda-recipes/azure.osx.yml
+    params:
+      config: osx_arm64_python3.9.____cpython
     artifacts:
       - arrow-cpp-{no_rc_version}-py39(h[a-z0-9]+)_0_cpu.tar.bz2
       - pyarrow-{no_rc_version}-py39(h[a-z0-9]+)_0_cpu.tar.bz2
@@ -318,7 +336,7 @@ tasks:
     ci: azure
     template: conda-recipes/azure.win.yml
     params:
-      config: win_python3.6.____cpython
+      config: win_64_cuda_compiler_versionNonenumpy1.17python3.6.____cpython
       r_config: win_64_r_base3.6
     artifacts:
       - arrow-cpp-{no_rc_version}-py36(h[a-z0-9]+)_0_cpu.tar.bz2
@@ -328,7 +346,7 @@ tasks:
     ci: azure
     template: conda-recipes/azure.win.yml
     params:
-      config: win_python3.7.____cpython
+      config: win_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython
       r_config: win_64_r_base4.0
     artifacts:
       - arrow-cpp-{no_rc_version}-py37(h[a-z0-9]+)_0_cpu.tar.bz2
@@ -338,7 +356,16 @@ tasks:
     ci: azure
     template: conda-recipes/azure.win.yml
     params:
-      config: win_python3.8.____cpython
+      config: win_64_cuda_compiler_versionNonenumpy1.17python3.8.____cpython
+    artifacts:
+      - arrow-cpp-{no_rc_version}-py38(h[a-z0-9]+)_0_cpu.tar.bz2
+      - pyarrow-{no_rc_version}-py38(h[a-z0-9]+)_0_cpu.tar.bz2
+
+  conda-win-vs2017-py39:
+    ci: azure
+    template: conda-recipes/azure.win.yml
+    params:
+      config: win_64_cuda_compiler_versionNonenumpy1.19python3.9.____cpython
     artifacts:
       - arrow-cpp-{no_rc_version}-py38(h[a-z0-9]+)_0_cpu.tar.bz2
       - pyarrow-{no_rc_version}-py38(h[a-z0-9]+)_0_cpu.tar.bz2

From d4ca31f4834ad9d562c6dd145147601ddfa3b8bc Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Thu, 15 Apr 2021 14:40:58 +0200
Subject: [PATCH 054/719] ARROW-12396: [Python][Docs] Clarify
 serialization/filesystem docstrings about deprecated status

Currently the docstring itself says nothing about it, there is only the warning when calling the function.

Closes #10045 from jorisvandenbossche/ARROW-12396-serialization-docs

Authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 python/pyarrow/hdfs.py           | 16 +++++++++--
 python/pyarrow/serialization.pxi | 49 +++++++++++++++++++++++++++-----
 2 files changed, 55 insertions(+), 10 deletions(-)

diff --git a/python/pyarrow/hdfs.py b/python/pyarrow/hdfs.py
index 7ad379bd660..c4daac9fd1a 100644
--- a/python/pyarrow/hdfs.py
+++ b/python/pyarrow/hdfs.py
@@ -28,9 +28,13 @@
 
 class HadoopFileSystem(lib.HadoopFileSystem, FileSystem):
     """
-    FileSystem interface for HDFS cluster.
+    DEPRECATED: FileSystem interface for HDFS cluster.
 
     See pyarrow.hdfs.connect for full connection details
+
+    .. deprecated:: 2.0
+        ``pyarrow.hdfs.HadoopFileSystem`` is deprecated,
+        please use ``pyarrow.fs.HadoopFileSystem`` instead.
     """
 
     def __init__(self, host="default", port=0, user=None, kerb_ticket=None,
@@ -184,13 +188,19 @@ def _libhdfs_walk_files_dirs(top_path, contents):
 def connect(host="default", port=0, user=None, kerb_ticket=None,
             extra_conf=None):
     """
-    Connect to an HDFS cluster. All parameters are optional and should
-    only be set if the defaults need to be overridden.
+    DEPRECATED: Connect to an HDFS cluster.
+
+    All parameters are optional and should only be set if the defaults need
+    to be overridden.
 
     Authentication should be automatic if the HDFS cluster uses Kerberos.
     However, if a username is specified, then the ticket cache will likely
     be required.
 
+    .. deprecated:: 2.0
+        ``pyarrow.hdfs.connect`` is deprecated,
+        please use ``pyarrow.fs.HadoopFileSystem`` instead.
+
     Parameters
     ----------
     host : NameNode. Set to "default" for fs.defaultFS from core-site.xml.
diff --git a/python/pyarrow/serialization.pxi b/python/pyarrow/serialization.pxi
index c6e40c145b8..9177b2aa27b 100644
--- a/python/pyarrow/serialization.pxi
+++ b/python/pyarrow/serialization.pxi
@@ -355,10 +355,14 @@ cdef class SerializedPyObject(_Weakrefable):
 
 def serialize(object value, SerializationContext context=None):
     """
-    EXPERIMENTAL: Serialize a general Python sequence for transient storage
+    DEPRECATED: Serialize a general Python sequence for transient storage
     and transport.
 
-    This may have better performance and memory efficiency than Python pickle.
+    .. deprecated:: 2.0
+        The custom serialization functionality is deprecated in pyarrow 2.0,
+        and will be removed in a future version. Use the standard library
+        ``pickle`` or the IPC functionality of pyarrow (see :ref:`ipc` for
+        more).
 
     Notes
     -----
@@ -398,7 +402,13 @@ def _serialize(object value, SerializationContext context=None):
 
 def serialize_to(object value, sink, SerializationContext context=None):
     """
-    EXPERIMENTAL: Serialize a Python sequence to a file.
+    DEPRECATED: Serialize a Python sequence to a file.
+
+    .. deprecated:: 2.0
+        The custom serialization functionality is deprecated in pyarrow 2.0,
+        and will be removed in a future version. Use the standard library
+        ``pickle`` or the IPC functionality of pyarrow (see :ref:`ipc` for
+        more).
 
     Parameters
     ----------
@@ -417,7 +427,13 @@ def serialize_to(object value, sink, SerializationContext context=None):
 
 def read_serialized(source, base=None):
     """
-    EXPERIMENTAL: Read serialized Python sequence from file-like object.
+    DEPRECATED: Read serialized Python sequence from file-like object.
+
+    .. deprecated:: 2.0
+        The custom serialization functionality is deprecated in pyarrow 2.0,
+        and will be removed in a future version. Use the standard library
+        ``pickle`` or the IPC functionality of pyarrow (see :ref:`ipc` for
+        more).
 
     Parameters
     ----------
@@ -449,7 +465,13 @@ def _read_serialized(source, base=None):
 
 def deserialize_from(source, object base, SerializationContext context=None):
     """
-    EXPERIMENTAL: Deserialize a Python sequence from a file.
+    DEPRECATED: Deserialize a Python sequence from a file.
+
+    .. deprecated:: 2.0
+        The custom serialization functionality is deprecated in pyarrow 2.0,
+        and will be removed in a future version. Use the standard library
+        ``pickle`` or the IPC functionality of pyarrow (see :ref:`ipc` for
+        more).
 
     This only can interact with data produced by pyarrow.serialize or
     pyarrow.serialize_to.
@@ -476,7 +498,14 @@ def deserialize_from(source, object base, SerializationContext context=None):
 
 def deserialize_components(components, SerializationContext context=None):
     """
-    Reconstruct Python object from output of SerializedPyObject.to_components.
+    DEPRECATED: Reconstruct Python object from output of
+    SerializedPyObject.to_components.
+
+    .. deprecated:: 2.0
+        The custom serialization functionality is deprecated in pyarrow 2.0,
+        and will be removed in a future version. Use the standard library
+        ``pickle`` or the IPC functionality of pyarrow (see :ref:`ipc` for
+        more).
 
     Parameters
     ----------
@@ -495,9 +524,15 @@ def deserialize_components(components, SerializationContext context=None):
 
 def deserialize(obj, SerializationContext context=None):
     """
-    EXPERIMENTAL: Deserialize Python object from Buffer or other Python
+    DEPRECATED: Deserialize Python object from Buffer or other Python
     object supporting the buffer protocol.
 
+    .. deprecated:: 2.0
+        The custom serialization functionality is deprecated in pyarrow 2.0,
+        and will be removed in a future version. Use the standard library
+        ``pickle`` or the IPC functionality of pyarrow (see :ref:`ipc` for
+        more).
+
     This only can interact with data produced by pyarrow.serialize or
     pyarrow.serialize_to.
 

From 926452bcbebe9e952420688ad9a046bc16aa2ad8 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Thu, 15 Apr 2021 16:19:16 +0200
Subject: [PATCH 055/719] ARROW-12188: [Docs] Switch to pydata-sphinx-theme for
 the main sphinx docs

Closes #9876 from jorisvandenbossche/ARROW-12188-docs-theme

Authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Signed-off-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
---
 ci/conda_env_sphinx.yml                  |   2 +-
 ci/docker/linux-apt-docs.dockerfile      |   2 +-
 docs/requirements.txt                    |   2 +-
 docs/source/_static/arrow.png            | Bin 0 -> 21636 bytes
 docs/source/_static/favicon.ico          | Bin 0 -> 15086 bytes
 docs/source/_static/theme_overrides.css  |  53 +++++++++++++++++++++++
 docs/source/_templates/docs-sidebar.html |  19 ++++++++
 docs/source/_templates/layout.html       |  14 ++----
 docs/source/conf.py                      |  14 +++---
 docs/source/cpp/api/array.rst            |   4 --
 docs/source/cpp/api/compute.rst          |   4 --
 docs/source/cpp/api/dataset.rst          |   4 --
 docs/source/cpp/api/filesystem.rst       |   4 --
 docs/source/cpp/api/io.rst               |   4 --
 docs/source/cpp/api/ipc.rst              |   4 --
 docs/source/cpp/compute.rst              |   4 --
 docs/source/cpp/memory.rst               |   4 --
 docs/source/cpp/parquet.rst              |   4 --
 docs/source/developers/cpp/windows.rst   |   2 +-
 docs/source/format/Guidelines.rst        |   2 +
 docs/source/format/IPC.rst               |   2 +
 docs/source/format/Layout.rst            |   2 +
 docs/source/format/Metadata.rst          |   2 +
 docs/source/python/plasma.rst            |   3 --
 python/pyarrow/dataset.py                |   4 +-
 python/pyarrow/parquet.py                |   8 ++--
 26 files changed, 102 insertions(+), 65 deletions(-)
 create mode 100644 docs/source/_static/arrow.png
 create mode 100644 docs/source/_static/favicon.ico
 create mode 100644 docs/source/_templates/docs-sidebar.html

diff --git a/ci/conda_env_sphinx.yml b/ci/conda_env_sphinx.yml
index 8654d231065..49388e2b437 100644
--- a/ci/conda_env_sphinx.yml
+++ b/ci/conda_env_sphinx.yml
@@ -21,4 +21,4 @@ doxygen
 ipython
 # Pinned per ARROW-9693
 sphinx=3.1.2
-sphinx_rtd_theme
+pydata-sphinx-theme
diff --git a/ci/docker/linux-apt-docs.dockerfile b/ci/docker/linux-apt-docs.dockerfile
index 46c31bbd480..20cb889f28d 100644
--- a/ci/docker/linux-apt-docs.dockerfile
+++ b/ci/docker/linux-apt-docs.dockerfile
@@ -79,7 +79,7 @@ RUN pip install \
         breathe \
         ipython \
         sphinx \
-        sphinx_rtd_theme
+        pydata-sphinx-theme
 
 COPY c_glib/Gemfile /arrow/c_glib/
 RUN gem install --no-document bundler && \
diff --git a/docs/requirements.txt b/docs/requirements.txt
index 80411408149..0dbca692225 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -2,4 +2,4 @@ breathe
 ipython
 numpydoc
 sphinx==2.4.4
-sphinx_rtd_theme
+pydata-sphinx-theme
diff --git a/docs/source/_static/arrow.png b/docs/source/_static/arrow.png
new file mode 100644
index 0000000000000000000000000000000000000000..72104b075b86cda7dce60a16a71c6c687b9ede78
GIT binary patch
literal 21636
zcmbTc1yodFxHd`%D5*3e-CfcxA{|PnARs*;j0~M3-6c{Y-CZ-Z2-4jQC_Taq0>U8q
zZ}j}<taI<V>)!j%V(qoRZ@=+8@B4h;-m`}&U2PR2{D=4$7#Kuqs!v~FVBkS9FtDcY
zVgoZ>30D`u;7V6hUl|x492^uC6`7ftadC0o+}scn5@KRv0`rQBioo$dKV*MDadB}e
zDJg&f2L}fh7M8HEFe@wTKbzODU-R+t>FMeHrwecu2uMv$edo>{3kwTi9yq^$|9*IQ
zIN(c8P97Q>>f+)8%>ViG=bzWb#f7u8Gmz@v5J<$v#s-`N5!BSw{s9AyczAf&*x0zZ
zxT2z>`uh5Sfs&FEa0D&^mcZ4=j~_caIs!Jp%;n`JU<MeIkdRPNP;hf|1KCecPXT-2
z+|ttWpCe$v%gYO#1B?IzBO@c=C?+NbL_0q}r=g(%Pyt}jXf(hbs1+k4BajOqNJ>h2
zb#+BaNeNi~BLWx$L;$8TGBQ9400S^QJ3C`xVFAF{+S&q@VP|K5`t&Ifd4GSOh=>T7
zpP88f>Jt(Y;_2xLzyQ!vQBkd}tpUE8nwmd<{sioSm>)iT@bU3+aBvtJ8p_DX08oKI
zAR8N-!ootp1qjmA)C9Nyz+S$5$;`|Q6uG;*dwqRfN=mA{yc|Ft78Vv069XgyLe<pN
zTwh-UIJ2^{l9H0@>gs?Q01UvWqoV`pg{!M8!1~jtPhDMINF<VzlT%Po5TFdSJ2o~p
zFE6jUx_Wna7l;60C@n2bNJz-f&!3&01*#Mo842|L<HwHxR2U4_-`|fyp}f7lwY9b3
za5z8#00y)P0)YULM@L7erltTLg2CXqxjBF=Py`?dKmiJc0s!#w@qxku{URnN23QCQ
z2?1Ii9v%kTWNK;(gaSGUhyf5Y07hM1ot~Z^zzNU+(gDf@G@ziM00>w@LIThppoAbl
zFdvX(4g5k|4+9$uyB)K=z5PySGxQIv6Az~c+S%TIa7oolzw&bedIu`@aHqYUz`N%@
z(GQ%{`Madeb-B@<DKFW=`#o)SIE8WJxhpd5%_Q!Hj65K6VW#U;B`A<4EAf|B{N3ut
zs>mxsY_9!`vz_vRp;RlK3LnjpAs;n<cls0N7ph{NT0)faU{Ocg>#M3F7X#gAqF2?|
zSJz<!NlMo>nF(<YCISb0xh~q*oAk?b<vAW^>~sCKX`%k-`%{C>6)Eq64TO$1-@7~7
z?Jl*I79_=JiqWHgZcdMlHrFI9748Kfn>*_ifOV;Br1n%%-+O7lCj{V)5&Y?@jH(Rl
z_VoYkV|9vLU{7Sk59NeB3D)u5v?RkP#qABLb6g#^s(ev%{r?>L*VnEH981jZyhEwU
zn35<Xnd=~o;H*{3^7*H*@dht!_t~egEC)0~8HU;F_!+L%cu@$)Fc+Ofu`dNofiS8+
zIzY&fa*pB{MgeC?$To0Yf6I_4m|?aT1^Nsx*#}{O{>x;xTm1Mx{G6;I;*+S9|4g6;
z#CQ51;+LPFv^W7#Mc~&5xZnMnYH!hS!DxfC*q5%xI_YS|3GXQ0ACVtw)3sHtJ`-z!
zrhG7c)EVXVa*3#0PxRMl`+uDB?cYK+K*2Hm=l)VkgA;XYkRbt-8D7}GX8z6<!7*eE
zPEKyh0fH4CK*9#og<H-;DBSNwQC<LI4H%|(Ic{g>|2_?e!=J)xhB|12$N%!|tOpFr
z{>$JWC!kWf|1NW*K^Uu(7uSA3**Y6(gG>G;_%DOoIrb$Wr~nf8d)a^H{1@kZD~=k2
z;ql+<+d;5@P5me8lO5RFZAJg$`wQxS^8X*w)X1*_fx7;ttO43(`41-mg8>kgDn4g)
zh!y2#-eSA1pfUOkm$>4m_AB-Gx#wr7p#L)pzkssZYc;}vzAVBq+W(OO1f&z-kM6pi
zy=~mTZr#rQv%H=Azx3)PB7qtsXj1;{WKF;3KZ4EtRo#E@=gA@b=C<{>!T!nTw#)w%
z{68d^j|dXSkazwo#sA*+|7zv`Px5<Tx@d#*{$>1cUF%wG%b3XQ6^!2)m?6(!XeAdT
zZ{xovPnOQVjx<;IFllu)=Co!Z;GyKSWg<uya|wSnKKJt?-Av3@k<;#trRR^1M~^ag
zeqZp~ACHvz>@16j*t5nwYs`AqxSa156RAp>i@bn2K84XawM0hRDw3Uo9!`QHmiU;N
zO&))S1AoLbpa%~IF>BXApD>YL!M9gvFh0162VZ$f%fQ!wVcV{Wn>fbyGIF}A!|dtP
z7o0S}(Acd-r{&g|%e#45YtS1;$JPW~B;Bp`4-(Hs0=~Dy^<Ck1NSHkoPW*THc~^J^
zB*ojZJoy(Jq3nKh7XKHd7q|@0%uK2%(10wCfO|>%BCY{U_tr@sl2zoERYbNd>q46#
zF_Dk)u40?B<P4XQy9Kx0?gq_VyPw2bC|AOZr>cON$2jt}t&n57ETose3NpRlS!=c4
zceVKk&yKb<*jrcNdD0ED?+>pmShOn4wa5$!YTXh9_4XEYdB5(D%a--p&Q?=0sy1zP
zxGt8ZCFbYh&Z5fA7IzxP=y)~;Z)*N2B5-7h(Ot0?G|l<+THxJ>i;-*$KO`{_DLwkV
z?vm+zW~KyE;rZb1oBTl^k!G@5*Mcd&qwxZ&B^Z&Vax1(aKq<cXQ2p3RM#R&Lih`F&
zbBtCjE;1xcy+!q*b^nr9Vw0EjlNnH%t;FiC)c8nb53@6)prrGl@j7u+8YqdJ*`+B<
z&<5#srwsn0NB>vzjPL7b)n$<SmXyK}J0J*_Q1O!1yDBef=HFdT$#1dR^F{MAl|;2q
zO+68!m#8naE!rduD)a9Yi_BQ#IW@WrSo-y0bPtwbY|dD8BIS8B{OuP#4afGDfrSI>
zOYDACX<$wN#{u73A_nAFFjrH1x>8V7;gO20CW|mNp=ppx=n@~-2DY;YX_2^co|oUZ
z%F&Y3oc7~QxV@CvjObz(!RpCH)pcp!a)`L+;!8F3-jhDYso2IWRvx5RVJSS>{bMy3
zGEWWCx1FLR|0B-AtPe@3sclBG+QC3FQ>o$oWf@>~hek|Jgkw|{MyKY~^0evow9U~$
z(Xw-%Y#4-B$Y)?&+cH%A@XZ4cY`3H-K2pW_sE(}1EJ!cbGB{}y7DgvVH*`&_1+s~`
zI?4PR6QjLG#%Ix3eO{c(I^&R-6V7bC`N2V)Mr;YGst02`Mjl4AV8pu2rl8i(Xw<Ql
zN4POs|Fr+y^hxbU@x21+ew;CcDu;fnp8$DiHZCXSduc2<V0Z^!;A8MsalyYf>7?Su
z)IP`fb$a?WI)&uBEdP4-3#m$Qkln$UrPtjJ!T6i^&(g_>c^54;k}MYI_cfZIJUz8o
z95hUitba}LBbfX%EAeI`_JBv>jIw0D(dmBg0sY>wOv8tyHn;g5hE`}oC;WIsT3}K>
zVHNGQ91zWbY?gq#wCJ%b((StAJ6RbbPEp;v>|f&|q=dy#9-`C`wx^?Zy2yaytld17
zgJqagdfvJp%vD=_8JV1`TVnf_r_`FJ$ZSrAgqi#6?0M%U*PI6rHPI032#6Z{1L$)Q
zL&x`3*9)%BrsbdD1~}@j@0nw@_i{d^eHzG(A=cV@xW=&0!V+(z_T3S!0+B5=%HzK5
zG-yxfLE|zCa2Wf5)3in=d3RQ>0zCYd8vZ4{Wx$d?(z~XCD10&$!9Jw;`*&vBRf!<7
zSq31&a=EQ*rffT@Q5tVj<TuPx`qg?EuZw>zp<#vi=d;bshl)9`p;1ToWoJ&!CBmDR
zPk|I52i&(oWszqb`OoaqP!HVleeLSaG5cwvguD1Ve~yF6wwQfOk!|kLHOOW;AP3od
zrJUofm@3`3m$6e8rw_{`$b}yKwB6sb793)ox||)|Yu;PT0HCS-)?N1eL7pa4Cl9z`
zbL&SeIf4e=K0)nMK?g$m^SfISlfx|eORA49(JW7WqUoLd__IGJbDWlBA)D8pm6C`k
z=McTfdJerR(akBE0GCD_X6z^ty@E#V$Ix&4+}iFaKo>!KMDL_n`?aY>MSQKUKmXQQ
zAR`q>2wCa@lbKNeC92Fe9s&?eufVivpcctfA{u3fiySDPM0Lqnsy`$WjX<#13;pBh
zvJA&Ltb>y;U^KOa-62*q7-NU?4CO!52$3AjxOn0UH0Xt6DC^PrNV^c}H&>YR+!KEn
zaT;WP{E)b_RpU2SIG#q3R@L*PEk?LrCg`(%`#a>=PRFum0o<UR8M8U7bN8-6r%ET{
z7nYL`CF1l{N%Y%mpL_bU#aWR^uM&7WMDIyhes208L?wUejHb}<N~M(P#V5!&kEc*G
zA^2057A53%M;7T}h9X@CgII==s74;D`NE2q)4Uq{t=!-dVd<Qv_=Dw9uLk%{MVYS7
z_7DkO?voSs9MH`Yc4>%E27^*ltSW@4IP2TG*SHOA5u{!qKZB_g=y9*yH@p$i`Rh?X
zHP-~qPu0&`#hT(t*thArAbR+s*5*lhu0B|zj26(2;TmgjbjKy5F|iIr>o4@mTj=IW
z`Eq>8c}=pU<l+WUDtO>so1^mN;hMQ?sQ+KvaKb5zYerIiZ|EW83=7kckerA;0D-{Z
zJ#=eWK)a{(Z~(z?_Qo|x5=)~{SF{wFG`v_MojCzkA^R+|(IHK~tmQL<)o=J`Hrhxx
zCAnxB9Z*yGWklIb+uZMze2q`D{!+-8SPP>37gG)VvLkEVm>7`vWx0#A{`fbHPCxVS
zh?t0kPfskV^5~jI%*3DHb7_Ggt7E%DkX|usxIYU8x+HN649+o#(_cM&%GUy{7j-Kb
z+*SMlPF`kGS2E}HRil)t3|m4gy3EM;Phjl`m@I8w9rmaCA#gKqdgm+sByh(y%n4OO
zU{!$gLSLXXB8!wC4^ce`CTp7jt=<~48RSdND06`cpItr1beV3P@qk$^l9)7a96;CZ
zCY>yRnt>J2k3kJ9oLCXp{c<A#clC`sEJ%k?ngvqx9y>cdWqRZ&o|oAGPaTmc?GPzS
zM9ZzG6msge_?`?B(LOGh$-haJ6EQqZxt|ld^&o#(Q~dc|m8DTX@ntQwncz1D(QU*w
zzTOymd~2HRGT7xf(^{-9;>$U8LoSeOtx)cl0$)c0h((!>!F%=RBda;>tK|J}9tELh
zxu*D(phm4+sj>2dBcciOsXpA95l@OKq`9~VXbV%rt(A}PYU}D=Iil6L*l3POzWY==
zw@ypu;-McM%pal6{KdO`(f&a@G8}(4E&OX}OAFwcuHxgF1!?7#Brq+JwqcL+a2<4|
zZ3^wQ><2LR%L&v-b5@SzM_QJh5Jyyac0kXx>?xXqAw~2ZaMi95E>qFpMk}P1adI8&
zh}z^Z+4_#KJikcU88TIcV)C#cw^v5nXzQMSv9(xs{YZQBe5|k=5&z8Vqd!cpO&KbX
z1nSO*H=2<muU`@-9ix6+r^$Ztd~Tb~IkeSZ^U`N?C2q&fZuIc3)$X9PEs_bYp!IBN
zbSyhZ^3?~fo!KmQ%Z@+iHj-aH*CH$@J?f`HN2Vrvs^Y(Ht=h6Y#ifQa$d?m8A;zd=
z*cp-y*x?Fv$YOZ11Nz1)cRdU7pu0Q8XmBtI^eNuUz8Stha+oq0U6V?ZiBgtJx!^?Q
z@J199@j3Xt<wxK;KseybsMY8MFMCUQHX#8kXvT#W{~**!>Kla}9~)f3X=${<X@6vL
z%#!U5lNB`KTnokH9D(@Wnf;)UR#gWk37G~p3O%2@$Ut_aH^`VnhXYech7m$YizHEg
zW;g~izQB2TbnKC6mTlbQ9Ky7>Gc3W@91O|FoU42_6!d!7FJ?i&V;*dB$jL-QlY_%H
zEv(2d>R_|J^3_=51KBi@JTNxGNqh&i6t{geU`H2w6_>0QGDSxbz;Tf5Io|ljV2sj|
z7~28jVzCSaI(zlL()zHAP-=iBID|uI|1xkvWVVtSuJCh7HqC1q^jL^*Y;I|hqk=kI
z4IO%x($e(7UvsQ4+tK<;OR}IX8RrvR4eSbA%q-~2esIqcgJC@h)tw?fj9BhcZaetK
zbqX}=;j(NZ5bDyYpwjwzsPp`5Ts3t4{S+Oy43Rp-nE#g6?4u24ea0N*sv^u$JaE!%
zE}7LRKI4dK3O?^yFJdGbZbsB4Voy*`GfKA|oVUWsHj_49PV@q+G0O{5Qs;rovGivA
zSQXMbsI9z4E^zXJA+D|3P3$R9Z&z5!6i^9<oJd%c7v-=iIU}6nW^fD~U=(n4W-@;E
zW5aK54c=_>HpMTqD;Xt@`%Q55K>@rGCQX<o%CUH1{&Ob@6`Ydu_NUdG)k_4J!#X(i
zmAy915h70gz|R@EiC-V@TjJ-QI5_sv+R4652*`<U%YyVdi2r;&ux2W4W@{ijMF*}I
z3HoTtohMvK`w0%9wpR`etvrrOKxD~GA?4d9moS~mTSb7Y-@3;xF{C}0h@A7Yd!i2L
zUj9r2-3-Rf@f(KyoNE`dp1yhy-ni+%xD9e+6O-anl@r5Ordt_*s@Zb0`Ro{?#Z_MY
z*e3_pl&vS880U4IEs1<4c0w!U_Oi$WJRe(kX^knG=_tN-Pz=Wi248jgG2Q^Dd5bMJ
z4>1%la6@}AuHn`=_uj>kK-lHz@Pobb99z7w`+$S9xI&DVGcag$_&XT>u-9L4147P$
zg54J2OI2_TKH#uKH1YZWZE*e1!0Z1W^S>qd?}MO!*6{ytDgGf@U;AtQ3i1y&`t<?I
zE9EZmRke$Mu`pVzk$tK^v`$Ptb>c&)vK-%$HB+Rcu0ojV%-u%|4y>}%eSLkfUsJ1Q
z4`#}%{0UL}wGDDzeaDc}-)oB&)Qz-AT#N~Vr8utwI0lBVB5V@WNQ<E$;2;i6VPov#
zAfEwK7#JO&;J_3S#wRNOPFi3N6XSPiz+aozyt)=IHx@eQwE91$9hT!;%+ixDglF9#
z<?xUQ1(6tMwDWBeJ{sSfAfd;3L7@H(ZQ*Xc@Q%UljhlVD$(8k`fCoTC#R<e%VC8Hi
zYLg|&w2E?&;dQ~`v)YqS_jyjI5m#QYHkF}YLc>2w<hp`H8aq3iDDI|`MWu<@cdR0j
z<{fe$9|yIjj6N$kd{H)e7dNHq$4I!oi-ecF>?8ljKAr`hfVTA+<owr=2Dq>iSz8`C
z>$j~KYwno&3<R7UhKA|Sc+QfC;|%Xhn)bNgE1<Zvd~)b;54qj{%)aqm9*_G3``mu<
zczr2yT`A0#`cWZ4+^%(D`f$LXqpgeK4{L+ZHoi~^{`o$TaI+inlF1}l#$bWfggbLP
zurAT1uc1KPwovR6;@#Lj=N{0NJ`d_Fv00e}LCi(E*qh)Q?bMcy%Zt%68eyr{%N;~B
z8ecD*cN|r}9A*Y+Oj5mc`_O2-BfV&sJMPpe$vA9iY@BF&cnmgDBPEIP(;jFogfBpB
z)`*^H%8%E(OxT_`9Yj=7IT!bDL})+=P04k(6K_T{xA{#{URg6GZ&Hr`%4998Z?%x>
zp+k1w_MP!N)3{dx`*c%)Qm0OaX57HbxP*SAnt5Y80w)?hgOyX&coLZ}QfkSstTCkq
z*^vDWP7dOCBad7}KWc8A68({1oAJ3EH4dK<CQh%^$!V^&mgxxA7_#?C-(yt?4*%*v
zXatC6eq<8#0ddJqAfZBr$U)_cFlglrj2o!Yjdvy#MH=nCWHUB?G3EEp-uKCi{rXl7
z+Xg)&<8066fXuJ?aEBH#Dc<#aT7?17OeHtMoB<LS>cV#%NE^p`*}Vh<ZfJZ!f_-@k
zISr-#1U0`S+O_e5b)1wy33h{LN%_1u$~+OY{Yyh>=kUrk%*B4c%dX6e`?mmpU8^Un
zlDUbh8LvxLzo!bU?%+BG?*er8CHU*gCpGgV(Do>i-p-*CUA@Wcm?EC+=0C@JXjQCZ
zuJO}YTVs3ci^oZ{xdvZ6w;jYar|pIE97!_@v)Wc>eJ>T#TxN#r==`-lu89%&DUeoj
z!Kyk3*6F8NQ45fbkCID7q}L<ptj8hFQ==<K4zht>3SvSrE>v#e+i9<@%~*S+?c5Rx
z3CzSCnIZeI!)(BT>}}4mOa=DRKt9?ufdQ1l=WEwI?&Q0A_btWRK0gnvh}_m%`Z3Na
zpazj8q?010!RLD`si5u7)eS#9)*KoeKQ`{iR5qF6aHiq0x<nFNi^y=<&s^qHAeoBy
zJ0sR}0*WR6G~5EFAfvs*Jo-diY=LFhHYLx&hFUBWuZ(csvX0q+lwuDRn7}7^ykyKB
zq*v+=FN!5@ryhLltFw0~>5!;tz`%1q&w00a%8fu)-^=xypWt{J3^WAT1J*{dY-Wtj
z<NG}<ppxdZzW&;Ts=4G9&vWf}vv^T5CbpH_7mTL5tN9KfNAc=RK+)5kUXwyE-w$a|
z%=ul8(yP7NG@*Ez(f5aZb-%MOblrBgScLfUDXh!W1$J2Zu>FTm`{y?+A3)lBvezDI
zg$owe;q*zh5(F_^Fw}jq!H=>fkIoRKUK^60G8MQR@mwAwOkvti3<JOE^RieK+4|H^
zkgZ(Vx@a7GXWLPWhz=^cDbPObO&4vv-^g*BvQ4W9+<d~%p9YEQu>1O=_d@TjILxiS
zmdaM^izHs4)K5a)?6o-OZFf!eemBF6;w&5AgmGHi=V&gT3|7Uxp|O7d&mf+5#3yX|
zx&!(*AOQ2y&B*UVe}DF?(JI7kpF9U%wS8&FQ2fK;LNS;Ot;dr@KYIIpNBk7E|FvHt
z7W>RIezjgp5|qf4|7;|ajeU3lSA@|1gJvw-g?2$*Y85qk^fxc2=dE>VMKkVO{mHEg
zYM@Rgvv0W;Uhl2PE-qKsvVB&M<?_0|Lwfwj4+3)0S596*1&dXSd^!;KxRRgAVo;%_
zwE>m7l1**JoL#%CHIUK~GvRy1P34y+;d!6y&DIX)d|J-g@XRIkEsmavzyEV_b{zf9
z__@e0R_`|X<WgEEh~OrN(~NA|ihBC~oL>tnvx0$nv}ac4$t>#|aLLHg$zc)=laf!F
zmHO{QIdMhLAGuZz^z0-{8a$Wevn<6|aA-wwC+(fiPJJy>74a9eXXj^}Y$89*XYw;Q
zdlg#6@?;!v;?r&UwJd*>l*)P{F=qD-*B<xkB)@jKUo^8eARbWrF#$SLp1AIeH~%oH
z&83z+v8@w}K>j52kC8`%8Dut;lZbt4*G-KMPQ81uVM9r?8wZ_z>;ehk)$C6XW6K$N
zn`nH-^>elA0J2y8G5Q#t`CL7FWil@OS}t%;u#SS|@C^P7`@5;Mu%Y}LdDrC_-b?Z{
z`1y!=gV}R#P1~upWEtr6-k<X`YISp;QQe($U2{*ex&-(gO&iif@=Do5wgC~(tK2qJ
zCZ46VKAU|D#Z>GO`tW(5KxmcU@sUg5*OrDWIlqh1R<a_>z~~`RK$qDLTycJSj*ow_
zfcERP+_ls?4bi=Il}{;2!PXEGcja%ocy)jK1DXB3O6T%}@I&U#L|(ESRD|v&14BS$
zfgm<>W0CiHk7*Ir%}&Ub?=Ns0V843X%`q5um3(00sgV9Rk(IRp9TGAnE?Wo6x-*N=
zE!$$81*yc$CxhX6z6aOOT)g%=q!o@1IfM>UpXAmN`3LyNzSc4?qPBZeQ;|BNiJ9V~
zJpObtJntiX?b|4*1A?ba_9141Az2<eIShn`e5-QjZuj|vLd=ml&w~Ok*RNT;MW%OM
zxhohrU5PqfvS1vq@a2^|Sc=!4&h8Ue3JHowUf&Pi)plIp+YtFFU(&lW>TEBjEs&6A
zYnPVVH$=(1{PySEM-oBt?)6tjF@0Cv7NBDbEy=&d*nvwO5SN>OaMo<PS6$VHoZ3b^
z&vgbK-`Vz^?LCy&H#E$ZhM;~Urdu|=ZvuzmI}j`W**94RG=hx?P36e)sB~8eU-Sb)
zisj|`59zfvmjz{`FYkro15#?Ek~%On1cy>N>c@OvQYuf{kjU5Nd^q~?5l>d9@0TAN
zMtQEHqg}DJR~u_rd$roTF=DQrkIwd*nrx3YjVnKYZm)D1{Ebtw!@C@Cc0MROZe99e
z1%b;_!=EbsTi%3+hoM{5#_!EMTL=NugBa(6WS}Z=*hyBY?*VaP$gfvi25h3x3WBeO
zZp$7d?EN#(zzxb7=8rWFlsDB`ad1%<k<Ukm;)Qy%4r6TYnhVw};fqH^QBbPDKLdg6
zfS$iyrBl%M^G;GX_`?D1!snCxjq9Ju{P`kg_q<9l(a0|ag}p`oa&c1nJ5LijkumGc
zv{yC0>3K_IuTDVMGTSKVtJR4csSRS_<>Zy}SK=^gM<!Ni)aQ%bP*;hz`ykxrmOVO;
zFD3z4{>pRLbUG}ZSk!~=m2uD_ioj?n#M#T)3vbNIyCJ&#mj#%NuZ^%e_tobtPOl@f
z55hFzlaDTJHVwfCJnkorhL@Q-{s_V8-lbAA^B*Ovuto4{Rpq@BM*9A_bdrLsC(Eru
z$v?hDfjrV>ar7R1ou|>%iEm<hpXSz`N?P}Jot)zhEL$(qmGfHJ3fM9H0PZKNbi4sS
z3?S#D36ollA81*Sca+7S|Dpdy2$_}|1lyM=@lqIdKaO&0d{k{mHvgE~Ny{bEy@DdD
zVDT#GBYfyH<}dENz+-#x^bo{~>plfM)!<(GGxB-=Un`3{_x0|H!=%l4t{*9x?^$t=
zj+L6d2J-ID*>8V@V@};!70rL?{}A4|&nGcur!21A62>-u>VV#mTOVG34Plz?QwuS(
zh~mPs35zU~zE=C^i-w89!fg20zaxjvM4^uJ8t&vn(V%F03>AiG8&>1oeek#L%^fQm
zZPB0jE&Wpa5~AsDju{pXe-i_na5>I1XYcvcz(ZHC^bSLa_2=5%;stg=a@|J_Js!eO
zpK)7;+C(<#q<OLC5)01BxNLM4uJr1_MIdSoW+tgbv{F+#opGQcDWdmpraosGc82=3
zc?C`8#3kozBy>M_a3=7-QztQMRb7<!ARTb*fY44HWb(M-TYd3AAMBdcB9QCuFn=?t
zCEcw%z8&}{Mp$_3(VYQSv#)uQ_b$VwXZ9L=5Wtc}!J+om$0)|{YwYV_g$oHN0++gX
z{d1P6gr%zJL(KOa7vIe1*ag8&C&qsCb(-OLh`uQTgR_EPdin_qgvbZeevl(t66vK$
zlE8S>f?lim3v=+aSgUx5Q!4g*7N(=`B7TrlCmZtjn3r&~mk*@B5DBwt)BSLNH*%bq
zIimDllSVj&>zJDP^EY?JA^mUnA)Fo|sbwoN50c}VY@X71d4x3NjR!8znXaxed`i)l
zwXcE%95i4G6()~76uz=xr%?E`qvjtMEfAjPWv|`f*)lnSNq&9tBCkN&@U-cz0MsuK
zE9B#>(Vc_QJA=>G315{~in60Vei7h|xZ1LJpr;5l^%}5po7HR{Qc~k)a<o)|Wb^C<
zqLf=6;Z-HA&W)2L=WGddXBD47tHb+h$xTeq5Grd+ia@C}_-O!IKz%XCM;H1;Lq+Xa
z3c4lep!6sf#pQo@&gi1UyHSA^GJx{YJS;iKpJe~0?enAR*+;kVJZ-y{4ub|C$b!1k
zFoxQ(G?Xe>Y@uftbwhpURi+BTulRe4u1glrtIkB^BZy>2!{<rYyp7KgGt!<gkrq$L
zw<E>%jQn#txTs}_!EDcNI>9Hcsijb;TD`#;emltYYaa5QA%RIbU*nxA8Qx3q&lT52
zSGQR#F=EzhuK2Y{z6b?RA*ev;oDt$)-}Y1O9rDu82A7gg9Yzz2qLXUNze@`c$md8w
zmUG(dAg$%0eFvO}GEhjEVQ<f_C77WSab%y?1LIy?zO$-Z1sap7BGrZ`Pu89FPxtDS
zjKNP0N9(2a(|XqBEl*^i=&-rfp55GE^p(%mNy<4?Px0k~#h$n#O#+E`)Cs~3NQ1ln
z76hKT+%B!l^U1N&f5Qjt#*0;v5jF|;<S`!Ar<dGN%jaw*ShAYrzBcDB3}O6*BimQI
zy4I^R4Eku-@<(Iv$Ams#a*a#^>W@zM$4x6RdJQX1#LRvI6!qTu(^eTDO*vm-|2NAv
z34wKg{!1`7%yrduEi$}R(&18Af4snNh^LlaiW-h{(Cw5jyvzq>edrsSF=J|=tFysa
z8PJ!9(|rJNt6v_2r^7LS5{~(~Lx+tzRnX40+Y;1pQTpIn#cNJ1w-g6e8O*AzB`c^o
zG5S(Gs?M?J81P%oH1k@{@o^st0dKg*I^6{lZfSVa!N+@kRvx)7e6jnyPW(x<^jp{w
zZubM-#OMXG&Kp3^sNzLAPqH-ycexxHqU#qEpD*$(KgdV)p2MU*1ynL|^M}l-M9<?_
zK6o-1_I~KM3d1#3;8I(b{SJuC2*;gYrKJbK%ABjxvO~2UPV(tK+(1E^MXEZ?P!D-e
z#lU(dB)YEGIFb?nZ>!Bjvmkgy{HnCJlV*Y5yMEw{E6u7MOitcz5Z;pYq!4zan6In%
z);mFZ?s9?xvf%sp@_aDIbU1r%-FN?efpF&@8i92m@OT$Vo;-0D6sHo!ZT(gq9Lt4O
z5o0@nT^Zk2xjVS?3?4!rDD~F9`Xf9hf4F|Rk8~@f!Q$!IR|%6dIyfz6o>BdXC4nK&
zpMt?p7G!id=$@a|E$WRBejX(3MFGm{HmM1aG~Sevk1B~PL7~R!R2nM#@*VV0*0{<b
zSAU$6E^5?DbZ*&cN<nz<E?XR11KZt_U(=`&0PB14JK3XG%7s>Z5*1&McUOr_x$p`u
zHd$)V2N^^$_Yj27>#-|^qg%$d*a}d(FSxj9h!hk6apu5~w=02~l*@f=NF2RuS9)92
zqC9cMtm?Y6<3Vbs-Rl7(1OL5QbK$7hPc$e7SxxRSQ3Srvf_vFtOQ*xz1V45O+`Qgm
zaW4mGipqtg?O1_1h%xZjn$s}%-iB~45*w-?YI#$|AsSX~1Mc^7UlNMLkSkYa>2Q4D
z`{XKBF^emY1m9&m>fWa`FF|OD9ClSkKmPH+z2_cSj|U5xjAuo?qW6%R3<=#pKR-gu
zf?DiaZiYe{^`^W+^vc@jCj+P(QN+*Ct5l!T66MS9&KXYDljxeUP|ijy)xz+8&y1Qs
zr!@Vs<HkDlB(p=LW&heO7O%vit#C9Ddkol2{YH2umRhOclQ<LQ<(D&f%ESB$2LV=i
zwa<QD##J<&GO9DR{$2fE_Mk;1jQ)E&Z)#(oq5H!>kKnLw=RHfX&J~6!Sz6wCtwY=0
zCG){TI^)sEAhl6wEY%ez<=@>j?t@*++QU#Vb71+_1uc9S_-XrF$L=bgC2d+lg;z{^
zLGUVnwtuau`r*gWe6XxMArocbQjR2`Bis6i*`L#nzPuCv&U+{ZE#Qe{LYRZcy>S%s
zgDUPgl>Gh-yBkgGu*X&8D2(%ds>R{>zU_~aoLemwdp{R-&!zZA5+(Zm@h>sxP7qd2
zL@OnH^4|SOYe%jL!>DxGy!?fW`X^j>oD~g_u>~kb0(s<$R{g%%*9xqu2Y(!CRf%Nd
zP}e&Tl-zbc`)!!rI{HnB?JA=KIGd})BDAUOolDE9Dc1NOI{hfI69)j3m*7YUhz;rx
zpu!qf$9J!LR|7h($p?TRX&AEidx?Ze@n8C-cG#be45jaP494$UfH--wn1?vqe)Oi9
zw@nPwE1wqM&D<H@%N~z7K>Lqf9O7O2Z|<QlbH{hGk5BpK74Gsi@WIJmhIF^b*gkY1
z_?qqtET$p3ONYP(3)08OHBDFF_q%mm#nqOpT-rg0+BAR0UaG$|?Gu7uPdeW%&`mFb
z4fwp9{HhJ5TGl`>M>><t-LLGW1VI|6SD@~Y?<LJyg-An6dT&Wac~h*ZZtCJ?`BO@3
z@}zxrNF2f40%L8p8ZrmHzI21}ZYOoE0{gVuH*}ZZdPO^55)qSJ&UiI6!nt#nzOQ^_
zmVXE#%BDQjXbIY<c|wzGfk3?>&Yz*+K?Dl+T0$AZL_@M2odXx(wAmt5)}1CPq0`v%
z4L<|6mq4x%LJBO$7_{eVbJ*rrKKN=`$=wnU!y>;PpZg2vaaenkyncvIY(tZ6-Mf|k
zl!4!Vd|?z#Y4q(uhUEFewWhszUS-SQbjIZ{HpQ4gr7eBp-5;up75R<4d6zn`4Sg9)
zX148er$RvdWrV@PrQF+b9*0gA1FHD{z&~WmKLdYo)DVU49%QHhUx=(RRR)@*qJ*=m
z!@9>IwD;zYqfVzf2rCtz?BW?ps+ByDC{Z2@))Q1YF97Y}1v<e@mOyrbWM{1$q;Ce=
zpTthV4>z6P8PvMZ6)wN@#3H{gV%O_|u1;D)#e-)p?sC<#Ye*G!OY(RuoW(rBNBswk
zpW?0xORQZ$7xotlVH?Fwcin<*)u0vpE9krVX_Oj>KvyBShV|RmS@QP^vHIQ41<2Mz
zhWnGBNwZqr_OS?Q*!3$W^w|2{rBjcyyLvwh++cpc>pds8y%Ka-Gu!h>$l=oZjZrO%
zgu?9g5rUpC7T0P^u${}c&EI3`ch@tETKp)zMtDdg{HHy(qUrWk?o|gP#dtmVqVvGV
zGP7LN={NP#oAuTQG4`NksLdMf(`n1=0BG0^u7P}2P*3Xv8$&vow<p_QR3Z!YiOmp!
zYhNs(Yaz3S)W_D)h#b7^wxdClFJAzP!nps6c9KHLbUIvc@BwmtgRl$=`|W`qg$(3b
zsDP*6;VdoFR%YnTxp|*mKVYV1ZJxRkV?ls;W1m=QiCtnEY>|!sA|4<o75=4{cTnsI
zn}yJ;w%Ft35U$~sbX5i6+U8>(T3)a5LyHKEot+mn(7v@xcZ*m@=p(6Pbp__kh_x%g
zDsVNLv$A<Oxob=GDebE;4$RZg#}jZc72l)1Et^DR4Rk|np<l4>QgGTA;Qzv!{Ke+?
zC6AyAFa9@vt-uDg5gx^lKS0one!o9-{(gu+I{~;2D0Ehp6T64o9(<OK-_~U*f2N}`
zal1~nmaCq`KY_|=hl2gFUsxMW!?{<72lVScpvp5<Aq>|p-@cpluUos(jJq8fd*Q1+
zJS*FQ^SZX2zY<f;@V^JXQ|J|64Ex?_EWp6m_X^5jrimt7=<JavQT?O!I)N^^#_)?3
zC(5ZdIwtF1CmWK#qlZVmMvf&vqUsMV11GnSjI??N^`W#AYkFGo2^wK~BERDeN52=K
zo)TS-l*=BAyh@lS^0F(2=Oi+<$v(%;3T_p99!eBI@&bor8qNs7wW`ZRMNm$6&{k67
z=6Rjh!gSA^cc&)NJ6EBq3*F;uKFWzSe>|gQE!k{U`NzWr7;Qx1A(ViwsDxI8@{q5f
zwA+y<N0Ta#RK?9|9^nwlRUo@fSl1p!%X#tRu)2OPNzY2k^vKAJCoC)4B2F!Mcy{G6
zO?S-Q>4)H#;+q>*L1Zsxp)#zRC|@{-eL+cmPmP>dd!ncUkD@&*+hS*}TJfE6l?XaJ
z!BVr|)k}<qWuU47EffyD5{E;ClTlJneuhUzz%M0|Ugi34WZ;!#UOM3>wuj`IqPnI$
zD%_2j`~uXRMO0(5HPDIP0t^;kqrhnL7x<O3M=_+&#SV;uMOeO<N=ISKTkt?=_#ljM
zK5>J)m1)VVnu++^?!0iV^$!17D)A|}nQ@VV6l)hX#+d&fMM?MQt-ELONqr%Cpft0Z
zHf+bX(Gx6$mWo=<fL`KHSQ^Ur&{A$G8c;b~rCbsTY&c9$Vyc66D^UKY_I9@pADRK6
z1~tnye2^IbEzbqFEM5-*B0Jb3+e=79Jp}aQYTfj-?+Z%5akB-_KK#-}GFj@E-f&KC
zx>-0p&5SFk9@zVdi(p#q841slyTMdX0Ars?cJDd7l1n#qADb6Y`sLs&4@j6D)C|;P
zo0v<SCSP_)Sr~{agjoV!lk3{hV}h#3pXt1IVoq1{dl`!YuU_PQzb&1@J=pe~N6C&6
zrei~74&Cwlu|k*{;M18r0&c-6Qz)!B`FRwR`+fg>t5muTTfS+fhtv>+mRq1q!*(u?
zETTJ3WkK-M9&Dixm~S}YvcgFyEL;X({^;W#8p;zo7oSa)y~KsvOjf3#6D{S$)zd`z
ziaN?dYwbc<&^)jrh<JwO)zM#K3#Ah#Jc64zh6EGWlSt+Xh9UHcGk+a@$ExJ&A$bDw
z&)z&5=$fzM@heVDicRIS%`m7(k41i4&59kdwbh%$KJ50kqGZ|#A$_6xf`PdJwD=&2
zg1+^a<XDen*WSfB9x4wCKUkBTZyQ5ramuUiZ9>FC^qIzj!5NMWzXw#Zc>g#f=r@mu
zJ>}r->5NFW>zex;(zGEBTAj-{h2YyVhweqO=>5uzof2&+K-+FdNh}KqFa$!gcJ7{U
zP|@-{J_fg3Fd)WQ+L;(B*qRrkDn?(2=^1rOC(Ye5vE^Fkn-@jbyv0WNX#L6gIIj4U
z!WVj+L2$lN-~;-RIlkxh0aaqPMQX`c%Y00rbwTE4MQTK3_*4qGU$vzaIS&C47HO-q
zHql~-8Ts#XP@t-?>oZX){2pew=Zy;V_?|#D@kRvvjpMo8G=qWY^_vHYL;o_HdG3M!
zxc$a-&b?d(p5xwSqaN~wu0E!gXH4(PLK$tW6kQOXfoV$#0QWT4_X%Q{_WAxzBxR3)
z#i^ZMzq10}zRqc8?0984TXp)VF!x7u&65L(P3)tuS>%-WIN;M~;2jEVEk$nTmq?6~
zve7*UgRP1NMuSAc&s*BX_QSIbtMiR&)LGr!<PI0=X#{@1OlsziN~A(h{2O858mx+D
z>e4%gw&$WP_v@5&R(#x9<8IY9_@ddYdh5EXhtY1Fsvp1h{LW&hu$Ij|pB3-%p(nN4
z=pn>viJ5vxV@#y%n~4r4iQ%mnC#$B!*!->Qq;#_@PJW`P6sO+O)984sV5IYmJmozJ
zxHS6S7diO&d#DE5tkyI-Dp1gWp`!<M=|kY|5$HFQ(|KF6r;a==xw@V{44a@I`?wVz
z?#Si)F?X_2^Uf@Pj1akJ@>6gv&EI%sRX>^v)7YL{CzjL5{v0a=TtT#^sK<`jFHWES
zsQiZO=<be`W>PGXhE*;g?#wAc5V>u4E2Vlj^v;`#?0U+$<tNFqU5^&Eb%h;nCH6jD
zTC4sj?00FU`%ouV8TgUuML%$d1i75q0}o|4L4|(h)cHzvI&zZtw8{QNp}qt6TWDyC
z<ZZR0rVK#=W1}Ci=Rnp-+zNkpq#3;zl;`OUH@xK;z}Bhu<4uv|iT!4ZO>@^=S~nN{
zpbAyb;D=kjtfnA4(XbzfPp!2^aLcUSkv!=XV5n;bJf+)LNYC7)nK}qTRencRbhDsx
z{gZw|)q(B;8$%8rn+<SZZ6kAROPggYMKfC~>nL#NQkY+)N93UJO@KW?AfrtK{Sdg1
zBb{UaM8GfhyW}+mjkgAt6$exQBXOF;UE{3q7Me=Qo5Fyi3(5$dul1qYEnD?|)W!)d
z(L81cA+yFKGnk0N9`4rgt8Xc<>Qtp`eN}NMJeDV<XQqk?RUr}hZVqCq8rNzUn+*~?
zJSsm{Mon8l7Deh+rn2g(oG<HxZmyS`M~Co2kL|=A2JG1uYOTLSk=c*ucGVw>8K3IF
zBEr%-zFI)@fENlQzcITA<IBVJj03A5OQb3D2PZS#7FexuWylNl<D;tgBug#+RSNjV
zRPBDY08BB9t&7xRgoJDuIi<C{!4JG%D8Fd|Ve+$i+{;csB8cmeWbz<N9iawR(`hMi
zJ3w`XThWn|W%WE<SA?<&XzriQBUahFnz30g>>z#<+A%z4tHGjAM*6-g{NxtzL2Rr$
zzoIESeV7{iH$=%wQu=bxB}4Va^w++L0migLCODh*EB|zR#27bj{8hc5u){pO=z_`E
z;(96WDMx|XcYO~J!Ro!I?!wmpHZl)Le^9=j9L@Hi6nRwCF1>M+jA{#cNYr9((X#uZ
zg;cWq+`~rN^`TrdE21-J^uuh1pWDN``tD*bxNKy%Ex+PwL8as6^Shvjk&qFp^F$P`
z;H3BwFZ>3y2q$dxTptR7SfyP!yQy@V<7xA7DD^f#_;J~=|H{RoZFZc3pLptG^405e
zo5bj^^o3?iy?un^Q+y4Ib@C1ww2c+0sT`s?V<7|V!sYL&?#C8P7dWceY%ajk^ldhm
zY`t?L2sb*WJ5H7>gyCrFC&*|(=DY>>NtU)Skg2Wp42|)x%I~RGcSdiA>h+LZ#zu#3
zziA@~E!mp$SS}-)!xDOXzY;1}D9w*e7AC8KmN)aV5AoulREZB28$4={=&(GZwfX+y
z7q<M(@<-{s>b5J%d_#p{*UzNJ0{!=f{Ri5;tx2Z%u&tScnCfs%gP6+#dTQkz@RpWU
zA4%dZxeFOkgGAj=xh1Crf9cJyM7=Ns$(n&)iy3Fw2vNA+agHZJ#5;HQJpmYtbOVk?
zk1D^;_9az(YNE37>2a6ntKe4U0-8PE)&%GU6q_)MB?P%&xg=L;_ObWr;gj4VnETrg
z9~kezP%C@f@!42anbcXB8PGOgXl`^!9`MA$X}2{8_fTr;hRPa^#B~xFvN252WrRi*
z<rbN-cR#)EJ+glo(o~-#QIFsF=vCz%i7}8E4~h0R@YM!>#0qSG)oogQID3a-@<~K<
zE(}wAaO=WEdqn(Em{q_8=$<i2aSZJt^F*I{HUn`Jd!W@@LE#`kThh;mhN{Nfw7!1*
z9`n$$P`Uz3^YJM!T$uXKUYOTOH&puN3McGA<&@VAtDkH5#QjuFhTD5gBGe<u>cG;{
zg?K|EK4@%^tEJ5IwN0_2@Xv3pBlm<raT{VN=8U^!N@6I-CvD16udD~3i4BFO)i+u(
zRetMTPpMCg?CX72F9D;GUEA>)%RD&9JlNBrjsyN_vTFzJ@48L7tR&0*a4If`oQp>T
zQWiyO^xcj-Cr9~FjyR&6hTfNYoRo2j4Qo)~awP7x{%Njs%vLjc#lzs|8!ct%F&Ssw
zGZScCG*%}ba(jBwuCEToVR7<BNu|L4`y&h1&R^5or6z4vD{>zCo8Fd6&^N)%baCns
z+HCSH&N@uF_=!dAzBeDoyuJ*O_X{(vhlDgz1xCLHb?6LNt(-QtPx_V)(o-PD;tHM`
z0BJrjD|_!2H$+5kyx;d}TG8*X=UlDG1YDNTD~2E*w7gQ_1LaTU(cq0o%G^U!7Ebwn
z`rZx&4Q4n`4`avJzYGZXPf;Aue<=h!?uqiW<nW>Oiuole5R{-p<9uGYvB^!C*Yxhb
z3dd(}HGA401%0ZfN0T5Wb>AN)X?~1`ppEZ@ZxPRvK);_CHVGj>5e2UZ*eDgMRqfY^
zS)#?k&GeRY!toUDjd@ADO1O*DM<K>-aWRK8cCqnbf*LY-H7|TYE6MRb&jg&OFY!Xb
zp3U3jtAy{vz_8^Pwh5yE2fuM-McPuTL7|xxaTCywB<9)k>q#`Jkol(u_A^KOM_-N6
zOhxU#riWa&nlqP|ah*K1RiNSMk0XZZAl}caIIph2r_@62#SO2|21TO7CyARl0Bhi}
zxy^O^G<{9kwSP}lyjr(V%LF=1o~bt<F)ZK07zzP5W9O!eyqA;MTMsIN{hEG_3L$Lq
zSRTpgWh)h}lA7X^L5MzF{drul`1~O6+y@E^aZiz(Tzu^<)lJP^w|xl)DoW>M2!bi`
zDafBJeqw<et0nQ<A^7!dr3<;iXwo0VQ}Acr<$|I2?tl1lx0>1Jj!4V4M~RSU!Y65-
z7GgH;MxGiLxN>wQ->$m&oNi%9?m>9a&i#EHcvXbLZ`o`ZZD7`q2evV?d<+wUS#3yJ
z2zSP(x&6r`kojlTntCuq*H)~MeG0DlZV4SVW{UqyG~>t8=d17!yS+MDfB?lj@o3cI
zwz<@Rk~ZK&r}(OL&1Ac@y(4rp*cvF>--;M((329-xV&x#Ug3@07FW7VL)TedYAUXb
z9PvKSm&k~YoQ5dh5_;i{Y0Vx`1pBIVE<H(-B!^qUtl_dvKOxA|GP=TL5o$VJzzVmE
zjuiliY(Fm~5U$|*g=Jl>_nq@n%f1xlz_F2Re&#6BlD6E5udz=$^RZ5SDo&K>4@2r0
zH`DXTpa~EbJU}k?e$E;fa5ER+;(oSFn!Dbs!-Ed4TwxyuSf{2H!oryKj-XR<6y%8T
z5n}xIL8fjxd`+6E3gV9iS9OWE<}IH+{??W`qjZ*r4e>0E84oNgI3R}qjb!QM#k%li
z&ZSxiozi)a?aRi|oqPP3jUGJQ00o+~JhiHRXEpTW=-A;sfP-!!Ewi$^zXZ|CoCK~n
z<V+uFim~$j$?#H^dUd4p#UiZ@K+^e?r7^xu+-l<ITprKnu{|4KC<IP`c-dAD!tz#F
zqIs1g$q0pUUKzaSVrHqlU8|5*ikZFSbg&Vpu!zw-7^2`Lt=#;?Rzw*!9-PpN6iB6(
zEmTnuQ>e^PoMUF$cWeXLSh<|aLc++)7OLBm!8XLor@#V{#_7XDYCqqeWc6-EM=)nS
zU|s9I?@AQZ(TTrkbz9`usJqQC8u<ATpdwYh=SEPWEM`2$dfg=y4gJ=&TBhDzkqFF>
zN0FxqsdN8|wmf}A=v@5z%5*9YN#0h>3wI^I&(J9SBvZgmnn|w}9U?q>w=o5CTiHVN
z<3z8HpQ&7_^<&TF#UrqQVilL88iYsLG#Y4)5IG{O&j~-k2daFQdx0H($~^HHclUh+
zF|KhuZ*8WZ`<D_xl`8I`&=UtW@z;8`&*`QE@ZU1PAZb7S=MGUqt$Y4QNiPcSmkP#T
zr}qx<5;LcT=NW>Ox0*|~ObkD0!=II_L8R@5pv9or;%|YfE}s|BG45~AB)?KJE<E(5
zy(ds1@!>ILplRF9z-v&-?Mk-eL-c=;Y6%-N#0%<TxcFpYE%P-HKG>s-S|u5xds2J+
z+l%$I#)cGlpDc@hd5YRndCwWWafV1%zncbL1ScRU6@Obfgie?ad$-KrcVk~oU_7Kw
z@aB`DjEuygrKKdj_i8IBF)}<HhahH6o%t1$VsCtp_Q;Co#cH`;_PD%Y#bC=pRk`Q+
zaaXEYo~N`B-c$}n3d-x3W?~xXA#pnH8<+Rt;uj89_VoT3Yrnu<S=kDPy_T0>yJ~;w
zjjx!3WId%xpC(qbvcp$SN6FJs52;%PmT>GAygNeAA-^Z778VqV^{vZNcwIxQ*wsqx
zL3}N_Z6b1any8fp=h)fd%iSUdhR`zNbdAd>Cw~UZj&GA|@9cBC!qc2oi62`z)+Gde
zixs$>@gR9RT;;Es)9SSmu`!Tvu-tMmq7y+(hxGgrK@s3}Vsh~%HIMy-M$}0k)xvlg
zE^GWd`2*N<xkcL;`0cdh31N@1@%j1gdie9-5i64=0@izX+a`!LkhWY3^1>a!n+#b&
zzLq0FA0hS<b-`J(NHy`60N45{y7)W8!rRjzzdSd1(COwOd~Zh^EeW|o<@k|w^{jqN
zG>^3A!2dQUwUDV)kMIPs+<3R0NJ8+Hbmx=IG^{EChAXEQC#fhOTYEfjc2Xr{WevIZ
z3-HCz)^LW~>iHQ`*0kZ{3N=2@@t`Rg_7Jd7@U?L9wRrept|)`F!t3;2O^wQVwG696
zj88H@%_hE*lJuZ7ghqx)XYj`m0MGiSKt+|vtoT*p)2w(YQ$V1L_k-%<Z$s?4J=You
zCkImp=r;qdv&X*QMou>ap8SO>JT;HRx3z>2uUp)*D#4x{CA`e>J}FGS4?Wmot*kkl
ztVnPZJHmmlh=4~{590pJ@#YdPH7zd(KxWpiD63=rE%uw(_jkgSJYKW&K05uw%H;?A
zD;L~@ucgxrHADR&X0T10x&kPo=v9Z@9UCRmER(Ky<6@S8Np%l<SzgUT!_>)|5x&bA
z5dhNo3g~U^8o9<^|7B%17;oe>j!?NM3;tQCPpBq##+5ySJ+vkTSp0!@u0Kk0f#*!$
zZ7&`wNh#^`E*joJHE~$L4uDBo((a+%KHu@;=8SQzX!}^m;6ksCtfzC^$p~@*i<$>_
zQ;{RK2{5-Y7WZVe7fuj3Jr9V6-a(ksC+g8zIqL-0bnBC<4_5&X-Y~J8<A55H1}m3}
zZ16PYug5_Ai)kBbJH#8v57xFO48r~APM2=`^-aG-nmkD6Je=EZ4F1fhh`I*X<tk`H
zJ#lKmxXJt@QN&>-&v?VqS(G`x)lr*^lqy?L2EAavMb)%2=BjqsnH^+fnUVrJO*^LC
zLC`<tyNhpCMm9br{GuTx+l;uF&N4{f-tk8X(6_2BubSNJgmV)?F+E*qm}PA$_H#tB
zY_1vDO8A9v#)=tMvHwxZG!HBAKH=8bEp$4`>Uu!4-(zYt*QLYA5E@2*iguOZh*nh&
zlW}NySd_loq|KpL2=H}olU+dm?x4MSx%wEVNII7={RtyC96W*+i!)GF6902Q*uG{y
zVX&bASSPpj5jp!%E}G@<*u9{ucq^iW4KUu|_3zU6`(9Mm%-CD(rVi5Tqn2D3|68CW
zHg{GsjzJ?lp;L4!R){E|zv<pVL{4wty`y8*ISnm%N<1=j**m`CPEBTxIutAhl5l*+
z{$I6RX;c&05^juwqA(0Ds0cC$7&Z|XqC`Oy7f^N#$}XUYKr%Lgs2CDA97Rwu5D-E3
z$c`XlCjyEK8Uz%OwS%~H5Rt}hjG95-O_;;ed4Jw{=e+#6r|MSK_f_4c)1CCaZ=m#Q
z>#8~TKTowM4J)=#cMc{jD(9?!(c{OSmip4kt4pbMOHAk9^t;cKyxvMsPdxcd8@&8L
zO4qehe}&xIvE7ybQg=u84tIXYoC6&*dxb`cGfQ-7_1b-p7)tehVX|#SHWD!=Y86i9
zD&n5Wd1<9bc)uBozUoF7&*(iDvc-Ov^}!VcarQayuDjAIoilyOU_xC-FAS<Grs^{T
z`%Dc*2j9&aK(;Z4m^)@<4BypgwpkWys(+cEcsZ2P_@Uk@J;vw?y69F`Fm>=lK<yep
zi$)}h+Ep^Bc9PM*0vX+fzvL{^^LO0hyWO9<`Ga3>xZciBvXzv9FuQfdmZd7<ec3%q
z`8Ti7+d6DJ>`RO9RPSB<m2&iI3YwD94gLZMo{SEq_NF~wN}2kQS3V<fxouS$VU1Qr
zYr2xtF17Bd*Xzr^*?%ZayDCD@@1bbKyh#NUB$!GwsPE=C=`G~X3A7t7Vi)KdiP~>#
z8wUnQ&emSJ{zI9fTl581?$br!2g%wjgDdE_*dFVFloTo47k`j@<+CZFW!cWbQZ|pI
zL$)E*nmlu|`(s{CyLZ7Nur1Y9GFU|+lU)`xIH^q6FVJY6PO{xlus-gzEB!sKYMObt
z`ii)&+<cC~xwyIZ_tY0=seMZv*zO7Myl6QyIJkuxb9wPx_+z9aT~!zl=b#Ekr&75?
z4v&h?Qx|G%)Vi!%Qi5H?_p2rPoP6u7;u?DfIRiFMjwZGVA#egC>xg@$#rQgR>6(tV
z#3Wo-^FY&f7qdI}4^FSZl!`*mnbUM{yqgJ(U2vVq88ycOSOMRC+@;FJ1;jUB+AF@=
zf6;5%w7u*>hWFGlA7PR2r+a^FM*K2SVzsm7(8Wqs!*UDrXUR`%Er;F*Y1KVABsBE2
z3E=Dw>|j<HKCd`ku|Ygz_u+W(vSJ<g75q|w?Qai?v^G4jfx^%&WPa<Ux-{^7IzDDl
zDn3`;-Q>=!DBi;&<8zEd64l<{h-e7%On@6)ecEq&(*pX=J*}NDLm4}+V&d`y-hbD)
z$nSh)aA9<132mzV`5DCOOgPX}d#fRZ%OD<b^c+~1-BbPWqGEP}6O+(Mb%=@gFsGRi
zCr_cgKfdk5j@y<G*`S_8#w{zb9fh+3YW0hlJcBY(u`us}uV8G|jV131@hvlR@8AuM
zUd!HGnoYNxEU)Xaj(`xF3AKWsP)ygXi7L|I5OYtBRi&zlV|R9Ru(;We7kq$&gWT=w
z<LrnM>nu<#Ciw!BcJoPT(1-3%13|5k8^)eLWO~derJA%<es$d~sM+sTRBcHU%=80a
z0QSA>X(|-Yv08~RSfljooPq^am$x@CN7}ge_il<^b>ggGHwDY^6|2w6zukzr22ql@
z_NNa_S$Pen-OWK;0$p$#k#sAk|K`2~-;t#6y>IPHmhYRhak6r5k~CzEQ0pw26m>#j
z&9czA+n?@R<4t=s7<(#xN4n}#Rio4=zW{tmLKR3&zpKUc3fk480zpK{GiNGC6Vhpw
zHCXJkw@($Of#YY`nPX+WzrLP!WM)KKFA6OdTT~+oj*r5r5rj)Ay{K4mm3cMNr1E<J
zH6l&3|1K)FAl*8JG=)>y|GmSyw>`I~vDc|hwmCe_=pu`rA26x@E7`L!ac@+rI!2*l
zibNMGXxaaNA(etVAW51KrtJz~x1>>UpT9+1g9{Uah}Ez$Y2Cw$zJ-PoR^zaj>f<a_
zSb{?O)`pUUtUOexa?YXzJqe5;;o4l8xW}s$G|n%?|B;6_I8lXobnevb*K?LkvFO{)
z-amM5lKJxE@OMj6@bI-yzKRtwRqnc{VB>0-pf_FtJpcxR+9F9_h(e-?D|HOe^Pd^V
ziH>h#7Fl45T}kjS;5JHy4PSOxGTs^jcUw2y28;4i1?q8;ij{tIr?Te?*Iq%Z_qkEo
zK8BJ^7>qDck}6Aag}m-tvgO&?al8PG&fiRsVsu2fmWS2}Rn+dt)B4#=s6K)=sAE>&
z3mp)J&)a>DktEZ3Dc<<YIzC6!Kk(^t0Xpn<W(mVuBvdJ{EPFH87pL-rxq@_1@vffD
zE4P_3mW%E*G9CCM03rVZ{vN~gq7NZv;1V(n=`=FduB<~#=x5CsX0ix^%ZCgy(R6x?
z8N(RJzeM3(4^9!4vtReSuumK#>_~Cn<*z>JiJD^Ho-eLh=dQIjziwf{&QAmbdi#sg
zV>L!RR;mkY+>JBQ>ReFNFU}FaVt8A+Gp^YR29`e9Ok||tf54JK3KdGm+#E(*MtIdc
zat<2KPts&th~}jUwlcOL0y-FT7PEk9j%D!3d8mV|cZp~<mXS}+M?cDXH~WP;IV0d{
zu%ToHV7iWr=fX~{TJHExneFC<p(cf>bkNaAqBq`K43FfK3sH%^<QN!1DP_?v%y*wj
z5h{>T;nTr0p@OV(cq9+7e%-3&f$!(H37K5F^-WLc!DpoORZ?^cegi(+CQn1vL_!oD
z#I*C}*6BNN8$O|Ntv#3<<$qRo=r-20QvwjB@yNl+(FO4jU_TzPz419GL)lbfAxgz+
zW!8`L$W>?>{bX`<8W^w4t&A|FbI{RLVgNYbdMveUG?f^Mu42A2U%<*6&szdF*2r;Q
zI;87yqYEng;V}Vy46NtiS3X)Bxza@Z6>M!MV=@%36~vWZ!e7IFvf`twMP<G4=3Id(
z!xogNKXr93xd!FYPjaQ^GPp;Q!qa9FbI~AXHC}{tKYRoKWg~N2HT?0U7TF@+n0=IJ
zIrkExHd|*=*Y8gZG|jf8Uzy_3f2(O!gFoY#=7G5#I`PdRFP0I?^8}gb>V%gsxl)>J
zIPRT|g$GtC3)5YwP*jTw=C~1A4j^$P3oRfr#Ld`ubx65p1&2HvYbKH)0XG)rWux;j
zMSn&pkzq9MSN#tqvw`l%(ah`&+<>^b;AcR_(P1)UW;K1xmCD}pLy(Y(AhdCChM}Yr
zavm;`<8ML~y4J3o0O$Q1hdSXm@DF88ZuFmSh=SFLk@0{QBxsBzde*m9&ZP<D*6xhE
zbHL1o-a0NEGj(zurzo(PzzLn;{e!Y|0!Pv_5kPysF#R+f%+X|vHprd+vidHvL&nNI
z8m>Di`p<&=9MTe|Ewg!5pMOnXnbv>0InXwHm$HZN?b(E-Cwk8uIj-j1e>ir@NS)UU
zC|P`U7w-NjT_b|9)Qj{e{WLO`gP#>E?#PI|gBGNa&BTiS*QqJw*%1RlRA|r1Sj5d-
zQr9v16oumGdw<q59A(C|38hqK{G&1qMCg@A7>%^PsOf+Ex=I8u02d6!fJ<9q_<XVB
zJyg;$x&TTJYg@`N5;u3F&?5`E%_UI8u%~k6J)oq>mEMmgl-?36$fU1Oo=Y5}kTNU}
z7AW$Y1a+^Jr4?aLf+~IFIXQK9h7lmU1bK|kp}xsbOuI-P`U3FO(v;CSc%$#rYUo`h
zF3&+50KMHHcOEGt6;Fcd-i+6z_s@|i1@^N@dF5zNM-8E+?W1^n69A;OGiN}5?PSQj
zq-1H-nXO*4Vt0KBoh30iRCOe?{k$l=DM2Uuw_lUi^UU?d75OjJ)K4;t4~=C6tJA)N
zqXc+T3+S<(WQ?`*qB_=G3HArltQ*J$bu446#24}xIcK4UE_f>avJzA~FS#zcj6-B(
zfaO)YtGlk=3JAVE{NJE=cbxmhj`kdBG{1}hS*ygc24dW@(Su;^eakNk{koop=h`A4
z+iO!MK@s^_8D^|6tVzbrp{1dP&NzoC>Bx~Go3c#(3Y_s*#Z2&?aM;q>aw2{F@P&>9
zC~ezpVY)Krh|9pQu193y_l{5}L22=z18_efi$EbV>^bDc;k&J&cWL8RtHZ(vh|s$t
zAhmf4ybmK0kFSFm86ptu1h^6Ju~LQT3q0hG9t;I`UDhp6Rfu&8mL2N()8}S|l9K%F
zy>;Kq;xoaakN@MxX!!4g0(DqJHl;s7{3!J6Ru^u!R<~Z$Ak*4ybhIs9?|<}POE1=S

literal 0
HcmV?d00001

diff --git a/docs/source/_static/favicon.ico b/docs/source/_static/favicon.ico
new file mode 100644
index 0000000000000000000000000000000000000000..33a554a8a8233fa391a556cef45490f0f0ddcfbf
GIT binary patch
literal 15086
zcmeI3du$v>9mn^4w$FJu=fqZ8TaC{pq)Dq*1VyxVaD3qpklKRy2T|yYO6f}=l?Z|e
zX;LRxB1J-S5du{zD1}xC6@)}9@hFvS_m!%u6p1!cc__U$(1@lY$D?52;_c@<-d&CN
zX3u-cog>4I^z(1#H^0aCH#;*sySMhdjF<JcZuQ_p-j6nU-qoJx4GjhD9?v_0tOu_C
zyW2hQYbb0%2NUl$NODu&JI`8wTO-iG5m<|_u6q3Ca=E@0D^{$l$0mr8uhS-t`mHGK
zgK}w<rIX2IuFYn%53O9evNx?wM@PpM*gc5;Kw6uWdUvDu4`@G>Hizrcoh65d)8@eS
z+6w-^VE0p~A8J$){<F}(ptLy%|9_ylv^jKjb%pqS>>h7xYb(GTQND<iIY@s038l#)
z#4nq(X>zc}Up7A1;uqL8B8R>B3-PDT!N#8|hdBNQ<`Bo9CWkowMRRcBZ%_^{{EOw_
z!e29oB>Z6x?d|Oa>gQ5+VWhRI_;GTs2j=c5y4p+P`f5AZ1N+n_*6fJ{8?zqHF^ARo
zydU~^g72yv7|V9B>_-3Z2{u(Z?AWm*)7jbiDRTdFg3bA?>6Q5WI@Hu0ip64v`&d(Q
zU|g0Yhtbi|WuJrQ(sI8JV`F2fayZ8|y(DvKWMrgiIV{zBXyw4JS)(k~b=bard(-9;
z=`H*9uv9q=4-Yr{IvggKTKDM#oGqf~>ENt!4>o^;lFm!tN3V~5Q?_l}rW|fy?w&&6
zve8-N=R7~~=BU1c%{5rO2_^Y`nRCviwpXxiB&Szu*qmaY-)`H`&I@Cb4f4Oz{~6n6
zu{zkV#O8%0pY)wz+&*mk;oP|ayC;%tke|lxW}YuImR&dX2cZ8#u5r@yx}F<5p;U$O
zXd3oat5(Iae-lu(u<xP2BNcFzTG*@RfSEI`aZjE>tvTbKGpkpxZmrdXY17O(vz*7h
zg*{=yAP&w1&r@~Xr*%%MIumfOzX-d>lWdTi{oNjS&O|zcPeKj5ui{JPOOXBkHp;_=
zT?WXng(aOqtFX(4oUg*}G}XwzRo8i@t}_ApVR63yOu`-*CB6T2VwVkPQ2YJAox}NH
zm+LD0i(r?H3%~13AYD@u@SiVs`CfoO1$NoQ@z=yI0|)*_V3+F%e@g7KvGFg8T?R4y
zjlwS1!tdN?)R|C(U;9WZ?DApZe~0ECDE0YW_BG1BI>tS8^HC~qVs~@w3>@}X@!<En
zlXPgmYZP|v|DWWU?;}+KYY4zS0^Puv+GnvE7H0f66YF-AQa|(6bQpAYcXxO6^z@|0
zAv4ZN?d05+#($`@cJ10M_p@x+Zt?$(5%5i614`m&{WCsy%c+WgO8iJ={G|9~rTDt|
z;Yy(-K2gb+#^Hx5xl$2+u#znm#P7?L^YH1SH=Bc(3J3RR;pP1FSOz|uD;0F2sAS9e
zgX0`zGP5~!5y=7EIDi`maN_`O9KcZ@WA@X=vFXPr9BG-aMSP^j4^sJ^L#jB&yRZ23
zIE*=MbPY~pPTQD^2YDR|@|+0rJ{*k0WH2sAf^nJ(#w~GNgYn+NG#Y|-LeY0xe9P4;
zRU>c(l2ujGK)~MG#kTf-_GG?*a~Ix0_ONenboVYQ{SF!jp`>qY*5LaIaNO44-|yUy
z*>m*`(68|E0jS>c2nIT{AAnpK^gH$o_&)=NTLuOOsxa^jqWGuLdl^0gucthQGAqIS
z*6+fg_(!Fq7{U-Ahamo2fuG}bV)$`jmxLjTZ)32>Ar@bLjx#6Hj>C7Y_*EFlWd;3D
z+B)%Z;Jdet$V#>j^|Tl^qr1RAZGCqUT?=w4(*7MJe1<{{8#ZjntY5#rjq7qdvNK_S
zz|&%Q5Bg8VY~vVq(w~T-udh!S?)ZPkFgQ4v>Fw>k;N!4v-MR~kLE~_tF=$;Zf?=5X
zyjan?_%#%*JL*~&<E%-uSHs7c^YL}@Hf!Nl^t=GWE~@dh`3klll1@@#U+cj4H|V$H
zYaf1!drqJ1cz)``&o3Q1-{Lne^E`$A<@CQA(|HH^7mzs%{yXX4N2k7%isB=Cmiw{%
zg|T)2vfopkMdx8K^-70w88i(=KB((i^~;d!e&>;Qvt#ciw)1(d6}$R+GZz|hz0YaP
zmmqeo$@4rhZ|0!h+3LNA)<d*D>@~*OLf<5Q+wP~nV2uTP!ILo^`K%e6eii%trM)Fu
z-}Bwxg7X)$1+k^Wn$h_)JpU<=w6SF##aHa`daENScKoW?(uu{dC$`K@{Dj!*kK&_a
z?`!Ha#fq<cLz*!UEDie!evdlx4t4a+!E@+5YVE0EpW#9L5{<7w&V9`&I{x7EbL{6=
zIl4koH?cp9-uh4w=Q{t=uXkY1{XDL3x5ImsEwk&~M>A03v!8VJ{)+ox*0x(N_4)S*
z^M8v|uldCz$Na+NQ9pn989z5M<4??#{FzdDyj0#>EbkpH%#IG`D}(vmTs}8p6tahA
z{9JZoZcQ#%xnxa#cKDJ)dG~O!RN6g0GhOm0)IISG{*L1J82(?gL;8A8q1O^aQ1qUn
z=4hixN&9|xUfcZrn9Ra9b8475qh}vexW*SD+esto{pB(G4ng4@y9fQJvD<`9?|V1V
zmd;JAhpJ~}<iCORoP8gpxpF6@{calDjjVwJ_1BTT4Zo2&9ry7G`qV!Q9YALY{xc}_
zv4wo}J(u<$y}!TFf#Z7mPD1jrKjvc!9Gj7Sp1#P314q3+KJD;v9Wnj{X*`6Z05{)4
zQP+4}7U0l+_7?R!;8CHymBw!&<^Da!t`Gi@+LBX$AN=D`8Iq5$LZ$?^FVT?g4}x)j
zfVTcuNVcPJ<@iTP<8&`%zMBjD+O~tpCh50rm0M`LpSqbZ=v__yMM!ZUA?{Gjc8oqd
zZe)vG<vB>Uk7BF$O*hf5Kn62oHbDJ2WX2R8&Qq$UNbiT}nR7c5UFY8;Q`~U={()-r
z+%I`p(A$IU#bFzM9kQKa8%!FDU9`<uh4PSdf9VM8;>^_!D4OHUkt`}&7tW%0!J-dN
odEUqo&wKEg=e=|nKlW_npC$VFXb)s>P=6NL{q*&tBi+#MU0rfKs{jB1

literal 0
HcmV?d00001

diff --git a/docs/source/_static/theme_overrides.css b/docs/source/_static/theme_overrides.css
index 91670a741e5..f623b3b3c49 100644
--- a/docs/source/_static/theme_overrides.css
+++ b/docs/source/_static/theme_overrides.css
@@ -17,6 +17,59 @@
  * under the License.
  */
 
+
+/* Customizing with theme CSS variables */
+
+:root {
+  --pst-color-active-navigation: 215, 70, 51;
+  --pst-color-link-hover: 215, 70, 51;
+  --pst-color-headerlink: 215, 70, 51;
+  /* Use normal text color (like h3, ..) instead of primary color */
+  --pst-color-h1: var(--color-text-base);
+  --pst-color-h2: var(--color-text-base);
+  /* Use softer blue from bootstrap's default info color */
+  --pst-color-info: 23, 162, 184;
+  --pst-header-height: 0px;
+}
+
+code {
+  color: rgb(215, 70, 51);
+}
+
+.footer {
+  text-align: center;
+}
+
+/* Ensure the logo is properly displayed */
+
+.navbar-brand {
+  height: auto;
+  width: auto;
+}
+
+a.navbar-brand img {
+  height: auto;
+  width: auto;
+  max-height: 15vh;
+  max-width: 100%;
+}
+
+
+/* Limit the max height of the sidebar navigation section. Because in our
+custimized template, there is more content above the navigation, i.e.
+larger logo: if we don't decrease the max-height, it will overlap with
+the footer.
+Details: min(15vh, 110px) for the logo size, 8rem for search box etc*/
+
+@media (min-width:720px) {
+  @supports (position:-webkit-sticky) or (position:sticky) {
+    .bd-links {
+      max-height: calc(100vh - min(15vh, 110px) - 8rem)
+    }
+  }
+}
+
+
 /* Fix table text wrapping in RTD theme,
  * see https://rackerlabs.github.io/docs-rackspace/tools/rtd-tables.html
  */
diff --git a/docs/source/_templates/docs-sidebar.html b/docs/source/_templates/docs-sidebar.html
new file mode 100644
index 00000000000..f6ee66cadaa
--- /dev/null
+++ b/docs/source/_templates/docs-sidebar.html
@@ -0,0 +1,19 @@
+
+<a class="navbar-brand" href="{{ pathto(master_doc) }}">
+  <img src="{{ pathto('_static/' + logo, 1) }}" class="logo" alt="logo">
+</a>
+
+<form class="bd-search d-flex align-items-center" action="{{ pathto('search') }}" method="get">
+  <i class="icon fas fa-search"></i>
+  <input type="search" class="form-control" name="q" id="search-input" placeholder="{{ theme_search_bar_text }}" aria-label="{{ theme_search_bar_text }}" autocomplete="off" >
+</form>
+
+<nav class="bd-links" id="bd-docs-nav" aria-label="Main navigation">
+  <div class="bd-toc-item active">
+    {% if "python/api" in pagename or "python/generated" in pagename %}
+    {{ generate_nav_html("sidebar", startdepth=0, maxdepth=3, collapse=False, includehidden=True, titles_only=True) }}
+    {% else %}
+    {{ generate_nav_html("sidebar", startdepth=0, maxdepth=4, collapse=False, includehidden=True, titles_only=True) }}
+    {% endif %}
+  </div>
+</nav>
diff --git a/docs/source/_templates/layout.html b/docs/source/_templates/layout.html
index 8ee71e4bb27..a9d0f30bcf8 100644
--- a/docs/source/_templates/layout.html
+++ b/docs/source/_templates/layout.html
@@ -1,13 +1,5 @@
-{# Import the theme's layout. #}
-{% extends "!layout.html" %}
+{% extends "pydata_sphinx_theme/layout.html" %}
 
-{%- block footer %}
-<script async src="https://www.googletagmanager.com/gtag/js?id=UA-107500873-1"></script>
-<script>
-  window.dataLayer = window.dataLayer || [];
-  function gtag(){dataLayer.push(arguments);}
-  gtag('js', new Date());
-
-  gtag('config', 'UA-107500873-1');
-</script>
+{# Silence the navbar #}
+{% block docs_navbar %}
 {% endblock %}
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 47d88a9a166..05a45531f4a 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -179,14 +179,15 @@
 # The theme to use for HTML and HTML Help pages.  See the documentation for
 # a list of builtin themes.
 #
-html_theme = 'sphinx_rtd_theme'
+html_theme = 'pydata_sphinx_theme'
 
 # Theme options are theme-specific and customize the look and feel of a theme
 # further.  For a list of options available for each theme, see the
 # documentation.
 #
 html_theme_options = {
-    'nosidebar': True
+    "show_toc_level": 2,
+    "google_analytics_id": "UA-107500873-1",
 }
 
 # Add any paths that contain custom themes here, relative to this directory.
@@ -204,13 +205,13 @@
 # The name of an image file (relative to this directory) to place at the top
 # of the sidebar.
 #
-# html_logo = None
+html_logo = "_static/arrow.png"
 
 # The name of an image file (relative to this directory) to use as a favicon of
 # the docs.  This file should be a Windows icon file (.ico) being 16x16 or
 # 32x32 pixels large.
 #
-# html_favicon = None
+html_favicon = "_static/favicon.ico"
 
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
@@ -239,7 +240,10 @@
 
 # Custom sidebar templates, maps document names to template names.
 #
-# html_sidebars = {}
+html_sidebars = {
+#    '**': ['sidebar-logo.html', 'sidebar-search-bs.html', 'sidebar-nav-bs.html'],
+    '**': ['docs-sidebar.html'],
+}
 
 # Additional templates that should be rendered to pages, maps page names to
 # template names.
diff --git a/docs/source/cpp/api/array.rst b/docs/source/cpp/api/array.rst
index 70e21faa011..bb981d1a047 100644
--- a/docs/source/cpp/api/array.rst
+++ b/docs/source/cpp/api/array.rst
@@ -19,10 +19,6 @@
 Arrays
 ======
 
-.. sidebar:: Contents
-
-   .. contents:: :local:
-
 .. doxygenclass:: arrow::Array
    :project: arrow_cpp
    :members:
diff --git a/docs/source/cpp/api/compute.rst b/docs/source/cpp/api/compute.rst
index 9a71cce1288..3b0a89f83f8 100644
--- a/docs/source/cpp/api/compute.rst
+++ b/docs/source/cpp/api/compute.rst
@@ -18,10 +18,6 @@
 Compute Functions
 =================
 
-.. sidebar:: Contents
-
-   .. contents:: :local:
-
 Datum class
 -----------
 
diff --git a/docs/source/cpp/api/dataset.rst b/docs/source/cpp/api/dataset.rst
index f285f3633be..3f0df8a4537 100644
--- a/docs/source/cpp/api/dataset.rst
+++ b/docs/source/cpp/api/dataset.rst
@@ -19,10 +19,6 @@
 Dataset
 =======
 
-.. sidebar:: Contents
-
-   .. contents:: :local:
-
 Interface
 =========
 
diff --git a/docs/source/cpp/api/filesystem.rst b/docs/source/cpp/api/filesystem.rst
index 957e7321730..02fff9a6c2e 100644
--- a/docs/source/cpp/api/filesystem.rst
+++ b/docs/source/cpp/api/filesystem.rst
@@ -19,10 +19,6 @@
 Filesystems
 ===========
 
-.. sidebar:: Contents
-
-   .. contents:: :local:
-
 Interface
 =========
 
diff --git a/docs/source/cpp/api/io.rst b/docs/source/cpp/api/io.rst
index 37023ec696c..735136a0d47 100644
--- a/docs/source/cpp/api/io.rst
+++ b/docs/source/cpp/api/io.rst
@@ -19,10 +19,6 @@
 Input / output
 ==============
 
-.. sidebar:: Contents
-
-   .. contents:: :local:
-
 Interfaces
 ==========
 
diff --git a/docs/source/cpp/api/ipc.rst b/docs/source/cpp/api/ipc.rst
index 2a9e656fa05..6822b986a75 100644
--- a/docs/source/cpp/api/ipc.rst
+++ b/docs/source/cpp/api/ipc.rst
@@ -22,10 +22,6 @@
 Arrow IPC
 =========
 
-.. sidebar:: Contents
-
-   .. contents:: :local:
-
 IPC options
 ===========
 
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index b2ecb3b2ceb..92ac8886f87 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -23,10 +23,6 @@
 Compute Functions
 =================
 
-.. sidebar:: Contents
-
-   .. contents:: :local:
-
 The generic Compute API
 =======================
 
diff --git a/docs/source/cpp/memory.rst b/docs/source/cpp/memory.rst
index ac1ae5e4f27..415a3ae1852 100644
--- a/docs/source/cpp/memory.rst
+++ b/docs/source/cpp/memory.rst
@@ -25,10 +25,6 @@ Memory Management
 .. seealso::
    :doc:`Memory management API reference <api/memory>`
 
-.. sidebar:: Contents
-
-   .. contents:: :local:
-
 Buffers
 =======
 
diff --git a/docs/source/cpp/parquet.rst b/docs/source/cpp/parquet.rst
index a81fadb8eda..d69bf1c6b56 100644
--- a/docs/source/cpp/parquet.rst
+++ b/docs/source/cpp/parquet.rst
@@ -27,10 +27,6 @@ Reading and writing Parquet files
 .. seealso::
    :ref:`Parquet reader and writer API reference <cpp-api-parquet>`.
 
-.. sidebar:: Contents
-
-   .. contents:: :local:
-
 The `Parquet format <https://parquet.apache.org/documentation/latest/>`__
 is a space-efficient columnar storage format for complex data.  The Parquet
 C++ implementation is part of the Apache Arrow project and benefits
diff --git a/docs/source/developers/cpp/windows.rst b/docs/source/developers/cpp/windows.rst
index 8df443d097a..4de67ba402c 100644
--- a/docs/source/developers/cpp/windows.rst
+++ b/docs/source/developers/cpp/windows.rst
@@ -194,7 +194,7 @@ to the Unix-specific ``ccache``).
 Newer versions of Visual Studio include Ninja. To see if your Visual Studio
 includes Ninja, run the initialization command shown
 :ref:`above<windows-system-setup>` (``vcvarsall.bat`` or ``VsDevCmd.bat``), then
- run ``ninja --version``.
+run ``ninja --version``.
 
 If Ninja is not included in your version of Visual Studio, and you are using
 conda, activate your conda environment and install Ninja and clcache:
diff --git a/docs/source/format/Guidelines.rst b/docs/source/format/Guidelines.rst
index fec6c7f4f26..40624521a7a 100644
--- a/docs/source/format/Guidelines.rst
+++ b/docs/source/format/Guidelines.rst
@@ -15,6 +15,8 @@
 .. specific language governing permissions and limitations
 .. under the License.
 
+:orphan:
+
 Implementation Guidelines
 =========================
 
diff --git a/docs/source/format/IPC.rst b/docs/source/format/IPC.rst
index cc9f4198618..65b47f7d71c 100644
--- a/docs/source/format/IPC.rst
+++ b/docs/source/format/IPC.rst
@@ -15,6 +15,8 @@
 .. specific language governing permissions and limitations
 .. under the License.
 
+:orphan:
+
 IPC
 ===
 
diff --git a/docs/source/format/Layout.rst b/docs/source/format/Layout.rst
index cb83ae152f8..4568f31c58c 100644
--- a/docs/source/format/Layout.rst
+++ b/docs/source/format/Layout.rst
@@ -15,6 +15,8 @@
 .. specific language governing permissions and limitations
 .. under the License.
 
+:orphan:
+
 Physical Memory Layout
 ======================
 
diff --git a/docs/source/format/Metadata.rst b/docs/source/format/Metadata.rst
index ae15bf286f6..55045abb0af 100644
--- a/docs/source/format/Metadata.rst
+++ b/docs/source/format/Metadata.rst
@@ -15,6 +15,8 @@
 .. specific language governing permissions and limitations
 .. under the License.
 
+:orphan:
+
 Format Metadata
 ===============
 
diff --git a/docs/source/python/plasma.rst b/docs/source/python/plasma.rst
index 25248c16a77..e373bd0a69d 100644
--- a/docs/source/python/plasma.rst
+++ b/docs/source/python/plasma.rst
@@ -21,9 +21,6 @@
 The Plasma In-Memory Object Store
 =================================
 
-.. contents:: Contents
-  :depth: 3
-
 .. note::
 
    As present, Plasma is only supported for use on Linux and macOS.
diff --git a/python/pyarrow/dataset.py b/python/pyarrow/dataset.py
index ea17507c23d..b3c142f6323 100644
--- a/python/pyarrow/dataset.py
+++ b/python/pyarrow/dataset.py
@@ -518,8 +518,8 @@ def dataset(source, schema=None, format=None, filesystem=None,
 
     Parameters
     ----------
-    source : path, list of paths, dataset, list of datasets, (list of) batches
-             or tables, iterable of batches, RecordBatchReader, or URI
+    source : path, list of paths, dataset, list of datasets, (list of) batches\
+or tables, iterable of batches, RecordBatchReader, or URI
         Path pointing to a single file:
             Open a FileSystemDataset from a single file.
         Path pointing to a directory:
diff --git a/python/pyarrow/parquet.py b/python/pyarrow/parquet.py
index 1b3602de8cd..4564740bc83 100644
--- a/python/pyarrow/parquet.py
+++ b/python/pyarrow/parquet.py
@@ -547,8 +547,8 @@ def _sanitize_table(table, new_schema, flavor):
     LogicalTypes.md#nested-types>`_, defaults to ``False``.
     For ``use_compliant_nested_type=True``, this will write into a list
     with 3-level structure where the middle level, named ``list``,
-    is a repeated group with a single field named ``element``
-    ::
+    is a repeated group with a single field named ``element``::
+
         <list-repetition> group <name> (LIST) {
             repeated group list {
                   <element-repetition> <element-type> element;
@@ -558,8 +558,8 @@ def _sanitize_table(table, new_schema, flavor):
     For ``use_compliant_nested_type=False``, this will also write into a list
     with 3-level structure, where the name of the single field of the middle
     level ``list`` is taken from the element name for nested columns in Arrow,
-    which defaults to ``item``
-    ::
+    which defaults to ``item``::
+
         <list-repetition> group <name> (LIST) {
             repeated group list {
                 <element-repetition> <element-type> item;

From 150224c6e7e29a7bc287d18eb24f7cc2e9474809 Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Thu, 15 Apr 2021 17:03:17 +0200
Subject: [PATCH 056/719] ARROW-12151: [Docs] Add Jira component + summary
 conventions to the docs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #10035 from jonkeane/ARROW-12151

Authored-by: Jonathan Keane <jkeane@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 docs/source/developers/contributing.rst | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/docs/source/developers/contributing.rst b/docs/source/developers/contributing.rst
index 1eeeafe07fe..9aecf8a6915 100644
--- a/docs/source/developers/contributing.rst
+++ b/docs/source/developers/contributing.rst
@@ -92,7 +92,13 @@ right people see it:
   issue pertains to (for example "Python" or "C++").
 * Also prefix the issue title with the component name in brackets, for example
   ``[Python] issue name`` ; this helps when navigating lists of open issues,
-  and it also makes our changelogs more readable.
+  and it also makes our changelogs more readable. Most prefixes are exactly the 
+  same as the **Component** name, with the following exceptions:
+
+  * **Component:** Continuous Integration — **Summary prefix:** [CI]
+  * **Component:** Developer Tools — **Summary prefix:** [Dev]
+  * **Component:** Documentation — **Summary prefix:** [Docs]
+
 * If you're reporting something that used to work in a previous version
   but doesn't work in the current release, you can add the "Affects version"
   field. For feature requests and other proposals, "Affects version" isn't

From 2da0a3724a396616d3f1b52f9f28e04a12fb3bfd Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Thu, 15 Apr 2021 17:05:35 +0200
Subject: [PATCH 057/719] ARROW-9731: [C++][Python][R][Dataset] Implement
 Scanner::Head
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This ports the head() method from R to C++ and exposes it in Python.

Closes #10047 from lidavidm/arrow-9731-2

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 cpp/src/arrow/dataset/scanner.cc             | 16 +++++++++
 cpp/src/arrow/dataset/scanner.h              |  2 ++
 cpp/src/arrow/dataset/scanner_test.cc        | 35 ++++++++++++++++++++
 python/pyarrow/_dataset.pyx                  | 34 +++++++++++++++++++
 python/pyarrow/includes/libarrow_dataset.pxd |  1 +
 python/pyarrow/tests/test_dataset.py         | 22 ++++++++++++
 r/src/dataset.cpp                            | 11 +-----
 7 files changed, 111 insertions(+), 10 deletions(-)

diff --git a/cpp/src/arrow/dataset/scanner.cc b/cpp/src/arrow/dataset/scanner.cc
index 5095c2e8ad6..f7bd3c063e5 100644
--- a/cpp/src/arrow/dataset/scanner.cc
+++ b/cpp/src/arrow/dataset/scanner.cc
@@ -547,5 +547,21 @@ Result<std::shared_ptr<Table>> Scanner::TakeRows(const Array& indices) {
   return out.table();
 }
 
+Result<std::shared_ptr<Table>> Scanner::Head(int64_t num_rows) {
+  if (num_rows == 0) {
+    return Table::FromRecordBatches(options()->projected_schema, {});
+  }
+  ARROW_ASSIGN_OR_RAISE(auto batch_iterator, ScanBatches());
+  RecordBatchVector batches;
+  while (true) {
+    ARROW_ASSIGN_OR_RAISE(auto batch, batch_iterator.Next());
+    if (IsIterationEnd(batch)) break;
+    batches.push_back(batch.record_batch->Slice(0, num_rows));
+    num_rows -= batch.record_batch->num_rows();
+    if (num_rows <= 0) break;
+  }
+  return Table::FromRecordBatches(options()->projected_schema, batches);
+}
+
 }  // namespace dataset
 }  // namespace arrow
diff --git a/cpp/src/arrow/dataset/scanner.h b/cpp/src/arrow/dataset/scanner.h
index 9720346b410..956fbbb2ee3 100644
--- a/cpp/src/arrow/dataset/scanner.h
+++ b/cpp/src/arrow/dataset/scanner.h
@@ -289,6 +289,8 @@ class ARROW_DS_EXPORT Scanner {
   ///
   /// Will only consume as many batches as needed from ScanBatches().
   virtual Result<std::shared_ptr<Table>> TakeRows(const Array& indices);
+  /// \brief Get the first N rows.
+  virtual Result<std::shared_ptr<Table>> Head(int64_t num_rows);
 
   /// \brief Get the options for this scan.
   const std::shared_ptr<ScanOptions>& options() const { return scan_options_; }
diff --git a/cpp/src/arrow/dataset/scanner_test.cc b/cpp/src/arrow/dataset/scanner_test.cc
index 3a2d37f1ce1..b4e374a7795 100644
--- a/cpp/src/arrow/dataset/scanner_test.cc
+++ b/cpp/src/arrow/dataset/scanner_test.cc
@@ -301,6 +301,41 @@ TEST_P(TestScanner, ScanBatchesFailure) {
   }
 }
 
+TEST_P(TestScanner, Head) {
+  SetSchema({field("i32", int32()), field("f64", float64())});
+  auto batch = ConstantArrayGenerator::Zeroes(kBatchSize, schema_);
+
+  auto scanner = MakeScanner(batch);
+  std::shared_ptr<Table> expected, actual;
+
+  ASSERT_OK_AND_ASSIGN(expected, Table::FromRecordBatches(schema_, {}));
+  ASSERT_OK_AND_ASSIGN(actual, scanner->Head(0));
+  AssertTablesEqual(*expected, *actual);
+
+  ASSERT_OK_AND_ASSIGN(expected, Table::FromRecordBatches(schema_, {batch}));
+  ASSERT_OK_AND_ASSIGN(actual, scanner->Head(kBatchSize));
+  AssertTablesEqual(*expected, *actual);
+
+  ASSERT_OK_AND_ASSIGN(expected, Table::FromRecordBatches(schema_, {batch->Slice(0, 1)}));
+  ASSERT_OK_AND_ASSIGN(actual, scanner->Head(1));
+  AssertTablesEqual(*expected, *actual);
+
+  ASSERT_OK_AND_ASSIGN(expected,
+                       Table::FromRecordBatches(schema_, {batch, batch->Slice(0, 1)}));
+  ASSERT_OK_AND_ASSIGN(actual, scanner->Head(kBatchSize + 1));
+  AssertTablesEqual(*expected, *actual);
+
+  ASSERT_OK_AND_ASSIGN(expected, scanner->ToTable());
+  ASSERT_OK_AND_ASSIGN(actual,
+                       scanner->Head(kBatchSize * kNumberBatches * kNumberChildDatasets));
+  AssertTablesEqual(*expected, *actual);
+
+  ASSERT_OK_AND_ASSIGN(expected, scanner->ToTable());
+  ASSERT_OK_AND_ASSIGN(
+      actual, scanner->Head(kBatchSize * kNumberBatches * kNumberChildDatasets + 100));
+  AssertTablesEqual(*expected, *actual);
+}
+
 INSTANTIATE_TEST_SUITE_P(TestScannerThreading, TestScanner, ::testing::Bool());
 
 class TestScannerBuilder : public ::testing::Test {
diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx
index 46f78d48d30..619942840c4 100644
--- a/python/pyarrow/_dataset.pyx
+++ b/python/pyarrow/_dataset.pyx
@@ -457,6 +457,17 @@ cdef class Dataset(_Weakrefable):
         """
         return self._scanner(**kwargs).to_table()
 
+    def head(self, int num_rows, **kwargs):
+        """Load the first N rows of the dataset.
+
+        See scan method parameters documentation.
+
+        Returns
+        -------
+        table : Table instance
+        """
+        return self._scanner(**kwargs).head(num_rows)
+
     @property
     def schema(self):
         """The common schema of the full Dataset"""
@@ -989,6 +1000,17 @@ cdef class Fragment(_Weakrefable):
         """
         return self._scanner(schema=schema, **kwargs).to_table()
 
+    def head(self, int num_rows, **kwargs):
+        """Load the first N rows of the fragment.
+
+        See scan method parameters documentation.
+
+        Returns
+        -------
+        table : Table instance
+        """
+        return self._scanner(**kwargs).head(num_rows)
+
 
 cdef class FileFragment(Fragment):
     """A Fragment representing a data file."""
@@ -2883,6 +2905,18 @@ cdef class Scanner(_Weakrefable):
             result = self.scanner.TakeRows(deref(c_indices))
         return pyarrow_wrap_table(GetResultValue(result))
 
+    def head(self, int num_rows):
+        """Load the first N rows of the dataset.
+
+        Returns
+        -------
+        table : Table instance
+        """
+        cdef CResult[shared_ptr[CTable]] result
+        with nogil:
+            result = self.scanner.Head(num_rows)
+        return pyarrow_wrap_table(GetResultValue(result))
+
 
 def _get_partition_keys(Expression partition_expression):
     """
diff --git a/python/pyarrow/includes/libarrow_dataset.pxd b/python/pyarrow/includes/libarrow_dataset.pxd
index 4da29783b20..16f6c5c0183 100644
--- a/python/pyarrow/includes/libarrow_dataset.pxd
+++ b/python/pyarrow/includes/libarrow_dataset.pxd
@@ -100,6 +100,7 @@ cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil:
         CResult[CTaggedRecordBatchIterator] ScanBatches()
         CResult[shared_ptr[CTable]] ToTable()
         CResult[shared_ptr[CTable]] TakeRows(const CArray& indices)
+        CResult[shared_ptr[CTable]] Head(int64_t num_rows)
         CResult[CFragmentIterator] GetFragments()
         const shared_ptr[CScanOptions]& options()
 
diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py
index 26c14e14822..6ca6b095936 100644
--- a/python/pyarrow/tests/test_dataset.py
+++ b/python/pyarrow/tests/test_dataset.py
@@ -371,6 +371,28 @@ def test_scanner(dataset):
         scanner.take(pa.array([table.num_rows]))
 
 
+def test_head(dataset):
+    result = dataset.head(0)
+    assert result == pa.Table.from_batches([], schema=dataset.schema)
+
+    result = dataset.head(1, columns=['i64']).to_pydict()
+    assert result == {'i64': [0]}
+
+    result = dataset.head(2, columns=['i64'],
+                          filter=ds.field('i64') > 1).to_pydict()
+    assert result == {'i64': [2, 3]}
+
+    result = dataset.head(1024, columns=['i64']).to_pydict()
+    assert result == {'i64': list(range(5)) * 2}
+
+    fragment = next(dataset.get_fragments())
+    result = fragment.head(1, columns=['i64']).to_pydict()
+    assert result == {'i64': [0]}
+
+    result = fragment.head(1024, columns=['i64']).to_pydict()
+    assert result == {'i64': list(range(5))}
+
+
 def test_abstract_classes():
     classes = [
         ds.FileFormat,
diff --git a/r/src/dataset.cpp b/r/src/dataset.cpp
index af321d75db6..c7ef39b5b62 100644
--- a/r/src/dataset.cpp
+++ b/r/src/dataset.cpp
@@ -438,16 +438,7 @@ cpp11::list dataset___Scanner__ScanBatches(const std::shared_ptr<ds::Scanner>& s
 std::shared_ptr<arrow::Table> dataset___Scanner__head(
     const std::shared_ptr<ds::Scanner>& scanner, int n) {
   // TODO: make this a full Slice with offset > 0
-  auto it = ValueOrStop(scanner->ScanBatches());
-  std::vector<std::shared_ptr<arrow::RecordBatch>> batches;
-  while (true) {
-    auto current_batch = ValueOrStop(it.Next());
-    if (arrow::IsIterationEnd(current_batch)) break;
-    batches.push_back(current_batch.record_batch->Slice(0, n));
-    n -= current_batch.record_batch->num_rows();
-    if (n < 0) break;
-  }
-  return ValueOrStop(arrow::Table::FromRecordBatches(std::move(batches)));
+  return ValueOrStop(scanner->Head(n));
 }
 
 // TODO (ARROW-11782) Remove calls to Scan()

From 22bebf8278cbbed08f82f1ec664ed4e4577cd175 Mon Sep 17 00:00:00 2001
From: Yibo Cai <yibo.cai@arm.com>
Date: Thu, 15 Apr 2021 17:23:46 +0200
Subject: [PATCH 058/719] ARROW-11568: [C++][Compute] Rewrite mode kernel

Arrow mode kernel performance is bad compared with scipy.stats.mode
(based on numpy.unique). Arrow mode kernel stores value:count pair in
a map, while numpy.unique sorts the input array then count the adjacent
same values. Per my test, the map approach only wins when there are
many duplicated values (length / value_range > 100), looks not very
useful in practice.

This patch rewrites mode kernel to use the sort and count approach for
floating points and integers with wide value range. 2x performance
improvement is observed.

Closes #10009 from cyb70289/11568-mode-optimize

Lead-authored-by: Yibo Cai <yibo.cai@arm.com>
Co-authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 .../compute/kernels/aggregate_benchmark.cc    |  40 +-
 .../arrow/compute/kernels/aggregate_mode.cc   | 560 +++++++++---------
 .../compute/kernels/aggregate_quantile.cc     |  67 +--
 .../arrow/compute/kernels/aggregate_test.cc   |   2 +-
 cpp/src/arrow/compute/kernels/util_internal.h |  90 +++
 cpp/src/arrow/compute/kernels/vector_sort.cc  |  13 +-
 cpp/src/arrow/util/bit_run_reader.h           |  16 +-
 7 files changed, 402 insertions(+), 386 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/aggregate_benchmark.cc b/cpp/src/arrow/compute/kernels/aggregate_benchmark.cc
index 42be0c36544..39cfeb039a8 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_benchmark.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_benchmark.cc
@@ -501,21 +501,31 @@ SUM_KERNEL_BENCHMARK(SumKernelInt64, Int64Type);
 //
 
 template <typename ArrowType>
-void ModeKernelBench(benchmark::State& state) {
+void ModeKernel(benchmark::State& state, int min, int max) {
   using CType = typename TypeTraits<ArrowType>::CType;
 
   RegressionArgs args(state);
   const int64_t array_size = args.size / sizeof(CType);
   auto rand = random::RandomArrayGenerator(1924);
-  auto array = rand.Numeric<ArrowType>(array_size, -100, 100, args.null_proportion);
+  auto array = rand.Numeric<ArrowType>(array_size, min, max, args.null_proportion);
 
   for (auto _ : state) {
     ABORT_NOT_OK(Mode(array).status());
   }
 }
 
+template <typename ArrowType>
+void ModeKernelNarrow(benchmark::State& state) {
+  ModeKernel<ArrowType>(state, -5000, 8000);  // max - min < 16384
+}
+
+template <>
+void ModeKernelNarrow<Int8Type>(benchmark::State& state) {
+  ModeKernel<Int8Type>(state, -128, 127);
+}
+
 template <>
-void ModeKernelBench<BooleanType>(benchmark::State& state) {
+void ModeKernelNarrow<BooleanType>(benchmark::State& state) {
   RegressionArgs args(state);
   auto rand = random::RandomArrayGenerator(1924);
   auto array = rand.Boolean(args.size * 8, 0.5, args.null_proportion);
@@ -525,19 +535,23 @@ void ModeKernelBench<BooleanType>(benchmark::State& state) {
   }
 }
 
-static void ModeKernelBenchArgs(benchmark::internal::Benchmark* bench) {
-  BenchmarkSetArgsWithSizes(bench, {1 * 1024 * 1024});  // 1M
+template <typename ArrowType>
+void ModeKernelWide(benchmark::State& state) {
+  ModeKernel<ArrowType>(state, -1234567, 7654321);
 }
 
-#define MODE_KERNEL_BENCHMARK(FuncName, Type)                                     \
-  static void FuncName(benchmark::State& state) { ModeKernelBench<Type>(state); } \
-  BENCHMARK(FuncName)->Apply(ModeKernelBenchArgs)
+static void ModeKernelArgs(benchmark::internal::Benchmark* bench) {
+  BenchmarkSetArgsWithSizes(bench, {1 * 1024 * 1024});  // 1M
+}
 
-MODE_KERNEL_BENCHMARK(ModeKernelBoolean, BooleanType);
-MODE_KERNEL_BENCHMARK(ModeKernelInt8, Int8Type);
-MODE_KERNEL_BENCHMARK(ModeKernelInt16, Int16Type);
-MODE_KERNEL_BENCHMARK(ModeKernelInt32, Int32Type);
-MODE_KERNEL_BENCHMARK(ModeKernelInt64, Int64Type);
+BENCHMARK_TEMPLATE(ModeKernelNarrow, BooleanType)->Apply(ModeKernelArgs);
+BENCHMARK_TEMPLATE(ModeKernelNarrow, Int8Type)->Apply(ModeKernelArgs);
+BENCHMARK_TEMPLATE(ModeKernelNarrow, Int32Type)->Apply(ModeKernelArgs);
+BENCHMARK_TEMPLATE(ModeKernelNarrow, Int64Type)->Apply(ModeKernelArgs);
+BENCHMARK_TEMPLATE(ModeKernelWide, Int32Type)->Apply(ModeKernelArgs);
+BENCHMARK_TEMPLATE(ModeKernelWide, Int64Type)->Apply(ModeKernelArgs);
+BENCHMARK_TEMPLATE(ModeKernelWide, FloatType)->Apply(ModeKernelArgs);
+BENCHMARK_TEMPLATE(ModeKernelWide, DoubleType)->Apply(ModeKernelArgs);
 
 //
 // MinMax
diff --git a/cpp/src/arrow/compute/kernels/aggregate_mode.cc b/cpp/src/arrow/compute/kernels/aggregate_mode.cc
index 3a60cab2cca..7ac0dd3c707 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_mode.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_mode.cc
@@ -17,13 +17,15 @@
 
 #include <cmath>
 #include <queue>
-#include <unordered_map>
+#include <utility>
 
 #include "arrow/compute/api_aggregate.h"
 #include "arrow/compute/kernels/aggregate_internal.h"
 #include "arrow/compute/kernels/common.h"
+#include "arrow/compute/kernels/util_internal.h"
+#include "arrow/result.h"
+#include "arrow/stl_allocator.h"
 #include "arrow/type_traits.h"
-#include "arrow/util/bit_run_reader.h"
 
 namespace arrow {
 namespace compute {
@@ -31,340 +33,312 @@ namespace internal {
 
 namespace {
 
+using ModeState = OptionsWrapper<ModeOptions>;
+
 constexpr char kModeFieldName[] = "mode";
 constexpr char kCountFieldName[] = "count";
 
-// {value:count} map
-template <typename CType>
-using CounterMap = std::unordered_map<CType, int64_t>;
-
-// map based counter for floating points
-template <typename ArrayType, typename CType = typename ArrayType::TypeClass::c_type>
-enable_if_t<std::is_floating_point<CType>::value, CounterMap<CType>> CountValuesByMap(
-    const ArrayType& array, int64_t& nan_count) {
-  CounterMap<CType> value_counts_map;
-  const ArrayData& data = *array.data();
-  const CType* values = data.GetValues<CType>(1);
-
-  nan_count = 0;
-  if (array.length() > array.null_count()) {
-    arrow::internal::VisitSetBitRunsVoid(data.buffers[0], data.offset, data.length,
-                                         [&](int64_t pos, int64_t len) {
-                                           for (int64_t i = 0; i < len; ++i) {
-                                             const auto value = values[pos + i];
-                                             if (std::isnan(value)) {
-                                               ++nan_count;
-                                             } else {
-                                               ++value_counts_map[value];
-                                             }
-                                           }
-                                         });
-  }
+constexpr uint64_t kCountEOF = ~0ULL;
 
-  return value_counts_map;
-}
+template <typename InType, typename CType = typename InType::c_type>
+Result<std::pair<CType*, int64_t*>> PrepareOutput(int64_t n, KernelContext* ctx,
+                                                  Datum* out) {
+  const auto& mode_type = TypeTraits<InType>::type_singleton();
+  const auto& count_type = int64();
 
-// map base counter for non floating points
-template <typename ArrayType, typename CType = typename ArrayType::TypeClass::c_type>
-enable_if_t<!std::is_floating_point<CType>::value, CounterMap<CType>> CountValuesByMap(
-    const ArrayType& array) {
-  CounterMap<CType> value_counts_map;
-  const ArrayData& data = *array.data();
-  const CType* values = data.GetValues<CType>(1);
-
-  if (array.length() > array.null_count()) {
-    arrow::internal::VisitSetBitRunsVoid(data.buffers[0], data.offset, data.length,
-                                         [&](int64_t pos, int64_t len) {
-                                           for (int64_t i = 0; i < len; ++i) {
-                                             ++value_counts_map[values[pos + i]];
-                                           }
-                                         });
-  }
+  auto mode_data = ArrayData::Make(mode_type, /*length=*/n, /*null_count=*/0);
+  mode_data->buffers.resize(2, nullptr);
+  auto count_data = ArrayData::Make(count_type, n, 0);
+  count_data->buffers.resize(2, nullptr);
 
-  return value_counts_map;
-}
+  CType* mode_buffer = nullptr;
+  int64_t* count_buffer = nullptr;
 
-// vector based counter for int8 or integers with small value range
-template <typename ArrayType, typename CType = typename ArrayType::TypeClass::c_type>
-CounterMap<CType> CountValuesByVector(const ArrayType& array, CType min, CType max) {
-  const int range = static_cast<int>(max - min);
-  DCHECK(range >= 0 && range < 64 * 1024 * 1024);
-  const ArrayData& data = *array.data();
-  const CType* values = data.GetValues<CType>(1);
-
-  std::vector<int64_t> value_counts_vector(range + 1);
-  if (array.length() > array.null_count()) {
-    arrow::internal::VisitSetBitRunsVoid(data.buffers[0], data.offset, data.length,
-                                         [&](int64_t pos, int64_t len) {
-                                           for (int64_t i = 0; i < len; ++i) {
-                                             ++value_counts_vector[values[pos + i] - min];
-                                           }
-                                         });
+  if (n > 0) {
+    ARROW_ASSIGN_OR_RAISE(mode_data->buffers[1], ctx->Allocate(n * sizeof(CType)));
+    ARROW_ASSIGN_OR_RAISE(count_data->buffers[1], ctx->Allocate(n * sizeof(int64_t)));
+    mode_buffer = mode_data->template GetMutableValues<CType>(1);
+    count_buffer = count_data->template GetMutableValues<int64_t>(1);
   }
 
-  // Transfer value counts to a map to be consistent with other chunks
-  CounterMap<CType> value_counts_map(range + 1);
-  for (int i = 0; i <= range; ++i) {
-    CType value = static_cast<CType>(i + min);
-    int64_t count = value_counts_vector[i];
-    if (count) {
-      value_counts_map[value] = count;
-    }
-  }
+  const auto& out_type =
+      struct_({field(kModeFieldName, mode_type), field(kCountFieldName, count_type)});
+  *out = Datum(ArrayData::Make(out_type, n, {nullptr}, {mode_data, count_data}, 0));
 
-  return value_counts_map;
+  return std::make_pair(mode_buffer, count_buffer);
 }
 
-// map or vector based counter for int16/32/64 per value range
-template <typename ArrayType, typename CType = typename ArrayType::TypeClass::c_type>
-CounterMap<CType> CountValuesByMapOrVector(const ArrayType& array) {
-  // see https://issues.apache.org/jira/browse/ARROW-9873
-  static constexpr int kMinArraySize = 8192 / sizeof(CType);
-  static constexpr int kMaxValueRange = 16384;
-  const ArrayData& data = *array.data();
-  const CType* values = data.GetValues<CType>(1);
-
-  if ((array.length() - array.null_count()) >= kMinArraySize) {
-    CType min = std::numeric_limits<CType>::max();
-    CType max = std::numeric_limits<CType>::min();
-
-    arrow::internal::VisitSetBitRunsVoid(data.buffers[0], data.offset, data.length,
-                                         [&](int64_t pos, int64_t len) {
-                                           for (int64_t i = 0; i < len; ++i) {
-                                             const auto value = values[pos + i];
-                                             min = std::min(min, value);
-                                             max = std::max(max, value);
-                                           }
-                                         });
-
-    if (static_cast<uint64_t>(max) - static_cast<uint64_t>(min) <= kMaxValueRange) {
-      return CountValuesByVector(array, min, max);
+// find top-n value:count pairs with minimal heap
+// suboptimal for tiny or large n, possibly okay as we're not in hot path
+template <typename InType, typename Generator>
+void Finalize(KernelContext* ctx, Datum* out, Generator&& gen) {
+  using CType = typename InType::c_type;
+
+  using ValueCountPair = std::pair<CType, uint64_t>;
+  auto gt = [](const ValueCountPair& lhs, const ValueCountPair& rhs) {
+    const bool rhs_is_nan = rhs.first != rhs.first;  // nan as largest value
+    return lhs.second > rhs.second ||
+           (lhs.second == rhs.second && (lhs.first < rhs.first || rhs_is_nan));
+  };
+
+  std::priority_queue<ValueCountPair, std::vector<ValueCountPair>, decltype(gt)> min_heap(
+      std::move(gt));
+
+  const ModeOptions& options = ModeState::Get(ctx);
+  while (true) {
+    const ValueCountPair& value_count = gen();
+    DCHECK_NE(value_count.second, 0);
+    if (value_count.second == kCountEOF) break;
+    if (static_cast<int64_t>(min_heap.size()) < options.n) {
+      min_heap.push(value_count);
+    } else if (gt(value_count, min_heap.top())) {
+      min_heap.pop();
+      min_heap.push(value_count);
     }
   }
-  return CountValuesByMap(array);
-}
+  const int64_t n = min_heap.size();
 
-// bool
-template <typename ArrayType, typename CType = typename ArrayType::TypeClass::c_type>
-enable_if_t<is_boolean_type<typename ArrayType::TypeClass>::value, CounterMap<CType>>
-CountValues(const ArrayType& array, int64_t& nan_count) {
-  // we need just count ones and zeros
-  CounterMap<CType> map;
-  if (array.length() > array.null_count()) {
-    map[true] = array.true_count();
-    map[false] = array.length() - array.null_count() - map[true];
-  }
-  nan_count = 0;
-  return map;
-}
+  CType* mode_buffer;
+  int64_t* count_buffer;
+  KERNEL_ASSIGN_OR_RAISE(std::tie(mode_buffer, count_buffer), ctx,
+                         PrepareOutput<InType>(n, ctx, out));
 
-// int8
-template <typename ArrayType, typename CType = typename ArrayType::TypeClass::c_type>
-enable_if_t<is_integer_type<typename ArrayType::TypeClass>::value && sizeof(CType) == 1,
-            CounterMap<CType>>
-CountValues(const ArrayType& array, int64_t& nan_count) {
-  using Limits = std::numeric_limits<CType>;
-  nan_count = 0;
-  return CountValuesByVector(array, Limits::min(), Limits::max());
+  for (int64_t i = n - 1; i >= 0; --i) {
+    std::tie(mode_buffer[i], count_buffer[i]) = min_heap.top();
+    min_heap.pop();
+  }
 }
 
-// int16/32/64
-template <typename ArrayType, typename CType = typename ArrayType::TypeClass::c_type>
-enable_if_t<is_integer_type<typename ArrayType::TypeClass>::value && (sizeof(CType) > 1),
-            CounterMap<CType>>
-CountValues(const ArrayType& array, int64_t& nan_count) {
-  nan_count = 0;
-  return CountValuesByMapOrVector(array);
-}
+// count value occurances for integers with narrow value range
+// O(1) space, O(n) time
+template <typename T>
+struct CountModer {
+  using CType = typename T::c_type;
 
-// float/double
-template <typename ArrayType, typename CType = typename ArrayType::TypeClass::c_type>
-enable_if_t<(std::is_floating_point<CType>::value), CounterMap<CType>>  // NOLINT format
-CountValues(const ArrayType& array, int64_t& nan_count) {
-  nan_count = 0;
-  return CountValuesByMap(array, nan_count);
-}
+  CType min;
+  std::vector<uint64_t> counts;
 
-template <typename ArrowType>
-struct ModeState {
-  using ThisType = ModeState<ArrowType>;
-  using CType = typename ArrowType::c_type;
-
-  void MergeFrom(ThisType&& state) {
-    if (this->value_counts.empty()) {
-      this->value_counts = std::move(state.value_counts);
-    } else {
-      for (const auto& value_count : state.value_counts) {
-        auto value = value_count.first;
-        auto count = value_count.second;
-        this->value_counts[value] += count;
-      }
-    }
-    if (is_floating_type<ArrowType>::value) {
-      this->nan_count += state.nan_count;
-    }
+  CountModer(CType min, CType max) {
+    uint32_t value_range = static_cast<uint32_t>(max - min) + 1;
+    DCHECK_LT(value_range, 1 << 20);
+    this->min = min;
+    this->counts.resize(value_range, 0);
   }
 
-  // find top-n value/count pairs with min-heap (priority queue with '>' comparator)
-  void Finalize(CType* modes, int64_t* counts, const int64_t n) {
-    DCHECK(n >= 1 && n <= this->DistinctValues());
-
-    // mode 'greater than' comparator: larger count or same count with smaller value
-    using ValueCountPair = std::pair<CType, int64_t>;
-    auto mode_gt = [](const ValueCountPair& lhs, const ValueCountPair& rhs) {
-      const bool rhs_is_nan = rhs.first != rhs.first;  // nan as largest value
-      return lhs.second > rhs.second ||
-             (lhs.second == rhs.second && (lhs.first < rhs.first || rhs_is_nan));
+  void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    // count values in all chunks, ignore nulls
+    const Datum& datum = batch[0];
+    CountValues<CType>(this->counts.data(), datum, this->min);
+
+    // generator to emit next value:count pair
+    int index = 0;
+    auto gen = [&]() {
+      for (; index < static_cast<int>(counts.size()); ++index) {
+        if (counts[index] != 0) {
+          auto value_count =
+              std::make_pair(static_cast<CType>(index + this->min), counts[index]);
+          ++index;
+          return value_count;
+        }
+      }
+      return std::pair<CType, uint64_t>(0, kCountEOF);
     };
 
-    // initialize min-heap with first n modes
-    std::vector<ValueCountPair> vector(n);
-    // push nan if exists
-    const bool has_nan = is_floating_type<ArrowType>::value && this->nan_count > 0;
-    if (has_nan) {
-      vector[0] = std::make_pair(static_cast<CType>(NAN), this->nan_count);
-    }
-    // push n or n-1 modes
-    auto it = this->value_counts.cbegin();
-    for (int i = has_nan; i < n; ++i) {
-      vector[i] = *it++;
-    }
-    // turn to min-heap
-    std::priority_queue<ValueCountPair, std::vector<ValueCountPair>, decltype(mode_gt)>
-        min_heap(std::move(mode_gt), std::move(vector));
-
-    // iterate and insert modes into min-heap
-    // - mode < heap top: ignore mode
-    // - mode > heap top: discard heap top, insert mode
-    for (; it != this->value_counts.cend(); ++it) {
-      if (mode_gt(*it, min_heap.top())) {
-        min_heap.pop();
-        min_heap.push(*it);
+    Finalize<T>(ctx, out, std::move(gen));
+  }
+};
+
+// booleans can be handled more straightforward
+template <>
+struct CountModer<BooleanType> {
+  void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    int64_t counts[2]{};
+
+    const Datum& datum = batch[0];
+    for (const auto& array : datum.chunks()) {
+      if (array->length() > array->null_count()) {
+        const int64_t true_count =
+            arrow::internal::checked_pointer_cast<BooleanArray>(array)->true_count();
+        const int64_t false_count = array->length() - array->null_count() - true_count;
+        counts[true] += true_count;
+        counts[false] += false_count;
       }
     }
 
-    // pop modes from min-heap and insert into output array (in reverse order)
-    DCHECK_EQ(min_heap.size(), static_cast<size_t>(n));
-    for (int64_t i = n - 1; i >= 0; --i) {
-      std::tie(modes[i], counts[i]) = min_heap.top();
-      min_heap.pop();
+    const ModeOptions& options = ModeState::Get(ctx);
+    const int64_t distinct_values = (counts[0] != 0) + (counts[1] != 0);
+    const int64_t n = std::min(options.n, distinct_values);
+
+    bool* mode_buffer;
+    int64_t* count_buffer;
+    KERNEL_ASSIGN_OR_RAISE(std::tie(mode_buffer, count_buffer), ctx,
+                           PrepareOutput<BooleanType>(n, ctx, out));
+
+    if (n >= 1) {
+      const bool index = counts[1] > counts[0];
+      mode_buffer[0] = index;
+      count_buffer[0] = counts[index];
+      if (n == 2) {
+        mode_buffer[1] = !index;
+        count_buffer[1] = counts[!index];
+      }
     }
   }
-
-  int64_t DistinctValues() const {
-    return this->value_counts.size() +
-           (is_floating_type<ArrowType>::value && this->nan_count > 0);
-  }
-
-  int64_t nan_count = 0;  // only make sense to floating types
-  CounterMap<CType> value_counts;
 };
 
-template <typename ArrowType>
-struct ModeImpl : public ScalarAggregator {
-  using ThisType = ModeImpl<ArrowType>;
-  using ArrayType = typename TypeTraits<ArrowType>::ArrayType;
-  using CType = typename ArrowType::c_type;
-
-  ModeImpl(const std::shared_ptr<DataType>& out_type, const ModeOptions& options)
-      : out_type(out_type), options(options) {}
-
-  void Consume(KernelContext*, const ExecBatch& batch) override {
-    ArrayType array(batch[0].array());
-    this->state.value_counts = CountValues(array, this->state.nan_count);
-  }
+// copy and sort approach for floating points or integers with wide value range
+// O(n) space, O(nlogn) time
+template <typename T>
+struct SortModer {
+  using CType = typename T::c_type;
+  using Allocator = arrow::stl::allocator<CType>;
+
+  void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    // copy all chunks to a buffer, ignore nulls and nans
+    std::vector<CType, Allocator> in_buffer(Allocator(ctx->memory_pool()));
+
+    uint64_t nan_count = 0;
+    const Datum& datum = batch[0];
+    const int64_t in_length = datum.length() - datum.null_count();
+    if (in_length > 0) {
+      in_buffer.resize(in_length);
+      CopyNonNullValues<sizeof(CType)>(datum, in_buffer.data());
+
+      // drop nan
+      if (is_floating_type<T>::value) {
+        const auto& it = std::remove_if(in_buffer.begin(), in_buffer.end(),
+                                        [](CType v) { return v != v; });
+        nan_count = in_buffer.end() - it;
+        in_buffer.resize(it - in_buffer.begin());
+      }
+    }
 
-  void MergeFrom(KernelContext*, KernelState&& src) override {
-    auto& other = checked_cast<ThisType&>(src);
-    this->state.MergeFrom(std::move(other.state));
-  }
+    // sort the input data to count same values
+    std::sort(in_buffer.begin(), in_buffer.end());
+
+    // generator to emit next value:count pair
+    auto it = in_buffer.cbegin();
+    auto gen = [&]() {
+      if (ARROW_PREDICT_FALSE(it == in_buffer.cend())) {
+        // handle NAN at last
+        if (nan_count > 0) {
+          auto value_count = std::make_pair(static_cast<CType>(NAN), nan_count);
+          nan_count = 0;
+          return value_count;
+        }
+        return std::pair<CType, uint64_t>(static_cast<CType>(0), kCountEOF);
+      }
+      // count same values
+      const CType value = *it;
+      uint64_t count = 0;
+      do {
+        ++it;
+        ++count;
+      } while (it != in_buffer.cend() && *it == value);
+      return std::make_pair(value, count);
+    };
 
-  static std::shared_ptr<ArrayData> MakeArrayData(
-      const std::shared_ptr<DataType>& data_type, int64_t n) {
-    auto data = ArrayData::Make(data_type, n, 0);
-    data->buffers.resize(2);
-    data->buffers[0] = nullptr;
-    data->buffers[1] = nullptr;
-    return data;
+    Finalize<T>(ctx, out, std::move(gen));
   }
+};
 
-  void Finalize(KernelContext* ctx, Datum* out) override {
-    const auto& mode_type = TypeTraits<ArrowType>::type_singleton();
-    const auto& count_type = int64();
-    const auto& out_type =
-        struct_({field(kModeFieldName, mode_type), field(kCountFieldName, count_type)});
-
-    int64_t n = this->options.n;
-    if (n > state.DistinctValues()) {
-      n = state.DistinctValues();
-    } else if (n < 0) {
-      n = 0;
-    }
-
-    auto mode_data = this->MakeArrayData(mode_type, n);
-    auto count_data = this->MakeArrayData(count_type, n);
-    if (n > 0) {
-      KERNEL_ASSIGN_OR_RAISE(mode_data->buffers[1], ctx,
-                             ctx->Allocate(n * sizeof(CType)));
-      KERNEL_ASSIGN_OR_RAISE(count_data->buffers[1], ctx,
-                             ctx->Allocate(n * sizeof(int64_t)));
-      CType* mode_buffer = mode_data->template GetMutableValues<CType>(1);
-      int64_t* count_buffer = count_data->template GetMutableValues<int64_t>(1);
-      this->state.Finalize(mode_buffer, count_buffer, n);
+// pick counting or sorting approach per integers value range
+template <typename T>
+struct CountOrSortModer {
+  using CType = typename T::c_type;
+
+  void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    // cross point to benefit from counting approach
+    // about 2x improvement for int32/64 from micro-benchmarking
+    static constexpr int kMinArraySize = 8192;
+    static constexpr int kMaxValueRange = 32768;
+
+    const Datum& datum = batch[0];
+    if (datum.length() - datum.null_count() >= kMinArraySize) {
+      CType min, max;
+      std::tie(min, max) = GetMinMax<CType>(datum);
+
+      if (static_cast<uint64_t>(max) - static_cast<uint64_t>(min) <= kMaxValueRange) {
+        CountModer<T>(min, max).Exec(ctx, batch, out);
+        return;
+      }
     }
 
-    *out = Datum(ArrayData::Make(out_type, n, {nullptr}, {mode_data, count_data}, 0));
+    SortModer<T>().Exec(ctx, batch, out);
   }
+};
 
-  std::shared_ptr<DataType> out_type;
-  ModeState<ArrowType> state;
-  ModeOptions options;
+template <typename InType, typename Enable = void>
+struct Moder;
+
+template <>
+struct Moder<Int8Type> {
+  CountModer<Int8Type> impl;
+  Moder() : impl(-128, 127) {}
 };
 
-struct ModeInitState {
-  std::unique_ptr<KernelState> state;
-  KernelContext* ctx;
-  const DataType& in_type;
-  const std::shared_ptr<DataType>& out_type;
-  const ModeOptions& options;
+template <>
+struct Moder<UInt8Type> {
+  CountModer<UInt8Type> impl;
+  Moder() : impl(0, 255) {}
+};
 
-  ModeInitState(KernelContext* ctx, const DataType& in_type,
-                const std::shared_ptr<DataType>& out_type, const ModeOptions& options)
-      : ctx(ctx), in_type(in_type), out_type(out_type), options(options) {}
+template <>
+struct Moder<BooleanType> {
+  CountModer<BooleanType> impl;
+};
 
-  Status Visit(const DataType&) { return Status::NotImplemented("No mode implemented"); }
+template <typename InType>
+struct Moder<InType, enable_if_t<(is_integer_type<InType>::value &&
+                                  (sizeof(typename InType::c_type) > 1))>> {
+  CountOrSortModer<InType> impl;
+};
 
-  Status Visit(const HalfFloatType&) {
-    return Status::NotImplemented("No mode implemented");
-  }
+template <typename InType>
+struct Moder<InType, enable_if_t<is_floating_type<InType>::value>> {
+  SortModer<InType> impl;
+};
 
-  template <typename Type>
-  enable_if_t<is_number_type<Type>::value || is_boolean_type<Type>::value, Status> Visit(
-      const Type&) {
-    state.reset(new ModeImpl<Type>(out_type, options));
-    return Status::OK();
-  }
+template <typename _, typename InType>
+struct ModeExecutor {
+  static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    if (ctx->state() == nullptr) {
+      ctx->SetStatus(Status::Invalid("Mode requires ModeOptions"));
+      return;
+    }
+    const ModeOptions& options = ModeState::Get(ctx);
+    if (options.n <= 0) {
+      ctx->SetStatus(Status::Invalid("ModeOption::n must be strictly positive"));
+      return;
+    }
 
-  std::unique_ptr<KernelState> Create() {
-    ctx->SetStatus(VisitTypeInline(in_type, this));
-    return std::move(state);
+    Moder<InType>().impl.Exec(ctx, batch, out);
   }
 };
 
-std::unique_ptr<KernelState> ModeInit(KernelContext* ctx, const KernelInitArgs& args) {
-  ModeInitState visitor(ctx, *args.inputs[0].type,
-                        args.kernel->signature->out_type().type(),
-                        static_cast<const ModeOptions&>(*args.options));
-  return visitor.Create();
+VectorKernel NewModeKernel(const std::shared_ptr<DataType>& in_type) {
+  VectorKernel kernel;
+  kernel.init = ModeState::Init;
+  kernel.can_execute_chunkwise = false;
+  kernel.output_chunked = false;
+  auto out_type =
+      struct_({field(kModeFieldName, in_type), field(kCountFieldName, int64())});
+  kernel.signature =
+      KernelSignature::Make({InputType::Array(in_type)}, ValueDescr::Array(out_type));
+  return kernel;
+}
+
+void AddBooleanModeKernel(VectorFunction* func) {
+  VectorKernel kernel = NewModeKernel(boolean());
+  kernel.exec = ModeExecutor<StructType, BooleanType>::Exec;
+  DCHECK_OK(func->AddKernel(kernel));
 }
 
-void AddModeKernels(KernelInit init, const std::vector<std::shared_ptr<DataType>>& types,
-                    ScalarAggregateFunction* func) {
-  for (const auto& ty : types) {
-    // array[T] -> array[struct<mode: T, count: int64_t>]
-    auto out_ty = struct_({field(kModeFieldName, ty), field(kCountFieldName, int64())});
-    auto sig = KernelSignature::Make({InputType::Array(ty)}, ValueDescr::Array(out_ty));
-    AddAggKernel(std::move(sig), init, func);
+void AddNumericModeKernels(VectorFunction* func) {
+  for (const auto& type : NumericTypes()) {
+    VectorKernel kernel = NewModeKernel(type);
+    kernel.exec = GenerateNumeric<ModeExecutor, StructType>(*type);
+    DCHECK_OK(func->AddKernel(kernel));
   }
 }
 
@@ -379,19 +353,15 @@ const FunctionDoc mode_doc{
     {"array"},
     "ModeOptions"};
 
-std::shared_ptr<ScalarAggregateFunction> AddModeAggKernels() {
-  static auto default_mode_options = ModeOptions::Defaults();
-  auto func = std::make_shared<ScalarAggregateFunction>("mode", Arity::Unary(), &mode_doc,
-                                                        &default_mode_options);
-  AddModeKernels(ModeInit, {boolean()}, func.get());
-  AddModeKernels(ModeInit, NumericTypes(), func.get());
-  return func;
-}
-
 }  // namespace
 
 void RegisterScalarAggregateMode(FunctionRegistry* registry) {
-  DCHECK_OK(registry->AddFunction(AddModeAggKernels()));
+  static auto default_options = ModeOptions::Defaults();
+  auto func = std::make_shared<VectorFunction>("mode", Arity::Unary(), &mode_doc,
+                                               &default_options);
+  AddBooleanModeKernel(func.get());
+  AddNumericModeKernels(func.get());
+  DCHECK_OK(registry->AddFunction(std::move(func)));
 }
 
 }  // namespace internal
diff --git a/cpp/src/arrow/compute/kernels/aggregate_quantile.cc b/cpp/src/arrow/compute/kernels/aggregate_quantile.cc
index 705ecd4f9d5..f0de1be2793 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_quantile.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_quantile.cc
@@ -20,8 +20,8 @@
 
 #include "arrow/compute/api_aggregate.h"
 #include "arrow/compute/kernels/common.h"
+#include "arrow/compute/kernels/util_internal.h"
 #include "arrow/stl_allocator.h"
-#include "arrow/util/bit_run_reader.h"
 
 namespace arrow {
 namespace compute {
@@ -29,9 +29,6 @@ namespace internal {
 
 namespace {
 
-using arrow::internal::checked_pointer_cast;
-using arrow::internal::VisitSetBitRunsVoid;
-
 using QuantileState = internal::OptionsWrapper<QuantileOptions>;
 
 // output is at some input data point, not interpolated
@@ -90,12 +87,7 @@ struct SortQuantiler {
     const int64_t in_length = datum.length() - datum.null_count();
     if (in_length > 0) {
       in_buffer.resize(in_length);
-
-      int64_t index = 0;
-      for (const auto& array : datum.chunks()) {
-        index += CopyArray(in_buffer.data() + index, *array);
-      }
-      DCHECK_EQ(index, in_length);
+      CopyNonNullValues<sizeof(CType)>(datum, in_buffer.data());
 
       // drop nan
       if (is_floating_type<InType>::value) {
@@ -119,9 +111,8 @@ struct SortQuantiler {
 
     // calculate quantiles
     if (out_length > 0) {
-      const auto out_bit_width = checked_pointer_cast<NumberType>(out_type)->bit_width();
       KERNEL_ASSIGN_OR_RAISE(out_data->buffers[1], ctx,
-                             ctx->Allocate(out_length * out_bit_width / 8));
+                             ctx->Allocate(out_length * GetBitWidth(*out_type) / 8));
 
       // find quantiles in descending order
       std::vector<int64_t> q_indices(out_length);
@@ -154,22 +145,6 @@ struct SortQuantiler {
     *out = Datum(std::move(out_data));
   }
 
-  int64_t CopyArray(CType* buffer, const Array& array) {
-    const int64_t n = array.length() - array.null_count();
-    if (n > 0) {
-      int64_t index = 0;
-      const ArrayData& data = *array.data();
-      const CType* values = data.GetValues<CType>(1);
-      VisitSetBitRunsVoid(data.buffers[0], data.offset, data.length,
-                          [&](int64_t pos, int64_t len) {
-                            memcpy(buffer + index, values + pos, len * sizeof(CType));
-                            index += len;
-                          });
-      DCHECK_EQ(index, n);
-    }
-    return n;
-  }
-
   // return quantile located exactly at some input data point
   CType GetQuantileAtDataPoint(std::vector<CType, Allocator>& in, uint64_t* last_index,
                                double q,
@@ -248,7 +223,7 @@ struct CountQuantiler {
     uint32_t value_range = static_cast<uint32_t>(max - min) + 1;
     DCHECK_LT(value_range, 1 << 30);
     this->min = min;
-    this->counts.resize(value_range);
+    this->counts.resize(value_range, 0);
   }
 
   void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
@@ -256,20 +231,7 @@ struct CountQuantiler {
 
     // count values in all chunks, ignore nulls
     const Datum& datum = batch[0];
-    const int64_t in_length = datum.length() - datum.null_count();
-    if (in_length > 0) {
-      for (auto& c : this->counts) c = 0;
-      for (const auto& array : datum.chunks()) {
-        const ArrayData& data = *array->data();
-        const CType* values = data.GetValues<CType>(1);
-        VisitSetBitRunsVoid(data.buffers[0], data.offset, data.length,
-                            [&](int64_t pos, int64_t len) {
-                              for (int64_t i = 0; i < len; ++i) {
-                                ++this->counts[values[pos + i] - this->min];
-                              }
-                            });
-      }
-    }
+    int64_t in_length = CountValues<CType>(this->counts.data(), datum, this->min);
 
     // prepare out array
     int64_t out_length = options.q.size();
@@ -285,9 +247,8 @@ struct CountQuantiler {
 
     // calculate quantiles
     if (out_length > 0) {
-      const auto out_bit_width = checked_pointer_cast<NumberType>(out_type)->bit_width();
       KERNEL_ASSIGN_OR_RAISE(out_data->buffers[1], ctx,
-                             ctx->Allocate(out_length * out_bit_width / 8));
+                             ctx->Allocate(out_length * GetBitWidth(*out_type) / 8));
 
       // find quantiles in ascending order
       std::vector<int64_t> q_indices(out_length);
@@ -388,20 +349,8 @@ struct CountOrSortQuantiler {
 
     const Datum& datum = batch[0];
     if (datum.length() - datum.null_count() >= kMinArraySize) {
-      CType min = std::numeric_limits<CType>::max();
-      CType max = std::numeric_limits<CType>::min();
-
-      for (const auto& array : datum.chunks()) {
-        const ArrayData& data = *array->data();
-        const CType* values = data.GetValues<CType>(1);
-        VisitSetBitRunsVoid(data.buffers[0], data.offset, data.length,
-                            [&](int64_t pos, int64_t len) {
-                              for (int64_t i = 0; i < len; ++i) {
-                                min = std::min(min, values[pos + i]);
-                                max = std::max(max, values[pos + i]);
-                              }
-                            });
-      }
+      CType min, max;
+      std::tie(min, max) = GetMinMax<CType>(datum);
 
       if (static_cast<uint64_t>(max) - static_cast<uint64_t>(min) <= kMaxValueRange) {
         CountQuantiler<InType>(min, max).Exec(ctx, batch, out);
diff --git a/cpp/src/arrow/compute/kernels/aggregate_test.cc b/cpp/src/arrow/compute/kernels/aggregate_test.cc
index 22e7f512e97..ad7e391495e 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_test.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_test.cc
@@ -1078,7 +1078,7 @@ TEST_F(TestInt32ModeKernel, SmallValueRange) {
 }
 
 TEST_F(TestInt32ModeKernel, LargeValueRange) {
-  // Large value range => should exercise hashmap-based Mode implementation
+  // Large value range => should exercise sorter-based Mode implementation
   CheckModeWithRange<ArrowType>(-10000000, 10000000);
 }
 
diff --git a/cpp/src/arrow/compute/kernels/util_internal.h b/cpp/src/arrow/compute/kernels/util_internal.h
index f614439ffb8..326de2f56f5 100644
--- a/cpp/src/arrow/compute/kernels/util_internal.h
+++ b/cpp/src/arrow/compute/kernels/util_internal.h
@@ -18,11 +18,13 @@
 #pragma once
 
 #include <cstdint>
+#include <utility>
 
 #include "arrow/array/util.h"
 #include "arrow/buffer.h"
 #include "arrow/compute/kernels/codegen_internal.h"
 #include "arrow/compute/type_fwd.h"
+#include "arrow/util/bit_run_reader.h"
 
 namespace arrow {
 namespace compute {
@@ -62,6 +64,94 @@ PrimitiveArg GetPrimitiveArg(const ArrayData& arr);
 ArrayKernelExec TrivialScalarUnaryAsArraysExec(
     ArrayKernelExec exec, NullHandling::type null_handling = NullHandling::INTERSECTION);
 
+// Return (min, max) of a numerical array, ignore nulls.
+// For empty array, return the maximal number limit as 'min', and minimal limit as 'max'.
+template <typename T>
+ARROW_NOINLINE std::pair<T, T> GetMinMax(const ArrayData& data) {
+  T min = std::numeric_limits<T>::max();
+  T max = std::numeric_limits<T>::lowest();
+
+  const T* values = data.GetValues<T>(1);
+  arrow::internal::VisitSetBitRunsVoid(data.buffers[0], data.offset, data.length,
+                                       [&](int64_t pos, int64_t len) {
+                                         for (int64_t i = 0; i < len; ++i) {
+                                           min = std::min(min, values[pos + i]);
+                                           max = std::max(max, values[pos + i]);
+                                         }
+                                       });
+
+  return std::make_pair(min, max);
+}
+
+template <typename T>
+std::pair<T, T> GetMinMax(const Datum& datum) {
+  T min = std::numeric_limits<T>::max();
+  T max = std::numeric_limits<T>::lowest();
+
+  for (const auto& array : datum.chunks()) {
+    T local_min, local_max;
+    std::tie(local_min, local_max) = GetMinMax<T>(*array->data());
+    min = std::min(min, local_min);
+    max = std::max(max, local_max);
+  }
+
+  return std::make_pair(min, max);
+}
+
+// Count value occurrences of an array, ignore nulls.
+// 'counts' must be zeroed and with enough size.
+template <typename T>
+ARROW_NOINLINE int64_t CountValues(uint64_t* counts, const ArrayData& data, T min) {
+  const int64_t n = data.length - data.GetNullCount();
+  if (n > 0) {
+    const T* values = data.GetValues<T>(1);
+    arrow::internal::VisitSetBitRunsVoid(data.buffers[0], data.offset, data.length,
+                                         [&](int64_t pos, int64_t len) {
+                                           for (int64_t i = 0; i < len; ++i) {
+                                             ++counts[values[pos + i] - min];
+                                           }
+                                         });
+  }
+  return n;
+}
+
+template <typename T>
+int64_t CountValues(uint64_t* counts, const Datum& datum, T min) {
+  int64_t n = 0;
+  for (const auto& array : datum.chunks()) {
+    n += CountValues<T>(counts, *array->data(), min);
+  }
+  return n;
+}
+
+// Copy numerical array values to a buffer, ignore nulls.
+template <size_t SizeOfCType>
+ARROW_NOINLINE int64_t CopyNonNullValues(const ArrayData& data, void* out) {
+  uint8_t* u8_buffer = reinterpret_cast<uint8_t*>(out);
+  const int64_t n = data.length - data.GetNullCount();
+  if (n > 0) {
+    int64_t index = 0;
+    const uint8_t* u8_values = data.GetValues<uint8_t>(1);
+    arrow::internal::VisitSetBitRunsVoid(
+        data.buffers[0], data.offset, data.length, [&](int64_t pos, int64_t len) {
+          memcpy(u8_buffer + index * SizeOfCType, u8_values + pos * SizeOfCType,
+                 len * SizeOfCType);
+          index += len;
+        });
+  }
+  return n;
+}
+
+template <size_t SizeOfCType>
+int64_t CopyNonNullValues(const Datum& datum, void* out) {
+  uint8_t* u8_buffer = reinterpret_cast<uint8_t*>(out);
+  int64_t n = 0;
+  for (const auto& array : datum.chunks()) {
+    n += CopyNonNullValues<SizeOfCType>(*array->data(), u8_buffer + n * SizeOfCType);
+  }
+  return n;
+}
+
 }  // namespace internal
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/vector_sort.cc b/cpp/src/arrow/compute/kernels/vector_sort.cc
index a29c9311d86..8593613c8f5 100644
--- a/cpp/src/arrow/compute/kernels/vector_sort.cc
+++ b/cpp/src/arrow/compute/kernels/vector_sort.cc
@@ -25,6 +25,7 @@
 #include "arrow/array/data.h"
 #include "arrow/compute/api_vector.h"
 #include "arrow/compute/kernels/common.h"
+#include "arrow/compute/kernels/util_internal.h"
 #include "arrow/table.h"
 #include "arrow/type_traits.h"
 #include "arrow/util/bit_block_counter.h"
@@ -492,16 +493,8 @@ class ArrayCountOrCompareSorter {
   uint64_t* Sort(uint64_t* indices_begin, uint64_t* indices_end, const ArrayType& values,
                  int64_t offset, const ArraySortOptions& options) {
     if (values.length() >= countsort_min_len_ && values.length() > values.null_count()) {
-      c_type min{std::numeric_limits<c_type>::max()};
-      c_type max{std::numeric_limits<c_type>::min()};
-
-      VisitRawValuesInline(
-          values,
-          [&](c_type v) {
-            min = std::min(min, v);
-            max = std::max(max, v);
-          },
-          []() {});
+      c_type min, max;
+      std::tie(min, max) = GetMinMax<c_type>(*values.data());
 
       // For signed int32/64, (max - min) may overflow and trigger UBSAN.
       // Cast to largest unsigned type(uint64_t) before subtraction.
diff --git a/cpp/src/arrow/util/bit_run_reader.h b/cpp/src/arrow/util/bit_run_reader.h
index 5933ccf3b9a..3e196628477 100644
--- a/cpp/src/arrow/util/bit_run_reader.h
+++ b/cpp/src/arrow/util/bit_run_reader.h
@@ -462,8 +462,8 @@ using ReverseSetBitRunReader = BaseSetBitRunReader</*Reverse=*/true>;
 // - don't inline SetBitRunReader constructor, it doesn't hurt performance
 // - un-inline NextRun hurts 'many null' cases a bit, but improves normal cases
 template <typename Visit>
-Status VisitSetBitRuns(const uint8_t* bitmap, int64_t offset, int64_t length,
-                       Visit&& visit) {
+inline Status VisitSetBitRuns(const uint8_t* bitmap, int64_t offset, int64_t length,
+                              Visit&& visit) {
   if (bitmap == NULLPTR) {
     // Assuming all set (as in a null bitmap)
     return visit(static_cast<int64_t>(0), static_cast<int64_t>(length));
@@ -480,8 +480,8 @@ Status VisitSetBitRuns(const uint8_t* bitmap, int64_t offset, int64_t length,
 }
 
 template <typename Visit>
-void VisitSetBitRunsVoid(const uint8_t* bitmap, int64_t offset, int64_t length,
-                         Visit&& visit) {
+inline void VisitSetBitRunsVoid(const uint8_t* bitmap, int64_t offset, int64_t length,
+                                Visit&& visit) {
   if (bitmap == NULLPTR) {
     // Assuming all set (as in a null bitmap)
     visit(static_cast<int64_t>(0), static_cast<int64_t>(length));
@@ -498,15 +498,15 @@ void VisitSetBitRunsVoid(const uint8_t* bitmap, int64_t offset, int64_t length,
 }
 
 template <typename Visit>
-Status VisitSetBitRuns(const std::shared_ptr<Buffer>& bitmap, int64_t offset,
-                       int64_t length, Visit&& visit) {
+inline Status VisitSetBitRuns(const std::shared_ptr<Buffer>& bitmap, int64_t offset,
+                              int64_t length, Visit&& visit) {
   return VisitSetBitRuns(bitmap ? bitmap->data() : NULLPTR, offset, length,
                          std::forward<Visit>(visit));
 }
 
 template <typename Visit>
-void VisitSetBitRunsVoid(const std::shared_ptr<Buffer>& bitmap, int64_t offset,
-                         int64_t length, Visit&& visit) {
+inline void VisitSetBitRunsVoid(const std::shared_ptr<Buffer>& bitmap, int64_t offset,
+                                int64_t length, Visit&& visit) {
   VisitSetBitRunsVoid(bitmap ? bitmap->data() : NULLPTR, offset, length,
                       std::forward<Visit>(visit));
 }

From d11aaf4520f3c8beb1cecad0a34f1b01692607af Mon Sep 17 00:00:00 2001
From: Dominik Moritz <domoritz@gmail.com>
Date: Thu, 15 Apr 2021 17:33:25 +0200
Subject: [PATCH 059/719] ARROW-12383: [JS] Upgrade dependencies
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #10028 from domoritz/updates

Authored-by: Dominik Moritz <domoritz@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 js/gulp/closure-task.js |   4 +-
 js/package.json         |  29 ++-
 js/test/Arrow.ts        |   2 +-
 js/yarn.lock            | 499 ++++++++++++++++++----------------------
 4 files changed, 241 insertions(+), 293 deletions(-)

diff --git a/js/gulp/closure-task.js b/js/gulp/closure-task.js
index f525cd25cd0..d0ecb12d874 100644
--- a/js/gulp/closure-task.js
+++ b/js/gulp/closure-task.js
@@ -77,7 +77,9 @@ const closureTask = ((cache) => memoizeTask(cache, async function closure(target
                 `${src}/**/*.js` /* <-- then source globs */
             ], { base: `./` }),
             sourcemaps.init(),
-            closureCompiler(createClosureArgs(entry_point, externs)),
+            closureCompiler(createClosureArgs(entry_point, externs), {
+                platform: ['native', 'java', 'javascript']
+            }),
             // rename the sourcemaps from *.js.map files to *.min.js.map
             sourcemaps.write(`.`, { mapFile: (mapPath) => mapPath.replace(`.js.map`, `.${target}.min.js.map`) }),
             gulp.dest(out)
diff --git a/js/package.json b/js/package.json
index 4cbdffd085a..880a5b9c11c 100644
--- a/js/package.json
+++ b/js/package.json
@@ -53,22 +53,22 @@
     "jest.coverage.config.js"
   ],
   "dependencies": {
-    "@types/flatbuffers": "^1.9.1",
-    "@types/node": "^12.0.4",
+    "@types/flatbuffers": "^1.10.0",
+    "@types/node": "^14.14.37",
     "@types/text-encoding-utf-8": "^1.0.1",
-    "command-line-args": "5.0.2",
-    "command-line-usage": "5.0.5",
+    "command-line-args": "5.1.1",
+    "command-line-usage": "6.1.1",
     "flatbuffers": "1.12.0",
     "json-bignum": "^0.0.3",
     "pad-left": "^2.1.0",
     "text-encoding-utf-8": "^1.0.2",
-    "tslib": "^1.12.0"
+    "tslib": "^2.2.0"
   },
   "devDependencies": {
     "@types/glob": "7.1.1",
-    "@types/jest": "25.2.2",
-    "@typescript-eslint/eslint-plugin": "^4.21.0",
-    "@typescript-eslint/parser": "^4.21.0",
+    "@types/jest": "26.0.22",
+    "@typescript-eslint/eslint-plugin": "^4.22.0",
+    "@typescript-eslint/parser": "^4.22.0",
     "async-done": "1.3.1",
     "benchmark": "2.1.4",
     "cpy": "^8.1.2",
@@ -77,16 +77,15 @@
     "eslint-plugin-jest": "^24.3.5",
     "esm": "3.2.25",
     "glob": "7.1.4",
-    "google-closure-compiler": "20200830.0.0",
+    "google-closure-compiler": "20210406.0.0",
     "gulp": "4.0.2",
     "gulp-json-transform": "0.4.6",
     "gulp-rename": "1.4.0",
     "gulp-sourcemaps": "2.6.5",
     "gulp-typescript": "5.0.1",
     "ix": "2.5.3",
-    "jest": "26.3.0",
+    "jest": "26.6.3",
     "jest-silent-reporter": "0.1.2",
-    "json": "9.0.6",
     "lerna": "3.22.1",
     "memfs": "2.15.2",
     "mkdirp": "1.0.4",
@@ -96,12 +95,12 @@
     "rxjs": "5.5.11",
     "source-map-loader": "0.2.4",
     "terser-webpack-plugin": "4.2.2",
-    "ts-jest": "26.3.0",
-    "ts-node": "9.0.0",
-    "typedoc": "0.20.19",
+    "ts-jest": "26.5.4",
+    "ts-node": "9.1.1",
+    "typedoc": "0.20.35",
     "typescript": "4.0.2",
     "web-stream-tools": "0.0.1",
-    "web-streams-polyfill": "2.0.3",
+    "web-streams-polyfill": "3.0.3",
     "webpack": "4.29.0",
     "xml2js": "0.4.19"
   },
diff --git a/js/test/Arrow.ts b/js/test/Arrow.ts
index 43c8c167bd7..f70cb29db05 100644
--- a/js/test/Arrow.ts
+++ b/js/test/Arrow.ts
@@ -17,7 +17,7 @@
 
 // Dynamically load an Arrow target build based on command line arguments
 
-import 'web-streams-polyfill';
+import 'web-streams-polyfill/es6';
 
 // import this before assigning window global since it does a `typeof window` check
 require('web-stream-tools');
diff --git a/js/yarn.lock b/js/yarn.lock
index f889f9a5f62..a2eb4484b22 100644
--- a/js/yarn.lock
+++ b/js/yarn.lock
@@ -448,7 +448,7 @@
     jest-util "^26.6.2"
     slash "^3.0.0"
 
-"@jest/core@^26.3.0", "@jest/core@^26.6.3":
+"@jest/core@^26.6.3":
   version "26.6.3"
   resolved "https://registry.yarnpkg.com/@jest/core/-/core-26.6.3.tgz#7639fcb3833d748a4656ada54bde193051e45fad"
   integrity sha512-xvV1kKbhfUqFVuZ8Cyo+JPpipAHHAV3kcDBftiduK8EICXmTFddryy3P7NfZt8Pv37rA9nEJBKCCkglCPt/Xjw==
@@ -632,16 +632,6 @@
     "@types/istanbul-reports" "^1.1.1"
     "@types/yargs" "^13.0.0"
 
-"@jest/types@^25.5.0":
-  version "25.5.0"
-  resolved "https://registry.yarnpkg.com/@jest/types/-/types-25.5.0.tgz#4d6a4793f7b9599fc3680877b856a97dbccf2a9d"
-  integrity sha512-OXD0RgQ86Tu3MazKo8bnrkDRaDXXMGUqd+kTtLtK1Zb7CRzQcaSRPPPV37SvYTdevXEBVxe0HXylEjs8ibkmCw==
-  dependencies:
-    "@types/istanbul-lib-coverage" "^2.0.0"
-    "@types/istanbul-reports" "^1.1.1"
-    "@types/yargs" "^15.0.0"
-    chalk "^3.0.0"
-
 "@jest/types@^26.6.2":
   version "26.6.2"
   resolved "https://registry.yarnpkg.com/@jest/types/-/types-26.6.2.tgz#bef5a532030e1d88a2f5a6d933f84e97226ed48e"
@@ -1546,7 +1536,7 @@
   resolved "https://registry.yarnpkg.com/@types/events/-/events-3.0.0.tgz#2862f3f58a9a7f7c3e78d79f130dd4d71c25c2a7"
   integrity sha512-EaObqwIvayI5a8dCzhFrjKzVwKLxjoG9T6Ppd5CEo07LRKfQ8Yokw54r5+Wq7FaBQ+yXRvQAYPrHwya1/UFt9g==
 
-"@types/flatbuffers@^1.9.1":
+"@types/flatbuffers@^1.10.0":
   version "1.10.0"
   resolved "https://registry.yarnpkg.com/@types/flatbuffers/-/flatbuffers-1.10.0.tgz#aa74e30ffdc86445f2f060e1808fc9d56b5603ba"
   integrity sha512-7btbphLrKvo5yl/5CC2OCxUSMx1wV1wvGT1qDXkSt7yi00/YW7E8k6qzXqJHsp+WU0eoG7r6MTQQXI9lIvd0qA==
@@ -1602,15 +1592,7 @@
   dependencies:
     "@types/istanbul-lib-report" "*"
 
-"@types/jest@25.2.2":
-  version "25.2.2"
-  resolved "https://registry.yarnpkg.com/@types/jest/-/jest-25.2.2.tgz#6a752e7a00f69c3e790ea00c345029d5cefa92bf"
-  integrity sha512-aRctFbG8Pb7DSLzUt/fEtL3q/GKb9mretFuYhRub2J0q6NhzBYbx9HTQzHrWgBNIxYOlxGNVe6Z54cpbUt+Few==
-  dependencies:
-    jest-diff "^25.2.1"
-    pretty-format "^25.2.1"
-
-"@types/jest@26.x":
+"@types/jest@26.0.22":
   version "26.0.22"
   resolved "https://registry.yarnpkg.com/@types/jest/-/jest-26.0.22.tgz#8308a1debdf1b807aa47be2838acdcd91e88fbe6"
   integrity sha512-eeWwWjlqxvBxc4oQdkueW5OF/gtfSceKk4OnOAGlUSwS/liBRtZppbJuz1YkgbrbfGOoeBHun9fOvXnjNwrSOw==
@@ -1633,7 +1615,7 @@
   resolved "https://registry.yarnpkg.com/@types/minimist/-/minimist-1.2.1.tgz#283f669ff76d7b8260df8ab7a4262cc83d988256"
   integrity sha512-fZQQafSREFyuZcdWFAExYjBiCL7AUCdgsk80iO0q4yihYYdcIiH28CcuPTGFgLOCC8RlW49GSQxdHwZP+I7CNg==
 
-"@types/node@*", "@types/node@>= 8":
+"@types/node@*", "@types/node@>= 8", "@types/node@^14.14.37":
   version "14.14.37"
   resolved "https://registry.yarnpkg.com/@types/node/-/node-14.14.37.tgz#a3dd8da4eb84a996c36e331df98d82abd76b516e"
   integrity sha512-XYmBiy+ohOR4Lh5jE379fV2IU+6Jn4g5qASinhitfyO71b/sCo6MKsMLF5tc7Zf2CE8hViVQyYSobJNke8OvUw==
@@ -1643,11 +1625,6 @@
   resolved "https://registry.yarnpkg.com/@types/node/-/node-11.15.50.tgz#a8c76622a20320d4a04adf2002b04737c510ef11"
   integrity sha512-kG/ZmA/uD1L1gVD7vVXQB6v+ICZlJgvakrodHiltT3Zq0YjXq5H9tfgop8MsdMGCwrcLJg9QCQDRP4DZsn9T/g==
 
-"@types/node@^12.0.4":
-  version "12.20.7"
-  resolved "https://registry.yarnpkg.com/@types/node/-/node-12.20.7.tgz#1cb61fd0c85cb87e728c43107b5fd82b69bc9ef8"
-  integrity sha512-gWL8VUkg8VRaCAUgG9WmhefMqHmMblxe2rVpMF86nZY/+ZysU+BkAp+3cz03AixWDSSz0ks5WX59yAhv/cDwFA==
-
 "@types/normalize-package-data@^2.4.0":
   version "2.4.0"
   resolved "https://registry.yarnpkg.com/@types/normalize-package-data/-/normalize-package-data-2.4.0.tgz#e486d0d97396d79beedd0a6e33f4534ff6b4973e"
@@ -1692,13 +1669,13 @@
   dependencies:
     "@types/yargs-parser" "*"
 
-"@typescript-eslint/eslint-plugin@^4.21.0":
-  version "4.21.0"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/eslint-plugin/-/eslint-plugin-4.21.0.tgz#3fce2bfa76d95c00ac4f33dff369cb593aab8878"
-  integrity sha512-FPUyCPKZbVGexmbCFI3EQHzCZdy2/5f+jv6k2EDljGdXSRc0cKvbndd2nHZkSLqCNOPk0jB6lGzwIkglXcYVsQ==
+"@typescript-eslint/eslint-plugin@^4.22.0":
+  version "4.22.0"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/eslint-plugin/-/eslint-plugin-4.22.0.tgz#3d5f29bb59e61a9dba1513d491b059e536e16dbc"
+  integrity sha512-U8SP9VOs275iDXaL08Ln1Fa/wLXfj5aTr/1c0t0j6CdbOnxh+TruXu1p4I0NAvdPBQgoPjHsgKn28mOi0FzfoA==
   dependencies:
-    "@typescript-eslint/experimental-utils" "4.21.0"
-    "@typescript-eslint/scope-manager" "4.21.0"
+    "@typescript-eslint/experimental-utils" "4.22.0"
+    "@typescript-eslint/scope-manager" "4.22.0"
     debug "^4.1.1"
     functional-red-black-tree "^1.0.1"
     lodash "^4.17.15"
@@ -1706,7 +1683,19 @@
     semver "^7.3.2"
     tsutils "^3.17.1"
 
-"@typescript-eslint/experimental-utils@4.21.0", "@typescript-eslint/experimental-utils@^4.0.1":
+"@typescript-eslint/experimental-utils@4.22.0":
+  version "4.22.0"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/experimental-utils/-/experimental-utils-4.22.0.tgz#68765167cca531178e7b650a53456e6e0bef3b1f"
+  integrity sha512-xJXHHl6TuAxB5AWiVrGhvbGL8/hbiCQ8FiWwObO3r0fnvBdrbWEDy1hlvGQOAWc6qsCWuWMKdVWlLAEMpxnddg==
+  dependencies:
+    "@types/json-schema" "^7.0.3"
+    "@typescript-eslint/scope-manager" "4.22.0"
+    "@typescript-eslint/types" "4.22.0"
+    "@typescript-eslint/typescript-estree" "4.22.0"
+    eslint-scope "^5.0.0"
+    eslint-utils "^2.0.0"
+
+"@typescript-eslint/experimental-utils@^4.0.1":
   version "4.21.0"
   resolved "https://registry.yarnpkg.com/@typescript-eslint/experimental-utils/-/experimental-utils-4.21.0.tgz#0b0bb7c15d379140a660c003bdbafa71ae9134b6"
   integrity sha512-cEbgosW/tUFvKmkg3cU7LBoZhvUs+ZPVM9alb25XvR0dal4qHL3SiUqHNrzoWSxaXA9gsifrYrS1xdDV6w/gIA==
@@ -1718,14 +1707,14 @@
     eslint-scope "^5.0.0"
     eslint-utils "^2.0.0"
 
-"@typescript-eslint/parser@^4.21.0":
-  version "4.21.0"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/parser/-/parser-4.21.0.tgz#a227fc2af4001668c3e3f7415d4feee5093894c1"
-  integrity sha512-eyNf7QmE5O/l1smaQgN0Lj2M/1jOuNg2NrBm1dqqQN0sVngTLyw8tdCbih96ixlhbF1oINoN8fDCyEH9SjLeIA==
+"@typescript-eslint/parser@^4.22.0":
+  version "4.22.0"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/parser/-/parser-4.22.0.tgz#e1637327fcf796c641fe55f73530e90b16ac8fe8"
+  integrity sha512-z/bGdBJJZJN76nvAY9DkJANYgK3nlRstRRi74WHm3jjgf2I8AglrSY+6l7ogxOmn55YJ6oKZCLLy+6PW70z15Q==
   dependencies:
-    "@typescript-eslint/scope-manager" "4.21.0"
-    "@typescript-eslint/types" "4.21.0"
-    "@typescript-eslint/typescript-estree" "4.21.0"
+    "@typescript-eslint/scope-manager" "4.22.0"
+    "@typescript-eslint/types" "4.22.0"
+    "@typescript-eslint/typescript-estree" "4.22.0"
     debug "^4.1.1"
 
 "@typescript-eslint/scope-manager@4.21.0":
@@ -1736,11 +1725,24 @@
     "@typescript-eslint/types" "4.21.0"
     "@typescript-eslint/visitor-keys" "4.21.0"
 
+"@typescript-eslint/scope-manager@4.22.0":
+  version "4.22.0"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/scope-manager/-/scope-manager-4.22.0.tgz#ed411545e61161a8d702e703a4b7d96ec065b09a"
+  integrity sha512-OcCO7LTdk6ukawUM40wo61WdeoA7NM/zaoq1/2cs13M7GyiF+T4rxuA4xM+6LeHWjWbss7hkGXjFDRcKD4O04Q==
+  dependencies:
+    "@typescript-eslint/types" "4.22.0"
+    "@typescript-eslint/visitor-keys" "4.22.0"
+
 "@typescript-eslint/types@4.21.0":
   version "4.21.0"
   resolved "https://registry.yarnpkg.com/@typescript-eslint/types/-/types-4.21.0.tgz#abdc3463bda5d31156984fa5bc316789c960edef"
   integrity sha512-+OQaupjGVVc8iXbt6M1oZMwyKQNehAfLYJJ3SdvnofK2qcjfor9pEM62rVjBknhowTkh+2HF+/KdRAc/wGBN2w==
 
+"@typescript-eslint/types@4.22.0":
+  version "4.22.0"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/types/-/types-4.22.0.tgz#0ca6fde5b68daf6dba133f30959cc0688c8dd0b6"
+  integrity sha512-sW/BiXmmyMqDPO2kpOhSy2Py5w6KvRRsKZnV0c4+0nr4GIcedJwXAq+RHNK4lLVEZAJYFltnnk1tJSlbeS9lYA==
+
 "@typescript-eslint/typescript-estree@4.21.0":
   version "4.21.0"
   resolved "https://registry.yarnpkg.com/@typescript-eslint/typescript-estree/-/typescript-estree-4.21.0.tgz#3817bd91857beeaeff90f69f1f112ea58d350b0a"
@@ -1754,6 +1756,19 @@
     semver "^7.3.2"
     tsutils "^3.17.1"
 
+"@typescript-eslint/typescript-estree@4.22.0":
+  version "4.22.0"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/typescript-estree/-/typescript-estree-4.22.0.tgz#b5d95d6d366ff3b72f5168c75775a3e46250d05c"
+  integrity sha512-TkIFeu5JEeSs5ze/4NID+PIcVjgoU3cUQUIZnH3Sb1cEn1lBo7StSV5bwPuJQuoxKXlzAObjYTilOEKRuhR5yg==
+  dependencies:
+    "@typescript-eslint/types" "4.22.0"
+    "@typescript-eslint/visitor-keys" "4.22.0"
+    debug "^4.1.1"
+    globby "^11.0.1"
+    is-glob "^4.0.1"
+    semver "^7.3.2"
+    tsutils "^3.17.1"
+
 "@typescript-eslint/visitor-keys@4.21.0":
   version "4.21.0"
   resolved "https://registry.yarnpkg.com/@typescript-eslint/visitor-keys/-/visitor-keys-4.21.0.tgz#990a9acdc124331f5863c2cf21c88ba65233cd8d"
@@ -1762,6 +1777,14 @@
     "@typescript-eslint/types" "4.21.0"
     eslint-visitor-keys "^2.0.0"
 
+"@typescript-eslint/visitor-keys@4.22.0":
+  version "4.22.0"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/visitor-keys/-/visitor-keys-4.22.0.tgz#169dae26d3c122935da7528c839f42a8a42f6e47"
+  integrity sha512-nnMu4F+s4o0sll6cBSsTeVsT4cwxB7zECK3dFxzEjPBii9xLpq4yqqsy/FU5zMfan6G60DKZSCXAa3sHJZrcYw==
+  dependencies:
+    "@typescript-eslint/types" "4.22.0"
+    eslint-visitor-keys "^2.0.0"
+
 "@webassemblyjs/ast@1.7.11":
   version "1.7.11"
   resolved "https://registry.yarnpkg.com/@webassemblyjs/ast/-/ast-1.7.11.tgz#b988582cafbb2b095e8b556526f30c90d057cace"
@@ -2182,14 +2205,6 @@ argparse@^1.0.7:
   dependencies:
     sprintf-js "~1.0.2"
 
-argv-tools@^0.1.1:
-  version "0.1.2"
-  resolved "https://registry.yarnpkg.com/argv-tools/-/argv-tools-0.1.2.tgz#fc4918a70775b8cc5f8296fa0cfea137bd8a8229"
-  integrity sha512-wxqoymY0BEu9NblZVQiOTOAiJUjPhaa/kbNMjC2h6bnrmUSgnxKgWJo3lzXvi3bHJRwXyqK/dHzMlZVRT89Cxg==
-  dependencies:
-    array-back "^2.0.0"
-    find-replace "^2.0.1"
-
 arr-diff@^4.0.0:
   version "4.0.0"
   resolved "https://registry.yarnpkg.com/arr-diff/-/arr-diff-4.0.0.tgz#d6461074febfec71e7e15235761a329a5dc7c520"
@@ -2219,12 +2234,15 @@ arr-union@^3.1.0:
   resolved "https://registry.yarnpkg.com/arr-union/-/arr-union-3.1.0.tgz#e39b09aea9def866a8f206e288af63919bae39c4"
   integrity sha1-45sJrqne+Gao8gbiiK9jkZuuOcQ=
 
-array-back@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/array-back/-/array-back-2.0.0.tgz#6877471d51ecc9c9bfa6136fb6c7d5fe69748022"
-  integrity sha512-eJv4pLLufP3g5kcZry0j6WXpIbzYw9GUB4mVJZno9wfwiBxbizTnHCw3VJb07cBihbFX48Y7oSrW9y+gt4glyw==
-  dependencies:
-    typical "^2.6.1"
+array-back@^3.0.1:
+  version "3.1.0"
+  resolved "https://registry.yarnpkg.com/array-back/-/array-back-3.1.0.tgz#b8859d7a508871c9a7b2cf42f99428f65e96bfb0"
+  integrity sha512-TkuxA4UCOvxuDK6NZYXCalszEzj+TLszyASooky+i742l9TqsOdYCMJJupxRic61hwquNtppB3hgcuq9SVSH1Q==
+
+array-back@^4.0.1:
+  version "4.0.1"
+  resolved "https://registry.yarnpkg.com/array-back/-/array-back-4.0.1.tgz#9b80312935a52062e1a233a9c7abeb5481b30e90"
+  integrity sha512-Z/JnaVEXv+A9xabHzN43FiiiWEE7gPCRXMrVmRm00tWbjZRul1iHm7ECzlyNq1p4a4ATXz+G9FJ3GqGOkOV3fg==
 
 array-differ@^2.0.3:
   version "2.1.0"
@@ -2926,14 +2944,6 @@ chalk@2.x, chalk@^2.0.0, chalk@^2.0.1, chalk@^2.3.1, chalk@^2.4.1, chalk@^2.4.2:
     escape-string-regexp "^1.0.5"
     supports-color "^5.3.0"
 
-chalk@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/chalk/-/chalk-3.0.0.tgz#3f73c2bf526591f574cc492c51e2456349f844e4"
-  integrity sha512-4D3B6Wf41KOYRFdszmDqMCGq5VV/uMAB273JILmO+3jAlh8X4qDtdtgCR3fxtbLEMzSx22QdhnDcJvu2u1fVwg==
-  dependencies:
-    ansi-styles "^4.1.0"
-    supports-color "^7.1.0"
-
 chalk@^4.0.0:
   version "4.1.0"
   resolved "https://registry.yarnpkg.com/chalk/-/chalk-4.1.0.tgz#4e14870a618d9e2edd97dd8345fd9d9dc315646a"
@@ -3197,26 +3207,25 @@ combined-stream@^1.0.6, combined-stream@~1.0.6:
   dependencies:
     delayed-stream "~1.0.0"
 
-command-line-args@5.0.2:
-  version "5.0.2"
-  resolved "https://registry.yarnpkg.com/command-line-args/-/command-line-args-5.0.2.tgz#c4e56b016636af1323cf485aa25c3cb203dfbbe4"
-  integrity sha512-/qPcbL8zpqg53x4rAaqMFlRV4opN3pbla7I7k9x8kyOBMQoGT6WltjN6sXZuxOXw6DgdK7Ad+ijYS5gjcr7vlA==
+command-line-args@5.1.1:
+  version "5.1.1"
+  resolved "https://registry.yarnpkg.com/command-line-args/-/command-line-args-5.1.1.tgz#88e793e5bb3ceb30754a86863f0401ac92fd369a"
+  integrity sha512-hL/eG8lrll1Qy1ezvkant+trihbGnaKaeEjj6Scyr3DN+RC7iQ5Rz84IeLERfAWDGo0HBSNAakczwgCilDXnWg==
   dependencies:
-    argv-tools "^0.1.1"
-    array-back "^2.0.0"
-    find-replace "^2.0.1"
+    array-back "^3.0.1"
+    find-replace "^3.0.0"
     lodash.camelcase "^4.3.0"
-    typical "^2.6.1"
+    typical "^4.0.0"
 
-command-line-usage@5.0.5:
-  version "5.0.5"
-  resolved "https://registry.yarnpkg.com/command-line-usage/-/command-line-usage-5.0.5.tgz#5f25933ffe6dedd983c635d38a21d7e623fda357"
-  integrity sha512-d8NrGylA5oCXSbGoKz05FkehDAzSmIm4K03S5VDh4d5lZAtTWfc3D1RuETtuQCn8129nYfJfDdF7P/lwcz1BlA==
+command-line-usage@6.1.1:
+  version "6.1.1"
+  resolved "https://registry.yarnpkg.com/command-line-usage/-/command-line-usage-6.1.1.tgz#c908e28686108917758a49f45efb4f02f76bc03f"
+  integrity sha512-F59pEuAR9o1SF/bD0dQBDluhpT4jJQNWUHEuVBqpDmCUo6gPjCi+m9fCWnWZVR/oG6cMTUms4h+3NPl74wGXvA==
   dependencies:
-    array-back "^2.0.0"
-    chalk "^2.4.1"
-    table-layout "^0.4.3"
-    typical "^2.6.1"
+    array-back "^4.0.1"
+    chalk "^2.4.2"
+    table-layout "^1.0.1"
+    typical "^5.2.0"
 
 commander@^2.20.0:
   version "2.20.3"
@@ -3475,6 +3484,11 @@ create-hmac@^1.1.0, create-hmac@^1.1.4, create-hmac@^1.1.7:
     safe-buffer "^5.0.1"
     sha.js "^2.4.8"
 
+create-require@^1.1.0:
+  version "1.1.1"
+  resolved "https://registry.yarnpkg.com/create-require/-/create-require-1.1.1.tgz#c1d7e8f1e5f6cfc9ff65f9cd352d37348756c333"
+  integrity sha512-dcKFX3jn0MpIaXjisoRvexIJVEKzaq7z2rZKxf+MSr9TkdmHmsU4m2lcLojrj/FHl8mk5VxMmYA+ftRkP/3oKQ==
+
 cross-spawn@^6.0.0, cross-spawn@^6.0.5:
   version "6.0.5"
   resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-6.0.5.tgz#4a5ec7c64dfae22c3a14124dbacdee846d80cbc4"
@@ -3793,11 +3807,6 @@ dezalgo@^1.0.0:
     asap "^2.0.0"
     wrappy "1"
 
-diff-sequences@^25.2.6:
-  version "25.2.6"
-  resolved "https://registry.yarnpkg.com/diff-sequences/-/diff-sequences-25.2.6.tgz#5f467c00edd35352b7bca46d7927d60e687a76dd"
-  integrity sha512-Hq8o7+6GaZeoFjtpgvRBUknSXNeJiCx7V9Fr94ZMljNiCr9n9L8H8aJqgWOQiDDGdyn29fRNcDdRVJ5fdyihfg==
-
 diff-sequences@^26.6.2:
   version "26.6.2"
   resolved "https://registry.yarnpkg.com/diff-sequences/-/diff-sequences-26.6.2.tgz#48ba99157de1923412eed41db6b6d4aa9ca7c0b1"
@@ -4522,13 +4531,12 @@ find-cache-dir@^3.3.1:
     make-dir "^3.0.2"
     pkg-dir "^4.1.0"
 
-find-replace@^2.0.1:
-  version "2.0.1"
-  resolved "https://registry.yarnpkg.com/find-replace/-/find-replace-2.0.1.tgz#6d9683a7ca20f8f9aabeabad07e4e2580f528550"
-  integrity sha512-LzDo3Fpa30FLIBsh6DCDnMN1KW2g4QKkqKmejlImgWY67dDFPX/x9Kh/op/GK522DchQXEvDi/wD48HKW49XOQ==
+find-replace@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/find-replace/-/find-replace-3.0.0.tgz#3e7e23d3b05167a76f770c9fbd5258b0def68c38"
+  integrity sha512-6Tb2myMioCAgv5kfvP5/PkZZ/ntTpVK39fHY7WkWBgvbeE+VHd/tZuZ4mrC+bxh4cfOZeYKVPaJIZtZXV7GNCQ==
   dependencies:
-    array-back "^2.0.0"
-    test-value "^3.0.0"
+    array-back "^3.0.1"
 
 find-up@^1.0.0:
   version "1.1.2"
@@ -4672,7 +4680,7 @@ fs-extra@^8.1.0:
     jsonfile "^4.0.0"
     universalify "^0.1.0"
 
-fs-extra@^9.0.1:
+fs-extra@^9.1.0:
   version "9.1.0"
   resolved "https://registry.yarnpkg.com/fs-extra/-/fs-extra-9.1.0.tgz#5954460c764a8da2094ba3554bf839e6b9a7c86d"
   integrity sha512-hcg3ZmepS30/7BSFqRvoo3DOMQu7IjqxO5nCDt+zM9XWjb33Wg7ziNT+Qvqbuc3+gWpzO02JubVyk2G4Zvo1OQ==
@@ -5050,40 +5058,40 @@ glogg@^1.0.0:
   dependencies:
     sparkles "^1.0.0"
 
-google-closure-compiler-java@^20200830.0.0:
-  version "20200830.0.0"
-  resolved "https://registry.yarnpkg.com/google-closure-compiler-java/-/google-closure-compiler-java-20200830.0.0.tgz#627cbddb17fd0012f901450ee06a617bdb7023a7"
-  integrity sha512-DLlcY875mQB7PA9wtfbPBVL9chJj+si/cmxyp3euw7x09MiFYynR4tmQJ9KjWUffPbhvCRDEO/jKcVyNWQVS1Q==
+google-closure-compiler-java@^20210406.0.0:
+  version "20210406.0.0"
+  resolved "https://registry.yarnpkg.com/google-closure-compiler-java/-/google-closure-compiler-java-20210406.0.0.tgz#f2be1f825e1c08027698e24dc3ad7c762a7b838d"
+  integrity sha512-hVOoFiIenZuicZSLqi4sNdwzWeg9hRi3acpvOy6WPwKQUuUNkSXNtUiiXpKgCY5puDs49onhV7FzAHoQ/908lg==
 
-google-closure-compiler-linux@^20200830.0.0:
-  version "20200830.0.0"
-  resolved "https://registry.yarnpkg.com/google-closure-compiler-linux/-/google-closure-compiler-linux-20200830.0.0.tgz#c231a735b29b2d94ecfbe01ce86f3182ee85495a"
-  integrity sha512-QfxFA3+fOrNe0RH2lcXmkdiaM97KvZQOtO3trobNvfkMNr2h9OUtpXkqWExwolo/jsJWNumsdaRnEAwEthMUOw==
+google-closure-compiler-linux@^20210406.0.0:
+  version "20210406.0.0"
+  resolved "https://registry.yarnpkg.com/google-closure-compiler-linux/-/google-closure-compiler-linux-20210406.0.0.tgz#e31ecb9ae6cdfb4e1b02052d8f24ac457e8fd435"
+  integrity sha512-KzE39AD3OOZMkR1TtE3nwPBhB3eEJwH8w4Jm3vx2k4veFhryWASFAnDMfHcASzlzjk05tPjecuFtGrHhVafL+w==
 
-google-closure-compiler-osx@^20200830.0.0:
-  version "20200830.0.0"
-  resolved "https://registry.yarnpkg.com/google-closure-compiler-osx/-/google-closure-compiler-osx-20200830.0.0.tgz#daa7cf607374fc6a85e3f5be2cc323867988d607"
-  integrity sha512-qHKjRBJVq2+2mT25eoT6iOMVbUGT02sJUwkdLlsohWKV4sMEY8/nwnkZYsdm7KnPJnmQLlrfYJ1ZTh1VTlAJpQ==
+google-closure-compiler-osx@^20210406.0.0:
+  version "20210406.0.0"
+  resolved "https://registry.yarnpkg.com/google-closure-compiler-osx/-/google-closure-compiler-osx-20210406.0.0.tgz#17049155b2eba6a74b383d6b013d929a8a3a6d6a"
+  integrity sha512-Kph0hewevDC2T3uEQSRFoZAI5oE18ceyx5gUy93B0fd8cbL7vUCVjazBcHKOUiQ/Opq2CT96V0moCSFEhq8d1w==
 
-google-closure-compiler-windows@^20200830.0.0:
-  version "20200830.0.0"
-  resolved "https://registry.yarnpkg.com/google-closure-compiler-windows/-/google-closure-compiler-windows-20200830.0.0.tgz#881cb0cd5adb4002987103d6f9c91450791a0df4"
-  integrity sha512-IpJAyxJo+GQ2DSVC4sslPydhIPyWRINkdNynIK/Bk+vbM/7i4LoEm/Y5rY/KJOLRCSds+s3Ov9LYdFkN8C//7g==
+google-closure-compiler-windows@^20210406.0.0:
+  version "20210406.0.0"
+  resolved "https://registry.yarnpkg.com/google-closure-compiler-windows/-/google-closure-compiler-windows-20210406.0.0.tgz#030a8f4bc7d9aa3fbcfe6028a8b8e23bc0cab755"
+  integrity sha512-IlFWn3vv8SLCRcxK6MSfRgnU4we7zy+s6OczmEmH4wymkpRM6aydAaD4Vxz68i00Om0hkT5l2oO3cFq5FiQBLg==
 
-google-closure-compiler@20200830.0.0:
-  version "20200830.0.0"
-  resolved "https://registry.yarnpkg.com/google-closure-compiler/-/google-closure-compiler-20200830.0.0.tgz#2c76bc20b23275e4811d4b92ef0d840314910839"
-  integrity sha512-Pri8kyHGmd2xqLM38QBarx+fdkm2HuLniGz7GimbdjQ1KUuPNIz7IJOYc8NGGwYPGAB45vg4IZRk/LepAqnoxg==
+google-closure-compiler@20210406.0.0:
+  version "20210406.0.0"
+  resolved "https://registry.yarnpkg.com/google-closure-compiler/-/google-closure-compiler-20210406.0.0.tgz#954be1b1347ccfab00dbcaed5c6296133e710e0c"
+  integrity sha512-qaQqEjIneTK5OXYfZmGnWwy5S1nYLeTTphpbc7LzhsvEq4s2xapKCi6fC8VsbCHZvgq8z5VNomMJU97ErRCyGQ==
   dependencies:
     chalk "2.x"
-    google-closure-compiler-java "^20200830.0.0"
+    google-closure-compiler-java "^20210406.0.0"
     minimist "1.x"
     vinyl "2.x"
     vinyl-sourcemaps-apply "^0.2.0"
   optionalDependencies:
-    google-closure-compiler-linux "^20200830.0.0"
-    google-closure-compiler-osx "^20200830.0.0"
-    google-closure-compiler-windows "^20200830.0.0"
+    google-closure-compiler-linux "^20210406.0.0"
+    google-closure-compiler-osx "^20210406.0.0"
+    google-closure-compiler-windows "^20210406.0.0"
 
 graceful-fs@4.X, graceful-fs@^4.0.0, graceful-fs@^4.1.11, graceful-fs@^4.1.15, graceful-fs@^4.1.2, graceful-fs@^4.1.6, graceful-fs@^4.2.0, graceful-fs@^4.2.2, graceful-fs@^4.2.4:
   version "4.2.6"
@@ -5182,7 +5190,7 @@ gulplog@^1.0.0:
   dependencies:
     glogg "^1.0.0"
 
-handlebars@^4.7.6:
+handlebars@^4.7.6, handlebars@^4.7.7:
   version "4.7.7"
   resolved "https://registry.yarnpkg.com/handlebars/-/handlebars-4.7.7.tgz#9ce33416aad02dbd6c8fafa8240d5d98004945a1"
   integrity sha512-aAcXm5OAfE/8IXkcZvCepKU3VzW1/39Fb5ZuqMtgI/hT8X2YgoMvBY5dLhq/cpOvw7Lk1nK/UF71aLG/ZnVYRA==
@@ -5998,7 +6006,7 @@ jest-changed-files@^26.6.2:
     execa "^4.0.0"
     throat "^5.0.0"
 
-jest-cli@^26.3.0:
+jest-cli@^26.6.3:
   version "26.6.3"
   resolved "https://registry.yarnpkg.com/jest-cli/-/jest-cli-26.6.3.tgz#43117cfef24bc4cd691a174a8796a532e135e92a"
   integrity sha512-GF9noBSa9t08pSyl3CY4frMrqp+aQXFGFkf5hEPbh/pIUFYWMK6ZLTfbmadxJVcJrdRoChlWQsA2VkJcDFK8hg==
@@ -6041,16 +6049,6 @@ jest-config@^26.6.3:
     micromatch "^4.0.2"
     pretty-format "^26.6.2"
 
-jest-diff@^25.2.1:
-  version "25.5.0"
-  resolved "https://registry.yarnpkg.com/jest-diff/-/jest-diff-25.5.0.tgz#1dd26ed64f96667c068cef026b677dfa01afcfa9"
-  integrity sha512-z1kygetuPiREYdNIumRpAHY6RXiGmp70YHptjdaxTWGmA085W3iCnXNx0DhflK3vwrKmrRWyY1wUpkPMVxMK7A==
-  dependencies:
-    chalk "^3.0.0"
-    diff-sequences "^25.2.6"
-    jest-get-type "^25.2.6"
-    pretty-format "^25.5.0"
-
 jest-diff@^26.0.0, jest-diff@^26.6.2:
   version "26.6.2"
   resolved "https://registry.yarnpkg.com/jest-diff/-/jest-diff-26.6.2.tgz#1aa7468b52c3a68d7d5c5fdcdfcd5e49bd164394"
@@ -6104,11 +6102,6 @@ jest-environment-node@^26.6.2:
     jest-mock "^26.6.2"
     jest-util "^26.6.2"
 
-jest-get-type@^25.2.6:
-  version "25.2.6"
-  resolved "https://registry.yarnpkg.com/jest-get-type/-/jest-get-type-25.2.6.tgz#0b0a32fab8908b44d508be81681487dbabb8d877"
-  integrity sha512-DxjtyzOHjObRM+sM1knti6or+eOgcGU4xVSb2HNP1TqO4ahsT+rqZg+nyqHWJSvWgKC5cG3QjGFBqxLghiF/Ig==
-
 jest-get-type@^26.3.0:
   version "26.3.0"
   resolved "https://registry.yarnpkg.com/jest-get-type/-/jest-get-type-26.3.0.tgz#e97dc3c3f53c2b406ca7afaed4493b1d099199e0"
@@ -6351,18 +6344,6 @@ jest-snapshot@^26.6.2:
     pretty-format "^26.6.2"
     semver "^7.3.2"
 
-jest-util@26.x, jest-util@^26.6.2:
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/jest-util/-/jest-util-26.6.2.tgz#907535dbe4d5a6cb4c47ac9b926f6af29576cbc1"
-  integrity sha512-MDW0fKfsn0OI7MS7Euz6h8HNDXVQ0gaM9uW6RjfDmd1DAFcaxX9OqIakHIqhbnmF08Cf2DLDG+ulq8YQQ0Lp0Q==
-  dependencies:
-    "@jest/types" "^26.6.2"
-    "@types/node" "*"
-    chalk "^4.0.0"
-    graceful-fs "^4.2.4"
-    is-ci "^2.0.0"
-    micromatch "^4.0.2"
-
 jest-util@^24.0.0:
   version "24.9.0"
   resolved "https://registry.yarnpkg.com/jest-util/-/jest-util-24.9.0.tgz#7396814e48536d2e85a37de3e4c431d7cb140162"
@@ -6381,6 +6362,18 @@ jest-util@^24.0.0:
     slash "^2.0.0"
     source-map "^0.6.0"
 
+jest-util@^26.1.0, jest-util@^26.6.2:
+  version "26.6.2"
+  resolved "https://registry.yarnpkg.com/jest-util/-/jest-util-26.6.2.tgz#907535dbe4d5a6cb4c47ac9b926f6af29576cbc1"
+  integrity sha512-MDW0fKfsn0OI7MS7Euz6h8HNDXVQ0gaM9uW6RjfDmd1DAFcaxX9OqIakHIqhbnmF08Cf2DLDG+ulq8YQQ0Lp0Q==
+  dependencies:
+    "@jest/types" "^26.6.2"
+    "@types/node" "*"
+    chalk "^4.0.0"
+    graceful-fs "^4.2.4"
+    is-ci "^2.0.0"
+    micromatch "^4.0.2"
+
 jest-validate@^26.6.2:
   version "26.6.2"
   resolved "https://registry.yarnpkg.com/jest-validate/-/jest-validate-26.6.2.tgz#23d380971587150467342911c3d7b4ac57ab20ec"
@@ -6415,14 +6408,14 @@ jest-worker@^26.3.0, jest-worker@^26.6.2:
     merge-stream "^2.0.0"
     supports-color "^7.0.0"
 
-jest@26.3.0:
-  version "26.3.0"
-  resolved "https://registry.yarnpkg.com/jest/-/jest-26.3.0.tgz#366e25827831e65743a324bc476de54f41f2e07b"
-  integrity sha512-LFCry7NS6bTa4BUGUHC+NvZ3B9WG7Jv8F+Lb96dAJFM23LMwSsL5RiJcw9S+nejsh8lS1VxHq+RSH4Xa9tujpA==
+jest@26.6.3:
+  version "26.6.3"
+  resolved "https://registry.yarnpkg.com/jest/-/jest-26.6.3.tgz#40e8fdbe48f00dfa1f0ce8121ca74b88ac9148ef"
+  integrity sha512-lGS5PXGAzR4RF7V5+XObhqz2KZIDUA1yD0DG6pBVmy10eh0ZIXQImRuzocsI/N2XZ1GrLFwTS27In2i2jlpq1Q==
   dependencies:
-    "@jest/core" "^26.3.0"
+    "@jest/core" "^26.6.3"
     import-local "^3.0.2"
-    jest-cli "^26.3.0"
+    jest-cli "^26.6.3"
 
 js-tokens@^4.0.0:
   version "4.0.0"
@@ -6519,7 +6512,7 @@ json-stringify-safe@^5.0.1, json-stringify-safe@~5.0.1:
   resolved "https://registry.yarnpkg.com/json-stringify-safe/-/json-stringify-safe-5.0.1.tgz#1296a2d58fd45f19a0f6ce01d65701e2c735b6eb"
   integrity sha1-Epai1Y/UXxmg9s4B1lcB4sc1tus=
 
-json5@2.x, json5@^2.1.0, json5@^2.1.2:
+json5@2.x, json5@^2.1.2:
   version "2.2.0"
   resolved "https://registry.yarnpkg.com/json5/-/json5-2.2.0.tgz#2dfefe720c6ba525d9ebd909950f0515316c89a3"
   integrity sha512-f+8cldu7X/y7RAJurMEJmdoKXGB/X550w2Nr3tTbezL6RwEE/iMcm+tZnXeoZtKuOq6ft8+CqzEkrIgx1fPoQA==
@@ -6533,11 +6526,6 @@ json5@^1.0.1:
   dependencies:
     minimist "^1.2.0"
 
-json@9.0.6:
-  version "9.0.6"
-  resolved "https://registry.yarnpkg.com/json/-/json-9.0.6.tgz#7972c2a5a48a42678db2730c7c2c4ee6e4e24585"
-  integrity sha1-eXLCpaSKQmeNsnMMfCxO5uTiRYU=
-
 jsonfile@^4.0.0:
   version "4.0.0"
   resolved "https://registry.yarnpkg.com/jsonfile/-/jsonfile-4.0.0.tgz#8771aae0799b64076b76640fca058f9c10e33ecb"
@@ -6800,16 +6788,6 @@ lodash.ismatch@^4.4.0:
   resolved "https://registry.yarnpkg.com/lodash.ismatch/-/lodash.ismatch-4.4.0.tgz#756cb5150ca3ba6f11085a78849645f188f85f37"
   integrity sha1-dWy1FQyjum8RCFp4hJZF8Yj4Xzc=
 
-lodash.memoize@4.x:
-  version "4.1.2"
-  resolved "https://registry.yarnpkg.com/lodash.memoize/-/lodash.memoize-4.1.2.tgz#bcc6c49a42a2840ed997f323eada5ecd182e0bfe"
-  integrity sha1-vMbEmkKihA7Zl/Mj6tpezRguC/4=
-
-lodash.padend@^4.6.1:
-  version "4.6.1"
-  resolved "https://registry.yarnpkg.com/lodash.padend/-/lodash.padend-4.6.1.tgz#53ccba047d06e158d311f45da625f4e49e6f166e"
-  integrity sha1-U8y6BH0G4VjTEfRdpiX05J5vFm4=
-
 lodash.set@^4.3.2:
   version "4.3.2"
   resolved "https://registry.yarnpkg.com/lodash.set/-/lodash.set-4.3.2.tgz#d8757b1da807dde24816b0d6a84bea1a76230b23"
@@ -6845,7 +6823,7 @@ lodash.uniq@^4.5.0:
   resolved "https://registry.yarnpkg.com/lodash.uniq/-/lodash.uniq-4.5.0.tgz#d0225373aeb652adc1bc82e4945339a842754773"
   integrity sha1-0CJTc662Uq3BvILklFM5qEJ1R3M=
 
-lodash@^4.17.12, lodash@^4.17.14, lodash@^4.17.15, lodash@^4.17.19, lodash@^4.17.20, lodash@^4.17.21, lodash@^4.17.4, lodash@^4.2.1, lodash@^4.7.0:
+lodash@4.x, lodash@^4.17.12, lodash@^4.17.14, lodash@^4.17.15, lodash@^4.17.19, lodash@^4.17.21, lodash@^4.17.4, lodash@^4.2.1, lodash@^4.7.0:
   version "4.17.21"
   resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.21.tgz#679591c564c3bffaae8454cf0b3df370c3d6911c"
   integrity sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==
@@ -6974,10 +6952,10 @@ map-visit@^1.0.0:
   dependencies:
     object-visit "^1.0.0"
 
-marked@^1.2.5:
-  version "1.2.9"
-  resolved "https://registry.yarnpkg.com/marked/-/marked-1.2.9.tgz#53786f8b05d4c01a2a5a76b7d1ec9943d29d72dc"
-  integrity sha512-H8lIX2SvyitGX+TRdtS06m1jHMijKN/XjfH6Ooii9fvxMlh8QdqBfBDkGUpMWH2kQNrtixjzYUa3SH8ROTgRRw==
+marked@^2.0.1:
+  version "2.0.3"
+  resolved "https://registry.yarnpkg.com/marked/-/marked-2.0.3.tgz#3551c4958c4da36897bda2a16812ef1399c8d6b0"
+  integrity sha512-5otztIIcJfPc2qGTN8cVtOJEjNJZ0jwa46INMagrYfk0EvqtRuEHLsEe0LrFS0/q+ZRKT0+kXK7P2T1AN5lWRA==
 
 matchdep@^2.0.0:
   version "2.0.0"
@@ -8316,16 +8294,6 @@ prelude-ls@~1.1.2:
   resolved "https://registry.yarnpkg.com/prelude-ls/-/prelude-ls-1.1.2.tgz#21932a549f5e52ffd9a827f570e04be62a97da54"
   integrity sha1-IZMqVJ9eUv/ZqCf1cOBL5iqX2lQ=
 
-pretty-format@^25.2.1, pretty-format@^25.5.0:
-  version "25.5.0"
-  resolved "https://registry.yarnpkg.com/pretty-format/-/pretty-format-25.5.0.tgz#7873c1d774f682c34b8d48b6743a2bf2ac55791a"
-  integrity sha512-kbo/kq2LQ/A/is0PQwsEHM7Ca6//bGPPvU6UnsdDRSKTWxT/ru/xb88v4BJf6a69H+uTytOEsTusT9ksd/1iWQ==
-  dependencies:
-    "@jest/types" "^25.5.0"
-    ansi-regex "^5.0.0"
-    ansi-styles "^4.0.0"
-    react-is "^16.12.0"
-
 pretty-format@^26.0.0, pretty-format@^26.6.2:
   version "26.6.2"
   resolved "https://registry.yarnpkg.com/pretty-format/-/pretty-format-26.6.2.tgz#e35c2705f14cb7fe2fe94fa078345b444120fc93"
@@ -8551,11 +8519,6 @@ randomfill@^1.0.3:
     randombytes "^2.0.5"
     safe-buffer "^5.1.0"
 
-react-is@^16.12.0:
-  version "16.13.1"
-  resolved "https://registry.yarnpkg.com/react-is/-/react-is-16.13.1.tgz#789729a4dc36de2999dc156dd6c1d9c18cea56a4"
-  integrity sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ==
-
 react-is@^17.0.1:
   version "17.0.2"
   resolved "https://registry.yarnpkg.com/react-is/-/react-is-17.0.2.tgz#e691d4a8e9c789365655539ab372762b0efb54f0"
@@ -8726,10 +8689,10 @@ redent@^3.0.0:
     indent-string "^4.0.0"
     strip-indent "^3.0.0"
 
-reduce-flatten@^1.0.1:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/reduce-flatten/-/reduce-flatten-1.0.1.tgz#258c78efd153ddf93cb561237f61184f3696e327"
-  integrity sha1-JYx479FT3fk8tWEjf2EYTzaW4yc=
+reduce-flatten@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/reduce-flatten/-/reduce-flatten-2.0.0.tgz#734fd84e65f375d7ca4465c69798c25c9d10ae27"
+  integrity sha512-EJ4UNY/U1t2P/2k6oqotuX2Cc3T6nxJwsM0N0asT7dhrtH1ltUxDn4NalSYmPE2rCkVpcf/X6R0wDwcFpzhd4w==
 
 regex-not@^1.0.0, regex-not@^1.0.2:
   version "1.0.2"
@@ -9190,29 +9153,12 @@ shellwords@^0.1.1:
   resolved "https://registry.yarnpkg.com/shellwords/-/shellwords-0.1.1.tgz#d6b9181c1a48d397324c84871efbcfc73fc0654b"
   integrity sha512-vFwSUfQvqybiICwZY5+DAWIPLKsWO31Q91JSKl3UYv+K5c2QRPzn0qzec6QPu1Qc9eHYItiP3NdJqNVqetYAww==
 
-shiki-languages@^0.2.7:
-  version "0.2.7"
-  resolved "https://registry.yarnpkg.com/shiki-languages/-/shiki-languages-0.2.7.tgz#7230b675b96d37a36ac1bf995525375ce69f3924"
-  integrity sha512-REmakh7pn2jCn9GDMRSK36oDgqhh+rSvJPo77sdWTOmk44C5b0XlYPwJZcFOMJWUZJE0c7FCbKclw4FLwUKLRw==
-  dependencies:
-    vscode-textmate "^5.2.0"
-
-shiki-themes@^0.2.7:
-  version "0.2.7"
-  resolved "https://registry.yarnpkg.com/shiki-themes/-/shiki-themes-0.2.7.tgz#6e04451d832152e0fc969876a7bd926b3963c1f2"
-  integrity sha512-ZMmboDYw5+SEpugM8KGUq3tkZ0vXg+k60XX6NngDK7gc1Sv6YLUlanpvG3evm57uKJvfXsky/S5MzSOTtYKLjA==
-  dependencies:
-    json5 "^2.1.0"
-    vscode-textmate "^5.2.0"
-
-shiki@^0.2.7:
-  version "0.2.7"
-  resolved "https://registry.yarnpkg.com/shiki/-/shiki-0.2.7.tgz#d2547548ed8742673730e1e4bbe792a77c445540"
-  integrity sha512-bwVc7cdtYYHEO9O+XJ8aNOskKRfaQd5Y4ovLRfbQkmiLSUaR+bdlssbZUUhbQ0JAFMYcTcJ5tjG5KtnufttDHQ==
+shiki@^0.9.3:
+  version "0.9.3"
+  resolved "https://registry.yarnpkg.com/shiki/-/shiki-0.9.3.tgz#7bf7bcf3ed50ca525ec89cc09254abce4264d5ca"
+  integrity sha512-NEjg1mVbAUrzRv2eIcUt3TG7X9svX7l3n3F5/3OdFq+/BxUdmBOeKGiH4icZJBLHy354Shnj6sfBTemea2e7XA==
   dependencies:
     onigasm "^2.2.5"
-    shiki-languages "^0.2.7"
-    shiki-themes "^0.2.7"
     vscode-textmate "^5.2.0"
 
 side-channel@^1.0.4:
@@ -9755,16 +9701,15 @@ symbol-tree@^3.2.4:
   resolved "https://registry.yarnpkg.com/symbol-tree/-/symbol-tree-3.2.4.tgz#430637d248ba77e078883951fb9aa0eed7c63fa2"
   integrity sha512-9QNk5KwDF+Bvz+PyObkmSYjI5ksVUYtjW7AU22r2NKcfLJcXp96hkDWU3+XndOsUb+AQ9QhfzfCT2O+CNWT5Tw==
 
-table-layout@^0.4.3:
-  version "0.4.5"
-  resolved "https://registry.yarnpkg.com/table-layout/-/table-layout-0.4.5.tgz#d906de6a25fa09c0c90d1d08ecd833ecedcb7378"
-  integrity sha512-zTvf0mcggrGeTe/2jJ6ECkJHAQPIYEwDoqsiqBjI24mvRmQbInK5jq33fyypaCBxX08hMkfmdOqj6haT33EqWw==
+table-layout@^1.0.1:
+  version "1.0.2"
+  resolved "https://registry.yarnpkg.com/table-layout/-/table-layout-1.0.2.tgz#c4038a1853b0136d63365a734b6931cf4fad4a04"
+  integrity sha512-qd/R7n5rQTRFi+Zf2sk5XVVd9UQl6ZkduPFC3S7WEGJAmetDTjY3qPN50eSKzwuzEyQKy5TN2TiZdkIjos2L6A==
   dependencies:
-    array-back "^2.0.0"
+    array-back "^4.0.1"
     deep-extend "~0.6.0"
-    lodash.padend "^4.6.1"
-    typical "^2.6.1"
-    wordwrapjs "^3.0.0"
+    typical "^5.2.0"
+    wordwrapjs "^4.0.0"
 
 table@^6.0.4:
   version "6.0.9"
@@ -9893,14 +9838,6 @@ test-exclude@^6.0.0:
     glob "^7.1.4"
     minimatch "^3.0.4"
 
-test-value@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/test-value/-/test-value-3.0.0.tgz#9168c062fab11a86b8d444dd968bb4b73851ce92"
-  integrity sha512-sVACdAWcZkSU9x7AOmJo5TqE+GyNJknHaHsMrR6ZnhjVlVN9Yx6FjHrsKZ3BjIpPCT68zYesPWkakrNupwfOTQ==
-  dependencies:
-    array-back "^2.0.0"
-    typical "^2.6.1"
-
 text-encoding-utf-8@^1.0.2:
   version "1.0.2"
   resolved "https://registry.yarnpkg.com/text-encoding-utf-8/-/text-encoding-utf-8-1.0.2.tgz#585b62197b0ae437e3c7b5d0af27ac1021e10d13"
@@ -10111,39 +10048,44 @@ trim-off-newlines@^1.0.0:
   resolved "https://registry.yarnpkg.com/trim-off-newlines/-/trim-off-newlines-1.0.1.tgz#9f9ba9d9efa8764c387698bcbfeb2c848f11adb3"
   integrity sha1-n5up2e+odkw4dpi8v+sshI8RrbM=
 
-ts-jest@26.3.0:
-  version "26.3.0"
-  resolved "https://registry.yarnpkg.com/ts-jest/-/ts-jest-26.3.0.tgz#6b2845045347dce394f069bb59358253bc1338a9"
-  integrity sha512-Jq2uKfx6bPd9+JDpZNMBJMdMQUC3sJ08acISj8NXlVgR2d5OqslEHOR2KHMgwymu8h50+lKIm0m0xj/ioYdW2Q==
+ts-jest@26.5.4:
+  version "26.5.4"
+  resolved "https://registry.yarnpkg.com/ts-jest/-/ts-jest-26.5.4.tgz#207f4c114812a9c6d5746dd4d1cdf899eafc9686"
+  integrity sha512-I5Qsddo+VTm94SukBJ4cPimOoFZsYTeElR2xy6H2TOVs+NsvgYglW8KuQgKoApOKuaU/Ix/vrF9ebFZlb5D2Pg==
   dependencies:
-    "@types/jest" "26.x"
     bs-logger "0.x"
     buffer-from "1.x"
     fast-json-stable-stringify "2.x"
-    jest-util "26.x"
+    jest-util "^26.1.0"
     json5 "2.x"
-    lodash.memoize "4.x"
+    lodash "4.x"
     make-error "1.x"
     mkdirp "1.x"
     semver "7.x"
-    yargs-parser "18.x"
+    yargs-parser "20.x"
 
-ts-node@9.0.0:
-  version "9.0.0"
-  resolved "https://registry.yarnpkg.com/ts-node/-/ts-node-9.0.0.tgz#e7699d2a110cc8c0d3b831715e417688683460b3"
-  integrity sha512-/TqB4SnererCDR/vb4S/QvSZvzQMJN8daAslg7MeaiHvD8rDZsSfXmNeNumyZZzMned72Xoq/isQljYSt8Ynfg==
+ts-node@9.1.1:
+  version "9.1.1"
+  resolved "https://registry.yarnpkg.com/ts-node/-/ts-node-9.1.1.tgz#51a9a450a3e959401bda5f004a72d54b936d376d"
+  integrity sha512-hPlt7ZACERQGf03M253ytLY3dHbGNGrAq9qIHWUY9XHYl1z7wYngSr3OQ5xmui8o2AaxsONxIzjafLUiWBo1Fg==
   dependencies:
     arg "^4.1.0"
+    create-require "^1.1.0"
     diff "^4.0.1"
     make-error "^1.1.1"
     source-map-support "^0.5.17"
     yn "3.1.1"
 
-tslib@^1.12.0, tslib@^1.8.1, tslib@^1.9.0, tslib@^1.9.3:
+tslib@^1.8.1, tslib@^1.9.0, tslib@^1.9.3:
   version "1.14.1"
   resolved "https://registry.yarnpkg.com/tslib/-/tslib-1.14.1.tgz#cf2d38bdc34a134bcaf1091c41f6619e2f672d00"
   integrity sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg==
 
+tslib@^2.2.0:
+  version "2.2.0"
+  resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.2.0.tgz#fb2c475977e35e241311ede2693cee1ec6698f5c"
+  integrity sha512-gS9GVHRU+RGn5KQM2rllAlR3dU6m7AcpJKdtH8gFvQiC4Otgk98XnmMU+nZenHt/+VhnBPWwgrJsyrdcw6i23w==
+
 tsutils@^3.17.1:
   version "3.21.0"
   resolved "https://registry.yarnpkg.com/tsutils/-/tsutils-3.21.0.tgz#b48717d394cea6c1e096983eed58e9d61715b623"
@@ -10244,37 +10186,42 @@ typedarray@^0.0.6:
   resolved "https://registry.yarnpkg.com/typedarray/-/typedarray-0.0.6.tgz#867ac74e3864187b1d3d47d996a78ec5c8830777"
   integrity sha1-hnrHTjhkGHsdPUfZlqeOxciDB3c=
 
-typedoc-default-themes@^0.12.5:
+typedoc-default-themes@^0.12.9:
   version "0.12.10"
   resolved "https://registry.yarnpkg.com/typedoc-default-themes/-/typedoc-default-themes-0.12.10.tgz#614c4222fe642657f37693ea62cad4dafeddf843"
   integrity sha512-fIS001cAYHkyQPidWXmHuhs8usjP5XVJjWB8oZGqkTowZaz3v7g3KDZeeqE82FBrmkAnIBOY3jgy7lnPnqATbA==
 
-typedoc@0.20.19:
-  version "0.20.19"
-  resolved "https://registry.yarnpkg.com/typedoc/-/typedoc-0.20.19.tgz#4871f659bc03a545c572066329273f1b30fb1cba"
-  integrity sha512-9FjQ1xQGtxpXm8R5QKvU8wFBaaYe8RW3NzrhGWB8RigbOALwG+4ywJ/EyArPGWXvmXYB7I8h2YHzeyFvZ2s0ow==
+typedoc@0.20.35:
+  version "0.20.35"
+  resolved "https://registry.yarnpkg.com/typedoc/-/typedoc-0.20.35.tgz#c36996098cbeb2ef63d9d7991262a071b98336a3"
+  integrity sha512-7sNca19LXg2hgyGHq3b33tQ1YFApmd8aBDEzWQ2ry4VDkw/NdFWkysGiGRY1QckDCB0gVH8+MlXA4K71IB3azg==
   dependencies:
     colors "^1.4.0"
-    fs-extra "^9.0.1"
-    handlebars "^4.7.6"
-    lodash "^4.17.20"
+    fs-extra "^9.1.0"
+    handlebars "^4.7.7"
+    lodash "^4.17.21"
     lunr "^2.3.9"
-    marked "^1.2.5"
+    marked "^2.0.1"
     minimatch "^3.0.0"
     progress "^2.0.3"
     shelljs "^0.8.4"
-    shiki "^0.2.7"
-    typedoc-default-themes "^0.12.5"
+    shiki "^0.9.3"
+    typedoc-default-themes "^0.12.9"
 
 typescript@4.0.2:
   version "4.0.2"
   resolved "https://registry.yarnpkg.com/typescript/-/typescript-4.0.2.tgz#7ea7c88777c723c681e33bf7988be5d008d05ac2"
   integrity sha512-e4ERvRV2wb+rRZ/IQeb3jm2VxBsirQLpQhdxplZ2MEzGvDkkMmPglecnNDfSUBivMjP93vRbngYYDQqQ/78bcQ==
 
-typical@^2.6.1:
-  version "2.6.1"
-  resolved "https://registry.yarnpkg.com/typical/-/typical-2.6.1.tgz#5c080e5d661cbbe38259d2e70a3c7253e873881d"
-  integrity sha1-XAgOXWYcu+OCWdLnCjxyU+hziB0=
+typical@^4.0.0:
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/typical/-/typical-4.0.0.tgz#cbeaff3b9d7ae1e2bbfaf5a4e6f11eccfde94fc4"
+  integrity sha512-VAH4IvQ7BDFYglMd7BPRDfLgxZZX4O4TFcRDA6EN5X7erNJJq+McIEp8np9aVtxrCJ6qx4GTYVfOWNjcqwZgRw==
+
+typical@^5.2.0:
+  version "5.2.0"
+  resolved "https://registry.yarnpkg.com/typical/-/typical-5.2.0.tgz#4daaac4f2b5315460804f0acf6cb69c52bb93066"
+  integrity sha512-dvdQgNDNJo+8B2uBQoqdb11eUCE1JQXhvjC/CZtgvZseVd5TYMXnq0+vuUemXbd/Se29cTaUuPX3YIc2xgbvIg==
 
 uglify-js@^3.1.4:
   version "3.13.3"
@@ -10621,10 +10568,10 @@ web-stream-tools@0.0.1:
   resolved "https://registry.yarnpkg.com/web-stream-tools/-/web-stream-tools-0.0.1.tgz#6d2c06a6f5f46eab5e73d82285bae3c9b5ee71a0"
   integrity sha512-MZUYhvTAMMy1u07OJL2pyp/tdrIu15fRJlGgnfvCQVXBS4cBNbIV1+6veYfVhTfnq0ZLispgx4nv17QxpuX+6w==
 
-web-streams-polyfill@2.0.3:
-  version "2.0.3"
-  resolved "https://registry.yarnpkg.com/web-streams-polyfill/-/web-streams-polyfill-2.0.3.tgz#0c396f069a5eedc96c711393b12f2c67cf283a00"
-  integrity sha512-pOqiHmL3RBAGS+SgOR42RbPU6nc8/n15N2rsOXFYHLnTfs2Z8QHs8AizOeOaYEnhwPN4+hu3M2D9XvAqzvt6MA==
+web-streams-polyfill@3.0.3:
+  version "3.0.3"
+  resolved "https://registry.yarnpkg.com/web-streams-polyfill/-/web-streams-polyfill-3.0.3.tgz#f49e487eedeca47a207c1aee41ee5578f884b42f"
+  integrity sha512-d2H/t0eqRNM4w2WvmTdoeIvzAUSpK7JmATB8Nr2lb7nQ9BTIJVjbQ/TRFVEh2gUH1HwclPdoPtfMoFfetXaZnA==
 
 webidl-conversions@^4.0.2:
   version "4.0.2"
@@ -10768,13 +10715,13 @@ wordwrap@^1.0.0:
   resolved "https://registry.yarnpkg.com/wordwrap/-/wordwrap-1.0.0.tgz#27584810891456a4171c8d0226441ade90cbcaeb"
   integrity sha1-J1hIEIkUVqQXHI0CJkQa3pDLyus=
 
-wordwrapjs@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/wordwrapjs/-/wordwrapjs-3.0.0.tgz#c94c372894cadc6feb1a66bff64e1d9af92c5d1e"
-  integrity sha512-mO8XtqyPvykVCsrwj5MlOVWvSnCdT+C+QVbm6blradR7JExAhbkZ7hZ9A+9NUtwzSqrlUo9a67ws0EiILrvRpw==
+wordwrapjs@^4.0.0:
+  version "4.0.1"
+  resolved "https://registry.yarnpkg.com/wordwrapjs/-/wordwrapjs-4.0.1.tgz#d9790bccfb110a0fc7836b5ebce0937b37a8b98f"
+  integrity sha512-kKlNACbvHrkpIw6oPeYDSmdCTu2hdMHoyXLTcUKala++lx5Y+wjJ/e474Jqv5abnVmwxw08DiTuHmw69lJGksA==
   dependencies:
-    reduce-flatten "^1.0.1"
-    typical "^2.6.1"
+    reduce-flatten "^2.0.0"
+    typical "^5.2.0"
 
 worker-farm@^1.7.0:
   version "1.7.0"
@@ -10918,13 +10865,10 @@ yallist@^4.0.0:
   resolved "https://registry.yarnpkg.com/yallist/-/yallist-4.0.0.tgz#9bb92790d9c0effec63be73519e11a35019a3a72"
   integrity sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==
 
-yargs-parser@18.x, yargs-parser@^18.1.2, yargs-parser@^18.1.3:
-  version "18.1.3"
-  resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-18.1.3.tgz#be68c4975c6b2abf469236b0c870362fab09a7b0"
-  integrity sha512-o50j0JeToy/4K6OZcaQmW6lyXXKhq7csREXcDwk2omFPJEwUNOVtJKvmDr9EI1fAJZUyZcRF7kxGBWmRXudrCQ==
-  dependencies:
-    camelcase "^5.0.0"
-    decamelize "^1.2.0"
+yargs-parser@20.x, yargs-parser@^20.2.3:
+  version "20.2.7"
+  resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-20.2.7.tgz#61df85c113edfb5a7a4e36eb8aa60ef423cbc90a"
+  integrity sha512-FiNkvbeHzB/syOjIUxFDCnhSfzAL8R5vs40MgLFBorXACCOAEaWu0gRZl14vG8MR9AOJIZbmkjhusqBYZ3HTHw==
 
 yargs-parser@5.0.0-security.0:
   version "5.0.0-security.0"
@@ -10942,10 +10886,13 @@ yargs-parser@^15.0.1:
     camelcase "^5.0.0"
     decamelize "^1.2.0"
 
-yargs-parser@^20.2.3:
-  version "20.2.7"
-  resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-20.2.7.tgz#61df85c113edfb5a7a4e36eb8aa60ef423cbc90a"
-  integrity sha512-FiNkvbeHzB/syOjIUxFDCnhSfzAL8R5vs40MgLFBorXACCOAEaWu0gRZl14vG8MR9AOJIZbmkjhusqBYZ3HTHw==
+yargs-parser@^18.1.2, yargs-parser@^18.1.3:
+  version "18.1.3"
+  resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-18.1.3.tgz#be68c4975c6b2abf469236b0c870362fab09a7b0"
+  integrity sha512-o50j0JeToy/4K6OZcaQmW6lyXXKhq7csREXcDwk2omFPJEwUNOVtJKvmDr9EI1fAJZUyZcRF7kxGBWmRXudrCQ==
+  dependencies:
+    camelcase "^5.0.0"
+    decamelize "^1.2.0"
 
 yargs@^14.2.2:
   version "14.2.3"

From fac30e7aea6dde08e2d948d34f0bd3489a3b92bd Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Thu, 15 Apr 2021 11:43:16 -0400
Subject: [PATCH 060/719] ARROW-12408: [R] Delete Scan()

Suppressing deprecation warnings is disallowed by `R CMD check`, so let's just delete it entirely.

Closes #10053 from lidavidm/arrow-12408

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 r/NEWS.md                       |  1 +
 r/R/arrowExports.R              |  4 ----
 r/R/dataset-scan.R              |  7 +------
 r/src/arrowExports.cpp          | 16 ----------------
 r/src/dataset.cpp               | 29 -----------------------------
 r/tests/testthat/test-dataset.R | 10 +---------
 6 files changed, 3 insertions(+), 64 deletions(-)

diff --git a/r/NEWS.md b/r/NEWS.md
index cd8c31fb8b0..312f99e3f41 100644
--- a/r/NEWS.md
+++ b/r/NEWS.md
@@ -45,6 +45,7 @@ Over 100 functions can now be called on Arrow objects inside a `dplyr` verb:
 * `write_dataset()` now defaults to `format = "parquet"` and better validates the `format` argument
 * Invalid input for `schema` in `open_dataset()` is now correctly handled
 * Collecting 0 columns from a Dataset now no longer returns all of the columns
+* The `Scanner$Scan()` method has been removed; use `Scanner$ScanBatches()`
 
 ## Other improvements
 
diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R
index c432a135524..51cdcf85df0 100644
--- a/r/R/arrowExports.R
+++ b/r/R/arrowExports.R
@@ -512,10 +512,6 @@ dataset___Scanner__head <- function(scanner, n){
     .Call(`_arrow_dataset___Scanner__head`, scanner, n)
 }
 
-dataset___Scanner__Scan <- function(scanner){
-    .Call(`_arrow_dataset___Scanner__Scan`, scanner)
-}
-
 dataset___Scanner__schema <- function(sc){
     .Call(`_arrow_dataset___Scanner__schema`, sc)
 }
diff --git a/r/R/dataset-scan.R b/r/R/dataset-scan.R
index 8bec8978098..750401e1736 100644
--- a/r/R/dataset-scan.R
+++ b/r/R/dataset-scan.R
@@ -56,12 +56,7 @@
 Scanner <- R6Class("Scanner", inherit = ArrowObject,
   public = list(
     ToTable = function() dataset___Scanner__ToTable(self),
-    ScanBatches = function() dataset___Scanner__ScanBatches(self),
-    Scan = function() {
-        # Planned for removal in ARROW-11782
-        .Deprecated("ScanBatches")
-        dataset___Scanner__Scan(self)
-    }
+    ScanBatches = function() dataset___Scanner__ScanBatches(self)
   ),
   active = list(
     schema = function() dataset___Scanner__schema(self)
diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp
index 42532e6c3c2..87f0130eeff 100644
--- a/r/src/arrowExports.cpp
+++ b/r/src/arrowExports.cpp
@@ -2015,21 +2015,6 @@ extern "C" SEXP _arrow_dataset___Scanner__head(SEXP scanner_sexp, SEXP n_sexp){
 }
 #endif
 
-// dataset.cpp
-#if defined(ARROW_R_WITH_DATASET)
-cpp11::list dataset___Scanner__Scan(const std::shared_ptr<ds::Scanner>& scanner);
-extern "C" SEXP _arrow_dataset___Scanner__Scan(SEXP scanner_sexp){
-BEGIN_CPP11
-	arrow::r::Input<const std::shared_ptr<ds::Scanner>&>::type scanner(scanner_sexp);
-	return cpp11::as_sexp(dataset___Scanner__Scan(scanner));
-END_CPP11
-}
-#else
-extern "C" SEXP _arrow_dataset___Scanner__Scan(SEXP scanner_sexp){
-	Rf_error("Cannot call dataset___Scanner__Scan(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
-}
-#endif
-
 // dataset.cpp
 #if defined(ARROW_R_WITH_DATASET)
 std::shared_ptr<arrow::Schema> dataset___Scanner__schema(const std::shared_ptr<ds::Scanner>& sc);
@@ -6747,7 +6732,6 @@ static const R_CallMethodDef CallEntries[] = {
 		{ "_arrow_dataset___Scanner__ToTable", (DL_FUNC) &_arrow_dataset___Scanner__ToTable, 1}, 
 		{ "_arrow_dataset___Scanner__ScanBatches", (DL_FUNC) &_arrow_dataset___Scanner__ScanBatches, 1}, 
 		{ "_arrow_dataset___Scanner__head", (DL_FUNC) &_arrow_dataset___Scanner__head, 2}, 
-		{ "_arrow_dataset___Scanner__Scan", (DL_FUNC) &_arrow_dataset___Scanner__Scan, 1}, 
 		{ "_arrow_dataset___Scanner__schema", (DL_FUNC) &_arrow_dataset___Scanner__schema, 1}, 
 		{ "_arrow_dataset___ScanTask__get_batches", (DL_FUNC) &_arrow_dataset___ScanTask__get_batches, 1}, 
 		{ "_arrow_dataset___Dataset__Write", (DL_FUNC) &_arrow_dataset___Dataset__Write, 6}, 
diff --git a/r/src/dataset.cpp b/r/src/dataset.cpp
index c7ef39b5b62..a38ff86ae05 100644
--- a/r/src/dataset.cpp
+++ b/r/src/dataset.cpp
@@ -441,35 +441,6 @@ std::shared_ptr<arrow::Table> dataset___Scanner__head(
   return ValueOrStop(scanner->Head(n));
 }
 
-// TODO (ARROW-11782) Remove calls to Scan()
-#if defined(__GNUC__) || defined(__clang__)
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-#elif defined(_MSC_VER)
-#pragma warning(push)
-#pragma warning(disable : 4996)
-#endif
-
-// [[dataset::export]]
-cpp11::list dataset___Scanner__Scan(const std::shared_ptr<ds::Scanner>& scanner) {
-  auto it = ValueOrStop(scanner->Scan());
-  std::vector<std::shared_ptr<ds::ScanTask>> out;
-  std::shared_ptr<ds::ScanTask> scan_task;
-  // TODO(npr): can this iteration be parallelized?
-  for (auto st : it) {
-    scan_task = ValueOrStop(st);
-    out.push_back(scan_task);
-  }
-
-  return arrow::r::to_r_list(out);
-}
-
-#if defined(__GNUC__) || defined(__clang__)
-#pragma GCC diagnostic pop
-#elif defined(_MSC_VER)
-#pragma warning(pop)
-#endif
-
 // [[dataset::export]]
 std::shared_ptr<arrow::Schema> dataset___Scanner__schema(
     const std::shared_ptr<ds::Scanner>& sc) {
diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R
index eb7408c982f..4570c1f5762 100644
--- a/r/tests/testthat/test-dataset.R
+++ b/r/tests/testthat/test-dataset.R
@@ -1345,14 +1345,6 @@ test_that("Dataset and query print methods", {
   )
 })
 
-test_that("Scanner$Scan is deprecated", {
-  ds <- open_dataset(ipc_dir, partitioning = "part", format = "feather")
-  expect_deprecated(
-    ds$NewScan()$Finish()$Scan(),
-    "ScanBatches"
-  )
-})
-
 test_that("Scanner$ScanBatches", {
   ds <- open_dataset(ipc_dir, format = "feather")
   batches <- ds$NewScan()$Finish()$ScanBatches()
@@ -1388,7 +1380,7 @@ test_that("Assembling a Dataset manually and getting a Table", {
   fmt <- FileFormat$create("parquet")
   factory <- FileSystemDatasetFactory$create(fs, selector, NULL, fmt, partitioning = partitioning)
   expect_r6_class(factory, "FileSystemDatasetFactory")
-  
+
   schm <- factory$Inspect()
   expect_r6_class(schm, "Schema")
 

From cd4df5ebfd6e0d49d6a80fe5d74dff1361af578d Mon Sep 17 00:00:00 2001
From: sjgupta2 <sjgupta2@illinois.edu>
Date: Thu, 15 Apr 2021 18:58:16 +0200
Subject: [PATCH 061/719] ARROW-8900: [C++][Python] Expose Proxy Options as
 parameters for S3FileSystem

As discussed in the comments on the JIRA issue, I've added a simple struct called `S3ProxyOptions` which holds the fields for proxy-related information to pass to the AWS SDK when building the S3 client.
I added a simple `FromUri` helper function since these options can be derived from environment variables like `http_proxy`, `HTTP_PROXY`, `HTTPS_PROXY`, etc. I didn't automatically honor these environment variables, but that is something we could do in a follow-up PR (or leave it for the users to do).
Also added the options to the pyarrow interface for the file-system.

I tested the proxy functionality by setting up a simple proxy on my local machine and checking the logs to ensure that the traffic from the file-system was flowing through it.

Closes #9996 from sahil1105/sahil/s3-http-proxy

Lead-authored-by: sjgupta2 <sjgupta2@illinois.edu>
Co-authored-by: Sahil Gupta <sahil1105@hotmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/filesystem/s3fs.cc        |  52 +++++++++
 cpp/src/arrow/filesystem/s3fs.h         |  19 ++++
 python/pyarrow/_s3fs.pyx                |  43 +++++++-
 python/pyarrow/includes/libarrow_fs.pxd |  13 +++
 python/pyarrow/tests/test_fs.py         | 141 ++++++++++++++++++++++++
 5 files changed, 266 insertions(+), 2 deletions(-)

diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc
index 75b1e71cc94..96a88660655 100644
--- a/cpp/src/arrow/filesystem/s3fs.cc
+++ b/cpp/src/arrow/filesystem/s3fs.cc
@@ -172,6 +172,32 @@ Status EnsureS3Initialized() {
   return Status::OK();
 }
 
+// -----------------------------------------------------------------------
+// S3ProxyOptions implementation
+
+Result<S3ProxyOptions> S3ProxyOptions::FromUri(const Uri& uri) {
+  S3ProxyOptions options;
+
+  options.scheme = uri.scheme();
+  options.host = uri.host();
+  options.port = uri.port();
+  options.username = uri.username();
+  options.password = uri.password();
+
+  return options;
+}
+
+Result<S3ProxyOptions> S3ProxyOptions::FromUri(const std::string& uri_string) {
+  Uri uri;
+  RETURN_NOT_OK(uri.Parse(uri_string));
+  return FromUri(uri);
+}
+
+bool S3ProxyOptions::Equals(const S3ProxyOptions& other) const {
+  return (scheme == other.scheme && host == other.host && port == other.port &&
+          username == other.username && password == other.password);
+}
+
 // -----------------------------------------------------------------------
 // S3Options implementation
 
@@ -317,6 +343,7 @@ Result<S3Options> S3Options::FromUri(const std::string& uri_string,
 bool S3Options::Equals(const S3Options& other) const {
   return (region == other.region && endpoint_override == other.endpoint_override &&
           scheme == other.scheme && background_writes == other.background_writes &&
+          proxy_options.Equals(other.proxy_options) &&
           GetAccessKey() == other.GetAccessKey() &&
           GetSecretKey() == other.GetSecretKey() &&
           GetSessionToken() == other.GetSessionToken());
@@ -515,6 +542,31 @@ class ClientBuilder {
     }
 
     const bool use_virtual_addressing = options_.endpoint_override.empty();
+
+    /// Set proxy options if provided
+    if (!options_.proxy_options.scheme.empty()) {
+      if (options_.proxy_options.scheme == "http") {
+        client_config_.proxyScheme = Aws::Http::Scheme::HTTP;
+      } else if (options_.proxy_options.scheme == "https") {
+        client_config_.proxyScheme = Aws::Http::Scheme::HTTPS;
+      } else {
+        return Status::Invalid("Invalid proxy connection scheme '",
+                               options_.proxy_options.scheme, "'");
+      }
+    }
+    if (!options_.proxy_options.host.empty()) {
+      client_config_.proxyHost = ToAwsString(options_.proxy_options.host);
+    }
+    if (options_.proxy_options.port != -1) {
+      client_config_.proxyPort = options_.proxy_options.port;
+    }
+    if (!options_.proxy_options.username.empty()) {
+      client_config_.proxyUserName = ToAwsString(options_.proxy_options.username);
+    }
+    if (!options_.proxy_options.password.empty()) {
+      client_config_.proxyPassword = ToAwsString(options_.proxy_options.password);
+    }
+
     return std::make_shared<S3Client>(
         credentials_provider_, client_config_,
         Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never,
diff --git a/cpp/src/arrow/filesystem/s3fs.h b/cpp/src/arrow/filesystem/s3fs.h
index a7f72fb1a1f..6e73ed436c5 100644
--- a/cpp/src/arrow/filesystem/s3fs.h
+++ b/cpp/src/arrow/filesystem/s3fs.h
@@ -40,6 +40,22 @@ class STSClient;
 namespace arrow {
 namespace fs {
 
+/// Options for using a proxy for S3
+struct ARROW_EXPORT S3ProxyOptions {
+  std::string scheme;
+  std::string host;
+  int port = -1;
+  std::string username;
+  std::string password;
+
+  /// Initialize from URI such as http://username:password@host:port
+  /// or http://host:port
+  static Result<S3ProxyOptions> FromUri(const std::string& uri);
+  static Result<S3ProxyOptions> FromUri(const ::arrow::internal::Uri& uri);
+
+  bool Equals(const S3ProxyOptions& other) const;
+};
+
 /// Options for the S3FileSystem implementation.
 struct ARROW_EXPORT S3Options {
   /// AWS region to connect to.
@@ -66,6 +82,9 @@ struct ARROW_EXPORT S3Options {
   /// Frequency (in seconds) to refresh temporary credentials from assumed role
   int load_frequency;
 
+  /// If connection is through a proxy, set options here
+  S3ProxyOptions proxy_options;
+
   /// AWS credentials provider
   std::shared_ptr<Aws::Auth::AWSCredentialsProvider> credentials_provider;
 
diff --git a/python/pyarrow/_s3fs.pyx b/python/pyarrow/_s3fs.pyx
index ccec4600d41..1a907d02ca9 100644
--- a/python/pyarrow/_s3fs.pyx
+++ b/python/pyarrow/_s3fs.pyx
@@ -94,6 +94,19 @@ cdef class S3FileSystem(FileSystem):
     background_writes: boolean, default True
         Whether OutputStream writes will be issued in the background, without
         blocking.
+    proxy_options: dict or str, default None
+        If a proxy is used, provide the options here. Supported options are:
+        'scheme' (str: 'http' or 'https'; required), 'host' (str; required),
+        'port' (int; required), 'username' (str; optional),
+        'password' (str; optional).
+        A proxy URI (str) can also be provided, in which case these options
+        will be derived from the provided URI.
+        The following are equivalent::
+
+            S3FileSystem(proxy_options='http://username:password@localhost:8020')
+            S3FileSystem(proxy_options={'scheme': 'http', 'host': 'localhost',
+                                        'port': 8020, 'username': 'username',
+                                        'password': 'password'})
     """
 
     cdef:
@@ -103,7 +116,7 @@ cdef class S3FileSystem(FileSystem):
                  anonymous=False, region=None, scheme=None,
                  endpoint_override=None, bint background_writes=True,
                  role_arn=None, session_name=None, external_id=None,
-                 load_frequency=900):
+                 load_frequency=900, proxy_options=None):
         cdef:
             CS3Options options
             shared_ptr[CS3FileSystem] wrapped
@@ -168,6 +181,25 @@ cdef class S3FileSystem(FileSystem):
         if background_writes is not None:
             options.background_writes = background_writes
 
+        if proxy_options is not None:
+            if isinstance(proxy_options, dict):
+                options.proxy_options.scheme = tobytes(proxy_options["scheme"])
+                options.proxy_options.host = tobytes(proxy_options["host"])
+                options.proxy_options.port = proxy_options["port"]
+                proxy_username = proxy_options.get("username", None)
+                if proxy_username:
+                    options.proxy_options.username = tobytes(proxy_username)
+                proxy_password = proxy_options.get("password", None)
+                if proxy_password:
+                    options.proxy_options.password = tobytes(proxy_password)
+            elif isinstance(proxy_options, str):
+                options.proxy_options = GetResultValue(
+                    CS3ProxyOptions.FromUriString(tobytes(proxy_options)))
+            else:
+                raise TypeError(
+                    "'proxy_options': expected 'dict' or 'str', "
+                    f"got {type(proxy_options)} instead.")
+
         with nogil:
             wrapped = GetResultValue(CS3FileSystem.Make(options))
 
@@ -209,7 +241,14 @@ cdef class S3FileSystem(FileSystem):
                 session_name=frombytes(opts.session_name),
                 external_id=frombytes(opts.external_id),
                 load_frequency=opts.load_frequency,
-                background_writes=opts.background_writes
+                background_writes=opts.background_writes,
+                proxy_options={'scheme': frombytes(opts.proxy_options.scheme),
+                               'host': frombytes(opts.proxy_options.host),
+                               'port': opts.proxy_options.port,
+                               'username': frombytes(
+                                   opts.proxy_options.username),
+                               'password': frombytes(
+                                   opts.proxy_options.password)}
             ),)
         )
 
diff --git a/python/pyarrow/includes/libarrow_fs.pxd b/python/pyarrow/includes/libarrow_fs.pxd
index 35d630d85da..ee1b8a70aef 100644
--- a/python/pyarrow/includes/libarrow_fs.pxd
+++ b/python/pyarrow/includes/libarrow_fs.pxd
@@ -125,6 +125,18 @@ cdef extern from "arrow/filesystem/api.h" namespace "arrow::fs" nogil:
     cdef struct CS3GlobalOptions "arrow::fs::S3GlobalOptions":
         CS3LogLevel log_level
 
+    cdef cppclass CS3ProxyOptions "arrow::fs::S3ProxyOptions":
+        c_string scheme
+        c_string host
+        int port
+        c_string username
+        c_string password
+        c_bool Equals(const CS3ProxyOptions& other)
+
+        @staticmethod
+        CResult[CS3ProxyOptions] FromUriString "FromUri"(
+            const c_string& uri_string)
+
     cdef cppclass CS3Options "arrow::fs::S3Options":
         c_string region
         c_string endpoint_override
@@ -134,6 +146,7 @@ cdef extern from "arrow/filesystem/api.h" namespace "arrow::fs" nogil:
         c_string session_name
         c_string external_id
         int load_frequency
+        CS3ProxyOptions proxy_options
         void ConfigureDefaultCredentials()
         void ConfigureAccessKey(const c_string& access_key,
                                 const c_string& secret_key,
diff --git a/python/pyarrow/tests/test_fs.py b/python/pyarrow/tests/test_fs.py
index 1beecc66b12..1af6967595b 100644
--- a/python/pyarrow/tests/test_fs.py
+++ b/python/pyarrow/tests/test_fs.py
@@ -1052,6 +1052,147 @@ def test_s3_options(monkeypatch):
         )
 
 
+@pytest.mark.s3
+def test_s3_proxy_options(monkeypatch):
+    from pyarrow.fs import S3FileSystem
+
+    # The following two are equivalent:
+    proxy_opts_1_dict = {'scheme': 'http', 'host': 'localhost', 'port': 8999}
+    proxy_opts_1_str = 'http://localhost:8999'
+    # The following two are equivalent:
+    proxy_opts_2_dict = {'scheme': 'https', 'host': 'localhost', 'port': 8080}
+    proxy_opts_2_str = 'https://localhost:8080'
+
+    # Check dict case for 'proxy_options'
+    fs = S3FileSystem(proxy_options=proxy_opts_1_dict)
+    assert isinstance(fs, S3FileSystem)
+    assert pickle.loads(pickle.dumps(fs)) == fs
+
+    fs = S3FileSystem(proxy_options=proxy_opts_2_dict)
+    assert isinstance(fs, S3FileSystem)
+    assert pickle.loads(pickle.dumps(fs)) == fs
+
+    # Check str case for 'proxy_options'
+    fs = S3FileSystem(proxy_options=proxy_opts_1_str)
+    assert isinstance(fs, S3FileSystem)
+    assert pickle.loads(pickle.dumps(fs)) == fs
+
+    fs = S3FileSystem(proxy_options=proxy_opts_2_str)
+    assert isinstance(fs, S3FileSystem)
+    assert pickle.loads(pickle.dumps(fs)) == fs
+
+    # Check that two FSs using the same proxy_options dict are equal
+    fs1 = S3FileSystem(proxy_options=proxy_opts_1_dict)
+    fs2 = S3FileSystem(proxy_options=proxy_opts_1_dict)
+    assert fs1 == fs2
+    assert pickle.loads(pickle.dumps(fs1)) == fs2
+    assert pickle.loads(pickle.dumps(fs2)) == fs1
+
+    fs1 = S3FileSystem(proxy_options=proxy_opts_2_dict)
+    fs2 = S3FileSystem(proxy_options=proxy_opts_2_dict)
+    assert fs1 == fs2
+    assert pickle.loads(pickle.dumps(fs1)) == fs2
+    assert pickle.loads(pickle.dumps(fs2)) == fs1
+
+    # Check that two FSs using the same proxy_options str are equal
+    fs1 = S3FileSystem(proxy_options=proxy_opts_1_str)
+    fs2 = S3FileSystem(proxy_options=proxy_opts_1_str)
+    assert fs1 == fs2
+    assert pickle.loads(pickle.dumps(fs1)) == fs2
+    assert pickle.loads(pickle.dumps(fs2)) == fs1
+
+    fs1 = S3FileSystem(proxy_options=proxy_opts_2_str)
+    fs2 = S3FileSystem(proxy_options=proxy_opts_2_str)
+    assert fs1 == fs2
+    assert pickle.loads(pickle.dumps(fs1)) == fs2
+    assert pickle.loads(pickle.dumps(fs2)) == fs1
+
+    # Check that two FSs using equivalent proxy_options
+    # (one dict, one str) are equal
+    fs1 = S3FileSystem(proxy_options=proxy_opts_1_dict)
+    fs2 = S3FileSystem(proxy_options=proxy_opts_1_str)
+    assert fs1 == fs2
+    assert pickle.loads(pickle.dumps(fs1)) == fs2
+    assert pickle.loads(pickle.dumps(fs2)) == fs1
+
+    fs1 = S3FileSystem(proxy_options=proxy_opts_2_dict)
+    fs2 = S3FileSystem(proxy_options=proxy_opts_2_str)
+    assert fs1 == fs2
+    assert pickle.loads(pickle.dumps(fs1)) == fs2
+    assert pickle.loads(pickle.dumps(fs2)) == fs1
+
+    # Check that two FSs using nonequivalent proxy_options are not equal
+    fs1 = S3FileSystem(proxy_options=proxy_opts_1_dict)
+    fs2 = S3FileSystem(proxy_options=proxy_opts_2_dict)
+    assert fs1 != fs2
+    assert pickle.loads(pickle.dumps(fs1)) != fs2
+    assert pickle.loads(pickle.dumps(fs2)) != fs1
+
+    fs1 = S3FileSystem(proxy_options=proxy_opts_1_dict)
+    fs2 = S3FileSystem(proxy_options=proxy_opts_2_str)
+    assert fs1 != fs2
+    assert pickle.loads(pickle.dumps(fs1)) != fs2
+    assert pickle.loads(pickle.dumps(fs2)) != fs1
+
+    fs1 = S3FileSystem(proxy_options=proxy_opts_1_str)
+    fs2 = S3FileSystem(proxy_options=proxy_opts_2_dict)
+    assert fs1 != fs2
+    assert pickle.loads(pickle.dumps(fs1)) != fs2
+    assert pickle.loads(pickle.dumps(fs2)) != fs1
+
+    fs1 = S3FileSystem(proxy_options=proxy_opts_1_str)
+    fs2 = S3FileSystem(proxy_options=proxy_opts_2_str)
+    assert fs1 != fs2
+    assert pickle.loads(pickle.dumps(fs1)) != fs2
+    assert pickle.loads(pickle.dumps(fs2)) != fs1
+
+    # Check that two FSs (one using proxy_options and the other not)
+    # are not equal
+    fs1 = S3FileSystem(proxy_options=proxy_opts_1_dict)
+    fs2 = S3FileSystem()
+    assert fs1 != fs2
+    assert pickle.loads(pickle.dumps(fs1)) != fs2
+    assert pickle.loads(pickle.dumps(fs2)) != fs1
+
+    fs1 = S3FileSystem(proxy_options=proxy_opts_1_str)
+    fs2 = S3FileSystem()
+    assert fs1 != fs2
+    assert pickle.loads(pickle.dumps(fs1)) != fs2
+    assert pickle.loads(pickle.dumps(fs2)) != fs1
+
+    fs1 = S3FileSystem(proxy_options=proxy_opts_2_dict)
+    fs2 = S3FileSystem()
+    assert fs1 != fs2
+    assert pickle.loads(pickle.dumps(fs1)) != fs2
+    assert pickle.loads(pickle.dumps(fs2)) != fs1
+
+    fs1 = S3FileSystem(proxy_options=proxy_opts_2_str)
+    fs2 = S3FileSystem()
+    assert fs1 != fs2
+    assert pickle.loads(pickle.dumps(fs1)) != fs2
+    assert pickle.loads(pickle.dumps(fs2)) != fs1
+
+    # Only dict and str are supported
+    with pytest.raises(TypeError):
+        S3FileSystem(proxy_options=('http', 'localhost', 9090))
+    # Missing scheme
+    with pytest.raises(KeyError):
+        S3FileSystem(proxy_options={'host': 'localhost', 'port': 9090})
+    # Missing host
+    with pytest.raises(KeyError):
+        S3FileSystem(proxy_options={'scheme': 'https', 'port': 9090})
+    # Missing port
+    with pytest.raises(KeyError):
+        S3FileSystem(proxy_options={'scheme': 'http', 'host': 'localhost'})
+    # Invalid proxy URI (invalid scheme htttps)
+    with pytest.raises(pa.ArrowInvalid):
+        S3FileSystem(proxy_options='htttps://localhost:9000')
+    # Invalid proxy_options dict (invalid scheme htttps)
+    with pytest.raises(pa.ArrowInvalid):
+        S3FileSystem(proxy_options={'scheme': 'htttp', 'host': 'localhost',
+                                    'port': 8999})
+
+
 @pytest.mark.hdfs
 def test_hdfs_options(hdfs_connection):
     from pyarrow.fs import HadoopFileSystem

From 1cabc80edbe607a5d7cdcae5cc7b940aeeecf96b Mon Sep 17 00:00:00 2001
From: Ian Cook <ianmcook@gmail.com>
Date: Thu, 15 Apr 2021 10:41:05 -0700
Subject: [PATCH 062/719] ARROW-12401: [R] Fix guard around
 dataset___Scanner__TakeRows

Closes #10051 from ianmcook/ARROW-12401

Authored-by: Ian Cook <ianmcook@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 r/src/arrowExports.cpp | 2 +-
 r/src/dataset.cpp      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp
index 87f0130eeff..d68aaf70251 100644
--- a/r/src/arrowExports.cpp
+++ b/r/src/arrowExports.cpp
@@ -2067,7 +2067,7 @@ extern "C" SEXP _arrow_dataset___Dataset__Write(SEXP file_write_options_sexp, SE
 #endif
 
 // dataset.cpp
-#if defined(ARROW_R_WITH_ARROW)
+#if defined(ARROW_R_WITH_DATASET)
 std::shared_ptr<arrow::Table> dataset___Scanner__TakeRows(const std::shared_ptr<ds::Scanner>& scanner, const std::shared_ptr<arrow::Array>& indices);
 extern "C" SEXP _arrow_dataset___Scanner__TakeRows(SEXP scanner_sexp, SEXP indices_sexp){
 BEGIN_CPP11
diff --git a/r/src/dataset.cpp b/r/src/dataset.cpp
index a38ff86ae05..f4d7746eb10 100644
--- a/r/src/dataset.cpp
+++ b/r/src/dataset.cpp
@@ -476,7 +476,7 @@ void dataset___Dataset__Write(
   StopIfNotOk(ds::FileSystemDataset::Write(opts, scanner));
 }
 
-// [[arrow::export]]
+// [[dataset::export]]
 std::shared_ptr<arrow::Table> dataset___Scanner__TakeRows(
     const std::shared_ptr<ds::Scanner>& scanner,
     const std::shared_ptr<arrow::Array>& indices) {

From 28ed8f72795d3dec87b7baef08f9bba83926ddbe Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Thu, 15 Apr 2021 10:44:54 -0700
Subject: [PATCH 063/719] ARROW-12406: [R] Fix checkbashism violation in
 configure

Fix checkbashim foo=bar; export foo

Closes #10052 from jonkeane/ARROW-12406

Authored-by: Jonathan Keane <jkeane@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 r/configure | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/r/configure b/r/configure
index eea7af6cf5b..f6c1013b474 100755
--- a/r/configure
+++ b/r/configure
@@ -148,7 +148,7 @@ else
       if [ "$UNAME" = "Darwin" ] && [ "${OPENSSL_ROOT_DIR}" = "" ]; then
         brew --prefix openssl >/dev/null 2>&1
         if [ $? -eq 0 ]; then
-          export OPENSSL_ROOT_DIR="$(brew --prefix openssl)"
+          OPENSSL_ROOT_DIR="`brew --prefix openssl`"; export OPENSSL_ROOT_DIR
         fi
       fi
 

From 04e52565beb1d5196f3919d57ca613be4e58fdef Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Thu, 15 Apr 2021 10:57:30 -0700
Subject: [PATCH 064/719] ARROW-12409: [R] Remove LazyData from DESCRIPTION

Remove LazyData since we don't have data (and CRAN will complain about this soon)

Closes #10057 from jonkeane/patch-2

Authored-by: Jonathan Keane <jkeane@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 r/DESCRIPTION | 1 -
 1 file changed, 1 deletion(-)

diff --git a/r/DESCRIPTION b/r/DESCRIPTION
index ab598942a09..7a63b9e7ebc 100644
--- a/r/DESCRIPTION
+++ b/r/DESCRIPTION
@@ -22,7 +22,6 @@ URL: https://github.com/apache/arrow/, https://arrow.apache.org/docs/r/
 BugReports: https://issues.apache.org/jira/projects/ARROW/issues
 Encoding: UTF-8
 Language: en-US
-LazyData: true
 SystemRequirements: C++11; for AWS S3 support on Linux, libcurl and openssl (optional)
 Biarch: true
 Imports:

From 04b6de678f4b3e0e399144f38540d6b62327f72b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Thu, 15 Apr 2021 20:02:57 +0200
Subject: [PATCH 065/719] ARROW-12405: [Packaging] Fix apt artifact patterns
 and artifact uploading from travis
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Builds: https://github.com/ursacomputing/crossbow/branches/all?query=build-242

Closes #10050 from kszucs/apt-crossbow-patterns

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 dev/release/binary-task.rb                    |  2 +-
 .../linux-packages/travis.linux.arm64.yml     |  4 +-
 dev/tasks/macros.jinja                        |  4 +-
 .../python-wheels/travis.linux.arm64.yml      |  1 +
 dev/tasks/tasks.yml                           | 40 +++++++++----------
 5 files changed, 27 insertions(+), 24 deletions(-)

diff --git a/dev/release/binary-task.rb b/dev/release/binary-task.rb
index babb8012f67..42bc1fe4766 100644
--- a/dev/release/binary-task.rb
+++ b/dev/release/binary-task.rb
@@ -1316,7 +1316,7 @@ def define_apt_rc_tasks
 
         desc "Update RC APT repositories"
         task :update do
-          apt_update(apt_rc_repositiries_dir)
+          apt_update(apt_rc_repositories_dir)
           apt_targets.each do |distribution, code_name, component|
             base_dir = "#{apt_rc_repositories_dir}/#{distribution}"
             dists_dir = "#{base_dir}/dists/#{code_name}"
diff --git a/dev/tasks/linux-packages/travis.linux.arm64.yml b/dev/tasks/linux-packages/travis.linux.arm64.yml
index cf8bed3f91f..6078942e737 100644
--- a/dev/tasks/linux-packages/travis.linux.arm64.yml
+++ b/dev/tasks/linux-packages/travis.linux.arm64.yml
@@ -68,6 +68,7 @@ env:
     - YUM_TARGETS={{ target }}
 
 before_script:
+  - set -e
   {{ macros.travis_checkout_arrow() }}
   {{ macros.travis_docker_login() }}
 
@@ -144,4 +145,5 @@ script:
   - popd
 
 after_success:
-  {{ macros.travis_upload_releases(upload_extensions) }}
+  {% set patterns = upload_extensions | format_all("arrow/dev/tasks/linux-packages/**/*{}") %}
+  {{ macros.travis_upload_releases(patterns) }}
diff --git a/dev/tasks/macros.jinja b/dev/tasks/macros.jinja
index db1b64cd649..bfbd6ec2588 100644
--- a/dev/tasks/macros.jinja
+++ b/dev/tasks/macros.jinja
@@ -170,9 +170,9 @@ on:
 
 {%- macro travis_upload_releases(pattern) -%}
   - sudo -H pip3 install pygit2==1.0
-  - sudo -H pip3 install arrow/dev/archery[crossbow]
+  - sudo -H pip3 install -e arrow/dev/archery[crossbow]
   - |
-    archery crossbow
+    archery crossbow \
       --queue-path $(pwd) \
       --queue-remote {{ queue_remote_url }} \
       upload-artifacts \
diff --git a/dev/tasks/python-wheels/travis.linux.arm64.yml b/dev/tasks/python-wheels/travis.linux.arm64.yml
index 137ad6b2a56..a5c0f7408d2 100644
--- a/dev/tasks/python-wheels/travis.linux.arm64.yml
+++ b/dev/tasks/python-wheels/travis.linux.arm64.yml
@@ -45,6 +45,7 @@ env:
     - PYTHON={{ python_version }}
 
 before_script:
+  - set -e
   {{ macros.travis_checkout_arrow() }}
   {{ macros.travis_docker_login() }}
 
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index 983475226a2..eab3e15ce92 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -448,10 +448,10 @@ tasks:
         - .dsc
         - .orig.tar.gz
     artifacts:
-      - apache-arrow-archive-keyring_{no_rc_version}-1.debian.tar.xz
-      - apache-arrow-archive-keyring_{no_rc_version}-1.dsc
-      - apache-arrow-archive-keyring_{no_rc_version}-1_all.deb
-      - apache-arrow-archive-keyring_{no_rc_version}.orig.tar.gz
+      - apache-arrow-apt-source_{no_rc_version}-1.debian.tar.xz
+      - apache-arrow-apt-source_{no_rc_version}-1.dsc
+      - apache-arrow-apt-source_{no_rc_version}-1_all.deb
+      - apache-arrow-apt-source_{no_rc_version}.orig.tar.gz
       - apache-arrow_{no_rc_version}-1.debian.tar.xz
       - apache-arrow_{no_rc_version}-1.dsc
       - apache-arrow_{no_rc_version}.orig.tar.gz
@@ -578,10 +578,10 @@ tasks:
         - .dsc
         - .orig.tar.gz
     artifacts:
-      - apache-arrow-archive-keyring_{no_rc_version}-1.debian.tar.xz
-      - apache-arrow-archive-keyring_{no_rc_version}-1.dsc
-      - apache-arrow-archive-keyring_{no_rc_version}-1_all.deb
-      - apache-arrow-archive-keyring_{no_rc_version}.orig.tar.gz
+      - apache-arrow-apt-source_{no_rc_version}-1.debian.tar.xz
+      - apache-arrow-apt-source_{no_rc_version}-1.dsc
+      - apache-arrow-apt-source_{no_rc_version}-1_all.deb
+      - apache-arrow-apt-source_{no_rc_version}.orig.tar.gz
       - apache-arrow_{no_rc_version}-1.debian.tar.xz
       - apache-arrow_{no_rc_version}-1.dsc
       - apache-arrow_{no_rc_version}.orig.tar.gz
@@ -710,10 +710,10 @@ tasks:
         - .dsc
         - .orig.tar.gz
     artifacts:
-      - apache-arrow-archive-keyring_{no_rc_version}-1.debian.tar.xz
-      - apache-arrow-archive-keyring_{no_rc_version}-1.dsc
-      - apache-arrow-archive-keyring_{no_rc_version}-1_all.deb
-      - apache-arrow-archive-keyring_{no_rc_version}.orig.tar.gz
+      - apache-arrow-apt-source_{no_rc_version}-1.debian.tar.xz
+      - apache-arrow-apt-source_{no_rc_version}-1.dsc
+      - apache-arrow-apt-source_{no_rc_version}-1_all.deb
+      - apache-arrow-apt-source_{no_rc_version}.orig.tar.gz
       - apache-arrow_{no_rc_version}-1.debian.tar.xz
       - apache-arrow_{no_rc_version}-1.dsc
       - apache-arrow_{no_rc_version}.orig.tar.gz
@@ -816,10 +816,10 @@ tasks:
         - .dsc
         - .orig.tar.gz
     artifacts:
-      - apache-arrow-archive-keyring_{no_rc_version}-1.debian.tar.xz
-      - apache-arrow-archive-keyring_{no_rc_version}-1.dsc
-      - apache-arrow-archive-keyring_{no_rc_version}-1_all.deb
-      - apache-arrow-archive-keyring_{no_rc_version}.orig.tar.gz
+      - apache-arrow-apt-source_{no_rc_version}-1.debian.tar.xz
+      - apache-arrow-apt-source_{no_rc_version}-1.dsc
+      - apache-arrow-apt-source_{no_rc_version}-1_all.deb
+      - apache-arrow-apt-source_{no_rc_version}.orig.tar.gz
       - apache-arrow_{no_rc_version}-1.debian.tar.xz
       - apache-arrow_{no_rc_version}-1.dsc
       - apache-arrow_{no_rc_version}.orig.tar.gz
@@ -922,10 +922,10 @@ tasks:
         - .dsc
         - .orig.tar.gz
     artifacts:
-      - apache-arrow-archive-keyring_{no_rc_version}-1.debian.tar.xz
-      - apache-arrow-archive-keyring_{no_rc_version}-1.dsc
-      - apache-arrow-archive-keyring_{no_rc_version}-1_all.deb
-      - apache-arrow-archive-keyring_{no_rc_version}.orig.tar.gz
+      - apache-arrow-apt-source_{no_rc_version}-1.debian.tar.xz
+      - apache-arrow-apt-source_{no_rc_version}-1.dsc
+      - apache-arrow-apt-source_{no_rc_version}-1_all.deb
+      - apache-arrow-apt-source_{no_rc_version}.orig.tar.gz
       - apache-arrow_{no_rc_version}-1.debian.tar.xz
       - apache-arrow_{no_rc_version}-1.dsc
       - apache-arrow_{no_rc_version}.orig.tar.gz

From 45847e1be53bc696538401347f8170a6c8b515aa Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Thu, 15 Apr 2021 11:03:54 -0700
Subject: [PATCH 066/719] ARROW-12389: [R] [Docs] Add note about autocasting

Add a note about strict(er) comparison logic

Closes #10034 from jonkeane/ARROW-12389

Authored-by: Jonathan Keane <jkeane@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 r/NEWS.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/r/NEWS.md b/r/NEWS.md
index 312f99e3f41..c064ac749fe 100644
--- a/r/NEWS.md
+++ b/r/NEWS.md
@@ -37,6 +37,7 @@ Over 100 functions can now be called on Arrow objects inside a `dplyr` verb:
 * `cast(x, type)` and `dictionary_encode()` allow changing the type of columns in Arrow objects; `as.numeric()`, `as.character()`, etc. are exposed as similar type-altering conveniences
 * `dplyr::between()`; the Arrow version also allows the `left` and `right` arguments to be columns in the data and not just scalars
 * Additionally, any Arrow C++ compute function can be called inside a `dplyr` verb. This enables you to access Arrow functions that don't have a direct R mapping. See `list_compute_functions()` for all available functions, which are available in `dplyr` prefixed by `arrow_`.
+* Arrow C++ compute functions now do more systematic type promotion when called on data with different types (e.g. int32 and float64). Previously, Scalars in an expressions were always cast to match the type of the corresponding Array, so this new type promotion enables, among other things, operations on two columns (Arrays) in a dataset. As a side effect, some comparisons that worked in prior versions are no longer supported: for example, `dplyr::filter(arrow_dataset, string_column == 3)` will error with a message about the type mismatch between the numeric `3` and the string type of `string_column`.
 
 ## Datasets
 

From 49455ec010b0627cda9dfb5b2f8ee8285a3bb2a1 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Thu, 15 Apr 2021 20:07:16 +0200
Subject: [PATCH 067/719] ARROW-12367: [C++] Stop producing when PushGenerator
 was destroyed

When a PushGenerator is lost but its Producer is left dangling, avoid pushing values and inform the caller about it.

Closes #10055 from pitrou/ARROW-12367-push-generator-lost

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/filesystem/s3fs.cc           |  6 +-
 cpp/src/arrow/util/async_generator.h       | 69 +++++++++++++++-------
 cpp/src/arrow/util/async_generator_test.cc | 61 +++++++++++++------
 3 files changed, 96 insertions(+), 40 deletions(-)

diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc
index 96a88660655..ab6c8fad92a 100644
--- a/cpp/src/arrow/filesystem/s3fs.cc
+++ b/cpp/src/arrow/filesystem/s3fs.cc
@@ -1572,7 +1572,11 @@ class S3FileSystem::Impl : public std::enable_shared_from_this<S3FileSystem::Imp
                            error);
     };
 
-    auto handle_recursion = [select, self](int32_t nesting_depth) -> Result<bool> {
+    auto handle_recursion = [producer, select,
+                             self](int32_t nesting_depth) -> Result<bool> {
+      if (producer.is_closed()) {
+        return false;
+      }
       RETURN_NOT_OK(self->CheckNestingDepth(nesting_depth));
       return select.recursive && nesting_depth <= select.max_recursion;
     };
diff --git a/cpp/src/arrow/util/async_generator.h b/cpp/src/arrow/util/async_generator.h
index 304a50c1408..f274478fd75 100644
--- a/cpp/src/arrow/util/async_generator.h
+++ b/cpp/src/arrow/util/async_generator.h
@@ -766,23 +766,33 @@ class PushGenerator {
   /// Producer API for PushGenerator
   class Producer {
    public:
-    explicit Producer(std::shared_ptr<State> state) : state_(std::move(state)) {}
+    explicit Producer(const std::shared_ptr<State> state) : weak_state_(state) {}
 
-    /// Push a value on the queue
-    void Push(Result<T> result) {
-      auto lock = state_->mutex.Lock();
-      if (state_->finished) {
+    /// \brief Push a value on the queue
+    ///
+    /// True is returned if the value was pushed, false if the generator is
+    /// already closed or destroyed.  If the latter, it is recommended to stop
+    /// producing any further values.
+    bool Push(Result<T> result) {
+      auto state = weak_state_.lock();
+      if (!state) {
+        // Generator was destroyed
+        return false;
+      }
+      auto lock = state->mutex.Lock();
+      if (state->finished) {
         // Closed early
-        return;
+        return false;
       }
-      if (state_->consumer_fut.has_value()) {
-        auto fut = std::move(state_->consumer_fut.value());
-        state_->consumer_fut.reset();
+      if (state->consumer_fut.has_value()) {
+        auto fut = std::move(state->consumer_fut.value());
+        state->consumer_fut.reset();
         lock.Unlock();  // unlock before potentially invoking a callback
         fut.MarkFinished(std::move(result));
-        return;
+      } else {
+        state->result_q.push_back(std::move(result));
       }
-      state_->result_q.push_back(std::move(result));
+      return true;
     }
 
     /// \brief Tell the consumer we have finished producing
@@ -790,28 +800,43 @@ class PushGenerator {
     /// It is allowed to call this and later call Push() again ("early close").
     /// In this case, calls to Push() after the queue is closed are silently
     /// ignored.  This can help implementing non-trivial cancellation cases.
-    void Close() {
-      auto lock = state_->mutex.Lock();
-      if (state_->finished) {
+    ///
+    /// True is returned on success, false if the generator is already closed
+    /// or destroyed.
+    bool Close() {
+      auto state = weak_state_.lock();
+      if (!state) {
+        // Generator was destroyed
+        return false;
+      }
+      auto lock = state->mutex.Lock();
+      if (state->finished) {
         // Already closed
-        return;
+        return false;
       }
-      state_->finished = true;
-      if (state_->consumer_fut.has_value()) {
-        auto fut = std::move(state_->consumer_fut.value());
-        state_->consumer_fut.reset();
+      state->finished = true;
+      if (state->consumer_fut.has_value()) {
+        auto fut = std::move(state->consumer_fut.value());
+        state->consumer_fut.reset();
         lock.Unlock();  // unlock before potentially invoking a callback
         fut.MarkFinished(IterationTraits<T>::End());
       }
+      return true;
     }
 
+    /// Return whether the generator was closed or destroyed.
     bool is_closed() const {
-      auto lock = state_->mutex.Lock();
-      return state_->finished;
+      auto state = weak_state_.lock();
+      if (!state) {
+        // Generator was destroyed
+        return true;
+      }
+      auto lock = state->mutex.Lock();
+      return state->finished;
     }
 
    private:
-    const std::shared_ptr<State> state_;
+    const std::weak_ptr<State> weak_state_;
   };
 
   PushGenerator() : state_(std::make_shared<State>()) {}
diff --git a/cpp/src/arrow/util/async_generator_test.cc b/cpp/src/arrow/util/async_generator_test.cc
index 3aae6087c1d..38f71ba6c6a 100644
--- a/cpp/src/arrow/util/async_generator_test.cc
+++ b/cpp/src/arrow/util/async_generator_test.cc
@@ -26,6 +26,7 @@
 #include "arrow/testing/gtest_util.h"
 #include "arrow/type_fwd.h"
 #include "arrow/util/async_generator.h"
+#include "arrow/util/optional.h"
 #include "arrow/util/test_common.h"
 #include "arrow/util/vector.h"
 
@@ -1270,13 +1271,16 @@ TEST(PushGenerator, Empty) {
 
   auto fut = gen();
   AssertNotFinished(fut);
-  producer.Close();
+  ASSERT_FALSE(producer.is_closed());
+  ASSERT_TRUE(producer.Close());
+  ASSERT_TRUE(producer.is_closed());
   ASSERT_FINISHES_OK_AND_EQ(IterationTraits<TestInt>::End(), fut);
   ASSERT_FINISHES_OK_AND_EQ(IterationTraits<TestInt>::End(), gen());
 
   // Close idempotent
   fut = gen();
-  producer.Close();
+  ASSERT_FALSE(producer.Close());
+  ASSERT_TRUE(producer.is_closed());
   ASSERT_FINISHES_OK_AND_EQ(IterationTraits<TestInt>::End(), fut);
   ASSERT_FINISHES_OK_AND_EQ(IterationTraits<TestInt>::End(), gen());
   ASSERT_FINISHES_OK_AND_EQ(IterationTraits<TestInt>::End(), gen());
@@ -1287,8 +1291,8 @@ TEST(PushGenerator, Success) {
   auto producer = gen.producer();
   std::vector<Future<TestInt>> futures;
 
-  producer.Push(TestInt{1});
-  producer.Push(TestInt{2});
+  ASSERT_TRUE(producer.Push(TestInt{1}));
+  ASSERT_TRUE(producer.Push(TestInt{2}));
   for (int i = 0; i < 3; ++i) {
     futures.push_back(gen());
   }
@@ -1296,13 +1300,16 @@ TEST(PushGenerator, Success) {
   ASSERT_FINISHES_OK_AND_EQ(TestInt{2}, futures[1]);
   AssertNotFinished(futures[2]);
 
-  producer.Push(TestInt{3});
+  ASSERT_TRUE(producer.Push(TestInt{3}));
   ASSERT_FINISHES_OK_AND_EQ(TestInt{3}, futures[2]);
-  producer.Push(TestInt{4});
+  ASSERT_TRUE(producer.Push(TestInt{4}));
   futures.push_back(gen());
   ASSERT_FINISHES_OK_AND_EQ(TestInt{4}, futures[3]);
-  producer.Push(TestInt{5});
-  producer.Close();
+  ASSERT_TRUE(producer.Push(TestInt{5}));
+
+  ASSERT_FALSE(producer.is_closed());
+  ASSERT_TRUE(producer.Close());
+  ASSERT_TRUE(producer.is_closed());
   for (int i = 0; i < 4; ++i) {
     futures.push_back(gen());
   }
@@ -1318,8 +1325,8 @@ TEST(PushGenerator, Errors) {
   auto producer = gen.producer();
   std::vector<Future<TestInt>> futures;
 
-  producer.Push(TestInt{1});
-  producer.Push(Status::Invalid("2"));
+  ASSERT_TRUE(producer.Push(TestInt{1}));
+  ASSERT_TRUE(producer.Push(Status::Invalid("2")));
   for (int i = 0; i < 3; ++i) {
     futures.push_back(gen());
   }
@@ -1327,12 +1334,15 @@ TEST(PushGenerator, Errors) {
   ASSERT_FINISHES_AND_RAISES(Invalid, futures[1]);
   AssertNotFinished(futures[2]);
 
-  producer.Push(Status::IOError("3"));
-  producer.Push(TestInt{4});
+  ASSERT_TRUE(producer.Push(Status::IOError("3")));
+  ASSERT_TRUE(producer.Push(TestInt{4}));
   ASSERT_FINISHES_AND_RAISES(IOError, futures[2]);
   futures.push_back(gen());
   ASSERT_FINISHES_OK_AND_EQ(TestInt{4}, futures[3]);
-  producer.Close();
+
+  ASSERT_FALSE(producer.is_closed());
+  ASSERT_TRUE(producer.Close());
+  ASSERT_TRUE(producer.is_closed());
   ASSERT_FINISHES_OK_AND_EQ(IterationTraits<TestInt>::End(), gen());
 }
 
@@ -1341,13 +1351,17 @@ TEST(PushGenerator, CloseEarly) {
   auto producer = gen.producer();
   std::vector<Future<TestInt>> futures;
 
-  producer.Push(TestInt{1});
-  producer.Push(TestInt{2});
+  ASSERT_TRUE(producer.Push(TestInt{1}));
+  ASSERT_TRUE(producer.Push(TestInt{2}));
   for (int i = 0; i < 3; ++i) {
     futures.push_back(gen());
   }
-  producer.Close();
-  producer.Push(TestInt{3});
+  ASSERT_FALSE(producer.is_closed());
+  ASSERT_TRUE(producer.Close());
+  ASSERT_TRUE(producer.is_closed());
+  ASSERT_FALSE(producer.Push(TestInt{3}));
+  ASSERT_FALSE(producer.Close());
+  ASSERT_TRUE(producer.is_closed());
 
   ASSERT_FINISHES_OK_AND_EQ(TestInt{1}, futures[0]);
   ASSERT_FINISHES_OK_AND_EQ(TestInt{2}, futures[1]);
@@ -1355,6 +1369,19 @@ TEST(PushGenerator, CloseEarly) {
   ASSERT_FINISHES_OK_AND_EQ(IterationTraits<TestInt>::End(), gen());
 }
 
+TEST(PushGenerator, DanglingProducer) {
+  util::optional<PushGenerator<TestInt>> gen;
+  gen.emplace();
+  auto producer = gen->producer();
+
+  ASSERT_TRUE(producer.Push(TestInt{1}));
+  ASSERT_FALSE(producer.is_closed());
+  gen.reset();
+  ASSERT_TRUE(producer.is_closed());
+  ASSERT_FALSE(producer.Push(TestInt{2}));
+  ASSERT_FALSE(producer.Close());
+}
+
 TEST(PushGenerator, Stress) {
   const int NTHREADS = 20;
   const int NVALUES = 2000;

From ec6436e617211c0f91a8140699c6d83e13d6d971 Mon Sep 17 00:00:00 2001
From: Andy Grove <andygrove73@gmail.com>
Date: Thu, 15 Apr 2021 20:43:46 +0200
Subject: [PATCH 068/719] ARROW-12402: [Rust] [DataFusion] Implement SQL
 metrics example
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This introduces a new method on `ExecutionPlan` to be able to access generic metrics from any physical operator.

One metric is implemented to demonstrate usage.

Closes #10049 from andygrove/ARROW-12402

Authored-by: Andy Grove <andygrove73@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 .../src/physical_plan/hash_aggregate.rs       | 38 ++++++++++++++-
 rust/datafusion/src/physical_plan/mod.rs      | 46 +++++++++++++++++++
 2 files changed, 82 insertions(+), 2 deletions(-)

diff --git a/rust/datafusion/src/physical_plan/hash_aggregate.rs b/rust/datafusion/src/physical_plan/hash_aggregate.rs
index 1a4cb17ea39..b78e8bca550 100644
--- a/rust/datafusion/src/physical_plan/hash_aggregate.rs
+++ b/rust/datafusion/src/physical_plan/hash_aggregate.rs
@@ -18,7 +18,7 @@
 //! Defines the execution plan for the hash aggregate operation
 
 use std::any::Any;
-use std::sync::Arc;
+use std::sync::{Arc, Mutex};
 use std::task::{Context, Poll};
 
 use ahash::RandomState;
@@ -28,7 +28,7 @@ use futures::{
 };
 
 use crate::error::{DataFusionError, Result};
-use crate::physical_plan::{Accumulator, AggregateExpr};
+use crate::physical_plan::{Accumulator, AggregateExpr, MetricType, SQLMetric};
 use crate::physical_plan::{Distribution, ExecutionPlan, Partitioning, PhysicalExpr};
 
 use arrow::{
@@ -94,6 +94,8 @@ pub struct HashAggregateExec {
     /// same as input.schema() but for the final aggregate it will be the same as the input
     /// to the partial aggregate
     input_schema: SchemaRef,
+    /// Metric to track number of output rows
+    output_rows: Arc<Mutex<SQLMetric>>,
 }
 
 fn create_schema(
@@ -142,6 +144,11 @@ impl HashAggregateExec {
 
         let schema = Arc::new(schema);
 
+        let output_rows = Arc::new(Mutex::new(SQLMetric::new(
+            "outputRows",
+            MetricType::Counter,
+        )));
+
         Ok(HashAggregateExec {
             mode,
             group_expr,
@@ -149,6 +156,7 @@ impl HashAggregateExec {
             input,
             schema,
             input_schema,
+            output_rows,
         })
     }
 
@@ -223,6 +231,7 @@ impl ExecutionPlan for HashAggregateExec {
                 group_expr,
                 self.aggr_expr.clone(),
                 input,
+                self.output_rows.clone(),
             )))
         }
     }
@@ -244,6 +253,15 @@ impl ExecutionPlan for HashAggregateExec {
             )),
         }
     }
+
+    fn metrics(&self) -> HashMap<String, SQLMetric> {
+        let mut metrics = HashMap::new();
+        metrics.insert(
+            "outputRows".to_owned(),
+            self.output_rows.lock().unwrap().clone(),
+        );
+        metrics
+    }
 }
 
 /*
@@ -277,6 +295,7 @@ pin_project! {
         #[pin]
         output: futures::channel::oneshot::Receiver<ArrowResult<RecordBatch>>,
         finished: bool,
+        output_rows: Arc<Mutex<SQLMetric>>,
     }
 }
 
@@ -628,6 +647,7 @@ impl GroupedHashAggregateStream {
         group_expr: Vec<Arc<dyn PhysicalExpr>>,
         aggr_expr: Vec<Arc<dyn AggregateExpr>>,
         input: SendableRecordBatchStream,
+        output_rows: Arc<Mutex<SQLMetric>>,
     ) -> Self {
         let (tx, rx) = futures::channel::oneshot::channel();
 
@@ -648,6 +668,7 @@ impl GroupedHashAggregateStream {
             schema,
             output: rx,
             finished: false,
+            output_rows,
         }
     }
 }
@@ -667,6 +688,8 @@ impl Stream for GroupedHashAggregateStream {
             return Poll::Ready(None);
         }
 
+        let output_rows = self.output_rows.clone();
+
         // is the output ready?
         let this = self.project();
         let output_poll = this.output.poll(cx);
@@ -680,6 +703,12 @@ impl Stream for GroupedHashAggregateStream {
                     Err(e) => Err(ArrowError::ExternalError(Box::new(e))), // error receiving
                     Ok(result) => result,
                 };
+
+                if let Ok(batch) = &result {
+                    let mut output_rows = output_rows.lock().unwrap();
+                    output_rows.add(batch.num_rows())
+                }
+
                 Poll::Ready(Some(result))
             }
             Poll::Pending => Poll::Pending,
@@ -1255,6 +1284,11 @@ mod tests {
         ];
 
         assert_batches_sorted_eq!(&expected, &result);
+
+        let metrics = merged_aggregate.metrics();
+        let output_rows = metrics.get("outputRows").unwrap();
+        assert_eq!(3, output_rows.value());
+
         Ok(())
     }
 
diff --git a/rust/datafusion/src/physical_plan/mod.rs b/rust/datafusion/src/physical_plan/mod.rs
index d529e98f75d..054d585e8e3 100644
--- a/rust/datafusion/src/physical_plan/mod.rs
+++ b/rust/datafusion/src/physical_plan/mod.rs
@@ -33,6 +33,7 @@ use async_trait::async_trait;
 use futures::stream::Stream;
 
 use self::merge::MergeExec;
+use hashbrown::HashMap;
 
 /// Trait for types that stream [arrow::record_batch::RecordBatch]
 pub trait RecordBatchStream: Stream<Item = ArrowResult<RecordBatch>> {
@@ -46,6 +47,46 @@ pub trait RecordBatchStream: Stream<Item = ArrowResult<RecordBatch>> {
 /// Trait for a stream of record batches.
 pub type SendableRecordBatchStream = Pin<Box<dyn RecordBatchStream + Send + Sync>>;
 
+/// SQL metric type
+#[derive(Debug, Clone)]
+pub enum MetricType {
+    /// Simple counter
+    Counter,
+}
+
+/// SQL metric such as counter (number of input or output rows) or timing information about
+/// a physical operator.
+#[derive(Debug, Clone)]
+pub struct SQLMetric {
+    /// Metric name
+    name: String,
+    /// Metric value
+    value: usize,
+    /// Metric type
+    metric_type: MetricType,
+}
+
+impl SQLMetric {
+    /// Create a new SQLMetric
+    pub fn new(name: &str, metric_type: MetricType) -> Self {
+        Self {
+            name: name.to_owned(),
+            value: 0,
+            metric_type,
+        }
+    }
+
+    /// Add to the value
+    pub fn add(&mut self, n: usize) {
+        self.value += n;
+    }
+
+    /// Get the current value
+    pub fn value(&self) -> usize {
+        self.value
+    }
+}
+
 /// Physical query planner that converts a `LogicalPlan` to an
 /// `ExecutionPlan` suitable for execution.
 pub trait PhysicalPlanner {
@@ -84,6 +125,11 @@ pub trait ExecutionPlan: Debug + Send + Sync {
 
     /// creates an iterator
     async fn execute(&self, partition: usize) -> Result<SendableRecordBatchStream>;
+
+    /// Return a snapshot of the metrics collected during execution
+    fn metrics(&self) -> HashMap<String, SQLMetric> {
+        HashMap::new()
+    }
 }
 
 /// Execute the [ExecutionPlan] and collect the results in memory

From 958c19ac1b3dc392d6a4f0ec466c0cd9e554e738 Mon Sep 17 00:00:00 2001
From: Andy Grove <andygrove73@gmail.com>
Date: Thu, 15 Apr 2021 20:56:56 +0200
Subject: [PATCH 069/719] ARROW-12335: [Rust] [Ballista] Use latest DataFusion
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Updates Ballista to use the most recent DataFusion version.

Changes made:

- Ballista overrides physical optimizer rules to remove `Repartition`
- Added serde support for new `TryCast` expression
- Updated DataFrame API usage to use `Vec<_>` instead of `&[_]`
- Renamed some timestamp scalar variants
- HashJoinExec updated to take new `CollectLeft` argument
- Removed hard-coded batch size from serde code for `CsvScanExec`

Closes #9991 from andygrove/ballista-bump-df-version

Authored-by: Andy Grove <andygrove73@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 rust/ballista/.dockerignore                   | 18 ++++++++
 rust/ballista/rust/Cargo.toml                 |  6 +--
 rust/ballista/rust/benchmarks/tpch/Cargo.toml | 10 ++--
 rust/ballista/rust/client/Cargo.toml          |  8 +++-
 rust/ballista/rust/client/src/context.rs      | 14 ++++--
 rust/ballista/rust/core/Cargo.toml            | 10 ++--
 rust/ballista/rust/core/proto/ballista.proto  |  6 +++
 rust/ballista/rust/core/src/datasource.rs     |  1 +
 .../core/src/serde/logical_plan/from_proto.rs | 46 +++++++++++--------
 .../rust/core/src/serde/logical_plan/mod.rs   | 28 +++++------
 .../core/src/serde/logical_plan/to_proto.rs   | 14 ++----
 .../src/serde/physical_plan/from_proto.rs     | 24 +++++++---
 .../rust/core/src/serde/physical_plan/mod.rs  |  2 +
 .../core/src/serde/physical_plan/to_proto.rs  | 13 +++++-
 rust/ballista/rust/executor/Cargo.toml        | 10 ++--
 rust/ballista/rust/scheduler/Cargo.toml       |  7 ++-
 rust/ballista/rust/scheduler/src/api/mod.rs   | 14 +++---
 rust/ballista/rust/scheduler/src/lib.rs       | 13 +++---
 rust/ballista/rust/scheduler/src/main.rs      |  8 ++--
 rust/ballista/rust/scheduler/src/planner.rs   | 13 +++++-
 .../ballista/rust/scheduler/src/test_utils.rs | 16 ++++++-
 21 files changed, 189 insertions(+), 92 deletions(-)
 create mode 100644 rust/ballista/.dockerignore

diff --git a/rust/ballista/.dockerignore b/rust/ballista/.dockerignore
new file mode 100644
index 00000000000..3cde49e0a0c
--- /dev/null
+++ b/rust/ballista/.dockerignore
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+rust/**/target
diff --git a/rust/ballista/rust/Cargo.toml b/rust/ballista/rust/Cargo.toml
index d1f588f3bd7..5e344e004b8 100644
--- a/rust/ballista/rust/Cargo.toml
+++ b/rust/ballista/rust/Cargo.toml
@@ -25,6 +25,6 @@ members = [
     "scheduler",
 ]
 
-[profile.release]
-lto = true
-codegen-units = 1
+#[profile.release]
+#lto = true
+#codegen-units = 1
diff --git a/rust/ballista/rust/benchmarks/tpch/Cargo.toml b/rust/ballista/rust/benchmarks/tpch/Cargo.toml
index 55a0fe1330c..8c37f8898fc 100644
--- a/rust/ballista/rust/benchmarks/tpch/Cargo.toml
+++ b/rust/ballista/rust/benchmarks/tpch/Cargo.toml
@@ -27,9 +27,13 @@ edition = "2018"
 [dependencies]
 ballista = { path="../../client" }
 
-arrow = { git = "https://github.com/apache/arrow", rev="46161d2" }
-datafusion = { git = "https://github.com/apache/arrow", rev="46161d2" }
-parquet = { git = "https://github.com/apache/arrow", rev="46161d2" }
+#arrow = { path = "../../../../arrow"  }
+#datafusion = { path = "../../../../datafusion" }
+#parquet = { path = "../../../../parquet"  }
+
+arrow = { git = "https://github.com/apache/arrow", rev="fe83dca" }
+datafusion = { git = "https://github.com/apache/arrow", rev="fe83dca" }
+parquet = { git = "https://github.com/apache/arrow", rev="fe83dca" }
 
 
 env_logger = "0.8"
diff --git a/rust/ballista/rust/client/Cargo.toml b/rust/ballista/rust/client/Cargo.toml
index 966e2dcbb31..8ee5d427bae 100644
--- a/rust/ballista/rust/client/Cargo.toml
+++ b/rust/ballista/rust/client/Cargo.toml
@@ -30,5 +30,9 @@ ballista-core = { path = "../core" }
 futures = "0.3"
 log = "0.4"
 tokio = "1.0"
-arrow = { git = "https://github.com/apache/arrow", rev="46161d2" }
-datafusion = { git = "https://github.com/apache/arrow", rev="46161d2" }
+
+#arrow = { path = "../../../arrow"  }
+#datafusion = { path = "../../../datafusion" }
+
+arrow = { git = "https://github.com/apache/arrow", rev="fe83dca" }
+datafusion = { git = "https://github.com/apache/arrow", rev="fe83dca" }
diff --git a/rust/ballista/rust/client/src/context.rs b/rust/ballista/rust/client/src/context.rs
index 8b2431f56c2..0556c2948da 100644
--- a/rust/ballista/rust/client/src/context.rs
+++ b/rust/ballista/rust/client/src/context.rs
@@ -36,6 +36,7 @@ use ballista_core::{
 };
 
 use arrow::datatypes::Schema;
+use datafusion::catalog::TableReference;
 use datafusion::execution::context::ExecutionContext;
 use datafusion::logical_plan::{DFSchema, Expr, LogicalPlan, Partitioning};
 use datafusion::physical_plan::csv::CsvReadOptions;
@@ -148,7 +149,10 @@ impl BallistaContext {
         for (name, plan) in &state.tables {
             let plan = ctx.optimize(plan)?;
             let execution_plan = ctx.create_physical_plan(&plan)?;
-            ctx.register_table(name, Arc::new(DFTableAdapter::new(plan, execution_plan)));
+            ctx.register_table(
+                TableReference::Bare { table: name },
+                Arc::new(DFTableAdapter::new(plan, execution_plan)),
+            )?;
         }
         let df = ctx.sql(sql)?;
         Ok(BallistaDataFrame::from(self.state.clone(), df))
@@ -267,7 +271,7 @@ impl BallistaDataFrame {
         ))
     }
 
-    pub fn select(&self, expr: &[Expr]) -> Result<BallistaDataFrame> {
+    pub fn select(&self, expr: Vec<Expr>) -> Result<BallistaDataFrame> {
         Ok(Self::from(
             self.state.clone(),
             self.df.select(expr).map_err(BallistaError::from)?,
@@ -283,8 +287,8 @@ impl BallistaDataFrame {
 
     pub fn aggregate(
         &self,
-        group_expr: &[Expr],
-        aggr_expr: &[Expr],
+        group_expr: Vec<Expr>,
+        aggr_expr: Vec<Expr>,
     ) -> Result<BallistaDataFrame> {
         Ok(Self::from(
             self.state.clone(),
@@ -301,7 +305,7 @@ impl BallistaDataFrame {
         ))
     }
 
-    pub fn sort(&self, expr: &[Expr]) -> Result<BallistaDataFrame> {
+    pub fn sort(&self, expr: Vec<Expr>) -> Result<BallistaDataFrame> {
         Ok(Self::from(
             self.state.clone(),
             self.df.sort(expr).map_err(BallistaError::from)?,
diff --git a/rust/ballista/rust/core/Cargo.toml b/rust/ballista/rust/core/Cargo.toml
index f5f6f8574b3..60c38725bf7 100644
--- a/rust/ballista/rust/core/Cargo.toml
+++ b/rust/ballista/rust/core/Cargo.toml
@@ -39,10 +39,14 @@ sqlparser = "0.8"
 tokio = "1.0"
 tonic = "0.4"
 uuid = { version = "0.8", features = ["v4"] }
-arrow = { git = "https://github.com/apache/arrow", rev="46161d2" }
-arrow-flight = { git = "https://github.com/apache/arrow", rev="46161d2" }
-datafusion = { git = "https://github.com/apache/arrow", rev="46161d2" }
 
+#arrow = { path = "../../../arrow"  }
+#arrow-flight = { path = "../../../arrow-flight"  }
+#datafusion = { path = "../../../datafusion" }
+
+arrow = { git = "https://github.com/apache/arrow", rev="fe83dca" }
+arrow-flight = { git = "https://github.com/apache/arrow", rev="fe83dca" }
+datafusion = { git = "https://github.com/apache/arrow", rev="fe83dca" }
 
 [dev-dependencies]
 
diff --git a/rust/ballista/rust/core/proto/ballista.proto b/rust/ballista/rust/core/proto/ballista.proto
index ff0727b7887..5733921bc92 100644
--- a/rust/ballista/rust/core/proto/ballista.proto
+++ b/rust/ballista/rust/core/proto/ballista.proto
@@ -59,6 +59,7 @@ message LogicalExprNode {
     InListNode in_list = 14;
     bool wildcard = 15;
     ScalarFunctionNode scalar_function = 16;
+    TryCastNode try_cast = 17;
   }
 }
 
@@ -172,6 +173,11 @@ message CastNode {
   ArrowType arrow_type = 2;
 }
 
+message TryCastNode {
+  LogicalExprNode expr = 1;
+  ArrowType arrow_type = 2;
+}
+
 message SortExprNode {
   LogicalExprNode expr = 1;
   bool asc = 2;
diff --git a/rust/ballista/rust/core/src/datasource.rs b/rust/ballista/rust/core/src/datasource.rs
index 531f63df40e..8ff0df44e4b 100644
--- a/rust/ballista/rust/core/src/datasource.rs
+++ b/rust/ballista/rust/core/src/datasource.rs
@@ -57,6 +57,7 @@ impl TableProvider for DFTableAdapter {
         _projection: &Option<Vec<usize>>,
         _batch_size: usize,
         _filters: &[Expr],
+        _limit: Option<usize>,
     ) -> DFResult<Arc<dyn ExecutionPlan>> {
         Ok(self.plan.clone())
     }
diff --git a/rust/ballista/rust/core/src/serde/logical_plan/from_proto.rs b/rust/ballista/rust/core/src/serde/logical_plan/from_proto.rs
index 087ebdbf507..93084260662 100644
--- a/rust/ballista/rust/core/src/serde/logical_plan/from_proto.rs
+++ b/rust/ballista/rust/core/src/serde/logical_plan/from_proto.rs
@@ -52,14 +52,13 @@ impl TryInto<LogicalPlan> for &protobuf::LogicalPlanNode {
         match plan {
             LogicalPlanType::Projection(projection) => {
                 let input: LogicalPlan = convert_box_required!(projection.input)?;
+                let x: Vec<Expr> = projection
+                    .expr
+                    .iter()
+                    .map(|expr| expr.try_into())
+                    .collect::<Result<Vec<_>, _>>()?;
                 LogicalPlanBuilder::from(&input)
-                    .project(
-                        &projection
-                            .expr
-                            .iter()
-                            .map(|expr| expr.try_into())
-                            .collect::<Result<Vec<_>, _>>()?,
-                    )?
+                    .project(x)?
                     .build()
                     .map_err(|e| e.into())
             }
@@ -89,7 +88,7 @@ impl TryInto<LogicalPlan> for &protobuf::LogicalPlanNode {
                     .map(|expr| expr.try_into())
                     .collect::<Result<Vec<_>, _>>()?;
                 LogicalPlanBuilder::from(&input)
-                    .aggregate(&group_expr, &aggr_expr)?
+                    .aggregate(group_expr, aggr_expr)?
                     .build()
                     .map_err(|e| e.into())
             }
@@ -148,7 +147,7 @@ impl TryInto<LogicalPlan> for &protobuf::LogicalPlanNode {
                     .map(|expr| expr.try_into())
                     .collect::<Result<Vec<Expr>, _>>()?;
                 LogicalPlanBuilder::from(&input)
-                    .sort(&sort_expr)?
+                    .sort(sort_expr)?
                     .build()
                     .map_err(|e| e.into())
             }
@@ -511,10 +510,10 @@ fn typechecked_scalar_value_conversion(
             ScalarValue::Date32(Some(*v))
         }
         (Value::TimeMicrosecondValue(v), PrimitiveScalarType::TimeMicrosecond) => {
-            ScalarValue::TimeMicrosecond(Some(*v))
+            ScalarValue::TimestampMicrosecond(Some(*v))
         }
         (Value::TimeNanosecondValue(v), PrimitiveScalarType::TimeMicrosecond) => {
-            ScalarValue::TimeNanosecond(Some(*v))
+            ScalarValue::TimestampNanosecond(Some(*v))
         }
         (Value::Utf8Value(v), PrimitiveScalarType::Utf8) => {
             ScalarValue::Utf8(Some(v.to_owned()))
@@ -547,10 +546,10 @@ fn typechecked_scalar_value_conversion(
                     PrimitiveScalarType::LargeUtf8 => ScalarValue::LargeUtf8(None),
                     PrimitiveScalarType::Date32 => ScalarValue::Date32(None),
                     PrimitiveScalarType::TimeMicrosecond => {
-                        ScalarValue::TimeMicrosecond(None)
+                        ScalarValue::TimestampMicrosecond(None)
                     }
                     PrimitiveScalarType::TimeNanosecond => {
-                        ScalarValue::TimeNanosecond(None)
+                        ScalarValue::TimestampNanosecond(None)
                     }
                     PrimitiveScalarType::Null => {
                         return Err(proto_error(
@@ -610,10 +609,10 @@ impl TryInto<datafusion::scalar::ScalarValue> for &protobuf::scalar_value::Value
                 ScalarValue::Date32(Some(*v))
             }
             protobuf::scalar_value::Value::TimeMicrosecondValue(v) => {
-                ScalarValue::TimeMicrosecond(Some(*v))
+                ScalarValue::TimestampMicrosecond(Some(*v))
             }
             protobuf::scalar_value::Value::TimeNanosecondValue(v) => {
-                ScalarValue::TimeNanosecond(Some(*v))
+                ScalarValue::TimestampNanosecond(Some(*v))
             }
             protobuf::scalar_value::Value::ListValue(v) => v.try_into()?,
             protobuf::scalar_value::Value::NullListValue(v) => {
@@ -776,10 +775,10 @@ impl TryInto<datafusion::scalar::ScalarValue> for protobuf::PrimitiveScalarType
             protobuf::PrimitiveScalarType::LargeUtf8 => ScalarValue::LargeUtf8(None),
             protobuf::PrimitiveScalarType::Date32 => ScalarValue::Date32(None),
             protobuf::PrimitiveScalarType::TimeMicrosecond => {
-                ScalarValue::TimeMicrosecond(None)
+                ScalarValue::TimestampMicrosecond(None)
             }
             protobuf::PrimitiveScalarType::TimeNanosecond => {
-                ScalarValue::TimeNanosecond(None)
+                ScalarValue::TimestampNanosecond(None)
             }
         })
     }
@@ -829,10 +828,10 @@ impl TryInto<datafusion::scalar::ScalarValue> for &protobuf::ScalarValue {
                 ScalarValue::Date32(Some(*v))
             }
             protobuf::scalar_value::Value::TimeMicrosecondValue(v) => {
-                ScalarValue::TimeMicrosecond(Some(*v))
+                ScalarValue::TimestampMicrosecond(Some(*v))
             }
             protobuf::scalar_value::Value::TimeNanosecondValue(v) => {
-                ScalarValue::TimeNanosecond(Some(*v))
+                ScalarValue::TimestampNanosecond(Some(*v))
             }
             protobuf::scalar_value::Value::ListValue(scalar_list) => {
                 let protobuf::ScalarListValue {
@@ -962,6 +961,15 @@ impl TryInto<Expr> for &protobuf::LogicalExprNode {
                 let data_type = arrow_type.try_into()?;
                 Ok(Expr::Cast { expr, data_type })
             }
+            ExprType::TryCast(cast) => {
+                let expr = Box::new(parse_required_expr(&cast.expr)?);
+                let arrow_type: &protobuf::ArrowType = cast
+                    .arrow_type
+                    .as_ref()
+                    .ok_or_else(|| proto_error("Protobuf deserialization error: CastNode message missing required field 'arrow_type'"))?;
+                let data_type = arrow_type.try_into()?;
+                Ok(Expr::TryCast { expr, data_type })
+            }
             ExprType::Sort(sort) => Ok(Expr::Sort {
                 expr: Box::new(parse_required_expr(&sort.expr)?),
                 asc: sort.asc,
diff --git a/rust/ballista/rust/core/src/serde/logical_plan/mod.rs b/rust/ballista/rust/core/src/serde/logical_plan/mod.rs
index 50a529b6fa1..48dd96c4d3f 100644
--- a/rust/ballista/rust/core/src/serde/logical_plan/mod.rs
+++ b/rust/ballista/rust/core/src/serde/logical_plan/mod.rs
@@ -82,7 +82,7 @@ mod roundtrip_tests {
                 CsvReadOptions::new().schema(&schema).has_header(true),
                 Some(vec![3, 4]),
             )
-            .and_then(|plan| plan.sort(&[col("salary")]))
+            .and_then(|plan| plan.sort(vec![col("salary")]))
             .and_then(|plan| plan.build())
             .map_err(BallistaError::DataFusionError)?,
         );
@@ -212,8 +212,8 @@ mod roundtrip_tests {
             ScalarValue::LargeUtf8(None),
             ScalarValue::List(None, DataType::Boolean),
             ScalarValue::Date32(None),
-            ScalarValue::TimeMicrosecond(None),
-            ScalarValue::TimeNanosecond(None),
+            ScalarValue::TimestampMicrosecond(None),
+            ScalarValue::TimestampNanosecond(None),
             ScalarValue::Boolean(Some(true)),
             ScalarValue::Boolean(Some(false)),
             ScalarValue::Float32(Some(1.0)),
@@ -252,11 +252,11 @@ mod roundtrip_tests {
             ScalarValue::LargeUtf8(Some(String::from("Test Large utf8"))),
             ScalarValue::Date32(Some(0)),
             ScalarValue::Date32(Some(i32::MAX)),
-            ScalarValue::TimeNanosecond(Some(0)),
-            ScalarValue::TimeNanosecond(Some(i64::MAX)),
-            ScalarValue::TimeMicrosecond(Some(0)),
-            ScalarValue::TimeMicrosecond(Some(i64::MAX)),
-            ScalarValue::TimeMicrosecond(None),
+            ScalarValue::TimestampNanosecond(Some(0)),
+            ScalarValue::TimestampNanosecond(Some(i64::MAX)),
+            ScalarValue::TimestampMicrosecond(Some(0)),
+            ScalarValue::TimestampMicrosecond(Some(i64::MAX)),
+            ScalarValue::TimestampMicrosecond(None),
             ScalarValue::List(
                 Some(vec![
                     ScalarValue::Float32(Some(-213.1)),
@@ -610,8 +610,8 @@ mod roundtrip_tests {
             ScalarValue::Utf8(None),
             ScalarValue::LargeUtf8(None),
             ScalarValue::Date32(None),
-            ScalarValue::TimeMicrosecond(None),
-            ScalarValue::TimeNanosecond(None),
+            ScalarValue::TimestampMicrosecond(None),
+            ScalarValue::TimestampNanosecond(None),
             //ScalarValue::List(None, DataType::Boolean)
         ];
 
@@ -679,7 +679,7 @@ mod roundtrip_tests {
             CsvReadOptions::new().schema(&schema).has_header(true),
             Some(vec![3, 4]),
         )
-        .and_then(|plan| plan.sort(&[col("salary")]))
+        .and_then(|plan| plan.sort(vec![col("salary")]))
         .and_then(|plan| plan.explain(true))
         .and_then(|plan| plan.build())
         .map_err(BallistaError::DataFusionError)?;
@@ -689,7 +689,7 @@ mod roundtrip_tests {
             CsvReadOptions::new().schema(&schema).has_header(true),
             Some(vec![3, 4]),
         )
-        .and_then(|plan| plan.sort(&[col("salary")]))
+        .and_then(|plan| plan.sort(vec![col("salary")]))
         .and_then(|plan| plan.explain(false))
         .and_then(|plan| plan.build())
         .map_err(BallistaError::DataFusionError)?;
@@ -742,7 +742,7 @@ mod roundtrip_tests {
             CsvReadOptions::new().schema(&schema).has_header(true),
             Some(vec![3, 4]),
         )
-        .and_then(|plan| plan.sort(&[col("salary")]))
+        .and_then(|plan| plan.sort(vec![col("salary")]))
         .and_then(|plan| plan.build())
         .map_err(BallistaError::DataFusionError)?;
         roundtrip_test!(plan);
@@ -784,7 +784,7 @@ mod roundtrip_tests {
             CsvReadOptions::new().schema(&schema).has_header(true),
             Some(vec![3, 4]),
         )
-        .and_then(|plan| plan.aggregate(&[col("state")], &[max(col("salary"))]))
+        .and_then(|plan| plan.aggregate(vec![col("state")], vec![max(col("salary"))]))
         .and_then(|plan| plan.build())
         .map_err(BallistaError::DataFusionError)?;
 
diff --git a/rust/ballista/rust/core/src/serde/logical_plan/to_proto.rs b/rust/ballista/rust/core/src/serde/logical_plan/to_proto.rs
index 69b53502fc9..a181f98b6eb 100644
--- a/rust/ballista/rust/core/src/serde/logical_plan/to_proto.rs
+++ b/rust/ballista/rust/core/src/serde/logical_plan/to_proto.rs
@@ -641,12 +641,12 @@ impl TryFrom<&datafusion::scalar::ScalarValue> for protobuf::ScalarValue {
             datafusion::scalar::ScalarValue::Date32(val) => {
                 create_proto_scalar(val, PrimitiveScalarType::Date32, |s| Value::Date32Value(*s))
             }
-            datafusion::scalar::ScalarValue::TimeMicrosecond(val) => {
+            datafusion::scalar::ScalarValue::TimestampMicrosecond(val) => {
                 create_proto_scalar(val, PrimitiveScalarType::TimeMicrosecond, |s| {
                     Value::TimeMicrosecondValue(*s)
                 })
             }
-            datafusion::scalar::ScalarValue::TimeNanosecond(val) => {
+            datafusion::scalar::ScalarValue::TimestampNanosecond(val) => {
                 create_proto_scalar(val, PrimitiveScalarType::TimeNanosecond, |s| {
                     Value::TimeNanosecondValue(*s)
                 })
@@ -939,10 +939,7 @@ impl TryInto<protobuf::LogicalPlanNode> for &LogicalPlan {
                 })
             }
             LogicalPlan::Extension { .. } => unimplemented!(),
-            // _ => Err(BallistaError::General(format!(
-            //     "logical plan to_proto {:?}",
-            //     self
-            // ))),
+            LogicalPlan::Union { .. } => unimplemented!(),
         }
     }
 }
@@ -1161,10 +1158,7 @@ impl TryInto<protobuf::LogicalExprNode> for &Expr {
             Expr::Wildcard => Ok(protobuf::LogicalExprNode {
                 expr_type: Some(protobuf::logical_expr_node::ExprType::Wildcard(true)),
             }),
-            // _ => Err(BallistaError::General(format!(
-            //     "logical expr to_proto {:?}",
-            //     self
-            // ))),
+            Expr::TryCast { .. } => unimplemented!(),
         }
     }
 }
diff --git a/rust/ballista/rust/core/src/serde/physical_plan/from_proto.rs b/rust/ballista/rust/core/src/serde/physical_plan/from_proto.rs
index cb04a3e8196..be0777dbb9a 100644
--- a/rust/ballista/rust/core/src/serde/physical_plan/from_proto.rs
+++ b/rust/ballista/rust/core/src/serde/physical_plan/from_proto.rs
@@ -30,11 +30,15 @@ use crate::serde::{proto_error, protobuf};
 use crate::{convert_box_required, convert_required};
 
 use arrow::datatypes::{DataType, Schema, SchemaRef};
+use datafusion::catalog::catalog::{
+    CatalogList, CatalogProvider, MemoryCatalogList, MemoryCatalogProvider,
+};
 use datafusion::execution::context::{ExecutionConfig, ExecutionContextState};
 use datafusion::logical_plan::{DFSchema, Expr};
 use datafusion::physical_plan::aggregates::{create_aggregate_expr, AggregateFunction};
 use datafusion::physical_plan::expressions::col;
 use datafusion::physical_plan::hash_aggregate::{AggregateMode, HashAggregateExec};
+use datafusion::physical_plan::hash_join::PartitionMode;
 use datafusion::physical_plan::merge::MergeExec;
 use datafusion::physical_plan::planner::DefaultPhysicalPlanner;
 use datafusion::physical_plan::{
@@ -102,15 +106,13 @@ impl TryInto<Arc<dyn ExecutionPlan>> for &protobuf::PhysicalPlanNode {
                     .file_extension(&scan.file_extension)
                     .delimiter(scan.delimiter.as_bytes()[0])
                     .schema(&schema);
-                // TODO we don't care what the DataFusion batch size was because Ballista will
-                // have its own configs. Hard-code for now.
-                let batch_size = 32768;
                 let projection = scan.projection.iter().map(|i| *i as usize).collect();
                 Ok(Arc::new(CsvExec::try_new(
                     &scan.path,
                     options,
                     Some(projection),
-                    batch_size,
+                    scan.batch_size as usize,
+                    None,
                 )?))
             }
             PhysicalPlanType::ParquetScan(scan) => {
@@ -123,6 +125,7 @@ impl TryInto<Arc<dyn ExecutionPlan>> for &protobuf::PhysicalPlanNode {
                     None,
                     scan.batch_size as usize,
                     scan.num_partitions as usize,
+                    None,
                 )?))
             }
             PhysicalPlanType::CoalesceBatches(coalesce_batches) => {
@@ -215,8 +218,10 @@ impl TryInto<Arc<dyn ExecutionPlan>> for &protobuf::PhysicalPlanNode {
                     .collect::<Result<Vec<_>, _>>()?;
 
                 let df_planner = DefaultPhysicalPlanner::default();
+                let catalog_list =
+                    Arc::new(MemoryCatalogList::new()) as Arc<dyn CatalogList>;
                 let ctx_state = ExecutionContextState {
-                    datasources: Default::default(),
+                    catalog_list,
                     scalar_functions: Default::default(),
                     var_provider: Default::default(),
                     aggregate_functions: Default::default(),
@@ -294,7 +299,11 @@ impl TryInto<Arc<dyn ExecutionPlan>> for &protobuf::PhysicalPlanNode {
                     protobuf::JoinType::Right => JoinType::Right,
                 };
                 Ok(Arc::new(HashJoinExec::try_new(
-                    left, right, &on, &join_type,
+                    left,
+                    right,
+                    &on,
+                    &join_type,
+                    PartitionMode::CollectLeft,
                 )?))
             }
             PhysicalPlanType::ShuffleReader(shuffle_reader) => {
@@ -374,8 +383,9 @@ fn compile_expr(
     schema: &Schema,
 ) -> Result<Arc<dyn PhysicalExpr>, BallistaError> {
     let df_planner = DefaultPhysicalPlanner::default();
+    let catalog_list = Arc::new(MemoryCatalogList::new()) as Arc<dyn CatalogList>;
     let state = ExecutionContextState {
-        datasources: HashMap::new(),
+        catalog_list,
         scalar_functions: HashMap::new(),
         var_provider: HashMap::new(),
         aggregate_functions: HashMap::new(),
diff --git a/rust/ballista/rust/core/src/serde/physical_plan/mod.rs b/rust/ballista/rust/core/src/serde/physical_plan/mod.rs
index a6f146c7384..e7985cc84a9 100644
--- a/rust/ballista/rust/core/src/serde/physical_plan/mod.rs
+++ b/rust/ballista/rust/core/src/serde/physical_plan/mod.rs
@@ -40,6 +40,7 @@ mod roundtrip_tests {
 
     use super::super::super::error::Result;
     use super::super::protobuf;
+    use datafusion::physical_plan::hash_join::PartitionMode;
 
     fn roundtrip_test(exec_plan: Arc<dyn ExecutionPlan>) -> Result<()> {
         let proto: protobuf::PhysicalPlanNode = exec_plan.clone().try_into()?;
@@ -84,6 +85,7 @@ mod roundtrip_tests {
             Arc::new(EmptyExec::new(false, Arc::new(schema_right))),
             &[("col".to_string(), "col".to_string())],
             &JoinType::Inner,
+            PartitionMode::CollectLeft,
         )?))
     }
 
diff --git a/rust/ballista/rust/core/src/serde/physical_plan/to_proto.rs b/rust/ballista/rust/core/src/serde/physical_plan/to_proto.rs
index 24c69c4692a..5352c1f7775 100644
--- a/rust/ballista/rust/core/src/serde/physical_plan/to_proto.rs
+++ b/rust/ballista/rust/core/src/serde/physical_plan/to_proto.rs
@@ -28,10 +28,10 @@ use std::{
 
 use datafusion::physical_plan::coalesce_batches::CoalesceBatchesExec;
 use datafusion::physical_plan::csv::CsvExec;
-use datafusion::physical_plan::expressions::CastExpr;
 use datafusion::physical_plan::expressions::{
     CaseExpr, InListExpr, IsNotNullExpr, IsNullExpr, NegativeExpr, NotExpr,
 };
+use datafusion::physical_plan::expressions::{CastExpr, TryCastExpr};
 use datafusion::physical_plan::filter::FilterExec;
 use datafusion::physical_plan::hash_aggregate::AggregateMode;
 use datafusion::physical_plan::hash_join::HashJoinExec;
@@ -236,7 +236,7 @@ impl TryInto<protobuf::PhysicalPlanNode> for Arc<dyn ExecutionPlan> {
                         schema: Some(exec.file_schema().as_ref().into()),
                         has_header: exec.has_header(),
                         delimiter: delimiter.to_string(),
-                        batch_size: 32768,
+                        batch_size: exec.batch_size() as u32,
                     },
                 )),
             })
@@ -510,6 +510,15 @@ impl TryFrom<Arc<dyn PhysicalExpr>> for protobuf::LogicalExprNode {
                     },
                 ))),
             })
+        } else if let Some(cast) = expr.downcast_ref::<TryCastExpr>() {
+            Ok(protobuf::LogicalExprNode {
+                expr_type: Some(protobuf::logical_expr_node::ExprType::TryCast(
+                    Box::new(protobuf::TryCastNode {
+                        expr: Some(Box::new(cast.expr().clone().try_into()?)),
+                        arrow_type: Some(cast.cast_type().into()),
+                    }),
+                )),
+            })
         } else if let Some(expr) = expr.downcast_ref::<ScalarFunctionExpr>() {
             let fun: BuiltinScalarFunction =
                 BuiltinScalarFunction::from_str(expr.name())?;
diff --git a/rust/ballista/rust/executor/Cargo.toml b/rust/ballista/rust/executor/Cargo.toml
index 743b62cc100..beed860fd94 100644
--- a/rust/ballista/rust/executor/Cargo.toml
+++ b/rust/ballista/rust/executor/Cargo.toml
@@ -45,9 +45,13 @@ tokio-stream = "0.1"
 tonic = "0.4"
 uuid = { version = "0.8", features = ["v4"] }
 
-arrow = { git = "https://github.com/apache/arrow", rev="46161d2" }
-arrow-flight = { git = "https://github.com/apache/arrow", rev="46161d2" }
-datafusion = { git = "https://github.com/apache/arrow", rev="46161d2" }
+#arrow = { path = "../../../arrow"  }
+#arrow-flight = { path = "../../../arrow-flight"  }
+#datafusion = { path = "../../../datafusion" }
+
+arrow = { git = "https://github.com/apache/arrow", rev="fe83dca" }
+arrow-flight = { git = "https://github.com/apache/arrow", rev="fe83dca" }
+datafusion = { git = "https://github.com/apache/arrow", rev="fe83dca" }
 
 [dev-dependencies]
 
diff --git a/rust/ballista/rust/scheduler/Cargo.toml b/rust/ballista/rust/scheduler/Cargo.toml
index b0213d37bda..57342dd633e 100644
--- a/rust/ballista/rust/scheduler/Cargo.toml
+++ b/rust/ballista/rust/scheduler/Cargo.toml
@@ -52,8 +52,11 @@ tonic = "0.4"
 tower = { version = "0.4" }
 warp = "0.3"
 
-arrow = { git = "https://github.com/apache/arrow", rev="46161d2" }
-datafusion = { git = "https://github.com/apache/arrow", rev="46161d2" }
+#arrow = { path = "../../../arrow"  }
+#datafusion = { path = "../../../datafusion" }
+
+arrow = { git = "https://github.com/apache/arrow", rev="fe83dca" }
+datafusion = { git = "https://github.com/apache/arrow", rev="fe83dca" }
 
 [dev-dependencies]
 ballista-core = { path = "../core" }
diff --git a/rust/ballista/rust/scheduler/src/api/mod.rs b/rust/ballista/rust/scheduler/src/api/mod.rs
index 29c5cb1af67..9e14378564a 100644
--- a/rust/ballista/rust/scheduler/src/api/mod.rs
+++ b/rust/ballista/rust/scheduler/src/api/mod.rs
@@ -30,11 +30,11 @@ pub type Error = Box<dyn std::error::Error + Send + Sync + 'static>;
 pub type HttpBody = dyn http_body::Body<Data = dyn Buf, Error = Error> + 'static;
 
 impl<A, B> http_body::Body for EitherBody<A, B>
-    where
-        A: http_body::Body + Send + Unpin,
-        B: http_body::Body<Data = A::Data> + Send + Unpin,
-        A::Error: Into<Error>,
-        B::Error: Into<Error>,
+where
+    A: http_body::Body + Send + Unpin,
+    B: http_body::Body<Data = A::Data> + Send + Unpin,
+    A::Error: Into<Error>,
+    B::Error: Into<Error>,
 {
     type Data = A::Data;
     type Error = Error;
@@ -67,7 +67,9 @@ impl<A, B> http_body::Body for EitherBody<A, B>
     }
 }
 
-fn map_option_err<T, U: Into<Error>>(err: Option<Result<T, U>>) -> Option<Result<T, Error>> {
+fn map_option_err<T, U: Into<Error>>(
+    err: Option<Result<T, U>>,
+) -> Option<Result<T, Error>> {
     err.map(|e| e.map_err(Into::into))
 }
 
diff --git a/rust/ballista/rust/scheduler/src/lib.rs b/rust/ballista/rust/scheduler/src/lib.rs
index 6df6c9ac57c..1bd4722e5cb 100644
--- a/rust/ballista/rust/scheduler/src/lib.rs
+++ b/rust/ballista/rust/scheduler/src/lib.rs
@@ -201,12 +201,13 @@ impl SchedulerGrpc for SchedulerServer {
 
         match file_type {
             FileType::Parquet => {
-                let parquet_exec = ParquetExec::try_from_path(&path, None, None, 1024, 1)
-                    .map_err(|e| {
-                        let msg = format!("Error opening parquet files: {}", e);
-                        error!("{}", msg);
-                        tonic::Status::internal(msg)
-                    })?;
+                let parquet_exec =
+                    ParquetExec::try_from_path(&path, None, None, 1024, 1, None)
+                        .map_err(|e| {
+                            let msg = format!("Error opening parquet files: {}", e);
+                            error!("{}", msg);
+                            tonic::Status::internal(msg)
+                        })?;
 
                 //TODO include statistics and any other info needed to reconstruct ParquetExec
                 Ok(Response::new(GetFileMetadataResult {
diff --git a/rust/ballista/rust/scheduler/src/main.rs b/rust/ballista/rust/scheduler/src/main.rs
index c166fdc388d..6f746292f65 100644
--- a/rust/ballista/rust/scheduler/src/main.rs
+++ b/rust/ballista/rust/scheduler/src/main.rs
@@ -29,12 +29,12 @@ use ballista_core::BALLISTA_VERSION;
 use ballista_core::{
     print_version, serde::protobuf::scheduler_grpc_server::SchedulerGrpcServer,
 };
+use ballista_scheduler::api::{get_routes, EitherBody, Error};
 #[cfg(feature = "etcd")]
 use ballista_scheduler::state::EtcdClient;
 #[cfg(feature = "sled")]
 use ballista_scheduler::state::StandaloneClient;
 use ballista_scheduler::{state::ConfigBackendClient, ConfigBackend, SchedulerServer};
-use ballista_scheduler::api::{get_routes, EitherBody, Error};
 
 use log::info;
 
@@ -63,8 +63,10 @@ async fn start_server(
     );
     Ok(Server::bind(&addr)
         .serve(make_service_fn(move |_| {
-            let scheduler_server = SchedulerServer::new(config_backend.clone(), namespace.clone());
-            let scheduler_grpc_server = SchedulerGrpcServer::new(scheduler_server.clone());
+            let scheduler_server =
+                SchedulerServer::new(config_backend.clone(), namespace.clone());
+            let scheduler_grpc_server =
+                SchedulerGrpcServer::new(scheduler_server.clone());
 
             let mut tonic = TonicServer::builder()
                 .add_service(scheduler_grpc_server)
diff --git a/rust/ballista/rust/scheduler/src/planner.rs b/rust/ballista/rust/scheduler/src/planner.rs
index f06dcfdfcec..e9f668a7d5f 100644
--- a/rust/ballista/rust/scheduler/src/planner.rs
+++ b/rust/ballista/rust/scheduler/src/planner.rs
@@ -34,7 +34,10 @@ use ballista_core::{
     execution_plans::{QueryStageExec, ShuffleReaderExec, UnresolvedShuffleExec},
     serde::scheduler::PartitionLocation,
 };
-use datafusion::execution::context::ExecutionContext;
+use datafusion::execution::context::{ExecutionConfig, ExecutionContext};
+use datafusion::physical_optimizer::coalesce_batches::CoalesceBatches;
+use datafusion::physical_optimizer::merge_exec::AddMergeExec;
+use datafusion::physical_optimizer::optimizer::PhysicalOptimizerRule;
 use datafusion::physical_plan::hash_aggregate::{AggregateMode, HashAggregateExec};
 use datafusion::physical_plan::hash_join::HashJoinExec;
 use datafusion::physical_plan::merge::MergeExec;
@@ -136,7 +139,13 @@ impl DistributedPlanner {
         }
 
         if let Some(adapter) = execution_plan.as_any().downcast_ref::<DFTableAdapter>() {
-            let ctx = ExecutionContext::new();
+            // remove Repartition rule because that isn't supported yet
+            let rules: Vec<Arc<dyn PhysicalOptimizerRule + Send + Sync>> = vec![
+                Arc::new(CoalesceBatches::new()),
+                Arc::new(AddMergeExec::new()),
+            ];
+            let config = ExecutionConfig::new().with_physical_optimizer_rules(rules);
+            let ctx = ExecutionContext::with_config(config);
             Ok((ctx.create_physical_plan(&adapter.logical_plan)?, stages))
         } else if let Some(merge) = execution_plan.as_any().downcast_ref::<MergeExec>() {
             let query_stage = create_query_stage(
diff --git a/rust/ballista/rust/scheduler/src/test_utils.rs b/rust/ballista/rust/scheduler/src/test_utils.rs
index 94397404777..330cc9a9332 100644
--- a/rust/ballista/rust/scheduler/src/test_utils.rs
+++ b/rust/ballista/rust/scheduler/src/test_utils.rs
@@ -15,10 +15,15 @@
 // specific language governing permissions and limitations
 // under the License.
 
+use std::sync::Arc;
+
 use ballista_core::error::Result;
 
 use arrow::datatypes::{DataType, Field, Schema};
-use datafusion::execution::context::ExecutionContext;
+use datafusion::execution::context::{ExecutionConfig, ExecutionContext};
+use datafusion::physical_optimizer::coalesce_batches::CoalesceBatches;
+use datafusion::physical_optimizer::merge_exec::AddMergeExec;
+use datafusion::physical_optimizer::optimizer::PhysicalOptimizerRule;
 use datafusion::physical_plan::csv::CsvReadOptions;
 
 pub const TPCH_TABLES: &[&str] = &[
@@ -26,7 +31,14 @@ pub const TPCH_TABLES: &[&str] = &[
 ];
 
 pub fn datafusion_test_context(path: &str) -> Result<ExecutionContext> {
-    let mut ctx = ExecutionContext::new();
+    // remove Repartition rule because that isn't supported yet
+    let rules: Vec<Arc<dyn PhysicalOptimizerRule + Send + Sync>> = vec![
+        Arc::new(CoalesceBatches::new()),
+        Arc::new(AddMergeExec::new()),
+    ];
+    let config = ExecutionConfig::new().with_physical_optimizer_rules(rules);
+    let mut ctx = ExecutionContext::with_config(config);
+
     for table in TPCH_TABLES {
         let schema = get_tpch_schema(table);
         let options = CsvReadOptions::new()

From 6888c41726126850b62f625bb84fcf1d7182f0da Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Thu, 15 Apr 2021 20:58:20 +0200
Subject: [PATCH 070/719] ARROW-6103: [Release][Java] Remove mvn release plugin
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We want to stop using the maven release plugin because it commits to the repo both in the `prepare` and `perform` tasks and adds complexity to the release process.

This PR removes the plugin and updates the release preparation script to use the maven versions plugin to update the version number in the pom files.

We will need to run `mvn deploy` to publish the artifacts and I'm not sure where that needs to happen (`mvn release:perform` was previously invoking the `deploy` phase).

Closes #9155 from andygrove/remove-mvn-release-plugin

Lead-authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Co-authored-by: Andy Grove <andygrove73@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 ...{00-prepare-test.rb => 01-prepare-test.rb} |  4 +-
 dev/release/{00-prepare.sh => 01-prepare.sh}  | 72 ++++++++++---------
 dev/release/03-binary-submit.sh               | 46 ++++++++++++
 .../{01-perform.sh => 04-binary-download.sh}  | 40 ++++-------
 .../{03-binary.sh => 05-binary-upload.sh}     | 17 +++--
 dev/release/post-11-java.sh                   | 69 ++++++++++++++++++
 java/pom.xml                                  | 12 +---
 7 files changed, 178 insertions(+), 82 deletions(-)
 rename dev/release/{00-prepare-test.rb => 01-prepare-test.rb} (99%)
 rename dev/release/{00-prepare.sh => 01-prepare.sh} (83%)
 create mode 100755 dev/release/03-binary-submit.sh
 rename dev/release/{01-perform.sh => 04-binary-download.sh} (55%)
 rename dev/release/{03-binary.sh => 05-binary-upload.sh} (91%)
 create mode 100755 dev/release/post-11-java.sh

diff --git a/dev/release/00-prepare-test.rb b/dev/release/01-prepare-test.rb
similarity index 99%
rename from dev/release/00-prepare-test.rb
rename to dev/release/01-prepare-test.rb
index 9e2a798e221..b316ad20a9d 100644
--- a/dev/release/00-prepare-test.rb
+++ b/dev/release/01-prepare-test.rb
@@ -31,7 +31,7 @@ def setup
       Dir.chdir(@test_git_repository) do
         @tag_name = "apache-arrow-#{@release_version}"
         @release_branch = "release-#{@release_version}-rc0"
-        @script = "dev/release/00-prepare.sh"
+        @script = "dev/release/01-prepare.sh"
         git("checkout", "-b", @release_branch, @current_commit)
         yield
       end
@@ -54,7 +54,7 @@ def prepare(*targets)
       env["PREPARE_#{target}"] = "1"
     end
     env = env.merge(additional_env)
-    sh(env, @script, @release_version, @next_version)
+    sh(env, @script, @release_version, @next_version, "0")
   end
 
   def parse_patch(patch)
diff --git a/dev/release/00-prepare.sh b/dev/release/01-prepare.sh
similarity index 83%
rename from dev/release/00-prepare.sh
rename to dev/release/01-prepare.sh
index 3e3ce19656a..80703c2d87f 100755
--- a/dev/release/00-prepare.sh
+++ b/dev/release/01-prepare.sh
@@ -21,8 +21,8 @@ set -ue
 
 SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
 
-if [ "$#" -ne 2 ]; then
-  echo "Usage: $0 <version> <next_version>"
+if [ "$#" -ne 3 ]; then
+  echo "Usage: $0 <version> <next_version> <rc-num>"
   exit 1
 fi
 
@@ -172,17 +172,47 @@ update_versions() {
 
 version=$1
 next_version=$2
-next_version_snapshot=${next_version}-SNAPSHOT
-tag=apache-arrow-${version}
+next_version_snapshot="${next_version}-SNAPSHOT"
+rc_number=$3
+
+release_tag="apache-arrow-${version}"
+release_branch="release-${version}"
+release_candidate_branch="release-${version}-rc${rc_number}"
 
 : ${PREPARE_DEFAULT:=1}
 : ${PREPARE_CHANGELOG:=${PREPARE_DEFAULT}}
 : ${PREPARE_LINUX_PACKAGES:=${PREPARE_DEFAULT}}
 : ${PREPARE_VERSION_PRE_TAG:=${PREPARE_DEFAULT}}
+: ${PREPARE_BRANCH:=${PREPARE_DEFAULT}}
 : ${PREPARE_TAG:=${PREPARE_DEFAULT}}
 : ${PREPARE_VERSION_POST_TAG:=${PREPARE_DEFAULT}}
 : ${PREPARE_DEB_PACKAGE_NAMES:=${PREPARE_DEFAULT}}
 
+if [ ${PREPARE_TAG} -gt 0 ]; then
+  if [ $(git tag -l "${release_tag}") ]; then
+    echo "Delete existing git tag $release_tag"
+    git tag -d "${release_tag}"
+  fi
+fi
+
+if [ ${PREPARE_BRANCH} -gt 0 ]; then
+  if [[ $(git branch -l "${release_candidate_branch}") ]]; then
+    next_rc_number=$(($rc_number+1))
+    echo "Branch ${release_candidate_branch} already exists, so create a new release candidate:"
+    echo "1. Checkout the master branch for major releases and maint-<version> for patch releases."
+    echo "2. Execute the script again with bumped RC number."
+    echo "Commands:"
+    echo "   git checkout master"
+    echo "   dev/release/01-prepare.sh ${version} ${next_version} ${next_rc_number}"
+    exit 1
+  fi
+
+  echo "Create local branch ${release_candidate_branch} for release candidate ${rc_number}"
+  git checkout -b ${release_candidate_branch}
+fi
+
+############################## Pre-Tag Commits ##############################
+
 if [ ${PREPARE_CHANGELOG} -gt 0 ]; then
   echo "Updating changelog for $version"
   # Update changelog
@@ -204,40 +234,16 @@ if [ ${PREPARE_LINUX_PACKAGES} -gt 0 ]; then
 fi
 
 if [ ${PREPARE_VERSION_PRE_TAG} -gt 0 ]; then
-  echo "prepare release ${version} on tag ${tag} then reset to version ${next_version_snapshot}"
+  echo "Prepare release ${version} on tag ${release_tag} then reset to version ${next_version_snapshot}"
 
   update_versions "${version}" "${next_version}" "release"
   git commit -m "[Release] Update versions for ${version}"
 fi
 
+############################## Tag the Release ##############################
+
 if [ ${PREPARE_TAG} -gt 0 ]; then
-  profile=arrow-jni # this includes components which depend on arrow cpp.
-  pushd "${SOURCE_DIR}/../../java"
-  git submodule update --init --recursive
-  cpp_dir="${PWD}/../cpp"
-  cpp_build_dir=$(mktemp -d -t "apache-arrow-cpp.XXXXX")
-  pushd ${cpp_build_dir}
-  cmake \
-    -DARROW_GANDIVA=ON \
-    -DARROW_GANDIVA_JAVA=ON \
-    -DARROW_JNI=ON \
-    -DARROW_ORC=ON \
-    -DCMAKE_BUILD_TYPE=release \
-    -G Ninja \
-    "${cpp_dir}"
-  ninja
-  popd
-  mvn release:clean
-  mvn \
-    release:prepare \
-    -Darguments=-Darrow.cpp.build.dir=${cpp_build_dir}/release \
-    -DautoVersionSubmodules \
-    -DdevelopmentVersion=${next_version_snapshot} \
-    -DreleaseVersion=${version} \
-    -Dtag=${tag} \
-    -P ${profile}
-  rm -rf ${cpp_build_dir}
-  popd
+  git tag -a "${release_tag}" -m "[Release] Apache Arrow Release ${version}"
 fi
 
 ############################## Post-Tag Commits #############################
@@ -283,5 +289,3 @@ if [ ${PREPARE_DEB_PACKAGE_NAMES} -gt 0 ]; then
     cd -
   fi
 fi
-
-echo "Finish staging binary artifacts by running: dev/release/01-perform.sh"
diff --git a/dev/release/03-binary-submit.sh b/dev/release/03-binary-submit.sh
new file mode 100755
index 00000000000..1bdbc2077be
--- /dev/null
+++ b/dev/release/03-binary-submit.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e
+
+if [ "$#" -ne 2 ]; then
+  echo "Usage: $0 <version> <rc-num>"
+  exit
+fi
+
+version=$1
+rc_number=$2
+version_with_rc="${version}-rc${rc_number}"
+crossbow_job_prefix="release-${version_with_rc}"
+
+release_tag="apache-arrow-${version}"
+release_candidate_branch="release-${version}-rc${rc_number}"
+
+: ${GIT_REMOTE:="origin"}
+
+git checkout ${release_candidate_branch}
+git push -u ${GIT_REMOTE} ${release_candidate_branch}
+
+# archery will submit a job with id: "${crossbow_job_prefix}-0" unless there
+# are jobs submitted with the same prefix (the integer at the end is auto
+# incremented)
+archery crossbow submit \
+    --job-prefix ${crossbow_job_prefix} \
+    --arrow-version ${version_with_rc} \
+    --group packaging
diff --git a/dev/release/01-perform.sh b/dev/release/04-binary-download.sh
similarity index 55%
rename from dev/release/01-perform.sh
rename to dev/release/04-binary-download.sh
index 94ae61f4a22..d0b61b05884 100755
--- a/dev/release/01-perform.sh
+++ b/dev/release/04-binary-download.sh
@@ -17,34 +17,22 @@
 # specific language governing permissions and limitations
 # under the License.
 #
-set -e
-
-SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 
-pushd "${SOURCE_DIR}/../../java"
-git submodule update --init --recursive
+set -e
 
-profile=arrow-jni # this includes components which depend on arrow cpp.
+if [ "$#" -ne 2 ]; then
+  echo "Usage: $0 <version> <rc-num>"
+  exit
+fi
 
-cpp_dir="${PWD}/../cpp"
-cpp_build_dir=$(mktemp -d -t "apache-arrow-cpp.XXXXX")
-pushd ${cpp_build_dir}
-cmake \
-  -DARROW_GANDIVA=ON \
-  -DARROW_GANDIVA_JAVA=ON \
-  -DARROW_JNI=ON \
-  -DARROW_ORC=ON \
-  -DCMAKE_BUILD_TYPE=release \
-  -G Ninja \
-  "${cpp_dir}"
-ninja
-popd
+version=$1
+rc_number=$2
+version_with_rc="${version}-rc${rc_number}"
+crossbow_job_prefix="release-${version_with_rc}"
 
-export ARROW_TEST_DATA=${PWD}/../testing/data
-mvn \
-  release:perform \
-  -Darguments=-Darrow.cpp.build.dir=${cpp_build_dir}/release \
-  -P ${profile}
-rm -rf ${cpp_build_dir}
+# archery will submit a job with id: "${crossbow_job_prefix}-0" unless there
+# are jobs submitted with the same prefix (the integer at the end is auto
+# incremented)
+: ${CROSSBOW_JOB_ID:="${crossbow_job_prefix}-0"}
 
-popd
+archery crossbow download-artifacts ${CROSSBOW_JOB_ID}
diff --git a/dev/release/03-binary.sh b/dev/release/05-binary-upload.sh
similarity index 91%
rename from dev/release/03-binary.sh
rename to dev/release/05-binary-upload.sh
index 3b845a1bdf0..4a360c28b04 100755
--- a/dev/release/03-binary.sh
+++ b/dev/release/05-binary-upload.sh
@@ -23,19 +23,20 @@ set -o pipefail
 
 SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 
-if [ "$#" -ne 3 ]; then
-  echo "Usage: $0 <version> <rc-num> <artifact-dir>"
+if [ "$#" -ne 2 ]; then
+  echo "Usage: $0 <version> <rc-num>"
   exit
 fi
 
 version=$1
 rc=$2
-artifact_dir=$3
 
-if [ -z "$artifact_dir" ]; then
-  echo "artifact_dir is empty"
-  exit 1
-fi
+version_with_rc="${version}-rc${rc}"
+crossbow_job_prefix="release-${version_with_rc}"
+crossbow_package_dir="${SOURCE_DIR}/../../packages"
+
+: ${CROSSBOW_JOB_ID:="${crossbow_job_prefix}-0"}
+artifact_dir="${crossbow_package_dir}/${CROSSBOW_JOB_ID}"
 
 if [ ! -e "$artifact_dir" ]; then
   echo "$artifact_dir does not exist"
@@ -47,8 +48,6 @@ if [ ! -d "$artifact_dir" ]; then
   exit 1
 fi
 
-artifact_dir="$(pwd)/${artifact_dir}"
-
 cd "${SOURCE_DIR}"
 
 : ${BINTRAY_REPOSITORY_CUSTOM:=${BINTRAY_REPOSITORY:-}}
diff --git a/dev/release/post-11-java.sh b/dev/release/post-11-java.sh
new file mode 100755
index 00000000000..d9dc32a7f55
--- /dev/null
+++ b/dev/release/post-11-java.sh
@@ -0,0 +1,69 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e
+set -o pipefail
+
+SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+if [ "$#" -ne 1 ]; then
+  echo "Usage: $0 <version>"
+  exit
+fi
+
+version=$1
+archive_name=apache-arrow-${version}
+tar_gz=${archive_name}.tar.gz
+
+rm -f ${tar_gz}
+curl \
+  --remote-name \
+  --fail \
+  https://downloads.apache.org/arrow/arrow-${version}/${tar_gz}
+rm -rf ${archive_name}
+tar xf ${tar_gz}
+
+# build the jni bindings similarly like the 01-perform.sh does
+mkdir -p ${archive_name}/cpp/java-build
+pushd ${archive_name}/cpp/java-build
+cmake \
+  -DARROW_GANDIVA=ON \
+  -DARROW_GANDIVA_JAVA=ON \
+  -DARROW_JNI=ON \
+  -DARROW_ORC=ON \
+  -DCMAKE_BUILD_TYPE=release \
+  -G Ninja \
+  ..
+ninja
+popd
+
+# go in the java subfolder
+pushd ${archive_name}/java
+# stage the artifacts using both the apache-release and arrow-jni profiles
+mvn -Papache-release,arrow-jni -Darrow.cpp.build.dir=$(realpath ../cpp/java-build) deploy
+popd
+
+echo "Success! The maven artifacts have been stated. Proceed with the following steps:"
+echo "1. Login to the apache repository: https://repository.apache.org/#stagingRepositories"
+echo "2. Select the arrow staging repository you just just created: orgapachearrow-100x"
+echo "3. Click the \"close\" button"
+echo "4. Once validation has passed, click the \"release\" button"
+echo ""
+echo "Note, that you must set up Maven to be able to publish to Apache's repositories."
+echo "Read more at https://www.apache.org/dev/publishing-maven-artifacts.html."
diff --git a/java/pom.xml b/java/pom.xml
index c776b833a17..35a87cb08ec 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -401,17 +401,7 @@
             <argLine>-Darrow.vector.max_allocation_bytes=1048576</argLine>
           </configuration>
         </plugin>
-        <plugin>
-          <groupId>org.apache.maven.plugins</groupId>
-          <artifactId>maven-release-plugin</artifactId>
-          <version>2.5.2</version>
-          <configuration>
-            <useReleaseProfile>false</useReleaseProfile>
-            <pushChanges>false</pushChanges>
-            <goals>deploy</goals>
-            <arguments>-Papache-release ${arguments}</arguments>
-          </configuration>
-        </plugin>
+
 
         <!--This plugin's configuration is used to store Eclipse m2e settings
           only. It has no influence on the Maven build itself. -->

From d7f90cab1c95744affdce57091d4d26c4afd7c9a Mon Sep 17 00:00:00 2001
From: Raphael Taylor-Davies <r.taylordavies@googlemail.com>
Date: Thu, 15 Apr 2021 15:12:22 -0400
Subject: [PATCH 071/719] ARROW-12400: [Rust] Re-enable tests in
 arrow::array::transform

These tests were all commented out in #9329. Given the PR made changes to the commented out code, I'm inclined to think this was an accidental omission? If not, happy for this to be closed :grinning:

FYI @jorgecarleitao

Signed-off-by: Raphael Taylor-Davies <r.taylordavies@googlemail.com>

Closes #10048 from tustvold/enable-transform-tests

Authored-by: Raphael Taylor-Davies <r.taylordavies@googlemail.com>
Signed-off-by: Andrew Lamb <andrew@nerdnetworks.org>
---
 rust/arrow/src/array/transform/mod.rs | 109 +++++++++++---------------
 1 file changed, 46 insertions(+), 63 deletions(-)

diff --git a/rust/arrow/src/array/transform/mod.rs b/rust/arrow/src/array/transform/mod.rs
index 82dfc1c4707..4dc7b56d1c3 100644
--- a/rust/arrow/src/array/transform/mod.rs
+++ b/rust/arrow/src/array/transform/mod.rs
@@ -398,7 +398,6 @@ impl<'a> MutableArrayData<'a> {
     }
 }
 
-/*
 #[cfg(test)]
 mod tests {
     use std::{convert::TryFrom, sync::Arc};
@@ -424,8 +423,7 @@ mod tests {
     #[test]
     fn test_primitive() {
         let b = UInt8Array::from(vec![Some(1), Some(2), Some(3)]);
-        let b = b.data();
-        let arrays = vec![b];
+        let arrays = vec![b.data()];
         let mut a = MutableArrayData::new(arrays, false, 3);
         a.extend(0, 0, 2);
         let result = a.freeze();
@@ -439,8 +437,7 @@ mod tests {
     fn test_primitive_offset() {
         let b = UInt8Array::from(vec![Some(1), Some(2), Some(3)]);
         let b = b.slice(1, 2);
-        let b = b.data();
-        let arrays = vec![b];
+        let arrays = vec![b.data()];
         let mut a = MutableArrayData::new(arrays, false, 2);
         a.extend(0, 0, 2);
         let result = a.freeze();
@@ -453,8 +450,8 @@ mod tests {
     #[test]
     fn test_primitive_null_offset() {
         let b = UInt8Array::from(vec![Some(1), None, Some(3)]);
-        let b = b.slice(1, 2).data();
-        let arrays = vec![b];
+        let b = b.slice(1, 2);
+        let arrays = vec![b.data()];
         let mut a = MutableArrayData::new(arrays, false, 2);
         a.extend(0, 0, 2);
         let result = a.freeze();
@@ -466,8 +463,8 @@ mod tests {
     #[test]
     fn test_primitive_null_offset_nulls() {
         let b = UInt8Array::from(vec![Some(1), Some(2), Some(3)]);
-        let b = b.slice(1, 2).data();
-        let arrays = vec![b];
+        let b = b.slice(1, 2);
+        let arrays = vec![b.data()];
         let mut a = MutableArrayData::new(arrays, true, 2);
         a.extend(0, 0, 2);
         a.extend_nulls(3);
@@ -489,8 +486,8 @@ mod tests {
         builder.append(true)?;
         builder.values().append_slice(&[6, 7, 8])?;
         builder.append(true)?;
-        let array = builder.finish().data();
-        let arrays = vec![array];
+        let array = builder.finish();
+        let arrays = vec![array.data()];
 
         let mut mutable = MutableArrayData::new(arrays, false, 0);
         mutable.extend(0, 0, 1);
@@ -512,9 +509,8 @@ mod tests {
     /// tests extending from a variable-sized (strings and binary) array w/ offset with nulls
     #[test]
     fn test_variable_sized_nulls() {
-        let array =
-            StringArray::from(vec![Some("a"), Some("bc"), None, Some("defh")]).data();
-        let arrays = vec![array];
+        let array = StringArray::from(vec![Some("a"), Some("bc"), None, Some("defh")]);
+        let arrays = vec![array.data()];
 
         let mut mutable = MutableArrayData::new(arrays, false, 0);
 
@@ -531,11 +527,10 @@ mod tests {
     /// with an offset and nulls
     #[test]
     fn test_variable_sized_offsets() {
-        let array =
-            StringArray::from(vec![Some("a"), Some("bc"), None, Some("defh")]).data();
+        let array = StringArray::from(vec![Some("a"), Some("bc"), None, Some("defh")]);
         let array = array.slice(1, 3);
 
-        let arrays = vec![&array];
+        let arrays = vec![array.data()];
 
         let mut mutable = MutableArrayData::new(arrays, false, 0);
 
@@ -550,11 +545,10 @@ mod tests {
 
     #[test]
     fn test_string_offsets() {
-        let array =
-            StringArray::from(vec![Some("a"), Some("bc"), None, Some("defh")]).data();
+        let array = StringArray::from(vec![Some("a"), Some("bc"), None, Some("defh")]);
         let array = array.slice(1, 3);
 
-        let arrays = vec![&array];
+        let arrays = vec![array.data()];
 
         let mut mutable = MutableArrayData::new(arrays, false, 0);
 
@@ -569,10 +563,10 @@ mod tests {
 
     #[test]
     fn test_multiple_with_nulls() {
-        let array1 = StringArray::from(vec!["hello", "world"]).data();
-        let array2 = StringArray::from(vec![Some("1"), None]).data();
+        let array1 = StringArray::from(vec!["hello", "world"]);
+        let array2 = StringArray::from(vec![Some("1"), None]);
 
-        let arrays = vec![array1, array2];
+        let arrays = vec![array1.data(), array2.data()];
 
         let mut mutable = MutableArrayData::new(arrays, false, 5);
 
@@ -589,11 +583,10 @@ mod tests {
 
     #[test]
     fn test_string_null_offset_nulls() {
-        let array =
-            StringArray::from(vec![Some("a"), Some("bc"), None, Some("defh")]).data();
+        let array = StringArray::from(vec![Some("a"), Some("bc"), None, Some("defh")]);
         let array = array.slice(1, 3);
 
-        let arrays = vec![&array];
+        let arrays = vec![array.data()];
 
         let mut mutable = MutableArrayData::new(arrays, true, 0);
 
@@ -609,9 +602,8 @@ mod tests {
 
     #[test]
     fn test_bool() {
-        let array =
-            BooleanArray::from(vec![Some(false), Some(true), None, Some(false)]).data();
-        let arrays = vec![array];
+        let array = BooleanArray::from(vec![Some(false), Some(true), None, Some(false)]);
+        let arrays = vec![array.data()];
 
         let mut mutable = MutableArrayData::new(arrays, false, 0);
 
@@ -626,9 +618,9 @@ mod tests {
 
     #[test]
     fn test_null() {
-        let array1 = NullArray::new(10).data();
-        let array2 = NullArray::new(5).data();
-        let arrays = vec![array1, array2];
+        let array1 = NullArray::new(10);
+        let array2 = NullArray::new(5);
+        let arrays = vec![array1.data(), array2.data()];
 
         let mut mutable = MutableArrayData::new(arrays, false, 0);
 
@@ -698,9 +690,8 @@ mod tests {
 
         let array =
             StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())])
-                .unwrap()
-                .data();
-        let arrays = vec![array];
+                .unwrap();
+        let arrays = vec![array.data()];
         let mut mutable = MutableArrayData::new(arrays, false, 0);
 
         mutable.extend(0, 1, 3);
@@ -735,14 +726,13 @@ mod tests {
         let array =
             StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())])
                 .unwrap()
-                .slice(1, 3)
-                .data();
-        let arrays = vec![array.as_ref()];
+                .slice(1, 3);
+        let arrays = vec![array.data()];
         let mut mutable = MutableArrayData::new(arrays, false, 0);
 
         mutable.extend(0, 1, 3);
         let data = mutable.freeze();
-        let array = StructArray::from(Arc::new(data));
+        let array = StructArray::from(data);
 
         let expected_strings: ArrayRef =
             Arc::new(StringArray::from(vec![None, Some("mark")]));
@@ -774,9 +764,8 @@ mod tests {
 
         let array =
             StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())])
-                .unwrap()
-                .data();
-        let arrays = vec![array];
+                .unwrap();
+        let arrays = vec![array.data()];
 
         let mut mutable = MutableArrayData::new(arrays, false, 0);
 
@@ -812,9 +801,8 @@ mod tests {
 
         let array =
             StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())])
-                .unwrap()
-                .data();
-        let arrays = vec![array, array];
+                .unwrap();
+        let arrays = vec![array.data(), array.data()];
         let mut mutable = MutableArrayData::new(arrays, false, 0);
 
         mutable.extend(0, 1, 3);
@@ -838,12 +826,11 @@ mod tests {
         let array = FixedSizeBinaryArray::try_from_iter(
             vec![vec![0, 0], vec![0, 1], vec![0, 2]].into_iter(),
         )
-        .expect("Failed to create FixedSizeBinaryArray from iterable")
-        .data();
+        .expect("Failed to create FixedSizeBinaryArray from iterable");
         let array = array.slice(1, 2);
         // = [[0, 1], [0, 2]] due to the offset = 1
 
-        let arrays = vec![&array];
+        let arrays = vec![array.data()];
 
         let mut mutable = MutableArrayData::new(arrays, false, 0);
 
@@ -869,7 +856,7 @@ mod tests {
         builder.values().append_slice(&[6, 7, 8])?;
         builder.values().append_slice(&[9, 10, 11])?;
         builder.append(true)?;
-        let a = builder.finish().data();
+        let a = builder.finish();
 
         let a_builder = Int64Builder::new(24);
         let mut a_builder = ListBuilder::<Int64Builder>::new(a_builder);
@@ -880,10 +867,10 @@ mod tests {
         a_builder.append(true)?;
         let b = a_builder.finish();
 
-        let b = b.data();
         let c = b.slice(1, 2);
 
-        let mut mutable = MutableArrayData::new(vec![a, b, &c], false, 1);
+        let mut mutable =
+            MutableArrayData::new(vec![a.data(), b.data(), c.data()], false, 1);
         mutable.extend(0, 0, a.len());
         mutable.extend(1, 0, b.len());
         mutable.extend(2, 0, c.len());
@@ -1016,7 +1003,7 @@ mod tests {
         builder.values().append_value("Arrow")?;
         builder.values().append_null()?;
         builder.append(true)?;
-        let a = builder.finish().data();
+        let a = builder.finish();
 
         // [["alpha", "beta"], [None], ["gamma", "delta", None]]
         let mut builder = ListBuilder::new(StringBuilder::new(32));
@@ -1029,9 +1016,9 @@ mod tests {
         builder.values().append_value("delta")?;
         builder.values().append_null()?;
         builder.append(true)?;
-        let b = builder.finish().data();
+        let b = builder.finish();
 
-        let mut mutable = MutableArrayData::new(vec![a, b], false, 10);
+        let mut mutable = MutableArrayData::new(vec![a.data(), b.data()], false, 10);
 
         mutable.extend(0, 0, a.len());
         mutable.extend(1, 0, b.len());
@@ -1082,8 +1069,7 @@ mod tests {
     fn test_fixed_size_binary_append() {
         let a = vec![Some(vec![1, 2]), Some(vec![3, 4]), Some(vec![5, 6])];
         let a = FixedSizeBinaryArray::try_from_sparse_iter(a.into_iter())
-            .expect("Failed to create FixedSizeBinaryArray from iterable")
-            .data();
+            .expect("Failed to create FixedSizeBinaryArray from iterable");
 
         let b = vec![
             None,
@@ -1094,10 +1080,9 @@ mod tests {
             None,
         ];
         let b = FixedSizeBinaryArray::try_from_sparse_iter(b.into_iter())
-            .expect("Failed to create FixedSizeBinaryArray from iterable")
-            .data();
+            .expect("Failed to create FixedSizeBinaryArray from iterable");
 
-        let mut mutable = MutableArrayData::new(vec![a, b], false, 10);
+        let mut mutable = MutableArrayData::new(vec![a.data(), b.data()], false, 10);
 
         mutable.extend(0, 0, a.len());
         mutable.extend(1, 0, b.len());
@@ -1127,9 +1112,8 @@ mod tests {
             // b[4..4]
         ];
         let expected = FixedSizeBinaryArray::try_from_sparse_iter(expected.into_iter())
-            .expect("Failed to create FixedSizeBinaryArray from iterable")
-            .data();
-        assert_eq!(&result, expected.as_ref());
+            .expect("Failed to create FixedSizeBinaryArray from iterable");
+        assert_eq!(&result, expected.data());
     }
 
     /*
@@ -1220,4 +1204,3 @@ mod tests {
     }
     */
 }
- */

From 3e5895d1dbc95ece2dccfdd1f953a9057d28f0af Mon Sep 17 00:00:00 2001
From: Nathaniel Bauernfeind <nate.bauernfeind@gmail.com>
Date: Thu, 15 Apr 2021 20:19:10 -0700
Subject: [PATCH 072/719] ARROW-12111: [Java] Generate flatbuffer files using
 flatc 1.12.0

This is a resolution of ARROW-12111, which came up on the dev mailing list titled "[Java] Source control of generated flatbuffers code", the main complaint was that the plugin used to generate the flatbuffer files was unavailable for windows developers.

I, personally, would also like to see this upgraded to flatbuffers 1.12 from 1.9.

Closes #10058 from nbauernfeind/java-flatbuffer-1.12

Authored-by: Nathaniel Bauernfeind <nate.bauernfeind@gmail.com>
Signed-off-by: Micah Kornfield <emkornfield@gmail.com>
---
 java/README.md                                |  52 ++++--
 java/format/pom.xml                           | 120 ------------
 .../java/org/apache/arrow/flatbuf/Binary.java |  51 ++++++
 .../java/org/apache/arrow/flatbuf/Block.java  |  61 ++++++
 .../apache/arrow/flatbuf/BodyCompression.java |  72 ++++++++
 .../arrow/flatbuf/BodyCompressionMethod.java  |  43 +++++
 .../java/org/apache/arrow/flatbuf/Bool.java   |  48 +++++
 .../java/org/apache/arrow/flatbuf/Buffer.java |  63 +++++++
 .../apache/arrow/flatbuf/CompressionType.java |  30 +++
 .../java/org/apache/arrow/flatbuf/Date.java   |  65 +++++++
 .../org/apache/arrow/flatbuf/DateUnit.java    |  30 +++
 .../org/apache/arrow/flatbuf/Decimal.java     |  81 ++++++++
 .../apache/arrow/flatbuf/DictionaryBatch.java |  79 ++++++++
 .../arrow/flatbuf/DictionaryEncoding.java     |  88 +++++++++
 .../apache/arrow/flatbuf/DictionaryKind.java  |  36 ++++
 .../org/apache/arrow/flatbuf/Duration.java    |  57 ++++++
 .../org/apache/arrow/flatbuf/Endianness.java  |  34 ++++
 .../org/apache/arrow/flatbuf/Feature.java     |  62 +++++++
 .../java/org/apache/arrow/flatbuf/Field.java  | 120 ++++++++++++
 .../org/apache/arrow/flatbuf/FieldNode.java   |  68 +++++++
 .../apache/arrow/flatbuf/FixedSizeBinary.java |  60 ++++++
 .../apache/arrow/flatbuf/FixedSizeList.java   |  60 ++++++
 .../apache/arrow/flatbuf/FloatingPoint.java   |  57 ++++++
 .../java/org/apache/arrow/flatbuf/Footer.java | 100 ++++++++++
 .../java/org/apache/arrow/flatbuf/Int.java    |  61 ++++++
 .../org/apache/arrow/flatbuf/Interval.java    |  57 ++++++
 .../apache/arrow/flatbuf/IntervalUnit.java    |  30 +++
 .../org/apache/arrow/flatbuf/KeyValue.java    |  70 +++++++
 .../org/apache/arrow/flatbuf/LargeBinary.java |  52 ++++++
 .../org/apache/arrow/flatbuf/LargeList.java   |  52 ++++++
 .../org/apache/arrow/flatbuf/LargeUtf8.java   |  52 ++++++
 .../java/org/apache/arrow/flatbuf/List.java   |  48 +++++
 .../java/org/apache/arrow/flatbuf/Map.java    |  87 +++++++++
 .../org/apache/arrow/flatbuf/Message.java     |  81 ++++++++
 .../apache/arrow/flatbuf/MessageHeader.java   |  44 +++++
 .../apache/arrow/flatbuf/MetadataVersion.java |  54 ++++++
 .../java/org/apache/arrow/flatbuf/Null.java   |  51 ++++++
 .../org/apache/arrow/flatbuf/Precision.java   |  31 ++++
 .../org/apache/arrow/flatbuf/RecordBatch.java | 103 +++++++++++
 .../java/org/apache/arrow/flatbuf/Schema.java | 102 +++++++++++
 .../flatbuf/SparseMatrixCompressedAxis.java   |  30 +++
 .../arrow/flatbuf/SparseMatrixIndexCSX.java   | 114 ++++++++++++
 .../apache/arrow/flatbuf/SparseTensor.java    |  92 ++++++++++
 .../arrow/flatbuf/SparseTensorIndex.java      |  32 ++++
 .../arrow/flatbuf/SparseTensorIndexCOO.java   | 118 ++++++++++++
 .../arrow/flatbuf/SparseTensorIndexCSF.java   | 173 ++++++++++++++++++
 .../org/apache/arrow/flatbuf/Struct_.java     |  53 ++++++
 .../java/org/apache/arrow/flatbuf/Tensor.java |  91 +++++++++
 .../org/apache/arrow/flatbuf/TensorDim.java   |  74 ++++++++
 .../java/org/apache/arrow/flatbuf/Time.java   |  66 +++++++
 .../org/apache/arrow/flatbuf/TimeUnit.java    |  32 ++++
 .../org/apache/arrow/flatbuf/Timestamp.java   |  93 ++++++++++
 .../java/org/apache/arrow/flatbuf/Type.java   |  55 ++++++
 .../java/org/apache/arrow/flatbuf/Union.java  |  74 ++++++++
 .../org/apache/arrow/flatbuf/UnionMode.java   |  30 +++
 .../java/org/apache/arrow/flatbuf/Utf8.java   |  51 ++++++
 java/pom.xml                                  |   5 +-
 57 files changed, 3561 insertions(+), 134 deletions(-)
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/Binary.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/Block.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/BodyCompression.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/BodyCompressionMethod.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/Bool.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/Buffer.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/CompressionType.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/Date.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/DateUnit.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/Decimal.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/DictionaryBatch.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/DictionaryEncoding.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/DictionaryKind.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/Duration.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/Endianness.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/Feature.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/Field.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/FieldNode.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/FixedSizeBinary.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/FixedSizeList.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/FloatingPoint.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/Footer.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/Int.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/Interval.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/IntervalUnit.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/KeyValue.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/LargeBinary.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/LargeList.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/LargeUtf8.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/List.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/Map.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/Message.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/MessageHeader.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/MetadataVersion.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/Null.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/Precision.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/RecordBatch.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/Schema.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/SparseMatrixCompressedAxis.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/SparseMatrixIndexCSX.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/SparseTensor.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/SparseTensorIndex.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/SparseTensorIndexCOO.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/SparseTensorIndexCSF.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/Struct_.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/Tensor.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/TensorDim.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/Time.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/TimeUnit.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/Timestamp.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/Type.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/Union.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/UnionMode.java
 create mode 100644 java/format/src/main/java/org/apache/arrow/flatbuf/Utf8.java

diff --git a/java/README.md b/java/README.md
index 9e6f657457e..29d1fcf4c44 100644
--- a/java/README.md
+++ b/java/README.md
@@ -24,7 +24,7 @@
 The following guides explain the fundamental data structures used in the Java implementation of Apache Arrow.
 
 - [ValueVector](https://arrow.apache.org/docs/java/vector.html) is an abstraction that is used to store a sequence of values having the same type in an individual column.
-- [VectorSchemaRoot](https://arrow.apache.org/docs/java/vector_schema_root.html) is a container that can hold multiple vectors based on a schema. 
+- [VectorSchemaRoot](https://arrow.apache.org/docs/java/vector_schema_root.html) is a container that can hold multiple vectors based on a schema.
 - The [Reading/Writing IPC formats](https://arrow.apache.org/docs/java/ipc.html) guide explains how to stream record batches as well as serializing record batches to files.
 
 Generated javadoc documentation is available [here](https://arrow.apache.org/docs/java/).
@@ -64,26 +64,56 @@ and arrow-format into a single JAR.  Using the classifier "shade-format-flatbuff
 pom.xml will make use of this JAR, you can then exclude/resolve the original dependency to
 a version of your choosing.
 
+### Updating the flatbuffers generated code
+
+1. Verify that your version of flatc matches the declared dependency:
+
+```bash
+$ flatc --version
+flatc version 1.12.0
+
+$ grep "dep.fbs.version" java/pom.xml
+    <dep.fbs.version>1.12.0</dep.fbs.version>
+```
+
+2. Generate the flatbuffer java files by performing the following:
+
+```bash
+cd $ARROW_HOME
+
+# remove the existing files
+rm -rf java/format/src
+
+# regenerate from the .fbs files
+flatc --java -o java/format/src/main/java format/*.fbs
+
+# prepend license header
+find java/format/src -type f | while read file; do
+  (cat header | while read line; do echo "// $line"; done; cat $file) > $file.tmp
+  mv $file.tmp $file
+done
+```
+
 ## Performance Tuning
 
 There are several system/environmental variables that users can configure.  These trade off safety (they turn off checking) for speed.  Typically they are only used in production settings after the code has been thoroughly tested without using them.
 
-* Bounds Checking for memory accesses: Bounds checking is on by default.  You can disable it by setting either the 
+* Bounds Checking for memory accesses: Bounds checking is on by default.  You can disable it by setting either the
 system property("arrow.enable_unsafe_memory_access") or the environmental variable
-("ARROW_ENABLE_UNSAFE_MEMORY_ACCESS") to "true". When both the system property and the environmental 
+("ARROW_ENABLE_UNSAFE_MEMORY_ACCESS") to "true". When both the system property and the environmental
 variable are set, the system property takes precedence.
 
-* null checking for gets: ValueVector get methods (not getObject) methods by default verify the slot is not null.  You can disable it by setting either the 
-system property("arrow.enable_null_check_for_get") or the environmental variable 
-("ARROW_ENABLE_NULL_CHECK_FOR_GET") to "false". When both the system property and the environmental 
-variable are set, the system property takes precedence. 
+* null checking for gets: ValueVector get methods (not getObject) methods by default verify the slot is not null.  You can disable it by setting either the
+system property("arrow.enable_null_check_for_get") or the environmental variable
+("ARROW_ENABLE_NULL_CHECK_FOR_GET") to "false". When both the system property and the environmental
+variable are set, the system property takes precedence.
 
 ## Java Properties
 
  * For java 9 or later, should set "-Dio.netty.tryReflectionSetAccessible=true".
 This fixes `java.lang.UnsupportedOperationException: sun.misc.Unsafe or java.nio.DirectByteBuffer.(long, int) not available`. thrown by netty.
  * To support duplicate fields in a `StructVector` enable "-Darrow.struct.conflict.policy=CONFLICT_APPEND".
-Duplicate fields are ignored (`CONFLICT_REPLACE`) by default and overwritten. To support different policies for 
+Duplicate fields are ignored (`CONFLICT_REPLACE`) by default and overwritten. To support different policies for
 conflicting or duplicate fields set this JVM flag or use the correct static constructor methods for `StructVector`s.
 
 ## Java Code Style Guide
@@ -118,10 +148,10 @@ See [Logback Configuration][1] for more details.
 
 ## Integration Tests
 
-Integration tests which require more time or more memory can be run by activating 
+Integration tests which require more time or more memory can be run by activating
 the `integration-tests` profile. This activates the [maven failsafe][4] plugin
 and any class prefixed with `IT` will be run during the testing phase. The integration
-tests currently require a larger amount of memory (>4GB) and time to complete. To activate 
+tests currently require a larger amount of memory (>4GB) and time to complete. To activate
 the profile:
 
 ```bash
@@ -131,4 +161,4 @@ mvn -Pintegration-tests <rest of mvn arguments>
 [1]: https://logback.qos.ch/manual/configuration.html
 [2]: https://github.com/apache/arrow/blob/master/cpp/README.md
 [3]: http://google.github.io/styleguide/javaguide.html
-[4]: https://maven.apache.org/surefire/maven-failsafe-plugin/
\ No newline at end of file
+[4]: https://maven.apache.org/surefire/maven-failsafe-plugin/
diff --git a/java/format/pom.xml b/java/format/pom.xml
index 07290ac7998..4d6599e0bfa 100644
--- a/java/format/pom.xml
+++ b/java/format/pom.xml
@@ -23,13 +23,6 @@
 <name>Arrow Format</name>
 <description>Generated Java files from the IPC Flatbuffer definitions.</description>
 
-  <properties>
-    <flatc.download.skip>false</flatc.download.skip>
-    <flatc.executable>${project.build.directory}/flatc-${os.detected.classifier}-${dep.flatc.version}.exe</flatc.executable>
-    <flatc.generated.files>${project.build.directory}/generated-sources/flatc</flatc.generated.files>
-    <os-maven-plugin.version>1.5.0.Final</os-maven-plugin.version>
-  </properties>
-
   <dependencies>
     <dependency>
       <groupId>com.google.flatbuffers</groupId>
@@ -38,121 +31,8 @@
   </dependencies>
 
  <build>
-  <extensions>
-    <!-- provides os.detected.classifier (i.e. linux-x86_64, osx-x86_64) property -->
-    <extension>
-      <groupId>kr.motd.maven</groupId>
-      <artifactId>os-maven-plugin</artifactId>
-      <version>${os-maven-plugin.version}</version>
-    </extension>
-  </extensions>
 
   <plugins>
-    <plugin> <!-- download the flatbuffer compiler -->
-      <groupId>org.apache.maven.plugins</groupId>
-      <artifactId>maven-dependency-plugin</artifactId>
-      <executions>
-        <execution>
-          <id>copy-flatc</id>
-          <phase>initialize</phase>
-          <goals>
-            <goal>copy</goal>
-          </goals>
-          <configuration>
-            <artifactItems>
-              <artifactItem>
-                <groupId>com.github.icexelloss</groupId>
-                <artifactId>flatc-${os.detected.classifier}</artifactId>
-                <version>${dep.flatc.version}</version>
-                <type>exe</type>
-                <overWrite>true</overWrite>
-                <outputDirectory>${project.build.directory}</outputDirectory>
-              </artifactItem>
-            </artifactItems>
-            <skip>${flatc.download.skip}</skip>
-          </configuration>
-        </execution>
-      </executions>
-    </plugin>
-    <plugin>
-      <groupId>org.codehaus.mojo</groupId>
-      <artifactId>exec-maven-plugin</artifactId>
-      <version>1.4.0</version>
-      <executions>
-        <execution> <!-- make the flatbuffer compiler executable -->
-          <id>script-chmod</id>
-          <goals>
-            <goal>exec</goal>
-          </goals>
-          <phase>generate-sources</phase>
-          <configuration>
-            <executable>chmod</executable>
-            <arguments>
-              <argument>+x</argument>
-              <argument>${project.build.directory}/flatc-${os.detected.classifier}-${dep.flatc.version}.exe</argument>
-            </arguments>
-            <skip>${flatc.download.skip}</skip>
-          </configuration>
-        </execution>
-        <execution> <!-- generate sources by executing the flatbuffer compiler -->
-          <goals>
-            <goal>exec</goal>
-          </goals>
-          <phase>generate-sources</phase>
-          <configuration>
-            <executable>${flatc.executable}</executable>
-            <arguments>
-              <argument>-j</argument>
-              <argument>-o</argument>
-              <argument>${flatc.generated.files}</argument>
-              <argument>../../format/Schema.fbs</argument>
-              <argument>../../format/Tensor.fbs</argument>
-              <argument>../../format/SparseTensor.fbs</argument>
-              <argument>../../format/File.fbs</argument>
-              <argument>../../format/Message.fbs</argument>
-            </arguments>
-          </configuration>
-        </execution>
-      </executions>
-    </plugin>
-    <plugin> <!-- add the license header to the generated files -->
-      <groupId>com.mycila</groupId>
-      <artifactId>license-maven-plugin</artifactId>
-      <version>2.3</version>
-      <configuration>
-        <header>${basedir}/../../header</header>
-        <includes>
-          <include>**/*.java</include>
-        </includes>
-      </configuration>
-      <executions>
-        <execution>
-          <phase>process-sources</phase>
-          <goals>
-            <goal>format</goal>
-          </goals>
-        </execution>
-      </executions>
-    </plugin>
-    <plugin> <!-- add generated sources to classpath -->
-      <groupId>org.codehaus.mojo</groupId>
-      <artifactId>build-helper-maven-plugin</artifactId>
-      <version>1.9.1</version>
-      <executions>
-        <execution>
-          <id>add-generated-sources-to-classpath</id>
-          <phase>generate-sources</phase>
-          <goals>
-            <goal>add-source</goal>
-          </goals>
-          <configuration>
-            <sources>
-              <source>${flatc.generated.files}</source>
-            </sources>
-          </configuration>
-        </execution>
-      </executions>
-    </plugin>
     <plugin> <!-- no checkstyle on the generated code -->
       <groupId>org.apache.maven.plugins</groupId>
       <artifactId>maven-checkstyle-plugin</artifactId>
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Binary.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Binary.java
new file mode 100644
index 00000000000..f2ea5250278
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/Binary.java
@@ -0,0 +1,51 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * Opaque binary data
+ */
+public final class Binary extends Table {
+  public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+  public static Binary getRootAsBinary(ByteBuffer _bb) { return getRootAsBinary(_bb, new Binary()); }
+  public static Binary getRootAsBinary(ByteBuffer _bb, Binary obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+  public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+  public Binary __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+
+  public static void startBinary(FlatBufferBuilder builder) { builder.startTable(0); }
+  public static int endBinary(FlatBufferBuilder builder) {
+    int o = builder.endTable();
+    return o;
+  }
+
+  public static final class Vector extends BaseVector {
+    public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+    public Binary get(int j) { return get(new Binary(), j); }
+    public Binary get(Binary obj, int j) {  return obj.__assign(__indirect(__element(j), bb), bb); }
+  }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Block.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Block.java
new file mode 100644
index 00000000000..e1435f83250
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/Block.java
@@ -0,0 +1,61 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+public final class Block extends Struct {
+  public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+  public Block __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+  /**
+   * Index to the start of the RecordBlock (note this is past the Message header)
+   */
+  public long offset() { return bb.getLong(bb_pos + 0); }
+  /**
+   * Length of the metadata
+   */
+  public int metaDataLength() { return bb.getInt(bb_pos + 8); }
+  /**
+   * Length of the data (this is aligned so there can be a gap between this and
+   * the metadata).
+   */
+  public long bodyLength() { return bb.getLong(bb_pos + 16); }
+
+  public static int createBlock(FlatBufferBuilder builder, long offset, int metaDataLength, long bodyLength) {
+    builder.prep(8, 24);
+    builder.putLong(bodyLength);
+    builder.pad(4);
+    builder.putInt(metaDataLength);
+    builder.putLong(offset);
+    return builder.offset();
+  }
+
+  public static final class Vector extends BaseVector {
+    public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+    public Block get(int j) { return get(new Block(), j); }
+    public Block get(Block obj, int j) {  return obj.__assign(__element(j), bb); }
+  }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/BodyCompression.java b/java/format/src/main/java/org/apache/arrow/flatbuf/BodyCompression.java
new file mode 100644
index 00000000000..650454eb154
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/BodyCompression.java
@@ -0,0 +1,72 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * Optional compression for the memory buffers constituting IPC message
+ * bodies. Intended for use with RecordBatch but could be used for other
+ * message types
+ */
+public final class BodyCompression extends Table {
+  public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+  public static BodyCompression getRootAsBodyCompression(ByteBuffer _bb) { return getRootAsBodyCompression(_bb, new BodyCompression()); }
+  public static BodyCompression getRootAsBodyCompression(ByteBuffer _bb, BodyCompression obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+  public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+  public BodyCompression __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+  /**
+   * Compressor library
+   */
+  public byte codec() { int o = __offset(4); return o != 0 ? bb.get(o + bb_pos) : 0; }
+  /**
+   * Indicates the way the record batch body was compressed
+   */
+  public byte method() { int o = __offset(6); return o != 0 ? bb.get(o + bb_pos) : 0; }
+
+  public static int createBodyCompression(FlatBufferBuilder builder,
+      byte codec,
+      byte method) {
+    builder.startTable(2);
+    BodyCompression.addMethod(builder, method);
+    BodyCompression.addCodec(builder, codec);
+    return BodyCompression.endBodyCompression(builder);
+  }
+
+  public static void startBodyCompression(FlatBufferBuilder builder) { builder.startTable(2); }
+  public static void addCodec(FlatBufferBuilder builder, byte codec) { builder.addByte(0, codec, 0); }
+  public static void addMethod(FlatBufferBuilder builder, byte method) { builder.addByte(1, method, 0); }
+  public static int endBodyCompression(FlatBufferBuilder builder) {
+    int o = builder.endTable();
+    return o;
+  }
+
+  public static final class Vector extends BaseVector {
+    public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+    public BodyCompression get(int j) { return get(new BodyCompression(), j); }
+    public BodyCompression get(BodyCompression obj, int j) {  return obj.__assign(__indirect(__element(j), bb), bb); }
+  }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/BodyCompressionMethod.java b/java/format/src/main/java/org/apache/arrow/flatbuf/BodyCompressionMethod.java
new file mode 100644
index 00000000000..48cff16e751
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/BodyCompressionMethod.java
@@ -0,0 +1,43 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+/**
+ * Provided for forward compatibility in case we need to support different
+ * strategies for compressing the IPC message body (like whole-body
+ * compression rather than buffer-level) in the future
+ */
+public final class BodyCompressionMethod {
+  private BodyCompressionMethod() { }
+  /**
+   * Each constituent buffer is first compressed with the indicated
+   * compressor, and then written with the uncompressed length in the first 8
+   * bytes as a 64-bit little-endian signed integer followed by the compressed
+   * buffer bytes (and then padding as required by the protocol). The
+   * uncompressed length may be set to -1 to indicate that the data that
+   * follows is not compressed, which can be useful for cases where
+   * compression does not yield appreciable savings.
+   */
+  public static final byte BUFFER = 0;
+
+  public static final String[] names = { "BUFFER", };
+
+  public static String name(int e) { return names[e]; }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Bool.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Bool.java
new file mode 100644
index 00000000000..e6b54e4b7bf
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/Bool.java
@@ -0,0 +1,48 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+public final class Bool extends Table {
+  public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+  public static Bool getRootAsBool(ByteBuffer _bb) { return getRootAsBool(_bb, new Bool()); }
+  public static Bool getRootAsBool(ByteBuffer _bb, Bool obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+  public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+  public Bool __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+
+  public static void startBool(FlatBufferBuilder builder) { builder.startTable(0); }
+  public static int endBool(FlatBufferBuilder builder) {
+    int o = builder.endTable();
+    return o;
+  }
+
+  public static final class Vector extends BaseVector {
+    public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+    public Bool get(int j) { return get(new Bool(), j); }
+    public Bool get(Bool obj, int j) {  return obj.__assign(__indirect(__element(j), bb), bb); }
+  }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Buffer.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Buffer.java
new file mode 100644
index 00000000000..589ed0b711e
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/Buffer.java
@@ -0,0 +1,63 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * ----------------------------------------------------------------------
+ * A Buffer represents a single contiguous memory segment
+ */
+public final class Buffer extends Struct {
+  public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+  public Buffer __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+  /**
+   * The relative offset into the shared memory page where the bytes for this
+   * buffer starts
+   */
+  public long offset() { return bb.getLong(bb_pos + 0); }
+  /**
+   * The absolute length (in bytes) of the memory buffer. The memory is found
+   * from offset (inclusive) to offset + length (non-inclusive). When building
+   * messages using the encapsulated IPC message, padding bytes may be written
+   * after a buffer, but such padding bytes do not need to be accounted for in
+   * the size here.
+   */
+  public long length() { return bb.getLong(bb_pos + 8); }
+
+  public static int createBuffer(FlatBufferBuilder builder, long offset, long length) {
+    builder.prep(8, 16);
+    builder.putLong(length);
+    builder.putLong(offset);
+    return builder.offset();
+  }
+
+  public static final class Vector extends BaseVector {
+    public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+    public Buffer get(int j) { return get(new Buffer(), j); }
+    public Buffer get(Buffer obj, int j) {  return obj.__assign(__element(j), bb); }
+  }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/CompressionType.java b/java/format/src/main/java/org/apache/arrow/flatbuf/CompressionType.java
new file mode 100644
index 00000000000..0597ffd30ab
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/CompressionType.java
@@ -0,0 +1,30 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+public final class CompressionType {
+  private CompressionType() { }
+  public static final byte LZ4_FRAME = 0;
+  public static final byte ZSTD = 1;
+
+  public static final String[] names = { "LZ4_FRAME", "ZSTD", };
+
+  public static String name(int e) { return names[e]; }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Date.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Date.java
new file mode 100644
index 00000000000..b2fcc9e39e3
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/Date.java
@@ -0,0 +1,65 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * Date is either a 32-bit or 64-bit type representing elapsed time since UNIX
+ * epoch (1970-01-01), stored in either of two units:
+ *
+ * * Milliseconds (64 bits) indicating UNIX time elapsed since the epoch (no
+ *   leap seconds), where the values are evenly divisible by 86400000
+ * * Days (32 bits) since the UNIX epoch
+ */
+public final class Date extends Table {
+  public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+  public static Date getRootAsDate(ByteBuffer _bb) { return getRootAsDate(_bb, new Date()); }
+  public static Date getRootAsDate(ByteBuffer _bb, Date obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+  public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+  public Date __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+  public short unit() { int o = __offset(4); return o != 0 ? bb.getShort(o + bb_pos) : 1; }
+
+  public static int createDate(FlatBufferBuilder builder,
+      short unit) {
+    builder.startTable(1);
+    Date.addUnit(builder, unit);
+    return Date.endDate(builder);
+  }
+
+  public static void startDate(FlatBufferBuilder builder) { builder.startTable(1); }
+  public static void addUnit(FlatBufferBuilder builder, short unit) { builder.addShort(0, unit, 1); }
+  public static int endDate(FlatBufferBuilder builder) {
+    int o = builder.endTable();
+    return o;
+  }
+
+  public static final class Vector extends BaseVector {
+    public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+    public Date get(int j) { return get(new Date(), j); }
+    public Date get(Date obj, int j) {  return obj.__assign(__indirect(__element(j), bb), bb); }
+  }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/DateUnit.java b/java/format/src/main/java/org/apache/arrow/flatbuf/DateUnit.java
new file mode 100644
index 00000000000..f2c96f45b2e
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/DateUnit.java
@@ -0,0 +1,30 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+public final class DateUnit {
+  private DateUnit() { }
+  public static final short DAY = 0;
+  public static final short MILLISECOND = 1;
+
+  public static final String[] names = { "DAY", "MILLISECOND", };
+
+  public static String name(int e) { return names[e]; }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Decimal.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Decimal.java
new file mode 100644
index 00000000000..8ffaa1ebb73
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/Decimal.java
@@ -0,0 +1,81 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * Exact decimal value represented as an integer value in two's
+ * complement. Currently only 128-bit (16-byte) and 256-bit (32-byte) integers
+ * are used. The representation uses the endianness indicated
+ * in the Schema.
+ */
+public final class Decimal extends Table {
+  public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+  public static Decimal getRootAsDecimal(ByteBuffer _bb) { return getRootAsDecimal(_bb, new Decimal()); }
+  public static Decimal getRootAsDecimal(ByteBuffer _bb, Decimal obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+  public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+  public Decimal __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+  /**
+   * Total number of decimal digits
+   */
+  public int precision() { int o = __offset(4); return o != 0 ? bb.getInt(o + bb_pos) : 0; }
+  /**
+   * Number of digits after the decimal point "."
+   */
+  public int scale() { int o = __offset(6); return o != 0 ? bb.getInt(o + bb_pos) : 0; }
+  /**
+   * Number of bits per value. The only accepted widths are 128 and 256.
+   * We use bitWidth for consistency with Int::bitWidth.
+   */
+  public int bitWidth() { int o = __offset(8); return o != 0 ? bb.getInt(o + bb_pos) : 128; }
+
+  public static int createDecimal(FlatBufferBuilder builder,
+      int precision,
+      int scale,
+      int bitWidth) {
+    builder.startTable(3);
+    Decimal.addBitWidth(builder, bitWidth);
+    Decimal.addScale(builder, scale);
+    Decimal.addPrecision(builder, precision);
+    return Decimal.endDecimal(builder);
+  }
+
+  public static void startDecimal(FlatBufferBuilder builder) { builder.startTable(3); }
+  public static void addPrecision(FlatBufferBuilder builder, int precision) { builder.addInt(0, precision, 0); }
+  public static void addScale(FlatBufferBuilder builder, int scale) { builder.addInt(1, scale, 0); }
+  public static void addBitWidth(FlatBufferBuilder builder, int bitWidth) { builder.addInt(2, bitWidth, 128); }
+  public static int endDecimal(FlatBufferBuilder builder) {
+    int o = builder.endTable();
+    return o;
+  }
+
+  public static final class Vector extends BaseVector {
+    public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+    public Decimal get(int j) { return get(new Decimal(), j); }
+    public Decimal get(Decimal obj, int j) {  return obj.__assign(__indirect(__element(j), bb), bb); }
+  }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/DictionaryBatch.java b/java/format/src/main/java/org/apache/arrow/flatbuf/DictionaryBatch.java
new file mode 100644
index 00000000000..fe6c59fb51e
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/DictionaryBatch.java
@@ -0,0 +1,79 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * For sending dictionary encoding information. Any Field can be
+ * dictionary-encoded, but in this case none of its children may be
+ * dictionary-encoded.
+ * There is one vector / column per dictionary, but that vector / column
+ * may be spread across multiple dictionary batches by using the isDelta
+ * flag
+ */
+public final class DictionaryBatch extends Table {
+  public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+  public static DictionaryBatch getRootAsDictionaryBatch(ByteBuffer _bb) { return getRootAsDictionaryBatch(_bb, new DictionaryBatch()); }
+  public static DictionaryBatch getRootAsDictionaryBatch(ByteBuffer _bb, DictionaryBatch obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+  public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+  public DictionaryBatch __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+  public long id() { int o = __offset(4); return o != 0 ? bb.getLong(o + bb_pos) : 0L; }
+  public org.apache.arrow.flatbuf.RecordBatch data() { return data(new org.apache.arrow.flatbuf.RecordBatch()); }
+  public org.apache.arrow.flatbuf.RecordBatch data(org.apache.arrow.flatbuf.RecordBatch obj) { int o = __offset(6); return o != 0 ? obj.__assign(__indirect(o + bb_pos), bb) : null; }
+  /**
+   * If isDelta is true the values in the dictionary are to be appended to a
+   * dictionary with the indicated id. If isDelta is false this dictionary
+   * should replace the existing dictionary.
+   */
+  public boolean isDelta() { int o = __offset(8); return o != 0 ? 0!=bb.get(o + bb_pos) : false; }
+
+  public static int createDictionaryBatch(FlatBufferBuilder builder,
+      long id,
+      int dataOffset,
+      boolean isDelta) {
+    builder.startTable(3);
+    DictionaryBatch.addId(builder, id);
+    DictionaryBatch.addData(builder, dataOffset);
+    DictionaryBatch.addIsDelta(builder, isDelta);
+    return DictionaryBatch.endDictionaryBatch(builder);
+  }
+
+  public static void startDictionaryBatch(FlatBufferBuilder builder) { builder.startTable(3); }
+  public static void addId(FlatBufferBuilder builder, long id) { builder.addLong(0, id, 0L); }
+  public static void addData(FlatBufferBuilder builder, int dataOffset) { builder.addOffset(1, dataOffset, 0); }
+  public static void addIsDelta(FlatBufferBuilder builder, boolean isDelta) { builder.addBoolean(2, isDelta, false); }
+  public static int endDictionaryBatch(FlatBufferBuilder builder) {
+    int o = builder.endTable();
+    return o;
+  }
+
+  public static final class Vector extends BaseVector {
+    public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+    public DictionaryBatch get(int j) { return get(new DictionaryBatch(), j); }
+    public DictionaryBatch get(DictionaryBatch obj, int j) {  return obj.__assign(__indirect(__element(j), bb), bb); }
+  }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/DictionaryEncoding.java b/java/format/src/main/java/org/apache/arrow/flatbuf/DictionaryEncoding.java
new file mode 100644
index 00000000000..8b2bb73e794
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/DictionaryEncoding.java
@@ -0,0 +1,88 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+public final class DictionaryEncoding extends Table {
+  public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+  public static DictionaryEncoding getRootAsDictionaryEncoding(ByteBuffer _bb) { return getRootAsDictionaryEncoding(_bb, new DictionaryEncoding()); }
+  public static DictionaryEncoding getRootAsDictionaryEncoding(ByteBuffer _bb, DictionaryEncoding obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+  public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+  public DictionaryEncoding __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+  /**
+   * The known dictionary id in the application where this data is used. In
+   * the file or streaming formats, the dictionary ids are found in the
+   * DictionaryBatch messages
+   */
+  public long id() { int o = __offset(4); return o != 0 ? bb.getLong(o + bb_pos) : 0L; }
+  /**
+   * The dictionary indices are constrained to be non-negative integers. If
+   * this field is null, the indices must be signed int32. To maximize
+   * cross-language compatibility and performance, implementations are
+   * recommended to prefer signed integer types over unsigned integer types
+   * and to avoid uint64 indices unless they are required by an application.
+   */
+  public org.apache.arrow.flatbuf.Int indexType() { return indexType(new org.apache.arrow.flatbuf.Int()); }
+  public org.apache.arrow.flatbuf.Int indexType(org.apache.arrow.flatbuf.Int obj) { int o = __offset(6); return o != 0 ? obj.__assign(__indirect(o + bb_pos), bb) : null; }
+  /**
+   * By default, dictionaries are not ordered, or the order does not have
+   * semantic meaning. In some statistical, applications, dictionary-encoding
+   * is used to represent ordered categorical data, and we provide a way to
+   * preserve that metadata here
+   */
+  public boolean isOrdered() { int o = __offset(8); return o != 0 ? 0!=bb.get(o + bb_pos) : false; }
+  public short dictionaryKind() { int o = __offset(10); return o != 0 ? bb.getShort(o + bb_pos) : 0; }
+
+  public static int createDictionaryEncoding(FlatBufferBuilder builder,
+      long id,
+      int indexTypeOffset,
+      boolean isOrdered,
+      short dictionaryKind) {
+    builder.startTable(4);
+    DictionaryEncoding.addId(builder, id);
+    DictionaryEncoding.addIndexType(builder, indexTypeOffset);
+    DictionaryEncoding.addDictionaryKind(builder, dictionaryKind);
+    DictionaryEncoding.addIsOrdered(builder, isOrdered);
+    return DictionaryEncoding.endDictionaryEncoding(builder);
+  }
+
+  public static void startDictionaryEncoding(FlatBufferBuilder builder) { builder.startTable(4); }
+  public static void addId(FlatBufferBuilder builder, long id) { builder.addLong(0, id, 0L); }
+  public static void addIndexType(FlatBufferBuilder builder, int indexTypeOffset) { builder.addOffset(1, indexTypeOffset, 0); }
+  public static void addIsOrdered(FlatBufferBuilder builder, boolean isOrdered) { builder.addBoolean(2, isOrdered, false); }
+  public static void addDictionaryKind(FlatBufferBuilder builder, short dictionaryKind) { builder.addShort(3, dictionaryKind, 0); }
+  public static int endDictionaryEncoding(FlatBufferBuilder builder) {
+    int o = builder.endTable();
+    return o;
+  }
+
+  public static final class Vector extends BaseVector {
+    public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+    public DictionaryEncoding get(int j) { return get(new DictionaryEncoding(), j); }
+    public DictionaryEncoding get(DictionaryEncoding obj, int j) {  return obj.__assign(__indirect(__element(j), bb), bb); }
+  }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/DictionaryKind.java b/java/format/src/main/java/org/apache/arrow/flatbuf/DictionaryKind.java
new file mode 100644
index 00000000000..ecefa4b7655
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/DictionaryKind.java
@@ -0,0 +1,36 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+/**
+ * ----------------------------------------------------------------------
+ * Dictionary encoding metadata
+ * Maintained for forwards compatibility, in the future
+ * Dictionaries might be explicit maps between integers and values
+ * allowing for non-contiguous index values
+ */
+public final class DictionaryKind {
+  private DictionaryKind() { }
+  public static final short DenseArray = 0;
+
+  public static final String[] names = { "DenseArray", };
+
+  public static String name(int e) { return names[e]; }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Duration.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Duration.java
new file mode 100644
index 00000000000..e1495f3002d
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/Duration.java
@@ -0,0 +1,57 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+public final class Duration extends Table {
+  public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+  public static Duration getRootAsDuration(ByteBuffer _bb) { return getRootAsDuration(_bb, new Duration()); }
+  public static Duration getRootAsDuration(ByteBuffer _bb, Duration obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+  public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+  public Duration __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+  public short unit() { int o = __offset(4); return o != 0 ? bb.getShort(o + bb_pos) : 1; }
+
+  public static int createDuration(FlatBufferBuilder builder,
+      short unit) {
+    builder.startTable(1);
+    Duration.addUnit(builder, unit);
+    return Duration.endDuration(builder);
+  }
+
+  public static void startDuration(FlatBufferBuilder builder) { builder.startTable(1); }
+  public static void addUnit(FlatBufferBuilder builder, short unit) { builder.addShort(0, unit, 1); }
+  public static int endDuration(FlatBufferBuilder builder) {
+    int o = builder.endTable();
+    return o;
+  }
+
+  public static final class Vector extends BaseVector {
+    public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+    public Duration get(int j) { return get(new Duration(), j); }
+    public Duration get(Duration obj, int j) {  return obj.__assign(__indirect(__element(j), bb), bb); }
+  }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Endianness.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Endianness.java
new file mode 100644
index 00000000000..494a3dcf57f
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/Endianness.java
@@ -0,0 +1,34 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+/**
+ * ----------------------------------------------------------------------
+ * Endianness of the platform producing the data
+ */
+public final class Endianness {
+  private Endianness() { }
+  public static final short Little = 0;
+  public static final short Big = 1;
+
+  public static final String[] names = { "Little", "Big", };
+
+  public static String name(int e) { return names[e]; }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Feature.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Feature.java
new file mode 100644
index 00000000000..a4fa84c3728
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/Feature.java
@@ -0,0 +1,62 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+/**
+ * Represents Arrow Features that might not have full support
+ * within implementations. This is intended to be used in
+ * two scenarios:
+ *  1.  A mechanism for readers of Arrow Streams
+ *      and files to understand that the stream or file makes
+ *      use of a feature that isn't supported or unknown to
+ *      the implementation (and therefore can meet the Arrow
+ *      forward compatibility guarantees).
+ *  2.  A means of negotiating between a client and server
+ *      what features a stream is allowed to use. The enums
+ *      values here are intented to represent higher level
+ *      features, additional details maybe negotiated
+ *      with key-value pairs specific to the protocol.
+ *
+ * Enums added to this list should be assigned power-of-two values
+ * to facilitate exchanging and comparing bitmaps for supported
+ * features.
+ */
+public final class Feature {
+  private Feature() { }
+  /**
+   * Needed to make flatbuffers happy.
+   */
+  public static final long UNUSED = 0;
+  /**
+   * The stream makes use of multiple full dictionaries with the
+   * same ID and assumes clients implement dictionary replacement
+   * correctly.
+   */
+  public static final long DICTIONARY_REPLACEMENT = 1;
+  /**
+   * The stream makes use of compressed bodies as described
+   * in Message.fbs.
+   */
+  public static final long COMPRESSED_BODY = 2;
+
+  public static final String[] names = { "UNUSED", "DICTIONARY_REPLACEMENT", "COMPRESSED_BODY", };
+
+  public static String name(int e) { return names[e]; }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Field.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Field.java
new file mode 100644
index 00000000000..d34501e0ac2
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/Field.java
@@ -0,0 +1,120 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * ----------------------------------------------------------------------
+ * A field represents a named column in a record / row batch or child of a
+ * nested type.
+ */
+public final class Field extends Table {
+  public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+  public static Field getRootAsField(ByteBuffer _bb) { return getRootAsField(_bb, new Field()); }
+  public static Field getRootAsField(ByteBuffer _bb, Field obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+  public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+  public Field __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+  /**
+   * Name is not required, in i.e. a List
+   */
+  public String name() { int o = __offset(4); return o != 0 ? __string(o + bb_pos) : null; }
+  public ByteBuffer nameAsByteBuffer() { return __vector_as_bytebuffer(4, 1); }
+  public ByteBuffer nameInByteBuffer(ByteBuffer _bb) { return __vector_in_bytebuffer(_bb, 4, 1); }
+  /**
+   * Whether or not this field can contain nulls. Should be true in general.
+   */
+  public boolean nullable() { int o = __offset(6); return o != 0 ? 0!=bb.get(o + bb_pos) : false; }
+  public byte typeType() { int o = __offset(8); return o != 0 ? bb.get(o + bb_pos) : 0; }
+  /**
+   * This is the type of the decoded value if the field is dictionary encoded.
+   */
+  public Table type(Table obj) { int o = __offset(10); return o != 0 ? __union(obj, o + bb_pos) : null; }
+  /**
+   * Present only if the field is dictionary encoded.
+   */
+  public org.apache.arrow.flatbuf.DictionaryEncoding dictionary() { return dictionary(new org.apache.arrow.flatbuf.DictionaryEncoding()); }
+  public org.apache.arrow.flatbuf.DictionaryEncoding dictionary(org.apache.arrow.flatbuf.DictionaryEncoding obj) { int o = __offset(12); return o != 0 ? obj.__assign(__indirect(o + bb_pos), bb) : null; }
+  /**
+   * children apply only to nested data types like Struct, List and Union. For
+   * primitive types children will have length 0.
+   */
+  public org.apache.arrow.flatbuf.Field children(int j) { return children(new org.apache.arrow.flatbuf.Field(), j); }
+  public org.apache.arrow.flatbuf.Field children(org.apache.arrow.flatbuf.Field obj, int j) { int o = __offset(14); return o != 0 ? obj.__assign(__indirect(__vector(o) + j * 4), bb) : null; }
+  public int childrenLength() { int o = __offset(14); return o != 0 ? __vector_len(o) : 0; }
+  public org.apache.arrow.flatbuf.Field.Vector childrenVector() { return childrenVector(new org.apache.arrow.flatbuf.Field.Vector()); }
+  public org.apache.arrow.flatbuf.Field.Vector childrenVector(org.apache.arrow.flatbuf.Field.Vector obj) { int o = __offset(14); return o != 0 ? obj.__assign(__vector(o), 4, bb) : null; }
+  /**
+   * User-defined metadata
+   */
+  public org.apache.arrow.flatbuf.KeyValue customMetadata(int j) { return customMetadata(new org.apache.arrow.flatbuf.KeyValue(), j); }
+  public org.apache.arrow.flatbuf.KeyValue customMetadata(org.apache.arrow.flatbuf.KeyValue obj, int j) { int o = __offset(16); return o != 0 ? obj.__assign(__indirect(__vector(o) + j * 4), bb) : null; }
+  public int customMetadataLength() { int o = __offset(16); return o != 0 ? __vector_len(o) : 0; }
+  public org.apache.arrow.flatbuf.KeyValue.Vector customMetadataVector() { return customMetadataVector(new org.apache.arrow.flatbuf.KeyValue.Vector()); }
+  public org.apache.arrow.flatbuf.KeyValue.Vector customMetadataVector(org.apache.arrow.flatbuf.KeyValue.Vector obj) { int o = __offset(16); return o != 0 ? obj.__assign(__vector(o), 4, bb) : null; }
+
+  public static int createField(FlatBufferBuilder builder,
+      int nameOffset,
+      boolean nullable,
+      byte type_type,
+      int typeOffset,
+      int dictionaryOffset,
+      int childrenOffset,
+      int custom_metadataOffset) {
+    builder.startTable(7);
+    Field.addCustomMetadata(builder, custom_metadataOffset);
+    Field.addChildren(builder, childrenOffset);
+    Field.addDictionary(builder, dictionaryOffset);
+    Field.addType(builder, typeOffset);
+    Field.addName(builder, nameOffset);
+    Field.addTypeType(builder, type_type);
+    Field.addNullable(builder, nullable);
+    return Field.endField(builder);
+  }
+
+  public static void startField(FlatBufferBuilder builder) { builder.startTable(7); }
+  public static void addName(FlatBufferBuilder builder, int nameOffset) { builder.addOffset(0, nameOffset, 0); }
+  public static void addNullable(FlatBufferBuilder builder, boolean nullable) { builder.addBoolean(1, nullable, false); }
+  public static void addTypeType(FlatBufferBuilder builder, byte typeType) { builder.addByte(2, typeType, 0); }
+  public static void addType(FlatBufferBuilder builder, int typeOffset) { builder.addOffset(3, typeOffset, 0); }
+  public static void addDictionary(FlatBufferBuilder builder, int dictionaryOffset) { builder.addOffset(4, dictionaryOffset, 0); }
+  public static void addChildren(FlatBufferBuilder builder, int childrenOffset) { builder.addOffset(5, childrenOffset, 0); }
+  public static int createChildrenVector(FlatBufferBuilder builder, int[] data) { builder.startVector(4, data.length, 4); for (int i = data.length - 1; i >= 0; i--) builder.addOffset(data[i]); return builder.endVector(); }
+  public static void startChildrenVector(FlatBufferBuilder builder, int numElems) { builder.startVector(4, numElems, 4); }
+  public static void addCustomMetadata(FlatBufferBuilder builder, int customMetadataOffset) { builder.addOffset(6, customMetadataOffset, 0); }
+  public static int createCustomMetadataVector(FlatBufferBuilder builder, int[] data) { builder.startVector(4, data.length, 4); for (int i = data.length - 1; i >= 0; i--) builder.addOffset(data[i]); return builder.endVector(); }
+  public static void startCustomMetadataVector(FlatBufferBuilder builder, int numElems) { builder.startVector(4, numElems, 4); }
+  public static int endField(FlatBufferBuilder builder) {
+    int o = builder.endTable();
+    return o;
+  }
+
+  public static final class Vector extends BaseVector {
+    public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+    public Field get(int j) { return get(new Field(), j); }
+    public Field get(Field obj, int j) {  return obj.__assign(__indirect(__element(j), bb), bb); }
+  }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/FieldNode.java b/java/format/src/main/java/org/apache/arrow/flatbuf/FieldNode.java
new file mode 100644
index 00000000000..3ea9805f6bc
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/FieldNode.java
@@ -0,0 +1,68 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * ----------------------------------------------------------------------
+ * Data structures for describing a table row batch (a collection of
+ * equal-length Arrow arrays)
+ * Metadata about a field at some level of a nested type tree (but not
+ * its children).
+ *
+ * For example, a List<Int16> with values `[[1, 2, 3], null, [4], [5, 6], null]`
+ * would have {length: 5, null_count: 2} for its List node, and {length: 6,
+ * null_count: 0} for its Int16 node, as separate FieldNode structs
+ */
+public final class FieldNode extends Struct {
+  public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+  public FieldNode __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+  /**
+   * The number of value slots in the Arrow array at this level of a nested
+   * tree
+   */
+  public long length() { return bb.getLong(bb_pos + 0); }
+  /**
+   * The number of observed nulls. Fields with null_count == 0 may choose not
+   * to write their physical validity bitmap out as a materialized buffer,
+   * instead setting the length of the bitmap buffer to 0.
+   */
+  public long nullCount() { return bb.getLong(bb_pos + 8); }
+
+  public static int createFieldNode(FlatBufferBuilder builder, long length, long nullCount) {
+    builder.prep(8, 16);
+    builder.putLong(nullCount);
+    builder.putLong(length);
+    return builder.offset();
+  }
+
+  public static final class Vector extends BaseVector {
+    public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+    public FieldNode get(int j) { return get(new FieldNode(), j); }
+    public FieldNode get(FieldNode obj, int j) {  return obj.__assign(__element(j), bb); }
+  }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/FixedSizeBinary.java b/java/format/src/main/java/org/apache/arrow/flatbuf/FixedSizeBinary.java
new file mode 100644
index 00000000000..287b34e2258
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/FixedSizeBinary.java
@@ -0,0 +1,60 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+public final class FixedSizeBinary extends Table {
+  public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+  public static FixedSizeBinary getRootAsFixedSizeBinary(ByteBuffer _bb) { return getRootAsFixedSizeBinary(_bb, new FixedSizeBinary()); }
+  public static FixedSizeBinary getRootAsFixedSizeBinary(ByteBuffer _bb, FixedSizeBinary obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+  public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+  public FixedSizeBinary __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+  /**
+   * Number of bytes per value
+   */
+  public int byteWidth() { int o = __offset(4); return o != 0 ? bb.getInt(o + bb_pos) : 0; }
+
+  public static int createFixedSizeBinary(FlatBufferBuilder builder,
+      int byteWidth) {
+    builder.startTable(1);
+    FixedSizeBinary.addByteWidth(builder, byteWidth);
+    return FixedSizeBinary.endFixedSizeBinary(builder);
+  }
+
+  public static void startFixedSizeBinary(FlatBufferBuilder builder) { builder.startTable(1); }
+  public static void addByteWidth(FlatBufferBuilder builder, int byteWidth) { builder.addInt(0, byteWidth, 0); }
+  public static int endFixedSizeBinary(FlatBufferBuilder builder) {
+    int o = builder.endTable();
+    return o;
+  }
+
+  public static final class Vector extends BaseVector {
+    public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+    public FixedSizeBinary get(int j) { return get(new FixedSizeBinary(), j); }
+    public FixedSizeBinary get(FixedSizeBinary obj, int j) {  return obj.__assign(__indirect(__element(j), bb), bb); }
+  }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/FixedSizeList.java b/java/format/src/main/java/org/apache/arrow/flatbuf/FixedSizeList.java
new file mode 100644
index 00000000000..d0d88923871
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/FixedSizeList.java
@@ -0,0 +1,60 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+public final class FixedSizeList extends Table {
+  public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+  public static FixedSizeList getRootAsFixedSizeList(ByteBuffer _bb) { return getRootAsFixedSizeList(_bb, new FixedSizeList()); }
+  public static FixedSizeList getRootAsFixedSizeList(ByteBuffer _bb, FixedSizeList obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+  public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+  public FixedSizeList __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+  /**
+   * Number of list items per value
+   */
+  public int listSize() { int o = __offset(4); return o != 0 ? bb.getInt(o + bb_pos) : 0; }
+
+  public static int createFixedSizeList(FlatBufferBuilder builder,
+      int listSize) {
+    builder.startTable(1);
+    FixedSizeList.addListSize(builder, listSize);
+    return FixedSizeList.endFixedSizeList(builder);
+  }
+
+  public static void startFixedSizeList(FlatBufferBuilder builder) { builder.startTable(1); }
+  public static void addListSize(FlatBufferBuilder builder, int listSize) { builder.addInt(0, listSize, 0); }
+  public static int endFixedSizeList(FlatBufferBuilder builder) {
+    int o = builder.endTable();
+    return o;
+  }
+
+  public static final class Vector extends BaseVector {
+    public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+    public FixedSizeList get(int j) { return get(new FixedSizeList(), j); }
+    public FixedSizeList get(FixedSizeList obj, int j) {  return obj.__assign(__indirect(__element(j), bb), bb); }
+  }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/FloatingPoint.java b/java/format/src/main/java/org/apache/arrow/flatbuf/FloatingPoint.java
new file mode 100644
index 00000000000..945fa627d4d
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/FloatingPoint.java
@@ -0,0 +1,57 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+public final class FloatingPoint extends Table {
+  public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+  public static FloatingPoint getRootAsFloatingPoint(ByteBuffer _bb) { return getRootAsFloatingPoint(_bb, new FloatingPoint()); }
+  public static FloatingPoint getRootAsFloatingPoint(ByteBuffer _bb, FloatingPoint obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+  public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+  public FloatingPoint __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+  public short precision() { int o = __offset(4); return o != 0 ? bb.getShort(o + bb_pos) : 0; }
+
+  public static int createFloatingPoint(FlatBufferBuilder builder,
+      short precision) {
+    builder.startTable(1);
+    FloatingPoint.addPrecision(builder, precision);
+    return FloatingPoint.endFloatingPoint(builder);
+  }
+
+  public static void startFloatingPoint(FlatBufferBuilder builder) { builder.startTable(1); }
+  public static void addPrecision(FlatBufferBuilder builder, short precision) { builder.addShort(0, precision, 0); }
+  public static int endFloatingPoint(FlatBufferBuilder builder) {
+    int o = builder.endTable();
+    return o;
+  }
+
+  public static final class Vector extends BaseVector {
+    public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+    public FloatingPoint get(int j) { return get(new FloatingPoint(), j); }
+    public FloatingPoint get(FloatingPoint obj, int j) {  return obj.__assign(__indirect(__element(j), bb), bb); }
+  }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Footer.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Footer.java
new file mode 100644
index 00000000000..86fd75e03bd
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/Footer.java
@@ -0,0 +1,100 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * ----------------------------------------------------------------------
+ * Arrow File metadata
+ *
+ */
+public final class Footer extends Table {
+  public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+  public static Footer getRootAsFooter(ByteBuffer _bb) { return getRootAsFooter(_bb, new Footer()); }
+  public static Footer getRootAsFooter(ByteBuffer _bb, Footer obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+  public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+  public Footer __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+  public short version() { int o = __offset(4); return o != 0 ? bb.getShort(o + bb_pos) : 0; }
+  public org.apache.arrow.flatbuf.Schema schema() { return schema(new org.apache.arrow.flatbuf.Schema()); }
+  public org.apache.arrow.flatbuf.Schema schema(org.apache.arrow.flatbuf.Schema obj) { int o = __offset(6); return o != 0 ? obj.__assign(__indirect(o + bb_pos), bb) : null; }
+  public org.apache.arrow.flatbuf.Block dictionaries(int j) { return dictionaries(new org.apache.arrow.flatbuf.Block(), j); }
+  public org.apache.arrow.flatbuf.Block dictionaries(org.apache.arrow.flatbuf.Block obj, int j) { int o = __offset(8); return o != 0 ? obj.__assign(__vector(o) + j * 24, bb) : null; }
+  public int dictionariesLength() { int o = __offset(8); return o != 0 ? __vector_len(o) : 0; }
+  public org.apache.arrow.flatbuf.Block.Vector dictionariesVector() { return dictionariesVector(new org.apache.arrow.flatbuf.Block.Vector()); }
+  public org.apache.arrow.flatbuf.Block.Vector dictionariesVector(org.apache.arrow.flatbuf.Block.Vector obj) { int o = __offset(8); return o != 0 ? obj.__assign(__vector(o), 24, bb) : null; }
+  public org.apache.arrow.flatbuf.Block recordBatches(int j) { return recordBatches(new org.apache.arrow.flatbuf.Block(), j); }
+  public org.apache.arrow.flatbuf.Block recordBatches(org.apache.arrow.flatbuf.Block obj, int j) { int o = __offset(10); return o != 0 ? obj.__assign(__vector(o) + j * 24, bb) : null; }
+  public int recordBatchesLength() { int o = __offset(10); return o != 0 ? __vector_len(o) : 0; }
+  public org.apache.arrow.flatbuf.Block.Vector recordBatchesVector() { return recordBatchesVector(new org.apache.arrow.flatbuf.Block.Vector()); }
+  public org.apache.arrow.flatbuf.Block.Vector recordBatchesVector(org.apache.arrow.flatbuf.Block.Vector obj) { int o = __offset(10); return o != 0 ? obj.__assign(__vector(o), 24, bb) : null; }
+  /**
+   * User-defined metadata
+   */
+  public org.apache.arrow.flatbuf.KeyValue customMetadata(int j) { return customMetadata(new org.apache.arrow.flatbuf.KeyValue(), j); }
+  public org.apache.arrow.flatbuf.KeyValue customMetadata(org.apache.arrow.flatbuf.KeyValue obj, int j) { int o = __offset(12); return o != 0 ? obj.__assign(__indirect(__vector(o) + j * 4), bb) : null; }
+  public int customMetadataLength() { int o = __offset(12); return o != 0 ? __vector_len(o) : 0; }
+  public org.apache.arrow.flatbuf.KeyValue.Vector customMetadataVector() { return customMetadataVector(new org.apache.arrow.flatbuf.KeyValue.Vector()); }
+  public org.apache.arrow.flatbuf.KeyValue.Vector customMetadataVector(org.apache.arrow.flatbuf.KeyValue.Vector obj) { int o = __offset(12); return o != 0 ? obj.__assign(__vector(o), 4, bb) : null; }
+
+  public static int createFooter(FlatBufferBuilder builder,
+      short version,
+      int schemaOffset,
+      int dictionariesOffset,
+      int recordBatchesOffset,
+      int custom_metadataOffset) {
+    builder.startTable(5);
+    Footer.addCustomMetadata(builder, custom_metadataOffset);
+    Footer.addRecordBatches(builder, recordBatchesOffset);
+    Footer.addDictionaries(builder, dictionariesOffset);
+    Footer.addSchema(builder, schemaOffset);
+    Footer.addVersion(builder, version);
+    return Footer.endFooter(builder);
+  }
+
+  public static void startFooter(FlatBufferBuilder builder) { builder.startTable(5); }
+  public static void addVersion(FlatBufferBuilder builder, short version) { builder.addShort(0, version, 0); }
+  public static void addSchema(FlatBufferBuilder builder, int schemaOffset) { builder.addOffset(1, schemaOffset, 0); }
+  public static void addDictionaries(FlatBufferBuilder builder, int dictionariesOffset) { builder.addOffset(2, dictionariesOffset, 0); }
+  public static void startDictionariesVector(FlatBufferBuilder builder, int numElems) { builder.startVector(24, numElems, 8); }
+  public static void addRecordBatches(FlatBufferBuilder builder, int recordBatchesOffset) { builder.addOffset(3, recordBatchesOffset, 0); }
+  public static void startRecordBatchesVector(FlatBufferBuilder builder, int numElems) { builder.startVector(24, numElems, 8); }
+  public static void addCustomMetadata(FlatBufferBuilder builder, int customMetadataOffset) { builder.addOffset(4, customMetadataOffset, 0); }
+  public static int createCustomMetadataVector(FlatBufferBuilder builder, int[] data) { builder.startVector(4, data.length, 4); for (int i = data.length - 1; i >= 0; i--) builder.addOffset(data[i]); return builder.endVector(); }
+  public static void startCustomMetadataVector(FlatBufferBuilder builder, int numElems) { builder.startVector(4, numElems, 4); }
+  public static int endFooter(FlatBufferBuilder builder) {
+    int o = builder.endTable();
+    return o;
+  }
+  public static void finishFooterBuffer(FlatBufferBuilder builder, int offset) { builder.finish(offset); }
+  public static void finishSizePrefixedFooterBuffer(FlatBufferBuilder builder, int offset) { builder.finishSizePrefixed(offset); }
+
+  public static final class Vector extends BaseVector {
+    public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+    public Footer get(int j) { return get(new Footer(), j); }
+    public Footer get(Footer obj, int j) {  return obj.__assign(__indirect(__element(j), bb), bb); }
+  }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Int.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Int.java
new file mode 100644
index 00000000000..94cb96a05f3
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/Int.java
@@ -0,0 +1,61 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+public final class Int extends Table {
+  public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+  public static Int getRootAsInt(ByteBuffer _bb) { return getRootAsInt(_bb, new Int()); }
+  public static Int getRootAsInt(ByteBuffer _bb, Int obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+  public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+  public Int __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+  public int bitWidth() { int o = __offset(4); return o != 0 ? bb.getInt(o + bb_pos) : 0; }
+  public boolean isSigned() { int o = __offset(6); return o != 0 ? 0!=bb.get(o + bb_pos) : false; }
+
+  public static int createInt(FlatBufferBuilder builder,
+      int bitWidth,
+      boolean is_signed) {
+    builder.startTable(2);
+    Int.addBitWidth(builder, bitWidth);
+    Int.addIsSigned(builder, is_signed);
+    return Int.endInt(builder);
+  }
+
+  public static void startInt(FlatBufferBuilder builder) { builder.startTable(2); }
+  public static void addBitWidth(FlatBufferBuilder builder, int bitWidth) { builder.addInt(0, bitWidth, 0); }
+  public static void addIsSigned(FlatBufferBuilder builder, boolean isSigned) { builder.addBoolean(1, isSigned, false); }
+  public static int endInt(FlatBufferBuilder builder) {
+    int o = builder.endTable();
+    return o;
+  }
+
+  public static final class Vector extends BaseVector {
+    public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+    public Int get(int j) { return get(new Int(), j); }
+    public Int get(Int obj, int j) {  return obj.__assign(__indirect(__element(j), bb), bb); }
+  }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Interval.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Interval.java
new file mode 100644
index 00000000000..e690b0badde
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/Interval.java
@@ -0,0 +1,57 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+public final class Interval extends Table {
+  public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+  public static Interval getRootAsInterval(ByteBuffer _bb) { return getRootAsInterval(_bb, new Interval()); }
+  public static Interval getRootAsInterval(ByteBuffer _bb, Interval obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+  public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+  public Interval __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+  public short unit() { int o = __offset(4); return o != 0 ? bb.getShort(o + bb_pos) : 0; }
+
+  public static int createInterval(FlatBufferBuilder builder,
+      short unit) {
+    builder.startTable(1);
+    Interval.addUnit(builder, unit);
+    return Interval.endInterval(builder);
+  }
+
+  public static void startInterval(FlatBufferBuilder builder) { builder.startTable(1); }
+  public static void addUnit(FlatBufferBuilder builder, short unit) { builder.addShort(0, unit, 0); }
+  public static int endInterval(FlatBufferBuilder builder) {
+    int o = builder.endTable();
+    return o;
+  }
+
+  public static final class Vector extends BaseVector {
+    public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+    public Interval get(int j) { return get(new Interval(), j); }
+    public Interval get(Interval obj, int j) {  return obj.__assign(__indirect(__element(j), bb), bb); }
+  }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/IntervalUnit.java b/java/format/src/main/java/org/apache/arrow/flatbuf/IntervalUnit.java
new file mode 100644
index 00000000000..76b689bccd1
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/IntervalUnit.java
@@ -0,0 +1,30 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+public final class IntervalUnit {
+  private IntervalUnit() { }
+  public static final short YEAR_MONTH = 0;
+  public static final short DAY_TIME = 1;
+
+  public static final String[] names = { "YEAR_MONTH", "DAY_TIME", };
+
+  public static String name(int e) { return names[e]; }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/KeyValue.java b/java/format/src/main/java/org/apache/arrow/flatbuf/KeyValue.java
new file mode 100644
index 00000000000..0c6e9f66ea8
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/KeyValue.java
@@ -0,0 +1,70 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * ----------------------------------------------------------------------
+ * user defined key value pairs to add custom metadata to arrow
+ * key namespacing is the responsibility of the user
+ */
+public final class KeyValue extends Table {
+  public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+  public static KeyValue getRootAsKeyValue(ByteBuffer _bb) { return getRootAsKeyValue(_bb, new KeyValue()); }
+  public static KeyValue getRootAsKeyValue(ByteBuffer _bb, KeyValue obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+  public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+  public KeyValue __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+  public String key() { int o = __offset(4); return o != 0 ? __string(o + bb_pos) : null; }
+  public ByteBuffer keyAsByteBuffer() { return __vector_as_bytebuffer(4, 1); }
+  public ByteBuffer keyInByteBuffer(ByteBuffer _bb) { return __vector_in_bytebuffer(_bb, 4, 1); }
+  public String value() { int o = __offset(6); return o != 0 ? __string(o + bb_pos) : null; }
+  public ByteBuffer valueAsByteBuffer() { return __vector_as_bytebuffer(6, 1); }
+  public ByteBuffer valueInByteBuffer(ByteBuffer _bb) { return __vector_in_bytebuffer(_bb, 6, 1); }
+
+  public static int createKeyValue(FlatBufferBuilder builder,
+      int keyOffset,
+      int valueOffset) {
+    builder.startTable(2);
+    KeyValue.addValue(builder, valueOffset);
+    KeyValue.addKey(builder, keyOffset);
+    return KeyValue.endKeyValue(builder);
+  }
+
+  public static void startKeyValue(FlatBufferBuilder builder) { builder.startTable(2); }
+  public static void addKey(FlatBufferBuilder builder, int keyOffset) { builder.addOffset(0, keyOffset, 0); }
+  public static void addValue(FlatBufferBuilder builder, int valueOffset) { builder.addOffset(1, valueOffset, 0); }
+  public static int endKeyValue(FlatBufferBuilder builder) {
+    int o = builder.endTable();
+    return o;
+  }
+
+  public static final class Vector extends BaseVector {
+    public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+    public KeyValue get(int j) { return get(new KeyValue(), j); }
+    public KeyValue get(KeyValue obj, int j) {  return obj.__assign(__indirect(__element(j), bb), bb); }
+  }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/LargeBinary.java b/java/format/src/main/java/org/apache/arrow/flatbuf/LargeBinary.java
new file mode 100644
index 00000000000..b7377bbe947
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/LargeBinary.java
@@ -0,0 +1,52 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * Same as Binary, but with 64-bit offsets, allowing to represent
+ * extremely large data values.
+ */
+public final class LargeBinary extends Table {
+  public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+  public static LargeBinary getRootAsLargeBinary(ByteBuffer _bb) { return getRootAsLargeBinary(_bb, new LargeBinary()); }
+  public static LargeBinary getRootAsLargeBinary(ByteBuffer _bb, LargeBinary obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+  public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+  public LargeBinary __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+
+  public static void startLargeBinary(FlatBufferBuilder builder) { builder.startTable(0); }
+  public static int endLargeBinary(FlatBufferBuilder builder) {
+    int o = builder.endTable();
+    return o;
+  }
+
+  public static final class Vector extends BaseVector {
+    public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+    public LargeBinary get(int j) { return get(new LargeBinary(), j); }
+    public LargeBinary get(LargeBinary obj, int j) {  return obj.__assign(__indirect(__element(j), bb), bb); }
+  }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/LargeList.java b/java/format/src/main/java/org/apache/arrow/flatbuf/LargeList.java
new file mode 100644
index 00000000000..32cc0034c46
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/LargeList.java
@@ -0,0 +1,52 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * Same as List, but with 64-bit offsets, allowing to represent
+ * extremely large data values.
+ */
+public final class LargeList extends Table {
+  public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+  public static LargeList getRootAsLargeList(ByteBuffer _bb) { return getRootAsLargeList(_bb, new LargeList()); }
+  public static LargeList getRootAsLargeList(ByteBuffer _bb, LargeList obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+  public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+  public LargeList __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+
+  public static void startLargeList(FlatBufferBuilder builder) { builder.startTable(0); }
+  public static int endLargeList(FlatBufferBuilder builder) {
+    int o = builder.endTable();
+    return o;
+  }
+
+  public static final class Vector extends BaseVector {
+    public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+    public LargeList get(int j) { return get(new LargeList(), j); }
+    public LargeList get(LargeList obj, int j) {  return obj.__assign(__indirect(__element(j), bb), bb); }
+  }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/LargeUtf8.java b/java/format/src/main/java/org/apache/arrow/flatbuf/LargeUtf8.java
new file mode 100644
index 00000000000..7e7a20117de
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/LargeUtf8.java
@@ -0,0 +1,52 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * Same as Utf8, but with 64-bit offsets, allowing to represent
+ * extremely large data values.
+ */
+public final class LargeUtf8 extends Table {
+  public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+  public static LargeUtf8 getRootAsLargeUtf8(ByteBuffer _bb) { return getRootAsLargeUtf8(_bb, new LargeUtf8()); }
+  public static LargeUtf8 getRootAsLargeUtf8(ByteBuffer _bb, LargeUtf8 obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+  public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+  public LargeUtf8 __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+
+  public static void startLargeUtf8(FlatBufferBuilder builder) { builder.startTable(0); }
+  public static int endLargeUtf8(FlatBufferBuilder builder) {
+    int o = builder.endTable();
+    return o;
+  }
+
+  public static final class Vector extends BaseVector {
+    public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+    public LargeUtf8 get(int j) { return get(new LargeUtf8(), j); }
+    public LargeUtf8 get(LargeUtf8 obj, int j) {  return obj.__assign(__indirect(__element(j), bb), bb); }
+  }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/List.java b/java/format/src/main/java/org/apache/arrow/flatbuf/List.java
new file mode 100644
index 00000000000..4493f9c5b3e
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/List.java
@@ -0,0 +1,48 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+public final class List extends Table {
+  public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+  public static List getRootAsList(ByteBuffer _bb) { return getRootAsList(_bb, new List()); }
+  public static List getRootAsList(ByteBuffer _bb, List obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+  public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+  public List __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+
+  public static void startList(FlatBufferBuilder builder) { builder.startTable(0); }
+  public static int endList(FlatBufferBuilder builder) {
+    int o = builder.endTable();
+    return o;
+  }
+
+  public static final class Vector extends BaseVector {
+    public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+    public List get(int j) { return get(new List(), j); }
+    public List get(List obj, int j) {  return obj.__assign(__indirect(__element(j), bb), bb); }
+  }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Map.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Map.java
new file mode 100644
index 00000000000..704426e92d4
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/Map.java
@@ -0,0 +1,87 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * A Map is a logical nested type that is represented as
+ *
+ * List<entries: Struct<key: K, value: V>>
+ *
+ * In this layout, the keys and values are each respectively contiguous. We do
+ * not constrain the key and value types, so the application is responsible
+ * for ensuring that the keys are hashable and unique. Whether the keys are sorted
+ * may be set in the metadata for this field.
+ *
+ * In a field with Map type, the field has a child Struct field, which then
+ * has two children: key type and the second the value type. The names of the
+ * child fields may be respectively "entries", "key", and "value", but this is
+ * not enforced.
+ *
+ * Map
+ * ```text
+ *   - child[0] entries: Struct
+ *     - child[0] key: K
+ *     - child[1] value: V
+ * ```
+ * Neither the "entries" field nor the "key" field may be nullable.
+ *
+ * The metadata is structured so that Arrow systems without special handling
+ * for Map can make Map an alias for List. The "layout" attribute for the Map
+ * field must have the same contents as a List.
+ */
+public final class Map extends Table {
+  public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+  public static Map getRootAsMap(ByteBuffer _bb) { return getRootAsMap(_bb, new Map()); }
+  public static Map getRootAsMap(ByteBuffer _bb, Map obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+  public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+  public Map __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+  /**
+   * Set to true if the keys within each value are sorted
+   */
+  public boolean keysSorted() { int o = __offset(4); return o != 0 ? 0!=bb.get(o + bb_pos) : false; }
+
+  public static int createMap(FlatBufferBuilder builder,
+      boolean keysSorted) {
+    builder.startTable(1);
+    Map.addKeysSorted(builder, keysSorted);
+    return Map.endMap(builder);
+  }
+
+  public static void startMap(FlatBufferBuilder builder) { builder.startTable(1); }
+  public static void addKeysSorted(FlatBufferBuilder builder, boolean keysSorted) { builder.addBoolean(0, keysSorted, false); }
+  public static int endMap(FlatBufferBuilder builder) {
+    int o = builder.endTable();
+    return o;
+  }
+
+  public static final class Vector extends BaseVector {
+    public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+    public Map get(int j) { return get(new Map(), j); }
+    public Map get(Map obj, int j) {  return obj.__assign(__indirect(__element(j), bb), bb); }
+  }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Message.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Message.java
new file mode 100644
index 00000000000..c7738ad95a2
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/Message.java
@@ -0,0 +1,81 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+public final class Message extends Table {
+  public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+  public static Message getRootAsMessage(ByteBuffer _bb) { return getRootAsMessage(_bb, new Message()); }
+  public static Message getRootAsMessage(ByteBuffer _bb, Message obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+  public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+  public Message __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+  public short version() { int o = __offset(4); return o != 0 ? bb.getShort(o + bb_pos) : 0; }
+  public byte headerType() { int o = __offset(6); return o != 0 ? bb.get(o + bb_pos) : 0; }
+  public Table header(Table obj) { int o = __offset(8); return o != 0 ? __union(obj, o + bb_pos) : null; }
+  public long bodyLength() { int o = __offset(10); return o != 0 ? bb.getLong(o + bb_pos) : 0L; }
+  public org.apache.arrow.flatbuf.KeyValue customMetadata(int j) { return customMetadata(new org.apache.arrow.flatbuf.KeyValue(), j); }
+  public org.apache.arrow.flatbuf.KeyValue customMetadata(org.apache.arrow.flatbuf.KeyValue obj, int j) { int o = __offset(12); return o != 0 ? obj.__assign(__indirect(__vector(o) + j * 4), bb) : null; }
+  public int customMetadataLength() { int o = __offset(12); return o != 0 ? __vector_len(o) : 0; }
+  public org.apache.arrow.flatbuf.KeyValue.Vector customMetadataVector() { return customMetadataVector(new org.apache.arrow.flatbuf.KeyValue.Vector()); }
+  public org.apache.arrow.flatbuf.KeyValue.Vector customMetadataVector(org.apache.arrow.flatbuf.KeyValue.Vector obj) { int o = __offset(12); return o != 0 ? obj.__assign(__vector(o), 4, bb) : null; }
+
+  public static int createMessage(FlatBufferBuilder builder,
+      short version,
+      byte header_type,
+      int headerOffset,
+      long bodyLength,
+      int custom_metadataOffset) {
+    builder.startTable(5);
+    Message.addBodyLength(builder, bodyLength);
+    Message.addCustomMetadata(builder, custom_metadataOffset);
+    Message.addHeader(builder, headerOffset);
+    Message.addVersion(builder, version);
+    Message.addHeaderType(builder, header_type);
+    return Message.endMessage(builder);
+  }
+
+  public static void startMessage(FlatBufferBuilder builder) { builder.startTable(5); }
+  public static void addVersion(FlatBufferBuilder builder, short version) { builder.addShort(0, version, 0); }
+  public static void addHeaderType(FlatBufferBuilder builder, byte headerType) { builder.addByte(1, headerType, 0); }
+  public static void addHeader(FlatBufferBuilder builder, int headerOffset) { builder.addOffset(2, headerOffset, 0); }
+  public static void addBodyLength(FlatBufferBuilder builder, long bodyLength) { builder.addLong(3, bodyLength, 0L); }
+  public static void addCustomMetadata(FlatBufferBuilder builder, int customMetadataOffset) { builder.addOffset(4, customMetadataOffset, 0); }
+  public static int createCustomMetadataVector(FlatBufferBuilder builder, int[] data) { builder.startVector(4, data.length, 4); for (int i = data.length - 1; i >= 0; i--) builder.addOffset(data[i]); return builder.endVector(); }
+  public static void startCustomMetadataVector(FlatBufferBuilder builder, int numElems) { builder.startVector(4, numElems, 4); }
+  public static int endMessage(FlatBufferBuilder builder) {
+    int o = builder.endTable();
+    return o;
+  }
+  public static void finishMessageBuffer(FlatBufferBuilder builder, int offset) { builder.finish(offset); }
+  public static void finishSizePrefixedMessageBuffer(FlatBufferBuilder builder, int offset) { builder.finishSizePrefixed(offset); }
+
+  public static final class Vector extends BaseVector {
+    public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+    public Message get(int j) { return get(new Message(), j); }
+    public Message get(Message obj, int j) {  return obj.__assign(__indirect(__element(j), bb), bb); }
+  }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/MessageHeader.java b/java/format/src/main/java/org/apache/arrow/flatbuf/MessageHeader.java
new file mode 100644
index 00000000000..179b6ba0f54
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/MessageHeader.java
@@ -0,0 +1,44 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+/**
+ * ----------------------------------------------------------------------
+ * The root Message type
+ * This union enables us to easily send different message types without
+ * redundant storage, and in the future we can easily add new message types.
+ *
+ * Arrow implementations do not need to implement all of the message types,
+ * which may include experimental metadata types. For maximum compatibility,
+ * it is best to send data using RecordBatch
+ */
+public final class MessageHeader {
+  private MessageHeader() { }
+  public static final byte NONE = 0;
+  public static final byte Schema = 1;
+  public static final byte DictionaryBatch = 2;
+  public static final byte RecordBatch = 3;
+  public static final byte Tensor = 4;
+  public static final byte SparseTensor = 5;
+
+  public static final String[] names = { "NONE", "Schema", "DictionaryBatch", "RecordBatch", "Tensor", "SparseTensor", };
+
+  public static String name(int e) { return names[e]; }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/MetadataVersion.java b/java/format/src/main/java/org/apache/arrow/flatbuf/MetadataVersion.java
new file mode 100644
index 00000000000..8ce9d84fc2b
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/MetadataVersion.java
@@ -0,0 +1,54 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+public final class MetadataVersion {
+  private MetadataVersion() { }
+  /**
+   * 0.1.0 (October 2016).
+   */
+  public static final short V1 = 0;
+  /**
+   * 0.2.0 (February 2017). Non-backwards compatible with V1.
+   */
+  public static final short V2 = 1;
+  /**
+   * 0.3.0 -> 0.7.1 (May - December 2017). Non-backwards compatible with V2.
+   */
+  public static final short V3 = 2;
+  /**
+   * >= 0.8.0 (December 2017). Non-backwards compatible with V3.
+   */
+  public static final short V4 = 3;
+  /**
+   * >= 1.0.0 (July 2020. Backwards compatible with V4 (V5 readers can read V4
+   * metadata and IPC messages). Implementations are recommended to provide a
+   * V4 compatibility mode with V5 format changes disabled.
+   *
+   * Incompatible changes between V4 and V5:
+   * - Union buffer layout has changed. In V5, Unions don't have a validity
+   *   bitmap buffer.
+   */
+  public static final short V5 = 4;
+
+  public static final String[] names = { "V1", "V2", "V3", "V4", "V5", };
+
+  public static String name(int e) { return names[e]; }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Null.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Null.java
new file mode 100644
index 00000000000..b7a30f2e822
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/Null.java
@@ -0,0 +1,51 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * These are stored in the flatbuffer in the Type union below
+ */
+public final class Null extends Table {
+  public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+  public static Null getRootAsNull(ByteBuffer _bb) { return getRootAsNull(_bb, new Null()); }
+  public static Null getRootAsNull(ByteBuffer _bb, Null obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+  public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+  public Null __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+
+  public static void startNull(FlatBufferBuilder builder) { builder.startTable(0); }
+  public static int endNull(FlatBufferBuilder builder) {
+    int o = builder.endTable();
+    return o;
+  }
+
+  public static final class Vector extends BaseVector {
+    public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+    public Null get(int j) { return get(new Null(), j); }
+    public Null get(Null obj, int j) {  return obj.__assign(__indirect(__element(j), bb), bb); }
+  }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Precision.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Precision.java
new file mode 100644
index 00000000000..e2c42237a67
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/Precision.java
@@ -0,0 +1,31 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+public final class Precision {
+  private Precision() { }
+  public static final short HALF = 0;
+  public static final short SINGLE = 1;
+  public static final short DOUBLE = 2;
+
+  public static final String[] names = { "HALF", "SINGLE", "DOUBLE", };
+
+  public static String name(int e) { return names[e]; }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/RecordBatch.java b/java/format/src/main/java/org/apache/arrow/flatbuf/RecordBatch.java
new file mode 100644
index 00000000000..eb814e07dcc
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/RecordBatch.java
@@ -0,0 +1,103 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * A data header describing the shared memory layout of a "record" or "row"
+ * batch. Some systems call this a "row batch" internally and others a "record
+ * batch".
+ */
+public final class RecordBatch extends Table {
+  public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+  public static RecordBatch getRootAsRecordBatch(ByteBuffer _bb) { return getRootAsRecordBatch(_bb, new RecordBatch()); }
+  public static RecordBatch getRootAsRecordBatch(ByteBuffer _bb, RecordBatch obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+  public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+  public RecordBatch __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+  /**
+   * number of records / rows. The arrays in the batch should all have this
+   * length
+   */
+  public long length() { int o = __offset(4); return o != 0 ? bb.getLong(o + bb_pos) : 0L; }
+  /**
+   * Nodes correspond to the pre-ordered flattened logical schema
+   */
+  public org.apache.arrow.flatbuf.FieldNode nodes(int j) { return nodes(new org.apache.arrow.flatbuf.FieldNode(), j); }
+  public org.apache.arrow.flatbuf.FieldNode nodes(org.apache.arrow.flatbuf.FieldNode obj, int j) { int o = __offset(6); return o != 0 ? obj.__assign(__vector(o) + j * 16, bb) : null; }
+  public int nodesLength() { int o = __offset(6); return o != 0 ? __vector_len(o) : 0; }
+  public org.apache.arrow.flatbuf.FieldNode.Vector nodesVector() { return nodesVector(new org.apache.arrow.flatbuf.FieldNode.Vector()); }
+  public org.apache.arrow.flatbuf.FieldNode.Vector nodesVector(org.apache.arrow.flatbuf.FieldNode.Vector obj) { int o = __offset(6); return o != 0 ? obj.__assign(__vector(o), 16, bb) : null; }
+  /**
+   * Buffers correspond to the pre-ordered flattened buffer tree
+   *
+   * The number of buffers appended to this list depends on the schema. For
+   * example, most primitive arrays will have 2 buffers, 1 for the validity
+   * bitmap and 1 for the values. For struct arrays, there will only be a
+   * single buffer for the validity (nulls) bitmap
+   */
+  public org.apache.arrow.flatbuf.Buffer buffers(int j) { return buffers(new org.apache.arrow.flatbuf.Buffer(), j); }
+  public org.apache.arrow.flatbuf.Buffer buffers(org.apache.arrow.flatbuf.Buffer obj, int j) { int o = __offset(8); return o != 0 ? obj.__assign(__vector(o) + j * 16, bb) : null; }
+  public int buffersLength() { int o = __offset(8); return o != 0 ? __vector_len(o) : 0; }
+  public org.apache.arrow.flatbuf.Buffer.Vector buffersVector() { return buffersVector(new org.apache.arrow.flatbuf.Buffer.Vector()); }
+  public org.apache.arrow.flatbuf.Buffer.Vector buffersVector(org.apache.arrow.flatbuf.Buffer.Vector obj) { int o = __offset(8); return o != 0 ? obj.__assign(__vector(o), 16, bb) : null; }
+  /**
+   * Optional compression of the message body
+   */
+  public org.apache.arrow.flatbuf.BodyCompression compression() { return compression(new org.apache.arrow.flatbuf.BodyCompression()); }
+  public org.apache.arrow.flatbuf.BodyCompression compression(org.apache.arrow.flatbuf.BodyCompression obj) { int o = __offset(10); return o != 0 ? obj.__assign(__indirect(o + bb_pos), bb) : null; }
+
+  public static int createRecordBatch(FlatBufferBuilder builder,
+      long length,
+      int nodesOffset,
+      int buffersOffset,
+      int compressionOffset) {
+    builder.startTable(4);
+    RecordBatch.addLength(builder, length);
+    RecordBatch.addCompression(builder, compressionOffset);
+    RecordBatch.addBuffers(builder, buffersOffset);
+    RecordBatch.addNodes(builder, nodesOffset);
+    return RecordBatch.endRecordBatch(builder);
+  }
+
+  public static void startRecordBatch(FlatBufferBuilder builder) { builder.startTable(4); }
+  public static void addLength(FlatBufferBuilder builder, long length) { builder.addLong(0, length, 0L); }
+  public static void addNodes(FlatBufferBuilder builder, int nodesOffset) { builder.addOffset(1, nodesOffset, 0); }
+  public static void startNodesVector(FlatBufferBuilder builder, int numElems) { builder.startVector(16, numElems, 8); }
+  public static void addBuffers(FlatBufferBuilder builder, int buffersOffset) { builder.addOffset(2, buffersOffset, 0); }
+  public static void startBuffersVector(FlatBufferBuilder builder, int numElems) { builder.startVector(16, numElems, 8); }
+  public static void addCompression(FlatBufferBuilder builder, int compressionOffset) { builder.addOffset(3, compressionOffset, 0); }
+  public static int endRecordBatch(FlatBufferBuilder builder) {
+    int o = builder.endTable();
+    return o;
+  }
+
+  public static final class Vector extends BaseVector {
+    public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+    public RecordBatch get(int j) { return get(new RecordBatch(), j); }
+    public RecordBatch get(RecordBatch obj, int j) {  return obj.__assign(__indirect(__element(j), bb), bb); }
+  }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Schema.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Schema.java
new file mode 100644
index 00000000000..69c025254b2
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/Schema.java
@@ -0,0 +1,102 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * ----------------------------------------------------------------------
+ * A Schema describes the columns in a row batch
+ */
+public final class Schema extends Table {
+  public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+  public static Schema getRootAsSchema(ByteBuffer _bb) { return getRootAsSchema(_bb, new Schema()); }
+  public static Schema getRootAsSchema(ByteBuffer _bb, Schema obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+  public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+  public Schema __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+  /**
+   * endianness of the buffer
+   * it is Little Endian by default
+   * if endianness doesn't match the underlying system then the vectors need to be converted
+   */
+  public short endianness() { int o = __offset(4); return o != 0 ? bb.getShort(o + bb_pos) : 0; }
+  public org.apache.arrow.flatbuf.Field fields(int j) { return fields(new org.apache.arrow.flatbuf.Field(), j); }
+  public org.apache.arrow.flatbuf.Field fields(org.apache.arrow.flatbuf.Field obj, int j) { int o = __offset(6); return o != 0 ? obj.__assign(__indirect(__vector(o) + j * 4), bb) : null; }
+  public int fieldsLength() { int o = __offset(6); return o != 0 ? __vector_len(o) : 0; }
+  public org.apache.arrow.flatbuf.Field.Vector fieldsVector() { return fieldsVector(new org.apache.arrow.flatbuf.Field.Vector()); }
+  public org.apache.arrow.flatbuf.Field.Vector fieldsVector(org.apache.arrow.flatbuf.Field.Vector obj) { int o = __offset(6); return o != 0 ? obj.__assign(__vector(o), 4, bb) : null; }
+  public org.apache.arrow.flatbuf.KeyValue customMetadata(int j) { return customMetadata(new org.apache.arrow.flatbuf.KeyValue(), j); }
+  public org.apache.arrow.flatbuf.KeyValue customMetadata(org.apache.arrow.flatbuf.KeyValue obj, int j) { int o = __offset(8); return o != 0 ? obj.__assign(__indirect(__vector(o) + j * 4), bb) : null; }
+  public int customMetadataLength() { int o = __offset(8); return o != 0 ? __vector_len(o) : 0; }
+  public org.apache.arrow.flatbuf.KeyValue.Vector customMetadataVector() { return customMetadataVector(new org.apache.arrow.flatbuf.KeyValue.Vector()); }
+  public org.apache.arrow.flatbuf.KeyValue.Vector customMetadataVector(org.apache.arrow.flatbuf.KeyValue.Vector obj) { int o = __offset(8); return o != 0 ? obj.__assign(__vector(o), 4, bb) : null; }
+  /**
+   * Features used in the stream/file.
+   */
+  public long features(int j) { int o = __offset(10); return o != 0 ? bb.getLong(__vector(o) + j * 8) : 0; }
+  public int featuresLength() { int o = __offset(10); return o != 0 ? __vector_len(o) : 0; }
+  public LongVector featuresVector() { return featuresVector(new LongVector()); }
+  public LongVector featuresVector(LongVector obj) { int o = __offset(10); return o != 0 ? obj.__assign(__vector(o), bb) : null; }
+  public ByteBuffer featuresAsByteBuffer() { return __vector_as_bytebuffer(10, 8); }
+  public ByteBuffer featuresInByteBuffer(ByteBuffer _bb) { return __vector_in_bytebuffer(_bb, 10, 8); }
+
+  public static int createSchema(FlatBufferBuilder builder,
+      short endianness,
+      int fieldsOffset,
+      int custom_metadataOffset,
+      int featuresOffset) {
+    builder.startTable(4);
+    Schema.addFeatures(builder, featuresOffset);
+    Schema.addCustomMetadata(builder, custom_metadataOffset);
+    Schema.addFields(builder, fieldsOffset);
+    Schema.addEndianness(builder, endianness);
+    return Schema.endSchema(builder);
+  }
+
+  public static void startSchema(FlatBufferBuilder builder) { builder.startTable(4); }
+  public static void addEndianness(FlatBufferBuilder builder, short endianness) { builder.addShort(0, endianness, 0); }
+  public static void addFields(FlatBufferBuilder builder, int fieldsOffset) { builder.addOffset(1, fieldsOffset, 0); }
+  public static int createFieldsVector(FlatBufferBuilder builder, int[] data) { builder.startVector(4, data.length, 4); for (int i = data.length - 1; i >= 0; i--) builder.addOffset(data[i]); return builder.endVector(); }
+  public static void startFieldsVector(FlatBufferBuilder builder, int numElems) { builder.startVector(4, numElems, 4); }
+  public static void addCustomMetadata(FlatBufferBuilder builder, int customMetadataOffset) { builder.addOffset(2, customMetadataOffset, 0); }
+  public static int createCustomMetadataVector(FlatBufferBuilder builder, int[] data) { builder.startVector(4, data.length, 4); for (int i = data.length - 1; i >= 0; i--) builder.addOffset(data[i]); return builder.endVector(); }
+  public static void startCustomMetadataVector(FlatBufferBuilder builder, int numElems) { builder.startVector(4, numElems, 4); }
+  public static void addFeatures(FlatBufferBuilder builder, int featuresOffset) { builder.addOffset(3, featuresOffset, 0); }
+  public static int createFeaturesVector(FlatBufferBuilder builder, long[] data) { builder.startVector(8, data.length, 8); for (int i = data.length - 1; i >= 0; i--) builder.addLong(data[i]); return builder.endVector(); }
+  public static void startFeaturesVector(FlatBufferBuilder builder, int numElems) { builder.startVector(8, numElems, 8); }
+  public static int endSchema(FlatBufferBuilder builder) {
+    int o = builder.endTable();
+    return o;
+  }
+  public static void finishSchemaBuffer(FlatBufferBuilder builder, int offset) { builder.finish(offset); }
+  public static void finishSizePrefixedSchemaBuffer(FlatBufferBuilder builder, int offset) { builder.finishSizePrefixed(offset); }
+
+  public static final class Vector extends BaseVector {
+    public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+    public Schema get(int j) { return get(new Schema(), j); }
+    public Schema get(Schema obj, int j) {  return obj.__assign(__indirect(__element(j), bb), bb); }
+  }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/SparseMatrixCompressedAxis.java b/java/format/src/main/java/org/apache/arrow/flatbuf/SparseMatrixCompressedAxis.java
new file mode 100644
index 00000000000..2ad314f2e85
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/SparseMatrixCompressedAxis.java
@@ -0,0 +1,30 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+public final class SparseMatrixCompressedAxis {
+  private SparseMatrixCompressedAxis() { }
+  public static final short Row = 0;
+  public static final short Column = 1;
+
+  public static final String[] names = { "Row", "Column", };
+
+  public static String name(int e) { return names[e]; }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/SparseMatrixIndexCSX.java b/java/format/src/main/java/org/apache/arrow/flatbuf/SparseMatrixIndexCSX.java
new file mode 100644
index 00000000000..9516a6ec146
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/SparseMatrixIndexCSX.java
@@ -0,0 +1,114 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * Compressed Sparse format, that is matrix-specific.
+ */
+public final class SparseMatrixIndexCSX extends Table {
+  public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+  public static SparseMatrixIndexCSX getRootAsSparseMatrixIndexCSX(ByteBuffer _bb) { return getRootAsSparseMatrixIndexCSX(_bb, new SparseMatrixIndexCSX()); }
+  public static SparseMatrixIndexCSX getRootAsSparseMatrixIndexCSX(ByteBuffer _bb, SparseMatrixIndexCSX obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+  public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+  public SparseMatrixIndexCSX __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+  /**
+   * Which axis, row or column, is compressed
+   */
+  public short compressedAxis() { int o = __offset(4); return o != 0 ? bb.getShort(o + bb_pos) : 0; }
+  /**
+   * The type of values in indptrBuffer
+   */
+  public org.apache.arrow.flatbuf.Int indptrType() { return indptrType(new org.apache.arrow.flatbuf.Int()); }
+  public org.apache.arrow.flatbuf.Int indptrType(org.apache.arrow.flatbuf.Int obj) { int o = __offset(6); return o != 0 ? obj.__assign(__indirect(o + bb_pos), bb) : null; }
+  /**
+   * indptrBuffer stores the location and size of indptr array that
+   * represents the range of the rows.
+   * The i-th row spans from `indptr[i]` to `indptr[i+1]` in the data.
+   * The length of this array is 1 + (the number of rows), and the type
+   * of index value is long.
+   *
+   * For example, let X be the following 6x4 matrix:
+   * ```text
+   *   X := [[0, 1, 2, 0],
+   *         [0, 0, 3, 0],
+   *         [0, 4, 0, 5],
+   *         [0, 0, 0, 0],
+   *         [6, 0, 7, 8],
+   *         [0, 9, 0, 0]].
+   * ```
+   * The array of non-zero values in X is:
+   * ```text
+   *   values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9].
+   * ```
+   * And the indptr of X is:
+   * ```text
+   *   indptr(X) = [0, 2, 3, 5, 5, 8, 10].
+   * ```
+   */
+  public org.apache.arrow.flatbuf.Buffer indptrBuffer() { return indptrBuffer(new org.apache.arrow.flatbuf.Buffer()); }
+  public org.apache.arrow.flatbuf.Buffer indptrBuffer(org.apache.arrow.flatbuf.Buffer obj) { int o = __offset(8); return o != 0 ? obj.__assign(o + bb_pos, bb) : null; }
+  /**
+   * The type of values in indicesBuffer
+   */
+  public org.apache.arrow.flatbuf.Int indicesType() { return indicesType(new org.apache.arrow.flatbuf.Int()); }
+  public org.apache.arrow.flatbuf.Int indicesType(org.apache.arrow.flatbuf.Int obj) { int o = __offset(10); return o != 0 ? obj.__assign(__indirect(o + bb_pos), bb) : null; }
+  /**
+   * indicesBuffer stores the location and size of the array that
+   * contains the column indices of the corresponding non-zero values.
+   * The type of index value is long.
+   *
+   * For example, the indices of the above X is:
+   * ```text
+   *   indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1].
+   * ```
+   * Note that the indices are sorted in lexicographical order for each row.
+   */
+  public org.apache.arrow.flatbuf.Buffer indicesBuffer() { return indicesBuffer(new org.apache.arrow.flatbuf.Buffer()); }
+  public org.apache.arrow.flatbuf.Buffer indicesBuffer(org.apache.arrow.flatbuf.Buffer obj) { int o = __offset(12); return o != 0 ? obj.__assign(o + bb_pos, bb) : null; }
+
+  public static void startSparseMatrixIndexCSX(FlatBufferBuilder builder) { builder.startTable(5); }
+  public static void addCompressedAxis(FlatBufferBuilder builder, short compressedAxis) { builder.addShort(0, compressedAxis, 0); }
+  public static void addIndptrType(FlatBufferBuilder builder, int indptrTypeOffset) { builder.addOffset(1, indptrTypeOffset, 0); }
+  public static void addIndptrBuffer(FlatBufferBuilder builder, int indptrBufferOffset) { builder.addStruct(2, indptrBufferOffset, 0); }
+  public static void addIndicesType(FlatBufferBuilder builder, int indicesTypeOffset) { builder.addOffset(3, indicesTypeOffset, 0); }
+  public static void addIndicesBuffer(FlatBufferBuilder builder, int indicesBufferOffset) { builder.addStruct(4, indicesBufferOffset, 0); }
+  public static int endSparseMatrixIndexCSX(FlatBufferBuilder builder) {
+    int o = builder.endTable();
+    builder.required(o, 6);  // indptrType
+    builder.required(o, 8);  // indptrBuffer
+    builder.required(o, 10);  // indicesType
+    builder.required(o, 12);  // indicesBuffer
+    return o;
+  }
+
+  public static final class Vector extends BaseVector {
+    public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+    public SparseMatrixIndexCSX get(int j) { return get(new SparseMatrixIndexCSX(), j); }
+    public SparseMatrixIndexCSX get(SparseMatrixIndexCSX obj, int j) {  return obj.__assign(__indirect(__element(j), bb), bb); }
+  }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/SparseTensor.java b/java/format/src/main/java/org/apache/arrow/flatbuf/SparseTensor.java
new file mode 100644
index 00000000000..9b4cdf6e891
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/SparseTensor.java
@@ -0,0 +1,92 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+public final class SparseTensor extends Table {
+  public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+  public static SparseTensor getRootAsSparseTensor(ByteBuffer _bb) { return getRootAsSparseTensor(_bb, new SparseTensor()); }
+  public static SparseTensor getRootAsSparseTensor(ByteBuffer _bb, SparseTensor obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+  public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+  public SparseTensor __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+  public byte typeType() { int o = __offset(4); return o != 0 ? bb.get(o + bb_pos) : 0; }
+  /**
+   * The type of data contained in a value cell.
+   * Currently only fixed-width value types are supported,
+   * no strings or nested types.
+   */
+  public Table type(Table obj) { int o = __offset(6); return o != 0 ? __union(obj, o + bb_pos) : null; }
+  /**
+   * The dimensions of the tensor, optionally named.
+   */
+  public org.apache.arrow.flatbuf.TensorDim shape(int j) { return shape(new org.apache.arrow.flatbuf.TensorDim(), j); }
+  public org.apache.arrow.flatbuf.TensorDim shape(org.apache.arrow.flatbuf.TensorDim obj, int j) { int o = __offset(8); return o != 0 ? obj.__assign(__indirect(__vector(o) + j * 4), bb) : null; }
+  public int shapeLength() { int o = __offset(8); return o != 0 ? __vector_len(o) : 0; }
+  public org.apache.arrow.flatbuf.TensorDim.Vector shapeVector() { return shapeVector(new org.apache.arrow.flatbuf.TensorDim.Vector()); }
+  public org.apache.arrow.flatbuf.TensorDim.Vector shapeVector(org.apache.arrow.flatbuf.TensorDim.Vector obj) { int o = __offset(8); return o != 0 ? obj.__assign(__vector(o), 4, bb) : null; }
+  /**
+   * The number of non-zero values in a sparse tensor.
+   */
+  public long nonZeroLength() { int o = __offset(10); return o != 0 ? bb.getLong(o + bb_pos) : 0L; }
+  public byte sparseIndexType() { int o = __offset(12); return o != 0 ? bb.get(o + bb_pos) : 0; }
+  /**
+   * Sparse tensor index
+   */
+  public Table sparseIndex(Table obj) { int o = __offset(14); return o != 0 ? __union(obj, o + bb_pos) : null; }
+  /**
+   * The location and size of the tensor's data
+   */
+  public org.apache.arrow.flatbuf.Buffer data() { return data(new org.apache.arrow.flatbuf.Buffer()); }
+  public org.apache.arrow.flatbuf.Buffer data(org.apache.arrow.flatbuf.Buffer obj) { int o = __offset(16); return o != 0 ? obj.__assign(o + bb_pos, bb) : null; }
+
+  public static void startSparseTensor(FlatBufferBuilder builder) { builder.startTable(7); }
+  public static void addTypeType(FlatBufferBuilder builder, byte typeType) { builder.addByte(0, typeType, 0); }
+  public static void addType(FlatBufferBuilder builder, int typeOffset) { builder.addOffset(1, typeOffset, 0); }
+  public static void addShape(FlatBufferBuilder builder, int shapeOffset) { builder.addOffset(2, shapeOffset, 0); }
+  public static int createShapeVector(FlatBufferBuilder builder, int[] data) { builder.startVector(4, data.length, 4); for (int i = data.length - 1; i >= 0; i--) builder.addOffset(data[i]); return builder.endVector(); }
+  public static void startShapeVector(FlatBufferBuilder builder, int numElems) { builder.startVector(4, numElems, 4); }
+  public static void addNonZeroLength(FlatBufferBuilder builder, long nonZeroLength) { builder.addLong(3, nonZeroLength, 0L); }
+  public static void addSparseIndexType(FlatBufferBuilder builder, byte sparseIndexType) { builder.addByte(4, sparseIndexType, 0); }
+  public static void addSparseIndex(FlatBufferBuilder builder, int sparseIndexOffset) { builder.addOffset(5, sparseIndexOffset, 0); }
+  public static void addData(FlatBufferBuilder builder, int dataOffset) { builder.addStruct(6, dataOffset, 0); }
+  public static int endSparseTensor(FlatBufferBuilder builder) {
+    int o = builder.endTable();
+    builder.required(o, 6);  // type
+    builder.required(o, 8);  // shape
+    builder.required(o, 14);  // sparseIndex
+    builder.required(o, 16);  // data
+    return o;
+  }
+  public static void finishSparseTensorBuffer(FlatBufferBuilder builder, int offset) { builder.finish(offset); }
+  public static void finishSizePrefixedSparseTensorBuffer(FlatBufferBuilder builder, int offset) { builder.finishSizePrefixed(offset); }
+
+  public static final class Vector extends BaseVector {
+    public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+    public SparseTensor get(int j) { return get(new SparseTensor(), j); }
+    public SparseTensor get(SparseTensor obj, int j) {  return obj.__assign(__indirect(__element(j), bb), bb); }
+  }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/SparseTensorIndex.java b/java/format/src/main/java/org/apache/arrow/flatbuf/SparseTensorIndex.java
new file mode 100644
index 00000000000..5b9444abcf0
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/SparseTensorIndex.java
@@ -0,0 +1,32 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+public final class SparseTensorIndex {
+  private SparseTensorIndex() { }
+  public static final byte NONE = 0;
+  public static final byte SparseTensorIndexCOO = 1;
+  public static final byte SparseMatrixIndexCSX = 2;
+  public static final byte SparseTensorIndexCSF = 3;
+
+  public static final String[] names = { "NONE", "SparseTensorIndexCOO", "SparseMatrixIndexCSX", "SparseTensorIndexCSF", };
+
+  public static String name(int e) { return names[e]; }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/SparseTensorIndexCOO.java b/java/format/src/main/java/org/apache/arrow/flatbuf/SparseTensorIndexCOO.java
new file mode 100644
index 00000000000..a84238d662d
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/SparseTensorIndexCOO.java
@@ -0,0 +1,118 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * ----------------------------------------------------------------------
+ * EXPERIMENTAL: Data structures for sparse tensors
+ * Coordinate (COO) format of sparse tensor index.
+ *
+ * COO's index list are represented as a NxM matrix,
+ * where N is the number of non-zero values,
+ * and M is the number of dimensions of a sparse tensor.
+ *
+ * indicesBuffer stores the location and size of the data of this indices
+ * matrix.  The value type and the stride of the indices matrix is
+ * specified in indicesType and indicesStrides fields.
+ *
+ * For example, let X be a 2x3x4x5 tensor, and it has the following
+ * 6 non-zero values:
+ * ```text
+ *   X[0, 1, 2, 0] := 1
+ *   X[1, 1, 2, 3] := 2
+ *   X[0, 2, 1, 0] := 3
+ *   X[0, 1, 3, 0] := 4
+ *   X[0, 1, 2, 1] := 5
+ *   X[1, 2, 0, 4] := 6
+ * ```
+ * In COO format, the index matrix of X is the following 4x6 matrix:
+ * ```text
+ *   [[0, 0, 0, 0, 1, 1],
+ *    [1, 1, 1, 2, 1, 2],
+ *    [2, 2, 3, 1, 2, 0],
+ *    [0, 1, 0, 0, 3, 4]]
+ * ```
+ * When isCanonical is true, the indices is sorted in lexicographical order
+ * (row-major order), and it does not have duplicated entries.  Otherwise,
+ * the indices may not be sorted, or may have duplicated entries.
+ */
+public final class SparseTensorIndexCOO extends Table {
+  public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+  public static SparseTensorIndexCOO getRootAsSparseTensorIndexCOO(ByteBuffer _bb) { return getRootAsSparseTensorIndexCOO(_bb, new SparseTensorIndexCOO()); }
+  public static SparseTensorIndexCOO getRootAsSparseTensorIndexCOO(ByteBuffer _bb, SparseTensorIndexCOO obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+  public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+  public SparseTensorIndexCOO __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+  /**
+   * The type of values in indicesBuffer
+   */
+  public org.apache.arrow.flatbuf.Int indicesType() { return indicesType(new org.apache.arrow.flatbuf.Int()); }
+  public org.apache.arrow.flatbuf.Int indicesType(org.apache.arrow.flatbuf.Int obj) { int o = __offset(4); return o != 0 ? obj.__assign(__indirect(o + bb_pos), bb) : null; }
+  /**
+   * Non-negative byte offsets to advance one value cell along each dimension
+   * If omitted, default to row-major order (C-like).
+   */
+  public long indicesStrides(int j) { int o = __offset(6); return o != 0 ? bb.getLong(__vector(o) + j * 8) : 0; }
+  public int indicesStridesLength() { int o = __offset(6); return o != 0 ? __vector_len(o) : 0; }
+  public LongVector indicesStridesVector() { return indicesStridesVector(new LongVector()); }
+  public LongVector indicesStridesVector(LongVector obj) { int o = __offset(6); return o != 0 ? obj.__assign(__vector(o), bb) : null; }
+  public ByteBuffer indicesStridesAsByteBuffer() { return __vector_as_bytebuffer(6, 8); }
+  public ByteBuffer indicesStridesInByteBuffer(ByteBuffer _bb) { return __vector_in_bytebuffer(_bb, 6, 8); }
+  /**
+   * The location and size of the indices matrix's data
+   */
+  public org.apache.arrow.flatbuf.Buffer indicesBuffer() { return indicesBuffer(new org.apache.arrow.flatbuf.Buffer()); }
+  public org.apache.arrow.flatbuf.Buffer indicesBuffer(org.apache.arrow.flatbuf.Buffer obj) { int o = __offset(8); return o != 0 ? obj.__assign(o + bb_pos, bb) : null; }
+  /**
+   * This flag is true if and only if the indices matrix is sorted in
+   * row-major order, and does not have duplicated entries.
+   * This sort order is the same as of Tensorflow's SparseTensor,
+   * but it is inverse order of SciPy's canonical coo_matrix
+   * (SciPy employs column-major order for its coo_matrix).
+   */
+  public boolean isCanonical() { int o = __offset(10); return o != 0 ? 0!=bb.get(o + bb_pos) : false; }
+
+  public static void startSparseTensorIndexCOO(FlatBufferBuilder builder) { builder.startTable(4); }
+  public static void addIndicesType(FlatBufferBuilder builder, int indicesTypeOffset) { builder.addOffset(0, indicesTypeOffset, 0); }
+  public static void addIndicesStrides(FlatBufferBuilder builder, int indicesStridesOffset) { builder.addOffset(1, indicesStridesOffset, 0); }
+  public static int createIndicesStridesVector(FlatBufferBuilder builder, long[] data) { builder.startVector(8, data.length, 8); for (int i = data.length - 1; i >= 0; i--) builder.addLong(data[i]); return builder.endVector(); }
+  public static void startIndicesStridesVector(FlatBufferBuilder builder, int numElems) { builder.startVector(8, numElems, 8); }
+  public static void addIndicesBuffer(FlatBufferBuilder builder, int indicesBufferOffset) { builder.addStruct(2, indicesBufferOffset, 0); }
+  public static void addIsCanonical(FlatBufferBuilder builder, boolean isCanonical) { builder.addBoolean(3, isCanonical, false); }
+  public static int endSparseTensorIndexCOO(FlatBufferBuilder builder) {
+    int o = builder.endTable();
+    builder.required(o, 4);  // indicesType
+    builder.required(o, 8);  // indicesBuffer
+    return o;
+  }
+
+  public static final class Vector extends BaseVector {
+    public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+    public SparseTensorIndexCOO get(int j) { return get(new SparseTensorIndexCOO(), j); }
+    public SparseTensorIndexCOO get(SparseTensorIndexCOO obj, int j) {  return obj.__assign(__indirect(__element(j), bb), bb); }
+  }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/SparseTensorIndexCSF.java b/java/format/src/main/java/org/apache/arrow/flatbuf/SparseTensorIndexCSF.java
new file mode 100644
index 00000000000..abc4662be18
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/SparseTensorIndexCSF.java
@@ -0,0 +1,173 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * Compressed Sparse Fiber (CSF) sparse tensor index.
+ */
+public final class SparseTensorIndexCSF extends Table {
+  public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+  public static SparseTensorIndexCSF getRootAsSparseTensorIndexCSF(ByteBuffer _bb) { return getRootAsSparseTensorIndexCSF(_bb, new SparseTensorIndexCSF()); }
+  public static SparseTensorIndexCSF getRootAsSparseTensorIndexCSF(ByteBuffer _bb, SparseTensorIndexCSF obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+  public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+  public SparseTensorIndexCSF __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+  /**
+   * CSF is a generalization of compressed sparse row (CSR) index.
+   * See [smith2017knl](http://shaden.io/pub-files/smith2017knl.pdf)
+   *
+   * CSF index recursively compresses each dimension of a tensor into a set
+   * of prefix trees. Each path from a root to leaf forms one tensor
+   * non-zero index. CSF is implemented with two arrays of buffers and one
+   * arrays of integers.
+   *
+   * For example, let X be a 2x3x4x5 tensor and let it have the following
+   * 8 non-zero values:
+   * ```text
+   *   X[0, 0, 0, 1] := 1
+   *   X[0, 0, 0, 2] := 2
+   *   X[0, 1, 0, 0] := 3
+   *   X[0, 1, 0, 2] := 4
+   *   X[0, 1, 1, 0] := 5
+   *   X[1, 1, 1, 0] := 6
+   *   X[1, 1, 1, 1] := 7
+   *   X[1, 1, 1, 2] := 8
+   * ```
+   * As a prefix tree this would be represented as:
+   * ```text
+   *         0          1
+   *        / \         |
+   *       0   1        1
+   *      /   / \       |
+   *     0   0   1      1
+   *    /|  /|   |    /| |
+   *   1 2 0 2   0   0 1 2
+   * ```
+   * The type of values in indptrBuffers
+   */
+  public org.apache.arrow.flatbuf.Int indptrType() { return indptrType(new org.apache.arrow.flatbuf.Int()); }
+  public org.apache.arrow.flatbuf.Int indptrType(org.apache.arrow.flatbuf.Int obj) { int o = __offset(4); return o != 0 ? obj.__assign(__indirect(o + bb_pos), bb) : null; }
+  /**
+   * indptrBuffers stores the sparsity structure.
+   * Each two consecutive dimensions in a tensor correspond to a buffer in
+   * indptrBuffers. A pair of consecutive values at `indptrBuffers[dim][i]`
+   * and `indptrBuffers[dim][i + 1]` signify a range of nodes in
+   * `indicesBuffers[dim + 1]` who are children of `indicesBuffers[dim][i]` node.
+   *
+   * For example, the indptrBuffers for the above X is:
+   * ```text
+   *   indptrBuffer(X) = [
+   *                       [0, 2, 3],
+   *                       [0, 1, 3, 4],
+   *                       [0, 2, 4, 5, 8]
+   *                     ].
+   * ```
+   */
+  public org.apache.arrow.flatbuf.Buffer indptrBuffers(int j) { return indptrBuffers(new org.apache.arrow.flatbuf.Buffer(), j); }
+  public org.apache.arrow.flatbuf.Buffer indptrBuffers(org.apache.arrow.flatbuf.Buffer obj, int j) { int o = __offset(6); return o != 0 ? obj.__assign(__vector(o) + j * 16, bb) : null; }
+  public int indptrBuffersLength() { int o = __offset(6); return o != 0 ? __vector_len(o) : 0; }
+  public org.apache.arrow.flatbuf.Buffer.Vector indptrBuffersVector() { return indptrBuffersVector(new org.apache.arrow.flatbuf.Buffer.Vector()); }
+  public org.apache.arrow.flatbuf.Buffer.Vector indptrBuffersVector(org.apache.arrow.flatbuf.Buffer.Vector obj) { int o = __offset(6); return o != 0 ? obj.__assign(__vector(o), 16, bb) : null; }
+  /**
+   * The type of values in indicesBuffers
+   */
+  public org.apache.arrow.flatbuf.Int indicesType() { return indicesType(new org.apache.arrow.flatbuf.Int()); }
+  public org.apache.arrow.flatbuf.Int indicesType(org.apache.arrow.flatbuf.Int obj) { int o = __offset(8); return o != 0 ? obj.__assign(__indirect(o + bb_pos), bb) : null; }
+  /**
+   * indicesBuffers stores values of nodes.
+   * Each tensor dimension corresponds to a buffer in indicesBuffers.
+   * For example, the indicesBuffers for the above X is:
+   * ```text
+   *   indicesBuffer(X) = [
+   *                        [0, 1],
+   *                        [0, 1, 1],
+   *                        [0, 0, 1, 1],
+   *                        [1, 2, 0, 2, 0, 0, 1, 2]
+   *                      ].
+   * ```
+   */
+  public org.apache.arrow.flatbuf.Buffer indicesBuffers(int j) { return indicesBuffers(new org.apache.arrow.flatbuf.Buffer(), j); }
+  public org.apache.arrow.flatbuf.Buffer indicesBuffers(org.apache.arrow.flatbuf.Buffer obj, int j) { int o = __offset(10); return o != 0 ? obj.__assign(__vector(o) + j * 16, bb) : null; }
+  public int indicesBuffersLength() { int o = __offset(10); return o != 0 ? __vector_len(o) : 0; }
+  public org.apache.arrow.flatbuf.Buffer.Vector indicesBuffersVector() { return indicesBuffersVector(new org.apache.arrow.flatbuf.Buffer.Vector()); }
+  public org.apache.arrow.flatbuf.Buffer.Vector indicesBuffersVector(org.apache.arrow.flatbuf.Buffer.Vector obj) { int o = __offset(10); return o != 0 ? obj.__assign(__vector(o), 16, bb) : null; }
+  /**
+   * axisOrder stores the sequence in which dimensions were traversed to
+   * produce the prefix tree.
+   * For example, the axisOrder for the above X is:
+   * ```text
+   *   axisOrder(X) = [0, 1, 2, 3].
+   * ```
+   */
+  public int axisOrder(int j) { int o = __offset(12); return o != 0 ? bb.getInt(__vector(o) + j * 4) : 0; }
+  public int axisOrderLength() { int o = __offset(12); return o != 0 ? __vector_len(o) : 0; }
+  public IntVector axisOrderVector() { return axisOrderVector(new IntVector()); }
+  public IntVector axisOrderVector(IntVector obj) { int o = __offset(12); return o != 0 ? obj.__assign(__vector(o), bb) : null; }
+  public ByteBuffer axisOrderAsByteBuffer() { return __vector_as_bytebuffer(12, 4); }
+  public ByteBuffer axisOrderInByteBuffer(ByteBuffer _bb) { return __vector_in_bytebuffer(_bb, 12, 4); }
+
+  public static int createSparseTensorIndexCSF(FlatBufferBuilder builder,
+      int indptrTypeOffset,
+      int indptrBuffersOffset,
+      int indicesTypeOffset,
+      int indicesBuffersOffset,
+      int axisOrderOffset) {
+    builder.startTable(5);
+    SparseTensorIndexCSF.addAxisOrder(builder, axisOrderOffset);
+    SparseTensorIndexCSF.addIndicesBuffers(builder, indicesBuffersOffset);
+    SparseTensorIndexCSF.addIndicesType(builder, indicesTypeOffset);
+    SparseTensorIndexCSF.addIndptrBuffers(builder, indptrBuffersOffset);
+    SparseTensorIndexCSF.addIndptrType(builder, indptrTypeOffset);
+    return SparseTensorIndexCSF.endSparseTensorIndexCSF(builder);
+  }
+
+  public static void startSparseTensorIndexCSF(FlatBufferBuilder builder) { builder.startTable(5); }
+  public static void addIndptrType(FlatBufferBuilder builder, int indptrTypeOffset) { builder.addOffset(0, indptrTypeOffset, 0); }
+  public static void addIndptrBuffers(FlatBufferBuilder builder, int indptrBuffersOffset) { builder.addOffset(1, indptrBuffersOffset, 0); }
+  public static void startIndptrBuffersVector(FlatBufferBuilder builder, int numElems) { builder.startVector(16, numElems, 8); }
+  public static void addIndicesType(FlatBufferBuilder builder, int indicesTypeOffset) { builder.addOffset(2, indicesTypeOffset, 0); }
+  public static void addIndicesBuffers(FlatBufferBuilder builder, int indicesBuffersOffset) { builder.addOffset(3, indicesBuffersOffset, 0); }
+  public static void startIndicesBuffersVector(FlatBufferBuilder builder, int numElems) { builder.startVector(16, numElems, 8); }
+  public static void addAxisOrder(FlatBufferBuilder builder, int axisOrderOffset) { builder.addOffset(4, axisOrderOffset, 0); }
+  public static int createAxisOrderVector(FlatBufferBuilder builder, int[] data) { builder.startVector(4, data.length, 4); for (int i = data.length - 1; i >= 0; i--) builder.addInt(data[i]); return builder.endVector(); }
+  public static void startAxisOrderVector(FlatBufferBuilder builder, int numElems) { builder.startVector(4, numElems, 4); }
+  public static int endSparseTensorIndexCSF(FlatBufferBuilder builder) {
+    int o = builder.endTable();
+    builder.required(o, 4);  // indptrType
+    builder.required(o, 6);  // indptrBuffers
+    builder.required(o, 8);  // indicesType
+    builder.required(o, 10);  // indicesBuffers
+    builder.required(o, 12);  // axisOrder
+    return o;
+  }
+
+  public static final class Vector extends BaseVector {
+    public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+    public SparseTensorIndexCSF get(int j) { return get(new SparseTensorIndexCSF(), j); }
+    public SparseTensorIndexCSF get(SparseTensorIndexCSF obj, int j) {  return obj.__assign(__indirect(__element(j), bb), bb); }
+  }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Struct_.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Struct_.java
new file mode 100644
index 00000000000..1285f288430
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/Struct_.java
@@ -0,0 +1,53 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * A Struct_ in the flatbuffer metadata is the same as an Arrow Struct
+ * (according to the physical memory layout). We used Struct_ here as
+ * Struct is a reserved word in Flatbuffers
+ */
+public final class Struct_ extends Table {
+  public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+  public static Struct_ getRootAsStruct_(ByteBuffer _bb) { return getRootAsStruct_(_bb, new Struct_()); }
+  public static Struct_ getRootAsStruct_(ByteBuffer _bb, Struct_ obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+  public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+  public Struct_ __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+
+  public static void startStruct_(FlatBufferBuilder builder) { builder.startTable(0); }
+  public static int endStruct_(FlatBufferBuilder builder) {
+    int o = builder.endTable();
+    return o;
+  }
+
+  public static final class Vector extends BaseVector {
+    public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+    public Struct_ get(int j) { return get(new Struct_(), j); }
+    public Struct_ get(Struct_ obj, int j) {  return obj.__assign(__indirect(__element(j), bb), bb); }
+  }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Tensor.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Tensor.java
new file mode 100644
index 00000000000..d4466bcf2f5
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/Tensor.java
@@ -0,0 +1,91 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+public final class Tensor extends Table {
+  public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+  public static Tensor getRootAsTensor(ByteBuffer _bb) { return getRootAsTensor(_bb, new Tensor()); }
+  public static Tensor getRootAsTensor(ByteBuffer _bb, Tensor obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+  public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+  public Tensor __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+  public byte typeType() { int o = __offset(4); return o != 0 ? bb.get(o + bb_pos) : 0; }
+  /**
+   * The type of data contained in a value cell. Currently only fixed-width
+   * value types are supported, no strings or nested types
+   */
+  public Table type(Table obj) { int o = __offset(6); return o != 0 ? __union(obj, o + bb_pos) : null; }
+  /**
+   * The dimensions of the tensor, optionally named
+   */
+  public org.apache.arrow.flatbuf.TensorDim shape(int j) { return shape(new org.apache.arrow.flatbuf.TensorDim(), j); }
+  public org.apache.arrow.flatbuf.TensorDim shape(org.apache.arrow.flatbuf.TensorDim obj, int j) { int o = __offset(8); return o != 0 ? obj.__assign(__indirect(__vector(o) + j * 4), bb) : null; }
+  public int shapeLength() { int o = __offset(8); return o != 0 ? __vector_len(o) : 0; }
+  public org.apache.arrow.flatbuf.TensorDim.Vector shapeVector() { return shapeVector(new org.apache.arrow.flatbuf.TensorDim.Vector()); }
+  public org.apache.arrow.flatbuf.TensorDim.Vector shapeVector(org.apache.arrow.flatbuf.TensorDim.Vector obj) { int o = __offset(8); return o != 0 ? obj.__assign(__vector(o), 4, bb) : null; }
+  /**
+   * Non-negative byte offsets to advance one value cell along each dimension
+   * If omitted, default to row-major order (C-like).
+   */
+  public long strides(int j) { int o = __offset(10); return o != 0 ? bb.getLong(__vector(o) + j * 8) : 0; }
+  public int stridesLength() { int o = __offset(10); return o != 0 ? __vector_len(o) : 0; }
+  public LongVector stridesVector() { return stridesVector(new LongVector()); }
+  public LongVector stridesVector(LongVector obj) { int o = __offset(10); return o != 0 ? obj.__assign(__vector(o), bb) : null; }
+  public ByteBuffer stridesAsByteBuffer() { return __vector_as_bytebuffer(10, 8); }
+  public ByteBuffer stridesInByteBuffer(ByteBuffer _bb) { return __vector_in_bytebuffer(_bb, 10, 8); }
+  /**
+   * The location and size of the tensor's data
+   */
+  public org.apache.arrow.flatbuf.Buffer data() { return data(new org.apache.arrow.flatbuf.Buffer()); }
+  public org.apache.arrow.flatbuf.Buffer data(org.apache.arrow.flatbuf.Buffer obj) { int o = __offset(12); return o != 0 ? obj.__assign(o + bb_pos, bb) : null; }
+
+  public static void startTensor(FlatBufferBuilder builder) { builder.startTable(5); }
+  public static void addTypeType(FlatBufferBuilder builder, byte typeType) { builder.addByte(0, typeType, 0); }
+  public static void addType(FlatBufferBuilder builder, int typeOffset) { builder.addOffset(1, typeOffset, 0); }
+  public static void addShape(FlatBufferBuilder builder, int shapeOffset) { builder.addOffset(2, shapeOffset, 0); }
+  public static int createShapeVector(FlatBufferBuilder builder, int[] data) { builder.startVector(4, data.length, 4); for (int i = data.length - 1; i >= 0; i--) builder.addOffset(data[i]); return builder.endVector(); }
+  public static void startShapeVector(FlatBufferBuilder builder, int numElems) { builder.startVector(4, numElems, 4); }
+  public static void addStrides(FlatBufferBuilder builder, int stridesOffset) { builder.addOffset(3, stridesOffset, 0); }
+  public static int createStridesVector(FlatBufferBuilder builder, long[] data) { builder.startVector(8, data.length, 8); for (int i = data.length - 1; i >= 0; i--) builder.addLong(data[i]); return builder.endVector(); }
+  public static void startStridesVector(FlatBufferBuilder builder, int numElems) { builder.startVector(8, numElems, 8); }
+  public static void addData(FlatBufferBuilder builder, int dataOffset) { builder.addStruct(4, dataOffset, 0); }
+  public static int endTensor(FlatBufferBuilder builder) {
+    int o = builder.endTable();
+    builder.required(o, 6);  // type
+    builder.required(o, 8);  // shape
+    builder.required(o, 12);  // data
+    return o;
+  }
+  public static void finishTensorBuffer(FlatBufferBuilder builder, int offset) { builder.finish(offset); }
+  public static void finishSizePrefixedTensorBuffer(FlatBufferBuilder builder, int offset) { builder.finishSizePrefixed(offset); }
+
+  public static final class Vector extends BaseVector {
+    public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+    public Tensor get(int j) { return get(new Tensor(), j); }
+    public Tensor get(Tensor obj, int j) {  return obj.__assign(__indirect(__element(j), bb), bb); }
+  }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/TensorDim.java b/java/format/src/main/java/org/apache/arrow/flatbuf/TensorDim.java
new file mode 100644
index 00000000000..fad8caacd2e
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/TensorDim.java
@@ -0,0 +1,74 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * ----------------------------------------------------------------------
+ * Data structures for dense tensors
+ * Shape data for a single axis in a tensor
+ */
+public final class TensorDim extends Table {
+  public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+  public static TensorDim getRootAsTensorDim(ByteBuffer _bb) { return getRootAsTensorDim(_bb, new TensorDim()); }
+  public static TensorDim getRootAsTensorDim(ByteBuffer _bb, TensorDim obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+  public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+  public TensorDim __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+  /**
+   * Length of dimension
+   */
+  public long size() { int o = __offset(4); return o != 0 ? bb.getLong(o + bb_pos) : 0L; }
+  /**
+   * Name of the dimension, optional
+   */
+  public String name() { int o = __offset(6); return o != 0 ? __string(o + bb_pos) : null; }
+  public ByteBuffer nameAsByteBuffer() { return __vector_as_bytebuffer(6, 1); }
+  public ByteBuffer nameInByteBuffer(ByteBuffer _bb) { return __vector_in_bytebuffer(_bb, 6, 1); }
+
+  public static int createTensorDim(FlatBufferBuilder builder,
+      long size,
+      int nameOffset) {
+    builder.startTable(2);
+    TensorDim.addSize(builder, size);
+    TensorDim.addName(builder, nameOffset);
+    return TensorDim.endTensorDim(builder);
+  }
+
+  public static void startTensorDim(FlatBufferBuilder builder) { builder.startTable(2); }
+  public static void addSize(FlatBufferBuilder builder, long size) { builder.addLong(0, size, 0L); }
+  public static void addName(FlatBufferBuilder builder, int nameOffset) { builder.addOffset(1, nameOffset, 0); }
+  public static int endTensorDim(FlatBufferBuilder builder) {
+    int o = builder.endTable();
+    return o;
+  }
+
+  public static final class Vector extends BaseVector {
+    public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+    public TensorDim get(int j) { return get(new TensorDim(), j); }
+    public TensorDim get(TensorDim obj, int j) {  return obj.__assign(__indirect(__element(j), bb), bb); }
+  }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Time.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Time.java
new file mode 100644
index 00000000000..596d403a3ea
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/Time.java
@@ -0,0 +1,66 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * Time type. The physical storage type depends on the unit
+ * - SECOND and MILLISECOND: 32 bits
+ * - MICROSECOND and NANOSECOND: 64 bits
+ */
+public final class Time extends Table {
+  public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+  public static Time getRootAsTime(ByteBuffer _bb) { return getRootAsTime(_bb, new Time()); }
+  public static Time getRootAsTime(ByteBuffer _bb, Time obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+  public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+  public Time __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+  public short unit() { int o = __offset(4); return o != 0 ? bb.getShort(o + bb_pos) : 1; }
+  public int bitWidth() { int o = __offset(6); return o != 0 ? bb.getInt(o + bb_pos) : 32; }
+
+  public static int createTime(FlatBufferBuilder builder,
+      short unit,
+      int bitWidth) {
+    builder.startTable(2);
+    Time.addBitWidth(builder, bitWidth);
+    Time.addUnit(builder, unit);
+    return Time.endTime(builder);
+  }
+
+  public static void startTime(FlatBufferBuilder builder) { builder.startTable(2); }
+  public static void addUnit(FlatBufferBuilder builder, short unit) { builder.addShort(0, unit, 1); }
+  public static void addBitWidth(FlatBufferBuilder builder, int bitWidth) { builder.addInt(1, bitWidth, 32); }
+  public static int endTime(FlatBufferBuilder builder) {
+    int o = builder.endTable();
+    return o;
+  }
+
+  public static final class Vector extends BaseVector {
+    public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+    public Time get(int j) { return get(new Time(), j); }
+    public Time get(Time obj, int j) {  return obj.__assign(__indirect(__element(j), bb), bb); }
+  }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/TimeUnit.java b/java/format/src/main/java/org/apache/arrow/flatbuf/TimeUnit.java
new file mode 100644
index 00000000000..828e44c13f9
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/TimeUnit.java
@@ -0,0 +1,32 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+public final class TimeUnit {
+  private TimeUnit() { }
+  public static final short SECOND = 0;
+  public static final short MILLISECOND = 1;
+  public static final short MICROSECOND = 2;
+  public static final short NANOSECOND = 3;
+
+  public static final String[] names = { "SECOND", "MILLISECOND", "MICROSECOND", "NANOSECOND", };
+
+  public static String name(int e) { return names[e]; }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Timestamp.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Timestamp.java
new file mode 100644
index 00000000000..66f9cc8911b
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/Timestamp.java
@@ -0,0 +1,93 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * Time elapsed from the Unix epoch, 00:00:00.000 on 1 January 1970, excluding
+ * leap seconds, as a 64-bit integer. Note that UNIX time does not include
+ * leap seconds.
+ *
+ * The Timestamp metadata supports both "time zone naive" and "time zone
+ * aware" timestamps. Read about the timezone attribute for more detail
+ */
+public final class Timestamp extends Table {
+  public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+  public static Timestamp getRootAsTimestamp(ByteBuffer _bb) { return getRootAsTimestamp(_bb, new Timestamp()); }
+  public static Timestamp getRootAsTimestamp(ByteBuffer _bb, Timestamp obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+  public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+  public Timestamp __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+  public short unit() { int o = __offset(4); return o != 0 ? bb.getShort(o + bb_pos) : 0; }
+  /**
+   * The time zone is a string indicating the name of a time zone, one of:
+   *
+   * * As used in the Olson time zone database (the "tz database" or
+   *   "tzdata"), such as "America/New_York"
+   * * An absolute time zone offset of the form +XX:XX or -XX:XX, such as +07:30
+   *
+   * Whether a timezone string is present indicates different semantics about
+   * the data:
+   *
+   * * If the time zone is null or equal to an empty string, the data is "time
+   *   zone naive" and shall be displayed *as is* to the user, not localized
+   *   to the locale of the user. This data can be though of as UTC but
+   *   without having "UTC" as the time zone, it is not considered to be
+   *   localized to any time zone
+   *
+   * * If the time zone is set to a valid value, values can be displayed as
+   *   "localized" to that time zone, even though the underlying 64-bit
+   *   integers are identical to the same data stored in UTC. Converting
+   *   between time zones is a metadata-only operation and does not change the
+   *   underlying values
+   */
+  public String timezone() { int o = __offset(6); return o != 0 ? __string(o + bb_pos) : null; }
+  public ByteBuffer timezoneAsByteBuffer() { return __vector_as_bytebuffer(6, 1); }
+  public ByteBuffer timezoneInByteBuffer(ByteBuffer _bb) { return __vector_in_bytebuffer(_bb, 6, 1); }
+
+  public static int createTimestamp(FlatBufferBuilder builder,
+      short unit,
+      int timezoneOffset) {
+    builder.startTable(2);
+    Timestamp.addTimezone(builder, timezoneOffset);
+    Timestamp.addUnit(builder, unit);
+    return Timestamp.endTimestamp(builder);
+  }
+
+  public static void startTimestamp(FlatBufferBuilder builder) { builder.startTable(2); }
+  public static void addUnit(FlatBufferBuilder builder, short unit) { builder.addShort(0, unit, 0); }
+  public static void addTimezone(FlatBufferBuilder builder, int timezoneOffset) { builder.addOffset(1, timezoneOffset, 0); }
+  public static int endTimestamp(FlatBufferBuilder builder) {
+    int o = builder.endTable();
+    return o;
+  }
+
+  public static final class Vector extends BaseVector {
+    public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+    public Timestamp get(int j) { return get(new Timestamp(), j); }
+    public Timestamp get(Timestamp obj, int j) {  return obj.__assign(__indirect(__element(j), bb), bb); }
+  }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Type.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Type.java
new file mode 100644
index 00000000000..5f1a550cfff
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/Type.java
@@ -0,0 +1,55 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+/**
+ * ----------------------------------------------------------------------
+ * Top-level Type value, enabling extensible type-specific metadata. We can
+ * add new logical types to Type without breaking backwards compatibility
+ */
+public final class Type {
+  private Type() { }
+  public static final byte NONE = 0;
+  public static final byte Null = 1;
+  public static final byte Int = 2;
+  public static final byte FloatingPoint = 3;
+  public static final byte Binary = 4;
+  public static final byte Utf8 = 5;
+  public static final byte Bool = 6;
+  public static final byte Decimal = 7;
+  public static final byte Date = 8;
+  public static final byte Time = 9;
+  public static final byte Timestamp = 10;
+  public static final byte Interval = 11;
+  public static final byte List = 12;
+  public static final byte Struct_ = 13;
+  public static final byte Union = 14;
+  public static final byte FixedSizeBinary = 15;
+  public static final byte FixedSizeList = 16;
+  public static final byte Map = 17;
+  public static final byte Duration = 18;
+  public static final byte LargeBinary = 19;
+  public static final byte LargeUtf8 = 20;
+  public static final byte LargeList = 21;
+
+  public static final String[] names = { "NONE", "Null", "Int", "FloatingPoint", "Binary", "Utf8", "Bool", "Decimal", "Date", "Time", "Timestamp", "Interval", "List", "Struct_", "Union", "FixedSizeBinary", "FixedSizeList", "Map", "Duration", "LargeBinary", "LargeUtf8", "LargeList", };
+
+  public static String name(int e) { return names[e]; }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Union.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Union.java
new file mode 100644
index 00000000000..7e282243425
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/Union.java
@@ -0,0 +1,74 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * A union is a complex type with children in Field
+ * By default ids in the type vector refer to the offsets in the children
+ * optionally typeIds provides an indirection between the child offset and the type id
+ * for each child `typeIds[offset]` is the id used in the type vector
+ */
+public final class Union extends Table {
+  public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+  public static Union getRootAsUnion(ByteBuffer _bb) { return getRootAsUnion(_bb, new Union()); }
+  public static Union getRootAsUnion(ByteBuffer _bb, Union obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+  public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+  public Union __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+  public short mode() { int o = __offset(4); return o != 0 ? bb.getShort(o + bb_pos) : 0; }
+  public int typeIds(int j) { int o = __offset(6); return o != 0 ? bb.getInt(__vector(o) + j * 4) : 0; }
+  public int typeIdsLength() { int o = __offset(6); return o != 0 ? __vector_len(o) : 0; }
+  public IntVector typeIdsVector() { return typeIdsVector(new IntVector()); }
+  public IntVector typeIdsVector(IntVector obj) { int o = __offset(6); return o != 0 ? obj.__assign(__vector(o), bb) : null; }
+  public ByteBuffer typeIdsAsByteBuffer() { return __vector_as_bytebuffer(6, 4); }
+  public ByteBuffer typeIdsInByteBuffer(ByteBuffer _bb) { return __vector_in_bytebuffer(_bb, 6, 4); }
+
+  public static int createUnion(FlatBufferBuilder builder,
+      short mode,
+      int typeIdsOffset) {
+    builder.startTable(2);
+    Union.addTypeIds(builder, typeIdsOffset);
+    Union.addMode(builder, mode);
+    return Union.endUnion(builder);
+  }
+
+  public static void startUnion(FlatBufferBuilder builder) { builder.startTable(2); }
+  public static void addMode(FlatBufferBuilder builder, short mode) { builder.addShort(0, mode, 0); }
+  public static void addTypeIds(FlatBufferBuilder builder, int typeIdsOffset) { builder.addOffset(1, typeIdsOffset, 0); }
+  public static int createTypeIdsVector(FlatBufferBuilder builder, int[] data) { builder.startVector(4, data.length, 4); for (int i = data.length - 1; i >= 0; i--) builder.addInt(data[i]); return builder.endVector(); }
+  public static void startTypeIdsVector(FlatBufferBuilder builder, int numElems) { builder.startVector(4, numElems, 4); }
+  public static int endUnion(FlatBufferBuilder builder) {
+    int o = builder.endTable();
+    return o;
+  }
+
+  public static final class Vector extends BaseVector {
+    public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+    public Union get(int j) { return get(new Union(), j); }
+    public Union get(Union obj, int j) {  return obj.__assign(__indirect(__element(j), bb), bb); }
+  }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/UnionMode.java b/java/format/src/main/java/org/apache/arrow/flatbuf/UnionMode.java
new file mode 100644
index 00000000000..23a6013f8e4
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/UnionMode.java
@@ -0,0 +1,30 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+public final class UnionMode {
+  private UnionMode() { }
+  public static final short Sparse = 0;
+  public static final short Dense = 1;
+
+  public static final String[] names = { "Sparse", "Dense", };
+
+  public static String name(int e) { return names[e]; }
+}
+
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Utf8.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Utf8.java
new file mode 100644
index 00000000000..d77fe205f42
--- /dev/null
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/Utf8.java
@@ -0,0 +1,51 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// 
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * Unicode with UTF-8 encoding
+ */
+public final class Utf8 extends Table {
+  public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+  public static Utf8 getRootAsUtf8(ByteBuffer _bb) { return getRootAsUtf8(_bb, new Utf8()); }
+  public static Utf8 getRootAsUtf8(ByteBuffer _bb, Utf8 obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+  public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+  public Utf8 __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+
+  public static void startUtf8(FlatBufferBuilder builder) { builder.startTable(0); }
+  public static int endUtf8(FlatBufferBuilder builder) {
+    int o = builder.endTable();
+    return o;
+  }
+
+  public static final class Vector extends BaseVector {
+    public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+    public Utf8 get(int j) { return get(new Utf8(), j); }
+    public Utf8 get(Utf8 obj, int j) {  return obj.__assign(__indirect(__element(j), bb), bb); }
+  }
+}
+
diff --git a/java/pom.xml b/java/pom.xml
index 35a87cb08ec..89be67f775f 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -36,8 +36,7 @@
     <dep.netty.version>4.1.48.Final</dep.netty.version>
     <dep.jackson.version>2.11.4</dep.jackson.version>
     <dep.hadoop.version>2.7.1</dep.hadoop.version>
-    <dep.fbs.version>1.9.0</dep.fbs.version>
-    <dep.flatc.version>1.9.0</dep.flatc.version>
+    <dep.fbs.version>1.12.0</dep.fbs.version>
     <dep.avro.version>1.10.0</dep.avro.version>
     <arrow.vector.classifier />
     <forkCount>2</forkCount>
@@ -702,7 +701,7 @@
 
     <profile>
       <id>error-prone</id>
-      <!-- 
+      <!--
            Do not activate Error Prone while running with Eclipse/M2E as it causes incompatibilities
            with other annotation processors.
            See https://github.com/jbosstools/m2e-apt/issues/62 for details

From b2ceb8f22471f0c4654d6477117d01e897e12088 Mon Sep 17 00:00:00 2001
From: Matthew Topol <mtopol@factset.com>
Date: Thu, 15 Apr 2021 20:20:11 -0700
Subject: [PATCH 073/719] ARROW-12104: [Go][Parquet] Second chunk of Ported Go
 Parquet code

Following up from #9671 this is the next chunk of ported code consisting of the generated Thrift Code and the utilities for supporting Encryption, Compression and Reader/Writer Property handling.

Thankfully this is much smaller than the previous chunk, and so should be much easier to review and read.

Closes #9817 from zeroshade/arrow-12104

Authored-by: Matthew Topol <mtopol@factset.com>
Signed-off-by: Micah Kornfield <emkornfield@gmail.com>
---
 dev/release/rat_exclude_files.txt             |     3 +
 go/parquet/compress/brotli.go                 |   115 +
 go/parquet/compress/compress.go               |   156 +
 go/parquet/compress/compress_test.go          |   138 +
 go/parquet/compress/gzip.go                   |    98 +
 go/parquet/compress/snappy.go                 |    62 +
 go/parquet/compress/zstd.go                   |   112 +
 go/parquet/encryption_properties.go           |   711 +
 go/parquet/encryption_properties_test.go      |   217 +
 go/parquet/go.mod                             |     4 +
 go/parquet/go.sum                             |     8 +
 go/parquet/internal/bmi/Makefile              |     9 +-
 go/parquet/internal/bmi/bitmap_bmi2.s         |     2 +-
 go/parquet/internal/debug/assert_off.go       |    24 +
 go/parquet/internal/debug/assert_on.go        |    28 +
 go/parquet/internal/debug/doc.go              |    23 +
 go/parquet/internal/encryption/aes.go         |   264 +
 go/parquet/internal/encryption/decryptor.go   |   261 +
 go/parquet/internal/encryption/encryptor.go   |   237 +
 .../internal/encryption/key_handling.go       |    62 +
 .../gen-go/parquet/GoUnusedProtection__.go    |     6 +
 .../internal/gen-go/parquet/parquet-consts.go |    23 +
 go/parquet/internal/gen-go/parquet/parquet.go | 10961 ++++++++++++++++
 .../internal/gen-go/parquet/staticcheck.conf  |    17 +
 go/parquet/internal/thrift/helpers.go         |    87 +
 go/parquet/internal/utils/Makefile            |     4 +
 go/parquet/reader_properties.go               |    79 +
 go/parquet/reader_writer_properties_test.go   |    69 +
 go/parquet/types.go                           |   187 +
 go/parquet/writer_properties.go               |   510 +
 30 files changed, 14475 insertions(+), 2 deletions(-)
 create mode 100644 go/parquet/compress/brotli.go
 create mode 100644 go/parquet/compress/compress.go
 create mode 100644 go/parquet/compress/compress_test.go
 create mode 100644 go/parquet/compress/gzip.go
 create mode 100644 go/parquet/compress/snappy.go
 create mode 100644 go/parquet/compress/zstd.go
 create mode 100644 go/parquet/encryption_properties.go
 create mode 100644 go/parquet/encryption_properties_test.go
 create mode 100644 go/parquet/internal/debug/assert_off.go
 create mode 100644 go/parquet/internal/debug/assert_on.go
 create mode 100644 go/parquet/internal/debug/doc.go
 create mode 100644 go/parquet/internal/encryption/aes.go
 create mode 100644 go/parquet/internal/encryption/decryptor.go
 create mode 100644 go/parquet/internal/encryption/encryptor.go
 create mode 100644 go/parquet/internal/encryption/key_handling.go
 create mode 100644 go/parquet/internal/gen-go/parquet/GoUnusedProtection__.go
 create mode 100644 go/parquet/internal/gen-go/parquet/parquet-consts.go
 create mode 100644 go/parquet/internal/gen-go/parquet/parquet.go
 create mode 100644 go/parquet/internal/gen-go/parquet/staticcheck.conf
 create mode 100644 go/parquet/internal/thrift/helpers.go
 create mode 100644 go/parquet/reader_properties.go
 create mode 100644 go/parquet/reader_writer_properties_test.go
 create mode 100644 go/parquet/writer_properties.go

diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt
index ce32044c902..3dc3ad5869c 100644
--- a/dev/release/rat_exclude_files.txt
+++ b/dev/release/rat_exclude_files.txt
@@ -126,6 +126,9 @@ go/arrow/type_string.go
 go/*.tmpldata
 go/*.s
 go/parquet/go.sum
+go/parquet/internal/gen-go/parquet/GoUnusedProtection__.go
+go/parquet/internal/gen-go/parquet/parquet-consts.go
+go/parquet/internal/gen-go/parquet/parquet.go
 js/.npmignore
 js/closure-compiler-scripts/*
 js/src/fb/*.ts
diff --git a/go/parquet/compress/brotli.go b/go/parquet/compress/brotli.go
new file mode 100644
index 00000000000..2b45225957b
--- /dev/null
+++ b/go/parquet/compress/brotli.go
@@ -0,0 +1,115 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package compress
+
+import (
+	"bytes"
+	"io"
+	"io/ioutil"
+
+	"github.com/andybalholm/brotli"
+	"github.com/apache/arrow/go/parquet/internal/debug"
+)
+
+type brotliCodec struct{}
+
+func (brotliCodec) NewReader(r io.Reader) io.ReadCloser {
+	return ioutil.NopCloser(brotli.NewReader(r))
+}
+
+func (b brotliCodec) EncodeLevel(dst, src []byte, level int) []byte {
+	if level == DefaultCompressionLevel {
+		level = brotli.DefaultCompression
+	}
+
+	maxlen := int(b.CompressBound(int64(len(src))))
+	if dst == nil || cap(dst) < maxlen {
+		dst = make([]byte, 0, maxlen)
+	}
+	buf := bytes.NewBuffer(dst[:0])
+	w := brotli.NewWriterLevel(buf, level)
+	_, err := w.Write(src)
+	if err != nil {
+		panic(err)
+	}
+	if err := w.Close(); err != nil {
+		panic(err)
+	}
+	return buf.Bytes()
+}
+
+func (b brotliCodec) Encode(dst, src []byte) []byte {
+	return b.EncodeLevel(dst, src, brotli.DefaultCompression)
+}
+
+func (brotliCodec) Decode(dst, src []byte) []byte {
+	rdr := brotli.NewReader(bytes.NewReader(src))
+	if dst != nil {
+		var (
+			sofar       = 0
+			n           = -1
+			err   error = nil
+		)
+		for n != 0 && err == nil {
+			n, err = rdr.Read(dst[sofar:])
+			sofar += n
+		}
+		if err != nil && err != io.EOF {
+			panic(err)
+		}
+		return dst[:sofar]
+	}
+
+	dst, err := ioutil.ReadAll(rdr)
+	if err != nil {
+		panic(err)
+	}
+
+	return dst
+}
+
+// taken from brotli/enc/encode.c:1426
+// BrotliEncoderMaxCompressedSize
+func (brotliCodec) CompressBound(len int64) int64 {
+	// [window bits / empty metadata] + N * [uncompressed] + [last empty]
+	debug.Assert(len > 0, "brotli compressbound should be > 0")
+	nlarge := len >> 14
+	overhead := 2 + (4 * nlarge) + 3 + 1
+	result := len + overhead
+	if len == 0 {
+		return 2
+	}
+	if result < len {
+		return 0
+	}
+	return len
+}
+
+func (brotliCodec) NewWriter(w io.Writer) io.WriteCloser {
+	return brotli.NewWriter(w)
+}
+
+func (brotliCodec) NewWriterLevel(w io.Writer, level int) (io.WriteCloser, error) {
+	if level == DefaultCompressionLevel {
+		level = brotli.DefaultCompression
+	}
+	return brotli.NewWriterLevel(w, level), nil
+}
+
+func init() {
+	codecs[Codecs.Brotli] = brotliCodec{}
+}
diff --git a/go/parquet/compress/compress.go b/go/parquet/compress/compress.go
new file mode 100644
index 00000000000..1b6d83687c2
--- /dev/null
+++ b/go/parquet/compress/compress.go
@@ -0,0 +1,156 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package compress contains the interfaces and implementations for handling compression/decompression
+// of parquet data at the column levels.
+package compress
+
+import (
+	"compress/flate"
+	"io"
+	"io/ioutil"
+
+	"github.com/apache/arrow/go/parquet/internal/gen-go/parquet"
+	"golang.org/x/xerrors"
+)
+
+// Compression is an alias to the thrift compression codec enum type for easy use
+type Compression parquet.CompressionCodec
+
+func (c Compression) String() string {
+	return parquet.CompressionCodec(c).String()
+}
+
+// DefaultCompressionLevel will use flate.DefaultCompression since many of the compression libraries
+// use that to denote "use the default".
+const DefaultCompressionLevel = flate.DefaultCompression
+
+// Codecs is a useful struct to provide namespaced enum values to use for specifying the compression type to use
+// which make for easy internal swapping between them and the thrift enum since they are initialized to the same
+// constant values.
+var Codecs = struct {
+	Uncompressed Compression
+	Snappy       Compression
+	Gzip         Compression
+	// LZO is unsupported in this library since LZO license is incompatible with Apache License
+	Lzo    Compression
+	Brotli Compression
+	// LZ4 unsupported in this library due to problematic issues between the Hadoop LZ4 spec vs regular lz4
+	// see: http://mail-archives.apache.org/mod_mbox/arrow-dev/202007.mbox/%3CCAAri41v24xuA8MGHLDvgSnE+7AAgOhiEukemW_oPNHMvfMmrWw@mail.gmail.com%3E
+	Lz4  Compression
+	Zstd Compression
+}{
+	Uncompressed: Compression(parquet.CompressionCodec_UNCOMPRESSED),
+	Snappy:       Compression(parquet.CompressionCodec_SNAPPY),
+	Gzip:         Compression(parquet.CompressionCodec_GZIP),
+	Lzo:          Compression(parquet.CompressionCodec_LZO),
+	Brotli:       Compression(parquet.CompressionCodec_BROTLI),
+	Lz4:          Compression(parquet.CompressionCodec_LZ4),
+	Zstd:         Compression(parquet.CompressionCodec_ZSTD),
+}
+
+// Codec is an interface which is implemented for each compression type in order to make the interactions easy to
+// implement. Most consumers won't be calling GetCodec directly.
+type Codec interface {
+	// NewReader provides a reader that wraps a stream with compressed data to stream the uncompressed data
+	NewReader(io.Reader) io.ReadCloser
+	// NewWriter provides a wrapper around a write stream to compress data before writing it.
+	NewWriter(io.Writer) io.WriteCloser
+	// NewWriterLevel is like NewWriter but allows specifying the compression level
+	NewWriterLevel(io.Writer, int) (io.WriteCloser, error)
+	// Encode encodes a block of data given by src and returns the compressed block. dst should be either nil
+	// or sized large enough to fit the compressed block (use CompressBound to allocate). dst and src should not
+	// overlap since some of the compression types don't allow it.
+	//
+	// The returned slice will be one of the following:
+	//	1. If dst was nil or dst was too small to fit the compressed data, it will be a newly allocated slice
+	//	2. If dst was large enough to fit the compressed data (depending on the compression algorithm it might
+	//		 be required to be at least CompressBound length) then it might be a slice of dst.
+	Encode(dst, src []byte) []byte
+	// EncodeLevel is like Encode, but specifies a particular encoding level instead of the default.
+	EncodeLevel(dst, src []byte, level int) []byte
+	// CompressBound returns the boundary of maximum size of compressed data under the chosen codec.
+	CompressBound(int64) int64
+	// Decode is for decoding a single block rather than a stream, like with Encode, dst must be either nil or
+	// sized large enough to accommodate the uncompressed data and should not overlap with src.
+	//
+	// the returned slice *might* be a slice of dst.
+	Decode(dst, src []byte) []byte
+}
+
+var codecs = map[Compression]Codec{}
+
+type nocodec struct{}
+
+func (nocodec) NewReader(r io.Reader) io.ReadCloser {
+	ret, ok := r.(io.ReadCloser)
+	if !ok {
+		return ioutil.NopCloser(r)
+	}
+	return ret
+}
+
+func (nocodec) Decode(dst, src []byte) []byte {
+	if dst != nil {
+		copy(dst, src)
+	}
+	return dst
+}
+
+type writerNopCloser struct {
+	io.Writer
+}
+
+func (writerNopCloser) Close() error {
+	return nil
+}
+
+func (nocodec) Encode(dst, src []byte) []byte {
+	copy(dst, src)
+	return dst
+}
+
+func (nocodec) EncodeLevel(dst, src []byte, _ int) []byte {
+	copy(dst, src)
+	return dst
+}
+
+func (nocodec) NewWriter(w io.Writer) io.WriteCloser {
+	ret, ok := w.(io.WriteCloser)
+	if !ok {
+		return writerNopCloser{w}
+	}
+	return ret
+}
+
+func (n nocodec) NewWriterLevel(w io.Writer, _ int) (io.WriteCloser, error) {
+	return n.NewWriter(w), nil
+}
+
+func (nocodec) CompressBound(len int64) int64 { return len }
+
+func init() {
+	codecs[Codecs.Uncompressed] = nocodec{}
+}
+
+// GetCodec returns a Codec interface for the requested Compression type
+func GetCodec(typ Compression) (Codec, error) {
+	ret, ok := codecs[typ]
+	if !ok {
+		return nil, xerrors.Errorf("compression for %s unimplemented", typ.String())
+	}
+	return ret, nil
+}
diff --git a/go/parquet/compress/compress_test.go b/go/parquet/compress/compress_test.go
new file mode 100644
index 00000000000..acb0c4bacd0
--- /dev/null
+++ b/go/parquet/compress/compress_test.go
@@ -0,0 +1,138 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package compress_test
+
+import (
+	"bytes"
+	"io/ioutil"
+	"math/rand"
+	"testing"
+
+	"github.com/apache/arrow/go/parquet/compress"
+	"github.com/stretchr/testify/assert"
+)
+
+const (
+	RandomDataSize       = 3 * 1024 * 1024
+	CompressibleDataSize = 8 * 1024 * 1024
+)
+
+func makeRandomData(size int) []byte {
+	ret := make([]byte, size)
+	r := rand.New(rand.NewSource(1234))
+	r.Read(ret)
+	return ret
+}
+
+func makeCompressibleData(size int) []byte {
+	const base = "Apache Arrow is a cross-language development platform for in-memory data"
+
+	data := make([]byte, size)
+	n := copy(data, base)
+	for i := n; i < len(data); i *= 2 {
+		copy(data[i:], data[:i])
+	}
+	return data
+}
+
+func TestErrorForUnimplemented(t *testing.T) {
+	_, err := compress.GetCodec(compress.Codecs.Lzo)
+	assert.Error(t, err)
+
+	_, err = compress.GetCodec(compress.Codecs.Lz4)
+	assert.Error(t, err)
+}
+
+func TestCompressDataOneShot(t *testing.T) {
+	tests := []struct {
+		c compress.Compression
+	}{
+		{compress.Codecs.Uncompressed},
+		{compress.Codecs.Snappy},
+		{compress.Codecs.Gzip},
+		{compress.Codecs.Brotli},
+		{compress.Codecs.Zstd},
+		// {compress.Codecs.Lzo},
+		// {compress.Codecs.Lz4},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.c.String(), func(t *testing.T) {
+			codec, err := compress.GetCodec(tt.c)
+			assert.NoError(t, err)
+			data := makeCompressibleData(CompressibleDataSize)
+
+			buf := make([]byte, codec.CompressBound(int64(len(data))))
+			compressed := codec.Encode(buf, data)
+			assert.Same(t, &buf[0], &compressed[0])
+
+			out := make([]byte, len(data))
+			uncompressed := codec.Decode(out, compressed)
+			assert.Same(t, &out[0], &uncompressed[0])
+
+			assert.Exactly(t, data, uncompressed)
+		})
+	}
+}
+
+func TestCompressReaderWriter(t *testing.T) {
+	tests := []struct {
+		c compress.Compression
+	}{
+		{compress.Codecs.Uncompressed},
+		{compress.Codecs.Snappy},
+		{compress.Codecs.Gzip},
+		{compress.Codecs.Brotli},
+		{compress.Codecs.Zstd},
+		// {compress.Codecs.Lzo},
+		// {compress.Codecs.Lz4},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.c.String(), func(t *testing.T) {
+			var buf bytes.Buffer
+			codec, err := compress.GetCodec(tt.c)
+			assert.NoError(t, err)
+			data := makeRandomData(RandomDataSize)
+
+			wr := codec.NewWriter(&buf)
+
+			const chunkSize = 1111
+			input := data
+			for len(input) > 0 {
+				var (
+					n   int
+					err error
+				)
+				if len(input) > chunkSize {
+					n, err = wr.Write(input[:chunkSize])
+				} else {
+					n, err = wr.Write(input)
+				}
+
+				assert.NoError(t, err)
+				input = input[n:]
+			}
+			wr.Close()
+
+			rdr := codec.NewReader(&buf)
+			out, err := ioutil.ReadAll(rdr)
+			assert.NoError(t, err)
+			assert.Exactly(t, data, out)
+		})
+	}
+}
diff --git a/go/parquet/compress/gzip.go b/go/parquet/compress/gzip.go
new file mode 100644
index 00000000000..829d5f823ee
--- /dev/null
+++ b/go/parquet/compress/gzip.go
@@ -0,0 +1,98 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package compress
+
+import (
+	"bytes"
+	"io"
+	"io/ioutil"
+
+	"github.com/klauspost/compress/gzip"
+	"golang.org/x/xerrors"
+)
+
+type gzipCodec struct{}
+
+func (gzipCodec) NewReader(r io.Reader) io.ReadCloser {
+	ret, err := gzip.NewReader(r)
+	if err != nil {
+		panic(xerrors.Errorf("codec: gzip: %w", err))
+	}
+	return ret
+}
+
+func (gzipCodec) Decode(dst, src []byte) []byte {
+	rdr, err := gzip.NewReader(bytes.NewReader(src))
+	if err != nil {
+		panic(err)
+	}
+
+	if dst != nil {
+		n, err := io.ReadFull(rdr, dst)
+		if err != nil {
+			panic(err)
+		}
+		return dst[:n]
+	}
+
+	dst, err = ioutil.ReadAll(rdr)
+	if err != nil {
+		panic(err)
+	}
+
+	return dst
+}
+
+func (g gzipCodec) EncodeLevel(dst, src []byte, level int) []byte {
+	maxlen := int(g.CompressBound(int64(len(src))))
+	if dst == nil || cap(dst) < maxlen {
+		dst = make([]byte, 0, maxlen)
+	}
+	buf := bytes.NewBuffer(dst[:0])
+	w, err := gzip.NewWriterLevel(buf, level)
+	if err != nil {
+		panic(err)
+	}
+	_, err = w.Write(src)
+	if err != nil {
+		panic(err)
+	}
+	if err := w.Close(); err != nil {
+		panic(err)
+	}
+	return buf.Bytes()
+}
+
+func (g gzipCodec) Encode(dst, src []byte) []byte {
+	return g.EncodeLevel(dst, src, DefaultCompressionLevel)
+}
+
+func (gzipCodec) CompressBound(len int64) int64 {
+	return len + ((len + 7) >> 3) + ((len + 63) >> 6) + 5
+}
+
+func (gzipCodec) NewWriter(w io.Writer) io.WriteCloser {
+	return gzip.NewWriter(w)
+}
+
+func (gzipCodec) NewWriterLevel(w io.Writer, level int) (io.WriteCloser, error) {
+	return gzip.NewWriterLevel(w, level)
+}
+
+func init() {
+	codecs[Codecs.Gzip] = gzipCodec{}
+}
diff --git a/go/parquet/compress/snappy.go b/go/parquet/compress/snappy.go
new file mode 100644
index 00000000000..6468df780a7
--- /dev/null
+++ b/go/parquet/compress/snappy.go
@@ -0,0 +1,62 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package compress
+
+import (
+	"io"
+	"io/ioutil"
+
+	"github.com/golang/snappy"
+)
+
+type snappyCodec struct{}
+
+func (snappyCodec) Encode(dst, src []byte) []byte {
+	return snappy.Encode(dst, src)
+}
+
+func (snappyCodec) EncodeLevel(dst, src []byte, _ int) []byte {
+	return snappy.Encode(dst, src)
+}
+
+func (snappyCodec) Decode(dst, src []byte) []byte {
+	dst, err := snappy.Decode(dst, src)
+	if err != nil {
+		panic(err)
+	}
+	return dst
+}
+
+func (snappyCodec) NewReader(r io.Reader) io.ReadCloser {
+	return ioutil.NopCloser(snappy.NewReader(r))
+}
+
+func (snappyCodec) CompressBound(len int64) int64 {
+	return int64(snappy.MaxEncodedLen(int(len)))
+}
+
+func (snappyCodec) NewWriter(w io.Writer) io.WriteCloser {
+	return snappy.NewBufferedWriter(w)
+}
+
+func (s snappyCodec) NewWriterLevel(w io.Writer, _ int) (io.WriteCloser, error) {
+	return s.NewWriter(w), nil
+}
+
+func init() {
+	codecs[Codecs.Snappy] = snappyCodec{}
+}
diff --git a/go/parquet/compress/zstd.go b/go/parquet/compress/zstd.go
new file mode 100644
index 00000000000..ebc91baeb26
--- /dev/null
+++ b/go/parquet/compress/zstd.go
@@ -0,0 +1,112 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package compress
+
+import (
+	"io"
+	"sync"
+
+	"github.com/apache/arrow/go/parquet/internal/debug"
+	"github.com/klauspost/compress/zstd"
+)
+
+type zstdCodec struct{}
+
+type zstdcloser struct {
+	*zstd.Decoder
+}
+
+var (
+	enc         *zstd.Encoder
+	dec         *zstd.Decoder
+	initEncoder sync.Once
+	initDecoder sync.Once
+)
+
+func getencoder() *zstd.Encoder {
+	initEncoder.Do(func() {
+		enc, _ = zstd.NewWriter(nil, zstd.WithZeroFrames(true))
+	})
+	return enc
+}
+
+func getdecoder() *zstd.Decoder {
+	initDecoder.Do(func() {
+		dec, _ = zstd.NewReader(nil)
+	})
+	return dec
+}
+
+func (zstdCodec) Decode(dst, src []byte) []byte {
+	dst, err := getdecoder().DecodeAll(src, dst[:0])
+	if err != nil {
+		panic(err)
+	}
+	return dst
+}
+
+func (z *zstdcloser) Close() error {
+	z.Decoder.Close()
+	return nil
+}
+
+func (zstdCodec) NewReader(r io.Reader) io.ReadCloser {
+	ret, _ := zstd.NewReader(r)
+	return &zstdcloser{ret}
+}
+
+func (zstdCodec) NewWriter(w io.Writer) io.WriteCloser {
+	ret, _ := zstd.NewWriter(w)
+	return ret
+}
+
+func (zstdCodec) NewWriterLevel(w io.Writer, level int) (io.WriteCloser, error) {
+	var compressLevel zstd.EncoderLevel
+	if level == DefaultCompressionLevel {
+		compressLevel = zstd.SpeedDefault
+	} else {
+		compressLevel = zstd.EncoderLevelFromZstd(level)
+	}
+	return zstd.NewWriter(w, zstd.WithEncoderLevel(compressLevel))
+}
+
+func (z zstdCodec) Encode(dst, src []byte) []byte {
+	return getencoder().EncodeAll(src, dst[:0])
+}
+
+func (z zstdCodec) EncodeLevel(dst, src []byte, level int) []byte {
+	compressLevel := zstd.EncoderLevelFromZstd(level)
+	if level == DefaultCompressionLevel {
+		compressLevel = zstd.SpeedDefault
+	}
+	enc, _ := zstd.NewWriter(nil, zstd.WithZeroFrames(true), zstd.WithEncoderLevel(compressLevel))
+	return enc.EncodeAll(src, dst[:0])
+}
+
+// from zstd.h, ZSTD_COMPRESSBOUND
+func (zstdCodec) CompressBound(len int64) int64 {
+	debug.Assert(len > 0, "len for zstd CompressBound should be > 0")
+	extra := ((128 << 10) - len) >> 11
+	if len >= (128 << 10) {
+		extra = 0
+	}
+	return len + (len >> 8) + extra
+}
+
+func init() {
+	codecs[Codecs.Zstd] = zstdCodec{}
+}
diff --git a/go/parquet/encryption_properties.go b/go/parquet/encryption_properties.go
new file mode 100644
index 00000000000..bd97e53a401
--- /dev/null
+++ b/go/parquet/encryption_properties.go
@@ -0,0 +1,711 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package parquet
+
+import (
+	"crypto/rand"
+	"unicode/utf8"
+
+	format "github.com/apache/arrow/go/parquet/internal/gen-go/parquet"
+)
+
+// Constants that will be used as the default values with encryption/decryption
+const (
+	// By default we'll use AesGCM as our encryption algorithm
+	DefaultEncryptionAlgorithm       = AesGcm
+	MaximalAadMetadataLength   int32 = 256
+	// if encryption is turned on, we will default to also encrypting the footer
+	DefaultEncryptedFooter = true
+	DefaultCheckSignature  = true
+	// by default if you set the file decryption properties, we will error
+	// on any plaintext files unless otherwise specified.
+	DefaultAllowPlaintextFiles       = false
+	AadFileUniqueLength        int32 = 8
+)
+
+// ColumnPathToDecryptionPropsMap maps column paths to decryption properties
+type ColumnPathToDecryptionPropsMap map[string]*ColumnDecryptionProperties
+
+// ColumnPathToEncryptionPropsMap maps column paths to encryption properties
+type ColumnPathToEncryptionPropsMap map[string]*ColumnEncryptionProperties
+
+// ColumnEncryptionProperties specifies how to encrypt a given column
+type ColumnEncryptionProperties struct {
+	columnPath             string
+	encrypted              bool
+	encryptedWithFooterKey bool
+	key                    string
+	keyMetadata            string
+	utilized               bool
+}
+
+// ColumnPath returns which column these properties are for
+func (ce *ColumnEncryptionProperties) ColumnPath() string {
+	return ce.columnPath
+}
+
+// IsEncrypted returns true if this column is encrypted.
+func (ce *ColumnEncryptionProperties) IsEncrypted() bool { return ce.encrypted }
+
+// IsEncryptedWithFooterKey returns if this column was encrypted with the footer key itself, or false if a separate
+// key was used for encrypting this column.
+func (ce *ColumnEncryptionProperties) IsEncryptedWithFooterKey() bool {
+	return ce.encryptedWithFooterKey
+}
+
+// Key returns the key used for encrypting this column if it isn't encrypted by the footer key
+func (ce *ColumnEncryptionProperties) Key() string { return ce.key }
+
+// KeyMetadata returns the key identifier which is used with a KeyRetriever to get the key for this column if it is not
+// encrypted using the footer key
+func (ce *ColumnEncryptionProperties) KeyMetadata() string { return ce.keyMetadata }
+
+// WipeOutEncryptionKey Clears the encryption key, used after completion of file writing
+func (ce *ColumnEncryptionProperties) WipeOutEncryptionKey() { ce.key = "" }
+
+// IsUtilized returns whether or not these properties have already been used, if the key is empty
+// then this is always false
+func (ce *ColumnEncryptionProperties) IsUtilized() bool {
+	if ce.key == "" {
+		return false
+	}
+	return ce.utilized
+}
+
+// SetUtilized is used for marking it as utilized once it is used in FileEncryptionProperties
+// as the encryption key will be wiped out on completion of writing
+func (ce *ColumnEncryptionProperties) SetUtilized() {
+	ce.utilized = true
+}
+
+// Clone returns a instance of ColumnEncryptionProperties with the same key and metadata
+func (ce *ColumnEncryptionProperties) Clone() *ColumnEncryptionProperties {
+	copy := ce.key
+	return NewColumnEncryptionProperties(ce.columnPath, WithKey(copy), WithKeyMetadata(ce.keyMetadata))
+}
+
+type colEncryptConfig struct {
+	key         string
+	keyMetadata string
+	encrypted   bool
+}
+
+// ColumnEncryptOption how to specify options to the the NewColumnEncryptionProperties function.
+type ColumnEncryptOption func(*colEncryptConfig)
+
+// WithKey sets a column specific key.
+// If key is not set on an encrypted column, the column will be encrypted with the footer key.
+// key length must be either 16, 24, or 32 bytes
+// the key is cloned and will be wiped out (array values set to 0) upon completion of file writing.
+// Caller is responsible for wiping out input key array
+func WithKey(key string) ColumnEncryptOption {
+	return func(c *colEncryptConfig) {
+		if key != "" {
+			c.key = key
+		}
+	}
+}
+
+// WithKeyMetadata sets the key retrieval metadata, use either KeyMetadata or KeyID but not both
+func WithKeyMetadata(keyMeta string) ColumnEncryptOption {
+	return func(c *colEncryptConfig) {
+		c.keyMetadata = keyMeta
+	}
+}
+
+// WithKeyID is a convenience function to set the key metadata using a string id.
+// Set a key retrieval metadata (converted from String). and use either KeyMetadata or KeyID, not both.
+// KeyID will be converted to metadata (UTF-8 Array)
+func WithKeyID(keyID string) ColumnEncryptOption {
+	if !utf8.ValidString(keyID) {
+		panic("parquet: key id should be UTF8 encoded")
+	}
+	return WithKeyMetadata(keyID)
+}
+
+// NewColumnEncryptionProperties constructs properties for the provided column path, modified by the options provided
+func NewColumnEncryptionProperties(name string, opts ...ColumnEncryptOption) *ColumnEncryptionProperties {
+	var cfg colEncryptConfig
+	cfg.encrypted = true
+	for _, o := range opts {
+		o(&cfg)
+	}
+	return &ColumnEncryptionProperties{
+		utilized:               false,
+		encrypted:              cfg.encrypted,
+		encryptedWithFooterKey: cfg.encrypted && cfg.key == "",
+		keyMetadata:            cfg.keyMetadata,
+		key:                    cfg.key,
+		columnPath:             name,
+	}
+}
+
+// ColumnDecryptionProperties are the specifications for how to decrypt a given column.
+type ColumnDecryptionProperties struct {
+	columnPath string
+	key        string
+	utilized   bool
+}
+
+// NewColumnDecryptionProperties constructs a new ColumnDecryptionProperties for the given column path, modified by
+// the provided options
+func NewColumnDecryptionProperties(column string, opts ...ColumnDecryptOption) *ColumnDecryptionProperties {
+	var cfg columnDecryptConfig
+	for _, o := range opts {
+		o(&cfg)
+	}
+
+	return &ColumnDecryptionProperties{
+		columnPath: column,
+		utilized:   false,
+		key:        cfg.key,
+	}
+}
+
+// ColumnPath returns which column these properties describe how to decrypt
+func (cd *ColumnDecryptionProperties) ColumnPath() string { return cd.columnPath }
+
+// Key returns the key specified to decrypt this column, or is empty if the Footer Key should be used.
+func (cd *ColumnDecryptionProperties) Key() string { return cd.key }
+
+// IsUtilized returns whether or not these properties have been used for decryption already
+func (cd *ColumnDecryptionProperties) IsUtilized() bool { return cd.utilized }
+
+// SetUtilized is used by the reader to specify when we've decrypted the column and have used the key so we know
+// to wipe out the keys.
+func (cd *ColumnDecryptionProperties) SetUtilized() { cd.utilized = true }
+
+// WipeOutDecryptionKey is called after decryption to ensure the key doesn't stick around and get re-used.
+func (cd *ColumnDecryptionProperties) WipeOutDecryptionKey() { cd.key = "" }
+
+// Clone returns a new instance of ColumnDecryptionProperties with the same key and column
+func (cd *ColumnDecryptionProperties) Clone() *ColumnDecryptionProperties {
+	return NewColumnDecryptionProperties(cd.columnPath, WithDecryptKey(cd.key))
+}
+
+type columnDecryptConfig struct {
+	key string
+}
+
+// ColumnDecryptOption is the type of the options passed for constructing Decryption Properties
+type ColumnDecryptOption func(*columnDecryptConfig)
+
+// WithDecryptKey specifies the key to utilize for decryption
+func WithDecryptKey(key string) ColumnDecryptOption {
+	return func(cfg *columnDecryptConfig) {
+		if key != "" {
+			cfg.key = key
+		}
+	}
+}
+
+// AADPrefixVerifier is an interface for any object that can be used to verify the identity of the file being decrypted.
+// It should panic if the provided AAD identity is bad.
+//
+// In a data set, AAD Prefixes should be collected, and then checked for missing files.
+type AADPrefixVerifier interface {
+	// Verify identity of file. panic if bad
+	Verify(string)
+}
+
+// DecryptionKeyRetriever is an interface for getting the desired key for decryption from metadata. It should take in
+// some metadata identifier and return the actual Key to use for decryption.
+type DecryptionKeyRetriever interface {
+	GetKey(keyMetadata []byte) string
+}
+
+// FileDecryptionProperties define the File Level configuration for decrypting a parquet file. Once constructed they are
+// read only.
+type FileDecryptionProperties struct {
+	footerKey                     string
+	aadPrefix                     string
+	checkPlaintextFooterIntegrity bool
+	plaintextAllowed              bool
+	utilized                      bool
+	columnDecryptProps            ColumnPathToDecryptionPropsMap
+	Verifier                      AADPrefixVerifier
+	KeyRetriever                  DecryptionKeyRetriever
+}
+
+// NewFileDecryptionProperties takes in the options for constructing a new FileDecryptionProperties object, otherwise
+// it will use the default configuration which will check footer integrity of a plaintext footer for an encrypted file
+// for unencrypted parquet files, the decryption properties should not be set.
+func NewFileDecryptionProperties(opts ...FileDecryptionOption) *FileDecryptionProperties {
+	var cfg fileDecryptConfig
+	cfg.checkFooterIntegrity = DefaultCheckSignature
+	cfg.plaintextAllowed = DefaultAllowPlaintextFiles
+	for _, o := range opts {
+		o(&cfg)
+	}
+	return &FileDecryptionProperties{
+		Verifier:                      cfg.verifier,
+		footerKey:                     cfg.footerKey,
+		checkPlaintextFooterIntegrity: cfg.checkFooterIntegrity,
+		KeyRetriever:                  cfg.retriever,
+		aadPrefix:                     cfg.aadPrefix,
+		columnDecryptProps:            cfg.colDecrypt,
+		plaintextAllowed:              cfg.plaintextAllowed,
+		utilized:                      false,
+	}
+}
+
+// ColumnKey returns the key to be used for decrypting the provided column.
+func (fd *FileDecryptionProperties) ColumnKey(path string) string {
+	if d, ok := fd.columnDecryptProps[path]; ok {
+		if d != nil {
+			return d.Key()
+		}
+	}
+	return ""
+}
+
+// FooterKey returns the key utilized for decrypting the Footer if encrypted and any columns that are encrypted with
+// the footer key.
+func (fd *FileDecryptionProperties) FooterKey() string { return fd.footerKey }
+
+// AadPrefix returns the prefix to be supplied for constructing the identification strings when decrypting
+func (fd *FileDecryptionProperties) AadPrefix() string { return fd.aadPrefix }
+
+// PlaintextFooterIntegrity returns whether or not an integrity check will be performed on a plaintext footer for an
+// encrypted file.
+func (fd *FileDecryptionProperties) PlaintextFooterIntegrity() bool {
+	return fd.checkPlaintextFooterIntegrity
+}
+
+// PlaintextFilesAllowed returns whether or not this instance of decryption properties are allowed on a plaintext file.
+func (fd *FileDecryptionProperties) PlaintextFilesAllowed() bool { return fd.plaintextAllowed }
+
+// SetUtilized is called to mark this instance as utilized once it is used to read a file. A single instance
+// can be used for reading one file only. Setting this ensures the keys will be wiped out upon completion of file reading.
+func (fd *FileDecryptionProperties) SetUtilized() { fd.utilized = true }
+
+// IsUtilized returns whether or not this instance has been used to decrypt a file. If the footer key and prefix are
+// empty and there are no column decryption properties, then this is always false.
+func (fd *FileDecryptionProperties) IsUtilized() bool {
+	if fd.footerKey == "" && len(fd.columnDecryptProps) == 0 && fd.aadPrefix == "" {
+		return false
+	}
+	return fd.utilized
+}
+
+// WipeOutDecryptionKeys will clear all the keys for this instance including the column level ones, this will be called
+// after this instance has been utilized.
+func (fd *FileDecryptionProperties) WipeOutDecryptionKeys() {
+	fd.footerKey = ""
+	for _, cd := range fd.columnDecryptProps {
+		cd.WipeOutDecryptionKey()
+	}
+}
+
+// Clone returns a new instance of these properties, changing the prefix if set (keeping the same prefix if left empty)
+func (fd *FileDecryptionProperties) Clone(newAadPrefix string) *FileDecryptionProperties {
+	keyCopy := fd.footerKey
+	colDecryptMapCopy := make(ColumnPathToDecryptionPropsMap)
+	for k, v := range fd.columnDecryptProps {
+		colDecryptMapCopy[k] = v.Clone()
+	}
+	if newAadPrefix == "" {
+		newAadPrefix = fd.aadPrefix
+	}
+	return &FileDecryptionProperties{
+		footerKey:                     keyCopy,
+		KeyRetriever:                  fd.KeyRetriever,
+		checkPlaintextFooterIntegrity: fd.checkPlaintextFooterIntegrity,
+		Verifier:                      fd.Verifier,
+		columnDecryptProps:            colDecryptMapCopy,
+		aadPrefix:                     newAadPrefix,
+		plaintextAllowed:              fd.plaintextAllowed,
+		utilized:                      false,
+	}
+}
+
+type fileDecryptConfig struct {
+	footerKey            string
+	aadPrefix            string
+	verifier             AADPrefixVerifier
+	colDecrypt           ColumnPathToDecryptionPropsMap
+	retriever            DecryptionKeyRetriever
+	checkFooterIntegrity bool
+	plaintextAllowed     bool
+}
+
+// FileDecryptionOption is how to supply options to constructing a new FileDecryptionProperties instance.
+type FileDecryptionOption func(*fileDecryptConfig)
+
+// WithFooterKey sets an explicit footer key. If Applied on a file that contains footer key
+// metadata the metadata will be ignored, the footer will be decrypted/verified with this key.
+//
+// If the explicit key is not set, footer key will be fetched from the key retriever.
+// With explcit keys or AAD prefix, new encryption properties object must be created for each
+// encrypted file.
+//
+// Explicit encryption keys (footer and column) are cloned.
+// Upon completion of file reading, the cloned encryption keys in the properties will be wiped out
+// Caller is responsible for wiping out the input key array
+// footer key length must be either 16, 24, or 32 bytes
+func WithFooterKey(key string) FileDecryptionOption {
+	return func(cfg *fileDecryptConfig) {
+		if key != "" {
+			cfg.footerKey = key
+		}
+	}
+}
+
+// WithPrefixVerifier supplies a verifier object to use for verifying the AAD Prefixes stored in the file.
+func WithPrefixVerifier(verifier AADPrefixVerifier) FileDecryptionOption {
+	return func(cfg *fileDecryptConfig) {
+		if verifier != nil {
+			cfg.verifier = verifier
+		}
+	}
+}
+
+// WithColumnKeys sets explicit column keys.
+//
+// It's also possible to set a key retriever on this property object.
+//
+// Upon file decryption, availability of explicit keys is checked before invocation
+// of the retreiver callback.
+//
+// If an explicit key is available for a footer or a column, its key metadata will be ignored.
+func WithColumnKeys(decrypt ColumnPathToDecryptionPropsMap) FileDecryptionOption {
+	return func(cfg *fileDecryptConfig) {
+		if len(decrypt) == 0 {
+			return
+		}
+		if len(cfg.colDecrypt) != 0 {
+			panic("column properties already set")
+		}
+		for _, v := range decrypt {
+			if v.IsUtilized() {
+				panic("parquet: column properties utilized in another file")
+			}
+			v.SetUtilized()
+		}
+		cfg.colDecrypt = decrypt
+	}
+}
+
+// WithKeyRetriever sets a key retriever callback. It's also possible to set explicit footer or column keys.
+func WithKeyRetriever(retriever DecryptionKeyRetriever) FileDecryptionOption {
+	return func(cfg *fileDecryptConfig) {
+		if retriever != nil {
+			cfg.retriever = retriever
+		}
+	}
+}
+
+// DisableFooterSignatureVerification skips integrity verification of plaintext footers.
+//
+// If not called, integrity of plaintext footers will be checked in runtime, and will panic
+// if the footer signing key is not available
+// or if the footer content and signature don't match
+func DisableFooterSignatureVerification() FileDecryptionOption {
+	return func(cfg *fileDecryptConfig) {
+		cfg.checkFooterIntegrity = false
+	}
+}
+
+// WithPlaintextAllowed sets allowing plaintext files.
+//
+// By default, reading plaintext (unencrypted) files is not allowed when using
+// a decryptor.
+//
+// In order to detect files that were not encrypted by mistake.
+// However the default behavior can be overridden by using this method.
+func WithPlaintextAllowed() FileDecryptionOption {
+	return func(cfg *fileDecryptConfig) {
+		cfg.plaintextAllowed = true
+	}
+}
+
+// WithDecryptAadPrefix explicitly supplies the file aad prefix.
+//
+// A must when a prefix is used for file encryption, but not stored in the file.
+func WithDecryptAadPrefix(prefix string) FileDecryptionOption {
+	return func(cfg *fileDecryptConfig) {
+		if prefix != "" {
+			cfg.aadPrefix = prefix
+		}
+	}
+}
+
+// Algorithm describes how something was encrypted, representing the EncryptionAlgorithm object from the
+// parquet.thrift file.
+type Algorithm struct {
+	Algo Cipher
+	Aad  struct {
+		AadPrefix       []byte
+		AadFileUnique   []byte
+		SupplyAadPrefix bool
+	}
+}
+
+// ToThrift returns an instance to be used for serializing when writing a file.
+func (e Algorithm) ToThrift() *format.EncryptionAlgorithm {
+	if e.Algo == AesGcm {
+		return &format.EncryptionAlgorithm{
+			AES_GCM_V1: &format.AesGcmV1{
+				AadPrefix:       e.Aad.AadPrefix,
+				AadFileUnique:   e.Aad.AadFileUnique,
+				SupplyAadPrefix: &e.Aad.SupplyAadPrefix,
+			},
+		}
+	}
+	return &format.EncryptionAlgorithm{
+		AES_GCM_CTR_V1: &format.AesGcmCtrV1{
+			AadPrefix:       e.Aad.AadPrefix,
+			AadFileUnique:   e.Aad.AadFileUnique,
+			SupplyAadPrefix: &e.Aad.SupplyAadPrefix,
+		},
+	}
+}
+
+// AlgorithmFromThrift converts the thrift object to the Algorithm struct for easier usage.
+func AlgorithmFromThrift(enc *format.EncryptionAlgorithm) (ret Algorithm) {
+	if enc.IsSetAES_GCM_V1() {
+		ret.Algo = AesGcm
+		ret.Aad.AadFileUnique = enc.AES_GCM_V1.AadFileUnique
+		ret.Aad.AadPrefix = enc.AES_GCM_V1.AadPrefix
+		ret.Aad.SupplyAadPrefix = *enc.AES_GCM_V1.SupplyAadPrefix
+		return
+	}
+	ret.Algo = AesCtr
+	ret.Aad.AadFileUnique = enc.AES_GCM_CTR_V1.AadFileUnique
+	ret.Aad.AadPrefix = enc.AES_GCM_CTR_V1.AadPrefix
+	ret.Aad.SupplyAadPrefix = *enc.AES_GCM_CTR_V1.SupplyAadPrefix
+	return
+}
+
+// FileEncryptionProperties describe how to encrypt a parquet file when writing data.
+type FileEncryptionProperties struct {
+	alg                  Algorithm
+	footerKey            string
+	footerKeyMetadata    string
+	encryptedFooter      bool
+	fileAad              string
+	utilized             bool
+	storeAadPrefixInFile bool
+	aadPrefix            string
+	encryptedCols        ColumnPathToEncryptionPropsMap
+}
+
+// EncryptedFooter returns if the footer for this file should be encrypted or left in plaintext.
+func (fe *FileEncryptionProperties) EncryptedFooter() bool { return fe.encryptedFooter }
+
+// Algorithm returns the description of how we will perform the encryption, the algorithm, prefixes, and so on.
+func (fe *FileEncryptionProperties) Algorithm() Algorithm { return fe.alg }
+
+// FooterKey returns the actual key used to encrypt the footer if it is encrypted, or to encrypt any columns which
+// will be encrypted with it rather than their own keys.
+func (fe *FileEncryptionProperties) FooterKey() string { return fe.footerKey }
+
+// FooterKeyMetadata is used for retrieving a key from the key retriever in order to set the footer key
+func (fe *FileEncryptionProperties) FooterKeyMetadata() string { return fe.footerKeyMetadata }
+
+// FileAad returns the aad identification to be used at the file level which gets concatenated with the row and column
+// information for encrypting data.
+func (fe *FileEncryptionProperties) FileAad() string { return fe.fileAad }
+
+// IsUtilized returns whether or not this instance has been used to encrypt a file
+func (fe *FileEncryptionProperties) IsUtilized() bool { return fe.utilized }
+
+// SetUtilized is called after writing a file. A FileEncryptionProperties object can be used for writing one file only,
+// the encryption keys will be wiped out upon completion of writing the file.
+func (fe *FileEncryptionProperties) SetUtilized() { fe.utilized = true }
+
+// EncryptedColumns returns the mapping of column paths to column encryption properties
+func (fe *FileEncryptionProperties) EncryptedColumns() ColumnPathToEncryptionPropsMap {
+	return fe.encryptedCols
+}
+
+// ColumnEncryptionProperties returns the properties for encrypting a given column.
+//
+// This may be nil for columns that aren't encrypted or may be default properties.
+func (fe *FileEncryptionProperties) ColumnEncryptionProperties(path string) *ColumnEncryptionProperties {
+	if len(fe.encryptedCols) == 0 {
+		return NewColumnEncryptionProperties(path)
+	}
+	if c, ok := fe.encryptedCols[path]; ok {
+		return c
+	}
+	return nil
+}
+
+// Clone allows returning an identical property setup for another file with the option to update the aadPrefix,
+// (if given the empty string, the current aad prefix will be used) since a single instance can only be used
+// to encrypt one file before wiping out the keys.
+func (fe *FileEncryptionProperties) Clone(newAadPrefix string) *FileEncryptionProperties {
+	footerKeyCopy := fe.footerKey
+	encryptedColsCopy := make(ColumnPathToEncryptionPropsMap)
+	for k, v := range fe.encryptedCols {
+		encryptedColsCopy[k] = v.Clone()
+	}
+	if newAadPrefix == "" {
+		newAadPrefix = fe.aadPrefix
+	}
+
+	opts := []EncryptOption{
+		WithAlg(fe.alg.Algo), WithFooterKeyMetadata(fe.footerKeyMetadata),
+		WithAadPrefix(newAadPrefix), WithEncryptedColumns(encryptedColsCopy),
+	}
+	if !fe.encryptedFooter {
+		opts = append(opts, WithPlaintextFooter())
+	}
+	if !fe.storeAadPrefixInFile {
+		opts = append(opts, DisableAadPrefixStorage())
+	}
+	return NewFileEncryptionProperties(footerKeyCopy, opts...)
+}
+
+// WipeOutEncryptionKeys clears all of the encryption keys for this and the columns
+func (fe *FileEncryptionProperties) WipeOutEncryptionKeys() {
+	fe.footerKey = ""
+	for _, elem := range fe.encryptedCols {
+		elem.WipeOutEncryptionKey()
+	}
+}
+
+type configEncrypt struct {
+	cipher               Cipher
+	encryptFooter        bool
+	keyMetadata          string
+	aadprefix            string
+	storeAadPrefixInFile bool
+	encryptedCols        ColumnPathToEncryptionPropsMap
+}
+
+// EncryptOption is used for specifying values when building FileEncryptionProperties
+type EncryptOption func(*configEncrypt)
+
+// WithPlaintextFooter sets the writer to write the footer in plain text, otherwise the footer will be encrypted
+// too (which is the default behavior).
+func WithPlaintextFooter() EncryptOption {
+	return func(cfg *configEncrypt) {
+		cfg.encryptFooter = false
+	}
+}
+
+// WithAlg sets the encryption algorithm to utilize. (default is AesGcm)
+func WithAlg(cipher Cipher) EncryptOption {
+	return func(cfg *configEncrypt) {
+		cfg.cipher = cipher
+	}
+}
+
+// WithFooterKeyID sets a key retrieval metadata to use (converted from string), this must be a utf8 string.
+//
+// use either WithFooterKeyID or WithFooterKeyMetadata, not both.
+func WithFooterKeyID(key string) EncryptOption {
+	if !utf8.ValidString(key) {
+		panic("parquet: footer key id should be UTF8 encoded")
+	}
+	return WithFooterKeyMetadata(key)
+}
+
+// WithFooterKeyMetadata sets a key retrieval metadata to use for getting the key.
+//
+// Use either WithFooterKeyID or WithFooterKeyMetadata, not both.
+func WithFooterKeyMetadata(keyMeta string) EncryptOption {
+	return func(cfg *configEncrypt) {
+		if keyMeta != "" {
+			cfg.keyMetadata = keyMeta
+		}
+	}
+}
+
+// WithAadPrefix sets the AAD prefix to use for encryption and by default will store it in the file
+func WithAadPrefix(aadPrefix string) EncryptOption {
+	return func(cfg *configEncrypt) {
+		if aadPrefix != "" {
+			cfg.aadprefix = aadPrefix
+			cfg.storeAadPrefixInFile = true
+		}
+	}
+}
+
+// DisableAadPrefixStorage will set the properties to not store the AadPrefix in the file. If this isn't called
+// and the AadPrefix is set, then it will be stored. This needs to in the options *after* WithAadPrefix to have an effect.
+func DisableAadPrefixStorage() EncryptOption {
+	return func(cfg *configEncrypt) {
+		cfg.storeAadPrefixInFile = false
+	}
+}
+
+// WithEncryptedColumns sets the map of columns and their properties (keys etc.) If not called, then all columns will
+// be encrypted with the footer key. If called, then columns not in the map will be left unencrypted.
+func WithEncryptedColumns(encrypted ColumnPathToEncryptionPropsMap) EncryptOption {
+	none := func(*configEncrypt) {}
+	if len(encrypted) == 0 {
+		return none
+	}
+	return func(cfg *configEncrypt) {
+		if len(cfg.encryptedCols) != 0 {
+			panic("column properties already set")
+		}
+		for _, v := range encrypted {
+			if v.IsUtilized() {
+				panic("column properties utilized in another file")
+			}
+			v.SetUtilized()
+		}
+		cfg.encryptedCols = encrypted
+	}
+}
+
+// NewFileEncryptionProperties returns a new File Encryption description object using the options provided.
+func NewFileEncryptionProperties(footerKey string, opts ...EncryptOption) *FileEncryptionProperties {
+	var cfg configEncrypt
+	cfg.cipher = DefaultEncryptionAlgorithm
+	cfg.encryptFooter = DefaultEncryptedFooter
+	for _, o := range opts {
+		o(&cfg)
+	}
+
+	props := &FileEncryptionProperties{
+		footerKey:            footerKey,
+		footerKeyMetadata:    cfg.keyMetadata,
+		encryptedFooter:      cfg.encryptFooter,
+		aadPrefix:            cfg.aadprefix,
+		storeAadPrefixInFile: cfg.storeAadPrefixInFile,
+		encryptedCols:        cfg.encryptedCols,
+		utilized:             false,
+	}
+
+	aadFileUnique := [AadFileUniqueLength]uint8{}
+	_, err := rand.Read(aadFileUnique[:])
+	if err != nil {
+		panic(err)
+	}
+
+	supplyAadPrefix := false
+	if props.aadPrefix == "" {
+		props.fileAad = string(aadFileUnique[:])
+	} else {
+		props.fileAad = props.aadPrefix + string(aadFileUnique[:])
+		if !props.storeAadPrefixInFile {
+			supplyAadPrefix = true
+		}
+	}
+	props.alg.Algo = cfg.cipher
+	props.alg.Aad.AadFileUnique = aadFileUnique[:]
+	props.alg.Aad.SupplyAadPrefix = supplyAadPrefix
+	if cfg.aadprefix != "" && cfg.storeAadPrefixInFile {
+		props.alg.Aad.AadPrefix = []byte(props.aadPrefix)
+	}
+	return props
+}
diff --git a/go/parquet/encryption_properties_test.go b/go/parquet/encryption_properties_test.go
new file mode 100644
index 00000000000..ad7cb6010d9
--- /dev/null
+++ b/go/parquet/encryption_properties_test.go
@@ -0,0 +1,217 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package parquet_test
+
+import (
+	"testing"
+
+	"github.com/apache/arrow/go/parquet"
+	"github.com/apache/arrow/go/parquet/internal/encryption"
+	"github.com/stretchr/testify/assert"
+)
+
+const (
+	FooterEncryptionKey  = "0123456789012345"
+	ColumnEncryptionKey1 = "1234567890123450"
+	ColumnEncryptionKey2 = "1234567890123451"
+	FileName             = "tester"
+)
+
+func TestColumnEncryptedWithOwnKey(t *testing.T) {
+	t.Parallel()
+
+	columnPath1 := "column_1"
+	colprops1 := parquet.NewColumnEncryptionProperties(columnPath1,
+		parquet.WithKey(ColumnEncryptionKey1), parquet.WithKeyID("kc1"))
+
+	assert.Equal(t, columnPath1, colprops1.ColumnPath())
+	assert.True(t, colprops1.IsEncrypted())
+	assert.False(t, colprops1.IsEncryptedWithFooterKey())
+	assert.Equal(t, ColumnEncryptionKey1, colprops1.Key())
+	assert.Equal(t, "kc1", colprops1.KeyMetadata())
+}
+
+func TestColumnEncryptedWithFooterKey(t *testing.T) {
+	t.Parallel()
+
+	colPath1 := "column_1"
+	colprops1 := parquet.NewColumnEncryptionProperties(colPath1)
+
+	assert.Equal(t, colPath1, colprops1.ColumnPath())
+	assert.True(t, colprops1.IsEncrypted())
+	assert.True(t, colprops1.IsEncryptedWithFooterKey())
+}
+
+func TestUniformEncryption(t *testing.T) {
+	t.Parallel()
+
+	props := parquet.NewFileEncryptionProperties(FooterEncryptionKey, parquet.WithFooterKeyMetadata("kf"))
+
+	assert.True(t, props.EncryptedFooter())
+	assert.Equal(t, parquet.DefaultEncryptionAlgorithm, props.Algorithm().Algo)
+	assert.Equal(t, FooterEncryptionKey, props.FooterKey())
+	assert.Equal(t, "kf", props.FooterKeyMetadata())
+
+	colPath := parquet.ColumnPathFromString("a_column")
+	outColProps := props.ColumnEncryptionProperties(colPath.String())
+
+	assert.True(t, outColProps.IsEncrypted())
+	assert.True(t, outColProps.IsEncryptedWithFooterKey())
+}
+
+func TestEncryptFooterAndTwoColumns(t *testing.T) {
+	t.Parallel()
+
+	columnPath1 := parquet.ColumnPathFromString("column_1")
+	columnPath2 := parquet.ColumnPathFromString("column_2")
+
+	encryptedColumns := make(parquet.ColumnPathToEncryptionPropsMap)
+	encryptedColumns[columnPath1.String()] = parquet.NewColumnEncryptionProperties(columnPath1.String(),
+		parquet.WithKey(ColumnEncryptionKey1), parquet.WithKeyID("kc1"))
+	encryptedColumns[columnPath2.String()] = parquet.NewColumnEncryptionProperties(columnPath2.String(),
+		parquet.WithKey(ColumnEncryptionKey2), parquet.WithKeyID("kc2"))
+
+	props := parquet.NewFileEncryptionProperties(FooterEncryptionKey,
+		parquet.WithFooterKeyMetadata("kf"), parquet.WithEncryptedColumns(encryptedColumns))
+
+	assert.True(t, props.EncryptedFooter())
+	assert.Equal(t, parquet.DefaultEncryptionAlgorithm, props.Algorithm().Algo)
+	assert.Equal(t, FooterEncryptionKey, props.FooterKey())
+
+	outColProps1 := props.ColumnEncryptionProperties(columnPath1.String())
+	assert.Equal(t, columnPath1.String(), outColProps1.ColumnPath())
+	assert.True(t, outColProps1.IsEncrypted())
+	assert.False(t, outColProps1.IsEncryptedWithFooterKey())
+	assert.Equal(t, ColumnEncryptionKey1, outColProps1.Key())
+	assert.Equal(t, "kc1", outColProps1.KeyMetadata())
+
+	outColProps2 := props.ColumnEncryptionProperties(columnPath2.String())
+	assert.Equal(t, columnPath2.String(), outColProps2.ColumnPath())
+	assert.True(t, outColProps2.IsEncrypted())
+	assert.False(t, outColProps2.IsEncryptedWithFooterKey())
+	assert.Equal(t, ColumnEncryptionKey2, outColProps2.Key())
+	assert.Equal(t, "kc2", outColProps2.KeyMetadata())
+
+	columnPath3 := parquet.ColumnPathFromString("column_3")
+	outColProps3 := props.ColumnEncryptionProperties(columnPath3.String())
+	assert.Nil(t, outColProps3)
+}
+
+func TestEncryptTwoColumnsNotFooter(t *testing.T) {
+	t.Parallel()
+
+	columnPath1 := parquet.ColumnPathFromString("column_1")
+	columnPath2 := parquet.ColumnPathFromString("column_2")
+
+	encryptedColumns := make(parquet.ColumnPathToEncryptionPropsMap)
+	encryptedColumns[columnPath1.String()] = parquet.NewColumnEncryptionProperties(columnPath1.String(),
+		parquet.WithKey(ColumnEncryptionKey1), parquet.WithKeyID("kc1"))
+	encryptedColumns[columnPath2.String()] = parquet.NewColumnEncryptionProperties(columnPath2.String(),
+		parquet.WithKey(ColumnEncryptionKey2), parquet.WithKeyID("kc2"))
+
+	props := parquet.NewFileEncryptionProperties(FooterEncryptionKey,
+		parquet.WithFooterKeyMetadata("kf"), parquet.WithPlaintextFooter(), parquet.WithEncryptedColumns(encryptedColumns))
+
+	assert.False(t, props.EncryptedFooter())
+	assert.Equal(t, parquet.DefaultEncryptionAlgorithm, props.Algorithm().Algo)
+	assert.Equal(t, FooterEncryptionKey, props.FooterKey())
+
+	outColProps1 := props.ColumnEncryptionProperties(columnPath1.String())
+	assert.Equal(t, columnPath1.String(), outColProps1.ColumnPath())
+	assert.True(t, outColProps1.IsEncrypted())
+	assert.False(t, outColProps1.IsEncryptedWithFooterKey())
+	assert.Equal(t, ColumnEncryptionKey1, outColProps1.Key())
+	assert.Equal(t, "kc1", outColProps1.KeyMetadata())
+
+	outColProps2 := props.ColumnEncryptionProperties(columnPath2.String())
+	assert.Equal(t, columnPath2.String(), outColProps2.ColumnPath())
+	assert.True(t, outColProps2.IsEncrypted())
+	assert.False(t, outColProps2.IsEncryptedWithFooterKey())
+	assert.Equal(t, ColumnEncryptionKey2, outColProps2.Key())
+	assert.Equal(t, "kc2", outColProps2.KeyMetadata())
+
+	columnPath3 := "column_3"
+	outColProps3 := props.ColumnEncryptionProperties(columnPath3)
+	assert.Nil(t, outColProps3)
+}
+
+func TestUseAadPrefix(t *testing.T) {
+	t.Parallel()
+
+	props := parquet.NewFileEncryptionProperties(FooterEncryptionKey, parquet.WithAadPrefix(FileName))
+
+	assert.Equal(t, FileName, string(props.Algorithm().Aad.AadPrefix))
+	assert.False(t, props.Algorithm().Aad.SupplyAadPrefix)
+}
+
+func TestUseAadPrefixNotStoreInFile(t *testing.T) {
+	t.Parallel()
+
+	props := parquet.NewFileEncryptionProperties(FooterEncryptionKey,
+		parquet.WithAadPrefix(FileName), parquet.DisableAadPrefixStorage())
+
+	assert.Empty(t, props.Algorithm().Aad.AadPrefix)
+	assert.True(t, props.Algorithm().Aad.SupplyAadPrefix)
+}
+
+func TestUseAES_GCM_CTR_V1Algo(t *testing.T) {
+	t.Parallel()
+
+	props := parquet.NewFileEncryptionProperties(FooterEncryptionKey,
+		parquet.WithAlg(parquet.AesCtr))
+
+	assert.Equal(t, parquet.AesCtr, props.Algorithm().Algo)
+}
+
+func TestUseKeyRetriever(t *testing.T) {
+	t.Parallel()
+
+	stringKr1 := make(encryption.StringKeyIDRetriever)
+	stringKr1.PutKey("kf", FooterEncryptionKey)
+	stringKr1.PutKey("kc1", ColumnEncryptionKey1)
+	stringKr1.PutKey("kc2", ColumnEncryptionKey2)
+
+	props := parquet.NewFileDecryptionProperties(parquet.WithKeyRetriever(stringKr1))
+	assert.Equal(t, FooterEncryptionKey, props.KeyRetriever.GetKey([]byte("kf")))
+	assert.Equal(t, ColumnEncryptionKey1, props.KeyRetriever.GetKey([]byte("kc1")))
+	assert.Equal(t, ColumnEncryptionKey2, props.KeyRetriever.GetKey([]byte("kc2")))
+}
+
+func TestSupplyAadPrefix(t *testing.T) {
+	props := parquet.NewFileDecryptionProperties(
+		parquet.WithFooterKey(FooterEncryptionKey), parquet.WithDecryptAadPrefix(FileName))
+	assert.Equal(t, FileName, props.AadPrefix())
+}
+
+func TestSetKey(t *testing.T) {
+	columnPath1 := parquet.ColumnPathFromString("column_1")
+	props := parquet.NewColumnDecryptionProperties(columnPath1.String(), parquet.WithDecryptKey(ColumnEncryptionKey1))
+	assert.Equal(t, ColumnEncryptionKey1, props.Key())
+}
+
+func TestUsingExplicitFooterAndColumnKeys(t *testing.T) {
+	colPath1 := "column_1"
+	colPath2 := "column_2"
+	decryptCols := make(parquet.ColumnPathToDecryptionPropsMap)
+	decryptCols[colPath1] = parquet.NewColumnDecryptionProperties(colPath1, parquet.WithDecryptKey(ColumnEncryptionKey1))
+	decryptCols[colPath2] = parquet.NewColumnDecryptionProperties(colPath2, parquet.WithDecryptKey(ColumnEncryptionKey2))
+
+	props := parquet.NewFileDecryptionProperties(parquet.WithFooterKey(FooterEncryptionKey), parquet.WithColumnKeys(decryptCols))
+	assert.Equal(t, FooterEncryptionKey, props.FooterKey())
+	assert.Equal(t, ColumnEncryptionKey1, props.ColumnKey(colPath1))
+	assert.Equal(t, ColumnEncryptionKey2, props.ColumnKey(colPath2))
+}
diff --git a/go/parquet/go.mod b/go/parquet/go.mod
index 0f36a8dea73..9c415931191 100644
--- a/go/parquet/go.mod
+++ b/go/parquet/go.mod
@@ -19,8 +19,12 @@ module github.com/apache/arrow/go/parquet
 go 1.15
 
 require (
+	github.com/andybalholm/brotli v1.0.1
 	github.com/apache/arrow/go/arrow v0.0.0-20210310173904-5de02e3697aa
+	github.com/apache/thrift/lib/go/thrift v0.0.0-20210120171102-e27e82c46ba4
+	github.com/golang/snappy v0.0.3
 	github.com/klauspost/asmfmt v1.2.3
+	github.com/klauspost/compress v1.11.12
 	github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8
 	github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3
 	github.com/stretchr/testify v1.7.0
diff --git a/go/parquet/go.sum b/go/parquet/go.sum
index 60aa68a5953..be02835cc89 100644
--- a/go/parquet/go.sum
+++ b/go/parquet/go.sum
@@ -3,8 +3,12 @@ dmitri.shuralyov.com/gpu/mtl v0.0.0-20201218220906-28db891af037/go.mod h1:H6x//7
 github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
 github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
 github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af/go.mod h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw=
+github.com/andybalholm/brotli v1.0.1 h1:KqhlKozYbRtJvsPrrEeXcO+N2l6NYT5A2QAFmSULpEc=
+github.com/andybalholm/brotli v1.0.1/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y=
 github.com/apache/arrow/go/arrow v0.0.0-20210310173904-5de02e3697aa h1:0Bhiab9ep1wmbD1Lm17uqPkzgYhcBIZf1CsvrMhFMGI=
 github.com/apache/arrow/go/arrow v0.0.0-20210310173904-5de02e3697aa/go.mod h1:c9sxoIT3YgLxH4UhLOCKaBlEojuMhVYpk4Ntv3opUTQ=
+github.com/apache/thrift/lib/go/thrift v0.0.0-20210120171102-e27e82c46ba4 h1:orNYqmQGnSjgOauLWjHEp9/qIDT98xv/0Aa4Zet3/Y8=
+github.com/apache/thrift/lib/go/thrift v0.0.0-20210120171102-e27e82c46ba4/go.mod h1:V/LzksIyqd3KZuQ2SunvReTG/UkArhII1dAWY5U1sCE=
 github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
 github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
 github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc=
@@ -29,6 +33,8 @@ github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:W
 github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
 github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8=
 github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
+github.com/golang/snappy v0.0.3 h1:fHPg5GQYlCeLIPB9BZqMVR5nR9A+IM5zcgeTdjMYmLA=
+github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
 github.com/google/flatbuffers v1.11.0 h1:O7CEyB8Cb3/DmtxODGtLHcEvpr81Jm5qLg/hsHnxA2A=
 github.com/google/flatbuffers v1.11.0/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8=
 github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
@@ -39,6 +45,8 @@ github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/
 github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes=
 github.com/klauspost/asmfmt v1.2.3 h1:qEM7SLDo6DXXXz5yTpqUoxhsrtwH30nNR2riO2ZjznY=
 github.com/klauspost/asmfmt v1.2.3/go.mod h1:RAoUvqkWr2rUa2I19qKMEVZQe4BVtcHGTMCUOcCU2Lg=
+github.com/klauspost/compress v1.11.12 h1:famVnQVu7QwryBN4jNseQdUKES71ZAOnB6UQQJPZvqk=
+github.com/klauspost/compress v1.11.12/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
 github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 h1:AMFGa4R4MiIpspGNG7Z948v4n35fFGB3RR3G/ry4FWs=
 github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8/go.mod h1:mC1jAcsrzbxHt8iiaC+zU4b1ylILSosueou12R++wfY=
 github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 h1:+n/aFZefKZp7spd8DFdX7uMikMLXX4oubIzJF4kv/wI=
diff --git a/go/parquet/internal/bmi/Makefile b/go/parquet/internal/bmi/Makefile
index 138b4f1cee4..f196d819243 100644
--- a/go/parquet/internal/bmi/Makefile
+++ b/go/parquet/internal/bmi/Makefile
@@ -15,6 +15,9 @@
 # limitations under the License.
 
 PERL_FIXUP_ROTATE=perl -i -pe 's/(ro[rl]\s+\w{2,3})$$/\1, 1/'
+# since we're passing an int16, swap the MOVQ for the argument to a MOVW as per
+# the message given by go vet since it's a 2-byte value.
+PERL_FIXUP_MOVQ_MOVW=perl -i -pe 's/MOVQ rhs\+16\(FP\)/MOVW rhs+16(FP)/'
 C2GOASM=c2goasm -a -f
 CC=clang
 C_FLAGS=-masm=intel -mno-red-zone -mstackrealign -mllvm -inline-threshold=1000 \
@@ -37,4 +40,8 @@ _lib/bitmap_bmi2.s: _lib/bitmap_bmi2.c
 	$(CC) -S $(ASM_FLAGS_AVX2) $(ASM_FLAGS_BMI2) $(ASM_FLAGS_POPCNT) $(C_FLAGS)  $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@
 
 bitmap_bmi2.s: _lib/bitmap_bmi2.s
-	$(C2GOASM) $^ $@
+	$(C2GOASM) $^ $@ ; $(PERL_FIXUP_MOVQ_MOVW) $@
+
+clean:
+	rm -f $(INTEL_SOURCES)
+	rm -f _lib/$(INTEL_SOURCES)
diff --git a/go/parquet/internal/bmi/bitmap_bmi2.s b/go/parquet/internal/bmi/bitmap_bmi2.s
index c81794d4c4c..d5e5cf4bc03 100644
--- a/go/parquet/internal/bmi/bitmap_bmi2.s
+++ b/go/parquet/internal/bmi/bitmap_bmi2.s
@@ -25,7 +25,7 @@ TEXT ·_levels_to_bitmap(SB), $0-32
 
 	MOVQ levels+0(FP), DI
 	MOVQ numLevels+8(FP), SI
-	MOVQ rhs+16(FP), DX
+	MOVW rhs+16(FP), DX
 	LEAQ LCDATA1<>(SB), BP
 
 	WORD $0xf685             // test    esi, esi
diff --git a/go/parquet/internal/debug/assert_off.go b/go/parquet/internal/debug/assert_off.go
new file mode 100644
index 00000000000..52b9a233169
--- /dev/null
+++ b/go/parquet/internal/debug/assert_off.go
@@ -0,0 +1,24 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build !assert
+
+package debug
+
+// Assert will panic with msg if cond is false.
+//
+// msg must be a string, func() string or fmt.Stringer.
+func Assert(cond bool, msg interface{}) {}
diff --git a/go/parquet/internal/debug/assert_on.go b/go/parquet/internal/debug/assert_on.go
new file mode 100644
index 00000000000..188e6831204
--- /dev/null
+++ b/go/parquet/internal/debug/assert_on.go
@@ -0,0 +1,28 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build assert
+
+package debug
+
+// Assert will panic with msg if cond is false.
+//
+// msg should be a string or fmt.Stringer
+func Assert(cond bool, msg interface{}) {
+	if !cond {
+		panic(msg)
+	}
+}
diff --git a/go/parquet/internal/debug/doc.go b/go/parquet/internal/debug/doc.go
new file mode 100644
index 00000000000..61684d62538
--- /dev/null
+++ b/go/parquet/internal/debug/doc.go
@@ -0,0 +1,23 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package debug provides APIs for conditional runtime assertions and debug logging.
+//
+// Using Assert
+//
+// To enable runtime assertions, build with the assert tag. When the assert tag is omitted,
+// the code for the assertion will be omitted from the binary.
+package debug
diff --git a/go/parquet/internal/encryption/aes.go b/go/parquet/internal/encryption/aes.go
new file mode 100644
index 00000000000..3138b921f80
--- /dev/null
+++ b/go/parquet/internal/encryption/aes.go
@@ -0,0 +1,264 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package encryption contains the internal helpers for the parquet AES encryption/decryption handling.
+//
+// Testing for this is done via integration testing at the top level parquet package via attempting to
+// read and write encrypted files with different configurations to match test files in parquet-testing
+package encryption
+
+import (
+	"bytes"
+	"crypto/aes"
+	"crypto/cipher"
+	"crypto/rand"
+	"encoding/binary"
+	"io"
+
+	"github.com/apache/arrow/go/parquet"
+	"golang.org/x/xerrors"
+)
+
+// important constants for handling the aes encryption
+const (
+	GcmTagLength = 16
+	NonceLength  = 12
+
+	gcmMode          = 0
+	ctrMode          = 1
+	ctrIVLen         = 16
+	bufferSizeLength = 4
+)
+
+// Module constants for constructing the AAD bytes, the order here is
+// important as the constants are set via iota.
+const (
+	FooterModule int8 = iota
+	ColumnMetaModule
+	DataPageModule
+	DictPageModule
+	DataPageHeaderModule
+	DictPageHeaderModule
+	ColumnIndexModule
+	OffsetIndexModule
+)
+
+type aesEncryptor struct {
+	mode                int
+	ciphertextSizeDelta int
+}
+
+// NewAesEncryptor constructs an encryptor for the passed in cipher and whether
+// or not it's being used to encrypt metadata.
+func NewAesEncryptor(alg parquet.Cipher, metadata bool) *aesEncryptor {
+	ret := &aesEncryptor{}
+	ret.ciphertextSizeDelta = bufferSizeLength + NonceLength
+	if metadata || alg == parquet.AesGcm {
+		ret.mode = gcmMode
+		ret.ciphertextSizeDelta += GcmTagLength
+	} else {
+		ret.mode = ctrMode
+	}
+
+	return ret
+}
+
+// CiphertextSizeDelta is the number of extra bytes that are part of the encrypted data
+// above and beyond the plaintext value.
+func (a *aesEncryptor) CiphertextSizeDelta() int { return a.ciphertextSizeDelta }
+
+// SignedFooterEncrypt writes the signature for the provided footer bytes using the given key, AAD and nonce.
+// It returns the number of bytes that were written to w.
+func (a *aesEncryptor) SignedFooterEncrypt(w io.Writer, footer, key, aad, nonce []byte) int {
+	if a.mode != gcmMode {
+		panic("must use AES GCM (metadata) encryptor")
+	}
+
+	block, err := aes.NewCipher(key)
+	if err != nil {
+		panic(err)
+	}
+
+	aead, err := cipher.NewGCM(block)
+	if err != nil {
+		panic(err)
+	}
+	if aead.NonceSize() != NonceLength {
+		panic(xerrors.Errorf("nonce size mismatch %d, %d", aead.NonceSize(), NonceLength))
+	}
+	if aead.Overhead() != GcmTagLength {
+		panic(xerrors.Errorf("tagsize mismatch %d %d", aead.Overhead(), GcmTagLength))
+	}
+
+	ciphertext := aead.Seal(nil, nonce, footer, aad)
+	bufferSize := uint32(len(ciphertext) + len(nonce))
+	// data is written with a prefix of the size written as a little endian 32bit int.
+	if err := binary.Write(w, binary.LittleEndian, bufferSize); err != nil {
+		panic(err)
+	}
+	w.Write(nonce)
+	w.Write(ciphertext)
+	return bufferSizeLength + int(bufferSize)
+}
+
+// Encrypt calculates the ciphertext for src with the given key and aad, then writes it to w.
+// Returns the total number of bytes written.
+func (a *aesEncryptor) Encrypt(w io.Writer, src, key, aad []byte) int {
+	block, err := aes.NewCipher(key)
+	if err != nil {
+		panic(err)
+	}
+
+	nonce := make([]byte, NonceLength)
+	rand.Read(nonce)
+
+	if a.mode == gcmMode {
+		aead, err := cipher.NewGCM(block)
+		if err != nil {
+			panic(err)
+		}
+		if aead.NonceSize() != NonceLength {
+			panic(xerrors.Errorf("nonce size mismatch %d, %d", aead.NonceSize(), NonceLength))
+		}
+		if aead.Overhead() != GcmTagLength {
+			panic(xerrors.Errorf("tagsize mismatch %d %d", aead.Overhead(), GcmTagLength))
+		}
+
+		ciphertext := aead.Seal(nil, nonce, src, aad)
+		bufferSize := len(ciphertext) + len(nonce)
+		// data is written with a prefix of the size written as a little endian 32bit int.
+		if err := binary.Write(w, binary.LittleEndian, uint32(bufferSize)); err != nil {
+			panic(err)
+		}
+		w.Write(nonce)
+		w.Write(ciphertext)
+		return bufferSizeLength + bufferSize
+	}
+
+	// Parquet CTR IVs are comprised of a 12-byte nonce and a 4-byte initial
+	// counter field.
+	// The first 31 bits of the initial counter field are set to 0, the last bit
+	// is set to 1.
+	iv := make([]byte, ctrIVLen)
+	copy(iv, nonce)
+	iv[ctrIVLen-1] = 1
+
+	bufferSize := NonceLength + len(src)
+	// data is written with a prefix of the size written as a little endian 32bit int.
+	if err := binary.Write(w, binary.LittleEndian, uint32(bufferSize)); err != nil {
+		panic(err)
+	}
+	w.Write(nonce)
+	cipher.StreamWriter{S: cipher.NewCTR(block, iv), W: w}.Write(src)
+	return bufferSizeLength + bufferSize
+}
+
+type aesDecryptor struct {
+	mode                int
+	ciphertextSizeDelta int
+}
+
+// newAesDecryptor constructs and returns a decryptor for the given cipher type and whether or
+// not it is intended to be used for decrypting metadata.
+func newAesDecryptor(alg parquet.Cipher, metadata bool) *aesDecryptor {
+	ret := &aesDecryptor{}
+	ret.ciphertextSizeDelta = bufferSizeLength + NonceLength
+	if metadata || alg == parquet.AesGcm {
+		ret.mode = gcmMode
+		ret.ciphertextSizeDelta += GcmTagLength
+	} else {
+		ret.mode = ctrMode
+	}
+
+	return ret
+}
+
+// CiphertextSizeDelta is the number of bytes in the ciphertext that will not exist in the
+// plaintext due to be used for the decryption. The total size - the CiphertextSizeDelta is
+// the length of the plaintext after decryption.
+func (a *aesDecryptor) CiphertextSizeDelta() int { return a.ciphertextSizeDelta }
+
+// Decrypt returns the plaintext version of the given ciphertext when decrypted
+// with the provided key and AAD security bytes.
+func (a *aesDecryptor) Decrypt(cipherText, key, aad []byte) []byte {
+	block, err := aes.NewCipher(key)
+	if err != nil {
+		panic(err)
+	}
+
+	writtenCiphertextLen := binary.LittleEndian.Uint32(cipherText)
+	cipherLen := writtenCiphertextLen + bufferSizeLength
+	nonce := cipherText[bufferSizeLength : bufferSizeLength+NonceLength]
+
+	if a.mode == gcmMode {
+		aead, err := cipher.NewGCM(block)
+		if err != nil {
+			panic(err)
+		}
+
+		plain, err := aead.Open(nil, nonce, cipherText[bufferSizeLength+NonceLength:cipherLen], aad)
+		if err != nil {
+			panic(err)
+		}
+		return plain
+	}
+
+	// Parquet CTR IVs are comprised of a 12-byte nonce and a 4-byte initial
+	// counter field.
+	// The first 31 bits of the initial counter field are set to 0, the last bit
+	// is set to 1.
+	iv := make([]byte, ctrIVLen)
+	copy(iv, nonce)
+	iv[ctrIVLen-1] = 1
+
+	stream := cipher.NewCTR(block, iv)
+	dst := make([]byte, len(cipherText)-bufferSizeLength-NonceLength)
+	stream.XORKeyStream(dst, cipherText[bufferSizeLength+NonceLength:])
+	return dst
+}
+
+// CreateModuleAad creates the section AAD security bytes for the file, module, row group, column and page.
+//
+// This should be used for being passed to the encryptor and decryptor whenever requesting AAD bytes.
+func CreateModuleAad(fileAad string, moduleType int8, rowGroupOrdinal, columnOrdinal, pageOrdinal int16) string {
+	buf := bytes.NewBuffer([]byte(fileAad))
+	buf.WriteByte(byte(moduleType))
+
+	if moduleType == FooterModule {
+		return buf.String()
+	}
+
+	binary.Write(buf, binary.LittleEndian, rowGroupOrdinal)
+	binary.Write(buf, binary.LittleEndian, columnOrdinal)
+	if DataPageModule != moduleType && DataPageHeaderModule != moduleType {
+		return buf.String()
+	}
+
+	binary.Write(buf, binary.LittleEndian, pageOrdinal)
+	return buf.String()
+}
+
+// CreateFooterAad takes an aadPrefix and constructs the security AAD bytes for encrypting
+// and decrypting the parquet footer bytes.
+func CreateFooterAad(aadPrefix string) string {
+	return CreateModuleAad(aadPrefix, FooterModule, -1, -1, -1)
+}
+
+// QuickUpdatePageAad updates aad with the new page ordinal, modifying the
+// last two bytes of aad.
+func QuickUpdatePageAad(aad []byte, newPageOrdinal int16) {
+	binary.LittleEndian.PutUint16(aad[len(aad)-2:], uint16(newPageOrdinal))
+}
diff --git a/go/parquet/internal/encryption/decryptor.go b/go/parquet/internal/encryption/decryptor.go
new file mode 100644
index 00000000000..9a427a75605
--- /dev/null
+++ b/go/parquet/internal/encryption/decryptor.go
@@ -0,0 +1,261 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package encryption
+
+import (
+	"github.com/apache/arrow/go/arrow/memory"
+	"github.com/apache/arrow/go/parquet"
+)
+
+// FileDecryptor is an interface used by the filereader for decrypting an
+// entire parquet file as we go, usually constructed from the DecryptionProperties
+type FileDecryptor interface {
+	// Returns the key for decrypting the footer if provided
+	GetFooterKey() string
+	// Provides the file level AAD security bytes
+	FileAad() string
+	// return which algorithm this decryptor was constructed for
+	Algorithm() parquet.Cipher
+	// return the FileDecryptionProperties that were used for this decryptor
+	Properties() *parquet.FileDecryptionProperties
+	// Clear out the decryption keys, this is automatically called after every
+	// successfully decrypted file to ensure that keys aren't kept around.
+	WipeOutDecryptionKeys()
+	// GetFooterDecryptor returns a Decryptor interface for use to decrypt the footer
+	// of a parquet file.
+	GetFooterDecryptor() Decryptor
+	// GetFooterDecryptorForColumnMeta returns a Decryptor interface for Column Metadata
+	// in the file footer using the AAD bytes provided.
+	GetFooterDecryptorForColumnMeta(aad string) Decryptor
+	// GetFooterDecryptorForColumnData returns the decryptor that can be used for decrypting
+	// actual column data footer bytes, not column metadata.
+	GetFooterDecryptorForColumnData(aad string) Decryptor
+	// GetColumnMetaDecryptor returns a decryptor for the requested column path, key and AAD bytes
+	// but only for decrypting the row group level metadata
+	GetColumnMetaDecryptor(columnPath, columnKeyMetadata, aad string) Decryptor
+	// GetColumnDataDecryptor returns a decryptor for the requested column path, key, and AAD bytes
+	// but only for the rowgroup column data.
+	GetColumnDataDecryptor(columnPath, columnKeyMetadata, aad string) Decryptor
+}
+
+type fileDecryptor struct {
+	// the properties contains the key retriever for us to get keys
+	// from the key metadata
+	props *parquet.FileDecryptionProperties
+	// concatenation of aad_prefix (if exists) and aad_file_unique
+	fileAad                 string
+	columnDataMap           map[string]Decryptor
+	columnMetaDataMap       map[string]Decryptor
+	footerMetadataDecryptor Decryptor
+	footerDataDecryptor     Decryptor
+	alg                     parquet.Cipher
+	footerKeyMetadata       string
+	metaDecryptor           *aesDecryptor
+	dataDecryptor           *aesDecryptor
+	mem                     memory.Allocator
+}
+
+// NewFileDecryptor constructs a decryptor from the provided configuration of properties, cipher and key metadata. Using the provided memory allocator or
+// the default allocator if one isn't provided.
+func NewFileDecryptor(props *parquet.FileDecryptionProperties, fileAad string, alg parquet.Cipher, keymetadata string, mem memory.Allocator) FileDecryptor {
+	if mem == nil {
+		mem = memory.DefaultAllocator
+	}
+	return &fileDecryptor{
+		fileAad:           fileAad,
+		props:             props,
+		alg:               alg,
+		footerKeyMetadata: keymetadata,
+		mem:               mem,
+		columnDataMap:     make(map[string]Decryptor),
+		columnMetaDataMap: make(map[string]Decryptor),
+	}
+}
+
+func (d *fileDecryptor) FileAad() string                               { return d.fileAad }
+func (d *fileDecryptor) Properties() *parquet.FileDecryptionProperties { return d.props }
+func (d *fileDecryptor) Algorithm() parquet.Cipher                     { return d.alg }
+func (d *fileDecryptor) GetFooterKey() string {
+	footerKey := d.props.FooterKey()
+	if footerKey == "" {
+		if d.footerKeyMetadata == "" {
+			panic("no footer key or key metadata")
+		}
+		if d.props.KeyRetriever == nil {
+			panic("no footer key or key retriever")
+		}
+		footerKey = d.props.KeyRetriever.GetKey([]byte(d.footerKeyMetadata))
+	}
+	if footerKey == "" {
+		panic("invalid footer encryption key. Could not parse footer metadata")
+	}
+	return footerKey
+}
+
+func (d *fileDecryptor) GetFooterDecryptor() Decryptor {
+	aad := CreateFooterAad(d.fileAad)
+	return d.getFooterDecryptor(aad, true)
+}
+
+func (d *fileDecryptor) GetFooterDecryptorForColumnMeta(aad string) Decryptor {
+	return d.getFooterDecryptor(aad, true)
+}
+
+func (d *fileDecryptor) GetFooterDecryptorForColumnData(aad string) Decryptor {
+	return d.getFooterDecryptor(aad, false)
+}
+
+func (d *fileDecryptor) GetColumnMetaDecryptor(columnPath, columnKeyMetadata, aad string) Decryptor {
+	return d.getColumnDecryptor(columnPath, columnKeyMetadata, aad, true)
+}
+
+func (d *fileDecryptor) GetColumnDataDecryptor(columnPath, columnKeyMetadata, aad string) Decryptor {
+	return d.getColumnDecryptor(columnPath, columnKeyMetadata, aad, false)
+}
+
+func (d *fileDecryptor) WipeOutDecryptionKeys() {
+	d.props.WipeOutDecryptionKeys()
+}
+
+func (d *fileDecryptor) getFooterDecryptor(aad string, metadata bool) Decryptor {
+	if metadata {
+		if d.footerMetadataDecryptor != nil {
+			return d.footerMetadataDecryptor
+		}
+	} else {
+		if d.footerDataDecryptor != nil {
+			return d.footerDataDecryptor
+		}
+	}
+
+	footerKey := d.GetFooterKey()
+
+	// Create both data and metadata decryptors to avoid redundant retrieval of key
+	// from the key_retriever.
+	aesMetaDecrypt := d.getMetaAesDecryptor()
+	aesDataDecrypt := d.getDataAesDecryptor()
+
+	d.footerMetadataDecryptor = &decryptor{
+		decryptor: aesMetaDecrypt,
+		key:       []byte(footerKey),
+		fileAad:   []byte(d.fileAad),
+		aad:       []byte(aad),
+		mem:       d.mem,
+	}
+	d.footerDataDecryptor = &decryptor{
+		decryptor: aesDataDecrypt,
+		key:       []byte(footerKey),
+		fileAad:   []byte(d.fileAad),
+		aad:       []byte(aad),
+		mem:       d.mem,
+	}
+
+	if metadata {
+		return d.footerMetadataDecryptor
+	}
+	return d.footerDataDecryptor
+}
+
+func (d *fileDecryptor) getColumnDecryptor(columnPath, columnMeta, aad string, metadata bool) Decryptor {
+	if metadata {
+		if res, ok := d.columnMetaDataMap[columnPath]; ok {
+			res.UpdateAad(aad)
+			return res
+		}
+	} else {
+		if res, ok := d.columnDataMap[columnPath]; ok {
+			res.UpdateAad(aad)
+			return res
+		}
+	}
+
+	columnKey := d.props.ColumnKey(columnPath)
+	// No explicit column key given via API. Retrieve via key metadata.
+	if columnKey == "" && columnMeta != "" && d.props.KeyRetriever != nil {
+		columnKey = d.props.KeyRetriever.GetKey([]byte(columnMeta))
+	}
+	if columnKey == "" {
+		panic("hidden column exception, path=" + columnPath)
+	}
+
+	aesDataDecrypt := d.getDataAesDecryptor()
+	aesMetaDecrypt := d.getMetaAesDecryptor()
+
+	d.columnDataMap[columnPath] = &decryptor{
+		decryptor: aesDataDecrypt,
+		key:       []byte(columnKey),
+		fileAad:   []byte(d.fileAad),
+		aad:       []byte(aad),
+		mem:       d.mem,
+	}
+	d.columnMetaDataMap[columnPath] = &decryptor{
+		decryptor: aesMetaDecrypt,
+		key:       []byte(columnKey),
+		fileAad:   []byte(d.fileAad),
+		aad:       []byte(aad),
+		mem:       d.mem,
+	}
+
+	if metadata {
+		return d.columnMetaDataMap[columnPath]
+	}
+	return d.columnDataMap[columnPath]
+}
+
+func (d *fileDecryptor) getMetaAesDecryptor() *aesDecryptor {
+	if d.metaDecryptor == nil {
+		d.metaDecryptor = newAesDecryptor(d.alg, true)
+	}
+	return d.metaDecryptor
+}
+
+func (d *fileDecryptor) getDataAesDecryptor() *aesDecryptor {
+	if d.dataDecryptor == nil {
+		d.dataDecryptor = newAesDecryptor(d.alg, false)
+	}
+	return d.dataDecryptor
+}
+
+// Decryptor is the basic interface for any decryptor generated from a FileDecryptor
+type Decryptor interface {
+	// returns the File Level AAD bytes
+	FileAad() string
+	// returns the current allocator that was used for any extra allocations of buffers
+	Allocator() memory.Allocator
+	// returns the CiphertextSizeDelta from the decryptor
+	CiphertextSizeDelta() int
+	// Decrypt just returns the decrypted plaintext from the src ciphertext
+	Decrypt(src []byte) []byte
+	// set the AAD bytes of the decryptor to the provided string
+	UpdateAad(string)
+}
+
+type decryptor struct {
+	decryptor *aesDecryptor
+	key       []byte
+	fileAad   []byte
+	aad       []byte
+	mem       memory.Allocator
+}
+
+func (d *decryptor) Allocator() memory.Allocator { return d.mem }
+func (d *decryptor) FileAad() string             { return string(d.fileAad) }
+func (d *decryptor) UpdateAad(aad string)        { d.aad = []byte(aad) }
+func (d *decryptor) CiphertextSizeDelta() int    { return d.decryptor.CiphertextSizeDelta() }
+func (d *decryptor) Decrypt(src []byte) []byte {
+	return d.decryptor.Decrypt(src, d.key, d.aad)
+}
diff --git a/go/parquet/internal/encryption/encryptor.go b/go/parquet/internal/encryption/encryptor.go
new file mode 100644
index 00000000000..dda5c186a81
--- /dev/null
+++ b/go/parquet/internal/encryption/encryptor.go
@@ -0,0 +1,237 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package encryption
+
+import (
+	"io"
+
+	"github.com/apache/arrow/go/arrow/memory"
+	"github.com/apache/arrow/go/parquet"
+)
+
+// FileEncryptor is the interface for constructing encryptors for the different
+// sections of a parquet file.
+type FileEncryptor interface {
+	// GetFooterEncryptor returns an encryptor for the footer metadata
+	GetFooterEncryptor() Encryptor
+	// GetFooterSigningEncryptor returns an encryptor for creating the signature
+	// for the footer as opposed to encrypting the footer bytes directly.
+	GetFooterSigningEncryptor() Encryptor
+	// GetColumnMetaEncryptor returns an encryptor for the metadata only of the requested
+	// column path string.
+	GetColumnMetaEncryptor(columnPath string) Encryptor
+	// GetColumnDataEncryptor returns an encryptor for the column data ONLY of
+	// the requested column path string.
+	GetColumnDataEncryptor(columnPath string) Encryptor
+	// WipeOutEncryptionKeys deletes the keys that were used for encryption,
+	// called after every successfully encrypted file to ensure against accidental
+	// key re-use.
+	WipeOutEncryptionKeys()
+}
+
+type fileEncryptor struct {
+	props                  *parquet.FileEncryptionProperties
+	columnDataMap          map[string]Encryptor
+	columnMetaDataMap      map[string]Encryptor
+	footerSigningEncryptor Encryptor
+	footerEncryptor        Encryptor
+
+	// Key must be 16, 24, or 32 bytes in length thus there could be up to
+	// three types of meta_encryptors and data_encryptors
+	metaEncryptor *aesEncryptor
+	dataEncryptor *aesEncryptor
+
+	mem memory.Allocator
+}
+
+// NewFileEncryptor returns a new encryptor using the given encryption properties.
+//
+// Panics if the properties passed have already been used to construct an encryptor
+// ie: props.IsUtilized returns true. If mem is nil, will default to memory.DefaultAllocator
+func NewFileEncryptor(props *parquet.FileEncryptionProperties, mem memory.Allocator) FileEncryptor {
+	if props.IsUtilized() {
+		panic("re-using encryption properties for another file")
+	}
+
+	props.SetUtilized()
+	if mem == nil {
+		mem = memory.DefaultAllocator
+	}
+
+	return &fileEncryptor{
+		props:             props,
+		mem:               mem,
+		columnDataMap:     make(map[string]Encryptor),
+		columnMetaDataMap: make(map[string]Encryptor),
+	}
+}
+
+func (e *fileEncryptor) WipeOutEncryptionKeys() {
+	e.props.WipeOutEncryptionKeys()
+}
+
+func (e *fileEncryptor) GetFooterEncryptor() Encryptor {
+	if e.footerEncryptor == nil {
+		alg := e.props.Algorithm().Algo
+		footerAad := CreateFooterAad(e.props.FileAad())
+		footerKey := e.props.FooterKey()
+		enc := e.getMetaAesEncryptor(alg)
+		e.footerEncryptor = &encryptor{
+			aesEncryptor: enc,
+			key:          []byte(footerKey),
+			fileAad:      e.props.FileAad(),
+			aad:          footerAad,
+			mem:          e.mem,
+		}
+	}
+	return e.footerEncryptor
+}
+
+func (e *fileEncryptor) GetFooterSigningEncryptor() Encryptor {
+	if e.footerSigningEncryptor == nil {
+		alg := e.props.Algorithm().Algo
+		footerAad := CreateFooterAad(e.props.FileAad())
+		footerKey := e.props.FooterKey()
+		enc := e.getMetaAesEncryptor(alg)
+		e.footerSigningEncryptor = &encryptor{
+			aesEncryptor: enc,
+			key:          []byte(footerKey),
+			fileAad:      e.props.FileAad(),
+			aad:          footerAad,
+			mem:          e.mem,
+		}
+	}
+	return e.footerSigningEncryptor
+}
+
+func (e *fileEncryptor) getMetaAesEncryptor(alg parquet.Cipher) *aesEncryptor {
+	if e.metaEncryptor == nil {
+		e.metaEncryptor = NewAesEncryptor(alg, true)
+	}
+	return e.metaEncryptor
+}
+
+func (e *fileEncryptor) getDataAesEncryptor(alg parquet.Cipher) *aesEncryptor {
+	if e.dataEncryptor == nil {
+		e.dataEncryptor = NewAesEncryptor(alg, false)
+	}
+	return e.dataEncryptor
+}
+
+func (e *fileEncryptor) GetColumnMetaEncryptor(columnPath string) Encryptor {
+	return e.getColumnEncryptor(columnPath, true)
+}
+
+func (e *fileEncryptor) GetColumnDataEncryptor(columnPath string) Encryptor {
+	return e.getColumnEncryptor(columnPath, false)
+}
+
+func (e *fileEncryptor) getColumnEncryptor(columnPath string, metadata bool) Encryptor {
+	if metadata {
+		if enc, ok := e.columnMetaDataMap[columnPath]; ok {
+			return enc
+		}
+	} else {
+		if enc, ok := e.columnDataMap[columnPath]; ok {
+			return enc
+		}
+	}
+
+	columnProp := e.props.ColumnEncryptionProperties(columnPath)
+	if columnProp == nil {
+		return nil
+	}
+
+	var key string
+	if columnProp.IsEncryptedWithFooterKey() {
+		key = e.props.FooterKey()
+	} else {
+		key = columnProp.Key()
+	}
+
+	alg := e.props.Algorithm().Algo
+	var enc *aesEncryptor
+	if metadata {
+		enc = e.getMetaAesEncryptor(alg)
+	} else {
+		enc = e.getDataAesEncryptor(alg)
+	}
+
+	fileAad := e.props.FileAad()
+	ret := &encryptor{
+		aesEncryptor: enc,
+		key:          []byte(key),
+		fileAad:      fileAad,
+		aad:          "",
+		mem:          e.mem,
+	}
+	if metadata {
+		e.columnMetaDataMap[columnPath] = ret
+	} else {
+		e.columnDataMap[columnPath] = ret
+	}
+	return ret
+}
+
+// Encryptor is the basic interface for encryptors, for now there's only the single
+// aes encryptor implementation, but having it as an interface allows easy addition
+// manipulation of encryptor implementations in the future.
+type Encryptor interface {
+	// FileAad returns the file level AAD bytes for this encryptor
+	FileAad() string
+	// UpdateAad sets the aad bytes for encryption to the provided string
+	UpdateAad(string)
+	// Allocator returns the allocator that was used to construct the encryptor
+	Allocator() memory.Allocator
+	// CiphertextSizeDelta returns the extra bytes that will be added to the ciphertext
+	// for a total size of len(plaintext) + CiphertextSizeDelta bytes
+	CiphertextSizeDelta() int
+	// Encrypt writes the encrypted ciphertext for src to w and returns the total
+	// number of bytes written.
+	Encrypt(w io.Writer, src []byte) int
+	// EncryptColumnMetaData returns true if the column metadata should be encrypted based on the
+	// column encryption settings and footer encryption setting.
+	EncryptColumnMetaData(encryptFooter bool, properties *parquet.ColumnEncryptionProperties) bool
+}
+
+type encryptor struct {
+	aesEncryptor *aesEncryptor
+	key          []byte
+	fileAad      string
+	aad          string
+	mem          memory.Allocator
+}
+
+func (e *encryptor) FileAad() string             { return e.fileAad }
+func (e *encryptor) UpdateAad(aad string)        { e.aad = aad }
+func (e *encryptor) Allocator() memory.Allocator { return e.mem }
+func (e *encryptor) CiphertextSizeDelta() int    { return e.aesEncryptor.CiphertextSizeDelta() }
+
+func (e *encryptor) EncryptColumnMetaData(encryptFooter bool, properties *parquet.ColumnEncryptionProperties) bool {
+	if properties == nil || !properties.IsEncrypted() {
+		return false
+	}
+	if !encryptFooter {
+		return false
+	}
+	// if not encrypted with footer key then encrypt the metadata
+	return !properties.IsEncryptedWithFooterKey()
+}
+
+func (e *encryptor) Encrypt(w io.Writer, src []byte) int {
+	return e.aesEncryptor.Encrypt(w, src, e.key, []byte(e.aad))
+}
diff --git a/go/parquet/internal/encryption/key_handling.go b/go/parquet/internal/encryption/key_handling.go
new file mode 100644
index 00000000000..b3c36c497ca
--- /dev/null
+++ b/go/parquet/internal/encryption/key_handling.go
@@ -0,0 +1,62 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package encryption
+
+import (
+	"encoding/binary"
+	"unsafe"
+
+	"golang.org/x/xerrors"
+)
+
+// StringKeyIDRetriever implements the KeyRetriever interface GetKey
+// to allow setting in keys with a string id.
+type StringKeyIDRetriever map[string]string
+
+// PutKey adds a key with the given string ID that can be retrieved
+func (s StringKeyIDRetriever) PutKey(keyID, key string) {
+	s[keyID] = key
+}
+
+// GetKey expects the keymetadata to match one of the keys that were added
+// with PutKey and panics if the key cannot be found.
+func (s StringKeyIDRetriever) GetKey(keyMetadata []byte) string {
+	k, ok := s[*(*string)(unsafe.Pointer(&keyMetadata))]
+	if !ok {
+		panic(xerrors.Errorf("parquet: key missing for id %s", keyMetadata))
+	}
+	return k
+}
+
+// IntegerKeyIDRetriever is used for using unsigned 32bit integers as key ids.
+type IntegerKeyIDRetriever map[uint32]string
+
+// PutKey adds keys with uint32 IDs
+func (i IntegerKeyIDRetriever) PutKey(keyID uint32, key string) {
+	i[keyID] = key
+}
+
+// GetKey expects the key metadata bytes to be a little endian uint32 which
+// is then used to retrieve the key bytes. Panics if the key id cannot be found.
+func (i IntegerKeyIDRetriever) GetKey(keyMetadata []byte) string {
+	keyID := binary.LittleEndian.Uint32(keyMetadata)
+	k, ok := i[keyID]
+	if !ok {
+		panic(xerrors.Errorf("parquet: key missing for id %d", keyID))
+	}
+	return k
+}
diff --git a/go/parquet/internal/gen-go/parquet/GoUnusedProtection__.go b/go/parquet/internal/gen-go/parquet/GoUnusedProtection__.go
new file mode 100644
index 00000000000..b72118e443e
--- /dev/null
+++ b/go/parquet/internal/gen-go/parquet/GoUnusedProtection__.go
@@ -0,0 +1,6 @@
+// Code generated by Thrift Compiler (0.14.0). DO NOT EDIT.
+
+package parquet
+
+var GoUnusedProtection__ int;
+
diff --git a/go/parquet/internal/gen-go/parquet/parquet-consts.go b/go/parquet/internal/gen-go/parquet/parquet-consts.go
new file mode 100644
index 00000000000..8de3a86d224
--- /dev/null
+++ b/go/parquet/internal/gen-go/parquet/parquet-consts.go
@@ -0,0 +1,23 @@
+// Code generated by Thrift Compiler (0.14.0). DO NOT EDIT.
+
+package parquet
+
+import(
+	"bytes"
+	"context"
+	"fmt"
+	"time"
+	"github.com/apache/thrift/lib/go/thrift"
+)
+
+// (needed to ensure safety because of naive import list construction.)
+var _ = thrift.ZERO
+var _ = fmt.Printf
+var _ = context.Background
+var _ = time.Now
+var _ = bytes.Equal
+
+
+func init() {
+}
+
diff --git a/go/parquet/internal/gen-go/parquet/parquet.go b/go/parquet/internal/gen-go/parquet/parquet.go
new file mode 100644
index 00000000000..997b6ab91f3
--- /dev/null
+++ b/go/parquet/internal/gen-go/parquet/parquet.go
@@ -0,0 +1,10961 @@
+// Code generated by Thrift Compiler (0.14.0). DO NOT EDIT.
+
+package parquet
+
+import(
+	"bytes"
+	"context"
+	"database/sql/driver"
+	"errors"
+	"fmt"
+	"time"
+	"github.com/apache/thrift/lib/go/thrift"
+)
+
+// (needed to ensure safety because of naive import list construction.)
+var _ = thrift.ZERO
+var _ = fmt.Printf
+var _ = context.Background
+var _ = time.Now
+var _ = bytes.Equal
+
+//Types supported by Parquet.  These types are intended to be used in combination
+//with the encodings to control the on disk storage format.
+//For example INT16 is not included as a type since a good encoding of INT32
+//would handle this.
+type Type int64
+const (
+  Type_BOOLEAN Type = 0
+  Type_INT32 Type = 1
+  Type_INT64 Type = 2
+  Type_INT96 Type = 3
+  Type_FLOAT Type = 4
+  Type_DOUBLE Type = 5
+  Type_BYTE_ARRAY Type = 6
+  Type_FIXED_LEN_BYTE_ARRAY Type = 7
+)
+
+func (p Type) String() string {
+  switch p {
+  case Type_BOOLEAN: return "BOOLEAN"
+  case Type_INT32: return "INT32"
+  case Type_INT64: return "INT64"
+  case Type_INT96: return "INT96"
+  case Type_FLOAT: return "FLOAT"
+  case Type_DOUBLE: return "DOUBLE"
+  case Type_BYTE_ARRAY: return "BYTE_ARRAY"
+  case Type_FIXED_LEN_BYTE_ARRAY: return "FIXED_LEN_BYTE_ARRAY"
+  }
+  return "<UNSET>"
+}
+
+func TypeFromString(s string) (Type, error) {
+  switch s {
+  case "BOOLEAN": return Type_BOOLEAN, nil 
+  case "INT32": return Type_INT32, nil 
+  case "INT64": return Type_INT64, nil 
+  case "INT96": return Type_INT96, nil 
+  case "FLOAT": return Type_FLOAT, nil 
+  case "DOUBLE": return Type_DOUBLE, nil 
+  case "BYTE_ARRAY": return Type_BYTE_ARRAY, nil 
+  case "FIXED_LEN_BYTE_ARRAY": return Type_FIXED_LEN_BYTE_ARRAY, nil 
+  }
+  return Type(0), fmt.Errorf("not a valid Type string")
+}
+
+
+func TypePtr(v Type) *Type { return &v }
+
+func (p Type) MarshalText() ([]byte, error) {
+return []byte(p.String()), nil
+}
+
+func (p *Type) UnmarshalText(text []byte) error {
+q, err := TypeFromString(string(text))
+if (err != nil) {
+return err
+}
+*p = q
+return nil
+}
+
+func (p *Type) Scan(value interface{}) error {
+v, ok := value.(int64)
+if !ok {
+return errors.New("Scan value is not int64")
+}
+*p = Type(v)
+return nil
+}
+
+func (p * Type) Value() (driver.Value, error) {
+  if p == nil {
+    return nil, nil
+  }
+return int64(*p), nil
+}
+//Common types used by frameworks(e.g. hive, pig) using parquet.  This helps map
+//between types in those frameworks to the base types in parquet.  This is only
+//metadata and not needed to read or write the data.
+type ConvertedType int64
+const (
+  ConvertedType_UTF8 ConvertedType = 0
+  ConvertedType_MAP ConvertedType = 1
+  ConvertedType_MAP_KEY_VALUE ConvertedType = 2
+  ConvertedType_LIST ConvertedType = 3
+  ConvertedType_ENUM ConvertedType = 4
+  ConvertedType_DECIMAL ConvertedType = 5
+  ConvertedType_DATE ConvertedType = 6
+  ConvertedType_TIME_MILLIS ConvertedType = 7
+  ConvertedType_TIME_MICROS ConvertedType = 8
+  ConvertedType_TIMESTAMP_MILLIS ConvertedType = 9
+  ConvertedType_TIMESTAMP_MICROS ConvertedType = 10
+  ConvertedType_UINT_8 ConvertedType = 11
+  ConvertedType_UINT_16 ConvertedType = 12
+  ConvertedType_UINT_32 ConvertedType = 13
+  ConvertedType_UINT_64 ConvertedType = 14
+  ConvertedType_INT_8 ConvertedType = 15
+  ConvertedType_INT_16 ConvertedType = 16
+  ConvertedType_INT_32 ConvertedType = 17
+  ConvertedType_INT_64 ConvertedType = 18
+  ConvertedType_JSON ConvertedType = 19
+  ConvertedType_BSON ConvertedType = 20
+  ConvertedType_INTERVAL ConvertedType = 21
+)
+
+func (p ConvertedType) String() string {
+  switch p {
+  case ConvertedType_UTF8: return "UTF8"
+  case ConvertedType_MAP: return "MAP"
+  case ConvertedType_MAP_KEY_VALUE: return "MAP_KEY_VALUE"
+  case ConvertedType_LIST: return "LIST"
+  case ConvertedType_ENUM: return "ENUM"
+  case ConvertedType_DECIMAL: return "DECIMAL"
+  case ConvertedType_DATE: return "DATE"
+  case ConvertedType_TIME_MILLIS: return "TIME_MILLIS"
+  case ConvertedType_TIME_MICROS: return "TIME_MICROS"
+  case ConvertedType_TIMESTAMP_MILLIS: return "TIMESTAMP_MILLIS"
+  case ConvertedType_TIMESTAMP_MICROS: return "TIMESTAMP_MICROS"
+  case ConvertedType_UINT_8: return "UINT_8"
+  case ConvertedType_UINT_16: return "UINT_16"
+  case ConvertedType_UINT_32: return "UINT_32"
+  case ConvertedType_UINT_64: return "UINT_64"
+  case ConvertedType_INT_8: return "INT_8"
+  case ConvertedType_INT_16: return "INT_16"
+  case ConvertedType_INT_32: return "INT_32"
+  case ConvertedType_INT_64: return "INT_64"
+  case ConvertedType_JSON: return "JSON"
+  case ConvertedType_BSON: return "BSON"
+  case ConvertedType_INTERVAL: return "INTERVAL"
+  }
+  return "<UNSET>"
+}
+
+func ConvertedTypeFromString(s string) (ConvertedType, error) {
+  switch s {
+  case "UTF8": return ConvertedType_UTF8, nil 
+  case "MAP": return ConvertedType_MAP, nil 
+  case "MAP_KEY_VALUE": return ConvertedType_MAP_KEY_VALUE, nil 
+  case "LIST": return ConvertedType_LIST, nil 
+  case "ENUM": return ConvertedType_ENUM, nil 
+  case "DECIMAL": return ConvertedType_DECIMAL, nil 
+  case "DATE": return ConvertedType_DATE, nil 
+  case "TIME_MILLIS": return ConvertedType_TIME_MILLIS, nil 
+  case "TIME_MICROS": return ConvertedType_TIME_MICROS, nil 
+  case "TIMESTAMP_MILLIS": return ConvertedType_TIMESTAMP_MILLIS, nil 
+  case "TIMESTAMP_MICROS": return ConvertedType_TIMESTAMP_MICROS, nil 
+  case "UINT_8": return ConvertedType_UINT_8, nil 
+  case "UINT_16": return ConvertedType_UINT_16, nil 
+  case "UINT_32": return ConvertedType_UINT_32, nil 
+  case "UINT_64": return ConvertedType_UINT_64, nil 
+  case "INT_8": return ConvertedType_INT_8, nil 
+  case "INT_16": return ConvertedType_INT_16, nil 
+  case "INT_32": return ConvertedType_INT_32, nil 
+  case "INT_64": return ConvertedType_INT_64, nil 
+  case "JSON": return ConvertedType_JSON, nil 
+  case "BSON": return ConvertedType_BSON, nil 
+  case "INTERVAL": return ConvertedType_INTERVAL, nil 
+  }
+  return ConvertedType(0), fmt.Errorf("not a valid ConvertedType string")
+}
+
+
+func ConvertedTypePtr(v ConvertedType) *ConvertedType { return &v }
+
+func (p ConvertedType) MarshalText() ([]byte, error) {
+return []byte(p.String()), nil
+}
+
+func (p *ConvertedType) UnmarshalText(text []byte) error {
+q, err := ConvertedTypeFromString(string(text))
+if (err != nil) {
+return err
+}
+*p = q
+return nil
+}
+
+func (p *ConvertedType) Scan(value interface{}) error {
+v, ok := value.(int64)
+if !ok {
+return errors.New("Scan value is not int64")
+}
+*p = ConvertedType(v)
+return nil
+}
+
+func (p * ConvertedType) Value() (driver.Value, error) {
+  if p == nil {
+    return nil, nil
+  }
+return int64(*p), nil
+}
+//Representation of Schemas
+type FieldRepetitionType int64
+const (
+  FieldRepetitionType_REQUIRED FieldRepetitionType = 0
+  FieldRepetitionType_OPTIONAL FieldRepetitionType = 1
+  FieldRepetitionType_REPEATED FieldRepetitionType = 2
+)
+
+func (p FieldRepetitionType) String() string {
+  switch p {
+  case FieldRepetitionType_REQUIRED: return "REQUIRED"
+  case FieldRepetitionType_OPTIONAL: return "OPTIONAL"
+  case FieldRepetitionType_REPEATED: return "REPEATED"
+  }
+  return "<UNSET>"
+}
+
+func FieldRepetitionTypeFromString(s string) (FieldRepetitionType, error) {
+  switch s {
+  case "REQUIRED": return FieldRepetitionType_REQUIRED, nil 
+  case "OPTIONAL": return FieldRepetitionType_OPTIONAL, nil 
+  case "REPEATED": return FieldRepetitionType_REPEATED, nil 
+  }
+  return FieldRepetitionType(0), fmt.Errorf("not a valid FieldRepetitionType string")
+}
+
+
+func FieldRepetitionTypePtr(v FieldRepetitionType) *FieldRepetitionType { return &v }
+
+func (p FieldRepetitionType) MarshalText() ([]byte, error) {
+return []byte(p.String()), nil
+}
+
+func (p *FieldRepetitionType) UnmarshalText(text []byte) error {
+q, err := FieldRepetitionTypeFromString(string(text))
+if (err != nil) {
+return err
+}
+*p = q
+return nil
+}
+
+func (p *FieldRepetitionType) Scan(value interface{}) error {
+v, ok := value.(int64)
+if !ok {
+return errors.New("Scan value is not int64")
+}
+*p = FieldRepetitionType(v)
+return nil
+}
+
+func (p * FieldRepetitionType) Value() (driver.Value, error) {
+  if p == nil {
+    return nil, nil
+  }
+return int64(*p), nil
+}
+//Encodings supported by Parquet.  Not all encodings are valid for all types.  These
+//enums are also used to specify the encoding of definition and repetition levels.
+//See the accompanying doc for the details of the more complicated encodings.
+type Encoding int64
+const (
+  Encoding_PLAIN Encoding = 0
+  Encoding_PLAIN_DICTIONARY Encoding = 2
+  Encoding_RLE Encoding = 3
+  Encoding_BIT_PACKED Encoding = 4
+  Encoding_DELTA_BINARY_PACKED Encoding = 5
+  Encoding_DELTA_LENGTH_BYTE_ARRAY Encoding = 6
+  Encoding_DELTA_BYTE_ARRAY Encoding = 7
+  Encoding_RLE_DICTIONARY Encoding = 8
+  Encoding_BYTE_STREAM_SPLIT Encoding = 9
+)
+
+func (p Encoding) String() string {
+  switch p {
+  case Encoding_PLAIN: return "PLAIN"
+  case Encoding_PLAIN_DICTIONARY: return "PLAIN_DICTIONARY"
+  case Encoding_RLE: return "RLE"
+  case Encoding_BIT_PACKED: return "BIT_PACKED"
+  case Encoding_DELTA_BINARY_PACKED: return "DELTA_BINARY_PACKED"
+  case Encoding_DELTA_LENGTH_BYTE_ARRAY: return "DELTA_LENGTH_BYTE_ARRAY"
+  case Encoding_DELTA_BYTE_ARRAY: return "DELTA_BYTE_ARRAY"
+  case Encoding_RLE_DICTIONARY: return "RLE_DICTIONARY"
+  case Encoding_BYTE_STREAM_SPLIT: return "BYTE_STREAM_SPLIT"
+  }
+  return "<UNSET>"
+}
+
+func EncodingFromString(s string) (Encoding, error) {
+  switch s {
+  case "PLAIN": return Encoding_PLAIN, nil 
+  case "PLAIN_DICTIONARY": return Encoding_PLAIN_DICTIONARY, nil 
+  case "RLE": return Encoding_RLE, nil 
+  case "BIT_PACKED": return Encoding_BIT_PACKED, nil 
+  case "DELTA_BINARY_PACKED": return Encoding_DELTA_BINARY_PACKED, nil 
+  case "DELTA_LENGTH_BYTE_ARRAY": return Encoding_DELTA_LENGTH_BYTE_ARRAY, nil 
+  case "DELTA_BYTE_ARRAY": return Encoding_DELTA_BYTE_ARRAY, nil 
+  case "RLE_DICTIONARY": return Encoding_RLE_DICTIONARY, nil 
+  case "BYTE_STREAM_SPLIT": return Encoding_BYTE_STREAM_SPLIT, nil 
+  }
+  return Encoding(0), fmt.Errorf("not a valid Encoding string")
+}
+
+
+func EncodingPtr(v Encoding) *Encoding { return &v }
+
+func (p Encoding) MarshalText() ([]byte, error) {
+return []byte(p.String()), nil
+}
+
+func (p *Encoding) UnmarshalText(text []byte) error {
+q, err := EncodingFromString(string(text))
+if (err != nil) {
+return err
+}
+*p = q
+return nil
+}
+
+func (p *Encoding) Scan(value interface{}) error {
+v, ok := value.(int64)
+if !ok {
+return errors.New("Scan value is not int64")
+}
+*p = Encoding(v)
+return nil
+}
+
+func (p * Encoding) Value() (driver.Value, error) {
+  if p == nil {
+    return nil, nil
+  }
+return int64(*p), nil
+}
+//Supported compression algorithms.
+//
+//Codecs added in 2.4 can be read by readers based on 2.4 and later.
+//Codec support may vary between readers based on the format version and
+//libraries available at runtime. Gzip, Snappy, and LZ4 codecs are
+//widely available, while Zstd and Brotli require additional libraries.
+type CompressionCodec int64
+const (
+  CompressionCodec_UNCOMPRESSED CompressionCodec = 0
+  CompressionCodec_SNAPPY CompressionCodec = 1
+  CompressionCodec_GZIP CompressionCodec = 2
+  CompressionCodec_LZO CompressionCodec = 3
+  CompressionCodec_BROTLI CompressionCodec = 4
+  CompressionCodec_LZ4 CompressionCodec = 5
+  CompressionCodec_ZSTD CompressionCodec = 6
+)
+
+func (p CompressionCodec) String() string {
+  switch p {
+  case CompressionCodec_UNCOMPRESSED: return "UNCOMPRESSED"
+  case CompressionCodec_SNAPPY: return "SNAPPY"
+  case CompressionCodec_GZIP: return "GZIP"
+  case CompressionCodec_LZO: return "LZO"
+  case CompressionCodec_BROTLI: return "BROTLI"
+  case CompressionCodec_LZ4: return "LZ4"
+  case CompressionCodec_ZSTD: return "ZSTD"
+  }
+  return "<UNSET>"
+}
+
+func CompressionCodecFromString(s string) (CompressionCodec, error) {
+  switch s {
+  case "UNCOMPRESSED": return CompressionCodec_UNCOMPRESSED, nil 
+  case "SNAPPY": return CompressionCodec_SNAPPY, nil 
+  case "GZIP": return CompressionCodec_GZIP, nil 
+  case "LZO": return CompressionCodec_LZO, nil 
+  case "BROTLI": return CompressionCodec_BROTLI, nil 
+  case "LZ4": return CompressionCodec_LZ4, nil 
+  case "ZSTD": return CompressionCodec_ZSTD, nil 
+  }
+  return CompressionCodec(0), fmt.Errorf("not a valid CompressionCodec string")
+}
+
+
+func CompressionCodecPtr(v CompressionCodec) *CompressionCodec { return &v }
+
+func (p CompressionCodec) MarshalText() ([]byte, error) {
+return []byte(p.String()), nil
+}
+
+func (p *CompressionCodec) UnmarshalText(text []byte) error {
+q, err := CompressionCodecFromString(string(text))
+if (err != nil) {
+return err
+}
+*p = q
+return nil
+}
+
+func (p *CompressionCodec) Scan(value interface{}) error {
+v, ok := value.(int64)
+if !ok {
+return errors.New("Scan value is not int64")
+}
+*p = CompressionCodec(v)
+return nil
+}
+
+func (p * CompressionCodec) Value() (driver.Value, error) {
+  if p == nil {
+    return nil, nil
+  }
+return int64(*p), nil
+}
+type PageType int64
+const (
+  PageType_DATA_PAGE PageType = 0
+  PageType_INDEX_PAGE PageType = 1
+  PageType_DICTIONARY_PAGE PageType = 2
+  PageType_DATA_PAGE_V2 PageType = 3
+)
+
+func (p PageType) String() string {
+  switch p {
+  case PageType_DATA_PAGE: return "DATA_PAGE"
+  case PageType_INDEX_PAGE: return "INDEX_PAGE"
+  case PageType_DICTIONARY_PAGE: return "DICTIONARY_PAGE"
+  case PageType_DATA_PAGE_V2: return "DATA_PAGE_V2"
+  }
+  return "<UNSET>"
+}
+
+func PageTypeFromString(s string) (PageType, error) {
+  switch s {
+  case "DATA_PAGE": return PageType_DATA_PAGE, nil 
+  case "INDEX_PAGE": return PageType_INDEX_PAGE, nil 
+  case "DICTIONARY_PAGE": return PageType_DICTIONARY_PAGE, nil 
+  case "DATA_PAGE_V2": return PageType_DATA_PAGE_V2, nil 
+  }
+  return PageType(0), fmt.Errorf("not a valid PageType string")
+}
+
+
+func PageTypePtr(v PageType) *PageType { return &v }
+
+func (p PageType) MarshalText() ([]byte, error) {
+return []byte(p.String()), nil
+}
+
+func (p *PageType) UnmarshalText(text []byte) error {
+q, err := PageTypeFromString(string(text))
+if (err != nil) {
+return err
+}
+*p = q
+return nil
+}
+
+func (p *PageType) Scan(value interface{}) error {
+v, ok := value.(int64)
+if !ok {
+return errors.New("Scan value is not int64")
+}
+*p = PageType(v)
+return nil
+}
+
+func (p * PageType) Value() (driver.Value, error) {
+  if p == nil {
+    return nil, nil
+  }
+return int64(*p), nil
+}
+//Enum to annotate whether lists of min/max elements inside ColumnIndex
+//are ordered and if so, in which direction.
+type BoundaryOrder int64
+const (
+  BoundaryOrder_UNORDERED BoundaryOrder = 0
+  BoundaryOrder_ASCENDING BoundaryOrder = 1
+  BoundaryOrder_DESCENDING BoundaryOrder = 2
+)
+
+func (p BoundaryOrder) String() string {
+  switch p {
+  case BoundaryOrder_UNORDERED: return "UNORDERED"
+  case BoundaryOrder_ASCENDING: return "ASCENDING"
+  case BoundaryOrder_DESCENDING: return "DESCENDING"
+  }
+  return "<UNSET>"
+}
+
+func BoundaryOrderFromString(s string) (BoundaryOrder, error) {
+  switch s {
+  case "UNORDERED": return BoundaryOrder_UNORDERED, nil 
+  case "ASCENDING": return BoundaryOrder_ASCENDING, nil 
+  case "DESCENDING": return BoundaryOrder_DESCENDING, nil 
+  }
+  return BoundaryOrder(0), fmt.Errorf("not a valid BoundaryOrder string")
+}
+
+
+func BoundaryOrderPtr(v BoundaryOrder) *BoundaryOrder { return &v }
+
+func (p BoundaryOrder) MarshalText() ([]byte, error) {
+return []byte(p.String()), nil
+}
+
+func (p *BoundaryOrder) UnmarshalText(text []byte) error {
+q, err := BoundaryOrderFromString(string(text))
+if (err != nil) {
+return err
+}
+*p = q
+return nil
+}
+
+func (p *BoundaryOrder) Scan(value interface{}) error {
+v, ok := value.(int64)
+if !ok {
+return errors.New("Scan value is not int64")
+}
+*p = BoundaryOrder(v)
+return nil
+}
+
+func (p * BoundaryOrder) Value() (driver.Value, error) {
+  if p == nil {
+    return nil, nil
+  }
+return int64(*p), nil
+}
+// Statistics per row group and per page
+// All fields are optional.
+// 
+// Attributes:
+//  - Max: DEPRECATED: min and max value of the column. Use min_value and max_value.
+// 
+// Values are encoded using PLAIN encoding, except that variable-length byte
+// arrays do not include a length prefix.
+// 
+// These fields encode min and max values determined by signed comparison
+// only. New files should use the correct order for a column's logical type
+// and store the values in the min_value and max_value fields.
+// 
+// To support older readers, these may be set when the column order is
+// signed.
+//  - Min
+//  - NullCount: count of null value in the column
+//  - DistinctCount: count of distinct values occurring
+//  - MaxValue: Min and max values for the column, determined by its ColumnOrder.
+// 
+// Values are encoded using PLAIN encoding, except that variable-length byte
+// arrays do not include a length prefix.
+//  - MinValue
+type Statistics struct {
+  Max []byte `thrift:"max,1" db:"max" json:"max,omitempty"`
+  Min []byte `thrift:"min,2" db:"min" json:"min,omitempty"`
+  NullCount *int64 `thrift:"null_count,3" db:"null_count" json:"null_count,omitempty"`
+  DistinctCount *int64 `thrift:"distinct_count,4" db:"distinct_count" json:"distinct_count,omitempty"`
+  MaxValue []byte `thrift:"max_value,5" db:"max_value" json:"max_value,omitempty"`
+  MinValue []byte `thrift:"min_value,6" db:"min_value" json:"min_value,omitempty"`
+}
+
+func NewStatistics() *Statistics {
+  return &Statistics{}
+}
+
+var Statistics_Max_DEFAULT []byte
+
+func (p *Statistics) GetMax() []byte {
+  return p.Max
+}
+var Statistics_Min_DEFAULT []byte
+
+func (p *Statistics) GetMin() []byte {
+  return p.Min
+}
+var Statistics_NullCount_DEFAULT int64
+func (p *Statistics) GetNullCount() int64 {
+  if !p.IsSetNullCount() {
+    return Statistics_NullCount_DEFAULT
+  }
+return *p.NullCount
+}
+var Statistics_DistinctCount_DEFAULT int64
+func (p *Statistics) GetDistinctCount() int64 {
+  if !p.IsSetDistinctCount() {
+    return Statistics_DistinctCount_DEFAULT
+  }
+return *p.DistinctCount
+}
+var Statistics_MaxValue_DEFAULT []byte
+
+func (p *Statistics) GetMaxValue() []byte {
+  return p.MaxValue
+}
+var Statistics_MinValue_DEFAULT []byte
+
+func (p *Statistics) GetMinValue() []byte {
+  return p.MinValue
+}
+func (p *Statistics) IsSetMax() bool {
+  return p.Max != nil
+}
+
+func (p *Statistics) IsSetMin() bool {
+  return p.Min != nil
+}
+
+func (p *Statistics) IsSetNullCount() bool {
+  return p.NullCount != nil
+}
+
+func (p *Statistics) IsSetDistinctCount() bool {
+  return p.DistinctCount != nil
+}
+
+func (p *Statistics) IsSetMaxValue() bool {
+  return p.MaxValue != nil
+}
+
+func (p *Statistics) IsSetMinValue() bool {
+  return p.MinValue != nil
+}
+
+func (p *Statistics) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    switch fieldId {
+    case 1:
+      if fieldTypeId == thrift.STRING {
+        if err := p.ReadField1(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 2:
+      if fieldTypeId == thrift.STRING {
+        if err := p.ReadField2(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 3:
+      if fieldTypeId == thrift.I64 {
+        if err := p.ReadField3(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 4:
+      if fieldTypeId == thrift.I64 {
+        if err := p.ReadField4(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 5:
+      if fieldTypeId == thrift.STRING {
+        if err := p.ReadField5(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 6:
+      if fieldTypeId == thrift.STRING {
+        if err := p.ReadField6(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    default:
+      if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+        return err
+      }
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  return nil
+}
+
+func (p *Statistics)  ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadBinary(ctx); err != nil {
+  return thrift.PrependError("error reading field 1: ", err)
+} else {
+  p.Max = v
+}
+  return nil
+}
+
+func (p *Statistics)  ReadField2(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadBinary(ctx); err != nil {
+  return thrift.PrependError("error reading field 2: ", err)
+} else {
+  p.Min = v
+}
+  return nil
+}
+
+func (p *Statistics)  ReadField3(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI64(ctx); err != nil {
+  return thrift.PrependError("error reading field 3: ", err)
+} else {
+  p.NullCount = &v
+}
+  return nil
+}
+
+func (p *Statistics)  ReadField4(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI64(ctx); err != nil {
+  return thrift.PrependError("error reading field 4: ", err)
+} else {
+  p.DistinctCount = &v
+}
+  return nil
+}
+
+func (p *Statistics)  ReadField5(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadBinary(ctx); err != nil {
+  return thrift.PrependError("error reading field 5: ", err)
+} else {
+  p.MaxValue = v
+}
+  return nil
+}
+
+func (p *Statistics)  ReadField6(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadBinary(ctx); err != nil {
+  return thrift.PrependError("error reading field 6: ", err)
+} else {
+  p.MinValue = v
+}
+  return nil
+}
+
+func (p *Statistics) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if err := oprot.WriteStructBegin(ctx, "Statistics"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+    if err := p.writeField1(ctx, oprot); err != nil { return err }
+    if err := p.writeField2(ctx, oprot); err != nil { return err }
+    if err := p.writeField3(ctx, oprot); err != nil { return err }
+    if err := p.writeField4(ctx, oprot); err != nil { return err }
+    if err := p.writeField5(ctx, oprot); err != nil { return err }
+    if err := p.writeField6(ctx, oprot); err != nil { return err }
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *Statistics) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetMax() {
+    if err := oprot.WriteFieldBegin(ctx, "max", thrift.STRING, 1); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:max: ", p), err) }
+    if err := oprot.WriteBinary(ctx, p.Max); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T.max (1) field write error: ", p), err) }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 1:max: ", p), err) }
+  }
+  return err
+}
+
+func (p *Statistics) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetMin() {
+    if err := oprot.WriteFieldBegin(ctx, "min", thrift.STRING, 2); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:min: ", p), err) }
+    if err := oprot.WriteBinary(ctx, p.Min); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T.min (2) field write error: ", p), err) }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 2:min: ", p), err) }
+  }
+  return err
+}
+
+func (p *Statistics) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetNullCount() {
+    if err := oprot.WriteFieldBegin(ctx, "null_count", thrift.I64, 3); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:null_count: ", p), err) }
+    if err := oprot.WriteI64(ctx, int64(*p.NullCount)); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T.null_count (3) field write error: ", p), err) }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 3:null_count: ", p), err) }
+  }
+  return err
+}
+
+func (p *Statistics) writeField4(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetDistinctCount() {
+    if err := oprot.WriteFieldBegin(ctx, "distinct_count", thrift.I64, 4); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:distinct_count: ", p), err) }
+    if err := oprot.WriteI64(ctx, int64(*p.DistinctCount)); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T.distinct_count (4) field write error: ", p), err) }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 4:distinct_count: ", p), err) }
+  }
+  return err
+}
+
+func (p *Statistics) writeField5(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetMaxValue() {
+    if err := oprot.WriteFieldBegin(ctx, "max_value", thrift.STRING, 5); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:max_value: ", p), err) }
+    if err := oprot.WriteBinary(ctx, p.MaxValue); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T.max_value (5) field write error: ", p), err) }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 5:max_value: ", p), err) }
+  }
+  return err
+}
+
+func (p *Statistics) writeField6(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetMinValue() {
+    if err := oprot.WriteFieldBegin(ctx, "min_value", thrift.STRING, 6); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:min_value: ", p), err) }
+    if err := oprot.WriteBinary(ctx, p.MinValue); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T.min_value (6) field write error: ", p), err) }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 6:min_value: ", p), err) }
+  }
+  return err
+}
+
+func (p *Statistics) Equals(other *Statistics) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  if bytes.Compare(p.Max, other.Max) != 0 { return false }
+  if bytes.Compare(p.Min, other.Min) != 0 { return false }
+  if p.NullCount != other.NullCount {
+    if p.NullCount == nil || other.NullCount == nil {
+      return false
+    }
+    if (*p.NullCount) != (*other.NullCount) { return false }
+  }
+  if p.DistinctCount != other.DistinctCount {
+    if p.DistinctCount == nil || other.DistinctCount == nil {
+      return false
+    }
+    if (*p.DistinctCount) != (*other.DistinctCount) { return false }
+  }
+  if bytes.Compare(p.MaxValue, other.MaxValue) != 0 { return false }
+  if bytes.Compare(p.MinValue, other.MinValue) != 0 { return false }
+  return true
+}
+
+func (p *Statistics) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("Statistics(%+v)", *p)
+}
+
+// Empty structs to use as logical type annotations
+type StringType struct {
+}
+
+func NewStringType() *StringType {
+  return &StringType{}
+}
+
+func (p *StringType) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+      return err
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  return nil
+}
+
+func (p *StringType) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if err := oprot.WriteStructBegin(ctx, "StringType"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *StringType) Equals(other *StringType) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  return true
+}
+
+func (p *StringType) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("StringType(%+v)", *p)
+}
+
+type UUIDType struct {
+}
+
+func NewUUIDType() *UUIDType {
+  return &UUIDType{}
+}
+
+func (p *UUIDType) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+      return err
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  return nil
+}
+
+func (p *UUIDType) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if err := oprot.WriteStructBegin(ctx, "UUIDType"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *UUIDType) Equals(other *UUIDType) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  return true
+}
+
+func (p *UUIDType) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("UUIDType(%+v)", *p)
+}
+
+type MapType struct {
+}
+
+func NewMapType() *MapType {
+  return &MapType{}
+}
+
+func (p *MapType) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+      return err
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  return nil
+}
+
+func (p *MapType) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if err := oprot.WriteStructBegin(ctx, "MapType"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *MapType) Equals(other *MapType) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  return true
+}
+
+func (p *MapType) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("MapType(%+v)", *p)
+}
+
+type ListType struct {
+}
+
+func NewListType() *ListType {
+  return &ListType{}
+}
+
+func (p *ListType) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+      return err
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  return nil
+}
+
+func (p *ListType) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if err := oprot.WriteStructBegin(ctx, "ListType"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *ListType) Equals(other *ListType) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  return true
+}
+
+func (p *ListType) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("ListType(%+v)", *p)
+}
+
+type EnumType struct {
+}
+
+func NewEnumType() *EnumType {
+  return &EnumType{}
+}
+
+func (p *EnumType) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+      return err
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  return nil
+}
+
+func (p *EnumType) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if err := oprot.WriteStructBegin(ctx, "EnumType"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *EnumType) Equals(other *EnumType) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  return true
+}
+
+func (p *EnumType) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("EnumType(%+v)", *p)
+}
+
+type DateType struct {
+}
+
+func NewDateType() *DateType {
+  return &DateType{}
+}
+
+func (p *DateType) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+      return err
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  return nil
+}
+
+func (p *DateType) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if err := oprot.WriteStructBegin(ctx, "DateType"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *DateType) Equals(other *DateType) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  return true
+}
+
+func (p *DateType) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("DateType(%+v)", *p)
+}
+
+// Logical type to annotate a column that is always null.
+// 
+// Sometimes when discovering the schema of existing data, values are always
+// null and the physical type can't be determined. This annotation signals
+// the case where the physical type was guessed from all null values.
+type NullType struct {
+}
+
+func NewNullType() *NullType {
+  return &NullType{}
+}
+
+func (p *NullType) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+      return err
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  return nil
+}
+
+func (p *NullType) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if err := oprot.WriteStructBegin(ctx, "NullType"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *NullType) Equals(other *NullType) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  return true
+}
+
+func (p *NullType) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("NullType(%+v)", *p)
+}
+
+// Decimal logical type annotation
+// 
+// To maintain forward-compatibility in v1, implementations using this logical
+// type must also set scale and precision on the annotated SchemaElement.
+// 
+// Allowed for physical types: INT32, INT64, FIXED, and BINARY
+// 
+// Attributes:
+//  - Scale
+//  - Precision
+type DecimalType struct {
+  Scale int32 `thrift:"scale,1,required" db:"scale" json:"scale"`
+  Precision int32 `thrift:"precision,2,required" db:"precision" json:"precision"`
+}
+
+func NewDecimalType() *DecimalType {
+  return &DecimalType{}
+}
+
+
+func (p *DecimalType) GetScale() int32 {
+  return p.Scale
+}
+
+func (p *DecimalType) GetPrecision() int32 {
+  return p.Precision
+}
+func (p *DecimalType) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+  var issetScale bool = false;
+  var issetPrecision bool = false;
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    switch fieldId {
+    case 1:
+      if fieldTypeId == thrift.I32 {
+        if err := p.ReadField1(ctx, iprot); err != nil {
+          return err
+        }
+        issetScale = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 2:
+      if fieldTypeId == thrift.I32 {
+        if err := p.ReadField2(ctx, iprot); err != nil {
+          return err
+        }
+        issetPrecision = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    default:
+      if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+        return err
+      }
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  if !issetScale{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Scale is not set"));
+  }
+  if !issetPrecision{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Precision is not set"));
+  }
+  return nil
+}
+
+func (p *DecimalType)  ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI32(ctx); err != nil {
+  return thrift.PrependError("error reading field 1: ", err)
+} else {
+  p.Scale = v
+}
+  return nil
+}
+
+func (p *DecimalType)  ReadField2(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI32(ctx); err != nil {
+  return thrift.PrependError("error reading field 2: ", err)
+} else {
+  p.Precision = v
+}
+  return nil
+}
+
+func (p *DecimalType) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if err := oprot.WriteStructBegin(ctx, "DecimalType"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+    if err := p.writeField1(ctx, oprot); err != nil { return err }
+    if err := p.writeField2(ctx, oprot); err != nil { return err }
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *DecimalType) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "scale", thrift.I32, 1); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:scale: ", p), err) }
+  if err := oprot.WriteI32(ctx, int32(p.Scale)); err != nil {
+  return thrift.PrependError(fmt.Sprintf("%T.scale (1) field write error: ", p), err) }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 1:scale: ", p), err) }
+  return err
+}
+
+func (p *DecimalType) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "precision", thrift.I32, 2); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:precision: ", p), err) }
+  if err := oprot.WriteI32(ctx, int32(p.Precision)); err != nil {
+  return thrift.PrependError(fmt.Sprintf("%T.precision (2) field write error: ", p), err) }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 2:precision: ", p), err) }
+  return err
+}
+
+func (p *DecimalType) Equals(other *DecimalType) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  if p.Scale != other.Scale { return false }
+  if p.Precision != other.Precision { return false }
+  return true
+}
+
+func (p *DecimalType) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("DecimalType(%+v)", *p)
+}
+
+// Time units for logical types
+type MilliSeconds struct {
+}
+
+func NewMilliSeconds() *MilliSeconds {
+  return &MilliSeconds{}
+}
+
+func (p *MilliSeconds) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+      return err
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  return nil
+}
+
+func (p *MilliSeconds) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if err := oprot.WriteStructBegin(ctx, "MilliSeconds"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *MilliSeconds) Equals(other *MilliSeconds) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  return true
+}
+
+func (p *MilliSeconds) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("MilliSeconds(%+v)", *p)
+}
+
+type MicroSeconds struct {
+}
+
+func NewMicroSeconds() *MicroSeconds {
+  return &MicroSeconds{}
+}
+
+func (p *MicroSeconds) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+      return err
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  return nil
+}
+
+func (p *MicroSeconds) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if err := oprot.WriteStructBegin(ctx, "MicroSeconds"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *MicroSeconds) Equals(other *MicroSeconds) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  return true
+}
+
+func (p *MicroSeconds) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("MicroSeconds(%+v)", *p)
+}
+
+type NanoSeconds struct {
+}
+
+func NewNanoSeconds() *NanoSeconds {
+  return &NanoSeconds{}
+}
+
+func (p *NanoSeconds) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+      return err
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  return nil
+}
+
+func (p *NanoSeconds) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if err := oprot.WriteStructBegin(ctx, "NanoSeconds"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *NanoSeconds) Equals(other *NanoSeconds) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  return true
+}
+
+func (p *NanoSeconds) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("NanoSeconds(%+v)", *p)
+}
+
+// Attributes:
+//  - MILLIS
+//  - MICROS
+//  - NANOS
+type TimeUnit struct {
+  MILLIS *MilliSeconds `thrift:"MILLIS,1" db:"MILLIS" json:"MILLIS,omitempty"`
+  MICROS *MicroSeconds `thrift:"MICROS,2" db:"MICROS" json:"MICROS,omitempty"`
+  NANOS *NanoSeconds `thrift:"NANOS,3" db:"NANOS" json:"NANOS,omitempty"`
+}
+
+func NewTimeUnit() *TimeUnit {
+  return &TimeUnit{}
+}
+
+var TimeUnit_MILLIS_DEFAULT *MilliSeconds
+func (p *TimeUnit) GetMILLIS() *MilliSeconds {
+  if !p.IsSetMILLIS() {
+    return TimeUnit_MILLIS_DEFAULT
+  }
+return p.MILLIS
+}
+var TimeUnit_MICROS_DEFAULT *MicroSeconds
+func (p *TimeUnit) GetMICROS() *MicroSeconds {
+  if !p.IsSetMICROS() {
+    return TimeUnit_MICROS_DEFAULT
+  }
+return p.MICROS
+}
+var TimeUnit_NANOS_DEFAULT *NanoSeconds
+func (p *TimeUnit) GetNANOS() *NanoSeconds {
+  if !p.IsSetNANOS() {
+    return TimeUnit_NANOS_DEFAULT
+  }
+return p.NANOS
+}
+func (p *TimeUnit) CountSetFieldsTimeUnit() int {
+  count := 0
+  if (p.IsSetMILLIS()) {
+    count++
+  }
+  if (p.IsSetMICROS()) {
+    count++
+  }
+  if (p.IsSetNANOS()) {
+    count++
+  }
+  return count
+
+}
+
+func (p *TimeUnit) IsSetMILLIS() bool {
+  return p.MILLIS != nil
+}
+
+func (p *TimeUnit) IsSetMICROS() bool {
+  return p.MICROS != nil
+}
+
+func (p *TimeUnit) IsSetNANOS() bool {
+  return p.NANOS != nil
+}
+
+func (p *TimeUnit) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    switch fieldId {
+    case 1:
+      if fieldTypeId == thrift.STRUCT {
+        if err := p.ReadField1(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 2:
+      if fieldTypeId == thrift.STRUCT {
+        if err := p.ReadField2(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 3:
+      if fieldTypeId == thrift.STRUCT {
+        if err := p.ReadField3(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    default:
+      if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+        return err
+      }
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  return nil
+}
+
+func (p *TimeUnit)  ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
+  p.MILLIS = &MilliSeconds{}
+  if err := p.MILLIS.Read(ctx, iprot); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.MILLIS), err)
+  }
+  return nil
+}
+
+func (p *TimeUnit)  ReadField2(ctx context.Context, iprot thrift.TProtocol) error {
+  p.MICROS = &MicroSeconds{}
+  if err := p.MICROS.Read(ctx, iprot); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.MICROS), err)
+  }
+  return nil
+}
+
+func (p *TimeUnit)  ReadField3(ctx context.Context, iprot thrift.TProtocol) error {
+  p.NANOS = &NanoSeconds{}
+  if err := p.NANOS.Read(ctx, iprot); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.NANOS), err)
+  }
+  return nil
+}
+
+func (p *TimeUnit) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if c := p.CountSetFieldsTimeUnit(); c != 1 {
+    return fmt.Errorf("%T write union: exactly one field must be set (%d set).", p, c)
+  }
+  if err := oprot.WriteStructBegin(ctx, "TimeUnit"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+    if err := p.writeField1(ctx, oprot); err != nil { return err }
+    if err := p.writeField2(ctx, oprot); err != nil { return err }
+    if err := p.writeField3(ctx, oprot); err != nil { return err }
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *TimeUnit) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetMILLIS() {
+    if err := oprot.WriteFieldBegin(ctx, "MILLIS", thrift.STRUCT, 1); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:MILLIS: ", p), err) }
+    if err := p.MILLIS.Write(ctx, oprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.MILLIS), err)
+    }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 1:MILLIS: ", p), err) }
+  }
+  return err
+}
+
+func (p *TimeUnit) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetMICROS() {
+    if err := oprot.WriteFieldBegin(ctx, "MICROS", thrift.STRUCT, 2); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:MICROS: ", p), err) }
+    if err := p.MICROS.Write(ctx, oprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.MICROS), err)
+    }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 2:MICROS: ", p), err) }
+  }
+  return err
+}
+
+func (p *TimeUnit) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetNANOS() {
+    if err := oprot.WriteFieldBegin(ctx, "NANOS", thrift.STRUCT, 3); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:NANOS: ", p), err) }
+    if err := p.NANOS.Write(ctx, oprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.NANOS), err)
+    }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 3:NANOS: ", p), err) }
+  }
+  return err
+}
+
+func (p *TimeUnit) Equals(other *TimeUnit) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  if !p.MILLIS.Equals(other.MILLIS) { return false }
+  if !p.MICROS.Equals(other.MICROS) { return false }
+  if !p.NANOS.Equals(other.NANOS) { return false }
+  return true
+}
+
+func (p *TimeUnit) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("TimeUnit(%+v)", *p)
+}
+
+// Timestamp logical type annotation
+// 
+// Allowed for physical types: INT64
+// 
+// Attributes:
+//  - IsAdjustedToUTC
+//  - Unit
+type TimestampType struct {
+  IsAdjustedToUTC bool `thrift:"isAdjustedToUTC,1,required" db:"isAdjustedToUTC" json:"isAdjustedToUTC"`
+  Unit *TimeUnit `thrift:"unit,2,required" db:"unit" json:"unit"`
+}
+
+func NewTimestampType() *TimestampType {
+  return &TimestampType{}
+}
+
+
+func (p *TimestampType) GetIsAdjustedToUTC() bool {
+  return p.IsAdjustedToUTC
+}
+var TimestampType_Unit_DEFAULT *TimeUnit
+func (p *TimestampType) GetUnit() *TimeUnit {
+  if !p.IsSetUnit() {
+    return TimestampType_Unit_DEFAULT
+  }
+return p.Unit
+}
+func (p *TimestampType) IsSetUnit() bool {
+  return p.Unit != nil
+}
+
+func (p *TimestampType) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+  var issetIsAdjustedToUTC bool = false;
+  var issetUnit bool = false;
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    switch fieldId {
+    case 1:
+      if fieldTypeId == thrift.BOOL {
+        if err := p.ReadField1(ctx, iprot); err != nil {
+          return err
+        }
+        issetIsAdjustedToUTC = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 2:
+      if fieldTypeId == thrift.STRUCT {
+        if err := p.ReadField2(ctx, iprot); err != nil {
+          return err
+        }
+        issetUnit = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    default:
+      if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+        return err
+      }
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  if !issetIsAdjustedToUTC{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field IsAdjustedToUTC is not set"));
+  }
+  if !issetUnit{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Unit is not set"));
+  }
+  return nil
+}
+
+func (p *TimestampType)  ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadBool(ctx); err != nil {
+  return thrift.PrependError("error reading field 1: ", err)
+} else {
+  p.IsAdjustedToUTC = v
+}
+  return nil
+}
+
+func (p *TimestampType)  ReadField2(ctx context.Context, iprot thrift.TProtocol) error {
+  p.Unit = &TimeUnit{}
+  if err := p.Unit.Read(ctx, iprot); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Unit), err)
+  }
+  return nil
+}
+
+func (p *TimestampType) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if err := oprot.WriteStructBegin(ctx, "TimestampType"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+    if err := p.writeField1(ctx, oprot); err != nil { return err }
+    if err := p.writeField2(ctx, oprot); err != nil { return err }
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *TimestampType) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "isAdjustedToUTC", thrift.BOOL, 1); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:isAdjustedToUTC: ", p), err) }
+  if err := oprot.WriteBool(ctx, bool(p.IsAdjustedToUTC)); err != nil {
+  return thrift.PrependError(fmt.Sprintf("%T.isAdjustedToUTC (1) field write error: ", p), err) }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 1:isAdjustedToUTC: ", p), err) }
+  return err
+}
+
+func (p *TimestampType) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "unit", thrift.STRUCT, 2); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:unit: ", p), err) }
+  if err := p.Unit.Write(ctx, oprot); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Unit), err)
+  }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 2:unit: ", p), err) }
+  return err
+}
+
+func (p *TimestampType) Equals(other *TimestampType) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  if p.IsAdjustedToUTC != other.IsAdjustedToUTC { return false }
+  if !p.Unit.Equals(other.Unit) { return false }
+  return true
+}
+
+func (p *TimestampType) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("TimestampType(%+v)", *p)
+}
+
+// Time logical type annotation
+// 
+// Allowed for physical types: INT32 (millis), INT64 (micros, nanos)
+// 
+// Attributes:
+//  - IsAdjustedToUTC
+//  - Unit
+type TimeType struct {
+  IsAdjustedToUTC bool `thrift:"isAdjustedToUTC,1,required" db:"isAdjustedToUTC" json:"isAdjustedToUTC"`
+  Unit *TimeUnit `thrift:"unit,2,required" db:"unit" json:"unit"`
+}
+
+func NewTimeType() *TimeType {
+  return &TimeType{}
+}
+
+
+func (p *TimeType) GetIsAdjustedToUTC() bool {
+  return p.IsAdjustedToUTC
+}
+var TimeType_Unit_DEFAULT *TimeUnit
+func (p *TimeType) GetUnit() *TimeUnit {
+  if !p.IsSetUnit() {
+    return TimeType_Unit_DEFAULT
+  }
+return p.Unit
+}
+func (p *TimeType) IsSetUnit() bool {
+  return p.Unit != nil
+}
+
+func (p *TimeType) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+  var issetIsAdjustedToUTC bool = false;
+  var issetUnit bool = false;
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    switch fieldId {
+    case 1:
+      if fieldTypeId == thrift.BOOL {
+        if err := p.ReadField1(ctx, iprot); err != nil {
+          return err
+        }
+        issetIsAdjustedToUTC = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 2:
+      if fieldTypeId == thrift.STRUCT {
+        if err := p.ReadField2(ctx, iprot); err != nil {
+          return err
+        }
+        issetUnit = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    default:
+      if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+        return err
+      }
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  if !issetIsAdjustedToUTC{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field IsAdjustedToUTC is not set"));
+  }
+  if !issetUnit{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Unit is not set"));
+  }
+  return nil
+}
+
+func (p *TimeType)  ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadBool(ctx); err != nil {
+  return thrift.PrependError("error reading field 1: ", err)
+} else {
+  p.IsAdjustedToUTC = v
+}
+  return nil
+}
+
+func (p *TimeType)  ReadField2(ctx context.Context, iprot thrift.TProtocol) error {
+  p.Unit = &TimeUnit{}
+  if err := p.Unit.Read(ctx, iprot); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Unit), err)
+  }
+  return nil
+}
+
+func (p *TimeType) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if err := oprot.WriteStructBegin(ctx, "TimeType"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+    if err := p.writeField1(ctx, oprot); err != nil { return err }
+    if err := p.writeField2(ctx, oprot); err != nil { return err }
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *TimeType) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "isAdjustedToUTC", thrift.BOOL, 1); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:isAdjustedToUTC: ", p), err) }
+  if err := oprot.WriteBool(ctx, bool(p.IsAdjustedToUTC)); err != nil {
+  return thrift.PrependError(fmt.Sprintf("%T.isAdjustedToUTC (1) field write error: ", p), err) }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 1:isAdjustedToUTC: ", p), err) }
+  return err
+}
+
+func (p *TimeType) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "unit", thrift.STRUCT, 2); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:unit: ", p), err) }
+  if err := p.Unit.Write(ctx, oprot); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Unit), err)
+  }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 2:unit: ", p), err) }
+  return err
+}
+
+func (p *TimeType) Equals(other *TimeType) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  if p.IsAdjustedToUTC != other.IsAdjustedToUTC { return false }
+  if !p.Unit.Equals(other.Unit) { return false }
+  return true
+}
+
+func (p *TimeType) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("TimeType(%+v)", *p)
+}
+
+// Integer logical type annotation
+// 
+// bitWidth must be 8, 16, 32, or 64.
+// 
+// Allowed for physical types: INT32, INT64
+// 
+// Attributes:
+//  - BitWidth
+//  - IsSigned
+type IntType struct {
+  BitWidth int8 `thrift:"bitWidth,1,required" db:"bitWidth" json:"bitWidth"`
+  IsSigned bool `thrift:"isSigned,2,required" db:"isSigned" json:"isSigned"`
+}
+
+func NewIntType() *IntType {
+  return &IntType{}
+}
+
+
+func (p *IntType) GetBitWidth() int8 {
+  return p.BitWidth
+}
+
+func (p *IntType) GetIsSigned() bool {
+  return p.IsSigned
+}
+func (p *IntType) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+  var issetBitWidth bool = false;
+  var issetIsSigned bool = false;
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    switch fieldId {
+    case 1:
+      if fieldTypeId == thrift.BYTE {
+        if err := p.ReadField1(ctx, iprot); err != nil {
+          return err
+        }
+        issetBitWidth = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 2:
+      if fieldTypeId == thrift.BOOL {
+        if err := p.ReadField2(ctx, iprot); err != nil {
+          return err
+        }
+        issetIsSigned = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    default:
+      if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+        return err
+      }
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  if !issetBitWidth{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field BitWidth is not set"));
+  }
+  if !issetIsSigned{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field IsSigned is not set"));
+  }
+  return nil
+}
+
+func (p *IntType)  ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadByte(ctx); err != nil {
+  return thrift.PrependError("error reading field 1: ", err)
+} else {
+  temp := int8(v)
+  p.BitWidth = temp
+}
+  return nil
+}
+
+func (p *IntType)  ReadField2(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadBool(ctx); err != nil {
+  return thrift.PrependError("error reading field 2: ", err)
+} else {
+  p.IsSigned = v
+}
+  return nil
+}
+
+func (p *IntType) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if err := oprot.WriteStructBegin(ctx, "IntType"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+    if err := p.writeField1(ctx, oprot); err != nil { return err }
+    if err := p.writeField2(ctx, oprot); err != nil { return err }
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *IntType) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "bitWidth", thrift.BYTE, 1); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:bitWidth: ", p), err) }
+  if err := oprot.WriteByte(ctx, int8(p.BitWidth)); err != nil {
+  return thrift.PrependError(fmt.Sprintf("%T.bitWidth (1) field write error: ", p), err) }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 1:bitWidth: ", p), err) }
+  return err
+}
+
+func (p *IntType) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "isSigned", thrift.BOOL, 2); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:isSigned: ", p), err) }
+  if err := oprot.WriteBool(ctx, bool(p.IsSigned)); err != nil {
+  return thrift.PrependError(fmt.Sprintf("%T.isSigned (2) field write error: ", p), err) }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 2:isSigned: ", p), err) }
+  return err
+}
+
+func (p *IntType) Equals(other *IntType) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  if p.BitWidth != other.BitWidth { return false }
+  if p.IsSigned != other.IsSigned { return false }
+  return true
+}
+
+func (p *IntType) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("IntType(%+v)", *p)
+}
+
+// Embedded JSON logical type annotation
+// 
+// Allowed for physical types: BINARY
+type JsonType struct {
+}
+
+func NewJsonType() *JsonType {
+  return &JsonType{}
+}
+
+func (p *JsonType) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+      return err
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  return nil
+}
+
+func (p *JsonType) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if err := oprot.WriteStructBegin(ctx, "JsonType"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *JsonType) Equals(other *JsonType) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  return true
+}
+
+func (p *JsonType) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("JsonType(%+v)", *p)
+}
+
+// Embedded BSON logical type annotation
+// 
+// Allowed for physical types: BINARY
+type BsonType struct {
+}
+
+func NewBsonType() *BsonType {
+  return &BsonType{}
+}
+
+func (p *BsonType) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+      return err
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  return nil
+}
+
+func (p *BsonType) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if err := oprot.WriteStructBegin(ctx, "BsonType"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *BsonType) Equals(other *BsonType) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  return true
+}
+
+func (p *BsonType) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("BsonType(%+v)", *p)
+}
+
+// LogicalType annotations to replace ConvertedType.
+// 
+// To maintain compatibility, implementations using LogicalType for a
+// SchemaElement must also set the corresponding ConvertedType from the
+// following table.
+// 
+// Attributes:
+//  - STRING
+//  - MAP
+//  - LIST
+//  - ENUM
+//  - DECIMAL
+//  - DATE
+//  - TIME
+//  - TIMESTAMP
+//  - INTEGER
+//  - UNKNOWN
+//  - JSON
+//  - BSON
+//  - UUID
+type LogicalType struct {
+  STRING *StringType `thrift:"STRING,1" db:"STRING" json:"STRING,omitempty"`
+  MAP *MapType `thrift:"MAP,2" db:"MAP" json:"MAP,omitempty"`
+  LIST *ListType `thrift:"LIST,3" db:"LIST" json:"LIST,omitempty"`
+  ENUM *EnumType `thrift:"ENUM,4" db:"ENUM" json:"ENUM,omitempty"`
+  DECIMAL *DecimalType `thrift:"DECIMAL,5" db:"DECIMAL" json:"DECIMAL,omitempty"`
+  DATE *DateType `thrift:"DATE,6" db:"DATE" json:"DATE,omitempty"`
+  TIME *TimeType `thrift:"TIME,7" db:"TIME" json:"TIME,omitempty"`
+  TIMESTAMP *TimestampType `thrift:"TIMESTAMP,8" db:"TIMESTAMP" json:"TIMESTAMP,omitempty"`
+  // unused field # 9
+  INTEGER *IntType `thrift:"INTEGER,10" db:"INTEGER" json:"INTEGER,omitempty"`
+  UNKNOWN *NullType `thrift:"UNKNOWN,11" db:"UNKNOWN" json:"UNKNOWN,omitempty"`
+  JSON *JsonType `thrift:"JSON,12" db:"JSON" json:"JSON,omitempty"`
+  BSON *BsonType `thrift:"BSON,13" db:"BSON" json:"BSON,omitempty"`
+  UUID *UUIDType `thrift:"UUID,14" db:"UUID" json:"UUID,omitempty"`
+}
+
+func NewLogicalType() *LogicalType {
+  return &LogicalType{}
+}
+
+var LogicalType_STRING_DEFAULT *StringType
+func (p *LogicalType) GetSTRING() *StringType {
+  if !p.IsSetSTRING() {
+    return LogicalType_STRING_DEFAULT
+  }
+return p.STRING
+}
+var LogicalType_MAP_DEFAULT *MapType
+func (p *LogicalType) GetMAP() *MapType {
+  if !p.IsSetMAP() {
+    return LogicalType_MAP_DEFAULT
+  }
+return p.MAP
+}
+var LogicalType_LIST_DEFAULT *ListType
+func (p *LogicalType) GetLIST() *ListType {
+  if !p.IsSetLIST() {
+    return LogicalType_LIST_DEFAULT
+  }
+return p.LIST
+}
+var LogicalType_ENUM_DEFAULT *EnumType
+func (p *LogicalType) GetENUM() *EnumType {
+  if !p.IsSetENUM() {
+    return LogicalType_ENUM_DEFAULT
+  }
+return p.ENUM
+}
+var LogicalType_DECIMAL_DEFAULT *DecimalType
+func (p *LogicalType) GetDECIMAL() *DecimalType {
+  if !p.IsSetDECIMAL() {
+    return LogicalType_DECIMAL_DEFAULT
+  }
+return p.DECIMAL
+}
+var LogicalType_DATE_DEFAULT *DateType
+func (p *LogicalType) GetDATE() *DateType {
+  if !p.IsSetDATE() {
+    return LogicalType_DATE_DEFAULT
+  }
+return p.DATE
+}
+var LogicalType_TIME_DEFAULT *TimeType
+func (p *LogicalType) GetTIME() *TimeType {
+  if !p.IsSetTIME() {
+    return LogicalType_TIME_DEFAULT
+  }
+return p.TIME
+}
+var LogicalType_TIMESTAMP_DEFAULT *TimestampType
+func (p *LogicalType) GetTIMESTAMP() *TimestampType {
+  if !p.IsSetTIMESTAMP() {
+    return LogicalType_TIMESTAMP_DEFAULT
+  }
+return p.TIMESTAMP
+}
+var LogicalType_INTEGER_DEFAULT *IntType
+func (p *LogicalType) GetINTEGER() *IntType {
+  if !p.IsSetINTEGER() {
+    return LogicalType_INTEGER_DEFAULT
+  }
+return p.INTEGER
+}
+var LogicalType_UNKNOWN_DEFAULT *NullType
+func (p *LogicalType) GetUNKNOWN() *NullType {
+  if !p.IsSetUNKNOWN() {
+    return LogicalType_UNKNOWN_DEFAULT
+  }
+return p.UNKNOWN
+}
+var LogicalType_JSON_DEFAULT *JsonType
+func (p *LogicalType) GetJSON() *JsonType {
+  if !p.IsSetJSON() {
+    return LogicalType_JSON_DEFAULT
+  }
+return p.JSON
+}
+var LogicalType_BSON_DEFAULT *BsonType
+func (p *LogicalType) GetBSON() *BsonType {
+  if !p.IsSetBSON() {
+    return LogicalType_BSON_DEFAULT
+  }
+return p.BSON
+}
+var LogicalType_UUID_DEFAULT *UUIDType
+func (p *LogicalType) GetUUID() *UUIDType {
+  if !p.IsSetUUID() {
+    return LogicalType_UUID_DEFAULT
+  }
+return p.UUID
+}
+func (p *LogicalType) CountSetFieldsLogicalType() int {
+  count := 0
+  if (p.IsSetSTRING()) {
+    count++
+  }
+  if (p.IsSetMAP()) {
+    count++
+  }
+  if (p.IsSetLIST()) {
+    count++
+  }
+  if (p.IsSetENUM()) {
+    count++
+  }
+  if (p.IsSetDECIMAL()) {
+    count++
+  }
+  if (p.IsSetDATE()) {
+    count++
+  }
+  if (p.IsSetTIME()) {
+    count++
+  }
+  if (p.IsSetTIMESTAMP()) {
+    count++
+  }
+  if (p.IsSetINTEGER()) {
+    count++
+  }
+  if (p.IsSetUNKNOWN()) {
+    count++
+  }
+  if (p.IsSetJSON()) {
+    count++
+  }
+  if (p.IsSetBSON()) {
+    count++
+  }
+  if (p.IsSetUUID()) {
+    count++
+  }
+  return count
+
+}
+
+func (p *LogicalType) IsSetSTRING() bool {
+  return p.STRING != nil
+}
+
+func (p *LogicalType) IsSetMAP() bool {
+  return p.MAP != nil
+}
+
+func (p *LogicalType) IsSetLIST() bool {
+  return p.LIST != nil
+}
+
+func (p *LogicalType) IsSetENUM() bool {
+  return p.ENUM != nil
+}
+
+func (p *LogicalType) IsSetDECIMAL() bool {
+  return p.DECIMAL != nil
+}
+
+func (p *LogicalType) IsSetDATE() bool {
+  return p.DATE != nil
+}
+
+func (p *LogicalType) IsSetTIME() bool {
+  return p.TIME != nil
+}
+
+func (p *LogicalType) IsSetTIMESTAMP() bool {
+  return p.TIMESTAMP != nil
+}
+
+func (p *LogicalType) IsSetINTEGER() bool {
+  return p.INTEGER != nil
+}
+
+func (p *LogicalType) IsSetUNKNOWN() bool {
+  return p.UNKNOWN != nil
+}
+
+func (p *LogicalType) IsSetJSON() bool {
+  return p.JSON != nil
+}
+
+func (p *LogicalType) IsSetBSON() bool {
+  return p.BSON != nil
+}
+
+func (p *LogicalType) IsSetUUID() bool {
+  return p.UUID != nil
+}
+
+func (p *LogicalType) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    switch fieldId {
+    case 1:
+      if fieldTypeId == thrift.STRUCT {
+        if err := p.ReadField1(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 2:
+      if fieldTypeId == thrift.STRUCT {
+        if err := p.ReadField2(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 3:
+      if fieldTypeId == thrift.STRUCT {
+        if err := p.ReadField3(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 4:
+      if fieldTypeId == thrift.STRUCT {
+        if err := p.ReadField4(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 5:
+      if fieldTypeId == thrift.STRUCT {
+        if err := p.ReadField5(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 6:
+      if fieldTypeId == thrift.STRUCT {
+        if err := p.ReadField6(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 7:
+      if fieldTypeId == thrift.STRUCT {
+        if err := p.ReadField7(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 8:
+      if fieldTypeId == thrift.STRUCT {
+        if err := p.ReadField8(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 10:
+      if fieldTypeId == thrift.STRUCT {
+        if err := p.ReadField10(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 11:
+      if fieldTypeId == thrift.STRUCT {
+        if err := p.ReadField11(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 12:
+      if fieldTypeId == thrift.STRUCT {
+        if err := p.ReadField12(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 13:
+      if fieldTypeId == thrift.STRUCT {
+        if err := p.ReadField13(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 14:
+      if fieldTypeId == thrift.STRUCT {
+        if err := p.ReadField14(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    default:
+      if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+        return err
+      }
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  return nil
+}
+
+func (p *LogicalType)  ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
+  p.STRING = &StringType{}
+  if err := p.STRING.Read(ctx, iprot); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.STRING), err)
+  }
+  return nil
+}
+
+func (p *LogicalType)  ReadField2(ctx context.Context, iprot thrift.TProtocol) error {
+  p.MAP = &MapType{}
+  if err := p.MAP.Read(ctx, iprot); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.MAP), err)
+  }
+  return nil
+}
+
+func (p *LogicalType)  ReadField3(ctx context.Context, iprot thrift.TProtocol) error {
+  p.LIST = &ListType{}
+  if err := p.LIST.Read(ctx, iprot); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.LIST), err)
+  }
+  return nil
+}
+
+func (p *LogicalType)  ReadField4(ctx context.Context, iprot thrift.TProtocol) error {
+  p.ENUM = &EnumType{}
+  if err := p.ENUM.Read(ctx, iprot); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.ENUM), err)
+  }
+  return nil
+}
+
+func (p *LogicalType)  ReadField5(ctx context.Context, iprot thrift.TProtocol) error {
+  p.DECIMAL = &DecimalType{}
+  if err := p.DECIMAL.Read(ctx, iprot); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.DECIMAL), err)
+  }
+  return nil
+}
+
+func (p *LogicalType)  ReadField6(ctx context.Context, iprot thrift.TProtocol) error {
+  p.DATE = &DateType{}
+  if err := p.DATE.Read(ctx, iprot); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.DATE), err)
+  }
+  return nil
+}
+
+func (p *LogicalType)  ReadField7(ctx context.Context, iprot thrift.TProtocol) error {
+  p.TIME = &TimeType{}
+  if err := p.TIME.Read(ctx, iprot); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.TIME), err)
+  }
+  return nil
+}
+
+func (p *LogicalType)  ReadField8(ctx context.Context, iprot thrift.TProtocol) error {
+  p.TIMESTAMP = &TimestampType{}
+  if err := p.TIMESTAMP.Read(ctx, iprot); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.TIMESTAMP), err)
+  }
+  return nil
+}
+
+func (p *LogicalType)  ReadField10(ctx context.Context, iprot thrift.TProtocol) error {
+  p.INTEGER = &IntType{}
+  if err := p.INTEGER.Read(ctx, iprot); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.INTEGER), err)
+  }
+  return nil
+}
+
+func (p *LogicalType)  ReadField11(ctx context.Context, iprot thrift.TProtocol) error {
+  p.UNKNOWN = &NullType{}
+  if err := p.UNKNOWN.Read(ctx, iprot); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.UNKNOWN), err)
+  }
+  return nil
+}
+
+func (p *LogicalType)  ReadField12(ctx context.Context, iprot thrift.TProtocol) error {
+  p.JSON = &JsonType{}
+  if err := p.JSON.Read(ctx, iprot); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.JSON), err)
+  }
+  return nil
+}
+
+func (p *LogicalType)  ReadField13(ctx context.Context, iprot thrift.TProtocol) error {
+  p.BSON = &BsonType{}
+  if err := p.BSON.Read(ctx, iprot); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.BSON), err)
+  }
+  return nil
+}
+
+func (p *LogicalType)  ReadField14(ctx context.Context, iprot thrift.TProtocol) error {
+  p.UUID = &UUIDType{}
+  if err := p.UUID.Read(ctx, iprot); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.UUID), err)
+  }
+  return nil
+}
+
+func (p *LogicalType) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if c := p.CountSetFieldsLogicalType(); c != 1 {
+    return fmt.Errorf("%T write union: exactly one field must be set (%d set).", p, c)
+  }
+  if err := oprot.WriteStructBegin(ctx, "LogicalType"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+    if err := p.writeField1(ctx, oprot); err != nil { return err }
+    if err := p.writeField2(ctx, oprot); err != nil { return err }
+    if err := p.writeField3(ctx, oprot); err != nil { return err }
+    if err := p.writeField4(ctx, oprot); err != nil { return err }
+    if err := p.writeField5(ctx, oprot); err != nil { return err }
+    if err := p.writeField6(ctx, oprot); err != nil { return err }
+    if err := p.writeField7(ctx, oprot); err != nil { return err }
+    if err := p.writeField8(ctx, oprot); err != nil { return err }
+    if err := p.writeField10(ctx, oprot); err != nil { return err }
+    if err := p.writeField11(ctx, oprot); err != nil { return err }
+    if err := p.writeField12(ctx, oprot); err != nil { return err }
+    if err := p.writeField13(ctx, oprot); err != nil { return err }
+    if err := p.writeField14(ctx, oprot); err != nil { return err }
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *LogicalType) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetSTRING() {
+    if err := oprot.WriteFieldBegin(ctx, "STRING", thrift.STRUCT, 1); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:STRING: ", p), err) }
+    if err := p.STRING.Write(ctx, oprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.STRING), err)
+    }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 1:STRING: ", p), err) }
+  }
+  return err
+}
+
+func (p *LogicalType) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetMAP() {
+    if err := oprot.WriteFieldBegin(ctx, "MAP", thrift.STRUCT, 2); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:MAP: ", p), err) }
+    if err := p.MAP.Write(ctx, oprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.MAP), err)
+    }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 2:MAP: ", p), err) }
+  }
+  return err
+}
+
+func (p *LogicalType) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetLIST() {
+    if err := oprot.WriteFieldBegin(ctx, "LIST", thrift.STRUCT, 3); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:LIST: ", p), err) }
+    if err := p.LIST.Write(ctx, oprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.LIST), err)
+    }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 3:LIST: ", p), err) }
+  }
+  return err
+}
+
+func (p *LogicalType) writeField4(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetENUM() {
+    if err := oprot.WriteFieldBegin(ctx, "ENUM", thrift.STRUCT, 4); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:ENUM: ", p), err) }
+    if err := p.ENUM.Write(ctx, oprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.ENUM), err)
+    }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 4:ENUM: ", p), err) }
+  }
+  return err
+}
+
+func (p *LogicalType) writeField5(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetDECIMAL() {
+    if err := oprot.WriteFieldBegin(ctx, "DECIMAL", thrift.STRUCT, 5); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:DECIMAL: ", p), err) }
+    if err := p.DECIMAL.Write(ctx, oprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.DECIMAL), err)
+    }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 5:DECIMAL: ", p), err) }
+  }
+  return err
+}
+
+func (p *LogicalType) writeField6(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetDATE() {
+    if err := oprot.WriteFieldBegin(ctx, "DATE", thrift.STRUCT, 6); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:DATE: ", p), err) }
+    if err := p.DATE.Write(ctx, oprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.DATE), err)
+    }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 6:DATE: ", p), err) }
+  }
+  return err
+}
+
+func (p *LogicalType) writeField7(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetTIME() {
+    if err := oprot.WriteFieldBegin(ctx, "TIME", thrift.STRUCT, 7); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:TIME: ", p), err) }
+    if err := p.TIME.Write(ctx, oprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.TIME), err)
+    }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 7:TIME: ", p), err) }
+  }
+  return err
+}
+
+func (p *LogicalType) writeField8(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetTIMESTAMP() {
+    if err := oprot.WriteFieldBegin(ctx, "TIMESTAMP", thrift.STRUCT, 8); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 8:TIMESTAMP: ", p), err) }
+    if err := p.TIMESTAMP.Write(ctx, oprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.TIMESTAMP), err)
+    }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 8:TIMESTAMP: ", p), err) }
+  }
+  return err
+}
+
+func (p *LogicalType) writeField10(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetINTEGER() {
+    if err := oprot.WriteFieldBegin(ctx, "INTEGER", thrift.STRUCT, 10); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 10:INTEGER: ", p), err) }
+    if err := p.INTEGER.Write(ctx, oprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.INTEGER), err)
+    }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 10:INTEGER: ", p), err) }
+  }
+  return err
+}
+
+func (p *LogicalType) writeField11(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetUNKNOWN() {
+    if err := oprot.WriteFieldBegin(ctx, "UNKNOWN", thrift.STRUCT, 11); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 11:UNKNOWN: ", p), err) }
+    if err := p.UNKNOWN.Write(ctx, oprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.UNKNOWN), err)
+    }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 11:UNKNOWN: ", p), err) }
+  }
+  return err
+}
+
+func (p *LogicalType) writeField12(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetJSON() {
+    if err := oprot.WriteFieldBegin(ctx, "JSON", thrift.STRUCT, 12); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 12:JSON: ", p), err) }
+    if err := p.JSON.Write(ctx, oprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.JSON), err)
+    }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 12:JSON: ", p), err) }
+  }
+  return err
+}
+
+func (p *LogicalType) writeField13(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetBSON() {
+    if err := oprot.WriteFieldBegin(ctx, "BSON", thrift.STRUCT, 13); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 13:BSON: ", p), err) }
+    if err := p.BSON.Write(ctx, oprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.BSON), err)
+    }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 13:BSON: ", p), err) }
+  }
+  return err
+}
+
+func (p *LogicalType) writeField14(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetUUID() {
+    if err := oprot.WriteFieldBegin(ctx, "UUID", thrift.STRUCT, 14); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 14:UUID: ", p), err) }
+    if err := p.UUID.Write(ctx, oprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.UUID), err)
+    }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 14:UUID: ", p), err) }
+  }
+  return err
+}
+
+func (p *LogicalType) Equals(other *LogicalType) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  if !p.STRING.Equals(other.STRING) { return false }
+  if !p.MAP.Equals(other.MAP) { return false }
+  if !p.LIST.Equals(other.LIST) { return false }
+  if !p.ENUM.Equals(other.ENUM) { return false }
+  if !p.DECIMAL.Equals(other.DECIMAL) { return false }
+  if !p.DATE.Equals(other.DATE) { return false }
+  if !p.TIME.Equals(other.TIME) { return false }
+  if !p.TIMESTAMP.Equals(other.TIMESTAMP) { return false }
+  if !p.INTEGER.Equals(other.INTEGER) { return false }
+  if !p.UNKNOWN.Equals(other.UNKNOWN) { return false }
+  if !p.JSON.Equals(other.JSON) { return false }
+  if !p.BSON.Equals(other.BSON) { return false }
+  if !p.UUID.Equals(other.UUID) { return false }
+  return true
+}
+
+func (p *LogicalType) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("LogicalType(%+v)", *p)
+}
+
+// Represents a element inside a schema definition.
+//  - if it is a group (inner node) then type is undefined and num_children is defined
+//  - if it is a primitive type (leaf) then type is defined and num_children is undefined
+// the nodes are listed in depth first traversal order.
+// 
+// Attributes:
+//  - Type: Data type for this field. Not set if the current element is a non-leaf node
+//  - TypeLength: If type is FIXED_LEN_BYTE_ARRAY, this is the byte length of the vales.
+// Otherwise, if specified, this is the maximum bit length to store any of the values.
+// (e.g. a low cardinality INT col could have this set to 3).  Note that this is
+// in the schema, and therefore fixed for the entire file.
+//  - RepetitionType: repetition of the field. The root of the schema does not have a repetition_type.
+// All other nodes must have one
+//  - Name: Name of the field in the schema
+//  - NumChildren: Nested fields.  Since thrift does not support nested fields,
+// the nesting is flattened to a single list by a depth-first traversal.
+// The children count is used to construct the nested relationship.
+// This field is not set when the element is a primitive type
+//  - ConvertedType: When the schema is the result of a conversion from another model
+// Used to record the original type to help with cross conversion.
+//  - Scale: Used when this column contains decimal data.
+// See the DECIMAL converted type for more details.
+//  - Precision
+//  - FieldID: When the original schema supports field ids, this will save the
+// original field id in the parquet schema
+//  - LogicalType: The logical type of this SchemaElement
+// 
+// LogicalType replaces ConvertedType, but ConvertedType is still required
+// for some logical types to ensure forward-compatibility in format v1.
+type SchemaElement struct {
+  Type *Type `thrift:"type,1" db:"type" json:"type,omitempty"`
+  TypeLength *int32 `thrift:"type_length,2" db:"type_length" json:"type_length,omitempty"`
+  RepetitionType *FieldRepetitionType `thrift:"repetition_type,3" db:"repetition_type" json:"repetition_type,omitempty"`
+  Name string `thrift:"name,4,required" db:"name" json:"name"`
+  NumChildren *int32 `thrift:"num_children,5" db:"num_children" json:"num_children,omitempty"`
+  ConvertedType *ConvertedType `thrift:"converted_type,6" db:"converted_type" json:"converted_type,omitempty"`
+  Scale *int32 `thrift:"scale,7" db:"scale" json:"scale,omitempty"`
+  Precision *int32 `thrift:"precision,8" db:"precision" json:"precision,omitempty"`
+  FieldID *int32 `thrift:"field_id,9" db:"field_id" json:"field_id,omitempty"`
+  LogicalType *LogicalType `thrift:"logicalType,10" db:"logicalType" json:"logicalType,omitempty"`
+}
+
+func NewSchemaElement() *SchemaElement {
+  return &SchemaElement{}
+}
+
+var SchemaElement_Type_DEFAULT Type
+func (p *SchemaElement) GetType() Type {
+  if !p.IsSetType() {
+    return SchemaElement_Type_DEFAULT
+  }
+return *p.Type
+}
+var SchemaElement_TypeLength_DEFAULT int32
+func (p *SchemaElement) GetTypeLength() int32 {
+  if !p.IsSetTypeLength() {
+    return SchemaElement_TypeLength_DEFAULT
+  }
+return *p.TypeLength
+}
+var SchemaElement_RepetitionType_DEFAULT FieldRepetitionType
+func (p *SchemaElement) GetRepetitionType() FieldRepetitionType {
+  if !p.IsSetRepetitionType() {
+    return SchemaElement_RepetitionType_DEFAULT
+  }
+return *p.RepetitionType
+}
+
+func (p *SchemaElement) GetName() string {
+  return p.Name
+}
+var SchemaElement_NumChildren_DEFAULT int32
+func (p *SchemaElement) GetNumChildren() int32 {
+  if !p.IsSetNumChildren() {
+    return SchemaElement_NumChildren_DEFAULT
+  }
+return *p.NumChildren
+}
+var SchemaElement_ConvertedType_DEFAULT ConvertedType
+func (p *SchemaElement) GetConvertedType() ConvertedType {
+  if !p.IsSetConvertedType() {
+    return SchemaElement_ConvertedType_DEFAULT
+  }
+return *p.ConvertedType
+}
+var SchemaElement_Scale_DEFAULT int32
+func (p *SchemaElement) GetScale() int32 {
+  if !p.IsSetScale() {
+    return SchemaElement_Scale_DEFAULT
+  }
+return *p.Scale
+}
+var SchemaElement_Precision_DEFAULT int32
+func (p *SchemaElement) GetPrecision() int32 {
+  if !p.IsSetPrecision() {
+    return SchemaElement_Precision_DEFAULT
+  }
+return *p.Precision
+}
+var SchemaElement_FieldID_DEFAULT int32
+func (p *SchemaElement) GetFieldID() int32 {
+  if !p.IsSetFieldID() {
+    return SchemaElement_FieldID_DEFAULT
+  }
+return *p.FieldID
+}
+var SchemaElement_LogicalType_DEFAULT *LogicalType
+func (p *SchemaElement) GetLogicalType() *LogicalType {
+  if !p.IsSetLogicalType() {
+    return SchemaElement_LogicalType_DEFAULT
+  }
+return p.LogicalType
+}
+func (p *SchemaElement) IsSetType() bool {
+  return p.Type != nil
+}
+
+func (p *SchemaElement) IsSetTypeLength() bool {
+  return p.TypeLength != nil
+}
+
+func (p *SchemaElement) IsSetRepetitionType() bool {
+  return p.RepetitionType != nil
+}
+
+func (p *SchemaElement) IsSetNumChildren() bool {
+  return p.NumChildren != nil
+}
+
+func (p *SchemaElement) IsSetConvertedType() bool {
+  return p.ConvertedType != nil
+}
+
+func (p *SchemaElement) IsSetScale() bool {
+  return p.Scale != nil
+}
+
+func (p *SchemaElement) IsSetPrecision() bool {
+  return p.Precision != nil
+}
+
+func (p *SchemaElement) IsSetFieldID() bool {
+  return p.FieldID != nil
+}
+
+func (p *SchemaElement) IsSetLogicalType() bool {
+  return p.LogicalType != nil
+}
+
+func (p *SchemaElement) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+  var issetName bool = false;
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    switch fieldId {
+    case 1:
+      if fieldTypeId == thrift.I32 {
+        if err := p.ReadField1(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 2:
+      if fieldTypeId == thrift.I32 {
+        if err := p.ReadField2(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 3:
+      if fieldTypeId == thrift.I32 {
+        if err := p.ReadField3(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 4:
+      if fieldTypeId == thrift.STRING {
+        if err := p.ReadField4(ctx, iprot); err != nil {
+          return err
+        }
+        issetName = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 5:
+      if fieldTypeId == thrift.I32 {
+        if err := p.ReadField5(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 6:
+      if fieldTypeId == thrift.I32 {
+        if err := p.ReadField6(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 7:
+      if fieldTypeId == thrift.I32 {
+        if err := p.ReadField7(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 8:
+      if fieldTypeId == thrift.I32 {
+        if err := p.ReadField8(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 9:
+      if fieldTypeId == thrift.I32 {
+        if err := p.ReadField9(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 10:
+      if fieldTypeId == thrift.STRUCT {
+        if err := p.ReadField10(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    default:
+      if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+        return err
+      }
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  if !issetName{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Name is not set"));
+  }
+  return nil
+}
+
+func (p *SchemaElement)  ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI32(ctx); err != nil {
+  return thrift.PrependError("error reading field 1: ", err)
+} else {
+  temp := Type(v)
+  p.Type = &temp
+}
+  return nil
+}
+
+func (p *SchemaElement)  ReadField2(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI32(ctx); err != nil {
+  return thrift.PrependError("error reading field 2: ", err)
+} else {
+  p.TypeLength = &v
+}
+  return nil
+}
+
+func (p *SchemaElement)  ReadField3(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI32(ctx); err != nil {
+  return thrift.PrependError("error reading field 3: ", err)
+} else {
+  temp := FieldRepetitionType(v)
+  p.RepetitionType = &temp
+}
+  return nil
+}
+
+func (p *SchemaElement)  ReadField4(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadString(ctx); err != nil {
+  return thrift.PrependError("error reading field 4: ", err)
+} else {
+  p.Name = v
+}
+  return nil
+}
+
+func (p *SchemaElement)  ReadField5(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI32(ctx); err != nil {
+  return thrift.PrependError("error reading field 5: ", err)
+} else {
+  p.NumChildren = &v
+}
+  return nil
+}
+
+func (p *SchemaElement)  ReadField6(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI32(ctx); err != nil {
+  return thrift.PrependError("error reading field 6: ", err)
+} else {
+  temp := ConvertedType(v)
+  p.ConvertedType = &temp
+}
+  return nil
+}
+
+func (p *SchemaElement)  ReadField7(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI32(ctx); err != nil {
+  return thrift.PrependError("error reading field 7: ", err)
+} else {
+  p.Scale = &v
+}
+  return nil
+}
+
+func (p *SchemaElement)  ReadField8(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI32(ctx); err != nil {
+  return thrift.PrependError("error reading field 8: ", err)
+} else {
+  p.Precision = &v
+}
+  return nil
+}
+
+func (p *SchemaElement)  ReadField9(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI32(ctx); err != nil {
+  return thrift.PrependError("error reading field 9: ", err)
+} else {
+  p.FieldID = &v
+}
+  return nil
+}
+
+func (p *SchemaElement)  ReadField10(ctx context.Context, iprot thrift.TProtocol) error {
+  p.LogicalType = &LogicalType{}
+  if err := p.LogicalType.Read(ctx, iprot); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.LogicalType), err)
+  }
+  return nil
+}
+
+func (p *SchemaElement) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if err := oprot.WriteStructBegin(ctx, "SchemaElement"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+    if err := p.writeField1(ctx, oprot); err != nil { return err }
+    if err := p.writeField2(ctx, oprot); err != nil { return err }
+    if err := p.writeField3(ctx, oprot); err != nil { return err }
+    if err := p.writeField4(ctx, oprot); err != nil { return err }
+    if err := p.writeField5(ctx, oprot); err != nil { return err }
+    if err := p.writeField6(ctx, oprot); err != nil { return err }
+    if err := p.writeField7(ctx, oprot); err != nil { return err }
+    if err := p.writeField8(ctx, oprot); err != nil { return err }
+    if err := p.writeField9(ctx, oprot); err != nil { return err }
+    if err := p.writeField10(ctx, oprot); err != nil { return err }
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *SchemaElement) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetType() {
+    if err := oprot.WriteFieldBegin(ctx, "type", thrift.I32, 1); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:type: ", p), err) }
+    if err := oprot.WriteI32(ctx, int32(*p.Type)); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T.type (1) field write error: ", p), err) }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 1:type: ", p), err) }
+  }
+  return err
+}
+
+func (p *SchemaElement) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetTypeLength() {
+    if err := oprot.WriteFieldBegin(ctx, "type_length", thrift.I32, 2); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:type_length: ", p), err) }
+    if err := oprot.WriteI32(ctx, int32(*p.TypeLength)); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T.type_length (2) field write error: ", p), err) }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 2:type_length: ", p), err) }
+  }
+  return err
+}
+
+func (p *SchemaElement) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetRepetitionType() {
+    if err := oprot.WriteFieldBegin(ctx, "repetition_type", thrift.I32, 3); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:repetition_type: ", p), err) }
+    if err := oprot.WriteI32(ctx, int32(*p.RepetitionType)); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T.repetition_type (3) field write error: ", p), err) }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 3:repetition_type: ", p), err) }
+  }
+  return err
+}
+
+func (p *SchemaElement) writeField4(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "name", thrift.STRING, 4); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:name: ", p), err) }
+  if err := oprot.WriteString(ctx, string(p.Name)); err != nil {
+  return thrift.PrependError(fmt.Sprintf("%T.name (4) field write error: ", p), err) }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 4:name: ", p), err) }
+  return err
+}
+
+func (p *SchemaElement) writeField5(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetNumChildren() {
+    if err := oprot.WriteFieldBegin(ctx, "num_children", thrift.I32, 5); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:num_children: ", p), err) }
+    if err := oprot.WriteI32(ctx, int32(*p.NumChildren)); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T.num_children (5) field write error: ", p), err) }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 5:num_children: ", p), err) }
+  }
+  return err
+}
+
+func (p *SchemaElement) writeField6(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetConvertedType() {
+    if err := oprot.WriteFieldBegin(ctx, "converted_type", thrift.I32, 6); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:converted_type: ", p), err) }
+    if err := oprot.WriteI32(ctx, int32(*p.ConvertedType)); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T.converted_type (6) field write error: ", p), err) }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 6:converted_type: ", p), err) }
+  }
+  return err
+}
+
+func (p *SchemaElement) writeField7(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetScale() {
+    if err := oprot.WriteFieldBegin(ctx, "scale", thrift.I32, 7); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:scale: ", p), err) }
+    if err := oprot.WriteI32(ctx, int32(*p.Scale)); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T.scale (7) field write error: ", p), err) }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 7:scale: ", p), err) }
+  }
+  return err
+}
+
+func (p *SchemaElement) writeField8(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetPrecision() {
+    if err := oprot.WriteFieldBegin(ctx, "precision", thrift.I32, 8); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 8:precision: ", p), err) }
+    if err := oprot.WriteI32(ctx, int32(*p.Precision)); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T.precision (8) field write error: ", p), err) }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 8:precision: ", p), err) }
+  }
+  return err
+}
+
+func (p *SchemaElement) writeField9(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetFieldID() {
+    if err := oprot.WriteFieldBegin(ctx, "field_id", thrift.I32, 9); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 9:field_id: ", p), err) }
+    if err := oprot.WriteI32(ctx, int32(*p.FieldID)); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T.field_id (9) field write error: ", p), err) }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 9:field_id: ", p), err) }
+  }
+  return err
+}
+
+func (p *SchemaElement) writeField10(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetLogicalType() {
+    if err := oprot.WriteFieldBegin(ctx, "logicalType", thrift.STRUCT, 10); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 10:logicalType: ", p), err) }
+    if err := p.LogicalType.Write(ctx, oprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.LogicalType), err)
+    }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 10:logicalType: ", p), err) }
+  }
+  return err
+}
+
+func (p *SchemaElement) Equals(other *SchemaElement) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  if p.Type != other.Type {
+    if p.Type == nil || other.Type == nil {
+      return false
+    }
+    if (*p.Type) != (*other.Type) { return false }
+  }
+  if p.TypeLength != other.TypeLength {
+    if p.TypeLength == nil || other.TypeLength == nil {
+      return false
+    }
+    if (*p.TypeLength) != (*other.TypeLength) { return false }
+  }
+  if p.RepetitionType != other.RepetitionType {
+    if p.RepetitionType == nil || other.RepetitionType == nil {
+      return false
+    }
+    if (*p.RepetitionType) != (*other.RepetitionType) { return false }
+  }
+  if p.Name != other.Name { return false }
+  if p.NumChildren != other.NumChildren {
+    if p.NumChildren == nil || other.NumChildren == nil {
+      return false
+    }
+    if (*p.NumChildren) != (*other.NumChildren) { return false }
+  }
+  if p.ConvertedType != other.ConvertedType {
+    if p.ConvertedType == nil || other.ConvertedType == nil {
+      return false
+    }
+    if (*p.ConvertedType) != (*other.ConvertedType) { return false }
+  }
+  if p.Scale != other.Scale {
+    if p.Scale == nil || other.Scale == nil {
+      return false
+    }
+    if (*p.Scale) != (*other.Scale) { return false }
+  }
+  if p.Precision != other.Precision {
+    if p.Precision == nil || other.Precision == nil {
+      return false
+    }
+    if (*p.Precision) != (*other.Precision) { return false }
+  }
+  if p.FieldID != other.FieldID {
+    if p.FieldID == nil || other.FieldID == nil {
+      return false
+    }
+    if (*p.FieldID) != (*other.FieldID) { return false }
+  }
+  if !p.LogicalType.Equals(other.LogicalType) { return false }
+  return true
+}
+
+func (p *SchemaElement) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("SchemaElement(%+v)", *p)
+}
+
+// Data page header
+// 
+// Attributes:
+//  - NumValues: Number of values, including NULLs, in this data page. *
+//  - Encoding: Encoding used for this data page *
+//  - DefinitionLevelEncoding: Encoding used for definition levels *
+//  - RepetitionLevelEncoding: Encoding used for repetition levels *
+//  - Statistics: Optional statistics for the data in this page*
+type DataPageHeader struct {
+  NumValues int32 `thrift:"num_values,1,required" db:"num_values" json:"num_values"`
+  Encoding Encoding `thrift:"encoding,2,required" db:"encoding" json:"encoding"`
+  DefinitionLevelEncoding Encoding `thrift:"definition_level_encoding,3,required" db:"definition_level_encoding" json:"definition_level_encoding"`
+  RepetitionLevelEncoding Encoding `thrift:"repetition_level_encoding,4,required" db:"repetition_level_encoding" json:"repetition_level_encoding"`
+  Statistics *Statistics `thrift:"statistics,5" db:"statistics" json:"statistics,omitempty"`
+}
+
+func NewDataPageHeader() *DataPageHeader {
+  return &DataPageHeader{}
+}
+
+
+func (p *DataPageHeader) GetNumValues() int32 {
+  return p.NumValues
+}
+
+func (p *DataPageHeader) GetEncoding() Encoding {
+  return p.Encoding
+}
+
+func (p *DataPageHeader) GetDefinitionLevelEncoding() Encoding {
+  return p.DefinitionLevelEncoding
+}
+
+func (p *DataPageHeader) GetRepetitionLevelEncoding() Encoding {
+  return p.RepetitionLevelEncoding
+}
+var DataPageHeader_Statistics_DEFAULT *Statistics
+func (p *DataPageHeader) GetStatistics() *Statistics {
+  if !p.IsSetStatistics() {
+    return DataPageHeader_Statistics_DEFAULT
+  }
+return p.Statistics
+}
+func (p *DataPageHeader) IsSetStatistics() bool {
+  return p.Statistics != nil
+}
+
+func (p *DataPageHeader) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+  var issetNumValues bool = false;
+  var issetEncoding bool = false;
+  var issetDefinitionLevelEncoding bool = false;
+  var issetRepetitionLevelEncoding bool = false;
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    switch fieldId {
+    case 1:
+      if fieldTypeId == thrift.I32 {
+        if err := p.ReadField1(ctx, iprot); err != nil {
+          return err
+        }
+        issetNumValues = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 2:
+      if fieldTypeId == thrift.I32 {
+        if err := p.ReadField2(ctx, iprot); err != nil {
+          return err
+        }
+        issetEncoding = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 3:
+      if fieldTypeId == thrift.I32 {
+        if err := p.ReadField3(ctx, iprot); err != nil {
+          return err
+        }
+        issetDefinitionLevelEncoding = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 4:
+      if fieldTypeId == thrift.I32 {
+        if err := p.ReadField4(ctx, iprot); err != nil {
+          return err
+        }
+        issetRepetitionLevelEncoding = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 5:
+      if fieldTypeId == thrift.STRUCT {
+        if err := p.ReadField5(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    default:
+      if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+        return err
+      }
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  if !issetNumValues{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumValues is not set"));
+  }
+  if !issetEncoding{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Encoding is not set"));
+  }
+  if !issetDefinitionLevelEncoding{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field DefinitionLevelEncoding is not set"));
+  }
+  if !issetRepetitionLevelEncoding{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field RepetitionLevelEncoding is not set"));
+  }
+  return nil
+}
+
+func (p *DataPageHeader)  ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI32(ctx); err != nil {
+  return thrift.PrependError("error reading field 1: ", err)
+} else {
+  p.NumValues = v
+}
+  return nil
+}
+
+func (p *DataPageHeader)  ReadField2(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI32(ctx); err != nil {
+  return thrift.PrependError("error reading field 2: ", err)
+} else {
+  temp := Encoding(v)
+  p.Encoding = temp
+}
+  return nil
+}
+
+func (p *DataPageHeader)  ReadField3(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI32(ctx); err != nil {
+  return thrift.PrependError("error reading field 3: ", err)
+} else {
+  temp := Encoding(v)
+  p.DefinitionLevelEncoding = temp
+}
+  return nil
+}
+
+func (p *DataPageHeader)  ReadField4(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI32(ctx); err != nil {
+  return thrift.PrependError("error reading field 4: ", err)
+} else {
+  temp := Encoding(v)
+  p.RepetitionLevelEncoding = temp
+}
+  return nil
+}
+
+func (p *DataPageHeader)  ReadField5(ctx context.Context, iprot thrift.TProtocol) error {
+  p.Statistics = &Statistics{}
+  if err := p.Statistics.Read(ctx, iprot); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Statistics), err)
+  }
+  return nil
+}
+
+func (p *DataPageHeader) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if err := oprot.WriteStructBegin(ctx, "DataPageHeader"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+    if err := p.writeField1(ctx, oprot); err != nil { return err }
+    if err := p.writeField2(ctx, oprot); err != nil { return err }
+    if err := p.writeField3(ctx, oprot); err != nil { return err }
+    if err := p.writeField4(ctx, oprot); err != nil { return err }
+    if err := p.writeField5(ctx, oprot); err != nil { return err }
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *DataPageHeader) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "num_values", thrift.I32, 1); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:num_values: ", p), err) }
+  if err := oprot.WriteI32(ctx, int32(p.NumValues)); err != nil {
+  return thrift.PrependError(fmt.Sprintf("%T.num_values (1) field write error: ", p), err) }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 1:num_values: ", p), err) }
+  return err
+}
+
+func (p *DataPageHeader) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "encoding", thrift.I32, 2); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:encoding: ", p), err) }
+  if err := oprot.WriteI32(ctx, int32(p.Encoding)); err != nil {
+  return thrift.PrependError(fmt.Sprintf("%T.encoding (2) field write error: ", p), err) }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 2:encoding: ", p), err) }
+  return err
+}
+
+func (p *DataPageHeader) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "definition_level_encoding", thrift.I32, 3); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:definition_level_encoding: ", p), err) }
+  if err := oprot.WriteI32(ctx, int32(p.DefinitionLevelEncoding)); err != nil {
+  return thrift.PrependError(fmt.Sprintf("%T.definition_level_encoding (3) field write error: ", p), err) }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 3:definition_level_encoding: ", p), err) }
+  return err
+}
+
+func (p *DataPageHeader) writeField4(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "repetition_level_encoding", thrift.I32, 4); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:repetition_level_encoding: ", p), err) }
+  if err := oprot.WriteI32(ctx, int32(p.RepetitionLevelEncoding)); err != nil {
+  return thrift.PrependError(fmt.Sprintf("%T.repetition_level_encoding (4) field write error: ", p), err) }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 4:repetition_level_encoding: ", p), err) }
+  return err
+}
+
+func (p *DataPageHeader) writeField5(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetStatistics() {
+    if err := oprot.WriteFieldBegin(ctx, "statistics", thrift.STRUCT, 5); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:statistics: ", p), err) }
+    if err := p.Statistics.Write(ctx, oprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Statistics), err)
+    }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 5:statistics: ", p), err) }
+  }
+  return err
+}
+
+func (p *DataPageHeader) Equals(other *DataPageHeader) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  if p.NumValues != other.NumValues { return false }
+  if p.Encoding != other.Encoding { return false }
+  if p.DefinitionLevelEncoding != other.DefinitionLevelEncoding { return false }
+  if p.RepetitionLevelEncoding != other.RepetitionLevelEncoding { return false }
+  if !p.Statistics.Equals(other.Statistics) { return false }
+  return true
+}
+
+func (p *DataPageHeader) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("DataPageHeader(%+v)", *p)
+}
+
+type IndexPageHeader struct {
+}
+
+func NewIndexPageHeader() *IndexPageHeader {
+  return &IndexPageHeader{}
+}
+
+func (p *IndexPageHeader) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+      return err
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  return nil
+}
+
+func (p *IndexPageHeader) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if err := oprot.WriteStructBegin(ctx, "IndexPageHeader"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *IndexPageHeader) Equals(other *IndexPageHeader) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  return true
+}
+
+func (p *IndexPageHeader) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("IndexPageHeader(%+v)", *p)
+}
+
+// Attributes:
+//  - NumValues: Number of values in the dictionary *
+//  - Encoding: Encoding using this dictionary page *
+//  - IsSorted: If true, the entries in the dictionary are sorted in ascending order *
+type DictionaryPageHeader struct {
+  NumValues int32 `thrift:"num_values,1,required" db:"num_values" json:"num_values"`
+  Encoding Encoding `thrift:"encoding,2,required" db:"encoding" json:"encoding"`
+  IsSorted *bool `thrift:"is_sorted,3" db:"is_sorted" json:"is_sorted,omitempty"`
+}
+
+func NewDictionaryPageHeader() *DictionaryPageHeader {
+  return &DictionaryPageHeader{}
+}
+
+
+func (p *DictionaryPageHeader) GetNumValues() int32 {
+  return p.NumValues
+}
+
+func (p *DictionaryPageHeader) GetEncoding() Encoding {
+  return p.Encoding
+}
+var DictionaryPageHeader_IsSorted_DEFAULT bool
+func (p *DictionaryPageHeader) GetIsSorted() bool {
+  if !p.IsSetIsSorted() {
+    return DictionaryPageHeader_IsSorted_DEFAULT
+  }
+return *p.IsSorted
+}
+func (p *DictionaryPageHeader) IsSetIsSorted() bool {
+  return p.IsSorted != nil
+}
+
+func (p *DictionaryPageHeader) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+  var issetNumValues bool = false;
+  var issetEncoding bool = false;
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    switch fieldId {
+    case 1:
+      if fieldTypeId == thrift.I32 {
+        if err := p.ReadField1(ctx, iprot); err != nil {
+          return err
+        }
+        issetNumValues = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 2:
+      if fieldTypeId == thrift.I32 {
+        if err := p.ReadField2(ctx, iprot); err != nil {
+          return err
+        }
+        issetEncoding = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 3:
+      if fieldTypeId == thrift.BOOL {
+        if err := p.ReadField3(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    default:
+      if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+        return err
+      }
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  if !issetNumValues{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumValues is not set"));
+  }
+  if !issetEncoding{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Encoding is not set"));
+  }
+  return nil
+}
+
+func (p *DictionaryPageHeader)  ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI32(ctx); err != nil {
+  return thrift.PrependError("error reading field 1: ", err)
+} else {
+  p.NumValues = v
+}
+  return nil
+}
+
+func (p *DictionaryPageHeader)  ReadField2(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI32(ctx); err != nil {
+  return thrift.PrependError("error reading field 2: ", err)
+} else {
+  temp := Encoding(v)
+  p.Encoding = temp
+}
+  return nil
+}
+
+func (p *DictionaryPageHeader)  ReadField3(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadBool(ctx); err != nil {
+  return thrift.PrependError("error reading field 3: ", err)
+} else {
+  p.IsSorted = &v
+}
+  return nil
+}
+
+func (p *DictionaryPageHeader) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if err := oprot.WriteStructBegin(ctx, "DictionaryPageHeader"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+    if err := p.writeField1(ctx, oprot); err != nil { return err }
+    if err := p.writeField2(ctx, oprot); err != nil { return err }
+    if err := p.writeField3(ctx, oprot); err != nil { return err }
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *DictionaryPageHeader) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "num_values", thrift.I32, 1); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:num_values: ", p), err) }
+  if err := oprot.WriteI32(ctx, int32(p.NumValues)); err != nil {
+  return thrift.PrependError(fmt.Sprintf("%T.num_values (1) field write error: ", p), err) }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 1:num_values: ", p), err) }
+  return err
+}
+
+func (p *DictionaryPageHeader) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "encoding", thrift.I32, 2); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:encoding: ", p), err) }
+  if err := oprot.WriteI32(ctx, int32(p.Encoding)); err != nil {
+  return thrift.PrependError(fmt.Sprintf("%T.encoding (2) field write error: ", p), err) }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 2:encoding: ", p), err) }
+  return err
+}
+
+func (p *DictionaryPageHeader) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetIsSorted() {
+    if err := oprot.WriteFieldBegin(ctx, "is_sorted", thrift.BOOL, 3); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:is_sorted: ", p), err) }
+    if err := oprot.WriteBool(ctx, bool(*p.IsSorted)); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T.is_sorted (3) field write error: ", p), err) }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 3:is_sorted: ", p), err) }
+  }
+  return err
+}
+
+func (p *DictionaryPageHeader) Equals(other *DictionaryPageHeader) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  if p.NumValues != other.NumValues { return false }
+  if p.Encoding != other.Encoding { return false }
+  if p.IsSorted != other.IsSorted {
+    if p.IsSorted == nil || other.IsSorted == nil {
+      return false
+    }
+    if (*p.IsSorted) != (*other.IsSorted) { return false }
+  }
+  return true
+}
+
+func (p *DictionaryPageHeader) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("DictionaryPageHeader(%+v)", *p)
+}
+
+// New page format allowing reading levels without decompressing the data
+// Repetition and definition levels are uncompressed
+// The remaining section containing the data is compressed if is_compressed is true
+// 
+// 
+// Attributes:
+//  - NumValues: Number of values, including NULLs, in this data page. *
+//  - NumNulls: Number of NULL values, in this data page.
+// Number of non-null = num_values - num_nulls which is also the number of values in the data section *
+//  - NumRows: Number of rows in this data page. which means pages change on record boundaries (r = 0) *
+//  - Encoding: Encoding used for data in this page *
+//  - DefinitionLevelsByteLength: length of the definition levels
+//  - RepetitionLevelsByteLength: length of the repetition levels
+//  - IsCompressed: whether the values are compressed.
+// Which means the section of the page between
+// definition_levels_byte_length + repetition_levels_byte_length + 1 and compressed_page_size (included)
+// is compressed with the compression_codec.
+// If missing it is considered compressed
+//  - Statistics: optional statistics for this column chunk
+type DataPageHeaderV2 struct {
+  NumValues int32 `thrift:"num_values,1,required" db:"num_values" json:"num_values"`
+  NumNulls int32 `thrift:"num_nulls,2,required" db:"num_nulls" json:"num_nulls"`
+  NumRows int32 `thrift:"num_rows,3,required" db:"num_rows" json:"num_rows"`
+  Encoding Encoding `thrift:"encoding,4,required" db:"encoding" json:"encoding"`
+  DefinitionLevelsByteLength int32 `thrift:"definition_levels_byte_length,5,required" db:"definition_levels_byte_length" json:"definition_levels_byte_length"`
+  RepetitionLevelsByteLength int32 `thrift:"repetition_levels_byte_length,6,required" db:"repetition_levels_byte_length" json:"repetition_levels_byte_length"`
+  IsCompressed bool `thrift:"is_compressed,7" db:"is_compressed" json:"is_compressed"`
+  Statistics *Statistics `thrift:"statistics,8" db:"statistics" json:"statistics,omitempty"`
+}
+
+func NewDataPageHeaderV2() *DataPageHeaderV2 {
+  return &DataPageHeaderV2{
+IsCompressed: true,
+}
+}
+
+
+func (p *DataPageHeaderV2) GetNumValues() int32 {
+  return p.NumValues
+}
+
+func (p *DataPageHeaderV2) GetNumNulls() int32 {
+  return p.NumNulls
+}
+
+func (p *DataPageHeaderV2) GetNumRows() int32 {
+  return p.NumRows
+}
+
+func (p *DataPageHeaderV2) GetEncoding() Encoding {
+  return p.Encoding
+}
+
+func (p *DataPageHeaderV2) GetDefinitionLevelsByteLength() int32 {
+  return p.DefinitionLevelsByteLength
+}
+
+func (p *DataPageHeaderV2) GetRepetitionLevelsByteLength() int32 {
+  return p.RepetitionLevelsByteLength
+}
+var DataPageHeaderV2_IsCompressed_DEFAULT bool = true
+
+func (p *DataPageHeaderV2) GetIsCompressed() bool {
+  return p.IsCompressed
+}
+var DataPageHeaderV2_Statistics_DEFAULT *Statistics
+func (p *DataPageHeaderV2) GetStatistics() *Statistics {
+  if !p.IsSetStatistics() {
+    return DataPageHeaderV2_Statistics_DEFAULT
+  }
+return p.Statistics
+}
+func (p *DataPageHeaderV2) IsSetIsCompressed() bool {
+  return p.IsCompressed != DataPageHeaderV2_IsCompressed_DEFAULT
+}
+
+func (p *DataPageHeaderV2) IsSetStatistics() bool {
+  return p.Statistics != nil
+}
+
+func (p *DataPageHeaderV2) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+  var issetNumValues bool = false;
+  var issetNumNulls bool = false;
+  var issetNumRows bool = false;
+  var issetEncoding bool = false;
+  var issetDefinitionLevelsByteLength bool = false;
+  var issetRepetitionLevelsByteLength bool = false;
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    switch fieldId {
+    case 1:
+      if fieldTypeId == thrift.I32 {
+        if err := p.ReadField1(ctx, iprot); err != nil {
+          return err
+        }
+        issetNumValues = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 2:
+      if fieldTypeId == thrift.I32 {
+        if err := p.ReadField2(ctx, iprot); err != nil {
+          return err
+        }
+        issetNumNulls = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 3:
+      if fieldTypeId == thrift.I32 {
+        if err := p.ReadField3(ctx, iprot); err != nil {
+          return err
+        }
+        issetNumRows = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 4:
+      if fieldTypeId == thrift.I32 {
+        if err := p.ReadField4(ctx, iprot); err != nil {
+          return err
+        }
+        issetEncoding = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 5:
+      if fieldTypeId == thrift.I32 {
+        if err := p.ReadField5(ctx, iprot); err != nil {
+          return err
+        }
+        issetDefinitionLevelsByteLength = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 6:
+      if fieldTypeId == thrift.I32 {
+        if err := p.ReadField6(ctx, iprot); err != nil {
+          return err
+        }
+        issetRepetitionLevelsByteLength = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 7:
+      if fieldTypeId == thrift.BOOL {
+        if err := p.ReadField7(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 8:
+      if fieldTypeId == thrift.STRUCT {
+        if err := p.ReadField8(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    default:
+      if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+        return err
+      }
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  if !issetNumValues{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumValues is not set"));
+  }
+  if !issetNumNulls{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumNulls is not set"));
+  }
+  if !issetNumRows{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumRows is not set"));
+  }
+  if !issetEncoding{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Encoding is not set"));
+  }
+  if !issetDefinitionLevelsByteLength{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field DefinitionLevelsByteLength is not set"));
+  }
+  if !issetRepetitionLevelsByteLength{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field RepetitionLevelsByteLength is not set"));
+  }
+  return nil
+}
+
+func (p *DataPageHeaderV2)  ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI32(ctx); err != nil {
+  return thrift.PrependError("error reading field 1: ", err)
+} else {
+  p.NumValues = v
+}
+  return nil
+}
+
+func (p *DataPageHeaderV2)  ReadField2(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI32(ctx); err != nil {
+  return thrift.PrependError("error reading field 2: ", err)
+} else {
+  p.NumNulls = v
+}
+  return nil
+}
+
+func (p *DataPageHeaderV2)  ReadField3(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI32(ctx); err != nil {
+  return thrift.PrependError("error reading field 3: ", err)
+} else {
+  p.NumRows = v
+}
+  return nil
+}
+
+func (p *DataPageHeaderV2)  ReadField4(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI32(ctx); err != nil {
+  return thrift.PrependError("error reading field 4: ", err)
+} else {
+  temp := Encoding(v)
+  p.Encoding = temp
+}
+  return nil
+}
+
+func (p *DataPageHeaderV2)  ReadField5(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI32(ctx); err != nil {
+  return thrift.PrependError("error reading field 5: ", err)
+} else {
+  p.DefinitionLevelsByteLength = v
+}
+  return nil
+}
+
+func (p *DataPageHeaderV2)  ReadField6(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI32(ctx); err != nil {
+  return thrift.PrependError("error reading field 6: ", err)
+} else {
+  p.RepetitionLevelsByteLength = v
+}
+  return nil
+}
+
+func (p *DataPageHeaderV2)  ReadField7(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadBool(ctx); err != nil {
+  return thrift.PrependError("error reading field 7: ", err)
+} else {
+  p.IsCompressed = v
+}
+  return nil
+}
+
+func (p *DataPageHeaderV2)  ReadField8(ctx context.Context, iprot thrift.TProtocol) error {
+  p.Statistics = &Statistics{}
+  if err := p.Statistics.Read(ctx, iprot); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Statistics), err)
+  }
+  return nil
+}
+
+func (p *DataPageHeaderV2) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if err := oprot.WriteStructBegin(ctx, "DataPageHeaderV2"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+    if err := p.writeField1(ctx, oprot); err != nil { return err }
+    if err := p.writeField2(ctx, oprot); err != nil { return err }
+    if err := p.writeField3(ctx, oprot); err != nil { return err }
+    if err := p.writeField4(ctx, oprot); err != nil { return err }
+    if err := p.writeField5(ctx, oprot); err != nil { return err }
+    if err := p.writeField6(ctx, oprot); err != nil { return err }
+    if err := p.writeField7(ctx, oprot); err != nil { return err }
+    if err := p.writeField8(ctx, oprot); err != nil { return err }
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *DataPageHeaderV2) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "num_values", thrift.I32, 1); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:num_values: ", p), err) }
+  if err := oprot.WriteI32(ctx, int32(p.NumValues)); err != nil {
+  return thrift.PrependError(fmt.Sprintf("%T.num_values (1) field write error: ", p), err) }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 1:num_values: ", p), err) }
+  return err
+}
+
+func (p *DataPageHeaderV2) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "num_nulls", thrift.I32, 2); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:num_nulls: ", p), err) }
+  if err := oprot.WriteI32(ctx, int32(p.NumNulls)); err != nil {
+  return thrift.PrependError(fmt.Sprintf("%T.num_nulls (2) field write error: ", p), err) }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 2:num_nulls: ", p), err) }
+  return err
+}
+
+func (p *DataPageHeaderV2) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "num_rows", thrift.I32, 3); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:num_rows: ", p), err) }
+  if err := oprot.WriteI32(ctx, int32(p.NumRows)); err != nil {
+  return thrift.PrependError(fmt.Sprintf("%T.num_rows (3) field write error: ", p), err) }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 3:num_rows: ", p), err) }
+  return err
+}
+
+func (p *DataPageHeaderV2) writeField4(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "encoding", thrift.I32, 4); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:encoding: ", p), err) }
+  if err := oprot.WriteI32(ctx, int32(p.Encoding)); err != nil {
+  return thrift.PrependError(fmt.Sprintf("%T.encoding (4) field write error: ", p), err) }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 4:encoding: ", p), err) }
+  return err
+}
+
+func (p *DataPageHeaderV2) writeField5(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "definition_levels_byte_length", thrift.I32, 5); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:definition_levels_byte_length: ", p), err) }
+  if err := oprot.WriteI32(ctx, int32(p.DefinitionLevelsByteLength)); err != nil {
+  return thrift.PrependError(fmt.Sprintf("%T.definition_levels_byte_length (5) field write error: ", p), err) }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 5:definition_levels_byte_length: ", p), err) }
+  return err
+}
+
+func (p *DataPageHeaderV2) writeField6(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "repetition_levels_byte_length", thrift.I32, 6); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:repetition_levels_byte_length: ", p), err) }
+  if err := oprot.WriteI32(ctx, int32(p.RepetitionLevelsByteLength)); err != nil {
+  return thrift.PrependError(fmt.Sprintf("%T.repetition_levels_byte_length (6) field write error: ", p), err) }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 6:repetition_levels_byte_length: ", p), err) }
+  return err
+}
+
+func (p *DataPageHeaderV2) writeField7(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetIsCompressed() {
+    if err := oprot.WriteFieldBegin(ctx, "is_compressed", thrift.BOOL, 7); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:is_compressed: ", p), err) }
+    if err := oprot.WriteBool(ctx, bool(p.IsCompressed)); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T.is_compressed (7) field write error: ", p), err) }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 7:is_compressed: ", p), err) }
+  }
+  return err
+}
+
+func (p *DataPageHeaderV2) writeField8(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetStatistics() {
+    if err := oprot.WriteFieldBegin(ctx, "statistics", thrift.STRUCT, 8); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 8:statistics: ", p), err) }
+    if err := p.Statistics.Write(ctx, oprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Statistics), err)
+    }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 8:statistics: ", p), err) }
+  }
+  return err
+}
+
+func (p *DataPageHeaderV2) Equals(other *DataPageHeaderV2) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  if p.NumValues != other.NumValues { return false }
+  if p.NumNulls != other.NumNulls { return false }
+  if p.NumRows != other.NumRows { return false }
+  if p.Encoding != other.Encoding { return false }
+  if p.DefinitionLevelsByteLength != other.DefinitionLevelsByteLength { return false }
+  if p.RepetitionLevelsByteLength != other.RepetitionLevelsByteLength { return false }
+  if p.IsCompressed != other.IsCompressed { return false }
+  if !p.Statistics.Equals(other.Statistics) { return false }
+  return true
+}
+
+func (p *DataPageHeaderV2) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("DataPageHeaderV2(%+v)", *p)
+}
+
+// Block-based algorithm type annotation. *
+type SplitBlockAlgorithm struct {
+}
+
+func NewSplitBlockAlgorithm() *SplitBlockAlgorithm {
+  return &SplitBlockAlgorithm{}
+}
+
+func (p *SplitBlockAlgorithm) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+      return err
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  return nil
+}
+
+func (p *SplitBlockAlgorithm) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if err := oprot.WriteStructBegin(ctx, "SplitBlockAlgorithm"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *SplitBlockAlgorithm) Equals(other *SplitBlockAlgorithm) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  return true
+}
+
+func (p *SplitBlockAlgorithm) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("SplitBlockAlgorithm(%+v)", *p)
+}
+
+// The algorithm used in Bloom filter. *
+// 
+// Attributes:
+//  - BLOCK: Block-based Bloom filter. *
+type BloomFilterAlgorithm struct {
+  BLOCK *SplitBlockAlgorithm `thrift:"BLOCK,1" db:"BLOCK" json:"BLOCK,omitempty"`
+}
+
+func NewBloomFilterAlgorithm() *BloomFilterAlgorithm {
+  return &BloomFilterAlgorithm{}
+}
+
+var BloomFilterAlgorithm_BLOCK_DEFAULT *SplitBlockAlgorithm
+func (p *BloomFilterAlgorithm) GetBLOCK() *SplitBlockAlgorithm {
+  if !p.IsSetBLOCK() {
+    return BloomFilterAlgorithm_BLOCK_DEFAULT
+  }
+return p.BLOCK
+}
+func (p *BloomFilterAlgorithm) CountSetFieldsBloomFilterAlgorithm() int {
+  count := 0
+  if (p.IsSetBLOCK()) {
+    count++
+  }
+  return count
+
+}
+
+func (p *BloomFilterAlgorithm) IsSetBLOCK() bool {
+  return p.BLOCK != nil
+}
+
+func (p *BloomFilterAlgorithm) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    switch fieldId {
+    case 1:
+      if fieldTypeId == thrift.STRUCT {
+        if err := p.ReadField1(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    default:
+      if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+        return err
+      }
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  return nil
+}
+
+func (p *BloomFilterAlgorithm)  ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
+  p.BLOCK = &SplitBlockAlgorithm{}
+  if err := p.BLOCK.Read(ctx, iprot); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.BLOCK), err)
+  }
+  return nil
+}
+
+func (p *BloomFilterAlgorithm) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if c := p.CountSetFieldsBloomFilterAlgorithm(); c != 1 {
+    return fmt.Errorf("%T write union: exactly one field must be set (%d set).", p, c)
+  }
+  if err := oprot.WriteStructBegin(ctx, "BloomFilterAlgorithm"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+    if err := p.writeField1(ctx, oprot); err != nil { return err }
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *BloomFilterAlgorithm) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetBLOCK() {
+    if err := oprot.WriteFieldBegin(ctx, "BLOCK", thrift.STRUCT, 1); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:BLOCK: ", p), err) }
+    if err := p.BLOCK.Write(ctx, oprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.BLOCK), err)
+    }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 1:BLOCK: ", p), err) }
+  }
+  return err
+}
+
+func (p *BloomFilterAlgorithm) Equals(other *BloomFilterAlgorithm) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  if !p.BLOCK.Equals(other.BLOCK) { return false }
+  return true
+}
+
+func (p *BloomFilterAlgorithm) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("BloomFilterAlgorithm(%+v)", *p)
+}
+
+// Hash strategy type annotation. xxHash is an extremely fast non-cryptographic hash
+// algorithm. It uses 64 bits version of xxHash.
+// 
+type XxHash struct {
+}
+
+func NewXxHash() *XxHash {
+  return &XxHash{}
+}
+
+func (p *XxHash) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+      return err
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  return nil
+}
+
+func (p *XxHash) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if err := oprot.WriteStructBegin(ctx, "XxHash"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *XxHash) Equals(other *XxHash) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  return true
+}
+
+func (p *XxHash) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("XxHash(%+v)", *p)
+}
+
+// The hash function used in Bloom filter. This function takes the hash of a column value
+// using plain encoding.
+// 
+// 
+// Attributes:
+//  - XXHASH: xxHash Strategy. *
+type BloomFilterHash struct {
+  XXHASH *XxHash `thrift:"XXHASH,1" db:"XXHASH" json:"XXHASH,omitempty"`
+}
+
+func NewBloomFilterHash() *BloomFilterHash {
+  return &BloomFilterHash{}
+}
+
+var BloomFilterHash_XXHASH_DEFAULT *XxHash
+func (p *BloomFilterHash) GetXXHASH() *XxHash {
+  if !p.IsSetXXHASH() {
+    return BloomFilterHash_XXHASH_DEFAULT
+  }
+return p.XXHASH
+}
+func (p *BloomFilterHash) CountSetFieldsBloomFilterHash() int {
+  count := 0
+  if (p.IsSetXXHASH()) {
+    count++
+  }
+  return count
+
+}
+
+func (p *BloomFilterHash) IsSetXXHASH() bool {
+  return p.XXHASH != nil
+}
+
+func (p *BloomFilterHash) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    switch fieldId {
+    case 1:
+      if fieldTypeId == thrift.STRUCT {
+        if err := p.ReadField1(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    default:
+      if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+        return err
+      }
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  return nil
+}
+
+func (p *BloomFilterHash)  ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
+  p.XXHASH = &XxHash{}
+  if err := p.XXHASH.Read(ctx, iprot); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.XXHASH), err)
+  }
+  return nil
+}
+
+func (p *BloomFilterHash) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if c := p.CountSetFieldsBloomFilterHash(); c != 1 {
+    return fmt.Errorf("%T write union: exactly one field must be set (%d set).", p, c)
+  }
+  if err := oprot.WriteStructBegin(ctx, "BloomFilterHash"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+    if err := p.writeField1(ctx, oprot); err != nil { return err }
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *BloomFilterHash) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetXXHASH() {
+    if err := oprot.WriteFieldBegin(ctx, "XXHASH", thrift.STRUCT, 1); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:XXHASH: ", p), err) }
+    if err := p.XXHASH.Write(ctx, oprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.XXHASH), err)
+    }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 1:XXHASH: ", p), err) }
+  }
+  return err
+}
+
+func (p *BloomFilterHash) Equals(other *BloomFilterHash) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  if !p.XXHASH.Equals(other.XXHASH) { return false }
+  return true
+}
+
+func (p *BloomFilterHash) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("BloomFilterHash(%+v)", *p)
+}
+
+// The compression used in the Bloom filter.
+// 
+type Uncompressed struct {
+}
+
+func NewUncompressed() *Uncompressed {
+  return &Uncompressed{}
+}
+
+func (p *Uncompressed) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+      return err
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  return nil
+}
+
+func (p *Uncompressed) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if err := oprot.WriteStructBegin(ctx, "Uncompressed"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *Uncompressed) Equals(other *Uncompressed) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  return true
+}
+
+func (p *Uncompressed) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("Uncompressed(%+v)", *p)
+}
+
+// Attributes:
+//  - UNCOMPRESSED
+type BloomFilterCompression struct {
+  UNCOMPRESSED *Uncompressed `thrift:"UNCOMPRESSED,1" db:"UNCOMPRESSED" json:"UNCOMPRESSED,omitempty"`
+}
+
+func NewBloomFilterCompression() *BloomFilterCompression {
+  return &BloomFilterCompression{}
+}
+
+var BloomFilterCompression_UNCOMPRESSED_DEFAULT *Uncompressed
+func (p *BloomFilterCompression) GetUNCOMPRESSED() *Uncompressed {
+  if !p.IsSetUNCOMPRESSED() {
+    return BloomFilterCompression_UNCOMPRESSED_DEFAULT
+  }
+return p.UNCOMPRESSED
+}
+func (p *BloomFilterCompression) CountSetFieldsBloomFilterCompression() int {
+  count := 0
+  if (p.IsSetUNCOMPRESSED()) {
+    count++
+  }
+  return count
+
+}
+
+func (p *BloomFilterCompression) IsSetUNCOMPRESSED() bool {
+  return p.UNCOMPRESSED != nil
+}
+
+func (p *BloomFilterCompression) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    switch fieldId {
+    case 1:
+      if fieldTypeId == thrift.STRUCT {
+        if err := p.ReadField1(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    default:
+      if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+        return err
+      }
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  return nil
+}
+
+func (p *BloomFilterCompression)  ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
+  p.UNCOMPRESSED = &Uncompressed{}
+  if err := p.UNCOMPRESSED.Read(ctx, iprot); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.UNCOMPRESSED), err)
+  }
+  return nil
+}
+
+func (p *BloomFilterCompression) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if c := p.CountSetFieldsBloomFilterCompression(); c != 1 {
+    return fmt.Errorf("%T write union: exactly one field must be set (%d set).", p, c)
+  }
+  if err := oprot.WriteStructBegin(ctx, "BloomFilterCompression"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+    if err := p.writeField1(ctx, oprot); err != nil { return err }
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *BloomFilterCompression) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetUNCOMPRESSED() {
+    if err := oprot.WriteFieldBegin(ctx, "UNCOMPRESSED", thrift.STRUCT, 1); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:UNCOMPRESSED: ", p), err) }
+    if err := p.UNCOMPRESSED.Write(ctx, oprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.UNCOMPRESSED), err)
+    }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 1:UNCOMPRESSED: ", p), err) }
+  }
+  return err
+}
+
+func (p *BloomFilterCompression) Equals(other *BloomFilterCompression) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  if !p.UNCOMPRESSED.Equals(other.UNCOMPRESSED) { return false }
+  return true
+}
+
+func (p *BloomFilterCompression) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("BloomFilterCompression(%+v)", *p)
+}
+
+// Bloom filter header is stored at beginning of Bloom filter data of each column
+// and followed by its bitset.
+// 
+// 
+// Attributes:
+//  - NumBytes: The size of bitset in bytes *
+//  - Algorithm: The algorithm for setting bits. *
+//  - Hash: The hash function used for Bloom filter. *
+//  - Compression: The compression used in the Bloom filter *
+type BloomFilterHeader struct {
+  NumBytes int32 `thrift:"numBytes,1,required" db:"numBytes" json:"numBytes"`
+  Algorithm *BloomFilterAlgorithm `thrift:"algorithm,2,required" db:"algorithm" json:"algorithm"`
+  Hash *BloomFilterHash `thrift:"hash,3,required" db:"hash" json:"hash"`
+  Compression *BloomFilterCompression `thrift:"compression,4,required" db:"compression" json:"compression"`
+}
+
+func NewBloomFilterHeader() *BloomFilterHeader {
+  return &BloomFilterHeader{}
+}
+
+
+func (p *BloomFilterHeader) GetNumBytes() int32 {
+  return p.NumBytes
+}
+var BloomFilterHeader_Algorithm_DEFAULT *BloomFilterAlgorithm
+func (p *BloomFilterHeader) GetAlgorithm() *BloomFilterAlgorithm {
+  if !p.IsSetAlgorithm() {
+    return BloomFilterHeader_Algorithm_DEFAULT
+  }
+return p.Algorithm
+}
+var BloomFilterHeader_Hash_DEFAULT *BloomFilterHash
+func (p *BloomFilterHeader) GetHash() *BloomFilterHash {
+  if !p.IsSetHash() {
+    return BloomFilterHeader_Hash_DEFAULT
+  }
+return p.Hash
+}
+var BloomFilterHeader_Compression_DEFAULT *BloomFilterCompression
+func (p *BloomFilterHeader) GetCompression() *BloomFilterCompression {
+  if !p.IsSetCompression() {
+    return BloomFilterHeader_Compression_DEFAULT
+  }
+return p.Compression
+}
+func (p *BloomFilterHeader) IsSetAlgorithm() bool {
+  return p.Algorithm != nil
+}
+
+func (p *BloomFilterHeader) IsSetHash() bool {
+  return p.Hash != nil
+}
+
+func (p *BloomFilterHeader) IsSetCompression() bool {
+  return p.Compression != nil
+}
+
+func (p *BloomFilterHeader) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+  var issetNumBytes bool = false;
+  var issetAlgorithm bool = false;
+  var issetHash bool = false;
+  var issetCompression bool = false;
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    switch fieldId {
+    case 1:
+      if fieldTypeId == thrift.I32 {
+        if err := p.ReadField1(ctx, iprot); err != nil {
+          return err
+        }
+        issetNumBytes = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 2:
+      if fieldTypeId == thrift.STRUCT {
+        if err := p.ReadField2(ctx, iprot); err != nil {
+          return err
+        }
+        issetAlgorithm = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 3:
+      if fieldTypeId == thrift.STRUCT {
+        if err := p.ReadField3(ctx, iprot); err != nil {
+          return err
+        }
+        issetHash = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 4:
+      if fieldTypeId == thrift.STRUCT {
+        if err := p.ReadField4(ctx, iprot); err != nil {
+          return err
+        }
+        issetCompression = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    default:
+      if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+        return err
+      }
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  if !issetNumBytes{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumBytes is not set"));
+  }
+  if !issetAlgorithm{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Algorithm is not set"));
+  }
+  if !issetHash{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Hash is not set"));
+  }
+  if !issetCompression{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Compression is not set"));
+  }
+  return nil
+}
+
+func (p *BloomFilterHeader)  ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI32(ctx); err != nil {
+  return thrift.PrependError("error reading field 1: ", err)
+} else {
+  p.NumBytes = v
+}
+  return nil
+}
+
+func (p *BloomFilterHeader)  ReadField2(ctx context.Context, iprot thrift.TProtocol) error {
+  p.Algorithm = &BloomFilterAlgorithm{}
+  if err := p.Algorithm.Read(ctx, iprot); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Algorithm), err)
+  }
+  return nil
+}
+
+func (p *BloomFilterHeader)  ReadField3(ctx context.Context, iprot thrift.TProtocol) error {
+  p.Hash = &BloomFilterHash{}
+  if err := p.Hash.Read(ctx, iprot); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Hash), err)
+  }
+  return nil
+}
+
+func (p *BloomFilterHeader)  ReadField4(ctx context.Context, iprot thrift.TProtocol) error {
+  p.Compression = &BloomFilterCompression{}
+  if err := p.Compression.Read(ctx, iprot); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Compression), err)
+  }
+  return nil
+}
+
+func (p *BloomFilterHeader) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if err := oprot.WriteStructBegin(ctx, "BloomFilterHeader"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+    if err := p.writeField1(ctx, oprot); err != nil { return err }
+    if err := p.writeField2(ctx, oprot); err != nil { return err }
+    if err := p.writeField3(ctx, oprot); err != nil { return err }
+    if err := p.writeField4(ctx, oprot); err != nil { return err }
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *BloomFilterHeader) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "numBytes", thrift.I32, 1); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:numBytes: ", p), err) }
+  if err := oprot.WriteI32(ctx, int32(p.NumBytes)); err != nil {
+  return thrift.PrependError(fmt.Sprintf("%T.numBytes (1) field write error: ", p), err) }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 1:numBytes: ", p), err) }
+  return err
+}
+
+func (p *BloomFilterHeader) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "algorithm", thrift.STRUCT, 2); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:algorithm: ", p), err) }
+  if err := p.Algorithm.Write(ctx, oprot); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Algorithm), err)
+  }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 2:algorithm: ", p), err) }
+  return err
+}
+
+func (p *BloomFilterHeader) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "hash", thrift.STRUCT, 3); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:hash: ", p), err) }
+  if err := p.Hash.Write(ctx, oprot); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Hash), err)
+  }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 3:hash: ", p), err) }
+  return err
+}
+
+func (p *BloomFilterHeader) writeField4(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "compression", thrift.STRUCT, 4); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:compression: ", p), err) }
+  if err := p.Compression.Write(ctx, oprot); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Compression), err)
+  }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 4:compression: ", p), err) }
+  return err
+}
+
+func (p *BloomFilterHeader) Equals(other *BloomFilterHeader) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  if p.NumBytes != other.NumBytes { return false }
+  if !p.Algorithm.Equals(other.Algorithm) { return false }
+  if !p.Hash.Equals(other.Hash) { return false }
+  if !p.Compression.Equals(other.Compression) { return false }
+  return true
+}
+
+func (p *BloomFilterHeader) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("BloomFilterHeader(%+v)", *p)
+}
+
+// Attributes:
+//  - Type: the type of the page: indicates which of the *_header fields is set *
+//  - UncompressedPageSize: Uncompressed page size in bytes (not including this header) *
+//  - CompressedPageSize: Compressed (and potentially encrypted) page size in bytes, not including this header *
+//  - Crc: The 32bit CRC for the page, to be be calculated as follows:
+// - Using the standard CRC32 algorithm
+// - On the data only, i.e. this header should not be included. 'Data'
+//   hereby refers to the concatenation of the repetition levels, the
+//   definition levels and the column value, in this exact order.
+// - On the encoded versions of the repetition levels, definition levels and
+//   column values
+// - On the compressed versions of the repetition levels, definition levels
+//   and column values where possible;
+//   - For v1 data pages, the repetition levels, definition levels and column
+//     values are always compressed together. If a compression scheme is
+//     specified, the CRC shall be calculated on the compressed version of
+//     this concatenation. If no compression scheme is specified, the CRC
+//     shall be calculated on the uncompressed version of this concatenation.
+//   - For v2 data pages, the repetition levels and definition levels are
+//     handled separately from the data and are never compressed (only
+//     encoded). If a compression scheme is specified, the CRC shall be
+//     calculated on the concatenation of the uncompressed repetition levels,
+//     uncompressed definition levels and the compressed column values.
+//     If no compression scheme is specified, the CRC shall be calculated on
+//     the uncompressed concatenation.
+// If enabled, this allows for disabling checksumming in HDFS if only a few
+// pages need to be read.
+// 
+//  - DataPageHeader
+//  - IndexPageHeader
+//  - DictionaryPageHeader
+//  - DataPageHeaderV2
+type PageHeader struct {
+  Type PageType `thrift:"type,1,required" db:"type" json:"type"`
+  UncompressedPageSize int32 `thrift:"uncompressed_page_size,2,required" db:"uncompressed_page_size" json:"uncompressed_page_size"`
+  CompressedPageSize int32 `thrift:"compressed_page_size,3,required" db:"compressed_page_size" json:"compressed_page_size"`
+  Crc *int32 `thrift:"crc,4" db:"crc" json:"crc,omitempty"`
+  DataPageHeader *DataPageHeader `thrift:"data_page_header,5" db:"data_page_header" json:"data_page_header,omitempty"`
+  IndexPageHeader *IndexPageHeader `thrift:"index_page_header,6" db:"index_page_header" json:"index_page_header,omitempty"`
+  DictionaryPageHeader *DictionaryPageHeader `thrift:"dictionary_page_header,7" db:"dictionary_page_header" json:"dictionary_page_header,omitempty"`
+  DataPageHeaderV2 *DataPageHeaderV2 `thrift:"data_page_header_v2,8" db:"data_page_header_v2" json:"data_page_header_v2,omitempty"`
+}
+
+func NewPageHeader() *PageHeader {
+  return &PageHeader{}
+}
+
+
+func (p *PageHeader) GetType() PageType {
+  return p.Type
+}
+
+func (p *PageHeader) GetUncompressedPageSize() int32 {
+  return p.UncompressedPageSize
+}
+
+func (p *PageHeader) GetCompressedPageSize() int32 {
+  return p.CompressedPageSize
+}
+var PageHeader_Crc_DEFAULT int32
+func (p *PageHeader) GetCrc() int32 {
+  if !p.IsSetCrc() {
+    return PageHeader_Crc_DEFAULT
+  }
+return *p.Crc
+}
+var PageHeader_DataPageHeader_DEFAULT *DataPageHeader
+func (p *PageHeader) GetDataPageHeader() *DataPageHeader {
+  if !p.IsSetDataPageHeader() {
+    return PageHeader_DataPageHeader_DEFAULT
+  }
+return p.DataPageHeader
+}
+var PageHeader_IndexPageHeader_DEFAULT *IndexPageHeader
+func (p *PageHeader) GetIndexPageHeader() *IndexPageHeader {
+  if !p.IsSetIndexPageHeader() {
+    return PageHeader_IndexPageHeader_DEFAULT
+  }
+return p.IndexPageHeader
+}
+var PageHeader_DictionaryPageHeader_DEFAULT *DictionaryPageHeader
+func (p *PageHeader) GetDictionaryPageHeader() *DictionaryPageHeader {
+  if !p.IsSetDictionaryPageHeader() {
+    return PageHeader_DictionaryPageHeader_DEFAULT
+  }
+return p.DictionaryPageHeader
+}
+var PageHeader_DataPageHeaderV2_DEFAULT *DataPageHeaderV2
+func (p *PageHeader) GetDataPageHeaderV2() *DataPageHeaderV2 {
+  if !p.IsSetDataPageHeaderV2() {
+    return PageHeader_DataPageHeaderV2_DEFAULT
+  }
+return p.DataPageHeaderV2
+}
+func (p *PageHeader) IsSetCrc() bool {
+  return p.Crc != nil
+}
+
+func (p *PageHeader) IsSetDataPageHeader() bool {
+  return p.DataPageHeader != nil
+}
+
+func (p *PageHeader) IsSetIndexPageHeader() bool {
+  return p.IndexPageHeader != nil
+}
+
+func (p *PageHeader) IsSetDictionaryPageHeader() bool {
+  return p.DictionaryPageHeader != nil
+}
+
+func (p *PageHeader) IsSetDataPageHeaderV2() bool {
+  return p.DataPageHeaderV2 != nil
+}
+
+func (p *PageHeader) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+  var issetType bool = false;
+  var issetUncompressedPageSize bool = false;
+  var issetCompressedPageSize bool = false;
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    switch fieldId {
+    case 1:
+      if fieldTypeId == thrift.I32 {
+        if err := p.ReadField1(ctx, iprot); err != nil {
+          return err
+        }
+        issetType = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 2:
+      if fieldTypeId == thrift.I32 {
+        if err := p.ReadField2(ctx, iprot); err != nil {
+          return err
+        }
+        issetUncompressedPageSize = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 3:
+      if fieldTypeId == thrift.I32 {
+        if err := p.ReadField3(ctx, iprot); err != nil {
+          return err
+        }
+        issetCompressedPageSize = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 4:
+      if fieldTypeId == thrift.I32 {
+        if err := p.ReadField4(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 5:
+      if fieldTypeId == thrift.STRUCT {
+        if err := p.ReadField5(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 6:
+      if fieldTypeId == thrift.STRUCT {
+        if err := p.ReadField6(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 7:
+      if fieldTypeId == thrift.STRUCT {
+        if err := p.ReadField7(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 8:
+      if fieldTypeId == thrift.STRUCT {
+        if err := p.ReadField8(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    default:
+      if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+        return err
+      }
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  if !issetType{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Type is not set"));
+  }
+  if !issetUncompressedPageSize{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field UncompressedPageSize is not set"));
+  }
+  if !issetCompressedPageSize{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field CompressedPageSize is not set"));
+  }
+  return nil
+}
+
+func (p *PageHeader)  ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI32(ctx); err != nil {
+  return thrift.PrependError("error reading field 1: ", err)
+} else {
+  temp := PageType(v)
+  p.Type = temp
+}
+  return nil
+}
+
+func (p *PageHeader)  ReadField2(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI32(ctx); err != nil {
+  return thrift.PrependError("error reading field 2: ", err)
+} else {
+  p.UncompressedPageSize = v
+}
+  return nil
+}
+
+func (p *PageHeader)  ReadField3(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI32(ctx); err != nil {
+  return thrift.PrependError("error reading field 3: ", err)
+} else {
+  p.CompressedPageSize = v
+}
+  return nil
+}
+
+func (p *PageHeader)  ReadField4(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI32(ctx); err != nil {
+  return thrift.PrependError("error reading field 4: ", err)
+} else {
+  p.Crc = &v
+}
+  return nil
+}
+
+func (p *PageHeader)  ReadField5(ctx context.Context, iprot thrift.TProtocol) error {
+  p.DataPageHeader = &DataPageHeader{}
+  if err := p.DataPageHeader.Read(ctx, iprot); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.DataPageHeader), err)
+  }
+  return nil
+}
+
+func (p *PageHeader)  ReadField6(ctx context.Context, iprot thrift.TProtocol) error {
+  p.IndexPageHeader = &IndexPageHeader{}
+  if err := p.IndexPageHeader.Read(ctx, iprot); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.IndexPageHeader), err)
+  }
+  return nil
+}
+
+func (p *PageHeader)  ReadField7(ctx context.Context, iprot thrift.TProtocol) error {
+  p.DictionaryPageHeader = &DictionaryPageHeader{}
+  if err := p.DictionaryPageHeader.Read(ctx, iprot); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.DictionaryPageHeader), err)
+  }
+  return nil
+}
+
+func (p *PageHeader)  ReadField8(ctx context.Context, iprot thrift.TProtocol) error {
+  p.DataPageHeaderV2 = &DataPageHeaderV2{
+  IsCompressed: true,
+}
+  if err := p.DataPageHeaderV2.Read(ctx, iprot); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.DataPageHeaderV2), err)
+  }
+  return nil
+}
+
+func (p *PageHeader) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if err := oprot.WriteStructBegin(ctx, "PageHeader"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+    if err := p.writeField1(ctx, oprot); err != nil { return err }
+    if err := p.writeField2(ctx, oprot); err != nil { return err }
+    if err := p.writeField3(ctx, oprot); err != nil { return err }
+    if err := p.writeField4(ctx, oprot); err != nil { return err }
+    if err := p.writeField5(ctx, oprot); err != nil { return err }
+    if err := p.writeField6(ctx, oprot); err != nil { return err }
+    if err := p.writeField7(ctx, oprot); err != nil { return err }
+    if err := p.writeField8(ctx, oprot); err != nil { return err }
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *PageHeader) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "type", thrift.I32, 1); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:type: ", p), err) }
+  if err := oprot.WriteI32(ctx, int32(p.Type)); err != nil {
+  return thrift.PrependError(fmt.Sprintf("%T.type (1) field write error: ", p), err) }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 1:type: ", p), err) }
+  return err
+}
+
+func (p *PageHeader) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "uncompressed_page_size", thrift.I32, 2); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:uncompressed_page_size: ", p), err) }
+  if err := oprot.WriteI32(ctx, int32(p.UncompressedPageSize)); err != nil {
+  return thrift.PrependError(fmt.Sprintf("%T.uncompressed_page_size (2) field write error: ", p), err) }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 2:uncompressed_page_size: ", p), err) }
+  return err
+}
+
+func (p *PageHeader) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "compressed_page_size", thrift.I32, 3); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:compressed_page_size: ", p), err) }
+  if err := oprot.WriteI32(ctx, int32(p.CompressedPageSize)); err != nil {
+  return thrift.PrependError(fmt.Sprintf("%T.compressed_page_size (3) field write error: ", p), err) }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 3:compressed_page_size: ", p), err) }
+  return err
+}
+
+func (p *PageHeader) writeField4(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetCrc() {
+    if err := oprot.WriteFieldBegin(ctx, "crc", thrift.I32, 4); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:crc: ", p), err) }
+    if err := oprot.WriteI32(ctx, int32(*p.Crc)); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T.crc (4) field write error: ", p), err) }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 4:crc: ", p), err) }
+  }
+  return err
+}
+
+func (p *PageHeader) writeField5(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetDataPageHeader() {
+    if err := oprot.WriteFieldBegin(ctx, "data_page_header", thrift.STRUCT, 5); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:data_page_header: ", p), err) }
+    if err := p.DataPageHeader.Write(ctx, oprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.DataPageHeader), err)
+    }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 5:data_page_header: ", p), err) }
+  }
+  return err
+}
+
+func (p *PageHeader) writeField6(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetIndexPageHeader() {
+    if err := oprot.WriteFieldBegin(ctx, "index_page_header", thrift.STRUCT, 6); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:index_page_header: ", p), err) }
+    if err := p.IndexPageHeader.Write(ctx, oprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.IndexPageHeader), err)
+    }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 6:index_page_header: ", p), err) }
+  }
+  return err
+}
+
+func (p *PageHeader) writeField7(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetDictionaryPageHeader() {
+    if err := oprot.WriteFieldBegin(ctx, "dictionary_page_header", thrift.STRUCT, 7); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:dictionary_page_header: ", p), err) }
+    if err := p.DictionaryPageHeader.Write(ctx, oprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.DictionaryPageHeader), err)
+    }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 7:dictionary_page_header: ", p), err) }
+  }
+  return err
+}
+
+func (p *PageHeader) writeField8(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetDataPageHeaderV2() {
+    if err := oprot.WriteFieldBegin(ctx, "data_page_header_v2", thrift.STRUCT, 8); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 8:data_page_header_v2: ", p), err) }
+    if err := p.DataPageHeaderV2.Write(ctx, oprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.DataPageHeaderV2), err)
+    }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 8:data_page_header_v2: ", p), err) }
+  }
+  return err
+}
+
+func (p *PageHeader) Equals(other *PageHeader) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  if p.Type != other.Type { return false }
+  if p.UncompressedPageSize != other.UncompressedPageSize { return false }
+  if p.CompressedPageSize != other.CompressedPageSize { return false }
+  if p.Crc != other.Crc {
+    if p.Crc == nil || other.Crc == nil {
+      return false
+    }
+    if (*p.Crc) != (*other.Crc) { return false }
+  }
+  if !p.DataPageHeader.Equals(other.DataPageHeader) { return false }
+  if !p.IndexPageHeader.Equals(other.IndexPageHeader) { return false }
+  if !p.DictionaryPageHeader.Equals(other.DictionaryPageHeader) { return false }
+  if !p.DataPageHeaderV2.Equals(other.DataPageHeaderV2) { return false }
+  return true
+}
+
+func (p *PageHeader) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("PageHeader(%+v)", *p)
+}
+
+// Wrapper struct to store key values
+// 
+// Attributes:
+//  - Key
+//  - Value
+type KeyValue struct {
+  Key string `thrift:"key,1,required" db:"key" json:"key"`
+  Value *string `thrift:"value,2" db:"value" json:"value,omitempty"`
+}
+
+func NewKeyValue() *KeyValue {
+  return &KeyValue{}
+}
+
+
+func (p *KeyValue) GetKey() string {
+  return p.Key
+}
+var KeyValue_Value_DEFAULT string
+func (p *KeyValue) GetValue() string {
+  if !p.IsSetValue() {
+    return KeyValue_Value_DEFAULT
+  }
+return *p.Value
+}
+func (p *KeyValue) IsSetValue() bool {
+  return p.Value != nil
+}
+
+func (p *KeyValue) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+  var issetKey bool = false;
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    switch fieldId {
+    case 1:
+      if fieldTypeId == thrift.STRING {
+        if err := p.ReadField1(ctx, iprot); err != nil {
+          return err
+        }
+        issetKey = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 2:
+      if fieldTypeId == thrift.STRING {
+        if err := p.ReadField2(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    default:
+      if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+        return err
+      }
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  if !issetKey{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Key is not set"));
+  }
+  return nil
+}
+
+func (p *KeyValue)  ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadString(ctx); err != nil {
+  return thrift.PrependError("error reading field 1: ", err)
+} else {
+  p.Key = v
+}
+  return nil
+}
+
+func (p *KeyValue)  ReadField2(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadString(ctx); err != nil {
+  return thrift.PrependError("error reading field 2: ", err)
+} else {
+  p.Value = &v
+}
+  return nil
+}
+
+func (p *KeyValue) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if err := oprot.WriteStructBegin(ctx, "KeyValue"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+    if err := p.writeField1(ctx, oprot); err != nil { return err }
+    if err := p.writeField2(ctx, oprot); err != nil { return err }
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *KeyValue) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "key", thrift.STRING, 1); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:key: ", p), err) }
+  if err := oprot.WriteString(ctx, string(p.Key)); err != nil {
+  return thrift.PrependError(fmt.Sprintf("%T.key (1) field write error: ", p), err) }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 1:key: ", p), err) }
+  return err
+}
+
+func (p *KeyValue) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetValue() {
+    if err := oprot.WriteFieldBegin(ctx, "value", thrift.STRING, 2); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:value: ", p), err) }
+    if err := oprot.WriteString(ctx, string(*p.Value)); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T.value (2) field write error: ", p), err) }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 2:value: ", p), err) }
+  }
+  return err
+}
+
+func (p *KeyValue) Equals(other *KeyValue) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  if p.Key != other.Key { return false }
+  if p.Value != other.Value {
+    if p.Value == nil || other.Value == nil {
+      return false
+    }
+    if (*p.Value) != (*other.Value) { return false }
+  }
+  return true
+}
+
+func (p *KeyValue) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("KeyValue(%+v)", *p)
+}
+
+// Wrapper struct to specify sort order
+// 
+// Attributes:
+//  - ColumnIdx: The column index (in this row group) *
+//  - Descending: If true, indicates this column is sorted in descending order. *
+//  - NullsFirst: If true, nulls will come before non-null values, otherwise,
+// nulls go at the end.
+type SortingColumn struct {
+  ColumnIdx int32 `thrift:"column_idx,1,required" db:"column_idx" json:"column_idx"`
+  Descending bool `thrift:"descending,2,required" db:"descending" json:"descending"`
+  NullsFirst bool `thrift:"nulls_first,3,required" db:"nulls_first" json:"nulls_first"`
+}
+
+func NewSortingColumn() *SortingColumn {
+  return &SortingColumn{}
+}
+
+
+func (p *SortingColumn) GetColumnIdx() int32 {
+  return p.ColumnIdx
+}
+
+func (p *SortingColumn) GetDescending() bool {
+  return p.Descending
+}
+
+func (p *SortingColumn) GetNullsFirst() bool {
+  return p.NullsFirst
+}
+func (p *SortingColumn) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+  var issetColumnIdx bool = false;
+  var issetDescending bool = false;
+  var issetNullsFirst bool = false;
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    switch fieldId {
+    case 1:
+      if fieldTypeId == thrift.I32 {
+        if err := p.ReadField1(ctx, iprot); err != nil {
+          return err
+        }
+        issetColumnIdx = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 2:
+      if fieldTypeId == thrift.BOOL {
+        if err := p.ReadField2(ctx, iprot); err != nil {
+          return err
+        }
+        issetDescending = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 3:
+      if fieldTypeId == thrift.BOOL {
+        if err := p.ReadField3(ctx, iprot); err != nil {
+          return err
+        }
+        issetNullsFirst = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    default:
+      if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+        return err
+      }
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  if !issetColumnIdx{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field ColumnIdx is not set"));
+  }
+  if !issetDescending{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Descending is not set"));
+  }
+  if !issetNullsFirst{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NullsFirst is not set"));
+  }
+  return nil
+}
+
+func (p *SortingColumn)  ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI32(ctx); err != nil {
+  return thrift.PrependError("error reading field 1: ", err)
+} else {
+  p.ColumnIdx = v
+}
+  return nil
+}
+
+func (p *SortingColumn)  ReadField2(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadBool(ctx); err != nil {
+  return thrift.PrependError("error reading field 2: ", err)
+} else {
+  p.Descending = v
+}
+  return nil
+}
+
+func (p *SortingColumn)  ReadField3(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadBool(ctx); err != nil {
+  return thrift.PrependError("error reading field 3: ", err)
+} else {
+  p.NullsFirst = v
+}
+  return nil
+}
+
+func (p *SortingColumn) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if err := oprot.WriteStructBegin(ctx, "SortingColumn"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+    if err := p.writeField1(ctx, oprot); err != nil { return err }
+    if err := p.writeField2(ctx, oprot); err != nil { return err }
+    if err := p.writeField3(ctx, oprot); err != nil { return err }
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *SortingColumn) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "column_idx", thrift.I32, 1); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:column_idx: ", p), err) }
+  if err := oprot.WriteI32(ctx, int32(p.ColumnIdx)); err != nil {
+  return thrift.PrependError(fmt.Sprintf("%T.column_idx (1) field write error: ", p), err) }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 1:column_idx: ", p), err) }
+  return err
+}
+
+func (p *SortingColumn) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "descending", thrift.BOOL, 2); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:descending: ", p), err) }
+  if err := oprot.WriteBool(ctx, bool(p.Descending)); err != nil {
+  return thrift.PrependError(fmt.Sprintf("%T.descending (2) field write error: ", p), err) }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 2:descending: ", p), err) }
+  return err
+}
+
+func (p *SortingColumn) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "nulls_first", thrift.BOOL, 3); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:nulls_first: ", p), err) }
+  if err := oprot.WriteBool(ctx, bool(p.NullsFirst)); err != nil {
+  return thrift.PrependError(fmt.Sprintf("%T.nulls_first (3) field write error: ", p), err) }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 3:nulls_first: ", p), err) }
+  return err
+}
+
+func (p *SortingColumn) Equals(other *SortingColumn) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  if p.ColumnIdx != other.ColumnIdx { return false }
+  if p.Descending != other.Descending { return false }
+  if p.NullsFirst != other.NullsFirst { return false }
+  return true
+}
+
+func (p *SortingColumn) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("SortingColumn(%+v)", *p)
+}
+
+// statistics of a given page type and encoding
+// 
+// Attributes:
+//  - PageType: the page type (data/dic/...) *
+//  - Encoding: encoding of the page *
+//  - Count: number of pages of this type with this encoding *
+type PageEncodingStats struct {
+  PageType PageType `thrift:"page_type,1,required" db:"page_type" json:"page_type"`
+  Encoding Encoding `thrift:"encoding,2,required" db:"encoding" json:"encoding"`
+  Count int32 `thrift:"count,3,required" db:"count" json:"count"`
+}
+
+func NewPageEncodingStats() *PageEncodingStats {
+  return &PageEncodingStats{}
+}
+
+
+func (p *PageEncodingStats) GetPageType() PageType {
+  return p.PageType
+}
+
+func (p *PageEncodingStats) GetEncoding() Encoding {
+  return p.Encoding
+}
+
+func (p *PageEncodingStats) GetCount() int32 {
+  return p.Count
+}
+func (p *PageEncodingStats) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+  var issetPageType bool = false;
+  var issetEncoding bool = false;
+  var issetCount bool = false;
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    switch fieldId {
+    case 1:
+      if fieldTypeId == thrift.I32 {
+        if err := p.ReadField1(ctx, iprot); err != nil {
+          return err
+        }
+        issetPageType = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 2:
+      if fieldTypeId == thrift.I32 {
+        if err := p.ReadField2(ctx, iprot); err != nil {
+          return err
+        }
+        issetEncoding = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 3:
+      if fieldTypeId == thrift.I32 {
+        if err := p.ReadField3(ctx, iprot); err != nil {
+          return err
+        }
+        issetCount = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    default:
+      if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+        return err
+      }
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  if !issetPageType{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field PageType is not set"));
+  }
+  if !issetEncoding{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Encoding is not set"));
+  }
+  if !issetCount{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Count is not set"));
+  }
+  return nil
+}
+
+func (p *PageEncodingStats)  ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI32(ctx); err != nil {
+  return thrift.PrependError("error reading field 1: ", err)
+} else {
+  temp := PageType(v)
+  p.PageType = temp
+}
+  return nil
+}
+
+func (p *PageEncodingStats)  ReadField2(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI32(ctx); err != nil {
+  return thrift.PrependError("error reading field 2: ", err)
+} else {
+  temp := Encoding(v)
+  p.Encoding = temp
+}
+  return nil
+}
+
+func (p *PageEncodingStats)  ReadField3(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI32(ctx); err != nil {
+  return thrift.PrependError("error reading field 3: ", err)
+} else {
+  p.Count = v
+}
+  return nil
+}
+
+func (p *PageEncodingStats) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if err := oprot.WriteStructBegin(ctx, "PageEncodingStats"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+    if err := p.writeField1(ctx, oprot); err != nil { return err }
+    if err := p.writeField2(ctx, oprot); err != nil { return err }
+    if err := p.writeField3(ctx, oprot); err != nil { return err }
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *PageEncodingStats) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "page_type", thrift.I32, 1); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:page_type: ", p), err) }
+  if err := oprot.WriteI32(ctx, int32(p.PageType)); err != nil {
+  return thrift.PrependError(fmt.Sprintf("%T.page_type (1) field write error: ", p), err) }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 1:page_type: ", p), err) }
+  return err
+}
+
+func (p *PageEncodingStats) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "encoding", thrift.I32, 2); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:encoding: ", p), err) }
+  if err := oprot.WriteI32(ctx, int32(p.Encoding)); err != nil {
+  return thrift.PrependError(fmt.Sprintf("%T.encoding (2) field write error: ", p), err) }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 2:encoding: ", p), err) }
+  return err
+}
+
+func (p *PageEncodingStats) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "count", thrift.I32, 3); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:count: ", p), err) }
+  if err := oprot.WriteI32(ctx, int32(p.Count)); err != nil {
+  return thrift.PrependError(fmt.Sprintf("%T.count (3) field write error: ", p), err) }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 3:count: ", p), err) }
+  return err
+}
+
+func (p *PageEncodingStats) Equals(other *PageEncodingStats) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  if p.PageType != other.PageType { return false }
+  if p.Encoding != other.Encoding { return false }
+  if p.Count != other.Count { return false }
+  return true
+}
+
+func (p *PageEncodingStats) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("PageEncodingStats(%+v)", *p)
+}
+
+// Description for column metadata
+// 
+// Attributes:
+//  - Type: Type of this column *
+//  - Encodings: Set of all encodings used for this column. The purpose is to validate
+// whether we can decode those pages. *
+//  - PathInSchema: Path in schema *
+//  - Codec: Compression codec *
+//  - NumValues: Number of values in this column *
+//  - TotalUncompressedSize: total byte size of all uncompressed pages in this column chunk (including the headers) *
+//  - TotalCompressedSize: total byte size of all compressed, and potentially encrypted, pages
+// in this column chunk (including the headers) *
+//  - KeyValueMetadata: Optional key/value metadata *
+//  - DataPageOffset: Byte offset from beginning of file to first data page *
+//  - IndexPageOffset: Byte offset from beginning of file to root index page *
+//  - DictionaryPageOffset: Byte offset from the beginning of file to first (only) dictionary page *
+//  - Statistics: optional statistics for this column chunk
+//  - EncodingStats: Set of all encodings used for pages in this column chunk.
+// This information can be used to determine if all data pages are
+// dictionary encoded for example *
+//  - BloomFilterOffset: Byte offset from beginning of file to Bloom filter data. *
+type ColumnMetaData struct {
+  Type Type `thrift:"type,1,required" db:"type" json:"type"`
+  Encodings []Encoding `thrift:"encodings,2,required" db:"encodings" json:"encodings"`
+  PathInSchema []string `thrift:"path_in_schema,3,required" db:"path_in_schema" json:"path_in_schema"`
+  Codec CompressionCodec `thrift:"codec,4,required" db:"codec" json:"codec"`
+  NumValues int64 `thrift:"num_values,5,required" db:"num_values" json:"num_values"`
+  TotalUncompressedSize int64 `thrift:"total_uncompressed_size,6,required" db:"total_uncompressed_size" json:"total_uncompressed_size"`
+  TotalCompressedSize int64 `thrift:"total_compressed_size,7,required" db:"total_compressed_size" json:"total_compressed_size"`
+  KeyValueMetadata []*KeyValue `thrift:"key_value_metadata,8" db:"key_value_metadata" json:"key_value_metadata,omitempty"`
+  DataPageOffset int64 `thrift:"data_page_offset,9,required" db:"data_page_offset" json:"data_page_offset"`
+  IndexPageOffset *int64 `thrift:"index_page_offset,10" db:"index_page_offset" json:"index_page_offset,omitempty"`
+  DictionaryPageOffset *int64 `thrift:"dictionary_page_offset,11" db:"dictionary_page_offset" json:"dictionary_page_offset,omitempty"`
+  Statistics *Statistics `thrift:"statistics,12" db:"statistics" json:"statistics,omitempty"`
+  EncodingStats []*PageEncodingStats `thrift:"encoding_stats,13" db:"encoding_stats" json:"encoding_stats,omitempty"`
+  BloomFilterOffset *int64 `thrift:"bloom_filter_offset,14" db:"bloom_filter_offset" json:"bloom_filter_offset,omitempty"`
+}
+
+func NewColumnMetaData() *ColumnMetaData {
+  return &ColumnMetaData{}
+}
+
+
+func (p *ColumnMetaData) GetType() Type {
+  return p.Type
+}
+
+func (p *ColumnMetaData) GetEncodings() []Encoding {
+  return p.Encodings
+}
+
+func (p *ColumnMetaData) GetPathInSchema() []string {
+  return p.PathInSchema
+}
+
+func (p *ColumnMetaData) GetCodec() CompressionCodec {
+  return p.Codec
+}
+
+func (p *ColumnMetaData) GetNumValues() int64 {
+  return p.NumValues
+}
+
+func (p *ColumnMetaData) GetTotalUncompressedSize() int64 {
+  return p.TotalUncompressedSize
+}
+
+func (p *ColumnMetaData) GetTotalCompressedSize() int64 {
+  return p.TotalCompressedSize
+}
+var ColumnMetaData_KeyValueMetadata_DEFAULT []*KeyValue
+
+func (p *ColumnMetaData) GetKeyValueMetadata() []*KeyValue {
+  return p.KeyValueMetadata
+}
+
+func (p *ColumnMetaData) GetDataPageOffset() int64 {
+  return p.DataPageOffset
+}
+var ColumnMetaData_IndexPageOffset_DEFAULT int64
+func (p *ColumnMetaData) GetIndexPageOffset() int64 {
+  if !p.IsSetIndexPageOffset() {
+    return ColumnMetaData_IndexPageOffset_DEFAULT
+  }
+return *p.IndexPageOffset
+}
+var ColumnMetaData_DictionaryPageOffset_DEFAULT int64
+func (p *ColumnMetaData) GetDictionaryPageOffset() int64 {
+  if !p.IsSetDictionaryPageOffset() {
+    return ColumnMetaData_DictionaryPageOffset_DEFAULT
+  }
+return *p.DictionaryPageOffset
+}
+var ColumnMetaData_Statistics_DEFAULT *Statistics
+func (p *ColumnMetaData) GetStatistics() *Statistics {
+  if !p.IsSetStatistics() {
+    return ColumnMetaData_Statistics_DEFAULT
+  }
+return p.Statistics
+}
+var ColumnMetaData_EncodingStats_DEFAULT []*PageEncodingStats
+
+func (p *ColumnMetaData) GetEncodingStats() []*PageEncodingStats {
+  return p.EncodingStats
+}
+var ColumnMetaData_BloomFilterOffset_DEFAULT int64
+func (p *ColumnMetaData) GetBloomFilterOffset() int64 {
+  if !p.IsSetBloomFilterOffset() {
+    return ColumnMetaData_BloomFilterOffset_DEFAULT
+  }
+return *p.BloomFilterOffset
+}
+func (p *ColumnMetaData) IsSetKeyValueMetadata() bool {
+  return p.KeyValueMetadata != nil
+}
+
+func (p *ColumnMetaData) IsSetIndexPageOffset() bool {
+  return p.IndexPageOffset != nil
+}
+
+func (p *ColumnMetaData) IsSetDictionaryPageOffset() bool {
+  return p.DictionaryPageOffset != nil
+}
+
+func (p *ColumnMetaData) IsSetStatistics() bool {
+  return p.Statistics != nil
+}
+
+func (p *ColumnMetaData) IsSetEncodingStats() bool {
+  return p.EncodingStats != nil
+}
+
+func (p *ColumnMetaData) IsSetBloomFilterOffset() bool {
+  return p.BloomFilterOffset != nil
+}
+
+func (p *ColumnMetaData) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+  var issetType bool = false;
+  var issetEncodings bool = false;
+  var issetPathInSchema bool = false;
+  var issetCodec bool = false;
+  var issetNumValues bool = false;
+  var issetTotalUncompressedSize bool = false;
+  var issetTotalCompressedSize bool = false;
+  var issetDataPageOffset bool = false;
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    switch fieldId {
+    case 1:
+      if fieldTypeId == thrift.I32 {
+        if err := p.ReadField1(ctx, iprot); err != nil {
+          return err
+        }
+        issetType = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 2:
+      if fieldTypeId == thrift.LIST {
+        if err := p.ReadField2(ctx, iprot); err != nil {
+          return err
+        }
+        issetEncodings = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 3:
+      if fieldTypeId == thrift.LIST {
+        if err := p.ReadField3(ctx, iprot); err != nil {
+          return err
+        }
+        issetPathInSchema = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 4:
+      if fieldTypeId == thrift.I32 {
+        if err := p.ReadField4(ctx, iprot); err != nil {
+          return err
+        }
+        issetCodec = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 5:
+      if fieldTypeId == thrift.I64 {
+        if err := p.ReadField5(ctx, iprot); err != nil {
+          return err
+        }
+        issetNumValues = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 6:
+      if fieldTypeId == thrift.I64 {
+        if err := p.ReadField6(ctx, iprot); err != nil {
+          return err
+        }
+        issetTotalUncompressedSize = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 7:
+      if fieldTypeId == thrift.I64 {
+        if err := p.ReadField7(ctx, iprot); err != nil {
+          return err
+        }
+        issetTotalCompressedSize = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 8:
+      if fieldTypeId == thrift.LIST {
+        if err := p.ReadField8(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 9:
+      if fieldTypeId == thrift.I64 {
+        if err := p.ReadField9(ctx, iprot); err != nil {
+          return err
+        }
+        issetDataPageOffset = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 10:
+      if fieldTypeId == thrift.I64 {
+        if err := p.ReadField10(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 11:
+      if fieldTypeId == thrift.I64 {
+        if err := p.ReadField11(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 12:
+      if fieldTypeId == thrift.STRUCT {
+        if err := p.ReadField12(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 13:
+      if fieldTypeId == thrift.LIST {
+        if err := p.ReadField13(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 14:
+      if fieldTypeId == thrift.I64 {
+        if err := p.ReadField14(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    default:
+      if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+        return err
+      }
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  if !issetType{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Type is not set"));
+  }
+  if !issetEncodings{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Encodings is not set"));
+  }
+  if !issetPathInSchema{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field PathInSchema is not set"));
+  }
+  if !issetCodec{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Codec is not set"));
+  }
+  if !issetNumValues{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumValues is not set"));
+  }
+  if !issetTotalUncompressedSize{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field TotalUncompressedSize is not set"));
+  }
+  if !issetTotalCompressedSize{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field TotalCompressedSize is not set"));
+  }
+  if !issetDataPageOffset{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field DataPageOffset is not set"));
+  }
+  return nil
+}
+
+func (p *ColumnMetaData)  ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI32(ctx); err != nil {
+  return thrift.PrependError("error reading field 1: ", err)
+} else {
+  temp := Type(v)
+  p.Type = temp
+}
+  return nil
+}
+
+func (p *ColumnMetaData)  ReadField2(ctx context.Context, iprot thrift.TProtocol) error {
+  _, size, err := iprot.ReadListBegin(ctx)
+  if err != nil {
+    return thrift.PrependError("error reading list begin: ", err)
+  }
+  tSlice := make([]Encoding, 0, size)
+  p.Encodings =  tSlice
+  for i := 0; i < size; i ++ {
+var _elem0 Encoding
+    if v, err := iprot.ReadI32(ctx); err != nil {
+    return thrift.PrependError("error reading field 0: ", err)
+} else {
+    temp := Encoding(v)
+    _elem0 = temp
+}
+    p.Encodings = append(p.Encodings, _elem0)
+  }
+  if err := iprot.ReadListEnd(ctx); err != nil {
+    return thrift.PrependError("error reading list end: ", err)
+  }
+  return nil
+}
+
+func (p *ColumnMetaData)  ReadField3(ctx context.Context, iprot thrift.TProtocol) error {
+  _, size, err := iprot.ReadListBegin(ctx)
+  if err != nil {
+    return thrift.PrependError("error reading list begin: ", err)
+  }
+  tSlice := make([]string, 0, size)
+  p.PathInSchema =  tSlice
+  for i := 0; i < size; i ++ {
+var _elem1 string
+    if v, err := iprot.ReadString(ctx); err != nil {
+    return thrift.PrependError("error reading field 0: ", err)
+} else {
+    _elem1 = v
+}
+    p.PathInSchema = append(p.PathInSchema, _elem1)
+  }
+  if err := iprot.ReadListEnd(ctx); err != nil {
+    return thrift.PrependError("error reading list end: ", err)
+  }
+  return nil
+}
+
+func (p *ColumnMetaData)  ReadField4(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI32(ctx); err != nil {
+  return thrift.PrependError("error reading field 4: ", err)
+} else {
+  temp := CompressionCodec(v)
+  p.Codec = temp
+}
+  return nil
+}
+
+func (p *ColumnMetaData)  ReadField5(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI64(ctx); err != nil {
+  return thrift.PrependError("error reading field 5: ", err)
+} else {
+  p.NumValues = v
+}
+  return nil
+}
+
+func (p *ColumnMetaData)  ReadField6(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI64(ctx); err != nil {
+  return thrift.PrependError("error reading field 6: ", err)
+} else {
+  p.TotalUncompressedSize = v
+}
+  return nil
+}
+
+func (p *ColumnMetaData)  ReadField7(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI64(ctx); err != nil {
+  return thrift.PrependError("error reading field 7: ", err)
+} else {
+  p.TotalCompressedSize = v
+}
+  return nil
+}
+
+func (p *ColumnMetaData)  ReadField8(ctx context.Context, iprot thrift.TProtocol) error {
+  _, size, err := iprot.ReadListBegin(ctx)
+  if err != nil {
+    return thrift.PrependError("error reading list begin: ", err)
+  }
+  tSlice := make([]*KeyValue, 0, size)
+  p.KeyValueMetadata =  tSlice
+  for i := 0; i < size; i ++ {
+    _elem2 := &KeyValue{}
+    if err := _elem2.Read(ctx, iprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem2), err)
+    }
+    p.KeyValueMetadata = append(p.KeyValueMetadata, _elem2)
+  }
+  if err := iprot.ReadListEnd(ctx); err != nil {
+    return thrift.PrependError("error reading list end: ", err)
+  }
+  return nil
+}
+
+func (p *ColumnMetaData)  ReadField9(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI64(ctx); err != nil {
+  return thrift.PrependError("error reading field 9: ", err)
+} else {
+  p.DataPageOffset = v
+}
+  return nil
+}
+
+func (p *ColumnMetaData)  ReadField10(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI64(ctx); err != nil {
+  return thrift.PrependError("error reading field 10: ", err)
+} else {
+  p.IndexPageOffset = &v
+}
+  return nil
+}
+
+func (p *ColumnMetaData)  ReadField11(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI64(ctx); err != nil {
+  return thrift.PrependError("error reading field 11: ", err)
+} else {
+  p.DictionaryPageOffset = &v
+}
+  return nil
+}
+
+func (p *ColumnMetaData)  ReadField12(ctx context.Context, iprot thrift.TProtocol) error {
+  p.Statistics = &Statistics{}
+  if err := p.Statistics.Read(ctx, iprot); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Statistics), err)
+  }
+  return nil
+}
+
+func (p *ColumnMetaData)  ReadField13(ctx context.Context, iprot thrift.TProtocol) error {
+  _, size, err := iprot.ReadListBegin(ctx)
+  if err != nil {
+    return thrift.PrependError("error reading list begin: ", err)
+  }
+  tSlice := make([]*PageEncodingStats, 0, size)
+  p.EncodingStats =  tSlice
+  for i := 0; i < size; i ++ {
+    _elem3 := &PageEncodingStats{}
+    if err := _elem3.Read(ctx, iprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem3), err)
+    }
+    p.EncodingStats = append(p.EncodingStats, _elem3)
+  }
+  if err := iprot.ReadListEnd(ctx); err != nil {
+    return thrift.PrependError("error reading list end: ", err)
+  }
+  return nil
+}
+
+func (p *ColumnMetaData)  ReadField14(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI64(ctx); err != nil {
+  return thrift.PrependError("error reading field 14: ", err)
+} else {
+  p.BloomFilterOffset = &v
+}
+  return nil
+}
+
+func (p *ColumnMetaData) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if err := oprot.WriteStructBegin(ctx, "ColumnMetaData"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+    if err := p.writeField1(ctx, oprot); err != nil { return err }
+    if err := p.writeField2(ctx, oprot); err != nil { return err }
+    if err := p.writeField3(ctx, oprot); err != nil { return err }
+    if err := p.writeField4(ctx, oprot); err != nil { return err }
+    if err := p.writeField5(ctx, oprot); err != nil { return err }
+    if err := p.writeField6(ctx, oprot); err != nil { return err }
+    if err := p.writeField7(ctx, oprot); err != nil { return err }
+    if err := p.writeField8(ctx, oprot); err != nil { return err }
+    if err := p.writeField9(ctx, oprot); err != nil { return err }
+    if err := p.writeField10(ctx, oprot); err != nil { return err }
+    if err := p.writeField11(ctx, oprot); err != nil { return err }
+    if err := p.writeField12(ctx, oprot); err != nil { return err }
+    if err := p.writeField13(ctx, oprot); err != nil { return err }
+    if err := p.writeField14(ctx, oprot); err != nil { return err }
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *ColumnMetaData) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "type", thrift.I32, 1); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:type: ", p), err) }
+  if err := oprot.WriteI32(ctx, int32(p.Type)); err != nil {
+  return thrift.PrependError(fmt.Sprintf("%T.type (1) field write error: ", p), err) }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 1:type: ", p), err) }
+  return err
+}
+
+func (p *ColumnMetaData) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "encodings", thrift.LIST, 2); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:encodings: ", p), err) }
+  if err := oprot.WriteListBegin(ctx, thrift.I32, len(p.Encodings)); err != nil {
+    return thrift.PrependError("error writing list begin: ", err)
+  }
+  for _, v := range p.Encodings {
+    if err := oprot.WriteI32(ctx, int32(v)); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T. (0) field write error: ", p), err) }
+  }
+  if err := oprot.WriteListEnd(ctx); err != nil {
+    return thrift.PrependError("error writing list end: ", err)
+  }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 2:encodings: ", p), err) }
+  return err
+}
+
+func (p *ColumnMetaData) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "path_in_schema", thrift.LIST, 3); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:path_in_schema: ", p), err) }
+  if err := oprot.WriteListBegin(ctx, thrift.STRING, len(p.PathInSchema)); err != nil {
+    return thrift.PrependError("error writing list begin: ", err)
+  }
+  for _, v := range p.PathInSchema {
+    if err := oprot.WriteString(ctx, string(v)); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T. (0) field write error: ", p), err) }
+  }
+  if err := oprot.WriteListEnd(ctx); err != nil {
+    return thrift.PrependError("error writing list end: ", err)
+  }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 3:path_in_schema: ", p), err) }
+  return err
+}
+
+func (p *ColumnMetaData) writeField4(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "codec", thrift.I32, 4); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:codec: ", p), err) }
+  if err := oprot.WriteI32(ctx, int32(p.Codec)); err != nil {
+  return thrift.PrependError(fmt.Sprintf("%T.codec (4) field write error: ", p), err) }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 4:codec: ", p), err) }
+  return err
+}
+
+func (p *ColumnMetaData) writeField5(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "num_values", thrift.I64, 5); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:num_values: ", p), err) }
+  if err := oprot.WriteI64(ctx, int64(p.NumValues)); err != nil {
+  return thrift.PrependError(fmt.Sprintf("%T.num_values (5) field write error: ", p), err) }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 5:num_values: ", p), err) }
+  return err
+}
+
+func (p *ColumnMetaData) writeField6(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "total_uncompressed_size", thrift.I64, 6); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:total_uncompressed_size: ", p), err) }
+  if err := oprot.WriteI64(ctx, int64(p.TotalUncompressedSize)); err != nil {
+  return thrift.PrependError(fmt.Sprintf("%T.total_uncompressed_size (6) field write error: ", p), err) }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 6:total_uncompressed_size: ", p), err) }
+  return err
+}
+
+func (p *ColumnMetaData) writeField7(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "total_compressed_size", thrift.I64, 7); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:total_compressed_size: ", p), err) }
+  if err := oprot.WriteI64(ctx, int64(p.TotalCompressedSize)); err != nil {
+  return thrift.PrependError(fmt.Sprintf("%T.total_compressed_size (7) field write error: ", p), err) }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 7:total_compressed_size: ", p), err) }
+  return err
+}
+
+func (p *ColumnMetaData) writeField8(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetKeyValueMetadata() {
+    if err := oprot.WriteFieldBegin(ctx, "key_value_metadata", thrift.LIST, 8); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 8:key_value_metadata: ", p), err) }
+    if err := oprot.WriteListBegin(ctx, thrift.STRUCT, len(p.KeyValueMetadata)); err != nil {
+      return thrift.PrependError("error writing list begin: ", err)
+    }
+    for _, v := range p.KeyValueMetadata {
+      if err := v.Write(ctx, oprot); err != nil {
+        return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err)
+      }
+    }
+    if err := oprot.WriteListEnd(ctx); err != nil {
+      return thrift.PrependError("error writing list end: ", err)
+    }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 8:key_value_metadata: ", p), err) }
+  }
+  return err
+}
+
+func (p *ColumnMetaData) writeField9(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "data_page_offset", thrift.I64, 9); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 9:data_page_offset: ", p), err) }
+  if err := oprot.WriteI64(ctx, int64(p.DataPageOffset)); err != nil {
+  return thrift.PrependError(fmt.Sprintf("%T.data_page_offset (9) field write error: ", p), err) }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 9:data_page_offset: ", p), err) }
+  return err
+}
+
+func (p *ColumnMetaData) writeField10(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetIndexPageOffset() {
+    if err := oprot.WriteFieldBegin(ctx, "index_page_offset", thrift.I64, 10); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 10:index_page_offset: ", p), err) }
+    if err := oprot.WriteI64(ctx, int64(*p.IndexPageOffset)); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T.index_page_offset (10) field write error: ", p), err) }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 10:index_page_offset: ", p), err) }
+  }
+  return err
+}
+
+func (p *ColumnMetaData) writeField11(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetDictionaryPageOffset() {
+    if err := oprot.WriteFieldBegin(ctx, "dictionary_page_offset", thrift.I64, 11); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 11:dictionary_page_offset: ", p), err) }
+    if err := oprot.WriteI64(ctx, int64(*p.DictionaryPageOffset)); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T.dictionary_page_offset (11) field write error: ", p), err) }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 11:dictionary_page_offset: ", p), err) }
+  }
+  return err
+}
+
+func (p *ColumnMetaData) writeField12(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetStatistics() {
+    if err := oprot.WriteFieldBegin(ctx, "statistics", thrift.STRUCT, 12); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 12:statistics: ", p), err) }
+    if err := p.Statistics.Write(ctx, oprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Statistics), err)
+    }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 12:statistics: ", p), err) }
+  }
+  return err
+}
+
+func (p *ColumnMetaData) writeField13(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetEncodingStats() {
+    if err := oprot.WriteFieldBegin(ctx, "encoding_stats", thrift.LIST, 13); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 13:encoding_stats: ", p), err) }
+    if err := oprot.WriteListBegin(ctx, thrift.STRUCT, len(p.EncodingStats)); err != nil {
+      return thrift.PrependError("error writing list begin: ", err)
+    }
+    for _, v := range p.EncodingStats {
+      if err := v.Write(ctx, oprot); err != nil {
+        return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err)
+      }
+    }
+    if err := oprot.WriteListEnd(ctx); err != nil {
+      return thrift.PrependError("error writing list end: ", err)
+    }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 13:encoding_stats: ", p), err) }
+  }
+  return err
+}
+
+func (p *ColumnMetaData) writeField14(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetBloomFilterOffset() {
+    if err := oprot.WriteFieldBegin(ctx, "bloom_filter_offset", thrift.I64, 14); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 14:bloom_filter_offset: ", p), err) }
+    if err := oprot.WriteI64(ctx, int64(*p.BloomFilterOffset)); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T.bloom_filter_offset (14) field write error: ", p), err) }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 14:bloom_filter_offset: ", p), err) }
+  }
+  return err
+}
+
+func (p *ColumnMetaData) Equals(other *ColumnMetaData) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  if p.Type != other.Type { return false }
+  if len(p.Encodings) != len(other.Encodings) { return false }
+  for i, _tgt := range p.Encodings {
+    _src4 := other.Encodings[i]
+    if _tgt != _src4 { return false }
+  }
+  if len(p.PathInSchema) != len(other.PathInSchema) { return false }
+  for i, _tgt := range p.PathInSchema {
+    _src5 := other.PathInSchema[i]
+    if _tgt != _src5 { return false }
+  }
+  if p.Codec != other.Codec { return false }
+  if p.NumValues != other.NumValues { return false }
+  if p.TotalUncompressedSize != other.TotalUncompressedSize { return false }
+  if p.TotalCompressedSize != other.TotalCompressedSize { return false }
+  if len(p.KeyValueMetadata) != len(other.KeyValueMetadata) { return false }
+  for i, _tgt := range p.KeyValueMetadata {
+    _src6 := other.KeyValueMetadata[i]
+    if !_tgt.Equals(_src6) { return false }
+  }
+  if p.DataPageOffset != other.DataPageOffset { return false }
+  if p.IndexPageOffset != other.IndexPageOffset {
+    if p.IndexPageOffset == nil || other.IndexPageOffset == nil {
+      return false
+    }
+    if (*p.IndexPageOffset) != (*other.IndexPageOffset) { return false }
+  }
+  if p.DictionaryPageOffset != other.DictionaryPageOffset {
+    if p.DictionaryPageOffset == nil || other.DictionaryPageOffset == nil {
+      return false
+    }
+    if (*p.DictionaryPageOffset) != (*other.DictionaryPageOffset) { return false }
+  }
+  if !p.Statistics.Equals(other.Statistics) { return false }
+  if len(p.EncodingStats) != len(other.EncodingStats) { return false }
+  for i, _tgt := range p.EncodingStats {
+    _src7 := other.EncodingStats[i]
+    if !_tgt.Equals(_src7) { return false }
+  }
+  if p.BloomFilterOffset != other.BloomFilterOffset {
+    if p.BloomFilterOffset == nil || other.BloomFilterOffset == nil {
+      return false
+    }
+    if (*p.BloomFilterOffset) != (*other.BloomFilterOffset) { return false }
+  }
+  return true
+}
+
+func (p *ColumnMetaData) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("ColumnMetaData(%+v)", *p)
+}
+
+type EncryptionWithFooterKey struct {
+}
+
+func NewEncryptionWithFooterKey() *EncryptionWithFooterKey {
+  return &EncryptionWithFooterKey{}
+}
+
+func (p *EncryptionWithFooterKey) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+      return err
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  return nil
+}
+
+func (p *EncryptionWithFooterKey) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if err := oprot.WriteStructBegin(ctx, "EncryptionWithFooterKey"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *EncryptionWithFooterKey) Equals(other *EncryptionWithFooterKey) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  return true
+}
+
+func (p *EncryptionWithFooterKey) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("EncryptionWithFooterKey(%+v)", *p)
+}
+
+// Attributes:
+//  - PathInSchema: Column path in schema *
+//  - KeyMetadata: Retrieval metadata of column encryption key *
+type EncryptionWithColumnKey struct {
+  PathInSchema []string `thrift:"path_in_schema,1,required" db:"path_in_schema" json:"path_in_schema"`
+  KeyMetadata []byte `thrift:"key_metadata,2" db:"key_metadata" json:"key_metadata,omitempty"`
+}
+
+func NewEncryptionWithColumnKey() *EncryptionWithColumnKey {
+  return &EncryptionWithColumnKey{}
+}
+
+
+func (p *EncryptionWithColumnKey) GetPathInSchema() []string {
+  return p.PathInSchema
+}
+var EncryptionWithColumnKey_KeyMetadata_DEFAULT []byte
+
+func (p *EncryptionWithColumnKey) GetKeyMetadata() []byte {
+  return p.KeyMetadata
+}
+func (p *EncryptionWithColumnKey) IsSetKeyMetadata() bool {
+  return p.KeyMetadata != nil
+}
+
+func (p *EncryptionWithColumnKey) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+  var issetPathInSchema bool = false;
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    switch fieldId {
+    case 1:
+      if fieldTypeId == thrift.LIST {
+        if err := p.ReadField1(ctx, iprot); err != nil {
+          return err
+        }
+        issetPathInSchema = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 2:
+      if fieldTypeId == thrift.STRING {
+        if err := p.ReadField2(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    default:
+      if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+        return err
+      }
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  if !issetPathInSchema{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field PathInSchema is not set"));
+  }
+  return nil
+}
+
+func (p *EncryptionWithColumnKey)  ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
+  _, size, err := iprot.ReadListBegin(ctx)
+  if err != nil {
+    return thrift.PrependError("error reading list begin: ", err)
+  }
+  tSlice := make([]string, 0, size)
+  p.PathInSchema =  tSlice
+  for i := 0; i < size; i ++ {
+var _elem8 string
+    if v, err := iprot.ReadString(ctx); err != nil {
+    return thrift.PrependError("error reading field 0: ", err)
+} else {
+    _elem8 = v
+}
+    p.PathInSchema = append(p.PathInSchema, _elem8)
+  }
+  if err := iprot.ReadListEnd(ctx); err != nil {
+    return thrift.PrependError("error reading list end: ", err)
+  }
+  return nil
+}
+
+func (p *EncryptionWithColumnKey)  ReadField2(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadBinary(ctx); err != nil {
+  return thrift.PrependError("error reading field 2: ", err)
+} else {
+  p.KeyMetadata = v
+}
+  return nil
+}
+
+func (p *EncryptionWithColumnKey) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if err := oprot.WriteStructBegin(ctx, "EncryptionWithColumnKey"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+    if err := p.writeField1(ctx, oprot); err != nil { return err }
+    if err := p.writeField2(ctx, oprot); err != nil { return err }
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *EncryptionWithColumnKey) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "path_in_schema", thrift.LIST, 1); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:path_in_schema: ", p), err) }
+  if err := oprot.WriteListBegin(ctx, thrift.STRING, len(p.PathInSchema)); err != nil {
+    return thrift.PrependError("error writing list begin: ", err)
+  }
+  for _, v := range p.PathInSchema {
+    if err := oprot.WriteString(ctx, string(v)); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T. (0) field write error: ", p), err) }
+  }
+  if err := oprot.WriteListEnd(ctx); err != nil {
+    return thrift.PrependError("error writing list end: ", err)
+  }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 1:path_in_schema: ", p), err) }
+  return err
+}
+
+func (p *EncryptionWithColumnKey) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetKeyMetadata() {
+    if err := oprot.WriteFieldBegin(ctx, "key_metadata", thrift.STRING, 2); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:key_metadata: ", p), err) }
+    if err := oprot.WriteBinary(ctx, p.KeyMetadata); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T.key_metadata (2) field write error: ", p), err) }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 2:key_metadata: ", p), err) }
+  }
+  return err
+}
+
+func (p *EncryptionWithColumnKey) Equals(other *EncryptionWithColumnKey) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  if len(p.PathInSchema) != len(other.PathInSchema) { return false }
+  for i, _tgt := range p.PathInSchema {
+    _src9 := other.PathInSchema[i]
+    if _tgt != _src9 { return false }
+  }
+  if bytes.Compare(p.KeyMetadata, other.KeyMetadata) != 0 { return false }
+  return true
+}
+
+func (p *EncryptionWithColumnKey) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("EncryptionWithColumnKey(%+v)", *p)
+}
+
+// Attributes:
+//  - ENCRYPTION_WITH_FOOTER_KEY
+//  - ENCRYPTION_WITH_COLUMN_KEY
+type ColumnCryptoMetaData struct {
+  ENCRYPTION_WITH_FOOTER_KEY *EncryptionWithFooterKey `thrift:"ENCRYPTION_WITH_FOOTER_KEY,1" db:"ENCRYPTION_WITH_FOOTER_KEY" json:"ENCRYPTION_WITH_FOOTER_KEY,omitempty"`
+  ENCRYPTION_WITH_COLUMN_KEY *EncryptionWithColumnKey `thrift:"ENCRYPTION_WITH_COLUMN_KEY,2" db:"ENCRYPTION_WITH_COLUMN_KEY" json:"ENCRYPTION_WITH_COLUMN_KEY,omitempty"`
+}
+
+func NewColumnCryptoMetaData() *ColumnCryptoMetaData {
+  return &ColumnCryptoMetaData{}
+}
+
+var ColumnCryptoMetaData_ENCRYPTION_WITH_FOOTER_KEY_DEFAULT *EncryptionWithFooterKey
+func (p *ColumnCryptoMetaData) GetENCRYPTION_WITH_FOOTER_KEY() *EncryptionWithFooterKey {
+  if !p.IsSetENCRYPTION_WITH_FOOTER_KEY() {
+    return ColumnCryptoMetaData_ENCRYPTION_WITH_FOOTER_KEY_DEFAULT
+  }
+return p.ENCRYPTION_WITH_FOOTER_KEY
+}
+var ColumnCryptoMetaData_ENCRYPTION_WITH_COLUMN_KEY_DEFAULT *EncryptionWithColumnKey
+func (p *ColumnCryptoMetaData) GetENCRYPTION_WITH_COLUMN_KEY() *EncryptionWithColumnKey {
+  if !p.IsSetENCRYPTION_WITH_COLUMN_KEY() {
+    return ColumnCryptoMetaData_ENCRYPTION_WITH_COLUMN_KEY_DEFAULT
+  }
+return p.ENCRYPTION_WITH_COLUMN_KEY
+}
+func (p *ColumnCryptoMetaData) CountSetFieldsColumnCryptoMetaData() int {
+  count := 0
+  if (p.IsSetENCRYPTION_WITH_FOOTER_KEY()) {
+    count++
+  }
+  if (p.IsSetENCRYPTION_WITH_COLUMN_KEY()) {
+    count++
+  }
+  return count
+
+}
+
+func (p *ColumnCryptoMetaData) IsSetENCRYPTION_WITH_FOOTER_KEY() bool {
+  return p.ENCRYPTION_WITH_FOOTER_KEY != nil
+}
+
+func (p *ColumnCryptoMetaData) IsSetENCRYPTION_WITH_COLUMN_KEY() bool {
+  return p.ENCRYPTION_WITH_COLUMN_KEY != nil
+}
+
+func (p *ColumnCryptoMetaData) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    switch fieldId {
+    case 1:
+      if fieldTypeId == thrift.STRUCT {
+        if err := p.ReadField1(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 2:
+      if fieldTypeId == thrift.STRUCT {
+        if err := p.ReadField2(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    default:
+      if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+        return err
+      }
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  return nil
+}
+
+func (p *ColumnCryptoMetaData)  ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
+  p.ENCRYPTION_WITH_FOOTER_KEY = &EncryptionWithFooterKey{}
+  if err := p.ENCRYPTION_WITH_FOOTER_KEY.Read(ctx, iprot); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.ENCRYPTION_WITH_FOOTER_KEY), err)
+  }
+  return nil
+}
+
+func (p *ColumnCryptoMetaData)  ReadField2(ctx context.Context, iprot thrift.TProtocol) error {
+  p.ENCRYPTION_WITH_COLUMN_KEY = &EncryptionWithColumnKey{}
+  if err := p.ENCRYPTION_WITH_COLUMN_KEY.Read(ctx, iprot); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.ENCRYPTION_WITH_COLUMN_KEY), err)
+  }
+  return nil
+}
+
+func (p *ColumnCryptoMetaData) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if c := p.CountSetFieldsColumnCryptoMetaData(); c != 1 {
+    return fmt.Errorf("%T write union: exactly one field must be set (%d set).", p, c)
+  }
+  if err := oprot.WriteStructBegin(ctx, "ColumnCryptoMetaData"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+    if err := p.writeField1(ctx, oprot); err != nil { return err }
+    if err := p.writeField2(ctx, oprot); err != nil { return err }
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *ColumnCryptoMetaData) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetENCRYPTION_WITH_FOOTER_KEY() {
+    if err := oprot.WriteFieldBegin(ctx, "ENCRYPTION_WITH_FOOTER_KEY", thrift.STRUCT, 1); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:ENCRYPTION_WITH_FOOTER_KEY: ", p), err) }
+    if err := p.ENCRYPTION_WITH_FOOTER_KEY.Write(ctx, oprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.ENCRYPTION_WITH_FOOTER_KEY), err)
+    }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 1:ENCRYPTION_WITH_FOOTER_KEY: ", p), err) }
+  }
+  return err
+}
+
+func (p *ColumnCryptoMetaData) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetENCRYPTION_WITH_COLUMN_KEY() {
+    if err := oprot.WriteFieldBegin(ctx, "ENCRYPTION_WITH_COLUMN_KEY", thrift.STRUCT, 2); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:ENCRYPTION_WITH_COLUMN_KEY: ", p), err) }
+    if err := p.ENCRYPTION_WITH_COLUMN_KEY.Write(ctx, oprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.ENCRYPTION_WITH_COLUMN_KEY), err)
+    }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 2:ENCRYPTION_WITH_COLUMN_KEY: ", p), err) }
+  }
+  return err
+}
+
+func (p *ColumnCryptoMetaData) Equals(other *ColumnCryptoMetaData) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  if !p.ENCRYPTION_WITH_FOOTER_KEY.Equals(other.ENCRYPTION_WITH_FOOTER_KEY) { return false }
+  if !p.ENCRYPTION_WITH_COLUMN_KEY.Equals(other.ENCRYPTION_WITH_COLUMN_KEY) { return false }
+  return true
+}
+
+func (p *ColumnCryptoMetaData) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("ColumnCryptoMetaData(%+v)", *p)
+}
+
+// Attributes:
+//  - FilePath: File where column data is stored.  If not set, assumed to be same file as
+// metadata.  This path is relative to the current file.
+// 
+//  - FileOffset: Byte offset in file_path to the ColumnMetaData *
+//  - MetaData: Column metadata for this chunk. This is the same content as what is at
+// file_path/file_offset.  Having it here has it replicated in the file
+// metadata.
+// 
+//  - OffsetIndexOffset: File offset of ColumnChunk's OffsetIndex *
+//  - OffsetIndexLength: Size of ColumnChunk's OffsetIndex, in bytes *
+//  - ColumnIndexOffset: File offset of ColumnChunk's ColumnIndex *
+//  - ColumnIndexLength: Size of ColumnChunk's ColumnIndex, in bytes *
+//  - CryptoMetadata: Crypto metadata of encrypted columns *
+//  - EncryptedColumnMetadata: Encrypted column metadata for this chunk *
+type ColumnChunk struct {
+  FilePath *string `thrift:"file_path,1" db:"file_path" json:"file_path,omitempty"`
+  FileOffset int64 `thrift:"file_offset,2,required" db:"file_offset" json:"file_offset"`
+  MetaData *ColumnMetaData `thrift:"meta_data,3" db:"meta_data" json:"meta_data,omitempty"`
+  OffsetIndexOffset *int64 `thrift:"offset_index_offset,4" db:"offset_index_offset" json:"offset_index_offset,omitempty"`
+  OffsetIndexLength *int32 `thrift:"offset_index_length,5" db:"offset_index_length" json:"offset_index_length,omitempty"`
+  ColumnIndexOffset *int64 `thrift:"column_index_offset,6" db:"column_index_offset" json:"column_index_offset,omitempty"`
+  ColumnIndexLength *int32 `thrift:"column_index_length,7" db:"column_index_length" json:"column_index_length,omitempty"`
+  CryptoMetadata *ColumnCryptoMetaData `thrift:"crypto_metadata,8" db:"crypto_metadata" json:"crypto_metadata,omitempty"`
+  EncryptedColumnMetadata []byte `thrift:"encrypted_column_metadata,9" db:"encrypted_column_metadata" json:"encrypted_column_metadata,omitempty"`
+}
+
+func NewColumnChunk() *ColumnChunk {
+  return &ColumnChunk{}
+}
+
+var ColumnChunk_FilePath_DEFAULT string
+func (p *ColumnChunk) GetFilePath() string {
+  if !p.IsSetFilePath() {
+    return ColumnChunk_FilePath_DEFAULT
+  }
+return *p.FilePath
+}
+
+func (p *ColumnChunk) GetFileOffset() int64 {
+  return p.FileOffset
+}
+var ColumnChunk_MetaData_DEFAULT *ColumnMetaData
+func (p *ColumnChunk) GetMetaData() *ColumnMetaData {
+  if !p.IsSetMetaData() {
+    return ColumnChunk_MetaData_DEFAULT
+  }
+return p.MetaData
+}
+var ColumnChunk_OffsetIndexOffset_DEFAULT int64
+func (p *ColumnChunk) GetOffsetIndexOffset() int64 {
+  if !p.IsSetOffsetIndexOffset() {
+    return ColumnChunk_OffsetIndexOffset_DEFAULT
+  }
+return *p.OffsetIndexOffset
+}
+var ColumnChunk_OffsetIndexLength_DEFAULT int32
+func (p *ColumnChunk) GetOffsetIndexLength() int32 {
+  if !p.IsSetOffsetIndexLength() {
+    return ColumnChunk_OffsetIndexLength_DEFAULT
+  }
+return *p.OffsetIndexLength
+}
+var ColumnChunk_ColumnIndexOffset_DEFAULT int64
+func (p *ColumnChunk) GetColumnIndexOffset() int64 {
+  if !p.IsSetColumnIndexOffset() {
+    return ColumnChunk_ColumnIndexOffset_DEFAULT
+  }
+return *p.ColumnIndexOffset
+}
+var ColumnChunk_ColumnIndexLength_DEFAULT int32
+func (p *ColumnChunk) GetColumnIndexLength() int32 {
+  if !p.IsSetColumnIndexLength() {
+    return ColumnChunk_ColumnIndexLength_DEFAULT
+  }
+return *p.ColumnIndexLength
+}
+var ColumnChunk_CryptoMetadata_DEFAULT *ColumnCryptoMetaData
+func (p *ColumnChunk) GetCryptoMetadata() *ColumnCryptoMetaData {
+  if !p.IsSetCryptoMetadata() {
+    return ColumnChunk_CryptoMetadata_DEFAULT
+  }
+return p.CryptoMetadata
+}
+var ColumnChunk_EncryptedColumnMetadata_DEFAULT []byte
+
+func (p *ColumnChunk) GetEncryptedColumnMetadata() []byte {
+  return p.EncryptedColumnMetadata
+}
+func (p *ColumnChunk) IsSetFilePath() bool {
+  return p.FilePath != nil
+}
+
+func (p *ColumnChunk) IsSetMetaData() bool {
+  return p.MetaData != nil
+}
+
+func (p *ColumnChunk) IsSetOffsetIndexOffset() bool {
+  return p.OffsetIndexOffset != nil
+}
+
+func (p *ColumnChunk) IsSetOffsetIndexLength() bool {
+  return p.OffsetIndexLength != nil
+}
+
+func (p *ColumnChunk) IsSetColumnIndexOffset() bool {
+  return p.ColumnIndexOffset != nil
+}
+
+func (p *ColumnChunk) IsSetColumnIndexLength() bool {
+  return p.ColumnIndexLength != nil
+}
+
+func (p *ColumnChunk) IsSetCryptoMetadata() bool {
+  return p.CryptoMetadata != nil
+}
+
+func (p *ColumnChunk) IsSetEncryptedColumnMetadata() bool {
+  return p.EncryptedColumnMetadata != nil
+}
+
+func (p *ColumnChunk) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+  var issetFileOffset bool = false;
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    switch fieldId {
+    case 1:
+      if fieldTypeId == thrift.STRING {
+        if err := p.ReadField1(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 2:
+      if fieldTypeId == thrift.I64 {
+        if err := p.ReadField2(ctx, iprot); err != nil {
+          return err
+        }
+        issetFileOffset = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 3:
+      if fieldTypeId == thrift.STRUCT {
+        if err := p.ReadField3(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 4:
+      if fieldTypeId == thrift.I64 {
+        if err := p.ReadField4(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 5:
+      if fieldTypeId == thrift.I32 {
+        if err := p.ReadField5(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 6:
+      if fieldTypeId == thrift.I64 {
+        if err := p.ReadField6(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 7:
+      if fieldTypeId == thrift.I32 {
+        if err := p.ReadField7(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 8:
+      if fieldTypeId == thrift.STRUCT {
+        if err := p.ReadField8(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 9:
+      if fieldTypeId == thrift.STRING {
+        if err := p.ReadField9(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    default:
+      if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+        return err
+      }
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  if !issetFileOffset{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field FileOffset is not set"));
+  }
+  return nil
+}
+
+func (p *ColumnChunk)  ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadString(ctx); err != nil {
+  return thrift.PrependError("error reading field 1: ", err)
+} else {
+  p.FilePath = &v
+}
+  return nil
+}
+
+func (p *ColumnChunk)  ReadField2(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI64(ctx); err != nil {
+  return thrift.PrependError("error reading field 2: ", err)
+} else {
+  p.FileOffset = v
+}
+  return nil
+}
+
+func (p *ColumnChunk)  ReadField3(ctx context.Context, iprot thrift.TProtocol) error {
+  p.MetaData = &ColumnMetaData{}
+  if err := p.MetaData.Read(ctx, iprot); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.MetaData), err)
+  }
+  return nil
+}
+
+func (p *ColumnChunk)  ReadField4(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI64(ctx); err != nil {
+  return thrift.PrependError("error reading field 4: ", err)
+} else {
+  p.OffsetIndexOffset = &v
+}
+  return nil
+}
+
+func (p *ColumnChunk)  ReadField5(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI32(ctx); err != nil {
+  return thrift.PrependError("error reading field 5: ", err)
+} else {
+  p.OffsetIndexLength = &v
+}
+  return nil
+}
+
+func (p *ColumnChunk)  ReadField6(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI64(ctx); err != nil {
+  return thrift.PrependError("error reading field 6: ", err)
+} else {
+  p.ColumnIndexOffset = &v
+}
+  return nil
+}
+
+func (p *ColumnChunk)  ReadField7(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI32(ctx); err != nil {
+  return thrift.PrependError("error reading field 7: ", err)
+} else {
+  p.ColumnIndexLength = &v
+}
+  return nil
+}
+
+func (p *ColumnChunk)  ReadField8(ctx context.Context, iprot thrift.TProtocol) error {
+  p.CryptoMetadata = &ColumnCryptoMetaData{}
+  if err := p.CryptoMetadata.Read(ctx, iprot); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.CryptoMetadata), err)
+  }
+  return nil
+}
+
+func (p *ColumnChunk)  ReadField9(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadBinary(ctx); err != nil {
+  return thrift.PrependError("error reading field 9: ", err)
+} else {
+  p.EncryptedColumnMetadata = v
+}
+  return nil
+}
+
+func (p *ColumnChunk) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if err := oprot.WriteStructBegin(ctx, "ColumnChunk"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+    if err := p.writeField1(ctx, oprot); err != nil { return err }
+    if err := p.writeField2(ctx, oprot); err != nil { return err }
+    if err := p.writeField3(ctx, oprot); err != nil { return err }
+    if err := p.writeField4(ctx, oprot); err != nil { return err }
+    if err := p.writeField5(ctx, oprot); err != nil { return err }
+    if err := p.writeField6(ctx, oprot); err != nil { return err }
+    if err := p.writeField7(ctx, oprot); err != nil { return err }
+    if err := p.writeField8(ctx, oprot); err != nil { return err }
+    if err := p.writeField9(ctx, oprot); err != nil { return err }
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *ColumnChunk) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetFilePath() {
+    if err := oprot.WriteFieldBegin(ctx, "file_path", thrift.STRING, 1); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:file_path: ", p), err) }
+    if err := oprot.WriteString(ctx, string(*p.FilePath)); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T.file_path (1) field write error: ", p), err) }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 1:file_path: ", p), err) }
+  }
+  return err
+}
+
+func (p *ColumnChunk) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "file_offset", thrift.I64, 2); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:file_offset: ", p), err) }
+  if err := oprot.WriteI64(ctx, int64(p.FileOffset)); err != nil {
+  return thrift.PrependError(fmt.Sprintf("%T.file_offset (2) field write error: ", p), err) }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 2:file_offset: ", p), err) }
+  return err
+}
+
+func (p *ColumnChunk) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetMetaData() {
+    if err := oprot.WriteFieldBegin(ctx, "meta_data", thrift.STRUCT, 3); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:meta_data: ", p), err) }
+    if err := p.MetaData.Write(ctx, oprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.MetaData), err)
+    }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 3:meta_data: ", p), err) }
+  }
+  return err
+}
+
+func (p *ColumnChunk) writeField4(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetOffsetIndexOffset() {
+    if err := oprot.WriteFieldBegin(ctx, "offset_index_offset", thrift.I64, 4); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:offset_index_offset: ", p), err) }
+    if err := oprot.WriteI64(ctx, int64(*p.OffsetIndexOffset)); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T.offset_index_offset (4) field write error: ", p), err) }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 4:offset_index_offset: ", p), err) }
+  }
+  return err
+}
+
+func (p *ColumnChunk) writeField5(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetOffsetIndexLength() {
+    if err := oprot.WriteFieldBegin(ctx, "offset_index_length", thrift.I32, 5); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:offset_index_length: ", p), err) }
+    if err := oprot.WriteI32(ctx, int32(*p.OffsetIndexLength)); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T.offset_index_length (5) field write error: ", p), err) }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 5:offset_index_length: ", p), err) }
+  }
+  return err
+}
+
+func (p *ColumnChunk) writeField6(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetColumnIndexOffset() {
+    if err := oprot.WriteFieldBegin(ctx, "column_index_offset", thrift.I64, 6); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:column_index_offset: ", p), err) }
+    if err := oprot.WriteI64(ctx, int64(*p.ColumnIndexOffset)); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T.column_index_offset (6) field write error: ", p), err) }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 6:column_index_offset: ", p), err) }
+  }
+  return err
+}
+
+func (p *ColumnChunk) writeField7(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetColumnIndexLength() {
+    if err := oprot.WriteFieldBegin(ctx, "column_index_length", thrift.I32, 7); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:column_index_length: ", p), err) }
+    if err := oprot.WriteI32(ctx, int32(*p.ColumnIndexLength)); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T.column_index_length (7) field write error: ", p), err) }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 7:column_index_length: ", p), err) }
+  }
+  return err
+}
+
+func (p *ColumnChunk) writeField8(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetCryptoMetadata() {
+    if err := oprot.WriteFieldBegin(ctx, "crypto_metadata", thrift.STRUCT, 8); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 8:crypto_metadata: ", p), err) }
+    if err := p.CryptoMetadata.Write(ctx, oprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.CryptoMetadata), err)
+    }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 8:crypto_metadata: ", p), err) }
+  }
+  return err
+}
+
+func (p *ColumnChunk) writeField9(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetEncryptedColumnMetadata() {
+    if err := oprot.WriteFieldBegin(ctx, "encrypted_column_metadata", thrift.STRING, 9); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 9:encrypted_column_metadata: ", p), err) }
+    if err := oprot.WriteBinary(ctx, p.EncryptedColumnMetadata); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T.encrypted_column_metadata (9) field write error: ", p), err) }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 9:encrypted_column_metadata: ", p), err) }
+  }
+  return err
+}
+
+func (p *ColumnChunk) Equals(other *ColumnChunk) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  if p.FilePath != other.FilePath {
+    if p.FilePath == nil || other.FilePath == nil {
+      return false
+    }
+    if (*p.FilePath) != (*other.FilePath) { return false }
+  }
+  if p.FileOffset != other.FileOffset { return false }
+  if !p.MetaData.Equals(other.MetaData) { return false }
+  if p.OffsetIndexOffset != other.OffsetIndexOffset {
+    if p.OffsetIndexOffset == nil || other.OffsetIndexOffset == nil {
+      return false
+    }
+    if (*p.OffsetIndexOffset) != (*other.OffsetIndexOffset) { return false }
+  }
+  if p.OffsetIndexLength != other.OffsetIndexLength {
+    if p.OffsetIndexLength == nil || other.OffsetIndexLength == nil {
+      return false
+    }
+    if (*p.OffsetIndexLength) != (*other.OffsetIndexLength) { return false }
+  }
+  if p.ColumnIndexOffset != other.ColumnIndexOffset {
+    if p.ColumnIndexOffset == nil || other.ColumnIndexOffset == nil {
+      return false
+    }
+    if (*p.ColumnIndexOffset) != (*other.ColumnIndexOffset) { return false }
+  }
+  if p.ColumnIndexLength != other.ColumnIndexLength {
+    if p.ColumnIndexLength == nil || other.ColumnIndexLength == nil {
+      return false
+    }
+    if (*p.ColumnIndexLength) != (*other.ColumnIndexLength) { return false }
+  }
+  if !p.CryptoMetadata.Equals(other.CryptoMetadata) { return false }
+  if bytes.Compare(p.EncryptedColumnMetadata, other.EncryptedColumnMetadata) != 0 { return false }
+  return true
+}
+
+func (p *ColumnChunk) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("ColumnChunk(%+v)", *p)
+}
+
+// Attributes:
+//  - Columns: Metadata for each column chunk in this row group.
+// This list must have the same order as the SchemaElement list in FileMetaData.
+// 
+//  - TotalByteSize: Total byte size of all the uncompressed column data in this row group *
+//  - NumRows: Number of rows in this row group *
+//  - SortingColumns: If set, specifies a sort ordering of the rows in this RowGroup.
+// The sorting columns can be a subset of all the columns.
+//  - FileOffset: Byte offset from beginning of file to first page (data or dictionary)
+// in this row group *
+//  - TotalCompressedSize: Total byte size of all compressed (and potentially encrypted) column data
+// in this row group *
+//  - Ordinal: Row group ordinal in the file *
+type RowGroup struct {
+  Columns []*ColumnChunk `thrift:"columns,1,required" db:"columns" json:"columns"`
+  TotalByteSize int64 `thrift:"total_byte_size,2,required" db:"total_byte_size" json:"total_byte_size"`
+  NumRows int64 `thrift:"num_rows,3,required" db:"num_rows" json:"num_rows"`
+  SortingColumns []*SortingColumn `thrift:"sorting_columns,4" db:"sorting_columns" json:"sorting_columns,omitempty"`
+  FileOffset *int64 `thrift:"file_offset,5" db:"file_offset" json:"file_offset,omitempty"`
+  TotalCompressedSize *int64 `thrift:"total_compressed_size,6" db:"total_compressed_size" json:"total_compressed_size,omitempty"`
+  Ordinal *int16 `thrift:"ordinal,7" db:"ordinal" json:"ordinal,omitempty"`
+}
+
+func NewRowGroup() *RowGroup {
+  return &RowGroup{}
+}
+
+
+func (p *RowGroup) GetColumns() []*ColumnChunk {
+  return p.Columns
+}
+
+func (p *RowGroup) GetTotalByteSize() int64 {
+  return p.TotalByteSize
+}
+
+func (p *RowGroup) GetNumRows() int64 {
+  return p.NumRows
+}
+var RowGroup_SortingColumns_DEFAULT []*SortingColumn
+
+func (p *RowGroup) GetSortingColumns() []*SortingColumn {
+  return p.SortingColumns
+}
+var RowGroup_FileOffset_DEFAULT int64
+func (p *RowGroup) GetFileOffset() int64 {
+  if !p.IsSetFileOffset() {
+    return RowGroup_FileOffset_DEFAULT
+  }
+return *p.FileOffset
+}
+var RowGroup_TotalCompressedSize_DEFAULT int64
+func (p *RowGroup) GetTotalCompressedSize() int64 {
+  if !p.IsSetTotalCompressedSize() {
+    return RowGroup_TotalCompressedSize_DEFAULT
+  }
+return *p.TotalCompressedSize
+}
+var RowGroup_Ordinal_DEFAULT int16
+func (p *RowGroup) GetOrdinal() int16 {
+  if !p.IsSetOrdinal() {
+    return RowGroup_Ordinal_DEFAULT
+  }
+return *p.Ordinal
+}
+func (p *RowGroup) IsSetSortingColumns() bool {
+  return p.SortingColumns != nil
+}
+
+func (p *RowGroup) IsSetFileOffset() bool {
+  return p.FileOffset != nil
+}
+
+func (p *RowGroup) IsSetTotalCompressedSize() bool {
+  return p.TotalCompressedSize != nil
+}
+
+func (p *RowGroup) IsSetOrdinal() bool {
+  return p.Ordinal != nil
+}
+
+func (p *RowGroup) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+  var issetColumns bool = false;
+  var issetTotalByteSize bool = false;
+  var issetNumRows bool = false;
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    switch fieldId {
+    case 1:
+      if fieldTypeId == thrift.LIST {
+        if err := p.ReadField1(ctx, iprot); err != nil {
+          return err
+        }
+        issetColumns = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 2:
+      if fieldTypeId == thrift.I64 {
+        if err := p.ReadField2(ctx, iprot); err != nil {
+          return err
+        }
+        issetTotalByteSize = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 3:
+      if fieldTypeId == thrift.I64 {
+        if err := p.ReadField3(ctx, iprot); err != nil {
+          return err
+        }
+        issetNumRows = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 4:
+      if fieldTypeId == thrift.LIST {
+        if err := p.ReadField4(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 5:
+      if fieldTypeId == thrift.I64 {
+        if err := p.ReadField5(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 6:
+      if fieldTypeId == thrift.I64 {
+        if err := p.ReadField6(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 7:
+      if fieldTypeId == thrift.I16 {
+        if err := p.ReadField7(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    default:
+      if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+        return err
+      }
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  if !issetColumns{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Columns is not set"));
+  }
+  if !issetTotalByteSize{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field TotalByteSize is not set"));
+  }
+  if !issetNumRows{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumRows is not set"));
+  }
+  return nil
+}
+
+func (p *RowGroup)  ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
+  _, size, err := iprot.ReadListBegin(ctx)
+  if err != nil {
+    return thrift.PrependError("error reading list begin: ", err)
+  }
+  tSlice := make([]*ColumnChunk, 0, size)
+  p.Columns =  tSlice
+  for i := 0; i < size; i ++ {
+    _elem10 := &ColumnChunk{}
+    if err := _elem10.Read(ctx, iprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem10), err)
+    }
+    p.Columns = append(p.Columns, _elem10)
+  }
+  if err := iprot.ReadListEnd(ctx); err != nil {
+    return thrift.PrependError("error reading list end: ", err)
+  }
+  return nil
+}
+
+func (p *RowGroup)  ReadField2(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI64(ctx); err != nil {
+  return thrift.PrependError("error reading field 2: ", err)
+} else {
+  p.TotalByteSize = v
+}
+  return nil
+}
+
+func (p *RowGroup)  ReadField3(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI64(ctx); err != nil {
+  return thrift.PrependError("error reading field 3: ", err)
+} else {
+  p.NumRows = v
+}
+  return nil
+}
+
+func (p *RowGroup)  ReadField4(ctx context.Context, iprot thrift.TProtocol) error {
+  _, size, err := iprot.ReadListBegin(ctx)
+  if err != nil {
+    return thrift.PrependError("error reading list begin: ", err)
+  }
+  tSlice := make([]*SortingColumn, 0, size)
+  p.SortingColumns =  tSlice
+  for i := 0; i < size; i ++ {
+    _elem11 := &SortingColumn{}
+    if err := _elem11.Read(ctx, iprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem11), err)
+    }
+    p.SortingColumns = append(p.SortingColumns, _elem11)
+  }
+  if err := iprot.ReadListEnd(ctx); err != nil {
+    return thrift.PrependError("error reading list end: ", err)
+  }
+  return nil
+}
+
+func (p *RowGroup)  ReadField5(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI64(ctx); err != nil {
+  return thrift.PrependError("error reading field 5: ", err)
+} else {
+  p.FileOffset = &v
+}
+  return nil
+}
+
+func (p *RowGroup)  ReadField6(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI64(ctx); err != nil {
+  return thrift.PrependError("error reading field 6: ", err)
+} else {
+  p.TotalCompressedSize = &v
+}
+  return nil
+}
+
+func (p *RowGroup)  ReadField7(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI16(ctx); err != nil {
+  return thrift.PrependError("error reading field 7: ", err)
+} else {
+  p.Ordinal = &v
+}
+  return nil
+}
+
+func (p *RowGroup) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if err := oprot.WriteStructBegin(ctx, "RowGroup"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+    if err := p.writeField1(ctx, oprot); err != nil { return err }
+    if err := p.writeField2(ctx, oprot); err != nil { return err }
+    if err := p.writeField3(ctx, oprot); err != nil { return err }
+    if err := p.writeField4(ctx, oprot); err != nil { return err }
+    if err := p.writeField5(ctx, oprot); err != nil { return err }
+    if err := p.writeField6(ctx, oprot); err != nil { return err }
+    if err := p.writeField7(ctx, oprot); err != nil { return err }
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *RowGroup) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "columns", thrift.LIST, 1); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:columns: ", p), err) }
+  if err := oprot.WriteListBegin(ctx, thrift.STRUCT, len(p.Columns)); err != nil {
+    return thrift.PrependError("error writing list begin: ", err)
+  }
+  for _, v := range p.Columns {
+    if err := v.Write(ctx, oprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err)
+    }
+  }
+  if err := oprot.WriteListEnd(ctx); err != nil {
+    return thrift.PrependError("error writing list end: ", err)
+  }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 1:columns: ", p), err) }
+  return err
+}
+
+func (p *RowGroup) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "total_byte_size", thrift.I64, 2); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:total_byte_size: ", p), err) }
+  if err := oprot.WriteI64(ctx, int64(p.TotalByteSize)); err != nil {
+  return thrift.PrependError(fmt.Sprintf("%T.total_byte_size (2) field write error: ", p), err) }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 2:total_byte_size: ", p), err) }
+  return err
+}
+
+func (p *RowGroup) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "num_rows", thrift.I64, 3); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:num_rows: ", p), err) }
+  if err := oprot.WriteI64(ctx, int64(p.NumRows)); err != nil {
+  return thrift.PrependError(fmt.Sprintf("%T.num_rows (3) field write error: ", p), err) }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 3:num_rows: ", p), err) }
+  return err
+}
+
+func (p *RowGroup) writeField4(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetSortingColumns() {
+    if err := oprot.WriteFieldBegin(ctx, "sorting_columns", thrift.LIST, 4); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:sorting_columns: ", p), err) }
+    if err := oprot.WriteListBegin(ctx, thrift.STRUCT, len(p.SortingColumns)); err != nil {
+      return thrift.PrependError("error writing list begin: ", err)
+    }
+    for _, v := range p.SortingColumns {
+      if err := v.Write(ctx, oprot); err != nil {
+        return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err)
+      }
+    }
+    if err := oprot.WriteListEnd(ctx); err != nil {
+      return thrift.PrependError("error writing list end: ", err)
+    }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 4:sorting_columns: ", p), err) }
+  }
+  return err
+}
+
+func (p *RowGroup) writeField5(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetFileOffset() {
+    if err := oprot.WriteFieldBegin(ctx, "file_offset", thrift.I64, 5); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:file_offset: ", p), err) }
+    if err := oprot.WriteI64(ctx, int64(*p.FileOffset)); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T.file_offset (5) field write error: ", p), err) }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 5:file_offset: ", p), err) }
+  }
+  return err
+}
+
+func (p *RowGroup) writeField6(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetTotalCompressedSize() {
+    if err := oprot.WriteFieldBegin(ctx, "total_compressed_size", thrift.I64, 6); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:total_compressed_size: ", p), err) }
+    if err := oprot.WriteI64(ctx, int64(*p.TotalCompressedSize)); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T.total_compressed_size (6) field write error: ", p), err) }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 6:total_compressed_size: ", p), err) }
+  }
+  return err
+}
+
+func (p *RowGroup) writeField7(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetOrdinal() {
+    if err := oprot.WriteFieldBegin(ctx, "ordinal", thrift.I16, 7); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:ordinal: ", p), err) }
+    if err := oprot.WriteI16(ctx, int16(*p.Ordinal)); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T.ordinal (7) field write error: ", p), err) }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 7:ordinal: ", p), err) }
+  }
+  return err
+}
+
+func (p *RowGroup) Equals(other *RowGroup) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  if len(p.Columns) != len(other.Columns) { return false }
+  for i, _tgt := range p.Columns {
+    _src12 := other.Columns[i]
+    if !_tgt.Equals(_src12) { return false }
+  }
+  if p.TotalByteSize != other.TotalByteSize { return false }
+  if p.NumRows != other.NumRows { return false }
+  if len(p.SortingColumns) != len(other.SortingColumns) { return false }
+  for i, _tgt := range p.SortingColumns {
+    _src13 := other.SortingColumns[i]
+    if !_tgt.Equals(_src13) { return false }
+  }
+  if p.FileOffset != other.FileOffset {
+    if p.FileOffset == nil || other.FileOffset == nil {
+      return false
+    }
+    if (*p.FileOffset) != (*other.FileOffset) { return false }
+  }
+  if p.TotalCompressedSize != other.TotalCompressedSize {
+    if p.TotalCompressedSize == nil || other.TotalCompressedSize == nil {
+      return false
+    }
+    if (*p.TotalCompressedSize) != (*other.TotalCompressedSize) { return false }
+  }
+  if p.Ordinal != other.Ordinal {
+    if p.Ordinal == nil || other.Ordinal == nil {
+      return false
+    }
+    if (*p.Ordinal) != (*other.Ordinal) { return false }
+  }
+  return true
+}
+
+func (p *RowGroup) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("RowGroup(%+v)", *p)
+}
+
+// Empty struct to signal the order defined by the physical or logical type
+type TypeDefinedOrder struct {
+}
+
+func NewTypeDefinedOrder() *TypeDefinedOrder {
+  return &TypeDefinedOrder{}
+}
+
+func (p *TypeDefinedOrder) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+      return err
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  return nil
+}
+
+func (p *TypeDefinedOrder) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if err := oprot.WriteStructBegin(ctx, "TypeDefinedOrder"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *TypeDefinedOrder) Equals(other *TypeDefinedOrder) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  return true
+}
+
+func (p *TypeDefinedOrder) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("TypeDefinedOrder(%+v)", *p)
+}
+
+// Union to specify the order used for the min_value and max_value fields for a
+// column. This union takes the role of an enhanced enum that allows rich
+// elements (which will be needed for a collation-based ordering in the future).
+// 
+// Possible values are:
+// * TypeDefinedOrder - the column uses the order defined by its logical or
+//                      physical type (if there is no logical type).
+// 
+// If the reader does not support the value of this union, min and max stats
+// for this column should be ignored.
+// 
+// Attributes:
+//  - TYPE_ORDER: The sort orders for logical types are:
+//   UTF8 - unsigned byte-wise comparison
+//   INT8 - signed comparison
+//   INT16 - signed comparison
+//   INT32 - signed comparison
+//   INT64 - signed comparison
+//   UINT8 - unsigned comparison
+//   UINT16 - unsigned comparison
+//   UINT32 - unsigned comparison
+//   UINT64 - unsigned comparison
+//   DECIMAL - signed comparison of the represented value
+//   DATE - signed comparison
+//   TIME_MILLIS - signed comparison
+//   TIME_MICROS - signed comparison
+//   TIMESTAMP_MILLIS - signed comparison
+//   TIMESTAMP_MICROS - signed comparison
+//   INTERVAL - unsigned comparison
+//   JSON - unsigned byte-wise comparison
+//   BSON - unsigned byte-wise comparison
+//   ENUM - unsigned byte-wise comparison
+//   LIST - undefined
+//   MAP - undefined
+// 
+// In the absence of logical types, the sort order is determined by the physical type:
+//   BOOLEAN - false, true
+//   INT32 - signed comparison
+//   INT64 - signed comparison
+//   INT96 (only used for legacy timestamps) - undefined
+//   FLOAT - signed comparison of the represented value (*)
+//   DOUBLE - signed comparison of the represented value (*)
+//   BYTE_ARRAY - unsigned byte-wise comparison
+//   FIXED_LEN_BYTE_ARRAY - unsigned byte-wise comparison
+// 
+// (*) Because the sorting order is not specified properly for floating
+//     point values (relations vs. total ordering) the following
+//     compatibility rules should be applied when reading statistics:
+//     - If the min is a NaN, it should be ignored.
+//     - If the max is a NaN, it should be ignored.
+//     - If the min is +0, the row group may contain -0 values as well.
+//     - If the max is -0, the row group may contain +0 values as well.
+//     - When looking for NaN values, min and max should be ignored.
+type ColumnOrder struct {
+  TYPE_ORDER *TypeDefinedOrder `thrift:"TYPE_ORDER,1" db:"TYPE_ORDER" json:"TYPE_ORDER,omitempty"`
+}
+
+func NewColumnOrder() *ColumnOrder {
+  return &ColumnOrder{}
+}
+
+var ColumnOrder_TYPE_ORDER_DEFAULT *TypeDefinedOrder
+func (p *ColumnOrder) GetTYPE_ORDER() *TypeDefinedOrder {
+  if !p.IsSetTYPE_ORDER() {
+    return ColumnOrder_TYPE_ORDER_DEFAULT
+  }
+return p.TYPE_ORDER
+}
+func (p *ColumnOrder) CountSetFieldsColumnOrder() int {
+  count := 0
+  if (p.IsSetTYPE_ORDER()) {
+    count++
+  }
+  return count
+
+}
+
+func (p *ColumnOrder) IsSetTYPE_ORDER() bool {
+  return p.TYPE_ORDER != nil
+}
+
+func (p *ColumnOrder) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    switch fieldId {
+    case 1:
+      if fieldTypeId == thrift.STRUCT {
+        if err := p.ReadField1(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    default:
+      if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+        return err
+      }
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  return nil
+}
+
+func (p *ColumnOrder)  ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
+  p.TYPE_ORDER = &TypeDefinedOrder{}
+  if err := p.TYPE_ORDER.Read(ctx, iprot); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.TYPE_ORDER), err)
+  }
+  return nil
+}
+
+func (p *ColumnOrder) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if c := p.CountSetFieldsColumnOrder(); c != 1 {
+    return fmt.Errorf("%T write union: exactly one field must be set (%d set).", p, c)
+  }
+  if err := oprot.WriteStructBegin(ctx, "ColumnOrder"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+    if err := p.writeField1(ctx, oprot); err != nil { return err }
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *ColumnOrder) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetTYPE_ORDER() {
+    if err := oprot.WriteFieldBegin(ctx, "TYPE_ORDER", thrift.STRUCT, 1); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:TYPE_ORDER: ", p), err) }
+    if err := p.TYPE_ORDER.Write(ctx, oprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.TYPE_ORDER), err)
+    }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 1:TYPE_ORDER: ", p), err) }
+  }
+  return err
+}
+
+func (p *ColumnOrder) Equals(other *ColumnOrder) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  if !p.TYPE_ORDER.Equals(other.TYPE_ORDER) { return false }
+  return true
+}
+
+func (p *ColumnOrder) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("ColumnOrder(%+v)", *p)
+}
+
+// Attributes:
+//  - Offset: Offset of the page in the file *
+//  - CompressedPageSize: Size of the page, including header. Sum of compressed_page_size and header
+// length
+//  - FirstRowIndex: Index within the RowGroup of the first row of the page; this means pages
+// change on record boundaries (r = 0).
+type PageLocation struct {
+  Offset int64 `thrift:"offset,1,required" db:"offset" json:"offset"`
+  CompressedPageSize int32 `thrift:"compressed_page_size,2,required" db:"compressed_page_size" json:"compressed_page_size"`
+  FirstRowIndex int64 `thrift:"first_row_index,3,required" db:"first_row_index" json:"first_row_index"`
+}
+
+func NewPageLocation() *PageLocation {
+  return &PageLocation{}
+}
+
+
+func (p *PageLocation) GetOffset() int64 {
+  return p.Offset
+}
+
+func (p *PageLocation) GetCompressedPageSize() int32 {
+  return p.CompressedPageSize
+}
+
+func (p *PageLocation) GetFirstRowIndex() int64 {
+  return p.FirstRowIndex
+}
+func (p *PageLocation) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+  var issetOffset bool = false;
+  var issetCompressedPageSize bool = false;
+  var issetFirstRowIndex bool = false;
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    switch fieldId {
+    case 1:
+      if fieldTypeId == thrift.I64 {
+        if err := p.ReadField1(ctx, iprot); err != nil {
+          return err
+        }
+        issetOffset = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 2:
+      if fieldTypeId == thrift.I32 {
+        if err := p.ReadField2(ctx, iprot); err != nil {
+          return err
+        }
+        issetCompressedPageSize = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 3:
+      if fieldTypeId == thrift.I64 {
+        if err := p.ReadField3(ctx, iprot); err != nil {
+          return err
+        }
+        issetFirstRowIndex = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    default:
+      if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+        return err
+      }
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  if !issetOffset{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Offset is not set"));
+  }
+  if !issetCompressedPageSize{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field CompressedPageSize is not set"));
+  }
+  if !issetFirstRowIndex{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field FirstRowIndex is not set"));
+  }
+  return nil
+}
+
+func (p *PageLocation)  ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI64(ctx); err != nil {
+  return thrift.PrependError("error reading field 1: ", err)
+} else {
+  p.Offset = v
+}
+  return nil
+}
+
+func (p *PageLocation)  ReadField2(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI32(ctx); err != nil {
+  return thrift.PrependError("error reading field 2: ", err)
+} else {
+  p.CompressedPageSize = v
+}
+  return nil
+}
+
+func (p *PageLocation)  ReadField3(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI64(ctx); err != nil {
+  return thrift.PrependError("error reading field 3: ", err)
+} else {
+  p.FirstRowIndex = v
+}
+  return nil
+}
+
+func (p *PageLocation) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if err := oprot.WriteStructBegin(ctx, "PageLocation"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+    if err := p.writeField1(ctx, oprot); err != nil { return err }
+    if err := p.writeField2(ctx, oprot); err != nil { return err }
+    if err := p.writeField3(ctx, oprot); err != nil { return err }
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *PageLocation) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "offset", thrift.I64, 1); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:offset: ", p), err) }
+  if err := oprot.WriteI64(ctx, int64(p.Offset)); err != nil {
+  return thrift.PrependError(fmt.Sprintf("%T.offset (1) field write error: ", p), err) }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 1:offset: ", p), err) }
+  return err
+}
+
+func (p *PageLocation) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "compressed_page_size", thrift.I32, 2); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:compressed_page_size: ", p), err) }
+  if err := oprot.WriteI32(ctx, int32(p.CompressedPageSize)); err != nil {
+  return thrift.PrependError(fmt.Sprintf("%T.compressed_page_size (2) field write error: ", p), err) }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 2:compressed_page_size: ", p), err) }
+  return err
+}
+
+func (p *PageLocation) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "first_row_index", thrift.I64, 3); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:first_row_index: ", p), err) }
+  if err := oprot.WriteI64(ctx, int64(p.FirstRowIndex)); err != nil {
+  return thrift.PrependError(fmt.Sprintf("%T.first_row_index (3) field write error: ", p), err) }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 3:first_row_index: ", p), err) }
+  return err
+}
+
+func (p *PageLocation) Equals(other *PageLocation) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  if p.Offset != other.Offset { return false }
+  if p.CompressedPageSize != other.CompressedPageSize { return false }
+  if p.FirstRowIndex != other.FirstRowIndex { return false }
+  return true
+}
+
+func (p *PageLocation) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("PageLocation(%+v)", *p)
+}
+
+// Attributes:
+//  - PageLocations: PageLocations, ordered by increasing PageLocation.offset. It is required
+// that page_locations[i].first_row_index < page_locations[i+1].first_row_index.
+type OffsetIndex struct {
+  PageLocations []*PageLocation `thrift:"page_locations,1,required" db:"page_locations" json:"page_locations"`
+}
+
+func NewOffsetIndex() *OffsetIndex {
+  return &OffsetIndex{}
+}
+
+
+func (p *OffsetIndex) GetPageLocations() []*PageLocation {
+  return p.PageLocations
+}
+func (p *OffsetIndex) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+  var issetPageLocations bool = false;
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    switch fieldId {
+    case 1:
+      if fieldTypeId == thrift.LIST {
+        if err := p.ReadField1(ctx, iprot); err != nil {
+          return err
+        }
+        issetPageLocations = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    default:
+      if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+        return err
+      }
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  if !issetPageLocations{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field PageLocations is not set"));
+  }
+  return nil
+}
+
+func (p *OffsetIndex)  ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
+  _, size, err := iprot.ReadListBegin(ctx)
+  if err != nil {
+    return thrift.PrependError("error reading list begin: ", err)
+  }
+  tSlice := make([]*PageLocation, 0, size)
+  p.PageLocations =  tSlice
+  for i := 0; i < size; i ++ {
+    _elem14 := &PageLocation{}
+    if err := _elem14.Read(ctx, iprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem14), err)
+    }
+    p.PageLocations = append(p.PageLocations, _elem14)
+  }
+  if err := iprot.ReadListEnd(ctx); err != nil {
+    return thrift.PrependError("error reading list end: ", err)
+  }
+  return nil
+}
+
+func (p *OffsetIndex) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if err := oprot.WriteStructBegin(ctx, "OffsetIndex"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+    if err := p.writeField1(ctx, oprot); err != nil { return err }
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *OffsetIndex) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "page_locations", thrift.LIST, 1); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:page_locations: ", p), err) }
+  if err := oprot.WriteListBegin(ctx, thrift.STRUCT, len(p.PageLocations)); err != nil {
+    return thrift.PrependError("error writing list begin: ", err)
+  }
+  for _, v := range p.PageLocations {
+    if err := v.Write(ctx, oprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err)
+    }
+  }
+  if err := oprot.WriteListEnd(ctx); err != nil {
+    return thrift.PrependError("error writing list end: ", err)
+  }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 1:page_locations: ", p), err) }
+  return err
+}
+
+func (p *OffsetIndex) Equals(other *OffsetIndex) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  if len(p.PageLocations) != len(other.PageLocations) { return false }
+  for i, _tgt := range p.PageLocations {
+    _src15 := other.PageLocations[i]
+    if !_tgt.Equals(_src15) { return false }
+  }
+  return true
+}
+
+func (p *OffsetIndex) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("OffsetIndex(%+v)", *p)
+}
+
+// Description for ColumnIndex.
+// Each <array-field>[i] refers to the page at OffsetIndex.page_locations[i]
+// 
+// Attributes:
+//  - NullPages: A list of Boolean values to determine the validity of the corresponding
+// min and max values. If true, a page contains only null values, and writers
+// have to set the corresponding entries in min_values and max_values to
+// byte[0], so that all lists have the same length. If false, the
+// corresponding entries in min_values and max_values must be valid.
+//  - MinValues: Two lists containing lower and upper bounds for the values of each page.
+// These may be the actual minimum and maximum values found on a page, but
+// can also be (more compact) values that do not exist on a page. For
+// example, instead of storing ""Blart Versenwald III", a writer may set
+// min_values[i]="B", max_values[i]="C". Such more compact values must still
+// be valid values within the column's logical type. Readers must make sure
+// that list entries are populated before using them by inspecting null_pages.
+//  - MaxValues
+//  - BoundaryOrder: Stores whether both min_values and max_values are ordered and if so, in
+// which direction. This allows readers to perform binary searches in both
+// lists. Readers cannot assume that max_values[i] <= min_values[i+1], even
+// if the lists are ordered.
+//  - NullCounts: A list containing the number of null values for each page *
+type ColumnIndex struct {
+  NullPages []bool `thrift:"null_pages,1,required" db:"null_pages" json:"null_pages"`
+  MinValues [][]byte `thrift:"min_values,2,required" db:"min_values" json:"min_values"`
+  MaxValues [][]byte `thrift:"max_values,3,required" db:"max_values" json:"max_values"`
+  BoundaryOrder BoundaryOrder `thrift:"boundary_order,4,required" db:"boundary_order" json:"boundary_order"`
+  NullCounts []int64 `thrift:"null_counts,5" db:"null_counts" json:"null_counts,omitempty"`
+}
+
+func NewColumnIndex() *ColumnIndex {
+  return &ColumnIndex{}
+}
+
+
+func (p *ColumnIndex) GetNullPages() []bool {
+  return p.NullPages
+}
+
+func (p *ColumnIndex) GetMinValues() [][]byte {
+  return p.MinValues
+}
+
+func (p *ColumnIndex) GetMaxValues() [][]byte {
+  return p.MaxValues
+}
+
+func (p *ColumnIndex) GetBoundaryOrder() BoundaryOrder {
+  return p.BoundaryOrder
+}
+var ColumnIndex_NullCounts_DEFAULT []int64
+
+func (p *ColumnIndex) GetNullCounts() []int64 {
+  return p.NullCounts
+}
+func (p *ColumnIndex) IsSetNullCounts() bool {
+  return p.NullCounts != nil
+}
+
+func (p *ColumnIndex) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+  var issetNullPages bool = false;
+  var issetMinValues bool = false;
+  var issetMaxValues bool = false;
+  var issetBoundaryOrder bool = false;
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    switch fieldId {
+    case 1:
+      if fieldTypeId == thrift.LIST {
+        if err := p.ReadField1(ctx, iprot); err != nil {
+          return err
+        }
+        issetNullPages = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 2:
+      if fieldTypeId == thrift.LIST {
+        if err := p.ReadField2(ctx, iprot); err != nil {
+          return err
+        }
+        issetMinValues = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 3:
+      if fieldTypeId == thrift.LIST {
+        if err := p.ReadField3(ctx, iprot); err != nil {
+          return err
+        }
+        issetMaxValues = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 4:
+      if fieldTypeId == thrift.I32 {
+        if err := p.ReadField4(ctx, iprot); err != nil {
+          return err
+        }
+        issetBoundaryOrder = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 5:
+      if fieldTypeId == thrift.LIST {
+        if err := p.ReadField5(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    default:
+      if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+        return err
+      }
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  if !issetNullPages{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NullPages is not set"));
+  }
+  if !issetMinValues{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field MinValues is not set"));
+  }
+  if !issetMaxValues{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field MaxValues is not set"));
+  }
+  if !issetBoundaryOrder{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field BoundaryOrder is not set"));
+  }
+  return nil
+}
+
+func (p *ColumnIndex)  ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
+  _, size, err := iprot.ReadListBegin(ctx)
+  if err != nil {
+    return thrift.PrependError("error reading list begin: ", err)
+  }
+  tSlice := make([]bool, 0, size)
+  p.NullPages =  tSlice
+  for i := 0; i < size; i ++ {
+var _elem16 bool
+    if v, err := iprot.ReadBool(ctx); err != nil {
+    return thrift.PrependError("error reading field 0: ", err)
+} else {
+    _elem16 = v
+}
+    p.NullPages = append(p.NullPages, _elem16)
+  }
+  if err := iprot.ReadListEnd(ctx); err != nil {
+    return thrift.PrependError("error reading list end: ", err)
+  }
+  return nil
+}
+
+func (p *ColumnIndex)  ReadField2(ctx context.Context, iprot thrift.TProtocol) error {
+  _, size, err := iprot.ReadListBegin(ctx)
+  if err != nil {
+    return thrift.PrependError("error reading list begin: ", err)
+  }
+  tSlice := make([][]byte, 0, size)
+  p.MinValues =  tSlice
+  for i := 0; i < size; i ++ {
+var _elem17 []byte
+    if v, err := iprot.ReadBinary(ctx); err != nil {
+    return thrift.PrependError("error reading field 0: ", err)
+} else {
+    _elem17 = v
+}
+    p.MinValues = append(p.MinValues, _elem17)
+  }
+  if err := iprot.ReadListEnd(ctx); err != nil {
+    return thrift.PrependError("error reading list end: ", err)
+  }
+  return nil
+}
+
+func (p *ColumnIndex)  ReadField3(ctx context.Context, iprot thrift.TProtocol) error {
+  _, size, err := iprot.ReadListBegin(ctx)
+  if err != nil {
+    return thrift.PrependError("error reading list begin: ", err)
+  }
+  tSlice := make([][]byte, 0, size)
+  p.MaxValues =  tSlice
+  for i := 0; i < size; i ++ {
+var _elem18 []byte
+    if v, err := iprot.ReadBinary(ctx); err != nil {
+    return thrift.PrependError("error reading field 0: ", err)
+} else {
+    _elem18 = v
+}
+    p.MaxValues = append(p.MaxValues, _elem18)
+  }
+  if err := iprot.ReadListEnd(ctx); err != nil {
+    return thrift.PrependError("error reading list end: ", err)
+  }
+  return nil
+}
+
+func (p *ColumnIndex)  ReadField4(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI32(ctx); err != nil {
+  return thrift.PrependError("error reading field 4: ", err)
+} else {
+  temp := BoundaryOrder(v)
+  p.BoundaryOrder = temp
+}
+  return nil
+}
+
+func (p *ColumnIndex)  ReadField5(ctx context.Context, iprot thrift.TProtocol) error {
+  _, size, err := iprot.ReadListBegin(ctx)
+  if err != nil {
+    return thrift.PrependError("error reading list begin: ", err)
+  }
+  tSlice := make([]int64, 0, size)
+  p.NullCounts =  tSlice
+  for i := 0; i < size; i ++ {
+var _elem19 int64
+    if v, err := iprot.ReadI64(ctx); err != nil {
+    return thrift.PrependError("error reading field 0: ", err)
+} else {
+    _elem19 = v
+}
+    p.NullCounts = append(p.NullCounts, _elem19)
+  }
+  if err := iprot.ReadListEnd(ctx); err != nil {
+    return thrift.PrependError("error reading list end: ", err)
+  }
+  return nil
+}
+
+func (p *ColumnIndex) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if err := oprot.WriteStructBegin(ctx, "ColumnIndex"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+    if err := p.writeField1(ctx, oprot); err != nil { return err }
+    if err := p.writeField2(ctx, oprot); err != nil { return err }
+    if err := p.writeField3(ctx, oprot); err != nil { return err }
+    if err := p.writeField4(ctx, oprot); err != nil { return err }
+    if err := p.writeField5(ctx, oprot); err != nil { return err }
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *ColumnIndex) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "null_pages", thrift.LIST, 1); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:null_pages: ", p), err) }
+  if err := oprot.WriteListBegin(ctx, thrift.BOOL, len(p.NullPages)); err != nil {
+    return thrift.PrependError("error writing list begin: ", err)
+  }
+  for _, v := range p.NullPages {
+    if err := oprot.WriteBool(ctx, bool(v)); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T. (0) field write error: ", p), err) }
+  }
+  if err := oprot.WriteListEnd(ctx); err != nil {
+    return thrift.PrependError("error writing list end: ", err)
+  }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 1:null_pages: ", p), err) }
+  return err
+}
+
+func (p *ColumnIndex) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "min_values", thrift.LIST, 2); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:min_values: ", p), err) }
+  if err := oprot.WriteListBegin(ctx, thrift.STRING, len(p.MinValues)); err != nil {
+    return thrift.PrependError("error writing list begin: ", err)
+  }
+  for _, v := range p.MinValues {
+    if err := oprot.WriteBinary(ctx, v); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T. (0) field write error: ", p), err) }
+  }
+  if err := oprot.WriteListEnd(ctx); err != nil {
+    return thrift.PrependError("error writing list end: ", err)
+  }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 2:min_values: ", p), err) }
+  return err
+}
+
+func (p *ColumnIndex) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "max_values", thrift.LIST, 3); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:max_values: ", p), err) }
+  if err := oprot.WriteListBegin(ctx, thrift.STRING, len(p.MaxValues)); err != nil {
+    return thrift.PrependError("error writing list begin: ", err)
+  }
+  for _, v := range p.MaxValues {
+    if err := oprot.WriteBinary(ctx, v); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T. (0) field write error: ", p), err) }
+  }
+  if err := oprot.WriteListEnd(ctx); err != nil {
+    return thrift.PrependError("error writing list end: ", err)
+  }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 3:max_values: ", p), err) }
+  return err
+}
+
+func (p *ColumnIndex) writeField4(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "boundary_order", thrift.I32, 4); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:boundary_order: ", p), err) }
+  if err := oprot.WriteI32(ctx, int32(p.BoundaryOrder)); err != nil {
+  return thrift.PrependError(fmt.Sprintf("%T.boundary_order (4) field write error: ", p), err) }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 4:boundary_order: ", p), err) }
+  return err
+}
+
+func (p *ColumnIndex) writeField5(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetNullCounts() {
+    if err := oprot.WriteFieldBegin(ctx, "null_counts", thrift.LIST, 5); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:null_counts: ", p), err) }
+    if err := oprot.WriteListBegin(ctx, thrift.I64, len(p.NullCounts)); err != nil {
+      return thrift.PrependError("error writing list begin: ", err)
+    }
+    for _, v := range p.NullCounts {
+      if err := oprot.WriteI64(ctx, int64(v)); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T. (0) field write error: ", p), err) }
+    }
+    if err := oprot.WriteListEnd(ctx); err != nil {
+      return thrift.PrependError("error writing list end: ", err)
+    }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 5:null_counts: ", p), err) }
+  }
+  return err
+}
+
+func (p *ColumnIndex) Equals(other *ColumnIndex) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  if len(p.NullPages) != len(other.NullPages) { return false }
+  for i, _tgt := range p.NullPages {
+    _src20 := other.NullPages[i]
+    if _tgt != _src20 { return false }
+  }
+  if len(p.MinValues) != len(other.MinValues) { return false }
+  for i, _tgt := range p.MinValues {
+    _src21 := other.MinValues[i]
+    if bytes.Compare(_tgt, _src21) != 0 { return false }
+  }
+  if len(p.MaxValues) != len(other.MaxValues) { return false }
+  for i, _tgt := range p.MaxValues {
+    _src22 := other.MaxValues[i]
+    if bytes.Compare(_tgt, _src22) != 0 { return false }
+  }
+  if p.BoundaryOrder != other.BoundaryOrder { return false }
+  if len(p.NullCounts) != len(other.NullCounts) { return false }
+  for i, _tgt := range p.NullCounts {
+    _src23 := other.NullCounts[i]
+    if _tgt != _src23 { return false }
+  }
+  return true
+}
+
+func (p *ColumnIndex) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("ColumnIndex(%+v)", *p)
+}
+
+// Attributes:
+//  - AadPrefix: AAD prefix *
+//  - AadFileUnique: Unique file identifier part of AAD suffix *
+//  - SupplyAadPrefix: In files encrypted with AAD prefix without storing it,
+// readers must supply the prefix *
+type AesGcmV1 struct {
+  AadPrefix []byte `thrift:"aad_prefix,1" db:"aad_prefix" json:"aad_prefix,omitempty"`
+  AadFileUnique []byte `thrift:"aad_file_unique,2" db:"aad_file_unique" json:"aad_file_unique,omitempty"`
+  SupplyAadPrefix *bool `thrift:"supply_aad_prefix,3" db:"supply_aad_prefix" json:"supply_aad_prefix,omitempty"`
+}
+
+func NewAesGcmV1() *AesGcmV1 {
+  return &AesGcmV1{}
+}
+
+var AesGcmV1_AadPrefix_DEFAULT []byte
+
+func (p *AesGcmV1) GetAadPrefix() []byte {
+  return p.AadPrefix
+}
+var AesGcmV1_AadFileUnique_DEFAULT []byte
+
+func (p *AesGcmV1) GetAadFileUnique() []byte {
+  return p.AadFileUnique
+}
+var AesGcmV1_SupplyAadPrefix_DEFAULT bool
+func (p *AesGcmV1) GetSupplyAadPrefix() bool {
+  if !p.IsSetSupplyAadPrefix() {
+    return AesGcmV1_SupplyAadPrefix_DEFAULT
+  }
+return *p.SupplyAadPrefix
+}
+func (p *AesGcmV1) IsSetAadPrefix() bool {
+  return p.AadPrefix != nil
+}
+
+func (p *AesGcmV1) IsSetAadFileUnique() bool {
+  return p.AadFileUnique != nil
+}
+
+func (p *AesGcmV1) IsSetSupplyAadPrefix() bool {
+  return p.SupplyAadPrefix != nil
+}
+
+func (p *AesGcmV1) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    switch fieldId {
+    case 1:
+      if fieldTypeId == thrift.STRING {
+        if err := p.ReadField1(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 2:
+      if fieldTypeId == thrift.STRING {
+        if err := p.ReadField2(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 3:
+      if fieldTypeId == thrift.BOOL {
+        if err := p.ReadField3(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    default:
+      if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+        return err
+      }
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  return nil
+}
+
+func (p *AesGcmV1)  ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadBinary(ctx); err != nil {
+  return thrift.PrependError("error reading field 1: ", err)
+} else {
+  p.AadPrefix = v
+}
+  return nil
+}
+
+func (p *AesGcmV1)  ReadField2(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadBinary(ctx); err != nil {
+  return thrift.PrependError("error reading field 2: ", err)
+} else {
+  p.AadFileUnique = v
+}
+  return nil
+}
+
+func (p *AesGcmV1)  ReadField3(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadBool(ctx); err != nil {
+  return thrift.PrependError("error reading field 3: ", err)
+} else {
+  p.SupplyAadPrefix = &v
+}
+  return nil
+}
+
+func (p *AesGcmV1) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if err := oprot.WriteStructBegin(ctx, "AesGcmV1"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+    if err := p.writeField1(ctx, oprot); err != nil { return err }
+    if err := p.writeField2(ctx, oprot); err != nil { return err }
+    if err := p.writeField3(ctx, oprot); err != nil { return err }
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *AesGcmV1) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetAadPrefix() {
+    if err := oprot.WriteFieldBegin(ctx, "aad_prefix", thrift.STRING, 1); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:aad_prefix: ", p), err) }
+    if err := oprot.WriteBinary(ctx, p.AadPrefix); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T.aad_prefix (1) field write error: ", p), err) }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 1:aad_prefix: ", p), err) }
+  }
+  return err
+}
+
+func (p *AesGcmV1) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetAadFileUnique() {
+    if err := oprot.WriteFieldBegin(ctx, "aad_file_unique", thrift.STRING, 2); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:aad_file_unique: ", p), err) }
+    if err := oprot.WriteBinary(ctx, p.AadFileUnique); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T.aad_file_unique (2) field write error: ", p), err) }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 2:aad_file_unique: ", p), err) }
+  }
+  return err
+}
+
+func (p *AesGcmV1) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetSupplyAadPrefix() {
+    if err := oprot.WriteFieldBegin(ctx, "supply_aad_prefix", thrift.BOOL, 3); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:supply_aad_prefix: ", p), err) }
+    if err := oprot.WriteBool(ctx, bool(*p.SupplyAadPrefix)); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T.supply_aad_prefix (3) field write error: ", p), err) }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 3:supply_aad_prefix: ", p), err) }
+  }
+  return err
+}
+
+func (p *AesGcmV1) Equals(other *AesGcmV1) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  if bytes.Compare(p.AadPrefix, other.AadPrefix) != 0 { return false }
+  if bytes.Compare(p.AadFileUnique, other.AadFileUnique) != 0 { return false }
+  if p.SupplyAadPrefix != other.SupplyAadPrefix {
+    if p.SupplyAadPrefix == nil || other.SupplyAadPrefix == nil {
+      return false
+    }
+    if (*p.SupplyAadPrefix) != (*other.SupplyAadPrefix) { return false }
+  }
+  return true
+}
+
+func (p *AesGcmV1) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("AesGcmV1(%+v)", *p)
+}
+
+// Attributes:
+//  - AadPrefix: AAD prefix *
+//  - AadFileUnique: Unique file identifier part of AAD suffix *
+//  - SupplyAadPrefix: In files encrypted with AAD prefix without storing it,
+// readers must supply the prefix *
+type AesGcmCtrV1 struct {
+  AadPrefix []byte `thrift:"aad_prefix,1" db:"aad_prefix" json:"aad_prefix,omitempty"`
+  AadFileUnique []byte `thrift:"aad_file_unique,2" db:"aad_file_unique" json:"aad_file_unique,omitempty"`
+  SupplyAadPrefix *bool `thrift:"supply_aad_prefix,3" db:"supply_aad_prefix" json:"supply_aad_prefix,omitempty"`
+}
+
+func NewAesGcmCtrV1() *AesGcmCtrV1 {
+  return &AesGcmCtrV1{}
+}
+
+var AesGcmCtrV1_AadPrefix_DEFAULT []byte
+
+func (p *AesGcmCtrV1) GetAadPrefix() []byte {
+  return p.AadPrefix
+}
+var AesGcmCtrV1_AadFileUnique_DEFAULT []byte
+
+func (p *AesGcmCtrV1) GetAadFileUnique() []byte {
+  return p.AadFileUnique
+}
+var AesGcmCtrV1_SupplyAadPrefix_DEFAULT bool
+func (p *AesGcmCtrV1) GetSupplyAadPrefix() bool {
+  if !p.IsSetSupplyAadPrefix() {
+    return AesGcmCtrV1_SupplyAadPrefix_DEFAULT
+  }
+return *p.SupplyAadPrefix
+}
+func (p *AesGcmCtrV1) IsSetAadPrefix() bool {
+  return p.AadPrefix != nil
+}
+
+func (p *AesGcmCtrV1) IsSetAadFileUnique() bool {
+  return p.AadFileUnique != nil
+}
+
+func (p *AesGcmCtrV1) IsSetSupplyAadPrefix() bool {
+  return p.SupplyAadPrefix != nil
+}
+
+func (p *AesGcmCtrV1) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    switch fieldId {
+    case 1:
+      if fieldTypeId == thrift.STRING {
+        if err := p.ReadField1(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 2:
+      if fieldTypeId == thrift.STRING {
+        if err := p.ReadField2(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 3:
+      if fieldTypeId == thrift.BOOL {
+        if err := p.ReadField3(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    default:
+      if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+        return err
+      }
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  return nil
+}
+
+func (p *AesGcmCtrV1)  ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadBinary(ctx); err != nil {
+  return thrift.PrependError("error reading field 1: ", err)
+} else {
+  p.AadPrefix = v
+}
+  return nil
+}
+
+func (p *AesGcmCtrV1)  ReadField2(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadBinary(ctx); err != nil {
+  return thrift.PrependError("error reading field 2: ", err)
+} else {
+  p.AadFileUnique = v
+}
+  return nil
+}
+
+func (p *AesGcmCtrV1)  ReadField3(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadBool(ctx); err != nil {
+  return thrift.PrependError("error reading field 3: ", err)
+} else {
+  p.SupplyAadPrefix = &v
+}
+  return nil
+}
+
+func (p *AesGcmCtrV1) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if err := oprot.WriteStructBegin(ctx, "AesGcmCtrV1"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+    if err := p.writeField1(ctx, oprot); err != nil { return err }
+    if err := p.writeField2(ctx, oprot); err != nil { return err }
+    if err := p.writeField3(ctx, oprot); err != nil { return err }
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *AesGcmCtrV1) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetAadPrefix() {
+    if err := oprot.WriteFieldBegin(ctx, "aad_prefix", thrift.STRING, 1); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:aad_prefix: ", p), err) }
+    if err := oprot.WriteBinary(ctx, p.AadPrefix); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T.aad_prefix (1) field write error: ", p), err) }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 1:aad_prefix: ", p), err) }
+  }
+  return err
+}
+
+func (p *AesGcmCtrV1) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetAadFileUnique() {
+    if err := oprot.WriteFieldBegin(ctx, "aad_file_unique", thrift.STRING, 2); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:aad_file_unique: ", p), err) }
+    if err := oprot.WriteBinary(ctx, p.AadFileUnique); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T.aad_file_unique (2) field write error: ", p), err) }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 2:aad_file_unique: ", p), err) }
+  }
+  return err
+}
+
+func (p *AesGcmCtrV1) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetSupplyAadPrefix() {
+    if err := oprot.WriteFieldBegin(ctx, "supply_aad_prefix", thrift.BOOL, 3); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:supply_aad_prefix: ", p), err) }
+    if err := oprot.WriteBool(ctx, bool(*p.SupplyAadPrefix)); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T.supply_aad_prefix (3) field write error: ", p), err) }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 3:supply_aad_prefix: ", p), err) }
+  }
+  return err
+}
+
+func (p *AesGcmCtrV1) Equals(other *AesGcmCtrV1) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  if bytes.Compare(p.AadPrefix, other.AadPrefix) != 0 { return false }
+  if bytes.Compare(p.AadFileUnique, other.AadFileUnique) != 0 { return false }
+  if p.SupplyAadPrefix != other.SupplyAadPrefix {
+    if p.SupplyAadPrefix == nil || other.SupplyAadPrefix == nil {
+      return false
+    }
+    if (*p.SupplyAadPrefix) != (*other.SupplyAadPrefix) { return false }
+  }
+  return true
+}
+
+func (p *AesGcmCtrV1) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("AesGcmCtrV1(%+v)", *p)
+}
+
+// Attributes:
+//  - AES_GCM_V1
+//  - AES_GCM_CTR_V1
+type EncryptionAlgorithm struct {
+  AES_GCM_V1 *AesGcmV1 `thrift:"AES_GCM_V1,1" db:"AES_GCM_V1" json:"AES_GCM_V1,omitempty"`
+  AES_GCM_CTR_V1 *AesGcmCtrV1 `thrift:"AES_GCM_CTR_V1,2" db:"AES_GCM_CTR_V1" json:"AES_GCM_CTR_V1,omitempty"`
+}
+
+func NewEncryptionAlgorithm() *EncryptionAlgorithm {
+  return &EncryptionAlgorithm{}
+}
+
+var EncryptionAlgorithm_AES_GCM_V1_DEFAULT *AesGcmV1
+func (p *EncryptionAlgorithm) GetAES_GCM_V1() *AesGcmV1 {
+  if !p.IsSetAES_GCM_V1() {
+    return EncryptionAlgorithm_AES_GCM_V1_DEFAULT
+  }
+return p.AES_GCM_V1
+}
+var EncryptionAlgorithm_AES_GCM_CTR_V1_DEFAULT *AesGcmCtrV1
+func (p *EncryptionAlgorithm) GetAES_GCM_CTR_V1() *AesGcmCtrV1 {
+  if !p.IsSetAES_GCM_CTR_V1() {
+    return EncryptionAlgorithm_AES_GCM_CTR_V1_DEFAULT
+  }
+return p.AES_GCM_CTR_V1
+}
+func (p *EncryptionAlgorithm) CountSetFieldsEncryptionAlgorithm() int {
+  count := 0
+  if (p.IsSetAES_GCM_V1()) {
+    count++
+  }
+  if (p.IsSetAES_GCM_CTR_V1()) {
+    count++
+  }
+  return count
+
+}
+
+func (p *EncryptionAlgorithm) IsSetAES_GCM_V1() bool {
+  return p.AES_GCM_V1 != nil
+}
+
+func (p *EncryptionAlgorithm) IsSetAES_GCM_CTR_V1() bool {
+  return p.AES_GCM_CTR_V1 != nil
+}
+
+func (p *EncryptionAlgorithm) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    switch fieldId {
+    case 1:
+      if fieldTypeId == thrift.STRUCT {
+        if err := p.ReadField1(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 2:
+      if fieldTypeId == thrift.STRUCT {
+        if err := p.ReadField2(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    default:
+      if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+        return err
+      }
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  return nil
+}
+
+func (p *EncryptionAlgorithm)  ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
+  p.AES_GCM_V1 = &AesGcmV1{}
+  if err := p.AES_GCM_V1.Read(ctx, iprot); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.AES_GCM_V1), err)
+  }
+  return nil
+}
+
+func (p *EncryptionAlgorithm)  ReadField2(ctx context.Context, iprot thrift.TProtocol) error {
+  p.AES_GCM_CTR_V1 = &AesGcmCtrV1{}
+  if err := p.AES_GCM_CTR_V1.Read(ctx, iprot); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.AES_GCM_CTR_V1), err)
+  }
+  return nil
+}
+
+func (p *EncryptionAlgorithm) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if c := p.CountSetFieldsEncryptionAlgorithm(); c != 1 {
+    return fmt.Errorf("%T write union: exactly one field must be set (%d set).", p, c)
+  }
+  if err := oprot.WriteStructBegin(ctx, "EncryptionAlgorithm"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+    if err := p.writeField1(ctx, oprot); err != nil { return err }
+    if err := p.writeField2(ctx, oprot); err != nil { return err }
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *EncryptionAlgorithm) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetAES_GCM_V1() {
+    if err := oprot.WriteFieldBegin(ctx, "AES_GCM_V1", thrift.STRUCT, 1); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:AES_GCM_V1: ", p), err) }
+    if err := p.AES_GCM_V1.Write(ctx, oprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.AES_GCM_V1), err)
+    }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 1:AES_GCM_V1: ", p), err) }
+  }
+  return err
+}
+
+func (p *EncryptionAlgorithm) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetAES_GCM_CTR_V1() {
+    if err := oprot.WriteFieldBegin(ctx, "AES_GCM_CTR_V1", thrift.STRUCT, 2); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:AES_GCM_CTR_V1: ", p), err) }
+    if err := p.AES_GCM_CTR_V1.Write(ctx, oprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.AES_GCM_CTR_V1), err)
+    }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 2:AES_GCM_CTR_V1: ", p), err) }
+  }
+  return err
+}
+
+func (p *EncryptionAlgorithm) Equals(other *EncryptionAlgorithm) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  if !p.AES_GCM_V1.Equals(other.AES_GCM_V1) { return false }
+  if !p.AES_GCM_CTR_V1.Equals(other.AES_GCM_CTR_V1) { return false }
+  return true
+}
+
+func (p *EncryptionAlgorithm) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("EncryptionAlgorithm(%+v)", *p)
+}
+
+// Description for file metadata
+// 
+// Attributes:
+//  - Version: Version of this file *
+//  - Schema: Parquet schema for this file.  This schema contains metadata for all the columns.
+// The schema is represented as a tree with a single root.  The nodes of the tree
+// are flattened to a list by doing a depth-first traversal.
+// The column metadata contains the path in the schema for that column which can be
+// used to map columns to nodes in the schema.
+// The first element is the root *
+//  - NumRows: Number of rows in this file *
+//  - RowGroups: Row groups in this file *
+//  - KeyValueMetadata: Optional key/value metadata *
+//  - CreatedBy: String for application that wrote this file.  This should be in the format
+// <Application> version <App Version> (build <App Build Hash>).
+// e.g. impala version 1.0 (build 6cf94d29b2b7115df4de2c06e2ab4326d721eb55)
+// 
+//  - ColumnOrders: Sort order used for the min_value and max_value fields of each column in
+// this file. Sort orders are listed in the order matching the columns in the
+// schema. The indexes are not necessary the same though, because only leaf
+// nodes of the schema are represented in the list of sort orders.
+// 
+// Without column_orders, the meaning of the min_value and max_value fields is
+// undefined. To ensure well-defined behaviour, if min_value and max_value are
+// written to a Parquet file, column_orders must be written as well.
+// 
+// The obsolete min and max fields are always sorted by signed comparison
+// regardless of column_orders.
+//  - EncryptionAlgorithm: Encryption algorithm. This field is set only in encrypted files
+// with plaintext footer. Files with encrypted footer store algorithm id
+// in FileCryptoMetaData structure.
+//  - FooterSigningKeyMetadata: Retrieval metadata of key used for signing the footer.
+// Used only in encrypted files with plaintext footer.
+type FileMetaData struct {
+  Version int32 `thrift:"version,1,required" db:"version" json:"version"`
+  Schema []*SchemaElement `thrift:"schema,2,required" db:"schema" json:"schema"`
+  NumRows int64 `thrift:"num_rows,3,required" db:"num_rows" json:"num_rows"`
+  RowGroups []*RowGroup `thrift:"row_groups,4,required" db:"row_groups" json:"row_groups"`
+  KeyValueMetadata []*KeyValue `thrift:"key_value_metadata,5" db:"key_value_metadata" json:"key_value_metadata,omitempty"`
+  CreatedBy *string `thrift:"created_by,6" db:"created_by" json:"created_by,omitempty"`
+  ColumnOrders []*ColumnOrder `thrift:"column_orders,7" db:"column_orders" json:"column_orders,omitempty"`
+  EncryptionAlgorithm *EncryptionAlgorithm `thrift:"encryption_algorithm,8" db:"encryption_algorithm" json:"encryption_algorithm,omitempty"`
+  FooterSigningKeyMetadata []byte `thrift:"footer_signing_key_metadata,9" db:"footer_signing_key_metadata" json:"footer_signing_key_metadata,omitempty"`
+}
+
+func NewFileMetaData() *FileMetaData {
+  return &FileMetaData{}
+}
+
+
+func (p *FileMetaData) GetVersion() int32 {
+  return p.Version
+}
+
+func (p *FileMetaData) GetSchema() []*SchemaElement {
+  return p.Schema
+}
+
+func (p *FileMetaData) GetNumRows() int64 {
+  return p.NumRows
+}
+
+func (p *FileMetaData) GetRowGroups() []*RowGroup {
+  return p.RowGroups
+}
+var FileMetaData_KeyValueMetadata_DEFAULT []*KeyValue
+
+func (p *FileMetaData) GetKeyValueMetadata() []*KeyValue {
+  return p.KeyValueMetadata
+}
+var FileMetaData_CreatedBy_DEFAULT string
+func (p *FileMetaData) GetCreatedBy() string {
+  if !p.IsSetCreatedBy() {
+    return FileMetaData_CreatedBy_DEFAULT
+  }
+return *p.CreatedBy
+}
+var FileMetaData_ColumnOrders_DEFAULT []*ColumnOrder
+
+func (p *FileMetaData) GetColumnOrders() []*ColumnOrder {
+  return p.ColumnOrders
+}
+var FileMetaData_EncryptionAlgorithm_DEFAULT *EncryptionAlgorithm
+func (p *FileMetaData) GetEncryptionAlgorithm() *EncryptionAlgorithm {
+  if !p.IsSetEncryptionAlgorithm() {
+    return FileMetaData_EncryptionAlgorithm_DEFAULT
+  }
+return p.EncryptionAlgorithm
+}
+var FileMetaData_FooterSigningKeyMetadata_DEFAULT []byte
+
+func (p *FileMetaData) GetFooterSigningKeyMetadata() []byte {
+  return p.FooterSigningKeyMetadata
+}
+func (p *FileMetaData) IsSetKeyValueMetadata() bool {
+  return p.KeyValueMetadata != nil
+}
+
+func (p *FileMetaData) IsSetCreatedBy() bool {
+  return p.CreatedBy != nil
+}
+
+func (p *FileMetaData) IsSetColumnOrders() bool {
+  return p.ColumnOrders != nil
+}
+
+func (p *FileMetaData) IsSetEncryptionAlgorithm() bool {
+  return p.EncryptionAlgorithm != nil
+}
+
+func (p *FileMetaData) IsSetFooterSigningKeyMetadata() bool {
+  return p.FooterSigningKeyMetadata != nil
+}
+
+func (p *FileMetaData) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+  var issetVersion bool = false;
+  var issetSchema bool = false;
+  var issetNumRows bool = false;
+  var issetRowGroups bool = false;
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    switch fieldId {
+    case 1:
+      if fieldTypeId == thrift.I32 {
+        if err := p.ReadField1(ctx, iprot); err != nil {
+          return err
+        }
+        issetVersion = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 2:
+      if fieldTypeId == thrift.LIST {
+        if err := p.ReadField2(ctx, iprot); err != nil {
+          return err
+        }
+        issetSchema = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 3:
+      if fieldTypeId == thrift.I64 {
+        if err := p.ReadField3(ctx, iprot); err != nil {
+          return err
+        }
+        issetNumRows = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 4:
+      if fieldTypeId == thrift.LIST {
+        if err := p.ReadField4(ctx, iprot); err != nil {
+          return err
+        }
+        issetRowGroups = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 5:
+      if fieldTypeId == thrift.LIST {
+        if err := p.ReadField5(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 6:
+      if fieldTypeId == thrift.STRING {
+        if err := p.ReadField6(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 7:
+      if fieldTypeId == thrift.LIST {
+        if err := p.ReadField7(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 8:
+      if fieldTypeId == thrift.STRUCT {
+        if err := p.ReadField8(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 9:
+      if fieldTypeId == thrift.STRING {
+        if err := p.ReadField9(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    default:
+      if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+        return err
+      }
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  if !issetVersion{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Version is not set"));
+  }
+  if !issetSchema{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Schema is not set"));
+  }
+  if !issetNumRows{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumRows is not set"));
+  }
+  if !issetRowGroups{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field RowGroups is not set"));
+  }
+  return nil
+}
+
+func (p *FileMetaData)  ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI32(ctx); err != nil {
+  return thrift.PrependError("error reading field 1: ", err)
+} else {
+  p.Version = v
+}
+  return nil
+}
+
+func (p *FileMetaData)  ReadField2(ctx context.Context, iprot thrift.TProtocol) error {
+  _, size, err := iprot.ReadListBegin(ctx)
+  if err != nil {
+    return thrift.PrependError("error reading list begin: ", err)
+  }
+  tSlice := make([]*SchemaElement, 0, size)
+  p.Schema =  tSlice
+  for i := 0; i < size; i ++ {
+    _elem24 := &SchemaElement{}
+    if err := _elem24.Read(ctx, iprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem24), err)
+    }
+    p.Schema = append(p.Schema, _elem24)
+  }
+  if err := iprot.ReadListEnd(ctx); err != nil {
+    return thrift.PrependError("error reading list end: ", err)
+  }
+  return nil
+}
+
+func (p *FileMetaData)  ReadField3(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadI64(ctx); err != nil {
+  return thrift.PrependError("error reading field 3: ", err)
+} else {
+  p.NumRows = v
+}
+  return nil
+}
+
+func (p *FileMetaData)  ReadField4(ctx context.Context, iprot thrift.TProtocol) error {
+  _, size, err := iprot.ReadListBegin(ctx)
+  if err != nil {
+    return thrift.PrependError("error reading list begin: ", err)
+  }
+  tSlice := make([]*RowGroup, 0, size)
+  p.RowGroups =  tSlice
+  for i := 0; i < size; i ++ {
+    _elem25 := &RowGroup{}
+    if err := _elem25.Read(ctx, iprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem25), err)
+    }
+    p.RowGroups = append(p.RowGroups, _elem25)
+  }
+  if err := iprot.ReadListEnd(ctx); err != nil {
+    return thrift.PrependError("error reading list end: ", err)
+  }
+  return nil
+}
+
+func (p *FileMetaData)  ReadField5(ctx context.Context, iprot thrift.TProtocol) error {
+  _, size, err := iprot.ReadListBegin(ctx)
+  if err != nil {
+    return thrift.PrependError("error reading list begin: ", err)
+  }
+  tSlice := make([]*KeyValue, 0, size)
+  p.KeyValueMetadata =  tSlice
+  for i := 0; i < size; i ++ {
+    _elem26 := &KeyValue{}
+    if err := _elem26.Read(ctx, iprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem26), err)
+    }
+    p.KeyValueMetadata = append(p.KeyValueMetadata, _elem26)
+  }
+  if err := iprot.ReadListEnd(ctx); err != nil {
+    return thrift.PrependError("error reading list end: ", err)
+  }
+  return nil
+}
+
+func (p *FileMetaData)  ReadField6(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadString(ctx); err != nil {
+  return thrift.PrependError("error reading field 6: ", err)
+} else {
+  p.CreatedBy = &v
+}
+  return nil
+}
+
+func (p *FileMetaData)  ReadField7(ctx context.Context, iprot thrift.TProtocol) error {
+  _, size, err := iprot.ReadListBegin(ctx)
+  if err != nil {
+    return thrift.PrependError("error reading list begin: ", err)
+  }
+  tSlice := make([]*ColumnOrder, 0, size)
+  p.ColumnOrders =  tSlice
+  for i := 0; i < size; i ++ {
+    _elem27 := &ColumnOrder{}
+    if err := _elem27.Read(ctx, iprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem27), err)
+    }
+    p.ColumnOrders = append(p.ColumnOrders, _elem27)
+  }
+  if err := iprot.ReadListEnd(ctx); err != nil {
+    return thrift.PrependError("error reading list end: ", err)
+  }
+  return nil
+}
+
+func (p *FileMetaData)  ReadField8(ctx context.Context, iprot thrift.TProtocol) error {
+  p.EncryptionAlgorithm = &EncryptionAlgorithm{}
+  if err := p.EncryptionAlgorithm.Read(ctx, iprot); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.EncryptionAlgorithm), err)
+  }
+  return nil
+}
+
+func (p *FileMetaData)  ReadField9(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadBinary(ctx); err != nil {
+  return thrift.PrependError("error reading field 9: ", err)
+} else {
+  p.FooterSigningKeyMetadata = v
+}
+  return nil
+}
+
+func (p *FileMetaData) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if err := oprot.WriteStructBegin(ctx, "FileMetaData"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+    if err := p.writeField1(ctx, oprot); err != nil { return err }
+    if err := p.writeField2(ctx, oprot); err != nil { return err }
+    if err := p.writeField3(ctx, oprot); err != nil { return err }
+    if err := p.writeField4(ctx, oprot); err != nil { return err }
+    if err := p.writeField5(ctx, oprot); err != nil { return err }
+    if err := p.writeField6(ctx, oprot); err != nil { return err }
+    if err := p.writeField7(ctx, oprot); err != nil { return err }
+    if err := p.writeField8(ctx, oprot); err != nil { return err }
+    if err := p.writeField9(ctx, oprot); err != nil { return err }
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *FileMetaData) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "version", thrift.I32, 1); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:version: ", p), err) }
+  if err := oprot.WriteI32(ctx, int32(p.Version)); err != nil {
+  return thrift.PrependError(fmt.Sprintf("%T.version (1) field write error: ", p), err) }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 1:version: ", p), err) }
+  return err
+}
+
+func (p *FileMetaData) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "schema", thrift.LIST, 2); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:schema: ", p), err) }
+  if err := oprot.WriteListBegin(ctx, thrift.STRUCT, len(p.Schema)); err != nil {
+    return thrift.PrependError("error writing list begin: ", err)
+  }
+  for _, v := range p.Schema {
+    if err := v.Write(ctx, oprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err)
+    }
+  }
+  if err := oprot.WriteListEnd(ctx); err != nil {
+    return thrift.PrependError("error writing list end: ", err)
+  }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 2:schema: ", p), err) }
+  return err
+}
+
+func (p *FileMetaData) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "num_rows", thrift.I64, 3); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:num_rows: ", p), err) }
+  if err := oprot.WriteI64(ctx, int64(p.NumRows)); err != nil {
+  return thrift.PrependError(fmt.Sprintf("%T.num_rows (3) field write error: ", p), err) }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 3:num_rows: ", p), err) }
+  return err
+}
+
+func (p *FileMetaData) writeField4(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "row_groups", thrift.LIST, 4); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:row_groups: ", p), err) }
+  if err := oprot.WriteListBegin(ctx, thrift.STRUCT, len(p.RowGroups)); err != nil {
+    return thrift.PrependError("error writing list begin: ", err)
+  }
+  for _, v := range p.RowGroups {
+    if err := v.Write(ctx, oprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err)
+    }
+  }
+  if err := oprot.WriteListEnd(ctx); err != nil {
+    return thrift.PrependError("error writing list end: ", err)
+  }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 4:row_groups: ", p), err) }
+  return err
+}
+
+func (p *FileMetaData) writeField5(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetKeyValueMetadata() {
+    if err := oprot.WriteFieldBegin(ctx, "key_value_metadata", thrift.LIST, 5); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:key_value_metadata: ", p), err) }
+    if err := oprot.WriteListBegin(ctx, thrift.STRUCT, len(p.KeyValueMetadata)); err != nil {
+      return thrift.PrependError("error writing list begin: ", err)
+    }
+    for _, v := range p.KeyValueMetadata {
+      if err := v.Write(ctx, oprot); err != nil {
+        return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err)
+      }
+    }
+    if err := oprot.WriteListEnd(ctx); err != nil {
+      return thrift.PrependError("error writing list end: ", err)
+    }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 5:key_value_metadata: ", p), err) }
+  }
+  return err
+}
+
+func (p *FileMetaData) writeField6(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetCreatedBy() {
+    if err := oprot.WriteFieldBegin(ctx, "created_by", thrift.STRING, 6); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:created_by: ", p), err) }
+    if err := oprot.WriteString(ctx, string(*p.CreatedBy)); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T.created_by (6) field write error: ", p), err) }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 6:created_by: ", p), err) }
+  }
+  return err
+}
+
+func (p *FileMetaData) writeField7(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetColumnOrders() {
+    if err := oprot.WriteFieldBegin(ctx, "column_orders", thrift.LIST, 7); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:column_orders: ", p), err) }
+    if err := oprot.WriteListBegin(ctx, thrift.STRUCT, len(p.ColumnOrders)); err != nil {
+      return thrift.PrependError("error writing list begin: ", err)
+    }
+    for _, v := range p.ColumnOrders {
+      if err := v.Write(ctx, oprot); err != nil {
+        return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err)
+      }
+    }
+    if err := oprot.WriteListEnd(ctx); err != nil {
+      return thrift.PrependError("error writing list end: ", err)
+    }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 7:column_orders: ", p), err) }
+  }
+  return err
+}
+
+func (p *FileMetaData) writeField8(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetEncryptionAlgorithm() {
+    if err := oprot.WriteFieldBegin(ctx, "encryption_algorithm", thrift.STRUCT, 8); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 8:encryption_algorithm: ", p), err) }
+    if err := p.EncryptionAlgorithm.Write(ctx, oprot); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.EncryptionAlgorithm), err)
+    }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 8:encryption_algorithm: ", p), err) }
+  }
+  return err
+}
+
+func (p *FileMetaData) writeField9(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetFooterSigningKeyMetadata() {
+    if err := oprot.WriteFieldBegin(ctx, "footer_signing_key_metadata", thrift.STRING, 9); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 9:footer_signing_key_metadata: ", p), err) }
+    if err := oprot.WriteBinary(ctx, p.FooterSigningKeyMetadata); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T.footer_signing_key_metadata (9) field write error: ", p), err) }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 9:footer_signing_key_metadata: ", p), err) }
+  }
+  return err
+}
+
+func (p *FileMetaData) Equals(other *FileMetaData) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  if p.Version != other.Version { return false }
+  if len(p.Schema) != len(other.Schema) { return false }
+  for i, _tgt := range p.Schema {
+    _src28 := other.Schema[i]
+    if !_tgt.Equals(_src28) { return false }
+  }
+  if p.NumRows != other.NumRows { return false }
+  if len(p.RowGroups) != len(other.RowGroups) { return false }
+  for i, _tgt := range p.RowGroups {
+    _src29 := other.RowGroups[i]
+    if !_tgt.Equals(_src29) { return false }
+  }
+  if len(p.KeyValueMetadata) != len(other.KeyValueMetadata) { return false }
+  for i, _tgt := range p.KeyValueMetadata {
+    _src30 := other.KeyValueMetadata[i]
+    if !_tgt.Equals(_src30) { return false }
+  }
+  if p.CreatedBy != other.CreatedBy {
+    if p.CreatedBy == nil || other.CreatedBy == nil {
+      return false
+    }
+    if (*p.CreatedBy) != (*other.CreatedBy) { return false }
+  }
+  if len(p.ColumnOrders) != len(other.ColumnOrders) { return false }
+  for i, _tgt := range p.ColumnOrders {
+    _src31 := other.ColumnOrders[i]
+    if !_tgt.Equals(_src31) { return false }
+  }
+  if !p.EncryptionAlgorithm.Equals(other.EncryptionAlgorithm) { return false }
+  if bytes.Compare(p.FooterSigningKeyMetadata, other.FooterSigningKeyMetadata) != 0 { return false }
+  return true
+}
+
+func (p *FileMetaData) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("FileMetaData(%+v)", *p)
+}
+
+// Crypto metadata for files with encrypted footer *
+// 
+// Attributes:
+//  - EncryptionAlgorithm: Encryption algorithm. This field is only used for files
+// with encrypted footer. Files with plaintext footer store algorithm id
+// inside footer (FileMetaData structure).
+//  - KeyMetadata: Retrieval metadata of key used for encryption of footer,
+// and (possibly) columns *
+type FileCryptoMetaData struct {
+  EncryptionAlgorithm *EncryptionAlgorithm `thrift:"encryption_algorithm,1,required" db:"encryption_algorithm" json:"encryption_algorithm"`
+  KeyMetadata []byte `thrift:"key_metadata,2" db:"key_metadata" json:"key_metadata,omitempty"`
+}
+
+func NewFileCryptoMetaData() *FileCryptoMetaData {
+  return &FileCryptoMetaData{}
+}
+
+var FileCryptoMetaData_EncryptionAlgorithm_DEFAULT *EncryptionAlgorithm
+func (p *FileCryptoMetaData) GetEncryptionAlgorithm() *EncryptionAlgorithm {
+  if !p.IsSetEncryptionAlgorithm() {
+    return FileCryptoMetaData_EncryptionAlgorithm_DEFAULT
+  }
+return p.EncryptionAlgorithm
+}
+var FileCryptoMetaData_KeyMetadata_DEFAULT []byte
+
+func (p *FileCryptoMetaData) GetKeyMetadata() []byte {
+  return p.KeyMetadata
+}
+func (p *FileCryptoMetaData) IsSetEncryptionAlgorithm() bool {
+  return p.EncryptionAlgorithm != nil
+}
+
+func (p *FileCryptoMetaData) IsSetKeyMetadata() bool {
+  return p.KeyMetadata != nil
+}
+
+func (p *FileCryptoMetaData) Read(ctx context.Context, iprot thrift.TProtocol) error {
+  if _, err := iprot.ReadStructBegin(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+  }
+
+  var issetEncryptionAlgorithm bool = false;
+
+  for {
+    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+    if err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+    }
+    if fieldTypeId == thrift.STOP { break; }
+    switch fieldId {
+    case 1:
+      if fieldTypeId == thrift.STRUCT {
+        if err := p.ReadField1(ctx, iprot); err != nil {
+          return err
+        }
+        issetEncryptionAlgorithm = true
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    case 2:
+      if fieldTypeId == thrift.STRING {
+        if err := p.ReadField2(ctx, iprot); err != nil {
+          return err
+        }
+      } else {
+        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+          return err
+        }
+      }
+    default:
+      if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+        return err
+      }
+    }
+    if err := iprot.ReadFieldEnd(ctx); err != nil {
+      return err
+    }
+  }
+  if err := iprot.ReadStructEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+  }
+  if !issetEncryptionAlgorithm{
+    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field EncryptionAlgorithm is not set"));
+  }
+  return nil
+}
+
+func (p *FileCryptoMetaData)  ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
+  p.EncryptionAlgorithm = &EncryptionAlgorithm{}
+  if err := p.EncryptionAlgorithm.Read(ctx, iprot); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.EncryptionAlgorithm), err)
+  }
+  return nil
+}
+
+func (p *FileCryptoMetaData)  ReadField2(ctx context.Context, iprot thrift.TProtocol) error {
+  if v, err := iprot.ReadBinary(ctx); err != nil {
+  return thrift.PrependError("error reading field 2: ", err)
+} else {
+  p.KeyMetadata = v
+}
+  return nil
+}
+
+func (p *FileCryptoMetaData) Write(ctx context.Context, oprot thrift.TProtocol) error {
+  if err := oprot.WriteStructBegin(ctx, "FileCryptoMetaData"); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
+  if p != nil {
+    if err := p.writeField1(ctx, oprot); err != nil { return err }
+    if err := p.writeField2(ctx, oprot); err != nil { return err }
+  }
+  if err := oprot.WriteFieldStop(ctx); err != nil {
+    return thrift.PrependError("write field stop error: ", err) }
+  if err := oprot.WriteStructEnd(ctx); err != nil {
+    return thrift.PrependError("write struct stop error: ", err) }
+  return nil
+}
+
+func (p *FileCryptoMetaData) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if err := oprot.WriteFieldBegin(ctx, "encryption_algorithm", thrift.STRUCT, 1); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:encryption_algorithm: ", p), err) }
+  if err := p.EncryptionAlgorithm.Write(ctx, oprot); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.EncryptionAlgorithm), err)
+  }
+  if err := oprot.WriteFieldEnd(ctx); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T write field end error 1:encryption_algorithm: ", p), err) }
+  return err
+}
+
+func (p *FileCryptoMetaData) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) {
+  if p.IsSetKeyMetadata() {
+    if err := oprot.WriteFieldBegin(ctx, "key_metadata", thrift.STRING, 2); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:key_metadata: ", p), err) }
+    if err := oprot.WriteBinary(ctx, p.KeyMetadata); err != nil {
+    return thrift.PrependError(fmt.Sprintf("%T.key_metadata (2) field write error: ", p), err) }
+    if err := oprot.WriteFieldEnd(ctx); err != nil {
+      return thrift.PrependError(fmt.Sprintf("%T write field end error 2:key_metadata: ", p), err) }
+  }
+  return err
+}
+
+func (p *FileCryptoMetaData) Equals(other *FileCryptoMetaData) bool {
+  if p == other {
+    return true
+  } else if p == nil || other == nil {
+    return false
+  }
+  if !p.EncryptionAlgorithm.Equals(other.EncryptionAlgorithm) { return false }
+  if bytes.Compare(p.KeyMetadata, other.KeyMetadata) != 0 { return false }
+  return true
+}
+
+func (p *FileCryptoMetaData) String() string {
+  if p == nil {
+    return "<nil>"
+  }
+  return fmt.Sprintf("FileCryptoMetaData(%+v)", *p)
+}
+
diff --git a/go/parquet/internal/gen-go/parquet/staticcheck.conf b/go/parquet/internal/gen-go/parquet/staticcheck.conf
new file mode 100644
index 00000000000..d714bfd89ba
--- /dev/null
+++ b/go/parquet/internal/gen-go/parquet/staticcheck.conf
@@ -0,0 +1,17 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+checks = ["all", "-ST1005", "-ST1000"]
diff --git a/go/parquet/internal/thrift/helpers.go b/go/parquet/internal/thrift/helpers.go
new file mode 100644
index 00000000000..e58df01bd6e
--- /dev/null
+++ b/go/parquet/internal/thrift/helpers.go
@@ -0,0 +1,87 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package thrift is just some useful helpers for interacting with thrift to
+// make other code easier to read/write and centralize interactions.
+package thrift
+
+import (
+	"bytes"
+	"context"
+	"io"
+
+	"github.com/apache/arrow/go/parquet/internal/encryption"
+	"github.com/apache/thrift/lib/go/thrift"
+)
+
+// default factory for creating thrift protocols for serialization/deserialization
+var protocolFactory = thrift.NewTCompactProtocolFactoryConf(&thrift.TConfiguration{})
+
+// DeserializeThrift deserializes the bytes in buf into the given thrift msg type
+// returns the number of remaining bytes in the buffer that weren't needed for deserialization
+// and any error if there was one, or nil.
+func DeserializeThrift(msg thrift.TStruct, buf []byte) (remain uint64, err error) {
+	tbuf := &thrift.TMemoryBuffer{Buffer: bytes.NewBuffer(buf)}
+	err = msg.Read(context.TODO(), protocolFactory.GetProtocol(tbuf))
+	remain = tbuf.RemainingBytes()
+	return
+}
+
+// SerializeThriftStream writes out the serialized bytes of the passed in type
+// to the given writer stream.
+func SerializeThriftStream(msg thrift.TStruct, w io.Writer) error {
+	return msg.Write(context.TODO(), protocolFactory.GetProtocol(thrift.NewStreamTransportW(w)))
+}
+
+// DeserializeThriftStream populates the given msg by reading from the provided
+// stream until it completes the deserialization.
+func DeserializeThriftStream(msg thrift.TStruct, r io.Reader) error {
+	return msg.Read(context.TODO(), protocolFactory.GetProtocol(thrift.NewStreamTransportR(r)))
+}
+
+// Serializer is an object that can stick around to provide convenience
+// functions and allow object reuse
+type Serializer struct {
+	thrift.TSerializer
+}
+
+// NewThriftSerializer constructs a serializer with a default buffer of 1024
+func NewThriftSerializer() *Serializer {
+	tbuf := thrift.NewTMemoryBufferLen(1024)
+	return &Serializer{thrift.TSerializer{
+		Transport: tbuf,
+		Protocol:  protocolFactory.GetProtocol(tbuf),
+	}}
+}
+
+// Serialize will serialize the given msg to the writer stream w, optionally encrypting it on the way
+// if enc is not nil, returning the total number of bytes written and any error received, or nil
+func (t *Serializer) Serialize(msg thrift.TStruct, w io.Writer, enc encryption.Encryptor) (int, error) {
+	b, err := t.Write(context.Background(), msg)
+	if err != nil {
+		return 0, err
+	}
+
+	if enc == nil {
+		return w.Write(b)
+	}
+
+	var cipherBuf bytes.Buffer
+	cipherBuf.Grow(enc.CiphertextSizeDelta() + len(b))
+	enc.Encrypt(&cipherBuf, b)
+	n, err := cipherBuf.WriteTo(w)
+	return int(n), err
+}
diff --git a/go/parquet/internal/utils/Makefile b/go/parquet/internal/utils/Makefile
index 39057ae1f2e..1de4308dc55 100644
--- a/go/parquet/internal/utils/Makefile
+++ b/go/parquet/internal/utils/Makefile
@@ -66,3 +66,7 @@ unpack_bool_avx2.s: _lib/unpack_bool_avx2.s
 
 unpack_bool_sse4.s: _lib/unpack_bool_sse4.s
 	$(C2GOASM) -a -f $^ $@
+
+clean:
+	rm -f $(INTEL_SOURCES)
+	rm -f $(addprefix _lib/,$(INTEL_SOURCES))
diff --git a/go/parquet/reader_properties.go b/go/parquet/reader_properties.go
new file mode 100644
index 00000000000..92abae57dc1
--- /dev/null
+++ b/go/parquet/reader_properties.go
@@ -0,0 +1,79 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package parquet
+
+import (
+	"bytes"
+	"io"
+
+	"github.com/apache/arrow/go/arrow/ipc"
+	"github.com/apache/arrow/go/arrow/memory"
+	"golang.org/x/xerrors"
+)
+
+// ReaderProperties are used to define how the file reader will handle buffering and allocating buffers
+type ReaderProperties struct {
+	alloc memory.Allocator
+	// Default buffer size to utilize when reading chunks
+	BufferSize int64
+	// create with NewFileDecryptionProperties if dealing with an encrypted file
+	FileDecryptProps *FileDecryptionProperties
+	// If this is set to true, then the reader will use SectionReader to
+	// just use the read stream when reading data. Otherwise we will buffer
+	// the data we're going to read into memory first and then read that buffer.
+	//
+	// If reading from higher latency IO, like S3, it might improve performance to
+	// set this to true in order to read the entire row group in at once rather than
+	// make multiple smaller data requests. For low latency IO streams or if only
+	// reading small portions / subsets  of the parquet file, this can be set to false
+	// to reduce the amount of IO performed in order to avoid reading excess amounts of data.
+	BufferedStreamEnabled bool
+}
+
+// NewReaderProperties returns the default Reader Properties using the provided allocator.
+//
+// If nil is passed for the allocator, then memory.DefaultAllocator will be used.
+func NewReaderProperties(alloc memory.Allocator) *ReaderProperties {
+	if alloc == nil {
+		alloc = memory.DefaultAllocator
+	}
+	return &ReaderProperties{alloc, DefaultBufSize, nil, false}
+}
+
+// Allocator returns the allocator that the properties were initialized with
+func (r *ReaderProperties) Allocator() memory.Allocator { return r.alloc }
+
+// GetStream returns a section of the underlying reader based on whether or not BufferedStream is enabled.
+//
+// If BufferedStreamEnabled is true, it creates an io.SectionReader, otherwise it will read the entire section
+// into a buffer in memory and return a bytes.NewReader for that buffer.
+func (r *ReaderProperties) GetStream(source io.ReaderAt, start, nbytes int64) (ipc.ReadAtSeeker, error) {
+	if r.BufferedStreamEnabled {
+		return io.NewSectionReader(source, start, nbytes), nil
+	}
+
+	data := make([]byte, nbytes)
+	n, err := source.ReadAt(data, start)
+	if err != nil {
+		return nil, xerrors.Errorf("parquet: tried reading from file, but got error: %w", err)
+	}
+	if n != int(nbytes) {
+		return nil, xerrors.Errorf("parquet: tried reading %d bytes starting at position %d from file but only got %d", nbytes, start, n)
+	}
+
+	return bytes.NewReader(data), nil
+}
diff --git a/go/parquet/reader_writer_properties_test.go b/go/parquet/reader_writer_properties_test.go
new file mode 100644
index 00000000000..ed10a76bf05
--- /dev/null
+++ b/go/parquet/reader_writer_properties_test.go
@@ -0,0 +1,69 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package parquet_test
+
+import (
+	"bytes"
+	"testing"
+
+	"github.com/apache/arrow/go/arrow/memory"
+	"github.com/apache/arrow/go/parquet"
+	"github.com/apache/arrow/go/parquet/compress"
+	"github.com/stretchr/testify/assert"
+)
+
+func TestReaderPropBasics(t *testing.T) {
+	props := parquet.NewReaderProperties(nil)
+	assert.Equal(t, parquet.DefaultBufSize, props.BufferSize)
+	assert.False(t, props.BufferedStreamEnabled)
+}
+
+func TestWriterPropBasics(t *testing.T) {
+	props := parquet.NewWriterProperties()
+
+	assert.Equal(t, parquet.DefaultDataPageSize, props.DataPageSize())
+	assert.Equal(t, parquet.DefaultDictionaryPageSizeLimit, props.DictionaryPageSizeLimit())
+	assert.Equal(t, parquet.V1, props.Version())
+	assert.Equal(t, parquet.DataPageV1, props.DataPageVersion())
+}
+
+func TestWriterPropAdvanced(t *testing.T) {
+	props := parquet.NewWriterProperties(
+		parquet.WithCompressionFor("gzip", compress.Codecs.Gzip),
+		parquet.WithCompressionFor("zstd", compress.Codecs.Zstd),
+		parquet.WithCompression(compress.Codecs.Snappy),
+		parquet.WithEncoding(parquet.Encodings.DeltaBinaryPacked),
+		parquet.WithEncodingFor("delta-length", parquet.Encodings.DeltaLengthByteArray),
+		parquet.WithDataPageVersion(parquet.DataPageV2))
+
+	assert.Equal(t, compress.Codecs.Gzip, props.CompressionPath(parquet.ColumnPathFromString("gzip")))
+	assert.Equal(t, compress.Codecs.Zstd, props.CompressionFor("zstd"))
+	assert.Equal(t, compress.Codecs.Snappy, props.CompressionPath(parquet.ColumnPathFromString("delta-length")))
+	assert.Equal(t, parquet.Encodings.DeltaBinaryPacked, props.EncodingFor("gzip"))
+	assert.Equal(t, parquet.Encodings.DeltaLengthByteArray, props.EncodingPath(parquet.ColumnPathFromString("delta-length")))
+	assert.Equal(t, parquet.DataPageV2, props.DataPageVersion())
+}
+
+func TestReaderPropsGetStreamInsufficient(t *testing.T) {
+	data := "shorter than expected"
+	buf := memory.NewBufferBytes([]byte(data))
+	rdr := bytes.NewReader(buf.Bytes())
+
+	props := parquet.NewReaderProperties(nil)
+	_, err := props.GetStream(rdr, 12, 15)
+	assert.Error(t, err)
+}
diff --git a/go/parquet/types.go b/go/parquet/types.go
index 0f29de08551..b55d664c91e 100644
--- a/go/parquet/types.go
+++ b/go/parquet/types.go
@@ -19,8 +19,12 @@ package parquet
 import (
 	"encoding/binary"
 	"reflect"
+	"strings"
 	"time"
 	"unsafe"
+
+	"github.com/apache/arrow/go/arrow"
+	format "github.com/apache/arrow/go/parquet/internal/gen-go/parquet"
 )
 
 const (
@@ -165,3 +169,186 @@ func (fixedLenByteArrayTraits) CastFromBytes(b []byte) []FixedLenByteArray {
 
 	return res
 }
+
+// Creating our own enums allows avoiding the transitive dependency on the
+// compiled thrift definitions in the public API, allowing us to not export
+// the entire Thrift definitions, while making everything a simple cast between.
+//
+// It also let's us add special values like NONE to distinguish between values
+// that are set or not set
+type (
+	// Type is the physical type as in parquet.thrift
+	Type format.Type
+	// Cipher is the parquet Cipher Algorithms
+	Cipher int
+	// ColumnOrder is the Column Order from the parquet.thrift
+	ColumnOrder *format.ColumnOrder
+	// Version is the parquet version type
+	Version int8
+	// DataPageVersion is the version of the Parquet Data Pages
+	DataPageVersion int8
+	// Encoding is the parquet Encoding type
+	Encoding format.Encoding
+	// Repetition is the underlying parquet field repetition type as in parquet.thrift
+	Repetition format.FieldRepetitionType
+	// ColumnPath is the path from the root of the schema to a given column
+	ColumnPath []string
+)
+
+func (c ColumnPath) String() string {
+	if c == nil {
+		return ""
+	}
+	return strings.Join(c, ".")
+}
+
+// Extend creates a new ColumnPath from an existing one, with the new ColumnPath having s appended to the end.
+func (c ColumnPath) Extend(s string) ColumnPath {
+	p := make([]string, len(c), len(c)+1)
+	copy(p, c)
+	return append(p, s)
+}
+
+// ColumnPathFromString constructs a ColumnPath from a dot separated string
+func ColumnPathFromString(s string) ColumnPath {
+	return strings.Split(s, ".")
+}
+
+// constants for choosing the Aes Algorithm to use for encryption/decryption
+const (
+	AesGcm Cipher = iota
+	AesCtr
+)
+
+// Constants for the parquet Version
+const (
+	V1 Version = 1
+	V2 Version = 2
+)
+
+// constants for the parquet DataPage Version to use
+const (
+	DataPageV1 DataPageVersion = iota
+	DataPageV2
+)
+
+func (e Encoding) String() string {
+	return format.Encoding(e).String()
+}
+
+var (
+	// Types contains constants for the Physical Types that are used in the Parquet Spec
+	//
+	// They can be specified when needed as such: `parquet.Types.Int32` etc. The values
+	// all correspond to the values in parquet.thrift
+	Types = struct {
+		Boolean           Type
+		Int32             Type
+		Int64             Type
+		Int96             Type
+		Float             Type
+		Double            Type
+		ByteArray         Type
+		FixedLenByteArray Type
+		// this only exists as a convienence so we can denote it when necessary
+		// nearly all functions that take a parquet.Type will error/panic if given
+		// Undefined
+		Undefined Type
+	}{
+		Boolean:           Type(format.Type_BOOLEAN),
+		Int32:             Type(format.Type_INT32),
+		Int64:             Type(format.Type_INT64),
+		Int96:             Type(format.Type_INT96),
+		Float:             Type(format.Type_FLOAT),
+		Double:            Type(format.Type_DOUBLE),
+		ByteArray:         Type(format.Type_BYTE_ARRAY),
+		FixedLenByteArray: Type(format.Type_FIXED_LEN_BYTE_ARRAY),
+		Undefined:         Type(format.Type_FIXED_LEN_BYTE_ARRAY + 1),
+	}
+
+	// Encodings contains constants for the encoding types of the column data
+	//
+	// The values used all correspond to the values in parquet.thrift for the
+	// corresponding encoding type.
+	Encodings = struct {
+		Plain                Encoding
+		PlainDict            Encoding
+		RLE                  Encoding
+		RLEDict              Encoding
+		BitPacked            Encoding // deprecated, not implemented
+		DeltaByteArray       Encoding
+		DeltaBinaryPacked    Encoding
+		DeltaLengthByteArray Encoding
+	}{
+		Plain:                Encoding(format.Encoding_PLAIN),
+		PlainDict:            Encoding(format.Encoding_PLAIN_DICTIONARY),
+		RLE:                  Encoding(format.Encoding_RLE),
+		RLEDict:              Encoding(format.Encoding_RLE_DICTIONARY),
+		BitPacked:            Encoding(format.Encoding_BIT_PACKED),
+		DeltaByteArray:       Encoding(format.Encoding_DELTA_BYTE_ARRAY),
+		DeltaBinaryPacked:    Encoding(format.Encoding_DELTA_BINARY_PACKED),
+		DeltaLengthByteArray: Encoding(format.Encoding_DELTA_LENGTH_BYTE_ARRAY),
+	}
+
+	// ColumnOrders contains constants for the Column Ordering fields
+	ColumnOrders = struct {
+		Undefined        ColumnOrder
+		TypeDefinedOrder ColumnOrder
+	}{
+		Undefined:        format.NewColumnOrder(),
+		TypeDefinedOrder: &format.ColumnOrder{TYPE_ORDER: format.NewTypeDefinedOrder()},
+	}
+
+	// DefaultColumnOrder is to use TypeDefinedOrder
+	DefaultColumnOrder = ColumnOrders.TypeDefinedOrder
+
+	// Repetitions contains the constants for Field Repetition Types
+	Repetitions = struct {
+		Required  Repetition
+		Optional  Repetition
+		Repeated  Repetition
+		Undefined Repetition // convenience value
+	}{
+		Required:  Repetition(format.FieldRepetitionType_REQUIRED),
+		Optional:  Repetition(format.FieldRepetitionType_OPTIONAL),
+		Repeated:  Repetition(format.FieldRepetitionType_REPEATED),
+		Undefined: Repetition(format.FieldRepetitionType_REPEATED + 1),
+	}
+)
+
+func (t Type) String() string {
+	switch t {
+	case Types.Undefined:
+		return "UNDEFINED"
+	default:
+		return format.Type(t).String()
+	}
+}
+
+func (r Repetition) String() string {
+	return strings.ToLower(format.FieldRepetitionType(r).String())
+}
+
+// ByteSize returns the number of bytes required to store a single value of
+// the given parquet.Type in memory.
+func (t Type) ByteSize() int {
+	switch t {
+	case Types.Boolean:
+		return 1
+	case Types.Int32:
+		return arrow.Int32SizeBytes
+	case Types.Int64:
+		return arrow.Int64SizeBytes
+	case Types.Int96:
+		return Int96SizeBytes
+	case Types.Float:
+		return arrow.Float32SizeBytes
+	case Types.Double:
+		return arrow.Float64SizeBytes
+	case Types.ByteArray:
+		return ByteArraySizeBytes
+	case Types.FixedLenByteArray:
+		return FixedLenByteArraySizeBytes
+	}
+	panic("no bytesize info for type")
+}
diff --git a/go/parquet/writer_properties.go b/go/parquet/writer_properties.go
new file mode 100644
index 00000000000..ef11454a863
--- /dev/null
+++ b/go/parquet/writer_properties.go
@@ -0,0 +1,510 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package parquet
+
+import (
+	"github.com/apache/arrow/go/arrow/memory"
+	"github.com/apache/arrow/go/parquet/compress"
+)
+
+// Constants for default property values used for the default reader, writer and column props.
+const (
+	// Default Buffer size used for the Reader
+	DefaultBufSize int64 = 4096 * 4
+	// Default data page size limit is 1K it's not guaranteed, but we will try to
+	// cut data pages off at this size where possible.
+	DefaultDataPageSize int64 = 1024 * 1024
+	// Default is for dictionary encoding to be turned on, use WithDictionaryDefault
+	// writer property to change that.
+	DefaultDictionaryEnabled = true
+	// If the dictionary reaches the size of this limitation, the writer will use
+	// the fallback encoding (usually plain) instead of continuing to build the
+	// dictionary index.
+	DefaultDictionaryPageSizeLimit = DefaultDataPageSize
+	// In order to attempt to facilitate data page size limits for writing,
+	// data is written in batches. Increasing the batch size may improve performance
+	// but the larger the batch size, the easier it is to overshoot the datapage limit.
+	DefaultWriteBatchSize int64 = 1024
+	// Default maximum number of rows for a single row group
+	DefaultMaxRowGroupLen int64 = 64 * 1024 * 1024
+	// Default is to have stats enabled for all columns, use writer properties to
+	// change the default, or to enable/disable for specific columns.
+	DefaultStatsEnabled = true
+	// If the stats are larger than 4K the writer will skip writing them out anyways.
+	DefaultMaxStatsSize int64 = 4096
+	DefaultCreatedBy          = "parquet-go version 1.0.0"
+)
+
+// ColumnProperties defines the encoding, codec, and so on for a given column.
+type ColumnProperties struct {
+	Encoding          Encoding
+	Codec             compress.Compression
+	DictionaryEnabled bool
+	StatsEnabled      bool
+	MaxStatsSize      int64
+	CompressionLevel  int
+}
+
+// DefaultColumnProperties returns the default properties which get utilized for writing.
+//
+// The default column properties are the following constants:
+//	Encoding:						Encodings.Plain
+//	Codec:							compress.Codecs.Uncompressed
+//	DictionaryEnabled:	DefaultDictionaryEnabled
+//	StatsEnabled:				DefaultStatsEnabled
+//	MaxStatsSize:				DefaultMaxStatsSize
+//	CompressionLevel:		compress.DefaultCompressionLevel
+func DefaultColumnProperties() ColumnProperties {
+	return ColumnProperties{
+		Encoding:          Encodings.Plain,
+		Codec:             compress.Codecs.Uncompressed,
+		DictionaryEnabled: DefaultDictionaryEnabled,
+		StatsEnabled:      DefaultStatsEnabled,
+		MaxStatsSize:      DefaultMaxStatsSize,
+		CompressionLevel:  compress.DefaultCompressionLevel,
+	}
+}
+
+type writerPropConfig struct {
+	wr            *WriterProperties
+	encodings     map[string]Encoding
+	codecs        map[string]compress.Compression
+	compressLevel map[string]int
+	dictEnabled   map[string]bool
+	statsEnabled  map[string]bool
+}
+
+// WriterProperty is used as the options for building a writer properties instance
+type WriterProperty func(*writerPropConfig)
+
+// WithAllocator specifies the writer to use the given allocator
+func WithAllocator(mem memory.Allocator) WriterProperty {
+	return func(cfg *writerPropConfig) {
+		cfg.wr.mem = mem
+	}
+}
+
+// WithDictionaryDefault sets the default value for whether to enable dictionary encoding
+func WithDictionaryDefault(dict bool) WriterProperty {
+	return func(cfg *writerPropConfig) {
+		cfg.wr.defColumnProps.DictionaryEnabled = dict
+	}
+}
+
+// WithDictionaryFor allows enabling or disabling dictionary encoding for a given column path string
+func WithDictionaryFor(path string, dict bool) WriterProperty {
+	return func(cfg *writerPropConfig) {
+		cfg.dictEnabled[path] = true
+	}
+}
+
+// WithDictionaryPath is like WithDictionaryFor, but takes a ColumnPath type
+func WithDictionaryPath(path ColumnPath, dict bool) WriterProperty {
+	return WithDictionaryFor(path.String(), dict)
+}
+
+// WithDictionaryPageSizeLimit is the limit of the dictionary at which the writer
+// will fallback to plain encoding instead
+func WithDictionaryPageSizeLimit(limit int64) WriterProperty {
+	return func(cfg *writerPropConfig) {
+		cfg.wr.dictPagesize = limit
+	}
+}
+
+// WithBatchSize specifies the number of rows to use for batch writes to columns
+func WithBatchSize(batch int64) WriterProperty {
+	return func(cfg *writerPropConfig) {
+		cfg.wr.batchSize = batch
+	}
+}
+
+// WithMaxRowGroupLength specifies the number of rows as the maximum number of rows for a given row group in the writer.
+func WithMaxRowGroupLength(nrows int64) WriterProperty {
+	return func(cfg *writerPropConfig) {
+		cfg.wr.maxRowGroupLen = nrows
+	}
+}
+
+// WithDataPageSize specifies the size to use for splitting data pages for column writing.
+func WithDataPageSize(pgsize int64) WriterProperty {
+	return func(cfg *writerPropConfig) {
+		cfg.wr.pageSize = pgsize
+	}
+}
+
+// WithDataPageVersion specifies whether to use Version 1 or Version 2 of the DataPage spec
+func WithDataPageVersion(version DataPageVersion) WriterProperty {
+	return func(cfg *writerPropConfig) {
+		cfg.wr.dataPageVersion = version
+	}
+}
+
+// WithVersion specifies which Parquet Spec version to utilize for writing.
+func WithVersion(version Version) WriterProperty {
+	return func(cfg *writerPropConfig) {
+		cfg.wr.parquetVersion = version
+	}
+}
+
+// WithCreatedBy specifies the "created by" string to use for the writer
+func WithCreatedBy(createdby string) WriterProperty {
+	return func(cfg *writerPropConfig) {
+		cfg.wr.createdBy = createdby
+	}
+}
+
+// WithEncoding defines the encoding that is used when we aren't using dictionary encoding.
+//
+// This is either applied if dictionary encoding is disabled, or if we fallback if the dictionary
+// grew too large.
+func WithEncoding(encoding Encoding) WriterProperty {
+	return func(cfg *writerPropConfig) {
+		if encoding == Encodings.PlainDict || encoding == Encodings.RLEDict {
+			panic("parquet: can't use dictionary encoding as fallback encoding")
+		}
+		cfg.wr.defColumnProps.Encoding = encoding
+	}
+}
+
+// WithEncodingFor is for defining the encoding only for a specific column path. This encoding will be used
+// if dictionary encoding is disabled for the column or if we fallback because the dictionary grew too large
+func WithEncodingFor(path string, encoding Encoding) WriterProperty {
+	return func(cfg *writerPropConfig) {
+		if encoding == Encodings.PlainDict || encoding == Encodings.RLEDict {
+			panic("parquet: can't use dictionary encoding as fallback encoding")
+		}
+		cfg.encodings[path] = encoding
+	}
+}
+
+// WithEncodingPath is the same as WithEncodingFor but takes a ColumnPath directly.
+func WithEncodingPath(path ColumnPath, encoding Encoding) WriterProperty {
+	return WithEncodingFor(path.String(), encoding)
+}
+
+// WithCompression specifies the default compression type to use for column writing.
+func WithCompression(codec compress.Compression) WriterProperty {
+	return func(cfg *writerPropConfig) {
+		cfg.wr.defColumnProps.Codec = codec
+	}
+}
+
+// WithCompressionFor specifies the compression type for the given column.
+func WithCompressionFor(path string, codec compress.Compression) WriterProperty {
+	return func(cfg *writerPropConfig) {
+		cfg.codecs[path] = codec
+	}
+}
+
+// WithCompressionPath is the same as WithCompressionFor but takes a ColumnPath directly.
+func WithCompressionPath(path ColumnPath, codec compress.Compression) WriterProperty {
+	return WithCompressionFor(path.String(), codec)
+}
+
+// WithMaxStatsSize sets a maximum size for the statistics before we decide not to include them.
+func WithMaxStatsSize(maxStatsSize int64) WriterProperty {
+	return func(cfg *writerPropConfig) {
+		cfg.wr.defColumnProps.MaxStatsSize = maxStatsSize
+	}
+}
+
+// WithCompressionLevel specifies the default compression level for the compressor in every column.
+//
+// The provided compression level is compressor specific. The user would have to know what the available
+// levels are for the selected compressor. If the compressor does not allow for selecting different
+// compression levels, then this function will have no effect. Parquet and Arrow will not validate the
+// passed compression level. If no level is selected by the user or if the special compress.DefaultCompressionLevel
+// value is used, then parquet will select the compression level.
+func WithCompressionLevel(level int) WriterProperty {
+	return func(cfg *writerPropConfig) {
+		cfg.wr.defColumnProps.CompressionLevel = level
+	}
+}
+
+// WithCompressionLevelFor is like WithCompressionLevel but only for the given column path.
+func WithCompressionLevelFor(path string, level int) WriterProperty {
+	return func(cfg *writerPropConfig) {
+		cfg.compressLevel[path] = level
+	}
+}
+
+// WithCompressionLevelPath is the same as WithCompressionLevelFor but takes a ColumnPath
+func WithCompressionLevelPath(path ColumnPath, level int) WriterProperty {
+	return WithCompressionLevelFor(path.String(), level)
+}
+
+// WithStats specifies a default for whether or not to enable column statistics.
+func WithStats(enabled bool) WriterProperty {
+	return func(cfg *writerPropConfig) {
+		cfg.wr.defColumnProps.StatsEnabled = enabled
+	}
+}
+
+// WithStatsFor specifies a per column value as to enable or disable statistics in the resulting file.
+func WithStatsFor(path string, enabled bool) WriterProperty {
+	return func(cfg *writerPropConfig) {
+		cfg.statsEnabled[path] = enabled
+	}
+}
+
+// WithStatsPath is the same as WithStatsFor but takes a ColumnPath
+func WithStatsPath(path ColumnPath, enabled bool) WriterProperty {
+	return WithStatsFor(path.String(), enabled)
+}
+
+// WithEncryptionProperties specifies the file level encryption handling for writing the file.
+func WithEncryptionProperties(props *FileEncryptionProperties) WriterProperty {
+	return func(cfg *writerPropConfig) {
+		cfg.wr.encryptionProps = props
+	}
+}
+
+// WriterProperties is the collection of properties to use for writing a parquet file. The values are
+// read only once it has been constructed.
+type WriterProperties struct {
+	mem             memory.Allocator
+	dictPagesize    int64
+	batchSize       int64
+	maxRowGroupLen  int64
+	pageSize        int64
+	parquetVersion  Version
+	createdBy       string
+	dataPageVersion DataPageVersion
+
+	defColumnProps  ColumnProperties
+	columnProps     map[string]*ColumnProperties
+	encryptionProps *FileEncryptionProperties
+}
+
+func defaultWriterProperties() *WriterProperties {
+	return &WriterProperties{
+		mem:             memory.DefaultAllocator,
+		dictPagesize:    DefaultDictionaryPageSizeLimit,
+		batchSize:       DefaultWriteBatchSize,
+		maxRowGroupLen:  DefaultMaxRowGroupLen,
+		pageSize:        DefaultDataPageSize,
+		parquetVersion:  V1,
+		dataPageVersion: DataPageV1,
+		createdBy:       DefaultCreatedBy,
+		defColumnProps:  DefaultColumnProperties(),
+	}
+}
+
+// NewWriterProperties takes a list of options for building the properties. If multiple options are used which conflict
+// then the last option is the one which will take effect. If no WriterProperty options are provided, then the default
+// properties will be utilized for writing.
+//
+// The Default properties use the following constants:
+//	Allocator:					memory.DefaultAllocator
+// 	DictionaryPageSize: DefaultDictionaryPageSizeLimit
+//	BatchSize:					DefaultWriteBatchSize
+//	MaxRowGroupLength:	DefaultMaxRowGroupLen
+//	PageSize:						DefaultDataPageSize
+//	ParquetVersion:			V1
+//	DataPageVersion:		DataPageV1
+//	CreatedBy:					DefaultCreatedBy
+func NewWriterProperties(opts ...WriterProperty) *WriterProperties {
+	cfg := writerPropConfig{
+		wr:            defaultWriterProperties(),
+		encodings:     make(map[string]Encoding),
+		codecs:        make(map[string]compress.Compression),
+		compressLevel: make(map[string]int),
+		dictEnabled:   make(map[string]bool),
+		statsEnabled:  make(map[string]bool),
+	}
+	for _, o := range opts {
+		o(&cfg)
+	}
+
+	cfg.wr.columnProps = make(map[string]*ColumnProperties)
+	get := func(key string) *ColumnProperties {
+		if p, ok := cfg.wr.columnProps[key]; ok {
+			return p
+		}
+		cfg.wr.columnProps[key] = new(ColumnProperties)
+		*cfg.wr.columnProps[key] = cfg.wr.defColumnProps
+		return cfg.wr.columnProps[key]
+	}
+
+	for key, value := range cfg.encodings {
+		get(key).Encoding = value
+	}
+
+	for key, value := range cfg.codecs {
+		get(key).Codec = value
+	}
+
+	for key, value := range cfg.compressLevel {
+		get(key).CompressionLevel = value
+	}
+
+	for key, value := range cfg.dictEnabled {
+		get(key).DictionaryEnabled = value
+	}
+
+	for key, value := range cfg.statsEnabled {
+		get(key).StatsEnabled = value
+	}
+	return cfg.wr
+}
+
+// FileEncryptionProperties returns the current encryption properties that were
+// used to create the writer properties.
+func (w *WriterProperties) FileEncryptionProperties() *FileEncryptionProperties {
+	return w.encryptionProps
+}
+
+func (w *WriterProperties) Allocator() memory.Allocator      { return w.mem }
+func (w *WriterProperties) CreatedBy() string                { return w.createdBy }
+func (w *WriterProperties) WriteBatchSize() int64            { return w.batchSize }
+func (w *WriterProperties) DataPageSize() int64              { return w.pageSize }
+func (w *WriterProperties) DictionaryPageSizeLimit() int64   { return w.dictPagesize }
+func (w *WriterProperties) Version() Version                 { return w.parquetVersion }
+func (w *WriterProperties) DataPageVersion() DataPageVersion { return w.dataPageVersion }
+func (w *WriterProperties) MaxRowGroupLength() int64         { return w.maxRowGroupLen }
+
+// Compression returns the default compression type that will be used for any columns that don't
+// have a specific compression defined.
+func (w *WriterProperties) Compression() compress.Compression { return w.defColumnProps.Codec }
+
+// CompressionFor will return the compression type that is specified for the given column path, or
+// the default compression codec if there isn't one specific to this column.
+func (w *WriterProperties) CompressionFor(path string) compress.Compression {
+	if p, ok := w.columnProps[path]; ok {
+		return p.Codec
+	}
+	return w.defColumnProps.Codec
+}
+
+//CompressionPath is the same as CompressionFor but takes a ColumnPath
+func (w *WriterProperties) CompressionPath(path ColumnPath) compress.Compression {
+	return w.CompressionFor(path.String())
+}
+
+// CompressionLevel returns the default compression level that will be used for any column
+// that doesn't have a compression level specified for it.
+func (w *WriterProperties) CompressionLevel() int { return w.defColumnProps.CompressionLevel }
+
+// CompressionLevelFor returns the compression level that will be utilized for the given column,
+// or the default compression level if the column doesn't have a specific level specified.
+func (w *WriterProperties) CompressionLevelFor(path string) int {
+	if p, ok := w.columnProps[path]; ok {
+		return p.CompressionLevel
+	}
+	return w.defColumnProps.CompressionLevel
+}
+
+// CompressionLevelPath is the same as CompressionLevelFor but takes a ColumnPath object
+func (w *WriterProperties) CompressionLevelPath(path ColumnPath) int {
+	return w.CompressionLevelFor(path.String())
+}
+
+// Encoding returns the default encoding that will be utilized for any columns which don't have a different value
+// specified.
+func (w *WriterProperties) Encoding() Encoding { return w.defColumnProps.Encoding }
+
+// EncodingFor returns the encoding that will be used for the given column path, or the default encoding if there
+// isn't one specified for this column.
+func (w *WriterProperties) EncodingFor(path string) Encoding {
+	if p, ok := w.columnProps[path]; ok {
+		return p.Encoding
+	}
+	return w.defColumnProps.Encoding
+}
+
+// EncodingPath is the same as EncodingFor but takes a ColumnPath object
+func (w *WriterProperties) EncodingPath(path ColumnPath) Encoding {
+	return w.EncodingFor(path.String())
+}
+
+// DictionaryIndexEncoding returns which encoding will be used for the Dictionary Index values based on the
+// parquet version. V1 uses PlainDict and V2 uses RLEDict
+func (w *WriterProperties) DictionaryIndexEncoding() Encoding {
+	if w.parquetVersion == V1 {
+		return Encodings.PlainDict
+	}
+	return Encodings.RLEDict
+}
+
+// DictionaryPageEncoding returns the encoding that will be utilized for the DictionaryPage itself based on the parquet
+// version. V1 uses PlainDict, v2 uses Plain
+func (w *WriterProperties) DictionaryPageEncoding() Encoding {
+	if w.parquetVersion == V1 {
+		return Encodings.PlainDict
+	}
+	return Encodings.Plain
+}
+
+// DictionaryEnabled returns the default value as for whether or not dictionary encoding will be utilized for columns
+// that aren't separately specified.
+func (w *WriterProperties) DictionaryEnabled() bool { return w.defColumnProps.DictionaryEnabled }
+
+// DictionaryEnabledFor returns whether or not dictionary encoding will be used for the specified column when writing
+// or the default value if the column was not separately specified.
+func (w *WriterProperties) DictionaryEnabledFor(path string) bool {
+	if p, ok := w.columnProps[path]; ok {
+		return p.DictionaryEnabled
+	}
+	return w.defColumnProps.DictionaryEnabled
+}
+
+// DictionaryEnabledPath is the same as DictionaryEnabledFor but takes a ColumnPath object.
+func (w *WriterProperties) DictionaryEnabledPath(path ColumnPath) bool {
+	return w.DictionaryEnabledFor(path.String())
+}
+
+// StatisticsEnabled returns the default value for whether or not stats are enabled to be written for columns
+// that aren't separately specified.
+func (w *WriterProperties) StatisticsEnabled() bool { return w.defColumnProps.StatsEnabled }
+
+// StatisticsEnabledFor returns whether stats will be written for the given column path, or the default value if
+// it wasn't separately specified.
+func (w *WriterProperties) StatisticsEnabledFor(path string) bool {
+	if p, ok := w.columnProps[path]; ok {
+		return p.StatsEnabled
+	}
+	return w.defColumnProps.StatsEnabled
+}
+
+// StatisticsEnabledPath is the same as StatisticsEnabledFor but takes a ColumnPath object.
+func (w *WriterProperties) StatisticsEnabledPath(path ColumnPath) bool {
+	return w.StatisticsEnabledFor(path.String())
+}
+
+// MaxStatsSize returns the default maximum size for stats
+func (w *WriterProperties) MaxStatsSize() int64 { return w.defColumnProps.MaxStatsSize }
+
+// MaxStatsSizeFor returns the maximum stat size for the given column path
+func (w *WriterProperties) MaxStatsSizeFor(path string) int64 {
+	if p, ok := w.columnProps[path]; ok {
+		return p.MaxStatsSize
+	}
+	return w.defColumnProps.MaxStatsSize
+}
+
+// MaxStatsSizePath is the same as MaxStatsSizeFor but takes a ColumnPath
+func (w *WriterProperties) MaxStatsSizePath(path ColumnPath) int64 {
+	return w.MaxStatsSizeFor(path.String())
+}
+
+// ColumnEncryptionProperties returns the specific properties for encryption that will be used for the given column path
+func (w *WriterProperties) ColumnEncryptionProperties(path string) *ColumnEncryptionProperties {
+	if w.encryptionProps != nil {
+		return w.encryptionProps.ColumnEncryptionProperties(path)
+	}
+	return nil
+}

From 715cb5767e18cee1a0e15a4f43916c9710c3633d Mon Sep 17 00:00:00 2001
From: liyafan82 <fan_li_ya@foxmail.com>
Date: Thu, 15 Apr 2021 20:21:03 -0700
Subject: [PATCH 074/719] ARROW-11999: [Java] Support parallel vector element
 search with user-specified comparator

This is in response to the discussion in https://github.com/apache/arrow/pull/5631#discussion_r339110228

Currently, we only support parallel search with `RangeEqualsVisitor`, which does not support user-specified comparators.
We want to provide the functionality in this issue to support wider range of use cases.

Closes #9736 from liyafan82/fly_0317_par

Authored-by: liyafan82 <fan_li_ya@foxmail.com>
Signed-off-by: Micah Kornfield <emkornfield@gmail.com>
---
 .../algorithm/search/ParallelSearcher.java    | 77 +++++++++++++++++--
 .../search/TestParallelSearcher.java          | 52 +++++++++++--
 2 files changed, 116 insertions(+), 13 deletions(-)

diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcher.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcher.java
index 39678a17686..e93eb2c3dea 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcher.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcher.java
@@ -66,8 +66,19 @@ public ParallelSearcher(V vector, ExecutorService threadPool, int numThreads) {
     this.numThreads = numThreads;
   }
 
+  private CompletableFuture<Boolean>[] initSearch() {
+    keyPosition = -1;
+    final CompletableFuture<Boolean>[] futures = new CompletableFuture[numThreads];
+    for (int i = 0; i < futures.length; i++) {
+      futures[i] = new CompletableFuture<>();
+    }
+    return futures;
+  }
+
   /**
-   * Search for the key in the target vector.
+   * Search for the key in the target vector. The element-wise comparison is based on
+   * {@link RangeEqualsVisitor}, so there are two possible results for each element-wise
+   * comparison: equal and un-equal.
    * @param keyVector the vector containing the search key.
    * @param keyIndex the index of the search key in the key vector.
    * @return the position of a matched value in the target vector,
@@ -80,13 +91,8 @@ public ParallelSearcher(V vector, ExecutorService threadPool, int numThreads) {
    * @throws InterruptedException if a thread is interrupted.
    */
   public int search(V keyVector, int keyIndex) throws ExecutionException, InterruptedException {
-    keyPosition = -1;
+    final CompletableFuture<Boolean>[] futures = initSearch();
     final int valueCount = vector.getValueCount();
-    final CompletableFuture<Boolean>[] futures = new CompletableFuture[numThreads];
-    for (int i = 0; i < futures.length; i++) {
-      futures[i] = new CompletableFuture<>();
-    }
-
     for (int i = 0; i < numThreads; i++) {
       final int tid = i;
       threadPool.submit(() -> {
@@ -124,4 +130,61 @@ public int search(V keyVector, int keyIndex) throws ExecutionException, Interrup
     CompletableFuture.allOf(futures).get();
     return keyPosition;
   }
+
+  /**
+   * Search for the key in the target vector. The element-wise comparison is based on
+   * {@link VectorValueComparator}, so there are three possible results for each element-wise
+   * comparison: less than, equal to and greater than.
+   * @param keyVector the vector containing the search key.
+   * @param keyIndex the index of the search key in the key vector.
+   * @param comparator the comparator for comparing the key against vector elements.
+   * @return the position of a matched value in the target vector,
+   *     or -1 if none is found. Please note that if there are multiple
+   *     matches of the key in the target vector, this method makes no
+   *     guarantees about which instance is returned.
+   *     For an alternative search implementation that always finds the first match of the key,
+   *     see {@link VectorSearcher#linearSearch(ValueVector, VectorValueComparator, ValueVector, int)}.
+   * @throws ExecutionException if an exception occurs in a thread.
+   * @throws InterruptedException if a thread is interrupted.
+   */
+  public int search(
+      V keyVector, int keyIndex, VectorValueComparator<V> comparator) throws ExecutionException, InterruptedException {
+    final CompletableFuture<Boolean>[] futures = initSearch();
+    final int valueCount = vector.getValueCount();
+    for (int i = 0; i < numThreads; i++) {
+      final int tid = i;
+      threadPool.submit(() -> {
+        // convert to long to avoid overflow
+        int start = (int) (((long) valueCount) * tid / numThreads);
+        int end = (int) ((long) valueCount) * (tid + 1) / numThreads;
+
+        if (start >= end) {
+          // no data assigned to this task.
+          futures[tid].complete(false);
+          return;
+        }
+
+        VectorValueComparator<V> localComparator = comparator.createNew();
+        localComparator.attachVectors(vector, keyVector);
+        for (int pos = start; pos < end; pos++) {
+          if (keyPosition != -1) {
+            // the key has been found by another task
+            futures[tid].complete(false);
+            return;
+          }
+          if (localComparator.compare(pos, keyIndex) == 0) {
+            keyPosition = pos;
+            futures[tid].complete(true);
+            return;
+          }
+        }
+
+        // no match value is found.
+        futures[tid].complete(false);
+      });
+    }
+
+    CompletableFuture.allOf(futures).get();
+    return keyPosition;
+  }
 }
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestParallelSearcher.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestParallelSearcher.java
index a01cc1af3bb..767935aaa4b 100644
--- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestParallelSearcher.java
+++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestParallelSearcher.java
@@ -19,10 +19,15 @@
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
 
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 
+import org.apache.arrow.algorithm.sort.DefaultVectorComparators;
+import org.apache.arrow.algorithm.sort.VectorValueComparator;
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.memory.RootAllocator;
 import org.apache.arrow.vector.IntVector;
@@ -30,24 +35,51 @@
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
 
 /**
  * Test cases for {@link ParallelSearcher}.
  */
+@RunWith(Parameterized.class)
 public class TestParallelSearcher {
 
-  private static final int THREAD_COUNT = 10;
+  private enum ComparatorType {
+    EqualityComparator,
+    OrderingComparator;
+  }
 
   private static final int VECTOR_LENGTH = 10000;
 
+  private final int threadCount;
+
   private BufferAllocator allocator;
 
   private ExecutorService threadPool;
 
+  private final ComparatorType comparatorType;
+
+  public TestParallelSearcher(ComparatorType comparatorType, int threadCount) {
+    this.comparatorType = comparatorType;
+    this.threadCount = threadCount;
+  }
+
+  @Parameterized.Parameters(name = "comparator type = {0}, thread count = {1}")
+  public static Collection<Object[]> getComparatorName() {
+    List<Object[]> params = new ArrayList<>();
+    int[] threadCounts = {1, 2, 5, 10, 20, 50};
+    for (ComparatorType type : ComparatorType.values()) {
+      for (int count : threadCounts) {
+        params.add(new Object[] {type, count});
+      }
+    }
+    return params;
+  }
+
   @Before
   public void prepare() {
     allocator = new RootAllocator(1024 * 1024);
-    threadPool = Executors.newFixedThreadPool(THREAD_COUNT);
+    threadPool = Executors.newFixedThreadPool(threadCount);
   }
 
   @After
@@ -63,6 +95,10 @@ public void testParallelIntSearch() throws ExecutionException, InterruptedExcept
       targetVector.allocateNew(VECTOR_LENGTH);
       keyVector.allocateNew(VECTOR_LENGTH);
 
+      // if we are comparing elements using equality semantics, we do not need a comparator here.
+      VectorValueComparator<IntVector> comparator = comparatorType == ComparatorType.EqualityComparator ? null
+          : DefaultVectorComparators.createDefaultComparator(targetVector);
+
       for (int i = 0; i < VECTOR_LENGTH; i++) {
         targetVector.set(i, i);
         keyVector.set(i, i * 2);
@@ -70,9 +106,9 @@ public void testParallelIntSearch() throws ExecutionException, InterruptedExcept
       targetVector.setValueCount(VECTOR_LENGTH);
       keyVector.setValueCount(VECTOR_LENGTH);
 
-      ParallelSearcher<IntVector> searcher = new ParallelSearcher<>(targetVector, threadPool, THREAD_COUNT);
+      ParallelSearcher<IntVector> searcher = new ParallelSearcher<>(targetVector, threadPool, threadCount);
       for (int i = 0; i < VECTOR_LENGTH; i++) {
-        int pos = searcher.search(keyVector, i);
+        int pos = comparator == null ? searcher.search(keyVector, i) : searcher.search(keyVector, i, comparator);
         if (i * 2 < VECTOR_LENGTH) {
           assertEquals(i * 2, pos);
         } else {
@@ -89,6 +125,10 @@ public void testParallelStringSearch() throws ExecutionException, InterruptedExc
       targetVector.allocateNew(VECTOR_LENGTH);
       keyVector.allocateNew(VECTOR_LENGTH);
 
+      // if we are comparing elements using equality semantics, we do not need a comparator here.
+      VectorValueComparator<VarCharVector> comparator = comparatorType == ComparatorType.EqualityComparator ? null
+          : DefaultVectorComparators.createDefaultComparator(targetVector);
+
       for (int i = 0; i < VECTOR_LENGTH; i++) {
         targetVector.setSafe(i, String.valueOf(i).getBytes());
         keyVector.setSafe(i, String.valueOf(i * 2).getBytes());
@@ -96,9 +136,9 @@ public void testParallelStringSearch() throws ExecutionException, InterruptedExc
       targetVector.setValueCount(VECTOR_LENGTH);
       keyVector.setValueCount(VECTOR_LENGTH);
 
-      ParallelSearcher<VarCharVector> searcher = new ParallelSearcher<>(targetVector, threadPool, THREAD_COUNT);
+      ParallelSearcher<VarCharVector> searcher = new ParallelSearcher<>(targetVector, threadPool, threadCount);
       for (int i = 0; i < VECTOR_LENGTH; i++) {
-        int pos = searcher.search(keyVector, i);
+        int pos = comparator == null ? searcher.search(keyVector, i) : searcher.search(keyVector, i, comparator);
         if (i * 2 < VECTOR_LENGTH) {
           assertEquals(i * 2, pos);
         } else {

From 66aa3e7c365a8d4c4eca6e23668f2988e714b493 Mon Sep 17 00:00:00 2001
From: Sathis Kumar <sathis.kumar@udemy.com>
Date: Fri, 16 Apr 2021 06:58:46 -0600
Subject: [PATCH 075/719] ARROW-12380: [Rust] [Ballista] Basic scheduler ui

I have placed it in the `ui/scheduler` directory. You can run it using `yarn && yarn start` from within that directory.

Closes #10026 from msathis/ARROW-12380-scheduler-ui

Authored-by: Sathis Kumar <sathis.kumar@udemy.com>
Signed-off-by: Andy Grove <andygrove73@gmail.com>
---
 dev/release/rat_exclude_files.txt             |     2 +
 .../arrow-flight/src/arrow.flight.protocol.rs |    14 +-
 rust/ballista/rust/scheduler/README.md        |     8 +
 .../rust/scheduler/src/api/handlers.rs        |    41 +-
 rust/ballista/rust/scheduler/src/api/mod.rs   |     4 +-
 rust/ballista/rust/scheduler/src/lib.rs       |    11 +-
 rust/ballista/rust/scheduler/src/main.rs      |     7 +-
 rust/ballista/ui/scheduler/.gitignore         |    23 +
 rust/ballista/ui/scheduler/README.md          |    45 +
 rust/ballista/ui/scheduler/index.d.ts         |    18 +
 rust/ballista/ui/scheduler/package.json       |    58 +
 rust/ballista/ui/scheduler/public/favicon.ico |   Bin 0 -> 3870 bytes
 rust/ballista/ui/scheduler/public/index.html  |    62 +
 rust/ballista/ui/scheduler/public/logo192.png |   Bin 0 -> 5347 bytes
 rust/ballista/ui/scheduler/public/logo512.png |   Bin 0 -> 9664 bytes
 .../ui/scheduler/public/manifest.json         |    25 +
 rust/ballista/ui/scheduler/public/robots.txt  |    20 +
 .../ui/scheduler/react-table-config.d.ts      |   137 +
 rust/ballista/ui/scheduler/src/App.css        |    18 +
 rust/ballista/ui/scheduler/src/App.test.tsx   |    26 +
 rust/ballista/ui/scheduler/src/App.tsx        |    97 +
 .../ui/scheduler/src/components/DataTable.tsx |   131 +
 .../ui/scheduler/src/components/Empty.tsx     |    36 +
 .../ui/scheduler/src/components/Footer.tsx    |    28 +
 .../ui/scheduler/src/components/Header.tsx    |    82 +
 .../ui/scheduler/src/components/NodesList.tsx |    71 +
 .../scheduler/src/components/QueriesList.tsx  |   115 +
 .../ui/scheduler/src/components/Summary.tsx   |    89 +
 .../ui/scheduler/src/components/logo.svg      |    25 +
 rust/ballista/ui/scheduler/src/index.css      |    32 +
 rust/ballista/ui/scheduler/src/index.tsx      |    38 +
 .../ui/scheduler/src/react-app-env.d.ts       |    18 +
 .../ui/scheduler/src/reportWebVitals.ts       |    32 +
 rust/ballista/ui/scheduler/src/setupTests.ts  |    22 +
 rust/ballista/ui/scheduler/tsconfig.json      |    28 +
 rust/ballista/ui/scheduler/yarn.lock          | 12431 ++++++++++++++++
 36 files changed, 13768 insertions(+), 26 deletions(-)
 create mode 100644 rust/ballista/ui/scheduler/.gitignore
 create mode 100644 rust/ballista/ui/scheduler/README.md
 create mode 100644 rust/ballista/ui/scheduler/index.d.ts
 create mode 100644 rust/ballista/ui/scheduler/package.json
 create mode 100644 rust/ballista/ui/scheduler/public/favicon.ico
 create mode 100644 rust/ballista/ui/scheduler/public/index.html
 create mode 100644 rust/ballista/ui/scheduler/public/logo192.png
 create mode 100644 rust/ballista/ui/scheduler/public/logo512.png
 create mode 100644 rust/ballista/ui/scheduler/public/manifest.json
 create mode 100644 rust/ballista/ui/scheduler/public/robots.txt
 create mode 100644 rust/ballista/ui/scheduler/react-table-config.d.ts
 create mode 100644 rust/ballista/ui/scheduler/src/App.css
 create mode 100644 rust/ballista/ui/scheduler/src/App.test.tsx
 create mode 100644 rust/ballista/ui/scheduler/src/App.tsx
 create mode 100644 rust/ballista/ui/scheduler/src/components/DataTable.tsx
 create mode 100644 rust/ballista/ui/scheduler/src/components/Empty.tsx
 create mode 100644 rust/ballista/ui/scheduler/src/components/Footer.tsx
 create mode 100644 rust/ballista/ui/scheduler/src/components/Header.tsx
 create mode 100644 rust/ballista/ui/scheduler/src/components/NodesList.tsx
 create mode 100644 rust/ballista/ui/scheduler/src/components/QueriesList.tsx
 create mode 100644 rust/ballista/ui/scheduler/src/components/Summary.tsx
 create mode 100644 rust/ballista/ui/scheduler/src/components/logo.svg
 create mode 100644 rust/ballista/ui/scheduler/src/index.css
 create mode 100644 rust/ballista/ui/scheduler/src/index.tsx
 create mode 100644 rust/ballista/ui/scheduler/src/react-app-env.d.ts
 create mode 100644 rust/ballista/ui/scheduler/src/reportWebVitals.ts
 create mode 100644 rust/ballista/ui/scheduler/src/setupTests.ts
 create mode 100644 rust/ballista/ui/scheduler/tsconfig.json
 create mode 100644 rust/ballista/ui/scheduler/yarn.lock

diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt
index 3dc3ad5869c..68f5668098e 100644
--- a/dev/release/rat_exclude_files.txt
+++ b/dev/release/rat_exclude_files.txt
@@ -169,6 +169,7 @@ csharp/src/Apache.Arrow.Flight.AspNetCore/Apache.Arrow.Flight.AspNetCore.csproj
 *.sgml
 *.css
 *.png
+*.ico
 *.svg
 *.devhelp2
 *.scss
@@ -203,3 +204,4 @@ julia/Arrow/docs/src/manual.md
 julia/Arrow/docs/src/reference.md
 rust/ballista/rust/benchmarks/tpch/queries/q*.sql
 rust/ballista/rust/scheduler/testdata/*
+rust/ballista/ui/scheduler/yarn.lock
diff --git a/rust/arrow-flight/src/arrow.flight.protocol.rs b/rust/arrow-flight/src/arrow.flight.protocol.rs
index 2a87d7b2683..5fce526ff6e 100644
--- a/rust/arrow-flight/src/arrow.flight.protocol.rs
+++ b/rust/arrow-flight/src/arrow.flight.protocol.rs
@@ -499,7 +499,7 @@ pub mod flight_service_server {
     #[async_trait]
     pub trait FlightService: Send + Sync + 'static {
         #[doc = "Server streaming response type for the Handshake method."]
-        type HandshakeStream: Stream<Item = Result<super::HandshakeResponse, tonic::Status>>
+        type HandshakeStream: futures_core::Stream<Item = Result<super::HandshakeResponse, tonic::Status>>
             + Send
             + Sync
             + 'static;
@@ -513,7 +513,7 @@ pub mod flight_service_server {
             request: tonic::Request<tonic::Streaming<super::HandshakeRequest>>,
         ) -> Result<tonic::Response<Self::HandshakeStream>, tonic::Status>;
         #[doc = "Server streaming response type for the ListFlights method."]
-        type ListFlightsStream: Stream<Item = Result<super::FlightInfo, tonic::Status>>
+        type ListFlightsStream: futures_core::Stream<Item = Result<super::FlightInfo, tonic::Status>>
             + Send
             + Sync
             + 'static;
@@ -553,7 +553,7 @@ pub mod flight_service_server {
             request: tonic::Request<super::FlightDescriptor>,
         ) -> Result<tonic::Response<super::SchemaResult>, tonic::Status>;
         #[doc = "Server streaming response type for the DoGet method."]
-        type DoGetStream: Stream<Item = Result<super::FlightData, tonic::Status>>
+        type DoGetStream: futures_core::Stream<Item = Result<super::FlightData, tonic::Status>>
             + Send
             + Sync
             + 'static;
@@ -567,7 +567,7 @@ pub mod flight_service_server {
             request: tonic::Request<super::Ticket>,
         ) -> Result<tonic::Response<Self::DoGetStream>, tonic::Status>;
         #[doc = "Server streaming response type for the DoPut method."]
-        type DoPutStream: Stream<Item = Result<super::PutResult, tonic::Status>>
+        type DoPutStream: futures_core::Stream<Item = Result<super::PutResult, tonic::Status>>
             + Send
             + Sync
             + 'static;
@@ -583,7 +583,7 @@ pub mod flight_service_server {
             request: tonic::Request<tonic::Streaming<super::FlightData>>,
         ) -> Result<tonic::Response<Self::DoPutStream>, tonic::Status>;
         #[doc = "Server streaming response type for the DoExchange method."]
-        type DoExchangeStream: Stream<Item = Result<super::FlightData, tonic::Status>>
+        type DoExchangeStream: futures_core::Stream<Item = Result<super::FlightData, tonic::Status>>
             + Send
             + Sync
             + 'static;
@@ -598,7 +598,7 @@ pub mod flight_service_server {
             request: tonic::Request<tonic::Streaming<super::FlightData>>,
         ) -> Result<tonic::Response<Self::DoExchangeStream>, tonic::Status>;
         #[doc = "Server streaming response type for the DoAction method."]
-        type DoActionStream: Stream<Item = Result<super::Result, tonic::Status>>
+        type DoActionStream: futures_core::Stream<Item = Result<super::Result, tonic::Status>>
             + Send
             + Sync
             + 'static;
@@ -614,7 +614,7 @@ pub mod flight_service_server {
             request: tonic::Request<super::Action>,
         ) -> Result<tonic::Response<Self::DoActionStream>, tonic::Status>;
         #[doc = "Server streaming response type for the ListActions method."]
-        type ListActionsStream: Stream<Item = Result<super::ActionType, tonic::Status>>
+        type ListActionsStream: futures_core::Stream<Item = Result<super::ActionType, tonic::Status>>
             + Send
             + Sync
             + 'static;
diff --git a/rust/ballista/rust/scheduler/README.md b/rust/ballista/rust/scheduler/README.md
index c2cc090bd67..d87eec30e23 100644
--- a/rust/ballista/rust/scheduler/README.md
+++ b/rust/ballista/rust/scheduler/README.md
@@ -41,3 +41,11 @@ curl --request GET \
   --url http://localhost:50050/executors \
   --header 'Accept: application/json'
 ```
+
+## Scheduler UI
+A basic ui for the scheduler is in `ui/scheduler` of the ballista repo. 
+It can be started using the following [yarn](https://yarnpkg.com/) command
+
+```bash
+yarn && yarn start
+```
diff --git a/rust/ballista/rust/scheduler/src/api/handlers.rs b/rust/ballista/rust/scheduler/src/api/handlers.rs
index c3450215007..7293558d0cc 100644
--- a/rust/ballista/rust/scheduler/src/api/handlers.rs
+++ b/rust/ballista/rust/scheduler/src/api/handlers.rs
@@ -19,22 +19,37 @@ use ballista_core::serde::scheduler::ExecutorMeta;
 use tonic::{Request, Response};
 use warp::Rejection;
 
-pub(crate) async fn list_executors_data(
+#[derive(Debug, serde::Serialize)]
+struct StateResponse {
+    executors: Vec<ExecutorMeta>,
+    started: u128,
+    version: String,
+}
+
+pub(crate) async fn scheduler_state(
     data_server: SchedulerServer,
 ) -> Result<impl warp::Reply, Rejection> {
     let data: Result<Response<GetExecutorMetadataResult>, tonic::Status> = data_server
         .get_executors_metadata(Request::new(GetExecutorMetadataParams {}))
         .await;
-    let result = data.unwrap();
-    let res: &GetExecutorMetadataResult = result.get_ref();
-    let vec: &Vec<ExecutorMetadata> = &res.metadata;
-    let metadata: Vec<ExecutorMeta> = vec
-        .iter()
-        .map(|v: &ExecutorMetadata| ExecutorMeta {
-            host: v.host.clone(),
-            port: v.port as u16,
-            id: v.id.clone(),
-        })
-        .collect();
-    Ok(warp::reply::json(&metadata))
+    let metadata: Vec<ExecutorMeta> = match data {
+        Ok(result) => {
+            let res: &GetExecutorMetadataResult = result.get_ref();
+            let vec: &Vec<ExecutorMetadata> = &res.metadata;
+            vec.iter()
+                .map(|v: &ExecutorMetadata| ExecutorMeta {
+                    host: v.host.clone(),
+                    port: v.port as u16,
+                    id: v.id.clone(),
+                })
+                .collect()
+        }
+        Err(_) => vec![],
+    };
+    let response = StateResponse {
+        executors: metadata,
+        started: data_server.start_time,
+        version: data_server.version.clone(),
+    };
+    Ok(warp::reply::json(&response))
 }
diff --git a/rust/ballista/rust/scheduler/src/api/mod.rs b/rust/ballista/rust/scheduler/src/api/mod.rs
index 9e14378564a..45f281a67a3 100644
--- a/rust/ballista/rust/scheduler/src/api/mod.rs
+++ b/rust/ballista/rust/scheduler/src/api/mod.rs
@@ -80,8 +80,8 @@ fn with_data_server(
 }
 
 pub fn get_routes(scheduler_server: SchedulerServer) -> BoxedFilter<(impl Reply,)> {
-    let routes = warp::path("executors")
+    let routes = warp::path("state")
         .and(with_data_server(scheduler_server))
-        .and_then(handlers::list_executors_data);
+        .and_then(handlers::scheduler_state);
     routes.boxed()
 }
diff --git a/rust/ballista/rust/scheduler/src/lib.rs b/rust/ballista/rust/scheduler/src/lib.rs
index 1bd4722e5cb..54733e32e5e 100644
--- a/rust/ballista/rust/scheduler/src/lib.rs
+++ b/rust/ballista/rust/scheduler/src/lib.rs
@@ -67,19 +67,28 @@ use tonic::{Request, Response};
 
 use self::state::{ConfigBackendClient, SchedulerState};
 use datafusion::physical_plan::parquet::ParquetExec;
-use std::time::Instant;
+use std::time::{Instant, SystemTime, UNIX_EPOCH};
 
 #[derive(Clone)]
 pub struct SchedulerServer {
     state: SchedulerState,
     namespace: String,
+    start_time: u128,
+    version: String,
 }
 
 impl SchedulerServer {
     pub fn new(config: Arc<dyn ConfigBackendClient>, namespace: String) -> Self {
+        const VERSION: Option<&'static str> = option_env!("CARGO_PKG_VERSION");
+
         Self {
             state: SchedulerState::new(config),
             namespace,
+            start_time: SystemTime::now()
+                .duration_since(UNIX_EPOCH)
+                .unwrap()
+                .as_millis(),
+            version: VERSION.unwrap_or("Unknown").to_string(),
         }
     }
 }
diff --git a/rust/ballista/rust/scheduler/src/main.rs b/rust/ballista/rust/scheduler/src/main.rs
index 6f746292f65..205023a4c34 100644
--- a/rust/ballista/rust/scheduler/src/main.rs
+++ b/rust/ballista/rust/scheduler/src/main.rs
@@ -61,17 +61,18 @@ async fn start_server(
         "Ballista v{} Scheduler listening on {:?}",
         BALLISTA_VERSION, addr
     );
+
+    let scheduler_server =
+        SchedulerServer::new(config_backend.clone(), namespace.clone());
     Ok(Server::bind(&addr)
         .serve(make_service_fn(move |_| {
-            let scheduler_server =
-                SchedulerServer::new(config_backend.clone(), namespace.clone());
             let scheduler_grpc_server =
                 SchedulerGrpcServer::new(scheduler_server.clone());
 
             let mut tonic = TonicServer::builder()
                 .add_service(scheduler_grpc_server)
                 .into_service();
-            let mut warp = warp::service(get_routes(scheduler_server));
+            let mut warp = warp::service(get_routes(scheduler_server.clone()));
 
             future::ok::<_, Infallible>(tower::service_fn(
                 move |req: hyper::Request<hyper::Body>| {
diff --git a/rust/ballista/ui/scheduler/.gitignore b/rust/ballista/ui/scheduler/.gitignore
new file mode 100644
index 00000000000..4d29575de80
--- /dev/null
+++ b/rust/ballista/ui/scheduler/.gitignore
@@ -0,0 +1,23 @@
+# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
+
+# dependencies
+/node_modules
+/.pnp
+.pnp.js
+
+# testing
+/coverage
+
+# production
+/build
+
+# misc
+.DS_Store
+.env.local
+.env.development.local
+.env.test.local
+.env.production.local
+
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
diff --git a/rust/ballista/ui/scheduler/README.md b/rust/ballista/ui/scheduler/README.md
new file mode 100644
index 00000000000..1a196dab2fa
--- /dev/null
+++ b/rust/ballista/ui/scheduler/README.md
@@ -0,0 +1,45 @@
+<!---
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+# Ballista UI
+
+
+## Available Scripts
+
+In the project directory, you can run:
+
+### `yarn start`
+
+Runs the app in the development mode.\
+Open [http://localhost:3000](http://localhost:3000) to view it in the browser.
+
+The page will reload if you make edits.\
+You will also see any lint errors in the console.
+
+### `yarn test`
+
+Launches the test runner in the interactive watch mode.\
+See the section about [running tests](https://facebook.github.io/create-react-app/docs/running-tests) for more information.
+
+### `yarn build`
+
+Builds the app for production to the `build` folder.\
+It correctly bundles React in production mode and optimizes the build for the best performance.
+
+The build is minified and the filenames include the hashes.
\ No newline at end of file
diff --git a/rust/ballista/ui/scheduler/index.d.ts b/rust/ballista/ui/scheduler/index.d.ts
new file mode 100644
index 00000000000..9f715810217
--- /dev/null
+++ b/rust/ballista/ui/scheduler/index.d.ts
@@ -0,0 +1,18 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+declare module "@chakra-ui/icons";
\ No newline at end of file
diff --git a/rust/ballista/ui/scheduler/package.json b/rust/ballista/ui/scheduler/package.json
new file mode 100644
index 00000000000..fe1e72d68f7
--- /dev/null
+++ b/rust/ballista/ui/scheduler/package.json
@@ -0,0 +1,58 @@
+{
+  "name": "scheduler-ui",
+  "version": "0.1.0",
+  "private": true,
+  "dependencies": {
+    "@chakra-ui/icons": "^1.0.5",
+    "@chakra-ui/react": "^1.3.3",
+    "@emotion/react": "^11.1.5",
+    "@emotion/styled": "^11.1.5",
+    "@testing-library/jest-dom": "^5.11.4",
+    "@testing-library/react": "^11.1.0",
+    "@testing-library/user-event": "^12.1.10",
+    "@types/jest": "^26.0.15",
+    "@types/node": "^12.0.0",
+    "@types/react": "^17.0.0",
+    "@types/react-dom": "^17.0.0",
+    "framer-motion": "^3.7.0",
+    "react": "^17.0.1",
+    "react-dom": "^17.0.1",
+    "react-icons": "^4.2.0",
+    "react-router-dom": "^5.2.0",
+    "react-scripts": "4.0.3",
+    "react-table": "^7.6.3",
+    "react-timeago": "^5.2.0",
+    "typescript": "^4.1.2",
+    "web-vitals": "^1.0.1"
+  },
+  "scripts": {
+    "start": "react-scripts start",
+    "build": "react-scripts build",
+    "test": "react-scripts test",
+    "eject": "react-scripts eject"
+  },
+  "eslintConfig": {
+    "extends": [
+      "react-app",
+      "react-app/jest"
+    ]
+  },
+  "browserslist": {
+    "production": [
+      ">0.2%",
+      "not dead",
+      "not op_mini all"
+    ],
+    "development": [
+      "last 1 chrome version",
+      "last 1 firefox version",
+      "last 1 safari version"
+    ]
+  },
+  "devDependencies": {
+    "@types/react-table": "^7.0.28",
+    "@types/react-timeago": "^4.1.2",
+    "prettier": "^2.2.1"
+  },
+  "proxy": "http://localhost:50050"
+}
diff --git a/rust/ballista/ui/scheduler/public/favicon.ico b/rust/ballista/ui/scheduler/public/favicon.ico
new file mode 100644
index 0000000000000000000000000000000000000000..a11777cc471a4344702741ab1c8a588998b1311a
GIT binary patch
literal 3870
zcma);c{J4h9>;%nil|2-o+rCuEF-(I%-F}ijC~o(k~HKAkr0)!FCj~d>`RtpD?8b;
zXOC1OD!V*IsqUwzbMF1)-gEDD=A573Z-&G7^LoAC9|WO7Xc0Cx1g^Zu0u_SjAPB<A
z`RksU20=ur5rmib*S!+l%h4eS4)^Q+0X>3vGa^W|sj)80f#V0@M_CAZTIO(t--xg=
z!sii`1giyH7EKL_+Wi0ab<)&E_0KD!3Rp2^HNB*K2@PHCs4PWSA32*-^7d{9nH2_E
zmC{C*N*)(vEF1_aMamw2A{ZH5aIDqiabnFdJ|y0%aS|64E$`s2ccV~3lR!u<){eS`
z#^Mx6o(iP1Ix%<jZ{9b!^*}EvPeMb_W#+3mPDk@<s^Oh#VM&a2^K;|820}`)peR}+
zJXt@j)V#7+Js?u;Lb#g$HH)e~Ro^hvl6KSLHq)Y3adj<OOD7?;gwee^gNzCxwD?IA
z8?*}E@b*IiVPUPv3?XqzLRv|{4)GKGzjS`)#ukL7W&K6BHn&1}P(skc69cJ?5^C+V
z@yyqLJg;V2Ul%gZ*?2WiB%bNfz1}F^UeTpW^N?dSY@NL3zDD+Tzk$Cg_=cj!M^ot0
zu%qYEoTU9K@kMP2H52_@<2On}lNX!oZ(oWk^?eSfXAa3M8S?8tzISV2V&9A+_-47Y
z>4dv`t@!&Za-K@mTm#vadc{0aWDV*_%EiGK7qMC_(`exc>-$Gb9~W!w_^{*pYRm~G
zBN{nA<l~YIv(*f3@JAyAZDXwp4d;meFk*lN;rx5VQze6aK!n?W9`Uc4pES2K&V3BC
zkTJK{PcIXdQ?hM;i7~K{wRSeU-w9_32aC}+7nN6r5o<=I@CyjQAS~;jsb7p#@eUT2
zkh1M~1>;cm^w$VWg1O^^<6vY`1XCD|s_zv*g*5&V#wv&s#h$xlUilPe4U@I&UXZbL
z0)%9Uj&@yd03n;!7do+bfixH^FeZ-Ema}s;DQX2gY+7g0s(9;`8GyvPY1*vxiF&|w
z>!vA~GA<~JUqH}d;DfBSi^IT*#lrzXl$fNpq0_T1tA+`A$1?(gLb?e#0>UELvljtQ
zK+*74m0jn&)5yk8mLBv;=@}c{t0ztT<<S2g5CX`xuBQVwYJOMIsv7paOX6ypYJL$a
zJ|Vy}#?V4i+kjXzBq)LcuJEA=z^Z2W4WQ1U@0}*!;_q<!3_ls8PhMM3ii*Ci+cF6=
zF!@E<x#%Yvb!P0>v;Avck$S6D`Z)^c0(jiwKhQsn|LDRY&w(Fmi91I7H6S;b0XM{e
zXp0~(T@k_r-!jkLwd1_Vre^v$G4|kh4}=Gi?$AaJ)3I+^m|Zyj#*?Kp@w(lQdJZf4
z#|IJW5z+S^e9@(6hW6N~{pj8|NO*>1)E=%?nNUAkmv~OY&ZV<PHdt%yO<W_%O|c-T
zC%nAvgv?#h>;m-%?pQ_11)hAr0oAwILrlsGawpxx4D43J&K=n+p3WLnlDsQ$b(9+4
z?mO^hmV^F8MV{4<aA#E-8o{y-by8hR1>Lx>(Q=aHhQ1){0d*(e&s%G=i5rq3;t{JC
zmgbn5Nkl)t@fPH$v;af26lyhH!k+#}_&aBK4baYPbZy$5aFx4}ka<ge$nBI}>&qxl
z$=Rh$W;U)>-=S-0=?7FH9dUAd2(q#4TCAHky!$^~;Dz^j|8_wuKc*YzfdA<NJp8x7
z`_}_7!m44CG`<6nLk0r3A}8e>ht@Q&ror?91Dm!N03=4=O!a)I*0q~p0g$Fm$pmr$
zb;wD;STDIi$@M%y1>p&_>%?UP($15gou_ue1u0!4(%81;qcIW8NyxFEvXpiJ|H4wz
z*mFT(qVx1FKufG11hByuX%lPk4t#WZ{>8ka2efjY`~;AL6vWyQKpJun2nRiZYDij$
zP>4jQXPaP$UC$yIVgGa)jDV;F0l^n(V=HMRB5)20V7&r$<L^Phf(W29K>jmk{UUIe
zVjKroK}JAbD>B`2cwNQ&GDLx8{pg`7hbA~grk|W6LgiZ`8y`{Iq0i>t!3p2}MS6S+
zO_ruKyAElt)rdS>CtF7j{&6rP-#c=7evGMt7B6`7HG|-(WL`bDUAjyn+k$mx$C<FS
ztTQ#rrhaxTX7@2TN#`pson<p6thk-4?N)^;_(Up!_V=f}<~kR)zD%o0iiqseIMZqh
zGU`kZGbN)qs{;AuZP?~%PajDo&b&7)!V!+|VO<ediN}{)OvR~sQ<ZYe%O|)8-DTKw
zTXmYP$VLa(Y>H;q2Dz4x;cPP$hW=`pFfLO)!jaCL@V2+F)So3}vg|%O*^T1j>C2lx
zsURO-zIJC$^$g2byVbRIo^w>UxK}74^TqUiRR#7s_X$e)$6iYG1(PcW7un-va-S&u
zHk9-6Zn&>T==A)lM^D~bk{&rFzCi35>UR!ZjQkdSiNX*-;l4z9j*7|q`TBl~Au`5&
z+c)*8?#-tgUR$Zd%Q3bs96w6k7q@#tUn`5rj+r@_sAVVLqco|6O{ILX&U-&-cbVa3
zY?ngHR@%l{;`ri%H*0EhBWrGjv!LE4db?HEWb5mu*t@{kv|XwK8?npOshmzf=vZA@
zVSN9sL~!sn?r(AK)Q7Jk2(|M67Uy3I{eRy<vjA)m;~)jV3DFGzL)eNbs@Sy80roD>
z_l&Y@A>;vjkWN5I2xvFFTLX0i+`{qz7C_@bo`ZUzDugfq4+>a3?1v%)O+YTd6@Ul7
zAfLfm=nhZ`)P~&v90$&UcF+yXm9sq!qCx3^9gzIcO|Y(js^Fj)Rvq>nQAHI92ap=P
z10A4@prk+<s7nQxb0&o?puD0BStB$NLIA{pVg<pW;2=HJ11ZpVkRkF89w0s#3ef?(
zka>AGWCb`2)dQYFuR$|H6iDE8p}9a?#nV2}LBCoCf(Xi2@szia7#gY>b|l!-U`c}@
zLdhvQjc!BdLJvYvzzzngnw51yRYCqh4}$oRCy-z|v3Hc*d|?^Wj=l~18*E~*cR_kU
z{XsxM1i{V*4GujHQ3DBpl2w4FgFR48Nma@HPgnyKoIEY-MqmMeY=I<%oG~l!f<+FN
z1ZY^;10j4M4<Vo=b&OyEfF!Y);yDCJas8bbVhK~blk}<IGME~h)6n~gdmqP>#HYXP
zw5eJpA_y(>uLQ~OucgxDLuf}fVs272FaMxhn4xnDGIyLXnw>Xsd^J8XhcWIwIoQ9}
z%FoSJTAGW(SRGwJwb=@pY7r$uQRK3Zd~XbxU)ts!4XsJrCycrWSI?e!IqwqIR8+Jh
zlRjZ`UO1I!BtJR_2~7AbkbSm%XQqxEPkz6BTGWx8e}nQ=w7bZ|eVP4?*Tb!$(R)iC
z9)&%bS*u(lXqzitAN)Oo=&Ytn>%Hzjc<5liuPi>zC_nw;Z0AE3Y$Jao_Q90R-gl~5
z_xAb2J%eArrC1CN4G$}-zVvCqF1;H;abAu6G*+PDHSYFx@Tdbfox*uEd3}BUyYY-l
zTfEsOqsi#f9^FoLO;ChK<554qkri&Av~SIM*{fEYRE?vH7pTAOmu2pz3X?Wn*!ROX
ztd54huAk&mFBemMooL33RV-*1f0Q3_(7hl$<#*|WF9P!;r;4_+X~k~uKEqdzZ$5Al
zV63X<s4EnR@itBNL^suG_KHV!zgrw6&Bq&`dNv>N<k2!6lBSoSAvQBw$a}{Sg*d5f
zJqeF6lxH}v-(s5jl(8V8Bv*((#aw(*iLTd8#?8FnMLG#}AorDTkK*%$ni#S{e-*jA
zjy$_xALPmR?$A)F?XdsKy|!Ue+lIR5=csS!ZPu7h{Nc+Sd%?*WHR`S5ByDdhQAsNO
zeyx0!D+fx-a_t<57fQ^<7*WTVDog0}WA0F2_h++_I?f`i|C>@)j$FN#cCD;ek1R#l
zv%pGrhB~KWgoCj%GT?%{@@o(AJGt*PG#l3i>lhmb_twKH^EYvacVY-6bsCl5*^~L0
zonm@lk2UvvTKr2RS%}T>^~EYqdL1q4nD%0n&Xqr^cK^`J5W;lRRB^R-O<zOhVxo?8
zb#fjP=~|*nH<rZsU&F20QcP*BR|)$r#sFFtYi6hV=2&f<YJ%JC0IAdIRdHjO(;S%3
zC;L{EqcHO368@u|<ql>8b&HENO||mo0xaD+S=I8RTlIfVgqN@SXDr2&-)we--K7w=
zJVU8?Z+7k9dy;s;^gDkQa`0nz6N{T?(A&Iz)2!DEecLyRa&FI!id#5Z7B*O2=PsR0
zEvc|8{NS^)!d)MDX(97Xw}m&kEO@5jqRaDZ!+%`wYOI<23q|&js`&o4xvjP7D_xv@
z5hEwpsp{HezI9!~6O{~)lLR@oF7?J7i>1|5a~UuoN=q&6N}EJPV_GD`&M*v8Y`^2j
zKII*d_@Fi$+i*YEW+Hbz<W=zs^XxM$!;??OHDS{MUEdOi9{rF;;#a0RO>n{iQk~yP
z>7N{S4)r*!NwQ`(qcN#8SRQsNK6>{)X12nbF`*7#ecO7I)Q$uZsV+xS4E7aUn+U(K
baj7?x%VD!5Cxk2YbYLNVeiXvvpMCWYo=by@

literal 0
HcmV?d00001

diff --git a/rust/ballista/ui/scheduler/public/index.html b/rust/ballista/ui/scheduler/public/index.html
new file mode 100644
index 00000000000..d902333f034
--- /dev/null
+++ b/rust/ballista/ui/scheduler/public/index.html
@@ -0,0 +1,62 @@
+<!---
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+<!DOCTYPE html>
+<html lang="en">
+  <head>
+    <meta charset="utf-8" />
+    <link rel="icon" href="%PUBLIC_URL%/favicon.ico" />
+    <meta name="viewport" content="width=device-width, initial-scale=1" />
+    <meta name="theme-color" content="#000000" />
+    <meta
+      name="description"
+      content="Ballista Scheduler UI - manage nodes & tasks"
+    />
+    <link rel="apple-touch-icon" href="%PUBLIC_URL%/logo192.png" />
+    <!--
+      manifest.json provides metadata used when your web app is installed on a
+      user's mobile device or desktop. See https://developers.google.com/web/fundamentals/web-app-manifest/
+    -->
+    <link rel="manifest" href="%PUBLIC_URL%/manifest.json" />
+    <!--
+      Notice the use of %PUBLIC_URL% in the tags above.
+      It will be replaced with the URL of the `public` folder during the build.
+      Only files inside the `public` folder can be referenced from the HTML.
+
+      Unlike "/favicon.ico" or "favicon.ico", "%PUBLIC_URL%/favicon.ico" will
+      work correctly both with client-side routing and a non-root public URL.
+      Learn how to configure a non-root public URL by running `npm run build`.
+    -->
+    <title>Ballista UI</title>
+  </head>
+  <body>
+    <noscript>You need to enable JavaScript to run this app.</noscript>
+    <div id="root"></div>
+    <!--
+      This HTML file is a template.
+      If you open it directly in the browser, you will see an empty page.
+
+      You can add webfonts, meta tags, or analytics to this file.
+      The build step will place the bundled scripts into the <body> tag.
+
+      To begin the development, run `npm start` or `yarn start`.
+      To create a production bundle, use `npm run build` or `yarn build`.
+    -->
+  </body>
+</html>
diff --git a/rust/ballista/ui/scheduler/public/logo192.png b/rust/ballista/ui/scheduler/public/logo192.png
new file mode 100644
index 0000000000000000000000000000000000000000..fc44b0a3796c0e0a64c3d858ca038bd4570465d9
GIT binary patch
literal 5347
zcmZWtbyO6NvR-oO24RV%BvuJ&=?+<7=`LvyB&A_#M7mSDYw1v6DJkiYl9X<guIKOG
zci*|^ymP*p?>jT!%$dLEBTQ8R9|wd3008in6lFF3GV-6mLi?MoP_y~}QUnaDCHI#t
z7w^m$@6DI)|C8_jrT?q=f8D?0AM?L)Z}xAo^e^W>t$*Y0KlT5=@bBjT9k<?nGGBhQ
zSbehEe6l@wQk?yk{Pz@AcMVld0M;GTCE?4p`2*7=c-2|99C89m^UO&?Z>xb%-KNdk
zeOS1tKO#ChhG7%{ApNBzE2ZVNcxbrin#E1TiAw#BlUhXllzhN$qWez5l;h<YdrI9P
zS<6GhD3leYXm+LY=TY4I>+t^q#Eav8PhR2|T}y5kkflaK`ba-eoE+Z2q@o6P$)=&`
z+(8}+-McnNO>e#$Rr{32ngsZIAX>GH??tqgwUuUz6kjns|LjsB37zUEWd|(&O!)DY
zQLrq%Y>)Y8G`yYbYCx&aVHi@-vZ3|ebG!f$sTQqMgi0hWRJ^Wc+Ibv!udh_r%2|U)
zPi|E^PK?UE!>_4`f`1k4hqqj_$+d!EB_#IYt;f9)fBOumGNyglU(ofY`yHq4Y?B%-
zp&G!MRY<~ajTgIHErMe(Z8JG*;D-PJhd@RX@QatggM7+G(Lz8eZ;73)72Hfx5KDOE
zkT(m}i2;@X2AT5fW?qVp?@WgN$aT+f_6eo?IsLh;jscNRp|8H}Z9p_UBO^SJXpZew
zEK8fz|0Th%(Wr|KZBGTM4yxkA5CFdAj8=QSrT$fKW#tweUFqr0TZ9D<AY0)k`aBx_
z>~a5lF{)%-tTGMK^2tz(y2v$i%V8XAxIywrZCp=)83p(zIk6@S5AWl|Oa2hF`~~^W
zI;KeOSkw1O#TiQ8;U7OPXjZM|KrnN}9arP)m0v$c|L)lF`j_rpG(zW1Qjv$=^|p*f
z>)Na{D&>n`jOWMwB^TM}slgTEcjxTlUby89j1)|6ydRfWERn3|7Zd2&e7?!K&5G$x
z`5U3uFtn4~SZq|LjFVrz$3iln-+ucY4q$BC{CSm7Xe5c1J<=%Oagztj{ifpaZk_bQ
z9Sb-LaQMKp-qJA*bP6DzgE3`}*i1o3GKmo2pn@dj0;He}F=BgINo};6gQF8!n0ULZ
zL>kC0nPSFzlcB7p4<H52f8=qMn2=dQ!;xXD`6jdiBJ2^oNyt+16A(f<i;0;6ddGE;
zQ_@XTca6wSK(vK5KIKHUgO;P>1doao2F7%6IUTi_+!L`MM4o*#Y#0v~WiO8<L#fHx
zI?x?k(&T-}!n%}LcF+uCp*>uSeAUNp=vA2KaR&=jNR2iVwG>7t%sG2x_~yXzY)7K&
zk3p+O0AFZ1eu^T3s};B<g5t4vVJN7*?kWOGhv$ru8HW)vzo*&RaaqNEl3s?|)YGKH
zo63kVeX8eiiI8)8TVI<9KtqUE{ofuaw7$nnPUt#2l$=IC;iDij;8{QXU+uLWA9c~M
z?KiTNfE|~IwacG?sFBRbqY&vgc~Yaopzd0{Lg`-WSBW2a@&8=tG<r`Ob?)2siT;lG
zPzbHtt{(VS9*a_>%6TpJ6h-Y%B^*zT&SN7C=N;g|#dGIVMSOru3iv^SvO>h4<o1)Q
ztk-z{yw|{Hc59vTba3&#6I)4@Z!Z{_&vNhxwseBQJk-micCb@PRsZ-yUF*D=BME?9
zv0H77d40W7BL-#9+(qd9=V7!I>M=t-N1GSLLDqVTcgurco6)3&XpU!FP6Hlrmj}f$
zp95;b)>M~`kxuZF3r~a!rMf4|&1=uMG$;h^g=Kl;H&Np-(pFT9FF@++MMEx3R<rS-
zuB^adWYC5}jnG`RBeLHUV`KdbUu)vW8p$<wk-gJklNpkTMH8;qgxUtn=hQw+aXu!!
z7L<V8=#FBERK(Iy;KSCGArNoBxI|R+%WaYJr`}%uyfu_sJ6N4<E%!ST6&8KTNUgT0
zc=|z>BsK?AU0fPk-#mdR)Wdkj)`>ZMl#^<80kM87VvsI3r_c@_vX=fdQ`_9-d(xiI
z4K;1y1TiPj_RPh*SpDI7U~^QQ?%0&!$Sh#?x_@;ag)P}ZkAik{_WPB4rHyW#%>|Gs
zdbhyt=qQPA7`?h2_8T;-E6HI#im9K>au*(j4;kzwMSLgo6u*}-K`$_Gzgu&XE)udQ
zmQ72^eZd|vzI)~!20JV-v-T|<4@7ruqrj|o4=JJPlybwMg;M$Ud7>h6g()CT@wXm`
zbq=A(t;RJ^{Xxi*Ff~!|3!-l_PS{AyNAU~t{h;(N(PXMEf^R<?TfDfq&c>(B+ZVX3
z8y0;0A8hJYp@g+c*`>eTA|3Tgv9U8#BDTO9@a@gVMDxr(fVaEqL1tl?md{v^j8aUv
zm&%PX4^|<cvLF*HzSDMGV0iHPD$KT$lv#8;LIw%pD|^3Sh^Dv=f=y*RKZlzMkH(pA
zj!TBU#${|io0kf9sBt#c(IUh^Nw?i5pPmkQDL8Jo`ihi{POC*hzPF#9gJ%+*%r~)G
z*hzHaRQu;^GSmtSWXj1<&y{<D%B-d(ca1<IOKZoU>rX|?E4^CkplWWNv*OKM>DxPa
z!RJ)U^0-WJMi)Ksc!^ixOtw^egoAZZ2Cg;X7(5xZG7yL_;UJ#yp*ZD-;I^Z9qkP`}
zwCTs0*%rIVF1sgLervtnUo&brwz?6?PXRuOCS*JI-WL6GKy7-~yi0giTEMmDs_-UX
zo=+nFrW_EfTg>oY72_4Z0*uG>MnXP=c0VpT&*|rvv1i<G)%__T#O;}Vf68{=uDg!&
z$^|uGJ##zrX6I7v^ea{ysV}DJ_zrf_yt8+T?W6jw=&>StW;*^={rP<Gps5k_;Ey{*
zO|;e5vGXQ@h1vJKGQ+`NMmYBKV~Sx1US+h>1y?Hv+6R6bxFMkxpWkJ>m7Ba{>zc_q
zEefC3jsXdyS5??Mz7IET$Kft|EMNJIv7Ny8ZOcKnzf`K5Cd)&`-fTY#W&jnV0l2vt
z?Gqhic}l}mCv1yUEy$%DP}4AN;36$=7aNI^*AzV(eYGeJ(Px-j<^gSDp5dBAv2#?;
zcM<nu%TB#lev5kX<apfcKZZ%hDDU3kXtK*%;R839$alV38VWT{NJnhjF0GL`9rM2k
zVexf3KgbIO)>Xv#aj>%;MiG^q^$0MSg-(uTl!xm49dH!{X0){Ew7ThWV~Gtj7h%ZD
zVN-R-^7Cf0VH!8O)uUHPL2mO2tmE*cecwQv_5CzWeh)ykX8r5Hi`ehYo)d{Jnh&3p
z9ndXT$OW51#H5cFKa76c<%nNkP~<gM?)^OX$gL^Ky|we;1(h|2M#l;#h2Tj`PPB<E
z!n=Eb`hcI+66~)eT{SBi;R$mV2KtH}>FU93b5h-|Cb}ScHs@4Q#|}byWg;KDMJ#|l
zE=MKD<?0c>*F@HDBcX@~QJH%56eh~jfPO-uKm}~t7Vk<jf*+P>HxHT;)4sd+?Wc4*
z>CyR*{w@4(gnYRdFq=^(#-ytb^5ESD?x<0Skhb%Pt?npNW1m+Nv`tr9+qN<3H1f<%
zZvNEqyK5F<KUONUP{U|Z&`@-OcU{=Mb%iZGj^d}>gPsQ`QIu9P0x_}wJR~^CotL|n
zk?dn;tLRw9jJTur4uWoX6iMm914f0AJfB@C74a;_qRrAP4E7l890P&{v<}>_&GLrW
z)klculcg`?zJO~4;BBAa=POU%aN|pmZJn2{hA!d!*lwO%YSIzv8bTJ}=nhC^n<w3-
z-v~(ZP6zhLQOa--Vj)F~k0Ob}euB(Y8{v*v$;WjNYg|Cj9;VkDLv+N+V{aW7CW=3<
z$l$KzIhY7gI#*j8`VKQqt@ea1=E#0c5IVICnVAH{bp_LL1iIVw*Itgfi#Sq7_Q<98
zA1cq2BqF{g9$p1@&gq>}g(ld^rn#kq9Z3)z`k9lvV>y#!F4e{5c$tnr9M{V)0m(Z<
z#88vX6-AW7T2UUwW`g<;8I$Jb!R%z@rCcGT)-2k7&x9kZZT66}Ztid~6t0jKb&9mm
zpa}LCb`bz`{MzpZR#E*QuBiZXI#<`5qxx=&LMr-UUf~@dRk}YI2hbMsAMWOmDzYtm
zjof16D=mc`^B$+_bCG$$@R0t;e?~UkF?7<(vkb70*EQB1rfUWXh$j)R2)+dNAH5%R
zEBs^?N;UMdy}V};59Gu#0$q53$}|+q7CIGg_w_WlvE}AdqoS<7DY1LWS9?TrfmcvT
zaypmplwn=P4;a8-%l^e?f`OpGb}%(_mFsL&GywhyN(-VROj`4~V~9bGv%UhcA|YW%
zs{;nh@aDX11y^HOF<O&mcM-|{L00A>XB$a7#Sr3cEtNd4eLm@Y#fc&j)TGvbbMwze
zXtekX_wJqxe4NhuW$r}cNy|L{V=t#$%SuWEW)YZTH|!iT79k#?632OFse{+BT_gau
zJwQcbH{b}dzKO?^dV&3nTILYlGw{27UJ72ZN){BILd_HV_s$WfI2DC<9LIHFmtyw?
zQ;?MuK7g%Ym+4e^W#5}WDLpko%jPOC=aN)3!=8)s#Rnercak&b3ESRX3z{xfKBF8L
z5%CGkFmGO@x?_mPGlpEej!3!AMddChabyf~nJNZxx!D&{@xEb!TDyvqSj%Y5@A{}9
zRzoBn0?x}=krh{ok3Nn%e)#~uh;6jpezhA)ySb^b#E>73e*frBFu6IZ^D7Ii&rsiU
z%jzygxT-n*joJpY4o&8UXr2s%j^Q{?e-<G_^{J76Mq?|eHl2Q}TIfLz1H}I9fvS=c
zm*oIlbD9$tAnOWfM^xYqm2?aavV7kSFN~t(hX*&jXwdT)(-yUc1(^4$bB@D*Rg4fF
zGv*BCBqRz8`^LRBWj98zY@aQ`B||0ovS-9b;m0T<TXj-Hh5;G|U%0o&CSKp)@EmW@
zChzrZU(8@!L%c_f>voloX`4DQyEK+DmrZh8A$)<mmOk^JRtKa)h*12TXYBu6*SOO3
ze#NvXs$UpPLNJLqoTpKTRV%K2qK9}L;hCtucS=cqUWJH}3K=Em3K@4&JHx{iSFa8E
zqVHD4$k0g3oTIYd{?wVF<(2=uTWaH@w6)NT<>iWL#NO9+Y@!sO2f@rI!@jN@>HOA<
z?q2l{^%mY*PNx2FoX+A7X3N}(RV$B`g&N=e0uvAvEN1W^{*W?zT1i#fxuw10%~))J
zjx#gxoVlXREWZf4hRkgdHx5V_S*;p-y%JtGgQ4}lnA~MBz-AFdxUxU1RIT$`sal|X
zPB6sEVRjGbXIP0U+?rT|y5+ev&OMX*5C$n2SBPZr`jqzrmpVrNciR0e*Wm?fK6DY&
zl(XQZ60yWXV-|Ps!A<n+?vbcQJG{k7=<p3~`+h4Kd_>{EF;=_z(YAF=T(-MkJXUoX
zI{UMQDAV2}Ya?EisdEW;@pE6dt;j0fg5oT2dxCi{wqWJ<)|SR6fxX~5CzblPGr8cb
zUBVJ2CQd~3L?7yfTpLNbt)He1D>*KXI^GK%<`bq^cUq$Q@uJifG>p3LU(!H=C)aEL
zenk7pVg}0{dKU}&l)Y2Y2eFMdS(<j~2+yHkUVn{?C5dsJXag$OUKP&Vl2lSAJL_uI
ztevY_DRGdi^2bgn=Ll@Km6Uk>JS0}oZUuVaf2+K*YFNGHB`^YGcIpnBlMhO7d4@vV
zv(@N}(k#REdul8~fP+^F@ky*wt@~&|(&&meNO>rKDEnB{ykAZ}k>e@lad7to>Ao$B
zz<1(L=#J*u4_LB=8w+*{KFK^u00NAmeNN7pr+Pf+N*Zl^dO{LM-hMHyP6N!~`24jd
zXYP|Ze;dRXKdF2iJG$U{k=S86l@pytLx}$JFFs8e)*Vi?aVBtGJ3JZUj!~c{<R$n(
ziv;4$OAR*24{KJ-u{Mz2C%|m?Lu8%akP2m-8t9?^hJ};KWux0$T6Zc6vmNj_(P^97
znxN8^Fl+G8f)9)fW?Qt`NcWoFLaagnygy3@TZ@Gu-ER?^vZ;^CT6NUUf@sIN!o*#I
zTQDxUq9IS<Y5j7ng8Y<xvPo+D=~nKpr2LflB|zg+Vlqg|&Z#IWz8CdW!h`-uDggJR
z+f9qRnZ^{3x$+Kifl~IZh)$X4>(rw5>vuRF$`^p!P8w1B=O!skwkO5yd4_XuG^QVF
z`-r5K7(IPSiKQ2|U9+`@Js!<HL1C{aO{H=}S{3p}_Edej>g6sfJwAHVd|s?|mnC*q
zp|B|z)(8+mxXyxQ{8Pg3F4|tdpgZZSoU4P&9I8)nHo1@)9_9u&NcT^FI)6|hsAZFk
zZ+arl&@*>RXBf-OZxhZerOr&dN5LW9@gV=oGFbK*J+m#R-|e6(Loz(;g@T^*oO)0R
zN`N=X46b{7yk5FZGr#5&n1!-@j@g02g|X>MOpF3#IjZ_4wg{dX+G9eqS+Es9@6nC7
zD9$NuVJI}6ZlwtUm5cCAiYv0(Yi{%eH+}t)!E^>^KxB5^L~a`4%1~5q6h>d;paC9c
zTj0wTCKrhWf+F#5>EgX<cLYfrtsHC5;@&1Tu=KIwHE|R;*1f&W24i_&2yx+Xe5N7V
z`hmH?m*G_>`sl%POl?oyCq0(w0xoL?L%)|Q7d|Hl92rUYAU#lc**I&^6p=4lNQPa0
znQ|A~i0ip@`B=FW-Q;zh?-wF;Wl5!+q3GXDu-x&}$gUO)NoO7^$BeEIrd~1Dh{Tr`
z8s<(Bn@gZ(mkIGnmYh_ehXnq78QL$pNDi)|QcT*|GtS%nz1uKE+E{7jdEBp%h0}%r
zD2|KmYGiPa4;md-t_m5YDz#c*oV_FqXd85d@eub?9N61QuYcb3CnVWpM(D-^|CmkL
z(F}L&N7qhL2PCq)fRh}XO@U`Yn<<Z#)X^Ij=#WjXr&snbL8Hbkya6{c!+Ay;w1Jlr
z9}X^@zhtUU>?TNGR4L(mF7#4u29{i~@k;pLsgl({YW5`Mo+p=zZn3L*4{JU;++dG9
X@eDJUQo;Ye2mwlRs<JiGX2Jghdw)}T

literal 0
HcmV?d00001

diff --git a/rust/ballista/ui/scheduler/public/logo512.png b/rust/ballista/ui/scheduler/public/logo512.png
new file mode 100644
index 0000000000000000000000000000000000000000..a4e47a6545bc15971f8f63fba70e4013df88a664
GIT binary patch
literal 9664
zcmYj%RZtvEu=T>?y0|+_a0zY+Zo%Dkae}+MySoIppb75o?vUW_?)>@g{U2`ERQIXV
zeY$JrWnMZ$QC<=ii4X|@0H8`si75jB(ElJb00H<f^p#K#{|oMlvZ~_$qS5Nh{~rCn
zA4Y5cVZ*go<F$|f$hFu1n6>AB%>SlLR{!zO|C9P3zxw_U8?1d8uRZ=({Ga4shyN}3
zAK}WA(ds|``G4jA)9}Bt2Hy0+f3rV1E6b|@?hpGA=PI&r8)ah|)I2s(P5Ic*Ndhn^
z*T&j@gbCTv7+8rpYbR^Ty}1AY)YH;p!m948r#%7x^Z@_-w{pDl|1S4`EM3n_PaXvK
z1JF)E3qy$qTj5Xs{jU9k=y%SQ0>8E$;x?p9ayU0bZZeo{5Z@&FKX>}s!0+^>C^D#z
z>xsCPvxD3Z=dP}TTOSJhNTPyVt14VCQ9MQFN`rn!c&_p?&4<5_PGm4a;WS&1(!qKE
z_H$;dDdiPQ!F_gsN`2>`X}$I=B;={R8%L~`>RyKcS$72ai$!2>d(YkciA^J0@X%G4
z4cu!%Ps~2JuJ8ex`&;Fa0NQOq_nDZ&X;^A=oc1&f#3P1(!5il>6?uK4QpEG8z0Rhu
zvBJ+A9RV?z%v?!$=(vcH?*;vRs*+PPbOQ3cdPr5=tOc<a-ro?Zc5la+tVgj!hwG^F
z4*)z+Dj6T#D>Lqmfx@#hOqX0iN)wTTO21jH<>jpmwRIAGw7`a|sl?9y9zRBh>(_%|
zF?h|P7}~RKj?HR+q|4U`CjRmV-$mLW>MScKnNXiv{vD3&2@*u)-6P@h0A`eeZ7}71
zK(w%@R<4lLt`O7fs1E)$5iGb~fPfJ?WxhY7c3Q>T-w#wT&zW522pH-B%r5v#5y^CF
zcC30Se|`D2mY$hAlIULL%-PNXgbbpRHgn<&X3N9W!@BUk@9g*P5mz-YnZBb*-$zMM
z7Qq}ic0mR8n{^L|=+diODdV}Q!gwr?y+2m=3HWwMq4z)DqYVg0J~^}-%7rMR@S1;9
z7GFj6K}i32X;3*$SmzB&HW{PJ55kT+EI#SsZf}<HMwvFaF@TTvjK|r2I5vs2LpffL
z{Bv!nm|BcMhd{9tj}v>bD7nW^Haf}_gXciYKX{QBxIPSx2<c3y_W_ueW=lkplo6_C
z4pVF;!S-6Ziu|Mq`r%r``(lz68Cu3J#n^oDot`%+UFGP6#%tPM4xaP$n-~x$9>Ma?
zHQqgzZq!_{&zg{yxqv3xq8YV+`S}F6A>Gtl39_m;K4dA{pP$BW0oIXJ>jEQ!2V3A2
zdpoTxG&V=(?^q?ZTj2ZUpDUdMb)T?E$}CI>r@}PFPWD9@*%V6;4Ag>D#h>!s)=$0R
zRXvdkZ%|c}ubej`jl?cS$onl9Tw52rBKT)kgyw~Xy%z62Lr%V6Y=f?2)J|bZJ5(Wx
zmji`O;_B+*X@qe-#~`HFP<{8$w@z4@&`q^Q-Zk8JG3>WalhnW1cvnoVw>*R@c&|o8
zZ%w!{Z+M<tG%{r@|BA#vF#4bf!f++tPT5ym8X91BldH}+AI}Y|vX0!&r;lt@eS^lN
zvg`OBp>HeZ*OE4v<xX`%2$O4;S;&Cbv04cU5}9n7>*otkZqz11*s!#s^Gq>+o`8Z5
z^i-qzJLJh9!W-<EsXOxneQlPdVDePK)>;SmFkR<yAIkG=KFv={m{2U06G>8HEZ<d@
zt-Mk%C6JOyyG;Tv=hp@FaMRsh9p2N;-8nqS(z2KtL@(7nZSC(RXHEa2p`gB`jgK!f
zO!Zy))*;8CLtHznXwkD}e&!X(!hBWIP31$_mJ0Qb0%nbgBTMCL4HMpFsK&}NkusiS
z)A#t)!I!l!vB<6_T!LTOk!S`bCf_JCqRZ0G)JH4uX@iT41bzV2n&>JWiXk$40i6)7
zZpr=k2lp}SasbM*Nbn3j$sn0;rUI;%EDbi7T1ZI4qL6PNNM2Y%6{LMIKW+FY_yF3)
zSKQ2<Ya(Kkoy=zdC9*YK)(E7vJkX5gaF83}z?|lmq+>QSujzNMSL2r&bYs`|i2Dnn
z=>}c0>a}>|uT!IiMOA~pVT~R@bGlm}Edf}Kq0?*Af6#mW9f9!}RjW7om0c9Qlp;yK
z)=XQs(|<cGut0+-L3r!cqm1tE6>6GCadQbWIhYF=rf{Y)sj%^Id-ARO0=O^Ad;Ph+
z0?$eE1xhH?{T$QI>0JP75`r)U_$#%K1^BQ8z#uciKf(C701&RyLQWBUp*Q7eyn76}
z6JHpC9}R$J#(R0cDCkXoFSp;j6{x{b&0yE@P7{;pCEpKjS(+1RQy38`=&Yxo%F=3y
zCPeefABp34U-s?WmU#JJw2<Hy#VJPjU_z!blTTddQRvmJ;M1^SwGhk9F3L!VYgE2}
z!hN4|O@-;WQ~A8Ac|siS)QeHnw6sA2IkoVrt&@Qs%P6~@n5!6r8e%GfaPU^w9TIM(
z+qX(?1}UGxDSvKVX1LW8iFMjeq>3dcC{sPPFc2#J$ZgEN%zod}J~8dLm*fx9f6SpO
zn^Ww3bt9-r0XaT2a@Wpw;C23XM}7_14#%QpubrIw5aZtP+CqIFmsG4`Cm6rfxl9n5
z7=r2C-+lM2AB9X0T_`?EW&Byv<FnI6caTN5D)MUOu9(rjGJ}|99fVRv!X=m8I|ntE
zJ6XpQP1)X(+6SBV*7)9sgp(5zk-^p1E@|<-2^-l-ZW#Kj|IJ&(K=R75?+0Sn{(BV|
z)<!{Xjk+B_tZ!}_{^w<QMOVpX(FpR#8=7_$7TdAfPyiOWZvo8WTqZv}@;S*lPA$Rs
zn+2BOVa?j7wIw`|@yC+YqijL$-?j$YqnBw9uWnNX<bc*#<Sqv}z=}R0au2Xj__+Xc
z|5Zi<%3X($k`eB4OfoyCoJfrfsnP_(kI)~k#Slp5==?)J^f|>&K?HS4QLoylJ|OAF
z`8atBNTzJ&AQ<Z&$gy`^x^JOg-uapGljHB_jawUn+lOR$Lal;{U)TVO@l6XlAhXvf
z&}RhuqQ7a6<jLsJ0)_9Tl`lObK+u8*wmYdM+gnW=+v~Cg={2^r6A-TFvKP$LTFKFk
zC%VN!ZkZ6V>!>sOo$?^0xj~D(;kS$`9zbEGd>f6r`NC3X`tX)sWgWUUOQ7w=$TO<q
zW~{Euy_99}%58ATz~`-F(jnUkM{m~L{o=;3Hl9hX$s(cq;5cRA92lsb@Jg~cz*VaL
zt36Y*Oe?E>&*j;=u%25ay-%>3@81tGe^_z*C7pb9y*Ed^H3t$BIKH2o+olp#$q;)_
zfpjCb_^VFg5fU~K)nf*d*r@BCC>UZ!0&b?AGk_jTPXaSnCuW110wjHPPe^9R^;jo3
zwvzTl)C`Zl5}O2}3lec=hZ*$JnkW#7enKKc)(pM${_$9Hc=Sr_A9Biwe*Y=T?~1CK
z6eZ9uPICjy-sMGbZl$yQmpB&`ouS8v{58__t0$JP%i3R&%QR<t`@HqaIe3AGzxCPH
z06(XDO&~Ok$=UP%vG;P&hu?hEJ29wAaM6E!HZ0R;x8r*qHy+!hZxDYg-KGZI`{P_}
zY{dHlfnW6S)?CPAP)zp_!xelMRGuAo@t@!gSdowYtvHr8K9WNNw}a|TzE-87F!WRs
z-#;HoNH5O`b&7Kri+=ag7)^^;3^1?o2Q2qw@}+ZE%fAQU-nq{%`+R|B7FhGK+M!Fl
z2ZyeAFYON2o9at)@lQt2WoWTyBs<V9RDa+*;620gC9bv{?izYvGuFv(YU1!YDK{kN
zfuajP^aW|>3ianbZqDs<2#5FdN@n5bCn^ZtH992~5k(eA|8|@G9u`wdn7bnpg|@{m
z^d6Y`*$Zf2Xr&|g%sai#5}Syvv(>Jnx&EM7-|Jr7!M~zdAyjt*xl;OLhvW-a%H1m0
z*x5*nb=R5u><7lyVpN<INnH%~Yw@M#U6Pu*P(p=#E`62!G$HpM^Fj^SgYNx!W^2fr
zkI!m)izx6Dlg78SlE~FIDdEd}c|raeMkO<=|63PClZI~^epYjlJD}Z`<%|7DCiNUv
zG)@)s+cUFWM~QdlNaB)J5z`+Rh!K6;Qjn|xbp*GZE8Oc@gJVh~Yk^QNmM<N`7=nyt
z^&xA|=4HLov%ZKEejPsm{k;ktCe=zCR9B1@0wmg_efnHnX;*=is!NwZ>AR?q@1U59
zO+)QW<j~4qKP_fJbKV#dkbk5|s_=T+xd;<8uKpNiftfsnY^b*vkT2H1%VS`S<#uK|
zjNMI3R($QKsX+O9r(;Z277$LfqVgbuD{2wsZBsx#6p~V;+BiVs555-sk`S_(uZ4+h
z)<$QI#xEv`Eka6DmEWW&rUOf*Vo9$F6`G&Jq7J`r0+jS%Qxqc#v^D*NyEI1gB}|q!
z)+rEYS;WOK<Wz?e_Z2Q0;QX0^^7`!HvIf7)1y?Hoj9S$VrgX{Ye9I!Bx85oCC)?4z
zjdu{7tR8-C2~=B$IqnW+8OcPpDJW2wE_8+TYdyClF#Az`1L!6t9*pZdLVY;p<yBtF
zOm~+y=m;=-2Tc+I$K4se0R$L&IWm@H&UYad(l8Y*q?01q-iww`%aiBbF149`>wL8t
zyip?u_nI+K$uh{<eXaA|n3IG+8OrGZ)9HGA&^RJ{Jd9>y)~}qj?(w0&=SE^8`_WMM
zTybjG=999h38Yes7}-4*LJ7H)UE8{mE(6;8voE+TYY%33A>S6`G_95^5QHNTo_;Ao
ztIQIZ_}49%{8|=O;isBZ?=7kfdF8_@azfoTd+hEJKWE!)$)N%HIe2cplaK`ry#=pV
z0q{9w-`i0h@!R8K3GC{ivt{70IWG`EP<iX3`qZ%H^f(R!@OED}+3u4g7{Xr9UwpnK
zTOD@;FUScIf-f4;fF&{6twOyC0W6O!P4PKEm%fJY7_abkr=vB+O94OwvhK{ZP6_!?
z<iuvlT@!faRAoB1`yY6GRfnc*q1!>|(1g7i_Q<>aEAT{5(<ns<#%dS?L`x`En%)Ut
z{nCo<KWFUh<S<CDmdO|;fv7JLuUS7^E}0ijJVb)Q<0jWOI=_FiCK24AD%G{4e$NQd
zWv*R@_2{PvzvNMu@Y3QBNJJKAzFJ33r_h+}NP7l{uwC<5(0xcl0^=Em4$LS-ZF-5D
zMD(oR`sZ*UYIe*BY*c~7#G1SLTv3VfBTd_C@@TBwsuESuxm7Y0Uf&u{$l-}_?d>yD
z=!O?kq61VegV+st@XCw475j6vS)_z@efuqQgHQR1T4;|-#OLZNQJPV4k$AX1Uk8Lm
z{N*b*ia=I+MB}kWpupJ~>!C@xEN#Wa7V+7{m4j8c?)ChV=D?o~sjT?0C_AQ<J}v#S
zq&&10i;k!wZ0^l<H$PM2AS4v2B7le67PsGi3{5cEJvQTXYQd9$TA$ATXW$sERJFH|
zUFQmh;BXn<X&*(eK7*8b7K+8>7B-vxqX30s0I_`2$in86#`mAsT-w?j{&AL@B3$;P
z31G4(lV|b}uSD<Q-$cmmD#5!{N;ON{%=s}<yxrxZp;&F{OtN|&Osm7~f0ORXV+M%%
zhys!Gh~U9xxTSrb2pKtcmi71qF!D2BtUcc1(uP<LQ-4B<(+;>CIrjk+M1R!X7s<hT
z2KXhB-@~*Z#DnL&I)I4&$X=6)^|><DE!Cgw9m@wB3B0oPTj6$<u_@p0qZd2rpQY_#
zEFr4$jqoGqJSybV){Dvrnb_tOoKmSO#70t@P~q_L%<9+Qb(JW|nv0-SWLrjEuZTVs
z44b8p8-&PiM|E?GM`){f%M?C9*dLm28~DlBW?*4ua4H+nWN_%3iNC_(B+k``Oazc8
z83kgJUNcy2CKRR@Pn1$!R|+BC1lz16vh1Y$6BfKm&WMiaUzg^B!!Zp$xNrq{)ln-H
zcg5u<qf>4Aabn<)zpgT}#gE|mIvV38^ODy@<&yflpCwS#fRf9ZX3lPV_?8@C5)A;T
zqmouFLFk;qIs4rA=hh=GL~sCFsXHsqO6_y~*AFt93<ymU#4-U}YQ)Pa*UpuA%os{2
z&>9UYVBSx1s(=Kb&5;j7cSowdE;7()CC2|-i9Zz+_BIw8#ll~-tyH?F3{%`QCsY<I
zU5z8T?uMPvp*VYrm~~t-K+6Pgjku>a*b#s*9iCc`1P1oC26?`g<9))EJ3%xz+O!B3
zZ7$j~To)C@PquR>a1+Dh>-a%IvH_Y7^ys|4o?E%3`I&ADXfC8++hAdZfzIT#%C+Jz
z1lU~K_vAm0m8Qk}K$F>|<CsjNZ*?_o$*ZsW3W*ZecdNs4Im>>RPK%<1SI0(G+8q~H
zAsjezyP+u!Se4q3GW)`h`NPSRlMoBjCzNPesWJwVTY!o@G8=(6I%4XHGaSiS3MEBK
zhgGFv6Jc>L$4jVE!I?TQuwvz_%CyO!bLh94nqK11C2W$*aa2ueGopG8DnBICVUORP
zgytv#)49fVXDaR$SukloYC3u7#5H)}1K21=?DKj^U)8G;MS)&Op)g^zR2($<>C*zW
z;X7`hLxiIO#J`ANdyAOJle4V%ppa*(+0i3w;8i*BA_;u8gOO6)MY`ueq7stBMJTB;
z-a0R>hT<!E*EnpUxAxCvwvo$2Z}nSc&KEBz0q7{Fm>*}>z|Gg}@^zDL1MrH+2hsR8
zHc}*9IvuQC^Ju)^#Y{fOr(96rQNPNhxc;mH@W*m206>Lo<*SaaH?~8zg&f&%YiOEG
zGiz?*CP>Bci}!WiS=zj#K5I}>DtpregpP_tfZtPa(N<%vo^#WCQ5BTv0vr%Z{)0q+
z)RbfHktUm|lg&U3YM%lMUM(f<ok0JPn&g&>u}i#kjX9h>GYctkx9Mt_8{@s%!K_EI
zScgwy6%_fR?CG<BS|7E|e1Uiu+4N|3CP*{mA6E>JQtmgNAj^h9B#zma<L`GR52{?r
zw=yYEhBrx2I7mEv4WBN$tAM7|KP9m=OTPk^73y)|tA#lJ(mG>MDWgH55pGuY1Gv7D
z;8Psm(vEPiwn#MgJYu4Ty9D|h!?Rj0ddE|&L3S{IP%H4^N!m`60ZwZw^;eg4sk6K{
ziA^`Sbl_4~f&Oo%n;8Ye(tiAdlZKI!Z=|j$5hS|D$bDJ}p{gh$KN&JZYLUjv4h{NY
zBJ>X9z<S-$t-=L{3#MCguo5ug^BN(csELHS6D1V)g#mO1+{f#R(F2A;Jtz>!xfDGY
z+oh_Z&_e#Q(-}>ssZfm=j$D&4W4FNy&-kAO1~#3Im;F)Nwe{(*75(p=P^VI?X<FsK
z+mujv723Y8RTh-aX#a)Qm;PXW^W`h>0GFakfh+X-px4a%Uw@fSbmp9hM1_~R>?Z8+
ziy|e9>8V*`OP}4x5JjdWp}7eX;lVxp5qS}<UzbgS%F%qxg|}u`F%N~wbUq7r3Tq2N
z`L+(4<Yw>0YZek;SNmm7tEeSF*-dI)6U-A%m6YvCgM(}_=k#a6o^%-K4{`B1+}O4x
zztDT%hVb;v#?j`lTvlFQ3aV#zkX=7<v0Xt+SO4-V7;S>;YFLS$uIzb0E3lozs5`Xy
zi~vF+%{z9uLjKvKPhP%x5f<NLNK1Zu_hJxLjLK{w;{*>~7-Gj+%5N`%^=yk*Qn{`>
z;xj&ROY6g`iy2a@{O)V(jk&8#hHACVDXey5a+KDod_Z&}kHM}xt7}Md@pil{2x7E~
zL$k^d2@Ec2XskjrN+IILw;#7((abu;OJii&v3?60x>d_Ma(onIPtcVnX@ELF0aL?T
zSmWiL3(dOFkt!x=1O!_0n(cAzZW+3nHJ{2S>tgSK?~cF<W~g{Uk=X^%saR^iO2-=d
zF*rKVVAPU1W>ha^y(l@-Mr2W$%MN{#af8J;V*>hdq!gx=d0h$T7l}>91Wh07)9CTX
zh2_ZdQCyFOQ)l(}gft0UZ<Qo&@`u@GIyo^7BB;_Jrh>G`Sh2`x-w`5vC2UD}lZs*5
zG76$akzn}Xi))L3oGJ75#pcN=cX3!=57$Ha=hQ2^lwdyU#a}4JJOz6ddR%zae%#4&
za)bFj)z=YQela(F#Y|Q#dp}PJghITwXouVaMq$BM?K%cXn9^Y@g43$=O)F&ZlOUom
zJiad#dea;-eywBA@e&D6Pdso1?2^(pXiN91?jvcaUyYoKUmvl5G9e$W!okWe*@a<^
z8cQQ6cNSf+UPDx%?_G4a<m)UKh(R<crXCvksf8T4MGW_VPMHrJGOqh#<rdAK%kV`|
zqLv2C)0Oba2mQ50>IiybZHHagF{<S-4D+!Tsu-gt1o$)JW!(&V?v-lI1Lv(lQE6R!
zWjXrkjWX-&v!bw*7_u$ws?*dOF^}ann%C)lp)v!U?&S&S%`~VL={@<rBH$gl7F=4D
zs%B$Bo06T#CB)!Sf;LI9_<<tT&#Jv^`mC8{I3pWeU7jyQ0gh;9%B>;IcD(dPO!#=u
zWfqLcPc^+7Uu#l(B<Qg-R1c!j-uotKRCgB)MF*8IZpiA>pxft{*4lv#*u7X9AOzDO
z1D9?^jIo}?%iz(_dwLa{ex#T}76ZfN_Z-hwpus9y+4xaUu9cX}&P{XrZVWE{1^0yw
zO;YhLEW!pJcbCt3L8~a7>jsaN{V3>tz6_7`&pi%GxZ=V3?3K^<rn`e8a7?eZI-TG+
z{hR_I;2c?$BM1)pjP2l@7#6U3^o=*9Hsp__;N;$8F&5@Ghp#>U+*ryLSb)8^IblJ0
zSRLNDvIxt)S}g30?s_3NX>F?NKIGrG_zB9@Z>uSW3k2es_H2kU;Rnn%j5qP)!XHKE
zPB2mHP~tLCg4K_vH$xv`HbRsJwbZMUV(t=ez;Ec(vyHH)FbfLg`c61I$W_uBB>i^r
z&{_P;369-&>23R%qNIULe=1~T$(DA`ev*EWZ6j(B$(te}x1WvmIll21zvygkS%vwG
zzkR6Z#RKA2!z!C%M!O>!=Gr0(J0FP=-MN=5t-Ir)of50y10W}j`GtRCsXBakrKtG&
zazmITDJMA0C51&BnLY)SY9r)NVTMs);1<=oosS9g31l{4ztjD3#+2H7u_|66b|_*O
z;Qk6nalpqdHOjx|K&vUS_6ITgGll;TdaN*ta=M_YtyC)I9Tmr~VaPrH2q<HCA^;;b
zni;6_t9t~p5;T0mX`UW-c?4TAiadb)6}vsp``(hz(}(&x4ab<TyrI|$niD$NiTl-b
zJt9ixO#S|?KYH3Eadm4D8|NzLhAY993hoQanUS>b6sd~=AcIxV+%z{E&0@y=DPArw
zdV7z(G1hBx7hd{>(cr43^WF%4Y@PXZ?wPpj{OQ#tvc$pABJbvPGvdR`cAtHn)cSEV
zrpu}1tJwQ3y!mSmH*uz*x0o|CS<^w%&KJzsj~DU0cLQUxk5B!hWE>aBkjJle8z~;s
z-!A=($+}Jq_BTK5^B!`R>!MulZN)F=iXXeUd0w5lUsE5VP*H*oCy(<w;IZ?{Pso`R
z;9tSfBWDPpv(ru@ok6#>;?S$p*TVvTxwAeWFB$jHyb0593)$zqalVlDX=GcCN1gU0
zlgU)I$LcXZ8Oyc2TZYTPu@-;7<4YYB-``Qa;IDcvydIA$%kHhJKV^m*-<Eu89DD6r
z$hXxW3}1&`pz`)lE8f*kAC}P(6)qA>zxcvU4viy<a-^x1uJC*fAd9KCgjrYHBR=y`
zw#X)*QjS-7i>&Kr5GVM{IT>WRywKQ9;>SEiQD*NqplK-KK4YR`p0@JW)n_{TU3bt0
zim%;(m1=#v2}zTps=?fU5w^(*y)xT%1vtQH&}50ZF!9YxW=&7*W($2kgKyz1mUgfs
zfV<*XVVIFnohW=|j+@Kfo!#liQR^x>2yQdrG;2o8WZR+XzU_nG=Ed2rK?ntA;K5B{
z>M8+*A4<Ta>!Jm^Bg}aW?R?6;@QG@uQ8&oJ{hFixcfEnJ4QH?A4>P=q29oDGW;L;=
z9-a0;g%c`C+Ai!UmK$NC*4#;Jp<1=TioL=t^YM)<<%u#hnnfSS`nq63QKGO1L8RzX
z@MFDq<H`&N7x6|cHF$jHtc;8QSd3*XDI;%h;Be47aqDn+ovE51)i6?}0L%GiJ>s1z
ztYmxDl@LU)5acvHk)~Z`RW7=aJ_nGD!mOSYD>5Odjn@TK#LY{jf?+piB5AM-CAoT_
z?S-*q7}wyLJzK>N%eMPuFgN)Q_otKP;aqy=D5f!<Uxm0kJ!&((NN1Cc$Lf2D8xbv(
z*WfnV!Kme-C7`<}Hk^(!-La76WI@dSiD?t@Imfnp1{N8W$}|)~%wx6MKY2OYwhJDH
z)z%|ULU9X+--|?(ocK})YRZKw<7x0>7<=n(lNkYRXVpkB{TAYLYg{|(jtRqYmg$xH
zjmq<Cf4$wzOeRC1g`5bkE7g|z=wldi@dYy#eUIYfkuubZe|$MvzfnD`b2{>?B(RE4
zQx^~Pt}gxC2~l=K$$-sYy_r$CO(d=+b3H1MB*y_5g6WLaWTXn+TKQ|hNY^>Mp6k*$
zwkovomhu776vQATqT4blf~g;TY(MWCrf^^yfWJvSAB$p5l;jm@o#=!lqw+Lqfq>X=
z$6~kxfm7`3q4zUEB;u4qa#BdJxO!;xGm)wwuisj{0y2x{R(IGMrsIzDY9LW>m!Y`=
z04sx3IjnYvL<4JqxQ8f7qYd0s2Ig%`ytYPEMKI)s(LD}D@EY>x`VFtqvnADNBdeao
zC96X+MxnwKmjpg{U&gP3HE}1=s!lv&D{6(g_lzyF3A`7Jn*&d_kL<;dAFx!UZ>hB8
z5A*%LsAn;VLp>3${0>M?PSQ)9s3}|h2e?TG4_F{}{Cs>#3Q*t$(CUc}M)I}8cPF6%
z=+h(Kh^8)}gj(0}#e7O^FQ6`~fd1#8#!}LMuo3A0bN`o}PYsm!Y}sdOz$+Tegc=qT
z8x`PH$7lvnhJp{kHWb22l;@7B7|4yL4UOOVM0MP_>P%S1Lnid)+k9{+3D+JFa#Pyf
zhVc#&df87APl4W9X)F3pGS>@etfl=_E5tBcVoOfrD4hmVeTY-cj((pkn%n@EgN{0f
zwb_^Rk0I#i<UGQdc-Nmd=Rb)xhox&LXCiL2JOtMf1nJ{Y*CC^NXhbH@kK=kc_`LQd
zpKZRrfMT*+Mhk36qPN<LRtNnRgTK6F!~*AtcX%l1)YCyR^Cg*|aI@K7&6brfZD+JV
zGcqOky{~wE&Wx}Ojr2$00rvimv@fJs@iLuizXDa>ZuHK!l*lN`ceJn(sI{$Fq6nN&
zE<-=0_2WN}m+*ivmIOxB@#~Q-cZ>l136w{#TIJe478`KE7@=a{>SzPHsKLzYAyBQO
zAtuuF$-JSDy_S@6GW0MOE~R)b;+0f%_NMrW(+V#c_d&U8Z9+ec4=HmOHw?gdjF(Lu
zzra<iFcvmxzT>83M_BoO-1b3;9`%&DHfuUY)6YDV21P$C!Rc?mv&{lx#f8oc6?0?x
zK08{WP65?#>(vPfA-c=MCY|<S!ZyNl<um89EGH-nZopot<9vhnMSrJUdliV1$R@h(
zReDzy8)E@8VrU(MTz_4ai}TcxM)B2^Im7X9WBhxiIczSob@_Q~*btJ>%*1_<3D4NX
zeVTi-JGl2uP_2@0F{G({pxQOXt_d{g_CV6b?jNpfUG9;8yle-^4KHRvZs-_2siata
zt+d_T@U$&t*xaD22(fH(W1r$Mo?3dc%Tncm=C6{V9y{v&VT#^1L04vDrLM9qBoZ4@
z6DBN#m57hX7$C(=#$Y5$bJmwA$T8jKD8+6A!-IJwA{WOfs%s}yxUw^?MRZjF$n_KN
z6`_bGXcmE#5e4Ym)aQJ)xg3Pg0@k`iGuHe?f(5LtuzSq=nS^5z>vqU0EuZ&75V%Z{
zYyhRLN^)$c6Ds{f7*FBpE;n5iglx5PkHfWrj3<K%`xq+5RKqKFc8rLQ*ZRbbx$E1#
z3f|;4cOJ3Ebo^39!B`+!g&)irRekwjXNvz=dRTz5`G+KYEbcaaK8WXc9Bd>`x^j^t
z7ntuV`g!9Xg#^3!x)l*}IW=(Tz3>Y5l4uGaB&lz{GDjm2D5S$CExLT`I1#n^lBH7Y
zDgpMag@`iETKAI=p<5E#LTkw<F5K4Wbo)QRuzF*eH_@ivMrE0Wp~Gnj6dqxd?q0<i
zCg50hY}if?yn)!*`4%$BA^3^>zVR@=yY|uBVI1HG|8h+d;G-qfuj}-ZR6fN>EfCCW
z9~wRQoAPEa#aO?3h?x{YvV*d+NtPkf&4V0k4|L=uj!U{L+oLa(z#&iuhJr3-PjO3R
z5s?=nn_5^*^Rawr>>Nr@K(jwkB#JK-=+HqwfdO<+P5byeim)wvqGlP-P|~Nse8=XF
zz`?RYB|D6SwS}C<!9XcXRWqW$6w&z(j$m~}aKHcZK~n4i+541c<|vO(dRs@`mO_la
zV#-mf$jU#l&0!zW|IK42VgGl#Cw`Pp0u0|_KdVe9>+YQv+;}k6$-%D(@+t14BL@vM
z2q%q?f6D-A5s$_WY3{^G0F131bbh|g!}#BKw=HQ7mx;Dzg4Z*bTLQSfo{ed{4}NZW
zfrRm^Ca$rlE{Ue~uYv>R9{3s<lJFO-AA<uH1E0Ejy3!9=Y^Pj|>mwATcdM_6+yWIO
z*ZRH~uXE@#p$XTbCt5j7j2=86e{9>HIB6xDzV+vAo&B?KUiMP|ttOElepnl%|DPqL
b{|{}U^kRn2wo}j7|0ATu<;8xA7zX}7|B6mN

literal 0
HcmV?d00001

diff --git a/rust/ballista/ui/scheduler/public/manifest.json b/rust/ballista/ui/scheduler/public/manifest.json
new file mode 100644
index 00000000000..080d6c77ac2
--- /dev/null
+++ b/rust/ballista/ui/scheduler/public/manifest.json
@@ -0,0 +1,25 @@
+{
+  "short_name": "React App",
+  "name": "Create React App Sample",
+  "icons": [
+    {
+      "src": "favicon.ico",
+      "sizes": "64x64 32x32 24x24 16x16",
+      "type": "image/x-icon"
+    },
+    {
+      "src": "logo192.png",
+      "type": "image/png",
+      "sizes": "192x192"
+    },
+    {
+      "src": "logo512.png",
+      "type": "image/png",
+      "sizes": "512x512"
+    }
+  ],
+  "start_url": ".",
+  "display": "standalone",
+  "theme_color": "#000000",
+  "background_color": "#ffffff"
+}
diff --git a/rust/ballista/ui/scheduler/public/robots.txt b/rust/ballista/ui/scheduler/public/robots.txt
new file mode 100644
index 00000000000..dc045698d09
--- /dev/null
+++ b/rust/ballista/ui/scheduler/public/robots.txt
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# https://www.robotstxt.org/robotstxt.html
+User-agent: *
+Disallow:
diff --git a/rust/ballista/ui/scheduler/react-table-config.d.ts b/rust/ballista/ui/scheduler/react-table-config.d.ts
new file mode 100644
index 00000000000..4bdce7667ec
--- /dev/null
+++ b/rust/ballista/ui/scheduler/react-table-config.d.ts
@@ -0,0 +1,137 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import {
+    UseColumnOrderInstanceProps,
+    UseColumnOrderState,
+    UseExpandedHooks,
+    UseExpandedInstanceProps,
+    UseExpandedOptions,
+    UseExpandedRowProps,
+    UseExpandedState,
+    UseFiltersColumnOptions,
+    UseFiltersColumnProps,
+    UseFiltersInstanceProps,
+    UseFiltersOptions,
+    UseFiltersState,
+    UseGlobalFiltersColumnOptions,
+    UseGlobalFiltersInstanceProps,
+    UseGlobalFiltersOptions,
+    UseGlobalFiltersState,
+    UseGroupByCellProps,
+    UseGroupByColumnOptions,
+    UseGroupByColumnProps,
+    UseGroupByHooks,
+    UseGroupByInstanceProps,
+    UseGroupByOptions,
+    UseGroupByRowProps,
+    UseGroupByState,
+    UsePaginationInstanceProps,
+    UsePaginationOptions,
+    UsePaginationState,
+    UseResizeColumnsColumnOptions,
+    UseResizeColumnsColumnProps,
+    UseResizeColumnsOptions,
+    UseResizeColumnsState,
+    UseRowSelectHooks,
+    UseRowSelectInstanceProps,
+    UseRowSelectOptions,
+    UseRowSelectRowProps,
+    UseRowSelectState,
+    UseRowStateCellProps,
+    UseRowStateInstanceProps,
+    UseRowStateOptions,
+    UseRowStateRowProps,
+    UseRowStateState,
+    UseSortByColumnOptions,
+    UseSortByColumnProps,
+    UseSortByHooks,
+    UseSortByInstanceProps,
+    UseSortByOptions,
+    UseSortByState
+} from 'react-table'
+
+declare module 'react-table' {
+    // take this file as-is, or comment out the sections that don't apply to your plugin configuration
+
+    export interface TableOptions<D extends Record<string, unknown>>
+        extends UseExpandedOptions<D>,
+            UseFiltersOptions<D>,
+            UseGlobalFiltersOptions<D>,
+            UseGroupByOptions<D>,
+            UsePaginationOptions<D>,
+            UseResizeColumnsOptions<D>,
+            UseRowSelectOptions<D>,
+            UseRowStateOptions<D>,
+            UseSortByOptions<D>,
+            // note that having Record here allows you to add anything to the options, this matches the spirit of the
+            // underlying js library, but might be cleaner if it's replaced by a more specific type that matches your
+            // feature set, this is a safe default.
+            Record<string, any> {}
+
+    export interface Hooks<D extends Record<string, unknown> = Record<string, unknown>>
+        extends UseExpandedHooks<D>,
+            UseGroupByHooks<D>,
+            UseRowSelectHooks<D>,
+            UseSortByHooks<D> {}
+
+    export interface TableInstance<D extends Record<string, unknown> = Record<string, unknown>>
+        extends UseColumnOrderInstanceProps<D>,
+            UseExpandedInstanceProps<D>,
+            UseFiltersInstanceProps<D>,
+            UseGlobalFiltersInstanceProps<D>,
+            UseGroupByInstanceProps<D>,
+            UsePaginationInstanceProps<D>,
+            UseRowSelectInstanceProps<D>,
+            UseRowStateInstanceProps<D>,
+            UseSortByInstanceProps<D> {}
+
+    export interface TableState<D extends Record<string, unknown> = Record<string, unknown>>
+        extends UseColumnOrderState<D>,
+            UseExpandedState<D>,
+            UseFiltersState<D>,
+            UseGlobalFiltersState<D>,
+            UseGroupByState<D>,
+            UsePaginationState<D>,
+            UseResizeColumnsState<D>,
+            UseRowSelectState<D>,
+            UseRowStateState<D>,
+            UseSortByState<D> {}
+
+    export interface ColumnInterface<D extends Record<string, unknown> = Record<string, unknown>>
+        extends UseFiltersColumnOptions<D>,
+            UseGlobalFiltersColumnOptions<D>,
+            UseGroupByColumnOptions<D>,
+            UseResizeColumnsColumnOptions<D>,
+            UseSortByColumnOptions<D> {}
+
+    export interface ColumnInstance<D extends Record<string, unknown> = Record<string, unknown>>
+        extends UseFiltersColumnProps<D>,
+            UseGroupByColumnProps<D>,
+            UseResizeColumnsColumnProps<D>,
+            UseSortByColumnProps<D> {}
+
+    export interface Cell<D extends Record<string, unknown> = Record<string, unknown>, V = any>
+        extends UseGroupByCellProps<D>,
+            UseRowStateCellProps<D> {}
+
+    export interface Row<D extends Record<string, unknown> = Record<string, unknown>>
+        extends UseExpandedRowProps<D>,
+            UseGroupByRowProps<D>,
+            UseRowSelectRowProps<D>,
+            UseRowStateRowProps<D> {}
+}
\ No newline at end of file
diff --git a/rust/ballista/ui/scheduler/src/App.css b/rust/ballista/ui/scheduler/src/App.css
new file mode 100644
index 00000000000..bea95535e9e
--- /dev/null
+++ b/rust/ballista/ui/scheduler/src/App.css
@@ -0,0 +1,18 @@
+/*
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+*/
diff --git a/rust/ballista/ui/scheduler/src/App.test.tsx b/rust/ballista/ui/scheduler/src/App.test.tsx
new file mode 100644
index 00000000000..20dca216eb2
--- /dev/null
+++ b/rust/ballista/ui/scheduler/src/App.test.tsx
@@ -0,0 +1,26 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import React from "react";
+import { render, screen } from "@testing-library/react";
+import App from "./App";
+
+test("renders learn react link", () => {
+  render(<App />);
+  const linkElement = screen.getByText(/learn react/i);
+  expect(linkElement).toBeInTheDocument();
+});
diff --git a/rust/ballista/ui/scheduler/src/App.tsx b/rust/ballista/ui/scheduler/src/App.tsx
new file mode 100644
index 00000000000..5864a27cdf5
--- /dev/null
+++ b/rust/ballista/ui/scheduler/src/App.tsx
@@ -0,0 +1,97 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import React, {useState, useEffect} from "react";
+import {Box, Grid, VStack} from "@chakra-ui/react";
+import {Header} from "./components/Header";
+import { Summary} from "./components/Summary";
+import {QueriesList, Query, QueryStatus} from "./components/QueriesList";
+import {Footer} from "./components/Footer";
+
+import "./App.css";
+
+function uuidv4() {
+  return "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx".replace(/[xy]/g, function (c) {
+    var r = (Math.random() * 16) | 0,
+      v = c === "x" ? r : (r & 0x3) | 0x8;
+    return v.toString(16);
+  });
+}
+
+const getRandomQueries = (num: number): Query[] => {
+  const nodes: Query[] = [];
+
+  for (let i = 0; i < num; i++) {
+    nodes.push({
+      started: new Date().toISOString(),
+      query: "SELECT \n" +
+          "    employee.id,\n" +
+          "    employee.first_name,\n" +
+          "    employee.last_name,\n" +
+          "    SUM(DATEDIFF(\"SECOND\", call.start_time, call.end_time)) AS call_duration_sum\n" +
+          "FROM call\n" +
+          "INNER JOIN employee ON call.employee_id = employee.id\n" +
+          "GROUP BY\n" +
+          "    employee.id,\n" +
+          "    employee.first_name,\n" +
+          "    employee.last_name\n" +
+          "ORDER BY\n" +
+          "    employee.id ASC;",
+      status: QueryStatus.RUNNING,
+      progress: Math.round(Math.random() * 100),
+      uuid: uuidv4()
+    });
+  }
+  return nodes;
+};
+
+const queries = getRandomQueries(17);
+
+const App : React.FunctionComponent<any> = () => {
+
+  const [schedulerState, setSchedulerState] = useState(undefined)
+
+  function getSchedulerState() {
+    return fetch(`/state`, {
+      method: 'POST',
+      headers: {
+        'Accept': 'application/json'
+      }
+    })
+      .then(res => res.json())
+      .then(res => setSchedulerState(res));
+  }
+
+  useEffect(() => {
+    getSchedulerState();
+  }, []);
+
+  return (
+    <Box>
+      <Grid minH="100vh">
+        <VStack alignItems={"flex-start"} spacing={0} width={"100%"}>
+          <Header schedulerState={schedulerState} />
+          <Summary schedulerState={schedulerState} />
+          <QueriesList queries={queries} />
+          <Footer />
+        </VStack>
+      </Grid>
+    </Box>
+  );
+}
+
+export default App;
diff --git a/rust/ballista/ui/scheduler/src/components/DataTable.tsx b/rust/ballista/ui/scheduler/src/components/DataTable.tsx
new file mode 100644
index 00000000000..38176d3e34f
--- /dev/null
+++ b/rust/ballista/ui/scheduler/src/components/DataTable.tsx
@@ -0,0 +1,131 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import React from "react";
+import {Link, Table, Thead, Flex, Tbody, Text, Tr, Th, Td, VStack, chakra} from "@chakra-ui/react";
+import {TriangleDownIcon, TriangleUpIcon} from "@chakra-ui/icons";
+import {useTable, useSortBy, usePagination, Column as RTColumn} from "react-table";
+import {HiChevronLeft, HiChevronRight} from "react-icons/all";
+import TimeAgo from "react-timeago";
+
+type RenderFn = (props: any) => React.ReactNode;
+
+interface Row {
+    [name: string]: any;
+}
+
+// eslint-disable-next-line
+export type Column<Row> = RTColumn | {
+    isNumeric?: boolean;
+    render?: RenderFn;
+};
+
+interface DataTableProps {
+    columns: Column<Row>[];
+    data: Row[];
+    pageSize?: number;
+    maxW?: number;
+    pb?: number;
+}
+export const DateCell : (props: any) => React.ReactNode = (props: any) => {
+    return <TimeAgo minPeriod={60} date={props.value}
+                    formatter={(value: number, unit: TimeAgo.Unit, suffix: TimeAgo.Suffix) => {
+                        if (unit === 'second') return 'just now';
+                        const plural: string = value !== 1 ? 's' : '';
+                        return `${value} ${unit}${plural} ${suffix}`;
+                    }}
+    />
+}
+
+export const LinkCell : (props: any) => React.ReactNode = (props: any) => {
+    return (
+        <Link href={props.href} isExternal>
+            {props.value}
+        </Link>
+    )
+}
+
+export const DataTable: React.FunctionComponent<DataTableProps> = ({data, columns, pageSize = 10, maxW, pb}) => {
+        const {
+            getTableProps,
+            getTableBodyProps,
+            headerGroups,
+            rows,
+            prepareRow,
+            pageOptions,
+            canNextPage,
+            nextPage,
+            canPreviousPage,
+            previousPage,
+            state: {pageIndex},
+        } = useTable({columns: columns as any, data, initialState: {pageIndex: 0, pageSize},}, useSortBy, usePagination);
+
+        const last = data.length;
+        const start = (pageIndex * pageSize) + 1;
+        const end = Math.min((pageIndex + 1) * pageSize, last);
+
+        return (
+            <VStack maxW={maxW} pb={pb}>
+                <Table {...getTableProps()} size={"sm"}>
+                    <Thead>
+                        {headerGroups.map((headerGroup) => (
+                            <Tr {...headerGroup.getHeaderGroupProps()}>
+                                {headerGroup.headers.map((column: any) => (
+                                    <Th
+                                        {...column.getHeaderProps(column.getSortByToggleProps())}
+                                        isNumeric={column.isNumeric}
+                                    >
+                                        {column.render("Header")}
+                                        <chakra.span pl="4">
+                                            {column.isSorted ? (
+                                                column.isSortedDesc ? (
+                                                    <TriangleDownIcon aria-label="sorted descending"/>
+                                                ) : (
+                                                    <TriangleUpIcon aria-label="sorted ascending"/>
+                                                )
+                                            ) : null}
+                                        </chakra.span>
+                                    </Th>
+                                ))}
+                            </Tr>
+                        ))}
+                    </Thead>
+                    <Tbody {...getTableBodyProps()}>
+                        {rows.slice(start - 1, end).map((row: any) => {
+                            prepareRow(row);
+                            return (
+                                <Tr {...row.getRowProps()}>
+                                    {row.cells.map((cell: any) => (
+                                        <Td {...cell.getCellProps()} isNumeric={cell.column.isNumeric}>
+                                            {cell.render("Cell")}
+                                        </Td>
+                                    ))}
+                                </Tr>
+                            );
+                        })}
+                    </Tbody>
+                </Table>
+                {pageOptions.length > 1 ?
+                    (<Flex width={"100%"} pr={10} justifyContent={"flex-end"} pt={4}>
+                        <Text fontSize={"sm"} pr={2}>Showing {start} to {end} of {last}. </Text>
+                        <HiChevronLeft color={canPreviousPage ? 'black': 'dimgray'} onClick={previousPage}/>
+                        <HiChevronRight color={canNextPage ? 'black': 'dimgray'} onClick={nextPage}/>
+                    </Flex>) : null}
+            </VStack>
+        );
+    }
+;
diff --git a/rust/ballista/ui/scheduler/src/components/Empty.tsx b/rust/ballista/ui/scheduler/src/components/Empty.tsx
new file mode 100644
index 00000000000..b772e70e4e8
--- /dev/null
+++ b/rust/ballista/ui/scheduler/src/components/Empty.tsx
@@ -0,0 +1,36 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import React from "react";
+import { Flex, Text } from "@chakra-ui/react";
+interface EmptyProps {
+  text: string;
+}
+
+export const Empty: React.FunctionComponent<EmptyProps> = ({ text }) => {
+  return (
+    <Flex
+      minH={100}
+      minW={200}
+      flex={1}
+      alignItems={"center"}
+      justifyContent={"center"}
+    >
+      <Text>{text}</Text>
+    </Flex>
+  );
+};
diff --git a/rust/ballista/ui/scheduler/src/components/Footer.tsx b/rust/ballista/ui/scheduler/src/components/Footer.tsx
new file mode 100644
index 00000000000..ab03898f44b
--- /dev/null
+++ b/rust/ballista/ui/scheduler/src/components/Footer.tsx
@@ -0,0 +1,28 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import React from "react";
+import {Flex, Text} from "@chakra-ui/react";
+
+
+export const Footer: React.FunctionComponent = () => {
+    return (
+        <Flex borderTop={"1px solid #f1f1f1"} w={"100%"} p={4} justifyContent={"flex-end"}>
+            <Text fontSize="md" fontStyle={"italic"}>Licensed under the Apache License, Version 2.0.</Text>
+        </Flex>
+    )
+}
\ No newline at end of file
diff --git a/rust/ballista/ui/scheduler/src/components/Header.tsx b/rust/ballista/ui/scheduler/src/components/Header.tsx
new file mode 100644
index 00000000000..c0ddd35c726
--- /dev/null
+++ b/rust/ballista/ui/scheduler/src/components/Header.tsx
@@ -0,0 +1,82 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import React from "react";
+import {Box, Flex, Text, Button} from "@chakra-ui/react";
+import Logo from "./logo.svg";
+import {AiFillGithub, HiDocumentText} from "react-icons/all";
+import {SchedulerState} from "./Summary";
+
+export const NavBarContainer: React.FunctionComponent<React.PropsWithChildren<any>> = ({children, ...props}) => {
+    return (
+        <Flex
+            as="nav"
+            align="center"
+            justify="space-between"
+            wrap="wrap"
+            w="100%"
+            padding={1}
+            bg={["white"]}
+            {...props}
+        >
+            {children}
+        </Flex>
+    );
+};
+
+interface HeaderProps {
+    schedulerState?: SchedulerState
+}
+
+export const Header: React.FunctionComponent<HeaderProps> = ({schedulerState}) => {
+    return (
+        <NavBarContainer borderBottom={"1px"} borderBottomColor={"#f1f1f1"}>
+            <Box w="100%" alignItems={"flex-start"}>
+                <NavBarContainer>
+                    <Flex flexDirection={"row"} alignItems={"center"}>
+                        <img alt={"Ballista Logo"} src={Logo}/>
+                        <Text
+                            background={"aliceblue"}
+                            ml={4}
+                            fontSize="md"
+                            padding={1}
+                            borderRadius={4}
+                        >
+                            Version - {schedulerState?.version}
+                        </Text>
+                    </Flex>
+                    <Flex>
+                        <a rel={"noreferrer"} target={"_blank"} href={"https://ballistacompute.org/docs/"}>
+                            <Button mr={4} colorScheme="blue" size="sm" rightIcon={<HiDocumentText/>}>
+                                Docs
+                            </Button>
+                        </a>
+                        <a
+                            rel="noreferrer"
+                            href={"https://github.com/apache/arrow/tree/master/rust/ballista"}
+                            target={"_blank"}
+                        >
+                            <Button colorScheme="blue" size="sm" rightIcon={<AiFillGithub/>}>
+                                Github
+                            </Button>
+                        </a>
+                    </Flex>
+                </NavBarContainer>
+            </Box>
+        </NavBarContainer>
+    );
+};
diff --git a/rust/ballista/ui/scheduler/src/components/NodesList.tsx b/rust/ballista/ui/scheduler/src/components/NodesList.tsx
new file mode 100644
index 00000000000..2690e86b534
--- /dev/null
+++ b/rust/ballista/ui/scheduler/src/components/NodesList.tsx
@@ -0,0 +1,71 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import React from "react";
+import {Box } from "@chakra-ui/react";
+import {Column, DateCell, DataTable} from "./DataTable";
+
+export enum NodeStatus {
+  RUNNING = "RUNNING",
+  TERMINATED = "TERMINATED"
+}
+
+export interface NodeInfo {
+  id: string;
+  host: string;
+  port: number;
+  status: NodeStatus;
+  started: string;
+}
+
+const columns : Column<any>[] = [
+  {
+    Header: "Node",
+    accessor: "id",
+  },
+  {
+    Header: "Host",
+    accessor: "host",
+  },
+  {
+    Header: "Port",
+    accessor: "port",
+  },
+  {
+    Header: "Status",
+    accessor: "status",
+  },
+  {
+    Header: "Started",
+    accessor: "started",
+    Cell: DateCell,
+  },
+];
+
+interface NodesListProps {
+  nodes:  NodeInfo[]
+}
+
+export const NodesList: React.FunctionComponent<NodesListProps> = ({
+  nodes = [],
+}) => {
+  return (
+    <Box flex={1}>
+      <DataTable maxW={960} columns={columns} data={nodes} pageSize={4} />
+    </Box>
+  );
+};
diff --git a/rust/ballista/ui/scheduler/src/components/QueriesList.tsx b/rust/ballista/ui/scheduler/src/components/QueriesList.tsx
new file mode 100644
index 00000000000..2d7166a28eb
--- /dev/null
+++ b/rust/ballista/ui/scheduler/src/components/QueriesList.tsx
@@ -0,0 +1,115 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import React from "react";
+import {CircularProgress, CircularProgressLabel, VStack, Skeleton, Stack, Text, Flex, Box} from "@chakra-ui/react";
+import {Column, DateCell, DataTable, LinkCell} from "./DataTable";
+import {FaStop} from "react-icons/fa";
+import {GrPowerReset} from "react-icons/gr";
+
+export enum QueryStatus {
+    QUEUED = "QUEUED",
+    RUNNING = "RUNNING",
+    FAILED = "FAILED",
+    COMPLETED = "COMPLETED",
+}
+
+export interface Query {
+    uuid: string;
+    query: string;
+    status: QueryStatus;
+    progress: number;
+    started: string;
+}
+
+export interface QueriesListProps {
+    queries?: Query[];
+}
+
+export const ActionsCell: (props: any) => React.ReactNode = (props: any) => {
+    return (
+        <Flex>
+            <FaStop color={"red"} title={"stop"}/>
+            <Box mx={2}></Box>
+            <GrPowerReset title={"Retry"}/>
+        </Flex>
+    )
+}
+
+export const ProgressCell: (props: any) => React.ReactNode = (props: any) => {
+    return (
+        <CircularProgress value={props.value} color="orange.400">
+            <CircularProgressLabel>{props.value}%</CircularProgressLabel>
+        </CircularProgress>
+    )
+}
+
+const columns: Column<any>[] = [
+    {
+        Header: "UUID",
+        accessor: "uuid",
+        Cell: LinkCell
+    },
+    {
+        Header: "Query",
+        accessor: "query",
+    },
+    {
+        Header: "Status",
+        accessor: "status",
+    },
+    {
+        Header: "Progress",
+        accessor: "progress",
+        Cell: ProgressCell,
+    },
+    {
+        Header: "Started",
+        accessor: "started",
+        Cell: DateCell,
+    },
+    {
+        Header: "Actions",
+        accessor: "",
+        Cell: ActionsCell,
+    }
+];
+
+const getSkeletion = () => (
+    <>
+        <Skeleton height={5}/>
+        <Skeleton height={5}/>
+        <Skeleton height={5}/>
+        <Skeleton height={5}/>
+        <Skeleton height={5}/>
+        <Skeleton height={5}/>
+    </>
+)
+
+export const QueriesList: React.FunctionComponent<QueriesListProps> = ({queries}) => {
+    const isLoaded = typeof queries !== "undefined";
+
+    //TODO: Remove blur once queries api is ready
+    return (
+        <VStack flex={1} p={4} w={"100%"} alignItems={"flex-start"} filter="blur(3px)">
+            <Text mb={4}>Queries</Text>
+            <Stack w={"100%"} flex={1}>
+                {isLoaded ? <DataTable columns={columns} data={queries || []} pageSize={10} pb={10}/> : getSkeletion()}
+            </Stack>
+        </VStack>
+    )
+};
\ No newline at end of file
diff --git a/rust/ballista/ui/scheduler/src/components/Summary.tsx b/rust/ballista/ui/scheduler/src/components/Summary.tsx
new file mode 100644
index 00000000000..2e52498296f
--- /dev/null
+++ b/rust/ballista/ui/scheduler/src/components/Summary.tsx
@@ -0,0 +1,89 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import React from "react";
+import { Box, Text, Flex, VStack } from "@chakra-ui/react";
+import { HiCheckCircle } from "react-icons/hi";
+import TimeAgo from "react-timeago";
+import { NodesList, NodeInfo } from "./NodesList";
+
+const Label: React.FunctionComponent<React.PropsWithChildren<any>> = ({
+  children,
+}) => {
+  return (
+    <Text fontSize="md" fontWeight={"light"} width={90}>
+      {children}
+    </Text>
+  );
+};
+
+export interface SchedulerState {
+  status: string;
+  started: string;
+  version: string;
+  executors: NodeInfo[];
+}
+
+export interface SummaryProps {
+  schedulerState?: SchedulerState
+}
+
+export const Summary: React.FunctionComponent<SummaryProps> = ({schedulerState}) => {
+
+  if (!schedulerState) {
+    return <Text>Scheduler isn't running</Text>
+  }
+
+  return (
+    <Flex bg={"gray.100"} padding={10} width={"100%"}>
+      <Box width={"100%"}>
+        <Flex paddingX={4}>
+          <VStack
+            minWidth={250}
+            fontSize="md"
+            alignItems={"flex-start"}
+            fontWeight={"normal"}
+          >
+            <Text fontWeight={"light"} mb={2}>General Cluster Info</Text>
+            <Flex>
+              <Label>Status</Label>
+              <Flex alignItems={"center"}>
+                <HiCheckCircle color={"green"} />
+                <Text pl={1}>Active</Text>
+              </Flex>
+            </Flex>
+            <Flex>
+              <Label>Nodes</Label>
+              <Text>{schedulerState.executors?.length}</Text>
+            </Flex>
+            <Flex>
+              <Label>Started</Label>
+              <Text>
+                <TimeAgo date={schedulerState.started} />
+              </Text>
+            </Flex>
+            <Flex>
+              <Label>Version</Label>
+              <Text>{schedulerState.version}</Text>
+            </Flex>
+          </VStack>
+          <NodesList nodes={schedulerState.executors} />
+        </Flex>
+      </Box>
+    </Flex>
+  );
+};
diff --git a/rust/ballista/ui/scheduler/src/components/logo.svg b/rust/ballista/ui/scheduler/src/components/logo.svg
new file mode 100644
index 00000000000..3cb5257955a
--- /dev/null
+++ b/rust/ballista/ui/scheduler/src/components/logo.svg
@@ -0,0 +1,25 @@
+
+<svg width="276" height="77" viewBox="0 0 276 77" fill="none" xmlns="http://www.w3.org/2000/svg">
+    <path d="M270.046 11.5891L270.046 11.5891C270.566 10.3565 269.332 9.089 268.086 9.62327L249.603 17.5458L249.8 18.0054L249.603 17.5458C248.312 18.0994 248.425 19.9782 249.778 20.3676L249.778 20.3676L256.976 22.4391L256.976 22.4391C257.138 22.4859 257.267 22.6145 257.315 22.7818L259.371 30.0321L259.852 29.8957L259.371 30.0321C259.757 31.3911 261.63 31.5107 262.181 30.2071L261.72 30.0125L262.181 30.2071L270.046 11.5891Z" fill="#0097E6" stroke="#0097E6"/>
+    <g filter="url(#filter0_di)">
+        <path d="M30.9609 42.4766C30.9609 41.8672 30.7734 41.4609 30.3984 41.2578C30.0391 41.0547 29.5078 40.9531 28.8047 40.9531H17.9297V39.2656H28.8047C31.3672 39.2656 32.6484 40.3359 32.6484 42.4766C32.6484 43.6797 32.2969 44.5469 31.5938 45.0781C30.8906 45.6094 29.9609 45.875 28.8047 45.875H17.9297V44.1875H28.8047C30.2422 44.1875 30.9609 43.6172 30.9609 42.4766ZM16.9922 56H15.3047V27.3125H28.8047C29.8672 27.3125 30.7734 27.6406 31.5234 28.2969C32.2734 28.9375 32.6484 29.7422 32.6484 30.7109C32.6484 31.9922 32.2969 32.9219 31.5938 33.5C30.8906 34.0625 29.9609 34.3438 28.8047 34.3438H17.9297V32.6562H28.8047C30.2422 32.6562 30.9609 32.0078 30.9609 30.7109C30.9609 30.1797 30.7578 29.7656 30.3516 29.4688C29.9453 29.1562 29.4297 29 28.8047 29H16.9922V56ZM17.9297 36.0312H28.8047C30.3516 36.0312 31.6562 35.5703 32.7188 34.6484C33.7969 33.7109 34.3359 32.3984 34.3359 30.7109C34.3359 29.3516 33.7734 28.1641 32.6484 27.1484C31.5391 26.1328 30.2578 25.625 28.8047 25.625H13.6172V56H11.9297V23.9375H28.8047C30.7266 23.9375 32.4062 24.6172 33.8438 25.9766C35.2969 27.3203 36.0234 28.8984 36.0234 30.7109C36.0234 32.1328 35.6875 33.375 35.0156 34.4375C34.3594 35.5 33.4688 36.3203 32.3438 36.8984C34.7969 37.7734 36.0234 39.5547 36.0234 42.2422C36.0234 44.3672 35.3203 46.0703 33.9141 47.3516C32.5078 48.6172 30.8047 49.25 28.8047 49.25H17.9297V47.5625H28.8047C30.3516 47.5625 31.6562 47.1016 32.7188 46.1797C33.7969 45.2422 34.3359 43.9297 34.3359 42.2422C34.3359 41.1016 33.9297 40.0938 33.1172 39.2188C32.3047 38.3438 31.1094 37.8359 29.5312 37.6953C29.375 37.7109 29.1328 37.7188 28.8047 37.7188H17.9297V36.0312ZM39.3984 30.7109C39.3984 32.8984 38.625 34.875 37.0781 36.6406C38.625 38.3594 39.3984 40.2266 39.3984 42.2422C39.3984 44.1953 38.9062 45.9766 37.9219 47.5859C36.9531 49.1797 35.6562 50.4219 34.0312 51.3125C32.4219 52.1875 30.6797 52.625 28.8047 52.625H17.9297V50.9375H28.8047C31.2422 50.9375 33.3359 50.125 35.0859 48.5C36.8359 46.8594 37.7109 44.7734 37.7109 42.2422C37.7109 41.3359 37.4766 40.3359 37.0078 39.2422C36.5547 38.1328 35.9219 37.2891 35.1094 36.7109C36.8438 35.0859 37.7109 33.0859 37.7109 30.7109C37.7109 28.4297 36.8203 26.4531 35.0391 24.7812C33.2578 23.0938 31.1797 22.25 28.8047 22.25H10.2422V56H8.55469V20.5625H28.8047C30.6641 20.5625 32.4062 21.0234 34.0312 21.9453C35.6719 22.8516 36.9766 24.0938 37.9453 25.6719C38.9141 27.2344 39.3984 28.9141 39.3984 30.7109ZM39 36.7344C40.3906 34.875 41.0859 32.8672 41.0859 30.7109C41.0859 28.6016 40.5234 26.6406 39.3984 24.8281C38.2891 23 36.7891 21.5547 34.8984 20.4922C33.0078 19.4141 30.9766 18.875 28.8047 18.875H6.86719V56H5.17969V17.1875H28.8047C31.2734 17.1875 33.5781 17.7969 35.7188 19.0156C37.875 20.2344 39.5859 21.8906 40.8516 23.9844C42.1328 26.0625 42.7734 28.3047 42.7734 30.7109C42.7734 33.0078 42.125 35.0234 40.8281 36.7578C42.125 38.5547 42.7734 40.3828 42.7734 42.2422C42.7734 44.3359 42.3984 46.2578 41.6484 48.0078C40.8984 49.7422 39.8828 51.1875 38.6016 52.3438C37.3359 53.5 35.8594 54.3984 34.1719 55.0391C32.4844 55.6797 30.6953 56 28.8047 56H17.9297V54.3125H28.8047C30.4453 54.3125 32.0156 54.0078 33.5156 53.3984C35.0156 52.7734 36.3203 51.9297 37.4297 50.8672C38.5391 49.8047 39.4219 48.5234 40.0781 47.0234C40.75 45.5234 41.0859 43.9297 41.0859 42.2422C41.0859 41.4922 40.9062 40.5938 40.5469 39.5469C40.2031 38.4844 39.6875 37.5469 39 36.7344ZM70.5234 39.3594C70.5234 37.3594 69.9062 35.8125 68.6719 34.7188C67.4375 33.625 65.8047 33.0781 63.7734 33.0781C62.0547 33.0781 60.5781 33.6719 59.3438 34.8594C58.1094 36.0469 57.4922 37.4688 57.4922 39.125C57.4922 40.0625 57.6172 40.9062 57.8672 41.6562C58.1172 42.3906 58.4531 43.0078 58.875 43.5078C59.3125 43.9922 59.8281 44.3984 60.4219 44.7266C61.0156 45.0391 61.6328 45.2656 62.2734 45.4062C62.9297 45.5469 63.625 45.6172 64.3594 45.6172C66.1406 45.6172 67.7656 45.1797 69.2344 44.3047V45.9922C67.7188 46.7891 66.0859 47.1875 64.3359 47.1875C63.2109 47.1875 62.1484 47.0391 61.1484 46.7422C60.1641 46.4297 59.2656 45.9688 58.4531 45.3594C57.6406 44.7344 56.9922 43.8906 56.5078 42.8281C56.0391 41.75 55.8047 40.5156 55.8047 39.125C55.8047 36.9688 56.5625 35.1406 58.0781 33.6406C59.5938 32.1406 61.4922 31.3906 63.7734 31.3906C66.2891 31.3906 68.3203 32.1016 69.8672 33.5234C71.4297 34.9297 72.2109 36.875 72.2109 39.3594V56H70.5234V39.3594ZM75.5859 39.3594V56H73.8984V39.3594C73.8984 36.4062 72.9531 34.0625 71.0625 32.3281C69.1719 30.5781 66.7422 29.7031 63.7734 29.7031C61.0234 29.7031 58.7266 30.6172 56.8828 32.4453C55.0391 34.2734 54.1172 36.5 54.1172 39.125C54.1172 40.7188 54.3906 42.1484 54.9375 43.4141C55.5 44.6797 56.2578 45.6953 57.2109 46.4609C58.1641 47.2266 59.2266 47.8125 60.3984 48.2188C61.5859 48.6094 62.8359 48.8047 64.1484 48.8047C65.9297 48.8047 67.625 48.3906 69.2344 47.5625V49.3438C67.5469 50.0469 65.8438 50.3984 64.125 50.3984C62.5781 50.3984 61.1094 50.1641 59.7188 49.6953C58.3281 49.2109 57.0859 48.5156 55.9922 47.6094C54.8984 46.6875 54.0312 45.5 53.3906 44.0469C52.75 42.5781 52.4297 40.9375 52.4297 39.125C52.4297 36.0469 53.5156 33.4297 55.6875 31.2734C57.875 29.1016 60.5703 28.0156 63.7734 28.0156C66.0234 28.0156 68.0391 28.4766 69.8203 29.3984C71.6016 30.3047 73.0078 31.625 74.0391 33.3594C75.0703 35.0938 75.5859 37.0938 75.5859 39.3594ZM78.9609 39.3594V56H77.2734V39.3594C77.2734 36.7812 76.6797 34.5 75.4922 32.5156C74.3203 30.5156 72.7109 28.9844 70.6641 27.9219C68.6172 26.8594 66.3203 26.3281 63.7734 26.3281C61.3516 26.3281 59.1406 26.9062 57.1406 28.0625C55.1406 29.2031 53.5703 30.75 52.4297 32.7031C51.3047 34.6562 50.7422 36.7969 50.7422 39.125C50.7422 41.125 51.1094 42.9531 51.8438 44.6094C52.5781 46.25 53.5625 47.6016 54.7969 48.6641C56.0469 49.7266 57.4609 50.5547 59.0391 51.1484C60.6328 51.7266 62.2969 52.0156 64.0312 52.0156C65.7969 52.0156 67.5312 51.6484 69.2344 50.9141V52.625C67.4531 53.2969 65.7031 53.6328 63.9844 53.6328C61.2812 53.6328 58.7969 53.0234 56.5312 51.8047C54.2656 50.5859 52.4531 48.8594 51.0938 46.625C49.7344 44.3906 49.0547 41.8906 49.0547 39.125C49.0547 37.1562 49.4297 35.2812 50.1797 33.5C50.9453 31.7188 51.9766 30.1797 53.2734 28.8828C54.5703 27.5859 56.1328 26.5547 57.9609 25.7891C59.7891 25.0234 61.7266 24.6406 63.7734 24.6406C66.6328 24.6406 69.2109 25.25 71.5078 26.4688C73.8203 27.6875 75.6406 29.4219 76.9688 31.6719C78.2969 33.9219 78.9609 36.4844 78.9609 39.3594ZM82.3359 39.3594V56H80.6484V39.3594C80.6484 36.1719 79.9062 33.3281 78.4219 30.8281C76.9375 28.3125 74.9141 26.375 72.3516 25.0156C69.7891 23.6406 66.9297 22.9531 63.7734 22.9531C60.7422 22.9531 57.9688 23.6797 55.4531 25.1328C52.9375 26.5859 50.9609 28.5547 49.5234 31.0391C48.0859 33.5078 47.3672 36.2031 47.3672 39.125C47.3672 41.0469 47.6719 42.875 48.2812 44.6094C48.9062 46.3281 49.75 47.8359 50.8125 49.1328C51.8906 50.4141 53.125 51.5234 54.5156 52.4609C55.9219 53.3984 57.4297 54.1094 59.0391 54.5938C60.6641 55.0781 62.3125 55.3203 63.9844 55.3203C65.8281 55.3203 67.5781 54.9688 69.2344 54.2656V56C67.5 56.6094 65.7188 56.9141 63.8906 56.9141C62 56.9141 60.1562 56.6484 58.3594 56.1172C56.5781 55.5859 54.9219 54.8047 53.3906 53.7734C51.875 52.7266 50.5391 51.5 49.3828 50.0938C48.2266 48.6719 47.3203 47.0156 46.6641 45.125C46.0078 43.2344 45.6797 41.2344 45.6797 39.125C45.6797 36.7031 46.1328 34.3984 47.0391 32.2109C47.9609 30.0078 49.2109 28.1094 50.7891 26.5156C52.3828 24.9062 54.3047 23.6328 56.5547 22.6953C58.8203 21.7422 61.2266 21.2656 63.7734 21.2656C66.3672 21.2656 68.8047 21.7031 71.0859 22.5781C73.3672 23.4531 75.3359 24.6719 76.9922 26.2344C78.6484 27.7812 79.9531 29.6875 80.9062 31.9531C81.8594 34.2188 82.3359 36.6875 82.3359 39.3594ZM86.5078 22.25H88.1953V56H86.5078V22.25ZM96.6328 22.25H98.3203V47.5625H110.766V49.25H98.3203V50.9375H110.766V52.625H98.3203V54.3125H110.766V56H96.6328V22.25ZM93.2578 22.25H94.9453V56H93.2578V22.25ZM89.8828 22.25H91.5703V56H89.8828V22.25ZM114.492 22.25H116.18V56H114.492V22.25ZM124.617 22.25H126.305V47.5625H138.75V49.25H126.305V50.9375H138.75V52.625H126.305V54.3125H138.75V56H124.617V22.25ZM121.242 22.25H122.93V56H121.242V22.25ZM117.867 22.25H119.555V56H117.867V22.25ZM152.602 22.25H154.289V56H152.602V22.25ZM149.227 22.25H150.914V56H149.227V22.25ZM145.852 22.25H147.539V56H145.852V22.25ZM142.477 22.25H144.164V56H142.477V22.25ZM174.211 39.9453H171.539C168.258 39.9453 165.734 39.2812 163.969 37.9531C162.203 36.6094 161.32 34.7656 161.32 32.4219C161.32 30.2344 162.359 28.4141 164.438 26.9609C166.516 25.4922 169.43 24.7578 173.18 24.7578C175.68 24.7578 178.008 25.1016 180.164 25.7891C182.336 26.4766 184.141 27.4453 185.578 28.6953L184.266 29.75C182.984 28.7031 181.375 27.8906 179.438 27.3125C177.5 26.7344 175.414 26.4453 173.18 26.4453C169.883 26.4453 167.359 27.0078 165.609 28.1328C163.875 29.2578 163.008 30.6875 163.008 32.4219C163.008 34.2969 163.719 35.7422 165.141 36.7578C166.578 37.7578 168.711 38.2578 171.539 38.2578H174.117C177.445 38.2578 179.992 38.9297 181.758 40.2734C183.539 41.6016 184.43 43.4375 184.43 45.7812C184.43 47.9688 183.391 49.7969 181.312 51.2656C179.234 52.7188 176.32 53.4453 172.57 53.4453C170.07 53.4453 167.734 53.1016 165.562 52.4141C163.406 51.7266 161.609 50.7578 160.172 49.5078L161.484 48.4531C162.766 49.5 164.375 50.3125 166.312 50.8906C168.25 51.4688 170.336 51.7578 172.57 51.7578C175.867 51.7578 178.383 51.1953 180.117 50.0703C181.867 48.9453 182.742 47.5156 182.742 45.7812C182.742 43.9062 182.023 42.4688 180.586 41.4688C179.164 40.4531 177.039 39.9453 174.211 39.9453ZM174.211 43.3203H171.539C167.195 43.3203 163.844 42.3359 161.484 40.3672C159.125 38.3828 157.945 35.7344 157.945 32.4219C157.945 30.9062 158.281 29.4844 158.953 28.1562C159.625 26.8281 160.586 25.6641 161.836 24.6641C163.102 23.6484 164.711 22.8516 166.664 22.2734C168.617 21.6797 170.789 21.3828 173.18 21.3828C176.195 21.3828 179.016 21.8438 181.641 22.7656C184.266 23.6719 186.453 24.9453 188.203 26.5859L186.844 27.5938C185.25 26.1562 183.258 25.0469 180.867 24.2656C178.477 23.4688 175.914 23.0703 173.18 23.0703C168.961 23.0703 165.648 23.9609 163.242 25.7422C160.836 27.5234 159.633 29.75 159.633 32.4219C159.633 35.25 160.664 37.4922 162.727 39.1484C164.789 40.8047 167.727 41.6328 171.539 41.6328H174.117C178.742 41.6328 181.055 43.0156 181.055 45.7812C181.055 47.0312 180.344 48.0625 178.922 48.875C177.516 49.6719 175.398 50.0703 172.57 50.0703C170.586 50.0703 168.742 49.8438 167.039 49.3906C165.336 48.9219 163.93 48.2578 162.82 47.3984L164.156 46.3672C166.109 47.7109 168.914 48.3828 172.57 48.3828C174.914 48.3828 176.633 48.1406 177.727 47.6562C178.82 47.1719 179.367 46.5469 179.367 45.7812C179.367 44.8906 178.984 44.2578 178.219 43.8828C177.453 43.5078 176.117 43.3203 174.211 43.3203ZM174.211 36.5703H171.539C166.977 36.5703 164.695 35.1875 164.695 32.4219C164.695 31.1719 165.398 30.1484 166.805 29.3516C168.227 28.5391 170.352 28.1328 173.18 28.1328C175.164 28.1328 177.008 28.3672 178.711 28.8359C180.414 29.2891 181.82 29.9453 182.93 30.8047L181.594 31.8359C179.641 30.4922 176.836 29.8203 173.18 29.8203C170.836 29.8203 169.117 30.0625 168.023 30.5469C166.93 31.0312 166.383 31.6562 166.383 32.4219C166.383 33.3125 166.766 33.9453 167.531 34.3203C168.297 34.6953 169.633 34.8828 171.539 34.8828H174.211C178.555 34.8828 181.906 35.875 184.266 37.8594C186.625 39.8281 187.805 42.4688 187.805 45.7812C187.805 47.2969 187.469 48.7188 186.797 50.0469C186.125 51.375 185.156 52.5469 183.891 53.5625C182.641 54.5625 181.039 55.3594 179.086 55.9531C177.133 56.5312 174.961 56.8203 172.57 56.8203C169.555 56.8203 166.734 56.3672 164.109 55.4609C161.484 54.5391 159.297 53.2578 157.547 51.6172L158.906 50.6094C160.5 52.0469 162.492 53.1641 164.883 53.9609C167.273 54.7422 169.836 55.1328 172.57 55.1328C176.789 55.1328 180.102 54.2422 182.508 52.4609C184.914 50.6797 186.117 48.4531 186.117 45.7812C186.117 42.9531 185.086 40.7109 183.023 39.0547C180.961 37.3984 178.023 36.5703 174.211 36.5703ZM197.484 31.6016H199.172V56H197.484V31.6016ZM190.734 23.9375V22.25H216.047V23.9375H190.734ZM190.734 27.3125V25.625H216.047V27.3125H190.734ZM190.734 30.6875V29H216.047V30.6875H190.734ZM207.609 31.6016H209.297V56H207.609V31.6016ZM204.234 31.6016H205.922V56H204.234V31.6016ZM200.859 31.6016H202.547V56H200.859V31.6016ZM243.727 39.3594C243.727 37.3594 243.109 35.8125 241.875 34.7188C240.641 33.625 239.008 33.0781 236.977 33.0781C235.258 33.0781 233.781 33.6719 232.547 34.8594C231.312 36.0469 230.695 37.4688 230.695 39.125C230.695 40.0625 230.82 40.9062 231.07 41.6562C231.32 42.3906 231.656 43.0078 232.078 43.5078C232.516 43.9922 233.031 44.3984 233.625 44.7266C234.219 45.0391 234.836 45.2656 235.477 45.4062C236.133 45.5469 236.828 45.6172 237.562 45.6172C239.344 45.6172 240.969 45.1797 242.438 44.3047V45.9922C240.922 46.7891 239.289 47.1875 237.539 47.1875C236.414 47.1875 235.352 47.0391 234.352 46.7422C233.367 46.4297 232.469 45.9688 231.656 45.3594C230.844 44.7344 230.195 43.8906 229.711 42.8281C229.242 41.75 229.008 40.5156 229.008 39.125C229.008 36.9688 229.766 35.1406 231.281 33.6406C232.797 32.1406 234.695 31.3906 236.977 31.3906C239.492 31.3906 241.523 32.1016 243.07 33.5234C244.633 34.9297 245.414 36.875 245.414 39.3594V56H243.727V39.3594ZM248.789 39.3594V56H247.102V39.3594C247.102 36.4062 246.156 34.0625 244.266 32.3281C242.375 30.5781 239.945 29.7031 236.977 29.7031C234.227 29.7031 231.93 30.6172 230.086 32.4453C228.242 34.2734 227.32 36.5 227.32 39.125C227.32 40.7188 227.594 42.1484 228.141 43.4141C228.703 44.6797 229.461 45.6953 230.414 46.4609C231.367 47.2266 232.43 47.8125 233.602 48.2188C234.789 48.6094 236.039 48.8047 237.352 48.8047C239.133 48.8047 240.828 48.3906 242.438 47.5625V49.3438C240.75 50.0469 239.047 50.3984 237.328 50.3984C235.781 50.3984 234.312 50.1641 232.922 49.6953C231.531 49.2109 230.289 48.5156 229.195 47.6094C228.102 46.6875 227.234 45.5 226.594 44.0469C225.953 42.5781 225.633 40.9375 225.633 39.125C225.633 36.0469 226.719 33.4297 228.891 31.2734C231.078 29.1016 233.773 28.0156 236.977 28.0156C239.227 28.0156 241.242 28.4766 243.023 29.3984C244.805 30.3047 246.211 31.625 247.242 33.3594C248.273 35.0938 248.789 37.0938 248.789 39.3594ZM252.164 39.3594V56H250.477V39.3594C250.477 36.7812 249.883 34.5 248.695 32.5156C247.523 30.5156 245.914 28.9844 243.867 27.9219C241.82 26.8594 239.523 26.3281 236.977 26.3281C234.555 26.3281 232.344 26.9062 230.344 28.0625C228.344 29.2031 226.773 30.75 225.633 32.7031C224.508 34.6562 223.945 36.7969 223.945 39.125C223.945 41.125 224.312 42.9531 225.047 44.6094C225.781 46.25 226.766 47.6016 228 48.6641C229.25 49.7266 230.664 50.5547 232.242 51.1484C233.836 51.7266 235.5 52.0156 237.234 52.0156C239 52.0156 240.734 51.6484 242.438 50.9141V52.625C240.656 53.2969 238.906 53.6328 237.188 53.6328C234.484 53.6328 232 53.0234 229.734 51.8047C227.469 50.5859 225.656 48.8594 224.297 46.625C222.938 44.3906 222.258 41.8906 222.258 39.125C222.258 37.1562 222.633 35.2812 223.383 33.5C224.148 31.7188 225.18 30.1797 226.477 28.8828C227.773 27.5859 229.336 26.5547 231.164 25.7891C232.992 25.0234 234.93 24.6406 236.977 24.6406C239.836 24.6406 242.414 25.25 244.711 26.4688C247.023 27.6875 248.844 29.4219 250.172 31.6719C251.5 33.9219 252.164 36.4844 252.164 39.3594ZM255.539 39.3594V56H253.852V39.3594C253.852 36.1719 253.109 33.3281 251.625 30.8281C250.141 28.3125 248.117 26.375 245.555 25.0156C242.992 23.6406 240.133 22.9531 236.977 22.9531C233.945 22.9531 231.172 23.6797 228.656 25.1328C226.141 26.5859 224.164 28.5547 222.727 31.0391C221.289 33.5078 220.57 36.2031 220.57 39.125C220.57 41.0469 220.875 42.875 221.484 44.6094C222.109 46.3281 222.953 47.8359 224.016 49.1328C225.094 50.4141 226.328 51.5234 227.719 52.4609C229.125 53.3984 230.633 54.1094 232.242 54.5938C233.867 55.0781 235.516 55.3203 237.188 55.3203C239.031 55.3203 240.781 54.9688 242.438 54.2656V56C240.703 56.6094 238.922 56.9141 237.094 56.9141C235.203 56.9141 233.359 56.6484 231.562 56.1172C229.781 55.5859 228.125 54.8047 226.594 53.7734C225.078 52.7266 223.742 51.5 222.586 50.0938C221.43 48.6719 220.523 47.0156 219.867 45.125C219.211 43.2344 218.883 41.2344 218.883 39.125C218.883 36.7031 219.336 34.3984 220.242 32.2109C221.164 30.0078 222.414 28.1094 223.992 26.5156C225.586 24.9062 227.508 23.6328 229.758 22.6953C232.023 21.7422 234.43 21.2656 236.977 21.2656C239.57 21.2656 242.008 21.7031 244.289 22.5781C246.57 23.4531 248.539 24.6719 250.195 26.2344C251.852 27.7812 253.156 29.6875 254.109 31.9531C255.062 34.2188 255.539 36.6875 255.539 39.3594Z" fill="#0097E6"/>
+        <path d="M30.3984 41.2578L30.1524 41.6932L30.1603 41.6975L30.3984 41.2578ZM17.9297 40.9531H17.4297V41.4531H17.9297V40.9531ZM17.9297 39.2656V38.7656H17.4297V39.2656H17.9297ZM17.9297 45.875H17.4297V46.375H17.9297V45.875ZM17.9297 44.1875V43.6875H17.4297V44.1875H17.9297ZM16.9922 56V56.5H17.4922V56H16.9922ZM15.3047 56H14.8047V56.5H15.3047V56ZM15.3047 27.3125V26.8125H14.8047V27.3125H15.3047ZM31.5234 28.2969L31.1942 28.6732L31.1987 28.6771L31.5234 28.2969ZM31.5938 33.5L31.9061 33.8905L31.9113 33.8862L31.5938 33.5ZM17.9297 34.3438H17.4297V34.8438H17.9297V34.3438ZM17.9297 32.6562V32.1562H17.4297V32.6562H17.9297ZM30.3516 29.4688L30.0466 29.8652L30.0566 29.8724L30.3516 29.4688ZM16.9922 29V28.5H16.4922V29H16.9922ZM17.9297 36.0312V35.5312H17.4297V36.0312H17.9297ZM32.7188 34.6484L33.0464 35.0261L33.0468 35.0257L32.7188 34.6484ZM32.6484 27.1484L32.3108 27.5172L32.3134 27.5196L32.6484 27.1484ZM13.6172 25.625V25.125H13.1172V25.625H13.6172ZM13.6172 56V56.5H14.1172V56H13.6172ZM11.9297 56H11.4297V56.5H11.9297V56ZM11.9297 23.9375V23.4375H11.4297V23.9375H11.9297ZM33.8438 25.9766L33.5002 26.3399L33.5043 26.3437L33.8438 25.9766ZM35.0156 34.4375L34.593 34.1703L34.5902 34.1748L35.0156 34.4375ZM32.3438 36.8984L32.1152 36.4537L31.0882 36.9815L32.1758 37.3694L32.3438 36.8984ZM33.9141 47.3516L34.2486 47.7232L34.2508 47.7212L33.9141 47.3516ZM17.9297 49.25H17.4297V49.75H17.9297V49.25ZM17.9297 47.5625V47.0625H17.4297V47.5625H17.9297ZM32.7188 46.1797L33.0464 46.5573L33.0468 46.557L32.7188 46.1797ZM29.5312 37.6953L29.5756 37.1973L29.5285 37.1931L29.4815 37.1978L29.5312 37.6953ZM17.9297 37.7188H17.4297V38.2188H17.9297V37.7188ZM37.0781 36.6406L36.702 36.3111L36.4095 36.6451L36.7065 36.9751L37.0781 36.6406ZM37.9219 47.5859L37.4953 47.325L37.4946 47.3262L37.9219 47.5859ZM34.0312 51.3125L34.2701 51.7518L34.2716 51.751L34.0312 51.3125ZM17.9297 52.625H17.4297V53.125H17.9297V52.625ZM17.9297 50.9375V50.4375H17.4297V50.9375H17.9297ZM35.0859 48.5L35.4262 48.8664L35.4279 48.8648L35.0859 48.5ZM37.0078 39.2422L36.5449 39.4313L36.5482 39.4391L37.0078 39.2422ZM35.1094 36.7109L34.7675 36.3461L34.3215 36.764L34.8195 37.1183L35.1094 36.7109ZM35.0391 24.7812L34.6952 25.1442L34.6969 25.1458L35.0391 24.7812ZM10.2422 22.25V21.75H9.74219V22.25H10.2422ZM10.2422 56V56.5H10.7422V56H10.2422ZM8.55469 56H8.05469V56.5H8.55469V56ZM8.55469 20.5625V20.0625H8.05469V20.5625H8.55469ZM34.0312 21.9453L33.7845 22.3802L33.7895 22.383L34.0312 21.9453ZM37.9453 25.6719L37.5192 25.9335L37.5204 25.9353L37.9453 25.6719ZM39 36.7344L38.5996 36.4349L38.3613 36.7536L38.6183 37.0573L39 36.7344ZM39.3984 24.8281L38.971 25.0875L38.9736 25.0918L39.3984 24.8281ZM34.8984 20.4922L34.6507 20.9265L34.6535 20.9281L34.8984 20.4922ZM6.86719 18.875V18.375H6.36719V18.875H6.86719ZM6.86719 56V56.5H7.36719V56H6.86719ZM5.17969 56H4.67969V56.5H5.17969V56ZM5.17969 17.1875V16.6875H4.67969V17.1875H5.17969ZM35.7188 19.0156L35.4714 19.4501L35.4727 19.4509L35.7188 19.0156ZM40.8516 23.9844L40.4236 24.243L40.426 24.2468L40.8516 23.9844ZM40.8281 36.7578L40.4277 36.4584L40.2077 36.7526L40.4227 37.0504L40.8281 36.7578ZM41.6484 48.0078L42.1074 48.2063L42.108 48.2048L41.6484 48.0078ZM38.6016 52.3438L38.2666 51.9725L38.2643 51.9746L38.6016 52.3438ZM17.9297 56H17.4297V56.5H17.9297V56ZM17.9297 54.3125V53.8125H17.4297V54.3125H17.9297ZM33.5156 53.3984L33.7038 53.8617L33.7079 53.86L33.5156 53.3984ZM40.0781 47.0234L39.6218 46.819L39.62 46.823L40.0781 47.0234ZM40.5469 39.5469L40.0711 39.7008L40.074 39.7092L40.5469 39.5469ZM31.4609 42.4766C31.4609 41.772 31.238 41.1439 30.6366 40.8182L30.1603 41.6975C30.3089 41.7779 30.4609 41.9624 30.4609 42.4766H31.4609ZM30.6445 40.8225C30.1744 40.5568 29.5408 40.4531 28.8047 40.4531V41.4531C29.4748 41.4531 29.9037 41.5525 30.1524 41.6931L30.6445 40.8225ZM28.8047 40.4531H17.9297V41.4531H28.8047V40.4531ZM18.4297 40.9531V39.2656H17.4297V40.9531H18.4297ZM17.9297 39.7656H28.8047V38.7656H17.9297V39.7656ZM28.8047 39.7656C30.0286 39.7656 30.8534 40.0231 31.3669 40.4521C31.8633 40.8667 32.1484 41.5093 32.1484 42.4766H33.1484C33.1484 41.3032 32.793 40.3403 32.0081 39.6846C31.2403 39.0433 30.1433 38.7656 28.8047 38.7656V39.7656ZM32.1484 42.4766C32.1484 43.5844 31.8284 44.2742 31.2923 44.6792L31.8952 45.4771C32.7654 44.8196 33.1484 43.775 33.1484 42.4766H32.1484ZM31.2923 44.6792C30.6997 45.127 29.8861 45.375 28.8047 45.375V46.375C30.0358 46.375 31.0816 46.0918 31.8952 45.4771L31.2923 44.6792ZM28.8047 45.375H17.9297V46.375H28.8047V45.375ZM18.4297 45.875V44.1875H17.4297V45.875H18.4297ZM17.9297 44.6875H28.8047V43.6875H17.9297V44.6875ZM28.8047 44.6875C29.5761 44.6875 30.2471 44.5367 30.7327 44.1514C31.2403 43.7487 31.4609 43.1587 31.4609 42.4766H30.4609C30.4609 42.9351 30.3222 43.2005 30.1111 43.3681C29.8779 43.5531 29.4707 43.6875 28.8047 43.6875V44.6875ZM16.9922 55.5H15.3047V56.5H16.9922V55.5ZM15.8047 56V27.3125H14.8047V56H15.8047ZM15.3047 27.8125H28.8047V26.8125H15.3047V27.8125ZM28.8047 27.8125C29.7548 27.8125 30.5417 28.1022 31.1942 28.6732L31.8527 27.9206C31.0052 27.179 29.9796 26.8125 28.8047 26.8125V27.8125ZM31.1987 28.6771C31.8398 29.2247 32.1484 29.8915 32.1484 30.7109H33.1484C33.1484 29.5929 32.7071 28.6503 31.8482 27.9167L31.1987 28.6771ZM32.1484 30.7109C32.1484 31.9064 31.8223 32.6648 31.2762 33.1138L31.9113 33.8862C32.7715 33.1789 33.1484 32.078 33.1484 30.7109H32.1484ZM31.2814 33.1096C30.6895 33.5831 29.8801 33.8438 28.8047 33.8438V34.8438C30.0418 34.8438 31.0918 34.5419 31.9061 33.8904L31.2814 33.1096ZM28.8047 33.8438H17.9297V34.8438H28.8047V33.8438ZM18.4297 34.3438V32.6562H17.4297V34.3438H18.4297ZM17.9297 33.1562H28.8047V32.1562H17.9297V33.1562ZM28.8047 33.1562C29.5885 33.1562 30.2698 32.9806 30.7568 32.5412C31.2536 32.0929 31.4609 31.4505 31.4609 30.7109H30.4609C30.4609 31.2683 30.3089 31.5985 30.0869 31.7987C29.8552 32.0077 29.4584 32.1562 28.8047 32.1562V33.1562ZM31.4609 30.7109C31.4609 30.0306 31.1899 29.4621 30.6466 29.0651L30.0566 29.8724C30.3258 30.0692 30.4609 30.3288 30.4609 30.7109H31.4609ZM30.6564 29.0724C30.1449 28.679 29.5151 28.5 28.8047 28.5V29.5C29.3442 29.5 29.7457 29.6335 30.0467 29.8651L30.6564 29.0724ZM28.8047 28.5H16.9922V29.5H28.8047V28.5ZM16.4922 29V56H17.4922V29H16.4922ZM17.9297 36.5312H28.8047V35.5312H17.9297V36.5312ZM28.8047 36.5312C30.459 36.5312 31.8837 36.0349 33.0464 35.0261L32.3911 34.2708C31.4288 35.1057 30.2441 35.5312 28.8047 35.5312V36.5312ZM33.0468 35.0257C34.2542 33.9759 34.8359 32.5143 34.8359 30.7109H33.8359C33.8359 32.2826 33.3395 33.446 32.3907 34.2711L33.0468 35.0257ZM34.8359 30.7109C34.8359 29.1929 34.2001 27.8756 32.9835 26.7773L32.3134 27.5196C33.3468 28.4525 33.8359 29.5102 33.8359 30.7109H34.8359ZM32.9861 26.7796C31.7881 25.6829 30.3875 25.125 28.8047 25.125V26.125C30.1281 26.125 31.2901 26.5827 32.3108 27.5172L32.9861 26.7796ZM28.8047 25.125H13.6172V26.125H28.8047V25.125ZM13.1172 25.625V56H14.1172V25.625H13.1172ZM13.6172 55.5H11.9297V56.5H13.6172V55.5ZM12.4297 56V23.9375H11.4297V56H12.4297ZM11.9297 24.4375H28.8047V23.4375H11.9297V24.4375ZM28.8047 24.4375C30.5947 24.4375 32.1525 25.0654 33.5002 26.3398L34.1873 25.6133C32.66 24.169 30.8585 23.4375 28.8047 23.4375V24.4375ZM33.5043 26.3437C34.8656 27.6025 35.5234 29.0515 35.5234 30.7109H36.5234C36.5234 28.7454 35.7282 27.0381 34.1832 25.6095L33.5043 26.3437ZM35.5234 30.7109C35.5234 32.0509 35.2079 33.1979 34.593 34.1703L35.4382 34.7047C36.1671 33.5521 36.5234 32.2147 36.5234 30.7109H35.5234ZM34.5902 34.1748C33.9825 35.1588 33.1603 35.9166 32.1152 36.4537L32.5723 37.3432C33.7772 36.724 34.7363 35.8412 35.441 34.7002L34.5902 34.1748ZM32.1758 37.3694C33.3241 37.779 34.1493 38.3866 34.6917 39.1742C35.235 39.9631 35.5234 40.975 35.5234 42.2422H36.5234C36.5234 40.8219 36.1986 39.5994 35.5153 38.6071C34.8311 37.6134 33.8166 36.8929 32.5117 36.4275L32.1758 37.3694ZM35.5234 42.2422C35.5234 44.2448 34.8668 45.8071 33.5773 46.982L34.2508 47.7212C35.7738 46.3335 36.5234 44.4896 36.5234 42.2422H35.5234ZM33.5796 46.9799C32.2707 48.1579 30.6889 48.75 28.8047 48.75V49.75C30.9205 49.75 32.7449 49.0765 34.2485 47.7232L33.5796 46.9799ZM28.8047 48.75H17.9297V49.75H28.8047V48.75ZM18.4297 49.25V47.5625H17.4297V49.25H18.4297ZM17.9297 48.0625H28.8047V47.0625H17.9297V48.0625ZM28.8047 48.0625C30.459 48.0625 31.8837 47.5662 33.0464 46.5573L32.3911 45.802C31.4288 46.6369 30.2441 47.0625 28.8047 47.0625V48.0625ZM33.0468 46.557C34.2542 45.5071 34.8359 44.0456 34.8359 42.2422H33.8359C33.8359 43.8138 33.3395 44.9773 32.3907 45.8024L33.0468 46.557ZM34.8359 42.2422C34.8359 40.969 34.3775 39.8412 33.4836 38.8785L32.7508 39.559C33.4818 40.3463 33.8359 41.2341 33.8359 42.2422H34.8359ZM33.4836 38.8785C32.5626 37.8867 31.2359 37.3452 29.5756 37.1973L29.4869 38.1933C30.9829 38.3266 32.0468 38.8008 32.7508 39.559L33.4836 38.8785ZM29.4815 37.1978C29.3514 37.2108 29.13 37.2188 28.8047 37.2188V38.2188C29.1356 38.2188 29.3986 38.2111 29.581 38.1928L29.4815 37.1978ZM28.8047 37.2188H17.9297V38.2188H28.8047V37.2188ZM18.4297 37.7188V36.0312H17.4297V37.7188H18.4297ZM38.8984 30.7109C38.8984 32.7679 38.1754 34.6295 36.702 36.3111L37.4542 36.9701C39.0746 35.1205 39.8984 33.0289 39.8984 30.7109H38.8984ZM36.7065 36.9751C38.1837 38.6165 38.8984 40.3682 38.8984 42.2422H39.8984C39.8984 40.0849 39.0663 38.1023 37.4498 36.3061L36.7065 36.9751ZM38.8984 42.2422C38.8984 44.1067 38.4299 45.7971 37.4953 47.325L38.3484 47.8468C39.3826 46.1561 39.8984 44.284 39.8984 42.2422H38.8984ZM37.4946 47.3262C36.5711 48.8455 35.3385 50.0259 33.7909 50.874L34.2716 51.751C35.974 50.8179 37.3351 49.5138 38.3491 47.8456L37.4946 47.3262ZM33.7924 50.8732C32.2581 51.7074 30.5983 52.125 28.8047 52.125V53.125C30.7611 53.125 32.5856 52.6676 34.2701 51.7518L33.7924 50.8732ZM28.8047 52.125H17.9297V53.125H28.8047V52.125ZM18.4297 52.625V50.9375H17.4297V52.625H18.4297ZM17.9297 51.4375H28.8047V50.4375H17.9297V51.4375ZM28.8047 51.4375C31.3655 51.4375 33.5816 50.5792 35.4262 48.8664L34.7457 48.1336C33.0903 49.6708 31.1189 50.4375 28.8047 50.4375V51.4375ZM35.4279 48.8648C37.2848 47.1239 38.2109 44.9031 38.2109 42.2422H37.2109C37.2109 44.6438 36.3871 46.5948 34.744 48.1352L35.4279 48.8648ZM38.2109 42.2422C38.2109 41.2506 37.9549 40.1827 37.4674 39.0452L36.5482 39.4391C36.9983 40.4892 37.2109 41.4213 37.2109 42.2422H38.2109ZM37.4707 39.0531C36.989 37.8739 36.3033 36.9468 35.3993 36.3035L34.8195 37.1183C35.5405 37.6313 36.1203 38.3917 36.5449 39.4313L37.4707 39.0531ZM35.4512 37.0758C37.2864 35.3564 38.2109 33.2243 38.2109 30.7109H37.2109C37.2109 32.9476 36.4011 34.8155 34.7675 36.3461L35.4512 37.0758ZM38.2109 30.7109C38.2109 28.281 37.2552 26.1756 35.3812 24.4167L34.6969 25.1458C36.3854 26.7307 37.2109 28.5784 37.2109 30.7109H38.2109ZM35.3829 24.4183C33.5122 22.646 31.3122 21.75 28.8047 21.75V22.75C31.0471 22.75 33.0034 23.5415 34.6952 25.1442L35.3829 24.4183ZM28.8047 21.75H10.2422V22.75H28.8047V21.75ZM9.74219 22.25V56H10.7422V22.25H9.74219ZM10.2422 55.5H8.55469V56.5H10.2422V55.5ZM9.05469 56V20.5625H8.05469V56H9.05469ZM8.55469 21.0625H28.8047V20.0625H8.55469V21.0625ZM28.8047 21.0625C30.5771 21.0625 32.2344 21.5008 33.7845 22.3802L34.278 21.5104C32.5781 20.5461 30.751 20.0625 28.8047 20.0625V21.0625ZM33.7895 22.383C35.3554 23.248 36.5962 24.4299 37.5192 25.9335L38.3714 25.4103C37.3569 23.7576 35.9883 22.4551 34.273 21.5076L33.7895 22.383ZM37.5204 25.9353C38.4413 27.4207 38.8984 29.0094 38.8984 30.7109H39.8984C39.8984 28.8187 39.3869 27.0481 38.3703 25.4084L37.5204 25.9353ZM39.4004 37.0338C40.8504 35.0951 41.5859 32.9836 41.5859 30.7109H40.5859C40.5859 32.7508 39.9308 34.6549 38.5996 36.4349L39.4004 37.0338ZM41.5859 30.7109C41.5859 28.5073 40.9968 26.4552 39.8233 24.5644L38.9736 25.0918C40.0501 26.8261 40.5859 28.6959 40.5859 30.7109H41.5859ZM39.8259 24.5687C38.6712 22.666 37.1079 21.1603 35.1434 20.0563L34.6535 20.9281C36.4702 21.949 37.9069 23.334 38.971 25.0875L39.8259 24.5687ZM35.1461 20.0578C33.1802 18.9368 31.0636 18.375 28.8047 18.375V19.375C30.8895 19.375 32.8354 19.8913 34.6508 20.9265L35.1461 20.0578ZM28.8047 18.375H6.86719V19.375H28.8047V18.375ZM6.36719 18.875V56H7.36719V18.875H6.36719ZM6.86719 55.5H5.17969V56.5H6.86719V55.5ZM5.67969 56V17.1875H4.67969V56H5.67969ZM5.17969 17.6875H28.8047V16.6875H5.17969V17.6875ZM28.8047 17.6875C31.1869 17.6875 33.4063 18.2744 35.4714 19.4501L35.9661 18.5811C33.7499 17.3193 31.36 16.6875 28.8047 16.6875V17.6875ZM35.4727 19.4509C37.5552 20.6279 39.2031 22.2238 40.4237 24.243L41.2795 23.7257C39.9688 21.5575 38.1948 19.8408 35.9648 18.5803L35.4727 19.4509ZM40.426 24.2468C41.6593 26.2472 42.2734 28.3988 42.2734 30.7109H43.2734C43.2734 28.2106 42.6063 25.8778 41.2772 23.722L40.426 24.2468ZM42.2734 30.7109C42.2734 32.9068 41.6559 34.8159 40.4277 36.4584L41.2286 37.0572C42.5941 35.231 43.2734 33.1088 43.2734 30.7109H42.2734ZM40.4227 37.0504C41.6702 38.7789 42.2734 40.5068 42.2734 42.2422H43.2734C43.2734 40.2588 42.5798 38.3305 41.2336 36.4652L40.4227 37.0504ZM42.2734 42.2422C42.2734 44.275 41.9096 46.129 41.1889 47.8109L42.108 48.2048C42.8872 46.3866 43.2734 44.3969 43.2734 42.2422H42.2734ZM41.1895 47.8094C40.4652 49.4842 39.4897 50.8687 38.2666 51.9726L38.9365 52.7149C40.2759 51.5063 41.3316 50.0001 42.1074 48.2063L41.1895 47.8094ZM38.2643 51.9746C37.0477 53.0861 35.6261 53.9522 33.9944 54.5716L34.3493 55.5065C36.0927 54.8447 37.6242 53.9139 38.9388 52.7129L38.2643 51.9746ZM33.9944 54.5716C32.3663 55.1897 30.6376 55.5 28.8047 55.5V56.5C30.753 56.5 32.6025 56.1697 34.3493 55.5065L33.9944 54.5716ZM28.8047 55.5H17.9297V56.5H28.8047V55.5ZM18.4297 56V54.3125H17.4297V56H18.4297ZM17.9297 54.8125H28.8047V53.8125H17.9297V54.8125ZM28.8047 54.8125C30.5089 54.8125 32.1432 54.4957 33.7038 53.8617L33.3274 52.9352C31.888 53.52 30.3817 53.8125 28.8047 53.8125V54.8125ZM33.7079 53.86C35.2615 53.2126 36.6191 52.3359 37.7755 51.2283L37.0838 50.5061C36.0215 51.5235 34.7697 52.3342 33.3233 52.9369L33.7079 53.86ZM37.7755 51.2283C38.9356 50.1172 39.8553 48.7803 40.5362 47.2238L39.62 46.823C38.9885 48.2666 38.1425 49.4922 37.0838 50.5061L37.7755 51.2283ZM40.5344 47.2278C41.2359 45.6618 41.5859 43.9982 41.5859 42.2422H40.5859C40.5859 43.8612 40.2641 45.3851 39.6218 46.819L40.5344 47.2278ZM41.5859 42.2422C41.5859 41.4174 41.3894 40.4612 41.0198 39.3845L40.074 39.7092C40.4231 40.7263 40.5859 41.567 40.5859 42.2422H41.5859ZM41.0226 39.393C40.6598 38.2716 40.1133 37.276 39.3817 36.4114L38.6183 37.0573C39.2617 37.8177 39.7464 38.6971 40.0712 39.7008L41.0226 39.393ZM57.8672 41.6562L57.3928 41.8144L57.3939 41.8174L57.8672 41.6562ZM58.875 43.5078L58.4929 43.8302L58.4983 43.8367L58.5039 43.843L58.875 43.5078ZM60.4219 44.7266L60.18 45.1643L60.189 45.169L60.4219 44.7266ZM62.2734 45.4062L62.1662 45.8946L62.1687 45.8952L62.2734 45.4062ZM69.2344 44.3047H69.7344V43.4248L68.9785 43.8751L69.2344 44.3047ZM69.2344 45.9922L69.4671 46.4347L69.7344 46.2942V45.9922H69.2344ZM61.1484 46.7422L60.9971 47.2188L61.0061 47.2215L61.1484 46.7422ZM58.4531 45.3594L58.1482 45.7557L58.1531 45.7594L58.4531 45.3594ZM56.5078 42.8281L56.0492 43.0275L56.0529 43.0355L56.5078 42.8281ZM69.8672 33.5234L69.5288 33.8916L69.5327 33.8951L69.8672 33.5234ZM72.2109 56V56.5H72.7109V56H72.2109ZM70.5234 56H70.0234V56.5H70.5234V56ZM75.5859 56V56.5H76.0859V56H75.5859ZM73.8984 56H73.3984V56.5H73.8984V56ZM71.0625 32.3281L70.7229 32.6951L70.7245 32.6966L71.0625 32.3281ZM54.9375 43.4141L54.4785 43.6124L54.4806 43.6171L54.9375 43.4141ZM60.3984 48.2188L60.2346 48.6912L60.2422 48.6937L60.3984 48.2188ZM69.2344 47.5625H69.7344V46.7429L69.0056 47.1179L69.2344 47.5625ZM69.2344 49.3438L69.4267 49.8053L69.7344 49.6771V49.3438H69.2344ZM59.7188 49.6953L59.5543 50.1675L59.559 50.1691L59.7188 49.6953ZM55.9922 47.6094L55.6699 47.9917L55.6732 47.9944L55.9922 47.6094ZM53.3906 44.0469L52.9323 44.2468L52.9331 44.2486L53.3906 44.0469ZM55.6875 31.2734L56.0398 31.6283L56.0398 31.6283L55.6875 31.2734ZM69.8203 29.3984L69.5905 29.8425L69.5936 29.8441L69.8203 29.3984ZM78.9609 56V56.5H79.4609V56H78.9609ZM77.2734 56H76.7734V56.5H77.2734V56ZM75.4922 32.5156L75.0608 32.7684L75.0631 32.7724L75.4922 32.5156ZM57.1406 28.0625L57.3883 28.4968L57.3909 28.4954L57.1406 28.0625ZM52.4297 32.7031L51.9979 32.451L51.9964 32.4536L52.4297 32.7031ZM51.8438 44.6094L51.3867 44.812L51.3874 44.8137L51.8438 44.6094ZM54.7969 48.6641L54.4707 49.043L54.4731 49.045L54.7969 48.6641ZM59.0391 51.1484L58.863 51.6164L58.8686 51.6185L59.0391 51.1484ZM69.2344 50.9141H69.7344V50.154L69.0364 50.4549L69.2344 50.9141ZM69.2344 52.625L69.4108 53.0928L69.7344 52.9708V52.625H69.2344ZM50.1797 33.5L49.7203 33.3025L49.7189 33.306L50.1797 33.5ZM71.5078 26.4688L71.2735 26.9104L71.2747 26.9111L71.5078 26.4688ZM82.3359 56V56.5H82.8359V56H82.3359ZM80.6484 56H80.1484V56.5H80.6484V56ZM78.4219 30.8281L77.9913 31.0822L77.9919 31.0834L78.4219 30.8281ZM72.3516 25.0156L72.1152 25.4562L72.1172 25.4573L72.3516 25.0156ZM49.5234 31.0391L49.9555 31.2907L49.9562 31.2895L49.5234 31.0391ZM48.2812 44.6094L47.8095 44.7751L47.8114 44.7802L48.2812 44.6094ZM50.8125 49.1328L50.4257 49.4497L50.4299 49.4547L50.8125 49.1328ZM54.5156 52.4609L54.2361 52.8755L54.2383 52.877L54.5156 52.4609ZM59.0391 54.5938L58.895 55.0725L58.8962 55.0729L59.0391 54.5938ZM69.2344 54.2656H69.7344V53.5102L69.039 53.8054L69.2344 54.2656ZM69.2344 56L69.4001 56.4717L69.7344 56.3543V56H69.2344ZM58.3594 56.1172L58.2165 56.5963L58.2176 56.5967L58.3594 56.1172ZM53.3906 53.7734L53.1064 54.1849L53.1113 54.1882L53.3906 53.7734ZM49.3828 50.0938L48.9949 50.4092L48.9966 50.4113L49.3828 50.0938ZM47.0391 32.2109L46.5778 32.0179L46.5771 32.0196L47.0391 32.2109ZM50.7891 26.5156L50.4338 26.1638L50.4338 26.1638L50.7891 26.5156ZM56.5547 22.6953L56.747 23.1569L56.7486 23.1562L56.5547 22.6953ZM76.9922 26.2344L76.6491 26.5981L76.6509 26.5998L76.9922 26.2344ZM71.0234 39.3594C71.0234 37.2494 70.3675 35.5531 69.0035 34.3445L68.3403 35.093C69.445 36.0719 70.0234 37.4694 70.0234 39.3594H71.0234ZM69.0035 34.3445C67.6577 33.1521 65.8974 32.5781 63.7734 32.5781V33.5781C65.712 33.5781 67.2173 34.0979 68.3403 35.093L69.0035 34.3445ZM63.7734 32.5781C61.9259 32.5781 60.325 33.2216 58.9971 34.499L59.6904 35.2197C60.8313 34.1222 62.1834 33.5781 63.7734 33.5781V32.5781ZM58.9971 34.499C57.6685 35.7772 56.9922 37.3276 56.9922 39.125H57.9922C57.9922 37.6099 58.5502 36.3166 59.6904 35.2197L58.9971 34.499ZM56.9922 39.125C56.9922 40.1065 57.123 41.0049 57.3928 41.8144L58.3415 41.4981C58.1113 40.8076 57.9922 40.0185 57.9922 39.125H56.9922ZM57.3939 41.8174C57.6603 42.6 58.0241 43.2747 58.4929 43.8302L59.2571 43.1854C58.8822 42.741 58.5741 42.1812 58.3405 41.4951L57.3939 41.8174ZM58.5039 43.843C58.98 44.37 59.5398 44.8104 60.18 45.1642L60.6637 44.2889C60.1165 43.9865 59.645 43.6144 59.2461 43.1727L58.5039 43.843ZM60.189 45.169C60.8215 45.5019 61.4808 45.7442 62.1662 45.8946L62.3806 44.9179C61.7848 44.7871 61.2098 44.5762 60.6547 44.2841L60.189 45.169ZM62.1687 45.8952C62.8624 46.0438 63.5931 46.1172 64.3594 46.1172V45.1172C63.6569 45.1172 62.997 45.0499 62.3782 44.9173L62.1687 45.8952ZM64.3594 46.1172C66.2268 46.1172 67.9408 45.6573 69.4903 44.7342L68.9785 43.8751C67.5904 44.7021 66.0545 45.1172 64.3594 45.1172V46.1172ZM68.7344 44.3047V45.9922H69.7344V44.3047H68.7344ZM69.0017 45.5496C67.5594 46.3079 66.0067 46.6875 64.3359 46.6875V47.6875C66.1652 47.6875 67.8781 47.2702 69.4671 46.4347L69.0017 45.5496ZM64.3359 46.6875C63.2554 46.6875 62.2411 46.545 61.2907 46.2629L61.0061 47.2215C62.0558 47.5331 63.1665 47.6875 64.3359 47.6875V46.6875ZM61.2997 46.2656C60.3679 45.9698 59.5199 45.5345 58.7531 44.9594L58.1531 45.7594C59.0113 46.403 59.9602 46.8896 60.9971 47.2187L61.2997 46.2656ZM58.758 44.9631C58.0187 44.3944 57.418 43.6194 56.9628 42.6207L56.0529 43.0355C56.5663 44.1619 57.2625 45.0744 58.1483 45.7557L58.758 44.9631ZM56.9663 42.6288C56.5295 41.624 56.3047 40.4594 56.3047 39.125H55.3047C55.3047 40.5719 55.5486 41.876 56.0493 43.0275L56.9663 42.6288ZM56.3047 39.125C56.3047 37.1006 57.0103 35.4009 58.4298 33.996L57.7264 33.2852C56.1147 34.8803 55.3047 36.8369 55.3047 39.125H56.3047ZM58.4298 33.996C59.8449 32.5956 61.6137 31.8906 63.7734 31.8906V30.8906C61.3706 30.8906 59.3426 31.6857 57.7264 33.2852L58.4298 33.996ZM63.7734 31.8906C66.1899 31.8906 68.0918 32.5707 69.5288 33.8916L70.2056 33.1553C68.5488 31.6325 66.3882 30.8906 63.7734 30.8906V31.8906ZM69.5327 33.8951C70.9706 35.1892 71.7109 36.9885 71.7109 39.3594H72.7109C72.7109 36.7615 71.8888 34.6702 70.2017 33.1518L69.5327 33.8951ZM71.7109 39.3594V56H72.7109V39.3594H71.7109ZM72.2109 55.5H70.5234V56.5H72.2109V55.5ZM71.0234 56V39.3594H70.0234V56H71.0234ZM75.0859 39.3594V56H76.0859V39.3594H75.0859ZM75.5859 55.5H73.8984V56.5H75.5859V55.5ZM74.3984 56V39.3594H73.3984V56H74.3984ZM74.3984 39.3594C74.3984 36.2894 73.4103 33.8034 71.4005 31.9597L70.7245 32.6966C72.4959 34.3216 73.3984 36.5231 73.3984 39.3594H74.3984ZM71.4021 31.9612C69.405 30.1126 66.8476 29.2031 63.7734 29.2031V30.2031C66.6368 30.2031 68.9387 31.0436 70.7229 32.6951L71.4021 31.9612ZM63.7734 29.2031C60.9002 29.2031 58.4741 30.1634 56.5308 32.0903L57.2349 32.8004C58.979 31.071 61.1467 30.2031 63.7734 30.2031V29.2031ZM56.5308 32.0903C54.5912 34.0134 53.6172 36.3683 53.6172 39.125H54.6172C54.6172 36.6317 55.487 34.5334 57.2349 32.8004L56.5308 32.0903ZM53.6172 39.125C53.6172 40.7764 53.9007 42.2751 54.4785 43.6124L55.3965 43.2157C54.8806 42.0217 54.6172 40.6611 54.6172 39.125H53.6172ZM54.4806 43.6171C55.0711 44.9458 55.8747 46.0289 56.8978 46.8507L57.5241 46.0711C56.6409 45.3617 55.9289 44.4136 55.3944 43.211L54.4806 43.6171ZM56.8978 46.8507C57.8963 47.6528 59.0094 48.2664 60.2347 48.6912L60.5622 47.7463C59.4437 47.3586 58.4319 46.8003 57.5241 46.0711L56.8978 46.8507ZM60.2422 48.6937C61.4826 49.1017 62.7855 49.3047 64.1484 49.3047V48.3047C62.8863 48.3047 61.6893 48.117 60.5547 47.7438L60.2422 48.6937ZM64.1484 49.3047C66.0106 49.3047 67.7842 48.871 69.4631 48.0071L69.0056 47.1179C67.4658 47.9102 65.8488 48.3047 64.1484 48.3047V49.3047ZM68.7344 47.5625V49.3438H69.7344V47.5625H68.7344ZM69.0421 48.8822C67.4116 49.5616 65.7736 49.8984 64.125 49.8984V50.8984C65.9139 50.8984 67.6822 50.5322 69.4267 49.8053L69.0421 48.8822ZM64.125 49.8984C62.6295 49.8984 61.2149 49.672 59.8785 49.2215L59.559 50.1691C61.0039 50.6561 62.5268 50.8984 64.125 50.8984V49.8984ZM59.8832 49.2231C58.5468 48.7576 57.3574 48.0912 56.3112 47.2244L55.6732 47.9944C56.8145 48.9401 58.1095 49.6642 59.5543 50.1675L59.8832 49.2231ZM56.3144 47.2271C55.2851 46.3595 54.4614 45.2363 53.8481 43.8452L52.9331 44.2486C53.6011 45.7637 54.5118 47.0155 55.67 47.9917L56.3144 47.2271ZM53.8489 43.847C53.2392 42.449 52.9297 40.8777 52.9297 39.125H51.9297C51.9297 40.9973 52.2608 42.7072 52.9323 44.2468L53.8489 43.847ZM52.9297 39.125C52.9297 36.1792 53.9631 33.69 56.0398 31.6283L55.3352 30.9186C53.0682 33.1693 51.9297 35.9145 51.9297 39.125H52.9297ZM56.0398 31.6283C58.1296 29.5534 60.6965 28.5156 63.7734 28.5156V27.5156C60.4441 27.5156 57.6204 28.6498 55.3352 30.9186L56.0398 31.6283ZM63.7734 28.5156C65.9533 28.5156 67.8885 28.9617 69.5905 29.8425L70.0501 28.9544C68.1896 27.9915 66.0936 27.5156 63.7734 27.5156V28.5156ZM69.5936 29.8441C71.2897 30.707 72.6259 31.961 73.6093 33.6149L74.4688 33.1038C73.3897 31.289 71.9134 29.9024 70.047 28.9528L69.5936 29.8441ZM73.6093 33.6149C74.5891 35.2628 75.0859 37.1724 75.0859 39.3594H76.0859C76.0859 37.0151 75.5515 34.9247 74.4688 33.1038L73.6093 33.6149ZM78.4609 39.3594V56H79.4609V39.3594H78.4609ZM78.9609 55.5H77.2734V56.5H78.9609V55.5ZM77.7734 56V39.3594H76.7734V56H77.7734ZM77.7734 39.3594C77.7734 36.7015 77.1603 34.3295 75.9212 32.2589L75.0631 32.7724C76.199 34.6705 76.7734 36.861 76.7734 39.3594H77.7734ZM75.9236 32.2629C74.7043 30.1819 73.0254 28.5843 70.8944 27.4781L70.4337 28.3656C72.3964 29.3845 73.9363 30.8493 75.0608 32.7684L75.9236 32.2629ZM70.8944 27.4781C68.7692 26.3749 66.392 25.8281 63.7734 25.8281V26.8281C66.2486 26.8281 68.4652 27.3438 70.4337 28.3656L70.8944 27.4781ZM63.7734 25.8281C61.2681 25.8281 58.9702 26.4272 56.8904 27.6296L57.3909 28.4954C59.3111 27.3853 61.435 26.8281 63.7734 26.8281V25.8281ZM56.8929 27.6282C54.8181 28.8115 53.1837 30.4206 51.9979 32.451L52.8615 32.9553C53.957 31.0794 55.4632 29.5948 57.3883 28.4968L56.8929 27.6282ZM51.9964 32.4536C50.8262 34.4852 50.2422 36.7123 50.2422 39.125H51.2422C51.2422 36.8815 51.7832 34.8273 52.863 32.9527L51.9964 32.4536ZM50.2422 39.125C50.2422 41.1875 50.6212 43.0856 51.3867 44.812L52.3008 44.4067C51.5976 42.8206 51.2422 41.0625 51.2422 39.125H50.2422ZM51.3874 44.8137C52.1486 46.5143 53.1751 47.9278 54.4707 49.043L55.1231 48.2851C53.9499 47.2753 53.0076 45.9857 52.3001 44.4051L51.3874 44.8137ZM54.4731 49.045C55.7681 50.1459 57.2325 51.003 58.863 51.6164L59.2151 50.6805C57.6894 50.1064 56.3256 49.3073 55.1207 48.2831L54.4731 49.045ZM58.8686 51.6185C60.5182 52.2169 62.2401 52.5156 64.0312 52.5156V51.5156C62.3537 51.5156 60.7474 51.2362 59.2096 50.6784L58.8686 51.6185ZM64.0312 52.5156C65.8684 52.5156 67.6699 52.1331 69.4324 51.3732L69.0364 50.4549C67.3926 51.1637 65.7254 51.5156 64.0312 51.5156V52.5156ZM68.7344 50.9141V52.625H69.7344V50.9141H68.7344ZM69.0579 52.1572C67.327 52.8101 65.6367 53.1328 63.9844 53.1328V54.1328C65.7696 54.1328 67.5792 53.7837 69.4108 53.0928L69.0579 52.1572ZM63.9844 53.1328C61.3596 53.1328 58.9572 52.5419 56.7681 51.3644L56.2944 52.245C58.6365 53.5049 61.2029 54.1328 63.9844 54.1328V53.1328ZM56.7681 51.3644C54.5822 50.1885 52.8348 48.5248 51.5209 46.3651L50.6666 46.8849C52.0714 49.1939 53.949 50.9834 56.2944 52.245L56.7681 51.3644ZM51.5209 46.3651C50.212 44.2136 49.5547 41.8045 49.5547 39.125H48.5547C48.5547 41.9768 49.2568 44.5676 50.6666 46.8849L51.5209 46.3651ZM49.5547 39.125C49.5547 37.2216 49.9169 35.4127 50.6405 33.694L49.7189 33.306C48.9425 35.1498 48.5547 37.0909 48.5547 39.125H49.5547ZM50.6391 33.6974C51.3811 31.9711 52.3775 30.4859 53.627 29.2364L52.9199 28.5293C51.5756 29.8735 50.5095 31.4664 49.7203 33.3026L50.6391 33.6974ZM53.627 29.2364C54.8743 27.9891 56.381 26.9928 58.1541 26.2503L57.7678 25.3279C55.8846 26.1166 54.2663 27.1828 52.9199 28.5293L53.627 29.2364ZM58.1541 26.2503C59.919 25.5111 61.7907 25.1406 63.7734 25.1406V24.1406C61.6625 24.1406 59.6591 24.5358 57.7678 25.3279L58.1541 26.2503ZM63.7734 25.1406C66.5594 25.1406 69.0558 25.7337 71.2735 26.9104L71.7422 26.0271C69.3661 24.7663 66.7062 24.1406 63.7734 24.1406V25.1406ZM71.2747 26.9111C73.5052 28.0866 75.2572 29.7559 76.5382 31.926L77.3993 31.4177C76.0241 29.0879 74.1354 27.2884 71.7409 26.0264L71.2747 26.9111ZM76.5382 31.926C77.8164 34.0915 78.4609 36.5645 78.4609 39.3594H79.4609C79.4609 36.4042 78.7774 33.7523 77.3993 31.4177L76.5382 31.926ZM81.8359 39.3594V56H82.8359V39.3594H81.8359ZM82.3359 55.5H80.6484V56.5H82.3359V55.5ZM81.1484 56V39.3594H80.1484V56H81.1484ZM81.1484 39.3594C81.1484 36.091 80.3864 33.1574 78.8518 30.5729L77.9919 31.0834C79.4261 33.4988 80.1484 36.2528 80.1484 39.3594H81.1484ZM78.8525 30.574C77.3216 27.9795 75.2305 25.9768 72.5859 24.5739L72.1172 25.4573C74.5977 26.7732 76.5534 28.6455 77.9913 31.0822L78.8525 30.574ZM72.588 24.575C69.9467 23.1578 67.005 22.4531 63.7734 22.4531V23.4531C66.8544 23.4531 69.6314 24.1235 72.1152 25.4562L72.588 24.575ZM63.7734 22.4531C60.6584 22.4531 57.7981 23.2009 55.203 24.6999L55.7032 25.5658C58.1394 24.1585 60.826 23.4531 63.7734 23.4531V22.4531ZM55.203 24.6999C52.6125 26.1962 50.5726 28.2275 49.0907 30.7887L49.9562 31.2895C51.3493 28.8819 53.2625 26.9757 55.7032 25.5658L55.203 24.6999ZM49.0913 30.7875C47.6083 33.3344 46.8672 36.1168 46.8672 39.125H47.8672C47.8672 36.2894 48.5636 33.6812 49.9555 31.2907L49.0913 30.7875ZM46.8672 39.125C46.8672 41.1007 47.1806 42.9851 47.8095 44.7751L48.753 44.4436C48.1632 42.7649 47.8672 40.993 47.8672 39.125H46.8672ZM47.8114 44.7802C48.4539 46.5472 49.3244 48.1054 50.4257 49.4497L51.1993 48.8159C50.1756 47.5665 49.3586 46.1091 48.7511 44.4385L47.8114 44.7802ZM50.4299 49.4547C51.538 50.7716 52.8071 51.9121 54.2361 52.8755L54.7951 52.0464C53.4429 51.1348 52.2433 50.0566 51.1951 48.8109L50.4299 49.4547ZM54.2383 52.877C55.6854 53.8417 57.238 54.5739 58.895 55.0725L59.1832 54.115C57.6213 53.6449 56.1584 52.9552 54.793 52.0449L54.2383 52.877ZM58.8962 55.0729C60.5669 55.5709 62.2634 55.8203 63.9844 55.8203V54.8203C62.3616 54.8203 60.7612 54.5853 59.1819 54.1146L58.8962 55.0729ZM63.9844 55.8203C65.8936 55.8203 67.7102 55.4559 69.4298 54.7259L69.039 53.8054C67.4461 54.4816 65.7627 54.8203 63.9844 54.8203V55.8203ZM68.7344 54.2656V56H69.7344V54.2656H68.7344ZM69.0686 55.5283C67.3872 56.119 65.662 56.4141 63.8906 56.4141V57.4141C65.7755 57.4141 67.6128 57.0997 69.4001 56.4717L69.0686 55.5283ZM63.8906 56.4141C62.0476 56.4141 60.2516 56.1552 58.5011 55.6377L58.2176 56.5967C60.0609 57.1417 61.9524 57.4141 63.8906 57.4141V56.4141ZM58.5023 55.638C56.7692 55.1211 55.1589 54.3615 53.6699 53.3587L53.1113 54.1882C54.6848 55.2479 56.3871 56.0507 58.2165 56.5963L58.5023 55.638ZM53.6748 53.362C52.1969 52.3412 50.8954 51.1462 49.769 49.7762L48.9966 50.4113C50.1827 51.8538 51.5531 53.1119 53.1065 54.1848L53.6748 53.362ZM49.7707 49.7783C48.6545 48.4056 47.7753 46.8017 47.1364 44.961L46.1917 45.289C46.8653 47.2296 47.7986 48.9382 48.9949 50.4092L49.7707 49.7783ZM47.1364 44.961C46.4994 43.1259 46.1797 41.1816 46.1797 39.125H45.1797C45.1797 41.2872 45.5162 43.3428 46.1917 45.289L47.1364 44.961ZM46.1797 39.125C46.1797 36.7673 46.6205 34.5277 47.501 32.4023L46.5771 32.0196C45.6452 34.2691 45.1797 36.639 45.1797 39.125H46.1797ZM47.5003 32.4039C48.399 30.2562 49.6143 28.4126 51.1444 26.8674L50.4338 26.1638C48.8076 27.8061 47.5229 29.7594 46.5778 32.0179L47.5003 32.4039ZM51.1443 26.8674C52.6881 25.3085 54.5533 24.0709 56.747 23.1569L56.3624 22.2338C54.056 23.1948 52.0775 24.504 50.4338 26.1638L51.1443 26.8674ZM56.7486 23.1562C58.9506 22.2298 61.2907 21.7656 63.7734 21.7656V20.7656C61.1624 20.7656 58.69 21.2546 56.3608 22.2344L56.7486 23.1562ZM63.7734 21.7656C66.3098 21.7656 68.6862 22.1932 70.9069 23.045L71.265 22.1113C68.9231 21.213 66.4246 20.7656 63.7734 20.7656V21.7656ZM70.9069 23.045C73.1317 23.8983 75.0436 25.0835 76.6491 26.5981L77.3353 25.8707C75.6282 24.2602 73.6026 23.0079 71.265 22.1113L70.9069 23.045ZM76.6509 26.5998C78.2514 28.0946 79.5172 29.9408 80.4454 32.147L81.3671 31.7592C80.389 29.4342 79.0455 27.4679 77.3335 25.869L76.6509 26.5998ZM80.4454 32.147C81.3705 34.3461 81.8359 36.7482 81.8359 39.3594H82.8359C82.8359 36.6268 82.3482 34.0914 81.3671 31.7592L80.4454 32.147ZM86.5078 22.25V21.75H86.0078V22.25H86.5078ZM88.1953 22.25H88.6953V21.75H88.1953V22.25ZM88.1953 56V56.5H88.6953V56H88.1953ZM86.5078 56H86.0078V56.5H86.5078V56ZM96.6328 22.25V21.75H96.1328V22.25H96.6328ZM98.3203 22.25H98.8203V21.75H98.3203V22.25ZM98.3203 47.5625H97.8203V48.0625H98.3203V47.5625ZM110.766 47.5625H111.266V47.0625H110.766V47.5625ZM110.766 49.25V49.75H111.266V49.25H110.766ZM98.3203 49.25V48.75H97.8203V49.25H98.3203ZM98.3203 50.9375H97.8203V51.4375H98.3203V50.9375ZM110.766 50.9375H111.266V50.4375H110.766V50.9375ZM110.766 52.625V53.125H111.266V52.625H110.766ZM98.3203 52.625V52.125H97.8203V52.625H98.3203ZM98.3203 54.3125H97.8203V54.8125H98.3203V54.3125ZM110.766 54.3125H111.266V53.8125H110.766V54.3125ZM110.766 56V56.5H111.266V56H110.766ZM96.6328 56H96.1328V56.5H96.6328V56ZM93.2578 22.25V21.75H92.7578V22.25H93.2578ZM94.9453 22.25H95.4453V21.75H94.9453V22.25ZM94.9453 56V56.5H95.4453V56H94.9453ZM93.2578 56H92.7578V56.5H93.2578V56ZM89.8828 22.25V21.75H89.3828V22.25H89.8828ZM91.5703 22.25H92.0703V21.75H91.5703V22.25ZM91.5703 56V56.5H92.0703V56H91.5703ZM89.8828 56H89.3828V56.5H89.8828V56ZM86.5078 22.75H88.1953V21.75H86.5078V22.75ZM87.6953 22.25V56H88.6953V22.25H87.6953ZM88.1953 55.5H86.5078V56.5H88.1953V55.5ZM87.0078 56V22.25H86.0078V56H87.0078ZM96.6328 22.75H98.3203V21.75H96.6328V22.75ZM97.8203 22.25V47.5625H98.8203V22.25H97.8203ZM98.3203 48.0625H110.766V47.0625H98.3203V48.0625ZM110.266 47.5625V49.25H111.266V47.5625H110.266ZM110.766 48.75H98.3203V49.75H110.766V48.75ZM97.8203 49.25V50.9375H98.8203V49.25H97.8203ZM98.3203 51.4375H110.766V50.4375H98.3203V51.4375ZM110.266 50.9375V52.625H111.266V50.9375H110.266ZM110.766 52.125H98.3203V53.125H110.766V52.125ZM97.8203 52.625V54.3125H98.8203V52.625H97.8203ZM98.3203 54.8125H110.766V53.8125H98.3203V54.8125ZM110.266 54.3125V56H111.266V54.3125H110.266ZM110.766 55.5H96.6328V56.5H110.766V55.5ZM97.1328 56V22.25H96.1328V56H97.1328ZM93.2578 22.75H94.9453V21.75H93.2578V22.75ZM94.4453 22.25V56H95.4453V22.25H94.4453ZM94.9453 55.5H93.2578V56.5H94.9453V55.5ZM93.7578 56V22.25H92.7578V56H93.7578ZM89.8828 22.75H91.5703V21.75H89.8828V22.75ZM91.0703 22.25V56H92.0703V22.25H91.0703ZM91.5703 55.5H89.8828V56.5H91.5703V55.5ZM90.3828 56V22.25H89.3828V56H90.3828ZM114.492 22.25V21.75H113.992V22.25H114.492ZM116.18 22.25H116.68V21.75H116.18V22.25ZM116.18 56V56.5H116.68V56H116.18ZM114.492 56H113.992V56.5H114.492V56ZM124.617 22.25V21.75H124.117V22.25H124.617ZM126.305 22.25H126.805V21.75H126.305V22.25ZM126.305 47.5625H125.805V48.0625H126.305V47.5625ZM138.75 47.5625H139.25V47.0625H138.75V47.5625ZM138.75 49.25V49.75H139.25V49.25H138.75ZM126.305 49.25V48.75H125.805V49.25H126.305ZM126.305 50.9375H125.805V51.4375H126.305V50.9375ZM138.75 50.9375H139.25V50.4375H138.75V50.9375ZM138.75 52.625V53.125H139.25V52.625H138.75ZM126.305 52.625V52.125H125.805V52.625H126.305ZM126.305 54.3125H125.805V54.8125H126.305V54.3125ZM138.75 54.3125H139.25V53.8125H138.75V54.3125ZM138.75 56V56.5H139.25V56H138.75ZM124.617 56H124.117V56.5H124.617V56ZM121.242 22.25V21.75H120.742V22.25H121.242ZM122.93 22.25H123.43V21.75H122.93V22.25ZM122.93 56V56.5H123.43V56H122.93ZM121.242 56H120.742V56.5H121.242V56ZM117.867 22.25V21.75H117.367V22.25H117.867ZM119.555 22.25H120.055V21.75H119.555V22.25ZM119.555 56V56.5H120.055V56H119.555ZM117.867 56H117.367V56.5H117.867V56ZM114.492 22.75H116.18V21.75H114.492V22.75ZM115.68 22.25V56H116.68V22.25H115.68ZM116.18 55.5H114.492V56.5H116.18V55.5ZM114.992 56V22.25H113.992V56H114.992ZM124.617 22.75H126.305V21.75H124.617V22.75ZM125.805 22.25V47.5625H126.805V22.25H125.805ZM126.305 48.0625H138.75V47.0625H126.305V48.0625ZM138.25 47.5625V49.25H139.25V47.5625H138.25ZM138.75 48.75H126.305V49.75H138.75V48.75ZM125.805 49.25V50.9375H126.805V49.25H125.805ZM126.305 51.4375H138.75V50.4375H126.305V51.4375ZM138.25 50.9375V52.625H139.25V50.9375H138.25ZM138.75 52.125H126.305V53.125H138.75V52.125ZM125.805 52.625V54.3125H126.805V52.625H125.805ZM126.305 54.8125H138.75V53.8125H126.305V54.8125ZM138.25 54.3125V56H139.25V54.3125H138.25ZM138.75 55.5H124.617V56.5H138.75V55.5ZM125.117 56V22.25H124.117V56H125.117ZM121.242 22.75H122.93V21.75H121.242V22.75ZM122.43 22.25V56H123.43V22.25H122.43ZM122.93 55.5H121.242V56.5H122.93V55.5ZM121.742 56V22.25H120.742V56H121.742ZM117.867 22.75H119.555V21.75H117.867V22.75ZM119.055 22.25V56H120.055V22.25H119.055ZM119.555 55.5H117.867V56.5H119.555V55.5ZM118.367 56V22.25H117.367V56H118.367ZM152.602 22.25V21.75H152.102V22.25H152.602ZM154.289 22.25H154.789V21.75H154.289V22.25ZM154.289 56V56.5H154.789V56H154.289ZM152.602 56H152.102V56.5H152.602V56ZM149.227 22.25V21.75H148.727V22.25H149.227ZM150.914 22.25H151.414V21.75H150.914V22.25ZM150.914 56V56.5H151.414V56H150.914ZM149.227 56H148.727V56.5H149.227V56ZM145.852 22.25V21.75H145.352V22.25H145.852ZM147.539 22.25H148.039V21.75H147.539V22.25ZM147.539 56V56.5H148.039V56H147.539ZM145.852 56H145.352V56.5H145.852V56ZM142.477 22.25V21.75H141.977V22.25H142.477ZM144.164 22.25H144.664V21.75H144.164V22.25ZM144.164 56V56.5H144.664V56H144.164ZM142.477 56H141.977V56.5H142.477V56ZM152.602 22.75H154.289V21.75H152.602V22.75ZM153.789 22.25V56H154.789V22.25H153.789ZM154.289 55.5H152.602V56.5H154.289V55.5ZM153.102 56V22.25H152.102V56H153.102ZM149.227 22.75H150.914V21.75H149.227V22.75ZM150.414 22.25V56H151.414V22.25H150.414ZM150.914 55.5H149.227V56.5H150.914V55.5ZM149.727 56V22.25H148.727V56H149.727ZM145.852 22.75H147.539V21.75H145.852V22.75ZM147.039 22.25V56H148.039V22.25H147.039ZM147.539 55.5H145.852V56.5H147.539V55.5ZM146.352 56V22.25H145.352V56H146.352ZM142.477 22.75H144.164V21.75H142.477V22.75ZM143.664 22.25V56H144.664V22.25H143.664ZM144.164 55.5H142.477V56.5H144.164V55.5ZM142.977 56V22.25H141.977V56H142.977ZM163.969 37.9531L163.666 38.351L163.668 38.3527L163.969 37.9531ZM164.438 26.9609L164.724 27.3707L164.726 27.3693L164.438 26.9609ZM180.164 25.7891L180.012 26.2654L180.013 26.2658L180.164 25.7891ZM185.578 28.6953L185.891 29.0851L186.358 28.7104L185.906 28.318L185.578 28.6953ZM184.266 29.75L183.949 30.1372L184.263 30.3935L184.579 30.1398L184.266 29.75ZM165.609 28.1328L165.339 27.7122L165.337 27.7133L165.609 28.1328ZM165.141 36.7578L164.85 37.1647L164.855 37.1683L165.141 36.7578ZM181.758 40.2734L181.455 40.6713L181.459 40.6743L181.758 40.2734ZM181.312 51.2656L181.599 51.6754L181.601 51.6739L181.312 51.2656ZM165.562 52.4141L165.411 52.8904L165.412 52.8908L165.562 52.4141ZM160.172 49.5078L159.859 49.1181L159.392 49.4927L159.844 49.8851L160.172 49.5078ZM161.484 48.4531L161.801 48.0659L161.487 47.8096L161.171 48.0634L161.484 48.4531ZM180.117 50.0703L179.847 49.6497L179.845 49.6508L180.117 50.0703ZM180.586 41.4688L180.295 41.8756L180.3 41.8792L180.586 41.4688ZM161.484 40.3672L161.163 40.7498L161.164 40.7511L161.484 40.3672ZM161.836 24.6641L162.148 25.0545L162.149 25.054L161.836 24.6641ZM166.664 22.2734L166.806 22.7529L166.809 22.7518L166.664 22.2734ZM181.641 22.7656L181.475 23.2374L181.477 23.2383L181.641 22.7656ZM188.203 26.5859L188.501 26.9876L188.982 26.6309L188.545 26.2212L188.203 26.5859ZM186.844 27.5938L186.509 27.965L186.813 28.2391L187.142 27.9954L186.844 27.5938ZM180.867 24.2656L180.709 24.74L180.712 24.7409L180.867 24.2656ZM178.922 48.875L179.168 49.31L179.17 49.3091L178.922 48.875ZM167.039 49.3906L166.906 49.8727L166.911 49.8738L167.039 49.3906ZM162.82 47.3984L162.515 47.0026L162.003 47.3977L162.514 47.7937L162.82 47.3984ZM164.156 46.3672L164.44 45.9553L164.139 45.7486L163.851 45.9714L164.156 46.3672ZM166.805 29.3516L167.051 29.7866L167.053 29.7857L166.805 29.3516ZM178.711 28.8359L178.578 29.318L178.582 29.3191L178.711 28.8359ZM182.93 30.8047L183.235 31.2005L183.747 30.8054L183.236 30.4094L182.93 30.8047ZM181.594 31.8359L181.31 32.2479L181.611 32.4545L181.899 32.2317L181.594 31.8359ZM184.266 37.8594L183.944 38.242L183.945 38.2433L184.266 37.8594ZM183.891 53.5625L184.203 53.9529L184.204 53.9525L183.891 53.5625ZM179.086 55.9531L179.228 56.4326L179.231 56.4315L179.086 55.9531ZM164.109 55.4609L163.944 55.9327L163.946 55.9336L164.109 55.4609ZM157.547 51.6172L157.249 51.2155L156.768 51.5723L157.205 51.982L157.547 51.6172ZM158.906 50.6094L159.241 50.2381L158.937 49.964L158.608 50.2077L158.906 50.6094ZM164.883 53.9609L164.725 54.4353L164.727 54.4362L164.883 53.9609ZM174.211 39.4453H171.539V40.4453H174.211V39.4453ZM171.539 39.4453C168.316 39.4453 165.915 38.7917 164.269 37.5535L163.668 38.3527C165.553 39.7708 168.2 40.4453 171.539 40.4453V39.4453ZM164.272 37.5552C162.638 36.3121 161.82 34.6197 161.82 32.4219H160.82C160.82 34.9115 161.768 36.9067 163.666 38.351L164.272 37.5552ZM161.82 32.4219C161.82 30.4254 162.755 28.7474 164.724 27.3707L164.151 26.5512C161.964 28.0807 160.82 30.0433 160.82 32.4219H161.82ZM164.726 27.3693C166.688 25.9823 169.487 25.2578 173.18 25.2578V24.2578C169.373 24.2578 166.343 25.002 164.149 26.5526L164.726 27.3693ZM173.18 25.2578C175.633 25.2578 177.91 25.5951 180.012 26.2654L180.316 25.3127C178.106 24.608 175.726 24.2578 173.18 24.2578V25.2578ZM180.013 26.2658C182.132 26.9366 183.873 27.8754 185.25 29.0726L185.906 28.318C184.408 27.0152 182.54 26.0166 180.315 25.3124L180.013 26.2658ZM185.265 28.3056L183.952 29.3602L184.579 30.1398L185.891 29.0851L185.265 28.3056ZM184.582 29.3628C183.24 28.266 181.568 27.4263 179.58 26.8334L179.295 27.7916C181.182 28.3549 182.729 29.1403 183.949 30.1372L184.582 29.3628ZM179.58 26.8334C177.592 26.24 175.457 25.9453 173.18 25.9453V26.9453C175.371 26.9453 177.408 27.2287 179.295 27.7916L179.58 26.8334ZM173.18 25.9453C169.838 25.9453 167.204 26.5133 165.339 27.7122L165.88 28.5534C167.515 27.5023 169.927 26.9453 173.18 26.9453V25.9453ZM165.337 27.7133C163.492 28.91 162.508 30.4836 162.508 32.4219H163.508C163.508 30.8914 164.258 29.6056 165.881 28.5523L165.337 27.7133ZM162.508 32.4219C162.508 34.4445 163.286 36.0474 164.85 37.1647L165.431 36.3509C164.152 35.4369 163.508 34.1492 163.508 32.4219H162.508ZM164.855 37.1683C166.414 38.2525 168.666 38.7578 171.539 38.7578V37.7578C168.756 37.7578 166.743 37.2632 165.426 36.3474L164.855 37.1683ZM171.539 38.7578H174.117V37.7578H171.539V38.7578ZM174.117 38.7578C177.388 38.7578 179.81 39.4196 181.455 40.6713L182.061 39.8756C180.174 38.4398 177.503 37.7578 174.117 37.7578V38.7578ZM181.459 40.6743C183.106 41.9021 183.93 43.5853 183.93 45.7812H184.93C184.93 43.2897 183.972 41.301 182.057 39.8726L181.459 40.6743ZM183.93 45.7812C183.93 47.7782 182.994 49.4647 181.024 50.8573L181.601 51.6739C183.787 50.129 184.93 48.1593 184.93 45.7812H183.93ZM181.026 50.8559C179.063 52.2283 176.264 52.9453 172.57 52.9453V53.9453C176.376 53.9453 179.405 53.2092 181.599 51.6754L181.026 50.8559ZM172.57 52.9453C170.117 52.9453 167.832 52.608 165.713 51.9374L165.412 52.8908C167.637 53.5951 170.024 53.9453 172.57 53.9453V52.9453ZM165.714 51.9377C163.611 51.267 161.877 50.3283 160.5 49.1305L159.844 49.8851C161.341 51.1874 163.201 52.1861 165.411 52.8904L165.714 51.9377ZM160.485 49.8976L161.798 48.8429L161.171 48.0634L159.859 49.1181L160.485 49.8976ZM161.168 48.8403C162.51 49.9372 164.182 50.7768 166.17 51.3698L166.455 50.4115C164.568 49.8482 163.021 49.0628 161.801 48.0659L161.168 48.8403ZM166.17 51.3698C168.158 51.9631 170.293 52.2578 172.57 52.2578V51.2578C170.379 51.2578 168.342 50.9744 166.455 50.4115L166.17 51.3698ZM172.57 52.2578C175.911 52.2578 178.539 51.69 180.389 50.4898L179.845 49.6508C178.227 50.7006 175.823 51.2578 172.57 51.2578V52.2578ZM180.388 50.4909C182.248 49.2951 183.242 47.7216 183.242 45.7812H182.242C182.242 47.3096 181.487 48.5955 179.847 49.6497L180.388 50.4909ZM183.242 45.7812C183.242 43.7562 182.454 42.1593 180.871 41.0583L180.3 41.8792C181.593 42.7782 182.242 44.0563 182.242 45.7812H183.242ZM180.877 41.0619C179.332 39.9586 177.084 39.4453 174.211 39.4453V40.4453C176.994 40.4453 178.996 40.9477 180.295 41.8756L180.877 41.0619ZM174.211 42.8203H171.539V43.8203H174.211V42.8203ZM171.539 42.8203C167.265 42.8203 164.043 41.8513 161.805 39.9833L161.164 40.7511C163.644 42.8206 167.126 43.8203 171.539 43.8203V42.8203ZM161.806 39.9845C159.569 38.1029 158.445 35.5993 158.445 32.4219H157.445C157.445 35.8695 158.681 38.6627 161.163 40.7498L161.806 39.9845ZM158.445 32.4219C158.445 30.9832 158.763 29.6388 159.399 28.382L158.507 27.9305C157.799 29.33 157.445 30.8293 157.445 32.4219H158.445ZM159.399 28.382C160.036 27.1224 160.95 26.0132 162.148 25.0545L161.524 24.2736C160.222 25.3149 159.214 26.5338 158.507 27.9305L159.399 28.382ZM162.149 25.054C163.353 24.088 164.9 23.3171 166.806 22.7529L166.522 21.794C164.522 22.386 162.85 23.2089 161.523 24.2741L162.149 25.054ZM166.809 22.7518C168.708 22.1746 170.83 21.8828 173.18 21.8828V20.8828C170.748 20.8828 168.526 21.1848 166.519 21.7951L166.809 22.7518ZM173.18 21.8828C176.144 21.8828 178.907 22.3357 181.475 23.2374L181.806 22.2939C179.124 21.3518 176.247 20.8828 173.18 20.8828V21.8828ZM181.477 23.2383C184.046 24.1251 186.17 25.3652 187.861 26.9507L188.545 26.2212C186.736 24.5254 184.485 23.2186 181.804 22.293L181.477 23.2383ZM187.905 26.1843L186.546 27.1921L187.142 27.9954L188.501 26.9876L187.905 26.1843ZM187.179 27.2225C185.525 25.7308 183.468 24.5896 181.023 23.7904L180.712 24.7409C183.047 25.5041 184.975 26.5817 186.509 27.965L187.179 27.2225ZM181.025 23.7913C178.58 22.976 175.963 22.5703 173.18 22.5703V23.5703C175.865 23.5703 178.374 23.9615 180.709 24.74L181.025 23.7913ZM173.18 22.5703C168.896 22.5703 165.466 23.474 162.945 25.3403L163.54 26.1441C165.831 24.4479 169.025 23.5703 173.18 23.5703V22.5703ZM162.945 25.3403C160.429 27.2028 159.133 29.5708 159.133 32.4219H160.133C160.133 29.9292 161.243 27.8441 163.54 26.1441L162.945 25.3403ZM159.133 32.4219C159.133 35.3895 160.224 37.7798 162.413 39.5383L163.04 38.7586C161.104 37.2045 160.133 35.1105 160.133 32.4219H159.133ZM162.413 39.5383C164.597 41.2914 167.662 42.1328 171.539 42.1328V41.1328C167.792 41.1328 164.982 40.318 163.04 38.7586L162.413 39.5383ZM171.539 42.1328H174.117V41.1328H171.539V42.1328ZM174.117 42.1328C176.396 42.1328 178.022 42.4764 179.064 43.0991C180.064 43.6973 180.555 44.5686 180.555 45.7812H181.555C181.555 44.2282 180.889 43.0254 179.577 42.2408C178.306 41.4806 176.463 41.1328 174.117 41.1328V42.1328ZM180.555 45.7812C180.555 46.796 179.995 47.6857 178.674 48.4409L179.17 49.3091C180.692 48.4393 181.555 47.2665 181.555 45.7812H180.555ZM178.675 48.44C177.381 49.1732 175.368 49.5703 172.57 49.5703V50.5703C175.429 50.5703 177.65 50.1705 179.168 49.31L178.675 48.44ZM172.57 49.5703C170.624 49.5703 168.824 49.3481 167.168 48.9074L166.911 49.8738C168.66 50.3394 170.548 50.5703 172.57 50.5703V49.5703ZM167.172 48.9086C165.516 48.4528 164.173 47.8137 163.127 47.0032L162.514 47.7937C163.687 48.702 165.156 49.3909 166.906 49.8727L167.172 48.9086ZM163.126 47.7942L164.462 46.763L163.851 45.9714L162.515 47.0026L163.126 47.7942ZM163.873 46.7791C165.943 48.2036 168.864 48.8828 172.57 48.8828V47.8828C168.964 47.8828 166.275 47.2183 164.44 45.9553L163.873 46.7791ZM172.57 48.8828C174.93 48.8828 176.736 48.6419 177.929 48.1134L177.524 47.1991C176.53 47.6394 174.898 47.8828 172.57 47.8828V48.8828ZM177.929 48.1134C179.09 47.5995 179.867 46.8449 179.867 45.7812H178.867C178.867 46.2488 178.551 46.7443 177.524 47.1991L177.929 48.1134ZM179.867 45.7812C179.867 45.2634 179.755 44.7889 179.508 44.3799C179.26 43.9695 178.894 43.6569 178.439 43.4338L177.999 44.3318C178.309 44.4838 178.518 44.675 178.652 44.8975C178.788 45.1212 178.867 45.4085 178.867 45.7812H179.867ZM178.439 43.4338C177.561 43.0038 176.122 42.8203 174.211 42.8203V43.8203C176.113 43.8203 177.346 44.0119 177.999 44.3318L178.439 43.4338ZM174.211 36.0703H171.539V37.0703H174.211V36.0703ZM171.539 36.0703C169.292 36.0703 167.69 35.7269 166.665 35.1056C165.681 34.5088 165.195 33.6377 165.195 32.4219H164.195C164.195 33.9717 164.85 35.1748 166.147 35.9608C167.403 36.7223 169.224 37.0703 171.539 37.0703V36.0703ZM165.195 32.4219C165.195 31.4056 165.749 30.5247 167.051 29.7866L166.558 28.9166C165.048 29.7721 164.195 30.9382 164.195 32.4219H165.195ZM167.053 29.7857C168.362 29.0373 170.383 28.6328 173.18 28.6328V27.6328C170.32 27.6328 168.091 28.0408 166.557 28.9174L167.053 29.7857ZM173.18 28.6328C175.125 28.6328 176.923 28.8625 178.578 29.318L178.844 28.3539C177.092 27.8719 175.203 27.6328 173.18 27.6328V28.6328ZM178.582 29.3191C180.237 29.7593 181.578 30.3905 182.623 31.2L183.236 30.4094C182.062 29.5002 180.591 28.8188 178.839 28.3527L178.582 29.3191ZM182.624 30.4089L181.288 31.4401L181.899 32.2317L183.235 31.2005L182.624 30.4089ZM181.877 31.424C179.807 29.9995 176.886 29.3203 173.18 29.3203V30.3203C176.786 30.3203 179.475 30.9849 181.31 32.2479L181.877 31.424ZM173.18 29.3203C170.82 29.3203 169.014 29.5613 167.821 30.0897L168.226 31.004C169.22 30.5637 170.852 30.3203 173.18 30.3203V29.3203ZM167.821 30.0897C166.66 30.6037 165.883 31.3582 165.883 32.4219H166.883C166.883 31.9543 167.199 31.4588 168.226 31.004L167.821 30.0897ZM165.883 32.4219C165.883 32.9397 165.995 33.4142 166.242 33.8233C166.49 34.2336 166.856 34.5463 167.311 34.7693L167.751 33.8713C167.441 33.7194 167.232 33.5281 167.098 33.3057C166.962 33.0819 166.883 32.7947 166.883 32.4219H165.883ZM167.311 34.7693C168.189 35.1994 169.628 35.3828 171.539 35.3828V34.3828C169.637 34.3828 168.404 34.1913 167.751 33.8713L167.311 34.7693ZM171.539 35.3828H174.211V34.3828H171.539V35.3828ZM174.211 35.3828C178.484 35.3828 181.705 36.3593 183.944 38.242L184.587 37.4767C182.107 35.3907 178.625 34.3828 174.211 34.3828V35.3828ZM183.945 38.2433C186.181 40.1089 187.305 42.6039 187.305 45.7812H188.305C188.305 42.3336 187.069 39.5473 184.586 37.4755L183.945 38.2433ZM187.305 45.7812C187.305 47.2199 186.987 48.5643 186.351 49.8212L187.243 50.2726C187.951 48.8732 188.305 47.3738 188.305 45.7812H187.305ZM186.351 49.8212C185.714 51.0801 184.793 52.1975 183.578 53.1725L184.204 53.9525C185.52 52.8963 186.536 51.6699 187.243 50.2726L186.351 49.8212ZM183.578 53.1721C182.389 54.1232 180.849 54.8946 178.941 55.4747L179.231 56.4315C181.229 55.8242 182.892 55.0018 184.203 53.9529L183.578 53.1721ZM178.944 55.4737C177.044 56.0361 174.921 56.3203 172.57 56.3203V57.3203C175.001 57.3203 177.221 57.0264 179.228 56.4326L178.944 55.4737ZM172.57 56.3203C169.605 56.3203 166.841 55.8749 164.273 54.9883L163.946 55.9336C166.628 56.8594 169.504 57.3203 172.57 57.3203V56.3203ZM164.275 54.9892C161.705 54.0867 159.581 52.8385 157.889 51.2524L157.205 51.982C159.013 53.6771 161.264 54.9914 163.944 55.9327L164.275 54.9892ZM157.845 52.0188L159.204 51.011L158.608 50.2077L157.249 51.2155L157.845 52.0188ZM158.571 50.9807C160.224 52.4717 162.28 53.6204 164.725 54.4353L165.041 53.4866C162.704 52.7078 160.776 51.6221 159.241 50.2381L158.571 50.9807ZM164.727 54.4362C167.172 55.2352 169.788 55.6328 172.57 55.6328V54.6328C169.884 54.6328 167.374 54.2492 165.038 53.4857L164.727 54.4362ZM172.57 55.6328C176.854 55.6328 180.284 54.7292 182.805 52.8628L182.21 52.0591C179.919 53.7552 176.725 54.6328 172.57 54.6328V55.6328ZM182.805 52.8628C185.321 51.0003 186.617 48.6324 186.617 45.7812H185.617C185.617 48.2739 184.507 50.359 182.21 52.0591L182.805 52.8628ZM186.617 45.7812C186.617 42.8136 185.526 40.4233 183.337 38.6648L182.71 39.4445C184.646 40.9986 185.617 43.0926 185.617 45.7812H186.617ZM183.337 38.6648C181.153 36.9117 178.088 36.0703 174.211 36.0703V37.0703C177.958 37.0703 180.768 37.8851 182.71 39.4445L183.337 38.6648ZM197.484 31.6016V31.1016H196.984V31.6016H197.484ZM199.172 31.6016H199.672V31.1016H199.172V31.6016ZM199.172 56V56.5H199.672V56H199.172ZM197.484 56H196.984V56.5H197.484V56ZM190.734 23.9375H190.234V24.4375H190.734V23.9375ZM190.734 22.25V21.75H190.234V22.25H190.734ZM216.047 22.25H216.547V21.75H216.047V22.25ZM216.047 23.9375V24.4375H216.547V23.9375H216.047ZM190.734 27.3125H190.234V27.8125H190.734V27.3125ZM190.734 25.625V25.125H190.234V25.625H190.734ZM216.047 25.625H216.547V25.125H216.047V25.625ZM216.047 27.3125V27.8125H216.547V27.3125H216.047ZM190.734 30.6875H190.234V31.1875H190.734V30.6875ZM190.734 29V28.5H190.234V29H190.734ZM216.047 29H216.547V28.5H216.047V29ZM216.047 30.6875V31.1875H216.547V30.6875H216.047ZM207.609 31.6016V31.1016H207.109V31.6016H207.609ZM209.297 31.6016H209.797V31.1016H209.297V31.6016ZM209.297 56V56.5H209.797V56H209.297ZM207.609 56H207.109V56.5H207.609V56ZM204.234 31.6016V31.1016H203.734V31.6016H204.234ZM205.922 31.6016H206.422V31.1016H205.922V31.6016ZM205.922 56V56.5H206.422V56H205.922ZM204.234 56H203.734V56.5H204.234V56ZM200.859 31.6016V31.1016H200.359V31.6016H200.859ZM202.547 31.6016H203.047V31.1016H202.547V31.6016ZM202.547 56V56.5H203.047V56H202.547ZM200.859 56H200.359V56.5H200.859V56ZM197.484 32.1016H199.172V31.1016H197.484V32.1016ZM198.672 31.6016V56H199.672V31.6016H198.672ZM199.172 55.5H197.484V56.5H199.172V55.5ZM197.984 56V31.6016H196.984V56H197.984ZM191.234 23.9375V22.25H190.234V23.9375H191.234ZM190.734 22.75H216.047V21.75H190.734V22.75ZM215.547 22.25V23.9375H216.547V22.25H215.547ZM216.047 23.4375H190.734V24.4375H216.047V23.4375ZM191.234 27.3125V25.625H190.234V27.3125H191.234ZM190.734 26.125H216.047V25.125H190.734V26.125ZM215.547 25.625V27.3125H216.547V25.625H215.547ZM216.047 26.8125H190.734V27.8125H216.047V26.8125ZM191.234 30.6875V29H190.234V30.6875H191.234ZM190.734 29.5H216.047V28.5H190.734V29.5ZM215.547 29V30.6875H216.547V29H215.547ZM216.047 30.1875H190.734V31.1875H216.047V30.1875ZM207.609 32.1016H209.297V31.1016H207.609V32.1016ZM208.797 31.6016V56H209.797V31.6016H208.797ZM209.297 55.5H207.609V56.5H209.297V55.5ZM208.109 56V31.6016H207.109V56H208.109ZM204.234 32.1016H205.922V31.1016H204.234V32.1016ZM205.422 31.6016V56H206.422V31.6016H205.422ZM205.922 55.5H204.234V56.5H205.922V55.5ZM204.734 56V31.6016H203.734V56H204.734ZM200.859 32.1016H202.547V31.1016H200.859V32.1016ZM202.047 31.6016V56H203.047V31.6016H202.047ZM202.547 55.5H200.859V56.5H202.547V55.5ZM201.359 56V31.6016H200.359V56H201.359ZM231.07 41.6562L230.596 41.8144L230.597 41.8174L231.07 41.6562ZM232.078 43.5078L231.696 43.8302L231.701 43.8367L231.707 43.843L232.078 43.5078ZM233.625 44.7266L233.383 45.1643L233.392 45.169L233.625 44.7266ZM235.477 45.4062L235.369 45.8946L235.372 45.8952L235.477 45.4062ZM242.438 44.3047H242.938V43.4248L242.182 43.8751L242.438 44.3047ZM242.438 45.9922L242.67 46.4347L242.938 46.2942V45.9922H242.438ZM234.352 46.7422L234.2 47.2188L234.209 47.2215L234.352 46.7422ZM231.656 45.3594L231.351 45.7557L231.356 45.7594L231.656 45.3594ZM229.711 42.8281L229.252 43.0275L229.256 43.0355L229.711 42.8281ZM243.07 33.5234L242.732 33.8916L242.736 33.8951L243.07 33.5234ZM245.414 56V56.5H245.914V56H245.414ZM243.727 56H243.227V56.5H243.727V56ZM248.789 56V56.5H249.289V56H248.789ZM247.102 56H246.602V56.5H247.102V56ZM244.266 32.3281L243.926 32.6951L243.928 32.6966L244.266 32.3281ZM228.141 43.4141L227.682 43.6124L227.684 43.6171L228.141 43.4141ZM233.602 48.2188L233.438 48.6912L233.445 48.6937L233.602 48.2188ZM242.438 47.5625H242.938V46.7429L242.209 47.1179L242.438 47.5625ZM242.438 49.3438L242.63 49.8053L242.938 49.6771V49.3438H242.438ZM232.922 49.6953L232.757 50.1675L232.762 50.1691L232.922 49.6953ZM229.195 47.6094L228.873 47.9917L228.876 47.9944L229.195 47.6094ZM226.594 44.0469L226.135 44.2468L226.136 44.2486L226.594 44.0469ZM228.891 31.2734L229.243 31.6283L229.243 31.6283L228.891 31.2734ZM243.023 29.3984L242.794 29.8425L242.797 29.8441L243.023 29.3984ZM252.164 56V56.5H252.664V56H252.164ZM250.477 56H249.977V56.5H250.477V56ZM248.695 32.5156L248.264 32.7684L248.266 32.7724L248.695 32.5156ZM230.344 28.0625L230.591 28.4968L230.594 28.4954L230.344 28.0625ZM225.633 32.7031L225.201 32.451L225.2 32.4536L225.633 32.7031ZM225.047 44.6094L224.59 44.812L224.591 44.8137L225.047 44.6094ZM228 48.6641L227.674 49.043L227.676 49.045L228 48.6641ZM232.242 51.1484L232.066 51.6164L232.072 51.6185L232.242 51.1484ZM242.438 50.9141H242.938V50.154L242.24 50.4549L242.438 50.9141ZM242.438 52.625L242.614 53.0928L242.938 52.9708V52.625H242.438ZM223.383 33.5L222.923 33.3025L222.922 33.306L223.383 33.5ZM244.711 26.4688L244.477 26.9104L244.478 26.9111L244.711 26.4688ZM255.539 56V56.5H256.039V56H255.539ZM253.852 56H253.352V56.5H253.852V56ZM251.625 30.8281L251.194 31.0822L251.195 31.0834L251.625 30.8281ZM245.555 25.0156L245.318 25.4562L245.32 25.4573L245.555 25.0156ZM222.727 31.0391L223.159 31.2907L223.159 31.2895L222.727 31.0391ZM221.484 44.6094L221.013 44.7751L221.014 44.7802L221.484 44.6094ZM224.016 49.1328L223.629 49.4497L223.633 49.4547L224.016 49.1328ZM227.719 52.4609L227.439 52.8755L227.441 52.877L227.719 52.4609ZM232.242 54.5938L232.098 55.0725L232.099 55.0729L232.242 54.5938ZM242.438 54.2656H242.938V53.5102L242.242 53.8054L242.438 54.2656ZM242.438 56L242.603 56.4717L242.938 56.3543V56H242.438ZM231.562 56.1172L231.42 56.5963L231.421 56.5967L231.562 56.1172ZM226.594 53.7734L226.31 54.1849L226.314 54.1882L226.594 53.7734ZM222.586 50.0938L222.198 50.4092L222.2 50.4113L222.586 50.0938ZM220.242 32.2109L219.781 32.0179L219.78 32.0196L220.242 32.2109ZM223.992 26.5156L223.637 26.1638L223.637 26.1638L223.992 26.5156ZM229.758 22.6953L229.95 23.1569L229.952 23.1562L229.758 22.6953ZM250.195 26.2344L249.852 26.5981L249.854 26.5998L250.195 26.2344ZM244.227 39.3594C244.227 37.2494 243.571 35.5531 242.207 34.3445L241.543 35.093C242.648 36.0719 243.227 37.4694 243.227 39.3594H244.227ZM242.207 34.3445C240.861 33.1521 239.1 32.5781 236.977 32.5781V33.5781C238.915 33.5781 240.42 34.0979 241.543 35.093L242.207 34.3445ZM236.977 32.5781C235.129 32.5781 233.528 33.2216 232.2 34.499L232.894 35.2197C234.034 34.1222 235.387 33.5781 236.977 33.5781V32.5781ZM232.2 34.499C230.872 35.7772 230.195 37.3276 230.195 39.125H231.195C231.195 37.6099 231.753 36.3166 232.894 35.2197L232.2 34.499ZM230.195 39.125C230.195 40.1065 230.326 41.0049 230.596 41.8144L231.545 41.4981C231.314 40.8076 231.195 40.0185 231.195 39.125H230.195ZM230.597 41.8174C230.863 42.6 231.227 43.2747 231.696 43.8302L232.46 43.1854C232.085 42.741 231.777 42.1812 231.544 41.4951L230.597 41.8174ZM231.707 43.843C232.183 44.37 232.743 44.8104 233.383 45.1642L233.867 44.2889C233.32 43.9865 232.848 43.6144 232.449 43.1727L231.707 43.843ZM233.392 45.169C234.025 45.5019 234.684 45.7442 235.369 45.8946L235.584 44.9179C234.988 44.7871 234.413 44.5762 233.858 44.2841L233.392 45.169ZM235.372 45.8952C236.066 46.0438 236.796 46.1172 237.562 46.1172V45.1172C236.86 45.1172 236.2 45.0499 235.581 44.9173L235.372 45.8952ZM237.562 46.1172C239.43 46.1172 241.144 45.6573 242.693 44.7342L242.182 43.8751C240.794 44.7021 239.258 45.1172 237.562 45.1172V46.1172ZM241.938 44.3047V45.9922H242.938V44.3047H241.938ZM242.205 45.5496C240.763 46.3079 239.21 46.6875 237.539 46.6875V47.6875C239.368 47.6875 241.081 47.2702 242.67 46.4347L242.205 45.5496ZM237.539 46.6875C236.458 46.6875 235.444 46.545 234.494 46.2629L234.209 47.2215C235.259 47.5331 236.37 47.6875 237.539 47.6875V46.6875ZM234.503 46.2656C233.571 45.9698 232.723 45.5345 231.956 44.9594L231.356 45.7594C232.214 46.403 233.163 46.8896 234.2 47.2187L234.503 46.2656ZM231.961 44.9631C231.222 44.3944 230.621 43.6194 230.166 42.6207L229.256 43.0355C229.769 44.1619 230.466 45.0744 231.351 45.7557L231.961 44.9631ZM230.169 42.6288C229.733 41.624 229.508 40.4594 229.508 39.125H228.508C228.508 40.5719 228.752 41.876 229.252 43.0275L230.169 42.6288ZM229.508 39.125C229.508 37.1006 230.213 35.4009 231.633 33.996L230.93 33.2852C229.318 34.8803 228.508 36.8369 228.508 39.125H229.508ZM231.633 33.996C233.048 32.5956 234.817 31.8906 236.977 31.8906V30.8906C234.574 30.8906 232.546 31.6857 230.93 33.2852L231.633 33.996ZM236.977 31.8906C239.393 31.8906 241.295 32.5707 242.732 33.8916L243.409 33.1553C241.752 31.6325 239.591 30.8906 236.977 30.8906V31.8906ZM242.736 33.8951C244.174 35.1892 244.914 36.9885 244.914 39.3594H245.914C245.914 36.7615 245.092 34.6702 243.405 33.1518L242.736 33.8951ZM244.914 39.3594V56H245.914V39.3594H244.914ZM245.414 55.5H243.727V56.5H245.414V55.5ZM244.227 56V39.3594H243.227V56H244.227ZM248.289 39.3594V56H249.289V39.3594H248.289ZM248.789 55.5H247.102V56.5H248.789V55.5ZM247.602 56V39.3594H246.602V56H247.602ZM247.602 39.3594C247.602 36.2894 246.613 33.8034 244.604 31.9597L243.928 32.6966C245.699 34.3216 246.602 36.5231 246.602 39.3594H247.602ZM244.605 31.9612C242.608 30.1126 240.051 29.2031 236.977 29.2031V30.2031C239.84 30.2031 242.142 31.0436 243.926 32.6951L244.605 31.9612ZM236.977 29.2031C234.103 29.2031 231.677 30.1634 229.734 32.0903L230.438 32.8004C232.182 31.071 234.35 30.2031 236.977 30.2031V29.2031ZM229.734 32.0903C227.794 34.0134 226.82 36.3683 226.82 39.125H227.82C227.82 36.6317 228.69 34.5334 230.438 32.8004L229.734 32.0903ZM226.82 39.125C226.82 40.7764 227.104 42.2751 227.682 43.6124L228.6 43.2157C228.084 42.0217 227.82 40.6611 227.82 39.125H226.82ZM227.684 43.6171C228.274 44.9458 229.078 46.0289 230.101 46.8507L230.727 46.0711C229.844 45.3617 229.132 44.4136 228.598 43.211L227.684 43.6171ZM230.101 46.8507C231.099 47.6528 232.213 48.2664 233.438 48.6912L233.765 47.7463C232.647 47.3586 231.635 46.8003 230.727 46.0711L230.101 46.8507ZM233.445 48.6937C234.686 49.1017 235.989 49.3047 237.352 49.3047V48.3047C236.089 48.3047 234.892 48.117 233.758 47.7438L233.445 48.6937ZM237.352 49.3047C239.214 49.3047 240.987 48.871 242.666 48.0071L242.209 47.1179C240.669 47.9102 239.052 48.3047 237.352 48.3047V49.3047ZM241.938 47.5625V49.3438H242.938V47.5625H241.938ZM242.245 48.8822C240.615 49.5616 238.977 49.8984 237.328 49.8984V50.8984C239.117 50.8984 240.885 50.5322 242.63 49.8053L242.245 48.8822ZM237.328 49.8984C235.833 49.8984 234.418 49.672 233.082 49.2215L232.762 50.1691C234.207 50.6561 235.73 50.8984 237.328 50.8984V49.8984ZM233.086 49.2231C231.75 48.7576 230.56 48.0912 229.514 47.2244L228.876 47.9944C230.018 48.9401 231.313 49.6642 232.757 50.1675L233.086 49.2231ZM229.518 47.2271C228.488 46.3595 227.665 45.2363 227.051 43.8452L226.136 44.2486C226.804 45.7637 227.715 47.0155 228.873 47.9917L229.518 47.2271ZM227.052 43.847C226.442 42.449 226.133 40.8777 226.133 39.125H225.133C225.133 40.9973 225.464 42.7072 226.135 44.2468L227.052 43.847ZM226.133 39.125C226.133 36.1792 227.166 33.69 229.243 31.6283L228.538 30.9186C226.271 33.1693 225.133 35.9145 225.133 39.125H226.133ZM229.243 31.6283C231.333 29.5534 233.9 28.5156 236.977 28.5156V27.5156C233.647 27.5156 230.824 28.6498 228.538 30.9186L229.243 31.6283ZM236.977 28.5156C239.156 28.5156 241.092 28.9617 242.794 29.8425L243.253 28.9544C241.393 27.9915 239.297 27.5156 236.977 27.5156V28.5156ZM242.797 29.8441C244.493 30.707 245.829 31.961 246.812 33.6149L247.672 33.1038C246.593 31.289 245.117 29.9024 243.25 28.9528L242.797 29.8441ZM246.812 33.6149C247.792 35.2628 248.289 37.1724 248.289 39.3594H249.289C249.289 37.0151 248.755 34.9247 247.672 33.1038L246.812 33.6149ZM251.664 39.3594V56H252.664V39.3594H251.664ZM252.164 55.5H250.477V56.5H252.164V55.5ZM250.977 56V39.3594H249.977V56H250.977ZM250.977 39.3594C250.977 36.7015 250.363 34.3295 249.124 32.2589L248.266 32.7724C249.402 34.6705 249.977 36.861 249.977 39.3594H250.977ZM249.127 32.2629C247.907 30.1819 246.229 28.5843 244.098 27.4781L243.637 28.3656C245.6 29.3845 247.139 30.8493 248.264 32.7684L249.127 32.2629ZM244.098 27.4781C241.972 26.3749 239.595 25.8281 236.977 25.8281V26.8281C239.452 26.8281 241.668 27.3438 243.637 28.3656L244.098 27.4781ZM236.977 25.8281C234.471 25.8281 232.173 26.4272 230.093 27.6296L230.594 28.4954C232.514 27.3853 234.638 26.8281 236.977 26.8281V25.8281ZM230.096 27.6282C228.021 28.8115 226.387 30.4206 225.201 32.451L226.065 32.9553C227.16 31.0794 228.666 29.5948 230.591 28.4968L230.096 27.6282ZM225.2 32.4536C224.029 34.4852 223.445 36.7123 223.445 39.125H224.445C224.445 36.8815 224.986 34.8273 226.066 32.9527L225.2 32.4536ZM223.445 39.125C223.445 41.1875 223.824 43.0856 224.59 44.812L225.504 44.4067C224.801 42.8206 224.445 41.0625 224.445 39.125H223.445ZM224.591 44.8137C225.352 46.5143 226.378 47.9278 227.674 49.043L228.326 48.2851C227.153 47.2753 226.211 45.9857 225.503 44.4051L224.591 44.8137ZM227.676 49.045C228.971 50.1459 230.436 51.003 232.066 51.6164L232.418 50.6805C230.893 50.1064 229.529 49.3073 228.324 48.2831L227.676 49.045ZM232.072 51.6185C233.721 52.2169 235.443 52.5156 237.234 52.5156V51.5156C235.557 51.5156 233.951 51.2362 232.413 50.6784L232.072 51.6185ZM237.234 52.5156C239.072 52.5156 240.873 52.1331 242.635 51.3732L242.24 50.4549C240.596 51.1637 238.928 51.5156 237.234 51.5156V52.5156ZM241.938 50.9141V52.625H242.938V50.9141H241.938ZM242.261 52.1572C240.53 52.8101 238.84 53.1328 237.188 53.1328V54.1328C238.973 54.1328 240.782 53.7837 242.614 53.0928L242.261 52.1572ZM237.188 53.1328C234.563 53.1328 232.16 52.5419 229.971 51.3644L229.498 52.245C231.84 53.5049 234.406 54.1328 237.188 54.1328V53.1328ZM229.971 51.3644C227.785 50.1885 226.038 48.5248 224.724 46.3651L223.87 46.8849C225.275 49.1939 227.152 50.9834 229.498 52.245L229.971 51.3644ZM224.724 46.3651C223.415 44.2136 222.758 41.8045 222.758 39.125H221.758C221.758 41.9768 222.46 44.5676 223.87 46.8849L224.724 46.3651ZM222.758 39.125C222.758 37.2216 223.12 35.4127 223.844 33.694L222.922 33.306C222.146 35.1498 221.758 37.0909 221.758 39.125H222.758ZM223.842 33.6974C224.584 31.9711 225.581 30.4859 226.83 29.2364L226.123 28.5293C224.779 29.8735 223.713 31.4664 222.923 33.3026L223.842 33.6974ZM226.83 29.2364C228.077 27.9891 229.584 26.9928 231.357 26.2503L230.971 25.3279C229.088 26.1166 227.469 27.1828 226.123 28.5293L226.83 29.2364ZM231.357 26.2503C233.122 25.5111 234.994 25.1406 236.977 25.1406V24.1406C234.866 24.1406 232.862 24.5358 230.971 25.3279L231.357 26.2503ZM236.977 25.1406C239.763 25.1406 242.259 25.7337 244.477 26.9104L244.945 26.0271C242.569 24.7663 239.909 24.1406 236.977 24.1406V25.1406ZM244.478 26.9111C246.708 28.0866 248.46 29.7559 249.741 31.926L250.602 31.4177C249.227 29.0879 247.339 27.2884 244.944 26.0264L244.478 26.9111ZM249.741 31.926C251.019 34.0915 251.664 36.5645 251.664 39.3594H252.664C252.664 36.4042 251.981 33.7523 250.602 31.4177L249.741 31.926ZM255.039 39.3594V56H256.039V39.3594H255.039ZM255.539 55.5H253.852V56.5H255.539V55.5ZM254.352 56V39.3594H253.352V56H254.352ZM254.352 39.3594C254.352 36.091 253.59 33.1574 252.055 30.5729L251.195 31.0834C252.629 33.4988 253.352 36.2528 253.352 39.3594H254.352ZM252.056 30.574C250.525 27.9795 248.434 25.9768 245.789 24.5739L245.32 25.4573C247.801 26.7732 249.757 28.6455 251.194 31.0822L252.056 30.574ZM245.791 24.575C243.15 23.1578 240.208 22.4531 236.977 22.4531V23.4531C240.058 23.4531 242.835 24.1235 245.318 25.4562L245.791 24.575ZM236.977 22.4531C233.861 22.4531 231.001 23.2009 228.406 24.6999L228.906 25.5658C231.343 24.1585 234.029 23.4531 236.977 23.4531V22.4531ZM228.406 24.6999C225.816 26.1962 223.776 28.2275 222.294 30.7887L223.159 31.2895C224.552 28.8819 226.466 26.9757 228.906 25.5658L228.406 24.6999ZM222.294 30.7875C220.811 33.3344 220.07 36.1168 220.07 39.125H221.07C221.07 36.2894 221.767 33.6812 223.159 31.2907L222.294 30.7875ZM220.07 39.125C220.07 41.1007 220.384 42.9851 221.013 44.7751L221.956 44.4436C221.366 42.7649 221.07 40.993 221.07 39.125H220.07ZM221.014 44.7802C221.657 46.5472 222.528 48.1054 223.629 49.4497L224.402 48.8159C223.379 47.5665 222.562 46.1091 221.954 44.4385L221.014 44.7802ZM223.633 49.4547C224.741 50.7716 226.01 51.9121 227.439 52.8755L227.998 52.0464C226.646 51.1348 225.446 50.0566 224.398 48.8109L223.633 49.4547ZM227.441 52.877C228.888 53.8417 230.441 54.5739 232.098 55.0725L232.386 54.115C230.824 53.6449 229.362 52.9552 227.996 52.0449L227.441 52.877ZM232.099 55.0729C233.77 55.5709 235.467 55.8203 237.188 55.8203V54.8203C235.565 54.8203 233.964 54.5853 232.385 54.1146L232.099 55.0729ZM237.188 55.8203C239.097 55.8203 240.913 55.4559 242.633 54.7259L242.242 53.8054C240.649 54.4816 238.966 54.8203 237.188 54.8203V55.8203ZM241.938 54.2656V56H242.938V54.2656H241.938ZM242.272 55.5283C240.59 56.119 238.865 56.4141 237.094 56.4141V57.4141C238.979 57.4141 240.816 57.0997 242.603 56.4717L242.272 55.5283ZM237.094 56.4141C235.251 56.4141 233.455 56.1552 231.704 55.6377L231.421 56.5967C233.264 57.1417 235.156 57.4141 237.094 57.4141V56.4141ZM231.705 55.638C229.972 55.1211 228.362 54.3615 226.873 53.3587L226.314 54.1882C227.888 55.2479 229.59 56.0507 231.42 56.5963L231.705 55.638ZM226.878 53.362C225.4 52.3412 224.099 51.1462 222.972 49.7762L222.2 50.4113C223.386 51.8538 224.756 53.1119 226.31 54.1848L226.878 53.362ZM222.974 49.7783C221.858 48.4056 220.978 46.8017 220.34 44.961L219.395 45.289C220.068 47.2296 221.002 48.9382 222.198 50.4092L222.974 49.7783ZM220.34 44.961C219.703 43.1259 219.383 41.1816 219.383 39.125H218.383C218.383 41.2872 218.719 43.3428 219.395 45.289L220.34 44.961ZM219.383 39.125C219.383 36.7673 219.824 34.5277 220.704 32.4023L219.78 32.0196C218.848 34.2691 218.383 36.639 218.383 39.125H219.383ZM220.703 32.4039C221.602 30.2562 222.817 28.4126 224.347 26.8674L223.637 26.1638C222.011 27.8061 220.726 29.7594 219.781 32.0179L220.703 32.4039ZM224.347 26.8674C225.891 25.3085 227.756 24.0709 229.95 23.1569L229.566 22.2338C227.259 23.1948 225.281 24.504 223.637 26.1638L224.347 26.8674ZM229.952 23.1562C232.154 22.2298 234.494 21.7656 236.977 21.7656V20.7656C234.366 20.7656 231.893 21.2546 229.564 22.2344L229.952 23.1562ZM236.977 21.7656C239.513 21.7656 241.889 22.1932 244.11 23.045L244.468 22.1113C242.126 21.213 239.628 20.7656 236.977 20.7656V21.7656ZM244.11 23.045C246.335 23.8983 248.247 25.0835 249.852 26.5981L250.538 25.8707C248.831 24.2602 246.806 23.0079 244.468 22.1113L244.11 23.045ZM249.854 26.5998C251.455 28.0946 252.72 29.9408 253.648 32.147L254.57 31.7592C253.592 29.4342 252.249 27.4679 250.537 25.869L249.854 26.5998ZM253.648 32.147C254.574 34.3461 255.039 36.7482 255.039 39.3594H256.039C256.039 36.6268 255.551 34.0914 254.57 31.7592L253.648 32.147Z" fill="#0097E6"/>
+    </g>
+    <defs>
+        <filter id="filter0_di" x="0.679688" y="16.6875" width="259.359" height="48.7266" filterUnits="userSpaceOnUse" color-interpolation-filters="sRGB">
+            <feFlood flood-opacity="0" result="BackgroundImageFix"/>
+            <feColorMatrix in="SourceAlpha" type="matrix" values="0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 127 0"/>
+            <feOffset dy="4"/>
+            <feGaussianBlur stdDeviation="2"/>
+            <feColorMatrix type="matrix" values="0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.25 0"/>
+            <feBlend mode="normal" in2="BackgroundImageFix" result="effect1_dropShadow"/>
+            <feBlend mode="normal" in="SourceGraphic" in2="effect1_dropShadow" result="shape"/>
+            <feColorMatrix in="SourceAlpha" type="matrix" values="0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 127 0" result="hardAlpha"/>
+            <feOffset dy="2"/>
+            <feGaussianBlur stdDeviation="2"/>
+            <feComposite in2="hardAlpha" operator="arithmetic" k2="-1" k3="1"/>
+            <feColorMatrix type="matrix" values="0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.25 0"/>
+            <feBlend mode="normal" in2="shape" result="effect2_innerShadow"/>
+        </filter>
+    </defs>
+</svg>
diff --git a/rust/ballista/ui/scheduler/src/index.css b/rust/ballista/ui/scheduler/src/index.css
new file mode 100644
index 00000000000..ef9298e7e96
--- /dev/null
+++ b/rust/ballista/ui/scheduler/src/index.css
@@ -0,0 +1,32 @@
+/*
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+*/
+
+@import url("https://fonts.googleapis.com/css2?family=Poppins:wght@100;400;600&display=swap");
+
+body {
+  margin: 0;
+  font-family: "Poppins", sans-serif;
+  -webkit-font-smoothing: antialiased;
+  -moz-osx-font-smoothing: grayscale;
+}
+
+code {
+  font-family: source-code-pro, Menlo, Monaco, Consolas, "Courier New",
+    monospace;
+}
diff --git a/rust/ballista/ui/scheduler/src/index.tsx b/rust/ballista/ui/scheduler/src/index.tsx
new file mode 100644
index 00000000000..2d03fe6f004
--- /dev/null
+++ b/rust/ballista/ui/scheduler/src/index.tsx
@@ -0,0 +1,38 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import React from "react";
+import ReactDOM from "react-dom";
+import { ChakraProvider } from "@chakra-ui/react";
+
+import "./index.css";
+import App from "./App";
+import reportWebVitals from "./reportWebVitals";
+
+ReactDOM.render(
+  <React.StrictMode>
+    <ChakraProvider>
+      <App />
+    </ChakraProvider>
+  </React.StrictMode>,
+  document.getElementById("root")
+);
+
+// If you want to start measuring performance in your app, pass a function
+// to log results (for example: reportWebVitals(console.log))
+// or send to an analytics endpoint. Learn more: https://bit.ly/CRA-vitals
+reportWebVitals();
diff --git a/rust/ballista/ui/scheduler/src/react-app-env.d.ts b/rust/ballista/ui/scheduler/src/react-app-env.d.ts
new file mode 100644
index 00000000000..52130497608
--- /dev/null
+++ b/rust/ballista/ui/scheduler/src/react-app-env.d.ts
@@ -0,0 +1,18 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+/// <reference types="react-scripts" />
diff --git a/rust/ballista/ui/scheduler/src/reportWebVitals.ts b/rust/ballista/ui/scheduler/src/reportWebVitals.ts
new file mode 100644
index 00000000000..7bb3e76d1aa
--- /dev/null
+++ b/rust/ballista/ui/scheduler/src/reportWebVitals.ts
@@ -0,0 +1,32 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import { ReportHandler } from "web-vitals";
+
+const reportWebVitals = (onPerfEntry?: ReportHandler) => {
+  if (onPerfEntry && onPerfEntry instanceof Function) {
+    import("web-vitals").then(({ getCLS, getFID, getFCP, getLCP, getTTFB }) => {
+      getCLS(onPerfEntry);
+      getFID(onPerfEntry);
+      getFCP(onPerfEntry);
+      getLCP(onPerfEntry);
+      getTTFB(onPerfEntry);
+    });
+  }
+};
+
+export default reportWebVitals;
diff --git a/rust/ballista/ui/scheduler/src/setupTests.ts b/rust/ballista/ui/scheduler/src/setupTests.ts
new file mode 100644
index 00000000000..48482da51ce
--- /dev/null
+++ b/rust/ballista/ui/scheduler/src/setupTests.ts
@@ -0,0 +1,22 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// jest-dom adds custom jest matchers for asserting on DOM nodes.
+// allows you to do things like:
+// expect(element).toHaveTextContent(/react/i)
+// learn more: https://github.com/testing-library/jest-dom
+import "@testing-library/jest-dom";
diff --git a/rust/ballista/ui/scheduler/tsconfig.json b/rust/ballista/ui/scheduler/tsconfig.json
new file mode 100644
index 00000000000..6116bcd2125
--- /dev/null
+++ b/rust/ballista/ui/scheduler/tsconfig.json
@@ -0,0 +1,28 @@
+{
+  "compilerOptions": {
+    "target": "es5",
+    "lib": [
+      "dom",
+      "dom.iterable",
+      "esnext"
+    ],
+    "allowJs": true,
+    "skipLibCheck": true,
+    "esModuleInterop": true,
+    "allowSyntheticDefaultImports": true,
+    "strict": true,
+    "forceConsistentCasingInFileNames": true,
+    "noFallthroughCasesInSwitch": true,
+    "module": "esnext",
+    "moduleResolution": "node",
+    "resolveJsonModule": true,
+    "isolatedModules": true,
+    "noEmit": true,
+    "jsx": "react-jsx"
+  },
+  "include": [
+    "src",
+    "index.d.ts",
+    "react-table-config.d.ts"
+  ]
+}
diff --git a/rust/ballista/ui/scheduler/yarn.lock b/rust/ballista/ui/scheduler/yarn.lock
new file mode 100644
index 00000000000..f2ea84b87bc
--- /dev/null
+++ b/rust/ballista/ui/scheduler/yarn.lock
@@ -0,0 +1,12431 @@
+# THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.
+# yarn lockfile v1
+
+
+"@babel/code-frame@7.10.4":
+  version "7.10.4"
+  resolved "https://registry.yarnpkg.com/@babel/code-frame/-/code-frame-7.10.4.tgz#168da1a36e90da68ae8d49c0f1b48c7c6249213a"
+  integrity sha512-vG6SvB6oYEhvgisZNFRmRCUkLz11c7rp+tbNTynGqc6mS1d5ATd/sGyV6W0KZZnXRKMTzZDRgQT3Ou9jhpAfUg==
+  dependencies:
+    "@babel/highlight" "^7.10.4"
+
+"@babel/code-frame@7.12.11":
+  version "7.12.11"
+  resolved "https://registry.yarnpkg.com/@babel/code-frame/-/code-frame-7.12.11.tgz#f4ad435aa263db935b8f10f2c552d23fb716a63f"
+  integrity sha512-Zt1yodBx1UcyiePMSkWnU4hPqhwq7hGi2nFL1LeA3EUl+q2LQx16MISgJ0+z7dnmgvP9QtIleuETGOiOH1RcIw==
+  dependencies:
+    "@babel/highlight" "^7.10.4"
+
+"@babel/code-frame@^7.0.0", "@babel/code-frame@^7.10.4", "@babel/code-frame@^7.12.13", "@babel/code-frame@^7.5.5":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/code-frame/-/code-frame-7.12.13.tgz#dcfc826beef65e75c50e21d3837d7d95798dd658"
+  integrity sha512-HV1Cm0Q3ZrpCR93tkWOYiuYIgLxZXZFVG2VgK+MBWjUqZTundupbfx2aXarXuw5Ko5aMcjtJgbSs4vUGBS5v6g==
+  dependencies:
+    "@babel/highlight" "^7.12.13"
+
+"@babel/compat-data@^7.12.1", "@babel/compat-data@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/compat-data/-/compat-data-7.12.13.tgz#27e19e0ed3726ccf54067ced4109501765e7e2e8"
+  integrity sha512-U/hshG5R+SIoW7HVWIdmy1cB7s3ki+r3FpyEZiCgpi4tFgPnX/vynY80ZGSASOIrUM6O7VxOgCZgdt7h97bUGg==
+
+"@babel/core@7.12.3":
+  version "7.12.3"
+  resolved "https://registry.yarnpkg.com/@babel/core/-/core-7.12.3.tgz#1b436884e1e3bff6fb1328dc02b208759de92ad8"
+  integrity sha512-0qXcZYKZp3/6N2jKYVxZv0aNCsxTSVCiK72DTiTYZAu7sjg73W0/aynWjMbiGd87EQL4WyA8reiJVh92AVla9g==
+  dependencies:
+    "@babel/code-frame" "^7.10.4"
+    "@babel/generator" "^7.12.1"
+    "@babel/helper-module-transforms" "^7.12.1"
+    "@babel/helpers" "^7.12.1"
+    "@babel/parser" "^7.12.3"
+    "@babel/template" "^7.10.4"
+    "@babel/traverse" "^7.12.1"
+    "@babel/types" "^7.12.1"
+    convert-source-map "^1.7.0"
+    debug "^4.1.0"
+    gensync "^1.0.0-beta.1"
+    json5 "^2.1.2"
+    lodash "^4.17.19"
+    resolve "^1.3.2"
+    semver "^5.4.1"
+    source-map "^0.5.0"
+
+"@babel/core@^7.1.0", "@babel/core@^7.12.3", "@babel/core@^7.7.5", "@babel/core@^7.8.4":
+  version "7.12.17"
+  resolved "https://registry.yarnpkg.com/@babel/core/-/core-7.12.17.tgz#993c5e893333107a2815d8e0d73a2c3755e280b2"
+  integrity sha512-V3CuX1aBywbJvV2yzJScRxeiiw0v2KZZYYE3giywxzFJL13RiyPjaaDwhDnxmgFTTS7FgvM2ijr4QmKNIu0AtQ==
+  dependencies:
+    "@babel/code-frame" "^7.12.13"
+    "@babel/generator" "^7.12.17"
+    "@babel/helper-module-transforms" "^7.12.17"
+    "@babel/helpers" "^7.12.17"
+    "@babel/parser" "^7.12.17"
+    "@babel/template" "^7.12.13"
+    "@babel/traverse" "^7.12.17"
+    "@babel/types" "^7.12.17"
+    convert-source-map "^1.7.0"
+    debug "^4.1.0"
+    gensync "^1.0.0-beta.1"
+    json5 "^2.1.2"
+    lodash "^4.17.19"
+    semver "^5.4.1"
+    source-map "^0.5.0"
+
+"@babel/generator@^7.12.1", "@babel/generator@^7.12.17":
+  version "7.12.17"
+  resolved "https://registry.yarnpkg.com/@babel/generator/-/generator-7.12.17.tgz#9ef1dd792d778b32284411df63f4f668a9957287"
+  integrity sha512-DSA7ruZrY4WI8VxuS1jWSRezFnghEoYEFrZcw9BizQRmOZiUsiHl59+qEARGPqPikwA/GPTyRCi7isuCK/oyqg==
+  dependencies:
+    "@babel/types" "^7.12.17"
+    jsesc "^2.5.1"
+    source-map "^0.5.0"
+
+"@babel/helper-annotate-as-pure@^7.10.4", "@babel/helper-annotate-as-pure@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/helper-annotate-as-pure/-/helper-annotate-as-pure-7.12.13.tgz#0f58e86dfc4bb3b1fcd7db806570e177d439b6ab"
+  integrity sha512-7YXfX5wQ5aYM/BOlbSccHDbuXXFPxeoUmfWtz8le2yTkTZc+BxsiEnENFoi2SlmA8ewDkG2LgIMIVzzn2h8kfw==
+  dependencies:
+    "@babel/types" "^7.12.13"
+
+"@babel/helper-builder-binary-assignment-operator-visitor@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/helper-builder-binary-assignment-operator-visitor/-/helper-builder-binary-assignment-operator-visitor-7.12.13.tgz#6bc20361c88b0a74d05137a65cac8d3cbf6f61fc"
+  integrity sha512-CZOv9tGphhDRlVjVkAgm8Nhklm9RzSmWpX2my+t7Ua/KT616pEzXsQCjinzvkRvHWJ9itO4f296efroX23XCMA==
+  dependencies:
+    "@babel/helper-explode-assignable-expression" "^7.12.13"
+    "@babel/types" "^7.12.13"
+
+"@babel/helper-compilation-targets@^7.12.1", "@babel/helper-compilation-targets@^7.12.17":
+  version "7.12.17"
+  resolved "https://registry.yarnpkg.com/@babel/helper-compilation-targets/-/helper-compilation-targets-7.12.17.tgz#91d83fae61ef390d39c3f0507cb83979bab837c7"
+  integrity sha512-5EkibqLVYOuZ89BSg2lv+GG8feywLuvMXNYgf0Im4MssE0mFWPztSpJbildNnUgw0bLI2EsIN4MpSHC2iUJkQA==
+  dependencies:
+    "@babel/compat-data" "^7.12.13"
+    "@babel/helper-validator-option" "^7.12.17"
+    browserslist "^4.14.5"
+    semver "^5.5.0"
+
+"@babel/helper-create-class-features-plugin@^7.12.1", "@babel/helper-create-class-features-plugin@^7.12.13", "@babel/helper-create-class-features-plugin@^7.12.17":
+  version "7.12.17"
+  resolved "https://registry.yarnpkg.com/@babel/helper-create-class-features-plugin/-/helper-create-class-features-plugin-7.12.17.tgz#704b69c8a78d03fb1c5fcc2e7b593f8a65628944"
+  integrity sha512-I/nurmTxIxHV0M+rIpfQBF1oN342+yvl2kwZUrQuOClMamHF1w5tknfZubgNOLRoA73SzBFAdFcpb4M9HwOeWQ==
+  dependencies:
+    "@babel/helper-function-name" "^7.12.13"
+    "@babel/helper-member-expression-to-functions" "^7.12.17"
+    "@babel/helper-optimise-call-expression" "^7.12.13"
+    "@babel/helper-replace-supers" "^7.12.13"
+    "@babel/helper-split-export-declaration" "^7.12.13"
+
+"@babel/helper-create-regexp-features-plugin@^7.12.13":
+  version "7.12.17"
+  resolved "https://registry.yarnpkg.com/@babel/helper-create-regexp-features-plugin/-/helper-create-regexp-features-plugin-7.12.17.tgz#a2ac87e9e319269ac655b8d4415e94d38d663cb7"
+  integrity sha512-p2VGmBu9oefLZ2nQpgnEnG0ZlRPvL8gAGvPUMQwUdaE8k49rOMuZpOwdQoy5qJf6K8jL3bcAMhVUlHAjIgJHUg==
+  dependencies:
+    "@babel/helper-annotate-as-pure" "^7.12.13"
+    regexpu-core "^4.7.1"
+
+"@babel/helper-explode-assignable-expression@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/helper-explode-assignable-expression/-/helper-explode-assignable-expression-7.12.13.tgz#0e46990da9e271502f77507efa4c9918d3d8634a"
+  integrity sha512-5loeRNvMo9mx1dA/d6yNi+YiKziJZFylZnCo1nmFF4qPU4yJ14abhWESuSMQSlQxWdxdOFzxXjk/PpfudTtYyw==
+  dependencies:
+    "@babel/types" "^7.12.13"
+
+"@babel/helper-function-name@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/helper-function-name/-/helper-function-name-7.12.13.tgz#93ad656db3c3c2232559fd7b2c3dbdcbe0eb377a"
+  integrity sha512-TZvmPn0UOqmvi5G4vvw0qZTpVptGkB1GL61R6lKvrSdIxGm5Pky7Q3fpKiIkQCAtRCBUwB0PaThlx9vebCDSwA==
+  dependencies:
+    "@babel/helper-get-function-arity" "^7.12.13"
+    "@babel/template" "^7.12.13"
+    "@babel/types" "^7.12.13"
+
+"@babel/helper-get-function-arity@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/helper-get-function-arity/-/helper-get-function-arity-7.12.13.tgz#bc63451d403a3b3082b97e1d8b3fe5bd4091e583"
+  integrity sha512-DjEVzQNz5LICkzN0REdpD5prGoidvbdYk1BVgRUOINaWJP2t6avB27X1guXK1kXNrX0WMfsrm1A/ZBthYuIMQg==
+  dependencies:
+    "@babel/types" "^7.12.13"
+
+"@babel/helper-hoist-variables@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/helper-hoist-variables/-/helper-hoist-variables-7.12.13.tgz#13aba58b7480b502362316ea02f52cca0e9796cd"
+  integrity sha512-KSC5XSj5HreRhYQtZ3cnSnQwDzgnbdUDEFsxkN0m6Q3WrCRt72xrnZ8+h+pX7YxM7hr87zIO3a/v5p/H3TrnVw==
+  dependencies:
+    "@babel/types" "^7.12.13"
+
+"@babel/helper-member-expression-to-functions@^7.12.13", "@babel/helper-member-expression-to-functions@^7.12.17":
+  version "7.12.17"
+  resolved "https://registry.yarnpkg.com/@babel/helper-member-expression-to-functions/-/helper-member-expression-to-functions-7.12.17.tgz#f82838eb06e1235307b6d71457b6670ff71ee5ac"
+  integrity sha512-Bzv4p3ODgS/qpBE0DiJ9qf5WxSmrQ8gVTe8ClMfwwsY2x/rhykxxy3bXzG7AGTnPB2ij37zGJ/Q/6FruxHxsxg==
+  dependencies:
+    "@babel/types" "^7.12.17"
+
+"@babel/helper-module-imports@^7.0.0", "@babel/helper-module-imports@^7.12.1", "@babel/helper-module-imports@^7.12.13", "@babel/helper-module-imports@^7.7.0":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/helper-module-imports/-/helper-module-imports-7.12.13.tgz#ec67e4404f41750463e455cc3203f6a32e93fcb0"
+  integrity sha512-NGmfvRp9Rqxy0uHSSVP+SRIW1q31a7Ji10cLBcqSDUngGentY4FRiHOFZFE1CLU5eiL0oE8reH7Tg1y99TDM/g==
+  dependencies:
+    "@babel/types" "^7.12.13"
+
+"@babel/helper-module-transforms@^7.12.1", "@babel/helper-module-transforms@^7.12.13", "@babel/helper-module-transforms@^7.12.17":
+  version "7.12.17"
+  resolved "https://registry.yarnpkg.com/@babel/helper-module-transforms/-/helper-module-transforms-7.12.17.tgz#7c75b987d6dfd5b48e575648f81eaac891539509"
+  integrity sha512-sFL+p6zOCQMm9vilo06M4VHuTxUAwa6IxgL56Tq1DVtA0ziAGTH1ThmJq7xwPqdQlgAbKX3fb0oZNbtRIyA5KQ==
+  dependencies:
+    "@babel/helper-module-imports" "^7.12.13"
+    "@babel/helper-replace-supers" "^7.12.13"
+    "@babel/helper-simple-access" "^7.12.13"
+    "@babel/helper-split-export-declaration" "^7.12.13"
+    "@babel/helper-validator-identifier" "^7.12.11"
+    "@babel/template" "^7.12.13"
+    "@babel/traverse" "^7.12.17"
+    "@babel/types" "^7.12.17"
+    lodash "^4.17.19"
+
+"@babel/helper-optimise-call-expression@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/helper-optimise-call-expression/-/helper-optimise-call-expression-7.12.13.tgz#5c02d171b4c8615b1e7163f888c1c81c30a2aaea"
+  integrity sha512-BdWQhoVJkp6nVjB7nkFWcn43dkprYauqtk++Py2eaf/GRDFm5BxRqEIZCiHlZUGAVmtwKcsVL1dC68WmzeFmiA==
+  dependencies:
+    "@babel/types" "^7.12.13"
+
+"@babel/helper-plugin-utils@^7.0.0", "@babel/helper-plugin-utils@^7.10.4", "@babel/helper-plugin-utils@^7.12.13", "@babel/helper-plugin-utils@^7.8.0", "@babel/helper-plugin-utils@^7.8.3":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/helper-plugin-utils/-/helper-plugin-utils-7.12.13.tgz#174254d0f2424d8aefb4dd48057511247b0a9eeb"
+  integrity sha512-C+10MXCXJLiR6IeG9+Wiejt9jmtFpxUc3MQqCmPY8hfCjyUGl9kT+B2okzEZrtykiwrc4dbCPdDoz0A/HQbDaA==
+
+"@babel/helper-remap-async-to-generator@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/helper-remap-async-to-generator/-/helper-remap-async-to-generator-7.12.13.tgz#170365f4140e2d20e5c88f8ba23c24468c296878"
+  integrity sha512-Qa6PU9vNcj1NZacZZI1Mvwt+gXDH6CTfgAkSjeRMLE8HxtDK76+YDId6NQR+z7Rgd5arhD2cIbS74r0SxD6PDA==
+  dependencies:
+    "@babel/helper-annotate-as-pure" "^7.12.13"
+    "@babel/helper-wrap-function" "^7.12.13"
+    "@babel/types" "^7.12.13"
+
+"@babel/helper-replace-supers@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/helper-replace-supers/-/helper-replace-supers-7.12.13.tgz#00ec4fb6862546bd3d0aff9aac56074277173121"
+  integrity sha512-pctAOIAMVStI2TMLhozPKbf5yTEXc0OJa0eENheb4w09SrgOWEs+P4nTOZYJQCqs8JlErGLDPDJTiGIp3ygbLg==
+  dependencies:
+    "@babel/helper-member-expression-to-functions" "^7.12.13"
+    "@babel/helper-optimise-call-expression" "^7.12.13"
+    "@babel/traverse" "^7.12.13"
+    "@babel/types" "^7.12.13"
+
+"@babel/helper-simple-access@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/helper-simple-access/-/helper-simple-access-7.12.13.tgz#8478bcc5cacf6aa1672b251c1d2dde5ccd61a6c4"
+  integrity sha512-0ski5dyYIHEfwpWGx5GPWhH35j342JaflmCeQmsPWcrOQDtCN6C1zKAVRFVbK53lPW2c9TsuLLSUDf0tIGJ5hA==
+  dependencies:
+    "@babel/types" "^7.12.13"
+
+"@babel/helper-skip-transparent-expression-wrappers@^7.12.1":
+  version "7.12.1"
+  resolved "https://registry.yarnpkg.com/@babel/helper-skip-transparent-expression-wrappers/-/helper-skip-transparent-expression-wrappers-7.12.1.tgz#462dc63a7e435ade8468385c63d2b84cce4b3cbf"
+  integrity sha512-Mf5AUuhG1/OCChOJ/HcADmvcHM42WJockombn8ATJG3OnyiSxBK/Mm5x78BQWvmtXZKHgbjdGL2kin/HOLlZGA==
+  dependencies:
+    "@babel/types" "^7.12.1"
+
+"@babel/helper-split-export-declaration@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/helper-split-export-declaration/-/helper-split-export-declaration-7.12.13.tgz#e9430be00baf3e88b0e13e6f9d4eaf2136372b05"
+  integrity sha512-tCJDltF83htUtXx5NLcaDqRmknv652ZWCHyoTETf1CXYJdPC7nohZohjUgieXhv0hTJdRf2FjDueFehdNucpzg==
+  dependencies:
+    "@babel/types" "^7.12.13"
+
+"@babel/helper-validator-identifier@^7.12.11":
+  version "7.12.11"
+  resolved "https://registry.yarnpkg.com/@babel/helper-validator-identifier/-/helper-validator-identifier-7.12.11.tgz#c9a1f021917dcb5ccf0d4e453e399022981fc9ed"
+  integrity sha512-np/lG3uARFybkoHokJUmf1QfEvRVCPbmQeUQpKow5cQ3xWrV9i3rUHodKDJPQfTVX61qKi+UdYk8kik84n7XOw==
+
+"@babel/helper-validator-option@^7.12.1", "@babel/helper-validator-option@^7.12.17":
+  version "7.12.17"
+  resolved "https://registry.yarnpkg.com/@babel/helper-validator-option/-/helper-validator-option-7.12.17.tgz#d1fbf012e1a79b7eebbfdc6d270baaf8d9eb9831"
+  integrity sha512-TopkMDmLzq8ngChwRlyjR6raKD6gMSae4JdYDB8bByKreQgG0RBTuKe9LRxW3wFtUnjxOPRKBDwEH6Mg5KeDfw==
+
+"@babel/helper-wrap-function@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/helper-wrap-function/-/helper-wrap-function-7.12.13.tgz#e3ea8cb3ee0a16911f9c1b50d9e99fe8fe30f9ff"
+  integrity sha512-t0aZFEmBJ1LojdtJnhOaQEVejnzYhyjWHSsNSNo8vOYRbAJNh6r6GQF7pd36SqG7OKGbn+AewVQ/0IfYfIuGdw==
+  dependencies:
+    "@babel/helper-function-name" "^7.12.13"
+    "@babel/template" "^7.12.13"
+    "@babel/traverse" "^7.12.13"
+    "@babel/types" "^7.12.13"
+
+"@babel/helpers@^7.12.1", "@babel/helpers@^7.12.17":
+  version "7.12.17"
+  resolved "https://registry.yarnpkg.com/@babel/helpers/-/helpers-7.12.17.tgz#71e03d2981a6b5ee16899964f4101dc8471d60bc"
+  integrity sha512-tEpjqSBGt/SFEsFikKds1sLNChKKGGR17flIgQKXH4fG6m9gTgl3gnOC1giHNyaBCSKuTfxaSzHi7UnvqiVKxg==
+  dependencies:
+    "@babel/template" "^7.12.13"
+    "@babel/traverse" "^7.12.17"
+    "@babel/types" "^7.12.17"
+
+"@babel/highlight@^7.10.4", "@babel/highlight@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/highlight/-/highlight-7.12.13.tgz#8ab538393e00370b26271b01fa08f7f27f2e795c"
+  integrity sha512-kocDQvIbgMKlWxXe9fof3TQ+gkIPOUSEYhJjqUjvKMez3krV7vbzYCDq39Oj11UAVK7JqPVGQPlgE85dPNlQww==
+  dependencies:
+    "@babel/helper-validator-identifier" "^7.12.11"
+    chalk "^2.0.0"
+    js-tokens "^4.0.0"
+
+"@babel/parser@^7.1.0", "@babel/parser@^7.12.13", "@babel/parser@^7.12.17", "@babel/parser@^7.12.3", "@babel/parser@^7.7.0":
+  version "7.12.17"
+  resolved "https://registry.yarnpkg.com/@babel/parser/-/parser-7.12.17.tgz#bc85d2d47db38094e5bb268fc761716e7d693848"
+  integrity sha512-r1yKkiUTYMQ8LiEI0UcQx5ETw5dpTLn9wijn9hk6KkTtOK95FndDN10M+8/s6k/Ymlbivw0Av9q4SlgF80PtHg==
+
+"@babel/plugin-proposal-async-generator-functions@^7.12.1", "@babel/plugin-proposal-async-generator-functions@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-proposal-async-generator-functions/-/plugin-proposal-async-generator-functions-7.12.13.tgz#d1c6d841802ffb88c64a2413e311f7345b9e66b5"
+  integrity sha512-1KH46Hx4WqP77f978+5Ye/VUbuwQld2hph70yaw2hXS2v7ER2f3nlpNMu909HO2rbvP0NKLlMVDPh9KXklVMhA==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.12.13"
+    "@babel/helper-remap-async-to-generator" "^7.12.13"
+    "@babel/plugin-syntax-async-generators" "^7.8.0"
+
+"@babel/plugin-proposal-class-properties@7.12.1":
+  version "7.12.1"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-proposal-class-properties/-/plugin-proposal-class-properties-7.12.1.tgz#a082ff541f2a29a4821065b8add9346c0c16e5de"
+  integrity sha512-cKp3dlQsFsEs5CWKnN7BnSHOd0EOW8EKpEjkoz1pO2E5KzIDNV9Ros1b0CnmbVgAGXJubOYVBOGCT1OmJwOI7w==
+  dependencies:
+    "@babel/helper-create-class-features-plugin" "^7.12.1"
+    "@babel/helper-plugin-utils" "^7.10.4"
+
+"@babel/plugin-proposal-class-properties@^7.12.1", "@babel/plugin-proposal-class-properties@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-proposal-class-properties/-/plugin-proposal-class-properties-7.12.13.tgz#3d2ce350367058033c93c098e348161d6dc0d8c8"
+  integrity sha512-8SCJ0Ddrpwv4T7Gwb33EmW1V9PY5lggTO+A8WjyIwxrSHDUyBw4MtF96ifn1n8H806YlxbVCoKXbbmzD6RD+cA==
+  dependencies:
+    "@babel/helper-create-class-features-plugin" "^7.12.13"
+    "@babel/helper-plugin-utils" "^7.12.13"
+
+"@babel/plugin-proposal-decorators@7.12.1":
+  version "7.12.1"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-proposal-decorators/-/plugin-proposal-decorators-7.12.1.tgz#59271439fed4145456c41067450543aee332d15f"
+  integrity sha512-knNIuusychgYN8fGJHONL0RbFxLGawhXOJNLBk75TniTsZZeA+wdkDuv6wp4lGwzQEKjZi6/WYtnb3udNPmQmQ==
+  dependencies:
+    "@babel/helper-create-class-features-plugin" "^7.12.1"
+    "@babel/helper-plugin-utils" "^7.10.4"
+    "@babel/plugin-syntax-decorators" "^7.12.1"
+
+"@babel/plugin-proposal-dynamic-import@^7.12.1", "@babel/plugin-proposal-dynamic-import@^7.12.17":
+  version "7.12.17"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-proposal-dynamic-import/-/plugin-proposal-dynamic-import-7.12.17.tgz#e0ebd8db65acc37eac518fa17bead2174e224512"
+  integrity sha512-ZNGoFZqrnuy9H2izB2jLlnNDAfVPlGl5NhFEiFe4D84ix9GQGygF+CWMGHKuE+bpyS/AOuDQCnkiRNqW2IzS1Q==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.12.13"
+    "@babel/plugin-syntax-dynamic-import" "^7.8.0"
+
+"@babel/plugin-proposal-export-namespace-from@^7.12.1", "@babel/plugin-proposal-export-namespace-from@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-proposal-export-namespace-from/-/plugin-proposal-export-namespace-from-7.12.13.tgz#393be47a4acd03fa2af6e3cde9b06e33de1b446d"
+  integrity sha512-INAgtFo4OnLN3Y/j0VwAgw3HDXcDtX+C/erMvWzuV9v71r7urb6iyMXu7eM9IgLr1ElLlOkaHjJ0SbCmdOQ3Iw==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.12.13"
+    "@babel/plugin-syntax-export-namespace-from" "^7.8.3"
+
+"@babel/plugin-proposal-json-strings@^7.12.1", "@babel/plugin-proposal-json-strings@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-proposal-json-strings/-/plugin-proposal-json-strings-7.12.13.tgz#ced7888a2db92a3d520a2e35eb421fdb7fcc9b5d"
+  integrity sha512-v9eEi4GiORDg8x+Dmi5r8ibOe0VXoKDeNPYcTTxdGN4eOWikrJfDJCJrr1l5gKGvsNyGJbrfMftC2dTL6oz7pg==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.12.13"
+    "@babel/plugin-syntax-json-strings" "^7.8.0"
+
+"@babel/plugin-proposal-logical-assignment-operators@^7.12.1", "@babel/plugin-proposal-logical-assignment-operators@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-proposal-logical-assignment-operators/-/plugin-proposal-logical-assignment-operators-7.12.13.tgz#575b5d9a08d8299eeb4db6430da6e16e5cf14350"
+  integrity sha512-fqmiD3Lz7jVdK6kabeSr1PZlWSUVqSitmHEe3Z00dtGTKieWnX9beafvavc32kjORa5Bai4QNHgFDwWJP+WtSQ==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.12.13"
+    "@babel/plugin-syntax-logical-assignment-operators" "^7.10.4"
+
+"@babel/plugin-proposal-nullish-coalescing-operator@7.12.1":
+  version "7.12.1"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-proposal-nullish-coalescing-operator/-/plugin-proposal-nullish-coalescing-operator-7.12.1.tgz#3ed4fff31c015e7f3f1467f190dbe545cd7b046c"
+  integrity sha512-nZY0ESiaQDI1y96+jk6VxMOaL4LPo/QDHBqL+SF3/vl6dHkTwHlOI8L4ZwuRBHgakRBw5zsVylel7QPbbGuYgg==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.10.4"
+    "@babel/plugin-syntax-nullish-coalescing-operator" "^7.8.0"
+
+"@babel/plugin-proposal-nullish-coalescing-operator@^7.12.1", "@babel/plugin-proposal-nullish-coalescing-operator@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-proposal-nullish-coalescing-operator/-/plugin-proposal-nullish-coalescing-operator-7.12.13.tgz#24867307285cee4e1031170efd8a7ac807deefde"
+  integrity sha512-Qoxpy+OxhDBI5kRqliJFAl4uWXk3Bn24WeFstPH0iLymFehSAUR8MHpqU7njyXv/qbo7oN6yTy5bfCmXdKpo1Q==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.12.13"
+    "@babel/plugin-syntax-nullish-coalescing-operator" "^7.8.0"
+
+"@babel/plugin-proposal-numeric-separator@7.12.1":
+  version "7.12.1"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-proposal-numeric-separator/-/plugin-proposal-numeric-separator-7.12.1.tgz#0e2c6774c4ce48be412119b4d693ac777f7685a6"
+  integrity sha512-MR7Ok+Af3OhNTCxYVjJZHS0t97ydnJZt/DbR4WISO39iDnhiD8XHrY12xuSJ90FFEGjir0Fzyyn7g/zY6hxbxA==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.10.4"
+    "@babel/plugin-syntax-numeric-separator" "^7.10.4"
+
+"@babel/plugin-proposal-numeric-separator@^7.12.1", "@babel/plugin-proposal-numeric-separator@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-proposal-numeric-separator/-/plugin-proposal-numeric-separator-7.12.13.tgz#bd9da3188e787b5120b4f9d465a8261ce67ed1db"
+  integrity sha512-O1jFia9R8BUCl3ZGB7eitaAPu62TXJRHn7rh+ojNERCFyqRwJMTmhz+tJ+k0CwI6CLjX/ee4qW74FSqlq9I35w==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.12.13"
+    "@babel/plugin-syntax-numeric-separator" "^7.10.4"
+
+"@babel/plugin-proposal-object-rest-spread@^7.12.1", "@babel/plugin-proposal-object-rest-spread@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-proposal-object-rest-spread/-/plugin-proposal-object-rest-spread-7.12.13.tgz#f93f3116381ff94bc676fdcb29d71045cd1ec011"
+  integrity sha512-WvA1okB/0OS/N3Ldb3sziSrXg6sRphsBgqiccfcQq7woEn5wQLNX82Oc4PlaFcdwcWHuQXAtb8ftbS8Fbsg/sg==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.12.13"
+    "@babel/plugin-syntax-object-rest-spread" "^7.8.0"
+    "@babel/plugin-transform-parameters" "^7.12.13"
+
+"@babel/plugin-proposal-optional-catch-binding@^7.12.1", "@babel/plugin-proposal-optional-catch-binding@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-proposal-optional-catch-binding/-/plugin-proposal-optional-catch-binding-7.12.13.tgz#4640520afe57728af14b4d1574ba844f263bcae5"
+  integrity sha512-9+MIm6msl9sHWg58NvqpNpLtuFbmpFYk37x8kgnGzAHvX35E1FyAwSUt5hIkSoWJFSAH+iwU8bJ4fcD1zKXOzg==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.12.13"
+    "@babel/plugin-syntax-optional-catch-binding" "^7.8.0"
+
+"@babel/plugin-proposal-optional-chaining@7.12.1":
+  version "7.12.1"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-proposal-optional-chaining/-/plugin-proposal-optional-chaining-7.12.1.tgz#cce122203fc8a32794296fc377c6dedaf4363797"
+  integrity sha512-c2uRpY6WzaVDzynVY9liyykS+kVU+WRZPMPYpkelXH8KBt1oXoI89kPbZKKG/jDT5UK92FTW2fZkZaJhdiBabw==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.10.4"
+    "@babel/helper-skip-transparent-expression-wrappers" "^7.12.1"
+    "@babel/plugin-syntax-optional-chaining" "^7.8.0"
+
+"@babel/plugin-proposal-optional-chaining@^7.12.1", "@babel/plugin-proposal-optional-chaining@^7.12.17":
+  version "7.12.17"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-proposal-optional-chaining/-/plugin-proposal-optional-chaining-7.12.17.tgz#e382becadc2cb16b7913b6c672d92e4b33385b5c"
+  integrity sha512-TvxwI80pWftrGPKHNfkvX/HnoeSTR7gC4ezWnAL39PuktYUe6r8kEpOLTYnkBTsaoeazXm2jHJ22EQ81sdgfcA==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.12.13"
+    "@babel/helper-skip-transparent-expression-wrappers" "^7.12.1"
+    "@babel/plugin-syntax-optional-chaining" "^7.8.0"
+
+"@babel/plugin-proposal-private-methods@^7.12.1", "@babel/plugin-proposal-private-methods@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-proposal-private-methods/-/plugin-proposal-private-methods-7.12.13.tgz#ea78a12554d784ecf7fc55950b752d469d9c4a71"
+  integrity sha512-sV0V57uUwpauixvR7s2o75LmwJI6JECwm5oPUY5beZB1nBl2i37hc7CJGqB5G+58fur5Y6ugvl3LRONk5x34rg==
+  dependencies:
+    "@babel/helper-create-class-features-plugin" "^7.12.13"
+    "@babel/helper-plugin-utils" "^7.12.13"
+
+"@babel/plugin-proposal-unicode-property-regex@^7.12.1", "@babel/plugin-proposal-unicode-property-regex@^7.12.13", "@babel/plugin-proposal-unicode-property-regex@^7.4.4":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-proposal-unicode-property-regex/-/plugin-proposal-unicode-property-regex-7.12.13.tgz#bebde51339be829c17aaaaced18641deb62b39ba"
+  integrity sha512-XyJmZidNfofEkqFV5VC/bLabGmO5QzenPO/YOfGuEbgU+2sSwMmio3YLb4WtBgcmmdwZHyVyv8on77IUjQ5Gvg==
+  dependencies:
+    "@babel/helper-create-regexp-features-plugin" "^7.12.13"
+    "@babel/helper-plugin-utils" "^7.12.13"
+
+"@babel/plugin-syntax-async-generators@^7.8.0", "@babel/plugin-syntax-async-generators@^7.8.4":
+  version "7.8.4"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-async-generators/-/plugin-syntax-async-generators-7.8.4.tgz#a983fb1aeb2ec3f6ed042a210f640e90e786fe0d"
+  integrity sha512-tycmZxkGfZaxhMRbXlPXuVFpdWlXpir2W4AMhSJgRKzk/eDlIXOhb2LHWoLpDF7TEHylV5zNhykX6KAgHJmTNw==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.8.0"
+
+"@babel/plugin-syntax-bigint@^7.8.3":
+  version "7.8.3"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-bigint/-/plugin-syntax-bigint-7.8.3.tgz#4c9a6f669f5d0cdf1b90a1671e9a146be5300cea"
+  integrity sha512-wnTnFlG+YxQm3vDxpGE57Pj0srRU4sHE/mDkt1qv2YJJSeUAec2ma4WLUnUPeKjyrfntVwe/N6dCXpU+zL3Npg==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.8.0"
+
+"@babel/plugin-syntax-class-properties@^7.12.1", "@babel/plugin-syntax-class-properties@^7.12.13", "@babel/plugin-syntax-class-properties@^7.8.3":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-class-properties/-/plugin-syntax-class-properties-7.12.13.tgz#b5c987274c4a3a82b89714796931a6b53544ae10"
+  integrity sha512-fm4idjKla0YahUNgFNLCB0qySdsoPiZP3iQE3rky0mBUtMZ23yDJ9SJdg6dXTSDnulOVqiF3Hgr9nbXvXTQZYA==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.12.13"
+
+"@babel/plugin-syntax-decorators@^7.12.1":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-decorators/-/plugin-syntax-decorators-7.12.13.tgz#fac829bf3c7ef4a1bc916257b403e58c6bdaf648"
+  integrity sha512-Rw6aIXGuqDLr6/LoBBYE57nKOzQpz/aDkKlMqEwH+Vp0MXbG6H/TfRjaY343LKxzAKAMXIHsQ8JzaZKuDZ9MwA==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.12.13"
+
+"@babel/plugin-syntax-dynamic-import@^7.8.0":
+  version "7.8.3"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-dynamic-import/-/plugin-syntax-dynamic-import-7.8.3.tgz#62bf98b2da3cd21d626154fc96ee5b3cb68eacb3"
+  integrity sha512-5gdGbFon+PszYzqs83S3E5mpi7/y/8M9eC90MRTZfduQOYW76ig6SOSPNe41IG5LoP3FGBn2N0RjVDSQiS94kQ==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.8.0"
+
+"@babel/plugin-syntax-export-namespace-from@^7.8.3":
+  version "7.8.3"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-export-namespace-from/-/plugin-syntax-export-namespace-from-7.8.3.tgz#028964a9ba80dbc094c915c487ad7c4e7a66465a"
+  integrity sha512-MXf5laXo6c1IbEbegDmzGPwGNTsHZmEy6QGznu5Sh2UCWvueywb2ee+CCE4zQiZstxU9BMoQO9i6zUFSY0Kj0Q==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.8.3"
+
+"@babel/plugin-syntax-flow@^7.12.1":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-flow/-/plugin-syntax-flow-7.12.13.tgz#5df9962503c0a9c918381c929d51d4d6949e7e86"
+  integrity sha512-J/RYxnlSLXZLVR7wTRsozxKT8qbsx1mNKJzXEEjQ0Kjx1ZACcyHgbanNWNCFtc36IzuWhYWPpvJFFoexoOWFmA==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.12.13"
+
+"@babel/plugin-syntax-import-meta@^7.8.3":
+  version "7.10.4"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-import-meta/-/plugin-syntax-import-meta-7.10.4.tgz#ee601348c370fa334d2207be158777496521fd51"
+  integrity sha512-Yqfm+XDx0+Prh3VSeEQCPU81yC+JWZ2pDPFSS4ZdpfZhp4MkFMaDC1UqseovEKwSUpnIL7+vK+Clp7bfh0iD7g==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.10.4"
+
+"@babel/plugin-syntax-json-strings@^7.8.0", "@babel/plugin-syntax-json-strings@^7.8.3":
+  version "7.8.3"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-json-strings/-/plugin-syntax-json-strings-7.8.3.tgz#01ca21b668cd8218c9e640cb6dd88c5412b2c96a"
+  integrity sha512-lY6kdGpWHvjoe2vk4WrAapEuBR69EMxZl+RoGRhrFGNYVK8mOPAW8VfbT/ZgrFbXlDNiiaxQnAtgVCZ6jv30EA==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.8.0"
+
+"@babel/plugin-syntax-jsx@^7.12.1", "@babel/plugin-syntax-jsx@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-jsx/-/plugin-syntax-jsx-7.12.13.tgz#044fb81ebad6698fe62c478875575bcbb9b70f15"
+  integrity sha512-d4HM23Q1K7oq/SLNmG6mRt85l2csmQ0cHRaxRXjKW0YFdEXqlZ5kzFQKH5Uc3rDJECgu+yCRgPkG04Mm98R/1g==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.12.13"
+
+"@babel/plugin-syntax-logical-assignment-operators@^7.10.4", "@babel/plugin-syntax-logical-assignment-operators@^7.8.3":
+  version "7.10.4"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-logical-assignment-operators/-/plugin-syntax-logical-assignment-operators-7.10.4.tgz#ca91ef46303530448b906652bac2e9fe9941f699"
+  integrity sha512-d8waShlpFDinQ5MtvGU9xDAOzKH47+FFoney2baFIoMr952hKOLp1HR7VszoZvOsV/4+RRszNY7D17ba0te0ig==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.10.4"
+
+"@babel/plugin-syntax-nullish-coalescing-operator@^7.8.0", "@babel/plugin-syntax-nullish-coalescing-operator@^7.8.3":
+  version "7.8.3"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-nullish-coalescing-operator/-/plugin-syntax-nullish-coalescing-operator-7.8.3.tgz#167ed70368886081f74b5c36c65a88c03b66d1a9"
+  integrity sha512-aSff4zPII1u2QD7y+F8oDsz19ew4IGEJg9SVW+bqwpwtfFleiQDMdzA/R+UlWDzfnHFCxxleFT0PMIrR36XLNQ==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.8.0"
+
+"@babel/plugin-syntax-numeric-separator@^7.10.4", "@babel/plugin-syntax-numeric-separator@^7.8.3":
+  version "7.10.4"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-numeric-separator/-/plugin-syntax-numeric-separator-7.10.4.tgz#b9b070b3e33570cd9fd07ba7fa91c0dd37b9af97"
+  integrity sha512-9H6YdfkcK/uOnY/K7/aA2xpzaAgkQn37yzWUMRK7OaPOqOpGS1+n0H5hxT9AUw9EsSjPW8SVyMJwYRtWs3X3ug==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.10.4"
+
+"@babel/plugin-syntax-object-rest-spread@^7.8.0", "@babel/plugin-syntax-object-rest-spread@^7.8.3":
+  version "7.8.3"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-object-rest-spread/-/plugin-syntax-object-rest-spread-7.8.3.tgz#60e225edcbd98a640332a2e72dd3e66f1af55871"
+  integrity sha512-XoqMijGZb9y3y2XskN+P1wUGiVwWZ5JmoDRwx5+3GmEplNyVM2s2Dg8ILFQm8rWM48orGy5YpI5Bl8U1y7ydlA==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.8.0"
+
+"@babel/plugin-syntax-optional-catch-binding@^7.8.0", "@babel/plugin-syntax-optional-catch-binding@^7.8.3":
+  version "7.8.3"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-optional-catch-binding/-/plugin-syntax-optional-catch-binding-7.8.3.tgz#6111a265bcfb020eb9efd0fdfd7d26402b9ed6c1"
+  integrity sha512-6VPD0Pc1lpTqw0aKoeRTMiB+kWhAoT24PA+ksWSBrFtl5SIRVpZlwN3NNPQjehA2E/91FV3RjLWoVTglWcSV3Q==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.8.0"
+
+"@babel/plugin-syntax-optional-chaining@^7.8.0", "@babel/plugin-syntax-optional-chaining@^7.8.3":
+  version "7.8.3"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-optional-chaining/-/plugin-syntax-optional-chaining-7.8.3.tgz#4f69c2ab95167e0180cd5336613f8c5788f7d48a"
+  integrity sha512-KoK9ErH1MBlCPxV0VANkXW2/dw4vlbGDrFgz8bmUsBGYkFRcbRwMh6cIJubdPrkxRwuGdtCk0v/wPTKbQgBjkg==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.8.0"
+
+"@babel/plugin-syntax-top-level-await@^7.12.1", "@babel/plugin-syntax-top-level-await@^7.12.13", "@babel/plugin-syntax-top-level-await@^7.8.3":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-top-level-await/-/plugin-syntax-top-level-await-7.12.13.tgz#c5f0fa6e249f5b739727f923540cf7a806130178"
+  integrity sha512-A81F9pDwyS7yM//KwbCSDqy3Uj4NMIurtplxphWxoYtNPov7cJsDkAFNNyVlIZ3jwGycVsurZ+LtOA8gZ376iQ==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.12.13"
+
+"@babel/plugin-syntax-typescript@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-typescript/-/plugin-syntax-typescript-7.12.13.tgz#9dff111ca64154cef0f4dc52cf843d9f12ce4474"
+  integrity sha512-cHP3u1JiUiG2LFDKbXnwVad81GvfyIOmCD6HIEId6ojrY0Drfy2q1jw7BwN7dE84+kTnBjLkXoL3IEy/3JPu2w==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.12.13"
+
+"@babel/plugin-transform-arrow-functions@^7.12.1", "@babel/plugin-transform-arrow-functions@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-arrow-functions/-/plugin-transform-arrow-functions-7.12.13.tgz#eda5670b282952100c229f8a3bd49e0f6a72e9fe"
+  integrity sha512-tBtuN6qtCTd+iHzVZVOMNp+L04iIJBpqkdY42tWbmjIT5wvR2kx7gxMBsyhQtFzHwBbyGi9h8J8r9HgnOpQHxg==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.12.13"
+
+"@babel/plugin-transform-async-to-generator@^7.12.1", "@babel/plugin-transform-async-to-generator@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-async-to-generator/-/plugin-transform-async-to-generator-7.12.13.tgz#fed8c69eebf187a535bfa4ee97a614009b24f7ae"
+  integrity sha512-psM9QHcHaDr+HZpRuJcE1PXESuGWSCcbiGFFhhwfzdbTxaGDVzuVtdNYliAwcRo3GFg0Bc8MmI+AvIGYIJG04A==
+  dependencies:
+    "@babel/helper-module-imports" "^7.12.13"
+    "@babel/helper-plugin-utils" "^7.12.13"
+    "@babel/helper-remap-async-to-generator" "^7.12.13"
+
+"@babel/plugin-transform-block-scoped-functions@^7.12.1", "@babel/plugin-transform-block-scoped-functions@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-block-scoped-functions/-/plugin-transform-block-scoped-functions-7.12.13.tgz#a9bf1836f2a39b4eb6cf09967739de29ea4bf4c4"
+  integrity sha512-zNyFqbc3kI/fVpqwfqkg6RvBgFpC4J18aKKMmv7KdQ/1GgREapSJAykLMVNwfRGO3BtHj3YQZl8kxCXPcVMVeg==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.12.13"
+
+"@babel/plugin-transform-block-scoping@^7.12.1", "@babel/plugin-transform-block-scoping@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-block-scoping/-/plugin-transform-block-scoping-7.12.13.tgz#f36e55076d06f41dfd78557ea039c1b581642e61"
+  integrity sha512-Pxwe0iqWJX4fOOM2kEZeUuAxHMWb9nK+9oh5d11bsLoB0xMg+mkDpt0eYuDZB7ETrY9bbcVlKUGTOGWy7BHsMQ==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.12.13"
+
+"@babel/plugin-transform-classes@^7.12.1", "@babel/plugin-transform-classes@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-classes/-/plugin-transform-classes-7.12.13.tgz#9728edc1838b5d62fc93ad830bd523b1fcb0e1f6"
+  integrity sha512-cqZlMlhCC1rVnxE5ZGMtIb896ijL90xppMiuWXcwcOAuFczynpd3KYemb91XFFPi3wJSe/OcrX9lXoowatkkxA==
+  dependencies:
+    "@babel/helper-annotate-as-pure" "^7.12.13"
+    "@babel/helper-function-name" "^7.12.13"
+    "@babel/helper-optimise-call-expression" "^7.12.13"
+    "@babel/helper-plugin-utils" "^7.12.13"
+    "@babel/helper-replace-supers" "^7.12.13"
+    "@babel/helper-split-export-declaration" "^7.12.13"
+    globals "^11.1.0"
+
+"@babel/plugin-transform-computed-properties@^7.12.1", "@babel/plugin-transform-computed-properties@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-computed-properties/-/plugin-transform-computed-properties-7.12.13.tgz#6a210647a3d67f21f699cfd2a01333803b27339d"
+  integrity sha512-dDfuROUPGK1mTtLKyDPUavmj2b6kFu82SmgpztBFEO974KMjJT+Ytj3/oWsTUMBmgPcp9J5Pc1SlcAYRpJ2hRA==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.12.13"
+
+"@babel/plugin-transform-destructuring@^7.12.1", "@babel/plugin-transform-destructuring@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-destructuring/-/plugin-transform-destructuring-7.12.13.tgz#fc56c5176940c5b41735c677124d1d20cecc9aeb"
+  integrity sha512-Dn83KykIFzjhA3FDPA1z4N+yfF3btDGhjnJwxIj0T43tP0flCujnU8fKgEkf0C1biIpSv9NZegPBQ1J6jYkwvQ==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.12.13"
+
+"@babel/plugin-transform-dotall-regex@^7.12.1", "@babel/plugin-transform-dotall-regex@^7.12.13", "@babel/plugin-transform-dotall-regex@^7.4.4":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-dotall-regex/-/plugin-transform-dotall-regex-7.12.13.tgz#3f1601cc29905bfcb67f53910f197aeafebb25ad"
+  integrity sha512-foDrozE65ZFdUC2OfgeOCrEPTxdB3yjqxpXh8CH+ipd9CHd4s/iq81kcUpyH8ACGNEPdFqbtzfgzbT/ZGlbDeQ==
+  dependencies:
+    "@babel/helper-create-regexp-features-plugin" "^7.12.13"
+    "@babel/helper-plugin-utils" "^7.12.13"
+
+"@babel/plugin-transform-duplicate-keys@^7.12.1", "@babel/plugin-transform-duplicate-keys@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-duplicate-keys/-/plugin-transform-duplicate-keys-7.12.13.tgz#6f06b87a8b803fd928e54b81c258f0a0033904de"
+  integrity sha512-NfADJiiHdhLBW3pulJlJI2NB0t4cci4WTZ8FtdIuNc2+8pslXdPtRRAEWqUY+m9kNOk2eRYbTAOipAxlrOcwwQ==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.12.13"
+
+"@babel/plugin-transform-exponentiation-operator@^7.12.1", "@babel/plugin-transform-exponentiation-operator@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-exponentiation-operator/-/plugin-transform-exponentiation-operator-7.12.13.tgz#4d52390b9a273e651e4aba6aee49ef40e80cd0a1"
+  integrity sha512-fbUelkM1apvqez/yYx1/oICVnGo2KM5s63mhGylrmXUxK/IAXSIf87QIxVfZldWf4QsOafY6vV3bX8aMHSvNrA==
+  dependencies:
+    "@babel/helper-builder-binary-assignment-operator-visitor" "^7.12.13"
+    "@babel/helper-plugin-utils" "^7.12.13"
+
+"@babel/plugin-transform-flow-strip-types@7.12.1":
+  version "7.12.1"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-flow-strip-types/-/plugin-transform-flow-strip-types-7.12.1.tgz#8430decfa7eb2aea5414ed4a3fa6e1652b7d77c4"
+  integrity sha512-8hAtkmsQb36yMmEtk2JZ9JnVyDSnDOdlB+0nEGzIDLuK4yR3JcEjfuFPYkdEPSh8Id+rAMeBEn+X0iVEyho6Hg==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.10.4"
+    "@babel/plugin-syntax-flow" "^7.12.1"
+
+"@babel/plugin-transform-for-of@^7.12.1", "@babel/plugin-transform-for-of@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-for-of/-/plugin-transform-for-of-7.12.13.tgz#561ff6d74d9e1c8879cb12dbaf4a14cd29d15cf6"
+  integrity sha512-xCbdgSzXYmHGyVX3+BsQjcd4hv4vA/FDy7Kc8eOpzKmBBPEOTurt0w5fCRQaGl+GSBORKgJdstQ1rHl4jbNseQ==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.12.13"
+
+"@babel/plugin-transform-function-name@^7.12.1", "@babel/plugin-transform-function-name@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-function-name/-/plugin-transform-function-name-7.12.13.tgz#bb024452f9aaed861d374c8e7a24252ce3a50051"
+  integrity sha512-6K7gZycG0cmIwwF7uMK/ZqeCikCGVBdyP2J5SKNCXO5EOHcqi+z7Jwf8AmyDNcBgxET8DrEtCt/mPKPyAzXyqQ==
+  dependencies:
+    "@babel/helper-function-name" "^7.12.13"
+    "@babel/helper-plugin-utils" "^7.12.13"
+
+"@babel/plugin-transform-literals@^7.12.1", "@babel/plugin-transform-literals@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-literals/-/plugin-transform-literals-7.12.13.tgz#2ca45bafe4a820197cf315794a4d26560fe4bdb9"
+  integrity sha512-FW+WPjSR7hiUxMcKqyNjP05tQ2kmBCdpEpZHY1ARm96tGQCCBvXKnpjILtDplUnJ/eHZ0lALLM+d2lMFSpYJrQ==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.12.13"
+
+"@babel/plugin-transform-member-expression-literals@^7.12.1", "@babel/plugin-transform-member-expression-literals@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-member-expression-literals/-/plugin-transform-member-expression-literals-7.12.13.tgz#5ffa66cd59b9e191314c9f1f803b938e8c081e40"
+  integrity sha512-kxLkOsg8yir4YeEPHLuO2tXP9R/gTjpuTOjshqSpELUN3ZAg2jfDnKUvzzJxObun38sw3wm4Uu69sX/zA7iRvg==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.12.13"
+
+"@babel/plugin-transform-modules-amd@^7.12.1", "@babel/plugin-transform-modules-amd@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-modules-amd/-/plugin-transform-modules-amd-7.12.13.tgz#43db16249b274ee2e551e2422090aa1c47692d56"
+  integrity sha512-JHLOU0o81m5UqG0Ulz/fPC68/v+UTuGTWaZBUwpEk1fYQ1D9LfKV6MPn4ttJKqRo5Lm460fkzjLTL4EHvCprvA==
+  dependencies:
+    "@babel/helper-module-transforms" "^7.12.13"
+    "@babel/helper-plugin-utils" "^7.12.13"
+    babel-plugin-dynamic-import-node "^2.3.3"
+
+"@babel/plugin-transform-modules-commonjs@^7.12.1", "@babel/plugin-transform-modules-commonjs@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-modules-commonjs/-/plugin-transform-modules-commonjs-7.12.13.tgz#5043b870a784a8421fa1fd9136a24f294da13e50"
+  integrity sha512-OGQoeVXVi1259HjuoDnsQMlMkT9UkZT9TpXAsqWplS/M0N1g3TJAn/ByOCeQu7mfjc5WpSsRU+jV1Hd89ts0kQ==
+  dependencies:
+    "@babel/helper-module-transforms" "^7.12.13"
+    "@babel/helper-plugin-utils" "^7.12.13"
+    "@babel/helper-simple-access" "^7.12.13"
+    babel-plugin-dynamic-import-node "^2.3.3"
+
+"@babel/plugin-transform-modules-systemjs@^7.12.1", "@babel/plugin-transform-modules-systemjs@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-modules-systemjs/-/plugin-transform-modules-systemjs-7.12.13.tgz#351937f392c7f07493fc79b2118201d50404a3c5"
+  integrity sha512-aHfVjhZ8QekaNF/5aNdStCGzwTbU7SI5hUybBKlMzqIMC7w7Ho8hx5a4R/DkTHfRfLwHGGxSpFt9BfxKCoXKoA==
+  dependencies:
+    "@babel/helper-hoist-variables" "^7.12.13"
+    "@babel/helper-module-transforms" "^7.12.13"
+    "@babel/helper-plugin-utils" "^7.12.13"
+    "@babel/helper-validator-identifier" "^7.12.11"
+    babel-plugin-dynamic-import-node "^2.3.3"
+
+"@babel/plugin-transform-modules-umd@^7.12.1", "@babel/plugin-transform-modules-umd@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-modules-umd/-/plugin-transform-modules-umd-7.12.13.tgz#26c66f161d3456674e344b4b1255de4d530cfb37"
+  integrity sha512-BgZndyABRML4z6ibpi7Z98m4EVLFI9tVsZDADC14AElFaNHHBcJIovflJ6wtCqFxwy2YJ1tJhGRsr0yLPKoN+w==
+  dependencies:
+    "@babel/helper-module-transforms" "^7.12.13"
+    "@babel/helper-plugin-utils" "^7.12.13"
+
+"@babel/plugin-transform-named-capturing-groups-regex@^7.12.1", "@babel/plugin-transform-named-capturing-groups-regex@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-named-capturing-groups-regex/-/plugin-transform-named-capturing-groups-regex-7.12.13.tgz#2213725a5f5bbbe364b50c3ba5998c9599c5c9d9"
+  integrity sha512-Xsm8P2hr5hAxyYblrfACXpQKdQbx4m2df9/ZZSQ8MAhsadw06+jW7s9zsSw6he+mJZXRlVMyEnVktJo4zjk1WA==
+  dependencies:
+    "@babel/helper-create-regexp-features-plugin" "^7.12.13"
+
+"@babel/plugin-transform-new-target@^7.12.1", "@babel/plugin-transform-new-target@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-new-target/-/plugin-transform-new-target-7.12.13.tgz#e22d8c3af24b150dd528cbd6e685e799bf1c351c"
+  integrity sha512-/KY2hbLxrG5GTQ9zzZSc3xWiOy379pIETEhbtzwZcw9rvuaVV4Fqy7BYGYOWZnaoXIQYbbJ0ziXLa/sKcGCYEQ==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.12.13"
+
+"@babel/plugin-transform-object-super@^7.12.1", "@babel/plugin-transform-object-super@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-object-super/-/plugin-transform-object-super-7.12.13.tgz#b4416a2d63b8f7be314f3d349bd55a9c1b5171f7"
+  integrity sha512-JzYIcj3XtYspZDV8j9ulnoMPZZnF/Cj0LUxPOjR89BdBVx+zYJI9MdMIlUZjbXDX+6YVeS6I3e8op+qQ3BYBoQ==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.12.13"
+    "@babel/helper-replace-supers" "^7.12.13"
+
+"@babel/plugin-transform-parameters@^7.12.1", "@babel/plugin-transform-parameters@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-parameters/-/plugin-transform-parameters-7.12.13.tgz#461e76dfb63c2dfd327b8a008a9e802818ce9853"
+  integrity sha512-e7QqwZalNiBRHCpJg/P8s/VJeSRYgmtWySs1JwvfwPqhBbiWfOcHDKdeAi6oAyIimoKWBlwc8oTgbZHdhCoVZA==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.12.13"
+
+"@babel/plugin-transform-property-literals@^7.12.1", "@babel/plugin-transform-property-literals@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-property-literals/-/plugin-transform-property-literals-7.12.13.tgz#4e6a9e37864d8f1b3bc0e2dce7bf8857db8b1a81"
+  integrity sha512-nqVigwVan+lR+g8Fj8Exl0UQX2kymtjcWfMOYM1vTYEKujeyv2SkMgazf2qNcK7l4SDiKyTA/nHCPqL4e2zo1A==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.12.13"
+
+"@babel/plugin-transform-react-constant-elements@^7.12.1":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-react-constant-elements/-/plugin-transform-react-constant-elements-7.12.13.tgz#f8ee56888545d53d80f766b3cc1563ab2c241f92"
+  integrity sha512-qmzKVTn46Upvtxv8LQoQ8mTCdUC83AOVQIQm57e9oekLT5cmK9GOMOfcWhe8jMNx4UJXn/UDhVZ/7lGofVNeDQ==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.12.13"
+
+"@babel/plugin-transform-react-display-name@7.12.1":
+  version "7.12.1"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-react-display-name/-/plugin-transform-react-display-name-7.12.1.tgz#1cbcd0c3b1d6648c55374a22fc9b6b7e5341c00d"
+  integrity sha512-cAzB+UzBIrekfYxyLlFqf/OagTvHLcVBb5vpouzkYkBclRPraiygVnafvAoipErZLI8ANv8Ecn6E/m5qPXD26w==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.10.4"
+
+"@babel/plugin-transform-react-display-name@^7.12.1", "@babel/plugin-transform-react-display-name@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-react-display-name/-/plugin-transform-react-display-name-7.12.13.tgz#c28effd771b276f4647411c9733dbb2d2da954bd"
+  integrity sha512-MprESJzI9O5VnJZrL7gg1MpdqmiFcUv41Jc7SahxYsNP2kDkFqClxxTZq+1Qv4AFCamm+GXMRDQINNn+qrxmiA==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.12.13"
+
+"@babel/plugin-transform-react-jsx-development@^7.12.1", "@babel/plugin-transform-react-jsx-development@^7.12.12":
+  version "7.12.17"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-react-jsx-development/-/plugin-transform-react-jsx-development-7.12.17.tgz#f510c0fa7cd7234153539f9a362ced41a5ca1447"
+  integrity sha512-BPjYV86SVuOaudFhsJR1zjgxxOhJDt6JHNoD48DxWEIxUCAMjV1ys6DYw4SDYZh0b1QsS2vfIA9t/ZsQGsDOUQ==
+  dependencies:
+    "@babel/plugin-transform-react-jsx" "^7.12.17"
+
+"@babel/plugin-transform-react-jsx-self@^7.12.1":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-react-jsx-self/-/plugin-transform-react-jsx-self-7.12.13.tgz#422d99d122d592acab9c35ea22a6cfd9bf189f60"
+  integrity sha512-FXYw98TTJ125GVCCkFLZXlZ1qGcsYqNQhVBQcZjyrwf8FEUtVfKIoidnO8S0q+KBQpDYNTmiGo1gn67Vti04lQ==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.12.13"
+
+"@babel/plugin-transform-react-jsx-source@^7.12.1":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-react-jsx-source/-/plugin-transform-react-jsx-source-7.12.13.tgz#051d76126bee5c9a6aa3ba37be2f6c1698856bcb"
+  integrity sha512-O5JJi6fyfih0WfDgIJXksSPhGP/G0fQpfxYy87sDc+1sFmsCS6wr3aAn+whbzkhbjtq4VMqLRaSzR6IsshIC0Q==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.12.13"
+
+"@babel/plugin-transform-react-jsx@^7.12.1", "@babel/plugin-transform-react-jsx@^7.12.13", "@babel/plugin-transform-react-jsx@^7.12.17":
+  version "7.12.17"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-react-jsx/-/plugin-transform-react-jsx-7.12.17.tgz#dd2c1299f5e26de584939892de3cfc1807a38f24"
+  integrity sha512-mwaVNcXV+l6qJOuRhpdTEj8sT/Z0owAVWf9QujTZ0d2ye9X/K+MTOTSizcgKOj18PGnTc/7g1I4+cIUjsKhBcw==
+  dependencies:
+    "@babel/helper-annotate-as-pure" "^7.12.13"
+    "@babel/helper-module-imports" "^7.12.13"
+    "@babel/helper-plugin-utils" "^7.12.13"
+    "@babel/plugin-syntax-jsx" "^7.12.13"
+    "@babel/types" "^7.12.17"
+
+"@babel/plugin-transform-react-pure-annotations@^7.12.1":
+  version "7.12.1"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-react-pure-annotations/-/plugin-transform-react-pure-annotations-7.12.1.tgz#05d46f0ab4d1339ac59adf20a1462c91b37a1a42"
+  integrity sha512-RqeaHiwZtphSIUZ5I85PEH19LOSzxfuEazoY7/pWASCAIBuATQzpSVD+eT6MebeeZT2F4eSL0u4vw6n4Nm0Mjg==
+  dependencies:
+    "@babel/helper-annotate-as-pure" "^7.10.4"
+    "@babel/helper-plugin-utils" "^7.10.4"
+
+"@babel/plugin-transform-regenerator@^7.12.1", "@babel/plugin-transform-regenerator@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-regenerator/-/plugin-transform-regenerator-7.12.13.tgz#b628bcc9c85260ac1aeb05b45bde25210194a2f5"
+  integrity sha512-lxb2ZAvSLyJ2PEe47hoGWPmW22v7CtSl9jW8mingV4H2sEX/JOcrAj2nPuGWi56ERUm2bUpjKzONAuT6HCn2EA==
+  dependencies:
+    regenerator-transform "^0.14.2"
+
+"@babel/plugin-transform-reserved-words@^7.12.1", "@babel/plugin-transform-reserved-words@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-reserved-words/-/plugin-transform-reserved-words-7.12.13.tgz#7d9988d4f06e0fe697ea1d9803188aa18b472695"
+  integrity sha512-xhUPzDXxZN1QfiOy/I5tyye+TRz6lA7z6xaT4CLOjPRMVg1ldRf0LHw0TDBpYL4vG78556WuHdyO9oi5UmzZBg==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.12.13"
+
+"@babel/plugin-transform-runtime@7.12.1":
+  version "7.12.1"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-runtime/-/plugin-transform-runtime-7.12.1.tgz#04b792057eb460389ff6a4198e377614ea1e7ba5"
+  integrity sha512-Ac/H6G9FEIkS2tXsZjL4RAdS3L3WHxci0usAnz7laPWUmFiGtj7tIASChqKZMHTSQTQY6xDbOq+V1/vIq3QrWg==
+  dependencies:
+    "@babel/helper-module-imports" "^7.12.1"
+    "@babel/helper-plugin-utils" "^7.10.4"
+    resolve "^1.8.1"
+    semver "^5.5.1"
+
+"@babel/plugin-transform-shorthand-properties@^7.12.1", "@babel/plugin-transform-shorthand-properties@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-shorthand-properties/-/plugin-transform-shorthand-properties-7.12.13.tgz#db755732b70c539d504c6390d9ce90fe64aff7ad"
+  integrity sha512-xpL49pqPnLtf0tVluuqvzWIgLEhuPpZzvs2yabUHSKRNlN7ScYU7aMlmavOeyXJZKgZKQRBlh8rHbKiJDraTSw==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.12.13"
+
+"@babel/plugin-transform-spread@^7.12.1", "@babel/plugin-transform-spread@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-spread/-/plugin-transform-spread-7.12.13.tgz#ca0d5645abbd560719c354451b849f14df4a7949"
+  integrity sha512-dUCrqPIowjqk5pXsx1zPftSq4sT0aCeZVAxhdgs3AMgyaDmoUT0G+5h3Dzja27t76aUEIJWlFgPJqJ/d4dbTtg==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.12.13"
+    "@babel/helper-skip-transparent-expression-wrappers" "^7.12.1"
+
+"@babel/plugin-transform-sticky-regex@^7.12.1", "@babel/plugin-transform-sticky-regex@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-sticky-regex/-/plugin-transform-sticky-regex-7.12.13.tgz#760ffd936face73f860ae646fb86ee82f3d06d1f"
+  integrity sha512-Jc3JSaaWT8+fr7GRvQP02fKDsYk4K/lYwWq38r/UGfaxo89ajud321NH28KRQ7xy1Ybc0VUE5Pz8psjNNDUglg==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.12.13"
+
+"@babel/plugin-transform-template-literals@^7.12.1", "@babel/plugin-transform-template-literals@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-template-literals/-/plugin-transform-template-literals-7.12.13.tgz#655037b07ebbddaf3b7752f55d15c2fd6f5aa865"
+  integrity sha512-arIKlWYUgmNsF28EyfmiQHJLJFlAJNYkuQO10jL46ggjBpeb2re1P9K9YGxNJB45BqTbaslVysXDYm/g3sN/Qg==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.12.13"
+
+"@babel/plugin-transform-typeof-symbol@^7.12.1", "@babel/plugin-transform-typeof-symbol@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-typeof-symbol/-/plugin-transform-typeof-symbol-7.12.13.tgz#785dd67a1f2ea579d9c2be722de8c84cb85f5a7f"
+  integrity sha512-eKv/LmUJpMnu4npgfvs3LiHhJua5fo/CysENxa45YCQXZwKnGCQKAg87bvoqSW1fFT+HA32l03Qxsm8ouTY3ZQ==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.12.13"
+
+"@babel/plugin-transform-typescript@^7.12.1":
+  version "7.12.17"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-typescript/-/plugin-transform-typescript-7.12.17.tgz#4aa6a5041888dd2e5d316ec39212b0cf855211bb"
+  integrity sha512-1bIYwnhRoetxkFonuZRtDZPFEjl1l5r+3ITkxLC3mlMaFja+GQFo94b/WHEPjqWLU9Bc+W4oFZbvCGe9eYMu1g==
+  dependencies:
+    "@babel/helper-create-class-features-plugin" "^7.12.17"
+    "@babel/helper-plugin-utils" "^7.12.13"
+    "@babel/plugin-syntax-typescript" "^7.12.13"
+
+"@babel/plugin-transform-unicode-escapes@^7.12.1", "@babel/plugin-transform-unicode-escapes@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-unicode-escapes/-/plugin-transform-unicode-escapes-7.12.13.tgz#840ced3b816d3b5127dd1d12dcedc5dead1a5e74"
+  integrity sha512-0bHEkdwJ/sN/ikBHfSmOXPypN/beiGqjo+o4/5K+vxEFNPRPdImhviPakMKG4x96l85emoa0Z6cDflsdBusZbw==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.12.13"
+
+"@babel/plugin-transform-unicode-regex@^7.12.1", "@babel/plugin-transform-unicode-regex@^7.12.13":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-unicode-regex/-/plugin-transform-unicode-regex-7.12.13.tgz#b52521685804e155b1202e83fc188d34bb70f5ac"
+  integrity sha512-mDRzSNY7/zopwisPZ5kM9XKCfhchqIYwAKRERtEnhYscZB79VRekuRSoYbN0+KVe3y8+q1h6A4svXtP7N+UoCA==
+  dependencies:
+    "@babel/helper-create-regexp-features-plugin" "^7.12.13"
+    "@babel/helper-plugin-utils" "^7.12.13"
+
+"@babel/preset-env@7.12.1":
+  version "7.12.1"
+  resolved "https://registry.yarnpkg.com/@babel/preset-env/-/preset-env-7.12.1.tgz#9c7e5ca82a19efc865384bb4989148d2ee5d7ac2"
+  integrity sha512-H8kxXmtPaAGT7TyBvSSkoSTUK6RHh61So05SyEbpmr0MCZrsNYn7mGMzzeYoOUCdHzww61k8XBft2TaES+xPLg==
+  dependencies:
+    "@babel/compat-data" "^7.12.1"
+    "@babel/helper-compilation-targets" "^7.12.1"
+    "@babel/helper-module-imports" "^7.12.1"
+    "@babel/helper-plugin-utils" "^7.10.4"
+    "@babel/helper-validator-option" "^7.12.1"
+    "@babel/plugin-proposal-async-generator-functions" "^7.12.1"
+    "@babel/plugin-proposal-class-properties" "^7.12.1"
+    "@babel/plugin-proposal-dynamic-import" "^7.12.1"
+    "@babel/plugin-proposal-export-namespace-from" "^7.12.1"
+    "@babel/plugin-proposal-json-strings" "^7.12.1"
+    "@babel/plugin-proposal-logical-assignment-operators" "^7.12.1"
+    "@babel/plugin-proposal-nullish-coalescing-operator" "^7.12.1"
+    "@babel/plugin-proposal-numeric-separator" "^7.12.1"
+    "@babel/plugin-proposal-object-rest-spread" "^7.12.1"
+    "@babel/plugin-proposal-optional-catch-binding" "^7.12.1"
+    "@babel/plugin-proposal-optional-chaining" "^7.12.1"
+    "@babel/plugin-proposal-private-methods" "^7.12.1"
+    "@babel/plugin-proposal-unicode-property-regex" "^7.12.1"
+    "@babel/plugin-syntax-async-generators" "^7.8.0"
+    "@babel/plugin-syntax-class-properties" "^7.12.1"
+    "@babel/plugin-syntax-dynamic-import" "^7.8.0"
+    "@babel/plugin-syntax-export-namespace-from" "^7.8.3"
+    "@babel/plugin-syntax-json-strings" "^7.8.0"
+    "@babel/plugin-syntax-logical-assignment-operators" "^7.10.4"
+    "@babel/plugin-syntax-nullish-coalescing-operator" "^7.8.0"
+    "@babel/plugin-syntax-numeric-separator" "^7.10.4"
+    "@babel/plugin-syntax-object-rest-spread" "^7.8.0"
+    "@babel/plugin-syntax-optional-catch-binding" "^7.8.0"
+    "@babel/plugin-syntax-optional-chaining" "^7.8.0"
+    "@babel/plugin-syntax-top-level-await" "^7.12.1"
+    "@babel/plugin-transform-arrow-functions" "^7.12.1"
+    "@babel/plugin-transform-async-to-generator" "^7.12.1"
+    "@babel/plugin-transform-block-scoped-functions" "^7.12.1"
+    "@babel/plugin-transform-block-scoping" "^7.12.1"
+    "@babel/plugin-transform-classes" "^7.12.1"
+    "@babel/plugin-transform-computed-properties" "^7.12.1"
+    "@babel/plugin-transform-destructuring" "^7.12.1"
+    "@babel/plugin-transform-dotall-regex" "^7.12.1"
+    "@babel/plugin-transform-duplicate-keys" "^7.12.1"
+    "@babel/plugin-transform-exponentiation-operator" "^7.12.1"
+    "@babel/plugin-transform-for-of" "^7.12.1"
+    "@babel/plugin-transform-function-name" "^7.12.1"
+    "@babel/plugin-transform-literals" "^7.12.1"
+    "@babel/plugin-transform-member-expression-literals" "^7.12.1"
+    "@babel/plugin-transform-modules-amd" "^7.12.1"
+    "@babel/plugin-transform-modules-commonjs" "^7.12.1"
+    "@babel/plugin-transform-modules-systemjs" "^7.12.1"
+    "@babel/plugin-transform-modules-umd" "^7.12.1"
+    "@babel/plugin-transform-named-capturing-groups-regex" "^7.12.1"
+    "@babel/plugin-transform-new-target" "^7.12.1"
+    "@babel/plugin-transform-object-super" "^7.12.1"
+    "@babel/plugin-transform-parameters" "^7.12.1"
+    "@babel/plugin-transform-property-literals" "^7.12.1"
+    "@babel/plugin-transform-regenerator" "^7.12.1"
+    "@babel/plugin-transform-reserved-words" "^7.12.1"
+    "@babel/plugin-transform-shorthand-properties" "^7.12.1"
+    "@babel/plugin-transform-spread" "^7.12.1"
+    "@babel/plugin-transform-sticky-regex" "^7.12.1"
+    "@babel/plugin-transform-template-literals" "^7.12.1"
+    "@babel/plugin-transform-typeof-symbol" "^7.12.1"
+    "@babel/plugin-transform-unicode-escapes" "^7.12.1"
+    "@babel/plugin-transform-unicode-regex" "^7.12.1"
+    "@babel/preset-modules" "^0.1.3"
+    "@babel/types" "^7.12.1"
+    core-js-compat "^3.6.2"
+    semver "^5.5.0"
+
+"@babel/preset-env@^7.12.1", "@babel/preset-env@^7.8.4":
+  version "7.12.17"
+  resolved "https://registry.yarnpkg.com/@babel/preset-env/-/preset-env-7.12.17.tgz#94a3793ff089c32ee74d76a3c03a7597693ebaaa"
+  integrity sha512-9PMijx8zFbCwTHrd2P4PJR5nWGH3zWebx2OcpTjqQrHhCiL2ssSR2Sc9ko2BsI2VmVBfoaQmPrlMTCui4LmXQg==
+  dependencies:
+    "@babel/compat-data" "^7.12.13"
+    "@babel/helper-compilation-targets" "^7.12.17"
+    "@babel/helper-module-imports" "^7.12.13"
+    "@babel/helper-plugin-utils" "^7.12.13"
+    "@babel/helper-validator-option" "^7.12.17"
+    "@babel/plugin-proposal-async-generator-functions" "^7.12.13"
+    "@babel/plugin-proposal-class-properties" "^7.12.13"
+    "@babel/plugin-proposal-dynamic-import" "^7.12.17"
+    "@babel/plugin-proposal-export-namespace-from" "^7.12.13"
+    "@babel/plugin-proposal-json-strings" "^7.12.13"
+    "@babel/plugin-proposal-logical-assignment-operators" "^7.12.13"
+    "@babel/plugin-proposal-nullish-coalescing-operator" "^7.12.13"
+    "@babel/plugin-proposal-numeric-separator" "^7.12.13"
+    "@babel/plugin-proposal-object-rest-spread" "^7.12.13"
+    "@babel/plugin-proposal-optional-catch-binding" "^7.12.13"
+    "@babel/plugin-proposal-optional-chaining" "^7.12.17"
+    "@babel/plugin-proposal-private-methods" "^7.12.13"
+    "@babel/plugin-proposal-unicode-property-regex" "^7.12.13"
+    "@babel/plugin-syntax-async-generators" "^7.8.0"
+    "@babel/plugin-syntax-class-properties" "^7.12.13"
+    "@babel/plugin-syntax-dynamic-import" "^7.8.0"
+    "@babel/plugin-syntax-export-namespace-from" "^7.8.3"
+    "@babel/plugin-syntax-json-strings" "^7.8.0"
+    "@babel/plugin-syntax-logical-assignment-operators" "^7.10.4"
+    "@babel/plugin-syntax-nullish-coalescing-operator" "^7.8.0"
+    "@babel/plugin-syntax-numeric-separator" "^7.10.4"
+    "@babel/plugin-syntax-object-rest-spread" "^7.8.0"
+    "@babel/plugin-syntax-optional-catch-binding" "^7.8.0"
+    "@babel/plugin-syntax-optional-chaining" "^7.8.0"
+    "@babel/plugin-syntax-top-level-await" "^7.12.13"
+    "@babel/plugin-transform-arrow-functions" "^7.12.13"
+    "@babel/plugin-transform-async-to-generator" "^7.12.13"
+    "@babel/plugin-transform-block-scoped-functions" "^7.12.13"
+    "@babel/plugin-transform-block-scoping" "^7.12.13"
+    "@babel/plugin-transform-classes" "^7.12.13"
+    "@babel/plugin-transform-computed-properties" "^7.12.13"
+    "@babel/plugin-transform-destructuring" "^7.12.13"
+    "@babel/plugin-transform-dotall-regex" "^7.12.13"
+    "@babel/plugin-transform-duplicate-keys" "^7.12.13"
+    "@babel/plugin-transform-exponentiation-operator" "^7.12.13"
+    "@babel/plugin-transform-for-of" "^7.12.13"
+    "@babel/plugin-transform-function-name" "^7.12.13"
+    "@babel/plugin-transform-literals" "^7.12.13"
+    "@babel/plugin-transform-member-expression-literals" "^7.12.13"
+    "@babel/plugin-transform-modules-amd" "^7.12.13"
+    "@babel/plugin-transform-modules-commonjs" "^7.12.13"
+    "@babel/plugin-transform-modules-systemjs" "^7.12.13"
+    "@babel/plugin-transform-modules-umd" "^7.12.13"
+    "@babel/plugin-transform-named-capturing-groups-regex" "^7.12.13"
+    "@babel/plugin-transform-new-target" "^7.12.13"
+    "@babel/plugin-transform-object-super" "^7.12.13"
+    "@babel/plugin-transform-parameters" "^7.12.13"
+    "@babel/plugin-transform-property-literals" "^7.12.13"
+    "@babel/plugin-transform-regenerator" "^7.12.13"
+    "@babel/plugin-transform-reserved-words" "^7.12.13"
+    "@babel/plugin-transform-shorthand-properties" "^7.12.13"
+    "@babel/plugin-transform-spread" "^7.12.13"
+    "@babel/plugin-transform-sticky-regex" "^7.12.13"
+    "@babel/plugin-transform-template-literals" "^7.12.13"
+    "@babel/plugin-transform-typeof-symbol" "^7.12.13"
+    "@babel/plugin-transform-unicode-escapes" "^7.12.13"
+    "@babel/plugin-transform-unicode-regex" "^7.12.13"
+    "@babel/preset-modules" "^0.1.3"
+    "@babel/types" "^7.12.17"
+    core-js-compat "^3.8.0"
+    semver "^5.5.0"
+
+"@babel/preset-modules@^0.1.3":
+  version "0.1.4"
+  resolved "https://registry.yarnpkg.com/@babel/preset-modules/-/preset-modules-0.1.4.tgz#362f2b68c662842970fdb5e254ffc8fc1c2e415e"
+  integrity sha512-J36NhwnfdzpmH41M1DrnkkgAqhZaqr/NBdPfQ677mLzlaXo+oDiv1deyCDtgAhz8p328otdob0Du7+xgHGZbKg==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.0.0"
+    "@babel/plugin-proposal-unicode-property-regex" "^7.4.4"
+    "@babel/plugin-transform-dotall-regex" "^7.4.4"
+    "@babel/types" "^7.4.4"
+    esutils "^2.0.2"
+
+"@babel/preset-react@7.12.1":
+  version "7.12.1"
+  resolved "https://registry.yarnpkg.com/@babel/preset-react/-/preset-react-7.12.1.tgz#7f022b13f55b6dd82f00f16d1c599ae62985358c"
+  integrity sha512-euCExymHCi0qB9u5fKw7rvlw7AZSjw/NaB9h7EkdTt5+yHRrXdiRTh7fkG3uBPpJg82CqLfp1LHLqWGSCrab+g==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.10.4"
+    "@babel/plugin-transform-react-display-name" "^7.12.1"
+    "@babel/plugin-transform-react-jsx" "^7.12.1"
+    "@babel/plugin-transform-react-jsx-development" "^7.12.1"
+    "@babel/plugin-transform-react-jsx-self" "^7.12.1"
+    "@babel/plugin-transform-react-jsx-source" "^7.12.1"
+    "@babel/plugin-transform-react-pure-annotations" "^7.12.1"
+
+"@babel/preset-react@^7.12.5":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/preset-react/-/preset-react-7.12.13.tgz#5f911b2eb24277fa686820d5bd81cad9a0602a0a"
+  integrity sha512-TYM0V9z6Abb6dj1K7i5NrEhA13oS5ujUYQYDfqIBXYHOc2c2VkFgc+q9kyssIyUfy4/hEwqrgSlJ/Qgv8zJLsA==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.12.13"
+    "@babel/plugin-transform-react-display-name" "^7.12.13"
+    "@babel/plugin-transform-react-jsx" "^7.12.13"
+    "@babel/plugin-transform-react-jsx-development" "^7.12.12"
+    "@babel/plugin-transform-react-pure-annotations" "^7.12.1"
+
+"@babel/preset-typescript@7.12.1":
+  version "7.12.1"
+  resolved "https://registry.yarnpkg.com/@babel/preset-typescript/-/preset-typescript-7.12.1.tgz#86480b483bb97f75036e8864fe404cc782cc311b"
+  integrity sha512-hNK/DhmoJPsksdHuI/RVrcEws7GN5eamhi28JkO52MqIxU8Z0QpmiSOQxZHWOHV7I3P4UjHV97ay4TcamMA6Kw==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.10.4"
+    "@babel/plugin-transform-typescript" "^7.12.1"
+
+"@babel/runtime-corejs3@^7.10.2":
+  version "7.12.18"
+  resolved "https://registry.yarnpkg.com/@babel/runtime-corejs3/-/runtime-corejs3-7.12.18.tgz#e5663237e5658e4c09586995d2dd6d2c8cfd6fc0"
+  integrity sha512-ngR7yhNTjDxxe1VYmhqQqqXZWujGb6g0IoA4qeG6MxNGRnIw2Zo8ImY8HfaQ7l3T6GklWhdNfyhWk0C0iocdVA==
+  dependencies:
+    core-js-pure "^3.0.0"
+    regenerator-runtime "^0.13.4"
+
+"@babel/runtime@7.12.1":
+  version "7.12.1"
+  resolved "https://registry.yarnpkg.com/@babel/runtime/-/runtime-7.12.1.tgz#b4116a6b6711d010b2dad3b7b6e43bf1b9954740"
+  integrity sha512-J5AIf3vPj3UwXaAzb5j1xM4WAQDX3EMgemF8rjCP3SoW09LfRKAXQKt6CoVYl230P6iWdRcBbnLDDdnqWxZSCA==
+  dependencies:
+    regenerator-runtime "^0.13.4"
+
+"@babel/runtime@^7.0.0", "@babel/runtime@^7.1.2", "@babel/runtime@^7.12.1", "@babel/runtime@^7.12.13", "@babel/runtime@^7.12.5", "@babel/runtime@^7.9.2":
+  version "7.13.7"
+  resolved "https://registry.yarnpkg.com/@babel/runtime/-/runtime-7.13.7.tgz#d494e39d198ee9ca04f4dcb76d25d9d7a1dc961a"
+  integrity sha512-h+ilqoX998mRVM5FtB5ijRuHUDVt5l3yfoOi2uh18Z/O3hvyaHQ39NpxVkCIG5yFs+mLq/ewFp8Bss6zmWv6ZA==
+  dependencies:
+    regenerator-runtime "^0.13.4"
+
+"@babel/runtime@^7.10.2", "@babel/runtime@^7.11.2", "@babel/runtime@^7.5.5", "@babel/runtime@^7.7.2", "@babel/runtime@^7.8.4":
+  version "7.12.18"
+  resolved "https://registry.yarnpkg.com/@babel/runtime/-/runtime-7.12.18.tgz#af137bd7e7d9705a412b3caaf991fe6aaa97831b"
+  integrity sha512-BogPQ7ciE6SYAUPtlm9tWbgI9+2AgqSam6QivMgXgAT+fKbgppaj4ZX15MHeLC1PVF5sNk70huBu20XxWOs8Cg==
+  dependencies:
+    regenerator-runtime "^0.13.4"
+
+"@babel/template@^7.10.4", "@babel/template@^7.12.13", "@babel/template@^7.3.3":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/template/-/template-7.12.13.tgz#530265be8a2589dbb37523844c5bcb55947fb327"
+  integrity sha512-/7xxiGA57xMo/P2GVvdEumr8ONhFOhfgq2ihK3h1e6THqzTAkHbkXgB0xI9yeTfIUoH3+oAeHhqm/I43OTbbjA==
+  dependencies:
+    "@babel/code-frame" "^7.12.13"
+    "@babel/parser" "^7.12.13"
+    "@babel/types" "^7.12.13"
+
+"@babel/traverse@^7.1.0", "@babel/traverse@^7.12.1", "@babel/traverse@^7.12.13", "@babel/traverse@^7.12.17", "@babel/traverse@^7.7.0":
+  version "7.12.17"
+  resolved "https://registry.yarnpkg.com/@babel/traverse/-/traverse-7.12.17.tgz#40ec8c7ffb502c4e54c7f95492dc11b88d718619"
+  integrity sha512-LGkTqDqdiwC6Q7fWSwQoas/oyiEYw6Hqjve5KOSykXkmFJFqzvGMb9niaUEag3Rlve492Mkye3gLw9FTv94fdQ==
+  dependencies:
+    "@babel/code-frame" "^7.12.13"
+    "@babel/generator" "^7.12.17"
+    "@babel/helper-function-name" "^7.12.13"
+    "@babel/helper-split-export-declaration" "^7.12.13"
+    "@babel/parser" "^7.12.17"
+    "@babel/types" "^7.12.17"
+    debug "^4.1.0"
+    globals "^11.1.0"
+    lodash "^4.17.19"
+
+"@babel/types@^7.0.0", "@babel/types@^7.12.1", "@babel/types@^7.12.13", "@babel/types@^7.12.17", "@babel/types@^7.12.6", "@babel/types@^7.3.0", "@babel/types@^7.3.3", "@babel/types@^7.4.4", "@babel/types@^7.7.0":
+  version "7.12.17"
+  resolved "https://registry.yarnpkg.com/@babel/types/-/types-7.12.17.tgz#9d711eb807e0934c90b8b1ca0eb1f7230d150963"
+  integrity sha512-tNMDjcv/4DIcHxErTgwB9q2ZcYyN0sUfgGKUK/mm1FJK7Wz+KstoEekxrl/tBiNDgLK1HGi+sppj1An/1DR4fQ==
+  dependencies:
+    "@babel/helper-validator-identifier" "^7.12.11"
+    lodash "^4.17.19"
+    to-fast-properties "^2.0.0"
+
+"@bcoe/v8-coverage@^0.2.3":
+  version "0.2.3"
+  resolved "https://registry.yarnpkg.com/@bcoe/v8-coverage/-/v8-coverage-0.2.3.tgz#75a2e8b51cb758a7553d6804a5932d7aace75c39"
+  integrity sha512-0hYQ8SB4Db5zvZB4axdMHGwEaQjkZzFjQiN9LVYvIFB2nSUHW9tYpxWriPrWDASIxiaXax83REcLxuSdnGPZtw==
+
+"@chakra-ui/accordion@1.1.2":
+  version "1.1.2"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/accordion/-/accordion-1.1.2.tgz#b45a44746276100601a39f88e3a5e150a2232294"
+  integrity sha512-ni4lwO7I1f9uHgV/FHZVfyr+FRDabXfX2cqpCtY2+QvBzaWM+55VAHJfbel2N6/eogXy5WSLJyYD5fQmyu7Fpg==
+  dependencies:
+    "@chakra-ui/descendant" "1.0.7"
+    "@chakra-ui/hooks" "1.1.4"
+    "@chakra-ui/icon" "1.1.1"
+    "@chakra-ui/transition" "1.0.8"
+    "@chakra-ui/utils" "1.2.0"
+
+"@chakra-ui/alert@1.1.1":
+  version "1.1.1"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/alert/-/alert-1.1.1.tgz#96286feab8b74f624325da9b51b6960043a7ba71"
+  integrity sha512-Hqbf4VuAL/gL6oLQapoF8BV5zAX41Rm+xN2q8c/jWZx5i3l7kWiQ5jn0dJ0prWnVdNbEPmIAqiU0UkSo/lUOjw==
+  dependencies:
+    "@chakra-ui/icon" "1.1.1"
+    "@chakra-ui/utils" "1.2.0"
+
+"@chakra-ui/avatar@1.1.2":
+  version "1.1.2"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/avatar/-/avatar-1.1.2.tgz#6b61253ecce850d0ab4c6d0b4045e5cf4969daed"
+  integrity sha512-CqXedZed9bEWzzs+8mkB/4NLmD+JbMetNvVbHtLlENta7jnOJDCMJpaXD9QMmiGKKNuqFHfZlGmLmxIMruZBpg==
+  dependencies:
+    "@chakra-ui/image" "1.0.7"
+    "@chakra-ui/utils" "1.2.0"
+
+"@chakra-ui/breadcrumb@1.1.1":
+  version "1.1.1"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/breadcrumb/-/breadcrumb-1.1.1.tgz#3f995a20ec0db39b5922dc4d7b12f1871a20e957"
+  integrity sha512-EnbMYwqPI8if0WJ2m/054fKXc+K7GN8jafaLCm4qfWedogF6t3huB0qvLA00Z6HUwTNfdCtESxU44VfGHsbNDQ==
+  dependencies:
+    "@chakra-ui/utils" "1.2.0"
+
+"@chakra-ui/button@1.1.2":
+  version "1.1.2"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/button/-/button-1.1.2.tgz#3c7a4e6bf38f3451c2b280ea366fc2da0054e992"
+  integrity sha512-6QEn6cL3v1VcdqCT92uqduLI7ip+VQk4Adxekt5WWHDvkw9WqQz1aOqKzfPTEau27WHBBymB09vJR66CoUZCiw==
+  dependencies:
+    "@chakra-ui/spinner" "1.1.1"
+    "@chakra-ui/utils" "1.2.0"
+
+"@chakra-ui/checkbox@1.2.3":
+  version "1.2.3"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/checkbox/-/checkbox-1.2.3.tgz#b0574f973515612ce99984c38dde17bc5d603821"
+  integrity sha512-op7o/tt4P9oj/N6X5LZUrdaK+VMWoeZavlPh1WWZJ34e26R8y51eCjTQdQURu95hHuwdm7EMK1wSJsINUgWP7A==
+  dependencies:
+    "@chakra-ui/hooks" "1.1.4"
+    "@chakra-ui/utils" "1.2.0"
+    "@chakra-ui/visually-hidden" "1.0.4"
+
+"@chakra-ui/clickable@1.0.4":
+  version "1.0.4"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/clickable/-/clickable-1.0.4.tgz#aa97871ccfe5ec66168a67b6df6e3309bfd34eb4"
+  integrity sha512-KAfOjz2zoF7OGay/rg9x2hPCgwd5WqnsxR3dgo6R6ULQ4dsee602kjy6OYxKyI9e6DUgodI/BDZq+57e7+wd5A==
+  dependencies:
+    "@chakra-ui/utils" "1.2.0"
+
+"@chakra-ui/close-button@1.1.1":
+  version "1.1.1"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/close-button/-/close-button-1.1.1.tgz#a9805fe1b8fb2b13e7f27e263d23bed9859bb99d"
+  integrity sha512-vTapJ3kZZ04xxR1c+EO1t7w5BYZmm/7NTCotAPN6SuehlcqzG0YP2t+fHk7YPlXiweTQKL6v5DLCBlSuqSjZtw==
+  dependencies:
+    "@chakra-ui/icon" "1.1.1"
+    "@chakra-ui/utils" "1.2.0"
+
+"@chakra-ui/color-mode@1.1.0":
+  version "1.1.0"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/color-mode/-/color-mode-1.1.0.tgz#fdb6f44790897754d2c2c444855be5af228ecb43"
+  integrity sha512-c9BdU/B3/WbYOJlAI6z02IzGBzWX1+icqWf9M+5psgTqCv5jqxOsqUKs39Zw6H+rpYqQqR20/i0hyDmN1eptpw==
+  dependencies:
+    "@chakra-ui/hooks" "1.1.4"
+    "@chakra-ui/utils" "1.2.0"
+
+"@chakra-ui/control-box@1.0.4":
+  version "1.0.4"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/control-box/-/control-box-1.0.4.tgz#82bdce22e771accfc56dd5896582ec475b7d490c"
+  integrity sha512-qiZAawX8EaWxk+QnCtpmGADV3D7mNy3DcPhIPmsY4XYfnh8wl9cJfm2B6u3we8nHhi8eQSReSHGbV5s884bO1g==
+  dependencies:
+    "@chakra-ui/utils" "1.2.0"
+
+"@chakra-ui/counter@1.0.7":
+  version "1.0.7"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/counter/-/counter-1.0.7.tgz#78a335bdca4768905235722924262ea98f828954"
+  integrity sha512-wcSqSZIvdumn8yIfpF7r/cuYQ6MQlSoY/WQu6aJuwpky/k4xgfVSQq/VCM/Jcb8VcnYeYmPasnKHo+5RY35gSw==
+  dependencies:
+    "@chakra-ui/hooks" "1.1.4"
+    "@chakra-ui/utils" "1.2.0"
+
+"@chakra-ui/css-reset@1.0.0":
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/css-reset/-/css-reset-1.0.0.tgz#8395921b35ef27bee0579a4d730c5ab7f7b39734"
+  integrity sha512-UaPsImGHvCgFO3ayp6Ugafu2/3/EG8wlW/8Y9Ihfk1UFv8cpV+3BfWKmuZ7IcmxcBL9dkP6E8p3/M1T0FB92hg==
+
+"@chakra-ui/descendant@1.0.7":
+  version "1.0.7"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/descendant/-/descendant-1.0.7.tgz#22e13fd732c742a9a74c0f414b0fbd03310299f2"
+  integrity sha512-PnyLyV8hD+STVr9KYzPN13hCj7pwSLvGtQc3J1d+XXvazBtmwUIaX9WZ632kXQhxlvdz83tOzDbJPQs3e1VU3A==
+  dependencies:
+    "@chakra-ui/hooks" "1.1.4"
+
+"@chakra-ui/editable@1.1.1":
+  version "1.1.1"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/editable/-/editable-1.1.1.tgz#43725979c32bd791d160016fa3a5d0c52e8785b1"
+  integrity sha512-p33kIcqBoM9c+hh10QRoV15Lb/sKT3KJoPwThjyDcBaNyvSyFhrOX0equVxjxD7Y4htp9/G7b8owx767lnobwg==
+  dependencies:
+    "@chakra-ui/hooks" "1.1.4"
+    "@chakra-ui/utils" "1.2.0"
+
+"@chakra-ui/focus-lock@1.1.0":
+  version "1.1.0"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/focus-lock/-/focus-lock-1.1.0.tgz#5b6e1623216d3ba135828fa508df38890a643059"
+  integrity sha512-yzW/By8DO+9kH4eT5y73POuO3HMDMLdy/1udEy95fcP0RbofIU03ytx439FIFB0JwyF8pUP4PEHB6zI6YxXCbQ==
+  dependencies:
+    "@chakra-ui/utils" "1.2.0"
+    react-focus-lock "2.5.0"
+
+"@chakra-ui/form-control@1.2.1":
+  version "1.2.1"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/form-control/-/form-control-1.2.1.tgz#70693011fdf396c292aac81575caba976276d385"
+  integrity sha512-pgp34e5DRHc8B+wUMxZNYBo8W7f2TdIOm04dO24WIIC+mPIu++QsV2O0SPOeN+WJzbirtq/8vibPmaZOfBdQVw==
+  dependencies:
+    "@chakra-ui/hooks" "1.1.4"
+    "@chakra-ui/icon" "1.1.1"
+    "@chakra-ui/utils" "1.2.0"
+
+"@chakra-ui/hooks@1.1.4":
+  version "1.1.4"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/hooks/-/hooks-1.1.4.tgz#369280f49c3204ccdf6a31b4436fe4938c15073c"
+  integrity sha512-5E4JT4Bl/JYe75N3/eU6eWkDhLCx1azcKpkvzNAgRFP1QqbXxAjxVnwHiMoNhWNFHBWRhkvTI/z4yBoOc7Rf4w==
+  dependencies:
+    "@chakra-ui/utils" "1.2.0"
+    compute-scroll-into-view "1.0.14"
+    copy-to-clipboard "3.3.1"
+
+"@chakra-ui/icon@1.1.1":
+  version "1.1.1"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/icon/-/icon-1.1.1.tgz#13317762a58f3e039b14a52f5cf978d037e64b4b"
+  integrity sha512-dL1D1q11MM+cL849jmADSjY8KqOWplAEb/XgLXX/ZZau7GHqYAXIwdYnfXwOYBaypiosUvGsm9g2zU4iHYZtdg==
+  dependencies:
+    "@chakra-ui/utils" "1.2.0"
+
+"@chakra-ui/icons@^1.0.5":
+  version "1.0.5"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/icons/-/icons-1.0.5.tgz#21eee12b9ab12da0430c62ea99e6081a6cec7b14"
+  integrity sha512-l8CgisPAS44ehKLw/hoHI6dfSX7pOAfylv8QkINVPEzhHwnNIg0wHzcm1cGkRXuq3DN8G0z88KmFegFFp02yiA==
+  dependencies:
+    "@chakra-ui/icon" "1.1.1"
+    "@types/react" "^17.0.0"
+
+"@chakra-ui/image@1.0.7":
+  version "1.0.7"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/image/-/image-1.0.7.tgz#ec49b2abb05549d4358675a8df860608294d19fe"
+  integrity sha512-GqPHBzWgvkmxuovD8sLvwO45Zh+vRa0qIKFg6mBeMwpdQh4aWHZJLw8Ln5Hh3WFRtJcIbZqKQV3dUJln8ZpQkQ==
+  dependencies:
+    "@chakra-ui/hooks" "1.1.4"
+    "@chakra-ui/utils" "1.2.0"
+
+"@chakra-ui/input@1.1.2":
+  version "1.1.2"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/input/-/input-1.1.2.tgz#8d1b1734870168802a4d4c679f9bf0fb7b4bc716"
+  integrity sha512-ipT5RpkwVTTzadvOEXt62m9a7Q3vH0cZf1Dis3xdh2FBJjR1Xk0Nr+jjXxtTj99Rn6UMxsVyq3EsSnH09O8o5A==
+  dependencies:
+    "@chakra-ui/form-control" "1.2.1"
+    "@chakra-ui/utils" "1.2.0"
+
+"@chakra-ui/layout@1.3.1":
+  version "1.3.1"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/layout/-/layout-1.3.1.tgz#5a9e0bc67c3f4ce24e00da75850576d9c0d89d8c"
+  integrity sha512-xkIemd9Sloq0kOnbyxnXO22W5YFlMxxHVJLkX9cN5+13i+Qi/Fk/da+yyP8wQ4g8zxQORFZb5K47GdJneGOUlQ==
+  dependencies:
+    "@chakra-ui/icon" "1.1.1"
+    "@chakra-ui/utils" "1.2.0"
+
+"@chakra-ui/live-region@1.0.4":
+  version "1.0.4"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/live-region/-/live-region-1.0.4.tgz#3dc528e89fde5fa950cb42816cf0a81c010c66ba"
+  integrity sha512-sJkCqT1chDU04MMgFCy2amq/h/95IoOLbRhuJpWm4V7WIHT/YsQURk0DCHr8JhPXgTJx88jgNve/WFdVtZEmDw==
+  dependencies:
+    "@chakra-ui/utils" "1.2.0"
+
+"@chakra-ui/media-query@1.0.5":
+  version "1.0.5"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/media-query/-/media-query-1.0.5.tgz#bad0c45919d70c3543a1114f988865ebe7720631"
+  integrity sha512-QoZt7YkPKEZhdHQ1M6F6QzRC0gUlLCDDrDEPHn3D4AZzhYcmygH4TlSTi2WwhZjdiwgTRJT4zqpkkdej0sLXuA==
+  dependencies:
+    "@chakra-ui/utils" "1.2.0"
+
+"@chakra-ui/menu@1.1.2":
+  version "1.1.2"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/menu/-/menu-1.1.2.tgz#6357a56c713f180d6fe1e570ecb6f8892e3dd2cc"
+  integrity sha512-nUfzsXb/HyNrDyJrzJM7+ZajZMzKLHNXhqerHowOoMekTYJcX6MF/K6Sv78KemMrrAHHrfUh2p/2aK0VHPx6pw==
+  dependencies:
+    "@chakra-ui/clickable" "1.0.4"
+    "@chakra-ui/descendant" "1.0.7"
+    "@chakra-ui/hooks" "1.1.4"
+    "@chakra-ui/popper" "1.1.4"
+    "@chakra-ui/transition" "1.0.8"
+    "@chakra-ui/utils" "1.2.0"
+
+"@chakra-ui/modal@1.6.0":
+  version "1.6.0"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/modal/-/modal-1.6.0.tgz#58ba52bd6181217a886e10a730514679b9aa8ca0"
+  integrity sha512-mrCQTt+Dyo9enbN3YvBjoxxxjC63vT60EBdM+EKoymcHtC6tJ8gH4uvDVSxvAwtd4TWHrv2x+MhI/W/nb6f9Hg==
+  dependencies:
+    "@chakra-ui/close-button" "1.1.1"
+    "@chakra-ui/focus-lock" "1.1.0"
+    "@chakra-ui/hooks" "1.1.4"
+    "@chakra-ui/portal" "1.1.1"
+    "@chakra-ui/transition" "1.0.8"
+    "@chakra-ui/utils" "1.2.0"
+    aria-hidden "^1.1.1"
+    react-remove-scroll "2.4.1"
+
+"@chakra-ui/number-input@1.1.1":
+  version "1.1.1"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/number-input/-/number-input-1.1.1.tgz#c62999faf05ea50a4cb7f24a0f22eaaf304ca7ee"
+  integrity sha512-BAVfv79andVOEYx3YWLb0RrdLLqE484CwGxjoJ3aE8OprW/WE8Ghl2BO/wxWBXwX/TxnvB6JpC4iM4u6eHufnQ==
+  dependencies:
+    "@chakra-ui/counter" "1.0.7"
+    "@chakra-ui/form-control" "1.2.1"
+    "@chakra-ui/hooks" "1.1.4"
+    "@chakra-ui/icon" "1.1.1"
+    "@chakra-ui/utils" "1.2.0"
+
+"@chakra-ui/pin-input@1.4.0":
+  version "1.4.0"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/pin-input/-/pin-input-1.4.0.tgz#ab13ee640e30a02a6c172a12c8453cf01470a92b"
+  integrity sha512-85XXAMNNgX7RG0ca9tU4kJeYYrj9+jtUWINatYXVzcvkx/T1VRM3ohRQDtDdZ7wovzEE90mtzkQGPISZhLKHug==
+  dependencies:
+    "@chakra-ui/descendant" "1.0.7"
+    "@chakra-ui/hooks" "1.1.4"
+    "@chakra-ui/utils" "1.2.0"
+
+"@chakra-ui/popover@1.2.2":
+  version "1.2.2"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/popover/-/popover-1.2.2.tgz#cf8f0449dc0ad1c27460c80c2a037aa6f304a077"
+  integrity sha512-J2let+7e1RbLP/SG+waHI7I/7DWq9KMQnh9baiUxn2PatxNHtnCI+raCAalXFuLQ93fwaBgf++a/BmFYfq3LLw==
+  dependencies:
+    "@chakra-ui/close-button" "1.1.1"
+    "@chakra-ui/hooks" "1.1.4"
+    "@chakra-ui/popper" "1.1.4"
+    "@chakra-ui/portal" "1.1.1"
+    "@chakra-ui/transition" "1.0.8"
+    "@chakra-ui/utils" "1.2.0"
+
+"@chakra-ui/popper@1.1.4":
+  version "1.1.4"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/popper/-/popper-1.1.4.tgz#6b44115dea192e9e23b270c32a77b19fcc10941a"
+  integrity sha512-eAZ6i/+7jd/fjgqWwE4SuhkKFkkWYDw0A/rfV093FtInc0lHDOjzXBgp5GEEbl4pnoPSP2AYJq+5JJFCjW9zIA==
+  dependencies:
+    "@chakra-ui/hooks" "1.1.4"
+    "@chakra-ui/utils" "1.2.0"
+    "@popperjs/core" "2.4.4"
+    dequal "2.0.2"
+
+"@chakra-ui/portal@1.1.1":
+  version "1.1.1"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/portal/-/portal-1.1.1.tgz#8ed8481f8e4b555c3cbcba1745767dc068b0d8e5"
+  integrity sha512-YzD/807srlkC1+F1jRaJYAlgtb2CN24RhYRVliV45xR59RkrgnAVVzt7+KmsUF4N9OK+OJhrMfhyM2sNvthw1g==
+  dependencies:
+    "@chakra-ui/hooks" "1.1.4"
+    "@chakra-ui/utils" "1.2.0"
+
+"@chakra-ui/progress@1.1.1":
+  version "1.1.1"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/progress/-/progress-1.1.1.tgz#416536d804953455bbe7d7196c713510f757e5b6"
+  integrity sha512-lGZaUPvi0ySwvIp1FG2RyrioBBPGyr3TzQxT5nf6eUVTfwps3LlAGcCTfNy59tK8vcwXz0uDDKtobfRFwxj19g==
+  dependencies:
+    "@chakra-ui/theme-tools" "1.0.4"
+    "@chakra-ui/utils" "1.2.0"
+
+"@chakra-ui/radio@1.2.3":
+  version "1.2.3"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/radio/-/radio-1.2.3.tgz#87e859c99f35af9d33cff20adbfed2c0f40811f5"
+  integrity sha512-HZDZnoMKJfJ1xlkWy4713Q5mHvF92EJFTY/ZaE6VHSBb1h+JjE1DmEO2Latt5OITx02Liv0dN0je3Hk1ncsgcg==
+  dependencies:
+    "@chakra-ui/form-control" "1.2.1"
+    "@chakra-ui/hooks" "1.1.4"
+    "@chakra-ui/utils" "1.2.0"
+    "@chakra-ui/visually-hidden" "1.0.4"
+
+"@chakra-ui/react@^1.3.3":
+  version "1.3.3"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/react/-/react-1.3.3.tgz#b45559fba32d018a66a7ec8f01ef9d0f33c0b8a5"
+  integrity sha512-61mVM98U5gTzQdCLqvcQRs4q7vsEHp8T5wNhbkPc1hw1tQ1iW9c7H4RyOQgqNqlolTCRbtoRGuh8TEQI9mQdQg==
+  dependencies:
+    "@chakra-ui/accordion" "1.1.2"
+    "@chakra-ui/alert" "1.1.1"
+    "@chakra-ui/avatar" "1.1.2"
+    "@chakra-ui/breadcrumb" "1.1.1"
+    "@chakra-ui/button" "1.1.2"
+    "@chakra-ui/checkbox" "1.2.3"
+    "@chakra-ui/close-button" "1.1.1"
+    "@chakra-ui/control-box" "1.0.4"
+    "@chakra-ui/counter" "1.0.7"
+    "@chakra-ui/css-reset" "1.0.0"
+    "@chakra-ui/editable" "1.1.1"
+    "@chakra-ui/form-control" "1.2.1"
+    "@chakra-ui/hooks" "1.1.4"
+    "@chakra-ui/icon" "1.1.1"
+    "@chakra-ui/image" "1.0.7"
+    "@chakra-ui/input" "1.1.2"
+    "@chakra-ui/layout" "1.3.1"
+    "@chakra-ui/live-region" "1.0.4"
+    "@chakra-ui/media-query" "1.0.5"
+    "@chakra-ui/menu" "1.1.2"
+    "@chakra-ui/modal" "1.6.0"
+    "@chakra-ui/number-input" "1.1.1"
+    "@chakra-ui/pin-input" "1.4.0"
+    "@chakra-ui/popover" "1.2.2"
+    "@chakra-ui/popper" "1.1.4"
+    "@chakra-ui/portal" "1.1.1"
+    "@chakra-ui/progress" "1.1.1"
+    "@chakra-ui/radio" "1.2.3"
+    "@chakra-ui/select" "1.1.1"
+    "@chakra-ui/skeleton" "1.1.3"
+    "@chakra-ui/slider" "1.1.1"
+    "@chakra-ui/spinner" "1.1.1"
+    "@chakra-ui/stat" "1.1.1"
+    "@chakra-ui/switch" "1.1.3"
+    "@chakra-ui/system" "1.3.1"
+    "@chakra-ui/table" "1.1.1"
+    "@chakra-ui/tabs" "1.1.1"
+    "@chakra-ui/tag" "1.1.1"
+    "@chakra-ui/textarea" "1.1.1"
+    "@chakra-ui/theme" "1.6.2"
+    "@chakra-ui/toast" "1.1.11"
+    "@chakra-ui/tooltip" "1.1.2"
+    "@chakra-ui/transition" "1.0.8"
+    "@chakra-ui/utils" "1.2.0"
+    "@chakra-ui/visually-hidden" "1.0.4"
+
+"@chakra-ui/select@1.1.1":
+  version "1.1.1"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/select/-/select-1.1.1.tgz#54becf1c70183968aab45c1fc01b8dc5c0ae1c60"
+  integrity sha512-7sFPXjBlV/6Ms60hHyLgiCMe93BF6z59HFrGQyXavvN3NNSpR4B4+AQqD5/guRi3GV8TamyPiHVIQIPw6wt2Eg==
+  dependencies:
+    "@chakra-ui/form-control" "1.2.1"
+    "@chakra-ui/utils" "1.2.0"
+
+"@chakra-ui/skeleton@1.1.3":
+  version "1.1.3"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/skeleton/-/skeleton-1.1.3.tgz#86905e88fedf3ed844da87fd0ad01c064988d72a"
+  integrity sha512-F2HK0/QAQ/BmkY0FC8Lhhquxl5bSL0OemF6gyPcmK21tQ5czvUBCElVUybFnYkyVLfihB5pvazKZky8xCEgfIQ==
+  dependencies:
+    "@chakra-ui/hooks" "1.1.4"
+    "@chakra-ui/media-query" "1.0.5"
+    "@chakra-ui/system" "1.3.1"
+    "@chakra-ui/utils" "1.2.0"
+
+"@chakra-ui/slider@1.1.1":
+  version "1.1.1"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/slider/-/slider-1.1.1.tgz#f329984b328588bd5f87d94d73a0be312cb5967b"
+  integrity sha512-D9xZqefmxx2clbd3iNK2bM1zFmygXXNZuwvPFCVWPa82zSOVPnfDjH1n+Z+VjPixEW6fl64sIl99oJdvDuH7wg==
+  dependencies:
+    "@chakra-ui/hooks" "1.1.4"
+    "@chakra-ui/utils" "1.2.0"
+
+"@chakra-ui/spinner@1.1.1":
+  version "1.1.1"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/spinner/-/spinner-1.1.1.tgz#803c3f75dc6af08901a156079c3e068e28ac8b85"
+  integrity sha512-dGT5DVvQwnj4B7FjIl6C/1ZmXgUFSA0ZC7JgQNQdJYc3DgtkGf0a6L+DhiyBneEPb9/RRHQJhHoRPndnHU15QA==
+  dependencies:
+    "@chakra-ui/utils" "1.2.0"
+    "@chakra-ui/visually-hidden" "1.0.4"
+
+"@chakra-ui/stat@1.1.1":
+  version "1.1.1"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/stat/-/stat-1.1.1.tgz#6ccab1734ea88a0c5d8cd2e2ed9a2c6e1af669e8"
+  integrity sha512-dG7SGe2ZEFugbA4kyCtSLRtwgMQ2pqq3QAWBjA/ZvdEdhL10EGp+bv3Ab0l1qju3DEpfyx3+M6mHxG5zWJ92Fw==
+  dependencies:
+    "@chakra-ui/icon" "1.1.1"
+    "@chakra-ui/utils" "1.2.0"
+    "@chakra-ui/visually-hidden" "1.0.4"
+
+"@chakra-ui/styled-system@1.7.1":
+  version "1.7.1"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/styled-system/-/styled-system-1.7.1.tgz#1483006a8cb123ce1c6f67f2ef4346c8b8fa6a4b"
+  integrity sha512-mhSakTWdh7ZEkqwRdoVW1seumIFq6Yu/Glal4VVcyhxS34V/VPLX0GtRfjC8cSpsYiwGZHX/7WJYWN2cldx+Gg==
+  dependencies:
+    "@chakra-ui/utils" "1.2.0"
+    css-get-unit "1.0.1"
+    csstype "^3.0.6"
+
+"@chakra-ui/switch@1.1.3":
+  version "1.1.3"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/switch/-/switch-1.1.3.tgz#aa72379daf1bce12173e26c6d8541d21088f7712"
+  integrity sha512-64o7GL3yFiLlh4qtdK//Bey4wLn8yqib9Duci9T+FRiFWLLDa9ksmlQGeX7Qe4AIolimXGa96Ys8yYAfzDqaJQ==
+  dependencies:
+    "@chakra-ui/checkbox" "1.2.3"
+    "@chakra-ui/utils" "1.2.0"
+
+"@chakra-ui/system@1.3.1":
+  version "1.3.1"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/system/-/system-1.3.1.tgz#65d5d6af288d4b04df088cf3e7c94c4c15b40dad"
+  integrity sha512-NMC0ajaCUIYnVxYBS3jRrgRmqK1p39MX1yB9J3BRgfVlKbDizkUOAJaqe+FX//3NVJ++QPJOjUu0azmlR6HYZw==
+  dependencies:
+    "@chakra-ui/color-mode" "1.1.0"
+    "@chakra-ui/styled-system" "1.7.1"
+    "@chakra-ui/utils" "1.2.0"
+    react-fast-compare "3.2.0"
+
+"@chakra-ui/table@1.1.1":
+  version "1.1.1"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/table/-/table-1.1.1.tgz#a2fb89c21e1405ab56b90225640e8e2fadfc8fd9"
+  integrity sha512-GdYbqN1q/QPQqca3jfWbyWJ7waUg6RbpQbsyyhFNWHDKhOb6H2y+cDZCHrOiFwBahWZxykR4ZcMJnQjPqtB7ag==
+  dependencies:
+    "@chakra-ui/utils" "1.2.0"
+
+"@chakra-ui/tabs@1.1.1":
+  version "1.1.1"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/tabs/-/tabs-1.1.1.tgz#a0772d8adb9d1452f103213443b7852837d94cf4"
+  integrity sha512-yfJct0Yfxt2fQ9KgbtVhC89OaB5iD8nLAFi3zhiuNQBp84OXESVfvanr1lHBg3YZLgVLW1O0QQLv3aHVGLtwYg==
+  dependencies:
+    "@chakra-ui/clickable" "1.0.4"
+    "@chakra-ui/descendant" "1.0.7"
+    "@chakra-ui/hooks" "1.1.4"
+    "@chakra-ui/utils" "1.2.0"
+
+"@chakra-ui/tag@1.1.1":
+  version "1.1.1"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/tag/-/tag-1.1.1.tgz#b95aee5f579cc45658ef6b75ead413fef772711c"
+  integrity sha512-I1ScSeaUEgNPY7lv2IZ0blTAb13wvu/UqGPuatG71PqI4LIKAtZJVxF/AnxAQY4WpbDKTp/t3z3DfyKr8Ccouw==
+  dependencies:
+    "@chakra-ui/icon" "1.1.1"
+    "@chakra-ui/utils" "1.2.0"
+
+"@chakra-ui/textarea@1.1.1":
+  version "1.1.1"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/textarea/-/textarea-1.1.1.tgz#d4c5274a79fbbae55eba0a3d3173429e72bd0e83"
+  integrity sha512-Mmw/mVfZSNf/0QpLe8Nnvpp1jAkRtjhKD4eDBG6AW6M0l6tST3LFbDC3qZePmEJpqxkOE3IyQgYEjHjb9PgcMA==
+  dependencies:
+    "@chakra-ui/form-control" "1.2.1"
+    "@chakra-ui/utils" "1.2.0"
+
+"@chakra-ui/theme-tools@1.0.4":
+  version "1.0.4"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/theme-tools/-/theme-tools-1.0.4.tgz#ead4886e61e3f054e48ca0a95a70d796fe7e3838"
+  integrity sha512-kx34izftAHvtRjxpkgWbnMx5DyGtUi8JoQO8E5bhwjJ5drNQl2yvNeoqLpHjf3YTqBYQqkz3VkzHIeHs3wZEwg==
+  dependencies:
+    "@chakra-ui/utils" "1.2.0"
+    "@types/tinycolor2" "1.4.2"
+    tinycolor2 "1.4.2"
+
+"@chakra-ui/theme@1.6.2":
+  version "1.6.2"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/theme/-/theme-1.6.2.tgz#6d1ff9ca0fb8461615cae9eb785e51c5faad6703"
+  integrity sha512-yM1pacXJfvpwjBUFvFQm/E3sG51/4IReKB6OB1ddr5i7Z/30cPzUQTeUrbfizGWMkrgMRy1ImEmZbo1ACN6gqw==
+  dependencies:
+    "@chakra-ui/theme-tools" "1.0.4"
+    "@chakra-ui/utils" "1.2.0"
+
+"@chakra-ui/toast@1.1.11":
+  version "1.1.11"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/toast/-/toast-1.1.11.tgz#1576385315fd894a6ae3fe2d56dacdb8072e9e2c"
+  integrity sha512-joeNDETyPIKdwc0YAq/Qjr68SriJU1nTH9KIkuGwmntHE859DtIhwXtAg7k4ZRoSTFEm3/xtFN7yk2aGhueO1w==
+  dependencies:
+    "@chakra-ui/alert" "1.1.1"
+    "@chakra-ui/close-button" "1.1.1"
+    "@chakra-ui/hooks" "1.1.4"
+    "@chakra-ui/theme" "1.6.2"
+    "@chakra-ui/transition" "1.0.8"
+    "@chakra-ui/utils" "1.2.0"
+    "@reach/alert" "0.13.0"
+
+"@chakra-ui/tooltip@1.1.2":
+  version "1.1.2"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/tooltip/-/tooltip-1.1.2.tgz#18372edbd92d8613d6c34c54f6adc8aefe6a501b"
+  integrity sha512-sT7PcgYqa5uvsTpXiCNOZxLhIPFWUtblWnMMyn3QIALsgrSkWCceNyIKs1fTCYVBowb30nYOX3owuoP4CsgWHw==
+  dependencies:
+    "@chakra-ui/hooks" "1.1.4"
+    "@chakra-ui/popper" "1.1.4"
+    "@chakra-ui/portal" "1.1.1"
+    "@chakra-ui/utils" "1.2.0"
+    "@chakra-ui/visually-hidden" "1.0.4"
+
+"@chakra-ui/transition@1.0.8":
+  version "1.0.8"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/transition/-/transition-1.0.8.tgz#b9fc6ab7ec901af7d417f0b84ef2785ec5cbaee2"
+  integrity sha512-c4BArP5Q9nl2R6QDAmigCklkMpGKP1ZYOfF1RD7qboPROZVt+SUNGKW+GHGN7mN0kaWHuCb+sbLXMCqQG/jQmQ==
+  dependencies:
+    "@chakra-ui/hooks" "1.1.4"
+    "@chakra-ui/utils" "1.2.0"
+
+"@chakra-ui/utils@1.2.0":
+  version "1.2.0"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/utils/-/utils-1.2.0.tgz#9385950e91455ecf480eb52bca268d3a5f8c9d6d"
+  integrity sha512-oMTX8BX1+MUf+iYUafFm9tNfwa3m1cqzMcE/5iQFmNcqKlZmuADnVL28Yw7jhoeouIjANaGY+f51qj9zHgDn9Q==
+  dependencies:
+    "@types/lodash.mergewith" "4.6.6"
+    "@types/object-assign" "4.0.30"
+    css-box-model "1.2.1"
+    lodash.mergewith "4.6.2"
+
+"@chakra-ui/visually-hidden@1.0.4":
+  version "1.0.4"
+  resolved "https://registry.yarnpkg.com/@chakra-ui/visually-hidden/-/visually-hidden-1.0.4.tgz#e83428acff21b4471f57c0a8c8a9467b050d75fc"
+  integrity sha512-RxXmEjwOoMh28lSen4tmkQBRQ21Hi15UGLQTnKfY2LhJyxsojyPT9TSHzehWgFIb8D+N3Er09WLgkd6f/bJqyg==
+  dependencies:
+    "@chakra-ui/utils" "1.2.0"
+
+"@cnakazawa/watch@^1.0.3":
+  version "1.0.4"
+  resolved "https://registry.yarnpkg.com/@cnakazawa/watch/-/watch-1.0.4.tgz#f864ae85004d0fcab6f50be9141c4da368d1656a"
+  integrity sha512-v9kIhKwjeZThiWrLmj0y17CWoyddASLj9O2yvbZkbvw/N3rWOYy9zkV66ursAoVr0mV15bL8g0c4QZUE6cdDoQ==
+  dependencies:
+    exec-sh "^0.3.2"
+    minimist "^1.2.0"
+
+"@csstools/convert-colors@^1.4.0":
+  version "1.4.0"
+  resolved "https://registry.yarnpkg.com/@csstools/convert-colors/-/convert-colors-1.4.0.tgz#ad495dc41b12e75d588c6db8b9834f08fa131eb7"
+  integrity sha512-5a6wqoJV/xEdbRNKVo6I4hO3VjyDq//8q2f9I6PBAvMesJHFauXDorcNCsr9RzvsZnaWi5NYCcfyqP1QeFHFbw==
+
+"@csstools/normalize.css@^10.1.0":
+  version "10.1.0"
+  resolved "https://registry.yarnpkg.com/@csstools/normalize.css/-/normalize.css-10.1.0.tgz#f0950bba18819512d42f7197e56c518aa491cf18"
+  integrity sha512-ij4wRiunFfaJxjB0BdrYHIH8FxBJpOwNPhhAcunlmPdXudL1WQV1qoP9un6JsEBAgQH+7UXyyjh0g7jTxXK6tg==
+
+"@emotion/babel-plugin@^11.1.2":
+  version "11.2.0"
+  resolved "https://registry.yarnpkg.com/@emotion/babel-plugin/-/babel-plugin-11.2.0.tgz#f25c6df8ec045dad5ae6ca63df0791673b98c920"
+  integrity sha512-lsnQBnl3l4wu/FJoyHnYRpHJeIPNkOBMbtDUIXcO8luulwRKZXPvA10zd2eXVN6dABIWNX4E34en/jkejIg/yA==
+  dependencies:
+    "@babel/helper-module-imports" "^7.7.0"
+    "@babel/plugin-syntax-jsx" "^7.12.1"
+    "@babel/runtime" "^7.7.2"
+    "@emotion/hash" "^0.8.0"
+    "@emotion/memoize" "^0.7.5"
+    "@emotion/serialize" "^1.0.0"
+    babel-plugin-macros "^2.6.1"
+    convert-source-map "^1.5.0"
+    escape-string-regexp "^4.0.0"
+    find-root "^1.1.0"
+    source-map "^0.5.7"
+    stylis "^4.0.3"
+
+"@emotion/cache@^11.1.3":
+  version "11.1.3"
+  resolved "https://registry.yarnpkg.com/@emotion/cache/-/cache-11.1.3.tgz#c7683a9484bcd38d5562f2b9947873cf66829afd"
+  integrity sha512-n4OWinUPJVaP6fXxWZD9OUeQ0lY7DvtmtSuqtRWT0Ofo/sBLCVSgb4/Oa0Q5eFxcwablRKjUXqXtNZVyEwCAuA==
+  dependencies:
+    "@emotion/memoize" "^0.7.4"
+    "@emotion/sheet" "^1.0.0"
+    "@emotion/utils" "^1.0.0"
+    "@emotion/weak-memoize" "^0.2.5"
+    stylis "^4.0.3"
+
+"@emotion/hash@^0.8.0":
+  version "0.8.0"
+  resolved "https://registry.yarnpkg.com/@emotion/hash/-/hash-0.8.0.tgz#bbbff68978fefdbe68ccb533bc8cbe1d1afb5413"
+  integrity sha512-kBJtf7PH6aWwZ6fka3zQ0p6SBYzx4fl1LoZXE2RrnYST9Xljm7WfKJrU4g/Xr3Beg72MLrp1AWNUmuYJTL7Cow==
+
+"@emotion/is-prop-valid@^0.8.2":
+  version "0.8.8"
+  resolved "https://registry.yarnpkg.com/@emotion/is-prop-valid/-/is-prop-valid-0.8.8.tgz#db28b1c4368a259b60a97311d6a952d4fd01ac1a"
+  integrity sha512-u5WtneEAr5IDG2Wv65yhunPSMLIpuKsbuOktRojfrEiEvRyC85LgPMZI63cr7NUqT8ZIGdSVg8ZKGxIug4lXcA==
+  dependencies:
+    "@emotion/memoize" "0.7.4"
+
+"@emotion/is-prop-valid@^1.1.0":
+  version "1.1.0"
+  resolved "https://registry.yarnpkg.com/@emotion/is-prop-valid/-/is-prop-valid-1.1.0.tgz#29ef6be1e946fb4739f9707def860f316f668cde"
+  integrity sha512-9RkilvXAufQHsSsjQ3PIzSns+pxuX4EW8EbGeSPjZMHuMx6z/MOzb9LpqNieQX4F3mre3NWS2+X3JNRHTQztUQ==
+  dependencies:
+    "@emotion/memoize" "^0.7.4"
+
+"@emotion/memoize@0.7.4":
+  version "0.7.4"
+  resolved "https://registry.yarnpkg.com/@emotion/memoize/-/memoize-0.7.4.tgz#19bf0f5af19149111c40d98bb0cf82119f5d9eeb"
+  integrity sha512-Ja/Vfqe3HpuzRsG1oBtWTHk2PGZ7GR+2Vz5iYGelAw8dx32K0y7PjVuxK6z1nMpZOqAFsRUPCkK1YjJ56qJlgw==
+
+"@emotion/memoize@^0.7.4", "@emotion/memoize@^0.7.5":
+  version "0.7.5"
+  resolved "https://registry.yarnpkg.com/@emotion/memoize/-/memoize-0.7.5.tgz#2c40f81449a4e554e9fc6396910ed4843ec2be50"
+  integrity sha512-igX9a37DR2ZPGYtV6suZ6whr8pTFtyHL3K/oLUotxpSVO2ASaprmAe2Dkq7tBo7CRY7MMDrAa9nuQP9/YG8FxQ==
+
+"@emotion/react@^11.1.5":
+  version "11.1.5"
+  resolved "https://registry.yarnpkg.com/@emotion/react/-/react-11.1.5.tgz#15e78f9822894cdc296e6f4e0688bac8120dfe66"
+  integrity sha512-xfnZ9NJEv9SU9K2sxXM06lzjK245xSeHRpUh67eARBm3PBHjjKIZlfWZ7UQvD0Obvw6ZKjlC79uHrlzFYpOB/Q==
+  dependencies:
+    "@babel/runtime" "^7.7.2"
+    "@emotion/cache" "^11.1.3"
+    "@emotion/serialize" "^1.0.0"
+    "@emotion/sheet" "^1.0.1"
+    "@emotion/utils" "^1.0.0"
+    "@emotion/weak-memoize" "^0.2.5"
+    hoist-non-react-statics "^3.3.1"
+
+"@emotion/serialize@^1.0.0":
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/@emotion/serialize/-/serialize-1.0.0.tgz#1a61f4f037cf39995c97fc80ebe99abc7b191ca9"
+  integrity sha512-zt1gm4rhdo5Sry8QpCOpopIUIKU+mUSpV9WNmFILUraatm5dttNEaYzUWWSboSMUE6PtN2j1cAsuvcugfdI3mw==
+  dependencies:
+    "@emotion/hash" "^0.8.0"
+    "@emotion/memoize" "^0.7.4"
+    "@emotion/unitless" "^0.7.5"
+    "@emotion/utils" "^1.0.0"
+    csstype "^3.0.2"
+
+"@emotion/sheet@^1.0.0", "@emotion/sheet@^1.0.1":
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/@emotion/sheet/-/sheet-1.0.1.tgz#245f54abb02dfd82326e28689f34c27aa9b2a698"
+  integrity sha512-GbIvVMe4U+Zc+929N1V7nW6YYJtidj31lidSmdYcWozwoBIObXBnaJkKNDjZrLm9Nc0BR+ZyHNaRZxqNZbof5g==
+
+"@emotion/styled@^11.1.5":
+  version "11.1.5"
+  resolved "https://registry.yarnpkg.com/@emotion/styled/-/styled-11.1.5.tgz#3d7bfa58b346e48315f65ee956aeef81f0bea8e0"
+  integrity sha512-nIq7pOBEDqT5xSFbclQ3XFy0q8C9EUU8ECqKN2kJKGxKh+vLz/x26kEih4aOpoAsyzc+R60rQxh7VJiLTUEdmg==
+  dependencies:
+    "@babel/runtime" "^7.7.2"
+    "@emotion/babel-plugin" "^11.1.2"
+    "@emotion/is-prop-valid" "^1.1.0"
+    "@emotion/serialize" "^1.0.0"
+    "@emotion/utils" "^1.0.0"
+
+"@emotion/unitless@^0.7.5":
+  version "0.7.5"
+  resolved "https://registry.yarnpkg.com/@emotion/unitless/-/unitless-0.7.5.tgz#77211291c1900a700b8a78cfafda3160d76949ed"
+  integrity sha512-OWORNpfjMsSSUBVrRBVGECkhWcULOAJz9ZW8uK9qgxD+87M7jHRcvh/A96XXNhXTLmKcoYSQtBEX7lHMO7YRwg==
+
+"@emotion/utils@^1.0.0":
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/@emotion/utils/-/utils-1.0.0.tgz#abe06a83160b10570816c913990245813a2fd6af"
+  integrity sha512-mQC2b3XLDs6QCW+pDQDiyO/EdGZYOygE8s5N5rrzjSI4M3IejPE/JPndCBwRT9z982aqQNi6beWs1UeayrQxxA==
+
+"@emotion/weak-memoize@^0.2.5":
+  version "0.2.5"
+  resolved "https://registry.yarnpkg.com/@emotion/weak-memoize/-/weak-memoize-0.2.5.tgz#8eed982e2ee6f7f4e44c253e12962980791efd46"
+  integrity sha512-6U71C2Wp7r5XtFtQzYrW5iKFT67OixrSxjI4MptCHzdSVlgabczzqLe0ZSgnub/5Kp4hSbpDB1tMytZY9pwxxA==
+
+"@eslint/eslintrc@^0.3.0":
+  version "0.3.0"
+  resolved "https://registry.yarnpkg.com/@eslint/eslintrc/-/eslintrc-0.3.0.tgz#d736d6963d7003b6514e6324bec9c602ac340318"
+  integrity sha512-1JTKgrOKAHVivSvOYw+sJOunkBjUOvjqWk1DPja7ZFhIS2mX/4EgTT8M7eTK9jrKhL/FvXXEbQwIs3pg1xp3dg==
+  dependencies:
+    ajv "^6.12.4"
+    debug "^4.1.1"
+    espree "^7.3.0"
+    globals "^12.1.0"
+    ignore "^4.0.6"
+    import-fresh "^3.2.1"
+    js-yaml "^3.13.1"
+    lodash "^4.17.20"
+    minimatch "^3.0.4"
+    strip-json-comments "^3.1.1"
+
+"@hapi/address@2.x.x":
+  version "2.1.4"
+  resolved "https://registry.yarnpkg.com/@hapi/address/-/address-2.1.4.tgz#5d67ed43f3fd41a69d4b9ff7b56e7c0d1d0a81e5"
+  integrity sha512-QD1PhQk+s31P1ixsX0H0Suoupp3VMXzIVMSwobR3F3MSUO2YCV0B7xqLcUw/Bh8yuvd3LhpyqLQWTNcRmp6IdQ==
+
+"@hapi/bourne@1.x.x":
+  version "1.3.2"
+  resolved "https://registry.yarnpkg.com/@hapi/bourne/-/bourne-1.3.2.tgz#0a7095adea067243ce3283e1b56b8a8f453b242a"
+  integrity sha512-1dVNHT76Uu5N3eJNTYcvxee+jzX4Z9lfciqRRHCU27ihbUcYi+iSc2iml5Ke1LXe1SyJCLA0+14Jh4tXJgOppA==
+
+"@hapi/hoek@8.x.x", "@hapi/hoek@^8.3.0":
+  version "8.5.1"
+  resolved "https://registry.yarnpkg.com/@hapi/hoek/-/hoek-8.5.1.tgz#fde96064ca446dec8c55a8c2f130957b070c6e06"
+  integrity sha512-yN7kbciD87WzLGc5539Tn0sApjyiGHAJgKvG9W8C7O+6c7qmoQMfVs0W4bX17eqz6C78QJqqFrtgdK5EWf6Qow==
+
+"@hapi/joi@^15.1.0":
+  version "15.1.1"
+  resolved "https://registry.yarnpkg.com/@hapi/joi/-/joi-15.1.1.tgz#c675b8a71296f02833f8d6d243b34c57b8ce19d7"
+  integrity sha512-entf8ZMOK8sc+8YfeOlM8pCfg3b5+WZIKBfUaaJT8UsjAAPjartzxIYm3TIbjvA4u+u++KbcXD38k682nVHDAQ==
+  dependencies:
+    "@hapi/address" "2.x.x"
+    "@hapi/bourne" "1.x.x"
+    "@hapi/hoek" "8.x.x"
+    "@hapi/topo" "3.x.x"
+
+"@hapi/topo@3.x.x":
+  version "3.1.6"
+  resolved "https://registry.yarnpkg.com/@hapi/topo/-/topo-3.1.6.tgz#68d935fa3eae7fdd5ab0d7f953f3205d8b2bfc29"
+  integrity sha512-tAag0jEcjwH+P2quUfipd7liWCNX2F8NvYjQp2wtInsZxnMlypdw0FtAOLxtvvkO+GSRRbmNi8m/5y42PQJYCQ==
+  dependencies:
+    "@hapi/hoek" "^8.3.0"
+
+"@istanbuljs/load-nyc-config@^1.0.0":
+  version "1.1.0"
+  resolved "https://registry.yarnpkg.com/@istanbuljs/load-nyc-config/-/load-nyc-config-1.1.0.tgz#fd3db1d59ecf7cf121e80650bb86712f9b55eced"
+  integrity sha512-VjeHSlIzpv/NyD3N0YuHfXOPDIixcA1q2ZV98wsMqcYlPmv2n3Yb2lYP9XMElnaFVXg5A7YLTeLu6V84uQDjmQ==
+  dependencies:
+    camelcase "^5.3.1"
+    find-up "^4.1.0"
+    get-package-type "^0.1.0"
+    js-yaml "^3.13.1"
+    resolve-from "^5.0.0"
+
+"@istanbuljs/schema@^0.1.2":
+  version "0.1.3"
+  resolved "https://registry.yarnpkg.com/@istanbuljs/schema/-/schema-0.1.3.tgz#e45e384e4b8ec16bce2fd903af78450f6bf7ec98"
+  integrity sha512-ZXRY4jNvVgSVQ8DL3LTcakaAtXwTVUxE81hslsyD2AtoXW/wVob10HkOJ1X/pAlcI7D+2YoZKg5do8G/w6RYgA==
+
+"@jest/console@^26.6.2":
+  version "26.6.2"
+  resolved "https://registry.yarnpkg.com/@jest/console/-/console-26.6.2.tgz#4e04bc464014358b03ab4937805ee36a0aeb98f2"
+  integrity sha512-IY1R2i2aLsLr7Id3S6p2BA82GNWryt4oSvEXLAKc+L2zdi89dSkE8xC1C+0kpATG4JhBJREnQOH7/zmccM2B0g==
+  dependencies:
+    "@jest/types" "^26.6.2"
+    "@types/node" "*"
+    chalk "^4.0.0"
+    jest-message-util "^26.6.2"
+    jest-util "^26.6.2"
+    slash "^3.0.0"
+
+"@jest/core@^26.6.0", "@jest/core@^26.6.3":
+  version "26.6.3"
+  resolved "https://registry.yarnpkg.com/@jest/core/-/core-26.6.3.tgz#7639fcb3833d748a4656ada54bde193051e45fad"
+  integrity sha512-xvV1kKbhfUqFVuZ8Cyo+JPpipAHHAV3kcDBftiduK8EICXmTFddryy3P7NfZt8Pv37rA9nEJBKCCkglCPt/Xjw==
+  dependencies:
+    "@jest/console" "^26.6.2"
+    "@jest/reporters" "^26.6.2"
+    "@jest/test-result" "^26.6.2"
+    "@jest/transform" "^26.6.2"
+    "@jest/types" "^26.6.2"
+    "@types/node" "*"
+    ansi-escapes "^4.2.1"
+    chalk "^4.0.0"
+    exit "^0.1.2"
+    graceful-fs "^4.2.4"
+    jest-changed-files "^26.6.2"
+    jest-config "^26.6.3"
+    jest-haste-map "^26.6.2"
+    jest-message-util "^26.6.2"
+    jest-regex-util "^26.0.0"
+    jest-resolve "^26.6.2"
+    jest-resolve-dependencies "^26.6.3"
+    jest-runner "^26.6.3"
+    jest-runtime "^26.6.3"
+    jest-snapshot "^26.6.2"
+    jest-util "^26.6.2"
+    jest-validate "^26.6.2"
+    jest-watcher "^26.6.2"
+    micromatch "^4.0.2"
+    p-each-series "^2.1.0"
+    rimraf "^3.0.0"
+    slash "^3.0.0"
+    strip-ansi "^6.0.0"
+
+"@jest/environment@^26.6.0", "@jest/environment@^26.6.2":
+  version "26.6.2"
+  resolved "https://registry.yarnpkg.com/@jest/environment/-/environment-26.6.2.tgz#ba364cc72e221e79cc8f0a99555bf5d7577cf92c"
+  integrity sha512-nFy+fHl28zUrRsCeMB61VDThV1pVTtlEokBRgqPrcT1JNq4yRNIyTHfyht6PqtUvY9IsuLGTrbG8kPXjSZIZwA==
+  dependencies:
+    "@jest/fake-timers" "^26.6.2"
+    "@jest/types" "^26.6.2"
+    "@types/node" "*"
+    jest-mock "^26.6.2"
+
+"@jest/fake-timers@^26.6.2":
+  version "26.6.2"
+  resolved "https://registry.yarnpkg.com/@jest/fake-timers/-/fake-timers-26.6.2.tgz#459c329bcf70cee4af4d7e3f3e67848123535aad"
+  integrity sha512-14Uleatt7jdzefLPYM3KLcnUl1ZNikaKq34enpb5XG9i81JpppDb5muZvonvKyrl7ftEHkKS5L5/eB/kxJ+bvA==
+  dependencies:
+    "@jest/types" "^26.6.2"
+    "@sinonjs/fake-timers" "^6.0.1"
+    "@types/node" "*"
+    jest-message-util "^26.6.2"
+    jest-mock "^26.6.2"
+    jest-util "^26.6.2"
+
+"@jest/globals@^26.6.2":
+  version "26.6.2"
+  resolved "https://registry.yarnpkg.com/@jest/globals/-/globals-26.6.2.tgz#5b613b78a1aa2655ae908eba638cc96a20df720a"
+  integrity sha512-85Ltnm7HlB/KesBUuALwQ68YTU72w9H2xW9FjZ1eL1U3lhtefjjl5c2MiUbpXt/i6LaPRvoOFJ22yCBSfQ0JIA==
+  dependencies:
+    "@jest/environment" "^26.6.2"
+    "@jest/types" "^26.6.2"
+    expect "^26.6.2"
+
+"@jest/reporters@^26.6.2":
+  version "26.6.2"
+  resolved "https://registry.yarnpkg.com/@jest/reporters/-/reporters-26.6.2.tgz#1f518b99637a5f18307bd3ecf9275f6882a667f6"
+  integrity sha512-h2bW53APG4HvkOnVMo8q3QXa6pcaNt1HkwVsOPMBV6LD/q9oSpxNSYZQYkAnjdMjrJ86UuYeLo+aEZClV6opnw==
+  dependencies:
+    "@bcoe/v8-coverage" "^0.2.3"
+    "@jest/console" "^26.6.2"
+    "@jest/test-result" "^26.6.2"
+    "@jest/transform" "^26.6.2"
+    "@jest/types" "^26.6.2"
+    chalk "^4.0.0"
+    collect-v8-coverage "^1.0.0"
+    exit "^0.1.2"
+    glob "^7.1.2"
+    graceful-fs "^4.2.4"
+    istanbul-lib-coverage "^3.0.0"
+    istanbul-lib-instrument "^4.0.3"
+    istanbul-lib-report "^3.0.0"
+    istanbul-lib-source-maps "^4.0.0"
+    istanbul-reports "^3.0.2"
+    jest-haste-map "^26.6.2"
+    jest-resolve "^26.6.2"
+    jest-util "^26.6.2"
+    jest-worker "^26.6.2"
+    slash "^3.0.0"
+    source-map "^0.6.0"
+    string-length "^4.0.1"
+    terminal-link "^2.0.0"
+    v8-to-istanbul "^7.0.0"
+  optionalDependencies:
+    node-notifier "^8.0.0"
+
+"@jest/source-map@^26.6.2":
+  version "26.6.2"
+  resolved "https://registry.yarnpkg.com/@jest/source-map/-/source-map-26.6.2.tgz#29af5e1e2e324cafccc936f218309f54ab69d535"
+  integrity sha512-YwYcCwAnNmOVsZ8mr3GfnzdXDAl4LaenZP5z+G0c8bzC9/dugL8zRmxZzdoTl4IaS3CryS1uWnROLPFmb6lVvA==
+  dependencies:
+    callsites "^3.0.0"
+    graceful-fs "^4.2.4"
+    source-map "^0.6.0"
+
+"@jest/test-result@^26.6.0", "@jest/test-result@^26.6.2":
+  version "26.6.2"
+  resolved "https://registry.yarnpkg.com/@jest/test-result/-/test-result-26.6.2.tgz#55da58b62df134576cc95476efa5f7949e3f5f18"
+  integrity sha512-5O7H5c/7YlojphYNrK02LlDIV2GNPYisKwHm2QTKjNZeEzezCbwYs9swJySv2UfPMyZ0VdsmMv7jIlD/IKYQpQ==
+  dependencies:
+    "@jest/console" "^26.6.2"
+    "@jest/types" "^26.6.2"
+    "@types/istanbul-lib-coverage" "^2.0.0"
+    collect-v8-coverage "^1.0.0"
+
+"@jest/test-sequencer@^26.6.3":
+  version "26.6.3"
+  resolved "https://registry.yarnpkg.com/@jest/test-sequencer/-/test-sequencer-26.6.3.tgz#98e8a45100863886d074205e8ffdc5a7eb582b17"
+  integrity sha512-YHlVIjP5nfEyjlrSr8t/YdNfU/1XEt7c5b4OxcXCjyRhjzLYu/rO69/WHPuYcbCWkz8kAeZVZp2N2+IOLLEPGw==
+  dependencies:
+    "@jest/test-result" "^26.6.2"
+    graceful-fs "^4.2.4"
+    jest-haste-map "^26.6.2"
+    jest-runner "^26.6.3"
+    jest-runtime "^26.6.3"
+
+"@jest/transform@^26.6.2":
+  version "26.6.2"
+  resolved "https://registry.yarnpkg.com/@jest/transform/-/transform-26.6.2.tgz#5ac57c5fa1ad17b2aae83e73e45813894dcf2e4b"
+  integrity sha512-E9JjhUgNzvuQ+vVAL21vlyfy12gP0GhazGgJC4h6qUt1jSdUXGWJ1wfu/X7Sd8etSgxV4ovT1pb9v5D6QW4XgA==
+  dependencies:
+    "@babel/core" "^7.1.0"
+    "@jest/types" "^26.6.2"
+    babel-plugin-istanbul "^6.0.0"
+    chalk "^4.0.0"
+    convert-source-map "^1.4.0"
+    fast-json-stable-stringify "^2.0.0"
+    graceful-fs "^4.2.4"
+    jest-haste-map "^26.6.2"
+    jest-regex-util "^26.0.0"
+    jest-util "^26.6.2"
+    micromatch "^4.0.2"
+    pirates "^4.0.1"
+    slash "^3.0.0"
+    source-map "^0.6.1"
+    write-file-atomic "^3.0.0"
+
+"@jest/types@^26.6.0", "@jest/types@^26.6.2":
+  version "26.6.2"
+  resolved "https://registry.yarnpkg.com/@jest/types/-/types-26.6.2.tgz#bef5a532030e1d88a2f5a6d933f84e97226ed48e"
+  integrity sha512-fC6QCp7Sc5sX6g8Tvbmj4XUTbyrik0akgRy03yjXbQaBWWNWGE7SGtJk98m0N8nzegD/7SggrUlivxo5ax4KWQ==
+  dependencies:
+    "@types/istanbul-lib-coverage" "^2.0.0"
+    "@types/istanbul-reports" "^3.0.0"
+    "@types/node" "*"
+    "@types/yargs" "^15.0.0"
+    chalk "^4.0.0"
+
+"@nodelib/fs.scandir@2.1.4":
+  version "2.1.4"
+  resolved "https://registry.yarnpkg.com/@nodelib/fs.scandir/-/fs.scandir-2.1.4.tgz#d4b3549a5db5de2683e0c1071ab4f140904bbf69"
+  integrity sha512-33g3pMJk3bg5nXbL/+CY6I2eJDzZAni49PfJnL5fghPTggPvBd/pFNSgJsdAgWptuFu7qq/ERvOYFlhvsLTCKA==
+  dependencies:
+    "@nodelib/fs.stat" "2.0.4"
+    run-parallel "^1.1.9"
+
+"@nodelib/fs.stat@2.0.4", "@nodelib/fs.stat@^2.0.2":
+  version "2.0.4"
+  resolved "https://registry.yarnpkg.com/@nodelib/fs.stat/-/fs.stat-2.0.4.tgz#a3f2dd61bab43b8db8fa108a121cfffe4c676655"
+  integrity sha512-IYlHJA0clt2+Vg7bccq+TzRdJvv19c2INqBSsoOLp1je7xjtr7J26+WXR72MCdvU9q1qTzIWDfhMf+DRvQJK4Q==
+
+"@nodelib/fs.walk@^1.2.3":
+  version "1.2.6"
+  resolved "https://registry.yarnpkg.com/@nodelib/fs.walk/-/fs.walk-1.2.6.tgz#cce9396b30aa5afe9e3756608f5831adcb53d063"
+  integrity sha512-8Broas6vTtW4GIXTAHDoE32hnN2M5ykgCpWGbuXHQ15vEMqr23pB76e/GZcYsZCHALv50ktd24qhEyKr6wBtow==
+  dependencies:
+    "@nodelib/fs.scandir" "2.1.4"
+    fastq "^1.6.0"
+
+"@npmcli/move-file@^1.0.1":
+  version "1.1.2"
+  resolved "https://registry.yarnpkg.com/@npmcli/move-file/-/move-file-1.1.2.tgz#1a82c3e372f7cae9253eb66d72543d6b8685c674"
+  integrity sha512-1SUf/Cg2GzGDyaf15aR9St9TWlb+XvbZXWpDx8YKs7MLzMH/BCeopv+y9vzrzgkfykCGuWOlSu3mZhj2+FQcrg==
+  dependencies:
+    mkdirp "^1.0.4"
+    rimraf "^3.0.2"
+
+"@pmmmwh/react-refresh-webpack-plugin@0.4.3":
+  version "0.4.3"
+  resolved "https://registry.yarnpkg.com/@pmmmwh/react-refresh-webpack-plugin/-/react-refresh-webpack-plugin-0.4.3.tgz#1eec460596d200c0236bf195b078a5d1df89b766"
+  integrity sha512-br5Qwvh8D2OQqSXpd1g/xqXKnK0r+Jz6qVKBbWmpUcrbGOxUrf39V5oZ1876084CGn18uMdR5uvPqBv9UqtBjQ==
+  dependencies:
+    ansi-html "^0.0.7"
+    error-stack-parser "^2.0.6"
+    html-entities "^1.2.1"
+    native-url "^0.2.6"
+    schema-utils "^2.6.5"
+    source-map "^0.7.3"
+
+"@popperjs/core@2.4.4":
+  version "2.4.4"
+  resolved "https://registry.yarnpkg.com/@popperjs/core/-/core-2.4.4.tgz#11d5db19bd178936ec89cd84519c4de439574398"
+  integrity sha512-1oO6+dN5kdIA3sKPZhRGJTfGVP4SWV6KqlMOwry4J3HfyD68sl/3KmG7DeYUzvN+RbhXDnv/D8vNNB8168tAMg==
+
+"@reach/alert@0.13.0":
+  version "0.13.0"
+  resolved "https://registry.yarnpkg.com/@reach/alert/-/alert-0.13.0.tgz#1f67b389f49af61286ef03a84f5a57bd3503dadf"
+  integrity sha512-5lpgRnlQ0JHBsRTPfKjD9aFPDZuLcaxTgD5PXdSLb+1CU8WgNbcy+7qSjqnu1uzWS2pQenIEBViV5wGpt63ADw==
+  dependencies:
+    "@reach/utils" "0.13.0"
+    "@reach/visually-hidden" "0.13.0"
+    prop-types "^15.7.2"
+    tslib "^2.0.0"
+
+"@reach/utils@0.13.0":
+  version "0.13.0"
+  resolved "https://registry.yarnpkg.com/@reach/utils/-/utils-0.13.0.tgz#2da775a910d8894bb34e1e94fe95842674f71844"
+  integrity sha512-dypxuyA1Qy3LHxzzyS7jFGPgCCR04b8UEn+Tv/aj6y9V578dULQqkcCyobrdEa+OI8lxH7dFFHa+jH8M/noBrQ==
+  dependencies:
+    "@types/warning" "^3.0.0"
+    tslib "^2.0.0"
+    warning "^4.0.3"
+
+"@reach/visually-hidden@0.13.0":
+  version "0.13.0"
+  resolved "https://registry.yarnpkg.com/@reach/visually-hidden/-/visually-hidden-0.13.0.tgz#cace36d9bb80ffb797374fcaea989391b881038f"
+  integrity sha512-LF11WL9/495Q3d86xNy0VO6ylPI6SqF2xZGg9jpZSXLbFKpQ5Bf0qC7DOJfSf+/yb9WgPgB4m+a48Fz8AO6oZA==
+  dependencies:
+    tslib "^2.0.0"
+
+"@rollup/plugin-node-resolve@^7.1.1":
+  version "7.1.3"
+  resolved "https://registry.yarnpkg.com/@rollup/plugin-node-resolve/-/plugin-node-resolve-7.1.3.tgz#80de384edfbd7bfc9101164910f86078151a3eca"
+  integrity sha512-RxtSL3XmdTAE2byxekYLnx+98kEUOrPHF/KRVjLH+DEIHy6kjIw7YINQzn+NXiH/NTrQLAwYs0GWB+csWygA9Q==
+  dependencies:
+    "@rollup/pluginutils" "^3.0.8"
+    "@types/resolve" "0.0.8"
+    builtin-modules "^3.1.0"
+    is-module "^1.0.0"
+    resolve "^1.14.2"
+
+"@rollup/plugin-replace@^2.3.1":
+  version "2.4.1"
+  resolved "https://registry.yarnpkg.com/@rollup/plugin-replace/-/plugin-replace-2.4.1.tgz#c411b5ab72809fb1bfc8b487d8d02eef661460d3"
+  integrity sha512-XwC1oK5rrtRJ0tn1ioLHS6OV5JTluJF7QE1J/q1hN3bquwjnVxjtMyY9iCnoyH9DQbf92CxajB3o98wZbP3oAQ==
+  dependencies:
+    "@rollup/pluginutils" "^3.1.0"
+    magic-string "^0.25.7"
+
+"@rollup/pluginutils@^3.0.8", "@rollup/pluginutils@^3.1.0":
+  version "3.1.0"
+  resolved "https://registry.yarnpkg.com/@rollup/pluginutils/-/pluginutils-3.1.0.tgz#706b4524ee6dc8b103b3c995533e5ad680c02b9b"
+  integrity sha512-GksZ6pr6TpIjHm8h9lSQ8pi8BE9VeubNT0OMJ3B5uZJ8pz73NPiqOtCog/x2/QzM1ENChPKxMDhiQuRHsqc+lg==
+  dependencies:
+    "@types/estree" "0.0.39"
+    estree-walker "^1.0.1"
+    picomatch "^2.2.2"
+
+"@sinonjs/commons@^1.7.0":
+  version "1.8.2"
+  resolved "https://registry.yarnpkg.com/@sinonjs/commons/-/commons-1.8.2.tgz#858f5c4b48d80778fde4b9d541f27edc0d56488b"
+  integrity sha512-sruwd86RJHdsVf/AtBoijDmUqJp3B6hF/DGC23C+JaegnDHaZyewCjoVGTdg3J0uz3Zs7NnIT05OBOmML72lQw==
+  dependencies:
+    type-detect "4.0.8"
+
+"@sinonjs/fake-timers@^6.0.1":
+  version "6.0.1"
+  resolved "https://registry.yarnpkg.com/@sinonjs/fake-timers/-/fake-timers-6.0.1.tgz#293674fccb3262ac782c7aadfdeca86b10c75c40"
+  integrity sha512-MZPUxrmFubI36XS1DI3qmI0YdN1gks62JtFZvxR67ljjSNCeK6U08Zx4msEWOXuofgqUt6zPHSi1H9fbjR/NRA==
+  dependencies:
+    "@sinonjs/commons" "^1.7.0"
+
+"@surma/rollup-plugin-off-main-thread@^1.1.1":
+  version "1.4.2"
+  resolved "https://registry.yarnpkg.com/@surma/rollup-plugin-off-main-thread/-/rollup-plugin-off-main-thread-1.4.2.tgz#e6786b6af5799f82f7ab3a82e53f6182d2b91a58"
+  integrity sha512-yBMPqmd1yEJo/280PAMkychuaALyQ9Lkb5q1ck3mjJrFuEobIfhnQ4J3mbvBoISmR3SWMWV+cGB/I0lCQee79A==
+  dependencies:
+    ejs "^2.6.1"
+    magic-string "^0.25.0"
+
+"@svgr/babel-plugin-add-jsx-attribute@^5.4.0":
+  version "5.4.0"
+  resolved "https://registry.yarnpkg.com/@svgr/babel-plugin-add-jsx-attribute/-/babel-plugin-add-jsx-attribute-5.4.0.tgz#81ef61947bb268eb9d50523446f9c638fb355906"
+  integrity sha512-ZFf2gs/8/6B8PnSofI0inYXr2SDNTDScPXhN7k5EqD4aZ3gi6u+rbmZHVB8IM3wDyx8ntKACZbtXSm7oZGRqVg==
+
+"@svgr/babel-plugin-remove-jsx-attribute@^5.4.0":
+  version "5.4.0"
+  resolved "https://registry.yarnpkg.com/@svgr/babel-plugin-remove-jsx-attribute/-/babel-plugin-remove-jsx-attribute-5.4.0.tgz#6b2c770c95c874654fd5e1d5ef475b78a0a962ef"
+  integrity sha512-yaS4o2PgUtwLFGTKbsiAy6D0o3ugcUhWK0Z45umJ66EPWunAz9fuFw2gJuje6wqQvQWOTJvIahUwndOXb7QCPg==
+
+"@svgr/babel-plugin-remove-jsx-empty-expression@^5.0.1":
+  version "5.0.1"
+  resolved "https://registry.yarnpkg.com/@svgr/babel-plugin-remove-jsx-empty-expression/-/babel-plugin-remove-jsx-empty-expression-5.0.1.tgz#25621a8915ed7ad70da6cea3d0a6dbc2ea933efd"
+  integrity sha512-LA72+88A11ND/yFIMzyuLRSMJ+tRKeYKeQ+mR3DcAZ5I4h5CPWN9AHyUzJbWSYp/u2u0xhmgOe0+E41+GjEueA==
+
+"@svgr/babel-plugin-replace-jsx-attribute-value@^5.0.1":
+  version "5.0.1"
+  resolved "https://registry.yarnpkg.com/@svgr/babel-plugin-replace-jsx-attribute-value/-/babel-plugin-replace-jsx-attribute-value-5.0.1.tgz#0b221fc57f9fcd10e91fe219e2cd0dd03145a897"
+  integrity sha512-PoiE6ZD2Eiy5mK+fjHqwGOS+IXX0wq/YDtNyIgOrc6ejFnxN4b13pRpiIPbtPwHEc+NT2KCjteAcq33/F1Y9KQ==
+
+"@svgr/babel-plugin-svg-dynamic-title@^5.4.0":
+  version "5.4.0"
+  resolved "https://registry.yarnpkg.com/@svgr/babel-plugin-svg-dynamic-title/-/babel-plugin-svg-dynamic-title-5.4.0.tgz#139b546dd0c3186b6e5db4fefc26cb0baea729d7"
+  integrity sha512-zSOZH8PdZOpuG1ZVx/cLVePB2ibo3WPpqo7gFIjLV9a0QsuQAzJiwwqmuEdTaW2pegyBE17Uu15mOgOcgabQZg==
+
+"@svgr/babel-plugin-svg-em-dimensions@^5.4.0":
+  version "5.4.0"
+  resolved "https://registry.yarnpkg.com/@svgr/babel-plugin-svg-em-dimensions/-/babel-plugin-svg-em-dimensions-5.4.0.tgz#6543f69526632a133ce5cabab965deeaea2234a0"
+  integrity sha512-cPzDbDA5oT/sPXDCUYoVXEmm3VIoAWAPT6mSPTJNbQaBNUuEKVKyGH93oDY4e42PYHRW67N5alJx/eEol20abw==
+
+"@svgr/babel-plugin-transform-react-native-svg@^5.4.0":
+  version "5.4.0"
+  resolved "https://registry.yarnpkg.com/@svgr/babel-plugin-transform-react-native-svg/-/babel-plugin-transform-react-native-svg-5.4.0.tgz#00bf9a7a73f1cad3948cdab1f8dfb774750f8c80"
+  integrity sha512-3eYP/SaopZ41GHwXma7Rmxcv9uRslRDTY1estspeB1w1ueZWd/tPlMfEOoccYpEMZU3jD4OU7YitnXcF5hLW2Q==
+
+"@svgr/babel-plugin-transform-svg-component@^5.5.0":
+  version "5.5.0"
+  resolved "https://registry.yarnpkg.com/@svgr/babel-plugin-transform-svg-component/-/babel-plugin-transform-svg-component-5.5.0.tgz#583a5e2a193e214da2f3afeb0b9e8d3250126b4a"
+  integrity sha512-q4jSH1UUvbrsOtlo/tKcgSeiCHRSBdXoIoqX1pgcKK/aU3JD27wmMKwGtpB8qRYUYoyXvfGxUVKchLuR5pB3rQ==
+
+"@svgr/babel-preset@^5.5.0":
+  version "5.5.0"
+  resolved "https://registry.yarnpkg.com/@svgr/babel-preset/-/babel-preset-5.5.0.tgz#8af54f3e0a8add7b1e2b0fcd5a882c55393df327"
+  integrity sha512-4FiXBjvQ+z2j7yASeGPEi8VD/5rrGQk4Xrq3EdJmoZgz/tpqChpo5hgXDvmEauwtvOc52q8ghhZK4Oy7qph4ig==
+  dependencies:
+    "@svgr/babel-plugin-add-jsx-attribute" "^5.4.0"
+    "@svgr/babel-plugin-remove-jsx-attribute" "^5.4.0"
+    "@svgr/babel-plugin-remove-jsx-empty-expression" "^5.0.1"
+    "@svgr/babel-plugin-replace-jsx-attribute-value" "^5.0.1"
+    "@svgr/babel-plugin-svg-dynamic-title" "^5.4.0"
+    "@svgr/babel-plugin-svg-em-dimensions" "^5.4.0"
+    "@svgr/babel-plugin-transform-react-native-svg" "^5.4.0"
+    "@svgr/babel-plugin-transform-svg-component" "^5.5.0"
+
+"@svgr/core@^5.5.0":
+  version "5.5.0"
+  resolved "https://registry.yarnpkg.com/@svgr/core/-/core-5.5.0.tgz#82e826b8715d71083120fe8f2492ec7d7874a579"
+  integrity sha512-q52VOcsJPvV3jO1wkPtzTuKlvX7Y3xIcWRpCMtBF3MrteZJtBfQw/+u0B1BHy5ColpQc1/YVTrPEtSYIMNZlrQ==
+  dependencies:
+    "@svgr/plugin-jsx" "^5.5.0"
+    camelcase "^6.2.0"
+    cosmiconfig "^7.0.0"
+
+"@svgr/hast-util-to-babel-ast@^5.5.0":
+  version "5.5.0"
+  resolved "https://registry.yarnpkg.com/@svgr/hast-util-to-babel-ast/-/hast-util-to-babel-ast-5.5.0.tgz#5ee52a9c2533f73e63f8f22b779f93cd432a5461"
+  integrity sha512-cAaR/CAiZRB8GP32N+1jocovUtvlj0+e65TB50/6Lcime+EA49m/8l+P2ko+XPJ4dw3xaPS3jOL4F2X4KWxoeQ==
+  dependencies:
+    "@babel/types" "^7.12.6"
+
+"@svgr/plugin-jsx@^5.5.0":
+  version "5.5.0"
+  resolved "https://registry.yarnpkg.com/@svgr/plugin-jsx/-/plugin-jsx-5.5.0.tgz#1aa8cd798a1db7173ac043466d7b52236b369000"
+  integrity sha512-V/wVh33j12hGh05IDg8GpIUXbjAPnTdPTKuP4VNLggnwaHMPNQNae2pRnyTAILWCQdz5GyMqtO488g7CKM8CBA==
+  dependencies:
+    "@babel/core" "^7.12.3"
+    "@svgr/babel-preset" "^5.5.0"
+    "@svgr/hast-util-to-babel-ast" "^5.5.0"
+    svg-parser "^2.0.2"
+
+"@svgr/plugin-svgo@^5.5.0":
+  version "5.5.0"
+  resolved "https://registry.yarnpkg.com/@svgr/plugin-svgo/-/plugin-svgo-5.5.0.tgz#02da55d85320549324e201c7b2e53bf431fcc246"
+  integrity sha512-r5swKk46GuQl4RrVejVwpeeJaydoxkdwkM1mBKOgJLBUJPGaLci6ylg/IjhrRsREKDkr4kbMWdgOtbXEh0fyLQ==
+  dependencies:
+    cosmiconfig "^7.0.0"
+    deepmerge "^4.2.2"
+    svgo "^1.2.2"
+
+"@svgr/webpack@5.5.0":
+  version "5.5.0"
+  resolved "https://registry.yarnpkg.com/@svgr/webpack/-/webpack-5.5.0.tgz#aae858ee579f5fa8ce6c3166ef56c6a1b381b640"
+  integrity sha512-DOBOK255wfQxguUta2INKkzPj6AIS6iafZYiYmHn6W3pHlycSRRlvWKCfLDG10fXfLWqE3DJHgRUOyJYmARa7g==
+  dependencies:
+    "@babel/core" "^7.12.3"
+    "@babel/plugin-transform-react-constant-elements" "^7.12.1"
+    "@babel/preset-env" "^7.12.1"
+    "@babel/preset-react" "^7.12.5"
+    "@svgr/core" "^5.5.0"
+    "@svgr/plugin-jsx" "^5.5.0"
+    "@svgr/plugin-svgo" "^5.5.0"
+    loader-utils "^2.0.0"
+
+"@testing-library/dom@^7.28.1":
+  version "7.29.6"
+  resolved "https://registry.yarnpkg.com/@testing-library/dom/-/dom-7.29.6.tgz#eb37844fb431186db7960a7ff6749ea65a19617c"
+  integrity sha512-vzTsAXa439ptdvav/4lsKRcGpAQX7b6wBIqia7+iNzqGJ5zjswApxA6jDAsexrc6ue9krWcbh8o+LYkBXW+GCQ==
+  dependencies:
+    "@babel/code-frame" "^7.10.4"
+    "@babel/runtime" "^7.12.5"
+    "@types/aria-query" "^4.2.0"
+    aria-query "^4.2.2"
+    chalk "^4.1.0"
+    dom-accessibility-api "^0.5.4"
+    lz-string "^1.4.4"
+    pretty-format "^26.6.2"
+
+"@testing-library/jest-dom@^5.11.4":
+  version "5.11.9"
+  resolved "https://registry.yarnpkg.com/@testing-library/jest-dom/-/jest-dom-5.11.9.tgz#e6b3cd687021f89f261bd53cbe367041fbd3e975"
+  integrity sha512-Mn2gnA9d1wStlAIT2NU8J15LNob0YFBVjs2aEQ3j8rsfRQo+lAs7/ui1i2TGaJjapLmuNPLTsrm+nPjmZDwpcQ==
+  dependencies:
+    "@babel/runtime" "^7.9.2"
+    "@types/testing-library__jest-dom" "^5.9.1"
+    aria-query "^4.2.2"
+    chalk "^3.0.0"
+    css "^3.0.0"
+    css.escape "^1.5.1"
+    lodash "^4.17.15"
+    redent "^3.0.0"
+
+"@testing-library/react@^11.1.0":
+  version "11.2.5"
+  resolved "https://registry.yarnpkg.com/@testing-library/react/-/react-11.2.5.tgz#ae1c36a66c7790ddb6662c416c27863d87818eb9"
+  integrity sha512-yEx7oIa/UWLe2F2dqK0FtMF9sJWNXD+2PPtp39BvE0Kh9MJ9Kl0HrZAgEuhUJR+Lx8Di6Xz+rKwSdEPY2UV8ZQ==
+  dependencies:
+    "@babel/runtime" "^7.12.5"
+    "@testing-library/dom" "^7.28.1"
+
+"@testing-library/user-event@^12.1.10":
+  version "12.7.3"
+  resolved "https://registry.yarnpkg.com/@testing-library/user-event/-/user-event-12.7.3.tgz#ef674ccb91794e52123b3532c336485d16f453b3"
+  integrity sha512-IdSHkWfbeSSJRFlldvHDWfVX0U18TbXIvLSGII+JbqkJrsflFr4OWlQIua0TvcVVJNna3BNrNvRSvpQ0yvSXlA==
+  dependencies:
+    "@babel/runtime" "^7.12.5"
+
+"@types/anymatch@*":
+  version "1.3.1"
+  resolved "https://registry.yarnpkg.com/@types/anymatch/-/anymatch-1.3.1.tgz#336badc1beecb9dacc38bea2cf32adf627a8421a"
+  integrity sha512-/+CRPXpBDpo2RK9C68N3b2cOvO0Cf5B9aPijHsoDQTHivnGSObdOF2BRQOYjojWTDy6nQvMjmqRXIxH55VjxxA==
+
+"@types/aria-query@^4.2.0":
+  version "4.2.1"
+  resolved "https://registry.yarnpkg.com/@types/aria-query/-/aria-query-4.2.1.tgz#78b5433344e2f92e8b306c06a5622c50c245bf6b"
+  integrity sha512-S6oPal772qJZHoRZLFc/XoZW2gFvwXusYUmXPXkgxJLuEk2vOt7jc4Yo6z/vtI0EBkbPBVrJJ0B+prLIKiWqHg==
+
+"@types/babel__core@^7.0.0", "@types/babel__core@^7.1.7":
+  version "7.1.12"
+  resolved "https://registry.yarnpkg.com/@types/babel__core/-/babel__core-7.1.12.tgz#4d8e9e51eb265552a7e4f1ff2219ab6133bdfb2d"
+  integrity sha512-wMTHiiTiBAAPebqaPiPDLFA4LYPKr6Ph0Xq/6rq1Ur3v66HXyG+clfR9CNETkD7MQS8ZHvpQOtA53DLws5WAEQ==
+  dependencies:
+    "@babel/parser" "^7.1.0"
+    "@babel/types" "^7.0.0"
+    "@types/babel__generator" "*"
+    "@types/babel__template" "*"
+    "@types/babel__traverse" "*"
+
+"@types/babel__generator@*":
+  version "7.6.2"
+  resolved "https://registry.yarnpkg.com/@types/babel__generator/-/babel__generator-7.6.2.tgz#f3d71178e187858f7c45e30380f8f1b7415a12d8"
+  integrity sha512-MdSJnBjl+bdwkLskZ3NGFp9YcXGx5ggLpQQPqtgakVhsWK0hTtNYhjpZLlWQTviGTvF8at+Bvli3jV7faPdgeQ==
+  dependencies:
+    "@babel/types" "^7.0.0"
+
+"@types/babel__template@*":
+  version "7.4.0"
+  resolved "https://registry.yarnpkg.com/@types/babel__template/-/babel__template-7.4.0.tgz#0c888dd70b3ee9eebb6e4f200e809da0076262be"
+  integrity sha512-NTPErx4/FiPCGScH7foPyr+/1Dkzkni+rHiYHHoTjvwou7AQzJkNeD60A9CXRy+ZEN2B1bggmkTMCDb+Mv5k+A==
+  dependencies:
+    "@babel/parser" "^7.1.0"
+    "@babel/types" "^7.0.0"
+
+"@types/babel__traverse@*", "@types/babel__traverse@^7.0.4", "@types/babel__traverse@^7.0.6":
+  version "7.11.0"
+  resolved "https://registry.yarnpkg.com/@types/babel__traverse/-/babel__traverse-7.11.0.tgz#b9a1efa635201ba9bc850323a8793ee2d36c04a0"
+  integrity sha512-kSjgDMZONiIfSH1Nxcr5JIRMwUetDki63FSQfpTCz8ogF3Ulqm8+mr5f78dUYs6vMiB6gBusQqfQmBvHZj/lwg==
+  dependencies:
+    "@babel/types" "^7.3.0"
+
+"@types/eslint@^7.2.6":
+  version "7.2.6"
+  resolved "https://registry.yarnpkg.com/@types/eslint/-/eslint-7.2.6.tgz#5e9aff555a975596c03a98b59ecd103decc70c3c"
+  integrity sha512-I+1sYH+NPQ3/tVqCeUSBwTE/0heyvtXqpIopUUArlBm0Kpocb8FbMa3AZ/ASKIFpN3rnEx932TTXDbt9OXsNDw==
+  dependencies:
+    "@types/estree" "*"
+    "@types/json-schema" "*"
+
+"@types/estree@*":
+  version "0.0.46"
+  resolved "https://registry.yarnpkg.com/@types/estree/-/estree-0.0.46.tgz#0fb6bfbbeabd7a30880504993369c4bf1deab1fe"
+  integrity sha512-laIjwTQaD+5DukBZaygQ79K1Z0jb1bPEMRrkXSLjtCcZm+abyp5YbrqpSLzD42FwWW6gK/aS4NYpJ804nG2brg==
+
+"@types/estree@0.0.39":
+  version "0.0.39"
+  resolved "https://registry.yarnpkg.com/@types/estree/-/estree-0.0.39.tgz#e177e699ee1b8c22d23174caaa7422644389509f"
+  integrity sha512-EYNwp3bU+98cpU4lAWYYL7Zz+2gryWH1qbdDTidVd6hkiR6weksdbMadyXKXNPEkQFhXM+hVO9ZygomHXp+AIw==
+
+"@types/glob@^7.1.1":
+  version "7.1.3"
+  resolved "https://registry.yarnpkg.com/@types/glob/-/glob-7.1.3.tgz#e6ba80f36b7daad2c685acd9266382e68985c183"
+  integrity sha512-SEYeGAIQIQX8NN6LDKprLjbrd5dARM5EXsd8GI/A5l0apYI1fGMWgPHSe4ZKL4eozlAyI+doUE9XbYS4xCkQ1w==
+  dependencies:
+    "@types/minimatch" "*"
+    "@types/node" "*"
+
+"@types/graceful-fs@^4.1.2":
+  version "4.1.5"
+  resolved "https://registry.yarnpkg.com/@types/graceful-fs/-/graceful-fs-4.1.5.tgz#21ffba0d98da4350db64891f92a9e5db3cdb4e15"
+  integrity sha512-anKkLmZZ+xm4p8JWBf4hElkM4XR+EZeA2M9BAkkTldmcyDY4mbdIJnRghDJH3Ov5ooY7/UAoENtmdMSkaAd7Cw==
+  dependencies:
+    "@types/node" "*"
+
+"@types/html-minifier-terser@^5.0.0":
+  version "5.1.1"
+  resolved "https://registry.yarnpkg.com/@types/html-minifier-terser/-/html-minifier-terser-5.1.1.tgz#3c9ee980f1a10d6021ae6632ca3e79ca2ec4fb50"
+  integrity sha512-giAlZwstKbmvMk1OO7WXSj4OZ0keXAcl2TQq4LWHiiPH2ByaH7WeUzng+Qej8UPxxv+8lRTuouo0iaNDBuzIBA==
+
+"@types/istanbul-lib-coverage@*", "@types/istanbul-lib-coverage@^2.0.0", "@types/istanbul-lib-coverage@^2.0.1":
+  version "2.0.3"
+  resolved "https://registry.yarnpkg.com/@types/istanbul-lib-coverage/-/istanbul-lib-coverage-2.0.3.tgz#4ba8ddb720221f432e443bd5f9117fd22cfd4762"
+  integrity sha512-sz7iLqvVUg1gIedBOvlkxPlc8/uVzyS5OwGz1cKjXzkl3FpL3al0crU8YGU1WoHkxn0Wxbw5tyi6hvzJKNzFsw==
+
+"@types/istanbul-lib-report@*":
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/@types/istanbul-lib-report/-/istanbul-lib-report-3.0.0.tgz#c14c24f18ea8190c118ee7562b7ff99a36552686"
+  integrity sha512-plGgXAPfVKFoYfa9NpYDAkseG+g6Jr294RqeqcqDixSbU34MZVJRi/P+7Y8GDpzkEwLaGZZOpKIEmeVZNtKsrg==
+  dependencies:
+    "@types/istanbul-lib-coverage" "*"
+
+"@types/istanbul-reports@^3.0.0":
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/@types/istanbul-reports/-/istanbul-reports-3.0.0.tgz#508b13aa344fa4976234e75dddcc34925737d821"
+  integrity sha512-nwKNbvnwJ2/mndE9ItP/zc2TCzw6uuodnF4EHYWD+gCQDVBuRQL5UzbZD0/ezy1iKsFU2ZQiDqg4M9dN4+wZgA==
+  dependencies:
+    "@types/istanbul-lib-report" "*"
+
+"@types/jest@*", "@types/jest@^26.0.15":
+  version "26.0.20"
+  resolved "https://registry.yarnpkg.com/@types/jest/-/jest-26.0.20.tgz#cd2f2702ecf69e86b586e1f5223a60e454056307"
+  integrity sha512-9zi2Y+5USJRxd0FsahERhBwlcvFh6D2GLQnY2FH2BzK8J9s9omvNHIbvABwIluXa0fD8XVKMLTO0aOEuUfACAA==
+  dependencies:
+    jest-diff "^26.0.0"
+    pretty-format "^26.0.0"
+
+"@types/json-schema@*", "@types/json-schema@^7.0.3", "@types/json-schema@^7.0.5", "@types/json-schema@^7.0.6":
+  version "7.0.7"
+  resolved "https://registry.yarnpkg.com/@types/json-schema/-/json-schema-7.0.7.tgz#98a993516c859eb0d5c4c8f098317a9ea68db9ad"
+  integrity sha512-cxWFQVseBm6O9Gbw1IWb8r6OS4OhSt3hPZLkFApLjM8TEXROBuQGLAH2i2gZpcXdLBIrpXuTDhH7Vbm1iXmNGA==
+
+"@types/json5@^0.0.29":
+  version "0.0.29"
+  resolved "https://registry.yarnpkg.com/@types/json5/-/json5-0.0.29.tgz#ee28707ae94e11d2b827bcbe5270bcea7f3e71ee"
+  integrity sha1-7ihweulOEdK4J7y+UnC86n8+ce4=
+
+"@types/lodash.mergewith@4.6.6":
+  version "4.6.6"
+  resolved "https://registry.yarnpkg.com/@types/lodash.mergewith/-/lodash.mergewith-4.6.6.tgz#c4698f5b214a433ff35cb2c75ee6ec7f99d79f10"
+  integrity sha512-RY/8IaVENjG19rxTZu9Nukqh0W2UrYgmBj5sdns4hWRZaV8PqR7wIKHFKzvOTjo4zVRV7sVI+yFhAJql12Kfqg==
+  dependencies:
+    "@types/lodash" "*"
+
+"@types/lodash@*":
+  version "4.14.168"
+  resolved "https://registry.yarnpkg.com/@types/lodash/-/lodash-4.14.168.tgz#fe24632e79b7ade3f132891afff86caa5e5ce008"
+  integrity sha512-oVfRvqHV/V6D1yifJbVRU3TMp8OT6o6BG+U9MkwuJ3U8/CsDHvalRpsxBqivn71ztOFZBTfJMvETbqHiaNSj7Q==
+
+"@types/minimatch@*":
+  version "3.0.3"
+  resolved "https://registry.yarnpkg.com/@types/minimatch/-/minimatch-3.0.3.tgz#3dca0e3f33b200fc7d1139c0cd96c1268cadfd9d"
+  integrity sha512-tHq6qdbT9U1IRSGf14CL0pUlULksvY9OZ+5eEgl1N7t+OA3tGvNpxJCzuKQlsNgCVwbAs670L1vcVQi8j9HjnA==
+
+"@types/node@*":
+  version "14.14.31"
+  resolved "https://registry.yarnpkg.com/@types/node/-/node-14.14.31.tgz#72286bd33d137aa0d152d47ec7c1762563d34055"
+  integrity sha512-vFHy/ezP5qI0rFgJ7aQnjDXwAMrG0KqqIH7tQG5PPv3BWBayOPIQNBjVc/P6hhdZfMx51REc6tfDNXHUio893g==
+
+"@types/node@^12.0.0":
+  version "12.20.4"
+  resolved "https://registry.yarnpkg.com/@types/node/-/node-12.20.4.tgz#73687043dd00fcb6962c60fbf499553a24d6bdf2"
+  integrity sha512-xRCgeE0Q4pT5UZ189TJ3SpYuX/QGl6QIAOAIeDSbAVAd2gX1NxSZup4jNVK7cxIeP8KDSbJgcckun495isP1jQ==
+
+"@types/normalize-package-data@^2.4.0":
+  version "2.4.0"
+  resolved "https://registry.yarnpkg.com/@types/normalize-package-data/-/normalize-package-data-2.4.0.tgz#e486d0d97396d79beedd0a6e33f4534ff6b4973e"
+  integrity sha512-f5j5b/Gf71L+dbqxIpQ4Z2WlmI/mPJ0fOkGGmFgtb6sAu97EPczzbS3/tJKxmcYDj55OX6ssqwDAWOHIYDRDGA==
+
+"@types/object-assign@4.0.30":
+  version "4.0.30"
+  resolved "https://registry.yarnpkg.com/@types/object-assign/-/object-assign-4.0.30.tgz#8949371d5a99f4381ee0f1df0a9b7a187e07e652"
+  integrity sha1-iUk3HVqZ9Dge4PHfCpt6GH4H5lI=
+
+"@types/parse-json@^4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@types/parse-json/-/parse-json-4.0.0.tgz#2f8bb441434d163b35fb8ffdccd7138927ffb8c0"
+  integrity sha512-//oorEZjL6sbPcKUaCdIGlIUeH26mgzimjBB77G6XRgnDl/L5wOnpyBGRe/Mmf5CVW3PwEBE1NjiMZ/ssFh4wA==
+
+"@types/prettier@^2.0.0":
+  version "2.2.1"
+  resolved "https://registry.yarnpkg.com/@types/prettier/-/prettier-2.2.1.tgz#374e31645d58cb18a07b3ecd8e9dede4deb2cccd"
+  integrity sha512-DxZZbyMAM9GWEzXL+BMZROWz9oo6A9EilwwOMET2UVu2uZTqMWS5S69KVtuVKaRjCUpcrOXRalet86/OpG4kqw==
+
+"@types/prop-types@*":
+  version "15.7.3"
+  resolved "https://registry.yarnpkg.com/@types/prop-types/-/prop-types-15.7.3.tgz#2ab0d5da2e5815f94b0b9d4b95d1e5f243ab2ca7"
+  integrity sha512-KfRL3PuHmqQLOG+2tGpRO26Ctg+Cq1E01D2DMriKEATHgWLfeNDmq9e29Q9WIky0dQ3NPkd1mzYH8Lm936Z9qw==
+
+"@types/q@^1.5.1":
+  version "1.5.4"
+  resolved "https://registry.yarnpkg.com/@types/q/-/q-1.5.4.tgz#15925414e0ad2cd765bfef58842f7e26a7accb24"
+  integrity sha512-1HcDas8SEj4z1Wc696tH56G8OlRaH/sqZOynNNB+HF0WOeXPaxTtbYzJY2oEfiUxjSKjhCKr+MvR7dCHcEelug==
+
+"@types/react-dom@^17.0.0":
+  version "17.0.1"
+  resolved "https://registry.yarnpkg.com/@types/react-dom/-/react-dom-17.0.1.tgz#d92d77d020bfb083e07cc8e0ac9f933599a4d56a"
+  integrity sha512-yIVyopxQb8IDZ7SOHeTovurFq+fXiPICa+GV3gp0Xedsl+MwQlMLKmvrnEjFbQxjliH5YVAEWFh975eVNmKj7Q==
+  dependencies:
+    "@types/react" "*"
+
+"@types/react-table@^7.0.28":
+  version "7.0.28"
+  resolved "https://registry.yarnpkg.com/@types/react-table/-/react-table-7.0.28.tgz#763383c3e7a285892ee64f311ee97a9c254b2bb0"
+  integrity sha512-crPm70S2KYGj3HJ2zCoeT0t8tdIvKDKCClMd1up3Gi/EDiTZraj3JFUsEL3+oXGSyv+n0EGGAf9a+0XsmdGpXA==
+  dependencies:
+    "@types/react" "*"
+
+"@types/react-timeago@^4.1.2":
+  version "4.1.2"
+  resolved "https://registry.yarnpkg.com/@types/react-timeago/-/react-timeago-4.1.2.tgz#fc365ac4483888e9b47267259416be2fd5cf765f"
+  integrity sha512-gkhU3rH7aZgeRybbm9ie9wHOM9i1I5YhUoto/uqY/DAbeRZuLU8ugl6E97jp65XCl9QTij32Vs7BAX2E/MqOAw==
+  dependencies:
+    "@types/react" "*"
+
+"@types/react@*", "@types/react@^17.0.0":
+  version "17.0.2"
+  resolved "https://registry.yarnpkg.com/@types/react/-/react-17.0.2.tgz#3de24c4efef902dd9795a49c75f760cbe4f7a5a8"
+  integrity sha512-Xt40xQsrkdvjn1EyWe1Bc0dJLcil/9x2vAuW7ya+PuQip4UYUaXyhzWmAbwRsdMgwOFHpfp7/FFZebDU6Y8VHA==
+  dependencies:
+    "@types/prop-types" "*"
+    csstype "^3.0.2"
+
+"@types/resolve@0.0.8":
+  version "0.0.8"
+  resolved "https://registry.yarnpkg.com/@types/resolve/-/resolve-0.0.8.tgz#f26074d238e02659e323ce1a13d041eee280e194"
+  integrity sha512-auApPaJf3NPfe18hSoJkp8EbZzer2ISk7o8mCC3M9he/a04+gbMF97NkpD2S8riMGvm4BMRI59/SZQSaLTKpsQ==
+  dependencies:
+    "@types/node" "*"
+
+"@types/source-list-map@*":
+  version "0.1.2"
+  resolved "https://registry.yarnpkg.com/@types/source-list-map/-/source-list-map-0.1.2.tgz#0078836063ffaf17412349bba364087e0ac02ec9"
+  integrity sha512-K5K+yml8LTo9bWJI/rECfIPrGgxdpeNbj+d53lwN4QjW1MCwlkhUms+gtdzigTeUyBr09+u8BwOIY3MXvHdcsA==
+
+"@types/stack-utils@^2.0.0":
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/@types/stack-utils/-/stack-utils-2.0.0.tgz#7036640b4e21cc2f259ae826ce843d277dad8cff"
+  integrity sha512-RJJrrySY7A8havqpGObOB4W92QXKJo63/jFLLgpvOtsGUqbQZ9Sbgl35KMm1DjC6j7AvmmU2bIno+3IyEaemaw==
+
+"@types/tapable@*", "@types/tapable@^1.0.5":
+  version "1.0.6"
+  resolved "https://registry.yarnpkg.com/@types/tapable/-/tapable-1.0.6.tgz#a9ca4b70a18b270ccb2bc0aaafefd1d486b7ea74"
+  integrity sha512-W+bw9ds02rAQaMvaLYxAbJ6cvguW/iJXNT6lTssS1ps6QdrMKttqEAMEG/b5CR8TZl3/L7/lH0ZV5nNR1LXikA==
+
+"@types/testing-library__jest-dom@^5.9.1":
+  version "5.9.5"
+  resolved "https://registry.yarnpkg.com/@types/testing-library__jest-dom/-/testing-library__jest-dom-5.9.5.tgz#5bf25c91ad2d7b38f264b12275e5c92a66d849b0"
+  integrity sha512-ggn3ws+yRbOHog9GxnXiEZ/35Mow6YtPZpd7Z5mKDeZS/o7zx3yAle0ov/wjhVB5QT4N2Dt+GNoGCdqkBGCajQ==
+  dependencies:
+    "@types/jest" "*"
+
+"@types/tinycolor2@1.4.2":
+  version "1.4.2"
+  resolved "https://registry.yarnpkg.com/@types/tinycolor2/-/tinycolor2-1.4.2.tgz#721ca5c5d1a2988b4a886e35c2ffc5735b6afbdf"
+  integrity sha512-PeHg/AtdW6aaIO2a+98Xj7rWY4KC1E6yOy7AFknJQ7VXUGNrMlyxDFxJo7HqLtjQms/ZhhQX52mLVW/EX3JGOw==
+
+"@types/uglify-js@*":
+  version "3.12.0"
+  resolved "https://registry.yarnpkg.com/@types/uglify-js/-/uglify-js-3.12.0.tgz#2bb061c269441620d46b946350c8f16d52ef37c5"
+  integrity sha512-sYAF+CF9XZ5cvEBkI7RtrG9g2GtMBkviTnBxYYyq+8BWvO4QtXfwwR6a2LFwCi4evMKZfpv6U43ViYvv17Wz3Q==
+  dependencies:
+    source-map "^0.6.1"
+
+"@types/warning@^3.0.0":
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/@types/warning/-/warning-3.0.0.tgz#0d2501268ad8f9962b740d387c4654f5f8e23e52"
+  integrity sha1-DSUBJorY+ZYrdA04fEZU9fjiPlI=
+
+"@types/webpack-sources@*":
+  version "2.1.0"
+  resolved "https://registry.yarnpkg.com/@types/webpack-sources/-/webpack-sources-2.1.0.tgz#8882b0bd62d1e0ce62f183d0d01b72e6e82e8c10"
+  integrity sha512-LXn/oYIpBeucgP1EIJbKQ2/4ZmpvRl+dlrFdX7+94SKRUV3Evy3FsfMZY318vGhkWUS5MPhtOM3w1/hCOAOXcg==
+  dependencies:
+    "@types/node" "*"
+    "@types/source-list-map" "*"
+    source-map "^0.7.3"
+
+"@types/webpack@^4.41.8":
+  version "4.41.26"
+  resolved "https://registry.yarnpkg.com/@types/webpack/-/webpack-4.41.26.tgz#27a30d7d531e16489f9c7607c747be6bc1a459ef"
+  integrity sha512-7ZyTfxjCRwexh+EJFwRUM+CDB2XvgHl4vfuqf1ZKrgGvcS5BrNvPQqJh3tsZ0P6h6Aa1qClVHaJZszLPzpqHeA==
+  dependencies:
+    "@types/anymatch" "*"
+    "@types/node" "*"
+    "@types/tapable" "*"
+    "@types/uglify-js" "*"
+    "@types/webpack-sources" "*"
+    source-map "^0.6.0"
+
+"@types/yargs-parser@*":
+  version "20.2.0"
+  resolved "https://registry.yarnpkg.com/@types/yargs-parser/-/yargs-parser-20.2.0.tgz#dd3e6699ba3237f0348cd085e4698780204842f9"
+  integrity sha512-37RSHht+gzzgYeobbG+KWryeAW8J33Nhr69cjTqSYymXVZEN9NbRYWoYlRtDhHKPVT1FyNKwaTPC1NynKZpzRA==
+
+"@types/yargs@^15.0.0":
+  version "15.0.13"
+  resolved "https://registry.yarnpkg.com/@types/yargs/-/yargs-15.0.13.tgz#34f7fec8b389d7f3c1fd08026a5763e072d3c6dc"
+  integrity sha512-kQ5JNTrbDv3Rp5X2n/iUu37IJBDU2gsZ5R/g1/KHOOEc5IKfUFjXT6DENPGduh08I/pamwtEq4oul7gUqKTQDQ==
+  dependencies:
+    "@types/yargs-parser" "*"
+
+"@typescript-eslint/eslint-plugin@^4.5.0":
+  version "4.15.2"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/eslint-plugin/-/eslint-plugin-4.15.2.tgz#981b26b4076c62a5a55873fbef3fe98f83360c61"
+  integrity sha512-uiQQeu9tWl3f1+oK0yoAv9lt/KXO24iafxgQTkIYO/kitruILGx3uH+QtIAHqxFV+yIsdnJH+alel9KuE3J15Q==
+  dependencies:
+    "@typescript-eslint/experimental-utils" "4.15.2"
+    "@typescript-eslint/scope-manager" "4.15.2"
+    debug "^4.1.1"
+    functional-red-black-tree "^1.0.1"
+    lodash "^4.17.15"
+    regexpp "^3.0.0"
+    semver "^7.3.2"
+    tsutils "^3.17.1"
+
+"@typescript-eslint/experimental-utils@4.15.2", "@typescript-eslint/experimental-utils@^4.0.1":
+  version "4.15.2"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/experimental-utils/-/experimental-utils-4.15.2.tgz#5efd12355bd5b535e1831282e6cf465b9a71cf36"
+  integrity sha512-Fxoshw8+R5X3/Vmqwsjc8nRO/7iTysRtDqx6rlfLZ7HbT8TZhPeQqbPjTyk2RheH3L8afumecTQnUc9EeXxohQ==
+  dependencies:
+    "@types/json-schema" "^7.0.3"
+    "@typescript-eslint/scope-manager" "4.15.2"
+    "@typescript-eslint/types" "4.15.2"
+    "@typescript-eslint/typescript-estree" "4.15.2"
+    eslint-scope "^5.0.0"
+    eslint-utils "^2.0.0"
+
+"@typescript-eslint/experimental-utils@^3.10.1":
+  version "3.10.1"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/experimental-utils/-/experimental-utils-3.10.1.tgz#e179ffc81a80ebcae2ea04e0332f8b251345a686"
+  integrity sha512-DewqIgscDzmAfd5nOGe4zm6Bl7PKtMG2Ad0KG8CUZAHlXfAKTF9Ol5PXhiMh39yRL2ChRH1cuuUGOcVyyrhQIw==
+  dependencies:
+    "@types/json-schema" "^7.0.3"
+    "@typescript-eslint/types" "3.10.1"
+    "@typescript-eslint/typescript-estree" "3.10.1"
+    eslint-scope "^5.0.0"
+    eslint-utils "^2.0.0"
+
+"@typescript-eslint/parser@^4.5.0":
+  version "4.15.2"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/parser/-/parser-4.15.2.tgz#c804474321ef76a3955aec03664808f0d6e7872e"
+  integrity sha512-SHeF8xbsC6z2FKXsaTb1tBCf0QZsjJ94H6Bo51Y1aVEZ4XAefaw5ZAilMoDPlGghe+qtq7XdTiDlGfVTOmvA+Q==
+  dependencies:
+    "@typescript-eslint/scope-manager" "4.15.2"
+    "@typescript-eslint/types" "4.15.2"
+    "@typescript-eslint/typescript-estree" "4.15.2"
+    debug "^4.1.1"
+
+"@typescript-eslint/scope-manager@4.15.2":
+  version "4.15.2"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/scope-manager/-/scope-manager-4.15.2.tgz#5725bda656995960ae1d004bfd1cd70320f37f4f"
+  integrity sha512-Zm0tf/MSKuX6aeJmuXexgdVyxT9/oJJhaCkijv0DvJVT3ui4zY6XYd6iwIo/8GEZGy43cd7w1rFMiCLHbRzAPQ==
+  dependencies:
+    "@typescript-eslint/types" "4.15.2"
+    "@typescript-eslint/visitor-keys" "4.15.2"
+
+"@typescript-eslint/types@3.10.1":
+  version "3.10.1"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/types/-/types-3.10.1.tgz#1d7463fa7c32d8a23ab508a803ca2fe26e758727"
+  integrity sha512-+3+FCUJIahE9q0lDi1WleYzjCwJs5hIsbugIgnbB+dSCYUxl8L6PwmsyOPFZde2hc1DlTo/xnkOgiTLSyAbHiQ==
+
+"@typescript-eslint/types@4.15.2":
+  version "4.15.2"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/types/-/types-4.15.2.tgz#04acf3a2dc8001a88985291744241e732ef22c60"
+  integrity sha512-r7lW7HFkAarfUylJ2tKndyO9njwSyoy6cpfDKWPX6/ctZA+QyaYscAHXVAfJqtnY6aaTwDYrOhp+ginlbc7HfQ==
+
+"@typescript-eslint/typescript-estree@3.10.1":
+  version "3.10.1"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/typescript-estree/-/typescript-estree-3.10.1.tgz#fd0061cc38add4fad45136d654408569f365b853"
+  integrity sha512-QbcXOuq6WYvnB3XPsZpIwztBoquEYLXh2MtwVU+kO8jgYCiv4G5xrSP/1wg4tkvrEE+esZVquIPX/dxPlePk1w==
+  dependencies:
+    "@typescript-eslint/types" "3.10.1"
+    "@typescript-eslint/visitor-keys" "3.10.1"
+    debug "^4.1.1"
+    glob "^7.1.6"
+    is-glob "^4.0.1"
+    lodash "^4.17.15"
+    semver "^7.3.2"
+    tsutils "^3.17.1"
+
+"@typescript-eslint/typescript-estree@4.15.2":
+  version "4.15.2"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/typescript-estree/-/typescript-estree-4.15.2.tgz#c2f7a1e94f3428d229d5ecff3ead6581ee9b62fa"
+  integrity sha512-cGR8C2g5SPtHTQvAymEODeqx90pJHadWsgTtx6GbnTWKqsg7yp6Eaya9nFzUd4KrKhxdYTTFBiYeTPQaz/l8bw==
+  dependencies:
+    "@typescript-eslint/types" "4.15.2"
+    "@typescript-eslint/visitor-keys" "4.15.2"
+    debug "^4.1.1"
+    globby "^11.0.1"
+    is-glob "^4.0.1"
+    semver "^7.3.2"
+    tsutils "^3.17.1"
+
+"@typescript-eslint/visitor-keys@3.10.1":
+  version "3.10.1"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/visitor-keys/-/visitor-keys-3.10.1.tgz#cd4274773e3eb63b2e870ac602274487ecd1e931"
+  integrity sha512-9JgC82AaQeglebjZMgYR5wgmfUdUc+EitGUUMW8u2nDckaeimzW+VsoLV6FoimPv2id3VQzfjwBxEMVz08ameQ==
+  dependencies:
+    eslint-visitor-keys "^1.1.0"
+
+"@typescript-eslint/visitor-keys@4.15.2":
+  version "4.15.2"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/visitor-keys/-/visitor-keys-4.15.2.tgz#3d1c7979ce75bf6acf9691109bd0d6b5706192b9"
+  integrity sha512-TME1VgSb7wTwgENN5KVj4Nqg25hP8DisXxNBojM4Nn31rYaNDIocNm5cmjOFfh42n7NVERxWrDFoETO/76ePyg==
+  dependencies:
+    "@typescript-eslint/types" "4.15.2"
+    eslint-visitor-keys "^2.0.0"
+
+"@webassemblyjs/ast@1.9.0":
+  version "1.9.0"
+  resolved "https://registry.yarnpkg.com/@webassemblyjs/ast/-/ast-1.9.0.tgz#bd850604b4042459a5a41cd7d338cbed695ed964"
+  integrity sha512-C6wW5L+b7ogSDVqymbkkvuW9kruN//YisMED04xzeBBqjHa2FYnmvOlS6Xj68xWQRgWvI9cIglsjFowH/RJyEA==
+  dependencies:
+    "@webassemblyjs/helper-module-context" "1.9.0"
+    "@webassemblyjs/helper-wasm-bytecode" "1.9.0"
+    "@webassemblyjs/wast-parser" "1.9.0"
+
+"@webassemblyjs/floating-point-hex-parser@1.9.0":
+  version "1.9.0"
+  resolved "https://registry.yarnpkg.com/@webassemblyjs/floating-point-hex-parser/-/floating-point-hex-parser-1.9.0.tgz#3c3d3b271bddfc84deb00f71344438311d52ffb4"
+  integrity sha512-TG5qcFsS8QB4g4MhrxK5TqfdNe7Ey/7YL/xN+36rRjl/BlGE/NcBvJcqsRgCP6Z92mRE+7N50pRIi8SmKUbcQA==
+
+"@webassemblyjs/helper-api-error@1.9.0":
+  version "1.9.0"
+  resolved "https://registry.yarnpkg.com/@webassemblyjs/helper-api-error/-/helper-api-error-1.9.0.tgz#203f676e333b96c9da2eeab3ccef33c45928b6a2"
+  integrity sha512-NcMLjoFMXpsASZFxJ5h2HZRcEhDkvnNFOAKneP5RbKRzaWJN36NC4jqQHKwStIhGXu5mUWlUUk7ygdtrO8lbmw==
+
+"@webassemblyjs/helper-buffer@1.9.0":
+  version "1.9.0"
+  resolved "https://registry.yarnpkg.com/@webassemblyjs/helper-buffer/-/helper-buffer-1.9.0.tgz#a1442d269c5feb23fcbc9ef759dac3547f29de00"
+  integrity sha512-qZol43oqhq6yBPx7YM3m9Bv7WMV9Eevj6kMi6InKOuZxhw+q9hOkvq5e/PpKSiLfyetpaBnogSbNCfBwyB00CA==
+
+"@webassemblyjs/helper-code-frame@1.9.0":
+  version "1.9.0"
+  resolved "https://registry.yarnpkg.com/@webassemblyjs/helper-code-frame/-/helper-code-frame-1.9.0.tgz#647f8892cd2043a82ac0c8c5e75c36f1d9159f27"
+  integrity sha512-ERCYdJBkD9Vu4vtjUYe8LZruWuNIToYq/ME22igL+2vj2dQ2OOujIZr3MEFvfEaqKoVqpsFKAGsRdBSBjrIvZA==
+  dependencies:
+    "@webassemblyjs/wast-printer" "1.9.0"
+
+"@webassemblyjs/helper-fsm@1.9.0":
+  version "1.9.0"
+  resolved "https://registry.yarnpkg.com/@webassemblyjs/helper-fsm/-/helper-fsm-1.9.0.tgz#c05256b71244214671f4b08ec108ad63b70eddb8"
+  integrity sha512-OPRowhGbshCb5PxJ8LocpdX9Kl0uB4XsAjl6jH/dWKlk/mzsANvhwbiULsaiqT5GZGT9qinTICdj6PLuM5gslw==
+
+"@webassemblyjs/helper-module-context@1.9.0":
+  version "1.9.0"
+  resolved "https://registry.yarnpkg.com/@webassemblyjs/helper-module-context/-/helper-module-context-1.9.0.tgz#25d8884b76839871a08a6c6f806c3979ef712f07"
+  integrity sha512-MJCW8iGC08tMk2enck1aPW+BE5Cw8/7ph/VGZxwyvGbJwjktKkDK7vy7gAmMDx88D7mhDTCNKAW5tED+gZ0W8g==
+  dependencies:
+    "@webassemblyjs/ast" "1.9.0"
+
+"@webassemblyjs/helper-wasm-bytecode@1.9.0":
+  version "1.9.0"
+  resolved "https://registry.yarnpkg.com/@webassemblyjs/helper-wasm-bytecode/-/helper-wasm-bytecode-1.9.0.tgz#4fed8beac9b8c14f8c58b70d124d549dd1fe5790"
+  integrity sha512-R7FStIzyNcd7xKxCZH5lE0Bqy+hGTwS3LJjuv1ZVxd9O7eHCedSdrId/hMOd20I+v8wDXEn+bjfKDLzTepoaUw==
+
+"@webassemblyjs/helper-wasm-section@1.9.0":
+  version "1.9.0"
+  resolved "https://registry.yarnpkg.com/@webassemblyjs/helper-wasm-section/-/helper-wasm-section-1.9.0.tgz#5a4138d5a6292ba18b04c5ae49717e4167965346"
+  integrity sha512-XnMB8l3ek4tvrKUUku+IVaXNHz2YsJyOOmz+MMkZvh8h1uSJpSen6vYnw3IoQ7WwEuAhL8Efjms1ZWjqh2agvw==
+  dependencies:
+    "@webassemblyjs/ast" "1.9.0"
+    "@webassemblyjs/helper-buffer" "1.9.0"
+    "@webassemblyjs/helper-wasm-bytecode" "1.9.0"
+    "@webassemblyjs/wasm-gen" "1.9.0"
+
+"@webassemblyjs/ieee754@1.9.0":
+  version "1.9.0"
+  resolved "https://registry.yarnpkg.com/@webassemblyjs/ieee754/-/ieee754-1.9.0.tgz#15c7a0fbaae83fb26143bbacf6d6df1702ad39e4"
+  integrity sha512-dcX8JuYU/gvymzIHc9DgxTzUUTLexWwt8uCTWP3otys596io0L5aW02Gb1RjYpx2+0Jus1h4ZFqjla7umFniTg==
+  dependencies:
+    "@xtuc/ieee754" "^1.2.0"
+
+"@webassemblyjs/leb128@1.9.0":
+  version "1.9.0"
+  resolved "https://registry.yarnpkg.com/@webassemblyjs/leb128/-/leb128-1.9.0.tgz#f19ca0b76a6dc55623a09cffa769e838fa1e1c95"
+  integrity sha512-ENVzM5VwV1ojs9jam6vPys97B/S65YQtv/aanqnU7D8aSoHFX8GyhGg0CMfyKNIHBuAVjy3tlzd5QMMINa7wpw==
+  dependencies:
+    "@xtuc/long" "4.2.2"
+
+"@webassemblyjs/utf8@1.9.0":
+  version "1.9.0"
+  resolved "https://registry.yarnpkg.com/@webassemblyjs/utf8/-/utf8-1.9.0.tgz#04d33b636f78e6a6813227e82402f7637b6229ab"
+  integrity sha512-GZbQlWtopBTP0u7cHrEx+73yZKrQoBMpwkGEIqlacljhXCkVM1kMQge/Mf+csMJAjEdSwhOyLAS0AoR3AG5P8w==
+
+"@webassemblyjs/wasm-edit@1.9.0":
+  version "1.9.0"
+  resolved "https://registry.yarnpkg.com/@webassemblyjs/wasm-edit/-/wasm-edit-1.9.0.tgz#3fe6d79d3f0f922183aa86002c42dd256cfee9cf"
+  integrity sha512-FgHzBm80uwz5M8WKnMTn6j/sVbqilPdQXTWraSjBwFXSYGirpkSWE2R9Qvz9tNiTKQvoKILpCuTjBKzOIm0nxw==
+  dependencies:
+    "@webassemblyjs/ast" "1.9.0"
+    "@webassemblyjs/helper-buffer" "1.9.0"
+    "@webassemblyjs/helper-wasm-bytecode" "1.9.0"
+    "@webassemblyjs/helper-wasm-section" "1.9.0"
+    "@webassemblyjs/wasm-gen" "1.9.0"
+    "@webassemblyjs/wasm-opt" "1.9.0"
+    "@webassemblyjs/wasm-parser" "1.9.0"
+    "@webassemblyjs/wast-printer" "1.9.0"
+
+"@webassemblyjs/wasm-gen@1.9.0":
+  version "1.9.0"
+  resolved "https://registry.yarnpkg.com/@webassemblyjs/wasm-gen/-/wasm-gen-1.9.0.tgz#50bc70ec68ded8e2763b01a1418bf43491a7a49c"
+  integrity sha512-cPE3o44YzOOHvlsb4+E9qSqjc9Qf9Na1OO/BHFy4OI91XDE14MjFN4lTMezzaIWdPqHnsTodGGNP+iRSYfGkjA==
+  dependencies:
+    "@webassemblyjs/ast" "1.9.0"
+    "@webassemblyjs/helper-wasm-bytecode" "1.9.0"
+    "@webassemblyjs/ieee754" "1.9.0"
+    "@webassemblyjs/leb128" "1.9.0"
+    "@webassemblyjs/utf8" "1.9.0"
+
+"@webassemblyjs/wasm-opt@1.9.0":
+  version "1.9.0"
+  resolved "https://registry.yarnpkg.com/@webassemblyjs/wasm-opt/-/wasm-opt-1.9.0.tgz#2211181e5b31326443cc8112eb9f0b9028721a61"
+  integrity sha512-Qkjgm6Anhm+OMbIL0iokO7meajkzQD71ioelnfPEj6r4eOFuqm4YC3VBPqXjFyyNwowzbMD+hizmprP/Fwkl2A==
+  dependencies:
+    "@webassemblyjs/ast" "1.9.0"
+    "@webassemblyjs/helper-buffer" "1.9.0"
+    "@webassemblyjs/wasm-gen" "1.9.0"
+    "@webassemblyjs/wasm-parser" "1.9.0"
+
+"@webassemblyjs/wasm-parser@1.9.0":
+  version "1.9.0"
+  resolved "https://registry.yarnpkg.com/@webassemblyjs/wasm-parser/-/wasm-parser-1.9.0.tgz#9d48e44826df4a6598294aa6c87469d642fff65e"
+  integrity sha512-9+wkMowR2AmdSWQzsPEjFU7njh8HTO5MqO8vjwEHuM+AMHioNqSBONRdr0NQQ3dVQrzp0s8lTcYqzUdb7YgELA==
+  dependencies:
+    "@webassemblyjs/ast" "1.9.0"
+    "@webassemblyjs/helper-api-error" "1.9.0"
+    "@webassemblyjs/helper-wasm-bytecode" "1.9.0"
+    "@webassemblyjs/ieee754" "1.9.0"
+    "@webassemblyjs/leb128" "1.9.0"
+    "@webassemblyjs/utf8" "1.9.0"
+
+"@webassemblyjs/wast-parser@1.9.0":
+  version "1.9.0"
+  resolved "https://registry.yarnpkg.com/@webassemblyjs/wast-parser/-/wast-parser-1.9.0.tgz#3031115d79ac5bd261556cecc3fa90a3ef451914"
+  integrity sha512-qsqSAP3QQ3LyZjNC/0jBJ/ToSxfYJ8kYyuiGvtn/8MK89VrNEfwj7BPQzJVHi0jGTRK2dGdJ5PRqhtjzoww+bw==
+  dependencies:
+    "@webassemblyjs/ast" "1.9.0"
+    "@webassemblyjs/floating-point-hex-parser" "1.9.0"
+    "@webassemblyjs/helper-api-error" "1.9.0"
+    "@webassemblyjs/helper-code-frame" "1.9.0"
+    "@webassemblyjs/helper-fsm" "1.9.0"
+    "@xtuc/long" "4.2.2"
+
+"@webassemblyjs/wast-printer@1.9.0":
+  version "1.9.0"
+  resolved "https://registry.yarnpkg.com/@webassemblyjs/wast-printer/-/wast-printer-1.9.0.tgz#4935d54c85fef637b00ce9f52377451d00d47899"
+  integrity sha512-2J0nE95rHXHyQ24cWjMKJ1tqB/ds8z/cyeOZxJhcb+rW+SQASVjuznUSmdz5GpVJTzU8JkhYut0D3siFDD6wsA==
+  dependencies:
+    "@webassemblyjs/ast" "1.9.0"
+    "@webassemblyjs/wast-parser" "1.9.0"
+    "@xtuc/long" "4.2.2"
+
+"@xtuc/ieee754@^1.2.0":
+  version "1.2.0"
+  resolved "https://registry.yarnpkg.com/@xtuc/ieee754/-/ieee754-1.2.0.tgz#eef014a3145ae477a1cbc00cd1e552336dceb790"
+  integrity sha512-DX8nKgqcGwsc0eJSqYt5lwP4DH5FlHnmuWWBRy7X0NcaGR0ZtuyeESgMwTYVEtxmsNGY+qit4QYT/MIYTOTPeA==
+
+"@xtuc/long@4.2.2":
+  version "4.2.2"
+  resolved "https://registry.yarnpkg.com/@xtuc/long/-/long-4.2.2.tgz#d291c6a4e97989b5c61d9acf396ae4fe133a718d"
+  integrity sha512-NuHqBY1PB/D8xU6s/thBgOAiAP7HOYDQ32+BFZILJ8ivkUkAHQnWfn6WhL79Owj1qmUnoN/YPhktdIoucipkAQ==
+
+abab@^2.0.3:
+  version "2.0.5"
+  resolved "https://registry.yarnpkg.com/abab/-/abab-2.0.5.tgz#c0b678fb32d60fc1219c784d6a826fe385aeb79a"
+  integrity sha512-9IK9EadsbHo6jLWIpxpR6pL0sazTXV6+SQv25ZB+F7Bj9mJNaOc4nCRabwd5M/JwmUa8idz6Eci6eKfJryPs6Q==
+
+accepts@~1.3.4, accepts@~1.3.5, accepts@~1.3.7:
+  version "1.3.7"
+  resolved "https://registry.yarnpkg.com/accepts/-/accepts-1.3.7.tgz#531bc726517a3b2b41f850021c6cc15eaab507cd"
+  integrity sha512-Il80Qs2WjYlJIBNzNkK6KYqlVMTbZLXgHx2oT0pU/fjRHyEp+PEfEPY0R3WCwAGVOtauxh1hOxNgIf5bv7dQpA==
+  dependencies:
+    mime-types "~2.1.24"
+    negotiator "0.6.2"
+
+acorn-globals@^6.0.0:
+  version "6.0.0"
+  resolved "https://registry.yarnpkg.com/acorn-globals/-/acorn-globals-6.0.0.tgz#46cdd39f0f8ff08a876619b55f5ac8a6dc770b45"
+  integrity sha512-ZQl7LOWaF5ePqqcX4hLuv/bLXYQNfNWw2c0/yX/TsPRKamzHcTGQnlCjHT3TsmkOUVEPS3crCxiPfdzE/Trlhg==
+  dependencies:
+    acorn "^7.1.1"
+    acorn-walk "^7.1.1"
+
+acorn-jsx@^5.3.1:
+  version "5.3.1"
+  resolved "https://registry.yarnpkg.com/acorn-jsx/-/acorn-jsx-5.3.1.tgz#fc8661e11b7ac1539c47dbfea2e72b3af34d267b"
+  integrity sha512-K0Ptm/47OKfQRpNQ2J/oIN/3QYiK6FwW+eJbILhsdxh2WTLdl+30o8aGdTbm5JbffpFFAg/g+zi1E+jvJha5ng==
+
+acorn-walk@^7.1.1:
+  version "7.2.0"
+  resolved "https://registry.yarnpkg.com/acorn-walk/-/acorn-walk-7.2.0.tgz#0de889a601203909b0fbe07b8938dc21d2e967bc"
+  integrity sha512-OPdCF6GsMIP+Az+aWfAAOEt2/+iVDKE7oy6lJ098aoe59oAmK76qV6Gw60SbZ8jHuG2wH058GF4pLFbYamYrVA==
+
+acorn@^6.4.1:
+  version "6.4.2"
+  resolved "https://registry.yarnpkg.com/acorn/-/acorn-6.4.2.tgz#35866fd710528e92de10cf06016498e47e39e1e6"
+  integrity sha512-XtGIhXwF8YM8bJhGxG5kXgjkEuNGLTkoYqVE+KMR+aspr4KGYmKYg7yUe3KghyQ9yheNwLnjmzh/7+gfDBmHCQ==
+
+acorn@^7.1.0, acorn@^7.1.1, acorn@^7.4.0:
+  version "7.4.1"
+  resolved "https://registry.yarnpkg.com/acorn/-/acorn-7.4.1.tgz#feaed255973d2e77555b83dbc08851a6c63520fa"
+  integrity sha512-nQyp0o1/mNdbTO1PO6kHkwSrmgZ0MT/jCCpNiwbUjGoRN4dlBhqJtoQuCnEOKzgTVwg0ZWiCoQy6SxMebQVh8A==
+
+address@1.1.2, address@^1.0.1:
+  version "1.1.2"
+  resolved "https://registry.yarnpkg.com/address/-/address-1.1.2.tgz#bf1116c9c758c51b7a933d296b72c221ed9428b6"
+  integrity sha512-aT6camzM4xEA54YVJYSqxz1kv4IHnQZRtThJJHhUMRExaU5spC7jX5ugSwTaTgJliIgs4VhZOk7htClvQ/LmRA==
+
+adjust-sourcemap-loader@3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/adjust-sourcemap-loader/-/adjust-sourcemap-loader-3.0.0.tgz#5ae12fb5b7b1c585e80bbb5a63ec163a1a45e61e"
+  integrity sha512-YBrGyT2/uVQ/c6Rr+t6ZJXniY03YtHGMJQYal368burRGYKqhx9qGTWqcBU5s1CwYY9E/ri63RYyG1IacMZtqw==
+  dependencies:
+    loader-utils "^2.0.0"
+    regex-parser "^2.2.11"
+
+aggregate-error@^3.0.0:
+  version "3.1.0"
+  resolved "https://registry.yarnpkg.com/aggregate-error/-/aggregate-error-3.1.0.tgz#92670ff50f5359bdb7a3e0d40d0ec30c5737687a"
+  integrity sha512-4I7Td01quW/RpocfNayFdFVk1qSuoh0E7JrbRJ16nH01HhKFQ88INq9Sd+nd72zqRySlr9BmDA8xlEJ6vJMrYA==
+  dependencies:
+    clean-stack "^2.0.0"
+    indent-string "^4.0.0"
+
+ajv-errors@^1.0.0:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/ajv-errors/-/ajv-errors-1.0.1.tgz#f35986aceb91afadec4102fbd85014950cefa64d"
+  integrity sha512-DCRfO/4nQ+89p/RK43i8Ezd41EqdGIU4ld7nGF8OQ14oc/we5rEntLCUa7+jrn3nn83BosfwZA0wb4pon2o8iQ==
+
+ajv-keywords@^3.1.0, ajv-keywords@^3.4.1, ajv-keywords@^3.5.2:
+  version "3.5.2"
+  resolved "https://registry.yarnpkg.com/ajv-keywords/-/ajv-keywords-3.5.2.tgz#31f29da5ab6e00d1c2d329acf7b5929614d5014d"
+  integrity sha512-5p6WTN0DdTGVQk6VjcEju19IgaHudalcfabD7yhDGeA6bcQnmL+CpveLJq/3hvfwd1aof6L386Ougkx6RfyMIQ==
+
+ajv@^6.1.0, ajv@^6.10.0, ajv@^6.10.2, ajv@^6.12.3, ajv@^6.12.4, ajv@^6.12.5:
+  version "6.12.6"
+  resolved "https://registry.yarnpkg.com/ajv/-/ajv-6.12.6.tgz#baf5a62e802b07d977034586f8c3baf5adf26df4"
+  integrity sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==
+  dependencies:
+    fast-deep-equal "^3.1.1"
+    fast-json-stable-stringify "^2.0.0"
+    json-schema-traverse "^0.4.1"
+    uri-js "^4.2.2"
+
+ajv@^7.0.2:
+  version "7.1.1"
+  resolved "https://registry.yarnpkg.com/ajv/-/ajv-7.1.1.tgz#1e6b37a454021fa9941713f38b952fc1c8d32a84"
+  integrity sha512-ga/aqDYnUy/o7vbsRTFhhTsNeXiYb5JWDIcRIeZfwRNCefwjNTVYCGdGSUrEmiu3yDK3vFvNbgJxvrQW4JXrYQ==
+  dependencies:
+    fast-deep-equal "^3.1.1"
+    json-schema-traverse "^1.0.0"
+    require-from-string "^2.0.2"
+    uri-js "^4.2.2"
+
+alphanum-sort@^1.0.0:
+  version "1.0.2"
+  resolved "https://registry.yarnpkg.com/alphanum-sort/-/alphanum-sort-1.0.2.tgz#97a1119649b211ad33691d9f9f486a8ec9fbe0a3"
+  integrity sha1-l6ERlkmyEa0zaR2fn0hqjsn74KM=
+
+ansi-colors@^3.0.0:
+  version "3.2.4"
+  resolved "https://registry.yarnpkg.com/ansi-colors/-/ansi-colors-3.2.4.tgz#e3a3da4bfbae6c86a9c285625de124a234026fbf"
+  integrity sha512-hHUXGagefjN2iRrID63xckIvotOXOojhQKWIPUZ4mNUZ9nLZW+7FMNoE1lOkEhNWYsx/7ysGIuJYCiMAA9FnrA==
+
+ansi-colors@^4.1.1:
+  version "4.1.1"
+  resolved "https://registry.yarnpkg.com/ansi-colors/-/ansi-colors-4.1.1.tgz#cbb9ae256bf750af1eab344f229aa27fe94ba348"
+  integrity sha512-JoX0apGbHaUJBNl6yF+p6JAFYZ666/hhCGKN5t9QFjbJQKUU/g8MNbFDbvfrgKXvI1QpZplPOnwIo99lX/AAmA==
+
+ansi-escapes@^4.2.1, ansi-escapes@^4.3.1:
+  version "4.3.1"
+  resolved "https://registry.yarnpkg.com/ansi-escapes/-/ansi-escapes-4.3.1.tgz#a5c47cc43181f1f38ffd7076837700d395522a61"
+  integrity sha512-JWF7ocqNrp8u9oqpgV+wH5ftbt+cfvv+PTjOvKLT3AdYly/LmORARfEVT1iyjwN+4MqE5UmVKoAdIBqeoCHgLA==
+  dependencies:
+    type-fest "^0.11.0"
+
+ansi-html@0.0.7, ansi-html@^0.0.7:
+  version "0.0.7"
+  resolved "https://registry.yarnpkg.com/ansi-html/-/ansi-html-0.0.7.tgz#813584021962a9e9e6fd039f940d12f56ca7859e"
+  integrity sha1-gTWEAhliqenm/QOflA0S9WynhZ4=
+
+ansi-regex@^2.0.0:
+  version "2.1.1"
+  resolved "https://registry.yarnpkg.com/ansi-regex/-/ansi-regex-2.1.1.tgz#c3b33ab5ee360d86e0e628f0468ae7ef27d654df"
+  integrity sha1-w7M6te42DYbg5ijwRorn7yfWVN8=
+
+ansi-regex@^4.1.0:
+  version "4.1.0"
+  resolved "https://registry.yarnpkg.com/ansi-regex/-/ansi-regex-4.1.0.tgz#8b9f8f08cf1acb843756a839ca8c7e3168c51997"
+  integrity sha512-1apePfXM1UOSqw0o9IiFAovVz9M5S1Dg+4TrDwfMewQ6p/rmMueb7tWZjQ1rx4Loy1ArBggoqGpfqqdI4rondg==
+
+ansi-regex@^5.0.0:
+  version "5.0.0"
+  resolved "https://registry.yarnpkg.com/ansi-regex/-/ansi-regex-5.0.0.tgz#388539f55179bf39339c81af30a654d69f87cb75"
+  integrity sha512-bY6fj56OUQ0hU1KjFNDQuJFezqKdrAyFdIevADiqrWHwSlbmBNMHp5ak2f40Pm8JTFyM2mqxkG6ngkHO11f/lg==
+
+ansi-styles@^3.2.0, ansi-styles@^3.2.1:
+  version "3.2.1"
+  resolved "https://registry.yarnpkg.com/ansi-styles/-/ansi-styles-3.2.1.tgz#41fbb20243e50b12be0f04b8dedbf07520ce841d"
+  integrity sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA==
+  dependencies:
+    color-convert "^1.9.0"
+
+ansi-styles@^4.0.0, ansi-styles@^4.1.0:
+  version "4.3.0"
+  resolved "https://registry.yarnpkg.com/ansi-styles/-/ansi-styles-4.3.0.tgz#edd803628ae71c04c85ae7a0906edad34b648937"
+  integrity sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==
+  dependencies:
+    color-convert "^2.0.1"
+
+anymatch@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/anymatch/-/anymatch-2.0.0.tgz#bcb24b4f37934d9aa7ac17b4adaf89e7c76ef2eb"
+  integrity sha512-5teOsQWABXHHBFP9y3skS5P3d/WfWXpv3FUpy+LorMrNYaT9pI4oLMQX7jzQ2KklNpGpWHzdCXTDT2Y3XGlZBw==
+  dependencies:
+    micromatch "^3.1.4"
+    normalize-path "^2.1.1"
+
+anymatch@^3.0.3, anymatch@~3.1.1:
+  version "3.1.1"
+  resolved "https://registry.yarnpkg.com/anymatch/-/anymatch-3.1.1.tgz#c55ecf02185e2469259399310c173ce31233b142"
+  integrity sha512-mM8522psRCqzV+6LhomX5wgp25YVibjh8Wj23I5RPkPppSVSjyKD2A2mBJmWGa+KN7f2D6LNh9jkBCeyLktzjg==
+  dependencies:
+    normalize-path "^3.0.0"
+    picomatch "^2.0.4"
+
+aproba@^1.1.1:
+  version "1.2.0"
+  resolved "https://registry.yarnpkg.com/aproba/-/aproba-1.2.0.tgz#6802e6264efd18c790a1b0d517f0f2627bf2c94a"
+  integrity sha512-Y9J6ZjXtoYh8RnXVCMOU/ttDmk1aBjunq9vO0ta5x85WDQiQfUF9sIPBITdbiiIVcBo03Hi3jMxigBtsddlXRw==
+
+argparse@^1.0.7:
+  version "1.0.10"
+  resolved "https://registry.yarnpkg.com/argparse/-/argparse-1.0.10.tgz#bcd6791ea5ae09725e17e5ad988134cd40b3d911"
+  integrity sha512-o5Roy6tNG4SL/FOkCAN6RzjiakZS25RLYFrcMttJqbdd8BWrnA+fGz57iN5Pb06pvBGvl5gQ0B48dJlslXvoTg==
+  dependencies:
+    sprintf-js "~1.0.2"
+
+aria-hidden@^1.1.1:
+  version "1.1.2"
+  resolved "https://registry.yarnpkg.com/aria-hidden/-/aria-hidden-1.1.2.tgz#5354315a29bffdaced3993fccd826817dc8c5272"
+  integrity sha512-WAMH9q3vRimVqP+B0q2eDvx7IPDoY17A2fWwj5atTA/zTYJCNcS6HJ5YErZ5FO3PUHhrV0y0yR1NA0dRNm913A==
+  dependencies:
+    tslib "^1.0.0"
+
+aria-query@^4.2.2:
+  version "4.2.2"
+  resolved "https://registry.yarnpkg.com/aria-query/-/aria-query-4.2.2.tgz#0d2ca6c9aceb56b8977e9fed6aed7e15bbd2f83b"
+  integrity sha512-o/HelwhuKpTj/frsOsbNLNgnNGVIFsVP/SW2BSF14gVl7kAfMOJ6/8wUAUvG1R1NHKrfG+2sHZTu0yauT1qBrA==
+  dependencies:
+    "@babel/runtime" "^7.10.2"
+    "@babel/runtime-corejs3" "^7.10.2"
+
+arity-n@^1.0.4:
+  version "1.0.4"
+  resolved "https://registry.yarnpkg.com/arity-n/-/arity-n-1.0.4.tgz#d9e76b11733e08569c0847ae7b39b2860b30b745"
+  integrity sha1-2edrEXM+CFacCEeuezmyhgswt0U=
+
+arr-diff@^4.0.0:
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/arr-diff/-/arr-diff-4.0.0.tgz#d6461074febfec71e7e15235761a329a5dc7c520"
+  integrity sha1-1kYQdP6/7HHn4VI1dhoyml3HxSA=
+
+arr-flatten@^1.1.0:
+  version "1.1.0"
+  resolved "https://registry.yarnpkg.com/arr-flatten/-/arr-flatten-1.1.0.tgz#36048bbff4e7b47e136644316c99669ea5ae91f1"
+  integrity sha512-L3hKV5R/p5o81R7O02IGnwpDmkp6E982XhtbuwSe3O4qOtMMMtodicASA1Cny2U+aCXcNpml+m4dPsvsJ3jatg==
+
+arr-union@^3.1.0:
+  version "3.1.0"
+  resolved "https://registry.yarnpkg.com/arr-union/-/arr-union-3.1.0.tgz#e39b09aea9def866a8f206e288af63919bae39c4"
+  integrity sha1-45sJrqne+Gao8gbiiK9jkZuuOcQ=
+
+array-flatten@1.1.1:
+  version "1.1.1"
+  resolved "https://registry.yarnpkg.com/array-flatten/-/array-flatten-1.1.1.tgz#9a5f699051b1e7073328f2a008968b64ea2955d2"
+  integrity sha1-ml9pkFGx5wczKPKgCJaLZOopVdI=
+
+array-flatten@^2.1.0:
+  version "2.1.2"
+  resolved "https://registry.yarnpkg.com/array-flatten/-/array-flatten-2.1.2.tgz#24ef80a28c1a893617e2149b0c6d0d788293b099"
+  integrity sha512-hNfzcOV8W4NdualtqBFPyVO+54DSJuZGY9qT4pRroB6S9e3iiido2ISIC5h9R2sPJ8H3FHCIiEnsv1lPXO3KtQ==
+
+array-includes@^3.1.1, array-includes@^3.1.2:
+  version "3.1.3"
+  resolved "https://registry.yarnpkg.com/array-includes/-/array-includes-3.1.3.tgz#c7f619b382ad2afaf5326cddfdc0afc61af7690a"
+  integrity sha512-gcem1KlBU7c9rB+Rq8/3PPKsK2kjqeEBa3bD5kkQo4nYlOHQCJqIJFqBXDEfwaRuYTT4E+FxA9xez7Gf/e3Q7A==
+  dependencies:
+    call-bind "^1.0.2"
+    define-properties "^1.1.3"
+    es-abstract "^1.18.0-next.2"
+    get-intrinsic "^1.1.1"
+    is-string "^1.0.5"
+
+array-union@^1.0.1:
+  version "1.0.2"
+  resolved "https://registry.yarnpkg.com/array-union/-/array-union-1.0.2.tgz#9a34410e4f4e3da23dea375be5be70f24778ec39"
+  integrity sha1-mjRBDk9OPaI96jdb5b5w8kd47Dk=
+  dependencies:
+    array-uniq "^1.0.1"
+
+array-union@^2.1.0:
+  version "2.1.0"
+  resolved "https://registry.yarnpkg.com/array-union/-/array-union-2.1.0.tgz#b798420adbeb1de828d84acd8a2e23d3efe85e8d"
+  integrity sha512-HGyxoOTYUyCM6stUe6EJgnd4EoewAI7zMdfqO+kGjnlZmBDz/cR5pf8r/cR4Wq60sL/p0IkcjUEEPwS3GFrIyw==
+
+array-uniq@^1.0.1:
+  version "1.0.3"
+  resolved "https://registry.yarnpkg.com/array-uniq/-/array-uniq-1.0.3.tgz#af6ac877a25cc7f74e058894753858dfdb24fdb6"
+  integrity sha1-r2rId6Jcx/dOBYiUdThY39sk/bY=
+
+array-unique@^0.3.2:
+  version "0.3.2"
+  resolved "https://registry.yarnpkg.com/array-unique/-/array-unique-0.3.2.tgz#a894b75d4bc4f6cd679ef3244a9fd8f46ae2d428"
+  integrity sha1-qJS3XUvE9s1nnvMkSp/Y9Gri1Cg=
+
+array.prototype.flat@^1.2.3:
+  version "1.2.4"
+  resolved "https://registry.yarnpkg.com/array.prototype.flat/-/array.prototype.flat-1.2.4.tgz#6ef638b43312bd401b4c6199fdec7e2dc9e9a123"
+  integrity sha512-4470Xi3GAPAjZqFcljX2xzckv1qeKPizoNkiS0+O4IoPR2ZNpcjE0pkhdihlDouK+x6QOast26B4Q/O9DJnwSg==
+  dependencies:
+    call-bind "^1.0.0"
+    define-properties "^1.1.3"
+    es-abstract "^1.18.0-next.1"
+
+array.prototype.flatmap@^1.2.3:
+  version "1.2.4"
+  resolved "https://registry.yarnpkg.com/array.prototype.flatmap/-/array.prototype.flatmap-1.2.4.tgz#94cfd47cc1556ec0747d97f7c7738c58122004c9"
+  integrity sha512-r9Z0zYoxqHz60vvQbWEdXIEtCwHF0yxaWfno9qzXeNHvfyl3BZqygmGzb84dsubyaXLH4husF+NFgMSdpZhk2Q==
+  dependencies:
+    call-bind "^1.0.0"
+    define-properties "^1.1.3"
+    es-abstract "^1.18.0-next.1"
+    function-bind "^1.1.1"
+
+arrify@^2.0.1:
+  version "2.0.1"
+  resolved "https://registry.yarnpkg.com/arrify/-/arrify-2.0.1.tgz#c9655e9331e0abcd588d2a7cad7e9956f66701fa"
+  integrity sha512-3duEwti880xqi4eAMN8AyR4a0ByT90zoYdLlevfrvU43vb0YZwZVfxOgxWrLXXXpyugL0hNZc9G6BiB5B3nUug==
+
+asap@~2.0.6:
+  version "2.0.6"
+  resolved "https://registry.yarnpkg.com/asap/-/asap-2.0.6.tgz#e50347611d7e690943208bbdafebcbc2fb866d46"
+  integrity sha1-5QNHYR1+aQlDIIu9r+vLwvuGbUY=
+
+asn1.js@^5.2.0:
+  version "5.4.1"
+  resolved "https://registry.yarnpkg.com/asn1.js/-/asn1.js-5.4.1.tgz#11a980b84ebb91781ce35b0fdc2ee294e3783f07"
+  integrity sha512-+I//4cYPccV8LdmBLiX8CYvf9Sp3vQsrqu2QNXRcrbiWvcx/UdlFiqUJJzxRQxgsZmvhXhn4cSKeSmoFjVdupA==
+  dependencies:
+    bn.js "^4.0.0"
+    inherits "^2.0.1"
+    minimalistic-assert "^1.0.0"
+    safer-buffer "^2.1.0"
+
+asn1@~0.2.3:
+  version "0.2.4"
+  resolved "https://registry.yarnpkg.com/asn1/-/asn1-0.2.4.tgz#8d2475dfab553bb33e77b54e59e880bb8ce23136"
+  integrity sha512-jxwzQpLQjSmWXgwaCZE9Nz+glAG01yF1QnWgbhGwHI5A6FRIEY6IVqtHhIepHqI7/kyEyQEagBC5mBEFlIYvdg==
+  dependencies:
+    safer-buffer "~2.1.0"
+
+assert-plus@1.0.0, assert-plus@^1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/assert-plus/-/assert-plus-1.0.0.tgz#f12e0f3c5d77b0b1cdd9146942e4e96c1e4dd525"
+  integrity sha1-8S4PPF13sLHN2RRpQuTpbB5N1SU=
+
+assert@^1.1.1:
+  version "1.5.0"
+  resolved "https://registry.yarnpkg.com/assert/-/assert-1.5.0.tgz#55c109aaf6e0aefdb3dc4b71240c70bf574b18eb"
+  integrity sha512-EDsgawzwoun2CZkCgtxJbv392v4nbk9XDD06zI+kQYoBM/3RBWLlEyJARDOmhAAosBjWACEkKL6S+lIZtcAubA==
+  dependencies:
+    object-assign "^4.1.1"
+    util "0.10.3"
+
+assign-symbols@^1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/assign-symbols/-/assign-symbols-1.0.0.tgz#59667f41fadd4f20ccbc2bb96b8d4f7f78ec0367"
+  integrity sha1-WWZ/QfrdTyDMvCu5a41Pf3jsA2c=
+
+ast-types-flow@^0.0.7:
+  version "0.0.7"
+  resolved "https://registry.yarnpkg.com/ast-types-flow/-/ast-types-flow-0.0.7.tgz#f70b735c6bca1a5c9c22d982c3e39e7feba3bdad"
+  integrity sha1-9wtzXGvKGlycItmCw+Oef+ujva0=
+
+astral-regex@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/astral-regex/-/astral-regex-2.0.0.tgz#483143c567aeed4785759c0865786dc77d7d2e31"
+  integrity sha512-Z7tMw1ytTXt5jqMcOP+OQteU1VuNK9Y02uuJtKQ1Sv69jXQKKg5cibLwGJow8yzZP+eAc18EmLGPal0bp36rvQ==
+
+async-each@^1.0.1:
+  version "1.0.3"
+  resolved "https://registry.yarnpkg.com/async-each/-/async-each-1.0.3.tgz#b727dbf87d7651602f06f4d4ac387f47d91b0cbf"
+  integrity sha512-z/WhQ5FPySLdvREByI2vZiTWwCnF0moMJ1hK9YQwDTHKh6I7/uSckMetoRGb5UBZPC1z0jlw+n/XCgjeH7y1AQ==
+
+async-limiter@~1.0.0:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/async-limiter/-/async-limiter-1.0.1.tgz#dd379e94f0db8310b08291f9d64c3209766617fd"
+  integrity sha512-csOlWGAcRFJaI6m+F2WKdnMKr4HhdhFVBk0H/QbJFMCr+uO2kwohwXQPxw/9OCxp05r5ghVBFSyioixx3gfkNQ==
+
+async@^2.6.2:
+  version "2.6.3"
+  resolved "https://registry.yarnpkg.com/async/-/async-2.6.3.tgz#d72625e2344a3656e3a3ad4fa749fa83299d82ff"
+  integrity sha512-zflvls11DCy+dQWzTW2dzuilv8Z5X/pjfmZOWba6TNIVDm+2UDaJmXSOXlasHKfNBs8oo3M0aT50fDEWfKZjXg==
+  dependencies:
+    lodash "^4.17.14"
+
+asynckit@^0.4.0:
+  version "0.4.0"
+  resolved "https://registry.yarnpkg.com/asynckit/-/asynckit-0.4.0.tgz#c79ed97f7f34cb8f2ba1bc9790bcc366474b4b79"
+  integrity sha1-x57Zf380y48robyXkLzDZkdLS3k=
+
+at-least-node@^1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/at-least-node/-/at-least-node-1.0.0.tgz#602cd4b46e844ad4effc92a8011a3c46e0238dc2"
+  integrity sha512-+q/t7Ekv1EDY2l6Gda6LLiX14rU9TV20Wa3ofeQmwPFZbOMo9DXrLbOjFaaclkXKWidIaopwAObQDqwWtGUjqg==
+
+atob@^2.1.2:
+  version "2.1.2"
+  resolved "https://registry.yarnpkg.com/atob/-/atob-2.1.2.tgz#6d9517eb9e030d2436666651e86bd9f6f13533c9"
+  integrity sha512-Wm6ukoaOGJi/73p/cl2GvLjTI5JM1k/O14isD73YML8StrH/7/lRFgmg8nICZgD3bZZvjwCGxtMOD3wWNAu8cg==
+
+autoprefixer@^9.6.1:
+  version "9.8.6"
+  resolved "https://registry.yarnpkg.com/autoprefixer/-/autoprefixer-9.8.6.tgz#3b73594ca1bf9266320c5acf1588d74dea74210f"
+  integrity sha512-XrvP4VVHdRBCdX1S3WXVD8+RyG9qeb1D5Sn1DeLiG2xfSpzellk5k54xbUERJ3M5DggQxes39UGOTP8CFrEGbg==
+  dependencies:
+    browserslist "^4.12.0"
+    caniuse-lite "^1.0.30001109"
+    colorette "^1.2.1"
+    normalize-range "^0.1.2"
+    num2fraction "^1.2.2"
+    postcss "^7.0.32"
+    postcss-value-parser "^4.1.0"
+
+aws-sign2@~0.7.0:
+  version "0.7.0"
+  resolved "https://registry.yarnpkg.com/aws-sign2/-/aws-sign2-0.7.0.tgz#b46e890934a9591f2d2f6f86d7e6a9f1b3fe76a8"
+  integrity sha1-tG6JCTSpWR8tL2+G1+ap8bP+dqg=
+
+aws4@^1.8.0:
+  version "1.11.0"
+  resolved "https://registry.yarnpkg.com/aws4/-/aws4-1.11.0.tgz#d61f46d83b2519250e2784daf5b09479a8b41c59"
+  integrity sha512-xh1Rl34h6Fi1DC2WWKfxUTVqRsNnr6LsKz2+hfwDxQJWmrx8+c7ylaqBMcHfl1U1r2dsifOvKX3LQuLNZ+XSvA==
+
+axe-core@^4.0.2:
+  version "4.1.2"
+  resolved "https://registry.yarnpkg.com/axe-core/-/axe-core-4.1.2.tgz#7cf783331320098bfbef620df3b3c770147bc224"
+  integrity sha512-V+Nq70NxKhYt89ArVcaNL9FDryB3vQOd+BFXZIfO3RP6rwtj+2yqqqdHEkacutglPaZLkJeuXKCjCJDMGPtPqg==
+
+axobject-query@^2.2.0:
+  version "2.2.0"
+  resolved "https://registry.yarnpkg.com/axobject-query/-/axobject-query-2.2.0.tgz#943d47e10c0b704aa42275e20edf3722648989be"
+  integrity sha512-Td525n+iPOOyUQIeBfcASuG6uJsDOITl7Mds5gFyerkWiX7qhUTdYUBlSgNMyVqtSJqwpt1kXGLdUt6SykLMRA==
+
+babel-eslint@^10.1.0:
+  version "10.1.0"
+  resolved "https://registry.yarnpkg.com/babel-eslint/-/babel-eslint-10.1.0.tgz#6968e568a910b78fb3779cdd8b6ac2f479943232"
+  integrity sha512-ifWaTHQ0ce+448CYop8AdrQiBsGrnC+bMgfyKFdi6EsPLTAWG+QfyDeM6OH+FmWnKvEq5NnBMLvlBUPKQZoDSg==
+  dependencies:
+    "@babel/code-frame" "^7.0.0"
+    "@babel/parser" "^7.7.0"
+    "@babel/traverse" "^7.7.0"
+    "@babel/types" "^7.7.0"
+    eslint-visitor-keys "^1.0.0"
+    resolve "^1.12.0"
+
+babel-extract-comments@^1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/babel-extract-comments/-/babel-extract-comments-1.0.0.tgz#0a2aedf81417ed391b85e18b4614e693a0351a21"
+  integrity sha512-qWWzi4TlddohA91bFwgt6zO/J0X+io7Qp184Fw0m2JYRSTZnJbFR8+07KmzudHCZgOiKRCrjhylwv9Xd8gfhVQ==
+  dependencies:
+    babylon "^6.18.0"
+
+babel-jest@^26.6.0, babel-jest@^26.6.3:
+  version "26.6.3"
+  resolved "https://registry.yarnpkg.com/babel-jest/-/babel-jest-26.6.3.tgz#d87d25cb0037577a0c89f82e5755c5d293c01056"
+  integrity sha512-pl4Q+GAVOHwvjrck6jKjvmGhnO3jHX/xuB9d27f+EJZ/6k+6nMuPjorrYp7s++bKKdANwzElBWnLWaObvTnaZA==
+  dependencies:
+    "@jest/transform" "^26.6.2"
+    "@jest/types" "^26.6.2"
+    "@types/babel__core" "^7.1.7"
+    babel-plugin-istanbul "^6.0.0"
+    babel-preset-jest "^26.6.2"
+    chalk "^4.0.0"
+    graceful-fs "^4.2.4"
+    slash "^3.0.0"
+
+babel-loader@8.1.0:
+  version "8.1.0"
+  resolved "https://registry.yarnpkg.com/babel-loader/-/babel-loader-8.1.0.tgz#c611d5112bd5209abe8b9fa84c3e4da25275f1c3"
+  integrity sha512-7q7nC1tYOrqvUrN3LQK4GwSk/TQorZSOlO9C+RZDZpODgyN4ZlCqE5q9cDsyWOliN+aU9B4JX01xK9eJXowJLw==
+  dependencies:
+    find-cache-dir "^2.1.0"
+    loader-utils "^1.4.0"
+    mkdirp "^0.5.3"
+    pify "^4.0.1"
+    schema-utils "^2.6.5"
+
+babel-plugin-dynamic-import-node@^2.3.3:
+  version "2.3.3"
+  resolved "https://registry.yarnpkg.com/babel-plugin-dynamic-import-node/-/babel-plugin-dynamic-import-node-2.3.3.tgz#84fda19c976ec5c6defef57f9427b3def66e17a3"
+  integrity sha512-jZVI+s9Zg3IqA/kdi0i6UDCybUI3aSBLnglhYbSSjKlV7yF1F/5LWv8MakQmvYpnbJDS6fcBL2KzHSxNCMtWSQ==
+  dependencies:
+    object.assign "^4.1.0"
+
+babel-plugin-istanbul@^6.0.0:
+  version "6.0.0"
+  resolved "https://registry.yarnpkg.com/babel-plugin-istanbul/-/babel-plugin-istanbul-6.0.0.tgz#e159ccdc9af95e0b570c75b4573b7c34d671d765"
+  integrity sha512-AF55rZXpe7trmEylbaE1Gv54wn6rwU03aptvRoVIGP8YykoSxqdVLV1TfwflBCE/QtHmqtP8SWlTENqbK8GCSQ==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.0.0"
+    "@istanbuljs/load-nyc-config" "^1.0.0"
+    "@istanbuljs/schema" "^0.1.2"
+    istanbul-lib-instrument "^4.0.0"
+    test-exclude "^6.0.0"
+
+babel-plugin-jest-hoist@^26.6.2:
+  version "26.6.2"
+  resolved "https://registry.yarnpkg.com/babel-plugin-jest-hoist/-/babel-plugin-jest-hoist-26.6.2.tgz#8185bd030348d254c6d7dd974355e6a28b21e62d"
+  integrity sha512-PO9t0697lNTmcEHH69mdtYiOIkkOlj9fySqfO3K1eCcdISevLAE0xY59VLLUj0SoiPiTX/JU2CYFpILydUa5Lw==
+  dependencies:
+    "@babel/template" "^7.3.3"
+    "@babel/types" "^7.3.3"
+    "@types/babel__core" "^7.0.0"
+    "@types/babel__traverse" "^7.0.6"
+
+babel-plugin-macros@2.8.0, babel-plugin-macros@^2.6.1:
+  version "2.8.0"
+  resolved "https://registry.yarnpkg.com/babel-plugin-macros/-/babel-plugin-macros-2.8.0.tgz#0f958a7cc6556b1e65344465d99111a1e5e10138"
+  integrity sha512-SEP5kJpfGYqYKpBrj5XU3ahw5p5GOHJ0U5ssOSQ/WBVdwkD2Dzlce95exQTs3jOVWPPKLBN2rlEWkCK7dSmLvg==
+  dependencies:
+    "@babel/runtime" "^7.7.2"
+    cosmiconfig "^6.0.0"
+    resolve "^1.12.0"
+
+babel-plugin-named-asset-import@^0.3.7:
+  version "0.3.7"
+  resolved "https://registry.yarnpkg.com/babel-plugin-named-asset-import/-/babel-plugin-named-asset-import-0.3.7.tgz#156cd55d3f1228a5765774340937afc8398067dd"
+  integrity sha512-squySRkf+6JGnvjoUtDEjSREJEBirnXi9NqP6rjSYsylxQxqBTz+pkmf395i9E2zsvmYUaI40BHo6SqZUdydlw==
+
+babel-plugin-syntax-object-rest-spread@^6.8.0:
+  version "6.13.0"
+  resolved "https://registry.yarnpkg.com/babel-plugin-syntax-object-rest-spread/-/babel-plugin-syntax-object-rest-spread-6.13.0.tgz#fd6536f2bce13836ffa3a5458c4903a597bb3bf5"
+  integrity sha1-/WU28rzhODb/o6VFjEkDpZe7O/U=
+
+babel-plugin-transform-object-rest-spread@^6.26.0:
+  version "6.26.0"
+  resolved "https://registry.yarnpkg.com/babel-plugin-transform-object-rest-spread/-/babel-plugin-transform-object-rest-spread-6.26.0.tgz#0f36692d50fef6b7e2d4b3ac1478137a963b7b06"
+  integrity sha1-DzZpLVD+9rfi1LOsFHgTepY7ewY=
+  dependencies:
+    babel-plugin-syntax-object-rest-spread "^6.8.0"
+    babel-runtime "^6.26.0"
+
+babel-plugin-transform-react-remove-prop-types@0.4.24:
+  version "0.4.24"
+  resolved "https://registry.yarnpkg.com/babel-plugin-transform-react-remove-prop-types/-/babel-plugin-transform-react-remove-prop-types-0.4.24.tgz#f2edaf9b4c6a5fbe5c1d678bfb531078c1555f3a"
+  integrity sha512-eqj0hVcJUR57/Ug2zE1Yswsw4LhuqqHhD+8v120T1cl3kjg76QwtyBrdIk4WVwK+lAhBJVYCd/v+4nc4y+8JsA==
+
+babel-preset-current-node-syntax@^1.0.0:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/babel-preset-current-node-syntax/-/babel-preset-current-node-syntax-1.0.1.tgz#b4399239b89b2a011f9ddbe3e4f401fc40cff73b"
+  integrity sha512-M7LQ0bxarkxQoN+vz5aJPsLBn77n8QgTFmo8WK0/44auK2xlCXrYcUxHFxgU7qW5Yzw/CjmLRK2uJzaCd7LvqQ==
+  dependencies:
+    "@babel/plugin-syntax-async-generators" "^7.8.4"
+    "@babel/plugin-syntax-bigint" "^7.8.3"
+    "@babel/plugin-syntax-class-properties" "^7.8.3"
+    "@babel/plugin-syntax-import-meta" "^7.8.3"
+    "@babel/plugin-syntax-json-strings" "^7.8.3"
+    "@babel/plugin-syntax-logical-assignment-operators" "^7.8.3"
+    "@babel/plugin-syntax-nullish-coalescing-operator" "^7.8.3"
+    "@babel/plugin-syntax-numeric-separator" "^7.8.3"
+    "@babel/plugin-syntax-object-rest-spread" "^7.8.3"
+    "@babel/plugin-syntax-optional-catch-binding" "^7.8.3"
+    "@babel/plugin-syntax-optional-chaining" "^7.8.3"
+    "@babel/plugin-syntax-top-level-await" "^7.8.3"
+
+babel-preset-jest@^26.6.2:
+  version "26.6.2"
+  resolved "https://registry.yarnpkg.com/babel-preset-jest/-/babel-preset-jest-26.6.2.tgz#747872b1171df032252426586881d62d31798fee"
+  integrity sha512-YvdtlVm9t3k777c5NPQIv6cxFFFapys25HiUmuSgHwIZhfifweR5c5Sf5nwE3MAbfu327CYSvps8Yx6ANLyleQ==
+  dependencies:
+    babel-plugin-jest-hoist "^26.6.2"
+    babel-preset-current-node-syntax "^1.0.0"
+
+babel-preset-react-app@^10.0.0:
+  version "10.0.0"
+  resolved "https://registry.yarnpkg.com/babel-preset-react-app/-/babel-preset-react-app-10.0.0.tgz#689b60edc705f8a70ce87f47ab0e560a317d7045"
+  integrity sha512-itL2z8v16khpuKutx5IH8UdCdSTuzrOhRFTEdIhveZ2i1iBKDrVE0ATa4sFVy+02GLucZNVBWtoarXBy0Msdpg==
+  dependencies:
+    "@babel/core" "7.12.3"
+    "@babel/plugin-proposal-class-properties" "7.12.1"
+    "@babel/plugin-proposal-decorators" "7.12.1"
+    "@babel/plugin-proposal-nullish-coalescing-operator" "7.12.1"
+    "@babel/plugin-proposal-numeric-separator" "7.12.1"
+    "@babel/plugin-proposal-optional-chaining" "7.12.1"
+    "@babel/plugin-transform-flow-strip-types" "7.12.1"
+    "@babel/plugin-transform-react-display-name" "7.12.1"
+    "@babel/plugin-transform-runtime" "7.12.1"
+    "@babel/preset-env" "7.12.1"
+    "@babel/preset-react" "7.12.1"
+    "@babel/preset-typescript" "7.12.1"
+    "@babel/runtime" "7.12.1"
+    babel-plugin-macros "2.8.0"
+    babel-plugin-transform-react-remove-prop-types "0.4.24"
+
+babel-runtime@^6.26.0:
+  version "6.26.0"
+  resolved "https://registry.yarnpkg.com/babel-runtime/-/babel-runtime-6.26.0.tgz#965c7058668e82b55d7bfe04ff2337bc8b5647fe"
+  integrity sha1-llxwWGaOgrVde/4E/yM3vItWR/4=
+  dependencies:
+    core-js "^2.4.0"
+    regenerator-runtime "^0.11.0"
+
+babylon@^6.18.0:
+  version "6.18.0"
+  resolved "https://registry.yarnpkg.com/babylon/-/babylon-6.18.0.tgz#af2f3b88fa6f5c1e4c634d1a0f8eac4f55b395e3"
+  integrity sha512-q/UEjfGJ2Cm3oKV71DJz9d25TPnq5rhBVL2Q4fA5wcC3jcrdn7+SssEybFIxwAvvP+YCsCYNKughoF33GxgycQ==
+
+balanced-match@^1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/balanced-match/-/balanced-match-1.0.0.tgz#89b4d199ab2bee49de164ea02b89ce462d71b767"
+  integrity sha1-ibTRmasr7kneFk6gK4nORi1xt2c=
+
+base64-js@^1.0.2:
+  version "1.5.1"
+  resolved "https://registry.yarnpkg.com/base64-js/-/base64-js-1.5.1.tgz#1b1b440160a5bf7ad40b650f095963481903930a"
+  integrity sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==
+
+base@^0.11.1:
+  version "0.11.2"
+  resolved "https://registry.yarnpkg.com/base/-/base-0.11.2.tgz#7bde5ced145b6d551a90db87f83c558b4eb48a8f"
+  integrity sha512-5T6P4xPgpp0YDFvSWwEZ4NoE3aM4QBQXDzmVbraCkFj8zHM+mba8SyqB5DbZWyR7mYHo6Y7BdQo3MoA4m0TeQg==
+  dependencies:
+    cache-base "^1.0.1"
+    class-utils "^0.3.5"
+    component-emitter "^1.2.1"
+    define-property "^1.0.0"
+    isobject "^3.0.1"
+    mixin-deep "^1.2.0"
+    pascalcase "^0.1.1"
+
+batch@0.6.1:
+  version "0.6.1"
+  resolved "https://registry.yarnpkg.com/batch/-/batch-0.6.1.tgz#dc34314f4e679318093fc760272525f94bf25c16"
+  integrity sha1-3DQxT05nkxgJP8dgJyUl+UvyXBY=
+
+bcrypt-pbkdf@^1.0.0:
+  version "1.0.2"
+  resolved "https://registry.yarnpkg.com/bcrypt-pbkdf/-/bcrypt-pbkdf-1.0.2.tgz#a4301d389b6a43f9b67ff3ca11a3f6637e360e9e"
+  integrity sha1-pDAdOJtqQ/m2f/PKEaP2Y342Dp4=
+  dependencies:
+    tweetnacl "^0.14.3"
+
+bfj@^7.0.2:
+  version "7.0.2"
+  resolved "https://registry.yarnpkg.com/bfj/-/bfj-7.0.2.tgz#1988ce76f3add9ac2913fd8ba47aad9e651bfbb2"
+  integrity sha512-+e/UqUzwmzJamNF50tBV6tZPTORow7gQ96iFow+8b562OdMpEK0BcJEq2OSPEDmAbSMBQ7PKZ87ubFkgxpYWgw==
+  dependencies:
+    bluebird "^3.5.5"
+    check-types "^11.1.1"
+    hoopy "^0.1.4"
+    tryer "^1.0.1"
+
+big.js@^5.2.2:
+  version "5.2.2"
+  resolved "https://registry.yarnpkg.com/big.js/-/big.js-5.2.2.tgz#65f0af382f578bcdc742bd9c281e9cb2d7768328"
+  integrity sha512-vyL2OymJxmarO8gxMr0mhChsO9QGwhynfuu4+MHTAW6czfq9humCB7rKpUjDd9YUiDPU4mzpyupFSvOClAwbmQ==
+
+binary-extensions@^1.0.0:
+  version "1.13.1"
+  resolved "https://registry.yarnpkg.com/binary-extensions/-/binary-extensions-1.13.1.tgz#598afe54755b2868a5330d2aff9d4ebb53209b65"
+  integrity sha512-Un7MIEDdUC5gNpcGDV97op1Ywk748MpHcFTHoYs6qnj1Z3j7I53VG3nwZhKzoBZmbdRNnb6WRdFlwl7tSDuZGw==
+
+binary-extensions@^2.0.0:
+  version "2.2.0"
+  resolved "https://registry.yarnpkg.com/binary-extensions/-/binary-extensions-2.2.0.tgz#75f502eeaf9ffde42fc98829645be4ea76bd9e2d"
+  integrity sha512-jDctJ/IVQbZoJykoeHbhXpOlNBqGNcwXJKJog42E5HDPUwQTSdjCHdihjj0DlnheQ7blbT6dHOafNAiS8ooQKA==
+
+bindings@^1.5.0:
+  version "1.5.0"
+  resolved "https://registry.yarnpkg.com/bindings/-/bindings-1.5.0.tgz#10353c9e945334bc0511a6d90b38fbc7c9c504df"
+  integrity sha512-p2q/t/mhvuOj/UeLlV6566GD/guowlr0hHxClI0W9m7MWYkL1F0hLo+0Aexs9HSPCtR1SXQ0TD3MMKrXZajbiQ==
+  dependencies:
+    file-uri-to-path "1.0.0"
+
+bluebird@^3.5.5:
+  version "3.7.2"
+  resolved "https://registry.yarnpkg.com/bluebird/-/bluebird-3.7.2.tgz#9f229c15be272454ffa973ace0dbee79a1b0c36f"
+  integrity sha512-XpNj6GDQzdfW+r2Wnn7xiSAd7TM3jzkxGXBGTtWKuSXv1xUV+azxAm8jdWZN06QTQk+2N2XB9jRDkvbmQmcRtg==
+
+bn.js@^4.0.0, bn.js@^4.1.0, bn.js@^4.11.9:
+  version "4.11.9"
+  resolved "https://registry.yarnpkg.com/bn.js/-/bn.js-4.11.9.tgz#26d556829458f9d1e81fc48952493d0ba3507828"
+  integrity sha512-E6QoYqCKZfgatHTdHzs1RRKP7ip4vvm+EyRUeE2RF0NblwVvb0p6jSVeNTOFxPn26QXN2o6SMfNxKp6kU8zQaw==
+
+bn.js@^5.0.0, bn.js@^5.1.1:
+  version "5.1.3"
+  resolved "https://registry.yarnpkg.com/bn.js/-/bn.js-5.1.3.tgz#beca005408f642ebebea80b042b4d18d2ac0ee6b"
+  integrity sha512-GkTiFpjFtUzU9CbMeJ5iazkCzGL3jrhzerzZIuqLABjbwRaFt33I9tUdSNryIptM+RxDet6OKm2WnLXzW51KsQ==
+
+body-parser@1.19.0:
+  version "1.19.0"
+  resolved "https://registry.yarnpkg.com/body-parser/-/body-parser-1.19.0.tgz#96b2709e57c9c4e09a6fd66a8fd979844f69f08a"
+  integrity sha512-dhEPs72UPbDnAQJ9ZKMNTP6ptJaionhP5cBb541nXPlW60Jepo9RV/a4fX4XWW9CuFNK22krhrj1+rgzifNCsw==
+  dependencies:
+    bytes "3.1.0"
+    content-type "~1.0.4"
+    debug "2.6.9"
+    depd "~1.1.2"
+    http-errors "1.7.2"
+    iconv-lite "0.4.24"
+    on-finished "~2.3.0"
+    qs "6.7.0"
+    raw-body "2.4.0"
+    type-is "~1.6.17"
+
+bonjour@^3.5.0:
+  version "3.5.0"
+  resolved "https://registry.yarnpkg.com/bonjour/-/bonjour-3.5.0.tgz#8e890a183d8ee9a2393b3844c691a42bcf7bc9f5"
+  integrity sha1-jokKGD2O6aI5OzhExpGkK897yfU=
+  dependencies:
+    array-flatten "^2.1.0"
+    deep-equal "^1.0.1"
+    dns-equal "^1.0.0"
+    dns-txt "^2.0.2"
+    multicast-dns "^6.0.1"
+    multicast-dns-service-types "^1.1.0"
+
+boolbase@^1.0.0, boolbase@~1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/boolbase/-/boolbase-1.0.0.tgz#68dff5fbe60c51eb37725ea9e3ed310dcc1e776e"
+  integrity sha1-aN/1++YMUes3cl6p4+0xDcwed24=
+
+brace-expansion@^1.1.7:
+  version "1.1.11"
+  resolved "https://registry.yarnpkg.com/brace-expansion/-/brace-expansion-1.1.11.tgz#3c7fcbf529d87226f3d2f52b966ff5271eb441dd"
+  integrity sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==
+  dependencies:
+    balanced-match "^1.0.0"
+    concat-map "0.0.1"
+
+braces@^2.3.1, braces@^2.3.2:
+  version "2.3.2"
+  resolved "https://registry.yarnpkg.com/braces/-/braces-2.3.2.tgz#5979fd3f14cd531565e5fa2df1abfff1dfaee729"
+  integrity sha512-aNdbnj9P8PjdXU4ybaWLK2IF3jc/EoDYbC7AazW6to3TRsfXxscC9UXOB5iDiEQrkyIbWp2SLQda4+QAa7nc3w==
+  dependencies:
+    arr-flatten "^1.1.0"
+    array-unique "^0.3.2"
+    extend-shallow "^2.0.1"
+    fill-range "^4.0.0"
+    isobject "^3.0.1"
+    repeat-element "^1.1.2"
+    snapdragon "^0.8.1"
+    snapdragon-node "^2.0.1"
+    split-string "^3.0.2"
+    to-regex "^3.0.1"
+
+braces@^3.0.1, braces@~3.0.2:
+  version "3.0.2"
+  resolved "https://registry.yarnpkg.com/braces/-/braces-3.0.2.tgz#3454e1a462ee8d599e236df336cd9ea4f8afe107"
+  integrity sha512-b8um+L1RzM3WDSzvhm6gIz1yfTbBt6YTlcEKAvsmqCZZFw46z626lVj9j1yEPW33H5H+lBQpZMP1k8l+78Ha0A==
+  dependencies:
+    fill-range "^7.0.1"
+
+brorand@^1.0.1, brorand@^1.1.0:
+  version "1.1.0"
+  resolved "https://registry.yarnpkg.com/brorand/-/brorand-1.1.0.tgz#12c25efe40a45e3c323eb8675a0a0ce57b22371f"
+  integrity sha1-EsJe/kCkXjwyPrhnWgoM5XsiNx8=
+
+browser-process-hrtime@^1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/browser-process-hrtime/-/browser-process-hrtime-1.0.0.tgz#3c9b4b7d782c8121e56f10106d84c0d0ffc94626"
+  integrity sha512-9o5UecI3GhkpM6DrXr69PblIuWxPKk9Y0jHBRhdocZ2y7YECBFCsHm79Pr3OyR2AvjhDkabFJaDJMYRazHgsow==
+
+browserify-aes@^1.0.0, browserify-aes@^1.0.4:
+  version "1.2.0"
+  resolved "https://registry.yarnpkg.com/browserify-aes/-/browserify-aes-1.2.0.tgz#326734642f403dabc3003209853bb70ad428ef48"
+  integrity sha512-+7CHXqGuspUn/Sl5aO7Ea0xWGAtETPXNSAjHo48JfLdPWcMng33Xe4znFvQweqc/uzk5zSOI3H52CYnjCfb5hA==
+  dependencies:
+    buffer-xor "^1.0.3"
+    cipher-base "^1.0.0"
+    create-hash "^1.1.0"
+    evp_bytestokey "^1.0.3"
+    inherits "^2.0.1"
+    safe-buffer "^5.0.1"
+
+browserify-cipher@^1.0.0:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/browserify-cipher/-/browserify-cipher-1.0.1.tgz#8d6474c1b870bfdabcd3bcfcc1934a10e94f15f0"
+  integrity sha512-sPhkz0ARKbf4rRQt2hTpAHqn47X3llLkUGn+xEJzLjwY8LRs2p0v7ljvI5EyoRO/mexrNunNECisZs+gw2zz1w==
+  dependencies:
+    browserify-aes "^1.0.4"
+    browserify-des "^1.0.0"
+    evp_bytestokey "^1.0.0"
+
+browserify-des@^1.0.0:
+  version "1.0.2"
+  resolved "https://registry.yarnpkg.com/browserify-des/-/browserify-des-1.0.2.tgz#3af4f1f59839403572f1c66204375f7a7f703e9c"
+  integrity sha512-BioO1xf3hFwz4kc6iBhI3ieDFompMhrMlnDFC4/0/vd5MokpuAc3R+LYbwTA9A5Yc9pq9UYPqffKpW2ObuwX5A==
+  dependencies:
+    cipher-base "^1.0.1"
+    des.js "^1.0.0"
+    inherits "^2.0.1"
+    safe-buffer "^5.1.2"
+
+browserify-rsa@^4.0.0, browserify-rsa@^4.0.1:
+  version "4.1.0"
+  resolved "https://registry.yarnpkg.com/browserify-rsa/-/browserify-rsa-4.1.0.tgz#b2fd06b5b75ae297f7ce2dc651f918f5be158c8d"
+  integrity sha512-AdEER0Hkspgno2aR97SAf6vi0y0k8NuOpGnVH3O99rcA5Q6sh8QxcngtHuJ6uXwnfAXNM4Gn1Gb7/MV1+Ymbog==
+  dependencies:
+    bn.js "^5.0.0"
+    randombytes "^2.0.1"
+
+browserify-sign@^4.0.0:
+  version "4.2.1"
+  resolved "https://registry.yarnpkg.com/browserify-sign/-/browserify-sign-4.2.1.tgz#eaf4add46dd54be3bb3b36c0cf15abbeba7956c3"
+  integrity sha512-/vrA5fguVAKKAVTNJjgSm1tRQDHUU6DbwO9IROu/0WAzC8PKhucDSh18J0RMvVeHAn5puMd+QHC2erPRNf8lmg==
+  dependencies:
+    bn.js "^5.1.1"
+    browserify-rsa "^4.0.1"
+    create-hash "^1.2.0"
+    create-hmac "^1.1.7"
+    elliptic "^6.5.3"
+    inherits "^2.0.4"
+    parse-asn1 "^5.1.5"
+    readable-stream "^3.6.0"
+    safe-buffer "^5.2.0"
+
+browserify-zlib@^0.2.0:
+  version "0.2.0"
+  resolved "https://registry.yarnpkg.com/browserify-zlib/-/browserify-zlib-0.2.0.tgz#2869459d9aa3be245fe8fe2ca1f46e2e7f54d73f"
+  integrity sha512-Z942RysHXmJrhqk88FmKBVq/v5tqmSkDz7p54G/MGyjMnCFFnC79XWNbg+Vta8W6Wb2qtSZTSxIGkJrRpCFEiA==
+  dependencies:
+    pako "~1.0.5"
+
+browserslist@4.14.2:
+  version "4.14.2"
+  resolved "https://registry.yarnpkg.com/browserslist/-/browserslist-4.14.2.tgz#1b3cec458a1ba87588cc5e9be62f19b6d48813ce"
+  integrity sha512-HI4lPveGKUR0x2StIz+2FXfDk9SfVMrxn6PLh1JeGUwcuoDkdKZebWiyLRJ68iIPDpMI4JLVDf7S7XzslgWOhw==
+  dependencies:
+    caniuse-lite "^1.0.30001125"
+    electron-to-chromium "^1.3.564"
+    escalade "^3.0.2"
+    node-releases "^1.1.61"
+
+browserslist@^4.0.0, browserslist@^4.12.0, browserslist@^4.14.5, browserslist@^4.16.3, browserslist@^4.6.2, browserslist@^4.6.4:
+  version "4.16.3"
+  resolved "https://registry.yarnpkg.com/browserslist/-/browserslist-4.16.3.tgz#340aa46940d7db878748567c5dea24a48ddf3717"
+  integrity sha512-vIyhWmIkULaq04Gt93txdh+j02yX/JzlyhLYbV3YQCn/zvES3JnY7TifHHvvr1w5hTDluNKMkV05cs4vy8Q7sw==
+  dependencies:
+    caniuse-lite "^1.0.30001181"
+    colorette "^1.2.1"
+    electron-to-chromium "^1.3.649"
+    escalade "^3.1.1"
+    node-releases "^1.1.70"
+
+bser@2.1.1:
+  version "2.1.1"
+  resolved "https://registry.yarnpkg.com/bser/-/bser-2.1.1.tgz#e6787da20ece9d07998533cfd9de6f5c38f4bc05"
+  integrity sha512-gQxTNE/GAfIIrmHLUE3oJyp5FO6HRBfhjnw4/wMmA63ZGDJnWBmgY/lyQBpnDUkGmAhbSe39tx2d/iTOAfglwQ==
+  dependencies:
+    node-int64 "^0.4.0"
+
+buffer-from@^1.0.0:
+  version "1.1.1"
+  resolved "https://registry.yarnpkg.com/buffer-from/-/buffer-from-1.1.1.tgz#32713bc028f75c02fdb710d7c7bcec1f2c6070ef"
+  integrity sha512-MQcXEUbCKtEo7bhqEs6560Hyd4XaovZlO/k9V3hjVUF/zwW7KBVdSK4gIt/bzwS9MbR5qob+F5jusZsb0YQK2A==
+
+buffer-indexof@^1.0.0:
+  version "1.1.1"
+  resolved "https://registry.yarnpkg.com/buffer-indexof/-/buffer-indexof-1.1.1.tgz#52fabcc6a606d1a00302802648ef68f639da268c"
+  integrity sha512-4/rOEg86jivtPTeOUUT61jJO1Ya1TrR/OkqCSZDyq84WJh3LuuiphBYJN+fm5xufIk4XAFcEwte/8WzC8If/1g==
+
+buffer-xor@^1.0.3:
+  version "1.0.3"
+  resolved "https://registry.yarnpkg.com/buffer-xor/-/buffer-xor-1.0.3.tgz#26e61ed1422fb70dd42e6e36729ed51d855fe8d9"
+  integrity sha1-JuYe0UIvtw3ULm42cp7VHYVf6Nk=
+
+buffer@^4.3.0:
+  version "4.9.2"
+  resolved "https://registry.yarnpkg.com/buffer/-/buffer-4.9.2.tgz#230ead344002988644841ab0244af8c44bbe3ef8"
+  integrity sha512-xq+q3SRMOxGivLhBNaUdC64hDTQwejJ+H0T/NB1XMtTVEwNTrfFF3gAxiyW0Bu/xWEGhjVKgUcMhCrUy2+uCWg==
+  dependencies:
+    base64-js "^1.0.2"
+    ieee754 "^1.1.4"
+    isarray "^1.0.0"
+
+builtin-modules@^3.1.0:
+  version "3.2.0"
+  resolved "https://registry.yarnpkg.com/builtin-modules/-/builtin-modules-3.2.0.tgz#45d5db99e7ee5e6bc4f362e008bf917ab5049887"
+  integrity sha512-lGzLKcioL90C7wMczpkY0n/oART3MbBa8R9OFGE1rJxoVI86u4WAGfEk8Wjv10eKSyTHVGkSo3bvBylCEtk7LA==
+
+builtin-status-codes@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/builtin-status-codes/-/builtin-status-codes-3.0.0.tgz#85982878e21b98e1c66425e03d0174788f569ee8"
+  integrity sha1-hZgoeOIbmOHGZCXgPQF0eI9Wnug=
+
+bytes@3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/bytes/-/bytes-3.0.0.tgz#d32815404d689699f85a4ea4fa8755dd13a96048"
+  integrity sha1-0ygVQE1olpn4Wk6k+odV3ROpYEg=
+
+bytes@3.1.0:
+  version "3.1.0"
+  resolved "https://registry.yarnpkg.com/bytes/-/bytes-3.1.0.tgz#f6cf7933a360e0588fa9fde85651cdc7f805d1f6"
+  integrity sha512-zauLjrfCG+xvoyaqLoV8bLVXXNGC4JqlxFCutSDWA6fJrTo2ZuvLYTqZ7aHBLZSMOopbzwv8f+wZcVzfVTI2Dg==
+
+cacache@^12.0.2:
+  version "12.0.4"
+  resolved "https://registry.yarnpkg.com/cacache/-/cacache-12.0.4.tgz#668bcbd105aeb5f1d92fe25570ec9525c8faa40c"
+  integrity sha512-a0tMB40oefvuInr4Cwb3GerbL9xTj1D5yg0T5xrjGCGyfvbxseIXX7BAO/u/hIXdafzOI5JC3wDwHyf24buOAQ==
+  dependencies:
+    bluebird "^3.5.5"
+    chownr "^1.1.1"
+    figgy-pudding "^3.5.1"
+    glob "^7.1.4"
+    graceful-fs "^4.1.15"
+    infer-owner "^1.0.3"
+    lru-cache "^5.1.1"
+    mississippi "^3.0.0"
+    mkdirp "^0.5.1"
+    move-concurrently "^1.0.1"
+    promise-inflight "^1.0.1"
+    rimraf "^2.6.3"
+    ssri "^6.0.1"
+    unique-filename "^1.1.1"
+    y18n "^4.0.0"
+
+cacache@^15.0.5:
+  version "15.0.5"
+  resolved "https://registry.yarnpkg.com/cacache/-/cacache-15.0.5.tgz#69162833da29170d6732334643c60e005f5f17d0"
+  integrity sha512-lloiL22n7sOjEEXdL8NAjTgv9a1u43xICE9/203qonkZUCj5X1UEWIdf2/Y0d6QcCtMzbKQyhrcDbdvlZTs/+A==
+  dependencies:
+    "@npmcli/move-file" "^1.0.1"
+    chownr "^2.0.0"
+    fs-minipass "^2.0.0"
+    glob "^7.1.4"
+    infer-owner "^1.0.4"
+    lru-cache "^6.0.0"
+    minipass "^3.1.1"
+    minipass-collect "^1.0.2"
+    minipass-flush "^1.0.5"
+    minipass-pipeline "^1.2.2"
+    mkdirp "^1.0.3"
+    p-map "^4.0.0"
+    promise-inflight "^1.0.1"
+    rimraf "^3.0.2"
+    ssri "^8.0.0"
+    tar "^6.0.2"
+    unique-filename "^1.1.1"
+
+cache-base@^1.0.1:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/cache-base/-/cache-base-1.0.1.tgz#0a7f46416831c8b662ee36fe4e7c59d76f666ab2"
+  integrity sha512-AKcdTnFSWATd5/GCPRxr2ChwIJ85CeyrEyjRHlKxQ56d4XJMGym0uAiKn0xbLOGOl3+yRpOTi484dVCEc5AUzQ==
+  dependencies:
+    collection-visit "^1.0.0"
+    component-emitter "^1.2.1"
+    get-value "^2.0.6"
+    has-value "^1.0.0"
+    isobject "^3.0.1"
+    set-value "^2.0.0"
+    to-object-path "^0.3.0"
+    union-value "^1.0.0"
+    unset-value "^1.0.0"
+
+call-bind@^1.0.0, call-bind@^1.0.2:
+  version "1.0.2"
+  resolved "https://registry.yarnpkg.com/call-bind/-/call-bind-1.0.2.tgz#b1d4e89e688119c3c9a903ad30abb2f6a919be3c"
+  integrity sha512-7O+FbCihrB5WGbFYesctwmTKae6rOiIzmz1icreWJ+0aA7LJfuqhEso2T9ncpcFtzMQtzXf2QGGueWJGTYsqrA==
+  dependencies:
+    function-bind "^1.1.1"
+    get-intrinsic "^1.0.2"
+
+caller-callsite@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/caller-callsite/-/caller-callsite-2.0.0.tgz#847e0fce0a223750a9a027c54b33731ad3154134"
+  integrity sha1-hH4PzgoiN1CpoCfFSzNzGtMVQTQ=
+  dependencies:
+    callsites "^2.0.0"
+
+caller-path@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/caller-path/-/caller-path-2.0.0.tgz#468f83044e369ab2010fac5f06ceee15bb2cb1f4"
+  integrity sha1-Ro+DBE42mrIBD6xfBs7uFbsssfQ=
+  dependencies:
+    caller-callsite "^2.0.0"
+
+callsites@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/callsites/-/callsites-2.0.0.tgz#06eb84f00eea413da86affefacbffb36093b3c50"
+  integrity sha1-BuuE8A7qQT2oav/vrL/7Ngk7PFA=
+
+callsites@^3.0.0:
+  version "3.1.0"
+  resolved "https://registry.yarnpkg.com/callsites/-/callsites-3.1.0.tgz#b3630abd8943432f54b3f0519238e33cd7df2f73"
+  integrity sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ==
+
+camel-case@^4.1.1:
+  version "4.1.2"
+  resolved "https://registry.yarnpkg.com/camel-case/-/camel-case-4.1.2.tgz#9728072a954f805228225a6deea6b38461e1bd5a"
+  integrity sha512-gxGWBrTT1JuMx6R+o5PTXMmUnhnVzLQ9SNutD4YqKtI6ap897t3tKECYla6gCWEkplXnlNybEkZg9GEGxKFCgw==
+  dependencies:
+    pascal-case "^3.1.2"
+    tslib "^2.0.3"
+
+camelcase@5.3.1, camelcase@^5.0.0, camelcase@^5.3.1:
+  version "5.3.1"
+  resolved "https://registry.yarnpkg.com/camelcase/-/camelcase-5.3.1.tgz#e3c9b31569e106811df242f715725a1f4c494320"
+  integrity sha512-L28STB170nwWS63UjtlEOE3dldQApaJXZkOI1uMFfzf3rRuPegHaHesyee+YxQ+W6SvRDQV6UrdOdRiR153wJg==
+
+camelcase@^6.0.0, camelcase@^6.1.0, camelcase@^6.2.0:
+  version "6.2.0"
+  resolved "https://registry.yarnpkg.com/camelcase/-/camelcase-6.2.0.tgz#924af881c9d525ac9d87f40d964e5cea982a1809"
+  integrity sha512-c7wVvbw3f37nuobQNtgsgG9POC9qMbNuMQmTCqZv23b6MIz0fcYpBiOlv9gEN/hdLdnZTDQhg6e9Dq5M1vKvfg==
+
+caniuse-api@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/caniuse-api/-/caniuse-api-3.0.0.tgz#5e4d90e2274961d46291997df599e3ed008ee4c0"
+  integrity sha512-bsTwuIg/BZZK/vreVTYYbSWoe2F+71P7K5QGEX+pT250DZbfU1MQ5prOKpPR+LL6uWKK3KMwMCAS74QB3Um1uw==
+  dependencies:
+    browserslist "^4.0.0"
+    caniuse-lite "^1.0.0"
+    lodash.memoize "^4.1.2"
+    lodash.uniq "^4.5.0"
+
+caniuse-lite@^1.0.0, caniuse-lite@^1.0.30000981, caniuse-lite@^1.0.30001109, caniuse-lite@^1.0.30001125, caniuse-lite@^1.0.30001181:
+  version "1.0.30001191"
+  resolved "https://registry.yarnpkg.com/caniuse-lite/-/caniuse-lite-1.0.30001191.tgz#bacb432b6701f690c8c5f7c680166b9a9f0843d9"
+  integrity sha512-xJJqzyd+7GCJXkcoBiQ1GuxEiOBCLQ0aVW9HMekifZsAVGdj5eJ4mFB9fEhSHipq9IOk/QXFJUiIr9lZT+EsGw==
+
+capture-exit@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/capture-exit/-/capture-exit-2.0.0.tgz#fb953bfaebeb781f62898239dabb426d08a509a4"
+  integrity sha512-PiT/hQmTonHhl/HFGN+Lx3JJUznrVYJ3+AQsnthneZbvW7x+f08Tk7yLJTLEOUvBTbduLeeBkxEaYXUOUrRq6g==
+  dependencies:
+    rsvp "^4.8.4"
+
+case-sensitive-paths-webpack-plugin@2.3.0:
+  version "2.3.0"
+  resolved "https://registry.yarnpkg.com/case-sensitive-paths-webpack-plugin/-/case-sensitive-paths-webpack-plugin-2.3.0.tgz#23ac613cc9a856e4f88ff8bb73bbb5e989825cf7"
+  integrity sha512-/4YgnZS8y1UXXmC02xD5rRrBEu6T5ub+mQHLNRj0fzTRbgdBYhsNo2V5EqwgqrExjxsjtF/OpAKAMkKsxbD5XQ==
+
+caseless@~0.12.0:
+  version "0.12.0"
+  resolved "https://registry.yarnpkg.com/caseless/-/caseless-0.12.0.tgz#1b681c21ff84033c826543090689420d187151dc"
+  integrity sha1-G2gcIf+EAzyCZUMJBolCDRhxUdw=
+
+chalk@2.4.2, chalk@^2.0.0, chalk@^2.4.1, chalk@^2.4.2:
+  version "2.4.2"
+  resolved "https://registry.yarnpkg.com/chalk/-/chalk-2.4.2.tgz#cd42541677a54333cf541a49108c1432b44c9424"
+  integrity sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==
+  dependencies:
+    ansi-styles "^3.2.1"
+    escape-string-regexp "^1.0.5"
+    supports-color "^5.3.0"
+
+chalk@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/chalk/-/chalk-3.0.0.tgz#3f73c2bf526591f574cc492c51e2456349f844e4"
+  integrity sha512-4D3B6Wf41KOYRFdszmDqMCGq5VV/uMAB273JILmO+3jAlh8X4qDtdtgCR3fxtbLEMzSx22QdhnDcJvu2u1fVwg==
+  dependencies:
+    ansi-styles "^4.1.0"
+    supports-color "^7.1.0"
+
+chalk@^4.0.0, chalk@^4.1.0:
+  version "4.1.0"
+  resolved "https://registry.yarnpkg.com/chalk/-/chalk-4.1.0.tgz#4e14870a618d9e2edd97dd8345fd9d9dc315646a"
+  integrity sha512-qwx12AxXe2Q5xQ43Ac//I6v5aXTipYrSESdOgzrN+9XjgEpyjpKuvSGaN4qE93f7TQTlerQQ8S+EQ0EyDoVL1A==
+  dependencies:
+    ansi-styles "^4.1.0"
+    supports-color "^7.1.0"
+
+char-regex@^1.0.2:
+  version "1.0.2"
+  resolved "https://registry.yarnpkg.com/char-regex/-/char-regex-1.0.2.tgz#d744358226217f981ed58f479b1d6bcc29545dcf"
+  integrity sha512-kWWXztvZ5SBQV+eRgKFeh8q5sLuZY2+8WUIzlxWVTg+oGwY14qylx1KbKzHd8P6ZYkAg0xyIDU9JMHhyJMZ1jw==
+
+check-types@^11.1.1:
+  version "11.1.2"
+  resolved "https://registry.yarnpkg.com/check-types/-/check-types-11.1.2.tgz#86a7c12bf5539f6324eb0e70ca8896c0e38f3e2f"
+  integrity sha512-tzWzvgePgLORb9/3a0YenggReLKAIb2owL03H2Xdoe5pKcUyWRSEQ8xfCar8t2SIAuEDwtmx2da1YB52YuHQMQ==
+
+chokidar@^2.1.8:
+  version "2.1.8"
+  resolved "https://registry.yarnpkg.com/chokidar/-/chokidar-2.1.8.tgz#804b3a7b6a99358c3c5c61e71d8728f041cff917"
+  integrity sha512-ZmZUazfOzf0Nve7duiCKD23PFSCs4JPoYyccjUFF3aQkQadqBhfzhjkwBH2mNOG9cTBwhamM37EIsIkZw3nRgg==
+  dependencies:
+    anymatch "^2.0.0"
+    async-each "^1.0.1"
+    braces "^2.3.2"
+    glob-parent "^3.1.0"
+    inherits "^2.0.3"
+    is-binary-path "^1.0.0"
+    is-glob "^4.0.0"
+    normalize-path "^3.0.0"
+    path-is-absolute "^1.0.0"
+    readdirp "^2.2.1"
+    upath "^1.1.1"
+  optionalDependencies:
+    fsevents "^1.2.7"
+
+chokidar@^3.4.1:
+  version "3.5.1"
+  resolved "https://registry.yarnpkg.com/chokidar/-/chokidar-3.5.1.tgz#ee9ce7bbebd2b79f49f304799d5468e31e14e68a"
+  integrity sha512-9+s+Od+W0VJJzawDma/gvBNQqkTiqYTWLuZoyAsivsI4AaWTCzHG06/TMjsf1cYe9Cb97UCEhjz7HvnPk2p/tw==
+  dependencies:
+    anymatch "~3.1.1"
+    braces "~3.0.2"
+    glob-parent "~5.1.0"
+    is-binary-path "~2.1.0"
+    is-glob "~4.0.1"
+    normalize-path "~3.0.0"
+    readdirp "~3.5.0"
+  optionalDependencies:
+    fsevents "~2.3.1"
+
+chownr@^1.1.1:
+  version "1.1.4"
+  resolved "https://registry.yarnpkg.com/chownr/-/chownr-1.1.4.tgz#6fc9d7b42d32a583596337666e7d08084da2cc6b"
+  integrity sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==
+
+chownr@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/chownr/-/chownr-2.0.0.tgz#15bfbe53d2eab4cf70f18a8cd68ebe5b3cb1dece"
+  integrity sha512-bIomtDF5KGpdogkLd9VspvFzk9KfpyyGlS8YFVZl7TGPBHL5snIOnxeshwVgPteQ9b4Eydl+pVbIyE1DcvCWgQ==
+
+chrome-trace-event@^1.0.2:
+  version "1.0.2"
+  resolved "https://registry.yarnpkg.com/chrome-trace-event/-/chrome-trace-event-1.0.2.tgz#234090ee97c7d4ad1a2c4beae27505deffc608a4"
+  integrity sha512-9e/zx1jw7B4CO+c/RXoCsfg/x1AfUBioy4owYH0bJprEYAx5hRFLRhWBqHAG57D0ZM4H7vxbP7bPe0VwhQRYDQ==
+  dependencies:
+    tslib "^1.9.0"
+
+ci-info@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/ci-info/-/ci-info-2.0.0.tgz#67a9e964be31a51e15e5010d58e6f12834002f46"
+  integrity sha512-5tK7EtrZ0N+OLFMthtqOj4fI2Jeb88C4CAZPu25LDVUgXJ0A3Js4PMGqrn0JU1W0Mh1/Z8wZzYPxqUrXeBboCQ==
+
+cipher-base@^1.0.0, cipher-base@^1.0.1, cipher-base@^1.0.3:
+  version "1.0.4"
+  resolved "https://registry.yarnpkg.com/cipher-base/-/cipher-base-1.0.4.tgz#8760e4ecc272f4c363532f926d874aae2c1397de"
+  integrity sha512-Kkht5ye6ZGmwv40uUDZztayT2ThLQGfnj/T71N/XzeZeo3nf8foyW7zGTsPYkEya3m5f3cAypH+qe7YOrM1U2Q==
+  dependencies:
+    inherits "^2.0.1"
+    safe-buffer "^5.0.1"
+
+cjs-module-lexer@^0.6.0:
+  version "0.6.0"
+  resolved "https://registry.yarnpkg.com/cjs-module-lexer/-/cjs-module-lexer-0.6.0.tgz#4186fcca0eae175970aee870b9fe2d6cf8d5655f"
+  integrity sha512-uc2Vix1frTfnuzxxu1Hp4ktSvM3QaI4oXl4ZUqL1wjTu/BGki9TrCWoqLTg/drR1KwAEarXuRFCG2Svr1GxPFw==
+
+class-utils@^0.3.5:
+  version "0.3.6"
+  resolved "https://registry.yarnpkg.com/class-utils/-/class-utils-0.3.6.tgz#f93369ae8b9a7ce02fd41faad0ca83033190c463"
+  integrity sha512-qOhPa/Fj7s6TY8H8esGu5QNpMMQxz79h+urzrNYN6mn+9BnxlDGf5QZ+XeCDsxSjPqsSR56XOZOJmpeurnLMeg==
+  dependencies:
+    arr-union "^3.1.0"
+    define-property "^0.2.5"
+    isobject "^3.0.0"
+    static-extend "^0.1.1"
+
+clean-css@^4.2.3:
+  version "4.2.3"
+  resolved "https://registry.yarnpkg.com/clean-css/-/clean-css-4.2.3.tgz#507b5de7d97b48ee53d84adb0160ff6216380f78"
+  integrity sha512-VcMWDN54ZN/DS+g58HYL5/n4Zrqe8vHJpGA8KdgUXFU4fuP/aHNw8eld9SyEIyabIMJX/0RaY/fplOo5hYLSFA==
+  dependencies:
+    source-map "~0.6.0"
+
+clean-stack@^2.0.0:
+  version "2.2.0"
+  resolved "https://registry.yarnpkg.com/clean-stack/-/clean-stack-2.2.0.tgz#ee8472dbb129e727b31e8a10a427dee9dfe4008b"
+  integrity sha512-4diC9HaTE+KRAMWhDhrGOECgWZxoevMc5TlkObMqNSsVU62PYzXZ/SMTjzyGAFF1YusgxGcSWTEXBhp0CPwQ1A==
+
+cliui@^5.0.0:
+  version "5.0.0"
+  resolved "https://registry.yarnpkg.com/cliui/-/cliui-5.0.0.tgz#deefcfdb2e800784aa34f46fa08e06851c7bbbc5"
+  integrity sha512-PYeGSEmmHM6zvoef2w8TPzlrnNpXIjTipYK780YswmIP9vjxmd6Y2a3CB2Ks6/AU8NHjZugXvo8w3oWM2qnwXA==
+  dependencies:
+    string-width "^3.1.0"
+    strip-ansi "^5.2.0"
+    wrap-ansi "^5.1.0"
+
+cliui@^6.0.0:
+  version "6.0.0"
+  resolved "https://registry.yarnpkg.com/cliui/-/cliui-6.0.0.tgz#511d702c0c4e41ca156d7d0e96021f23e13225b1"
+  integrity sha512-t6wbgtoCXvAzst7QgXxJYqPt0usEfbgQdftEPbLL/cvv6HPE5VgvqCuAIDR0NgU52ds6rFwqrgakNLrHEjCbrQ==
+  dependencies:
+    string-width "^4.2.0"
+    strip-ansi "^6.0.0"
+    wrap-ansi "^6.2.0"
+
+co@^4.6.0:
+  version "4.6.0"
+  resolved "https://registry.yarnpkg.com/co/-/co-4.6.0.tgz#6ea6bdf3d853ae54ccb8e47bfa0bf3f9031fb184"
+  integrity sha1-bqa989hTrlTMuOR7+gvz+QMfsYQ=
+
+coa@^2.0.2:
+  version "2.0.2"
+  resolved "https://registry.yarnpkg.com/coa/-/coa-2.0.2.tgz#43f6c21151b4ef2bf57187db0d73de229e3e7ec3"
+  integrity sha512-q5/jG+YQnSy4nRTV4F7lPepBJZ8qBNJJDBuJdoejDyLXgmL7IEo+Le2JDZudFTFt7mrCqIRaSjws4ygRCTCAXA==
+  dependencies:
+    "@types/q" "^1.5.1"
+    chalk "^2.4.1"
+    q "^1.1.2"
+
+collect-v8-coverage@^1.0.0:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/collect-v8-coverage/-/collect-v8-coverage-1.0.1.tgz#cc2c8e94fc18bbdffe64d6534570c8a673b27f59"
+  integrity sha512-iBPtljfCNcTKNAto0KEtDfZ3qzjJvqE3aTGZsbhjSBlorqpXJlaWWtPO35D+ZImoC3KWejX64o+yPGxhWSTzfg==
+
+collection-visit@^1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/collection-visit/-/collection-visit-1.0.0.tgz#4bc0373c164bc3291b4d368c829cf1a80a59dca0"
+  integrity sha1-S8A3PBZLwykbTTaMgpzxqApZ3KA=
+  dependencies:
+    map-visit "^1.0.0"
+    object-visit "^1.0.0"
+
+color-convert@^1.9.0, color-convert@^1.9.1:
+  version "1.9.3"
+  resolved "https://registry.yarnpkg.com/color-convert/-/color-convert-1.9.3.tgz#bb71850690e1f136567de629d2d5471deda4c1e8"
+  integrity sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==
+  dependencies:
+    color-name "1.1.3"
+
+color-convert@^2.0.1:
+  version "2.0.1"
+  resolved "https://registry.yarnpkg.com/color-convert/-/color-convert-2.0.1.tgz#72d3a68d598c9bdb3af2ad1e84f21d896abd4de3"
+  integrity sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==
+  dependencies:
+    color-name "~1.1.4"
+
+color-name@1.1.3:
+  version "1.1.3"
+  resolved "https://registry.yarnpkg.com/color-name/-/color-name-1.1.3.tgz#a7d0558bd89c42f795dd42328f740831ca53bc25"
+  integrity sha1-p9BVi9icQveV3UIyj3QIMcpTvCU=
+
+color-name@^1.0.0, color-name@~1.1.4:
+  version "1.1.4"
+  resolved "https://registry.yarnpkg.com/color-name/-/color-name-1.1.4.tgz#c2a09a87acbde69543de6f63fa3995c826c536a2"
+  integrity sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==
+
+color-string@^1.5.4:
+  version "1.5.4"
+  resolved "https://registry.yarnpkg.com/color-string/-/color-string-1.5.4.tgz#dd51cd25cfee953d138fe4002372cc3d0e504cb6"
+  integrity sha512-57yF5yt8Xa3czSEW1jfQDE79Idk0+AkN/4KWad6tbdxUmAs3MvjxlWSWD4deYytcRfoZ9nhKyFl1kj5tBvidbw==
+  dependencies:
+    color-name "^1.0.0"
+    simple-swizzle "^0.2.2"
+
+color@^3.0.0:
+  version "3.1.3"
+  resolved "https://registry.yarnpkg.com/color/-/color-3.1.3.tgz#ca67fb4e7b97d611dcde39eceed422067d91596e"
+  integrity sha512-xgXAcTHa2HeFCGLE9Xs/R82hujGtu9Jd9x4NW3T34+OMs7VoPsjwzRczKHvTAHeJwWFwX5j15+MgAppE8ztObQ==
+  dependencies:
+    color-convert "^1.9.1"
+    color-string "^1.5.4"
+
+colorette@^1.2.1:
+  version "1.2.1"
+  resolved "https://registry.yarnpkg.com/colorette/-/colorette-1.2.1.tgz#4d0b921325c14faf92633086a536db6e89564b1b"
+  integrity sha512-puCDz0CzydiSYOrnXpz/PKd69zRrribezjtE9yd4zvytoRc8+RY/KJPvtPFKZS3E3wP6neGyMe0vOTlHO5L3Pw==
+
+combined-stream@^1.0.6, combined-stream@~1.0.6:
+  version "1.0.8"
+  resolved "https://registry.yarnpkg.com/combined-stream/-/combined-stream-1.0.8.tgz#c3d45a8b34fd730631a110a8a2520682b31d5a7f"
+  integrity sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==
+  dependencies:
+    delayed-stream "~1.0.0"
+
+commander@^2.20.0:
+  version "2.20.3"
+  resolved "https://registry.yarnpkg.com/commander/-/commander-2.20.3.tgz#fd485e84c03eb4881c20722ba48035e8531aeb33"
+  integrity sha512-GpVkmM8vF2vQUkj2LvZmD35JxeJOLCwJ9cUkugyk2nuhbv3+mJvpLYYt+0+USMxE+oj+ey/lJEnhZw75x/OMcQ==
+
+commander@^4.1.1:
+  version "4.1.1"
+  resolved "https://registry.yarnpkg.com/commander/-/commander-4.1.1.tgz#9fd602bd936294e9e9ef46a3f4d6964044b18068"
+  integrity sha512-NOKm8xhkzAjzFx8B2v5OAHT+u5pRQc2UCa2Vq9jYL/31o2wi9mxBA7LIFs3sV5VSC49z6pEhfbMULvShKj26WA==
+
+common-tags@^1.8.0:
+  version "1.8.0"
+  resolved "https://registry.yarnpkg.com/common-tags/-/common-tags-1.8.0.tgz#8e3153e542d4a39e9b10554434afaaf98956a937"
+  integrity sha512-6P6g0uetGpW/sdyUy/iQQCbFF0kWVMSIVSyYz7Zgjcgh8mgw8PQzDNZeyZ5DQ2gM7LBoZPHmnjz8rUthkBG5tw==
+
+commondir@^1.0.1:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/commondir/-/commondir-1.0.1.tgz#ddd800da0c66127393cca5950ea968a3aaf1253b"
+  integrity sha1-3dgA2gxmEnOTzKWVDqloo6rxJTs=
+
+component-emitter@^1.2.1:
+  version "1.3.0"
+  resolved "https://registry.yarnpkg.com/component-emitter/-/component-emitter-1.3.0.tgz#16e4070fba8ae29b679f2215853ee181ab2eabc0"
+  integrity sha512-Rd3se6QB+sO1TwqZjscQrurpEPIfO0/yYnSin6Q/rD3mOutHvUrCAhJub3r90uNb+SESBuE0QYoB90YdfatsRg==
+
+compose-function@3.0.3:
+  version "3.0.3"
+  resolved "https://registry.yarnpkg.com/compose-function/-/compose-function-3.0.3.tgz#9ed675f13cc54501d30950a486ff6a7ba3ab185f"
+  integrity sha1-ntZ18TzFRQHTCVCkhv9qe6OrGF8=
+  dependencies:
+    arity-n "^1.0.4"
+
+compressible@~2.0.16:
+  version "2.0.18"
+  resolved "https://registry.yarnpkg.com/compressible/-/compressible-2.0.18.tgz#af53cca6b070d4c3c0750fbd77286a6d7cc46fba"
+  integrity sha512-AF3r7P5dWxL8MxyITRMlORQNaOA2IkAFaTr4k7BUumjPtRpGDTZpl0Pb1XCO6JeDCBdp126Cgs9sMxqSjgYyRg==
+  dependencies:
+    mime-db ">= 1.43.0 < 2"
+
+compression@^1.7.4:
+  version "1.7.4"
+  resolved "https://registry.yarnpkg.com/compression/-/compression-1.7.4.tgz#95523eff170ca57c29a0ca41e6fe131f41e5bb8f"
+  integrity sha512-jaSIDzP9pZVS4ZfQ+TzvtiWhdpFhE2RDHz8QJkpX9SIpLq88VueF5jJw6t+6CUQcAoA6t+x89MLrWAqpfDE8iQ==
+  dependencies:
+    accepts "~1.3.5"
+    bytes "3.0.0"
+    compressible "~2.0.16"
+    debug "2.6.9"
+    on-headers "~1.0.2"
+    safe-buffer "5.1.2"
+    vary "~1.1.2"
+
+compute-scroll-into-view@1.0.14:
+  version "1.0.14"
+  resolved "https://registry.yarnpkg.com/compute-scroll-into-view/-/compute-scroll-into-view-1.0.14.tgz#80e3ebb25d6aa89f42e533956cb4b16a04cfe759"
+  integrity sha512-mKDjINe3tc6hGelUMNDzuhorIUZ7kS7BwyY0r2wQd2HOH2tRuJykiC06iSEX8y1TuhNzvz4GcJnK16mM2J1NMQ==
+
+concat-map@0.0.1:
+  version "0.0.1"
+  resolved "https://registry.yarnpkg.com/concat-map/-/concat-map-0.0.1.tgz#d8a96bd77fd68df7793a73036a3ba0d5405d477b"
+  integrity sha1-2Klr13/Wjfd5OnMDajug1UBdR3s=
+
+concat-stream@^1.5.0:
+  version "1.6.2"
+  resolved "https://registry.yarnpkg.com/concat-stream/-/concat-stream-1.6.2.tgz#904bdf194cd3122fc675c77fc4ac3d4ff0fd1a34"
+  integrity sha512-27HBghJxjiZtIk3Ycvn/4kbJk/1uZuJFfuPEns6LaEvpvG1f0hTea8lilrouyo9mVc2GWdcEZ8OLoGmSADlrCw==
+  dependencies:
+    buffer-from "^1.0.0"
+    inherits "^2.0.3"
+    readable-stream "^2.2.2"
+    typedarray "^0.0.6"
+
+confusing-browser-globals@^1.0.10:
+  version "1.0.10"
+  resolved "https://registry.yarnpkg.com/confusing-browser-globals/-/confusing-browser-globals-1.0.10.tgz#30d1e7f3d1b882b25ec4933d1d1adac353d20a59"
+  integrity sha512-gNld/3lySHwuhaVluJUKLePYirM3QNCKzVxqAdhJII9/WXKVX5PURzMVJspS1jTslSqjeuG4KMVTSouit5YPHA==
+
+connect-history-api-fallback@^1.6.0:
+  version "1.6.0"
+  resolved "https://registry.yarnpkg.com/connect-history-api-fallback/-/connect-history-api-fallback-1.6.0.tgz#8b32089359308d111115d81cad3fceab888f97bc"
+  integrity sha512-e54B99q/OUoH64zYYRf3HBP5z24G38h5D3qXu23JGRoigpX5Ss4r9ZnDk3g0Z8uQC2x2lPaJ+UlWBc1ZWBWdLg==
+
+console-browserify@^1.1.0:
+  version "1.2.0"
+  resolved "https://registry.yarnpkg.com/console-browserify/-/console-browserify-1.2.0.tgz#67063cef57ceb6cf4993a2ab3a55840ae8c49336"
+  integrity sha512-ZMkYO/LkF17QvCPqM0gxw8yUzigAOZOSWSHg91FH6orS7vcEj5dVZTidN2fQ14yBSdg97RqhSNwLUXInd52OTA==
+
+constants-browserify@^1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/constants-browserify/-/constants-browserify-1.0.0.tgz#c20b96d8c617748aaf1c16021760cd27fcb8cb75"
+  integrity sha1-wguW2MYXdIqvHBYCF2DNJ/y4y3U=
+
+contains-path@^0.1.0:
+  version "0.1.0"
+  resolved "https://registry.yarnpkg.com/contains-path/-/contains-path-0.1.0.tgz#fe8cf184ff6670b6baef01a9d4861a5cbec4120a"
+  integrity sha1-/ozxhP9mcLa67wGp1IYaXL7EEgo=
+
+content-disposition@0.5.3:
+  version "0.5.3"
+  resolved "https://registry.yarnpkg.com/content-disposition/-/content-disposition-0.5.3.tgz#e130caf7e7279087c5616c2007d0485698984fbd"
+  integrity sha512-ExO0774ikEObIAEV9kDo50o+79VCUdEB6n6lzKgGwupcVeRlhrj3qGAfwq8G6uBJjkqLrhT0qEYFcWng8z1z0g==
+  dependencies:
+    safe-buffer "5.1.2"
+
+content-type@~1.0.4:
+  version "1.0.4"
+  resolved "https://registry.yarnpkg.com/content-type/-/content-type-1.0.4.tgz#e138cc75e040c727b1966fe5e5f8c9aee256fe3b"
+  integrity sha512-hIP3EEPs8tB9AT1L+NUqtwOAps4mk2Zob89MWXMHjHWg9milF/j4osnnQLXBCBFBk/tvIG/tUc9mOUJiPBhPXA==
+
+convert-source-map@1.7.0, convert-source-map@^1.4.0, convert-source-map@^1.5.0, convert-source-map@^1.6.0, convert-source-map@^1.7.0:
+  version "1.7.0"
+  resolved "https://registry.yarnpkg.com/convert-source-map/-/convert-source-map-1.7.0.tgz#17a2cb882d7f77d3490585e2ce6c524424a3a442"
+  integrity sha512-4FJkXzKXEDB1snCFZlLP4gpC3JILicCpGbzG9f9G7tGqGCzETQ2hWPrcinA9oU4wtf2biUaEH5065UnMeR33oA==
+  dependencies:
+    safe-buffer "~5.1.1"
+
+convert-source-map@^0.3.3:
+  version "0.3.5"
+  resolved "https://registry.yarnpkg.com/convert-source-map/-/convert-source-map-0.3.5.tgz#f1d802950af7dd2631a1febe0596550c86ab3190"
+  integrity sha1-8dgClQr33SYxof6+BZZVDIarMZA=
+
+cookie-signature@1.0.6:
+  version "1.0.6"
+  resolved "https://registry.yarnpkg.com/cookie-signature/-/cookie-signature-1.0.6.tgz#e303a882b342cc3ee8ca513a79999734dab3ae2c"
+  integrity sha1-4wOogrNCzD7oylE6eZmXNNqzriw=
+
+cookie@0.4.0:
+  version "0.4.0"
+  resolved "https://registry.yarnpkg.com/cookie/-/cookie-0.4.0.tgz#beb437e7022b3b6d49019d088665303ebe9c14ba"
+  integrity sha512-+Hp8fLp57wnUSt0tY0tHEXh4voZRDnoIrZPqlo3DPiI4y9lwg/jqx+1Om94/W6ZaPDOUbnjOt/99w66zk+l1Xg==
+
+copy-concurrently@^1.0.0:
+  version "1.0.5"
+  resolved "https://registry.yarnpkg.com/copy-concurrently/-/copy-concurrently-1.0.5.tgz#92297398cae34937fcafd6ec8139c18051f0b5e0"
+  integrity sha512-f2domd9fsVDFtaFcbaRZuYXwtdmnzqbADSwhSWYxYB/Q8zsdUUFMXVRwXGDMWmbEzAn1kdRrtI1T/KTFOL4X2A==
+  dependencies:
+    aproba "^1.1.1"
+    fs-write-stream-atomic "^1.0.8"
+    iferr "^0.1.5"
+    mkdirp "^0.5.1"
+    rimraf "^2.5.4"
+    run-queue "^1.0.0"
+
+copy-descriptor@^0.1.0:
+  version "0.1.1"
+  resolved "https://registry.yarnpkg.com/copy-descriptor/-/copy-descriptor-0.1.1.tgz#676f6eb3c39997c2ee1ac3a924fd6124748f578d"
+  integrity sha1-Z29us8OZl8LuGsOpJP1hJHSPV40=
+
+copy-to-clipboard@3.3.1:
+  version "3.3.1"
+  resolved "https://registry.yarnpkg.com/copy-to-clipboard/-/copy-to-clipboard-3.3.1.tgz#115aa1a9998ffab6196f93076ad6da3b913662ae"
+  integrity sha512-i13qo6kIHTTpCm8/Wup+0b1mVWETvu2kIMzKoK8FpkLkFxlt0znUAHcMzox+T8sPlqtZXq3CulEjQHsYiGFJUw==
+  dependencies:
+    toggle-selection "^1.0.6"
+
+core-js-compat@^3.6.2, core-js-compat@^3.8.0:
+  version "3.9.0"
+  resolved "https://registry.yarnpkg.com/core-js-compat/-/core-js-compat-3.9.0.tgz#29da39385f16b71e1915565aa0385c4e0963ad56"
+  integrity sha512-YK6fwFjCOKWwGnjFUR3c544YsnA/7DoLL0ysncuOJ4pwbriAtOpvM2bygdlcXbvQCQZ7bBU9CL4t7tGl7ETRpQ==
+  dependencies:
+    browserslist "^4.16.3"
+    semver "7.0.0"
+
+core-js-pure@^3.0.0:
+  version "3.9.0"
+  resolved "https://registry.yarnpkg.com/core-js-pure/-/core-js-pure-3.9.0.tgz#326cc74e1fef8b7443a6a793ddb0adfcd81f9efb"
+  integrity sha512-3pEcmMZC9Cq0D4ZBh3pe2HLtqxpGNJBLXF/kZ2YzK17RbKp94w0HFbdbSx8H8kAlZG5k76hvLrkPm57Uyef+kg==
+
+core-js@^2.4.0:
+  version "2.6.12"
+  resolved "https://registry.yarnpkg.com/core-js/-/core-js-2.6.12.tgz#d9333dfa7b065e347cc5682219d6f690859cc2ec"
+  integrity sha512-Kb2wC0fvsWfQrgk8HU5lW6U/Lcs8+9aaYcy4ZFc6DDlo4nZ7n70dEgE5rtR0oG6ufKDUnrwfWL1mXR5ljDatrQ==
+
+core-js@^3.6.5:
+  version "3.9.0"
+  resolved "https://registry.yarnpkg.com/core-js/-/core-js-3.9.0.tgz#790b1bb11553a2272b36e2625c7179db345492f8"
+  integrity sha512-PyFBJaLq93FlyYdsndE5VaueA9K5cNB7CGzeCj191YYLhkQM0gdZR2SKihM70oF0wdqKSKClv/tEBOpoRmdOVQ==
+
+core-util-is@1.0.2, core-util-is@~1.0.0:
+  version "1.0.2"
+  resolved "https://registry.yarnpkg.com/core-util-is/-/core-util-is-1.0.2.tgz#b5fd54220aa2bc5ab57aab7140c940754503c1a7"
+  integrity sha1-tf1UIgqivFq1eqtxQMlAdUUDwac=
+
+cosmiconfig@^5.0.0:
+  version "5.2.1"
+  resolved "https://registry.yarnpkg.com/cosmiconfig/-/cosmiconfig-5.2.1.tgz#040f726809c591e77a17c0a3626ca45b4f168b1a"
+  integrity sha512-H65gsXo1SKjf8zmrJ67eJk8aIRKV5ff2D4uKZIBZShbhGSpEmsQOPW/SKMKYhSTrqR7ufy6RP69rPogdaPh/kA==
+  dependencies:
+    import-fresh "^2.0.0"
+    is-directory "^0.3.1"
+    js-yaml "^3.13.1"
+    parse-json "^4.0.0"
+
+cosmiconfig@^6.0.0:
+  version "6.0.0"
+  resolved "https://registry.yarnpkg.com/cosmiconfig/-/cosmiconfig-6.0.0.tgz#da4fee853c52f6b1e6935f41c1a2fc50bd4a9982"
+  integrity sha512-xb3ZL6+L8b9JLLCx3ZdoZy4+2ECphCMo2PwqgP1tlfVq6M6YReyzBJtvWWtbDSpNr9hn96pkCiZqUcFEc+54Qg==
+  dependencies:
+    "@types/parse-json" "^4.0.0"
+    import-fresh "^3.1.0"
+    parse-json "^5.0.0"
+    path-type "^4.0.0"
+    yaml "^1.7.2"
+
+cosmiconfig@^7.0.0:
+  version "7.0.0"
+  resolved "https://registry.yarnpkg.com/cosmiconfig/-/cosmiconfig-7.0.0.tgz#ef9b44d773959cae63ddecd122de23853b60f8d3"
+  integrity sha512-pondGvTuVYDk++upghXJabWzL6Kxu6f26ljFw64Swq9v6sQPUL3EUlVDV56diOjpCayKihL6hVe8exIACU4XcA==
+  dependencies:
+    "@types/parse-json" "^4.0.0"
+    import-fresh "^3.2.1"
+    parse-json "^5.0.0"
+    path-type "^4.0.0"
+    yaml "^1.10.0"
+
+create-ecdh@^4.0.0:
+  version "4.0.4"
+  resolved "https://registry.yarnpkg.com/create-ecdh/-/create-ecdh-4.0.4.tgz#d6e7f4bffa66736085a0762fd3a632684dabcc4e"
+  integrity sha512-mf+TCx8wWc9VpuxfP2ht0iSISLZnt0JgWlrOKZiNqyUZWnjIaCIVNQArMHnCZKfEYRg6IM7A+NeJoN8gf/Ws0A==
+  dependencies:
+    bn.js "^4.1.0"
+    elliptic "^6.5.3"
+
+create-hash@^1.1.0, create-hash@^1.1.2, create-hash@^1.2.0:
+  version "1.2.0"
+  resolved "https://registry.yarnpkg.com/create-hash/-/create-hash-1.2.0.tgz#889078af11a63756bcfb59bd221996be3a9ef196"
+  integrity sha512-z00bCGNHDG8mHAkP7CtT1qVu+bFQUPjYq/4Iv3C3kWjTFV10zIjfSoeqXo9Asws8gwSHDGj/hl2u4OGIjapeCg==
+  dependencies:
+    cipher-base "^1.0.1"
+    inherits "^2.0.1"
+    md5.js "^1.3.4"
+    ripemd160 "^2.0.1"
+    sha.js "^2.4.0"
+
+create-hmac@^1.1.0, create-hmac@^1.1.4, create-hmac@^1.1.7:
+  version "1.1.7"
+  resolved "https://registry.yarnpkg.com/create-hmac/-/create-hmac-1.1.7.tgz#69170c78b3ab957147b2b8b04572e47ead2243ff"
+  integrity sha512-MJG9liiZ+ogc4TzUwuvbER1JRdgvUFSB5+VR/g5h82fGaIRWMWddtKBHi7/sVhfjQZ6SehlyhvQYrcYkaUIpLg==
+  dependencies:
+    cipher-base "^1.0.3"
+    create-hash "^1.1.0"
+    inherits "^2.0.1"
+    ripemd160 "^2.0.0"
+    safe-buffer "^5.0.1"
+    sha.js "^2.4.8"
+
+cross-spawn@7.0.3, cross-spawn@^7.0.0, cross-spawn@^7.0.2:
+  version "7.0.3"
+  resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-7.0.3.tgz#f73a85b9d5d41d045551c177e2882d4ac85728a6"
+  integrity sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==
+  dependencies:
+    path-key "^3.1.0"
+    shebang-command "^2.0.0"
+    which "^2.0.1"
+
+cross-spawn@^6.0.0:
+  version "6.0.5"
+  resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-6.0.5.tgz#4a5ec7c64dfae22c3a14124dbacdee846d80cbc4"
+  integrity sha512-eTVLrBSt7fjbDygz805pMnstIs2VTBNkRm0qxZd+M7A5XDdxVRWO5MxGBXZhjY4cqLYLdtrGqRf8mBPmzwSpWQ==
+  dependencies:
+    nice-try "^1.0.4"
+    path-key "^2.0.1"
+    semver "^5.5.0"
+    shebang-command "^1.2.0"
+    which "^1.2.9"
+
+crypto-browserify@^3.11.0:
+  version "3.12.0"
+  resolved "https://registry.yarnpkg.com/crypto-browserify/-/crypto-browserify-3.12.0.tgz#396cf9f3137f03e4b8e532c58f698254e00f80ec"
+  integrity sha512-fz4spIh+znjO2VjL+IdhEpRJ3YN6sMzITSBijk6FK2UvTqruSQW+/cCZTSNsMiZNvUeq0CqurF+dAbyiGOY6Wg==
+  dependencies:
+    browserify-cipher "^1.0.0"
+    browserify-sign "^4.0.0"
+    create-ecdh "^4.0.0"
+    create-hash "^1.1.0"
+    create-hmac "^1.1.0"
+    diffie-hellman "^5.0.0"
+    inherits "^2.0.1"
+    pbkdf2 "^3.0.3"
+    public-encrypt "^4.0.0"
+    randombytes "^2.0.0"
+    randomfill "^1.0.3"
+
+crypto-random-string@^1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/crypto-random-string/-/crypto-random-string-1.0.0.tgz#a230f64f568310e1498009940790ec99545bca7e"
+  integrity sha1-ojD2T1aDEOFJgAmUB5DsmVRbyn4=
+
+css-blank-pseudo@^0.1.4:
+  version "0.1.4"
+  resolved "https://registry.yarnpkg.com/css-blank-pseudo/-/css-blank-pseudo-0.1.4.tgz#dfdefd3254bf8a82027993674ccf35483bfcb3c5"
+  integrity sha512-LHz35Hr83dnFeipc7oqFDmsjHdljj3TQtxGGiNWSOsTLIAubSm4TEz8qCaKFpk7idaQ1GfWscF4E6mgpBysA1w==
+  dependencies:
+    postcss "^7.0.5"
+
+css-box-model@1.2.1:
+  version "1.2.1"
+  resolved "https://registry.yarnpkg.com/css-box-model/-/css-box-model-1.2.1.tgz#59951d3b81fd6b2074a62d49444415b0d2b4d7c1"
+  integrity sha512-a7Vr4Q/kd/aw96bnJG332W9V9LkJO69JRcaCYDUqjp6/z0w6VcZjgAcTbgFxEPfBgdnAwlh3iwu+hLopa+flJw==
+  dependencies:
+    tiny-invariant "^1.0.6"
+
+css-color-names@0.0.4, css-color-names@^0.0.4:
+  version "0.0.4"
+  resolved "https://registry.yarnpkg.com/css-color-names/-/css-color-names-0.0.4.tgz#808adc2e79cf84738069b646cb20ec27beb629e0"
+  integrity sha1-gIrcLnnPhHOAabZGyyDsJ762KeA=
+
+css-declaration-sorter@^4.0.1:
+  version "4.0.1"
+  resolved "https://registry.yarnpkg.com/css-declaration-sorter/-/css-declaration-sorter-4.0.1.tgz#c198940f63a76d7e36c1e71018b001721054cb22"
+  integrity sha512-BcxQSKTSEEQUftYpBVnsH4SF05NTuBokb19/sBt6asXGKZ/6VP7PLG1CBCkFDYOnhXhPh0jMhO6xZ71oYHXHBA==
+  dependencies:
+    postcss "^7.0.1"
+    timsort "^0.3.0"
+
+css-get-unit@1.0.1:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/css-get-unit/-/css-get-unit-1.0.1.tgz#e490b9e56b2cd20f903a22ccafb448382edf7976"
+  integrity sha1-5JC55Wss0g+QOiLMr7RIOC7feXY=
+
+css-has-pseudo@^0.10.0:
+  version "0.10.0"
+  resolved "https://registry.yarnpkg.com/css-has-pseudo/-/css-has-pseudo-0.10.0.tgz#3c642ab34ca242c59c41a125df9105841f6966ee"
+  integrity sha512-Z8hnfsZu4o/kt+AuFzeGpLVhFOGO9mluyHBaA2bA8aCGTwah5sT3WV/fTHH8UNZUytOIImuGPrl/prlb4oX4qQ==
+  dependencies:
+    postcss "^7.0.6"
+    postcss-selector-parser "^5.0.0-rc.4"
+
+css-loader@4.3.0:
+  version "4.3.0"
+  resolved "https://registry.yarnpkg.com/css-loader/-/css-loader-4.3.0.tgz#c888af64b2a5b2e85462c72c0f4a85c7e2e0821e"
+  integrity sha512-rdezjCjScIrsL8BSYszgT4s476IcNKt6yX69t0pHjJVnPUTDpn4WfIpDQTN3wCJvUvfsz/mFjuGOekf3PY3NUg==
+  dependencies:
+    camelcase "^6.0.0"
+    cssesc "^3.0.0"
+    icss-utils "^4.1.1"
+    loader-utils "^2.0.0"
+    postcss "^7.0.32"
+    postcss-modules-extract-imports "^2.0.0"
+    postcss-modules-local-by-default "^3.0.3"
+    postcss-modules-scope "^2.2.0"
+    postcss-modules-values "^3.0.0"
+    postcss-value-parser "^4.1.0"
+    schema-utils "^2.7.1"
+    semver "^7.3.2"
+
+css-prefers-color-scheme@^3.1.1:
+  version "3.1.1"
+  resolved "https://registry.yarnpkg.com/css-prefers-color-scheme/-/css-prefers-color-scheme-3.1.1.tgz#6f830a2714199d4f0d0d0bb8a27916ed65cff1f4"
+  integrity sha512-MTu6+tMs9S3EUqzmqLXEcgNRbNkkD/TGFvowpeoWJn5Vfq7FMgsmRQs9X5NXAURiOBmOxm/lLjsDNXDE6k9bhg==
+  dependencies:
+    postcss "^7.0.5"
+
+css-select-base-adapter@^0.1.1:
+  version "0.1.1"
+  resolved "https://registry.yarnpkg.com/css-select-base-adapter/-/css-select-base-adapter-0.1.1.tgz#3b2ff4972cc362ab88561507a95408a1432135d7"
+  integrity sha512-jQVeeRG70QI08vSTwf1jHxp74JoZsr2XSgETae8/xC8ovSnL2WF87GTLO86Sbwdt2lK4Umg4HnnwMO4YF3Ce7w==
+
+css-select@^2.0.0, css-select@^2.0.2:
+  version "2.1.0"
+  resolved "https://registry.yarnpkg.com/css-select/-/css-select-2.1.0.tgz#6a34653356635934a81baca68d0255432105dbef"
+  integrity sha512-Dqk7LQKpwLoH3VovzZnkzegqNSuAziQyNZUcrdDM401iY+R5NkGBXGmtO05/yaXQziALuPogeG0b7UAgjnTJTQ==
+  dependencies:
+    boolbase "^1.0.0"
+    css-what "^3.2.1"
+    domutils "^1.7.0"
+    nth-check "^1.0.2"
+
+css-tree@1.0.0-alpha.37:
+  version "1.0.0-alpha.37"
+  resolved "https://registry.yarnpkg.com/css-tree/-/css-tree-1.0.0-alpha.37.tgz#98bebd62c4c1d9f960ec340cf9f7522e30709a22"
+  integrity sha512-DMxWJg0rnz7UgxKT0Q1HU/L9BeJI0M6ksor0OgqOnF+aRCDWg/N2641HmVyU9KVIu0OVVWOb2IpC9A+BJRnejg==
+  dependencies:
+    mdn-data "2.0.4"
+    source-map "^0.6.1"
+
+css-tree@^1.1.2:
+  version "1.1.2"
+  resolved "https://registry.yarnpkg.com/css-tree/-/css-tree-1.1.2.tgz#9ae393b5dafd7dae8a622475caec78d3d8fbd7b5"
+  integrity sha512-wCoWush5Aeo48GLhfHPbmvZs59Z+M7k5+B1xDnXbdWNcEF423DoFdqSWE0PM5aNk5nI5cp1q7ms36zGApY/sKQ==
+  dependencies:
+    mdn-data "2.0.14"
+    source-map "^0.6.1"
+
+css-what@^3.2.1:
+  version "3.4.2"
+  resolved "https://registry.yarnpkg.com/css-what/-/css-what-3.4.2.tgz#ea7026fcb01777edbde52124e21f327e7ae950e4"
+  integrity sha512-ACUm3L0/jiZTqfzRM3Hi9Q8eZqd6IK37mMWPLz9PJxkLWllYeRf+EHUSHYEtFop2Eqytaq1FizFVh7XfBnXCDQ==
+
+css.escape@^1.5.1:
+  version "1.5.1"
+  resolved "https://registry.yarnpkg.com/css.escape/-/css.escape-1.5.1.tgz#42e27d4fa04ae32f931a4b4d4191fa9cddee97cb"
+  integrity sha1-QuJ9T6BK4y+TGktNQZH6nN3ul8s=
+
+css@^2.0.0:
+  version "2.2.4"
+  resolved "https://registry.yarnpkg.com/css/-/css-2.2.4.tgz#c646755c73971f2bba6a601e2cf2fd71b1298929"
+  integrity sha512-oUnjmWpy0niI3x/mPL8dVEI1l7MnG3+HHyRPHf+YFSbK+svOhXpmSOcDURUh2aOCgl2grzrOPt1nHLuCVFULLw==
+  dependencies:
+    inherits "^2.0.3"
+    source-map "^0.6.1"
+    source-map-resolve "^0.5.2"
+    urix "^0.1.0"
+
+css@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/css/-/css-3.0.0.tgz#4447a4d58fdd03367c516ca9f64ae365cee4aa5d"
+  integrity sha512-DG9pFfwOrzc+hawpmqX/dHYHJG+Bsdb0klhyi1sDneOgGOXy9wQIC8hzyVp1e4NRYDBdxcylvywPkkXCHAzTyQ==
+  dependencies:
+    inherits "^2.0.4"
+    source-map "^0.6.1"
+    source-map-resolve "^0.6.0"
+
+cssdb@^4.4.0:
+  version "4.4.0"
+  resolved "https://registry.yarnpkg.com/cssdb/-/cssdb-4.4.0.tgz#3bf2f2a68c10f5c6a08abd92378331ee803cddb0"
+  integrity sha512-LsTAR1JPEM9TpGhl/0p3nQecC2LJ0kD8X5YARu1hk/9I1gril5vDtMZyNxcEpxxDj34YNck/ucjuoUd66K03oQ==
+
+cssesc@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/cssesc/-/cssesc-2.0.0.tgz#3b13bd1bb1cb36e1bcb5a4dcd27f54c5dcb35703"
+  integrity sha512-MsCAG1z9lPdoO/IUMLSBWBSVxVtJ1395VGIQ+Fc2gNdkQ1hNDnQdw3YhA71WJCBW1vdwA0cAnk/DnW6bqoEUYg==
+
+cssesc@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/cssesc/-/cssesc-3.0.0.tgz#37741919903b868565e1c09ea747445cd18983ee"
+  integrity sha512-/Tb/JcjK111nNScGob5MNtsntNM1aCNUDipB/TkwZFhyDrrE47SOx/18wF2bbjgc3ZzCSKW1T5nt5EbFoAz/Vg==
+
+cssnano-preset-default@^4.0.7:
+  version "4.0.7"
+  resolved "https://registry.yarnpkg.com/cssnano-preset-default/-/cssnano-preset-default-4.0.7.tgz#51ec662ccfca0f88b396dcd9679cdb931be17f76"
+  integrity sha512-x0YHHx2h6p0fCl1zY9L9roD7rnlltugGu7zXSKQx6k2rYw0Hi3IqxcoAGF7u9Q5w1nt7vK0ulxV8Lo+EvllGsA==
+  dependencies:
+    css-declaration-sorter "^4.0.1"
+    cssnano-util-raw-cache "^4.0.1"
+    postcss "^7.0.0"
+    postcss-calc "^7.0.1"
+    postcss-colormin "^4.0.3"
+    postcss-convert-values "^4.0.1"
+    postcss-discard-comments "^4.0.2"
+    postcss-discard-duplicates "^4.0.2"
+    postcss-discard-empty "^4.0.1"
+    postcss-discard-overridden "^4.0.1"
+    postcss-merge-longhand "^4.0.11"
+    postcss-merge-rules "^4.0.3"
+    postcss-minify-font-values "^4.0.2"
+    postcss-minify-gradients "^4.0.2"
+    postcss-minify-params "^4.0.2"
+    postcss-minify-selectors "^4.0.2"
+    postcss-normalize-charset "^4.0.1"
+    postcss-normalize-display-values "^4.0.2"
+    postcss-normalize-positions "^4.0.2"
+    postcss-normalize-repeat-style "^4.0.2"
+    postcss-normalize-string "^4.0.2"
+    postcss-normalize-timing-functions "^4.0.2"
+    postcss-normalize-unicode "^4.0.1"
+    postcss-normalize-url "^4.0.1"
+    postcss-normalize-whitespace "^4.0.2"
+    postcss-ordered-values "^4.1.2"
+    postcss-reduce-initial "^4.0.3"
+    postcss-reduce-transforms "^4.0.2"
+    postcss-svgo "^4.0.2"
+    postcss-unique-selectors "^4.0.1"
+
+cssnano-util-get-arguments@^4.0.0:
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/cssnano-util-get-arguments/-/cssnano-util-get-arguments-4.0.0.tgz#ed3a08299f21d75741b20f3b81f194ed49cc150f"
+  integrity sha1-7ToIKZ8h11dBsg87gfGU7UnMFQ8=
+
+cssnano-util-get-match@^4.0.0:
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/cssnano-util-get-match/-/cssnano-util-get-match-4.0.0.tgz#c0e4ca07f5386bb17ec5e52250b4f5961365156d"
+  integrity sha1-wOTKB/U4a7F+xeUiULT1lhNlFW0=
+
+cssnano-util-raw-cache@^4.0.1:
+  version "4.0.1"
+  resolved "https://registry.yarnpkg.com/cssnano-util-raw-cache/-/cssnano-util-raw-cache-4.0.1.tgz#b26d5fd5f72a11dfe7a7846fb4c67260f96bf282"
+  integrity sha512-qLuYtWK2b2Dy55I8ZX3ky1Z16WYsx544Q0UWViebptpwn/xDBmog2TLg4f+DBMg1rJ6JDWtn96WHbOKDWt1WQA==
+  dependencies:
+    postcss "^7.0.0"
+
+cssnano-util-same-parent@^4.0.0:
+  version "4.0.1"
+  resolved "https://registry.yarnpkg.com/cssnano-util-same-parent/-/cssnano-util-same-parent-4.0.1.tgz#574082fb2859d2db433855835d9a8456ea18bbf3"
+  integrity sha512-WcKx5OY+KoSIAxBW6UBBRay1U6vkYheCdjyVNDm85zt5K9mHoGOfsOsqIszfAqrQQFIIKgjh2+FDgIj/zsl21Q==
+
+cssnano@^4.1.10:
+  version "4.1.10"
+  resolved "https://registry.yarnpkg.com/cssnano/-/cssnano-4.1.10.tgz#0ac41f0b13d13d465487e111b778d42da631b8b2"
+  integrity sha512-5wny+F6H4/8RgNlaqab4ktc3e0/blKutmq8yNlBFXA//nSFFAqAngjNVRzUvCgYROULmZZUoosL/KSoZo5aUaQ==
+  dependencies:
+    cosmiconfig "^5.0.0"
+    cssnano-preset-default "^4.0.7"
+    is-resolvable "^1.0.0"
+    postcss "^7.0.0"
+
+csso@^4.0.2:
+  version "4.2.0"
+  resolved "https://registry.yarnpkg.com/csso/-/csso-4.2.0.tgz#ea3a561346e8dc9f546d6febedd50187cf389529"
+  integrity sha512-wvlcdIbf6pwKEk7vHj8/Bkc0B4ylXZruLvOgs9doS5eOsOpuodOV2zJChSpkp+pRpYQLQMeF04nr3Z68Sta9jA==
+  dependencies:
+    css-tree "^1.1.2"
+
+cssom@^0.4.4:
+  version "0.4.4"
+  resolved "https://registry.yarnpkg.com/cssom/-/cssom-0.4.4.tgz#5a66cf93d2d0b661d80bf6a44fb65f5c2e4e0a10"
+  integrity sha512-p3pvU7r1MyyqbTk+WbNJIgJjG2VmTIaB10rI93LzVPrmDJKkzKYMtxxyAvQXR/NS6otuzveI7+7BBq3SjBS2mw==
+
+cssom@~0.3.6:
+  version "0.3.8"
+  resolved "https://registry.yarnpkg.com/cssom/-/cssom-0.3.8.tgz#9f1276f5b2b463f2114d3f2c75250af8c1a36f4a"
+  integrity sha512-b0tGHbfegbhPJpxpiBPU2sCkigAqtM9O121le6bbOlgyV+NyGyCmVfJ6QW9eRjz8CpNfWEOYBIMIGRYkLwsIYg==
+
+cssstyle@^2.2.0:
+  version "2.3.0"
+  resolved "https://registry.yarnpkg.com/cssstyle/-/cssstyle-2.3.0.tgz#ff665a0ddbdc31864b09647f34163443d90b0852"
+  integrity sha512-AZL67abkUzIuvcHqk7c09cezpGNcxUxU4Ioi/05xHk4DQeTkWmGYftIE6ctU6AEt+Gn4n1lDStOtj7FKycP71A==
+  dependencies:
+    cssom "~0.3.6"
+
+csstype@^3.0.2, csstype@^3.0.6:
+  version "3.0.7"
+  resolved "https://registry.yarnpkg.com/csstype/-/csstype-3.0.7.tgz#2a5fb75e1015e84dd15692f71e89a1450290950b"
+  integrity sha512-KxnUB0ZMlnUWCsx2Z8MUsr6qV6ja1w9ArPErJaJaF8a5SOWoHLIszeCTKGRGRgtLgYrs1E8CHkNSP1VZTTPc9g==
+
+cyclist@^1.0.1:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/cyclist/-/cyclist-1.0.1.tgz#596e9698fd0c80e12038c2b82d6eb1b35b6224d9"
+  integrity sha1-WW6WmP0MgOEgOMK4LW6xs1tiJNk=
+
+d@1, d@^1.0.1:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/d/-/d-1.0.1.tgz#8698095372d58dbee346ffd0c7093f99f8f9eb5a"
+  integrity sha512-m62ShEObQ39CfralilEQRjH6oAMtNCV1xJyEx5LpRYUVN+EviphDgUc/F3hnYbADmkiNs67Y+3ylmlG7Lnu+FA==
+  dependencies:
+    es5-ext "^0.10.50"
+    type "^1.0.1"
+
+damerau-levenshtein@^1.0.6:
+  version "1.0.6"
+  resolved "https://registry.yarnpkg.com/damerau-levenshtein/-/damerau-levenshtein-1.0.6.tgz#143c1641cb3d85c60c32329e26899adea8701791"
+  integrity sha512-JVrozIeElnj3QzfUIt8tB8YMluBJom4Vw9qTPpjGYQ9fYlB3D/rb6OordUxf3xeFB35LKWs0xqcO5U6ySvBtug==
+
+dashdash@^1.12.0:
+  version "1.14.1"
+  resolved "https://registry.yarnpkg.com/dashdash/-/dashdash-1.14.1.tgz#853cfa0f7cbe2fed5de20326b8dd581035f6e2f0"
+  integrity sha1-hTz6D3y+L+1d4gMmuN1YEDX24vA=
+  dependencies:
+    assert-plus "^1.0.0"
+
+data-urls@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/data-urls/-/data-urls-2.0.0.tgz#156485a72963a970f5d5821aaf642bef2bf2db9b"
+  integrity sha512-X5eWTSXO/BJmpdIKCRuKUgSCgAN0OwliVK3yPKbwIWU1Tdw5BRajxlzMidvh+gwko9AfQ9zIj52pzF91Q3YAvQ==
+  dependencies:
+    abab "^2.0.3"
+    whatwg-mimetype "^2.3.0"
+    whatwg-url "^8.0.0"
+
+debug@2.6.9, debug@^2.2.0, debug@^2.3.3, debug@^2.6.0, debug@^2.6.9:
+  version "2.6.9"
+  resolved "https://registry.yarnpkg.com/debug/-/debug-2.6.9.tgz#5d128515df134ff327e90a4c93f4e077a536341f"
+  integrity sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==
+  dependencies:
+    ms "2.0.0"
+
+debug@^3.1.1, debug@^3.2.6:
+  version "3.2.7"
+  resolved "https://registry.yarnpkg.com/debug/-/debug-3.2.7.tgz#72580b7e9145fb39b6676f9c5e5fb100b934179a"
+  integrity sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ==
+  dependencies:
+    ms "^2.1.1"
+
+debug@^4.0.1, debug@^4.1.0, debug@^4.1.1:
+  version "4.3.1"
+  resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.1.tgz#f0d229c505e0c6d8c49ac553d1b13dc183f6b2ee"
+  integrity sha512-doEwdvm4PCeK4K3RQN2ZC2BYUBaxwLARCqZmMjtF8a51J2Rb0xpVloFRnCODwqjpwnAoao4pelN8l3RJdv3gRQ==
+  dependencies:
+    ms "2.1.2"
+
+decamelize@^1.2.0:
+  version "1.2.0"
+  resolved "https://registry.yarnpkg.com/decamelize/-/decamelize-1.2.0.tgz#f6534d15148269b20352e7bee26f501f9a191290"
+  integrity sha1-9lNNFRSCabIDUue+4m9QH5oZEpA=
+
+decimal.js@^10.2.0:
+  version "10.2.1"
+  resolved "https://registry.yarnpkg.com/decimal.js/-/decimal.js-10.2.1.tgz#238ae7b0f0c793d3e3cea410108b35a2c01426a3"
+  integrity sha512-KaL7+6Fw6i5A2XSnsbhm/6B+NuEA7TZ4vqxnd5tXz9sbKtrN9Srj8ab4vKVdK8YAqZO9P1kg45Y6YLoduPf+kw==
+
+decode-uri-component@^0.2.0:
+  version "0.2.0"
+  resolved "https://registry.yarnpkg.com/decode-uri-component/-/decode-uri-component-0.2.0.tgz#eb3913333458775cb84cd1a1fae062106bb87545"
+  integrity sha1-6zkTMzRYd1y4TNGh+uBiEGu4dUU=
+
+dedent@^0.7.0:
+  version "0.7.0"
+  resolved "https://registry.yarnpkg.com/dedent/-/dedent-0.7.0.tgz#2495ddbaf6eb874abb0e1be9df22d2e5a544326c"
+  integrity sha1-JJXduvbrh0q7Dhvp3yLS5aVEMmw=
+
+deep-equal@^1.0.1:
+  version "1.1.1"
+  resolved "https://registry.yarnpkg.com/deep-equal/-/deep-equal-1.1.1.tgz#b5c98c942ceffaf7cb051e24e1434a25a2e6076a"
+  integrity sha512-yd9c5AdiqVcR+JjcwUQb9DkhJc8ngNr0MahEBGvDiJw8puWab2yZlh+nkasOnZP+EGTAP6rRp2JzJhJZzvNF8g==
+  dependencies:
+    is-arguments "^1.0.4"
+    is-date-object "^1.0.1"
+    is-regex "^1.0.4"
+    object-is "^1.0.1"
+    object-keys "^1.1.1"
+    regexp.prototype.flags "^1.2.0"
+
+deep-is@^0.1.3, deep-is@~0.1.3:
+  version "0.1.3"
+  resolved "https://registry.yarnpkg.com/deep-is/-/deep-is-0.1.3.tgz#b369d6fb5dbc13eecf524f91b070feedc357cf34"
+  integrity sha1-s2nW+128E+7PUk+RsHD+7cNXzzQ=
+
+deepmerge@^4.2.2:
+  version "4.2.2"
+  resolved "https://registry.yarnpkg.com/deepmerge/-/deepmerge-4.2.2.tgz#44d2ea3679b8f4d4ffba33f03d865fc1e7bf4955"
+  integrity sha512-FJ3UgI4gIl+PHZm53knsuSFpE+nESMr7M4v9QcgB7S63Kj/6WqMiFQJpBBYz1Pt+66bZpP3Q7Lye0Oo9MPKEdg==
+
+default-gateway@^4.2.0:
+  version "4.2.0"
+  resolved "https://registry.yarnpkg.com/default-gateway/-/default-gateway-4.2.0.tgz#167104c7500c2115f6dd69b0a536bb8ed720552b"
+  integrity sha512-h6sMrVB1VMWVrW13mSc6ia/DwYYw5MN6+exNu1OaJeFac5aSAvwM7lZ0NVfTABuSkQelr4h5oebg3KB1XPdjgA==
+  dependencies:
+    execa "^1.0.0"
+    ip-regex "^2.1.0"
+
+define-properties@^1.1.2, define-properties@^1.1.3:
+  version "1.1.3"
+  resolved "https://registry.yarnpkg.com/define-properties/-/define-properties-1.1.3.tgz#cf88da6cbee26fe6db7094f61d870cbd84cee9f1"
+  integrity sha512-3MqfYKj2lLzdMSf8ZIZE/V+Zuy+BgD6f164e8K2w7dgnpKArBDerGYpM46IYYcjnkdPNMjPk9A6VFB8+3SKlXQ==
+  dependencies:
+    object-keys "^1.0.12"
+
+define-property@^0.2.5:
+  version "0.2.5"
+  resolved "https://registry.yarnpkg.com/define-property/-/define-property-0.2.5.tgz#c35b1ef918ec3c990f9a5bc57be04aacec5c8116"
+  integrity sha1-w1se+RjsPJkPmlvFe+BKrOxcgRY=
+  dependencies:
+    is-descriptor "^0.1.0"
+
+define-property@^1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/define-property/-/define-property-1.0.0.tgz#769ebaaf3f4a63aad3af9e8d304c9bbe79bfb0e6"
+  integrity sha1-dp66rz9KY6rTr56NMEybvnm/sOY=
+  dependencies:
+    is-descriptor "^1.0.0"
+
+define-property@^2.0.2:
+  version "2.0.2"
+  resolved "https://registry.yarnpkg.com/define-property/-/define-property-2.0.2.tgz#d459689e8d654ba77e02a817f8710d702cb16e9d"
+  integrity sha512-jwK2UV4cnPpbcG7+VRARKTZPUWowwXA8bzH5NP6ud0oeAxyYPuGZUAC7hMugpCdz4BeSZl2Dl9k66CHJ/46ZYQ==
+  dependencies:
+    is-descriptor "^1.0.2"
+    isobject "^3.0.1"
+
+del@^4.1.1:
+  version "4.1.1"
+  resolved "https://registry.yarnpkg.com/del/-/del-4.1.1.tgz#9e8f117222ea44a31ff3a156c049b99052a9f0b4"
+  integrity sha512-QwGuEUouP2kVwQenAsOof5Fv8K9t3D8Ca8NxcXKrIpEHjTXK5J2nXLdP+ALI1cgv8wj7KuwBhTwBkOZSJKM5XQ==
+  dependencies:
+    "@types/glob" "^7.1.1"
+    globby "^6.1.0"
+    is-path-cwd "^2.0.0"
+    is-path-in-cwd "^2.0.0"
+    p-map "^2.0.0"
+    pify "^4.0.1"
+    rimraf "^2.6.3"
+
+delayed-stream@~1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/delayed-stream/-/delayed-stream-1.0.0.tgz#df3ae199acadfb7d440aaae0b29e2272b24ec619"
+  integrity sha1-3zrhmayt+31ECqrgsp4icrJOxhk=
+
+depd@~1.1.2:
+  version "1.1.2"
+  resolved "https://registry.yarnpkg.com/depd/-/depd-1.1.2.tgz#9bcd52e14c097763e749b274c4346ed2e560b5a9"
+  integrity sha1-m81S4UwJd2PnSbJ0xDRu0uVgtak=
+
+dequal@2.0.2:
+  version "2.0.2"
+  resolved "https://registry.yarnpkg.com/dequal/-/dequal-2.0.2.tgz#85ca22025e3a87e65ef75a7a437b35284a7e319d"
+  integrity sha512-q9K8BlJVxK7hQYqa6XISGmBZbtQQWVXSrRrWreHC94rMt1QL/Impruc+7p2CYSYuVIUr+YCt6hjrs1kkdJRTug==
+
+des.js@^1.0.0:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/des.js/-/des.js-1.0.1.tgz#5382142e1bdc53f85d86d53e5f4aa7deb91e0843"
+  integrity sha512-Q0I4pfFrv2VPd34/vfLrFOoRmlYj3OV50i7fskps1jZWK1kApMWWT9G6RRUeYedLcBDIhnSDaUvJMb3AhUlaEA==
+  dependencies:
+    inherits "^2.0.1"
+    minimalistic-assert "^1.0.0"
+
+destroy@~1.0.4:
+  version "1.0.4"
+  resolved "https://registry.yarnpkg.com/destroy/-/destroy-1.0.4.tgz#978857442c44749e4206613e37946205826abd80"
+  integrity sha1-l4hXRCxEdJ5CBmE+N5RiBYJqvYA=
+
+detect-newline@^3.0.0:
+  version "3.1.0"
+  resolved "https://registry.yarnpkg.com/detect-newline/-/detect-newline-3.1.0.tgz#576f5dfc63ae1a192ff192d8ad3af6308991b651"
+  integrity sha512-TLz+x/vEXm/Y7P7wn1EJFNLxYpUD4TgMosxY6fAVJUnJMbupHBOncxyWUG9OpTaH9EBD7uFI5LfEgmMOc54DsA==
+
+detect-node-es@^1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/detect-node-es/-/detect-node-es-1.0.0.tgz#c0318b9e539a5256ca780dd9575c9345af05b8ed"
+  integrity sha512-S4AHriUkTX9FoFvL4G8hXDcx6t3gp2HpfCza3Q0v6S78gul2hKWifLQbeW+ZF89+hSm2ZIc/uF3J97ZgytgTRg==
+
+detect-node@^2.0.4:
+  version "2.0.4"
+  resolved "https://registry.yarnpkg.com/detect-node/-/detect-node-2.0.4.tgz#014ee8f8f669c5c58023da64b8179c083a28c46c"
+  integrity sha512-ZIzRpLJrOj7jjP2miAtgqIfmzbxa4ZOr5jJc601zklsfEx9oTzmmj2nVpIPRpNlRTIh8lc1kyViIY7BWSGNmKw==
+
+detect-port-alt@1.1.6:
+  version "1.1.6"
+  resolved "https://registry.yarnpkg.com/detect-port-alt/-/detect-port-alt-1.1.6.tgz#24707deabe932d4a3cf621302027c2b266568275"
+  integrity sha512-5tQykt+LqfJFBEYaDITx7S7cR7mJ/zQmLXZ2qt5w04ainYZw6tBf9dBunMjVeVOdYVRUzUOE4HkY5J7+uttb5Q==
+  dependencies:
+    address "^1.0.1"
+    debug "^2.6.0"
+
+diff-sequences@^26.6.2:
+  version "26.6.2"
+  resolved "https://registry.yarnpkg.com/diff-sequences/-/diff-sequences-26.6.2.tgz#48ba99157de1923412eed41db6b6d4aa9ca7c0b1"
+  integrity sha512-Mv/TDa3nZ9sbc5soK+OoA74BsS3mL37yixCvUAQkiuA4Wz6YtwP/K47n2rv2ovzHZvoiQeA5FTQOschKkEwB0Q==
+
+diffie-hellman@^5.0.0:
+  version "5.0.3"
+  resolved "https://registry.yarnpkg.com/diffie-hellman/-/diffie-hellman-5.0.3.tgz#40e8ee98f55a2149607146921c63e1ae5f3d2875"
+  integrity sha512-kqag/Nl+f3GwyK25fhUMYj81BUOrZ9IuJsjIcDE5icNM9FJHAVm3VcUDxdLPoQtTuUylWm6ZIknYJwwaPxsUzg==
+  dependencies:
+    bn.js "^4.1.0"
+    miller-rabin "^4.0.0"
+    randombytes "^2.0.0"
+
+dir-glob@^3.0.1:
+  version "3.0.1"
+  resolved "https://registry.yarnpkg.com/dir-glob/-/dir-glob-3.0.1.tgz#56dbf73d992a4a93ba1584f4534063fd2e41717f"
+  integrity sha512-WkrWp9GR4KXfKGYzOLmTuGVi1UWFfws377n9cc55/tb6DuqyF6pcQ5AbiHEshaDpY9v6oaSr2XCDidGmMwdzIA==
+  dependencies:
+    path-type "^4.0.0"
+
+dns-equal@^1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/dns-equal/-/dns-equal-1.0.0.tgz#b39e7f1da6eb0a75ba9c17324b34753c47e0654d"
+  integrity sha1-s55/HabrCnW6nBcySzR1PEfgZU0=
+
+dns-packet@^1.3.1:
+  version "1.3.1"
+  resolved "https://registry.yarnpkg.com/dns-packet/-/dns-packet-1.3.1.tgz#12aa426981075be500b910eedcd0b47dd7deda5a"
+  integrity sha512-0UxfQkMhYAUaZI+xrNZOz/as5KgDU0M/fQ9b6SpkyLbk3GEswDi6PADJVaYJradtRVsRIlF1zLyOodbcTCDzUg==
+  dependencies:
+    ip "^1.1.0"
+    safe-buffer "^5.0.1"
+
+dns-txt@^2.0.2:
+  version "2.0.2"
+  resolved "https://registry.yarnpkg.com/dns-txt/-/dns-txt-2.0.2.tgz#b91d806f5d27188e4ab3e7d107d881a1cc4642b6"
+  integrity sha1-uR2Ab10nGI5Ks+fRB9iBocxGQrY=
+  dependencies:
+    buffer-indexof "^1.0.0"
+
+doctrine@1.5.0:
+  version "1.5.0"
+  resolved "https://registry.yarnpkg.com/doctrine/-/doctrine-1.5.0.tgz#379dce730f6166f76cefa4e6707a159b02c5a6fa"
+  integrity sha1-N53Ocw9hZvds76TmcHoVmwLFpvo=
+  dependencies:
+    esutils "^2.0.2"
+    isarray "^1.0.0"
+
+doctrine@^2.1.0:
+  version "2.1.0"
+  resolved "https://registry.yarnpkg.com/doctrine/-/doctrine-2.1.0.tgz#5cd01fc101621b42c4cd7f5d1a66243716d3f39d"
+  integrity sha512-35mSku4ZXK0vfCuHEDAwt55dg2jNajHZ1odvF+8SSr82EsZY4QmXfuWso8oEd8zRhVObSN18aM0CjSdoBX7zIw==
+  dependencies:
+    esutils "^2.0.2"
+
+doctrine@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/doctrine/-/doctrine-3.0.0.tgz#addebead72a6574db783639dc87a121773973961"
+  integrity sha512-yS+Q5i3hBf7GBkd4KG8a7eBNNWNGLTaEwwYWUijIYM7zrlYDM0BFXHjjPWlWZ1Rg7UaddZeIDmi9jF3HmqiQ2w==
+  dependencies:
+    esutils "^2.0.2"
+
+dom-accessibility-api@^0.5.4:
+  version "0.5.4"
+  resolved "https://registry.yarnpkg.com/dom-accessibility-api/-/dom-accessibility-api-0.5.4.tgz#b06d059cdd4a4ad9a79275f9d414a5c126241166"
+  integrity sha512-TvrjBckDy2c6v6RLxPv5QXOnU+SmF9nBII5621Ve5fu6Z/BDrENurBEvlC1f44lKEUVqOpK4w9E5Idc5/EgkLQ==
+
+dom-converter@^0.2:
+  version "0.2.0"
+  resolved "https://registry.yarnpkg.com/dom-converter/-/dom-converter-0.2.0.tgz#6721a9daee2e293682955b6afe416771627bb768"
+  integrity sha512-gd3ypIPfOMr9h5jIKq8E3sHOTCjeirnl0WK5ZdS1AW0Odt0b1PaWaHdJ4Qk4klv+YB9aJBS7mESXjFoDQPu6DA==
+  dependencies:
+    utila "~0.4"
+
+dom-serializer@0:
+  version "0.2.2"
+  resolved "https://registry.yarnpkg.com/dom-serializer/-/dom-serializer-0.2.2.tgz#1afb81f533717175d478655debc5e332d9f9bb51"
+  integrity sha512-2/xPb3ORsQ42nHYiSunXkDjPLBaEj/xTwUO4B7XCZQTRk7EBtTOPaygh10YAAh2OI1Qrp6NWfpAhzswj0ydt9g==
+  dependencies:
+    domelementtype "^2.0.1"
+    entities "^2.0.0"
+
+domain-browser@^1.1.1:
+  version "1.2.0"
+  resolved "https://registry.yarnpkg.com/domain-browser/-/domain-browser-1.2.0.tgz#3d31f50191a6749dd1375a7f522e823d42e54eda"
+  integrity sha512-jnjyiM6eRyZl2H+W8Q/zLMA481hzi0eszAaBUzIVnmYVDBbnLxVNnfu1HgEBvCbL+71FrxMl3E6lpKH7Ge3OXA==
+
+domelementtype@1, domelementtype@^1.3.1:
+  version "1.3.1"
+  resolved "https://registry.yarnpkg.com/domelementtype/-/domelementtype-1.3.1.tgz#d048c44b37b0d10a7f2a3d5fee3f4333d790481f"
+  integrity sha512-BSKB+TSpMpFI/HOxCNr1O8aMOTZ8hT3pM3GQ0w/mWRmkhEDSFJkkyzz4XQsBV44BChwGkrDfMyjVD0eA2aFV3w==
+
+domelementtype@^2.0.1:
+  version "2.1.0"
+  resolved "https://registry.yarnpkg.com/domelementtype/-/domelementtype-2.1.0.tgz#a851c080a6d1c3d94344aed151d99f669edf585e"
+  integrity sha512-LsTgx/L5VpD+Q8lmsXSHW2WpA+eBlZ9HPf3erD1IoPF00/3JKHZ3BknUVA2QGDNu69ZNmyFmCWBSO45XjYKC5w==
+
+domexception@^2.0.1:
+  version "2.0.1"
+  resolved "https://registry.yarnpkg.com/domexception/-/domexception-2.0.1.tgz#fb44aefba793e1574b0af6aed2801d057529f304"
+  integrity sha512-yxJ2mFy/sibVQlu5qHjOkf9J3K6zgmCxgJ94u2EdvDOV09H+32LtRswEcUsmUWN72pVLOEnTSRaIVVzVQgS0dg==
+  dependencies:
+    webidl-conversions "^5.0.0"
+
+domhandler@^2.3.0:
+  version "2.4.2"
+  resolved "https://registry.yarnpkg.com/domhandler/-/domhandler-2.4.2.tgz#8805097e933d65e85546f726d60f5eb88b44f803"
+  integrity sha512-JiK04h0Ht5u/80fdLMCEmV4zkNh2BcoMFBmZ/91WtYZ8qVXSKjiw7fXMgFPnHcSZgOo3XdinHvmnDUeMf5R4wA==
+  dependencies:
+    domelementtype "1"
+
+domutils@^1.5.1, domutils@^1.7.0:
+  version "1.7.0"
+  resolved "https://registry.yarnpkg.com/domutils/-/domutils-1.7.0.tgz#56ea341e834e06e6748af7a1cb25da67ea9f8c2a"
+  integrity sha512-Lgd2XcJ/NjEw+7tFvfKxOzCYKZsdct5lczQ2ZaQY8Djz7pfAD3Gbp8ySJWtreII/vDlMVmxwa6pHmdxIYgttDg==
+  dependencies:
+    dom-serializer "0"
+    domelementtype "1"
+
+dot-case@^3.0.4:
+  version "3.0.4"
+  resolved "https://registry.yarnpkg.com/dot-case/-/dot-case-3.0.4.tgz#9b2b670d00a431667a8a75ba29cd1b98809ce751"
+  integrity sha512-Kv5nKlh6yRrdrGvxeJ2e5y2eRUpkUosIW4A2AS38zwSz27zu7ufDwQPi5Jhs3XAlGNetl3bmnGhQsMtkKJnj3w==
+  dependencies:
+    no-case "^3.0.4"
+    tslib "^2.0.3"
+
+dot-prop@^5.2.0:
+  version "5.3.0"
+  resolved "https://registry.yarnpkg.com/dot-prop/-/dot-prop-5.3.0.tgz#90ccce708cd9cd82cc4dc8c3ddd9abdd55b20e88"
+  integrity sha512-QM8q3zDe58hqUqjraQOmzZ1LIH9SWQJTlEKCH4kJ2oQvLZk7RbQXvtDM2XEq3fwkV9CCvvH4LA0AV+ogFsBM2Q==
+  dependencies:
+    is-obj "^2.0.0"
+
+dotenv-expand@5.1.0:
+  version "5.1.0"
+  resolved "https://registry.yarnpkg.com/dotenv-expand/-/dotenv-expand-5.1.0.tgz#3fbaf020bfd794884072ea26b1e9791d45a629f0"
+  integrity sha512-YXQl1DSa4/PQyRfgrv6aoNjhasp/p4qs9FjJ4q4cQk+8m4r6k4ZSiEyytKG8f8W9gi8WsQtIObNmKd+tMzNTmA==
+
+dotenv@8.2.0:
+  version "8.2.0"
+  resolved "https://registry.yarnpkg.com/dotenv/-/dotenv-8.2.0.tgz#97e619259ada750eea3e4ea3e26bceea5424b16a"
+  integrity sha512-8sJ78ElpbDJBHNeBzUbUVLsqKdccaa/BXF1uPTw3GrvQTBgrQrtObr2mUrE38vzYd8cEv+m/JBfDLioYcfXoaw==
+
+duplexer@^0.1.1:
+  version "0.1.2"
+  resolved "https://registry.yarnpkg.com/duplexer/-/duplexer-0.1.2.tgz#3abe43aef3835f8ae077d136ddce0f276b0400e6"
+  integrity sha512-jtD6YG370ZCIi/9GTaJKQxWTZD045+4R4hTk/x1UyoqadyJ9x9CgSi1RlVDQF8U2sxLLSnFkCaMihqljHIWgMg==
+
+duplexify@^3.4.2, duplexify@^3.6.0:
+  version "3.7.1"
+  resolved "https://registry.yarnpkg.com/duplexify/-/duplexify-3.7.1.tgz#2a4df5317f6ccfd91f86d6fd25d8d8a103b88309"
+  integrity sha512-07z8uv2wMyS51kKhD1KsdXJg5WQ6t93RneqRxUHnskXVtlYYkLqM0gqStQZ3pj073g687jPCHrqNfCzawLYh5g==
+  dependencies:
+    end-of-stream "^1.0.0"
+    inherits "^2.0.1"
+    readable-stream "^2.0.0"
+    stream-shift "^1.0.0"
+
+ecc-jsbn@~0.1.1:
+  version "0.1.2"
+  resolved "https://registry.yarnpkg.com/ecc-jsbn/-/ecc-jsbn-0.1.2.tgz#3a83a904e54353287874c564b7549386849a98c9"
+  integrity sha1-OoOpBOVDUyh4dMVkt1SThoSamMk=
+  dependencies:
+    jsbn "~0.1.0"
+    safer-buffer "^2.1.0"
+
+ee-first@1.1.1:
+  version "1.1.1"
+  resolved "https://registry.yarnpkg.com/ee-first/-/ee-first-1.1.1.tgz#590c61156b0ae2f4f0255732a158b266bc56b21d"
+  integrity sha1-WQxhFWsK4vTwJVcyoViyZrxWsh0=
+
+ejs@^2.6.1:
+  version "2.7.4"
+  resolved "https://registry.yarnpkg.com/ejs/-/ejs-2.7.4.tgz#48661287573dcc53e366c7a1ae52c3a120eec9ba"
+  integrity sha512-7vmuyh5+kuUyJKePhQfRQBhXV5Ce+RnaeeQArKu1EAMpL3WbgMt5WG6uQZpEVvYSSsxMXRKOewtDk9RaTKXRlA==
+
+electron-to-chromium@^1.3.564, electron-to-chromium@^1.3.649:
+  version "1.3.671"
+  resolved "https://registry.yarnpkg.com/electron-to-chromium/-/electron-to-chromium-1.3.671.tgz#8feaed6eae42d279fa4611f58c42a5a1eb81b2a0"
+  integrity sha512-RTD97QkdrJKaKwRv9h/wGAaoR2lGxNXEcBXS31vjitgTPwTWAbLdS7cEsBK68eEQy7p6YyT8D5BxBEYHu2SuwQ==
+
+elliptic@^6.5.3:
+  version "6.5.4"
+  resolved "https://registry.yarnpkg.com/elliptic/-/elliptic-6.5.4.tgz#da37cebd31e79a1367e941b592ed1fbebd58abbb"
+  integrity sha512-iLhC6ULemrljPZb+QutR5TQGB+pdW6KGD5RSegS+8sorOZT+rdQFbsQFJgvN3eRqNALqJer4oQ16YvJHlU8hzQ==
+  dependencies:
+    bn.js "^4.11.9"
+    brorand "^1.1.0"
+    hash.js "^1.0.0"
+    hmac-drbg "^1.0.1"
+    inherits "^2.0.4"
+    minimalistic-assert "^1.0.1"
+    minimalistic-crypto-utils "^1.0.1"
+
+emittery@^0.7.1:
+  version "0.7.2"
+  resolved "https://registry.yarnpkg.com/emittery/-/emittery-0.7.2.tgz#25595908e13af0f5674ab419396e2fb394cdfa82"
+  integrity sha512-A8OG5SR/ij3SsJdWDJdkkSYUjQdCUx6APQXem0SaEePBSRg4eymGYwBkKo1Y6DU+af/Jn2dBQqDBvjnr9Vi8nQ==
+
+emoji-regex@^7.0.1:
+  version "7.0.3"
+  resolved "https://registry.yarnpkg.com/emoji-regex/-/emoji-regex-7.0.3.tgz#933a04052860c85e83c122479c4748a8e4c72156"
+  integrity sha512-CwBLREIQ7LvYFB0WyRvwhq5N5qPhc6PMjD6bYggFlI5YyDgl+0vxq5VHbMOFqLg7hfWzmu8T5Z1QofhmTIhItA==
+
+emoji-regex@^8.0.0:
+  version "8.0.0"
+  resolved "https://registry.yarnpkg.com/emoji-regex/-/emoji-regex-8.0.0.tgz#e818fd69ce5ccfcb404594f842963bf53164cc37"
+  integrity sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==
+
+emoji-regex@^9.0.0:
+  version "9.2.1"
+  resolved "https://registry.yarnpkg.com/emoji-regex/-/emoji-regex-9.2.1.tgz#c9b25604256bb3428964bead3ab63069d736f7ee"
+  integrity sha512-117l1H6U4X3Krn+MrzYrL57d5H7siRHWraBs7s+LjRuFK7Fe7hJqnJ0skWlinqsycVLU5YAo6L8CsEYQ0V5prg==
+
+emojis-list@^2.0.0:
+  version "2.1.0"
+  resolved "https://registry.yarnpkg.com/emojis-list/-/emojis-list-2.1.0.tgz#4daa4d9db00f9819880c79fa457ae5b09a1fd389"
+  integrity sha1-TapNnbAPmBmIDHn6RXrlsJof04k=
+
+emojis-list@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/emojis-list/-/emojis-list-3.0.0.tgz#5570662046ad29e2e916e71aae260abdff4f6a78"
+  integrity sha512-/kyM18EfinwXZbno9FyUGeFh87KC8HRQBQGildHZbEuRyWFOmv1U10o9BBp8XVZDVNNuQKyIGIu5ZYAAXJ0V2Q==
+
+encodeurl@~1.0.2:
+  version "1.0.2"
+  resolved "https://registry.yarnpkg.com/encodeurl/-/encodeurl-1.0.2.tgz#ad3ff4c86ec2d029322f5a02c3a9a606c95b3f59"
+  integrity sha1-rT/0yG7C0CkyL1oCw6mmBslbP1k=
+
+end-of-stream@^1.0.0, end-of-stream@^1.1.0:
+  version "1.4.4"
+  resolved "https://registry.yarnpkg.com/end-of-stream/-/end-of-stream-1.4.4.tgz#5ae64a5f45057baf3626ec14da0ca5e4b2431eb0"
+  integrity sha512-+uw1inIHVPQoaVuHzRyXd21icM+cnt4CzD5rW+NC1wjOUSTOs+Te7FOv7AhN7vS9x/oIyhLP5PR1H+phQAHu5Q==
+  dependencies:
+    once "^1.4.0"
+
+enhanced-resolve@^4.3.0:
+  version "4.5.0"
+  resolved "https://registry.yarnpkg.com/enhanced-resolve/-/enhanced-resolve-4.5.0.tgz#2f3cfd84dbe3b487f18f2db2ef1e064a571ca5ec"
+  integrity sha512-Nv9m36S/vxpsI+Hc4/ZGRs0n9mXqSWGGq49zxb/cJfPAQMbUtttJAlNPS4AQzaBdw/pKskw5bMbekT/Y7W/Wlg==
+  dependencies:
+    graceful-fs "^4.1.2"
+    memory-fs "^0.5.0"
+    tapable "^1.0.0"
+
+enquirer@^2.3.5:
+  version "2.3.6"
+  resolved "https://registry.yarnpkg.com/enquirer/-/enquirer-2.3.6.tgz#2a7fe5dd634a1e4125a975ec994ff5456dc3734d"
+  integrity sha512-yjNnPr315/FjS4zIsUxYguYUPP2e1NK4d7E7ZOLiyYCcbFBiTMyID+2wvm2w6+pZ/odMA7cRkjhsPbltwBOrLg==
+  dependencies:
+    ansi-colors "^4.1.1"
+
+entities@^1.1.1:
+  version "1.1.2"
+  resolved "https://registry.yarnpkg.com/entities/-/entities-1.1.2.tgz#bdfa735299664dfafd34529ed4f8522a275fea56"
+  integrity sha512-f2LZMYl1Fzu7YSBKg+RoROelpOaNrcGmE9AZubeDfrCEia483oW4MI4VyFd5VNHIgQ/7qm1I0wUHK1eJnn2y2w==
+
+entities@^2.0.0:
+  version "2.2.0"
+  resolved "https://registry.yarnpkg.com/entities/-/entities-2.2.0.tgz#098dc90ebb83d8dffa089d55256b351d34c4da55"
+  integrity sha512-p92if5Nz619I0w+akJrLZH0MX0Pb5DX39XOwQTtXSdQQOaYH03S1uIQp4mhOZtAXrxq4ViO67YTiLBo2638o9A==
+
+errno@^0.1.3, errno@~0.1.7:
+  version "0.1.8"
+  resolved "https://registry.yarnpkg.com/errno/-/errno-0.1.8.tgz#8bb3e9c7d463be4976ff888f76b4809ebc2e811f"
+  integrity sha512-dJ6oBr5SQ1VSd9qkk7ByRgb/1SH4JZjCHSW/mr63/QcXO9zLVxvJ6Oy13nio03rxpSnVDDjFor75SjVeZWPW/A==
+  dependencies:
+    prr "~1.0.1"
+
+error-ex@^1.2.0, error-ex@^1.3.1:
+  version "1.3.2"
+  resolved "https://registry.yarnpkg.com/error-ex/-/error-ex-1.3.2.tgz#b4ac40648107fdcdcfae242f428bea8a14d4f1bf"
+  integrity sha512-7dFHNmqeFSEt2ZBsCriorKnn3Z2pj+fd9kmI6QoWw4//DL+icEBfc0U7qJCisqrTsKTjw4fNFy2pW9OqStD84g==
+  dependencies:
+    is-arrayish "^0.2.1"
+
+error-stack-parser@^2.0.6:
+  version "2.0.6"
+  resolved "https://registry.yarnpkg.com/error-stack-parser/-/error-stack-parser-2.0.6.tgz#5a99a707bd7a4c58a797902d48d82803ede6aad8"
+  integrity sha512-d51brTeqC+BHlwF0BhPtcYgF5nlzf9ZZ0ZIUQNZpc9ZB9qw5IJ2diTrBY9jlCJkTLITYPjmiX6OWCwH+fuyNgQ==
+  dependencies:
+    stackframe "^1.1.1"
+
+es-abstract@^1.17.2:
+  version "1.17.7"
+  resolved "https://registry.yarnpkg.com/es-abstract/-/es-abstract-1.17.7.tgz#a4de61b2f66989fc7421676c1cb9787573ace54c"
+  integrity sha512-VBl/gnfcJ7OercKA9MVaegWsBHFjV492syMudcnQZvt/Dw8ezpcOHYZXa/J96O8vx+g4x65YKhxOwDUh63aS5g==
+  dependencies:
+    es-to-primitive "^1.2.1"
+    function-bind "^1.1.1"
+    has "^1.0.3"
+    has-symbols "^1.0.1"
+    is-callable "^1.2.2"
+    is-regex "^1.1.1"
+    object-inspect "^1.8.0"
+    object-keys "^1.1.1"
+    object.assign "^4.1.1"
+    string.prototype.trimend "^1.0.1"
+    string.prototype.trimstart "^1.0.1"
+
+es-abstract@^1.18.0-next.1, es-abstract@^1.18.0-next.2:
+  version "1.18.0-next.2"
+  resolved "https://registry.yarnpkg.com/es-abstract/-/es-abstract-1.18.0-next.2.tgz#088101a55f0541f595e7e057199e27ddc8f3a5c2"
+  integrity sha512-Ih4ZMFHEtZupnUh6497zEL4y2+w8+1ljnCyaTa+adcoafI1GOvMwFlDjBLfWR7y9VLfrjRJe9ocuHY1PSR9jjw==
+  dependencies:
+    call-bind "^1.0.2"
+    es-to-primitive "^1.2.1"
+    function-bind "^1.1.1"
+    get-intrinsic "^1.0.2"
+    has "^1.0.3"
+    has-symbols "^1.0.1"
+    is-callable "^1.2.2"
+    is-negative-zero "^2.0.1"
+    is-regex "^1.1.1"
+    object-inspect "^1.9.0"
+    object-keys "^1.1.1"
+    object.assign "^4.1.2"
+    string.prototype.trimend "^1.0.3"
+    string.prototype.trimstart "^1.0.3"
+
+es-to-primitive@^1.2.1:
+  version "1.2.1"
+  resolved "https://registry.yarnpkg.com/es-to-primitive/-/es-to-primitive-1.2.1.tgz#e55cd4c9cdc188bcefb03b366c736323fc5c898a"
+  integrity sha512-QCOllgZJtaUo9miYBcLChTUaHNjJF3PYs1VidD7AwiEj1kYxKeQTctLAezAOH5ZKRH0g2IgPn6KwB4IT8iRpvA==
+  dependencies:
+    is-callable "^1.1.4"
+    is-date-object "^1.0.1"
+    is-symbol "^1.0.2"
+
+es5-ext@^0.10.35, es5-ext@^0.10.50:
+  version "0.10.53"
+  resolved "https://registry.yarnpkg.com/es5-ext/-/es5-ext-0.10.53.tgz#93c5a3acfdbef275220ad72644ad02ee18368de1"
+  integrity sha512-Xs2Stw6NiNHWypzRTY1MtaG/uJlwCk8kH81920ma8mvN8Xq1gsfhZvpkImLQArw8AHnv8MT2I45J3c0R8slE+Q==
+  dependencies:
+    es6-iterator "~2.0.3"
+    es6-symbol "~3.1.3"
+    next-tick "~1.0.0"
+
+es6-iterator@2.0.3, es6-iterator@~2.0.3:
+  version "2.0.3"
+  resolved "https://registry.yarnpkg.com/es6-iterator/-/es6-iterator-2.0.3.tgz#a7de889141a05a94b0854403b2d0a0fbfa98f3b7"
+  integrity sha1-p96IkUGgWpSwhUQDstCg+/qY87c=
+  dependencies:
+    d "1"
+    es5-ext "^0.10.35"
+    es6-symbol "^3.1.1"
+
+es6-symbol@^3.1.1, es6-symbol@~3.1.3:
+  version "3.1.3"
+  resolved "https://registry.yarnpkg.com/es6-symbol/-/es6-symbol-3.1.3.tgz#bad5d3c1bcdac28269f4cb331e431c78ac705d18"
+  integrity sha512-NJ6Yn3FuDinBaBRWl/q5X/s4koRHBrgKAu+yGI6JCBeiu3qrcbJhwT2GeR/EXVfylRk8dpQVJoLEFhK+Mu31NA==
+  dependencies:
+    d "^1.0.1"
+    ext "^1.1.2"
+
+escalade@^3.0.2, escalade@^3.1.1:
+  version "3.1.1"
+  resolved "https://registry.yarnpkg.com/escalade/-/escalade-3.1.1.tgz#d8cfdc7000965c5a0174b4a82eaa5c0552742e40"
+  integrity sha512-k0er2gUkLf8O0zKJiAhmkTnJlTvINGv7ygDNPbeIsX/TJjGJZHuh9B2UxbsaEkmlEo9MfhrSzmhIlhRlI2GXnw==
+
+escape-html@~1.0.3:
+  version "1.0.3"
+  resolved "https://registry.yarnpkg.com/escape-html/-/escape-html-1.0.3.tgz#0258eae4d3d0c0974de1c169188ef0051d1d1988"
+  integrity sha1-Aljq5NPQwJdN4cFpGI7wBR0dGYg=
+
+escape-string-regexp@2.0.0, escape-string-regexp@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/escape-string-regexp/-/escape-string-regexp-2.0.0.tgz#a30304e99daa32e23b2fd20f51babd07cffca344"
+  integrity sha512-UpzcLCXolUWcNu5HtVMHYdXJjArjsF9C0aNnquZYY4uW/Vu0miy5YoWvbV345HauVvcAUnpRuhMMcqTcGOY2+w==
+
+escape-string-regexp@^1.0.5:
+  version "1.0.5"
+  resolved "https://registry.yarnpkg.com/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz#1b61c0562190a8dff6ae3bb2cf0200ca130b86d4"
+  integrity sha1-G2HAViGQqN/2rjuyzwIAyhMLhtQ=
+
+escape-string-regexp@^4.0.0:
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz#14ba83a5d373e3d311e5afca29cf5bfad965bf34"
+  integrity sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA==
+
+escodegen@^1.14.1:
+  version "1.14.3"
+  resolved "https://registry.yarnpkg.com/escodegen/-/escodegen-1.14.3.tgz#4e7b81fba61581dc97582ed78cab7f0e8d63f503"
+  integrity sha512-qFcX0XJkdg+PB3xjZZG/wKSuT1PnQWx57+TVSjIMmILd2yC/6ByYElPwJnslDsuWuSAp4AwJGumarAAmJch5Kw==
+  dependencies:
+    esprima "^4.0.1"
+    estraverse "^4.2.0"
+    esutils "^2.0.2"
+    optionator "^0.8.1"
+  optionalDependencies:
+    source-map "~0.6.1"
+
+eslint-config-react-app@^6.0.0:
+  version "6.0.0"
+  resolved "https://registry.yarnpkg.com/eslint-config-react-app/-/eslint-config-react-app-6.0.0.tgz#ccff9fc8e36b322902844cbd79197982be355a0e"
+  integrity sha512-bpoAAC+YRfzq0dsTk+6v9aHm/uqnDwayNAXleMypGl6CpxI9oXXscVHo4fk3eJPIn+rsbtNetB4r/ZIidFIE8A==
+  dependencies:
+    confusing-browser-globals "^1.0.10"
+
+eslint-import-resolver-node@^0.3.4:
+  version "0.3.4"
+  resolved "https://registry.yarnpkg.com/eslint-import-resolver-node/-/eslint-import-resolver-node-0.3.4.tgz#85ffa81942c25012d8231096ddf679c03042c717"
+  integrity sha512-ogtf+5AB/O+nM6DIeBUNr2fuT7ot9Qg/1harBfBtaP13ekEWFQEEMP94BCB7zaNW3gyY+8SHYF00rnqYwXKWOA==
+  dependencies:
+    debug "^2.6.9"
+    resolve "^1.13.1"
+
+eslint-module-utils@^2.6.0:
+  version "2.6.0"
+  resolved "https://registry.yarnpkg.com/eslint-module-utils/-/eslint-module-utils-2.6.0.tgz#579ebd094f56af7797d19c9866c9c9486629bfa6"
+  integrity sha512-6j9xxegbqe8/kZY8cYpcp0xhbK0EgJlg3g9mib3/miLaExuuwc3n5UEfSnU6hWMbT0FAYVvDbL9RrRgpUeQIvA==
+  dependencies:
+    debug "^2.6.9"
+    pkg-dir "^2.0.0"
+
+eslint-plugin-flowtype@^5.2.0:
+  version "5.2.2"
+  resolved "https://registry.yarnpkg.com/eslint-plugin-flowtype/-/eslint-plugin-flowtype-5.2.2.tgz#c6e5dd2fad4e757a1c63e652da6cff597659554f"
+  integrity sha512-C4PlPYpszr9h1cBfUbTNRI1IdxUCF0qrXAHkXS2+bESp7WUUCnvb3UBBnYlaQLvJYJ2lRz+2SPQQ/WyV7p/Tow==
+  dependencies:
+    lodash "^4.17.15"
+    string-natural-compare "^3.0.1"
+
+eslint-plugin-import@^2.22.1:
+  version "2.22.1"
+  resolved "https://registry.yarnpkg.com/eslint-plugin-import/-/eslint-plugin-import-2.22.1.tgz#0896c7e6a0cf44109a2d97b95903c2bb689d7702"
+  integrity sha512-8K7JjINHOpH64ozkAhpT3sd+FswIZTfMZTjdx052pnWrgRCVfp8op9tbjpAk3DdUeI/Ba4C8OjdC0r90erHEOw==
+  dependencies:
+    array-includes "^3.1.1"
+    array.prototype.flat "^1.2.3"
+    contains-path "^0.1.0"
+    debug "^2.6.9"
+    doctrine "1.5.0"
+    eslint-import-resolver-node "^0.3.4"
+    eslint-module-utils "^2.6.0"
+    has "^1.0.3"
+    minimatch "^3.0.4"
+    object.values "^1.1.1"
+    read-pkg-up "^2.0.0"
+    resolve "^1.17.0"
+    tsconfig-paths "^3.9.0"
+
+eslint-plugin-jest@^24.1.0:
+  version "24.1.5"
+  resolved "https://registry.yarnpkg.com/eslint-plugin-jest/-/eslint-plugin-jest-24.1.5.tgz#1e866a9f0deac587d0a3d5d7cefe99815a580de2"
+  integrity sha512-FIP3lwC8EzEG+rOs1y96cOJmMVpdFNreoDJv29B5vIupVssRi8zrSY3QadogT0K3h1Y8TMxJ6ZSAzYUmFCp2hg==
+  dependencies:
+    "@typescript-eslint/experimental-utils" "^4.0.1"
+
+eslint-plugin-jsx-a11y@^6.3.1:
+  version "6.4.1"
+  resolved "https://registry.yarnpkg.com/eslint-plugin-jsx-a11y/-/eslint-plugin-jsx-a11y-6.4.1.tgz#a2d84caa49756942f42f1ffab9002436391718fd"
+  integrity sha512-0rGPJBbwHoGNPU73/QCLP/vveMlM1b1Z9PponxO87jfr6tuH5ligXbDT6nHSSzBC8ovX2Z+BQu7Bk5D/Xgq9zg==
+  dependencies:
+    "@babel/runtime" "^7.11.2"
+    aria-query "^4.2.2"
+    array-includes "^3.1.1"
+    ast-types-flow "^0.0.7"
+    axe-core "^4.0.2"
+    axobject-query "^2.2.0"
+    damerau-levenshtein "^1.0.6"
+    emoji-regex "^9.0.0"
+    has "^1.0.3"
+    jsx-ast-utils "^3.1.0"
+    language-tags "^1.0.5"
+
+eslint-plugin-react-hooks@^4.2.0:
+  version "4.2.0"
+  resolved "https://registry.yarnpkg.com/eslint-plugin-react-hooks/-/eslint-plugin-react-hooks-4.2.0.tgz#8c229c268d468956334c943bb45fc860280f5556"
+  integrity sha512-623WEiZJqxR7VdxFCKLI6d6LLpwJkGPYKODnkH3D7WpOG5KM8yWueBd8TLsNAetEJNF5iJmolaAKO3F8yzyVBQ==
+
+eslint-plugin-react@^7.21.5:
+  version "7.22.0"
+  resolved "https://registry.yarnpkg.com/eslint-plugin-react/-/eslint-plugin-react-7.22.0.tgz#3d1c542d1d3169c45421c1215d9470e341707269"
+  integrity sha512-p30tuX3VS+NWv9nQot9xIGAHBXR0+xJVaZriEsHoJrASGCJZDJ8JLNM0YqKqI0AKm6Uxaa1VUHoNEibxRCMQHA==
+  dependencies:
+    array-includes "^3.1.1"
+    array.prototype.flatmap "^1.2.3"
+    doctrine "^2.1.0"
+    has "^1.0.3"
+    jsx-ast-utils "^2.4.1 || ^3.0.0"
+    object.entries "^1.1.2"
+    object.fromentries "^2.0.2"
+    object.values "^1.1.1"
+    prop-types "^15.7.2"
+    resolve "^1.18.1"
+    string.prototype.matchall "^4.0.2"
+
+eslint-plugin-testing-library@^3.9.2:
+  version "3.10.1"
+  resolved "https://registry.yarnpkg.com/eslint-plugin-testing-library/-/eslint-plugin-testing-library-3.10.1.tgz#4dd02306d601c3238fdabf1d1dbc5f2a8e85d531"
+  integrity sha512-nQIFe2muIFv2oR2zIuXE4vTbcFNx8hZKRzgHZqJg8rfopIWwoTwtlbCCNELT/jXzVe1uZF68ALGYoDXjLczKiQ==
+  dependencies:
+    "@typescript-eslint/experimental-utils" "^3.10.1"
+
+eslint-scope@^4.0.3:
+  version "4.0.3"
+  resolved "https://registry.yarnpkg.com/eslint-scope/-/eslint-scope-4.0.3.tgz#ca03833310f6889a3264781aa82e63eb9cfe7848"
+  integrity sha512-p7VutNr1O/QrxysMo3E45FjYDTeXBy0iTltPFNSqKAIfjDSXC+4dj+qfyuD8bfAXrW/y6lW3O76VaYNPKfpKrg==
+  dependencies:
+    esrecurse "^4.1.0"
+    estraverse "^4.1.1"
+
+eslint-scope@^5.0.0, eslint-scope@^5.1.1:
+  version "5.1.1"
+  resolved "https://registry.yarnpkg.com/eslint-scope/-/eslint-scope-5.1.1.tgz#e786e59a66cb92b3f6c1fb0d508aab174848f48c"
+  integrity sha512-2NxwbF/hZ0KpepYN0cNbo+FN6XoK7GaHlQhgx/hIZl6Va0bF45RQOOwhLIy8lQDbuCiadSLCBnH2CFYquit5bw==
+  dependencies:
+    esrecurse "^4.3.0"
+    estraverse "^4.1.1"
+
+eslint-utils@^2.0.0, eslint-utils@^2.1.0:
+  version "2.1.0"
+  resolved "https://registry.yarnpkg.com/eslint-utils/-/eslint-utils-2.1.0.tgz#d2de5e03424e707dc10c74068ddedae708741b27"
+  integrity sha512-w94dQYoauyvlDc43XnGB8lU3Zt713vNChgt4EWwhXAP2XkBvndfxF0AgIqKOOasjPIPzj9JqgwkwbCYD0/V3Zg==
+  dependencies:
+    eslint-visitor-keys "^1.1.0"
+
+eslint-visitor-keys@^1.0.0, eslint-visitor-keys@^1.1.0, eslint-visitor-keys@^1.3.0:
+  version "1.3.0"
+  resolved "https://registry.yarnpkg.com/eslint-visitor-keys/-/eslint-visitor-keys-1.3.0.tgz#30ebd1ef7c2fdff01c3a4f151044af25fab0523e"
+  integrity sha512-6J72N8UNa462wa/KFODt/PJ3IU60SDpC3QXC1Hjc1BXXpfL2C9R5+AU7jhe0F6GREqVMh4Juu+NY7xn+6dipUQ==
+
+eslint-visitor-keys@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/eslint-visitor-keys/-/eslint-visitor-keys-2.0.0.tgz#21fdc8fbcd9c795cc0321f0563702095751511a8"
+  integrity sha512-QudtT6av5WXels9WjIM7qz1XD1cWGvX4gGXvp/zBn9nXG02D0utdU3Em2m/QjTnrsk6bBjmCygl3rmj118msQQ==
+
+eslint-webpack-plugin@^2.5.2:
+  version "2.5.2"
+  resolved "https://registry.yarnpkg.com/eslint-webpack-plugin/-/eslint-webpack-plugin-2.5.2.tgz#4ee17577d6392bf72048080a1678d6237183db81"
+  integrity sha512-ndD9chZ/kaGnjjx7taRg7c6FK/YKb29SSYzaLtPBIYLYJQmZtuKqtQbAvTS2ymiMQT6X0VW9vZIHK0KLstv93Q==
+  dependencies:
+    "@types/eslint" "^7.2.6"
+    arrify "^2.0.1"
+    jest-worker "^26.6.2"
+    micromatch "^4.0.2"
+    schema-utils "^3.0.0"
+
+eslint@^7.11.0:
+  version "7.20.0"
+  resolved "https://registry.yarnpkg.com/eslint/-/eslint-7.20.0.tgz#db07c4ca4eda2e2316e7aa57ac7fc91ec550bdc7"
+  integrity sha512-qGi0CTcOGP2OtCQBgWZlQjcTuP0XkIpYFj25XtRTQSHC+umNnp7UMshr2G8SLsRFYDdAPFeHOsiteadmMH02Yw==
+  dependencies:
+    "@babel/code-frame" "7.12.11"
+    "@eslint/eslintrc" "^0.3.0"
+    ajv "^6.10.0"
+    chalk "^4.0.0"
+    cross-spawn "^7.0.2"
+    debug "^4.0.1"
+    doctrine "^3.0.0"
+    enquirer "^2.3.5"
+    eslint-scope "^5.1.1"
+    eslint-utils "^2.1.0"
+    eslint-visitor-keys "^2.0.0"
+    espree "^7.3.1"
+    esquery "^1.4.0"
+    esutils "^2.0.2"
+    file-entry-cache "^6.0.0"
+    functional-red-black-tree "^1.0.1"
+    glob-parent "^5.0.0"
+    globals "^12.1.0"
+    ignore "^4.0.6"
+    import-fresh "^3.0.0"
+    imurmurhash "^0.1.4"
+    is-glob "^4.0.0"
+    js-yaml "^3.13.1"
+    json-stable-stringify-without-jsonify "^1.0.1"
+    levn "^0.4.1"
+    lodash "^4.17.20"
+    minimatch "^3.0.4"
+    natural-compare "^1.4.0"
+    optionator "^0.9.1"
+    progress "^2.0.0"
+    regexpp "^3.1.0"
+    semver "^7.2.1"
+    strip-ansi "^6.0.0"
+    strip-json-comments "^3.1.0"
+    table "^6.0.4"
+    text-table "^0.2.0"
+    v8-compile-cache "^2.0.3"
+
+espree@^7.3.0, espree@^7.3.1:
+  version "7.3.1"
+  resolved "https://registry.yarnpkg.com/espree/-/espree-7.3.1.tgz#f2df330b752c6f55019f8bd89b7660039c1bbbb6"
+  integrity sha512-v3JCNCE64umkFpmkFGqzVKsOT0tN1Zr+ueqLZfpV1Ob8e+CEgPWa+OxCoGH3tnhimMKIaBm4m/vaRpJ/krRz2g==
+  dependencies:
+    acorn "^7.4.0"
+    acorn-jsx "^5.3.1"
+    eslint-visitor-keys "^1.3.0"
+
+esprima@^4.0.0, esprima@^4.0.1:
+  version "4.0.1"
+  resolved "https://registry.yarnpkg.com/esprima/-/esprima-4.0.1.tgz#13b04cdb3e6c5d19df91ab6987a8695619b0aa71"
+  integrity sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==
+
+esquery@^1.4.0:
+  version "1.4.0"
+  resolved "https://registry.yarnpkg.com/esquery/-/esquery-1.4.0.tgz#2148ffc38b82e8c7057dfed48425b3e61f0f24a5"
+  integrity sha512-cCDispWt5vHHtwMY2YrAQ4ibFkAL8RbH5YGBnZBc90MolvvfkkQcJro/aZiAQUlQ3qgrYS6D6v8Gc5G5CQsc9w==
+  dependencies:
+    estraverse "^5.1.0"
+
+esrecurse@^4.1.0, esrecurse@^4.3.0:
+  version "4.3.0"
+  resolved "https://registry.yarnpkg.com/esrecurse/-/esrecurse-4.3.0.tgz#7ad7964d679abb28bee72cec63758b1c5d2c9921"
+  integrity sha512-KmfKL3b6G+RXvP8N1vr3Tq1kL/oCFgn2NYXEtqP8/L3pKapUA4G8cFVaoF3SU323CD4XypR/ffioHmkti6/Tag==
+  dependencies:
+    estraverse "^5.2.0"
+
+estraverse@^4.1.1, estraverse@^4.2.0:
+  version "4.3.0"
+  resolved "https://registry.yarnpkg.com/estraverse/-/estraverse-4.3.0.tgz#398ad3f3c5a24948be7725e83d11a7de28cdbd1d"
+  integrity sha512-39nnKffWz8xN1BU/2c79n9nB9HDzo0niYUqx6xyqUnyoAnQyyWpOTdZEeiCch8BBu515t4wp9ZmgVfVhn9EBpw==
+
+estraverse@^5.1.0, estraverse@^5.2.0:
+  version "5.2.0"
+  resolved "https://registry.yarnpkg.com/estraverse/-/estraverse-5.2.0.tgz#307df42547e6cc7324d3cf03c155d5cdb8c53880"
+  integrity sha512-BxbNGGNm0RyRYvUdHpIwv9IWzeM9XClbOxwoATuFdOE7ZE6wHL+HQ5T8hoPM+zHvmKzzsEqhgy0GrQ5X13afiQ==
+
+estree-walker@^0.6.1:
+  version "0.6.1"
+  resolved "https://registry.yarnpkg.com/estree-walker/-/estree-walker-0.6.1.tgz#53049143f40c6eb918b23671d1fe3219f3a1b362"
+  integrity sha512-SqmZANLWS0mnatqbSfRP5g8OXZC12Fgg1IwNtLsyHDzJizORW4khDfjPqJZsemPWBB2uqykUah5YpQ6epsqC/w==
+
+estree-walker@^1.0.1:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/estree-walker/-/estree-walker-1.0.1.tgz#31bc5d612c96b704106b477e6dd5d8aa138cb700"
+  integrity sha512-1fMXF3YP4pZZVozF8j/ZLfvnR8NSIljt56UhbZ5PeeDmmGHpgpdwQt7ITlGvYaQukCvuBRMLEiKiYC+oeIg4cg==
+
+esutils@^2.0.2:
+  version "2.0.3"
+  resolved "https://registry.yarnpkg.com/esutils/-/esutils-2.0.3.tgz#74d2eb4de0b8da1293711910d50775b9b710ef64"
+  integrity sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==
+
+etag@~1.8.1:
+  version "1.8.1"
+  resolved "https://registry.yarnpkg.com/etag/-/etag-1.8.1.tgz#41ae2eeb65efa62268aebfea83ac7d79299b0887"
+  integrity sha1-Qa4u62XvpiJorr/qg6x9eSmbCIc=
+
+eventemitter3@^4.0.0:
+  version "4.0.7"
+  resolved "https://registry.yarnpkg.com/eventemitter3/-/eventemitter3-4.0.7.tgz#2de9b68f6528d5644ef5c59526a1b4a07306169f"
+  integrity sha512-8guHBZCwKnFhYdHr2ysuRWErTwhoN2X8XELRlrRwpmfeY2jjuUN4taQMsULKUVo1K4DvZl+0pgfyoysHxvmvEw==
+
+events@^3.0.0:
+  version "3.2.0"
+  resolved "https://registry.yarnpkg.com/events/-/events-3.2.0.tgz#93b87c18f8efcd4202a461aec4dfc0556b639379"
+  integrity sha512-/46HWwbfCX2xTawVfkKLGxMifJYQBWMwY1mjywRtb4c9x8l5NP3KoJtnIOiL1hfdRkIuYhETxQlo62IF8tcnlg==
+
+eventsource@^1.0.7:
+  version "1.0.7"
+  resolved "https://registry.yarnpkg.com/eventsource/-/eventsource-1.0.7.tgz#8fbc72c93fcd34088090bc0a4e64f4b5cee6d8d0"
+  integrity sha512-4Ln17+vVT0k8aWq+t/bF5arcS3EpT9gYtW66EPacdj/mAFevznsnyoHLPy2BA8gbIQeIHoPsvwmfBftfcG//BQ==
+  dependencies:
+    original "^1.0.0"
+
+evp_bytestokey@^1.0.0, evp_bytestokey@^1.0.3:
+  version "1.0.3"
+  resolved "https://registry.yarnpkg.com/evp_bytestokey/-/evp_bytestokey-1.0.3.tgz#7fcbdb198dc71959432efe13842684e0525acb02"
+  integrity sha512-/f2Go4TognH/KvCISP7OUsHn85hT9nUkxxA9BEWxFn+Oj9o8ZNLm/40hdlgSLyuOimsrTKLUMEorQexp/aPQeA==
+  dependencies:
+    md5.js "^1.3.4"
+    safe-buffer "^5.1.1"
+
+exec-sh@^0.3.2:
+  version "0.3.4"
+  resolved "https://registry.yarnpkg.com/exec-sh/-/exec-sh-0.3.4.tgz#3a018ceb526cc6f6df2bb504b2bfe8e3a4934ec5"
+  integrity sha512-sEFIkc61v75sWeOe72qyrqg2Qg0OuLESziUDk/O/z2qgS15y2gWVFrI6f2Qn/qw/0/NCfCEsmNA4zOjkwEZT1A==
+
+execa@^1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/execa/-/execa-1.0.0.tgz#c6236a5bb4df6d6f15e88e7f017798216749ddd8"
+  integrity sha512-adbxcyWV46qiHyvSp50TKt05tB4tK3HcmF7/nxfAdhnox83seTDbwnaqKO4sXRy7roHAIFqJP/Rw/AuEbX61LA==
+  dependencies:
+    cross-spawn "^6.0.0"
+    get-stream "^4.0.0"
+    is-stream "^1.1.0"
+    npm-run-path "^2.0.0"
+    p-finally "^1.0.0"
+    signal-exit "^3.0.0"
+    strip-eof "^1.0.0"
+
+execa@^4.0.0:
+  version "4.1.0"
+  resolved "https://registry.yarnpkg.com/execa/-/execa-4.1.0.tgz#4e5491ad1572f2f17a77d388c6c857135b22847a"
+  integrity sha512-j5W0//W7f8UxAn8hXVnwG8tLwdiUy4FJLcSupCg6maBYZDpyBvTApK7KyuI4bKj8KOh1r2YH+6ucuYtJv1bTZA==
+  dependencies:
+    cross-spawn "^7.0.0"
+    get-stream "^5.0.0"
+    human-signals "^1.1.1"
+    is-stream "^2.0.0"
+    merge-stream "^2.0.0"
+    npm-run-path "^4.0.0"
+    onetime "^5.1.0"
+    signal-exit "^3.0.2"
+    strip-final-newline "^2.0.0"
+
+exit@^0.1.2:
+  version "0.1.2"
+  resolved "https://registry.yarnpkg.com/exit/-/exit-0.1.2.tgz#0632638f8d877cc82107d30a0fff1a17cba1cd0c"
+  integrity sha1-BjJjj42HfMghB9MKD/8aF8uhzQw=
+
+expand-brackets@^2.1.4:
+  version "2.1.4"
+  resolved "https://registry.yarnpkg.com/expand-brackets/-/expand-brackets-2.1.4.tgz#b77735e315ce30f6b6eff0f83b04151a22449622"
+  integrity sha1-t3c14xXOMPa27/D4OwQVGiJEliI=
+  dependencies:
+    debug "^2.3.3"
+    define-property "^0.2.5"
+    extend-shallow "^2.0.1"
+    posix-character-classes "^0.1.0"
+    regex-not "^1.0.0"
+    snapdragon "^0.8.1"
+    to-regex "^3.0.1"
+
+expect@^26.6.0, expect@^26.6.2:
+  version "26.6.2"
+  resolved "https://registry.yarnpkg.com/expect/-/expect-26.6.2.tgz#c6b996bf26bf3fe18b67b2d0f51fc981ba934417"
+  integrity sha512-9/hlOBkQl2l/PLHJx6JjoDF6xPKcJEsUlWKb23rKE7KzeDqUZKXKNMW27KIue5JMdBV9HgmoJPcc8HtO85t9IA==
+  dependencies:
+    "@jest/types" "^26.6.2"
+    ansi-styles "^4.0.0"
+    jest-get-type "^26.3.0"
+    jest-matcher-utils "^26.6.2"
+    jest-message-util "^26.6.2"
+    jest-regex-util "^26.0.0"
+
+express@^4.17.1:
+  version "4.17.1"
+  resolved "https://registry.yarnpkg.com/express/-/express-4.17.1.tgz#4491fc38605cf51f8629d39c2b5d026f98a4c134"
+  integrity sha512-mHJ9O79RqluphRrcw2X/GTh3k9tVv8YcoyY4Kkh4WDMUYKRZUq0h1o0w2rrrxBqM7VoeUVqgb27xlEMXTnYt4g==
+  dependencies:
+    accepts "~1.3.7"
+    array-flatten "1.1.1"
+    body-parser "1.19.0"
+    content-disposition "0.5.3"
+    content-type "~1.0.4"
+    cookie "0.4.0"
+    cookie-signature "1.0.6"
+    debug "2.6.9"
+    depd "~1.1.2"
+    encodeurl "~1.0.2"
+    escape-html "~1.0.3"
+    etag "~1.8.1"
+    finalhandler "~1.1.2"
+    fresh "0.5.2"
+    merge-descriptors "1.0.1"
+    methods "~1.1.2"
+    on-finished "~2.3.0"
+    parseurl "~1.3.3"
+    path-to-regexp "0.1.7"
+    proxy-addr "~2.0.5"
+    qs "6.7.0"
+    range-parser "~1.2.1"
+    safe-buffer "5.1.2"
+    send "0.17.1"
+    serve-static "1.14.1"
+    setprototypeof "1.1.1"
+    statuses "~1.5.0"
+    type-is "~1.6.18"
+    utils-merge "1.0.1"
+    vary "~1.1.2"
+
+ext@^1.1.2:
+  version "1.4.0"
+  resolved "https://registry.yarnpkg.com/ext/-/ext-1.4.0.tgz#89ae7a07158f79d35517882904324077e4379244"
+  integrity sha512-Key5NIsUxdqKg3vIsdw9dSuXpPCQ297y6wBjL30edxwPgt2E44WcWBZey/ZvUc6sERLTxKdyCu4gZFmUbk1Q7A==
+  dependencies:
+    type "^2.0.0"
+
+extend-shallow@^2.0.1:
+  version "2.0.1"
+  resolved "https://registry.yarnpkg.com/extend-shallow/-/extend-shallow-2.0.1.tgz#51af7d614ad9a9f610ea1bafbb989d6b1c56890f"
+  integrity sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=
+  dependencies:
+    is-extendable "^0.1.0"
+
+extend-shallow@^3.0.0, extend-shallow@^3.0.2:
+  version "3.0.2"
+  resolved "https://registry.yarnpkg.com/extend-shallow/-/extend-shallow-3.0.2.tgz#26a71aaf073b39fb2127172746131c2704028db8"
+  integrity sha1-Jqcarwc7OfshJxcnRhMcJwQCjbg=
+  dependencies:
+    assign-symbols "^1.0.0"
+    is-extendable "^1.0.1"
+
+extend@~3.0.2:
+  version "3.0.2"
+  resolved "https://registry.yarnpkg.com/extend/-/extend-3.0.2.tgz#f8b1136b4071fbd8eb140aff858b1019ec2915fa"
+  integrity sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==
+
+extglob@^2.0.4:
+  version "2.0.4"
+  resolved "https://registry.yarnpkg.com/extglob/-/extglob-2.0.4.tgz#ad00fe4dc612a9232e8718711dc5cb5ab0285543"
+  integrity sha512-Nmb6QXkELsuBr24CJSkilo6UHHgbekK5UiZgfE6UHD3Eb27YC6oD+bhcT+tJ6cl8dmsgdQxnWlcry8ksBIBLpw==
+  dependencies:
+    array-unique "^0.3.2"
+    define-property "^1.0.0"
+    expand-brackets "^2.1.4"
+    extend-shallow "^2.0.1"
+    fragment-cache "^0.2.1"
+    regex-not "^1.0.0"
+    snapdragon "^0.8.1"
+    to-regex "^3.0.1"
+
+extsprintf@1.3.0:
+  version "1.3.0"
+  resolved "https://registry.yarnpkg.com/extsprintf/-/extsprintf-1.3.0.tgz#96918440e3041a7a414f8c52e3c574eb3c3e1e05"
+  integrity sha1-lpGEQOMEGnpBT4xS48V06zw+HgU=
+
+extsprintf@^1.2.0:
+  version "1.4.0"
+  resolved "https://registry.yarnpkg.com/extsprintf/-/extsprintf-1.4.0.tgz#e2689f8f356fad62cca65a3a91c5df5f9551692f"
+  integrity sha1-4mifjzVvrWLMplo6kcXfX5VRaS8=
+
+fast-deep-equal@^3.1.1:
+  version "3.1.3"
+  resolved "https://registry.yarnpkg.com/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz#3a7d56b559d6cbc3eb512325244e619a65c6c525"
+  integrity sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==
+
+fast-glob@^3.1.1:
+  version "3.2.5"
+  resolved "https://registry.yarnpkg.com/fast-glob/-/fast-glob-3.2.5.tgz#7939af2a656de79a4f1901903ee8adcaa7cb9661"
+  integrity sha512-2DtFcgT68wiTTiwZ2hNdJfcHNke9XOfnwmBRWXhmeKM8rF0TGwmC/Qto3S7RoZKp5cilZbxzO5iTNTQsJ+EeDg==
+  dependencies:
+    "@nodelib/fs.stat" "^2.0.2"
+    "@nodelib/fs.walk" "^1.2.3"
+    glob-parent "^5.1.0"
+    merge2 "^1.3.0"
+    micromatch "^4.0.2"
+    picomatch "^2.2.1"
+
+fast-json-stable-stringify@^2.0.0, fast-json-stable-stringify@^2.1.0:
+  version "2.1.0"
+  resolved "https://registry.yarnpkg.com/fast-json-stable-stringify/-/fast-json-stable-stringify-2.1.0.tgz#874bf69c6f404c2b5d99c481341399fd55892633"
+  integrity sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw==
+
+fast-levenshtein@^2.0.6, fast-levenshtein@~2.0.6:
+  version "2.0.6"
+  resolved "https://registry.yarnpkg.com/fast-levenshtein/-/fast-levenshtein-2.0.6.tgz#3d8a5c66883a16a30ca8643e851f19baa7797917"
+  integrity sha1-PYpcZog6FqMMqGQ+hR8Zuqd5eRc=
+
+fastq@^1.6.0:
+  version "1.10.1"
+  resolved "https://registry.yarnpkg.com/fastq/-/fastq-1.10.1.tgz#8b8f2ac8bf3632d67afcd65dac248d5fdc45385e"
+  integrity sha512-AWuv6Ery3pM+dY7LYS8YIaCiQvUaos9OB1RyNgaOWnaX+Tik7Onvcsf8x8c+YtDeT0maYLniBip2hox5KtEXXA==
+  dependencies:
+    reusify "^1.0.4"
+
+faye-websocket@^0.11.3:
+  version "0.11.3"
+  resolved "https://registry.yarnpkg.com/faye-websocket/-/faye-websocket-0.11.3.tgz#5c0e9a8968e8912c286639fde977a8b209f2508e"
+  integrity sha512-D2y4bovYpzziGgbHYtGCMjlJM36vAl/y+xUyn1C+FVx8szd1E+86KwVw6XvYSzOP8iMpm1X0I4xJD+QtUb36OA==
+  dependencies:
+    websocket-driver ">=0.5.1"
+
+fb-watchman@^2.0.0:
+  version "2.0.1"
+  resolved "https://registry.yarnpkg.com/fb-watchman/-/fb-watchman-2.0.1.tgz#fc84fb39d2709cf3ff6d743706157bb5708a8a85"
+  integrity sha512-DkPJKQeY6kKwmuMretBhr7G6Vodr7bFwDYTXIkfG1gjvNpaxBTQV3PbXg6bR1c1UP4jPOX0jHUbbHANL9vRjVg==
+  dependencies:
+    bser "2.1.1"
+
+figgy-pudding@^3.5.1:
+  version "3.5.2"
+  resolved "https://registry.yarnpkg.com/figgy-pudding/-/figgy-pudding-3.5.2.tgz#b4eee8148abb01dcf1d1ac34367d59e12fa61d6e"
+  integrity sha512-0btnI/H8f2pavGMN8w40mlSKOfTK2SVJmBfBeVIj3kNw0swwgzyRq0d5TJVOwodFmtvpPeWPN/MCcfuWF0Ezbw==
+
+file-entry-cache@^6.0.0:
+  version "6.0.1"
+  resolved "https://registry.yarnpkg.com/file-entry-cache/-/file-entry-cache-6.0.1.tgz#211b2dd9659cb0394b073e7323ac3c933d522027"
+  integrity sha512-7Gps/XWymbLk2QLYK4NzpMOrYjMhdIxXuIvy2QBsLE6ljuodKvdkWs/cpyJJ3CVIVpH0Oi1Hvg1ovbMzLdFBBg==
+  dependencies:
+    flat-cache "^3.0.4"
+
+file-loader@6.1.1:
+  version "6.1.1"
+  resolved "https://registry.yarnpkg.com/file-loader/-/file-loader-6.1.1.tgz#a6f29dfb3f5933a1c350b2dbaa20ac5be0539baa"
+  integrity sha512-Klt8C4BjWSXYQAfhpYYkG4qHNTna4toMHEbWrI5IuVoxbU6uiDKeKAP99R8mmbJi3lvewn/jQBOgU4+NS3tDQw==
+  dependencies:
+    loader-utils "^2.0.0"
+    schema-utils "^3.0.0"
+
+file-uri-to-path@1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/file-uri-to-path/-/file-uri-to-path-1.0.0.tgz#553a7b8446ff6f684359c445f1e37a05dacc33dd"
+  integrity sha512-0Zt+s3L7Vf1biwWZ29aARiVYLx7iMGnEUl9x33fbB/j3jR81u/O2LbqK+Bm1CDSNDKVtJ/YjwY7TUd5SkeLQLw==
+
+filesize@6.1.0:
+  version "6.1.0"
+  resolved "https://registry.yarnpkg.com/filesize/-/filesize-6.1.0.tgz#e81bdaa780e2451d714d71c0d7a4f3238d37ad00"
+  integrity sha512-LpCHtPQ3sFx67z+uh2HnSyWSLLu5Jxo21795uRDuar/EOuYWXib5EmPaGIBuSnRqH2IODiKA2k5re/K9OnN/Yg==
+
+fill-range@^4.0.0:
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/fill-range/-/fill-range-4.0.0.tgz#d544811d428f98eb06a63dc402d2403c328c38f7"
+  integrity sha1-1USBHUKPmOsGpj3EAtJAPDKMOPc=
+  dependencies:
+    extend-shallow "^2.0.1"
+    is-number "^3.0.0"
+    repeat-string "^1.6.1"
+    to-regex-range "^2.1.0"
+
+fill-range@^7.0.1:
+  version "7.0.1"
+  resolved "https://registry.yarnpkg.com/fill-range/-/fill-range-7.0.1.tgz#1919a6a7c75fe38b2c7c77e5198535da9acdda40"
+  integrity sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==
+  dependencies:
+    to-regex-range "^5.0.1"
+
+finalhandler@~1.1.2:
+  version "1.1.2"
+  resolved "https://registry.yarnpkg.com/finalhandler/-/finalhandler-1.1.2.tgz#b7e7d000ffd11938d0fdb053506f6ebabe9f587d"
+  integrity sha512-aAWcW57uxVNrQZqFXjITpW3sIUQmHGG3qSb9mUah9MgMC4NeWhNOlNjXEYq3HjRAvL6arUviZGGJsBg6z0zsWA==
+  dependencies:
+    debug "2.6.9"
+    encodeurl "~1.0.2"
+    escape-html "~1.0.3"
+    on-finished "~2.3.0"
+    parseurl "~1.3.3"
+    statuses "~1.5.0"
+    unpipe "~1.0.0"
+
+find-cache-dir@^2.1.0:
+  version "2.1.0"
+  resolved "https://registry.yarnpkg.com/find-cache-dir/-/find-cache-dir-2.1.0.tgz#8d0f94cd13fe43c6c7c261a0d86115ca918c05f7"
+  integrity sha512-Tq6PixE0w/VMFfCgbONnkiQIVol/JJL7nRMi20fqzA4NRs9AfeqMGeRdPi3wIhYkxjeBaWh2rxwapn5Tu3IqOQ==
+  dependencies:
+    commondir "^1.0.1"
+    make-dir "^2.0.0"
+    pkg-dir "^3.0.0"
+
+find-cache-dir@^3.3.1:
+  version "3.3.1"
+  resolved "https://registry.yarnpkg.com/find-cache-dir/-/find-cache-dir-3.3.1.tgz#89b33fad4a4670daa94f855f7fbe31d6d84fe880"
+  integrity sha512-t2GDMt3oGC/v+BMwzmllWDuJF/xcDtE5j/fCGbqDD7OLuJkj0cfh1YSA5VKPvwMeLFLNDBkwOKZ2X85jGLVftQ==
+  dependencies:
+    commondir "^1.0.1"
+    make-dir "^3.0.2"
+    pkg-dir "^4.1.0"
+
+find-root@^1.1.0:
+  version "1.1.0"
+  resolved "https://registry.yarnpkg.com/find-root/-/find-root-1.1.0.tgz#abcfc8ba76f708c42a97b3d685b7e9450bfb9ce4"
+  integrity sha512-NKfW6bec6GfKc0SGx1e07QZY9PE99u0Bft/0rzSD5k3sO/vwkVUpDUKVm5Gpp5Ue3YfShPFTX2070tDs5kB9Ng==
+
+find-up@4.1.0, find-up@^4.0.0, find-up@^4.1.0:
+  version "4.1.0"
+  resolved "https://registry.yarnpkg.com/find-up/-/find-up-4.1.0.tgz#97afe7d6cdc0bc5928584b7c8d7b16e8a9aa5d19"
+  integrity sha512-PpOwAdQ/YlXQ2vj8a3h8IipDuYRi3wceVQQGYWxNINccq40Anw7BlsEXCMbt1Zt+OLA6Fq9suIpIWD0OsnISlw==
+  dependencies:
+    locate-path "^5.0.0"
+    path-exists "^4.0.0"
+
+find-up@^2.0.0, find-up@^2.1.0:
+  version "2.1.0"
+  resolved "https://registry.yarnpkg.com/find-up/-/find-up-2.1.0.tgz#45d1b7e506c717ddd482775a2b77920a3c0c57a7"
+  integrity sha1-RdG35QbHF93UgndaK3eSCjwMV6c=
+  dependencies:
+    locate-path "^2.0.0"
+
+find-up@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/find-up/-/find-up-3.0.0.tgz#49169f1d7993430646da61ecc5ae355c21c97b73"
+  integrity sha512-1yD6RmLI1XBfxugvORwlck6f75tYL+iR0jqwsOrOxMZyGYqUuDhJ0l4AXdO1iX/FTs9cBAMEk1gWSEx1kSbylg==
+  dependencies:
+    locate-path "^3.0.0"
+
+flat-cache@^3.0.4:
+  version "3.0.4"
+  resolved "https://registry.yarnpkg.com/flat-cache/-/flat-cache-3.0.4.tgz#61b0338302b2fe9f957dcc32fc2a87f1c3048b11"
+  integrity sha512-dm9s5Pw7Jc0GvMYbshN6zchCA9RgQlzzEZX3vylR9IqFfS8XciblUXOKfW6SiuJ0e13eDYZoZV5wdrev7P3Nwg==
+  dependencies:
+    flatted "^3.1.0"
+    rimraf "^3.0.2"
+
+flatted@^3.1.0:
+  version "3.1.1"
+  resolved "https://registry.yarnpkg.com/flatted/-/flatted-3.1.1.tgz#c4b489e80096d9df1dfc97c79871aea7c617c469"
+  integrity sha512-zAoAQiudy+r5SvnSw3KJy5os/oRJYHzrzja/tBDqrZtNhUw8bt6y8OBzMWcjWr+8liV8Eb6yOhw8WZ7VFZ5ZzA==
+
+flatten@^1.0.2:
+  version "1.0.3"
+  resolved "https://registry.yarnpkg.com/flatten/-/flatten-1.0.3.tgz#c1283ac9f27b368abc1e36d1ff7b04501a30356b"
+  integrity sha512-dVsPA/UwQ8+2uoFe5GHtiBMu48dWLTdsuEd7CKGlZlD78r1TTWBvDuFaFGKCo/ZfEr95Uk56vZoX86OsHkUeIg==
+
+flush-write-stream@^1.0.0:
+  version "1.1.1"
+  resolved "https://registry.yarnpkg.com/flush-write-stream/-/flush-write-stream-1.1.1.tgz#8dd7d873a1babc207d94ead0c2e0e44276ebf2e8"
+  integrity sha512-3Z4XhFZ3992uIq0XOqb9AreonueSYphE6oYbpt5+3u06JWklbsPkNv3ZKkP9Bz/r+1MWCaMoSQ28P85+1Yc77w==
+  dependencies:
+    inherits "^2.0.3"
+    readable-stream "^2.3.6"
+
+focus-lock@^0.8.1:
+  version "0.8.1"
+  resolved "https://registry.yarnpkg.com/focus-lock/-/focus-lock-0.8.1.tgz#bb36968abf77a2063fa173cb6c47b12ac8599d33"
+  integrity sha512-/LFZOIo82WDsyyv7h7oc0MJF9ACOvDRdx9rWPZ2pgMfNWu/z8hQDBtOchuB/0BVLmuFOZjV02YwUVzNsWx/EzA==
+  dependencies:
+    tslib "^1.9.3"
+
+follow-redirects@^1.0.0:
+  version "1.13.2"
+  resolved "https://registry.yarnpkg.com/follow-redirects/-/follow-redirects-1.13.2.tgz#dd73c8effc12728ba5cf4259d760ea5fb83e3147"
+  integrity sha512-6mPTgLxYm3r6Bkkg0vNM0HTjfGrOEtsfbhagQvbxDEsEkpNhw582upBaoRZylzen6krEmxXJgt9Ju6HiI4O7BA==
+
+for-in@^1.0.2:
+  version "1.0.2"
+  resolved "https://registry.yarnpkg.com/for-in/-/for-in-1.0.2.tgz#81068d295a8142ec0ac726c6e2200c30fb6d5e80"
+  integrity sha1-gQaNKVqBQuwKxybG4iAMMPttXoA=
+
+forever-agent@~0.6.1:
+  version "0.6.1"
+  resolved "https://registry.yarnpkg.com/forever-agent/-/forever-agent-0.6.1.tgz#fbc71f0c41adeb37f96c577ad1ed42d8fdacca91"
+  integrity sha1-+8cfDEGt6zf5bFd60e1C2P2sypE=
+
+fork-ts-checker-webpack-plugin@4.1.6:
+  version "4.1.6"
+  resolved "https://registry.yarnpkg.com/fork-ts-checker-webpack-plugin/-/fork-ts-checker-webpack-plugin-4.1.6.tgz#5055c703febcf37fa06405d400c122b905167fc5"
+  integrity sha512-DUxuQaKoqfNne8iikd14SAkh5uw4+8vNifp6gmA73yYNS6ywLIWSLD/n/mBzHQRpW3J7rbATEakmiA8JvkTyZw==
+  dependencies:
+    "@babel/code-frame" "^7.5.5"
+    chalk "^2.4.1"
+    micromatch "^3.1.10"
+    minimatch "^3.0.4"
+    semver "^5.6.0"
+    tapable "^1.0.0"
+    worker-rpc "^0.1.0"
+
+form-data@~2.3.2:
+  version "2.3.3"
+  resolved "https://registry.yarnpkg.com/form-data/-/form-data-2.3.3.tgz#dcce52c05f644f298c6a7ab936bd724ceffbf3a6"
+  integrity sha512-1lLKB2Mu3aGP1Q/2eCOx0fNbRMe7XdwktwOruhfqqd0rIJWwN4Dh+E3hrPSlDCXnSR7UtZ1N38rVXm+6+MEhJQ==
+  dependencies:
+    asynckit "^0.4.0"
+    combined-stream "^1.0.6"
+    mime-types "^2.1.12"
+
+forwarded@~0.1.2:
+  version "0.1.2"
+  resolved "https://registry.yarnpkg.com/forwarded/-/forwarded-0.1.2.tgz#98c23dab1175657b8c0573e8ceccd91b0ff18c84"
+  integrity sha1-mMI9qxF1ZXuMBXPozszZGw/xjIQ=
+
+fragment-cache@^0.2.1:
+  version "0.2.1"
+  resolved "https://registry.yarnpkg.com/fragment-cache/-/fragment-cache-0.2.1.tgz#4290fad27f13e89be7f33799c6bc5a0abfff0d19"
+  integrity sha1-QpD60n8T6Jvn8zeZxrxaCr//DRk=
+  dependencies:
+    map-cache "^0.2.2"
+
+framer-motion@^3.7.0:
+  version "3.7.0"
+  resolved "https://registry.yarnpkg.com/framer-motion/-/framer-motion-3.7.0.tgz#57f4c4899223e1a9d76092a865b1245b48806d6c"
+  integrity sha512-sEmI/1a0vG91aFV7zW9vGHJ0O7IO+V/KAUWpuGFmXYbE7WojAomRMOgz7EkeOMgSm408jewf8/KNEzWK5b2N5g==
+  dependencies:
+    framesync "^5.1.0"
+    hey-listen "^1.0.8"
+    popmotion "9.2.1"
+    style-value-types "4.0.3"
+    tslib "^1.10.0"
+  optionalDependencies:
+    "@emotion/is-prop-valid" "^0.8.2"
+
+framesync@5.1.0, framesync@^5.1.0:
+  version "5.1.0"
+  resolved "https://registry.yarnpkg.com/framesync/-/framesync-5.1.0.tgz#b22639be6e83cf170e5cb3d0497e3e50100a01ef"
+  integrity sha512-31sDH8LxSFoLKDYENzKdI+YJD4vV8sMBpwcAW0/6Es2jZBQBdlqbFnqrYczpsnzpqG+y6EqYPvgFMI2ZDdlnyQ==
+
+fresh@0.5.2:
+  version "0.5.2"
+  resolved "https://registry.yarnpkg.com/fresh/-/fresh-0.5.2.tgz#3d8cadd90d976569fa835ab1f8e4b23a105605a7"
+  integrity sha1-PYyt2Q2XZWn6g1qx+OSyOhBWBac=
+
+from2@^2.1.0:
+  version "2.3.0"
+  resolved "https://registry.yarnpkg.com/from2/-/from2-2.3.0.tgz#8bfb5502bde4a4d36cfdeea007fcca21d7e382af"
+  integrity sha1-i/tVAr3kpNNs/e6gB/zKIdfjgq8=
+  dependencies:
+    inherits "^2.0.1"
+    readable-stream "^2.0.0"
+
+fs-extra@^7.0.0:
+  version "7.0.1"
+  resolved "https://registry.yarnpkg.com/fs-extra/-/fs-extra-7.0.1.tgz#4f189c44aa123b895f722804f55ea23eadc348e9"
+  integrity sha512-YJDaCJZEnBmcbw13fvdAM9AwNOJwOzrE4pqMqBq5nFiEqXUqHwlK4B+3pUw6JNvfSPtX05xFHtYy/1ni01eGCw==
+  dependencies:
+    graceful-fs "^4.1.2"
+    jsonfile "^4.0.0"
+    universalify "^0.1.0"
+
+fs-extra@^8.1.0:
+  version "8.1.0"
+  resolved "https://registry.yarnpkg.com/fs-extra/-/fs-extra-8.1.0.tgz#49d43c45a88cd9677668cb7be1b46efdb8d2e1c0"
+  integrity sha512-yhlQgA6mnOJUKOsRUFsgJdQCvkKhcz8tlZG5HBQfReYZy46OwLcY+Zia0mtdHsOo9y/hP+CxMN0TU9QxoOtG4g==
+  dependencies:
+    graceful-fs "^4.2.0"
+    jsonfile "^4.0.0"
+    universalify "^0.1.0"
+
+fs-extra@^9.0.1:
+  version "9.1.0"
+  resolved "https://registry.yarnpkg.com/fs-extra/-/fs-extra-9.1.0.tgz#5954460c764a8da2094ba3554bf839e6b9a7c86d"
+  integrity sha512-hcg3ZmepS30/7BSFqRvoo3DOMQu7IjqxO5nCDt+zM9XWjb33Wg7ziNT+Qvqbuc3+gWpzO02JubVyk2G4Zvo1OQ==
+  dependencies:
+    at-least-node "^1.0.0"
+    graceful-fs "^4.2.0"
+    jsonfile "^6.0.1"
+    universalify "^2.0.0"
+
+fs-minipass@^2.0.0:
+  version "2.1.0"
+  resolved "https://registry.yarnpkg.com/fs-minipass/-/fs-minipass-2.1.0.tgz#7f5036fdbf12c63c169190cbe4199c852271f9fb"
+  integrity sha512-V/JgOLFCS+R6Vcq0slCuaeWEdNC3ouDlJMNIsacH2VtALiu9mV4LPrHc5cDl8k5aw6J8jwgWWpiTo5RYhmIzvg==
+  dependencies:
+    minipass "^3.0.0"
+
+fs-write-stream-atomic@^1.0.8:
+  version "1.0.10"
+  resolved "https://registry.yarnpkg.com/fs-write-stream-atomic/-/fs-write-stream-atomic-1.0.10.tgz#b47df53493ef911df75731e70a9ded0189db40c9"
+  integrity sha1-tH31NJPvkR33VzHnCp3tAYnbQMk=
+  dependencies:
+    graceful-fs "^4.1.2"
+    iferr "^0.1.5"
+    imurmurhash "^0.1.4"
+    readable-stream "1 || 2"
+
+fs.realpath@^1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/fs.realpath/-/fs.realpath-1.0.0.tgz#1504ad2523158caa40db4a2787cb01411994ea4f"
+  integrity sha1-FQStJSMVjKpA20onh8sBQRmU6k8=
+
+fsevents@^1.2.7:
+  version "1.2.13"
+  resolved "https://registry.yarnpkg.com/fsevents/-/fsevents-1.2.13.tgz#f325cb0455592428bcf11b383370ef70e3bfcc38"
+  integrity sha512-oWb1Z6mkHIskLzEJ/XWX0srkpkTQ7vaopMQkyaEIoq0fmtFVxOthb8cCxeT+p3ynTdkk/RZwbgG4brR5BeWECw==
+  dependencies:
+    bindings "^1.5.0"
+    nan "^2.12.1"
+
+fsevents@^2.1.2, fsevents@^2.1.3, fsevents@~2.3.1:
+  version "2.3.2"
+  resolved "https://registry.yarnpkg.com/fsevents/-/fsevents-2.3.2.tgz#8a526f78b8fdf4623b709e0b975c52c24c02fd1a"
+  integrity sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==
+
+function-bind@^1.1.1:
+  version "1.1.1"
+  resolved "https://registry.yarnpkg.com/function-bind/-/function-bind-1.1.1.tgz#a56899d3ea3c9bab874bb9773b7c5ede92f4895d"
+  integrity sha512-yIovAzMX49sF8Yl58fSCWJ5svSLuaibPxXQJFLmBObTuCr0Mf1KiPopGM9NiFjiYBCbfaa2Fh6breQ6ANVTI0A==
+
+functional-red-black-tree@^1.0.1:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/functional-red-black-tree/-/functional-red-black-tree-1.0.1.tgz#1b0ab3bd553b2a0d6399d29c0e3ea0b252078327"
+  integrity sha1-GwqzvVU7Kg1jmdKcDj6gslIHgyc=
+
+gensync@^1.0.0-beta.1:
+  version "1.0.0-beta.2"
+  resolved "https://registry.yarnpkg.com/gensync/-/gensync-1.0.0-beta.2.tgz#32a6ee76c3d7f52d46b2b1ae5d93fea8580a25e0"
+  integrity sha512-3hN7NaskYvMDLQY55gnW3NQ+mesEAepTqlg+VEbj7zzqEMBVNhzcGYYeqFo/TlYz6eQiFcp1HcsCZO+nGgS8zg==
+
+get-caller-file@^2.0.1:
+  version "2.0.5"
+  resolved "https://registry.yarnpkg.com/get-caller-file/-/get-caller-file-2.0.5.tgz#4f94412a82db32f36e3b0b9741f8a97feb031f7e"
+  integrity sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==
+
+get-intrinsic@^1.0.2, get-intrinsic@^1.1.0, get-intrinsic@^1.1.1:
+  version "1.1.1"
+  resolved "https://registry.yarnpkg.com/get-intrinsic/-/get-intrinsic-1.1.1.tgz#15f59f376f855c446963948f0d24cd3637b4abc6"
+  integrity sha512-kWZrnVM42QCiEA2Ig1bG8zjoIMOgxWwYCEeNdwY6Tv/cOSeGpcoX4pXHfKUxNKVoArnrEr2e9srnAxxGIraS9Q==
+  dependencies:
+    function-bind "^1.1.1"
+    has "^1.0.3"
+    has-symbols "^1.0.1"
+
+get-nonce@^1.0.0:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/get-nonce/-/get-nonce-1.0.1.tgz#fdf3f0278073820d2ce9426c18f07481b1e0cdf3"
+  integrity sha512-FJhYRoDaiatfEkUK8HKlicmu/3SGFD51q3itKDGoSTysQJBnfOcxU5GxnhE1E6soB76MbT0MBtnKJuXyAx+96Q==
+
+get-own-enumerable-property-symbols@^3.0.0:
+  version "3.0.2"
+  resolved "https://registry.yarnpkg.com/get-own-enumerable-property-symbols/-/get-own-enumerable-property-symbols-3.0.2.tgz#b5fde77f22cbe35f390b4e089922c50bce6ef664"
+  integrity sha512-I0UBV/XOz1XkIJHEUDMZAbzCThU/H8DxmSfmdGcKPnVhu2VfFqr34jr9777IyaTYvxjedWhqVIilEDsCdP5G6g==
+
+get-package-type@^0.1.0:
+  version "0.1.0"
+  resolved "https://registry.yarnpkg.com/get-package-type/-/get-package-type-0.1.0.tgz#8de2d803cff44df3bc6c456e6668b36c3926e11a"
+  integrity sha512-pjzuKtY64GYfWizNAJ0fr9VqttZkNiK2iS430LtIHzjBEr6bX8Am2zm4sW4Ro5wjWW5cAlRL1qAMTcXbjNAO2Q==
+
+get-stream@^4.0.0:
+  version "4.1.0"
+  resolved "https://registry.yarnpkg.com/get-stream/-/get-stream-4.1.0.tgz#c1b255575f3dc21d59bfc79cd3d2b46b1c3a54b5"
+  integrity sha512-GMat4EJ5161kIy2HevLlr4luNjBgvmj413KaQA7jt4V8B4RDsfpHk7WQ9GVqfYyyx8OS/L66Kox+rJRNklLK7w==
+  dependencies:
+    pump "^3.0.0"
+
+get-stream@^5.0.0:
+  version "5.2.0"
+  resolved "https://registry.yarnpkg.com/get-stream/-/get-stream-5.2.0.tgz#4966a1795ee5ace65e706c4b7beb71257d6e22d3"
+  integrity sha512-nBF+F1rAZVCu/p7rjzgA+Yb4lfYXrpl7a6VmJrU8wF9I1CKvP/QwPNZHnOlwbTkY6dvtFIzFMSyQXbLoTQPRpA==
+  dependencies:
+    pump "^3.0.0"
+
+get-value@^2.0.3, get-value@^2.0.6:
+  version "2.0.6"
+  resolved "https://registry.yarnpkg.com/get-value/-/get-value-2.0.6.tgz#dc15ca1c672387ca76bd37ac0a395ba2042a2c28"
+  integrity sha1-3BXKHGcjh8p2vTesCjlbogQqLCg=
+
+getpass@^0.1.1:
+  version "0.1.7"
+  resolved "https://registry.yarnpkg.com/getpass/-/getpass-0.1.7.tgz#5eff8e3e684d569ae4cb2b1282604e8ba62149fa"
+  integrity sha1-Xv+OPmhNVprkyysSgmBOi6YhSfo=
+  dependencies:
+    assert-plus "^1.0.0"
+
+glob-parent@^3.1.0:
+  version "3.1.0"
+  resolved "https://registry.yarnpkg.com/glob-parent/-/glob-parent-3.1.0.tgz#9e6af6299d8d3bd2bd40430832bd113df906c5ae"
+  integrity sha1-nmr2KZ2NO9K9QEMIMr0RPfkGxa4=
+  dependencies:
+    is-glob "^3.1.0"
+    path-dirname "^1.0.0"
+
+glob-parent@^5.0.0, glob-parent@^5.1.0, glob-parent@~5.1.0:
+  version "5.1.1"
+  resolved "https://registry.yarnpkg.com/glob-parent/-/glob-parent-5.1.1.tgz#b6c1ef417c4e5663ea498f1c45afac6916bbc229"
+  integrity sha512-FnI+VGOpnlGHWZxthPGR+QhR78fuiK0sNLkHQv+bL9fQi57lNNdquIbna/WrfROrolq8GK5Ek6BiMwqL/voRYQ==
+  dependencies:
+    is-glob "^4.0.1"
+
+glob@^7.0.3, glob@^7.1.1, glob@^7.1.2, glob@^7.1.3, glob@^7.1.4, glob@^7.1.6:
+  version "7.1.6"
+  resolved "https://registry.yarnpkg.com/glob/-/glob-7.1.6.tgz#141f33b81a7c2492e125594307480c46679278a6"
+  integrity sha512-LwaxwyZ72Lk7vZINtNNrywX0ZuLyStrdDtabefZKAY5ZGJhVtgdznluResxNmPitE0SAO+O26sWTHeKSI2wMBA==
+  dependencies:
+    fs.realpath "^1.0.0"
+    inflight "^1.0.4"
+    inherits "2"
+    minimatch "^3.0.4"
+    once "^1.3.0"
+    path-is-absolute "^1.0.0"
+
+global-modules@2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/global-modules/-/global-modules-2.0.0.tgz#997605ad2345f27f51539bea26574421215c7780"
+  integrity sha512-NGbfmJBp9x8IxyJSd1P+otYK8vonoJactOogrVfFRIAEY1ukil8RSKDz2Yo7wh1oihl51l/r6W4epkeKJHqL8A==
+  dependencies:
+    global-prefix "^3.0.0"
+
+global-prefix@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/global-prefix/-/global-prefix-3.0.0.tgz#fc85f73064df69f50421f47f883fe5b913ba9b97"
+  integrity sha512-awConJSVCHVGND6x3tmMaKcQvwXLhjdkmomy2W+Goaui8YPgYgXJZewhg3fWC+DlfqqQuWg8AwqjGTD2nAPVWg==
+  dependencies:
+    ini "^1.3.5"
+    kind-of "^6.0.2"
+    which "^1.3.1"
+
+globals@^11.1.0:
+  version "11.12.0"
+  resolved "https://registry.yarnpkg.com/globals/-/globals-11.12.0.tgz#ab8795338868a0babd8525758018c2a7eb95c42e"
+  integrity sha512-WOBp/EEGUiIsJSp7wcv/y6MO+lV9UoncWqxuFfm8eBwzWNgyfBd6Gz+IeKQ9jCmyhoH99g15M3T+QaVHFjizVA==
+
+globals@^12.1.0:
+  version "12.4.0"
+  resolved "https://registry.yarnpkg.com/globals/-/globals-12.4.0.tgz#a18813576a41b00a24a97e7f815918c2e19925f8"
+  integrity sha512-BWICuzzDvDoH54NHKCseDanAhE3CeDorgDL5MT6LMXXj2WCnd9UC2szdk4AWLfjdgNBCXLUanXYcpBBKOSWGwg==
+  dependencies:
+    type-fest "^0.8.1"
+
+globby@11.0.1:
+  version "11.0.1"
+  resolved "https://registry.yarnpkg.com/globby/-/globby-11.0.1.tgz#9a2bf107a068f3ffeabc49ad702c79ede8cfd357"
+  integrity sha512-iH9RmgwCmUJHi2z5o2l3eTtGBtXek1OYlHrbcxOYugyHLmAsZrPj43OtHThd62Buh/Vv6VyCBD2bdyWcGNQqoQ==
+  dependencies:
+    array-union "^2.1.0"
+    dir-glob "^3.0.1"
+    fast-glob "^3.1.1"
+    ignore "^5.1.4"
+    merge2 "^1.3.0"
+    slash "^3.0.0"
+
+globby@^11.0.1:
+  version "11.0.2"
+  resolved "https://registry.yarnpkg.com/globby/-/globby-11.0.2.tgz#1af538b766a3b540ebfb58a32b2e2d5897321d83"
+  integrity sha512-2ZThXDvvV8fYFRVIxnrMQBipZQDr7MxKAmQK1vujaj9/7eF0efG7BPUKJ7jP7G5SLF37xKDXvO4S/KKLj/Z0og==
+  dependencies:
+    array-union "^2.1.0"
+    dir-glob "^3.0.1"
+    fast-glob "^3.1.1"
+    ignore "^5.1.4"
+    merge2 "^1.3.0"
+    slash "^3.0.0"
+
+globby@^6.1.0:
+  version "6.1.0"
+  resolved "https://registry.yarnpkg.com/globby/-/globby-6.1.0.tgz#f5a6d70e8395e21c858fb0489d64df02424d506c"
+  integrity sha1-9abXDoOV4hyFj7BInWTfAkJNUGw=
+  dependencies:
+    array-union "^1.0.1"
+    glob "^7.0.3"
+    object-assign "^4.0.1"
+    pify "^2.0.0"
+    pinkie-promise "^2.0.0"
+
+graceful-fs@^4.1.11, graceful-fs@^4.1.15, graceful-fs@^4.1.2, graceful-fs@^4.1.6, graceful-fs@^4.2.0, graceful-fs@^4.2.4:
+  version "4.2.6"
+  resolved "https://registry.yarnpkg.com/graceful-fs/-/graceful-fs-4.2.6.tgz#ff040b2b0853b23c3d31027523706f1885d76bee"
+  integrity sha512-nTnJ528pbqxYanhpDYsi4Rd8MAeaBA67+RZ10CM1m3bTAVFEDcd5AuA4a6W5YkGZ1iNXHzZz8T6TBKLeBuNriQ==
+
+growly@^1.3.0:
+  version "1.3.0"
+  resolved "https://registry.yarnpkg.com/growly/-/growly-1.3.0.tgz#f10748cbe76af964b7c96c93c6bcc28af120c081"
+  integrity sha1-8QdIy+dq+WS3yWyTxrzCivEgwIE=
+
+gzip-size@5.1.1:
+  version "5.1.1"
+  resolved "https://registry.yarnpkg.com/gzip-size/-/gzip-size-5.1.1.tgz#cb9bee692f87c0612b232840a873904e4c135274"
+  integrity sha512-FNHi6mmoHvs1mxZAds4PpdCS6QG8B4C1krxJsMutgxl5t3+GlRTzzI3NEkifXx2pVsOvJdOGSmIgDhQ55FwdPA==
+  dependencies:
+    duplexer "^0.1.1"
+    pify "^4.0.1"
+
+handle-thing@^2.0.0:
+  version "2.0.1"
+  resolved "https://registry.yarnpkg.com/handle-thing/-/handle-thing-2.0.1.tgz#857f79ce359580c340d43081cc648970d0bb234e"
+  integrity sha512-9Qn4yBxelxoh2Ow62nP+Ka/kMnOXRi8BXnRaUwezLNhqelnN49xKz4F/dPP8OYLxLxq6JDtZb2i9XznUQbNPTg==
+
+har-schema@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/har-schema/-/har-schema-2.0.0.tgz#a94c2224ebcac04782a0d9035521f24735b7ec92"
+  integrity sha1-qUwiJOvKwEeCoNkDVSHyRzW37JI=
+
+har-validator@~5.1.3:
+  version "5.1.5"
+  resolved "https://registry.yarnpkg.com/har-validator/-/har-validator-5.1.5.tgz#1f0803b9f8cb20c0fa13822df1ecddb36bde1efd"
+  integrity sha512-nmT2T0lljbxdQZfspsno9hgrG3Uir6Ks5afism62poxqBM6sDnMEuPmzTq8XN0OEwqKLLdh1jQI3qyE66Nzb3w==
+  dependencies:
+    ajv "^6.12.3"
+    har-schema "^2.0.0"
+
+harmony-reflect@^1.4.6:
+  version "1.6.1"
+  resolved "https://registry.yarnpkg.com/harmony-reflect/-/harmony-reflect-1.6.1.tgz#c108d4f2bb451efef7a37861fdbdae72c9bdefa9"
+  integrity sha512-WJTeyp0JzGtHcuMsi7rw2VwtkvLa+JyfEKJCFyfcS0+CDkjQ5lHPu7zEhFZP+PDSRrEgXa5Ah0l1MbgbE41XjA==
+
+has-flag@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/has-flag/-/has-flag-3.0.0.tgz#b5d454dc2199ae225699f3467e5a07f3b955bafd"
+  integrity sha1-tdRU3CGZriJWmfNGfloH87lVuv0=
+
+has-flag@^4.0.0:
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/has-flag/-/has-flag-4.0.0.tgz#944771fd9c81c81265c4d6941860da06bb59479b"
+  integrity sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==
+
+has-symbols@^1.0.1:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/has-symbols/-/has-symbols-1.0.1.tgz#9f5214758a44196c406d9bd76cebf81ec2dd31e8"
+  integrity sha512-PLcsoqu++dmEIZB+6totNFKq/7Do+Z0u4oT0zKOJNl3lYK6vGwwu2hjHs+68OEZbTjiUE9bgOABXbP/GvrS0Kg==
+
+has-value@^0.3.1:
+  version "0.3.1"
+  resolved "https://registry.yarnpkg.com/has-value/-/has-value-0.3.1.tgz#7b1f58bada62ca827ec0a2078025654845995e1f"
+  integrity sha1-ex9YutpiyoJ+wKIHgCVlSEWZXh8=
+  dependencies:
+    get-value "^2.0.3"
+    has-values "^0.1.4"
+    isobject "^2.0.0"
+
+has-value@^1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/has-value/-/has-value-1.0.0.tgz#18b281da585b1c5c51def24c930ed29a0be6b177"
+  integrity sha1-GLKB2lhbHFxR3vJMkw7SmgvmsXc=
+  dependencies:
+    get-value "^2.0.6"
+    has-values "^1.0.0"
+    isobject "^3.0.0"
+
+has-values@^0.1.4:
+  version "0.1.4"
+  resolved "https://registry.yarnpkg.com/has-values/-/has-values-0.1.4.tgz#6d61de95d91dfca9b9a02089ad384bff8f62b771"
+  integrity sha1-bWHeldkd/Km5oCCJrThL/49it3E=
+
+has-values@^1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/has-values/-/has-values-1.0.0.tgz#95b0b63fec2146619a6fe57fe75628d5a39efe4f"
+  integrity sha1-lbC2P+whRmGab+V/51Yo1aOe/k8=
+  dependencies:
+    is-number "^3.0.0"
+    kind-of "^4.0.0"
+
+has@^1.0.0, has@^1.0.3:
+  version "1.0.3"
+  resolved "https://registry.yarnpkg.com/has/-/has-1.0.3.tgz#722d7cbfc1f6aa8241f16dd814e011e1f41e8796"
+  integrity sha512-f2dvO0VU6Oej7RkWJGrehjbzMAjFp5/VKPp5tTpWIV4JHHZK1/BxbFRtf/siA2SWTe09caDmVtYYzWEIbBS4zw==
+  dependencies:
+    function-bind "^1.1.1"
+
+hash-base@^3.0.0:
+  version "3.1.0"
+  resolved "https://registry.yarnpkg.com/hash-base/-/hash-base-3.1.0.tgz#55c381d9e06e1d2997a883b4a3fddfe7f0d3af33"
+  integrity sha512-1nmYp/rhMDiE7AYkDw+lLwlAzz0AntGIe51F3RfFfEqyQ3feY2eI/NcwC6umIQVOASPMsWJLJScWKSSvzL9IVA==
+  dependencies:
+    inherits "^2.0.4"
+    readable-stream "^3.6.0"
+    safe-buffer "^5.2.0"
+
+hash.js@^1.0.0, hash.js@^1.0.3:
+  version "1.1.7"
+  resolved "https://registry.yarnpkg.com/hash.js/-/hash.js-1.1.7.tgz#0babca538e8d4ee4a0f8988d68866537a003cf42"
+  integrity sha512-taOaskGt4z4SOANNseOviYDvjEJinIkRgmp7LbKP2YTTmVxWBl87s/uzK9r+44BclBSp2X7K1hqeNfz9JbBeXA==
+  dependencies:
+    inherits "^2.0.3"
+    minimalistic-assert "^1.0.1"
+
+he@^1.2.0:
+  version "1.2.0"
+  resolved "https://registry.yarnpkg.com/he/-/he-1.2.0.tgz#84ae65fa7eafb165fddb61566ae14baf05664f0f"
+  integrity sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==
+
+hex-color-regex@^1.1.0:
+  version "1.1.0"
+  resolved "https://registry.yarnpkg.com/hex-color-regex/-/hex-color-regex-1.1.0.tgz#4c06fccb4602fe2602b3c93df82d7e7dbf1a8a8e"
+  integrity sha512-l9sfDFsuqtOqKDsQdqrMRk0U85RZc0RtOR9yPI7mRVOa4FsR/BVnZ0shmQRM96Ji99kYZP/7hn1cedc1+ApsTQ==
+
+hey-listen@^1.0.8:
+  version "1.0.8"
+  resolved "https://registry.yarnpkg.com/hey-listen/-/hey-listen-1.0.8.tgz#8e59561ff724908de1aa924ed6ecc84a56a9aa68"
+  integrity sha512-COpmrF2NOg4TBWUJ5UVyaCU2A88wEMkUPK4hNqyCkqHbxT92BbvfjoSozkAIIm6XhicGlJHhFdullInrdhwU8Q==
+
+history@^4.9.0:
+  version "4.10.1"
+  resolved "https://registry.yarnpkg.com/history/-/history-4.10.1.tgz#33371a65e3a83b267434e2b3f3b1b4c58aad4cf3"
+  integrity sha512-36nwAD620w12kuzPAsyINPWJqlNbij+hpK1k9XRloDtym8mxzGYl2c17LnV6IAGB2Dmg4tEa7G7DlawS0+qjew==
+  dependencies:
+    "@babel/runtime" "^7.1.2"
+    loose-envify "^1.2.0"
+    resolve-pathname "^3.0.0"
+    tiny-invariant "^1.0.2"
+    tiny-warning "^1.0.0"
+    value-equal "^1.0.1"
+
+hmac-drbg@^1.0.1:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/hmac-drbg/-/hmac-drbg-1.0.1.tgz#d2745701025a6c775a6c545793ed502fc0c649a1"
+  integrity sha1-0nRXAQJabHdabFRXk+1QL8DGSaE=
+  dependencies:
+    hash.js "^1.0.3"
+    minimalistic-assert "^1.0.0"
+    minimalistic-crypto-utils "^1.0.1"
+
+hoist-non-react-statics@^3.1.0, hoist-non-react-statics@^3.3.1:
+  version "3.3.2"
+  resolved "https://registry.yarnpkg.com/hoist-non-react-statics/-/hoist-non-react-statics-3.3.2.tgz#ece0acaf71d62c2969c2ec59feff42a4b1a85b45"
+  integrity sha512-/gGivxi8JPKWNm/W0jSmzcMPpfpPLc3dY/6GxhX2hQ9iGj3aDfklV4ET7NjKpSinLpJ5vafa9iiGIEZg10SfBw==
+  dependencies:
+    react-is "^16.7.0"
+
+hoopy@^0.1.4:
+  version "0.1.4"
+  resolved "https://registry.yarnpkg.com/hoopy/-/hoopy-0.1.4.tgz#609207d661100033a9a9402ad3dea677381c1b1d"
+  integrity sha512-HRcs+2mr52W0K+x8RzcLzuPPmVIKMSv97RGHy0Ea9y/mpcaK+xTrjICA04KAHi4GRzxliNqNJEFYWHghy3rSfQ==
+
+hosted-git-info@^2.1.4:
+  version "2.8.8"
+  resolved "https://registry.yarnpkg.com/hosted-git-info/-/hosted-git-info-2.8.8.tgz#7539bd4bc1e0e0a895815a2e0262420b12858488"
+  integrity sha512-f/wzC2QaWBs7t9IYqB4T3sR1xviIViXJRJTWBlx2Gf3g0Xi5vI7Yy4koXQ1c9OYDGHN9sBy1DQ2AB8fqZBWhUg==
+
+hpack.js@^2.1.6:
+  version "2.1.6"
+  resolved "https://registry.yarnpkg.com/hpack.js/-/hpack.js-2.1.6.tgz#87774c0949e513f42e84575b3c45681fade2a0b2"
+  integrity sha1-h3dMCUnlE/QuhFdbPEVoH63ioLI=
+  dependencies:
+    inherits "^2.0.1"
+    obuf "^1.0.0"
+    readable-stream "^2.0.1"
+    wbuf "^1.1.0"
+
+hsl-regex@^1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/hsl-regex/-/hsl-regex-1.0.0.tgz#d49330c789ed819e276a4c0d272dffa30b18fe6e"
+  integrity sha1-1JMwx4ntgZ4nakwNJy3/owsY/m4=
+
+hsla-regex@^1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/hsla-regex/-/hsla-regex-1.0.0.tgz#c1ce7a3168c8c6614033a4b5f7877f3b225f9c38"
+  integrity sha1-wc56MWjIxmFAM6S194d/OyJfnDg=
+
+html-comment-regex@^1.1.0:
+  version "1.1.2"
+  resolved "https://registry.yarnpkg.com/html-comment-regex/-/html-comment-regex-1.1.2.tgz#97d4688aeb5c81886a364faa0cad1dda14d433a7"
+  integrity sha512-P+M65QY2JQ5Y0G9KKdlDpo0zK+/OHptU5AaBwUfAIDJZk1MYf32Frm84EcOytfJE0t5JvkAnKlmjsXDnWzCJmQ==
+
+html-encoding-sniffer@^2.0.1:
+  version "2.0.1"
+  resolved "https://registry.yarnpkg.com/html-encoding-sniffer/-/html-encoding-sniffer-2.0.1.tgz#42a6dc4fd33f00281176e8b23759ca4e4fa185f3"
+  integrity sha512-D5JbOMBIR/TVZkubHT+OyT2705QvogUW4IBn6nHd756OwieSF9aDYFj4dv6HHEVGYbHaLETa3WggZYWWMyy3ZQ==
+  dependencies:
+    whatwg-encoding "^1.0.5"
+
+html-entities@^1.2.1, html-entities@^1.3.1:
+  version "1.4.0"
+  resolved "https://registry.yarnpkg.com/html-entities/-/html-entities-1.4.0.tgz#cfbd1b01d2afaf9adca1b10ae7dffab98c71d2dc"
+  integrity sha512-8nxjcBcd8wovbeKx7h3wTji4e6+rhaVuPNpMqwWgnHh+N9ToqsCs6XztWRBPQ+UtzsoMAdKZtUENoVzU/EMtZA==
+
+html-escaper@^2.0.0:
+  version "2.0.2"
+  resolved "https://registry.yarnpkg.com/html-escaper/-/html-escaper-2.0.2.tgz#dfd60027da36a36dfcbe236262c00a5822681453"
+  integrity sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==
+
+html-minifier-terser@^5.0.1:
+  version "5.1.1"
+  resolved "https://registry.yarnpkg.com/html-minifier-terser/-/html-minifier-terser-5.1.1.tgz#922e96f1f3bb60832c2634b79884096389b1f054"
+  integrity sha512-ZPr5MNObqnV/T9akshPKbVgyOqLmy+Bxo7juKCfTfnjNniTAMdy4hz21YQqoofMBJD2kdREaqPPdThoR78Tgxg==
+  dependencies:
+    camel-case "^4.1.1"
+    clean-css "^4.2.3"
+    commander "^4.1.1"
+    he "^1.2.0"
+    param-case "^3.0.3"
+    relateurl "^0.2.7"
+    terser "^4.6.3"
+
+html-webpack-plugin@4.5.0:
+  version "4.5.0"
+  resolved "https://registry.yarnpkg.com/html-webpack-plugin/-/html-webpack-plugin-4.5.0.tgz#625097650886b97ea5dae331c320e3238f6c121c"
+  integrity sha512-MouoXEYSjTzCrjIxWwg8gxL5fE2X2WZJLmBYXlaJhQUH5K/b5OrqmV7T4dB7iu0xkmJ6JlUuV6fFVtnqbPopZw==
+  dependencies:
+    "@types/html-minifier-terser" "^5.0.0"
+    "@types/tapable" "^1.0.5"
+    "@types/webpack" "^4.41.8"
+    html-minifier-terser "^5.0.1"
+    loader-utils "^1.2.3"
+    lodash "^4.17.15"
+    pretty-error "^2.1.1"
+    tapable "^1.1.3"
+    util.promisify "1.0.0"
+
+htmlparser2@^3.10.1:
+  version "3.10.1"
+  resolved "https://registry.yarnpkg.com/htmlparser2/-/htmlparser2-3.10.1.tgz#bd679dc3f59897b6a34bb10749c855bb53a9392f"
+  integrity sha512-IgieNijUMbkDovyoKObU1DUhm1iwNYE/fuifEoEHfd1oZKZDaONBSkal7Y01shxsM49R4XaMdGez3WnF9UfiCQ==
+  dependencies:
+    domelementtype "^1.3.1"
+    domhandler "^2.3.0"
+    domutils "^1.5.1"
+    entities "^1.1.1"
+    inherits "^2.0.1"
+    readable-stream "^3.1.1"
+
+http-deceiver@^1.2.7:
+  version "1.2.7"
+  resolved "https://registry.yarnpkg.com/http-deceiver/-/http-deceiver-1.2.7.tgz#fa7168944ab9a519d337cb0bec7284dc3e723d87"
+  integrity sha1-+nFolEq5pRnTN8sL7HKE3D5yPYc=
+
+http-errors@1.7.2:
+  version "1.7.2"
+  resolved "https://registry.yarnpkg.com/http-errors/-/http-errors-1.7.2.tgz#4f5029cf13239f31036e5b2e55292bcfbcc85c8f"
+  integrity sha512-uUQBt3H/cSIVfch6i1EuPNy/YsRSOUBXTVfZ+yR7Zjez3qjBz6i9+i4zjNaoqcoFVI4lQJ5plg63TvGfRSDCRg==
+  dependencies:
+    depd "~1.1.2"
+    inherits "2.0.3"
+    setprototypeof "1.1.1"
+    statuses ">= 1.5.0 < 2"
+    toidentifier "1.0.0"
+
+http-errors@~1.6.2:
+  version "1.6.3"
+  resolved "https://registry.yarnpkg.com/http-errors/-/http-errors-1.6.3.tgz#8b55680bb4be283a0b5bf4ea2e38580be1d9320d"
+  integrity sha1-i1VoC7S+KDoLW/TqLjhYC+HZMg0=
+  dependencies:
+    depd "~1.1.2"
+    inherits "2.0.3"
+    setprototypeof "1.1.0"
+    statuses ">= 1.4.0 < 2"
+
+http-errors@~1.7.2:
+  version "1.7.3"
+  resolved "https://registry.yarnpkg.com/http-errors/-/http-errors-1.7.3.tgz#6c619e4f9c60308c38519498c14fbb10aacebb06"
+  integrity sha512-ZTTX0MWrsQ2ZAhA1cejAwDLycFsd7I7nVtnkT3Ol0aqodaKW+0CTZDQ1uBv5whptCnc8e8HeRRJxRs0kmm/Qfw==
+  dependencies:
+    depd "~1.1.2"
+    inherits "2.0.4"
+    setprototypeof "1.1.1"
+    statuses ">= 1.5.0 < 2"
+    toidentifier "1.0.0"
+
+http-parser-js@>=0.5.1:
+  version "0.5.3"
+  resolved "https://registry.yarnpkg.com/http-parser-js/-/http-parser-js-0.5.3.tgz#01d2709c79d41698bb01d4decc5e9da4e4a033d9"
+  integrity sha512-t7hjvef/5HEK7RWTdUzVUhl8zkEu+LlaE0IYzdMuvbSDipxBRpOn4Uhw8ZyECEa808iVT8XCjzo6xmYt4CiLZg==
+
+http-proxy-middleware@0.19.1:
+  version "0.19.1"
+  resolved "https://registry.yarnpkg.com/http-proxy-middleware/-/http-proxy-middleware-0.19.1.tgz#183c7dc4aa1479150306498c210cdaf96080a43a"
+  integrity sha512-yHYTgWMQO8VvwNS22eLLloAkvungsKdKTLO8AJlftYIKNfJr3GK3zK0ZCfzDDGUBttdGc8xFy1mCitvNKQtC3Q==
+  dependencies:
+    http-proxy "^1.17.0"
+    is-glob "^4.0.0"
+    lodash "^4.17.11"
+    micromatch "^3.1.10"
+
+http-proxy@^1.17.0:
+  version "1.18.1"
+  resolved "https://registry.yarnpkg.com/http-proxy/-/http-proxy-1.18.1.tgz#401541f0534884bbf95260334e72f88ee3976549"
+  integrity sha512-7mz/721AbnJwIVbnaSv1Cz3Am0ZLT/UBwkC92VlxhXv/k/BBQfM2fXElQNC27BVGr0uwUpplYPQM9LnaBMR5NQ==
+  dependencies:
+    eventemitter3 "^4.0.0"
+    follow-redirects "^1.0.0"
+    requires-port "^1.0.0"
+
+http-signature@~1.2.0:
+  version "1.2.0"
+  resolved "https://registry.yarnpkg.com/http-signature/-/http-signature-1.2.0.tgz#9aecd925114772f3d95b65a60abb8f7c18fbace1"
+  integrity sha1-muzZJRFHcvPZW2WmCruPfBj7rOE=
+  dependencies:
+    assert-plus "^1.0.0"
+    jsprim "^1.2.2"
+    sshpk "^1.7.0"
+
+https-browserify@^1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/https-browserify/-/https-browserify-1.0.0.tgz#ec06c10e0a34c0f2faf199f7fd7fc78fffd03c73"
+  integrity sha1-7AbBDgo0wPL68Zn3/X/Hj//QPHM=
+
+human-signals@^1.1.1:
+  version "1.1.1"
+  resolved "https://registry.yarnpkg.com/human-signals/-/human-signals-1.1.1.tgz#c5b1cd14f50aeae09ab6c59fe63ba3395fe4dfa3"
+  integrity sha512-SEQu7vl8KjNL2eoGBLF3+wAjpsNfA9XMlXAYj/3EdaNfAlxKthD1xjEQfGOUhllCGGJVNY34bRr6lPINhNjyZw==
+
+iconv-lite@0.4.24:
+  version "0.4.24"
+  resolved "https://registry.yarnpkg.com/iconv-lite/-/iconv-lite-0.4.24.tgz#2022b4b25fbddc21d2f524974a474aafe733908b"
+  integrity sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA==
+  dependencies:
+    safer-buffer ">= 2.1.2 < 3"
+
+icss-utils@^4.0.0, icss-utils@^4.1.1:
+  version "4.1.1"
+  resolved "https://registry.yarnpkg.com/icss-utils/-/icss-utils-4.1.1.tgz#21170b53789ee27447c2f47dd683081403f9a467"
+  integrity sha512-4aFq7wvWyMHKgxsH8QQtGpvbASCf+eM3wPRLI6R+MgAnTCZ6STYsRvttLvRWK0Nfif5piF394St3HeJDaljGPA==
+  dependencies:
+    postcss "^7.0.14"
+
+identity-obj-proxy@3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/identity-obj-proxy/-/identity-obj-proxy-3.0.0.tgz#94d2bda96084453ef36fbc5aaec37e0f79f1fc14"
+  integrity sha1-lNK9qWCERT7zb7xarsN+D3nx/BQ=
+  dependencies:
+    harmony-reflect "^1.4.6"
+
+ieee754@^1.1.4:
+  version "1.2.1"
+  resolved "https://registry.yarnpkg.com/ieee754/-/ieee754-1.2.1.tgz#8eb7a10a63fff25d15a57b001586d177d1b0d352"
+  integrity sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==
+
+iferr@^0.1.5:
+  version "0.1.5"
+  resolved "https://registry.yarnpkg.com/iferr/-/iferr-0.1.5.tgz#c60eed69e6d8fdb6b3104a1fcbca1c192dc5b501"
+  integrity sha1-xg7taebY/bazEEofy8ocGS3FtQE=
+
+ignore@^4.0.6:
+  version "4.0.6"
+  resolved "https://registry.yarnpkg.com/ignore/-/ignore-4.0.6.tgz#750e3db5862087b4737ebac8207ffd1ef27b25fc"
+  integrity sha512-cyFDKrqc/YdcWFniJhzI42+AzS+gNwmUzOSFcRCQYwySuBBBy/KjuxWLZ/FHEH6Moq1NizMOBWyTcv8O4OZIMg==
+
+ignore@^5.1.4:
+  version "5.1.8"
+  resolved "https://registry.yarnpkg.com/ignore/-/ignore-5.1.8.tgz#f150a8b50a34289b33e22f5889abd4d8016f0e57"
+  integrity sha512-BMpfD7PpiETpBl/A6S498BaIJ6Y/ABT93ETbby2fP00v4EbvPBXWEoaR1UBPKs3iR53pJY7EtZk5KACI57i1Uw==
+
+immer@8.0.1:
+  version "8.0.1"
+  resolved "https://registry.yarnpkg.com/immer/-/immer-8.0.1.tgz#9c73db683e2b3975c424fb0572af5889877ae656"
+  integrity sha512-aqXhGP7//Gui2+UrEtvxZxSquQVXTpZ7KDxfCcKAF3Vysvw0CViVaW9RZ1j1xlIYqaaaipBoqdqeibkc18PNvA==
+
+import-cwd@^2.0.0:
+  version "2.1.0"
+  resolved "https://registry.yarnpkg.com/import-cwd/-/import-cwd-2.1.0.tgz#aa6cf36e722761285cb371ec6519f53e2435b0a9"
+  integrity sha1-qmzzbnInYShcs3HsZRn1PiQ1sKk=
+  dependencies:
+    import-from "^2.1.0"
+
+import-fresh@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/import-fresh/-/import-fresh-2.0.0.tgz#d81355c15612d386c61f9ddd3922d4304822a546"
+  integrity sha1-2BNVwVYS04bGH53dOSLUMEgipUY=
+  dependencies:
+    caller-path "^2.0.0"
+    resolve-from "^3.0.0"
+
+import-fresh@^3.0.0, import-fresh@^3.1.0, import-fresh@^3.2.1:
+  version "3.3.0"
+  resolved "https://registry.yarnpkg.com/import-fresh/-/import-fresh-3.3.0.tgz#37162c25fcb9ebaa2e6e53d5b4d88ce17d9e0c2b"
+  integrity sha512-veYYhQa+D1QBKznvhUHxb8faxlrwUnxseDAbAp457E0wLNio2bOSKnjYDhMj+YiAq61xrMGhQk9iXVk5FzgQMw==
+  dependencies:
+    parent-module "^1.0.0"
+    resolve-from "^4.0.0"
+
+import-from@^2.1.0:
+  version "2.1.0"
+  resolved "https://registry.yarnpkg.com/import-from/-/import-from-2.1.0.tgz#335db7f2a7affd53aaa471d4b8021dee36b7f3b1"
+  integrity sha1-M1238qev/VOqpHHUuAId7ja387E=
+  dependencies:
+    resolve-from "^3.0.0"
+
+import-local@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/import-local/-/import-local-2.0.0.tgz#55070be38a5993cf18ef6db7e961f5bee5c5a09d"
+  integrity sha512-b6s04m3O+s3CGSbqDIyP4R6aAwAeYlVq9+WUWep6iHa8ETRf9yei1U48C5MmfJmV9AiLYYBKPMq/W+/WRpQmCQ==
+  dependencies:
+    pkg-dir "^3.0.0"
+    resolve-cwd "^2.0.0"
+
+import-local@^3.0.2:
+  version "3.0.2"
+  resolved "https://registry.yarnpkg.com/import-local/-/import-local-3.0.2.tgz#a8cfd0431d1de4a2199703d003e3e62364fa6db6"
+  integrity sha512-vjL3+w0oulAVZ0hBHnxa/Nm5TAurf9YLQJDhqRZyqb+VKGOB6LU8t9H1Nr5CIo16vh9XfJTOoHwU0B71S557gA==
+  dependencies:
+    pkg-dir "^4.2.0"
+    resolve-cwd "^3.0.0"
+
+imurmurhash@^0.1.4:
+  version "0.1.4"
+  resolved "https://registry.yarnpkg.com/imurmurhash/-/imurmurhash-0.1.4.tgz#9218b9b2b928a238b13dc4fb6b6d576f231453ea"
+  integrity sha1-khi5srkoojixPcT7a21XbyMUU+o=
+
+indent-string@^4.0.0:
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/indent-string/-/indent-string-4.0.0.tgz#624f8f4497d619b2d9768531d58f4122854d7251"
+  integrity sha512-EdDDZu4A2OyIK7Lr/2zG+w5jmbuk1DVBnEwREQvBzspBJkCEbRa8GxU1lghYcaGJCnRWibjDXlq779X1/y5xwg==
+
+indexes-of@^1.0.1:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/indexes-of/-/indexes-of-1.0.1.tgz#f30f716c8e2bd346c7b67d3df3915566a7c05607"
+  integrity sha1-8w9xbI4r00bHtn0985FVZqfAVgc=
+
+infer-owner@^1.0.3, infer-owner@^1.0.4:
+  version "1.0.4"
+  resolved "https://registry.yarnpkg.com/infer-owner/-/infer-owner-1.0.4.tgz#c4cefcaa8e51051c2a40ba2ce8a3d27295af9467"
+  integrity sha512-IClj+Xz94+d7irH5qRyfJonOdfTzuDaifE6ZPWfx0N0+/ATZCbuTPq2prFl526urkQd90WyUKIh1DfBQ2hMz9A==
+
+inflight@^1.0.4:
+  version "1.0.6"
+  resolved "https://registry.yarnpkg.com/inflight/-/inflight-1.0.6.tgz#49bd6331d7d02d0c09bc910a1075ba8165b56df9"
+  integrity sha1-Sb1jMdfQLQwJvJEKEHW6gWW1bfk=
+  dependencies:
+    once "^1.3.0"
+    wrappy "1"
+
+inherits@2, inherits@2.0.4, inherits@^2.0.1, inherits@^2.0.3, inherits@^2.0.4, inherits@~2.0.1, inherits@~2.0.3:
+  version "2.0.4"
+  resolved "https://registry.yarnpkg.com/inherits/-/inherits-2.0.4.tgz#0fa2c64f932917c3433a0ded55363aae37416b7c"
+  integrity sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==
+
+inherits@2.0.1:
+  version "2.0.1"
+  resolved "https://registry.yarnpkg.com/inherits/-/inherits-2.0.1.tgz#b17d08d326b4423e568eff719f91b0b1cbdf69f1"
+  integrity sha1-sX0I0ya0Qj5Wjv9xn5GwscvfafE=
+
+inherits@2.0.3:
+  version "2.0.3"
+  resolved "https://registry.yarnpkg.com/inherits/-/inherits-2.0.3.tgz#633c2c83e3da42a502f52466022480f4208261de"
+  integrity sha1-Yzwsg+PaQqUC9SRmAiSA9CCCYd4=
+
+ini@^1.3.5:
+  version "1.3.8"
+  resolved "https://registry.yarnpkg.com/ini/-/ini-1.3.8.tgz#a29da425b48806f34767a4efce397269af28432c"
+  integrity sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew==
+
+internal-ip@^4.3.0:
+  version "4.3.0"
+  resolved "https://registry.yarnpkg.com/internal-ip/-/internal-ip-4.3.0.tgz#845452baad9d2ca3b69c635a137acb9a0dad0907"
+  integrity sha512-S1zBo1D6zcsyuC6PMmY5+55YMILQ9av8lotMx447Bq6SAgo/sDK6y6uUKmuYhW7eacnIhFfsPmCNYdDzsnnDCg==
+  dependencies:
+    default-gateway "^4.2.0"
+    ipaddr.js "^1.9.0"
+
+internal-slot@^1.0.3:
+  version "1.0.3"
+  resolved "https://registry.yarnpkg.com/internal-slot/-/internal-slot-1.0.3.tgz#7347e307deeea2faac2ac6205d4bc7d34967f59c"
+  integrity sha512-O0DB1JC/sPyZl7cIo78n5dR7eUSwwpYPiXRhTzNxZVAMUuB8vlnRFyLxdrVToks6XPLVnFfbzaVd5WLjhgg+vA==
+  dependencies:
+    get-intrinsic "^1.1.0"
+    has "^1.0.3"
+    side-channel "^1.0.4"
+
+invariant@^2.2.4:
+  version "2.2.4"
+  resolved "https://registry.yarnpkg.com/invariant/-/invariant-2.2.4.tgz#610f3c92c9359ce1db616e538008d23ff35158e6"
+  integrity sha512-phJfQVBuaJM5raOpJjSfkiD6BpbCE4Ns//LaXl6wGYtUBY83nWS6Rf9tXm2e8VaK60JEjYldbPif/A2B1C2gNA==
+  dependencies:
+    loose-envify "^1.0.0"
+
+ip-regex@^2.1.0:
+  version "2.1.0"
+  resolved "https://registry.yarnpkg.com/ip-regex/-/ip-regex-2.1.0.tgz#fa78bf5d2e6913c911ce9f819ee5146bb6d844e9"
+  integrity sha1-+ni/XS5pE8kRzp+BnuUUa7bYROk=
+
+ip@^1.1.0, ip@^1.1.5:
+  version "1.1.5"
+  resolved "https://registry.yarnpkg.com/ip/-/ip-1.1.5.tgz#bdded70114290828c0a039e72ef25f5aaec4354a"
+  integrity sha1-vd7XARQpCCjAoDnnLvJfWq7ENUo=
+
+ipaddr.js@1.9.1, ipaddr.js@^1.9.0:
+  version "1.9.1"
+  resolved "https://registry.yarnpkg.com/ipaddr.js/-/ipaddr.js-1.9.1.tgz#bff38543eeb8984825079ff3a2a8e6cbd46781b3"
+  integrity sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g==
+
+is-absolute-url@^2.0.0:
+  version "2.1.0"
+  resolved "https://registry.yarnpkg.com/is-absolute-url/-/is-absolute-url-2.1.0.tgz#50530dfb84fcc9aa7dbe7852e83a37b93b9f2aa6"
+  integrity sha1-UFMN+4T8yap9vnhS6Do3uTufKqY=
+
+is-absolute-url@^3.0.3:
+  version "3.0.3"
+  resolved "https://registry.yarnpkg.com/is-absolute-url/-/is-absolute-url-3.0.3.tgz#96c6a22b6a23929b11ea0afb1836c36ad4a5d698"
+  integrity sha512-opmNIX7uFnS96NtPmhWQgQx6/NYFgsUXYMllcfzwWKUMwfo8kku1TvE6hkNcH+Q1ts5cMVrsY7j0bxXQDciu9Q==
+
+is-accessor-descriptor@^0.1.6:
+  version "0.1.6"
+  resolved "https://registry.yarnpkg.com/is-accessor-descriptor/-/is-accessor-descriptor-0.1.6.tgz#a9e12cb3ae8d876727eeef3843f8a0897b5c98d6"
+  integrity sha1-qeEss66Nh2cn7u84Q/igiXtcmNY=
+  dependencies:
+    kind-of "^3.0.2"
+
+is-accessor-descriptor@^1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/is-accessor-descriptor/-/is-accessor-descriptor-1.0.0.tgz#169c2f6d3df1f992618072365c9b0ea1f6878656"
+  integrity sha512-m5hnHTkcVsPfqx3AKlyttIPb7J+XykHvJP2B9bZDjlhLIoEq4XoK64Vg7boZlVWYK6LUY94dYPEE7Lh0ZkZKcQ==
+  dependencies:
+    kind-of "^6.0.0"
+
+is-arguments@^1.0.4:
+  version "1.1.0"
+  resolved "https://registry.yarnpkg.com/is-arguments/-/is-arguments-1.1.0.tgz#62353031dfbee07ceb34656a6bde59efecae8dd9"
+  integrity sha512-1Ij4lOMPl/xB5kBDn7I+b2ttPMKa8szhEIrXDuXQD/oe3HJLTLhqhgGspwgyGd6MOywBUqVvYicF72lkgDnIHg==
+  dependencies:
+    call-bind "^1.0.0"
+
+is-arrayish@^0.2.1:
+  version "0.2.1"
+  resolved "https://registry.yarnpkg.com/is-arrayish/-/is-arrayish-0.2.1.tgz#77c99840527aa8ecb1a8ba697b80645a7a926a9d"
+  integrity sha1-d8mYQFJ6qOyxqLppe4BkWnqSap0=
+
+is-arrayish@^0.3.1:
+  version "0.3.2"
+  resolved "https://registry.yarnpkg.com/is-arrayish/-/is-arrayish-0.3.2.tgz#4574a2ae56f7ab206896fb431eaeed066fdf8f03"
+  integrity sha512-eVRqCvVlZbuw3GrM63ovNSNAeA1K16kaR/LRY/92w0zxQ5/1YzwblUX652i4Xs9RwAGjW9d9y6X88t8OaAJfWQ==
+
+is-binary-path@^1.0.0:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/is-binary-path/-/is-binary-path-1.0.1.tgz#75f16642b480f187a711c814161fd3a4a7655898"
+  integrity sha1-dfFmQrSA8YenEcgUFh/TpKdlWJg=
+  dependencies:
+    binary-extensions "^1.0.0"
+
+is-binary-path@~2.1.0:
+  version "2.1.0"
+  resolved "https://registry.yarnpkg.com/is-binary-path/-/is-binary-path-2.1.0.tgz#ea1f7f3b80f064236e83470f86c09c254fb45b09"
+  integrity sha512-ZMERYes6pDydyuGidse7OsHxtbI7WVeUEozgR/g7rd0xUimYNlvZRE/K2MgZTjWy725IfelLeVcEM97mmtRGXw==
+  dependencies:
+    binary-extensions "^2.0.0"
+
+is-buffer@^1.1.5:
+  version "1.1.6"
+  resolved "https://registry.yarnpkg.com/is-buffer/-/is-buffer-1.1.6.tgz#efaa2ea9daa0d7ab2ea13a97b2b8ad51fefbe8be"
+  integrity sha512-NcdALwpXkTm5Zvvbk7owOUSvVvBKDgKP5/ewfXEznmQFfs4ZRmanOeKBTjRVjka3QFoN6XJ+9F3USqfHqTaU5w==
+
+is-callable@^1.1.4, is-callable@^1.2.2:
+  version "1.2.3"
+  resolved "https://registry.yarnpkg.com/is-callable/-/is-callable-1.2.3.tgz#8b1e0500b73a1d76c70487636f368e519de8db8e"
+  integrity sha512-J1DcMe8UYTBSrKezuIUTUwjXsho29693unXM2YhJUTR2txK/eG47bvNa/wipPFmZFgr/N6f1GA66dv0mEyTIyQ==
+
+is-ci@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/is-ci/-/is-ci-2.0.0.tgz#6bc6334181810e04b5c22b3d589fdca55026404c"
+  integrity sha512-YfJT7rkpQB0updsdHLGWrvhBJfcfzNNawYDNIyQXJz0IViGf75O8EBPKSdvw2rF+LGCsX4FZ8tcr3b19LcZq4w==
+  dependencies:
+    ci-info "^2.0.0"
+
+is-color-stop@^1.0.0:
+  version "1.1.0"
+  resolved "https://registry.yarnpkg.com/is-color-stop/-/is-color-stop-1.1.0.tgz#cfff471aee4dd5c9e158598fbe12967b5cdad345"
+  integrity sha1-z/9HGu5N1cnhWFmPvhKWe1za00U=
+  dependencies:
+    css-color-names "^0.0.4"
+    hex-color-regex "^1.1.0"
+    hsl-regex "^1.0.0"
+    hsla-regex "^1.0.0"
+    rgb-regex "^1.0.1"
+    rgba-regex "^1.0.0"
+
+is-core-module@^2.0.0, is-core-module@^2.2.0:
+  version "2.2.0"
+  resolved "https://registry.yarnpkg.com/is-core-module/-/is-core-module-2.2.0.tgz#97037ef3d52224d85163f5597b2b63d9afed981a"
+  integrity sha512-XRAfAdyyY5F5cOXn7hYQDqh2Xmii+DEfIcQGxK/uNwMHhIkPWO0g8msXcbzLe+MpGoR951MlqM/2iIlU4vKDdQ==
+  dependencies:
+    has "^1.0.3"
+
+is-data-descriptor@^0.1.4:
+  version "0.1.4"
+  resolved "https://registry.yarnpkg.com/is-data-descriptor/-/is-data-descriptor-0.1.4.tgz#0b5ee648388e2c860282e793f1856fec3f301b56"
+  integrity sha1-C17mSDiOLIYCgueT8YVv7D8wG1Y=
+  dependencies:
+    kind-of "^3.0.2"
+
+is-data-descriptor@^1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/is-data-descriptor/-/is-data-descriptor-1.0.0.tgz#d84876321d0e7add03990406abbbbd36ba9268c7"
+  integrity sha512-jbRXy1FmtAoCjQkVmIVYwuuqDFUbaOeDjmed1tOGPrsMhtJA4rD9tkgA0F1qJ3gRFRXcHYVkdeaP50Q5rE/jLQ==
+  dependencies:
+    kind-of "^6.0.0"
+
+is-date-object@^1.0.1:
+  version "1.0.2"
+  resolved "https://registry.yarnpkg.com/is-date-object/-/is-date-object-1.0.2.tgz#bda736f2cd8fd06d32844e7743bfa7494c3bfd7e"
+  integrity sha512-USlDT524woQ08aoZFzh3/Z6ch9Y/EWXEHQ/AaRN0SkKq4t2Jw2R2339tSXmwuVoY7LLlBCbOIlx2myP/L5zk0g==
+
+is-descriptor@^0.1.0:
+  version "0.1.6"
+  resolved "https://registry.yarnpkg.com/is-descriptor/-/is-descriptor-0.1.6.tgz#366d8240dde487ca51823b1ab9f07a10a78251ca"
+  integrity sha512-avDYr0SB3DwO9zsMov0gKCESFYqCnE4hq/4z3TdUlukEy5t9C0YRq7HLrsN52NAcqXKaepeCD0n+B0arnVG3Hg==
+  dependencies:
+    is-accessor-descriptor "^0.1.6"
+    is-data-descriptor "^0.1.4"
+    kind-of "^5.0.0"
+
+is-descriptor@^1.0.0, is-descriptor@^1.0.2:
+  version "1.0.2"
+  resolved "https://registry.yarnpkg.com/is-descriptor/-/is-descriptor-1.0.2.tgz#3b159746a66604b04f8c81524ba365c5f14d86ec"
+  integrity sha512-2eis5WqQGV7peooDyLmNEPUrps9+SXX5c9pL3xEB+4e9HnGuDa7mB7kHxHw4CbqS9k1T2hOH3miL8n8WtiYVtg==
+  dependencies:
+    is-accessor-descriptor "^1.0.0"
+    is-data-descriptor "^1.0.0"
+    kind-of "^6.0.2"
+
+is-directory@^0.3.1:
+  version "0.3.1"
+  resolved "https://registry.yarnpkg.com/is-directory/-/is-directory-0.3.1.tgz#61339b6f2475fc772fd9c9d83f5c8575dc154ae1"
+  integrity sha1-YTObbyR1/Hcv2cnYP1yFddwVSuE=
+
+is-docker@^2.0.0:
+  version "2.1.1"
+  resolved "https://registry.yarnpkg.com/is-docker/-/is-docker-2.1.1.tgz#4125a88e44e450d384e09047ede71adc2d144156"
+  integrity sha512-ZOoqiXfEwtGknTiuDEy8pN2CfE3TxMHprvNer1mXiqwkOT77Rw3YVrUQ52EqAOU3QAWDQ+bQdx7HJzrv7LS2Hw==
+
+is-extendable@^0.1.0, is-extendable@^0.1.1:
+  version "0.1.1"
+  resolved "https://registry.yarnpkg.com/is-extendable/-/is-extendable-0.1.1.tgz#62b110e289a471418e3ec36a617d472e301dfc89"
+  integrity sha1-YrEQ4omkcUGOPsNqYX1HLjAd/Ik=
+
+is-extendable@^1.0.1:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/is-extendable/-/is-extendable-1.0.1.tgz#a7470f9e426733d81bd81e1155264e3a3507cab4"
+  integrity sha512-arnXMxT1hhoKo9k1LZdmlNyJdDDfy2v0fXjFlmok4+i8ul/6WlbVge9bhM74OpNPQPMGUToDtz+KXa1PneJxOA==
+  dependencies:
+    is-plain-object "^2.0.4"
+
+is-extglob@^2.1.0, is-extglob@^2.1.1:
+  version "2.1.1"
+  resolved "https://registry.yarnpkg.com/is-extglob/-/is-extglob-2.1.1.tgz#a88c02535791f02ed37c76a1b9ea9773c833f8c2"
+  integrity sha1-qIwCU1eR8C7TfHahueqXc8gz+MI=
+
+is-fullwidth-code-point@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/is-fullwidth-code-point/-/is-fullwidth-code-point-2.0.0.tgz#a3b30a5c4f199183167aaab93beefae3ddfb654f"
+  integrity sha1-o7MKXE8ZkYMWeqq5O+764937ZU8=
+
+is-fullwidth-code-point@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz#f116f8064fe90b3f7844a38997c0b75051269f1d"
+  integrity sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==
+
+is-generator-fn@^2.0.0:
+  version "2.1.0"
+  resolved "https://registry.yarnpkg.com/is-generator-fn/-/is-generator-fn-2.1.0.tgz#7d140adc389aaf3011a8f2a2a4cfa6faadffb118"
+  integrity sha512-cTIB4yPYL/Grw0EaSzASzg6bBy9gqCofvWN8okThAYIxKJZC+udlRAmGbM0XLeniEJSs8uEgHPGuHSe1XsOLSQ==
+
+is-glob@^3.1.0:
+  version "3.1.0"
+  resolved "https://registry.yarnpkg.com/is-glob/-/is-glob-3.1.0.tgz#7ba5ae24217804ac70707b96922567486cc3e84a"
+  integrity sha1-e6WuJCF4BKxwcHuWkiVnSGzD6Eo=
+  dependencies:
+    is-extglob "^2.1.0"
+
+is-glob@^4.0.0, is-glob@^4.0.1, is-glob@~4.0.1:
+  version "4.0.1"
+  resolved "https://registry.yarnpkg.com/is-glob/-/is-glob-4.0.1.tgz#7567dbe9f2f5e2467bc77ab83c4a29482407a5dc"
+  integrity sha512-5G0tKtBTFImOqDnLB2hG6Bp2qcKEFduo4tZu9MT/H6NQv/ghhy30o55ufafxJ/LdH79LLs2Kfrn85TLKyA7BUg==
+  dependencies:
+    is-extglob "^2.1.1"
+
+is-module@^1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/is-module/-/is-module-1.0.0.tgz#3258fb69f78c14d5b815d664336b4cffb6441591"
+  integrity sha1-Mlj7afeMFNW4FdZkM2tM/7ZEFZE=
+
+is-negative-zero@^2.0.1:
+  version "2.0.1"
+  resolved "https://registry.yarnpkg.com/is-negative-zero/-/is-negative-zero-2.0.1.tgz#3de746c18dda2319241a53675908d8f766f11c24"
+  integrity sha512-2z6JzQvZRa9A2Y7xC6dQQm4FSTSTNWjKIYYTt4246eMTJmIo0Q+ZyOsU66X8lxK1AbB92dFeglPLrhwpeRKO6w==
+
+is-number@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/is-number/-/is-number-3.0.0.tgz#24fd6201a4782cf50561c810276afc7d12d71195"
+  integrity sha1-JP1iAaR4LPUFYcgQJ2r8fRLXEZU=
+  dependencies:
+    kind-of "^3.0.2"
+
+is-number@^7.0.0:
+  version "7.0.0"
+  resolved "https://registry.yarnpkg.com/is-number/-/is-number-7.0.0.tgz#7535345b896734d5f80c4d06c50955527a14f12b"
+  integrity sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==
+
+is-obj@^1.0.1:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/is-obj/-/is-obj-1.0.1.tgz#3e4729ac1f5fde025cd7d83a896dab9f4f67db0f"
+  integrity sha1-PkcprB9f3gJc19g6iW2rn09n2w8=
+
+is-obj@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/is-obj/-/is-obj-2.0.0.tgz#473fb05d973705e3fd9620545018ca8e22ef4982"
+  integrity sha512-drqDG3cbczxxEJRoOXcOjtdp1J/lyp1mNn0xaznRs8+muBhgQcrnbspox5X5fOw0HnMnbfDzvnEMEtqDEJEo8w==
+
+is-path-cwd@^2.0.0:
+  version "2.2.0"
+  resolved "https://registry.yarnpkg.com/is-path-cwd/-/is-path-cwd-2.2.0.tgz#67d43b82664a7b5191fd9119127eb300048a9fdb"
+  integrity sha512-w942bTcih8fdJPJmQHFzkS76NEP8Kzzvmw92cXsazb8intwLqPibPPdXf4ANdKV3rYMuuQYGIWtvz9JilB3NFQ==
+
+is-path-in-cwd@^2.0.0:
+  version "2.1.0"
+  resolved "https://registry.yarnpkg.com/is-path-in-cwd/-/is-path-in-cwd-2.1.0.tgz#bfe2dca26c69f397265a4009963602935a053acb"
+  integrity sha512-rNocXHgipO+rvnP6dk3zI20RpOtrAM/kzbB258Uw5BWr3TpXi861yzjo16Dn4hUox07iw5AyeMLHWsujkjzvRQ==
+  dependencies:
+    is-path-inside "^2.1.0"
+
+is-path-inside@^2.1.0:
+  version "2.1.0"
+  resolved "https://registry.yarnpkg.com/is-path-inside/-/is-path-inside-2.1.0.tgz#7c9810587d659a40d27bcdb4d5616eab059494b2"
+  integrity sha512-wiyhTzfDWsvwAW53OBWF5zuvaOGlZ6PwYxAbPVDhpm+gM09xKQGjBq/8uYN12aDvMxnAnq3dxTyoSoRNmg5YFg==
+  dependencies:
+    path-is-inside "^1.0.2"
+
+is-plain-obj@^1.0.0:
+  version "1.1.0"
+  resolved "https://registry.yarnpkg.com/is-plain-obj/-/is-plain-obj-1.1.0.tgz#71a50c8429dfca773c92a390a4a03b39fcd51d3e"
+  integrity sha1-caUMhCnfync8kqOQpKA7OfzVHT4=
+
+is-plain-object@^2.0.3, is-plain-object@^2.0.4:
+  version "2.0.4"
+  resolved "https://registry.yarnpkg.com/is-plain-object/-/is-plain-object-2.0.4.tgz#2c163b3fafb1b606d9d17928f05c2a1c38e07677"
+  integrity sha512-h5PpgXkWitc38BBMYawTYMWJHFZJVnBquFE57xFpjB8pJFiF6gZ+bU+WyI/yqXiFR5mdLsgYNaPe8uao6Uv9Og==
+  dependencies:
+    isobject "^3.0.1"
+
+is-potential-custom-element-name@^1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/is-potential-custom-element-name/-/is-potential-custom-element-name-1.0.0.tgz#0c52e54bcca391bb2c494b21e8626d7336c6e397"
+  integrity sha1-DFLlS8yjkbssSUsh6GJtczbG45c=
+
+is-regex@^1.0.4, is-regex@^1.1.1:
+  version "1.1.2"
+  resolved "https://registry.yarnpkg.com/is-regex/-/is-regex-1.1.2.tgz#81c8ebde4db142f2cf1c53fc86d6a45788266251"
+  integrity sha512-axvdhb5pdhEVThqJzYXwMlVuZwC+FF2DpcOhTS+y/8jVq4trxyPgfcwIxIKiyeuLlSQYKkmUaPQJ8ZE4yNKXDg==
+  dependencies:
+    call-bind "^1.0.2"
+    has-symbols "^1.0.1"
+
+is-regexp@^1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/is-regexp/-/is-regexp-1.0.0.tgz#fd2d883545c46bac5a633e7b9a09e87fa2cb5069"
+  integrity sha1-/S2INUXEa6xaYz57mgnof6LLUGk=
+
+is-resolvable@^1.0.0:
+  version "1.1.0"
+  resolved "https://registry.yarnpkg.com/is-resolvable/-/is-resolvable-1.1.0.tgz#fb18f87ce1feb925169c9a407c19318a3206ed88"
+  integrity sha512-qgDYXFSR5WvEfuS5dMj6oTMEbrrSaM0CrFk2Yiq/gXnBvD9pMa2jGXxyhGLfvhZpuMZe18CJpFxAt3CRs42NMg==
+
+is-root@2.1.0:
+  version "2.1.0"
+  resolved "https://registry.yarnpkg.com/is-root/-/is-root-2.1.0.tgz#809e18129cf1129644302a4f8544035d51984a9c"
+  integrity sha512-AGOriNp96vNBd3HtU+RzFEc75FfR5ymiYv8E553I71SCeXBiMsVDUtdio1OEFvrPyLIQ9tVR5RxXIFe5PUFjMg==
+
+is-stream@^1.1.0:
+  version "1.1.0"
+  resolved "https://registry.yarnpkg.com/is-stream/-/is-stream-1.1.0.tgz#12d4a3dd4e68e0b79ceb8dbc84173ae80d91ca44"
+  integrity sha1-EtSj3U5o4Lec6428hBc66A2RykQ=
+
+is-stream@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/is-stream/-/is-stream-2.0.0.tgz#bde9c32680d6fae04129d6ac9d921ce7815f78e3"
+  integrity sha512-XCoy+WlUr7d1+Z8GgSuXmpuUFC9fOhRXglJMx+dwLKTkL44Cjd4W1Z5P+BQZpr+cR93aGP4S/s7Ftw6Nd/kiEw==
+
+is-string@^1.0.5:
+  version "1.0.5"
+  resolved "https://registry.yarnpkg.com/is-string/-/is-string-1.0.5.tgz#40493ed198ef3ff477b8c7f92f644ec82a5cd3a6"
+  integrity sha512-buY6VNRjhQMiF1qWDouloZlQbRhDPCebwxSjxMjxgemYT46YMd2NR0/H+fBhEfWX4A/w9TBJ+ol+okqJKFE6vQ==
+
+is-svg@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/is-svg/-/is-svg-3.0.0.tgz#9321dbd29c212e5ca99c4fa9794c714bcafa2f75"
+  integrity sha512-gi4iHK53LR2ujhLVVj+37Ykh9GLqYHX6JOVXbLAucaG/Cqw9xwdFOjDM2qeifLs1sF1npXXFvDu0r5HNgCMrzQ==
+  dependencies:
+    html-comment-regex "^1.1.0"
+
+is-symbol@^1.0.2:
+  version "1.0.3"
+  resolved "https://registry.yarnpkg.com/is-symbol/-/is-symbol-1.0.3.tgz#38e1014b9e6329be0de9d24a414fd7441ec61937"
+  integrity sha512-OwijhaRSgqvhm/0ZdAcXNZt9lYdKFpcRDT5ULUuYXPoT794UNOdU+gpT6Rzo7b4V2HUl/op6GqY894AZwv9faQ==
+  dependencies:
+    has-symbols "^1.0.1"
+
+is-typedarray@^1.0.0, is-typedarray@~1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/is-typedarray/-/is-typedarray-1.0.0.tgz#e479c80858df0c1b11ddda6940f96011fcda4a9a"
+  integrity sha1-5HnICFjfDBsR3dppQPlgEfzaSpo=
+
+is-windows@^1.0.2:
+  version "1.0.2"
+  resolved "https://registry.yarnpkg.com/is-windows/-/is-windows-1.0.2.tgz#d1850eb9791ecd18e6182ce12a30f396634bb19d"
+  integrity sha512-eXK1UInq2bPmjyX6e3VHIzMLobc4J94i4AWn+Hpq3OU5KkrRC96OAcR3PRJ/pGu6m8TRnBHP9dkXQVsT/COVIA==
+
+is-wsl@^1.1.0:
+  version "1.1.0"
+  resolved "https://registry.yarnpkg.com/is-wsl/-/is-wsl-1.1.0.tgz#1f16e4aa22b04d1336b66188a66af3c600c3a66d"
+  integrity sha1-HxbkqiKwTRM2tmGIpmrzxgDDpm0=
+
+is-wsl@^2.1.1, is-wsl@^2.2.0:
+  version "2.2.0"
+  resolved "https://registry.yarnpkg.com/is-wsl/-/is-wsl-2.2.0.tgz#74a4c76e77ca9fd3f932f290c17ea326cd157271"
+  integrity sha512-fKzAra0rGJUUBwGBgNkHZuToZcn+TtXHpeCgmkMJMMYx1sQDYaCSyjJBSCa2nH1DGm7s3n1oBnohoVTBaN7Lww==
+  dependencies:
+    is-docker "^2.0.0"
+
+isarray@0.0.1:
+  version "0.0.1"
+  resolved "https://registry.yarnpkg.com/isarray/-/isarray-0.0.1.tgz#8a18acfca9a8f4177e09abfc6038939b05d1eedf"
+  integrity sha1-ihis/Kmo9Bd+Cav8YDiTmwXR7t8=
+
+isarray@1.0.0, isarray@^1.0.0, isarray@~1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/isarray/-/isarray-1.0.0.tgz#bb935d48582cba168c06834957a54a3e07124f11"
+  integrity sha1-u5NdSFgsuhaMBoNJV6VKPgcSTxE=
+
+isexe@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/isexe/-/isexe-2.0.0.tgz#e8fbf374dc556ff8947a10dcb0572d633f2cfa10"
+  integrity sha1-6PvzdNxVb/iUehDcsFctYz8s+hA=
+
+isobject@^2.0.0:
+  version "2.1.0"
+  resolved "https://registry.yarnpkg.com/isobject/-/isobject-2.1.0.tgz#f065561096a3f1da2ef46272f815c840d87e0c89"
+  integrity sha1-8GVWEJaj8dou9GJy+BXIQNh+DIk=
+  dependencies:
+    isarray "1.0.0"
+
+isobject@^3.0.0, isobject@^3.0.1:
+  version "3.0.1"
+  resolved "https://registry.yarnpkg.com/isobject/-/isobject-3.0.1.tgz#4e431e92b11a9731636aa1f9c8d1ccbcfdab78df"
+  integrity sha1-TkMekrEalzFjaqH5yNHMvP2reN8=
+
+isstream@~0.1.2:
+  version "0.1.2"
+  resolved "https://registry.yarnpkg.com/isstream/-/isstream-0.1.2.tgz#47e63f7af55afa6f92e1500e690eb8b8529c099a"
+  integrity sha1-R+Y/evVa+m+S4VAOaQ64uFKcCZo=
+
+istanbul-lib-coverage@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/istanbul-lib-coverage/-/istanbul-lib-coverage-3.0.0.tgz#f5944a37c70b550b02a78a5c3b2055b280cec8ec"
+  integrity sha512-UiUIqxMgRDET6eR+o5HbfRYP1l0hqkWOs7vNxC/mggutCMUIhWMm8gAHb8tHlyfD3/l6rlgNA5cKdDzEAf6hEg==
+
+istanbul-lib-instrument@^4.0.0, istanbul-lib-instrument@^4.0.3:
+  version "4.0.3"
+  resolved "https://registry.yarnpkg.com/istanbul-lib-instrument/-/istanbul-lib-instrument-4.0.3.tgz#873c6fff897450118222774696a3f28902d77c1d"
+  integrity sha512-BXgQl9kf4WTCPCCpmFGoJkz/+uhvm7h7PFKUYxh7qarQd3ER33vHG//qaE8eN25l07YqZPpHXU9I09l/RD5aGQ==
+  dependencies:
+    "@babel/core" "^7.7.5"
+    "@istanbuljs/schema" "^0.1.2"
+    istanbul-lib-coverage "^3.0.0"
+    semver "^6.3.0"
+
+istanbul-lib-report@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/istanbul-lib-report/-/istanbul-lib-report-3.0.0.tgz#7518fe52ea44de372f460a76b5ecda9ffb73d8a6"
+  integrity sha512-wcdi+uAKzfiGT2abPpKZ0hSU1rGQjUQnLvtY5MpQ7QCTahD3VODhcu4wcfY1YtkGaDD5yuydOLINXsfbus9ROw==
+  dependencies:
+    istanbul-lib-coverage "^3.0.0"
+    make-dir "^3.0.0"
+    supports-color "^7.1.0"
+
+istanbul-lib-source-maps@^4.0.0:
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/istanbul-lib-source-maps/-/istanbul-lib-source-maps-4.0.0.tgz#75743ce6d96bb86dc7ee4352cf6366a23f0b1ad9"
+  integrity sha512-c16LpFRkR8vQXyHZ5nLpY35JZtzj1PQY1iZmesUbf1FZHbIupcWfjgOXBY9YHkLEQ6puz1u4Dgj6qmU/DisrZg==
+  dependencies:
+    debug "^4.1.1"
+    istanbul-lib-coverage "^3.0.0"
+    source-map "^0.6.1"
+
+istanbul-reports@^3.0.2:
+  version "3.0.2"
+  resolved "https://registry.yarnpkg.com/istanbul-reports/-/istanbul-reports-3.0.2.tgz#d593210e5000683750cb09fc0644e4b6e27fd53b"
+  integrity sha512-9tZvz7AiR3PEDNGiV9vIouQ/EAcqMXFmkcA1CDFTwOB98OZVDL0PH9glHotf5Ugp6GCOTypfzGWI/OqjWNCRUw==
+  dependencies:
+    html-escaper "^2.0.0"
+    istanbul-lib-report "^3.0.0"
+
+jest-changed-files@^26.6.2:
+  version "26.6.2"
+  resolved "https://registry.yarnpkg.com/jest-changed-files/-/jest-changed-files-26.6.2.tgz#f6198479e1cc66f22f9ae1e22acaa0b429c042d0"
+  integrity sha512-fDS7szLcY9sCtIip8Fjry9oGf3I2ht/QT21bAHm5Dmf0mD4X3ReNUf17y+bO6fR8WgbIZTlbyG1ak/53cbRzKQ==
+  dependencies:
+    "@jest/types" "^26.6.2"
+    execa "^4.0.0"
+    throat "^5.0.0"
+
+jest-circus@26.6.0:
+  version "26.6.0"
+  resolved "https://registry.yarnpkg.com/jest-circus/-/jest-circus-26.6.0.tgz#7d9647b2e7f921181869faae1f90a2629fd70705"
+  integrity sha512-L2/Y9szN6FJPWFK8kzWXwfp+FOR7xq0cUL4lIsdbIdwz3Vh6P1nrpcqOleSzr28zOtSHQNV9Z7Tl+KkuK7t5Ng==
+  dependencies:
+    "@babel/traverse" "^7.1.0"
+    "@jest/environment" "^26.6.0"
+    "@jest/test-result" "^26.6.0"
+    "@jest/types" "^26.6.0"
+    "@types/babel__traverse" "^7.0.4"
+    "@types/node" "*"
+    chalk "^4.0.0"
+    co "^4.6.0"
+    dedent "^0.7.0"
+    expect "^26.6.0"
+    is-generator-fn "^2.0.0"
+    jest-each "^26.6.0"
+    jest-matcher-utils "^26.6.0"
+    jest-message-util "^26.6.0"
+    jest-runner "^26.6.0"
+    jest-runtime "^26.6.0"
+    jest-snapshot "^26.6.0"
+    jest-util "^26.6.0"
+    pretty-format "^26.6.0"
+    stack-utils "^2.0.2"
+    throat "^5.0.0"
+
+jest-cli@^26.6.0:
+  version "26.6.3"
+  resolved "https://registry.yarnpkg.com/jest-cli/-/jest-cli-26.6.3.tgz#43117cfef24bc4cd691a174a8796a532e135e92a"
+  integrity sha512-GF9noBSa9t08pSyl3CY4frMrqp+aQXFGFkf5hEPbh/pIUFYWMK6ZLTfbmadxJVcJrdRoChlWQsA2VkJcDFK8hg==
+  dependencies:
+    "@jest/core" "^26.6.3"
+    "@jest/test-result" "^26.6.2"
+    "@jest/types" "^26.6.2"
+    chalk "^4.0.0"
+    exit "^0.1.2"
+    graceful-fs "^4.2.4"
+    import-local "^3.0.2"
+    is-ci "^2.0.0"
+    jest-config "^26.6.3"
+    jest-util "^26.6.2"
+    jest-validate "^26.6.2"
+    prompts "^2.0.1"
+    yargs "^15.4.1"
+
+jest-config@^26.6.3:
+  version "26.6.3"
+  resolved "https://registry.yarnpkg.com/jest-config/-/jest-config-26.6.3.tgz#64f41444eef9eb03dc51d5c53b75c8c71f645349"
+  integrity sha512-t5qdIj/bCj2j7NFVHb2nFB4aUdfucDn3JRKgrZnplb8nieAirAzRSHP8uDEd+qV6ygzg9Pz4YG7UTJf94LPSyg==
+  dependencies:
+    "@babel/core" "^7.1.0"
+    "@jest/test-sequencer" "^26.6.3"
+    "@jest/types" "^26.6.2"
+    babel-jest "^26.6.3"
+    chalk "^4.0.0"
+    deepmerge "^4.2.2"
+    glob "^7.1.1"
+    graceful-fs "^4.2.4"
+    jest-environment-jsdom "^26.6.2"
+    jest-environment-node "^26.6.2"
+    jest-get-type "^26.3.0"
+    jest-jasmine2 "^26.6.3"
+    jest-regex-util "^26.0.0"
+    jest-resolve "^26.6.2"
+    jest-util "^26.6.2"
+    jest-validate "^26.6.2"
+    micromatch "^4.0.2"
+    pretty-format "^26.6.2"
+
+jest-diff@^26.0.0, jest-diff@^26.6.2:
+  version "26.6.2"
+  resolved "https://registry.yarnpkg.com/jest-diff/-/jest-diff-26.6.2.tgz#1aa7468b52c3a68d7d5c5fdcdfcd5e49bd164394"
+  integrity sha512-6m+9Z3Gv9wN0WFVasqjCL/06+EFCMTqDEUl/b87HYK2rAPTyfz4ZIuSlPhY51PIQRWx5TaxeF1qmXKe9gfN3sA==
+  dependencies:
+    chalk "^4.0.0"
+    diff-sequences "^26.6.2"
+    jest-get-type "^26.3.0"
+    pretty-format "^26.6.2"
+
+jest-docblock@^26.0.0:
+  version "26.0.0"
+  resolved "https://registry.yarnpkg.com/jest-docblock/-/jest-docblock-26.0.0.tgz#3e2fa20899fc928cb13bd0ff68bd3711a36889b5"
+  integrity sha512-RDZ4Iz3QbtRWycd8bUEPxQsTlYazfYn/h5R65Fc6gOfwozFhoImx+affzky/FFBuqISPTqjXomoIGJVKBWoo0w==
+  dependencies:
+    detect-newline "^3.0.0"
+
+jest-each@^26.6.0, jest-each@^26.6.2:
+  version "26.6.2"
+  resolved "https://registry.yarnpkg.com/jest-each/-/jest-each-26.6.2.tgz#02526438a77a67401c8a6382dfe5999952c167cb"
+  integrity sha512-Mer/f0KaATbjl8MCJ+0GEpNdqmnVmDYqCTJYTvoo7rqmRiDllmp2AYN+06F93nXcY3ur9ShIjS+CO/uD+BbH4A==
+  dependencies:
+    "@jest/types" "^26.6.2"
+    chalk "^4.0.0"
+    jest-get-type "^26.3.0"
+    jest-util "^26.6.2"
+    pretty-format "^26.6.2"
+
+jest-environment-jsdom@^26.6.2:
+  version "26.6.2"
+  resolved "https://registry.yarnpkg.com/jest-environment-jsdom/-/jest-environment-jsdom-26.6.2.tgz#78d09fe9cf019a357009b9b7e1f101d23bd1da3e"
+  integrity sha512-jgPqCruTlt3Kwqg5/WVFyHIOJHsiAvhcp2qiR2QQstuG9yWox5+iHpU3ZrcBxW14T4fe5Z68jAfLRh7joCSP2Q==
+  dependencies:
+    "@jest/environment" "^26.6.2"
+    "@jest/fake-timers" "^26.6.2"
+    "@jest/types" "^26.6.2"
+    "@types/node" "*"
+    jest-mock "^26.6.2"
+    jest-util "^26.6.2"
+    jsdom "^16.4.0"
+
+jest-environment-node@^26.6.2:
+  version "26.6.2"
+  resolved "https://registry.yarnpkg.com/jest-environment-node/-/jest-environment-node-26.6.2.tgz#824e4c7fb4944646356f11ac75b229b0035f2b0c"
+  integrity sha512-zhtMio3Exty18dy8ee8eJ9kjnRyZC1N4C1Nt/VShN1apyXc8rWGtJ9lI7vqiWcyyXS4BVSEn9lxAM2D+07/Tag==
+  dependencies:
+    "@jest/environment" "^26.6.2"
+    "@jest/fake-timers" "^26.6.2"
+    "@jest/types" "^26.6.2"
+    "@types/node" "*"
+    jest-mock "^26.6.2"
+    jest-util "^26.6.2"
+
+jest-get-type@^26.3.0:
+  version "26.3.0"
+  resolved "https://registry.yarnpkg.com/jest-get-type/-/jest-get-type-26.3.0.tgz#e97dc3c3f53c2b406ca7afaed4493b1d099199e0"
+  integrity sha512-TpfaviN1R2pQWkIihlfEanwOXK0zcxrKEE4MlU6Tn7keoXdN6/3gK/xl0yEh8DOunn5pOVGKf8hB4R9gVh04ig==
+
+jest-haste-map@^26.6.2:
+  version "26.6.2"
+  resolved "https://registry.yarnpkg.com/jest-haste-map/-/jest-haste-map-26.6.2.tgz#dd7e60fe7dc0e9f911a23d79c5ff7fb5c2cafeaa"
+  integrity sha512-easWIJXIw71B2RdR8kgqpjQrbMRWQBgiBwXYEhtGUTaX+doCjBheluShdDMeR8IMfJiTqH4+zfhtg29apJf/8w==
+  dependencies:
+    "@jest/types" "^26.6.2"
+    "@types/graceful-fs" "^4.1.2"
+    "@types/node" "*"
+    anymatch "^3.0.3"
+    fb-watchman "^2.0.0"
+    graceful-fs "^4.2.4"
+    jest-regex-util "^26.0.0"
+    jest-serializer "^26.6.2"
+    jest-util "^26.6.2"
+    jest-worker "^26.6.2"
+    micromatch "^4.0.2"
+    sane "^4.0.3"
+    walker "^1.0.7"
+  optionalDependencies:
+    fsevents "^2.1.2"
+
+jest-jasmine2@^26.6.3:
+  version "26.6.3"
+  resolved "https://registry.yarnpkg.com/jest-jasmine2/-/jest-jasmine2-26.6.3.tgz#adc3cf915deacb5212c93b9f3547cd12958f2edd"
+  integrity sha512-kPKUrQtc8aYwBV7CqBg5pu+tmYXlvFlSFYn18ev4gPFtrRzB15N2gW/Roew3187q2w2eHuu0MU9TJz6w0/nPEg==
+  dependencies:
+    "@babel/traverse" "^7.1.0"
+    "@jest/environment" "^26.6.2"
+    "@jest/source-map" "^26.6.2"
+    "@jest/test-result" "^26.6.2"
+    "@jest/types" "^26.6.2"
+    "@types/node" "*"
+    chalk "^4.0.0"
+    co "^4.6.0"
+    expect "^26.6.2"
+    is-generator-fn "^2.0.0"
+    jest-each "^26.6.2"
+    jest-matcher-utils "^26.6.2"
+    jest-message-util "^26.6.2"
+    jest-runtime "^26.6.3"
+    jest-snapshot "^26.6.2"
+    jest-util "^26.6.2"
+    pretty-format "^26.6.2"
+    throat "^5.0.0"
+
+jest-leak-detector@^26.6.2:
+  version "26.6.2"
+  resolved "https://registry.yarnpkg.com/jest-leak-detector/-/jest-leak-detector-26.6.2.tgz#7717cf118b92238f2eba65054c8a0c9c653a91af"
+  integrity sha512-i4xlXpsVSMeKvg2cEKdfhh0H39qlJlP5Ex1yQxwF9ubahboQYMgTtz5oML35AVA3B4Eu+YsmwaiKVev9KCvLxg==
+  dependencies:
+    jest-get-type "^26.3.0"
+    pretty-format "^26.6.2"
+
+jest-matcher-utils@^26.6.0, jest-matcher-utils@^26.6.2:
+  version "26.6.2"
+  resolved "https://registry.yarnpkg.com/jest-matcher-utils/-/jest-matcher-utils-26.6.2.tgz#8e6fd6e863c8b2d31ac6472eeb237bc595e53e7a"
+  integrity sha512-llnc8vQgYcNqDrqRDXWwMr9i7rS5XFiCwvh6DTP7Jqa2mqpcCBBlpCbn+trkG0KNhPu/h8rzyBkriOtBstvWhw==
+  dependencies:
+    chalk "^4.0.0"
+    jest-diff "^26.6.2"
+    jest-get-type "^26.3.0"
+    pretty-format "^26.6.2"
+
+jest-message-util@^26.6.0, jest-message-util@^26.6.2:
+  version "26.6.2"
+  resolved "https://registry.yarnpkg.com/jest-message-util/-/jest-message-util-26.6.2.tgz#58173744ad6fc0506b5d21150b9be56ef001ca07"
+  integrity sha512-rGiLePzQ3AzwUshu2+Rn+UMFk0pHN58sOG+IaJbk5Jxuqo3NYO1U2/MIR4S1sKgsoYSXSzdtSa0TgrmtUwEbmA==
+  dependencies:
+    "@babel/code-frame" "^7.0.0"
+    "@jest/types" "^26.6.2"
+    "@types/stack-utils" "^2.0.0"
+    chalk "^4.0.0"
+    graceful-fs "^4.2.4"
+    micromatch "^4.0.2"
+    pretty-format "^26.6.2"
+    slash "^3.0.0"
+    stack-utils "^2.0.2"
+
+jest-mock@^26.6.2:
+  version "26.6.2"
+  resolved "https://registry.yarnpkg.com/jest-mock/-/jest-mock-26.6.2.tgz#d6cb712b041ed47fe0d9b6fc3474bc6543feb302"
+  integrity sha512-YyFjePHHp1LzpzYcmgqkJ0nm0gg/lJx2aZFzFy1S6eUqNjXsOqTK10zNRff2dNfssgokjkG65OlWNcIlgd3zew==
+  dependencies:
+    "@jest/types" "^26.6.2"
+    "@types/node" "*"
+
+jest-pnp-resolver@^1.2.2:
+  version "1.2.2"
+  resolved "https://registry.yarnpkg.com/jest-pnp-resolver/-/jest-pnp-resolver-1.2.2.tgz#b704ac0ae028a89108a4d040b3f919dfddc8e33c"
+  integrity sha512-olV41bKSMm8BdnuMsewT4jqlZ8+3TCARAXjZGT9jcoSnrfUnRCqnMoF9XEeoWjbzObpqF9dRhHQj0Xb9QdF6/w==
+
+jest-regex-util@^26.0.0:
+  version "26.0.0"
+  resolved "https://registry.yarnpkg.com/jest-regex-util/-/jest-regex-util-26.0.0.tgz#d25e7184b36e39fd466c3bc41be0971e821fee28"
+  integrity sha512-Gv3ZIs/nA48/Zvjrl34bf+oD76JHiGDUxNOVgUjh3j890sblXryjY4rss71fPtD/njchl6PSE2hIhvyWa1eT0A==
+
+jest-resolve-dependencies@^26.6.3:
+  version "26.6.3"
+  resolved "https://registry.yarnpkg.com/jest-resolve-dependencies/-/jest-resolve-dependencies-26.6.3.tgz#6680859ee5d22ee5dcd961fe4871f59f4c784fb6"
+  integrity sha512-pVwUjJkxbhe4RY8QEWzN3vns2kqyuldKpxlxJlzEYfKSvY6/bMvxoFrYYzUO1Gx28yKWN37qyV7rIoIp2h8fTg==
+  dependencies:
+    "@jest/types" "^26.6.2"
+    jest-regex-util "^26.0.0"
+    jest-snapshot "^26.6.2"
+
+jest-resolve@26.6.0:
+  version "26.6.0"
+  resolved "https://registry.yarnpkg.com/jest-resolve/-/jest-resolve-26.6.0.tgz#070fe7159af87b03e50f52ea5e17ee95bbee40e1"
+  integrity sha512-tRAz2bwraHufNp+CCmAD8ciyCpXCs1NQxB5EJAmtCFy6BN81loFEGWKzYu26Y62lAJJe4X4jg36Kf+NsQyiStQ==
+  dependencies:
+    "@jest/types" "^26.6.0"
+    chalk "^4.0.0"
+    graceful-fs "^4.2.4"
+    jest-pnp-resolver "^1.2.2"
+    jest-util "^26.6.0"
+    read-pkg-up "^7.0.1"
+    resolve "^1.17.0"
+    slash "^3.0.0"
+
+jest-resolve@^26.6.2:
+  version "26.6.2"
+  resolved "https://registry.yarnpkg.com/jest-resolve/-/jest-resolve-26.6.2.tgz#a3ab1517217f469b504f1b56603c5bb541fbb507"
+  integrity sha512-sOxsZOq25mT1wRsfHcbtkInS+Ek7Q8jCHUB0ZUTP0tc/c41QHriU/NunqMfCUWsL4H3MHpvQD4QR9kSYhS7UvQ==
+  dependencies:
+    "@jest/types" "^26.6.2"
+    chalk "^4.0.0"
+    graceful-fs "^4.2.4"
+    jest-pnp-resolver "^1.2.2"
+    jest-util "^26.6.2"
+    read-pkg-up "^7.0.1"
+    resolve "^1.18.1"
+    slash "^3.0.0"
+
+jest-runner@^26.6.0, jest-runner@^26.6.3:
+  version "26.6.3"
+  resolved "https://registry.yarnpkg.com/jest-runner/-/jest-runner-26.6.3.tgz#2d1fed3d46e10f233fd1dbd3bfaa3fe8924be159"
+  integrity sha512-atgKpRHnaA2OvByG/HpGA4g6CSPS/1LK0jK3gATJAoptC1ojltpmVlYC3TYgdmGp+GLuhzpH30Gvs36szSL2JQ==
+  dependencies:
+    "@jest/console" "^26.6.2"
+    "@jest/environment" "^26.6.2"
+    "@jest/test-result" "^26.6.2"
+    "@jest/types" "^26.6.2"
+    "@types/node" "*"
+    chalk "^4.0.0"
+    emittery "^0.7.1"
+    exit "^0.1.2"
+    graceful-fs "^4.2.4"
+    jest-config "^26.6.3"
+    jest-docblock "^26.0.0"
+    jest-haste-map "^26.6.2"
+    jest-leak-detector "^26.6.2"
+    jest-message-util "^26.6.2"
+    jest-resolve "^26.6.2"
+    jest-runtime "^26.6.3"
+    jest-util "^26.6.2"
+    jest-worker "^26.6.2"
+    source-map-support "^0.5.6"
+    throat "^5.0.0"
+
+jest-runtime@^26.6.0, jest-runtime@^26.6.3:
+  version "26.6.3"
+  resolved "https://registry.yarnpkg.com/jest-runtime/-/jest-runtime-26.6.3.tgz#4f64efbcfac398331b74b4b3c82d27d401b8fa2b"
+  integrity sha512-lrzyR3N8sacTAMeonbqpnSka1dHNux2uk0qqDXVkMv2c/A3wYnvQ4EXuI013Y6+gSKSCxdaczvf4HF0mVXHRdw==
+  dependencies:
+    "@jest/console" "^26.6.2"
+    "@jest/environment" "^26.6.2"
+    "@jest/fake-timers" "^26.6.2"
+    "@jest/globals" "^26.6.2"
+    "@jest/source-map" "^26.6.2"
+    "@jest/test-result" "^26.6.2"
+    "@jest/transform" "^26.6.2"
+    "@jest/types" "^26.6.2"
+    "@types/yargs" "^15.0.0"
+    chalk "^4.0.0"
+    cjs-module-lexer "^0.6.0"
+    collect-v8-coverage "^1.0.0"
+    exit "^0.1.2"
+    glob "^7.1.3"
+    graceful-fs "^4.2.4"
+    jest-config "^26.6.3"
+    jest-haste-map "^26.6.2"
+    jest-message-util "^26.6.2"
+    jest-mock "^26.6.2"
+    jest-regex-util "^26.0.0"
+    jest-resolve "^26.6.2"
+    jest-snapshot "^26.6.2"
+    jest-util "^26.6.2"
+    jest-validate "^26.6.2"
+    slash "^3.0.0"
+    strip-bom "^4.0.0"
+    yargs "^15.4.1"
+
+jest-serializer@^26.6.2:
+  version "26.6.2"
+  resolved "https://registry.yarnpkg.com/jest-serializer/-/jest-serializer-26.6.2.tgz#d139aafd46957d3a448f3a6cdabe2919ba0742d1"
+  integrity sha512-S5wqyz0DXnNJPd/xfIzZ5Xnp1HrJWBczg8mMfMpN78OJ5eDxXyf+Ygld9wX1DnUWbIbhM1YDY95NjR4CBXkb2g==
+  dependencies:
+    "@types/node" "*"
+    graceful-fs "^4.2.4"
+
+jest-snapshot@^26.6.0, jest-snapshot@^26.6.2:
+  version "26.6.2"
+  resolved "https://registry.yarnpkg.com/jest-snapshot/-/jest-snapshot-26.6.2.tgz#f3b0af1acb223316850bd14e1beea9837fb39c84"
+  integrity sha512-OLhxz05EzUtsAmOMzuupt1lHYXCNib0ECyuZ/PZOx9TrZcC8vL0x+DUG3TL+GLX3yHG45e6YGjIm0XwDc3q3og==
+  dependencies:
+    "@babel/types" "^7.0.0"
+    "@jest/types" "^26.6.2"
+    "@types/babel__traverse" "^7.0.4"
+    "@types/prettier" "^2.0.0"
+    chalk "^4.0.0"
+    expect "^26.6.2"
+    graceful-fs "^4.2.4"
+    jest-diff "^26.6.2"
+    jest-get-type "^26.3.0"
+    jest-haste-map "^26.6.2"
+    jest-matcher-utils "^26.6.2"
+    jest-message-util "^26.6.2"
+    jest-resolve "^26.6.2"
+    natural-compare "^1.4.0"
+    pretty-format "^26.6.2"
+    semver "^7.3.2"
+
+jest-util@^26.6.0, jest-util@^26.6.2:
+  version "26.6.2"
+  resolved "https://registry.yarnpkg.com/jest-util/-/jest-util-26.6.2.tgz#907535dbe4d5a6cb4c47ac9b926f6af29576cbc1"
+  integrity sha512-MDW0fKfsn0OI7MS7Euz6h8HNDXVQ0gaM9uW6RjfDmd1DAFcaxX9OqIakHIqhbnmF08Cf2DLDG+ulq8YQQ0Lp0Q==
+  dependencies:
+    "@jest/types" "^26.6.2"
+    "@types/node" "*"
+    chalk "^4.0.0"
+    graceful-fs "^4.2.4"
+    is-ci "^2.0.0"
+    micromatch "^4.0.2"
+
+jest-validate@^26.6.2:
+  version "26.6.2"
+  resolved "https://registry.yarnpkg.com/jest-validate/-/jest-validate-26.6.2.tgz#23d380971587150467342911c3d7b4ac57ab20ec"
+  integrity sha512-NEYZ9Aeyj0i5rQqbq+tpIOom0YS1u2MVu6+euBsvpgIme+FOfRmoC4R5p0JiAUpaFvFy24xgrpMknarR/93XjQ==
+  dependencies:
+    "@jest/types" "^26.6.2"
+    camelcase "^6.0.0"
+    chalk "^4.0.0"
+    jest-get-type "^26.3.0"
+    leven "^3.1.0"
+    pretty-format "^26.6.2"
+
+jest-watch-typeahead@0.6.1:
+  version "0.6.1"
+  resolved "https://registry.yarnpkg.com/jest-watch-typeahead/-/jest-watch-typeahead-0.6.1.tgz#45221b86bb6710b7e97baaa1640ae24a07785e63"
+  integrity sha512-ITVnHhj3Jd/QkqQcTqZfRgjfyRhDFM/auzgVo2RKvSwi18YMvh0WvXDJFoFED6c7jd/5jxtu4kSOb9PTu2cPVg==
+  dependencies:
+    ansi-escapes "^4.3.1"
+    chalk "^4.0.0"
+    jest-regex-util "^26.0.0"
+    jest-watcher "^26.3.0"
+    slash "^3.0.0"
+    string-length "^4.0.1"
+    strip-ansi "^6.0.0"
+
+jest-watcher@^26.3.0, jest-watcher@^26.6.2:
+  version "26.6.2"
+  resolved "https://registry.yarnpkg.com/jest-watcher/-/jest-watcher-26.6.2.tgz#a5b683b8f9d68dbcb1d7dae32172d2cca0592975"
+  integrity sha512-WKJob0P/Em2csiVthsI68p6aGKTIcsfjH9Gsx1f0A3Italz43e3ho0geSAVsmj09RWOELP1AZ/DXyJgOgDKxXQ==
+  dependencies:
+    "@jest/test-result" "^26.6.2"
+    "@jest/types" "^26.6.2"
+    "@types/node" "*"
+    ansi-escapes "^4.2.1"
+    chalk "^4.0.0"
+    jest-util "^26.6.2"
+    string-length "^4.0.1"
+
+jest-worker@^24.9.0:
+  version "24.9.0"
+  resolved "https://registry.yarnpkg.com/jest-worker/-/jest-worker-24.9.0.tgz#5dbfdb5b2d322e98567898238a9697bcce67b3e5"
+  integrity sha512-51PE4haMSXcHohnSMdM42anbvZANYTqMrr52tVKPqqsPJMzoP6FYYDVqahX/HrAoKEKz3uUPzSvKs9A3qR4iVw==
+  dependencies:
+    merge-stream "^2.0.0"
+    supports-color "^6.1.0"
+
+jest-worker@^26.5.0, jest-worker@^26.6.2:
+  version "26.6.2"
+  resolved "https://registry.yarnpkg.com/jest-worker/-/jest-worker-26.6.2.tgz#7f72cbc4d643c365e27b9fd775f9d0eaa9c7a8ed"
+  integrity sha512-KWYVV1c4i+jbMpaBC+U++4Va0cp8OisU185o73T1vo99hqi7w8tSJfUXYswwqqrjzwxa6KpRK54WhPvwf5w6PQ==
+  dependencies:
+    "@types/node" "*"
+    merge-stream "^2.0.0"
+    supports-color "^7.0.0"
+
+jest@26.6.0:
+  version "26.6.0"
+  resolved "https://registry.yarnpkg.com/jest/-/jest-26.6.0.tgz#546b25a1d8c888569dbbe93cae131748086a4a25"
+  integrity sha512-jxTmrvuecVISvKFFhOkjsWRZV7sFqdSUAd1ajOKY+/QE/aLBVstsJ/dX8GczLzwiT6ZEwwmZqtCUHLHHQVzcfA==
+  dependencies:
+    "@jest/core" "^26.6.0"
+    import-local "^3.0.2"
+    jest-cli "^26.6.0"
+
+"js-tokens@^3.0.0 || ^4.0.0", js-tokens@^4.0.0:
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/js-tokens/-/js-tokens-4.0.0.tgz#19203fb59991df98e3a287050d4647cdeaf32499"
+  integrity sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==
+
+js-yaml@^3.13.1:
+  version "3.14.1"
+  resolved "https://registry.yarnpkg.com/js-yaml/-/js-yaml-3.14.1.tgz#dae812fdb3825fa306609a8717383c50c36a0537"
+  integrity sha512-okMH7OXXJ7YrN9Ok3/SXrnu4iX9yOk+25nqX4imS2npuvTYDmo/QEZoqwZkYaIDk3jVvBOTOIEgEhaLOynBS9g==
+  dependencies:
+    argparse "^1.0.7"
+    esprima "^4.0.0"
+
+jsbn@~0.1.0:
+  version "0.1.1"
+  resolved "https://registry.yarnpkg.com/jsbn/-/jsbn-0.1.1.tgz#a5e654c2e5a2deb5f201d96cefbca80c0ef2f513"
+  integrity sha1-peZUwuWi3rXyAdls77yoDA7y9RM=
+
+jsdom@^16.4.0:
+  version "16.4.0"
+  resolved "https://registry.yarnpkg.com/jsdom/-/jsdom-16.4.0.tgz#36005bde2d136f73eee1a830c6d45e55408edddb"
+  integrity sha512-lYMm3wYdgPhrl7pDcRmvzPhhrGVBeVhPIqeHjzeiHN3DFmD1RBpbExbi8vU7BJdH8VAZYovR8DMt0PNNDM7k8w==
+  dependencies:
+    abab "^2.0.3"
+    acorn "^7.1.1"
+    acorn-globals "^6.0.0"
+    cssom "^0.4.4"
+    cssstyle "^2.2.0"
+    data-urls "^2.0.0"
+    decimal.js "^10.2.0"
+    domexception "^2.0.1"
+    escodegen "^1.14.1"
+    html-encoding-sniffer "^2.0.1"
+    is-potential-custom-element-name "^1.0.0"
+    nwsapi "^2.2.0"
+    parse5 "5.1.1"
+    request "^2.88.2"
+    request-promise-native "^1.0.8"
+    saxes "^5.0.0"
+    symbol-tree "^3.2.4"
+    tough-cookie "^3.0.1"
+    w3c-hr-time "^1.0.2"
+    w3c-xmlserializer "^2.0.0"
+    webidl-conversions "^6.1.0"
+    whatwg-encoding "^1.0.5"
+    whatwg-mimetype "^2.3.0"
+    whatwg-url "^8.0.0"
+    ws "^7.2.3"
+    xml-name-validator "^3.0.0"
+
+jsesc@^2.5.1:
+  version "2.5.2"
+  resolved "https://registry.yarnpkg.com/jsesc/-/jsesc-2.5.2.tgz#80564d2e483dacf6e8ef209650a67df3f0c283a4"
+  integrity sha512-OYu7XEzjkCQ3C5Ps3QIZsQfNpqoJyZZA99wd9aWd05NCtC5pWOkShK2mkL6HXQR6/Cy2lbNdPlZBpuQHXE63gA==
+
+jsesc@~0.5.0:
+  version "0.5.0"
+  resolved "https://registry.yarnpkg.com/jsesc/-/jsesc-0.5.0.tgz#e7dee66e35d6fc16f710fe91d5cf69f70f08911d"
+  integrity sha1-597mbjXW/Bb3EP6R1c9p9w8IkR0=
+
+json-parse-better-errors@^1.0.1, json-parse-better-errors@^1.0.2:
+  version "1.0.2"
+  resolved "https://registry.yarnpkg.com/json-parse-better-errors/-/json-parse-better-errors-1.0.2.tgz#bb867cfb3450e69107c131d1c514bab3dc8bcaa9"
+  integrity sha512-mrqyZKfX5EhL7hvqcV6WG1yYjnjeuYDzDhhcAAUrq8Po85NBQBJP+ZDUT75qZQ98IkUoBqdkExkukOU7Ts2wrw==
+
+json-parse-even-better-errors@^2.3.0:
+  version "2.3.1"
+  resolved "https://registry.yarnpkg.com/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz#7c47805a94319928e05777405dc12e1f7a4ee02d"
+  integrity sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w==
+
+json-schema-traverse@^0.4.1:
+  version "0.4.1"
+  resolved "https://registry.yarnpkg.com/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz#69f6a87d9513ab8bb8fe63bdb0979c448e684660"
+  integrity sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==
+
+json-schema-traverse@^1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz#ae7bcb3656ab77a73ba5c49bf654f38e6b6860e2"
+  integrity sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==
+
+json-schema@0.2.3:
+  version "0.2.3"
+  resolved "https://registry.yarnpkg.com/json-schema/-/json-schema-0.2.3.tgz#b480c892e59a2f05954ce727bd3f2a4e882f9e13"
+  integrity sha1-tIDIkuWaLwWVTOcnvT8qTogvnhM=
+
+json-stable-stringify-without-jsonify@^1.0.1:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/json-stable-stringify-without-jsonify/-/json-stable-stringify-without-jsonify-1.0.1.tgz#9db7b59496ad3f3cfef30a75142d2d930ad72651"
+  integrity sha1-nbe1lJatPzz+8wp1FC0tkwrXJlE=
+
+json-stringify-safe@~5.0.1:
+  version "5.0.1"
+  resolved "https://registry.yarnpkg.com/json-stringify-safe/-/json-stringify-safe-5.0.1.tgz#1296a2d58fd45f19a0f6ce01d65701e2c735b6eb"
+  integrity sha1-Epai1Y/UXxmg9s4B1lcB4sc1tus=
+
+json3@^3.3.3:
+  version "3.3.3"
+  resolved "https://registry.yarnpkg.com/json3/-/json3-3.3.3.tgz#7fc10e375fc5ae42c4705a5cc0aa6f62be305b81"
+  integrity sha512-c7/8mbUsKigAbLkD5B010BK4D9LZm7A1pNItkEwiUZRpIN66exu/e7YQWysGun+TRKaJp8MhemM+VkfWv42aCA==
+
+json5@^1.0.1:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/json5/-/json5-1.0.1.tgz#779fb0018604fa854eacbf6252180d83543e3dbe"
+  integrity sha512-aKS4WQjPenRxiQsC93MNfjx+nbF4PAdYzmd/1JIj8HYzqfbu86beTuNgXDzPknWk0n0uARlyewZo4s++ES36Ow==
+  dependencies:
+    minimist "^1.2.0"
+
+json5@^2.1.2:
+  version "2.2.0"
+  resolved "https://registry.yarnpkg.com/json5/-/json5-2.2.0.tgz#2dfefe720c6ba525d9ebd909950f0515316c89a3"
+  integrity sha512-f+8cldu7X/y7RAJurMEJmdoKXGB/X550w2Nr3tTbezL6RwEE/iMcm+tZnXeoZtKuOq6ft8+CqzEkrIgx1fPoQA==
+  dependencies:
+    minimist "^1.2.5"
+
+jsonfile@^4.0.0:
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/jsonfile/-/jsonfile-4.0.0.tgz#8771aae0799b64076b76640fca058f9c10e33ecb"
+  integrity sha1-h3Gq4HmbZAdrdmQPygWPnBDjPss=
+  optionalDependencies:
+    graceful-fs "^4.1.6"
+
+jsonfile@^6.0.1:
+  version "6.1.0"
+  resolved "https://registry.yarnpkg.com/jsonfile/-/jsonfile-6.1.0.tgz#bc55b2634793c679ec6403094eb13698a6ec0aae"
+  integrity sha512-5dgndWOriYSm5cnYaJNhalLNDKOqFwyDB/rr1E9ZsGciGvKPs8R2xYGCacuf3z6K1YKDz182fd+fY3cn3pMqXQ==
+  dependencies:
+    universalify "^2.0.0"
+  optionalDependencies:
+    graceful-fs "^4.1.6"
+
+jsprim@^1.2.2:
+  version "1.4.1"
+  resolved "https://registry.yarnpkg.com/jsprim/-/jsprim-1.4.1.tgz#313e66bc1e5cc06e438bc1b7499c2e5c56acb6a2"
+  integrity sha1-MT5mvB5cwG5Di8G3SZwuXFastqI=
+  dependencies:
+    assert-plus "1.0.0"
+    extsprintf "1.3.0"
+    json-schema "0.2.3"
+    verror "1.10.0"
+
+"jsx-ast-utils@^2.4.1 || ^3.0.0", jsx-ast-utils@^3.1.0:
+  version "3.2.0"
+  resolved "https://registry.yarnpkg.com/jsx-ast-utils/-/jsx-ast-utils-3.2.0.tgz#41108d2cec408c3453c1bbe8a4aae9e1e2bd8f82"
+  integrity sha512-EIsmt3O3ljsU6sot/J4E1zDRxfBNrhjyf/OKjlydwgEimQuznlM4Wv7U+ueONJMyEn1WRE0K8dhi3dVAXYT24Q==
+  dependencies:
+    array-includes "^3.1.2"
+    object.assign "^4.1.2"
+
+killable@^1.0.1:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/killable/-/killable-1.0.1.tgz#4c8ce441187a061c7474fb87ca08e2a638194892"
+  integrity sha512-LzqtLKlUwirEUyl/nicirVmNiPvYs7l5n8wOPP7fyJVpUPkvCnW/vuiXGpylGUlnPDnB7311rARzAt3Mhswpjg==
+
+kind-of@^3.0.2, kind-of@^3.0.3, kind-of@^3.2.0:
+  version "3.2.2"
+  resolved "https://registry.yarnpkg.com/kind-of/-/kind-of-3.2.2.tgz#31ea21a734bab9bbb0f32466d893aea51e4a3c64"
+  integrity sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=
+  dependencies:
+    is-buffer "^1.1.5"
+
+kind-of@^4.0.0:
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/kind-of/-/kind-of-4.0.0.tgz#20813df3d712928b207378691a45066fae72dd57"
+  integrity sha1-IIE989cSkosgc3hpGkUGb65y3Vc=
+  dependencies:
+    is-buffer "^1.1.5"
+
+kind-of@^5.0.0:
+  version "5.1.0"
+  resolved "https://registry.yarnpkg.com/kind-of/-/kind-of-5.1.0.tgz#729c91e2d857b7a419a1f9aa65685c4c33f5845d"
+  integrity sha512-NGEErnH6F2vUuXDh+OlbcKW7/wOcfdRHaZ7VWtqCztfHri/++YKmP51OdWeGPuqCOba6kk2OTe5d02VmTB80Pw==
+
+kind-of@^6.0.0, kind-of@^6.0.2:
+  version "6.0.3"
+  resolved "https://registry.yarnpkg.com/kind-of/-/kind-of-6.0.3.tgz#07c05034a6c349fa06e24fa35aa76db4580ce4dd"
+  integrity sha512-dcS1ul+9tmeD95T+x28/ehLgd9mENa3LsvDTtzm3vyBEO7RPptvAD+t44WVXaUjTBRcrpFeFlC8WCruUR456hw==
+
+kleur@^3.0.3:
+  version "3.0.3"
+  resolved "https://registry.yarnpkg.com/kleur/-/kleur-3.0.3.tgz#a79c9ecc86ee1ce3fa6206d1216c501f147fc07e"
+  integrity sha512-eTIzlVOSUR+JxdDFepEYcBMtZ9Qqdef+rnzWdRZuMbOywu5tO2w2N7rqjoANZ5k9vywhL6Br1VRjUIgTQx4E8w==
+
+klona@^2.0.4:
+  version "2.0.4"
+  resolved "https://registry.yarnpkg.com/klona/-/klona-2.0.4.tgz#7bb1e3affb0cb8624547ef7e8f6708ea2e39dfc0"
+  integrity sha512-ZRbnvdg/NxqzC7L9Uyqzf4psi1OM4Cuc+sJAkQPjO6XkQIJTNbfK2Rsmbw8fx1p2mkZdp2FZYo2+LwXYY/uwIA==
+
+language-subtag-registry@~0.3.2:
+  version "0.3.21"
+  resolved "https://registry.yarnpkg.com/language-subtag-registry/-/language-subtag-registry-0.3.21.tgz#04ac218bea46f04cb039084602c6da9e788dd45a"
+  integrity sha512-L0IqwlIXjilBVVYKFT37X9Ih11Um5NEl9cbJIuU/SwP/zEEAbBPOnEeeuxVMf45ydWQRDQN3Nqc96OgbH1K+Pg==
+
+language-tags@^1.0.5:
+  version "1.0.5"
+  resolved "https://registry.yarnpkg.com/language-tags/-/language-tags-1.0.5.tgz#d321dbc4da30ba8bf3024e040fa5c14661f9193a"
+  integrity sha1-0yHbxNowuovzAk4ED6XBRmH5GTo=
+  dependencies:
+    language-subtag-registry "~0.3.2"
+
+last-call-webpack-plugin@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/last-call-webpack-plugin/-/last-call-webpack-plugin-3.0.0.tgz#9742df0e10e3cf46e5c0381c2de90d3a7a2d7555"
+  integrity sha512-7KI2l2GIZa9p2spzPIVZBYyNKkN+e/SQPpnjlTiPhdbDW3F86tdKKELxKpzJ5sgU19wQWsACULZmpTPYHeWO5w==
+  dependencies:
+    lodash "^4.17.5"
+    webpack-sources "^1.1.0"
+
+leven@^3.1.0:
+  version "3.1.0"
+  resolved "https://registry.yarnpkg.com/leven/-/leven-3.1.0.tgz#77891de834064cccba82ae7842bb6b14a13ed7f2"
+  integrity sha512-qsda+H8jTaUaN/x5vzW2rzc+8Rw4TAQ/4KjB46IwK5VH+IlVeeeje/EoZRpiXvIqjFgK84QffqPztGI3VBLG1A==
+
+levn@^0.4.1:
+  version "0.4.1"
+  resolved "https://registry.yarnpkg.com/levn/-/levn-0.4.1.tgz#ae4562c007473b932a6200d403268dd2fffc6ade"
+  integrity sha512-+bT2uH4E5LGE7h/n3evcS/sQlJXCpIp6ym8OWJ5eV6+67Dsql/LaaT7qJBAt2rzfoa/5QBGBhxDix1dMt2kQKQ==
+  dependencies:
+    prelude-ls "^1.2.1"
+    type-check "~0.4.0"
+
+levn@~0.3.0:
+  version "0.3.0"
+  resolved "https://registry.yarnpkg.com/levn/-/levn-0.3.0.tgz#3b09924edf9f083c0490fdd4c0bc4421e04764ee"
+  integrity sha1-OwmSTt+fCDwEkP3UwLxEIeBHZO4=
+  dependencies:
+    prelude-ls "~1.1.2"
+    type-check "~0.3.2"
+
+lines-and-columns@^1.1.6:
+  version "1.1.6"
+  resolved "https://registry.yarnpkg.com/lines-and-columns/-/lines-and-columns-1.1.6.tgz#1c00c743b433cd0a4e80758f7b64a57440d9ff00"
+  integrity sha1-HADHQ7QzzQpOgHWPe2SldEDZ/wA=
+
+load-json-file@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/load-json-file/-/load-json-file-2.0.0.tgz#7947e42149af80d696cbf797bcaabcfe1fe29ca8"
+  integrity sha1-eUfkIUmvgNaWy/eXvKq8/h/inKg=
+  dependencies:
+    graceful-fs "^4.1.2"
+    parse-json "^2.2.0"
+    pify "^2.0.0"
+    strip-bom "^3.0.0"
+
+loader-runner@^2.4.0:
+  version "2.4.0"
+  resolved "https://registry.yarnpkg.com/loader-runner/-/loader-runner-2.4.0.tgz#ed47066bfe534d7e84c4c7b9998c2a75607d9357"
+  integrity sha512-Jsmr89RcXGIwivFY21FcRrisYZfvLMTWx5kOLc+JTxtpBOG6xML0vzbc6SEQG2FO9/4Fc3wW4LVcB5DmGflaRw==
+
+loader-utils@1.2.3:
+  version "1.2.3"
+  resolved "https://registry.yarnpkg.com/loader-utils/-/loader-utils-1.2.3.tgz#1ff5dc6911c9f0a062531a4c04b609406108c2c7"
+  integrity sha512-fkpz8ejdnEMG3s37wGL07iSBDg99O9D5yflE9RGNH3hRdx9SOwYfnGYdZOUIZitN8E+E2vkq3MUMYMvPYl5ZZA==
+  dependencies:
+    big.js "^5.2.2"
+    emojis-list "^2.0.0"
+    json5 "^1.0.1"
+
+loader-utils@2.0.0, loader-utils@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/loader-utils/-/loader-utils-2.0.0.tgz#e4cace5b816d425a166b5f097e10cd12b36064b0"
+  integrity sha512-rP4F0h2RaWSvPEkD7BLDFQnvSf+nK+wr3ESUjNTyAGobqrijmW92zc+SO6d4p4B1wh7+B/Jg1mkQe5NYUEHtHQ==
+  dependencies:
+    big.js "^5.2.2"
+    emojis-list "^3.0.0"
+    json5 "^2.1.2"
+
+loader-utils@^1.1.0, loader-utils@^1.2.3, loader-utils@^1.4.0:
+  version "1.4.0"
+  resolved "https://registry.yarnpkg.com/loader-utils/-/loader-utils-1.4.0.tgz#c579b5e34cb34b1a74edc6c1fb36bfa371d5a613"
+  integrity sha512-qH0WSMBtn/oHuwjy/NucEgbx5dbxxnxup9s4PVXJUDHZBQY+s0NWA9rJf53RBnQZxfch7euUui7hpoAPvALZdA==
+  dependencies:
+    big.js "^5.2.2"
+    emojis-list "^3.0.0"
+    json5 "^1.0.1"
+
+locate-path@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/locate-path/-/locate-path-2.0.0.tgz#2b568b265eec944c6d9c0de9c3dbbbca0354cd8e"
+  integrity sha1-K1aLJl7slExtnA3pw9u7ygNUzY4=
+  dependencies:
+    p-locate "^2.0.0"
+    path-exists "^3.0.0"
+
+locate-path@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/locate-path/-/locate-path-3.0.0.tgz#dbec3b3ab759758071b58fe59fc41871af21400e"
+  integrity sha512-7AO748wWnIhNqAuaty2ZWHkQHRSNfPVIsPIfwEOWO22AmaoVrWavlOcMR5nzTLNYvp36X220/maaRsrec1G65A==
+  dependencies:
+    p-locate "^3.0.0"
+    path-exists "^3.0.0"
+
+locate-path@^5.0.0:
+  version "5.0.0"
+  resolved "https://registry.yarnpkg.com/locate-path/-/locate-path-5.0.0.tgz#1afba396afd676a6d42504d0a67a3a7eb9f62aa0"
+  integrity sha512-t7hw9pI+WvuwNJXwk5zVHpyhIqzg2qTlklJOf0mVxGSbe3Fp2VieZcduNYjaLDoy6p9uGpQEGWG87WpMKlNq8g==
+  dependencies:
+    p-locate "^4.1.0"
+
+lodash._reinterpolate@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/lodash._reinterpolate/-/lodash._reinterpolate-3.0.0.tgz#0ccf2d89166af03b3663c796538b75ac6e114d9d"
+  integrity sha1-DM8tiRZq8Ds2Y8eWU4t1rG4RTZ0=
+
+lodash.memoize@^4.1.2:
+  version "4.1.2"
+  resolved "https://registry.yarnpkg.com/lodash.memoize/-/lodash.memoize-4.1.2.tgz#bcc6c49a42a2840ed997f323eada5ecd182e0bfe"
+  integrity sha1-vMbEmkKihA7Zl/Mj6tpezRguC/4=
+
+lodash.mergewith@4.6.2:
+  version "4.6.2"
+  resolved "https://registry.yarnpkg.com/lodash.mergewith/-/lodash.mergewith-4.6.2.tgz#617121f89ac55f59047c7aec1ccd6654c6590f55"
+  integrity sha512-GK3g5RPZWTRSeLSpgP8Xhra+pnjBC56q9FZYe1d5RN3TJ35dbkGy3YqBSMbyCrlbi+CM9Z3Jk5yTL7RCsqboyQ==
+
+lodash.sortby@^4.7.0:
+  version "4.7.0"
+  resolved "https://registry.yarnpkg.com/lodash.sortby/-/lodash.sortby-4.7.0.tgz#edd14c824e2cc9c1e0b0a1b42bb5210516a42438"
+  integrity sha1-7dFMgk4sycHgsKG0K7UhBRakJDg=
+
+lodash.template@^4.5.0:
+  version "4.5.0"
+  resolved "https://registry.yarnpkg.com/lodash.template/-/lodash.template-4.5.0.tgz#f976195cf3f347d0d5f52483569fe8031ccce8ab"
+  integrity sha512-84vYFxIkmidUiFxidA/KjjH9pAycqW+h980j7Fuz5qxRtO9pgB7MDFTdys1N7A5mcucRiDyEq4fusljItR1T/A==
+  dependencies:
+    lodash._reinterpolate "^3.0.0"
+    lodash.templatesettings "^4.0.0"
+
+lodash.templatesettings@^4.0.0:
+  version "4.2.0"
+  resolved "https://registry.yarnpkg.com/lodash.templatesettings/-/lodash.templatesettings-4.2.0.tgz#e481310f049d3cf6d47e912ad09313b154f0fb33"
+  integrity sha512-stgLz+i3Aa9mZgnjr/O+v9ruKZsPsndy7qPZOchbqk2cnTU1ZaldKK+v7m54WoKIyxiuMZTKT2H81F8BeAc3ZQ==
+  dependencies:
+    lodash._reinterpolate "^3.0.0"
+
+lodash.uniq@^4.5.0:
+  version "4.5.0"
+  resolved "https://registry.yarnpkg.com/lodash.uniq/-/lodash.uniq-4.5.0.tgz#d0225373aeb652adc1bc82e4945339a842754773"
+  integrity sha1-0CJTc662Uq3BvILklFM5qEJ1R3M=
+
+"lodash@>=3.5 <5", lodash@^4.17.11, lodash@^4.17.14, lodash@^4.17.15, lodash@^4.17.19, lodash@^4.17.20, lodash@^4.17.5:
+  version "4.17.21"
+  resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.21.tgz#679591c564c3bffaae8454cf0b3df370c3d6911c"
+  integrity sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==
+
+loglevel@^1.6.8:
+  version "1.7.1"
+  resolved "https://registry.yarnpkg.com/loglevel/-/loglevel-1.7.1.tgz#005fde2f5e6e47068f935ff28573e125ef72f197"
+  integrity sha512-Hesni4s5UkWkwCGJMQGAh71PaLUmKFM60dHvq0zi/vDhhrzuk+4GgNbTXJ12YYQJn6ZKBDNIjYcuQGKudvqrIw==
+
+loose-envify@^1.0.0, loose-envify@^1.1.0, loose-envify@^1.2.0, loose-envify@^1.3.1, loose-envify@^1.4.0:
+  version "1.4.0"
+  resolved "https://registry.yarnpkg.com/loose-envify/-/loose-envify-1.4.0.tgz#71ee51fa7be4caec1a63839f7e682d8132d30caf"
+  integrity sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==
+  dependencies:
+    js-tokens "^3.0.0 || ^4.0.0"
+
+lower-case@^2.0.2:
+  version "2.0.2"
+  resolved "https://registry.yarnpkg.com/lower-case/-/lower-case-2.0.2.tgz#6fa237c63dbdc4a82ca0fd882e4722dc5e634e28"
+  integrity sha512-7fm3l3NAF9WfN6W3JOmf5drwpVqX78JtoGJ3A6W0a6ZnldM41w2fV5D490psKFTpMds8TJse/eHLFFsNHHjHgg==
+  dependencies:
+    tslib "^2.0.3"
+
+lru-cache@^5.1.1:
+  version "5.1.1"
+  resolved "https://registry.yarnpkg.com/lru-cache/-/lru-cache-5.1.1.tgz#1da27e6710271947695daf6848e847f01d84b920"
+  integrity sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w==
+  dependencies:
+    yallist "^3.0.2"
+
+lru-cache@^6.0.0:
+  version "6.0.0"
+  resolved "https://registry.yarnpkg.com/lru-cache/-/lru-cache-6.0.0.tgz#6d6fe6570ebd96aaf90fcad1dafa3b2566db3a94"
+  integrity sha512-Jo6dJ04CmSjuznwJSS3pUeWmd/H0ffTlkXXgwZi+eq1UCmqQwCh+eLsYOYCwY991i2Fah4h1BEMCx4qThGbsiA==
+  dependencies:
+    yallist "^4.0.0"
+
+lz-string@^1.4.4:
+  version "1.4.4"
+  resolved "https://registry.yarnpkg.com/lz-string/-/lz-string-1.4.4.tgz#c0d8eaf36059f705796e1e344811cf4c498d3a26"
+  integrity sha1-wNjq82BZ9wV5bh40SBHPTEmNOiY=
+
+magic-string@^0.25.0, magic-string@^0.25.7:
+  version "0.25.7"
+  resolved "https://registry.yarnpkg.com/magic-string/-/magic-string-0.25.7.tgz#3f497d6fd34c669c6798dcb821f2ef31f5445051"
+  integrity sha512-4CrMT5DOHTDk4HYDlzmwu4FVCcIYI8gauveasrdCu2IKIFOJ3f0v/8MDGJCDL9oD2ppz/Av1b0Nj345H9M+XIA==
+  dependencies:
+    sourcemap-codec "^1.4.4"
+
+make-dir@^2.0.0:
+  version "2.1.0"
+  resolved "https://registry.yarnpkg.com/make-dir/-/make-dir-2.1.0.tgz#5f0310e18b8be898cc07009295a30ae41e91e6f5"
+  integrity sha512-LS9X+dc8KLxXCb8dni79fLIIUA5VyZoyjSMCwTluaXA0o27cCK0bhXkpgw+sTXVpPy/lSO57ilRixqk0vDmtRA==
+  dependencies:
+    pify "^4.0.1"
+    semver "^5.6.0"
+
+make-dir@^3.0.0, make-dir@^3.0.2:
+  version "3.1.0"
+  resolved "https://registry.yarnpkg.com/make-dir/-/make-dir-3.1.0.tgz#415e967046b3a7f1d185277d84aa58203726a13f"
+  integrity sha512-g3FeP20LNwhALb/6Cz6Dd4F2ngze0jz7tbzrD2wAV+o9FeNHe4rL+yK2md0J/fiSf1sa1ADhXqi5+oVwOM/eGw==
+  dependencies:
+    semver "^6.0.0"
+
+makeerror@1.0.x:
+  version "1.0.11"
+  resolved "https://registry.yarnpkg.com/makeerror/-/makeerror-1.0.11.tgz#e01a5c9109f2af79660e4e8b9587790184f5a96c"
+  integrity sha1-4BpckQnyr3lmDk6LlYd5AYT1qWw=
+  dependencies:
+    tmpl "1.0.x"
+
+map-cache@^0.2.2:
+  version "0.2.2"
+  resolved "https://registry.yarnpkg.com/map-cache/-/map-cache-0.2.2.tgz#c32abd0bd6525d9b051645bb4f26ac5dc98a0dbf"
+  integrity sha1-wyq9C9ZSXZsFFkW7TyasXcmKDb8=
+
+map-visit@^1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/map-visit/-/map-visit-1.0.0.tgz#ecdca8f13144e660f1b5bd41f12f3479d98dfb8f"
+  integrity sha1-7Nyo8TFE5mDxtb1B8S80edmN+48=
+  dependencies:
+    object-visit "^1.0.0"
+
+md5.js@^1.3.4:
+  version "1.3.5"
+  resolved "https://registry.yarnpkg.com/md5.js/-/md5.js-1.3.5.tgz#b5d07b8e3216e3e27cd728d72f70d1e6a342005f"
+  integrity sha512-xitP+WxNPcTTOgnTJcrhM0xvdPepipPSf3I8EIpGKeFLjt3PlJLIDG3u8EX53ZIubkb+5U2+3rELYpEhHhzdkg==
+  dependencies:
+    hash-base "^3.0.0"
+    inherits "^2.0.1"
+    safe-buffer "^5.1.2"
+
+mdn-data@2.0.14:
+  version "2.0.14"
+  resolved "https://registry.yarnpkg.com/mdn-data/-/mdn-data-2.0.14.tgz#7113fc4281917d63ce29b43446f701e68c25ba50"
+  integrity sha512-dn6wd0uw5GsdswPFfsgMp5NSB0/aDe6fK94YJV/AJDYXL6HVLWBsxeq7js7Ad+mU2K9LAlwpk6kN2D5mwCPVow==
+
+mdn-data@2.0.4:
+  version "2.0.4"
+  resolved "https://registry.yarnpkg.com/mdn-data/-/mdn-data-2.0.4.tgz#699b3c38ac6f1d728091a64650b65d388502fd5b"
+  integrity sha512-iV3XNKw06j5Q7mi6h+9vbx23Tv7JkjEVgKHW4pimwyDGWm0OIQntJJ+u1C6mg6mK1EaTv42XQ7w76yuzH7M2cA==
+
+media-typer@0.3.0:
+  version "0.3.0"
+  resolved "https://registry.yarnpkg.com/media-typer/-/media-typer-0.3.0.tgz#8710d7af0aa626f8fffa1ce00168545263255748"
+  integrity sha1-hxDXrwqmJvj/+hzgAWhUUmMlV0g=
+
+memory-fs@^0.4.1:
+  version "0.4.1"
+  resolved "https://registry.yarnpkg.com/memory-fs/-/memory-fs-0.4.1.tgz#3a9a20b8462523e447cfbc7e8bb80ed667bfc552"
+  integrity sha1-OpoguEYlI+RHz7x+i7gO1me/xVI=
+  dependencies:
+    errno "^0.1.3"
+    readable-stream "^2.0.1"
+
+memory-fs@^0.5.0:
+  version "0.5.0"
+  resolved "https://registry.yarnpkg.com/memory-fs/-/memory-fs-0.5.0.tgz#324c01288b88652966d161db77838720845a8e3c"
+  integrity sha512-jA0rdU5KoQMC0e6ppoNRtpp6vjFq6+NY7r8hywnC7V+1Xj/MtHwGIbB1QaK/dunyjWteJzmkpd7ooeWg10T7GA==
+  dependencies:
+    errno "^0.1.3"
+    readable-stream "^2.0.1"
+
+merge-descriptors@1.0.1:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/merge-descriptors/-/merge-descriptors-1.0.1.tgz#b00aaa556dd8b44568150ec9d1b953f3f90cbb61"
+  integrity sha1-sAqqVW3YtEVoFQ7J0blT8/kMu2E=
+
+merge-stream@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/merge-stream/-/merge-stream-2.0.0.tgz#52823629a14dd00c9770fb6ad47dc6310f2c1f60"
+  integrity sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w==
+
+merge2@^1.3.0:
+  version "1.4.1"
+  resolved "https://registry.yarnpkg.com/merge2/-/merge2-1.4.1.tgz#4368892f885e907455a6fd7dc55c0c9d404990ae"
+  integrity sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg==
+
+methods@~1.1.2:
+  version "1.1.2"
+  resolved "https://registry.yarnpkg.com/methods/-/methods-1.1.2.tgz#5529a4d67654134edcc5266656835b0f851afcee"
+  integrity sha1-VSmk1nZUE07cxSZmVoNbD4Ua/O4=
+
+microevent.ts@~0.1.1:
+  version "0.1.1"
+  resolved "https://registry.yarnpkg.com/microevent.ts/-/microevent.ts-0.1.1.tgz#70b09b83f43df5172d0205a63025bce0f7357fa0"
+  integrity sha512-jo1OfR4TaEwd5HOrt5+tAZ9mqT4jmpNAusXtyfNzqVm9uiSYFZlKM1wYL4oU7azZW/PxQW53wM0S6OR1JHNa2g==
+
+micromatch@^3.1.10, micromatch@^3.1.4:
+  version "3.1.10"
+  resolved "https://registry.yarnpkg.com/micromatch/-/micromatch-3.1.10.tgz#70859bc95c9840952f359a068a3fc49f9ecfac23"
+  integrity sha512-MWikgl9n9M3w+bpsY3He8L+w9eF9338xRl8IAO5viDizwSzziFEyUzo2xrrloB64ADbTf8uA8vRqqttDTOmccg==
+  dependencies:
+    arr-diff "^4.0.0"
+    array-unique "^0.3.2"
+    braces "^2.3.1"
+    define-property "^2.0.2"
+    extend-shallow "^3.0.2"
+    extglob "^2.0.4"
+    fragment-cache "^0.2.1"
+    kind-of "^6.0.2"
+    nanomatch "^1.2.9"
+    object.pick "^1.3.0"
+    regex-not "^1.0.0"
+    snapdragon "^0.8.1"
+    to-regex "^3.0.2"
+
+micromatch@^4.0.2:
+  version "4.0.2"
+  resolved "https://registry.yarnpkg.com/micromatch/-/micromatch-4.0.2.tgz#4fcb0999bf9fbc2fcbdd212f6d629b9a56c39259"
+  integrity sha512-y7FpHSbMUMoyPbYUSzO6PaZ6FyRnQOpHuKwbo1G+Knck95XVU4QAiKdGEnj5wwoS7PlOgthX/09u5iFJ+aYf5Q==
+  dependencies:
+    braces "^3.0.1"
+    picomatch "^2.0.5"
+
+miller-rabin@^4.0.0:
+  version "4.0.1"
+  resolved "https://registry.yarnpkg.com/miller-rabin/-/miller-rabin-4.0.1.tgz#f080351c865b0dc562a8462966daa53543c78a4d"
+  integrity sha512-115fLhvZVqWwHPbClyntxEVfVDfl9DLLTuJvq3g2O/Oxi8AiNouAHvDSzHS0viUJc+V5vm3eq91Xwqn9dp4jRA==
+  dependencies:
+    bn.js "^4.0.0"
+    brorand "^1.0.1"
+
+mime-db@1.46.0, "mime-db@>= 1.43.0 < 2":
+  version "1.46.0"
+  resolved "https://registry.yarnpkg.com/mime-db/-/mime-db-1.46.0.tgz#6267748a7f799594de3cbc8cde91def349661cee"
+  integrity sha512-svXaP8UQRZ5K7or+ZmfNhg2xX3yKDMUzqadsSqi4NCH/KomcH75MAMYAGVlvXn4+b/xOPhS3I2uHKRUzvjY7BQ==
+
+mime-types@^2.1.12, mime-types@^2.1.27, mime-types@~2.1.17, mime-types@~2.1.19, mime-types@~2.1.24:
+  version "2.1.29"
+  resolved "https://registry.yarnpkg.com/mime-types/-/mime-types-2.1.29.tgz#1d4ab77da64b91f5f72489df29236563754bb1b2"
+  integrity sha512-Y/jMt/S5sR9OaqteJtslsFZKWOIIqMACsJSiHghlCAyhf7jfVYjKBmLiX8OgpWeW+fjJ2b+Az69aPFPkUOY6xQ==
+  dependencies:
+    mime-db "1.46.0"
+
+mime@1.6.0:
+  version "1.6.0"
+  resolved "https://registry.yarnpkg.com/mime/-/mime-1.6.0.tgz#32cd9e5c64553bd58d19a568af452acff04981b1"
+  integrity sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg==
+
+mime@^2.4.4:
+  version "2.5.2"
+  resolved "https://registry.yarnpkg.com/mime/-/mime-2.5.2.tgz#6e3dc6cc2b9510643830e5f19d5cb753da5eeabe"
+  integrity sha512-tqkh47FzKeCPD2PUiPB6pkbMzsCasjxAfC62/Wap5qrUWcb+sFasXUC5I3gYM5iBM8v/Qpn4UK0x+j0iHyFPDg==
+
+mimic-fn@^2.1.0:
+  version "2.1.0"
+  resolved "https://registry.yarnpkg.com/mimic-fn/-/mimic-fn-2.1.0.tgz#7ed2c2ccccaf84d3ffcb7a69b57711fc2083401b"
+  integrity sha512-OqbOk5oEQeAZ8WXWydlu9HJjz9WVdEIvamMCcXmuqUYjTknH/sqsWvhQ3vgwKFRR1HpjvNBKQ37nbJgYzGqGcg==
+
+min-indent@^1.0.0:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/min-indent/-/min-indent-1.0.1.tgz#a63f681673b30571fbe8bc25686ae746eefa9869"
+  integrity sha512-I9jwMn07Sy/IwOj3zVkVik2JTvgpaykDZEigL6Rx6N9LbMywwUSMtxET+7lVoDLLd3O3IXwJwvuuns8UB/HeAg==
+
+mini-create-react-context@^0.4.0:
+  version "0.4.1"
+  resolved "https://registry.yarnpkg.com/mini-create-react-context/-/mini-create-react-context-0.4.1.tgz#072171561bfdc922da08a60c2197a497cc2d1d5e"
+  integrity sha512-YWCYEmd5CQeHGSAKrYvXgmzzkrvssZcuuQDDeqkT+PziKGMgE+0MCCtcKbROzocGBG1meBLl2FotlRwf4gAzbQ==
+  dependencies:
+    "@babel/runtime" "^7.12.1"
+    tiny-warning "^1.0.3"
+
+mini-css-extract-plugin@0.11.3:
+  version "0.11.3"
+  resolved "https://registry.yarnpkg.com/mini-css-extract-plugin/-/mini-css-extract-plugin-0.11.3.tgz#15b0910a7f32e62ffde4a7430cfefbd700724ea6"
+  integrity sha512-n9BA8LonkOkW1/zn+IbLPQmovsL0wMb9yx75fMJQZf2X1Zoec9yTZtyMePcyu19wPkmFbzZZA6fLTotpFhQsOA==
+  dependencies:
+    loader-utils "^1.1.0"
+    normalize-url "1.9.1"
+    schema-utils "^1.0.0"
+    webpack-sources "^1.1.0"
+
+minimalistic-assert@^1.0.0, minimalistic-assert@^1.0.1:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/minimalistic-assert/-/minimalistic-assert-1.0.1.tgz#2e194de044626d4a10e7f7fbc00ce73e83e4d5c7"
+  integrity sha512-UtJcAD4yEaGtjPezWuO9wC4nwUnVH/8/Im3yEHQP4b67cXlD/Qr9hdITCU1xDbSEXg2XKNaP8jsReV7vQd00/A==
+
+minimalistic-crypto-utils@^1.0.1:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/minimalistic-crypto-utils/-/minimalistic-crypto-utils-1.0.1.tgz#f6c00c1c0b082246e5c4d99dfb8c7c083b2b582a"
+  integrity sha1-9sAMHAsIIkblxNmd+4x8CDsrWCo=
+
+minimatch@3.0.4, minimatch@^3.0.4:
+  version "3.0.4"
+  resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-3.0.4.tgz#5166e286457f03306064be5497e8dbb0c3d32083"
+  integrity sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==
+  dependencies:
+    brace-expansion "^1.1.7"
+
+minimist@^1.1.1, minimist@^1.2.0, minimist@^1.2.5:
+  version "1.2.5"
+  resolved "https://registry.yarnpkg.com/minimist/-/minimist-1.2.5.tgz#67d66014b66a6a8aaa0c083c5fd58df4e4e97602"
+  integrity sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==
+
+minipass-collect@^1.0.2:
+  version "1.0.2"
+  resolved "https://registry.yarnpkg.com/minipass-collect/-/minipass-collect-1.0.2.tgz#22b813bf745dc6edba2576b940022ad6edc8c617"
+  integrity sha512-6T6lH0H8OG9kITm/Jm6tdooIbogG9e0tLgpY6mphXSm/A9u8Nq1ryBG+Qspiub9LjWlBPsPS3tWQ/Botq4FdxA==
+  dependencies:
+    minipass "^3.0.0"
+
+minipass-flush@^1.0.5:
+  version "1.0.5"
+  resolved "https://registry.yarnpkg.com/minipass-flush/-/minipass-flush-1.0.5.tgz#82e7135d7e89a50ffe64610a787953c4c4cbb373"
+  integrity sha512-JmQSYYpPUqX5Jyn1mXaRwOda1uQ8HP5KAT/oDSLCzt1BYRhQU0/hDtsB1ufZfEEzMZ9aAVmsBw8+FWsIXlClWw==
+  dependencies:
+    minipass "^3.0.0"
+
+minipass-pipeline@^1.2.2:
+  version "1.2.4"
+  resolved "https://registry.yarnpkg.com/minipass-pipeline/-/minipass-pipeline-1.2.4.tgz#68472f79711c084657c067c5c6ad93cddea8214c"
+  integrity sha512-xuIq7cIOt09RPRJ19gdi4b+RiNvDFYe5JH+ggNvBqGqpQXcru3PcRmOZuHBKWK1Txf9+cQ+HMVN4d6z46LZP7A==
+  dependencies:
+    minipass "^3.0.0"
+
+minipass@^3.0.0, minipass@^3.1.1:
+  version "3.1.3"
+  resolved "https://registry.yarnpkg.com/minipass/-/minipass-3.1.3.tgz#7d42ff1f39635482e15f9cdb53184deebd5815fd"
+  integrity sha512-Mgd2GdMVzY+x3IJ+oHnVM+KG3lA5c8tnabyJKmHSaG2kAGpudxuOf8ToDkhumF7UzME7DecbQE9uOZhNm7PuJg==
+  dependencies:
+    yallist "^4.0.0"
+
+minizlib@^2.1.1:
+  version "2.1.2"
+  resolved "https://registry.yarnpkg.com/minizlib/-/minizlib-2.1.2.tgz#e90d3466ba209b932451508a11ce3d3632145931"
+  integrity sha512-bAxsR8BVfj60DWXHE3u30oHzfl4G7khkSuPW+qvpd7jFRHm7dLxOjUk1EHACJ/hxLY8phGJ0YhYHZo7jil7Qdg==
+  dependencies:
+    minipass "^3.0.0"
+    yallist "^4.0.0"
+
+mississippi@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/mississippi/-/mississippi-3.0.0.tgz#ea0a3291f97e0b5e8776b363d5f0a12d94c67022"
+  integrity sha512-x471SsVjUtBRtcvd4BzKE9kFC+/2TeWgKCgw0bZcw1b9l2X3QX5vCWgF+KaZaYm87Ss//rHnWryupDrgLvmSkA==
+  dependencies:
+    concat-stream "^1.5.0"
+    duplexify "^3.4.2"
+    end-of-stream "^1.1.0"
+    flush-write-stream "^1.0.0"
+    from2 "^2.1.0"
+    parallel-transform "^1.1.0"
+    pump "^3.0.0"
+    pumpify "^1.3.3"
+    stream-each "^1.1.0"
+    through2 "^2.0.0"
+
+mixin-deep@^1.2.0:
+  version "1.3.2"
+  resolved "https://registry.yarnpkg.com/mixin-deep/-/mixin-deep-1.3.2.tgz#1120b43dc359a785dce65b55b82e257ccf479566"
+  integrity sha512-WRoDn//mXBiJ1H40rqa3vH0toePwSsGb45iInWlTySa+Uu4k3tYUSxa2v1KqAiLtvlrSzaExqS1gtk96A9zvEA==
+  dependencies:
+    for-in "^1.0.2"
+    is-extendable "^1.0.1"
+
+mkdirp@^0.5.1, mkdirp@^0.5.3, mkdirp@^0.5.5, mkdirp@~0.5.1:
+  version "0.5.5"
+  resolved "https://registry.yarnpkg.com/mkdirp/-/mkdirp-0.5.5.tgz#d91cefd62d1436ca0f41620e251288d420099def"
+  integrity sha512-NKmAlESf6jMGym1++R0Ra7wvhV+wFW63FaSOFPwRahvea0gMUcGUhVeAg/0BC0wiv9ih5NYPB1Wn1UEI1/L+xQ==
+  dependencies:
+    minimist "^1.2.5"
+
+mkdirp@^1.0.3, mkdirp@^1.0.4:
+  version "1.0.4"
+  resolved "https://registry.yarnpkg.com/mkdirp/-/mkdirp-1.0.4.tgz#3eb5ed62622756d79a5f0e2a221dfebad75c2f7e"
+  integrity sha512-vVqVZQyf3WLx2Shd0qJ9xuvqgAyKPLAiqITEtqW0oIUjzo3PePDd6fW9iFz30ef7Ysp/oiWqbhszeGWW2T6Gzw==
+
+move-concurrently@^1.0.1:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/move-concurrently/-/move-concurrently-1.0.1.tgz#be2c005fda32e0b29af1f05d7c4b33214c701f92"
+  integrity sha1-viwAX9oy4LKa8fBdfEszIUxwH5I=
+  dependencies:
+    aproba "^1.1.1"
+    copy-concurrently "^1.0.0"
+    fs-write-stream-atomic "^1.0.8"
+    mkdirp "^0.5.1"
+    rimraf "^2.5.4"
+    run-queue "^1.0.3"
+
+ms@2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/ms/-/ms-2.0.0.tgz#5608aeadfc00be6c2901df5f9861788de0d597c8"
+  integrity sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=
+
+ms@2.1.1:
+  version "2.1.1"
+  resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.1.tgz#30a5864eb3ebb0a66f2ebe6d727af06a09d86e0a"
+  integrity sha512-tgp+dl5cGk28utYktBsrFqA7HKgrhgPsg6Z/EfhWI4gl1Hwq8B/GmY/0oXZ6nF8hDVesS/FpnYaD/kOWhYQvyg==
+
+ms@2.1.2:
+  version "2.1.2"
+  resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.2.tgz#d09d1f357b443f493382a8eb3ccd183872ae6009"
+  integrity sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==
+
+ms@^2.1.1:
+  version "2.1.3"
+  resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.3.tgz#574c8138ce1d2b5861f0b44579dbadd60c6615b2"
+  integrity sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==
+
+multicast-dns-service-types@^1.1.0:
+  version "1.1.0"
+  resolved "https://registry.yarnpkg.com/multicast-dns-service-types/-/multicast-dns-service-types-1.1.0.tgz#899f11d9686e5e05cb91b35d5f0e63b773cfc901"
+  integrity sha1-iZ8R2WhuXgXLkbNdXw5jt3PPyQE=
+
+multicast-dns@^6.0.1:
+  version "6.2.3"
+  resolved "https://registry.yarnpkg.com/multicast-dns/-/multicast-dns-6.2.3.tgz#a0ec7bd9055c4282f790c3c82f4e28db3b31b229"
+  integrity sha512-ji6J5enbMyGRHIAkAOu3WdV8nggqviKCEKtXcOqfphZZtQrmHKycfynJ2V7eVPUA4NhJ6V7Wf4TmGbTwKE9B6g==
+  dependencies:
+    dns-packet "^1.3.1"
+    thunky "^1.0.2"
+
+nan@^2.12.1:
+  version "2.14.2"
+  resolved "https://registry.yarnpkg.com/nan/-/nan-2.14.2.tgz#f5376400695168f4cc694ac9393d0c9585eeea19"
+  integrity sha512-M2ufzIiINKCuDfBSAUr1vWQ+vuVcA9kqx8JJUsbQi6yf1uGRyb7HfpdfUr5qLXf3B/t8dPvcjhKMmlfnP47EzQ==
+
+nanoid@^3.1.20:
+  version "3.1.20"
+  resolved "https://registry.yarnpkg.com/nanoid/-/nanoid-3.1.20.tgz#badc263c6b1dcf14b71efaa85f6ab4c1d6cfc788"
+  integrity sha512-a1cQNyczgKbLX9jwbS/+d7W8fX/RfgYR7lVWwWOGIPNgK2m0MWvrGF6/m4kk6U3QcFMnZf3RIhL0v2Jgh/0Uxw==
+
+nanomatch@^1.2.9:
+  version "1.2.13"
+  resolved "https://registry.yarnpkg.com/nanomatch/-/nanomatch-1.2.13.tgz#b87a8aa4fc0de8fe6be88895b38983ff265bd119"
+  integrity sha512-fpoe2T0RbHwBTBUOftAfBPaDEi06ufaUai0mE6Yn1kacc3SnTErfb/h+X94VXzI64rKFHYImXSvdwGGCmwOqCA==
+  dependencies:
+    arr-diff "^4.0.0"
+    array-unique "^0.3.2"
+    define-property "^2.0.2"
+    extend-shallow "^3.0.2"
+    fragment-cache "^0.2.1"
+    is-windows "^1.0.2"
+    kind-of "^6.0.2"
+    object.pick "^1.3.0"
+    regex-not "^1.0.0"
+    snapdragon "^0.8.1"
+    to-regex "^3.0.1"
+
+native-url@^0.2.6:
+  version "0.2.6"
+  resolved "https://registry.yarnpkg.com/native-url/-/native-url-0.2.6.tgz#ca1258f5ace169c716ff44eccbddb674e10399ae"
+  integrity sha512-k4bDC87WtgrdD362gZz6zoiXQrl40kYlBmpfmSjwRO1VU0V5ccwJTlxuE72F6m3V0vc1xOf6n3UCP9QyerRqmA==
+  dependencies:
+    querystring "^0.2.0"
+
+natural-compare@^1.4.0:
+  version "1.4.0"
+  resolved "https://registry.yarnpkg.com/natural-compare/-/natural-compare-1.4.0.tgz#4abebfeed7541f2c27acfb29bdbbd15c8d5ba4f7"
+  integrity sha1-Sr6/7tdUHywnrPspvbvRXI1bpPc=
+
+negotiator@0.6.2:
+  version "0.6.2"
+  resolved "https://registry.yarnpkg.com/negotiator/-/negotiator-0.6.2.tgz#feacf7ccf525a77ae9634436a64883ffeca346fb"
+  integrity sha512-hZXc7K2e+PgeI1eDBe/10Ard4ekbfrrqG8Ep+8Jmf4JID2bNg7NvCPOZN+kfF574pFQI7mum2AUqDidoKqcTOw==
+
+neo-async@^2.5.0, neo-async@^2.6.1, neo-async@^2.6.2:
+  version "2.6.2"
+  resolved "https://registry.yarnpkg.com/neo-async/-/neo-async-2.6.2.tgz#b4aafb93e3aeb2d8174ca53cf163ab7d7308305f"
+  integrity sha512-Yd3UES5mWCSqR+qNT93S3UoYUkqAZ9lLg8a7g9rimsWmYGK8cVToA4/sF3RrshdyV3sAGMXVUmpMYOw+dLpOuw==
+
+next-tick@~1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/next-tick/-/next-tick-1.0.0.tgz#ca86d1fe8828169b0120208e3dc8424b9db8342c"
+  integrity sha1-yobR/ogoFpsBICCOPchCS524NCw=
+
+nice-try@^1.0.4:
+  version "1.0.5"
+  resolved "https://registry.yarnpkg.com/nice-try/-/nice-try-1.0.5.tgz#a3378a7696ce7d223e88fc9b764bd7ef1089e366"
+  integrity sha512-1nh45deeb5olNY7eX82BkPO7SSxR5SSYJiPTrTdFUVYwAl8CKMA5N9PjTYkHiRjisVcxcQ1HXdLhx2qxxJzLNQ==
+
+no-case@^3.0.4:
+  version "3.0.4"
+  resolved "https://registry.yarnpkg.com/no-case/-/no-case-3.0.4.tgz#d361fd5c9800f558551a8369fc0dcd4662b6124d"
+  integrity sha512-fgAN3jGAh+RoxUGZHTSOLJIqUc2wmoBwGR4tbpNAKmmovFoWq0OdRkb0VkldReO2a2iBT/OEulG9XSUc10r3zg==
+  dependencies:
+    lower-case "^2.0.2"
+    tslib "^2.0.3"
+
+node-forge@^0.10.0:
+  version "0.10.0"
+  resolved "https://registry.yarnpkg.com/node-forge/-/node-forge-0.10.0.tgz#32dea2afb3e9926f02ee5ce8794902691a676bf3"
+  integrity sha512-PPmu8eEeG9saEUvI97fm4OYxXVB6bFvyNTyiUOBichBpFG8A1Ljw3bY62+5oOjDEMHRnd0Y7HQ+x7uzxOzC6JA==
+
+node-int64@^0.4.0:
+  version "0.4.0"
+  resolved "https://registry.yarnpkg.com/node-int64/-/node-int64-0.4.0.tgz#87a9065cdb355d3182d8f94ce11188b825c68a3b"
+  integrity sha1-h6kGXNs1XTGC2PlM4RGIuCXGijs=
+
+node-libs-browser@^2.2.1:
+  version "2.2.1"
+  resolved "https://registry.yarnpkg.com/node-libs-browser/-/node-libs-browser-2.2.1.tgz#b64f513d18338625f90346d27b0d235e631f6425"
+  integrity sha512-h/zcD8H9kaDZ9ALUWwlBUDo6TKF8a7qBSCSEGfjTVIYeqsioSKaAX+BN7NgiMGp6iSIXZ3PxgCu8KS3b71YK5Q==
+  dependencies:
+    assert "^1.1.1"
+    browserify-zlib "^0.2.0"
+    buffer "^4.3.0"
+    console-browserify "^1.1.0"
+    constants-browserify "^1.0.0"
+    crypto-browserify "^3.11.0"
+    domain-browser "^1.1.1"
+    events "^3.0.0"
+    https-browserify "^1.0.0"
+    os-browserify "^0.3.0"
+    path-browserify "0.0.1"
+    process "^0.11.10"
+    punycode "^1.2.4"
+    querystring-es3 "^0.2.0"
+    readable-stream "^2.3.3"
+    stream-browserify "^2.0.1"
+    stream-http "^2.7.2"
+    string_decoder "^1.0.0"
+    timers-browserify "^2.0.4"
+    tty-browserify "0.0.0"
+    url "^0.11.0"
+    util "^0.11.0"
+    vm-browserify "^1.0.1"
+
+node-modules-regexp@^1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/node-modules-regexp/-/node-modules-regexp-1.0.0.tgz#8d9dbe28964a4ac5712e9131642107c71e90ec40"
+  integrity sha1-jZ2+KJZKSsVxLpExZCEHxx6Q7EA=
+
+node-notifier@^8.0.0:
+  version "8.0.1"
+  resolved "https://registry.yarnpkg.com/node-notifier/-/node-notifier-8.0.1.tgz#f86e89bbc925f2b068784b31f382afdc6ca56be1"
+  integrity sha512-BvEXF+UmsnAfYfoapKM9nGxnP+Wn7P91YfXmrKnfcYCx6VBeoN5Ez5Ogck6I8Bi5k4RlpqRYaw75pAwzX9OphA==
+  dependencies:
+    growly "^1.3.0"
+    is-wsl "^2.2.0"
+    semver "^7.3.2"
+    shellwords "^0.1.1"
+    uuid "^8.3.0"
+    which "^2.0.2"
+
+node-releases@^1.1.61, node-releases@^1.1.70:
+  version "1.1.70"
+  resolved "https://registry.yarnpkg.com/node-releases/-/node-releases-1.1.70.tgz#66e0ed0273aa65666d7fe78febe7634875426a08"
+  integrity sha512-Slf2s69+2/uAD79pVVQo8uSiC34+g8GWY8UH2Qtqv34ZfhYrxpYpfzs9Js9d6O0mbDmALuxaTlplnBTnSELcrw==
+
+normalize-package-data@^2.3.2, normalize-package-data@^2.5.0:
+  version "2.5.0"
+  resolved "https://registry.yarnpkg.com/normalize-package-data/-/normalize-package-data-2.5.0.tgz#e66db1838b200c1dfc233225d12cb36520e234a8"
+  integrity sha512-/5CMN3T0R4XTj4DcGaexo+roZSdSFW/0AOOTROrjxzCG1wrWXEsGbRKevjlIL+ZDE4sZlJr5ED4YW0yqmkK+eA==
+  dependencies:
+    hosted-git-info "^2.1.4"
+    resolve "^1.10.0"
+    semver "2 || 3 || 4 || 5"
+    validate-npm-package-license "^3.0.1"
+
+normalize-path@^2.1.1:
+  version "2.1.1"
+  resolved "https://registry.yarnpkg.com/normalize-path/-/normalize-path-2.1.1.tgz#1ab28b556e198363a8c1a6f7e6fa20137fe6aed9"
+  integrity sha1-GrKLVW4Zg2Oowab35vogE3/mrtk=
+  dependencies:
+    remove-trailing-separator "^1.0.1"
+
+normalize-path@^3.0.0, normalize-path@~3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/normalize-path/-/normalize-path-3.0.0.tgz#0dcd69ff23a1c9b11fd0978316644a0388216a65"
+  integrity sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==
+
+normalize-range@^0.1.2:
+  version "0.1.2"
+  resolved "https://registry.yarnpkg.com/normalize-range/-/normalize-range-0.1.2.tgz#2d10c06bdfd312ea9777695a4d28439456b75942"
+  integrity sha1-LRDAa9/TEuqXd2laTShDlFa3WUI=
+
+normalize-url@1.9.1:
+  version "1.9.1"
+  resolved "https://registry.yarnpkg.com/normalize-url/-/normalize-url-1.9.1.tgz#2cc0d66b31ea23036458436e3620d85954c66c3c"
+  integrity sha1-LMDWazHqIwNkWENuNiDYWVTGbDw=
+  dependencies:
+    object-assign "^4.0.1"
+    prepend-http "^1.0.0"
+    query-string "^4.1.0"
+    sort-keys "^1.0.0"
+
+normalize-url@^3.0.0:
+  version "3.3.0"
+  resolved "https://registry.yarnpkg.com/normalize-url/-/normalize-url-3.3.0.tgz#b2e1c4dc4f7c6d57743df733a4f5978d18650559"
+  integrity sha512-U+JJi7duF1o+u2pynbp2zXDW2/PADgC30f0GsHZtRh+HOcXHnw137TrNlyxxRvWW5fjKd3bcLHPxofWuCjaeZg==
+
+npm-run-path@^2.0.0:
+  version "2.0.2"
+  resolved "https://registry.yarnpkg.com/npm-run-path/-/npm-run-path-2.0.2.tgz#35a9232dfa35d7067b4cb2ddf2357b1871536c5f"
+  integrity sha1-NakjLfo11wZ7TLLd8jV7GHFTbF8=
+  dependencies:
+    path-key "^2.0.0"
+
+npm-run-path@^4.0.0:
+  version "4.0.1"
+  resolved "https://registry.yarnpkg.com/npm-run-path/-/npm-run-path-4.0.1.tgz#b7ecd1e5ed53da8e37a55e1c2269e0b97ed748ea"
+  integrity sha512-S48WzZW777zhNIrn7gxOlISNAqi9ZC/uQFnRdbeIHhZhCA6UqpkOT8T1G7BvfdgP4Er8gF4sUbaS0i7QvIfCWw==
+  dependencies:
+    path-key "^3.0.0"
+
+nth-check@^1.0.2:
+  version "1.0.2"
+  resolved "https://registry.yarnpkg.com/nth-check/-/nth-check-1.0.2.tgz#b2bd295c37e3dd58a3bf0700376663ba4d9cf05c"
+  integrity sha512-WeBOdju8SnzPN5vTUJYxYUxLeXpCaVP5i5e0LF8fg7WORF2Wd7wFX/pk0tYZk7s8T+J7VLy0Da6J1+wCT0AtHg==
+  dependencies:
+    boolbase "~1.0.0"
+
+num2fraction@^1.2.2:
+  version "1.2.2"
+  resolved "https://registry.yarnpkg.com/num2fraction/-/num2fraction-1.2.2.tgz#6f682b6a027a4e9ddfa4564cd2589d1d4e669ede"
+  integrity sha1-b2gragJ6Tp3fpFZM0lidHU5mnt4=
+
+nwsapi@^2.2.0:
+  version "2.2.0"
+  resolved "https://registry.yarnpkg.com/nwsapi/-/nwsapi-2.2.0.tgz#204879a9e3d068ff2a55139c2c772780681a38b7"
+  integrity sha512-h2AatdwYH+JHiZpv7pt/gSX1XoRGb7L/qSIeuqA6GwYoF9w1vP1cw42TO0aI2pNyshRK5893hNSl+1//vHK7hQ==
+
+oauth-sign@~0.9.0:
+  version "0.9.0"
+  resolved "https://registry.yarnpkg.com/oauth-sign/-/oauth-sign-0.9.0.tgz#47a7b016baa68b5fa0ecf3dee08a85c679ac6455"
+  integrity sha512-fexhUFFPTGV8ybAtSIGbV6gOkSv8UtRbDBnAyLQw4QPKkgNlsH2ByPGtMUqdWkos6YCRmAqViwgZrJc/mRDzZQ==
+
+object-assign@^4.0.1, object-assign@^4.1.0, object-assign@^4.1.1:
+  version "4.1.1"
+  resolved "https://registry.yarnpkg.com/object-assign/-/object-assign-4.1.1.tgz#2109adc7965887cfc05cbbd442cac8bfbb360863"
+  integrity sha1-IQmtx5ZYh8/AXLvUQsrIv7s2CGM=
+
+object-copy@^0.1.0:
+  version "0.1.0"
+  resolved "https://registry.yarnpkg.com/object-copy/-/object-copy-0.1.0.tgz#7e7d858b781bd7c991a41ba975ed3812754e998c"
+  integrity sha1-fn2Fi3gb18mRpBupde04EnVOmYw=
+  dependencies:
+    copy-descriptor "^0.1.0"
+    define-property "^0.2.5"
+    kind-of "^3.0.3"
+
+object-inspect@^1.8.0, object-inspect@^1.9.0:
+  version "1.9.0"
+  resolved "https://registry.yarnpkg.com/object-inspect/-/object-inspect-1.9.0.tgz#c90521d74e1127b67266ded3394ad6116986533a"
+  integrity sha512-i3Bp9iTqwhaLZBxGkRfo5ZbE07BQRT7MGu8+nNgwW9ItGp1TzCTw2DLEoWwjClxBjOFI/hWljTAmYGCEwmtnOw==
+
+object-is@^1.0.1:
+  version "1.1.5"
+  resolved "https://registry.yarnpkg.com/object-is/-/object-is-1.1.5.tgz#b9deeaa5fc7f1846a0faecdceec138e5778f53ac"
+  integrity sha512-3cyDsyHgtmi7I7DfSSI2LDp6SK2lwvtbg0p0R1e0RvTqF5ceGx+K2dfSjm1bKDMVCFEDAQvy+o8c6a7VujOddw==
+  dependencies:
+    call-bind "^1.0.2"
+    define-properties "^1.1.3"
+
+object-keys@^1.0.12, object-keys@^1.1.1:
+  version "1.1.1"
+  resolved "https://registry.yarnpkg.com/object-keys/-/object-keys-1.1.1.tgz#1c47f272df277f3b1daf061677d9c82e2322c60e"
+  integrity sha512-NuAESUOUMrlIXOfHKzD6bpPu3tYt3xvjNdRIQ+FeT0lNb4K8WR70CaDxhuNguS2XG+GjkyMwOzsN5ZktImfhLA==
+
+object-visit@^1.0.0:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/object-visit/-/object-visit-1.0.1.tgz#f79c4493af0c5377b59fe39d395e41042dd045bb"
+  integrity sha1-95xEk68MU3e1n+OdOV5BBC3QRbs=
+  dependencies:
+    isobject "^3.0.0"
+
+object.assign@^4.1.0, object.assign@^4.1.1, object.assign@^4.1.2:
+  version "4.1.2"
+  resolved "https://registry.yarnpkg.com/object.assign/-/object.assign-4.1.2.tgz#0ed54a342eceb37b38ff76eb831a0e788cb63940"
+  integrity sha512-ixT2L5THXsApyiUPYKmW+2EHpXXe5Ii3M+f4e+aJFAHao5amFRW6J0OO6c/LU8Be47utCx2GL89hxGB6XSmKuQ==
+  dependencies:
+    call-bind "^1.0.0"
+    define-properties "^1.1.3"
+    has-symbols "^1.0.1"
+    object-keys "^1.1.1"
+
+object.entries@^1.1.0, object.entries@^1.1.2:
+  version "1.1.3"
+  resolved "https://registry.yarnpkg.com/object.entries/-/object.entries-1.1.3.tgz#c601c7f168b62374541a07ddbd3e2d5e4f7711a6"
+  integrity sha512-ym7h7OZebNS96hn5IJeyUmaWhaSM4SVtAPPfNLQEI2MYWCO2egsITb9nab2+i/Pwibx+R0mtn+ltKJXRSeTMGg==
+  dependencies:
+    call-bind "^1.0.0"
+    define-properties "^1.1.3"
+    es-abstract "^1.18.0-next.1"
+    has "^1.0.3"
+
+object.fromentries@^2.0.2:
+  version "2.0.4"
+  resolved "https://registry.yarnpkg.com/object.fromentries/-/object.fromentries-2.0.4.tgz#26e1ba5c4571c5c6f0890cef4473066456a120b8"
+  integrity sha512-EsFBshs5RUUpQEY1D4q/m59kMfz4YJvxuNCJcv/jWwOJr34EaVnG11ZrZa0UHB3wnzV1wx8m58T4hQL8IuNXlQ==
+  dependencies:
+    call-bind "^1.0.2"
+    define-properties "^1.1.3"
+    es-abstract "^1.18.0-next.2"
+    has "^1.0.3"
+
+object.getownpropertydescriptors@^2.0.3, object.getownpropertydescriptors@^2.1.0:
+  version "2.1.2"
+  resolved "https://registry.yarnpkg.com/object.getownpropertydescriptors/-/object.getownpropertydescriptors-2.1.2.tgz#1bd63aeacf0d5d2d2f31b5e393b03a7c601a23f7"
+  integrity sha512-WtxeKSzfBjlzL+F9b7M7hewDzMwy+C8NRssHd1YrNlzHzIDrXcXiNOMrezdAEM4UXixgV+vvnyBeN7Rygl2ttQ==
+  dependencies:
+    call-bind "^1.0.2"
+    define-properties "^1.1.3"
+    es-abstract "^1.18.0-next.2"
+
+object.pick@^1.3.0:
+  version "1.3.0"
+  resolved "https://registry.yarnpkg.com/object.pick/-/object.pick-1.3.0.tgz#87a10ac4c1694bd2e1cbf53591a66141fb5dd747"
+  integrity sha1-h6EKxMFpS9Lhy/U1kaZhQftd10c=
+  dependencies:
+    isobject "^3.0.1"
+
+object.values@^1.1.0, object.values@^1.1.1:
+  version "1.1.2"
+  resolved "https://registry.yarnpkg.com/object.values/-/object.values-1.1.2.tgz#7a2015e06fcb0f546bd652486ce8583a4731c731"
+  integrity sha512-MYC0jvJopr8EK6dPBiO8Nb9mvjdypOachO5REGk6MXzujbBrAisKo3HmdEI6kZDL6fC31Mwee/5YbtMebixeag==
+  dependencies:
+    call-bind "^1.0.0"
+    define-properties "^1.1.3"
+    es-abstract "^1.18.0-next.1"
+    has "^1.0.3"
+
+obuf@^1.0.0, obuf@^1.1.2:
+  version "1.1.2"
+  resolved "https://registry.yarnpkg.com/obuf/-/obuf-1.1.2.tgz#09bea3343d41859ebd446292d11c9d4db619084e"
+  integrity sha512-PX1wu0AmAdPqOL1mWhqmlOd8kOIZQwGZw6rh7uby9fTc5lhaOWFLX3I6R1hrF9k3zUY40e6igsLGkDXK92LJNg==
+
+on-finished@~2.3.0:
+  version "2.3.0"
+  resolved "https://registry.yarnpkg.com/on-finished/-/on-finished-2.3.0.tgz#20f1336481b083cd75337992a16971aa2d906947"
+  integrity sha1-IPEzZIGwg811M3mSoWlxqi2QaUc=
+  dependencies:
+    ee-first "1.1.1"
+
+on-headers@~1.0.2:
+  version "1.0.2"
+  resolved "https://registry.yarnpkg.com/on-headers/-/on-headers-1.0.2.tgz#772b0ae6aaa525c399e489adfad90c403eb3c28f"
+  integrity sha512-pZAE+FJLoyITytdqK0U5s+FIpjN0JP3OzFi/u8Rx+EV5/W+JTWGXG8xFzevE7AjBfDqHv/8vL8qQsIhHnqRkrA==
+
+once@^1.3.0, once@^1.3.1, once@^1.4.0:
+  version "1.4.0"
+  resolved "https://registry.yarnpkg.com/once/-/once-1.4.0.tgz#583b1aa775961d4b113ac17d9c50baef9dd76bd1"
+  integrity sha1-WDsap3WWHUsROsF9nFC6753Xa9E=
+  dependencies:
+    wrappy "1"
+
+onetime@^5.1.0:
+  version "5.1.2"
+  resolved "https://registry.yarnpkg.com/onetime/-/onetime-5.1.2.tgz#d0e96ebb56b07476df1dd9c4806e5237985ca45e"
+  integrity sha512-kbpaSSGJTWdAY5KPVeMOKXSrPtr8C8C7wodJbcsd51jRnmD+GZu8Y0VoU6Dm5Z4vWr0Ig/1NKuWRKf7j5aaYSg==
+  dependencies:
+    mimic-fn "^2.1.0"
+
+open@^7.0.2:
+  version "7.4.2"
+  resolved "https://registry.yarnpkg.com/open/-/open-7.4.2.tgz#b8147e26dcf3e426316c730089fd71edd29c2321"
+  integrity sha512-MVHddDVweXZF3awtlAS+6pgKLlm/JgxZ90+/NBurBoQctVOOB/zDdVjcyPzQ+0laDGbsWgrRkflI65sQeOgT9Q==
+  dependencies:
+    is-docker "^2.0.0"
+    is-wsl "^2.1.1"
+
+opn@^5.5.0:
+  version "5.5.0"
+  resolved "https://registry.yarnpkg.com/opn/-/opn-5.5.0.tgz#fc7164fab56d235904c51c3b27da6758ca3b9bfc"
+  integrity sha512-PqHpggC9bLV0VeWcdKhkpxY+3JTzetLSqTCWL/z/tFIbI6G8JCjondXklT1JinczLz2Xib62sSp0T/gKT4KksA==
+  dependencies:
+    is-wsl "^1.1.0"
+
+optimize-css-assets-webpack-plugin@5.0.4:
+  version "5.0.4"
+  resolved "https://registry.yarnpkg.com/optimize-css-assets-webpack-plugin/-/optimize-css-assets-webpack-plugin-5.0.4.tgz#85883c6528aaa02e30bbad9908c92926bb52dc90"
+  integrity sha512-wqd6FdI2a5/FdoiCNNkEvLeA//lHHfG24Ln2Xm2qqdIk4aOlsR18jwpyOihqQ8849W3qu2DX8fOYxpvTMj+93A==
+  dependencies:
+    cssnano "^4.1.10"
+    last-call-webpack-plugin "^3.0.0"
+
+optionator@^0.8.1:
+  version "0.8.3"
+  resolved "https://registry.yarnpkg.com/optionator/-/optionator-0.8.3.tgz#84fa1d036fe9d3c7e21d99884b601167ec8fb495"
+  integrity sha512-+IW9pACdk3XWmmTXG8m3upGUJst5XRGzxMRjXzAuJ1XnIFNvfhjjIuYkDvysnPQ7qzqVzLt78BCruntqRhWQbA==
+  dependencies:
+    deep-is "~0.1.3"
+    fast-levenshtein "~2.0.6"
+    levn "~0.3.0"
+    prelude-ls "~1.1.2"
+    type-check "~0.3.2"
+    word-wrap "~1.2.3"
+
+optionator@^0.9.1:
+  version "0.9.1"
+  resolved "https://registry.yarnpkg.com/optionator/-/optionator-0.9.1.tgz#4f236a6373dae0566a6d43e1326674f50c291499"
+  integrity sha512-74RlY5FCnhq4jRxVUPKDaRwrVNXMqsGsiW6AJw4XK8hmtm10wC0ypZBLw5IIp85NZMr91+qd1RvvENwg7jjRFw==
+  dependencies:
+    deep-is "^0.1.3"
+    fast-levenshtein "^2.0.6"
+    levn "^0.4.1"
+    prelude-ls "^1.2.1"
+    type-check "^0.4.0"
+    word-wrap "^1.2.3"
+
+original@^1.0.0:
+  version "1.0.2"
+  resolved "https://registry.yarnpkg.com/original/-/original-1.0.2.tgz#e442a61cffe1c5fd20a65f3261c26663b303f25f"
+  integrity sha512-hyBVl6iqqUOJ8FqRe+l/gS8H+kKYjrEndd5Pm1MfBtsEKA038HkkdbAl/72EAXGyonD/PFsvmVG+EvcIpliMBg==
+  dependencies:
+    url-parse "^1.4.3"
+
+os-browserify@^0.3.0:
+  version "0.3.0"
+  resolved "https://registry.yarnpkg.com/os-browserify/-/os-browserify-0.3.0.tgz#854373c7f5c2315914fc9bfc6bd8238fdda1ec27"
+  integrity sha1-hUNzx/XCMVkU/Jv8a9gjj92h7Cc=
+
+p-each-series@^2.1.0:
+  version "2.2.0"
+  resolved "https://registry.yarnpkg.com/p-each-series/-/p-each-series-2.2.0.tgz#105ab0357ce72b202a8a8b94933672657b5e2a9a"
+  integrity sha512-ycIL2+1V32th+8scbpTvyHNaHe02z0sjgh91XXjAk+ZeXoPN4Z46DVUnzdso0aX4KckKw0FNNFHdjZ2UsZvxiA==
+
+p-finally@^1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/p-finally/-/p-finally-1.0.0.tgz#3fbcfb15b899a44123b34b6dcc18b724336a2cae"
+  integrity sha1-P7z7FbiZpEEjs0ttzBi3JDNqLK4=
+
+p-limit@^1.1.0:
+  version "1.3.0"
+  resolved "https://registry.yarnpkg.com/p-limit/-/p-limit-1.3.0.tgz#b86bd5f0c25690911c7590fcbfc2010d54b3ccb8"
+  integrity sha512-vvcXsLAJ9Dr5rQOPk7toZQZJApBl2K4J6dANSsEuh6QI41JYcsS/qhTGa9ErIUUgK3WNQoJYvylxvjqmiqEA9Q==
+  dependencies:
+    p-try "^1.0.0"
+
+p-limit@^2.0.0, p-limit@^2.2.0:
+  version "2.3.0"
+  resolved "https://registry.yarnpkg.com/p-limit/-/p-limit-2.3.0.tgz#3dd33c647a214fdfffd835933eb086da0dc21db1"
+  integrity sha512-//88mFWSJx8lxCzwdAABTJL2MyWB12+eIY7MDL2SqLmAkeKU9qxRvWuSyTjm3FUmpBEMuFfckAIqEaVGUDxb6w==
+  dependencies:
+    p-try "^2.0.0"
+
+p-limit@^3.0.2:
+  version "3.1.0"
+  resolved "https://registry.yarnpkg.com/p-limit/-/p-limit-3.1.0.tgz#e1daccbe78d0d1388ca18c64fea38e3e57e3706b"
+  integrity sha512-TYOanM3wGwNGsZN2cVTYPArw454xnXj5qmWF1bEoAc4+cU/ol7GVh7odevjp1FNHduHc3KZMcFduxU5Xc6uJRQ==
+  dependencies:
+    yocto-queue "^0.1.0"
+
+p-locate@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/p-locate/-/p-locate-2.0.0.tgz#20a0103b222a70c8fd39cc2e580680f3dde5ec43"
+  integrity sha1-IKAQOyIqcMj9OcwuWAaA893l7EM=
+  dependencies:
+    p-limit "^1.1.0"
+
+p-locate@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/p-locate/-/p-locate-3.0.0.tgz#322d69a05c0264b25997d9f40cd8a891ab0064a4"
+  integrity sha512-x+12w/To+4GFfgJhBEpiDcLozRJGegY+Ei7/z0tSLkMmxGZNybVMSfWj9aJn8Z5Fc7dBUNJOOVgPv2H7IwulSQ==
+  dependencies:
+    p-limit "^2.0.0"
+
+p-locate@^4.1.0:
+  version "4.1.0"
+  resolved "https://registry.yarnpkg.com/p-locate/-/p-locate-4.1.0.tgz#a3428bb7088b3a60292f66919278b7c297ad4f07"
+  integrity sha512-R79ZZ/0wAxKGu3oYMlz8jy/kbhsNrS7SKZ7PxEHBgJ5+F2mtFW2fK2cOtBh1cHYkQsbzFV7I+EoRKe6Yt0oK7A==
+  dependencies:
+    p-limit "^2.2.0"
+
+p-map@^2.0.0:
+  version "2.1.0"
+  resolved "https://registry.yarnpkg.com/p-map/-/p-map-2.1.0.tgz#310928feef9c9ecc65b68b17693018a665cea175"
+  integrity sha512-y3b8Kpd8OAN444hxfBbFfj1FY/RjtTd8tzYwhUqNYXx0fXx2iX4maP4Qr6qhIKbQXI02wTLAda4fYUbDagTUFw==
+
+p-map@^4.0.0:
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/p-map/-/p-map-4.0.0.tgz#bb2f95a5eda2ec168ec9274e06a747c3e2904d2b"
+  integrity sha512-/bjOqmgETBYB5BoEeGVea8dmvHb2m9GLy1E9W43yeyfP6QQCZGFNa+XRceJEuDB6zqr+gKpIAmlLebMpykw/MQ==
+  dependencies:
+    aggregate-error "^3.0.0"
+
+p-retry@^3.0.1:
+  version "3.0.1"
+  resolved "https://registry.yarnpkg.com/p-retry/-/p-retry-3.0.1.tgz#316b4c8893e2c8dc1cfa891f406c4b422bebf328"
+  integrity sha512-XE6G4+YTTkT2a0UWb2kjZe8xNwf8bIbnqpc/IS/idOBVhyves0mK5OJgeocjx7q5pvX/6m23xuzVPYT1uGM73w==
+  dependencies:
+    retry "^0.12.0"
+
+p-try@^1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/p-try/-/p-try-1.0.0.tgz#cbc79cdbaf8fd4228e13f621f2b1a237c1b207b3"
+  integrity sha1-y8ec26+P1CKOE/Yh8rGiN8GyB7M=
+
+p-try@^2.0.0:
+  version "2.2.0"
+  resolved "https://registry.yarnpkg.com/p-try/-/p-try-2.2.0.tgz#cb2868540e313d61de58fafbe35ce9004d5540e6"
+  integrity sha512-R4nPAVTAU0B9D35/Gk3uJf/7XYbQcyohSKdvAxIRSNghFl4e71hVoGnBNQz9cWaXxO2I10KTC+3jMdvvoKw6dQ==
+
+pako@~1.0.5:
+  version "1.0.11"
+  resolved "https://registry.yarnpkg.com/pako/-/pako-1.0.11.tgz#6c9599d340d54dfd3946380252a35705a6b992bf"
+  integrity sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw==
+
+parallel-transform@^1.1.0:
+  version "1.2.0"
+  resolved "https://registry.yarnpkg.com/parallel-transform/-/parallel-transform-1.2.0.tgz#9049ca37d6cb2182c3b1d2c720be94d14a5814fc"
+  integrity sha512-P2vSmIu38uIlvdcU7fDkyrxj33gTUy/ABO5ZUbGowxNCopBq/OoD42bP4UmMrJoPyk4Uqf0mu3mtWBhHCZD8yg==
+  dependencies:
+    cyclist "^1.0.1"
+    inherits "^2.0.3"
+    readable-stream "^2.1.5"
+
+param-case@^3.0.3:
+  version "3.0.4"
+  resolved "https://registry.yarnpkg.com/param-case/-/param-case-3.0.4.tgz#7d17fe4aa12bde34d4a77d91acfb6219caad01c5"
+  integrity sha512-RXlj7zCYokReqWpOPH9oYivUzLYZ5vAPIfEmCTNViosC78F8F0H9y7T7gG2M39ymgutxF5gcFEsyZQSph9Bp3A==
+  dependencies:
+    dot-case "^3.0.4"
+    tslib "^2.0.3"
+
+parent-module@^1.0.0:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/parent-module/-/parent-module-1.0.1.tgz#691d2709e78c79fae3a156622452d00762caaaa2"
+  integrity sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g==
+  dependencies:
+    callsites "^3.0.0"
+
+parse-asn1@^5.0.0, parse-asn1@^5.1.5:
+  version "5.1.6"
+  resolved "https://registry.yarnpkg.com/parse-asn1/-/parse-asn1-5.1.6.tgz#385080a3ec13cb62a62d39409cb3e88844cdaed4"
+  integrity sha512-RnZRo1EPU6JBnra2vGHj0yhp6ebyjBZpmUCLHWiFhxlzvBCCpAuZ7elsBp1PVAbQN0/04VD/19rfzlBSwLstMw==
+  dependencies:
+    asn1.js "^5.2.0"
+    browserify-aes "^1.0.0"
+    evp_bytestokey "^1.0.0"
+    pbkdf2 "^3.0.3"
+    safe-buffer "^5.1.1"
+
+parse-json@^2.2.0:
+  version "2.2.0"
+  resolved "https://registry.yarnpkg.com/parse-json/-/parse-json-2.2.0.tgz#f480f40434ef80741f8469099f8dea18f55a4dc9"
+  integrity sha1-9ID0BDTvgHQfhGkJn43qGPVaTck=
+  dependencies:
+    error-ex "^1.2.0"
+
+parse-json@^4.0.0:
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/parse-json/-/parse-json-4.0.0.tgz#be35f5425be1f7f6c747184f98a788cb99477ee0"
+  integrity sha1-vjX1Qlvh9/bHRxhPmKeIy5lHfuA=
+  dependencies:
+    error-ex "^1.3.1"
+    json-parse-better-errors "^1.0.1"
+
+parse-json@^5.0.0:
+  version "5.2.0"
+  resolved "https://registry.yarnpkg.com/parse-json/-/parse-json-5.2.0.tgz#c76fc66dee54231c962b22bcc8a72cf2f99753cd"
+  integrity sha512-ayCKvm/phCGxOkYRSCM82iDwct8/EonSEgCSxWxD7ve6jHggsFl4fZVQBPRNgQoKiuV/odhFrGzQXZwbifC8Rg==
+  dependencies:
+    "@babel/code-frame" "^7.0.0"
+    error-ex "^1.3.1"
+    json-parse-even-better-errors "^2.3.0"
+    lines-and-columns "^1.1.6"
+
+parse5@5.1.1:
+  version "5.1.1"
+  resolved "https://registry.yarnpkg.com/parse5/-/parse5-5.1.1.tgz#f68e4e5ba1852ac2cadc00f4555fff6c2abb6178"
+  integrity sha512-ugq4DFI0Ptb+WWjAdOK16+u/nHfiIrcE+sh8kZMaM0WllQKLI9rOUq6c2b7cwPkXdzfQESqvoqK6ug7U/Yyzug==
+
+parseurl@~1.3.2, parseurl@~1.3.3:
+  version "1.3.3"
+  resolved "https://registry.yarnpkg.com/parseurl/-/parseurl-1.3.3.tgz#9da19e7bee8d12dff0513ed5b76957793bc2e8d4"
+  integrity sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==
+
+pascal-case@^3.1.2:
+  version "3.1.2"
+  resolved "https://registry.yarnpkg.com/pascal-case/-/pascal-case-3.1.2.tgz#b48e0ef2b98e205e7c1dae747d0b1508237660eb"
+  integrity sha512-uWlGT3YSnK9x3BQJaOdcZwrnV6hPpd8jFH1/ucpiLRPh/2zCVJKS19E4GvYHvaCcACn3foXZ0cLB9Wrx1KGe5g==
+  dependencies:
+    no-case "^3.0.4"
+    tslib "^2.0.3"
+
+pascalcase@^0.1.1:
+  version "0.1.1"
+  resolved "https://registry.yarnpkg.com/pascalcase/-/pascalcase-0.1.1.tgz#b363e55e8006ca6fe21784d2db22bd15d7917f14"
+  integrity sha1-s2PlXoAGym/iF4TS2yK9FdeRfxQ=
+
+path-browserify@0.0.1:
+  version "0.0.1"
+  resolved "https://registry.yarnpkg.com/path-browserify/-/path-browserify-0.0.1.tgz#e6c4ddd7ed3aa27c68a20cc4e50e1a4ee83bbc4a"
+  integrity sha512-BapA40NHICOS+USX9SN4tyhq+A2RrN/Ws5F0Z5aMHDp98Fl86lX8Oti8B7uN93L4Ifv4fHOEA+pQw87gmMO/lQ==
+
+path-dirname@^1.0.0:
+  version "1.0.2"
+  resolved "https://registry.yarnpkg.com/path-dirname/-/path-dirname-1.0.2.tgz#cc33d24d525e099a5388c0336c6e32b9160609e0"
+  integrity sha1-zDPSTVJeCZpTiMAzbG4yuRYGCeA=
+
+path-exists@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/path-exists/-/path-exists-3.0.0.tgz#ce0ebeaa5f78cb18925ea7d810d7b59b010fd515"
+  integrity sha1-zg6+ql94yxiSXqfYENe1mwEP1RU=
+
+path-exists@^4.0.0:
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/path-exists/-/path-exists-4.0.0.tgz#513bdbe2d3b95d7762e8c1137efa195c6c61b5b3"
+  integrity sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w==
+
+path-is-absolute@^1.0.0:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/path-is-absolute/-/path-is-absolute-1.0.1.tgz#174b9268735534ffbc7ace6bf53a5a9e1b5c5f5f"
+  integrity sha1-F0uSaHNVNP+8es5r9TpanhtcX18=
+
+path-is-inside@^1.0.2:
+  version "1.0.2"
+  resolved "https://registry.yarnpkg.com/path-is-inside/-/path-is-inside-1.0.2.tgz#365417dede44430d1c11af61027facf074bdfc53"
+  integrity sha1-NlQX3t5EQw0cEa9hAn+s8HS9/FM=
+
+path-key@^2.0.0, path-key@^2.0.1:
+  version "2.0.1"
+  resolved "https://registry.yarnpkg.com/path-key/-/path-key-2.0.1.tgz#411cadb574c5a140d3a4b1910d40d80cc9f40b40"
+  integrity sha1-QRyttXTFoUDTpLGRDUDYDMn0C0A=
+
+path-key@^3.0.0, path-key@^3.1.0:
+  version "3.1.1"
+  resolved "https://registry.yarnpkg.com/path-key/-/path-key-3.1.1.tgz#581f6ade658cbba65a0d3380de7753295054f375"
+  integrity sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==
+
+path-parse@^1.0.6:
+  version "1.0.6"
+  resolved "https://registry.yarnpkg.com/path-parse/-/path-parse-1.0.6.tgz#d62dbb5679405d72c4737ec58600e9ddcf06d24c"
+  integrity sha512-GSmOT2EbHrINBf9SR7CDELwlJ8AENk3Qn7OikK4nFYAu3Ote2+JYNVvkpAEQm3/TLNEJFD/xZJjzyxg3KBWOzw==
+
+path-to-regexp@0.1.7:
+  version "0.1.7"
+  resolved "https://registry.yarnpkg.com/path-to-regexp/-/path-to-regexp-0.1.7.tgz#df604178005f522f15eb4490e7247a1bfaa67f8c"
+  integrity sha1-32BBeABfUi8V60SQ5yR6G/qmf4w=
+
+path-to-regexp@^1.7.0:
+  version "1.8.0"
+  resolved "https://registry.yarnpkg.com/path-to-regexp/-/path-to-regexp-1.8.0.tgz#887b3ba9d84393e87a0a0b9f4cb756198b53548a"
+  integrity sha512-n43JRhlUKUAlibEJhPeir1ncUID16QnEjNpwzNdO3Lm4ywrBpBZ5oLD0I6br9evr1Y9JTqwRtAh7JLoOzAQdVA==
+  dependencies:
+    isarray "0.0.1"
+
+path-type@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/path-type/-/path-type-2.0.0.tgz#f012ccb8415b7096fc2daa1054c3d72389594c73"
+  integrity sha1-8BLMuEFbcJb8LaoQVMPXI4lZTHM=
+  dependencies:
+    pify "^2.0.0"
+
+path-type@^4.0.0:
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/path-type/-/path-type-4.0.0.tgz#84ed01c0a7ba380afe09d90a8c180dcd9d03043b"
+  integrity sha512-gDKb8aZMDeD/tZWs9P6+q0J9Mwkdl6xMV8TjnGP3qJVJ06bdMgkbBlLU8IdfOsIsFz2BW1rNVT3XuNEl8zPAvw==
+
+pbkdf2@^3.0.3:
+  version "3.1.1"
+  resolved "https://registry.yarnpkg.com/pbkdf2/-/pbkdf2-3.1.1.tgz#cb8724b0fada984596856d1a6ebafd3584654b94"
+  integrity sha512-4Ejy1OPxi9f2tt1rRV7Go7zmfDQ+ZectEQz3VGUQhgq62HtIRPDyG/JtnwIxs6x3uNMwo2V7q1fMvKjb+Tnpqg==
+  dependencies:
+    create-hash "^1.1.2"
+    create-hmac "^1.1.4"
+    ripemd160 "^2.0.1"
+    safe-buffer "^5.0.1"
+    sha.js "^2.4.8"
+
+performance-now@^2.1.0:
+  version "2.1.0"
+  resolved "https://registry.yarnpkg.com/performance-now/-/performance-now-2.1.0.tgz#6309f4e0e5fa913ec1c69307ae364b4b377c9e7b"
+  integrity sha1-Ywn04OX6kT7BxpMHrjZLSzd8nns=
+
+picomatch@^2.0.4, picomatch@^2.0.5, picomatch@^2.2.1, picomatch@^2.2.2:
+  version "2.2.2"
+  resolved "https://registry.yarnpkg.com/picomatch/-/picomatch-2.2.2.tgz#21f333e9b6b8eaff02468f5146ea406d345f4dad"
+  integrity sha512-q0M/9eZHzmr0AulXyPwNfZjtwZ/RBZlbN3K3CErVrk50T2ASYI7Bye0EvekFY3IP1Nt2DHu0re+V2ZHIpMkuWg==
+
+pify@^2.0.0:
+  version "2.3.0"
+  resolved "https://registry.yarnpkg.com/pify/-/pify-2.3.0.tgz#ed141a6ac043a849ea588498e7dca8b15330e90c"
+  integrity sha1-7RQaasBDqEnqWISY59yosVMw6Qw=
+
+pify@^4.0.1:
+  version "4.0.1"
+  resolved "https://registry.yarnpkg.com/pify/-/pify-4.0.1.tgz#4b2cd25c50d598735c50292224fd8c6df41e3231"
+  integrity sha512-uB80kBFb/tfd68bVleG9T5GGsGPjJrLAUpR5PZIrhBnIaRTQRjqdJSsIKkOP6OAIFbj7GOrcudc5pNjZ+geV2g==
+
+pinkie-promise@^2.0.0:
+  version "2.0.1"
+  resolved "https://registry.yarnpkg.com/pinkie-promise/-/pinkie-promise-2.0.1.tgz#2135d6dfa7a358c069ac9b178776288228450ffa"
+  integrity sha1-ITXW36ejWMBprJsXh3YogihFD/o=
+  dependencies:
+    pinkie "^2.0.0"
+
+pinkie@^2.0.0:
+  version "2.0.4"
+  resolved "https://registry.yarnpkg.com/pinkie/-/pinkie-2.0.4.tgz#72556b80cfa0d48a974e80e77248e80ed4f7f870"
+  integrity sha1-clVrgM+g1IqXToDnckjoDtT3+HA=
+
+pirates@^4.0.1:
+  version "4.0.1"
+  resolved "https://registry.yarnpkg.com/pirates/-/pirates-4.0.1.tgz#643a92caf894566f91b2b986d2c66950a8e2fb87"
+  integrity sha512-WuNqLTbMI3tmfef2TKxlQmAiLHKtFhlsCZnPIpuv2Ow0RDVO8lfy1Opf4NUzlMXLjPl+Men7AuVdX6TA+s+uGA==
+  dependencies:
+    node-modules-regexp "^1.0.0"
+
+pkg-dir@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/pkg-dir/-/pkg-dir-2.0.0.tgz#f6d5d1109e19d63edf428e0bd57e12777615334b"
+  integrity sha1-9tXREJ4Z1j7fQo4L1X4Sd3YVM0s=
+  dependencies:
+    find-up "^2.1.0"
+
+pkg-dir@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/pkg-dir/-/pkg-dir-3.0.0.tgz#2749020f239ed990881b1f71210d51eb6523bea3"
+  integrity sha512-/E57AYkoeQ25qkxMj5PBOVgF8Kiu/h7cYS30Z5+R7WaiCCBfLq58ZI/dSeaEKb9WVJV5n/03QwrN3IeWIFllvw==
+  dependencies:
+    find-up "^3.0.0"
+
+pkg-dir@^4.1.0, pkg-dir@^4.2.0:
+  version "4.2.0"
+  resolved "https://registry.yarnpkg.com/pkg-dir/-/pkg-dir-4.2.0.tgz#f099133df7ede422e81d1d8448270eeb3e4261f3"
+  integrity sha512-HRDzbaKjC+AOWVXxAU/x54COGeIv9eb+6CkDSQoNTt4XyWoIJvuPsXizxu/Fr23EiekbtZwmh1IcIG/l/a10GQ==
+  dependencies:
+    find-up "^4.0.0"
+
+pkg-up@3.1.0:
+  version "3.1.0"
+  resolved "https://registry.yarnpkg.com/pkg-up/-/pkg-up-3.1.0.tgz#100ec235cc150e4fd42519412596a28512a0def5"
+  integrity sha512-nDywThFk1i4BQK4twPQ6TA4RT8bDY96yeuCVBWL3ePARCiEKDRSrNGbFIgUJpLp+XeIR65v8ra7WuJOFUBtkMA==
+  dependencies:
+    find-up "^3.0.0"
+
+pnp-webpack-plugin@1.6.4:
+  version "1.6.4"
+  resolved "https://registry.yarnpkg.com/pnp-webpack-plugin/-/pnp-webpack-plugin-1.6.4.tgz#c9711ac4dc48a685dabafc86f8b6dd9f8df84149"
+  integrity sha512-7Wjy+9E3WwLOEL30D+m8TSTF7qJJUJLONBnwQp0518siuMxUQUbgZwssaFX+QKlZkjHZcw/IpZCt/H0srrntSg==
+  dependencies:
+    ts-pnp "^1.1.6"
+
+popmotion@9.2.1:
+  version "9.2.1"
+  resolved "https://registry.yarnpkg.com/popmotion/-/popmotion-9.2.1.tgz#8bc19214a4f0ba7925a901455d0996131cbec6dc"
+  integrity sha512-kplHK5z2LwYkUXNMCC4+tSYuuAXcG3oatKdsEzJzc1r0I2wM5UnYKITO1ZUnmmFy84VJqIZuoBXwJrWuZuAKkg==
+  dependencies:
+    framesync "5.1.0"
+    hey-listen "^1.0.8"
+    style-value-types "4.0.3"
+    tslib "^1.10.0"
+
+portfinder@^1.0.26:
+  version "1.0.28"
+  resolved "https://registry.yarnpkg.com/portfinder/-/portfinder-1.0.28.tgz#67c4622852bd5374dd1dd900f779f53462fac778"
+  integrity sha512-Se+2isanIcEqf2XMHjyUKskczxbPH7dQnlMjXX6+dybayyHvAf/TCgyMRlzf/B6QDhAEFOGes0pzRo3by4AbMA==
+  dependencies:
+    async "^2.6.2"
+    debug "^3.1.1"
+    mkdirp "^0.5.5"
+
+posix-character-classes@^0.1.0:
+  version "0.1.1"
+  resolved "https://registry.yarnpkg.com/posix-character-classes/-/posix-character-classes-0.1.1.tgz#01eac0fe3b5af71a2a6c02feabb8c1fef7e00eab"
+  integrity sha1-AerA/jta9xoqbAL+q7jB/vfgDqs=
+
+postcss-attribute-case-insensitive@^4.0.1:
+  version "4.0.2"
+  resolved "https://registry.yarnpkg.com/postcss-attribute-case-insensitive/-/postcss-attribute-case-insensitive-4.0.2.tgz#d93e46b504589e94ac7277b0463226c68041a880"
+  integrity sha512-clkFxk/9pcdb4Vkn0hAHq3YnxBQ2p0CGD1dy24jN+reBck+EWxMbxSUqN4Yj7t0w8csl87K6p0gxBe1utkJsYA==
+  dependencies:
+    postcss "^7.0.2"
+    postcss-selector-parser "^6.0.2"
+
+postcss-browser-comments@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/postcss-browser-comments/-/postcss-browser-comments-3.0.0.tgz#1248d2d935fb72053c8e1f61a84a57292d9f65e9"
+  integrity sha512-qfVjLfq7HFd2e0HW4s1dvU8X080OZdG46fFbIBFjW7US7YPDcWfRvdElvwMJr2LI6hMmD+7LnH2HcmXTs+uOig==
+  dependencies:
+    postcss "^7"
+
+postcss-calc@^7.0.1:
+  version "7.0.5"
+  resolved "https://registry.yarnpkg.com/postcss-calc/-/postcss-calc-7.0.5.tgz#f8a6e99f12e619c2ebc23cf6c486fdc15860933e"
+  integrity sha512-1tKHutbGtLtEZF6PT4JSihCHfIVldU72mZ8SdZHIYriIZ9fh9k9aWSppaT8rHsyI3dX+KSR+W+Ix9BMY3AODrg==
+  dependencies:
+    postcss "^7.0.27"
+    postcss-selector-parser "^6.0.2"
+    postcss-value-parser "^4.0.2"
+
+postcss-color-functional-notation@^2.0.1:
+  version "2.0.1"
+  resolved "https://registry.yarnpkg.com/postcss-color-functional-notation/-/postcss-color-functional-notation-2.0.1.tgz#5efd37a88fbabeb00a2966d1e53d98ced93f74e0"
+  integrity sha512-ZBARCypjEDofW4P6IdPVTLhDNXPRn8T2s1zHbZidW6rPaaZvcnCS2soYFIQJrMZSxiePJ2XIYTlcb2ztr/eT2g==
+  dependencies:
+    postcss "^7.0.2"
+    postcss-values-parser "^2.0.0"
+
+postcss-color-gray@^5.0.0:
+  version "5.0.0"
+  resolved "https://registry.yarnpkg.com/postcss-color-gray/-/postcss-color-gray-5.0.0.tgz#532a31eb909f8da898ceffe296fdc1f864be8547"
+  integrity sha512-q6BuRnAGKM/ZRpfDascZlIZPjvwsRye7UDNalqVz3s7GDxMtqPY6+Q871liNxsonUw8oC61OG+PSaysYpl1bnw==
+  dependencies:
+    "@csstools/convert-colors" "^1.4.0"
+    postcss "^7.0.5"
+    postcss-values-parser "^2.0.0"
+
+postcss-color-hex-alpha@^5.0.3:
+  version "5.0.3"
+  resolved "https://registry.yarnpkg.com/postcss-color-hex-alpha/-/postcss-color-hex-alpha-5.0.3.tgz#a8d9ca4c39d497c9661e374b9c51899ef0f87388"
+  integrity sha512-PF4GDel8q3kkreVXKLAGNpHKilXsZ6xuu+mOQMHWHLPNyjiUBOr75sp5ZKJfmv1MCus5/DWUGcK9hm6qHEnXYw==
+  dependencies:
+    postcss "^7.0.14"
+    postcss-values-parser "^2.0.1"
+
+postcss-color-mod-function@^3.0.3:
+  version "3.0.3"
+  resolved "https://registry.yarnpkg.com/postcss-color-mod-function/-/postcss-color-mod-function-3.0.3.tgz#816ba145ac11cc3cb6baa905a75a49f903e4d31d"
+  integrity sha512-YP4VG+xufxaVtzV6ZmhEtc+/aTXH3d0JLpnYfxqTvwZPbJhWqp8bSY3nfNzNRFLgB4XSaBA82OE4VjOOKpCdVQ==
+  dependencies:
+    "@csstools/convert-colors" "^1.4.0"
+    postcss "^7.0.2"
+    postcss-values-parser "^2.0.0"
+
+postcss-color-rebeccapurple@^4.0.1:
+  version "4.0.1"
+  resolved "https://registry.yarnpkg.com/postcss-color-rebeccapurple/-/postcss-color-rebeccapurple-4.0.1.tgz#c7a89be872bb74e45b1e3022bfe5748823e6de77"
+  integrity sha512-aAe3OhkS6qJXBbqzvZth2Au4V3KieR5sRQ4ptb2b2O8wgvB3SJBsdG+jsn2BZbbwekDG8nTfcCNKcSfe/lEy8g==
+  dependencies:
+    postcss "^7.0.2"
+    postcss-values-parser "^2.0.0"
+
+postcss-colormin@^4.0.3:
+  version "4.0.3"
+  resolved "https://registry.yarnpkg.com/postcss-colormin/-/postcss-colormin-4.0.3.tgz#ae060bce93ed794ac71264f08132d550956bd381"
+  integrity sha512-WyQFAdDZpExQh32j0U0feWisZ0dmOtPl44qYmJKkq9xFWY3p+4qnRzCHeNrkeRhwPHz9bQ3mo0/yVkaply0MNw==
+  dependencies:
+    browserslist "^4.0.0"
+    color "^3.0.0"
+    has "^1.0.0"
+    postcss "^7.0.0"
+    postcss-value-parser "^3.0.0"
+
+postcss-convert-values@^4.0.1:
+  version "4.0.1"
+  resolved "https://registry.yarnpkg.com/postcss-convert-values/-/postcss-convert-values-4.0.1.tgz#ca3813ed4da0f812f9d43703584e449ebe189a7f"
+  integrity sha512-Kisdo1y77KUC0Jmn0OXU/COOJbzM8cImvw1ZFsBgBgMgb1iL23Zs/LXRe3r+EZqM3vGYKdQ2YJVQ5VkJI+zEJQ==
+  dependencies:
+    postcss "^7.0.0"
+    postcss-value-parser "^3.0.0"
+
+postcss-custom-media@^7.0.8:
+  version "7.0.8"
+  resolved "https://registry.yarnpkg.com/postcss-custom-media/-/postcss-custom-media-7.0.8.tgz#fffd13ffeffad73621be5f387076a28b00294e0c"
+  integrity sha512-c9s5iX0Ge15o00HKbuRuTqNndsJUbaXdiNsksnVH8H4gdc+zbLzr/UasOwNG6CTDpLFekVY4672eWdiiWu2GUg==
+  dependencies:
+    postcss "^7.0.14"
+
+postcss-custom-properties@^8.0.11:
+  version "8.0.11"
+  resolved "https://registry.yarnpkg.com/postcss-custom-properties/-/postcss-custom-properties-8.0.11.tgz#2d61772d6e92f22f5e0d52602df8fae46fa30d97"
+  integrity sha512-nm+o0eLdYqdnJ5abAJeXp4CEU1c1k+eB2yMCvhgzsds/e0umabFrN6HoTy/8Q4K5ilxERdl/JD1LO5ANoYBeMA==
+  dependencies:
+    postcss "^7.0.17"
+    postcss-values-parser "^2.0.1"
+
+postcss-custom-selectors@^5.1.2:
+  version "5.1.2"
+  resolved "https://registry.yarnpkg.com/postcss-custom-selectors/-/postcss-custom-selectors-5.1.2.tgz#64858c6eb2ecff2fb41d0b28c9dd7b3db4de7fba"
+  integrity sha512-DSGDhqinCqXqlS4R7KGxL1OSycd1lydugJ1ky4iRXPHdBRiozyMHrdu0H3o7qNOCiZwySZTUI5MV0T8QhCLu+w==
+  dependencies:
+    postcss "^7.0.2"
+    postcss-selector-parser "^5.0.0-rc.3"
+
+postcss-dir-pseudo-class@^5.0.0:
+  version "5.0.0"
+  resolved "https://registry.yarnpkg.com/postcss-dir-pseudo-class/-/postcss-dir-pseudo-class-5.0.0.tgz#6e3a4177d0edb3abcc85fdb6fbb1c26dabaeaba2"
+  integrity sha512-3pm4oq8HYWMZePJY+5ANriPs3P07q+LW6FAdTlkFH2XqDdP4HeeJYMOzn0HYLhRSjBO3fhiqSwwU9xEULSrPgw==
+  dependencies:
+    postcss "^7.0.2"
+    postcss-selector-parser "^5.0.0-rc.3"
+
+postcss-discard-comments@^4.0.2:
+  version "4.0.2"
+  resolved "https://registry.yarnpkg.com/postcss-discard-comments/-/postcss-discard-comments-4.0.2.tgz#1fbabd2c246bff6aaad7997b2b0918f4d7af4033"
+  integrity sha512-RJutN259iuRf3IW7GZyLM5Sw4GLTOH8FmsXBnv8Ab/Tc2k4SR4qbV4DNbyyY4+Sjo362SyDmW2DQ7lBSChrpkg==
+  dependencies:
+    postcss "^7.0.0"
+
+postcss-discard-duplicates@^4.0.2:
+  version "4.0.2"
+  resolved "https://registry.yarnpkg.com/postcss-discard-duplicates/-/postcss-discard-duplicates-4.0.2.tgz#3fe133cd3c82282e550fc9b239176a9207b784eb"
+  integrity sha512-ZNQfR1gPNAiXZhgENFfEglF93pciw0WxMkJeVmw8eF+JZBbMD7jp6C67GqJAXVZP2BWbOztKfbsdmMp/k8c6oQ==
+  dependencies:
+    postcss "^7.0.0"
+
+postcss-discard-empty@^4.0.1:
+  version "4.0.1"
+  resolved "https://registry.yarnpkg.com/postcss-discard-empty/-/postcss-discard-empty-4.0.1.tgz#c8c951e9f73ed9428019458444a02ad90bb9f765"
+  integrity sha512-B9miTzbznhDjTfjvipfHoqbWKwd0Mj+/fL5s1QOz06wufguil+Xheo4XpOnc4NqKYBCNqqEzgPv2aPBIJLox0w==
+  dependencies:
+    postcss "^7.0.0"
+
+postcss-discard-overridden@^4.0.1:
+  version "4.0.1"
+  resolved "https://registry.yarnpkg.com/postcss-discard-overridden/-/postcss-discard-overridden-4.0.1.tgz#652aef8a96726f029f5e3e00146ee7a4e755ff57"
+  integrity sha512-IYY2bEDD7g1XM1IDEsUT4//iEYCxAmP5oDSFMVU/JVvT7gh+l4fmjciLqGgwjdWpQIdb0Che2VX00QObS5+cTg==
+  dependencies:
+    postcss "^7.0.0"
+
+postcss-double-position-gradients@^1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/postcss-double-position-gradients/-/postcss-double-position-gradients-1.0.0.tgz#fc927d52fddc896cb3a2812ebc5df147e110522e"
+  integrity sha512-G+nV8EnQq25fOI8CH/B6krEohGWnF5+3A6H/+JEpOncu5dCnkS1QQ6+ct3Jkaepw1NGVqqOZH6lqrm244mCftA==
+  dependencies:
+    postcss "^7.0.5"
+    postcss-values-parser "^2.0.0"
+
+postcss-env-function@^2.0.2:
+  version "2.0.2"
+  resolved "https://registry.yarnpkg.com/postcss-env-function/-/postcss-env-function-2.0.2.tgz#0f3e3d3c57f094a92c2baf4b6241f0b0da5365d7"
+  integrity sha512-rwac4BuZlITeUbiBq60h/xbLzXY43qOsIErngWa4l7Mt+RaSkT7QBjXVGTcBHupykkblHMDrBFh30zchYPaOUw==
+  dependencies:
+    postcss "^7.0.2"
+    postcss-values-parser "^2.0.0"
+
+postcss-flexbugs-fixes@4.2.1:
+  version "4.2.1"
+  resolved "https://registry.yarnpkg.com/postcss-flexbugs-fixes/-/postcss-flexbugs-fixes-4.2.1.tgz#9218a65249f30897deab1033aced8578562a6690"
+  integrity sha512-9SiofaZ9CWpQWxOwRh1b/r85KD5y7GgvsNt1056k6OYLvWUun0czCvogfJgylC22uJTwW1KzY3Gz65NZRlvoiQ==
+  dependencies:
+    postcss "^7.0.26"
+
+postcss-focus-visible@^4.0.0:
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/postcss-focus-visible/-/postcss-focus-visible-4.0.0.tgz#477d107113ade6024b14128317ade2bd1e17046e"
+  integrity sha512-Z5CkWBw0+idJHSV6+Bgf2peDOFf/x4o+vX/pwcNYrWpXFrSfTkQ3JQ1ojrq9yS+upnAlNRHeg8uEwFTgorjI8g==
+  dependencies:
+    postcss "^7.0.2"
+
+postcss-focus-within@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/postcss-focus-within/-/postcss-focus-within-3.0.0.tgz#763b8788596cee9b874c999201cdde80659ef680"
+  integrity sha512-W0APui8jQeBKbCGZudW37EeMCjDeVxKgiYfIIEo8Bdh5SpB9sxds/Iq8SEuzS0Q4YFOlG7EPFulbbxujpkrV2w==
+  dependencies:
+    postcss "^7.0.2"
+
+postcss-font-variant@^4.0.0:
+  version "4.0.1"
+  resolved "https://registry.yarnpkg.com/postcss-font-variant/-/postcss-font-variant-4.0.1.tgz#42d4c0ab30894f60f98b17561eb5c0321f502641"
+  integrity sha512-I3ADQSTNtLTTd8uxZhtSOrTCQ9G4qUVKPjHiDk0bV75QSxXjVWiJVJ2VLdspGUi9fbW9BcjKJoRvxAH1pckqmA==
+  dependencies:
+    postcss "^7.0.2"
+
+postcss-gap-properties@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/postcss-gap-properties/-/postcss-gap-properties-2.0.0.tgz#431c192ab3ed96a3c3d09f2ff615960f902c1715"
+  integrity sha512-QZSqDaMgXCHuHTEzMsS2KfVDOq7ZFiknSpkrPJY6jmxbugUPTuSzs/vuE5I3zv0WAS+3vhrlqhijiprnuQfzmg==
+  dependencies:
+    postcss "^7.0.2"
+
+postcss-image-set-function@^3.0.1:
+  version "3.0.1"
+  resolved "https://registry.yarnpkg.com/postcss-image-set-function/-/postcss-image-set-function-3.0.1.tgz#28920a2f29945bed4c3198d7df6496d410d3f288"
+  integrity sha512-oPTcFFip5LZy8Y/whto91L9xdRHCWEMs3e1MdJxhgt4jy2WYXfhkng59fH5qLXSCPN8k4n94p1Czrfe5IOkKUw==
+  dependencies:
+    postcss "^7.0.2"
+    postcss-values-parser "^2.0.0"
+
+postcss-initial@^3.0.0:
+  version "3.0.2"
+  resolved "https://registry.yarnpkg.com/postcss-initial/-/postcss-initial-3.0.2.tgz#f018563694b3c16ae8eaabe3c585ac6319637b2d"
+  integrity sha512-ugA2wKonC0xeNHgirR4D3VWHs2JcU08WAi1KFLVcnb7IN89phID6Qtg2RIctWbnvp1TM2BOmDtX8GGLCKdR8YA==
+  dependencies:
+    lodash.template "^4.5.0"
+    postcss "^7.0.2"
+
+postcss-lab-function@^2.0.1:
+  version "2.0.1"
+  resolved "https://registry.yarnpkg.com/postcss-lab-function/-/postcss-lab-function-2.0.1.tgz#bb51a6856cd12289ab4ae20db1e3821ef13d7d2e"
+  integrity sha512-whLy1IeZKY+3fYdqQFuDBf8Auw+qFuVnChWjmxm/UhHWqNHZx+B99EwxTvGYmUBqe3Fjxs4L1BoZTJmPu6usVg==
+  dependencies:
+    "@csstools/convert-colors" "^1.4.0"
+    postcss "^7.0.2"
+    postcss-values-parser "^2.0.0"
+
+postcss-load-config@^2.0.0:
+  version "2.1.2"
+  resolved "https://registry.yarnpkg.com/postcss-load-config/-/postcss-load-config-2.1.2.tgz#c5ea504f2c4aef33c7359a34de3573772ad7502a"
+  integrity sha512-/rDeGV6vMUo3mwJZmeHfEDvwnTKKqQ0S7OHUi/kJvvtx3aWtyWG2/0ZWnzCt2keEclwN6Tf0DST2v9kITdOKYw==
+  dependencies:
+    cosmiconfig "^5.0.0"
+    import-cwd "^2.0.0"
+
+postcss-loader@3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/postcss-loader/-/postcss-loader-3.0.0.tgz#6b97943e47c72d845fa9e03f273773d4e8dd6c2d"
+  integrity sha512-cLWoDEY5OwHcAjDnkyRQzAXfs2jrKjXpO/HQFcc5b5u/r7aa471wdmChmwfnv7x2u840iat/wi0lQ5nbRgSkUA==
+  dependencies:
+    loader-utils "^1.1.0"
+    postcss "^7.0.0"
+    postcss-load-config "^2.0.0"
+    schema-utils "^1.0.0"
+
+postcss-logical@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/postcss-logical/-/postcss-logical-3.0.0.tgz#2495d0f8b82e9f262725f75f9401b34e7b45d5b5"
+  integrity sha512-1SUKdJc2vuMOmeItqGuNaC+N8MzBWFWEkAnRnLpFYj1tGGa7NqyVBujfRtgNa2gXR+6RkGUiB2O5Vmh7E2RmiA==
+  dependencies:
+    postcss "^7.0.2"
+
+postcss-media-minmax@^4.0.0:
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/postcss-media-minmax/-/postcss-media-minmax-4.0.0.tgz#b75bb6cbc217c8ac49433e12f22048814a4f5ed5"
+  integrity sha512-fo9moya6qyxsjbFAYl97qKO9gyre3qvbMnkOZeZwlsW6XYFsvs2DMGDlchVLfAd8LHPZDxivu/+qW2SMQeTHBw==
+  dependencies:
+    postcss "^7.0.2"
+
+postcss-merge-longhand@^4.0.11:
+  version "4.0.11"
+  resolved "https://registry.yarnpkg.com/postcss-merge-longhand/-/postcss-merge-longhand-4.0.11.tgz#62f49a13e4a0ee04e7b98f42bb16062ca2549e24"
+  integrity sha512-alx/zmoeXvJjp7L4mxEMjh8lxVlDFX1gqWHzaaQewwMZiVhLo42TEClKaeHbRf6J7j82ZOdTJ808RtN0ZOZwvw==
+  dependencies:
+    css-color-names "0.0.4"
+    postcss "^7.0.0"
+    postcss-value-parser "^3.0.0"
+    stylehacks "^4.0.0"
+
+postcss-merge-rules@^4.0.3:
+  version "4.0.3"
+  resolved "https://registry.yarnpkg.com/postcss-merge-rules/-/postcss-merge-rules-4.0.3.tgz#362bea4ff5a1f98e4075a713c6cb25aefef9a650"
+  integrity sha512-U7e3r1SbvYzO0Jr3UT/zKBVgYYyhAz0aitvGIYOYK5CPmkNih+WDSsS5tvPrJ8YMQYlEMvsZIiqmn7HdFUaeEQ==
+  dependencies:
+    browserslist "^4.0.0"
+    caniuse-api "^3.0.0"
+    cssnano-util-same-parent "^4.0.0"
+    postcss "^7.0.0"
+    postcss-selector-parser "^3.0.0"
+    vendors "^1.0.0"
+
+postcss-minify-font-values@^4.0.2:
+  version "4.0.2"
+  resolved "https://registry.yarnpkg.com/postcss-minify-font-values/-/postcss-minify-font-values-4.0.2.tgz#cd4c344cce474343fac5d82206ab2cbcb8afd5a6"
+  integrity sha512-j85oO6OnRU9zPf04+PZv1LYIYOprWm6IA6zkXkrJXyRveDEuQggG6tvoy8ir8ZwjLxLuGfNkCZEQG7zan+Hbtg==
+  dependencies:
+    postcss "^7.0.0"
+    postcss-value-parser "^3.0.0"
+
+postcss-minify-gradients@^4.0.2:
+  version "4.0.2"
+  resolved "https://registry.yarnpkg.com/postcss-minify-gradients/-/postcss-minify-gradients-4.0.2.tgz#93b29c2ff5099c535eecda56c4aa6e665a663471"
+  integrity sha512-qKPfwlONdcf/AndP1U8SJ/uzIJtowHlMaSioKzebAXSG4iJthlWC9iSWznQcX4f66gIWX44RSA841HTHj3wK+Q==
+  dependencies:
+    cssnano-util-get-arguments "^4.0.0"
+    is-color-stop "^1.0.0"
+    postcss "^7.0.0"
+    postcss-value-parser "^3.0.0"
+
+postcss-minify-params@^4.0.2:
+  version "4.0.2"
+  resolved "https://registry.yarnpkg.com/postcss-minify-params/-/postcss-minify-params-4.0.2.tgz#6b9cef030c11e35261f95f618c90036d680db874"
+  integrity sha512-G7eWyzEx0xL4/wiBBJxJOz48zAKV2WG3iZOqVhPet/9geefm/Px5uo1fzlHu+DOjT+m0Mmiz3jkQzVHe6wxAWg==
+  dependencies:
+    alphanum-sort "^1.0.0"
+    browserslist "^4.0.0"
+    cssnano-util-get-arguments "^4.0.0"
+    postcss "^7.0.0"
+    postcss-value-parser "^3.0.0"
+    uniqs "^2.0.0"
+
+postcss-minify-selectors@^4.0.2:
+  version "4.0.2"
+  resolved "https://registry.yarnpkg.com/postcss-minify-selectors/-/postcss-minify-selectors-4.0.2.tgz#e2e5eb40bfee500d0cd9243500f5f8ea4262fbd8"
+  integrity sha512-D5S1iViljXBj9kflQo4YutWnJmwm8VvIsU1GeXJGiG9j8CIg9zs4voPMdQDUmIxetUOh60VilsNzCiAFTOqu3g==
+  dependencies:
+    alphanum-sort "^1.0.0"
+    has "^1.0.0"
+    postcss "^7.0.0"
+    postcss-selector-parser "^3.0.0"
+
+postcss-modules-extract-imports@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/postcss-modules-extract-imports/-/postcss-modules-extract-imports-2.0.0.tgz#818719a1ae1da325f9832446b01136eeb493cd7e"
+  integrity sha512-LaYLDNS4SG8Q5WAWqIJgdHPJrDDr/Lv775rMBFUbgjTz6j34lUznACHcdRWroPvXANP2Vj7yNK57vp9eFqzLWQ==
+  dependencies:
+    postcss "^7.0.5"
+
+postcss-modules-local-by-default@^3.0.3:
+  version "3.0.3"
+  resolved "https://registry.yarnpkg.com/postcss-modules-local-by-default/-/postcss-modules-local-by-default-3.0.3.tgz#bb14e0cc78279d504dbdcbfd7e0ca28993ffbbb0"
+  integrity sha512-e3xDq+LotiGesympRlKNgaJ0PCzoUIdpH0dj47iWAui/kyTgh3CiAr1qP54uodmJhl6p9rN6BoNcdEDVJx9RDw==
+  dependencies:
+    icss-utils "^4.1.1"
+    postcss "^7.0.32"
+    postcss-selector-parser "^6.0.2"
+    postcss-value-parser "^4.1.0"
+
+postcss-modules-scope@^2.2.0:
+  version "2.2.0"
+  resolved "https://registry.yarnpkg.com/postcss-modules-scope/-/postcss-modules-scope-2.2.0.tgz#385cae013cc7743f5a7d7602d1073a89eaae62ee"
+  integrity sha512-YyEgsTMRpNd+HmyC7H/mh3y+MeFWevy7V1evVhJWewmMbjDHIbZbOXICC2y+m1xI1UVfIT1HMW/O04Hxyu9oXQ==
+  dependencies:
+    postcss "^7.0.6"
+    postcss-selector-parser "^6.0.0"
+
+postcss-modules-values@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/postcss-modules-values/-/postcss-modules-values-3.0.0.tgz#5b5000d6ebae29b4255301b4a3a54574423e7f10"
+  integrity sha512-1//E5jCBrZ9DmRX+zCtmQtRSV6PV42Ix7Bzj9GbwJceduuf7IqP8MgeTXuRDHOWj2m0VzZD5+roFWDuU8RQjcg==
+  dependencies:
+    icss-utils "^4.0.0"
+    postcss "^7.0.6"
+
+postcss-nesting@^7.0.0:
+  version "7.0.1"
+  resolved "https://registry.yarnpkg.com/postcss-nesting/-/postcss-nesting-7.0.1.tgz#b50ad7b7f0173e5b5e3880c3501344703e04c052"
+  integrity sha512-FrorPb0H3nuVq0Sff7W2rnc3SmIcruVC6YwpcS+k687VxyxO33iE1amna7wHuRVzM8vfiYofXSBHNAZ3QhLvYg==
+  dependencies:
+    postcss "^7.0.2"
+
+postcss-normalize-charset@^4.0.1:
+  version "4.0.1"
+  resolved "https://registry.yarnpkg.com/postcss-normalize-charset/-/postcss-normalize-charset-4.0.1.tgz#8b35add3aee83a136b0471e0d59be58a50285dd4"
+  integrity sha512-gMXCrrlWh6G27U0hF3vNvR3w8I1s2wOBILvA87iNXaPvSNo5uZAMYsZG7XjCUf1eVxuPfyL4TJ7++SGZLc9A3g==
+  dependencies:
+    postcss "^7.0.0"
+
+postcss-normalize-display-values@^4.0.2:
+  version "4.0.2"
+  resolved "https://registry.yarnpkg.com/postcss-normalize-display-values/-/postcss-normalize-display-values-4.0.2.tgz#0dbe04a4ce9063d4667ed2be476bb830c825935a"
+  integrity sha512-3F2jcsaMW7+VtRMAqf/3m4cPFhPD3EFRgNs18u+k3lTJJlVe7d0YPO+bnwqo2xg8YiRpDXJI2u8A0wqJxMsQuQ==
+  dependencies:
+    cssnano-util-get-match "^4.0.0"
+    postcss "^7.0.0"
+    postcss-value-parser "^3.0.0"
+
+postcss-normalize-positions@^4.0.2:
+  version "4.0.2"
+  resolved "https://registry.yarnpkg.com/postcss-normalize-positions/-/postcss-normalize-positions-4.0.2.tgz#05f757f84f260437378368a91f8932d4b102917f"
+  integrity sha512-Dlf3/9AxpxE+NF1fJxYDeggi5WwV35MXGFnnoccP/9qDtFrTArZ0D0R+iKcg5WsUd8nUYMIl8yXDCtcrT8JrdA==
+  dependencies:
+    cssnano-util-get-arguments "^4.0.0"
+    has "^1.0.0"
+    postcss "^7.0.0"
+    postcss-value-parser "^3.0.0"
+
+postcss-normalize-repeat-style@^4.0.2:
+  version "4.0.2"
+  resolved "https://registry.yarnpkg.com/postcss-normalize-repeat-style/-/postcss-normalize-repeat-style-4.0.2.tgz#c4ebbc289f3991a028d44751cbdd11918b17910c"
+  integrity sha512-qvigdYYMpSuoFs3Is/f5nHdRLJN/ITA7huIoCyqqENJe9PvPmLhNLMu7QTjPdtnVf6OcYYO5SHonx4+fbJE1+Q==
+  dependencies:
+    cssnano-util-get-arguments "^4.0.0"
+    cssnano-util-get-match "^4.0.0"
+    postcss "^7.0.0"
+    postcss-value-parser "^3.0.0"
+
+postcss-normalize-string@^4.0.2:
+  version "4.0.2"
+  resolved "https://registry.yarnpkg.com/postcss-normalize-string/-/postcss-normalize-string-4.0.2.tgz#cd44c40ab07a0c7a36dc5e99aace1eca4ec2690c"
+  integrity sha512-RrERod97Dnwqq49WNz8qo66ps0swYZDSb6rM57kN2J+aoyEAJfZ6bMx0sx/F9TIEX0xthPGCmeyiam/jXif0eA==
+  dependencies:
+    has "^1.0.0"
+    postcss "^7.0.0"
+    postcss-value-parser "^3.0.0"
+
+postcss-normalize-timing-functions@^4.0.2:
+  version "4.0.2"
+  resolved "https://registry.yarnpkg.com/postcss-normalize-timing-functions/-/postcss-normalize-timing-functions-4.0.2.tgz#8e009ca2a3949cdaf8ad23e6b6ab99cb5e7d28d9"
+  integrity sha512-acwJY95edP762e++00Ehq9L4sZCEcOPyaHwoaFOhIwWCDfik6YvqsYNxckee65JHLKzuNSSmAdxwD2Cud1Z54A==
+  dependencies:
+    cssnano-util-get-match "^4.0.0"
+    postcss "^7.0.0"
+    postcss-value-parser "^3.0.0"
+
+postcss-normalize-unicode@^4.0.1:
+  version "4.0.1"
+  resolved "https://registry.yarnpkg.com/postcss-normalize-unicode/-/postcss-normalize-unicode-4.0.1.tgz#841bd48fdcf3019ad4baa7493a3d363b52ae1cfb"
+  integrity sha512-od18Uq2wCYn+vZ/qCOeutvHjB5jm57ToxRaMeNuf0nWVHaP9Hua56QyMF6fs/4FSUnVIw0CBPsU0K4LnBPwYwg==
+  dependencies:
+    browserslist "^4.0.0"
+    postcss "^7.0.0"
+    postcss-value-parser "^3.0.0"
+
+postcss-normalize-url@^4.0.1:
+  version "4.0.1"
+  resolved "https://registry.yarnpkg.com/postcss-normalize-url/-/postcss-normalize-url-4.0.1.tgz#10e437f86bc7c7e58f7b9652ed878daaa95faae1"
+  integrity sha512-p5oVaF4+IHwu7VpMan/SSpmpYxcJMtkGppYf0VbdH5B6hN8YNmVyJLuY9FmLQTzY3fag5ESUUHDqM+heid0UVA==
+  dependencies:
+    is-absolute-url "^2.0.0"
+    normalize-url "^3.0.0"
+    postcss "^7.0.0"
+    postcss-value-parser "^3.0.0"
+
+postcss-normalize-whitespace@^4.0.2:
+  version "4.0.2"
+  resolved "https://registry.yarnpkg.com/postcss-normalize-whitespace/-/postcss-normalize-whitespace-4.0.2.tgz#bf1d4070fe4fcea87d1348e825d8cc0c5faa7d82"
+  integrity sha512-tO8QIgrsI3p95r8fyqKV+ufKlSHh9hMJqACqbv2XknufqEDhDvbguXGBBqxw9nsQoXWf0qOqppziKJKHMD4GtA==
+  dependencies:
+    postcss "^7.0.0"
+    postcss-value-parser "^3.0.0"
+
+postcss-normalize@8.0.1:
+  version "8.0.1"
+  resolved "https://registry.yarnpkg.com/postcss-normalize/-/postcss-normalize-8.0.1.tgz#90e80a7763d7fdf2da6f2f0f82be832ce4f66776"
+  integrity sha512-rt9JMS/m9FHIRroDDBGSMsyW1c0fkvOJPy62ggxSHUldJO7B195TqFMqIf+lY5ezpDcYOV4j86aUp3/XbxzCCQ==
+  dependencies:
+    "@csstools/normalize.css" "^10.1.0"
+    browserslist "^4.6.2"
+    postcss "^7.0.17"
+    postcss-browser-comments "^3.0.0"
+    sanitize.css "^10.0.0"
+
+postcss-ordered-values@^4.1.2:
+  version "4.1.2"
+  resolved "https://registry.yarnpkg.com/postcss-ordered-values/-/postcss-ordered-values-4.1.2.tgz#0cf75c820ec7d5c4d280189559e0b571ebac0eee"
+  integrity sha512-2fCObh5UanxvSxeXrtLtlwVThBvHn6MQcu4ksNT2tsaV2Fg76R2CV98W7wNSlX+5/pFwEyaDwKLLoEV7uRybAw==
+  dependencies:
+    cssnano-util-get-arguments "^4.0.0"
+    postcss "^7.0.0"
+    postcss-value-parser "^3.0.0"
+
+postcss-overflow-shorthand@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/postcss-overflow-shorthand/-/postcss-overflow-shorthand-2.0.0.tgz#31ecf350e9c6f6ddc250a78f0c3e111f32dd4c30"
+  integrity sha512-aK0fHc9CBNx8jbzMYhshZcEv8LtYnBIRYQD5i7w/K/wS9c2+0NSR6B3OVMu5y0hBHYLcMGjfU+dmWYNKH0I85g==
+  dependencies:
+    postcss "^7.0.2"
+
+postcss-page-break@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/postcss-page-break/-/postcss-page-break-2.0.0.tgz#add52d0e0a528cabe6afee8b46e2abb277df46bf"
+  integrity sha512-tkpTSrLpfLfD9HvgOlJuigLuk39wVTbbd8RKcy8/ugV2bNBUW3xU+AIqyxhDrQr1VUj1RmyJrBn1YWrqUm9zAQ==
+  dependencies:
+    postcss "^7.0.2"
+
+postcss-place@^4.0.1:
+  version "4.0.1"
+  resolved "https://registry.yarnpkg.com/postcss-place/-/postcss-place-4.0.1.tgz#e9f39d33d2dc584e46ee1db45adb77ca9d1dcc62"
+  integrity sha512-Zb6byCSLkgRKLODj/5mQugyuj9bvAAw9LqJJjgwz5cYryGeXfFZfSXoP1UfveccFmeq0b/2xxwcTEVScnqGxBg==
+  dependencies:
+    postcss "^7.0.2"
+    postcss-values-parser "^2.0.0"
+
+postcss-preset-env@6.7.0:
+  version "6.7.0"
+  resolved "https://registry.yarnpkg.com/postcss-preset-env/-/postcss-preset-env-6.7.0.tgz#c34ddacf8f902383b35ad1e030f178f4cdf118a5"
+  integrity sha512-eU4/K5xzSFwUFJ8hTdTQzo2RBLbDVt83QZrAvI07TULOkmyQlnYlpwep+2yIK+K+0KlZO4BvFcleOCCcUtwchg==
+  dependencies:
+    autoprefixer "^9.6.1"
+    browserslist "^4.6.4"
+    caniuse-lite "^1.0.30000981"
+    css-blank-pseudo "^0.1.4"
+    css-has-pseudo "^0.10.0"
+    css-prefers-color-scheme "^3.1.1"
+    cssdb "^4.4.0"
+    postcss "^7.0.17"
+    postcss-attribute-case-insensitive "^4.0.1"
+    postcss-color-functional-notation "^2.0.1"
+    postcss-color-gray "^5.0.0"
+    postcss-color-hex-alpha "^5.0.3"
+    postcss-color-mod-function "^3.0.3"
+    postcss-color-rebeccapurple "^4.0.1"
+    postcss-custom-media "^7.0.8"
+    postcss-custom-properties "^8.0.11"
+    postcss-custom-selectors "^5.1.2"
+    postcss-dir-pseudo-class "^5.0.0"
+    postcss-double-position-gradients "^1.0.0"
+    postcss-env-function "^2.0.2"
+    postcss-focus-visible "^4.0.0"
+    postcss-focus-within "^3.0.0"
+    postcss-font-variant "^4.0.0"
+    postcss-gap-properties "^2.0.0"
+    postcss-image-set-function "^3.0.1"
+    postcss-initial "^3.0.0"
+    postcss-lab-function "^2.0.1"
+    postcss-logical "^3.0.0"
+    postcss-media-minmax "^4.0.0"
+    postcss-nesting "^7.0.0"
+    postcss-overflow-shorthand "^2.0.0"
+    postcss-page-break "^2.0.0"
+    postcss-place "^4.0.1"
+    postcss-pseudo-class-any-link "^6.0.0"
+    postcss-replace-overflow-wrap "^3.0.0"
+    postcss-selector-matches "^4.0.0"
+    postcss-selector-not "^4.0.0"
+
+postcss-pseudo-class-any-link@^6.0.0:
+  version "6.0.0"
+  resolved "https://registry.yarnpkg.com/postcss-pseudo-class-any-link/-/postcss-pseudo-class-any-link-6.0.0.tgz#2ed3eed393b3702879dec4a87032b210daeb04d1"
+  integrity sha512-lgXW9sYJdLqtmw23otOzrtbDXofUdfYzNm4PIpNE322/swES3VU9XlXHeJS46zT2onFO7V1QFdD4Q9LiZj8mew==
+  dependencies:
+    postcss "^7.0.2"
+    postcss-selector-parser "^5.0.0-rc.3"
+
+postcss-reduce-initial@^4.0.3:
+  version "4.0.3"
+  resolved "https://registry.yarnpkg.com/postcss-reduce-initial/-/postcss-reduce-initial-4.0.3.tgz#7fd42ebea5e9c814609639e2c2e84ae270ba48df"
+  integrity sha512-gKWmR5aUulSjbzOfD9AlJiHCGH6AEVLaM0AV+aSioxUDd16qXP1PCh8d1/BGVvpdWn8k/HiK7n6TjeoXN1F7DA==
+  dependencies:
+    browserslist "^4.0.0"
+    caniuse-api "^3.0.0"
+    has "^1.0.0"
+    postcss "^7.0.0"
+
+postcss-reduce-transforms@^4.0.2:
+  version "4.0.2"
+  resolved "https://registry.yarnpkg.com/postcss-reduce-transforms/-/postcss-reduce-transforms-4.0.2.tgz#17efa405eacc6e07be3414a5ca2d1074681d4e29"
+  integrity sha512-EEVig1Q2QJ4ELpJXMZR8Vt5DQx8/mo+dGWSR7vWXqcob2gQLyQGsionYcGKATXvQzMPn6DSN1vTN7yFximdIAg==
+  dependencies:
+    cssnano-util-get-match "^4.0.0"
+    has "^1.0.0"
+    postcss "^7.0.0"
+    postcss-value-parser "^3.0.0"
+
+postcss-replace-overflow-wrap@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/postcss-replace-overflow-wrap/-/postcss-replace-overflow-wrap-3.0.0.tgz#61b360ffdaedca84c7c918d2b0f0d0ea559ab01c"
+  integrity sha512-2T5hcEHArDT6X9+9dVSPQdo7QHzG4XKclFT8rU5TzJPDN7RIRTbO9c4drUISOVemLj03aezStHCR2AIcr8XLpw==
+  dependencies:
+    postcss "^7.0.2"
+
+postcss-safe-parser@5.0.2:
+  version "5.0.2"
+  resolved "https://registry.yarnpkg.com/postcss-safe-parser/-/postcss-safe-parser-5.0.2.tgz#459dd27df6bc2ba64608824ba39e45dacf5e852d"
+  integrity sha512-jDUfCPJbKOABhwpUKcqCVbbXiloe/QXMcbJ6Iipf3sDIihEzTqRCeMBfRaOHxhBuTYqtASrI1KJWxzztZU4qUQ==
+  dependencies:
+    postcss "^8.1.0"
+
+postcss-selector-matches@^4.0.0:
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/postcss-selector-matches/-/postcss-selector-matches-4.0.0.tgz#71c8248f917ba2cc93037c9637ee09c64436fcff"
+  integrity sha512-LgsHwQR/EsRYSqlwdGzeaPKVT0Ml7LAT6E75T8W8xLJY62CE4S/l03BWIt3jT8Taq22kXP08s2SfTSzaraoPww==
+  dependencies:
+    balanced-match "^1.0.0"
+    postcss "^7.0.2"
+
+postcss-selector-not@^4.0.0:
+  version "4.0.1"
+  resolved "https://registry.yarnpkg.com/postcss-selector-not/-/postcss-selector-not-4.0.1.tgz#263016eef1cf219e0ade9a913780fc1f48204cbf"
+  integrity sha512-YolvBgInEK5/79C+bdFMyzqTg6pkYqDbzZIST/PDMqa/o3qtXenD05apBG2jLgT0/BQ77d4U2UK12jWpilqMAQ==
+  dependencies:
+    balanced-match "^1.0.0"
+    postcss "^7.0.2"
+
+postcss-selector-parser@^3.0.0:
+  version "3.1.2"
+  resolved "https://registry.yarnpkg.com/postcss-selector-parser/-/postcss-selector-parser-3.1.2.tgz#b310f5c4c0fdaf76f94902bbaa30db6aa84f5270"
+  integrity sha512-h7fJ/5uWuRVyOtkO45pnt1Ih40CEleeyCHzipqAZO2e5H20g25Y48uYnFUiShvY4rZWNJ/Bib/KVPmanaCtOhA==
+  dependencies:
+    dot-prop "^5.2.0"
+    indexes-of "^1.0.1"
+    uniq "^1.0.1"
+
+postcss-selector-parser@^5.0.0-rc.3, postcss-selector-parser@^5.0.0-rc.4:
+  version "5.0.0"
+  resolved "https://registry.yarnpkg.com/postcss-selector-parser/-/postcss-selector-parser-5.0.0.tgz#249044356697b33b64f1a8f7c80922dddee7195c"
+  integrity sha512-w+zLE5Jhg6Liz8+rQOWEAwtwkyqpfnmsinXjXg6cY7YIONZZtgvE0v2O0uhQBs0peNomOJwWRKt6JBfTdTd3OQ==
+  dependencies:
+    cssesc "^2.0.0"
+    indexes-of "^1.0.1"
+    uniq "^1.0.1"
+
+postcss-selector-parser@^6.0.0, postcss-selector-parser@^6.0.2:
+  version "6.0.4"
+  resolved "https://registry.yarnpkg.com/postcss-selector-parser/-/postcss-selector-parser-6.0.4.tgz#56075a1380a04604c38b063ea7767a129af5c2b3"
+  integrity sha512-gjMeXBempyInaBqpp8gODmwZ52WaYsVOsfr4L4lDQ7n3ncD6mEyySiDtgzCT+NYC0mmeOLvtsF8iaEf0YT6dBw==
+  dependencies:
+    cssesc "^3.0.0"
+    indexes-of "^1.0.1"
+    uniq "^1.0.1"
+    util-deprecate "^1.0.2"
+
+postcss-svgo@^4.0.2:
+  version "4.0.2"
+  resolved "https://registry.yarnpkg.com/postcss-svgo/-/postcss-svgo-4.0.2.tgz#17b997bc711b333bab143aaed3b8d3d6e3d38258"
+  integrity sha512-C6wyjo3VwFm0QgBy+Fu7gCYOkCmgmClghO+pjcxvrcBKtiKt0uCF+hvbMO1fyv5BMImRK90SMb+dwUnfbGd+jw==
+  dependencies:
+    is-svg "^3.0.0"
+    postcss "^7.0.0"
+    postcss-value-parser "^3.0.0"
+    svgo "^1.0.0"
+
+postcss-unique-selectors@^4.0.1:
+  version "4.0.1"
+  resolved "https://registry.yarnpkg.com/postcss-unique-selectors/-/postcss-unique-selectors-4.0.1.tgz#9446911f3289bfd64c6d680f073c03b1f9ee4bac"
+  integrity sha512-+JanVaryLo9QwZjKrmJgkI4Fn8SBgRO6WXQBJi7KiAVPlmxikB5Jzc4EvXMT2H0/m0RjrVVm9rGNhZddm/8Spg==
+  dependencies:
+    alphanum-sort "^1.0.0"
+    postcss "^7.0.0"
+    uniqs "^2.0.0"
+
+postcss-value-parser@^3.0.0:
+  version "3.3.1"
+  resolved "https://registry.yarnpkg.com/postcss-value-parser/-/postcss-value-parser-3.3.1.tgz#9ff822547e2893213cf1c30efa51ac5fd1ba8281"
+  integrity sha512-pISE66AbVkp4fDQ7VHBwRNXzAAKJjw4Vw7nWI/+Q3vuly7SNfgYXvm6i5IgFylHGK5sP/xHAbB7N49OS4gWNyQ==
+
+postcss-value-parser@^4.0.2, postcss-value-parser@^4.1.0:
+  version "4.1.0"
+  resolved "https://registry.yarnpkg.com/postcss-value-parser/-/postcss-value-parser-4.1.0.tgz#443f6a20ced6481a2bda4fa8532a6e55d789a2cb"
+  integrity sha512-97DXOFbQJhk71ne5/Mt6cOu6yxsSfM0QGQyl0L25Gca4yGWEGJaig7l7gbCX623VqTBNGLRLaVUCnNkcedlRSQ==
+
+postcss-values-parser@^2.0.0, postcss-values-parser@^2.0.1:
+  version "2.0.1"
+  resolved "https://registry.yarnpkg.com/postcss-values-parser/-/postcss-values-parser-2.0.1.tgz#da8b472d901da1e205b47bdc98637b9e9e550e5f"
+  integrity sha512-2tLuBsA6P4rYTNKCXYG/71C7j1pU6pK503suYOmn4xYrQIzW+opD+7FAFNuGSdZC/3Qfy334QbeMu7MEb8gOxg==
+  dependencies:
+    flatten "^1.0.2"
+    indexes-of "^1.0.1"
+    uniq "^1.0.1"
+
+postcss@7.0.21:
+  version "7.0.21"
+  resolved "https://registry.yarnpkg.com/postcss/-/postcss-7.0.21.tgz#06bb07824c19c2021c5d056d5b10c35b989f7e17"
+  integrity sha512-uIFtJElxJo29QC753JzhidoAhvp/e/Exezkdhfmt8AymWT6/5B7W1WmponYWkHk2eg6sONyTch0A3nkMPun3SQ==
+  dependencies:
+    chalk "^2.4.2"
+    source-map "^0.6.1"
+    supports-color "^6.1.0"
+
+postcss@^7, postcss@^7.0.0, postcss@^7.0.1, postcss@^7.0.14, postcss@^7.0.17, postcss@^7.0.2, postcss@^7.0.26, postcss@^7.0.27, postcss@^7.0.32, postcss@^7.0.5, postcss@^7.0.6:
+  version "7.0.35"
+  resolved "https://registry.yarnpkg.com/postcss/-/postcss-7.0.35.tgz#d2be00b998f7f211d8a276974079f2e92b970e24"
+  integrity sha512-3QT8bBJeX/S5zKTTjTCIjRF3If4avAT6kqxcASlTWEtAFCb9NH0OUxNDfgZSWdP5fJnBYCMEWkIFfWeugjzYMg==
+  dependencies:
+    chalk "^2.4.2"
+    source-map "^0.6.1"
+    supports-color "^6.1.0"
+
+postcss@^8.1.0:
+  version "8.2.6"
+  resolved "https://registry.yarnpkg.com/postcss/-/postcss-8.2.6.tgz#5d69a974543b45f87e464bc4c3e392a97d6be9fe"
+  integrity sha512-xpB8qYxgPuly166AGlpRjUdEYtmOWx2iCwGmrv4vqZL9YPVviDVPZPRXxnXr6xPZOdxQ9lp3ZBFCRgWJ7LE3Sg==
+  dependencies:
+    colorette "^1.2.1"
+    nanoid "^3.1.20"
+    source-map "^0.6.1"
+
+prelude-ls@^1.2.1:
+  version "1.2.1"
+  resolved "https://registry.yarnpkg.com/prelude-ls/-/prelude-ls-1.2.1.tgz#debc6489d7a6e6b0e7611888cec880337d316396"
+  integrity sha512-vkcDPrRZo1QZLbn5RLGPpg/WmIQ65qoWWhcGKf/b5eplkkarX0m9z8ppCat4mlOqUsWpyNuYgO3VRyrYHSzX5g==
+
+prelude-ls@~1.1.2:
+  version "1.1.2"
+  resolved "https://registry.yarnpkg.com/prelude-ls/-/prelude-ls-1.1.2.tgz#21932a549f5e52ffd9a827f570e04be62a97da54"
+  integrity sha1-IZMqVJ9eUv/ZqCf1cOBL5iqX2lQ=
+
+prepend-http@^1.0.0:
+  version "1.0.4"
+  resolved "https://registry.yarnpkg.com/prepend-http/-/prepend-http-1.0.4.tgz#d4f4562b0ce3696e41ac52d0e002e57a635dc6dc"
+  integrity sha1-1PRWKwzjaW5BrFLQ4ALlemNdxtw=
+
+prettier@^2.2.1:
+  version "2.2.1"
+  resolved "https://registry.yarnpkg.com/prettier/-/prettier-2.2.1.tgz#795a1a78dd52f073da0cd42b21f9c91381923ff5"
+  integrity sha512-PqyhM2yCjg/oKkFPtTGUojv7gnZAoG80ttl45O6x2Ug/rMJw4wcc9k6aaf2hibP7BGVCCM33gZoGjyvt9mm16Q==
+
+pretty-bytes@^5.3.0:
+  version "5.6.0"
+  resolved "https://registry.yarnpkg.com/pretty-bytes/-/pretty-bytes-5.6.0.tgz#356256f643804773c82f64723fe78c92c62beaeb"
+  integrity sha512-FFw039TmrBqFK8ma/7OL3sDz/VytdtJr044/QUJtH0wK9lb9jLq9tJyIxUwtQJHwar2BqtiA4iCWSwo9JLkzFg==
+
+pretty-error@^2.1.1:
+  version "2.1.2"
+  resolved "https://registry.yarnpkg.com/pretty-error/-/pretty-error-2.1.2.tgz#be89f82d81b1c86ec8fdfbc385045882727f93b6"
+  integrity sha512-EY5oDzmsX5wvuynAByrmY0P0hcp+QpnAKbJng2A2MPjVKXCxrDSUkzghVJ4ZGPIv+JC4gX8fPUWscC0RtjsWGw==
+  dependencies:
+    lodash "^4.17.20"
+    renderkid "^2.0.4"
+
+pretty-format@^26.0.0, pretty-format@^26.6.0, pretty-format@^26.6.2:
+  version "26.6.2"
+  resolved "https://registry.yarnpkg.com/pretty-format/-/pretty-format-26.6.2.tgz#e35c2705f14cb7fe2fe94fa078345b444120fc93"
+  integrity sha512-7AeGuCYNGmycyQbCqd/3PWH4eOoX/OiCa0uphp57NVTeAGdJGaAliecxwBDHYQCIvrW7aDBZCYeNTP/WX69mkg==
+  dependencies:
+    "@jest/types" "^26.6.2"
+    ansi-regex "^5.0.0"
+    ansi-styles "^4.0.0"
+    react-is "^17.0.1"
+
+process-nextick-args@~2.0.0:
+  version "2.0.1"
+  resolved "https://registry.yarnpkg.com/process-nextick-args/-/process-nextick-args-2.0.1.tgz#7820d9b16120cc55ca9ae7792680ae7dba6d7fe2"
+  integrity sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==
+
+process@^0.11.10:
+  version "0.11.10"
+  resolved "https://registry.yarnpkg.com/process/-/process-0.11.10.tgz#7332300e840161bda3e69a1d1d91a7d4bc16f182"
+  integrity sha1-czIwDoQBYb2j5podHZGn1LwW8YI=
+
+progress@^2.0.0:
+  version "2.0.3"
+  resolved "https://registry.yarnpkg.com/progress/-/progress-2.0.3.tgz#7e8cf8d8f5b8f239c1bc68beb4eb78567d572ef8"
+  integrity sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==
+
+promise-inflight@^1.0.1:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/promise-inflight/-/promise-inflight-1.0.1.tgz#98472870bf228132fcbdd868129bad12c3c029e3"
+  integrity sha1-mEcocL8igTL8vdhoEputEsPAKeM=
+
+promise@^8.1.0:
+  version "8.1.0"
+  resolved "https://registry.yarnpkg.com/promise/-/promise-8.1.0.tgz#697c25c3dfe7435dd79fcd58c38a135888eaf05e"
+  integrity sha512-W04AqnILOL/sPRXziNicCjSNRruLAuIHEOVBazepu0545DDNGYHz7ar9ZgZ1fMU8/MA4mVxp5rkBWRi6OXIy3Q==
+  dependencies:
+    asap "~2.0.6"
+
+prompts@2.4.0, prompts@^2.0.1:
+  version "2.4.0"
+  resolved "https://registry.yarnpkg.com/prompts/-/prompts-2.4.0.tgz#4aa5de0723a231d1ee9121c40fdf663df73f61d7"
+  integrity sha512-awZAKrk3vN6CroQukBL+R9051a4R3zCZBlJm/HBfrSZ8iTpYix3VX1vU4mveiLpiwmOJT4wokTF9m6HUk4KqWQ==
+  dependencies:
+    kleur "^3.0.3"
+    sisteransi "^1.0.5"
+
+prop-types@^15.6.2, prop-types@^15.7.2:
+  version "15.7.2"
+  resolved "https://registry.yarnpkg.com/prop-types/-/prop-types-15.7.2.tgz#52c41e75b8c87e72b9d9360e0206b99dcbffa6c5"
+  integrity sha512-8QQikdH7//R2vurIJSutZ1smHYTcLpRWEOlHnzcWHmBYrOGUysKwSsrC89BCiFj3CbrfJ/nXFdJepOVrY1GCHQ==
+  dependencies:
+    loose-envify "^1.4.0"
+    object-assign "^4.1.1"
+    react-is "^16.8.1"
+
+proxy-addr@~2.0.5:
+  version "2.0.6"
+  resolved "https://registry.yarnpkg.com/proxy-addr/-/proxy-addr-2.0.6.tgz#fdc2336505447d3f2f2c638ed272caf614bbb2bf"
+  integrity sha512-dh/frvCBVmSsDYzw6n926jv974gddhkFPfiN8hPOi30Wax25QZyZEGveluCgliBnqmuM+UJmBErbAUFIoDbjOw==
+  dependencies:
+    forwarded "~0.1.2"
+    ipaddr.js "1.9.1"
+
+prr@~1.0.1:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/prr/-/prr-1.0.1.tgz#d3fc114ba06995a45ec6893f484ceb1d78f5f476"
+  integrity sha1-0/wRS6BplaRexok/SEzrHXj19HY=
+
+psl@^1.1.28:
+  version "1.8.0"
+  resolved "https://registry.yarnpkg.com/psl/-/psl-1.8.0.tgz#9326f8bcfb013adcc005fdff056acce020e51c24"
+  integrity sha512-RIdOzyoavK+hA18OGGWDqUTsCLhtA7IcZ/6NCs4fFJaHBDab+pDDmDIByWFRQJq2Cd7r1OoQxBGKOaztq+hjIQ==
+
+public-encrypt@^4.0.0:
+  version "4.0.3"
+  resolved "https://registry.yarnpkg.com/public-encrypt/-/public-encrypt-4.0.3.tgz#4fcc9d77a07e48ba7527e7cbe0de33d0701331e0"
+  integrity sha512-zVpa8oKZSz5bTMTFClc1fQOnyyEzpl5ozpi1B5YcvBrdohMjH2rfsBtyXcuNuwjsDIXmBYlF2N5FlJYhR29t8Q==
+  dependencies:
+    bn.js "^4.1.0"
+    browserify-rsa "^4.0.0"
+    create-hash "^1.1.0"
+    parse-asn1 "^5.0.0"
+    randombytes "^2.0.1"
+    safe-buffer "^5.1.2"
+
+pump@^2.0.0:
+  version "2.0.1"
+  resolved "https://registry.yarnpkg.com/pump/-/pump-2.0.1.tgz#12399add6e4cf7526d973cbc8b5ce2e2908b3909"
+  integrity sha512-ruPMNRkN3MHP1cWJc9OWr+T/xDP0jhXYCLfJcBuX54hhfIBnaQmAUMfDcG4DM5UMWByBbJY69QSphm3jtDKIkA==
+  dependencies:
+    end-of-stream "^1.1.0"
+    once "^1.3.1"
+
+pump@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/pump/-/pump-3.0.0.tgz#b4a2116815bde2f4e1ea602354e8c75565107a64"
+  integrity sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==
+  dependencies:
+    end-of-stream "^1.1.0"
+    once "^1.3.1"
+
+pumpify@^1.3.3:
+  version "1.5.1"
+  resolved "https://registry.yarnpkg.com/pumpify/-/pumpify-1.5.1.tgz#36513be246ab27570b1a374a5ce278bfd74370ce"
+  integrity sha512-oClZI37HvuUJJxSKKrC17bZ9Cu0ZYhEAGPsPUy9KlMUmv9dKX2o77RUmq7f3XjIxbwyGwYzbzQ1L2Ks8sIradQ==
+  dependencies:
+    duplexify "^3.6.0"
+    inherits "^2.0.3"
+    pump "^2.0.0"
+
+punycode@1.3.2:
+  version "1.3.2"
+  resolved "https://registry.yarnpkg.com/punycode/-/punycode-1.3.2.tgz#9653a036fb7c1ee42342f2325cceefea3926c48d"
+  integrity sha1-llOgNvt8HuQjQvIyXM7v6jkmxI0=
+
+punycode@^1.2.4:
+  version "1.4.1"
+  resolved "https://registry.yarnpkg.com/punycode/-/punycode-1.4.1.tgz#c0d5a63b2718800ad8e1eb0fa5269c84dd41845e"
+  integrity sha1-wNWmOycYgArY4esPpSachN1BhF4=
+
+punycode@^2.1.0, punycode@^2.1.1:
+  version "2.1.1"
+  resolved "https://registry.yarnpkg.com/punycode/-/punycode-2.1.1.tgz#b58b010ac40c22c5657616c8d2c2c02c7bf479ec"
+  integrity sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A==
+
+q@^1.1.2:
+  version "1.5.1"
+  resolved "https://registry.yarnpkg.com/q/-/q-1.5.1.tgz#7e32f75b41381291d04611f1bf14109ac00651d7"
+  integrity sha1-fjL3W0E4EpHQRhHxvxQQmsAGUdc=
+
+qs@6.7.0:
+  version "6.7.0"
+  resolved "https://registry.yarnpkg.com/qs/-/qs-6.7.0.tgz#41dc1a015e3d581f1621776be31afb2876a9b1bc"
+  integrity sha512-VCdBRNFTX1fyE7Nb6FYoURo/SPe62QCaAyzJvUjwRaIsc+NePBEniHlvxFmmX56+HZphIGtV0XeCirBtpDrTyQ==
+
+qs@~6.5.2:
+  version "6.5.2"
+  resolved "https://registry.yarnpkg.com/qs/-/qs-6.5.2.tgz#cb3ae806e8740444584ef154ce8ee98d403f3e36"
+  integrity sha512-N5ZAX4/LxJmF+7wN74pUD6qAh9/wnvdQcjq9TZjevvXzSUo7bfmw91saqMjzGS2xq91/odN2dW/WOl7qQHNDGA==
+
+query-string@^4.1.0:
+  version "4.3.4"
+  resolved "https://registry.yarnpkg.com/query-string/-/query-string-4.3.4.tgz#bbb693b9ca915c232515b228b1a02b609043dbeb"
+  integrity sha1-u7aTucqRXCMlFbIosaArYJBD2+s=
+  dependencies:
+    object-assign "^4.1.0"
+    strict-uri-encode "^1.0.0"
+
+querystring-es3@^0.2.0:
+  version "0.2.1"
+  resolved "https://registry.yarnpkg.com/querystring-es3/-/querystring-es3-0.2.1.tgz#9ec61f79049875707d69414596fd907a4d711e73"
+  integrity sha1-nsYfeQSYdXB9aUFFlv2Qek1xHnM=
+
+querystring@0.2.0:
+  version "0.2.0"
+  resolved "https://registry.yarnpkg.com/querystring/-/querystring-0.2.0.tgz#b209849203bb25df820da756e747005878521620"
+  integrity sha1-sgmEkgO7Jd+CDadW50cAWHhSFiA=
+
+querystring@^0.2.0:
+  version "0.2.1"
+  resolved "https://registry.yarnpkg.com/querystring/-/querystring-0.2.1.tgz#40d77615bb09d16902a85c3e38aa8b5ed761c2dd"
+  integrity sha512-wkvS7mL/JMugcup3/rMitHmd9ecIGd2lhFhK9N3UUQ450h66d1r3Y9nvXzQAW1Lq+wyx61k/1pfKS5KuKiyEbg==
+
+querystringify@^2.1.1:
+  version "2.2.0"
+  resolved "https://registry.yarnpkg.com/querystringify/-/querystringify-2.2.0.tgz#3345941b4153cb9d082d8eee4cda2016a9aef7f6"
+  integrity sha512-FIqgj2EUvTa7R50u0rGsyTftzjYmv/a3hO345bZNrqabNqjtgiDMgmo4mkUjd+nzU5oF3dClKqFIPUKybUyqoQ==
+
+queue-microtask@^1.2.2:
+  version "1.2.2"
+  resolved "https://registry.yarnpkg.com/queue-microtask/-/queue-microtask-1.2.2.tgz#abf64491e6ecf0f38a6502403d4cda04f372dfd3"
+  integrity sha512-dB15eXv3p2jDlbOiNLyMabYg1/sXvppd8DP2J3EOCQ0AkuSXCW2tP7mnVouVLJKgUMY6yP0kcQDVpLCN13h4Xg==
+
+raf@^3.4.1:
+  version "3.4.1"
+  resolved "https://registry.yarnpkg.com/raf/-/raf-3.4.1.tgz#0742e99a4a6552f445d73e3ee0328af0ff1ede39"
+  integrity sha512-Sq4CW4QhwOHE8ucn6J34MqtZCeWFP2aQSmrlroYgqAV1PjStIhJXxYuTgUIfkEk7zTLjmIjLmU5q+fbD1NnOJA==
+  dependencies:
+    performance-now "^2.1.0"
+
+randombytes@^2.0.0, randombytes@^2.0.1, randombytes@^2.0.5, randombytes@^2.1.0:
+  version "2.1.0"
+  resolved "https://registry.yarnpkg.com/randombytes/-/randombytes-2.1.0.tgz#df6f84372f0270dc65cdf6291349ab7a473d4f2a"
+  integrity sha512-vYl3iOX+4CKUWuxGi9Ukhie6fsqXqS9FE2Zaic4tNFD2N2QQaXOMFbuKK4QmDHC0JO6B1Zp41J0LpT0oR68amQ==
+  dependencies:
+    safe-buffer "^5.1.0"
+
+randomfill@^1.0.3:
+  version "1.0.4"
+  resolved "https://registry.yarnpkg.com/randomfill/-/randomfill-1.0.4.tgz#c92196fc86ab42be983f1bf31778224931d61458"
+  integrity sha512-87lcbR8+MhcWcUiQ+9e+Rwx8MyR2P7qnt15ynUlbm3TU/fjbgz4GsvfSUDTemtCCtVCqb4ZcEFlyPNTh9bBTLw==
+  dependencies:
+    randombytes "^2.0.5"
+    safe-buffer "^5.1.0"
+
+range-parser@^1.2.1, range-parser@~1.2.1:
+  version "1.2.1"
+  resolved "https://registry.yarnpkg.com/range-parser/-/range-parser-1.2.1.tgz#3cf37023d199e1c24d1a55b84800c2f3e6468031"
+  integrity sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg==
+
+raw-body@2.4.0:
+  version "2.4.0"
+  resolved "https://registry.yarnpkg.com/raw-body/-/raw-body-2.4.0.tgz#a1ce6fb9c9bc356ca52e89256ab59059e13d0332"
+  integrity sha512-4Oz8DUIwdvoa5qMJelxipzi/iJIi40O5cGV1wNYp5hvZP8ZN0T+jiNkL0QepXs+EsQ9XJ8ipEDoiH70ySUJP3Q==
+  dependencies:
+    bytes "3.1.0"
+    http-errors "1.7.2"
+    iconv-lite "0.4.24"
+    unpipe "1.0.0"
+
+react-app-polyfill@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/react-app-polyfill/-/react-app-polyfill-2.0.0.tgz#a0bea50f078b8a082970a9d853dc34b6dcc6a3cf"
+  integrity sha512-0sF4ny9v/B7s6aoehwze9vJNWcmCemAUYBVasscVr92+UYiEqDXOxfKjXN685mDaMRNF3WdhHQs76oTODMocFA==
+  dependencies:
+    core-js "^3.6.5"
+    object-assign "^4.1.1"
+    promise "^8.1.0"
+    raf "^3.4.1"
+    regenerator-runtime "^0.13.7"
+    whatwg-fetch "^3.4.1"
+
+react-clientside-effect@^1.2.2:
+  version "1.2.5"
+  resolved "https://registry.yarnpkg.com/react-clientside-effect/-/react-clientside-effect-1.2.5.tgz#e2c4dc3c9ee109f642fac4f5b6e9bf5bcd2219a3"
+  integrity sha512-2bL8qFW1TGBHozGGbVeyvnggRpMjibeZM2536AKNENLECutp2yfs44IL8Hmpn8qjFQ2K7A9PnYf3vc7aQq/cPA==
+  dependencies:
+    "@babel/runtime" "^7.12.13"
+
+react-dev-utils@^11.0.3:
+  version "11.0.3"
+  resolved "https://registry.yarnpkg.com/react-dev-utils/-/react-dev-utils-11.0.3.tgz#b61ed499c7d74f447d4faddcc547e5e671e97c08"
+  integrity sha512-4lEA5gF4OHrcJLMUV1t+4XbNDiJbsAWCH5Z2uqlTqW6dD7Cf5nEASkeXrCI/Mz83sI2o527oBIFKVMXtRf1Vtg==
+  dependencies:
+    "@babel/code-frame" "7.10.4"
+    address "1.1.2"
+    browserslist "4.14.2"
+    chalk "2.4.2"
+    cross-spawn "7.0.3"
+    detect-port-alt "1.1.6"
+    escape-string-regexp "2.0.0"
+    filesize "6.1.0"
+    find-up "4.1.0"
+    fork-ts-checker-webpack-plugin "4.1.6"
+    global-modules "2.0.0"
+    globby "11.0.1"
+    gzip-size "5.1.1"
+    immer "8.0.1"
+    is-root "2.1.0"
+    loader-utils "2.0.0"
+    open "^7.0.2"
+    pkg-up "3.1.0"
+    prompts "2.4.0"
+    react-error-overlay "^6.0.9"
+    recursive-readdir "2.2.2"
+    shell-quote "1.7.2"
+    strip-ansi "6.0.0"
+    text-table "0.2.0"
+
+react-dom@^17.0.1:
+  version "17.0.1"
+  resolved "https://registry.yarnpkg.com/react-dom/-/react-dom-17.0.1.tgz#1de2560474ec9f0e334285662ede52dbc5426fc6"
+  integrity sha512-6eV150oJZ9U2t9svnsspTMrWNyHc6chX0KzDeAOXftRa8bNeOKTTfCJ7KorIwenkHd2xqVTBTCZd79yk/lx/Ug==
+  dependencies:
+    loose-envify "^1.1.0"
+    object-assign "^4.1.1"
+    scheduler "^0.20.1"
+
+react-error-overlay@^6.0.9:
+  version "6.0.9"
+  resolved "https://registry.yarnpkg.com/react-error-overlay/-/react-error-overlay-6.0.9.tgz#3c743010c9359608c375ecd6bc76f35d93995b0a"
+  integrity sha512-nQTTcUu+ATDbrSD1BZHr5kgSD4oF8OFjxun8uAaL8RwPBacGBNPf/yAuVVdx17N8XNzRDMrZ9XcKZHCjPW+9ew==
+
+react-fast-compare@3.2.0:
+  version "3.2.0"
+  resolved "https://registry.yarnpkg.com/react-fast-compare/-/react-fast-compare-3.2.0.tgz#641a9da81b6a6320f270e89724fb45a0b39e43bb"
+  integrity sha512-rtGImPZ0YyLrscKI9xTpV8psd6I8VAtjKCzQDlzyDvqJA8XOW78TXYQwNRNd8g8JZnDu8q9Fu/1v4HPAVwVdHA==
+
+react-focus-lock@2.5.0:
+  version "2.5.0"
+  resolved "https://registry.yarnpkg.com/react-focus-lock/-/react-focus-lock-2.5.0.tgz#12e3a3940e897c26e2c2a0408cd25ea3c99b3709"
+  integrity sha512-XLxj6uTXgz0US8TmqNU2jMfnXwZG0mH2r/afQqvPEaX6nyEll5LHVcEXk2XDUQ34RVeLPkO/xK5x6c/qiuSq/A==
+  dependencies:
+    "@babel/runtime" "^7.0.0"
+    focus-lock "^0.8.1"
+    prop-types "^15.6.2"
+    react-clientside-effect "^1.2.2"
+    use-callback-ref "^1.2.1"
+    use-sidecar "^1.0.1"
+
+react-icons@^4.2.0:
+  version "4.2.0"
+  resolved "https://registry.yarnpkg.com/react-icons/-/react-icons-4.2.0.tgz#6dda80c8a8f338ff96a1851424d63083282630d0"
+  integrity sha512-rmzEDFt+AVXRzD7zDE21gcxyBizD/3NqjbX6cmViAgdqfJ2UiLer8927/QhhrXQV7dEj/1EGuOTPp7JnLYVJKQ==
+
+react-is@^16.6.0, react-is@^16.7.0, react-is@^16.8.1:
+  version "16.13.1"
+  resolved "https://registry.yarnpkg.com/react-is/-/react-is-16.13.1.tgz#789729a4dc36de2999dc156dd6c1d9c18cea56a4"
+  integrity sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ==
+
+react-is@^17.0.1:
+  version "17.0.1"
+  resolved "https://registry.yarnpkg.com/react-is/-/react-is-17.0.1.tgz#5b3531bd76a645a4c9fb6e693ed36419e3301339"
+  integrity sha512-NAnt2iGDXohE5LI7uBnLnqvLQMtzhkiAOLXTmv+qnF9Ky7xAPcX8Up/xWIhxvLVGJvuLiNc4xQLtuqDRzb4fSA==
+
+react-refresh@^0.8.3:
+  version "0.8.3"
+  resolved "https://registry.yarnpkg.com/react-refresh/-/react-refresh-0.8.3.tgz#721d4657672d400c5e3c75d063c4a85fb2d5d68f"
+  integrity sha512-X8jZHc7nCMjaCqoU+V2I0cOhNW+QMBwSUkeXnTi8IPe6zaRWfn60ZzvFDZqWPfmSJfjub7dDW1SP0jaHWLu/hg==
+
+react-remove-scroll-bar@^2.1.0:
+  version "2.2.0"
+  resolved "https://registry.yarnpkg.com/react-remove-scroll-bar/-/react-remove-scroll-bar-2.2.0.tgz#d4d545a7df024f75d67e151499a6ab5ac97c8cdd"
+  integrity sha512-UU9ZBP1wdMR8qoUs7owiVcpaPwsQxUDC2lypP6mmixaGlARZa7ZIBx1jcuObLdhMOvCsnZcvetOho0wzPa9PYg==
+  dependencies:
+    react-style-singleton "^2.1.0"
+    tslib "^1.0.0"
+
+react-remove-scroll@2.4.1:
+  version "2.4.1"
+  resolved "https://registry.yarnpkg.com/react-remove-scroll/-/react-remove-scroll-2.4.1.tgz#e0af6126621083a5064591d367291a81b2d107f5"
+  integrity sha512-K7XZySEzOHMTq7dDwcHsZA6Y7/1uX5RsWhRXVYv8rdh+y9Qz2nMwl9RX/Mwnj/j7JstCGmxyfyC0zbVGXYh3mA==
+  dependencies:
+    react-remove-scroll-bar "^2.1.0"
+    react-style-singleton "^2.1.0"
+    tslib "^1.0.0"
+    use-callback-ref "^1.2.3"
+    use-sidecar "^1.0.1"
+
+react-router-dom@^5.2.0:
+  version "5.2.0"
+  resolved "https://registry.yarnpkg.com/react-router-dom/-/react-router-dom-5.2.0.tgz#9e65a4d0c45e13289e66c7b17c7e175d0ea15662"
+  integrity sha512-gxAmfylo2QUjcwxI63RhQ5G85Qqt4voZpUXSEqCwykV0baaOTQDR1f0PmY8AELqIyVc0NEZUj0Gov5lNGcXgsA==
+  dependencies:
+    "@babel/runtime" "^7.1.2"
+    history "^4.9.0"
+    loose-envify "^1.3.1"
+    prop-types "^15.6.2"
+    react-router "5.2.0"
+    tiny-invariant "^1.0.2"
+    tiny-warning "^1.0.0"
+
+react-router@5.2.0:
+  version "5.2.0"
+  resolved "https://registry.yarnpkg.com/react-router/-/react-router-5.2.0.tgz#424e75641ca8747fbf76e5ecca69781aa37ea293"
+  integrity sha512-smz1DUuFHRKdcJC0jobGo8cVbhO3x50tCL4icacOlcwDOEQPq4TMqwx3sY1TP+DvtTgz4nm3thuo7A+BK2U0Dw==
+  dependencies:
+    "@babel/runtime" "^7.1.2"
+    history "^4.9.0"
+    hoist-non-react-statics "^3.1.0"
+    loose-envify "^1.3.1"
+    mini-create-react-context "^0.4.0"
+    path-to-regexp "^1.7.0"
+    prop-types "^15.6.2"
+    react-is "^16.6.0"
+    tiny-invariant "^1.0.2"
+    tiny-warning "^1.0.0"
+
+react-scripts@4.0.3:
+  version "4.0.3"
+  resolved "https://registry.yarnpkg.com/react-scripts/-/react-scripts-4.0.3.tgz#b1cafed7c3fa603e7628ba0f187787964cb5d345"
+  integrity sha512-S5eO4vjUzUisvkIPB7jVsKtuH2HhWcASREYWHAQ1FP5HyCv3xgn+wpILAEWkmy+A+tTNbSZClhxjT3qz6g4L1A==
+  dependencies:
+    "@babel/core" "7.12.3"
+    "@pmmmwh/react-refresh-webpack-plugin" "0.4.3"
+    "@svgr/webpack" "5.5.0"
+    "@typescript-eslint/eslint-plugin" "^4.5.0"
+    "@typescript-eslint/parser" "^4.5.0"
+    babel-eslint "^10.1.0"
+    babel-jest "^26.6.0"
+    babel-loader "8.1.0"
+    babel-plugin-named-asset-import "^0.3.7"
+    babel-preset-react-app "^10.0.0"
+    bfj "^7.0.2"
+    camelcase "^6.1.0"
+    case-sensitive-paths-webpack-plugin "2.3.0"
+    css-loader "4.3.0"
+    dotenv "8.2.0"
+    dotenv-expand "5.1.0"
+    eslint "^7.11.0"
+    eslint-config-react-app "^6.0.0"
+    eslint-plugin-flowtype "^5.2.0"
+    eslint-plugin-import "^2.22.1"
+    eslint-plugin-jest "^24.1.0"
+    eslint-plugin-jsx-a11y "^6.3.1"
+    eslint-plugin-react "^7.21.5"
+    eslint-plugin-react-hooks "^4.2.0"
+    eslint-plugin-testing-library "^3.9.2"
+    eslint-webpack-plugin "^2.5.2"
+    file-loader "6.1.1"
+    fs-extra "^9.0.1"
+    html-webpack-plugin "4.5.0"
+    identity-obj-proxy "3.0.0"
+    jest "26.6.0"
+    jest-circus "26.6.0"
+    jest-resolve "26.6.0"
+    jest-watch-typeahead "0.6.1"
+    mini-css-extract-plugin "0.11.3"
+    optimize-css-assets-webpack-plugin "5.0.4"
+    pnp-webpack-plugin "1.6.4"
+    postcss-flexbugs-fixes "4.2.1"
+    postcss-loader "3.0.0"
+    postcss-normalize "8.0.1"
+    postcss-preset-env "6.7.0"
+    postcss-safe-parser "5.0.2"
+    prompts "2.4.0"
+    react-app-polyfill "^2.0.0"
+    react-dev-utils "^11.0.3"
+    react-refresh "^0.8.3"
+    resolve "1.18.1"
+    resolve-url-loader "^3.1.2"
+    sass-loader "^10.0.5"
+    semver "7.3.2"
+    style-loader "1.3.0"
+    terser-webpack-plugin "4.2.3"
+    ts-pnp "1.2.0"
+    url-loader "4.1.1"
+    webpack "4.44.2"
+    webpack-dev-server "3.11.1"
+    webpack-manifest-plugin "2.2.0"
+    workbox-webpack-plugin "5.1.4"
+  optionalDependencies:
+    fsevents "^2.1.3"
+
+react-style-singleton@^2.1.0:
+  version "2.1.1"
+  resolved "https://registry.yarnpkg.com/react-style-singleton/-/react-style-singleton-2.1.1.tgz#ce7f90b67618be2b6b94902a30aaea152ce52e66"
+  integrity sha512-jNRp07Jza6CBqdRKNgGhT3u9umWvils1xsuMOjZlghBDH2MU0PL2WZor4PGYjXpnRCa9DQSlHMs/xnABWOwYbA==
+  dependencies:
+    get-nonce "^1.0.0"
+    invariant "^2.2.4"
+    tslib "^1.0.0"
+
+react-table@^7.6.3:
+  version "7.6.3"
+  resolved "https://registry.yarnpkg.com/react-table/-/react-table-7.6.3.tgz#76434392b3f62344bdb704f5b227c2f29c1ffb14"
+  integrity sha512-hfPF13zDLxPMpLKzIKCE8RZud9T/XrRTsaCIf8zXpWZIZ2juCl7qrGpo3AQw9eAetXV5DP7s2GDm+hht7qq5Dw==
+
+react-timeago@^5.2.0:
+  version "5.2.0"
+  resolved "https://registry.yarnpkg.com/react-timeago/-/react-timeago-5.2.0.tgz#d655d40aa55e4fe08a92234481a6aea7f656ab5d"
+  integrity sha512-wCEEDGQHMdFh/PLp+Hj5vk9ZoC4KjQ5u0u6+KrrY9rny5LqJ2gZvNNEAS4mhSZDV1i7JLgQI5VQTAux7f+vj2w==
+
+react@^17.0.1:
+  version "17.0.1"
+  resolved "https://registry.yarnpkg.com/react/-/react-17.0.1.tgz#6e0600416bd57574e3f86d92edba3d9008726127"
+  integrity sha512-lG9c9UuMHdcAexXtigOZLX8exLWkW0Ku29qPRU8uhF2R9BN96dLCt0psvzPLlHc5OWkgymP3qwTRgbnw5BKx3w==
+  dependencies:
+    loose-envify "^1.1.0"
+    object-assign "^4.1.1"
+
+read-pkg-up@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/read-pkg-up/-/read-pkg-up-2.0.0.tgz#6b72a8048984e0c41e79510fd5e9fa99b3b549be"
+  integrity sha1-a3KoBImE4MQeeVEP1en6mbO1Sb4=
+  dependencies:
+    find-up "^2.0.0"
+    read-pkg "^2.0.0"
+
+read-pkg-up@^7.0.1:
+  version "7.0.1"
+  resolved "https://registry.yarnpkg.com/read-pkg-up/-/read-pkg-up-7.0.1.tgz#f3a6135758459733ae2b95638056e1854e7ef507"
+  integrity sha512-zK0TB7Xd6JpCLmlLmufqykGE+/TlOePD6qKClNW7hHDKFh/J7/7gCWGR7joEQEW1bKq3a3yUZSObOoWLFQ4ohg==
+  dependencies:
+    find-up "^4.1.0"
+    read-pkg "^5.2.0"
+    type-fest "^0.8.1"
+
+read-pkg@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/read-pkg/-/read-pkg-2.0.0.tgz#8ef1c0623c6a6db0dc6713c4bfac46332b2368f8"
+  integrity sha1-jvHAYjxqbbDcZxPEv6xGMysjaPg=
+  dependencies:
+    load-json-file "^2.0.0"
+    normalize-package-data "^2.3.2"
+    path-type "^2.0.0"
+
+read-pkg@^5.2.0:
+  version "5.2.0"
+  resolved "https://registry.yarnpkg.com/read-pkg/-/read-pkg-5.2.0.tgz#7bf295438ca5a33e56cd30e053b34ee7250c93cc"
+  integrity sha512-Ug69mNOpfvKDAc2Q8DRpMjjzdtrnv9HcSMX+4VsZxD1aZ6ZzrIE7rlzXBtWTyhULSMKg076AW6WR5iZpD0JiOg==
+  dependencies:
+    "@types/normalize-package-data" "^2.4.0"
+    normalize-package-data "^2.5.0"
+    parse-json "^5.0.0"
+    type-fest "^0.6.0"
+
+"readable-stream@1 || 2", readable-stream@^2.0.0, readable-stream@^2.0.1, readable-stream@^2.0.2, readable-stream@^2.1.5, readable-stream@^2.2.2, readable-stream@^2.3.3, readable-stream@^2.3.6, readable-stream@~2.3.6:
+  version "2.3.7"
+  resolved "https://registry.yarnpkg.com/readable-stream/-/readable-stream-2.3.7.tgz#1eca1cf711aef814c04f62252a36a62f6cb23b57"
+  integrity sha512-Ebho8K4jIbHAxnuxi7o42OrZgF/ZTNcsZj6nRKyUmkhLFq8CHItp/fy6hQZuZmP/n3yZ9VBUbp4zz/mX8hmYPw==
+  dependencies:
+    core-util-is "~1.0.0"
+    inherits "~2.0.3"
+    isarray "~1.0.0"
+    process-nextick-args "~2.0.0"
+    safe-buffer "~5.1.1"
+    string_decoder "~1.1.1"
+    util-deprecate "~1.0.1"
+
+readable-stream@^3.0.6, readable-stream@^3.1.1, readable-stream@^3.6.0:
+  version "3.6.0"
+  resolved "https://registry.yarnpkg.com/readable-stream/-/readable-stream-3.6.0.tgz#337bbda3adc0706bd3e024426a286d4b4b2c9198"
+  integrity sha512-BViHy7LKeTz4oNnkcLJ+lVSL6vpiFeX6/d3oSH8zCW7UxP2onchk+vTGB143xuFjHS3deTgkKoXXymXqymiIdA==
+  dependencies:
+    inherits "^2.0.3"
+    string_decoder "^1.1.1"
+    util-deprecate "^1.0.1"
+
+readdirp@^2.2.1:
+  version "2.2.1"
+  resolved "https://registry.yarnpkg.com/readdirp/-/readdirp-2.2.1.tgz#0e87622a3325aa33e892285caf8b4e846529a525"
+  integrity sha512-1JU/8q+VgFZyxwrJ+SVIOsh+KywWGpds3NTqikiKpDMZWScmAYyKIgqkO+ARvNWJfXeXR1zxz7aHF4u4CyH6vQ==
+  dependencies:
+    graceful-fs "^4.1.11"
+    micromatch "^3.1.10"
+    readable-stream "^2.0.2"
+
+readdirp@~3.5.0:
+  version "3.5.0"
+  resolved "https://registry.yarnpkg.com/readdirp/-/readdirp-3.5.0.tgz#9ba74c019b15d365278d2e91bb8c48d7b4d42c9e"
+  integrity sha512-cMhu7c/8rdhkHXWsY+osBhfSy0JikwpHK/5+imo+LpeasTF8ouErHrlYkwT0++njiyuDvc7OFY5T3ukvZ8qmFQ==
+  dependencies:
+    picomatch "^2.2.1"
+
+recursive-readdir@2.2.2:
+  version "2.2.2"
+  resolved "https://registry.yarnpkg.com/recursive-readdir/-/recursive-readdir-2.2.2.tgz#9946fb3274e1628de6e36b2f6714953b4845094f"
+  integrity sha512-nRCcW9Sj7NuZwa2XvH9co8NPeXUBhZP7CRKJtU+cS6PW9FpCIFoI5ib0NT1ZrbNuPoRy0ylyCaUL8Gih4LSyFg==
+  dependencies:
+    minimatch "3.0.4"
+
+redent@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/redent/-/redent-3.0.0.tgz#e557b7998316bb53c9f1f56fa626352c6963059f"
+  integrity sha512-6tDA8g98We0zd0GvVeMT9arEOnTw9qM03L9cJXaCjrip1OO764RDBLBfrB4cwzNGDj5OA5ioymC9GkizgWJDUg==
+  dependencies:
+    indent-string "^4.0.0"
+    strip-indent "^3.0.0"
+
+regenerate-unicode-properties@^8.2.0:
+  version "8.2.0"
+  resolved "https://registry.yarnpkg.com/regenerate-unicode-properties/-/regenerate-unicode-properties-8.2.0.tgz#e5de7111d655e7ba60c057dbe9ff37c87e65cdec"
+  integrity sha512-F9DjY1vKLo/tPePDycuH3dn9H1OTPIkVD9Kz4LODu+F2C75mgjAJ7x/gwy6ZcSNRAAkhNlJSOHRe8k3p+K9WhA==
+  dependencies:
+    regenerate "^1.4.0"
+
+regenerate@^1.4.0:
+  version "1.4.2"
+  resolved "https://registry.yarnpkg.com/regenerate/-/regenerate-1.4.2.tgz#b9346d8827e8f5a32f7ba29637d398b69014848a"
+  integrity sha512-zrceR/XhGYU/d/opr2EKO7aRHUeiBI8qjtfHqADTwZd6Szfy16la6kqD0MIUs5z5hx6AaKa+PixpPrR289+I0A==
+
+regenerator-runtime@^0.11.0:
+  version "0.11.1"
+  resolved "https://registry.yarnpkg.com/regenerator-runtime/-/regenerator-runtime-0.11.1.tgz#be05ad7f9bf7d22e056f9726cee5017fbf19e2e9"
+  integrity sha512-MguG95oij0fC3QV3URf4V2SDYGJhJnJGqvIIgdECeODCT98wSWDAJ94SSuVpYQUoTcGUIL6L4yNB7j1DFFHSBg==
+
+regenerator-runtime@^0.13.4, regenerator-runtime@^0.13.7:
+  version "0.13.7"
+  resolved "https://registry.yarnpkg.com/regenerator-runtime/-/regenerator-runtime-0.13.7.tgz#cac2dacc8a1ea675feaabaeb8ae833898ae46f55"
+  integrity sha512-a54FxoJDIr27pgf7IgeQGxmqUNYrcV338lf/6gH456HZ/PhX+5BcwHXG9ajESmwe6WRO0tAzRUrRmNONWgkrew==
+
+regenerator-transform@^0.14.2:
+  version "0.14.5"
+  resolved "https://registry.yarnpkg.com/regenerator-transform/-/regenerator-transform-0.14.5.tgz#c98da154683671c9c4dcb16ece736517e1b7feb4"
+  integrity sha512-eOf6vka5IO151Jfsw2NO9WpGX58W6wWmefK3I1zEGr0lOD0u8rwPaNqQL1aRxUaxLeKO3ArNh3VYg1KbaD+FFw==
+  dependencies:
+    "@babel/runtime" "^7.8.4"
+
+regex-not@^1.0.0, regex-not@^1.0.2:
+  version "1.0.2"
+  resolved "https://registry.yarnpkg.com/regex-not/-/regex-not-1.0.2.tgz#1f4ece27e00b0b65e0247a6810e6a85d83a5752c"
+  integrity sha512-J6SDjUgDxQj5NusnOtdFxDwN/+HWykR8GELwctJ7mdqhcyy1xEc4SRFHUXvxTp661YaVKAjfRLZ9cCqS6tn32A==
+  dependencies:
+    extend-shallow "^3.0.2"
+    safe-regex "^1.1.0"
+
+regex-parser@^2.2.11:
+  version "2.2.11"
+  resolved "https://registry.yarnpkg.com/regex-parser/-/regex-parser-2.2.11.tgz#3b37ec9049e19479806e878cabe7c1ca83ccfe58"
+  integrity sha512-jbD/FT0+9MBU2XAZluI7w2OBs1RBi6p9M83nkoZayQXXU9e8Robt69FcZc7wU4eJD/YFTjn1JdCk3rbMJajz8Q==
+
+regexp.prototype.flags@^1.2.0, regexp.prototype.flags@^1.3.1:
+  version "1.3.1"
+  resolved "https://registry.yarnpkg.com/regexp.prototype.flags/-/regexp.prototype.flags-1.3.1.tgz#7ef352ae8d159e758c0eadca6f8fcb4eef07be26"
+  integrity sha512-JiBdRBq91WlY7uRJ0ds7R+dU02i6LKi8r3BuQhNXn+kmeLN+EfHhfjqMRis1zJxnlu88hq/4dx0P2OP3APRTOA==
+  dependencies:
+    call-bind "^1.0.2"
+    define-properties "^1.1.3"
+
+regexpp@^3.0.0, regexpp@^3.1.0:
+  version "3.1.0"
+  resolved "https://registry.yarnpkg.com/regexpp/-/regexpp-3.1.0.tgz#206d0ad0a5648cffbdb8ae46438f3dc51c9f78e2"
+  integrity sha512-ZOIzd8yVsQQA7j8GCSlPGXwg5PfmA1mrq0JP4nGhh54LaKN3xdai/vHUDu74pKwV8OxseMS65u2NImosQcSD0Q==
+
+regexpu-core@^4.7.1:
+  version "4.7.1"
+  resolved "https://registry.yarnpkg.com/regexpu-core/-/regexpu-core-4.7.1.tgz#2dea5a9a07233298fbf0db91fa9abc4c6e0f8ad6"
+  integrity sha512-ywH2VUraA44DZQuRKzARmw6S66mr48pQVva4LBeRhcOltJ6hExvWly5ZjFLYo67xbIxb6W1q4bAGtgfEl20zfQ==
+  dependencies:
+    regenerate "^1.4.0"
+    regenerate-unicode-properties "^8.2.0"
+    regjsgen "^0.5.1"
+    regjsparser "^0.6.4"
+    unicode-match-property-ecmascript "^1.0.4"
+    unicode-match-property-value-ecmascript "^1.2.0"
+
+regjsgen@^0.5.1:
+  version "0.5.2"
+  resolved "https://registry.yarnpkg.com/regjsgen/-/regjsgen-0.5.2.tgz#92ff295fb1deecbf6ecdab2543d207e91aa33733"
+  integrity sha512-OFFT3MfrH90xIW8OOSyUrk6QHD5E9JOTeGodiJeBS3J6IwlgzJMNE/1bZklWz5oTg+9dCMyEetclvCVXOPoN3A==
+
+regjsparser@^0.6.4:
+  version "0.6.7"
+  resolved "https://registry.yarnpkg.com/regjsparser/-/regjsparser-0.6.7.tgz#c00164e1e6713c2e3ee641f1701c4b7aa0a7f86c"
+  integrity sha512-ib77G0uxsA2ovgiYbCVGx4Pv3PSttAx2vIwidqQzbL2U5S4Q+j00HdSAneSBuyVcMvEnTXMjiGgB+DlXozVhpQ==
+  dependencies:
+    jsesc "~0.5.0"
+
+relateurl@^0.2.7:
+  version "0.2.7"
+  resolved "https://registry.yarnpkg.com/relateurl/-/relateurl-0.2.7.tgz#54dbf377e51440aca90a4cd274600d3ff2d888a9"
+  integrity sha1-VNvzd+UUQKypCkzSdGANP/LYiKk=
+
+remove-trailing-separator@^1.0.1:
+  version "1.1.0"
+  resolved "https://registry.yarnpkg.com/remove-trailing-separator/-/remove-trailing-separator-1.1.0.tgz#c24bce2a283adad5bc3f58e0d48249b92379d8ef"
+  integrity sha1-wkvOKig62tW8P1jg1IJJuSN52O8=
+
+renderkid@^2.0.4:
+  version "2.0.5"
+  resolved "https://registry.yarnpkg.com/renderkid/-/renderkid-2.0.5.tgz#483b1ac59c6601ab30a7a596a5965cabccfdd0a5"
+  integrity sha512-ccqoLg+HLOHq1vdfYNm4TBeaCDIi1FLt3wGojTDSvdewUv65oTmI3cnT2E4hRjl1gzKZIPK+KZrXzlUYKnR+vQ==
+  dependencies:
+    css-select "^2.0.2"
+    dom-converter "^0.2"
+    htmlparser2 "^3.10.1"
+    lodash "^4.17.20"
+    strip-ansi "^3.0.0"
+
+repeat-element@^1.1.2:
+  version "1.1.3"
+  resolved "https://registry.yarnpkg.com/repeat-element/-/repeat-element-1.1.3.tgz#782e0d825c0c5a3bb39731f84efee6b742e6b1ce"
+  integrity sha512-ahGq0ZnV5m5XtZLMb+vP76kcAM5nkLqk0lpqAuojSKGgQtn4eRi4ZZGm2olo2zKFH+sMsWaqOCW1dqAnOru72g==
+
+repeat-string@^1.6.1:
+  version "1.6.1"
+  resolved "https://registry.yarnpkg.com/repeat-string/-/repeat-string-1.6.1.tgz#8dcae470e1c88abc2d600fff4a776286da75e637"
+  integrity sha1-jcrkcOHIirwtYA//Sndihtp15jc=
+
+request-promise-core@1.1.4:
+  version "1.1.4"
+  resolved "https://registry.yarnpkg.com/request-promise-core/-/request-promise-core-1.1.4.tgz#3eedd4223208d419867b78ce815167d10593a22f"
+  integrity sha512-TTbAfBBRdWD7aNNOoVOBH4pN/KigV6LyapYNNlAPA8JwbovRti1E88m3sYAwsLi5ryhPKsE9APwnjFTgdUjTpw==
+  dependencies:
+    lodash "^4.17.19"
+
+request-promise-native@^1.0.8:
+  version "1.0.9"
+  resolved "https://registry.yarnpkg.com/request-promise-native/-/request-promise-native-1.0.9.tgz#e407120526a5efdc9a39b28a5679bf47b9d9dc28"
+  integrity sha512-wcW+sIUiWnKgNY0dqCpOZkUbF/I+YPi+f09JZIDa39Ec+q82CpSYniDp+ISgTTbKmnpJWASeJBPZmoxH84wt3g==
+  dependencies:
+    request-promise-core "1.1.4"
+    stealthy-require "^1.1.1"
+    tough-cookie "^2.3.3"
+
+request@^2.88.2:
+  version "2.88.2"
+  resolved "https://registry.yarnpkg.com/request/-/request-2.88.2.tgz#d73c918731cb5a87da047e207234146f664d12b3"
+  integrity sha512-MsvtOrfG9ZcrOwAW+Qi+F6HbD0CWXEh9ou77uOb7FM2WPhwT7smM833PzanhJLsgXjN89Ir6V2PczXNnMpwKhw==
+  dependencies:
+    aws-sign2 "~0.7.0"
+    aws4 "^1.8.0"
+    caseless "~0.12.0"
+    combined-stream "~1.0.6"
+    extend "~3.0.2"
+    forever-agent "~0.6.1"
+    form-data "~2.3.2"
+    har-validator "~5.1.3"
+    http-signature "~1.2.0"
+    is-typedarray "~1.0.0"
+    isstream "~0.1.2"
+    json-stringify-safe "~5.0.1"
+    mime-types "~2.1.19"
+    oauth-sign "~0.9.0"
+    performance-now "^2.1.0"
+    qs "~6.5.2"
+    safe-buffer "^5.1.2"
+    tough-cookie "~2.5.0"
+    tunnel-agent "^0.6.0"
+    uuid "^3.3.2"
+
+require-directory@^2.1.1:
+  version "2.1.1"
+  resolved "https://registry.yarnpkg.com/require-directory/-/require-directory-2.1.1.tgz#8c64ad5fd30dab1c976e2344ffe7f792a6a6df42"
+  integrity sha1-jGStX9MNqxyXbiNE/+f3kqam30I=
+
+require-from-string@^2.0.2:
+  version "2.0.2"
+  resolved "https://registry.yarnpkg.com/require-from-string/-/require-from-string-2.0.2.tgz#89a7fdd938261267318eafe14f9c32e598c36909"
+  integrity sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==
+
+require-main-filename@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/require-main-filename/-/require-main-filename-2.0.0.tgz#d0b329ecc7cc0f61649f62215be69af54aa8989b"
+  integrity sha512-NKN5kMDylKuldxYLSUfrbo5Tuzh4hd+2E8NPPX02mZtn1VuREQToYe/ZdlJy+J3uCpfaiGF05e7B8W0iXbQHmg==
+
+requires-port@^1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/requires-port/-/requires-port-1.0.0.tgz#925d2601d39ac485e091cf0da5c6e694dc3dcaff"
+  integrity sha1-kl0mAdOaxIXgkc8NpcbmlNw9yv8=
+
+resolve-cwd@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/resolve-cwd/-/resolve-cwd-2.0.0.tgz#00a9f7387556e27038eae232caa372a6a59b665a"
+  integrity sha1-AKn3OHVW4nA46uIyyqNypqWbZlo=
+  dependencies:
+    resolve-from "^3.0.0"
+
+resolve-cwd@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/resolve-cwd/-/resolve-cwd-3.0.0.tgz#0f0075f1bb2544766cf73ba6a6e2adfebcb13f2d"
+  integrity sha512-OrZaX2Mb+rJCpH/6CpSqt9xFVpN++x01XnN2ie9g6P5/3xelLAkXWVADpdz1IHD/KFfEXyE6V0U01OQ3UO2rEg==
+  dependencies:
+    resolve-from "^5.0.0"
+
+resolve-from@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/resolve-from/-/resolve-from-3.0.0.tgz#b22c7af7d9d6881bc8b6e653335eebcb0a188748"
+  integrity sha1-six699nWiBvItuZTM17rywoYh0g=
+
+resolve-from@^4.0.0:
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/resolve-from/-/resolve-from-4.0.0.tgz#4abcd852ad32dd7baabfe9b40e00a36db5f392e6"
+  integrity sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g==
+
+resolve-from@^5.0.0:
+  version "5.0.0"
+  resolved "https://registry.yarnpkg.com/resolve-from/-/resolve-from-5.0.0.tgz#c35225843df8f776df21c57557bc087e9dfdfc69"
+  integrity sha512-qYg9KP24dD5qka9J47d0aVky0N+b4fTU89LN9iDnjB5waksiC49rvMB0PrUJQGoTmH50XPiqOvAjDfaijGxYZw==
+
+resolve-pathname@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/resolve-pathname/-/resolve-pathname-3.0.0.tgz#99d02224d3cf263689becbb393bc560313025dcd"
+  integrity sha512-C7rARubxI8bXFNB/hqcp/4iUeIXJhJZvFPFPiSPRnhU5UPxzMFIl+2E6yY6c4k9giDJAhtV+enfA+G89N6Csng==
+
+resolve-url-loader@^3.1.2:
+  version "3.1.2"
+  resolved "https://registry.yarnpkg.com/resolve-url-loader/-/resolve-url-loader-3.1.2.tgz#235e2c28e22e3e432ba7a5d4e305c59a58edfc08"
+  integrity sha512-QEb4A76c8Mi7I3xNKXlRKQSlLBwjUV/ULFMP+G7n3/7tJZ8MG5wsZ3ucxP1Jz8Vevn6fnJsxDx9cIls+utGzPQ==
+  dependencies:
+    adjust-sourcemap-loader "3.0.0"
+    camelcase "5.3.1"
+    compose-function "3.0.3"
+    convert-source-map "1.7.0"
+    es6-iterator "2.0.3"
+    loader-utils "1.2.3"
+    postcss "7.0.21"
+    rework "1.0.1"
+    rework-visit "1.0.0"
+    source-map "0.6.1"
+
+resolve-url@^0.2.1:
+  version "0.2.1"
+  resolved "https://registry.yarnpkg.com/resolve-url/-/resolve-url-0.2.1.tgz#2c637fe77c893afd2a663fe21aa9080068e2052a"
+  integrity sha1-LGN/53yJOv0qZj/iGqkIAGjiBSo=
+
+resolve@1.18.1:
+  version "1.18.1"
+  resolved "https://registry.yarnpkg.com/resolve/-/resolve-1.18.1.tgz#018fcb2c5b207d2a6424aee361c5a266da8f4130"
+  integrity sha512-lDfCPaMKfOJXjy0dPayzPdF1phampNWr3qFCjAu+rw/qbQmr5jWH5xN2hwh9QKfw9E5v4hwV7A+jrCmL8yjjqA==
+  dependencies:
+    is-core-module "^2.0.0"
+    path-parse "^1.0.6"
+
+resolve@^1.10.0, resolve@^1.12.0, resolve@^1.13.1, resolve@^1.14.2, resolve@^1.17.0, resolve@^1.18.1, resolve@^1.3.2, resolve@^1.8.1:
+  version "1.20.0"
+  resolved "https://registry.yarnpkg.com/resolve/-/resolve-1.20.0.tgz#629a013fb3f70755d6f0b7935cc1c2c5378b1975"
+  integrity sha512-wENBPt4ySzg4ybFQW2TT1zMQucPK95HSh/nq2CFTZVOGut2+pQvSsgtda4d26YrYcr067wjbmzOG8byDPBX63A==
+  dependencies:
+    is-core-module "^2.2.0"
+    path-parse "^1.0.6"
+
+ret@~0.1.10:
+  version "0.1.15"
+  resolved "https://registry.yarnpkg.com/ret/-/ret-0.1.15.tgz#b8a4825d5bdb1fc3f6f53c2bc33f81388681c7bc"
+  integrity sha512-TTlYpa+OL+vMMNG24xSlQGEJ3B/RzEfUlLct7b5G/ytav+wPrplCpVMFuwzXbkecJrb6IYo1iFb0S9v37754mg==
+
+retry@^0.12.0:
+  version "0.12.0"
+  resolved "https://registry.yarnpkg.com/retry/-/retry-0.12.0.tgz#1b42a6266a21f07421d1b0b54b7dc167b01c013b"
+  integrity sha1-G0KmJmoh8HQh0bC1S33BZ7AcATs=
+
+reusify@^1.0.4:
+  version "1.0.4"
+  resolved "https://registry.yarnpkg.com/reusify/-/reusify-1.0.4.tgz#90da382b1e126efc02146e90845a88db12925d76"
+  integrity sha512-U9nH88a3fc/ekCF1l0/UP1IosiuIjyTh7hBvXVMHYgVcfGvt897Xguj2UOLDeI5BG2m7/uwyaLVT6fbtCwTyzw==
+
+rework-visit@1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/rework-visit/-/rework-visit-1.0.0.tgz#9945b2803f219e2f7aca00adb8bc9f640f842c9a"
+  integrity sha1-mUWygD8hni96ygCtuLyfZA+ELJo=
+
+rework@1.0.1:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/rework/-/rework-1.0.1.tgz#30806a841342b54510aa4110850cd48534144aa7"
+  integrity sha1-MIBqhBNCtUUQqkEQhQzUhTQUSqc=
+  dependencies:
+    convert-source-map "^0.3.3"
+    css "^2.0.0"
+
+rgb-regex@^1.0.1:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/rgb-regex/-/rgb-regex-1.0.1.tgz#c0e0d6882df0e23be254a475e8edd41915feaeb1"
+  integrity sha1-wODWiC3w4jviVKR16O3UGRX+rrE=
+
+rgba-regex@^1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/rgba-regex/-/rgba-regex-1.0.0.tgz#43374e2e2ca0968b0ef1523460b7d730ff22eeb3"
+  integrity sha1-QzdOLiyglosO8VI0YLfXMP8i7rM=
+
+rimraf@^2.5.4, rimraf@^2.6.3:
+  version "2.7.1"
+  resolved "https://registry.yarnpkg.com/rimraf/-/rimraf-2.7.1.tgz#35797f13a7fdadc566142c29d4f07ccad483e3ec"
+  integrity sha512-uWjbaKIK3T1OSVptzX7Nl6PvQ3qAGtKEtVRjRuazjfL3Bx5eI409VZSqgND+4UNnmzLVdPj9FqFJNPqBZFve4w==
+  dependencies:
+    glob "^7.1.3"
+
+rimraf@^3.0.0, rimraf@^3.0.2:
+  version "3.0.2"
+  resolved "https://registry.yarnpkg.com/rimraf/-/rimraf-3.0.2.tgz#f1a5402ba6220ad52cc1282bac1ae3aa49fd061a"
+  integrity sha512-JZkJMZkAGFFPP2YqXZXPbMlMBgsxzE8ILs4lMIX/2o0L9UBw9O/Y3o6wFw/i9YLapcUJWwqbi3kdxIPdC62TIA==
+  dependencies:
+    glob "^7.1.3"
+
+ripemd160@^2.0.0, ripemd160@^2.0.1:
+  version "2.0.2"
+  resolved "https://registry.yarnpkg.com/ripemd160/-/ripemd160-2.0.2.tgz#a1c1a6f624751577ba5d07914cbc92850585890c"
+  integrity sha512-ii4iagi25WusVoiC4B4lq7pbXfAp3D9v5CwfkY33vffw2+pkDjY1D8GaN7spsxvCSx8dkPqOZCEZyfxcmJG2IA==
+  dependencies:
+    hash-base "^3.0.0"
+    inherits "^2.0.1"
+
+rollup-plugin-babel@^4.3.3:
+  version "4.4.0"
+  resolved "https://registry.yarnpkg.com/rollup-plugin-babel/-/rollup-plugin-babel-4.4.0.tgz#d15bd259466a9d1accbdb2fe2fff17c52d030acb"
+  integrity sha512-Lek/TYp1+7g7I+uMfJnnSJ7YWoD58ajo6Oarhlex7lvUce+RCKRuGRSgztDO3/MF/PuGKmUL5iTHKf208UNszw==
+  dependencies:
+    "@babel/helper-module-imports" "^7.0.0"
+    rollup-pluginutils "^2.8.1"
+
+rollup-plugin-terser@^5.3.1:
+  version "5.3.1"
+  resolved "https://registry.yarnpkg.com/rollup-plugin-terser/-/rollup-plugin-terser-5.3.1.tgz#8c650062c22a8426c64268548957463bf981b413"
+  integrity sha512-1pkwkervMJQGFYvM9nscrUoncPwiKR/K+bHdjv6PFgRo3cgPHoRT83y2Aa3GvINj4539S15t/tpFPb775TDs6w==
+  dependencies:
+    "@babel/code-frame" "^7.5.5"
+    jest-worker "^24.9.0"
+    rollup-pluginutils "^2.8.2"
+    serialize-javascript "^4.0.0"
+    terser "^4.6.2"
+
+rollup-pluginutils@^2.8.1, rollup-pluginutils@^2.8.2:
+  version "2.8.2"
+  resolved "https://registry.yarnpkg.com/rollup-pluginutils/-/rollup-pluginutils-2.8.2.tgz#72f2af0748b592364dbd3389e600e5a9444a351e"
+  integrity sha512-EEp9NhnUkwY8aif6bxgovPHMoMoNr2FulJziTndpt5H9RdwC47GSGuII9XxpSdzVGM0GWrNPHV6ie1LTNJPaLQ==
+  dependencies:
+    estree-walker "^0.6.1"
+
+rollup@^1.31.1:
+  version "1.32.1"
+  resolved "https://registry.yarnpkg.com/rollup/-/rollup-1.32.1.tgz#4480e52d9d9e2ae4b46ba0d9ddeaf3163940f9c4"
+  integrity sha512-/2HA0Ec70TvQnXdzynFffkjA6XN+1e2pEv/uKS5Ulca40g2L7KuOE3riasHoNVHOsFD5KKZgDsMk1CP3Tw9s+A==
+  dependencies:
+    "@types/estree" "*"
+    "@types/node" "*"
+    acorn "^7.1.0"
+
+rsvp@^4.8.4:
+  version "4.8.5"
+  resolved "https://registry.yarnpkg.com/rsvp/-/rsvp-4.8.5.tgz#c8f155311d167f68f21e168df71ec5b083113734"
+  integrity sha512-nfMOlASu9OnRJo1mbEk2cz0D56a1MBNrJ7orjRZQG10XDyuvwksKbuXNp6qa+kbn839HwjwhBzhFmdsaEAfauA==
+
+run-parallel@^1.1.9:
+  version "1.2.0"
+  resolved "https://registry.yarnpkg.com/run-parallel/-/run-parallel-1.2.0.tgz#66d1368da7bdf921eb9d95bd1a9229e7f21a43ee"
+  integrity sha512-5l4VyZR86LZ/lDxZTR6jqL8AFE2S0IFLMP26AbjsLVADxHdhB/c0GUsH+y39UfCi3dzz8OlQuPmnaJOMoDHQBA==
+  dependencies:
+    queue-microtask "^1.2.2"
+
+run-queue@^1.0.0, run-queue@^1.0.3:
+  version "1.0.3"
+  resolved "https://registry.yarnpkg.com/run-queue/-/run-queue-1.0.3.tgz#e848396f057d223f24386924618e25694161ec47"
+  integrity sha1-6Eg5bwV9Ij8kOGkkYY4laUFh7Ec=
+  dependencies:
+    aproba "^1.1.1"
+
+safe-buffer@5.1.2, safe-buffer@~5.1.0, safe-buffer@~5.1.1:
+  version "5.1.2"
+  resolved "https://registry.yarnpkg.com/safe-buffer/-/safe-buffer-5.1.2.tgz#991ec69d296e0313747d59bdfd2b745c35f8828d"
+  integrity sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==
+
+safe-buffer@>=5.1.0, safe-buffer@^5.0.1, safe-buffer@^5.1.0, safe-buffer@^5.1.1, safe-buffer@^5.1.2, safe-buffer@^5.2.0, safe-buffer@~5.2.0:
+  version "5.2.1"
+  resolved "https://registry.yarnpkg.com/safe-buffer/-/safe-buffer-5.2.1.tgz#1eaf9fa9bdb1fdd4ec75f58f9cdb4e6b7827eec6"
+  integrity sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==
+
+safe-regex@^1.1.0:
+  version "1.1.0"
+  resolved "https://registry.yarnpkg.com/safe-regex/-/safe-regex-1.1.0.tgz#40a3669f3b077d1e943d44629e157dd48023bf2e"
+  integrity sha1-QKNmnzsHfR6UPURinhV91IAjvy4=
+  dependencies:
+    ret "~0.1.10"
+
+"safer-buffer@>= 2.1.2 < 3", safer-buffer@^2.0.2, safer-buffer@^2.1.0, safer-buffer@~2.1.0:
+  version "2.1.2"
+  resolved "https://registry.yarnpkg.com/safer-buffer/-/safer-buffer-2.1.2.tgz#44fa161b0187b9549dd84bb91802f9bd8385cd6a"
+  integrity sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==
+
+sane@^4.0.3:
+  version "4.1.0"
+  resolved "https://registry.yarnpkg.com/sane/-/sane-4.1.0.tgz#ed881fd922733a6c461bc189dc2b6c006f3ffded"
+  integrity sha512-hhbzAgTIX8O7SHfp2c8/kREfEn4qO/9q8C9beyY6+tvZ87EpoZ3i1RIEvp27YBswnNbY9mWd6paKVmKbAgLfZA==
+  dependencies:
+    "@cnakazawa/watch" "^1.0.3"
+    anymatch "^2.0.0"
+    capture-exit "^2.0.0"
+    exec-sh "^0.3.2"
+    execa "^1.0.0"
+    fb-watchman "^2.0.0"
+    micromatch "^3.1.4"
+    minimist "^1.1.1"
+    walker "~1.0.5"
+
+sanitize.css@^10.0.0:
+  version "10.0.0"
+  resolved "https://registry.yarnpkg.com/sanitize.css/-/sanitize.css-10.0.0.tgz#b5cb2547e96d8629a60947544665243b1dc3657a"
+  integrity sha512-vTxrZz4dX5W86M6oVWVdOVe72ZiPs41Oi7Z6Km4W5Turyz28mrXSJhhEBZoRtzJWIv3833WKVwLSDWWkEfupMg==
+
+sass-loader@^10.0.5:
+  version "10.1.1"
+  resolved "https://registry.yarnpkg.com/sass-loader/-/sass-loader-10.1.1.tgz#4ddd5a3d7638e7949065dd6e9c7c04037f7e663d"
+  integrity sha512-W6gVDXAd5hR/WHsPicvZdjAWHBcEJ44UahgxcIE196fW2ong0ZHMPO1kZuI5q0VlvMQZh32gpv69PLWQm70qrw==
+  dependencies:
+    klona "^2.0.4"
+    loader-utils "^2.0.0"
+    neo-async "^2.6.2"
+    schema-utils "^3.0.0"
+    semver "^7.3.2"
+
+sax@~1.2.4:
+  version "1.2.4"
+  resolved "https://registry.yarnpkg.com/sax/-/sax-1.2.4.tgz#2816234e2378bddc4e5354fab5caa895df7100d9"
+  integrity sha512-NqVDv9TpANUjFm0N8uM5GxL36UgKi9/atZw+x7YFnQ8ckwFGKrl4xX4yWtrey3UJm5nP1kUbnYgLopqWNSRhWw==
+
+saxes@^5.0.0:
+  version "5.0.1"
+  resolved "https://registry.yarnpkg.com/saxes/-/saxes-5.0.1.tgz#eebab953fa3b7608dbe94e5dadb15c888fa6696d"
+  integrity sha512-5LBh1Tls8c9xgGjw3QrMwETmTMVk0oFgvrFSvWx62llR2hcEInrKNZ2GZCCuuy2lvWrdl5jhbpeqc5hRYKFOcw==
+  dependencies:
+    xmlchars "^2.2.0"
+
+scheduler@^0.20.1:
+  version "0.20.1"
+  resolved "https://registry.yarnpkg.com/scheduler/-/scheduler-0.20.1.tgz#da0b907e24026b01181ecbc75efdc7f27b5a000c"
+  integrity sha512-LKTe+2xNJBNxu/QhHvDR14wUXHRQbVY5ZOYpOGWRzhydZUqrLb2JBvLPY7cAqFmqrWuDED0Mjk7013SZiOz6Bw==
+  dependencies:
+    loose-envify "^1.1.0"
+    object-assign "^4.1.1"
+
+schema-utils@^1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/schema-utils/-/schema-utils-1.0.0.tgz#0b79a93204d7b600d4b2850d1f66c2a34951c770"
+  integrity sha512-i27Mic4KovM/lnGsy8whRCHhc7VicJajAjTrYg11K9zfZXnYIt4k5F+kZkwjnrhKzLic/HLU4j11mjsz2G/75g==
+  dependencies:
+    ajv "^6.1.0"
+    ajv-errors "^1.0.0"
+    ajv-keywords "^3.1.0"
+
+schema-utils@^2.6.5, schema-utils@^2.7.0, schema-utils@^2.7.1:
+  version "2.7.1"
+  resolved "https://registry.yarnpkg.com/schema-utils/-/schema-utils-2.7.1.tgz#1ca4f32d1b24c590c203b8e7a50bf0ea4cd394d7"
+  integrity sha512-SHiNtMOUGWBQJwzISiVYKu82GiV4QYGePp3odlY1tuKO7gPtphAT5R/py0fA6xtbgLL/RvtJZnU9b8s0F1q0Xg==
+  dependencies:
+    "@types/json-schema" "^7.0.5"
+    ajv "^6.12.4"
+    ajv-keywords "^3.5.2"
+
+schema-utils@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/schema-utils/-/schema-utils-3.0.0.tgz#67502f6aa2b66a2d4032b4279a2944978a0913ef"
+  integrity sha512-6D82/xSzO094ajanoOSbe4YvXWMfn2A//8Y1+MUqFAJul5Bs+yn36xbK9OtNDcRVSBJ9jjeoXftM6CfztsjOAA==
+  dependencies:
+    "@types/json-schema" "^7.0.6"
+    ajv "^6.12.5"
+    ajv-keywords "^3.5.2"
+
+select-hose@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/select-hose/-/select-hose-2.0.0.tgz#625d8658f865af43ec962bfc376a37359a4994ca"
+  integrity sha1-Yl2GWPhlr0Psliv8N2o3NZpJlMo=
+
+selfsigned@^1.10.8:
+  version "1.10.8"
+  resolved "https://registry.yarnpkg.com/selfsigned/-/selfsigned-1.10.8.tgz#0d17208b7d12c33f8eac85c41835f27fc3d81a30"
+  integrity sha512-2P4PtieJeEwVgTU9QEcwIRDQ/mXJLX8/+I3ur+Pg16nS8oNbrGxEso9NyYWy8NAmXiNl4dlAp5MwoNeCWzON4w==
+  dependencies:
+    node-forge "^0.10.0"
+
+"semver@2 || 3 || 4 || 5", semver@^5.4.1, semver@^5.5.0, semver@^5.5.1, semver@^5.6.0:
+  version "5.7.1"
+  resolved "https://registry.yarnpkg.com/semver/-/semver-5.7.1.tgz#a954f931aeba508d307bbf069eff0c01c96116f7"
+  integrity sha512-sauaDf/PZdVgrLTNYHRtpXa1iRiKcaebiKQ1BJdpQlWH2lCvexQdX55snPFyK7QzpudqbCI0qXFfOasHdyNDGQ==
+
+semver@7.0.0:
+  version "7.0.0"
+  resolved "https://registry.yarnpkg.com/semver/-/semver-7.0.0.tgz#5f3ca35761e47e05b206c6daff2cf814f0316b8e"
+  integrity sha512-+GB6zVA9LWh6zovYQLALHwv5rb2PHGlJi3lfiqIHxR0uuwCgefcOJc59v9fv1w8GbStwxuuqqAjI9NMAOOgq1A==
+
+semver@7.3.2:
+  version "7.3.2"
+  resolved "https://registry.yarnpkg.com/semver/-/semver-7.3.2.tgz#604962b052b81ed0786aae84389ffba70ffd3938"
+  integrity sha512-OrOb32TeeambH6UrhtShmF7CRDqhL6/5XpPNp2DuRH6+9QLw/orhp72j87v8Qa1ScDkvrrBNpZcDejAirJmfXQ==
+
+semver@^6.0.0, semver@^6.3.0:
+  version "6.3.0"
+  resolved "https://registry.yarnpkg.com/semver/-/semver-6.3.0.tgz#ee0a64c8af5e8ceea67687b133761e1becbd1d3d"
+  integrity sha512-b39TBaTSfV6yBrapU89p5fKekE2m/NwnDocOVruQFS1/veMgdzuPcnOM34M6CwxW8jH/lxEa5rBoDeUwu5HHTw==
+
+semver@^7.2.1, semver@^7.3.2:
+  version "7.3.4"
+  resolved "https://registry.yarnpkg.com/semver/-/semver-7.3.4.tgz#27aaa7d2e4ca76452f98d3add093a72c943edc97"
+  integrity sha512-tCfb2WLjqFAtXn4KEdxIhalnRtoKFN7nAwj0B3ZXCbQloV2tq5eDbcTmT68JJD3nRJq24/XgxtQKFIpQdtvmVw==
+  dependencies:
+    lru-cache "^6.0.0"
+
+send@0.17.1:
+  version "0.17.1"
+  resolved "https://registry.yarnpkg.com/send/-/send-0.17.1.tgz#c1d8b059f7900f7466dd4938bdc44e11ddb376c8"
+  integrity sha512-BsVKsiGcQMFwT8UxypobUKyv7irCNRHk1T0G680vk88yf6LBByGcZJOTJCrTP2xVN6yI+XjPJcNuE3V4fT9sAg==
+  dependencies:
+    debug "2.6.9"
+    depd "~1.1.2"
+    destroy "~1.0.4"
+    encodeurl "~1.0.2"
+    escape-html "~1.0.3"
+    etag "~1.8.1"
+    fresh "0.5.2"
+    http-errors "~1.7.2"
+    mime "1.6.0"
+    ms "2.1.1"
+    on-finished "~2.3.0"
+    range-parser "~1.2.1"
+    statuses "~1.5.0"
+
+serialize-javascript@^4.0.0:
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/serialize-javascript/-/serialize-javascript-4.0.0.tgz#b525e1238489a5ecfc42afacc3fe99e666f4b1aa"
+  integrity sha512-GaNA54380uFefWghODBWEGisLZFj00nS5ACs6yHa9nLqlLpVLO8ChDGeKRjZnV4Nh4n0Qi7nhYZD/9fCPzEqkw==
+  dependencies:
+    randombytes "^2.1.0"
+
+serialize-javascript@^5.0.1:
+  version "5.0.1"
+  resolved "https://registry.yarnpkg.com/serialize-javascript/-/serialize-javascript-5.0.1.tgz#7886ec848049a462467a97d3d918ebb2aaf934f4"
+  integrity sha512-SaaNal9imEO737H2c05Og0/8LUXG7EnsZyMa8MzkmuHoELfT6txuj0cMqRj6zfPKnmQ1yasR4PCJc8x+M4JSPA==
+  dependencies:
+    randombytes "^2.1.0"
+
+serve-index@^1.9.1:
+  version "1.9.1"
+  resolved "https://registry.yarnpkg.com/serve-index/-/serve-index-1.9.1.tgz#d3768d69b1e7d82e5ce050fff5b453bea12a9239"
+  integrity sha1-03aNabHn2C5c4FD/9bRTvqEqkjk=
+  dependencies:
+    accepts "~1.3.4"
+    batch "0.6.1"
+    debug "2.6.9"
+    escape-html "~1.0.3"
+    http-errors "~1.6.2"
+    mime-types "~2.1.17"
+    parseurl "~1.3.2"
+
+serve-static@1.14.1:
+  version "1.14.1"
+  resolved "https://registry.yarnpkg.com/serve-static/-/serve-static-1.14.1.tgz#666e636dc4f010f7ef29970a88a674320898b2f9"
+  integrity sha512-JMrvUwE54emCYWlTI+hGrGv5I8dEwmco/00EvkzIIsR7MqrHonbD9pO2MOfFnpFntl7ecpZs+3mW+XbQZu9QCg==
+  dependencies:
+    encodeurl "~1.0.2"
+    escape-html "~1.0.3"
+    parseurl "~1.3.3"
+    send "0.17.1"
+
+set-blocking@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/set-blocking/-/set-blocking-2.0.0.tgz#045f9782d011ae9a6803ddd382b24392b3d890f7"
+  integrity sha1-BF+XgtARrppoA93TgrJDkrPYkPc=
+
+set-value@^2.0.0, set-value@^2.0.1:
+  version "2.0.1"
+  resolved "https://registry.yarnpkg.com/set-value/-/set-value-2.0.1.tgz#a18d40530e6f07de4228c7defe4227af8cad005b"
+  integrity sha512-JxHc1weCN68wRY0fhCoXpyK55m/XPHafOmK4UWD7m2CI14GMcFypt4w/0+NV5f/ZMby2F6S2wwA7fgynh9gWSw==
+  dependencies:
+    extend-shallow "^2.0.1"
+    is-extendable "^0.1.1"
+    is-plain-object "^2.0.3"
+    split-string "^3.0.1"
+
+setimmediate@^1.0.4:
+  version "1.0.5"
+  resolved "https://registry.yarnpkg.com/setimmediate/-/setimmediate-1.0.5.tgz#290cbb232e306942d7d7ea9b83732ab7856f8285"
+  integrity sha1-KQy7Iy4waULX1+qbg3Mqt4VvgoU=
+
+setprototypeof@1.1.0:
+  version "1.1.0"
+  resolved "https://registry.yarnpkg.com/setprototypeof/-/setprototypeof-1.1.0.tgz#d0bd85536887b6fe7c0d818cb962d9d91c54e656"
+  integrity sha512-BvE/TwpZX4FXExxOxZyRGQQv651MSwmWKZGqvmPcRIjDqWub67kTKuIMx43cZZrS/cBBzwBcNDWoFxt2XEFIpQ==
+
+setprototypeof@1.1.1:
+  version "1.1.1"
+  resolved "https://registry.yarnpkg.com/setprototypeof/-/setprototypeof-1.1.1.tgz#7e95acb24aa92f5885e0abef5ba131330d4ae683"
+  integrity sha512-JvdAWfbXeIGaZ9cILp38HntZSFSo3mWg6xGcJJsd+d4aRMOqauag1C63dJfDw7OaMYwEbHMOxEZ1lqVRYP2OAw==
+
+sha.js@^2.4.0, sha.js@^2.4.8:
+  version "2.4.11"
+  resolved "https://registry.yarnpkg.com/sha.js/-/sha.js-2.4.11.tgz#37a5cf0b81ecbc6943de109ba2960d1b26584ae7"
+  integrity sha512-QMEp5B7cftE7APOjk5Y6xgrbWu+WkLVQwk8JNjZ8nKRciZaByEW6MubieAiToS7+dwvrjGhH8jRXz3MVd0AYqQ==
+  dependencies:
+    inherits "^2.0.1"
+    safe-buffer "^5.0.1"
+
+shebang-command@^1.2.0:
+  version "1.2.0"
+  resolved "https://registry.yarnpkg.com/shebang-command/-/shebang-command-1.2.0.tgz#44aac65b695b03398968c39f363fee5deafdf1ea"
+  integrity sha1-RKrGW2lbAzmJaMOfNj/uXer98eo=
+  dependencies:
+    shebang-regex "^1.0.0"
+
+shebang-command@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/shebang-command/-/shebang-command-2.0.0.tgz#ccd0af4f8835fbdc265b82461aaf0c36663f34ea"
+  integrity sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==
+  dependencies:
+    shebang-regex "^3.0.0"
+
+shebang-regex@^1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/shebang-regex/-/shebang-regex-1.0.0.tgz#da42f49740c0b42db2ca9728571cb190c98efea3"
+  integrity sha1-2kL0l0DAtC2yypcoVxyxkMmO/qM=
+
+shebang-regex@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/shebang-regex/-/shebang-regex-3.0.0.tgz#ae16f1644d873ecad843b0307b143362d4c42172"
+  integrity sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==
+
+shell-quote@1.7.2:
+  version "1.7.2"
+  resolved "https://registry.yarnpkg.com/shell-quote/-/shell-quote-1.7.2.tgz#67a7d02c76c9da24f99d20808fcaded0e0e04be2"
+  integrity sha512-mRz/m/JVscCrkMyPqHc/bczi3OQHkLTqXHEFu0zDhK/qfv3UcOA4SVmRCLmos4bhjr9ekVQubj/R7waKapmiQg==
+
+shellwords@^0.1.1:
+  version "0.1.1"
+  resolved "https://registry.yarnpkg.com/shellwords/-/shellwords-0.1.1.tgz#d6b9181c1a48d397324c84871efbcfc73fc0654b"
+  integrity sha512-vFwSUfQvqybiICwZY5+DAWIPLKsWO31Q91JSKl3UYv+K5c2QRPzn0qzec6QPu1Qc9eHYItiP3NdJqNVqetYAww==
+
+side-channel@^1.0.4:
+  version "1.0.4"
+  resolved "https://registry.yarnpkg.com/side-channel/-/side-channel-1.0.4.tgz#efce5c8fdc104ee751b25c58d4290011fa5ea2cf"
+  integrity sha512-q5XPytqFEIKHkGdiMIrY10mvLRvnQh42/+GoBlFW3b2LXLE2xxJpZFdm94we0BaoV3RwJyGqg5wS7epxTv0Zvw==
+  dependencies:
+    call-bind "^1.0.0"
+    get-intrinsic "^1.0.2"
+    object-inspect "^1.9.0"
+
+signal-exit@^3.0.0, signal-exit@^3.0.2:
+  version "3.0.3"
+  resolved "https://registry.yarnpkg.com/signal-exit/-/signal-exit-3.0.3.tgz#a1410c2edd8f077b08b4e253c8eacfcaf057461c"
+  integrity sha512-VUJ49FC8U1OxwZLxIbTTrDvLnf/6TDgxZcK8wxR8zs13xpx7xbG60ndBlhNrFi2EMuFRoeDoJO7wthSLq42EjA==
+
+simple-swizzle@^0.2.2:
+  version "0.2.2"
+  resolved "https://registry.yarnpkg.com/simple-swizzle/-/simple-swizzle-0.2.2.tgz#a4da6b635ffcccca33f70d17cb92592de95e557a"
+  integrity sha1-pNprY1/8zMoz9w0Xy5JZLeleVXo=
+  dependencies:
+    is-arrayish "^0.3.1"
+
+sisteransi@^1.0.5:
+  version "1.0.5"
+  resolved "https://registry.yarnpkg.com/sisteransi/-/sisteransi-1.0.5.tgz#134d681297756437cc05ca01370d3a7a571075ed"
+  integrity sha512-bLGGlR1QxBcynn2d5YmDX4MGjlZvy2MRBDRNHLJ8VI6l6+9FUiyTFNJ0IveOSP0bcXgVDPRcfGqA0pjaqUpfVg==
+
+slash@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/slash/-/slash-3.0.0.tgz#6539be870c165adbd5240220dbe361f1bc4d4634"
+  integrity sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q==
+
+slice-ansi@^4.0.0:
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/slice-ansi/-/slice-ansi-4.0.0.tgz#500e8dd0fd55b05815086255b3195adf2a45fe6b"
+  integrity sha512-qMCMfhY040cVHT43K9BFygqYbUPFZKHOg7K73mtTWJRb8pyP3fzf4Ixd5SzdEJQ6MRUg/WBnOLxghZtKKurENQ==
+  dependencies:
+    ansi-styles "^4.0.0"
+    astral-regex "^2.0.0"
+    is-fullwidth-code-point "^3.0.0"
+
+snapdragon-node@^2.0.1:
+  version "2.1.1"
+  resolved "https://registry.yarnpkg.com/snapdragon-node/-/snapdragon-node-2.1.1.tgz#6c175f86ff14bdb0724563e8f3c1b021a286853b"
+  integrity sha512-O27l4xaMYt/RSQ5TR3vpWCAB5Kb/czIcqUFOM/C4fYcLnbZUc1PkjTAMjof2pBWaSTwOUd6qUHcFGVGj7aIwnw==
+  dependencies:
+    define-property "^1.0.0"
+    isobject "^3.0.0"
+    snapdragon-util "^3.0.1"
+
+snapdragon-util@^3.0.1:
+  version "3.0.1"
+  resolved "https://registry.yarnpkg.com/snapdragon-util/-/snapdragon-util-3.0.1.tgz#f956479486f2acd79700693f6f7b805e45ab56e2"
+  integrity sha512-mbKkMdQKsjX4BAL4bRYTj21edOf8cN7XHdYUJEe+Zn99hVEYcMvKPct1IqNe7+AZPirn8BCDOQBHQZknqmKlZQ==
+  dependencies:
+    kind-of "^3.2.0"
+
+snapdragon@^0.8.1:
+  version "0.8.2"
+  resolved "https://registry.yarnpkg.com/snapdragon/-/snapdragon-0.8.2.tgz#64922e7c565b0e14204ba1aa7d6964278d25182d"
+  integrity sha512-FtyOnWN/wCHTVXOMwvSv26d+ko5vWlIDD6zoUJ7LW8vh+ZBC8QdljveRP+crNrtBwioEUWy/4dMtbBjA4ioNlg==
+  dependencies:
+    base "^0.11.1"
+    debug "^2.2.0"
+    define-property "^0.2.5"
+    extend-shallow "^2.0.1"
+    map-cache "^0.2.2"
+    source-map "^0.5.6"
+    source-map-resolve "^0.5.0"
+    use "^3.1.0"
+
+sockjs-client@^1.5.0:
+  version "1.5.0"
+  resolved "https://registry.yarnpkg.com/sockjs-client/-/sockjs-client-1.5.0.tgz#2f8ff5d4b659e0d092f7aba0b7c386bd2aa20add"
+  integrity sha512-8Dt3BDi4FYNrCFGTL/HtwVzkARrENdwOUf1ZoW/9p3M8lZdFT35jVdrHza+qgxuG9H3/shR4cuX/X9umUrjP8Q==
+  dependencies:
+    debug "^3.2.6"
+    eventsource "^1.0.7"
+    faye-websocket "^0.11.3"
+    inherits "^2.0.4"
+    json3 "^3.3.3"
+    url-parse "^1.4.7"
+
+sockjs@^0.3.21:
+  version "0.3.21"
+  resolved "https://registry.yarnpkg.com/sockjs/-/sockjs-0.3.21.tgz#b34ffb98e796930b60a0cfa11904d6a339a7d417"
+  integrity sha512-DhbPFGpxjc6Z3I+uX07Id5ZO2XwYsWOrYjaSeieES78cq+JaJvVe5q/m1uvjIQhXinhIeCFRH6JgXe+mvVMyXw==
+  dependencies:
+    faye-websocket "^0.11.3"
+    uuid "^3.4.0"
+    websocket-driver "^0.7.4"
+
+sort-keys@^1.0.0:
+  version "1.1.2"
+  resolved "https://registry.yarnpkg.com/sort-keys/-/sort-keys-1.1.2.tgz#441b6d4d346798f1b4e49e8920adfba0e543f9ad"
+  integrity sha1-RBttTTRnmPG05J6JIK37oOVD+a0=
+  dependencies:
+    is-plain-obj "^1.0.0"
+
+source-list-map@^2.0.0:
+  version "2.0.1"
+  resolved "https://registry.yarnpkg.com/source-list-map/-/source-list-map-2.0.1.tgz#3993bd873bfc48479cca9ea3a547835c7c154b34"
+  integrity sha512-qnQ7gVMxGNxsiL4lEuJwe/To8UnK7fAnmbGEEH8RpLouuKbeEm0lhbQVFIrNSuB+G7tVrAlVsZgETT5nljf+Iw==
+
+source-map-resolve@^0.5.0, source-map-resolve@^0.5.2:
+  version "0.5.3"
+  resolved "https://registry.yarnpkg.com/source-map-resolve/-/source-map-resolve-0.5.3.tgz#190866bece7553e1f8f267a2ee82c606b5509a1a"
+  integrity sha512-Htz+RnsXWk5+P2slx5Jh3Q66vhQj1Cllm0zvnaY98+NFx+Dv2CF/f5O/t8x+KaNdrdIAsruNzoh/KpialbqAnw==
+  dependencies:
+    atob "^2.1.2"
+    decode-uri-component "^0.2.0"
+    resolve-url "^0.2.1"
+    source-map-url "^0.4.0"
+    urix "^0.1.0"
+
+source-map-resolve@^0.6.0:
+  version "0.6.0"
+  resolved "https://registry.yarnpkg.com/source-map-resolve/-/source-map-resolve-0.6.0.tgz#3d9df87e236b53f16d01e58150fc7711138e5ed2"
+  integrity sha512-KXBr9d/fO/bWo97NXsPIAW1bFSBOuCnjbNTBMO7N59hsv5i9yzRDfcYwwt0l04+VqnKC+EwzvJZIP/qkuMgR/w==
+  dependencies:
+    atob "^2.1.2"
+    decode-uri-component "^0.2.0"
+
+source-map-support@^0.5.6, source-map-support@~0.5.12, source-map-support@~0.5.19:
+  version "0.5.19"
+  resolved "https://registry.yarnpkg.com/source-map-support/-/source-map-support-0.5.19.tgz#a98b62f86dcaf4f67399648c085291ab9e8fed61"
+  integrity sha512-Wonm7zOCIJzBGQdB+thsPar0kYuCIzYvxZwlBa87yi/Mdjv7Tip2cyVbLj5o0cFPN4EVkuTwb3GDDyUx2DGnGw==
+  dependencies:
+    buffer-from "^1.0.0"
+    source-map "^0.6.0"
+
+source-map-url@^0.4.0:
+  version "0.4.1"
+  resolved "https://registry.yarnpkg.com/source-map-url/-/source-map-url-0.4.1.tgz#0af66605a745a5a2f91cf1bbf8a7afbc283dec56"
+  integrity sha512-cPiFOTLUKvJFIg4SKVScy4ilPPW6rFgMgfuZJPNoDuMs3nC1HbMUycBoJw77xFIp6z1UJQJOfx6C9GMH80DiTw==
+
+source-map@0.6.1, source-map@^0.6.0, source-map@^0.6.1, source-map@~0.6.0, source-map@~0.6.1:
+  version "0.6.1"
+  resolved "https://registry.yarnpkg.com/source-map/-/source-map-0.6.1.tgz#74722af32e9614e9c287a8d0bbde48b5e2f1a263"
+  integrity sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==
+
+source-map@^0.5.0, source-map@^0.5.6, source-map@^0.5.7:
+  version "0.5.7"
+  resolved "https://registry.yarnpkg.com/source-map/-/source-map-0.5.7.tgz#8a039d2d1021d22d1ea14c80d8ea468ba2ef3fcc"
+  integrity sha1-igOdLRAh0i0eoUyA2OpGi6LvP8w=
+
+source-map@^0.7.3, source-map@~0.7.2:
+  version "0.7.3"
+  resolved "https://registry.yarnpkg.com/source-map/-/source-map-0.7.3.tgz#5302f8169031735226544092e64981f751750383"
+  integrity sha512-CkCj6giN3S+n9qrYiBTX5gystlENnRW5jZeNLHpe6aue+SrHcG5VYwujhW9s4dY31mEGsxBDrHR6oI69fTXsaQ==
+
+sourcemap-codec@^1.4.4:
+  version "1.4.8"
+  resolved "https://registry.yarnpkg.com/sourcemap-codec/-/sourcemap-codec-1.4.8.tgz#ea804bd94857402e6992d05a38ef1ae35a9ab4c4"
+  integrity sha512-9NykojV5Uih4lgo5So5dtw+f0JgJX30KCNI8gwhz2J9A15wD0Ml6tjHKwf6fTSa6fAdVBdZeNOs9eJ71qCk8vA==
+
+spdx-correct@^3.0.0:
+  version "3.1.1"
+  resolved "https://registry.yarnpkg.com/spdx-correct/-/spdx-correct-3.1.1.tgz#dece81ac9c1e6713e5f7d1b6f17d468fa53d89a9"
+  integrity sha512-cOYcUWwhCuHCXi49RhFRCyJEK3iPj1Ziz9DpViV3tbZOwXD49QzIN3MpOLJNxh2qwq2lJJZaKMVw9qNi4jTC0w==
+  dependencies:
+    spdx-expression-parse "^3.0.0"
+    spdx-license-ids "^3.0.0"
+
+spdx-exceptions@^2.1.0:
+  version "2.3.0"
+  resolved "https://registry.yarnpkg.com/spdx-exceptions/-/spdx-exceptions-2.3.0.tgz#3f28ce1a77a00372683eade4a433183527a2163d"
+  integrity sha512-/tTrYOC7PPI1nUAgx34hUpqXuyJG+DTHJTnIULG4rDygi4xu/tfgmq1e1cIRwRzwZgo4NLySi+ricLkZkw4i5A==
+
+spdx-expression-parse@^3.0.0:
+  version "3.0.1"
+  resolved "https://registry.yarnpkg.com/spdx-expression-parse/-/spdx-expression-parse-3.0.1.tgz#cf70f50482eefdc98e3ce0a6833e4a53ceeba679"
+  integrity sha512-cbqHunsQWnJNE6KhVSMsMeH5H/L9EpymbzqTQ3uLwNCLZ1Q481oWaofqH7nO6V07xlXwY6PhQdQ2IedWx/ZK4Q==
+  dependencies:
+    spdx-exceptions "^2.1.0"
+    spdx-license-ids "^3.0.0"
+
+spdx-license-ids@^3.0.0:
+  version "3.0.7"
+  resolved "https://registry.yarnpkg.com/spdx-license-ids/-/spdx-license-ids-3.0.7.tgz#e9c18a410e5ed7e12442a549fbd8afa767038d65"
+  integrity sha512-U+MTEOO0AiDzxwFvoa4JVnMV6mZlJKk2sBLt90s7G0Gd0Mlknc7kxEn3nuDPNZRta7O2uy8oLcZLVT+4sqNZHQ==
+
+spdy-transport@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/spdy-transport/-/spdy-transport-3.0.0.tgz#00d4863a6400ad75df93361a1608605e5dcdcf31"
+  integrity sha512-hsLVFE5SjA6TCisWeJXFKniGGOpBgMLmerfO2aCyCU5s7nJ/rpAepqmFifv/GCbSbueEeAJJnmSQ2rKC/g8Fcw==
+  dependencies:
+    debug "^4.1.0"
+    detect-node "^2.0.4"
+    hpack.js "^2.1.6"
+    obuf "^1.1.2"
+    readable-stream "^3.0.6"
+    wbuf "^1.7.3"
+
+spdy@^4.0.2:
+  version "4.0.2"
+  resolved "https://registry.yarnpkg.com/spdy/-/spdy-4.0.2.tgz#b74f466203a3eda452c02492b91fb9e84a27677b"
+  integrity sha512-r46gZQZQV+Kl9oItvl1JZZqJKGr+oEkB08A6BzkiR7593/7IbtuncXHd2YoYeTsG4157ZssMu9KYvUHLcjcDoA==
+  dependencies:
+    debug "^4.1.0"
+    handle-thing "^2.0.0"
+    http-deceiver "^1.2.7"
+    select-hose "^2.0.0"
+    spdy-transport "^3.0.0"
+
+split-string@^3.0.1, split-string@^3.0.2:
+  version "3.1.0"
+  resolved "https://registry.yarnpkg.com/split-string/-/split-string-3.1.0.tgz#7cb09dda3a86585705c64b39a6466038682e8fe2"
+  integrity sha512-NzNVhJDYpwceVVii8/Hu6DKfD2G+NrQHlS/V/qgv763EYudVwEcMQNxd2lh+0VrUByXN/oJkl5grOhYWvQUYiw==
+  dependencies:
+    extend-shallow "^3.0.0"
+
+sprintf-js@~1.0.2:
+  version "1.0.3"
+  resolved "https://registry.yarnpkg.com/sprintf-js/-/sprintf-js-1.0.3.tgz#04e6926f662895354f3dd015203633b857297e2c"
+  integrity sha1-BOaSb2YolTVPPdAVIDYzuFcpfiw=
+
+sshpk@^1.7.0:
+  version "1.16.1"
+  resolved "https://registry.yarnpkg.com/sshpk/-/sshpk-1.16.1.tgz#fb661c0bef29b39db40769ee39fa70093d6f6877"
+  integrity sha512-HXXqVUq7+pcKeLqqZj6mHFUMvXtOJt1uoUx09pFW6011inTMxqI8BA8PM95myrIyyKwdnzjdFjLiE6KBPVtJIg==
+  dependencies:
+    asn1 "~0.2.3"
+    assert-plus "^1.0.0"
+    bcrypt-pbkdf "^1.0.0"
+    dashdash "^1.12.0"
+    ecc-jsbn "~0.1.1"
+    getpass "^0.1.1"
+    jsbn "~0.1.0"
+    safer-buffer "^2.0.2"
+    tweetnacl "~0.14.0"
+
+ssri@^6.0.1:
+  version "6.0.1"
+  resolved "https://registry.yarnpkg.com/ssri/-/ssri-6.0.1.tgz#2a3c41b28dd45b62b63676ecb74001265ae9edd8"
+  integrity sha512-3Wge10hNcT1Kur4PDFwEieXSCMCJs/7WvSACcrMYrNp+b8kDL1/0wJch5Ni2WrtwEa2IO8OsVfeKIciKCDx/QA==
+  dependencies:
+    figgy-pudding "^3.5.1"
+
+ssri@^8.0.0:
+  version "8.0.1"
+  resolved "https://registry.yarnpkg.com/ssri/-/ssri-8.0.1.tgz#638e4e439e2ffbd2cd289776d5ca457c4f51a2af"
+  integrity sha512-97qShzy1AiyxvPNIkLWoGua7xoQzzPjQ0HAH4B0rWKo7SZ6USuPcrUiAFrws0UH8RrbWmgq3LMTObhPIHbbBeQ==
+  dependencies:
+    minipass "^3.1.1"
+
+stable@^0.1.8:
+  version "0.1.8"
+  resolved "https://registry.yarnpkg.com/stable/-/stable-0.1.8.tgz#836eb3c8382fe2936feaf544631017ce7d47a3cf"
+  integrity sha512-ji9qxRnOVfcuLDySj9qzhGSEFVobyt1kIOSkj1qZzYLzq7Tos/oUUWvotUPQLlrsidqsK6tBH89Bc9kL5zHA6w==
+
+stack-utils@^2.0.2:
+  version "2.0.3"
+  resolved "https://registry.yarnpkg.com/stack-utils/-/stack-utils-2.0.3.tgz#cd5f030126ff116b78ccb3c027fe302713b61277"
+  integrity sha512-gL//fkxfWUsIlFL2Tl42Cl6+HFALEaB1FU76I/Fy+oZjRreP7OPMXFlGbxM7NQsI0ZpUfw76sHnv0WNYuTb7Iw==
+  dependencies:
+    escape-string-regexp "^2.0.0"
+
+stackframe@^1.1.1:
+  version "1.2.0"
+  resolved "https://registry.yarnpkg.com/stackframe/-/stackframe-1.2.0.tgz#52429492d63c62eb989804c11552e3d22e779303"
+  integrity sha512-GrdeshiRmS1YLMYgzF16olf2jJ/IzxXY9lhKOskuVziubpTYcYqyOwYeJKzQkwy7uN0fYSsbsC4RQaXf9LCrYA==
+
+static-extend@^0.1.1:
+  version "0.1.2"
+  resolved "https://registry.yarnpkg.com/static-extend/-/static-extend-0.1.2.tgz#60809c39cbff55337226fd5e0b520f341f1fb5c6"
+  integrity sha1-YICcOcv/VTNyJv1eC1IPNB8ftcY=
+  dependencies:
+    define-property "^0.2.5"
+    object-copy "^0.1.0"
+
+"statuses@>= 1.4.0 < 2", "statuses@>= 1.5.0 < 2", statuses@~1.5.0:
+  version "1.5.0"
+  resolved "https://registry.yarnpkg.com/statuses/-/statuses-1.5.0.tgz#161c7dac177659fd9811f43771fa99381478628c"
+  integrity sha1-Fhx9rBd2Wf2YEfQ3cfqZOBR4Yow=
+
+stealthy-require@^1.1.1:
+  version "1.1.1"
+  resolved "https://registry.yarnpkg.com/stealthy-require/-/stealthy-require-1.1.1.tgz#35b09875b4ff49f26a777e509b3090a3226bf24b"
+  integrity sha1-NbCYdbT/SfJqd35QmzCQoyJr8ks=
+
+stream-browserify@^2.0.1:
+  version "2.0.2"
+  resolved "https://registry.yarnpkg.com/stream-browserify/-/stream-browserify-2.0.2.tgz#87521d38a44aa7ee91ce1cd2a47df0cb49dd660b"
+  integrity sha512-nX6hmklHs/gr2FuxYDltq8fJA1GDlxKQCz8O/IM4atRqBH8OORmBNgfvW5gG10GT/qQ9u0CzIvr2X5Pkt6ntqg==
+  dependencies:
+    inherits "~2.0.1"
+    readable-stream "^2.0.2"
+
+stream-each@^1.1.0:
+  version "1.2.3"
+  resolved "https://registry.yarnpkg.com/stream-each/-/stream-each-1.2.3.tgz#ebe27a0c389b04fbcc233642952e10731afa9bae"
+  integrity sha512-vlMC2f8I2u/bZGqkdfLQW/13Zihpej/7PmSiMQsbYddxuTsJp8vRe2x2FvVExZg7FaOds43ROAuFJwPR4MTZLw==
+  dependencies:
+    end-of-stream "^1.1.0"
+    stream-shift "^1.0.0"
+
+stream-http@^2.7.2:
+  version "2.8.3"
+  resolved "https://registry.yarnpkg.com/stream-http/-/stream-http-2.8.3.tgz#b2d242469288a5a27ec4fe8933acf623de6514fc"
+  integrity sha512-+TSkfINHDo4J+ZobQLWiMouQYB+UVYFttRA94FpEzzJ7ZdqcL4uUUQ7WkdkI4DSozGmgBUE/a47L+38PenXhUw==
+  dependencies:
+    builtin-status-codes "^3.0.0"
+    inherits "^2.0.1"
+    readable-stream "^2.3.6"
+    to-arraybuffer "^1.0.0"
+    xtend "^4.0.0"
+
+stream-shift@^1.0.0:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/stream-shift/-/stream-shift-1.0.1.tgz#d7088281559ab2778424279b0877da3c392d5a3d"
+  integrity sha512-AiisoFqQ0vbGcZgQPY1cdP2I76glaVA/RauYR4G4thNFgkTqr90yXTo4LYX60Jl+sIlPNHHdGSwo01AvbKUSVQ==
+
+strict-uri-encode@^1.0.0:
+  version "1.1.0"
+  resolved "https://registry.yarnpkg.com/strict-uri-encode/-/strict-uri-encode-1.1.0.tgz#279b225df1d582b1f54e65addd4352e18faa0713"
+  integrity sha1-J5siXfHVgrH1TmWt3UNS4Y+qBxM=
+
+string-length@^4.0.1:
+  version "4.0.1"
+  resolved "https://registry.yarnpkg.com/string-length/-/string-length-4.0.1.tgz#4a973bf31ef77c4edbceadd6af2611996985f8a1"
+  integrity sha512-PKyXUd0LK0ePjSOnWn34V2uD6acUWev9uy0Ft05k0E8xRW+SKcA0F7eMr7h5xlzfn+4O3N+55rduYyet3Jk+jw==
+  dependencies:
+    char-regex "^1.0.2"
+    strip-ansi "^6.0.0"
+
+string-natural-compare@^3.0.1:
+  version "3.0.1"
+  resolved "https://registry.yarnpkg.com/string-natural-compare/-/string-natural-compare-3.0.1.tgz#7a42d58474454963759e8e8b7ae63d71c1e7fdf4"
+  integrity sha512-n3sPwynL1nwKi3WJ6AIsClwBMa0zTi54fn2oLU6ndfTSIO05xaznjSf15PcBZU6FNWbmN5Q6cxT4V5hGvB4taw==
+
+string-width@^3.0.0, string-width@^3.1.0:
+  version "3.1.0"
+  resolved "https://registry.yarnpkg.com/string-width/-/string-width-3.1.0.tgz#22767be21b62af1081574306f69ac51b62203961"
+  integrity sha512-vafcv6KjVZKSgz06oM/H6GDBrAtz8vdhQakGjFIvNrHA6y3HCF1CInLy+QLq8dTJPQ1b+KDUqDFctkdRW44e1w==
+  dependencies:
+    emoji-regex "^7.0.1"
+    is-fullwidth-code-point "^2.0.0"
+    strip-ansi "^5.1.0"
+
+string-width@^4.1.0, string-width@^4.2.0:
+  version "4.2.0"
+  resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.0.tgz#952182c46cc7b2c313d1596e623992bd163b72b5"
+  integrity sha512-zUz5JD+tgqtuDjMhwIg5uFVV3dtqZ9yQJlZVfq4I01/K5Paj5UHj7VyrQOJvzawSVlKpObApbfD0Ed6yJc+1eg==
+  dependencies:
+    emoji-regex "^8.0.0"
+    is-fullwidth-code-point "^3.0.0"
+    strip-ansi "^6.0.0"
+
+string.prototype.matchall@^4.0.2:
+  version "4.0.4"
+  resolved "https://registry.yarnpkg.com/string.prototype.matchall/-/string.prototype.matchall-4.0.4.tgz#608f255e93e072107f5de066f81a2dfb78cf6b29"
+  integrity sha512-pknFIWVachNcyqRfaQSeu/FUfpvJTe4uskUSZ9Wc1RijsPuzbZ8TyYT8WCNnntCjUEqQ3vUHMAfVj2+wLAisPQ==
+  dependencies:
+    call-bind "^1.0.2"
+    define-properties "^1.1.3"
+    es-abstract "^1.18.0-next.2"
+    has-symbols "^1.0.1"
+    internal-slot "^1.0.3"
+    regexp.prototype.flags "^1.3.1"
+    side-channel "^1.0.4"
+
+string.prototype.trimend@^1.0.1, string.prototype.trimend@^1.0.3:
+  version "1.0.3"
+  resolved "https://registry.yarnpkg.com/string.prototype.trimend/-/string.prototype.trimend-1.0.3.tgz#a22bd53cca5c7cf44d7c9d5c732118873d6cd18b"
+  integrity sha512-ayH0pB+uf0U28CtjlLvL7NaohvR1amUvVZk+y3DYb0Ey2PUV5zPkkKy9+U1ndVEIXO8hNg18eIv9Jntbii+dKw==
+  dependencies:
+    call-bind "^1.0.0"
+    define-properties "^1.1.3"
+
+string.prototype.trimstart@^1.0.1, string.prototype.trimstart@^1.0.3:
+  version "1.0.3"
+  resolved "https://registry.yarnpkg.com/string.prototype.trimstart/-/string.prototype.trimstart-1.0.3.tgz#9b4cb590e123bb36564401d59824298de50fd5aa"
+  integrity sha512-oBIBUy5lea5tt0ovtOFiEQaBkoBBkyJhZXzJYrSmDo5IUUqbOPvVezuRs/agBIdZ2p2Eo1FD6bD9USyBLfl3xg==
+  dependencies:
+    call-bind "^1.0.0"
+    define-properties "^1.1.3"
+
+string_decoder@^1.0.0, string_decoder@^1.1.1:
+  version "1.3.0"
+  resolved "https://registry.yarnpkg.com/string_decoder/-/string_decoder-1.3.0.tgz#42f114594a46cf1a8e30b0a84f56c78c3edac21e"
+  integrity sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==
+  dependencies:
+    safe-buffer "~5.2.0"
+
+string_decoder@~1.1.1:
+  version "1.1.1"
+  resolved "https://registry.yarnpkg.com/string_decoder/-/string_decoder-1.1.1.tgz#9cf1611ba62685d7030ae9e4ba34149c3af03fc8"
+  integrity sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==
+  dependencies:
+    safe-buffer "~5.1.0"
+
+stringify-object@^3.3.0:
+  version "3.3.0"
+  resolved "https://registry.yarnpkg.com/stringify-object/-/stringify-object-3.3.0.tgz#703065aefca19300d3ce88af4f5b3956d7556629"
+  integrity sha512-rHqiFh1elqCQ9WPLIC8I0Q/g/wj5J1eMkyoiD6eoQApWHP0FtlK7rqnhmabL5VUY9JQCcqwwvlOaSuutekgyrw==
+  dependencies:
+    get-own-enumerable-property-symbols "^3.0.0"
+    is-obj "^1.0.1"
+    is-regexp "^1.0.0"
+
+strip-ansi@6.0.0, strip-ansi@^6.0.0:
+  version "6.0.0"
+  resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.0.tgz#0b1571dd7669ccd4f3e06e14ef1eed26225ae532"
+  integrity sha512-AuvKTrTfQNYNIctbR1K/YGTR1756GycPsg7b9bdV9Duqur4gv6aKqHXah67Z8ImS7WEz5QVcOtlfW2rZEugt6w==
+  dependencies:
+    ansi-regex "^5.0.0"
+
+strip-ansi@^3.0.0, strip-ansi@^3.0.1:
+  version "3.0.1"
+  resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-3.0.1.tgz#6a385fb8853d952d5ff05d0e8aaf94278dc63dcf"
+  integrity sha1-ajhfuIU9lS1f8F0Oiq+UJ43GPc8=
+  dependencies:
+    ansi-regex "^2.0.0"
+
+strip-ansi@^5.0.0, strip-ansi@^5.1.0, strip-ansi@^5.2.0:
+  version "5.2.0"
+  resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-5.2.0.tgz#8c9a536feb6afc962bdfa5b104a5091c1ad9c0ae"
+  integrity sha512-DuRs1gKbBqsMKIZlrffwlug8MHkcnpjs5VPmL1PAh+mA30U0DTotfDZ0d2UUsXpPmPmMMJ6W773MaA3J+lbiWA==
+  dependencies:
+    ansi-regex "^4.1.0"
+
+strip-bom@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/strip-bom/-/strip-bom-3.0.0.tgz#2334c18e9c759f7bdd56fdef7e9ae3d588e68ed3"
+  integrity sha1-IzTBjpx1n3vdVv3vfprj1YjmjtM=
+
+strip-bom@^4.0.0:
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/strip-bom/-/strip-bom-4.0.0.tgz#9c3505c1db45bcedca3d9cf7a16f5c5aa3901878"
+  integrity sha512-3xurFv5tEgii33Zi8Jtp55wEIILR9eh34FAW00PZf+JnSsTmV/ioewSgQl97JHvgjoRGwPShsWm+IdrxB35d0w==
+
+strip-comments@^1.0.2:
+  version "1.0.2"
+  resolved "https://registry.yarnpkg.com/strip-comments/-/strip-comments-1.0.2.tgz#82b9c45e7f05873bee53f37168af930aa368679d"
+  integrity sha512-kL97alc47hoyIQSV165tTt9rG5dn4w1dNnBhOQ3bOU1Nc1hel09jnXANaHJ7vzHLd4Ju8kseDGzlev96pghLFw==
+  dependencies:
+    babel-extract-comments "^1.0.0"
+    babel-plugin-transform-object-rest-spread "^6.26.0"
+
+strip-eof@^1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/strip-eof/-/strip-eof-1.0.0.tgz#bb43ff5598a6eb05d89b59fcd129c983313606bf"
+  integrity sha1-u0P/VZim6wXYm1n80SnJgzE2Br8=
+
+strip-final-newline@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/strip-final-newline/-/strip-final-newline-2.0.0.tgz#89b852fb2fcbe936f6f4b3187afb0a12c1ab58ad"
+  integrity sha512-BrpvfNAE3dcvq7ll3xVumzjKjZQ5tI1sEUIKr3Uoks0XUl45St3FlatVqef9prk4jRDzhW6WZg+3bk93y6pLjA==
+
+strip-indent@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/strip-indent/-/strip-indent-3.0.0.tgz#c32e1cee940b6b3432c771bc2c54bcce73cd3001"
+  integrity sha512-laJTa3Jb+VQpaC6DseHhF7dXVqHTfJPCRDaEbid/drOhgitgYku/letMUqOXFoWV0zIIUbjpdH2t+tYj4bQMRQ==
+  dependencies:
+    min-indent "^1.0.0"
+
+strip-json-comments@^3.1.0, strip-json-comments@^3.1.1:
+  version "3.1.1"
+  resolved "https://registry.yarnpkg.com/strip-json-comments/-/strip-json-comments-3.1.1.tgz#31f1281b3832630434831c310c01cccda8cbe006"
+  integrity sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==
+
+style-loader@1.3.0:
+  version "1.3.0"
+  resolved "https://registry.yarnpkg.com/style-loader/-/style-loader-1.3.0.tgz#828b4a3b3b7e7aa5847ce7bae9e874512114249e"
+  integrity sha512-V7TCORko8rs9rIqkSrlMfkqA63DfoGBBJmK1kKGCcSi+BWb4cqz0SRsnp4l6rU5iwOEd0/2ePv68SV22VXon4Q==
+  dependencies:
+    loader-utils "^2.0.0"
+    schema-utils "^2.7.0"
+
+style-value-types@4.0.3:
+  version "4.0.3"
+  resolved "https://registry.yarnpkg.com/style-value-types/-/style-value-types-4.0.3.tgz#3e2e46c50e876757cba02f442c8a0b0dd970c118"
+  integrity sha512-Yk2kpwC88W2HRlJXegWlT0pfLzjKWMjj8DI4s6m2VsZsL1Ht2oUyHl1EgTYIRlFiAnC4rBSQO+EEn0YiYAxQDw==
+  dependencies:
+    hey-listen "^1.0.8"
+    tslib "^1.10.0"
+
+stylehacks@^4.0.0:
+  version "4.0.3"
+  resolved "https://registry.yarnpkg.com/stylehacks/-/stylehacks-4.0.3.tgz#6718fcaf4d1e07d8a1318690881e8d96726a71d5"
+  integrity sha512-7GlLk9JwlElY4Y6a/rmbH2MhVlTyVmiJd1PfTCqFaIBEGMYNsrO/v3SeGTdhBThLg4Z+NbOk/qFMwCa+J+3p/g==
+  dependencies:
+    browserslist "^4.0.0"
+    postcss "^7.0.0"
+    postcss-selector-parser "^3.0.0"
+
+stylis@^4.0.3:
+  version "4.0.7"
+  resolved "https://registry.yarnpkg.com/stylis/-/stylis-4.0.7.tgz#412a90c28079417f3d27c028035095e4232d2904"
+  integrity sha512-OFFeUXFgwnGOKvEXaSv0D0KQ5ADP0n6g3SVONx6I/85JzNZ3u50FRwB3lVIk1QO2HNdI75tbVzc4Z66Gdp9voA==
+
+supports-color@^5.3.0:
+  version "5.5.0"
+  resolved "https://registry.yarnpkg.com/supports-color/-/supports-color-5.5.0.tgz#e2e69a44ac8772f78a1ec0b35b689df6530efc8f"
+  integrity sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==
+  dependencies:
+    has-flag "^3.0.0"
+
+supports-color@^6.1.0:
+  version "6.1.0"
+  resolved "https://registry.yarnpkg.com/supports-color/-/supports-color-6.1.0.tgz#0764abc69c63d5ac842dd4867e8d025e880df8f3"
+  integrity sha512-qe1jfm1Mg7Nq/NSh6XE24gPXROEVsWHxC1LIx//XNlD9iw7YZQGjZNjYN7xGaEG6iKdA8EtNFW6R0gjnVXp+wQ==
+  dependencies:
+    has-flag "^3.0.0"
+
+supports-color@^7.0.0, supports-color@^7.1.0:
+  version "7.2.0"
+  resolved "https://registry.yarnpkg.com/supports-color/-/supports-color-7.2.0.tgz#1b7dcdcb32b8138801b3e478ba6a51caa89648da"
+  integrity sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==
+  dependencies:
+    has-flag "^4.0.0"
+
+supports-hyperlinks@^2.0.0:
+  version "2.1.0"
+  resolved "https://registry.yarnpkg.com/supports-hyperlinks/-/supports-hyperlinks-2.1.0.tgz#f663df252af5f37c5d49bbd7eeefa9e0b9e59e47"
+  integrity sha512-zoE5/e+dnEijk6ASB6/qrK+oYdm2do1hjoLWrqUC/8WEIW1gbxFcKuBof7sW8ArN6e+AYvsE8HBGiVRWL/F5CA==
+  dependencies:
+    has-flag "^4.0.0"
+    supports-color "^7.0.0"
+
+svg-parser@^2.0.2:
+  version "2.0.4"
+  resolved "https://registry.yarnpkg.com/svg-parser/-/svg-parser-2.0.4.tgz#fdc2e29e13951736140b76cb122c8ee6630eb6b5"
+  integrity sha512-e4hG1hRwoOdRb37cIMSgzNsxyzKfayW6VOflrwvR+/bzrkyxY/31WkbgnQpgtrNp1SdpJvpUAGTa/ZoiPNDuRQ==
+
+svgo@^1.0.0, svgo@^1.2.2:
+  version "1.3.2"
+  resolved "https://registry.yarnpkg.com/svgo/-/svgo-1.3.2.tgz#b6dc511c063346c9e415b81e43401145b96d4167"
+  integrity sha512-yhy/sQYxR5BkC98CY7o31VGsg014AKLEPxdfhora76l36hD9Rdy5NZA/Ocn6yayNPgSamYdtX2rFJdcv07AYVw==
+  dependencies:
+    chalk "^2.4.1"
+    coa "^2.0.2"
+    css-select "^2.0.0"
+    css-select-base-adapter "^0.1.1"
+    css-tree "1.0.0-alpha.37"
+    csso "^4.0.2"
+    js-yaml "^3.13.1"
+    mkdirp "~0.5.1"
+    object.values "^1.1.0"
+    sax "~1.2.4"
+    stable "^0.1.8"
+    unquote "~1.1.1"
+    util.promisify "~1.0.0"
+
+symbol-tree@^3.2.4:
+  version "3.2.4"
+  resolved "https://registry.yarnpkg.com/symbol-tree/-/symbol-tree-3.2.4.tgz#430637d248ba77e078883951fb9aa0eed7c63fa2"
+  integrity sha512-9QNk5KwDF+Bvz+PyObkmSYjI5ksVUYtjW7AU22r2NKcfLJcXp96hkDWU3+XndOsUb+AQ9QhfzfCT2O+CNWT5Tw==
+
+table@^6.0.4:
+  version "6.0.7"
+  resolved "https://registry.yarnpkg.com/table/-/table-6.0.7.tgz#e45897ffbcc1bcf9e8a87bf420f2c9e5a7a52a34"
+  integrity sha512-rxZevLGTUzWna/qBLObOe16kB2RTnnbhciwgPbMMlazz1yZGVEgnZK762xyVdVznhqxrfCeBMmMkgOOaPwjH7g==
+  dependencies:
+    ajv "^7.0.2"
+    lodash "^4.17.20"
+    slice-ansi "^4.0.0"
+    string-width "^4.2.0"
+
+tapable@^1.0.0, tapable@^1.1.3:
+  version "1.1.3"
+  resolved "https://registry.yarnpkg.com/tapable/-/tapable-1.1.3.tgz#a1fccc06b58db61fd7a45da2da44f5f3a3e67ba2"
+  integrity sha512-4WK/bYZmj8xLr+HUCODHGF1ZFzsYffasLUgEiMBY4fgtltdO6B4WJtlSbPaDTLpYTcGVwM2qLnFTICEcNxs3kA==
+
+tar@^6.0.2:
+  version "6.1.0"
+  resolved "https://registry.yarnpkg.com/tar/-/tar-6.1.0.tgz#d1724e9bcc04b977b18d5c573b333a2207229a83"
+  integrity sha512-DUCttfhsnLCjwoDoFcI+B2iJgYa93vBnDUATYEeRx6sntCTdN01VnqsIuTlALXla/LWooNg0yEGeB+Y8WdFxGA==
+  dependencies:
+    chownr "^2.0.0"
+    fs-minipass "^2.0.0"
+    minipass "^3.0.0"
+    minizlib "^2.1.1"
+    mkdirp "^1.0.3"
+    yallist "^4.0.0"
+
+temp-dir@^1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/temp-dir/-/temp-dir-1.0.0.tgz#0a7c0ea26d3a39afa7e0ebea9c1fc0bc4daa011d"
+  integrity sha1-CnwOom06Oa+n4OvqnB/AvE2qAR0=
+
+tempy@^0.3.0:
+  version "0.3.0"
+  resolved "https://registry.yarnpkg.com/tempy/-/tempy-0.3.0.tgz#6f6c5b295695a16130996ad5ab01a8bd726e8bf8"
+  integrity sha512-WrH/pui8YCwmeiAoxV+lpRH9HpRtgBhSR2ViBPgpGb/wnYDzp21R4MN45fsCGvLROvY67o3byhJRYRONJyImVQ==
+  dependencies:
+    temp-dir "^1.0.0"
+    type-fest "^0.3.1"
+    unique-string "^1.0.0"
+
+terminal-link@^2.0.0:
+  version "2.1.1"
+  resolved "https://registry.yarnpkg.com/terminal-link/-/terminal-link-2.1.1.tgz#14a64a27ab3c0df933ea546fba55f2d078edc994"
+  integrity sha512-un0FmiRUQNr5PJqy9kP7c40F5BOfpGlYTrxonDChEZB7pzZxRNp/bt+ymiy9/npwXya9KH99nJ/GXFIiUkYGFQ==
+  dependencies:
+    ansi-escapes "^4.2.1"
+    supports-hyperlinks "^2.0.0"
+
+terser-webpack-plugin@4.2.3:
+  version "4.2.3"
+  resolved "https://registry.yarnpkg.com/terser-webpack-plugin/-/terser-webpack-plugin-4.2.3.tgz#28daef4a83bd17c1db0297070adc07fc8cfc6a9a"
+  integrity sha512-jTgXh40RnvOrLQNgIkwEKnQ8rmHjHK4u+6UBEi+W+FPmvb+uo+chJXntKe7/3lW5mNysgSWD60KyesnhW8D6MQ==
+  dependencies:
+    cacache "^15.0.5"
+    find-cache-dir "^3.3.1"
+    jest-worker "^26.5.0"
+    p-limit "^3.0.2"
+    schema-utils "^3.0.0"
+    serialize-javascript "^5.0.1"
+    source-map "^0.6.1"
+    terser "^5.3.4"
+    webpack-sources "^1.4.3"
+
+terser-webpack-plugin@^1.4.3:
+  version "1.4.5"
+  resolved "https://registry.yarnpkg.com/terser-webpack-plugin/-/terser-webpack-plugin-1.4.5.tgz#a217aefaea330e734ffacb6120ec1fa312d6040b"
+  integrity sha512-04Rfe496lN8EYruwi6oPQkG0vo8C+HT49X687FZnpPF0qMAIHONI6HEXYPKDOE8e5HjXTyKfqRd/agHtH0kOtw==
+  dependencies:
+    cacache "^12.0.2"
+    find-cache-dir "^2.1.0"
+    is-wsl "^1.1.0"
+    schema-utils "^1.0.0"
+    serialize-javascript "^4.0.0"
+    source-map "^0.6.1"
+    terser "^4.1.2"
+    webpack-sources "^1.4.0"
+    worker-farm "^1.7.0"
+
+terser@^4.1.2, terser@^4.6.2, terser@^4.6.3:
+  version "4.8.0"
+  resolved "https://registry.yarnpkg.com/terser/-/terser-4.8.0.tgz#63056343d7c70bb29f3af665865a46fe03a0df17"
+  integrity sha512-EAPipTNeWsb/3wLPeup1tVPaXfIaU68xMnVdPafIL1TV05OhASArYyIfFvnvJCNrR2NIOvDVNNTFRa+Re2MWyw==
+  dependencies:
+    commander "^2.20.0"
+    source-map "~0.6.1"
+    source-map-support "~0.5.12"
+
+terser@^5.3.4:
+  version "5.6.0"
+  resolved "https://registry.yarnpkg.com/terser/-/terser-5.6.0.tgz#138cdf21c5e3100b1b3ddfddf720962f88badcd2"
+  integrity sha512-vyqLMoqadC1uR0vywqOZzriDYzgEkNJFK4q9GeyOBHIbiECHiWLKcWfbQWAUaPfxkjDhapSlZB9f7fkMrvkVjA==
+  dependencies:
+    commander "^2.20.0"
+    source-map "~0.7.2"
+    source-map-support "~0.5.19"
+
+test-exclude@^6.0.0:
+  version "6.0.0"
+  resolved "https://registry.yarnpkg.com/test-exclude/-/test-exclude-6.0.0.tgz#04a8698661d805ea6fa293b6cb9e63ac044ef15e"
+  integrity sha512-cAGWPIyOHU6zlmg88jwm7VRyXnMN7iV68OGAbYDk/Mh/xC/pzVPlQtY6ngoIH/5/tciuhGfvESU8GrHrcxD56w==
+  dependencies:
+    "@istanbuljs/schema" "^0.1.2"
+    glob "^7.1.4"
+    minimatch "^3.0.4"
+
+text-table@0.2.0, text-table@^0.2.0:
+  version "0.2.0"
+  resolved "https://registry.yarnpkg.com/text-table/-/text-table-0.2.0.tgz#7f5ee823ae805207c00af2df4a84ec3fcfa570b4"
+  integrity sha1-f17oI66AUgfACvLfSoTsP8+lcLQ=
+
+throat@^5.0.0:
+  version "5.0.0"
+  resolved "https://registry.yarnpkg.com/throat/-/throat-5.0.0.tgz#c5199235803aad18754a667d659b5e72ce16764b"
+  integrity sha512-fcwX4mndzpLQKBS1DVYhGAcYaYt7vsHNIvQV+WXMvnow5cgjPphq5CaayLaGsjRdSCKZFNGt7/GYAuXaNOiYCA==
+
+through2@^2.0.0:
+  version "2.0.5"
+  resolved "https://registry.yarnpkg.com/through2/-/through2-2.0.5.tgz#01c1e39eb31d07cb7d03a96a70823260b23132cd"
+  integrity sha512-/mrRod8xqpA+IHSLyGCQ2s8SPHiCDEeQJSep1jqLYeEUClOFG2Qsh+4FU6G9VeqpZnGW/Su8LQGc4YKni5rYSQ==
+  dependencies:
+    readable-stream "~2.3.6"
+    xtend "~4.0.1"
+
+thunky@^1.0.2:
+  version "1.1.0"
+  resolved "https://registry.yarnpkg.com/thunky/-/thunky-1.1.0.tgz#5abaf714a9405db0504732bbccd2cedd9ef9537d"
+  integrity sha512-eHY7nBftgThBqOyHGVN+l8gF0BucP09fMo0oO/Lb0w1OF80dJv+lDVpXG60WMQvkcxAkNybKsrEIE3ZtKGmPrA==
+
+timers-browserify@^2.0.4:
+  version "2.0.12"
+  resolved "https://registry.yarnpkg.com/timers-browserify/-/timers-browserify-2.0.12.tgz#44a45c11fbf407f34f97bccd1577c652361b00ee"
+  integrity sha512-9phl76Cqm6FhSX9Xe1ZUAMLtm1BLkKj2Qd5ApyWkXzsMRaA7dgr81kf4wJmQf/hAvg8EEyJxDo3du/0KlhPiKQ==
+  dependencies:
+    setimmediate "^1.0.4"
+
+timsort@^0.3.0:
+  version "0.3.0"
+  resolved "https://registry.yarnpkg.com/timsort/-/timsort-0.3.0.tgz#405411a8e7e6339fe64db9a234de11dc31e02bd4"
+  integrity sha1-QFQRqOfmM5/mTbmiNN4R3DHgK9Q=
+
+tiny-invariant@^1.0.2, tiny-invariant@^1.0.6:
+  version "1.1.0"
+  resolved "https://registry.yarnpkg.com/tiny-invariant/-/tiny-invariant-1.1.0.tgz#634c5f8efdc27714b7f386c35e6760991d230875"
+  integrity sha512-ytxQvrb1cPc9WBEI/HSeYYoGD0kWnGEOR8RY6KomWLBVhqz0RgTwVO9dLrGz7dC+nN9llyI7OKAgRq8Vq4ZBSw==
+
+tiny-warning@^1.0.0, tiny-warning@^1.0.3:
+  version "1.0.3"
+  resolved "https://registry.yarnpkg.com/tiny-warning/-/tiny-warning-1.0.3.tgz#94a30db453df4c643d0fd566060d60a875d84754"
+  integrity sha512-lBN9zLN/oAf68o3zNXYrdCt1kP8WsiGW8Oo2ka41b2IM5JL/S1CTyX1rW0mb/zSuJun0ZUrDxx4sqvYS2FWzPA==
+
+tinycolor2@1.4.2:
+  version "1.4.2"
+  resolved "https://registry.yarnpkg.com/tinycolor2/-/tinycolor2-1.4.2.tgz#3f6a4d1071ad07676d7fa472e1fac40a719d8803"
+  integrity sha512-vJhccZPs965sV/L2sU4oRQVAos0pQXwsvTLkWYdqJ+a8Q5kPFzJTuOFwy7UniPli44NKQGAglksjvOcpo95aZA==
+
+tmpl@1.0.x:
+  version "1.0.4"
+  resolved "https://registry.yarnpkg.com/tmpl/-/tmpl-1.0.4.tgz#23640dd7b42d00433911140820e5cf440e521dd1"
+  integrity sha1-I2QN17QtAEM5ERQIIOXPRA5SHdE=
+
+to-arraybuffer@^1.0.0:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/to-arraybuffer/-/to-arraybuffer-1.0.1.tgz#7d229b1fcc637e466ca081180836a7aabff83f43"
+  integrity sha1-fSKbH8xjfkZsoIEYCDanqr/4P0M=
+
+to-fast-properties@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/to-fast-properties/-/to-fast-properties-2.0.0.tgz#dc5e698cbd079265bc73e0377681a4e4e83f616e"
+  integrity sha1-3F5pjL0HkmW8c+A3doGk5Og/YW4=
+
+to-object-path@^0.3.0:
+  version "0.3.0"
+  resolved "https://registry.yarnpkg.com/to-object-path/-/to-object-path-0.3.0.tgz#297588b7b0e7e0ac08e04e672f85c1f4999e17af"
+  integrity sha1-KXWIt7Dn4KwI4E5nL4XB9JmeF68=
+  dependencies:
+    kind-of "^3.0.2"
+
+to-regex-range@^2.1.0:
+  version "2.1.1"
+  resolved "https://registry.yarnpkg.com/to-regex-range/-/to-regex-range-2.1.1.tgz#7c80c17b9dfebe599e27367e0d4dd5590141db38"
+  integrity sha1-fIDBe53+vlmeJzZ+DU3VWQFB2zg=
+  dependencies:
+    is-number "^3.0.0"
+    repeat-string "^1.6.1"
+
+to-regex-range@^5.0.1:
+  version "5.0.1"
+  resolved "https://registry.yarnpkg.com/to-regex-range/-/to-regex-range-5.0.1.tgz#1648c44aae7c8d988a326018ed72f5b4dd0392e4"
+  integrity sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==
+  dependencies:
+    is-number "^7.0.0"
+
+to-regex@^3.0.1, to-regex@^3.0.2:
+  version "3.0.2"
+  resolved "https://registry.yarnpkg.com/to-regex/-/to-regex-3.0.2.tgz#13cfdd9b336552f30b51f33a8ae1b42a7a7599ce"
+  integrity sha512-FWtleNAtZ/Ki2qtqej2CXTOayOH9bHDQF+Q48VpWyDXjbYxA4Yz8iDB31zXOBUlOHHKidDbqGVrTUvQMPmBGBw==
+  dependencies:
+    define-property "^2.0.2"
+    extend-shallow "^3.0.2"
+    regex-not "^1.0.2"
+    safe-regex "^1.1.0"
+
+toggle-selection@^1.0.6:
+  version "1.0.6"
+  resolved "https://registry.yarnpkg.com/toggle-selection/-/toggle-selection-1.0.6.tgz#6e45b1263f2017fa0acc7d89d78b15b8bf77da32"
+  integrity sha1-bkWxJj8gF/oKzH2J14sVuL932jI=
+
+toidentifier@1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/toidentifier/-/toidentifier-1.0.0.tgz#7e1be3470f1e77948bc43d94a3c8f4d7752ba553"
+  integrity sha512-yaOH/Pk/VEhBWWTlhI+qXxDFXlejDGcQipMlyxda9nthulaxLZUNcUqFxokp0vcYnvteJln5FNQDRrxj3YcbVw==
+
+tough-cookie@^2.3.3, tough-cookie@~2.5.0:
+  version "2.5.0"
+  resolved "https://registry.yarnpkg.com/tough-cookie/-/tough-cookie-2.5.0.tgz#cd9fb2a0aa1d5a12b473bd9fb96fa3dcff65ade2"
+  integrity sha512-nlLsUzgm1kfLXSXfRZMc1KLAugd4hqJHDTvc2hDIwS3mZAfMEuMbc03SujMF+GEcpaX/qboeycw6iO8JwVv2+g==
+  dependencies:
+    psl "^1.1.28"
+    punycode "^2.1.1"
+
+tough-cookie@^3.0.1:
+  version "3.0.1"
+  resolved "https://registry.yarnpkg.com/tough-cookie/-/tough-cookie-3.0.1.tgz#9df4f57e739c26930a018184887f4adb7dca73b2"
+  integrity sha512-yQyJ0u4pZsv9D4clxO69OEjLWYw+jbgspjTue4lTQZLfV0c5l1VmK2y1JK8E9ahdpltPOaAThPcp5nKPUgSnsg==
+  dependencies:
+    ip-regex "^2.1.0"
+    psl "^1.1.28"
+    punycode "^2.1.1"
+
+tr46@^2.0.2:
+  version "2.0.2"
+  resolved "https://registry.yarnpkg.com/tr46/-/tr46-2.0.2.tgz#03273586def1595ae08fedb38d7733cee91d2479"
+  integrity sha512-3n1qG+/5kg+jrbTzwAykB5yRYtQCTqOGKq5U5PE3b0a1/mzo6snDhjGS0zJVJunO0NrT3Dg1MLy5TjWP/UJppg==
+  dependencies:
+    punycode "^2.1.1"
+
+tryer@^1.0.1:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/tryer/-/tryer-1.0.1.tgz#f2c85406800b9b0f74c9f7465b81eaad241252f8"
+  integrity sha512-c3zayb8/kWWpycWYg87P71E1S1ZL6b6IJxfb5fvsUgsf0S2MVGaDhDXXjDMpdCpfWXqptc+4mXwmiy1ypXqRAA==
+
+ts-pnp@1.2.0, ts-pnp@^1.1.6:
+  version "1.2.0"
+  resolved "https://registry.yarnpkg.com/ts-pnp/-/ts-pnp-1.2.0.tgz#a500ad084b0798f1c3071af391e65912c86bca92"
+  integrity sha512-csd+vJOb/gkzvcCHgTGSChYpy5f1/XKNsmvBGO4JXS+z1v2HobugDz4s1IeFXM3wZB44uczs+eazB5Q/ccdhQw==
+
+tsconfig-paths@^3.9.0:
+  version "3.9.0"
+  resolved "https://registry.yarnpkg.com/tsconfig-paths/-/tsconfig-paths-3.9.0.tgz#098547a6c4448807e8fcb8eae081064ee9a3c90b"
+  integrity sha512-dRcuzokWhajtZWkQsDVKbWyY+jgcLC5sqJhg2PSgf4ZkH2aHPvaOY8YWGhmjb68b5qqTfasSsDO9k7RUiEmZAw==
+  dependencies:
+    "@types/json5" "^0.0.29"
+    json5 "^1.0.1"
+    minimist "^1.2.0"
+    strip-bom "^3.0.0"
+
+tslib@^1.0.0, tslib@^1.10.0, tslib@^1.8.1, tslib@^1.9.0, tslib@^1.9.3:
+  version "1.14.1"
+  resolved "https://registry.yarnpkg.com/tslib/-/tslib-1.14.1.tgz#cf2d38bdc34a134bcaf1091c41f6619e2f672d00"
+  integrity sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg==
+
+tslib@^2.0.0, tslib@^2.0.3:
+  version "2.1.0"
+  resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.1.0.tgz#da60860f1c2ecaa5703ab7d39bc05b6bf988b97a"
+  integrity sha512-hcVC3wYEziELGGmEEXue7D75zbwIIVUMWAVbHItGPx0ziyXxrOMQx4rQEVEV45Ut/1IotuEvwqPopzIOkDMf0A==
+
+tsutils@^3.17.1:
+  version "3.20.0"
+  resolved "https://registry.yarnpkg.com/tsutils/-/tsutils-3.20.0.tgz#ea03ea45462e146b53d70ce0893de453ff24f698"
+  integrity sha512-RYbuQuvkhuqVeXweWT3tJLKOEJ/UUw9GjNEZGWdrLLlM+611o1gwLHBpxoFJKKl25fLprp2eVthtKs5JOrNeXg==
+  dependencies:
+    tslib "^1.8.1"
+
+tty-browserify@0.0.0:
+  version "0.0.0"
+  resolved "https://registry.yarnpkg.com/tty-browserify/-/tty-browserify-0.0.0.tgz#a157ba402da24e9bf957f9aa69d524eed42901a6"
+  integrity sha1-oVe6QC2iTpv5V/mqadUk7tQpAaY=
+
+tunnel-agent@^0.6.0:
+  version "0.6.0"
+  resolved "https://registry.yarnpkg.com/tunnel-agent/-/tunnel-agent-0.6.0.tgz#27a5dea06b36b04a0a9966774b290868f0fc40fd"
+  integrity sha1-J6XeoGs2sEoKmWZ3SykIaPD8QP0=
+  dependencies:
+    safe-buffer "^5.0.1"
+
+tweetnacl@^0.14.3, tweetnacl@~0.14.0:
+  version "0.14.5"
+  resolved "https://registry.yarnpkg.com/tweetnacl/-/tweetnacl-0.14.5.tgz#5ae68177f192d4456269d108afa93ff8743f4f64"
+  integrity sha1-WuaBd/GS1EViadEIr6k/+HQ/T2Q=
+
+type-check@^0.4.0, type-check@~0.4.0:
+  version "0.4.0"
+  resolved "https://registry.yarnpkg.com/type-check/-/type-check-0.4.0.tgz#07b8203bfa7056c0657050e3ccd2c37730bab8f1"
+  integrity sha512-XleUoc9uwGXqjWwXaUTZAmzMcFZ5858QA2vvx1Ur5xIcixXIP+8LnFDgRplU30us6teqdlskFfu+ae4K79Ooew==
+  dependencies:
+    prelude-ls "^1.2.1"
+
+type-check@~0.3.2:
+  version "0.3.2"
+  resolved "https://registry.yarnpkg.com/type-check/-/type-check-0.3.2.tgz#5884cab512cf1d355e3fb784f30804b2b520db72"
+  integrity sha1-WITKtRLPHTVeP7eE8wgEsrUg23I=
+  dependencies:
+    prelude-ls "~1.1.2"
+
+type-detect@4.0.8:
+  version "4.0.8"
+  resolved "https://registry.yarnpkg.com/type-detect/-/type-detect-4.0.8.tgz#7646fb5f18871cfbb7749e69bd39a6388eb7450c"
+  integrity sha512-0fr/mIH1dlO+x7TlcMy+bIDqKPsw/70tVyeHW787goQjhmqaZe10uwLujubK9q9Lg6Fiho1KUKDYz0Z7k7g5/g==
+
+type-fest@^0.11.0:
+  version "0.11.0"
+  resolved "https://registry.yarnpkg.com/type-fest/-/type-fest-0.11.0.tgz#97abf0872310fed88a5c466b25681576145e33f1"
+  integrity sha512-OdjXJxnCN1AvyLSzeKIgXTXxV+99ZuXl3Hpo9XpJAv9MBcHrrJOQ5kV7ypXOuQie+AmWG25hLbiKdwYTifzcfQ==
+
+type-fest@^0.3.1:
+  version "0.3.1"
+  resolved "https://registry.yarnpkg.com/type-fest/-/type-fest-0.3.1.tgz#63d00d204e059474fe5e1b7c011112bbd1dc29e1"
+  integrity sha512-cUGJnCdr4STbePCgqNFbpVNCepa+kAVohJs1sLhxzdH+gnEoOd8VhbYa7pD3zZYGiURWM2xzEII3fQcRizDkYQ==
+
+type-fest@^0.6.0:
+  version "0.6.0"
+  resolved "https://registry.yarnpkg.com/type-fest/-/type-fest-0.6.0.tgz#8d2a2370d3df886eb5c90ada1c5bf6188acf838b"
+  integrity sha512-q+MB8nYR1KDLrgr4G5yemftpMC7/QLqVndBmEEdqzmNj5dcFOO4Oo8qlwZE3ULT3+Zim1F8Kq4cBnikNhlCMlg==
+
+type-fest@^0.8.1:
+  version "0.8.1"
+  resolved "https://registry.yarnpkg.com/type-fest/-/type-fest-0.8.1.tgz#09e249ebde851d3b1e48d27c105444667f17b83d"
+  integrity sha512-4dbzIzqvjtgiM5rw1k5rEHtBANKmdudhGyBEajN01fEyhaAIhsoKNy6y7+IN93IfpFtwY9iqi7kD+xwKhQsNJA==
+
+type-is@~1.6.17, type-is@~1.6.18:
+  version "1.6.18"
+  resolved "https://registry.yarnpkg.com/type-is/-/type-is-1.6.18.tgz#4e552cd05df09467dcbc4ef739de89f2cf37c131"
+  integrity sha512-TkRKr9sUTxEH8MdfuCSP7VizJyzRNMjj2J2do2Jr3Kym598JVdEksuzPQCnlFPW4ky9Q+iA+ma9BGm06XQBy8g==
+  dependencies:
+    media-typer "0.3.0"
+    mime-types "~2.1.24"
+
+type@^1.0.1:
+  version "1.2.0"
+  resolved "https://registry.yarnpkg.com/type/-/type-1.2.0.tgz#848dd7698dafa3e54a6c479e759c4bc3f18847a0"
+  integrity sha512-+5nt5AAniqsCnu2cEQQdpzCAh33kVx8n0VoFidKpB1dVVLAN/F+bgVOqOJqOnEnrhp222clB5p3vUlD+1QAnfg==
+
+type@^2.0.0:
+  version "2.3.0"
+  resolved "https://registry.yarnpkg.com/type/-/type-2.3.0.tgz#ada7c045f07ead08abf9e2edd29be1a0c0661132"
+  integrity sha512-rgPIqOdfK/4J9FhiVrZ3cveAjRRo5rsQBAIhnylX874y1DX/kEKSVdLsnuHB6l1KTjHyU01VjiMBHgU2adejyg==
+
+typedarray-to-buffer@^3.1.5:
+  version "3.1.5"
+  resolved "https://registry.yarnpkg.com/typedarray-to-buffer/-/typedarray-to-buffer-3.1.5.tgz#a97ee7a9ff42691b9f783ff1bc5112fe3fca9080"
+  integrity sha512-zdu8XMNEDepKKR+XYOXAVPtWui0ly0NtohUscw+UmaHiAWT8hrV1rr//H6V+0DvJ3OQ19S979M0laLfX8rm82Q==
+  dependencies:
+    is-typedarray "^1.0.0"
+
+typedarray@^0.0.6:
+  version "0.0.6"
+  resolved "https://registry.yarnpkg.com/typedarray/-/typedarray-0.0.6.tgz#867ac74e3864187b1d3d47d996a78ec5c8830777"
+  integrity sha1-hnrHTjhkGHsdPUfZlqeOxciDB3c=
+
+typescript@^4.1.2:
+  version "4.2.2"
+  resolved "https://registry.yarnpkg.com/typescript/-/typescript-4.2.2.tgz#1450f020618f872db0ea17317d16d8da8ddb8c4c"
+  integrity sha512-tbb+NVrLfnsJy3M59lsDgrzWIflR4d4TIUjz+heUnHZwdF7YsrMTKoRERiIvI2lvBG95dfpLxB21WZhys1bgaQ==
+
+unicode-canonical-property-names-ecmascript@^1.0.4:
+  version "1.0.4"
+  resolved "https://registry.yarnpkg.com/unicode-canonical-property-names-ecmascript/-/unicode-canonical-property-names-ecmascript-1.0.4.tgz#2619800c4c825800efdd8343af7dd9933cbe2818"
+  integrity sha512-jDrNnXWHd4oHiTZnx/ZG7gtUTVp+gCcTTKr8L0HjlwphROEW3+Him+IpvC+xcJEFegapiMZyZe02CyuOnRmbnQ==
+
+unicode-match-property-ecmascript@^1.0.4:
+  version "1.0.4"
+  resolved "https://registry.yarnpkg.com/unicode-match-property-ecmascript/-/unicode-match-property-ecmascript-1.0.4.tgz#8ed2a32569961bce9227d09cd3ffbb8fed5f020c"
+  integrity sha512-L4Qoh15vTfntsn4P1zqnHulG0LdXgjSO035fEpdtp6YxXhMT51Q6vgM5lYdG/5X3MjS+k/Y9Xw4SFCY9IkR0rg==
+  dependencies:
+    unicode-canonical-property-names-ecmascript "^1.0.4"
+    unicode-property-aliases-ecmascript "^1.0.4"
+
+unicode-match-property-value-ecmascript@^1.2.0:
+  version "1.2.0"
+  resolved "https://registry.yarnpkg.com/unicode-match-property-value-ecmascript/-/unicode-match-property-value-ecmascript-1.2.0.tgz#0d91f600eeeb3096aa962b1d6fc88876e64ea531"
+  integrity sha512-wjuQHGQVofmSJv1uVISKLE5zO2rNGzM/KCYZch/QQvez7C1hUhBIuZ701fYXExuufJFMPhv2SyL8CyoIfMLbIQ==
+
+unicode-property-aliases-ecmascript@^1.0.4:
+  version "1.1.0"
+  resolved "https://registry.yarnpkg.com/unicode-property-aliases-ecmascript/-/unicode-property-aliases-ecmascript-1.1.0.tgz#dd57a99f6207bedff4628abefb94c50db941c8f4"
+  integrity sha512-PqSoPh/pWetQ2phoj5RLiaqIk4kCNwoV3CI+LfGmWLKI3rE3kl1h59XpX2BjgDrmbxD9ARtQobPGU1SguCYuQg==
+
+union-value@^1.0.0:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/union-value/-/union-value-1.0.1.tgz#0b6fe7b835aecda61c6ea4d4f02c14221e109847"
+  integrity sha512-tJfXmxMeWYnczCVs7XAEvIV7ieppALdyepWMkHkwciRpZraG/xwT+s2JN8+pr1+8jCRf80FFzvr+MpQeeoF4Xg==
+  dependencies:
+    arr-union "^3.1.0"
+    get-value "^2.0.6"
+    is-extendable "^0.1.1"
+    set-value "^2.0.1"
+
+uniq@^1.0.1:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/uniq/-/uniq-1.0.1.tgz#b31c5ae8254844a3a8281541ce2b04b865a734ff"
+  integrity sha1-sxxa6CVIRKOoKBVBzisEuGWnNP8=
+
+uniqs@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/uniqs/-/uniqs-2.0.0.tgz#ffede4b36b25290696e6e165d4a59edb998e6b02"
+  integrity sha1-/+3ks2slKQaW5uFl1KWe25mOawI=
+
+unique-filename@^1.1.1:
+  version "1.1.1"
+  resolved "https://registry.yarnpkg.com/unique-filename/-/unique-filename-1.1.1.tgz#1d69769369ada0583103a1e6ae87681b56573230"
+  integrity sha512-Vmp0jIp2ln35UTXuryvjzkjGdRyf9b2lTXuSYUiPmzRcl3FDtYqAwOnTJkAngD9SWhnoJzDbTKwaOrZ+STtxNQ==
+  dependencies:
+    unique-slug "^2.0.0"
+
+unique-slug@^2.0.0:
+  version "2.0.2"
+  resolved "https://registry.yarnpkg.com/unique-slug/-/unique-slug-2.0.2.tgz#baabce91083fc64e945b0f3ad613e264f7cd4e6c"
+  integrity sha512-zoWr9ObaxALD3DOPfjPSqxt4fnZiWblxHIgeWqW8x7UqDzEtHEQLzji2cuJYQFCU6KmoJikOYAZlrTHHebjx2w==
+  dependencies:
+    imurmurhash "^0.1.4"
+
+unique-string@^1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/unique-string/-/unique-string-1.0.0.tgz#9e1057cca851abb93398f8b33ae187b99caec11a"
+  integrity sha1-nhBXzKhRq7kzmPizOuGHuZyuwRo=
+  dependencies:
+    crypto-random-string "^1.0.0"
+
+universalify@^0.1.0:
+  version "0.1.2"
+  resolved "https://registry.yarnpkg.com/universalify/-/universalify-0.1.2.tgz#b646f69be3942dabcecc9d6639c80dc105efaa66"
+  integrity sha512-rBJeI5CXAlmy1pV+617WB9J63U6XcazHHF2f2dbJix4XzpUF0RS3Zbj0FGIOCAva5P/d/GBOYaACQ1w+0azUkg==
+
+universalify@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/universalify/-/universalify-2.0.0.tgz#75a4984efedc4b08975c5aeb73f530d02df25717"
+  integrity sha512-hAZsKq7Yy11Zu1DE0OzWjw7nnLZmJZYTDZZyEFHZdUhV8FkH5MCfoU1XMaxXovpyW5nq5scPqq0ZDP9Zyl04oQ==
+
+unpipe@1.0.0, unpipe@~1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/unpipe/-/unpipe-1.0.0.tgz#b2bf4ee8514aae6165b4817829d21b2ef49904ec"
+  integrity sha1-sr9O6FFKrmFltIF4KdIbLvSZBOw=
+
+unquote@~1.1.1:
+  version "1.1.1"
+  resolved "https://registry.yarnpkg.com/unquote/-/unquote-1.1.1.tgz#8fded7324ec6e88a0ff8b905e7c098cdc086d544"
+  integrity sha1-j97XMk7G6IoP+LkF58CYzcCG1UQ=
+
+unset-value@^1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/unset-value/-/unset-value-1.0.0.tgz#8376873f7d2335179ffb1e6fc3a8ed0dfc8ab559"
+  integrity sha1-g3aHP30jNRef+x5vw6jtDfyKtVk=
+  dependencies:
+    has-value "^0.3.1"
+    isobject "^3.0.0"
+
+upath@^1.1.1, upath@^1.1.2, upath@^1.2.0:
+  version "1.2.0"
+  resolved "https://registry.yarnpkg.com/upath/-/upath-1.2.0.tgz#8f66dbcd55a883acdae4408af8b035a5044c1894"
+  integrity sha512-aZwGpamFO61g3OlfT7OQCHqhGnW43ieH9WZeP7QxN/G/jS4jfqUkZxoryvJgVPEcrl5NL/ggHsSmLMHuH64Lhg==
+
+uri-js@^4.2.2:
+  version "4.4.1"
+  resolved "https://registry.yarnpkg.com/uri-js/-/uri-js-4.4.1.tgz#9b1a52595225859e55f669d928f88c6c57f2a77e"
+  integrity sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==
+  dependencies:
+    punycode "^2.1.0"
+
+urix@^0.1.0:
+  version "0.1.0"
+  resolved "https://registry.yarnpkg.com/urix/-/urix-0.1.0.tgz#da937f7a62e21fec1fd18d49b35c2935067a6c72"
+  integrity sha1-2pN/emLiH+wf0Y1Js1wpNQZ6bHI=
+
+url-loader@4.1.1:
+  version "4.1.1"
+  resolved "https://registry.yarnpkg.com/url-loader/-/url-loader-4.1.1.tgz#28505e905cae158cf07c92ca622d7f237e70a4e2"
+  integrity sha512-3BTV812+AVHHOJQO8O5MkWgZ5aosP7GnROJwvzLS9hWDj00lZ6Z0wNak423Lp9PBZN05N+Jk/N5Si8jRAlGyWA==
+  dependencies:
+    loader-utils "^2.0.0"
+    mime-types "^2.1.27"
+    schema-utils "^3.0.0"
+
+url-parse@^1.4.3, url-parse@^1.4.7:
+  version "1.5.1"
+  resolved "https://registry.yarnpkg.com/url-parse/-/url-parse-1.5.1.tgz#d5fa9890af8a5e1f274a2c98376510f6425f6e3b"
+  integrity sha512-HOfCOUJt7iSYzEx/UqgtwKRMC6EU91NFhsCHMv9oM03VJcVo2Qrp8T8kI9D7amFf1cu+/3CEhgb3rF9zL7k85Q==
+  dependencies:
+    querystringify "^2.1.1"
+    requires-port "^1.0.0"
+
+url@^0.11.0:
+  version "0.11.0"
+  resolved "https://registry.yarnpkg.com/url/-/url-0.11.0.tgz#3838e97cfc60521eb73c525a8e55bfdd9e2e28f1"
+  integrity sha1-ODjpfPxgUh63PFJajlW/3Z4uKPE=
+  dependencies:
+    punycode "1.3.2"
+    querystring "0.2.0"
+
+use-callback-ref@^1.2.1, use-callback-ref@^1.2.3:
+  version "1.2.5"
+  resolved "https://registry.yarnpkg.com/use-callback-ref/-/use-callback-ref-1.2.5.tgz#6115ed242cfbaed5915499c0a9842ca2912f38a5"
+  integrity sha512-gN3vgMISAgacF7sqsLPByqoePooY3n2emTH59Ur5d/M8eg4WTWu1xp8i8DHjohftIyEx0S08RiYxbffr4j8Peg==
+
+use-sidecar@^1.0.1:
+  version "1.0.4"
+  resolved "https://registry.yarnpkg.com/use-sidecar/-/use-sidecar-1.0.4.tgz#38398c3723727f9f924bed2343dfa3db6aaaee46"
+  integrity sha512-A5ggIS3/qTdxCAlcy05anO2/oqXOfpmxnpRE1Jm+fHHtCvUvNSZDGqgOSAXPriBVAcw2fMFFkh5v5KqrFFhCMA==
+  dependencies:
+    detect-node-es "^1.0.0"
+    tslib "^1.9.3"
+
+use@^3.1.0:
+  version "3.1.1"
+  resolved "https://registry.yarnpkg.com/use/-/use-3.1.1.tgz#d50c8cac79a19fbc20f2911f56eb973f4e10070f"
+  integrity sha512-cwESVXlO3url9YWlFW/TA9cshCEhtu7IKJ/p5soJ/gGpj7vbvFrAY/eIioQ6Dw23KjZhYgiIo8HOs1nQ2vr/oQ==
+
+util-deprecate@^1.0.1, util-deprecate@^1.0.2, util-deprecate@~1.0.1:
+  version "1.0.2"
+  resolved "https://registry.yarnpkg.com/util-deprecate/-/util-deprecate-1.0.2.tgz#450d4dc9fa70de732762fbd2d4a28981419a0ccf"
+  integrity sha1-RQ1Nyfpw3nMnYvvS1KKJgUGaDM8=
+
+util.promisify@1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/util.promisify/-/util.promisify-1.0.0.tgz#440f7165a459c9a16dc145eb8e72f35687097030"
+  integrity sha512-i+6qA2MPhvoKLuxnJNpXAGhg7HphQOSUq2LKMZD0m15EiskXUkMvKdF4Uui0WYeCUGea+o2cw/ZuwehtfsrNkA==
+  dependencies:
+    define-properties "^1.1.2"
+    object.getownpropertydescriptors "^2.0.3"
+
+util.promisify@~1.0.0:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/util.promisify/-/util.promisify-1.0.1.tgz#6baf7774b80eeb0f7520d8b81d07982a59abbaee"
+  integrity sha512-g9JpC/3He3bm38zsLupWryXHoEcS22YHthuPQSJdMy6KNrzIRzWqcsHzD/WUnqe45whVou4VIsPew37DoXWNrA==
+  dependencies:
+    define-properties "^1.1.3"
+    es-abstract "^1.17.2"
+    has-symbols "^1.0.1"
+    object.getownpropertydescriptors "^2.1.0"
+
+util@0.10.3:
+  version "0.10.3"
+  resolved "https://registry.yarnpkg.com/util/-/util-0.10.3.tgz#7afb1afe50805246489e3db7fe0ed379336ac0f9"
+  integrity sha1-evsa/lCAUkZInj23/g7TeTNqwPk=
+  dependencies:
+    inherits "2.0.1"
+
+util@^0.11.0:
+  version "0.11.1"
+  resolved "https://registry.yarnpkg.com/util/-/util-0.11.1.tgz#3236733720ec64bb27f6e26f421aaa2e1b588d61"
+  integrity sha512-HShAsny+zS2TZfaXxD9tYj4HQGlBezXZMZuM/S5PKLLoZkShZiGk9o5CzukI1LVHZvjdvZ2Sj1aW/Ndn2NB/HQ==
+  dependencies:
+    inherits "2.0.3"
+
+utila@~0.4:
+  version "0.4.0"
+  resolved "https://registry.yarnpkg.com/utila/-/utila-0.4.0.tgz#8a16a05d445657a3aea5eecc5b12a4fa5379772c"
+  integrity sha1-ihagXURWV6Oupe7MWxKk+lN5dyw=
+
+utils-merge@1.0.1:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/utils-merge/-/utils-merge-1.0.1.tgz#9f95710f50a267947b2ccc124741c1028427e713"
+  integrity sha1-n5VxD1CiZ5R7LMwSR0HBAoQn5xM=
+
+uuid@^3.3.2, uuid@^3.4.0:
+  version "3.4.0"
+  resolved "https://registry.yarnpkg.com/uuid/-/uuid-3.4.0.tgz#b23e4358afa8a202fe7a100af1f5f883f02007ee"
+  integrity sha512-HjSDRw6gZE5JMggctHBcjVak08+KEVhSIiDzFnT9S9aegmp85S/bReBVTb4QTFaRNptJ9kuYaNhnbNEOkbKb/A==
+
+uuid@^8.3.0:
+  version "8.3.2"
+  resolved "https://registry.yarnpkg.com/uuid/-/uuid-8.3.2.tgz#80d5b5ced271bb9af6c445f21a1a04c606cefbe2"
+  integrity sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg==
+
+v8-compile-cache@^2.0.3:
+  version "2.2.0"
+  resolved "https://registry.yarnpkg.com/v8-compile-cache/-/v8-compile-cache-2.2.0.tgz#9471efa3ef9128d2f7c6a7ca39c4dd6b5055b132"
+  integrity sha512-gTpR5XQNKFwOd4clxfnhaqvfqMpqEwr4tOtCyz4MtYZX2JYhfr1JvBFKdS+7K/9rfpZR3VLX+YWBbKoxCgS43Q==
+
+v8-to-istanbul@^7.0.0:
+  version "7.1.0"
+  resolved "https://registry.yarnpkg.com/v8-to-istanbul/-/v8-to-istanbul-7.1.0.tgz#5b95cef45c0f83217ec79f8fc7ee1c8b486aee07"
+  integrity sha512-uXUVqNUCLa0AH1vuVxzi+MI4RfxEOKt9pBgKwHbgH7st8Kv2P1m+jvWNnektzBh5QShF3ODgKmUFCf38LnVz1g==
+  dependencies:
+    "@types/istanbul-lib-coverage" "^2.0.1"
+    convert-source-map "^1.6.0"
+    source-map "^0.7.3"
+
+validate-npm-package-license@^3.0.1:
+  version "3.0.4"
+  resolved "https://registry.yarnpkg.com/validate-npm-package-license/-/validate-npm-package-license-3.0.4.tgz#fc91f6b9c7ba15c857f4cb2c5defeec39d4f410a"
+  integrity sha512-DpKm2Ui/xN7/HQKCtpZxoRWBhZ9Z0kqtygG8XCgNQ8ZlDnxuQmWhj566j8fN4Cu3/JmbhsDo7fcAJq4s9h27Ew==
+  dependencies:
+    spdx-correct "^3.0.0"
+    spdx-expression-parse "^3.0.0"
+
+value-equal@^1.0.1:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/value-equal/-/value-equal-1.0.1.tgz#1e0b794c734c5c0cade179c437d356d931a34d6c"
+  integrity sha512-NOJ6JZCAWr0zlxZt+xqCHNTEKOsrks2HQd4MqhP1qy4z1SkbEP467eNx6TgDKXMvUOb+OENfJCZwM+16n7fRfw==
+
+vary@~1.1.2:
+  version "1.1.2"
+  resolved "https://registry.yarnpkg.com/vary/-/vary-1.1.2.tgz#2299f02c6ded30d4a5961b0b9f74524a18f634fc"
+  integrity sha1-IpnwLG3tMNSllhsLn3RSShj2NPw=
+
+vendors@^1.0.0:
+  version "1.0.4"
+  resolved "https://registry.yarnpkg.com/vendors/-/vendors-1.0.4.tgz#e2b800a53e7a29b93506c3cf41100d16c4c4ad8e"
+  integrity sha512-/juG65kTL4Cy2su4P8HjtkTxk6VmJDiOPBufWniqQ6wknac6jNiXS9vU+hO3wgusiyqWlzTbVHi0dyJqRONg3w==
+
+verror@1.10.0:
+  version "1.10.0"
+  resolved "https://registry.yarnpkg.com/verror/-/verror-1.10.0.tgz#3a105ca17053af55d6e270c1f8288682e18da400"
+  integrity sha1-OhBcoXBTr1XW4nDB+CiGguGNpAA=
+  dependencies:
+    assert-plus "^1.0.0"
+    core-util-is "1.0.2"
+    extsprintf "^1.2.0"
+
+vm-browserify@^1.0.1:
+  version "1.1.2"
+  resolved "https://registry.yarnpkg.com/vm-browserify/-/vm-browserify-1.1.2.tgz#78641c488b8e6ca91a75f511e7a3b32a86e5dda0"
+  integrity sha512-2ham8XPWTONajOR0ohOKOHXkm3+gaBmGut3SRuu75xLd/RRaY6vqgh8NBYYk7+RW3u5AtzPQZG8F10LHkl0lAQ==
+
+w3c-hr-time@^1.0.2:
+  version "1.0.2"
+  resolved "https://registry.yarnpkg.com/w3c-hr-time/-/w3c-hr-time-1.0.2.tgz#0a89cdf5cc15822df9c360543676963e0cc308cd"
+  integrity sha512-z8P5DvDNjKDoFIHK7q8r8lackT6l+jo/Ye3HOle7l9nICP9lf1Ci25fy9vHd0JOWewkIFzXIEig3TdKT7JQ5fQ==
+  dependencies:
+    browser-process-hrtime "^1.0.0"
+
+w3c-xmlserializer@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/w3c-xmlserializer/-/w3c-xmlserializer-2.0.0.tgz#3e7104a05b75146cc60f564380b7f683acf1020a"
+  integrity sha512-4tzD0mF8iSiMiNs30BiLO3EpfGLZUT2MSX/G+o7ZywDzliWQ3OPtTZ0PTC3B3ca1UAf4cJMHB+2Bf56EriJuRA==
+  dependencies:
+    xml-name-validator "^3.0.0"
+
+walker@^1.0.7, walker@~1.0.5:
+  version "1.0.7"
+  resolved "https://registry.yarnpkg.com/walker/-/walker-1.0.7.tgz#2f7f9b8fd10d677262b18a884e28d19618e028fb"
+  integrity sha1-L3+bj9ENZ3JisYqITijRlhjgKPs=
+  dependencies:
+    makeerror "1.0.x"
+
+warning@^4.0.3:
+  version "4.0.3"
+  resolved "https://registry.yarnpkg.com/warning/-/warning-4.0.3.tgz#16e9e077eb8a86d6af7d64aa1e05fd85b4678ca3"
+  integrity sha512-rpJyN222KWIvHJ/F53XSZv0Zl/accqHR8et1kpaMTD/fLCRxtV8iX8czMzY7sVZupTI3zcUTg8eycS2kNF9l6w==
+  dependencies:
+    loose-envify "^1.0.0"
+
+watchpack-chokidar2@^2.0.1:
+  version "2.0.1"
+  resolved "https://registry.yarnpkg.com/watchpack-chokidar2/-/watchpack-chokidar2-2.0.1.tgz#38500072ee6ece66f3769936950ea1771be1c957"
+  integrity sha512-nCFfBIPKr5Sh61s4LPpy1Wtfi0HE8isJ3d2Yb5/Ppw2P2B/3eVSEBjKfN0fmHJSK14+31KwMKmcrzs2GM4P0Ww==
+  dependencies:
+    chokidar "^2.1.8"
+
+watchpack@^1.7.4:
+  version "1.7.5"
+  resolved "https://registry.yarnpkg.com/watchpack/-/watchpack-1.7.5.tgz#1267e6c55e0b9b5be44c2023aed5437a2c26c453"
+  integrity sha512-9P3MWk6SrKjHsGkLT2KHXdQ/9SNkyoJbabxnKOoJepsvJjJG8uYTR3yTPxPQvNDI3w4Nz1xnE0TLHK4RIVe/MQ==
+  dependencies:
+    graceful-fs "^4.1.2"
+    neo-async "^2.5.0"
+  optionalDependencies:
+    chokidar "^3.4.1"
+    watchpack-chokidar2 "^2.0.1"
+
+wbuf@^1.1.0, wbuf@^1.7.3:
+  version "1.7.3"
+  resolved "https://registry.yarnpkg.com/wbuf/-/wbuf-1.7.3.tgz#c1d8d149316d3ea852848895cb6a0bfe887b87df"
+  integrity sha512-O84QOnr0icsbFGLS0O3bI5FswxzRr8/gHwWkDlQFskhSPryQXvrTMxjxGP4+iWYoauLoBvfDpkrOauZ+0iZpDA==
+  dependencies:
+    minimalistic-assert "^1.0.0"
+
+web-vitals@^1.0.1:
+  version "1.1.0"
+  resolved "https://registry.yarnpkg.com/web-vitals/-/web-vitals-1.1.0.tgz#7f410d9a1f7a1cd5d952806b45776204b47dc274"
+  integrity sha512-1cx54eRxY/+M0KNKdNpNnuXAXG+vJEvwScV4DiV9rOYDguHoeDIzm09ghBohOPtkqPO5OtPC14FWkNva3SDisg==
+
+webidl-conversions@^5.0.0:
+  version "5.0.0"
+  resolved "https://registry.yarnpkg.com/webidl-conversions/-/webidl-conversions-5.0.0.tgz#ae59c8a00b121543a2acc65c0434f57b0fc11aff"
+  integrity sha512-VlZwKPCkYKxQgeSbH5EyngOmRp7Ww7I9rQLERETtf5ofd9pGeswWiOtogpEO850jziPRarreGxn5QIiTqpb2wA==
+
+webidl-conversions@^6.1.0:
+  version "6.1.0"
+  resolved "https://registry.yarnpkg.com/webidl-conversions/-/webidl-conversions-6.1.0.tgz#9111b4d7ea80acd40f5270d666621afa78b69514"
+  integrity sha512-qBIvFLGiBpLjfwmYAaHPXsn+ho5xZnGvyGvsarywGNc8VyQJUMHJ8OBKGGrPER0okBeMDaan4mNBlgBROxuI8w==
+
+webpack-dev-middleware@^3.7.2:
+  version "3.7.3"
+  resolved "https://registry.yarnpkg.com/webpack-dev-middleware/-/webpack-dev-middleware-3.7.3.tgz#0639372b143262e2b84ab95d3b91a7597061c2c5"
+  integrity sha512-djelc/zGiz9nZj/U7PTBi2ViorGJXEWo/3ltkPbDyxCXhhEXkW0ce99falaok4TPj+AsxLiXJR0EBOb0zh9fKQ==
+  dependencies:
+    memory-fs "^0.4.1"
+    mime "^2.4.4"
+    mkdirp "^0.5.1"
+    range-parser "^1.2.1"
+    webpack-log "^2.0.0"
+
+webpack-dev-server@3.11.1:
+  version "3.11.1"
+  resolved "https://registry.yarnpkg.com/webpack-dev-server/-/webpack-dev-server-3.11.1.tgz#c74028bf5ba8885aaf230e48a20e8936ab8511f0"
+  integrity sha512-u4R3mRzZkbxQVa+MBWi2uVpB5W59H3ekZAJsQlKUTdl7Elcah2EhygTPLmeFXybQkf9i2+L0kn7ik9SnXa6ihQ==
+  dependencies:
+    ansi-html "0.0.7"
+    bonjour "^3.5.0"
+    chokidar "^2.1.8"
+    compression "^1.7.4"
+    connect-history-api-fallback "^1.6.0"
+    debug "^4.1.1"
+    del "^4.1.1"
+    express "^4.17.1"
+    html-entities "^1.3.1"
+    http-proxy-middleware "0.19.1"
+    import-local "^2.0.0"
+    internal-ip "^4.3.0"
+    ip "^1.1.5"
+    is-absolute-url "^3.0.3"
+    killable "^1.0.1"
+    loglevel "^1.6.8"
+    opn "^5.5.0"
+    p-retry "^3.0.1"
+    portfinder "^1.0.26"
+    schema-utils "^1.0.0"
+    selfsigned "^1.10.8"
+    semver "^6.3.0"
+    serve-index "^1.9.1"
+    sockjs "^0.3.21"
+    sockjs-client "^1.5.0"
+    spdy "^4.0.2"
+    strip-ansi "^3.0.1"
+    supports-color "^6.1.0"
+    url "^0.11.0"
+    webpack-dev-middleware "^3.7.2"
+    webpack-log "^2.0.0"
+    ws "^6.2.1"
+    yargs "^13.3.2"
+
+webpack-log@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/webpack-log/-/webpack-log-2.0.0.tgz#5b7928e0637593f119d32f6227c1e0ac31e1b47f"
+  integrity sha512-cX8G2vR/85UYG59FgkoMamwHUIkSSlV3bBMRsbxVXVUk2j6NleCKjQ/WE9eYg9WY4w25O9w8wKP4rzNZFmUcUg==
+  dependencies:
+    ansi-colors "^3.0.0"
+    uuid "^3.3.2"
+
+webpack-manifest-plugin@2.2.0:
+  version "2.2.0"
+  resolved "https://registry.yarnpkg.com/webpack-manifest-plugin/-/webpack-manifest-plugin-2.2.0.tgz#19ca69b435b0baec7e29fbe90fb4015de2de4f16"
+  integrity sha512-9S6YyKKKh/Oz/eryM1RyLVDVmy3NSPV0JXMRhZ18fJsq+AwGxUY34X54VNwkzYcEmEkDwNxuEOboCZEebJXBAQ==
+  dependencies:
+    fs-extra "^7.0.0"
+    lodash ">=3.5 <5"
+    object.entries "^1.1.0"
+    tapable "^1.0.0"
+
+webpack-sources@^1.1.0, webpack-sources@^1.3.0, webpack-sources@^1.4.0, webpack-sources@^1.4.1, webpack-sources@^1.4.3:
+  version "1.4.3"
+  resolved "https://registry.yarnpkg.com/webpack-sources/-/webpack-sources-1.4.3.tgz#eedd8ec0b928fbf1cbfe994e22d2d890f330a933"
+  integrity sha512-lgTS3Xhv1lCOKo7SA5TjKXMjpSM4sBjNV5+q2bqesbSPs5FjGmU6jjtBSkX9b4qW87vDIsCIlUPOEhbZrMdjeQ==
+  dependencies:
+    source-list-map "^2.0.0"
+    source-map "~0.6.1"
+
+webpack@4.44.2:
+  version "4.44.2"
+  resolved "https://registry.yarnpkg.com/webpack/-/webpack-4.44.2.tgz#6bfe2b0af055c8b2d1e90ed2cd9363f841266b72"
+  integrity sha512-6KJVGlCxYdISyurpQ0IPTklv+DULv05rs2hseIXer6D7KrUicRDLFb4IUM1S6LUAKypPM/nSiVSuv8jHu1m3/Q==
+  dependencies:
+    "@webassemblyjs/ast" "1.9.0"
+    "@webassemblyjs/helper-module-context" "1.9.0"
+    "@webassemblyjs/wasm-edit" "1.9.0"
+    "@webassemblyjs/wasm-parser" "1.9.0"
+    acorn "^6.4.1"
+    ajv "^6.10.2"
+    ajv-keywords "^3.4.1"
+    chrome-trace-event "^1.0.2"
+    enhanced-resolve "^4.3.0"
+    eslint-scope "^4.0.3"
+    json-parse-better-errors "^1.0.2"
+    loader-runner "^2.4.0"
+    loader-utils "^1.2.3"
+    memory-fs "^0.4.1"
+    micromatch "^3.1.10"
+    mkdirp "^0.5.3"
+    neo-async "^2.6.1"
+    node-libs-browser "^2.2.1"
+    schema-utils "^1.0.0"
+    tapable "^1.1.3"
+    terser-webpack-plugin "^1.4.3"
+    watchpack "^1.7.4"
+    webpack-sources "^1.4.1"
+
+websocket-driver@>=0.5.1, websocket-driver@^0.7.4:
+  version "0.7.4"
+  resolved "https://registry.yarnpkg.com/websocket-driver/-/websocket-driver-0.7.4.tgz#89ad5295bbf64b480abcba31e4953aca706f5760"
+  integrity sha512-b17KeDIQVjvb0ssuSDF2cYXSg2iztliJ4B9WdsuB6J952qCPKmnVq4DyW5motImXHDC1cBT/1UezrJVsKw5zjg==
+  dependencies:
+    http-parser-js ">=0.5.1"
+    safe-buffer ">=5.1.0"
+    websocket-extensions ">=0.1.1"
+
+websocket-extensions@>=0.1.1:
+  version "0.1.4"
+  resolved "https://registry.yarnpkg.com/websocket-extensions/-/websocket-extensions-0.1.4.tgz#7f8473bc839dfd87608adb95d7eb075211578a42"
+  integrity sha512-OqedPIGOfsDlo31UNwYbCFMSaO9m9G/0faIHj5/dZFDMFqPTcx6UwqyOy3COEaEOg/9VsGIpdqn62W5KhoKSpg==
+
+whatwg-encoding@^1.0.5:
+  version "1.0.5"
+  resolved "https://registry.yarnpkg.com/whatwg-encoding/-/whatwg-encoding-1.0.5.tgz#5abacf777c32166a51d085d6b4f3e7d27113ddb0"
+  integrity sha512-b5lim54JOPN9HtzvK9HFXvBma/rnfFeqsic0hSpjtDbVxR3dJKLc+KB4V6GgiGOvl7CY/KNh8rxSo9DKQrnUEw==
+  dependencies:
+    iconv-lite "0.4.24"
+
+whatwg-fetch@^3.4.1:
+  version "3.6.1"
+  resolved "https://registry.yarnpkg.com/whatwg-fetch/-/whatwg-fetch-3.6.1.tgz#93bc4005af6c2cc30ba3e42ec3125947c8f54ed3"
+  integrity sha512-IEmN/ZfmMw6G1hgZpVd0LuZXOQDisrMOZrzYd5x3RAK4bMPlJohKUZWZ9t/QsTvH0dV9TbPDcc2OSuIDcihnHA==
+
+whatwg-mimetype@^2.3.0:
+  version "2.3.0"
+  resolved "https://registry.yarnpkg.com/whatwg-mimetype/-/whatwg-mimetype-2.3.0.tgz#3d4b1e0312d2079879f826aff18dbeeca5960fbf"
+  integrity sha512-M4yMwr6mAnQz76TbJm914+gPpB/nCwvZbJU28cUD6dR004SAxDLOOSUaB1JDRqLtaOV/vi0IC5lEAGFgrjGv/g==
+
+whatwg-url@^8.0.0:
+  version "8.4.0"
+  resolved "https://registry.yarnpkg.com/whatwg-url/-/whatwg-url-8.4.0.tgz#50fb9615b05469591d2b2bd6dfaed2942ed72837"
+  integrity sha512-vwTUFf6V4zhcPkWp/4CQPr1TW9Ml6SF4lVyaIMBdJw5i6qUUJ1QWM4Z6YYVkfka0OUIzVo/0aNtGVGk256IKWw==
+  dependencies:
+    lodash.sortby "^4.7.0"
+    tr46 "^2.0.2"
+    webidl-conversions "^6.1.0"
+
+which-module@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/which-module/-/which-module-2.0.0.tgz#d9ef07dce77b9902b8a3a8fa4b31c3e3f7e6e87a"
+  integrity sha1-2e8H3Od7mQK4o6j6SzHD4/fm6Ho=
+
+which@^1.2.9, which@^1.3.1:
+  version "1.3.1"
+  resolved "https://registry.yarnpkg.com/which/-/which-1.3.1.tgz#a45043d54f5805316da8d62f9f50918d3da70b0a"
+  integrity sha512-HxJdYWq1MTIQbJ3nw0cqssHoTNU267KlrDuGZ1WYlxDStUtKUhOaJmh112/TZmHxxUfuJqPXSOm7tDyas0OSIQ==
+  dependencies:
+    isexe "^2.0.0"
+
+which@^2.0.1, which@^2.0.2:
+  version "2.0.2"
+  resolved "https://registry.yarnpkg.com/which/-/which-2.0.2.tgz#7c6a8dd0a636a0327e10b59c9286eee93f3f51b1"
+  integrity sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==
+  dependencies:
+    isexe "^2.0.0"
+
+word-wrap@^1.2.3, word-wrap@~1.2.3:
+  version "1.2.3"
+  resolved "https://registry.yarnpkg.com/word-wrap/-/word-wrap-1.2.3.tgz#610636f6b1f703891bd34771ccb17fb93b47079c"
+  integrity sha512-Hz/mrNwitNRh/HUAtM/VT/5VH+ygD6DV7mYKZAtHOrbs8U7lvPS6xf7EJKMF0uW1KJCl0H701g3ZGus+muE5vQ==
+
+workbox-background-sync@^5.1.4:
+  version "5.1.4"
+  resolved "https://registry.yarnpkg.com/workbox-background-sync/-/workbox-background-sync-5.1.4.tgz#5ae0bbd455f4e9c319e8d827c055bb86c894fd12"
+  integrity sha512-AH6x5pYq4vwQvfRDWH+vfOePfPIYQ00nCEB7dJRU1e0n9+9HMRyvI63FlDvtFT2AvXVRsXvUt7DNMEToyJLpSA==
+  dependencies:
+    workbox-core "^5.1.4"
+
+workbox-broadcast-update@^5.1.4:
+  version "5.1.4"
+  resolved "https://registry.yarnpkg.com/workbox-broadcast-update/-/workbox-broadcast-update-5.1.4.tgz#0eeb89170ddca7f6914fa3523fb14462891f2cfc"
+  integrity sha512-HTyTWkqXvHRuqY73XrwvXPud/FN6x3ROzkfFPsRjtw/kGZuZkPzfeH531qdUGfhtwjmtO/ZzXcWErqVzJNdXaA==
+  dependencies:
+    workbox-core "^5.1.4"
+
+workbox-build@^5.1.4:
+  version "5.1.4"
+  resolved "https://registry.yarnpkg.com/workbox-build/-/workbox-build-5.1.4.tgz#23d17ed5c32060c363030c8823b39d0eabf4c8c7"
+  integrity sha512-xUcZn6SYU8usjOlfLb9Y2/f86Gdo+fy1fXgH8tJHjxgpo53VVsqRX0lUDw8/JuyzNmXuo8vXX14pXX2oIm9Bow==
+  dependencies:
+    "@babel/core" "^7.8.4"
+    "@babel/preset-env" "^7.8.4"
+    "@babel/runtime" "^7.8.4"
+    "@hapi/joi" "^15.1.0"
+    "@rollup/plugin-node-resolve" "^7.1.1"
+    "@rollup/plugin-replace" "^2.3.1"
+    "@surma/rollup-plugin-off-main-thread" "^1.1.1"
+    common-tags "^1.8.0"
+    fast-json-stable-stringify "^2.1.0"
+    fs-extra "^8.1.0"
+    glob "^7.1.6"
+    lodash.template "^4.5.0"
+    pretty-bytes "^5.3.0"
+    rollup "^1.31.1"
+    rollup-plugin-babel "^4.3.3"
+    rollup-plugin-terser "^5.3.1"
+    source-map "^0.7.3"
+    source-map-url "^0.4.0"
+    stringify-object "^3.3.0"
+    strip-comments "^1.0.2"
+    tempy "^0.3.0"
+    upath "^1.2.0"
+    workbox-background-sync "^5.1.4"
+    workbox-broadcast-update "^5.1.4"
+    workbox-cacheable-response "^5.1.4"
+    workbox-core "^5.1.4"
+    workbox-expiration "^5.1.4"
+    workbox-google-analytics "^5.1.4"
+    workbox-navigation-preload "^5.1.4"
+    workbox-precaching "^5.1.4"
+    workbox-range-requests "^5.1.4"
+    workbox-routing "^5.1.4"
+    workbox-strategies "^5.1.4"
+    workbox-streams "^5.1.4"
+    workbox-sw "^5.1.4"
+    workbox-window "^5.1.4"
+
+workbox-cacheable-response@^5.1.4:
+  version "5.1.4"
+  resolved "https://registry.yarnpkg.com/workbox-cacheable-response/-/workbox-cacheable-response-5.1.4.tgz#9ff26e1366214bdd05cf5a43da9305b274078a54"
+  integrity sha512-0bfvMZs0Of1S5cdswfQK0BXt6ulU5kVD4lwer2CeI+03czHprXR3V4Y8lPTooamn7eHP8Iywi5QjyAMjw0qauA==
+  dependencies:
+    workbox-core "^5.1.4"
+
+workbox-core@^5.1.4:
+  version "5.1.4"
+  resolved "https://registry.yarnpkg.com/workbox-core/-/workbox-core-5.1.4.tgz#8bbfb2362ecdff30e25d123c82c79ac65d9264f4"
+  integrity sha512-+4iRQan/1D8I81nR2L5vcbaaFskZC2CL17TLbvWVzQ4qiF/ytOGF6XeV54pVxAvKUtkLANhk8TyIUMtiMw2oDg==
+
+workbox-expiration@^5.1.4:
+  version "5.1.4"
+  resolved "https://registry.yarnpkg.com/workbox-expiration/-/workbox-expiration-5.1.4.tgz#92b5df461e8126114943a3b15c55e4ecb920b163"
+  integrity sha512-oDO/5iC65h2Eq7jctAv858W2+CeRW5e0jZBMNRXpzp0ZPvuT6GblUiHnAsC5W5lANs1QS9atVOm4ifrBiYY7AQ==
+  dependencies:
+    workbox-core "^5.1.4"
+
+workbox-google-analytics@^5.1.4:
+  version "5.1.4"
+  resolved "https://registry.yarnpkg.com/workbox-google-analytics/-/workbox-google-analytics-5.1.4.tgz#b3376806b1ac7d7df8418304d379707195fa8517"
+  integrity sha512-0IFhKoEVrreHpKgcOoddV+oIaVXBFKXUzJVBI+nb0bxmcwYuZMdteBTp8AEDJacENtc9xbR0wa9RDCnYsCDLjA==
+  dependencies:
+    workbox-background-sync "^5.1.4"
+    workbox-core "^5.1.4"
+    workbox-routing "^5.1.4"
+    workbox-strategies "^5.1.4"
+
+workbox-navigation-preload@^5.1.4:
+  version "5.1.4"
+  resolved "https://registry.yarnpkg.com/workbox-navigation-preload/-/workbox-navigation-preload-5.1.4.tgz#30d1b720d26a05efc5fa11503e5cc1ed5a78902a"
+  integrity sha512-Wf03osvK0wTflAfKXba//QmWC5BIaIZARU03JIhAEO2wSB2BDROWI8Q/zmianf54kdV7e1eLaIEZhth4K4MyfQ==
+  dependencies:
+    workbox-core "^5.1.4"
+
+workbox-precaching@^5.1.4:
+  version "5.1.4"
+  resolved "https://registry.yarnpkg.com/workbox-precaching/-/workbox-precaching-5.1.4.tgz#874f7ebdd750dd3e04249efae9a1b3f48285fe6b"
+  integrity sha512-gCIFrBXmVQLFwvAzuGLCmkUYGVhBb7D1k/IL7pUJUO5xacjLcFUaLnnsoVepBGAiKw34HU1y/YuqvTKim9qAZA==
+  dependencies:
+    workbox-core "^5.1.4"
+
+workbox-range-requests@^5.1.4:
+  version "5.1.4"
+  resolved "https://registry.yarnpkg.com/workbox-range-requests/-/workbox-range-requests-5.1.4.tgz#7066a12c121df65bf76fdf2b0868016aa2bab859"
+  integrity sha512-1HSujLjgTeoxHrMR2muDW2dKdxqCGMc1KbeyGcmjZZAizJTFwu7CWLDmLv6O1ceWYrhfuLFJO+umYMddk2XMhw==
+  dependencies:
+    workbox-core "^5.1.4"
+
+workbox-routing@^5.1.4:
+  version "5.1.4"
+  resolved "https://registry.yarnpkg.com/workbox-routing/-/workbox-routing-5.1.4.tgz#3e8cd86bd3b6573488d1a2ce7385e547b547e970"
+  integrity sha512-8ljknRfqE1vEQtnMtzfksL+UXO822jJlHTIR7+BtJuxQ17+WPZfsHqvk1ynR/v0EHik4x2+826Hkwpgh4GKDCw==
+  dependencies:
+    workbox-core "^5.1.4"
+
+workbox-strategies@^5.1.4:
+  version "5.1.4"
+  resolved "https://registry.yarnpkg.com/workbox-strategies/-/workbox-strategies-5.1.4.tgz#96b1418ccdfde5354612914964074d466c52d08c"
+  integrity sha512-VVS57LpaJTdjW3RgZvPwX0NlhNmscR7OQ9bP+N/34cYMDzXLyA6kqWffP6QKXSkca1OFo/v6v7hW7zrrguo6EA==
+  dependencies:
+    workbox-core "^5.1.4"
+    workbox-routing "^5.1.4"
+
+workbox-streams@^5.1.4:
+  version "5.1.4"
+  resolved "https://registry.yarnpkg.com/workbox-streams/-/workbox-streams-5.1.4.tgz#05754e5e3667bdc078df2c9315b3f41210d8cac0"
+  integrity sha512-xU8yuF1hI/XcVhJUAfbQLa1guQUhdLMPQJkdT0kn6HP5CwiPOGiXnSFq80rAG4b1kJUChQQIGPrq439FQUNVrw==
+  dependencies:
+    workbox-core "^5.1.4"
+    workbox-routing "^5.1.4"
+
+workbox-sw@^5.1.4:
+  version "5.1.4"
+  resolved "https://registry.yarnpkg.com/workbox-sw/-/workbox-sw-5.1.4.tgz#2bb34c9f7381f90d84cef644816d45150011d3db"
+  integrity sha512-9xKnKw95aXwSNc8kk8gki4HU0g0W6KXu+xks7wFuC7h0sembFnTrKtckqZxbSod41TDaGh+gWUA5IRXrL0ECRA==
+
+workbox-webpack-plugin@5.1.4:
+  version "5.1.4"
+  resolved "https://registry.yarnpkg.com/workbox-webpack-plugin/-/workbox-webpack-plugin-5.1.4.tgz#7bfe8c16e40fe9ed8937080ac7ae9c8bde01e79c"
+  integrity sha512-PZafF4HpugZndqISi3rZ4ZK4A4DxO8rAqt2FwRptgsDx7NF8TVKP86/huHquUsRjMGQllsNdn4FNl8CD/UvKmQ==
+  dependencies:
+    "@babel/runtime" "^7.5.5"
+    fast-json-stable-stringify "^2.0.0"
+    source-map-url "^0.4.0"
+    upath "^1.1.2"
+    webpack-sources "^1.3.0"
+    workbox-build "^5.1.4"
+
+workbox-window@^5.1.4:
+  version "5.1.4"
+  resolved "https://registry.yarnpkg.com/workbox-window/-/workbox-window-5.1.4.tgz#2740f7dea7f93b99326179a62f1cc0ca2c93c863"
+  integrity sha512-vXQtgTeMCUq/4pBWMfQX8Ee7N2wVC4Q7XYFqLnfbXJ2hqew/cU1uMTD2KqGEgEpE4/30luxIxgE+LkIa8glBYw==
+  dependencies:
+    workbox-core "^5.1.4"
+
+worker-farm@^1.7.0:
+  version "1.7.0"
+  resolved "https://registry.yarnpkg.com/worker-farm/-/worker-farm-1.7.0.tgz#26a94c5391bbca926152002f69b84a4bf772e5a8"
+  integrity sha512-rvw3QTZc8lAxyVrqcSGVm5yP/IJ2UcB3U0graE3LCFoZ0Yn2x4EoVSqJKdB/T5M+FLcRPjz4TDacRf3OCfNUzw==
+  dependencies:
+    errno "~0.1.7"
+
+worker-rpc@^0.1.0:
+  version "0.1.1"
+  resolved "https://registry.yarnpkg.com/worker-rpc/-/worker-rpc-0.1.1.tgz#cb565bd6d7071a8f16660686051e969ad32f54d5"
+  integrity sha512-P1WjMrUB3qgJNI9jfmpZ/htmBEjFh//6l/5y8SD9hg1Ef5zTTVVoRjTrTEzPrNBQvmhMxkoTsjOXN10GWU7aCg==
+  dependencies:
+    microevent.ts "~0.1.1"
+
+wrap-ansi@^5.1.0:
+  version "5.1.0"
+  resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-5.1.0.tgz#1fd1f67235d5b6d0fee781056001bfb694c03b09"
+  integrity sha512-QC1/iN/2/RPVJ5jYK8BGttj5z83LmSKmvbvrXPNCLZSEb32KKVDJDl/MOt2N01qU2H/FkzEa9PKto1BqDjtd7Q==
+  dependencies:
+    ansi-styles "^3.2.0"
+    string-width "^3.0.0"
+    strip-ansi "^5.0.0"
+
+wrap-ansi@^6.2.0:
+  version "6.2.0"
+  resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-6.2.0.tgz#e9393ba07102e6c91a3b221478f0257cd2856e53"
+  integrity sha512-r6lPcBGxZXlIcymEu7InxDMhdW0KDxpLgoFLcguasxCaJ/SOIZwINatK9KY/tf+ZrlywOKU0UDj3ATXUBfxJXA==
+  dependencies:
+    ansi-styles "^4.0.0"
+    string-width "^4.1.0"
+    strip-ansi "^6.0.0"
+
+wrappy@1:
+  version "1.0.2"
+  resolved "https://registry.yarnpkg.com/wrappy/-/wrappy-1.0.2.tgz#b5243d8f3ec1aa35f1364605bc0d1036e30ab69f"
+  integrity sha1-tSQ9jz7BqjXxNkYFvA0QNuMKtp8=
+
+write-file-atomic@^3.0.0:
+  version "3.0.3"
+  resolved "https://registry.yarnpkg.com/write-file-atomic/-/write-file-atomic-3.0.3.tgz#56bd5c5a5c70481cd19c571bd39ab965a5de56e8"
+  integrity sha512-AvHcyZ5JnSfq3ioSyjrBkH9yW4m7Ayk8/9My/DD9onKeu/94fwrMocemO2QAJFAlnnDN+ZDS+ZjAR5ua1/PV/Q==
+  dependencies:
+    imurmurhash "^0.1.4"
+    is-typedarray "^1.0.0"
+    signal-exit "^3.0.2"
+    typedarray-to-buffer "^3.1.5"
+
+ws@^6.2.1:
+  version "6.2.1"
+  resolved "https://registry.yarnpkg.com/ws/-/ws-6.2.1.tgz#442fdf0a47ed64f59b6a5d8ff130f4748ed524fb"
+  integrity sha512-GIyAXC2cB7LjvpgMt9EKS2ldqr0MTrORaleiOno6TweZ6r3TKtoFQWay/2PceJ3RuBasOHzXNn5Lrw1X0bEjqA==
+  dependencies:
+    async-limiter "~1.0.0"
+
+ws@^7.2.3:
+  version "7.4.3"
+  resolved "https://registry.yarnpkg.com/ws/-/ws-7.4.3.tgz#1f9643de34a543b8edb124bdcbc457ae55a6e5cd"
+  integrity sha512-hr6vCR76GsossIRsr8OLR9acVVm1jyfEWvhbNjtgPOrfvAlKzvyeg/P6r8RuDjRyrcQoPQT7K0DGEPc7Ae6jzA==
+
+xml-name-validator@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/xml-name-validator/-/xml-name-validator-3.0.0.tgz#6ae73e06de4d8c6e47f9fb181f78d648ad457c6a"
+  integrity sha512-A5CUptxDsvxKJEU3yO6DuWBSJz/qizqzJKOMIfUJHETbBw/sFaDxgd6fxm1ewUaM0jZ444Fc5vC5ROYurg/4Pw==
+
+xmlchars@^2.2.0:
+  version "2.2.0"
+  resolved "https://registry.yarnpkg.com/xmlchars/-/xmlchars-2.2.0.tgz#060fe1bcb7f9c76fe2a17db86a9bc3ab894210cb"
+  integrity sha512-JZnDKK8B0RCDw84FNdDAIpZK+JuJw+s7Lz8nksI7SIuU3UXJJslUthsi+uWBUYOwPFwW7W7PRLRfUKpxjtjFCw==
+
+xtend@^4.0.0, xtend@~4.0.1:
+  version "4.0.2"
+  resolved "https://registry.yarnpkg.com/xtend/-/xtend-4.0.2.tgz#bb72779f5fa465186b1f438f674fa347fdb5db54"
+  integrity sha512-LKYU1iAXJXUgAXn9URjiu+MWhyUXHsvfp7mcuYm9dSUKK0/CjtrUwFAxD82/mCWbtLsGjFIad0wIsod4zrTAEQ==
+
+y18n@^4.0.0:
+  version "4.0.1"
+  resolved "https://registry.yarnpkg.com/y18n/-/y18n-4.0.1.tgz#8db2b83c31c5d75099bb890b23f3094891e247d4"
+  integrity sha512-wNcy4NvjMYL8gogWWYAO7ZFWFfHcbdbE57tZO8e4cbpj8tfUcwrwqSl3ad8HxpYWCdXcJUCeKKZS62Av1affwQ==
+
+yallist@^3.0.2:
+  version "3.1.1"
+  resolved "https://registry.yarnpkg.com/yallist/-/yallist-3.1.1.tgz#dbb7daf9bfd8bac9ab45ebf602b8cbad0d5d08fd"
+  integrity sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g==
+
+yallist@^4.0.0:
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/yallist/-/yallist-4.0.0.tgz#9bb92790d9c0effec63be73519e11a35019a3a72"
+  integrity sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==
+
+yaml@^1.10.0, yaml@^1.7.2:
+  version "1.10.0"
+  resolved "https://registry.yarnpkg.com/yaml/-/yaml-1.10.0.tgz#3b593add944876077d4d683fee01081bd9fff31e"
+  integrity sha512-yr2icI4glYaNG+KWONODapy2/jDdMSDnrONSjblABjD9B4Z5LgiircSt8m8sRZFNi08kG9Sm0uSHtEmP3zaEGg==
+
+yargs-parser@^13.1.2:
+  version "13.1.2"
+  resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-13.1.2.tgz#130f09702ebaeef2650d54ce6e3e5706f7a4fb38"
+  integrity sha512-3lbsNRf/j+A4QuSZfDRA7HRSfWrzO0YjqTJd5kjAq37Zep1CEgaYmrH9Q3GwPiB9cHyd1Y1UwggGhJGoxipbzg==
+  dependencies:
+    camelcase "^5.0.0"
+    decamelize "^1.2.0"
+
+yargs-parser@^18.1.2:
+  version "18.1.3"
+  resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-18.1.3.tgz#be68c4975c6b2abf469236b0c870362fab09a7b0"
+  integrity sha512-o50j0JeToy/4K6OZcaQmW6lyXXKhq7csREXcDwk2omFPJEwUNOVtJKvmDr9EI1fAJZUyZcRF7kxGBWmRXudrCQ==
+  dependencies:
+    camelcase "^5.0.0"
+    decamelize "^1.2.0"
+
+yargs@^13.3.2:
+  version "13.3.2"
+  resolved "https://registry.yarnpkg.com/yargs/-/yargs-13.3.2.tgz#ad7ffefec1aa59565ac915f82dccb38a9c31a2dd"
+  integrity sha512-AX3Zw5iPruN5ie6xGRIDgqkT+ZhnRlZMLMHAs8tg7nRruy2Nb+i5o9bwghAogtM08q1dpr2LVoS8KSTMYpWXUw==
+  dependencies:
+    cliui "^5.0.0"
+    find-up "^3.0.0"
+    get-caller-file "^2.0.1"
+    require-directory "^2.1.1"
+    require-main-filename "^2.0.0"
+    set-blocking "^2.0.0"
+    string-width "^3.0.0"
+    which-module "^2.0.0"
+    y18n "^4.0.0"
+    yargs-parser "^13.1.2"
+
+yargs@^15.4.1:
+  version "15.4.1"
+  resolved "https://registry.yarnpkg.com/yargs/-/yargs-15.4.1.tgz#0d87a16de01aee9d8bec2bfbf74f67851730f4f8"
+  integrity sha512-aePbxDmcYW++PaqBsJ+HYUFwCdv4LVvdnhBy78E57PIor8/OVvhMrADFFEDh8DHDFRv/O9i3lPhsENjO7QX0+A==
+  dependencies:
+    cliui "^6.0.0"
+    decamelize "^1.2.0"
+    find-up "^4.1.0"
+    get-caller-file "^2.0.1"
+    require-directory "^2.1.1"
+    require-main-filename "^2.0.0"
+    set-blocking "^2.0.0"
+    string-width "^4.2.0"
+    which-module "^2.0.0"
+    y18n "^4.0.0"
+    yargs-parser "^18.1.2"
+
+yocto-queue@^0.1.0:
+  version "0.1.0"
+  resolved "https://registry.yarnpkg.com/yocto-queue/-/yocto-queue-0.1.0.tgz#0294eb3dee05028d31ee1a5fa2c556a6aaf10a1b"
+  integrity sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==

From 9c1e5bd19347635ea9f373bcf93f2cea0231d50a Mon Sep 17 00:00:00 2001
From: Andy Grove <andygrove73@gmail.com>
Date: Fri, 16 Apr 2021 13:14:16 -0400
Subject: [PATCH 076/719] ARROW-12421: [Rust] [DataFusion] Disable repartition
 rule

Remove repartition rule to fix test regression when running on 24 core threadripper CPU.

This will need further investigation but this patch is needed for the 4.0.0 release.

Closes #10069 from andygrove/remove-repartition

Authored-by: Andy Grove <andygrove73@gmail.com>
Signed-off-by: Andrew Lamb <andrew@nerdnetworks.org>
---
 rust/datafusion/src/execution/context.rs | 2 --
 1 file changed, 2 deletions(-)

diff --git a/rust/datafusion/src/execution/context.rs b/rust/datafusion/src/execution/context.rs
index 833d7b66281..b0f86ec1f97 100644
--- a/rust/datafusion/src/execution/context.rs
+++ b/rust/datafusion/src/execution/context.rs
@@ -59,7 +59,6 @@ use crate::optimizer::optimizer::OptimizerRule;
 use crate::optimizer::projection_push_down::ProjectionPushDown;
 use crate::physical_optimizer::coalesce_batches::CoalesceBatches;
 use crate::physical_optimizer::merge_exec::AddMergeExec;
-use crate::physical_optimizer::repartition::Repartition;
 
 use crate::physical_plan::csv::CsvReadOptions;
 use crate::physical_plan::planner::DefaultPhysicalPlanner;
@@ -642,7 +641,6 @@ impl ExecutionConfig {
                 Arc::new(LimitPushDown::new()),
             ],
             physical_optimizers: vec![
-                Arc::new(Repartition::new()),
                 Arc::new(CoalesceBatches::new()),
                 Arc::new(AddMergeExec::new()),
             ],

From fdd6ab11a71d4c40b4d24afa8458fed3d4589980 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Sat, 17 Apr 2021 10:25:26 -0400
Subject: [PATCH 077/719] ARROW-12429: [C++] Fix incorrectly registered test

Recent versions of Googletest catch this, but Googletest doesn't have releases anymore - they expect everyone to just use any commit from master. Also, the latest version has link issues when used with Arrow.

Closes #10075 from lidavidm/arrow-12429

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/src/arrow/util/async_generator_test.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/src/arrow/util/async_generator_test.cc b/cpp/src/arrow/util/async_generator_test.cc
index 38f71ba6c6a..474c4f6a0e7 100644
--- a/cpp/src/arrow/util/async_generator_test.cc
+++ b/cpp/src/arrow/util/async_generator_test.cc
@@ -594,7 +594,7 @@ TEST_P(MergedGeneratorTestFixture, MergedParallelStress) {
   }
 }
 
-INSTANTIATE_TEST_SUITE_P(MergedGeneratorTests, GeneratorTestFixture,
+INSTANTIATE_TEST_SUITE_P(MergedGeneratorTests, MergedGeneratorTestFixture,
                          ::testing::Values(false, true));
 
 TEST(TestAsyncUtil, FromVector) {

From ade457f92228407ded88f6f8b1c17cf876d01cd5 Mon Sep 17 00:00:00 2001
From: Kazuaki Ishizaki <ishizaki@jp.ibm.com>
Date: Sun, 18 Apr 2021 05:15:03 +0900
Subject: [PATCH 078/719] ARROW-12419: [Java] Remove to download flatc binary
 for s390x

This PR is a follow-up of #10058. #10058 avoids executing flatc during the Java build process by statically generating Java files from the schema. Now, flatc is not necessary.

Since flatc for s390x is explicitly downloaded in a script, we can drop this download.

Closes #10067 from kiszk/ARROW-12419

Authored-by: Kazuaki Ishizaki <ishizaki@jp.ibm.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 ci/scripts/java_build.sh | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/ci/scripts/java_build.sh b/ci/scripts/java_build.sh
index 54cddb50372..b8a7f7ced74 100755
--- a/ci/scripts/java_build.sh
+++ b/ci/scripts/java_build.sh
@@ -30,15 +30,6 @@ if [[ "$(uname -s)" == "Linux" ]] && [[ "$(uname -m)" == "s390x" ]]; then
   wget="wget"
   bintray_base_url="https://dl.bintray.com/apache/arrow"
 
-  bintray_dir="flatc-binary"
-  group="com.github.icexelloss"
-  artifact="flatc-linux-s390_64"
-  ver="1.9.0"
-  extension="exe"
-  target=${artifact}-${ver}.${extension}
-  ${wget} ${bintray_base_url}/${bintray_dir}/${ver}/${target}
-  ${mvn_install} -DgroupId=${group} -DartifactId=${artifact} -Dversion=${ver} -Dpackaging=${extension} -Dfile=$(pwd)/${target}
-
   bintray_dir="protoc-binary"
   group="com.google.protobuf"
   artifact="protoc"

From d4c5c3c02e52e76579d95a84ae33491b9c1284c9 Mon Sep 17 00:00:00 2001
From: Adam Lippai <adam@rigo.sk>
Date: Sat, 17 Apr 2021 15:26:29 -0600
Subject: [PATCH 079/719] ARROW-12433: [Rust] Update nightly rust version

Closes #10082 from alippai/new-nightly

Authored-by: Adam Lippai <adam@rigo.sk>
Signed-off-by: Andy Grove <andygrove73@gmail.com>
---
 .env                                | 2 +-
 .github/workflows/rust.yml          | 6 +++---
 ci/docker/linux-apt-lint.dockerfile | 2 +-
 rust/arrow/README.md                | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/.env b/.env
index c4eb6d2de43..4fb24bed40a 100644
--- a/.env
+++ b/.env
@@ -47,7 +47,7 @@ FEDORA=33
 PYTHON=3.6
 LLVM=11
 CLANG_TOOLS=8
-RUST=nightly-2020-11-24
+RUST=nightly-2021-03-24
 GO=1.15
 NODE=14
 MAVEN=3.5.4
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 6d87e6b6260..4bb17a2ecaf 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -155,7 +155,7 @@ jobs:
     strategy:
       matrix:
         arch: [amd64]
-        rust: [nightly-2020-11-24]
+        rust: [nightly-2021-03-24]
     container:
       image: ${{ matrix.arch }}/rust
       env:
@@ -267,7 +267,7 @@ jobs:
     strategy:
       matrix:
         arch: [amd64]
-        rust: [nightly-2021-01-19]
+        rust: [nightly-2021-03-24]
     steps:
       - uses: actions/checkout@v2
         with:
@@ -389,7 +389,7 @@ jobs:
     strategy:
       matrix:
         arch: [amd64]
-        rust: [nightly-2020-11-24]
+        rust: [nightly-2021-03-24]
     container:
       image: ${{ matrix.arch }}/rust
       env:
diff --git a/ci/docker/linux-apt-lint.dockerfile b/ci/docker/linux-apt-lint.dockerfile
index 4be9fcc7f62..66538919c49 100644
--- a/ci/docker/linux-apt-lint.dockerfile
+++ b/ci/docker/linux-apt-lint.dockerfile
@@ -46,7 +46,7 @@ COPY ci/scripts/install_iwyu.sh /arrow/ci/scripts/
 RUN arrow/ci/scripts/install_iwyu.sh /tmp/iwyu /usr/local ${clang_tools}
 
 # Rust linter
-ARG rust=nightly-2019-09-25
+ARG rust=nightly-2021-03-24
 RUN curl https://sh.rustup.rs -sSf | \
     sh -s -- --default-toolchain stable -y
 ENV PATH /root/.cargo/bin:$PATH
diff --git a/rust/arrow/README.md b/rust/arrow/README.md
index 54f00a492b6..674c3fc6c8b 100644
--- a/rust/arrow/README.md
+++ b/rust/arrow/README.md
@@ -80,7 +80,7 @@ Arrow uses the following features:
 * `simd` - Arrow uses the [packed_simd](https://crates.io/crates/packed_simd) crate to optimize many of the
  implementations in the [compute](https://github.com/apache/arrow/tree/master/rust/arrow/src/compute)
  module using SIMD intrinsics. These optimizations are turned *off* by default.
- If the `simd` feature is enabled, an unstable version of Rust is required (we test with `nightly-2020-11-24`)
+ If the `simd` feature is enabled, an unstable version of Rust is required (we test with `nightly-2021-03-24`)
 * `flight` which contains useful functions to convert between the Flight wire format and Arrow data
 * `prettyprint` which is a utility for printing record batches
 

From bb53986a6b7fd486da5c4d0f24de3095c2a95630 Mon Sep 17 00:00:00 2001
From: Ximo Guanter <ximo.guanter@gmail.com>
Date: Sat, 17 Apr 2021 15:35:29 -0600
Subject: [PATCH 080/719] ARROW-12334: [Rust] [Ballista] Aggregate queries
 producing incorrect results

The function that calculated job status from the task status was aggregating all of the partition locations. This is incorrect. Only the partitions of the last stage should be collected.

@andygrove, could you confirm that my assumption that the output of any query is contained in the last stage? If there is a possibility of having multiple output stages, then this fix is incorrect.

Closes #10083 from edrevo/fix-shuffle-reads

Authored-by: Ximo Guanter <ximo.guanter@gmail.com>
Signed-off-by: Andy Grove <andygrove73@gmail.com>
---
 rust/ballista/rust/scheduler/src/state/mod.rs | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/rust/ballista/rust/scheduler/src/state/mod.rs b/rust/ballista/rust/scheduler/src/state/mod.rs
index 614da05c0aa..794e58fb376 100644
--- a/rust/ballista/rust/scheduler/src/state/mod.rs
+++ b/rust/ballista/rust/scheduler/src/state/mod.rs
@@ -349,6 +349,15 @@ impl SchedulerState {
         }
 
         // Check for job completion
+        let last_stage = statuses
+            .iter()
+            .map(|task| task.partition_id.as_ref().unwrap().stage_id)
+            .max()
+            .unwrap();
+        let statuses: Vec<_> = statuses
+            .into_iter()
+            .filter(|task| task.partition_id.as_ref().unwrap().stage_id == last_stage)
+            .collect();
         let mut job_status = statuses
             .iter()
             .map(|status| match &status.status {

From 7ad51bef8841f5c316fc304fc756c70d89cbf9d9 Mon Sep 17 00:00:00 2001
From: Andy Grove <andygrove73@gmail.com>
Date: Sat, 17 Apr 2021 16:27:37 -0600
Subject: [PATCH 081/719] ARROW-12437: [Rust] [Ballista] Create DataFusion
 context without repartition

Ballista plans must not include `RepartitionExec` because it results in incorrect results. Ballista needs to manage it's own repartitioning in a distributed-aware way later on. For now, we just need to configure the DataFusion context to disable repartition.

Closes #10086 from andygrove/ballista-no-repartition

Authored-by: Andy Grove <andygrove73@gmail.com>
Signed-off-by: Andy Grove <andygrove73@gmail.com>
---
 rust/ballista/rust/client/src/context.rs |  8 ++++----
 rust/ballista/rust/core/src/utils.rs     | 18 ++++++++++++++++++
 rust/ballista/rust/scheduler/src/lib.rs  |  6 +++---
 3 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/rust/ballista/rust/client/src/context.rs b/rust/ballista/rust/client/src/context.rs
index 0556c2948da..400f6b6183e 100644
--- a/rust/ballista/rust/client/src/context.rs
+++ b/rust/ballista/rust/client/src/context.rs
@@ -33,11 +33,11 @@ use ballista_core::{
     datasource::DFTableAdapter,
     error::{BallistaError, Result},
     memory_stream::MemoryStream,
+    utils::create_datafusion_context,
 };
 
 use arrow::datatypes::Schema;
 use datafusion::catalog::TableReference;
-use datafusion::execution::context::ExecutionContext;
 use datafusion::logical_plan::{DFSchema, Expr, LogicalPlan, Partitioning};
 use datafusion::physical_plan::csv::CsvReadOptions;
 use datafusion::{dataframe::DataFrame, physical_plan::RecordBatchStream};
@@ -94,7 +94,7 @@ impl BallistaContext {
         let path = fs::canonicalize(&path)?;
 
         // use local DataFusion context for now but later this might call the scheduler
-        let mut ctx = ExecutionContext::new();
+        let mut ctx = create_datafusion_context();
         let df = ctx.read_parquet(path.to_str().unwrap())?;
         Ok(BallistaDataFrame::from(self.state.clone(), df))
     }
@@ -111,7 +111,7 @@ impl BallistaContext {
         let path = fs::canonicalize(&path)?;
 
         // use local DataFusion context for now but later this might call the scheduler
-        let mut ctx = ExecutionContext::new();
+        let mut ctx = create_datafusion_context();
         let df = ctx.read_csv(path.to_str().unwrap(), options)?;
         Ok(BallistaDataFrame::from(self.state.clone(), df))
     }
@@ -143,7 +143,7 @@ impl BallistaContext {
     /// Create a DataFrame from a SQL statement
     pub fn sql(&self, sql: &str) -> Result<BallistaDataFrame> {
         // use local DataFusion context for now but later this might call the scheduler
-        let mut ctx = ExecutionContext::new();
+        let mut ctx = create_datafusion_context();
         // register tables
         let state = self.state.lock().unwrap();
         for (name, plan) in &state.tables {
diff --git a/rust/ballista/rust/core/src/utils.rs b/rust/ballista/rust/core/src/utils.rs
index d1c239a585e..ee9c9557e78 100644
--- a/rust/ballista/rust/core/src/utils.rs
+++ b/rust/ballista/rust/core/src/utils.rs
@@ -33,7 +33,11 @@ use arrow::datatypes::{DataType, Field};
 use arrow::ipc::reader::FileReader;
 use arrow::ipc::writer::FileWriter;
 use arrow::record_batch::RecordBatch;
+use datafusion::execution::context::{ExecutionConfig, ExecutionContext};
 use datafusion::logical_plan::Operator;
+use datafusion::physical_optimizer::coalesce_batches::CoalesceBatches;
+use datafusion::physical_optimizer::merge_exec::AddMergeExec;
+use datafusion::physical_optimizer::optimizer::PhysicalOptimizerRule;
 use datafusion::physical_plan::coalesce_batches::CoalesceBatchesExec;
 use datafusion::physical_plan::csv::CsvExec;
 use datafusion::physical_plan::expressions::{BinaryExpr, Column, Literal};
@@ -307,3 +311,17 @@ fn build_exec_plan_diagram(
     }
     Ok(node_id)
 }
+
+/// Create a DataFusion context that is compatible with Ballista
+pub fn create_datafusion_context() -> ExecutionContext {
+    // remove Repartition rule because that isn't supported yet
+    let rules: Vec<Arc<dyn PhysicalOptimizerRule + Send + Sync>> = vec![
+        Arc::new(CoalesceBatches::new()),
+        Arc::new(AddMergeExec::new()),
+    ];
+    let config = ExecutionConfig::new()
+        .with_concurrency(1)
+        .with_repartition_joins(false)
+        .with_physical_optimizer_rules(rules);
+    ExecutionContext::with_config(config)
+}
diff --git a/rust/ballista/rust/scheduler/src/lib.rs b/rust/ballista/rust/scheduler/src/lib.rs
index 54733e32e5e..de49bc01f36 100644
--- a/rust/ballista/rust/scheduler/src/lib.rs
+++ b/rust/ballista/rust/scheduler/src/lib.rs
@@ -60,12 +60,12 @@ impl parse_arg::ParseArgFromStr for ConfigBackend {
 
 use crate::planner::DistributedPlanner;
 
-use datafusion::execution::context::ExecutionContext;
 use log::{debug, error, info, warn};
 use rand::{distributions::Alphanumeric, thread_rng, Rng};
 use tonic::{Request, Response};
 
 use self::state::{ConfigBackendClient, SchedulerState};
+use ballista_core::utils::create_datafusion_context;
 use datafusion::physical_plan::parquet::ParquetExec;
 use std::time::{Instant, SystemTime, UNIX_EPOCH};
 
@@ -254,7 +254,7 @@ impl SchedulerGrpc for SchedulerServer {
                 Query::Sql(sql) => {
                     //TODO we can't just create a new context because we need a context that has
                     // tables registered from previous SQL statements that have been executed
-                    let mut ctx = ExecutionContext::new();
+                    let mut ctx = create_datafusion_context();
                     let df = ctx.sql(&sql).map_err(|e| {
                         let msg = format!("Error parsing SQL: {}", e);
                         error!("{}", msg);
@@ -303,7 +303,7 @@ impl SchedulerGrpc for SchedulerServer {
             let job_id_spawn = job_id.clone();
             tokio::spawn(async move {
                 // create physical plan using DataFusion
-                let datafusion_ctx = ExecutionContext::new();
+                let datafusion_ctx = create_datafusion_context();
                 macro_rules! fail_job {
                     ($code :expr) => {{
                         match $code {

From 27c4fa20a7bc24f53c2d849d21855d8784e1f45f Mon Sep 17 00:00:00 2001
From: "Heres, Daniel" <danielheres@gmail.com>
Date: Sat, 17 Apr 2021 16:43:35 -0600
Subject: [PATCH 082/719] ARROW-12421: [Rust] [DataFusion] Fix topkexec failure

This reproduces the test failure for the `TopKExec` test that @andygrove was seeing in combination with a 24-core machine.

Also FYI @alamb

(Marked as ready for review to trigger CI)

Closes #10077 from Dandandan/top_k_exec_fail

Authored-by: Heres, Daniel <danielheres@gmail.com>
Signed-off-by: Andy Grove <andygrove73@gmail.com>
---
 rust/datafusion/src/execution/context.rs   |  2 +
 rust/datafusion/tests/user_defined_plan.rs | 50 +++++++++-------------
 2 files changed, 23 insertions(+), 29 deletions(-)

diff --git a/rust/datafusion/src/execution/context.rs b/rust/datafusion/src/execution/context.rs
index b0f86ec1f97..c83ca4d8de5 100644
--- a/rust/datafusion/src/execution/context.rs
+++ b/rust/datafusion/src/execution/context.rs
@@ -59,6 +59,7 @@ use crate::optimizer::optimizer::OptimizerRule;
 use crate::optimizer::projection_push_down::ProjectionPushDown;
 use crate::physical_optimizer::coalesce_batches::CoalesceBatches;
 use crate::physical_optimizer::merge_exec::AddMergeExec;
+use crate::physical_optimizer::repartition::Repartition;
 
 use crate::physical_plan::csv::CsvReadOptions;
 use crate::physical_plan::planner::DefaultPhysicalPlanner;
@@ -642,6 +643,7 @@ impl ExecutionConfig {
             ],
             physical_optimizers: vec![
                 Arc::new(CoalesceBatches::new()),
+                Arc::new(Repartition::new()),
                 Arc::new(AddMergeExec::new()),
             ],
             query_planner: Arc::new(DefaultQueryPlanner {}),
diff --git a/rust/datafusion/tests/user_defined_plan.rs b/rust/datafusion/tests/user_defined_plan.rs
index aae5c597d82..f9f24430104 100644
--- a/rust/datafusion/tests/user_defined_plan.rs
+++ b/rust/datafusion/tests/user_defined_plan.rs
@@ -58,7 +58,7 @@
 //! N elements, reducing the total amount of required buffer memory.
 //!
 
-use futures::{FutureExt, Stream, StreamExt, TryStreamExt};
+use futures::{Stream, StreamExt};
 
 use arrow::{
     array::{Int64Array, StringArray},
@@ -180,6 +180,7 @@ async fn topk_plan() -> Result<()> {
 fn make_topk_context() -> ExecutionContext {
     let config = ExecutionConfig::new()
         .with_query_planner(Arc::new(TopKQueryPlanner {}))
+        .with_concurrency(48)
         .add_optimizer_rule(Arc::new(TopKOptimizerRule {}));
 
     ExecutionContext::with_config(config)
@@ -388,6 +389,7 @@ impl ExecutionPlan for TopKExec {
             input: self.input.execute(partition).await?,
             k: self.k,
             done: false,
+            state: BTreeMap::new(),
         }))
     }
 }
@@ -400,6 +402,8 @@ struct TopKReader {
     k: usize,
     /// Have we produced the output yet?
     done: bool,
+    /// Output
+    state: BTreeMap<i64, String>,
 }
 
 /// Keeps track of the revenue from customer_id and stores if it
@@ -432,7 +436,7 @@ fn accumulate_batch(
     input_batch: &RecordBatch,
     mut top_values: BTreeMap<i64, String>,
     k: &usize,
-) -> Result<BTreeMap<i64, String>> {
+) -> BTreeMap<i64, String> {
     let num_rows = input_batch.num_rows();
     // Assuming the input columns are
     // column[0]: customer_id / UTF8
@@ -457,7 +461,7 @@ fn accumulate_batch(
             k,
         );
     }
-    Ok(top_values)
+    top_values
 }
 
 impl Stream for TopKReader {
@@ -475,41 +479,29 @@ impl Stream for TopKReader {
         // take this as immutable
         let k = self.k;
         let schema = self.schema();
-        let top_values = self
-            .input
-            .as_mut()
-            // Hard coded implementation for sales / customer_id example as BTree
-            .try_fold(
-                BTreeMap::<i64, String>::new(),
-                move |top_values, batch| async move {
-                    accumulate_batch(&batch, top_values, &k)
-                        .map_err(DataFusionError::into_arrow_external_error)
-                },
-            );
-
-        let top_values = top_values.map(|top_values| match top_values {
-            Ok(top_values) => {
-                // make output by walking over the map backwards (so values are descending)
+        let poll = self.input.poll_next_unpin(cx);
+
+        match poll {
+            Poll::Ready(Some(Ok(batch))) => {
+                self.state = accumulate_batch(&batch, self.state.clone(), &k);
+                Poll::Ready(Some(Ok(RecordBatch::new_empty(schema))))
+            }
+            Poll::Ready(None) => {
+                self.done = true;
                 let (revenue, customer): (Vec<i64>, Vec<&String>) =
-                    top_values.iter().rev().unzip();
+                    self.state.iter().rev().unzip();
 
                 let customer: Vec<&str> = customer.iter().map(|&s| &**s).collect();
-                Ok(RecordBatch::try_new(
+                Poll::Ready(Some(RecordBatch::try_new(
                     schema,
                     vec![
                         Arc::new(StringArray::from(customer)),
                         Arc::new(Int64Array::from(revenue)),
                     ],
-                )?)
+                )))
             }
-            Err(e) => Err(e),
-        });
-        let mut top_values = Box::pin(top_values.into_stream());
-
-        top_values.poll_next_unpin(cx).map(|batch| {
-            self.done = true;
-            batch
-        })
+            other => other,
+        }
     }
 }
 

From be8b48608c88440dea04e593a1ac96bb63bda4d9 Mon Sep 17 00:00:00 2001
From: Ritchie Vink <ritchie46@gmail.com>
Date: Sun, 18 Apr 2021 06:48:42 -0400
Subject: [PATCH 083/719] ARROW-12398: [Rust] remove redundant bound check in
 iterators

This PR removes the bound checks as discussed in #9994.

Furthermore I added `unsafe` versions of the `value` method to `PrimitiveArray` and `BooleanArray`. The `safe` marked methods are actually `unsafe`. This way we can slowly transition to explicitly using the `unsafe` variant and later make the "safe" one truly safe.

For the time being I also added a `debug_assert` bounds check in those "safe" methods that are `unsafe`. That way we at least get a panic in debug mode instead of UB in safe code.

Closes #10046 from ritchie46/iterator_bounds

Authored-by: Ritchie Vink <ritchie46@gmail.com>
Signed-off-by: Andrew Lamb <andrew@nerdnetworks.org>
---
 rust/arrow/src/array/array_boolean.rs   | 13 ++++-
 rust/arrow/src/array/array_primitive.rs | 14 ++++-
 rust/arrow/src/array/iterator.rs        | 70 +++++++++++++++++++++----
 3 files changed, 83 insertions(+), 14 deletions(-)

diff --git a/rust/arrow/src/array/array_boolean.rs b/rust/arrow/src/array/array_boolean.rs
index 2512a956db4..67af85d167f 100644
--- a/rust/arrow/src/array/array_boolean.rs
+++ b/rust/arrow/src/array/array_boolean.rs
@@ -67,12 +67,21 @@ impl BooleanArray {
         &self.data.buffers()[0]
     }
 
+    /// Returns the boolean value at index `i`.
+    ///
+    /// # Safety
+    /// This doesn't check bounds, the caller must ensure that index < self.len()
+    pub unsafe fn value_unchecked(&self, i: usize) -> bool {
+        let offset = i + self.offset();
+        bit_util::get_bit_raw(self.raw_values.as_ptr(), offset)
+    }
+
     /// Returns the boolean value at index `i`.
     ///
     /// Note this doesn't do any bound checking, for performance reason.
     pub fn value(&self, i: usize) -> bool {
-        let offset = i + self.offset();
-        unsafe { bit_util::get_bit_raw(self.raw_values.as_ptr(), offset) }
+        debug_assert!(i < self.len());
+        unsafe { self.value_unchecked(i) }
     }
 }
 
diff --git a/rust/arrow/src/array/array_primitive.rs b/rust/arrow/src/array/array_primitive.rs
index 2280952f12c..d2b3b6686d9 100644
--- a/rust/arrow/src/array/array_primitive.rs
+++ b/rust/arrow/src/array/array_primitive.rs
@@ -88,14 +88,24 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
         PrimitiveBuilder::<T>::new(capacity)
     }
 
+    /// Returns the primitive value at index `i`.
+    ///
+    /// # Safety
+    ///
+    /// caller must ensure that the passed in offset is less than the array len()
+    pub unsafe fn value_unchecked(&self, i: usize) -> T::Native {
+        let offset = i + self.offset();
+        *self.raw_values.as_ptr().add(offset)
+    }
+
     /// Returns the primitive value at index `i`.
     ///
     /// Note this doesn't do any bound checking, for performance reason.
     /// # Safety
     /// caller must ensure that the passed in offset is less than the array len()
     pub fn value(&self, i: usize) -> T::Native {
-        let offset = i + self.offset();
-        unsafe { *self.raw_values.as_ptr().add(offset) }
+        debug_assert!(i < self.len());
+        unsafe { self.value_unchecked(i) }
     }
 
     /// Creates a PrimitiveArray based on an iterator of values without nulls
diff --git a/rust/arrow/src/array/iterator.rs b/rust/arrow/src/array/iterator.rs
index 28dbe3d55c6..d97aa16744c 100644
--- a/rust/arrow/src/array/iterator.rs
+++ b/rust/arrow/src/array/iterator.rs
@@ -56,7 +56,12 @@ impl<'a, T: ArrowPrimitiveType> std::iter::Iterator for PrimitiveIter<'a, T> {
         } else {
             let old = self.current;
             self.current += 1;
-            Some(Some(self.array.value(old)))
+            // Safety:
+            // we just checked bounds in `self.current_end == self.current`
+            // this is safe on the premise that this struct is initialized with
+            // current = array.len()
+            // and that current_end is ever only decremented
+            unsafe { Some(Some(self.array.value_unchecked(old))) }
         }
     }
 
@@ -77,7 +82,12 @@ impl<'a, T: ArrowPrimitiveType> std::iter::DoubleEndedIterator for PrimitiveIter
             Some(if self.array.is_null(self.current_end) {
                 None
             } else {
-                Some(self.array.value(self.current_end))
+                // Safety:
+                // we just checked bounds in `self.current_end == self.current`
+                // this is safe on the premise that this struct is initialized with
+                // current = array.len()
+                // and that current_end is ever only decremented
+                unsafe { Some(self.array.value_unchecked(self.current_end)) }
             })
         }
     }
@@ -118,7 +128,12 @@ impl<'a> std::iter::Iterator for BooleanIter<'a> {
         } else {
             let old = self.current;
             self.current += 1;
-            Some(Some(self.array.value(old)))
+            // Safety:
+            // we just checked bounds in `self.current_end == self.current`
+            // this is safe on the premise that this struct is initialized with
+            // current = array.len()
+            // and that current_end is ever only decremented
+            unsafe { Some(Some(self.array.value_unchecked(old))) }
         }
     }
 
@@ -139,7 +154,12 @@ impl<'a> std::iter::DoubleEndedIterator for BooleanIter<'a> {
             Some(if self.array.is_null(self.current_end) {
                 None
             } else {
-                Some(self.array.value(self.current_end))
+                // Safety:
+                // we just checked bounds in `self.current_end == self.current`
+                // this is safe on the premise that this struct is initialized with
+                // current = array.len()
+                // and that current_end is ever only decremented
+                unsafe { Some(self.array.value_unchecked(self.current_end)) }
             })
         }
     }
@@ -182,7 +202,12 @@ impl<'a, T: StringOffsetSizeTrait> std::iter::Iterator for GenericStringIter<'a,
             Some(None)
         } else {
             self.current += 1;
-            Some(Some(self.array.value(i)))
+            // Safety:
+            // we just checked bounds in `self.current_end == self.current`
+            // this is safe on the premise that this struct is initialized with
+            // current = array.len()
+            // and that current_end is ever only decremented
+            unsafe { Some(Some(self.array.value_unchecked(i))) }
         }
     }
 
@@ -205,7 +230,12 @@ impl<'a, T: StringOffsetSizeTrait> std::iter::DoubleEndedIterator
             Some(if self.array.is_null(self.current_end) {
                 None
             } else {
-                Some(self.array.value(self.current_end))
+                // Safety:
+                // we just checked bounds in `self.current_end == self.current`
+                // this is safe on the premise that this struct is initialized with
+                // current = array.len()
+                // and that current_end is ever only decremented
+                unsafe { Some(self.array.value_unchecked(self.current_end)) }
             })
         }
     }
@@ -251,7 +281,12 @@ impl<'a, T: BinaryOffsetSizeTrait> std::iter::Iterator for GenericBinaryIter<'a,
             Some(None)
         } else {
             self.current += 1;
-            Some(Some(self.array.value(i)))
+            // Safety:
+            // we just checked bounds in `self.current_end == self.current`
+            // this is safe on the premise that this struct is initialized with
+            // current = array.len()
+            // and that current_end is ever only decremented
+            unsafe { Some(Some(self.array.value_unchecked(i))) }
         }
     }
 
@@ -274,7 +309,12 @@ impl<'a, T: BinaryOffsetSizeTrait> std::iter::DoubleEndedIterator
             Some(if self.array.is_null(self.current_end) {
                 None
             } else {
-                Some(self.array.value(self.current_end))
+                // Safety:
+                // we just checked bounds in `self.current_end == self.current`
+                // this is safe on the premise that this struct is initialized with
+                // current = array.len()
+                // and that current_end is ever only decremented
+                unsafe { Some(self.array.value_unchecked(self.current_end)) }
             })
         }
     }
@@ -318,7 +358,12 @@ impl<'a, S: OffsetSizeTrait> std::iter::Iterator for GenericListArrayIter<'a, S>
             Some(None)
         } else {
             self.current += 1;
-            Some(Some(self.array.value(i)))
+            // Safety:
+            // we just checked bounds in `self.current_end == self.current`
+            // this is safe on the premise that this struct is initialized with
+            // current = array.len()
+            // and that current_end is ever only decremented
+            unsafe { Some(Some(self.array.value_unchecked(i))) }
         }
     }
 
@@ -341,7 +386,12 @@ impl<'a, S: OffsetSizeTrait> std::iter::DoubleEndedIterator
             Some(if self.array.is_null(self.current_end) {
                 None
             } else {
-                Some(self.array.value(self.current_end))
+                // Safety:
+                // we just checked bounds in `self.current_end == self.current`
+                // this is safe on the premise that this struct is initialized with
+                // current = array.len()
+                // and that current_end is ever only decremented
+                unsafe { Some(self.array.value_unchecked(self.current_end)) }
             })
         }
     }

From 7e3deb556fb87b4bc86a79053f5e2418ea5e3513 Mon Sep 17 00:00:00 2001
From: Raphael Taylor-Davies <r.taylordavies@googlemail.com>
Date: Sun, 18 Apr 2021 06:52:52 -0400
Subject: [PATCH 084/719] ARROW-12425: [Rust] Fix new_null_array dictionary
 creation

It is my understanding that an arrow array should always have a backing values array, even if the content is all nulls. new_null_array currently violates this as it doesn't allocate the backing store for DictionaryArrays. This causes the concat kernel, and possibly others, to panic with index violations

Signed-off-by: Raphael Taylor-Davies <r.taylordavies@googlemail.com>

Closes #10072 from tustvold/null-dictionary-creation

Authored-by: Raphael Taylor-Davies <r.taylordavies@googlemail.com>
Signed-off-by: Andrew Lamb <andrew@nerdnetworks.org>
---
 rust/arrow/src/array/array.rs | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/rust/arrow/src/array/array.rs b/rust/arrow/src/array/array.rs
index 63d41dffd82..95a3117417e 100644
--- a/rust/arrow/src/array/array.rs
+++ b/rust/arrow/src/array/array.rs
@@ -421,14 +421,17 @@ pub fn new_null_array(data_type: &DataType, length: usize) -> ArrayRef {
         DataType::Union(_) => {
             unimplemented!("Creating null Union array not yet supported")
         }
-        DataType::Dictionary(_, value) => {
+        DataType::Dictionary(key, value) => {
+            let keys = new_null_array(key, length);
+            let keys = keys.data();
+
             make_array(ArrayData::new(
                 data_type.clone(),
                 length,
                 Some(length),
-                Some(MutableBuffer::new_null(length).into()),
+                keys.null_buffer().cloned(),
                 0,
-                vec![MutableBuffer::new(0).into()], // values are empty
+                keys.buffers().into(),
                 vec![new_empty_array(value.as_ref()).data().clone()],
             ))
         }
@@ -629,5 +632,9 @@ mod tests {
 
         let null_array = new_null_array(array.data_type(), 9);
         assert_eq!(&array, &null_array);
+        assert_eq!(
+            array.data().buffers()[0].len(),
+            null_array.data().buffers()[0].len()
+        );
     }
 }

From 6c1f6cce4b3e27c4bfca62aa8afb2345baf2fcfb Mon Sep 17 00:00:00 2001
From: Ximo Guanter <ximo.guanter@gmail.com>
Date: Sun, 18 Apr 2021 07:22:05 -0600
Subject: [PATCH 085/719] ARROW-12436: [Rust][Ballista] Add watch capabilities
 to config backend trait

A small next step towards enabling HA in the scheduler. UT + ITs pass.

cc @andygrove

Closes #10085 from edrevo/state-watch

Authored-by: Ximo Guanter <ximo.guanter@gmail.com>
Signed-off-by: Andy Grove <andygrove73@gmail.com>
---
 rust/ballista/rust/scheduler/src/lib.rs       |  80 ++--
 .../ballista/rust/scheduler/src/state/etcd.rs |  90 ++++-
 rust/ballista/rust/scheduler/src/state/mod.rs | 348 ++++++++++--------
 .../rust/scheduler/src/state/standalone.rs    |  73 +++-
 4 files changed, 377 insertions(+), 214 deletions(-)

diff --git a/rust/ballista/rust/scheduler/src/lib.rs b/rust/ballista/rust/scheduler/src/lib.rs
index de49bc01f36..a675153897b 100644
--- a/rust/ballista/rust/scheduler/src/lib.rs
+++ b/rust/ballista/rust/scheduler/src/lib.rs
@@ -71,8 +71,7 @@ use std::time::{Instant, SystemTime, UNIX_EPOCH};
 
 #[derive(Clone)]
 pub struct SchedulerServer {
-    state: SchedulerState,
-    namespace: String,
+    state: Arc<SchedulerState>,
     start_time: u128,
     version: String,
 }
@@ -80,10 +79,14 @@ pub struct SchedulerServer {
 impl SchedulerServer {
     pub fn new(config: Arc<dyn ConfigBackendClient>, namespace: String) -> Self {
         const VERSION: Option<&'static str> = option_env!("CARGO_PKG_VERSION");
+        let state = Arc::new(SchedulerState::new(config, namespace));
+        let state_clone = state.clone();
+
+        // TODO: we should elect a leader in the scheduler cluster and run this only in the leader
+        tokio::spawn(async move { state_clone.synchronize_job_status_loop().await });
 
         Self {
-            state: SchedulerState::new(config),
-            namespace,
+            state,
             start_time: SystemTime::now()
                 .duration_since(UNIX_EPOCH)
                 .unwrap()
@@ -102,7 +105,7 @@ impl SchedulerGrpc for SchedulerServer {
         info!("Received get_executors_metadata request");
         let result = self
             .state
-            .get_executors_metadata(self.namespace.as_str())
+            .get_executors_metadata()
             .await
             .map_err(|e| {
                 let msg = format!("Error reading executors metadata: {}", e);
@@ -135,17 +138,16 @@ impl SchedulerGrpc for SchedulerServer {
                 tonic::Status::internal(msg)
             })?;
             self.state
-                .save_executor_metadata(&self.namespace, metadata.clone())
+                .save_executor_metadata(metadata.clone())
                 .await
                 .map_err(|e| {
                     let msg = format!("Could not save executor metadata: {}", e);
                     error!("{}", msg);
                     tonic::Status::internal(msg)
                 })?;
-            let task_status_empty = task_status.is_empty();
             for task_status in task_status {
                 self.state
-                    .save_task_status(&self.namespace, &task_status)
+                    .save_task_status(&task_status)
                     .await
                     .map_err(|e| {
                         let msg = format!("Could not save task status: {}", e);
@@ -156,7 +158,7 @@ impl SchedulerGrpc for SchedulerServer {
             let task = if can_accept_task {
                 let plan = self
                     .state
-                    .assign_next_schedulable_task(&self.namespace, &metadata.id)
+                    .assign_next_schedulable_task(&metadata.id)
                     .await
                     .map_err(|e| {
                         let msg = format!("Error finding next assignable task: {}", e);
@@ -180,12 +182,6 @@ impl SchedulerGrpc for SchedulerServer {
             } else {
                 None
             };
-            // TODO: this should probably happen asynchronously with a watch on etc/sled
-            if !task_status_empty {
-                if let Err(e) = self.state.synchronize_job_status(&self.namespace).await {
-                    warn!("Could not synchronize jobs and tasks state: {}", e);
-                }
-            }
             lock.unlock().await;
             Ok(Response::new(PollWorkResult { task }))
         } else {
@@ -264,15 +260,11 @@ impl SchedulerGrpc for SchedulerServer {
                 }
             };
             debug!("Received plan for execution: {:?}", plan);
-            let executors = self
-                .state
-                .get_executors_metadata(&self.namespace)
-                .await
-                .map_err(|e| {
-                    let msg = format!("Error reading executors metadata: {}", e);
-                    error!("{}", msg);
-                    tonic::Status::internal(msg)
-                })?;
+            let executors = self.state.get_executors_metadata().await.map_err(|e| {
+                let msg = format!("Error reading executors metadata: {}", e);
+                error!("{}", msg);
+                tonic::Status::internal(msg)
+            })?;
             debug!("Found executors: {:?}", executors);
 
             let job_id: String = {
@@ -287,7 +279,6 @@ impl SchedulerGrpc for SchedulerServer {
             // Save placeholder job metadata
             self.state
                 .save_job_metadata(
-                    &self.namespace,
                     &job_id,
                     &JobStatus {
                         status: Some(job_status::Status::Queued(QueuedJob {})),
@@ -298,7 +289,6 @@ impl SchedulerGrpc for SchedulerServer {
                     tonic::Status::internal(format!("Could not save job metadata: {}", e))
                 })?;
 
-            let namespace = self.namespace.to_owned();
             let state = self.state.clone();
             let job_id_spawn = job_id.clone();
             tokio::spawn(async move {
@@ -311,7 +301,6 @@ impl SchedulerGrpc for SchedulerServer {
                                 warn!("Job {} failed with {}", job_id_spawn, error);
                                 state
                                     .save_job_metadata(
-                                        &namespace,
                                         &job_id_spawn,
                                         &JobStatus {
                                             status: Some(job_status::Status::Failed(
@@ -358,7 +347,6 @@ impl SchedulerGrpc for SchedulerServer {
                 // create distributed physical plan using Ballista
                 if let Err(e) = state
                     .save_job_metadata(
-                        &namespace,
                         &job_id_spawn,
                         &JobStatus {
                             status: Some(job_status::Status::Running(RunningJob {})),
@@ -389,7 +377,6 @@ impl SchedulerGrpc for SchedulerServer {
                 for stage in stages {
                     fail_job!(state
                         .save_stage_plan(
-                            &namespace,
                             &job_id_spawn,
                             stage.stage_id,
                             stage.child.clone()
@@ -410,14 +397,13 @@ impl SchedulerGrpc for SchedulerServer {
                             }),
                             status: None,
                         };
-                        fail_job!(state
-                            .save_task_status(&namespace, &pending_status)
-                            .await
-                            .map_err(|e| {
+                        fail_job!(state.save_task_status(&pending_status).await.map_err(
+                            |e| {
                                 let msg = format!("Could not save task status: {}", e);
                                 error!("{}", msg);
                                 tonic::Status::internal(msg)
-                            }));
+                            }
+                        ));
                     }
                 }
             });
@@ -434,15 +420,11 @@ impl SchedulerGrpc for SchedulerServer {
     ) -> std::result::Result<Response<GetJobStatusResult>, tonic::Status> {
         let job_id = request.into_inner().job_id;
         debug!("Received get_job_status request for job {}", job_id);
-        let job_meta = self
-            .state
-            .get_job_metadata(&self.namespace, &job_id)
-            .await
-            .map_err(|e| {
-                let msg = format!("Error reading job metadata: {}", e);
-                error!("{}", msg);
-                tonic::Status::internal(msg)
-            })?;
+        let job_meta = self.state.get_job_metadata(&job_id).await.map_err(|e| {
+            let msg = format!("Error reading job metadata: {}", e);
+            error!("{}", msg);
+            tonic::Status::internal(msg)
+        })?;
         Ok(Response::new(GetJobStatusResult {
             status: Some(job_meta),
         }))
@@ -468,7 +450,7 @@ mod test {
         let state = Arc::new(StandaloneClient::try_new_temporary()?);
         let namespace = "default";
         let scheduler = SchedulerServer::new(state.clone(), namespace.to_owned());
-        let state = SchedulerState::new(state);
+        let state = SchedulerState::new(state, namespace.to_string());
         let exec_meta = ExecutorMetadata {
             id: "abc".to_owned(),
             host: "".to_owned(),
@@ -487,10 +469,7 @@ mod test {
         // no response task since we told the scheduler we didn't want to accept one
         assert!(response.task.is_none());
         // executor should be registered
-        assert_eq!(
-            state.get_executors_metadata(namespace).await.unwrap().len(),
-            1
-        );
+        assert_eq!(state.get_executors_metadata().await.unwrap().len(), 1);
 
         let request: Request<PollWorkParams> = Request::new(PollWorkParams {
             metadata: Some(exec_meta.clone()),
@@ -505,10 +484,7 @@ mod test {
         // still no response task since there are no tasks in the scheduelr
         assert!(response.task.is_none());
         // executor should be registered
-        assert_eq!(
-            state.get_executors_metadata(namespace).await.unwrap().len(),
-            1
-        );
+        assert_eq!(state.get_executors_metadata().await.unwrap().len(), 1);
         Ok(())
     }
 }
diff --git a/rust/ballista/rust/scheduler/src/state/etcd.rs b/rust/ballista/rust/scheduler/src/state/etcd.rs
index ced24613ebd..807477d8699 100644
--- a/rust/ballista/rust/scheduler/src/state/etcd.rs
+++ b/rust/ballista/rust/scheduler/src/state/etcd.rs
@@ -17,15 +17,18 @@
 
 //! Etcd config backend.
 
-use std::time::Duration;
+use std::{task::Poll, time::Duration};
 
 use crate::state::ConfigBackendClient;
 use ballista_core::error::{ballista_error, Result};
 
-use etcd_client::{GetOptions, LockResponse, PutOptions};
+use etcd_client::{
+    GetOptions, LockResponse, PutOptions, WatchOptions, WatchStream, Watcher,
+};
+use futures::{Stream, StreamExt};
 use log::warn;
 
-use super::Lock;
+use super::{Lock, Watch, WatchEvent};
 
 /// A [`ConfigBackendClient`] implementation that uses etcd to save cluster configuration.
 #[derive(Clone)]
@@ -105,6 +108,87 @@ impl ConfigBackendClient for EtcdClient {
             })?;
         Ok(Box::new(EtcdLockGuard { etcd, lock }))
     }
+
+    async fn watch(&self, prefix: String) -> Result<Box<dyn Watch>> {
+        let mut etcd = self.etcd.clone();
+        let options = WatchOptions::new().with_prefix();
+        let (watcher, stream) = etcd.watch(prefix, Some(options)).await.map_err(|e| {
+            warn!("etcd watch failed: {}", e);
+            ballista_error("etcd watch failed")
+        })?;
+        Ok(Box::new(EtcdWatch {
+            watcher,
+            stream,
+            buffered_events: Vec::new(),
+        }))
+    }
+}
+
+struct EtcdWatch {
+    watcher: Watcher,
+    stream: WatchStream,
+    buffered_events: Vec<WatchEvent>,
+}
+
+#[tonic::async_trait]
+impl Watch for EtcdWatch {
+    async fn cancel(&mut self) -> Result<()> {
+        self.watcher.cancel().await.map_err(|e| {
+            warn!("etcd watch cancel failed: {}", e);
+            ballista_error("etcd watch cancel failed")
+        })
+    }
+}
+
+impl Stream for EtcdWatch {
+    type Item = WatchEvent;
+
+    fn poll_next(
+        self: std::pin::Pin<&mut Self>,
+        cx: &mut std::task::Context<'_>,
+    ) -> Poll<Option<Self::Item>> {
+        let self_mut = self.get_mut();
+        if let Some(event) = self_mut.buffered_events.pop() {
+            Poll::Ready(Some(event))
+        } else {
+            loop {
+                match self_mut.stream.poll_next_unpin(cx) {
+                    Poll::Ready(Some(Err(e))) => {
+                        warn!("Error when watching etcd prefix: {}", e);
+                        continue;
+                    }
+                    Poll::Ready(Some(Ok(v))) => {
+                        self_mut.buffered_events.extend(v.events().iter().map(|ev| {
+                            match ev.event_type() {
+                                etcd_client::EventType::Put => {
+                                    let kv = ev.kv().unwrap();
+                                    WatchEvent::Put(
+                                        kv.key_str().unwrap().to_string(),
+                                        kv.value().to_owned(),
+                                    )
+                                }
+                                etcd_client::EventType::Delete => {
+                                    let kv = ev.kv().unwrap();
+                                    WatchEvent::Delete(kv.key_str().unwrap().to_string())
+                                }
+                            }
+                        }));
+                        if let Some(event) = self_mut.buffered_events.pop() {
+                            return Poll::Ready(Some(event));
+                        } else {
+                            continue;
+                        }
+                    }
+                    Poll::Ready(None) => return Poll::Ready(None),
+                    Poll::Pending => return Poll::Pending,
+                }
+            }
+        }
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.stream.size_hint()
+    }
 }
 
 struct EtcdLockGuard {
diff --git a/rust/ballista/rust/scheduler/src/state/mod.rs b/rust/ballista/rust/scheduler/src/state/mod.rs
index 794e58fb376..a15efd618ff 100644
--- a/rust/ballista/rust/scheduler/src/state/mod.rs
+++ b/rust/ballista/rust/scheduler/src/state/mod.rs
@@ -20,7 +20,8 @@ use std::{
 };
 
 use datafusion::physical_plan::ExecutionPlan;
-use log::{debug, info};
+use futures::{Stream, StreamExt};
+use log::{debug, error, info};
 use prost::Message;
 use tokio::sync::OwnedMutexGuard;
 
@@ -69,27 +70,46 @@ pub trait ConfigBackendClient: Send + Sync {
     ) -> Result<()>;
 
     async fn lock(&self) -> Result<Box<dyn Lock>>;
+
+    /// Watch all events that happen on a specific prefix.
+    async fn watch(&self, prefix: String) -> Result<Box<dyn Watch>>;
+}
+
+/// A Watch is a cancelable stream of put or delete events in the [ConfigBackendClient]
+#[tonic::async_trait]
+pub trait Watch: Stream<Item = WatchEvent> + Send + Unpin {
+    async fn cancel(&mut self) -> Result<()>;
+}
+
+#[derive(Debug, PartialEq)]
+pub enum WatchEvent {
+    /// Contains the inserted or updated key and the new value
+    Put(String, Vec<u8>),
+
+    /// Contains the deleted key
+    Delete(String),
 }
 
 #[derive(Clone)]
 pub(super) struct SchedulerState {
     config_client: Arc<dyn ConfigBackendClient>,
+    namespace: String,
 }
 
 impl SchedulerState {
-    pub fn new(config_client: Arc<dyn ConfigBackendClient>) -> Self {
-        Self { config_client }
+    pub fn new(config_client: Arc<dyn ConfigBackendClient>, namespace: String) -> Self {
+        Self {
+            config_client,
+            namespace,
+        }
     }
 
-    pub async fn get_executors_metadata(
-        &self,
-        namespace: &str,
-    ) -> Result<Vec<ExecutorMeta>> {
+    pub async fn get_executors_metadata(&self) -> Result<Vec<ExecutorMeta>> {
         let mut result = vec![];
 
         let entries = self
             .config_client
-            .get_from_prefix(&get_executors_prefix(namespace))
+            .get_from_prefix(&get_executors_prefix(&self.namespace))
             .await?;
         for (_key, entry) in entries {
             let meta: ExecutorMetadata = decode_protobuf(&entry)?;
@@ -98,12 +118,8 @@ impl SchedulerState {
         Ok(result)
     }
 
-    pub async fn save_executor_metadata(
-        &self,
-        namespace: &str,
-        meta: ExecutorMeta,
-    ) -> Result<()> {
-        let key = get_executor_key(namespace, &meta.id);
+    pub async fn save_executor_metadata(&self, meta: ExecutorMeta) -> Result<()> {
+        let key = get_executor_key(&self.namespace, &meta.id);
         let meta: ExecutorMetadata = meta.into();
         let value: Vec<u8> = encode_protobuf(&meta)?;
         self.config_client.put(key, value, Some(LEASE_TIME)).await
@@ -111,22 +127,17 @@ impl SchedulerState {
 
     pub async fn save_job_metadata(
         &self,
-        namespace: &str,
         job_id: &str,
         status: &JobStatus,
     ) -> Result<()> {
         debug!("Saving job metadata: {:?}", status);
-        let key = get_job_key(namespace, job_id);
+        let key = get_job_key(&self.namespace, job_id);
         let value = encode_protobuf(status)?;
         self.config_client.put(key, value, None).await
     }
 
-    pub async fn get_job_metadata(
-        &self,
-        namespace: &str,
-        job_id: &str,
-    ) -> Result<JobStatus> {
-        let key = get_job_key(namespace, job_id);
+    pub async fn get_job_metadata(&self, job_id: &str) -> Result<JobStatus> {
+        let key = get_job_key(&self.namespace, job_id);
         let value = &self.config_client.get(&key).await?;
         if value.is_empty() {
             return Err(BallistaError::General(format!(
@@ -138,14 +149,10 @@ impl SchedulerState {
         Ok(value)
     }
 
-    pub async fn save_task_status(
-        &self,
-        namespace: &str,
-        status: &TaskStatus,
-    ) -> Result<()> {
+    pub async fn save_task_status(&self, status: &TaskStatus) -> Result<()> {
         let partition_id = status.partition_id.as_ref().unwrap();
         let key = get_task_status_key(
-            namespace,
+            &self.namespace,
             &partition_id.job_id,
             partition_id.stage_id as usize,
             partition_id.partition_id as usize,
@@ -156,12 +163,11 @@ impl SchedulerState {
 
     pub async fn _get_task_status(
         &self,
-        namespace: &str,
         job_id: &str,
         stage_id: usize,
         partition_id: usize,
     ) -> Result<TaskStatus> {
-        let key = get_task_status_key(namespace, job_id, stage_id, partition_id);
+        let key = get_task_status_key(&self.namespace, job_id, stage_id, partition_id);
         let value = &self.config_client.clone().get(&key).await?;
         if value.is_empty() {
             return Err(BallistaError::General(format!(
@@ -176,12 +182,11 @@ impl SchedulerState {
     // "Unnecessary" lifetime syntax due to https://github.com/rust-lang/rust/issues/63033
     pub async fn save_stage_plan<'a>(
         &'a self,
-        namespace: &'a str,
         job_id: &'a str,
         stage_id: usize,
         plan: Arc<dyn ExecutionPlan>,
     ) -> Result<()> {
-        let key = get_stage_plan_key(namespace, job_id, stage_id);
+        let key = get_stage_plan_key(&self.namespace, job_id, stage_id);
         let value = {
             let proto: PhysicalPlanNode = plan.try_into()?;
             encode_protobuf(&proto)?
@@ -191,11 +196,10 @@ impl SchedulerState {
 
     pub async fn get_stage_plan(
         &self,
-        namespace: &str,
         job_id: &str,
         stage_id: usize,
     ) -> Result<Arc<dyn ExecutionPlan>> {
-        let key = get_stage_plan_key(namespace, job_id, stage_id);
+        let key = get_stage_plan_key(&self.namespace, job_id, stage_id);
         let value = &self.config_client.get(&key).await?;
         if value.is_empty() {
             return Err(BallistaError::General(format!(
@@ -209,26 +213,21 @@ impl SchedulerState {
 
     pub async fn assign_next_schedulable_task(
         &self,
-        namespace: &str,
         executor_id: &str,
     ) -> Result<Option<(TaskStatus, Arc<dyn ExecutionPlan>)>> {
         let kvs: HashMap<String, Vec<u8>> = self
             .config_client
-            .get_from_prefix(&get_task_prefix(namespace))
+            .get_from_prefix(&get_task_prefix(&self.namespace))
             .await?
             .into_iter()
             .collect();
-        let executors = self.get_executors_metadata(namespace).await?;
+        let executors = self.get_executors_metadata().await?;
         'tasks: for (_key, value) in kvs.iter() {
             let mut status: TaskStatus = decode_protobuf(&value)?;
             if status.status.is_none() {
                 let partition = status.partition_id.as_ref().unwrap();
                 let plan = self
-                    .get_stage_plan(
-                        namespace,
-                        &partition.job_id,
-                        partition.stage_id as usize,
-                    )
+                    .get_stage_plan(&partition.job_id, partition.stage_id as usize)
                     .await?;
 
                 // Let's try to resolve any unresolved shuffles we find
@@ -242,7 +241,7 @@ impl SchedulerState {
                         for partition_id in 0..unresolved_shuffle.partition_count {
                             let referenced_task = kvs
                                 .get(&get_task_status_key(
-                                    namespace,
+                                    &self.namespace,
                                     &partition.job_id,
                                     stage_id,
                                     partition_id,
@@ -286,7 +285,7 @@ impl SchedulerState {
                 status.status = Some(task_status::Status::Running(RunningTask {
                     executor_id: executor_id.to_owned(),
                 }));
-                self.save_task_status(namespace, &status).await?;
+                self.save_task_status(&status).await?;
                 return Ok(Some((status, plan)));
             }
         }
@@ -298,34 +297,58 @@ impl SchedulerState {
         self.config_client.lock().await
     }
 
-    pub async fn synchronize_job_status(&self, namespace: &str) -> Result<()> {
-        let kvs = self
+    /// This function starts a watch over the task keys. Whenever a task changes, it re-evaluates
+    /// the status for the parent job and updates it accordingly.
+    ///
+    /// The future returned by this function never returns (unless an error happens), so it is wise
+    /// to [tokio::spawn] calls to this method.
+    pub async fn synchronize_job_status_loop(&self) -> Result<()> {
+        let watch = self
+            .config_client
+            .watch(get_task_prefix(&self.namespace))
+            .await?;
+        watch.for_each(|event: WatchEvent| async {
+            let key = match event {
+                WatchEvent::Put(key, _value) => key,
+                WatchEvent::Delete(key) => key
+            };
+            let job_id = extract_job_id_from_task_key(&key).unwrap();
+            match self.lock().await {
+                Ok(mut lock) => {
+                    if let Err(e) = self.synchronize_job_status(job_id).await {
+                        error!("Could not update job status for {}. This job might be stuck forever. Error: {}", job_id, e);
+                    }
+                    lock.unlock().await;
+                },
+                Err(e) => error!("Could not lock config backend. Job {} will have an unsynchronized status and might be stuck forever. Error: {}", job_id, e)
+            }
+        }).await;
+
+        Ok(())
+    }
+
+    async fn synchronize_job_status(&self, job_id: &str) -> Result<()> {
+        let value = self
             .config_client
-            .get_from_prefix(&get_job_prefix(namespace))
+            .get(&get_job_key(&self.namespace, job_id))
             .await?;
         let executors: HashMap<String, ExecutorMeta> = self
-            .get_executors_metadata(namespace)
+            .get_executors_metadata()
             .await?
             .into_iter()
             .map(|meta| (meta.id.to_string(), meta))
             .collect();
-        for (key, value) in kvs {
-            let job_id = extract_job_id_from_key(&key)?;
-            let status: JobStatus = decode_protobuf(&value)?;
-            let new_status = self
-                .get_job_status_from_tasks(namespace, job_id, &executors)
-                .await?;
-            if let Some(new_status) = new_status {
-                if status != new_status {
-                    info!(
-                        "Changing status for job {} to {:?}",
-                        job_id, new_status.status
-                    );
-                    debug!("Old status: {:?}", status);
-                    debug!("New status: {:?}", new_status);
-                    self.save_job_metadata(namespace, job_id, &new_status)
-                        .await?;
-                }
+        let status: JobStatus = decode_protobuf(&value)?;
+        let new_status = self.get_job_status_from_tasks(job_id, &executors).await?;
+        if let Some(new_status) = new_status {
+            if status != new_status {
+                info!(
+                    "Changing status for job {} to {:?}",
+                    job_id, new_status.status
+                );
+                debug!("Old status: {:?}", status);
+                debug!("New status: {:?}", new_status);
+                self.save_job_metadata(job_id, &new_status).await?;
             }
         }
         Ok(())
@@ -333,13 +356,12 @@ impl SchedulerState {
 
     async fn get_job_status_from_tasks(
         &self,
-        namespace: &str,
         job_id: &str,
         executors: &HashMap<String, ExecutorMeta>,
     ) -> Result<Option<JobStatus>> {
         let statuses = self
             .config_client
-            .get_from_prefix(&get_task_prefix_for_job(namespace, job_id))
+            .get_from_prefix(&get_task_prefix_for_job(&self.namespace, job_id))
             .await?
             .into_iter()
             .map(|(_k, v)| decode_protobuf::<TaskStatus>(&v))
@@ -446,12 +468,6 @@ fn get_job_prefix(namespace: &str) -> String {
     format!("/ballista/{}/jobs", namespace)
 }
 
-fn extract_job_id_from_key(job_key: &str) -> Result<&str> {
-    job_key.split('/').nth(4).ok_or_else(|| {
-        BallistaError::Internal(format!("Unexpected job key: {}", job_key))
-    })
-}
-
 fn get_job_key(namespace: &str, id: &str) -> String {
     format!("{}/{}", get_job_prefix(namespace), id)
 }
@@ -478,6 +494,12 @@ fn get_task_status_key(
     )
 }
 
+fn extract_job_id_from_task_key(job_key: &str) -> Result<&str> {
+    job_key.split('/').nth(4).ok_or_else(|| {
+        BallistaError::Internal(format!("Unexpected task key: {}", job_key))
+    })
+}
+
 fn get_stage_plan_key(namespace: &str, job_id: &str, stage_id: usize) -> String {
     format!("/ballista/{}/stages/{}/{}", namespace, job_id, stage_id,)
 }
@@ -514,44 +536,39 @@ mod test {
     };
     use ballista_core::{error::BallistaError, serde::scheduler::ExecutorMeta};
 
-    use super::{SchedulerState, StandaloneClient};
+    use super::{
+        extract_job_id_from_task_key, get_task_status_key, SchedulerState,
+        StandaloneClient,
+    };
 
     #[tokio::test]
     async fn executor_metadata() -> Result<(), BallistaError> {
-        let state = SchedulerState::new(Arc::new(StandaloneClient::try_new_temporary()?));
+        let state = SchedulerState::new(
+            Arc::new(StandaloneClient::try_new_temporary()?),
+            "test".to_string(),
+        );
         let meta = ExecutorMeta {
             id: "123".to_owned(),
             host: "localhost".to_owned(),
             port: 123,
         };
-        state.save_executor_metadata("test", meta.clone()).await?;
-        let result = state.get_executors_metadata("test").await?;
+        state.save_executor_metadata(meta.clone()).await?;
+        let result = state.get_executors_metadata().await?;
         assert_eq!(vec![meta], result);
         Ok(())
     }
 
-    #[tokio::test]
-    async fn executor_metadata_empty() -> Result<(), BallistaError> {
-        let state = SchedulerState::new(Arc::new(StandaloneClient::try_new_temporary()?));
-        let meta = ExecutorMeta {
-            id: "123".to_owned(),
-            host: "localhost".to_owned(),
-            port: 123,
-        };
-        state.save_executor_metadata("test", meta.clone()).await?;
-        let result = state.get_executors_metadata("test2").await?;
-        assert!(result.is_empty());
-        Ok(())
-    }
-
     #[tokio::test]
     async fn job_metadata() -> Result<(), BallistaError> {
-        let state = SchedulerState::new(Arc::new(StandaloneClient::try_new_temporary()?));
+        let state = SchedulerState::new(
+            Arc::new(StandaloneClient::try_new_temporary()?),
+            "test".to_string(),
+        );
         let meta = JobStatus {
             status: Some(job_status::Status::Queued(QueuedJob {})),
         };
-        state.save_job_metadata("test", "job", &meta).await?;
-        let result = state.get_job_metadata("test", "job").await?;
+        state.save_job_metadata("job", &meta).await?;
+        let result = state.get_job_metadata("job").await?;
         assert!(result.status.is_some());
         match result.status.unwrap() {
             job_status::Status::Queued(_) => (),
@@ -562,19 +579,25 @@ mod test {
 
     #[tokio::test]
     async fn job_metadata_non_existant() -> Result<(), BallistaError> {
-        let state = SchedulerState::new(Arc::new(StandaloneClient::try_new_temporary()?));
+        let state = SchedulerState::new(
+            Arc::new(StandaloneClient::try_new_temporary()?),
+            "test".to_string(),
+        );
         let meta = JobStatus {
             status: Some(job_status::Status::Queued(QueuedJob {})),
         };
-        state.save_job_metadata("test", "job", &meta).await?;
-        let result = state.get_job_metadata("test2", "job2").await;
+        state.save_job_metadata("job", &meta).await?;
+        let result = state.get_job_metadata("job2").await;
         assert!(result.is_err());
         Ok(())
     }
 
     #[tokio::test]
     async fn task_status() -> Result<(), BallistaError> {
-        let state = SchedulerState::new(Arc::new(StandaloneClient::try_new_temporary()?));
+        let state = SchedulerState::new(
+            Arc::new(StandaloneClient::try_new_temporary()?),
+            "test".to_string(),
+        );
         let meta = TaskStatus {
             status: Some(task_status::Status::Failed(FailedTask {
                 error: "error".to_owned(),
@@ -585,8 +608,8 @@ mod test {
                 partition_id: 2,
             }),
         };
-        state.save_task_status("test", &meta).await?;
-        let result = state._get_task_status("test", "job", 1, 2).await?;
+        state.save_task_status(&meta).await?;
+        let result = state._get_task_status("job", 1, 2).await?;
         assert!(result.status.is_some());
         match result.status.unwrap() {
             task_status::Status::Failed(_) => (),
@@ -597,7 +620,10 @@ mod test {
 
     #[tokio::test]
     async fn task_status_non_existant() -> Result<(), BallistaError> {
-        let state = SchedulerState::new(Arc::new(StandaloneClient::try_new_temporary()?));
+        let state = SchedulerState::new(
+            Arc::new(StandaloneClient::try_new_temporary()?),
+            "test".to_string(),
+        );
         let meta = TaskStatus {
             status: Some(task_status::Status::Failed(FailedTask {
                 error: "error".to_owned(),
@@ -608,40 +634,40 @@ mod test {
                 partition_id: 2,
             }),
         };
-        state.save_task_status("test", &meta).await?;
-        let result = state._get_task_status("test", "job", 25, 2).await;
+        state.save_task_status(&meta).await?;
+        let result = state._get_task_status("job", 25, 2).await;
         assert!(result.is_err());
         Ok(())
     }
 
     #[tokio::test]
     async fn task_synchronize_job_status_queued() -> Result<(), BallistaError> {
-        let state = SchedulerState::new(Arc::new(StandaloneClient::try_new_temporary()?));
-        let namespace = "default";
+        let state = SchedulerState::new(
+            Arc::new(StandaloneClient::try_new_temporary()?),
+            "test".to_string(),
+        );
         let job_id = "job";
         let job_status = JobStatus {
             status: Some(job_status::Status::Queued(QueuedJob {})),
         };
-        state
-            .save_job_metadata(namespace, job_id, &job_status)
-            .await?;
-        state.synchronize_job_status(namespace).await?;
-        let result = state.get_job_metadata(namespace, job_id).await?;
+        state.save_job_metadata(job_id, &job_status).await?;
+        state.synchronize_job_status(job_id).await?;
+        let result = state.get_job_metadata(job_id).await?;
         assert_eq!(result, job_status);
         Ok(())
     }
 
     #[tokio::test]
     async fn task_synchronize_job_status_running() -> Result<(), BallistaError> {
-        let state = SchedulerState::new(Arc::new(StandaloneClient::try_new_temporary()?));
-        let namespace = "default";
+        let state = SchedulerState::new(
+            Arc::new(StandaloneClient::try_new_temporary()?),
+            "test".to_string(),
+        );
         let job_id = "job";
         let job_status = JobStatus {
             status: Some(job_status::Status::Running(RunningJob {})),
         };
-        state
-            .save_job_metadata(namespace, job_id, &job_status)
-            .await?;
+        state.save_job_metadata(job_id, &job_status).await?;
         let meta = TaskStatus {
             status: Some(task_status::Status::Completed(CompletedTask {
                 executor_id: "".to_owned(),
@@ -652,7 +678,7 @@ mod test {
                 partition_id: 0,
             }),
         };
-        state.save_task_status(namespace, &meta).await?;
+        state.save_task_status(&meta).await?;
         let meta = TaskStatus {
             status: Some(task_status::Status::Running(RunningTask {
                 executor_id: "".to_owned(),
@@ -663,24 +689,24 @@ mod test {
                 partition_id: 1,
             }),
         };
-        state.save_task_status(namespace, &meta).await?;
-        state.synchronize_job_status(namespace).await?;
-        let result = state.get_job_metadata(namespace, job_id).await?;
+        state.save_task_status(&meta).await?;
+        state.synchronize_job_status(job_id).await?;
+        let result = state.get_job_metadata(job_id).await?;
         assert_eq!(result, job_status);
         Ok(())
     }
 
     #[tokio::test]
     async fn task_synchronize_job_status_running2() -> Result<(), BallistaError> {
-        let state = SchedulerState::new(Arc::new(StandaloneClient::try_new_temporary()?));
-        let namespace = "default";
+        let state = SchedulerState::new(
+            Arc::new(StandaloneClient::try_new_temporary()?),
+            "test".to_string(),
+        );
         let job_id = "job";
         let job_status = JobStatus {
             status: Some(job_status::Status::Running(RunningJob {})),
         };
-        state
-            .save_job_metadata(namespace, job_id, &job_status)
-            .await?;
+        state.save_job_metadata(job_id, &job_status).await?;
         let meta = TaskStatus {
             status: Some(task_status::Status::Completed(CompletedTask {
                 executor_id: "".to_owned(),
@@ -691,7 +717,7 @@ mod test {
                 partition_id: 0,
             }),
         };
-        state.save_task_status(namespace, &meta).await?;
+        state.save_task_status(&meta).await?;
         let meta = TaskStatus {
             status: None,
             partition_id: Some(PartitionId {
@@ -700,24 +726,24 @@ mod test {
                 partition_id: 1,
             }),
         };
-        state.save_task_status(namespace, &meta).await?;
-        state.synchronize_job_status(namespace).await?;
-        let result = state.get_job_metadata(namespace, job_id).await?;
+        state.save_task_status(&meta).await?;
+        state.synchronize_job_status(job_id).await?;
+        let result = state.get_job_metadata(job_id).await?;
         assert_eq!(result, job_status);
         Ok(())
     }
 
     #[tokio::test]
     async fn task_synchronize_job_status_completed() -> Result<(), BallistaError> {
-        let state = SchedulerState::new(Arc::new(StandaloneClient::try_new_temporary()?));
-        let namespace = "default";
+        let state = SchedulerState::new(
+            Arc::new(StandaloneClient::try_new_temporary()?),
+            "test".to_string(),
+        );
         let job_id = "job";
         let job_status = JobStatus {
             status: Some(job_status::Status::Running(RunningJob {})),
         };
-        state
-            .save_job_metadata(namespace, job_id, &job_status)
-            .await?;
+        state.save_job_metadata(job_id, &job_status).await?;
         let meta = TaskStatus {
             status: Some(task_status::Status::Completed(CompletedTask {
                 executor_id: "".to_owned(),
@@ -728,7 +754,7 @@ mod test {
                 partition_id: 0,
             }),
         };
-        state.save_task_status(namespace, &meta).await?;
+        state.save_task_status(&meta).await?;
         let meta = TaskStatus {
             status: Some(task_status::Status::Completed(CompletedTask {
                 executor_id: "".to_owned(),
@@ -739,9 +765,9 @@ mod test {
                 partition_id: 1,
             }),
         };
-        state.save_task_status(namespace, &meta).await?;
-        state.synchronize_job_status(namespace).await?;
-        let result = state.get_job_metadata(namespace, job_id).await?;
+        state.save_task_status(&meta).await?;
+        state.synchronize_job_status(job_id).await?;
+        let result = state.get_job_metadata(job_id).await?;
         match result.status.unwrap() {
             job_status::Status::Completed(_) => (),
             status => panic!("Received status: {:?}", status),
@@ -751,15 +777,15 @@ mod test {
 
     #[tokio::test]
     async fn task_synchronize_job_status_completed2() -> Result<(), BallistaError> {
-        let state = SchedulerState::new(Arc::new(StandaloneClient::try_new_temporary()?));
-        let namespace = "default";
+        let state = SchedulerState::new(
+            Arc::new(StandaloneClient::try_new_temporary()?),
+            "test".to_string(),
+        );
         let job_id = "job";
         let job_status = JobStatus {
             status: Some(job_status::Status::Queued(QueuedJob {})),
         };
-        state
-            .save_job_metadata(namespace, job_id, &job_status)
-            .await?;
+        state.save_job_metadata(job_id, &job_status).await?;
         let meta = TaskStatus {
             status: Some(task_status::Status::Completed(CompletedTask {
                 executor_id: "".to_owned(),
@@ -770,7 +796,7 @@ mod test {
                 partition_id: 0,
             }),
         };
-        state.save_task_status(namespace, &meta).await?;
+        state.save_task_status(&meta).await?;
         let meta = TaskStatus {
             status: Some(task_status::Status::Completed(CompletedTask {
                 executor_id: "".to_owned(),
@@ -781,9 +807,9 @@ mod test {
                 partition_id: 1,
             }),
         };
-        state.save_task_status(namespace, &meta).await?;
-        state.synchronize_job_status(namespace).await?;
-        let result = state.get_job_metadata(namespace, job_id).await?;
+        state.save_task_status(&meta).await?;
+        state.synchronize_job_status(job_id).await?;
+        let result = state.get_job_metadata(job_id).await?;
         match result.status.unwrap() {
             job_status::Status::Completed(_) => (),
             status => panic!("Received status: {:?}", status),
@@ -793,15 +819,15 @@ mod test {
 
     #[tokio::test]
     async fn task_synchronize_job_status_failed() -> Result<(), BallistaError> {
-        let state = SchedulerState::new(Arc::new(StandaloneClient::try_new_temporary()?));
-        let namespace = "default";
+        let state = SchedulerState::new(
+            Arc::new(StandaloneClient::try_new_temporary()?),
+            "test".to_string(),
+        );
         let job_id = "job";
         let job_status = JobStatus {
             status: Some(job_status::Status::Running(RunningJob {})),
         };
-        state
-            .save_job_metadata(namespace, job_id, &job_status)
-            .await?;
+        state.save_job_metadata(job_id, &job_status).await?;
         let meta = TaskStatus {
             status: Some(task_status::Status::Completed(CompletedTask {
                 executor_id: "".to_owned(),
@@ -812,7 +838,7 @@ mod test {
                 partition_id: 0,
             }),
         };
-        state.save_task_status(namespace, &meta).await?;
+        state.save_task_status(&meta).await?;
         let meta = TaskStatus {
             status: Some(task_status::Status::Failed(FailedTask {
                 error: "".to_owned(),
@@ -823,7 +849,7 @@ mod test {
                 partition_id: 1,
             }),
         };
-        state.save_task_status(namespace, &meta).await?;
+        state.save_task_status(&meta).await?;
         let meta = TaskStatus {
             status: None,
             partition_id: Some(PartitionId {
@@ -832,13 +858,23 @@ mod test {
                 partition_id: 2,
             }),
         };
-        state.save_task_status(namespace, &meta).await?;
-        state.synchronize_job_status(namespace).await?;
-        let result = state.get_job_metadata(namespace, job_id).await?;
+        state.save_task_status(&meta).await?;
+        state.synchronize_job_status(job_id).await?;
+        let result = state.get_job_metadata(job_id).await?;
         match result.status.unwrap() {
             job_status::Status::Failed(_) => (),
             status => panic!("Received status: {:?}", status),
         }
         Ok(())
     }
+
+    #[test]
+    fn task_extract_job_id_from_task_key() {
+        let job_id = "foo";
+        assert_eq!(
+            extract_job_id_from_task_key(&get_task_status_key("namespace", job_id, 0, 1))
+                .unwrap(),
+            job_id
+        );
+    }
 }
diff --git a/rust/ballista/rust/scheduler/src/state/standalone.rs b/rust/ballista/rust/scheduler/src/state/standalone.rs
index e07d45ece6b..69805c016a1 100644
--- a/rust/ballista/rust/scheduler/src/state/standalone.rs
+++ b/rust/ballista/rust/scheduler/src/state/standalone.rs
@@ -15,15 +15,17 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use std::{sync::Arc, time::Duration};
+use std::{sync::Arc, task::Poll, time::Duration};
 
 use crate::state::ConfigBackendClient;
 use ballista_core::error::{ballista_error, BallistaError, Result};
 
+use futures::{FutureExt, Stream};
 use log::warn;
+use sled::{Event, Subscriber};
 use tokio::sync::Mutex;
 
-use super::Lock;
+use super::{Lock, Watch, WatchEvent};
 
 /// A [`ConfigBackendClient`] implementation that uses file-based storage to save cluster configuration.
 #[derive(Clone)]
@@ -106,13 +108,57 @@ impl ConfigBackendClient for StandaloneClient {
     async fn lock(&self) -> Result<Box<dyn Lock>> {
         Ok(Box::new(self.lock.clone().lock_owned().await))
     }
+
+    async fn watch(&self, prefix: String) -> Result<Box<dyn Watch>> {
+        Ok(Box::new(SledWatch {
+            subscriber: self.db.watch_prefix(prefix),
+        }))
+    }
+}
+
+struct SledWatch {
+    subscriber: Subscriber,
+}
+
+#[tonic::async_trait]
+impl Watch for SledWatch {
+    async fn cancel(&mut self) -> Result<()> {
+        Ok(())
+    }
+}
+
+impl Stream for SledWatch {
+    type Item = WatchEvent;
+
+    fn poll_next(
+        self: std::pin::Pin<&mut Self>,
+        cx: &mut std::task::Context<'_>,
+    ) -> std::task::Poll<Option<Self::Item>> {
+        match self.get_mut().subscriber.poll_unpin(cx) {
+            Poll::Pending => Poll::Pending,
+            Poll::Ready(None) => Poll::Ready(None),
+            Poll::Ready(Some(Event::Insert { key, value })) => {
+                let key = std::str::from_utf8(&key).unwrap().to_owned();
+                Poll::Ready(Some(WatchEvent::Put(key, value.to_vec())))
+            }
+            Poll::Ready(Some(Event::Remove { key })) => {
+                let key = std::str::from_utf8(&key).unwrap().to_owned();
+                Poll::Ready(Some(WatchEvent::Delete(key)))
+            }
+        }
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.subscriber.size_hint()
+    }
 }
 
 #[cfg(test)]
 mod tests {
-    use crate::state::ConfigBackendClient;
+    use crate::state::{ConfigBackendClient, Watch, WatchEvent};
 
     use super::StandaloneClient;
+    use futures::StreamExt;
     use std::result::Result;
 
     fn create_instance() -> Result<StandaloneClient, Box<dyn std::error::Error>> {
@@ -158,4 +204,25 @@ mod tests {
         );
         Ok(())
     }
+
+    #[tokio::test]
+    async fn read_watch() -> Result<(), Box<dyn std::error::Error>> {
+        let client = create_instance()?;
+        let key = "key";
+        let value = "value".as_bytes();
+        let mut watch: Box<dyn Watch> = client.watch(key.to_owned()).await?;
+        client.put(key.to_owned(), value.to_vec(), None).await?;
+        assert_eq!(
+            watch.next().await,
+            Some(WatchEvent::Put(key.to_owned(), value.to_owned()))
+        );
+        let value2 = "value2".as_bytes();
+        client.put(key.to_owned(), value2.to_vec(), None).await?;
+        assert_eq!(
+            watch.next().await,
+            Some(WatchEvent::Put(key.to_owned(), value2.to_owned()))
+        );
+        watch.cancel().await?;
+        Ok(())
+    }
 }

From 26cdb155174302b365e833463ed9060e9c548549 Mon Sep 17 00:00:00 2001
From: Andy Grove <andygrove@users.noreply.github.com>
Date: Sun, 18 Apr 2021 07:23:06 -0600
Subject: [PATCH 086/719] Ballista now uses relative paths for dependencies
 (#10087)

---
 rust/ballista/rust/benchmarks/tpch/Cargo.toml | 11 +++--------
 rust/ballista/rust/client/Cargo.toml          |  7 ++-----
 rust/ballista/rust/core/Cargo.toml            | 10 +++-------
 rust/ballista/rust/executor/Cargo.toml        | 10 +++-------
 rust/ballista/rust/scheduler/Cargo.toml       |  7 ++-----
 5 files changed, 13 insertions(+), 32 deletions(-)

diff --git a/rust/ballista/rust/benchmarks/tpch/Cargo.toml b/rust/ballista/rust/benchmarks/tpch/Cargo.toml
index 8c37f8898fc..822d101d4e9 100644
--- a/rust/ballista/rust/benchmarks/tpch/Cargo.toml
+++ b/rust/ballista/rust/benchmarks/tpch/Cargo.toml
@@ -27,14 +27,9 @@ edition = "2018"
 [dependencies]
 ballista = { path="../../client" }
 
-#arrow = { path = "../../../../arrow"  }
-#datafusion = { path = "../../../../datafusion" }
-#parquet = { path = "../../../../parquet"  }
-
-arrow = { git = "https://github.com/apache/arrow", rev="fe83dca" }
-datafusion = { git = "https://github.com/apache/arrow", rev="fe83dca" }
-parquet = { git = "https://github.com/apache/arrow", rev="fe83dca" }
-
+arrow = { path = "../../../../arrow"  }
+datafusion = { path = "../../../../datafusion" }
+parquet = { path = "../../../../parquet"  }
 
 env_logger = "0.8"
 tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread"] }
diff --git a/rust/ballista/rust/client/Cargo.toml b/rust/ballista/rust/client/Cargo.toml
index 8ee5d427bae..de3effe87ca 100644
--- a/rust/ballista/rust/client/Cargo.toml
+++ b/rust/ballista/rust/client/Cargo.toml
@@ -31,8 +31,5 @@ futures = "0.3"
 log = "0.4"
 tokio = "1.0"
 
-#arrow = { path = "../../../arrow"  }
-#datafusion = { path = "../../../datafusion" }
-
-arrow = { git = "https://github.com/apache/arrow", rev="fe83dca" }
-datafusion = { git = "https://github.com/apache/arrow", rev="fe83dca" }
+arrow = { path = "../../../arrow"  }
+datafusion = { path = "../../../datafusion" }
\ No newline at end of file
diff --git a/rust/ballista/rust/core/Cargo.toml b/rust/ballista/rust/core/Cargo.toml
index 60c38725bf7..e37a1ea7caa 100644
--- a/rust/ballista/rust/core/Cargo.toml
+++ b/rust/ballista/rust/core/Cargo.toml
@@ -40,13 +40,9 @@ tokio = "1.0"
 tonic = "0.4"
 uuid = { version = "0.8", features = ["v4"] }
 
-#arrow = { path = "../../../arrow"  }
-#arrow-flight = { path = "../../../arrow-flight"  }
-#datafusion = { path = "../../../datafusion" }
-
-arrow = { git = "https://github.com/apache/arrow", rev="fe83dca" }
-arrow-flight = { git = "https://github.com/apache/arrow", rev="fe83dca" }
-datafusion = { git = "https://github.com/apache/arrow", rev="fe83dca" }
+arrow = { path = "../../../arrow"  }
+arrow-flight = { path = "../../../arrow-flight"  }
+datafusion = { path = "../../../datafusion" }
 
 [dev-dependencies]
 
diff --git a/rust/ballista/rust/executor/Cargo.toml b/rust/ballista/rust/executor/Cargo.toml
index beed860fd94..6b05b7c7fa9 100644
--- a/rust/ballista/rust/executor/Cargo.toml
+++ b/rust/ballista/rust/executor/Cargo.toml
@@ -45,13 +45,9 @@ tokio-stream = "0.1"
 tonic = "0.4"
 uuid = { version = "0.8", features = ["v4"] }
 
-#arrow = { path = "../../../arrow"  }
-#arrow-flight = { path = "../../../arrow-flight"  }
-#datafusion = { path = "../../../datafusion" }
-
-arrow = { git = "https://github.com/apache/arrow", rev="fe83dca" }
-arrow-flight = { git = "https://github.com/apache/arrow", rev="fe83dca" }
-datafusion = { git = "https://github.com/apache/arrow", rev="fe83dca" }
+arrow = { path = "../../../arrow"  }
+arrow-flight = { path = "../../../arrow-flight"  }
+datafusion = { path = "../../../datafusion" }
 
 [dev-dependencies]
 
diff --git a/rust/ballista/rust/scheduler/Cargo.toml b/rust/ballista/rust/scheduler/Cargo.toml
index 57342dd633e..71925ee5259 100644
--- a/rust/ballista/rust/scheduler/Cargo.toml
+++ b/rust/ballista/rust/scheduler/Cargo.toml
@@ -52,11 +52,8 @@ tonic = "0.4"
 tower = { version = "0.4" }
 warp = "0.3"
 
-#arrow = { path = "../../../arrow"  }
-#datafusion = { path = "../../../datafusion" }
-
-arrow = { git = "https://github.com/apache/arrow", rev="fe83dca" }
-datafusion = { git = "https://github.com/apache/arrow", rev="fe83dca" }
+arrow = { path = "../../../arrow"  }
+datafusion = { path = "../../../datafusion" }
 
 [dev-dependencies]
 ballista-core = { path = "../core" }

From 9a4ef4696b8b9d46e203f164345ee9c19cbac46c Mon Sep 17 00:00:00 2001
From: Andy Grove <andygrove73@gmail.com>
Date: Sun, 18 Apr 2021 07:43:49 -0600
Subject: [PATCH 087/719] ARROW-12432: [Rust] [DataFusion] Add metrics to
 SortExec

Add `outputRows` and `sortTime` metrics to SortExec.

Example output from Ballista:

```
SortExec { input: ProjectionExec { expr: [(Column { name: "l_shipmode" }, "l_shipmode"), (Column { name: "SUM(CASE WHEN
  Metrics: sortTime=44444, outputRows=2
```

Closes #10078 from andygrove/sortexec-metrics

Authored-by: Andy Grove <andygrove73@gmail.com>
Signed-off-by: Andy Grove <andygrove73@gmail.com>
---
 .../src/physical_plan/hash_aggregate.rs       |  7 +-
 rust/datafusion/src/physical_plan/mod.rs      | 14 +++-
 rust/datafusion/src/physical_plan/sort.rs     | 71 ++++++++++++++++---
 3 files changed, 77 insertions(+), 15 deletions(-)

diff --git a/rust/datafusion/src/physical_plan/hash_aggregate.rs b/rust/datafusion/src/physical_plan/hash_aggregate.rs
index b78e8bca550..234265022ef 100644
--- a/rust/datafusion/src/physical_plan/hash_aggregate.rs
+++ b/rust/datafusion/src/physical_plan/hash_aggregate.rs
@@ -28,7 +28,7 @@ use futures::{
 };
 
 use crate::error::{DataFusionError, Result};
-use crate::physical_plan::{Accumulator, AggregateExpr, MetricType, SQLMetric};
+use crate::physical_plan::{Accumulator, AggregateExpr, SQLMetric};
 use crate::physical_plan::{Distribution, ExecutionPlan, Partitioning, PhysicalExpr};
 
 use arrow::{
@@ -144,10 +144,7 @@ impl HashAggregateExec {
 
         let schema = Arc::new(schema);
 
-        let output_rows = Arc::new(Mutex::new(SQLMetric::new(
-            "outputRows",
-            MetricType::Counter,
-        )));
+        let output_rows = SQLMetric::counter("outputRows");
 
         Ok(HashAggregateExec {
             mode,
diff --git a/rust/datafusion/src/physical_plan/mod.rs b/rust/datafusion/src/physical_plan/mod.rs
index 054d585e8e3..5036dcb921b 100644
--- a/rust/datafusion/src/physical_plan/mod.rs
+++ b/rust/datafusion/src/physical_plan/mod.rs
@@ -18,7 +18,7 @@
 //! Traits for physical query plan, supporting parallel execution for partitioned relations.
 
 use std::fmt::{Debug, Display};
-use std::sync::Arc;
+use std::sync::{Arc, Mutex};
 use std::{any::Any, pin::Pin};
 
 use crate::execution::context::ExecutionContextState;
@@ -52,6 +52,8 @@ pub type SendableRecordBatchStream = Pin<Box<dyn RecordBatchStream + Send + Sync
 pub enum MetricType {
     /// Simple counter
     Counter,
+    /// Wall clock time in nanoseconds
+    TimeNanos,
 }
 
 /// SQL metric such as counter (number of input or output rows) or timing information about
@@ -67,6 +69,16 @@ pub struct SQLMetric {
 }
 
 impl SQLMetric {
+    /// Create a new metric for tracking a counter
+    pub fn counter(name: &str) -> Arc<Mutex<SQLMetric>> {
+        Arc::new(Mutex::new(SQLMetric::new(name, MetricType::Counter)))
+    }
+
+    /// Create a new metric for tracking time in nanoseconds
+    pub fn time_nanos(name: &str) -> Arc<Mutex<SQLMetric>> {
+        Arc::new(Mutex::new(SQLMetric::new(name, MetricType::TimeNanos)))
+    }
+
     /// Create a new SQLMetric
     pub fn new(name: &str, metric_type: MetricType) -> Self {
         Self {
diff --git a/rust/datafusion/src/physical_plan/sort.rs b/rust/datafusion/src/physical_plan/sort.rs
index 994168c2efb..26855b354db 100644
--- a/rust/datafusion/src/physical_plan/sort.rs
+++ b/rust/datafusion/src/physical_plan/sort.rs
@@ -19,11 +19,14 @@
 
 use std::any::Any;
 use std::pin::Pin;
-use std::sync::Arc;
+use std::sync::{Arc, Mutex};
 use std::task::{Context, Poll};
+use std::time::Instant;
 
+use async_trait::async_trait;
 use futures::stream::Stream;
 use futures::Future;
+use hashbrown::HashMap;
 
 use pin_project_lite::pin_project;
 
@@ -37,9 +40,9 @@ use arrow::{array::ArrayRef, error::ArrowError};
 use super::{RecordBatchStream, SendableRecordBatchStream};
 use crate::error::{DataFusionError, Result};
 use crate::physical_plan::expressions::PhysicalSortExpr;
-use crate::physical_plan::{common, Distribution, ExecutionPlan, Partitioning};
-
-use async_trait::async_trait;
+use crate::physical_plan::{
+    common, Distribution, ExecutionPlan, Partitioning, SQLMetric,
+};
 
 /// Sort execution plan
 #[derive(Debug)]
@@ -48,6 +51,10 @@ pub struct SortExec {
     input: Arc<dyn ExecutionPlan>,
     /// Sort expressions
     expr: Vec<PhysicalSortExpr>,
+    /// Output rows
+    output_rows: Arc<Mutex<SQLMetric>>,
+    /// Time to sort batches
+    sort_time_nanos: Arc<Mutex<SQLMetric>>,
 }
 
 impl SortExec {
@@ -56,7 +63,12 @@ impl SortExec {
         expr: Vec<PhysicalSortExpr>,
         input: Arc<dyn ExecutionPlan>,
     ) -> Result<Self> {
-        Ok(Self { expr, input })
+        Ok(Self {
+            expr,
+            input,
+            output_rows: SQLMetric::counter("outputRows"),
+            sort_time_nanos: SQLMetric::time_nanos("sortTime"),
+        })
     }
 
     /// Input schema
@@ -125,7 +137,25 @@ impl ExecutionPlan for SortExec {
         }
         let input = self.input.execute(0).await?;
 
-        Ok(Box::pin(SortStream::new(input, self.expr.clone())))
+        Ok(Box::pin(SortStream::new(
+            input,
+            self.expr.clone(),
+            self.output_rows.clone(),
+            self.sort_time_nanos.clone(),
+        )))
+    }
+
+    fn metrics(&self) -> HashMap<String, SQLMetric> {
+        let mut metrics = HashMap::new();
+        metrics.insert(
+            "outputRows".to_owned(),
+            self.output_rows.lock().unwrap().clone(),
+        );
+        metrics.insert(
+            "sortTime".to_owned(),
+            self.sort_time_nanos.lock().unwrap().clone(),
+        );
+        metrics
     }
 }
 
@@ -194,11 +224,17 @@ pin_project! {
         output: futures::channel::oneshot::Receiver<ArrowResult<Option<RecordBatch>>>,
         finished: bool,
         schema: SchemaRef,
+        output_rows: Arc<Mutex<SQLMetric>>,
     }
 }
 
 impl SortStream {
-    fn new(input: SendableRecordBatchStream, expr: Vec<PhysicalSortExpr>) -> Self {
+    fn new(
+        input: SendableRecordBatchStream,
+        expr: Vec<PhysicalSortExpr>,
+        output_rows: Arc<Mutex<SQLMetric>>,
+        sort_time: Arc<Mutex<SQLMetric>>,
+    ) -> Self {
         let (tx, rx) = futures::channel::oneshot::channel();
 
         let schema = input.schema();
@@ -207,7 +243,13 @@ impl SortStream {
             let sorted_batch = common::collect(input)
                 .await
                 .map_err(DataFusionError::into_arrow_external_error)
-                .and_then(move |batches| sort_batches(&batches, &schema, &expr));
+                .and_then(move |batches| {
+                    let now = Instant::now();
+                    let result = sort_batches(&batches, &schema, &expr);
+                    let mut sort_time = sort_time.lock().unwrap();
+                    sort_time.add(now.elapsed().as_nanos() as usize);
+                    result
+                });
 
             tx.send(sorted_batch)
         });
@@ -216,6 +258,7 @@ impl SortStream {
             output: rx,
             finished: false,
             schema,
+            output_rows,
         }
     }
 }
@@ -224,6 +267,8 @@ impl Stream for SortStream {
     type Item = ArrowResult<RecordBatch>;
 
     fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
+        let output_rows = self.output_rows.clone();
+
         if self.finished {
             return Poll::Ready(None);
         }
@@ -241,6 +286,12 @@ impl Stream for SortStream {
                     Err(e) => Some(Err(ArrowError::ExternalError(Box::new(e)))), // error receiving
                     Ok(result) => result.transpose(),
                 };
+
+                if let Some(Ok(batch)) = &result {
+                    let mut output_rows = output_rows.lock().unwrap();
+                    output_rows.add(batch.num_rows());
+                }
+
                 Poll::Ready(result)
             }
             Poll::Pending => Poll::Pending,
@@ -379,7 +430,9 @@ mod tests {
         assert_eq!(DataType::Float32, *sort_exec.schema().field(0).data_type());
         assert_eq!(DataType::Float64, *sort_exec.schema().field(1).data_type());
 
-        let result: Vec<RecordBatch> = collect(sort_exec).await?;
+        let result: Vec<RecordBatch> = collect(sort_exec.clone()).await?;
+        assert!(sort_exec.metrics().get("sortTime").unwrap().value > 0);
+        assert_eq!(sort_exec.metrics().get("outputRows").unwrap().value, 8);
         assert_eq!(result.len(), 1);
 
         let columns = result[0].columns();

From e2625d23b8ac4c018dd0615b1d5dcb6927cecd12 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Sun, 18 Apr 2021 18:01:02 +0200
Subject: [PATCH 088/719] ARROW-12440: [Release][Packaging] Various packaging,
 release script and release verification script fixes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #10091 from kszucs/fix-release-scripts

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 .github/workflows/archery.yml                 |   2 +-
 dev/archery/archery/crossbow/cli.py           |   1 +
 dev/archery/archery/release.py                |   2 +-
 dev/release/01-prepare-test.rb                | 187 ++++++++++++++---
 dev/release/01-prepare.sh                     | 190 +-----------------
 dev/release/03-binary-submit.sh               |  10 +-
 dev/release/04-binary-download.sh             |   3 +-
 dev/release/05-binary-upload.sh               |   5 +-
 dev/release/binary-task.rb                    |  22 +-
 dev/release/post-02-binary.sh                 |   2 +-
 dev/release/post-12-version.sh                |  79 ++++++++
 .../{binary-common.sh => utils-binary.sh}     |   0
 dev/release/utils-prepare.sh                  | 167 +++++++++++++++
 dev/release/verify-apt.sh                     |  29 ++-
 dev/release/verify-release-candidate.sh       |  14 +-
 dev/release/verify-yum.sh                     |  26 ++-
 dev/tasks/conda-recipes/azure.win.yml         |   4 +-
 dev/tasks/tasks.yml                           |   2 +-
 dev/tasks/verify-rc/github.linux.yml          |   1 +
 19 files changed, 487 insertions(+), 259 deletions(-)
 create mode 100755 dev/release/post-12-version.sh
 rename dev/release/{binary-common.sh => utils-binary.sh} (100%)
 create mode 100644 dev/release/utils-prepare.sh

diff --git a/.github/workflows/archery.yml b/.github/workflows/archery.yml
index 761e0459543..9f6880efb9f 100644
--- a/.github/workflows/archery.yml
+++ b/.github/workflows/archery.yml
@@ -43,7 +43,7 @@ jobs:
         with:
           fetch-depth: 0
       - name: Git Fixup
-        if: ${{ github.event_name == 'pull_request' }}
+        if: ${{ github.ref != 'master' }}
         shell: bash
         run: git branch master origin/master
       - name: Free Up Disk Space
diff --git a/dev/archery/archery/crossbow/cli.py b/dev/archery/archery/crossbow/cli.py
index 71c25e0460f..15dbb5d4e62 100644
--- a/dev/archery/archery/crossbow/cli.py
+++ b/dev/archery/archery/crossbow/cli.py
@@ -194,6 +194,7 @@ def highlight(code):
                               head=arrow_sha, version=arrow_version)
     config = Config.load_yaml(config_path)
     params = dict([p.split("=") for p in params])
+    params["queue_remote_url"] = "https://github.com/org/crossbow"
     job = Job.from_config(config=config, target=target, tasks=[task],
                           params=params)
 
diff --git a/dev/archery/archery/release.py b/dev/archery/archery/release.py
index acfe3fc2373..6baeabc9d59 100644
--- a/dev/archery/archery/release.py
+++ b/dev/archery/archery/release.py
@@ -448,7 +448,7 @@ def commits_to_pick(self, exclude_already_applied=True):
         if self.version.major == 0:
             # treat minor releases as major releases preceeding 1.0.0 release
             commit_range = "apache-arrow-0.{}.0..master".format(
-                self.version.minor - 1
+                self.version.minor
             )
         else:
             commit_range = "apache-arrow-{}.0.0..master".format(
diff --git a/dev/release/01-prepare-test.rb b/dev/release/01-prepare-test.rb
index b316ad20a9d..96be7d91ec9 100644
--- a/dev/release/01-prepare-test.rb
+++ b/dev/release/01-prepare-test.rb
@@ -30,7 +30,7 @@ def setup
       git("clone", @original_git_repository.to_s, @test_git_repository.to_s)
       Dir.chdir(@test_git_repository) do
         @tag_name = "apache-arrow-#{@release_version}"
-        @release_branch = "release-#{@release_version}-rc0"
+        @release_branch = "testing-release-#{@release_version}-rc0"
         @script = "dev/release/01-prepare.sh"
         git("checkout", "-b", @release_branch, @current_commit)
         yield
@@ -43,18 +43,18 @@ def omit_on_release_branch
     omit("Not for release branch") if on_release_branch?
   end
 
-  def prepare(*targets)
+  def prepare(script, *targets)
     if targets.last.is_a?(Hash)
       additional_env = targets.pop
     else
       additional_env = {}
     end
-    env = {"PREPARE_DEFAULT" => "0"}
+    env = { "PREPARE_DEFAULT" => "0" }
     targets.each do |target|
       env["PREPARE_#{target}"] = "1"
     end
     env = env.merge(additional_env)
-    sh(env, @script, @release_version, @next_version, "0")
+    sh(env, script, @release_version, @next_version, "0")
   end
 
   def parse_patch(patch)
@@ -64,7 +64,7 @@ def parse_patch(patch)
       case line
       when /\A--- a\//
         path = $POSTMATCH.chomp
-        diffs << {path: path, hunks: []}
+        diffs << { path: path, hunks: [] }
         in_hunk = false
       when /\A@@/
         in_hunk = true
@@ -82,7 +82,8 @@ def parse_patch(patch)
   def test_linux_packages
     user = "Arrow Developers"
     email = "dev@arrow.apache.org"
-    prepare("LINUX_PACKAGES",
+    prepare("dev/release/01-prepare.sh",
+            "LINUX_PACKAGES",
             "DEBFULLNAME" => user,
             "DEBEMAIL" => email)
     changes = parse_patch(git("log", "-n", "1", "-p"))
@@ -90,7 +91,6 @@ def test_linux_packages
       {
         path: change[:path],
         sampled_hunks: change[:hunks].collect(&:first),
-        # sampled_hunks: change[:hunks],
       }
     end
     base_dir = "dev/tasks/linux-packages"
@@ -104,8 +104,7 @@ def test_linux_packages
         ],
       },
       {
-        path:
-          "#{base_dir}/apache-arrow-release/yum/apache-arrow-release.spec.in",
+        path: "#{base_dir}/apache-arrow-release/yum/apache-arrow-release.spec.in",
         sampled_hunks: [
           "+* #{today} #{user} <#{email}> - #{@release_version}-1",
         ],
@@ -128,7 +127,7 @@ def test_linux_packages
 
   def test_version_pre_tag
     omit_on_release_branch
-    prepare("VERSION_PRE_TAG")
+    prepare("dev/release/01-prepare.sh", "VERSION_PRE_TAG")
     assert_equal([
                    {
                      path: "c_glib/meson.build",
@@ -179,11 +178,80 @@ def test_version_pre_tag
                         "+  url \"https://www.apache.org/dyn/closer.lua?path=arrow/arrow-#{@release_version}/apache-arrow-#{@release_version}.tar.gz\""],
                      ],
                    },
+                   {
+                     path: "java/adapter/avro/pom.xml",
+                     hunks: [
+                       ["-    <version>#{@snapshot_version}</version>",
+                        "+    <version>#{@release_version}</version>"],
+                     ],
+                   },
+                   {
+                     hunks: [
+                       ["-        <version>#{@snapshot_version}</version>",
+                        "+        <version>#{@release_version}</version>"],
+                     ],
+                     path: "java/adapter/jdbc/pom.xml",
+                   },
+                   {
+                     hunks: [
+                       ["-        <version>#{@snapshot_version}</version>",
+                        "+        <version>#{@release_version}</version>"],
+                     ],
+                     path: "java/adapter/orc/pom.xml",
+                   },
+                   { hunks: [["-    <version>#{@snapshot_version}</version>",
+                              "+    <version>#{@release_version}</version>"]],
+                     path: "java/algorithm/pom.xml" },
+                   { hunks: [["-    <version>#{@snapshot_version}</version>",
+                              "+    <version>#{@release_version}</version>"]],
+                     path: "java/compression/pom.xml" },
+                   { hunks: [["-        <version>#{@snapshot_version}</version>",
+                              "+        <version>#{@release_version}</version>"]],
+                     path: "java/dataset/pom.xml" },
+                   { hunks: [["-    <version>#{@snapshot_version}</version>",
+                              "+    <version>#{@release_version}</version>"]],
+                     path: "java/flight/flight-core/pom.xml" },
+                   { hunks: [["-    <version>#{@snapshot_version}</version>",
+                              "+    <version>#{@release_version}</version>"]],
+                     path: "java/flight/flight-grpc/pom.xml" },
+                   { hunks: [["-  <version>#{@snapshot_version}</version>", "+  <version>#{@release_version}</version>"]],
+                     path: "java/format/pom.xml" },
+                   { hunks: [["-      <version>#{@snapshot_version}</version>",
+                              "+      <version>#{@release_version}</version>"]],
+                     path: "java/gandiva/pom.xml" },
+                   { hunks: [["-    <version>#{@snapshot_version}</version>",
+                              "+    <version>#{@release_version}</version>"]],
+                     path: "java/memory/memory-core/pom.xml" },
+                   { hunks: [["-    <version>#{@snapshot_version}</version>",
+                              "+    <version>#{@release_version}</version>"]],
+                     path: "java/memory/memory-netty/pom.xml" },
+                   { hunks: [["-    <version>#{@snapshot_version}</version>",
+                              "+    <version>#{@release_version}</version>"]],
+                     path: "java/memory/memory-unsafe/pom.xml" },
+                   { hunks: [["-    <version>#{@snapshot_version}</version>",
+                              "+    <version>#{@release_version}</version>"]],
+                     path: "java/memory/pom.xml" },
+                   { hunks: [["-        <version>#{@snapshot_version}</version>",
+                              "+        <version>#{@release_version}</version>"],
+                             ["-            <version>#{@snapshot_version}</version>",
+                              "+            <version>#{@release_version}</version>"]],
+                     path: "java/performance/pom.xml" },
+                   { hunks: [["-        <version>#{@snapshot_version}</version>",
+                              "+        <version>#{@release_version}</version>"]],
+                     path: "java/plasma/pom.xml" },
+                   { hunks: [["-  <version>#{@snapshot_version}</version>", "+  <version>#{@release_version}</version>"]],
+                     path: "java/pom.xml" },
+                   { hunks: [["-        <version>#{@snapshot_version}</version>",
+                              "+        <version>#{@release_version}</version>"]],
+                     path: "java/tools/pom.xml" },
+                   { hunks: [["-    <version>#{@snapshot_version}</version>",
+                              "+    <version>#{@release_version}</version>"]],
+                     path: "java/vector/pom.xml" },
                    {
                      path: "js/package.json",
                      hunks: [
                        ["-  \"version\": \"#{@snapshot_version}\"",
-                        "+  \"version\": \"#{@release_version}\""]
+                        "+  \"version\": \"#{@release_version}\""],
                      ],
                    },
                    {
@@ -289,11 +357,11 @@ def test_version_pre_tag
                      ],
                    },
                    {
-                    path: "rust/datafusion-examples/Cargo.toml",
-                    hunks: [
-                      ["-version = \"#{@snapshot_version}\"",
-                       "+version = \"#{@release_version}\""],
-                    ],
+                     path: "rust/datafusion-examples/Cargo.toml",
+                     hunks: [
+                       ["-version = \"#{@snapshot_version}\"",
+                        "+version = \"#{@release_version}\""],
+                     ],
                    },
                    {
                      path: "rust/datafusion/Cargo.toml",
@@ -328,7 +396,7 @@ def test_version_pre_tag
                        ["-arrow = { path = \"../arrow\", version = \"#{@snapshot_version}\", optional = true }",
                         "+arrow = { path = \"../arrow\", version = \"#{@release_version}\", optional = true }"],
                        ["-arrow = { path = \"../arrow\", version = \"#{@snapshot_version}\" }",
-                        "+arrow = { path = \"../arrow\", version = \"#{@release_version}\" }"]
+                        "+arrow = { path = \"../arrow\", version = \"#{@release_version}\" }"],
                      ],
                    },
                    {
@@ -375,10 +443,10 @@ def test_version_pre_tag
 
   def test_version_post_tag
     if on_release_branch?
-      prepare("VERSION_POST_TAG")
+      prepare("dev/release/post-12-version.sh", "VERSION_POST_TAG")
     else
-      prepare("VERSION_PRE_TAG",
-              "VERSION_POST_TAG")
+      prepare("dev/release/01-prepare.sh", "VERSION_PRE_TAG")
+      prepare("dev/release/post-12-version.sh", "VERSION_POST_TAG")
     end
     assert_equal([
                    {
@@ -430,6 +498,63 @@ def test_version_post_tag
                         "+  url \"https://www.apache.org/dyn/closer.lua?path=arrow/arrow-#{@release_version}.9000/apache-arrow-#{@release_version}.9000.tar.gz\""],
                      ],
                    },
+                   { path: "java/adapter/avro/pom.xml",
+                     hunks: [["-    <version>#{@release_version}</version>",
+                             "+    <version>#{@next_snapshot_version}</version>"]] },
+                   { hunks: [["-        <version>#{@release_version}</version>",
+                              "+        <version>#{@next_snapshot_version}</version>"]],
+                     path: "java/adapter/jdbc/pom.xml" },
+                   { hunks: [["-        <version>#{@release_version}</version>",
+                              "+        <version>#{@next_snapshot_version}</version>"]],
+                     path: "java/adapter/orc/pom.xml" },
+                   { hunks: [["-    <version>#{@release_version}</version>",
+                              "+    <version>#{@next_snapshot_version}</version>"]],
+                     path: "java/algorithm/pom.xml" },
+                   { hunks: [["-    <version>#{@release_version}</version>",
+                              "+    <version>#{@next_snapshot_version}</version>"]],
+                     path: "java/compression/pom.xml" },
+                   { hunks: [["-        <version>#{@release_version}</version>",
+                              "+        <version>#{@next_snapshot_version}</version>"]],
+                     path: "java/dataset/pom.xml" },
+                   { hunks: [["-    <version>#{@release_version}</version>",
+                              "+    <version>#{@next_snapshot_version}</version>"]],
+                     path: "java/flight/flight-core/pom.xml" },
+                   { hunks: [["-    <version>#{@release_version}</version>",
+                              "+    <version>#{@next_snapshot_version}</version>"]],
+                     path: "java/flight/flight-grpc/pom.xml" },
+                   { hunks: [["-  <version>#{@release_version}</version>", "+  <version>#{@next_snapshot_version}</version>"]],
+                     path: "java/format/pom.xml" },
+                   { hunks: [["-      <version>#{@release_version}</version>",
+                              "+      <version>#{@next_snapshot_version}</version>"]],
+                     path: "java/gandiva/pom.xml" },
+                   { hunks: [["-    <version>#{@release_version}</version>",
+                              "+    <version>#{@next_snapshot_version}</version>"]],
+                     path: "java/memory/memory-core/pom.xml" },
+                   { hunks: [["-    <version>#{@release_version}</version>",
+                              "+    <version>#{@next_snapshot_version}</version>"]],
+                     path: "java/memory/memory-netty/pom.xml" },
+                   { hunks: [["-    <version>#{@release_version}</version>",
+                              "+    <version>#{@next_snapshot_version}</version>"]],
+                     path: "java/memory/memory-unsafe/pom.xml" },
+                   { hunks: [["-    <version>#{@release_version}</version>",
+                              "+    <version>#{@next_snapshot_version}</version>"]],
+                     path: "java/memory/pom.xml" },
+                   { hunks: [["-        <version>#{@release_version}</version>",
+                              "+        <version>#{@next_snapshot_version}</version>"],
+                             ["-            <version>#{@release_version}</version>",
+                              "+            <version>#{@next_snapshot_version}</version>"]],
+                     path: "java/performance/pom.xml" },
+                   { hunks: [["-        <version>#{@release_version}</version>",
+                              "+        <version>#{@next_snapshot_version}</version>"]],
+                     path: "java/plasma/pom.xml" },
+                   { hunks: [["-  <version>#{@release_version}</version>", "+  <version>#{@next_snapshot_version}</version>"]],
+                     path: "java/pom.xml" },
+                   { hunks: [["-        <version>#{@release_version}</version>",
+                              "+        <version>#{@next_snapshot_version}</version>"]],
+                     path: "java/tools/pom.xml" },
+                   { hunks: [["-    <version>#{@release_version}</version>",
+                              "+    <version>#{@next_snapshot_version}</version>"]],
+                     path: "java/vector/pom.xml" },
                    {
                      path: "js/package.json",
                      hunks: [
@@ -541,11 +666,11 @@ def test_version_post_tag
                      ],
                    },
                    {
-                    path: "rust/datafusion-examples/Cargo.toml",
-                    hunks: [
-                      ["-version = \"#{@release_version}\"",
-                      "+version = \"#{@next_snapshot_version}\""],
-                  ],
+                     path: "rust/datafusion-examples/Cargo.toml",
+                     hunks: [
+                       ["-version = \"#{@release_version}\"",
+                        "+version = \"#{@next_snapshot_version}\""],
+                     ],
                    },
                    {
                      path: "rust/datafusion/Cargo.toml",
@@ -580,7 +705,7 @@ def test_version_post_tag
                        ["-arrow = { path = \"../arrow\", version = \"#{@release_version}\", optional = true }",
                         "+arrow = { path = \"../arrow\", version = \"#{@next_snapshot_version}\", optional = true }"],
                        ["-arrow = { path = \"../arrow\", version = \"#{@release_version}\" }",
-                        "+arrow = { path = \"../arrow\", version = \"#{@next_snapshot_version}\" }"]
+                        "+arrow = { path = \"../arrow\", version = \"#{@next_snapshot_version}\" }"],
                      ],
                    },
                    {
@@ -626,12 +751,12 @@ def test_version_post_tag
   end
 
   def test_deb_package_names
-    prepare("DEB_PACKAGE_NAMES")
+    prepare("dev/release/post-12-version.sh", "DEB_PACKAGE_NAMES")
     changes = parse_patch(git("log", "-n", "1", "-p"))
     sampled_changes = changes.collect do |change|
       first_hunk = change[:hunks][0]
-      first_removed_line = first_hunk.find {|line| line.start_with?("-")}
-      first_added_line = first_hunk.find {|line| line.start_with?("+")}
+      first_removed_line = first_hunk.find { |line| line.start_with?("-") }
+      first_added_line = first_hunk.find { |line| line.start_with?("+") }
       {
         sampled_diff: [first_removed_line, first_added_line],
         path: change[:path],
@@ -643,14 +768,14 @@ def test_deb_package_names
           "-dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib#{@so_version}.install",
           "+dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib#{@next_so_version}.install",
         ],
-        path: "dev/release/rat_exclude_files.txt"
+        path: "dev/release/rat_exclude_files.txt",
       },
       {
         sampled_diff: [
           "-Package: libarrow#{@so_version}",
           "+Package: libarrow#{@next_so_version}",
         ],
-        path: "dev/tasks/linux-packages/apache-arrow/debian/control.in"
+        path: "dev/tasks/linux-packages/apache-arrow/debian/control.in",
       },
       {
         sampled_diff: [
diff --git a/dev/release/01-prepare.sh b/dev/release/01-prepare.sh
index 80703c2d87f..051a8f646e5 100755
--- a/dev/release/01-prepare.sh
+++ b/dev/release/01-prepare.sh
@@ -26,149 +26,7 @@ if [ "$#" -ne 3 ]; then
   exit 1
 fi
 
-update_versions() {
-  local base_version=$1
-  local next_version=$2
-  local type=$3
-
-  case ${type} in
-    release)
-      local version=${base_version}
-      local r_version=${base_version}
-      ;;
-    snapshot)
-      local version=${next_version}-SNAPSHOT
-      local r_version=${base_version}.9000
-      ;;
-  esac
-
-  cd "${SOURCE_DIR}/../../c_glib"
-  sed -i.bak -E -e \
-    "s/^version = '.+'/version = '${version}'/" \
-    meson.build
-  rm -f meson.build.bak
-  git add meson.build
-  cd -
-
-  cd "${SOURCE_DIR}/../../ci/scripts"
-  sed -i.bak -E -e \
-    "s/^pkgver=.+/pkgver=${r_version}/" \
-    PKGBUILD
-  rm -f PKGBUILD.bak
-  git add PKGBUILD
-  cd -
-
-  cd "${SOURCE_DIR}/../../cpp"
-  sed -i.bak -E -e \
-    "s/^set\(ARROW_VERSION \".+\"\)/set(ARROW_VERSION \"${version}\")/" \
-    CMakeLists.txt
-  rm -f CMakeLists.txt.bak
-  git add CMakeLists.txt
-
-  sed -i.bak -E -e \
-    "s/\"version-string\": \".+\"/\"version-string\": \"${version}\"/" \
-    vcpkg.json
-  rm -f vcpkg.json.bak
-  git add vcpkg.json
-  cd -
-
-  cd "${SOURCE_DIR}/../../csharp"
-  sed -i.bak -E -e \
-    "s/^    <Version>.+<\/Version>/    <Version>${version}<\/Version>/" \
-    Directory.Build.props
-  rm -f Directory.Build.props.bak
-  git add Directory.Build.props
-  cd -
-
-  cd "${SOURCE_DIR}/../../dev/tasks/homebrew-formulae"
-  sed -i.bak -E -e \
-    "s/arrow-[0-9.]+[0-9]+/arrow-${r_version}/g" \
-    autobrew/apache-arrow.rb
-  rm -f autobrew/apache-arrow.rb.bak
-  git add autobrew/apache-arrow.rb
-  sed -i.bak -E -e \
-    "s/arrow-[0-9.\-]+[0-9SNAPHOT]+/arrow-${version}/g" \
-    apache-arrow.rb
-  rm -f apache-arrow.rb.bak
-  git add apache-arrow.rb
-  cd -
-
-  cd "${SOURCE_DIR}/../../js"
-  sed -i.bak -E -e \
-    "s/^  \"version\": \".+\"/  \"version\": \"${version}\"/" \
-    package.json
-  rm -f package.json.bak
-  git add package.json
-  cd -
-
-  cd "${SOURCE_DIR}/../../matlab"
-  sed -i.bak -E -e \
-    "s/^set\(MLARROW_VERSION \".+\"\)/set(MLARROW_VERSION \"${version}\")/" \
-    CMakeLists.txt
-  rm -f CMakeLists.txt.bak
-  git add CMakeLists.txt
-  cd -
-
-  cd "${SOURCE_DIR}/../../python"
-  sed -i.bak -E -e \
-    "s/^default_version = '.+'/default_version = '${version}'/" \
-    setup.py
-  rm -f setup.py.bak
-  git add setup.py
-  cd -
-
-  cd "${SOURCE_DIR}/../../r"
-  sed -i.bak -E -e \
-    "s/^Version: .+/Version: ${r_version}/" \
-    DESCRIPTION
-  rm -f DESCRIPTION.bak
-  git add DESCRIPTION
-  if [ ${type} = "snapshot" ]; then
-    # Add a news entry for the new dev version
-    echo "dev"
-    sed -i.bak -E -e \
-      "0,/^# arrow /s/^(# arrow .+)/# arrow ${r_version}\n\n\1/" \
-      NEWS.md
-  else
-    # Replace dev version with release version
-    echo "release"
-    sed -i.bak -E -e \
-      "0,/^# arrow /s/^# arrow .+/# arrow ${r_version}/" \
-      NEWS.md
-  fi
-  rm -f NEWS.md.bak
-  git add NEWS.md
-  cd -
-
-  cd "${SOURCE_DIR}/../../ruby"
-  sed -i.bak -E -e \
-    "s/^  VERSION = \".+\"/  VERSION = \"${version}\"/g" \
-    */*/*/version.rb
-  rm -f */*/*/version.rb.bak
-  git add */*/*/version.rb
-  cd -
-
-  cd "${SOURCE_DIR}/../../rust"
-  sed -i.bak -E \
-    -e "s/^version = \".+\"/version = \"${version}\"/g" \
-    -e "s/^(arrow = .* version = )\".*\"(( .*)|(, features = .*)|(, optional = .*))$/\\1\"${version}\"\\2/g" \
-    -e "s/^(arrow-flight = .* version = )\".+\"( .*)/\\1\"${version}\"\\2/g" \
-    -e "s/^(parquet = .* version = )\".*\"(( .*)|(, features = .*))$/\\1\"${version}\"\\2/g" \
-    -e "s/^(parquet_derive = .* version = )\".*\"(( .*)|(, features = .*))$/\\1\"${version}\"\\2/g" \
-    */Cargo.toml
-  rm -f */Cargo.toml.bak
-  git add */Cargo.toml
-
-  sed -i.bak -E \
-    -e "s/^([^ ]+) = \".+\"/\\1 = \"${version}\"/g" \
-    -e "s,docs\.rs/crate/([^/]+)/[^)]+,docs.rs/crate/\\1/${version},g" \
-    */README.md
-  rm -f */README.md.bak
-  git add */README.md
-  cd -
-}
-
-############################## Pre-Tag Commits ##############################
+. $SOURCE_DIR/utils-prepare.sh
 
 version=$1
 next_version=$2
@@ -185,8 +43,6 @@ release_candidate_branch="release-${version}-rc${rc_number}"
 : ${PREPARE_VERSION_PRE_TAG:=${PREPARE_DEFAULT}}
 : ${PREPARE_BRANCH:=${PREPARE_DEFAULT}}
 : ${PREPARE_TAG:=${PREPARE_DEFAULT}}
-: ${PREPARE_VERSION_POST_TAG:=${PREPARE_DEFAULT}}
-: ${PREPARE_DEB_PACKAGE_NAMES:=${PREPARE_DEFAULT}}
 
 if [ ${PREPARE_TAG} -gt 0 ]; then
   if [ $(git tag -l "${release_tag}") ]; then
@@ -245,47 +101,3 @@ fi
 if [ ${PREPARE_TAG} -gt 0 ]; then
   git tag -a "${release_tag}" -m "[Release] Apache Arrow Release ${version}"
 fi
-
-############################## Post-Tag Commits #############################
-
-if [ ${PREPARE_VERSION_POST_TAG} -gt 0 ]; then
-  echo "Updating versions for ${next_version_snapshot}"
-  update_versions "${version}" "${next_version}" "snapshot"
-  git commit -m "[Release] Update versions for ${next_version_snapshot}"
-fi
-
-if [ ${PREPARE_DEB_PACKAGE_NAMES} -gt 0 ]; then
-  echo "Updating .deb package names for ${next_version}"
-  so_version() {
-    local version=$1
-    local major_version=$(echo $version | sed -E -e 's/^([0-9]+)\.[0-9]+\.[0-9]+$/\1/')
-    local minor_version=$(echo $version | sed -E -e 's/^[0-9]+\.([0-9]+)\.[0-9]+$/\1/')
-    expr ${major_version} \* 100 + ${minor_version}
-  }
-  deb_lib_suffix=$(so_version $version)
-  next_deb_lib_suffix=$(so_version $next_version)
-  if [ "${deb_lib_suffix}" != "${next_deb_lib_suffix}" ]; then
-    cd $SOURCE_DIR/../tasks/linux-packages/apache-arrow
-    for target in debian*/lib*${deb_lib_suffix}.install; do
-      git mv \
-	${target} \
-	$(echo $target | sed -e "s/${deb_lib_suffix}/${next_deb_lib_suffix}/")
-    done
-    deb_lib_suffix_substitute_pattern="s/(lib(arrow|gandiva|parquet|plasma)[-a-z]*)${deb_lib_suffix}/\\1${next_deb_lib_suffix}/g"
-    sed -i.bak -E -e "${deb_lib_suffix_substitute_pattern}" debian*/control*
-    rm -f debian*/control*.bak
-    git add debian*/control*
-    cd -
-    cd $SOURCE_DIR/../tasks/
-    sed -i.bak -E -e "${deb_lib_suffix_substitute_pattern}" tasks.yml
-    rm -f tasks.yml.bak
-    git add tasks.yml
-    cd -
-    cd $SOURCE_DIR
-    sed -i.bak -E -e "${deb_lib_suffix_substitute_pattern}" rat_exclude_files.txt
-    rm -f rat_exclude_files.txt.bak
-    git add rat_exclude_files.txt
-    git commit -m "[Release] Update .deb package names for $next_version"
-    cd -
-  fi
-fi
diff --git a/dev/release/03-binary-submit.sh b/dev/release/03-binary-submit.sh
index 1bdbc2077be..ea67222258a 100755
--- a/dev/release/03-binary-submit.sh
+++ b/dev/release/03-binary-submit.sh
@@ -28,14 +28,10 @@ version=$1
 rc_number=$2
 version_with_rc="${version}-rc${rc_number}"
 crossbow_job_prefix="release-${version_with_rc}"
-
 release_tag="apache-arrow-${version}"
-release_candidate_branch="release-${version}-rc${rc_number}"
-
-: ${GIT_REMOTE:="origin"}
 
-git checkout ${release_candidate_branch}
-git push -u ${GIT_REMOTE} ${release_candidate_branch}
+: ${ARROW_REPOSITORY:="apache/arrow"}
+: ${ARROW_BRANCH:=$release_tag}
 
 # archery will submit a job with id: "${crossbow_job_prefix}-0" unless there
 # are jobs submitted with the same prefix (the integer at the end is auto
@@ -43,4 +39,6 @@ git push -u ${GIT_REMOTE} ${release_candidate_branch}
 archery crossbow submit \
     --job-prefix ${crossbow_job_prefix} \
     --arrow-version ${version_with_rc} \
+    --arrow-remote "https://github.com/${ARROW_REPOSITORY}" \
+    --arrow-branch ${ARROW_BRANCH} \
     --group packaging
diff --git a/dev/release/04-binary-download.sh b/dev/release/04-binary-download.sh
index d0b61b05884..e2b97ac6a0c 100755
--- a/dev/release/04-binary-download.sh
+++ b/dev/release/04-binary-download.sh
@@ -33,6 +33,7 @@ crossbow_job_prefix="release-${version_with_rc}"
 # archery will submit a job with id: "${crossbow_job_prefix}-0" unless there
 # are jobs submitted with the same prefix (the integer at the end is auto
 # incremented)
-: ${CROSSBOW_JOB_ID:="${crossbow_job_prefix}-0"}
+: ${CROSSBOW_JOB_NUMBER:="0"}
+: ${CROSSBOW_JOB_ID:="${crossbow_job_prefix}-${CROSSBOW_JOB_NUMBER}"}
 
 archery crossbow download-artifacts ${CROSSBOW_JOB_ID}
diff --git a/dev/release/05-binary-upload.sh b/dev/release/05-binary-upload.sh
index 4a360c28b04..979e40c3c6f 100755
--- a/dev/release/05-binary-upload.sh
+++ b/dev/release/05-binary-upload.sh
@@ -35,7 +35,8 @@ version_with_rc="${version}-rc${rc}"
 crossbow_job_prefix="release-${version_with_rc}"
 crossbow_package_dir="${SOURCE_DIR}/../../packages"
 
-: ${CROSSBOW_JOB_ID:="${crossbow_job_prefix}-0"}
+: ${CROSSBOW_JOB_NUMBER:="0"}
+: ${CROSSBOW_JOB_ID:="${crossbow_job_prefix}-${CROSSBOW_JOB_NUMBER}"}
 artifact_dir="${crossbow_package_dir}/${CROSSBOW_JOB_ID}"
 
 if [ ! -e "$artifact_dir" ]; then
@@ -68,7 +69,7 @@ if [ -n "${SOURCE_BINTRAY_REPOSITORY_CUSTOM}" ]; then
   SOURCE_BINTRAY_REPOSITORY=${SOURCE_BINTRAY_REPOSITORY_CUSTOM}
 fi
 
-. binary-common.sh
+. utils-binary.sh
 
 # By default upload all artifacts.
 # To deactivate one category, deactivate the category and all of its dependents.
diff --git a/dev/release/binary-task.rb b/dev/release/binary-task.rb
index 42bc1fe4766..7e7561a7722 100644
--- a/dev/release/binary-task.rb
+++ b/dev/release/binary-task.rb
@@ -1047,7 +1047,11 @@ def apt_targets
     else
       available_apt_targets.select do |distribution, code_name, component|
         env_apt_targets.any? do |env_apt_target|
-          env_apt_target.start_with?("#{distribution}-#{code_name}")
+          if env_apt_target.include?("-")
+            env_apt_target.start_with?("#{distribution}-#{code_name}")
+          else
+            env_apt_target == distribution
+          end
         end
       end
     end
@@ -1078,8 +1082,8 @@ def define_deb_tasks
           Dir.glob("#{source_dir_prefix}*/**/*") do |path|
             next if File.directory?(path)
             base_name = File.basename(path)
-            if base_name.start_with?("apache-arrow-archive-keyring")
-              package_name = "apache-arrow-archive-keyring"
+            if base_name.start_with?("apache-arrow-apt-source")
+              package_name = "apache-arrow-apt-source"
             else
               package_name = "apache-arrow"
             end
@@ -1100,13 +1104,13 @@ def define_deb_tasks
                           destination_path,
                           progress_reporter)
             case base_name
-            when /\A[^_]+-archive-keyring_.*\.deb\z/
-              latest_archive_keyring_package_path = [
+            when /\A[^_]+-apt-source_.*\.deb\z/
+              latest_apt_source_package_path = [
                 distribution_dir,
                 "#{package_name}-latest-#{code_name}.deb"
               ].join("/")
               copy_artifact(path,
-                            latest_archive_keyring_package_path,
+                            latest_apt_source_package_path,
                             progress_reporter)
             end
           end
@@ -1435,7 +1439,11 @@ def yum_targets
     else
       available_yum_targets.select do |distribution, distribution_version|
         env_yum_targets.any? do |env_yum_target|
-          env_yum_target.start_with?("#{distribution}-#{distribution_version}")
+          if env_yum_target.include?("-")
+            env_yum_target.start_with?("#{distribution}-#{distribution_version}")
+          else
+            env_yum_target == distribution
+          end
         end
       end
     end
diff --git a/dev/release/post-02-binary.sh b/dev/release/post-02-binary.sh
index 9f531afad9f..6d3e19e0dca 100755
--- a/dev/release/post-02-binary.sh
+++ b/dev/release/post-02-binary.sh
@@ -45,7 +45,7 @@ if [ -n "${BINTRAY_REPOSITORY_CUSTOM}" ]; then
   BINTRAY_REPOSITORY=${BINTRAY_REPOSITORY_CUSTOM}
 fi
 
-. binary-common.sh
+. utils-binary.sh
 
 # By default deploy all artifacts.
 # To deactivate one category, deactivate the category and all of its dependents.
diff --git a/dev/release/post-12-version.sh b/dev/release/post-12-version.sh
new file mode 100755
index 00000000000..9ca008625d4
--- /dev/null
+++ b/dev/release/post-12-version.sh
@@ -0,0 +1,79 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+set -ue
+
+SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+if [ "$#" -ne 3 ]; then
+  echo "Usage: $0 <version> <next_version> <rc-num>"
+  exit 1
+fi
+
+: ${PREPARE_DEFAULT:=1}
+: ${PREPARE_VERSION_POST_TAG:=${PREPARE_DEFAULT}}
+: ${PREPARE_DEB_PACKAGE_NAMES:=${PREPARE_DEFAULT}}
+
+. $SOURCE_DIR/utils-prepare.sh
+
+version=$1
+next_version=$2
+next_version_snapshot="${next_version}-SNAPSHOT"
+
+if [ ${PREPARE_VERSION_POST_TAG} -gt 0 ]; then
+  echo "Updating versions for ${next_version_snapshot}"
+  update_versions "${version}" "${next_version}" "snapshot"
+  git commit -m "[Release] Update versions for ${next_version_snapshot}"
+fi
+
+if [ ${PREPARE_DEB_PACKAGE_NAMES} -gt 0 ]; then
+  echo "Updating .deb package names for ${next_version}"
+  so_version() {
+    local version=$1
+    local major_version=$(echo $version | sed -E -e 's/^([0-9]+)\.[0-9]+\.[0-9]+$/\1/')
+    local minor_version=$(echo $version | sed -E -e 's/^[0-9]+\.([0-9]+)\.[0-9]+$/\1/')
+    expr ${major_version} \* 100 + ${minor_version}
+  }
+  deb_lib_suffix=$(so_version $version)
+  next_deb_lib_suffix=$(so_version $next_version)
+  if [ "${deb_lib_suffix}" != "${next_deb_lib_suffix}" ]; then
+    cd $SOURCE_DIR/../tasks/linux-packages/apache-arrow
+    for target in debian*/lib*${deb_lib_suffix}.install; do
+      git mv \
+	${target} \
+	$(echo $target | sed -e "s/${deb_lib_suffix}/${next_deb_lib_suffix}/")
+    done
+    deb_lib_suffix_substitute_pattern="s/(lib(arrow|gandiva|parquet|plasma)[-a-z]*)${deb_lib_suffix}/\\1${next_deb_lib_suffix}/g"
+    sed -i.bak -E -e "${deb_lib_suffix_substitute_pattern}" debian*/control*
+    rm -f debian*/control*.bak
+    git add debian*/control*
+    cd -
+    cd $SOURCE_DIR/../tasks/
+    sed -i.bak -E -e "${deb_lib_suffix_substitute_pattern}" tasks.yml
+    rm -f tasks.yml.bak
+    git add tasks.yml
+    cd -
+    cd $SOURCE_DIR
+    sed -i.bak -E -e "${deb_lib_suffix_substitute_pattern}" rat_exclude_files.txt
+    rm -f rat_exclude_files.txt.bak
+    git add rat_exclude_files.txt
+    git commit -m "[Release] Update .deb package names for $next_version"
+    cd -
+  fi
+fi
diff --git a/dev/release/binary-common.sh b/dev/release/utils-binary.sh
similarity index 100%
rename from dev/release/binary-common.sh
rename to dev/release/utils-binary.sh
diff --git a/dev/release/utils-prepare.sh b/dev/release/utils-prepare.sh
new file mode 100644
index 00000000000..a1c884125a8
--- /dev/null
+++ b/dev/release/utils-prepare.sh
@@ -0,0 +1,167 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARROW_DIR="${SOURCE_DIR}/../.."
+
+update_versions() {
+  local base_version=$1
+  local next_version=$2
+  local type=$3
+
+  case ${type} in
+    release)
+      local version=${base_version}
+      local r_version=${base_version}
+      ;;
+    snapshot)
+      local version=${next_version}-SNAPSHOT
+      local r_version=${base_version}.9000
+      ;;
+  esac
+
+  pushd "${ARROW_DIR}/c_glib"
+  sed -i.bak -E -e \
+    "s/^version = '.+'/version = '${version}'/" \
+    meson.build
+  rm -f meson.build.bak
+  git add meson.build
+  popd
+
+  pushd "${ARROW_DIR}/ci/scripts"
+  sed -i.bak -E -e \
+    "s/^pkgver=.+/pkgver=${r_version}/" \
+    PKGBUILD
+  rm -f PKGBUILD.bak
+  git add PKGBUILD
+  popd
+
+  pushd "${ARROW_DIR}/cpp"
+  sed -i.bak -E -e \
+    "s/^set\(ARROW_VERSION \".+\"\)/set(ARROW_VERSION \"${version}\")/" \
+    CMakeLists.txt
+  rm -f CMakeLists.txt.bak
+  git add CMakeLists.txt
+
+  sed -i.bak -E -e \
+    "s/\"version-string\": \".+\"/\"version-string\": \"${version}\"/" \
+    vcpkg.json
+  rm -f vcpkg.json.bak
+  git add vcpkg.json
+  popd
+
+  pushd "${ARROW_DIR}/java"
+  mvn versions:set -DnewVersion=${version}
+  find . -type f -name pom.xml.versionsBackup -delete
+  git add "pom.xml"
+  git add "**/pom.xml"
+  popd
+
+  pushd "${ARROW_DIR}/csharp"
+  sed -i.bak -E -e \
+    "s/^    <Version>.+<\/Version>/    <Version>${version}<\/Version>/" \
+    Directory.Build.props
+  rm -f Directory.Build.props.bak
+  git add Directory.Build.props
+  popd
+
+  pushd "${ARROW_DIR}/dev/tasks/homebrew-formulae"
+  sed -i.bak -E -e \
+    "s/arrow-[0-9.]+[0-9]+/arrow-${r_version}/g" \
+    autobrew/apache-arrow.rb
+  rm -f autobrew/apache-arrow.rb.bak
+  git add autobrew/apache-arrow.rb
+  sed -i.bak -E -e \
+    "s/arrow-[0-9.\-]+[0-9SNAPHOT]+/arrow-${version}/g" \
+    apache-arrow.rb
+  rm -f apache-arrow.rb.bak
+  git add apache-arrow.rb
+  popd
+
+  pushd "${ARROW_DIR}/js"
+  sed -i.bak -E -e \
+    "s/^  \"version\": \".+\"/  \"version\": \"${version}\"/" \
+    package.json
+  rm -f package.json.bak
+  git add package.json
+  popd
+
+  pushd "${ARROW_DIR}/matlab"
+  sed -i.bak -E -e \
+    "s/^set\(MLARROW_VERSION \".+\"\)/set(MLARROW_VERSION \"${version}\")/" \
+    CMakeLists.txt
+  rm -f CMakeLists.txt.bak
+  git add CMakeLists.txt
+  popd
+
+  pushd "${ARROW_DIR}/python"
+  sed -i.bak -E -e \
+    "s/^default_version = '.+'/default_version = '${version}'/" \
+    setup.py
+  rm -f setup.py.bak
+  git add setup.py
+  popd
+
+  pushd "${ARROW_DIR}/r"
+  sed -i.bak -E -e \
+    "s/^Version: .+/Version: ${r_version}/" \
+    DESCRIPTION
+  rm -f DESCRIPTION.bak
+  git add DESCRIPTION
+  if [ ${type} = "snapshot" ]; then
+    # Add a news entry for the new dev version
+    echo "dev"
+    sed -i.bak -E -e \
+      "0,/^# arrow /s/^(# arrow .+)/# arrow ${r_version}\n\n\1/" \
+      NEWS.md
+  else
+    # Replace dev version with release version
+    echo "release"
+    sed -i.bak -E -e \
+      "0,/^# arrow /s/^# arrow .+/# arrow ${r_version}/" \
+      NEWS.md
+  fi
+  rm -f NEWS.md.bak
+  git add NEWS.md
+  popd
+
+  pushd "${ARROW_DIR}/ruby"
+  sed -i.bak -E -e \
+    "s/^  VERSION = \".+\"/  VERSION = \"${version}\"/g" \
+    */*/*/version.rb
+  rm -f */*/*/version.rb.bak
+  git add */*/*/version.rb
+  popd
+
+  pushd "${ARROW_DIR}/rust"
+  sed -i.bak -E \
+    -e "s/^version = \".+\"/version = \"${version}\"/g" \
+    -e "s/^(arrow = .* version = )\".*\"(( .*)|(, features = .*)|(, optional = .*))$/\\1\"${version}\"\\2/g" \
+    -e "s/^(arrow-flight = .* version = )\".+\"( .*)/\\1\"${version}\"\\2/g" \
+    -e "s/^(parquet = .* version = )\".*\"(( .*)|(, features = .*))$/\\1\"${version}\"\\2/g" \
+    -e "s/^(parquet_derive = .* version = )\".*\"(( .*)|(, features = .*))$/\\1\"${version}\"\\2/g" \
+    */Cargo.toml
+  rm -f */Cargo.toml.bak
+  git add */Cargo.toml
+
+  sed -i.bak -E \
+    -e "s/^([^ ]+) = \".+\"/\\1 = \"${version}\"/g" \
+    -e "s,docs\.rs/crate/([^/]+)/[^)]+,docs.rs/crate/\\1/${version},g" \
+    */README.md
+  rm -f */README.md.bak
+  git add */README.md
+  popd
+}
diff --git a/dev/release/verify-apt.sh b/dev/release/verify-apt.sh
index e7b87a3a4da..bf7c0bd7050 100755
--- a/dev/release/verify-apt.sh
+++ b/dev/release/verify-apt.sh
@@ -21,10 +21,12 @@ set -exu
 
 if [ $# -lt 2 ]; then
   echo "Usage: $0 VERSION rc"
+  echo "       $0 VERSION rc BINTRAY_REPOSITORY"
   echo "       $0 VERSION release"
   echo "       $0 VERSION local"
   echo " e.g.: $0 0.13.0 rc           # Verify 0.13.0 RC"
   echo " e.g.: $0 0.13.0 release      # Verify 0.13.0"
+  echo " e.g.: $0 0.13.0 rc kszucs/arrow # Verify 0.13.0 RC at https://bintray.com/kszucs/arrow"
   echo " e.g.: $0 0.13.0-dev20210203 local # Verify 0.13.0-dev20210203 on local"
   exit 1
 fi
@@ -84,9 +86,15 @@ if [ "${TYPE}" = "local" ]; then
 else
   package_version="${VERSION}-1"
   apt_source_base_name="apache-arrow-apt-source-latest-${code_name}.deb"
-  curl \
-    --output "${apt_source_base_name}" \
-    "${artifactory_base_url}/${apt_source_base_name}"
+  if [ $# -eq 3 ]; then
+    curl \
+      --output "${apt_source_base_name}" \
+      "https://dl.bintray.com/$3/${distribution}-rc/${apt_source_base_name}"
+  else
+    curl \
+      --output "${apt_source_base_name}" \
+      "${artifactory_base_url}/${apt_source_base_name}"
+  fi
   apt install -y -V "./${apt_source_base_name}"
 fi
 
@@ -104,10 +112,17 @@ if [ "${TYPE}" = "local" ]; then
   fi
 else
   if [ "${TYPE}" = "rc" ]; then
-    sed \
-      -i"" \
-      -e "s,^URIs: \\(.*\\)/,URIs: \\1-rc/,g" \
-      /etc/apt/sources.list.d/apache-arrow.sources
+    if [ $# -eq 3 ]; then
+      sed \
+        -i"" \
+        -e "s,^URIs: .*/,URIs: https://dl.bintray.com/$3/${distribution}-rc/,g" \
+        /etc/apt/sources.list.d/apache-arrow.sources
+    else
+      sed \
+        -i"" \
+        -e "s,^URIs: \\(.*\\)/,URIs: \\1-rc/,g" \
+        /etc/apt/sources.list.d/apache-arrow.sources
+    fi
   fi
 fi
 
diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh
index 3fc926f4e82..888f763a0a3 100755
--- a/dev/release/verify-release-candidate.sh
+++ b/dev/release/verify-release-candidate.sh
@@ -135,7 +135,9 @@ test_binary() {
 }
 
 test_apt() {
-  for target in "debian:buster" \
+  for target in "debian:bullseye" \
+                "arm64v8/debian:bullseye" \
+                "debian:buster" \
                 "arm64v8/debian:buster" \
                 "ubuntu:bionic" \
                 "arm64v8/ubuntu:bionic" \
@@ -144,6 +146,11 @@ test_apt() {
                 "ubuntu:groovy" \
                 "arm64v8/ubuntu:groovy"; do \
     case "${target}" in
+      arm64v8/debian:bullseye)
+        # qemu-user-static in Ubuntu 20.04 has a crash bug:
+        #   https://bugs.launchpad.net/qemu/+bug/1749393
+        continue
+        ;;
       arm64v8/*)
         if [ "$(arch)" = "aarch64" -o -e /usr/bin/qemu-aarch64-static ]; then
           : # OK
@@ -485,9 +492,7 @@ test_rust() {
   # raises on any formatting errors
   rustup component add rustfmt --toolchain stable
   cargo +stable fmt --all -- --check
-
-  # we are targeting Rust nightly for releases
-  rustup default nightly
+  rustup default stable
 
   # use local modules because we don't publish modules to crates.io yet
   sed \
@@ -686,6 +691,7 @@ test_wheels() {
   fi
 
   python $SOURCE_DIR/download_rc_binaries.py $VERSION $RC_NUMBER \
+         --package_type python \
          --regex=${filter_regex} \
          --dest=${download_dir}
 
diff --git a/dev/release/verify-yum.sh b/dev/release/verify-yum.sh
index b9c46c43898..08cd907b461 100755
--- a/dev/release/verify-yum.sh
+++ b/dev/release/verify-yum.sh
@@ -21,10 +21,12 @@ set -exu
 
 if [ $# -lt 2 ]; then
   echo "Usage: $0 VERSION rc"
+  echo "       $0 VERSION rc BINTRAY_REPOSITORY"
   echo "       $0 VERSION release"
   echo "       $0 VERSION local"
   echo " e.g.: $0 0.13.0 rc           # Verify 0.13.0 RC"
   echo " e.g.: $0 0.13.0 release      # Verify 0.13.0"
+  echo " e.g.: $0 0.13.0 rc kszucs/arrow # Verify 0.13.0 RC at https://bintray.com/kszucs/arrow"
   echo " e.g.: $0 0.13.0-dev20210203 local # Verify 0.13.0-dev20210203 on local"
   exit 1
 fi
@@ -82,8 +84,13 @@ if [ "${TYPE}" = "local" ]; then
   ${install_command} "${release_path}"
 else
   package_version="${VERSION}"
-  ${install_command} \
-    ${artifactory_base_url}/${distribution_version}/apache-arrow-release-latest.rpm
+  if [ $# -eq 3 ]; then
+    ${install_command} \
+      https://dl.bintray.com/$3/centos-rc/${distribution_version}/apache-arrow-release-latest.rpm
+  else
+    ${install_command} \
+      ${artifactory_base_url}/${distribution_version}/apache-arrow-release-latest.rpm
+  fi
 fi
 
 if [ "${TYPE}" = "local" ]; then
@@ -97,10 +104,17 @@ if [ "${TYPE}" = "local" ]; then
   fi
 else
   if [ "${TYPE}" = "rc" ]; then
-    sed \
-      -i"" \
-      -e "s,/centos/,/centos-rc/,g" \
-      /etc/yum.repos.d/Apache-Arrow.repo
+    if [ $# -eq 3 ]; then
+      sed \
+        -i"" \
+        -e "s,baseurl=https://apache\.jfrog\.io/artifactory/arrow/centos/,baseurl=https://dl.bintray.com/$3/centos-rc/,g" \
+        /etc/yum.repos.d/Apache-Arrow.repo
+    else
+      sed \
+        -i"" \
+        -e "s,/centos/,/centos-rc/,g" \
+        /etc/yum.repos.d/Apache-Arrow.repo
+    fi
   fi
 fi
 
diff --git a/dev/tasks/conda-recipes/azure.win.yml b/dev/tasks/conda-recipes/azure.win.yml
index a3ec6931caf..422e2f0e913 100755
--- a/dev/tasks/conda-recipes/azure.win.yml
+++ b/dev/tasks/conda-recipes/azure.win.yml
@@ -73,5 +73,5 @@ jobs:
         PYTHONUNBUFFERED: 1
       condition: contains(variables['R_CONFIG'], 'win')
 
-    {{ macros.azure_upload_releases("D:\bld\win-64\*.tar.bz2")|indent(2) }}
-    {{ macros.azure_upload_anaconda("D:\bld\win-64\*.tar.bz2")|indent(2) }}
+    {{ macros.azure_upload_releases("D:\\bld\\win-64\\*.tar.bz2")|indent(2) }}
+    {{ macros.azure_upload_anaconda("D:\\bld\\win-64\\*.tar.bz2")|indent(2) }}
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index eab3e15ce92..4bf0abfa50c 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -1235,7 +1235,7 @@ tasks:
 
 {% for target in ["csharp",
                   "go",
-                  "integration"
+                  "integration",
                   "java",
                   "js",
                   "python",
diff --git a/dev/tasks/verify-rc/github.linux.yml b/dev/tasks/verify-rc/github.linux.yml
index 8729426fd13..2bbde2e7fcb 100644
--- a/dev/tasks/verify-rc/github.linux.yml
+++ b/dev/tasks/verify-rc/github.linux.yml
@@ -47,6 +47,7 @@ jobs:
             jq \
             libboost-all-dev \
             libgirepository1.0-dev \
+            ninja-build \
             qemu-user-static \
             wget
 

From 8eab04470771472c67d767a09b3d6533de41a26b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Mon, 19 Apr 2021 01:40:42 +0200
Subject: [PATCH 089/719] ARROW-12420: [C++/Dataset] Reading null columns as
 dictionary not longer possible
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #10093 from kszucs/ARROW-12420

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 cpp/src/arrow/compute/exec.cc                 |  1 +
 .../compute/kernels/scalar_cast_nested.cc     |  7 ++++++-
 .../arrow/compute/kernels/scalar_cast_test.cc |  8 +++++++
 python/pyarrow/tests/test_array.py            |  3 ++-
 python/pyarrow/tests/test_dataset.py          | 21 +++++++++++++++++++
 5 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/cpp/src/arrow/compute/exec.cc b/cpp/src/arrow/compute/exec.cc
index c3187a3995a..b88248071c2 100644
--- a/cpp/src/arrow/compute/exec.cc
+++ b/cpp/src/arrow/compute/exec.cc
@@ -704,6 +704,7 @@ class ScalarExecutor : public KernelExecutorImpl<ScalarKernel> {
     preallocate_contiguous_ =
         (exec_context()->preallocate_contiguous() && kernel_->can_write_into_slices &&
          validity_preallocated_ && !is_nested(output_descr_.type->id()) &&
+         !is_dictionary(output_descr_.type->id()) &&
          data_preallocated_.size() == static_cast<size_t>(output_num_buffers_ - 1) &&
          std::all_of(data_preallocated_.begin(), data_preallocated_.end(),
                      [](const BufferPreallocation& prealloc) {
diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc b/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc
index 2592b77ab66..1d81be48288 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc
@@ -118,7 +118,12 @@ std::vector<std::shared_ptr<CastFunction>> GetNestedCasts() {
   auto cast_struct = std::make_shared<CastFunction>("cast_struct", Type::STRUCT);
   AddCommonCasts(Type::STRUCT, kOutputTargetType, cast_struct.get());
 
-  return {cast_list, cast_large_list, cast_fsl, cast_struct};
+  // So is dictionary
+  auto cast_dictionary =
+      std::make_shared<CastFunction>("cast_dictionary", Type::DICTIONARY);
+  AddCommonCasts(Type::DICTIONARY, kOutputTargetType, cast_dictionary.get());
+
+  return {cast_list, cast_large_list, cast_fsl, cast_struct, cast_dictionary};
 }
 
 }  // namespace internal
diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
index 10e5ed26e5d..6efecbb2ad0 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
@@ -1782,6 +1782,14 @@ TEST(Cast, FromNull) {
   }
 }
 
+TEST(Cast, FromNullToDictionary) {
+  auto from = std::make_shared<NullArray>(10);
+  auto to_type = dictionary(int8(), boolean());
+
+  ASSERT_OK_AND_ASSIGN(auto expected, MakeArrayOfNull(to_type, 10));
+  CheckCast(from, expected);
+}
+
 // ----------------------------------------------------------------------
 // Test casting from DictionaryType
 
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index 01ee2977fec..37d69363816 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -1358,12 +1358,13 @@ def test_cast_from_null():
         pa.struct([pa.field('a', pa.int32()),
                    pa.field('b', pa.list_(pa.int8())),
                    pa.field('c', pa.string())]),
+        pa.dictionary(pa.int32(), pa.string()),
     ]
     for out_type in out_types:
         _check_cast_case((in_data, in_type, in_data, out_type))
 
     out_types = [
-        pa.dictionary(pa.int32(), pa.string()),
+
         pa.union([pa.field('a', pa.binary(10)),
                   pa.field('b', pa.string())], mode=pa.lib.UnionMode_DENSE),
         pa.union([pa.field('a', pa.binary(10)),
diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py
index 6ca6b095936..7688cf78ac7 100644
--- a/python/pyarrow/tests/test_dataset.py
+++ b/python/pyarrow/tests/test_dataset.py
@@ -3156,3 +3156,24 @@ def test_write_dataset_s3(s3_example_simple):
         "mybucket/dataset3", filesystem=fs, format="ipc", partitioning="hive"
     ).to_table()
     assert result.equals(table)
+
+
+@pytest.mark.parquet
+def test_dataset_null_to_dictionary_cast(tempdir):
+    # ARROW-12420
+    import pyarrow.parquet as pq
+
+    table = pa.table({"a": [None, None]})
+    pq.write_table(table, tempdir / "test.parquet")
+
+    schema = pa.schema([
+        pa.field("a", pa.dictionary(pa.int32(), pa.string()))
+    ])
+    fsds = ds.FileSystemDataset.from_paths(
+        paths=[tempdir / "test.parquet"],
+        schema=schema,
+        format=ds.ParquetFileFormat(),
+        filesystem=fs.LocalFileSystem(),
+    )
+    table = fsds.to_table()
+    assert table.schema == schema

From 2d791b4e427366506e5ce2c288a74d0ed21d900d Mon Sep 17 00:00:00 2001
From: Weston Pace <weston.pace@gmail.com>
Date: Mon, 19 Apr 2021 10:21:30 +0200
Subject: [PATCH 090/719] ARROW-12395: Create RunInSerialExecutor benchmark

Closes #10042 from westonpace/feature/arrow-12395

Authored-by: Weston Pace <weston.pace@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/util/thread_pool_benchmark.cc | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/cpp/src/arrow/util/thread_pool_benchmark.cc b/cpp/src/arrow/util/thread_pool_benchmark.cc
index 2ff4d111763..054d616d035 100644
--- a/cpp/src/arrow/util/thread_pool_benchmark.cc
+++ b/cpp/src/arrow/util/thread_pool_benchmark.cc
@@ -103,6 +103,22 @@ static void ThreadPoolSpawn(benchmark::State& state) {  // NOLINT non-const refe
   state.SetItemsProcessed(state.iterations() * nspawns);
 }
 
+// Benchmark SerialExecutor::RunInSerialExecutor
+static void RunInSerialExecutor(benchmark::State& state) {  // NOLINT non-const reference
+  const auto workload_size = static_cast<int32_t>(state.range(0));
+
+  Workload workload(workload_size);
+
+  for (auto _ : state) {
+    SerialExecutor::RunInSerialExecutor<arrow::detail::Empty>(
+        [&](internal::Executor* executor) {
+          return DeferNotOk(executor->Submit(std::ref(workload)));
+        });
+  }
+
+  state.SetItemsProcessed(state.iterations());
+}
+
 // Benchmark ThreadPool::Submit
 static void ThreadPoolSubmit(benchmark::State& state) {  // NOLINT non-const reference
   const auto nthreads = static_cast<int>(state.range(0));
@@ -223,6 +239,7 @@ BENCHMARK(ReferenceWorkloadCost)->Apply(WorkloadCost_Customize);
 #endif
 
 BENCHMARK(SerialTaskGroup)->Apply(WorkloadCost_Customize);
+BENCHMARK(RunInSerialExecutor)->Apply(WorkloadCost_Customize);
 BENCHMARK(ThreadPoolSpawn)->Apply(ThreadPoolSpawn_Customize);
 BENCHMARK(ThreadedTaskGroup)->Apply(ThreadPoolSpawn_Customize);
 BENCHMARK(ThreadPoolSubmit)->Apply(ThreadPoolSpawn_Customize);

From 1dc8f94ba7696ca5f7df20f76e6d81e62cdeaa69 Mon Sep 17 00:00:00 2001
From: Ying Zhou <yingzhou474@gmail.com>
Date: Mon, 19 Apr 2021 13:50:16 +0200
Subject: [PATCH 091/719] ARROW-7906: [C++] [Python] Add ORC write support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This pull request tracks the progress on adding ORC write support. The functionality is not complete yet. However for most types the process of populating a ColumnVectorBatch in ORC using data from Arrow Array.

Arrow data types (arrow::Type::type) I do support:
Boolean: BOOL
Numerical: INT8, INT16, INT32, INT64, FLOAT, DOUBLE
Time-related: DATE32
Binary: BINARY, STRING, LARGE_BINARY, LARGE_STRING, FIXED_SIZE_BINARY
Nested: LIST, LARGE_LIST, FIXED_SIZE_LIST, STRUCT, MAP, DENSE_UNION, SPARSE_UNION

Arrow data types I plan to support:
Numerical: DECIMAL128
Time-related: DATE64, TIMESTAMP
Dictionary: DICTIONARY

Arrow data types I currently do NOT plan to support:
Numerical: UINT8, UINT16, UINT32, UINT64, HALF_FLOAT, DECIMAL256 (There are no corresponding types in ORC. Of course except for in the case of DECIMAL256 we can always cast them into larger types. However I think maybe users need to explicitly do that.)
Time-related: TIME32, TIME64, INTERVAL_MONTHS, INTERVAL_DAY_TIME, DURATION (There are no corresponding types in ORC and it is impossible to cast them into ORC types without losing time-related information)
Extension: EXTENSION

Closes #8648 from mathyingzhou/ARROW-7906_pyarrow_write_orc

Lead-authored-by: Ying Zhou <yingzhou474@gmail.com>
Co-authored-by: Sutou Kouhei <kou@clear-code.com>
Co-authored-by: Jorge C. Leitao <jorgecarleitao@gmail.com>
Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
Co-authored-by: Heres, Daniel <danielheres@gmail.com>
Co-authored-by: Dmitry Patsura <zaets28rus@gmail.com>
Co-authored-by: Neville Dipale <nevilledips@gmail.com>
Co-authored-by: Antoine Pitrou <antoine@python.org>
Co-authored-by: Yibo Cai <yibo.cai@arm.com>
Co-authored-by: Yordan Pavlov <yordan.pavlov@outlook.com>
Co-authored-by: mqy <meng.qingyou@gmail.com>
Co-authored-by: Kenta Murata <mrkn@mrkn.jp>
Co-authored-by: Johannes Müller <JohannesMueller@fico.com>
Co-authored-by: Mahmut Bulut <vertexclique@gmail.com>
Co-authored-by: Ryan Jennings <ryan@ryanj.net>
Co-authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Co-authored-by: Weston Pace <weston.pace@gmail.com>
Co-authored-by: Jörn Horstmann <joern.horstmann@signavio.com>
Co-authored-by: Daniël Heres <danielheres@gmail.com>
Co-authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Co-authored-by: Benjamin Kietzman <bengilgit@gmail.com>
Co-authored-by: Matt Brubeck <mbrubeck@limpet.net>
Co-authored-by: Max Burke <max@urbanlogiq.com>
Co-authored-by: Maarten A. Breddels <maartenbreddels@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 c_glib/arrow-glib/basic-data-type.cpp      |   3 +
 c_glib/test/helper/buildable.rb            |  35 +-
 c_glib/test/test-orc-file-reader.rb        |  52 +-
 cpp/src/arrow/adapters/orc/adapter.cc      | 188 ++++-
 cpp/src/arrow/adapters/orc/adapter.h       |  35 +-
 cpp/src/arrow/adapters/orc/adapter_test.cc | 556 ++++++++++++++-
 cpp/src/arrow/adapters/orc/adapter_util.cc | 787 +++++++++++++++++++--
 cpp/src/arrow/adapters/orc/adapter_util.h  |  18 +-
 cpp/src/arrow/testing/random.cc            |  15 +-
 cpp/src/arrow/testing/random.h             |  13 +
 python/pyarrow/_orc.pxd                    |  10 +-
 python/pyarrow/_orc.pyx                    |  30 +-
 python/pyarrow/orc.py                      |  53 ++
 python/pyarrow/tests/test_orc.py           |  77 +-
 ruby/red-arrow/test/test-orc.rb            |  42 +-
 15 files changed, 1673 insertions(+), 241 deletions(-)

diff --git a/c_glib/arrow-glib/basic-data-type.cpp b/c_glib/arrow-glib/basic-data-type.cpp
index 1f2082712da..d7e3ca85f38 100644
--- a/c_glib/arrow-glib/basic-data-type.cpp
+++ b/c_glib/arrow-glib/basic-data-type.cpp
@@ -1925,6 +1925,9 @@ garrow_data_type_new_raw(std::shared_ptr<arrow::DataType> *arrow_data_type)
   case arrow::Type::type::DICTIONARY:
     type = GARROW_TYPE_DICTIONARY_DATA_TYPE;
     break;
+  case arrow::Type::type::MAP:
+    type = GARROW_TYPE_MAP_DATA_TYPE;
+    break;
   case arrow::Type::type::DECIMAL128:
     type = GARROW_TYPE_DECIMAL128_DATA_TYPE;
     break;
diff --git a/c_glib/test/helper/buildable.rb b/c_glib/test/helper/buildable.rb
index 3528c2fbdc7..04ae22f8715 100644
--- a/c_glib/test/helper/buildable.rb
+++ b/c_glib/test/helper/buildable.rb
@@ -136,11 +136,7 @@ def build_list_array(value_data_type, values_list, field_name: "value")
       data_type = Arrow::ListDataType.new(value_field)
       builder = Arrow::ListArrayBuilder.new(data_type)
       values_list.each do |values|
-        if values.nil?
-          builder.append_null
-        else
-          append_to_builder(builder, values)
-        end
+        append_to_builder(builder, values)
       end
       builder.finish
     end
@@ -150,11 +146,16 @@ def build_large_list_array(value_data_type, values_list, field_name: "value")
       data_type = Arrow::LargeListDataType.new(value_field)
       builder = Arrow::LargeListArrayBuilder.new(data_type)
       values_list.each do |values|
-        if values.nil?
-          builder.append_null
-        else
-          append_to_builder(builder, values)
-        end
+        append_to_builder(builder, values)
+      end
+      builder.finish
+    end
+
+    def build_map_array(key_data_type, item_data_type, maps)
+      data_type = Arrow::MapDataType.new(key_data_type, item_data_type)
+      builder = Arrow::MapArrayBuilder.new(data_type)
+      maps.each do |map|
+        append_to_builder(builder, map)
       end
       builder.finish
     end
@@ -163,11 +164,7 @@ def build_struct_array(fields, structs)
       data_type = Arrow::StructDataType.new(fields)
       builder = Arrow::StructArrayBuilder.new(data_type)
       structs.each do |struct|
-        if struct.nil?
-          builder.append_null
-        else
-          append_to_builder(builder, struct)
-        end
+        append_to_builder(builder, struct)
       end
       builder.finish
     end
@@ -178,6 +175,14 @@ def append_to_builder(builder, value)
       else
         data_type = builder.value_data_type
         case data_type
+        when Arrow::MapDataType
+          builder.append_value
+          key_builder = builder.key_builder
+          item_builder = builder.item_builder
+          value.each do |k, v|
+            append_to_builder(key_builder, k)
+            append_to_builder(item_builder, v)
+          end
         when Arrow::ListDataType, Arrow::LargeListDataType
           builder.append_value
           value_builder = builder.value_builder
diff --git a/c_glib/test/test-orc-file-reader.rb b/c_glib/test/test-orc-file-reader.rb
index cd57cee4de6..38900cf12f3 100644
--- a/c_glib/test/test-orc-file-reader.rb
+++ b/c_glib/test/test-orc-file-reader.rb
@@ -40,7 +40,7 @@ def test_read_type
 string1: string
 middle: struct<list: list<item: struct<int1: int32, string1: string>>>
 list: list<item: struct<int1: int32, string1: string>>
-map: list<item: struct<key: string, value: struct<int1: int32, string1: string>>>
+map: map<string, struct<int1: int32, string1: string>>
     SCHEMA
   end
 
@@ -80,21 +80,6 @@ def build_middle_array(middles)
     build_struct_array(middle_fields, middles)
   end
 
-  def key_value_fields
-    [
-      Arrow::Field.new("key", Arrow::StringDataType.new),
-      Arrow::Field.new("value", item_data_type),
-    ]
-  end
-
-  def key_value_data_type
-    Arrow::StructDataType.new(key_value_fields)
-  end
-
-  def build_key_value_array(key_value_array)
-    build_list_array(key_value_data_type, key_value_array, field_name: "item")
-  end
-
   def middle_array
     build_middle_array([
                          {
@@ -154,26 +139,21 @@ def list_array
   end
 
   def map_array
-    build_key_value_array([
-                            [
-                            ],
-                            [
-                              {
-                                "key" => "chani",
-                                "value" => {
-                                  "int1" => 5,
-                                  "string1" => "chani",
-                                },
-                              },
-                              {
-                                "key" => "mauddib",
-                                "value" => {
-                                  "int1" => 1,
-                                  "string1" => "mauddib",
-                                },
-                              },
-                            ],
-                          ])
+    build_map_array(Arrow::StringDataType.new,
+                    item_data_type,
+                    [
+                      {},
+                      {
+                        "chani" => {
+                          "int1" => 5,
+                          "string1" => "chani",
+                        },
+                        "mauddib" => {
+                          "int1" => 1,
+                          "string1" => "mauddib",
+                        },
+                      },
+                    ])
   end
 
   def all_columns
diff --git a/cpp/src/arrow/adapters/orc/adapter.cc b/cpp/src/arrow/adapters/orc/adapter.cc
index 2c61f8995de..e9dfbd0a8ec 100644
--- a/cpp/src/arrow/adapters/orc/adapter.cc
+++ b/cpp/src/arrow/adapters/orc/adapter.cc
@@ -16,10 +16,10 @@
 // under the License.
 
 #include "arrow/adapters/orc/adapter.h"
-#include "arrow/adapters/orc/adapter_util.h"
 
 #include <algorithm>
 #include <cstdint>
+#include <functional>
 #include <list>
 #include <memory>
 #include <sstream>
@@ -27,6 +27,7 @@
 #include <utility>
 #include <vector>
 
+#include "arrow/adapters/orc/adapter_util.h"
 #include "arrow/buffer.h"
 #include "arrow/builder.h"
 #include "arrow/io/interfaces.h"
@@ -44,20 +45,11 @@
 #include "arrow/util/macros.h"
 #include "arrow/util/range.h"
 #include "arrow/util/visibility.h"
-
 #include "orc/Exceptions.hh"
-#include "orc/OrcFile.hh"
 
 // alias to not interfere with nested orc namespace
 namespace liborc = orc;
 
-namespace arrow {
-
-using internal::checked_cast;
-
-namespace adapters {
-namespace orc {
-
 #define ORC_THROW_NOT_OK(s)                   \
   do {                                        \
     Status _s = (s);                          \
@@ -77,6 +69,35 @@ namespace orc {
   ORC_ASSIGN_OR_THROW_IMPL(ARROW_ASSIGN_OR_RAISE_NAME(_error_or_value, __COUNTER__), \
                            lhs, rexpr);
 
+#define ORC_BEGIN_CATCH_NOT_OK try {
+#define ORC_END_CATCH_NOT_OK                   \
+  }                                            \
+  catch (const liborc::ParseError& e) {        \
+    return Status::IOError(e.what());          \
+  }                                            \
+  catch (const liborc::InvalidArgument& e) {   \
+    return Status::Invalid(e.what());          \
+  }                                            \
+  catch (const liborc::NotImplementedYet& e) { \
+    return Status::NotImplemented(e.what());   \
+  }
+
+#define ORC_CATCH_NOT_OK(_s)  \
+  ORC_BEGIN_CATCH_NOT_OK(_s); \
+  ORC_END_CATCH_NOT_OK
+
+namespace arrow {
+namespace adapters {
+namespace orc {
+
+namespace {
+
+// The following are required by ORC to be uint64_t
+constexpr uint64_t kOrcWriterBatchSize = 128 * 1024;
+constexpr uint64_t kOrcNaturalWriteSize = 128 * 1024;
+
+using internal::checked_cast;
+
 class ArrowInputFile : public liborc::InputStream {
  public:
   explicit ArrowInputFile(const std::shared_ptr<io::RandomAccessFile>& file)
@@ -129,11 +150,7 @@ class OrcStripeReader : public RecordBatchReader {
 
   Status ReadNext(std::shared_ptr<RecordBatch>* out) override {
     std::unique_ptr<liborc::ColumnVectorBatch> batch;
-    try {
-      batch = row_reader_->createRowBatch(batch_size_);
-    } catch (const liborc::ParseError& e) {
-      return Status::Invalid(e.what());
-    }
+    ORC_CATCH_NOT_OK(batch = row_reader_->createRowBatch(batch_size_));
 
     const liborc::Type& type = row_reader_->getSelectedType();
     if (!row_reader_->next(*batch)) {
@@ -163,6 +180,8 @@ class OrcStripeReader : public RecordBatchReader {
   int64_t batch_size_;
 };
 
+}  // namespace
+
 class ORCFileReader::Impl {
  public:
   Impl() {}
@@ -172,11 +191,7 @@ class ORCFileReader::Impl {
     std::unique_ptr<ArrowInputFile> io_wrapper(new ArrowInputFile(file));
     liborc::ReaderOptions options;
     std::unique_ptr<liborc::Reader> liborc_reader;
-    try {
-      liborc_reader = createReader(std::move(io_wrapper), options);
-    } catch (const liborc::ParseError& e) {
-      return Status::IOError(e.what());
-    }
+    ORC_CATCH_NOT_OK(liborc_reader = createReader(std::move(io_wrapper), options));
     pool_ = pool;
     reader_ = std::move(liborc_reader);
     current_row_ = 0;
@@ -209,11 +224,7 @@ class ORCFileReader::Impl {
 
   Status ReadSchema(const liborc::RowReaderOptions& opts, std::shared_ptr<Schema>* out) {
     std::unique_ptr<liborc::RowReader> row_reader;
-    try {
-      row_reader = reader_->createRowReader(opts);
-    } catch (const liborc::ParseError& e) {
-      return Status::Invalid(e.what());
-    }
+    ORC_CATCH_NOT_OK(row_reader = reader_->createRowReader(opts));
     const liborc::Type& type = row_reader->getSelectedType();
     return GetArrowSchema(type, out);
   }
@@ -342,12 +353,12 @@ class ORCFileReader::Impl {
                    std::shared_ptr<RecordBatch>* out) {
     std::unique_ptr<liborc::RowReader> row_reader;
     std::unique_ptr<liborc::ColumnVectorBatch> batch;
-    try {
-      row_reader = reader_->createRowReader(opts);
-      batch = row_reader->createRowBatch(std::min(nrows, kReadRowsBatch));
-    } catch (const liborc::ParseError& e) {
-      return Status::Invalid(e.what());
-    }
+
+    ORC_BEGIN_CATCH_NOT_OK
+    row_reader = reader_->createRowReader(opts);
+    batch = row_reader->createRowBatch(std::min(nrows, kReadRowsBatch));
+    ORC_END_CATCH_NOT_OK
+
     std::unique_ptr<RecordBatchBuilder> builder;
     RETURN_NOT_OK(RecordBatchBuilder::Make(schema, pool_, nrows, &builder));
 
@@ -389,13 +400,12 @@ class ORCFileReader::Impl {
     std::shared_ptr<Schema> schema;
     RETURN_NOT_OK(ReadSchema(opts, &schema));
     std::unique_ptr<liborc::RowReader> row_reader;
-    try {
-      row_reader = reader_->createRowReader(opts);
-      row_reader->seekToRow(current_row_);
-      current_row_ = stripe_info.first_row_of_stripe + stripe_info.num_rows;
-    } catch (const liborc::ParseError& e) {
-      return Status::Invalid(e.what());
-    }
+
+    ORC_BEGIN_CATCH_NOT_OK
+    row_reader = reader_->createRowReader(opts);
+    row_reader->seekToRow(current_row_);
+    current_row_ = stripe_info.first_row_of_stripe + stripe_info.num_rows;
+    ORC_END_CATCH_NOT_OK
 
     *out = std::shared_ptr<RecordBatchReader>(
         new OrcStripeReader(std::move(row_reader), schema, batch_size, pool_));
@@ -473,6 +483,108 @@ int64_t ORCFileReader::NumberOfStripes() { return impl_->NumberOfStripes(); }
 
 int64_t ORCFileReader::NumberOfRows() { return impl_->NumberOfRows(); }
 
+namespace {
+
+class ArrowOutputStream : public liborc::OutputStream {
+ public:
+  explicit ArrowOutputStream(arrow::io::OutputStream& output_stream)
+      : output_stream_(output_stream), length_(0) {}
+
+  uint64_t getLength() const override { return length_; }
+
+  uint64_t getNaturalWriteSize() const override { return kOrcNaturalWriteSize; }
+
+  void write(const void* buf, size_t length) override {
+    ORC_THROW_NOT_OK(output_stream_.Write(buf, static_cast<int64_t>(length)));
+    length_ += static_cast<int64_t>(length);
+  }
+
+  // Mandatory due to us implementing an ORC virtual class.
+  // Used by ORC for error messages, not used by Arrow
+  const std::string& getName() const override {
+    static const std::string filename("ArrowOutputFile");
+    return filename;
+  }
+
+  void close() override {
+    if (!output_stream_.closed()) {
+      ORC_THROW_NOT_OK(output_stream_.Close());
+    }
+  }
+
+  void set_length(int64_t length) { length_ = length; }
+
+ private:
+  arrow::io::OutputStream& output_stream_;
+  int64_t length_;
+};
+
+}  // namespace
+
+class ORCFileWriter::Impl {
+ public:
+  Status Open(arrow::io::OutputStream* output_stream) {
+    out_stream_ = std::unique_ptr<liborc::OutputStream>(
+        checked_cast<liborc::OutputStream*>(new ArrowOutputStream(*output_stream)));
+    return Status::OK();
+  }
+
+  Status Write(const Table& table) {
+    std::unique_ptr<liborc::WriterOptions> orc_options =
+        std::unique_ptr<liborc::WriterOptions>(new liborc::WriterOptions());
+    ARROW_ASSIGN_OR_RAISE(auto orc_schema, GetOrcType(*(table.schema())));
+    ORC_CATCH_NOT_OK(
+        writer_ = liborc::createWriter(*orc_schema, out_stream_.get(), *orc_options))
+
+    int64_t num_rows = table.num_rows();
+    const int num_cols_ = table.num_columns();
+    std::vector<int64_t> arrow_index_offset(num_cols_, 0);
+    std::vector<int> arrow_chunk_offset(num_cols_, 0);
+    std::unique_ptr<liborc::ColumnVectorBatch> batch =
+        writer_->createRowBatch(kOrcWriterBatchSize);
+    liborc::StructVectorBatch* root =
+        internal::checked_cast<liborc::StructVectorBatch*>(batch.get());
+    while (num_rows > 0) {
+      for (int i = 0; i < num_cols_; i++) {
+        RETURN_NOT_OK(adapters::orc::WriteBatch(
+            *(table.column(i)), kOrcWriterBatchSize, &(arrow_chunk_offset[i]),
+            &(arrow_index_offset[i]), (root->fields)[i]));
+      }
+      root->numElements = (root->fields)[0]->numElements;
+      writer_->add(*batch);
+      batch->clear();
+      num_rows -= kOrcWriterBatchSize;
+    }
+    return Status::OK();
+  }
+
+  Status Close() {
+    writer_->close();
+    return Status::OK();
+  }
+
+ private:
+  std::unique_ptr<liborc::Writer> writer_;
+  std::unique_ptr<liborc::OutputStream> out_stream_;
+};
+
+ORCFileWriter::~ORCFileWriter() {}
+
+ORCFileWriter::ORCFileWriter() { impl_.reset(new ORCFileWriter::Impl()); }
+
+Result<std::unique_ptr<ORCFileWriter>> ORCFileWriter::Open(
+    io::OutputStream* output_stream) {
+  std::unique_ptr<ORCFileWriter> result =
+      std::unique_ptr<ORCFileWriter>(new ORCFileWriter());
+  Status status = result->impl_->Open(output_stream);
+  RETURN_NOT_OK(status);
+  return result;
+}
+
+Status ORCFileWriter::Write(const Table& table) { return impl_->Write(table); }
+
+Status ORCFileWriter::Close() { return impl_->Close(); }
+
 }  // namespace orc
 }  // namespace adapters
 }  // namespace arrow
diff --git a/cpp/src/arrow/adapters/orc/adapter.h b/cpp/src/arrow/adapters/orc/adapter.h
index 9bf18674af4..86dfd2c9241 100644
--- a/cpp/src/arrow/adapters/orc/adapter.h
+++ b/cpp/src/arrow/adapters/orc/adapter.h
@@ -26,12 +26,11 @@
 #include "arrow/record_batch.h"
 #include "arrow/status.h"
 #include "arrow/type.h"
+#include "arrow/type_fwd.h"
 #include "arrow/util/visibility.h"
 
 namespace arrow {
-
 namespace adapters {
-
 namespace orc {
 
 /// \class ORCFileReader
@@ -142,8 +141,36 @@ class ARROW_EXPORT ORCFileReader {
   ORCFileReader();
 };
 
-}  // namespace orc
+/// \class ORCFileWriter
+/// \brief Write an Arrow Table or RecordBatch to an ORC file.
+class ARROW_EXPORT ORCFileWriter {
+ public:
+  ~ORCFileWriter();
+  /// \brief Creates a new ORC writer.
+  ///
+  /// \param[in] output_stream a pointer to the io::OutputStream to write into
+  /// \return the returned writer object
+  static Result<std::unique_ptr<ORCFileWriter>> Open(io::OutputStream* output_stream);
 
-}  // namespace adapters
+  /// \brief Write a table
+  ///
+  /// \param[in] table the Arrow table from which data is extracted
+  /// \return Status
+  Status Write(const Table& table);
+
+  /// \brief Close an ORC writer (orc::Writer)
+  ///
+  /// \return Status
+  Status Close();
+
+ private:
+  class Impl;
+  std::unique_ptr<Impl> impl_;
 
+ private:
+  ORCFileWriter();
+};
+
+}  // namespace orc
+}  // namespace adapters
 }  // namespace arrow
diff --git a/cpp/src/arrow/adapters/orc/adapter_test.cc b/cpp/src/arrow/adapters/orc/adapter_test.cc
index 09e47fb7626..7bf0b61774d 100644
--- a/cpp/src/arrow/adapters/orc/adapter_test.cc
+++ b/cpp/src/arrow/adapters/orc/adapter_test.cc
@@ -15,20 +15,46 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include <string>
-
 #include "arrow/adapters/orc/adapter.h"
-#include "arrow/array.h"
-#include "arrow/io/api.h"
 
 #include <gtest/gtest.h>
+
 #include <orc/OrcFile.hh>
+#include <string>
+
+#include "arrow/adapters/orc/adapter_util.h"
+#include "arrow/array.h"
+#include "arrow/buffer.h"
+#include "arrow/buffer_builder.h"
+#include "arrow/chunked_array.h"
+#include "arrow/compute/cast.h"
+#include "arrow/io/interfaces.h"
+#include "arrow/io/memory.h"
+#include "arrow/status.h"
+#include "arrow/table.h"
+#include "arrow/testing/gtest_util.h"
+#include "arrow/testing/random.h"
+#include "arrow/type.h"
+#include "arrow/util/decimal.h"
 
 namespace liborc = orc;
 
 namespace arrow {
 
-constexpr int DEFAULT_MEM_STREAM_SIZE = 100 * 1024 * 1024;
+using internal::checked_pointer_cast;
+
+constexpr int kDefaultSmallMemStreamSize = 16384 * 5;  // 80KB
+constexpr int kDefaultMemStreamSize = 10 * 1024 * 1024;
+constexpr int64_t kNanoMax = std::numeric_limits<int64_t>::max();
+constexpr int64_t kNanoMin = std::numeric_limits<int64_t>::lowest();
+const int64_t kMicroMax = std::floor(kNanoMax / 1000);
+const int64_t kMicroMin = std::ceil(kNanoMin / 1000);
+const int64_t kMilliMax = std::floor(kMicroMax / 1000);
+const int64_t kMilliMin = std::ceil(kMicroMin / 1000);
+const int64_t kSecondMax = std::floor(kMilliMax / 1000);
+const int64_t kSecondMin = std::ceil(kMilliMin / 1000);
+
+static constexpr random::SeedType kRandomSeed = 0x0ff1ce;
 
 class MemoryOutputStream : public liborc::OutputStream {
  public:
@@ -58,6 +84,189 @@ class MemoryOutputStream : public liborc::OutputStream {
   uint64_t length_, natural_write_size_;
 };
 
+std::shared_ptr<Buffer> GenerateFixedDifferenceBuffer(int32_t fixed_length,
+                                                      int64_t length) {
+  BufferBuilder builder;
+  int32_t offsets[length];
+  ARROW_EXPECT_OK(builder.Resize(4 * length));
+  for (int32_t i = 0; i < length; i++) {
+    offsets[i] = fixed_length * i;
+  }
+  ARROW_EXPECT_OK(builder.Append(offsets, 4 * length));
+  std::shared_ptr<Buffer> buffer;
+  ARROW_EXPECT_OK(builder.Finish(&buffer));
+  return buffer;
+}
+
+std::shared_ptr<Array> CastFixedSizeBinaryArrayToBinaryArray(
+    std::shared_ptr<Array> array) {
+  auto fixed_size_binary_array = checked_pointer_cast<FixedSizeBinaryArray>(array);
+  std::shared_ptr<Buffer> value_offsets = GenerateFixedDifferenceBuffer(
+      fixed_size_binary_array->byte_width(), array->length() + 1);
+  return std::make_shared<BinaryArray>(array->length(), value_offsets,
+                                       array->data()->buffers[1],
+                                       array->data()->buffers[0]);
+}
+
+template <typename TargetArrayType>
+std::shared_ptr<Array> CastInt64ArrayToTemporalArray(
+    const std::shared_ptr<DataType>& type, std::shared_ptr<Array> array) {
+  std::shared_ptr<ArrayData> new_array_data =
+      ArrayData::Make(type, array->length(), array->data()->buffers);
+  return std::make_shared<TargetArrayType>(new_array_data);
+}
+
+Result<std::shared_ptr<Array>> GenerateRandomDate64Array(int64_t size,
+                                                         double null_probability) {
+  arrow::random::RandomArrayGenerator rand(kRandomSeed);
+  return CastInt64ArrayToTemporalArray<Date64Array>(
+      date64(), rand.Int64(size, kMilliMin, kMilliMax, null_probability));
+}
+
+Result<std::shared_ptr<Array>> GenerateRandomTimestampArray(int64_t size,
+                                                            arrow::TimeUnit::type type,
+                                                            double null_probability) {
+  arrow::random::RandomArrayGenerator rand(kRandomSeed);
+  switch (type) {
+    case arrow::TimeUnit::type::SECOND: {
+      return CastInt64ArrayToTemporalArray<TimestampArray>(
+          timestamp(TimeUnit::SECOND),
+          rand.Int64(size, kSecondMin, kSecondMax, null_probability));
+    }
+    case arrow::TimeUnit::type::MILLI: {
+      return CastInt64ArrayToTemporalArray<TimestampArray>(
+          timestamp(TimeUnit::MILLI),
+          rand.Int64(size, kMilliMin, kMilliMax, null_probability));
+    }
+    case arrow::TimeUnit::type::MICRO: {
+      return CastInt64ArrayToTemporalArray<TimestampArray>(
+          timestamp(TimeUnit::MICRO),
+          rand.Int64(size, kMicroMin, kMicroMax, null_probability));
+    }
+    case arrow::TimeUnit::type::NANO: {
+      return CastInt64ArrayToTemporalArray<TimestampArray>(
+          timestamp(TimeUnit::NANO),
+          rand.Int64(size, kNanoMin, kNanoMax, null_probability));
+    }
+    default: {
+      return arrow::Status::TypeError("Unknown or unsupported Arrow TimeUnit: ", type);
+    }
+  }
+}
+
+/// \brief Construct a random weak composition of a nonnegative integer
+/// i.e. a way of writing it as the sum of a sequence of n non-negative
+/// integers.
+///
+/// \param[in] n the number of integers in the weak composition
+/// \param[in] sum the integer of which a random weak composition is generated
+/// \param[out] out The generated weak composition
+template <typename T, typename U>
+void RandWeakComposition(int64_t n, T sum, std::vector<U>* out) {
+  const int random_seed = 0;
+  std::default_random_engine gen(random_seed);
+  out->resize(n, static_cast<T>(0));
+  T remaining_sum = sum;
+  std::generate(out->begin(), out->end() - 1, [&gen, &remaining_sum] {
+    std::uniform_int_distribution<T> d(static_cast<T>(0), remaining_sum);
+    auto res = d(gen);
+    remaining_sum -= res;
+    return static_cast<U>(res);
+  });
+  (*out)[n - 1] += remaining_sum;
+  std::random_shuffle(out->begin(), out->end());
+}
+
+std::shared_ptr<ChunkedArray> GenerateRandomChunkedArray(
+    const std::shared_ptr<DataType>& data_type, int64_t size, int64_t min_num_chunks,
+    int64_t max_num_chunks, double null_probability) {
+  arrow::random::RandomArrayGenerator rand(kRandomSeed);
+  std::vector<int64_t> num_chunks(1, 0);
+  std::vector<int64_t> current_size_chunks;
+  arrow::randint<int64_t, int64_t>(1, min_num_chunks, max_num_chunks, &num_chunks);
+  int64_t current_num_chunks = num_chunks[0];
+  ArrayVector arrays(current_num_chunks, nullptr);
+  arrow::RandWeakComposition(current_num_chunks, size, &current_size_chunks);
+  for (int j = 0; j < current_num_chunks; j++) {
+    switch (data_type->id()) {
+      case arrow::Type::type::DATE64: {
+        EXPECT_OK_AND_ASSIGN(arrays[j], GenerateRandomDate64Array(current_size_chunks[j],
+                                                                  null_probability));
+        break;
+      }
+      case arrow::Type::type::TIMESTAMP: {
+        EXPECT_OK_AND_ASSIGN(
+            arrays[j],
+            GenerateRandomTimestampArray(
+                current_size_chunks[j],
+                arrow::internal::checked_pointer_cast<arrow::TimestampType>(data_type)
+                    ->unit(),
+                null_probability));
+        break;
+      }
+      default:
+        arrays[j] = rand.ArrayOf(data_type, current_size_chunks[j], null_probability);
+    }
+  }
+  return std::make_shared<ChunkedArray>(arrays);
+}
+
+std::shared_ptr<Table> GenerateRandomTable(const std::shared_ptr<Schema>& schema,
+                                           int64_t size, int64_t min_num_chunks,
+                                           int64_t max_num_chunks,
+                                           double null_probability) {
+  int num_cols = schema->num_fields();
+  ChunkedArrayVector cv;
+  for (int col = 0; col < num_cols; col++) {
+    cv.push_back(GenerateRandomChunkedArray(schema->field(col)->type(), size,
+                                            min_num_chunks, max_num_chunks,
+                                            null_probability));
+  }
+  return Table::Make(schema, cv);
+}
+
+void AssertTableWriteReadEqual(const std::shared_ptr<Table>& input_table,
+                               const std::shared_ptr<Table>& expected_output_table,
+                               const int64_t max_size = kDefaultSmallMemStreamSize) {
+  EXPECT_OK_AND_ASSIGN(auto buffer_output_stream,
+                       io::BufferOutputStream::Create(max_size));
+  EXPECT_OK_AND_ASSIGN(auto writer,
+                       adapters::orc::ORCFileWriter::Open(buffer_output_stream.get()));
+  ARROW_EXPECT_OK(writer->Write(*input_table));
+  ARROW_EXPECT_OK(writer->Close());
+  EXPECT_OK_AND_ASSIGN(auto buffer, buffer_output_stream->Finish());
+  std::shared_ptr<io::RandomAccessFile> in_stream(new io::BufferReader(buffer));
+  std::unique_ptr<adapters::orc::ORCFileReader> reader;
+  ARROW_EXPECT_OK(
+      adapters::orc::ORCFileReader::Open(in_stream, default_memory_pool(), &reader));
+  std::shared_ptr<Table> actual_output_table;
+  ARROW_EXPECT_OK(reader->Read(&actual_output_table));
+  AssertTablesEqual(*expected_output_table, *actual_output_table, false, false);
+}
+void AssertArrayWriteReadEqual(const std::shared_ptr<Array>& input_array,
+                               const std::shared_ptr<Array>& expected_output_array,
+                               const int64_t max_size = kDefaultSmallMemStreamSize) {
+  std::shared_ptr<Schema> input_schema = schema({field("col0", input_array->type())}),
+                          output_schema =
+                              schema({field("col0", expected_output_array->type())});
+  auto input_chunked_array = std::make_shared<ChunkedArray>(input_array),
+       expected_output_chunked_array =
+           std::make_shared<ChunkedArray>(expected_output_array);
+  std::shared_ptr<Table> input_table = Table::Make(input_schema, {input_chunked_array}),
+                         expected_output_table =
+                             Table::Make(output_schema, {expected_output_chunked_array});
+  AssertTableWriteReadEqual(input_table, expected_output_table, max_size);
+}
+
+void SchemaORCWriteReadTest(const std::shared_ptr<Schema>& schema, int64_t size,
+                            int64_t min_num_chunks, int64_t max_num_chunks,
+                            double null_probability,
+                            int64_t max_size = kDefaultSmallMemStreamSize) {
+  const std::shared_ptr<Table> table =
+      GenerateRandomTable(schema, size, min_num_chunks, max_num_chunks, null_probability);
+  AssertTableWriteReadEqual(table, table, max_size);
+}
+
 std::unique_ptr<liborc::Writer> CreateWriter(uint64_t stripe_size,
                                              const liborc::Type& type,
                                              liborc::OutputStream* stream) {
@@ -69,32 +278,34 @@ std::unique_ptr<liborc::Writer> CreateWriter(uint64_t stripe_size,
   return liborc::createWriter(type, stream, options);
 }
 
-TEST(TestAdapter, readIntAndStringFileMultipleStripes) {
-  MemoryOutputStream mem_stream(DEFAULT_MEM_STREAM_SIZE);
+TEST(TestAdapterRead, ReadIntAndStringFileMultipleStripes) {
+  MemoryOutputStream mem_stream(kDefaultMemStreamSize);
   ORC_UNIQUE_PTR<liborc::Type> type(
       liborc::Type::buildTypeFromString("struct<col1:int,col2:string>"));
 
   constexpr uint64_t stripe_size = 1024;  // 1K
   constexpr uint64_t stripe_count = 10;
-  constexpr uint64_t stripe_row_count = 65535;
+  constexpr uint64_t stripe_row_count = 16384;
   constexpr uint64_t reader_batch_size = 1024;
 
   auto writer = CreateWriter(stripe_size, *type, &mem_stream);
   auto batch = writer->createRowBatch(stripe_row_count);
-  auto struct_batch = dynamic_cast<liborc::StructVectorBatch*>(batch.get());
-  auto long_batch = dynamic_cast<liborc::LongVectorBatch*>(struct_batch->fields[0]);
-  auto str_batch = dynamic_cast<liborc::StringVectorBatch*>(struct_batch->fields[1]);
+  auto struct_batch = internal::checked_cast<liborc::StructVectorBatch*>(batch.get());
+  auto long_batch =
+      internal::checked_cast<liborc::LongVectorBatch*>(struct_batch->fields[0]);
+  auto str_batch =
+      internal::checked_cast<liborc::StringVectorBatch*>(struct_batch->fields[1]);
   int64_t accumulated = 0;
 
   for (uint64_t j = 0; j < stripe_count; ++j) {
-    char data_buffer[327675];
+    std::string data_buffer(stripe_row_count * 5, '\0');
     uint64_t offset = 0;
     for (uint64_t i = 0; i < stripe_row_count; ++i) {
       std::string str_data = std::to_string(accumulated % stripe_row_count);
       long_batch->data[i] = static_cast<int64_t>(accumulated % stripe_row_count);
-      str_batch->data[i] = data_buffer + offset;
+      str_batch->data[i] = &data_buffer[offset];
       str_batch->length[i] = static_cast<int64_t>(str_data.size());
-      memcpy(data_buffer + offset, str_data.c_str(), str_data.size());
+      memcpy(&data_buffer[offset], str_data.c_str(), str_data.size());
       accumulated++;
       offset += str_data.size();
     }
@@ -124,8 +335,8 @@ TEST(TestAdapter, readIntAndStringFileMultipleStripes) {
     std::shared_ptr<RecordBatch> record_batch;
     EXPECT_TRUE(stripe_reader->ReadNext(&record_batch).ok());
     while (record_batch) {
-      auto int32_array = std::dynamic_pointer_cast<Int32Array>(record_batch->column(0));
-      auto str_array = std::dynamic_pointer_cast<StringArray>(record_batch->column(1));
+      auto int32_array = checked_pointer_cast<Int32Array>(record_batch->column(0));
+      auto str_array = checked_pointer_cast<StringArray>(record_batch->column(1));
       for (int j = 0; j < record_batch->num_rows(); ++j) {
         EXPECT_EQ(accumulated % stripe_row_count, int32_array->Value(j));
         EXPECT_EQ(std::to_string(accumulated % stripe_row_count),
@@ -157,4 +368,317 @@ TEST(TestAdapter, readIntAndStringFileMultipleStripes) {
     EXPECT_TRUE(stripe_reader->ReadNext(&record_batch).ok());
   }
 }
+
+// WriteORC tests
+// Trivial
+
+class TestORCWriterTrivialNoConversion : public ::testing::Test {
+ public:
+  TestORCWriterTrivialNoConversion() {
+    table_schema = schema(
+        {field("bool", boolean()), field("int8", int8()), field("int16", int16()),
+         field("int32", int32()), field("int64", int64()), field("float", float32()),
+         field("double", float64()), field("decimal128nz", decimal128(25, 6)),
+         field("decimal128z", decimal128(32, 0)), field("date32", date32()),
+         field("ts3", timestamp(TimeUnit::NANO)), field("string", utf8()),
+         field("binary", binary()),
+         field("struct", struct_({field("a", utf8()), field("b", int64())})),
+         field("list", list(int32())),
+         field("lsl", list(struct_({field("lsl0", list(int32()))}))),
+         field("map", map(utf8(), utf8()))});
+  }
+
+ protected:
+  std::shared_ptr<Schema> table_schema;
+};
+TEST_F(TestORCWriterTrivialNoConversion, writeTrivialChunk) {
+  std::shared_ptr<Table> table = TableFromJSON(table_schema, {R"([])"});
+  AssertTableWriteReadEqual(table, table, kDefaultSmallMemStreamSize / 16);
+}
+TEST_F(TestORCWriterTrivialNoConversion, writeChunkless) {
+  std::shared_ptr<Table> table = TableFromJSON(table_schema, {});
+  AssertTableWriteReadEqual(table, table, kDefaultSmallMemStreamSize / 16);
+}
+class TestORCWriterTrivialWithConversion : public ::testing::Test {
+ public:
+  TestORCWriterTrivialWithConversion() {
+    input_schema = schema(
+        {field("date64", date64()), field("ts0", timestamp(TimeUnit::SECOND)),
+         field("ts1", timestamp(TimeUnit::MILLI)),
+         field("ts2", timestamp(TimeUnit::MICRO)), field("large_string", large_utf8()),
+         field("large_binary", large_binary()),
+         field("fixed_size_binary0", fixed_size_binary(0)),
+         field("fixed_size_binary", fixed_size_binary(5)),
+         field("large_list", large_list(int32())),
+         field("fixed_size_list", fixed_size_list(int32(), 3))}),
+    output_schema = schema(
+        {field("date64", timestamp(TimeUnit::NANO)),
+         field("ts0", timestamp(TimeUnit::NANO)), field("ts1", timestamp(TimeUnit::NANO)),
+         field("ts2", timestamp(TimeUnit::NANO)), field("large_string", utf8()),
+         field("large_binary", binary()), field("fixed_size_binary0", binary()),
+         field("fixed_size_binary", binary()), field("large_list", list(int32())),
+         field("fixed_size_list", list(int32()))});
+  }
+
+ protected:
+  std::shared_ptr<Schema> input_schema, output_schema;
+};
+TEST_F(TestORCWriterTrivialWithConversion, writeTrivialChunk) {
+  std::shared_ptr<Table> input_table = TableFromJSON(input_schema, {R"([])"}),
+                         expected_output_table = TableFromJSON(output_schema, {R"([])"});
+  AssertTableWriteReadEqual(input_table, expected_output_table,
+                            kDefaultSmallMemStreamSize / 16);
+}
+TEST_F(TestORCWriterTrivialWithConversion, writeChunkless) {
+  std::shared_ptr<Table> input_table = TableFromJSON(input_schema, {}),
+                         expected_output_table = TableFromJSON(output_schema, {});
+  AssertTableWriteReadEqual(input_table, expected_output_table,
+                            kDefaultSmallMemStreamSize / 16);
+}
+
+// General
+
+class TestORCWriterNoConversion : public ::testing::Test {
+ public:
+  TestORCWriterNoConversion() {
+    table_schema = schema(
+        {field("bool", boolean()), field("int8", int8()), field("int16", int16()),
+         field("int32", int32()), field("int64", int64()), field("float", float32()),
+         field("double", float64()), field("date32", date32()),
+         field("decimal64", decimal128(18, 4)), field("decimal64z", decimal128(18, 0)),
+         field("ts3", timestamp(TimeUnit::NANO)), field("string", utf8()),
+         field("binary", binary())});
+  }
+
+ protected:
+  std::shared_ptr<Schema> table_schema;
+};
+TEST_F(TestORCWriterNoConversion, writeNoNulls) {
+  SchemaORCWriteReadTest(table_schema, 11203, 5, 10, 0, kDefaultSmallMemStreamSize * 5);
+}
+TEST_F(TestORCWriterNoConversion, writeMixed) {
+  SchemaORCWriteReadTest(table_schema, 9405, 1, 20, 0.6, kDefaultSmallMemStreamSize * 5);
+}
+TEST_F(TestORCWriterNoConversion, writeAllNulls) {
+  SchemaORCWriteReadTest(table_schema, 4006, 1, 5, 1);
+}
+
+// Converts
+// Since Arrow has way more types than ORC type conversions are unavoidable
+class TestORCWriterWithConversion : public ::testing::Test {
+ public:
+  TestORCWriterWithConversion() {
+    input_schema = schema(
+        {field("date64", date64()), field("ts0", timestamp(TimeUnit::SECOND)),
+         field("ts1", timestamp(TimeUnit::MILLI)),
+         field("ts2", timestamp(TimeUnit::MICRO)), field("large_string", large_utf8()),
+         field("large_binary", large_binary()),
+         field("fixed_size_binary0", fixed_size_binary(0)),
+         field("fixed_size_binary", fixed_size_binary(5))}),
+    output_schema = schema(
+        {field("date64", timestamp(TimeUnit::NANO)),
+         field("ts0", timestamp(TimeUnit::NANO)), field("ts1", timestamp(TimeUnit::NANO)),
+         field("ts2", timestamp(TimeUnit::NANO)), field("large_string", utf8()),
+         field("large_binary", binary()), field("fixed_size_binary0", binary()),
+         field("fixed_size_binary", binary())});
+  }
+  void RunTest(int64_t num_rows, double null_possibility,
+               int64_t max_size = kDefaultSmallMemStreamSize) {
+    int64_t num_cols = (input_schema->fields()).size();
+    std::shared_ptr<Table> input_table =
+        GenerateRandomTable(input_schema, num_rows, 1, 1, null_possibility);
+    ArrayVector av(num_cols);
+    for (int i = 0; i < num_cols - 2; i++) {
+      EXPECT_OK_AND_ASSIGN(av[i],
+                           arrow::compute::Cast(*(input_table->column(i)->chunk(0)),
+                                                output_schema->field(i)->type()));
+    }
+    for (int i = num_cols - 2; i < num_cols; i++) {
+      av[i] = CastFixedSizeBinaryArrayToBinaryArray(input_table->column(i)->chunk(0));
+    }
+    std::shared_ptr<Table> expected_output_table = Table::Make(output_schema, av);
+    AssertTableWriteReadEqual(input_table, expected_output_table, max_size);
+  }
+
+ protected:
+  std::shared_ptr<Schema> input_schema, output_schema;
+};
+TEST_F(TestORCWriterWithConversion, writeAllNulls) { RunTest(12000, 1); }
+TEST_F(TestORCWriterWithConversion, writeNoNulls) { RunTest(10009, 0); }
+TEST_F(TestORCWriterWithConversion, writeMixed) { RunTest(8021, 0.5); }
+
+class TestORCWriterSingleArray : public ::testing::Test {
+ public:
+  TestORCWriterSingleArray() : rand(kRandomSeed) {}
+
+ protected:
+  arrow::random::RandomArrayGenerator rand;
+};
+
+// Nested types
+TEST_F(TestORCWriterSingleArray, WriteStruct) {
+  std::vector<std::shared_ptr<Field>> subfields{field("int32", boolean())};
+  const int64_t num_rows = 1234;
+  int num_subcols = subfields.size();
+  ArrayVector av0(num_subcols);
+  for (int i = 0; i < num_subcols; i++) {
+    av0[i] = rand.ArrayOf(subfields[i]->type(), num_rows, 0.4);
+  }
+  std::shared_ptr<Buffer> bitmap = rand.NullBitmap(num_rows, 0.5);
+  std::shared_ptr<Array> array =
+      std::make_shared<StructArray>(struct_(subfields), num_rows, av0, bitmap);
+  AssertArrayWriteReadEqual(array, array, kDefaultSmallMemStreamSize * 10);
+}
+
+TEST_F(TestORCWriterSingleArray, WriteStructOfStruct) {
+  std::vector<std::shared_ptr<Field>> subsubfields{
+      field("bool", boolean()),
+      field("int8", int8()),
+      field("int16", int16()),
+      field("int32", int32()),
+      field("int64", int64()),
+      field("date32", date32()),
+      field("ts3", timestamp(TimeUnit::NANO)),
+      field("string", utf8()),
+      field("binary", binary())};
+  const int64_t num_rows = 1234;
+  int num_subsubcols = subsubfields.size();
+  ArrayVector av00(num_subsubcols), av0(1);
+  for (int i = 0; i < num_subsubcols; i++) {
+    av00[i] = rand.ArrayOf(subsubfields[i]->type(), num_rows, 0);
+  }
+  std::shared_ptr<Buffer> bitmap0 = rand.NullBitmap(num_rows, 0);
+  av0[0] = std::make_shared<StructArray>(struct_(subsubfields), num_rows, av00, bitmap0);
+  std::shared_ptr<Buffer> bitmap = rand.NullBitmap(num_rows, 0.2);
+  std::shared_ptr<Array> array = std::make_shared<StructArray>(
+      struct_({field("struct2", struct_(subsubfields))}), num_rows, av0, bitmap);
+  AssertArrayWriteReadEqual(array, array, kDefaultSmallMemStreamSize * 10);
+}
+
+TEST_F(TestORCWriterSingleArray, WriteList) {
+  const int64_t num_rows = 1234;
+  auto value_array = rand.ArrayOf(int32(), 125 * num_rows, 0);
+  std::shared_ptr<Array> array = rand.List(*value_array, num_rows, 1);
+  AssertArrayWriteReadEqual(array, array, kDefaultSmallMemStreamSize * 100);
+}
+
+TEST_F(TestORCWriterSingleArray, WriteLargeList) {
+  const int64_t num_rows = 1234;
+  auto value_array = rand.ArrayOf(int32(), 5 * num_rows, 0.5);
+  auto output_offsets = rand.Offsets(num_rows + 1, 0, 5 * num_rows, 0.6, false);
+  EXPECT_OK_AND_ASSIGN(auto input_offsets,
+                       arrow::compute::Cast(*output_offsets, int64()));
+  EXPECT_OK_AND_ASSIGN(auto input_array,
+                       arrow::LargeListArray::FromArrays(*input_offsets, *value_array));
+  EXPECT_OK_AND_ASSIGN(auto output_array,
+                       arrow::ListArray::FromArrays(*output_offsets, *value_array));
+  AssertArrayWriteReadEqual(input_array, output_array, kDefaultSmallMemStreamSize * 10);
+}
+
+TEST_F(TestORCWriterSingleArray, WriteFixedSizeList) {
+  const int64_t num_rows = 1234;
+  std::shared_ptr<Array> value_array = rand.ArrayOf(int32(), 3 * num_rows, 0.8);
+  std::shared_ptr<Buffer> bitmap = rand.NullBitmap(num_rows, 1);
+  std::shared_ptr<Buffer> buffer = GenerateFixedDifferenceBuffer(3, num_rows + 1);
+  std::shared_ptr<Array> input_array = std::make_shared<FixedSizeListArray>(
+                             fixed_size_list(int32(), 3), num_rows, value_array, bitmap),
+                         output_array = std::make_shared<ListArray>(
+                             list(int32()), num_rows, buffer, value_array, bitmap);
+  AssertArrayWriteReadEqual(input_array, output_array, kDefaultSmallMemStreamSize * 10);
+}
+
+TEST_F(TestORCWriterSingleArray, WriteListOfList) {
+  const int64_t num_rows = 1234;
+  auto value_value_array = rand.ArrayOf(utf8(), 4 * num_rows, 0.5);
+  std::shared_ptr<Array> value_array = rand.List(*value_value_array, 2 * num_rows, 0.7);
+  std::shared_ptr<Array> array = rand.List(*value_array, num_rows, 0.4);
+  AssertArrayWriteReadEqual(array, array, kDefaultSmallMemStreamSize * 10);
+}
+
+TEST_F(TestORCWriterSingleArray, WriteListOfListOfList) {
+  const int64_t num_rows = 1234;
+  auto value3_array = rand.ArrayOf(int64(), 12 * num_rows, 0.1);
+  std::shared_ptr<Array> value2_array = rand.List(*value3_array, 5 * num_rows, 0);
+  std::shared_ptr<Array> value_array = rand.List(*value2_array, 2 * num_rows, 0.1);
+  std::shared_ptr<Array> array = rand.List(*value_array, num_rows, 0.1);
+  AssertArrayWriteReadEqual(array, array, kDefaultSmallMemStreamSize * 35);
+}
+
+TEST_F(TestORCWriterSingleArray, WriteListOfStruct) {
+  const int64_t num_rows = 1234, num_values = 3 * num_rows;
+  ArrayVector av00(1);
+  av00[0] = rand.ArrayOf(int32(), num_values, 0);
+  std::shared_ptr<Buffer> bitmap = rand.NullBitmap(num_values, 0.2);
+  std::shared_ptr<Array> value_array = std::make_shared<StructArray>(
+      struct_({field("a", int32())}), num_values, av00, bitmap);
+  std::shared_ptr<Array> array = rand.List(*value_array, num_rows, 0);
+  AssertArrayWriteReadEqual(array, array, kDefaultSmallMemStreamSize * 30);
+}
+
+TEST_F(TestORCWriterSingleArray, WriteStructOfList) {
+  const int64_t num_rows = 1234;
+  ArrayVector av0(1);
+  auto value_array = rand.ArrayOf(int32(), 5 * num_rows, 0.2);
+  av0[0] = rand.List(*value_array, num_rows, 0);
+  std::shared_ptr<Buffer> bitmap = rand.NullBitmap(num_rows, 0.2);
+  std::shared_ptr<Array> array = std::make_shared<StructArray>(
+      struct_({field("a", list(int32()))}), num_rows, av0, bitmap);
+  AssertArrayWriteReadEqual(array, array, kDefaultSmallMemStreamSize * 20);
+}
+
+TEST_F(TestORCWriterSingleArray, WriteMap) {
+  const int64_t num_rows = 1234;
+  auto key_array = rand.ArrayOf(int32(), 20 * num_rows, 0);
+  auto item_array = rand.ArrayOf(int32(), 20 * num_rows, 1);
+  std::shared_ptr<Array> array = rand.Map(key_array, item_array, num_rows, 0.1);
+  AssertArrayWriteReadEqual(array, array, kDefaultSmallMemStreamSize * 50);
+}
+
+TEST_F(TestORCWriterSingleArray, WriteStructOfMap) {
+  const int64_t num_rows = 1234, num_values = 5 * num_rows;
+  ArrayVector av0(1);
+  auto key_array = rand.ArrayOf(binary(), num_values, 0);
+  auto item_array = rand.ArrayOf(int32(), num_values, 0.5);
+  av0[0] = rand.Map(key_array, item_array, num_rows, 0.2);
+  std::shared_ptr<Array> array = std::make_shared<StructArray>(
+      struct_({field("a", map(binary(), int32()))}), num_rows, av0);
+  AssertArrayWriteReadEqual(array, array, kDefaultSmallMemStreamSize * 20);
+}
+
+TEST_F(TestORCWriterSingleArray, WriteMapOfStruct) {
+  const int64_t num_rows = 1234, num_values = 10 * num_rows;
+  std::shared_ptr<Array> key_array = rand.ArrayOf(utf8(), num_values, 0);
+  ArrayVector av00(1);
+  av00[0] = rand.ArrayOf(int32(), num_values, 0.1);
+  std::shared_ptr<Buffer> bitmap = rand.NullBitmap(num_values, 0.2);
+  std::shared_ptr<Array> item_array = std::make_shared<StructArray>(
+      struct_({field("a", int32())}), num_values, av00, bitmap);
+  std::shared_ptr<Array> array = rand.Map(key_array, item_array, num_rows, 0.1);
+  AssertArrayWriteReadEqual(array, array, kDefaultSmallMemStreamSize * 10);
+}
+
+TEST_F(TestORCWriterSingleArray, WriteMapOfMap) {
+  const int64_t num_rows = 1234;
+  auto key_key_array = rand.ArrayOf(utf8(), 4 * num_rows, 0);
+  auto key_item_array = rand.ArrayOf(int32(), 4 * num_rows, 0.5);
+  std::shared_ptr<Array> key_array =
+      rand.Map(key_key_array, key_item_array, 2 * num_rows, 0);
+  auto item_key_array = rand.ArrayOf(utf8(), 4 * num_rows, 0);
+  auto item_item_array = rand.ArrayOf(int32(), 4 * num_rows, 0.2);
+  std::shared_ptr<Array> item_array =
+      rand.Map(item_key_array, item_item_array, 2 * num_rows, 0.3);
+  std::shared_ptr<Array> array = rand.Map(key_array, item_array, num_rows, 0.4);
+  AssertArrayWriteReadEqual(array, array, kDefaultSmallMemStreamSize * 10);
+}
+
+TEST_F(TestORCWriterSingleArray, WriteListOfMap) {
+  const int64_t num_rows = 1234;
+  auto value_key_array = rand.ArrayOf(utf8(), 4 * num_rows, 0);
+  auto value_item_array = rand.ArrayOf(int32(), 4 * num_rows, 0.5);
+  std::shared_ptr<Array> value_array =
+      rand.Map(value_key_array, value_item_array, 2 * num_rows, 0.2);
+  std::shared_ptr<Array> array = rand.List(*value_array, num_rows, 0.4);
+  AssertArrayWriteReadEqual(array, array, kDefaultSmallMemStreamSize * 10);
+}
+
 }  // namespace arrow
diff --git a/cpp/src/arrow/adapters/orc/adapter_util.cc b/cpp/src/arrow/adapters/orc/adapter_util.cc
index 5a36e2c0100..f4f974585e2 100644
--- a/cpp/src/arrow/adapters/orc/adapter_util.cc
+++ b/cpp/src/arrow/adapters/orc/adapter_util.cc
@@ -15,18 +15,25 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include "arrow/adapters/orc/adapter_util.h"
+
+#include <cmath>
 #include <string>
 #include <vector>
 
-#include "arrow/adapters/orc/adapter_util.h"
 #include "arrow/array/builder_base.h"
 #include "arrow/builder.h"
+#include "arrow/chunked_array.h"
+#include "arrow/scalar.h"
 #include "arrow/status.h"
+#include "arrow/util/bitmap_ops.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/decimal.h"
 #include "arrow/util/range.h"
-
+#include "arrow/util/string_view.h"
+#include "arrow/visitor_inline.h"
 #include "orc/Exceptions.hh"
+#include "orc/MemoryPool.hh"
 #include "orc/OrcFile.hh"
 
 // alias to not interfere with nested orc namespace
@@ -34,19 +41,25 @@ namespace liborc = orc;
 
 namespace arrow {
 
-namespace adapters {
+using internal::checked_cast;
 
+namespace adapters {
 namespace orc {
 
-using internal::checked_cast;
+namespace {
 
-// The number of nanoseconds in a second
+// The number of milliseconds, microseconds and nanoseconds in a second
+constexpr int64_t kOneSecondMillis = 1000LL;
+constexpr int64_t kOneMicroNanos = 1000LL;
+constexpr int64_t kOneSecondMicros = 1000000LL;
+constexpr int64_t kOneMilliNanos = 1000000LL;
 constexpr int64_t kOneSecondNanos = 1000000000LL;
 
-Status AppendStructBatch(const liborc::Type* type, liborc::ColumnVectorBatch* cbatch,
-                         int64_t offset, int64_t length, ArrayBuilder* abuilder) {
+Status AppendStructBatch(const liborc::Type* type,
+                         liborc::ColumnVectorBatch* column_vector_batch, int64_t offset,
+                         int64_t length, ArrayBuilder* abuilder) {
   auto builder = checked_cast<StructBuilder*>(abuilder);
-  auto batch = checked_cast<liborc::StructVectorBatch*>(cbatch);
+  auto batch = checked_cast<liborc::StructVectorBatch*>(column_vector_batch);
 
   const uint8_t* valid_bytes = nullptr;
   if (batch->hasNulls) {
@@ -61,10 +74,11 @@ Status AppendStructBatch(const liborc::Type* type, liborc::ColumnVectorBatch* cb
   return Status::OK();
 }
 
-Status AppendListBatch(const liborc::Type* type, liborc::ColumnVectorBatch* cbatch,
-                       int64_t offset, int64_t length, ArrayBuilder* abuilder) {
+Status AppendListBatch(const liborc::Type* type,
+                       liborc::ColumnVectorBatch* column_vector_batch, int64_t offset,
+                       int64_t length, ArrayBuilder* abuilder) {
   auto builder = checked_cast<ListBuilder*>(abuilder);
-  auto batch = checked_cast<liborc::ListVectorBatch*>(cbatch);
+  auto batch = checked_cast<liborc::ListVectorBatch*>(column_vector_batch);
   liborc::ColumnVectorBatch* elements = batch->elements.get();
   const liborc::Type* elemtype = type->getSubtype(0);
 
@@ -83,37 +97,38 @@ Status AppendListBatch(const liborc::Type* type, liborc::ColumnVectorBatch* cbat
   return Status::OK();
 }
 
-Status AppendMapBatch(const liborc::Type* type, liborc::ColumnVectorBatch* cbatch,
-                      int64_t offset, int64_t length, ArrayBuilder* abuilder) {
-  auto list_builder = checked_cast<ListBuilder*>(abuilder);
-  auto struct_builder = checked_cast<StructBuilder*>(list_builder->value_builder());
-  auto batch = checked_cast<liborc::MapVectorBatch*>(cbatch);
+Status AppendMapBatch(const liborc::Type* type,
+                      liborc::ColumnVectorBatch* column_vector_batch, int64_t offset,
+                      int64_t length, ArrayBuilder* abuilder) {
+  auto builder = checked_cast<MapBuilder*>(abuilder);
+  auto batch = checked_cast<liborc::MapVectorBatch*>(column_vector_batch);
   liborc::ColumnVectorBatch* keys = batch->keys.get();
-  liborc::ColumnVectorBatch* vals = batch->elements.get();
-  const liborc::Type* keytype = type->getSubtype(0);
-  const liborc::Type* valtype = type->getSubtype(1);
+  liborc::ColumnVectorBatch* items = batch->elements.get();
+  const liborc::Type* key_type = type->getSubtype(0);
+  const liborc::Type* item_type = type->getSubtype(1);
 
   const bool has_nulls = batch->hasNulls;
   for (int64_t i = offset; i < length + offset; i++) {
-    RETURN_NOT_OK(list_builder->Append());
-    int64_t start = batch->offsets[i];
-    int64_t list_length = batch->offsets[i + 1] - start;
-    if (list_length && (!has_nulls || batch->notNull[i])) {
-      RETURN_NOT_OK(struct_builder->AppendValues(list_length, nullptr));
-      RETURN_NOT_OK(AppendBatch(keytype, keys, start, list_length,
-                                struct_builder->field_builder(0)));
-      RETURN_NOT_OK(AppendBatch(valtype, vals, start, list_length,
-                                struct_builder->field_builder(1)));
+    if (!has_nulls || batch->notNull[i]) {
+      int64_t start = batch->offsets[i];
+      int64_t end = batch->offsets[i + 1];
+      RETURN_NOT_OK(builder->Append());
+      RETURN_NOT_OK(
+          AppendBatch(key_type, keys, start, end - start, builder->key_builder()));
+      RETURN_NOT_OK(
+          AppendBatch(item_type, items, start, end - start, builder->item_builder()));
+    } else {
+      RETURN_NOT_OK(builder->AppendNull());
     }
   }
   return Status::OK();
 }
 
-template <class builder_type, class batch_type, class elem_type>
-Status AppendNumericBatch(liborc::ColumnVectorBatch* cbatch, int64_t offset,
+template <class BuilderType, class BatchType, class ElemType>
+Status AppendNumericBatch(liborc::ColumnVectorBatch* column_vector_batch, int64_t offset,
                           int64_t length, ArrayBuilder* abuilder) {
-  auto builder = checked_cast<builder_type*>(abuilder);
-  auto batch = checked_cast<batch_type*>(cbatch);
+  auto builder = checked_cast<BuilderType*>(abuilder);
+  auto batch = checked_cast<BatchType*>(column_vector_batch);
 
   if (length == 0) {
     return Status::OK();
@@ -122,16 +137,16 @@ Status AppendNumericBatch(liborc::ColumnVectorBatch* cbatch, int64_t offset,
   if (batch->hasNulls) {
     valid_bytes = reinterpret_cast<const uint8_t*>(batch->notNull.data()) + offset;
   }
-  const elem_type* source = batch->data.data() + offset;
+  const ElemType* source = batch->data.data() + offset;
   RETURN_NOT_OK(builder->AppendValues(source, length, valid_bytes));
   return Status::OK();
 }
 
-template <class builder_type, class target_type, class batch_type, class source_type>
-Status AppendNumericBatchCast(liborc::ColumnVectorBatch* cbatch, int64_t offset,
-                              int64_t length, ArrayBuilder* abuilder) {
-  auto builder = checked_cast<builder_type*>(abuilder);
-  auto batch = checked_cast<batch_type*>(cbatch);
+template <class BuilderType, class TargetType, class BatchType, class SourceType>
+Status AppendNumericBatchCast(liborc::ColumnVectorBatch* column_vector_batch,
+                              int64_t offset, int64_t length, ArrayBuilder* abuilder) {
+  auto builder = checked_cast<BuilderType*>(abuilder);
+  auto batch = checked_cast<BatchType*>(column_vector_batch);
 
   if (length == 0) {
     return Status::OK();
@@ -141,9 +156,9 @@ Status AppendNumericBatchCast(liborc::ColumnVectorBatch* cbatch, int64_t offset,
   if (batch->hasNulls) {
     valid_bytes = reinterpret_cast<const uint8_t*>(batch->notNull.data()) + offset;
   }
-  const source_type* source = batch->data.data() + offset;
+  const SourceType* source = batch->data.data() + offset;
   auto cast_iter = internal::MakeLazyRange(
-      [&source](int64_t index) { return static_cast<target_type>(source[index]); },
+      [&source](int64_t index) { return static_cast<TargetType>(source[index]); },
       length);
 
   RETURN_NOT_OK(builder->AppendValues(cast_iter.begin(), cast_iter.end(), valid_bytes));
@@ -151,10 +166,10 @@ Status AppendNumericBatchCast(liborc::ColumnVectorBatch* cbatch, int64_t offset,
   return Status::OK();
 }
 
-Status AppendBoolBatch(liborc::ColumnVectorBatch* cbatch, int64_t offset, int64_t length,
-                       ArrayBuilder* abuilder) {
+Status AppendBoolBatch(liborc::ColumnVectorBatch* column_vector_batch, int64_t offset,
+                       int64_t length, ArrayBuilder* abuilder) {
   auto builder = checked_cast<BooleanBuilder*>(abuilder);
-  auto batch = checked_cast<liborc::LongVectorBatch*>(cbatch);
+  auto batch = checked_cast<liborc::LongVectorBatch*>(column_vector_batch);
 
   if (length == 0) {
     return Status::OK();
@@ -174,10 +189,10 @@ Status AppendBoolBatch(liborc::ColumnVectorBatch* cbatch, int64_t offset, int64_
   return Status::OK();
 }
 
-Status AppendTimestampBatch(liborc::ColumnVectorBatch* cbatch, int64_t offset,
-                            int64_t length, ArrayBuilder* abuilder) {
+Status AppendTimestampBatch(liborc::ColumnVectorBatch* column_vector_batch,
+                            int64_t offset, int64_t length, ArrayBuilder* abuilder) {
   auto builder = checked_cast<TimestampBuilder*>(abuilder);
-  auto batch = checked_cast<liborc::TimestampVectorBatch*>(cbatch);
+  auto batch = checked_cast<liborc::TimestampVectorBatch*>(column_vector_batch);
 
   if (length == 0) {
     return Status::OK();
@@ -202,11 +217,11 @@ Status AppendTimestampBatch(liborc::ColumnVectorBatch* cbatch, int64_t offset,
   return Status::OK();
 }
 
-template <class builder_type>
-Status AppendBinaryBatch(liborc::ColumnVectorBatch* cbatch, int64_t offset,
+template <class BuilderType>
+Status AppendBinaryBatch(liborc::ColumnVectorBatch* column_vector_batch, int64_t offset,
                          int64_t length, ArrayBuilder* abuilder) {
-  auto builder = checked_cast<builder_type*>(abuilder);
-  auto batch = checked_cast<liborc::StringVectorBatch*>(cbatch);
+  auto builder = checked_cast<BuilderType*>(abuilder);
+  auto batch = checked_cast<liborc::StringVectorBatch*>(column_vector_batch);
 
   const bool has_nulls = batch->hasNulls;
   for (int64_t i = offset; i < length + offset; i++) {
@@ -220,10 +235,10 @@ Status AppendBinaryBatch(liborc::ColumnVectorBatch* cbatch, int64_t offset,
   return Status::OK();
 }
 
-Status AppendFixedBinaryBatch(liborc::ColumnVectorBatch* cbatch, int64_t offset,
-                              int64_t length, ArrayBuilder* abuilder) {
+Status AppendFixedBinaryBatch(liborc::ColumnVectorBatch* column_vector_batch,
+                              int64_t offset, int64_t length, ArrayBuilder* abuilder) {
   auto builder = checked_cast<FixedSizeBinaryBuilder*>(abuilder);
-  auto batch = checked_cast<liborc::StringVectorBatch*>(cbatch);
+  auto batch = checked_cast<liborc::StringVectorBatch*>(column_vector_batch);
 
   const bool has_nulls = batch->hasNulls;
   for (int64_t i = offset; i < length + offset; i++) {
@@ -236,13 +251,14 @@ Status AppendFixedBinaryBatch(liborc::ColumnVectorBatch* cbatch, int64_t offset,
   return Status::OK();
 }
 
-Status AppendDecimalBatch(const liborc::Type* type, liborc::ColumnVectorBatch* cbatch,
-                          int64_t offset, int64_t length, ArrayBuilder* abuilder) {
+Status AppendDecimalBatch(const liborc::Type* type,
+                          liborc::ColumnVectorBatch* column_vector_batch, int64_t offset,
+                          int64_t length, ArrayBuilder* abuilder) {
   auto builder = checked_cast<Decimal128Builder*>(abuilder);
 
-  const bool has_nulls = cbatch->hasNulls;
+  const bool has_nulls = column_vector_batch->hasNulls;
   if (type->getPrecision() == 0 || type->getPrecision() > 18) {
-    auto batch = checked_cast<liborc::Decimal128VectorBatch*>(cbatch);
+    auto batch = checked_cast<liborc::Decimal128VectorBatch*>(column_vector_batch);
     for (int64_t i = offset; i < length + offset; i++) {
       if (!has_nulls || batch->notNull[i]) {
         RETURN_NOT_OK(builder->Append(
@@ -252,7 +268,7 @@ Status AppendDecimalBatch(const liborc::Type* type, liborc::ColumnVectorBatch* c
       }
     }
   } else {
-    auto batch = checked_cast<liborc::Decimal64VectorBatch*>(cbatch);
+    auto batch = checked_cast<liborc::Decimal64VectorBatch*>(column_vector_batch);
     for (int64_t i = offset; i < length + offset; i++) {
       if (!has_nulls || batch->notNull[i]) {
         RETURN_NOT_OK(builder->Append(Decimal128(batch->values[i])));
@@ -264,6 +280,8 @@ Status AppendDecimalBatch(const liborc::Type* type, liborc::ColumnVectorBatch* c
   return Status::OK();
 }
 
+}  // namespace
+
 Status AppendBatch(const liborc::Type* type, liborc::ColumnVectorBatch* batch,
                    int64_t offset, int64_t length, ArrayBuilder* builder) {
   if (type == nullptr) {
@@ -316,6 +334,615 @@ Status AppendBatch(const liborc::Type* type, liborc::ColumnVectorBatch* batch,
   }
 }
 
+namespace {
+
+using internal::checked_cast;
+using internal::checked_pointer_cast;
+
+Status WriteBatch(const Array& parray, int64_t orc_offset,
+                  liborc::ColumnVectorBatch* column_vector_batch);
+
+// Make sure children of StructArray have appropriate null.
+Result<std::shared_ptr<Array>> NormalizeArray(const std::shared_ptr<Array>& array) {
+  Type::type kind = array->type_id();
+  switch (kind) {
+    case Type::type::STRUCT: {
+      if (array->null_count() == 0) {
+        return array;
+      } else {
+        auto struct_array = checked_pointer_cast<StructArray>(array);
+        const std::shared_ptr<Buffer> bitmap = struct_array->null_bitmap();
+        std::shared_ptr<DataType> struct_type = struct_array->type();
+        std::size_t size = struct_type->fields().size();
+        std::vector<std::shared_ptr<Array>> new_children(size, nullptr);
+        for (std::size_t i = 0; i < size; i++) {
+          std::shared_ptr<Array> child = struct_array->field(i);
+          const std::shared_ptr<Buffer> child_bitmap = child->null_bitmap();
+          std::shared_ptr<Buffer> final_child_bitmap;
+          if (child_bitmap == nullptr) {
+            final_child_bitmap = bitmap;
+          } else {
+            ARROW_ASSIGN_OR_RAISE(
+                final_child_bitmap,
+                internal::BitmapAnd(default_memory_pool(), bitmap->data(), 0,
+                                    child_bitmap->data(), 0, struct_array->length(), 0));
+          }
+          std::shared_ptr<ArrayData> child_array_data = child->data();
+          std::vector<std::shared_ptr<Buffer>> child_buffers = child_array_data->buffers;
+          child_buffers[0] = final_child_bitmap;
+          std::shared_ptr<ArrayData> new_child_array_data =
+              ArrayData::Make(child->type(), child->length(), child_buffers,
+                              child_array_data->child_data, child_array_data->dictionary);
+          ARROW_ASSIGN_OR_RAISE(new_children[i],
+                                NormalizeArray(MakeArray(new_child_array_data)));
+        }
+        return std::make_shared<StructArray>(struct_type, struct_array->length(),
+                                             new_children, bitmap);
+      }
+    }
+    case Type::type::LIST: {
+      auto list_array = checked_pointer_cast<ListArray>(array);
+      ARROW_ASSIGN_OR_RAISE(auto value_array, NormalizeArray(list_array->values()));
+      return std::make_shared<ListArray>(list_array->type(), list_array->length(),
+                                         list_array->value_offsets(), value_array,
+                                         list_array->null_bitmap());
+    }
+    case Type::type::LARGE_LIST: {
+      auto list_array = checked_pointer_cast<LargeListArray>(array);
+      ARROW_ASSIGN_OR_RAISE(auto value_array, NormalizeArray(list_array->values()));
+      return std::make_shared<LargeListArray>(list_array->type(), list_array->length(),
+                                              list_array->value_offsets(), value_array,
+                                              list_array->null_bitmap());
+    }
+    case Type::type::FIXED_SIZE_LIST: {
+      auto list_array = checked_pointer_cast<FixedSizeListArray>(array);
+      ARROW_ASSIGN_OR_RAISE(auto value_array, NormalizeArray(list_array->values()));
+      return std::make_shared<FixedSizeListArray>(list_array->type(),
+                                                  list_array->length(), value_array,
+                                                  list_array->null_bitmap());
+    }
+    case Type::type::MAP: {
+      auto map_array = checked_pointer_cast<MapArray>(array);
+      ARROW_ASSIGN_OR_RAISE(auto key_array, NormalizeArray(map_array->keys()));
+      ARROW_ASSIGN_OR_RAISE(auto item_array, NormalizeArray(map_array->items()));
+      return std::make_shared<MapArray>(map_array->type(), map_array->length(),
+                                        map_array->value_offsets(), key_array, item_array,
+                                        map_array->null_bitmap());
+    }
+    default: {
+      return array;
+    }
+  }
+}
+
+template <class DataType, class BatchType, typename Enable = void>
+struct Appender {};
+
+// Types for long/double-like Appender, that is, numeric, boolean or date32
+template <typename T>
+using is_generic_type =
+    std::integral_constant<bool, is_number_type<T>::value ||
+                                     std::is_same<Date32Type, T>::value ||
+                                     is_boolean_type<T>::value>;
+template <typename T, typename R = void>
+using enable_if_generic = enable_if_t<is_generic_type<T>::value, R>;
+
+// Number-like
+template <class DataType, class BatchType>
+struct Appender<DataType, BatchType, enable_if_generic<DataType>> {
+  using ArrayType = typename TypeTraits<DataType>::ArrayType;
+  using ValueType = typename TypeTraits<DataType>::CType;
+  Status VisitNull() {
+    batch->notNull[running_orc_offset] = false;
+    running_orc_offset++;
+    running_arrow_offset++;
+    return Status::OK();
+  }
+  Status VisitValue(ValueType v) {
+    batch->data[running_orc_offset] = array.Value(running_arrow_offset);
+    batch->notNull[running_orc_offset] = true;
+    running_orc_offset++;
+    running_arrow_offset++;
+    return Status::OK();
+  }
+  const ArrayType& array;
+  BatchType* batch;
+  int64_t running_orc_offset, running_arrow_offset;
+};
+
+// Binary
+template <class DataType>
+struct Appender<DataType, liborc::StringVectorBatch> {
+  using ArrayType = typename TypeTraits<DataType>::ArrayType;
+  using COffsetType = typename TypeTraits<DataType>::OffsetType::c_type;
+  Status VisitNull() {
+    batch->notNull[running_orc_offset] = false;
+    running_orc_offset++;
+    running_arrow_offset++;
+    return Status::OK();
+  }
+  Status VisitValue(util::string_view v) {
+    batch->notNull[running_orc_offset] = true;
+    COffsetType data_length = 0;
+    batch->data[running_orc_offset] = reinterpret_cast<char*>(
+        const_cast<uint8_t*>(array.GetValue(running_arrow_offset, &data_length)));
+    batch->length[running_orc_offset] = data_length;
+    running_orc_offset++;
+    running_arrow_offset++;
+    return Status::OK();
+  }
+  const ArrayType& array;
+  liborc::StringVectorBatch* batch;
+  int64_t running_orc_offset, running_arrow_offset;
+};
+
+// Decimal
+template <>
+struct Appender<Decimal128Type, liborc::Decimal64VectorBatch> {
+  Status VisitNull() {
+    batch->notNull[running_orc_offset] = false;
+    running_orc_offset++;
+    running_arrow_offset++;
+    return Status::OK();
+  }
+  Status VisitValue(util::string_view v) {
+    batch->notNull[running_orc_offset] = true;
+    const Decimal128 dec_value(array.GetValue(running_arrow_offset));
+    batch->values[running_orc_offset] = static_cast<int64_t>(dec_value.low_bits());
+    running_orc_offset++;
+    running_arrow_offset++;
+    return Status::OK();
+  }
+  const Decimal128Array& array;
+  liborc::Decimal64VectorBatch* batch;
+  int64_t running_orc_offset, running_arrow_offset;
+};
+
+template <>
+struct Appender<Decimal128Type, liborc::Decimal128VectorBatch> {
+  Status VisitNull() {
+    batch->notNull[running_orc_offset] = false;
+    running_orc_offset++;
+    running_arrow_offset++;
+    return Status::OK();
+  }
+  Status VisitValue(util::string_view v) {
+    batch->notNull[running_orc_offset] = true;
+    const Decimal128 dec_value(array.GetValue(running_arrow_offset));
+    batch->values[running_orc_offset] =
+        liborc::Int128(dec_value.high_bits(), dec_value.low_bits());
+    running_orc_offset++;
+    running_arrow_offset++;
+    return Status::OK();
+  }
+  const Decimal128Array& array;
+  liborc::Decimal128VectorBatch* batch;
+  int64_t running_orc_offset, running_arrow_offset;
+};
+
+// Date64 and Timestamp
+template <class DataType>
+struct TimestampAppender {
+  using ArrayType = typename TypeTraits<DataType>::ArrayType;
+  Status VisitNull() {
+    batch->notNull[running_orc_offset] = false;
+    running_orc_offset++;
+    running_arrow_offset++;
+    return Status::OK();
+  }
+  Status VisitValue(int64_t v) {
+    int64_t data = array.Value(running_arrow_offset);
+    batch->notNull[running_orc_offset] = true;
+    batch->data[running_orc_offset] =
+        static_cast<int64_t>(std::floor(data / conversion_factor_from_second));
+    batch->nanoseconds[running_orc_offset] =
+        (data - conversion_factor_from_second * batch->data[running_orc_offset]) *
+        conversion_factor_to_nano;
+    running_orc_offset++;
+    running_arrow_offset++;
+    return Status::OK();
+  }
+  const ArrayType& array;
+  liborc::TimestampVectorBatch* batch;
+  int64_t running_orc_offset, running_arrow_offset;
+  int64_t conversion_factor_from_second, conversion_factor_to_nano;
+};
+
+// FSB
+struct FixedSizeBinaryAppender {
+  Status VisitNull() {
+    batch->notNull[running_orc_offset] = false;
+    running_orc_offset++;
+    running_arrow_offset++;
+    return Status::OK();
+  }
+  Status VisitValue(util::string_view v) {
+    batch->notNull[running_orc_offset] = true;
+    batch->data[running_orc_offset] = reinterpret_cast<char*>(
+        const_cast<uint8_t*>(array.GetValue(running_arrow_offset)));
+    batch->length[running_orc_offset] = data_length;
+    running_orc_offset++;
+    running_arrow_offset++;
+    return Status::OK();
+  }
+  const FixedSizeBinaryArray& array;
+  liborc::StringVectorBatch* batch;
+  int64_t running_orc_offset, running_arrow_offset;
+  const int32_t data_length;
+};
+
+// static_cast from int64_t or double to itself shouldn't introduce overhead
+// Pleae see
+// https://stackoverflow.com/questions/19106826/
+// can-static-cast-to-same-type-introduce-runtime-overhead
+template <class DataType, class BatchType>
+Status WriteGenericBatch(const Array& array, int64_t orc_offset,
+                         liborc::ColumnVectorBatch* column_vector_batch) {
+  using ArrayType = typename TypeTraits<DataType>::ArrayType;
+  const ArrayType& array_(checked_cast<const ArrayType&>(array));
+  auto batch = checked_cast<BatchType*>(column_vector_batch);
+  if (array.null_count()) {
+    batch->hasNulls = true;
+  }
+  Appender<DataType, BatchType> appender{array_, batch, orc_offset, 0};
+  ArrayDataVisitor<DataType> visitor;
+  RETURN_NOT_OK(visitor.Visit(*(array_.data()), &appender));
+  return Status::OK();
+}
+
+template <class DataType>
+Status WriteTimestampBatch(const Array& array, int64_t orc_offset,
+                           liborc::ColumnVectorBatch* column_vector_batch,
+                           const int64_t& conversion_factor_from_second,
+                           const int64_t& conversion_factor_to_nano) {
+  using ArrayType = typename TypeTraits<DataType>::ArrayType;
+  const ArrayType& array_(checked_cast<const ArrayType&>(array));
+  auto batch = checked_cast<liborc::TimestampVectorBatch*>(column_vector_batch);
+  if (array.null_count()) {
+    batch->hasNulls = true;
+  }
+  TimestampAppender<DataType> appender{array_,
+                                       batch,
+                                       orc_offset,
+                                       0,
+                                       conversion_factor_from_second,
+                                       conversion_factor_to_nano};
+  ArrayDataVisitor<DataType> visitor;
+  RETURN_NOT_OK(visitor.Visit(*(array_.data()), &appender));
+  return Status::OK();
+}
+
+Status WriteFixedSizeBinaryBatch(const Array& array, int64_t orc_offset,
+                                 liborc::ColumnVectorBatch* column_vector_batch) {
+  const FixedSizeBinaryArray& array_(checked_cast<const FixedSizeBinaryArray&>(array));
+  auto batch = checked_cast<liborc::StringVectorBatch*>(column_vector_batch);
+  if (array.null_count()) {
+    batch->hasNulls = true;
+  }
+  FixedSizeBinaryAppender appender{array_, batch, orc_offset, 0, array_.byte_width()};
+  ArrayDataVisitor<FixedSizeBinaryType> visitor;
+  RETURN_NOT_OK(visitor.Visit(*(array_.data()), &appender));
+  return Status::OK();
+}
+
+Status WriteStructBatch(const Array& array, int64_t orc_offset,
+                        liborc::ColumnVectorBatch* column_vector_batch) {
+  std::shared_ptr<Array> array_ = MakeArray(array.data());
+  std::shared_ptr<StructArray> struct_array(checked_pointer_cast<StructArray>(array_));
+  auto batch = checked_cast<liborc::StructVectorBatch*>(column_vector_batch);
+  std::size_t size = array.type()->fields().size();
+  int64_t arrow_length = array.length();
+  int64_t running_arrow_offset = 0, running_orc_offset = orc_offset;
+  // First fill fields of ColumnVectorBatch
+  if (array.null_count()) {
+    batch->hasNulls = true;
+  }
+  for (; running_arrow_offset < arrow_length;
+       running_orc_offset++, running_arrow_offset++) {
+    if (array.IsNull(running_arrow_offset)) {
+      batch->notNull[running_orc_offset] = false;
+    } else {
+      batch->notNull[running_orc_offset] = true;
+    }
+  }
+  // Fill the fields
+  for (std::size_t i = 0; i < size; i++) {
+    batch->fields[i]->resize(orc_offset + arrow_length);
+    RETURN_NOT_OK(WriteBatch(*(struct_array->field(i)), orc_offset, batch->fields[i]));
+  }
+  return Status::OK();
+}
+
+template <class ArrayType>
+Status WriteListBatch(const Array& array, int64_t orc_offset,
+                      liborc::ColumnVectorBatch* column_vector_batch) {
+  const ArrayType& list_array(checked_cast<const ArrayType&>(array));
+  auto batch = checked_cast<liborc::ListVectorBatch*>(column_vector_batch);
+  liborc::ColumnVectorBatch* element_batch = (batch->elements).get();
+  int64_t arrow_length = array.length();
+  int64_t running_arrow_offset = 0, running_orc_offset = orc_offset;
+  if (orc_offset == 0) {
+    batch->offsets[0] = 0;
+  }
+  if (array.null_count()) {
+    batch->hasNulls = true;
+  }
+  for (; running_arrow_offset < arrow_length;
+       running_orc_offset++, running_arrow_offset++) {
+    if (array.IsNull(running_arrow_offset)) {
+      batch->notNull[running_orc_offset] = false;
+      batch->offsets[running_orc_offset + 1] = batch->offsets[running_orc_offset];
+    } else {
+      batch->notNull[running_orc_offset] = true;
+      batch->offsets[running_orc_offset + 1] =
+          batch->offsets[running_orc_offset] +
+          list_array.value_offset(running_arrow_offset + 1) -
+          list_array.value_offset(running_arrow_offset);
+      element_batch->resize(batch->offsets[running_orc_offset + 1]);
+      int64_t subarray_arrow_offset = list_array.value_offset(running_arrow_offset),
+              subarray_orc_offset = batch->offsets[running_orc_offset],
+              subarray_orc_length =
+                  batch->offsets[running_orc_offset + 1] - subarray_orc_offset;
+      RETURN_NOT_OK(WriteBatch(
+          *(list_array.values()->Slice(subarray_arrow_offset, subarray_orc_length)),
+          subarray_orc_offset, element_batch));
+    }
+  }
+  return Status::OK();
+}
+
+Status WriteMapBatch(const Array& array, int64_t orc_offset,
+                     liborc::ColumnVectorBatch* column_vector_batch) {
+  const MapArray& map_array(checked_cast<const MapArray&>(array));
+  auto batch = checked_cast<liborc::MapVectorBatch*>(column_vector_batch);
+  liborc::ColumnVectorBatch* key_batch = (batch->keys).get();
+  liborc::ColumnVectorBatch* element_batch = (batch->elements).get();
+  std::shared_ptr<Array> key_array = map_array.keys();
+  std::shared_ptr<Array> element_array = map_array.items();
+  int64_t arrow_length = array.length();
+  int64_t running_arrow_offset = 0, running_orc_offset = orc_offset;
+  if (orc_offset == 0) {
+    batch->offsets[0] = 0;
+  }
+  if (array.null_count()) {
+    batch->hasNulls = true;
+  }
+  for (; running_arrow_offset < arrow_length;
+       running_orc_offset++, running_arrow_offset++) {
+    if (array.IsNull(running_arrow_offset)) {
+      batch->notNull[running_orc_offset] = false;
+      batch->offsets[running_orc_offset + 1] = batch->offsets[running_orc_offset];
+    } else {
+      batch->notNull[running_orc_offset] = true;
+      batch->offsets[running_orc_offset + 1] =
+          batch->offsets[running_orc_offset] +
+          map_array.value_offset(running_arrow_offset + 1) -
+          map_array.value_offset(running_arrow_offset);
+      int64_t subarray_arrow_offset = map_array.value_offset(running_arrow_offset),
+              subarray_orc_offset = batch->offsets[running_orc_offset],
+              new_subarray_orc_offset = batch->offsets[running_orc_offset + 1],
+              subarray_orc_length = new_subarray_orc_offset - subarray_orc_offset;
+      key_batch->resize(new_subarray_orc_offset);
+      element_batch->resize(new_subarray_orc_offset);
+      RETURN_NOT_OK(
+          WriteBatch(*(key_array->Slice(subarray_arrow_offset, subarray_orc_length)),
+                     subarray_orc_offset, key_batch));
+      RETURN_NOT_OK(
+          WriteBatch(*(element_array->Slice(subarray_arrow_offset, subarray_orc_length)),
+                     subarray_orc_offset, element_batch));
+    }
+  }
+  return Status::OK();
+}
+
+Status WriteBatch(const Array& array, int64_t orc_offset,
+                  liborc::ColumnVectorBatch* column_vector_batch) {
+  Type::type kind = array.type_id();
+  column_vector_batch->numElements = orc_offset;
+  switch (kind) {
+    case Type::type::BOOL:
+      return WriteGenericBatch<BooleanType, liborc::LongVectorBatch>(array, orc_offset,
+                                                                     column_vector_batch);
+    case Type::type::INT8:
+      return WriteGenericBatch<Int8Type, liborc::LongVectorBatch>(array, orc_offset,
+                                                                  column_vector_batch);
+    case Type::type::INT16:
+      return WriteGenericBatch<Int16Type, liborc::LongVectorBatch>(array, orc_offset,
+                                                                   column_vector_batch);
+    case Type::type::INT32:
+      return WriteGenericBatch<Int32Type, liborc::LongVectorBatch>(array, orc_offset,
+                                                                   column_vector_batch);
+    case Type::type::INT64:
+      return WriteGenericBatch<Int64Type, liborc::LongVectorBatch>(array, orc_offset,
+                                                                   column_vector_batch);
+    case Type::type::FLOAT:
+      return WriteGenericBatch<FloatType, liborc::DoubleVectorBatch>(array, orc_offset,
+                                                                     column_vector_batch);
+    case Type::type::DOUBLE:
+      return WriteGenericBatch<DoubleType, liborc::DoubleVectorBatch>(
+          array, orc_offset, column_vector_batch);
+    case Type::type::BINARY:
+      return WriteGenericBatch<BinaryType, liborc::StringVectorBatch>(
+          array, orc_offset, column_vector_batch);
+    case Type::type::LARGE_BINARY:
+      return WriteGenericBatch<LargeBinaryType, liborc::StringVectorBatch>(
+          array, orc_offset, column_vector_batch);
+    case Type::type::STRING:
+      return WriteGenericBatch<StringType, liborc::StringVectorBatch>(
+          array, orc_offset, column_vector_batch);
+    case Type::type::LARGE_STRING:
+      return WriteGenericBatch<LargeStringType, liborc::StringVectorBatch>(
+          array, orc_offset, column_vector_batch);
+    case Type::type::FIXED_SIZE_BINARY:
+      return WriteFixedSizeBinaryBatch(array, orc_offset, column_vector_batch);
+    case Type::type::DATE32:
+      return WriteGenericBatch<Date32Type, liborc::LongVectorBatch>(array, orc_offset,
+                                                                    column_vector_batch);
+    case Type::type::DATE64:
+      return WriteTimestampBatch<Date64Type>(array, orc_offset, column_vector_batch,
+                                             kOneSecondMillis, kOneMilliNanos);
+    case Type::type::TIMESTAMP: {
+      switch (internal::checked_pointer_cast<TimestampType>(array.type())->unit()) {
+        case TimeUnit::type::SECOND:
+          return WriteTimestampBatch<TimestampType>(
+              array, orc_offset, column_vector_batch, 1, kOneSecondNanos);
+        case TimeUnit::type::MILLI:
+          return WriteTimestampBatch<TimestampType>(
+              array, orc_offset, column_vector_batch, kOneSecondMillis, kOneMilliNanos);
+        case TimeUnit::type::MICRO:
+          return WriteTimestampBatch<TimestampType>(
+              array, orc_offset, column_vector_batch, kOneSecondMicros, kOneMicroNanos);
+        case TimeUnit::type::NANO:
+          return WriteTimestampBatch<TimestampType>(
+              array, orc_offset, column_vector_batch, kOneSecondNanos, 1);
+        default:
+          return Status::TypeError("Unknown or unsupported Arrow type: ",
+                                   array.type()->ToString());
+      }
+    }
+    case Type::type::DECIMAL128: {
+      int32_t precision = checked_pointer_cast<Decimal128Type>(array.type())->precision();
+      if (precision > 18) {
+        return WriteGenericBatch<Decimal128Type, liborc::Decimal128VectorBatch>(
+            array, orc_offset, column_vector_batch);
+      } else {
+        return WriteGenericBatch<Decimal128Type, liborc::Decimal64VectorBatch>(
+            array, orc_offset, column_vector_batch);
+      }
+    }
+    case Type::type::STRUCT:
+      return WriteStructBatch(array, orc_offset, column_vector_batch);
+    case Type::type::LIST:
+      return WriteListBatch<ListArray>(array, orc_offset, column_vector_batch);
+    case Type::type::LARGE_LIST:
+      return WriteListBatch<LargeListArray>(array, orc_offset, column_vector_batch);
+    case Type::type::FIXED_SIZE_LIST:
+      return WriteListBatch<FixedSizeListArray>(array, orc_offset, column_vector_batch);
+    case Type::type::MAP:
+      return WriteMapBatch(array, orc_offset, column_vector_batch);
+    default: {
+      return Status::NotImplemented("Unknown or unsupported Arrow type: ",
+                                    array.type()->ToString());
+    }
+  }
+  return Status::OK();
+}
+
+Result<ORC_UNIQUE_PTR<liborc::Type>> GetOrcType(const DataType& type) {
+  Type::type kind = type.id();
+  switch (kind) {
+    case Type::type::BOOL:
+      return liborc::createPrimitiveType(liborc::TypeKind::BOOLEAN);
+    case Type::type::INT8:
+      return liborc::createPrimitiveType(liborc::TypeKind::BYTE);
+    case Type::type::INT16:
+      return liborc::createPrimitiveType(liborc::TypeKind::SHORT);
+    case Type::type::INT32:
+      return liborc::createPrimitiveType(liborc::TypeKind::INT);
+    case Type::type::INT64:
+      return liborc::createPrimitiveType(liborc::TypeKind::LONG);
+    case Type::type::FLOAT:
+      return liborc::createPrimitiveType(liborc::TypeKind::FLOAT);
+    case Type::type::DOUBLE:
+      return liborc::createPrimitiveType(liborc::TypeKind::DOUBLE);
+    // Use STRING instead of VARCHAR for now, both use UTF-8
+    case Type::type::STRING:
+    case Type::type::LARGE_STRING:
+      return liborc::createPrimitiveType(liborc::TypeKind::STRING);
+    case Type::type::BINARY:
+    case Type::type::LARGE_BINARY:
+    case Type::type::FIXED_SIZE_BINARY:
+      return liborc::createPrimitiveType(liborc::TypeKind::BINARY);
+    case Type::type::DATE32:
+      return liborc::createPrimitiveType(liborc::TypeKind::DATE);
+    case Type::type::DATE64:
+    case Type::type::TIMESTAMP:
+      return liborc::createPrimitiveType(liborc::TypeKind::TIMESTAMP);
+    case Type::type::DECIMAL128: {
+      const uint64_t precision =
+          static_cast<uint64_t>(checked_cast<const Decimal128Type&>(type).precision());
+      const uint64_t scale =
+          static_cast<uint64_t>(checked_cast<const Decimal128Type&>(type).scale());
+      return liborc::createDecimalType(precision, scale);
+    }
+    case Type::type::LIST:
+    case Type::type::FIXED_SIZE_LIST:
+    case Type::type::LARGE_LIST: {
+      std::shared_ptr<DataType> arrow_child_type =
+          checked_cast<const BaseListType&>(type).value_type();
+      ARROW_ASSIGN_OR_RAISE(auto orc_subtype, GetOrcType(*arrow_child_type));
+      return liborc::createListType(std::move(orc_subtype));
+    }
+    case Type::type::STRUCT: {
+      ORC_UNIQUE_PTR<liborc::Type> out_type = liborc::createStructType();
+      std::vector<std::shared_ptr<Field>> arrow_fields =
+          checked_cast<const StructType&>(type).fields();
+      for (std::vector<std::shared_ptr<Field>>::iterator it = arrow_fields.begin();
+           it != arrow_fields.end(); ++it) {
+        std::string field_name = (*it)->name();
+        std::shared_ptr<DataType> arrow_child_type = (*it)->type();
+        ARROW_ASSIGN_OR_RAISE(auto orc_subtype, GetOrcType(*arrow_child_type));
+        out_type->addStructField(field_name, std::move(orc_subtype));
+      }
+      return out_type;
+    }
+    case Type::type::MAP: {
+      std::shared_ptr<DataType> key_arrow_type =
+          checked_cast<const MapType&>(type).key_type();
+      std::shared_ptr<DataType> item_arrow_type =
+          checked_cast<const MapType&>(type).item_type();
+      ARROW_ASSIGN_OR_RAISE(auto key_orc_type, GetOrcType(*key_arrow_type));
+      ARROW_ASSIGN_OR_RAISE(auto item_orc_type, GetOrcType(*item_arrow_type));
+      return liborc::createMapType(std::move(key_orc_type), std::move(item_orc_type));
+    }
+    case Type::type::DENSE_UNION:
+    case Type::type::SPARSE_UNION: {
+      ORC_UNIQUE_PTR<liborc::Type> out_type = liborc::createUnionType();
+      std::vector<std::shared_ptr<Field>> arrow_fields =
+          checked_cast<const UnionType&>(type).fields();
+      for (std::vector<std::shared_ptr<Field>>::iterator it = arrow_fields.begin();
+           it != arrow_fields.end(); ++it) {
+        std::string field_name = (*it)->name();
+        std::shared_ptr<DataType> arrow_child_type = (*it)->type();
+        ARROW_ASSIGN_OR_RAISE(auto orc_subtype, GetOrcType(*arrow_child_type));
+        out_type->addUnionChild(std::move(orc_subtype));
+      }
+      return out_type;
+    }
+    default: {
+      return Status::NotImplemented("Unknown or unsupported Arrow type: ",
+                                    type.ToString());
+    }
+  }
+}
+
+}  // namespace
+
+Status WriteBatch(const ChunkedArray& chunked_array, int64_t length,
+                  int* arrow_chunk_offset, int64_t* arrow_index_offset,
+                  liborc::ColumnVectorBatch* column_vector_batch) {
+  int num_batch = chunked_array.num_chunks();
+  int64_t orc_offset = 0;
+  while (*arrow_chunk_offset < num_batch && orc_offset < length) {
+    ARROW_ASSIGN_OR_RAISE(auto array,
+                          NormalizeArray(chunked_array.chunk(*arrow_chunk_offset)));
+    int64_t num_written_elements =
+        std::min(length - orc_offset, array->length() - *arrow_index_offset);
+    if (num_written_elements > 0) {
+      RETURN_NOT_OK(WriteBatch(*(array->Slice(*arrow_index_offset, num_written_elements)),
+                               orc_offset, column_vector_batch));
+      orc_offset += num_written_elements;
+      *arrow_index_offset += num_written_elements;
+    }
+    if (orc_offset < length) {  // Another Arrow Array done
+      *arrow_index_offset = 0;
+      (*arrow_chunk_offset)++;
+    }
+  }
+  column_vector_batch->numElements = orc_offset;
+  return Status::OK();
+}
+
 Status GetArrowType(const liborc::Type* type, std::shared_ptr<DataType>* out) {
   // When subselecting fields on read, liborc will set some nodes to nullptr,
   // so we need to check for nullptr before progressing
@@ -369,15 +996,15 @@ Status GetArrowType(const liborc::Type* type, std::shared_ptr<DataType>* out) {
       const int scale = static_cast<int>(type->getScale());
       if (precision == 0) {
         // In HIVE 0.11/0.12 precision is set as 0, but means max precision
-        *out = decimal(38, 6);
+        *out = decimal128(38, 6);
       } else {
-        *out = decimal(precision, scale);
+        *out = decimal128(precision, scale);
       }
       break;
     }
     case liborc::LIST: {
       if (subtype_count != 1) {
-        return Status::Invalid("Invalid Orc List type");
+        return Status::TypeError("Invalid Orc List type");
       }
       std::shared_ptr<DataType> elemtype;
       RETURN_NOT_OK(GetArrowType(type->getSubtype(0), &elemtype));
@@ -386,22 +1013,21 @@ Status GetArrowType(const liborc::Type* type, std::shared_ptr<DataType>* out) {
     }
     case liborc::MAP: {
       if (subtype_count != 2) {
-        return Status::Invalid("Invalid Orc Map type");
+        return Status::TypeError("Invalid Orc Map type");
       }
-      std::shared_ptr<DataType> keytype;
-      std::shared_ptr<DataType> valtype;
-      RETURN_NOT_OK(GetArrowType(type->getSubtype(0), &keytype));
-      RETURN_NOT_OK(GetArrowType(type->getSubtype(1), &valtype));
-      *out = list(struct_({field("key", keytype), field("value", valtype)}));
+      std::shared_ptr<DataType> key_type, item_type;
+      RETURN_NOT_OK(GetArrowType(type->getSubtype(0), &key_type));
+      RETURN_NOT_OK(GetArrowType(type->getSubtype(1), &item_type));
+      *out = map(key_type, item_type);
       break;
     }
     case liborc::STRUCT: {
       std::vector<std::shared_ptr<Field>> fields;
       for (int child = 0; child < subtype_count; ++child) {
-        std::shared_ptr<DataType> elemtype;
-        RETURN_NOT_OK(GetArrowType(type->getSubtype(child), &elemtype));
+        std::shared_ptr<DataType> elem_type;
+        RETURN_NOT_OK(GetArrowType(type->getSubtype(child), &elem_type));
         std::string name = type->getFieldName(child);
-        fields.push_back(field(name, elemtype));
+        fields.push_back(field(name, elem_type));
       }
       *out = struct_(fields);
       break;
@@ -410,21 +1036,34 @@ Status GetArrowType(const liborc::Type* type, std::shared_ptr<DataType>* out) {
       std::vector<std::shared_ptr<Field>> fields;
       std::vector<int8_t> type_codes;
       for (int child = 0; child < subtype_count; ++child) {
-        std::shared_ptr<DataType> elemtype;
-        RETURN_NOT_OK(GetArrowType(type->getSubtype(child), &elemtype));
-        fields.push_back(field("_union_" + std::to_string(child), elemtype));
+        std::shared_ptr<DataType> elem_type;
+        RETURN_NOT_OK(GetArrowType(type->getSubtype(child), &elem_type));
+        fields.push_back(field("_union_" + std::to_string(child), elem_type));
         type_codes.push_back(static_cast<int8_t>(child));
       }
       *out = sparse_union(fields, type_codes);
       break;
     }
     default: {
-      return Status::Invalid("Unknown Orc type kind: ", kind);
+      return Status::TypeError("Unknown Orc type kind: ", type->toString());
     }
   }
   return Status::OK();
 }
 
+Result<ORC_UNIQUE_PTR<liborc::Type>> GetOrcType(const Schema& schema) {
+  int numFields = schema.num_fields();
+  ORC_UNIQUE_PTR<liborc::Type> out_type = liborc::createStructType();
+  for (int i = 0; i < numFields; i++) {
+    std::shared_ptr<Field> field = schema.field(i);
+    std::string field_name = field->name();
+    std::shared_ptr<DataType> arrow_child_type = field->type();
+    ARROW_ASSIGN_OR_RAISE(auto orc_subtype, GetOrcType(*arrow_child_type));
+    out_type->addStructField(field_name, std::move(orc_subtype));
+  }
+  return out_type;
+}
+
 }  // namespace orc
 }  // namespace adapters
 }  // namespace arrow
diff --git a/cpp/src/arrow/adapters/orc/adapter_util.h b/cpp/src/arrow/adapters/orc/adapter_util.h
index 13a62f2bbd3..3e6d0fcc660 100644
--- a/cpp/src/arrow/adapters/orc/adapter_util.h
+++ b/cpp/src/arrow/adapters/orc/adapter_util.h
@@ -34,8 +34,24 @@ namespace orc {
 
 Status GetArrowType(const liborc::Type* type, std::shared_ptr<DataType>* out);
 
+Result<ORC_UNIQUE_PTR<liborc::Type>> GetOrcType(const Schema& schema);
+
 Status AppendBatch(const liborc::Type* type, liborc::ColumnVectorBatch* batch,
-                   int64_t offset, int64_t length, ArrayBuilder* builder);
+                   int64_t offset, int64_t length, arrow::ArrayBuilder* builder);
+
+/// \brief Write a chunked array to an orc::ColumnVectorBatch
+///
+/// \param[in] chunked_array the chunked array
+/// \param[in] length the orc::ColumnVectorBatch size limit
+/// \param[in,out] arrow_chunk_offset The current chunk being processed
+/// \param[in,out] arrow_index_offset The index of the arrow_chunk_offset array
+/// before or after a process
+/// \param[in,out] column_vector_batch the orc::ColumnVectorBatch to be filled
+/// \return Status
+Status WriteBatch(const ChunkedArray& chunked_array, int64_t length,
+                  int* arrow_chunk_offset, int64_t* arrow_index_offset,
+                  liborc::ColumnVectorBatch* column_vector_batch);
+
 }  // namespace orc
 }  // namespace adapters
 }  // namespace arrow
diff --git a/cpp/src/arrow/testing/random.cc b/cpp/src/arrow/testing/random.cc
index 5fe2bb8b0ef..83aaee1d340 100644
--- a/cpp/src/arrow/testing/random.cc
+++ b/cpp/src/arrow/testing/random.cc
@@ -17,6 +17,8 @@
 
 #include "arrow/testing/random.h"
 
+#include <gtest/gtest.h>
+
 #include <algorithm>
 #include <cmath>
 #include <limits>
@@ -25,8 +27,6 @@
 #include <type_traits>
 #include <vector>
 
-#include <gtest/gtest.h>
-
 #include "arrow/array.h"
 #include "arrow/array/builder_decimal.h"
 #include "arrow/array/builder_primitive.h"
@@ -504,6 +504,17 @@ std::shared_ptr<Array> RandomArrayGenerator::List(const Array& values, int64_t s
   return *::arrow::ListArray::FromArrays(*offsets, values);
 }
 
+std::shared_ptr<Array> RandomArrayGenerator::Map(const std::shared_ptr<Array>& keys,
+                                                 const std::shared_ptr<Array>& items,
+                                                 int64_t size, double null_probability,
+                                                 bool force_empty_nulls) {
+  DCHECK_EQ(keys->length(), items->length());
+  auto offsets = Offsets(size + 1, static_cast<int32_t>(keys->offset()),
+                         static_cast<int32_t>(keys->offset() + keys->length()),
+                         null_probability, force_empty_nulls);
+  return *::arrow::MapArray::FromArrays(offsets, keys, items);
+}
+
 std::shared_ptr<Array> RandomArrayGenerator::SparseUnion(const ArrayVector& fields,
                                                          int64_t size) {
   DCHECK_GT(fields.size(), 0);
diff --git a/cpp/src/arrow/testing/random.h b/cpp/src/arrow/testing/random.h
index 5c6b0b4ae77..11bf9017c56 100644
--- a/cpp/src/arrow/testing/random.h
+++ b/cpp/src/arrow/testing/random.h
@@ -321,6 +321,19 @@ class ARROW_TESTING_EXPORT RandomArrayGenerator {
   std::shared_ptr<Array> List(const Array& values, int64_t size, double null_probability,
                               bool force_empty_nulls = false);
 
+  /// \brief Generate a random MapArray
+  ///
+  /// \param[in] keys The underlying keys array
+  /// \param[in] items The underlying items array
+  /// \param[in] size The size of the generated map array
+  /// \param[in] null_probability the probability of a map value being null
+  /// \param[in] force_empty_nulls if true, null map entries must have 0 length
+  ///
+  /// \return a generated Array
+  std::shared_ptr<Array> Map(const std::shared_ptr<Array>& keys,
+                             const std::shared_ptr<Array>& items, int64_t size,
+                             double null_probability, bool force_empty_nulls = false);
+
   /// \brief Generate a random SparseUnionArray
   ///
   /// The type ids are chosen randomly, according to a uniform distribution,
diff --git a/python/pyarrow/_orc.pxd b/python/pyarrow/_orc.pxd
index ed4fa7353d5..51d0bbd73a3 100644
--- a/python/pyarrow/_orc.pxd
+++ b/python/pyarrow/_orc.pxd
@@ -22,7 +22,7 @@ from libc.string cimport const_char
 from libcpp.vector cimport vector as std_vector
 from pyarrow.includes.common cimport *
 from pyarrow.includes.libarrow cimport (CArray, CSchema, CStatus,
-                                        CTable, CMemoryPool,
+                                        CResult, CTable, CMemoryPool,
                                         CKeyValueMetadata,
                                         CRecordBatch,
                                         CTable,
@@ -51,3 +51,11 @@ cdef extern from "arrow/adapters/orc/adapter.h" \
         int64_t NumberOfStripes()
 
         int64_t NumberOfRows()
+
+    cdef cppclass ORCFileWriter:
+        @staticmethod
+        CResult[unique_ptr[ORCFileWriter]] Open(COutputStream* output_stream)
+
+        CStatus Write(const CTable& table)
+
+        CStatus Close()
diff --git a/python/pyarrow/_orc.pyx b/python/pyarrow/_orc.pyx
index 5bdc74f635b..2640057ab16 100644
--- a/python/pyarrow/_orc.pyx
+++ b/python/pyarrow/_orc.pyx
@@ -21,6 +21,7 @@
 
 from cython.operator cimport dereference as deref
 from libcpp.vector cimport vector as std_vector
+from libcpp.utility cimport move
 from pyarrow.includes.common cimport *
 from pyarrow.includes.libarrow cimport *
 from pyarrow.lib cimport (check_status, _Weakrefable,
@@ -29,7 +30,10 @@ from pyarrow.lib cimport (check_status, _Weakrefable,
                           pyarrow_wrap_batch,
                           RecordBatch,
                           pyarrow_wrap_table,
-                          get_reader)
+                          pyarrow_unwrap_schema,
+                          pyarrow_unwrap_table,
+                          get_reader,
+                          get_writer)
 
 
 cdef class ORCReader(_Weakrefable):
@@ -109,3 +113,27 @@ cdef class ORCReader(_Weakrefable):
                 check_status(deref(self.reader).Read(indices, &sp_table))
 
         return pyarrow_wrap_table(sp_table)
+
+cdef class ORCWriter(_Weakrefable):
+    cdef:
+        object source
+        unique_ptr[ORCFileWriter] writer
+        shared_ptr[COutputStream] rd_handle
+
+    def open(self, object source):
+        self.source = source
+        get_writer(source, &self.rd_handle)
+        with nogil:
+            self.writer = move(GetResultValue[unique_ptr[ORCFileWriter]](
+                ORCFileWriter.Open(self.rd_handle.get())))
+
+    def write(self, object table):
+        cdef:
+            shared_ptr[CTable] sp_table
+        sp_table = pyarrow_unwrap_table(table)
+        with nogil:
+            check_status(deref(self.writer).Write(deref(sp_table)))
+
+    def close(self):
+        with nogil:
+            check_status(deref(self.writer).Close())
diff --git a/python/pyarrow/orc.py b/python/pyarrow/orc.py
index 226d7e36cfb..e1009bcca08 100644
--- a/python/pyarrow/orc.py
+++ b/python/pyarrow/orc.py
@@ -147,3 +147,56 @@ def read(self, columns=None):
         """
         include_indices = self._select_indices(columns)
         return self.reader.read(include_indices=include_indices)
+
+
+class ORCWriter:
+    """
+    Writer interface for a single ORC file
+
+    Parameters
+    ----------
+    where : str or pyarrow.io.NativeFile
+        Writable target. For passing Python file objects or byte buffers,
+        see pyarrow.io.PythonFileInterface, pyarrow.io.BufferOutputStream
+        or pyarrow.io.FixedSizeBufferWriter.
+    """
+
+    def __init__(self, where):
+        self.writer = _orc.ORCWriter()
+        self.writer.open(where)
+
+    def write(self, table):
+        """
+        Write the table into an ORC file. The schema of the table must
+        be equal to the schema used when opening the ORC file.
+
+        Parameters
+        ----------
+        schema : pyarrow.lib.Table
+            The table to be written into the ORC file
+        """
+        self.writer.write(table)
+
+    def close(self):
+        """
+        Close the ORC file
+        """
+        self.writer.close()
+
+
+def write_table(where, table):
+    """
+    Write a table into an ORC file
+
+    Parameters
+    ----------
+    where : str or pyarrow.io.NativeFile
+        Writable target. For passing Python file objects or byte buffers,
+        see pyarrow.io.PythonFileInterface, pyarrow.io.BufferOutputStream
+        or pyarrow.io.FixedSizeBufferWriter.
+    table : pyarrow.lib.Table
+        The table to be written into the ORC file
+    """
+    writer = ORCWriter(where)
+    writer.write(table)
+    writer.close()
diff --git a/python/pyarrow/tests/test_orc.py b/python/pyarrow/tests/test_orc.py
index bdfe6ca17c5..14edad87d71 100644
--- a/python/pyarrow/tests/test_orc.py
+++ b/python/pyarrow/tests/test_orc.py
@@ -21,6 +21,7 @@
 
 import pyarrow as pa
 
+
 # Marks all of the tests in this module
 # Ignore these with pytest ... -m 'not orc'
 pytestmark = pytest.mark.orc
@@ -33,9 +34,9 @@
     pass
 
 
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
 def datadir(base_datadir):
-    return base_datadir / 'orc'
+    return base_datadir / "orc"
 
 
 def fix_example_values(actual_cols, expected_cols):
@@ -46,12 +47,19 @@ def fix_example_values(actual_cols, expected_cols):
     for name in expected_cols:
         expected = expected_cols[name]
         actual = actual_cols[name]
+        if (name == "map" and
+                [d.keys() == {'key', 'value'} for m in expected for d in m]):
+            # convert [{'key': k, 'value': v}, ...] to [(k, v), ...]
+            for i, m in enumerate(expected):
+                expected_cols[name][i] = [(d['key'], d['value']) for d in m]
+            continue
+
         typ = actual[0].__class__
         if issubclass(typ, datetime.datetime):
             # timestamp fields are represented as strings in JSON files
             expected = pd.to_datetime(expected)
         elif issubclass(typ, datetime.date):
-            # # date fields are represented as strings in JSON files
+            # date fields are represented as strings in JSON files
             expected = expected.dt.date
         elif typ is decimal.Decimal:
             converted_decimals = [None] * len(expected)
@@ -131,35 +139,44 @@ def test_example_using_json(filename, datadir):
 def test_orcfile_empty(datadir):
     from pyarrow import orc
 
-    table = orc.ORCFile(datadir / 'TestOrcFile.emptyFile.orc').read()
+    table = orc.ORCFile(datadir / "TestOrcFile.emptyFile.orc").read()
     assert table.num_rows == 0
 
     expected_schema = pa.schema([
-        ('boolean1', pa.bool_()),
-        ('byte1', pa.int8()),
-        ('short1', pa.int16()),
-        ('int1', pa.int32()),
-        ('long1', pa.int64()),
-        ('float1', pa.float32()),
-        ('double1', pa.float64()),
-        ('bytes1', pa.binary()),
-        ('string1', pa.string()),
-        ('middle', pa.struct([
-            ('list', pa.list_(pa.struct([
-                ('int1', pa.int32()),
-                ('string1', pa.string()),
-            ]))),
-        ])),
-        ('list', pa.list_(pa.struct([
-            ('int1', pa.int32()),
-            ('string1', pa.string()),
-        ]))),
-        ('map', pa.list_(pa.struct([
-            ('key', pa.string()),
-            ('value', pa.struct([
-                ('int1', pa.int32()),
-                ('string1', pa.string()),
-            ])),
-        ]))),
+        ("boolean1", pa.bool_()),
+        ("byte1", pa.int8()),
+        ("short1", pa.int16()),
+        ("int1", pa.int32()),
+        ("long1", pa.int64()),
+        ("float1", pa.float32()),
+        ("double1", pa.float64()),
+        ("bytes1", pa.binary()),
+        ("string1", pa.string()),
+        ("middle", pa.struct(
+            [("list", pa.list_(
+                pa.struct([("int1", pa.int32()),
+                           ("string1", pa.string())])))
+             ])),
+        ("list", pa.list_(
+            pa.struct([("int1", pa.int32()),
+                       ("string1", pa.string())])
+        )),
+        ("map", pa.map_(pa.string(),
+                        pa.struct([("int1", pa.int32()),
+                                   ("string1", pa.string())])
+                        )),
     ])
     assert table.schema == expected_schema
+
+
+def test_orcfile_readwrite():
+    from pyarrow import orc
+
+    buffer_output_stream = pa.BufferOutputStream()
+    a = pa.array([1, None, 3, None])
+    b = pa.array([None, "Arrow", None, "ORC"])
+    table = pa.table({"int64": a, "utf8": b})
+    orc.write_table(buffer_output_stream, table)
+    buffer_reader = pa.BufferReader(buffer_output_stream.getvalue())
+    output_table = orc.ORCFile(buffer_reader).read()
+    assert table.equals(output_table)
diff --git a/ruby/red-arrow/test/test-orc.rb b/ruby/red-arrow/test/test-orc.rb
index e534e075c1f..b882da0a1b5 100644
--- a/ruby/red-arrow/test/test-orc.rb
+++ b/ruby/red-arrow/test/test-orc.rb
@@ -118,39 +118,35 @@ def pp_values(values)
                        ]
                      ],
                      [
-                       "map: list<item: " +
-                       "struct<key: string, value: " +
-                       "struct<int1: int32, string1: string>>>",
+                       "map: map<string, struct<int1: int32, string1: string>>",
                        [
                          <<-MAP.chomp
 [
+  keys:
+  []
+  values:
   -- is_valid: all not null
-  -- child 0 type: string
+  -- child 0 type: int32
     []
-  -- child 1 type: struct<int1: int32, string1: string>
-    -- is_valid: all not null
-    -- child 0 type: int32
-      []
-    -- child 1 type: string
-      [],
+  -- child 1 type: string
+    [],
+  keys:
+  [
+    "chani",
+    "mauddib"
+  ]
+  values:
   -- is_valid: all not null
-  -- child 0 type: string
+  -- child 0 type: int32
+    [
+      5,
+      1
+    ]
+  -- child 1 type: string
     [
       "chani",
       "mauddib"
     ]
-  -- child 1 type: struct<int1: int32, string1: string>
-    -- is_valid: all not null
-    -- child 0 type: int32
-      [
-        5,
-        1
-      ]
-    -- child 1 type: string
-      [
-        "chani",
-        "mauddib"
-      ]
 ]
                          MAP
                        ],

From 2b2eeeb7ef751eadb4f49ea616bf5c1c852c707a Mon Sep 17 00:00:00 2001
From: Weston Pace <weston.pace@gmail.com>
Date: Mon, 19 Apr 2021 15:44:13 -0400
Subject: [PATCH 092/719] ARROW-12289: [C++] Create basic AsyncScanner
 implementation

Adds a naive implementation of `AsyncScanner` which is different from `SyncScanner` in a few ways:
* It does not use `ScanTask` and instead relies on `Fragment::ScanBatchesAsync` which returns `RecordBatchGenerator`.
* It does an unordered scan by default (i.e. batches from file N may arrive before all batches from file N-1 have arrived) and can order it if asked for
* It uses the unordered scan for `ToTable`.

It is "naive" because this PR does not add a complete implementation for `FileFragment::ScanBatchesAsync`.  This method relies on `FileFormat::ScanBatchesAsync` (in the same way that `FileFragment::Scan` relies on `FileFormat::ScanFile`).  This method (`FileFormat::ScanBatchesAsync`) _should_ be overridden in each of the formats (to rely on an async reader) but it is not (yet).

As a result, the performance for `AsyncScanner` is poor since it does not do any "per-file" parallelism nor does it do any "per-batch" parallelism.  Follow-up tasks are ARROW-12355 (CSV), ARROW-11772 (IPC), ARROW-11843 (Parquet)

In addition, this PR is built on top of ARROW-12287 so that will need to be merged first.  It will also need to rebase changes from ARROW-12161 and ARROW-11797.

Closes #10008 from westonpace/feature/arrow-12289

Authored-by: Weston Pace <weston.pace@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/src/arrow/dataset/dataset.cc      |  55 ++++-
 cpp/src/arrow/dataset/dataset.h       |   8 +
 cpp/src/arrow/dataset/file_base.cc    |  59 ++++-
 cpp/src/arrow/dataset/file_base.h     |   8 +-
 cpp/src/arrow/dataset/file_csv.cc     |   5 +-
 cpp/src/arrow/dataset/file_csv.h      |   2 +-
 cpp/src/arrow/dataset/file_ipc.cc     |   4 +-
 cpp/src/arrow/dataset/file_ipc.h      |   2 +-
 cpp/src/arrow/dataset/file_parquet.cc |   2 +-
 cpp/src/arrow/dataset/file_parquet.h  |   2 +-
 cpp/src/arrow/dataset/file_test.cc    |  46 ++++
 cpp/src/arrow/dataset/scanner.cc      | 298 +++++++++++++++++++++++++-
 cpp/src/arrow/dataset/scanner.h       |  47 +---
 cpp/src/arrow/dataset/scanner_test.cc | 194 +++++++++++------
 cpp/src/arrow/dataset/test_util.h     |  37 +++-
 cpp/src/arrow/util/async_generator.h  |  19 +-
 16 files changed, 650 insertions(+), 138 deletions(-)

diff --git a/cpp/src/arrow/dataset/dataset.cc b/cpp/src/arrow/dataset/dataset.cc
index 2df34145cd9..60d9bd73073 100644
--- a/cpp/src/arrow/dataset/dataset.cc
+++ b/cpp/src/arrow/dataset/dataset.cc
@@ -95,6 +95,55 @@ Result<ScanTaskIterator> InMemoryFragment::Scan(std::shared_ptr<ScanOptions> opt
   return MakeMapIterator(fn, std::move(batches_it));
 }
 
+Result<RecordBatchGenerator> InMemoryFragment::ScanBatchesAsync(
+    const std::shared_ptr<ScanOptions>& options) {
+  struct State {
+    State(std::shared_ptr<InMemoryFragment> fragment, int64_t batch_size)
+        : fragment(std::move(fragment)),
+          batch_index(0),
+          offset(0),
+          batch_size(batch_size) {}
+
+    std::shared_ptr<RecordBatch> Next() {
+      const auto& next_parent = fragment->record_batches_[batch_index];
+      if (offset < next_parent->num_rows()) {
+        auto next = next_parent->Slice(offset, batch_size);
+        offset += batch_size;
+        return next;
+      }
+      batch_index++;
+      offset = 0;
+      return nullptr;
+    }
+
+    bool Finished() { return batch_index >= fragment->record_batches_.size(); }
+
+    std::shared_ptr<InMemoryFragment> fragment;
+    std::size_t batch_index;
+    int64_t offset;
+    int64_t batch_size;
+  };
+
+  struct Generator {
+    Generator(std::shared_ptr<InMemoryFragment> fragment, int64_t batch_size)
+        : state(std::make_shared<State>(std::move(fragment), batch_size)) {}
+
+    Future<std::shared_ptr<RecordBatch>> operator()() {
+      while (!state->Finished()) {
+        auto next = state->Next();
+        if (next) {
+          return Future<std::shared_ptr<RecordBatch>>::MakeFinished(std::move(next));
+        }
+      }
+      return AsyncGeneratorEnd<std::shared_ptr<RecordBatch>>();
+    }
+
+    std::shared_ptr<State> state;
+  };
+  return Generator(internal::checked_pointer_cast<InMemoryFragment>(shared_from_this()),
+                   options->batch_size);
+}
+
 Dataset::Dataset(std::shared_ptr<Schema> schema, Expression partition_expression)
     : schema_(std::move(schema)),
       partition_expression_(std::move(partition_expression)) {}
@@ -189,11 +238,11 @@ Result<FragmentIterator> InMemoryDataset::GetFragmentsImpl(Expression) {
                                " which did not match InMemorySource's: ", *schema);
     }
 
-    RecordBatchVector batches{batch};
-    return std::make_shared<InMemoryFragment>(std::move(batches));
+    return std::make_shared<InMemoryFragment>(RecordBatchVector{std::move(batch)});
   };
 
-  return MakeMaybeMapIterator(std::move(create_fragment), get_batches_->Get());
+  auto batches_it = get_batches_->Get();
+  return MakeMaybeMapIterator(std::move(create_fragment), std::move(batches_it));
 }
 
 Result<std::shared_ptr<UnionDataset>> UnionDataset::Make(std::shared_ptr<Schema> schema,
diff --git a/cpp/src/arrow/dataset/dataset.h b/cpp/src/arrow/dataset/dataset.h
index 12c199dc210..c5c22d731fc 100644
--- a/cpp/src/arrow/dataset/dataset.h
+++ b/cpp/src/arrow/dataset/dataset.h
@@ -34,6 +34,8 @@
 namespace arrow {
 namespace dataset {
 
+using RecordBatchGenerator = std::function<Future<std::shared_ptr<RecordBatch>>()>;
+
 /// \brief A granular piece of a Dataset, such as an individual file.
 ///
 /// A Fragment can be read/scanned separately from other fragments. It yields a
@@ -64,6 +66,10 @@ class ARROW_DS_EXPORT Fragment : public std::enable_shared_from_this<Fragment> {
   /// To receive a record batch stream which is fully filtered and projected, use Scanner.
   virtual Result<ScanTaskIterator> Scan(std::shared_ptr<ScanOptions> options) = 0;
 
+  /// An asynchronous version of Scan
+  virtual Result<RecordBatchGenerator> ScanBatchesAsync(
+      const std::shared_ptr<ScanOptions>& options) = 0;
+
   virtual std::string type_name() const = 0;
   virtual std::string ToString() const { return type_name(); }
 
@@ -113,6 +119,8 @@ class ARROW_DS_EXPORT InMemoryFragment : public Fragment {
   explicit InMemoryFragment(RecordBatchVector record_batches, Expression = literal(true));
 
   Result<ScanTaskIterator> Scan(std::shared_ptr<ScanOptions> options) override;
+  Result<RecordBatchGenerator> ScanBatchesAsync(
+      const std::shared_ptr<ScanOptions>& options) override;
 
   std::string type_name() const override { return "in-memory"; }
 
diff --git a/cpp/src/arrow/dataset/file_base.cc b/cpp/src/arrow/dataset/file_base.cc
index c3b4433b6de..bf4e17da4b7 100644
--- a/cpp/src/arrow/dataset/file_base.cc
+++ b/cpp/src/arrow/dataset/file_base.cc
@@ -102,13 +102,70 @@ Result<std::shared_ptr<FileFragment>> FileFormat::MakeFragment(
                        std::move(partition_expression), std::move(physical_schema)));
 }
 
+// TODO(ARROW-12355[CSV], ARROW-11772[IPC], ARROW-11843[Parquet]) The following
+// implementation of ScanBatchesAsync is both ugly and terribly ineffecient.  Each of the
+// formats should provide their own efficient implementation.
+Result<RecordBatchGenerator> FileFormat::ScanBatchesAsync(
+    const std::shared_ptr<ScanOptions>& scan_options,
+    const std::shared_ptr<FileFragment>& file) {
+  ARROW_ASSIGN_OR_RAISE(auto scan_task_it, ScanFile(scan_options, file));
+  struct State {
+    State(std::shared_ptr<ScanOptions> scan_options, ScanTaskIterator scan_task_it)
+        : scan_options(std::move(scan_options)),
+          scan_task_it(std::move(scan_task_it)),
+          current_rb_it(),
+          finished(false) {}
+
+    std::shared_ptr<ScanOptions> scan_options;
+    ScanTaskIterator scan_task_it;
+    RecordBatchIterator current_rb_it;
+    bool finished;
+  };
+  struct Generator {
+    Future<std::shared_ptr<RecordBatch>> operator()() {
+      while (!state->finished) {
+        if (!state->current_rb_it) {
+          RETURN_NOT_OK(PumpScanTask());
+          if (state->finished) {
+            return AsyncGeneratorEnd<std::shared_ptr<RecordBatch>>();
+          }
+        }
+        ARROW_ASSIGN_OR_RAISE(auto next_batch, state->current_rb_it.Next());
+        if (IsIterationEnd(next_batch)) {
+          state->current_rb_it = RecordBatchIterator();
+        } else {
+          return Future<std::shared_ptr<RecordBatch>>::MakeFinished(next_batch);
+        }
+      }
+      return AsyncGeneratorEnd<std::shared_ptr<RecordBatch>>();
+    }
+    Status PumpScanTask() {
+      ARROW_ASSIGN_OR_RAISE(auto next_task, state->scan_task_it.Next());
+      if (IsIterationEnd(next_task)) {
+        state->finished = true;
+      } else {
+        ARROW_ASSIGN_OR_RAISE(state->current_rb_it, next_task->Execute());
+      }
+      return Status::OK();
+    }
+    std::shared_ptr<State> state;
+  };
+  return Generator{std::make_shared<State>(scan_options, std::move(scan_task_it))};
+}
+
 Result<std::shared_ptr<Schema>> FileFragment::ReadPhysicalSchemaImpl() {
   return format_->Inspect(source_);
 }
 
 Result<ScanTaskIterator> FileFragment::Scan(std::shared_ptr<ScanOptions> options) {
   auto self = std::dynamic_pointer_cast<FileFragment>(shared_from_this());
-  return format_->ScanFile(std::move(options), self);
+  return format_->ScanFile(options, self);
+}
+
+Result<RecordBatchGenerator> FileFragment::ScanBatchesAsync(
+    const std::shared_ptr<ScanOptions>& options) {
+  auto self = std::dynamic_pointer_cast<FileFragment>(shared_from_this());
+  return format_->ScanBatchesAsync(options, self);
 }
 
 struct FileSystemDataset::FragmentSubtrees {
diff --git a/cpp/src/arrow/dataset/file_base.h b/cpp/src/arrow/dataset/file_base.h
index c4c70d65d2f..08359881a20 100644
--- a/cpp/src/arrow/dataset/file_base.h
+++ b/cpp/src/arrow/dataset/file_base.h
@@ -149,9 +149,13 @@ class ARROW_DS_EXPORT FileFormat : public std::enable_shared_from_this<FileForma
   /// \brief Open a FileFragment for scanning.
   /// May populate lazy properties of the FileFragment.
   virtual Result<ScanTaskIterator> ScanFile(
-      std::shared_ptr<ScanOptions> options,
+      const std::shared_ptr<ScanOptions>& options,
       const std::shared_ptr<FileFragment>& file) const = 0;
 
+  virtual Result<RecordBatchGenerator> ScanBatchesAsync(
+      const std::shared_ptr<ScanOptions>& options,
+      const std::shared_ptr<FileFragment>& file);
+
   /// \brief Open a fragment
   virtual Result<std::shared_ptr<FileFragment>> MakeFragment(
       FileSource source, Expression partition_expression,
@@ -178,6 +182,8 @@ class ARROW_DS_EXPORT FileFormat : public std::enable_shared_from_this<FileForma
 class ARROW_DS_EXPORT FileFragment : public Fragment {
  public:
   Result<ScanTaskIterator> Scan(std::shared_ptr<ScanOptions> options) override;
+  Result<RecordBatchGenerator> ScanBatchesAsync(
+      const std::shared_ptr<ScanOptions>& options) override;
 
   std::string type_name() const override { return format_->type_name(); }
   std::string ToString() const override { return source_.path(); };
diff --git a/cpp/src/arrow/dataset/file_csv.cc b/cpp/src/arrow/dataset/file_csv.cc
index 8ba6505524c..a8274a545c4 100644
--- a/cpp/src/arrow/dataset/file_csv.cc
+++ b/cpp/src/arrow/dataset/file_csv.cc
@@ -191,11 +191,10 @@ Result<std::shared_ptr<Schema>> CsvFileFormat::Inspect(const FileSource& source)
 }
 
 Result<ScanTaskIterator> CsvFileFormat::ScanFile(
-    std::shared_ptr<ScanOptions> options,
+    const std::shared_ptr<ScanOptions>& options,
     const std::shared_ptr<FileFragment>& fragment) const {
   auto this_ = checked_pointer_cast<const CsvFileFormat>(shared_from_this());
-  auto task =
-      std::make_shared<CsvScanTask>(std::move(this_), std::move(options), fragment);
+  auto task = std::make_shared<CsvScanTask>(std::move(this_), options, fragment);
 
   return MakeVectorIterator<std::shared_ptr<ScanTask>>({std::move(task)});
 }
diff --git a/cpp/src/arrow/dataset/file_csv.h b/cpp/src/arrow/dataset/file_csv.h
index 7232f37658c..9289c016afb 100644
--- a/cpp/src/arrow/dataset/file_csv.h
+++ b/cpp/src/arrow/dataset/file_csv.h
@@ -54,7 +54,7 @@ class ARROW_DS_EXPORT CsvFileFormat : public FileFormat {
 
   /// \brief Open a file for scanning
   Result<ScanTaskIterator> ScanFile(
-      std::shared_ptr<ScanOptions> options,
+      const std::shared_ptr<ScanOptions>& options,
       const std::shared_ptr<FileFragment>& fragment) const override;
 
   Result<std::shared_ptr<FileWriter>> MakeWriter(
diff --git a/cpp/src/arrow/dataset/file_ipc.cc b/cpp/src/arrow/dataset/file_ipc.cc
index 24ea6e36ff2..49893cde6d9 100644
--- a/cpp/src/arrow/dataset/file_ipc.cc
+++ b/cpp/src/arrow/dataset/file_ipc.cc
@@ -168,9 +168,9 @@ Result<std::shared_ptr<Schema>> IpcFileFormat::Inspect(const FileSource& source)
 }
 
 Result<ScanTaskIterator> IpcFileFormat::ScanFile(
-    std::shared_ptr<ScanOptions> options,
+    const std::shared_ptr<ScanOptions>& options,
     const std::shared_ptr<FileFragment>& fragment) const {
-  return IpcScanTaskIterator::Make(std::move(options), std::move(fragment));
+  return IpcScanTaskIterator::Make(options, fragment);
 }
 
 //
diff --git a/cpp/src/arrow/dataset/file_ipc.h b/cpp/src/arrow/dataset/file_ipc.h
index aa3444eefa4..2c65078c754 100644
--- a/cpp/src/arrow/dataset/file_ipc.h
+++ b/cpp/src/arrow/dataset/file_ipc.h
@@ -53,7 +53,7 @@ class ARROW_DS_EXPORT IpcFileFormat : public FileFormat {
 
   /// \brief Open a file for scanning
   Result<ScanTaskIterator> ScanFile(
-      std::shared_ptr<ScanOptions> options,
+      const std::shared_ptr<ScanOptions>& options,
       const std::shared_ptr<FileFragment>& fragment) const override;
 
   Result<std::shared_ptr<FileWriter>> MakeWriter(
diff --git a/cpp/src/arrow/dataset/file_parquet.cc b/cpp/src/arrow/dataset/file_parquet.cc
index 8caae949784..497e4128fdf 100644
--- a/cpp/src/arrow/dataset/file_parquet.cc
+++ b/cpp/src/arrow/dataset/file_parquet.cc
@@ -326,7 +326,7 @@ Result<std::unique_ptr<parquet::arrow::FileReader>> ParquetFileFormat::GetReader
 }
 
 Result<ScanTaskIterator> ParquetFileFormat::ScanFile(
-    std::shared_ptr<ScanOptions> options,
+    const std::shared_ptr<ScanOptions>& options,
     const std::shared_ptr<FileFragment>& fragment) const {
   auto* parquet_fragment = checked_cast<ParquetFileFragment*>(fragment.get());
   std::vector<int> row_groups;
diff --git a/cpp/src/arrow/dataset/file_parquet.h b/cpp/src/arrow/dataset/file_parquet.h
index 734917e6384..790e89c24c2 100644
--- a/cpp/src/arrow/dataset/file_parquet.h
+++ b/cpp/src/arrow/dataset/file_parquet.h
@@ -96,7 +96,7 @@ class ARROW_DS_EXPORT ParquetFileFormat : public FileFormat {
 
   /// \brief Open a file for scanning
   Result<ScanTaskIterator> ScanFile(
-      std::shared_ptr<ScanOptions> options,
+      const std::shared_ptr<ScanOptions>& options,
       const std::shared_ptr<FileFragment>& file) const override;
 
   using FileFormat::MakeFragment;
diff --git a/cpp/src/arrow/dataset/file_test.cc b/cpp/src/arrow/dataset/file_test.cc
index c7ce5154d0a..dbddb5b385b 100644
--- a/cpp/src/arrow/dataset/file_test.cc
+++ b/cpp/src/arrow/dataset/file_test.cc
@@ -30,6 +30,7 @@
 #include "arrow/filesystem/path_util.h"
 #include "arrow/filesystem/test_util.h"
 #include "arrow/status.h"
+#include "arrow/testing/future_util.h"
 #include "arrow/testing/gtest_util.h"
 #include "arrow/util/io_util.h"
 
@@ -82,6 +83,51 @@ TEST(FileSource, BufferBased) {
   ASSERT_EQ(source1.buffer(), source3.buffer());
 }
 
+constexpr int kNumScanTasks = 2;
+constexpr int kBatchesPerScanTask = 2;
+constexpr int kRowsPerBatch = 1024;
+class MockFileFormat : public FileFormat {
+  virtual std::string type_name() const { return "mock"; }
+  virtual bool Equals(const FileFormat& other) const { return false; }
+  virtual Result<bool> IsSupported(const FileSource& source) const { return true; }
+  virtual Result<std::shared_ptr<Schema>> Inspect(const FileSource& source) const {
+    return Status::NotImplemented("Not needed for test");
+  }
+  virtual Result<std::shared_ptr<FileWriter>> MakeWriter(
+      std::shared_ptr<io::OutputStream> destination, std::shared_ptr<Schema> schema,
+      std::shared_ptr<FileWriteOptions> options) const {
+    return Status::NotImplemented("Not needed for test");
+  }
+  virtual std::shared_ptr<FileWriteOptions> DefaultWriteOptions() { return nullptr; }
+
+  virtual Result<ScanTaskIterator> ScanFile(
+      const std::shared_ptr<ScanOptions>& options,
+      const std::shared_ptr<FileFragment>& file) const {
+    auto sch = schema({field("i32", int32())});
+    ScanTaskVector scan_tasks;
+    for (int i = 0; i < kNumScanTasks; i++) {
+      RecordBatchVector batches;
+      for (int j = 0; j < kBatchesPerScanTask; j++) {
+        batches.push_back(ConstantArrayGenerator::Zeroes(kRowsPerBatch, sch));
+      }
+      scan_tasks.push_back(std::make_shared<InMemoryScanTask>(
+          std::move(batches), std::make_shared<ScanOptions>(), nullptr));
+    }
+    return MakeVectorIterator(std::move(scan_tasks));
+  }
+};
+
+TEST(FileFormat, ScanAsync) {
+  MockFileFormat format;
+  auto scan_options = std::make_shared<ScanOptions>();
+  ASSERT_OK_AND_ASSIGN(auto batch_gen, format.ScanBatchesAsync(scan_options, nullptr));
+  ASSERT_FINISHES_OK_AND_ASSIGN(auto batches, CollectAsyncGenerator(batch_gen));
+  ASSERT_EQ(kNumScanTasks * kBatchesPerScanTask, static_cast<int>(batches.size()));
+  for (int i = 0; i < kNumScanTasks * kBatchesPerScanTask; i++) {
+    ASSERT_EQ(kRowsPerBatch, batches[i]->num_rows());
+  }
+}
+
 TEST_F(TestFileSystemDataset, Basic) {
   MakeDataset({});
   AssertFragmentsAreFromPath(*dataset_->GetFragments(), {});
diff --git a/cpp/src/arrow/dataset/scanner.cc b/cpp/src/arrow/dataset/scanner.cc
index f7bd3c063e5..43c024768ea 100644
--- a/cpp/src/arrow/dataset/scanner.cc
+++ b/cpp/src/arrow/dataset/scanner.cc
@@ -40,6 +40,8 @@
 namespace arrow {
 namespace dataset {
 
+using FragmentGenerator = std::function<Future<std::shared_ptr<Fragment>>()>;
+
 std::vector<std::string> ScanOptions::MaterializedFields() const {
   std::vector<std::string> fields;
 
@@ -242,6 +244,31 @@ struct ScanBatchesState : public std::enable_shared_from_this<ScanBatchesState>
   size_t pop_cursor = 0;
 };
 
+class ARROW_DS_EXPORT SyncScanner : public Scanner {
+ public:
+  SyncScanner(std::shared_ptr<Dataset> dataset, std::shared_ptr<ScanOptions> scan_options)
+      : Scanner(std::move(scan_options)), dataset_(std::move(dataset)) {}
+
+  SyncScanner(std::shared_ptr<Fragment> fragment,
+              std::shared_ptr<ScanOptions> scan_options)
+      : Scanner(std::move(scan_options)), fragment_(std::move(fragment)) {}
+
+  Result<TaggedRecordBatchIterator> ScanBatches() override;
+  Result<ScanTaskIterator> Scan() override;
+  Status Scan(std::function<Status(TaggedRecordBatch)> visitor) override;
+  Result<std::shared_ptr<Table>> ToTable() override;
+
+ protected:
+  /// \brief GetFragments returns an iterator over all Fragments in this scan.
+  Result<FragmentIterator> GetFragments();
+  Future<std::shared_ptr<Table>> ToTableInternal(internal::Executor* cpu_executor);
+  Result<ScanTaskIterator> ScanInternal();
+
+  std::shared_ptr<Dataset> dataset_;
+  // TODO(ARROW-8065) remove fragment_ after a Dataset is constuctible from fragments
+  std::shared_ptr<Fragment> fragment_;
+};
+
 Result<TaggedRecordBatchIterator> SyncScanner::ScanBatches() {
   ARROW_ASSIGN_OR_RAISE(auto scan_task_it, ScanInternal());
   auto task_group = scan_options_->TaskGroup();
@@ -311,6 +338,269 @@ Result<ScanTaskIterator> ScanTaskIteratorFromRecordBatch(
   return fragment->Scan(std::move(options));
 }
 
+class ARROW_DS_EXPORT AsyncScanner : public Scanner,
+                                     public std::enable_shared_from_this<AsyncScanner> {
+ public:
+  AsyncScanner(std::shared_ptr<Dataset> dataset,
+               std::shared_ptr<ScanOptions> scan_options)
+      : Scanner(std::move(scan_options)), dataset_(std::move(dataset)) {}
+
+  Status Scan(std::function<Status(TaggedRecordBatch)> visitor) override;
+  Result<TaggedRecordBatchIterator> ScanBatches() override;
+  Result<EnumeratedRecordBatchIterator> ScanBatchesUnordered() override;
+  Result<std::shared_ptr<Table>> ToTable() override;
+
+ private:
+  Result<TaggedRecordBatchGenerator> ScanBatchesAsync(internal::Executor* executor);
+  Future<> VisitBatchesAsync(std::function<Status(TaggedRecordBatch)> visitor,
+                             internal::Executor* executor);
+  Result<EnumeratedRecordBatchGenerator> ScanBatchesUnorderedAsync(
+      internal::Executor* executor);
+  Future<std::shared_ptr<Table>> ToTableAsync(internal::Executor* executor);
+
+  Result<FragmentGenerator> GetFragments() const;
+
+  std::shared_ptr<Dataset> dataset_;
+};
+
+namespace {
+
+inline Result<EnumeratedRecordBatch> DoFilterAndProjectRecordBatchAsync(
+    const std::shared_ptr<Scanner>& scanner, const EnumeratedRecordBatch& in) {
+  ARROW_ASSIGN_OR_RAISE(Expression simplified_filter,
+                        SimplifyWithGuarantee(scanner->options()->filter,
+                                              in.fragment.value->partition_expression()));
+
+  compute::ExecContext exec_context{scanner->options()->pool};
+  ARROW_ASSIGN_OR_RAISE(
+      Datum mask, ExecuteScalarExpression(simplified_filter, Datum(in.record_batch.value),
+                                          &exec_context));
+
+  Datum filtered;
+  if (mask.is_scalar()) {
+    const auto& mask_scalar = mask.scalar_as<BooleanScalar>();
+    if (mask_scalar.is_valid && mask_scalar.value) {
+      // filter matches entire table
+      filtered = in.record_batch.value;
+    } else {
+      // Filter matches nothing
+      filtered = in.record_batch.value->Slice(0, 0);
+    }
+  } else {
+    ARROW_ASSIGN_OR_RAISE(
+        filtered, compute::Filter(in.record_batch.value, mask,
+                                  compute::FilterOptions::Defaults(), &exec_context));
+  }
+
+  ARROW_ASSIGN_OR_RAISE(Expression simplified_projection,
+                        SimplifyWithGuarantee(scanner->options()->projection,
+                                              in.fragment.value->partition_expression()));
+  ARROW_ASSIGN_OR_RAISE(
+      Datum projected,
+      ExecuteScalarExpression(simplified_projection, filtered, &exec_context));
+
+  DCHECK_EQ(projected.type()->id(), Type::STRUCT);
+  if (projected.shape() == ValueDescr::SCALAR) {
+    // Only virtual columns are projected. Broadcast to an array
+    ARROW_ASSIGN_OR_RAISE(
+        projected,
+        MakeArrayFromScalar(*projected.scalar(), filtered.record_batch()->num_rows(),
+                            scanner->options()->pool));
+  }
+  ARROW_ASSIGN_OR_RAISE(auto out,
+                        RecordBatch::FromStructArray(projected.array_as<StructArray>()));
+  auto projected_batch =
+      out->ReplaceSchemaMetadata(in.record_batch.value->schema()->metadata());
+
+  return EnumeratedRecordBatch{
+      {std::move(projected_batch), in.record_batch.index, in.record_batch.last},
+      in.fragment};
+}
+
+inline EnumeratedRecordBatchGenerator FilterAndProjectRecordBatchAsync(
+    const std::shared_ptr<Scanner>& scanner, EnumeratedRecordBatchGenerator rbs) {
+  auto mapper = [scanner](const EnumeratedRecordBatch& in) {
+    return DoFilterAndProjectRecordBatchAsync(scanner, in);
+  };
+  return MakeMappedGenerator<EnumeratedRecordBatch>(std::move(rbs), mapper);
+}
+
+Result<EnumeratedRecordBatchGenerator> FragmentToBatches(
+    std::shared_ptr<AsyncScanner> scanner,
+    const Enumerated<std::shared_ptr<Fragment>>& fragment) {
+  ARROW_ASSIGN_OR_RAISE(auto batch_gen,
+                        fragment.value->ScanBatchesAsync(scanner->options()));
+  auto enumerated_batch_gen = MakeEnumeratedGenerator(std::move(batch_gen));
+
+  auto combine_fn =
+      [fragment](const Enumerated<std::shared_ptr<RecordBatch>>& record_batch) {
+        return EnumeratedRecordBatch{record_batch, fragment};
+      };
+
+  auto combined_gen = MakeMappedGenerator<EnumeratedRecordBatch>(enumerated_batch_gen,
+                                                                 std::move(combine_fn));
+
+  return FilterAndProjectRecordBatchAsync(scanner, std::move(combined_gen));
+}
+
+Result<AsyncGenerator<EnumeratedRecordBatchGenerator>> FragmentsToBatches(
+    std::shared_ptr<AsyncScanner> scanner, FragmentGenerator fragment_gen) {
+  auto enumerated_fragment_gen = MakeEnumeratedGenerator(std::move(fragment_gen));
+  return MakeMappedGenerator<EnumeratedRecordBatchGenerator>(
+      std::move(enumerated_fragment_gen),
+      [scanner](const Enumerated<std::shared_ptr<Fragment>>& fragment) {
+        return FragmentToBatches(scanner, fragment);
+      });
+}
+
+}  // namespace
+
+Result<FragmentGenerator> AsyncScanner::GetFragments() const {
+  // TODO(ARROW-8163): Async fragment scanning will return AsyncGenerator<Fragment>
+  // here. Current iterator based versions are all fast & sync so we will just ToVector
+  // it
+  ARROW_ASSIGN_OR_RAISE(auto fragments_it, dataset_->GetFragments(scan_options_->filter));
+  ARROW_ASSIGN_OR_RAISE(auto fragments_vec, fragments_it.ToVector());
+  return MakeVectorGenerator(std::move(fragments_vec));
+}
+
+Result<TaggedRecordBatchIterator> AsyncScanner::ScanBatches() {
+  ARROW_ASSIGN_OR_RAISE(auto batches_gen, ScanBatchesAsync(internal::GetCpuThreadPool()));
+  return MakeGeneratorIterator(std::move(batches_gen));
+}
+
+Result<EnumeratedRecordBatchIterator> AsyncScanner::ScanBatchesUnordered() {
+  ARROW_ASSIGN_OR_RAISE(auto batches_gen,
+                        ScanBatchesUnorderedAsync(internal::GetCpuThreadPool()));
+  return MakeGeneratorIterator(std::move(batches_gen));
+}
+
+Result<std::shared_ptr<Table>> AsyncScanner::ToTable() {
+  auto table_fut = ToTableAsync(internal::GetCpuThreadPool());
+  return table_fut.result();
+}
+
+Result<EnumeratedRecordBatchGenerator> AsyncScanner::ScanBatchesUnorderedAsync(
+    internal::Executor* cpu_executor) {
+  auto self = shared_from_this();
+  ARROW_ASSIGN_OR_RAISE(auto fragment_gen, GetFragments());
+  ARROW_ASSIGN_OR_RAISE(auto batch_gen_gen,
+                        FragmentsToBatches(self, std::move(fragment_gen)));
+  return MakeConcatenatedGenerator(std::move(batch_gen_gen));
+}
+
+Result<TaggedRecordBatchGenerator> AsyncScanner::ScanBatchesAsync(
+    internal::Executor* cpu_executor) {
+  ARROW_ASSIGN_OR_RAISE(auto unordered, ScanBatchesUnorderedAsync(cpu_executor));
+  auto left_after_right = [](const EnumeratedRecordBatch& left,
+                             const EnumeratedRecordBatch& right) {
+    // Before any comes first
+    if (left.fragment.value == nullptr) {
+      return false;
+    }
+    if (right.fragment.value == nullptr) {
+      return true;
+    }
+    // Compare batches if fragment is the same
+    if (left.fragment.index == right.fragment.index) {
+      return left.record_batch.index > right.record_batch.index;
+    }
+    // Otherwise compare fragment
+    return left.fragment.index > right.fragment.index;
+  };
+  auto is_next = [](const EnumeratedRecordBatch& prev,
+                    const EnumeratedRecordBatch& next) {
+    // Only true if next is the first batch
+    if (prev.fragment.value == nullptr) {
+      return next.fragment.index == 0 && next.record_batch.index == 0;
+    }
+    // If same fragment, compare batch index
+    if (prev.fragment.index == next.fragment.index) {
+      return next.record_batch.index == prev.record_batch.index + 1;
+    }
+    // Else only if next first batch of next fragment and prev is last batch of previous
+    return next.fragment.index == prev.fragment.index + 1 && prev.record_batch.last &&
+           next.record_batch.index == 0;
+  };
+  auto before_any = EnumeratedRecordBatch{{nullptr, -1, false}, {nullptr, -1, false}};
+  auto sequenced = MakeSequencingGenerator(std::move(unordered), left_after_right,
+                                           is_next, before_any);
+
+  auto unenumerate_fn = [](const EnumeratedRecordBatch& enumerated_batch) {
+    return TaggedRecordBatch{enumerated_batch.record_batch.value,
+                             enumerated_batch.fragment.value};
+  };
+  return MakeMappedGenerator<TaggedRecordBatch>(std::move(sequenced), unenumerate_fn);
+}
+
+struct AsyncTableAssemblyState {
+  /// Protecting mutating accesses to batches
+  std::mutex mutex{};
+  std::vector<RecordBatchVector> batches{};
+
+  void Emplace(const EnumeratedRecordBatch& batch) {
+    std::lock_guard<std::mutex> lock(mutex);
+    auto fragment_index = batch.fragment.index;
+    auto batch_index = batch.record_batch.index;
+    if (static_cast<int>(batches.size()) <= fragment_index) {
+      batches.resize(fragment_index + 1);
+    }
+    if (static_cast<int>(batches[fragment_index].size()) <= batch_index) {
+      batches[fragment_index].resize(batch_index + 1);
+    }
+    batches[fragment_index][batch_index] = batch.record_batch.value;
+  }
+
+  RecordBatchVector Finish() {
+    RecordBatchVector all_batches;
+    for (auto& fragment_batches : batches) {
+      auto end = std::make_move_iterator(fragment_batches.end());
+      for (auto it = std::make_move_iterator(fragment_batches.begin()); it != end; it++) {
+        all_batches.push_back(*it);
+      }
+    }
+    return all_batches;
+  }
+};
+
+Status AsyncScanner::Scan(std::function<Status(TaggedRecordBatch)> visitor) {
+  return internal::RunSynchronouslyVoid(
+      [this, &visitor](Executor* executor) {
+        return VisitBatchesAsync(visitor, executor);
+      },
+      scan_options_->use_threads);
+}
+
+Future<> AsyncScanner::VisitBatchesAsync(std::function<Status(TaggedRecordBatch)> visitor,
+                                         internal::Executor* executor) {
+  ARROW_ASSIGN_OR_RAISE(auto batches_gen, ScanBatchesAsync(executor));
+  return VisitAsyncGenerator(std::move(batches_gen), visitor);
+}
+
+Future<std::shared_ptr<Table>> AsyncScanner::ToTableAsync(
+    internal::Executor* cpu_executor) {
+  auto scan_options = scan_options_;
+  ARROW_ASSIGN_OR_RAISE(auto positioned_batch_gen,
+                        ScanBatchesUnorderedAsync(cpu_executor));
+  /// Wraps the state in a shared_ptr to ensure that failing ScanTasks don't
+  /// invalidate concurrently running tasks when Finish() early returns
+  /// and the mutex/batches fail out of scope.
+  auto state = std::make_shared<AsyncTableAssemblyState>();
+
+  auto table_building_task = [state](const EnumeratedRecordBatch& batch) {
+    state->Emplace(batch);
+    return batch;
+  };
+
+  auto table_building_gen = MakeMappedGenerator<EnumeratedRecordBatch>(
+      positioned_batch_gen, table_building_task);
+
+  return DiscardAllFromAsyncGenerator(table_building_gen)
+      .Then([state, scan_options](const detail::Empty&) {
+        return Table::FromRecordBatches(scan_options->projected_schema, state->Finish());
+      });
+}
+
 ScannerBuilder::ScannerBuilder(std::shared_ptr<Dataset> dataset)
     : ScannerBuilder(std::move(dataset), std::make_shared<ScanOptions>()) {}
 
@@ -359,6 +649,11 @@ Status ScannerBuilder::UseThreads(bool use_threads) {
   return Status::OK();
 }
 
+Status ScannerBuilder::UseAsync(bool use_async) {
+  scan_options_->use_async = use_async;
+  return Status::OK();
+}
+
 Status ScannerBuilder::BatchSize(int64_t batch_size) {
   if (batch_size <= 0) {
     return Status::Invalid("BatchSize must be greater than 0, got ", batch_size);
@@ -388,8 +683,7 @@ Result<std::shared_ptr<Scanner>> ScannerBuilder::Finish() {
     return std::make_shared<SyncScanner>(fragment_, scan_options_);
   }
   if (scan_options_->use_async) {
-    // TODO(ARROW-12289)
-    return Status::NotImplemented("The asynchronous scanner is not yet available");
+    return std::make_shared<AsyncScanner>(dataset_, scan_options_);
   } else {
     return std::make_shared<SyncScanner>(dataset_, scan_options_);
   }
diff --git a/cpp/src/arrow/dataset/scanner.h b/cpp/src/arrow/dataset/scanner.h
index 956fbbb2ee3..6315cf922d0 100644
--- a/cpp/src/arrow/dataset/scanner.h
+++ b/cpp/src/arrow/dataset/scanner.h
@@ -33,6 +33,7 @@
 #include "arrow/io/interfaces.h"
 #include "arrow/memory_pool.h"
 #include "arrow/type_fwd.h"
+#include "arrow/util/async_generator.h"
 #include "arrow/util/iterator.h"
 #include "arrow/util/thread_pool.h"
 #include "arrow/util/type_fwd.h"
@@ -98,11 +99,6 @@ struct ARROW_DS_EXPORT ScanOptions {
   /// A pool from which materialized and scanned arrays will be allocated.
   MemoryPool* pool = arrow::default_memory_pool();
 
-  /// Executor on which to run any CPU tasks
-  ///
-  /// Note: Will be ignored if use_threads is set to false
-  internal::Executor* cpu_executor = internal::GetCpuThreadPool();
-
   /// IOContext for any IO tasks
   ///
   /// Note: The IOContext executor will be ignored if use_threads is set to false
@@ -166,13 +162,6 @@ class ARROW_DS_EXPORT ScanTask {
   std::shared_ptr<Fragment> fragment_;
 };
 
-template <typename T>
-struct Enumerated {
-  T value;
-  int index;
-  bool last;
-};
-
 /// \brief Combines a record batch with the fragment that the record batch originated
 /// from
 ///
@@ -305,34 +294,6 @@ class ARROW_DS_EXPORT Scanner {
   const std::shared_ptr<ScanOptions> scan_options_;
 };
 
-class ARROW_DS_EXPORT SyncScanner : public Scanner {
- public:
-  SyncScanner(std::shared_ptr<Dataset> dataset, std::shared_ptr<ScanOptions> scan_options)
-      : Scanner(std::move(scan_options)), dataset_(std::move(dataset)) {}
-
-  SyncScanner(std::shared_ptr<Fragment> fragment,
-              std::shared_ptr<ScanOptions> scan_options)
-      : Scanner(std::move(scan_options)), fragment_(std::move(fragment)) {}
-
-  Result<TaggedRecordBatchIterator> ScanBatches() override;
-
-  Result<ScanTaskIterator> Scan() override;
-
-  Status Scan(std::function<Status(TaggedRecordBatch)> visitor) override;
-
-  Result<std::shared_ptr<Table>> ToTable() override;
-
- protected:
-  /// \brief GetFragments returns an iterator over all Fragments in this scan.
-  Result<FragmentIterator> GetFragments();
-  Future<std::shared_ptr<Table>> ToTableInternal(internal::Executor* cpu_executor);
-  Result<ScanTaskIterator> ScanInternal();
-
-  std::shared_ptr<Dataset> dataset_;
-  // TODO(ARROW-8065) remove fragment_ after a Dataset is constuctible from fragments
-  std::shared_ptr<Fragment> fragment_;
-};
-
 /// \brief ScannerBuilder is a factory class to construct a Scanner. It is used
 /// to pass information, notably a potential filter expression and a subset of
 /// columns to materialize.
@@ -386,6 +347,12 @@ class ARROW_DS_EXPORT ScannerBuilder {
   ///        ThreadPool found in ScanOptions;
   Status UseThreads(bool use_threads = true);
 
+  /// \brief Indicate if the Scanner should run in experimental "async" mode
+  ///
+  /// This mode should have considerably better performance on high-latency or parallel
+  /// filesystems but is still experimental
+  Status UseAsync(bool use_async = true);
+
   /// \brief Set the maximum number of rows per RecordBatch.
   ///
   /// \param[in] batch_size the maximum number of rows.
diff --git a/cpp/src/arrow/dataset/scanner_test.cc b/cpp/src/arrow/dataset/scanner_test.cc
index b4e374a7795..552102b3eda 100644
--- a/cpp/src/arrow/dataset/scanner_test.cc
+++ b/cpp/src/arrow/dataset/scanner_test.cc
@@ -18,9 +18,11 @@
 #include "arrow/dataset/scanner.h"
 
 #include <memory>
+#include <ostream>
 
 #include <gmock/gmock.h>
 
+#include "arrow/compute/api.h"
 #include "arrow/compute/api_scalar.h"
 #include "arrow/compute/api_vector.h"
 #include "arrow/compute/cast.h"
@@ -39,33 +41,56 @@ using testing::IsEmpty;
 namespace arrow {
 namespace dataset {
 
-constexpr int64_t kNumberChildDatasets = 2;
-constexpr int64_t kNumberBatches = 16;
-constexpr int64_t kBatchSize = 1024;
+struct TestScannerParams {
+  bool use_async;
+  bool use_threads;
+  int num_child_datasets;
+  int num_batches;
+  int items_per_batch;
+
+  static std::vector<TestScannerParams> Values() {
+    std::vector<TestScannerParams> values;
+    for (int sync = 0; sync < 2; sync++) {
+      for (int use_threads = 0; use_threads < 2; use_threads++) {
+        values.push_back(
+            {static_cast<bool>(sync), static_cast<bool>(use_threads), 1, 1, 1024});
+        values.push_back(
+            {static_cast<bool>(sync), static_cast<bool>(use_threads), 2, 16, 1024});
+      }
+    }
+    return values;
+  }
+};
 
-class TestScanner : public DatasetFixtureMixin,
-                    public ::testing::WithParamInterface<bool> {
- protected:
-  bool UseThreads() { return GetParam(); }
+std::ostream& operator<<(std::ostream& out, const TestScannerParams& params) {
+  out << (params.use_async ? "async-" : "sync-")
+      << (params.use_threads ? "threaded-" : "serial-") << params.num_child_datasets
+      << "d-" << params.num_batches << "b-" << params.items_per_batch << "i";
+  return out;
+}
 
+class TestScanner : public DatasetFixtureMixinWithParam<TestScannerParams> {
+ protected:
   std::shared_ptr<Scanner> MakeScanner(std::shared_ptr<RecordBatch> batch) {
-    std::vector<std::shared_ptr<RecordBatch>> batches{static_cast<size_t>(kNumberBatches),
-                                                      batch};
+    std::vector<std::shared_ptr<RecordBatch>> batches{
+        static_cast<size_t>(GetParam().num_batches), batch};
 
-    DatasetVector children{static_cast<size_t>(kNumberChildDatasets),
+    DatasetVector children{static_cast<size_t>(GetParam().num_child_datasets),
                            std::make_shared<InMemoryDataset>(batch->schema(), batches)};
 
     EXPECT_OK_AND_ASSIGN(auto dataset, UnionDataset::Make(batch->schema(), children));
 
     ScannerBuilder builder(dataset, options_);
-    ARROW_EXPECT_OK(builder.UseThreads(UseThreads()));
+    ARROW_EXPECT_OK(builder.UseThreads(GetParam().use_threads));
+    ARROW_EXPECT_OK(builder.UseAsync(GetParam().use_async));
     EXPECT_OK_AND_ASSIGN(auto scanner, builder.Finish());
     return scanner;
   }
 
   void AssertScannerEqualsRepetitionsOf(
       std::shared_ptr<Scanner> scanner, std::shared_ptr<RecordBatch> batch,
-      const int64_t total_batches = kNumberChildDatasets * kNumberBatches) {
+      const int64_t total_batches = GetParam().num_child_datasets *
+                                    GetParam().num_batches) {
     auto expected = ConstantArrayGenerator::Repeat(total_batches, batch);
 
     // Verifies that the unified BatchReader is equivalent to flattening all the
@@ -75,7 +100,8 @@ class TestScanner : public DatasetFixtureMixin,
 
   void AssertScanBatchesEqualRepetitionsOf(
       std::shared_ptr<Scanner> scanner, std::shared_ptr<RecordBatch> batch,
-      const int64_t total_batches = kNumberChildDatasets * kNumberBatches) {
+      const int64_t total_batches = GetParam().num_child_datasets *
+                                    GetParam().num_batches) {
     auto expected = ConstantArrayGenerator::Repeat(total_batches, batch);
 
     AssertScanBatchesEquals(expected.get(), scanner.get());
@@ -83,38 +109,40 @@ class TestScanner : public DatasetFixtureMixin,
 
   void AssertScanBatchesUnorderedEqualRepetitionsOf(
       std::shared_ptr<Scanner> scanner, std::shared_ptr<RecordBatch> batch,
-      const int64_t total_batches = kNumberChildDatasets * kNumberBatches) {
+      const int64_t total_batches = GetParam().num_child_datasets *
+                                    GetParam().num_batches) {
     auto expected = ConstantArrayGenerator::Repeat(total_batches, batch);
 
-    AssertScanBatchesUnorderedEquals(expected.get(), scanner.get());
+    AssertScanBatchesUnorderedEquals(expected.get(), scanner.get(), 1);
   }
 };
 
 TEST_P(TestScanner, Scan) {
   SetSchema({field("i32", int32()), field("f64", float64())});
-  auto batch = ConstantArrayGenerator::Zeroes(kBatchSize, schema_);
-  AssertScannerEqualsRepetitionsOf(MakeScanner(batch), batch);
+  auto batch = ConstantArrayGenerator::Zeroes(GetParam().items_per_batch, schema_);
+  AssertScanBatchesUnorderedEqualRepetitionsOf(MakeScanner(batch), batch);
 }
 
 TEST_P(TestScanner, ScanBatches) {
   SetSchema({field("i32", int32()), field("f64", float64())});
-  auto batch = ConstantArrayGenerator::Zeroes(kBatchSize, schema_);
+  auto batch = ConstantArrayGenerator::Zeroes(GetParam().items_per_batch, schema_);
   AssertScanBatchesEqualRepetitionsOf(MakeScanner(batch), batch);
 }
 
 TEST_P(TestScanner, ScanBatchesUnordered) {
   SetSchema({field("i32", int32()), field("f64", float64())});
-  auto batch = ConstantArrayGenerator::Zeroes(kBatchSize, schema_);
+  auto batch = ConstantArrayGenerator::Zeroes(GetParam().items_per_batch, schema_);
   AssertScanBatchesUnorderedEqualRepetitionsOf(MakeScanner(batch), batch);
 }
 
 TEST_P(TestScanner, ScanWithCappedBatchSize) {
   SetSchema({field("i32", int32()), field("f64", float64())});
-  auto batch = ConstantArrayGenerator::Zeroes(kBatchSize, schema_);
-  options_->batch_size = kBatchSize / 2;
-  auto expected = batch->Slice(kBatchSize / 2);
-  AssertScannerEqualsRepetitionsOf(MakeScanner(batch), expected,
-                                   kNumberChildDatasets * kNumberBatches * 2);
+  auto batch = ConstantArrayGenerator::Zeroes(GetParam().items_per_batch, schema_);
+  options_->batch_size = GetParam().items_per_batch / 2;
+  auto expected = batch->Slice(GetParam().items_per_batch / 2);
+  AssertScanBatchesEqualRepetitionsOf(
+      MakeScanner(batch), expected,
+      GetParam().num_child_datasets * GetParam().num_batches * 2);
 }
 
 TEST_P(TestScanner, FilteredScan) {
@@ -122,7 +150,8 @@ TEST_P(TestScanner, FilteredScan) {
 
   double value = 0.5;
   ASSERT_OK_AND_ASSIGN(auto f64,
-                       ArrayFromBuilderVisitor(float64(), kBatchSize, kBatchSize / 2,
+                       ArrayFromBuilderVisitor(float64(), GetParam().items_per_batch,
+                                               GetParam().items_per_batch / 2,
                                                [&](DoubleBuilder* builder) {
                                                  builder->UnsafeAppend(value);
                                                  builder->UnsafeAppend(-value);
@@ -134,47 +163,58 @@ TEST_P(TestScanner, FilteredScan) {
   auto batch = RecordBatch::Make(schema_, f64->length(), {f64});
 
   value = 0.5;
-  ASSERT_OK_AND_ASSIGN(
-      auto f64_filtered,
-      ArrayFromBuilderVisitor(float64(), kBatchSize / 2, [&](DoubleBuilder* builder) {
-        builder->UnsafeAppend(value);
-        value += 1.0;
-      }));
+  ASSERT_OK_AND_ASSIGN(auto f64_filtered,
+                       ArrayFromBuilderVisitor(float64(), GetParam().items_per_batch / 2,
+                                               [&](DoubleBuilder* builder) {
+                                                 builder->UnsafeAppend(value);
+                                                 value += 1.0;
+                                               }));
 
   auto filtered_batch =
       RecordBatch::Make(schema_, f64_filtered->length(), {f64_filtered});
 
-  AssertScannerEqualsRepetitionsOf(MakeScanner(batch), filtered_batch);
+  AssertScanBatchesEqualRepetitionsOf(MakeScanner(batch), filtered_batch);
+}
+
+TEST_P(TestScanner, ProjectedScan) {
+  SetSchema({field("i32", int32()), field("f64", float64())});
+  SetProjectedColumns({"i32"});
+  auto batch_in = ConstantArrayGenerator::Zeroes(GetParam().items_per_batch, schema_);
+  auto batch_out = ConstantArrayGenerator::Zeroes(GetParam().items_per_batch,
+                                                  schema({field("i32", int32())}));
+  AssertScanBatchesUnorderedEqualRepetitionsOf(MakeScanner(batch_in), batch_out);
 }
 
 TEST_P(TestScanner, MaterializeMissingColumn) {
   SetSchema({field("i32", int32()), field("f64", float64())});
-  auto batch_missing_f64 =
-      ConstantArrayGenerator::Zeroes(kBatchSize, schema({field("i32", int32())}));
+  auto batch_missing_f64 = ConstantArrayGenerator::Zeroes(
+      GetParam().items_per_batch, schema({field("i32", int32())}));
 
   auto fragment_missing_f64 = std::make_shared<InMemoryFragment>(
-      RecordBatchVector{static_cast<size_t>(kNumberChildDatasets * kNumberBatches),
-                        batch_missing_f64},
+      RecordBatchVector{
+          static_cast<size_t>(GetParam().num_child_datasets * GetParam().num_batches),
+          batch_missing_f64},
       equal(field_ref("f64"), literal(2.5)));
 
-  ASSERT_OK_AND_ASSIGN(auto f64, ArrayFromBuilderVisitor(float64(), kBatchSize,
-                                                         [&](DoubleBuilder* builder) {
-                                                           builder->UnsafeAppend(2.5);
-                                                         }));
+  ASSERT_OK_AND_ASSIGN(auto f64,
+                       ArrayFromBuilderVisitor(
+                           float64(), GetParam().items_per_batch,
+                           [&](DoubleBuilder* builder) { builder->UnsafeAppend(2.5); }));
   auto batch_with_f64 =
       RecordBatch::Make(schema_, f64->length(), {batch_missing_f64->column(0), f64});
 
   ScannerBuilder builder{schema_, fragment_missing_f64, options_};
   ASSERT_OK_AND_ASSIGN(auto scanner, builder.Finish());
 
-  AssertScannerEqualsRepetitionsOf(scanner, batch_with_f64);
+  AssertScanBatchesEqualRepetitionsOf(scanner, batch_with_f64);
 }
 
 TEST_P(TestScanner, ToTable) {
   SetSchema({field("i32", int32()), field("f64", float64())});
-  auto batch = ConstantArrayGenerator::Zeroes(kBatchSize, schema_);
-  std::vector<std::shared_ptr<RecordBatch>> batches{kNumberBatches * kNumberChildDatasets,
-                                                    batch};
+  auto batch = ConstantArrayGenerator::Zeroes(GetParam().items_per_batch, schema_);
+  std::vector<std::shared_ptr<RecordBatch>> batches{
+      static_cast<std::size_t>(GetParam().num_batches * GetParam().num_child_datasets),
+      batch};
 
   ASSERT_OK_AND_ASSIGN(auto expected, Table::FromRecordBatches(batches));
 
@@ -189,7 +229,7 @@ TEST_P(TestScanner, ToTable) {
 
 TEST_P(TestScanner, ScanWithVisitor) {
   SetSchema({field("i32", int32()), field("f64", float64())});
-  auto batch = ConstantArrayGenerator::Zeroes(kBatchSize, schema_);
+  auto batch = ConstantArrayGenerator::Zeroes(GetParam().items_per_batch, schema_);
   auto scanner = MakeScanner(batch);
   ASSERT_OK(scanner->Scan([batch](TaggedRecordBatch scanned_batch) {
     AssertBatchesEqual(*batch, *scanned_batch.record_batch);
@@ -198,21 +238,24 @@ TEST_P(TestScanner, ScanWithVisitor) {
 }
 
 TEST_P(TestScanner, TakeIndices) {
+  auto batch_size = GetParam().items_per_batch;
+  auto num_batches = GetParam().num_batches;
+  auto num_datasets = GetParam().num_child_datasets;
   SetSchema({field("i32", int32()), field("f64", float64())});
   ArrayVector arrays(2);
-  ArrayFromVector<Int32Type>(internal::Iota<int32_t>(kBatchSize), &arrays[0]);
-  ArrayFromVector<DoubleType>(internal::Iota<double>(static_cast<double>(kBatchSize)),
+  ArrayFromVector<Int32Type>(internal::Iota<int32_t>(batch_size), &arrays[0]);
+  ArrayFromVector<DoubleType>(internal::Iota<double>(static_cast<double>(batch_size)),
                               &arrays[1]);
-  auto batch = RecordBatch::Make(schema_, kBatchSize, arrays);
+  auto batch = RecordBatch::Make(schema_, batch_size, arrays);
 
   auto scanner = MakeScanner(batch);
 
   std::shared_ptr<Array> indices;
   {
-    ArrayFromVector<Int64Type>(internal::Iota(kBatchSize), &indices);
+    ArrayFromVector<Int64Type>(internal::Iota(batch_size), &indices);
     ASSERT_OK_AND_ASSIGN(auto taken, scanner->TakeRows(*indices));
     ASSERT_OK_AND_ASSIGN(auto expected, Table::FromRecordBatches({batch}));
-    ASSERT_EQ(expected->num_rows(), kBatchSize);
+    ASSERT_EQ(expected->num_rows(), batch_size);
     AssertTablesEqual(*expected, *taken);
   }
   {
@@ -223,16 +266,16 @@ TEST_P(TestScanner, TakeIndices) {
     ASSERT_EQ(expected.table()->num_rows(), 4);
     AssertTablesEqual(*expected.table(), *taken);
   }
-  {
-    ArrayFromVector<Int64Type>({kBatchSize + 2, kBatchSize + 1}, &indices);
+  if (num_batches > 1) {
+    ArrayFromVector<Int64Type>({batch_size + 2, batch_size + 1}, &indices);
     ASSERT_OK_AND_ASSIGN(auto table, scanner->ToTable());
     ASSERT_OK_AND_ASSIGN(auto taken, scanner->TakeRows(*indices));
     ASSERT_OK_AND_ASSIGN(auto expected, compute::Take(table, *indices));
     ASSERT_EQ(expected.table()->num_rows(), 2);
     AssertTablesEqual(*expected.table(), *taken);
   }
-  {
-    ArrayFromVector<Int64Type>({1, 3, 5, 7, kBatchSize + 1, 2 * kBatchSize + 2},
+  if (num_batches > 1) {
+    ArrayFromVector<Int64Type>({1, 3, 5, 7, batch_size + 1, 2 * batch_size + 2},
                                &indices);
     ASSERT_OK_AND_ASSIGN(auto taken, scanner->TakeRows(*indices));
     ASSERT_OK_AND_ASSIGN(auto table, scanner->ToTable());
@@ -241,19 +284,23 @@ TEST_P(TestScanner, TakeIndices) {
     AssertTablesEqual(*expected.table(), *taken);
   }
   {
-    auto base = kNumberChildDatasets * kNumberBatches * kBatchSize;
+    auto base = num_datasets * num_batches * batch_size;
     ArrayFromVector<Int64Type>({base + 1}, &indices);
     EXPECT_RAISES_WITH_MESSAGE_THAT(
-        IndexError, ::testing::HasSubstr("Some indices were out of bounds: 32769"),
+        IndexError,
+        ::testing::HasSubstr("Some indices were out of bounds: " +
+                             std::to_string(base + 1)),
         scanner->TakeRows(*indices));
   }
   {
-    auto base = kNumberChildDatasets * kNumberBatches * kBatchSize;
+    auto base = num_datasets * num_batches * batch_size;
     ArrayFromVector<Int64Type>(
         {1, 2, base + 1, base + 2, base + 3, base + 4, base + 5, base + 6}, &indices);
     EXPECT_RAISES_WITH_MESSAGE_THAT(
         IndexError,
-        ::testing::HasSubstr("Some indices were out of bounds: 32769, 32770, 32771, ..."),
+        ::testing::HasSubstr(
+            "Some indices were out of bounds: " + std::to_string(base + 1) + ", " +
+            std::to_string(base + 2) + ", " + std::to_string(base + 3) + ", ..."),
         scanner->TakeRows(*indices));
   }
 }
@@ -276,11 +323,11 @@ class FailingFragment : public InMemoryFragment {
 
 TEST_P(TestScanner, ScanBatchesFailure) {
   SetSchema({field("i32", int32()), field("f64", float64())});
-  auto batch = ConstantArrayGenerator::Zeroes(kBatchSize, schema_);
+  auto batch = ConstantArrayGenerator::Zeroes(GetParam().items_per_batch, schema_);
   RecordBatchVector batches = {batch, batch, batch, batch};
 
   ScannerBuilder builder(schema_, std::make_shared<FailingFragment>(batches), options_);
-  ASSERT_OK(builder.UseThreads(UseThreads()));
+  ASSERT_OK(builder.UseThreads(GetParam().use_threads));
   ASSERT_OK_AND_ASSIGN(auto scanner, builder.Finish());
 
   ASSERT_OK_AND_ASSIGN(auto batch_it, scanner->ScanBatches());
@@ -302,8 +349,11 @@ TEST_P(TestScanner, ScanBatchesFailure) {
 }
 
 TEST_P(TestScanner, Head) {
+  auto batch_size = GetParam().items_per_batch;
+  auto num_batches = GetParam().num_batches;
+  auto num_datasets = GetParam().num_child_datasets;
   SetSchema({field("i32", int32()), field("f64", float64())});
-  auto batch = ConstantArrayGenerator::Zeroes(kBatchSize, schema_);
+  auto batch = ConstantArrayGenerator::Zeroes(batch_size, schema_);
 
   auto scanner = MakeScanner(batch);
   std::shared_ptr<Table> expected, actual;
@@ -313,30 +363,32 @@ TEST_P(TestScanner, Head) {
   AssertTablesEqual(*expected, *actual);
 
   ASSERT_OK_AND_ASSIGN(expected, Table::FromRecordBatches(schema_, {batch}));
-  ASSERT_OK_AND_ASSIGN(actual, scanner->Head(kBatchSize));
+  ASSERT_OK_AND_ASSIGN(actual, scanner->Head(batch_size));
   AssertTablesEqual(*expected, *actual);
 
   ASSERT_OK_AND_ASSIGN(expected, Table::FromRecordBatches(schema_, {batch->Slice(0, 1)}));
   ASSERT_OK_AND_ASSIGN(actual, scanner->Head(1));
   AssertTablesEqual(*expected, *actual);
 
-  ASSERT_OK_AND_ASSIGN(expected,
-                       Table::FromRecordBatches(schema_, {batch, batch->Slice(0, 1)}));
-  ASSERT_OK_AND_ASSIGN(actual, scanner->Head(kBatchSize + 1));
-  AssertTablesEqual(*expected, *actual);
+  if (num_batches > 1) {
+    ASSERT_OK_AND_ASSIGN(expected,
+                         Table::FromRecordBatches(schema_, {batch, batch->Slice(0, 1)}));
+    ASSERT_OK_AND_ASSIGN(actual, scanner->Head(batch_size + 1));
+    AssertTablesEqual(*expected, *actual);
+  }
 
   ASSERT_OK_AND_ASSIGN(expected, scanner->ToTable());
-  ASSERT_OK_AND_ASSIGN(actual,
-                       scanner->Head(kBatchSize * kNumberBatches * kNumberChildDatasets));
+  ASSERT_OK_AND_ASSIGN(actual, scanner->Head(batch_size * num_batches * num_datasets));
   AssertTablesEqual(*expected, *actual);
 
   ASSERT_OK_AND_ASSIGN(expected, scanner->ToTable());
-  ASSERT_OK_AND_ASSIGN(
-      actual, scanner->Head(kBatchSize * kNumberBatches * kNumberChildDatasets + 100));
+  ASSERT_OK_AND_ASSIGN(actual,
+                       scanner->Head(batch_size * num_batches * num_datasets + 100));
   AssertTablesEqual(*expected, *actual);
 }
 
-INSTANTIATE_TEST_SUITE_P(TestScannerThreading, TestScanner, ::testing::Bool());
+INSTANTIATE_TEST_SUITE_P(TestScannerThreading, TestScanner,
+                         ::testing::ValuesIn(TestScannerParams::Values()));
 
 class TestScannerBuilder : public ::testing::Test {
   void SetUp() override {
diff --git a/cpp/src/arrow/dataset/test_util.h b/cpp/src/arrow/dataset/test_util.h
index 1d1266de671..b94441e178a 100644
--- a/cpp/src/arrow/dataset/test_util.h
+++ b/cpp/src/arrow/dataset/test_util.h
@@ -216,23 +216,32 @@ class DatasetFixtureMixin : public ::testing::Test {
   }
 
   /// \brief Ensure that record batches found in reader are equals to the
-  /// record batches yielded by a scanner.  Each fragment in the scanner is
-  /// expected to have a single batch.
+  /// record batches yielded by a scanner.
   void AssertScanBatchesUnorderedEquals(RecordBatchReader* expected, Scanner* scanner,
+                                        int expected_batches_per_fragment,
                                         bool ensure_drained = true) {
     ASSERT_OK_AND_ASSIGN(auto it, scanner->ScanBatchesUnordered());
 
     int fragment_counter = 0;
     bool saw_last_fragment = false;
-    ARROW_EXPECT_OK(it.Visit([&](EnumeratedRecordBatch batch) -> Status {
-      EXPECT_EQ(0, batch.record_batch.index);
-      EXPECT_EQ(true, batch.record_batch.last);
-      EXPECT_EQ(fragment_counter++, batch.fragment.index);
-      EXPECT_FALSE(saw_last_fragment);
+    int batch_counter = 0;
+    auto visitor = [&](EnumeratedRecordBatch batch) -> Status {
+      if (batch_counter == 0) {
+        EXPECT_FALSE(saw_last_fragment);
+      }
+      EXPECT_EQ(batch_counter++, batch.record_batch.index);
+      auto last_batch = batch_counter == expected_batches_per_fragment;
+      EXPECT_EQ(last_batch, batch.record_batch.last);
+      EXPECT_EQ(fragment_counter, batch.fragment.index);
+      if (last_batch) {
+        fragment_counter++;
+        batch_counter = 0;
+      }
       saw_last_fragment = batch.fragment.last;
       AssertBatchEquals(expected, *batch.record_batch.value);
       return Status::OK();
-    }));
+    };
+    ARROW_EXPECT_OK(it.Visit(visitor));
 
     if (ensure_drained) {
       EnsureRecordBatchReaderDrained(expected);
@@ -265,10 +274,18 @@ class DatasetFixtureMixin : public ::testing::Test {
     ASSERT_OK_AND_ASSIGN(options_->filter, filter.Bind(*schema_));
   }
 
+  void SetProjectedColumns(std::vector<std::string> column_names) {
+    ASSERT_OK(SetProjection(options_.get(), std::move(column_names)));
+  }
+
   std::shared_ptr<Schema> schema_;
   std::shared_ptr<ScanOptions> options_;
 };
 
+template <typename P>
+class DatasetFixtureMixinWithParam : public DatasetFixtureMixin,
+                                     public ::testing::WithParamInterface<P> {};
+
 /// \brief A dummy FileFormat implementation
 class DummyFileFormat : public FileFormat {
  public:
@@ -290,7 +307,7 @@ class DummyFileFormat : public FileFormat {
 
   /// \brief Open a file for scanning (always returns an empty iterator)
   Result<ScanTaskIterator> ScanFile(
-      std::shared_ptr<ScanOptions> options,
+      const std::shared_ptr<ScanOptions>& options,
       const std::shared_ptr<FileFragment>& fragment) const override {
     return MakeEmptyIterator<std::shared_ptr<ScanTask>>();
   }
@@ -330,7 +347,7 @@ class JSONRecordBatchFileFormat : public FileFormat {
 
   /// \brief Open a file for scanning
   Result<ScanTaskIterator> ScanFile(
-      std::shared_ptr<ScanOptions> options,
+      const std::shared_ptr<ScanOptions>& options,
       const std::shared_ptr<FileFragment>& fragment) const override {
     ARROW_ASSIGN_OR_RAISE(auto file, fragment->source().Open());
     ARROW_ASSIGN_OR_RAISE(int64_t size, file->GetSize());
diff --git a/cpp/src/arrow/util/async_generator.h b/cpp/src/arrow/util/async_generator.h
index f274478fd75..fd5d0d28e9d 100644
--- a/cpp/src/arrow/util/async_generator.h
+++ b/cpp/src/arrow/util/async_generator.h
@@ -110,7 +110,7 @@ Future<> VisitAsyncGenerator(AsyncGenerator<T> generator,
 /// \brief Waits for an async generator to complete, discarding results.
 template <typename T>
 Future<> DiscardAllFromAsyncGenerator(AsyncGenerator<T> generator) {
-  std::function<Status(T)> visitor = [](...) { return Status::OK(); };
+  std::function<Status(T)> visitor = [](const T&) { return Status::OK(); };
   return VisitAsyncGenerator(generator, visitor);
 }
 
@@ -280,6 +280,23 @@ AsyncGenerator<V> MakeMappedGenerator(AsyncGenerator<T> source_generator,
   return MappingGenerator<T, V>(std::move(source_generator), std::move(map));
 }
 
+template <typename V, typename T, typename MapFunc>
+AsyncGenerator<V> MakeMappedGenerator(AsyncGenerator<T> source_generator, MapFunc map) {
+  struct MapCallback {
+    MapFunc map;
+
+    Future<V> operator()(const T& val) { return EnsureFuture(map(val)); }
+
+    Future<V> EnsureFuture(Result<V> val) {
+      return Future<V>::MakeFinished(std::move(val));
+    }
+    Future<V> EnsureFuture(V val) { return Future<V>::MakeFinished(std::move(val)); }
+    Future<V> EnsureFuture(Future<V> val) { return val; }
+  };
+  std::function<Future<V>(const T&)> map_fn = MapCallback{map};
+  return MappingGenerator<T, V>(std::move(source_generator), map_fn);
+}
+
 /// \see MakeSequencingGenerator
 template <typename T, typename ComesAfter, typename IsNext>
 class SequencingGenerator {

From 4d3ce2b5dcb3805d838d25eda74672a12e4a4cd3 Mon Sep 17 00:00:00 2001
From: Dominik Moritz <domoritz@gmail.com>
Date: Tue, 20 Apr 2021 10:15:37 +0900
Subject: [PATCH 093/719] ARROW-12423: [Docs] Remove Codecov badge

Closes #10102 from domoritz/patch-7

Authored-by: Dominik Moritz <domoritz@gmail.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 README.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/README.md b/README.md
index 133018c72df..e9e13537cc9 100644
--- a/README.md
+++ b/README.md
@@ -20,7 +20,6 @@
 # Apache Arrow
 
 [![Build Status](https://ci.appveyor.com/api/projects/status/github/apache/arrow/branch/master?svg=true)](https://ci.appveyor.com/project/ApacheSoftwareFoundation/arrow/branch/master)
-[![Coverage Status](https://codecov.io/gh/apache/arrow/branch/master/graph/badge.svg)](https://codecov.io/gh/apache/arrow?branch=master)
 [![Fuzzing Status](https://oss-fuzz-build-logs.storage.googleapis.com/badges/arrow.svg)](https://bugs.chromium.org/p/oss-fuzz/issues/list?sort=-opened&can=1&q=proj:arrow)
 [![License](http://img.shields.io/:license-Apache%202-blue.svg)](https://github.com/apache/arrow/blob/master/LICENSE.txt)
 [![Twitter Follow](https://img.shields.io/twitter/follow/apachearrow.svg?style=social&label=Follow)](https://twitter.com/apachearrow)

From 32e07786af2093e01e83aae5179525ba6eec034c Mon Sep 17 00:00:00 2001
From: Dominik Moritz <domoritz@gmail.com>
Date: Mon, 19 Apr 2021 18:16:59 -0700
Subject: [PATCH 094/719] MINOR: [JS] Remove Travis badge (#10105)

---
 js/README.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/js/README.md b/js/README.md
index 586eceaf7e7..9ca215363ed 100644
--- a/js/README.md
+++ b/js/README.md
@@ -20,7 +20,6 @@
 # [Apache Arrow](https://github.com/apache/arrow) in JS
 
 [![npm version](https://img.shields.io/npm/v/apache-arrow.svg)](https://www.npmjs.com/package/apache-arrow)
-[![Build Status](https://travis-ci.org/apache/arrow.svg?branch=master)](https://travis-ci.org/apache/arrow)
 
 Arrow is a set of technologies that enable big data systems to process and transfer data quickly.
 

From 930c381bb1348011ad6e531df61d0bd1b832cfe3 Mon Sep 17 00:00:00 2001
From: Yibo Cai <yibo.cai@arm.com>
Date: Tue, 20 Apr 2021 10:01:09 -0400
Subject: [PATCH 095/719] ARROW-12475: [C++] Fix 'warn_unused_result' warning

Closes #10107 from cyb70289/12475-clang-warning

Authored-by: Yibo Cai <yibo.cai@arm.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/src/arrow/util/thread_pool_benchmark.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cpp/src/arrow/util/thread_pool_benchmark.cc b/cpp/src/arrow/util/thread_pool_benchmark.cc
index 054d616d035..b02ca9bb54b 100644
--- a/cpp/src/arrow/util/thread_pool_benchmark.cc
+++ b/cpp/src/arrow/util/thread_pool_benchmark.cc
@@ -110,10 +110,10 @@ static void RunInSerialExecutor(benchmark::State& state) {  // NOLINT non-const
   Workload workload(workload_size);
 
   for (auto _ : state) {
-    SerialExecutor::RunInSerialExecutor<arrow::detail::Empty>(
+    ABORT_NOT_OK(SerialExecutor::RunInSerialExecutor<arrow::detail::Empty>(
         [&](internal::Executor* executor) {
           return DeferNotOk(executor->Submit(std::ref(workload)));
-        });
+        }));
   }
 
   state.SetItemsProcessed(state.iterations());

From 6b4a4aacc80b4be482054855db4af529b0bc5e65 Mon Sep 17 00:00:00 2001
From: Alessandro Molina <amol@turbogears.org>
Date: Tue, 20 Apr 2021 16:21:47 +0200
Subject: [PATCH 096/719] ARROW-12466: [Python] Avoid AttributeError crash when
 comparing with None

https://issues.apache.org/jira/browse/ARROW-12466

Closes #10099 from amol-/ARROW-12466

Authored-by: Alessandro Molina <amol@turbogears.org>
Signed-off-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
---
 python/pyarrow/array.pxi           | 4 +++-
 python/pyarrow/tests/test_array.py | 3 +++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 748a64e183a..894ba96c013 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -1021,9 +1021,11 @@ cdef class Array(_PandasConvertible):
         try:
             return self.equals(other)
         except TypeError:
+            # This also handles comparing with None
+            # as Array.equals(None) raises a TypeError.
             return NotImplemented
 
-    def equals(Array self, Array other):
+    def equals(Array self, Array other not None):
         return self.ap.Equals(deref(other.ap))
 
     def __len__(self):
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index 37d69363816..8e7e21f0bf6 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -522,6 +522,9 @@ def test_array_eq():
     assert (arr1 == arr3) is False
     assert (arr1 != arr3) is True
 
+    assert (arr1 == 1) is False
+    assert (arr1 == None) is False  # noqa: E711
+
 
 def test_array_from_buffers():
     values_buf = pa.py_buffer(np.int16([4, 5, 6, 7]))

From 893bcc25521bfd33272d60a0848a9449c920e80c Mon Sep 17 00:00:00 2001
From: Yibo Cai <yibo.cai@arm.com>
Date: Wed, 21 Apr 2021 09:04:59 +0900
Subject: [PATCH 097/719] ARROW-12477: [Release] Download aarch64 miniforge

Closes #10109 from cyb70289/12477-aarch64-miniforge

Authored-by: Yibo Cai <yibo.cai@arm.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 dev/release/verify-release-candidate.sh | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh
index 888f763a0a3..0e9a4a4e579 100755
--- a/dev/release/verify-release-candidate.sh
+++ b/dev/release/verify-release-candidate.sh
@@ -224,6 +224,8 @@ setup_miniconda() {
     else
         MINICONDA_URL=https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh
     fi
+  elif [ "$(uname)" == "Linux" ] && [ "$(uname -m)" == "aarch64" ]; then
+    MINICONDA_URL=https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-aarch64.sh
   else
     MINICONDA_URL=https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh
   fi

From 3dd6d323ad2f40543cb2abf23c91c237049ee29d Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Wed, 21 Apr 2021 09:54:54 +0900
Subject: [PATCH 098/719] ARROW-12488: [GLib] Use g_memdup2() with GLib 2.68 or
 later

g_memdup() is deprecated.

Closes #10119 from kou/glib-memdup2

Authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 c_glib/plasma-glib/client.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/c_glib/plasma-glib/client.cpp b/c_glib/plasma-glib/client.cpp
index 2a5ccf98bd1..26476f4d6b5 100644
--- a/c_glib/plasma-glib/client.cpp
+++ b/c_glib/plasma-glib/client.cpp
@@ -265,6 +265,10 @@ gplasma_client_create_options_new(void)
   return GPLASMA_CLIENT_CREATE_OPTIONS(options);
 }
 
+#if !GLIB_CHECK_VERSION(2, 68, 0)
+#  define g_memdup2(memory, byte_size) g_memdup(memory, byte_size)
+#endif
+
 /**
  * gplasma_client_create_options_set_metadata:
  * @options: A #GPlasmaClientCreateOptions.
@@ -282,7 +286,7 @@ gplasma_client_create_options_set_metadata(GPlasmaClientCreateOptions *options,
   if (priv->metadata) {
     g_free(priv->metadata);
   }
-  priv->metadata = static_cast<guint8 *>(g_memdup(metadata, size));
+  priv->metadata = static_cast<guint8 *>(g_memdup2(metadata, size));
   priv->metadata_size = size;
 }
 

From 2ed54dbdb8d27b65def2db8d938240030986106c Mon Sep 17 00:00:00 2001
From: Weston Pace <weston.pace@gmail.com>
Date: Wed, 21 Apr 2021 01:42:23 +0000
Subject: [PATCH 099/719] ARROW-12325: [C++] [CI] Nightly gandiva build failing
 due to failure of compiler to move return value

Gandiva build failure caused by failure of GCC 4.8.2 compiler to automatically move return value.

Failed build: https://github.com/ursacomputing/crossbow/runs/2303374510
Godbolt showing I'm not crazy: https://gcc.godbolt.org/z/x138zevhE

Closes #9975 from westonpace/bugfix/ARROW-12325

Authored-by: Weston Pace <weston.pace@gmail.com>
Signed-off-by: Yibo Cai <yibo.cai@arm.com>
---
 cpp/src/arrow/util/vector.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cpp/src/arrow/util/vector.h b/cpp/src/arrow/util/vector.h
index 67401d496e6..b9f2e2a45aa 100644
--- a/cpp/src/arrow/util/vector.h
+++ b/cpp/src/arrow/util/vector.h
@@ -92,7 +92,7 @@ Result<std::vector<To>> MaybeMapVector(Fn&& map, const std::vector<From>& src) {
   out.reserve(src.size());
   ARROW_RETURN_NOT_OK(MaybeTransform(src.begin(), src.end(), std::back_inserter(out),
                                      std::forward<Fn>(map)));
-  return out;
+  return std::move(out);
 }
 
 template <typename Fn, typename From,
@@ -130,7 +130,7 @@ Result<std::vector<T>> UnwrapOrRaise(std::vector<Result<T>>&& results) {
     }
     out.push_back(it->MoveValueUnsafe());
   }
-  return out;
+  return std::move(out);
 }
 
 }  // namespace internal

From c408048d1152efb973245402510dcd57d2ffee5a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Wed, 21 Apr 2021 11:03:51 +0900
Subject: [PATCH 100/719] ARROW-12467: [C++][Gandiva] Add support for LLVM12
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #10111 from kszucs/ARROW-12467

Lead-authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Co-authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 .env                                                         | 2 +-
 .travis.yml                                                  | 5 +----
 cpp/CMakeLists.txt                                           | 1 +
 .../apache-arrow/apt/debian-bullseye/Dockerfile              | 4 ++--
 .../linux-packages/apache-arrow/apt/debian-buster/Dockerfile | 4 ++--
 .../linux-packages/apache-arrow/apt/ubuntu-focal/Dockerfile  | 4 ++--
 .../linux-packages/apache-arrow/apt/ubuntu-groovy/Dockerfile | 4 ++--
 7 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/.env b/.env
index 4fb24bed40a..587430579f9 100644
--- a/.env
+++ b/.env
@@ -45,7 +45,7 @@ DEBIAN=10
 UBUNTU=20.04
 FEDORA=33
 PYTHON=3.6
-LLVM=11
+LLVM=12
 CLANG_TOOLS=8
 RUST=nightly-2021-03-24
 GO=1.15
diff --git a/.travis.yml b/.travis.yml
index 2cf70cca982..26b4d78fd2d 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -68,7 +68,7 @@ jobs:
           -e CMAKE_UNITY_BUILD=ON
           -e CPP_MAKE_PARALLELISM=4
           "
-        # The LLVM's APT repository provides only arm64 binaries.
+        # The LLVM's APT repository doesn't provide arm64 binaries.
         # We should use LLVM provided by Ubuntu.
         LLVM: "10"
         UBUNTU: "20.04"
@@ -100,9 +100,6 @@ jobs:
           -e cares_SOURCE=BUNDLED
           -e gRPC_SOURCE=BUNDLED
           "
-        # The LLVM's APT repository provides only arm64 binaries.
-        # We should use LLVM provided by Ubuntu.
-        LLVM: "10"
         UBUNTU: "20.04"
 
     - name: "Go on s390x"
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 1705e854fb1..4e88c32dcd6 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -109,6 +109,7 @@ set(ARROW_CMAKE_INSTALL_DIR "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}")
 set(ARROW_DOC_DIR "share/doc/${PROJECT_NAME}")
 
 set(ARROW_LLVM_VERSIONS
+    "12.0"
     "11.1"
     "11.0"
     "10"
diff --git a/dev/tasks/linux-packages/apache-arrow/apt/debian-bullseye/Dockerfile b/dev/tasks/linux-packages/apache-arrow/apt/debian-bullseye/Dockerfile
index fa4961bc97e..761c44406e6 100644
--- a/dev/tasks/linux-packages/apache-arrow/apt/debian-bullseye/Dockerfile
+++ b/dev/tasks/linux-packages/apache-arrow/apt/debian-bullseye/Dockerfile
@@ -37,7 +37,7 @@ RUN \
   apt install -y -V ${quiet} \
     build-essential \
     ccache \
-    clang-11 \
+    clang \
     cmake \
     debhelper \
     devscripts \
@@ -62,7 +62,7 @@ RUN \
     libthrift-dev \
     libutf8proc-dev \
     libzstd-dev \
-    llvm-11-dev \
+    llvm-dev \
     lsb-release \
     ninja-build \
     pkg-config \
diff --git a/dev/tasks/linux-packages/apache-arrow/apt/debian-buster/Dockerfile b/dev/tasks/linux-packages/apache-arrow/apt/debian-buster/Dockerfile
index 5dcc1b46b2d..a5c8456e87e 100644
--- a/dev/tasks/linux-packages/apache-arrow/apt/debian-buster/Dockerfile
+++ b/dev/tasks/linux-packages/apache-arrow/apt/debian-buster/Dockerfile
@@ -74,8 +74,8 @@ RUN \
     tzdata \
     zlib1g-dev && \
   apt install -y -V -t buster-backports ${quiet} \
-    clang-8 \
-    llvm-8-dev && \
+    clang-11 \
+    llvm-11-dev && \
   if apt list | grep '^nvidia-cuda-toolkit/'; then \
     apt install -y -V ${quiet} nvidia-cuda-toolkit; \
   fi && \
diff --git a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal/Dockerfile b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal/Dockerfile
index ad83bfa9002..8fe70edb6ad 100644
--- a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal/Dockerfile
+++ b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal/Dockerfile
@@ -35,7 +35,7 @@ RUN \
   apt install -y -V ${quiet} \
     build-essential \
     ccache \
-    clang-10 \
+    clang \
     cmake \
     debhelper \
     devscripts \
@@ -58,7 +58,7 @@ RUN \
     libthrift-dev \
     libutf8proc-dev \
     libzstd-dev \
-    llvm-10-dev \
+    llvm-dev \
     lsb-release \
     ninja-build \
     pkg-config \
diff --git a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-groovy/Dockerfile b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-groovy/Dockerfile
index d60e6320e36..5209be29fb5 100644
--- a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-groovy/Dockerfile
+++ b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-groovy/Dockerfile
@@ -35,7 +35,7 @@ RUN \
   apt install -y -V ${quiet} \
     build-essential \
     ccache \
-    clang-11 \
+    clang \
     cmake \
     debhelper \
     devscripts \
@@ -59,7 +59,7 @@ RUN \
     libthrift-dev \
     libutf8proc-dev \
     libzstd-dev \
-    llvm-11-dev \
+    llvm-dev \
     lsb-release \
     ninja-build \
     pkg-config \

From 7c4a07eeddf885189f7a6ad01ba9f98945bfa022 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Wed, 21 Apr 2021 11:58:50 +0900
Subject: [PATCH 101/719] ARROW-12487: [C++][Dataset] Fix ScanBatches() hanging

Errors weren't being handled in all paths.

Closes #10115 from lidavidm/arrow-12487

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 cpp/src/arrow/dataset/scanner.cc      |  17 ++++-
 cpp/src/arrow/dataset/scanner_test.cc | 103 +++++++++++++++++++++-----
 2 files changed, 100 insertions(+), 20 deletions(-)

diff --git a/cpp/src/arrow/dataset/scanner.cc b/cpp/src/arrow/dataset/scanner.cc
index 43c024768ea..aa95c478dba 100644
--- a/cpp/src/arrow/dataset/scanner.cc
+++ b/cpp/src/arrow/dataset/scanner.cc
@@ -160,6 +160,19 @@ struct ScanBatchesState : public std::enable_shared_from_this<ScanBatchesState>
     ready.notify_one();
   }
 
+  template <typename T>
+  Result<T> PushError(Result<T>&& result, size_t task_index) {
+    if (!result.ok()) {
+      {
+        std::lock_guard<std::mutex> lock(mutex);
+        task_drained[task_index] = true;
+        iteration_error = result.status();
+      }
+      ready.notify_one();
+    }
+    return std::move(result);
+  }
+
   Status Finish(size_t task_index) {
     {
       std::lock_guard<std::mutex> lock(mutex);
@@ -190,9 +203,9 @@ struct ScanBatchesState : public std::enable_shared_from_this<ScanBatchesState>
 
     lock.unlock();
     task_group->Append([state, id, scan_task]() {
-      ARROW_ASSIGN_OR_RAISE(auto batch_it, scan_task->Execute());
+      ARROW_ASSIGN_OR_RAISE(auto batch_it, state->PushError(scan_task->Execute(), id));
       for (auto maybe_batch : batch_it) {
-        ARROW_ASSIGN_OR_RAISE(auto batch, maybe_batch);
+        ARROW_ASSIGN_OR_RAISE(auto batch, state->PushError(std::move(maybe_batch), id));
         state->Push(TaggedRecordBatch{std::move(batch), scan_task->fragment()}, id);
       }
       return state->Finish(id);
diff --git a/cpp/src/arrow/dataset/scanner_test.cc b/cpp/src/arrow/dataset/scanner_test.cc
index 552102b3eda..27fcef1f04c 100644
--- a/cpp/src/arrow/dataset/scanner_test.cc
+++ b/cpp/src/arrow/dataset/scanner_test.cc
@@ -321,31 +321,98 @@ class FailingFragment : public InMemoryFragment {
   }
 };
 
+class FailingExecuteScanTask : public InMemoryScanTask {
+ public:
+  using InMemoryScanTask::InMemoryScanTask;
+
+  Result<RecordBatchIterator> Execute() override {
+    return Status::Invalid("Oh no, we failed!");
+  }
+};
+
+class FailingIterationScanTask : public InMemoryScanTask {
+ public:
+  using InMemoryScanTask::InMemoryScanTask;
+
+  Result<RecordBatchIterator> Execute() override {
+    int index = 0;
+    auto batches = record_batches_;
+    return MakeFunctionIterator(
+        [index, batches]() mutable -> Result<std::shared_ptr<RecordBatch>> {
+          if (index < 1) {
+            return batches[index++];
+          }
+          return Status::Invalid("Oh no, we failed!");
+        });
+  }
+};
+
+template <typename T>
+class FailingScanTaskFragment : public InMemoryFragment {
+ public:
+  using InMemoryFragment::InMemoryFragment;
+  Result<ScanTaskIterator> Scan(std::shared_ptr<ScanOptions> options) override {
+    auto self = shared_from_this();
+    ScanTaskVector scan_tasks{std::make_shared<T>(record_batches_, options, self)};
+    return MakeVectorIterator(std::move(scan_tasks));
+  }
+};
+
 TEST_P(TestScanner, ScanBatchesFailure) {
   SetSchema({field("i32", int32()), field("f64", float64())});
   auto batch = ConstantArrayGenerator::Zeroes(GetParam().items_per_batch, schema_);
   RecordBatchVector batches = {batch, batch, batch, batch};
+  // Note these tests are only for SyncScanner at the moment
 
-  ScannerBuilder builder(schema_, std::make_shared<FailingFragment>(batches), options_);
-  ASSERT_OK(builder.UseThreads(GetParam().use_threads));
-  ASSERT_OK_AND_ASSIGN(auto scanner, builder.Finish());
-
-  ASSERT_OK_AND_ASSIGN(auto batch_it, scanner->ScanBatches());
-
-  int counter = 0;
-  while (true) {
-    // Make sure we get all batches that were yielded before the failing scan task
-    auto maybe_batch = batch_it.Next();
-    if (counter++ <= 16) {
-      ASSERT_OK_AND_ASSIGN(auto scanned_batch, maybe_batch);
-      AssertBatchesEqual(*batch, *scanned_batch.record_batch);
-      ASSERT_NE(nullptr, scanned_batch.fragment);
-    } else {
-      EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, ::testing::HasSubstr("Oh no, we failed!"),
-                                      maybe_batch);
-      break;
+  // Case 1: failure when getting next scan task
+  {
+    ScannerBuilder builder(schema_, std::make_shared<FailingFragment>(batches), options_);
+    ASSERT_OK(builder.UseThreads(GetParam().use_threads));
+    ASSERT_OK_AND_ASSIGN(auto scanner, builder.Finish());
+    ASSERT_OK_AND_ASSIGN(auto batch_it, scanner->ScanBatches());
+
+    int counter = 0;
+    while (true) {
+      // Make sure we get all batches that were yielded before the failing scan task
+      auto maybe_batch = batch_it.Next();
+      if (counter++ <= 16) {
+        ASSERT_OK_AND_ASSIGN(auto scanned_batch, maybe_batch);
+        AssertBatchesEqual(*batch, *scanned_batch.record_batch);
+        ASSERT_NE(nullptr, scanned_batch.fragment);
+      } else {
+        EXPECT_RAISES_WITH_MESSAGE_THAT(
+            Invalid, ::testing::HasSubstr("Oh no, we failed!"), maybe_batch);
+        break;
+      }
     }
   }
+
+  // Case 2: failure when calling ScanTask::Execute
+  {
+    ScannerBuilder builder(
+        schema_,
+        std::make_shared<FailingScanTaskFragment<FailingExecuteScanTask>>(batches),
+        options_);
+    ASSERT_OK(builder.UseThreads(GetParam().use_threads));
+    ASSERT_OK_AND_ASSIGN(auto scanner, builder.Finish());
+    ASSERT_OK_AND_ASSIGN(auto batch_it, scanner->ScanBatches());
+    EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, ::testing::HasSubstr("Oh no, we failed!"),
+                                    batch_it.Next());
+  }
+
+  // Case 3: failure when calling RecordBatchIterator::Next
+  {
+    ScannerBuilder builder(
+        schema_,
+        std::make_shared<FailingScanTaskFragment<FailingIterationScanTask>>(batches),
+        options_);
+    ASSERT_OK(builder.UseThreads(GetParam().use_threads));
+    ASSERT_OK_AND_ASSIGN(auto scanner, builder.Finish());
+    ASSERT_OK_AND_ASSIGN(auto batch_it, scanner->ScanBatches());
+    ASSERT_OK(batch_it.Next());
+    EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, ::testing::HasSubstr("Oh no, we failed!"),
+                                    batch_it.Next());
+  }
 }
 
 TEST_P(TestScanner, Head) {

From be258f2aa646f28f71108332e84f5e3adf1d2637 Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Wed, 21 Apr 2021 14:01:50 +0200
Subject: [PATCH 102/719] ARROW-12485: [C++] Use mimalloc as the default memory
 allocator on macOS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #10117 from jonkeane/ARROW-12485

Authored-by: Jonathan Keane <jkeane@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 cpp/src/arrow/memory_pool.cc | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc
index f402ccb4172..793a2c8e5df 100644
--- a/cpp/src/arrow/memory_pool.cc
+++ b/cpp/src/arrow/memory_pool.cc
@@ -170,13 +170,8 @@ MemoryPoolBackend DefaultBackend() {
   if (backend.has_value()) {
     return backend.value();
   }
-#ifdef ARROW_JEMALLOC
-  return MemoryPoolBackend::Jemalloc;
-#elif defined(ARROW_MIMALLOC)
-  return MemoryPoolBackend::Mimalloc;
-#else
-  return MemoryPoolBackend::System;
-#endif
+  struct SupportedBackend default_backend = SupportedBackends().front();
+  return default_backend.backend;
 }
 
 // A static piece of memory for 0-size allocations, so as to return

From 37c27d1eaf0fa61281ad103c08a0251bb6883ec4 Mon Sep 17 00:00:00 2001
From: "Maarten A. Breddels" <maartenbreddels@gmail.com>
Date: Wed, 21 Apr 2021 16:01:36 +0200
Subject: [PATCH 103/719] ARROW-10195: [C++] Add string struct extract kernel
 using re2

The second commit adds re2 to the linked libraries. @xhochy how should this be done, should I open a separate issue for this?

Closes #8459 from maartenbreddels/ARROW-10195

Lead-authored-by: Maarten A. Breddels <maartenbreddels@gmail.com>
Co-authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/compute/api_scalar.h            |  11 +-
 .../arrow/compute/kernels/scalar_string.cc    | 244 +++++++++++++++++-
 .../compute/kernels/scalar_string_test.cc     |  78 ++++++
 cpp/src/arrow/compute/kernels/test_util.cc    |   2 +-
 docs/source/cpp/compute.rst                   |  15 ++
 python/pyarrow/_compute.pyx                   |  17 ++
 python/pyarrow/compute.py                     |   1 +
 python/pyarrow/includes/libarrow.pxd          |   5 +
 python/pyarrow/tests/test_compute.py          |   7 +
 9 files changed, 369 insertions(+), 11 deletions(-)

diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h
index 6032f656c4a..53892ff6b3c 100644
--- a/cpp/src/arrow/compute/api_scalar.h
+++ b/cpp/src/arrow/compute/api_scalar.h
@@ -71,7 +71,9 @@ struct ARROW_EXPORT SplitPatternOptions : public SplitOptions {
 struct ARROW_EXPORT ReplaceSubstringOptions : public FunctionOptions {
   explicit ReplaceSubstringOptions(std::string pattern, std::string replacement,
                                    int64_t max_replacements = -1)
-      : pattern(pattern), replacement(replacement), max_replacements(max_replacements) {}
+      : pattern(std::move(pattern)),
+        replacement(std::move(replacement)),
+        max_replacements(max_replacements) {}
 
   /// Pattern to match, literal, or regular expression depending on which kernel is used
   std::string pattern;
@@ -81,6 +83,13 @@ struct ARROW_EXPORT ReplaceSubstringOptions : public FunctionOptions {
   int64_t max_replacements;
 };
 
+struct ARROW_EXPORT ExtractRegexOptions : public FunctionOptions {
+  explicit ExtractRegexOptions(std::string pattern) : pattern(std::move(pattern)) {}
+
+  /// Regular expression with named capture fields
+  std::string pattern;
+};
+
 /// Options for IsIn and IndexIn functions
 struct ARROW_EXPORT SetLookupOptions : public FunctionOptions {
   explicit SetLookupOptions(Datum value_set, bool skip_nulls = false)
diff --git a/cpp/src/arrow/compute/kernels/scalar_string.cc b/cpp/src/arrow/compute/kernels/scalar_string.cc
index 9ec1fe005d4..d5473749fe1 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string.cc
@@ -17,6 +17,7 @@
 
 #include <algorithm>
 #include <cctype>
+#include <iterator>
 #include <string>
 
 #ifdef ARROW_WITH_UTF8PROC
@@ -30,17 +31,40 @@
 #include "arrow/array/builder_binary.h"
 #include "arrow/array/builder_nested.h"
 #include "arrow/buffer_builder.h"
+
+#include "arrow/builder.h"
 #include "arrow/compute/api_scalar.h"
 #include "arrow/compute/kernels/common.h"
+#include "arrow/util/checked_cast.h"
 #include "arrow/util/utf8.h"
 #include "arrow/util/value_parsing.h"
 
 namespace arrow {
+
+using internal::checked_cast;
+
 namespace compute {
 namespace internal {
 
 namespace {
 
+#ifdef ARROW_WITH_RE2
+util::string_view ToStringView(re2::StringPiece piece) {
+  return {piece.data(), piece.length()};
+}
+
+re2::StringPiece ToStringPiece(util::string_view view) {
+  return {view.data(), view.length()};
+}
+
+Status RegexStatus(const RE2& regex) {
+  if (!regex.ok()) {
+    return Status::Invalid("Invalid regular expression: ", regex.error());
+  }
+  return Status::OK();
+}
+#endif
+
 // Code units in the range [a-z] can only be an encoding of an ascii
 // character/codepoint, not the 2nd, 3rd or 4th code unit (byte) of an different
 // codepoint. This guaranteed by non-overlap design of the unicode standard. (see
@@ -449,10 +473,8 @@ struct RegexSubstringMatcher {
   const RE2 regex_match_;
 
   RegexSubstringMatcher(KernelContext* ctx, const MatchSubstringOptions& options)
-      : options_(options), regex_match_(options_.pattern) {
-    if (!regex_match_.ok()) {
-      ctx->SetStatus(Status::Invalid("Regular expression error"));
-    }
+      : options_(options), regex_match_(options_.pattern, RE2::Quiet) {
+    KERNEL_RETURN_IF_ERROR(ctx, RegexStatus(regex_match_));
   }
 
   bool Match(util::string_view current) {
@@ -1390,16 +1412,21 @@ struct RegexSubStringReplacer {
   // we have 2 regexes, one with () around it, one without.
   RegexSubStringReplacer(KernelContext* ctx, const ReplaceSubstringOptions& options)
       : options_(options),
-        regex_find_("(" + options_.pattern + ")"),
-        regex_replacement_(options_.pattern) {
-    if (!(regex_find_.ok() && regex_replacement_.ok())) {
-      ctx->SetStatus(Status::Invalid("Regular expression error"));
-      return;
+        regex_find_("(" + options_.pattern + ")", RE2::Quiet),
+        regex_replacement_(options_.pattern, RE2::Quiet) {
+    KERNEL_RETURN_IF_ERROR(ctx, RegexStatus(regex_find_));
+    KERNEL_RETURN_IF_ERROR(ctx, RegexStatus(regex_replacement_));
+    std::string replacement_error;
+    if (!regex_replacement_.CheckRewriteString(options_.replacement,
+                                               &replacement_error)) {
+      ctx->SetStatus(
+          Status::Invalid("Invalid replacement string: ", std::move(replacement_error)));
     }
   }
 
   Status ReplaceString(util::string_view s, TypedBufferBuilder<uint8_t>* builder) {
     re2::StringPiece replacement(options_.replacement);
+
     if (options_.max_replacements == -1) {
       std::string s_copy(s.to_string());
       re2::RE2::GlobalReplace(&s_copy, regex_replacement_, replacement);
@@ -1472,6 +1499,204 @@ const FunctionDoc replace_substring_regex_doc(
     {"strings"}, "ReplaceSubstringOptions");
 #endif
 
+// ----------------------------------------------------------------------
+// Extract with regex
+
+#ifdef ARROW_WITH_RE2
+
+// TODO cache this once per ExtractRegexOptions
+struct ExtractRegexData {
+  // Use unique_ptr<> because RE2 is non-movable
+  std::unique_ptr<RE2> regex;
+  std::vector<std::string> group_names;
+
+  static Result<ExtractRegexData> Make(const ExtractRegexOptions& options) {
+    ExtractRegexData data(options.pattern);
+    RETURN_NOT_OK(RegexStatus(*data.regex));
+
+    const int group_count = data.regex->NumberOfCapturingGroups();
+    const auto& name_map = data.regex->CapturingGroupNames();
+    data.group_names.reserve(group_count);
+
+    for (int i = 0; i < group_count; i++) {
+      auto item = name_map.find(i + 1);  // re2 starts counting from 1
+      if (item == name_map.end()) {
+        // XXX should we instead just create fields with an empty name?
+        return Status::Invalid("Regular expression contains unnamed groups");
+      }
+      data.group_names.emplace_back(item->second);
+    }
+    return std::move(data);
+  }
+
+  Result<ValueDescr> ResolveOutputType(const std::vector<ValueDescr>& args) const {
+    const auto& input_type = args[0].type;
+    if (input_type == nullptr) {
+      // No input type specified => propagate shape
+      return args[0];
+    }
+    // Input type is either String or LargeString and is also the type of each
+    // field in the output struct type.
+    DCHECK(input_type->id() == Type::STRING || input_type->id() == Type::LARGE_STRING);
+    FieldVector fields;
+    fields.reserve(group_names.size());
+    std::transform(group_names.begin(), group_names.end(), std::back_inserter(fields),
+                   [&](const std::string& name) { return field(name, input_type); });
+    return struct_(std::move(fields));
+  }
+
+ private:
+  explicit ExtractRegexData(const std::string& pattern)
+      : regex(new RE2(pattern, RE2::Quiet)) {}
+};
+
+Result<ValueDescr> ResolveExtractRegexOutput(KernelContext* ctx,
+                                             const std::vector<ValueDescr>& args) {
+  using State = OptionsWrapper<ExtractRegexOptions>;
+  ExtractRegexOptions options = State::Get(ctx);
+  ARROW_ASSIGN_OR_RAISE(auto data, ExtractRegexData::Make(options));
+  return data.ResolveOutputType(args);
+}
+
+struct ExtractRegexBase {
+  const ExtractRegexData& data;
+  const int group_count;
+  std::vector<re2::StringPiece> found_values;
+  std::vector<re2::RE2::Arg> args;
+  std::vector<const re2::RE2::Arg*> args_pointers;
+  const re2::RE2::Arg** args_pointers_start;
+  const re2::RE2::Arg* null_arg = nullptr;
+
+  explicit ExtractRegexBase(const ExtractRegexData& data)
+      : data(data),
+        group_count(static_cast<int>(data.group_names.size())),
+        found_values(group_count) {
+    args.reserve(group_count);
+    args_pointers.reserve(group_count);
+
+    for (int i = 0; i < group_count; i++) {
+      args.emplace_back(&found_values[i]);
+      // Since we reserved capacity, we're guaranteed the pointer remains valid
+      args_pointers.push_back(&args[i]);
+    }
+    // Avoid null pointer if there is no capture group
+    args_pointers_start = (group_count > 0) ? args_pointers.data() : &null_arg;
+  }
+
+  bool Match(util::string_view s) {
+    return re2::RE2::PartialMatchN(ToStringPiece(s), *data.regex, args_pointers_start,
+                                   group_count);
+  }
+};
+
+template <typename Type>
+struct ExtractRegex : public ExtractRegexBase {
+  using ArrayType = typename TypeTraits<Type>::ArrayType;
+  using ScalarType = typename TypeTraits<Type>::ScalarType;
+  using BuilderType = typename TypeTraits<Type>::BuilderType;
+  using State = OptionsWrapper<ExtractRegexOptions>;
+
+  using ExtractRegexBase::ExtractRegexBase;
+
+  static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    ExtractRegexOptions options = State::Get(ctx);
+    KERNEL_ASSIGN_OR_RAISE(auto data, ctx, ExtractRegexData::Make(options));
+    ExtractRegex{data}.Extract(ctx, batch, out);
+  }
+
+  void Extract(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    KERNEL_ASSIGN_OR_RAISE(auto descr, ctx,
+                           data.ResolveOutputType(batch.GetDescriptors()));
+    DCHECK_NE(descr.type, nullptr);
+    const auto& type = descr.type;
+
+    if (batch[0].kind() == Datum::ARRAY) {
+      std::unique_ptr<ArrayBuilder> array_builder;
+      KERNEL_RETURN_IF_ERROR(ctx, MakeBuilder(ctx->memory_pool(), type, &array_builder));
+      StructBuilder* struct_builder = checked_cast<StructBuilder*>(array_builder.get());
+
+      std::vector<BuilderType*> field_builders;
+      field_builders.reserve(group_count);
+      for (int i = 0; i < group_count; i++) {
+        field_builders.push_back(
+            checked_cast<BuilderType*>(struct_builder->field_builder(i)));
+      }
+
+      auto visit_null = [&]() {
+        for (int i = 0; i < group_count; i++) {
+          RETURN_NOT_OK(field_builders[i]->AppendEmptyValue());
+        }
+        return struct_builder->AppendNull();
+      };
+      auto visit_value = [&](util::string_view s) {
+        if (Match(s)) {
+          for (int i = 0; i < group_count; i++) {
+            RETURN_NOT_OK(field_builders[i]->Append(ToStringView(found_values[i])));
+          }
+          return struct_builder->Append();
+        } else {
+          return visit_null();
+        }
+      };
+      const ArrayData& input = *batch[0].array();
+      KERNEL_RETURN_IF_ERROR(ctx,
+                             VisitArrayDataInline<Type>(input, visit_value, visit_null));
+
+      std::shared_ptr<Array> out_array;
+      KERNEL_RETURN_IF_ERROR(ctx, struct_builder->Finish(&out_array));
+      *out = std::move(out_array);
+    } else {
+      const auto& input = checked_cast<const ScalarType&>(*batch[0].scalar());
+      auto result = std::make_shared<StructScalar>(type);
+      if (input.is_valid && Match(util::string_view(*input.value))) {
+        result->value.reserve(group_count);
+        for (int i = 0; i < group_count; i++) {
+          result->value.push_back(
+              std::make_shared<ScalarType>(found_values[i].as_string()));
+        }
+        result->is_valid = true;
+      } else {
+        result->is_valid = false;
+      }
+      out->value = std::move(result);
+    }
+  }
+};
+
+const FunctionDoc extract_regex_doc(
+    "Extract substrings captured by a regex pattern",
+    ("For each string in `strings`, match the regular expression and, if\n"
+     "successful, emit a struct with field names and values coming from the\n"
+     "regular expression's named capture groups. If the input is null or the\n"
+     "regular expression fails matching, a null output value is emitted.\n"
+     "\n"
+     "Regular expression matching is done using the Google RE2 library."),
+    {"strings"}, "ExtractRegexOptions");
+
+void AddExtractRegex(FunctionRegistry* registry) {
+  auto func = std::make_shared<ScalarFunction>("extract_regex", Arity::Unary(),
+                                               &extract_regex_doc);
+  using t32 = ExtractRegex<StringType>;
+  using t64 = ExtractRegex<LargeStringType>;
+  OutputType out_ty(ResolveExtractRegexOutput);
+  ScalarKernel kernel;
+
+  // Null values will be computed based on regex match or not
+  kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
+  kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
+  kernel.signature.reset(new KernelSignature({utf8()}, out_ty));
+  kernel.exec = t32::Exec;
+  kernel.init = t32::State::Init;
+  DCHECK_OK(func->AddKernel(kernel));
+  kernel.signature.reset(new KernelSignature({large_utf8()}, out_ty));
+  kernel.exec = t64::Exec;
+  kernel.init = t64::State::Init;
+  DCHECK_OK(func->AddKernel(kernel));
+
+  DCHECK_OK(registry->AddFunction(std::move(func)));
+}
+#endif  // ARROW_WITH_RE2
+
 // ----------------------------------------------------------------------
 // strptime string parsing
 
@@ -2153,6 +2378,7 @@ void RegisterScalarStringAscii(FunctionRegistry* registry) {
   MakeUnaryStringBatchKernelWithState<ReplaceSubStringRegex>(
       "replace_substring_regex", registry, &replace_substring_regex_doc,
       MemAllocation::NO_PREALLOCATE);
+  AddExtractRegex(registry);
 #endif
   AddStrptime(registry);
 }
diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
index 2dd0a4d8c74..577493913b5 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
@@ -17,6 +17,7 @@
 
 #include <memory>
 
+#include <gmock/gmock.h>
 #include <gtest/gtest.h>
 
 #ifdef ARROW_WITH_UTF8PROC
@@ -366,6 +367,26 @@ TYPED_TEST(TestStringKernels, MatchSubstringRegex) {
   MatchSubstringOptions options_plus{"a+b"};
   this->CheckUnary("match_substring_regex", R"(["aacb", "aab", "dab", "caaab", "b", ""])",
                    boolean(), "[false, true, true, true, false, false]", &options_plus);
+
+  // Unicode character semantics
+  // "\pL" means: unicode category "letter"
+  // (re2 interprets "\w" as ASCII-only: https://github.com/google/re2/wiki/Syntax)
+  MatchSubstringOptions options_unicode{"^\\pL+$"};
+  this->CheckUnary("match_substring_regex", R"(["été", "ß", "€", ""])", boolean(),
+                   "[true, true, false, false]", &options_unicode);
+}
+
+TYPED_TEST(TestStringKernels, MatchSubstringRegexNoOptions) {
+  Datum input = ArrayFromJSON(this->type(), "[]");
+  ASSERT_RAISES(Invalid, CallFunction("match_substring_regex", {input}));
+}
+
+TYPED_TEST(TestStringKernels, MatchSubstringRegexInvalid) {
+  Datum input = ArrayFromJSON(this->type(), "[null]");
+  MatchSubstringOptions options{"invalid["};
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      Invalid, ::testing::HasSubstr("Invalid regular expression: missing ]"),
+      CallFunction("match_substring_regex", {input}, &options));
 }
 #endif
 
@@ -495,6 +516,63 @@ TYPED_TEST(TestStringKernels, ReplaceSubstringRegexNoOptions) {
   Datum input = ArrayFromJSON(this->type(), "[]");
   ASSERT_RAISES(Invalid, CallFunction("replace_substring_regex", {input}));
 }
+
+TYPED_TEST(TestStringKernels, ReplaceSubstringRegexInvalid) {
+  Datum input = ArrayFromJSON(this->type(), R"(["foo"])");
+  ReplaceSubstringOptions options{"invalid[", ""};
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      Invalid, ::testing::HasSubstr("Invalid regular expression: missing ]"),
+      CallFunction("replace_substring_regex", {input}, &options));
+
+  // Capture group number out of range
+  options = ReplaceSubstringOptions{"(.)", "\\9"};
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      Invalid, ::testing::HasSubstr("Invalid replacement string"),
+      CallFunction("replace_substring_regex", {input}, &options));
+}
+
+TYPED_TEST(TestStringKernels, ExtractRegex) {
+  ExtractRegexOptions options{"(?P<letter>[ab])(?P<digit>\\d)"};
+  auto type = struct_({field("letter", this->type()), field("digit", this->type())});
+  this->CheckUnary("extract_regex", R"([])", type, R"([])", &options);
+  this->CheckUnary(
+      "extract_regex", R"(["a1", "b2", "c3", null])", type,
+      R"([{"letter": "a", "digit": "1"}, {"letter": "b", "digit": "2"}, null, null])",
+      &options);
+  this->CheckUnary("extract_regex", R"(["a1", "b2"])", type,
+                   R"([{"letter": "a", "digit": "1"}, {"letter": "b", "digit": "2"}])",
+                   &options);
+  this->CheckUnary("extract_regex", R"(["a1", "zb3z"])", type,
+                   R"([{"letter": "a", "digit": "1"}, {"letter": "b", "digit": "3"}])",
+                   &options);
+}
+
+TYPED_TEST(TestStringKernels, ExtractRegexNoCapture) {
+  // XXX Should we accept this or is it a user error?
+  ExtractRegexOptions options{"foo"};
+  auto type = struct_({});
+  this->CheckUnary("extract_regex", R"(["oofoo", "bar", null])", type,
+                   R"([{}, null, null])", &options);
+}
+
+TYPED_TEST(TestStringKernels, ExtractRegexNoOptions) {
+  Datum input = ArrayFromJSON(this->type(), "[]");
+  ASSERT_RAISES(Invalid, CallFunction("extract_regex", {input}));
+}
+
+TYPED_TEST(TestStringKernels, ExtractRegexInvalid) {
+  Datum input = ArrayFromJSON(this->type(), "[]");
+  ExtractRegexOptions options{"invalid["};
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      Invalid, ::testing::HasSubstr("Invalid regular expression: missing ]"),
+      CallFunction("extract_regex", {input}, &options));
+
+  options = ExtractRegexOptions{"(.)"};
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      Invalid, ::testing::HasSubstr("Regular expression contains unnamed groups"),
+      CallFunction("extract_regex", {input}, &options));
+}
+
 #endif
 
 TYPED_TEST(TestStringKernels, Strptime) {
diff --git a/cpp/src/arrow/compute/kernels/test_util.cc b/cpp/src/arrow/compute/kernels/test_util.cc
index a8a0c8b95f3..11d5e76d342 100644
--- a/cpp/src/arrow/compute/kernels/test_util.cc
+++ b/cpp/src/arrow/compute/kernels/test_util.cc
@@ -118,7 +118,7 @@ void CheckScalar(std::string func_name, const ArrayVector& inputs,
                             expected->Slice(2 * slice_length), options);
   }
 
-  // should also work with an empty slice
+  // Should also work with an empty slice
   CheckScalarNonRecursive(func_name, SliceAll(inputs, 0, 0), expected->Slice(0, 0),
                           options);
 
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index 92ac8886f87..fb50f8cef65 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -580,6 +580,21 @@ when a positive ``max_splits`` is given.
   as separator.
 
 
+String extraction
+~~~~~~~~~~~~~~~~~
+
++--------------------+------------+------------------------------------+---------------+----------------------------------------+
+| Function name      | Arity      | Input types                        | Output type   | Options class                          |
++====================+============+====================================+===============+========================================+
+| extract_regex      | Unary      | String-like                        | Struct (1)    | :struct:`ExtractRegexOptions`          |
++--------------------+------------+------------------------------------+---------------+----------------------------------------+
+
+* \(1) Extract substrings defined by a regular expression using the Google RE2
+  library.  The output struct field names refer to the named capture groups,
+  e.g. 'letter' and 'digit' for the regular expression
+  ``(?P<letter>[ab])(?P<digit>\\d)``.
+
+
 Structural transforms
 ~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx
index 1515bdcfd36..3af485343f2 100644
--- a/python/pyarrow/_compute.pyx
+++ b/python/pyarrow/_compute.pyx
@@ -704,6 +704,23 @@ class ReplaceSubstringOptions(_ReplaceSubstringOptions):
         self._set_options(pattern, replacement, max_replacements)
 
 
+cdef class _ExtractRegexOptions(FunctionOptions):
+    cdef:
+        unique_ptr[CExtractRegexOptions] extract_regex_options
+
+    cdef const CFunctionOptions* get_options(self) except NULL:
+        return self.extract_regex_options.get()
+
+    def _set_options(self, pattern):
+        self.extract_regex_options.reset(
+            new CExtractRegexOptions(tobytes(pattern)))
+
+
+class ExtractRegexOptions(_ExtractRegexOptions):
+    def __init__(self, pattern):
+        self._set_options(pattern)
+
+
 cdef class _FilterOptions(FunctionOptions):
     cdef:
         unique_ptr[CFilterOptions] filter_options
diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py
index 3928b9cb904..ec38710b023 100644
--- a/python/pyarrow/compute.py
+++ b/python/pyarrow/compute.py
@@ -33,6 +33,7 @@
     CastOptions,
     CountOptions,
     DictionaryEncodeOptions,
+    ExtractRegexOptions,
     FilterOptions,
     MatchSubstringOptions,
     MinMaxOptions,
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index ebdcd08334c..45f7c4fee94 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -1823,6 +1823,11 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil:
         c_string replacement
         int64_t max_replacements
 
+    cdef cppclass CExtractRegexOptions \
+            "arrow::compute::ExtractRegexOptions"(CFunctionOptions):
+        CExtractRegexOptions(c_string pattern)
+        c_string pattern
+
     cdef cppclass CCastOptions" arrow::compute::CastOptions"(CFunctionOptions):
         CCastOptions()
         CCastOptions(c_bool safe)
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index 94a6189f41c..5ad0d2db91b 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -598,6 +598,13 @@ def test_replace_regex():
     assert ar.tolist() == ['f00', 'm00d', None]
 
 
+def test_extract_regex():
+    ar = pa.array(['a1', 'zb2z'])
+    struct = pc.extract_regex(ar, pattern=r'(?P<letter>[ab])(?P<digit>\d)')
+    assert struct.tolist() == [{'letter': 'a', 'digit': '1'}, {
+        'letter': 'b', 'digit': '2'}]
+
+
 @pytest.mark.parametrize(('ty', 'values'), all_array_types)
 def test_take(ty, values):
     arr = pa.array(values, type=ty)

From c71becf4d799747b2a90baf715512628dbcf0b54 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Wed, 21 Apr 2021 17:17:58 +0200
Subject: [PATCH 104/719] ARROW-12494: [C++] ORC adapter fails to compile on
 GCC 4.8
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #10121 from kszucs/orc-centos7

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 cpp/src/arrow/adapters/orc/adapter.cc      | 2 +-
 cpp/src/arrow/adapters/orc/adapter_util.cc | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/cpp/src/arrow/adapters/orc/adapter.cc b/cpp/src/arrow/adapters/orc/adapter.cc
index e9dfbd0a8ec..c67bc3c65b9 100644
--- a/cpp/src/arrow/adapters/orc/adapter.cc
+++ b/cpp/src/arrow/adapters/orc/adapter.cc
@@ -578,7 +578,7 @@ Result<std::unique_ptr<ORCFileWriter>> ORCFileWriter::Open(
       std::unique_ptr<ORCFileWriter>(new ORCFileWriter());
   Status status = result->impl_->Open(output_stream);
   RETURN_NOT_OK(status);
-  return result;
+  return std::move(result);
 }
 
 Status ORCFileWriter::Write(const Table& table) { return impl_->Write(table); }
diff --git a/cpp/src/arrow/adapters/orc/adapter_util.cc b/cpp/src/arrow/adapters/orc/adapter_util.cc
index f4f974585e2..f956a6f6217 100644
--- a/cpp/src/arrow/adapters/orc/adapter_util.cc
+++ b/cpp/src/arrow/adapters/orc/adapter_util.cc
@@ -884,7 +884,7 @@ Result<ORC_UNIQUE_PTR<liborc::Type>> GetOrcType(const DataType& type) {
         ARROW_ASSIGN_OR_RAISE(auto orc_subtype, GetOrcType(*arrow_child_type));
         out_type->addStructField(field_name, std::move(orc_subtype));
       }
-      return out_type;
+      return std::move(out_type);
     }
     case Type::type::MAP: {
       std::shared_ptr<DataType> key_arrow_type =
@@ -907,7 +907,7 @@ Result<ORC_UNIQUE_PTR<liborc::Type>> GetOrcType(const DataType& type) {
         ARROW_ASSIGN_OR_RAISE(auto orc_subtype, GetOrcType(*arrow_child_type));
         out_type->addUnionChild(std::move(orc_subtype));
       }
-      return out_type;
+      return std::move(out_type);
     }
     default: {
       return Status::NotImplemented("Unknown or unsupported Arrow type: ",
@@ -1061,7 +1061,7 @@ Result<ORC_UNIQUE_PTR<liborc::Type>> GetOrcType(const Schema& schema) {
     ARROW_ASSIGN_OR_RAISE(auto orc_subtype, GetOrcType(*arrow_child_type));
     out_type->addStructField(field_name, std::move(orc_subtype));
   }
-  return out_type;
+  return std::move(out_type);
 }
 
 }  // namespace orc

From 7a0cb10834d637a6bf56b803afacc1dbc844fcd1 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Wed, 21 Apr 2021 17:50:08 +0200
Subject: [PATCH 105/719] ARROW-12495: [C++] Fix NumPyBuffer::mutable_data()

Closes #10122 from pitrou/ARROW-12495-numpy-buffer-mutable-data

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/python/numpy_convert.cc |  4 +-
 cpp/src/arrow/python/python_test.cc   | 63 +++++++++++++++++++++++++++
 2 files changed, 66 insertions(+), 1 deletion(-)

diff --git a/cpp/src/arrow/python/numpy_convert.cc b/cpp/src/arrow/python/numpy_convert.cc
index 6a1440c33b0..11ce0e50309 100644
--- a/cpp/src/arrow/python/numpy_convert.cc
+++ b/cpp/src/arrow/python/numpy_convert.cc
@@ -44,12 +44,14 @@ NumPyBuffer::NumPyBuffer(PyObject* ao) : Buffer(nullptr, 0) {
 
   if (PyArray_Check(ao)) {
     PyArrayObject* ndarray = reinterpret_cast<PyArrayObject*>(ao);
-    data_ = reinterpret_cast<const uint8_t*>(PyArray_DATA(ndarray));
+    auto ptr = reinterpret_cast<uint8_t*>(PyArray_DATA(ndarray));
+    data_ = const_cast<const uint8_t*>(ptr);
     size_ = PyArray_SIZE(ndarray) * PyArray_DESCR(ndarray)->elsize;
     capacity_ = size_;
 
     if (PyArray_FLAGS(ndarray) & NPY_ARRAY_WRITEABLE) {
       is_mutable_ = true;
+      mutable_data_ = ptr;
     }
   }
 }
diff --git a/cpp/src/arrow/python/python_test.cc b/cpp/src/arrow/python/python_test.cc
index 33e0ee9b1c9..19eb86a09c6 100644
--- a/cpp/src/arrow/python/python_test.cc
+++ b/cpp/src/arrow/python/python_test.cc
@@ -33,6 +33,8 @@
 #include "arrow/python/arrow_to_pandas.h"
 #include "arrow/python/decimal.h"
 #include "arrow/python/helpers.h"
+#include "arrow/python/numpy_convert.h"
+#include "arrow/python/numpy_interop.h"
 #include "arrow/python/python_to_arrow.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/logging.h"
@@ -187,6 +189,67 @@ TEST(PyBuffer, InvalidInputObject) {
   ASSERT_EQ(old_refcnt, Py_REFCNT(input));
 }
 
+// Because of how it is declared, the Numpy C API instance initialized
+// within libarrow_python.dll may not be visible in this test under Windows
+// ("unresolved external symbol arrow_ARRAY_API referenced").
+#ifndef _WIN32
+TEST(PyBuffer, NumpyArray) {
+  const npy_intp dims[1] = {10};
+
+  OwnedRef arr_ref(PyArray_SimpleNew(1, dims, NPY_FLOAT));
+  PyObject* arr = arr_ref.obj();
+  ASSERT_NE(arr, nullptr);
+  auto old_refcnt = Py_REFCNT(arr);
+
+  ASSERT_OK_AND_ASSIGN(auto buf, PyBuffer::FromPyObject(arr));
+  ASSERT_TRUE(buf->is_cpu());
+  ASSERT_EQ(buf->data(), PyArray_DATA(reinterpret_cast<PyArrayObject*>(arr)));
+  ASSERT_TRUE(buf->is_mutable());
+  ASSERT_EQ(buf->mutable_data(), buf->data());
+  ASSERT_EQ(old_refcnt + 1, Py_REFCNT(arr));
+  buf.reset();
+  ASSERT_EQ(old_refcnt, Py_REFCNT(arr));
+
+  // Read-only
+  PyArray_CLEARFLAGS(reinterpret_cast<PyArrayObject*>(arr), NPY_ARRAY_WRITEABLE);
+  ASSERT_OK_AND_ASSIGN(buf, PyBuffer::FromPyObject(arr));
+  ASSERT_TRUE(buf->is_cpu());
+  ASSERT_EQ(buf->data(), PyArray_DATA(reinterpret_cast<PyArrayObject*>(arr)));
+  ASSERT_FALSE(buf->is_mutable());
+  ASSERT_EQ(old_refcnt + 1, Py_REFCNT(arr));
+  buf.reset();
+  ASSERT_EQ(old_refcnt, Py_REFCNT(arr));
+}
+
+TEST(NumPyBuffer, NumpyArray) {
+  const npy_intp dims[1] = {10};
+
+  OwnedRef arr_ref(PyArray_SimpleNew(1, dims, NPY_FLOAT));
+  PyObject* arr = arr_ref.obj();
+  ASSERT_NE(arr, nullptr);
+  auto old_refcnt = Py_REFCNT(arr);
+
+  auto buf = std::make_shared<NumPyBuffer>(arr);
+  ASSERT_TRUE(buf->is_cpu());
+  ASSERT_EQ(buf->data(), PyArray_DATA(reinterpret_cast<PyArrayObject*>(arr)));
+  ASSERT_TRUE(buf->is_mutable());
+  ASSERT_EQ(buf->mutable_data(), buf->data());
+  ASSERT_EQ(old_refcnt + 1, Py_REFCNT(arr));
+  buf.reset();
+  ASSERT_EQ(old_refcnt, Py_REFCNT(arr));
+
+  // Read-only
+  PyArray_CLEARFLAGS(reinterpret_cast<PyArrayObject*>(arr), NPY_ARRAY_WRITEABLE);
+  buf = std::make_shared<NumPyBuffer>(arr);
+  ASSERT_TRUE(buf->is_cpu());
+  ASSERT_EQ(buf->data(), PyArray_DATA(reinterpret_cast<PyArrayObject*>(arr)));
+  ASSERT_FALSE(buf->is_mutable());
+  ASSERT_EQ(old_refcnt + 1, Py_REFCNT(arr));
+  buf.reset();
+  ASSERT_EQ(old_refcnt, Py_REFCNT(arr));
+}
+#endif
+
 class DecimalTest : public ::testing::Test {
  public:
   DecimalTest() : lock_(), decimal_constructor_() {

From 0756865c3335fd5b0173480ab6297ccc1e38a0be Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Wed, 21 Apr 2021 21:08:03 +0200
Subject: [PATCH 106/719] ARROW-11660: [C++] Move RecordBatch::SelectColumns
 method from R to C++ library

Closes #10100 from thisisnic/ARROW-11660-RecordBatchSelectColumns

Authored-by: Nic Crane <thisisnic@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/record_batch.cc      | 21 +++++++++++++++++++++
 cpp/src/arrow/record_batch.h       |  4 ++++
 cpp/src/arrow/record_batch_test.cc | 28 ++++++++++++++++++++++++++++
 r/src/arrowExports.cpp             |  4 ++--
 r/src/recordbatch.cpp              | 21 ++-------------------
 5 files changed, 57 insertions(+), 21 deletions(-)

diff --git a/cpp/src/arrow/record_batch.cc b/cpp/src/arrow/record_batch.cc
index cb8b77e2be8..2e3e0f263ec 100644
--- a/cpp/src/arrow/record_batch.cc
+++ b/cpp/src/arrow/record_batch.cc
@@ -253,6 +253,27 @@ bool RecordBatch::ApproxEquals(const RecordBatch& other) const {
   return true;
 }
 
+Result<std::shared_ptr<RecordBatch>> RecordBatch::SelectColumns(
+    const std::vector<int>& indices) const {
+  int n = static_cast<int>(indices.size());
+
+  FieldVector fields(n);
+  ArrayVector columns(n);
+
+  for (int i = 0; i < n; i++) {
+    int pos = indices[i];
+    if (pos < 0 || pos > num_columns() - 1) {
+      return Status::Invalid("Invalid column index ", pos, " to select columns.");
+    }
+    fields[i] = schema()->field(pos);
+    columns[i] = column(pos);
+  }
+
+  auto new_schema =
+      std::make_shared<arrow::Schema>(std::move(fields), schema()->metadata());
+  return RecordBatch::Make(new_schema, num_rows(), std::move(columns));
+}
+
 std::shared_ptr<RecordBatch> RecordBatch::Slice(int64_t offset) const {
   return Slice(offset, this->num_rows() - offset);
 }
diff --git a/cpp/src/arrow/record_batch.h b/cpp/src/arrow/record_batch.h
index 4650e806360..59c6d5568e9 100644
--- a/cpp/src/arrow/record_batch.h
+++ b/cpp/src/arrow/record_batch.h
@@ -166,6 +166,10 @@ class ARROW_EXPORT RecordBatch {
   /// \return PrettyPrint representation suitable for debugging
   std::string ToString() const;
 
+  /// \brief Return new record batch with specified columns
+  Result<std::shared_ptr<RecordBatch>> SelectColumns(
+      const std::vector<int>& indices) const;
+
   /// \brief Perform cheap validation checks to determine obvious inconsistencies
   /// within the record batch's schema and internal data.
   ///
diff --git a/cpp/src/arrow/record_batch_test.cc b/cpp/src/arrow/record_batch_test.cc
index 73b1393bba9..9de57f183ef 100644
--- a/cpp/src/arrow/record_batch_test.cc
+++ b/cpp/src/arrow/record_batch_test.cc
@@ -255,6 +255,34 @@ TEST_F(TestRecordBatch, RemoveColumn) {
   AssertBatchesEqual(*new_batch, *batch4);
 }
 
+TEST_F(TestRecordBatch, SelectColumns) {
+  const int length = 10;
+
+  auto field1 = field("f1", int32());
+  auto field2 = field("f2", uint8());
+  auto field3 = field("f3", int16());
+
+  auto schema1 = ::arrow::schema({field1, field2, field3});
+
+  auto array1 = MakeRandomArray<Int32Array>(length);
+  auto array2 = MakeRandomArray<UInt8Array>(length);
+  auto array3 = MakeRandomArray<Int16Array>(length);
+
+  auto batch = RecordBatch::Make(schema1, length, {array1, array2, array3});
+
+  ASSERT_OK_AND_ASSIGN(auto subset, batch->SelectColumns({0, 2}));
+  ASSERT_OK(subset->ValidateFull());
+
+  auto expected_schema = ::arrow::schema({schema1->field(0), schema1->field(2)});
+  auto expected =
+      RecordBatch::Make(expected_schema, length, {batch->column(0), batch->column(2)});
+  ASSERT_TRUE(subset->Equals(*expected));
+
+  // Out of bounds indices
+  ASSERT_RAISES(Invalid, batch->SelectColumns({0, 3}));
+  ASSERT_RAISES(Invalid, batch->SelectColumns({-1}));
+}
+
 TEST_F(TestRecordBatch, RemoveColumnEmpty) {
   const int length = 10;
 
diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp
index d68aaf70251..c5ef6343ced 100644
--- a/r/src/arrowExports.cpp
+++ b/r/src/arrowExports.cpp
@@ -5277,11 +5277,11 @@ extern "C" SEXP _arrow_RecordBatch__GetColumnByName(SEXP batch_sexp, SEXP name_s
 
 // recordbatch.cpp
 #if defined(ARROW_R_WITH_ARROW)
-std::shared_ptr<arrow::RecordBatch> RecordBatch__SelectColumns(const std::shared_ptr<arrow::RecordBatch>& batch, cpp11::integers indices);
+std::shared_ptr<arrow::RecordBatch> RecordBatch__SelectColumns(const std::shared_ptr<arrow::RecordBatch>& batch, const std::vector<int>& indices);
 extern "C" SEXP _arrow_RecordBatch__SelectColumns(SEXP batch_sexp, SEXP indices_sexp){
 BEGIN_CPP11
 	arrow::r::Input<const std::shared_ptr<arrow::RecordBatch>&>::type batch(batch_sexp);
-	arrow::r::Input<cpp11::integers>::type indices(indices_sexp);
+	arrow::r::Input<const std::vector<int>&>::type indices(indices_sexp);
 	return cpp11::as_sexp(RecordBatch__SelectColumns(batch, indices));
 END_CPP11
 }
diff --git a/r/src/recordbatch.cpp b/r/src/recordbatch.cpp
index 9628d464874..81e20e9ec9a 100644
--- a/r/src/recordbatch.cpp
+++ b/r/src/recordbatch.cpp
@@ -93,25 +93,8 @@ std::shared_ptr<arrow::Array> RecordBatch__GetColumnByName(
 
 // [[arrow::export]]
 std::shared_ptr<arrow::RecordBatch> RecordBatch__SelectColumns(
-    const std::shared_ptr<arrow::RecordBatch>& batch, cpp11::integers indices) {
-  R_xlen_t n = indices.size();
-  auto nrows = batch->num_rows();
-  auto ncols = batch->num_columns();
-
-  std::vector<std::shared_ptr<arrow::Field>> fields(n);
-  std::vector<std::shared_ptr<arrow::Array>> columns(n);
-
-  for (R_xlen_t i = 0; i < n; i++) {
-    int pos = indices[i];
-    if (pos < 0 || pos > ncols - 1) {
-      cpp11::stop("Invalid column index %d to select columns.", pos);
-    }
-    fields[i] = batch->schema()->field(pos);
-    columns[i] = batch->column(pos);
-  }
-
-  auto schema = std::make_shared<arrow::Schema>(std::move(fields));
-  return arrow::RecordBatch::Make(schema, nrows, columns);
+    const std::shared_ptr<arrow::RecordBatch>& batch, const std::vector<int>& indices) {
+  return ValueOrStop(batch->SelectColumns(indices));
 }
 
 // [[arrow::export]]

From 8444689cd9df91f6ed0a669a938925b998fd75fe Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Wed, 21 Apr 2021 12:56:18 -0700
Subject: [PATCH 107/719] ARROW-12185: [R] Bindings for any, all

Closes #10032 from thisisnic/ARROW-12185-any_all

Lead-authored-by: Nic Crane <thisisnic@gmail.com>
Co-authored-by: Neal Richardson <neal.p.richardson@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 r/NAMESPACE                               |  2 ++
 r/R/compute.R                             | 29 ++++++++++++++++++++++
 r/tests/testthat/test-compute-aggregate.R | 30 +++++++++++++++++++++++
 3 files changed, 61 insertions(+)

diff --git a/r/NAMESPACE b/r/NAMESPACE
index 567353876ca..4cc8df46d33 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -22,7 +22,9 @@ S3method("names<-",ArrowTabular)
 S3method(Ops,ArrowDatum)
 S3method(Ops,Expression)
 S3method(Ops,array_expression)
+S3method(all,ArrowDatum)
 S3method(all,equal.ArrowObject)
+S3method(any,ArrowDatum)
 S3method(as.character,ArrowDatum)
 S3method(as.character,FileFormat)
 S3method(as.character,FragmentScanOptions)
diff --git a/r/R/compute.R b/r/R/compute.R
index 1b79d76f037..0641bf1615c 100644
--- a/r/R/compute.R
+++ b/r/R/compute.R
@@ -186,6 +186,35 @@ unique.ArrowDatum <- function(x, incomparables = FALSE, ...) {
   call_function("unique", x)
 }
 
+#' @export
+any.ArrowDatum <- function(..., na.rm = FALSE){
+  
+  a <- collect_arrays_from_dots(list(...))
+  result <- call_function("any", a)
+
+  if (!na.rm && a$null_count > 0 && !as.vector(result)) {
+    # Three-valued logic: with na.rm = FALSE, any(c(TRUE, NA)) returns TRUE but any(c(FALSE, NA)) returns NA
+    # TODO: C++ library should take na.rm for any/all (like ARROW-9054)
+    Scalar$create(NA)
+  } else {
+    result
+  }
+}
+
+#' @export
+all.ArrowDatum <- function(..., na.rm = FALSE){
+  
+  a <- collect_arrays_from_dots(list(...))
+  result <- call_function("all", a)
+  
+  if (!na.rm && a$null_count > 0 && as.vector(result)) {
+    # See comment above in any() about three-valued logic
+    Scalar$create(NA)
+  } else {
+    result
+  }
+}
+
 #' `match` and `%in%` for Arrow objects
 #'
 #' `base::match()` is not a generic, so we can't just define Arrow methods for
diff --git a/r/tests/testthat/test-compute-aggregate.R b/r/tests/testthat/test-compute-aggregate.R
index 77010579d78..0621b7779c7 100644
--- a/r/tests/testthat/test-compute-aggregate.R
+++ b/r/tests/testthat/test-compute-aggregate.R
@@ -351,3 +351,33 @@ test_that("value_counts", {
   expect_identical(as.data.frame(value_counts(a)), result_df)
   expect_identical(as.vector(value_counts(a)$counts), result_df$counts)
 })
+
+test_that("any.Array and any.ChunkedArray", {
+  
+  data <- c(1:10, NA, NA)
+
+  expect_vector_equal(any(input > 5), data)
+  expect_vector_equal(any(input < 1), data)
+  expect_vector_equal(any(input < 1, na.rm = TRUE), data)
+  
+  data_logical <- c(TRUE, FALSE, TRUE, NA, FALSE)
+  
+  expect_vector_equal(any(input), data_logical)
+  expect_vector_equal(any(input, na.rm = TRUE), data_logical)
+  
+})
+
+test_that("all.Array and all.ChunkedArray", {
+
+  data <- c(1:10, NA, NA)
+  
+  expect_vector_equal(all(input > 5), data)
+  expect_vector_equal(all(input < 11), data)
+  expect_vector_equal(all(input < 11, na.rm = TRUE), data)
+  
+  data_logical <- c(TRUE, TRUE, NA)
+  
+  expect_vector_equal(all(input), data_logical)
+  expect_vector_equal(all(input, na.rm = TRUE), data_logical)
+  
+})

From 09ad229b625d9c9b7a5a0be5ae28bfeb908427a4 Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Wed, 21 Apr 2021 13:00:57 -0700
Subject: [PATCH 108/719] ARROW-12184: [R] Bindings for na.fail, na.omit,
 na.exclude, na.pass

Closes #10056 from thisisnic/ARROW-12184-na.fail

Lead-authored-by: Nic Crane <thisisnic@gmail.com>
Co-authored-by: Nic <thisisnic@gmail.com>
Co-authored-by: Neal Richardson <neal.p.richardson@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 r/NAMESPACE                           | 10 ++++
 r/R/arrow-datum.R                     | 16 +++++
 r/R/arrow-package.R                   |  2 +-
 r/R/arrow-tabular.R                   | 20 +++++++
 r/tests/testthat/helper-expectation.R | 86 ++++++++++++++++++++++++---
 r/tests/testthat/test-Array.R         | 44 +++++++-------
 r/tests/testthat/test-RecordBatch.R   | 22 +++----
 r/tests/testthat/test-Table.R         | 24 ++++----
 r/tests/testthat/test-chunked-array.R | 38 ++++++------
 r/tests/testthat/test-compute-arith.R |  2 +-
 r/tests/testthat/test-na-omit.R       | 73 +++++++++++++++++++++++
 11 files changed, 262 insertions(+), 75 deletions(-)
 create mode 100644 r/tests/testthat/test-na-omit.R

diff --git a/r/NAMESPACE b/r/NAMESPACE
index 4cc8df46d33..117e3de5c22 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -64,6 +64,12 @@ S3method(max,ArrowDatum)
 S3method(mean,ArrowDatum)
 S3method(median,ArrowDatum)
 S3method(min,ArrowDatum)
+S3method(na.exclude,ArrowDatum)
+S3method(na.exclude,ArrowTabular)
+S3method(na.fail,ArrowDatum)
+S3method(na.fail,ArrowTabular)
+S3method(na.omit,ArrowDatum)
+S3method(na.omit,ArrowTabular)
 S3method(names,Dataset)
 S3method(names,FeatherReader)
 S3method(names,RecordBatch)
@@ -320,6 +326,10 @@ importFrom(rlang,set_names)
 importFrom(rlang,syms)
 importFrom(rlang,warn)
 importFrom(stats,median)
+importFrom(stats,na.exclude)
+importFrom(stats,na.fail)
+importFrom(stats,na.omit)
+importFrom(stats,na.pass)
 importFrom(stats,quantile)
 importFrom(tidyselect,contains)
 importFrom(tidyselect,ends_with)
diff --git a/r/R/arrow-datum.R b/r/R/arrow-datum.R
index dd43307c9cc..4edcb200ea0 100644
--- a/r/R/arrow-datum.R
+++ b/r/R/arrow-datum.R
@@ -46,6 +46,22 @@ as.vector.ArrowDatum <- function(x, mode) {
   )
 }
 
+#' @export
+na.omit.ArrowDatum <- function(object, ...){
+  object$Filter(!is.na(object))
+}
+
+#' @export
+na.exclude.ArrowDatum <- na.omit.ArrowDatum
+
+#' @export
+na.fail.ArrowDatum <- function(object, ...){
+  if (object$null_count > 0) {
+    stop("missing values in object", call. = FALSE)
+  }
+  object
+}
+
 filter_rows <- function(x, i, keep_na = TRUE, ...) {
   # General purpose function for [ row subsetting with R semantics
   # Based on the input for `i`, calls x$Filter, x$Slice, or x$Take
diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R
index 30d59491d79..51f4987484c 100644
--- a/r/R/arrow-package.R
+++ b/r/R/arrow-package.R
@@ -15,7 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-#' @importFrom stats quantile median
+#' @importFrom stats quantile median na.omit na.exclude na.pass na.fail
 #' @importFrom R6 R6Class
 #' @importFrom purrr as_mapper map map2 map_chr map_dfr map_int map_lgl keep
 #' @importFrom assertthat assert_that is.string
diff --git a/r/R/arrow-tabular.R b/r/R/arrow-tabular.R
index f32111688a2..bba5ad5f5e6 100644
--- a/r/R/arrow-tabular.R
+++ b/r/R/arrow-tabular.R
@@ -211,6 +211,26 @@ head.ArrowTabular <- head.ArrowDatum
 #' @export
 tail.ArrowTabular <- tail.ArrowDatum
 
+#' @export
+na.fail.ArrowTabular <- function(object, ...){
+  for (col in seq_len(object$num_columns)) {
+    if (object$column(col - 1L)$null_count > 0) {
+      stop("missing values in object", call. = FALSE)
+    }
+  }
+  object
+}
+
+#' @export
+na.omit.ArrowTabular <- function(object, ...){
+  not_na <- map(object$columns, ~build_array_expression("is_valid", .x))
+  not_na_agg <- Reduce("&", not_na)
+  object$Filter(eval_array_expression(not_na_agg))
+}
+
+#' @export
+na.exclude.ArrowTabular <- na.omit.ArrowTabular 
+
 ToString_tabular <- function(x, ...) {
   # Generic to work with both RecordBatch and Table
   sch <- unlist(strsplit(x$schema$ToString(), "\n"))
diff --git a/r/tests/testthat/helper-expectation.R b/r/tests/testthat/helper-expectation.R
index 2ebd44f7bba..595b183e555 100644
--- a/r/tests/testthat/helper-expectation.R
+++ b/r/tests/testthat/helper-expectation.R
@@ -15,8 +15,9 @@
 # specific language governing permissions and limitations
 # under the License.
 
-expect_vector <- function(x, y, ...) {
-  expect_equal(as.vector(x), y, ...)
+expect_as_vector <- function(x, y, ignore_attr = FALSE, ...) {
+  expect_fun <- ifelse(ignore_attr, expect_equivalent, expect_equal)
+  expect_fun(as.vector(x), y, ...)
 }
 
 expect_data_frame <- function(x, y, ...) {
@@ -155,10 +156,10 @@ expect_vector_equal <- function(expr, # A vectorized R expression containing `in
                                vec,  # A vector as reference, will make Array/ChunkedArray with
                                skip_array = NULL, # Msg, if should skip Array test
                                skip_chunked_array = NULL, # Msg, if should skip ChunkedArray test
+                               ignore_attr = FALSE, # ignore attributes?
                                ...) {
   expr <- rlang::enquo(expr)
   expected <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(input = vec)))
-
   skip_msg <- NULL
 
   if (is.null(skip_array)) {
@@ -166,21 +167,20 @@ expect_vector_equal <- function(expr, # A vectorized R expression containing `in
       expr,
       rlang::new_data_mask(rlang::env(input = Array$create(vec)))
     )
-    expect_vector(via_array, expected, ...)
+    expect_as_vector(via_array, expected, ignore_attr, ...)
   } else {
     skip_msg <- c(skip_msg, skip_array)
   }
 
   if (is.null(skip_chunked_array)) {
     # split input vector into two to exercise ChunkedArray with >1 chunk
-    vec_split <- length(vec) %/% 2
-    vec1 <- vec[seq(from = min(1, length(vec) - 1), to = min(length(vec) - 1, vec_split), by = 1)]
-    vec2 <- vec[seq(from = min(length(vec), vec_split + 1), to = length(vec), by = 1)]
+    split_vector <- split_vector_as_list(vec)
+    
     via_chunked <- rlang::eval_tidy(
       expr,
-      rlang::new_data_mask(rlang::env(input = ChunkedArray$create(vec1, vec2)))
+      rlang::new_data_mask(rlang::env(input = ChunkedArray$create(split_vector[[1]], split_vector[[2]])))
     )
-    expect_vector(via_chunked, expected, ...)
+    expect_as_vector(via_chunked, expected, ignore_attr, ...)
   } else {
     skip_msg <- c(skip_msg, skip_chunked_array)
   }
@@ -189,3 +189,71 @@ expect_vector_equal <- function(expr, # A vectorized R expression containing `in
     skip(paste(skip_msg, collpase = "\n"))
   }
 }
+
+expect_vector_error <- function(expr, # A vectorized R expression containing `input` as its input
+                                vec,  # A vector as reference, will make Array/ChunkedArray with
+                                skip_array = NULL, # Msg, if should skip Array test
+                                skip_chunked_array = NULL, # Msg, if should skip ChunkedArray test
+                                ...) {
+  
+  expr <- rlang::enquo(expr)
+  
+  msg <- tryCatch(
+    rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(input = vec))),
+    error = function (e) {
+      msg <- conditionMessage(e)
+      
+      pattern <- i18ize_error_messages()
+      
+      if (grepl(pattern, msg)) {
+        msg <- sub(paste0("^.*(", pattern, ").*$"), "\\1", msg)
+      }
+      msg
+    }
+  )
+  
+  expect_true(identical(typeof(msg), "character"), label = "vector errored")
+  
+  skip_msg <- NULL
+  
+  if (is.null(skip_array)) {
+    
+    expect_error(
+      rlang::eval_tidy(
+        expr,
+        rlang::new_data_mask(rlang::env(input = Array$create(vec)))
+      ),
+      msg,
+      ...
+    )
+  } else {
+    skip_msg <- c(skip_msg, skip_array)
+  }
+  
+  if (is.null(skip_chunked_array)) {
+    # split input vector into two to exercise ChunkedArray with >1 chunk
+    split_vector <- split_vector_as_list(vec)
+    
+    expect_error(
+      rlang::eval_tidy(
+        expr,
+        rlang::new_data_mask(rlang::env(input = ChunkedArray$create(split_vector[[1]], split_vector[[2]])))
+      ),
+      msg,
+      ...
+    )
+  } else {
+    skip_msg <- c(skip_msg, skip_chunked_array)
+  }
+  
+  if (!is.null(skip_msg)) {
+    skip(paste(skip_msg, collpase = "\n"))
+  }
+}
+
+split_vector_as_list <- function(vec){
+  vec_split <- length(vec) %/% 2
+  vec1 <- vec[seq(from = min(1, length(vec) - 1), to = min(length(vec) - 1, vec_split), by = 1)]
+  vec2 <- vec[seq(from = min(length(vec), vec_split + 1), to = length(vec), by = 1)]
+  list(vec1, vec2)
+}
diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R
index b4fa8296d3a..e064f81cdfa 100644
--- a/r/tests/testthat/test-Array.R
+++ b/r/tests/testthat/test-Array.R
@@ -94,13 +94,13 @@ test_that("Slice() and RangeEquals()", {
   y <- x$Slice(10)
   expect_equal(y$type, int32())
   expect_equal(length(y), 15L)
-  expect_vector(y, c(101:110, 201:205))
+  expect_as_vector(y, c(101:110, 201:205))
   expect_true(x$RangeEquals(y, 10, 24))
   expect_false(x$RangeEquals(y, 9, 23))
   expect_false(x$RangeEquals(y, 11, 24))
 
   z <- x$Slice(10, 5)
-  expect_vector(z, c(101:105))
+  expect_as_vector(z, c(101:105))
   expect_true(x$RangeEquals(z, 10, 15, 0))
 
   # Input validation
@@ -708,12 +708,12 @@ test_that("Array$Take()", {
 test_that("[ method on Array", {
   vec <- 11:20
   a <- Array$create(vec)
-  expect_vector(a[5:9], vec[5:9])
-  expect_vector(a[c(9, 3, 5)], vec[c(9, 3, 5)])
-  expect_vector(a[rep(c(TRUE, FALSE), 5)], vec[c(1, 3, 5, 7, 9)])
-  expect_vector(a[rep(c(TRUE, FALSE, NA, FALSE, TRUE), 2)], c(11, NA, 15, 16, NA, 20))
-  expect_vector(a[-4], vec[-4])
-  expect_vector(a[-1], vec[-1])
+  expect_as_vector(a[5:9], vec[5:9])
+  expect_as_vector(a[c(9, 3, 5)], vec[c(9, 3, 5)])
+  expect_as_vector(a[rep(c(TRUE, FALSE), 5)], vec[c(1, 3, 5, 7, 9)])
+  expect_as_vector(a[rep(c(TRUE, FALSE, NA, FALSE, TRUE), 2)], c(11, NA, 15, 16, NA, 20))
+  expect_as_vector(a[-4], vec[-4])
+  expect_as_vector(a[-1], vec[-1])
 })
 
 test_that("[ accepts Arrays and otherwise handles bad input", {
@@ -724,12 +724,12 @@ test_that("[ accepts Arrays and otherwise handles bad input", {
     a[Array$create(ind)],
     "Cannot extract rows with an Array of type double"
   )
-  expect_vector(a[Array$create(ind - 1, type = int8())], vec[ind])
-  expect_vector(a[Array$create(ind - 1, type = uint8())], vec[ind])
-  expect_vector(a[ChunkedArray$create(8, 2, 4, type = uint8())], vec[ind])
+  expect_as_vector(a[Array$create(ind - 1, type = int8())], vec[ind])
+  expect_as_vector(a[Array$create(ind - 1, type = uint8())], vec[ind])
+  expect_as_vector(a[ChunkedArray$create(8, 2, 4, type = uint8())], vec[ind])
 
   filt <- seq_along(vec) %in% ind
-  expect_vector(a[Array$create(filt)], vec[filt])
+  expect_as_vector(a[Array$create(filt)], vec[filt])
 
   expect_error(
     a["string"],
@@ -754,21 +754,21 @@ test_that("[ accepts Expressions", {
   vec <- 11:20
   a <- Array$create(vec)
   b <- Array$create(1:10)
-  expect_vector(a[b > 4], vec[5:10])
+  expect_as_vector(a[b > 4], vec[5:10])
 })
 
 test_that("Array head/tail", {
   vec <- 11:20
   a <- Array$create(vec)
-  expect_vector(head(a), head(vec))
-  expect_vector(head(a, 4), head(vec, 4))
-  expect_vector(head(a, 40), head(vec, 40))
-  expect_vector(head(a, -4), head(vec, -4))
-  expect_vector(head(a, -40), head(vec, -40))
-  expect_vector(tail(a), tail(vec))
-  expect_vector(tail(a, 4), tail(vec, 4))
-  expect_vector(tail(a, 40), tail(vec, 40))
-  expect_vector(tail(a, -40), tail(vec, -40))
+  expect_as_vector(head(a), head(vec))
+  expect_as_vector(head(a, 4), head(vec, 4))
+  expect_as_vector(head(a, 40), head(vec, 40))
+  expect_as_vector(head(a, -4), head(vec, -4))
+  expect_as_vector(head(a, -40), head(vec, -40))
+  expect_as_vector(tail(a), tail(vec))
+  expect_as_vector(tail(a, 4), tail(vec, 4))
+  expect_as_vector(tail(a, 40), tail(vec, 40))
+  expect_as_vector(tail(a, -40), tail(vec, -40))
 })
 
 test_that("Dictionary array: create from arrays, not factor", {
diff --git a/r/tests/testthat/test-RecordBatch.R b/r/tests/testthat/test-RecordBatch.R
index ff7f17eca6e..c3797914741 100644
--- a/r/tests/testthat/test-RecordBatch.R
+++ b/r/tests/testthat/test-RecordBatch.R
@@ -155,9 +155,9 @@ test_that("[ on RecordBatch", {
 })
 
 test_that("[[ and $ on RecordBatch", {
-  expect_vector(batch[["int"]], tbl$int)
-  expect_vector(batch$int, tbl$int)
-  expect_vector(batch[[4]], tbl$chr)
+  expect_as_vector(batch[["int"]], tbl$int)
+  expect_as_vector(batch$int, tbl$int)
+  expect_as_vector(batch[[4]], tbl$chr)
   expect_null(batch$qwerty)
   expect_null(batch[["asdf"]])
   expect_error(batch[[c(4, 3)]])
@@ -190,16 +190,16 @@ test_that("[[<- assignment", {
 
   # can replace a column by index
   batch[[2]] <- as.numeric(10:1)
-  expect_vector(batch[[2]], as.numeric(10:1))
+  expect_as_vector(batch[[2]], as.numeric(10:1))
 
   # can add a column by index
   batch[[5]] <- as.numeric(10:1)
-  expect_vector(batch[[5]], as.numeric(10:1))
-  expect_vector(batch[["5"]], as.numeric(10:1))
+  expect_as_vector(batch[[5]], as.numeric(10:1))
+  expect_as_vector(batch[["5"]], as.numeric(10:1))
 
   # can replace a column
   batch[["int"]] <- 10:1
-  expect_vector(batch[["int"]], 10:1)
+  expect_as_vector(batch[["int"]], 10:1)
 
   # can use $
   batch$new <- NULL
@@ -207,11 +207,11 @@ test_that("[[<- assignment", {
   expect_identical(dim(batch), c(10L, 4L))
 
   batch$int <- 1:10
-  expect_vector(batch$int, 1:10)
+  expect_as_vector(batch$int, 1:10)
 
   # recycling
   batch[["atom"]] <- 1L
-  expect_vector(batch[["atom"]], rep(1L, 10))
+  expect_as_vector(batch[["atom"]], rep(1L, 10))
 
   expect_error(
     batch[["atom"]] <- 1:6,
@@ -221,7 +221,7 @@ test_that("[[<- assignment", {
   # assign Arrow array
   array <- Array$create(c(10:1))
   batch$array <- array
-  expect_vector(batch$array, 10:1)
+  expect_as_vector(batch$array, 10:1)
 
   # nonsense indexes
   expect_error(batch[[NA]] <- letters[10:1], "'i' must be character or numeric, not logical")
@@ -498,4 +498,4 @@ test_that("Handling string data with embedded nuls", {
       fixed = TRUE
     )
   })
-})
\ No newline at end of file
+})
diff --git a/r/tests/testthat/test-Table.R b/r/tests/testthat/test-Table.R
index 86bda393e2d..3788d416426 100644
--- a/r/tests/testthat/test-Table.R
+++ b/r/tests/testthat/test-Table.R
@@ -105,7 +105,7 @@ test_that("[, [[, $ for Table", {
   expect_data_frame(tab[6:7,], tbl[6:7,])
   expect_data_frame(tab[6:7, 2:4], tbl[6:7, 2:4])
   expect_data_frame(tab[, c("dbl", "fct")], tbl[, c(2, 5)])
-  expect_vector(tab[, "chr", drop = TRUE], tbl$chr)
+  expect_as_vector(tab[, "chr", drop = TRUE], tbl$chr)
   # Take within a single chunk
   expect_data_frame(tab[c(7, 3, 5), 2:4], tbl[c(7, 3, 5), 2:4])
   expect_data_frame(tab[rep(c(FALSE, TRUE), 5),], tbl[c(2, 4, 6, 8, 10),])
@@ -123,9 +123,9 @@ test_that("[, [[, $ for Table", {
   # Expression
   expect_data_frame(tab[tab$int > 6,], tbl[tbl$int > 6,])
 
-  expect_vector(tab[["int"]], tbl$int)
-  expect_vector(tab$int, tbl$int)
-  expect_vector(tab[[4]], tbl$chr)
+  expect_as_vector(tab[["int"]], tbl$int)
+  expect_as_vector(tab$int, tbl$int)
+  expect_as_vector(tab[[4]], tbl$chr)
   expect_null(tab$qwerty)
   expect_null(tab[["asdf"]])
   # List-like column slicing
@@ -173,16 +173,16 @@ test_that("[[<- assignment", {
 
   # can replace a column by index
   tab[[2]] <- as.numeric(10:1)
-  expect_vector(tab[[2]], as.numeric(10:1))
+  expect_as_vector(tab[[2]], as.numeric(10:1))
 
   # can add a column by index
   tab[[5]] <- as.numeric(10:1)
-  expect_vector(tab[[5]], as.numeric(10:1))
-  expect_vector(tab[["5"]], as.numeric(10:1))
+  expect_as_vector(tab[[5]], as.numeric(10:1))
+  expect_as_vector(tab[["5"]], as.numeric(10:1))
 
   # can replace a column
   tab[["int"]] <- 10:1
-  expect_vector(tab[["int"]], 10:1)
+  expect_as_vector(tab[["int"]], 10:1)
 
   # can use $
   tab$new <- NULL
@@ -190,11 +190,11 @@ test_that("[[<- assignment", {
   expect_identical(dim(tab), c(10L, 4L))
 
   tab$int <- 1:10
-  expect_vector(tab$int, 1:10)
+  expect_as_vector(tab$int, 1:10)
 
   # recycling
   tab[["atom"]] <- 1L
-  expect_vector(tab[["atom"]], rep(1L, 10))
+  expect_as_vector(tab[["atom"]], rep(1L, 10))
 
   expect_error(
     tab[["atom"]] <- 1:6,
@@ -204,10 +204,10 @@ test_that("[[<- assignment", {
   # assign Arrow array and chunked_array
   array <- Array$create(c(10:1))
   tab$array <- array
-  expect_vector(tab$array, 10:1)
+  expect_as_vector(tab$array, 10:1)
 
   tab$chunked <- chunked_array(1:10)
-  expect_vector(tab$chunked, 1:10)
+  expect_as_vector(tab$chunked, 1:10)
 
   # nonsense indexes
   expect_error(tab[[NA]] <- letters[10:1], "'i' must be character or numeric, not logical")
diff --git a/r/tests/testthat/test-chunked-array.R b/r/tests/testthat/test-chunked-array.R
index e72067a6d5f..f5b2dca2e44 100644
--- a/r/tests/testthat/test-chunked-array.R
+++ b/r/tests/testthat/test-chunked-array.R
@@ -312,30 +312,30 @@ test_that("[ ChunkedArray", {
   one_chunk <- chunked_array(2:11)
   x <- chunked_array(1:10, 31:40, 51:55)
   # Slice
-  expect_vector(x[8:12], c(8:10, 31:32))
+  expect_as_vector(x[8:12], c(8:10, 31:32))
   # Take from same chunk
-  expect_vector(x[c(11, 15, 12)], c(31, 35, 32))
+  expect_as_vector(x[c(11, 15, 12)], c(31, 35, 32))
   # Take from multiple chunks (calls Concatenate)
-  expect_vector(x[c(2, 11, 15, 12, 3)], c(2, 31, 35, 32, 3))
+  expect_as_vector(x[c(2, 11, 15, 12, 3)], c(2, 31, 35, 32, 3))
   # Take with Array (note these are 0-based)
   take1 <- Array$create(c(10L, 14L, 11L))
-  expect_vector(x[take1], c(31, 35, 32))
+  expect_as_vector(x[take1], c(31, 35, 32))
   # Take with ChunkedArray
   take2 <- ChunkedArray$create(c(10L, 14L), 11L)
-  expect_vector(x[take2], c(31, 35, 32))
+  expect_as_vector(x[take2], c(31, 35, 32))
 
   # Filter (with recycling)
-  expect_vector(
+  expect_as_vector(
     one_chunk[c(FALSE, TRUE, FALSE, FALSE, TRUE)],
     c(3, 6, 8, 11)
   )
   # Filter where both are 1-chunk
-  expect_vector(
+  expect_as_vector(
     one_chunk[ChunkedArray$create(rep(c(FALSE, TRUE, FALSE, FALSE, TRUE), 2))],
     c(3, 6, 8, 11)
   )
   # Filter multi-chunk with logical (-> Array)
-  expect_vector(
+  expect_as_vector(
     x[c(FALSE, TRUE, FALSE, FALSE, TRUE)],
     c(2, 5, 7, 10, 32, 35, 37, 40, 52, 55)
   )
@@ -343,7 +343,7 @@ test_that("[ ChunkedArray", {
   p1 <- c(FALSE, TRUE, FALSE, FALSE, TRUE)
   p2 <- c(TRUE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE)
   filt <- ChunkedArray$create(p1, p2, p2)
-  expect_vector(
+  expect_as_vector(
     x[filt],
     c(2, 5, 6, 8, 9, 35, 36, 38, 39, 55)
   )
@@ -352,15 +352,15 @@ test_that("[ ChunkedArray", {
 test_that("ChunkedArray head/tail", {
   vec <- 11:20
   a <- ChunkedArray$create(11:15, 16:20)
-  expect_vector(head(a), head(vec))
-  expect_vector(head(a, 4), head(vec, 4))
-  expect_vector(head(a, 40), head(vec, 40))
-  expect_vector(head(a, -4), head(vec, -4))
-  expect_vector(head(a, -40), head(vec, -40))
-  expect_vector(tail(a), tail(vec))
-  expect_vector(tail(a, 4), tail(vec, 4))
-  expect_vector(tail(a, 40), tail(vec, 40))
-  expect_vector(tail(a, -40), tail(vec, -40))
+  expect_as_vector(head(a), head(vec))
+  expect_as_vector(head(a, 4), head(vec, 4))
+  expect_as_vector(head(a, 40), head(vec, 40))
+  expect_as_vector(head(a, -4), head(vec, -4))
+  expect_as_vector(head(a, -40), head(vec, -40))
+  expect_as_vector(tail(a), tail(vec))
+  expect_as_vector(tail(a, 4), tail(vec, 4))
+  expect_as_vector(tail(a, 40), tail(vec, 40))
+  expect_as_vector(tail(a, -40), tail(vec, -40))
 })
 
 test_that("ChunkedArray$Equals", {
@@ -410,4 +410,4 @@ test_that("Handling string data with embedded nuls", {
       fixed = TRUE
     )
   })
-})
\ No newline at end of file
+})
diff --git a/r/tests/testthat/test-compute-arith.R b/r/tests/testthat/test-compute-arith.R
index 9d146fd04e6..0b6d8e8dd17 100644
--- a/r/tests/testthat/test-compute-arith.R
+++ b/r/tests/testthat/test-compute-arith.R
@@ -23,7 +23,7 @@ test_that("Addition", {
   expect_equal(a + 4L, Array$create(c(5:8, NA_integer_)))
   expect_identical(as.vector(a + 4L), c(5:8, NA_integer_))
   expect_equal(a + 4L, Array$create(c(5:8, NA_integer_)))
-  expect_vector(a + 4L, c(5:8, NA_integer_))
+  expect_as_vector(a + 4L, c(5:8, NA_integer_))
   expect_equal(a + NA_integer_, Array$create(rep(NA_integer_, 5)))
 
   a8 <- a$cast(int8())
diff --git a/r/tests/testthat/test-na-omit.R b/r/tests/testthat/test-na-omit.R
new file mode 100644
index 00000000000..fd1372fdc5d
--- /dev/null
+++ b/r/tests/testthat/test-na-omit.R
@@ -0,0 +1,73 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+data_no_na <- c(2:10)
+data_na <- c(data_no_na, NA_real_)
+scalar_na <- Scalar$create(NA)
+scalar_one <- Scalar$create(1)
+tbl <- Table$create(example_data)
+batch <- record_batch(example_data)
+
+test_that("na.fail on Scalar", {
+  expect_as_vector(na.fail(scalar_one), 1)
+  expect_error(na.fail(scalar_na), "missing values in object")
+})
+
+test_that("na.omit on Array and ChunkedArray", {
+  expect_vector_equal(na.omit(input), data_no_na)
+  expect_vector_equal(na.omit(input), data_na, ignore_attr=TRUE)
+})
+
+test_that("na.exclude on Array and ChunkedArray", {
+  expect_vector_equal(na.exclude(input), data_no_na)
+  expect_vector_equal(na.exclude(input), data_na, ignore_attr=TRUE)
+})
+
+test_that("na.fail on Array and ChunkedArray", {
+  expect_vector_equal(na.fail(input), data_no_na, ignore_attr=TRUE)
+  expect_vector_error(na.fail(input), data_na)
+})
+
+test_that("na.fail on Scalar", {
+  expect_error(na.fail(scalar_na), regexp = "missing values in object")
+  expect_as_vector(na.fail(scalar_one), na.fail(1))
+})
+
+test_that("na.omit on Table", {
+  expect_equivalent(as.data.frame(na.omit(tbl)), na.omit(example_data))
+})
+
+test_that("na.exclude on Table", {
+  expect_equivalent(as.data.frame(na.exclude(tbl)), na.exclude(example_data))
+})
+
+test_that("na.fail on Table", {
+  expect_error(na.fail(tbl), "missing values in object")
+})
+
+test_that("na.omit on RecordBatch", {
+  expect_equivalent(as.data.frame(na.omit(batch)), na.omit(example_data))
+})
+
+test_that("na.exclude on RecordBatch", {
+  expect_equivalent(as.data.frame(na.exclude(batch)), na.omit(example_data))
+})
+
+test_that("na.fail on RecordBatch", {
+  expect_error(na.fail(batch), "missing values in object")
+})
+

From f5bd2a5d3021473cfd5bbd52a1848c5f145f7c1f Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Thu, 22 Apr 2021 14:52:43 +0900
Subject: [PATCH 109/719] ARROW-12501: [CI][Ruby] Remove needless workaround
 for MinGW build

Closes #10128 from kou/ci-msys2-remove-needless-workaround

Authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 ci/scripts/msys2_system_upgrade.sh | 20 --------------------
 1 file changed, 20 deletions(-)

diff --git a/ci/scripts/msys2_system_upgrade.sh b/ci/scripts/msys2_system_upgrade.sh
index aecd3089332..646428fbb7a 100755
--- a/ci/scripts/msys2_system_upgrade.sh
+++ b/ci/scripts/msys2_system_upgrade.sh
@@ -19,26 +19,6 @@
 
 set -eux
 
-# https://www.msys2.org/news/#2020-06-29-new-packagers
-msys2_repo_base_url=https://repo.msys2.org/msys
-# Mirror
-msys2_repo_base_url=https://sourceforge.net/projects/msys2/files/REPOS/MSYS2
-msys2_keyring_pkg=msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz
-for suffix in "" ".sig"; do
-  curl \
-    --location \
-    --remote-name \
-    --show-error \
-    --silent \
-    ${msys2_repo_base_url}/x86_64/${msys2_keyring_pkg}${suffix}
-done
-pacman-key --verify ${msys2_keyring_pkg}.sig
-pacman \
-  --noconfirm \
-  --upgrade \
-  ${msys2_keyring_pkg}
-
-
 pacman \
   --noconfirm \
   --refresh \

From 29a9a3e57fde60f427b63dc4c45794cadad87ddc Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Thu, 22 Apr 2021 13:08:48 +0200
Subject: [PATCH 110/719] ARROW-10797: [C++] Vendor and use PCG random
 generator library

Closes #8879 from pitrou/ARROW-10797-pcg

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 LICENSE.txt                                   |   22 +
 .../arrow/compute/kernels/aggregate_test.cc   |   11 +-
 cpp/src/arrow/testing/random.cc               |    7 +-
 cpp/src/arrow/testing/util.cc                 |   11 +-
 cpp/src/arrow/util/pcg_random.h               |   31 +
 cpp/src/arrow/vendored/pcg/README.md          |   26 +
 cpp/src/arrow/vendored/pcg/pcg_extras.hpp     |  670 ++++++
 cpp/src/arrow/vendored/pcg/pcg_random.hpp     | 1954 +++++++++++++++++
 cpp/src/arrow/vendored/pcg/pcg_uint128.hpp    | 1008 +++++++++
 9 files changed, 3730 insertions(+), 10 deletions(-)
 create mode 100644 cpp/src/arrow/util/pcg_random.h
 create mode 100644 cpp/src/arrow/vendored/pcg/README.md
 create mode 100644 cpp/src/arrow/vendored/pcg/pcg_extras.hpp
 create mode 100644 cpp/src/arrow/vendored/pcg/pcg_random.hpp
 create mode 100644 cpp/src/arrow/vendored/pcg/pcg_uint128.hpp

diff --git a/LICENSE.txt b/LICENSE.txt
index 4cec07fd0c9..5d4de206545 100644
--- a/LICENSE.txt
+++ b/LICENSE.txt
@@ -2218,3 +2218,25 @@ https://github.com/pypa/packaging/
 
 which is made available under both the Apache license v2.0 and the
 BSD 2-clause license.
+
+--------------------------------------------------------------------------------
+
+The files in cpp/src/arrow/vendored/pcg contain code from
+
+https://github.com/imneme/pcg-cpp
+
+and have the following copyright notice:
+
+Copyright 2014-2019 Melissa O'Neill <oneill@pcg-random.org>,
+                    and the PCG Project contributors.
+
+SPDX-License-Identifier: (Apache-2.0 OR MIT)
+
+Licensed under the Apache License, Version 2.0 (provided in
+LICENSE-APACHE.txt and at http://www.apache.org/licenses/LICENSE-2.0)
+or under the MIT license (provided in LICENSE-MIT.txt and at
+http://opensource.org/licenses/MIT), at your option. This file may not
+be copied, modified, or distributed except according to those terms.
+
+Distributed on an "AS IS" BASIS, WITHOUT WARRANTY OF ANY KIND, either
+express or implied.  See your chosen license for details.
diff --git a/cpp/src/arrow/compute/kernels/aggregate_test.cc b/cpp/src/arrow/compute/kernels/aggregate_test.cc
index ad7e391495e..da945252de9 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_test.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_test.cc
@@ -1303,12 +1303,17 @@ std::pair<double, double> WelfordVar(const ArrayType& array) {
 template <typename ArrowType>
 class TestVarStdKernelRandom : public TestPrimitiveVarStdKernel<ArrowType> {};
 
-typedef ::testing::Types<Int32Type, UInt32Type, Int64Type, UInt64Type, FloatType,
-                         DoubleType>
-    VarStdRandomTypes;
+using VarStdRandomTypes =
+    ::testing::Types<Int32Type, UInt32Type, Int64Type, UInt64Type, FloatType, DoubleType>;
 
 TYPED_TEST_SUITE(TestVarStdKernelRandom, VarStdRandomTypes);
+
 TYPED_TEST(TestVarStdKernelRandom, Basics) {
+#if defined(__MINGW32__) && !defined(__MINGW64__)
+  if (TypeParam::type_id == Type::FLOAT) {
+    GTEST_SKIP() << "Precision issues on MinGW32 with float32";
+  }
+#endif
   // Cut array into small chunks
   constexpr int array_size = 5000;
   constexpr int chunk_size_max = 50;
diff --git a/cpp/src/arrow/testing/random.cc b/cpp/src/arrow/testing/random.cc
index 83aaee1d340..f8f5d6f10f0 100644
--- a/cpp/src/arrow/testing/random.cc
+++ b/cpp/src/arrow/testing/random.cc
@@ -41,6 +41,7 @@
 #include "arrow/util/decimal.h"
 #include "arrow/util/key_value_metadata.h"
 #include "arrow/util/logging.h"
+#include "arrow/util/pcg_random.h"
 #include "arrow/util/value_parsing.h"
 
 namespace arrow {
@@ -79,7 +80,7 @@ struct GenerateOptions {
       GenerateTypedDataNoNan(data, n);
       return;
     }
-    std::default_random_engine rng(seed_++);
+    pcg32_fast rng(seed_++);
     DistributionType dist(min_, max_);
     std::bernoulli_distribution nan_dist(nan_probability_);
     const ValueType nan_value = std::numeric_limits<ValueType>::quiet_NaN();
@@ -91,7 +92,7 @@ struct GenerateOptions {
   }
 
   void GenerateTypedDataNoNan(ValueType* data, size_t n) {
-    std::default_random_engine rng(seed_++);
+    pcg32_fast rng(seed_++);
     DistributionType dist(min_, max_);
 
     // A static cast is required due to the int16 -> int8 handling.
@@ -100,7 +101,7 @@ struct GenerateOptions {
 
   void GenerateBitmap(uint8_t* buffer, size_t n, int64_t* null_count) {
     int64_t count = 0;
-    std::default_random_engine rng(seed_++);
+    pcg32_fast rng(seed_++);
     std::bernoulli_distribution dist(1.0 - probability_);
 
     for (size_t i = 0; i < n; i++) {
diff --git a/cpp/src/arrow/testing/util.cc b/cpp/src/arrow/testing/util.cc
index 85885cc8e18..a7085e5772d 100644
--- a/cpp/src/arrow/testing/util.cc
+++ b/cpp/src/arrow/testing/util.cc
@@ -42,16 +42,19 @@
 #include "arrow/testing/random.h"
 #include "arrow/util/io_util.h"
 #include "arrow/util/logging.h"
+#include "arrow/util/pcg_random.h"
 
 namespace arrow {
 
+using random::pcg32_fast;
+
 uint64_t random_seed() {
   return std::chrono::high_resolution_clock::now().time_since_epoch().count();
 }
 
 void random_null_bytes(int64_t n, double pct_null, uint8_t* null_bytes) {
   const int random_seed = 0;
-  std::default_random_engine gen(random_seed);
+  pcg32_fast gen(random_seed);
   std::uniform_real_distribution<double> d(0.0, 1.0);
   std::generate(null_bytes, null_bytes + n,
                 [&d, &gen, &pct_null] { return d(gen) > pct_null; });
@@ -59,7 +62,7 @@ void random_null_bytes(int64_t n, double pct_null, uint8_t* null_bytes) {
 
 void random_is_valid(int64_t n, double pct_null, std::vector<bool>* is_valid,
                      int random_seed) {
-  std::default_random_engine gen(random_seed);
+  pcg32_fast gen(random_seed);
   std::uniform_real_distribution<double> d(0.0, 1.0);
   is_valid->resize(n, false);
   std::generate(is_valid->begin(), is_valid->end(),
@@ -67,7 +70,7 @@ void random_is_valid(int64_t n, double pct_null, std::vector<bool>* is_valid,
 }
 
 void random_bytes(int64_t n, uint32_t seed, uint8_t* out) {
-  std::default_random_engine gen(seed);
+  pcg32_fast gen(seed);
   std::uniform_int_distribution<uint32_t> d(0, std::numeric_limits<uint8_t>::max());
   std::generate(out, out + n, [&d, &gen] { return static_cast<uint8_t>(d(gen)); });
 }
@@ -80,7 +83,7 @@ std::string random_string(int64_t n, uint32_t seed) {
 }
 
 void random_decimals(int64_t n, uint32_t seed, int32_t precision, uint8_t* out) {
-  std::default_random_engine gen(seed);
+  pcg32_fast gen(seed);
   std::uniform_int_distribution<uint32_t> d(0, std::numeric_limits<uint8_t>::max());
   const int32_t required_bytes = DecimalType::DecimalSize(precision);
   constexpr int32_t byte_width = 16;
diff --git a/cpp/src/arrow/util/pcg_random.h b/cpp/src/arrow/util/pcg_random.h
new file mode 100644
index 00000000000..a53e9ec310e
--- /dev/null
+++ b/cpp/src/arrow/util/pcg_random.h
@@ -0,0 +1,31 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "arrow/vendored/pcg/pcg_random.hpp"  // IWYU pragma: export
+
+namespace arrow {
+namespace random {
+
+using pcg32 = ::arrow_vendored::pcg32;
+using pcg64 = ::arrow_vendored::pcg64;
+using pcg32_fast = ::arrow_vendored::pcg32_fast;
+using pcg64_fast = ::arrow_vendored::pcg64_fast;
+
+}  // namespace random
+}  // namespace arrow
diff --git a/cpp/src/arrow/vendored/pcg/README.md b/cpp/src/arrow/vendored/pcg/README.md
new file mode 100644
index 00000000000..bf72ea8973e
--- /dev/null
+++ b/cpp/src/arrow/vendored/pcg/README.md
@@ -0,0 +1,26 @@
+<!--
+PCG Random Number Generation for C++
+
+Copyright 2014-2019 Melissa O'Neill <oneill@pcg-random.org>,
+                    and the PCG Project contributors.
+
+SPDX-License-Identifier: (Apache-2.0 OR MIT)
+
+Licensed under the Apache License, Version 2.0 (provided in
+LICENSE-APACHE.txt and at http://www.apache.org/licenses/LICENSE-2.0)
+or under the MIT license (provided in LICENSE-MIT.txt and at
+http://opensource.org/licenses/MIT), at your option. This file may not
+be copied, modified, or distributed except according to those terms.
+
+Distributed on an "AS IS" BASIS, WITHOUT WARRANTY OF ANY KIND, either
+express or implied.  See your chosen license for details.
+
+For additional information about the PCG random number generation scheme,
+visit http://www.pcg-random.org/.
+-->
+
+Sources are taken from git changeset ffd522e7188bef30a00c74dc7eb9de5faff90092
+(https://github.com/imneme/pcg-cpp).
+
+Changes:
+- enclosed in `arrow_vendored` namespace
diff --git a/cpp/src/arrow/vendored/pcg/pcg_extras.hpp b/cpp/src/arrow/vendored/pcg/pcg_extras.hpp
new file mode 100644
index 00000000000..760867e1ebe
--- /dev/null
+++ b/cpp/src/arrow/vendored/pcg/pcg_extras.hpp
@@ -0,0 +1,670 @@
+/*
+ * PCG Random Number Generation for C++
+ *
+ * Copyright 2014-2017 Melissa O'Neill <oneill@pcg-random.org>,
+ *                     and the PCG Project contributors.
+ *
+ * SPDX-License-Identifier: (Apache-2.0 OR MIT)
+ *
+ * Licensed under the Apache License, Version 2.0 (provided in
+ * LICENSE-APACHE.txt and at http://www.apache.org/licenses/LICENSE-2.0)
+ * or under the MIT license (provided in LICENSE-MIT.txt and at
+ * http://opensource.org/licenses/MIT), at your option. This file may not
+ * be copied, modified, or distributed except according to those terms.
+ *
+ * Distributed on an "AS IS" BASIS, WITHOUT WARRANTY OF ANY KIND, either
+ * express or implied.  See your chosen license for details.
+ *
+ * For additional information about the PCG random number generation scheme,
+ * visit http://www.pcg-random.org/.
+ */
+
+/*
+ * This file provides support code that is useful for random-number generation
+ * but not specific to the PCG generation scheme, including:
+ *      - 128-bit int support for platforms where it isn't available natively
+ *      - bit twiddling operations
+ *      - I/O of 128-bit and 8-bit integers
+ *      - Handling the evilness of SeedSeq
+ *      - Support for efficiently producing random numbers less than a given
+ *        bound
+ */
+
+#ifndef PCG_EXTRAS_HPP_INCLUDED
+#define PCG_EXTRAS_HPP_INCLUDED 1
+
+#include <cinttypes>
+#include <cstddef>
+#include <cstdlib>
+#include <cstring>
+#include <cassert>
+#include <limits>
+#include <iostream>
+#include <type_traits>
+#include <utility>
+#include <locale>
+#include <iterator>
+
+#ifdef __GNUC__
+    #include <cxxabi.h>
+#endif
+
+/*
+ * Abstractions for compiler-specific directives
+ */
+
+#ifdef __GNUC__
+    #define PCG_NOINLINE __attribute__((noinline))
+#else
+    #define PCG_NOINLINE
+#endif
+
+/*
+ * Some members of the PCG library use 128-bit math.  When compiling on 64-bit
+ * platforms, both GCC and Clang provide 128-bit integer types that are ideal
+ * for the job.
+ *
+ * On 32-bit platforms (or with other compilers), we fall back to a C++
+ * class that provides 128-bit unsigned integers instead.  It may seem
+ * like we're reinventing the wheel here, because libraries already exist
+ * that support large integers, but most existing libraries provide a very
+ * generic multiprecision code, but here we're operating at a fixed size.
+ * Also, most other libraries are fairly heavyweight.  So we use a direct
+ * implementation.  Sadly, it's much slower than hand-coded assembly or
+ * direct CPU support.
+ *
+ */
+#if __SIZEOF_INT128__ && !PCG_FORCE_EMULATED_128BIT_MATH
+    namespace arrow_vendored {
+    namespace pcg_extras {
+        typedef __uint128_t pcg128_t;
+    }
+    }
+    #define PCG_128BIT_CONSTANT(high,low) \
+            ((pcg_extras::pcg128_t(high) << 64) + low)
+#else
+    #include "pcg_uint128.hpp"
+    namespace arrow_vendored {
+    namespace pcg_extras {
+        typedef pcg_extras::uint_x4<uint32_t,uint64_t> pcg128_t;
+    }
+    }
+    #define PCG_128BIT_CONSTANT(high,low) \
+            pcg_extras::pcg128_t(high,low)
+    #define PCG_EMULATED_128BIT_MATH 1
+#endif
+
+
+namespace arrow_vendored {
+namespace pcg_extras {
+
+/*
+ * We often need to represent a "number of bits".  When used normally, these
+ * numbers are never greater than 128, so an unsigned char is plenty.
+ * If you're using a nonstandard generator of a larger size, you can set
+ * PCG_BITCOUNT_T to have it define it as a larger size.  (Some compilers
+ * might produce faster code if you set it to an unsigned int.)
+ */
+
+#ifndef PCG_BITCOUNT_T
+    typedef uint8_t bitcount_t;
+#else
+    typedef PCG_BITCOUNT_T bitcount_t;
+#endif
+
+/*
+ * C++ requires us to be able to serialize RNG state by printing or reading
+ * it from a stream.  Because we use 128-bit ints, we also need to be able
+ * ot print them, so here is code to do so.
+ *
+ * This code provides enough functionality to print 128-bit ints in decimal
+ * and zero-padded in hex.  It's not a full-featured implementation.
+ */
+
+template <typename CharT, typename Traits>
+std::basic_ostream<CharT,Traits>&
+operator<<(std::basic_ostream<CharT,Traits>& out, pcg128_t value)
+{
+    auto desired_base = out.flags() & out.basefield;
+    bool want_hex = desired_base == out.hex;
+
+    if (want_hex) {
+        uint64_t highpart = uint64_t(value >> 64);
+        uint64_t lowpart  = uint64_t(value);
+        auto desired_width = out.width();
+        if (desired_width > 16) {
+            out.width(desired_width - 16);
+        }
+        if (highpart != 0 || desired_width > 16)
+            out << highpart;
+        CharT oldfill = '\0';
+        if (highpart != 0) {
+            out.width(16);
+            oldfill = out.fill('0');
+        }
+        auto oldflags = out.setf(decltype(desired_base){}, out.showbase);
+        out << lowpart;
+        out.setf(oldflags);
+        if (highpart != 0) {
+            out.fill(oldfill);
+        }
+        return out;
+    }
+    constexpr size_t MAX_CHARS_128BIT = 40;
+
+    char buffer[MAX_CHARS_128BIT];
+    char* pos = buffer+sizeof(buffer);
+    *(--pos) = '\0';
+    constexpr auto BASE = pcg128_t(10ULL);
+    do {
+        auto div = value / BASE;
+        auto mod = uint32_t(value - (div * BASE));
+        *(--pos) = '0' + char(mod);
+        value = div;
+    } while(value != pcg128_t(0ULL));
+    return out << pos;
+}
+
+template <typename CharT, typename Traits>
+std::basic_istream<CharT,Traits>&
+operator>>(std::basic_istream<CharT,Traits>& in, pcg128_t& value)
+{
+    typename std::basic_istream<CharT,Traits>::sentry s(in);
+
+    if (!s)
+         return in;
+
+    constexpr auto BASE = pcg128_t(10ULL);
+    pcg128_t current(0ULL);
+    bool did_nothing = true;
+    bool overflow = false;
+    for(;;) {
+        CharT wide_ch = in.get();
+        if (!in.good())
+            break;
+        auto ch = in.narrow(wide_ch, '\0');
+        if (ch < '0' || ch > '9') {
+            in.unget();
+            break;
+        }
+        did_nothing = false;
+        pcg128_t digit(uint32_t(ch - '0'));
+        pcg128_t timesbase = current*BASE;
+        overflow = overflow || timesbase < current;
+        current = timesbase + digit;
+        overflow = overflow || current < digit;
+    }
+
+    if (did_nothing || overflow) {
+        in.setstate(std::ios::failbit);
+        if (overflow)
+            current = ~pcg128_t(0ULL);
+    }
+
+    value = current;
+
+    return in;
+}
+
+/*
+ * Likewise, if people use tiny rngs, we'll be serializing uint8_t.
+ * If we just used the provided IO operators, they'd read/write chars,
+ * not ints, so we need to define our own.  We *can* redefine this operator
+ * here because we're in our own namespace.
+ */
+
+template <typename CharT, typename Traits>
+std::basic_ostream<CharT,Traits>&
+operator<<(std::basic_ostream<CharT,Traits>&out, uint8_t value)
+{
+    return out << uint32_t(value);
+}
+
+template <typename CharT, typename Traits>
+std::basic_istream<CharT,Traits>&
+operator>>(std::basic_istream<CharT,Traits>& in, uint8_t& target)
+{
+    uint32_t value = 0xdecea5edU;
+    in >> value;
+    if (!in && value == 0xdecea5edU)
+        return in;
+    if (value > uint8_t(~0)) {
+        in.setstate(std::ios::failbit);
+        value = ~0U;
+    }
+    target = uint8_t(value);
+    return in;
+}
+
+/* Unfortunately, the above functions don't get found in preference to the
+ * built in ones, so we create some more specific overloads that will.
+ * Ugh.
+ */
+
+inline std::ostream& operator<<(std::ostream& out, uint8_t value)
+{
+    return pcg_extras::operator<< <char>(out, value);
+}
+
+inline std::istream& operator>>(std::istream& in, uint8_t& value)
+{
+    return pcg_extras::operator>> <char>(in, value);
+}
+
+
+
+/*
+ * Useful bitwise operations.
+ */
+
+/*
+ * XorShifts are invertable, but they are someting of a pain to invert.
+ * This function backs them out.  It's used by the whacky "inside out"
+ * generator defined later.
+ */
+
+template <typename itype>
+inline itype unxorshift(itype x, bitcount_t bits, bitcount_t shift)
+{
+    if (2*shift >= bits) {
+        return x ^ (x >> shift);
+    }
+    itype lowmask1 = (itype(1U) << (bits - shift*2)) - 1;
+    itype highmask1 = ~lowmask1;
+    itype top1 = x;
+    itype bottom1 = x & lowmask1;
+    top1 ^= top1 >> shift;
+    top1 &= highmask1;
+    x = top1 | bottom1;
+    itype lowmask2 = (itype(1U) << (bits - shift)) - 1;
+    itype bottom2 = x & lowmask2;
+    bottom2 = unxorshift(bottom2, bits - shift, shift);
+    bottom2 &= lowmask1;
+    return top1 | bottom2;
+}
+
+/*
+ * Rotate left and right.
+ *
+ * In ideal world, compilers would spot idiomatic rotate code and convert it
+ * to a rotate instruction.  Of course, opinions vary on what the correct
+ * idiom is and how to spot it.  For clang, sometimes it generates better
+ * (but still crappy) code if you define PCG_USE_ZEROCHECK_ROTATE_IDIOM.
+ */
+
+template <typename itype>
+inline itype rotl(itype value, bitcount_t rot)
+{
+    constexpr bitcount_t bits = sizeof(itype) * 8;
+    constexpr bitcount_t mask = bits - 1;
+#if PCG_USE_ZEROCHECK_ROTATE_IDIOM
+    return rot ? (value << rot) | (value >> (bits - rot)) : value;
+#else
+    return (value << rot) | (value >> ((- rot) & mask));
+#endif
+}
+
+template <typename itype>
+inline itype rotr(itype value, bitcount_t rot)
+{
+    constexpr bitcount_t bits = sizeof(itype) * 8;
+    constexpr bitcount_t mask = bits - 1;
+#if PCG_USE_ZEROCHECK_ROTATE_IDIOM
+    return rot ? (value >> rot) | (value << (bits - rot)) : value;
+#else
+    return (value >> rot) | (value << ((- rot) & mask));
+#endif
+}
+
+/* Unfortunately, both Clang and GCC sometimes perform poorly when it comes
+ * to properly recognizing idiomatic rotate code, so for we also provide
+ * assembler directives (enabled with PCG_USE_INLINE_ASM).  Boo, hiss.
+ * (I hope that these compilers get better so that this code can die.)
+ *
+ * These overloads will be preferred over the general template code above.
+ */
+#if PCG_USE_INLINE_ASM && __GNUC__ && (__x86_64__  || __i386__)
+
+inline uint8_t rotr(uint8_t value, bitcount_t rot)
+{
+    asm ("rorb   %%cl, %0" : "=r" (value) : "0" (value), "c" (rot));
+    return value;
+}
+
+inline uint16_t rotr(uint16_t value, bitcount_t rot)
+{
+    asm ("rorw   %%cl, %0" : "=r" (value) : "0" (value), "c" (rot));
+    return value;
+}
+
+inline uint32_t rotr(uint32_t value, bitcount_t rot)
+{
+    asm ("rorl   %%cl, %0" : "=r" (value) : "0" (value), "c" (rot));
+    return value;
+}
+
+#if __x86_64__
+inline uint64_t rotr(uint64_t value, bitcount_t rot)
+{
+    asm ("rorq   %%cl, %0" : "=r" (value) : "0" (value), "c" (rot));
+    return value;
+}
+#endif // __x86_64__
+
+#elif defined(_MSC_VER)
+  // Use MSVC++ bit rotation intrinsics
+
+#pragma intrinsic(_rotr, _rotr64, _rotr8, _rotr16)
+
+inline uint8_t rotr(uint8_t value, bitcount_t rot)
+{
+    return _rotr8(value, rot);
+}
+
+inline uint16_t rotr(uint16_t value, bitcount_t rot)
+{
+    return _rotr16(value, rot);
+}
+
+inline uint32_t rotr(uint32_t value, bitcount_t rot)
+{
+    return _rotr(value, rot);
+}
+
+inline uint64_t rotr(uint64_t value, bitcount_t rot)
+{
+    return _rotr64(value, rot);
+}
+
+#endif // PCG_USE_INLINE_ASM
+
+
+/*
+ * The C++ SeedSeq concept (modelled by seed_seq) can fill an array of
+ * 32-bit integers with seed data, but sometimes we want to produce
+ * larger or smaller integers.
+ *
+ * The following code handles this annoyance.
+ *
+ * uneven_copy will copy an array of 32-bit ints to an array of larger or
+ * smaller ints (actually, the code is general it only needing forward
+ * iterators).  The copy is identical to the one that would be performed if
+ * we just did memcpy on a standard little-endian machine, but works
+ * regardless of the endian of the machine (or the weirdness of the ints
+ * involved).
+ *
+ * generate_to initializes an array of integers using a SeedSeq
+ * object.  It is given the size as a static constant at compile time and
+ * tries to avoid memory allocation.  If we're filling in 32-bit constants
+ * we just do it directly.  If we need a separate buffer and it's small,
+ * we allocate it on the stack.  Otherwise, we fall back to heap allocation.
+ * Ugh.
+ *
+ * generate_one produces a single value of some integral type using a
+ * SeedSeq object.
+ */
+
+ /* uneven_copy helper, case where destination ints are less than 32 bit. */
+
+template<class SrcIter, class DestIter>
+SrcIter uneven_copy_impl(
+    SrcIter src_first, DestIter dest_first, DestIter dest_last,
+    std::true_type)
+{
+    typedef typename std::iterator_traits<SrcIter>::value_type  src_t;
+    typedef typename std::iterator_traits<DestIter>::value_type dest_t;
+
+    constexpr bitcount_t SRC_SIZE  = sizeof(src_t);
+    constexpr bitcount_t DEST_SIZE = sizeof(dest_t);
+    constexpr bitcount_t DEST_BITS = DEST_SIZE * 8;
+    constexpr bitcount_t SCALE     = SRC_SIZE / DEST_SIZE;
+
+    size_t count = 0;
+    src_t value = 0;
+
+    while (dest_first != dest_last) {
+        if ((count++ % SCALE) == 0)
+            value = *src_first++;       // Get more bits
+        else
+            value >>= DEST_BITS;        // Move down bits
+
+        *dest_first++ = dest_t(value);  // Truncates, ignores high bits.
+    }
+    return src_first;
+}
+
+ /* uneven_copy helper, case where destination ints are more than 32 bit. */
+
+template<class SrcIter, class DestIter>
+SrcIter uneven_copy_impl(
+    SrcIter src_first, DestIter dest_first, DestIter dest_last,
+    std::false_type)
+{
+    typedef typename std::iterator_traits<SrcIter>::value_type  src_t;
+    typedef typename std::iterator_traits<DestIter>::value_type dest_t;
+
+    constexpr auto SRC_SIZE  = sizeof(src_t);
+    constexpr auto SRC_BITS  = SRC_SIZE * 8;
+    constexpr auto DEST_SIZE = sizeof(dest_t);
+    constexpr auto SCALE     = (DEST_SIZE+SRC_SIZE-1) / SRC_SIZE;
+
+    while (dest_first != dest_last) {
+        dest_t value(0UL);
+        unsigned int shift = 0;
+
+        for (size_t i = 0; i < SCALE; ++i) {
+            value |= dest_t(*src_first++) << shift;
+            shift += SRC_BITS;
+        }
+
+        *dest_first++ = value;
+    }
+    return src_first;
+}
+
+/* uneven_copy, call the right code for larger vs. smaller */
+
+template<class SrcIter, class DestIter>
+inline SrcIter uneven_copy(SrcIter src_first,
+                           DestIter dest_first, DestIter dest_last)
+{
+    typedef typename std::iterator_traits<SrcIter>::value_type  src_t;
+    typedef typename std::iterator_traits<DestIter>::value_type dest_t;
+
+    constexpr bool DEST_IS_SMALLER = sizeof(dest_t) < sizeof(src_t);
+
+    return uneven_copy_impl(src_first, dest_first, dest_last,
+                            std::integral_constant<bool, DEST_IS_SMALLER>{});
+}
+
+/* generate_to, fill in a fixed-size array of integral type using a SeedSeq
+ * (actually works for any random-access iterator)
+ */
+
+template <size_t size, typename SeedSeq, typename DestIter>
+inline void generate_to_impl(SeedSeq&& generator, DestIter dest,
+                             std::true_type)
+{
+    generator.generate(dest, dest+size);
+}
+
+template <size_t size, typename SeedSeq, typename DestIter>
+void generate_to_impl(SeedSeq&& generator, DestIter dest,
+                      std::false_type)
+{
+    typedef typename std::iterator_traits<DestIter>::value_type dest_t;
+    constexpr auto DEST_SIZE = sizeof(dest_t);
+    constexpr auto GEN_SIZE  = sizeof(uint32_t);
+
+    constexpr bool GEN_IS_SMALLER = GEN_SIZE < DEST_SIZE;
+    constexpr size_t FROM_ELEMS =
+        GEN_IS_SMALLER
+            ? size * ((DEST_SIZE+GEN_SIZE-1) / GEN_SIZE)
+            : (size + (GEN_SIZE / DEST_SIZE) - 1)
+                / ((GEN_SIZE / DEST_SIZE) + GEN_IS_SMALLER);
+                        //  this odd code ^^^^^^^^^^^^^^^^^ is work-around for
+                        //  a bug: http://llvm.org/bugs/show_bug.cgi?id=21287
+
+    if (FROM_ELEMS <= 1024) {
+        uint32_t buffer[FROM_ELEMS];
+        generator.generate(buffer, buffer+FROM_ELEMS);
+        uneven_copy(buffer, dest, dest+size);
+    } else {
+        uint32_t* buffer = static_cast<uint32_t*>(malloc(GEN_SIZE * FROM_ELEMS));
+        generator.generate(buffer, buffer+FROM_ELEMS);
+        uneven_copy(buffer, dest, dest+size);
+        free(static_cast<void*>(buffer));
+    }
+}
+
+template <size_t size, typename SeedSeq, typename DestIter>
+inline void generate_to(SeedSeq&& generator, DestIter dest)
+{
+    typedef typename std::iterator_traits<DestIter>::value_type dest_t;
+    constexpr bool IS_32BIT = sizeof(dest_t) == sizeof(uint32_t);
+
+    generate_to_impl<size>(std::forward<SeedSeq>(generator), dest,
+                           std::integral_constant<bool, IS_32BIT>{});
+}
+
+/* generate_one, produce a value of integral type using a SeedSeq
+ * (optionally, we can have it produce more than one and pick which one
+ * we want)
+ */
+
+template <typename UInt, size_t i = 0UL, size_t N = i+1UL, typename SeedSeq>
+inline UInt generate_one(SeedSeq&& generator)
+{
+    UInt result[N];
+    generate_to<N>(std::forward<SeedSeq>(generator), result);
+    return result[i];
+}
+
+template <typename RngType>
+auto bounded_rand(RngType& rng, typename RngType::result_type upper_bound)
+        -> typename RngType::result_type
+{
+    typedef typename RngType::result_type rtype;
+    rtype threshold = (RngType::max() - RngType::min() + rtype(1) - upper_bound)
+                    % upper_bound;
+    for (;;) {
+        rtype r = rng() - RngType::min();
+        if (r >= threshold)
+            return r % upper_bound;
+    }
+}
+
+template <typename Iter, typename RandType>
+void shuffle(Iter from, Iter to, RandType&& rng)
+{
+    typedef typename std::iterator_traits<Iter>::difference_type delta_t;
+    typedef typename std::remove_reference<RandType>::type::result_type result_t;
+    auto count = to - from;
+    while (count > 1) {
+        delta_t chosen = delta_t(bounded_rand(rng, result_t(count)));
+        --count;
+        --to;
+        using std::swap;
+        swap(*(from + chosen), *to);
+    }
+}
+
+/*
+ * Although std::seed_seq is useful, it isn't everything.  Often we want to
+ * initialize a random-number generator some other way, such as from a random
+ * device.
+ *
+ * Technically, it does not meet the requirements of a SeedSequence because
+ * it lacks some of the rarely-used member functions (some of which would
+ * be impossible to provide).  However the C++ standard is quite specific
+ * that actual engines only called the generate method, so it ought not to be
+ * a problem in practice.
+ */
+
+template <typename RngType>
+class seed_seq_from {
+private:
+    RngType rng_;
+
+    typedef uint_least32_t result_type;
+
+public:
+    template<typename... Args>
+    seed_seq_from(Args&&... args) :
+        rng_(std::forward<Args>(args)...)
+    {
+        // Nothing (else) to do...
+    }
+
+    template<typename Iter>
+    void generate(Iter start, Iter finish)
+    {
+        for (auto i = start; i != finish; ++i)
+            *i = result_type(rng_());
+    }
+
+    constexpr size_t size() const
+    {
+        return (sizeof(typename RngType::result_type) > sizeof(result_type)
+                && RngType::max() > ~size_t(0UL))
+             ? ~size_t(0UL)
+             : size_t(RngType::max());
+    }
+};
+
+/*
+ * Sometimes you might want a distinct seed based on when the program
+ * was compiled.  That way, a particular instance of the program will
+ * behave the same way, but when recompiled it'll produce a different
+ * value.
+ */
+
+template <typename IntType>
+struct static_arbitrary_seed {
+private:
+    static constexpr IntType fnv(IntType hash, const char* pos) {
+        return *pos == '\0'
+             ? hash
+             : fnv((hash * IntType(16777619U)) ^ *pos, (pos+1));
+    }
+
+public:
+    static constexpr IntType value = fnv(IntType(2166136261U ^ sizeof(IntType)),
+                        __DATE__ __TIME__ __FILE__);
+};
+
+// Sometimes, when debugging or testing, it's handy to be able print the name
+// of a (in human-readable form).  This code allows the idiom:
+//
+//      cout << printable_typename<my_foo_type_t>()
+//
+// to print out my_foo_type_t (or its concrete type if it is a synonym)
+
+#if __cpp_rtti || __GXX_RTTI
+
+template <typename T>
+struct printable_typename {};
+
+template <typename T>
+std::ostream& operator<<(std::ostream& out, printable_typename<T>) {
+    const char *implementation_typename = typeid(T).name();
+#ifdef __GNUC__
+    int status;
+    char* pretty_name =
+        abi::__cxa_demangle(implementation_typename, nullptr, nullptr, &status);
+    if (status == 0)
+        out << pretty_name;
+    free(static_cast<void*>(pretty_name));
+    if (status == 0)
+        return out;
+#endif
+    out << implementation_typename;
+    return out;
+}
+
+#endif  // __cpp_rtti || __GXX_RTTI
+
+} // namespace pcg_extras
+} // namespace arrow_vendored
+
+#endif // PCG_EXTRAS_HPP_INCLUDED
diff --git a/cpp/src/arrow/vendored/pcg/pcg_random.hpp b/cpp/src/arrow/vendored/pcg/pcg_random.hpp
new file mode 100644
index 00000000000..a864ba0a2c5
--- /dev/null
+++ b/cpp/src/arrow/vendored/pcg/pcg_random.hpp
@@ -0,0 +1,1954 @@
+/*
+ * PCG Random Number Generation for C++
+ *
+ * Copyright 2014-2019 Melissa O'Neill <oneill@pcg-random.org>,
+ *                     and the PCG Project contributors.
+ *
+ * SPDX-License-Identifier: (Apache-2.0 OR MIT)
+ *
+ * Licensed under the Apache License, Version 2.0 (provided in
+ * LICENSE-APACHE.txt and at http://www.apache.org/licenses/LICENSE-2.0)
+ * or under the MIT license (provided in LICENSE-MIT.txt and at
+ * http://opensource.org/licenses/MIT), at your option. This file may not
+ * be copied, modified, or distributed except according to those terms.
+ *
+ * Distributed on an "AS IS" BASIS, WITHOUT WARRANTY OF ANY KIND, either
+ * express or implied.  See your chosen license for details.
+ *
+ * For additional information about the PCG random number generation scheme,
+ * visit http://www.pcg-random.org/.
+ */
+
+/*
+ * This code provides the reference implementation of the PCG family of
+ * random number generators.  The code is complex because it implements
+ *
+ *      - several members of the PCG family, specifically members corresponding
+ *        to the output functions:
+ *             - XSH RR         (good for 64-bit state, 32-bit output)
+ *             - XSH RS         (good for 64-bit state, 32-bit output)
+ *             - XSL RR         (good for 128-bit state, 64-bit output)
+ *             - RXS M XS       (statistically most powerful generator)
+ *             - XSL RR RR      (good for 128-bit state, 128-bit output)
+ *             - and RXS, RXS M, XSH, XSL       (mostly for testing)
+ *      - at potentially *arbitrary* bit sizes
+ *      - with four different techniques for random streams (MCG, one-stream
+ *        LCG, settable-stream LCG, unique-stream LCG)
+ *      - and the extended generation schemes allowing arbitrary periods
+ *      - with all features of C++11 random number generation (and more),
+ *        some of which are somewhat painful, including
+ *            - initializing with a SeedSequence which writes 32-bit values
+ *              to memory, even though the state of the generator may not
+ *              use 32-bit values (it might use smaller or larger integers)
+ *            - I/O for RNGs and a prescribed format, which needs to handle
+ *              the issue that 8-bit and 128-bit integers don't have working
+ *              I/O routines (e.g., normally 8-bit = char, not integer)
+ *            - equality and inequality for RNGs
+ *      - and a number of convenience typedefs to mask all the complexity
+ *
+ * The code employes a fairly heavy level of abstraction, and has to deal
+ * with various C++ minutia.  If you're looking to learn about how the PCG
+ * scheme works, you're probably best of starting with one of the other
+ * codebases (see www.pcg-random.org).  But if you're curious about the
+ * constants for the various output functions used in those other, simpler,
+ * codebases, this code shows how they are calculated.
+ *
+ * On the positive side, at least there are convenience typedefs so that you
+ * can say
+ *
+ *      pcg32 myRNG;
+ *
+ * rather than:
+ *
+ *      pcg_detail::engine<
+ *          uint32_t,                                           // Output Type
+ *          uint64_t,                                           // State Type
+ *          pcg_detail::xsh_rr_mixin<uint32_t, uint64_t>, true, // Output Func
+ *          pcg_detail::specific_stream<uint64_t>,              // Stream Kind
+ *          pcg_detail::default_multiplier<uint64_t>            // LCG Mult
+ *      > myRNG;
+ *
+ */
+
+#ifndef PCG_RAND_HPP_INCLUDED
+#define PCG_RAND_HPP_INCLUDED 1
+
+#include <algorithm>
+#include <cinttypes>
+#include <cstddef>
+#include <cstdlib>
+#include <cstring>
+#include <cassert>
+#include <limits>
+#include <iostream>
+#include <iterator>
+#include <type_traits>
+#include <utility>
+#include <locale>
+#include <new>
+#include <stdexcept>
+
+#ifdef _MSC_VER
+    #pragma warning(disable:4146)
+#endif
+
+#ifdef _MSC_VER
+    #define PCG_ALWAYS_INLINE __forceinline
+#elif __GNUC__
+    #define PCG_ALWAYS_INLINE __attribute__((always_inline))
+#else
+    #define PCG_ALWAYS_INLINE inline
+#endif
+
+/*
+ * The pcg_extras namespace contains some support code that is likley to
+ * be useful for a variety of RNGs, including:
+ *      - 128-bit int support for platforms where it isn't available natively
+ *      - bit twiddling operations
+ *      - I/O of 128-bit and 8-bit integers
+ *      - Handling the evilness of SeedSeq
+ *      - Support for efficiently producing random numbers less than a given
+ *        bound
+ */
+
+#include "pcg_extras.hpp"
+
+namespace arrow_vendored {
+namespace pcg_detail {
+
+using namespace pcg_extras;
+
+/*
+ * The LCG generators need some constants to function.  This code lets you
+ * look up the constant by *type*.  For example
+ *
+ *      default_multiplier<uint32_t>::multiplier()
+ *
+ * gives you the default multipler for 32-bit integers.  We use the name
+ * of the constant and not a generic word like value to allow these classes
+ * to be used as mixins.
+ */
+
+template <typename T>
+struct default_multiplier {
+    // Not defined for an arbitrary type
+};
+
+template <typename T>
+struct default_increment {
+    // Not defined for an arbitrary type
+};
+
+#define PCG_DEFINE_CONSTANT(type, what, kind, constant) \
+        template <>                                     \
+        struct what ## _ ## kind<type> {                \
+            static constexpr type kind() {              \
+                return constant;                        \
+            }                                           \
+        };
+
+PCG_DEFINE_CONSTANT(uint8_t,  default, multiplier, 141U)
+PCG_DEFINE_CONSTANT(uint8_t,  default, increment,  77U)
+
+PCG_DEFINE_CONSTANT(uint16_t, default, multiplier, 12829U)
+PCG_DEFINE_CONSTANT(uint16_t, default, increment,  47989U)
+
+PCG_DEFINE_CONSTANT(uint32_t, default, multiplier, 747796405U)
+PCG_DEFINE_CONSTANT(uint32_t, default, increment,  2891336453U)
+
+PCG_DEFINE_CONSTANT(uint64_t, default, multiplier, 6364136223846793005ULL)
+PCG_DEFINE_CONSTANT(uint64_t, default, increment,  1442695040888963407ULL)
+
+PCG_DEFINE_CONSTANT(pcg128_t, default, multiplier,
+        PCG_128BIT_CONSTANT(2549297995355413924ULL,4865540595714422341ULL))
+PCG_DEFINE_CONSTANT(pcg128_t, default, increment,
+        PCG_128BIT_CONSTANT(6364136223846793005ULL,1442695040888963407ULL))
+
+/* Alternative (cheaper) multipliers for 128-bit */
+
+template <typename T>
+struct cheap_multiplier : public default_multiplier<T> {
+    // For most types just use the default.
+};
+
+template <>
+struct cheap_multiplier<pcg128_t> {
+    static constexpr uint64_t multiplier() {
+        return 0xda942042e4dd58b5ULL;
+    }
+};
+
+
+/*
+ * Each PCG generator is available in four variants, based on how it applies
+ * the additive constant for its underlying LCG; the variations are:
+ *
+ *     single stream   - all instances use the same fixed constant, thus
+ *                       the RNG always somewhere in same sequence
+ *     mcg             - adds zero, resulting in a single stream and reduced
+ *                       period
+ *     specific stream - the constant can be changed at any time, selecting
+ *                       a different random sequence
+ *     unique stream   - the constant is based on the memory address of the
+ *                       object, thus every RNG has its own unique sequence
+ *
+ * This variation is provided though mixin classes which define a function
+ * value called increment() that returns the nesessary additive constant.
+ */
+
+
+
+/*
+ * unique stream
+ */
+
+
+template <typename itype>
+class unique_stream {
+protected:
+    static constexpr bool is_mcg = false;
+
+    // Is never called, but is provided for symmetry with specific_stream
+    void set_stream(...)
+    {
+        abort();
+    }
+
+public:
+    typedef itype state_type;
+
+    constexpr itype increment() const {
+        return itype(reinterpret_cast<uintptr_t>(this) | 1);
+    }
+
+    constexpr itype stream() const
+    {
+         return increment() >> 1;
+    }
+
+    static constexpr bool can_specify_stream = false;
+
+    static constexpr size_t streams_pow2()
+    {
+        return (sizeof(itype) < sizeof(size_t) ? sizeof(itype)
+                                               : sizeof(size_t))*8 - 1u;
+    }
+
+protected:
+    constexpr unique_stream() = default;
+};
+
+
+/*
+ * no stream (mcg)
+ */
+
+template <typename itype>
+class no_stream {
+protected:
+    static constexpr bool is_mcg = true;
+
+    // Is never called, but is provided for symmetry with specific_stream
+    void set_stream(...)
+    {
+        abort();
+    }
+
+public:
+    typedef itype state_type;
+
+    static constexpr itype increment() {
+        return 0;
+    }
+
+    static constexpr bool can_specify_stream = false;
+
+    static constexpr size_t streams_pow2()
+    {
+        return 0u;
+    }
+
+protected:
+    constexpr no_stream() = default;
+};
+
+
+/*
+ * single stream/sequence (oneseq)
+ */
+
+template <typename itype>
+class oneseq_stream : public default_increment<itype> {
+protected:
+    static constexpr bool is_mcg = false;
+
+    // Is never called, but is provided for symmetry with specific_stream
+    void set_stream(...)
+    {
+        abort();
+    }
+
+public:
+    typedef itype state_type;
+
+    static constexpr itype stream()
+    {
+         return default_increment<itype>::increment() >> 1;
+    }
+
+    static constexpr bool can_specify_stream = false;
+
+    static constexpr size_t streams_pow2()
+    {
+        return 0u;
+    }
+
+protected:
+    constexpr oneseq_stream() = default;
+};
+
+
+/*
+ * specific stream
+ */
+
+template <typename itype>
+class specific_stream {
+protected:
+    static constexpr bool is_mcg = false;
+
+    itype inc_ = default_increment<itype>::increment();
+
+public:
+    typedef itype state_type;
+    typedef itype stream_state;
+
+    constexpr itype increment() const {
+        return inc_;
+    }
+
+    itype stream()
+    {
+         return inc_ >> 1;
+    }
+
+    void set_stream(itype specific_seq)
+    {
+         inc_ = (specific_seq << 1) | 1;
+    }
+
+    static constexpr bool can_specify_stream = true;
+
+    static constexpr size_t streams_pow2()
+    {
+        return (sizeof(itype)*8) - 1u;
+    }
+
+protected:
+    specific_stream() = default;
+
+    specific_stream(itype specific_seq)
+        : inc_(itype(specific_seq << 1) | itype(1U))
+    {
+        // Nothing (else) to do.
+    }
+};
+
+
+/*
+ * This is where it all comes together.  This function joins together three
+ * mixin classes which define
+ *    - the LCG additive constant (the stream)
+ *    - the LCG multiplier
+ *    - the output function
+ * in addition, we specify the type of the LCG state, and the result type,
+ * and whether to use the pre-advance version of the state for the output
+ * (increasing instruction-level parallelism) or the post-advance version
+ * (reducing register pressure).
+ *
+ * Given the high level of parameterization, the code has to use some
+ * template-metaprogramming tricks to handle some of the suble variations
+ * involved.
+ */
+
+template <typename xtype, typename itype,
+          typename output_mixin,
+          bool output_previous = true,
+          typename stream_mixin = oneseq_stream<itype>,
+          typename multiplier_mixin = default_multiplier<itype> >
+class engine : protected output_mixin,
+               public stream_mixin,
+               protected multiplier_mixin {
+protected:
+    itype state_;
+
+    struct can_specify_stream_tag {};
+    struct no_specifiable_stream_tag {};
+
+    using stream_mixin::increment;
+    using multiplier_mixin::multiplier;
+
+public:
+    typedef xtype result_type;
+    typedef itype state_type;
+
+    static constexpr size_t period_pow2()
+    {
+        return sizeof(state_type)*8 - 2*stream_mixin::is_mcg;
+    }
+
+    // It would be nice to use std::numeric_limits for these, but
+    // we can't be sure that it'd be defined for the 128-bit types.
+
+    static constexpr result_type min()
+    {
+        return result_type(0UL);
+    }
+
+    static constexpr result_type max()
+    {
+        return result_type(~result_type(0UL));
+    }
+
+protected:
+    itype bump(itype state)
+    {
+        return state * multiplier() + increment();
+    }
+
+    itype base_generate()
+    {
+        return state_ = bump(state_);
+    }
+
+    itype base_generate0()
+    {
+        itype old_state = state_;
+        state_ = bump(state_);
+        return old_state;
+    }
+
+public:
+    result_type operator()()
+    {
+        if (output_previous)
+            return this->output(base_generate0());
+        else
+            return this->output(base_generate());
+    }
+
+    result_type operator()(result_type upper_bound)
+    {
+        return bounded_rand(*this, upper_bound);
+    }
+
+protected:
+    static itype advance(itype state, itype delta,
+                         itype cur_mult, itype cur_plus);
+
+    static itype distance(itype cur_state, itype newstate, itype cur_mult,
+                          itype cur_plus, itype mask = ~itype(0U));
+
+    itype distance(itype newstate, itype mask = itype(~itype(0U))) const
+    {
+        return distance(state_, newstate, multiplier(), increment(), mask);
+    }
+
+public:
+    void advance(itype delta)
+    {
+        state_ = advance(state_, delta, this->multiplier(), this->increment());
+    }
+
+    void backstep(itype delta)
+    {
+        advance(-delta);
+    }
+
+    void discard(itype delta)
+    {
+        advance(delta);
+    }
+
+    bool wrapped()
+    {
+        if (stream_mixin::is_mcg) {
+            // For MCGs, the low order two bits never change. In this
+            // implementation, we keep them fixed at 3 to make this test
+            // easier.
+            return state_ == 3;
+        } else {
+            return state_ == 0;
+        }
+    }
+
+    engine(itype state = itype(0xcafef00dd15ea5e5ULL))
+        : state_(this->is_mcg ? state|state_type(3U)
+                              : bump(state + this->increment()))
+    {
+        // Nothing else to do.
+    }
+
+    // This function may or may not exist.  It thus has to be a template
+    // to use SFINAE; users don't have to worry about its template-ness.
+
+    template <typename sm = stream_mixin>
+    engine(itype state, typename sm::stream_state stream_seed)
+        : stream_mixin(stream_seed),
+          state_(this->is_mcg ? state|state_type(3U)
+                              : bump(state + this->increment()))
+    {
+        // Nothing else to do.
+    }
+
+    template<typename SeedSeq>
+    engine(SeedSeq&& seedSeq, typename std::enable_if<
+                  !stream_mixin::can_specify_stream
+               && !std::is_convertible<SeedSeq, itype>::value
+               && !std::is_convertible<SeedSeq, engine>::value,
+               no_specifiable_stream_tag>::type = {})
+        : engine(generate_one<itype>(std::forward<SeedSeq>(seedSeq)))
+    {
+        // Nothing else to do.
+    }
+
+    template<typename SeedSeq>
+    engine(SeedSeq&& seedSeq, typename std::enable_if<
+                   stream_mixin::can_specify_stream
+               && !std::is_convertible<SeedSeq, itype>::value
+               && !std::is_convertible<SeedSeq, engine>::value,
+        can_specify_stream_tag>::type = {})
+    {
+        itype seeddata[2];
+        generate_to<2>(std::forward<SeedSeq>(seedSeq), seeddata);
+        seed(seeddata[1], seeddata[0]);
+    }
+
+
+    template<typename... Args>
+    void seed(Args&&... args)
+    {
+        new (this) engine(std::forward<Args>(args)...);
+    }
+
+    template <typename xtype1, typename itype1,
+              typename output_mixin1, bool output_previous1,
+              typename stream_mixin_lhs, typename multiplier_mixin_lhs,
+              typename stream_mixin_rhs, typename multiplier_mixin_rhs>
+    friend bool operator==(const engine<xtype1,itype1,
+                                     output_mixin1,output_previous1,
+                                     stream_mixin_lhs, multiplier_mixin_lhs>&,
+                           const engine<xtype1,itype1,
+                                     output_mixin1,output_previous1,
+                                     stream_mixin_rhs, multiplier_mixin_rhs>&);
+
+    template <typename xtype1, typename itype1,
+              typename output_mixin1, bool output_previous1,
+              typename stream_mixin_lhs, typename multiplier_mixin_lhs,
+              typename stream_mixin_rhs, typename multiplier_mixin_rhs>
+    friend itype1 operator-(const engine<xtype1,itype1,
+                                     output_mixin1,output_previous1,
+                                     stream_mixin_lhs, multiplier_mixin_lhs>&,
+                            const engine<xtype1,itype1,
+                                     output_mixin1,output_previous1,
+                                     stream_mixin_rhs, multiplier_mixin_rhs>&);
+
+    template <typename CharT, typename Traits,
+              typename xtype1, typename itype1,
+              typename output_mixin1, bool output_previous1,
+              typename stream_mixin1, typename multiplier_mixin1>
+    friend std::basic_ostream<CharT,Traits>&
+    operator<<(std::basic_ostream<CharT,Traits>& out,
+               const engine<xtype1,itype1,
+                              output_mixin1,output_previous1,
+                              stream_mixin1, multiplier_mixin1>&);
+
+    template <typename CharT, typename Traits,
+              typename xtype1, typename itype1,
+              typename output_mixin1, bool output_previous1,
+              typename stream_mixin1, typename multiplier_mixin1>
+    friend std::basic_istream<CharT,Traits>&
+    operator>>(std::basic_istream<CharT,Traits>& in,
+               engine<xtype1, itype1,
+                        output_mixin1, output_previous1,
+                        stream_mixin1, multiplier_mixin1>& rng);
+};
+
+template <typename CharT, typename Traits,
+          typename xtype, typename itype,
+          typename output_mixin, bool output_previous,
+          typename stream_mixin, typename multiplier_mixin>
+std::basic_ostream<CharT,Traits>&
+operator<<(std::basic_ostream<CharT,Traits>& out,
+           const engine<xtype,itype,
+                          output_mixin,output_previous,
+                          stream_mixin, multiplier_mixin>& rng)
+{
+    using pcg_extras::operator<<;
+
+    auto orig_flags = out.flags(std::ios_base::dec | std::ios_base::left);
+    auto space = out.widen(' ');
+    auto orig_fill = out.fill();
+
+    out << rng.multiplier() << space
+        << rng.increment() << space
+        << rng.state_;
+
+    out.flags(orig_flags);
+    out.fill(orig_fill);
+    return out;
+}
+
+
+template <typename CharT, typename Traits,
+          typename xtype, typename itype,
+          typename output_mixin, bool output_previous,
+          typename stream_mixin, typename multiplier_mixin>
+std::basic_istream<CharT,Traits>&
+operator>>(std::basic_istream<CharT,Traits>& in,
+           engine<xtype,itype,
+                    output_mixin,output_previous,
+                    stream_mixin, multiplier_mixin>& rng)
+{
+    using pcg_extras::operator>>;
+
+    auto orig_flags = in.flags(std::ios_base::dec | std::ios_base::skipws);
+
+    itype multiplier, increment, state;
+    in >> multiplier >> increment >> state;
+
+    if (!in.fail()) {
+        bool good = true;
+        if (multiplier != rng.multiplier()) {
+           good = false;
+        } else if (rng.can_specify_stream) {
+           rng.set_stream(increment >> 1);
+        } else if (increment != rng.increment()) {
+           good = false;
+        }
+        if (good) {
+            rng.state_ = state;
+        } else {
+            in.clear(std::ios::failbit);
+        }
+    }
+
+    in.flags(orig_flags);
+    return in;
+}
+
+
+template <typename xtype, typename itype,
+          typename output_mixin, bool output_previous,
+          typename stream_mixin, typename multiplier_mixin>
+itype engine<xtype,itype,output_mixin,output_previous,stream_mixin,
+             multiplier_mixin>::advance(
+    itype state, itype delta, itype cur_mult, itype cur_plus)
+{
+    // The method used here is based on Brown, "Random Number Generation
+    // with Arbitrary Stride,", Transactions of the American Nuclear
+    // Society (Nov. 1994).  The algorithm is very similar to fast
+    // exponentiation.
+    //
+    // Even though delta is an unsigned integer, we can pass a
+    // signed integer to go backwards, it just goes "the long way round".
+
+    constexpr itype ZERO = 0u;  // itype may be a non-trivial types, so
+    constexpr itype ONE  = 1u;  // we define some ugly constants.
+    itype acc_mult = 1;
+    itype acc_plus = 0;
+    while (delta > ZERO) {
+       if (delta & ONE) {
+          acc_mult *= cur_mult;
+          acc_plus = acc_plus*cur_mult + cur_plus;
+       }
+       cur_plus = (cur_mult+ONE)*cur_plus;
+       cur_mult *= cur_mult;
+       delta >>= 1;
+    }
+    return acc_mult * state + acc_plus;
+}
+
+template <typename xtype, typename itype,
+          typename output_mixin, bool output_previous,
+          typename stream_mixin, typename multiplier_mixin>
+itype engine<xtype,itype,output_mixin,output_previous,stream_mixin,
+               multiplier_mixin>::distance(
+    itype cur_state, itype newstate, itype cur_mult, itype cur_plus, itype mask)
+{
+    constexpr itype ONE  = 1u;  // itype could be weird, so use constant
+    bool is_mcg = cur_plus == itype(0);
+    itype the_bit = is_mcg ? itype(4u) : itype(1u);
+    itype distance = 0u;
+    while ((cur_state & mask) != (newstate & mask)) {
+       if ((cur_state & the_bit) != (newstate & the_bit)) {
+           cur_state = cur_state * cur_mult + cur_plus;
+           distance |= the_bit;
+       }
+       assert((cur_state & the_bit) == (newstate & the_bit));
+       the_bit <<= 1;
+       cur_plus = (cur_mult+ONE)*cur_plus;
+       cur_mult *= cur_mult;
+    }
+    return is_mcg ? distance >> 2 : distance;
+}
+
+template <typename xtype, typename itype,
+          typename output_mixin, bool output_previous,
+          typename stream_mixin_lhs, typename multiplier_mixin_lhs,
+          typename stream_mixin_rhs, typename multiplier_mixin_rhs>
+itype operator-(const engine<xtype,itype,
+                               output_mixin,output_previous,
+                               stream_mixin_lhs, multiplier_mixin_lhs>& lhs,
+               const engine<xtype,itype,
+                               output_mixin,output_previous,
+                               stream_mixin_rhs, multiplier_mixin_rhs>& rhs)
+{
+    static_assert(
+        std::is_same<stream_mixin_lhs, stream_mixin_rhs>::value &&
+            std::is_same<multiplier_mixin_lhs, multiplier_mixin_rhs>::value,
+        "Incomparable generators");
+    if (lhs.increment() == rhs.increment()) {
+       return rhs.distance(lhs.state_);
+    } else  {
+       constexpr itype ONE = 1u;
+       itype lhs_diff = lhs.increment() + (lhs.multiplier()-ONE) * lhs.state_;
+       itype rhs_diff = rhs.increment() + (rhs.multiplier()-ONE) * rhs.state_;
+       if ((lhs_diff & itype(3u)) != (rhs_diff & itype(3u))) {
+           rhs_diff = -rhs_diff;
+       }
+       return rhs.distance(rhs_diff, lhs_diff, rhs.multiplier(), itype(0u));
+    }
+}
+
+
+template <typename xtype, typename itype,
+          typename output_mixin, bool output_previous,
+          typename stream_mixin_lhs, typename multiplier_mixin_lhs,
+          typename stream_mixin_rhs, typename multiplier_mixin_rhs>
+bool operator==(const engine<xtype,itype,
+                               output_mixin,output_previous,
+                               stream_mixin_lhs, multiplier_mixin_lhs>& lhs,
+                const engine<xtype,itype,
+                               output_mixin,output_previous,
+                               stream_mixin_rhs, multiplier_mixin_rhs>& rhs)
+{
+    return    (lhs.multiplier() == rhs.multiplier())
+           && (lhs.increment()  == rhs.increment())
+           && (lhs.state_       == rhs.state_);
+}
+
+template <typename xtype, typename itype,
+          typename output_mixin, bool output_previous,
+          typename stream_mixin_lhs, typename multiplier_mixin_lhs,
+          typename stream_mixin_rhs, typename multiplier_mixin_rhs>
+inline bool operator!=(const engine<xtype,itype,
+                               output_mixin,output_previous,
+                               stream_mixin_lhs, multiplier_mixin_lhs>& lhs,
+                       const engine<xtype,itype,
+                               output_mixin,output_previous,
+                               stream_mixin_rhs, multiplier_mixin_rhs>& rhs)
+{
+    return !operator==(lhs,rhs);
+}
+
+
+template <typename xtype, typename itype,
+         template<typename XT,typename IT> class output_mixin,
+         bool output_previous = (sizeof(itype) <= 8),
+         template<typename IT> class multiplier_mixin = default_multiplier>
+using oneseq_base  = engine<xtype, itype,
+                        output_mixin<xtype, itype>, output_previous,
+                        oneseq_stream<itype>,
+                        multiplier_mixin<itype> >;
+
+template <typename xtype, typename itype,
+         template<typename XT,typename IT> class output_mixin,
+         bool output_previous = (sizeof(itype) <= 8),
+         template<typename IT> class multiplier_mixin = default_multiplier>
+using unique_base = engine<xtype, itype,
+                         output_mixin<xtype, itype>, output_previous,
+                         unique_stream<itype>,
+                         multiplier_mixin<itype> >;
+
+template <typename xtype, typename itype,
+         template<typename XT,typename IT> class output_mixin,
+         bool output_previous = (sizeof(itype) <= 8),
+         template<typename IT> class multiplier_mixin = default_multiplier>
+using setseq_base = engine<xtype, itype,
+                         output_mixin<xtype, itype>, output_previous,
+                         specific_stream<itype>,
+                         multiplier_mixin<itype> >;
+
+template <typename xtype, typename itype,
+         template<typename XT,typename IT> class output_mixin,
+         bool output_previous = (sizeof(itype) <= 8),
+         template<typename IT> class multiplier_mixin = default_multiplier>
+using mcg_base = engine<xtype, itype,
+                      output_mixin<xtype, itype>, output_previous,
+                      no_stream<itype>,
+                      multiplier_mixin<itype> >;
+
+/*
+ * OUTPUT FUNCTIONS.
+ *
+ * These are the core of the PCG generation scheme.  They specify how to
+ * turn the base LCG's internal state into the output value of the final
+ * generator.
+ *
+ * They're implemented as mixin classes.
+ *
+ * All of the classes have code that is written to allow it to be applied
+ * at *arbitrary* bit sizes, although in practice they'll only be used at
+ * standard sizes supported by C++.
+ */
+
+/*
+ * XSH RS -- high xorshift, followed by a random shift
+ *
+ * Fast.  A good performer.
+ */
+
+template <typename xtype, typename itype>
+struct xsh_rs_mixin {
+    static xtype output(itype internal)
+    {
+        constexpr bitcount_t bits        = bitcount_t(sizeof(itype) * 8);
+        constexpr bitcount_t xtypebits   = bitcount_t(sizeof(xtype) * 8);
+        constexpr bitcount_t sparebits   = bits - xtypebits;
+        constexpr bitcount_t opbits =
+                              sparebits-5 >= 64 ? 5
+                            : sparebits-4 >= 32 ? 4
+                            : sparebits-3 >= 16 ? 3
+                            : sparebits-2 >= 4  ? 2
+                            : sparebits-1 >= 1  ? 1
+                            :                     0;
+        constexpr bitcount_t mask = (1 << opbits) - 1;
+        constexpr bitcount_t maxrandshift  = mask;
+        constexpr bitcount_t topspare     = opbits;
+        constexpr bitcount_t bottomspare = sparebits - topspare;
+        constexpr bitcount_t xshift     = topspare + (xtypebits+maxrandshift)/2;
+        bitcount_t rshift =
+            opbits ? bitcount_t(internal >> (bits - opbits)) & mask : 0;
+        internal ^= internal >> xshift;
+        xtype result = xtype(internal >> (bottomspare - maxrandshift + rshift));
+        return result;
+    }
+};
+
+/*
+ * XSH RR -- high xorshift, followed by a random rotate
+ *
+ * Fast.  A good performer.  Slightly better statistically than XSH RS.
+ */
+
+template <typename xtype, typename itype>
+struct xsh_rr_mixin {
+    static xtype output(itype internal)
+    {
+        constexpr bitcount_t bits        = bitcount_t(sizeof(itype) * 8);
+        constexpr bitcount_t xtypebits   = bitcount_t(sizeof(xtype)*8);
+        constexpr bitcount_t sparebits   = bits - xtypebits;
+        constexpr bitcount_t wantedopbits =
+                              xtypebits >= 128 ? 7
+                            : xtypebits >=  64 ? 6
+                            : xtypebits >=  32 ? 5
+                            : xtypebits >=  16 ? 4
+                            :                    3;
+        constexpr bitcount_t opbits =
+                              sparebits >= wantedopbits ? wantedopbits
+                                                        : sparebits;
+        constexpr bitcount_t amplifier = wantedopbits - opbits;
+        constexpr bitcount_t mask = (1 << opbits) - 1;
+        constexpr bitcount_t topspare    = opbits;
+        constexpr bitcount_t bottomspare = sparebits - topspare;
+        constexpr bitcount_t xshift      = (topspare + xtypebits)/2;
+        bitcount_t rot = opbits ? bitcount_t(internal >> (bits - opbits)) & mask
+                                : 0;
+        bitcount_t amprot = (rot << amplifier) & mask;
+        internal ^= internal >> xshift;
+        xtype result = xtype(internal >> bottomspare);
+        result = rotr(result, amprot);
+        return result;
+    }
+};
+
+/*
+ * RXS -- random xorshift
+ */
+
+template <typename xtype, typename itype>
+struct rxs_mixin {
+static xtype output_rxs(itype internal)
+    {
+        constexpr bitcount_t bits        = bitcount_t(sizeof(itype) * 8);
+        constexpr bitcount_t xtypebits   = bitcount_t(sizeof(xtype)*8);
+        constexpr bitcount_t shift       = bits - xtypebits;
+        constexpr bitcount_t extrashift  = (xtypebits - shift)/2;
+        bitcount_t rshift = shift > 64+8 ? (internal >> (bits - 6)) & 63
+                       : shift > 32+4 ? (internal >> (bits - 5)) & 31
+                       : shift > 16+2 ? (internal >> (bits - 4)) & 15
+                       : shift >  8+1 ? (internal >> (bits - 3)) & 7
+                       : shift >  4+1 ? (internal >> (bits - 2)) & 3
+                       : shift >  2+1 ? (internal >> (bits - 1)) & 1
+                       :              0;
+        internal ^= internal >> (shift + extrashift - rshift);
+        xtype result = internal >> rshift;
+        return result;
+    }
+};
+
+/*
+ * RXS M XS -- random xorshift, mcg multiply, fixed xorshift
+ *
+ * The most statistically powerful generator, but all those steps
+ * make it slower than some of the others.  We give it the rottenest jobs.
+ *
+ * Because it's usually used in contexts where the state type and the
+ * result type are the same, it is a permutation and is thus invertable.
+ * We thus provide a function to invert it.  This function is used to
+ * for the "inside out" generator used by the extended generator.
+ */
+
+/* Defined type-based concepts for the multiplication step.  They're actually
+ * all derived by truncating the 128-bit, which was computed to be a good
+ * "universal" constant.
+ */
+
+template <typename T>
+struct mcg_multiplier {
+    // Not defined for an arbitrary type
+};
+
+template <typename T>
+struct mcg_unmultiplier {
+    // Not defined for an arbitrary type
+};
+
+PCG_DEFINE_CONSTANT(uint8_t,  mcg, multiplier,   217U)
+PCG_DEFINE_CONSTANT(uint8_t,  mcg, unmultiplier, 105U)
+
+PCG_DEFINE_CONSTANT(uint16_t, mcg, multiplier,   62169U)
+PCG_DEFINE_CONSTANT(uint16_t, mcg, unmultiplier, 28009U)
+
+PCG_DEFINE_CONSTANT(uint32_t, mcg, multiplier,   277803737U)
+PCG_DEFINE_CONSTANT(uint32_t, mcg, unmultiplier, 2897767785U)
+
+PCG_DEFINE_CONSTANT(uint64_t, mcg, multiplier,   12605985483714917081ULL)
+PCG_DEFINE_CONSTANT(uint64_t, mcg, unmultiplier, 15009553638781119849ULL)
+
+PCG_DEFINE_CONSTANT(pcg128_t, mcg, multiplier,
+        PCG_128BIT_CONSTANT(17766728186571221404ULL, 12605985483714917081ULL))
+PCG_DEFINE_CONSTANT(pcg128_t, mcg, unmultiplier,
+        PCG_128BIT_CONSTANT(14422606686972528997ULL, 15009553638781119849ULL))
+
+
+template <typename xtype, typename itype>
+struct rxs_m_xs_mixin {
+    static xtype output(itype internal)
+    {
+        constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8);
+        constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8);
+        constexpr bitcount_t opbits = xtypebits >= 128 ? 6
+                                 : xtypebits >=  64 ? 5
+                                 : xtypebits >=  32 ? 4
+                                 : xtypebits >=  16 ? 3
+                                 :                    2;
+        constexpr bitcount_t shift = bits - xtypebits;
+        constexpr bitcount_t mask = (1 << opbits) - 1;
+        bitcount_t rshift =
+            opbits ? bitcount_t(internal >> (bits - opbits)) & mask : 0;
+        internal ^= internal >> (opbits + rshift);
+        internal *= mcg_multiplier<itype>::multiplier();
+        xtype result = internal >> shift;
+        result ^= result >> ((2U*xtypebits+2U)/3U);
+        return result;
+    }
+
+    static itype unoutput(itype internal)
+    {
+        constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8);
+        constexpr bitcount_t opbits = bits >= 128 ? 6
+                                 : bits >=  64 ? 5
+                                 : bits >=  32 ? 4
+                                 : bits >=  16 ? 3
+                                 :               2;
+        constexpr bitcount_t mask = (1 << opbits) - 1;
+
+        internal = unxorshift(internal, bits, (2U*bits+2U)/3U);
+
+        internal *= mcg_unmultiplier<itype>::unmultiplier();
+
+        bitcount_t rshift = opbits ? (internal >> (bits - opbits)) & mask : 0;
+        internal = unxorshift(internal, bits, opbits + rshift);
+
+        return internal;
+    }
+};
+
+
+/*
+ * RXS M -- random xorshift, mcg multiply
+ */
+
+template <typename xtype, typename itype>
+struct rxs_m_mixin {
+    static xtype output(itype internal)
+    {
+        constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8);
+        constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8);
+        constexpr bitcount_t opbits = xtypebits >= 128 ? 6
+                                 : xtypebits >=  64 ? 5
+                                 : xtypebits >=  32 ? 4
+                                 : xtypebits >=  16 ? 3
+                                 :                    2;
+        constexpr bitcount_t shift = bits - xtypebits;
+        constexpr bitcount_t mask = (1 << opbits) - 1;
+        bitcount_t rshift = opbits ? (internal >> (bits - opbits)) & mask : 0;
+        internal ^= internal >> (opbits + rshift);
+        internal *= mcg_multiplier<itype>::multiplier();
+        xtype result = internal >> shift;
+        return result;
+    }
+};
+
+
+/*
+ * DXSM -- double xorshift multiply
+ *
+ * This is a new, more powerful output permutation (added in 2019).  It's
+ * a more comprehensive scrambling than RXS M, but runs faster on 128-bit
+ * types.  Although primarily intended for use at large sizes, also works
+ * at smaller sizes as well.
+ *
+ * This permutation is similar to xorshift multiply hash functions, except
+ * that one of the multipliers is the LCG multiplier (to avoid needing to
+ * have a second constant) and the other is based on the low-order bits.
+ * This latter aspect means that the scrambling applied to the high bits
+ * depends on the low bits, and makes it (to my eye) impractical to back
+ * out the permutation without having the low-order bits.
+ */
+
+template <typename xtype, typename itype>
+struct dxsm_mixin {
+    inline xtype output(itype internal)
+    {
+        constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8);
+        constexpr bitcount_t itypebits = bitcount_t(sizeof(itype) * 8);
+        static_assert(xtypebits <= itypebits/2,
+                      "Output type must be half the size of the state type.");
+        
+        xtype hi = xtype(internal >> (itypebits - xtypebits));
+        xtype lo = xtype(internal);
+
+        lo |= 1;
+        hi ^= hi >> (xtypebits/2);
+	hi *= xtype(cheap_multiplier<itype>::multiplier());
+	hi ^= hi >> (3*(xtypebits/4));
+	hi *= lo;
+	return hi;
+    }
+};
+
+
+/*
+ * XSL RR -- fixed xorshift (to low bits), random rotate
+ *
+ * Useful for 128-bit types that are split across two CPU registers.
+ */
+
+template <typename xtype, typename itype>
+struct xsl_rr_mixin {
+    static xtype output(itype internal)
+    {
+        constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8);
+        constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8);
+        constexpr bitcount_t sparebits = bits - xtypebits;
+        constexpr bitcount_t wantedopbits = xtypebits >= 128 ? 7
+                                       : xtypebits >=  64 ? 6
+                                       : xtypebits >=  32 ? 5
+                                       : xtypebits >=  16 ? 4
+                                       :                    3;
+        constexpr bitcount_t opbits = sparebits >= wantedopbits ? wantedopbits
+                                                             : sparebits;
+        constexpr bitcount_t amplifier = wantedopbits - opbits;
+        constexpr bitcount_t mask = (1 << opbits) - 1;
+        constexpr bitcount_t topspare = sparebits;
+        constexpr bitcount_t bottomspare = sparebits - topspare;
+        constexpr bitcount_t xshift = (topspare + xtypebits) / 2;
+
+        bitcount_t rot =
+            opbits ? bitcount_t(internal >> (bits - opbits)) & mask : 0;
+        bitcount_t amprot = (rot << amplifier) & mask;
+        internal ^= internal >> xshift;
+        xtype result = xtype(internal >> bottomspare);
+        result = rotr(result, amprot);
+        return result;
+    }
+};
+
+
+/*
+ * XSL RR RR -- fixed xorshift (to low bits), random rotate (both parts)
+ *
+ * Useful for 128-bit types that are split across two CPU registers.
+ * If you really want an invertable 128-bit RNG, I guess this is the one.
+ */
+
+template <typename T> struct halfsize_trait {};
+template <> struct halfsize_trait<pcg128_t>  { typedef uint64_t type; };
+template <> struct halfsize_trait<uint64_t>  { typedef uint32_t type; };
+template <> struct halfsize_trait<uint32_t>  { typedef uint16_t type; };
+template <> struct halfsize_trait<uint16_t>  { typedef uint8_t type;  };
+
+template <typename xtype, typename itype>
+struct xsl_rr_rr_mixin {
+    typedef typename halfsize_trait<itype>::type htype;
+
+    static itype output(itype internal)
+    {
+        constexpr bitcount_t htypebits = bitcount_t(sizeof(htype) * 8);
+        constexpr bitcount_t bits      = bitcount_t(sizeof(itype) * 8);
+        constexpr bitcount_t sparebits = bits - htypebits;
+        constexpr bitcount_t wantedopbits = htypebits >= 128 ? 7
+                                       : htypebits >=  64 ? 6
+                                       : htypebits >=  32 ? 5
+                                       : htypebits >=  16 ? 4
+                                       :                    3;
+        constexpr bitcount_t opbits = sparebits >= wantedopbits ? wantedopbits
+                                                                : sparebits;
+        constexpr bitcount_t amplifier = wantedopbits - opbits;
+        constexpr bitcount_t mask = (1 << opbits) - 1;
+        constexpr bitcount_t topspare = sparebits;
+        constexpr bitcount_t xshift = (topspare + htypebits) / 2;
+
+        bitcount_t rot =
+            opbits ? bitcount_t(internal >> (bits - opbits)) & mask : 0;
+        bitcount_t amprot = (rot << amplifier) & mask;
+        internal ^= internal >> xshift;
+        htype lowbits = htype(internal);
+        lowbits = rotr(lowbits, amprot);
+        htype highbits = htype(internal >> topspare);
+        bitcount_t rot2 = lowbits & mask;
+        bitcount_t amprot2 = (rot2 << amplifier) & mask;
+        highbits = rotr(highbits, amprot2);
+        return (itype(highbits) << topspare) ^ itype(lowbits);
+    }
+};
+
+
+/*
+ * XSH -- fixed xorshift (to high bits)
+ *
+ * You shouldn't use this at 64-bits or less.
+ */
+
+template <typename xtype, typename itype>
+struct xsh_mixin {
+    static xtype output(itype internal)
+    {
+        constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8);
+        constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8);
+        constexpr bitcount_t sparebits = bits - xtypebits;
+        constexpr bitcount_t topspare = 0;
+        constexpr bitcount_t bottomspare = sparebits - topspare;
+        constexpr bitcount_t xshift = (topspare + xtypebits) / 2;
+
+        internal ^= internal >> xshift;
+        xtype result = internal >> bottomspare;
+        return result;
+    }
+};
+
+/*
+ * XSL -- fixed xorshift (to low bits)
+ *
+ * You shouldn't use this at 64-bits or less.
+ */
+
+template <typename xtype, typename itype>
+struct xsl_mixin {
+    inline xtype output(itype internal)
+    {
+        constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8);
+        constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8);
+        constexpr bitcount_t sparebits = bits - xtypebits;
+        constexpr bitcount_t topspare = sparebits;
+        constexpr bitcount_t bottomspare = sparebits - topspare;
+        constexpr bitcount_t xshift = (topspare + xtypebits) / 2;
+
+        internal ^= internal >> xshift;
+        xtype result = internal >> bottomspare;
+        return result;
+    }
+};
+
+
+/* ---- End of Output Functions ---- */
+
+
+template <typename baseclass>
+struct inside_out : private baseclass {
+    inside_out() = delete;
+
+    typedef typename baseclass::result_type result_type;
+    typedef typename baseclass::state_type  state_type;
+    static_assert(sizeof(result_type) == sizeof(state_type),
+                  "Require a RNG whose output function is a permutation");
+
+    static bool external_step(result_type& randval, size_t i)
+    {
+        state_type state = baseclass::unoutput(randval);
+        state = state * baseclass::multiplier() + baseclass::increment()
+                + state_type(i*2);
+        result_type result = baseclass::output(state);
+        randval = result;
+        state_type zero =
+            baseclass::is_mcg ? state & state_type(3U) : state_type(0U);
+        return result == zero;
+    }
+
+    static bool external_advance(result_type& randval, size_t i,
+                                 result_type delta, bool forwards = true)
+    {
+        state_type state = baseclass::unoutput(randval);
+        state_type mult  = baseclass::multiplier();
+        state_type inc   = baseclass::increment() + state_type(i*2);
+        state_type zero =
+            baseclass::is_mcg ? state & state_type(3U) : state_type(0U);
+        state_type dist_to_zero = baseclass::distance(state, zero, mult, inc);
+        bool crosses_zero =
+            forwards ? dist_to_zero <= delta
+                     : (-dist_to_zero) <= delta;
+        if (!forwards)
+            delta = -delta;
+        state = baseclass::advance(state, delta, mult, inc);
+        randval = baseclass::output(state);
+        return crosses_zero;
+    }
+};
+
+
+template <bitcount_t table_pow2, bitcount_t advance_pow2, typename baseclass, typename extvalclass, bool kdd = true>
+class extended : public baseclass {
+public:
+    typedef typename baseclass::state_type  state_type;
+    typedef typename baseclass::result_type result_type;
+    typedef inside_out<extvalclass> insideout;
+
+private:
+    static constexpr bitcount_t rtypebits = sizeof(result_type)*8;
+    static constexpr bitcount_t stypebits = sizeof(state_type)*8;
+
+    static constexpr bitcount_t tick_limit_pow2 = 64U;
+
+    static constexpr size_t table_size  = 1UL << table_pow2;
+    static constexpr size_t table_shift = stypebits - table_pow2;
+    static constexpr state_type table_mask =
+        (state_type(1U) << table_pow2) - state_type(1U);
+
+    static constexpr bool   may_tick  =
+        (advance_pow2 < stypebits) && (advance_pow2 < tick_limit_pow2);
+    static constexpr size_t tick_shift = stypebits - advance_pow2;
+    static constexpr state_type tick_mask  =
+        may_tick ? state_type(
+                       (uint64_t(1) << (advance_pow2*may_tick)) - 1)
+                                        // ^-- stupidity to appease GCC warnings
+                 : ~state_type(0U);
+
+    static constexpr bool may_tock = stypebits < tick_limit_pow2;
+
+    result_type data_[table_size];
+
+    PCG_NOINLINE void advance_table();
+
+    PCG_NOINLINE void advance_table(state_type delta, bool isForwards = true);
+
+    result_type& get_extended_value()
+    {
+        state_type state = this->state_;
+        if (kdd && baseclass::is_mcg) {
+            // The low order bits of an MCG are constant, so drop them.
+            state >>= 2;
+        }
+        size_t index       = kdd ? state &  table_mask
+                                 : state >> table_shift;
+
+        if (may_tick) {
+            bool tick = kdd ? (state & tick_mask) == state_type(0u)
+                            : (state >> tick_shift) == state_type(0u);
+            if (tick)
+                    advance_table();
+        }
+        if (may_tock) {
+            bool tock = state == state_type(0u);
+            if (tock)
+                advance_table();
+        }
+        return data_[index];
+    }
+
+public:
+    static constexpr size_t period_pow2()
+    {
+        return baseclass::period_pow2() + table_size*extvalclass::period_pow2();
+    }
+
+    PCG_ALWAYS_INLINE result_type operator()()
+    {
+        result_type rhs = get_extended_value();
+        result_type lhs = this->baseclass::operator()();
+        return lhs ^ rhs;
+    }
+
+    result_type operator()(result_type upper_bound)
+    {
+        return bounded_rand(*this, upper_bound);
+    }
+
+    void set(result_type wanted)
+    {
+        result_type& rhs = get_extended_value();
+        result_type lhs = this->baseclass::operator()();
+        rhs = lhs ^ wanted;
+    }
+
+    void advance(state_type distance, bool forwards = true);
+
+    void backstep(state_type distance)
+    {
+        advance(distance, false);
+    }
+
+    extended(const result_type* data)
+        : baseclass()
+    {
+        datainit(data);
+    }
+
+    extended(const result_type* data, state_type seed)
+        : baseclass(seed)
+    {
+        datainit(data);
+    }
+
+    // This function may or may not exist.  It thus has to be a template
+    // to use SFINAE; users don't have to worry about its template-ness.
+
+    template <typename bc = baseclass>
+    extended(const result_type* data, state_type seed,
+            typename bc::stream_state stream_seed)
+        : baseclass(seed, stream_seed)
+    {
+        datainit(data);
+    }
+
+    extended()
+        : baseclass()
+    {
+        selfinit();
+    }
+
+    extended(state_type seed)
+        : baseclass(seed)
+    {
+        selfinit();
+    }
+
+    // This function may or may not exist.  It thus has to be a template
+    // to use SFINAE; users don't have to worry about its template-ness.
+
+    template <typename bc = baseclass>
+    extended(state_type seed, typename bc::stream_state stream_seed)
+        : baseclass(seed, stream_seed)
+    {
+        selfinit();
+    }
+
+private:
+    void selfinit();
+    void datainit(const result_type* data);
+
+public:
+
+    template<typename SeedSeq, typename = typename std::enable_if<
+           !std::is_convertible<SeedSeq, result_type>::value
+        && !std::is_convertible<SeedSeq, extended>::value>::type>
+    extended(SeedSeq&& seedSeq)
+        : baseclass(seedSeq)
+    {
+        generate_to<table_size>(seedSeq, data_);
+    }
+
+    template<typename... Args>
+    void seed(Args&&... args)
+    {
+        new (this) extended(std::forward<Args>(args)...);
+    }
+
+    template <bitcount_t table_pow2_, bitcount_t advance_pow2_,
+              typename baseclass_, typename extvalclass_, bool kdd_>
+    friend bool operator==(const extended<table_pow2_, advance_pow2_,
+                                              baseclass_, extvalclass_, kdd_>&,
+                           const extended<table_pow2_, advance_pow2_,
+                                              baseclass_, extvalclass_, kdd_>&);
+
+    template <typename CharT, typename Traits,
+              bitcount_t table_pow2_, bitcount_t advance_pow2_,
+              typename baseclass_, typename extvalclass_, bool kdd_>
+    friend std::basic_ostream<CharT,Traits>&
+    operator<<(std::basic_ostream<CharT,Traits>& out,
+               const extended<table_pow2_, advance_pow2_,
+                              baseclass_, extvalclass_, kdd_>&);
+
+    template <typename CharT, typename Traits,
+              bitcount_t table_pow2_, bitcount_t advance_pow2_,
+              typename baseclass_, typename extvalclass_, bool kdd_>
+    friend std::basic_istream<CharT,Traits>&
+    operator>>(std::basic_istream<CharT,Traits>& in,
+               extended<table_pow2_, advance_pow2_,
+                        baseclass_, extvalclass_, kdd_>&);
+
+};
+
+
+template <bitcount_t table_pow2, bitcount_t advance_pow2,
+          typename baseclass, typename extvalclass, bool kdd>
+void extended<table_pow2,advance_pow2,baseclass,extvalclass,kdd>::datainit(
+         const result_type* data)
+{
+    for (size_t i = 0; i < table_size; ++i)
+        data_[i] = data[i];
+}
+
+template <bitcount_t table_pow2, bitcount_t advance_pow2,
+          typename baseclass, typename extvalclass, bool kdd>
+void extended<table_pow2,advance_pow2,baseclass,extvalclass,kdd>::selfinit()
+{
+    // We need to fill the extended table with something, and we have
+    // very little provided data, so we use the base generator to
+    // produce values.  Although not ideal (use a seed sequence, folks!),
+    // unexpected correlations are mitigated by
+    //      - using XOR differences rather than the number directly
+    //      - the way the table is accessed, its values *won't* be accessed
+    //        in the same order the were written.
+    //      - any strange correlations would only be apparent if we
+    //        were to backstep the generator so that the base generator
+    //        was generating the same values again
+    result_type lhs = baseclass::operator()();
+    result_type rhs = baseclass::operator()();
+    result_type xdiff = lhs - rhs;
+    for (size_t i = 0; i < table_size; ++i) {
+        data_[i] = baseclass::operator()() ^ xdiff;
+    }
+}
+
+template <bitcount_t table_pow2, bitcount_t advance_pow2,
+          typename baseclass, typename extvalclass, bool kdd>
+bool operator==(const extended<table_pow2, advance_pow2,
+                               baseclass, extvalclass, kdd>& lhs,
+                const extended<table_pow2, advance_pow2,
+                               baseclass, extvalclass, kdd>& rhs)
+{
+    auto& base_lhs = static_cast<const baseclass&>(lhs);
+    auto& base_rhs = static_cast<const baseclass&>(rhs);
+    return base_lhs == base_rhs
+        && std::equal(
+               std::begin(lhs.data_), std::end(lhs.data_),
+               std::begin(rhs.data_)
+           );
+}
+
+template <bitcount_t table_pow2, bitcount_t advance_pow2,
+          typename baseclass, typename extvalclass, bool kdd>
+inline bool operator!=(const extended<table_pow2, advance_pow2,
+                                      baseclass, extvalclass, kdd>& lhs,
+                       const extended<table_pow2, advance_pow2,
+                                      baseclass, extvalclass, kdd>& rhs)
+{
+    return !operator==(lhs, rhs);
+}
+
+template <typename CharT, typename Traits,
+          bitcount_t table_pow2, bitcount_t advance_pow2,
+          typename baseclass, typename extvalclass, bool kdd>
+std::basic_ostream<CharT,Traits>&
+operator<<(std::basic_ostream<CharT,Traits>& out,
+           const extended<table_pow2, advance_pow2,
+                          baseclass, extvalclass, kdd>& rng)
+{
+    using pcg_extras::operator<<;
+
+    auto orig_flags = out.flags(std::ios_base::dec | std::ios_base::left);
+    auto space = out.widen(' ');
+    auto orig_fill = out.fill();
+
+    out << rng.multiplier() << space
+        << rng.increment() << space
+        << rng.state_;
+
+    for (const auto& datum : rng.data_)
+        out << space << datum;
+
+    out.flags(orig_flags);
+    out.fill(orig_fill);
+    return out;
+}
+
+template <typename CharT, typename Traits,
+          bitcount_t table_pow2, bitcount_t advance_pow2,
+          typename baseclass, typename extvalclass, bool kdd>
+std::basic_istream<CharT,Traits>&
+operator>>(std::basic_istream<CharT,Traits>& in,
+           extended<table_pow2, advance_pow2,
+                    baseclass, extvalclass, kdd>& rng)
+{
+    extended<table_pow2, advance_pow2, baseclass, extvalclass> new_rng;
+    auto& base_rng = static_cast<baseclass&>(new_rng);
+    in >> base_rng;
+
+    if (in.fail())
+        return in;
+
+    using pcg_extras::operator>>;
+
+    auto orig_flags = in.flags(std::ios_base::dec | std::ios_base::skipws);
+
+    for (auto& datum : new_rng.data_) {
+        in >> datum;
+        if (in.fail())
+            goto bail;
+    }
+
+    rng = new_rng;
+
+bail:
+    in.flags(orig_flags);
+    return in;
+}
+
+
+
+template <bitcount_t table_pow2, bitcount_t advance_pow2,
+          typename baseclass, typename extvalclass, bool kdd>
+void
+extended<table_pow2,advance_pow2,baseclass,extvalclass,kdd>::advance_table()
+{
+    bool carry = false;
+    for (size_t i = 0; i < table_size; ++i) {
+        if (carry) {
+            carry = insideout::external_step(data_[i],i+1);
+        }
+        bool carry2 = insideout::external_step(data_[i],i+1);
+        carry = carry || carry2;
+    }
+}
+
+template <bitcount_t table_pow2, bitcount_t advance_pow2,
+          typename baseclass, typename extvalclass, bool kdd>
+void
+extended<table_pow2,advance_pow2,baseclass,extvalclass,kdd>::advance_table(
+        state_type delta, bool isForwards)
+{
+    typedef typename baseclass::state_type   base_state_t;
+    typedef typename extvalclass::state_type ext_state_t;
+    constexpr bitcount_t basebits = sizeof(base_state_t)*8;
+    constexpr bitcount_t extbits  = sizeof(ext_state_t)*8;
+    static_assert(basebits <= extbits || advance_pow2 > 0,
+                  "Current implementation might overflow its carry");
+
+    base_state_t carry = 0;
+    for (size_t i = 0; i < table_size; ++i) {
+        base_state_t total_delta = carry + delta;
+        ext_state_t  trunc_delta = ext_state_t(total_delta);
+        if (basebits > extbits) {
+            carry = total_delta >> extbits;
+        } else {
+            carry = 0;
+        }
+        carry +=
+            insideout::external_advance(data_[i],i+1, trunc_delta, isForwards);
+    }
+}
+
+template <bitcount_t table_pow2, bitcount_t advance_pow2,
+          typename baseclass, typename extvalclass, bool kdd>
+void extended<table_pow2,advance_pow2,baseclass,extvalclass,kdd>::advance(
+    state_type distance, bool forwards)
+{
+    static_assert(kdd,
+        "Efficient advance is too hard for non-kdd extension. "
+        "For a weak advance, cast to base class");
+    state_type zero =
+        baseclass::is_mcg ? this->state_ & state_type(3U) : state_type(0U);
+    if (may_tick) {
+        state_type ticks = distance >> (advance_pow2*may_tick);
+                                        // ^-- stupidity to appease GCC
+                                        // warnings
+        state_type adv_mask =
+            baseclass::is_mcg ? tick_mask << 2 : tick_mask;
+        state_type next_advance_distance = this->distance(zero, adv_mask);
+        if (!forwards)
+            next_advance_distance = (-next_advance_distance) & tick_mask;
+        if (next_advance_distance < (distance & tick_mask)) {
+            ++ticks;
+        }
+        if (ticks)
+            advance_table(ticks, forwards);
+    }
+    if (forwards) {
+        if (may_tock && this->distance(zero) <= distance)
+            advance_table();
+        baseclass::advance(distance);
+    } else {
+        if (may_tock && -(this->distance(zero)) <= distance)
+            advance_table(state_type(1U), false);
+        baseclass::advance(-distance);
+    }
+}
+
+} // namespace pcg_detail
+
+namespace pcg_engines {
+
+using namespace pcg_detail;
+
+/* Predefined types for XSH RS */
+
+typedef oneseq_base<uint8_t,  uint16_t, xsh_rs_mixin>  oneseq_xsh_rs_16_8;
+typedef oneseq_base<uint16_t, uint32_t, xsh_rs_mixin>  oneseq_xsh_rs_32_16;
+typedef oneseq_base<uint32_t, uint64_t, xsh_rs_mixin>  oneseq_xsh_rs_64_32;
+typedef oneseq_base<uint64_t, pcg128_t, xsh_rs_mixin>  oneseq_xsh_rs_128_64;
+typedef oneseq_base<uint64_t, pcg128_t, xsh_rs_mixin, true, cheap_multiplier>
+                                                       cm_oneseq_xsh_rs_128_64;
+
+typedef unique_base<uint8_t,  uint16_t, xsh_rs_mixin>  unique_xsh_rs_16_8;
+typedef unique_base<uint16_t, uint32_t, xsh_rs_mixin>  unique_xsh_rs_32_16;
+typedef unique_base<uint32_t, uint64_t, xsh_rs_mixin>  unique_xsh_rs_64_32;
+typedef unique_base<uint64_t, pcg128_t, xsh_rs_mixin>  unique_xsh_rs_128_64;
+typedef unique_base<uint64_t, pcg128_t, xsh_rs_mixin, true, cheap_multiplier>
+                                                       cm_unique_xsh_rs_128_64;
+
+typedef setseq_base<uint8_t,  uint16_t, xsh_rs_mixin>  setseq_xsh_rs_16_8;
+typedef setseq_base<uint16_t, uint32_t, xsh_rs_mixin>  setseq_xsh_rs_32_16;
+typedef setseq_base<uint32_t, uint64_t, xsh_rs_mixin>  setseq_xsh_rs_64_32;
+typedef setseq_base<uint64_t, pcg128_t, xsh_rs_mixin>  setseq_xsh_rs_128_64;
+typedef setseq_base<uint64_t, pcg128_t, xsh_rs_mixin, true, cheap_multiplier>
+                                                       cm_setseq_xsh_rs_128_64;
+
+typedef mcg_base<uint8_t,  uint16_t, xsh_rs_mixin>  mcg_xsh_rs_16_8;
+typedef mcg_base<uint16_t, uint32_t, xsh_rs_mixin>  mcg_xsh_rs_32_16;
+typedef mcg_base<uint32_t, uint64_t, xsh_rs_mixin>  mcg_xsh_rs_64_32;
+typedef mcg_base<uint64_t, pcg128_t, xsh_rs_mixin>  mcg_xsh_rs_128_64;
+typedef mcg_base<uint64_t, pcg128_t, xsh_rs_mixin, true, cheap_multiplier>
+                                                    cm_mcg_xsh_rs_128_64;
+
+/* Predefined types for XSH RR */
+
+typedef oneseq_base<uint8_t,  uint16_t, xsh_rr_mixin>  oneseq_xsh_rr_16_8;
+typedef oneseq_base<uint16_t, uint32_t, xsh_rr_mixin>  oneseq_xsh_rr_32_16;
+typedef oneseq_base<uint32_t, uint64_t, xsh_rr_mixin>  oneseq_xsh_rr_64_32;
+typedef oneseq_base<uint64_t, pcg128_t, xsh_rr_mixin>  oneseq_xsh_rr_128_64;
+typedef oneseq_base<uint64_t, pcg128_t, xsh_rr_mixin, true, cheap_multiplier>
+                                                       cm_oneseq_xsh_rr_128_64;
+
+typedef unique_base<uint8_t,  uint16_t, xsh_rr_mixin>  unique_xsh_rr_16_8;
+typedef unique_base<uint16_t, uint32_t, xsh_rr_mixin>  unique_xsh_rr_32_16;
+typedef unique_base<uint32_t, uint64_t, xsh_rr_mixin>  unique_xsh_rr_64_32;
+typedef unique_base<uint64_t, pcg128_t, xsh_rr_mixin>  unique_xsh_rr_128_64;
+typedef unique_base<uint64_t, pcg128_t, xsh_rr_mixin, true, cheap_multiplier>
+                                                       cm_unique_xsh_rr_128_64;
+
+typedef setseq_base<uint8_t,  uint16_t, xsh_rr_mixin>  setseq_xsh_rr_16_8;
+typedef setseq_base<uint16_t, uint32_t, xsh_rr_mixin>  setseq_xsh_rr_32_16;
+typedef setseq_base<uint32_t, uint64_t, xsh_rr_mixin>  setseq_xsh_rr_64_32;
+typedef setseq_base<uint64_t, pcg128_t, xsh_rr_mixin>  setseq_xsh_rr_128_64;
+typedef setseq_base<uint64_t, pcg128_t, xsh_rr_mixin, true, cheap_multiplier>
+                                                       cm_setseq_xsh_rr_128_64;
+
+typedef mcg_base<uint8_t,  uint16_t, xsh_rr_mixin>  mcg_xsh_rr_16_8;
+typedef mcg_base<uint16_t, uint32_t, xsh_rr_mixin>  mcg_xsh_rr_32_16;
+typedef mcg_base<uint32_t, uint64_t, xsh_rr_mixin>  mcg_xsh_rr_64_32;
+typedef mcg_base<uint64_t, pcg128_t, xsh_rr_mixin>  mcg_xsh_rr_128_64;
+typedef mcg_base<uint64_t, pcg128_t, xsh_rr_mixin, true, cheap_multiplier>
+                                                    cm_mcg_xsh_rr_128_64;
+
+
+/* Predefined types for RXS M XS */
+
+typedef oneseq_base<uint8_t,  uint8_t, rxs_m_xs_mixin>   oneseq_rxs_m_xs_8_8;
+typedef oneseq_base<uint16_t, uint16_t, rxs_m_xs_mixin>  oneseq_rxs_m_xs_16_16;
+typedef oneseq_base<uint32_t, uint32_t, rxs_m_xs_mixin>  oneseq_rxs_m_xs_32_32;
+typedef oneseq_base<uint64_t, uint64_t, rxs_m_xs_mixin>  oneseq_rxs_m_xs_64_64;
+typedef oneseq_base<pcg128_t, pcg128_t, rxs_m_xs_mixin>
+                                                        oneseq_rxs_m_xs_128_128;
+typedef oneseq_base<pcg128_t, pcg128_t, rxs_m_xs_mixin, true, cheap_multiplier>
+                                                     cm_oneseq_rxs_m_xs_128_128;
+
+typedef unique_base<uint8_t,  uint8_t, rxs_m_xs_mixin>  unique_rxs_m_xs_8_8;
+typedef unique_base<uint16_t, uint16_t, rxs_m_xs_mixin> unique_rxs_m_xs_16_16;
+typedef unique_base<uint32_t, uint32_t, rxs_m_xs_mixin> unique_rxs_m_xs_32_32;
+typedef unique_base<uint64_t, uint64_t, rxs_m_xs_mixin> unique_rxs_m_xs_64_64;
+typedef unique_base<pcg128_t, pcg128_t, rxs_m_xs_mixin> unique_rxs_m_xs_128_128;
+typedef unique_base<pcg128_t, pcg128_t, rxs_m_xs_mixin, true, cheap_multiplier>
+                                                     cm_unique_rxs_m_xs_128_128;
+
+typedef setseq_base<uint8_t,  uint8_t, rxs_m_xs_mixin>  setseq_rxs_m_xs_8_8;
+typedef setseq_base<uint16_t, uint16_t, rxs_m_xs_mixin> setseq_rxs_m_xs_16_16;
+typedef setseq_base<uint32_t, uint32_t, rxs_m_xs_mixin> setseq_rxs_m_xs_32_32;
+typedef setseq_base<uint64_t, uint64_t, rxs_m_xs_mixin> setseq_rxs_m_xs_64_64;
+typedef setseq_base<pcg128_t, pcg128_t, rxs_m_xs_mixin> setseq_rxs_m_xs_128_128;
+typedef setseq_base<pcg128_t, pcg128_t, rxs_m_xs_mixin, true, cheap_multiplier>
+                                                     cm_setseq_rxs_m_xs_128_128;
+
+                // MCG versions don't make sense here, so aren't defined.
+
+/* Predefined types for RXS M */
+
+typedef oneseq_base<uint8_t,  uint16_t, rxs_m_mixin>  oneseq_rxs_m_16_8;
+typedef oneseq_base<uint16_t, uint32_t, rxs_m_mixin>  oneseq_rxs_m_32_16;
+typedef oneseq_base<uint32_t, uint64_t, rxs_m_mixin>  oneseq_rxs_m_64_32;
+typedef oneseq_base<uint64_t, pcg128_t, rxs_m_mixin>  oneseq_rxs_m_128_64;
+typedef oneseq_base<uint64_t, pcg128_t, rxs_m_mixin, true, cheap_multiplier>
+                                                      cm_oneseq_rxs_m_128_64;
+
+typedef unique_base<uint8_t,  uint16_t, rxs_m_mixin>  unique_rxs_m_16_8;
+typedef unique_base<uint16_t, uint32_t, rxs_m_mixin>  unique_rxs_m_32_16;
+typedef unique_base<uint32_t, uint64_t, rxs_m_mixin>  unique_rxs_m_64_32;
+typedef unique_base<uint64_t, pcg128_t, rxs_m_mixin>  unique_rxs_m_128_64;
+typedef unique_base<uint64_t, pcg128_t, rxs_m_mixin, true, cheap_multiplier>
+                                                      cm_unique_rxs_m_128_64;
+
+typedef setseq_base<uint8_t,  uint16_t, rxs_m_mixin>  setseq_rxs_m_16_8;
+typedef setseq_base<uint16_t, uint32_t, rxs_m_mixin>  setseq_rxs_m_32_16;
+typedef setseq_base<uint32_t, uint64_t, rxs_m_mixin>  setseq_rxs_m_64_32;
+typedef setseq_base<uint64_t, pcg128_t, rxs_m_mixin>  setseq_rxs_m_128_64;
+typedef setseq_base<uint64_t, pcg128_t, rxs_m_mixin, true, cheap_multiplier>
+                                                      cm_setseq_rxs_m_128_64;
+
+typedef mcg_base<uint8_t,  uint16_t, rxs_m_mixin>  mcg_rxs_m_16_8;
+typedef mcg_base<uint16_t, uint32_t, rxs_m_mixin>  mcg_rxs_m_32_16;
+typedef mcg_base<uint32_t, uint64_t, rxs_m_mixin>  mcg_rxs_m_64_32;
+typedef mcg_base<uint64_t, pcg128_t, rxs_m_mixin>  mcg_rxs_m_128_64;
+typedef mcg_base<uint64_t, pcg128_t, rxs_m_mixin, true, cheap_multiplier>
+                                                   cm_mcg_rxs_m_128_64;
+
+/* Predefined types for DXSM */
+
+typedef oneseq_base<uint8_t,  uint16_t, dxsm_mixin>  oneseq_dxsm_16_8;
+typedef oneseq_base<uint16_t, uint32_t, dxsm_mixin>  oneseq_dxsm_32_16;
+typedef oneseq_base<uint32_t, uint64_t, dxsm_mixin>  oneseq_dxsm_64_32;
+typedef oneseq_base<uint64_t, pcg128_t, dxsm_mixin>  oneseq_dxsm_128_64;
+typedef oneseq_base<uint64_t, pcg128_t, dxsm_mixin, true, cheap_multiplier>
+                                                     cm_oneseq_dxsm_128_64;
+
+typedef unique_base<uint8_t,  uint16_t, dxsm_mixin>  unique_dxsm_16_8;
+typedef unique_base<uint16_t, uint32_t, dxsm_mixin>  unique_dxsm_32_16;
+typedef unique_base<uint32_t, uint64_t, dxsm_mixin>  unique_dxsm_64_32;
+typedef unique_base<uint64_t, pcg128_t, dxsm_mixin>  unique_dxsm_128_64;
+typedef unique_base<uint64_t, pcg128_t, dxsm_mixin, true, cheap_multiplier>
+                                                     cm_unique_dxsm_128_64;
+
+typedef setseq_base<uint8_t,  uint16_t, dxsm_mixin>  setseq_dxsm_16_8;
+typedef setseq_base<uint16_t, uint32_t, dxsm_mixin>  setseq_dxsm_32_16;
+typedef setseq_base<uint32_t, uint64_t, dxsm_mixin>  setseq_dxsm_64_32;
+typedef setseq_base<uint64_t, pcg128_t, dxsm_mixin>  setseq_dxsm_128_64;
+typedef setseq_base<uint64_t, pcg128_t, dxsm_mixin, true, cheap_multiplier>
+                                                     cm_setseq_dxsm_128_64;
+
+typedef mcg_base<uint8_t,  uint16_t, dxsm_mixin>  mcg_dxsm_16_8;
+typedef mcg_base<uint16_t, uint32_t, dxsm_mixin>  mcg_dxsm_32_16;
+typedef mcg_base<uint32_t, uint64_t, dxsm_mixin>  mcg_dxsm_64_32;
+typedef mcg_base<uint64_t, pcg128_t, dxsm_mixin>  mcg_dxsm_128_64;
+typedef mcg_base<uint64_t, pcg128_t, dxsm_mixin, true, cheap_multiplier>
+                                                  cm_mcg_dxsm_128_64;
+
+/* Predefined types for XSL RR (only defined for "large" types) */
+
+typedef oneseq_base<uint32_t, uint64_t, xsl_rr_mixin>  oneseq_xsl_rr_64_32;
+typedef oneseq_base<uint64_t, pcg128_t, xsl_rr_mixin>  oneseq_xsl_rr_128_64;
+typedef oneseq_base<uint64_t, pcg128_t, xsl_rr_mixin, true, cheap_multiplier>
+                                                       cm_oneseq_xsl_rr_128_64;
+
+typedef unique_base<uint32_t, uint64_t, xsl_rr_mixin>  unique_xsl_rr_64_32;
+typedef unique_base<uint64_t, pcg128_t, xsl_rr_mixin>  unique_xsl_rr_128_64;
+typedef unique_base<uint64_t, pcg128_t, xsl_rr_mixin, true, cheap_multiplier>
+                                                       cm_unique_xsl_rr_128_64;
+
+typedef setseq_base<uint32_t, uint64_t, xsl_rr_mixin>  setseq_xsl_rr_64_32;
+typedef setseq_base<uint64_t, pcg128_t, xsl_rr_mixin>  setseq_xsl_rr_128_64;
+typedef setseq_base<uint64_t, pcg128_t, xsl_rr_mixin, true, cheap_multiplier>
+                                                       cm_setseq_xsl_rr_128_64;
+
+typedef mcg_base<uint32_t, uint64_t, xsl_rr_mixin>  mcg_xsl_rr_64_32;
+typedef mcg_base<uint64_t, pcg128_t, xsl_rr_mixin>  mcg_xsl_rr_128_64;
+typedef mcg_base<uint64_t, pcg128_t, xsl_rr_mixin, true, cheap_multiplier>
+                                                    cm_mcg_xsl_rr_128_64;
+
+
+/* Predefined types for XSL RR RR (only defined for "large" types) */
+
+typedef oneseq_base<uint64_t, uint64_t, xsl_rr_rr_mixin>
+    oneseq_xsl_rr_rr_64_64;
+typedef oneseq_base<pcg128_t, pcg128_t, xsl_rr_rr_mixin>
+    oneseq_xsl_rr_rr_128_128;
+typedef oneseq_base<pcg128_t, pcg128_t, xsl_rr_rr_mixin, true, cheap_multiplier>
+    cm_oneseq_xsl_rr_rr_128_128;
+
+typedef unique_base<uint64_t, uint64_t, xsl_rr_rr_mixin>
+    unique_xsl_rr_rr_64_64;
+typedef unique_base<pcg128_t, pcg128_t, xsl_rr_rr_mixin>
+    unique_xsl_rr_rr_128_128;
+typedef unique_base<pcg128_t, pcg128_t, xsl_rr_rr_mixin, true, cheap_multiplier>
+    cm_unique_xsl_rr_rr_128_128;
+
+typedef setseq_base<uint64_t, uint64_t, xsl_rr_rr_mixin>
+    setseq_xsl_rr_rr_64_64;
+typedef setseq_base<pcg128_t, pcg128_t, xsl_rr_rr_mixin>
+    setseq_xsl_rr_rr_128_128;
+typedef setseq_base<pcg128_t, pcg128_t, xsl_rr_rr_mixin, true, cheap_multiplier>
+    cm_setseq_xsl_rr_rr_128_128;
+
+                // MCG versions don't make sense here, so aren't defined.
+
+/* Extended generators */
+
+template <bitcount_t table_pow2, bitcount_t advance_pow2,
+          typename BaseRNG, bool kdd = true>
+using ext_std8 = extended<table_pow2, advance_pow2, BaseRNG,
+                          oneseq_rxs_m_xs_8_8, kdd>;
+
+template <bitcount_t table_pow2, bitcount_t advance_pow2,
+          typename BaseRNG, bool kdd = true>
+using ext_std16 = extended<table_pow2, advance_pow2, BaseRNG,
+                           oneseq_rxs_m_xs_16_16, kdd>;
+
+template <bitcount_t table_pow2, bitcount_t advance_pow2,
+          typename BaseRNG, bool kdd = true>
+using ext_std32 = extended<table_pow2, advance_pow2, BaseRNG,
+                           oneseq_rxs_m_xs_32_32, kdd>;
+
+template <bitcount_t table_pow2, bitcount_t advance_pow2,
+          typename BaseRNG, bool kdd = true>
+using ext_std64 = extended<table_pow2, advance_pow2, BaseRNG,
+                           oneseq_rxs_m_xs_64_64, kdd>;
+
+
+template <bitcount_t table_pow2, bitcount_t advance_pow2, bool kdd = true>
+using ext_oneseq_rxs_m_xs_32_32 =
+          ext_std32<table_pow2, advance_pow2, oneseq_rxs_m_xs_32_32, kdd>;
+
+template <bitcount_t table_pow2, bitcount_t advance_pow2, bool kdd = true>
+using ext_mcg_xsh_rs_64_32 =
+          ext_std32<table_pow2, advance_pow2, mcg_xsh_rs_64_32, kdd>;
+
+template <bitcount_t table_pow2, bitcount_t advance_pow2, bool kdd = true>
+using ext_oneseq_xsh_rs_64_32 =
+          ext_std32<table_pow2, advance_pow2, oneseq_xsh_rs_64_32, kdd>;
+
+template <bitcount_t table_pow2, bitcount_t advance_pow2, bool kdd = true>
+using ext_setseq_xsh_rr_64_32 =
+          ext_std32<table_pow2, advance_pow2, setseq_xsh_rr_64_32, kdd>;
+
+template <bitcount_t table_pow2, bitcount_t advance_pow2, bool kdd = true>
+using ext_mcg_xsl_rr_128_64 =
+          ext_std64<table_pow2, advance_pow2, mcg_xsl_rr_128_64, kdd>;
+
+template <bitcount_t table_pow2, bitcount_t advance_pow2, bool kdd = true>
+using ext_oneseq_xsl_rr_128_64 =
+          ext_std64<table_pow2, advance_pow2, oneseq_xsl_rr_128_64, kdd>;
+
+template <bitcount_t table_pow2, bitcount_t advance_pow2, bool kdd = true>
+using ext_setseq_xsl_rr_128_64 =
+          ext_std64<table_pow2, advance_pow2, setseq_xsl_rr_128_64, kdd>;
+
+} // namespace pcg_engines
+
+typedef pcg_engines::setseq_xsh_rr_64_32        pcg32;
+typedef pcg_engines::oneseq_xsh_rr_64_32        pcg32_oneseq;
+typedef pcg_engines::unique_xsh_rr_64_32        pcg32_unique;
+typedef pcg_engines::mcg_xsh_rs_64_32           pcg32_fast;
+
+typedef pcg_engines::setseq_xsl_rr_128_64       pcg64;
+typedef pcg_engines::oneseq_xsl_rr_128_64       pcg64_oneseq;
+typedef pcg_engines::unique_xsl_rr_128_64       pcg64_unique;
+typedef pcg_engines::mcg_xsl_rr_128_64          pcg64_fast;
+
+typedef pcg_engines::setseq_rxs_m_xs_8_8        pcg8_once_insecure;
+typedef pcg_engines::setseq_rxs_m_xs_16_16      pcg16_once_insecure;
+typedef pcg_engines::setseq_rxs_m_xs_32_32      pcg32_once_insecure;
+typedef pcg_engines::setseq_rxs_m_xs_64_64      pcg64_once_insecure;
+typedef pcg_engines::setseq_xsl_rr_rr_128_128   pcg128_once_insecure;
+
+typedef pcg_engines::oneseq_rxs_m_xs_8_8        pcg8_oneseq_once_insecure;
+typedef pcg_engines::oneseq_rxs_m_xs_16_16      pcg16_oneseq_once_insecure;
+typedef pcg_engines::oneseq_rxs_m_xs_32_32      pcg32_oneseq_once_insecure;
+typedef pcg_engines::oneseq_rxs_m_xs_64_64      pcg64_oneseq_once_insecure;
+typedef pcg_engines::oneseq_xsl_rr_rr_128_128   pcg128_oneseq_once_insecure;
+
+
+// These two extended RNGs provide two-dimensionally equidistributed
+// 32-bit generators.  pcg32_k2_fast occupies the same space as pcg64,
+// and can be called twice to generate 64 bits, but does not required
+// 128-bit math; on 32-bit systems, it's faster than pcg64 as well.
+
+typedef pcg_engines::ext_setseq_xsh_rr_64_32<1,16,true>     pcg32_k2;
+typedef pcg_engines::ext_oneseq_xsh_rs_64_32<1,32,true>     pcg32_k2_fast;
+
+// These eight extended RNGs have about as much state as arc4random
+//
+//  - the k variants are k-dimensionally equidistributed
+//  - the c variants offer better crypographic security
+//
+// (just how good the cryptographic security is is an open question)
+
+typedef pcg_engines::ext_setseq_xsh_rr_64_32<6,16,true>     pcg32_k64;
+typedef pcg_engines::ext_mcg_xsh_rs_64_32<6,32,true>        pcg32_k64_oneseq;
+typedef pcg_engines::ext_oneseq_xsh_rs_64_32<6,32,true>     pcg32_k64_fast;
+
+typedef pcg_engines::ext_setseq_xsh_rr_64_32<6,16,false>    pcg32_c64;
+typedef pcg_engines::ext_oneseq_xsh_rs_64_32<6,32,false>    pcg32_c64_oneseq;
+typedef pcg_engines::ext_mcg_xsh_rs_64_32<6,32,false>       pcg32_c64_fast;
+
+typedef pcg_engines::ext_setseq_xsl_rr_128_64<5,16,true>    pcg64_k32;
+typedef pcg_engines::ext_oneseq_xsl_rr_128_64<5,128,true>   pcg64_k32_oneseq;
+typedef pcg_engines::ext_mcg_xsl_rr_128_64<5,128,true>      pcg64_k32_fast;
+
+typedef pcg_engines::ext_setseq_xsl_rr_128_64<5,16,false>   pcg64_c32;
+typedef pcg_engines::ext_oneseq_xsl_rr_128_64<5,128,false>  pcg64_c32_oneseq;
+typedef pcg_engines::ext_mcg_xsl_rr_128_64<5,128,false>     pcg64_c32_fast;
+
+// These eight extended RNGs have more state than the Mersenne twister
+//
+//  - the k variants are k-dimensionally equidistributed
+//  - the c variants offer better crypographic security
+//
+// (just how good the cryptographic security is is an open question)
+
+typedef pcg_engines::ext_setseq_xsh_rr_64_32<10,16,true>    pcg32_k1024;
+typedef pcg_engines::ext_oneseq_xsh_rs_64_32<10,32,true>    pcg32_k1024_fast;
+
+typedef pcg_engines::ext_setseq_xsh_rr_64_32<10,16,false>   pcg32_c1024;
+typedef pcg_engines::ext_oneseq_xsh_rs_64_32<10,32,false>   pcg32_c1024_fast;
+
+typedef pcg_engines::ext_setseq_xsl_rr_128_64<10,16,true>   pcg64_k1024;
+typedef pcg_engines::ext_oneseq_xsl_rr_128_64<10,128,true>  pcg64_k1024_fast;
+
+typedef pcg_engines::ext_setseq_xsl_rr_128_64<10,16,false>  pcg64_c1024;
+typedef pcg_engines::ext_oneseq_xsl_rr_128_64<10,128,false> pcg64_c1024_fast;
+
+// These generators have an insanely huge period (2^524352), and is suitable
+// for silly party tricks, such as dumping out 64 KB ZIP files at an arbitrary
+// point in the future.   [Actually, over the full period of the generator, it
+// will produce every 64 KB ZIP file 2^64 times!]
+
+typedef pcg_engines::ext_setseq_xsh_rr_64_32<14,16,true>    pcg32_k16384;
+typedef pcg_engines::ext_oneseq_xsh_rs_64_32<14,32,true>    pcg32_k16384_fast;
+
+} // namespace arrow_vendored
+
+#ifdef _MSC_VER
+    #pragma warning(default:4146)
+#endif
+
+#endif // PCG_RAND_HPP_INCLUDED
diff --git a/cpp/src/arrow/vendored/pcg/pcg_uint128.hpp b/cpp/src/arrow/vendored/pcg/pcg_uint128.hpp
new file mode 100644
index 00000000000..0181e69e4ef
--- /dev/null
+++ b/cpp/src/arrow/vendored/pcg/pcg_uint128.hpp
@@ -0,0 +1,1008 @@
+/*
+ * PCG Random Number Generation for C++
+ *
+ * Copyright 2014-2021 Melissa O'Neill <oneill@pcg-random.org>,
+ *                     and the PCG Project contributors.
+ *
+ * SPDX-License-Identifier: (Apache-2.0 OR MIT)
+ *
+ * Licensed under the Apache License, Version 2.0 (provided in
+ * LICENSE-APACHE.txt and at http://www.apache.org/licenses/LICENSE-2.0)
+ * or under the MIT license (provided in LICENSE-MIT.txt and at
+ * http://opensource.org/licenses/MIT), at your option. This file may not
+ * be copied, modified, or distributed except according to those terms.
+ *
+ * Distributed on an "AS IS" BASIS, WITHOUT WARRANTY OF ANY KIND, either
+ * express or implied.  See your chosen license for details.
+ *
+ * For additional information about the PCG random number generation scheme,
+ * visit http://www.pcg-random.org/.
+ */
+
+/*
+ * This code provides a a C++ class that can provide 128-bit (or higher)
+ * integers.  To produce 2K-bit integers, it uses two K-bit integers,
+ * placed in a union that allowes the code to also see them as four K/2 bit
+ * integers (and access them either directly name, or by index).
+ *
+ * It may seem like we're reinventing the wheel here, because several
+ * libraries already exist that support large integers, but most existing
+ * libraries provide a very generic multiprecision code, but here we're
+ * operating at a fixed size.  Also, most other libraries are fairly
+ * heavyweight.  So we use a direct implementation.  Sadly, it's much slower
+ * than hand-coded assembly or direct CPU support.
+ */
+
+#ifndef PCG_UINT128_HPP_INCLUDED
+#define PCG_UINT128_HPP_INCLUDED 1
+
+#include <cstdint>
+#include <cstdio>
+#include <cassert>
+#include <climits>
+#include <utility>
+#include <initializer_list>
+#include <type_traits>
+
+#if defined(_MSC_VER)  // Use MSVC++ intrinsics
+#include <intrin.h>
+#endif
+
+/*
+ * We want to lay the type out the same way that a native type would be laid
+ * out, which means we must know the machine's endian, at compile time.
+ * This ugliness attempts to do so.
+ */
+
+#ifndef PCG_LITTLE_ENDIAN
+    #if defined(__BYTE_ORDER__)
+        #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+            #define PCG_LITTLE_ENDIAN 1
+        #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+            #define PCG_LITTLE_ENDIAN 0
+        #else
+            #error __BYTE_ORDER__ does not match a standard endian, pick a side
+        #endif
+    #elif __LITTLE_ENDIAN__ || _LITTLE_ENDIAN
+        #define PCG_LITTLE_ENDIAN 1
+    #elif __BIG_ENDIAN__ || _BIG_ENDIAN
+        #define PCG_LITTLE_ENDIAN 0
+    #elif __x86_64 || __x86_64__ || _M_X64 || __i386 || __i386__ || _M_IX86
+        #define PCG_LITTLE_ENDIAN 1
+    #elif __powerpc__ || __POWERPC__ || __ppc__ || __PPC__ \
+          || __m68k__ || __mc68000__
+        #define PCG_LITTLE_ENDIAN 0
+    #else
+        #error Unable to determine target endianness
+    #endif
+#endif
+
+#if INTPTR_MAX == INT64_MAX && !defined(PCG_64BIT_SPECIALIZATIONS)
+    #define PCG_64BIT_SPECIALIZATIONS 1
+#endif
+
+namespace arrow_vendored {
+namespace pcg_extras {
+
+// Recent versions of GCC have intrinsics we can use to quickly calculate
+// the number of leading and trailing zeros in a number.  If possible, we
+// use them, otherwise we fall back to old-fashioned bit twiddling to figure
+// them out.
+
+#ifndef PCG_BITCOUNT_T
+    typedef uint8_t bitcount_t;
+#else
+    typedef PCG_BITCOUNT_T bitcount_t;
+#endif
+
+/*
+ * Provide some useful helper functions
+ *      * flog2                 floor(log2(x))
+ *      * trailingzeros         number of trailing zero bits
+ */
+
+#if defined(__GNUC__)   // Any GNU-compatible compiler supporting C++11 has
+                        // some useful intrinsics we can use.
+
+inline bitcount_t flog2(uint32_t v)
+{
+    return 31 - __builtin_clz(v);
+}
+
+inline bitcount_t trailingzeros(uint32_t v)
+{
+    return __builtin_ctz(v);
+}
+
+inline bitcount_t flog2(uint64_t v)
+{
+#if UINT64_MAX == ULONG_MAX
+    return 63 - __builtin_clzl(v);
+#elif UINT64_MAX == ULLONG_MAX
+    return 63 - __builtin_clzll(v);
+#else
+    #error Cannot find a function for uint64_t
+#endif
+}
+
+inline bitcount_t trailingzeros(uint64_t v)
+{
+#if UINT64_MAX == ULONG_MAX
+    return __builtin_ctzl(v);
+#elif UINT64_MAX == ULLONG_MAX
+    return __builtin_ctzll(v);
+#else
+    #error Cannot find a function for uint64_t
+#endif
+}
+
+#elif defined(_MSC_VER)  // Use MSVC++ intrinsics
+
+#pragma intrinsic(_BitScanReverse, _BitScanForward)
+#if defined(_M_X64) || defined(_M_ARM) || defined(_M_ARM64)
+#pragma intrinsic(_BitScanReverse64, _BitScanForward64)
+#endif
+
+inline bitcount_t flog2(uint32_t v)
+{
+    unsigned long i;
+    _BitScanReverse(&i, v);
+    return bitcount_t(i);
+}
+
+inline bitcount_t trailingzeros(uint32_t v)
+{
+    unsigned long i;
+    _BitScanForward(&i, v);
+    return bitcount_t(i);
+}
+
+inline bitcount_t flog2(uint64_t v)
+{
+#if defined(_M_X64) || defined(_M_ARM) || defined(_M_ARM64)
+    unsigned long i;
+    _BitScanReverse64(&i, v);
+    return bitcount_t(i);
+#else
+    // 32-bit x86
+    uint32_t high = v >> 32;
+    uint32_t low  = uint32_t(v);
+    return high ? 32+flog2(high) : flog2(low);
+#endif
+}
+
+inline bitcount_t trailingzeros(uint64_t v)
+{
+#if defined(_M_X64) || defined(_M_ARM) || defined(_M_ARM64)
+    unsigned long i;
+    _BitScanForward64(&i, v);
+    return bitcount_t(i);
+#else
+    // 32-bit x86
+    uint32_t high = v >> 32;
+    uint32_t low  = uint32_t(v);
+    return low ? trailingzeros(low) : trailingzeros(high)+32;
+#endif
+}
+
+#else                   // Otherwise, we fall back to bit twiddling
+                        // implementations
+
+inline bitcount_t flog2(uint32_t v)
+{
+    // Based on code by Eric Cole and Mark Dickinson, which appears at
+    // https://graphics.stanford.edu/~seander/bithacks.html#IntegerLogDeBruijn
+
+    static const uint8_t multiplyDeBruijnBitPos[32] = {
+      0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30,
+      8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31
+    };
+
+    v |= v >> 1; // first round down to one less than a power of 2
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+
+    return multiplyDeBruijnBitPos[(uint32_t)(v * 0x07C4ACDDU) >> 27];
+}
+
+inline bitcount_t trailingzeros(uint32_t v)
+{
+    static const uint8_t multiplyDeBruijnBitPos[32] = {
+      0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
+      31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
+    };
+
+    return multiplyDeBruijnBitPos[((uint32_t)((v & -v) * 0x077CB531U)) >> 27];
+}
+
+inline bitcount_t flog2(uint64_t v)
+{
+    uint32_t high = v >> 32;
+    uint32_t low  = uint32_t(v);
+
+    return high ? 32+flog2(high) : flog2(low);
+}
+
+inline bitcount_t trailingzeros(uint64_t v)
+{
+    uint32_t high = v >> 32;
+    uint32_t low  = uint32_t(v);
+
+    return low ? trailingzeros(low) : trailingzeros(high)+32;
+}
+
+#endif
+
+inline bitcount_t flog2(uint8_t v)
+{
+    return flog2(uint32_t(v));
+}
+
+inline bitcount_t flog2(uint16_t v)
+{
+    return flog2(uint32_t(v));
+}
+
+#if __SIZEOF_INT128__
+inline bitcount_t flog2(__uint128_t v)
+{
+    uint64_t high = uint64_t(v >> 64);
+    uint64_t low  = uint64_t(v);
+
+    return high ? 64+flog2(high) : flog2(low);
+}
+#endif
+
+inline bitcount_t trailingzeros(uint8_t v)
+{
+    return trailingzeros(uint32_t(v));
+}
+
+inline bitcount_t trailingzeros(uint16_t v)
+{
+    return trailingzeros(uint32_t(v));
+}
+
+#if __SIZEOF_INT128__
+inline bitcount_t trailingzeros(__uint128_t v)
+{
+    uint64_t high = uint64_t(v >> 64);
+    uint64_t low  = uint64_t(v);
+    return low ? trailingzeros(low) : trailingzeros(high)+64;
+}
+#endif
+
+template <typename UInt>
+inline bitcount_t clog2(UInt v)
+{
+    return flog2(v) + ((v & (-v)) != v);
+}
+
+template <typename UInt>
+inline UInt addwithcarry(UInt x, UInt y, bool carryin, bool* carryout)
+{
+    UInt half_result = y + carryin;
+    UInt result = x + half_result;
+    *carryout = (half_result < y) || (result < x);
+    return result;
+}
+
+template <typename UInt>
+inline UInt subwithcarry(UInt x, UInt y, bool carryin, bool* carryout)
+{
+    UInt half_result = y + carryin;
+    UInt result = x - half_result;
+    *carryout = (half_result < y) || (result > x);
+    return result;
+}
+
+
+template <typename UInt, typename UIntX2>
+class uint_x4 {
+// private:
+    static constexpr unsigned int UINT_BITS = sizeof(UInt) * CHAR_BIT;
+public:
+    union {
+#if PCG_LITTLE_ENDIAN
+        struct {
+            UInt v0, v1, v2, v3;
+        } w;
+        struct {
+            UIntX2 v01, v23;
+        } d;
+#else
+        struct {
+            UInt v3, v2, v1, v0;
+        } w;
+        struct {
+            UIntX2 v23, v01;
+        } d;
+#endif
+        // For the array access versions, the code that uses the array
+        // must handle endian itself.  Yuck.
+        UInt wa[4];
+    };
+
+public:
+    uint_x4() = default;
+
+    constexpr uint_x4(UInt v3, UInt v2, UInt v1, UInt v0)
+#if PCG_LITTLE_ENDIAN
+       : w{v0, v1, v2, v3}
+#else
+       : w{v3, v2, v1, v0}
+#endif
+    {
+        // Nothing (else) to do
+    }
+
+    constexpr uint_x4(UIntX2 v23, UIntX2 v01)
+#if PCG_LITTLE_ENDIAN
+       : d{v01,v23}
+#else
+       : d{v23,v01}
+#endif
+    {
+        // Nothing (else) to do
+    }
+
+    constexpr uint_x4(UIntX2 v01)
+#if PCG_LITTLE_ENDIAN
+       : d{v01, UIntX2(0)}
+#else
+       : d{UIntX2(0),v01}
+#endif
+    {
+        // Nothing (else) to do
+    }
+
+    template<class Integral,
+             typename std::enable_if<(std::is_integral<Integral>::value
+                                      && sizeof(Integral) <= sizeof(UIntX2))
+                                    >::type* = nullptr>
+    constexpr uint_x4(Integral v01)
+#if PCG_LITTLE_ENDIAN
+       : d{UIntX2(v01), UIntX2(0)}
+#else
+       : d{UIntX2(0), UIntX2(v01)}
+#endif
+    {
+        // Nothing (else) to do
+    }
+
+    explicit constexpr operator UIntX2() const
+    {
+        return d.v01;
+    }
+
+    template<class Integral,
+             typename std::enable_if<(std::is_integral<Integral>::value
+                                      && sizeof(Integral) <= sizeof(UIntX2))
+                                    >::type* = nullptr>
+    explicit constexpr operator Integral() const
+    {
+        return Integral(d.v01);
+    }
+
+    explicit constexpr operator bool() const
+    {
+        return d.v01 || d.v23;
+    }
+
+    template<typename U, typename V>
+    friend uint_x4<U,V> operator*(const uint_x4<U,V>&, const uint_x4<U,V>&);
+
+    template<typename U, typename V>
+    friend uint_x4<U,V> operator*(const uint_x4<U,V>&, V);
+
+    template<typename U, typename V>
+    friend std::pair< uint_x4<U,V>,uint_x4<U,V> >
+        divmod(const uint_x4<U,V>&, const uint_x4<U,V>&);
+
+    template<typename U, typename V>
+    friend uint_x4<U,V> operator+(const uint_x4<U,V>&, const uint_x4<U,V>&);
+
+    template<typename U, typename V>
+    friend uint_x4<U,V> operator-(const uint_x4<U,V>&, const uint_x4<U,V>&);
+
+    template<typename U, typename V>
+    friend uint_x4<U,V> operator<<(const uint_x4<U,V>&, const bitcount_t shift);
+
+    template<typename U, typename V>
+    friend uint_x4<U,V> operator>>(const uint_x4<U,V>&, const bitcount_t shift);
+
+#if PCG_64BIT_SPECIALIZATIONS
+    template<typename U>
+    friend uint_x4<U,uint64_t> operator<<(const uint_x4<U,uint64_t>&, const bitcount_t shift);
+
+    template<typename U>
+    friend uint_x4<U,uint64_t> operator>>(const uint_x4<U,uint64_t>&, const bitcount_t shift);
+#endif
+
+    template<typename U, typename V>
+    friend uint_x4<U,V> operator&(const uint_x4<U,V>&, const uint_x4<U,V>&);
+
+    template<typename U, typename V>
+    friend uint_x4<U,V> operator|(const uint_x4<U,V>&, const uint_x4<U,V>&);
+
+    template<typename U, typename V>
+    friend uint_x4<U,V> operator^(const uint_x4<U,V>&, const uint_x4<U,V>&);
+
+    template<typename U, typename V>
+    friend bool operator==(const uint_x4<U,V>&, const uint_x4<U,V>&);
+
+    template<typename U, typename V>
+    friend bool operator!=(const uint_x4<U,V>&, const uint_x4<U,V>&);
+
+    template<typename U, typename V>
+    friend bool operator<(const uint_x4<U,V>&, const uint_x4<U,V>&);
+
+    template<typename U, typename V>
+    friend bool operator<=(const uint_x4<U,V>&, const uint_x4<U,V>&);
+
+    template<typename U, typename V>
+    friend bool operator>(const uint_x4<U,V>&, const uint_x4<U,V>&);
+
+    template<typename U, typename V>
+    friend bool operator>=(const uint_x4<U,V>&, const uint_x4<U,V>&);
+
+    template<typename U, typename V>
+    friend uint_x4<U,V> operator~(const uint_x4<U,V>&);
+
+    template<typename U, typename V>
+    friend uint_x4<U,V> operator-(const uint_x4<U,V>&);
+
+    template<typename U, typename V>
+    friend bitcount_t flog2(const uint_x4<U,V>&);
+
+    template<typename U, typename V>
+    friend bitcount_t trailingzeros(const uint_x4<U,V>&);
+
+#if PCG_64BIT_SPECIALIZATIONS
+    template<typename U>
+    friend bitcount_t flog2(const uint_x4<U,uint64_t>&);
+
+    template<typename U>
+    friend bitcount_t trailingzeros(const uint_x4<U,uint64_t>&);
+#endif
+
+    uint_x4& operator*=(const uint_x4& rhs)
+    {
+        uint_x4 result = *this * rhs;
+        return *this = result;
+    }
+
+    uint_x4& operator*=(UIntX2 rhs)
+    {
+        uint_x4 result = *this * rhs;
+        return *this = result;
+    }
+
+    uint_x4& operator/=(const uint_x4& rhs)
+    {
+        uint_x4 result = *this / rhs;
+        return *this = result;
+    }
+
+    uint_x4& operator%=(const uint_x4& rhs)
+    {
+        uint_x4 result = *this % rhs;
+        return *this = result;
+    }
+
+    uint_x4& operator+=(const uint_x4& rhs)
+    {
+        uint_x4 result = *this + rhs;
+        return *this = result;
+    }
+
+    uint_x4& operator-=(const uint_x4& rhs)
+    {
+        uint_x4 result = *this - rhs;
+        return *this = result;
+    }
+
+    uint_x4& operator&=(const uint_x4& rhs)
+    {
+        uint_x4 result = *this & rhs;
+        return *this = result;
+    }
+
+    uint_x4& operator|=(const uint_x4& rhs)
+    {
+        uint_x4 result = *this | rhs;
+        return *this = result;
+    }
+
+    uint_x4& operator^=(const uint_x4& rhs)
+    {
+        uint_x4 result = *this ^ rhs;
+        return *this = result;
+    }
+
+    uint_x4& operator>>=(bitcount_t shift)
+    {
+        uint_x4 result = *this >> shift;
+        return *this = result;
+    }
+
+    uint_x4& operator<<=(bitcount_t shift)
+    {
+        uint_x4 result = *this << shift;
+        return *this = result;
+    }
+
+};
+
+template<typename U, typename V>
+bitcount_t flog2(const uint_x4<U,V>& v)
+{
+#if PCG_LITTLE_ENDIAN
+    for (uint8_t i = 4; i !=0; /* dec in loop */) {
+        --i;
+#else
+    for (uint8_t i = 0; i < 4; ++i) {
+#endif
+        if (v.wa[i] == 0)
+             continue;
+        return flog2(v.wa[i]) + uint_x4<U,V>::UINT_BITS*i;
+    }
+    abort();
+}
+
+template<typename U, typename V>
+bitcount_t trailingzeros(const uint_x4<U,V>& v)
+{
+#if PCG_LITTLE_ENDIAN
+    for (uint8_t i = 0; i < 4; ++i) {
+#else
+    for (uint8_t i = 4; i !=0; /* dec in loop */) {
+        --i;
+#endif
+        if (v.wa[i] != 0)
+            return trailingzeros(v.wa[i]) + uint_x4<U,V>::UINT_BITS*i;
+    }
+    return uint_x4<U,V>::UINT_BITS*4;
+}
+
+#if PCG_64BIT_SPECIALIZATIONS
+template<typename UInt32>
+bitcount_t flog2(const uint_x4<UInt32,uint64_t>& v)
+{
+    return v.d.v23 > 0 ? flog2(v.d.v23) + uint_x4<UInt32,uint64_t>::UINT_BITS*2
+                       : flog2(v.d.v01);
+}
+
+template<typename UInt32>
+bitcount_t trailingzeros(const uint_x4<UInt32,uint64_t>& v)
+{
+    return v.d.v01 == 0 ? trailingzeros(v.d.v23) + uint_x4<UInt32,uint64_t>::UINT_BITS*2
+                        : trailingzeros(v.d.v01);
+}
+#endif
+
+template <typename UInt, typename UIntX2>
+std::pair< uint_x4<UInt,UIntX2>, uint_x4<UInt,UIntX2> >
+    divmod(const uint_x4<UInt,UIntX2>& orig_dividend,
+           const uint_x4<UInt,UIntX2>& divisor)
+{
+    // If the dividend is less than the divisor, the answer is always zero.
+    // This takes care of boundary cases like 0/x (which would otherwise be
+    // problematic because we can't take the log of zero.  (The boundary case
+    // of division by zero is undefined.)
+    if (orig_dividend < divisor)
+        return { uint_x4<UInt,UIntX2>(UIntX2(0)), orig_dividend };
+
+    auto dividend = orig_dividend;
+
+    auto log2_divisor  = flog2(divisor);
+    auto log2_dividend = flog2(dividend);
+    // assert(log2_dividend >= log2_divisor);
+    bitcount_t logdiff = log2_dividend - log2_divisor;
+
+    constexpr uint_x4<UInt,UIntX2> ONE(UIntX2(1));
+    if (logdiff == 0)
+        return { ONE, dividend - divisor };
+
+    // Now we change the log difference to
+    //  floor(log2(divisor)) - ceil(log2(dividend))
+    // to ensure that we *underestimate* the result.
+    logdiff -= 1;
+
+    uint_x4<UInt,UIntX2> quotient(UIntX2(0));
+
+    auto qfactor = ONE << logdiff;
+    auto factor  = divisor << logdiff;
+
+    do {
+        dividend -= factor;
+        quotient += qfactor;
+        while (dividend < factor) {
+            factor  >>= 1;
+            qfactor >>= 1;
+        }
+    } while (dividend >= divisor);
+
+    return { quotient, dividend };
+}
+
+template <typename UInt, typename UIntX2>
+uint_x4<UInt,UIntX2> operator/(const uint_x4<UInt,UIntX2>& dividend,
+                               const uint_x4<UInt,UIntX2>& divisor)
+{
+    return divmod(dividend, divisor).first;
+}
+
+template <typename UInt, typename UIntX2>
+uint_x4<UInt,UIntX2> operator%(const uint_x4<UInt,UIntX2>& dividend,
+                               const uint_x4<UInt,UIntX2>& divisor)
+{
+    return divmod(dividend, divisor).second;
+}
+
+
+template <typename UInt, typename UIntX2>
+uint_x4<UInt,UIntX2> operator*(const uint_x4<UInt,UIntX2>& a,
+                               const uint_x4<UInt,UIntX2>& b)
+{
+    constexpr auto UINT_BITS = uint_x4<UInt,UIntX2>::UINT_BITS;
+    uint_x4<UInt,UIntX2> r = {0U, 0U, 0U, 0U};
+    bool carryin = false;
+    bool carryout;
+    UIntX2 a0b0 = UIntX2(a.w.v0) * UIntX2(b.w.v0);
+    r.w.v0 = UInt(a0b0);
+    r.w.v1 = UInt(a0b0 >> UINT_BITS);
+
+    UIntX2 a1b0 = UIntX2(a.w.v1) * UIntX2(b.w.v0);
+    r.w.v2 = UInt(a1b0 >> UINT_BITS);
+    r.w.v1 = addwithcarry(r.w.v1, UInt(a1b0), carryin, &carryout);
+    carryin = carryout;
+    r.w.v2 = addwithcarry(r.w.v2, UInt(0U), carryin, &carryout);
+    carryin = carryout;
+    r.w.v3 = addwithcarry(r.w.v3, UInt(0U), carryin, &carryout);
+
+    UIntX2 a0b1 = UIntX2(a.w.v0) * UIntX2(b.w.v1);
+    carryin = false;
+    r.w.v2 = addwithcarry(r.w.v2, UInt(a0b1 >> UINT_BITS), carryin, &carryout);
+    carryin = carryout;
+    r.w.v3 = addwithcarry(r.w.v3, UInt(0U), carryin, &carryout);
+
+    carryin = false;
+    r.w.v1 = addwithcarry(r.w.v1, UInt(a0b1), carryin, &carryout);
+    carryin = carryout;
+    r.w.v2 = addwithcarry(r.w.v2, UInt(0U), carryin, &carryout);
+    carryin = carryout;
+    r.w.v3 = addwithcarry(r.w.v3, UInt(0U), carryin, &carryout);
+
+    UIntX2 a1b1 = UIntX2(a.w.v1) * UIntX2(b.w.v1);
+    carryin = false;
+    r.w.v2 = addwithcarry(r.w.v2, UInt(a1b1), carryin, &carryout);
+    carryin = carryout;
+    r.w.v3 = addwithcarry(r.w.v3, UInt(a1b1 >> UINT_BITS), carryin, &carryout);
+
+    r.d.v23 += a.d.v01 * b.d.v23 + a.d.v23 * b.d.v01;
+
+    return r;
+}
+
+ 
+template <typename UInt, typename UIntX2>
+uint_x4<UInt,UIntX2> operator*(const uint_x4<UInt,UIntX2>& a,
+                               UIntX2 b01)
+{
+    constexpr auto UINT_BITS = uint_x4<UInt,UIntX2>::UINT_BITS;
+    uint_x4<UInt,UIntX2> r = {0U, 0U, 0U, 0U};
+    bool carryin = false;
+    bool carryout;
+    UIntX2 a0b0 = UIntX2(a.w.v0) * UIntX2(UInt(b01));
+    r.w.v0 = UInt(a0b0);
+    r.w.v1 = UInt(a0b0 >> UINT_BITS);
+
+    UIntX2 a1b0 = UIntX2(a.w.v1) * UIntX2(UInt(b01));
+    r.w.v2 = UInt(a1b0 >> UINT_BITS);
+    r.w.v1 = addwithcarry(r.w.v1, UInt(a1b0), carryin, &carryout);
+    carryin = carryout;
+    r.w.v2 = addwithcarry(r.w.v2, UInt(0U), carryin, &carryout);
+    carryin = carryout;
+    r.w.v3 = addwithcarry(r.w.v3, UInt(0U), carryin, &carryout);
+
+    UIntX2 a0b1 = UIntX2(a.w.v0) * UIntX2(b01 >> UINT_BITS);
+    carryin = false;
+    r.w.v2 = addwithcarry(r.w.v2, UInt(a0b1 >> UINT_BITS), carryin, &carryout);
+    carryin = carryout;
+    r.w.v3 = addwithcarry(r.w.v3, UInt(0U), carryin, &carryout);
+
+    carryin = false;
+    r.w.v1 = addwithcarry(r.w.v1, UInt(a0b1), carryin, &carryout);
+    carryin = carryout;
+    r.w.v2 = addwithcarry(r.w.v2, UInt(0U), carryin, &carryout);
+    carryin = carryout;
+    r.w.v3 = addwithcarry(r.w.v3, UInt(0U), carryin, &carryout);
+
+    UIntX2 a1b1 = UIntX2(a.w.v1) * UIntX2(b01 >> UINT_BITS);
+    carryin = false;
+    r.w.v2 = addwithcarry(r.w.v2, UInt(a1b1), carryin, &carryout);
+    carryin = carryout;
+    r.w.v3 = addwithcarry(r.w.v3, UInt(a1b1 >> UINT_BITS), carryin, &carryout);
+
+    r.d.v23 += a.d.v23 * b01;
+
+    return r;
+}
+
+#if PCG_64BIT_SPECIALIZATIONS
+#if defined(_MSC_VER)
+#pragma intrinsic(_umul128)
+#endif
+
+#if defined(_MSC_VER) || __SIZEOF_INT128__
+template <typename UInt32>
+uint_x4<UInt32,uint64_t> operator*(const uint_x4<UInt32,uint64_t>& a,
+				   const uint_x4<UInt32,uint64_t>& b)
+{
+#if defined(_MSC_VER)
+    uint64_t hi;
+    uint64_t lo = _umul128(a.d.v01, b.d.v01, &hi);
+#else
+    __uint128_t r = __uint128_t(a.d.v01) * __uint128_t(b.d.v01);
+    uint64_t lo = uint64_t(r);
+    uint64_t hi = r >> 64;
+#endif
+    hi += a.d.v23 * b.d.v01 + a.d.v01 * b.d.v23;
+    return {hi, lo};
+}
+#endif
+#endif
+
+
+template <typename UInt, typename UIntX2>
+uint_x4<UInt,UIntX2> operator+(const uint_x4<UInt,UIntX2>& a,
+                               const uint_x4<UInt,UIntX2>& b)
+{
+    uint_x4<UInt,UIntX2> r = {0U, 0U, 0U, 0U};
+
+    bool carryin = false;
+    bool carryout;
+    r.w.v0 = addwithcarry(a.w.v0, b.w.v0, carryin, &carryout);
+    carryin = carryout;
+    r.w.v1 = addwithcarry(a.w.v1, b.w.v1, carryin, &carryout);
+    carryin = carryout;
+    r.w.v2 = addwithcarry(a.w.v2, b.w.v2, carryin, &carryout);
+    carryin = carryout;
+    r.w.v3 = addwithcarry(a.w.v3, b.w.v3, carryin, &carryout);
+
+    return r;
+}
+
+template <typename UInt, typename UIntX2>
+uint_x4<UInt,UIntX2> operator-(const uint_x4<UInt,UIntX2>& a,
+                               const uint_x4<UInt,UIntX2>& b)
+{
+    uint_x4<UInt,UIntX2> r = {0U, 0U, 0U, 0U};
+
+    bool carryin = false;
+    bool carryout;
+    r.w.v0 = subwithcarry(a.w.v0, b.w.v0, carryin, &carryout);
+    carryin = carryout;
+    r.w.v1 = subwithcarry(a.w.v1, b.w.v1, carryin, &carryout);
+    carryin = carryout;
+    r.w.v2 = subwithcarry(a.w.v2, b.w.v2, carryin, &carryout);
+    carryin = carryout;
+    r.w.v3 = subwithcarry(a.w.v3, b.w.v3, carryin, &carryout);
+
+    return r;
+}
+
+#if PCG_64BIT_SPECIALIZATIONS
+template <typename UInt32>
+uint_x4<UInt32,uint64_t> operator+(const uint_x4<UInt32,uint64_t>& a,
+				   const uint_x4<UInt32,uint64_t>& b)
+{
+    uint_x4<UInt32,uint64_t> r = {uint64_t(0u), uint64_t(0u)};
+
+    bool carryin = false;
+    bool carryout;
+    r.d.v01 = addwithcarry(a.d.v01, b.d.v01, carryin, &carryout);
+    carryin = carryout;
+    r.d.v23 = addwithcarry(a.d.v23, b.d.v23, carryin, &carryout);
+
+    return r;
+}
+
+template <typename UInt32>
+uint_x4<UInt32,uint64_t> operator-(const uint_x4<UInt32,uint64_t>& a,
+				   const uint_x4<UInt32,uint64_t>& b)
+{
+    uint_x4<UInt32,uint64_t> r = {uint64_t(0u), uint64_t(0u)};
+
+    bool carryin = false;
+    bool carryout;
+    r.d.v01 = subwithcarry(a.d.v01, b.d.v01, carryin, &carryout);
+    carryin = carryout;
+    r.d.v23 = subwithcarry(a.d.v23, b.d.v23, carryin, &carryout);
+
+    return r;
+}
+#endif
+
+template <typename UInt, typename UIntX2>
+uint_x4<UInt,UIntX2> operator&(const uint_x4<UInt,UIntX2>& a,
+                               const uint_x4<UInt,UIntX2>& b)
+{
+    return uint_x4<UInt,UIntX2>(a.d.v23 & b.d.v23, a.d.v01 & b.d.v01);
+}
+
+template <typename UInt, typename UIntX2>
+uint_x4<UInt,UIntX2> operator|(const uint_x4<UInt,UIntX2>& a,
+                               const uint_x4<UInt,UIntX2>& b)
+{
+    return uint_x4<UInt,UIntX2>(a.d.v23 | b.d.v23, a.d.v01 | b.d.v01);
+}
+
+template <typename UInt, typename UIntX2>
+uint_x4<UInt,UIntX2> operator^(const uint_x4<UInt,UIntX2>& a,
+                               const uint_x4<UInt,UIntX2>& b)
+{
+    return uint_x4<UInt,UIntX2>(a.d.v23 ^ b.d.v23, a.d.v01 ^ b.d.v01);
+}
+
+template <typename UInt, typename UIntX2>
+uint_x4<UInt,UIntX2> operator~(const uint_x4<UInt,UIntX2>& v)
+{
+    return uint_x4<UInt,UIntX2>(~v.d.v23, ~v.d.v01);
+}
+
+template <typename UInt, typename UIntX2>
+uint_x4<UInt,UIntX2> operator-(const uint_x4<UInt,UIntX2>& v)
+{
+    return uint_x4<UInt,UIntX2>(0UL,0UL) - v;
+}
+
+template <typename UInt, typename UIntX2>
+bool operator==(const uint_x4<UInt,UIntX2>& a, const uint_x4<UInt,UIntX2>& b)
+{
+    return (a.d.v01 == b.d.v01) && (a.d.v23 == b.d.v23);
+}
+
+template <typename UInt, typename UIntX2>
+bool operator!=(const uint_x4<UInt,UIntX2>& a, const uint_x4<UInt,UIntX2>& b)
+{
+    return !operator==(a,b);
+}
+
+
+template <typename UInt, typename UIntX2>
+bool operator<(const uint_x4<UInt,UIntX2>& a, const uint_x4<UInt,UIntX2>& b)
+{
+    return (a.d.v23 < b.d.v23)
+           || ((a.d.v23 == b.d.v23) && (a.d.v01 < b.d.v01));
+}
+
+template <typename UInt, typename UIntX2>
+bool operator>(const uint_x4<UInt,UIntX2>& a, const uint_x4<UInt,UIntX2>& b)
+{
+    return operator<(b,a);
+}
+
+template <typename UInt, typename UIntX2>
+bool operator<=(const uint_x4<UInt,UIntX2>& a, const uint_x4<UInt,UIntX2>& b)
+{
+    return !(operator<(b,a));
+}
+
+template <typename UInt, typename UIntX2>
+bool operator>=(const uint_x4<UInt,UIntX2>& a, const uint_x4<UInt,UIntX2>& b)
+{
+    return !(operator<(a,b));
+}
+
+
+
+template <typename UInt, typename UIntX2>
+uint_x4<UInt,UIntX2> operator<<(const uint_x4<UInt,UIntX2>& v,
+                                const bitcount_t shift)
+{
+    uint_x4<UInt,UIntX2> r = {0U, 0U, 0U, 0U};
+    const bitcount_t bits    = uint_x4<UInt,UIntX2>::UINT_BITS;
+    const bitcount_t bitmask = bits - 1;
+    const bitcount_t shiftdiv = shift / bits;
+    const bitcount_t shiftmod = shift & bitmask;
+
+    if (shiftmod) {
+        UInt carryover = 0;
+#if PCG_LITTLE_ENDIAN
+        for (uint8_t out = shiftdiv, in = 0; out < 4; ++out, ++in) {
+#else
+        for (uint8_t out = 4-shiftdiv, in = 4; out != 0; /* dec in loop */) {
+            --out, --in;
+#endif
+            r.wa[out] = (v.wa[in] << shiftmod) | carryover;
+            carryover = (v.wa[in] >> (bits - shiftmod));
+        }
+    } else {
+#if PCG_LITTLE_ENDIAN
+        for (uint8_t out = shiftdiv, in = 0; out < 4; ++out, ++in) {
+#else
+        for (uint8_t out = 4-shiftdiv, in = 4; out != 0; /* dec in loop */) {
+            --out, --in;
+#endif
+            r.wa[out] = v.wa[in];
+        }
+    }
+
+    return r;
+}
+
+template <typename UInt, typename UIntX2>
+uint_x4<UInt,UIntX2> operator>>(const uint_x4<UInt,UIntX2>& v,
+                                const bitcount_t shift)
+{
+    uint_x4<UInt,UIntX2> r = {0U, 0U, 0U, 0U};
+    const bitcount_t bits    = uint_x4<UInt,UIntX2>::UINT_BITS;
+    const bitcount_t bitmask = bits - 1;
+    const bitcount_t shiftdiv = shift / bits;
+    const bitcount_t shiftmod = shift & bitmask;
+
+    if (shiftmod) {
+        UInt carryover = 0;
+#if PCG_LITTLE_ENDIAN
+        for (uint8_t out = 4-shiftdiv, in = 4; out != 0; /* dec in loop */) {
+            --out, --in;
+#else
+        for (uint8_t out = shiftdiv, in = 0; out < 4; ++out, ++in) {
+#endif
+            r.wa[out] = (v.wa[in] >> shiftmod) | carryover;
+            carryover = (v.wa[in] << (bits - shiftmod));
+        }
+    } else {
+#if PCG_LITTLE_ENDIAN
+        for (uint8_t out = 4-shiftdiv, in = 4; out != 0; /* dec in loop */) {
+            --out, --in;
+#else
+        for (uint8_t out = shiftdiv, in = 0; out < 4; ++out, ++in) {
+#endif
+            r.wa[out] = v.wa[in];
+        }
+    }
+
+    return r;
+}
+
+#if PCG_64BIT_SPECIALIZATIONS
+template <typename UInt32>
+uint_x4<UInt32,uint64_t> operator<<(const uint_x4<UInt32,uint64_t>& v,
+				    const bitcount_t shift)
+{
+    constexpr bitcount_t bits2   = uint_x4<UInt32,uint64_t>::UINT_BITS * 2;
+    
+    if (shift >= bits2) {
+        return {v.d.v01 << (shift-bits2), uint64_t(0u)};
+    } else {
+        return {shift ? (v.d.v23 << shift) | (v.d.v01 >> (bits2-shift)) 
+                      : v.d.v23,
+                v.d.v01 << shift};
+    }
+}
+
+template <typename UInt32>
+uint_x4<UInt32,uint64_t> operator>>(const uint_x4<UInt32,uint64_t>& v,
+				    const bitcount_t shift)
+{
+    constexpr bitcount_t bits2   = uint_x4<UInt32,uint64_t>::UINT_BITS * 2;
+    
+    if (shift >= bits2) {
+        return {uint64_t(0u), v.d.v23 >> (shift-bits2)};
+    } else {
+        return {v.d.v23 >> shift,
+                shift ? (v.d.v01 >> shift) | (v.d.v23 << (bits2-shift))
+                      : v.d.v01};
+    }
+}
+#endif
+
+} // namespace pcg_extras
+} // namespace arrow_vendored
+
+#endif // PCG_UINT128_HPP_INCLUDED

From ccf5b9ff6c283ce30d98fad2a8eba95d38bf0acb Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Thu, 22 Apr 2021 18:28:29 +0200
Subject: [PATCH 111/719] PARQUET-1998: [C++] Implement LZ4_RAW compression

Closes #9782 from pitrou/PARQUET-1998-lz4-raw

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/generated/parquet_types.cpp    |  8 +--
 cpp/src/generated/parquet_types.h      |  3 +-
 cpp/src/parquet/file_serialize_test.cc |  3 +-
 cpp/src/parquet/parquet.thrift         | 59 ++++++++++++-----------
 cpp/src/parquet/reader_test.cc         | 67 +++++++++++++++++++-------
 cpp/src/parquet/thrift_internal.h      |  8 ++-
 cpp/src/parquet/types.cc               |  5 --
 cpp/submodules/parquet-testing         |  2 +-
 8 files changed, 96 insertions(+), 59 deletions(-)

diff --git a/cpp/src/generated/parquet_types.cpp b/cpp/src/generated/parquet_types.cpp
index 9233d79c92c..cccd92e2ec4 100644
--- a/cpp/src/generated/parquet_types.cpp
+++ b/cpp/src/generated/parquet_types.cpp
@@ -204,7 +204,8 @@ int _kCompressionCodecValues[] = {
   CompressionCodec::LZO,
   CompressionCodec::BROTLI,
   CompressionCodec::LZ4,
-  CompressionCodec::ZSTD
+  CompressionCodec::ZSTD,
+  CompressionCodec::LZ4_RAW
 };
 const char* _kCompressionCodecNames[] = {
   "UNCOMPRESSED",
@@ -213,9 +214,10 @@ const char* _kCompressionCodecNames[] = {
   "LZO",
   "BROTLI",
   "LZ4",
-  "ZSTD"
+  "ZSTD",
+  "LZ4_RAW"
 };
-const std::map<int, const char*> _CompressionCodec_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(7, _kCompressionCodecValues, _kCompressionCodecNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL));
+const std::map<int, const char*> _CompressionCodec_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(8, _kCompressionCodecValues, _kCompressionCodecNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL));
 
 std::ostream& operator<<(std::ostream& out, const CompressionCodec::type& val) {
   std::map<int, const char*>::const_iterator it = _CompressionCodec_VALUES_TO_NAMES.find(val);
diff --git a/cpp/src/generated/parquet_types.h b/cpp/src/generated/parquet_types.h
index 2a4e26b0ea2..3d7edd40983 100644
--- a/cpp/src/generated/parquet_types.h
+++ b/cpp/src/generated/parquet_types.h
@@ -116,7 +116,8 @@ struct CompressionCodec {
     LZO = 3,
     BROTLI = 4,
     LZ4 = 5,
-    ZSTD = 6
+    ZSTD = 6,
+    LZ4_RAW = 7
   };
 };
 
diff --git a/cpp/src/parquet/file_serialize_test.cc b/cpp/src/parquet/file_serialize_test.cc
index 3574d379561..eb1133d8a9e 100644
--- a/cpp/src/parquet/file_serialize_test.cc
+++ b/cpp/src/parquet/file_serialize_test.cc
@@ -330,8 +330,7 @@ TYPED_TEST(TestSerialize, SmallFileGzip) {
 
 #ifdef ARROW_WITH_LZ4
 TYPED_TEST(TestSerialize, SmallFileLz4) {
-  ASSERT_NO_FATAL_FAILURE(
-      this->FileSerializeTest(Compression::LZ4, Compression::LZ4_HADOOP));
+  ASSERT_NO_FATAL_FAILURE(this->FileSerializeTest(Compression::LZ4));
 }
 
 TYPED_TEST(TestSerialize, SmallFileLz4Hadoop) {
diff --git a/cpp/src/parquet/parquet.thrift b/cpp/src/parquet/parquet.thrift
index de875f7a559..8aa98481675 100644
--- a/cpp/src/parquet/parquet.thrift
+++ b/cpp/src/parquet/parquet.thrift
@@ -17,11 +17,11 @@
  * under the License.
  */
 
-cpp_include "parquet/windows_compatibility.h"
-
 /**
  * File format description for the parquet file format
  */
+
+cpp_include "parquet/windows_compatibility.h"
 namespace cpp parquet.format
 namespace java org.apache.parquet.format
 
@@ -473,19 +473,21 @@ enum Encoding {
 /**
  * Supported compression algorithms.
  *
- * Codecs added in 2.4 can be read by readers based on 2.4 and later.
+ * Codecs added in format version X.Y can be read by readers based on X.Y and later.
  * Codec support may vary between readers based on the format version and
- * libraries available at runtime. Gzip, Snappy, and LZ4 codecs are
- * widely available, while Zstd and Brotli require additional libraries.
+ * libraries available at runtime.
+ *
+ * See Compression.md for a detailed specification of these algorithms.
  */
 enum CompressionCodec {
   UNCOMPRESSED = 0;
   SNAPPY = 1;
   GZIP = 2;
   LZO = 3;
-  BROTLI = 4; // Added in 2.4
-  LZ4 = 5;    // Added in 2.4
-  ZSTD = 6;   // Added in 2.4
+  BROTLI = 4;  // Added in 2.4
+  LZ4 = 5;     // DEPRECATED (Added in 2.4)
+  ZSTD = 6;    // Added in 2.4
+  LZ4_RAW = 7; // Added in 2.9
 }
 
 enum PageType {
@@ -568,7 +570,7 @@ struct DataPageHeaderV2 {
   If missing it is considered compressed */
   7: optional bool is_compressed = 1;
 
-  /** optional statistics for this column chunk */
+  /** optional statistics for the data in this page **/
   8: optional Statistics statistics;
 }
 
@@ -581,11 +583,11 @@ union BloomFilterAlgorithm {
 }
 
 /** Hash strategy type annotation. xxHash is an extremely fast non-cryptographic hash
- * algorithm. It uses 64 bits version of xxHash.
+ * algorithm. It uses 64 bits version of xxHash. 
  **/
 struct XxHash {}
 
-/**
+/** 
  * The hash function used in Bloom filter. This function takes the hash of a column value
  * using plain encoding.
  **/
@@ -648,6 +650,8 @@ struct PageHeader {
    *     uncompressed definition levels and the compressed column values.
    *     If no compression scheme is specified, the CRC shall be calculated on
    *     the uncompressed concatenation.
+   * - In encrypted columns, CRC is calculated after page encryption; the
+   *   encryption itself is performed after page compression (if compressed)
    * If enabled, this allows for disabling checksumming in HDFS if only a few
    * pages need to be read.
    **/
@@ -722,7 +726,7 @@ struct ColumnMetaData {
   /** total byte size of all uncompressed pages in this column chunk (including the headers) **/
   6: required i64 total_uncompressed_size
 
-  /** total byte size of all compressed, and potentially encrypted, pages
+  /** total byte size of all compressed, and potentially encrypted, pages 
    *  in this column chunk (including the headers) **/
   7: required i64 total_compressed_size
 
@@ -756,7 +760,7 @@ struct EncryptionWithFooterKey {
 struct EncryptionWithColumnKey {
   /** Column path in schema **/
   1: required list<string> path_in_schema
-
+  
   /** Retrieval metadata of column encryption key **/
   2: optional binary key_metadata
 }
@@ -795,7 +799,7 @@ struct ColumnChunk {
 
   /** Crypto metadata of encrypted columns **/
   8: optional ColumnCryptoMetaData crypto_metadata
-
+  
   /** Encrypted column metadata for this chunk **/
   9: optional binary encrypted_column_metadata
 }
@@ -821,10 +825,10 @@ struct RowGroup {
    * in this row group **/
   5: optional i64 file_offset
 
-  /** Total byte size of all compressed (and potentially encrypted) column data
+  /** Total byte size of all compressed (and potentially encrypted) column data 
    *  in this row group **/
   6: optional i64 total_compressed_size
-
+  
   /** Row group ordinal in the file **/
   7: optional i16 ordinal
 }
@@ -944,7 +948,7 @@ struct ColumnIndex {
   3: required list<binary> max_values
 
   /**
-   * Stores whether both min_values and max_values are ordered and if so, in
+   * Stores whether both min_values and max_values are orderd and if so, in
    * which direction. This allows readers to perform binary searches in both
    * lists. Readers cannot assume that max_values[i] <= min_values[i+1], even
    * if the lists are ordered.
@@ -961,7 +965,7 @@ struct AesGcmV1 {
 
   /** Unique file identifier part of AAD suffix **/
   2: optional binary aad_file_unique
-
+  
   /** In files encrypted with AAD prefix without storing it,
    * readers must supply the prefix **/
   3: optional bool supply_aad_prefix
@@ -973,7 +977,7 @@ struct AesGcmCtrV1 {
 
   /** Unique file identifier part of AAD suffix **/
   2: optional binary aad_file_unique
-
+  
   /** In files encrypted with AAD prefix without storing it,
    * readers must supply the prefix **/
   3: optional bool supply_aad_prefix
@@ -1029,30 +1033,31 @@ struct FileMetaData {
    */
   7: optional list<ColumnOrder> column_orders;
 
-  /**
+  /** 
    * Encryption algorithm. This field is set only in encrypted files
    * with plaintext footer. Files with encrypted footer store algorithm id
    * in FileCryptoMetaData structure.
    */
   8: optional EncryptionAlgorithm encryption_algorithm
 
-  /**
-   * Retrieval metadata of key used for signing the footer.
-   * Used only in encrypted files with plaintext footer.
-   */
+  /** 
+   * Retrieval metadata of key used for signing the footer. 
+   * Used only in encrypted files with plaintext footer. 
+   */ 
   9: optional binary footer_signing_key_metadata
 }
 
 /** Crypto metadata for files with encrypted footer **/
 struct FileCryptoMetaData {
-  /**
+  /** 
    * Encryption algorithm. This field is only used for files
    * with encrypted footer. Files with plaintext footer store algorithm id
    * inside footer (FileMetaData structure).
    */
   1: required EncryptionAlgorithm encryption_algorithm
-
-  /** Retrieval metadata of key used for encryption of footer,
+    
+  /** Retrieval metadata of key used for encryption of footer, 
    *  and (possibly) columns **/
   2: optional binary key_metadata
 }
+
diff --git a/cpp/src/parquet/reader_test.cc b/cpp/src/parquet/reader_test.cc
index 7c2f9d7aa58..321531bb8f1 100644
--- a/cpp/src/parquet/reader_test.cc
+++ b/cpp/src/parquet/reader_test.cc
@@ -60,17 +60,28 @@ std::string nation_dict_truncated_data_page() {
   return data_file("nation.dict-malformed.parquet");
 }
 
-// Compressed using custom Hadoop LZ4 format (block LZ4 format + custom header)
+// LZ4-compressed data files.
+// These files come in three flavours:
+// - legacy "LZ4" compression type, actually compressed with block LZ4 codec
+//   (as emitted by some earlier versions of parquet-cpp)
+// - legacy "LZ4" compression type, actually compressed with custom Hadoop LZ4 codec
+//   (as emitted by parquet-mr)
+// - "LZ4_RAW" compression type (added in Parquet format version 2.9.0)
+
 std::string hadoop_lz4_compressed() { return data_file("hadoop_lz4_compressed.parquet"); }
 
-// Compressed using block LZ4 format
+std::string hadoop_lz4_compressed_larger() {
+  return data_file("hadoop_lz4_compressed_larger.parquet");
+}
+
 std::string non_hadoop_lz4_compressed() {
   return data_file("non_hadoop_lz4_compressed.parquet");
 }
 
-// Larger data compressed using custom Hadoop LZ4 format (several frames)
-std::string hadoop_lz4_compressed_larger() {
-  return data_file("hadoop_lz4_compressed_larger.parquet");
+std::string lz4_raw_compressed() { return data_file("lz4_raw_compressed.parquet"); }
+
+std::string lz4_raw_compressed_larger() {
+  return data_file("lz4_raw_compressed_larger.parquet");
 }
 
 // TODO: Assert on definition and repetition levels
@@ -548,13 +559,25 @@ TEST(TestFileReader, BufferedReads) {
   }
 }
 
-class TestCodec : public ::testing::TestWithParam<std::string> {
+#ifdef ARROW_WITH_LZ4
+struct TestCodecParam {
+  std::string name;
+  std::string small_data_file;
+  std::string larger_data_file;
+};
+
+void PrintTo(const TestCodecParam& p, std::ostream* os) { *os << p.name; }
+
+class TestCodec : public ::testing::TestWithParam<TestCodecParam> {
  protected:
-  const std::string& GetDataFile() { return GetParam(); }
+  const std::string& GetSmallDataFile() { return GetParam().small_data_file; }
+
+  const std::string& GetLargerDataFile() { return GetParam().larger_data_file; }
 };
 
-TEST_P(TestCodec, FileMetadataAndValues) {
-  std::unique_ptr<ParquetFileReader> reader_ = ParquetFileReader::OpenFile(GetDataFile());
+TEST_P(TestCodec, SmallFileMetadataAndValues) {
+  std::unique_ptr<ParquetFileReader> reader_ =
+      ParquetFileReader::OpenFile(GetSmallDataFile());
   std::shared_ptr<RowGroupReader> group = reader_->RowGroup(0);
   const auto rg_metadata = group->metadata();
 
@@ -593,14 +616,14 @@ TEST_P(TestCodec, FileMetadataAndValues) {
   AssertColumnValues(col2, 4, 4, expected_double_values, 4);
 }
 
-#ifdef ARROW_WITH_LZ4
-INSTANTIATE_TEST_SUITE_P(Lz4CodecTests, TestCodec,
-                         ::testing::Values(hadoop_lz4_compressed(),
-                                           non_hadoop_lz4_compressed()));
-
-TEST(TestLz4HadoopCodec, TestSeveralFrames) {
-  // ARROW-9177: Hadoop can compress a data block in several LZ4 "frames"
-  auto file = ParquetFileReader::OpenFile(hadoop_lz4_compressed_larger());
+TEST_P(TestCodec, LargeFileValues) {
+  // Test codec with a larger data file such data may have been compressed
+  // in several "frames" (ARROW-9177)
+  auto file_path = GetParam().larger_data_file;
+  if (file_path.empty()) {
+    GTEST_SKIP() << "Larger data file not available for this codec";
+  }
+  auto file = ParquetFileReader::OpenFile(file_path);
   auto group = file->RowGroup(0);
 
   const int64_t kNumRows = 10000;
@@ -624,6 +647,14 @@ TEST(TestLz4HadoopCodec, TestSeveralFrames) {
   ASSERT_EQ(values[kNumRows - 2], ByteArray("ab52a0cc-c6bb-4d61-8a8f-166dc4b8b13c"));
   ASSERT_EQ(values[kNumRows - 1], ByteArray("85440778-460a-41ac-aa2e-ac3ee41696bf"));
 }
-#endif
+
+std::vector<TestCodecParam> test_codec_params{
+    {"LegacyLZ4Hadoop", hadoop_lz4_compressed(), hadoop_lz4_compressed_larger()},
+    {"LegacyLZ4NonHadoop", non_hadoop_lz4_compressed(), ""},
+    {"LZ4Raw", lz4_raw_compressed(), lz4_raw_compressed_larger()}};
+
+INSTANTIATE_TEST_SUITE_P(Lz4CodecTests, TestCodec, ::testing::ValuesIn(test_codec_params),
+                         testing::PrintToStringParamName());
+#endif  // ARROW_WITH_LZ4
 
 }  // namespace parquet
diff --git a/cpp/src/parquet/thrift_internal.h b/cpp/src/parquet/thrift_internal.h
index c9e02696f5d..ea7df209621 100644
--- a/cpp/src/parquet/thrift_internal.h
+++ b/cpp/src/parquet/thrift_internal.h
@@ -102,6 +102,8 @@ static inline Compression::type FromThriftUnsafe(format::CompressionCodec::type
       return Compression::BROTLI;
     case format::CompressionCodec::LZ4:
       return Compression::LZ4_HADOOP;
+    case format::CompressionCodec::LZ4_RAW:
+      return Compression::LZ4;
     case format::CompressionCodec::ZSTD:
       return Compression::ZSTD;
     default:
@@ -212,7 +214,8 @@ inline typename Compression::type LoadEnumSafe(const format::CompressionCodec::t
   // as format::CompressionCodec.
   const auto min_value =
       static_cast<decltype(raw_value)>(format::CompressionCodec::UNCOMPRESSED);
-  const auto max_value = static_cast<decltype(raw_value)>(format::CompressionCodec::ZSTD);
+  const auto max_value =
+      static_cast<decltype(raw_value)>(format::CompressionCodec::LZ4_RAW);
   if (raw_value < min_value || raw_value > max_value) {
     return Compression::UNCOMPRESSED;
   }
@@ -282,9 +285,10 @@ static inline format::CompressionCodec::type ToThrift(Compression::type type) {
       return format::CompressionCodec::LZO;
     case Compression::BROTLI:
       return format::CompressionCodec::BROTLI;
-    // For compatibility with existing source code
     case Compression::LZ4:
+      return format::CompressionCodec::LZ4_RAW;
     case Compression::LZ4_HADOOP:
+      // Deprecated "LZ4" Parquet compression has Hadoop-specific framing
       return format::CompressionCodec::LZ4;
     case Compression::ZSTD:
       return format::CompressionCodec::ZSTD;
diff --git a/cpp/src/parquet/types.cc b/cpp/src/parquet/types.cc
index 4e5bcee4ce8..ef23c40662b 100644
--- a/cpp/src/parquet/types.cc
+++ b/cpp/src/parquet/types.cc
@@ -69,11 +69,6 @@ std::unique_ptr<Codec> GetCodec(Compression::type codec, int compression_level)
     throw ParquetException(ss.str());
   }
 
-  if (codec == Compression::LZ4) {
-    // For compatibility with existing source code
-    codec = Compression::LZ4_HADOOP;
-  }
-
   PARQUET_ASSIGN_OR_THROW(result, Codec::Create(codec, compression_level));
   return result;
 }
diff --git a/cpp/submodules/parquet-testing b/cpp/submodules/parquet-testing
index 8e7badc6a38..ddd89895880 160000
--- a/cpp/submodules/parquet-testing
+++ b/cpp/submodules/parquet-testing
@@ -1 +1 @@
-Subproject commit 8e7badc6a3817a02e06d17b5d8ab6b6dc356e890
+Subproject commit ddd898958803cb89b7156c6350584d1cda0fe8de

From 6b07e63eed32b15b61789e8a93628069bc5d7862 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Thu, 22 Apr 2021 13:45:07 -0400
Subject: [PATCH 112/719] ARROW-12496: [C++][Dataset] Ensure AsyncScanner is
 covered by all scanner tests

Some tests directly constructed a Scanner from a Fragment, which isn't supported by the async scanner. This introduces a simple dataset wrapper so all these tests will actually test both sync/async scanners.

Closes #10124 from lidavidm/arrow-12496

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/src/arrow/dataset/scanner_test.cc | 136 +++++++++++++++++---------
 cpp/src/arrow/dataset/test_util.h     |  17 ++++
 cpp/src/arrow/testing/gtest_util.h    |  17 ++--
 3 files changed, 112 insertions(+), 58 deletions(-)

diff --git a/cpp/src/arrow/dataset/scanner_test.cc b/cpp/src/arrow/dataset/scanner_test.cc
index 27fcef1f04c..d9cb348808b 100644
--- a/cpp/src/arrow/dataset/scanner_test.cc
+++ b/cpp/src/arrow/dataset/scanner_test.cc
@@ -71,6 +71,14 @@ std::ostream& operator<<(std::ostream& out, const TestScannerParams& params) {
 
 class TestScanner : public DatasetFixtureMixinWithParam<TestScannerParams> {
  protected:
+  std::shared_ptr<Scanner> MakeScanner(std::shared_ptr<Dataset> dataset) {
+    ScannerBuilder builder(std::move(dataset), options_);
+    ARROW_EXPECT_OK(builder.UseThreads(GetParam().use_threads));
+    ARROW_EXPECT_OK(builder.UseAsync(GetParam().use_async));
+    EXPECT_OK_AND_ASSIGN(auto scanner, builder.Finish());
+    return scanner;
+  }
+
   std::shared_ptr<Scanner> MakeScanner(std::shared_ptr<RecordBatch> batch) {
     std::vector<std::shared_ptr<RecordBatch>> batches{
         static_cast<size_t>(GetParam().num_batches), batch};
@@ -79,12 +87,7 @@ class TestScanner : public DatasetFixtureMixinWithParam<TestScannerParams> {
                            std::make_shared<InMemoryDataset>(batch->schema(), batches)};
 
     EXPECT_OK_AND_ASSIGN(auto dataset, UnionDataset::Make(batch->schema(), children));
-
-    ScannerBuilder builder(dataset, options_);
-    ARROW_EXPECT_OK(builder.UseThreads(GetParam().use_threads));
-    ARROW_EXPECT_OK(builder.UseAsync(GetParam().use_async));
-    EXPECT_OK_AND_ASSIGN(auto scanner, builder.Finish());
-    return scanner;
+    return MakeScanner(std::move(dataset));
   }
 
   void AssertScannerEqualsRepetitionsOf(
@@ -203,9 +206,9 @@ TEST_P(TestScanner, MaterializeMissingColumn) {
   auto batch_with_f64 =
       RecordBatch::Make(schema_, f64->length(), {batch_missing_f64->column(0), f64});
 
-  ScannerBuilder builder{schema_, fragment_missing_f64, options_};
-  ASSERT_OK_AND_ASSIGN(auto scanner, builder.Finish());
-
+  FragmentVector fragments{fragment_missing_f64};
+  auto dataset = std::make_shared<FragmentDataset>(schema_, fragments);
+  auto scanner = MakeScanner(std::move(dataset));
   AssertScanBatchesEqualRepetitionsOf(scanner, batch_with_f64);
 }
 
@@ -319,6 +322,23 @@ class FailingFragment : public InMemoryFragment {
       return std::make_shared<InMemoryScanTask>(batches, options, self);
     });
   }
+
+  Result<RecordBatchGenerator> ScanBatchesAsync(
+      const std::shared_ptr<ScanOptions>& options) override {
+    struct {
+      Future<std::shared_ptr<RecordBatch>> operator()() {
+        if (index > 16) {
+          return Status::Invalid("Oh no, we failed!");
+        }
+        auto batch = batches[index++ % batches.size()];
+        return Future<std::shared_ptr<RecordBatch>>::MakeFinished(batch);
+      }
+      RecordBatchVector batches;
+      int index = 0;
+    } Generator;
+    Generator.batches = record_batches_;
+    return Generator;
+  }
 };
 
 class FailingExecuteScanTask : public InMemoryScanTask {
@@ -356,62 +376,82 @@ class FailingScanTaskFragment : public InMemoryFragment {
     ScanTaskVector scan_tasks{std::make_shared<T>(record_batches_, options, self)};
     return MakeVectorIterator(std::move(scan_tasks));
   }
+
+  // Unlike the sync case, there's only two places to fail - during
+  // iteration (covered by FailingFragment) or at the initial scan
+  // (covered here)
+  Result<RecordBatchGenerator> ScanBatchesAsync(
+      const std::shared_ptr<ScanOptions>& options) override {
+    return Status::Invalid("Oh no, we failed!");
+  }
 };
 
+template <typename It, typename GetBatch>
+bool CheckIteratorRaises(const RecordBatch& batch, It batch_it, GetBatch get_batch) {
+  while (true) {
+    auto maybe_batch = batch_it.Next();
+    if (maybe_batch.ok()) {
+      EXPECT_OK_AND_ASSIGN(auto scanned_batch, maybe_batch);
+      if (IsIterationEnd(scanned_batch)) break;
+      AssertBatchesEqual(batch, *get_batch(scanned_batch));
+    } else {
+      EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, ::testing::HasSubstr("Oh no, we failed!"),
+                                      maybe_batch);
+      return true;
+    }
+  }
+  return false;
+}
+
 TEST_P(TestScanner, ScanBatchesFailure) {
   SetSchema({field("i32", int32()), field("f64", float64())});
   auto batch = ConstantArrayGenerator::Zeroes(GetParam().items_per_batch, schema_);
   RecordBatchVector batches = {batch, batch, batch, batch};
-  // Note these tests are only for SyncScanner at the moment
+
+  auto check_scanner = [](const RecordBatch& batch, Scanner* scanner) {
+    auto maybe_batch_it = scanner->ScanBatchesUnordered();
+    if (!maybe_batch_it.ok()) {
+      // SyncScanner can fail here as it eagerly consumes the first value
+      EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, ::testing::HasSubstr("Oh no, we failed!"),
+                                      std::move(maybe_batch_it));
+    } else {
+      ASSERT_OK_AND_ASSIGN(auto batch_it, std::move(maybe_batch_it));
+      EXPECT_TRUE(CheckIteratorRaises(
+          batch, std::move(batch_it),
+          [](const EnumeratedRecordBatch& batch) { return batch.record_batch.value; }))
+          << "ScanBatchesUnordered() did not raise an error";
+    }
+    ASSERT_OK_AND_ASSIGN(auto tagged_batch_it, scanner->ScanBatches());
+    EXPECT_TRUE(CheckIteratorRaises(
+        batch, std::move(tagged_batch_it),
+        [](const TaggedRecordBatch& batch) { return batch.record_batch; }))
+        << "ScanBatches() did not raise an error";
+  };
 
   // Case 1: failure when getting next scan task
   {
-    ScannerBuilder builder(schema_, std::make_shared<FailingFragment>(batches), options_);
-    ASSERT_OK(builder.UseThreads(GetParam().use_threads));
-    ASSERT_OK_AND_ASSIGN(auto scanner, builder.Finish());
-    ASSERT_OK_AND_ASSIGN(auto batch_it, scanner->ScanBatches());
-
-    int counter = 0;
-    while (true) {
-      // Make sure we get all batches that were yielded before the failing scan task
-      auto maybe_batch = batch_it.Next();
-      if (counter++ <= 16) {
-        ASSERT_OK_AND_ASSIGN(auto scanned_batch, maybe_batch);
-        AssertBatchesEqual(*batch, *scanned_batch.record_batch);
-        ASSERT_NE(nullptr, scanned_batch.fragment);
-      } else {
-        EXPECT_RAISES_WITH_MESSAGE_THAT(
-            Invalid, ::testing::HasSubstr("Oh no, we failed!"), maybe_batch);
-        break;
-      }
-    }
+    FragmentVector fragments{std::make_shared<FailingFragment>(batches)};
+    auto dataset = std::make_shared<FragmentDataset>(schema_, fragments);
+    auto scanner = MakeScanner(std::move(dataset));
+    check_scanner(*batch, scanner.get());
   }
 
   // Case 2: failure when calling ScanTask::Execute
   {
-    ScannerBuilder builder(
-        schema_,
-        std::make_shared<FailingScanTaskFragment<FailingExecuteScanTask>>(batches),
-        options_);
-    ASSERT_OK(builder.UseThreads(GetParam().use_threads));
-    ASSERT_OK_AND_ASSIGN(auto scanner, builder.Finish());
-    ASSERT_OK_AND_ASSIGN(auto batch_it, scanner->ScanBatches());
-    EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, ::testing::HasSubstr("Oh no, we failed!"),
-                                    batch_it.Next());
+    FragmentVector fragments{
+        std::make_shared<FailingScanTaskFragment<FailingExecuteScanTask>>(batches)};
+    auto dataset = std::make_shared<FragmentDataset>(schema_, fragments);
+    auto scanner = MakeScanner(std::move(dataset));
+    check_scanner(*batch, scanner.get());
   }
 
   // Case 3: failure when calling RecordBatchIterator::Next
   {
-    ScannerBuilder builder(
-        schema_,
-        std::make_shared<FailingScanTaskFragment<FailingIterationScanTask>>(batches),
-        options_);
-    ASSERT_OK(builder.UseThreads(GetParam().use_threads));
-    ASSERT_OK_AND_ASSIGN(auto scanner, builder.Finish());
-    ASSERT_OK_AND_ASSIGN(auto batch_it, scanner->ScanBatches());
-    ASSERT_OK(batch_it.Next());
-    EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, ::testing::HasSubstr("Oh no, we failed!"),
-                                    batch_it.Next());
+    FragmentVector fragments{
+        std::make_shared<FailingScanTaskFragment<FailingIterationScanTask>>(batches)};
+    auto dataset = std::make_shared<FragmentDataset>(schema_, fragments);
+    auto scanner = MakeScanner(std::move(dataset));
+    check_scanner(*batch, scanner.get());
   }
 }
 
diff --git a/cpp/src/arrow/dataset/test_util.h b/cpp/src/arrow/dataset/test_util.h
index b94441e178a..009e1d755e9 100644
--- a/cpp/src/arrow/dataset/test_util.h
+++ b/cpp/src/arrow/dataset/test_util.h
@@ -119,6 +119,23 @@ void EnsureRecordBatchReaderDrained(RecordBatchReader* reader) {
   EXPECT_EQ(batch, nullptr);
 }
 
+/// Test dataset that returns one or more fragments.
+class FragmentDataset : public Dataset {
+ public:
+  FragmentDataset(std::shared_ptr<Schema> schema, FragmentVector fragments)
+      : Dataset(std::move(schema)), fragments_(std::move(fragments)) {}
+  std::string type_name() const override { return "fragment"; }
+  Result<std::shared_ptr<Dataset>> ReplaceSchema(std::shared_ptr<Schema>) const override {
+    return Status::NotImplemented("");
+  }
+
+ protected:
+  Result<FragmentIterator> GetFragmentsImpl(Expression predicate) override {
+    return MakeVectorIterator(fragments_);
+  }
+  FragmentVector fragments_;
+};
+
 class DatasetFixtureMixin : public ::testing::Test {
  public:
   /// \brief Ensure that record batches found in reader are equals to the
diff --git a/cpp/src/arrow/testing/gtest_util.h b/cpp/src/arrow/testing/gtest_util.h
index 0f25ac07767..2ccf171d016 100644
--- a/cpp/src/arrow/testing/gtest_util.h
+++ b/cpp/src/arrow/testing/gtest_util.h
@@ -67,16 +67,13 @@
     ASSERT_EQ((message), _st.ToString());                                             \
   } while (false)
 
-#define EXPECT_RAISES_WITH_MESSAGE_THAT(ENUM, matcher, expr)                          \
-  do {                                                                                \
-    auto _res = (expr);                                                               \
-    ::arrow::Status _st = ::arrow::internal::GenericToStatus(_res);                   \
-    if (!_st.Is##ENUM()) {                                                            \
-      FAIL() << "Expected '" ARROW_STRINGIFY(expr) "' to fail with " ARROW_STRINGIFY( \
-                    ENUM) ", but got "                                                \
-             << _st.ToString();                                                       \
-    }                                                                                 \
-    EXPECT_THAT(_st.ToString(), (matcher));                                           \
+#define EXPECT_RAISES_WITH_MESSAGE_THAT(ENUM, matcher, expr)                             \
+  do {                                                                                   \
+    auto _res = (expr);                                                                  \
+    ::arrow::Status _st = ::arrow::internal::GenericToStatus(_res);                      \
+    EXPECT_TRUE(_st.Is##ENUM()) << "Expected '" ARROW_STRINGIFY(expr) "' to fail with "  \
+                                << ARROW_STRINGIFY(ENUM) ", but got " << _st.ToString(); \
+    EXPECT_THAT(_st.ToString(), (matcher));                                              \
   } while (false)
 
 #define EXPECT_RAISES_WITH_CODE_AND_MESSAGE_THAT(code, matcher, expr) \

From ed4f79c77e5d34d5996fa287df0934dd2f93a12a Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Thu, 22 Apr 2021 11:15:56 -0700
Subject: [PATCH 113/719] ARROW-12508: [R] expect_as_vector implementation
 causes test failure on R <= 3.3 & variables defined outside of test_that
 break build when no arrow install

Closes #10133 from thisisnic/ARROW-12508-bug_with_as_vector

Authored-by: Nic Crane <thisisnic@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 r/tests/testthat/helper-expectation.R |  6 +++++-
 r/tests/testthat/test-na-omit.R       | 13 +++++++++----
 2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/r/tests/testthat/helper-expectation.R b/r/tests/testthat/helper-expectation.R
index 595b183e555..5b6958a9a7a 100644
--- a/r/tests/testthat/helper-expectation.R
+++ b/r/tests/testthat/helper-expectation.R
@@ -16,7 +16,11 @@
 # under the License.
 
 expect_as_vector <- function(x, y, ignore_attr = FALSE, ...) {
-  expect_fun <- ifelse(ignore_attr, expect_equivalent, expect_equal)
+  expect_fun <- if(ignore_attr){
+    expect_equivalent
+  } else {
+    expect_equal
+  }
   expect_fun(as.vector(x), y, ...)
 }
 
diff --git a/r/tests/testthat/test-na-omit.R b/r/tests/testthat/test-na-omit.R
index fd1372fdc5d..834ccb013ec 100644
--- a/r/tests/testthat/test-na-omit.R
+++ b/r/tests/testthat/test-na-omit.R
@@ -17,12 +17,10 @@
 
 data_no_na <- c(2:10)
 data_na <- c(data_no_na, NA_real_)
-scalar_na <- Scalar$create(NA)
-scalar_one <- Scalar$create(1)
-tbl <- Table$create(example_data)
-batch <- record_batch(example_data)
 
 test_that("na.fail on Scalar", {
+  scalar_na <- Scalar$create(NA)
+  scalar_one <- Scalar$create(1)
   expect_as_vector(na.fail(scalar_one), 1)
   expect_error(na.fail(scalar_na), "missing values in object")
 })
@@ -43,31 +41,38 @@ test_that("na.fail on Array and ChunkedArray", {
 })
 
 test_that("na.fail on Scalar", {
+  scalar_one <- Scalar$create(1)
   expect_error(na.fail(scalar_na), regexp = "missing values in object")
   expect_as_vector(na.fail(scalar_one), na.fail(1))
 })
 
 test_that("na.omit on Table", {
+  tbl <- Table$create(example_data)
   expect_equivalent(as.data.frame(na.omit(tbl)), na.omit(example_data))
 })
 
 test_that("na.exclude on Table", {
+  tbl <- Table$create(example_data)
   expect_equivalent(as.data.frame(na.exclude(tbl)), na.exclude(example_data))
 })
 
 test_that("na.fail on Table", {
+  tbl <- Table$create(example_data)
   expect_error(na.fail(tbl), "missing values in object")
 })
 
 test_that("na.omit on RecordBatch", {
+  batch <- record_batch(example_data)
   expect_equivalent(as.data.frame(na.omit(batch)), na.omit(example_data))
 })
 
 test_that("na.exclude on RecordBatch", {
+  batch <- record_batch(example_data)
   expect_equivalent(as.data.frame(na.exclude(batch)), na.omit(example_data))
 })
 
 test_that("na.fail on RecordBatch", {
+  batch <- record_batch(example_data)
   expect_error(na.fail(batch), "missing values in object")
 })
 

From 69870b98d1ebaa30c81e0ca4cdffc70a4d3b98ab Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Fri, 23 Apr 2021 06:37:11 +0900
Subject: [PATCH 114/719] ARROW-12442: [CI] Set job timeouts on GitHub Actions

The default job timeout on Github Actions is 6 hours (360 minutes).

All our jobs normally take much less than that (typically less than 30 minutes for most jobs), but external conditions such as network timeouts may make some jobs take the whole 6 hours before being killed. This in turn prevents other jobs from starting and makes the build queue grow excessively. Also, since build resources on Github Actions are shared between all Apache projects, this impacts other Apache projects as well.

Set a reasonable timeout on most jobs to minimize the repercussion of network issues and such.

Closes #10129 from pitrou/ARROW-12442-gha-timeouts

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 .github/workflows/archery.yml     |  1 +
 .github/workflows/cancel.yml      |  1 +
 .github/workflows/cpp.yml         |  5 +++++
 .github/workflows/cpp_cron.yml    |  2 ++
 .github/workflows/csharp.yml      |  3 +++
 .github/workflows/go.yml          |  3 +++
 .github/workflows/integration.yml |  1 +
 .github/workflows/java.yml        |  2 ++
 .github/workflows/java_jni.yml    |  1 +
 .github/workflows/js.yml          |  2 ++
 .github/workflows/julia.yml       |  1 +
 .github/workflows/python.yml      |  2 ++
 .github/workflows/python_cron.yml |  1 +
 .github/workflows/r.yml           |  3 +++
 .github/workflows/ruby.yml        |  3 +++
 .github/workflows/rust.yml        | 10 ++++++++++
 16 files changed, 41 insertions(+)

diff --git a/.github/workflows/archery.yml b/.github/workflows/archery.yml
index 9f6880efb9f..e5244ece92b 100644
--- a/.github/workflows/archery.yml
+++ b/.github/workflows/archery.yml
@@ -37,6 +37,7 @@ jobs:
     if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
     name: Archery Unittests and Crossbow Check Config
     runs-on: ubuntu-latest
+    timeout-minutes: 15
     steps:
       - name: Checkout Arrow
         uses: actions/checkout@v2
diff --git a/.github/workflows/cancel.yml b/.github/workflows/cancel.yml
index de980eb6d05..3049ae706b0 100644
--- a/.github/workflows/cancel.yml
+++ b/.github/workflows/cancel.yml
@@ -27,6 +27,7 @@ jobs:
   cancel-stale-workflow-runs:
     name: "Cancel stale workflow runs"
     runs-on: ubuntu-latest
+    timeout-minutes: 15
     steps:
       # Unfortunately, we need to define a separate cancellation step for
       # each workflow where we want to cancel stale runs.
diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml
index 0bcf3460ad4..62c8e5f8af2 100644
--- a/.github/workflows/cpp.yml
+++ b/.github/workflows/cpp.yml
@@ -49,6 +49,7 @@ jobs:
     name: ${{ matrix.title }}
     runs-on: ubuntu-latest
     if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
+    timeout-minutes: 45
     strategy:
       fail-fast: false
       matrix:
@@ -169,6 +170,7 @@ jobs:
     name: C++ Minimal Build Example
     runs-on: ubuntu-latest
     if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
+    timeout-minutes: 45
     strategy:
       fail-fast: false
     steps:
@@ -185,6 +187,7 @@ jobs:
     name: AMD64 MacOS 10.15 C++
     runs-on: macos-latest
     if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
+    timeout-minutes: 45
     strategy:
       fail-fast: false
     env:
@@ -236,6 +239,7 @@ jobs:
     name: AMD64 ${{ matrix.name }} C++
     runs-on: ${{ matrix.os }}
     if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
+    timeout-minutes: 45
     strategy:
       fail-fast: false
       matrix:
@@ -305,6 +309,7 @@ jobs:
     name: AMD64 Windows MinGW ${{ matrix.mingw-n-bits }} C++
     runs-on: windows-latest
     if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
+    timeout-minutes: 45
     strategy:
       fail-fast: false
       matrix:
diff --git a/.github/workflows/cpp_cron.yml b/.github/workflows/cpp_cron.yml
index 9e4f3cf388b..cd3c6aebaab 100644
--- a/.github/workflows/cpp_cron.yml
+++ b/.github/workflows/cpp_cron.yml
@@ -40,6 +40,7 @@ jobs:
     name: ${{ matrix.title }}
     runs-on: ubuntu-latest
     if: ${{ !contains(github.event.pull_request.title, 'WIP') && github.repository == 'apache/arrow' }}
+    timeout-minutes: 60
     strategy:
       fail-fast: false
       matrix:
@@ -106,6 +107,7 @@ jobs:
     name: OSS-Fuzz build check
     runs-on: ubuntu-latest
     if: ${{ !contains(github.event.pull_request.title, 'WIP') && github.repository == 'apache/arrow' }}
+    timeout-minutes: 60
     strategy:
       fail-fast: false
       matrix:
diff --git a/.github/workflows/csharp.yml b/.github/workflows/csharp.yml
index 03a297bb914..217985a9d10 100644
--- a/.github/workflows/csharp.yml
+++ b/.github/workflows/csharp.yml
@@ -35,6 +35,7 @@ jobs:
     name: AMD64 Ubuntu 18.04 C# ${{ matrix.dotnet }}
     runs-on: ubuntu-latest
     if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
+    timeout-minutes: 15
     strategy:
       fail-fast: false
       matrix:
@@ -65,6 +66,7 @@ jobs:
     name: AMD64 Windows 2019 18.04 C# ${{ matrix.dotnet }}
     runs-on: windows-latest
     if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
+    timeout-minutes: 15
     strategy:
       fail-fast: false
       matrix:
@@ -94,6 +96,7 @@ jobs:
     name: AMD64 MacOS 10.15 C# ${{ matrix.dotnet }}
     runs-on: macos-latest
     if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
+    timeout-minutes: 15
     strategy:
       fail-fast: false
       matrix:
diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml
index 574795f5e9b..f70821ffb7f 100644
--- a/.github/workflows/go.yml
+++ b/.github/workflows/go.yml
@@ -42,6 +42,7 @@ jobs:
     name: AMD64 Debian 10 Go ${{ matrix.go }}
     runs-on: ubuntu-latest
     if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
+    timeout-minutes: 15
     strategy:
       fail-fast: false
       matrix:
@@ -74,6 +75,7 @@ jobs:
     name: AMD64 Windows 2019 Go ${{ matrix.go }}
     runs-on: windows-latest
     if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
+    timeout-minutes: 15
     strategy:
       fail-fast: false
       matrix:
@@ -101,6 +103,7 @@ jobs:
     name: AMD64 MacOS 10.15 Go ${{ matrix.go }}
     runs-on: macos-latest
     if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
+    timeout-minutes: 15
     strategy:
       fail-fast: false
       matrix:
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 20112553ea2..a4f97be3b9c 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -54,6 +54,7 @@ jobs:
     name: AMD64 Conda Integration Test
     runs-on: ubuntu-latest
     if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
+    timeout-minutes: 60
     steps:
       - name: Checkout Arrow
         uses: actions/checkout@v2
diff --git a/.github/workflows/java.yml b/.github/workflows/java.yml
index 7f6f29f0f44..bc4ce64f5ae 100644
--- a/.github/workflows/java.yml
+++ b/.github/workflows/java.yml
@@ -46,6 +46,7 @@ jobs:
     name: AMD64 Debian 9 Java JDK ${{ matrix.jdk }} Maven ${{ matrix.maven }}
     runs-on: ubuntu-latest
     if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
+    timeout-minutes: 30
     strategy:
       fail-fast: false
       matrix:
@@ -88,6 +89,7 @@ jobs:
     name: AMD64 MacOS 10.15 Java JDK ${{ matrix.jdk }}
     runs-on: macos-latest
     if: github.event_name == 'push'
+    timeout-minutes: 30
     strategy:
       fail-fast: false
       matrix:
diff --git a/.github/workflows/java_jni.yml b/.github/workflows/java_jni.yml
index 5f25e8c053d..7c747f4e4c8 100644
--- a/.github/workflows/java_jni.yml
+++ b/.github/workflows/java_jni.yml
@@ -46,6 +46,7 @@ jobs:
     name: AMD64 Debian 9 Java JNI (Gandiva, Plasma, ORC, Dataset)
     runs-on: ubuntu-latest
     if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
+    timeout-minutes: 40
     strategy:
       fail-fast: false
       matrix:
diff --git a/.github/workflows/js.yml b/.github/workflows/js.yml
index 354c45c60d3..4434cda79ca 100644
--- a/.github/workflows/js.yml
+++ b/.github/workflows/js.yml
@@ -41,6 +41,7 @@ jobs:
     name: AMD64 Debian 10 NodeJS 14
     runs-on: ubuntu-latest
     if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
+    timeout-minutes: 60
     steps:
       - name: Checkout Arrow
         uses: actions/checkout@v2
@@ -70,6 +71,7 @@ jobs:
     name: AMD64 MacOS 10.15 NodeJS ${{ matrix.node }}
     runs-on: macos-latest
     if: github.event_name == 'push'
+    timeout-minutes: 60
     strategy:
       fail-fast: false
       matrix:
diff --git a/.github/workflows/julia.yml b/.github/workflows/julia.yml
index 64ea6c947a1..47de39ccfbd 100644
--- a/.github/workflows/julia.yml
+++ b/.github/workflows/julia.yml
@@ -32,6 +32,7 @@ jobs:
     env:
       JULIA_NUM_THREADS: 2
     runs-on: ${{ matrix.os }}
+    timeout-minutes: 30
     strategy:
       fail-fast: false
       matrix:
diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index 9062e93e665..4e3eaad98b5 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -42,6 +42,7 @@ jobs:
     name: ${{ matrix.title }}
     runs-on: ubuntu-latest
     if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
+    timeout-minutes: 60
     strategy:
       fail-fast: false
       matrix:
@@ -107,6 +108,7 @@ jobs:
     name: AMD64 MacOS 10.15 Python 3
     runs-on: macos-latest
     if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
+    timeout-minutes: 60
     env:
       ARROW_HOME: /usr/local
       ARROW_DATASET: ON
diff --git a/.github/workflows/python_cron.yml b/.github/workflows/python_cron.yml
index 7a4401af1c3..a60abddfd3f 100644
--- a/.github/workflows/python_cron.yml
+++ b/.github/workflows/python_cron.yml
@@ -39,6 +39,7 @@ jobs:
     name: ${{ matrix.title }}
     runs-on: ubuntu-latest
     if: ${{ !contains(github.event.pull_request.title, 'WIP') && github.repository == 'apache/arrow' }}
+    timeout-minutes: 60
     strategy:
       fail-fast: false
       matrix:
diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml
index 7851b6b1915..4fde4fd39ad 100644
--- a/.github/workflows/r.yml
+++ b/.github/workflows/r.yml
@@ -49,6 +49,7 @@ jobs:
     name: AMD64 Ubuntu ${{ matrix.ubuntu }} R ${{ matrix.r }}
     runs-on: ubuntu-latest
     if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
+    timeout-minutes: 60
     strategy:
       fail-fast: false
       matrix:
@@ -106,6 +107,7 @@ jobs:
     name: "${{ matrix.config.org }}/${{ matrix.config.image }}:${{ matrix.config.tag }}"
     runs-on: ubuntu-latest
     if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
+    timeout-minutes: 60
     strategy:
       fail-fast: false
       matrix:
@@ -163,6 +165,7 @@ jobs:
     name: AMD64 Windows RTools ${{ matrix.rtools }}
     runs-on: windows-latest
     if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
+    timeout-minutes: 60
     strategy:
       fail-fast: false
       matrix:
diff --git a/.github/workflows/ruby.yml b/.github/workflows/ruby.yml
index 2b99cddf8da..380df30cf37 100644
--- a/.github/workflows/ruby.yml
+++ b/.github/workflows/ruby.yml
@@ -54,6 +54,7 @@ jobs:
     name: AMD64 Ubuntu ${{ matrix.ubuntu }} GLib & Ruby
     runs-on: ubuntu-latest
     if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
+    timeout-minutes: 40
     strategy:
       fail-fast: false
       matrix:
@@ -100,6 +101,7 @@ jobs:
     name: AMD64 MacOS 10.15 GLib & Ruby
     runs-on: macos-latest
     if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
+    timeout-minutes: 40
     strategy:
       fail-fast: false
     env:
@@ -171,6 +173,7 @@ jobs:
     name: AMD64 Windows MinGW ${{ matrix.mingw-n-bits }} GLib & Ruby
     runs-on: windows-latest
     if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
+    timeout-minutes: 40
     strategy:
       fail-fast: false
       matrix:
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 4bb17a2ecaf..9c0a4ea72f1 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -35,6 +35,7 @@ jobs:
   linux-build-lib:
     name: Build Libraries on AMD64 Rust ${{ matrix.rust }}
     runs-on: ubuntu-latest
+    timeout-minutes: 40
     strategy:
       matrix:
         arch: [amd64]
@@ -86,6 +87,7 @@ jobs:
     name: Test Workspace on AMD64 Rust ${{ matrix.rust }}
     needs: [linux-build-lib]
     runs-on: ubuntu-latest
+    timeout-minutes: 40
     strategy:
       matrix:
         arch: [amd64]
@@ -152,6 +154,7 @@ jobs:
   linux-test-simd:
     name: Test SIMD on AMD64 Rust ${{ matrix.rust }}
     runs-on: ubuntu-latest
+    timeout-minutes: 40
     strategy:
       matrix:
         arch: [amd64]
@@ -194,6 +197,7 @@ jobs:
   windows-and-macos:
     name: Test on ${{ matrix.os }} Rust ${{ matrix.rust }}
     runs-on: ${{ matrix.os }}
+    timeout-minutes: 40
     strategy:
       matrix:
         os: [windows-latest, macos-latest]
@@ -223,6 +227,7 @@ jobs:
     name: Clippy
     needs: [linux-build-lib]
     runs-on: ubuntu-latest
+    timeout-minutes: 40
     strategy:
       matrix:
         arch: [amd64]
@@ -264,6 +269,7 @@ jobs:
   miri-checks:
     name: MIRI
     runs-on: ubuntu-latest
+    timeout-minutes: 40
     strategy:
       matrix:
         arch: [amd64]
@@ -299,6 +305,7 @@ jobs:
   coverage:
     name: Coverage
     runs-on: ubuntu-latest
+    timeout-minutes: 40
     strategy:
       matrix:
         arch: [amd64]
@@ -340,6 +347,7 @@ jobs:
   pyarrow-integration-test:
     name: Test Pyarrow C Data Interface
     runs-on: ubuntu-latest
+    timeout-minutes: 40
     strategy:
       matrix:
         rust: [stable]
@@ -386,6 +394,7 @@ jobs:
   wasm32-build:
     name: Build wasm32 on AMD64 Rust ${{ matrix.rust }}
     runs-on: ubuntu-latest
+    timeout-minutes: 40
     strategy:
       matrix:
         arch: [amd64]
@@ -430,6 +439,7 @@ jobs:
   default-build:
     name: Check No Defaults on AMD64 Rust ${{ matrix.rust }}
     runs-on: ubuntu-latest
+    timeout-minutes: 40
     strategy:
       matrix:
         arch: [amd64]

From 16a0739d2d1fcb8d4f814c46d941f65091b299d3 Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Fri, 23 Apr 2021 09:22:20 +0900
Subject: [PATCH 115/719] ARROW-12514: [Release] Don't run Gandiva related Ruby
 test with ARROW_GANDIVA=OFF

Closes #10135 from kou/release-verify-ruby-gandiva

Authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 dev/release/verify-release-candidate.sh | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh
index 0e9a4a4e579..fef231c8db3 100755
--- a/dev/release/verify-release-candidate.sh
+++ b/dev/release/verify-release-candidate.sh
@@ -433,10 +433,13 @@ test_js() {
 test_ruby() {
   pushd ruby
 
-  local modules="red-arrow red-plasma red-gandiva red-parquet"
+  local modules="red-arrow red-plasma red-parquet"
   if [ "${ARROW_CUDA}" = "ON" ]; then
     modules="${modules} red-arrow-cuda"
   fi
+  if [ "${ARROW_GANDIVA}" = "ON" ]; then
+    modules="${modules} red-gandiva"
+  fi
 
   for module in ${modules}; do
     pushd ${module}

From 802c4ec1e9149b55855aa826bfa0bba13634fd6a Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Fri, 23 Apr 2021 09:01:01 -0700
Subject: [PATCH 116/719] ARROW-12503: [C++] Ensure using "lib/" for jemalloc's
 library directory

Some environments may change the default library directory to
"lib64/".

Closes #10139 from kou/cpp-jemalloc-ensure-libdir

Authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 cpp/cmake_modules/ThirdpartyToolchain.cmake | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index 83ea3aa9b96..01e818b5375 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -1495,14 +1495,16 @@ if(ARROW_JEMALLOC)
   set(ARROW_JEMALLOC_USE_SHARED OFF)
   set(JEMALLOC_PREFIX
       "${CMAKE_CURRENT_BINARY_DIR}/jemalloc_ep-prefix/src/jemalloc_ep/dist/")
+  set(JEMALLOC_LIB_DIR "${JEMALLOC_PREFIX}/lib")
   set(JEMALLOC_STATIC_LIB
-      "${JEMALLOC_PREFIX}/lib/libjemalloc_pic${CMAKE_STATIC_LIBRARY_SUFFIX}")
+      "${JEMALLOC_LIB_DIR}/libjemalloc_pic${CMAKE_STATIC_LIBRARY_SUFFIX}")
   set(JEMALLOC_CONFIGURE_COMMAND ./configure "AR=${CMAKE_AR}" "CC=${CMAKE_C_COMPILER}")
   if(CMAKE_OSX_SYSROOT)
     list(APPEND JEMALLOC_CONFIGURE_COMMAND "SDKROOT=${CMAKE_OSX_SYSROOT}")
   endif()
   list(APPEND JEMALLOC_CONFIGURE_COMMAND
               "--prefix=${JEMALLOC_PREFIX}"
+              "--libdir=${JEMALLOC_LIB_DIR}"
               "--with-jemalloc-prefix=je_arrow_"
               "--with-private-namespace=je_arrow_private_"
               "--without-export"

From 9c35c2827cf88d0c48eecb123adec810a2ac046a Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 23 Apr 2021 13:59:25 -0400
Subject: [PATCH 117/719] ARROW-12518: [Python] Expose Parquet statistics
 has_null_count / has_distinct_count

Closes #10140 from jorisvandenbossche/ARROW-12518-parquet-statistics

Authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 python/pyarrow/_parquet.pxd | 2 ++
 python/pyarrow/_parquet.pyx | 8 ++++++++
 2 files changed, 10 insertions(+)

diff --git a/python/pyarrow/_parquet.pxd b/python/pyarrow/_parquet.pxd
index 96bfd77552e..7a5eb50aab1 100644
--- a/python/pyarrow/_parquet.pxd
+++ b/python/pyarrow/_parquet.pxd
@@ -241,6 +241,8 @@ cdef extern from "parquet/api/reader.h" namespace "parquet" nogil:
         int64_t distinct_count() const
         int64_t num_values() const
         bint HasMinMax()
+        bint HasNullCount()
+        bint HasDistinctCount()
         c_bool Equals(const CStatistics&) const
         void Reset()
         c_string EncodeMin()
diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx
index 67c1c5a4fc8..a7c428e8996 100644
--- a/python/pyarrow/_parquet.pyx
+++ b/python/pyarrow/_parquet.pyx
@@ -96,6 +96,14 @@ cdef class Statistics(_Weakrefable):
     def has_min_max(self):
         return self.statistics.get().HasMinMax()
 
+    @property
+    def has_null_count(self):
+        return self.statistics.get().HasNullCount()
+
+    @property
+    def has_distinct_count(self):
+        return self.statistics.get().HasDistinctCount()
+
     @property
     def min_raw(self):
         if self.has_min_max:

From f1ac15d38aa0df1fafd26ebf24638af789a1e1b5 Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Sat, 24 Apr 2021 05:27:06 +0900
Subject: [PATCH 118/719] ARROW-12489: [Developer] autotune is broken

Closes #10144 from jonkeane/ARROW-12489

Authored-by: Jonathan Keane <jkeane@gmail.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 .github/workflows/comment_bot.yml | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/.github/workflows/comment_bot.yml b/.github/workflows/comment_bot.yml
index 9e103003eee..7531876e71e 100644
--- a/.github/workflows/comment_bot.yml
+++ b/.github/workflows/comment_bot.yml
@@ -78,6 +78,16 @@ jobs:
           if changed '^r/src'; then
             echo "CLANG_FORMAT_R=true" >> $GITHUB_ENV
           fi
+      - name: Ensure clang-format has the appropriate versoin
+        if: env.CMAKE_FORMAT == 'true' || 
+          env.CLANG_FORMAT_CPP == 'true' || 
+          env.CLANG_FORMAT_R == 'true' || 
+          endsWith(github.event.comment.body, 'everything')
+        run: |
+          set -e
+          . .env # To get the clang version we use
+          sudo apt update
+          sudo apt install -y clang-format-${CLANG_TOOLS}
       - name: Run cmake_format
         if: env.CMAKE_FORMAT == 'true' || endsWith(github.event.comment.body, 'everything')
         run: |

From bf0b8fcd9c48c7de50a0c2882651fc9cc6b2761a Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Sat, 24 Apr 2021 16:44:36 -0400
Subject: [PATCH 119/719] MINOR: [Docs] Update copyright year (#10146)

---
 docs/source/conf.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/source/conf.py b/docs/source/conf.py
index 05a45531f4a..5e2de5207af 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -31,6 +31,7 @@
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 #
 
+import datetime
 import os
 import sys
 from unittest import mock
@@ -107,7 +108,7 @@
 
 # General information about the project.
 project = u'Apache Arrow'
-copyright = u'2016-2019 Apache Software Foundation'
+copyright = f'2016-{datetime.datetime.now().year} Apache Software Foundation'
 author = u'Apache Software Foundation'
 
 # The version info for the project you're documenting, acts as replacement for

From 24f6a6f0c7b94b80cf389cd87b661ef436b69fc9 Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Mon, 26 Apr 2021 06:02:12 +0900
Subject: [PATCH 120/719] ARROW-12527: [Dev] Don't try getting JIRA information
 for MINOR PR

    $ python3 dev/merge_arrow_pr.py 10146
    Already on 'master'
    Your branch is up to date with 'origin/master'.
    Current branch master is up to date.
    ARROW_HOME = /home/kou/work/cpp/arrow/dev
    PROJECT_NAME = arrow
    Exception ignored in: <function Magic.__del__ at 0x7fa5c912f9d0>
    Traceback (most recent call last):
      File "/usr/lib/python3/dist-packages/magic/__init__.py", line 155, in __del__
        if self.cookie and magic_close:
    AttributeError: 'Magic' object has no attribute 'cookie'
    Restoring head pointer to f1ac15d3
    Note: switching to 'f1ac15d3'.

    You are in 'detached HEAD' state. You can look around, make experimental
    changes and commit them, and you can discard any commits you make in this
    state without impacting any branches by switching back to a branch.

    If you want to create a new branch to retain commits you create, you may
    do so (now or later) by using -c with the switch command. Example:

      git switch -c <new-branch-name>

    Or undo this operation with:

      git switch -

    Turn off this advice by setting config variable advice.detachedHead to false

    HEAD is now at f1ac15d38 ARROW-12489: [Developer] autotune is broken
    Traceback (most recent call last):
      File "/home/kou/work/cpp/arrow/dev/merge_arrow_pr.py", line 147, in __init__
        self.issue = jira_con.issue(jira_id)
      File "/usr/local/lib/python3.9/dist-packages/jira/client.py", line 1071, in issue
        issue.find(id, params=params)
      File "/usr/local/lib/python3.9/dist-packages/jira/resources.py", line 201, in find
        self._load(url, params=params)
      File "/usr/local/lib/python3.9/dist-packages/jira/resources.py", line 316, in _load
        r = self._session.get(url, headers=headers, params=params)
      File "/usr/local/lib/python3.9/dist-packages/jira/resilientsession.py", line 151, in get
        return self.__verb('GET', url, **kwargs)
      File "/usr/local/lib/python3.9/dist-packages/jira/resilientsession.py", line 147, in __verb
        raise_on_error(response, verb=verb, **kwargs)
      File "/usr/local/lib/python3.9/dist-packages/jira/resilientsession.py", line 56, in raise_on_error
        raise JIRAError(
    jira.exceptions.JIRAError: JiraError HTTP 404 url: https://issues.apache.org/jira/rest/api/2/issue/None
            text: Issue Does Not Exist

            response headers = {'Date': 'Sat, 24 Apr 2021 20:44:25 GMT', 'Server': 'Apache', 'X-AREQUESTID': '1244x117037738x6', 'X-ASESSIONID': 'dniwfg', 'X-XSS-Protection': '1; mode=block', 'X-Content-Type-Options': 'nosniff', 'X-Frame-Options': 'SAMEORIGIN', 'Content-Security-Policy': "frame-ancestors 'self'", 'X-ASEN': 'SEN-2062203', 'X-Seraph-LoginReason': 'OK', 'X-AUSERNAME': 'kou', 'Cache-Control': 'no-cache, no-store, no-transform', 'Content-Type': 'application/json;charset=UTF-8', 'Via': '1.1 jira2-he-de.apache.org', 'Keep-Alive': 'timeout=15, max=98', 'Connection': 'Keep-Alive', 'Transfer-Encoding': 'chunked'}
            response text = {"errorMessages":["Issue Does Not Exist"],"errors":{}}

    During handling of the above exception, another exception occurred:

    Traceback (most recent call last):
      File "/home/kou/work/cpp/arrow/dev/merge_arrow_pr.py", line 608, in <module>
        cli()
      File "/home/kou/work/cpp/arrow/dev/merge_arrow_pr.py", line 573, in cli
        pr = PullRequest(cmd, github_api, PR_REMOTE_NAME, jira_con, pr_num)
      File "/home/kou/work/cpp/arrow/dev/merge_arrow_pr.py", line 313, in __init__
        self.jira_issue = self._get_jira()
      File "/home/kou/work/cpp/arrow/dev/merge_arrow_pr.py", line 346, in _get_jira
        return JiraIssue(self.con, jira_id, project, self.cmd)
      File "/home/kou/work/cpp/arrow/dev/merge_arrow_pr.py", line 149, in __init__
        self.cmd.fail("ASF JIRA could not find %s\n%s" % (jira_id, e))
      File "/home/kou/work/cpp/arrow/dev/merge_arrow_pr.py", line 274, in fail
        raise Exception(msg)
    Exception: ASF JIRA could not find None
    JiraError HTTP 404 url: https://issues.apache.org/jira/rest/api/2/issue/None
            text: Issue Does Not Exist

            response headers = {'Date': 'Sat, 24 Apr 2021 20:44:25 GMT', 'Server': 'Apache', 'X-AREQUESTID': '1244x117037738x6', 'X-ASESSIONID': 'dniwfg', 'X-XSS-Protection': '1; mode=block', 'X-Content-Type-Options': 'nosniff', 'X-Frame-Options': 'SAMEORIGIN', 'Content-Security-Policy': "frame-ancestors 'self'", 'X-ASEN': 'SEN-2062203', 'X-Seraph-LoginReason': 'OK', 'X-AUSERNAME': 'kou', 'Cache-Control': 'no-cache, no-store, no-transform', 'Content-Type': 'application/json;charset=UTF-8', 'Via': '1.1 jira2-he-de.apache.org', 'Keep-Alive': 'timeout=15, max=98', 'Connection': 'Keep-Alive', 'Transfer-Encoding': 'chunked'}
            response text = {"errorMessages":["Issue Does Not Exist"],"errors":{}}

Closes #10150 from kou/dev-merge-minor

Authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 dev/merge_arrow_pr.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/dev/merge_arrow_pr.py b/dev/merge_arrow_pr.py
index 373ceb8e20f..b724a1971c0 100755
--- a/dev/merge_arrow_pr.py
+++ b/dev/merge_arrow_pr.py
@@ -330,6 +330,9 @@ def is_mergeable(self):
         return bool(self._pr_data["mergeable"])
 
     def _get_jira(self):
+        if self.title.startswith("MINOR:"):
+            return None
+
         jira_id = None
         for project, regex in PR_TITLE_REGEXEN:
             m = regex.search(self.title)
@@ -337,7 +340,7 @@ def _get_jira(self):
                 jira_id = m.group(1)
                 break
 
-        if jira_id is None and not self.title.startswith("MINOR:"):
+        if jira_id is None:
             options = ' or '.join('{0}-XXX'.format(project)
                                   for project in SUPPORTED_PROJECTS)
             self.cmd.fail("PR title should be prefixed by a jira id "

From ca83a14e264d00c6b4b29560cc2816c6718f52e7 Mon Sep 17 00:00:00 2001
From: Dominik Moritz <domoritz@gmail.com>
Date: Mon, 26 Apr 2021 06:05:06 +0900
Subject: [PATCH 121/719] ARROW-7114: [JS][CI] Enable NodeJS tests for Windows

Closes #10153 from domoritz/windows

Authored-by: Dominik Moritz <domoritz@gmail.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 .github/workflows/js.yml | 50 ++++++++++++++++++----------------------
 js/package.json          |  9 ++++----
 js/yarn.lock             |  9 +++++++-
 3 files changed, 36 insertions(+), 32 deletions(-)

diff --git a/.github/workflows/js.yml b/.github/workflows/js.yml
index 4434cda79ca..304ccb0f3aa 100644
--- a/.github/workflows/js.yml
+++ b/.github/workflows/js.yml
@@ -95,30 +95,26 @@ jobs:
         shell: bash
         run: ci/scripts/js_test.sh $(pwd)
 
-  # TODO(kszucs): the windows build fails with platform specific npm error
-  # windows:
-  #   name: AMD64 Windows 2019 NodeJS ${{ matrix.node }}
-  #   runs-on: windows-latest
-  #   if: github.event_name == 'push'
-  #   strategy:
-  #     fail-fast: false
-  #     matrix:
-  #       node: [14]
-  #   steps:
-  #     - name: Checkout Arrow
-  #       uses: actions/checkout@v1
-  #       with:
-  #         submodules: true
-  #     - name: Install NodeJS
-  #       uses: actions/setup-node@v1
-  #       with:
-  #         node-version: ${{ matrix.node }}
-  #     - name: Install Platform Dependencies
-  #       shell: bash
-  #       run: yarn add -g cross-env
-  #     - name: Build
-  #       shell: bash
-  #       run: ci/scripts/js_build.sh $(pwd)
-  #     - name: Test
-  #       shell: bash
-  #       run: ci/scripts/js_test.sh $(pwd)
+  windows:
+    name: AMD64 Windows 2019 NodeJS ${{ matrix.node }}
+    runs-on: windows-latest
+    if: github.event_name == 'push'
+    strategy:
+      fail-fast: false
+      matrix:
+        node: [14]
+    steps:
+      - name: Checkout Arrow
+        uses: actions/checkout@v1
+        with:
+          submodules: true
+      - name: Install NodeJS
+        uses: actions/setup-node@v1
+        with:
+          node-version: ${{ matrix.node }}
+      - name: Build
+        shell: bash
+        run: ci/scripts/js_build.sh $(pwd)
+      - name: Test
+        shell: bash
+        run: ci/scripts/js_test.sh $(pwd)
diff --git a/js/package.json b/js/package.json
index 880a5b9c11c..5873e33234d 100644
--- a/js/package.json
+++ b/js/package.json
@@ -7,10 +7,10 @@
   },
   "scripts": {
     "lerna": "lerna",
-    "test": "NODE_NO_WARNINGS=1 gulp test",
-    "build": "NODE_NO_WARNINGS=1 gulp build",
-    "clean": "NODE_NO_WARNINGS=1 gulp clean",
-    "debug": "NODE_NO_WARNINGS=1 gulp debug",
+    "test": "cross-env NODE_NO_WARNINGS=1 gulp test",
+    "build": "cross-env NODE_NO_WARNINGS=1 gulp build",
+    "clean": "cross-env NODE_NO_WARNINGS=1 gulp clean",
+    "debug": "cross-env NODE_NO_WARNINGS=1 gulp debug",
     "perf": "node ./perf/index.js",
     "test:integration": "node ./bin/integration.js --mode validate",
     "create:perfdata": "python ./test/data/tables/generate.py ./test/data/tables/tracks.arrow",
@@ -72,6 +72,7 @@
     "async-done": "1.3.1",
     "benchmark": "2.1.4",
     "cpy": "^8.1.2",
+    "cross-env": "^7.0.3",
     "del-cli": "3.0.1",
     "eslint": "^7.24.0",
     "eslint-plugin-jest": "^24.3.5",
diff --git a/js/yarn.lock b/js/yarn.lock
index a2eb4484b22..98e5cf4c694 100644
--- a/js/yarn.lock
+++ b/js/yarn.lock
@@ -3489,6 +3489,13 @@ create-require@^1.1.0:
   resolved "https://registry.yarnpkg.com/create-require/-/create-require-1.1.1.tgz#c1d7e8f1e5f6cfc9ff65f9cd352d37348756c333"
   integrity sha512-dcKFX3jn0MpIaXjisoRvexIJVEKzaq7z2rZKxf+MSr9TkdmHmsU4m2lcLojrj/FHl8mk5VxMmYA+ftRkP/3oKQ==
 
+cross-env@^7.0.3:
+  version "7.0.3"
+  resolved "https://registry.yarnpkg.com/cross-env/-/cross-env-7.0.3.tgz#865264b29677dc015ba8418918965dd232fc54cf"
+  integrity sha512-+/HKd6EgcQCJGh2PSjZuUitQBQynKor4wrFbRg4DtAgS1aWO+gU52xpH7M9ScGgXSYmAVS9bIJ8EzuaGw0oNAw==
+  dependencies:
+    cross-spawn "^7.0.1"
+
 cross-spawn@^6.0.0, cross-spawn@^6.0.5:
   version "6.0.5"
   resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-6.0.5.tgz#4a5ec7c64dfae22c3a14124dbacdee846d80cbc4"
@@ -3500,7 +3507,7 @@ cross-spawn@^6.0.0, cross-spawn@^6.0.5:
     shebang-command "^1.2.0"
     which "^1.2.9"
 
-cross-spawn@^7.0.0, cross-spawn@^7.0.2:
+cross-spawn@^7.0.0, cross-spawn@^7.0.1, cross-spawn@^7.0.2:
   version "7.0.3"
   resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-7.0.3.tgz#f73a85b9d5d41d045551c177e2882d4ac85728a6"
   integrity sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==

From b254a30ac51747437913cdca8a3218a1825e9c23 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Mon, 26 Apr 2021 11:07:31 -0400
Subject: [PATCH 122/719] ARROW-12500: [C++][Datasets] Ensure better test
 coverage of Dataset file formats

This unifies (most of) the tests across Parquet, Feather, and CSV (with carve-outs for particular cases). In particular, this means all formats are now tested in conjunction with async/sync and serial/threaded scanners. Also, a set of common file format tests were refactored out of the individual tests and centralized.

Closes #10134 from lidavidm/arrow-12500

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/src/arrow/dataset/file_csv.cc          |  12 +-
 cpp/src/arrow/dataset/file_csv_test.cc     |  86 ++--
 cpp/src/arrow/dataset/file_ipc_test.cc     | 309 +++-----------
 cpp/src/arrow/dataset/file_parquet_test.cc | 467 +++++++--------------
 cpp/src/arrow/dataset/scanner_test.cc      |  14 +-
 cpp/src/arrow/dataset/test_util.h          | 310 ++++++++++++++
 6 files changed, 590 insertions(+), 608 deletions(-)

diff --git a/cpp/src/arrow/dataset/file_csv.cc b/cpp/src/arrow/dataset/file_csv.cc
index a8274a545c4..4612a1233fc 100644
--- a/cpp/src/arrow/dataset/file_csv.cc
+++ b/cpp/src/arrow/dataset/file_csv.cc
@@ -90,10 +90,16 @@ static inline Result<csv::ConvertOptions> GetConvertOptions(
       GetFragmentScanOptions<CsvFragmentScanOptions>(
           kCsvTypeName, scan_options.get(), format.default_fragment_scan_options));
   auto convert_options = csv_scan_options->convert_options;
-  for (FieldRef ref : scan_options->MaterializedFields()) {
-    ARROW_ASSIGN_OR_RAISE(auto field, ref.GetOne(*scan_options->dataset_schema));
-
+  auto materialized = scan_options->MaterializedFields();
+  std::unordered_set<std::string> materialized_fields(materialized.begin(),
+                                                      materialized.end());
+  for (auto field : scan_options->dataset_schema->fields()) {
+    if (materialized_fields.find(field->name()) == materialized_fields.end()) continue;
+    // Ignore virtual columns.
     if (column_names.find(field->name()) == column_names.end()) continue;
+    // Only read the requested columns
+    convert_options.include_columns.push_back(field->name());
+    // Properly set conversion types
     convert_options.column_types[field->name()] = field->type();
   }
   return convert_options;
diff --git a/cpp/src/arrow/dataset/file_csv_test.cc b/cpp/src/arrow/dataset/file_csv_test.cc
index 0ae6fa532ca..eff7f28fbed 100644
--- a/cpp/src/arrow/dataset/file_csv_test.cc
+++ b/cpp/src/arrow/dataset/file_csv_test.cc
@@ -21,6 +21,7 @@
 #include <utility>
 #include <vector>
 
+#include "arrow/csv/writer.h"
 #include "arrow/dataset/dataset_internal.h"
 #include "arrow/dataset/file_base.h"
 #include "arrow/dataset/partition.h"
@@ -36,7 +37,25 @@
 namespace arrow {
 namespace dataset {
 
-class TestCsvFileFormat : public testing::TestWithParam<Compression::type> {
+class CsvFormatHelper {
+ public:
+  using FormatType = CsvFileFormat;
+  static Result<std::shared_ptr<Buffer>> Write(RecordBatchReader* reader) {
+    ARROW_ASSIGN_OR_RAISE(auto sink, io::BufferOutputStream::Create());
+    std::shared_ptr<Table> table;
+    RETURN_NOT_OK(reader->ReadAll(&table));
+    auto options = csv::WriteOptions::Defaults();
+    RETURN_NOT_OK(csv::WriteCSV(*table, options, default_memory_pool(), sink.get()));
+    return sink->Finish();
+  }
+
+  static std::shared_ptr<CsvFileFormat> MakeFormat() {
+    return std::make_shared<CsvFileFormat>();
+  }
+};
+
+class TestCsvFileFormat : public FileFormatFixtureMixin<CsvFormatHelper>,
+                          public ::testing::WithParamInterface<Compression::type> {
  public:
   Compression::type GetCompression() { return GetParam(); }
 
@@ -83,16 +102,10 @@ class TestCsvFileFormat : public testing::TestWithParam<Compression::type> {
     EXPECT_OK_AND_ASSIGN(auto scan_task_it, fragment->Scan(opts_));
     return Batches(std::move(scan_task_it));
   }
-
-  void SetSchema(std::vector<std::shared_ptr<Field>> fields) {
-    opts_->dataset_schema = schema(std::move(fields));
-    ASSERT_OK(SetProjection(opts_.get(), opts_->dataset_schema->field_names()));
-  }
-
-  std::shared_ptr<CsvFileFormat> format_ = std::make_shared<CsvFileFormat>();
-  std::shared_ptr<ScanOptions> opts_ = std::make_shared<ScanOptions>();
 };
 
+// Basic scanning tests (to exercise compression support); see the parameterized test
+// below for more comprehensive testing of scan behaviors
 TEST_P(TestCsvFileFormat, ScanRecordBatchReader) {
   auto source = GetFileSource(R"(f64
 1.0
@@ -100,7 +113,7 @@ TEST_P(TestCsvFileFormat, ScanRecordBatchReader) {
 N/A
 2)");
   SetSchema({field("f64", float64())});
-  ASSERT_OK_AND_ASSIGN(auto fragment, format_->MakeFragment(*source));
+  auto fragment = MakeFragment(*source);
 
   int64_t row_count = 0;
 
@@ -119,7 +132,7 @@ MYNULL
 N/A
 bar)");
   SetSchema({field("str", utf8())});
-  ASSERT_OK_AND_ASSIGN(auto fragment, format_->MakeFragment(*source));
+  auto fragment = MakeFragment(*source);
   auto fragment_scan_options = std::make_shared<CsvFragmentScanOptions>();
   fragment_scan_options->convert_options.null_values = {"MYNULL"};
   fragment_scan_options->convert_options.strings_can_be_null = true;
@@ -145,7 +158,7 @@ bar)");
   auto defaults = std::make_shared<CsvFragmentScanOptions>();
   defaults->read_options.skip_rows = 1;
   format_->default_fragment_scan_options = defaults;
-  ASSERT_OK_AND_ASSIGN(auto fragment, format_->MakeFragment(*source));
+  auto fragment = MakeFragment(*source);
   ASSERT_OK_AND_ASSIGN(auto physical_schema, fragment->ReadPhysicalSchema());
   AssertSchemaEqual(opts_->dataset_schema, physical_schema);
 
@@ -179,7 +192,7 @@ N/A
 2)");
   // NB: dataset_schema includes a column not present in the file
   SetSchema({field("f64", float64()), field("virtual", int32())});
-  ASSERT_OK_AND_ASSIGN(auto fragment, format_->MakeFragment(*source));
+  auto fragment = MakeFragment(*source);
 
   ASSERT_OK_AND_ASSIGN(auto physical_schema, fragment->ReadPhysicalSchema());
   AssertSchemaEqual(Schema({field("f64", float64())}), *physical_schema);
@@ -195,22 +208,12 @@ N/A
   ASSERT_EQ(row_count, 3);
 }
 
-TEST_P(TestCsvFileFormat, OpenFailureWithRelevantError) {
-  if (GetCompression() != Compression::type::UNCOMPRESSED) {
-    GTEST_SKIP() << "File source name is different with compression";
-  }
-  auto source = GetFileSource("");
-  EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, testing::HasSubstr("<Buffer>"),
-                                  format_->Inspect(*source).status());
-
-  constexpr auto file_name = "herp/derp";
-  ASSERT_OK_AND_ASSIGN(
-      auto fs, fs::internal::MockFileSystem::Make(fs::kNoTime, {fs::File(file_name)}));
-  EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, testing::HasSubstr(file_name),
-                                  format_->Inspect({file_name, fs}).status());
+TEST_P(TestCsvFileFormat, InspectFailureWithRelevantError) {
+  TestInspectFailureWithRelevantError(StatusCode::Invalid);
 }
 
 TEST_P(TestCsvFileFormat, Inspect) {
+  TestInspect();
   auto source = GetFileSource(R"(f64
 1.0
 
@@ -221,6 +224,7 @@ N/A
 }
 
 TEST_P(TestCsvFileFormat, IsSupported) {
+  TestIsSupported();
   bool supported;
 
   auto source = GetFileSource("");
@@ -247,7 +251,7 @@ TEST_P(TestCsvFileFormat, NonProjectedFieldWithDifferingTypeFromInferred) {
 ,
 N/A,bar
 2,baz)");
-  ASSERT_OK_AND_ASSIGN(auto fragment, format_->MakeFragment(*source));
+  auto fragment = MakeFragment(*source);
   ASSERT_OK_AND_ASSIGN(auto physical_schema, fragment->ReadPhysicalSchema());
   AssertSchemaEqual(
       Schema({field("betrayal_not_really_f64", float64()), field("str", utf8())}),
@@ -277,6 +281,10 @@ N/A,bar
   ASSERT_OK(batch_it.Visit([](TaggedRecordBatch) { return Status::OK(); }));
 }
 
+TEST_P(TestCsvFileFormat, WriteRecordBatchReader) {
+  GTEST_SKIP() << "Write support not implemented for CSV";
+}
+
 INSTANTIATE_TEST_SUITE_P(TestUncompressedCsv, TestCsvFileFormat,
                          ::testing::Values(Compression::UNCOMPRESSED));
 #ifdef ARROW_WITH_BZ2
@@ -297,5 +305,29 @@ INSTANTIATE_TEST_SUITE_P(TestZSTDCsv, TestCsvFileFormat,
                          ::testing::Values(Compression::ZSTD));
 #endif
 
+class CsvWithNullsHelper : public CsvFormatHelper {
+ public:
+  static std::shared_ptr<CsvFileFormat> MakeFormat() {
+    auto format = std::make_shared<CsvFileFormat>();
+    format->parse_options.ignore_empty_lines = false;
+    return format;
+  }
+};
+
+class TestCsvFileFormatScan : public FileFormatScanMixin<CsvWithNullsHelper> {};
+
+TEST_P(TestCsvFileFormatScan, ScanRecordBatchReader) { TestScan(); }
+TEST_P(TestCsvFileFormatScan, ScanRecordBatchReaderWithVirtualColumn) {
+  TestScanWithVirtualColumn();
+}
+TEST_P(TestCsvFileFormatScan, ScanRecordBatchReaderProjected) { TestScanProjected(); }
+TEST_P(TestCsvFileFormatScan, ScanRecordBatchReaderProjectedMissingCols) {
+  TestScanProjectedMissingCols();
+}
+
+INSTANTIATE_TEST_SUITE_P(TestScan, TestCsvFileFormatScan,
+                         ::testing::ValuesIn(TestFormatParams::Values()),
+                         TestFormatParams::ToTestNameString);
+
 }  // namespace dataset
 }  // namespace arrow
diff --git a/cpp/src/arrow/dataset/file_ipc_test.cc b/cpp/src/arrow/dataset/file_ipc_test.cc
index ef0c0f62108..24fed4795da 100644
--- a/cpp/src/arrow/dataset/file_ipc_test.cc
+++ b/cpp/src/arrow/dataset/file_ipc_test.cc
@@ -39,172 +39,35 @@
 namespace arrow {
 namespace dataset {
 
-constexpr int64_t kBatchSize = 1UL << 12;
-constexpr int64_t kBatchRepetitions = 1 << 5;
-constexpr int64_t kNumRows = kBatchSize * kBatchRepetitions;
-
 using internal::checked_pointer_cast;
 
-class ArrowIpcWriterMixin : public ::testing::Test {
+class IpcFormatHelper {
  public:
-  std::shared_ptr<Buffer> Write(RecordBatchReader* reader) {
-    EXPECT_OK_AND_ASSIGN(auto sink, io::BufferOutputStream::Create());
-
-    EXPECT_OK_AND_ASSIGN(auto writer, ipc::MakeFileWriter(sink, reader->schema()));
-
+  using FormatType = IpcFileFormat;
+  static Result<std::shared_ptr<Buffer>> Write(RecordBatchReader* reader) {
+    ARROW_ASSIGN_OR_RAISE(auto sink, io::BufferOutputStream::Create());
+    ARROW_ASSIGN_OR_RAISE(auto writer, ipc::MakeFileWriter(sink, reader->schema()));
     std::vector<std::shared_ptr<RecordBatch>> batches;
-    ARROW_EXPECT_OK(reader->ReadAll(&batches));
+    RETURN_NOT_OK(reader->ReadAll(&batches));
     for (auto batch : batches) {
-      ARROW_EXPECT_OK(writer->WriteRecordBatch(*batch));
+      RETURN_NOT_OK(writer->WriteRecordBatch(*batch));
     }
-
-    ARROW_EXPECT_OK(writer->Close());
-
-    EXPECT_OK_AND_ASSIGN(auto out, sink->Finish());
-    return out;
+    RETURN_NOT_OK(writer->Close());
+    return sink->Finish();
   }
 
-  std::shared_ptr<Buffer> Write(const Table& table) {
-    EXPECT_OK_AND_ASSIGN(auto sink, io::BufferOutputStream::Create());
-    EXPECT_OK_AND_ASSIGN(auto writer, ipc::MakeFileWriter(sink, table.schema()));
-
-    ARROW_EXPECT_OK(writer->WriteTable(table));
-
-    ARROW_EXPECT_OK(writer->Close());
-
-    EXPECT_OK_AND_ASSIGN(auto out, sink->Finish());
-    return out;
+  static std::shared_ptr<IpcFileFormat> MakeFormat() {
+    return std::make_shared<IpcFileFormat>();
   }
 };
 
-class TestIpcFileFormat : public ArrowIpcWriterMixin {
- public:
-  std::unique_ptr<FileSource> GetFileSource(RecordBatchReader* reader) {
-    auto buffer = Write(reader);
-    return internal::make_unique<FileSource>(std::move(buffer));
-  }
-
-  std::unique_ptr<RecordBatchReader> GetRecordBatchReader(
-      std::shared_ptr<Schema> schema) {
-    return MakeGeneratedRecordBatch(schema, kBatchSize, kBatchRepetitions);
-  }
-
-  Result<std::shared_ptr<io::BufferOutputStream>> GetFileSink() {
-    ARROW_ASSIGN_OR_RAISE(std::shared_ptr<ResizableBuffer> buffer,
-                          AllocateResizableBuffer(0));
-    return std::make_shared<io::BufferOutputStream>(buffer);
-  }
-
-  RecordBatchIterator Batches(ScanTaskIterator scan_task_it) {
-    return MakeFlattenIterator(MakeMaybeMapIterator(
-        [](std::shared_ptr<ScanTask> scan_task) { return scan_task->Execute(); },
-        std::move(scan_task_it)));
-  }
-
-  RecordBatchIterator Batches(Fragment* fragment) {
-    EXPECT_OK_AND_ASSIGN(auto scan_task_it, fragment->Scan(opts_));
-    return Batches(std::move(scan_task_it));
-  }
-
-  void SetSchema(std::vector<std::shared_ptr<Field>> fields) {
-    opts_ = std::make_shared<ScanOptions>();
-    opts_->dataset_schema = schema(std::move(fields));
-    ASSERT_OK(SetProjection(opts_.get(), opts_->dataset_schema->field_names()));
-  }
-
- protected:
-  std::shared_ptr<IpcFileFormat> format_ = std::make_shared<IpcFileFormat>();
-  std::shared_ptr<ScanOptions> opts_;
-};
-
-TEST_F(TestIpcFileFormat, ScanRecordBatchReader) {
-  auto reader = GetRecordBatchReader(schema({field("f64", float64())}));
-  auto source = GetFileSource(reader.get());
-
-  SetSchema(reader->schema()->fields());
-  ASSERT_OK_AND_ASSIGN(auto fragment, format_->MakeFragment(*source));
-
-  int64_t row_count = 0;
-
-  for (auto maybe_batch : Batches(fragment.get())) {
-    ASSERT_OK_AND_ASSIGN(auto batch, maybe_batch);
-    row_count += batch->num_rows();
-  }
-
-  ASSERT_EQ(row_count, kNumRows);
-}
-
-TEST_F(TestIpcFileFormat, FragmentScanOptions) {
-  auto reader = GetRecordBatchReader(
-      // ARROW-12077: on Windows/mimalloc/release, nullable list column leads to crash
-      schema({field("list", list(float64()), false,
-                    key_value_metadata({{"max_length", "1"}})),
-              field("f64", float64())}));
-  auto source = GetFileSource(reader.get());
-
-  SetSchema(reader->schema()->fields());
-  ASSERT_OK_AND_ASSIGN(auto fragment, format_->MakeFragment(*source));
-
-  // Set scan options that ensure reading fails
-  auto fragment_scan_options = std::make_shared<IpcFragmentScanOptions>();
-  fragment_scan_options->options = std::make_shared<ipc::IpcReadOptions>();
-  fragment_scan_options->options->max_recursion_depth = 0;
-  opts_->fragment_scan_options = fragment_scan_options;
-  ASSERT_OK_AND_ASSIGN(auto scan_tasks, fragment->Scan(opts_));
-  ASSERT_OK_AND_ASSIGN(auto scan_task, scan_tasks.Next());
-  ASSERT_OK_AND_ASSIGN(auto batches, scan_task->Execute());
-  ASSERT_RAISES(Invalid, batches.Next());
-}
-
-TEST_F(TestIpcFileFormat, ScanRecordBatchReaderWithVirtualColumn) {
-  auto reader = GetRecordBatchReader(schema({field("f64", float64())}));
-  auto source = GetFileSource(reader.get());
-
-  // NB: dataset_schema includes a column not present in the file
-  SetSchema({reader->schema()->field(0), field("virtual", int32())});
-  ASSERT_OK_AND_ASSIGN(auto fragment, format_->MakeFragment(*source));
-
-  ASSERT_OK_AND_ASSIGN(auto physical_schema, fragment->ReadPhysicalSchema());
-  AssertSchemaEqual(Schema({field("f64", float64())}), *physical_schema);
-
-  int64_t row_count = 0;
-
-  for (auto maybe_batch : Batches(fragment.get())) {
-    ASSERT_OK_AND_ASSIGN(auto batch, maybe_batch);
-    AssertSchemaEqual(*batch->schema(), *physical_schema);
-    row_count += batch->num_rows();
-  }
-
-  ASSERT_EQ(row_count, kNumRows);
-}
-
-TEST_F(TestIpcFileFormat, WriteRecordBatchReader) {
-  auto reader = GetRecordBatchReader(schema({field("f64", float64())}));
-  auto source = GetFileSource(reader.get());
-
-  SetSchema(reader->schema()->fields());
-
-  EXPECT_OK_AND_ASSIGN(auto sink, GetFileSink());
-
-  auto options = format_->DefaultWriteOptions();
-  EXPECT_OK_AND_ASSIGN(auto writer, format_->MakeWriter(sink, reader->schema(), options));
-
-  ASSERT_OK(writer->Write(GetRecordBatchReader(schema({field("f64", float64())})).get()));
-  ASSERT_OK(writer->Finish());
+class TestIpcFileFormat : public FileFormatFixtureMixin<IpcFormatHelper> {};
 
-  EXPECT_OK_AND_ASSIGN(auto written, sink->Finish());
-
-  AssertBufferEqual(*written, *source->buffer());
-}
+TEST_F(TestIpcFileFormat, WriteRecordBatchReader) { TestWrite(); }
 
 TEST_F(TestIpcFileFormat, WriteRecordBatchReaderCustomOptions) {
   auto reader = GetRecordBatchReader(schema({field("f64", float64())}));
   auto source = GetFileSource(reader.get());
-
-  SetSchema(reader->schema()->fields());
-
-  EXPECT_OK_AND_ASSIGN(auto sink, GetFileSink());
-
   auto ipc_options =
       checked_pointer_cast<IpcFileWriteOptions>(format_->DefaultWriteOptions());
   if (util::Codec::IsAvailable(Compression::ZSTD)) {
@@ -212,19 +75,21 @@ TEST_F(TestIpcFileFormat, WriteRecordBatchReaderCustomOptions) {
                          util::Codec::Create(Compression::ZSTD));
   }
   ipc_options->metadata = key_value_metadata({{"hello", "world"}});
-  EXPECT_OK_AND_ASSIGN(auto writer,
-                       format_->MakeWriter(sink, reader->schema(), ipc_options));
-  ASSERT_OK(writer->Write(GetRecordBatchReader(schema({field("f64", float64())})).get()));
-  ASSERT_OK(writer->Finish());
 
-  EXPECT_OK_AND_ASSIGN(auto written, sink->Finish());
+  auto written = WriteToBuffer(reader->schema(), ipc_options);
+
   EXPECT_OK_AND_ASSIGN(auto ipc_reader, ipc::RecordBatchFileReader::Open(
                                             std::make_shared<io::BufferReader>(written)));
-
   EXPECT_EQ(ipc_reader->metadata()->sorted_pairs(),
             ipc_options->metadata->sorted_pairs());
 }
 
+TEST_F(TestIpcFileFormat, InspectFailureWithRelevantError) {
+  TestInspectFailureWithRelevantError(StatusCode::Invalid);
+}
+TEST_F(TestIpcFileFormat, Inspect) { TestInspect(); }
+TEST_F(TestIpcFileFormat, IsSupported) { TestIsSupported(); }
+
 class TestIpcFileSystemDataset : public testing::Test,
                                  public WriteFileSystemDatasetMixin {
  public:
@@ -271,118 +136,40 @@ TEST_F(TestIpcFileSystemDataset, WriteExceedsMaxPartitions) {
                                   FileSystemDataset::Write(write_options_, scanner));
 }
 
-TEST_F(TestIpcFileFormat, OpenFailureWithRelevantError) {
-  std::shared_ptr<Buffer> buf = std::make_shared<Buffer>(util::string_view(""));
-  auto result = format_->Inspect(FileSource(buf));
-  EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, testing::HasSubstr("<Buffer>"),
-                                  result.status());
-
-  constexpr auto file_name = "herp/derp";
-  ASSERT_OK_AND_ASSIGN(
-      auto fs, fs::internal::MockFileSystem::Make(fs::kNoTime, {fs::File(file_name)}));
-  result = format_->Inspect({file_name, fs});
-  EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, testing::HasSubstr(file_name),
-                                  result.status());
-}
-
-static auto f32 = field("f32", float32());
-static auto f64 = field("f64", float64());
-static auto i32 = field("i32", int32());
-static auto i64 = field("i64", int64());
-
-TEST_F(TestIpcFileFormat, ScanRecordBatchReaderProjected) {
-  SetSchema({f64, i64, f32, i32});
-  ASSERT_OK(SetProjection(opts_.get(), {"f64"}));
-  opts_->filter = equal(field_ref("i32"), literal(0));
-
-  // NB: projection is applied by the scanner; FileFragment does not evaluate it so
-  // we will not drop "i32" even though it is not projected since we need it for
-  // filtering
-  auto expected_schema = schema({f64, i32});
-
-  auto reader = GetRecordBatchReader(opts_->dataset_schema);
-  auto source = GetFileSource(reader.get());
-  ASSERT_OK_AND_ASSIGN(auto fragment, format_->MakeFragment(*source));
-
-  int64_t row_count = 0;
-
-  for (auto maybe_batch : Batches(fragment.get())) {
-    ASSERT_OK_AND_ASSIGN(auto batch, maybe_batch);
-    row_count += batch->num_rows();
-    AssertSchemaEqual(*batch->schema(), *expected_schema,
-                      /*check_metadata=*/false);
-  }
-
-  ASSERT_EQ(row_count, kNumRows);
-}
-
-TEST_F(TestIpcFileFormat, ScanRecordBatchReaderProjectedMissingCols) {
-  SetSchema({f64, i64, f32, i32});
-  ASSERT_OK(SetProjection(opts_.get(), {"f64"}));
-  opts_->filter = equal(field_ref("i32"), literal(0));
+class TestIpcFileFormatScan : public FileFormatScanMixin<IpcFormatHelper> {};
 
-  auto reader_without_i32 = GetRecordBatchReader(schema({f64, i64, f32}));
-  auto reader_without_f64 = GetRecordBatchReader(schema({i64, f32, i32}));
-  auto reader = GetRecordBatchReader(schema({f64, i64, f32, i32}));
-
-  auto readers = {reader.get(), reader_without_i32.get(), reader_without_f64.get()};
-  for (auto reader : readers) {
-    auto source = GetFileSource(reader);
-    ASSERT_OK_AND_ASSIGN(auto fragment, format_->MakeFragment(*source));
-
-    // NB: projection is applied by the scanner; FileFragment does not evaluate it so
-    // we will not drop "i32" even though it is not projected since we need it for
-    // filtering
-    //
-    // in the case where a file doesn't contain a referenced field, we won't
-    // materialize it as nulls later
-    std::shared_ptr<Schema> expected_schema;
-    if (reader == reader_without_i32.get()) {
-      expected_schema = schema({f64});
-    } else if (reader == reader_without_f64.get()) {
-      expected_schema = schema({i32});
-    } else {
-      expected_schema = schema({f64, i32});
-    }
-
-    int64_t row_count = 0;
-
-    for (auto maybe_batch : Batches(fragment.get())) {
-      ASSERT_OK_AND_ASSIGN(auto batch, maybe_batch);
-      row_count += batch->num_rows();
-      AssertSchemaEqual(*batch->schema(), *expected_schema,
-                        /*check_metadata=*/false);
-    }
-
-    ASSERT_EQ(row_count, kNumRows);
-  }
+TEST_P(TestIpcFileFormatScan, ScanRecordBatchReader) { TestScan(); }
+TEST_P(TestIpcFileFormatScan, ScanRecordBatchReaderWithVirtualColumn) {
+  TestScanWithVirtualColumn();
 }
-
-TEST_F(TestIpcFileFormat, Inspect) {
-  auto reader = GetRecordBatchReader(schema({field("f64", float64())}));
-  auto source = GetFileSource(reader.get());
-
-  ASSERT_OK_AND_ASSIGN(auto actual, format_->Inspect(*source.get()));
-  EXPECT_EQ(*actual, *reader->schema());
+TEST_P(TestIpcFileFormatScan, ScanRecordBatchReaderProjected) { TestScanProjected(); }
+TEST_P(TestIpcFileFormatScan, ScanRecordBatchReaderProjectedMissingCols) {
+  TestScanProjectedMissingCols();
 }
-
-TEST_F(TestIpcFileFormat, IsSupported) {
-  auto reader = GetRecordBatchReader(schema({field("f64", float64())}));
+TEST_P(TestIpcFileFormatScan, FragmentScanOptions) {
+  auto reader = GetRecordBatchReader(
+      // ARROW-12077: on Windows/mimalloc/release, nullable list column leads to crash
+      schema({field("list", list(float64()), false,
+                    key_value_metadata({{"max_length", "1"}})),
+              field("f64", float64())}));
   auto source = GetFileSource(reader.get());
 
-  bool supported = false;
-
-  std::shared_ptr<Buffer> buf = std::make_shared<Buffer>(util::string_view(""));
-  ASSERT_OK_AND_ASSIGN(supported, format_->IsSupported(FileSource(buf)));
-  ASSERT_EQ(supported, false);
-
-  buf = std::make_shared<Buffer>(util::string_view("corrupted"));
-  ASSERT_OK_AND_ASSIGN(supported, format_->IsSupported(FileSource(buf)));
-  ASSERT_EQ(supported, false);
+  SetSchema(reader->schema()->fields());
+  auto fragment = MakeFragment(*source);
 
-  ASSERT_OK_AND_ASSIGN(supported, format_->IsSupported(*source));
-  EXPECT_EQ(supported, true);
+  // Set scan options that ensure reading fails
+  auto fragment_scan_options = std::make_shared<IpcFragmentScanOptions>();
+  fragment_scan_options->options = std::make_shared<ipc::IpcReadOptions>();
+  fragment_scan_options->options->max_recursion_depth = 0;
+  opts_->fragment_scan_options = fragment_scan_options;
+  ASSERT_OK_AND_ASSIGN(auto scan_tasks, fragment->Scan(opts_));
+  ASSERT_OK_AND_ASSIGN(auto scan_task, scan_tasks.Next());
+  ASSERT_OK_AND_ASSIGN(auto batches, scan_task->Execute());
+  ASSERT_RAISES(Invalid, batches.Next());
 }
+INSTANTIATE_TEST_SUITE_P(TestScan, TestIpcFileFormatScan,
+                         ::testing::ValuesIn(TestFormatParams::Values()),
+                         TestFormatParams::ToTestNameString);
 
 }  // namespace dataset
 }  // namespace arrow
diff --git a/cpp/src/arrow/dataset/file_parquet_test.cc b/cpp/src/arrow/dataset/file_parquet_test.cc
index bb06e7f2b63..04e61c30d41 100644
--- a/cpp/src/arrow/dataset/file_parquet_test.cc
+++ b/cpp/src/arrow/dataset/file_parquet_test.cc
@@ -39,10 +39,6 @@
 namespace arrow {
 namespace dataset {
 
-constexpr int64_t kBatchSize = 1UL << 12;
-constexpr int64_t kBatchRepetitions = 1 << 5;
-constexpr int64_t kNumRows = kBatchSize * kBatchRepetitions;
-
 using parquet::ArrowWriterProperties;
 using parquet::default_arrow_writer_properties;
 
@@ -56,9 +52,24 @@ using testing::Pointee;
 
 using internal::checked_pointer_cast;
 
-class ArrowParquetWriterMixin : public ::testing::Test {
+class ParquetFormatHelper {
  public:
-  Status WriteRecordBatch(const RecordBatch& batch, parquet::arrow::FileWriter* writer) {
+  using FormatType = ParquetFileFormat;
+
+  static Result<std::shared_ptr<Buffer>> Write(RecordBatchReader* reader) {
+    auto pool = ::arrow::default_memory_pool();
+    std::shared_ptr<Buffer> out;
+    auto sink = CreateOutputStream(pool);
+    RETURN_NOT_OK(WriteRecordBatchReader(reader, pool, sink));
+    return sink->Finish();
+  }
+  static std::shared_ptr<ParquetFileFormat> MakeFormat() {
+    return std::make_shared<ParquetFileFormat>();
+  }
+
+ private:
+  static Status WriteRecordBatch(const RecordBatch& batch,
+                                 parquet::arrow::FileWriter* writer) {
     auto schema = batch.schema();
     auto size = batch.num_rows();
 
@@ -76,8 +87,8 @@ class ArrowParquetWriterMixin : public ::testing::Test {
     return Status::OK();
   }
 
-  Status WriteRecordBatchReader(RecordBatchReader* reader,
-                                parquet::arrow::FileWriter* writer) {
+  static Status WriteRecordBatchReader(RecordBatchReader* reader,
+                                       parquet::arrow::FileWriter* writer) {
     auto schema = reader->schema();
 
     if (!schema->Equals(*writer->schema(), false)) {
@@ -92,7 +103,7 @@ class ArrowParquetWriterMixin : public ::testing::Test {
         });
   }
 
-  Status WriteRecordBatchReader(
+  static Status WriteRecordBatchReader(
       RecordBatchReader* reader, MemoryPool* pool,
       const std::shared_ptr<io::OutputStream>& sink,
       const std::shared_ptr<WriterProperties>& properties = default_writer_properties(),
@@ -104,52 +115,10 @@ class ArrowParquetWriterMixin : public ::testing::Test {
     RETURN_NOT_OK(WriteRecordBatchReader(reader, writer.get()));
     return writer->Close();
   }
-
-  std::shared_ptr<Buffer> Write(RecordBatchReader* reader) {
-    auto pool = ::arrow::default_memory_pool();
-
-    std::shared_ptr<Buffer> out;
-
-    auto sink = CreateOutputStream(pool);
-
-    ARROW_EXPECT_OK(WriteRecordBatchReader(reader, pool, sink));
-    // XXX the rest of the test may crash if this fails, since out will be nullptr
-    EXPECT_OK_AND_ASSIGN(out, sink->Finish());
-
-    return out;
-  }
-
-  std::shared_ptr<Buffer> Write(const Table& table) {
-    auto pool = ::arrow::default_memory_pool();
-
-    std::shared_ptr<Buffer> out;
-    auto sink = CreateOutputStream(pool);
-
-    ARROW_EXPECT_OK(WriteTable(table, pool, sink, 1U << 16));
-    // XXX the rest of the test may crash if this fails, since out will be nullptr
-    EXPECT_OK_AND_ASSIGN(out, sink->Finish());
-    return out;
-  }
 };
 
-class TestParquetFileFormat : public ArrowParquetWriterMixin {
+class TestParquetFileFormat : public FileFormatFixtureMixin<ParquetFormatHelper> {
  public:
-  std::unique_ptr<FileSource> GetFileSource(RecordBatchReader* reader) {
-    auto buffer = Write(reader);
-    return internal::make_unique<FileSource>(std::move(buffer));
-  }
-
-  std::unique_ptr<RecordBatchReader> GetRecordBatchReader(
-      std::shared_ptr<Schema> schema) {
-    return MakeGeneratedRecordBatch(schema, kBatchSize, kBatchRepetitions);
-  }
-
-  Result<std::shared_ptr<io::BufferOutputStream>> GetFileSink() {
-    ARROW_ASSIGN_OR_RAISE(std::shared_ptr<ResizableBuffer> buffer,
-                          AllocateResizableBuffer(0));
-    return std::make_shared<io::BufferOutputStream>(buffer);
-  }
-
   RecordBatchIterator Batches(ScanTaskIterator scan_task_it) {
     return MakeFlattenIterator(MakeMaybeMapIterator(
         [](std::shared_ptr<ScanTask> scan_task) { return scan_task->Execute(); },
@@ -161,10 +130,6 @@ class TestParquetFileFormat : public ArrowParquetWriterMixin {
     return Batches(std::move(scan_task_it));
   }
 
-  void SetFilter(Expression filter) {
-    ASSERT_OK_AND_ASSIGN(opts_->filter, filter.Bind(*opts_->dataset_schema));
-  }
-
   std::shared_ptr<RecordBatch> SingleBatch(Fragment* fragment) {
     auto batches = IteratorToVector(Batches(fragment));
     EXPECT_EQ(batches.size(), 1);
@@ -207,219 +172,167 @@ class TestParquetFileFormat : public ArrowParquetWriterMixin {
       EXPECT_EQ(SingleBatch(parquet_fragment.get())->num_rows(), expected + 1);
     }
   }
-
-  void SetSchema(std::vector<std::shared_ptr<Field>> fields) {
-    opts_ = std::make_shared<ScanOptions>();
-    opts_->dataset_schema = schema(std::move(fields));
-    ASSERT_OK(SetProjection(opts_.get(), opts_->dataset_schema->field_names()));
-  }
-
- protected:
-  std::shared_ptr<ParquetFileFormat> format_ = std::make_shared<ParquetFileFormat>();
-  std::shared_ptr<ScanOptions> opts_;
 };
 
-TEST_F(TestParquetFileFormat, ScanRecordBatchReader) {
-  auto reader = GetRecordBatchReader(schema({field("f64", float64())}));
-  auto source = GetFileSource(reader.get());
-
-  SetSchema(reader->schema()->fields());
-  SetFilter(literal(true));
-  ASSERT_OK_AND_ASSIGN(auto fragment, format_->MakeFragment(*source));
-
-  int64_t row_count = 0;
-
-  for (auto maybe_batch : Batches(fragment.get())) {
-    ASSERT_OK_AND_ASSIGN(auto batch, maybe_batch);
-    row_count += batch->num_rows();
-  }
-
-  ASSERT_EQ(row_count, kNumRows);
+TEST_F(TestParquetFileFormat, InspectFailureWithRelevantError) {
+  TestInspectFailureWithRelevantError(StatusCode::IOError);
 }
+TEST_F(TestParquetFileFormat, Inspect) { TestInspect(); }
 
-TEST_F(TestParquetFileFormat, ScanRecordBatchReaderDictEncoded) {
+TEST_F(TestParquetFileFormat, InspectDictEncoded) {
   auto reader = GetRecordBatchReader(schema({field("utf8", utf8())}));
   auto source = GetFileSource(reader.get());
 
-  SetSchema(reader->schema()->fields());
-  SetFilter(literal(true));
-
   format_->reader_options.dict_columns = {"utf8"};
-  ASSERT_OK_AND_ASSIGN(auto fragment, format_->MakeFragment(*source));
-
-  ASSERT_OK_AND_ASSIGN(auto scan_task_it, fragment->Scan(opts_));
-  int64_t row_count = 0;
+  ASSERT_OK_AND_ASSIGN(auto actual, format_->Inspect(*source.get()));
 
   Schema expected_schema({field("utf8", dictionary(int32(), utf8()))});
+  AssertSchemaEqual(*actual, expected_schema, /* check_metadata = */ false);
+}
 
-  for (auto maybe_task : scan_task_it) {
-    ASSERT_OK_AND_ASSIGN(auto task, maybe_task);
-    ASSERT_OK_AND_ASSIGN(auto rb_it, task->Execute());
-    for (auto maybe_batch : rb_it) {
-      ASSERT_OK_AND_ASSIGN(auto batch, maybe_batch);
-      row_count += batch->num_rows();
-      AssertSchemaEqual(*batch->schema(), expected_schema, /* check_metadata = */ false);
-    }
-  }
+TEST_F(TestParquetFileFormat, IsSupported) { TestIsSupported(); }
 
-  ASSERT_EQ(row_count, kNumRows);
-}
+TEST_F(TestParquetFileFormat, WriteRecordBatchReader) { TestWrite(); }
 
-TEST_F(TestParquetFileFormat, ScanRecordBatchReaderPreBuffer) {
-  auto reader = GetRecordBatchReader(schema({field("f64", float64())}));
-  auto source = GetFileSource(reader.get());
+TEST_F(TestParquetFileFormat, WriteRecordBatchReaderCustomOptions) {
+  TimeUnit::type coerce_timestamps_to = TimeUnit::MICRO,
+                 coerce_timestamps_from = TimeUnit::NANO;
 
-  SetSchema(reader->schema()->fields());
-  SetFilter(literal(true));
+  auto reader =
+      GetRecordBatchReader(schema({field("ts", timestamp(coerce_timestamps_from))}));
+  auto options =
+      checked_pointer_cast<ParquetFileWriteOptions>(format_->DefaultWriteOptions());
+  options->writer_properties = parquet::WriterProperties::Builder()
+                                   .created_by("TestParquetFileFormat")
+                                   ->disable_statistics()
+                                   ->build();
+  options->arrow_writer_properties = parquet::ArrowWriterProperties::Builder()
+                                         .coerce_timestamps(coerce_timestamps_to)
+                                         ->allow_truncated_timestamps()
+                                         ->build();
 
-  ASSERT_OK_AND_ASSIGN(auto fragment, format_->MakeFragment(*source));
-  auto fragment_scan_options = std::make_shared<ParquetFragmentScanOptions>();
-  fragment_scan_options->arrow_reader_properties->set_pre_buffer(true);
-  opts_->fragment_scan_options = fragment_scan_options;
-  ASSERT_OK_AND_ASSIGN(auto scan_task_it, fragment->Scan(opts_));
+  auto written = WriteToBuffer(reader->schema(), options);
 
-  int64_t task_count = 0;
-  int64_t row_count = 0;
+  EXPECT_OK_AND_ASSIGN(auto fragment, format_->MakeFragment(FileSource{written}));
+  EXPECT_OK_AND_ASSIGN(auto actual_schema, fragment->ReadPhysicalSchema());
+  AssertSchemaEqual(Schema({field("ts", timestamp(coerce_timestamps_to))}),
+                    *actual_schema);
+}
 
-  for (auto maybe_task : scan_task_it) {
-    ASSERT_OK_AND_ASSIGN(auto task, maybe_task);
-    task_count += 1;
-    ASSERT_OK_AND_ASSIGN(auto rb_it, task->Execute());
-    for (auto maybe_batch : rb_it) {
-      ASSERT_OK_AND_ASSIGN(auto batch, maybe_batch);
-      row_count += batch->num_rows();
-    }
+class TestParquetFileSystemDataset : public WriteFileSystemDatasetMixin,
+                                     public testing::Test {
+ public:
+  void SetUp() override {
+    MakeSourceDataset();
+    check_metadata_ = false;
+    auto parquet_format = std::make_shared<ParquetFileFormat>();
+    format_ = parquet_format;
+    SetWriteOptions(parquet_format->DefaultWriteOptions());
   }
+};
 
-  ASSERT_EQ(task_count, kBatchRepetitions);
-  ASSERT_EQ(row_count, kNumRows);
+TEST_F(TestParquetFileSystemDataset, WriteWithIdenticalPartitioningSchema) {
+  TestWriteWithIdenticalPartitioningSchema();
 }
 
-TEST_F(TestParquetFileFormat, OpenFailureWithRelevantError) {
-  std::shared_ptr<Buffer> buf = std::make_shared<Buffer>(util::string_view(""));
-  auto result = format_->Inspect(FileSource(buf));
-  EXPECT_RAISES_WITH_MESSAGE_THAT(IOError, testing::HasSubstr("<Buffer>"),
-                                  result.status());
-
-  constexpr auto file_name = "herp/derp";
-  ASSERT_OK_AND_ASSIGN(
-      auto fs, fs::internal::MockFileSystem::Make(fs::kNoTime, {fs::File(file_name)}));
-  result = format_->Inspect({file_name, fs});
-  EXPECT_RAISES_WITH_MESSAGE_THAT(IOError, testing::HasSubstr(file_name),
-                                  result.status());
+TEST_F(TestParquetFileSystemDataset, WriteWithUnrelatedPartitioningSchema) {
+  TestWriteWithUnrelatedPartitioningSchema();
 }
 
-static auto f32 = field("f32", float32());
-static auto f64 = field("f64", float64());
-static auto i32 = field("i32", int32());
-static auto i64 = field("i64", int64());
+TEST_F(TestParquetFileSystemDataset, WriteWithSupersetPartitioningSchema) {
+  TestWriteWithSupersetPartitioningSchema();
+}
 
-TEST_F(TestParquetFileFormat, ScanRecordBatchReaderProjected) {
-  SetSchema({f64, i64, f32, i32});
-  ASSERT_OK(SetProjection(opts_.get(), {"f64"}));
-  SetFilter(equal(field_ref("i32"), literal(0)));
+TEST_F(TestParquetFileSystemDataset, WriteWithEmptyPartitioningSchema) {
+  TestWriteWithEmptyPartitioningSchema();
+}
 
-  // NB: projection is applied by the scanner; FileFragment does not evaluate it so
-  // we will not drop "i32" even though it is not projected since we need it for
-  // filtering
-  auto expected_schema = schema({f64, i32});
+class TestParquetFileFormatScan : public FileFormatScanMixin<ParquetFormatHelper> {
+ public:
+  std::shared_ptr<RecordBatch> SingleBatch(std::shared_ptr<Fragment> fragment) {
+    auto batches = IteratorToVector(PhysicalBatches(fragment));
+    EXPECT_EQ(batches.size(), 1);
+    return batches.front();
+  }
 
-  auto reader = GetRecordBatchReader(opts_->dataset_schema);
-  auto source = GetFileSource(reader.get());
-  ASSERT_OK_AND_ASSIGN(auto fragment, format_->MakeFragment(*source));
+  void CountRowsAndBatchesInScan(std::shared_ptr<Fragment> fragment,
+                                 int64_t expected_rows, int64_t expected_batches) {
+    int64_t actual_rows = 0;
+    int64_t actual_batches = 0;
 
-  int64_t row_count = 0;
+    for (auto maybe_batch : PhysicalBatches(fragment)) {
+      ASSERT_OK_AND_ASSIGN(auto batch, maybe_batch);
+      actual_rows += batch->num_rows();
+      ++actual_batches;
+    }
 
-  for (auto maybe_batch : Batches(fragment.get())) {
-    ASSERT_OK_AND_ASSIGN(auto batch, maybe_batch);
-    row_count += batch->num_rows();
-    AssertSchemaEqual(*batch->schema(), *expected_schema,
-                      /*check_metadata=*/false);
+    EXPECT_EQ(actual_rows, expected_rows);
+    EXPECT_EQ(actual_batches, expected_batches);
   }
 
-  ASSERT_EQ(row_count, kNumRows);
-}
+  void CountRowGroupsInFragment(const std::shared_ptr<Fragment>& fragment,
+                                std::vector<int> expected_row_groups, Expression filter) {
+    SetFilter(filter);
 
-TEST_F(TestParquetFileFormat, ScanRecordBatchReaderProjectedMissingCols) {
-  SetSchema({f64, i64, f32, i32});
-  ASSERT_OK(SetProjection(opts_.get(), {"f64"}));
-  SetFilter(equal(field_ref("i32"), literal(0)));
-
-  auto reader_without_i32 = GetRecordBatchReader(schema({f64, i64, f32}));
-  auto reader_without_f64 = GetRecordBatchReader(schema({i64, f32, i32}));
-  auto reader = GetRecordBatchReader(schema({f64, i64, f32, i32}));
-
-  auto readers = {reader.get(), reader_without_i32.get(), reader_without_f64.get()};
-  for (auto reader : readers) {
-    auto source = GetFileSource(reader);
-    ASSERT_OK_AND_ASSIGN(auto fragment, format_->MakeFragment(*source));
-
-    // NB: projection is applied by the scanner; FileFragment does not evaluate it so
-    // we will not drop "i32" even though it is not projected since we need it for
-    // filtering
-    //
-    // in the case where a file doesn't contain a referenced field, we won't
-    // materialize it as nulls later
-    std::shared_ptr<Schema> expected_schema;
-    if (reader == reader_without_i32.get()) {
-      expected_schema = schema({f64});
-    } else if (reader == reader_without_f64.get()) {
-      expected_schema = schema({i32});
-    } else {
-      expected_schema = schema({f64, i32});
-    }
+    auto parquet_fragment = checked_pointer_cast<ParquetFileFragment>(fragment);
+    ASSERT_OK_AND_ASSIGN(auto fragments, parquet_fragment->SplitByRowGroup(opts_->filter))
 
-    int64_t row_count = 0;
+    EXPECT_EQ(fragments.size(), expected_row_groups.size());
+    for (size_t i = 0; i < fragments.size(); i++) {
+      auto expected = expected_row_groups[i];
+      auto parquet_fragment = checked_pointer_cast<ParquetFileFragment>(fragments[i]);
 
-    for (auto maybe_batch : Batches(fragment.get())) {
-      ASSERT_OK_AND_ASSIGN(auto batch, maybe_batch);
-      row_count += batch->num_rows();
-      AssertSchemaEqual(*batch->schema(), *expected_schema,
-                        /*check_metadata=*/false);
+      EXPECT_EQ(parquet_fragment->row_groups(), std::vector<int>{expected});
+      EXPECT_EQ(SingleBatch(parquet_fragment)->num_rows(), expected + 1);
     }
-
-    ASSERT_EQ(row_count, kNumRows);
   }
-}
-
-TEST_F(TestParquetFileFormat, Inspect) {
-  auto reader = GetRecordBatchReader(schema({field("f64", float64())}));
-  auto source = GetFileSource(reader.get());
+};
 
-  ASSERT_OK_AND_ASSIGN(auto actual, format_->Inspect(*source.get()));
-  AssertSchemaEqual(*actual, *reader->schema(), /*check_metadata=*/false);
+TEST_P(TestParquetFileFormatScan, ScanRecordBatchReader) { TestScan(); }
+TEST_P(TestParquetFileFormatScan, ScanRecordBatchReaderProjected) { TestScanProjected(); }
+TEST_P(TestParquetFileFormatScan, ScanRecordBatchReaderProjectedMissingCols) {
+  TestScanProjectedMissingCols();
 }
-
-TEST_F(TestParquetFileFormat, InspectDictEncoded) {
+TEST_P(TestParquetFileFormatScan, ScanRecordBatchReaderDictEncoded) {
   auto reader = GetRecordBatchReader(schema({field("utf8", utf8())}));
   auto source = GetFileSource(reader.get());
 
+  SetSchema(reader->schema()->fields());
+  SetFilter(literal(true));
   format_->reader_options.dict_columns = {"utf8"};
-  ASSERT_OK_AND_ASSIGN(auto actual, format_->Inspect(*source.get()));
+  ASSERT_OK_AND_ASSIGN(auto fragment, format_->MakeFragment(*source));
 
+  int64_t row_count = 0;
   Schema expected_schema({field("utf8", dictionary(int32(), utf8()))});
-  AssertSchemaEqual(*actual, expected_schema, /* check_metadata = */ false);
-}
 
-TEST_F(TestParquetFileFormat, IsSupported) {
+  for (auto maybe_batch : PhysicalBatches(fragment)) {
+    ASSERT_OK_AND_ASSIGN(auto batch, maybe_batch);
+    row_count += batch->num_rows();
+    AssertSchemaEqual(*batch->schema(), expected_schema, /* check_metadata = */ false);
+  }
+  ASSERT_EQ(row_count, expected_rows());
+}
+TEST_P(TestParquetFileFormatScan, ScanRecordBatchReaderPreBuffer) {
   auto reader = GetRecordBatchReader(schema({field("f64", float64())}));
   auto source = GetFileSource(reader.get());
 
-  bool supported = false;
-
-  std::shared_ptr<Buffer> buf = std::make_shared<Buffer>(util::string_view(""));
-  ASSERT_OK_AND_ASSIGN(supported, format_->IsSupported(FileSource(buf)));
-  ASSERT_EQ(supported, false);
+  SetSchema(reader->schema()->fields());
+  SetFilter(literal(true));
 
-  buf = std::make_shared<Buffer>(util::string_view("corrupted"));
-  ASSERT_OK_AND_ASSIGN(supported, format_->IsSupported(FileSource(buf)));
-  ASSERT_EQ(supported, false);
+  ASSERT_OK_AND_ASSIGN(auto fragment, format_->MakeFragment(*source));
+  auto fragment_scan_options = std::make_shared<ParquetFragmentScanOptions>();
+  fragment_scan_options->arrow_reader_properties->set_pre_buffer(true);
+  opts_->fragment_scan_options = fragment_scan_options;
+  ASSERT_OK_AND_ASSIGN(auto scan_task_it, fragment->Scan(opts_));
 
-  ASSERT_OK_AND_ASSIGN(supported, format_->IsSupported(*source));
-  EXPECT_EQ(supported, true);
+  int64_t row_count = 0;
+  for (auto maybe_batch : PhysicalBatches(fragment)) {
+    ASSERT_OK_AND_ASSIGN(auto batch, maybe_batch);
+    row_count += batch->num_rows();
+  }
+  ASSERT_EQ(row_count, expected_rows());
 }
-
-TEST_F(TestParquetFileFormat, PredicatePushdown) {
+TEST_P(TestParquetFileFormatScan, PredicatePushdown) {
   // Given a number `n`, the arithmetic dataset creates n RecordBatches where
   // each RecordBatch is keyed by a unique integer in [1, n]. Let `rb_i` denote
   // the record batch keyed by `i`. `rb_i` is composed of `i` rows where all
@@ -432,6 +345,7 @@ TEST_F(TestParquetFileFormat, PredicatePushdown) {
   // applied via ScanOptions' evaluator. Thus, counting the number of returned
   // rows and returned row groups is a good enough proxy to check if pushdown
   // predicate is working.
+
   constexpr int64_t kNumRowGroups = 16;
   constexpr int64_t kTotalNumRows = kNumRowGroups * (kNumRowGroups + 1) / 2;
 
@@ -473,7 +387,7 @@ TEST_F(TestParquetFileFormat, PredicatePushdown) {
                             kNumRowGroups - 5);
 }
 
-TEST_F(TestParquetFileFormat, PredicatePushdownRowGroupFragments) {
+TEST_P(TestParquetFileFormatScan, PredicatePushdownRowGroupFragments) {
   constexpr int64_t kNumRowGroups = 16;
 
   auto reader = ArithmeticDatasetFixture::GetRecordBatchReader(kNumRowGroups);
@@ -524,24 +438,7 @@ TEST_F(TestParquetFileFormat, PredicatePushdownRowGroupFragments) {
                                 less(field_ref("i64"), literal(8))));
 }
 
-TEST_F(TestParquetFileFormat, PredicatePushdownRowGroupFragmentsUsingStringColumn) {
-  auto table = TableFromJSON(schema({field("x", utf8())}),
-                             {
-                                 R"([{"x": "a"}])",
-                                 R"([{"x": "b"}, {"x": "b"}])",
-                                 R"([{"x": "c"}, {"x": "c"}, {"x": "c"}])",
-                                 R"([{"x": "a"}, {"x": "b"}, {"x": "c"}, {"x": "d"}])",
-                             });
-  TableBatchReader reader(*table);
-  auto source = GetFileSource(&reader);
-
-  SetSchema(reader.schema()->fields());
-  ASSERT_OK_AND_ASSIGN(auto fragment, format_->MakeFragment(*source));
-
-  CountRowGroupsInFragment(fragment, {0, 3}, equal(field_ref("x"), literal("a")));
-}
-
-TEST_F(TestParquetFileFormat, ExplicitRowGroupSelection) {
+TEST_P(TestParquetFileFormatScan, ExplicitRowGroupSelection) {
   constexpr int64_t kNumRowGroups = 16;
   constexpr int64_t kTotalNumRows = kNumRowGroups * (kNumRowGroups + 1) / 2;
 
@@ -598,88 +495,26 @@ TEST_F(TestParquetFileFormat, ExplicitRowGroupSelection) {
       row_groups_fragment({kNumRowGroups + 1})->Scan(opts_));
 }
 
-TEST_F(TestParquetFileFormat, WriteRecordBatchReader) {
-  std::shared_ptr<RecordBatchReader> reader =
-      GetRecordBatchReader(schema({field("f64", float64())}));
-  auto source = GetFileSource(reader.get());
-  reader = GetRecordBatchReader(schema({field("f64", float64())}));
-
-  SetSchema(reader->schema()->fields());
-
-  EXPECT_OK_AND_ASSIGN(auto sink, GetFileSink());
-
-  auto options = format_->DefaultWriteOptions();
-  EXPECT_OK_AND_ASSIGN(auto writer, format_->MakeWriter(sink, reader->schema(), options));
-  ASSERT_OK(writer->Write(reader.get()));
-  ASSERT_OK(writer->Finish());
-
-  EXPECT_OK_AND_ASSIGN(auto written, sink->Finish());
-
-  AssertBufferEqual(*written, *source->buffer());
-}
-
-TEST_F(TestParquetFileFormat, WriteRecordBatchReaderCustomOptions) {
-  TimeUnit::type coerce_timestamps_to = TimeUnit::MICRO,
-                 coerce_timestamps_from = TimeUnit::NANO;
-
-  std::shared_ptr<RecordBatchReader> reader =
-      GetRecordBatchReader(schema({field("ts", timestamp(coerce_timestamps_from))}));
-
-  SetSchema(reader->schema()->fields());
-
-  EXPECT_OK_AND_ASSIGN(auto sink, GetFileSink());
-
-  auto options =
-      checked_pointer_cast<ParquetFileWriteOptions>(format_->DefaultWriteOptions());
-  options->writer_properties = parquet::WriterProperties::Builder()
-                                   .created_by("TestParquetFileFormat")
-                                   ->disable_statistics()
-                                   ->build();
-
-  options->arrow_writer_properties = parquet::ArrowWriterProperties::Builder()
-                                         .coerce_timestamps(coerce_timestamps_to)
-                                         ->allow_truncated_timestamps()
-                                         ->build();
-
-  EXPECT_OK_AND_ASSIGN(auto writer, format_->MakeWriter(sink, reader->schema(), options));
-  ASSERT_OK(writer->Write(reader.get()));
-  ASSERT_OK(writer->Finish());
-
-  EXPECT_OK_AND_ASSIGN(auto written, sink->Finish());
-  EXPECT_OK_AND_ASSIGN(auto fragment, format_->MakeFragment(FileSource{written}));
-
-  EXPECT_OK_AND_ASSIGN(auto actual_schema, fragment->ReadPhysicalSchema());
-  AssertSchemaEqual(Schema({field("ts", timestamp(coerce_timestamps_to))}),
-                    *actual_schema);
-}
-
-class TestParquetFileSystemDataset : public WriteFileSystemDatasetMixin,
-                                     public testing::Test {
- public:
-  void SetUp() override {
-    MakeSourceDataset();
-    check_metadata_ = false;
-    auto parquet_format = std::make_shared<ParquetFileFormat>();
-    format_ = parquet_format;
-    SetWriteOptions(parquet_format->DefaultWriteOptions());
-  }
-};
-
-TEST_F(TestParquetFileSystemDataset, WriteWithIdenticalPartitioningSchema) {
-  TestWriteWithIdenticalPartitioningSchema();
-}
+TEST_P(TestParquetFileFormatScan, PredicatePushdownRowGroupFragmentsUsingStringColumn) {
+  auto table = TableFromJSON(schema({field("x", utf8())}),
+                             {
+                                 R"([{"x": "a"}])",
+                                 R"([{"x": "b"}, {"x": "b"}])",
+                                 R"([{"x": "c"}, {"x": "c"}, {"x": "c"}])",
+                                 R"([{"x": "a"}, {"x": "b"}, {"x": "c"}, {"x": "d"}])",
+                             });
+  TableBatchReader reader(*table);
+  auto source = GetFileSource(&reader);
 
-TEST_F(TestParquetFileSystemDataset, WriteWithUnrelatedPartitioningSchema) {
-  TestWriteWithUnrelatedPartitioningSchema();
-}
+  SetSchema(reader.schema()->fields());
+  ASSERT_OK_AND_ASSIGN(auto fragment, format_->MakeFragment(*source));
 
-TEST_F(TestParquetFileSystemDataset, WriteWithSupersetPartitioningSchema) {
-  TestWriteWithSupersetPartitioningSchema();
+  CountRowGroupsInFragment(fragment, {0, 3}, equal(field_ref("x"), literal("a")));
 }
 
-TEST_F(TestParquetFileSystemDataset, WriteWithEmptyPartitioningSchema) {
-  TestWriteWithEmptyPartitioningSchema();
-}
+INSTANTIATE_TEST_SUITE_P(TestScan, TestParquetFileFormatScan,
+                         ::testing::ValuesIn(TestFormatParams::Values()),
+                         TestFormatParams::ToTestNameString);
 
 }  // namespace dataset
 }  // namespace arrow
diff --git a/cpp/src/arrow/dataset/scanner_test.cc b/cpp/src/arrow/dataset/scanner_test.cc
index d9cb348808b..4f8c822ea00 100644
--- a/cpp/src/arrow/dataset/scanner_test.cc
+++ b/cpp/src/arrow/dataset/scanner_test.cc
@@ -18,7 +18,6 @@
 #include "arrow/dataset/scanner.h"
 
 #include <memory>
-#include <ostream>
 
 #include <gmock/gmock.h>
 
@@ -48,6 +47,19 @@ struct TestScannerParams {
   int num_batches;
   int items_per_batch;
 
+  std::string ToString() const {
+    // GTest requires this to be alphanumeric
+    std::stringstream ss;
+    ss << (use_async ? "Async" : "Sync") << (use_threads ? "Threaded" : "Serial")
+       << num_child_datasets << "d" << num_batches << "b" << items_per_batch << "r";
+    return ss.str();
+  }
+
+  static std::string ToTestNameString(
+      const ::testing::TestParamInfo<TestScannerParams>& info) {
+    return std::to_string(info.index) + info.param.ToString();
+  }
+
   static std::vector<TestScannerParams> Values() {
     std::vector<TestScannerParams> values;
     for (int sync = 0; sync < 2; sync++) {
diff --git a/cpp/src/arrow/dataset/test_util.h b/cpp/src/arrow/dataset/test_util.h
index 009e1d755e9..0a65099ce07 100644
--- a/cpp/src/arrow/dataset/test_util.h
+++ b/cpp/src/arrow/dataset/test_util.h
@@ -21,6 +21,7 @@
 #include <ciso646>
 #include <functional>
 #include <memory>
+#include <ostream>
 #include <string>
 #include <unordered_map>
 #include <unordered_set>
@@ -303,6 +304,315 @@ template <typename P>
 class DatasetFixtureMixinWithParam : public DatasetFixtureMixin,
                                      public ::testing::WithParamInterface<P> {};
 
+struct TestFormatParams {
+  bool use_async;
+  int num_batches;
+  int items_per_batch;
+
+  int64_t expected_rows() const { return num_batches * items_per_batch; }
+
+  std::string ToString() const {
+    // GTest requires this to be alphanumeric
+    std::stringstream ss;
+    ss << (use_async ? "Async" : "Sync") << num_batches << "b" << items_per_batch << "r";
+    return ss.str();
+  }
+
+  static std::string ToTestNameString(
+      const ::testing::TestParamInfo<TestFormatParams>& info) {
+    return std::to_string(info.index) + info.param.ToString();
+  }
+
+  static std::vector<TestFormatParams> Values() {
+    std::vector<TestFormatParams> values{{/*async=*/false, 16, 1024},
+                                         {/*async=*/true, 16, 1024}};
+    return values;
+  }
+};
+
+std::ostream& operator<<(std::ostream& out, const TestFormatParams& params) {
+  out << params.ToString();
+  return out;
+}
+
+class FileFormatWriterMixin {
+  virtual std::shared_ptr<Buffer> Write(RecordBatchReader* reader) = 0;
+  virtual std::shared_ptr<Buffer> Write(const Table& table) = 0;
+};
+
+/// FormatHelper should be a class with these static methods:
+/// std::shared_ptr<Buffer> Write(RecordBatchReader* reader);
+/// std::shared_ptr<FileFormat> MakeFormat();
+template <typename FormatHelper>
+class FileFormatFixtureMixin : public ::testing::Test {
+ public:
+  constexpr static int64_t kBatchSize = 1UL << 12;
+  constexpr static int64_t kBatchRepetitions = 1 << 5;
+
+  FileFormatFixtureMixin()
+      : format_(FormatHelper::MakeFormat()), opts_(std::make_shared<ScanOptions>()) {}
+
+  int64_t expected_batches() const { return kBatchRepetitions; }
+  int64_t expected_rows() const { return kBatchSize * kBatchRepetitions; }
+
+  std::shared_ptr<FileFragment> MakeFragment(const FileSource& source) {
+    EXPECT_OK_AND_ASSIGN(auto fragment, format_->MakeFragment(source));
+    return fragment;
+  }
+
+  std::shared_ptr<FileSource> GetFileSource(RecordBatchReader* reader) {
+    EXPECT_OK_AND_ASSIGN(auto buffer, FormatHelper::Write(reader));
+    return std::make_shared<FileSource>(std::move(buffer));
+  }
+
+  virtual std::shared_ptr<RecordBatchReader> GetRecordBatchReader(
+      std::shared_ptr<Schema> schema) {
+    return MakeGeneratedRecordBatch(schema, kBatchSize, kBatchRepetitions);
+  }
+
+  Result<std::shared_ptr<io::BufferOutputStream>> GetFileSink() {
+    ARROW_ASSIGN_OR_RAISE(std::shared_ptr<ResizableBuffer> buffer,
+                          AllocateResizableBuffer(0));
+    return std::make_shared<io::BufferOutputStream>(buffer);
+  }
+
+  void SetSchema(std::vector<std::shared_ptr<Field>> fields) {
+    opts_->dataset_schema = schema(std::move(fields));
+    ASSERT_OK(SetProjection(opts_.get(), opts_->dataset_schema->field_names()));
+  }
+
+  void SetFilter(Expression filter) {
+    ASSERT_OK_AND_ASSIGN(opts_->filter, filter.Bind(*opts_->dataset_schema));
+  }
+
+  void Project(std::vector<std::string> names) {
+    ASSERT_OK(SetProjection(opts_.get(), std::move(names)));
+  }
+
+  // Shared test cases
+  void TestInspectFailureWithRelevantError(StatusCode code) {
+    std::shared_ptr<Buffer> buf = std::make_shared<Buffer>(util::string_view(""));
+    auto result = format_->Inspect(FileSource(buf));
+    EXPECT_EQ(code, result.status().code());
+    EXPECT_THAT(result.status().ToString(), testing::HasSubstr("<Buffer>"));
+
+    constexpr auto file_name = "herp/derp";
+    ASSERT_OK_AND_ASSIGN(
+        auto fs, fs::internal::MockFileSystem::Make(fs::kNoTime, {fs::File(file_name)}));
+    result = format_->Inspect({file_name, fs});
+    EXPECT_EQ(code, result.status().code());
+    EXPECT_THAT(result.status().ToString(), testing::HasSubstr(file_name));
+  }
+  void TestInspect() {
+    auto reader = GetRecordBatchReader(schema({field("f64", float64())}));
+    auto source = GetFileSource(reader.get());
+
+    ASSERT_OK_AND_ASSIGN(auto actual, format_->Inspect(*source.get()));
+    AssertSchemaEqual(*actual, *reader->schema(), /*check_metadata=*/false);
+  }
+  void TestIsSupported() {
+    auto reader = GetRecordBatchReader(schema({field("f64", float64())}));
+    auto source = GetFileSource(reader.get());
+
+    bool supported = false;
+
+    std::shared_ptr<Buffer> buf = std::make_shared<Buffer>(util::string_view(""));
+    ASSERT_OK_AND_ASSIGN(supported, format_->IsSupported(FileSource(buf)));
+    ASSERT_EQ(supported, false);
+
+    buf = std::make_shared<Buffer>(util::string_view("corrupted"));
+    ASSERT_OK_AND_ASSIGN(supported, format_->IsSupported(FileSource(buf)));
+    ASSERT_EQ(supported, false);
+
+    ASSERT_OK_AND_ASSIGN(supported, format_->IsSupported(*source));
+    EXPECT_EQ(supported, true);
+  }
+  std::shared_ptr<Buffer> WriteToBuffer(
+      std::shared_ptr<Schema> schema,
+      std::shared_ptr<FileWriteOptions> options = nullptr) {
+    auto format = format_;
+    SetSchema(schema->fields());
+    EXPECT_OK_AND_ASSIGN(auto sink, GetFileSink());
+
+    if (!options) options = format->DefaultWriteOptions();
+    EXPECT_OK_AND_ASSIGN(auto writer, format->MakeWriter(sink, schema, options));
+    ARROW_EXPECT_OK(writer->Write(GetRecordBatchReader(schema).get()));
+    ARROW_EXPECT_OK(writer->Finish());
+    EXPECT_OK_AND_ASSIGN(auto written, sink->Finish());
+    return written;
+  }
+  void TestWrite() {
+    auto reader = this->GetRecordBatchReader(schema({field("f64", float64())}));
+    auto source = this->GetFileSource(reader.get());
+    auto written = this->WriteToBuffer(reader->schema());
+    AssertBufferEqual(*written, *source->buffer());
+  }
+
+ protected:
+  std::shared_ptr<typename FormatHelper::FormatType> format_;
+  std::shared_ptr<ScanOptions> opts_;
+};
+
+template <typename FormatHelper>
+class FileFormatScanMixin : public FileFormatFixtureMixin<FormatHelper>,
+                            public ::testing::WithParamInterface<TestFormatParams> {
+ public:
+  int64_t expected_batches() const { return GetParam().num_batches; }
+  int64_t expected_rows() const { return GetParam().expected_rows(); }
+
+  std::shared_ptr<RecordBatchReader> GetRecordBatchReader(
+      std::shared_ptr<Schema> schema) override {
+    return MakeGeneratedRecordBatch(schema, GetParam().items_per_batch,
+                                    GetParam().num_batches);
+  }
+
+  // Scan the fragment through the scanner.
+  RecordBatchIterator Batches(std::shared_ptr<Fragment> fragment) {
+    EXPECT_OK_AND_ASSIGN(auto schema, fragment->ReadPhysicalSchema());
+    auto dataset = std::make_shared<FragmentDataset>(schema, FragmentVector{fragment});
+    ScannerBuilder builder(dataset, opts_);
+    ARROW_EXPECT_OK(builder.UseAsync(GetParam().use_async));
+    EXPECT_OK_AND_ASSIGN(auto scanner, builder.Finish());
+    EXPECT_OK_AND_ASSIGN(auto batch_it, scanner->ScanBatches());
+    return MakeMapIterator([](TaggedRecordBatch tagged) { return tagged.record_batch; },
+                           std::move(batch_it));
+  }
+
+  // Scan the fragment directly, without using the scanner.
+  RecordBatchIterator PhysicalBatches(std::shared_ptr<Fragment> fragment) {
+    if (GetParam().use_async) {
+      EXPECT_OK_AND_ASSIGN(auto batch_gen, fragment->ScanBatchesAsync(opts_));
+      EXPECT_OK_AND_ASSIGN(auto batch_it, MakeGeneratorIterator(std::move(batch_gen)));
+      return batch_it;
+    }
+    EXPECT_OK_AND_ASSIGN(auto scan_task_it, fragment->Scan(opts_));
+    return MakeFlattenIterator(MakeMaybeMapIterator(
+        [](std::shared_ptr<ScanTask> scan_task) { return scan_task->Execute(); },
+        std::move(scan_task_it)));
+  }
+
+  // Shared test cases
+  void TestScan() {
+    auto reader = GetRecordBatchReader(schema({field("f64", float64())}));
+    auto source = this->GetFileSource(reader.get());
+
+    this->SetSchema(reader->schema()->fields());
+    auto fragment = this->MakeFragment(*source);
+
+    int64_t row_count = 0;
+    for (auto maybe_batch : Batches(fragment)) {
+      ASSERT_OK_AND_ASSIGN(auto batch, maybe_batch);
+      row_count += batch->num_rows();
+    }
+    ASSERT_EQ(row_count, GetParam().expected_rows());
+  }
+  // Ensure file formats only return columns needed to fulfill filter/projection
+  void TestScanProjected() {
+    auto f32 = field("f32", float32());
+    auto f64 = field("f64", float64());
+    auto i32 = field("i32", int32());
+    auto i64 = field("i64", int64());
+    this->SetSchema({f64, i64, f32, i32});
+    this->Project({"f64"});
+    this->SetFilter(equal(field_ref("i32"), literal(0)));
+
+    // NB: projection is applied by the scanner; FileFragment does not evaluate it so
+    // we will not drop "i32" even though it is not projected since we need it for
+    // filtering
+    auto expected_schema = schema({f64, i32});
+
+    auto reader = this->GetRecordBatchReader(opts_->dataset_schema);
+    auto source = this->GetFileSource(reader.get());
+    auto fragment = this->MakeFragment(*source);
+
+    int64_t row_count = 0;
+
+    for (auto maybe_batch : PhysicalBatches(fragment)) {
+      ASSERT_OK_AND_ASSIGN(auto batch, maybe_batch);
+      row_count += batch->num_rows();
+      AssertSchemaEqual(*batch->schema(), *expected_schema,
+                        /*check_metadata=*/false);
+    }
+
+    ASSERT_EQ(row_count, expected_rows());
+  }
+  void TestScanProjectedMissingCols() {
+    auto f32 = field("f32", float32());
+    auto f64 = field("f64", float64());
+    auto i32 = field("i32", int32());
+    auto i64 = field("i64", int64());
+    this->SetSchema({f64, i64, f32, i32});
+    this->Project({"f64"});
+    this->SetFilter(equal(field_ref("i32"), literal(0)));
+
+    auto reader_without_i32 = this->GetRecordBatchReader(schema({f64, i64, f32}));
+    auto reader_without_f64 = this->GetRecordBatchReader(schema({i64, f32, i32}));
+    auto reader = this->GetRecordBatchReader(schema({f64, i64, f32, i32}));
+
+    auto readers = {reader.get(), reader_without_i32.get(), reader_without_f64.get()};
+    for (auto reader : readers) {
+      SCOPED_TRACE(reader->schema()->ToString());
+      auto source = this->GetFileSource(reader);
+      auto fragment = this->MakeFragment(*source);
+
+      // NB: projection is applied by the scanner; FileFragment does not evaluate it so
+      // we will not drop "i32" even though it is not projected since we need it for
+      // filtering
+      //
+      // in the case where a file doesn't contain a referenced field, we won't
+      // materialize it as nulls later
+      std::shared_ptr<Schema> expected_schema;
+      if (reader == reader_without_i32.get()) {
+        expected_schema = schema({f64});
+      } else if (reader == reader_without_f64.get()) {
+        expected_schema = schema({i32});
+      } else {
+        expected_schema = schema({f64, i32});
+      }
+
+      int64_t row_count = 0;
+      for (auto maybe_batch : PhysicalBatches(fragment)) {
+        ASSERT_OK_AND_ASSIGN(auto batch, maybe_batch);
+        row_count += batch->num_rows();
+        AssertSchemaEqual(*batch->schema(), *expected_schema,
+                          /*check_metadata=*/false);
+      }
+      ASSERT_EQ(row_count, expected_rows());
+    }
+  }
+  void TestScanWithVirtualColumn() {
+    auto reader = this->GetRecordBatchReader(schema({field("f64", float64())}));
+    auto source = this->GetFileSource(reader.get());
+    // NB: dataset_schema includes a column not present in the file
+    this->SetSchema({reader->schema()->field(0), field("virtual", int32())});
+    auto fragment = this->MakeFragment(*source);
+
+    ASSERT_OK_AND_ASSIGN(auto physical_schema, fragment->ReadPhysicalSchema());
+    AssertSchemaEqual(Schema({field("f64", float64())}), *physical_schema);
+    {
+      int64_t row_count = 0;
+      for (auto maybe_batch : Batches(fragment)) {
+        ASSERT_OK_AND_ASSIGN(auto batch, maybe_batch);
+        AssertSchemaEqual(*batch->schema(), *opts_->projected_schema);
+        row_count += batch->num_rows();
+      }
+      ASSERT_EQ(row_count, expected_rows());
+    }
+    {
+      int64_t row_count = 0;
+      for (auto maybe_batch : PhysicalBatches(fragment)) {
+        ASSERT_OK_AND_ASSIGN(auto batch, maybe_batch);
+        AssertSchemaEqual(*batch->schema(), *physical_schema);
+        row_count += batch->num_rows();
+      }
+      ASSERT_EQ(row_count, expected_rows());
+    }
+  }
+
+ protected:
+  using FileFormatFixtureMixin<FormatHelper>::opts_;
+};
+
 /// \brief A dummy FileFormat implementation
 class DummyFileFormat : public FileFormat {
  public:

From 887d8f9216449bd53905d6a9243e7c77989ccd8f Mon Sep 17 00:00:00 2001
From: Alessandro Molina <amol@turbogears.org>
Date: Mon, 26 Apr 2021 14:11:06 -0400
Subject: [PATCH 123/719] ARROW-12492: [Python] Helper method to decode
 DictionaryArray back to Array

https://issues.apache.org/jira/browse/ARROW-12492

Also verified that the doc gets updated and the new method is correctly listed and documented.

Closes #10123 from amol-/ARROW-12492

Authored-by: Alessandro Molina <amol@turbogears.org>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 python/pyarrow/array.pxi           |  6 ++++++
 python/pyarrow/tests/test_array.py | 20 ++++++++++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 894ba96c013..81000ce826e 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -1992,6 +1992,12 @@ cdef class DictionaryArray(Array):
     def dictionary_encode(self):
         return self
 
+    def dictionary_decode(self):
+        """
+        Decodes the DictionaryArray to an Array.
+        """
+        return self.dictionary.take(self.indices)
+
     @property
     def dictionary(self):
         cdef CDictionaryArray* darr = <CDictionaryArray*>(self.ap)
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index 8e7e21f0bf6..46c5415ea1e 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -1532,6 +1532,26 @@ def test_dictionary_encode_zero_length():
     encoded.validate(full=True)
 
 
+def test_dictionary_decode():
+    cases = [
+        (pa.array([1, 2, 3, None, 1, 2, 3]),
+         pa.DictionaryArray.from_arrays(
+             pa.array([0, 1, 2, None, 0, 1, 2], type='int32'),
+             [1, 2, 3])),
+        (pa.array(['foo', None, 'bar', 'foo']),
+         pa.DictionaryArray.from_arrays(
+             pa.array([0, None, 1, 0], type='int32'),
+             ['foo', 'bar'])),
+        (pa.array(['foo', None, 'bar', 'foo'], type=pa.large_binary()),
+         pa.DictionaryArray.from_arrays(
+             pa.array([0, None, 1, 0], type='int32'),
+             pa.array(['foo', 'bar'], type=pa.large_binary()))),
+    ]
+    for expected, arr in cases:
+        result = arr.dictionary_decode()
+        assert result.equals(expected)
+
+
 def test_cast_time32_to_int():
     arr = pa.array(np.array([0, 1, 2], dtype='int32'),
                    type=pa.time32('s'))

From f046f321e84d11a11939e20b8856d80ab7418102 Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Mon, 26 Apr 2021 12:23:47 -0700
Subject: [PATCH 124/719] ARROW-12520: [R] Minor docs updates

Closes #10143 from jonkeane/ARROW-12520

Authored-by: Jonathan Keane <jkeane@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 r/vignettes/arrow.Rmd      |  2 +-
 r/vignettes/developing.Rmd | 25 +++++++++++++------------
 2 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/r/vignettes/arrow.Rmd b/r/vignettes/arrow.Rmd
index 21cbbe48d61..4c5da501435 100644
--- a/r/vignettes/arrow.Rmd
+++ b/r/vignettes/arrow.Rmd
@@ -104,7 +104,7 @@ corresponds to a class of the same name in the Arrow C++ library. The `arrow`
 package provides a variety of `R6` and S3 methods for interacting with instances
 of these classes.
 
-For convenience, the `arrow package also defines several synthetic classes that
+For convenience, the `arrow` package also defines several synthetic classes that
 do not exist in the C++ library, including:
 
 * `ArrowDatum`: inherited by `Scalar`, `Array`, and `ChunkedArray`
diff --git a/r/vignettes/developing.Rmd b/r/vignettes/developing.Rmd
index 38027a9ad51..efb2c80764c 100644
--- a/r/vignettes/developing.Rmd
+++ b/r/vignettes/developing.Rmd
@@ -116,7 +116,7 @@ export ARROW_HOME=$(pwd)/dist
 mkdir $ARROW_HOME
 ```
 
-_Special instructions on Linux:_ You will need to set `LD_LIBRARY_PATH` to the `lib` directory that will is under where we set `$ARROW_HOME`, before launching R and using Arrow. One way to do this is to add it to your profile (we use `~/.bash_profile` here, but you might need to put this in a different file depending on your setup, e.g. if you use a shell other than `bash`). On macOS we do not need to do this because the macOS shared library paths are hardcoded to their locations during build time.
+_Special instructions on Linux:_ You will need to set `LD_LIBRARY_PATH` to the `lib` directory that is under where we set `$ARROW_HOME`, before launching R and using Arrow. One way to do this is to add it to your profile (we use `~/.bash_profile` here, but you might need to put this in a different file depending on your setup, e.g. if you use a shell other than `bash`). On macOS we do not need to do this because the macOS shared library paths are hardcoded to their locations during build time.
 
 ```{bash, save=run & ubuntu & !sys_install}
 export LD_LIBRARY_PATH=$ARROW_HOME/lib:$LD_LIBRARY_PATH
@@ -140,14 +140,14 @@ cmake \
   -DARROW_COMPUTE=ON \
   -DARROW_CSV=ON \
   -DARROW_DATASET=ON \
+  -DARROW_EXTRA_ERROR_CONTEXT=ON \
   -DARROW_FILESYSTEM=ON \
+  -DARROW_INSTALL_NAME_RPATH=OFF \
   -DARROW_JEMALLOC=ON \
   -DARROW_JSON=ON \
   -DARROW_PARQUET=ON \
   -DARROW_WITH_SNAPPY=ON \
   -DARROW_WITH_ZLIB=ON \
-  -DARROW_INSTALL_NAME_RPATH=OFF \
-  -DARROW_EXTRA_ERROR_CONTEXT=ON \
   ..
 ```
 
@@ -172,14 +172,14 @@ cmake \
   -DARROW_COMPUTE=ON \
   -DARROW_CSV=ON \
   -DARROW_DATASET=ON \
+  -DARROW_EXTRA_ERROR_CONTEXT=ON \
   -DARROW_FILESYSTEM=ON \
+  -DARROW_INSTALL_NAME_RPATH=OFF \
   -DARROW_JEMALLOC=ON \
   -DARROW_JSON=ON \
   -DARROW_PARQUET=ON \
   -DARROW_WITH_SNAPPY=ON \
   -DARROW_WITH_ZLIB=ON \
-  -DARROW_INSTALL_NAME_RPATH=OFF \
-  -DARROW_EXTRA_ERROR_CONTEXT=ON \
   ..
 ```
 
@@ -191,6 +191,7 @@ To enable optional features including: S3 support, an alternative memory allocat
 
 ``` shell
   -DARROW_MIMALLOC=ON \
+  -DARROW_S3=ON \
   -DARROW_WITH_BROTLI=ON \
   -DARROW_WITH_BZ2=ON \
   -DARROW_WITH_LZ4=ON \
@@ -252,7 +253,7 @@ export ARROW_R_CXXFLAGS=-fno-omit-frame-pointer
 With the setups described here, you should not need to rebuild the Arrow library or even the C++ source in the R package as you iterated and work on the R package. The only time those should need to be rebuilt is if you have changed the C++ in the R package (and even then, `R CMD INSTALL .` should only need to recompile the files that have changed) _or_ if the Arrow library C++ has changed and there is a mismatch between the Arrow Library and the R package. If you find yourself rebuilding either or both each time you install the package or run tests, something is probably wrong with your set up.
 
 <details>
-<summary>For a full build: a `cmake` command with all of the R-relevant optional dependencies turned on</summary>
+<summary>For a full build: a `cmake` command with all of the R-relevant optional dependencies turned on. Development with other languages might require different flags as well. For example, to develop Python, you would need to also add `-DARROW_PYTHON=ON` (though all of the other flags used for Python are already included here).</summary>
 <p>
 
 ``` shell
@@ -262,26 +263,26 @@ cmake \
   -DARROW_COMPUTE=ON \
   -DARROW_CSV=ON \
   -DARROW_DATASET=ON \
+  -DARROW_EXTRA_ERROR_CONTEXT=ON \
   -DARROW_FILESYSTEM=ON \
+  -DARROW_INSTALL_NAME_RPATH=OFF \
   -DARROW_JEMALLOC=ON \
   -DARROW_JSON=ON \
-  -DARROW_PARQUET=ON \
-  -DARROW_WITH_SNAPPY=ON \
-  -DARROW_WITH_ZLIB=ON \
-  -DARROW_INSTALL_NAME_RPATH=OFF \
-  -DARROW_EXTRA_ERROR_CONTEXT=ON \
   -DARROW_MIMALLOC=ON \
+  -DARROW_PARQUET=ON \
+  -DARROW_S3=ON \
   -DARROW_WITH_BROTLI=ON \
   -DARROW_WITH_BZ2=ON \
   -DARROW_WITH_LZ4=ON \
   -DARROW_WITH_SNAPPY=ON \
+  -DARROW_WITH_ZLIB=ON \
   -DARROW_WITH_ZSTD=ON \
   ..
 ```
 </p>
 </details>  
 
-## Troublshooting
+## Troubleshooting
 
 Note that after any change to the C++ library, you must reinstall it and
 run `make clean` or `git clean -fdx .` to remove any cached object code

From a42515699d8e830f1d431ca352cda4e1221235f2 Mon Sep 17 00:00:00 2001
From: Dominik Moritz <domoritz@gmail.com>
Date: Tue, 27 Apr 2021 10:27:49 +0900
Subject: [PATCH 125/719] ARROW-12525: [JS] Vector toJSON() returns an array

Closes #10148 from domoritz/vector-array

Authored-by: Dominik Moritz <domoritz@gmail.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 js/src/vector/base.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/js/src/vector/base.ts b/js/src/vector/base.ts
index 5fdf1c21cf3..2ceecdda4a0 100644
--- a/js/src/vector/base.ts
+++ b/js/src/vector/base.ts
@@ -96,7 +96,7 @@ export abstract class BaseVector<T extends DataType = any> extends AbstractVecto
         ) as Vector<R>;
     }
 
-    public toJSON(): any { return [...this]; }
+    public toJSON() { return [...this]; }
 
     protected _sliceInternal(self: this, begin: number, end: number) {
         return self.clone(self.data.slice(begin, end - begin), null!);

From 347083417b9cec3de67684b2a6dc09d66e975090 Mon Sep 17 00:00:00 2001
From: Dominik Moritz <domoritz@gmail.com>
Date: Tue, 27 Apr 2021 15:03:50 +0900
Subject: [PATCH 126/719] ARROW-12537: [JS] Docs build should not include test
 sources

Closes #10156 from domoritz/dom/docs-build

Authored-by: Dominik Moritz <domoritz@gmail.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 js/tsconfig/tsconfig.bin.cjs.json | 2 +-
 js/tsconfig/tsconfig.docs.json    | 8 ++++++++
 js/typedoc.js                     | 2 +-
 3 files changed, 10 insertions(+), 2 deletions(-)
 create mode 100644 js/tsconfig/tsconfig.docs.json

diff --git a/js/tsconfig/tsconfig.bin.cjs.json b/js/tsconfig/tsconfig.bin.cjs.json
index 5c0139bf41b..8a006490aef 100644
--- a/js/tsconfig/tsconfig.bin.cjs.json
+++ b/js/tsconfig/tsconfig.bin.cjs.json
@@ -3,7 +3,7 @@
     "extends": "./tsconfig.base.json",
     "exclude": ["../node_modules"],
     "include": ["../src/bin/*.ts"],
-      "compilerOptions": {
+    "compilerOptions": {
       "target": "esnext",
       "module": "commonjs",
       "declaration": false
diff --git a/js/tsconfig/tsconfig.docs.json b/js/tsconfig/tsconfig.docs.json
new file mode 100644
index 00000000000..c73c307d598
--- /dev/null
+++ b/js/tsconfig/tsconfig.docs.json
@@ -0,0 +1,8 @@
+// Compiler configuration to build the docs
+{
+    "extends": "./tsconfig.base.json",
+    "include": ["../src/**/*.ts"],
+    "compilerOptions": {
+      "target": "ESNEXT"
+    }
+  }
diff --git a/js/typedoc.js b/js/typedoc.js
index e246108e645..1c647012823 100644
--- a/js/typedoc.js
+++ b/js/typedoc.js
@@ -19,7 +19,7 @@ module.exports = {
     entryPoints: ['src/Arrow.dom.ts', 'src/Arrow.node.ts'],
     out: 'doc',
     name: 'Apache Arrow',
-    tsconfig: 'tsconfig.json',
+    tsconfig: 'tsconfig/tsconfig.docs.json',
     excludePrivate: true,
     excludeProtected: true,
     excludeExternals: true,

From 6f893e4a29b499cd7425f5a1a43a9c3a562385ff Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Tue, 27 Apr 2021 15:34:39 +0900
Subject: [PATCH 127/719] ARROW-12555: [Java][Release] Java post-release script
 misses dataset JNI bindings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Also fixes https://issues.apache.org/jira/browse/ARROW-12551

Closes #10168 from kszucs/fix-java-post-release-script

Lead-authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Co-authored-by: Sutou Kouhei <kou@cozmixng.org>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 dev/release/post-11-java.sh | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/dev/release/post-11-java.sh b/dev/release/post-11-java.sh
index d9dc32a7f55..86e8d54a0a8 100755
--- a/dev/release/post-11-java.sh
+++ b/dev/release/post-11-java.sh
@@ -39,14 +39,23 @@ curl \
 rm -rf ${archive_name}
 tar xf ${tar_gz}
 
+pushd ${archive_name}
+
+# clone the testing data to the appropiate directories
+git clone https://github.com/apache/arrow-testing.git testing
+git clone https://github.com/apache/parquet-testing.git cpp/submodules/parquet-testing
+
 # build the jni bindings similarly like the 01-perform.sh does
-mkdir -p ${archive_name}/cpp/java-build
-pushd ${archive_name}/cpp/java-build
+mkdir -p cpp/java-build
+pushd cpp/java-build
 cmake \
-  -DARROW_GANDIVA=ON \
+  -DARROW_DATASET=ON \
+  -DARROW_FILESYSTEM=ON \
   -DARROW_GANDIVA_JAVA=ON \
+  -DARROW_GANDIVA=ON \
   -DARROW_JNI=ON \
   -DARROW_ORC=ON \
+  -DARROW_PARQUET=ON \
   -DCMAKE_BUILD_TYPE=release \
   -G Ninja \
   ..
@@ -54,9 +63,12 @@ ninja
 popd
 
 # go in the java subfolder
-pushd ${archive_name}/java
+pushd java
 # stage the artifacts using both the apache-release and arrow-jni profiles
-mvn -Papache-release,arrow-jni -Darrow.cpp.build.dir=$(realpath ../cpp/java-build) deploy
+# Note: on ORC checkstyle failure use -Dcheckstyle.skip=true until https://issues.apache.org/jira/browse/ARROW-12552 gets resolved
+mvn -Papache-release,arrow-jni -Darrow.cpp.build.dir=$(realpath ../cpp/java-build/release) deploy
+popd
+
 popd
 
 echo "Success! The maven artifacts have been stated. Proceed with the following steps:"

From 29130ca54cd773f6f52e17bc78d37fb72d53eb49 Mon Sep 17 00:00:00 2001
From: Yibo Cai <yibo.cai@arm.com>
Date: Tue, 27 Apr 2021 11:29:16 +0200
Subject: [PATCH 128/719] ARROW-11990: [C++][Compute] Handle errors
 consistently

Arrow handles errors by returning Status/Result. But in compute kernels,
errors are populated in KernelContext.status. This is not consistent,
and updating KernelContext.status is not thread safe.

This patch removes KernelContext.status and returns kernel errors as
Status/Result.

See big performance improvement for arithmetic kernels, especially the
checked version (up to 4x).

Also see ~50% drops from some filter kernels. Will investigate deeper
as follow up task.

Closes #10098 from cyb70289/11990-kernel-error-handling

Authored-by: Yibo Cai <yibo.cai@arm.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/compute/exec.cc                 |  29 +-
 cpp/src/arrow/compute/exec_test.cc            |  25 +-
 cpp/src/arrow/compute/function.cc             |   3 +-
 cpp/src/arrow/compute/function_benchmark.cc   |  14 +-
 cpp/src/arrow/compute/function_test.cc        |  13 +-
 cpp/src/arrow/compute/kernel.cc               |  10 -
 cpp/src/arrow/compute/kernel.h                |  49 +--
 .../arrow/compute/kernels/aggregate_basic.cc  |  65 ++--
 .../compute/kernels/aggregate_basic_avx2.cc   |  11 +-
 .../compute/kernels/aggregate_basic_avx512.cc |  12 +-
 .../kernels/aggregate_basic_internal.h        |  36 +-
 .../compute/kernels/aggregate_internal.h      |   6 +-
 .../arrow/compute/kernels/aggregate_mode.cc   |  41 +-
 .../compute/kernels/aggregate_quantile.cc     |  34 +-
 .../compute/kernels/aggregate_tdigest.cc      |  20 +-
 .../compute/kernels/aggregate_var_std.cc      |  20 +-
 .../arrow/compute/kernels/codegen_internal.cc |   6 +-
 .../arrow/compute/kernels/codegen_internal.h  | 300 ++++++++-------
 .../arrow/compute/kernels/hash_aggregate.cc   |  36 +-
 .../compute/kernels/scalar_arithmetic.cc      |  83 ++--
 .../arrow/compute/kernels/scalar_boolean.cc   | 205 +++++-----
 .../compute/kernels/scalar_cast_boolean.cc    |   6 +-
 .../compute/kernels/scalar_cast_internal.cc   |  55 +--
 .../compute/kernels/scalar_cast_internal.h    |  12 +-
 .../compute/kernels/scalar_cast_nested.cc     |  24 +-
 .../compute/kernels/scalar_cast_numeric.cc    |  79 ++--
 .../compute/kernels/scalar_cast_string.cc     |  50 ++-
 .../compute/kernels/scalar_cast_temporal.cc   |  88 ++---
 .../arrow/compute/kernels/scalar_compare.cc   |   8 +-
 .../arrow/compute/kernels/scalar_fill_null.cc |  46 +--
 .../arrow/compute/kernels/scalar_nested.cc    |  23 +-
 .../compute/kernels/scalar_set_lookup.cc      |  26 +-
 .../arrow/compute/kernels/scalar_string.cc    | 364 ++++++++++--------
 .../arrow/compute/kernels/scalar_validity.cc  |  43 ++-
 .../arrow/compute/kernels/util_internal.cc    |  17 +-
 cpp/src/arrow/compute/kernels/vector_hash.cc  |  59 +--
 .../arrow/compute/kernels/vector_nested.cc    |  18 +-
 .../arrow/compute/kernels/vector_selection.cc | 150 ++++----
 cpp/src/arrow/compute/kernels/vector_sort.cc  |  15 +-
 cpp/src/arrow/dataset/expression.cc           |   6 +-
 40 files changed, 1090 insertions(+), 1017 deletions(-)

diff --git a/cpp/src/arrow/compute/exec.cc b/cpp/src/arrow/compute/exec.cc
index b88248071c2..6df845f0f44 100644
--- a/cpp/src/arrow/compute/exec.cc
+++ b/cpp/src/arrow/compute/exec.cc
@@ -616,8 +616,7 @@ class ScalarExecutor : public KernelExecutorImpl<ScalarKernel> {
       }
     }
 
-    kernel_->exec(kernel_ctx_, batch, &out);
-    ARROW_CTX_RETURN_IF_ERROR(kernel_ctx_);
+    RETURN_NOT_OK(kernel_->exec(kernel_ctx_, batch, &out));
     if (!preallocate_contiguous_) {
       // If we are producing chunked output rather than one big array, then
       // emit each chunk as soon as it's available
@@ -794,8 +793,7 @@ class VectorExecutor : public KernelExecutorImpl<VectorKernel> {
         output_descr_.shape == ValueDescr::ARRAY) {
       RETURN_NOT_OK(PropagateNulls(kernel_ctx_, batch, out.mutable_array()));
     }
-    kernel_->exec(kernel_ctx_, batch, &out);
-    ARROW_CTX_RETURN_IF_ERROR(kernel_ctx_);
+    RETURN_NOT_OK(kernel_->exec(kernel_ctx_, batch, &out));
     if (!kernel_->finalize) {
       // If there is no result finalizer (e.g. for hash-based functions, we can
       // emit the processed batch right away rather than waiting
@@ -810,8 +808,7 @@ class VectorExecutor : public KernelExecutorImpl<VectorKernel> {
     if (kernel_->finalize) {
       // Intermediate results require post-processing after the execution is
       // completed (possibly involving some accumulated state)
-      kernel_->finalize(kernel_ctx_, &results_);
-      ARROW_CTX_RETURN_IF_ERROR(kernel_ctx_);
+      RETURN_NOT_OK(kernel_->finalize(kernel_ctx_, &results_));
       for (const auto& result : results_) {
         RETURN_NOT_OK(listener->OnResult(result));
       }
@@ -864,8 +861,7 @@ class ScalarAggExecutor : public KernelExecutorImpl<ScalarAggregateKernel> {
     }
 
     Datum out;
-    kernel_->finalize(kernel_ctx_, &out);
-    ARROW_CTX_RETURN_IF_ERROR(kernel_ctx_);
+    RETURN_NOT_OK(kernel_->finalize(kernel_ctx_, &out));
     RETURN_NOT_OK(listener->OnResult(std::move(out)));
     return Status::OK();
   }
@@ -879,24 +875,19 @@ class ScalarAggExecutor : public KernelExecutorImpl<ScalarAggregateKernel> {
  private:
   Status Consume(const ExecBatch& batch) {
     // FIXME(ARROW-11840) don't merge *any* aggegates for every batch
-    auto batch_state = kernel_->init(kernel_ctx_, {kernel_, *input_descrs_, options_});
-    ARROW_CTX_RETURN_IF_ERROR(kernel_ctx_);
+    ARROW_ASSIGN_OR_RAISE(
+        auto batch_state,
+        kernel_->init(kernel_ctx_, {kernel_, *input_descrs_, options_}));
 
     if (batch_state == nullptr) {
-      kernel_ctx_->SetStatus(
-          Status::Invalid("ScalarAggregation requires non-null kernel state"));
-      return kernel_ctx_->status();
+      return Status::Invalid("ScalarAggregation requires non-null kernel state");
     }
 
     KernelContext batch_ctx(exec_context());
     batch_ctx.SetState(batch_state.get());
 
-    kernel_->consume(&batch_ctx, batch);
-    ARROW_CTX_RETURN_IF_ERROR(&batch_ctx);
-
-    kernel_->merge(kernel_ctx_, std::move(*batch_state), state());
-    ARROW_CTX_RETURN_IF_ERROR(kernel_ctx_);
-
+    RETURN_NOT_OK(kernel_->consume(&batch_ctx, batch));
+    RETURN_NOT_OK(kernel_->merge(kernel_ctx_, std::move(*batch_state), state()));
     return Status::OK();
   }
 
diff --git a/cpp/src/arrow/compute/exec_test.cc b/cpp/src/arrow/compute/exec_test.cc
index e9bd57596b5..c56e6471c97 100644
--- a/cpp/src/arrow/compute/exec_test.cc
+++ b/cpp/src/arrow/compute/exec_test.cc
@@ -537,7 +537,7 @@ TEST_F(TestExecBatchIterator, ZeroLengthInputs) {
 // ----------------------------------------------------------------------
 // Scalar function execution
 
-void ExecCopy(KernelContext*, const ExecBatch& batch, Datum* out) {
+Status ExecCopy(KernelContext*, const ExecBatch& batch, Datum* out) {
   DCHECK_EQ(1, batch.num_values());
   const auto& type = checked_cast<const FixedWidthType&>(*batch[0].type());
   int value_size = type.bit_width() / 8;
@@ -547,9 +547,10 @@ void ExecCopy(KernelContext*, const ExecBatch& batch, Datum* out) {
   uint8_t* dst = out_arr->buffers[1]->mutable_data() + out_arr->offset * value_size;
   const uint8_t* src = arg0.buffers[1]->data() + arg0.offset * value_size;
   std::memcpy(dst, src, batch.length * value_size);
+  return Status::OK();
 }
 
-void ExecComputedBitmap(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status ExecComputedBitmap(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
   // Propagate nulls not used. Check that the out bitmap isn't the same already
   // as the input bitmap
   const ArrayData& arg0 = *batch[0].array();
@@ -564,10 +565,10 @@ void ExecComputedBitmap(KernelContext* ctx, const ExecBatch& batch, Datum* out)
 
   internal::CopyBitmap(arg0.buffers[0]->data(), arg0.offset, batch.length,
                        out_arr->buffers[0]->mutable_data(), out_arr->offset);
-  ExecCopy(ctx, batch, out);
+  return ExecCopy(ctx, batch, out);
 }
 
-void ExecNoPreallocatedData(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status ExecNoPreallocatedData(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
   // Validity preallocated, but not the data
   ArrayData* out_arr = out->mutable_array();
   DCHECK_EQ(0, out_arr->offset);
@@ -575,10 +576,11 @@ void ExecNoPreallocatedData(KernelContext* ctx, const ExecBatch& batch, Datum* o
   int value_size = type.bit_width() / 8;
   Status s = (ctx->Allocate(out_arr->length * value_size).Value(&out_arr->buffers[1]));
   DCHECK_OK(s);
-  ExecCopy(ctx, batch, out);
+  return ExecCopy(ctx, batch, out);
 }
 
-void ExecNoPreallocatedAnything(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status ExecNoPreallocatedAnything(KernelContext* ctx, const ExecBatch& batch,
+                                  Datum* out) {
   // Neither validity nor data preallocated
   ArrayData* out_arr = out->mutable_array();
   DCHECK_EQ(0, out_arr->offset);
@@ -589,7 +591,7 @@ void ExecNoPreallocatedAnything(KernelContext* ctx, const ExecBatch& batch, Datu
                        out_arr->buffers[0]->mutable_data(), /*offset=*/0);
 
   // Reuse the kernel that allocates the data
-  ExecNoPreallocatedData(ctx, batch, out);
+  return ExecNoPreallocatedData(ctx, batch, out);
 }
 
 struct ExampleOptions : public FunctionOptions {
@@ -602,12 +604,13 @@ struct ExampleState : public KernelState {
   explicit ExampleState(std::shared_ptr<Scalar> value) : value(std::move(value)) {}
 };
 
-std::unique_ptr<KernelState> InitStateful(KernelContext*, const KernelInitArgs& args) {
+Result<std::unique_ptr<KernelState>> InitStateful(KernelContext*,
+                                                  const KernelInitArgs& args) {
   auto func_options = static_cast<const ExampleOptions*>(args.options);
   return std::unique_ptr<KernelState>(new ExampleState{func_options->value});
 }
 
-void ExecStateful(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status ExecStateful(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
   // We take the value from the state and multiply the data in batch[0] with it
   ExampleState* state = static_cast<ExampleState*>(ctx->state());
   int32_t multiplier = checked_cast<const Int32Scalar&>(*state->value).value;
@@ -619,12 +622,14 @@ void ExecStateful(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
   for (int64_t i = 0; i < arg0.length; ++i) {
     dst[i] = arg0_data[i] * multiplier;
   }
+  return Status::OK();
 }
 
-void ExecAddInt32(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status ExecAddInt32(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
   const Int32Scalar& arg0 = batch[0].scalar_as<Int32Scalar>();
   const Int32Scalar& arg1 = batch[1].scalar_as<Int32Scalar>();
   out->value = std::make_shared<Int32Scalar>(arg0.value + arg1.value);
+  return Status::OK();
 }
 
 class TestCallScalarFunction : public TestComputeInternals {
diff --git a/cpp/src/arrow/compute/function.cc b/cpp/src/arrow/compute/function.cc
index c8fc8b8dec0..f74bb245d77 100644
--- a/cpp/src/arrow/compute/function.cc
+++ b/cpp/src/arrow/compute/function.cc
@@ -179,8 +179,7 @@ Result<Datum> Function::Execute(const std::vector<Datum>& args,
 
   KernelContext kernel_ctx{ctx};
   if (kernel->init) {
-    state = kernel->init(&kernel_ctx, {kernel, inputs, options});
-    RETURN_NOT_OK(kernel_ctx.status());
+    ARROW_ASSIGN_OR_RAISE(state, kernel->init(&kernel_ctx, {kernel, inputs, options}));
     kernel_ctx.SetState(state.get());
   }
 
diff --git a/cpp/src/arrow/compute/function_benchmark.cc b/cpp/src/arrow/compute/function_benchmark.cc
index 5dc305bdd89..daf03754984 100644
--- a/cpp/src/arrow/compute/function_benchmark.cc
+++ b/cpp/src/arrow/compute/function_benchmark.cc
@@ -78,16 +78,17 @@ void BM_CastDispatchBaseline(benchmark::State& state) {
 
   ExecContext exec_context;
   KernelContext kernel_context(&exec_context);
-  auto cast_state =
-      cast_kernel->init(&kernel_context, {cast_kernel, {double_type}, &cast_options});
-  ABORT_NOT_OK(kernel_context.status());
+  auto cast_state = cast_kernel
+                        ->init(&kernel_context,
+                               KernelInitArgs{cast_kernel, {double_type}, &cast_options})
+                        .ValueOrDie();
   kernel_context.SetState(cast_state.get());
 
   for (auto _ : state) {
     Datum timestamp_scalar = MakeNullScalar(double_type);
     for (Datum int_scalar : int_scalars) {
-      exec(&kernel_context, {{std::move(int_scalar)}, 1}, &timestamp_scalar);
-      ABORT_NOT_OK(kernel_context.status());
+      ABORT_NOT_OK(
+          exec(&kernel_context, {{std::move(int_scalar)}, 1}, &timestamp_scalar));
     }
     benchmark::DoNotOptimize(timestamp_scalar);
   }
@@ -164,8 +165,7 @@ void BM_ExecuteScalarKernelOnScalar(benchmark::State& state) {
     int64_t total = 0;
     for (const auto& scalar : scalars) {
       Datum result{MakeNullScalar(int64())};
-      exec(&kernel_context, ExecBatch{{scalar}, /*length=*/1}, &result);
-      ABORT_NOT_OK(kernel_context.status());
+      ABORT_NOT_OK(exec(&kernel_context, ExecBatch{{scalar}, /*length=*/1}, &result));
       total += result.scalar()->is_valid;
     }
     benchmark::DoNotOptimize(total);
diff --git a/cpp/src/arrow/compute/function_test.cc b/cpp/src/arrow/compute/function_test.cc
index b6f1815b89e..581555e931f 100644
--- a/cpp/src/arrow/compute/function_test.cc
+++ b/cpp/src/arrow/compute/function_test.cc
@@ -87,8 +87,7 @@ TEST(VectorFunction, Basics) {
 }
 
 auto ExecNYI = [](KernelContext* ctx, const ExecBatch& args, Datum* out) {
-  ctx->SetStatus(Status::NotImplemented("NYI"));
-  return;
+  return Status::NotImplemented("NYI");
 };
 
 template <typename FunctionType>
@@ -181,13 +180,15 @@ TEST(ScalarAggregateFunction, Basics) {
   ASSERT_EQ(Function::SCALAR_AGGREGATE, func.kind());
 }
 
-std::unique_ptr<KernelState> NoopInit(KernelContext*, const KernelInitArgs&) {
+Result<std::unique_ptr<KernelState>> NoopInit(KernelContext*, const KernelInitArgs&) {
   return nullptr;
 }
 
-void NoopConsume(KernelContext*, const ExecBatch&) {}
-void NoopMerge(KernelContext*, const KernelState&, KernelState*) {}
-void NoopFinalize(KernelContext*, Datum*) {}
+Status NoopConsume(KernelContext*, const ExecBatch&) { return Status::OK(); }
+Status NoopMerge(KernelContext*, const KernelState&, KernelState*) {
+  return Status::OK();
+}
+Status NoopFinalize(KernelContext*, Datum*) { return Status::OK(); }
 
 TEST(ScalarAggregateFunction, DispatchExact) {
   ScalarAggregateFunction func("agg_test", Arity::Unary(), /*doc=*/nullptr);
diff --git a/cpp/src/arrow/compute/kernel.cc b/cpp/src/arrow/compute/kernel.cc
index 88b42716fa2..8fa740ed247 100644
--- a/cpp/src/arrow/compute/kernel.cc
+++ b/cpp/src/arrow/compute/kernel.cc
@@ -59,16 +59,6 @@ Result<std::shared_ptr<ResizableBuffer>> KernelContext::AllocateBitmap(int64_t n
   return result;
 }
 
-void KernelContext::SetStatus(const Status& status) {
-  if (ARROW_PREDICT_TRUE(status.ok())) {
-    return;
-  }
-  status_ = status;
-}
-
-/// \brief Clear any error status
-void KernelContext::ResetStatus() { status_ = Status::OK(); }
-
 // ----------------------------------------------------------------------
 // Some basic TypeMatcher implementations
 
diff --git a/cpp/src/arrow/compute/kernel.h b/cpp/src/arrow/compute/kernel.h
index b99b41170d2..0fecea080d8 100644
--- a/cpp/src/arrow/compute/kernel.h
+++ b/cpp/src/arrow/compute/kernel.h
@@ -63,22 +63,6 @@ class ARROW_EXPORT KernelContext {
   /// byte is preemptively zeroed to help avoid ASAN or valgrind issues.
   Result<std::shared_ptr<ResizableBuffer>> AllocateBitmap(int64_t num_bits);
 
-  /// \brief Indicate that an error has occurred, to be checked by a exec caller
-  /// \param[in] status a Status instance.
-  ///
-  /// \note Will not overwrite a prior set Status, so we will have the first
-  /// error that occurred until ExecContext::ResetStatus is called.
-  void SetStatus(const Status& status);
-
-  /// \brief Clear any error status.
-  void ResetStatus();
-
-  /// \brief Return true if an error has occurred.
-  bool HasError() const { return !status_.ok(); }
-
-  /// \brief Return the current status of the context.
-  const Status& status() const { return status_; }
-
   /// \brief Assign the active KernelState to be utilized for each stage of
   /// kernel execution. Ownership and memory lifetime of the KernelState must
   /// be minded separately.
@@ -96,21 +80,9 @@ class ARROW_EXPORT KernelContext {
 
  private:
   ExecContext* exec_ctx_;
-  Status status_;
   KernelState* state_;
 };
 
-// A macro to invoke for error control flow after invoking functions (such as
-// kernel init or exec functions) that propagate errors via KernelContext.
-#define ARROW_CTX_RETURN_IF_ERROR(CTX)            \
-  do {                                            \
-    if (ARROW_PREDICT_FALSE((CTX)->HasError())) { \
-      Status s = (CTX)->status();                 \
-      (CTX)->ResetStatus();                       \
-      return s;                                   \
-    }                                             \
-  } while (0)
-
 /// \brief The standard kernel execution API that must be implemented for
 /// SCALAR and VECTOR kernel types. This includes both stateless and stateful
 /// kernels. Kernels depending on some execution state access that state via
@@ -119,7 +91,7 @@ class ARROW_EXPORT KernelContext {
 /// into pre-allocated memory if they are able, though for some kernels
 /// (e.g. in cases when a builder like StringBuilder) must be employed this may
 /// not be possible.
-using ArrayKernelExec = std::function<void(KernelContext*, const ExecBatch&, Datum*)>;
+using ArrayKernelExec = std::function<Status(KernelContext*, const ExecBatch&, Datum*)>;
 
 /// \brief An type-checking interface to permit customizable validation rules
 /// for use with InputType and KernelSignature. This is for scenarios where the
@@ -523,9 +495,8 @@ struct KernelInitArgs {
 };
 
 /// \brief Common initializer function for all kernel types.
-/// If an error occurs it will be stored in the KernelContext; nullptr will be returned.
-using KernelInit =
-    std::function<std::unique_ptr<KernelState>(KernelContext*, const KernelInitArgs&)>;
+using KernelInit = std::function<Result<std::unique_ptr<KernelState>>(
+    KernelContext*, const KernelInitArgs&)>;
 
 /// \brief Base type for kernels. Contains the function signature and
 /// optionally the state initialization function, along with some common
@@ -608,7 +579,7 @@ struct ScalarKernel : public ArrayKernel {
 // VectorKernel (for VectorFunction)
 
 /// \brief See VectorKernel::finalize member for usage
-using VectorFinalize = std::function<void(KernelContext*, std::vector<Datum>*)>;
+using VectorFinalize = std::function<Status(KernelContext*, std::vector<Datum>*)>;
 
 /// \brief Kernel data structure for implementations of VectorFunction. In
 /// addition to the members found in ArrayKernel, contains an optional
@@ -663,13 +634,13 @@ struct VectorKernel : public ArrayKernel {
 // ----------------------------------------------------------------------
 // ScalarAggregateKernel (for ScalarAggregateFunction)
 
-using ScalarAggregateConsume = std::function<void(KernelContext*, const ExecBatch&)>;
+using ScalarAggregateConsume = std::function<Status(KernelContext*, const ExecBatch&)>;
 
 using ScalarAggregateMerge =
-    std::function<void(KernelContext*, KernelState&&, KernelState*)>;
+    std::function<Status(KernelContext*, KernelState&&, KernelState*)>;
 
 // Finalize returns Datum to permit multiple return values
-using ScalarAggregateFinalize = std::function<void(KernelContext*, Datum*)>;
+using ScalarAggregateFinalize = std::function<Status(KernelContext*, Datum*)>;
 
 /// \brief Kernel data structure for implementations of
 /// ScalarAggregateFunction. The four necessary components of an aggregation
@@ -707,13 +678,13 @@ struct ScalarAggregateKernel : public Kernel {
 // ----------------------------------------------------------------------
 // HashAggregateKernel (for HashAggregateFunction)
 
-using HashAggregateConsume = std::function<void(KernelContext*, const ExecBatch&)>;
+using HashAggregateConsume = std::function<Status(KernelContext*, const ExecBatch&)>;
 
 using HashAggregateMerge =
-    std::function<void(KernelContext*, KernelState&&, KernelState*)>;
+    std::function<Status(KernelContext*, KernelState&&, KernelState*)>;
 
 // Finalize returns Datum to permit multiple return values
-using HashAggregateFinalize = std::function<void(KernelContext*, Datum*)>;
+using HashAggregateFinalize = std::function<Status(KernelContext*, Datum*)>;
 
 /// \brief Kernel data structure for implementations of
 /// HashAggregateFunction. The four necessary components of an aggregation
diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic.cc b/cpp/src/arrow/compute/kernels/aggregate_basic.cc
index 61dc8cb403c..e4eec50c66d 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_basic.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_basic.cc
@@ -27,16 +27,16 @@ namespace compute {
 
 namespace {
 
-void AggregateConsume(KernelContext* ctx, const ExecBatch& batch) {
-  checked_cast<ScalarAggregator*>(ctx->state())->Consume(ctx, batch);
+Status AggregateConsume(KernelContext* ctx, const ExecBatch& batch) {
+  return checked_cast<ScalarAggregator*>(ctx->state())->Consume(ctx, batch);
 }
 
-void AggregateMerge(KernelContext* ctx, KernelState&& src, KernelState* dst) {
-  checked_cast<ScalarAggregator*>(dst)->MergeFrom(ctx, std::move(src));
+Status AggregateMerge(KernelContext* ctx, KernelState&& src, KernelState* dst) {
+  return checked_cast<ScalarAggregator*>(dst)->MergeFrom(ctx, std::move(src));
 }
 
-void AggregateFinalize(KernelContext* ctx, Datum* out) {
-  checked_cast<ScalarAggregator*>(ctx->state())->Finalize(ctx, out);
+Status AggregateFinalize(KernelContext* ctx, Datum* out) {
+  return checked_cast<ScalarAggregator*>(ctx->state())->Finalize(ctx, out);
 }
 
 }  // namespace
@@ -58,20 +58,22 @@ namespace aggregate {
 struct CountImpl : public ScalarAggregator {
   explicit CountImpl(CountOptions options) : options(std::move(options)) {}
 
-  void Consume(KernelContext*, const ExecBatch& batch) override {
+  Status Consume(KernelContext*, const ExecBatch& batch) override {
     const ArrayData& input = *batch[0].array();
     const int64_t nulls = input.GetNullCount();
     this->nulls += nulls;
     this->non_nulls += input.length - nulls;
+    return Status::OK();
   }
 
-  void MergeFrom(KernelContext*, KernelState&& src) override {
+  Status MergeFrom(KernelContext*, KernelState&& src) override {
     const auto& other_state = checked_cast<const CountImpl&>(src);
     this->non_nulls += other_state.non_nulls;
     this->nulls += other_state.nulls;
+    return Status::OK();
   }
 
-  void Finalize(KernelContext* ctx, Datum* out) override {
+  Status Finalize(KernelContext* ctx, Datum* out) override {
     const auto& state = checked_cast<const CountImpl&>(*ctx->state());
     switch (state.options.count_mode) {
       case CountOptions::COUNT_NON_NULL:
@@ -81,9 +83,9 @@ struct CountImpl : public ScalarAggregator {
         *out = Datum(state.nulls);
         break;
       default:
-        ctx->SetStatus(Status::Invalid("Unknown CountOptions encountered"));
-        break;
+        return Status::Invalid("Unknown CountOptions encountered");
     }
+    return Status::OK();
   }
 
   CountOptions options;
@@ -91,7 +93,8 @@ struct CountImpl : public ScalarAggregator {
   int64_t nulls = 0;
 };
 
-std::unique_ptr<KernelState> CountInit(KernelContext*, const KernelInitArgs& args) {
+Result<std::unique_ptr<KernelState>> CountInit(KernelContext*,
+                                               const KernelInitArgs& args) {
   return ::arrow::internal::make_unique<CountImpl>(
       static_cast<const CountOptions&>(*args.options));
 }
@@ -105,12 +108,14 @@ struct SumImplDefault : public SumImpl<ArrowType, SimdLevel::NONE> {};
 template <typename ArrowType>
 struct MeanImplDefault : public MeanImpl<ArrowType, SimdLevel::NONE> {};
 
-std::unique_ptr<KernelState> SumInit(KernelContext* ctx, const KernelInitArgs& args) {
+Result<std::unique_ptr<KernelState>> SumInit(KernelContext* ctx,
+                                             const KernelInitArgs& args) {
   SumLikeInit<SumImplDefault> visitor(ctx, *args.inputs[0].type);
   return visitor.Create();
 }
 
-std::unique_ptr<KernelState> MeanInit(KernelContext* ctx, const KernelInitArgs& args) {
+Result<std::unique_ptr<KernelState>> MeanInit(KernelContext* ctx,
+                                              const KernelInitArgs& args) {
   SumLikeInit<MeanImplDefault> visitor(ctx, *args.inputs[0].type);
   return visitor.Create();
 }
@@ -118,7 +123,8 @@ std::unique_ptr<KernelState> MeanInit(KernelContext* ctx, const KernelInitArgs&
 // ----------------------------------------------------------------------
 // MinMax implementation
 
-std::unique_ptr<KernelState> MinMaxInit(KernelContext* ctx, const KernelInitArgs& args) {
+Result<std::unique_ptr<KernelState>> MinMaxInit(KernelContext* ctx,
+                                                const KernelInitArgs& args) {
   MinMaxInitState<SimdLevel::NONE> visitor(
       ctx, *args.inputs[0].type, args.kernel->signature->out_type().type(),
       static_cast<const MinMaxOptions&>(*args.options));
@@ -129,10 +135,10 @@ std::unique_ptr<KernelState> MinMaxInit(KernelContext* ctx, const KernelInitArgs
 // Any implementation
 
 struct BooleanAnyImpl : public ScalarAggregator {
-  void Consume(KernelContext*, const ExecBatch& batch) override {
+  Status Consume(KernelContext*, const ExecBatch& batch) override {
     // short-circuit if seen a True already
     if (this->any == true) {
-      return;
+      return Status::OK();
     }
 
     const auto& data = *batch[0].array();
@@ -147,20 +153,24 @@ struct BooleanAnyImpl : public ScalarAggregator {
       }
       position += block.length;
     }
+    return Status::OK();
   }
 
-  void MergeFrom(KernelContext*, KernelState&& src) override {
+  Status MergeFrom(KernelContext*, KernelState&& src) override {
     const auto& other = checked_cast<const BooleanAnyImpl&>(src);
     this->any |= other.any;
+    return Status::OK();
   }
 
-  void Finalize(KernelContext*, Datum* out) override {
+  Status Finalize(KernelContext*, Datum* out) override {
     out->value = std::make_shared<BooleanScalar>(this->any);
+    return Status::OK();
   }
+
   bool any = false;
 };
 
-std::unique_ptr<KernelState> AnyInit(KernelContext*, const KernelInitArgs& args) {
+Result<std::unique_ptr<KernelState>> AnyInit(KernelContext*, const KernelInitArgs& args) {
   return ::arrow::internal::make_unique<BooleanAnyImpl>();
 }
 
@@ -168,10 +178,10 @@ std::unique_ptr<KernelState> AnyInit(KernelContext*, const KernelInitArgs& args)
 // All implementation
 
 struct BooleanAllImpl : public ScalarAggregator {
-  void Consume(KernelContext*, const ExecBatch& batch) override {
+  Status Consume(KernelContext*, const ExecBatch& batch) override {
     // short-circuit if seen a false already
     if (this->all == false) {
-      return;
+      return Status::OK();
     }
 
     const auto& data = *batch[0].array();
@@ -186,20 +196,25 @@ struct BooleanAllImpl : public ScalarAggregator {
       }
       position += block.length;
     }
+
+    return Status::OK();
   }
 
-  void MergeFrom(KernelContext*, KernelState&& src) override {
+  Status MergeFrom(KernelContext*, KernelState&& src) override {
     const auto& other = checked_cast<const BooleanAllImpl&>(src);
     this->all &= other.all;
+    return Status::OK();
   }
 
-  void Finalize(KernelContext*, Datum* out) override {
+  Status Finalize(KernelContext*, Datum* out) override {
     out->value = std::make_shared<BooleanScalar>(this->all);
+    return Status::OK();
   }
+
   bool all = true;
 };
 
-std::unique_ptr<KernelState> AllInit(KernelContext*, const KernelInitArgs& args) {
+Result<std::unique_ptr<KernelState>> AllInit(KernelContext*, const KernelInitArgs& args) {
   return ::arrow::internal::make_unique<BooleanAllImpl>();
 }
 
diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic_avx2.cc b/cpp/src/arrow/compute/kernels/aggregate_basic_avx2.cc
index feeb66a1489..a70363aab9b 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_basic_avx2.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_basic_avx2.cc
@@ -30,13 +30,14 @@ struct SumImplAvx2 : public SumImpl<ArrowType, SimdLevel::AVX2> {};
 template <typename ArrowType>
 struct MeanImplAvx2 : public MeanImpl<ArrowType, SimdLevel::AVX2> {};
 
-std::unique_ptr<KernelState> SumInitAvx2(KernelContext* ctx, const KernelInitArgs& args) {
+Result<std::unique_ptr<KernelState>> SumInitAvx2(KernelContext* ctx,
+                                                 const KernelInitArgs& args) {
   SumLikeInit<SumImplAvx2> visitor(ctx, *args.inputs[0].type);
   return visitor.Create();
 }
 
-std::unique_ptr<KernelState> MeanInitAvx2(KernelContext* ctx,
-                                          const KernelInitArgs& args) {
+Result<std::unique_ptr<KernelState>> MeanInitAvx2(KernelContext* ctx,
+                                                  const KernelInitArgs& args) {
   SumLikeInit<MeanImplAvx2> visitor(ctx, *args.inputs[0].type);
   return visitor.Create();
 }
@@ -44,8 +45,8 @@ std::unique_ptr<KernelState> MeanInitAvx2(KernelContext* ctx,
 // ----------------------------------------------------------------------
 // MinMax implementation
 
-std::unique_ptr<KernelState> MinMaxInitAvx2(KernelContext* ctx,
-                                            const KernelInitArgs& args) {
+Result<std::unique_ptr<KernelState>> MinMaxInitAvx2(KernelContext* ctx,
+                                                    const KernelInitArgs& args) {
   MinMaxInitState<SimdLevel::AVX2> visitor(
       ctx, *args.inputs[0].type, args.kernel->signature->out_type().type(),
       static_cast<const MinMaxOptions&>(*args.options));
diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic_avx512.cc b/cpp/src/arrow/compute/kernels/aggregate_basic_avx512.cc
index 522564a8469..1ecbd7041e6 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_basic_avx512.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_basic_avx512.cc
@@ -30,14 +30,14 @@ struct SumImplAvx512 : public SumImpl<ArrowType, SimdLevel::AVX512> {};
 template <typename ArrowType>
 struct MeanImplAvx512 : public MeanImpl<ArrowType, SimdLevel::AVX512> {};
 
-std::unique_ptr<KernelState> SumInitAvx512(KernelContext* ctx,
-                                           const KernelInitArgs& args) {
+Result<std::unique_ptr<KernelState>> SumInitAvx512(KernelContext* ctx,
+                                                   const KernelInitArgs& args) {
   SumLikeInit<SumImplAvx512> visitor(ctx, *args.inputs[0].type);
   return visitor.Create();
 }
 
-std::unique_ptr<KernelState> MeanInitAvx512(KernelContext* ctx,
-                                            const KernelInitArgs& args) {
+Result<std::unique_ptr<KernelState>> MeanInitAvx512(KernelContext* ctx,
+                                                    const KernelInitArgs& args) {
   SumLikeInit<MeanImplAvx512> visitor(ctx, *args.inputs[0].type);
   return visitor.Create();
 }
@@ -45,8 +45,8 @@ std::unique_ptr<KernelState> MeanInitAvx512(KernelContext* ctx,
 // ----------------------------------------------------------------------
 // MinMax implementation
 
-std::unique_ptr<KernelState> MinMaxInitAvx512(KernelContext* ctx,
-                                              const KernelInitArgs& args) {
+Result<std::unique_ptr<KernelState>> MinMaxInitAvx512(KernelContext* ctx,
+                                                      const KernelInitArgs& args) {
   MinMaxInitState<SimdLevel::AVX512> visitor(
       ctx, *args.inputs[0].type, args.kernel->signature->out_type().type(),
       static_cast<const MinMaxOptions&>(*args.options));
diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h b/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h
index 5029c1855c0..f8db180b1e3 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h
+++ b/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h
@@ -58,7 +58,7 @@ struct SumImpl : public ScalarAggregator {
   using SumType = typename FindAccumulatorType<ArrowType>::Type;
   using OutputType = typename TypeTraits<SumType>::ScalarType;
 
-  void Consume(KernelContext*, const ExecBatch& batch) override {
+  Status Consume(KernelContext*, const ExecBatch& batch) override {
     const auto& data = batch[0].array();
     this->count = data->length - data->GetNullCount();
     if (is_boolean_type<ArrowType>::value) {
@@ -67,20 +67,23 @@ struct SumImpl : public ScalarAggregator {
       this->sum =
           arrow::compute::detail::SumArray<CType, typename SumType::c_type>(*data);
     }
+    return Status::OK();
   }
 
-  void MergeFrom(KernelContext*, KernelState&& src) override {
+  Status MergeFrom(KernelContext*, KernelState&& src) override {
     const auto& other = checked_cast<const ThisType&>(src);
     this->count += other.count;
     this->sum += other.sum;
+    return Status::OK();
   }
 
-  void Finalize(KernelContext*, Datum* out) override {
+  Status Finalize(KernelContext*, Datum* out) override {
     if (this->count == 0) {
       out->value = std::make_shared<OutputType>();
     } else {
       out->value = MakeScalar(this->sum);
     }
+    return Status::OK();
   }
 
   size_t count = 0;
@@ -89,13 +92,14 @@ struct SumImpl : public ScalarAggregator {
 
 template <typename ArrowType, SimdLevel::type SimdLevel>
 struct MeanImpl : public SumImpl<ArrowType, SimdLevel> {
-  void Finalize(KernelContext*, Datum* out) override {
+  Status Finalize(KernelContext*, Datum* out) override {
     if (this->count == 0) {
       out->value = std::make_shared<DoubleScalar>();
     } else {
       const double mean = static_cast<double>(this->sum) / this->count;
       out->value = std::make_shared<DoubleScalar>(mean);
     }
+    return Status::OK();
   }
 };
 
@@ -124,8 +128,8 @@ struct SumLikeInit {
     return Status::OK();
   }
 
-  std::unique_ptr<KernelState> Create() {
-    ctx->SetStatus(VisitTypeInline(type, this));
+  Result<std::unique_ptr<KernelState>> Create() {
+    RETURN_NOT_OK(VisitTypeInline(type, this));
     return std::move(state);
   }
 };
@@ -217,7 +221,7 @@ struct MinMaxImpl : public ScalarAggregator {
   MinMaxImpl(const std::shared_ptr<DataType>& out_type, const MinMaxOptions& options)
       : out_type(out_type), options(options) {}
 
-  void Consume(KernelContext*, const ExecBatch& batch) override {
+  Status Consume(KernelContext*, const ExecBatch& batch) override {
     StateType local;
 
     ArrayType arr(batch[0].array());
@@ -228,7 +232,7 @@ struct MinMaxImpl : public ScalarAggregator {
 
     if (local.has_nulls && options.null_handling == MinMaxOptions::EMIT_NULL) {
       this->state = local;
-      return;
+      return Status::OK();
     }
 
     if (local.has_nulls) {
@@ -239,14 +243,16 @@ struct MinMaxImpl : public ScalarAggregator {
       }
     }
     this->state = local;
+    return Status::OK();
   }
 
-  void MergeFrom(KernelContext*, KernelState&& src) override {
+  Status MergeFrom(KernelContext*, KernelState&& src) override {
     const auto& other = checked_cast<const ThisType&>(src);
     this->state += other.state;
+    return Status::OK();
   }
 
-  void Finalize(KernelContext*, Datum* out) override {
+  Status Finalize(KernelContext*, Datum* out) override {
     using ScalarType = typename TypeTraits<ArrowType>::ScalarType;
 
     std::vector<std::shared_ptr<Scalar>> values;
@@ -259,6 +265,7 @@ struct MinMaxImpl : public ScalarAggregator {
                 std::make_shared<ScalarType>(state.max)};
     }
     out->value = std::make_shared<StructScalar>(std::move(values), this->out_type);
+    return Status::OK();
   }
 
   std::shared_ptr<DataType> out_type;
@@ -331,7 +338,7 @@ struct BooleanMinMaxImpl : public MinMaxImpl<BooleanType, SimdLevel> {
   using MinMaxImpl<BooleanType, SimdLevel>::MinMaxImpl;
   using MinMaxImpl<BooleanType, SimdLevel>::options;
 
-  void Consume(KernelContext*, const ExecBatch& batch) override {
+  Status Consume(KernelContext*, const ExecBatch& batch) override {
     StateType local;
     ArrayType arr(batch[0].array());
 
@@ -343,7 +350,7 @@ struct BooleanMinMaxImpl : public MinMaxImpl<BooleanType, SimdLevel> {
     local.has_values = valid_count > 0;
     if (local.has_nulls && options.null_handling == MinMaxOptions::EMIT_NULL) {
       this->state = local;
-      return;
+      return Status::OK();
     }
 
     const auto true_count = arr.true_count();
@@ -352,6 +359,7 @@ struct BooleanMinMaxImpl : public MinMaxImpl<BooleanType, SimdLevel> {
     local.min = false_count == 0;
 
     this->state = local;
+    return Status::OK();
   }
 };
 
@@ -386,8 +394,8 @@ struct MinMaxInitState {
     return Status::OK();
   }
 
-  std::unique_ptr<KernelState> Create() {
-    ctx->SetStatus(VisitTypeInline(in_type, this));
+  Result<std::unique_ptr<KernelState>> Create() {
+    RETURN_NOT_OK(VisitTypeInline(in_type, this));
     return std::move(state);
   }
 };
diff --git a/cpp/src/arrow/compute/kernels/aggregate_internal.h b/cpp/src/arrow/compute/kernels/aggregate_internal.h
index 67337f22c5b..d74881108ae 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_internal.h
+++ b/cpp/src/arrow/compute/kernels/aggregate_internal.h
@@ -50,9 +50,9 @@ struct FindAccumulatorType<I, enable_if_floating_point<I>> {
 };
 
 struct ScalarAggregator : public KernelState {
-  virtual void Consume(KernelContext* ctx, const ExecBatch& batch) = 0;
-  virtual void MergeFrom(KernelContext* ctx, KernelState&& src) = 0;
-  virtual void Finalize(KernelContext* ctx, Datum* out) = 0;
+  virtual Status Consume(KernelContext* ctx, const ExecBatch& batch) = 0;
+  virtual Status MergeFrom(KernelContext* ctx, KernelState&& src) = 0;
+  virtual Status Finalize(KernelContext* ctx, Datum* out) = 0;
 };
 
 void AddAggKernel(std::shared_ptr<KernelSignature> sig, KernelInit init,
diff --git a/cpp/src/arrow/compute/kernels/aggregate_mode.cc b/cpp/src/arrow/compute/kernels/aggregate_mode.cc
index 7ac0dd3c707..f7538ac5249 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_mode.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_mode.cc
@@ -71,7 +71,7 @@ Result<std::pair<CType*, int64_t*>> PrepareOutput(int64_t n, KernelContext* ctx,
 // find top-n value:count pairs with minimal heap
 // suboptimal for tiny or large n, possibly okay as we're not in hot path
 template <typename InType, typename Generator>
-void Finalize(KernelContext* ctx, Datum* out, Generator&& gen) {
+Status Finalize(KernelContext* ctx, Datum* out, Generator&& gen) {
   using CType = typename InType::c_type;
 
   using ValueCountPair = std::pair<CType, uint64_t>;
@@ -100,13 +100,15 @@ void Finalize(KernelContext* ctx, Datum* out, Generator&& gen) {
 
   CType* mode_buffer;
   int64_t* count_buffer;
-  KERNEL_ASSIGN_OR_RAISE(std::tie(mode_buffer, count_buffer), ctx,
-                         PrepareOutput<InType>(n, ctx, out));
+  ARROW_ASSIGN_OR_RAISE(std::tie(mode_buffer, count_buffer),
+                        PrepareOutput<InType>(n, ctx, out));
 
   for (int64_t i = n - 1; i >= 0; --i) {
     std::tie(mode_buffer[i], count_buffer[i]) = min_heap.top();
     min_heap.pop();
   }
+
+  return Status::OK();
 }
 
 // count value occurances for integers with narrow value range
@@ -125,7 +127,7 @@ struct CountModer {
     this->counts.resize(value_range, 0);
   }
 
-  void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     // count values in all chunks, ignore nulls
     const Datum& datum = batch[0];
     CountValues<CType>(this->counts.data(), datum, this->min);
@@ -144,14 +146,14 @@ struct CountModer {
       return std::pair<CType, uint64_t>(0, kCountEOF);
     };
 
-    Finalize<T>(ctx, out, std::move(gen));
+    return Finalize<T>(ctx, out, std::move(gen));
   }
 };
 
 // booleans can be handled more straightforward
 template <>
 struct CountModer<BooleanType> {
-  void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     int64_t counts[2]{};
 
     const Datum& datum = batch[0];
@@ -171,8 +173,8 @@ struct CountModer<BooleanType> {
 
     bool* mode_buffer;
     int64_t* count_buffer;
-    KERNEL_ASSIGN_OR_RAISE(std::tie(mode_buffer, count_buffer), ctx,
-                           PrepareOutput<BooleanType>(n, ctx, out));
+    ARROW_ASSIGN_OR_RAISE(std::tie(mode_buffer, count_buffer),
+                          PrepareOutput<BooleanType>(n, ctx, out));
 
     if (n >= 1) {
       const bool index = counts[1] > counts[0];
@@ -183,6 +185,8 @@ struct CountModer<BooleanType> {
         count_buffer[1] = counts[!index];
       }
     }
+
+    return Status::OK();
   }
 };
 
@@ -193,7 +197,7 @@ struct SortModer {
   using CType = typename T::c_type;
   using Allocator = arrow::stl::allocator<CType>;
 
-  void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     // copy all chunks to a buffer, ignore nulls and nans
     std::vector<CType, Allocator> in_buffer(Allocator(ctx->memory_pool()));
 
@@ -238,7 +242,7 @@ struct SortModer {
       return std::make_pair(value, count);
     };
 
-    Finalize<T>(ctx, out, std::move(gen));
+    return Finalize<T>(ctx, out, std::move(gen));
   }
 };
 
@@ -247,7 +251,7 @@ template <typename T>
 struct CountOrSortModer {
   using CType = typename T::c_type;
 
-  void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     // cross point to benefit from counting approach
     // about 2x improvement for int32/64 from micro-benchmarking
     static constexpr int kMinArraySize = 8192;
@@ -259,12 +263,11 @@ struct CountOrSortModer {
       std::tie(min, max) = GetMinMax<CType>(datum);
 
       if (static_cast<uint64_t>(max) - static_cast<uint64_t>(min) <= kMaxValueRange) {
-        CountModer<T>(min, max).Exec(ctx, batch, out);
-        return;
+        return CountModer<T>(min, max).Exec(ctx, batch, out);
       }
     }
 
-    SortModer<T>().Exec(ctx, batch, out);
+    return SortModer<T>().Exec(ctx, batch, out);
   }
 };
 
@@ -301,18 +304,16 @@ struct Moder<InType, enable_if_t<is_floating_type<InType>::value>> {
 
 template <typename _, typename InType>
 struct ModeExecutor {
-  static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     if (ctx->state() == nullptr) {
-      ctx->SetStatus(Status::Invalid("Mode requires ModeOptions"));
-      return;
+      return Status::Invalid("Mode requires ModeOptions");
     }
     const ModeOptions& options = ModeState::Get(ctx);
     if (options.n <= 0) {
-      ctx->SetStatus(Status::Invalid("ModeOption::n must be strictly positive"));
-      return;
+      return Status::Invalid("ModeOption::n must be strictly positive");
     }
 
-    Moder<InType>().impl.Exec(ctx, batch, out);
+    return Moder<InType>().impl.Exec(ctx, batch, out);
   }
 };
 
diff --git a/cpp/src/arrow/compute/kernels/aggregate_quantile.cc b/cpp/src/arrow/compute/kernels/aggregate_quantile.cc
index f0de1be2793..2bb026dbdbd 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_quantile.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_quantile.cc
@@ -77,7 +77,7 @@ struct SortQuantiler {
   using CType = typename InType::c_type;
   using Allocator = arrow::stl::allocator<CType>;
 
-  void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     const QuantileOptions& options = QuantileState::Get(ctx);
 
     // copy all chunks to a buffer, ignore nulls and nans
@@ -111,8 +111,8 @@ struct SortQuantiler {
 
     // calculate quantiles
     if (out_length > 0) {
-      KERNEL_ASSIGN_OR_RAISE(out_data->buffers[1], ctx,
-                             ctx->Allocate(out_length * GetBitWidth(*out_type) / 8));
+      ARROW_ASSIGN_OR_RAISE(out_data->buffers[1],
+                            ctx->Allocate(out_length * GetBitWidth(*out_type) / 8));
 
       // find quantiles in descending order
       std::vector<int64_t> q_indices(out_length);
@@ -143,6 +143,7 @@ struct SortQuantiler {
     }
 
     *out = Datum(std::move(out_data));
+    return Status::OK();
   }
 
   // return quantile located exactly at some input data point
@@ -226,7 +227,7 @@ struct CountQuantiler {
     this->counts.resize(value_range, 0);
   }
 
-  void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     const QuantileOptions& options = QuantileState::Get(ctx);
 
     // count values in all chunks, ignore nulls
@@ -247,8 +248,8 @@ struct CountQuantiler {
 
     // calculate quantiles
     if (out_length > 0) {
-      KERNEL_ASSIGN_OR_RAISE(out_data->buffers[1], ctx,
-                             ctx->Allocate(out_length * GetBitWidth(*out_type) / 8));
+      ARROW_ASSIGN_OR_RAISE(out_data->buffers[1],
+                            ctx->Allocate(out_length * GetBitWidth(*out_type) / 8));
 
       // find quantiles in ascending order
       std::vector<int64_t> q_indices(out_length);
@@ -277,6 +278,7 @@ struct CountQuantiler {
     }
 
     *out = Datum(std::move(out_data));
+    return Status::OK();
   }
 
   // return quantile located exactly at some input data point
@@ -341,7 +343,7 @@ template <typename InType>
 struct CountOrSortQuantiler {
   using CType = typename InType::c_type;
 
-  void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     // cross point to benefit from histogram approach
     // parameters estimated from ad-hoc benchmarks manually
     static constexpr int kMinArraySize = 65536;
@@ -353,12 +355,11 @@ struct CountOrSortQuantiler {
       std::tie(min, max) = GetMinMax<CType>(datum);
 
       if (static_cast<uint64_t>(max) - static_cast<uint64_t>(min) <= kMaxValueRange) {
-        CountQuantiler<InType>(min, max).Exec(ctx, batch, out);
-        return;
+        return CountQuantiler<InType>(min, max).Exec(ctx, batch, out);
       }
     }
 
-    SortQuantiler<InType>().Exec(ctx, batch, out);
+    return SortQuantiler<InType>().Exec(ctx, batch, out);
   }
 };
 
@@ -390,25 +391,22 @@ struct ExactQuantiler<InType, enable_if_t<is_floating_type<InType>::value>> {
 
 template <typename _, typename InType>
 struct QuantileExecutor {
-  static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     if (ctx->state() == nullptr) {
-      ctx->SetStatus(Status::Invalid("Quantile requires QuantileOptions"));
-      return;
+      return Status::Invalid("Quantile requires QuantileOptions");
     }
 
     const QuantileOptions& options = QuantileState::Get(ctx);
     if (options.q.empty()) {
-      ctx->SetStatus(Status::Invalid("Requires quantile argument"));
-      return;
+      return Status::Invalid("Requires quantile argument");
     }
     for (double q : options.q) {
       if (q < 0 || q > 1) {
-        ctx->SetStatus(Status::Invalid("Quantile must be between 0 and 1"));
-        return;
+        return Status::Invalid("Quantile must be between 0 and 1");
       }
     }
 
-    ExactQuantiler<InType>().impl.Exec(ctx, batch, out);
+    return ExactQuantiler<InType>().impl.Exec(ctx, batch, out);
   }
 };
 
diff --git a/cpp/src/arrow/compute/kernels/aggregate_tdigest.cc b/cpp/src/arrow/compute/kernels/aggregate_tdigest.cc
index fc8f43b0ae2..fb474a6b8b3 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_tdigest.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_tdigest.cc
@@ -39,7 +39,7 @@ struct TDigestImpl : public ScalarAggregator {
   explicit TDigestImpl(const TDigestOptions& options)
       : q{options.q}, tdigest{options.delta, options.buffer_size} {}
 
-  void Consume(KernelContext*, const ExecBatch& batch) override {
+  Status Consume(KernelContext*, const ExecBatch& batch) override {
     const ArrayData& data = *batch[0].array();
     const CType* values = data.GetValues<CType>(1);
 
@@ -51,23 +51,25 @@ struct TDigestImpl : public ScalarAggregator {
                             }
                           });
     }
+    return Status::OK();
   }
 
-  void MergeFrom(KernelContext*, KernelState&& src) override {
+  Status MergeFrom(KernelContext*, KernelState&& src) override {
     auto& other = checked_cast<ThisType&>(src);
     std::vector<TDigest> other_tdigest;
     other_tdigest.push_back(std::move(other.tdigest));
     this->tdigest.Merge(&other_tdigest);
+    return Status::OK();
   }
 
-  void Finalize(KernelContext* ctx, Datum* out) override {
+  Status Finalize(KernelContext* ctx, Datum* out) override {
     const int64_t out_length = this->tdigest.is_empty() ? 0 : this->q.size();
     auto out_data = ArrayData::Make(float64(), out_length, 0);
     out_data->buffers.resize(2, nullptr);
 
     if (out_length > 0) {
-      KERNEL_ASSIGN_OR_RAISE(out_data->buffers[1], ctx,
-                             ctx->Allocate(out_length * sizeof(double)));
+      ARROW_ASSIGN_OR_RAISE(out_data->buffers[1],
+                            ctx->Allocate(out_length * sizeof(double)));
       double* out_buffer = out_data->template GetMutableValues<double>(1);
       for (int64_t i = 0; i < out_length; ++i) {
         out_buffer[i] = this->tdigest.Quantile(this->q[i]);
@@ -75,6 +77,7 @@ struct TDigestImpl : public ScalarAggregator {
     }
 
     *out = Datum(std::move(out_data));
+    return Status::OK();
   }
 
   const std::vector<double>& q;
@@ -105,13 +108,14 @@ struct TDigestInitState {
     return Status::OK();
   }
 
-  std::unique_ptr<KernelState> Create() {
-    ctx->SetStatus(VisitTypeInline(in_type, this));
+  Result<std::unique_ptr<KernelState>> Create() {
+    RETURN_NOT_OK(VisitTypeInline(in_type, this));
     return std::move(state);
   }
 };
 
-std::unique_ptr<KernelState> TDigestInit(KernelContext* ctx, const KernelInitArgs& args) {
+Result<std::unique_ptr<KernelState>> TDigestInit(KernelContext* ctx,
+                                                 const KernelInitArgs& args) {
   TDigestInitState visitor(ctx, *args.inputs[0].type,
                            static_cast<const TDigestOptions&>(*args.options));
   return visitor.Create();
diff --git a/cpp/src/arrow/compute/kernels/aggregate_var_std.cc b/cpp/src/arrow/compute/kernels/aggregate_var_std.cc
index d11e73efd77..29b2adce3bd 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_var_std.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_var_std.cc
@@ -151,17 +151,19 @@ struct VarStdImpl : public ScalarAggregator {
                       const VarianceOptions& options, VarOrStd return_type)
       : out_type(out_type), options(options), return_type(return_type) {}
 
-  void Consume(KernelContext*, const ExecBatch& batch) override {
+  Status Consume(KernelContext*, const ExecBatch& batch) override {
     ArrayType array(batch[0].array());
     this->state.Consume(array);
+    return Status::OK();
   }
 
-  void MergeFrom(KernelContext*, KernelState&& src) override {
+  Status MergeFrom(KernelContext*, KernelState&& src) override {
     const auto& other = checked_cast<const ThisType&>(src);
     this->state.MergeFrom(other.state);
+    return Status::OK();
   }
 
-  void Finalize(KernelContext*, Datum* out) override {
+  Status Finalize(KernelContext*, Datum* out) override {
     if (this->state.count <= options.ddof) {
       out->value = std::make_shared<DoubleScalar>();
     } else {
@@ -169,6 +171,7 @@ struct VarStdImpl : public ScalarAggregator {
       out->value =
           std::make_shared<DoubleScalar>(return_type == VarOrStd::Var ? var : sqrt(var));
     }
+    return Status::OK();
   }
 
   std::shared_ptr<DataType> out_type;
@@ -208,21 +211,22 @@ struct VarStdInitState {
     return Status::OK();
   }
 
-  std::unique_ptr<KernelState> Create() {
-    ctx->SetStatus(VisitTypeInline(in_type, this));
+  Result<std::unique_ptr<KernelState>> Create() {
+    RETURN_NOT_OK(VisitTypeInline(in_type, this));
     return std::move(state);
   }
 };
 
-std::unique_ptr<KernelState> StddevInit(KernelContext* ctx, const KernelInitArgs& args) {
+Result<std::unique_ptr<KernelState>> StddevInit(KernelContext* ctx,
+                                                const KernelInitArgs& args) {
   VarStdInitState visitor(
       ctx, *args.inputs[0].type, args.kernel->signature->out_type().type(),
       static_cast<const VarianceOptions&>(*args.options), VarOrStd::Std);
   return visitor.Create();
 }
 
-std::unique_ptr<KernelState> VarianceInit(KernelContext* ctx,
-                                          const KernelInitArgs& args) {
+Result<std::unique_ptr<KernelState>> VarianceInit(KernelContext* ctx,
+                                                  const KernelInitArgs& args) {
   VarStdInitState visitor(
       ctx, *args.inputs[0].type, args.kernel->signature->out_type().type(),
       static_cast<const VarianceOptions&>(*args.options), VarOrStd::Var);
diff --git a/cpp/src/arrow/compute/kernels/codegen_internal.cc b/cpp/src/arrow/compute/kernels/codegen_internal.cc
index ad43b7a3aa9..d6a1d4ccbc4 100644
--- a/cpp/src/arrow/compute/kernels/codegen_internal.cc
+++ b/cpp/src/arrow/compute/kernels/codegen_internal.cc
@@ -28,15 +28,15 @@ namespace arrow {
 namespace compute {
 namespace internal {
 
-void ExecFail(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
-  ctx->SetStatus(Status::NotImplemented("This kernel is malformed"));
+Status ExecFail(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  return Status::NotImplemented("This kernel is malformed");
 }
 
 ArrayKernelExec MakeFlippedBinaryExec(ArrayKernelExec exec) {
   return [exec](KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     ExecBatch flipped_batch = batch;
     std::swap(flipped_batch.values[0], flipped_batch.values[1]);
-    exec(ctx, flipped_batch, out);
+    return exec(ctx, flipped_batch, out);
   };
 }
 
diff --git a/cpp/src/arrow/compute/kernels/codegen_internal.h b/cpp/src/arrow/compute/kernels/codegen_internal.h
index b5d6c3807f1..7b394565f7d 100644
--- a/cpp/src/arrow/compute/kernels/codegen_internal.h
+++ b/cpp/src/arrow/compute/kernels/codegen_internal.h
@@ -63,42 +63,6 @@ using internal::VisitTwoBitBlocksVoid;
 namespace compute {
 namespace internal {
 
-#ifdef ARROW_EXTRA_ERROR_CONTEXT
-
-#define KERNEL_RETURN_IF_ERROR(ctx, expr)            \
-  do {                                               \
-    Status _st = (expr);                             \
-    if (ARROW_PREDICT_FALSE(!_st.ok())) {            \
-      _st.AddContextLine(__FILE__, __LINE__, #expr); \
-      ctx->SetStatus(_st);                           \
-      return;                                        \
-    }                                                \
-  } while (0)
-
-#else
-
-#define KERNEL_RETURN_IF_ERROR(ctx, expr) \
-  do {                                    \
-    Status _st = (expr);                  \
-    if (ARROW_PREDICT_FALSE(!_st.ok())) { \
-      ctx->SetStatus(_st);                \
-      return;                             \
-    }                                     \
-  } while (0)
-
-#endif  // ARROW_EXTRA_ERROR_CONTEXT
-
-#define KERNEL_ASSIGN_OR_RAISE_IMPL(result_name, lhs, ctx, rexpr) \
-  auto result_name = (rexpr);                                     \
-  KERNEL_RETURN_IF_ERROR(ctx, (result_name).status());            \
-  lhs = std::move(result_name).MoveValueUnsafe();
-
-#define KERNEL_ASSIGN_OR_RAISE_NAME(x, y) ARROW_CONCAT(x, y)
-
-#define KERNEL_ASSIGN_OR_RAISE(lhs, ctx, rexpr)                                          \
-  KERNEL_ASSIGN_OR_RAISE_IMPL(KERNEL_ASSIGN_OR_RAISE_NAME(_error_or_value, __COUNTER__), \
-                              lhs, ctx, rexpr);
-
 /// KernelState adapter for the common case of kernels whose only
 /// state is an instance of a subclass of FunctionOptions.
 /// Default FunctionOptions are *not* handled here.
@@ -106,15 +70,14 @@ template <typename OptionsType>
 struct OptionsWrapper : public KernelState {
   explicit OptionsWrapper(OptionsType options) : options(std::move(options)) {}
 
-  static std::unique_ptr<KernelState> Init(KernelContext* ctx,
-                                           const KernelInitArgs& args) {
+  static Result<std::unique_ptr<KernelState>> Init(KernelContext* ctx,
+                                                   const KernelInitArgs& args) {
     if (auto options = static_cast<const OptionsType*>(args.options)) {
       return ::arrow::internal::make_unique<OptionsWrapper>(*options);
     }
 
-    ctx->SetStatus(
-        Status::Invalid("Attempted to initialize KernelState from null FunctionOptions"));
-    return NULLPTR;
+    return Status::Invalid(
+        "Attempted to initialize KernelState from null FunctionOptions");
   }
 
   static const OptionsType& Get(const KernelState& state) {
@@ -133,16 +96,15 @@ struct KernelStateFromFunctionOptions : public KernelState {
   explicit KernelStateFromFunctionOptions(KernelContext* ctx, OptionsType state)
       : state(StateType(ctx, std::move(state))) {}
 
-  static std::unique_ptr<KernelState> Init(KernelContext* ctx,
-                                           const KernelInitArgs& args) {
+  static Result<std::unique_ptr<KernelState>> Init(KernelContext* ctx,
+                                                   const KernelInitArgs& args) {
     if (auto options = static_cast<const OptionsType*>(args.options)) {
       return ::arrow::internal::make_unique<KernelStateFromFunctionOptions>(ctx,
                                                                             *options);
     }
 
-    ctx->SetStatus(
-        Status::Invalid("Attempted to initialize KernelState from null FunctionOptions"));
-    return NULLPTR;
+    return Status::Invalid(
+        "Attempted to initialize KernelState from null FunctionOptions");
   }
 
   static const StateType& Get(const KernelState& state) {
@@ -372,8 +334,9 @@ struct BoxScalar<Decimal256Type> {
 // values, such as Decimal128 rather than util::string_view.
 
 template <typename T, typename VisitFunc, typename NullFunc>
-static void VisitArrayValuesInline(const ArrayData& arr, VisitFunc&& valid_func,
-                                   NullFunc&& null_func) {
+static typename arrow::internal::call_traits::enable_if_return<VisitFunc, void>::type
+VisitArrayValuesInline(const ArrayData& arr, VisitFunc&& valid_func,
+                       NullFunc&& null_func) {
   VisitArrayDataInline<T>(
       arr,
       [&](typename GetViewType<T>::PhysicalType v) {
@@ -382,6 +345,18 @@ static void VisitArrayValuesInline(const ArrayData& arr, VisitFunc&& valid_func,
       std::forward<NullFunc>(null_func));
 }
 
+template <typename T, typename VisitFunc, typename NullFunc>
+static typename arrow::internal::call_traits::enable_if_return<VisitFunc, Status>::type
+VisitArrayValuesInline(const ArrayData& arr, VisitFunc&& valid_func,
+                       NullFunc&& null_func) {
+  VisitArrayDataInline<T>(
+      arr,
+      [&](typename GetViewType<T>::PhysicalType v) {
+        return valid_func(GetViewType<T>::LogicalValue(std::move(v)));
+      },
+      std::forward<NullFunc>(null_func));
+}
+
 // Like VisitArrayValuesInline, but for binary functions.
 
 template <typename Arg0Type, typename Arg1Type, typename VisitFunc, typename NullFunc>
@@ -411,7 +386,7 @@ Result<ValueDescr> FirstType(KernelContext*, const std::vector<ValueDescr>& desc
 // ----------------------------------------------------------------------
 // Generate an array kernel given template classes
 
-void ExecFail(KernelContext* ctx, const ExecBatch& batch, Datum* out);
+Status ExecFail(KernelContext* ctx, const ExecBatch& batch, Datum* out);
 
 ArrayKernelExec MakeFlippedBinaryExec(ArrayKernelExec exec);
 
@@ -469,15 +444,16 @@ namespace applicator {
 //
 // Operator must implement
 //
-// static void Call(KernelContext*, const ArrayData& in, ArrayData* out)
-// static void Call(KernelContext*, const Scalar& in, Scalar* out)
+// static Status Call(KernelContext*, const ArrayData& in, ArrayData* out)
+// static Status Call(KernelContext*, const Scalar& in, Scalar* out)
 template <typename Operator>
-static void SimpleUnary(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+static Status SimpleUnary(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
   if (batch[0].kind() == Datum::SCALAR) {
-    Operator::Call(ctx, *batch[0].scalar(), out->scalar().get());
+    return Operator::Call(ctx, *batch[0].scalar(), out->scalar().get());
   } else if (batch.length > 0) {
-    Operator::Call(ctx, *batch[0].array(), out->mutable_array());
+    return Operator::Call(ctx, *batch[0].array(), out->mutable_array());
   }
+  return Status::OK();
 }
 
 // Generate an ArrayKernelExec given a functor that handles all of its own
@@ -485,29 +461,33 @@ static void SimpleUnary(KernelContext* ctx, const ExecBatch& batch, Datum* out)
 //
 // Operator must implement
 //
-// static void Call(KernelContext*, const ArrayData& arg0, const ArrayData& arg1,
-//                  ArrayData* out)
-// static void Call(KernelContext*, const ArrayData& arg0, const Scalar& arg1,
-//                  ArrayData* out)
-// static void Call(KernelContext*, const Scalar& arg0, const ArrayData& arg1,
-//                  ArrayData* out)
-// static void Call(KernelContext*, const Scalar& arg0, const Scalar& arg1,
-//                  Scalar* out)
+// static Status Call(KernelContext*, const ArrayData& arg0, const ArrayData& arg1,
+//                    ArrayData* out)
+// static Status Call(KernelContext*, const ArrayData& arg0, const Scalar& arg1,
+//                    ArrayData* out)
+// static Status Call(KernelContext*, const Scalar& arg0, const ArrayData& arg1,
+//                    ArrayData* out)
+// static Status Call(KernelContext*, const Scalar& arg0, const Scalar& arg1,
+//                    Scalar* out)
 template <typename Operator>
-static void SimpleBinary(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
-  if (batch.length == 0) return;
+static Status SimpleBinary(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  if (batch.length == 0) return Status::OK();
 
   if (batch[0].kind() == Datum::ARRAY) {
     if (batch[1].kind() == Datum::ARRAY) {
-      Operator::Call(ctx, *batch[0].array(), *batch[1].array(), out->mutable_array());
+      return Operator::Call(ctx, *batch[0].array(), *batch[1].array(),
+                            out->mutable_array());
     } else {
-      Operator::Call(ctx, *batch[0].array(), *batch[1].scalar(), out->mutable_array());
+      return Operator::Call(ctx, *batch[0].array(), *batch[1].scalar(),
+                            out->mutable_array());
     }
   } else {
     if (batch[1].kind() == Datum::ARRAY) {
-      Operator::Call(ctx, *batch[0].scalar(), *batch[1].array(), out->mutable_array());
+      return Operator::Call(ctx, *batch[0].scalar(), *batch[1].array(),
+                            out->mutable_array());
     } else {
-      Operator::Call(ctx, *batch[0].scalar(), *batch[1].scalar(), out->scalar().get());
+      return Operator::Call(ctx, *batch[0].scalar(), *batch[1].scalar(),
+                            out->scalar().get());
     }
   }
 }
@@ -522,32 +502,34 @@ struct OutputAdapter;
 template <typename Type>
 struct OutputAdapter<Type, enable_if_boolean<Type>> {
   template <typename Generator>
-  static void Write(KernelContext*, Datum* out, Generator&& generator) {
+  static Status Write(KernelContext*, Datum* out, Generator&& generator) {
     ArrayData* out_arr = out->mutable_array();
     auto out_bitmap = out_arr->buffers[1]->mutable_data();
     GenerateBitsUnrolled(out_bitmap, out_arr->offset, out_arr->length,
                          std::forward<Generator>(generator));
+    return Status::OK();
   }
 };
 
 template <typename Type>
 struct OutputAdapter<Type, enable_if_has_c_type_not_boolean<Type>> {
   template <typename Generator>
-  static void Write(KernelContext*, Datum* out, Generator&& generator) {
+  static Status Write(KernelContext*, Datum* out, Generator&& generator) {
     ArrayData* out_arr = out->mutable_array();
     auto out_data = out_arr->GetMutableValues<typename Type::c_type>(1);
     // TODO: Is this as fast as a more explicitly inlined function?
     for (int64_t i = 0; i < out_arr->length; ++i) {
       *out_data++ = generator();
     }
+    return Status::OK();
   }
 };
 
 template <typename Type>
 struct OutputAdapter<Type, enable_if_base_binary<Type>> {
   template <typename Generator>
-  static void Write(KernelContext* ctx, Datum* out, Generator&& generator) {
-    ctx->SetStatus(Status::NotImplemented("NYI"));
+  static Status Write(KernelContext* ctx, Datum* out, Generator&& generator) {
+    return Status::NotImplemented("NYI");
   }
 };
 
@@ -563,8 +545,10 @@ struct OutputAdapter<Type, enable_if_base_binary<Type>> {
 //
 // struct Op {
 //   template <typename OutValue, typename Arg0Value>
-//   static OutValue Call(KernelContext* ctx, Arg0Value val) {
+//   static OutValue Call(KernelContext* ctx, Arg0Value val, Status* st) {
 //     // implementation
+//     // NOTE: "status" should only populated with errors,
+//     //        leave it unmodified to indicate Status::OK()
 //   }
 // };
 template <typename OutType, typename Arg0Type, typename Op>
@@ -572,26 +556,30 @@ struct ScalarUnary {
   using OutValue = typename GetOutputType<OutType>::T;
   using Arg0Value = typename GetViewType<Arg0Type>::T;
 
-  static void ExecArray(KernelContext* ctx, const ArrayData& arg0, Datum* out) {
+  static Status ExecArray(KernelContext* ctx, const ArrayData& arg0, Datum* out) {
+    Status st = Status::OK();
     ArrayIterator<Arg0Type> arg0_it(arg0);
-    OutputAdapter<OutType>::Write(ctx, out, [&]() -> OutValue {
-      return Op::template Call<OutValue, Arg0Value>(ctx, arg0_it());
-    });
+    RETURN_NOT_OK(OutputAdapter<OutType>::Write(ctx, out, [&]() -> OutValue {
+      return Op::template Call<OutValue, Arg0Value>(ctx, arg0_it(), &st);
+    }));
+    return st;
   }
 
-  static void ExecScalar(KernelContext* ctx, const Scalar& arg0, Datum* out) {
+  static Status ExecScalar(KernelContext* ctx, const Scalar& arg0, Datum* out) {
+    Status st = Status::OK();
     Scalar* out_scalar = out->scalar().get();
     if (arg0.is_valid) {
       Arg0Value arg0_val = UnboxScalar<Arg0Type>::Unbox(arg0);
       out_scalar->is_valid = true;
-      BoxScalar<OutType>::Box(Op::template Call<OutValue, Arg0Value>(ctx, arg0_val),
+      BoxScalar<OutType>::Box(Op::template Call<OutValue, Arg0Value>(ctx, arg0_val, &st),
                               out_scalar);
     } else {
       out_scalar->is_valid = false;
     }
+    return st;
   }
 
-  static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     if (batch[0].kind() == Datum::ARRAY) {
       return ExecArray(ctx, *batch[0].array(), out);
     } else {
@@ -615,66 +603,69 @@ struct ScalarUnaryNotNullStateful {
 
   template <typename Type, typename Enable = void>
   struct ArrayExec {
-    static void Exec(const ThisType& functor, KernelContext* ctx, const ExecBatch& batch,
-                     Datum* out) {
+    static Status Exec(const ThisType& functor, KernelContext* ctx,
+                       const ExecBatch& batch, Datum* out) {
       ARROW_LOG(FATAL) << "Missing ArrayExec specialization for output type "
                        << out->type();
+      return Status::NotImplemented("NYI");
     }
   };
 
   template <typename Type>
   struct ArrayExec<
       Type, enable_if_t<has_c_type<Type>::value && !is_boolean_type<Type>::value>> {
-    static void Exec(const ThisType& functor, KernelContext* ctx, const ArrayData& arg0,
-                     Datum* out) {
+    static Status Exec(const ThisType& functor, KernelContext* ctx, const ArrayData& arg0,
+                       Datum* out) {
+      Status st = Status::OK();
       ArrayData* out_arr = out->mutable_array();
       auto out_data = out_arr->GetMutableValues<OutValue>(1);
       VisitArrayValuesInline<Arg0Type>(
           arg0,
           [&](Arg0Value v) {
-            *out_data++ = functor.op.template Call<OutValue, Arg0Value>(ctx, v);
+            *out_data++ = functor.op.template Call<OutValue, Arg0Value>(ctx, v, &st);
           },
           [&]() {
             // null
             ++out_data;
           });
+      return st;
     }
   };
 
   template <typename Type>
   struct ArrayExec<Type, enable_if_base_binary<Type>> {
-    static void Exec(const ThisType& functor, KernelContext* ctx, const ArrayData& arg0,
-                     Datum* out) {
+    static Status Exec(const ThisType& functor, KernelContext* ctx, const ArrayData& arg0,
+                       Datum* out) {
       // NOTE: This code is not currently used by any kernels and has
       // suboptimal performance because it's recomputing the validity bitmap
       // that is already computed by the kernel execution layer. Consider
       // writing a lower-level "output adapter" for base binary types.
       typename TypeTraits<Type>::BuilderType builder;
-      VisitArrayValuesInline<Arg0Type>(
-          arg0,
-          [&](Arg0Value v) {
-            KERNEL_RETURN_IF_ERROR(ctx, builder.Append(functor.op.Call(ctx, v)));
-          },
-          [&]() { KERNEL_RETURN_IF_ERROR(ctx, builder.AppendNull()); });
-      if (!ctx->HasError()) {
+      Status st = Status::OK();
+      RETURN_NOT_OK(VisitArrayValuesInline<Arg0Type>(
+          arg0, [&](Arg0Value v) { return builder.Append(functor.op.Call(ctx, v, &st)); },
+          [&]() { return builder.AppendNull(); }));
+      if (st.ok()) {
         std::shared_ptr<ArrayData> result;
-        ctx->SetStatus(builder.FinishInternal(&result));
+        RETURN_NOT_OK(builder.FinishInternal(&result));
         out->value = std::move(result);
       }
+      return st;
     }
   };
 
   template <typename Type>
   struct ArrayExec<Type, enable_if_t<is_boolean_type<Type>::value>> {
-    static void Exec(const ThisType& functor, KernelContext* ctx, const ArrayData& arg0,
-                     Datum* out) {
+    static Status Exec(const ThisType& functor, KernelContext* ctx, const ArrayData& arg0,
+                       Datum* out) {
+      Status st = Status::OK();
       ArrayData* out_arr = out->mutable_array();
       FirstTimeBitmapWriter out_writer(out_arr->buffers[1]->mutable_data(),
                                        out_arr->offset, out_arr->length);
       VisitArrayValuesInline<Arg0Type>(
           arg0,
           [&](Arg0Value v) {
-            if (functor.op.template Call<OutValue, Arg0Value>(ctx, v)) {
+            if (functor.op.template Call<OutValue, Arg0Value>(ctx, v, &st)) {
               out_writer.Set();
             }
             out_writer.Next();
@@ -685,13 +676,15 @@ struct ScalarUnaryNotNullStateful {
             out_writer.Next();
           });
       out_writer.Finish();
+      return st;
     }
   };
 
   template <typename Type>
   struct ArrayExec<Type, enable_if_decimal<Type>> {
-    static void Exec(const ThisType& functor, KernelContext* ctx, const ArrayData& arg0,
-                     Datum* out) {
+    static Status Exec(const ThisType& functor, KernelContext* ctx, const ArrayData& arg0,
+                       Datum* out) {
+      Status st = Status::OK();
       ArrayData* out_arr = out->mutable_array();
       // Decimal128 data buffers are not safely reinterpret_cast-able on big-endian
       using endian_agnostic =
@@ -700,24 +693,28 @@ struct ScalarUnaryNotNullStateful {
       VisitArrayValuesInline<Arg0Type>(
           arg0,
           [&](Arg0Value v) {
-            functor.op.template Call<OutValue, Arg0Value>(ctx, v).ToBytes(
-                out_data++->data());
+            functor.op.template Call<OutValue, Arg0Value>(ctx, v, &st)
+                .ToBytes(out_data++->data());
           },
           [&]() { ++out_data; });
+      return st;
     }
   };
 
-  void Scalar(KernelContext* ctx, const Scalar& arg0, Datum* out) {
+  Status Scalar(KernelContext* ctx, const Scalar& arg0, Datum* out) {
+    Status st = Status::OK();
     if (arg0.is_valid) {
       Arg0Value arg0_val = UnboxScalar<Arg0Type>::Unbox(arg0);
-      BoxScalar<OutType>::Box(this->op.template Call<OutValue, Arg0Value>(ctx, arg0_val),
-                              out->scalar().get());
+      BoxScalar<OutType>::Box(
+          this->op.template Call<OutValue, Arg0Value>(ctx, arg0_val, &st),
+          out->scalar().get());
     }
+    return st;
   }
 
-  void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     if (batch[0].kind() == Datum::ARRAY) {
-      ArrayExec<OutType>::Exec(*this, ctx, *batch[0].array(), out);
+      return ArrayExec<OutType>::Exec(*this, ctx, *batch[0].array(), out);
     } else {
       return Scalar(ctx, *batch[0].scalar(), out);
     }
@@ -732,7 +729,7 @@ struct ScalarUnaryNotNull {
   using OutValue = typename GetOutputType<OutType>::T;
   using Arg0Value = typename GetViewType<Arg0Type>::T;
 
-  static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     // Seed kernel with dummy state
     ScalarUnaryNotNullStateful<OutType, Arg0Type, Op> kernel({});
     return kernel.Exec(ctx, batch, out);
@@ -751,8 +748,11 @@ struct ScalarUnaryNotNull {
 //
 // struct Op {
 //   template <typename OutValue, typename Arg0Value, typename Arg1Value>
-//   static OutValue Call(KernelContext* ctx, Arg0Value arg0, Arg1Value arg1) {
+//   static OutValue Call(KernelContext* ctx, Arg0Value arg0, Arg1Value arg1, Status* st)
+//   {
 //     // implementation
+//     // NOTE: "status" should only populated with errors,
+//     //       leave it unmodified to indicate Status::OK()
 //   }
 // };
 template <typename OutType, typename Arg0Type, typename Arg1Type, typename Op>
@@ -761,44 +761,52 @@ struct ScalarBinary {
   using Arg0Value = typename GetViewType<Arg0Type>::T;
   using Arg1Value = typename GetViewType<Arg1Type>::T;
 
-  static void ArrayArray(KernelContext* ctx, const ArrayData& arg0, const ArrayData& arg1,
-                         Datum* out) {
+  static Status ArrayArray(KernelContext* ctx, const ArrayData& arg0,
+                           const ArrayData& arg1, Datum* out) {
+    Status st = Status::OK();
     ArrayIterator<Arg0Type> arg0_it(arg0);
     ArrayIterator<Arg1Type> arg1_it(arg1);
-    OutputAdapter<OutType>::Write(ctx, out, [&]() -> OutValue {
-      return Op::template Call(ctx, arg0_it(), arg1_it());
-    });
+    RETURN_NOT_OK(OutputAdapter<OutType>::Write(ctx, out, [&]() -> OutValue {
+      return Op::template Call(ctx, arg0_it(), arg1_it(), &st);
+    }));
+    return st;
   }
 
-  static void ArrayScalar(KernelContext* ctx, const ArrayData& arg0, const Scalar& arg1,
-                          Datum* out) {
+  static Status ArrayScalar(KernelContext* ctx, const ArrayData& arg0, const Scalar& arg1,
+                            Datum* out) {
+    Status st = Status::OK();
     ArrayIterator<Arg0Type> arg0_it(arg0);
     auto arg1_val = UnboxScalar<Arg1Type>::Unbox(arg1);
-    OutputAdapter<OutType>::Write(ctx, out, [&]() -> OutValue {
-      return Op::template Call(ctx, arg0_it(), arg1_val);
-    });
+    RETURN_NOT_OK(OutputAdapter<OutType>::Write(ctx, out, [&]() -> OutValue {
+      return Op::template Call(ctx, arg0_it(), arg1_val, &st);
+    }));
+    return st;
   }
 
-  static void ScalarArray(KernelContext* ctx, const Scalar& arg0, const ArrayData& arg1,
-                          Datum* out) {
+  static Status ScalarArray(KernelContext* ctx, const Scalar& arg0, const ArrayData& arg1,
+                            Datum* out) {
+    Status st = Status::OK();
     auto arg0_val = UnboxScalar<Arg0Type>::Unbox(arg0);
     ArrayIterator<Arg1Type> arg1_it(arg1);
-    OutputAdapter<OutType>::Write(ctx, out, [&]() -> OutValue {
-      return Op::template Call(ctx, arg0_val, arg1_it());
-    });
+    RETURN_NOT_OK(OutputAdapter<OutType>::Write(ctx, out, [&]() -> OutValue {
+      return Op::template Call(ctx, arg0_val, arg1_it(), &st);
+    }));
+    return st;
   }
 
-  static void ScalarScalar(KernelContext* ctx, const Scalar& arg0, const Scalar& arg1,
-                           Datum* out) {
+  static Status ScalarScalar(KernelContext* ctx, const Scalar& arg0, const Scalar& arg1,
+                             Datum* out) {
+    Status st = Status::OK();
     if (out->scalar()->is_valid) {
       auto arg0_val = UnboxScalar<Arg0Type>::Unbox(arg0);
       auto arg1_val = UnboxScalar<Arg1Type>::Unbox(arg1);
-      BoxScalar<OutType>::Box(Op::template Call(ctx, arg0_val, arg1_val),
+      BoxScalar<OutType>::Box(Op::template Call(ctx, arg0_val, arg1_val, &st),
                               out->scalar().get());
     }
+    return st;
   }
 
-  static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     if (batch[0].kind() == Datum::ARRAY) {
       if (batch[1].kind() == Datum::ARRAY) {
         return ArrayArray(ctx, *batch[0].array(), *batch[1].array(), out);
@@ -829,19 +837,22 @@ struct ScalarBinaryNotNullStateful {
 
   // NOTE: In ArrayExec<Type>, Type is really OutputType
 
-  void ArrayArray(KernelContext* ctx, const ArrayData& arg0, const ArrayData& arg1,
-                  Datum* out) {
+  Status ArrayArray(KernelContext* ctx, const ArrayData& arg0, const ArrayData& arg1,
+                    Datum* out) {
+    Status st = Status::OK();
     OutputArrayWriter<OutType> writer(out->mutable_array());
     VisitTwoArrayValuesInline<Arg0Type, Arg1Type>(
         arg0, arg1,
         [&](Arg0Value u, Arg1Value v) {
-          writer.Write(op.template Call<OutValue, Arg0Value, Arg1Value>(ctx, u, v));
+          writer.Write(op.template Call<OutValue, Arg0Value, Arg1Value>(ctx, u, v, &st));
         },
         [&]() { writer.WriteNull(); });
+    return st;
   }
 
-  void ArrayScalar(KernelContext* ctx, const ArrayData& arg0, const Scalar& arg1,
-                   Datum* out) {
+  Status ArrayScalar(KernelContext* ctx, const ArrayData& arg0, const Scalar& arg1,
+                     Datum* out) {
+    Status st = Status::OK();
     OutputArrayWriter<OutType> writer(out->mutable_array());
     if (arg1.is_valid) {
       const auto arg1_val = UnboxScalar<Arg1Type>::Unbox(arg1);
@@ -849,14 +860,16 @@ struct ScalarBinaryNotNullStateful {
           arg0,
           [&](Arg0Value u) {
             writer.Write(
-                op.template Call<OutValue, Arg0Value, Arg1Value>(ctx, u, arg1_val));
+                op.template Call<OutValue, Arg0Value, Arg1Value>(ctx, u, arg1_val, &st));
           },
           [&]() { writer.WriteNull(); });
     }
+    return st;
   }
 
-  void ScalarArray(KernelContext* ctx, const Scalar& arg0, const ArrayData& arg1,
-                   Datum* out) {
+  Status ScalarArray(KernelContext* ctx, const Scalar& arg0, const ArrayData& arg1,
+                     Datum* out) {
+    Status st = Status::OK();
     OutputArrayWriter<OutType> writer(out->mutable_array());
     if (arg0.is_valid) {
       const auto arg0_val = UnboxScalar<Arg0Type>::Unbox(arg0);
@@ -864,24 +877,27 @@ struct ScalarBinaryNotNullStateful {
           arg1,
           [&](Arg1Value v) {
             writer.Write(
-                op.template Call<OutValue, Arg0Value, Arg1Value>(ctx, arg0_val, v));
+                op.template Call<OutValue, Arg0Value, Arg1Value>(ctx, arg0_val, v, &st));
           },
           [&]() { writer.WriteNull(); });
     }
+    return st;
   }
 
-  void ScalarScalar(KernelContext* ctx, const Scalar& arg0, const Scalar& arg1,
-                    Datum* out) {
+  Status ScalarScalar(KernelContext* ctx, const Scalar& arg0, const Scalar& arg1,
+                      Datum* out) {
+    Status st = Status::OK();
     if (arg0.is_valid && arg1.is_valid) {
       const auto arg0_val = UnboxScalar<Arg0Type>::Unbox(arg0);
       const auto arg1_val = UnboxScalar<Arg1Type>::Unbox(arg1);
       BoxScalar<OutType>::Box(
-          op.template Call<OutValue, Arg0Value, Arg1Value>(ctx, arg0_val, arg1_val),
+          op.template Call<OutValue, Arg0Value, Arg1Value>(ctx, arg0_val, arg1_val, &st),
           out->scalar().get());
     }
+    return st;
   }
 
-  void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     if (batch[0].kind() == Datum::ARRAY) {
       if (batch[1].kind() == Datum::ARRAY) {
         return ArrayArray(ctx, *batch[0].array(), *batch[1].array(), out);
@@ -908,7 +924,7 @@ struct ScalarBinaryNotNull {
   using Arg0Value = typename GetViewType<Arg0Type>::T;
   using Arg1Value = typename GetViewType<Arg1Type>::T;
 
-  static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     // Seed kernel with dummy state
     ScalarBinaryNotNullStateful<OutType, Arg0Type, Arg1Type, Op> kernel({});
     return kernel.Exec(ctx, batch, out);
diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate.cc b/cpp/src/arrow/compute/kernels/hash_aggregate.cc
index dccbe50d9cf..f45e82e04af 100644
--- a/cpp/src/arrow/compute/kernels/hash_aggregate.cc
+++ b/cpp/src/arrow/compute/kernels/hash_aggregate.cc
@@ -770,12 +770,11 @@ HashAggregateKernel MakeKernel(InputType argument_type) {
   HashAggregateKernel kernel;
 
   kernel.init = [](KernelContext* ctx,
-                   const KernelInitArgs& args) -> std::unique_ptr<KernelState> {
+                   const KernelInitArgs& args) -> Result<std::unique_ptr<KernelState>> {
     auto impl = ::arrow::internal::make_unique<Impl>();
     // FIXME(bkietz) Init should not take a type. That should be an unboxed template arg
     // for the Impl. Otherwise we're not exposing dispatch as well as we should.
-    ctx->SetStatus(impl->Init(ctx->exec_context(), args.options, args.inputs[0].type));
-    if (ctx->HasError()) return nullptr;
+    RETURN_NOT_OK(impl->Init(ctx->exec_context(), args.options, args.inputs[0].type));
     return std::move(impl);
   };
 
@@ -788,17 +787,18 @@ HashAggregateKernel MakeKernel(InputType argument_type) {
           }));
 
   kernel.consume = [](KernelContext* ctx, const ExecBatch& batch) {
-    ctx->SetStatus(checked_cast<GroupedAggregator*>(ctx->state())->Consume(batch));
+    return checked_cast<GroupedAggregator*>(ctx->state())->Consume(batch);
   };
 
   kernel.merge = [](KernelContext* ctx, KernelState&&, KernelState*) {
     // TODO(ARROW-11840) merge two hash tables
-    ctx->SetStatus(Status::NotImplemented("Merge hashed aggregations"));
+    return Status::NotImplemented("Merge hashed aggregations");
   };
 
   kernel.finalize = [](KernelContext* ctx, Datum* out) {
-    KERNEL_ASSIGN_OR_RAISE(*out, ctx,
-                           checked_cast<GroupedAggregator*>(ctx->state())->Finalize());
+    ARROW_ASSIGN_OR_RAISE(*out,
+                          checked_cast<GroupedAggregator*>(ctx->state())->Finalize());
+    return Status::OK();
   };
 
   return kernel;
@@ -843,14 +843,14 @@ Result<std::vector<std::unique_ptr<KernelState>>> InitKernels(
     }
 
     KernelContext kernel_ctx{ctx};
-    states[i] = kernels[i]->init(&kernel_ctx, KernelInitArgs{kernels[i],
-                                                             {
-                                                                 in_descrs[i].type,
-                                                                 uint32(),
-                                                                 uint32(),
-                                                             },
-                                                             options});
-    if (kernel_ctx.HasError()) return kernel_ctx.status();
+    ARROW_ASSIGN_OR_RAISE(
+        states[i], kernels[i]->init(&kernel_ctx, KernelInitArgs{kernels[i],
+                                                                {
+                                                                    in_descrs[i].type,
+                                                                    uint32(),
+                                                                    uint32(),
+                                                                },
+                                                                options}));
   }
 
   return std::move(states);
@@ -936,8 +936,7 @@ Result<Datum> GroupBy(const std::vector<Datum>& arguments, const std::vector<Dat
       batch_ctx.SetState(states[i].get());
       ARROW_ASSIGN_OR_RAISE(auto batch, ExecBatch::Make({argument_batch[i], id_batch,
                                                          Datum(grouper->num_groups())}));
-      kernels[i]->consume(&batch_ctx, batch);
-      if (batch_ctx.HasError()) return batch_ctx.status();
+      RETURN_NOT_OK(kernels[i]->consume(&batch_ctx, batch));
     }
   }
 
@@ -949,8 +948,7 @@ Result<Datum> GroupBy(const std::vector<Datum>& arguments, const std::vector<Dat
     KernelContext batch_ctx{ctx};
     batch_ctx.SetState(states[i].get());
     Datum out;
-    kernels[i]->finalize(&batch_ctx, &out);
-    if (batch_ctx.HasError()) return batch_ctx.status();
+    RETURN_NOT_OK(kernels[i]->finalize(&batch_ctx, &out));
     *it++ = out.array();
   }
 
diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
index 260721b08d9..7b9b23e7ff8 100644
--- a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
@@ -64,34 +64,37 @@ constexpr Unsigned to_unsigned(T signed_) {
 
 struct Add {
   template <typename T>
-  static constexpr enable_if_floating_point<T> Call(KernelContext*, T left, T right) {
+  static constexpr enable_if_floating_point<T> Call(KernelContext*, T left, T right,
+                                                    Status*) {
     return left + right;
   }
 
   template <typename T>
-  static constexpr enable_if_unsigned_integer<T> Call(KernelContext*, T left, T right) {
+  static constexpr enable_if_unsigned_integer<T> Call(KernelContext*, T left, T right,
+                                                      Status*) {
     return left + right;
   }
 
   template <typename T>
-  static constexpr enable_if_signed_integer<T> Call(KernelContext*, T left, T right) {
+  static constexpr enable_if_signed_integer<T> Call(KernelContext*, T left, T right,
+                                                    Status*) {
     return arrow::internal::SafeSignedAdd(left, right);
   }
 };
 
 struct AddChecked {
   template <typename T, typename Arg0, typename Arg1>
-  enable_if_integer<T> Call(KernelContext* ctx, Arg0 left, Arg1 right) {
+  enable_if_integer<T> Call(KernelContext*, Arg0 left, Arg1 right, Status* st) {
     static_assert(std::is_same<T, Arg0>::value && std::is_same<T, Arg1>::value, "");
     T result = 0;
     if (ARROW_PREDICT_FALSE(AddWithOverflow(left, right, &result))) {
-      ctx->SetStatus(Status::Invalid("overflow"));
+      *st = Status::Invalid("overflow");
     }
     return result;
   }
 
   template <typename T, typename Arg0, typename Arg1>
-  enable_if_floating_point<T> Call(KernelContext*, Arg0 left, Arg1 right) {
+  enable_if_floating_point<T> Call(KernelContext*, Arg0 left, Arg1 right, Status*) {
     static_assert(std::is_same<T, Arg0>::value && std::is_same<T, Arg1>::value, "");
     return left + right;
   }
@@ -99,34 +102,37 @@ struct AddChecked {
 
 struct Subtract {
   template <typename T>
-  static constexpr enable_if_floating_point<T> Call(KernelContext*, T left, T right) {
+  static constexpr enable_if_floating_point<T> Call(KernelContext*, T left, T right,
+                                                    Status*) {
     return left - right;
   }
 
   template <typename T>
-  static constexpr enable_if_unsigned_integer<T> Call(KernelContext*, T left, T right) {
+  static constexpr enable_if_unsigned_integer<T> Call(KernelContext*, T left, T right,
+                                                      Status*) {
     return left - right;
   }
 
   template <typename T>
-  static constexpr enable_if_signed_integer<T> Call(KernelContext*, T left, T right) {
+  static constexpr enable_if_signed_integer<T> Call(KernelContext*, T left, T right,
+                                                    Status*) {
     return arrow::internal::SafeSignedSubtract(left, right);
   }
 };
 
 struct SubtractChecked {
   template <typename T, typename Arg0, typename Arg1>
-  enable_if_integer<T> Call(KernelContext* ctx, Arg0 left, Arg1 right) {
+  enable_if_integer<T> Call(KernelContext*, Arg0 left, Arg1 right, Status* st) {
     static_assert(std::is_same<T, Arg0>::value && std::is_same<T, Arg1>::value, "");
     T result = 0;
     if (ARROW_PREDICT_FALSE(SubtractWithOverflow(left, right, &result))) {
-      ctx->SetStatus(Status::Invalid("overflow"));
+      *st = Status::Invalid("overflow");
     }
     return result;
   }
 
   template <typename T, typename Arg0, typename Arg1>
-  enable_if_floating_point<T> Call(KernelContext*, Arg0 left, Arg1 right) {
+  enable_if_floating_point<T> Call(KernelContext*, Arg0 left, Arg1 right, Status*) {
     static_assert(std::is_same<T, Arg0>::value && std::is_same<T, Arg1>::value, "");
     return left - right;
   }
@@ -143,17 +149,20 @@ struct Multiply {
   static_assert(std::is_same<decltype(uint64_t() * uint64_t()), uint64_t>::value, "");
 
   template <typename T>
-  static constexpr enable_if_floating_point<T> Call(KernelContext*, T left, T right) {
+  static constexpr enable_if_floating_point<T> Call(KernelContext*, T left, T right,
+                                                    Status*) {
     return left * right;
   }
 
   template <typename T>
-  static constexpr enable_if_unsigned_integer<T> Call(KernelContext*, T left, T right) {
+  static constexpr enable_if_unsigned_integer<T> Call(KernelContext*, T left, T right,
+                                                      Status*) {
     return left * right;
   }
 
   template <typename T>
-  static constexpr enable_if_signed_integer<T> Call(KernelContext*, T left, T right) {
+  static constexpr enable_if_signed_integer<T> Call(KernelContext*, T left, T right,
+                                                    Status*) {
     return to_unsigned(left) * to_unsigned(right);
   }
 
@@ -162,28 +171,28 @@ struct Multiply {
   // behaviour). Therefore we first cast to 32 bit unsigned integers where overflow is
   // well defined.
   template <typename T = void>
-  static constexpr int16_t Call(KernelContext*, int16_t left, int16_t right) {
+  static constexpr int16_t Call(KernelContext*, int16_t left, int16_t right, Status*) {
     return static_cast<uint32_t>(left) * static_cast<uint32_t>(right);
   }
   template <typename T = void>
-  static constexpr uint16_t Call(KernelContext*, uint16_t left, uint16_t right) {
+  static constexpr uint16_t Call(KernelContext*, uint16_t left, uint16_t right, Status*) {
     return static_cast<uint32_t>(left) * static_cast<uint32_t>(right);
   }
 };
 
 struct MultiplyChecked {
   template <typename T, typename Arg0, typename Arg1>
-  enable_if_integer<T> Call(KernelContext* ctx, Arg0 left, Arg1 right) {
+  enable_if_integer<T> Call(KernelContext*, Arg0 left, Arg1 right, Status* st) {
     static_assert(std::is_same<T, Arg0>::value && std::is_same<T, Arg1>::value, "");
     T result = 0;
     if (ARROW_PREDICT_FALSE(MultiplyWithOverflow(left, right, &result))) {
-      ctx->SetStatus(Status::Invalid("overflow"));
+      *st = Status::Invalid("overflow");
     }
     return result;
   }
 
   template <typename T, typename Arg0, typename Arg1>
-  enable_if_floating_point<T> Call(KernelContext*, Arg0 left, Arg1 right) {
+  enable_if_floating_point<T> Call(KernelContext*, Arg0 left, Arg1 right, Status*) {
     static_assert(std::is_same<T, Arg0>::value && std::is_same<T, Arg1>::value, "");
     return left * right;
   }
@@ -191,16 +200,17 @@ struct MultiplyChecked {
 
 struct Divide {
   template <typename T, typename Arg0, typename Arg1>
-  static enable_if_floating_point<T> Call(KernelContext* ctx, Arg0 left, Arg1 right) {
+  static enable_if_floating_point<T> Call(KernelContext*, Arg0 left, Arg1 right,
+                                          Status*) {
     return left / right;
   }
 
   template <typename T, typename Arg0, typename Arg1>
-  static enable_if_integer<T> Call(KernelContext* ctx, Arg0 left, Arg1 right) {
+  static enable_if_integer<T> Call(KernelContext*, Arg0 left, Arg1 right, Status* st) {
     T result;
     if (ARROW_PREDICT_FALSE(DivideWithOverflow(left, right, &result))) {
       if (right == 0) {
-        ctx->SetStatus(Status::Invalid("divide by zero"));
+        *st = Status::Invalid("divide by zero");
       } else {
         result = 0;
       }
@@ -211,24 +221,25 @@ struct Divide {
 
 struct DivideChecked {
   template <typename T, typename Arg0, typename Arg1>
-  static enable_if_integer<T> Call(KernelContext* ctx, Arg0 left, Arg1 right) {
+  static enable_if_integer<T> Call(KernelContext*, Arg0 left, Arg1 right, Status* st) {
     static_assert(std::is_same<T, Arg0>::value && std::is_same<T, Arg1>::value, "");
     T result;
     if (ARROW_PREDICT_FALSE(DivideWithOverflow(left, right, &result))) {
       if (right == 0) {
-        ctx->SetStatus(Status::Invalid("divide by zero"));
+        *st = Status::Invalid("divide by zero");
       } else {
-        ctx->SetStatus(Status::Invalid("overflow"));
+        *st = Status::Invalid("overflow");
       }
     }
     return result;
   }
 
   template <typename T, typename Arg0, typename Arg1>
-  static enable_if_floating_point<T> Call(KernelContext* ctx, Arg0 left, Arg1 right) {
+  static enable_if_floating_point<T> Call(KernelContext*, Arg0 left, Arg1 right,
+                                          Status* st) {
     static_assert(std::is_same<T, Arg0>::value && std::is_same<T, Arg1>::value, "");
     if (ARROW_PREDICT_FALSE(right == 0)) {
-      ctx->SetStatus(Status::Invalid("divide by zero"));
+      *st = Status::Invalid("divide by zero");
       return 0;
     }
     return left / right;
@@ -249,27 +260,25 @@ struct Power {
   }
 
   template <typename T>
-  static enable_if_integer<T> Call(KernelContext* ctx, T base, T exp) {
+  static enable_if_integer<T> Call(KernelContext*, T base, T exp, Status* st) {
     if (exp < 0) {
-      ctx->SetStatus(
-          Status::Invalid("integers to negative integer powers are not allowed"));
+      *st = Status::Invalid("integers to negative integer powers are not allowed");
       return 0;
     }
     return static_cast<T>(IntegerPower(base, exp));
   }
 
   template <typename T>
-  static enable_if_floating_point<T> Call(KernelContext* ctx, T base, T exp) {
+  static enable_if_floating_point<T> Call(KernelContext*, T base, T exp, Status*) {
     return std::pow(base, exp);
   }
 };
 
 struct PowerChecked {
   template <typename T, typename Arg0, typename Arg1>
-  static enable_if_integer<T> Call(KernelContext* ctx, Arg0 base, Arg1 exp) {
+  static enable_if_integer<T> Call(KernelContext*, Arg0 base, Arg1 exp, Status* st) {
     if (exp < 0) {
-      ctx->SetStatus(
-          Status::Invalid("integers to negative integer powers are not allowed"));
+      *st = Status::Invalid("integers to negative integer powers are not allowed");
       return 0;
     } else if (exp == 0) {
       return 1;
@@ -287,13 +296,13 @@ struct PowerChecked {
       bitmask >>= 1;
     }
     if (overflow) {
-      ctx->SetStatus(Status::Invalid("overflow"));
+      *st = Status::Invalid("overflow");
     }
     return pow;
   }
 
   template <typename T, typename Arg0, typename Arg1>
-  static enable_if_floating_point<T> Call(KernelContext* ctx, Arg0 base, Arg1 exp) {
+  static enable_if_floating_point<T> Call(KernelContext*, Arg0 base, Arg1 exp, Status*) {
     static_assert(std::is_same<T, Arg0>::value && std::is_same<T, Arg1>::value, "");
     return std::pow(base, exp);
   }
diff --git a/cpp/src/arrow/compute/kernels/scalar_boolean.cc b/cpp/src/arrow/compute/kernels/scalar_boolean.cc
index 009b968809c..d555a81392a 100644
--- a/cpp/src/arrow/compute/kernels/scalar_boolean.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_boolean.cc
@@ -96,57 +96,62 @@ inline Bitmap GetBitmap(const ArrayData& arr, int index) {
 }
 
 struct Invert {
-  static void Call(KernelContext* ctx, const Scalar& in, Scalar* out) {
+  static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) {
     *checked_cast<BooleanScalar*>(out) = InvertScalar(in);
+    return Status::OK();
   }
 
-  static void Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) {
+  static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) {
     GetBitmap(*out, 1).CopyFromInverted(GetBitmap(in, 1));
+    return Status::OK();
   }
 };
 
 template <typename Op>
 struct Commutative {
-  static void Call(KernelContext* ctx, const Scalar& left, const ArrayData& right,
-                   ArrayData* out) {
-    Op::Call(ctx, right, left, out);
+  static Status Call(KernelContext* ctx, const Scalar& left, const ArrayData& right,
+                     ArrayData* out) {
+    return Op::Call(ctx, right, left, out);
   }
 };
 
 struct And : Commutative<And> {
   using Commutative<And>::Call;
 
-  static void Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
-                   Scalar* out) {
+  static Status Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
+                     Scalar* out) {
     if (left.is_valid && right.is_valid) {
       checked_cast<BooleanScalar*>(out)->value =
           checked_cast<const BooleanScalar&>(left).value &&
           checked_cast<const BooleanScalar&>(right).value;
     }
+    return Status::OK();
   }
 
-  static void Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
-                   ArrayData* out) {
-    if (!right.is_valid) return;  // all null case
-
-    return checked_cast<const BooleanScalar&>(right).value
-               ? GetBitmap(*out, 1).CopyFrom(GetBitmap(left, 1))
-               : GetBitmap(*out, 1).SetBitsTo(false);
+  static Status Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
+                     ArrayData* out) {
+    if (right.is_valid) {
+      checked_cast<const BooleanScalar&>(right).value
+          ? GetBitmap(*out, 1).CopyFrom(GetBitmap(left, 1))
+          : GetBitmap(*out, 1).SetBitsTo(false);
+    }
+    return Status::OK();
   }
 
-  static void Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
-                   ArrayData* out) {
+  static Status Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
+                     ArrayData* out) {
     ::arrow::internal::BitmapAnd(left.buffers[1]->data(), left.offset,
                                  right.buffers[1]->data(), right.offset, right.length,
                                  out->offset, out->buffers[1]->mutable_data());
+    return Status::OK();
   }
 };
 
 struct KleeneAnd : Commutative<KleeneAnd> {
   using Commutative<KleeneAnd>::Call;
 
-  static void Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
-                   Scalar* out) {
+  static Status Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
+                     Scalar* out) {
     bool left_true = left.is_valid && checked_cast<const BooleanScalar&>(left).value;
     bool left_false = left.is_valid && !checked_cast<const BooleanScalar&>(left).value;
 
@@ -155,21 +160,24 @@ struct KleeneAnd : Commutative<KleeneAnd> {
 
     checked_cast<BooleanScalar*>(out)->value = left_true && right_true;
     out->is_valid = left_false || right_false || (left_true && right_true);
+    return Status::OK();
   }
 
-  static void Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
-                   ArrayData* out) {
+  static Status Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
+                     ArrayData* out) {
     bool right_true = right.is_valid && checked_cast<const BooleanScalar&>(right).value;
     bool right_false = right.is_valid && !checked_cast<const BooleanScalar&>(right).value;
 
     if (right_false) {
-      return GetBitmap(*out, 0).SetBitsTo(true),
-             GetBitmap(*out, 1).SetBitsTo(false);  // all false case
+      GetBitmap(*out, 0).SetBitsTo(true);
+      GetBitmap(*out, 1).SetBitsTo(false);  // all false case
+      return Status::OK();
     }
 
     if (right_true) {
-      return GetBitmap(*out, 0).CopyFrom(GetBitmap(left, 0)),
-             GetBitmap(*out, 1).CopyFrom(GetBitmap(left, 1));
+      GetBitmap(*out, 0).CopyFrom(GetBitmap(left, 0));
+      GetBitmap(*out, 1).CopyFrom(GetBitmap(left, 1));
+      return Status::OK();
     }
 
     // scalar was null: out[i] is valid iff left[i] was false
@@ -178,10 +186,11 @@ struct KleeneAnd : Commutative<KleeneAnd> {
                                     out->offset, out->buffers[0]->mutable_data());
     ::arrow::internal::CopyBitmap(left.buffers[1]->data(), left.offset, left.length,
                                   out->buffers[1]->mutable_data(), out->offset);
+    return Status::OK();
   }
 
-  static void Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
-                   ArrayData* out) {
+  static Status Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
+                     ArrayData* out) {
     if (left.GetNullCount() == 0 && right.GetNullCount() == 0) {
       GetBitmap(*out, 0).SetBitsTo(true);
       return And::Call(ctx, left, right, out);
@@ -193,43 +202,47 @@ struct KleeneAnd : Commutative<KleeneAnd> {
       *out_valid = left_false | right_false | (left_true & right_true);
     };
     ComputeKleene(compute_word, ctx, left, right, out);
+    return Status::OK();
   }
 };
 
 struct Or : Commutative<Or> {
   using Commutative<Or>::Call;
 
-  static void Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
-                   Scalar* out) {
+  static Status Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
+                     Scalar* out) {
     if (left.is_valid && right.is_valid) {
       checked_cast<BooleanScalar*>(out)->value =
           checked_cast<const BooleanScalar&>(left).value ||
           checked_cast<const BooleanScalar&>(right).value;
     }
+    return Status::OK();
   }
 
-  static void Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
-                   ArrayData* out) {
-    if (!right.is_valid) return;  // all null case
-
-    return checked_cast<const BooleanScalar&>(right).value
-               ? GetBitmap(*out, 1).SetBitsTo(true)
-               : GetBitmap(*out, 1).CopyFrom(GetBitmap(left, 1));
+  static Status Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
+                     ArrayData* out) {
+    if (right.is_valid) {
+      checked_cast<const BooleanScalar&>(right).value
+          ? GetBitmap(*out, 1).SetBitsTo(true)
+          : GetBitmap(*out, 1).CopyFrom(GetBitmap(left, 1));
+    }
+    return Status::OK();
   }
 
-  static void Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
-                   ArrayData* out) {
+  static Status Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
+                     ArrayData* out) {
     ::arrow::internal::BitmapOr(left.buffers[1]->data(), left.offset,
                                 right.buffers[1]->data(), right.offset, right.length,
                                 out->offset, out->buffers[1]->mutable_data());
+    return Status::OK();
   }
 };
 
 struct KleeneOr : Commutative<KleeneOr> {
   using Commutative<KleeneOr>::Call;
 
-  static void Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
-                   Scalar* out) {
+  static Status Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
+                     Scalar* out) {
     bool left_true = left.is_valid && checked_cast<const BooleanScalar&>(left).value;
     bool left_false = left.is_valid && !checked_cast<const BooleanScalar&>(left).value;
 
@@ -238,21 +251,24 @@ struct KleeneOr : Commutative<KleeneOr> {
 
     checked_cast<BooleanScalar*>(out)->value = left_true || right_true;
     out->is_valid = left_true || right_true || (left_false && right_false);
+    return Status::OK();
   }
 
-  static void Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
-                   ArrayData* out) {
+  static Status Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
+                     ArrayData* out) {
     bool right_true = right.is_valid && checked_cast<const BooleanScalar&>(right).value;
     bool right_false = right.is_valid && !checked_cast<const BooleanScalar&>(right).value;
 
     if (right_true) {
-      return GetBitmap(*out, 0).SetBitsTo(true),
-             GetBitmap(*out, 1).SetBitsTo(true);  // all true case
+      GetBitmap(*out, 0).SetBitsTo(true);
+      GetBitmap(*out, 1).SetBitsTo(true);  // all true case
+      return Status::OK();
     }
 
     if (right_false) {
-      return GetBitmap(*out, 0).CopyFrom(GetBitmap(left, 0)),
-             GetBitmap(*out, 1).CopyFrom(GetBitmap(left, 1));
+      GetBitmap(*out, 0).CopyFrom(GetBitmap(left, 0));
+      GetBitmap(*out, 1).CopyFrom(GetBitmap(left, 1));
+      return Status::OK();
     }
 
     // scalar was null: out[i] is valid iff left[i] was true
@@ -261,10 +277,11 @@ struct KleeneOr : Commutative<KleeneOr> {
                                  out->offset, out->buffers[0]->mutable_data());
     ::arrow::internal::CopyBitmap(left.buffers[1]->data(), left.offset, left.length,
                                   out->buffers[1]->mutable_data(), out->offset);
+    return Status::OK();
   }
 
-  static void Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
-                   ArrayData* out) {
+  static Status Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
+                     ArrayData* out) {
     if (left.GetNullCount() == 0 && right.GetNullCount() == 0) {
       GetBitmap(*out, 0).SetBitsTo(true);
       return Or::Call(ctx, left, right, out);
@@ -277,86 +294,94 @@ struct KleeneOr : Commutative<KleeneOr> {
       *out_valid = left_true | right_true | (left_false & right_false);
     };
 
-    return ComputeKleene(compute_word, ctx, left, right, out);
+    ComputeKleene(compute_word, ctx, left, right, out);
+    return Status::OK();
   }
 };
 
 struct Xor : Commutative<Xor> {
   using Commutative<Xor>::Call;
 
-  static void Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
-                   Scalar* out) {
+  static Status Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
+                     Scalar* out) {
     if (left.is_valid && right.is_valid) {
       checked_cast<BooleanScalar*>(out)->value =
           checked_cast<const BooleanScalar&>(left).value ^
           checked_cast<const BooleanScalar&>(right).value;
     }
+    return Status::OK();
   }
 
-  static void Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
-                   ArrayData* out) {
-    if (!right.is_valid) return;  // all null case
-
-    return checked_cast<const BooleanScalar&>(right).value
-               ? GetBitmap(*out, 1).CopyFromInverted(GetBitmap(left, 1))
-               : GetBitmap(*out, 1).CopyFrom(GetBitmap(left, 1));
+  static Status Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
+                     ArrayData* out) {
+    if (right.is_valid) {
+      checked_cast<const BooleanScalar&>(right).value
+          ? GetBitmap(*out, 1).CopyFromInverted(GetBitmap(left, 1))
+          : GetBitmap(*out, 1).CopyFrom(GetBitmap(left, 1));
+    }
+    return Status::OK();
   }
 
-  static void Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
-                   ArrayData* out) {
+  static Status Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
+                     ArrayData* out) {
     ::arrow::internal::BitmapXor(left.buffers[1]->data(), left.offset,
                                  right.buffers[1]->data(), right.offset, right.length,
                                  out->offset, out->buffers[1]->mutable_data());
+    return Status::OK();
   }
 };
 
 struct AndNot {
-  static void Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
-                   Scalar* out) {
-    And::Call(ctx, left, InvertScalar(right), out);
+  static Status Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
+                     Scalar* out) {
+    return And::Call(ctx, left, InvertScalar(right), out);
   }
 
-  static void Call(KernelContext* ctx, const Scalar& left, const ArrayData& right,
-                   ArrayData* out) {
-    if (!left.is_valid) return;  // all null case
-
-    return checked_cast<const BooleanScalar&>(left).value
-               ? GetBitmap(*out, 1).CopyFromInverted(GetBitmap(right, 1))
-               : GetBitmap(*out, 1).SetBitsTo(false);
+  static Status Call(KernelContext* ctx, const Scalar& left, const ArrayData& right,
+                     ArrayData* out) {
+    if (left.is_valid) {
+      checked_cast<const BooleanScalar&>(left).value
+          ? GetBitmap(*out, 1).CopyFromInverted(GetBitmap(right, 1))
+          : GetBitmap(*out, 1).SetBitsTo(false);
+    }
+    return Status::OK();
   }
 
-  static void Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
-                   ArrayData* out) {
-    And::Call(ctx, left, InvertScalar(right), out);
+  static Status Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
+                     ArrayData* out) {
+    return And::Call(ctx, left, InvertScalar(right), out);
   }
 
-  static void Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
-                   ArrayData* out) {
+  static Status Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
+                     ArrayData* out) {
     ::arrow::internal::BitmapAndNot(left.buffers[1]->data(), left.offset,
                                     right.buffers[1]->data(), right.offset, right.length,
                                     out->offset, out->buffers[1]->mutable_data());
+    return Status::OK();
   }
 };
 
 struct KleeneAndNot {
-  static void Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
-                   Scalar* out) {
-    KleeneAnd::Call(ctx, left, InvertScalar(right), out);
+  static Status Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
+                     Scalar* out) {
+    return KleeneAnd::Call(ctx, left, InvertScalar(right), out);
   }
 
-  static void Call(KernelContext* ctx, const Scalar& left, const ArrayData& right,
-                   ArrayData* out) {
+  static Status Call(KernelContext* ctx, const Scalar& left, const ArrayData& right,
+                     ArrayData* out) {
     bool left_true = left.is_valid && checked_cast<const BooleanScalar&>(left).value;
     bool left_false = left.is_valid && !checked_cast<const BooleanScalar&>(left).value;
 
     if (left_false) {
-      return GetBitmap(*out, 0).SetBitsTo(true),
-             GetBitmap(*out, 1).SetBitsTo(false);  // all false case
+      GetBitmap(*out, 0).SetBitsTo(true);
+      GetBitmap(*out, 1).SetBitsTo(false);  // all false case
+      return Status::OK();
     }
 
     if (left_true) {
-      return GetBitmap(*out, 0).CopyFrom(GetBitmap(right, 0)),
-             GetBitmap(*out, 1).CopyFromInverted(GetBitmap(right, 1));
+      GetBitmap(*out, 0).CopyFrom(GetBitmap(right, 0));
+      GetBitmap(*out, 1).CopyFromInverted(GetBitmap(right, 1));
+      return Status::OK();
     }
 
     // scalar was null: out[i] is valid iff right[i] was true
@@ -365,15 +390,16 @@ struct KleeneAndNot {
                                  out->offset, out->buffers[0]->mutable_data());
     ::arrow::internal::InvertBitmap(right.buffers[1]->data(), right.offset, right.length,
                                     out->buffers[1]->mutable_data(), out->offset);
+    return Status::OK();
   }
 
-  static void Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
-                   ArrayData* out) {
-    KleeneAnd::Call(ctx, left, InvertScalar(right), out);
+  static Status Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
+                     ArrayData* out) {
+    return KleeneAnd::Call(ctx, left, InvertScalar(right), out);
   }
 
-  static void Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
-                   ArrayData* out) {
+  static Status Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
+                     ArrayData* out) {
     if (left.GetNullCount() == 0 && right.GetNullCount() == 0) {
       GetBitmap(*out, 0).SetBitsTo(true);
       return AndNot::Call(ctx, left, right, out);
@@ -386,7 +412,8 @@ struct KleeneAndNot {
       *out_valid = left_false | right_true | (left_true & right_false);
     };
 
-    return ComputeKleene(compute_word, ctx, left, right, out);
+    ComputeKleene(compute_word, ctx, left, right, out);
+    return Status::OK();
   }
 };
 
diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_boolean.cc b/cpp/src/arrow/compute/kernels/scalar_cast_boolean.cc
index e529d3791aa..dad94c1ace7 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_boolean.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_boolean.cc
@@ -31,17 +31,17 @@ namespace internal {
 
 struct IsNonZero {
   template <typename OutValue, typename Arg0Value>
-  static OutValue Call(KernelContext*, Arg0Value val) {
+  static OutValue Call(KernelContext*, Arg0Value val, Status*) {
     return val != 0;
   }
 };
 
 struct ParseBooleanString {
   template <typename OutValue, typename Arg0Value>
-  static OutValue Call(KernelContext* ctx, Arg0Value val) {
+  static OutValue Call(KernelContext*, Arg0Value val, Status* st) {
     bool result = false;
     if (ARROW_PREDICT_FALSE(!ParseValue<BooleanType>(val.data(), val.size(), &result))) {
-      ctx->SetStatus(Status::Invalid("Failed to parse value: ", val));
+      *st = Status::Invalid("Failed to parse value: ", val);
     }
     return result;
   }
diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc b/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc
index 7221722d53a..f42635c5dcd 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc
@@ -148,7 +148,7 @@ void CastNumberToNumberUnsafe(Type::type in_type, Type::type out_type, const Dat
 
 // ----------------------------------------------------------------------
 
-void UnpackDictionary(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status UnpackDictionary(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
   DCHECK(out->is_array());
 
   DictionaryArray dict_arr(batch[0].array());
@@ -156,32 +156,32 @@ void UnpackDictionary(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
 
   const auto& dict_type = *dict_arr.dictionary()->type();
   if (!dict_type.Equals(options.to_type) && !CanCast(dict_type, *options.to_type)) {
-    ctx->SetStatus(Status::Invalid("Cast type ", options.to_type->ToString(),
-                                   " incompatible with dictionary type ",
-                                   dict_type.ToString()));
-    return;
+    return Status::Invalid("Cast type ", options.to_type->ToString(),
+                           " incompatible with dictionary type ", dict_type.ToString());
   }
 
-  KERNEL_ASSIGN_OR_RAISE(*out, ctx,
-                         Take(Datum(dict_arr.dictionary()), Datum(dict_arr.indices()),
-                              TakeOptions::Defaults(), ctx->exec_context()));
+  ARROW_ASSIGN_OR_RAISE(*out,
+                        Take(Datum(dict_arr.dictionary()), Datum(dict_arr.indices()),
+                             TakeOptions::Defaults(), ctx->exec_context()));
 
   if (!dict_type.Equals(options.to_type)) {
-    KERNEL_ASSIGN_OR_RAISE(*out, ctx, Cast(*out, options));
+    ARROW_ASSIGN_OR_RAISE(*out, Cast(*out, options));
   }
+  return Status::OK();
 }
 
-void OutputAllNull(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status OutputAllNull(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
   if (out->is_scalar()) {
     out->scalar()->is_valid = false;
-    return;
+  } else {
+    ArrayData* output = out->mutable_array();
+    output->buffers = {nullptr};
+    output->null_count = batch.length;
   }
-  ArrayData* output = out->mutable_array();
-  output->buffers = {nullptr};
-  output->null_count = batch.length;
+  return Status::OK();
 }
 
-void CastFromExtension(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status CastFromExtension(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
   const CastOptions& options = checked_cast<const CastState*>(ctx->state())->options;
 
   const DataType& in_type = *batch[0].type();
@@ -190,20 +190,20 @@ void CastFromExtension(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
   ExtensionArray extension(batch[0].array());
 
   Datum casted_storage;
-  KERNEL_RETURN_IF_ERROR(
-      ctx, Cast(*extension.storage(), out->type(), options, ctx->exec_context())
-               .Value(&casted_storage));
+  RETURN_NOT_OK(Cast(*extension.storage(), out->type(), options, ctx->exec_context())
+                    .Value(&casted_storage));
   out->value = casted_storage.array();
+  return Status::OK();
 }
 
-void CastFromNull(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
-  if (batch[0].is_scalar()) return;
-
-  ArrayData* output = out->mutable_array();
-  std::shared_ptr<Array> nulls;
-  Status s = MakeArrayOfNull(output->type, batch.length).Value(&nulls);
-  KERNEL_RETURN_IF_ERROR(ctx, s);
-  out->value = nulls->data();
+Status CastFromNull(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  if (!batch[0].is_scalar()) {
+    ArrayData* output = out->mutable_array();
+    std::shared_ptr<Array> nulls;
+    RETURN_NOT_OK(MakeArrayOfNull(output->type, batch.length).Value(&nulls));
+    out->value = nulls->data();
+  }
+  return Status::OK();
 }
 
 Result<ValueDescr> ResolveOutputFromOptions(KernelContext* ctx,
@@ -223,7 +223,7 @@ Result<ValueDescr> ResolveOutputFromOptions(KernelContext* ctx,
 
 OutputType kOutputTargetType(ResolveOutputFromOptions);
 
-void ZeroCopyCastExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status ZeroCopyCastExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
   DCHECK_EQ(batch[0].kind(), Datum::ARRAY);
   // Make a copy of the buffers into a destination array without carrying
   // the type
@@ -234,6 +234,7 @@ void ZeroCopyCastExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
   output->buffers = input.buffers;
   output->offset = input.offset;
   output->child_data = input.child_data;
+  return Status::OK();
 }
 
 void AddZeroCopyCast(Type::type in_type_id, InputType in_type, OutputType out_type,
diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_internal.h b/cpp/src/arrow/compute/kernels/scalar_cast_internal.h
index dabf0c2b061..2419d898a68 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_internal.h
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_internal.h
@@ -37,10 +37,10 @@ struct CastFunctor {};
 template <typename O, typename I>
 struct CastFunctor<
     O, I, enable_if_t<std::is_same<O, I>::value && is_parameter_free_type<I>::value>> {
-  static void Exec(KernelContext*, const ExecBatch&, Datum*) {}
+  static Status Exec(KernelContext*, const ExecBatch&, Datum*) { return Status::OK(); }
 };
 
-void CastFromExtension(KernelContext* ctx, const ExecBatch& batch, Datum* out);
+Status CastFromExtension(KernelContext* ctx, const ExecBatch& batch, Datum* out);
 
 // Utility for numeric casts
 void CastNumberToNumberUnsafe(Type::type in_type, Type::type out_type, const Datum& input,
@@ -49,11 +49,11 @@ void CastNumberToNumberUnsafe(Type::type in_type, Type::type out_type, const Dat
 // ----------------------------------------------------------------------
 // Dictionary to other things
 
-void UnpackDictionary(KernelContext* ctx, const ExecBatch& batch, Datum* out);
+Status UnpackDictionary(KernelContext* ctx, const ExecBatch& batch, Datum* out);
 
-void OutputAllNull(KernelContext* ctx, const ExecBatch& batch, Datum* out);
+Status OutputAllNull(KernelContext* ctx, const ExecBatch& batch, Datum* out);
 
-void CastFromNull(KernelContext* ctx, const ExecBatch& batch, Datum* out);
+Status CastFromNull(KernelContext* ctx, const ExecBatch& batch, Datum* out);
 
 // Adds a cast function where CastFunctor is specialized and the input and output
 // types are parameter free (have a type_singleton). Scalar inputs are handled by
@@ -65,7 +65,7 @@ void AddSimpleCast(InputType in_ty, OutputType out_ty, CastFunction* func) {
       TrivialScalarUnaryAsArraysExec(CastFunctor<OutType, InType>::Exec)));
 }
 
-void ZeroCopyCastExec(KernelContext* ctx, const ExecBatch& batch, Datum* out);
+Status ZeroCopyCastExec(KernelContext* ctx, const ExecBatch& batch, Datum* out);
 
 void AddZeroCopyCast(Type::type in_type_id, InputType in_type, OutputType out_type,
                      CastFunction* func);
diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc b/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc
index 1d81be48288..9364120c133 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc
@@ -35,7 +35,7 @@ namespace compute {
 namespace internal {
 
 template <typename Type>
-void CastListExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status CastListExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
   using offset_type = typename Type::offset_type;
   using ScalarType = typename TypeTraits<Type>::ScalarType;
 
@@ -49,13 +49,12 @@ void CastListExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
 
     DCHECK(!out_scalar->is_valid);
     if (in_scalar.is_valid) {
-      KERNEL_ASSIGN_OR_RAISE(
-          out_scalar->value, ctx,
-          Cast(*in_scalar.value, child_type, options, ctx->exec_context()));
+      ARROW_ASSIGN_OR_RAISE(out_scalar->value, Cast(*in_scalar.value, child_type, options,
+                                                    ctx->exec_context()));
 
       out_scalar->is_valid = true;
     }
-    return;
+    return Status::OK();
   }
 
   const ArrayData& in_array = *batch[0].array();
@@ -66,11 +65,11 @@ void CastListExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
   Datum values = in_array.child_data[0];
 
   if (in_array.offset != 0) {
-    KERNEL_ASSIGN_OR_RAISE(out_array->buffers[0], ctx,
-                           CopyBitmap(ctx->memory_pool(), in_array.buffers[0]->data(),
-                                      in_array.offset, in_array.length));
-    KERNEL_ASSIGN_OR_RAISE(out_array->buffers[1], ctx,
-                           ctx->Allocate(sizeof(offset_type) * (in_array.length + 1)));
+    ARROW_ASSIGN_OR_RAISE(out_array->buffers[0],
+                          CopyBitmap(ctx->memory_pool(), in_array.buffers[0]->data(),
+                                     in_array.offset, in_array.length));
+    ARROW_ASSIGN_OR_RAISE(out_array->buffers[1],
+                          ctx->Allocate(sizeof(offset_type) * (in_array.length + 1)));
 
     auto offsets = in_array.GetValues<offset_type>(1);
     auto shifted_offsets = out_array->GetMutableValues<offset_type>(1);
@@ -81,11 +80,12 @@ void CastListExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     values = in_array.child_data[0]->Slice(offsets[0], offsets[in_array.length]);
   }
 
-  KERNEL_ASSIGN_OR_RAISE(Datum cast_values, ctx,
-                         Cast(values, child_type, options, ctx->exec_context()));
+  ARROW_ASSIGN_OR_RAISE(Datum cast_values,
+                        Cast(values, child_type, options, ctx->exec_context()));
 
   DCHECK_EQ(Datum::ARRAY, cast_values.kind());
   out_array->child_data.push_back(cast_values.array());
+  return Status::OK();
 }
 
 template <typename Type>
diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc b/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc
index 160c4ce8857..cc7b533f262 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc
@@ -36,16 +36,18 @@ using internal::ParseValue;
 namespace compute {
 namespace internal {
 
-void CastIntegerToInteger(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status CastIntegerToInteger(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
   const auto& options = checked_cast<const CastState*>(ctx->state())->options;
   if (!options.allow_int_overflow) {
-    KERNEL_RETURN_IF_ERROR(ctx, IntegersCanFit(batch[0], *out->type()));
+    RETURN_NOT_OK(IntegersCanFit(batch[0], *out->type()));
   }
   CastNumberToNumberUnsafe(batch[0].type()->id(), out->type()->id(), batch[0], out);
+  return Status::OK();
 }
 
-void CastFloatingToFloating(KernelContext*, const ExecBatch& batch, Datum* out) {
+Status CastFloatingToFloating(KernelContext*, const ExecBatch& batch, Datum* out) {
   CastNumberToNumberUnsafe(batch[0].type()->id(), out->type()->id(), batch[0], out);
+  return Status::OK();
 }
 
 // ----------------------------------------------------------------------
@@ -168,12 +170,13 @@ Status CheckFloatToIntTruncation(const Datum& input, const Datum& output) {
   return Status::OK();
 }
 
-void CastFloatingToInteger(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status CastFloatingToInteger(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
   const auto& options = checked_cast<const CastState*>(ctx->state())->options;
   CastNumberToNumberUnsafe(batch[0].type()->id(), out->type()->id(), batch[0], out);
   if (!options.allow_float_truncate) {
-    KERNEL_RETURN_IF_ERROR(ctx, CheckFloatToIntTruncation(batch[0], *out));
+    RETURN_NOT_OK(CheckFloatToIntTruncation(batch[0], *out));
   }
+  return Status::OK();
 }
 
 // ----------------------------------------------------------------------
@@ -246,13 +249,14 @@ Status CheckForIntegerToFloatingTruncation(const Datum& input, Type::type out_ty
   return Status::OK();
 }
 
-void CastIntegerToFloating(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status CastIntegerToFloating(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
   const auto& options = checked_cast<const CastState*>(ctx->state())->options;
   Type::type out_type = out->type()->id();
   if (!options.allow_float_truncate) {
-    KERNEL_RETURN_IF_ERROR(ctx, CheckForIntegerToFloatingTruncation(batch[0], out_type));
+    RETURN_NOT_OK(CheckForIntegerToFloatingTruncation(batch[0], out_type));
   }
   CastNumberToNumberUnsafe(batch[0].type()->id(), out_type, batch[0], out);
+  return Status::OK();
 }
 
 // ----------------------------------------------------------------------
@@ -260,7 +264,7 @@ void CastIntegerToFloating(KernelContext* ctx, const ExecBatch& batch, Datum* ou
 
 struct BooleanToNumber {
   template <typename OutValue, typename Arg0Value>
-  static OutValue Call(KernelContext*, Arg0Value val) {
+  static OutValue Call(KernelContext*, Arg0Value val, Status*) {
     constexpr auto kOne = static_cast<OutValue>(1);
     constexpr auto kZero = static_cast<OutValue>(0);
     return val ? kOne : kZero;
@@ -269,8 +273,9 @@ struct BooleanToNumber {
 
 template <typename O>
 struct CastFunctor<O, BooleanType, enable_if_number<O>> {
-  static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
-    applicator::ScalarUnary<O, BooleanType, BooleanToNumber>::Exec(ctx, batch, out);
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    return applicator::ScalarUnary<O, BooleanType, BooleanToNumber>::Exec(ctx, batch,
+                                                                          out);
   }
 };
 
@@ -280,12 +285,11 @@ struct CastFunctor<O, BooleanType, enable_if_number<O>> {
 template <typename OutType>
 struct ParseString {
   template <typename OutValue, typename Arg0Value>
-  OutValue Call(KernelContext* ctx, Arg0Value val) const {
+  OutValue Call(KernelContext* ctx, Arg0Value val, Status* st) const {
     OutValue result = OutValue(0);
     if (ARROW_PREDICT_FALSE(!ParseValue<OutType>(val.data(), val.size(), &result))) {
-      ctx->SetStatus(Status::Invalid("Failed to parse string: '", val,
-                                     "' as a scalar of type ",
-                                     TypeTraits<OutType>::type_singleton()->ToString()));
+      *st = Status::Invalid("Failed to parse string: '", val, "' as a scalar of type ",
+                            TypeTraits<OutType>::type_singleton()->ToString());
     }
     return result;
   }
@@ -293,8 +297,8 @@ struct ParseString {
 
 template <typename O, typename I>
 struct CastFunctor<O, I, enable_if_base_binary<I>> {
-  static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
-    applicator::ScalarUnaryNotNull<O, I, ParseString<O>>::Exec(ctx, batch, out);
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    return applicator::ScalarUnaryNotNull<O, I, ParseString<O>>::Exec(ctx, batch, out);
   }
 };
 
@@ -303,12 +307,12 @@ struct CastFunctor<O, I, enable_if_base_binary<I>> {
 
 struct DecimalToIntegerMixin {
   template <typename OutValue, typename Arg0Value>
-  OutValue ToInteger(KernelContext* ctx, const Arg0Value& val) const {
+  OutValue ToInteger(KernelContext* ctx, const Arg0Value& val, Status* st) const {
     constexpr auto min_value = std::numeric_limits<OutValue>::min();
     constexpr auto max_value = std::numeric_limits<OutValue>::max();
 
     if (!allow_int_overflow_ && ARROW_PREDICT_FALSE(val < min_value || val > max_value)) {
-      ctx->SetStatus(Status::Invalid("Integer value out of bounds"));
+      *st = Status::Invalid("Integer value out of bounds");
       return OutValue{};  // Zero
     } else {
       return static_cast<OutValue>(val.low_bits());
@@ -326,8 +330,8 @@ struct UnsafeUpscaleDecimalToInteger : public DecimalToIntegerMixin {
   using DecimalToIntegerMixin::DecimalToIntegerMixin;
 
   template <typename OutValue, typename Arg0Value>
-  OutValue Call(KernelContext* ctx, Arg0Value val) const {
-    return ToInteger<OutValue>(ctx, val.IncreaseScaleBy(-in_scale_));
+  OutValue Call(KernelContext* ctx, Arg0Value val, Status* st) const {
+    return ToInteger<OutValue>(ctx, val.IncreaseScaleBy(-in_scale_), st);
   }
 };
 
@@ -335,8 +339,8 @@ struct UnsafeDownscaleDecimalToInteger : public DecimalToIntegerMixin {
   using DecimalToIntegerMixin::DecimalToIntegerMixin;
 
   template <typename OutValue, typename Arg0Value>
-  OutValue Call(KernelContext* ctx, Arg0Value val) const {
-    return ToInteger<OutValue>(ctx, val.ReduceScaleBy(in_scale_, false));
+  OutValue Call(KernelContext* ctx, Arg0Value val, Status* st) const {
+    return ToInteger<OutValue>(ctx, val.ReduceScaleBy(in_scale_, false), st);
   }
 };
 
@@ -344,13 +348,13 @@ struct SafeRescaleDecimalToInteger : public DecimalToIntegerMixin {
   using DecimalToIntegerMixin::DecimalToIntegerMixin;
 
   template <typename OutValue, typename Arg0Value>
-  OutValue Call(KernelContext* ctx, Arg0Value val) const {
+  OutValue Call(KernelContext* ctx, Arg0Value val, Status* st) const {
     auto result = val.Rescale(in_scale_, 0);
     if (ARROW_PREDICT_FALSE(!result.ok())) {
-      ctx->SetStatus(result.status());
+      *st = result.status();
       return OutValue{};  // Zero
     } else {
-      return ToInteger<OutValue>(ctx, *result);
+      return ToInteger<OutValue>(ctx, *result, st);
     }
   }
 };
@@ -360,7 +364,7 @@ struct CastFunctor<O, I,
                    enable_if_t<is_integer_type<O>::value && is_decimal_type<I>::value>> {
   using out_type = typename O::c_type;
 
-  static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     const auto& options = checked_cast<const CastState*>(ctx->state())->options;
 
     const auto& in_type_inst = checked_cast<const I&>(*batch[0].type());
@@ -420,7 +424,7 @@ struct DecimalConversions<Decimal128, Decimal128> {
 
 struct UnsafeUpscaleDecimal {
   template <typename OutValue, typename Arg0Value>
-  OutValue Call(KernelContext* ctx, Arg0Value val) const {
+  OutValue Call(KernelContext*, Arg0Value val, Status*) const {
     using Conv = DecimalConversions<OutValue, Arg0Value>;
     return Conv::ConvertOutput(Conv::ConvertInput(std::move(val)).IncreaseScaleBy(by_));
   }
@@ -429,7 +433,7 @@ struct UnsafeUpscaleDecimal {
 
 struct UnsafeDownscaleDecimal {
   template <typename OutValue, typename Arg0Value>
-  OutValue Call(KernelContext* ctx, Arg0Value val) const {
+  OutValue Call(KernelContext*, Arg0Value val, Status*) const {
     using Conv = DecimalConversions<OutValue, Arg0Value>;
     return Conv::ConvertOutput(
         Conv::ConvertInput(std::move(val)).ReduceScaleBy(by_, false));
@@ -439,12 +443,12 @@ struct UnsafeDownscaleDecimal {
 
 struct SafeRescaleDecimal {
   template <typename OutValue, typename Arg0Value>
-  OutValue Call(KernelContext* ctx, Arg0Value val) const {
+  OutValue Call(KernelContext*, Arg0Value val, Status* st) const {
     using Conv = DecimalConversions<OutValue, Arg0Value>;
     auto maybe_rescaled =
         Conv::ConvertInput(std::move(val)).Rescale(in_scale_, out_scale_);
     if (ARROW_PREDICT_FALSE(!maybe_rescaled.ok())) {
-      ctx->SetStatus(maybe_rescaled.status());
+      *st = maybe_rescaled.status();
       return {};  // Zero
     }
 
@@ -452,8 +456,7 @@ struct SafeRescaleDecimal {
       return Conv::ConvertOutput(maybe_rescaled.MoveValueUnsafe());
     }
 
-    ctx->SetStatus(
-        Status::Invalid("Decimal value does not fit in precision ", out_precision_));
+    *st = Status::Invalid("Decimal value does not fit in precision ", out_precision_);
     return {};  // Zero
   }
 
@@ -463,7 +466,7 @@ struct SafeRescaleDecimal {
 template <typename O, typename I>
 struct CastFunctor<O, I,
                    enable_if_t<is_decimal_type<O>::value && is_decimal_type<I>::value>> {
-  static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     const auto& options = checked_cast<const CastState*>(ctx->state())->options;
 
     const auto& in_type = checked_cast<const I&>(*batch[0].type());
@@ -497,7 +500,7 @@ struct CastFunctor<O, I,
 
 struct RealToDecimal {
   template <typename OutValue, typename RealType>
-  OutValue Call(KernelContext* ctx, RealType val) const {
+  OutValue Call(KernelContext*, RealType val, Status* st) const {
     auto maybe_decimal = OutValue::FromReal(val, out_precision_, out_scale_);
 
     if (ARROW_PREDICT_TRUE(maybe_decimal.ok())) {
@@ -505,7 +508,7 @@ struct RealToDecimal {
     }
 
     if (!allow_truncate_) {
-      ctx->SetStatus(maybe_decimal.status());
+      *st = maybe_decimal.status();
     }
     return {};  // Zero
   }
@@ -517,7 +520,7 @@ struct RealToDecimal {
 template <typename O, typename I>
 struct CastFunctor<O, I,
                    enable_if_t<is_decimal_type<O>::value && is_floating_type<I>::value>> {
-  static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     const auto& options = checked_cast<const CastState*>(ctx->state())->options;
     const auto& out_type = checked_cast<const O&>(*out->type());
     const auto out_scale = out_type.scale();
@@ -534,7 +537,7 @@ struct CastFunctor<O, I,
 
 struct DecimalToReal {
   template <typename RealType, typename Arg0Value>
-  RealType Call(KernelContext* ctx, const Arg0Value& val) const {
+  RealType Call(KernelContext*, const Arg0Value& val, Status*) const {
     return val.template ToReal<RealType>(in_scale_);
   }
 
@@ -544,7 +547,7 @@ struct DecimalToReal {
 template <typename O, typename I>
 struct CastFunctor<O, I,
                    enable_if_t<is_floating_type<O>::value && is_decimal_type<I>::value>> {
-  static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     const auto& in_type = checked_cast<const I&>(*batch[0].type());
     const auto in_scale = in_type.scale();
 
diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_string.cc b/cpp/src/arrow/compute/kernels/scalar_cast_string.cc
index 6f965a46676..3ce537b7223 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_string.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_string.cc
@@ -48,11 +48,11 @@ struct NumericToStringCastFunctor {
   using BuilderType = typename TypeTraits<O>::BuilderType;
   using FormatterType = StringFormatter<I>;
 
-  static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     DCHECK(out->is_array());
     const ArrayData& input = *batch[0].array();
     ArrayData* output = out->mutable_array();
-    ctx->SetStatus(Convert(ctx, input, output));
+    return Convert(ctx, input, output);
   }
 
   static Status Convert(KernelContext* ctx, const ArrayData& input, ArrayData* output) {
@@ -94,33 +94,35 @@ struct Utf8Validator {
 };
 
 template <typename I, typename O>
-void CastBinaryToBinaryOffsets(KernelContext* ctx, const ArrayData& input,
-                               ArrayData* output) {
+Status CastBinaryToBinaryOffsets(KernelContext* ctx, const ArrayData& input,
+                                 ArrayData* output) {
   static_assert(std::is_same<I, O>::value, "Cast same-width offsets (no-op)");
+  return Status::OK();
 }
 
 // Upcast offsets
 template <>
-void CastBinaryToBinaryOffsets<int32_t, int64_t>(KernelContext* ctx,
-                                                 const ArrayData& input,
-                                                 ArrayData* output) {
+Status CastBinaryToBinaryOffsets<int32_t, int64_t>(KernelContext* ctx,
+                                                   const ArrayData& input,
+                                                   ArrayData* output) {
   using input_offset_type = int32_t;
   using output_offset_type = int64_t;
-  KERNEL_ASSIGN_OR_RAISE(
-      output->buffers[1], ctx,
+  ARROW_ASSIGN_OR_RAISE(
+      output->buffers[1],
       ctx->Allocate((output->length + output->offset + 1) * sizeof(output_offset_type)));
   memset(output->buffers[1]->mutable_data(), 0,
          output->offset * sizeof(output_offset_type));
   ::arrow::internal::CastInts(input.GetValues<input_offset_type>(1),
                               output->GetMutableValues<output_offset_type>(1),
                               output->length + 1);
+  return Status::OK();
 }
 
 // Downcast offsets
 template <>
-void CastBinaryToBinaryOffsets<int64_t, int32_t>(KernelContext* ctx,
-                                                 const ArrayData& input,
-                                                 ArrayData* output) {
+Status CastBinaryToBinaryOffsets<int64_t, int32_t>(KernelContext* ctx,
+                                                   const ArrayData& input,
+                                                   ArrayData* output) {
   using input_offset_type = int64_t;
   using output_offset_type = int32_t;
 
@@ -130,22 +132,23 @@ void CastBinaryToBinaryOffsets<int64_t, int32_t>(KernelContext* ctx,
 
   // Binary offsets are ascending, so it's enough to check the last one for overflow.
   if (input_offsets[input.length] > kMaxOffset) {
-    ctx->SetStatus(Status::Invalid("Failed casting from ", input.type->ToString(), " to ",
-                                   output->type->ToString(), ": input array too large"));
+    return Status::Invalid("Failed casting from ", input.type->ToString(), " to ",
+                           output->type->ToString(), ": input array too large");
   } else {
-    KERNEL_ASSIGN_OR_RAISE(output->buffers[1], ctx,
-                           ctx->Allocate((output->length + output->offset + 1) *
-                                         sizeof(output_offset_type)));
+    ARROW_ASSIGN_OR_RAISE(output->buffers[1],
+                          ctx->Allocate((output->length + output->offset + 1) *
+                                        sizeof(output_offset_type)));
     memset(output->buffers[1]->mutable_data(), 0,
            output->offset * sizeof(output_offset_type));
     ::arrow::internal::CastInts(input.GetValues<input_offset_type>(1),
                                 output->GetMutableValues<output_offset_type>(1),
                                 output->length + 1);
+    return Status::OK();
   }
 }
 
 template <typename O, typename I>
-void BinaryToBinaryCastExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status BinaryToBinaryCastExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
   DCHECK(out->is_array());
   const CastOptions& options = checked_cast<const CastState&>(*ctx->state()).options;
   const ArrayData& input = *batch[0].array();
@@ -155,17 +158,12 @@ void BinaryToBinaryCastExec(KernelContext* ctx, const ExecBatch& batch, Datum* o
 
     ArrayDataVisitor<I> visitor;
     Utf8Validator validator;
-    Status st = visitor.Visit(input, &validator);
-    if (!st.ok()) {
-      ctx->SetStatus(st);
-      return;
-    }
+    RETURN_NOT_OK(visitor.Visit(input, &validator));
   }
 
   // Start with a zero-copy cast, but change indices to expected size
-  ZeroCopyCastExec(ctx, batch, out);
-
-  CastBinaryToBinaryOffsets<typename I::offset_type, typename O::offset_type>(
+  RETURN_NOT_OK(ZeroCopyCastExec(ctx, batch, out));
+  return CastBinaryToBinaryOffsets<typename I::offset_type, typename O::offset_type>(
       ctx, input, out->mutable_array());
 }
 
diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_temporal.cc b/cpp/src/arrow/compute/kernels/scalar_cast_temporal.cc
index d7d1faf7ae5..1a58fce7c74 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_temporal.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_temporal.cc
@@ -39,8 +39,8 @@ constexpr int64_t kMillisecondsInDay = 86400000;
 // From one timestamp to another
 
 template <typename in_type, typename out_type>
-void ShiftTime(KernelContext* ctx, const util::DivideOrMultiply factor_op,
-               const int64_t factor, const ArrayData& input, ArrayData* output) {
+Status ShiftTime(KernelContext* ctx, const util::DivideOrMultiply factor_op,
+                 const int64_t factor, const ArrayData& input, ArrayData* output) {
   const CastOptions& options = checked_cast<const CastState&>(*ctx->state()).options;
   auto in_data = input.GetValues<in_type>(1);
   auto out_data = output->GetMutableValues<out_type>(1);
@@ -55,10 +55,10 @@ void ShiftTime(KernelContext* ctx, const util::DivideOrMultiply factor_op,
         out_data[i] = static_cast<out_type>(in_data[i] * factor);
       }
     } else {
-#define RAISE_OVERFLOW_CAST(VAL)                                                  \
-  ctx->SetStatus(Status::Invalid("Casting from ", input.type->ToString(), " to ", \
-                                 output->type->ToString(), " would result in ",   \
-                                 "out of bounds timestamp: ", VAL));
+#define RAISE_OVERFLOW_CAST(VAL)                                          \
+  return Status::Invalid("Casting from ", input.type->ToString(), " to ", \
+                         output->type->ToString(), " would result in ",   \
+                         "out of bounds timestamp: ", VAL);
 
       int64_t max_val = std::numeric_limits<int64_t>::max() / factor;
       int64_t min_val = std::numeric_limits<int64_t>::min() / factor;
@@ -67,7 +67,6 @@ void ShiftTime(KernelContext* ctx, const util::DivideOrMultiply factor_op,
         for (int64_t i = 0; i < input.length; i++) {
           if (bit_reader.IsSet() && (in_data[i] < min_val || in_data[i] > max_val)) {
             RAISE_OVERFLOW_CAST(in_data[i]);
-            break;
           }
           out_data[i] = static_cast<out_type>(in_data[i] * factor);
           bit_reader.Next();
@@ -76,7 +75,6 @@ void ShiftTime(KernelContext* ctx, const util::DivideOrMultiply factor_op,
         for (int64_t i = 0; i < input.length; i++) {
           if (in_data[i] < min_val || in_data[i] > max_val) {
             RAISE_OVERFLOW_CAST(in_data[i]);
-            break;
           }
           out_data[i] = static_cast<out_type>(in_data[i] * factor);
         }
@@ -90,9 +88,9 @@ void ShiftTime(KernelContext* ctx, const util::DivideOrMultiply factor_op,
         out_data[i] = static_cast<out_type>(in_data[i] / factor);
       }
     } else {
-#define RAISE_INVALID_CAST(VAL)                                                   \
-  ctx->SetStatus(Status::Invalid("Casting from ", input.type->ToString(), " to ", \
-                                 output->type->ToString(), " would lose data: ", VAL));
+#define RAISE_INVALID_CAST(VAL)                                           \
+  return Status::Invalid("Casting from ", input.type->ToString(), " to ", \
+                         output->type->ToString(), " would lose data: ", VAL);
 
       if (input.null_count != 0) {
         BitmapReader bit_reader(input.buffers[0]->data(), input.offset, input.length);
@@ -100,7 +98,6 @@ void ShiftTime(KernelContext* ctx, const util::DivideOrMultiply factor_op,
           out_data[i] = static_cast<out_type>(in_data[i] / factor);
           if (bit_reader.IsSet() && (out_data[i] * factor != in_data[i])) {
             RAISE_INVALID_CAST(in_data[i]);
-            break;
           }
           bit_reader.Next();
         }
@@ -109,7 +106,6 @@ void ShiftTime(KernelContext* ctx, const util::DivideOrMultiply factor_op,
           out_data[i] = static_cast<out_type>(in_data[i] / factor);
           if (out_data[i] * factor != in_data[i]) {
             RAISE_INVALID_CAST(in_data[i]);
-            break;
           }
         }
       }
@@ -117,6 +113,8 @@ void ShiftTime(KernelContext* ctx, const util::DivideOrMultiply factor_op,
 #undef RAISE_INVALID_CAST
     }
   }
+
+  return Status::OK();
 }
 
 // <TimestampType, TimestampType> and <DurationType, DurationType>
@@ -125,7 +123,7 @@ struct CastFunctor<
     O, I,
     enable_if_t<(is_timestamp_type<O>::value && is_timestamp_type<I>::value) ||
                 (is_duration_type<O>::value && is_duration_type<I>::value)>> {
-  static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     DCHECK_EQ(batch[0].kind(), Datum::ARRAY);
 
     const ArrayData& input = *batch[0].array();
@@ -139,13 +137,14 @@ struct CastFunctor<
     // lengths to make this zero copy in the future but we leave it for now
 
     auto conversion = util::GetTimestampConversion(in_type.unit(), out_type.unit());
-    ShiftTime<int64_t, int64_t>(ctx, conversion.first, conversion.second, input, output);
+    return ShiftTime<int64_t, int64_t>(ctx, conversion.first, conversion.second, input,
+                                       output);
   }
 };
 
 template <>
 struct CastFunctor<Date32Type, TimestampType> {
-  static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     DCHECK_EQ(batch[0].kind(), Datum::ARRAY);
 
     const ArrayData& input = *batch[0].array();
@@ -161,13 +160,13 @@ struct CastFunctor<Date32Type, TimestampType> {
     };
 
     const int64_t factor = kTimestampToDateFactors[static_cast<int>(in_type.unit())];
-    ShiftTime<int64_t, int32_t>(ctx, util::DIVIDE, factor, input, output);
+    return ShiftTime<int64_t, int32_t>(ctx, util::DIVIDE, factor, input, output);
   }
 };
 
 template <>
 struct CastFunctor<Date64Type, TimestampType> {
-  static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     DCHECK_EQ(batch[0].kind(), Datum::ARRAY);
 
     const CastOptions& options = checked_cast<const CastState&>(*ctx->state()).options;
@@ -176,10 +175,8 @@ struct CastFunctor<Date64Type, TimestampType> {
     const auto& in_type = checked_cast<const TimestampType&>(*input.type);
 
     auto conversion = util::GetTimestampConversion(in_type.unit(), TimeUnit::MILLI);
-    ShiftTime<int64_t, int64_t>(ctx, conversion.first, conversion.second, input, output);
-    if (!ctx->status().ok()) {
-      return;
-    }
+    RETURN_NOT_OK((ShiftTime<int64_t, int64_t>(ctx, conversion.first, conversion.second,
+                                               input, output)));
 
     // Ensure that intraday milliseconds have been zeroed out
     auto out_data = output->GetMutableValues<int64_t>(1);
@@ -191,9 +188,7 @@ struct CastFunctor<Date64Type, TimestampType> {
         const int64_t remainder = out_data[i] % kMillisecondsInDay;
         if (ARROW_PREDICT_FALSE(!options.allow_time_truncate && bit_reader.IsSet() &&
                                 remainder > 0)) {
-          ctx->SetStatus(
-              Status::Invalid("Timestamp value had non-zero intraday milliseconds"));
-          break;
+          return Status::Invalid("Timestamp value had non-zero intraday milliseconds");
         }
         out_data[i] -= remainder;
         bit_reader.Next();
@@ -202,13 +197,13 @@ struct CastFunctor<Date64Type, TimestampType> {
       for (int64_t i = 0; i < input.length; ++i) {
         const int64_t remainder = out_data[i] % kMillisecondsInDay;
         if (ARROW_PREDICT_FALSE(!options.allow_time_truncate && remainder > 0)) {
-          ctx->SetStatus(
-              Status::Invalid("Timestamp value had non-zero intraday milliseconds"));
-          break;
+          return Status::Invalid("Timestamp value had non-zero intraday milliseconds");
         }
         out_data[i] -= remainder;
       }
     }
+
+    return Status::OK();
   }
 };
 
@@ -220,7 +215,7 @@ struct CastFunctor<O, I, enable_if_t<is_time_type<I>::value && is_time_type<O>::
   using in_t = typename I::c_type;
   using out_t = typename O::c_type;
 
-  static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     DCHECK_EQ(batch[0].kind(), Datum::ARRAY);
 
     const ArrayData& input = *batch[0].array();
@@ -231,7 +226,8 @@ struct CastFunctor<O, I, enable_if_t<is_time_type<I>::value && is_time_type<O>::
     const auto& out_type = checked_cast<const O&>(*output->type);
     DCHECK_NE(in_type.unit(), out_type.unit()) << "Do not cast equal types";
     auto conversion = util::GetTimestampConversion(in_type.unit(), out_type.unit());
-    ShiftTime<in_t, out_t>(ctx, conversion.first, conversion.second, input, output);
+    return ShiftTime<in_t, out_t>(ctx, conversion.first, conversion.second, input,
+                                  output);
   }
 };
 
@@ -240,21 +236,21 @@ struct CastFunctor<O, I, enable_if_t<is_time_type<I>::value && is_time_type<O>::
 
 template <>
 struct CastFunctor<Date64Type, Date32Type> {
-  static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     DCHECK_EQ(batch[0].kind(), Datum::ARRAY);
 
-    ShiftTime<int32_t, int64_t>(ctx, util::MULTIPLY, kMillisecondsInDay,
-                                *batch[0].array(), out->mutable_array());
+    return ShiftTime<int32_t, int64_t>(ctx, util::MULTIPLY, kMillisecondsInDay,
+                                       *batch[0].array(), out->mutable_array());
   }
 };
 
 template <>
 struct CastFunctor<Date32Type, Date64Type> {
-  static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     DCHECK_EQ(batch[0].kind(), Datum::ARRAY);
 
-    ShiftTime<int64_t, int32_t>(ctx, util::DIVIDE, kMillisecondsInDay, *batch[0].array(),
-                                out->mutable_array());
+    return ShiftTime<int64_t, int32_t>(ctx, util::DIVIDE, kMillisecondsInDay,
+                                       *batch[0].array(), out->mutable_array());
   }
 };
 
@@ -263,7 +259,7 @@ struct CastFunctor<Date32Type, Date64Type> {
 
 template <>
 struct CastFunctor<TimestampType, Date32Type> {
-  static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     DCHECK_EQ(batch[0].kind(), Datum::ARRAY);
 
     const auto& out_type = checked_cast<const TimestampType&>(*out->type());
@@ -273,22 +269,22 @@ struct CastFunctor<TimestampType, Date32Type> {
 
     // multiply to achieve days -> unit
     conversion.second *= kMillisecondsInDay / 1000;
-    ShiftTime<int32_t, int64_t>(ctx, util::MULTIPLY, conversion.second, *batch[0].array(),
-                                out->mutable_array());
+    return ShiftTime<int32_t, int64_t>(ctx, util::MULTIPLY, conversion.second,
+                                       *batch[0].array(), out->mutable_array());
   }
 };
 
 template <>
 struct CastFunctor<TimestampType, Date64Type> {
-  static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     DCHECK_EQ(batch[0].kind(), Datum::ARRAY);
 
     const auto& out_type = checked_cast<const TimestampType&>(*out->type());
 
     // date64 is ms since epoch
     auto conversion = util::GetTimestampConversion(TimeUnit::MILLI, out_type.unit());
-    ShiftTime<int64_t, int64_t>(ctx, conversion.first, conversion.second,
-                                *batch[0].array(), out->mutable_array());
+    return ShiftTime<int64_t, int64_t>(ctx, conversion.first, conversion.second,
+                                       *batch[0].array(), out->mutable_array());
   }
 };
 
@@ -297,11 +293,11 @@ struct CastFunctor<TimestampType, Date64Type> {
 
 struct ParseTimestamp {
   template <typename OutValue, typename Arg0Value>
-  OutValue Call(KernelContext* ctx, Arg0Value val) const {
+  OutValue Call(KernelContext*, Arg0Value val, Status* st) const {
     OutValue result = 0;
     if (ARROW_PREDICT_FALSE(!ParseValue(type, val.data(), val.size(), &result))) {
-      ctx->SetStatus(Status::Invalid("Failed to parse string: '", val,
-                                     "' as a scalar of type ", type.ToString()));
+      *st = Status::Invalid("Failed to parse string: '", val, "' as a scalar of type ",
+                            type.ToString());
     }
     return result;
   }
@@ -311,7 +307,7 @@ struct ParseTimestamp {
 
 template <typename I>
 struct CastFunctor<TimestampType, I, enable_if_t<is_base_binary_type<I>::value>> {
-  static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     const auto& out_type = checked_cast<const TimestampType&>(*out->type());
     applicator::ScalarUnaryNotNullStateful<TimestampType, I, ParseTimestamp> kernel(
         ParseTimestamp{out_type});
diff --git a/cpp/src/arrow/compute/kernels/scalar_compare.cc b/cpp/src/arrow/compute/kernels/scalar_compare.cc
index 58d3e6fc781..8da97ef2260 100644
--- a/cpp/src/arrow/compute/kernels/scalar_compare.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_compare.cc
@@ -30,28 +30,28 @@ namespace {
 
 struct Equal {
   template <typename T>
-  static constexpr bool Call(KernelContext*, const T& left, const T& right) {
+  static constexpr bool Call(KernelContext*, const T& left, const T& right, Status*) {
     return left == right;
   }
 };
 
 struct NotEqual {
   template <typename T>
-  static constexpr bool Call(KernelContext*, const T& left, const T& right) {
+  static constexpr bool Call(KernelContext*, const T& left, const T& right, Status*) {
     return left != right;
   }
 };
 
 struct Greater {
   template <typename T>
-  static constexpr bool Call(KernelContext*, const T& left, const T& right) {
+  static constexpr bool Call(KernelContext*, const T& left, const T& right, Status*) {
     return left > right;
   }
 };
 
 struct GreaterEqual {
   template <typename T>
-  static constexpr bool Call(KernelContext*, const T& left, const T& right) {
+  static constexpr bool Call(KernelContext*, const T& left, const T& right, Status*) {
     return left >= right;
   }
 };
diff --git a/cpp/src/arrow/compute/kernels/scalar_fill_null.cc b/cpp/src/arrow/compute/kernels/scalar_fill_null.cc
index 9624f88e68f..f52e0045470 100644
--- a/cpp/src/arrow/compute/kernels/scalar_fill_null.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_fill_null.cc
@@ -43,7 +43,7 @@ template <typename Type>
 struct FillNullFunctor<Type, enable_if_t<is_number_type<Type>::value>> {
   using T = typename TypeTraits<Type>::CType;
 
-  static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     const ArrayData& data = *batch[0].array();
     const Scalar& fill_value = *batch[1].scalar();
     ArrayData* output = out->mutable_array();
@@ -54,8 +54,8 @@ struct FillNullFunctor<Type, enable_if_t<is_number_type<Type>::value>> {
 
     T value = UnboxScalar<Type>::Unbox(fill_value);
     if (data.MayHaveNulls() != 0 && fill_value.is_valid) {
-      KERNEL_ASSIGN_OR_RAISE(std::shared_ptr<Buffer> out_buf, ctx,
-                             ctx->Allocate(data.length * sizeof(T)));
+      ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Buffer> out_buf,
+                            ctx->Allocate(data.length * sizeof(T)));
 
       const uint8_t* is_valid = data.buffers[0]->data();
       const T* in_values = data.GetValues<T>(1);
@@ -83,6 +83,8 @@ struct FillNullFunctor<Type, enable_if_t<is_number_type<Type>::value>> {
     } else {
       *output = data;
     }
+
+    return Status::OK();
   }
 };
 
@@ -90,15 +92,15 @@ struct FillNullFunctor<Type, enable_if_t<is_number_type<Type>::value>> {
 
 template <typename Type>
 struct FillNullFunctor<Type, enable_if_t<is_boolean_type<Type>::value>> {
-  static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     const ArrayData& data = *batch[0].array();
     const Scalar& fill_value = *batch[1].scalar();
     ArrayData* output = out->mutable_array();
 
     bool value = UnboxScalar<BooleanType>::Unbox(fill_value);
     if (data.MayHaveNulls() != 0 && fill_value.is_valid) {
-      KERNEL_ASSIGN_OR_RAISE(std::shared_ptr<Buffer> out_buf, ctx,
-                             ctx->AllocateBitmap(data.length));
+      ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Buffer> out_buf,
+                            ctx->AllocateBitmap(data.length));
 
       const uint8_t* is_valid = data.buffers[0]->data();
       const uint8_t* data_bitmap = data.buffers[1]->data();
@@ -132,6 +134,8 @@ struct FillNullFunctor<Type, enable_if_t<is_boolean_type<Type>::value>> {
     } else {
       *output = data;
     }
+
+    return Status::OK();
   }
 };
 
@@ -139,9 +143,10 @@ struct FillNullFunctor<Type, enable_if_t<is_boolean_type<Type>::value>> {
 
 template <typename Type>
 struct FillNullFunctor<Type, enable_if_t<is_null_type<Type>::value>> {
-  static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     // Nothing preallocated, so we assign into the output
     *out->mutable_array() = *batch[0].array();
+    return Status::OK();
   }
 };
 
@@ -151,7 +156,7 @@ template <typename Type>
 struct FillNullFunctor<Type, enable_if_t<is_base_binary_type<Type>::value>> {
   using BuilderType = typename TypeTraits<Type>::BuilderType;
 
-  static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     const ArrayData& input = *batch[0].array();
     const auto& fill_value_scalar =
         checked_cast<const BaseBinaryScalar&>(*batch[1].scalar());
@@ -166,22 +171,15 @@ struct FillNullFunctor<Type, enable_if_t<is_base_binary_type<Type>::value>> {
 
     if (null_count > 0 && fill_value_scalar.is_valid) {
       BuilderType builder(input.type, ctx->memory_pool());
-      KERNEL_RETURN_IF_ERROR(ctx, builder.ReserveData(input.buffers[2]->size() +
-                                                      fill_value.length() * null_count));
-      KERNEL_RETURN_IF_ERROR(ctx, builder.Resize(input.length));
-
-      KERNEL_RETURN_IF_ERROR(ctx, VisitArrayDataInline<Type>(
-                                      input,
-                                      [&](util::string_view s) {
-                                        builder.UnsafeAppend(s);
-                                        return Status::OK();
-                                      },
-                                      [&]() {
-                                        builder.UnsafeAppend(fill_value);
-                                        return Status::OK();
-                                      }));
+      RETURN_NOT_OK(builder.ReserveData(input.buffers[2]->size() +
+                                        fill_value.length() * null_count));
+      RETURN_NOT_OK(builder.Resize(input.length));
+
+      VisitArrayDataInline<Type>(
+          input, [&](util::string_view s) { builder.UnsafeAppend(s); },
+          [&]() { builder.UnsafeAppend(fill_value); });
       std::shared_ptr<Array> string_array;
-      KERNEL_RETURN_IF_ERROR(ctx, builder.Finish(&string_array));
+      RETURN_NOT_OK(builder.Finish(&string_array));
       *output = *string_array->data();
       // The builder does not match the logical type, due to
       // GenerateTypeAgnosticVarBinaryBase
@@ -189,6 +187,8 @@ struct FillNullFunctor<Type, enable_if_t<is_base_binary_type<Type>::value>> {
     } else {
       *output = input;
     }
+
+    return Status::OK();
   }
 };
 
diff --git a/cpp/src/arrow/compute/kernels/scalar_nested.cc b/cpp/src/arrow/compute/kernels/scalar_nested.cc
index 8a6a69932c0..e4ab3f9b418 100644
--- a/cpp/src/arrow/compute/kernels/scalar_nested.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_nested.cc
@@ -29,7 +29,7 @@ namespace internal {
 namespace {
 
 template <typename Type, typename offset_type = typename Type::offset_type>
-void ListValueLength(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status ListValueLength(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
   using ScalarType = typename TypeTraits<Type>::ScalarType;
   using OffsetScalarType = typename TypeTraits<Type>::OffsetScalarType;
 
@@ -51,6 +51,8 @@ void ListValueLength(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
           static_cast<offset_type>(arg0.value->length());
     }
   }
+
+  return Status::OK();
 }
 
 const FunctionDoc list_value_length_doc{
@@ -99,16 +101,15 @@ Result<ValueDescr> ProjectResolve(KernelContext* ctx,
   return ValueDescr{struct_(std::move(fields)), shape};
 }
 
-void ProjectExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
-  KERNEL_ASSIGN_OR_RAISE(auto descr, ctx, ProjectResolve(ctx, batch.GetDescriptors()));
+Status ProjectExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  ARROW_ASSIGN_OR_RAISE(auto descr, ProjectResolve(ctx, batch.GetDescriptors()));
 
   for (int i = 0; i < batch.num_values(); ++i) {
     const auto& field = checked_cast<const StructType&>(*descr.type).field(i);
     if (batch[i].null_count() > 0 && !field->nullable()) {
-      ctx->SetStatus(Status::Invalid("Output field ", field, " (#", i,
-                                     ") does not allow nulls but the corresponding "
-                                     "argument was not entirely valid."));
-      return;
+      return Status::Invalid("Output field ", field, " (#", i,
+                             ") does not allow nulls but the corresponding "
+                             "argument was not entirely valid.");
     }
   }
 
@@ -120,7 +121,7 @@ void ProjectExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
 
     *out =
         Datum(std::make_shared<StructScalar>(std::move(scalars), std::move(descr.type)));
-    return;
+    return Status::OK();
   }
 
   ArrayVector arrays(batch.num_values());
@@ -130,12 +131,12 @@ void ProjectExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
       continue;
     }
 
-    KERNEL_ASSIGN_OR_RAISE(
-        arrays[i], ctx,
-        MakeArrayFromScalar(*batch[i].scalar(), batch.length, ctx->memory_pool()));
+    ARROW_ASSIGN_OR_RAISE(arrays[i], MakeArrayFromScalar(*batch[i].scalar(), batch.length,
+                                                         ctx->memory_pool()));
   }
 
   *out = std::make_shared<StructArray>(descr.type, batch.length, std::move(arrays));
+  return Status::OK();
 }
 
 const FunctionDoc project_doc{"Wrap Arrays into a StructArray",
diff --git a/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc b/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc
index ffc1e11a7be..2868b0c743f 100644
--- a/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc
@@ -154,7 +154,7 @@ struct InitStateVisitor {
   // Handle Decimal128Type, FixedSizeBinaryType
   Status Visit(const FixedSizeBinaryType& type) { return Init<FixedSizeBinaryType>(); }
 
-  Status GetResult(std::unique_ptr<KernelState>* out) {
+  Result<std::unique_ptr<KernelState>> GetResult() {
     if (!options.value_set.type()->Equals(arg_type)) {
       ARROW_ASSIGN_OR_RAISE(
           options.value_set,
@@ -162,22 +162,18 @@ struct InitStateVisitor {
     }
 
     RETURN_NOT_OK(VisitTypeInline(*arg_type, this));
-    *out = std::move(result);
-    return Status::OK();
+    return std::move(result);
   }
 };
 
-std::unique_ptr<KernelState> InitSetLookup(KernelContext* ctx,
-                                           const KernelInitArgs& args) {
+Result<std::unique_ptr<KernelState>> InitSetLookup(KernelContext* ctx,
+                                                   const KernelInitArgs& args) {
   if (args.options == nullptr) {
-    ctx->SetStatus(Status::Invalid(
-        "Attempted to call a set lookup function without SetLookupOptions"));
-    return nullptr;
+    return Status::Invalid(
+        "Attempted to call a set lookup function without SetLookupOptions");
   }
 
-  std::unique_ptr<KernelState> result;
-  ctx->SetStatus(InitStateVisitor{ctx, args}.GetResult(&result));
-  return result;
+  return InitStateVisitor{ctx, args}.GetResult();
 }
 
 struct IndexInVisitor {
@@ -271,8 +267,8 @@ struct IndexInVisitor {
   }
 };
 
-void ExecIndexIn(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
-  KERNEL_RETURN_IF_ERROR(ctx, IndexInVisitor(ctx, *batch[0].array(), out).Execute());
+Status ExecIndexIn(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  return IndexInVisitor(ctx, *batch[0].array(), out).Execute();
 }
 
 // ----------------------------------------------------------------------
@@ -351,8 +347,8 @@ struct IsInVisitor {
   Status Execute() { return VisitTypeInline(*data.type, this); }
 };
 
-void ExecIsIn(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
-  KERNEL_RETURN_IF_ERROR(ctx, IsInVisitor(ctx, *batch[0].array(), out).Execute());
+Status ExecIsIn(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  return IsInVisitor(ctx, *batch[0].array(), out).Execute();
 }
 
 // Unary set lookup kernels available for the following input types
diff --git a/cpp/src/arrow/compute/kernels/scalar_string.cc b/cpp/src/arrow/compute/kernels/scalar_string.cc
index d5473749fe1..065c512c552 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string.cc
@@ -87,14 +87,14 @@ static inline bool IsAsciiCharacter(T character) {
 
 struct BinaryLength {
   template <typename OutValue, typename Arg0Value = util::string_view>
-  static OutValue Call(KernelContext*, Arg0Value val) {
+  static OutValue Call(KernelContext*, Arg0Value val, Status*) {
     return static_cast<OutValue>(val.size());
   }
 };
 
 struct Utf8Length {
   template <typename OutValue, typename Arg0Value = util::string_view>
-  static OutValue Call(KernelContext*, Arg0Value val) {
+  static OutValue Call(KernelContext*, Arg0Value val, Status*) {
     auto str = reinterpret_cast<const uint8_t*>(val.data());
     auto strlen = val.size();
 
@@ -139,10 +139,10 @@ struct StringTransform {
   using ArrayType = typename TypeTraits<Type>::ArrayType;
 
   static int64_t MaxCodeunits(offset_type input_ncodeunits) { return input_ncodeunits; }
-  static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
-    Derived().Execute(ctx, batch, out);
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    return Derived().Execute(ctx, batch, out);
   }
-  void Execute(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  Status Execute(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     if (batch[0].kind() == Datum::ARRAY) {
       const ArrayData& input = *batch[0].array();
       ArrayType input_boxed(batch[0].array());
@@ -153,13 +153,11 @@ struct StringTransform {
 
       int64_t output_ncodeunits_max = Derived::MaxCodeunits(input_ncodeunits);
       if (output_ncodeunits_max > std::numeric_limits<offset_type>::max()) {
-        ctx->SetStatus(Status::CapacityError(
-            "Result might not fit in a 32bit utf8 array, convert to large_utf8"));
-        return;
+        return Status::CapacityError(
+            "Result might not fit in a 32bit utf8 array, convert to large_utf8");
       }
 
-      KERNEL_ASSIGN_OR_RAISE(auto values_buffer, ctx,
-                             ctx->Allocate(output_ncodeunits_max));
+      ARROW_ASSIGN_OR_RAISE(auto values_buffer, ctx->Allocate(output_ncodeunits_max));
       output->buffers[2] = values_buffer;
 
       // String offsets are preallocated
@@ -175,16 +173,14 @@ struct StringTransform {
         if (ARROW_PREDICT_FALSE(!static_cast<Derived&>(*this).Transform(
                 input_string, input_string_ncodeunits, output_str + output_ncodeunits,
                 &encoded_nbytes))) {
-          ctx->SetStatus(Status::Invalid("Invalid UTF8 sequence in input"));
-          return;
+          return Status::Invalid("Invalid UTF8 sequence in input");
         }
         output_ncodeunits += encoded_nbytes;
         output_string_offsets[i + 1] = output_ncodeunits;
       }
 
       // Trim the codepoint buffer, since we allocated too much
-      KERNEL_RETURN_IF_ERROR(
-          ctx, values_buffer->Resize(output_ncodeunits, /*shrink_to_fit=*/true));
+      RETURN_NOT_OK(values_buffer->Resize(output_ncodeunits, /*shrink_to_fit=*/true));
     } else {
       const auto& input = checked_cast<const BaseBinaryScalar&>(*batch[0].scalar());
       auto result = checked_pointer_cast<BaseBinaryScalar>(MakeNullScalar(out->type()));
@@ -194,25 +190,23 @@ struct StringTransform {
 
         int64_t output_ncodeunits_max = Derived::MaxCodeunits(data_nbytes);
         if (output_ncodeunits_max > std::numeric_limits<offset_type>::max()) {
-          ctx->SetStatus(Status::CapacityError(
-              "Result might not fit in a 32bit utf8 array, convert to large_utf8"));
-          return;
+          return Status::CapacityError(
+              "Result might not fit in a 32bit utf8 array, convert to large_utf8");
         }
-        KERNEL_ASSIGN_OR_RAISE(auto value_buffer, ctx,
-                               ctx->Allocate(output_ncodeunits_max));
+        ARROW_ASSIGN_OR_RAISE(auto value_buffer, ctx->Allocate(output_ncodeunits_max));
         result->value = value_buffer;
         offset_type encoded_nbytes = 0;
         if (ARROW_PREDICT_FALSE(!static_cast<Derived&>(*this).Transform(
                 input.value->data(), data_nbytes, value_buffer->mutable_data(),
                 &encoded_nbytes))) {
-          ctx->SetStatus(Status::Invalid("Invalid UTF8 sequence in input"));
-          return;
+          return Status::Invalid("Invalid UTF8 sequence in input");
         }
-        KERNEL_RETURN_IF_ERROR(
-            ctx, value_buffer->Resize(encoded_nbytes, /*shrink_to_fit=*/true));
+        RETURN_NOT_OK(value_buffer->Resize(encoded_nbytes, /*shrink_to_fit=*/true));
       }
       out->value = result;
     }
+
+    return Status::OK();
   }
 };
 
@@ -244,9 +238,9 @@ struct StringTransformCodepoint : StringTransform<Type, Derived> {
     // two code units (even) can grow to 3 code units.
     return static_cast<int64_t>(input_ncodeunits) * 3 / 2;
   }
-  void Execute(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  Status Execute(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     EnsureLookupTablesFilled();
-    Base::Execute(ctx, batch, out);
+    return Base::Execute(ctx, batch, out);
   }
 };
 
@@ -293,8 +287,8 @@ Status GetShiftedOffsets(KernelContext* ctx, const Buffer& input_buffer, int64_t
 // Apply `transform` to input character data- this function cannot change the
 // length
 template <typename Type>
-void StringDataTransform(KernelContext* ctx, const ExecBatch& batch,
-                         TransformFunc transform, Datum* out) {
+Status StringDataTransform(KernelContext* ctx, const ExecBatch& batch,
+                           TransformFunc transform, Datum* out) {
   using ArrayType = typename TypeTraits<Type>::ArrayType;
   using offset_type = typename Type::offset_type;
 
@@ -310,14 +304,13 @@ void StringDataTransform(KernelContext* ctx, const ExecBatch& batch,
     } else {
       DCHECK(input.buffers[1]);
       // We must allocate new space for the offsets and shift the existing offsets
-      KERNEL_RETURN_IF_ERROR(
-          ctx, GetShiftedOffsets<offset_type>(ctx, *input.buffers[1], input.offset,
-                                              input.length, &out_arr->buffers[1]));
+      RETURN_NOT_OK(GetShiftedOffsets<offset_type>(ctx, *input.buffers[1], input.offset,
+                                                   input.length, &out_arr->buffers[1]));
     }
 
     // Allocate space for output data
     int64_t data_nbytes = input_boxed.total_values_length();
-    KERNEL_RETURN_IF_ERROR(ctx, ctx->Allocate(data_nbytes).Value(&out_arr->buffers[2]));
+    RETURN_NOT_OK(ctx->Allocate(data_nbytes).Value(&out_arr->buffers[2]));
     if (input.length > 0) {
       transform(input.buffers[2]->data() + input_boxed.value_offset(0), data_nbytes,
                 out_arr->buffers[2]->mutable_data());
@@ -328,11 +321,13 @@ void StringDataTransform(KernelContext* ctx, const ExecBatch& batch,
     if (input.is_valid) {
       result->is_valid = true;
       int64_t data_nbytes = input.value->size();
-      KERNEL_RETURN_IF_ERROR(ctx, ctx->Allocate(data_nbytes).Value(&result->value));
+      RETURN_NOT_OK(ctx->Allocate(data_nbytes).Value(&result->value));
       transform(input.value->data(), data_nbytes, result->value->mutable_data());
     }
     out->value = result;
   }
+
+  return Status::OK();
 }
 
 void TransformAsciiUpper(const uint8_t* input, int64_t length, uint8_t* output) {
@@ -341,8 +336,8 @@ void TransformAsciiUpper(const uint8_t* input, int64_t length, uint8_t* output)
 
 template <typename Type>
 struct AsciiUpper {
-  static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
-    StringDataTransform<Type>(ctx, batch, TransformAsciiUpper, out);
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    return StringDataTransform<Type>(ctx, batch, TransformAsciiUpper, out);
   }
 };
 
@@ -352,8 +347,8 @@ void TransformAsciiLower(const uint8_t* input, int64_t length, uint8_t* output)
 
 template <typename Type>
 struct AsciiLower {
-  static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
-    StringDataTransform<Type>(ctx, batch, TransformAsciiLower, out);
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    return StringDataTransform<Type>(ctx, batch, TransformAsciiLower, out);
   }
 };
 
@@ -397,10 +392,9 @@ using MatchSubstringState = OptionsWrapper<MatchSubstringOptions>;
 template <typename Type, typename Matcher>
 struct MatchSubstring {
   using offset_type = typename Type::offset_type;
-  static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     // TODO Cache matcher across invocations (for regex compilation)
-    Matcher matcher(ctx, MatchSubstringState::Get(ctx));
-    if (ctx->HasError()) return;
+    ARROW_ASSIGN_OR_RAISE(auto matcher, Matcher::Make(MatchSubstringState::Get(ctx)));
     StringBoolTransform<Type>(
         ctx, batch,
         [&matcher](const void* raw_offsets, const uint8_t* data, int64_t length,
@@ -410,7 +404,7 @@ struct MatchSubstring {
           for (int64_t i = 0; i < length; ++i) {
             const char* current_data = reinterpret_cast<const char*>(data + offsets[i]);
             int64_t current_length = offsets[i + 1] - offsets[i];
-            if (matcher.Match(util::string_view(current_data, current_length))) {
+            if (matcher->Match(util::string_view(current_data, current_length))) {
               bitmap_writer.Set();
             }
             bitmap_writer.Next();
@@ -418,6 +412,7 @@ struct MatchSubstring {
           bitmap_writer.Finish();
         },
         out);
+    return Status::OK();
   }
 };
 
@@ -426,7 +421,12 @@ struct PlainSubstringMatcher {
   const MatchSubstringOptions& options_;
   std::vector<int64_t> prefix_table;
 
-  PlainSubstringMatcher(KernelContext* ctx, const MatchSubstringOptions& options)
+  static Result<std::unique_ptr<PlainSubstringMatcher>> Make(
+      const MatchSubstringOptions& options) {
+    return ::arrow::internal::make_unique<PlainSubstringMatcher>(options);
+  }
+
+  explicit PlainSubstringMatcher(const MatchSubstringOptions& options)
       : options_(options) {
     // Phase 1: Build the prefix table
     const auto pattern_length = options_.pattern.size();
@@ -444,7 +444,7 @@ struct PlainSubstringMatcher {
     }
   }
 
-  bool Match(util::string_view current) {
+  bool Match(util::string_view current) const {
     // Phase 2: Find the prefix in the data
     const auto pattern_length = options_.pattern.size();
     int64_t pattern_pos = 0;
@@ -472,12 +472,17 @@ struct RegexSubstringMatcher {
   const MatchSubstringOptions& options_;
   const RE2 regex_match_;
 
-  RegexSubstringMatcher(KernelContext* ctx, const MatchSubstringOptions& options)
-      : options_(options), regex_match_(options_.pattern, RE2::Quiet) {
-    KERNEL_RETURN_IF_ERROR(ctx, RegexStatus(regex_match_));
+  static Result<std::unique_ptr<RegexSubstringMatcher>> Make(
+      const MatchSubstringOptions& options) {
+    auto matcher = ::arrow::internal::make_unique<RegexSubstringMatcher>(options);
+    RETURN_NOT_OK(RegexStatus(matcher->regex_match_));
+    return std::move(matcher);
   }
 
-  bool Match(util::string_view current) {
+  explicit RegexSubstringMatcher(const MatchSubstringOptions& options)
+      : options_(options), regex_match_(options_.pattern, RE2::Quiet) {}
+
+  bool Match(util::string_view current) const {
     auto piece = re2::StringPiece(current.data(), current.length());
     return re2::RE2::PartialMatch(piece, regex_match_);
   }
@@ -661,8 +666,8 @@ static inline bool IsPrintableCharacterAscii(uint8_t ascii_character) {
 
 template <typename Derived, bool allow_empty = false>
 struct CharacterPredicateUnicode {
-  static bool Call(KernelContext* ctx, const uint8_t* input,
-                   size_t input_string_ncodeunits) {
+  static bool Call(KernelContext*, const uint8_t* input, size_t input_string_ncodeunits,
+                   Status* st) {
     if (allow_empty && input_string_ncodeunits == 0) {
       return true;
     }
@@ -673,7 +678,7 @@ struct CharacterPredicateUnicode {
               any |= Derived::PredicateCharacterAny(codepoint);
               return Derived::PredicateCharacterAll(codepoint);
             }))) {
-      ctx->SetStatus(Status::Invalid("Invalid UTF8 sequence in input"));
+      *st = Status::Invalid("Invalid UTF8 sequence in input");
       return false;
     }
     return all & any;
@@ -686,8 +691,8 @@ struct CharacterPredicateUnicode {
 
 template <typename Derived, bool allow_empty = false>
 struct CharacterPredicateAscii {
-  static bool Call(KernelContext* ctx, const uint8_t* input,
-                   size_t input_string_ncodeunits) {
+  static bool Call(KernelContext*, const uint8_t* input, size_t input_string_ncodeunits,
+                   Status*) {
     if (allow_empty && input_string_ncodeunits == 0) {
       return true;
     }
@@ -764,8 +769,8 @@ struct IsNumericUnicode : CharacterPredicateUnicode<IsNumericUnicode> {
 #endif
 
 struct IsAscii {
-  static bool Call(KernelContext* ctx, const uint8_t* input,
-                   size_t input_string_nascii_characters) {
+  static bool Call(KernelContext*, const uint8_t* input,
+                   size_t input_string_nascii_characters, Status*) {
     return std::all_of(input, input + input_string_nascii_characters,
                        IsAsciiCharacter<uint8_t>);
   }
@@ -826,8 +831,8 @@ struct IsSpaceAscii : CharacterPredicateAscii<IsSpaceAscii> {
 
 #ifdef ARROW_WITH_UTF8PROC
 struct IsTitleUnicode {
-  static bool Call(KernelContext* ctx, const uint8_t* input,
-                   size_t input_string_ncodeunits) {
+  static bool Call(KernelContext*, const uint8_t* input, size_t input_string_ncodeunits,
+                   Status* st) {
     // rules:
     // * 1: lower case follows cased
     // * 2: upper case follows uncased
@@ -854,7 +859,7 @@ struct IsTitleUnicode {
                                  return true;
                                });
     if (!ARROW_PREDICT_TRUE(status)) {
-      ctx->SetStatus(Status::Invalid("Invalid UTF8 sequence in input"));
+      *st = Status::Invalid("Invalid UTF8 sequence in input");
       return false;
     }
     return rules_1_and_2 & rule_3;
@@ -863,8 +868,8 @@ struct IsTitleUnicode {
 #endif
 
 struct IsTitleAscii {
-  static bool Call(KernelContext* ctx, const uint8_t* input,
-                   size_t input_string_ncodeunits) {
+  static bool Call(KernelContext*, const uint8_t* input, size_t input_string_ncodeunits,
+                   Status*) {
     // rules:
     // * 1: lower case follows cased
     // * 2: upper case follows uncased
@@ -1001,15 +1006,15 @@ struct SplitBaseTransform {
 
   static Status CheckOptions(const Options& options) { return Status::OK(); }
 
-  static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     Options options = State::Get(ctx);
     Derived splitter(options);  // we make an instance to reuse the parts vectors
-    splitter.Split(ctx, batch, out);
+    return splitter.Split(ctx, batch, out);
   }
 
-  void Split(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  Status Split(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     EnsureLookupTablesFilled();  // only needed for unicode
-    KERNEL_RETURN_IF_ERROR(ctx, Derived::CheckOptions(options));
+    RETURN_NOT_OK(Derived::CheckOptions(options));
 
     if (batch[0].kind() == Datum::ARRAY) {
       const ArrayData& input = *batch[0].array();
@@ -1017,9 +1022,9 @@ struct SplitBaseTransform {
 
       BuilderType builder(input.type, ctx->memory_pool());
       // a slight overestimate of the data needed
-      KERNEL_RETURN_IF_ERROR(ctx, builder.ReserveData(input_boxed.total_values_length()));
+      RETURN_NOT_OK(builder.ReserveData(input_boxed.total_values_length()));
       // the minimum amount of strings needed
-      KERNEL_RETURN_IF_ERROR(ctx, builder.Resize(input.length));
+      RETURN_NOT_OK(builder.Resize(input.length));
 
       ArrayData* output_list = out->mutable_array();
       // list offsets were preallocated
@@ -1027,27 +1032,25 @@ struct SplitBaseTransform {
       DCHECK_NE(list_offsets, nullptr);
       // initial value
       *list_offsets++ = 0;
-      KERNEL_RETURN_IF_ERROR(
-          ctx,
-          VisitArrayDataInline<Type>(
-              input,
-              [&](util::string_view s) {
-                RETURN_NOT_OK(Split(s, &builder));
-                if (ARROW_PREDICT_FALSE(builder.length() >
-                                        std::numeric_limits<list_offset_type>::max())) {
-                  return Status::CapacityError("List offset does not fit into 32 bit");
-                }
-                *list_offsets++ = static_cast<list_offset_type>(builder.length());
-                return Status::OK();
-              },
-              [&]() {
-                // null value is already taken from input
-                *list_offsets++ = static_cast<list_offset_type>(builder.length());
-                return Status::OK();
-              }));
+      RETURN_NOT_OK(VisitArrayDataInline<Type>(
+          input,
+          [&](util::string_view s) {
+            RETURN_NOT_OK(Split(s, &builder));
+            if (ARROW_PREDICT_FALSE(builder.length() >
+                                    std::numeric_limits<list_offset_type>::max())) {
+              return Status::CapacityError("List offset does not fit into 32 bit");
+            }
+            *list_offsets++ = static_cast<list_offset_type>(builder.length());
+            return Status::OK();
+          },
+          [&]() {
+            // null value is already taken from input
+            *list_offsets++ = static_cast<list_offset_type>(builder.length());
+            return Status::OK();
+          }));
       // assign list child data
       std::shared_ptr<Array> string_array;
-      KERNEL_RETURN_IF_ERROR(ctx, builder.Finish(&string_array));
+      RETURN_NOT_OK(builder.Finish(&string_array));
       output_list->child_data.push_back(string_array->data());
 
     } else {
@@ -1057,11 +1060,13 @@ struct SplitBaseTransform {
         result->is_valid = true;
         BuilderType builder(input.type, ctx->memory_pool());
         util::string_view s(*input.value);
-        KERNEL_RETURN_IF_ERROR(ctx, Split(s, &builder));
-        KERNEL_RETURN_IF_ERROR(ctx, builder.Finish(&result->value));
+        RETURN_NOT_OK(Split(s, &builder));
+        RETURN_NOT_OK(builder.Finish(&result->value));
       }
       out->value = result;
     }
+
+    return Status::OK();
   }
 };
 
@@ -1314,64 +1319,68 @@ struct ReplaceSubString {
   using OffsetBuilder = TypedBufferBuilder<offset_type>;
   using State = OptionsWrapper<ReplaceSubstringOptions>;
 
-  static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     // TODO Cache replacer across invocations (for regex compilation)
-    Replacer replacer{ctx, State::Get(ctx)};
-    if (!ctx->HasError()) {
-      Replace(ctx, batch, &replacer, out);
-    }
+    ARROW_ASSIGN_OR_RAISE(auto replacer, Replacer::Make(State::Get(ctx)));
+    return Replace(ctx, batch, *replacer, out);
   }
 
-  static void Replace(KernelContext* ctx, const ExecBatch& batch, Replacer* replacer,
-                      Datum* out) {
+  static Status Replace(KernelContext* ctx, const ExecBatch& batch,
+                        const Replacer& replacer, Datum* out) {
     ValueDataBuilder value_data_builder(ctx->memory_pool());
     OffsetBuilder offset_builder(ctx->memory_pool());
 
     if (batch[0].kind() == Datum::ARRAY) {
       // We already know how many strings we have, so we can use Reserve/UnsafeAppend
-      KERNEL_RETURN_IF_ERROR(ctx, offset_builder.Reserve(batch[0].array()->length));
+      RETURN_NOT_OK(offset_builder.Reserve(batch[0].array()->length));
       offset_builder.UnsafeAppend(0);  // offsets start at 0
 
       const ArrayData& input = *batch[0].array();
-      KERNEL_RETURN_IF_ERROR(
-          ctx, VisitArrayDataInline<Type>(
-                   input,
-                   [&](util::string_view s) {
-                     RETURN_NOT_OK(replacer->ReplaceString(s, &value_data_builder));
-                     offset_builder.UnsafeAppend(
-                         static_cast<offset_type>(value_data_builder.length()));
-                     return Status::OK();
-                   },
-                   [&]() {
-                     // offset for null value
-                     offset_builder.UnsafeAppend(
-                         static_cast<offset_type>(value_data_builder.length()));
-                     return Status::OK();
-                   }));
+      RETURN_NOT_OK(VisitArrayDataInline<Type>(
+          input,
+          [&](util::string_view s) {
+            RETURN_NOT_OK(replacer.ReplaceString(s, &value_data_builder));
+            offset_builder.UnsafeAppend(
+                static_cast<offset_type>(value_data_builder.length()));
+            return Status::OK();
+          },
+          [&]() {
+            // offset for null value
+            offset_builder.UnsafeAppend(
+                static_cast<offset_type>(value_data_builder.length()));
+            return Status::OK();
+          }));
       ArrayData* output = out->mutable_array();
-      KERNEL_RETURN_IF_ERROR(ctx, value_data_builder.Finish(&output->buffers[2]));
-      KERNEL_RETURN_IF_ERROR(ctx, offset_builder.Finish(&output->buffers[1]));
+      RETURN_NOT_OK(value_data_builder.Finish(&output->buffers[2]));
+      RETURN_NOT_OK(offset_builder.Finish(&output->buffers[1]));
     } else {
       const auto& input = checked_cast<const ScalarType&>(*batch[0].scalar());
       auto result = std::make_shared<ScalarType>();
       if (input.is_valid) {
         util::string_view s = static_cast<util::string_view>(*input.value);
-        KERNEL_RETURN_IF_ERROR(ctx, replacer->ReplaceString(s, &value_data_builder));
-        KERNEL_RETURN_IF_ERROR(ctx, value_data_builder.Finish(&result->value));
+        RETURN_NOT_OK(replacer.ReplaceString(s, &value_data_builder));
+        RETURN_NOT_OK(value_data_builder.Finish(&result->value));
         result->is_valid = true;
       }
       out->value = result;
     }
+
+    return Status::OK();
   }
 };
 
 struct PlainSubStringReplacer {
   const ReplaceSubstringOptions& options_;
 
-  PlainSubStringReplacer(KernelContext* ctx, const ReplaceSubstringOptions& options)
+  static Result<std::unique_ptr<PlainSubStringReplacer>> Make(
+      const ReplaceSubstringOptions& options) {
+    return arrow::internal::make_unique<PlainSubStringReplacer>(options);
+  }
+
+  explicit PlainSubStringReplacer(const ReplaceSubstringOptions& options)
       : options_(options) {}
 
-  Status ReplaceString(util::string_view s, TypedBufferBuilder<uint8_t>* builder) {
+  Status ReplaceString(util::string_view s, TypedBufferBuilder<uint8_t>* builder) const {
     const char* i = s.begin();
     const char* end = s.end();
     int64_t max_replacements = options_.max_replacements;
@@ -1396,9 +1405,8 @@ struct PlainSubStringReplacer {
       }
     }
     // if we exited early due to max_replacements, add the trailing part
-    RETURN_NOT_OK(builder->Append(reinterpret_cast<const uint8_t*>(i),
-                                  static_cast<int64_t>(end - i)));
-    return Status::OK();
+    return builder->Append(reinterpret_cast<const uint8_t*>(i),
+                           static_cast<int64_t>(end - i));
   }
 };
 
@@ -1408,31 +1416,38 @@ struct RegexSubStringReplacer {
   const RE2 regex_find_;
   const RE2 regex_replacement_;
 
+  static Result<std::unique_ptr<RegexSubStringReplacer>> Make(
+      const ReplaceSubstringOptions& options) {
+    auto replacer = arrow::internal::make_unique<RegexSubStringReplacer>(options);
+
+    RETURN_NOT_OK(RegexStatus(replacer->regex_find_));
+    RETURN_NOT_OK(RegexStatus(replacer->regex_replacement_));
+
+    std::string replacement_error;
+    if (!replacer->regex_replacement_.CheckRewriteString(replacer->options_.replacement,
+                                                         &replacement_error)) {
+      return Status::Invalid("Invalid replacement string: ",
+                             std::move(replacement_error));
+    }
+
+    return std::move(replacer);
+  }
+
   // Using RE2::FindAndConsume we can only find the pattern if it is a group, therefore
   // we have 2 regexes, one with () around it, one without.
-  RegexSubStringReplacer(KernelContext* ctx, const ReplaceSubstringOptions& options)
+  explicit RegexSubStringReplacer(const ReplaceSubstringOptions& options)
       : options_(options),
         regex_find_("(" + options_.pattern + ")", RE2::Quiet),
-        regex_replacement_(options_.pattern, RE2::Quiet) {
-    KERNEL_RETURN_IF_ERROR(ctx, RegexStatus(regex_find_));
-    KERNEL_RETURN_IF_ERROR(ctx, RegexStatus(regex_replacement_));
-    std::string replacement_error;
-    if (!regex_replacement_.CheckRewriteString(options_.replacement,
-                                               &replacement_error)) {
-      ctx->SetStatus(
-          Status::Invalid("Invalid replacement string: ", std::move(replacement_error)));
-    }
-  }
+        regex_replacement_(options_.pattern, RE2::Quiet) {}
 
-  Status ReplaceString(util::string_view s, TypedBufferBuilder<uint8_t>* builder) {
+  Status ReplaceString(util::string_view s, TypedBufferBuilder<uint8_t>* builder) const {
     re2::StringPiece replacement(options_.replacement);
 
     if (options_.max_replacements == -1) {
       std::string s_copy(s.to_string());
       re2::RE2::GlobalReplace(&s_copy, regex_replacement_, replacement);
-      RETURN_NOT_OK(builder->Append(reinterpret_cast<const uint8_t*>(s_copy.data()),
-                                    s_copy.length()));
-      return Status::OK();
+      return builder->Append(reinterpret_cast<const uint8_t*>(s_copy.data()),
+                             s_copy.length());
     }
 
     // Since RE2 does not have the concept of max_replacements, we have to do some work
@@ -1467,9 +1482,8 @@ struct RegexSubStringReplacer {
       }
     }
     // If we exited early due to max_replacements, add the trailing part
-    RETURN_NOT_OK(builder->Append(reinterpret_cast<const uint8_t*>(i),
-                                  static_cast<int64_t>(end - i)));
-    return Status::OK();
+    return builder->Append(reinterpret_cast<const uint8_t*>(i),
+                           static_cast<int64_t>(end - i));
   }
 };
 #endif
@@ -1598,21 +1612,20 @@ struct ExtractRegex : public ExtractRegexBase {
 
   using ExtractRegexBase::ExtractRegexBase;
 
-  static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     ExtractRegexOptions options = State::Get(ctx);
-    KERNEL_ASSIGN_OR_RAISE(auto data, ctx, ExtractRegexData::Make(options));
-    ExtractRegex{data}.Extract(ctx, batch, out);
+    ARROW_ASSIGN_OR_RAISE(auto data, ExtractRegexData::Make(options));
+    return ExtractRegex{data}.Extract(ctx, batch, out);
   }
 
-  void Extract(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
-    KERNEL_ASSIGN_OR_RAISE(auto descr, ctx,
-                           data.ResolveOutputType(batch.GetDescriptors()));
+  Status Extract(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    ARROW_ASSIGN_OR_RAISE(auto descr, data.ResolveOutputType(batch.GetDescriptors()));
     DCHECK_NE(descr.type, nullptr);
     const auto& type = descr.type;
 
     if (batch[0].kind() == Datum::ARRAY) {
       std::unique_ptr<ArrayBuilder> array_builder;
-      KERNEL_RETURN_IF_ERROR(ctx, MakeBuilder(ctx->memory_pool(), type, &array_builder));
+      RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(), type, &array_builder));
       StructBuilder* struct_builder = checked_cast<StructBuilder*>(array_builder.get());
 
       std::vector<BuilderType*> field_builders;
@@ -1639,11 +1652,10 @@ struct ExtractRegex : public ExtractRegexBase {
         }
       };
       const ArrayData& input = *batch[0].array();
-      KERNEL_RETURN_IF_ERROR(ctx,
-                             VisitArrayDataInline<Type>(input, visit_value, visit_null));
+      RETURN_NOT_OK(VisitArrayDataInline<Type>(input, visit_value, visit_null));
 
       std::shared_ptr<Array> out_array;
-      KERNEL_RETURN_IF_ERROR(ctx, struct_builder->Finish(&out_array));
+      RETURN_NOT_OK(struct_builder->Finish(&out_array));
       *out = std::move(out_array);
     } else {
       const auto& input = checked_cast<const ScalarType&>(*batch[0].scalar());
@@ -1660,6 +1672,8 @@ struct ExtractRegex : public ExtractRegexBase {
       }
       out->value = std::move(result);
     }
+
+    return Status::OK();
   }
 };
 
@@ -1707,12 +1721,11 @@ struct ParseStrptime {
       : parser(TimestampParser::MakeStrptime(options.format)), unit(options.unit) {}
 
   template <typename... Ignored>
-  int64_t Call(KernelContext* ctx, util::string_view val) const {
+  int64_t Call(KernelContext*, util::string_view val, Status* st) const {
     int64_t result = 0;
     if (!(*parser)(val.data(), val.size(), unit, &result)) {
-      ctx->SetStatus(Status::Invalid("Failed to parse string: '", val,
-                                     "' as a scalar of type ",
-                                     TimestampType(unit).ToString()));
+      *st = Status::Invalid("Failed to parse string: '", val, "' as a scalar of type ",
+                            TimestampType(unit).ToString());
     }
     return result;
   }
@@ -1722,7 +1735,7 @@ struct ParseStrptime {
 };
 
 template <typename InputType>
-void StrptimeExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status StrptimeExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
   applicator::ScalarUnaryNotNullStateful<TimestampType, InputType, ParseStrptime> kernel{
       ParseStrptime(StrptimeState::Get(ctx))};
   return kernel.Exec(ctx, batch, out);
@@ -1764,9 +1777,9 @@ struct UTF8TrimWhitespaceBase : StringTransform<Type, Derived> {
     *output_written = static_cast<offset_type>(end_trimmed - begin_trimmed);
     return true;
   }
-  void Execute(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  Status Execute(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     EnsureLookupTablesFilled();
-    Base::Execute(ctx, batch, out);
+    return Base::Execute(ctx, batch, out);
   }
 };
 
@@ -1785,6 +1798,8 @@ struct UTF8RTrimWhitespace
 struct TrimStateUTF8 {
   TrimOptions options_;
   std::vector<bool> codepoints_;
+  Status status_ = Status::OK();
+
   explicit TrimStateUTF8(KernelContext* ctx, TrimOptions options)
       : options_(std::move(options)) {
     if (!ARROW_PREDICT_TRUE(
@@ -1793,7 +1808,7 @@ struct TrimStateUTF8 {
                   std::max(c + 1, static_cast<uint32_t>(codepoints_.size())));
               codepoints_.at(c) = true;
             }))) {
-      ctx->SetStatus(Status::Invalid("Invalid UTF8 sequence in input"));
+      status_ = Status::Invalid("Invalid UTF8 sequence in input");
     }
   }
 };
@@ -1807,14 +1822,15 @@ struct UTF8TrimBase : StringTransform<Type, Derived> {
 
   explicit UTF8TrimBase(TrimStateUTF8 state) : state_(std::move(state)) {}
 
-  static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     TrimStateUTF8 state = State::Get(ctx);
-    Derived(state).Execute(ctx, batch, out);
+    RETURN_NOT_OK(state.status_);
+    return Derived(state).Execute(ctx, batch, out);
   }
 
-  void Execute(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  Status Execute(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     EnsureLookupTablesFilled();
-    Base::Execute(ctx, batch, out);
+    return Base::Execute(ctx, batch, out);
   }
 
   bool Transform(const uint8_t* input, offset_type input_string_ncodeunits,
@@ -1843,6 +1859,7 @@ struct UTF8TrimBase : StringTransform<Type, Derived> {
     return true;
   }
 };
+
 template <typename Type>
 struct UTF8Trim : UTF8TrimBase<Type, true, true, UTF8Trim<Type>> {
   using Base = UTF8TrimBase<Type, true, true, UTF8Trim<Type>>;
@@ -1911,9 +1928,9 @@ struct AsciiTrimBase : StringTransform<Type, Derived> {
                   [&](char c) { characters_[static_cast<unsigned char>(c)] = true; });
   }
 
-  static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     TrimOptions options = State::Get(ctx);
-    Derived(options).Execute(ctx, batch, out);
+    return Derived(options).Execute(ctx, batch, out);
   }
 
   bool Transform(const uint8_t* input, offset_type input_string_ncodeunits,
@@ -2155,11 +2172,15 @@ void MakeUnaryStringUTF8TransformKernel(std::string name, FunctionRegistry* regi
 
 #endif
 
-using StringPredicate = std::function<bool(KernelContext*, const uint8_t*, size_t)>;
+// NOTE: Predicate should only populate 'status' with errors,
+//       leave it unmodified to indicate Status::OK()
+using StringPredicate =
+    std::function<bool(KernelContext*, const uint8_t*, size_t, Status*)>;
 
 template <typename Type>
-void ApplyPredicate(KernelContext* ctx, const ExecBatch& batch, StringPredicate predicate,
-                    Datum* out) {
+Status ApplyPredicate(KernelContext* ctx, const ExecBatch& batch,
+                      StringPredicate predicate, Datum* out) {
+  Status st = Status::OK();
   EnsureLookupTablesFilled();
   if (batch[0].kind() == Datum::ARRAY) {
     const ArrayData& input = *batch[0].array();
@@ -2169,20 +2190,21 @@ void ApplyPredicate(KernelContext* ctx, const ExecBatch& batch, StringPredicate
         out_arr->buffers[1]->mutable_data(), out_arr->offset, input.length,
         [&]() -> bool {
           util::string_view val = input_it();
-          return predicate(ctx, reinterpret_cast<const uint8_t*>(val.data()), val.size());
+          return predicate(ctx, reinterpret_cast<const uint8_t*>(val.data()), val.size(),
+                           &st);
         });
   } else {
     const auto& input = checked_cast<const BaseBinaryScalar&>(*batch[0].scalar());
     if (input.is_valid) {
-      bool boolean_result =
-          predicate(ctx, input.value->data(), static_cast<size_t>(input.value->size()));
-      if (!ctx->status().ok()) {
-        // UTF decoding can lead to issues
-        return;
+      bool boolean_result = predicate(ctx, input.value->data(),
+                                      static_cast<size_t>(input.value->size()), &st);
+      // UTF decoding can lead to issues
+      if (st.ok()) {
+        out->value = std::make_shared<BooleanScalar>(boolean_result);
       }
-      out->value = std::make_shared<BooleanScalar>(boolean_result);
     }
   }
+  return st;
 }
 
 template <typename Predicate>
@@ -2190,10 +2212,10 @@ void AddUnaryStringPredicate(std::string name, FunctionRegistry* registry,
                              const FunctionDoc* doc) {
   auto func = std::make_shared<ScalarFunction>(name, Arity::Unary(), doc);
   auto exec_32 = [](KernelContext* ctx, const ExecBatch& batch, Datum* out) {
-    ApplyPredicate<StringType>(ctx, batch, Predicate::Call, out);
+    return ApplyPredicate<StringType>(ctx, batch, Predicate::Call, out);
   };
   auto exec_64 = [](KernelContext* ctx, const ExecBatch& batch, Datum* out) {
-    ApplyPredicate<LargeStringType>(ctx, batch, Predicate::Call, out);
+    return ApplyPredicate<LargeStringType>(ctx, batch, Predicate::Call, out);
   };
   DCHECK_OK(func->AddKernel({utf8()}, boolean(), std::move(exec_32)));
   DCHECK_OK(func->AddKernel({large_utf8()}, boolean(), std::move(exec_64)));
diff --git a/cpp/src/arrow/compute/kernels/scalar_validity.cc b/cpp/src/arrow/compute/kernels/scalar_validity.cc
index 1d399f322bf..ebb3dca0d1e 100644
--- a/cpp/src/arrow/compute/kernels/scalar_validity.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_validity.cc
@@ -32,11 +32,12 @@ namespace internal {
 namespace {
 
 struct IsValidOperator {
-  static void Call(KernelContext* ctx, const Scalar& in, Scalar* out) {
+  static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) {
     checked_cast<BooleanScalar*>(out)->value = in.is_valid;
+    return Status::OK();
   }
 
-  static void Call(KernelContext* ctx, const ArrayData& arr, ArrayData* out) {
+  static Status Call(KernelContext* ctx, const ArrayData& arr, ArrayData* out) {
     DCHECK_EQ(out->offset, 0);
     DCHECK_LE(out->length, arr.length);
     if (arr.MayHaveNulls()) {
@@ -48,37 +49,40 @@ struct IsValidOperator {
           arr.offset == 0 ? arr.buffers[0]
                           : SliceBuffer(arr.buffers[0], arr.offset / 8,
                                         BitUtil::BytesForBits(out->length + out->offset));
-      return;
+      return Status::OK();
     }
 
     // Input has no nulls => output is entirely true.
-    KERNEL_ASSIGN_OR_RAISE(out->buffers[1], ctx,
-                           ctx->AllocateBitmap(out->length + out->offset));
+    ARROW_ASSIGN_OR_RAISE(out->buffers[1],
+                          ctx->AllocateBitmap(out->length + out->offset));
     BitUtil::SetBitsTo(out->buffers[1]->mutable_data(), out->offset, out->length, true);
+    return Status::OK();
   }
 };
 
 struct IsNullOperator {
-  static void Call(KernelContext* ctx, const Scalar& in, Scalar* out) {
+  static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) {
     checked_cast<BooleanScalar*>(out)->value = !in.is_valid;
+    return Status::OK();
   }
 
-  static void Call(KernelContext* ctx, const ArrayData& arr, ArrayData* out) {
+  static Status Call(KernelContext* ctx, const ArrayData& arr, ArrayData* out) {
     if (arr.MayHaveNulls()) {
       // Input has nulls => output is the inverted null (validity) bitmap.
       InvertBitmap(arr.buffers[0]->data(), arr.offset, arr.length,
                    out->buffers[1]->mutable_data(), out->offset);
-      return;
+    } else {
+      // Input has no nulls => output is entirely false.
+      BitUtil::SetBitsTo(out->buffers[1]->mutable_data(), out->offset, out->length,
+                         false);
     }
-
-    // Input has no nulls => output is entirely false.
-    BitUtil::SetBitsTo(out->buffers[1]->mutable_data(), out->offset, out->length, false);
+    return Status::OK();
   }
 };
 
 struct IsNanOperator {
   template <typename OutType, typename InType>
-  static constexpr OutType Call(KernelContext*, const InType& value) {
+  static constexpr OutType Call(KernelContext*, const InType& value, Status*) {
     return std::isnan(value);
   }
 };
@@ -116,7 +120,7 @@ std::shared_ptr<ScalarFunction> MakeIsNanFunction(std::string name,
   return func;
 }
 
-void IsValidExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status IsValidExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
   const Datum& arg0 = batch[0];
   if (arg0.type()->id() == Type::NA) {
     auto false_value = std::make_shared<BooleanScalar>(false);
@@ -124,17 +128,17 @@ void IsValidExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
       out->value = false_value;
     } else {
       std::shared_ptr<Array> false_values;
-      KERNEL_RETURN_IF_ERROR(
-          ctx, MakeArrayFromScalar(*false_value, out->length(), ctx->memory_pool())
-                   .Value(&false_values));
+      RETURN_NOT_OK(MakeArrayFromScalar(*false_value, out->length(), ctx->memory_pool())
+                        .Value(&false_values));
       out->value = false_values->data();
     }
+    return Status::OK();
   } else {
-    applicator::SimpleUnary<IsValidOperator>(ctx, batch, out);
+    return applicator::SimpleUnary<IsValidOperator>(ctx, batch, out);
   }
 }
 
-void IsNullExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status IsNullExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
   const Datum& arg0 = batch[0];
   if (arg0.type()->id() == Type::NA) {
     if (arg0.kind() == Datum::SCALAR) {
@@ -145,8 +149,9 @@ void IsNullExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
       BitUtil::SetBitsTo(out_arr->buffers[1]->mutable_data(), out_arr->offset,
                          out_arr->length, true);
     }
+    return Status::OK();
   } else {
-    applicator::SimpleUnary<IsNullOperator>(ctx, batch, out);
+    return applicator::SimpleUnary<IsNullOperator>(ctx, batch, out);
   }
 }
 
diff --git a/cpp/src/arrow/compute/kernels/util_internal.cc b/cpp/src/arrow/compute/kernels/util_internal.cc
index 1656ed2aaf3..846fa26baf2 100644
--- a/cpp/src/arrow/compute/kernels/util_internal.cc
+++ b/cpp/src/arrow/compute/kernels/util_internal.cc
@@ -59,24 +59,21 @@ PrimitiveArg GetPrimitiveArg(const ArrayData& arr) {
 
 ArrayKernelExec TrivialScalarUnaryAsArraysExec(ArrayKernelExec exec,
                                                NullHandling::type null_handling) {
-  return [=](KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  return [=](KernelContext* ctx, const ExecBatch& batch, Datum* out) -> Status {
     if (out->is_array()) {
       return exec(ctx, batch, out);
     }
 
     if (null_handling == NullHandling::INTERSECTION && !batch[0].scalar()->is_valid) {
       out->scalar()->is_valid = false;
-      return;
+      return Status::OK();
     }
 
-    KERNEL_ASSIGN_OR_RAISE(Datum array_in, ctx,
-                           MakeArrayFromScalar(*batch[0].scalar(), 1));
-
-    KERNEL_ASSIGN_OR_RAISE(Datum array_out, ctx, MakeArrayFromScalar(*out->scalar(), 1));
-
-    exec(ctx, ExecBatch{{std::move(array_in)}, 1}, &array_out);
-
-    KERNEL_ASSIGN_OR_RAISE(*out, ctx, array_out.make_array()->GetScalar(0));
+    ARROW_ASSIGN_OR_RAISE(Datum array_in, MakeArrayFromScalar(*batch[0].scalar(), 1));
+    ARROW_ASSIGN_OR_RAISE(Datum array_out, MakeArrayFromScalar(*out->scalar(), 1));
+    RETURN_NOT_OK(exec(ctx, ExecBatch{{std::move(array_in)}, 1}, &array_out));
+    ARROW_ASSIGN_OR_RAISE(*out, array_out.make_array()->GetScalar(0));
+    return Status::OK();
   };
 }
 
diff --git a/cpp/src/arrow/compute/kernels/vector_hash.cc b/cpp/src/arrow/compute/kernels/vector_hash.cc
index 0ed15702832..a68e78130f2 100644
--- a/cpp/src/arrow/compute/kernels/vector_hash.cc
+++ b/cpp/src/arrow/compute/kernels/vector_hash.cc
@@ -513,17 +513,19 @@ struct HashKernelTraits<Type, Action, enable_if_has_string_view<Type>> {
 };
 
 template <typename Type, typename Action>
-std::unique_ptr<HashKernel> HashInitImpl(KernelContext* ctx, const KernelInitArgs& args) {
+Result<std::unique_ptr<HashKernel>> HashInitImpl(KernelContext* ctx,
+                                                 const KernelInitArgs& args) {
   using HashKernelType = typename HashKernelTraits<Type, Action>::HashKernel;
   auto result = ::arrow::internal::make_unique<HashKernelType>(
       args.inputs[0].type, args.options, ctx->memory_pool());
-  ctx->SetStatus(result->Reset());
+  RETURN_NOT_OK(result->Reset());
   return std::move(result);
 }
 
 template <typename Type, typename Action>
-std::unique_ptr<KernelState> HashInit(KernelContext* ctx, const KernelInitArgs& args) {
-  return std::move(HashInitImpl<Type, Action>(ctx, args));
+Result<std::unique_ptr<KernelState>> HashInit(KernelContext* ctx,
+                                              const KernelInitArgs& args) {
+  return HashInitImpl<Type, Action>(ctx, args);
 }
 
 template <typename Action>
@@ -574,10 +576,10 @@ KernelInit GetHashInit(Type::type type_id) {
 using DictionaryEncodeState = OptionsWrapper<DictionaryEncodeOptions>;
 
 template <typename Action>
-std::unique_ptr<KernelState> DictionaryHashInit(KernelContext* ctx,
-                                                const KernelInitArgs& args) {
+Result<std::unique_ptr<KernelState>> DictionaryHashInit(KernelContext* ctx,
+                                                        const KernelInitArgs& args) {
   const auto& dict_type = checked_cast<const DictionaryType&>(*args.inputs[0].type);
-  std::unique_ptr<HashKernel> indices_hasher;
+  Result<std::unique_ptr<HashKernel>> indices_hasher;
   switch (dict_type.index_type()->id()) {
     case Type::INT8:
       indices_hasher = HashInitImpl<UInt8Type, Action>(ctx, args);
@@ -595,32 +597,37 @@ std::unique_ptr<KernelState> DictionaryHashInit(KernelContext* ctx,
       DCHECK(false) << "Unsupported dictionary index type";
       break;
   }
-  return ::arrow::internal::make_unique<DictionaryHashKernel>(std::move(indices_hasher));
+  RETURN_NOT_OK(indices_hasher);
+  return ::arrow::internal::make_unique<DictionaryHashKernel>(
+      std::move(indices_hasher.ValueOrDie()));
 }
 
-void HashExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status HashExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
   auto hash_impl = checked_cast<HashKernel*>(ctx->state());
-  KERNEL_RETURN_IF_ERROR(ctx, hash_impl->Append(ctx, *batch[0].array()));
-  KERNEL_RETURN_IF_ERROR(ctx, hash_impl->Flush(out));
+  RETURN_NOT_OK(hash_impl->Append(ctx, *batch[0].array()));
+  RETURN_NOT_OK(hash_impl->Flush(out));
+  return Status::OK();
 }
 
-void UniqueFinalize(KernelContext* ctx, std::vector<Datum>* out) {
+Status UniqueFinalize(KernelContext* ctx, std::vector<Datum>* out) {
   auto hash_impl = checked_cast<HashKernel*>(ctx->state());
   std::shared_ptr<ArrayData> uniques;
-  KERNEL_RETURN_IF_ERROR(ctx, hash_impl->GetDictionary(&uniques));
+  RETURN_NOT_OK(hash_impl->GetDictionary(&uniques));
   *out = {Datum(uniques)};
+  return Status::OK();
 }
 
-void DictEncodeFinalize(KernelContext* ctx, std::vector<Datum>* out) {
+Status DictEncodeFinalize(KernelContext* ctx, std::vector<Datum>* out) {
   auto hash_impl = checked_cast<HashKernel*>(ctx->state());
   std::shared_ptr<ArrayData> uniques;
-  KERNEL_RETURN_IF_ERROR(ctx, hash_impl->GetDictionary(&uniques));
+  RETURN_NOT_OK(hash_impl->GetDictionary(&uniques));
   auto dict_type = dictionary(int32(), uniques->type);
   auto dict = MakeArray(uniques);
   for (size_t i = 0; i < out->size(); ++i) {
     (*out)[i] =
         std::make_shared<DictionaryArray>(dict_type, (*out)[i].make_array(), dict);
   }
+  return Status::OK();
 }
 
 std::shared_ptr<ArrayData> BoxValueCounts(const std::shared_ptr<ArrayData>& uniques,
@@ -631,33 +638,33 @@ std::shared_ptr<ArrayData> BoxValueCounts(const std::shared_ptr<ArrayData>& uniq
   return std::make_shared<StructArray>(data_type, uniques->length, children)->data();
 }
 
-void ValueCountsFinalize(KernelContext* ctx, std::vector<Datum>* out) {
+Status ValueCountsFinalize(KernelContext* ctx, std::vector<Datum>* out) {
   auto hash_impl = checked_cast<HashKernel*>(ctx->state());
   std::shared_ptr<ArrayData> uniques;
   Datum value_counts;
 
-  KERNEL_RETURN_IF_ERROR(ctx, hash_impl->GetDictionary(&uniques));
-  KERNEL_RETURN_IF_ERROR(ctx, hash_impl->FlushFinal(&value_counts));
+  RETURN_NOT_OK(hash_impl->GetDictionary(&uniques));
+  RETURN_NOT_OK(hash_impl->FlushFinal(&value_counts));
   *out = {Datum(BoxValueCounts(uniques, value_counts.array()))};
+  return Status::OK();
 }
 
-void UniqueFinalizeDictionary(KernelContext* ctx, std::vector<Datum>* out) {
-  UniqueFinalize(ctx, out);
-  if (ctx->HasError()) {
-    return;
-  }
+Status UniqueFinalizeDictionary(KernelContext* ctx, std::vector<Datum>* out) {
+  RETURN_NOT_OK(UniqueFinalize(ctx, out));
   auto hash = checked_cast<DictionaryHashKernel*>(ctx->state());
   (*out)[0].mutable_array()->dictionary = hash->dictionary();
+  return Status::OK();
 }
 
-void ValueCountsFinalizeDictionary(KernelContext* ctx, std::vector<Datum>* out) {
+Status ValueCountsFinalizeDictionary(KernelContext* ctx, std::vector<Datum>* out) {
   auto hash = checked_cast<DictionaryHashKernel*>(ctx->state());
   std::shared_ptr<ArrayData> uniques;
   Datum value_counts;
-  KERNEL_RETURN_IF_ERROR(ctx, hash->GetDictionary(&uniques));
-  KERNEL_RETURN_IF_ERROR(ctx, hash->FlushFinal(&value_counts));
+  RETURN_NOT_OK(hash->GetDictionary(&uniques));
+  RETURN_NOT_OK(hash->FlushFinal(&value_counts));
   uniques->dictionary = hash->dictionary();
   *out = {Datum(BoxValueCounts(uniques, value_counts.array()))};
+  return Status::OK();
 }
 
 ValueDescr DictEncodeOutput(KernelContext*, const std::vector<ValueDescr>& descrs) {
diff --git a/cpp/src/arrow/compute/kernels/vector_nested.cc b/cpp/src/arrow/compute/kernels/vector_nested.cc
index b7317e5bea0..b84640854ed 100644
--- a/cpp/src/arrow/compute/kernels/vector_nested.cc
+++ b/cpp/src/arrow/compute/kernels/vector_nested.cc
@@ -27,18 +27,15 @@ namespace internal {
 namespace {
 
 template <typename Type>
-void ListFlatten(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status ListFlatten(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
   typename TypeTraits<Type>::ArrayType list_array(batch[0].array());
-  Result<std::shared_ptr<Array>> result = list_array.Flatten(ctx->memory_pool());
-  if (!result.ok()) {
-    ctx->SetStatus(result.status());
-    return;
-  }
-  out->value = (*result)->data();
+  ARROW_ASSIGN_OR_RAISE(auto result, list_array.Flatten(ctx->memory_pool()));
+  out->value = result->data();
+  return Status::OK();
 }
 
 template <typename Type, typename offset_type = typename Type::offset_type>
-void ListParentIndices(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status ListParentIndices(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
   typename TypeTraits<Type>::ArrayType list(batch[0].array());
   ArrayData* out_arr = out->mutable_array();
 
@@ -47,8 +44,8 @@ void ListParentIndices(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
 
   out_arr->length = values_length;
   out_arr->null_count = 0;
-  KERNEL_ASSIGN_OR_RAISE(out_arr->buffers[1], ctx,
-                         ctx->Allocate(values_length * sizeof(offset_type)));
+  ARROW_ASSIGN_OR_RAISE(out_arr->buffers[1],
+                        ctx->Allocate(values_length * sizeof(offset_type)));
   auto out_indices = reinterpret_cast<offset_type*>(out_arr->buffers[1]->mutable_data());
   for (int64_t i = 0; i < list.length(); ++i) {
     // Note: In most cases, null slots are empty, but when they are non-empty
@@ -58,6 +55,7 @@ void ListParentIndices(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
       *out_indices++ = static_cast<offset_type>(i);
     }
   }
+  return Status::OK();
 }
 
 Result<ValueDescr> ValuesType(KernelContext*, const std::vector<ValueDescr>& args) {
diff --git a/cpp/src/arrow/compute/kernels/vector_selection.cc b/cpp/src/arrow/compute/kernels/vector_selection.cc
index 1c96f7699c6..fc7a78a2305 100644
--- a/cpp/src/arrow/compute/kernels/vector_selection.cc
+++ b/cpp/src/arrow/compute/kernels/vector_selection.cc
@@ -490,9 +490,9 @@ void TakeIndexDispatch(const PrimitiveArg& values, const PrimitiveArg& indices,
   }
 }
 
-void PrimitiveTake(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status PrimitiveTake(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
   if (TakeState::Get(ctx).boundscheck) {
-    KERNEL_RETURN_IF_ERROR(ctx, CheckIndexBounds(*batch[1].array(), batch[0].length()));
+    RETURN_NOT_OK(CheckIndexBounds(*batch[1].array(), batch[0].length()));
   }
 
   PrimitiveArg values = GetPrimitiveArg(*batch[0].array());
@@ -504,23 +504,29 @@ void PrimitiveTake(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
   // allocating the validity bitmap altogether and save time and space. A
   // streamlined PrimitiveTakeImpl would need to be written that skips all
   // interactions with the output validity bitmap, though.
-  KERNEL_RETURN_IF_ERROR(ctx, PreallocateData(ctx, indices.length, values.bit_width,
-                                              /*allocate_validity=*/true, out_arr));
+  RETURN_NOT_OK(PreallocateData(ctx, indices.length, values.bit_width,
+                                /*allocate_validity=*/true, out_arr));
   switch (values.bit_width) {
     case 1:
-      return TakeIndexDispatch<BooleanTakeImpl>(values, indices, out_arr);
+      TakeIndexDispatch<BooleanTakeImpl>(values, indices, out_arr);
+      break;
     case 8:
-      return TakeIndexDispatch<PrimitiveTakeImpl, int8_t>(values, indices, out_arr);
+      TakeIndexDispatch<PrimitiveTakeImpl, int8_t>(values, indices, out_arr);
+      break;
     case 16:
-      return TakeIndexDispatch<PrimitiveTakeImpl, int16_t>(values, indices, out_arr);
+      TakeIndexDispatch<PrimitiveTakeImpl, int16_t>(values, indices, out_arr);
+      break;
     case 32:
-      return TakeIndexDispatch<PrimitiveTakeImpl, int32_t>(values, indices, out_arr);
+      TakeIndexDispatch<PrimitiveTakeImpl, int32_t>(values, indices, out_arr);
+      break;
     case 64:
-      return TakeIndexDispatch<PrimitiveTakeImpl, int64_t>(values, indices, out_arr);
+      TakeIndexDispatch<PrimitiveTakeImpl, int64_t>(values, indices, out_arr);
+      break;
     default:
       DCHECK(false) << "Invalid values byte width";
       break;
   }
+  return Status::OK();
 }
 
 // ----------------------------------------------------------------------
@@ -777,7 +783,7 @@ inline void PrimitiveFilterImpl<BooleanType>::WriteNull() {
   BitUtil::ClearBit(out_data_, out_offset_ + out_position_++);
 }
 
-void PrimitiveFilter(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status PrimitiveFilter(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
   PrimitiveArg values = GetPrimitiveArg(*batch[0].array());
   PrimitiveArg filter = GetPrimitiveArg(*batch[1].array());
   FilterOptions::NullSelectionBehavior null_selection =
@@ -802,29 +808,30 @@ void PrimitiveFilter(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
   // validity bitmap.
   bool allocate_validity = values.null_count != 0 || filter.null_count != 0;
 
-  KERNEL_RETURN_IF_ERROR(ctx, PreallocateData(ctx, output_length, values.bit_width,
-                                              allocate_validity, out_arr));
+  RETURN_NOT_OK(
+      PreallocateData(ctx, output_length, values.bit_width, allocate_validity, out_arr));
 
   switch (values.bit_width) {
     case 1:
-      return PrimitiveFilterImpl<BooleanType>(values, filter, null_selection, out_arr)
-          .Exec();
+      PrimitiveFilterImpl<BooleanType>(values, filter, null_selection, out_arr).Exec();
+      break;
     case 8:
-      return PrimitiveFilterImpl<UInt8Type>(values, filter, null_selection, out_arr)
-          .Exec();
+      PrimitiveFilterImpl<UInt8Type>(values, filter, null_selection, out_arr).Exec();
+      break;
     case 16:
-      return PrimitiveFilterImpl<UInt16Type>(values, filter, null_selection, out_arr)
-          .Exec();
+      PrimitiveFilterImpl<UInt16Type>(values, filter, null_selection, out_arr).Exec();
+      break;
     case 32:
-      return PrimitiveFilterImpl<UInt32Type>(values, filter, null_selection, out_arr)
-          .Exec();
+      PrimitiveFilterImpl<UInt32Type>(values, filter, null_selection, out_arr).Exec();
+      break;
     case 64:
-      return PrimitiveFilterImpl<UInt64Type>(values, filter, null_selection, out_arr)
-          .Exec();
+      PrimitiveFilterImpl<UInt64Type>(values, filter, null_selection, out_arr).Exec();
+      break;
     default:
       DCHECK(false) << "Invalid values bit width";
       break;
   }
+  return Status::OK();
 }
 
 // ----------------------------------------------------------------------
@@ -1072,7 +1079,7 @@ Status BinaryFilterImpl(KernelContext* ctx, const ArrayData& values,
 #undef APPEND_RAW_DATA
 #undef APPEND_SINGLE_VALUE
 
-void BinaryFilter(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status BinaryFilter(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
   FilterOptions::NullSelectionBehavior null_selection =
       FilterState::Get(ctx).null_selection_behavior;
 
@@ -1094,97 +1101,100 @@ void BinaryFilter(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
   if (values.null_count == 0 && filter.null_count == 0) {
     // Faster no-nulls case
     if (is_binary_like(type_id)) {
-      KERNEL_RETURN_IF_ERROR(
-          ctx, BinaryFilterNonNullImpl<BinaryType>(ctx, values, filter, output_length,
-                                                   null_selection, out_arr));
+      RETURN_NOT_OK(BinaryFilterNonNullImpl<BinaryType>(
+          ctx, values, filter, output_length, null_selection, out_arr));
     } else if (is_large_binary_like(type_id)) {
-      KERNEL_RETURN_IF_ERROR(
-          ctx, BinaryFilterNonNullImpl<LargeBinaryType>(
-                   ctx, values, filter, output_length, null_selection, out_arr));
+      RETURN_NOT_OK(BinaryFilterNonNullImpl<LargeBinaryType>(
+          ctx, values, filter, output_length, null_selection, out_arr));
     } else {
       DCHECK(false);
     }
   } else {
     // Output may have nulls
-    KERNEL_RETURN_IF_ERROR(
-        ctx, ctx->AllocateBitmap(output_length).Value(&out_arr->buffers[0]));
+    RETURN_NOT_OK(ctx->AllocateBitmap(output_length).Value(&out_arr->buffers[0]));
     if (is_binary_like(type_id)) {
-      KERNEL_RETURN_IF_ERROR(
-          ctx, BinaryFilterImpl<BinaryType>(ctx, values, filter, output_length,
-                                            null_selection, out_arr));
-    } else if (is_large_binary_like(type_id)) {
-      KERNEL_RETURN_IF_ERROR(
-          ctx, BinaryFilterImpl<LargeBinaryType>(ctx, values, filter, output_length,
+      RETURN_NOT_OK(BinaryFilterImpl<BinaryType>(ctx, values, filter, output_length,
                                                  null_selection, out_arr));
+    } else if (is_large_binary_like(type_id)) {
+      RETURN_NOT_OK(BinaryFilterImpl<LargeBinaryType>(ctx, values, filter, output_length,
+                                                      null_selection, out_arr));
     } else {
       DCHECK(false);
     }
   }
+
+  return Status::OK();
 }
 
 // ----------------------------------------------------------------------
 // Null take and filter
 
-void NullTake(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status NullTake(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
   if (TakeState::Get(ctx).boundscheck) {
-    KERNEL_RETURN_IF_ERROR(ctx, CheckIndexBounds(*batch[1].array(), batch[0].length()));
+    RETURN_NOT_OK(CheckIndexBounds(*batch[1].array(), batch[0].length()));
   }
   // batch.length doesn't take into account the take indices
   auto new_length = batch[1].array()->length;
   out->value = std::make_shared<NullArray>(new_length)->data();
+  return Status::OK();
 }
 
-void NullFilter(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status NullFilter(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
   int64_t output_length = GetFilterOutputSize(
       *batch[1].array(), FilterState::Get(ctx).null_selection_behavior);
   out->value = std::make_shared<NullArray>(output_length)->data();
+  return Status::OK();
 }
 
 // ----------------------------------------------------------------------
 // Dictionary take and filter
 
-void DictionaryTake(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status DictionaryTake(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
   DictionaryArray values(batch[0].array());
   Datum result;
-  KERNEL_RETURN_IF_ERROR(ctx, Take(Datum(values.indices()), batch[1], TakeState::Get(ctx),
-                                   ctx->exec_context())
-                                  .Value(&result));
+  RETURN_NOT_OK(
+      Take(Datum(values.indices()), batch[1], TakeState::Get(ctx), ctx->exec_context())
+          .Value(&result));
   DictionaryArray taken_values(values.type(), result.make_array(), values.dictionary());
   out->value = taken_values.data();
+  return Status::OK();
 }
 
-void DictionaryFilter(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status DictionaryFilter(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
   DictionaryArray dict_values(batch[0].array());
   Datum result;
-  KERNEL_RETURN_IF_ERROR(ctx, Filter(Datum(dict_values.indices()), batch[1].array(),
-                                     FilterState::Get(ctx), ctx->exec_context())
-                                  .Value(&result));
+  RETURN_NOT_OK(Filter(Datum(dict_values.indices()), batch[1].array(),
+                       FilterState::Get(ctx), ctx->exec_context())
+                    .Value(&result));
   DictionaryArray filtered_values(dict_values.type(), result.make_array(),
                                   dict_values.dictionary());
   out->value = filtered_values.data();
+  return Status::OK();
 }
 
 // ----------------------------------------------------------------------
 // Extension take and filter
 
-void ExtensionTake(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status ExtensionTake(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
   ExtensionArray values(batch[0].array());
   Datum result;
-  KERNEL_RETURN_IF_ERROR(ctx, Take(Datum(values.storage()), batch[1], TakeState::Get(ctx),
-                                   ctx->exec_context())
-                                  .Value(&result));
+  RETURN_NOT_OK(
+      Take(Datum(values.storage()), batch[1], TakeState::Get(ctx), ctx->exec_context())
+          .Value(&result));
   ExtensionArray taken_values(values.type(), result.make_array());
   out->value = taken_values.data();
+  return Status::OK();
 }
 
-void ExtensionFilter(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status ExtensionFilter(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
   ExtensionArray ext_values(batch[0].array());
   Datum result;
-  KERNEL_RETURN_IF_ERROR(ctx, Filter(Datum(ext_values.storage()), batch[1].array(),
-                                     FilterState::Get(ctx), ctx->exec_context())
-                                  .Value(&result));
+  RETURN_NOT_OK(Filter(Datum(ext_values.storage()), batch[1].array(),
+                       FilterState::Get(ctx), ctx->exec_context())
+                    .Value(&result));
   ExtensionArray filtered_values(ext_values.type(), result.make_array());
   out->value = filtered_values.data();
+  return Status::OK();
 }
 
 // ----------------------------------------------------------------------
@@ -1742,20 +1752,20 @@ struct StructImpl : public Selection<StructImpl, StructType> {
   }
 };
 
-void StructFilter(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status StructFilter(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
   // Transform filter to selection indices and then use Take.
   std::shared_ptr<ArrayData> indices;
-  KERNEL_RETURN_IF_ERROR(
-      ctx,
-      GetTakeIndices(*batch[1].array(), FilterState::Get(ctx).null_selection_behavior,
-                     ctx->memory_pool())
-          .Value(&indices));
+  RETURN_NOT_OK(GetTakeIndices(*batch[1].array(),
+                               FilterState::Get(ctx).null_selection_behavior,
+                               ctx->memory_pool())
+                    .Value(&indices));
 
   Datum result;
-  KERNEL_RETURN_IF_ERROR(ctx, Take(batch[0], Datum(indices), TakeOptions::NoBoundsCheck(),
-                                   ctx->exec_context())
-                                  .Value(&result));
+  RETURN_NOT_OK(
+      Take(batch[0], Datum(indices), TakeOptions::NoBoundsCheck(), ctx->exec_context())
+          .Value(&result));
   out->value = result.array();
+  return Status::OK();
 }
 
 #undef LIFT_BASE_MEMBERS
@@ -2064,21 +2074,21 @@ class TakeMetaFunction : public MetaFunction {
 // ----------------------------------------------------------------------
 
 template <typename Impl>
-void FilterExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status FilterExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
   // TODO: where are the values and filter length equality checked?
   int64_t output_length = GetFilterOutputSize(
       *batch[1].array(), FilterState::Get(ctx).null_selection_behavior);
   Impl kernel(ctx, batch, output_length, out);
-  KERNEL_RETURN_IF_ERROR(ctx, kernel.ExecFilter());
+  return kernel.ExecFilter();
 }
 
 template <typename Impl>
-void TakeExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+Status TakeExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
   if (TakeState::Get(ctx).boundscheck) {
-    KERNEL_RETURN_IF_ERROR(ctx, CheckIndexBounds(*batch[1].array(), batch[0].length()));
+    RETURN_NOT_OK(CheckIndexBounds(*batch[1].array(), batch[0].length()));
   }
   Impl kernel(ctx, batch, /*output_length=*/batch[1].length(), out);
-  KERNEL_RETURN_IF_ERROR(ctx, kernel.ExecTake());
+  return kernel.ExecTake();
 }
 
 struct SelectionKernelDescr {
diff --git a/cpp/src/arrow/compute/kernels/vector_sort.cc b/cpp/src/arrow/compute/kernels/vector_sort.cc
index 8593613c8f5..6c425d65550 100644
--- a/cpp/src/arrow/compute/kernels/vector_sort.cc
+++ b/cpp/src/arrow/compute/kernels/vector_sort.cc
@@ -322,27 +322,25 @@ template <typename OutType, typename InType>
 struct PartitionNthToIndices {
   using ArrayType = typename TypeTraits<InType>::ArrayType;
 
-  static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     using GetView = GetViewType<InType>;
 
     if (ctx->state() == nullptr) {
-      ctx->SetStatus(Status::Invalid("NthToIndices requires PartitionNthOptions"));
-      return;
+      return Status::Invalid("NthToIndices requires PartitionNthOptions");
     }
 
     ArrayType arr(batch[0].array());
 
     int64_t pivot = PartitionNthToIndicesState::Get(ctx).pivot;
     if (pivot > arr.length()) {
-      ctx->SetStatus(Status::IndexError("NthToIndices index out of bound"));
-      return;
+      return Status::IndexError("NthToIndices index out of bound");
     }
     ArrayData* out_arr = out->mutable_array();
     uint64_t* out_begin = out_arr->GetMutableValues<uint64_t>(1);
     uint64_t* out_end = out_begin + arr.length();
     std::iota(out_begin, out_end, 0);
     if (pivot == arr.length()) {
-      return;
+      return Status::OK();
     }
     auto nulls_begin =
         PartitionNulls<ArrayType, NonStablePartitioner>(out_begin, out_end, arr, 0);
@@ -355,6 +353,7 @@ struct PartitionNthToIndices {
                          return lval < rval;
                        });
     }
+    return Status::OK();
   }
 };
 
@@ -559,7 +558,7 @@ using ArraySortIndicesState = internal::OptionsWrapper<ArraySortOptions>;
 template <typename OutType, typename InType>
 struct ArraySortIndices {
   using ArrayType = typename TypeTraits<InType>::ArrayType;
-  static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     const auto& options = ArraySortIndicesState::Get(ctx);
 
     ArrayType arr(batch[0].array());
@@ -570,6 +569,8 @@ struct ArraySortIndices {
 
     ArraySorter<InType> sorter;
     sorter.impl.Sort(out_begin, out_end, arr, 0, options);
+
+    return Status::OK();
   }
 };
 
diff --git a/cpp/src/arrow/dataset/expression.cc b/cpp/src/arrow/dataset/expression.cc
index 627477b3038..cc126fcc7fb 100644
--- a/cpp/src/arrow/dataset/expression.cc
+++ b/cpp/src/arrow/dataset/expression.cc
@@ -427,10 +427,10 @@ Result<Expression> BindNonRecursive(Expression::Call call, bool insert_implicit_
 
   compute::KernelContext kernel_context(exec_context);
   if (call.kernel->init) {
-    call.kernel_state =
-        call.kernel->init(&kernel_context, {call.kernel, descrs, call.options.get()});
+    ARROW_ASSIGN_OR_RAISE(
+        call.kernel_state,
+        call.kernel->init(&kernel_context, {call.kernel, descrs, call.options.get()}));
 
-    RETURN_NOT_OK(kernel_context.status());
     kernel_context.SetState(call.kernel_state.get());
   }
 

From e8e24dfc7f01d0260385b587e48af99ae54533fd Mon Sep 17 00:00:00 2001
From: Alessandro Molina <amol@turbogears.org>
Date: Tue, 27 Apr 2021 11:48:33 +0200
Subject: [PATCH 129/719] ARROW-11780: [Python] Avoid crashing when a
 ChunkedArray is provided to StructArray.from_arrays()

https://issues.apache.org/jira/browse/ARROW-11780

Closes #10097 from amol-/ARROW-11780

Authored-by: Alessandro Molina <amol@turbogears.org>
Signed-off-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
---
 python/pyarrow/array.pxi           |  5 ++++-
 python/pyarrow/tests/test_array.py | 10 ++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 81000ce826e..fb1aa744711 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -2186,7 +2186,10 @@ cdef class StructArray(Array):
 
         arrays = [asarray(x) for x in arrays]
         for arr in arrays:
-            c_arrays.push_back(pyarrow_unwrap_array(arr))
+            c_array = pyarrow_unwrap_array(arr)
+            if c_array == nullptr:
+                raise TypeError(f"Expected Array, got {arr.__class__}")
+            c_arrays.push_back(c_array)
         if names is not None:
             for name in names:
                 c_names.push_back(tobytes(name))
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index 46c5415ea1e..fb0f8552672 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -669,6 +669,16 @@ def test_struct_from_arrays():
         pa.StructArray.from_arrays([a, b, c], fields=[fa2, fb, fc])
 
 
+def test_struct_array_from_chunked():
+    # ARROW-11780
+    # Check that we don't segfault when trying to build
+    # a StructArray from a chunked array.
+    chunked_arr = pa.chunked_array([[1, 2, 3], [4, 5, 6]])
+
+    with pytest.raises(TypeError, match="Expected Array"):
+        pa.StructArray.from_arrays([chunked_arr], ["foo"])
+
+
 def test_dictionary_from_numpy():
     indices = np.repeat([0, 1, 2], 2)
     dictionary = np.array(['foo', 'bar', 'baz'], dtype=object)

From 2f7d75dd55bf8d0059cfefa7a63bbcbd7001d195 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Tue, 27 Apr 2021 14:25:30 +0200
Subject: [PATCH 130/719] ARROW-12541: [Docs] Improve styling/readability of
 tables in the new doc theme

Closes #10161 from jorisvandenbossche/ARROW-12541

Authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Signed-off-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
---
 docs/source/_static/theme_overrides.css | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/docs/source/_static/theme_overrides.css b/docs/source/_static/theme_overrides.css
index f623b3b3c49..1e972cc6fc4 100644
--- a/docs/source/_static/theme_overrides.css
+++ b/docs/source/_static/theme_overrides.css
@@ -55,6 +55,16 @@ a.navbar-brand img {
 }
 
 
+/* This is the bootstrap CSS style for "table-striped". Since the theme does
+not yet provide an easy way to configure this globaly, it easier to simply
+include this snippet here than updating each table in all rst files to
+add ":class: table-striped" */
+
+.table tbody tr:nth-of-type(odd) {
+  background-color: rgba(0, 0, 0, 0.05);
+}
+
+
 /* Limit the max height of the sidebar navigation section. Because in our
 custimized template, there is more content above the navigation, i.e.
 larger logo: if we don't decrease the max-height, it will overlap with

From c65ffe3d9e6e066a37b498d88047dcc1adc0074f Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Tue, 27 Apr 2021 15:24:52 +0200
Subject: [PATCH 131/719] ARROW-12482: [Doc][C++][Python] Mention
 CSVStreamingReader pitfalls with type inference

Users may be surprised that type inference is done on the first block, and values can fail converting afterwards.

Closes #10132 from pitrou/ARROW-12482-csv-streaming-doc

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/csv/options.h |  9 +++++++--
 cpp/src/arrow/csv/reader.h  |  9 ++++++++-
 docs/source/python/csv.rst  | 13 +++++++++++--
 python/pyarrow/_csv.pyx     |  4 ++--
 4 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/cpp/src/arrow/csv/options.h b/cpp/src/arrow/csv/options.h
index 5c912e7fd85..f183743ac84 100644
--- a/cpp/src/arrow/csv/options.h
+++ b/cpp/src/arrow/csv/options.h
@@ -119,15 +119,20 @@ struct ARROW_EXPORT ReadOptions {
 
   /// Whether to use the global CPU thread pool
   bool use_threads = true;
-  /// Block size we request from the IO layer; also determines the size of
-  /// chunks when use_threads is true
+
+  /// \brief Block size we request from the IO layer.
+  ///
+  /// This will determine multi-threading granularity as well as
+  /// the size of individual record batches.
   int32_t block_size = 1 << 20;  // 1 MB
 
   /// Number of header rows to skip (not including the row of column names, if any)
   int32_t skip_rows = 0;
+
   /// Column names for the target table.
   /// If empty, fall back on autogenerate_column_names.
   std::vector<std::string> column_names;
+
   /// Whether to autogenerate column names if `column_names` is empty.
   /// If true, column names will be of the form "f0", "f1"...
   /// If false, column names will be read from the first CSV row after `skip_rows`.
diff --git a/cpp/src/arrow/csv/reader.h b/cpp/src/arrow/csv/reader.h
index 8e56824a0ac..bb633d0cb06 100644
--- a/cpp/src/arrow/csv/reader.h
+++ b/cpp/src/arrow/csv/reader.h
@@ -59,7 +59,14 @@ class ARROW_EXPORT TableReader {
       const ReadOptions&, const ParseOptions&, const ConvertOptions&);
 };
 
-/// Experimental
+/// \brief A class that reads a CSV file incrementally
+///
+/// Caveats:
+/// - For now, this is always single-threaded (regardless of `ReadOptions::use_threads`.
+/// - Type inference is done on the first block and types are frozen afterwards;
+///   to make sure the right data types are inferred, either set
+///   `ReadOptions::block_size` to a large enough value, or use
+///   `ConvertOptions::column_types` to set the desired data types explicitly.
 class ARROW_EXPORT StreamingReader : public RecordBatchReader {
  public:
   virtual ~StreamingReader() = default;
diff --git a/docs/source/python/csv.rst b/docs/source/python/csv.rst
index ad48ee59e8f..3c0b3993217 100644
--- a/docs/source/python/csv.rst
+++ b/docs/source/python/csv.rst
@@ -92,8 +92,17 @@ Incremental reading
 -------------------
 
 For memory-constrained environments, it is also possible to read a CSV file
-one batch at a time, using :func:`open_csv`.  It currently doesn't support
-parallel reading.
+one batch at a time, using :func:`open_csv`.
+
+There are a few caveats:
+
+1. For now, the incremental reader is always single-threaded (regardless of
+   :attr:`ReadOptions.use_threads`)
+
+2. Type inference is done on the first block and types are frozen afterwards;
+   to make sure the right data types are inferred, either set
+   :attr:`ReadOptions.block_size` to a large enough value, or use
+   :attr:`ConvertOptions.column_types` to set the desired data types explicitly.
 
 Character encoding
 ------------------
diff --git a/python/pyarrow/_csv.pyx b/python/pyarrow/_csv.pyx
index a98160cfa99..a330664f641 100644
--- a/python/pyarrow/_csv.pyx
+++ b/python/pyarrow/_csv.pyx
@@ -57,7 +57,7 @@ cdef class ReadOptions(_Weakrefable):
     block_size : int, optional
         How much bytes to process at a time from the input stream.
         This will determine multi-threading granularity as well as
-        the size of individual chunks in the Table.
+        the size of individual record batches or table chunks.
     skip_rows: int, optional (default 0)
         The number of rows to skip before the column names (if any)
         and the CSV data.
@@ -110,7 +110,7 @@ cdef class ReadOptions(_Weakrefable):
         """
         How much bytes to process at a time from the input stream.
         This will determine multi-threading granularity as well as
-        the size of individual chunks in the Table.
+        the size of individual record batches or table chunks.
         """
         return self.options.block_size
 

From d920695956cf59c4b823edfd2d99f1fe2e480341 Mon Sep 17 00:00:00 2001
From: Weston Pace <weston.pace@gmail.com>
Date: Tue, 27 Apr 2021 11:00:59 -0400
Subject: [PATCH 132/719] ARROW-12392: [C++] Restore asynchronous streaming CSV
 reader

This restores ARROW-11887.  The only difference is that I kept the async path out of RecordBatchReader.  I now know the file formats do not use RecordBatchReader for the read so there was no need to add it in there yet.

It also now properly consumes (using RunInSerialExecutor) the reader via the nested parallel paths in the synchronous scanner.

Closes #10061 from westonpace/feature/arrow-12392

Authored-by: Weston Pace <weston.pace@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/src/arrow/csv/reader.cc              | 223 +++++++++++++++--------
 cpp/src/arrow/csv/reader.h               |  13 ++
 cpp/src/arrow/csv/reader_test.cc         |  10 +-
 cpp/src/arrow/dataset/file_base.cc       |  33 ++--
 cpp/src/arrow/dataset/file_csv.cc        |  87 ++++++---
 cpp/src/arrow/dataset/scanner.cc         |  28 ++-
 cpp/src/arrow/dataset/scanner.h          |   3 +
 cpp/src/arrow/dataset/scanner_internal.h | 126 +++++++++----
 8 files changed, 363 insertions(+), 160 deletions(-)

diff --git a/cpp/src/arrow/csv/reader.cc b/cpp/src/arrow/csv/reader.cc
index 5b6e11efdaf..c4352360e6b 100644
--- a/cpp/src/arrow/csv/reader.cc
+++ b/cpp/src/arrow/csv/reader.cc
@@ -199,6 +199,19 @@ class SerialBlockReader : public BlockReader {
     return MakeTransformedIterator(std::move(buffer_iterator), block_reader_fn);
   }
 
+  static AsyncGenerator<CSVBlock> MakeAsyncIterator(
+      AsyncGenerator<std::shared_ptr<Buffer>> buffer_generator,
+      std::unique_ptr<Chunker> chunker, std::shared_ptr<Buffer> first_buffer) {
+    auto block_reader =
+        std::make_shared<SerialBlockReader>(std::move(chunker), first_buffer);
+    // Wrap shared pointer in callable
+    Transformer<std::shared_ptr<Buffer>, CSVBlock> block_reader_fn =
+        [block_reader](std::shared_ptr<Buffer> next) {
+          return (*block_reader)(std::move(next));
+        };
+    return MakeTransformedGenerator(std::move(buffer_generator), block_reader_fn);
+  }
+
   Result<TransformFlow<CSVBlock>> operator()(std::shared_ptr<Buffer> next_buffer) {
     if (buffer_ == nullptr) {
       return TransformFinish();
@@ -572,22 +585,25 @@ class BaseTableReader : public ReaderMixin, public csv::TableReader {
 
 class BaseStreamingReader : public ReaderMixin, public csv::StreamingReader {
  public:
-  using ReaderMixin::ReaderMixin;
+  BaseStreamingReader(io::IOContext io_context, Executor* cpu_executor,
+                      std::shared_ptr<io::InputStream> input,
+                      const ReadOptions& read_options, const ParseOptions& parse_options,
+                      const ConvertOptions& convert_options)
+      : ReaderMixin(io_context, std::move(input), read_options, parse_options,
+                    convert_options),
+        cpu_executor_(cpu_executor) {}
 
-  virtual Status Init() = 0;
+  virtual Future<std::shared_ptr<csv::StreamingReader>> Init() = 0;
 
   std::shared_ptr<Schema> schema() const override { return schema_; }
 
   Status ReadNext(std::shared_ptr<RecordBatch>* batch) override {
-    do {
-      RETURN_NOT_OK(ReadNext().Value(batch));
-    } while (*batch != nullptr && (*batch)->num_rows() == 0);
-    return Status::OK();
+    auto next_fut = ReadNextAsync();
+    auto next_result = next_fut.result();
+    return std::move(next_result).Value(batch);
   }
 
  protected:
-  virtual Result<std::shared_ptr<RecordBatch>> ReadNext() = 0;
-
   // Make column decoders from conversion schema
   Status MakeColumnDecoders() {
     for (const auto& column : conversion_schema_.columns) {
@@ -670,101 +686,141 @@ class BaseStreamingReader : public ReaderMixin, public csv::StreamingReader {
   std::vector<std::shared_ptr<ColumnDecoder>> column_decoders_;
   std::shared_ptr<Schema> schema_;
   std::shared_ptr<RecordBatch> pending_batch_;
-  Iterator<std::shared_ptr<Buffer>> buffer_iterator_;
+  AsyncGenerator<std::shared_ptr<Buffer>> buffer_generator_;
+  Executor* cpu_executor_;
   bool eof_ = false;
 };
 
 /////////////////////////////////////////////////////////////////////////
 // Serial StreamingReader implementation
 
-class SerialStreamingReader : public BaseStreamingReader {
+class SerialStreamingReader : public BaseStreamingReader,
+                              public std::enable_shared_from_this<SerialStreamingReader> {
  public:
   using BaseStreamingReader::BaseStreamingReader;
 
-  Status Init() override {
+  Future<std::shared_ptr<csv::StreamingReader>> Init() override {
     ARROW_ASSIGN_OR_RAISE(auto istream_it,
                           io::MakeInputStreamIterator(input_, read_options_.block_size));
 
-    // Since we're converting serially, no need to readahead more than one block
-    int32_t block_queue_size = 1;
-    ARROW_ASSIGN_OR_RAISE(auto rh_it,
-                          MakeReadaheadIterator(std::move(istream_it), block_queue_size));
-    buffer_iterator_ = CSVBufferIterator::Make(std::move(rh_it));
+    // TODO Consider exposing readahead as a read option (ARROW-12090)
+    ARROW_ASSIGN_OR_RAISE(auto bg_it, MakeBackgroundGenerator(std::move(istream_it),
+                                                              io_context_.executor()));
+
+    auto transferred_it = MakeTransferredGenerator(bg_it, cpu_executor_);
+
+    buffer_generator_ = CSVBufferIterator::MakeAsync(std::move(transferred_it));
     task_group_ = internal::TaskGroup::MakeSerial(io_context_.stop_token());
 
+    auto self = shared_from_this();
     // Read schema from first batch
-    ARROW_ASSIGN_OR_RAISE(pending_batch_, ReadNext());
-    DCHECK_NE(schema_, nullptr);
-    return Status::OK();
+    return ReadNextAsync().Then([self](const std::shared_ptr<RecordBatch>& first_batch)
+                                    -> Result<std::shared_ptr<csv::StreamingReader>> {
+      self->pending_batch_ = first_batch;
+      DCHECK_NE(self->schema_, nullptr);
+      return self;
+    });
   }
 
- protected:
-  Result<std::shared_ptr<RecordBatch>> ReadNext() override {
-    if (eof_) {
-      return nullptr;
-    }
-    if (io_context_.stop_token().IsStopRequested()) {
-      eof_ = true;
-      return io_context_.stop_token().Poll();
-    }
-    if (!block_iterator_) {
-      Status st = SetupReader();
-      if (!st.ok()) {
-        // Can't setup reader => bail out
-        eof_ = true;
-        return st;
-      }
+  Result<std::shared_ptr<RecordBatch>> DecodeBatchAndUpdateSchema() {
+    auto maybe_batch = DecodeNextBatch();
+    if (schema_ == nullptr && maybe_batch.ok()) {
+      schema_ = (*maybe_batch)->schema();
     }
+    return maybe_batch;
+  }
+
+  Future<std::shared_ptr<RecordBatch>> DoReadNext(
+      std::shared_ptr<SerialStreamingReader> self) {
     auto batch = std::move(pending_batch_);
     if (batch != nullptr) {
-      return batch;
+      return Future<std::shared_ptr<RecordBatch>>::MakeFinished(batch);
     }
 
     if (!source_eof_) {
-      ARROW_ASSIGN_OR_RAISE(auto maybe_block, block_iterator_.Next());
-      if (!IsIterationEnd(maybe_block)) {
-        last_block_index_ = maybe_block.block_index;
-        auto maybe_parsed = ParseAndInsert(maybe_block.partial, maybe_block.completion,
-                                           maybe_block.buffer, maybe_block.block_index,
-                                           maybe_block.is_final);
-        if (!maybe_parsed.ok()) {
-          // Parse error => bail out
-          eof_ = true;
-          return maybe_parsed.status();
-        }
-        RETURN_NOT_OK(maybe_block.consume_bytes(*maybe_parsed));
-      } else {
-        source_eof_ = true;
-        for (auto& decoder : column_decoders_) {
-          decoder->SetEOF(last_block_index_ + 1);
-        }
-      }
+      return block_generator_()
+          .Then([self](const CSVBlock& maybe_block) -> Status {
+            if (!IsIterationEnd(maybe_block)) {
+              self->last_block_index_ = maybe_block.block_index;
+              auto maybe_parsed = self->ParseAndInsert(
+                  maybe_block.partial, maybe_block.completion, maybe_block.buffer,
+                  maybe_block.block_index, maybe_block.is_final);
+              if (!maybe_parsed.ok()) {
+                // Parse error => bail out
+                self->eof_ = true;
+                return maybe_parsed.status();
+              }
+              RETURN_NOT_OK(maybe_block.consume_bytes(*maybe_parsed));
+            } else {
+              self->source_eof_ = true;
+              for (auto& decoder : self->column_decoders_) {
+                decoder->SetEOF(self->last_block_index_ + 1);
+              }
+            }
+            return Status::OK();
+          })
+          .Then([self](const ::arrow::detail::Empty& st)
+                    -> Result<std::shared_ptr<RecordBatch>> {
+            return self->DecodeBatchAndUpdateSchema();
+          });
     }
+    return Future<std::shared_ptr<RecordBatch>>::MakeFinished(
+        DecodeBatchAndUpdateSchema());
+  }
 
-    auto maybe_batch = DecodeNextBatch();
-    if (schema_ == nullptr && maybe_batch.ok()) {
-      schema_ = (*maybe_batch)->schema();
+  Future<std::shared_ptr<RecordBatch>> ReadNextSkippingEmpty(
+      std::shared_ptr<SerialStreamingReader> self) {
+    return DoReadNext(self).Then([self](const std::shared_ptr<RecordBatch>& batch) {
+      if (batch != nullptr && batch->num_rows() == 0) {
+        return self->ReadNextSkippingEmpty(self);
+      }
+      return Future<std::shared_ptr<RecordBatch>>::MakeFinished(batch);
+    });
+  }
+
+  Future<std::shared_ptr<RecordBatch>> ReadNextAsync() override {
+    if (eof_) {
+      return Future<std::shared_ptr<RecordBatch>>::MakeFinished(nullptr);
+    }
+    if (io_context_.stop_token().IsStopRequested()) {
+      eof_ = true;
+      return io_context_.stop_token().Poll();
+    }
+    auto self = shared_from_this();
+    if (!block_generator_) {
+      return SetupReader(self).Then([self](const Result<::arrow::detail::Empty>& res)
+                                        -> Future<std::shared_ptr<RecordBatch>> {
+        if (!res.ok()) {
+          self->eof_ = true;
+          return res.status();
+        }
+        return self->ReadNextSkippingEmpty(self);
+      });
+    } else {
+      return self->ReadNextSkippingEmpty(self);
     }
-    return maybe_batch;
   };
 
-  Status SetupReader() {
-    ARROW_ASSIGN_OR_RAISE(auto first_buffer, buffer_iterator_.Next());
-    if (first_buffer == nullptr) {
-      return Status::Invalid("Empty CSV file");
-    }
-    RETURN_NOT_OK(ProcessHeader(first_buffer, &first_buffer));
-    RETURN_NOT_OK(MakeColumnDecoders());
+ protected:
+  Future<> SetupReader(std::shared_ptr<SerialStreamingReader> self) {
+    return buffer_generator_().Then([self](const std::shared_ptr<Buffer>& first_buffer) {
+      if (first_buffer == nullptr) {
+        return Status::Invalid("Empty CSV file");
+      }
+      auto own_first_buffer = first_buffer;
+      RETURN_NOT_OK(self->ProcessHeader(own_first_buffer, &own_first_buffer));
+      RETURN_NOT_OK(self->MakeColumnDecoders());
 
-    block_iterator_ = SerialBlockReader::MakeIterator(std::move(buffer_iterator_),
-                                                      MakeChunker(parse_options_),
-                                                      std::move(first_buffer));
-    return Status::OK();
+      self->block_generator_ = SerialBlockReader::MakeAsyncIterator(
+          std::move(self->buffer_generator_), MakeChunker(self->parse_options_),
+          std::move(own_first_buffer));
+      return Status::OK();
+    });
   }
 
   bool source_eof_ = false;
   int64_t last_block_index_ = 0;
-  Iterator<CSVBlock> block_iterator_;
+  AsyncGenerator<CSVBlock> block_generator_;
 };
 
 /////////////////////////////////////////////////////////////////////////
@@ -943,15 +999,14 @@ Result<std::shared_ptr<TableReader>> MakeTableReader(
   return reader;
 }
 
-Result<std::shared_ptr<StreamingReader>> MakeStreamingReader(
+Future<std::shared_ptr<StreamingReader>> MakeStreamingReader(
     io::IOContext io_context, std::shared_ptr<io::InputStream> input,
     internal::Executor* cpu_executor, const ReadOptions& read_options,
     const ParseOptions& parse_options, const ConvertOptions& convert_options) {
   std::shared_ptr<BaseStreamingReader> reader;
-  reader = std::make_shared<SerialStreamingReader>(io_context, input, read_options,
-                                                   parse_options, convert_options);
-  RETURN_NOT_OK(reader->Init());
-  return reader;
+  reader = std::make_shared<SerialStreamingReader>(
+      io_context, cpu_executor, input, read_options, parse_options, convert_options);
+  return reader->Init();
 }
 
 }  // namespace
@@ -981,8 +1036,11 @@ Result<std::shared_ptr<StreamingReader>> StreamingReader::Make(
     const ConvertOptions& convert_options) {
   auto io_context = io::IOContext(pool);
   auto cpu_executor = internal::GetCpuThreadPool();
-  return MakeStreamingReader(io_context, std::move(input), cpu_executor, read_options,
-                             parse_options, convert_options);
+  auto reader_fut = MakeStreamingReader(io_context, std::move(input), cpu_executor,
+                                        read_options, parse_options, convert_options);
+  auto reader_result = reader_fut.result();
+  ARROW_ASSIGN_OR_RAISE(auto reader, reader_result);
+  return reader;
 }
 
 Result<std::shared_ptr<StreamingReader>> StreamingReader::Make(
@@ -990,6 +1048,17 @@ Result<std::shared_ptr<StreamingReader>> StreamingReader::Make(
     const ReadOptions& read_options, const ParseOptions& parse_options,
     const ConvertOptions& convert_options) {
   auto cpu_executor = internal::GetCpuThreadPool();
+  auto reader_fut = MakeStreamingReader(io_context, std::move(input), cpu_executor,
+                                        read_options, parse_options, convert_options);
+  auto reader_result = reader_fut.result();
+  ARROW_ASSIGN_OR_RAISE(auto reader, reader_result);
+  return reader;
+}
+
+Future<std::shared_ptr<StreamingReader>> StreamingReader::MakeAsync(
+    io::IOContext io_context, std::shared_ptr<io::InputStream> input,
+    internal::Executor* cpu_executor, const ReadOptions& read_options,
+    const ParseOptions& parse_options, const ConvertOptions& convert_options) {
   return MakeStreamingReader(io_context, std::move(input), cpu_executor, read_options,
                              parse_options, convert_options);
 }
diff --git a/cpp/src/arrow/csv/reader.h b/cpp/src/arrow/csv/reader.h
index bb633d0cb06..253911bb4b9 100644
--- a/cpp/src/arrow/csv/reader.h
+++ b/cpp/src/arrow/csv/reader.h
@@ -71,7 +71,20 @@ class ARROW_EXPORT StreamingReader : public RecordBatchReader {
  public:
   virtual ~StreamingReader() = default;
 
+  virtual Future<std::shared_ptr<RecordBatch>> ReadNextAsync() = 0;
+
   /// Create a StreamingReader instance
+  ///
+  /// This involves some I/O as the first batch must be loaded during the creation process
+  /// so it is returned as a future
+  ///
+  /// Currently, the StreamingReader is not async-reentrant and does not do any fan-out
+  /// parsing (see ARROW-11889)
+  static Future<std::shared_ptr<StreamingReader>> MakeAsync(
+      io::IOContext io_context, std::shared_ptr<io::InputStream> input,
+      internal::Executor* cpu_executor, const ReadOptions&, const ParseOptions&,
+      const ConvertOptions&);
+
   static Result<std::shared_ptr<StreamingReader>> Make(
       io::IOContext io_context, std::shared_ptr<io::InputStream> input,
       const ReadOptions&, const ParseOptions&, const ConvertOptions&);
diff --git a/cpp/src/arrow/csv/reader_test.cc b/cpp/src/arrow/csv/reader_test.cc
index dbe6b1d4f20..4f6c175996c 100644
--- a/cpp/src/arrow/csv/reader_test.cc
+++ b/cpp/src/arrow/csv/reader_test.cc
@@ -32,6 +32,7 @@
 #include "arrow/table.h"
 #include "arrow/testing/future_util.h"
 #include "arrow/testing/gtest_util.h"
+#include "arrow/util/async_generator.h"
 #include "arrow/util/future.h"
 #include "arrow/util/thread_pool.h"
 
@@ -50,7 +51,14 @@ class StreamingReaderAsTableReader : public TableReader {
     return table;
   }
   virtual Future<std::shared_ptr<Table>> ReadAsync() {
-    return Future<std::shared_ptr<Table>>::MakeFinished(Read());
+    auto reader = reader_;
+    AsyncGenerator<std::shared_ptr<RecordBatch>> gen = [reader] {
+      return reader->ReadNextAsync();
+    };
+    return CollectAsyncGenerator(std::move(gen))
+        .Then([](const RecordBatchVector& batches) {
+          return Table::FromRecordBatches(batches);
+        });
   }
 
  private:
diff --git a/cpp/src/arrow/dataset/file_base.cc b/cpp/src/arrow/dataset/file_base.cc
index bf4e17da4b7..d02b094bb43 100644
--- a/cpp/src/arrow/dataset/file_base.cc
+++ b/cpp/src/arrow/dataset/file_base.cc
@@ -475,29 +475,28 @@ Status WriteNextBatch(WriteState& state, const std::shared_ptr<Fragment>& fragme
   return Status::OK();
 }
 
-Future<> WriteInternal(const ScanOptions& scan_options, WriteState& state,
-                       ScanTaskVector scan_tasks, internal::Executor* cpu_executor) {
+Status WriteInternal(const ScanOptions& scan_options, WriteState& state,
+                     ScanTaskVector scan_tasks) {
   // Store a mapping from partitions (represened by their formatted partition expressions)
   // to a WriteQueue which flushes batches into that partition's output file. In principle
   // any thread could produce a batch for any partition, so each task alternates between
   // pushing batches and flushing them to disk.
-  std::vector<Future<>> scan_futs;
   auto task_group = scan_options.TaskGroup();
 
   for (const auto& scan_task : scan_tasks) {
     task_group->Append([&, scan_task] {
-      ARROW_ASSIGN_OR_RAISE(auto batches, scan_task->Execute());
-
-      for (auto maybe_batch : batches) {
-        ARROW_ASSIGN_OR_RAISE(auto batch, maybe_batch);
-        RETURN_NOT_OK(WriteNextBatch(state, scan_task->fragment(), std::move(batch)));
-      }
-
-      return Status::OK();
+      std::function<Status(std::shared_ptr<RecordBatch>)> visitor =
+          [&](std::shared_ptr<RecordBatch> batch) {
+            return WriteNextBatch(state, scan_task->fragment(), std::move(batch));
+          };
+      return internal::SerialExecutor::RunInSerialExecutor<detail::Empty>(
+                 [&](internal::Executor* executor) {
+                   return scan_task->SafeVisit(executor, visitor);
+                 })
+          .status();
     });
   }
-  scan_futs.push_back(task_group->FinishAsync());
-  return AllComplete(scan_futs);
+  return task_group->Finish();
 }
 
 }  // namespace
@@ -537,13 +536,7 @@ Status FileSystemDataset::Write(const FileSystemDatasetWriteOptions& write_optio
 #endif
 
   WriteState state(write_options);
-  auto res = internal::RunSynchronously<arrow::detail::Empty>(
-      [&](internal::Executor* cpu_executor) -> Future<> {
-        return WriteInternal(*scanner->options(), state, std::move(scan_tasks),
-                             cpu_executor);
-      },
-      scanner->options()->use_threads);
-  RETURN_NOT_OK(res);
+  RETURN_NOT_OK(WriteInternal(*scanner->options(), state, std::move(scan_tasks)));
 
   auto task_group = scanner->options()->TaskGroup();
   for (const auto& part_queue : state.queues) {
diff --git a/cpp/src/arrow/dataset/file_csv.cc b/cpp/src/arrow/dataset/file_csv.cc
index 4612a1233fc..4dd4fac91d8 100644
--- a/cpp/src/arrow/dataset/file_csv.cc
+++ b/cpp/src/arrow/dataset/file_csv.cc
@@ -34,6 +34,7 @@
 #include "arrow/io/compressed.h"
 #include "arrow/result.h"
 #include "arrow/type.h"
+#include "arrow/util/async_generator.h"
 #include "arrow/util/iterator.h"
 #include "arrow/util/logging.h"
 
@@ -119,34 +120,59 @@ static inline Result<csv::ReadOptions> GetReadOptions(
   return read_options;
 }
 
-static inline Result<std::shared_ptr<csv::StreamingReader>> OpenReader(
+static inline Future<std::shared_ptr<csv::StreamingReader>> OpenReaderAsync(
     const FileSource& source, const CsvFileFormat& format,
-    const std::shared_ptr<ScanOptions>& scan_options = nullptr,
-    MemoryPool* pool = default_memory_pool()) {
+    const std::shared_ptr<ScanOptions>& scan_options, internal::Executor* cpu_executor,
+    MemoryPool* pool) {
   ARROW_ASSIGN_OR_RAISE(auto reader_options, GetReadOptions(format, scan_options));
 
-  util::string_view first_block;
   ARROW_ASSIGN_OR_RAISE(auto input, source.OpenCompressed());
   ARROW_ASSIGN_OR_RAISE(
       input, io::BufferedInputStream::Create(reader_options.block_size,
                                              default_memory_pool(), std::move(input)));
-  ARROW_ASSIGN_OR_RAISE(first_block, input->Peek(reader_options.block_size));
 
-  const auto& parse_options = format.parse_options;
-  auto convert_options = csv::ConvertOptions::Defaults();
-  if (scan_options != nullptr) {
-    ARROW_ASSIGN_OR_RAISE(convert_options,
-                          GetConvertOptions(format, scan_options, first_block, pool));
-  }
+  // Grab the first block and use it to determine the schema and create a reader.  The
+  // input->Peek call blocks so we run the whole thing on the I/O thread pool.
+  return DeferNotOk(input->io_context().executor()->Submit(
+      [=]() -> Future<std::shared_ptr<csv::StreamingReader>> {
+        ARROW_ASSIGN_OR_RAISE(auto first_block, input->Peek(reader_options.block_size));
+        const auto& parse_options = format.parse_options;
+        auto convert_options = csv::ConvertOptions::Defaults();
+        if (scan_options != nullptr) {
+          ARROW_ASSIGN_OR_RAISE(convert_options, GetConvertOptions(format, scan_options,
+                                                                   first_block, pool));
+        }
 
-  auto maybe_reader =
-      csv::StreamingReader::Make(io::IOContext(pool), std::move(input), reader_options,
-                                 parse_options, convert_options);
-  if (!maybe_reader.ok()) {
-    return maybe_reader.status().WithMessage("Could not open CSV input source '",
-                                             source.path(), "': ", maybe_reader.status());
-  }
-  return maybe_reader;
+        auto reader_fut = csv::StreamingReader::MakeAsync(
+            io::default_io_context(), std::move(input), cpu_executor, reader_options,
+            parse_options, convert_options);
+        // Adds the filename to the error
+        return reader_fut.Then(
+            [](const std::shared_ptr<csv::StreamingReader>& maybe_reader)
+                -> Result<std::shared_ptr<csv::StreamingReader>> { return maybe_reader; },
+            [source](const Status& err) -> Result<std::shared_ptr<csv::StreamingReader>> {
+              return err.WithMessage("Could not open CSV input source '", source.path(),
+                                     "': ", err);
+            });
+      }));
+}
+
+static inline Result<std::shared_ptr<csv::StreamingReader>> OpenReader(
+    const FileSource& source, const CsvFileFormat& format,
+    const std::shared_ptr<ScanOptions>& scan_options = nullptr,
+    MemoryPool* pool = default_memory_pool()) {
+  auto open_reader_fut =
+      OpenReaderAsync(source, format, scan_options, internal::GetCpuThreadPool(), pool);
+  return open_reader_fut.result();
+}
+
+static RecordBatchGenerator GeneratorFromReader(
+    const Future<std::shared_ptr<csv::StreamingReader>>& reader) {
+  auto gen_fut = reader.Then(
+      [](const std::shared_ptr<csv::StreamingReader>& reader) -> RecordBatchGenerator {
+        return [reader]() { return reader->ReadNextAsync(); };
+      });
+  return MakeFromFuture(std::move(gen_fut));
 }
 
 /// \brief A ScanTask backed by an Csv file.
@@ -160,9 +186,26 @@ class CsvScanTask : public ScanTask {
         source_(fragment->source()) {}
 
   Result<RecordBatchIterator> Execute() override {
-    ARROW_ASSIGN_OR_RAISE(auto reader,
-                          OpenReader(source_, *format_, options(), options()->pool));
-    return IteratorFromReader(std::move(reader));
+    auto reader_fut = OpenReaderAsync(source_, *format_, options(),
+                                      internal::GetCpuThreadPool(), options()->pool);
+    auto reader_gen = GeneratorFromReader(std::move(reader_fut));
+    return MakeGeneratorIterator(std::move(reader_gen));
+  }
+
+  Future<RecordBatchVector> SafeExecute(internal::Executor* executor) override {
+    auto reader_fut =
+        OpenReaderAsync(source_, *format_, options(), executor, options()->pool);
+    auto reader_gen = GeneratorFromReader(std::move(reader_fut));
+    return CollectAsyncGenerator(reader_gen);
+  }
+
+  Future<> SafeVisit(
+      internal::Executor* executor,
+      std::function<Status(std::shared_ptr<RecordBatch>)> visitor) override {
+    auto reader_fut =
+        OpenReaderAsync(source_, *format_, options(), executor, options()->pool);
+    auto reader_gen = GeneratorFromReader(std::move(reader_fut));
+    return VisitAsyncGenerator(reader_gen, visitor);
   }
 
  private:
diff --git a/cpp/src/arrow/dataset/scanner.cc b/cpp/src/arrow/dataset/scanner.cc
index aa95c478dba..ed43e32b482 100644
--- a/cpp/src/arrow/dataset/scanner.cc
+++ b/cpp/src/arrow/dataset/scanner.cc
@@ -71,6 +71,20 @@ Result<RecordBatchIterator> InMemoryScanTask::Execute() {
   return MakeVectorIterator(record_batches_);
 }
 
+Future<RecordBatchVector> ScanTask::SafeExecute(internal::Executor* executor) {
+  // If the ScanTask can't possibly be async then just execute it
+  ARROW_ASSIGN_OR_RAISE(auto rb_it, Execute());
+  return Future<RecordBatchVector>::MakeFinished(rb_it.ToVector());
+}
+
+Future<> ScanTask::SafeVisit(
+    internal::Executor* executor,
+    std::function<Status(std::shared_ptr<RecordBatch>)> visitor) {
+  // If the ScanTask can't possibly be async then just execute it
+  ARROW_ASSIGN_OR_RAISE(auto rb_it, Execute());
+  return Future<>::MakeFinished(rb_it.Visit(visitor));
+}
+
 Result<ScanTaskIterator> Scanner::Scan() {
   // TODO(ARROW-12289) This is overridden in SyncScanner and will never be implemented in
   // AsyncScanner.  It is deprecated and will eventually go away.
@@ -730,13 +744,6 @@ struct TableAssemblyState {
 };
 
 Result<std::shared_ptr<Table>> SyncScanner::ToTable() {
-  return internal::RunSynchronously<std::shared_ptr<Table>>(
-      [this](Executor* executor) { return ToTableInternal(executor); },
-      scan_options_->use_threads);
-}
-
-Future<std::shared_ptr<Table>> SyncScanner::ToTableInternal(
-    internal::Executor* cpu_executor) {
   ARROW_ASSIGN_OR_RAISE(auto scan_task_it, ScanInternal());
   auto task_group = scan_options_->TaskGroup();
 
@@ -752,8 +759,11 @@ Future<std::shared_ptr<Table>> SyncScanner::ToTableInternal(
 
     auto id = scan_task_id++;
     task_group->Append([state, id, scan_task] {
-      ARROW_ASSIGN_OR_RAISE(auto batch_it, scan_task->Execute());
-      ARROW_ASSIGN_OR_RAISE(auto local, batch_it.ToVector());
+      ARROW_ASSIGN_OR_RAISE(
+          auto local, internal::SerialExecutor::RunInSerialExecutor<RecordBatchVector>(
+                          [&](internal::Executor* executor) {
+                            return scan_task->SafeExecute(executor);
+                          }));
       state->Emplace(std::move(local), id);
       return Status::OK();
     });
diff --git a/cpp/src/arrow/dataset/scanner.h b/cpp/src/arrow/dataset/scanner.h
index 6315cf922d0..50660aa03ac 100644
--- a/cpp/src/arrow/dataset/scanner.h
+++ b/cpp/src/arrow/dataset/scanner.h
@@ -148,6 +148,9 @@ class ARROW_DS_EXPORT ScanTask {
   /// resulting from the Scan. Execution semantics are encapsulated in the
   /// particular ScanTask implementation
   virtual Result<RecordBatchIterator> Execute() = 0;
+  virtual Future<RecordBatchVector> SafeExecute(internal::Executor* executor);
+  virtual Future<> SafeVisit(internal::Executor* executor,
+                             std::function<Status(std::shared_ptr<RecordBatch>)> visitor);
 
   virtual ~ScanTask() = default;
 
diff --git a/cpp/src/arrow/dataset/scanner_internal.h b/cpp/src/arrow/dataset/scanner_internal.h
index 292ea6ce372..507bf82a735 100644
--- a/cpp/src/arrow/dataset/scanner_internal.h
+++ b/cpp/src/arrow/dataset/scanner_internal.h
@@ -38,49 +38,60 @@ using internal::Executor;
 
 namespace dataset {
 
+inline Result<std::shared_ptr<RecordBatch>> FilterSingleBatch(
+    const std::shared_ptr<RecordBatch>& in, const Expression& filter, MemoryPool* pool) {
+  compute::ExecContext exec_context{pool};
+  ARROW_ASSIGN_OR_RAISE(Datum mask,
+                        ExecuteScalarExpression(filter, Datum(in), &exec_context));
+
+  if (mask.is_scalar()) {
+    const auto& mask_scalar = mask.scalar_as<BooleanScalar>();
+    if (mask_scalar.is_valid && mask_scalar.value) {
+      return in;
+    }
+    return in->Slice(0, 0);
+  }
+
+  ARROW_ASSIGN_OR_RAISE(
+      Datum filtered,
+      compute::Filter(in, mask, compute::FilterOptions::Defaults(), &exec_context));
+  return filtered.record_batch();
+}
+
 inline RecordBatchIterator FilterRecordBatch(RecordBatchIterator it, Expression filter,
                                              MemoryPool* pool) {
   return MakeMaybeMapIterator(
       [=](std::shared_ptr<RecordBatch> in) -> Result<std::shared_ptr<RecordBatch>> {
-        compute::ExecContext exec_context{pool};
-        ARROW_ASSIGN_OR_RAISE(Datum mask,
-                              ExecuteScalarExpression(filter, Datum(in), &exec_context));
-
-        if (mask.is_scalar()) {
-          const auto& mask_scalar = mask.scalar_as<BooleanScalar>();
-          if (mask_scalar.is_valid && mask_scalar.value) {
-            return std::move(in);
-          }
-          return in->Slice(0, 0);
-        }
-
-        ARROW_ASSIGN_OR_RAISE(
-            Datum filtered,
-            compute::Filter(in, mask, compute::FilterOptions::Defaults(), &exec_context));
-        return filtered.record_batch();
+        return FilterSingleBatch(in, filter, pool);
       },
       std::move(it));
 }
 
+inline Result<std::shared_ptr<RecordBatch>> ProjectSingleBatch(
+    const std::shared_ptr<RecordBatch>& in, const Expression& projection,
+    MemoryPool* pool) {
+  compute::ExecContext exec_context{pool};
+  ARROW_ASSIGN_OR_RAISE(Datum projected,
+                        ExecuteScalarExpression(projection, Datum(in), &exec_context));
+
+  DCHECK_EQ(projected.type()->id(), Type::STRUCT);
+  if (projected.shape() == ValueDescr::SCALAR) {
+    // Only virtual columns are projected. Broadcast to an array
+    ARROW_ASSIGN_OR_RAISE(projected,
+                          MakeArrayFromScalar(*projected.scalar(), in->num_rows(), pool));
+  }
+
+  ARROW_ASSIGN_OR_RAISE(auto out,
+                        RecordBatch::FromStructArray(projected.array_as<StructArray>()));
+
+  return out->ReplaceSchemaMetadata(in->schema()->metadata());
+}
+
 inline RecordBatchIterator ProjectRecordBatch(RecordBatchIterator it,
                                               Expression projection, MemoryPool* pool) {
   return MakeMaybeMapIterator(
       [=](std::shared_ptr<RecordBatch> in) -> Result<std::shared_ptr<RecordBatch>> {
-        compute::ExecContext exec_context{pool};
-        ARROW_ASSIGN_OR_RAISE(Datum projected, ExecuteScalarExpression(
-                                                   projection, Datum(in), &exec_context));
-
-        DCHECK_EQ(projected.type()->id(), Type::STRUCT);
-        if (projected.shape() == ValueDescr::SCALAR) {
-          // Only virtual columns are projected. Broadcast to an array
-          ARROW_ASSIGN_OR_RAISE(
-              projected, MakeArrayFromScalar(*projected.scalar(), in->num_rows(), pool));
-        }
-
-        ARROW_ASSIGN_OR_RAISE(
-            auto out, RecordBatch::FromStructArray(projected.array_as<StructArray>()));
-
-        return out->ReplaceSchemaMetadata(in->schema()->metadata());
+        return ProjectSingleBatch(in, projection, pool);
       },
       std::move(it));
 }
@@ -108,6 +119,59 @@ class FilterAndProjectScanTask : public ScanTask {
                               options_->pool);
   }
 
+  Result<RecordBatchIterator> ToFilteredAndProjectedIterator(
+      const RecordBatchVector& rbs) {
+    auto it = MakeVectorIterator(rbs);
+    ARROW_ASSIGN_OR_RAISE(Expression simplified_filter,
+                          SimplifyWithGuarantee(options()->filter, partition_));
+
+    ARROW_ASSIGN_OR_RAISE(Expression simplified_projection,
+                          SimplifyWithGuarantee(options()->projection, partition_));
+
+    RecordBatchIterator filter_it =
+        FilterRecordBatch(std::move(it), simplified_filter, options_->pool);
+
+    return ProjectRecordBatch(std::move(filter_it), simplified_projection,
+                              options_->pool);
+  }
+
+  Result<std::shared_ptr<RecordBatch>> FilterAndProjectBatch(
+      const std::shared_ptr<RecordBatch>& batch) {
+    ARROW_ASSIGN_OR_RAISE(Expression simplified_filter,
+                          SimplifyWithGuarantee(options()->filter, partition_));
+
+    ARROW_ASSIGN_OR_RAISE(Expression simplified_projection,
+                          SimplifyWithGuarantee(options()->projection, partition_));
+    ARROW_ASSIGN_OR_RAISE(auto filtered,
+                          FilterSingleBatch(batch, simplified_filter, options_->pool));
+    return ProjectSingleBatch(filtered, simplified_projection, options_->pool);
+  }
+
+  inline Future<RecordBatchVector> SafeExecute(internal::Executor* executor) override {
+    return task_->SafeExecute(executor).Then(
+        // This should only be run via SerialExecutor so it should be safe to capture
+        // `this`
+        [this](const RecordBatchVector& rbs) -> Result<RecordBatchVector> {
+          ARROW_ASSIGN_OR_RAISE(auto projected_it, ToFilteredAndProjectedIterator(rbs));
+          return projected_it.ToVector();
+        });
+  }
+
+  inline Future<> SafeVisit(
+      internal::Executor* executor,
+      std::function<Status(std::shared_ptr<RecordBatch>)> visitor) override {
+    auto filter_and_project_visitor =
+        [this, visitor](const std::shared_ptr<RecordBatch>& batch) {
+          ARROW_ASSIGN_OR_RAISE(auto projected, FilterAndProjectBatch(batch));
+          return visitor(projected);
+        };
+    return task_->SafeExecute(executor).Then(
+        [this, filter_and_project_visitor](const RecordBatchVector& rbs) -> Status {
+          ARROW_ASSIGN_OR_RAISE(auto projected_it, ToFilteredAndProjectedIterator(rbs));
+          return projected_it.Visit(filter_and_project_visitor);
+        });
+  }
+
  private:
   std::shared_ptr<ScanTask> task_;
   Expression partition_;

From 5de2fe4fd18794cd0bcbe51be891f7d55285f1ed Mon Sep 17 00:00:00 2001
From: Matthew Topol <mtopol@factset.com>
Date: Tue, 27 Apr 2021 08:30:39 -0700
Subject: [PATCH 133/719] ARROW-7948: [Go] Decimal128 Integration fix

Had to redesign a bit of the arrjson handling to do this properly, but I think it's better this way anyways.

Closes #10116 from zeroshade/decimal_integration

Authored-by: Matthew Topol <mtopol@factset.com>
Signed-off-by: Micah Kornfield <emkornfield@gmail.com>
---
 dev/archery/archery/integration/datagen.py |   1 -
 go/arrow/decimal128/decimal128.go          |  51 +++
 go/arrow/decimal128/decimal128_test.go     |  93 ++++-
 go/arrow/go.mod                            |   5 +-
 go/arrow/go.sum                            |   9 -
 go/arrow/internal/arrjson/arrjson.go       | 410 ++++++++++++++-------
 go/arrow/internal/arrjson/arrjson_test.go  |  93 ++++-
 7 files changed, 488 insertions(+), 174 deletions(-)

diff --git a/dev/archery/archery/integration/datagen.py b/dev/archery/archery/integration/datagen.py
index 35ab289cc33..ec4969ede3c 100644
--- a/dev/archery/archery/integration/datagen.py
+++ b/dev/archery/archery/integration/datagen.py
@@ -1527,7 +1527,6 @@ def _temp_path():
         .skip_category('Go'),  # TODO(ARROW-7901)
 
         generate_decimal128_case()
-        .skip_category('Go')  # TODO(ARROW-7948): Decimal + Go
         .skip_category('Rust'),
 
         generate_decimal256_case()
diff --git a/go/arrow/decimal128/decimal128.go b/go/arrow/decimal128/decimal128.go
index 2f1b181772d..a0921ce1007 100644
--- a/go/arrow/decimal128/decimal128.go
+++ b/go/arrow/decimal128/decimal128.go
@@ -16,6 +16,10 @@
 
 package decimal128 // import "github.com/apache/arrow/go/arrow/decimal128"
 
+import (
+	"math/big"
+)
+
 var (
 	MaxDecimal128 = New(542101086242752217, 687399551400673280-1)
 )
@@ -54,6 +58,38 @@ func FromI64(v int64) Num {
 	}
 }
 
+// FromBigInt will convert a big.Int to a Num, if the value in v has a
+// BitLen > 128, this will panic.
+func FromBigInt(v *big.Int) (n Num) {
+	if v.BitLen() > 128 {
+		panic("arrow/decimal128: cannot represent value larger than 128bits")
+	}
+
+	// if the value is negative, then get the high and low bytes from
+	// v, and then negate it. this is because Num uses a two's compliment
+	// representation of values and big.Int stores the value as a bool for
+	// the sign and the absolute value of the integer. This means that the
+	// raw bytes are *always* the absolute value.
+	b := v.Bits()
+	n.lo = uint64(b[0])
+	if len(b) > 1 {
+		n.hi = int64(v.Bits()[1])
+	}
+	if v.Sign() < 0 {
+		return n.negated()
+	}
+	return
+}
+
+func (n Num) negated() Num {
+	n.lo = ^n.lo + 1
+	n.hi = ^n.hi
+	if n.lo == 0 {
+		n.hi += 1
+	}
+	return n
+}
+
 // LowBits returns the low bits of the two's complement representation of the number.
 func (n Num) LowBits() uint64 { return n.lo }
 
@@ -71,3 +107,18 @@ func (n Num) Sign() int {
 	}
 	return int(1 | (n.hi >> 63))
 }
+
+func toBigIntPositive(n Num) *big.Int {
+	return (&big.Int{}).SetBits([]big.Word{big.Word(n.lo), big.Word(n.hi)})
+}
+
+// while the code would be simpler to just do lsh/rsh and add
+// it turns out from benchmarking that calling SetBits passing
+// in the words and negating ends up being >2x faster
+func (n Num) BigInt() *big.Int {
+	if n.Sign() < 0 {
+		b := toBigIntPositive(n.negated())
+		return b.Neg(b)
+	}
+	return toBigIntPositive(n)
+}
diff --git a/go/arrow/decimal128/decimal128_test.go b/go/arrow/decimal128/decimal128_test.go
index cf4ebd4cd1e..5a4fa8ab6a4 100644
--- a/go/arrow/decimal128/decimal128_test.go
+++ b/go/arrow/decimal128/decimal128_test.go
@@ -14,29 +14,32 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-package decimal128 // import "github.com/apache/arrow/go/arrow/decimal128"
+package decimal128_test // import "github.com/apache/arrow/go/arrow/decimal128"
 
 import (
 	"fmt"
 	"math"
 	"math/big"
 	"testing"
+
+	"github.com/apache/arrow/go/arrow/decimal128"
+	"github.com/stretchr/testify/assert"
 )
 
 func TestFromU64(t *testing.T) {
 	for _, tc := range []struct {
 		v    uint64
-		want Num
+		want decimal128.Num
 		sign int
 	}{
-		{0, Num{0, 0}, 0},
-		{1, Num{1, 0}, +1},
-		{2, Num{2, 0}, +1},
-		{math.MaxInt64, Num{math.MaxInt64, 0}, +1},
-		{math.MaxUint64, Num{math.MaxUint64, 0}, +1},
+		{0, decimal128.New(0, 0), 0},
+		{1, decimal128.New(0, 1), +1},
+		{2, decimal128.New(0, 2), +1},
+		{math.MaxInt64, decimal128.New(0, math.MaxInt64), +1},
+		{math.MaxUint64, decimal128.New(0, math.MaxUint64), +1},
 	} {
 		t.Run(fmt.Sprintf("%+0#x", tc.v), func(t *testing.T) {
-			v := FromU64(tc.v)
+			v := decimal128.FromU64(tc.v)
 			ref := new(big.Int).SetUint64(tc.v)
 			if got, want := v, tc.want; got != want {
 				t.Fatalf("invalid value. got=%+0#x, want=%+0#x (big-int=%+0#x)", got, want, ref)
@@ -47,10 +50,10 @@ func TestFromU64(t *testing.T) {
 			if got, want := v.Sign(), ref.Sign(); got != want {
 				t.Fatalf("invalid sign for %+0#x: got=%v, want=%v", v, got, want)
 			}
-			if got, want := v.LowBits(), tc.want.lo; got != want {
+			if got, want := v.LowBits(), tc.want.LowBits(); got != want {
 				t.Fatalf("invalid low-bits: got=%+0#x, want=%+0#x", got, want)
 			}
-			if got, want := v.HighBits(), tc.want.hi; got != want {
+			if got, want := v.HighBits(), tc.want.HighBits(); got != want {
 				t.Fatalf("invalid high-bits: got=%+0#x, want=%+0#x", got, want)
 			}
 		})
@@ -60,17 +63,17 @@ func TestFromU64(t *testing.T) {
 func TestFromI64(t *testing.T) {
 	for _, tc := range []struct {
 		v    int64
-		want Num
+		want decimal128.Num
 		sign int
 	}{
-		{0, Num{0, 0}, 0},
-		{1, Num{1, 0}, 1},
-		{2, Num{2, 0}, 1},
-		{math.MaxInt64, Num{math.MaxInt64, 0}, 1},
-		{math.MinInt64, Num{u64Cnv(math.MinInt64), -1}, -1},
+		{0, decimal128.New(0, 0), 0},
+		{1, decimal128.New(0, 1), 1},
+		{2, decimal128.New(0, 2), 1},
+		{math.MaxInt64, decimal128.New(0, math.MaxInt64), 1},
+		{math.MinInt64, decimal128.New(-1, u64Cnv(math.MinInt64)), -1},
 	} {
 		t.Run(fmt.Sprintf("%+0#x", tc.v), func(t *testing.T) {
-			v := FromI64(tc.v)
+			v := decimal128.FromI64(tc.v)
 			ref := big.NewInt(tc.v)
 			if got, want := v, tc.want; got != want {
 				t.Fatalf("invalid value. got=%+0#x, want=%+0#x (big-int=%+0#x)", got, want, ref)
@@ -81,10 +84,10 @@ func TestFromI64(t *testing.T) {
 			if got, want := v.Sign(), ref.Sign(); got != want {
 				t.Fatalf("invalid sign for %+0#x: got=%v, want=%v", v, got, want)
 			}
-			if got, want := v.LowBits(), tc.want.lo; got != want {
+			if got, want := v.LowBits(), tc.want.LowBits(); got != want {
 				t.Fatalf("invalid low-bits: got=%+0#x, want=%+0#x", got, want)
 			}
-			if got, want := v.HighBits(), tc.want.hi; got != want {
+			if got, want := v.HighBits(), tc.want.HighBits(); got != want {
 				t.Fatalf("invalid high-bits: got=%+0#x, want=%+0#x", got, want)
 			}
 		})
@@ -92,3 +95,55 @@ func TestFromI64(t *testing.T) {
 }
 
 func u64Cnv(i int64) uint64 { return uint64(i) }
+
+func BenchmarkBigIntToDecimal(b *testing.B) {
+	var (
+		n     decimal128.Num
+		bi, _ = (&big.Int{}).SetString("-340282366920938463463374607431711455", 10)
+	)
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		n = decimal128.FromBigInt(bi)
+		if n.Sign() >= 0 {
+			b.FailNow()
+		}
+	}
+}
+
+func BenchmarkDecimalToBigInt(b *testing.B) {
+	var (
+		bi *big.Int
+		n  = decimal128.New(-18446744073709552, 7083549724304524577)
+	)
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		bi = n.BigInt()
+		if bi.Sign() >= 0 {
+			b.FailNow()
+		}
+	}
+}
+
+func TestDecimalToBigInt(t *testing.T) {
+	tests := []struct {
+		hi  int64
+		lo  uint64
+		exp string
+	}{
+		{-18446744073709552, 7083549724304524577, "-340282366920938463463374607431711455"},
+		{1, 4611686018427387904, "23058430092136939520"},
+	}
+	for _, tc := range tests {
+		t.Run("", func(t *testing.T) {
+			n := decimal128.New(tc.hi, tc.lo)
+			bi := n.BigInt()
+
+			assert.Equal(t, tc.exp, bi.String())
+			n2 := decimal128.FromBigInt(bi)
+			assert.Equal(t, n.LowBits(), n2.LowBits())
+			assert.Equal(t, n.HighBits(), n2.HighBits())
+		})
+	}
+}
diff --git a/go/arrow/go.mod b/go/arrow/go.mod
index 5e7915fa194..229a0fbb459 100644
--- a/go/arrow/go.mod
+++ b/go/arrow/go.mod
@@ -20,12 +20,11 @@ go 1.12
 
 require (
 	github.com/davecgh/go-spew v1.1.0 // indirect
-	github.com/frankban/quicktest v1.11.3 // indirect
 	github.com/golang/protobuf v1.4.2
 	github.com/google/flatbuffers v1.11.0
+	github.com/google/go-cmp v0.5.4 // indirect
 	github.com/klauspost/compress v1.11.13
-	github.com/pierrec/lz4 v2.6.0+incompatible
-	github.com/pierrec/lz4/v4 v4.1.4 // indirect
+	github.com/pierrec/lz4/v4 v4.1.4
 	github.com/pmezard/go-difflib v1.0.0 // indirect
 	github.com/stretchr/testify v1.2.0
 	golang.org/x/net v0.0.0-20200904194848-62affa334b73 // indirect
diff --git a/go/arrow/go.sum b/go/arrow/go.sum
index 5743321c526..33ae1ba599e 100644
--- a/go/arrow/go.sum
+++ b/go/arrow/go.sum
@@ -9,8 +9,6 @@ github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymF
 github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
 github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=
 github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
-github.com/frankban/quicktest v1.11.3 h1:8sXhOn0uLys67V8EsXLc6eszDs8VXWxL3iRvebPhedY=
-github.com/frankban/quicktest v1.11.3/go.mod h1:wRf/ReqHper53s+kmmSZizM8NamnL3IM0I9ntUbOk+k=
 github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
 github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
 github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
@@ -35,13 +33,6 @@ github.com/google/go-cmp v0.5.4 h1:L8R9j+yAqZuZjsqh/z+F1NCffTKKLShY6zXTItVIZ8M=
 github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
 github.com/klauspost/compress v1.11.13 h1:eSvu8Tmq6j2psUJqJrLcWH6K3w5Dwc+qipbaA6eVEN4=
 github.com/klauspost/compress v1.11.13/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
-github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI=
-github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
-github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
-github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
-github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
-github.com/pierrec/lz4 v2.6.0+incompatible h1:Ix9yFKn1nSPBLFl/yZknTp8TU5G4Ps0JDmguYK6iH1A=
-github.com/pierrec/lz4 v2.6.0+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY=
 github.com/pierrec/lz4/v4 v4.1.4 h1:PjkB+qEooc9nw4F6Pxe/e0xaRdWz3suItXWxWqAO1QE=
 github.com/pierrec/lz4/v4 v4.1.4/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
diff --git a/go/arrow/internal/arrjson/arrjson.go b/go/arrow/internal/arrjson/arrjson.go
index a9f44859a76..149ecc7e77b 100644
--- a/go/arrow/internal/arrjson/arrjson.go
+++ b/go/arrow/internal/arrjson/arrjson.go
@@ -21,247 +21,346 @@ package arrjson // import "github.com/apache/arrow/go/arrow/internal/arrjson"
 import (
 	"encoding/hex"
 	"encoding/json"
+	"math/big"
 	"strconv"
 	"strings"
 
 	"github.com/apache/arrow/go/arrow"
 	"github.com/apache/arrow/go/arrow/array"
+	"github.com/apache/arrow/go/arrow/decimal128"
 	"github.com/apache/arrow/go/arrow/float16"
 	"github.com/apache/arrow/go/arrow/memory"
 	"golang.org/x/xerrors"
 )
 
-const (
-	kData         = "DATA"
-	kDays         = "days"
-	kDayTime      = "DAY_TIME"
-	kDuration     = "duration"
-	kMilliseconds = "milliseconds"
-	kYearMonth    = "YEAR_MONTH"
-)
-
 type Schema struct {
-	Fields []Field `json:"fields"`
-}
-
-type Field struct {
-	Name     string   `json:"name"`
-	Type     dataType `json:"type"`
-	Nullable bool     `json:"nullable"`
-	Children []Field  `json:"children"`
+	Fields []FieldWrapper `json:"fields"`
 }
 
-type dataType struct {
-	Name      string `json:"name"`
-	Signed    bool   `json:"isSigned,omitempty"`
-	BitWidth  int    `json:"bitWidth,omitempty"`
-	Precision string `json:"precision,omitempty"`
-	ByteWidth int    `json:"byteWidth,omitempty"`
-	ListSize  int32  `json:"listSize,omitempty"`
-	Unit      string `json:"unit,omitempty"`
-	TimeZone  string `json:"timezone,omitempty"`
-	Scale     int    `json:"scale,omitempty"` // for Decimal128
+// FieldWrapper gets used in order to hook into the JSON marshalling and
+// unmarshalling without creating an infinite loop when dealing with the
+// children fields.
+type FieldWrapper struct {
+	Field
 }
 
-func dtypeToJSON(dt arrow.DataType) dataType {
-	switch dt := dt.(type) {
+type Field struct {
+	Name string `json:"name"`
+	// the arrowType will get populated during unmarshalling by processing the
+	// Type, and will be used to generate the Type during Marshalling to JSON
+	arrowType arrow.DataType `json:"-"`
+	// leave this as a json RawMessage in order to partially unmarshal as needed
+	// during marshal/unmarshal time so we can determine what the structure is
+	// actually expected to be.
+	Type     json.RawMessage `json:"type"`
+	Nullable bool            `json:"nullable"`
+	Children []FieldWrapper  `json:"children"`
+}
+
+func (f FieldWrapper) MarshalJSON() ([]byte, error) {
+	var typ interface{}
+	switch dt := f.arrowType.(type) {
 	case *arrow.NullType:
-		return dataType{Name: "null"}
+		typ = nameJSON{"null"}
 	case *arrow.BooleanType:
-		return dataType{Name: "bool"}
+		typ = nameJSON{"bool"}
 	case *arrow.Int8Type:
-		return dataType{Name: "int", Signed: true, BitWidth: 8}
+		typ = bitWidthJSON{Name: "int", Signed: true, BitWidth: 8}
 	case *arrow.Int16Type:
-		return dataType{Name: "int", Signed: true, BitWidth: 16}
+		typ = bitWidthJSON{Name: "int", Signed: true, BitWidth: 16}
 	case *arrow.Int32Type:
-		return dataType{Name: "int", Signed: true, BitWidth: 32}
+		typ = bitWidthJSON{Name: "int", Signed: true, BitWidth: 32}
 	case *arrow.Int64Type:
-		return dataType{Name: "int", Signed: true, BitWidth: 64}
+		typ = bitWidthJSON{Name: "int", Signed: true, BitWidth: 64}
 	case *arrow.Uint8Type:
-		return dataType{Name: "int", BitWidth: 8}
+		typ = bitWidthJSON{Name: "int", Signed: false, BitWidth: 8}
 	case *arrow.Uint16Type:
-		return dataType{Name: "int", BitWidth: 16}
+		typ = bitWidthJSON{Name: "int", Signed: false, BitWidth: 16}
 	case *arrow.Uint32Type:
-		return dataType{Name: "int", BitWidth: 32}
+		typ = bitWidthJSON{Name: "int", Signed: false, BitWidth: 32}
 	case *arrow.Uint64Type:
-		return dataType{Name: "int", BitWidth: 64}
+		typ = bitWidthJSON{Name: "int", Signed: false, BitWidth: 64}
 	case *arrow.Float16Type:
-		return dataType{Name: "floatingpoint", Precision: "HALF"}
+		typ = floatJSON{"floatingpoint", "HALF"}
 	case *arrow.Float32Type:
-		return dataType{Name: "floatingpoint", Precision: "SINGLE"}
+		typ = floatJSON{"floatingpoint", "SINGLE"}
 	case *arrow.Float64Type:
-		return dataType{Name: "floatingpoint", Precision: "DOUBLE"}
+		typ = floatJSON{"floatingpoint", "DOUBLE"}
 	case *arrow.BinaryType:
-		return dataType{Name: "binary"}
+		typ = nameJSON{"binary"}
 	case *arrow.StringType:
-		return dataType{Name: "utf8"}
+		typ = nameJSON{"utf8"}
 	case *arrow.Date32Type:
-		return dataType{Name: "date", Unit: "DAY"}
+		typ = unitZoneJSON{Name: "date", Unit: "DAY"}
 	case *arrow.Date64Type:
-		return dataType{Name: "date", Unit: "MILLISECOND"}
-	case *arrow.Time32Type:
+		typ = unitZoneJSON{Name: "date", Unit: "MILLISECOND"}
+	case *arrow.MonthIntervalType:
+		typ = unitZoneJSON{Name: "interval", Unit: "YEAR_MONTH"}
+	case *arrow.DayTimeIntervalType:
+		typ = unitZoneJSON{Name: "interval", Unit: "DAY_TIME"}
+	case *arrow.DurationType:
 		switch dt.Unit {
 		case arrow.Second:
-			return dataType{Name: "time", Unit: "SECOND", BitWidth: dt.BitWidth()}
+			typ = unitZoneJSON{Name: "duration", Unit: "SECOND"}
 		case arrow.Millisecond:
-			return dataType{Name: "time", Unit: "MILLISECOND", BitWidth: dt.BitWidth()}
-		}
-	case *arrow.Time64Type:
-		switch dt.Unit {
+			typ = unitZoneJSON{Name: "duration", Unit: "MILLISECOND"}
 		case arrow.Microsecond:
-			return dataType{Name: "time", Unit: "MICROSECOND", BitWidth: dt.BitWidth()}
+			typ = unitZoneJSON{Name: "duration", Unit: "MICROSECOND"}
 		case arrow.Nanosecond:
-			return dataType{Name: "time", Unit: "NANOSECOND", BitWidth: dt.BitWidth()}
+			typ = unitZoneJSON{Name: "duration", Unit: "NANOSECOND"}
 		}
-	case *arrow.TimestampType:
+	case *arrow.Time32Type:
 		switch dt.Unit {
 		case arrow.Second:
-			return dataType{Name: "timestamp", Unit: "SECOND", TimeZone: dt.TimeZone}
+			typ = bitWidthJSON{Name: "time", BitWidth: dt.BitWidth(), Unit: "SECOND"}
 		case arrow.Millisecond:
-			return dataType{Name: "timestamp", Unit: "MILLISECOND", TimeZone: dt.TimeZone}
+			typ = bitWidthJSON{Name: "time", BitWidth: dt.BitWidth(), Unit: "MILLISECOND"}
+		}
+	case *arrow.Time64Type:
+		switch dt.Unit {
 		case arrow.Microsecond:
-			return dataType{Name: "timestamp", Unit: "MICROSECOND", TimeZone: dt.TimeZone}
+			typ = bitWidthJSON{Name: "time", BitWidth: dt.BitWidth(), Unit: "MICROSECOND"}
 		case arrow.Nanosecond:
-			return dataType{Name: "timestamp", Unit: "NANOSECOND", TimeZone: dt.TimeZone}
+			typ = bitWidthJSON{Name: "time", BitWidth: dt.BitWidth(), Unit: "NANOSECOND"}
 		}
-	case *arrow.MonthIntervalType:
-		return dataType{Name: "interval", Unit: "YEAR_MONTH"}
-	case *arrow.DayTimeIntervalType:
-		return dataType{Name: "interval", Unit: "DAY_TIME"}
-	case *arrow.DurationType:
+	case *arrow.TimestampType:
 		switch dt.Unit {
 		case arrow.Second:
-			return dataType{Name: "duration", Unit: "SECOND"}
+			typ = unitZoneJSON{Name: "timestamp", Unit: "SECOND", TimeZone: dt.TimeZone}
 		case arrow.Millisecond:
-			return dataType{Name: "duration", Unit: "MILLISECOND"}
+			typ = unitZoneJSON{Name: "timestamp", Unit: "MILLISECOND", TimeZone: dt.TimeZone}
 		case arrow.Microsecond:
-			return dataType{Name: "duration", Unit: "MICROSECOND"}
+			typ = unitZoneJSON{Name: "timestamp", Unit: "MICROSECOND", TimeZone: dt.TimeZone}
 		case arrow.Nanosecond:
-			return dataType{Name: "duration", Unit: "NANOSECOND"}
+			typ = unitZoneJSON{Name: "timestamp", Unit: "NANOSECOND", TimeZone: dt.TimeZone}
 		}
-
 	case *arrow.ListType:
-		return dataType{Name: "list"}
+		typ = nameJSON{"list"}
 	case *arrow.StructType:
-		return dataType{Name: "struct"}
+		typ = nameJSON{"struct"}
 	case *arrow.FixedSizeListType:
-		return dataType{Name: "fixedsizelist", ListSize: dt.Len()}
+		typ = listSizeJSON{"fixedsizelist", dt.Len()}
 	case *arrow.FixedSizeBinaryType:
-		return dataType{
-			Name:      "fixedsizebinary",
-			ByteWidth: dt.ByteWidth,
-		}
+		typ = byteWidthJSON{"fixedsizebinary", dt.ByteWidth}
+	case *arrow.Decimal128Type:
+		typ = decimalJSON{"decimal", int(dt.Scale), int(dt.Precision)}
+	default:
+		return nil, xerrors.Errorf("unknown arrow.DataType %v", f.arrowType)
+	}
+
+	var err error
+	if f.Type, err = json.Marshal(typ); err != nil {
+		return nil, err
 	}
-	panic(xerrors.Errorf("unknown arrow.DataType %v", dt))
+	return json.Marshal(f.Field)
 }
 
-func dtypeFromJSON(dt dataType, children []Field) arrow.DataType {
-	switch dt.Name {
+func (f *FieldWrapper) UnmarshalJSON(data []byte) error {
+	if err := json.Unmarshal(data, &f.Field); err != nil {
+		return err
+	}
+
+	tmp := nameJSON{}
+	if err := json.Unmarshal(f.Type, &tmp); err != nil {
+		return err
+	}
+
+	switch tmp.Name {
 	case "null":
-		return arrow.Null
+		f.arrowType = arrow.Null
 	case "bool":
-		return arrow.FixedWidthTypes.Boolean
+		f.arrowType = arrow.FixedWidthTypes.Boolean
 	case "int":
-		switch dt.Signed {
+		t := bitWidthJSON{}
+		if err := json.Unmarshal(f.Type, &t); err != nil {
+			return err
+		}
+		switch t.Signed {
 		case true:
-			switch dt.BitWidth {
+			switch t.BitWidth {
 			case 8:
-				return arrow.PrimitiveTypes.Int8
+				f.arrowType = arrow.PrimitiveTypes.Int8
 			case 16:
-				return arrow.PrimitiveTypes.Int16
+				f.arrowType = arrow.PrimitiveTypes.Int16
 			case 32:
-				return arrow.PrimitiveTypes.Int32
+				f.arrowType = arrow.PrimitiveTypes.Int32
 			case 64:
-				return arrow.PrimitiveTypes.Int64
+				f.arrowType = arrow.PrimitiveTypes.Int64
 			}
 		default:
-			switch dt.BitWidth {
+			switch t.BitWidth {
 			case 8:
-				return arrow.PrimitiveTypes.Uint8
+				f.arrowType = arrow.PrimitiveTypes.Uint8
 			case 16:
-				return arrow.PrimitiveTypes.Uint16
+				f.arrowType = arrow.PrimitiveTypes.Uint16
 			case 32:
-				return arrow.PrimitiveTypes.Uint32
+				f.arrowType = arrow.PrimitiveTypes.Uint32
 			case 64:
-				return arrow.PrimitiveTypes.Uint64
+				f.arrowType = arrow.PrimitiveTypes.Uint64
 			}
 		}
 	case "floatingpoint":
-		switch dt.Precision {
+		t := floatJSON{}
+		if err := json.Unmarshal(f.Type, &t); err != nil {
+			return err
+		}
+		switch t.Precision {
 		case "HALF":
-			return arrow.FixedWidthTypes.Float16
+			f.arrowType = arrow.FixedWidthTypes.Float16
 		case "SINGLE":
-			return arrow.PrimitiveTypes.Float32
+			f.arrowType = arrow.PrimitiveTypes.Float32
 		case "DOUBLE":
-			return arrow.PrimitiveTypes.Float64
+			f.arrowType = arrow.PrimitiveTypes.Float64
 		}
 	case "binary":
-		return arrow.BinaryTypes.Binary
+		f.arrowType = arrow.BinaryTypes.Binary
 	case "utf8":
-		return arrow.BinaryTypes.String
+		f.arrowType = arrow.BinaryTypes.String
 	case "date":
-		switch dt.Unit {
+		t := unitZoneJSON{}
+		if err := json.Unmarshal(f.Type, &t); err != nil {
+			return err
+		}
+		switch t.Unit {
 		case "DAY":
-			return arrow.FixedWidthTypes.Date32
+			f.arrowType = arrow.FixedWidthTypes.Date32
 		case "MILLISECOND":
-			return arrow.FixedWidthTypes.Date64
+			f.arrowType = arrow.FixedWidthTypes.Date64
 		}
 	case "time":
-		switch dt.BitWidth {
+		t := bitWidthJSON{}
+		if err := json.Unmarshal(f.Type, &t); err != nil {
+			return err
+		}
+		switch t.BitWidth {
 		case 32:
-			switch dt.Unit {
+			switch t.Unit {
 			case "SECOND":
-				return arrow.FixedWidthTypes.Time32s
+				f.arrowType = arrow.FixedWidthTypes.Time32s
 			case "MILLISECOND":
-				return arrow.FixedWidthTypes.Time32ms
+				f.arrowType = arrow.FixedWidthTypes.Time32ms
 			}
 		case 64:
-			switch dt.Unit {
+			switch t.Unit {
 			case "MICROSECOND":
-				return arrow.FixedWidthTypes.Time64us
+				f.arrowType = arrow.FixedWidthTypes.Time64us
 			case "NANOSECOND":
-				return arrow.FixedWidthTypes.Time64ns
+				f.arrowType = arrow.FixedWidthTypes.Time64ns
 			}
 		}
 	case "timestamp":
-		switch dt.Unit {
+		t := unitZoneJSON{}
+		if err := json.Unmarshal(f.Type, &t); err != nil {
+			return err
+		}
+		f.arrowType = &arrow.TimestampType{TimeZone: t.TimeZone}
+		switch t.Unit {
 		case "SECOND":
-			return &arrow.TimestampType{TimeZone: dt.TimeZone, Unit: arrow.Second}
+			f.arrowType.(*arrow.TimestampType).Unit = arrow.Second
 		case "MILLISECOND":
-			return &arrow.TimestampType{TimeZone: dt.TimeZone, Unit: arrow.Millisecond}
+			f.arrowType.(*arrow.TimestampType).Unit = arrow.Millisecond
 		case "MICROSECOND":
-			return &arrow.TimestampType{TimeZone: dt.TimeZone, Unit: arrow.Microsecond}
+			f.arrowType.(*arrow.TimestampType).Unit = arrow.Microsecond
 		case "NANOSECOND":
-			return &arrow.TimestampType{TimeZone: dt.TimeZone, Unit: arrow.Nanosecond}
+			f.arrowType.(*arrow.TimestampType).Unit = arrow.Nanosecond
 		}
 	case "list":
-		return arrow.ListOf(dtypeFromJSON(children[0].Type, nil))
+		f.arrowType = arrow.ListOf(f.Children[0].arrowType)
 	case "struct":
-		return arrow.StructOf(fieldsFromJSON(children)...)
+		f.arrowType = arrow.StructOf(fieldsFromJSON(f.Children)...)
 	case "fixedsizebinary":
-		return &arrow.FixedSizeBinaryType{ByteWidth: dt.ByteWidth}
+		t := byteWidthJSON{}
+		if err := json.Unmarshal(f.Type, &t); err != nil {
+			return err
+		}
+		f.arrowType = &arrow.FixedSizeBinaryType{ByteWidth: t.ByteWidth}
 	case "fixedsizelist":
-		return arrow.FixedSizeListOf(dt.ListSize, dtypeFromJSON(children[0].Type, nil))
+		t := listSizeJSON{}
+		if err := json.Unmarshal(f.Type, &t); err != nil {
+			return err
+		}
+		f.arrowType = arrow.FixedSizeListOf(t.ListSize, f.Children[0].arrowType)
 	case "interval":
-		switch dt.Unit {
+		t := unitZoneJSON{}
+		if err := json.Unmarshal(f.Type, &t); err != nil {
+			return err
+		}
+		switch t.Unit {
 		case "YEAR_MONTH":
-			return arrow.FixedWidthTypes.MonthInterval
+			f.arrowType = arrow.FixedWidthTypes.MonthInterval
 		case "DAY_TIME":
-			return arrow.FixedWidthTypes.DayTimeInterval
+			f.arrowType = arrow.FixedWidthTypes.DayTimeInterval
 		}
 	case "duration":
-		switch dt.Unit {
+		t := unitZoneJSON{}
+		if err := json.Unmarshal(f.Type, &t); err != nil {
+			return err
+		}
+		switch t.Unit {
 		case "SECOND":
-			return arrow.FixedWidthTypes.Duration_s
+			f.arrowType = arrow.FixedWidthTypes.Duration_s
 		case "MILLISECOND":
-			return arrow.FixedWidthTypes.Duration_ms
+			f.arrowType = arrow.FixedWidthTypes.Duration_ms
 		case "MICROSECOND":
-			return arrow.FixedWidthTypes.Duration_us
+			f.arrowType = arrow.FixedWidthTypes.Duration_us
 		case "NANOSECOND":
-			return arrow.FixedWidthTypes.Duration_ns
+			f.arrowType = arrow.FixedWidthTypes.Duration_ns
 		}
+	case "decimal":
+		t := decimalJSON{}
+		if err := json.Unmarshal(f.Type, &t); err != nil {
+			return err
+		}
+		f.arrowType = &arrow.Decimal128Type{Precision: int32(t.Precision), Scale: int32(t.Scale)}
+	}
+	if f.arrowType != nil {
+		return nil
 	}
-	panic(xerrors.Errorf("unknown DataType %#v", dt))
+	return xerrors.Errorf("unhandled type unmarshalling from json: %s", tmp.Name)
+}
+
+// the structs below represent various configurations of the Type
+// json block and what fields will be expected. Sometimes there is
+// overlap between the same key used with different types, so it's
+// easier to partial unmarshal and then use these to ensure correct
+// typing.
+
+type nameJSON struct {
+	Name string `json:"name"`
+}
+
+type listSizeJSON struct {
+	Name     string `json:"name"`
+	ListSize int32  `json:"listSize,omitempty"`
+}
+
+type bitWidthJSON struct {
+	Name     string `json:"name"`
+	Signed   bool   `json:"isSigned,omitempty"`
+	BitWidth int    `json:"bitWidth,omitempty"`
+	Unit     string `json:"unit,omitempty"`
+}
+
+type floatJSON struct {
+	Name      string `json:"name"`
+	Precision string `json:"precision,omitempty"`
+}
+
+type unitZoneJSON struct {
+	Name     string `json:"name"`
+	Unit     string `json:"unit,omitempty"`
+	TimeZone string `json:"timezone,omitempty"`
+}
+
+type decimalJSON struct {
+	Name      string `json:"name"`
+	Scale     int    `json:"scale,omitempty"`
+	Precision int    `json:"precision,omitempty"`
+}
+
+type byteWidthJSON struct {
+	Name      string `json:"name"`
+	ByteWidth int    `json:"byteWidth,omitempty"`
 }
 
 func schemaToJSON(schema *arrow.Schema) Schema {
@@ -274,15 +373,15 @@ func schemaFromJSON(schema Schema) *arrow.Schema {
 	return arrow.NewSchema(fieldsFromJSON(schema.Fields), nil)
 }
 
-func fieldsToJSON(fields []arrow.Field) []Field {
-	o := make([]Field, len(fields))
+func fieldsToJSON(fields []arrow.Field) []FieldWrapper {
+	o := make([]FieldWrapper, len(fields))
 	for i, f := range fields {
-		o[i] = Field{
-			Name:     f.Name,
-			Type:     dtypeToJSON(f.Type),
-			Nullable: f.Nullable,
-			Children: []Field{},
-		}
+		o[i] = FieldWrapper{Field{
+			Name:      f.Name,
+			arrowType: f.Type,
+			Nullable:  f.Nullable,
+			Children:  []FieldWrapper{},
+		}}
 		switch dt := f.Type.(type) {
 		case *arrow.ListType:
 			o[i].Children = fieldsToJSON([]arrow.Field{{Name: "item", Type: dt.Elem(), Nullable: f.Nullable}})
@@ -295,10 +394,10 @@ func fieldsToJSON(fields []arrow.Field) []Field {
 	return o
 }
 
-func fieldsFromJSON(fields []Field) []arrow.Field {
+func fieldsFromJSON(fields []FieldWrapper) []arrow.Field {
 	vs := make([]arrow.Field, len(fields))
 	for i, v := range fields {
-		vs[i] = fieldFromJSON(v)
+		vs[i] = fieldFromJSON(v.Field)
 	}
 	return vs
 }
@@ -306,7 +405,7 @@ func fieldsFromJSON(fields []Field) []arrow.Field {
 func fieldFromJSON(f Field) arrow.Field {
 	return arrow.Field{
 		Name:     f.Name,
-		Type:     dtypeFromJSON(f.Type, f.Children),
+		Type:     f.arrowType,
 		Nullable: f.Nullable,
 	}
 }
@@ -617,6 +716,14 @@ func arrayFromJSON(mem memory.Allocator, dt arrow.DataType, arr Array) array.Int
 		bldr.AppendValues(data, valids)
 		return bldr.NewArray()
 
+	case *arrow.Decimal128Type:
+		bldr := array.NewDecimal128Builder(mem, dt)
+		defer bldr.Release()
+		data := decimal128FromJSON(arr.Data)
+		valids := validsFromJSON(arr.Valids)
+		bldr.AppendValues(data, valids)
+		return bldr.NewArray()
+
 	default:
 		panic(xerrors.Errorf("unknown data type %v %T", dt, dt))
 	}
@@ -858,6 +965,14 @@ func arrayToJSON(field arrow.Field, arr array.Interface) Array {
 			Valids: validsToJSON(arr),
 		}
 
+	case *array.Decimal128:
+		return Array{
+			Name:   field.Name,
+			Count:  arr.Len(),
+			Data:   decimal128ToJSON(arr),
+			Valids: validsToJSON(arr),
+		}
+
 	default:
 		panic(xerrors.Errorf("unknown array type %T", arr))
 	}
@@ -1128,6 +1243,27 @@ func f64ToJSON(arr *array.Float64) []interface{} {
 	return o
 }
 
+func decimal128ToJSON(arr *array.Decimal128) []interface{} {
+	o := make([]interface{}, arr.Len())
+	for i := range o {
+		o[i] = arr.Value(i).BigInt().String()
+	}
+	return o
+}
+
+func decimal128FromJSON(vs []interface{}) []decimal128.Num {
+	var tmp big.Int
+	o := make([]decimal128.Num, len(vs))
+	for i, v := range vs {
+		if err := tmp.UnmarshalJSON([]byte(v.(string))); err != nil {
+			panic(xerrors.Errorf("could not convert %v (%T) to decimal128: %w", v, v, err))
+		}
+
+		o[i] = decimal128.FromBigInt(&tmp)
+	}
+	return o
+}
+
 func strFromJSON(vs []interface{}) []string {
 	o := make([]string, len(vs))
 	for i, v := range vs {
diff --git a/go/arrow/internal/arrjson/arrjson_test.go b/go/arrow/internal/arrjson/arrjson_test.go
index 173db12b401..3f29a540871 100644
--- a/go/arrow/internal/arrjson/arrjson_test.go
+++ b/go/arrow/internal/arrjson/arrjson_test.go
@@ -49,9 +49,6 @@ func TestReadWrite(t *testing.T) {
 
 	for name, recs := range arrdata.Records {
 		t.Run(name, func(t *testing.T) {
-			if name == "decimal128" {
-				t.Skip() // FIXME(sbinet): implement full decimal128 support
-			}
 			mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
 			defer mem.AssertSize(t, 0)
 
@@ -3100,5 +3097,91 @@ func makeDurationsWantJSONs() string {
 }
 
 func makeDecimal128sWantJSONs() string {
-	return `` // FIXME(fredgan): implement full decimal128 JSON support
-}
\ No newline at end of file
+	return `{
+  "schema": {
+    "fields": [
+      {
+        "name": "dec128s",
+        "type": {
+          "name": "decimal",
+          "scale": 1,
+          "precision": 10
+        },
+        "nullable": true,
+        "children": []
+      }
+    ]
+  },
+  "batches": [
+    {
+      "count": 5,
+      "columns": [
+        {
+          "name": "dec128s",
+          "count": 5,
+          "VALIDITY": [
+            1,
+            0,
+            0,
+            1,
+            1
+          ],
+          "DATA": [
+            "571849066284996100127",
+            "590295810358705651744",
+            "608742554432415203361",
+            "627189298506124754978",
+            "645636042579834306595"
+          ]
+        }
+      ]
+    },
+    {
+      "count": 5,
+      "columns": [
+        {
+          "name": "dec128s",
+          "count": 5,
+          "VALIDITY": [
+            1,
+            0,
+            0,
+            1,
+            1
+          ],
+          "DATA": [
+            "756316507022091616297",
+            "774763251095801167914",
+            "793209995169510719531",
+            "811656739243220271148",
+            "830103483316929822765"
+          ]
+        }
+      ]
+    },
+    {
+      "count": 5,
+      "columns": [
+        {
+          "name": "dec128s",
+          "count": 5,
+          "VALIDITY": [
+            1,
+            0,
+            0,
+            1,
+            1
+          ],
+          "DATA": [
+            "940783947759187132467",
+            "959230691832896684084",
+            "977677435906606235701",
+            "996124179980315787318",
+            "1014570924054025338935"
+          ]
+        }
+      ]
+    }
+  ]
+}`
+}

From aa28470ca2a901a80e88874b4341905a3188186f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Tue, 27 Apr 2021 17:35:40 +0200
Subject: [PATCH 134/719] [Release][Minor] Bump development versions to
 5.0.0-SNAPSHOT (#10165)

* [Release] Update versions for 5.0.0-SNAPSHOT

* [Release] Update .deb package names for 5.0.0
---
 c_glib/meson.build                            |   2 +-
 ci/scripts/PKGBUILD                           |   2 +-
 cpp/CMakeLists.txt                            |   2 +-
 cpp/vcpkg.json                                |   2 +-
 csharp/Directory.Build.props                  |   2 +-
 dev/release/rat_exclude_files.txt             |  30 +-
 dev/tasks/homebrew-formulae/apache-arrow.rb   |   2 +-
 .../autobrew/apache-arrow.rb                  |   2 +-
 .../apache-arrow/debian/control.in            | 102 +++---
 ....install => libarrow-cuda-glib500.install} |   0
 ...da400.install => libarrow-cuda500.install} |   0
 ...stall => libarrow-dataset-glib500.install} |   0
 ...00.install => libarrow-dataset500.install} |   0
 ...400.install => libarrow-flight500.install} |   0
 ...ib400.install => libarrow-glib500.install} |   0
 ...tall => libarrow-python-flight500.install} |   0
 ...400.install => libarrow-python500.install} |   0
 ...ibarrow400.install => libarrow500.install} |   0
 ...400.install => libgandiva-glib500.install} |   0
 ...ndiva400.install => libgandiva500.install} |   0
 ...400.install => libparquet-glib500.install} |   0
 ...rquet400.install => libparquet500.install} |   0
 ...b400.install => libplasma-glib500.install} |   0
 ...plasma400.install => libplasma500.install} |   0
 dev/tasks/tasks.yml                           | 336 +++++++++---------
 java/adapter/avro/pom.xml                     |   2 +-
 java/adapter/jdbc/pom.xml                     |   2 +-
 java/adapter/orc/pom.xml                      |   2 +-
 java/algorithm/pom.xml                        |   2 +-
 java/compression/pom.xml                      |   2 +-
 java/dataset/pom.xml                          |   2 +-
 java/flight/flight-core/pom.xml               |   2 +-
 java/flight/flight-grpc/pom.xml               |   2 +-
 java/format/pom.xml                           |   2 +-
 java/gandiva/pom.xml                          |   2 +-
 java/memory/memory-core/pom.xml               |   2 +-
 java/memory/memory-netty/pom.xml              |   2 +-
 java/memory/memory-unsafe/pom.xml             |   2 +-
 java/memory/pom.xml                           |   2 +-
 java/performance/pom.xml                      |   4 +-
 java/plasma/pom.xml                           |   2 +-
 java/pom.xml                                  |   2 +-
 java/tools/pom.xml                            |   2 +-
 java/vector/pom.xml                           |   2 +-
 js/package.json                               |   2 +-
 matlab/CMakeLists.txt                         |   2 +-
 python/setup.py                               |   2 +-
 r/DESCRIPTION                                 |   2 +-
 r/NEWS.md                                     |   2 +
 ruby/red-arrow-cuda/lib/arrow-cuda/version.rb |   2 +-
 .../lib/arrow-dataset/version.rb              |   2 +-
 ruby/red-arrow/lib/arrow/version.rb           |   2 +-
 ruby/red-gandiva/lib/gandiva/version.rb       |   2 +-
 ruby/red-parquet/lib/parquet/version.rb       |   2 +-
 ruby/red-plasma/lib/plasma/version.rb         |   2 +-
 rust/arrow-flight/Cargo.toml                  |   4 +-
 .../Cargo.toml                                |   4 +-
 rust/arrow/Cargo.toml                         |   2 +-
 rust/benchmarks/Cargo.toml                    |   2 +-
 rust/datafusion-examples/Cargo.toml           |   2 +-
 rust/datafusion/Cargo.toml                    |   6 +-
 rust/datafusion/README.md                     |   2 +-
 rust/integration-testing/Cargo.toml           |   2 +-
 rust/parquet/Cargo.toml                       |   6 +-
 rust/parquet/README.md                        |   4 +-
 rust/parquet_derive/Cargo.toml                |   4 +-
 rust/parquet_derive/README.md                 |   4 +-
 rust/parquet_derive_test/Cargo.toml           |   6 +-
 68 files changed, 297 insertions(+), 295 deletions(-)
 rename dev/tasks/linux-packages/apache-arrow/debian/{libarrow-cuda-glib400.install => libarrow-cuda-glib500.install} (100%)
 rename dev/tasks/linux-packages/apache-arrow/debian/{libarrow-cuda400.install => libarrow-cuda500.install} (100%)
 rename dev/tasks/linux-packages/apache-arrow/debian/{libarrow-dataset-glib400.install => libarrow-dataset-glib500.install} (100%)
 rename dev/tasks/linux-packages/apache-arrow/debian/{libarrow-dataset400.install => libarrow-dataset500.install} (100%)
 rename dev/tasks/linux-packages/apache-arrow/debian/{libarrow-flight400.install => libarrow-flight500.install} (100%)
 rename dev/tasks/linux-packages/apache-arrow/debian/{libarrow-glib400.install => libarrow-glib500.install} (100%)
 rename dev/tasks/linux-packages/apache-arrow/debian/{libarrow-python-flight400.install => libarrow-python-flight500.install} (100%)
 rename dev/tasks/linux-packages/apache-arrow/debian/{libarrow-python400.install => libarrow-python500.install} (100%)
 rename dev/tasks/linux-packages/apache-arrow/debian/{libarrow400.install => libarrow500.install} (100%)
 rename dev/tasks/linux-packages/apache-arrow/debian/{libgandiva-glib400.install => libgandiva-glib500.install} (100%)
 rename dev/tasks/linux-packages/apache-arrow/debian/{libgandiva400.install => libgandiva500.install} (100%)
 rename dev/tasks/linux-packages/apache-arrow/debian/{libparquet-glib400.install => libparquet-glib500.install} (100%)
 rename dev/tasks/linux-packages/apache-arrow/debian/{libparquet400.install => libparquet500.install} (100%)
 rename dev/tasks/linux-packages/apache-arrow/debian/{libplasma-glib400.install => libplasma-glib500.install} (100%)
 rename dev/tasks/linux-packages/apache-arrow/debian/{libplasma400.install => libplasma500.install} (100%)

diff --git a/c_glib/meson.build b/c_glib/meson.build
index 4ac407e97d2..bd1f3a2eca7 100644
--- a/c_glib/meson.build
+++ b/c_glib/meson.build
@@ -23,7 +23,7 @@ project('arrow-glib', 'c', 'cpp',
           'cpp_std=c++11',
         ])
 
-version = '4.0.0-SNAPSHOT'
+version = '5.0.0-SNAPSHOT'
 if version.endswith('-SNAPSHOT')
   version_numbers = version.split('-')[0].split('.')
   version_tag = version.split('-')[1]
diff --git a/ci/scripts/PKGBUILD b/ci/scripts/PKGBUILD
index c5b55eef42a..366a3c3cc17 100644
--- a/ci/scripts/PKGBUILD
+++ b/ci/scripts/PKGBUILD
@@ -18,7 +18,7 @@
 _realname=arrow
 pkgbase=mingw-w64-${_realname}
 pkgname="${MINGW_PACKAGE_PREFIX}-${_realname}"
-pkgver=3.0.0.9000
+pkgver=4.0.0.9000
 pkgrel=8000
 pkgdesc="Apache Arrow is a cross-language development platform for in-memory data (mingw-w64)"
 arch=("any")
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 4e88c32dcd6..a6946403deb 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -47,7 +47,7 @@ if(POLICY CMP0074)
   cmake_policy(SET CMP0074 NEW)
 endif()
 
-set(ARROW_VERSION "4.0.0-SNAPSHOT")
+set(ARROW_VERSION "5.0.0-SNAPSHOT")
 
 string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" ARROW_BASE_VERSION "${ARROW_VERSION}")
 
diff --git a/cpp/vcpkg.json b/cpp/vcpkg.json
index 282677aea7f..c8b573e9e1a 100644
--- a/cpp/vcpkg.json
+++ b/cpp/vcpkg.json
@@ -1,6 +1,6 @@
 {
   "name": "arrow",
-  "version-string": "4.0.0-SNAPSHOT",
+  "version-string": "5.0.0-SNAPSHOT",
   "dependencies": [
     "abseil",
     {
diff --git a/csharp/Directory.Build.props b/csharp/Directory.Build.props
index 3ee2af71538..b610a768358 100644
--- a/csharp/Directory.Build.props
+++ b/csharp/Directory.Build.props
@@ -29,7 +29,7 @@
     <Product>Apache Arrow library</Product>
     <Copyright>Copyright 2016-2019 The Apache Software Foundation</Copyright>
     <Company>The Apache Software Foundation</Company>
-    <Version>4.0.0-SNAPSHOT</Version>
+    <Version>5.0.0-SNAPSHOT</Version>
   </PropertyGroup>
 
   <PropertyGroup>
diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt
index 68f5668098e..a50d729a7d5 100644
--- a/dev/release/rat_exclude_files.txt
+++ b/dev/release/rat_exclude_files.txt
@@ -68,46 +68,46 @@ dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-dev.install
 dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.doc-base
 dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.install
 dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.links
-dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib400.install
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib500.install
 dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-dev.install
 dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib-dev.install
-dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib400.install
-dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda400.install
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib500.install
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda500.install
 dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-dev.install
 dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-dev.install
 dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-doc.doc-base
 dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-doc.install
 dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-doc.links
-dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib400.install
-dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset400.install
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib500.install
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset500.install
 dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-dev.install
-dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight400.install
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight500.install
 dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-dev.install
 dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight-dev.install
-dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight400.install
-dev/tasks/linux-packages/apache-arrow/debian/libarrow-python400.install
-dev/tasks/linux-packages/apache-arrow/debian/libarrow400.install
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight500.install
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-python500.install
+dev/tasks/linux-packages/apache-arrow/debian/libarrow500.install
 dev/tasks/linux-packages/apache-arrow/debian/libgandiva-dev.install
 dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-dev.install
 dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-doc.doc-base
 dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-doc.install
 dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-doc.links
-dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib400.install
-dev/tasks/linux-packages/apache-arrow/debian/libgandiva400.install
+dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib500.install
+dev/tasks/linux-packages/apache-arrow/debian/libgandiva500.install
 dev/tasks/linux-packages/apache-arrow/debian/libparquet-dev.install
 dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-dev.install
 dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-doc.doc-base
 dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-doc.install
 dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-doc.links
-dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib400.install
-dev/tasks/linux-packages/apache-arrow/debian/libparquet400.install
+dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib500.install
+dev/tasks/linux-packages/apache-arrow/debian/libparquet500.install
 dev/tasks/linux-packages/apache-arrow/debian/libplasma-dev.install
 dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-dev.install
 dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-doc.doc-base
 dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-doc.install
 dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-doc.links
-dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib400.install
-dev/tasks/linux-packages/apache-arrow/debian/libplasma400.install
+dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib500.install
+dev/tasks/linux-packages/apache-arrow/debian/libplasma500.install
 dev/tasks/linux-packages/apache-arrow/debian/patches/series
 dev/tasks/linux-packages/apache-arrow/debian/plasma-store-server.install
 dev/tasks/linux-packages/apache-arrow/debian/rules
diff --git a/dev/tasks/homebrew-formulae/apache-arrow.rb b/dev/tasks/homebrew-formulae/apache-arrow.rb
index 953f1eea1c4..62ae516fed6 100644
--- a/dev/tasks/homebrew-formulae/apache-arrow.rb
+++ b/dev/tasks/homebrew-formulae/apache-arrow.rb
@@ -1,7 +1,7 @@
 class ApacheArrow < Formula
   desc "Columnar in-memory analytics layer designed to accelerate big data"
   homepage "https://arrow.apache.org/"
-  url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-4.0.0-SNAPSHOT/apache-arrow-4.0.0-SNAPSHOT.tar.gz"
+  url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-5.0.0-SNAPSHOT/apache-arrow-5.0.0-SNAPSHOT.tar.gz"
   sha256 "9948ddb6d4798b51552d0dca3252dd6e3a7d0f9702714fc6f5a1b59397ce1d28"
   license "Apache-2.0"
   head "https://github.com/apache/arrow.git"
diff --git a/dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb b/dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb
index 351d7764603..2a8f73ea1ef 100644
--- a/dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb
+++ b/dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb
@@ -19,7 +19,7 @@
 class ApacheArrow < Formula
   desc "Columnar in-memory analytics layer designed to accelerate big data"
   homepage "https://arrow.apache.org/"
-  url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-3.0.0.9000/apache-arrow-3.0.0.9000.tar.gz"
+  url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-4.0.0.9000/apache-arrow-4.0.0.9000.tar.gz"
   sha256 "9948ddb6d4798b51552d0dca3252dd6e3a7d0f9702714fc6f5a1b59397ce1d28"
   head "https://github.com/apache/arrow.git"
 
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/control.in b/dev/tasks/linux-packages/apache-arrow/debian/control.in
index f50b09e6043..d9e08a20452 100644
--- a/dev/tasks/linux-packages/apache-arrow/debian/control.in
+++ b/dev/tasks/linux-packages/apache-arrow/debian/control.in
@@ -35,7 +35,7 @@ Build-Depends-Indep: libglib2.0-doc
 Standards-Version: 3.9.8
 Homepage: https://arrow.apache.org/
 
-Package: libarrow400
+Package: libarrow500
 Section: libs
 Architecture: any
 Multi-Arch: same
@@ -47,7 +47,7 @@ Description: Apache Arrow is a data processing library for analysis
  .
  This package provides C++ library files.
 
-Package: libarrow-cuda400
+Package: libarrow-cuda500
 Section: libs
 Architecture: @CUDA_ARCHITECTURE@
 Multi-Arch: same
@@ -55,12 +55,12 @@ Pre-Depends: ${misc:Pre-Depends}
 Depends:
   ${misc:Depends},
   ${shlibs:Depends},
-  libarrow400 (= ${binary:Version})
+  libarrow500 (= ${binary:Version})
 Description: Apache Arrow is a data processing library for analysis
  .
  This package provides C++ library files for CUDA support.
 
-Package: libarrow-dataset400
+Package: libarrow-dataset500
 Section: libs
 Architecture: any
 Multi-Arch: same
@@ -68,13 +68,13 @@ Pre-Depends: ${misc:Pre-Depends}
 Depends:
   ${misc:Depends},
   ${shlibs:Depends},
-  libarrow400 (= ${binary:Version}),
-  libparquet400 (= ${binary:Version})
+  libarrow500 (= ${binary:Version}),
+  libparquet500 (= ${binary:Version})
 Description: Apache Arrow is a data processing library for analysis
  .
  This package provides C++ library files for Dataset module.
 
-Package: libarrow-flight400
+Package: libarrow-flight500
 Section: libs
 Architecture: any
 Multi-Arch: same
@@ -82,12 +82,12 @@ Pre-Depends: ${misc:Pre-Depends}
 Depends:
   ${misc:Depends},
   ${shlibs:Depends},
-  libarrow400 (= ${binary:Version})
+  libarrow500 (= ${binary:Version})
 Description: Apache Arrow is a data processing library for analysis
  .
  This package provides C++ library files for Flight RPC system.
 
-Package: libarrow-python400
+Package: libarrow-python500
 Section: libs
 Architecture: any
 Multi-Arch: same
@@ -95,14 +95,14 @@ Pre-Depends: ${misc:Pre-Depends}
 Depends:
   ${misc:Depends},
   ${shlibs:Depends},
-  libarrow400 (= ${binary:Version}),
+  libarrow500 (= ${binary:Version}),
   python3,
   python3-numpy
 Description: Apache Arrow is a data processing library for analysis
  .
  This package provides C++ library files for Python support.
 
-Package: libarrow-python-flight400
+Package: libarrow-python-flight500
 Section: libs
 Architecture: any
 Multi-Arch: same
@@ -110,8 +110,8 @@ Pre-Depends: ${misc:Pre-Depends}
 Depends:
   ${misc:Depends},
   ${shlibs:Depends},
-  libarrow-flight400 (= ${binary:Version}),
-  libarrow-python400 (= ${binary:Version})
+  libarrow-flight500 (= ${binary:Version}),
+  libarrow-python500 (= ${binary:Version})
 Description: Apache Arrow is a data processing library for analysis
  .
  This package provides C++ library files for Flight and Python support.
@@ -122,7 +122,7 @@ Architecture: any
 Multi-Arch: same
 Depends:
   ${misc:Depends},
-  libarrow400 (= ${binary:Version}),
+  libarrow500 (= ${binary:Version}),
   libbrotli-dev,
   libbz2-dev,
 @USE_SYSTEM_GRPC@  libgrpc++-dev,
@@ -145,7 +145,7 @@ Multi-Arch: same
 Depends:
   ${misc:Depends},
   libarrow-dev (= ${binary:Version}),
-  libarrow-cuda400 (= ${binary:Version})
+  libarrow-cuda500 (= ${binary:Version})
 Description: Apache Arrow is a data processing library for analysis
  .
  This package provides C++ header files for CUDA support.
@@ -157,7 +157,7 @@ Multi-Arch: same
 Depends:
   ${misc:Depends},
   libarrow-dev (= ${binary:Version}),
-  libarrow-dataset400 (= ${binary:Version}),
+  libarrow-dataset500 (= ${binary:Version}),
   libparquet-dev (= ${binary:Version})
 Description: Apache Arrow is a data processing library for analysis
  .
@@ -170,7 +170,7 @@ Multi-Arch: same
 Depends:
   ${misc:Depends},
   libarrow-dev (= ${binary:Version}),
-  libarrow-flight400 (= ${binary:Version})
+  libarrow-flight500 (= ${binary:Version})
 Description: Apache Arrow is a data processing library for analysis
  .
  This package provides C++ header files for Flight RPC system.
@@ -182,7 +182,7 @@ Multi-Arch: same
 Depends:
   ${misc:Depends},
   libarrow-dev (= ${binary:Version}),
-  libarrow-python400 (= ${binary:Version})
+  libarrow-python500 (= ${binary:Version})
 Description: Apache Arrow is a data processing library for analysis
  .
  This package provides C++ header files for Python support.
@@ -195,12 +195,12 @@ Depends:
   ${misc:Depends},
   libarrow-flight-dev (= ${binary:Version}),
   libarrow-python-dev (= ${binary:Version}),
-  libarrow-python-flight400 (= ${binary:Version})
+  libarrow-python-flight500 (= ${binary:Version})
 Description: Apache Arrow is a data processing library for analysis
  .
  This package provides C++ header files for Flight and Python support.
 
-Package: libgandiva400
+Package: libgandiva500
 Section: libs
 Architecture: any
 Multi-Arch: same
@@ -208,7 +208,7 @@ Pre-Depends: ${misc:Pre-Depends}
 Depends:
   ${misc:Depends},
   ${shlibs:Depends},
-  libarrow400 (= ${binary:Version})
+  libarrow500 (= ${binary:Version})
 Description: Gandiva is a toolset for compiling and evaluating expressions
  on Arrow Data.
  .
@@ -221,13 +221,13 @@ Multi-Arch: same
 Depends:
   ${misc:Depends},
   libarrow-dev (= ${binary:Version}),
-  libgandiva400 (= ${binary:Version})
+  libgandiva500 (= ${binary:Version})
 Description: Gandiva is a toolset for compiling and evaluating expressions
  on Arrow Data.
  .
  This package provides C++ header files.
 
-Package: libplasma400
+Package: libplasma500
 Section: libs
 Architecture: @CUDA_ARCHITECTURE@
 Multi-Arch: same
@@ -235,7 +235,7 @@ Pre-Depends: ${misc:Pre-Depends}
 Depends:
   ${misc:Depends},
   ${shlibs:Depends},
-  libarrow-cuda400 (= ${binary:Version})
+  libarrow-cuda500 (= ${binary:Version})
 Description: Plasma is an in-memory object store and cache for big data.
  .
  This package provides C++ library files to connect plasma-store-server.
@@ -247,7 +247,7 @@ Pre-Depends: ${misc:Pre-Depends}
 Depends:
   ${misc:Depends},
   ${shlibs:Depends},
-  libplasma400 (= ${binary:Version})
+  libplasma500 (= ${binary:Version})
 Description: Plasma is an in-memory object store and cache for big data.
  .
  This package provides plasma-store-server.
@@ -259,12 +259,12 @@ Multi-Arch: same
 Depends:
   ${misc:Depends},
   libarrow-cuda-dev (= ${binary:Version}),
-  libplasma400 (= ${binary:Version})
+  libplasma500 (= ${binary:Version})
 Description: Plasma is an in-memory object store and cache for big data.
  .
  This package provides C++ header files.
 
-Package: libparquet400
+Package: libparquet500
 Section: libs
 Architecture: any
 Multi-Arch: same
@@ -283,12 +283,12 @@ Multi-Arch: same
 Depends:
   ${misc:Depends},
   libarrow-dev (= ${binary:Version}),
-  libparquet400 (= ${binary:Version})
+  libparquet500 (= ${binary:Version})
 Description: Apache Parquet is a columnar storage format
  .
  This package provides C++ header files.
 
-Package: libarrow-glib400
+Package: libarrow-glib500
 Section: libs
 Architecture: any
 Multi-Arch: same
@@ -296,7 +296,7 @@ Pre-Depends: ${misc:Pre-Depends}
 Depends:
   ${misc:Depends},
   ${shlibs:Depends},
-  libarrow400 (= ${binary:Version})
+  libarrow500 (= ${binary:Version})
 Description: Apache Arrow is a data processing library for analysis
  .
  This package provides GLib based library files.
@@ -320,7 +320,7 @@ Depends:
   ${misc:Depends},
   libglib2.0-dev,
   libarrow-dev (= ${binary:Version}),
-  libarrow-glib400 (= ${binary:Version}),
+  libarrow-glib500 (= ${binary:Version}),
   gir1.2-arrow-1.0 (= ${binary:Version})
 Suggests: libarrow-glib-doc
 Description: Apache Arrow is a data processing library for analysis
@@ -338,7 +338,7 @@ Description: Apache Arrow is a data processing library for analysis
  .
  This package provides documentations.
 
-Package: libarrow-cuda-glib400
+Package: libarrow-cuda-glib500
 Section: libs
 Architecture: @CUDA_ARCHITECTURE@
 Multi-Arch: same
@@ -346,8 +346,8 @@ Pre-Depends: ${misc:Pre-Depends}
 Depends:
   ${misc:Depends},
   ${shlibs:Depends},
-  libarrow-glib400 (= ${binary:Version}),
-  libarrow-cuda400 (= ${binary:Version})
+  libarrow-glib500 (= ${binary:Version}),
+  libarrow-cuda500 (= ${binary:Version})
 Description: Apache Arrow is a data processing library for analysis
  .
  This package provides GLib based library files for CUDA support.
@@ -371,13 +371,13 @@ Depends:
   ${misc:Depends},
   libarrow-cuda-dev (= ${binary:Version}),
   libarrow-glib-dev (= ${binary:Version}),
-  libarrow-cuda-glib400 (= ${binary:Version}),
+  libarrow-cuda-glib500 (= ${binary:Version}),
   gir1.2-arrow-cuda-1.0 (= ${binary:Version})
 Description: Apache Arrow is a data processing library for analysis
  .
  This package provides GLib based header files for CUDA support.
 
-Package: libarrow-dataset-glib400
+Package: libarrow-dataset-glib500
 Section: libs
 Architecture: any
 Multi-Arch: same
@@ -385,8 +385,8 @@ Pre-Depends: ${misc:Pre-Depends}
 Depends:
   ${misc:Depends},
   ${shlibs:Depends},
-  libarrow-glib400 (= ${binary:Version}),
-  libarrow-dataset400 (= ${binary:Version})
+  libarrow-glib500 (= ${binary:Version}),
+  libarrow-dataset500 (= ${binary:Version})
 Description: Apache Arrow is a data processing library for analysis
  .
  This package provides GLib based library files for dataset module.
@@ -410,7 +410,7 @@ Depends:
   ${misc:Depends},
   libarrow-dataset-dev (= ${binary:Version}),
   libarrow-glib-dev (= ${binary:Version}),
-  libarrow-dataset-glib400 (= ${binary:Version}),
+  libarrow-dataset-glib500 (= ${binary:Version}),
   gir1.2-arrow-dataset-1.0 (= ${binary:Version})
 Description: Apache Arrow is a data processing library for analysis
  .
@@ -427,7 +427,7 @@ Description: Apache Arrow is a data processing library for analysis
  .
  This package provides documentations for dataset module.
 
-Package: libgandiva-glib400
+Package: libgandiva-glib500
 Section: libs
 Architecture: any
 Multi-Arch: same
@@ -435,8 +435,8 @@ Pre-Depends: ${misc:Pre-Depends}
 Depends:
   ${misc:Depends},
   ${shlibs:Depends},
-  libarrow-glib400 (= ${binary:Version}),
-  libgandiva400 (= ${binary:Version})
+  libarrow-glib500 (= ${binary:Version}),
+  libgandiva500 (= ${binary:Version})
 Description: Gandiva is a toolset for compiling and evaluating expressions
  on Arrow Data.
  .
@@ -462,7 +462,7 @@ Depends:
   ${misc:Depends},
   libgandiva-dev (= ${binary:Version}),
   libarrow-glib-dev (= ${binary:Version}),
-  libgandiva-glib400 (= ${binary:Version}),
+  libgandiva-glib500 (= ${binary:Version}),
   gir1.2-gandiva-1.0 (= ${binary:Version})
 Description: Gandiva is a toolset for compiling and evaluating expressions
  on Arrow Data.
@@ -481,7 +481,7 @@ Description: Gandiva is a toolset for compiling and evaluating expressions
  .
  This package provides documentations.
 
-Package: libplasma-glib400
+Package: libplasma-glib500
 Section: libs
 Architecture: @CUDA_ARCHITECTURE@
 Multi-Arch: same
@@ -489,8 +489,8 @@ Pre-Depends: ${misc:Pre-Depends}
 Depends:
   ${misc:Depends},
   ${shlibs:Depends},
-  libarrow-cuda-glib400 (= ${binary:Version}),
-  libplasma400 (= ${binary:Version})
+  libarrow-cuda-glib500 (= ${binary:Version}),
+  libplasma500 (= ${binary:Version})
 Description: Plasma is an in-memory object store and cache for big data.
  .
  This package provides GLib based library files to connect plasma-store-server.
@@ -514,7 +514,7 @@ Depends:
   ${misc:Depends},
   libplasma-dev (= ${binary:Version}),
   libarrow-cuda-glib-dev (= ${binary:Version}),
-  libplasma-glib400 (= ${binary:Version}),
+  libplasma-glib500 (= ${binary:Version}),
   gir1.2-plasma-1.0 (= ${binary:Version})
 Description: Plasma is an in-memory object store and cache for big data.
  .
@@ -531,7 +531,7 @@ Description: Plasma is an in-memory object store and cache for big data.
  .
  This package provides documentations.
 
-Package: libparquet-glib400
+Package: libparquet-glib500
 Section: libs
 Architecture: any
 Multi-Arch: same
@@ -539,8 +539,8 @@ Pre-Depends: ${misc:Pre-Depends}
 Depends:
   ${misc:Depends},
   ${shlibs:Depends},
-  libarrow-glib400 (= ${binary:Version}),
-  libparquet400 (= ${binary:Version})
+  libarrow-glib500 (= ${binary:Version}),
+  libparquet500 (= ${binary:Version})
 Description: Apache Parquet is a columnar storage format
  .
  This package provides GLib based library files.
@@ -564,7 +564,7 @@ Depends:
   ${misc:Depends},
   libarrow-glib-dev (= ${binary:Version}),
   libparquet-dev (= ${binary:Version}),
-  libparquet-glib400 (= ${binary:Version}),
+  libparquet-glib500 (= ${binary:Version}),
   gir1.2-parquet-1.0 (= ${binary:Version})
 Suggests: libparquet-glib-doc
 Description: Apache Parquet is a columnar storage format
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib400.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib500.install
similarity index 100%
rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib400.install
rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib500.install
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda400.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda500.install
similarity index 100%
rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda400.install
rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda500.install
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib400.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib500.install
similarity index 100%
rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib400.install
rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib500.install
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset400.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset500.install
similarity index 100%
rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset400.install
rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset500.install
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight400.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight500.install
similarity index 100%
rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight400.install
rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight500.install
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib400.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib500.install
similarity index 100%
rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib400.install
rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib500.install
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight400.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight500.install
similarity index 100%
rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight400.install
rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight500.install
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python400.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python500.install
similarity index 100%
rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-python400.install
rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-python500.install
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow400.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow500.install
similarity index 100%
rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow400.install
rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow500.install
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib400.install b/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib500.install
similarity index 100%
rename from dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib400.install
rename to dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib500.install
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libgandiva400.install b/dev/tasks/linux-packages/apache-arrow/debian/libgandiva500.install
similarity index 100%
rename from dev/tasks/linux-packages/apache-arrow/debian/libgandiva400.install
rename to dev/tasks/linux-packages/apache-arrow/debian/libgandiva500.install
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib400.install b/dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib500.install
similarity index 100%
rename from dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib400.install
rename to dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib500.install
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libparquet400.install b/dev/tasks/linux-packages/apache-arrow/debian/libparquet500.install
similarity index 100%
rename from dev/tasks/linux-packages/apache-arrow/debian/libparquet400.install
rename to dev/tasks/linux-packages/apache-arrow/debian/libparquet500.install
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib400.install b/dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib500.install
similarity index 100%
rename from dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib400.install
rename to dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib500.install
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libplasma400.install b/dev/tasks/linux-packages/apache-arrow/debian/libplasma500.install
similarity index 100%
rename from dev/tasks/linux-packages/apache-arrow/debian/libplasma400.install
rename to dev/tasks/linux-packages/apache-arrow/debian/libplasma500.install
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index 4bf0abfa50c..cc860e068da 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -463,50 +463,50 @@ tasks:
       - libarrow-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-glib400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-glib400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-glib500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libarrow-glib500_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-cuda-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-cuda-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-cuda-glib400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-cuda-glib400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-cuda400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-cuda400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-cuda-glib500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libarrow-cuda-glib500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-cuda500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libarrow-cuda500_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-dataset-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-dataset-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-dataset-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset-glib400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-dataset-glib400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-dataset400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-dataset-glib500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libarrow-dataset-glib500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-dataset500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libarrow-dataset500_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-flight-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-flight400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-flight400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-flight500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libarrow-flight500_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-python-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-python400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-python400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-python500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libarrow-python500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libarrow500_{no_rc_version}-1_[a-z0-9]+.deb
       - libgandiva-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libgandiva-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libgandiva-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva-glib400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libgandiva-glib400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libgandiva400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libgandiva-glib500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libgandiva-glib500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libgandiva500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libgandiva500_{no_rc_version}-1_[a-z0-9]+.deb
       - libparquet-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libparquet-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libparquet-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet-glib400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libparquet-glib400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libparquet400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libparquet-glib500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libparquet-glib500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libparquet500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libparquet500_{no_rc_version}-1_[a-z0-9]+.deb
       - libplasma-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libplasma-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libplasma-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libplasma-glib400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libplasma-glib400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libplasma400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libplasma400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libplasma-glib500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libplasma-glib500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libplasma500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libplasma500_{no_rc_version}-1_[a-z0-9]+.deb
       - plasma-store-server-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
       - plasma-store-server_{no_rc_version}-1_[a-z0-9]+.deb
 
@@ -534,36 +534,36 @@ tasks:
       - libarrow-dataset-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-dataset-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-dataset-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset-glib400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-dataset-glib400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-dataset400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-dataset-glib500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libarrow-dataset-glib500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-dataset500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libarrow-dataset500_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-flight-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-flight400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-flight400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-flight500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libarrow-flight500_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-glib400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-glib400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-glib500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libarrow-glib500_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-python-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-python400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-python400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-python500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libarrow-python500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libarrow500_{no_rc_version}-1_[a-z0-9]+.deb
       - libgandiva-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libgandiva-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libgandiva-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva-glib400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libgandiva-glib400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libgandiva400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libgandiva-glib500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libgandiva-glib500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libgandiva500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libgandiva500_{no_rc_version}-1_[a-z0-9]+.deb
       - libparquet-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libparquet-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libparquet-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet-glib400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libparquet-glib400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libparquet400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libparquet-glib500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libparquet-glib500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libparquet500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libparquet500_{no_rc_version}-1_[a-z0-9]+.deb
 
   debian-bullseye-amd64:
     ci: github
@@ -593,50 +593,50 @@ tasks:
       - libarrow-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-glib400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-glib400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-glib500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libarrow-glib500_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-cuda-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-cuda-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-cuda-glib400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-cuda-glib400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-cuda400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-cuda400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-cuda-glib500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libarrow-cuda-glib500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-cuda500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libarrow-cuda500_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-dataset-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-dataset-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-dataset-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset-glib400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-dataset-glib400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-dataset400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-dataset-glib500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libarrow-dataset-glib500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-dataset500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libarrow-dataset500_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-flight-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-flight400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-flight400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-flight500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libarrow-flight500_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-python-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-python400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-python400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-python500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libarrow-python500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libarrow500_{no_rc_version}-1_[a-z0-9]+.deb
       - libgandiva-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libgandiva-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libgandiva-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva-glib400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libgandiva-glib400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libgandiva400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libgandiva-glib500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libgandiva-glib500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libgandiva500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libgandiva500_{no_rc_version}-1_[a-z0-9]+.deb
       - libparquet-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libparquet-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libparquet-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet-glib400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libparquet-glib400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libparquet400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libparquet-glib500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libparquet-glib500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libparquet500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libparquet500_{no_rc_version}-1_[a-z0-9]+.deb
       - libplasma-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libplasma-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libplasma-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libplasma-glib400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libplasma-glib400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libplasma400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libplasma400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libplasma-glib500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libplasma-glib500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libplasma500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libplasma500_{no_rc_version}-1_[a-z0-9]+.deb
       - plasma-store-server-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
       - plasma-store-server_{no_rc_version}-1_[a-z0-9]+.deb
 
@@ -664,36 +664,36 @@ tasks:
       - libarrow-dataset-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-dataset-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-dataset-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset-glib400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-dataset-glib400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-dataset400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-dataset-glib500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libarrow-dataset-glib500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-dataset500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libarrow-dataset500_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-flight-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-flight400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-flight400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-flight500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libarrow-flight500_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-glib400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-glib400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-glib500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libarrow-glib500_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-python-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-python400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-python400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-python500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libarrow-python500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libarrow500_{no_rc_version}-1_[a-z0-9]+.deb
       - libgandiva-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libgandiva-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libgandiva-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva-glib400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libgandiva-glib400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libgandiva400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libgandiva-glib500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libgandiva-glib500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libgandiva500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libgandiva500_{no_rc_version}-1_[a-z0-9]+.deb
       - libparquet-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libparquet-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libparquet-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet-glib400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libparquet-glib400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet400-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libparquet400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libparquet-glib500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libparquet-glib500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libparquet500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libparquet500_{no_rc_version}-1_[a-z0-9]+.deb
 
   ubuntu-bionic-amd64:
     ci: github
@@ -725,36 +725,36 @@ tasks:
       - libarrow-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-glib400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-glib500_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-cuda-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-cuda-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-cuda-glib400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-cuda400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-cuda-glib500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-cuda500_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-dataset-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-dataset-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-dataset-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset-glib400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-dataset-glib500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-dataset500_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-flight-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-flight400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-flight500_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-python-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-python400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-python500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow500_{no_rc_version}-1_[a-z0-9]+.deb
       - libgandiva-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libgandiva-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libgandiva-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva-glib400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libgandiva-glib500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libgandiva500_{no_rc_version}-1_[a-z0-9]+.deb
       - libparquet-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libparquet-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libparquet-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet-glib400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libparquet-glib500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libparquet500_{no_rc_version}-1_[a-z0-9]+.deb
       - libplasma-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libplasma-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libplasma-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libplasma-glib400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libplasma400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libplasma-glib500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libplasma500_{no_rc_version}-1_[a-z0-9]+.deb
       - plasma-store-server-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
       - plasma-store-server_{no_rc_version}-1_[a-z0-9]+.deb
 
@@ -781,27 +781,27 @@ tasks:
       - libarrow-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-glib400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-glib500_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-dataset-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-dataset-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-dataset-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset-glib400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-dataset-glib500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-dataset500_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-flight-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-flight400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-flight500_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-python-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-python400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-python500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow500_{no_rc_version}-1_[a-z0-9]+.deb
       - libgandiva-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libgandiva-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libgandiva-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva-glib400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libgandiva-glib500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libgandiva500_{no_rc_version}-1_[a-z0-9]+.deb
       - libparquet-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libparquet-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libparquet-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet-glib400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libparquet-glib500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libparquet500_{no_rc_version}-1_[a-z0-9]+.deb
 
   ubuntu-focal-amd64:
     ci: github
@@ -831,36 +831,36 @@ tasks:
       - libarrow-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-glib400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-glib500_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-cuda-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-cuda-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-cuda-glib400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-cuda400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-cuda-glib500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-cuda500_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-dataset-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-dataset-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-dataset-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset-glib400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-dataset-glib500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-dataset500_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-flight-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-flight400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-flight500_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-python-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-python400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-python500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow500_{no_rc_version}-1_[a-z0-9]+.deb
       - libgandiva-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libgandiva-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libgandiva-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva-glib400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libgandiva-glib500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libgandiva500_{no_rc_version}-1_[a-z0-9]+.deb
       - libparquet-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libparquet-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libparquet-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet-glib400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libparquet-glib500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libparquet500_{no_rc_version}-1_[a-z0-9]+.deb
       - libplasma-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libplasma-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libplasma-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libplasma-glib400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libplasma400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libplasma-glib500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libplasma500_{no_rc_version}-1_[a-z0-9]+.deb
       - plasma-store-server-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
       - plasma-store-server_{no_rc_version}-1_[a-z0-9]+.deb
 
@@ -887,27 +887,27 @@ tasks:
       - libarrow-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-glib400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-glib500_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-dataset-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-dataset-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-dataset-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset-glib400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-dataset-glib500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-dataset500_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-flight-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-flight400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-flight500_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-python-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-python400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-python500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow500_{no_rc_version}-1_[a-z0-9]+.deb
       - libgandiva-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libgandiva-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libgandiva-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva-glib400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libgandiva-glib500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libgandiva500_{no_rc_version}-1_[a-z0-9]+.deb
       - libparquet-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libparquet-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libparquet-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet-glib400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libparquet-glib500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libparquet500_{no_rc_version}-1_[a-z0-9]+.deb
 
   ubuntu-groovy-amd64:
     ci: github
@@ -937,36 +937,36 @@ tasks:
       - libarrow-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-glib400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-glib500_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-cuda-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-cuda-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-cuda-glib400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-cuda400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-cuda-glib500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-cuda500_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-dataset-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-dataset-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-dataset-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset-glib400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-dataset-glib500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-dataset500_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-flight-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-flight400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-flight500_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-python-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-python400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-python500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow500_{no_rc_version}-1_[a-z0-9]+.deb
       - libgandiva-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libgandiva-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libgandiva-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva-glib400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libgandiva-glib500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libgandiva500_{no_rc_version}-1_[a-z0-9]+.deb
       - libparquet-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libparquet-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libparquet-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet-glib400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libparquet-glib500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libparquet500_{no_rc_version}-1_[a-z0-9]+.deb
       - libplasma-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libplasma-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libplasma-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libplasma-glib400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libplasma400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libplasma-glib500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libplasma500_{no_rc_version}-1_[a-z0-9]+.deb
       - plasma-store-server-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
       - plasma-store-server_{no_rc_version}-1_[a-z0-9]+.deb
 
@@ -993,27 +993,27 @@ tasks:
       - libarrow-dataset-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-dataset-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-dataset-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset-glib400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-dataset-glib500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-dataset500_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-flight-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-flight400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-flight500_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-glib400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-glib500_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-python-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-python400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-python500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow500_{no_rc_version}-1_[a-z0-9]+.deb
       - libgandiva-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libgandiva-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libgandiva-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva-glib400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libgandiva-glib500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libgandiva500_{no_rc_version}-1_[a-z0-9]+.deb
       - libparquet-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libparquet-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libparquet-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet-glib400_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet400_{no_rc_version}-1_[a-z0-9]+.deb
+      - libparquet-glib500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libparquet500_{no_rc_version}-1_[a-z0-9]+.deb
 
   centos-7-amd64:
     ci: github
diff --git a/java/adapter/avro/pom.xml b/java/adapter/avro/pom.xml
index d2b242f93cf..9a424d01200 100644
--- a/java/adapter/avro/pom.xml
+++ b/java/adapter/avro/pom.xml
@@ -16,7 +16,7 @@
   <parent>
     <groupId>org.apache.arrow</groupId>
     <artifactId>arrow-java-root</artifactId>
-    <version>4.0.0-SNAPSHOT</version>
+    <version>5.0.0-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 
diff --git a/java/adapter/jdbc/pom.xml b/java/adapter/jdbc/pom.xml
index b75135fd8b2..d080aedd532 100644
--- a/java/adapter/jdbc/pom.xml
+++ b/java/adapter/jdbc/pom.xml
@@ -16,7 +16,7 @@
     <parent>
         <groupId>org.apache.arrow</groupId>
         <artifactId>arrow-java-root</artifactId>
-        <version>4.0.0-SNAPSHOT</version>
+        <version>5.0.0-SNAPSHOT</version>
         <relativePath>../../pom.xml</relativePath>
     </parent>
 
diff --git a/java/adapter/orc/pom.xml b/java/adapter/orc/pom.xml
index 3ec5f36e75b..6e30d3f0209 100644
--- a/java/adapter/orc/pom.xml
+++ b/java/adapter/orc/pom.xml
@@ -87,7 +87,7 @@
     <parent>
         <groupId>org.apache.arrow</groupId>
         <artifactId>arrow-java-root</artifactId>
-        <version>4.0.0-SNAPSHOT</version>
+        <version>5.0.0-SNAPSHOT</version>
         <relativePath>../../pom.xml</relativePath>
     </parent>
 
diff --git a/java/algorithm/pom.xml b/java/algorithm/pom.xml
index 3ad44d449df..7d140d95e2e 100644
--- a/java/algorithm/pom.xml
+++ b/java/algorithm/pom.xml
@@ -14,7 +14,7 @@
   <parent>
     <groupId>org.apache.arrow</groupId>
     <artifactId>arrow-java-root</artifactId>
-    <version>4.0.0-SNAPSHOT</version>
+    <version>5.0.0-SNAPSHOT</version>
   </parent>
   <artifactId>arrow-algorithm</artifactId>
   <name>Arrow Algorithms</name>
diff --git a/java/compression/pom.xml b/java/compression/pom.xml
index dc0a9586539..c8fc4efc9c4 100644
--- a/java/compression/pom.xml
+++ b/java/compression/pom.xml
@@ -14,7 +14,7 @@
   <parent>
     <groupId>org.apache.arrow</groupId>
     <artifactId>arrow-java-root</artifactId>
-    <version>4.0.0-SNAPSHOT</version>
+    <version>5.0.0-SNAPSHOT</version>
   </parent>
   <artifactId>arrow-compression</artifactId>
   <name>Arrow Compression</name>
diff --git a/java/dataset/pom.xml b/java/dataset/pom.xml
index c4246a89090..d4fea9f0efe 100644
--- a/java/dataset/pom.xml
+++ b/java/dataset/pom.xml
@@ -15,7 +15,7 @@
     <parent>
         <artifactId>arrow-java-root</artifactId>
         <groupId>org.apache.arrow</groupId>
-        <version>4.0.0-SNAPSHOT</version>
+        <version>5.0.0-SNAPSHOT</version>
     </parent>
     <modelVersion>4.0.0</modelVersion>
 
diff --git a/java/flight/flight-core/pom.xml b/java/flight/flight-core/pom.xml
index 1fc87411f37..00ab5141f70 100644
--- a/java/flight/flight-core/pom.xml
+++ b/java/flight/flight-core/pom.xml
@@ -14,7 +14,7 @@
   <parent>
     <groupId>org.apache.arrow</groupId>
     <artifactId>arrow-java-root</artifactId>
-    <version>4.0.0-SNAPSHOT</version>
+    <version>5.0.0-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 
diff --git a/java/flight/flight-grpc/pom.xml b/java/flight/flight-grpc/pom.xml
index 8399642ad1f..1c9b66e021a 100644
--- a/java/flight/flight-grpc/pom.xml
+++ b/java/flight/flight-grpc/pom.xml
@@ -13,7 +13,7 @@
   <parent>
     <artifactId>arrow-java-root</artifactId>
     <groupId>org.apache.arrow</groupId>
-    <version>4.0.0-SNAPSHOT</version>
+    <version>5.0.0-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
   <modelVersion>4.0.0</modelVersion>
diff --git a/java/format/pom.xml b/java/format/pom.xml
index 4d6599e0bfa..5c9fc4217fe 100644
--- a/java/format/pom.xml
+++ b/java/format/pom.xml
@@ -15,7 +15,7 @@
 <parent>
   <artifactId>arrow-java-root</artifactId>
   <groupId>org.apache.arrow</groupId>
-  <version>4.0.0-SNAPSHOT</version>
+  <version>5.0.0-SNAPSHOT</version>
 </parent>
 
 <artifactId>arrow-format</artifactId>
diff --git a/java/gandiva/pom.xml b/java/gandiva/pom.xml
index 85343888762..227172a59ca 100644
--- a/java/gandiva/pom.xml
+++ b/java/gandiva/pom.xml
@@ -14,7 +14,7 @@
     <parent>
       <groupId>org.apache.arrow</groupId>
       <artifactId>arrow-java-root</artifactId>
-      <version>4.0.0-SNAPSHOT</version>
+      <version>5.0.0-SNAPSHOT</version>
     </parent>
 
     <groupId>org.apache.arrow.gandiva</groupId>
diff --git a/java/memory/memory-core/pom.xml b/java/memory/memory-core/pom.xml
index b9f4514aed7..dc4c2703306 100644
--- a/java/memory/memory-core/pom.xml
+++ b/java/memory/memory-core/pom.xml
@@ -13,7 +13,7 @@
   <parent>
     <artifactId>arrow-memory</artifactId>
     <groupId>org.apache.arrow</groupId>
-    <version>4.0.0-SNAPSHOT</version>
+    <version>5.0.0-SNAPSHOT</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
 
diff --git a/java/memory/memory-netty/pom.xml b/java/memory/memory-netty/pom.xml
index d427f572377..0ea36359c8d 100644
--- a/java/memory/memory-netty/pom.xml
+++ b/java/memory/memory-netty/pom.xml
@@ -13,7 +13,7 @@
   <parent>
     <artifactId>arrow-memory</artifactId>
     <groupId>org.apache.arrow</groupId>
-    <version>4.0.0-SNAPSHOT</version>
+    <version>5.0.0-SNAPSHOT</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
 
diff --git a/java/memory/memory-unsafe/pom.xml b/java/memory/memory-unsafe/pom.xml
index c1b9af36570..12d6d3a8ce2 100644
--- a/java/memory/memory-unsafe/pom.xml
+++ b/java/memory/memory-unsafe/pom.xml
@@ -13,7 +13,7 @@
   <parent>
     <artifactId>arrow-memory</artifactId>
     <groupId>org.apache.arrow</groupId>
-    <version>4.0.0-SNAPSHOT</version>
+    <version>5.0.0-SNAPSHOT</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
 
diff --git a/java/memory/pom.xml b/java/memory/pom.xml
index 814e3da4417..3882805759f 100644
--- a/java/memory/pom.xml
+++ b/java/memory/pom.xml
@@ -14,7 +14,7 @@
   <parent>
     <groupId>org.apache.arrow</groupId>
     <artifactId>arrow-java-root</artifactId>
-    <version>4.0.0-SNAPSHOT</version>
+    <version>5.0.0-SNAPSHOT</version>
   </parent>
   <artifactId>arrow-memory</artifactId>
   <name>Arrow Memory</name>
diff --git a/java/performance/pom.xml b/java/performance/pom.xml
index dffe7f2cbd2..d9828988b28 100644
--- a/java/performance/pom.xml
+++ b/java/performance/pom.xml
@@ -14,7 +14,7 @@
     <parent>
         <artifactId>arrow-java-root</artifactId>
         <groupId>org.apache.arrow</groupId>
-        <version>4.0.0-SNAPSHOT</version>
+        <version>5.0.0-SNAPSHOT</version>
     </parent>
     <artifactId>arrow-performance</artifactId>
     <packaging>jar</packaging>
@@ -86,7 +86,7 @@
         <dependency>
             <groupId>org.apache.arrow</groupId>
             <artifactId>arrow-algorithm</artifactId>
-            <version>4.0.0-SNAPSHOT</version>
+            <version>5.0.0-SNAPSHOT</version>
             <scope>test</scope>
         </dependency>
     </dependencies>
diff --git a/java/plasma/pom.xml b/java/plasma/pom.xml
index 7e298d70439..ab19e3ff7f9 100644
--- a/java/plasma/pom.xml
+++ b/java/plasma/pom.xml
@@ -14,7 +14,7 @@
     <parent>
         <groupId>org.apache.arrow</groupId>
         <artifactId>arrow-java-root</artifactId>
-        <version>4.0.0-SNAPSHOT</version>
+        <version>5.0.0-SNAPSHOT</version>
     </parent>
     <artifactId>arrow-plasma</artifactId>
     <name>Arrow Plasma Client</name>
diff --git a/java/pom.xml b/java/pom.xml
index 89be67f775f..9fa10bb5745 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -20,7 +20,7 @@
 
   <groupId>org.apache.arrow</groupId>
   <artifactId>arrow-java-root</artifactId>
-  <version>4.0.0-SNAPSHOT</version>
+  <version>5.0.0-SNAPSHOT</version>
   <packaging>pom</packaging>
 
   <name>Apache Arrow Java Root POM</name>
diff --git a/java/tools/pom.xml b/java/tools/pom.xml
index 375d2435154..fafa6fa34e2 100644
--- a/java/tools/pom.xml
+++ b/java/tools/pom.xml
@@ -14,7 +14,7 @@
     <parent>
         <groupId>org.apache.arrow</groupId>
         <artifactId>arrow-java-root</artifactId>
-        <version>4.0.0-SNAPSHOT</version>
+        <version>5.0.0-SNAPSHOT</version>
     </parent>
     <artifactId>arrow-tools</artifactId>
     <name>Arrow Tools</name>
diff --git a/java/vector/pom.xml b/java/vector/pom.xml
index ed22e9b94cd..2a610d177a2 100644
--- a/java/vector/pom.xml
+++ b/java/vector/pom.xml
@@ -14,7 +14,7 @@
   <parent>
     <groupId>org.apache.arrow</groupId>
     <artifactId>arrow-java-root</artifactId>
-    <version>4.0.0-SNAPSHOT</version>
+    <version>5.0.0-SNAPSHOT</version>
   </parent>
   <artifactId>arrow-vector</artifactId>
   <name>Arrow Vectors</name>
diff --git a/js/package.json b/js/package.json
index 5873e33234d..976df640046 100644
--- a/js/package.json
+++ b/js/package.json
@@ -108,5 +108,5 @@
   "engines": {
     "node": ">=11.12"
   },
-  "version": "4.0.0-SNAPSHOT"
+  "version": "5.0.0-SNAPSHOT"
 }
diff --git a/matlab/CMakeLists.txt b/matlab/CMakeLists.txt
index fb80670b1fd..3c03e6791ee 100644
--- a/matlab/CMakeLists.txt
+++ b/matlab/CMakeLists.txt
@@ -18,7 +18,7 @@
 cmake_minimum_required(VERSION 3.2)
 set(CMAKE_CXX_STANDARD 11)
 
-set(MLARROW_VERSION "4.0.0-SNAPSHOT")
+set(MLARROW_VERSION "5.0.0-SNAPSHOT")
 string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" MLARROW_BASE_VERSION "${MLARROW_VERSION}")
 
 project(mlarrow VERSION "${MLARROW_BASE_VERSION}")
diff --git a/python/setup.py b/python/setup.py
index a2abb050177..5c66ff5d859 100755
--- a/python/setup.py
+++ b/python/setup.py
@@ -519,7 +519,7 @@ def _move_shared_libs_unix(build_prefix, build_lib, lib_name):
 
 # If the event of not running from a git clone (e.g. from a git archive
 # or a Python sdist), see if we can set the version number ourselves
-default_version = '4.0.0-SNAPSHOT'
+default_version = '5.0.0-SNAPSHOT'
 if (not os.path.exists('../.git') and
         not os.environ.get('SETUPTOOLS_SCM_PRETEND_VERSION')):
     if os.path.exists('PKG-INFO'):
diff --git a/r/DESCRIPTION b/r/DESCRIPTION
index 7a63b9e7ebc..7f88320fb3d 100644
--- a/r/DESCRIPTION
+++ b/r/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: arrow
 Title: Integration to 'Apache' 'Arrow'
-Version: 3.0.0.9000
+Version: 4.0.0.9000
 Authors@R: c(
     person("Neal", "Richardson", email = "neal@ursalabs.org", role = c("aut", "cre")),
     person("Ian", "Cook", email = "ianmcook@gmail.com", role = c("aut")),
diff --git a/r/NEWS.md b/r/NEWS.md
index c064ac749fe..71b69bb69c4 100644
--- a/r/NEWS.md
+++ b/r/NEWS.md
@@ -17,6 +17,8 @@
   under the License.
 -->
 
+# arrow 4.0.0.9000
+
 # arrow 3.0.0.9000
 
 ## dplyr methods
diff --git a/ruby/red-arrow-cuda/lib/arrow-cuda/version.rb b/ruby/red-arrow-cuda/lib/arrow-cuda/version.rb
index d4031e51793..1853aa7cf9b 100644
--- a/ruby/red-arrow-cuda/lib/arrow-cuda/version.rb
+++ b/ruby/red-arrow-cuda/lib/arrow-cuda/version.rb
@@ -16,7 +16,7 @@
 # under the License.
 
 module ArrowCUDA
-  VERSION = "4.0.0-SNAPSHOT"
+  VERSION = "5.0.0-SNAPSHOT"
 
   module Version
     numbers, TAG = VERSION.split("-")
diff --git a/ruby/red-arrow-dataset/lib/arrow-dataset/version.rb b/ruby/red-arrow-dataset/lib/arrow-dataset/version.rb
index 0f1e3541aa5..390ce96fa2d 100644
--- a/ruby/red-arrow-dataset/lib/arrow-dataset/version.rb
+++ b/ruby/red-arrow-dataset/lib/arrow-dataset/version.rb
@@ -16,7 +16,7 @@
 # under the License.
 
 module ArrowDataset
-  VERSION = "4.0.0-SNAPSHOT"
+  VERSION = "5.0.0-SNAPSHOT"
 
   module Version
     numbers, TAG = VERSION.split("-")
diff --git a/ruby/red-arrow/lib/arrow/version.rb b/ruby/red-arrow/lib/arrow/version.rb
index 052d586ed4f..0c88191f662 100644
--- a/ruby/red-arrow/lib/arrow/version.rb
+++ b/ruby/red-arrow/lib/arrow/version.rb
@@ -16,7 +16,7 @@
 # under the License.
 
 module Arrow
-  VERSION = "4.0.0-SNAPSHOT"
+  VERSION = "5.0.0-SNAPSHOT"
 
   module Version
     numbers, TAG = VERSION.split("-")
diff --git a/ruby/red-gandiva/lib/gandiva/version.rb b/ruby/red-gandiva/lib/gandiva/version.rb
index 535df8b8c0d..1b45d165bb9 100644
--- a/ruby/red-gandiva/lib/gandiva/version.rb
+++ b/ruby/red-gandiva/lib/gandiva/version.rb
@@ -16,7 +16,7 @@
 # under the License.
 
 module Gandiva
-  VERSION = "4.0.0-SNAPSHOT"
+  VERSION = "5.0.0-SNAPSHOT"
 
   module Version
     numbers, TAG = VERSION.split("-")
diff --git a/ruby/red-parquet/lib/parquet/version.rb b/ruby/red-parquet/lib/parquet/version.rb
index b309873b7be..8d9d134b33c 100644
--- a/ruby/red-parquet/lib/parquet/version.rb
+++ b/ruby/red-parquet/lib/parquet/version.rb
@@ -16,7 +16,7 @@
 # under the License.
 
 module Parquet
-  VERSION = "4.0.0-SNAPSHOT"
+  VERSION = "5.0.0-SNAPSHOT"
 
   module Version
     numbers, TAG = VERSION.split("-")
diff --git a/ruby/red-plasma/lib/plasma/version.rb b/ruby/red-plasma/lib/plasma/version.rb
index 991dd193937..23a4babd91c 100644
--- a/ruby/red-plasma/lib/plasma/version.rb
+++ b/ruby/red-plasma/lib/plasma/version.rb
@@ -16,7 +16,7 @@
 # under the License.
 
 module Plasma
-  VERSION = "4.0.0-SNAPSHOT"
+  VERSION = "5.0.0-SNAPSHOT"
 
   module Version
     numbers, TAG = VERSION.split("-")
diff --git a/rust/arrow-flight/Cargo.toml b/rust/arrow-flight/Cargo.toml
index c607b562cec..de6aa832315 100644
--- a/rust/arrow-flight/Cargo.toml
+++ b/rust/arrow-flight/Cargo.toml
@@ -18,7 +18,7 @@
 [package]
 name = "arrow-flight"
 description = "Apache Arrow Flight"
-version = "4.0.0-SNAPSHOT"
+version = "5.0.0-SNAPSHOT"
 edition = "2018"
 authors = ["Apache Arrow <dev@arrow.apache.org>"]
 homepage = "https://github.com/apache/arrow"
@@ -26,7 +26,7 @@ repository = "https://github.com/apache/arrow"
 license = "Apache-2.0"
 
 [dependencies]
-arrow = { path = "../arrow", version = "4.0.0-SNAPSHOT" }
+arrow = { path = "../arrow", version = "5.0.0-SNAPSHOT" }
 tonic = "0.4"
 bytes = "1"
 prost = "0.7"
diff --git a/rust/arrow-pyarrow-integration-testing/Cargo.toml b/rust/arrow-pyarrow-integration-testing/Cargo.toml
index ef356b0902e..f95458dbcb5 100644
--- a/rust/arrow-pyarrow-integration-testing/Cargo.toml
+++ b/rust/arrow-pyarrow-integration-testing/Cargo.toml
@@ -18,7 +18,7 @@
 [package]
 name = "arrow-pyarrow-integration-testing"
 description = ""
-version = "4.0.0-SNAPSHOT"
+version = "5.0.0-SNAPSHOT"
 homepage = "https://github.com/apache/arrow"
 repository = "https://github.com/apache/arrow"
 authors = ["Apache Arrow <dev@arrow.apache.org>"]
@@ -31,7 +31,7 @@ name = "arrow_pyarrow_integration_testing"
 crate-type = ["cdylib"]
 
 [dependencies]
-arrow = { path = "../arrow", version = "4.0.0-SNAPSHOT" }
+arrow = { path = "../arrow", version = "5.0.0-SNAPSHOT" }
 pyo3 = { version = "0.12.1", features = ["extension-module"] }
 
 [package.metadata.maturin]
diff --git a/rust/arrow/Cargo.toml b/rust/arrow/Cargo.toml
index 5ab1f8cc02b..ac3b72e57b0 100644
--- a/rust/arrow/Cargo.toml
+++ b/rust/arrow/Cargo.toml
@@ -17,7 +17,7 @@
 
 [package]
 name = "arrow"
-version = "4.0.0-SNAPSHOT"
+version = "5.0.0-SNAPSHOT"
 description = "Rust implementation of Apache Arrow"
 homepage = "https://github.com/apache/arrow"
 repository = "https://github.com/apache/arrow"
diff --git a/rust/benchmarks/Cargo.toml b/rust/benchmarks/Cargo.toml
index 5cdf0f94ac3..2d64482a09d 100644
--- a/rust/benchmarks/Cargo.toml
+++ b/rust/benchmarks/Cargo.toml
@@ -18,7 +18,7 @@
 [package]
 name = "arrow-benchmarks"
 description = "Apache Arrow Benchmarks"
-version = "4.0.0-SNAPSHOT"
+version = "5.0.0-SNAPSHOT"
 edition = "2018"
 authors = ["Apache Arrow <dev@arrow.apache.org>"]
 homepage = "https://github.com/apache/arrow"
diff --git a/rust/datafusion-examples/Cargo.toml b/rust/datafusion-examples/Cargo.toml
index c86e7ccbe3c..673bfe2b87b 100644
--- a/rust/datafusion-examples/Cargo.toml
+++ b/rust/datafusion-examples/Cargo.toml
@@ -18,7 +18,7 @@
 [package]
 name = "datafusion-examples"
 description = "DataFusion usage examples"
-version = "4.0.0-SNAPSHOT"
+version = "5.0.0-SNAPSHOT"
 homepage = "https://github.com/apache/arrow"
 repository = "https://github.com/apache/arrow"
 authors = ["Apache Arrow <dev@arrow.apache.org>"]
diff --git a/rust/datafusion/Cargo.toml b/rust/datafusion/Cargo.toml
index fd1c1b29590..6f46f5da1e5 100644
--- a/rust/datafusion/Cargo.toml
+++ b/rust/datafusion/Cargo.toml
@@ -18,7 +18,7 @@
 [package]
 name = "datafusion"
 description = "DataFusion is an in-memory query engine that uses Apache Arrow as the memory model"
-version = "4.0.0-SNAPSHOT"
+version = "5.0.0-SNAPSHOT"
 homepage = "https://github.com/apache/arrow"
 repository = "https://github.com/apache/arrow"
 authors = ["Apache Arrow <dev@arrow.apache.org>"]
@@ -50,8 +50,8 @@ unicode_expressions = ["unicode-segmentation"]
 [dependencies]
 ahash = "0.7"
 hashbrown = "0.11"
-arrow = { path = "../arrow", version = "4.0.0-SNAPSHOT", features = ["prettyprint"] }
-parquet = { path = "../parquet", version = "4.0.0-SNAPSHOT", features = ["arrow"] }
+arrow = { path = "../arrow", version = "5.0.0-SNAPSHOT", features = ["prettyprint"] }
+parquet = { path = "../parquet", version = "5.0.0-SNAPSHOT", features = ["arrow"] }
 sqlparser = "0.9.0"
 clap = "2.33"
 rustyline = {version = "7.0", optional = true}
diff --git a/rust/datafusion/README.md b/rust/datafusion/README.md
index e5849b84ca7..c9a2562998a 100644
--- a/rust/datafusion/README.md
+++ b/rust/datafusion/README.md
@@ -127,7 +127,7 @@ To get started, add the following to your `Cargo.toml` file:
 
 ```toml
 [dependencies]
-datafusion = "4.0.0-SNAPSHOT"
+datafusion = "5.0.0-SNAPSHOT"
 ```
 
 ## Using DataFusion as a binary
diff --git a/rust/integration-testing/Cargo.toml b/rust/integration-testing/Cargo.toml
index 12564c74f14..9c170457e35 100644
--- a/rust/integration-testing/Cargo.toml
+++ b/rust/integration-testing/Cargo.toml
@@ -18,7 +18,7 @@
 [package]
 name = "arrow-integration-testing"
 description = "Binaries used in the Arrow integration tests"
-version = "4.0.0-SNAPSHOT"
+version = "5.0.0-SNAPSHOT"
 homepage = "https://github.com/apache/arrow"
 repository = "https://github.com/apache/arrow"
 authors = ["Apache Arrow <dev@arrow.apache.org>"]
diff --git a/rust/parquet/Cargo.toml b/rust/parquet/Cargo.toml
index e171196fbc8..b0c91555de0 100644
--- a/rust/parquet/Cargo.toml
+++ b/rust/parquet/Cargo.toml
@@ -17,7 +17,7 @@
 
 [package]
 name = "parquet"
-version = "4.0.0-SNAPSHOT"
+version = "5.0.0-SNAPSHOT"
 license = "Apache-2.0"
 description = "Apache Parquet implementation in Rust"
 homepage = "https://github.com/apache/arrow"
@@ -41,7 +41,7 @@ lz4 = { version = "1.23", optional = true }
 zstd = { version = "0.7", optional = true }
 chrono = "0.4"
 num-bigint = "0.3"
-arrow = { path = "../arrow", version = "4.0.0-SNAPSHOT", optional = true }
+arrow = { path = "../arrow", version = "5.0.0-SNAPSHOT", optional = true }
 base64 = { version = "0.12", optional = true }
 clap = { version = "2.33.3", optional = true }
 serde_json = { version = "1.0", features = ["preserve_order"], optional = true }
@@ -54,7 +54,7 @@ brotli = "3.3"
 flate2 = "1.0"
 lz4 = "1.23"
 zstd = "0.7"
-arrow = { path = "../arrow", version = "4.0.0-SNAPSHOT" }
+arrow = { path = "../arrow", version = "5.0.0-SNAPSHOT" }
 serde_json = { version = "1.0", features = ["preserve_order"] }
 
 [features]
diff --git a/rust/parquet/README.md b/rust/parquet/README.md
index 6abbbc7aaee..836a23bbc12 100644
--- a/rust/parquet/README.md
+++ b/rust/parquet/README.md
@@ -23,7 +23,7 @@
 Add this to your Cargo.toml:
 ```toml
 [dependencies]
-parquet = "4.0.0-SNAPSHOT"
+parquet = "5.0.0-SNAPSHOT"
 ```
 
 and this to your crate root:
@@ -44,7 +44,7 @@ while let Some(record) = iter.next() {
     println!("{}", record);
 }
 ```
-See [crate documentation](https://docs.rs/crate/parquet/4.0.0-SNAPSHOT) on available API.
+See [crate documentation](https://docs.rs/crate/parquet/5.0.0-SNAPSHOT) on available API.
 
 ## Upgrading from versions prior to 4.0
 
diff --git a/rust/parquet_derive/Cargo.toml b/rust/parquet_derive/Cargo.toml
index 6bed07d7a6b..4fbda38c450 100644
--- a/rust/parquet_derive/Cargo.toml
+++ b/rust/parquet_derive/Cargo.toml
@@ -17,7 +17,7 @@
 
 [package]
 name = "parquet_derive"
-version = "4.0.0-SNAPSHOT"
+version = "5.0.0-SNAPSHOT"
 license = "Apache-2.0"
 description = "Derive macros for the Rust implementation of Apache Parquet"
 homepage = "https://github.com/apache/arrow"
@@ -39,4 +39,4 @@ uuid = []
 proc-macro2 = "1.0"
 quote = "1.0"
 syn = { version = "1.0", features = ["full", "extra-traits"] }
-parquet = { path = "../parquet", version = "4.0.0-SNAPSHOT" }
+parquet = { path = "../parquet", version = "5.0.0-SNAPSHOT" }
diff --git a/rust/parquet_derive/README.md b/rust/parquet_derive/README.md
index 47ac8ae056d..2e9e2a04419 100644
--- a/rust/parquet_derive/README.md
+++ b/rust/parquet_derive/README.md
@@ -30,8 +30,8 @@ Derive also has some support for the chrono time library. You must must enable t
 Add this to your Cargo.toml:
 ```toml
 [dependencies]
-parquet = "4.0.0-SNAPSHOT"
-parquet_derive = "4.0.0-SNAPSHOT"
+parquet = "5.0.0-SNAPSHOT"
+parquet_derive = "5.0.0-SNAPSHOT"
 ```
 
 and this to your crate root:
diff --git a/rust/parquet_derive_test/Cargo.toml b/rust/parquet_derive_test/Cargo.toml
index 5914d7ef828..84f88c58d46 100644
--- a/rust/parquet_derive_test/Cargo.toml
+++ b/rust/parquet_derive_test/Cargo.toml
@@ -17,7 +17,7 @@
 
 [package]
 name = "parquet_derive_test"
-version = "4.0.0-SNAPSHOT"
+version = "5.0.0-SNAPSHOT"
 license = "Apache-2.0"
 description = "Integration test package for parquet-derive"
 homepage = "https://github.com/apache/arrow"
@@ -28,5 +28,5 @@ edition = "2018"
 publish = false
 
 [dependencies]
-parquet = { path = "../parquet", version = "4.0.0-SNAPSHOT" }
-parquet_derive = { path = "../parquet_derive", version = "4.0.0-SNAPSHOT" }
+parquet = { path = "../parquet", version = "5.0.0-SNAPSHOT" }
+parquet_derive = { path = "../parquet_derive", version = "5.0.0-SNAPSHOT" }

From 70755a17fcce9bb690b837e72b6c2ba98975684d Mon Sep 17 00:00:00 2001
From: Alessandro Molina <amol@turbogears.org>
Date: Tue, 27 Apr 2021 18:21:31 +0200
Subject: [PATCH 135/719] ARROW-12506: [Python] Improve modularity of pyarrow
 codebase: _feather module

Closes #10131 from amol-/ARROW-12506

Authored-by: Alessandro Molina <amol@turbogears.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 python/CMakeLists.txt                        |  1 +
 python/pyarrow/{feather.pxi => _feather.pyx} | 13 ++++++
 python/pyarrow/feather.py                    | 12 +++--
 python/pyarrow/includes/libarrow.pxd         | 24 ----------
 python/pyarrow/includes/libarrow_feather.pxd | 49 ++++++++++++++++++++
 python/pyarrow/lib.pyx                       |  3 --
 python/setup.py                              |  1 +
 7 files changed, 71 insertions(+), 32 deletions(-)
 rename python/pyarrow/{feather.pxi => _feather.pyx} (87%)
 create mode 100644 python/pyarrow/includes/libarrow_feather.pxd

diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 0714aa412e1..3058431f0f3 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -390,6 +390,7 @@ set(CYTHON_EXTENSIONS
     _fs
     _compute
     _csv
+    _feather
     _json)
 
 set(LINK_LIBS arrow_shared arrow_python_shared)
diff --git a/python/pyarrow/feather.pxi b/python/pyarrow/_feather.pyx
similarity index 87%
rename from python/pyarrow/feather.pxi
rename to python/pyarrow/_feather.pyx
index a608035b56f..3939dd5e818 100644
--- a/python/pyarrow/feather.pxi
+++ b/python/pyarrow/_feather.pyx
@@ -18,6 +18,19 @@
 # ---------------------------------------------------------------------
 # Implement Feather file format
 
+# cython: profile=False
+# distutils: language = c++
+# cython: embedsignature = True
+# cython: language_level=3
+
+from cython.operator cimport dereference as deref
+from pyarrow.includes.common cimport *
+from pyarrow.includes.libarrow cimport *
+from pyarrow.includes.libarrow_feather cimport *
+from pyarrow.lib cimport (check_status, Table, _Weakrefable,
+                          get_writer, get_reader, pyarrow_wrap_table)
+from pyarrow.lib import tobytes
+
 
 class FeatherError(Exception):
     pass
diff --git a/python/pyarrow/feather.py b/python/pyarrow/feather.py
index 025c2330df5..b184e2bf0a0 100644
--- a/python/pyarrow/feather.py
+++ b/python/pyarrow/feather.py
@@ -19,9 +19,11 @@
 import os
 
 from pyarrow.pandas_compat import _pandas_api  # noqa
-from pyarrow.lib import (Codec, FeatherError, Table,  # noqa
+from pyarrow.lib import (Codec, Table,  # noqa
                          concat_tables, schema)
 import pyarrow.lib as ext
+from pyarrow import _feather
+from pyarrow._feather import FeatherError  # noqa: F401
 from pyarrow.vendored.version import Version
 
 
@@ -180,9 +182,9 @@ def write_feather(df, dest, compression=None, compression_level=None,
                                                 _FEATHER_SUPPORTED_CODECS))
 
     try:
-        ext.write_feather(table, dest, compression=compression,
-                          compression_level=compression_level,
-                          chunksize=chunksize, version=version)
+        _feather.write_feather(table, dest, compression=compression,
+                               compression_level=compression_level,
+                               chunksize=chunksize, version=version)
     except Exception:
         if isinstance(dest, str):
             try:
@@ -234,7 +236,7 @@ def read_table(source, columns=None, memory_map=True):
     -------
     table : pyarrow.Table
     """
-    reader = ext.FeatherReader()
+    reader = _feather.FeatherReader()
     reader.open(source, use_memory_map=memory_map)
 
     if columns is None:
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 45f7c4fee94..747808709b4 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -1558,30 +1558,6 @@ cdef extern from "arrow/ipc/api.h" namespace "arrow::ipc" nogil:
             const CIpcWriteOptions& options,
             CIpcPayload* out)
 
-    int kFeatherV1Version" arrow::ipc::feather::kFeatherV1Version"
-    int kFeatherV2Version" arrow::ipc::feather::kFeatherV2Version"
-
-    cdef cppclass CFeatherProperties" arrow::ipc::feather::WriteProperties":
-        int version
-        int chunksize
-        CCompressionType compression
-        int compression_level
-
-    CStatus WriteFeather" arrow::ipc::feather::WriteTable"\
-        (const CTable& table, COutputStream* out,
-         CFeatherProperties properties)
-
-    cdef cppclass CFeatherReader" arrow::ipc::feather::Reader":
-        @staticmethod
-        CResult[shared_ptr[CFeatherReader]] Open(
-            const shared_ptr[CRandomAccessFile]& file)
-        int version()
-        shared_ptr[CSchema] schema()
-
-        CStatus Read(shared_ptr[CTable]* out)
-        CStatus Read(const vector[int] indices, shared_ptr[CTable]* out)
-        CStatus Read(const vector[c_string] names, shared_ptr[CTable]* out)
-
 
 cdef extern from 'arrow/util/value_parsing.h' namespace 'arrow' nogil:
     cdef cppclass CTimestampParser" arrow::TimestampParser":
diff --git a/python/pyarrow/includes/libarrow_feather.pxd b/python/pyarrow/includes/libarrow_feather.pxd
new file mode 100644
index 00000000000..ddfc8b2e53a
--- /dev/null
+++ b/python/pyarrow/includes/libarrow_feather.pxd
@@ -0,0 +1,49 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# distutils: language = c++
+
+from pyarrow.includes.libarrow cimport (CCompressionType, CStatus, CTable,
+                                        COutputStream, CResult, shared_ptr,
+                                        vector, CRandomAccessFile, CSchema,
+                                        c_string)
+
+
+cdef extern from "arrow/ipc/api.h" namespace "arrow::ipc" nogil:
+    int kFeatherV1Version" arrow::ipc::feather::kFeatherV1Version"
+    int kFeatherV2Version" arrow::ipc::feather::kFeatherV2Version"
+
+    cdef cppclass CFeatherProperties" arrow::ipc::feather::WriteProperties":
+        int version
+        int chunksize
+        CCompressionType compression
+        int compression_level
+
+    CStatus WriteFeather" arrow::ipc::feather::WriteTable" \
+        (const CTable& table, COutputStream* out,
+         CFeatherProperties properties)
+
+    cdef cppclass CFeatherReader" arrow::ipc::feather::Reader":
+        @staticmethod
+        CResult[shared_ptr[CFeatherReader]] Open(
+            const shared_ptr[CRandomAccessFile]& file)
+        int version()
+        shared_ptr[CSchema] schema()
+
+        CStatus Read(shared_ptr[CTable]* out)
+        CStatus Read(const vector[int] indices, shared_ptr[CTable]* out)
+        CStatus Read(const vector[c_string] names, shared_ptr[CTable]* out)
diff --git a/python/pyarrow/lib.pyx b/python/pyarrow/lib.pyx
index aa6918b54e3..1866d071121 100644
--- a/python/pyarrow/lib.pyx
+++ b/python/pyarrow/lib.pyx
@@ -145,9 +145,6 @@ include "io-hdfs.pxi"
 # IPC / Messaging
 include "ipc.pxi"
 
-# Feather format
-include "feather.pxi"
-
 # Python serialization
 include "serialization.pxi"
 
diff --git a/python/setup.py b/python/setup.py
index 5c66ff5d859..b4de5799d66 100755
--- a/python/setup.py
+++ b/python/setup.py
@@ -197,6 +197,7 @@ def initialize_options(self):
         '_cuda',
         '_flight',
         '_dataset',
+        '_feather',
         '_parquet',
         '_orc',
         '_plasma',

From 795a48f53b85d74f110f1b8c0cad72c1f96ba255 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Wed, 28 Apr 2021 05:46:26 +0900
Subject: [PATCH 136/719] ARROW-12507: [CI] Remove duplicated cron/nightly
 builds
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #10130 from kszucs/remove-gha-cron

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 .github/workflows/cpp_cron.yml    |  66 ------
 .github/workflows/python_cron.yml | 142 -------------
 dev/tasks/tasks.yml               | 321 ++++++++----------------------
 3 files changed, 87 insertions(+), 442 deletions(-)
 delete mode 100644 .github/workflows/python_cron.yml

diff --git a/.github/workflows/cpp_cron.yml b/.github/workflows/cpp_cron.yml
index cd3c6aebaab..c031e5961cb 100644
--- a/.github/workflows/cpp_cron.yml
+++ b/.github/workflows/cpp_cron.yml
@@ -36,72 +36,6 @@ env:
   ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }}
 
 jobs:
-  docker:
-    name: ${{ matrix.title }}
-    runs-on: ubuntu-latest
-    if: ${{ !contains(github.event.pull_request.title, 'WIP') && github.repository == 'apache/arrow' }}
-    timeout-minutes: 60
-    strategy:
-      fail-fast: false
-      matrix:
-        name:
-          - amd64-debian-10-cpp
-          - amd64-fedora-33-cpp
-          - amd64-ubuntu-18.04-cpp
-          - amd64-ubuntu-20.04-cpp
-        include:
-          - name: amd64-debian-10-cpp
-            image: debian-cpp
-            title: AMD64 Debian 10 C++
-            debian: 10
-          - name: amd64-fedora-33-cpp
-            image: fedora-cpp
-            title: AMD64 Fedora 33 C++
-            fedora: 33
-          - name: amd64-ubuntu-18.04-cpp
-            image: ubuntu-cpp
-            title: AMD64 Ubuntu 18.04 C++
-            ubuntu: 18.04
-          - name: amd64-ubuntu-20.04-cpp
-            image: ubuntu-cpp
-            title: AMD64 Ubuntu 20.04 C++
-            ubuntu: 20.04
-    env:
-      # the defaults here should correspond to the values in .env
-      ARCH: 'amd64'
-      DEBIAN: ${{ matrix.debian || 10 }}
-      FEDORA: ${{ matrix.fedora || 33 }}
-      UBUNTU: ${{ matrix.ubuntu || 18.04 }}
-    steps:
-      - name: Checkout Arrow
-        uses: actions/checkout@v2
-        with:
-          fetch-depth: 0
-      - name: Fetch Submodules and Tags
-        run: ci/scripts/util_checkout.sh
-      - name: Free Up Disk Space
-        run: ci/scripts/util_cleanup.sh
-      - name: Cache Docker Volumes
-        uses: actions/cache@v2
-        with:
-          path: .docker
-          key: ${{ matrix.name }}-${{ hashFiles('cpp/**') }}
-          restore-keys: ${{ matrix.name }}-
-      - name: Setup Python
-        uses: actions/setup-python@v1
-        with:
-          python-version: 3.8
-      - name: Setup Archery
-        run: pip install -e dev/archery[docker]
-      - name: Execute Docker Build
-        run: |
-          sudo sysctl -w kernel.core_pattern="core.%e.%p"
-          ulimit -c unlimited
-          archery docker run ${{ matrix.image }}
-      - name: Docker Push
-        if: success() && github.event_name == 'push' && github.repository == 'apache/arrow'
-        continue-on-error: true
-        run: archery docker push ${{ matrix.image }}
 
   oss-fuzz:
     name: OSS-Fuzz build check
diff --git a/.github/workflows/python_cron.yml b/.github/workflows/python_cron.yml
deleted file mode 100644
index a60abddfd3f..00000000000
--- a/.github/workflows/python_cron.yml
+++ /dev/null
@@ -1,142 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-name: Python Cron
-
-on:
-  push:
-    paths:
-      - '.github/workflows/python_cron.yml'
-  pull_request:
-    paths:
-      - '.github/workflows/python_cron.yml'
-  schedule:
-    - cron: |
-        0 */12 * * *
-
-env:
-  DOCKER_VOLUME_PREFIX: ".docker/"
-  ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }}
-  ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }}
-
-jobs:
-
-  docker:
-    name: ${{ matrix.title }}
-    runs-on: ubuntu-latest
-    if: ${{ !contains(github.event.pull_request.title, 'WIP') && github.repository == 'apache/arrow' }}
-    timeout-minutes: 60
-    strategy:
-      fail-fast: false
-      matrix:
-        name:
-          - debian-10-python-3
-          - fedora-33-python-3
-          - ubuntu-18.04-python-3
-          - conda-python-3.7-dask-latest
-          - conda-python-3.7-turbodbc-latest
-          - conda-python-3.7-kartothek-latest
-          - conda-python-3.7-pandas-0.24
-          - conda-python-3.7-pandas-master
-          - conda-python-3.7-hdfs-2.9.2
-        include:
-          - name: debian-10-python-3
-            cache: debian-10-python-3
-            image: debian-python
-            title: AMD64 Debian 10 Python 3
-            debian: 10
-          - name: fedora-33-python-3
-            cache: fedora-33-python-3
-            image: fedora-python
-            title: AMD64 Fedora 33 Python 3
-            fedora: 33
-          - name: ubuntu-18.04-python-3
-            cache: ubuntu-18.04-python-3
-            image: ubuntu-python
-            title: AMD64 Ubuntu 18.04 Python 3
-            ubuntu: 18.04
-          - name: conda-python-3.7-dask-latest
-            cache: conda-python-3.7
-            image: conda-python-dask
-            title: AMD64 Conda Python 3.7 Dask latest
-            dask: latest
-          - name: conda-python-3.7-turbodbc-latest
-            cache: conda-python-3.7
-            image: conda-python-turbodbc
-            title: AMD64 Conda Python 3.7 Turbodbc latest
-            turbodbc: latest
-          - name: conda-python-3.7-kartothek-latest
-            cache: conda-python-3.7
-            image: conda-python-kartothek
-            title: AMD64 Conda Python 3.7 Kartothek latest
-            kartothek: latest
-          - name: conda-python-3.7-pandas-0.24
-            cache: conda-python-3.7
-            image: conda-python-pandas
-            title: AMD64 Conda Python 3.7 Pandas 0.24
-            pandas: 0.24
-          - name: conda-python-3.7-pandas-master
-            cache: conda-python-3.7
-            image: --no-leaf-cache conda-python-pandas
-            title: AMD64 Conda Python 3.7 Pandas master
-            pandas: master
-          - name: conda-python-3.7-hdfs-2.9.2
-            cache: conda-python-3.7
-            image: conda-python-hdfs
-            title: AMD64 Conda Python 3.7 HDFS 2.9.2
-            hdfs: 2.9.2
-    env:
-      # the defaults here should correspond to the values in .env
-      DEBIAN: ${{ matrix.debian || 10 }}
-      FEDORA: ${{ matrix.fedora || 33 }}
-      UBUNTU: ${{ matrix.ubuntu || 18.04 }}
-      PYTHON: ${{ matrix.python || 3.7 }}
-      HDFS: ${{ matrix.hdfs || '2.9.2' }}
-      DASK: ${{ matrix.dask || 'latest' }}
-      TURBODBC: ${{ matrix.turbodbc || 'latest' }}
-      PANDAS: ${{ matrix.pandas || 'latest' }}
-      KARTOTHEK: ${{ matrix.kartothek || 'latest' }}
-    steps:
-      - name: Checkout Arrow
-        uses: actions/checkout@v2
-        with:
-          fetch-depth: 0
-      - name: Fetch Submodules and Tags
-        run: ci/scripts/util_checkout.sh
-      - name: Free Up Disk Space
-        run: ci/scripts/util_cleanup.sh
-      - name: Cache Docker Volumes
-        uses: actions/cache@v2
-        with:
-          path: .docker
-          key: ${{ matrix.cache }}-${{ hashFiles('cpp/**') }}
-          restore-keys: ${{ matrix.cache }}-
-      - name: Setup Python
-        uses: actions/setup-python@v1
-        with:
-          python-version: 3.8
-      - name: Setup Archery
-        run: pip install -e dev/archery[docker]
-      - name: Execute Docker Build
-        run: |
-          sudo sysctl -w kernel.core_pattern="core.%e.%p"
-          ulimit -c unlimited
-          archery docker run ${{ matrix.image }}
-      - name: Docker Push
-        if: success() && github.event_name == 'push' && github.repository == 'apache/arrow'
-        continue-on-error: true
-        run: archery docker push ${{ matrix.image }}
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index cc860e068da..7c542536dbf 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -19,7 +19,7 @@ groups:
   # these groups are just for convenience
   # makes it easier to submit related tasks
 
-  ############################# Packaging tasks ###############################
+{############################# Packaging tasks ###############################}
 
   conda:
     - conda-*
@@ -57,7 +57,7 @@ groups:
     - python-sdist
     - nuget
 
-  ############################# Testing tasks #################################
+{############################# Testing tasks #################################}
 
   test:
     - test-*
@@ -118,7 +118,7 @@ groups:
   verify-rc-source-linux:
     - verify-rc-source-linux-*
 
-  ######################## Tasks to run regularly #############################
+{######################## Tasks to run regularly #############################}
 
   nightly:
     - debian-*
@@ -144,7 +144,7 @@ tasks:
   #              e.g.:
   #     - pyarrow-{no_rc_version}-py36(h[a-z0-9]+)_0-linux-64.tar.bz2
 
-  ############################## Conda Linux ##################################
+{############################## Conda Linux ##################################}
 
   conda-clean:
     ci: azure
@@ -200,77 +200,31 @@ tasks:
       - arrow-cpp-{no_rc_version}-py39(h[a-z0-9]+)_0_cpu.tar.bz2
       - pyarrow-{no_rc_version}-py39(h[a-z0-9]+)_0_cpu.tar.bz2
 
-  conda-linux-gcc-py36-cuda:
-    ci: azure
-    template: conda-recipes/azure.linux.yml
-    params:
-      config: linux_64_cuda_compiler_version10.2numpy1.17python3.6.____cpython
-    artifacts:
-      - arrow-cpp-{no_rc_version}-py36(h[a-z0-9]+)_0_cuda.tar.bz2
-      - pyarrow-{no_rc_version}-py36(h[a-z0-9]+)_0_cuda.tar.bz2
+{% for python_version, numpy_version in [("3.6", "1.17"),
+                                         ("3.7", "1.17"),
+                                         ("3.8", "1.17"),
+                                         ("3.9", "1.19")] %}
+  {% set pyver = python_version | replace(".", "") %}
 
-  conda-linux-gcc-py37-cuda:
+  conda-linux-gcc-py{{ pyver }}-cuda:
     ci: azure
     template: conda-recipes/azure.linux.yml
     params:
-      config: linux_64_cuda_compiler_version10.2numpy1.17python3.7.____cpython
+      config: linux_64_cuda_compiler_version10.2numpy{{ numpy_version }}python{{ python_version }}.____cpython
     artifacts:
-      - arrow-cpp-{no_rc_version}-py37(h[a-z0-9]+)_0_cuda.tar.bz2
-      - pyarrow-{no_rc_version}-py37(h[a-z0-9]+)_0_cuda.tar.bz2
+      - arrow-cpp-{no_rc_version}-py{{ pyver }}(h[a-z0-9]+)_0_cuda.tar.bz2
+      - pyarrow-{no_rc_version}-py{{ pyver }}(h[a-z0-9]+)_0_cuda.tar.bz2
 
-  conda-linux-gcc-py38-cuda:
-    ci: azure
-    template: conda-recipes/azure.linux.yml
-    params:
-      config: linux_64_cuda_compiler_version10.2numpy1.17python3.8.____cpython
-    artifacts:
-      - arrow-cpp-{no_rc_version}-py38(h[a-z0-9]+)_0_cuda.tar.bz2
-      - pyarrow-{no_rc_version}-py38(h[a-z0-9]+)_0_cuda.tar.bz2
-
-  conda-linux-gcc-py39-cuda:
-    ci: azure
-    template: conda-recipes/azure.linux.yml
-    params:
-      config: linux_64_cuda_compiler_version10.2numpy1.19python3.9.____cpython
-    artifacts:
-      - arrow-cpp-{no_rc_version}-py39(h[a-z0-9]+)_0_cuda.tar.bz2
-      - pyarrow-{no_rc_version}-py39(h[a-z0-9]+)_0_cuda.tar.bz2
-
-  conda-linux-gcc-py36-arm64:
+  conda-linux-gcc-py{{ pyver }}-arm64:
     ci: drone
     template: conda-recipes/drone.yml
     params:
-      config: linux_aarch64_python3.6.____cpython
+      config: linux_aarch64_python{{ python_version }}.____cpython
     artifacts:
-      - arrow-cpp-{no_rc_version}-py36(h[a-z0-9]+)_0_cpu.tar.bz2
-      - pyarrow-{no_rc_version}-py36(h[a-z0-9]+)_0_cpu.tar.bz2
+      - arrow-cpp-{no_rc_version}-py{{ pyver }}(h[a-z0-9]+)_0_cpu.tar.bz2
+      - pyarrow-{no_rc_version}-py{{ pyver }}(h[a-z0-9]+)_0_cpu.tar.bz2
 
-  conda-linux-gcc-py37-arm64:
-    ci: drone
-    template: conda-recipes/drone.yml
-    params:
-      config: linux_aarch64_python3.7.____cpython
-    artifacts:
-      - arrow-cpp-{no_rc_version}-py37(h[a-z0-9]+)_0_cpu.tar.bz2
-      - pyarrow-{no_rc_version}-py37(h[a-z0-9]+)_0_cpu.tar.bz2
-
-  conda-linux-gcc-py38-arm64:
-    ci: drone
-    template: conda-recipes/drone.yml
-    params:
-      config: linux_aarch64_python3.8.____cpython
-    artifacts:
-      - arrow-cpp-{no_rc_version}-py38(h[a-z0-9]+)_0_cpu.tar.bz2
-      - pyarrow-{no_rc_version}-py38(h[a-z0-9]+)_0_cpu.tar.bz2
-
-  conda-linux-gcc-py39-arm64:
-    ci: drone
-    template: conda-recipes/drone.yml
-    params:
-      config: linux_aarch64_python3.9.____cpython
-    artifacts:
-      - arrow-cpp-{no_rc_version}-py39(h[a-z0-9]+)_0_cpu.tar.bz2
-      - pyarrow-{no_rc_version}-py39(h[a-z0-9]+)_0_cpu.tar.bz2
+{% endfor %}
 
   ############################## Conda OSX ####################################
 
@@ -433,7 +387,7 @@ tasks:
     artifacts:
       - pyarrow-{no_rc_version}.tar.gz
 
-  ############################## Linux PKGS ####################################
+{############################## Linux PKGS ####################################}
 
   debian-buster-amd64:
     ci: github
@@ -1233,14 +1187,14 @@ tasks:
         TEST_DEFAULT: 0
       artifact: "wheels"
 
-{% for target in ["csharp",
-                  "go",
-                  "integration",
-                  "java",
-                  "js",
-                  "python",
-                  "ruby",
-                  "rust"] %}
+  {% for target in ["csharp",
+                    "go",
+                    "integration",
+                    "java",
+                    "js",
+                    "python",
+                    "ruby",
+                    "rust"] %}
 
   verify-rc-source-{{ platform }}-{{ target }}:
     ci: github
@@ -1252,7 +1206,7 @@ tasks:
         TEST_{{ target|upper }}: 1
       artifact: "source"
 
-{% endfor %}
+  {% endfor %}
 
 {% endfor %}
 
@@ -1283,21 +1237,23 @@ tasks:
       run: {{ image }}
 {% endfor %}
 
-  test-debian-10-cpp:
+{% for ubuntu_version in ["18.04", "20.04"] %}
+  test-ubuntu-{{ ubuntu_version }}-cpp:
     ci: github
     template: docker-tests/github.linux.yml
     params:
       env:
-        DEBIAN: 10
-      run: debian-cpp
+        UBUNTU: {{ ubuntu_version }}
+      run: ubuntu-cpp
+{% endfor %}
 
-  test-ubuntu-18.04-cpp:
+  test-debian-10-cpp:
     ci: github
     template: docker-tests/github.linux.yml
     params:
       env:
-        UBUNTU: 18.04
-      run: ubuntu-cpp
+        DEBIAN: 10
+      run: debian-cpp
 
   test-fedora-33-cpp:
     ci: github
@@ -1323,14 +1279,6 @@ tasks:
         UBUNTU: 18.04
       run: "-e ARROW_BUILD_SHARED=OFF -e ARROW_BUILD_STATIC=ON -e ARROW_TEST_LINKAGE=static ubuntu-cpp"
 
-  test-ubuntu-20.04-cpp:
-    ci: github
-    template: docker-tests/github.linux.yml
-    params:
-      env:
-        UBUNTU: 20.04
-      run: ubuntu-cpp
-
 {% for cpp_standard in [14, 17] %}
   test-ubuntu-20.04-cpp-{{ cpp_standard }}:
     ci: github
@@ -1420,37 +1368,20 @@ tasks:
     ci: github
     template: r/github.devdocs.yml
 
-  test-r-rhub-ubuntu-gcc-release:
-    ci: azure
-    template: r/azure.linux.yml
-    params:
-      r_org: rhub
-      r_image: ubuntu-gcc-release
-      r_tag: latest
-
-  test-r-rocker-r-base-latest:
+{% for r_org, r_image, r_tag in [("rhub", "ubuntu-gcc-release", "latest"),
+                                 ("rocker", "r-base", "latest"),
+                                 ("rstudio", "r-base", "3.6-bionic"),
+                                 ("rstudio", "r-base", "3.6-centos8"),
+                                 ("rstudio", "r-base", "3.6-opensuse15"),
+                                 ("rstudio", "r-base", "3.6-opensuse42")] %}
+  test-r-{{ r_org }}-{{ r_image }}-{{ r_tag }}:
     ci: azure
     template: r/azure.linux.yml
     params:
       r_org: rocker
       r_image: r-base
       r_tag: latest
-
-  test-r-rstudio-r-base-3.6-bionic:
-    ci: azure
-    template: r/azure.linux.yml
-    params:
-      r_org: rstudio
-      r_image: r-base
-      r_tag: 3.6-bionic
-
-  test-r-rstudio-r-base-3.6-centos8:
-    ci: azure
-    template: r/azure.linux.yml
-    params:
-      r_org: rstudio
-      r_image: r-base
-      r_tag: 3.6-centos8
+{% endfor %}
 
   test-r-rstudio-r-base-3.6-centos7-devtoolset-8:
     ci: azure
@@ -1461,22 +1392,6 @@ tasks:
       r_tag: 3.6-centos7
       devtoolset_version: 8
 
-  test-r-rstudio-r-base-3.6-opensuse15:
-    ci: azure
-    template: r/azure.linux.yml
-    params:
-      r_org: rstudio
-      r_image: r-base
-      r_tag: 3.6-opensuse15
-
-  test-r-rstudio-r-base-3.6-opensuse42:
-    ci: azure
-    template: r/azure.linux.yml
-    params:
-      r_org: rstudio
-      r_image: r-base
-      r_tag: 3.6-opensuse42
-
   test-r-minimal-build:
     ci: azure
     template: r/azure.linux.yml
@@ -1535,151 +1450,94 @@ tasks:
 
   ############################## Integration tests ############################
 
-  test-conda-python-3.7-pandas-latest:
+{% for python_version, pandas_version, cache_leaf in [("3.6", "0.23", True),
+                                                      ("3.7", "0.24", True),
+                                                      ("3.7", "latest", False),
+                                                      ("3.8", "latest", False),
+                                                      ("3.8", "nightly", False),
+                                                      ("3.7", "master", False)] %}
+  test-conda-python-{{ python_version }}-pandas-{{ pandas_version }}:
     ci: github
     template: docker-tests/github.linux.yml
     params:
       env:
-        PYTHON: 3.7
-        PANDAS: latest
-      # use the latest pandas release, so prevent reusing any cached layers
-      run: --no-leaf-cache conda-python-pandas
-
-  test-conda-python-3.8-pandas-latest:
-    ci: github
-    template: docker-tests/github.linux.yml
-    params:
-      env:
-        PYTHON: 3.8
-        PANDAS: latest
+        PYTHON: {{ python_version }}
+        PANDAS: {{ pandas_version }}
+    {% if cache_leaf %}
+      run: conda-python-pandas
+    {% else %}
       # use the latest pandas release, so prevent reusing any cached layers
       run: --no-leaf-cache conda-python-pandas
+    {% endif %}
+{% endfor %}
 
-  test-conda-python-3.8-pandas-nightly:
-    ci: github
-    template: docker-tests/github.linux.yml
-    params:
-      env:
-        PYTHON: 3.8
-        PANDAS: nightly
-        NUMPY: nightly
-      run: --no-leaf-cache conda-python-pandas
-
-  test-conda-python-3.7-pandas-master:
-    ci: github
-    template: docker-tests/github.linux.yml
-    params:
-      env:
-        PYTHON: 3.7
-        PANDAS: master
-      # use the master branch of pandas, so prevent reusing any cached layers
-      run: --no-leaf-cache conda-python-pandas
-
-  test-conda-python-3.6-pandas-0.23:
-    ci: github
-    template: docker-tests/github.linux.yml
-    params:
-      env:
-        PYTHON: 3.6
-        PANDAS: 0.23
-      run: conda-python-pandas
-
-  test-conda-python-3.7-dask-latest:
+{% for dask_version in ["latest", "master"] %}
+  test-conda-python-3.7-dask-{{ dask_version }}:
     ci: github
     template: docker-tests/github.linux.yml
     params:
       env:
         PYTHON: 3.7
-        DASK: latest
+        DASK: {{ dask_version }}
       # use the latest dask release, so prevent reusing any cached layers
       run: --no-leaf-cache conda-python-dask
+{% endfor %}
 
-  test-conda-python-3.8-dask-master:
-    ci: github
-    template: docker-tests/github.linux.yml
-    params:
-      env:
-        PYTHON: 3.8
-        DASK: master
-      # use the master branch of dask, so prevent reusing any cached layers
-      run: --no-leaf-cache conda-python-dask
-
-  test-conda-python-3.8-jpype:
-    ci: github
-    template: docker-tests/github.linux.yml
-    params:
-      env:
-        PYTHON: 3.8
-      run: conda-python-jpype
-
-  test-conda-python-3.7-turbodbc-latest:
+{% for turbodbc_version in ["latest", "master"] %}
+  test-conda-python-3.7-turbodbc-{{ turbodbc_version }}:
     ci: github
     template: docker-tests/github.linux.yml
     params:
       env:
         PYTHON: 3.7
-        TURBODBC: latest
+        TURBODBC: {{ turbodbc_version }}
       # use the latest turbodbc release, so prevent reusing any cached layers
       run: --no-leaf-cache conda-python-turbodbc
+{% endfor %}
 
-  test-conda-python-3.7-turbodbc-master:
-    ci: github
-    template: docker-tests/github.linux.yml
-    params:
-      env:
-        PYTHON: 3.7
-        TURBODBC: master
-      # use the master branch of dask, so prevent reusing any cached layers
-      run: --no-leaf-cache conda-python-turbodbc
-
-  test-conda-python-3.7-kartothek-latest:
-    ci: github
-    template: docker-tests/github.linux.yml
-    params:
-      env:
-        PYTHON: 3.7
-        KARTOTHEK: latest
-      run: --no-leaf-cache conda-python-kartothek
-
-  test-conda-python-3.7-kartothek-master:
+{% for kartothek_version in ["latest", "master"] %}
+  test-conda-python-3.7-kartothek-{{ kartothek_version }}:
     ci: github
     template: docker-tests/github.linux.yml
     params:
       env:
         PYTHON: 3.7
-        KARTOTHEK: master
-      # use the master branch of kartothek, so prevent reusing any layers
+        KARTOTHEK: {{ kartothek_version }}
       run: --no-leaf-cache conda-python-kartothek
+{% endfor %}
 
-  test-conda-python-3.7-hdfs-3.2:
+{% for hdfs_version in ["2.9.2", "3.2.1"] %}
+  test-conda-python-3.7-hdfs-{{ hdfs_version }}:
     ci: github
     template: docker-tests/github.linux.yml
     params:
       env:
         PYTHON: 3.7
-        HDFS: 3.2.1
+        HDFS: {{ hdfs_version }}
       run: conda-python-hdfs
+{% endfor %}
 
-  test-conda-python-3.7-spark-branch-3.0:
+{% for python_version, spark_version, test_pyarrow_only in [("3.7", "branch-3.0", "true"),
+                                                            ("3.8", "master", "false")] %}
+  test-conda-python-{{ python_version }}-spark-{{ spark_version }}:
     ci: github
     template: docker-tests/github.linux.yml
     params:
       env:
-        PYTHON: 3.7
-        SPARK: "branch-3.0"
-        TEST_PYARROW_ONLY: "true"
+        PYTHON: {{ python_version }}
+        SPARK: {{ spark_version }}
+        TEST_PYARROW_ONLY: {{ test_pyarrow_only }}
       # use the branch-3.0 of spark, so prevent reusing any layers
       run: --no-leaf-cache conda-python-spark
+{% endfor %}
 
-  test-conda-python-3.8-spark-master:
+  test-conda-python-3.8-jpype:
     ci: github
     template: docker-tests/github.linux.yml
     params:
       env:
         PYTHON: 3.8
-        SPARK: master
-      # use the master branch of spark, so prevent reusing any layers
-      run: --no-leaf-cache conda-python-spark
+      run: conda-python-jpype
 
   # Remove the "skipped-" prefix in ARROW-8475
   skipped-test-conda-cpp-hiveserver2:
@@ -1688,16 +1546,11 @@ tasks:
     params:
       run: conda-cpp-hiveserver2
 
-  example-cpp-minimal-build-static:
-    ci: github
-    template: cpp-examples/github.linux.yml
-    params:
-      type: minimal_build
-      run: static
-
-  example-cpp-minimal-build-static-system-dependency:
+{% for kind in ["static", "static-system-dependency"] %}
+  example-cpp-minimal-build-{{ kind }}:
     ci: github
     template: cpp-examples/github.linux.yml
     params:
       type: minimal_build
-      run: static-system-dependency
+      run: {{ kind }}
+{% endfor %}

From b0e51c5c12439db56aa9051eeca81c41df1f052a Mon Sep 17 00:00:00 2001
From: Eduardo Ponce <edponce00@gmail.com>
Date: Tue, 27 Apr 2021 17:05:16 -0400
Subject: [PATCH 137/719] ARROW-11950: [C++][Compute] Add unary negative kernel

This pull request adds the Negate arithmetic compute kernel for integral and floating-point types.  The NegateChecked version is not implemented for unsigned integral types and overflow behavior is consistent with equivalent Add/Subtract operations.  The Negate kernels are registered as "negate" and "negate_checked".

This PR also extends support for unary arithmetic compute kernels and tests.

@bkietz please review

Closes #10113 from edponce/ARROW-11950-Compute-Add-unary-negative-kernel

Authored-by: Eduardo Ponce <edponce00@gmail.com>
Signed-off-by: Benjamin Kietzman <bengilgit@gmail.com>
---
 cpp/src/arrow/compute/api_scalar.cc           |   8 +
 cpp/src/arrow/compute/api_scalar.h            |  11 +
 .../compute/kernels/scalar_arithmetic.cc      | 118 ++++++++-
 .../compute/kernels/scalar_arithmetic_test.cc | 240 ++++++++++++++++++
 cpp/src/arrow/compute/kernels/test_util.cc    |   6 +
 cpp/src/arrow/compute/kernels/test_util.h     |   3 +
 cpp/src/arrow/testing/gtest_util.h            |   4 +
 cpp/src/arrow/util/int_util_internal.h        |  28 ++
 docs/source/cpp/compute.rst                   |  20 +-
 9 files changed, 419 insertions(+), 19 deletions(-)

diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc
index d169fd2ebde..c7c049af980 100644
--- a/cpp/src/arrow/compute/api_scalar.cc
+++ b/cpp/src/arrow/compute/api_scalar.cc
@@ -41,6 +41,14 @@ namespace compute {
 // ----------------------------------------------------------------------
 // Arithmetic
 
+#define SCALAR_ARITHMETIC_UNARY(NAME, REGISTRY_NAME, REGISTRY_CHECKED_NAME)            \
+  Result<Datum> NAME(const Datum& arg, ArithmeticOptions options, ExecContext* ctx) {  \
+    auto func_name = (options.check_overflow) ? REGISTRY_CHECKED_NAME : REGISTRY_NAME; \
+    return CallFunction(func_name, {arg}, ctx);                                        \
+  }
+
+SCALAR_ARITHMETIC_UNARY(Negate, "negate", "negate_checked")
+
 #define SCALAR_ARITHMETIC_BINARY(NAME, REGISTRY_NAME, REGISTRY_CHECKED_NAME)           \
   Result<Datum> NAME(const Datum& left, const Datum& right, ArithmeticOptions options, \
                      ExecContext* ctx) {                                               \
diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h
index 53892ff6b3c..3e390df47e7 100644
--- a/cpp/src/arrow/compute/api_scalar.h
+++ b/cpp/src/arrow/compute/api_scalar.h
@@ -213,6 +213,17 @@ Result<Datum> Divide(const Datum& left, const Datum& right,
                      ArithmeticOptions options = ArithmeticOptions(),
                      ExecContext* ctx = NULLPTR);
 
+/// \brief Negate a value. Array values can be of arbitrary length. If argument
+/// is null the result will be null.
+///
+/// \param[in] arg the value negated
+/// \param[in] options arithmetic options (overflow handling), optional
+/// \param[in] ctx the function execution context, optional
+/// \return the elementwise negation
+ARROW_EXPORT
+Result<Datum> Negate(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(),
+                     ExecContext* ctx = NULLPTR);
+
 /// \brief Raise the values of base array to the power of the exponent array values.
 /// Array values must be the same length. If either base or exponent is null the result
 /// will be null.
diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
index 7b9b23e7ff8..f6f7555ab61 100644
--- a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
@@ -18,6 +18,7 @@
 #include <cmath>
 
 #include "arrow/compute/kernels/common.h"
+#include "arrow/type_traits.h"
 #include "arrow/util/int_util_internal.h"
 #include "arrow/util/macros.h"
 
@@ -26,6 +27,7 @@ namespace arrow {
 using internal::AddWithOverflow;
 using internal::DivideWithOverflow;
 using internal::MultiplyWithOverflow;
+using internal::NegateWithOverflow;
 using internal::SubtractWithOverflow;
 
 namespace compute {
@@ -33,6 +35,8 @@ namespace internal {
 
 using applicator::ScalarBinaryEqualTypes;
 using applicator::ScalarBinaryNotNullEqualTypes;
+using applicator::ScalarUnary;
+using applicator::ScalarUnaryNotNull;
 
 namespace {
 
@@ -246,6 +250,49 @@ struct DivideChecked {
   }
 };
 
+struct Negate {
+  template <typename T, typename Arg>
+  static constexpr enable_if_floating_point<T> Call(KernelContext*, Arg arg, Status*) {
+    return -arg;
+  }
+
+  template <typename T, typename Arg>
+  static constexpr enable_if_unsigned_integer<T> Call(KernelContext*, Arg arg, Status*) {
+    return ~arg + 1;
+  }
+
+  template <typename T, typename Arg>
+  static constexpr enable_if_signed_integer<T> Call(KernelContext*, Arg arg, Status* st) {
+    return arrow::internal::SafeSignedNegate(arg);
+  }
+};
+
+struct NegateChecked {
+  template <typename T, typename Arg>
+  static enable_if_signed_integer<T> Call(KernelContext*, Arg arg, Status* st) {
+    static_assert(std::is_same<T, Arg>::value, "");
+    T result = 0;
+    if (ARROW_PREDICT_FALSE(NegateWithOverflow(arg, &result))) {
+      *st = Status::Invalid("overflow");
+    }
+    return result;
+  }
+
+  template <typename T, typename Arg>
+  static enable_if_unsigned_integer<T> Call(KernelContext* ctx, Arg arg, Status* st) {
+    static_assert(std::is_same<T, Arg>::value, "");
+    DCHECK(false) << "This is included only for the purposes of instantiability from the "
+                     "arithmetic kernel generator";
+    return 0;
+  }
+
+  template <typename T, typename Arg>
+  static constexpr enable_if_floating_point<T> Call(KernelContext*, Arg arg, Status* st) {
+    static_assert(std::is_same<T, Arg>::value, "");
+    return -arg;
+  }
+};
+
 struct Power {
   ARROW_NOINLINE
   static uint64_t IntegerPower(uint64_t base, uint64_t exp) {
@@ -310,7 +357,7 @@ struct PowerChecked {
 
 // Generate a kernel given an arithmetic functor
 template <template <typename... Args> class KernelGenerator, typename Op>
-ArrayKernelExec NumericEqualTypesBinary(detail::GetTypeId get_id) {
+ArrayKernelExec ArithmeticExecFromOp(detail::GetTypeId get_id) {
   switch (get_id.id) {
     case Type::INT8:
       return KernelGenerator<Int8Type, Int8Type, Op>::Exec;
@@ -349,10 +396,14 @@ struct ArithmeticFunction : ScalarFunction {
     if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
 
     EnsureDictionaryDecoded(values);
-    ReplaceNullWithOtherType(values);
 
-    if (auto type = CommonNumeric(*values)) {
-      ReplaceTypes(type, values);
+    // Only promote types for binary functions
+    if (values->size() == 2) {
+      ReplaceNullWithOtherType(values);
+
+      if (auto type = CommonNumeric(*values)) {
+        ReplaceTypes(type, values);
+      }
     }
 
     if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
@@ -365,7 +416,7 @@ std::shared_ptr<ScalarFunction> MakeArithmeticFunction(std::string name,
                                                        const FunctionDoc* doc) {
   auto func = std::make_shared<ArithmeticFunction>(name, Arity::Binary(), doc);
   for (const auto& ty : NumericTypes()) {
-    auto exec = NumericEqualTypesBinary<ScalarBinaryEqualTypes, Op>(ty);
+    auto exec = ArithmeticExecFromOp<ScalarBinaryEqualTypes, Op>(ty);
     DCHECK_OK(func->AddKernel({ty, ty}, ty, exec));
   }
   return func;
@@ -378,12 +429,38 @@ std::shared_ptr<ScalarFunction> MakeArithmeticFunctionNotNull(std::string name,
                                                               const FunctionDoc* doc) {
   auto func = std::make_shared<ArithmeticFunction>(name, Arity::Binary(), doc);
   for (const auto& ty : NumericTypes()) {
-    auto exec = NumericEqualTypesBinary<ScalarBinaryNotNullEqualTypes, Op>(ty);
+    auto exec = ArithmeticExecFromOp<ScalarBinaryNotNullEqualTypes, Op>(ty);
     DCHECK_OK(func->AddKernel({ty, ty}, ty, exec));
   }
   return func;
 }
 
+template <typename Op>
+std::shared_ptr<ScalarFunction> MakeUnaryArithmeticFunction(std::string name,
+                                                            const FunctionDoc* doc) {
+  auto func = std::make_shared<ArithmeticFunction>(name, Arity::Unary(), doc);
+  for (const auto& ty : NumericTypes()) {
+    auto exec = ArithmeticExecFromOp<ScalarUnary, Op>(ty);
+    DCHECK_OK(func->AddKernel({ty}, ty, exec));
+  }
+  return func;
+}
+
+// Like MakeUnaryArithmeticFunction, but for signed arithmetic ops that need to run
+// only on non-null output.
+template <typename Op>
+std::shared_ptr<ScalarFunction> MakeUnarySignedArithmeticFunctionNotNull(
+    std::string name, const FunctionDoc* doc) {
+  auto func = std::make_shared<ArithmeticFunction>(name, Arity::Unary(), doc);
+  for (const auto& ty : NumericTypes()) {
+    if (!arrow::is_unsigned_integer(ty->id())) {
+      auto exec = ArithmeticExecFromOp<ScalarUnaryNotNull, Op>(ty);
+      DCHECK_OK(func->AddKernel({ty}, ty, exec));
+    }
+  }
+  return func;
+}
+
 const FunctionDoc add_doc{"Add the arguments element-wise",
                           ("Results will wrap around on integer overflow.\n"
                            "Use function \"add_checked\" if you want overflow\n"
@@ -396,14 +473,14 @@ const FunctionDoc add_checked_doc{
      "doesn't fail on overflow, use function \"add\"."),
     {"x", "y"}};
 
-const FunctionDoc sub_doc{"Substract the arguments element-wise",
+const FunctionDoc sub_doc{"Subtract the arguments element-wise",
                           ("Results will wrap around on integer overflow.\n"
                            "Use function \"subtract_checked\" if you want overflow\n"
                            "to return an error."),
                           {"x", "y"}};
 
 const FunctionDoc sub_checked_doc{
-    "Substract the arguments element-wise",
+    "Subtract the arguments element-wise",
     ("This function returns an error on overflow.  For a variant that\n"
      "doesn't fail on overflow, use function \"subtract\"."),
     {"x", "y"}};
@@ -434,6 +511,18 @@ const FunctionDoc div_checked_doc{
      "integer overflow is encountered."),
     {"dividend", "divisor"}};
 
+const FunctionDoc negate_doc{"Negate the argument element-wise",
+                             ("Results will wrap around on integer overflow.\n"
+                              "Use function \"negate_checked\" if you want overflow\n"
+                              "to return an error."),
+                             {"x"}};
+
+const FunctionDoc negate_checked_doc{
+    "Negate the arguments element-wise",
+    ("This function returns an error on overflow.  For a variant that\n"
+     "doesn't fail on overflow, use function \"negate\"."),
+    {"x"}};
+
 const FunctionDoc pow_doc{
     "Raise arguments to power element-wise",
     ("Integer to negative integer power returns an error. However, integer overflow\n"
@@ -445,7 +534,6 @@ const FunctionDoc pow_checked_doc{
     ("An error is returned when integer to negative integer power is encountered,\n"
      "or integer overflow is encountered."),
     {"base", "exponent"}};
-
 }  // namespace
 
 void RegisterScalarArithmetic(FunctionRegistry* registry) {
@@ -465,8 +553,7 @@ void RegisterScalarArithmetic(FunctionRegistry* registry) {
   // Add subtract(timestamp, timestamp) -> duration
   for (auto unit : AllTimeUnits()) {
     InputType in_type(match::TimestampTypeUnit(unit));
-    auto exec =
-        NumericEqualTypesBinary<ScalarBinaryEqualTypes, Subtract>(Type::TIMESTAMP);
+    auto exec = ArithmeticExecFromOp<ScalarBinaryEqualTypes, Subtract>(Type::TIMESTAMP);
     DCHECK_OK(subtract->AddKernel({in_type, in_type}, duration(unit), std::move(exec)));
   }
 
@@ -495,6 +582,15 @@ void RegisterScalarArithmetic(FunctionRegistry* registry) {
       MakeArithmeticFunctionNotNull<DivideChecked>("divide_checked", &div_checked_doc);
   DCHECK_OK(registry->AddFunction(std::move(divide_checked)));
 
+  // ----------------------------------------------------------------------
+  auto negate = MakeUnaryArithmeticFunction<Negate>("negate", &negate_doc);
+  DCHECK_OK(registry->AddFunction(std::move(negate)));
+
+  // ----------------------------------------------------------------------
+  auto negate_checked = MakeUnarySignedArithmeticFunctionNotNull<NegateChecked>(
+      "negate_checked", &negate_checked_doc);
+  DCHECK_OK(registry->AddFunction(std::move(negate_checked)));
+
   // ----------------------------------------------------------------------
   auto power = MakeArithmeticFunction<Power>("power", &pow_doc);
   DCHECK_OK(registry->AddFunction(std::move(power)));
diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
index cd5f298ae51..fafba4b331b 100644
--- a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
@@ -54,6 +54,116 @@ std::shared_ptr<Array> TweakValidityBit(const std::shared_ptr<Array>& array,
   return MakeArray(data);
 }
 
+template <typename T>
+class TestUnaryArithmetic : public TestBase {
+ protected:
+  using ArrowType = T;
+  using CType = typename ArrowType::c_type;
+
+  static std::shared_ptr<DataType> type_singleton() {
+    return TypeTraits<ArrowType>::type_singleton();
+  }
+
+  using UnaryFunction =
+      std::function<Result<Datum>(const Datum&, ArithmeticOptions, ExecContext*)>;
+
+  void SetUp() override { options_.check_overflow = false; }
+
+  std::shared_ptr<Scalar> MakeNullScalar() {
+    return arrow::MakeNullScalar(type_singleton());
+  }
+
+  std::shared_ptr<Scalar> MakeScalar(CType value) {
+    return *arrow::MakeScalar(type_singleton(), value);
+  }
+
+  // (Scalar)
+  void AssertUnaryOp(UnaryFunction func, CType argument, CType expected) {
+    auto arg = MakeScalar(argument);
+    auto exp = MakeScalar(expected);
+    ASSERT_OK_AND_ASSIGN(auto actual, func(arg, options_, nullptr));
+    AssertScalarsApproxEqual(*exp, *actual.scalar(), /*verbose=*/true);
+  }
+
+  // (Scalar)
+  void AssertUnaryOp(UnaryFunction func, const std::shared_ptr<Scalar>& arg,
+                     const std::shared_ptr<Scalar>& expected) {
+    ASSERT_OK_AND_ASSIGN(auto actual, func(arg, options_, nullptr));
+    AssertScalarsApproxEqual(*expected, *actual.scalar(), /*verbose=*/true);
+  }
+
+  // (Array)
+  void AssertUnaryOp(UnaryFunction func, const std::string& argument,
+                     const std::string& expected) {
+    auto arg = ArrayFromJSON(type_singleton(), argument);
+    AssertUnaryOp(func, arg, expected);
+  }
+
+  // (Array)
+  void AssertUnaryOp(UnaryFunction func, const std::shared_ptr<Array>& arg,
+                     const std::string& expected_json) {
+    const auto expected = ArrayFromJSON(type_singleton(), expected_json);
+    ASSERT_OK_AND_ASSIGN(Datum actual, func(arg, options_, nullptr));
+    ValidateAndAssertApproxEqual(actual.make_array(), expected);
+
+    // Also check (Scalar) operations
+    const int64_t length = expected->length();
+    for (int64_t i = 0; i < length; ++i) {
+      const auto expected_scalar = *expected->GetScalar(i);
+      ASSERT_OK_AND_ASSIGN(actual, func(*arg->GetScalar(i), options_, nullptr));
+      AssertScalarsApproxEqual(*expected_scalar, *actual.scalar(), /*verbose=*/true,
+                               equal_options_);
+    }
+  }
+
+  void AssertUnaryOpRaises(UnaryFunction func, const std::string& argument,
+                           const std::string& expected_msg) {
+    auto arg = ArrayFromJSON(type_singleton(), argument);
+    EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, ::testing::HasSubstr(expected_msg),
+                                    func(arg, options_, nullptr));
+  }
+
+  void AssertUnaryOpNotImplemented(UnaryFunction func, const std::string& argument) {
+    auto arg = ArrayFromJSON(type_singleton(), argument);
+    const char* expected_msg = "has no kernel matching input types";
+    EXPECT_RAISES_WITH_MESSAGE_THAT(NotImplemented, ::testing::HasSubstr(expected_msg),
+                                    func(arg, options_, nullptr));
+  }
+
+  void ValidateAndAssertApproxEqual(const std::shared_ptr<Array>& actual,
+                                    const std::string& expected) {
+    const auto exp = ArrayFromJSON(type_singleton(), expected);
+    ValidateAndAssertApproxEqual(actual, exp);
+  }
+
+  void ValidateAndAssertApproxEqual(const std::shared_ptr<Array>& actual,
+                                    const std::shared_ptr<Array>& expected) {
+    ASSERT_OK(actual->ValidateFull());
+    AssertArraysApproxEqual(*expected, *actual, /*verbose=*/true, equal_options_);
+  }
+
+  void SetOverflowCheck(bool value = true) { options_.check_overflow = value; }
+
+  void SetNansEqual(bool value = true) {
+    this->equal_options_ = equal_options_.nans_equal(value);
+  }
+
+  ArithmeticOptions options_ = ArithmeticOptions();
+  EqualOptions equal_options_ = EqualOptions::Defaults();
+};
+
+template <typename T>
+class TestUnaryArithmeticIntegral : public TestUnaryArithmetic<T> {};
+
+template <typename T>
+class TestUnaryArithmeticSigned : public TestUnaryArithmeticIntegral<T> {};
+
+template <typename T>
+class TestUnaryArithmeticUnsigned : public TestUnaryArithmeticIntegral<T> {};
+
+template <typename T>
+class TestUnaryArithmeticFloating : public TestUnaryArithmetic<T> {};
+
 template <typename T>
 class TestBinaryArithmetic : public TestBase {
  protected:
@@ -214,6 +324,11 @@ using UnsignedIntegerTypes =
 // TODO(kszucs): add half-float
 using FloatingTypes = testing::Types<FloatType, DoubleType>;
 
+TYPED_TEST_SUITE(TestUnaryArithmeticIntegral, IntegralTypes);
+TYPED_TEST_SUITE(TestUnaryArithmeticSigned, SignedIntegerTypes);
+TYPED_TEST_SUITE(TestUnaryArithmeticUnsigned, UnsignedIntegerTypes);
+TYPED_TEST_SUITE(TestUnaryArithmeticFloating, FloatingTypes);
+
 TYPED_TEST_SUITE(TestBinaryArithmeticIntegral, IntegralTypes);
 TYPED_TEST_SUITE(TestBinaryArithmeticSigned, SignedIntegerTypes);
 TYPED_TEST_SUITE(TestBinaryArithmeticUnsigned, UnsignedIntegerTypes);
@@ -817,5 +932,130 @@ TEST(TestBinaryArithmetic, AddWithImplicitCastsUint64EdgeCase) {
                                      ArrayFromJSON(uint64(), "[18446744073709551615]")}));
 }
 
+TEST(TestUnaryArithmetic, DispatchBest) {
+  for (std::string name : {"negate"}) {
+    for (const auto& ty : {int8(), int16(), int32(), int64(), uint8(), uint16(), uint32(),
+                           uint64(), float32(), float64()}) {
+      CheckDispatchBest(name, {ty}, {ty});
+      CheckDispatchBest(name, {dictionary(int8(), ty)}, {ty});
+    }
+  }
+
+  for (std::string name : {"negate_checked"}) {
+    for (const auto& ty : {int8(), int16(), int32(), int64(), float32(), float64()}) {
+      CheckDispatchBest(name, {ty}, {ty});
+      CheckDispatchBest(name, {dictionary(int8(), ty)}, {ty});
+    }
+  }
+
+  for (std::string name : {"negate", "negate_checked"}) {
+    CheckDispatchFails(name, {null()});
+  }
+}
+
+TYPED_TEST(TestUnaryArithmeticSigned, Negate) {
+  using CType = typename TestFixture::CType;
+
+  auto min = std::numeric_limits<CType>::min();
+  auto max = std::numeric_limits<CType>::max();
+
+  for (auto check_overflow : {false, true}) {
+    this->SetOverflowCheck(check_overflow);
+    // Empty arrays
+    this->AssertUnaryOp(Negate, "[]", "[]");
+    // Array with nulls
+    this->AssertUnaryOp(Negate, "[null]", "[null]");
+    this->AssertUnaryOp(Negate, this->MakeNullScalar(), this->MakeNullScalar());
+    this->AssertUnaryOp(Negate, "[1, null, -10]", "[-1, null, 10]");
+    // Arrays with zeros
+    this->AssertUnaryOp(Negate, "[0, 0, -0]", "[0, -0, 0]");
+    this->AssertUnaryOp(Negate, 0, -0);
+    this->AssertUnaryOp(Negate, -0, 0);
+    this->AssertUnaryOp(Negate, 0, 0);
+    // Ordinary arrays (positive inputs)
+    this->AssertUnaryOp(Negate, "[1, 10, 127]", "[-1, -10, -127]");
+    this->AssertUnaryOp(Negate, 1, -1);
+    this->AssertUnaryOp(Negate, this->MakeScalar(1), this->MakeScalar(-1));
+    // Ordinary arrays (negative inputs)
+    this->AssertUnaryOp(Negate, "[-1, -10, -127]", "[1, 10, 127]");
+    this->AssertUnaryOp(Negate, -1, 1);
+    this->AssertUnaryOp(Negate, MakeArray(-1), "[1]");
+    // Min/max (wrap arounds and overflow)
+    this->AssertUnaryOp(Negate, max, min + 1);
+    if (check_overflow) {
+      this->AssertUnaryOpRaises(Negate, MakeArray(min), "overflow");
+    } else {
+      this->AssertUnaryOp(Negate, min, min);
+    }
+  }
+
+  // Overflow should not be checked on underlying value slots when output would be null
+  this->SetOverflowCheck(true);
+  auto arg = ArrayFromJSON(this->type_singleton(), MakeArray(1, max, min));
+  arg = TweakValidityBit(arg, 1, false);
+  arg = TweakValidityBit(arg, 2, false);
+  this->AssertUnaryOp(Negate, arg, "[-1, null, null]");
+}
+
+TYPED_TEST(TestUnaryArithmeticUnsigned, Negate) {
+  using CType = typename TestFixture::CType;
+
+  auto min = std::numeric_limits<CType>::min();
+  auto max = std::numeric_limits<CType>::max();
+
+  // Empty arrays
+  this->AssertUnaryOp(Negate, "[]", "[]");
+  // Array with nulls
+  this->AssertUnaryOp(Negate, "[null]", "[null]");
+  this->AssertUnaryOp(Negate, this->MakeNullScalar(), this->MakeNullScalar());
+  // Min/max (wrap around)
+  this->AssertUnaryOp(Negate, min, min);
+  this->AssertUnaryOp(Negate, max, 1);
+  this->AssertUnaryOp(Negate, 1, max);
+  // Not implemented kernels
+  this->SetOverflowCheck(true);
+  this->AssertUnaryOpNotImplemented(Negate, "[0]");
+}
+
+TYPED_TEST(TestUnaryArithmeticFloating, Negate) {
+  using CType = typename TestFixture::CType;
+
+  auto min = std::numeric_limits<CType>::lowest();
+  auto max = std::numeric_limits<CType>::max();
+
+  for (auto check_overflow : {false, true}) {
+    this->SetOverflowCheck(check_overflow);
+    // Empty arrays
+    this->AssertUnaryOp(Negate, "[]", "[]");
+    // Array with nulls
+    this->AssertUnaryOp(Negate, "[null]", "[null]");
+    this->AssertUnaryOp(Negate, this->MakeNullScalar(), this->MakeNullScalar());
+    this->AssertUnaryOp(Negate, "[1.3, null, -10.80]", "[-1.3, null, 10.80]");
+    // Arrays with zeros
+    this->AssertUnaryOp(Negate, "[0.0, 0.0, -0.0]", "[0.0, -0.0, 0.0]");
+    this->AssertUnaryOp(Negate, 0.0F, -0.0F);
+    this->AssertUnaryOp(Negate, -0.0F, 0.0F);
+    this->AssertUnaryOp(Negate, 0.0F, 0.0F);
+    // Ordinary arrays (positive inputs)
+    this->AssertUnaryOp(Negate, "[1.3, 10.80, 12748.001]", "[-1.3, -10.80, -12748.001]");
+    this->AssertUnaryOp(Negate, 1.3F, -1.3F);
+    this->AssertUnaryOp(Negate, this->MakeScalar(1.3F), this->MakeScalar(-1.3F));
+    // Ordinary arrays (negative inputs)
+    this->AssertUnaryOp(Negate, "[-1.3, -10.80, -12748.001]", "[1.3, 10.80, 12748.001]");
+    this->AssertUnaryOp(Negate, -1.3F, 1.3F);
+    this->AssertUnaryOp(Negate, MakeArray(-1.3F), "[1.3]");
+    // Arrays with infinites
+    this->AssertUnaryOp(Negate, "[Inf, -Inf]", "[-Inf, Inf]");
+    // Arrays with NaNs
+    this->SetNansEqual(true);
+    this->AssertUnaryOp(Negate, "[NaN]", "[NaN]");
+    this->AssertUnaryOp(Negate, "[NaN]", "[-NaN]");
+    this->AssertUnaryOp(Negate, "[-NaN]", "[NaN]");
+    // Min/max
+    this->AssertUnaryOp(Negate, min, max);
+    this->AssertUnaryOp(Negate, max, min);
+  }
+}
+
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/test_util.cc b/cpp/src/arrow/compute/kernels/test_util.cc
index 11d5e76d342..672308452cf 100644
--- a/cpp/src/arrow/compute/kernels/test_util.cc
+++ b/cpp/src/arrow/compute/kernels/test_util.cc
@@ -196,5 +196,11 @@ void CheckDispatchBest(std::string func_name, std::vector<ValueDescr> original_v
       << expected_kernel->signature->ToString();
 }
 
+void CheckDispatchFails(std::string func_name, std::vector<ValueDescr> values) {
+  ASSERT_OK_AND_ASSIGN(auto function, GetFunctionRegistry()->GetFunction(func_name));
+  ASSERT_NOT_OK(function->DispatchBest(&values));
+  ASSERT_NOT_OK(function->DispatchExact(values));
+}
+
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/test_util.h b/cpp/src/arrow/compute/kernels/test_util.h
index 767911888ac..aea3d8360e6 100644
--- a/cpp/src/arrow/compute/kernels/test_util.h
+++ b/cpp/src/arrow/compute/kernels/test_util.h
@@ -148,5 +148,8 @@ void TestRandomPrimitiveCTypes() {
 void CheckDispatchBest(std::string func_name, std::vector<ValueDescr> descrs,
                        std::vector<ValueDescr> exact_descrs);
 
+// Check that function fails to produce a Kernel for the set of ValueDescrs.
+void CheckDispatchFails(std::string func_name, std::vector<ValueDescr> descrs);
+
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/testing/gtest_util.h b/cpp/src/arrow/testing/gtest_util.h
index 2ccf171d016..757986e13ca 100644
--- a/cpp/src/arrow/testing/gtest_util.h
+++ b/cpp/src/arrow/testing/gtest_util.h
@@ -98,6 +98,10 @@
                           << _st.ToString();                            \
   } while (false)
 
+#define ASSERT_NOT_OK(expr)                                                         \
+  for (::arrow::Status _st = ::arrow::internal::GenericToStatus((expr)); _st.ok();) \
+  FAIL() << "'" ARROW_STRINGIFY(expr) "' did not failed" << _st.ToString()
+
 #define ABORT_NOT_OK(expr)                                          \
   do {                                                              \
     auto _res = (expr);                                             \
diff --git a/cpp/src/arrow/util/int_util_internal.h b/cpp/src/arrow/util/int_util_internal.h
index de39229cfdd..162f1d92a2b 100644
--- a/cpp/src/arrow/util/int_util_internal.h
+++ b/cpp/src/arrow/util/int_util_internal.h
@@ -63,6 +63,27 @@ OPS_WITH_OVERFLOW(DivideWithOverflow, div)
 #undef OP_WITH_OVERFLOW
 #undef OPS_WITH_OVERFLOW
 
+// Define functions NegateWithOverflow with the signature `bool(T u, T* out)`
+// where T is a signed integer type.  On overflow, these functions return true.
+// Otherwise, false is returned and `out` is updated with the result of the
+// operation.
+
+#define UNARY_OP_WITH_OVERFLOW(_func_name, _psnip_op, _type, _psnip_type) \
+  static inline bool _func_name(_type u, _type* out) {                    \
+    return !psnip_safe_##_psnip_type##_##_psnip_op(out, u);               \
+  }
+
+#define SIGNED_UNARY_OPS_WITH_OVERFLOW(_func_name, _psnip_op)   \
+  UNARY_OP_WITH_OVERFLOW(_func_name, _psnip_op, int8_t, int8)   \
+  UNARY_OP_WITH_OVERFLOW(_func_name, _psnip_op, int16_t, int16) \
+  UNARY_OP_WITH_OVERFLOW(_func_name, _psnip_op, int32_t, int32) \
+  UNARY_OP_WITH_OVERFLOW(_func_name, _psnip_op, int64_t, int64)
+
+SIGNED_UNARY_OPS_WITH_OVERFLOW(NegateWithOverflow, neg)
+
+#undef UNARY_OP_WITH_OVERFLOW
+#undef SIGNED_UNARY_OPS_WITH_OVERFLOW
+
 /// Signed addition with well-defined behaviour on overflow (as unsigned)
 template <typename SignedInt>
 SignedInt SafeSignedAdd(SignedInt u, SignedInt v) {
@@ -79,6 +100,13 @@ SignedInt SafeSignedSubtract(SignedInt u, SignedInt v) {
                                 static_cast<UnsignedInt>(v));
 }
 
+/// Signed negation with well-defined behaviour on overflow (as unsigned)
+template <typename SignedInt>
+SignedInt SafeSignedNegate(SignedInt u) {
+  using UnsignedInt = typename std::make_unsigned<SignedInt>::type;
+  return static_cast<SignedInt>(~static_cast<UnsignedInt>(u) + 1);
+}
+
 /// Signed left shift with well-defined behaviour on negative numbers or overflow
 template <typename SignedInt, typename Shift>
 SignedInt SafeLeftShift(SignedInt u, Shift shift) {
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index fb50f8cef65..592dc4ec1b0 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -246,10 +246,10 @@ Binary functions have the following semantics (which is sometimes called
 Arithmetic functions
 ~~~~~~~~~~~~~~~~~~~~
 
-These functions expect two inputs of numeric type and apply a given binary
-operation to each pair of elements gathered from the inputs.  If any of the
-input elements in a pair is null, the corresponding output element is null.
-Inputs will be cast to the :ref:`common numeric type <common-numeric-type>`
+These functions expect inputs of numeric type and apply a given arithmetic
+operation to each element(s) gathered from the input(s).  If any of the
+input element(s) is null, the corresponding output element is null.
+Input(s) will be cast to the :ref:`common numeric type <common-numeric-type>`
 (and dictionary decoded, if applicable) before the operation is applied.
 
 The default variant of these functions does not detect overflow (the result
@@ -268,14 +268,18 @@ an ``Invalid`` :class:`Status` when overflow is detected.
 +--------------------------+------------+--------------------+---------------------+
 | divide_checked           | Binary     | Numeric            | Numeric             |
 +--------------------------+------------+--------------------+---------------------+
-| power                    | Binary     | Numeric            | Numeric             |
-+--------------------------+------------+--------------------+---------------------+
-| power_checked            | Binary     | Numeric            | Numeric             |
-+--------------------------+------------+--------------------+---------------------+
 | multiply                 | Binary     | Numeric            | Numeric             |
 +--------------------------+------------+--------------------+---------------------+
 | multiply_checked         | Binary     | Numeric            | Numeric             |
 +--------------------------+------------+--------------------+---------------------+
+| negate                   | Unary      | Numeric            | Numeric             |
++--------------------------+------------+--------------------+---------------------+
+| negate_checked           | Unary      | Signed Numeric     | Signed Numeric      |
++--------------------------+------------+--------------------+---------------------+
+| power                    | Binary     | Numeric            | Numeric             |
++--------------------------+------------+--------------------+---------------------+
+| power_checked            | Binary     | Numeric            | Numeric             |
++--------------------------+------------+--------------------+---------------------+
 | subtract                 | Binary     | Numeric            | Numeric             |
 +--------------------------+------------+--------------------+---------------------+
 | subtract_checked         | Binary     | Numeric            | Numeric             |

From 3a6f6053c74eb698208395091009ac50be9dc29e Mon Sep 17 00:00:00 2001
From: ptaylor <paul.e.taylor@me.com>
Date: Wed, 28 Apr 2021 09:41:59 +0900
Subject: [PATCH 138/719] ARROW-12570: [JS] Fix issues that blocked the v4.0.0
 release

A few issues had to be fixed manually for the v4.0.0 release:

* ts-jest throwing a type error running the tests on the TS source
* lerna.json really does need those version numbers
* npm has introduced rate limits since v3.0.0
* support npm 2FA one-time-passwords for publish

Closes https://issues.apache.org/jira/browse/ARROW-12570

Closes #10175 from trxcllnt/fix/js-release-fixes

Authored-by: ptaylor <paul.e.taylor@me.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 js/lerna.json              | 2 ++
 js/npm-release.sh          | 4 +++-
 js/test/jest-extensions.ts | 2 +-
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/js/lerna.json b/js/lerna.json
index 053736e57f1..99d6f64fbd4 100644
--- a/js/lerna.json
+++ b/js/lerna.json
@@ -1,4 +1,6 @@
 {
+  "lerna": "3.22.1",
+  "version": "4.0.0",
   "npmClient": "yarn",
   "packages": [
     "targets/ts",
diff --git a/js/npm-release.sh b/js/npm-release.sh
index 54d9df07cd5..5d92d01cb0a 100755
--- a/js/npm-release.sh
+++ b/js/npm-release.sh
@@ -22,5 +22,7 @@ set -e
 yarn --frozen-lockfile
 yarn gulp
 
+read -p "Please enter your npm 2FA one-time password (or leave empty if you don't have 2FA enabled): " NPM_OTP </dev/tty
+
 # publish the JS target modules to npm
-yarn lerna exec --no-bail -- npm publish
+yarn lerna exec --concurrency 1 --no-bail "npm publish${NPM_OTP:+ --otp=$NPM_OTP}"
diff --git a/js/test/jest-extensions.ts b/js/test/jest-extensions.ts
index 78937e40d90..6adde0b8374 100644
--- a/js/test/jest-extensions.ts
+++ b/js/test/jest-extensions.ts
@@ -105,7 +105,7 @@ function toEqualVector<
     if (v1 == null || v2 == null) {
         return {
             pass: false,
-            message: [
+            message: () => [
                 [columnName, `(${format(this, format1, format2, ' !== ')})`].filter(Boolean).join(':'),
                 `${v1 == null ? 'actual' : 'expected'} is null`
             ].join('\n')

From 08acfa5607481eee1bf58af348c8261478e21c19 Mon Sep 17 00:00:00 2001
From: Alessandro Molina <amol@turbogears.org>
Date: Wed, 28 Apr 2021 11:03:49 +0200
Subject: [PATCH 139/719] ARROW-10910: [Python] Provide better error message
 when trying to read from None source

Closes #10054 from amol-/ARROW-10910

Authored-by: Alessandro Molina <amol@turbogears.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 python/pyarrow/_parquet.pyx                |  2 +-
 python/pyarrow/tests/parquet/test_basic.py | 14 ++++++++++++++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx
index a7c428e8996..4b435ba1d1c 100644
--- a/python/pyarrow/_parquet.pyx
+++ b/python/pyarrow/_parquet.pyx
@@ -927,7 +927,7 @@ cdef class ParquetReader(_Weakrefable):
         self.pool = maybe_unbox_memory_pool(memory_pool)
         self._metadata = None
 
-    def open(self, object source, bint use_memory_map=True,
+    def open(self, object source not None, bint use_memory_map=True,
              read_dictionary=None, FileMetaData metadata=None,
              int buffer_size=0):
         cdef:
diff --git a/python/pyarrow/tests/parquet/test_basic.py b/python/pyarrow/tests/parquet/test_basic.py
index 8dc3fcebda7..670c7c6ed1f 100644
--- a/python/pyarrow/tests/parquet/test_basic.py
+++ b/python/pyarrow/tests/parquet/test_basic.py
@@ -137,6 +137,20 @@ def test_special_chars_filename(tempdir, use_legacy_dataset):
     assert table_read.equals(table)
 
 
+@parametrize_legacy_dataset
+def test_invalid_source(use_legacy_dataset):
+    # Test that we provide an helpful error message pointing out
+    # that None wasn't expected when trying to open a Parquet None file.
+    #
+    # Depending on use_legacy_dataset the message changes slightly
+    # but in both cases it should point out that None wasn't expected.
+    with pytest.raises(TypeError, match="None"):
+        pq.read_table(None, use_legacy_dataset=use_legacy_dataset)
+
+    with pytest.raises(TypeError, match="None"):
+        pq.ParquetFile(None)
+
+
 @pytest.mark.slow
 def test_file_with_over_int16_max_row_groups():
     # PARQUET-1857: Parquet encryption support introduced a INT16_MAX upper

From 654e36937ee8a8f2b6852ff77b810e67eb77dcee Mon Sep 17 00:00:00 2001
From: Alessandro Molina <amol@turbogears.org>
Date: Wed, 28 Apr 2021 11:05:38 +0200
Subject: [PATCH 140/719] ARROW-9594: [Python] Preserve null indexes in
 DictionaryArray.to_numpy as it's done in DictionaryArray.to_pandas

https://issues.apache.org/jira/browse/ARROW-9594

Closes #10101 from amol-/ARROW-9594

Authored-by: Alessandro Molina <amol@turbogears.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/python/arrow_to_pandas.cc |  5 ++
 python/pyarrow/array.pxi                |  5 ++
 python/pyarrow/includes/libarrow.pxd    |  1 +
 python/pyarrow/tests/test_array.py      | 70 +++++++++++++++++++++++++
 4 files changed, 81 insertions(+)

diff --git a/cpp/src/arrow/python/arrow_to_pandas.cc b/cpp/src/arrow/python/arrow_to_pandas.cc
index f058e5261ae..cc386f589a7 100644
--- a/cpp/src/arrow/python/arrow_to_pandas.cc
+++ b/cpp/src/arrow/python/arrow_to_pandas.cc
@@ -2229,6 +2229,11 @@ Status ConvertChunkedArrayToPandas(const PandasOptions& options,
         checked_cast<const DictionaryType&>(*arr->type()).value_type();
     RETURN_NOT_OK(DecodeDictionaries(options.pool, dense_type, &arr));
     DCHECK_NE(arr->type()->id(), Type::DICTIONARY);
+
+    // The original Python DictionaryArray won't own the memory anymore
+    // as we actually built a new array when we decoded the DictionaryArray
+    // thus let the final resulting numpy array own the memory through a Capsule
+    py_ref = nullptr;
   }
 
   if (options.strings_to_categorical && is_base_binary_like(arr->type()->id())) {
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index fb1aa744711..1c47ea3accc 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -1161,6 +1161,11 @@ cdef class Array(_PandasConvertible):
             raise ValueError(
                 "Cannot return a writable array if asking for zero-copy")
 
+        # If there are nulls and the array is a DictionaryArray
+        # decoding the dictionary will make sure nulls are correctly handled.
+        # Decoding a dictionary does imply a copy by the way,
+        # so it can't be done if the user requested a zero_copy.
+        c_options.decode_dictionaries = not zero_copy_only
         c_options.zero_copy_only = zero_copy_only
 
         with nogil:
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 747808709b4..8f0f973a791 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -2124,6 +2124,7 @@ cdef extern from "arrow/python/api.h" namespace "arrow::py" nogil:
         c_bool safe_cast
         c_bool split_blocks
         c_bool self_destruct
+        c_bool decode_dictionaries
         unordered_set[c_string] categorical_columns
         unordered_set[c_string] extension_columns
 
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index fb0f8552672..128e6920dd4 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -701,6 +701,76 @@ def test_dictionary_from_numpy():
             assert d2[i].as_py() == dictionary[indices[i]]
 
 
+def test_dictionary_to_numpy():
+    expected = pa.array(
+        ["foo", "bar", None, "foo"]
+    ).to_numpy(zero_copy_only=False)
+    a = pa.DictionaryArray.from_arrays(
+        pa.array([0, 1, None, 0]),
+        pa.array(['foo', 'bar'])
+    )
+    np.testing.assert_array_equal(a.to_numpy(zero_copy_only=False),
+                                  expected)
+
+    with pytest.raises(pa.ArrowInvalid):
+        # If this would be changed to no longer raise in the future,
+        # ensure to test the actual result because, currently, to_numpy takes
+        # for granted that when zero_copy_only=True there will be no nulls
+        # (it's the decoding of the DictionaryArray that handles the nulls and
+        # this is only activated with zero_copy_only=False)
+        a.to_numpy(zero_copy_only=True)
+
+    anonulls = pa.DictionaryArray.from_arrays(
+        pa.array([0, 1, 1, 0]),
+        pa.array(['foo', 'bar'])
+    )
+    expected = pa.array(
+        ["foo", "bar", "bar", "foo"]
+    ).to_numpy(zero_copy_only=False)
+    np.testing.assert_array_equal(anonulls.to_numpy(zero_copy_only=False),
+                                  expected)
+
+    with pytest.raises(pa.ArrowInvalid):
+        anonulls.to_numpy(zero_copy_only=True)
+
+    afloat = pa.DictionaryArray.from_arrays(
+        pa.array([0, 1, 1, 0]),
+        pa.array([13.7, 11.0])
+    )
+    expected = pa.array([13.7, 11.0, 11.0, 13.7]).to_numpy()
+    np.testing.assert_array_equal(afloat.to_numpy(zero_copy_only=True),
+                                  expected)
+    np.testing.assert_array_equal(afloat.to_numpy(zero_copy_only=False),
+                                  expected)
+
+    afloat2 = pa.DictionaryArray.from_arrays(
+        pa.array([0, 1, None, 0]),
+        pa.array([13.7, 11.0])
+    )
+    expected = pa.array(
+        [13.7, 11.0, None, 13.7]
+    ).to_numpy(zero_copy_only=False)
+    assert np.array_equal(
+        afloat2.to_numpy(zero_copy_only=False),
+        expected,
+        equal_nan=True
+    )
+
+    # Testing for integers can reveal problems related to dealing
+    # with None values, as a numpy array of int dtype
+    # can't contain NaN nor None.
+    aints = pa.DictionaryArray.from_arrays(
+        pa.array([0, 1, None, 0]),
+        pa.array([7, 11])
+    )
+    expected = pa.array([7, 11, None, 7]).to_numpy(zero_copy_only=False)
+    assert np.array_equal(
+        aints.to_numpy(zero_copy_only=False),
+        expected,
+        equal_nan=True
+    )
+
+
 def test_dictionary_from_boxed_arrays():
     indices = np.repeat([0, 1, 2], 2)
     dictionary = np.array(['foo', 'bar', 'baz'], dtype=object)

From 2f9c074d97e6105779fe27f52c3172338970d02e Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Wed, 28 Apr 2021 11:29:52 +0200
Subject: [PATCH 141/719] ARROW-12554: [C++] Allow duplicates in
 `SetLookupOptions::value_set`

For the `index_in` function, we need to map the memo table indices to indices in the value_set
(they are different in there are duplicates).

This fixes the current benchmark failures for `is_in` and `index_in`.

Closes #10174 from pitrou/ARROW-12554-index-in-duplicates

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 .../compute/kernels/scalar_set_lookup.cc      |  45 +++--
 .../compute/kernels/scalar_set_lookup_test.cc | 185 +++++++++++++++++-
 2 files changed, 217 insertions(+), 13 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc b/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc
index 2868b0c743f..3e2e95e5401 100644
--- a/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc
@@ -41,32 +41,52 @@ struct SetLookupState : public KernelState {
 
   Status Init(const SetLookupOptions& options) {
     if (options.value_set.kind() == Datum::ARRAY) {
-      RETURN_NOT_OK(AddArrayValueSet(*options.value_set.array()));
+      const ArrayData& value_set = *options.value_set.array();
+      memo_index_to_value_index.reserve(value_set.length);
+      RETURN_NOT_OK(AddArrayValueSet(options, *options.value_set.array()));
     } else if (options.value_set.kind() == Datum::CHUNKED_ARRAY) {
       const ChunkedArray& value_set = *options.value_set.chunked_array();
+      memo_index_to_value_index.reserve(value_set.length());
+      int64_t offset = 0;
       for (const std::shared_ptr<Array>& chunk : value_set.chunks()) {
-        RETURN_NOT_OK(AddArrayValueSet(*chunk->data()));
+        RETURN_NOT_OK(AddArrayValueSet(options, *chunk->data(), offset));
+        offset += chunk->length();
       }
     } else {
       return Status::Invalid("value_set should be an array or chunked array");
     }
-    if (lookup_table.size() != options.value_set.length()) {
-      return Status::NotImplemented("duplicate values in value_set");
-    }
-    if (!options.skip_nulls) {
-      null_index = lookup_table.GetNull();
+    if (!options.skip_nulls && lookup_table.GetNull() >= 0) {
+      null_index = memo_index_to_value_index[lookup_table.GetNull()];
     }
     return Status::OK();
   }
 
-  Status AddArrayValueSet(const ArrayData& data) {
+  Status AddArrayValueSet(const SetLookupOptions& options, const ArrayData& data,
+                          int64_t start_index = 0) {
     using T = typename GetViewType<Type>::T;
+    int32_t index = static_cast<int32_t>(start_index);
     auto visit_valid = [&](T v) {
+      const auto memo_size = static_cast<int32_t>(memo_index_to_value_index.size());
       int32_t unused_memo_index;
-      return lookup_table.GetOrInsert(v, &unused_memo_index);
+      auto on_found = [&](int32_t memo_index) { DCHECK_LT(memo_index, memo_size); };
+      auto on_not_found = [&](int32_t memo_index) {
+        DCHECK_EQ(memo_index, memo_size);
+        memo_index_to_value_index.push_back(index);
+      };
+      RETURN_NOT_OK(lookup_table.GetOrInsert(
+          v, std::move(on_found), std::move(on_not_found), &unused_memo_index));
+      ++index;
+      return Status::OK();
     };
     auto visit_null = [&]() {
-      lookup_table.GetOrInsertNull();
+      const auto memo_size = static_cast<int32_t>(memo_index_to_value_index.size());
+      auto on_found = [&](int32_t memo_index) { DCHECK_LT(memo_index, memo_size); };
+      auto on_not_found = [&](int32_t memo_index) {
+        DCHECK_EQ(memo_index, memo_size);
+        memo_index_to_value_index.push_back(index);
+      };
+      lookup_table.GetOrInsertNull(std::move(on_found), std::move(on_not_found));
+      ++index;
       return Status::OK();
     };
 
@@ -75,6 +95,9 @@ struct SetLookupState : public KernelState {
 
   using MemoTable = typename HashTraits<Type>::MemoTableType;
   MemoTable lookup_table;
+  // When there are duplicates in value_set, the MemoTable indices must
+  // be mapped back to indices in the value_set.
+  std::vector<int32_t> memo_index_to_value_index;
   int32_t null_index = -1;
 };
 
@@ -215,7 +238,7 @@ struct IndexInVisitor {
           int32_t index = state.lookup_table.Get(v);
           if (index != -1) {
             // matching needle; output index from value_set
-            this->builder.UnsafeAppend(index);
+            this->builder.UnsafeAppend(state.memo_index_to_value_index[index]);
           } else {
             // no matching needle; output null
             this->builder.UnsafeAppendNull();
diff --git a/cpp/src/arrow/compute/kernels/scalar_set_lookup_test.cc b/cpp/src/arrow/compute/kernels/scalar_set_lookup_test.cc
index 272502caa57..5c8bf98e196 100644
--- a/cpp/src/arrow/compute/kernels/scalar_set_lookup_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_set_lookup_test.cc
@@ -157,6 +157,13 @@ TYPED_TEST(TestIsInKernelPrimitive, IsIn) {
   CheckIsIn(type, "[null, 1, 2, 3, 2]", "[2, null, 1]",
             "[false, true, true, false, true]", /*skip_nulls=*/true);
 
+  // Duplicates in right array
+  CheckIsIn(type, "[null, 1, 2, 3, 2]", "[null, 2, 2, null, 1, 1]",
+            "[true, true, true, false, true]",
+            /*skip_nulls=*/false);
+  CheckIsIn(type, "[null, 1, 2, 3, 2]", "[null, 2, 2, null, 1, 1]",
+            "[false, true, true, false, true]", /*skip_nulls=*/true);
+
   // Empty Arrays
   CheckIsIn(type, "[]", "[]", "[]");
 }
@@ -170,6 +177,10 @@ TEST_F(TestIsInKernel, NullType) {
 
   CheckIsIn(type, "[null, null]", "[null]", "[false, false]", /*skip_nulls=*/true);
   CheckIsIn(type, "[null, null]", "[]", "[false, false]", /*skip_nulls=*/true);
+
+  // Duplicates in right array
+  CheckIsIn(type, "[null, null, null]", "[null, null]", "[true, true, true]");
+  CheckIsIn(type, "[null, null]", "[null, null]", "[false, false]", /*skip_nulls=*/true);
 }
 
 TEST_F(TestIsInKernel, TimeTimestamp) {
@@ -179,6 +190,12 @@ TEST_F(TestIsInKernel, TimeTimestamp) {
               "[true, true, false, true, true]", /*skip_nulls=*/false);
     CheckIsIn(type, "[1, null, 5, 1, 2]", "[2, 1, null]",
               "[true, false, false, true, true]", /*skip_nulls=*/true);
+
+    // Duplicates in right array
+    CheckIsIn(type, "[1, null, 5, 1, 2]", "[2, 1, 1, null, 2]",
+              "[true, true, false, true, true]", /*skip_nulls=*/false);
+    CheckIsIn(type, "[1, null, 5, 1, 2]", "[2, 1, 1, null, 2]",
+              "[true, false, false, true, true]", /*skip_nulls=*/true);
   }
 }
 
@@ -194,6 +211,12 @@ TEST_F(TestIsInKernel, Boolean) {
             "[false, true, true, false, true]", /*skip_nulls=*/false);
   CheckIsIn(type, "[true, false, null, true, false]", "[false, null]",
             "[false, true, false, false, true]", /*skip_nulls=*/true);
+
+  // Duplicates in right array
+  CheckIsIn(type, "[true, false, null, true, false]", "[null, false, false, null]",
+            "[false, true, true, false, true]", /*skip_nulls=*/false);
+  CheckIsIn(type, "[true, false, null, true, false]", "[null, false, false, null]",
+            "[false, true, false, false, true]", /*skip_nulls=*/true);
 }
 
 TYPED_TEST_SUITE(TestIsInKernelBinary, BinaryTypes);
@@ -214,6 +237,14 @@ TYPED_TEST(TestIsInKernelBinary, Binary) {
   CheckIsIn(type, R"(["aaa", "", "cc", null, ""])", R"(["aaa", "", null])",
             "[true, true, false, false, true]",
             /*skip_nulls=*/true);
+
+  // Duplicates in right array
+  CheckIsIn(type, R"(["aaa", "", "cc", null, ""])",
+            R"([null, "aaa", "aaa", "", "", null])", "[true, true, false, true, true]",
+            /*skip_nulls=*/false);
+  CheckIsIn(type, R"(["aaa", "", "cc", null, ""])",
+            R"([null, "aaa", "aaa", "", "", null])", "[true, true, false, false, true]",
+            /*skip_nulls=*/true);
 }
 
 TEST_F(TestIsInKernel, FixedSizeBinary) {
@@ -232,6 +263,16 @@ TEST_F(TestIsInKernel, FixedSizeBinary) {
   CheckIsIn(type, R"(["aaa", "bbb", "ccc", null, "bbb"])", R"(["aaa", "bbb", null])",
             "[true, true, false, false, true]",
             /*skip_nulls=*/true);
+
+  // Duplicates in right array
+  CheckIsIn(type, R"(["aaa", "bbb", "ccc", null, "bbb"])",
+            R"(["aaa", null, "aaa", "bbb", "bbb", null])",
+            "[true, true, false, true, true]",
+            /*skip_nulls=*/false);
+  CheckIsIn(type, R"(["aaa", "bbb", "ccc", null, "bbb"])",
+            R"(["aaa", null, "aaa", "bbb", "bbb", null])",
+            "[true, true, false, false, true]",
+            /*skip_nulls=*/true);
 }
 
 TEST_F(TestIsInKernel, Decimal) {
@@ -250,6 +291,16 @@ TEST_F(TestIsInKernel, Decimal) {
   CheckIsIn(type, R"(["12.3", "45.6", "78.9", null, "12.3"])",
             R"(["12.3", "78.9", null])", "[true, false, true, false, true]",
             /*skip_nulls=*/true);
+
+  // Duplicates in right array
+  CheckIsIn(type, R"(["12.3", "45.6", "78.9", null, "12.3"])",
+            R"([null, "12.3", "12.3", "78.9", "78.9", null])",
+            "[true, false, true, true, true]",
+            /*skip_nulls=*/false);
+  CheckIsIn(type, R"(["12.3", "45.6", "78.9", null, "12.3"])",
+            R"([null, "12.3", "12.3", "78.9", "78.9", null])",
+            "[true, false, true, false, true]",
+            /*skip_nulls=*/true);
 }
 
 TEST_F(TestIsInKernel, DictionaryArray) {
@@ -314,6 +365,29 @@ TEST_F(TestIsInKernel, DictionaryArray) {
                         /*value_set_json=*/R"(["C", "B", "A"])",
                         /*expected_json=*/"[false, false, false, true, false]",
                         /*skip_nulls=*/true);
+
+    // With duplicates in value_set
+    CheckIsInDictionary(/*type=*/utf8(),
+                        /*index_type=*/index_ty,
+                        /*input_dictionary_json=*/R"(["A", "B", "C", "D"])",
+                        /*input_index_json=*/"[1, 2, null, 0]",
+                        /*value_set_json=*/R"(["A", "A", "B", "A", "B", "C"])",
+                        /*expected_json=*/"[true, true, false, true]",
+                        /*skip_nulls=*/false);
+    CheckIsInDictionary(/*type=*/utf8(),
+                        /*index_type=*/index_ty,
+                        /*input_dictionary_json=*/R"(["A", "B", "C", "D"])",
+                        /*input_index_json=*/"[1, 3, null, 0, 1]",
+                        /*value_set_json=*/R"(["C", "C", "B", "A", null, null, "B"])",
+                        /*expected_json=*/"[true, false, true, true, true]",
+                        /*skip_nulls=*/false);
+    CheckIsInDictionary(/*type=*/utf8(),
+                        /*index_type=*/index_ty,
+                        /*input_dictionary_json=*/R"(["A", "B", "C", "D"])",
+                        /*input_index_json=*/"[1, 3, null, 0, 1]",
+                        /*value_set_json=*/R"(["C", "C", "B", "A", null, null, "B"])",
+                        /*expected_json=*/"[true, false, false, true, true]",
+                        /*skip_nulls=*/true);
   }
 }
 
@@ -335,6 +409,16 @@ TEST_F(TestIsInKernel, ChunkedArrayInvoke) {
   expected = ChunkedArrayFromJSON(
       boolean(), {"[false, true, true, false, false]", "[true, false, false, false]"});
   CheckIsInChunked(input, value_set, expected, /*skip_nulls=*/true);
+
+  // Duplicates in value_set
+  value_set =
+      ChunkedArrayFromJSON(utf8(), {R"(["", null, "", "def"])", R"(["def", null])"});
+  expected = ChunkedArrayFromJSON(
+      boolean(), {"[false, true, true, false, false]", "[true, true, false, false]"});
+  CheckIsInChunked(input, value_set, expected, /*skip_nulls=*/false);
+  expected = ChunkedArrayFromJSON(
+      boolean(), {"[false, true, true, false, false]", "[true, false, false, false]"});
+  CheckIsInChunked(input, value_set, expected, /*skip_nulls=*/true);
 }
 
 // ----------------------------------------------------------------------
@@ -439,6 +523,18 @@ TYPED_TEST(TestIndexInKernelPrimitive, IndexIn) {
                      /* value_set= */ "[null]",
                      /* expected= */ "[0, 0, 0, 0]");
 
+  // Duplicates in value_set
+  this->CheckIndexIn(type,
+                     /* input= */ "[2, 1, 2, 1, 2, 3]",
+                     /* value_set= */ "[2, 2, 1, 1, 1, 3, 3]",
+                     /* expected= */ "[0, 2, 0, 2, 0, 5]");
+
+  // Duplicates and nulls in value_set
+  this->CheckIndexIn(type,
+                     /* input= */ "[2, 1, 2, 1, 2, 3]",
+                     /* value_set= */ "[2, 2, null, null, 1, 1, 1, 3, 3]",
+                     /* expected= */ "[0, 4, 0, 4, 0, 7]");
+
   // No Match
   this->CheckIndexIn(type,
                      /* input= */ "[2, null, 7, 3, 8]",
@@ -463,6 +559,17 @@ TYPED_TEST(TestIndexInKernelPrimitive, SkipNulls) {
                      /*value_set=*/"[1, 3]",
                      /*expected=*/"[null, 0, null, 1, null]",
                      /*skip_nulls=*/true);
+  // Same with duplicates in value_set
+  this->CheckIndexIn(type,
+                     /*input=*/"[0, 1, 2, 3, null]",
+                     /*value_set=*/"[1, 1, 3, 3]",
+                     /*expected=*/"[null, 0, null, 2, null]",
+                     /*skip_nulls=*/false);
+  this->CheckIndexIn(type,
+                     /*input=*/"[0, 1, 2, 3, null]",
+                     /*value_set=*/"[1, 1, 3, 3]",
+                     /*expected=*/"[null, 0, null, 2, null]",
+                     /*skip_nulls=*/true);
 
   // Nulls in value_set
   this->CheckIndexIn(type,
@@ -472,9 +579,15 @@ TYPED_TEST(TestIndexInKernelPrimitive, SkipNulls) {
                      /*skip_nulls=*/false);
   this->CheckIndexIn(type,
                      /*input=*/"[0, 1, 2, 3, null]",
-                     /*value_set=*/"[1, null, 3]",
-                     /*expected=*/"[null, 0, null, 2, null]",
+                     /*value_set=*/"[1, 1, null, null, 3, 3]",
+                     /*expected=*/"[null, 0, null, 4, null]",
                      /*skip_nulls=*/true);
+  // Same with duplicates in value_set
+  this->CheckIndexIn(type,
+                     /*input=*/"[0, 1, 2, 3, null]",
+                     /*value_set=*/"[1, 1, null, null, 3, 3]",
+                     /*expected=*/"[null, 0, null, 4, 2]",
+                     /*skip_nulls=*/false);
 }
 
 TEST_F(TestIndexInKernel, NullType) {
@@ -493,6 +606,12 @@ TEST_F(TestIndexInKernel, TimeTimestamp) {
                /* value_set= */ "[2, 1, null]",
                /* expected= */ "[1, 2, null, 1, 0]");
 
+  // Duplicates in value_set
+  CheckIndexIn(time32(TimeUnit::SECOND),
+               /* input= */ "[1, null, 5, 1, 2]",
+               /* value_set= */ "[2, 2, 1, 1, null, null]",
+               /* expected= */ "[2, 4, null, 2, 0]");
+
   // Needles array has no nulls
   CheckIndexIn(time32(TimeUnit::SECOND),
                /* input= */ "[2, null, 5, 1]",
@@ -531,6 +650,10 @@ TEST_F(TestIndexInKernel, Boolean) {
   CheckIndexIn(boolean(), "[false, null, false, true]", "[false, true, null]",
                "[0, 2, 0, 1]");
 
+  // Duplicates in value_set
+  CheckIndexIn(boolean(), "[false, null, false, true]",
+               "[false, false, true, true, null, null]", "[0, 4, 0, 2]");
+
   // No Nulls
   CheckIndexIn(boolean(), "[true, true, false, true]", "[false, true]", "[1, 1, 0, 1]");
 
@@ -562,6 +685,10 @@ TYPED_TEST(TestIndexInKernelBinary, Binary) {
   this->CheckIndexIn(type, R"(["foo", null, "bar", "foo"])", R"(["foo", null, "bar"])",
                      R"([0, 1, 2, 0])");
 
+  // Duplicates in value_set
+  this->CheckIndexIn(type, R"(["foo", null, "bar", "foo"])",
+                     R"(["foo", "foo", null, null, "bar", "bar"])", R"([0, 2, 4, 0])");
+
   // No match
   this->CheckIndexIn(type,
                      /* input= */ R"(["foo", null, "bar", "foo"])",
@@ -653,6 +780,17 @@ TEST_F(TestIndexInKernel, FixedSizeBinary) {
                /*expected=*/R"([1, null, null, 0, 2, 0])",
                /*skip_nulls=*/true);
 
+  // Duplicates in value_set
+  CheckIndexIn(fixed_size_binary(3),
+               /*input=*/R"(["bbb", null, "ddd", "aaa", "ccc", "aaa"])",
+               /*value_set=*/R"(["aaa", "aaa", null, null, "bbb", "bbb", "ccc"])",
+               /*expected=*/R"([4, 2, null, 0, 6, 0])");
+  CheckIndexIn(fixed_size_binary(3),
+               /*input=*/R"(["bbb", null, "ddd", "aaa", "ccc", "aaa"])",
+               /*value_set=*/R"(["aaa", "aaa", null, null, "bbb", "bbb", "ccc"])",
+               /*expected=*/R"([4, null, null, 0, 6, 0])",
+               /*skip_nulls=*/true);
+
   // Empty input array
   CheckIndexIn(fixed_size_binary(5), R"([])", R"(["bbbbb", null, "aaaaa", "ccccc"])",
                R"([])");
@@ -689,6 +827,18 @@ TEST_F(TestIndexInKernel, Decimal) {
                /*value_set=*/R"(["11", "12"])",
                /*expected=*/R"([1, null, 0, 1, null])",
                /*skip_nulls=*/true);
+
+  // Duplicates in value_set
+  CheckIndexIn(type,
+               /*input=*/R"(["12", null, "11", "12", "13"])",
+               /*value_set=*/R"([null, null, "11", "11", "12", "12"])",
+               /*expected=*/R"([4, 0, 2, 4, null])",
+               /*skip_nulls=*/false);
+  CheckIndexIn(type,
+               /*input=*/R"(["12", null, "11", "12", "13"])",
+               /*value_set=*/R"([null, null, "11", "11", "12", "12"])",
+               /*expected=*/R"([4, null, 2, 4, null])",
+               /*skip_nulls=*/true);
 }
 
 TEST_F(TestIndexInKernel, DictionaryArray) {
@@ -753,6 +903,29 @@ TEST_F(TestIndexInKernel, DictionaryArray) {
                            /*value_set_json=*/R"(["C", "B", "A"])",
                            /*expected_json=*/"[null, null, null, 2, null]",
                            /*skip_nulls=*/true);
+
+    // With duplicates in value_set
+    CheckIndexInDictionary(/*type=*/utf8(),
+                           /*index_type=*/index_ty,
+                           /*input_dictionary_json=*/R"(["A", "B", "C", "D"])",
+                           /*input_index_json=*/"[1, 2, null, 0]",
+                           /*value_set_json=*/R"(["A", "A", "B", "B", "C", "C"])",
+                           /*expected_json=*/"[2, 4, null, 0]",
+                           /*skip_nulls=*/false);
+    CheckIndexInDictionary(/*type=*/utf8(),
+                           /*index_type=*/index_ty,
+                           /*input_dictionary_json=*/R"(["A", null, "C", "D"])",
+                           /*input_index_json=*/"[1, 3, null, 0, 1]",
+                           /*value_set_json=*/R"(["C", "C", "B", "B", "A", "A", null])",
+                           /*expected_json=*/"[6, null, 6, 4, 6]",
+                           /*skip_nulls=*/false);
+    CheckIndexInDictionary(/*type=*/utf8(),
+                           /*index_type=*/index_ty,
+                           /*input_dictionary_json=*/R"(["A", null, "C", "D"])",
+                           /*input_index_json=*/"[1, 3, null, 0, 1]",
+                           /*value_set_json=*/R"(["C", "C", "B", "B", "A", "A", null])",
+                           /*expected_json=*/"[null, null, null, 4, null]",
+                           /*skip_nulls=*/true);
   }
 }
 
@@ -773,6 +946,14 @@ TEST_F(TestIndexInKernel, ChunkedArrayInvoke) {
   CheckIndexInChunked(input, value_set, expected, /*skip_nulls=*/false);
   expected = ChunkedArrayFromJSON(int32(), {"[3, 1, 0, 3, null]", "[1, null, 3, null]"});
   CheckIndexInChunked(input, value_set, expected, /*skip_nulls=*/true);
+
+  // Duplicates in value_set
+  value_set = ChunkedArrayFromJSON(
+      utf8(), {R"(["ghi", "ghi", "def"])", R"(["def", null, null, "abc"])"});
+  expected = ChunkedArrayFromJSON(int32(), {"[6, 2, 0, 6, null]", "[2, 4, 6, null]"});
+  CheckIndexInChunked(input, value_set, expected, /*skip_nulls=*/false);
+  expected = ChunkedArrayFromJSON(int32(), {"[6, 2, 0, 6, null]", "[2, null, 6, null]"});
+  CheckIndexInChunked(input, value_set, expected, /*skip_nulls=*/true);
 }
 
 TEST(TestSetLookup, DispatchBest) {

From f88a00dfa47956d9b4a3f8fd538cb0b4cc0a44b9 Mon Sep 17 00:00:00 2001
From: Weston Pace <weston.pace@gmail.com>
Date: Wed, 28 Apr 2021 08:06:24 -0400
Subject: [PATCH 142/719] ARROW-12386: [C++] Support file parallelism in
 AsyncScanner

This PR adds file parallelism (with appropriate file readahead limits) into the async scanner.

~~**DRAFT**: This PR depends on ARROW-12289 and so will remain in the draft state until the parent is merged.~~

Closes #10076 from westonpace/feature/ARROW-12386--c-support-file-parallelism-in-asyncscanner

Authored-by: Weston Pace <weston.pace@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/src/arrow/dataset/scanner.cc      |  34 +++-
 cpp/src/arrow/dataset/scanner.h       |   7 +
 cpp/src/arrow/dataset/scanner_test.cc | 283 ++++++++++++++++++++++++++
 cpp/src/arrow/testing/future_util.h   |  18 ++
 4 files changed, 341 insertions(+), 1 deletion(-)

diff --git a/cpp/src/arrow/dataset/scanner.cc b/cpp/src/arrow/dataset/scanner.cc
index ed43e32b482..70f15294650 100644
--- a/cpp/src/arrow/dataset/scanner.cc
+++ b/cpp/src/arrow/dataset/scanner.cc
@@ -284,6 +284,8 @@ class ARROW_DS_EXPORT SyncScanner : public Scanner {
   Result<ScanTaskIterator> Scan() override;
   Status Scan(std::function<Status(TaggedRecordBatch)> visitor) override;
   Result<std::shared_ptr<Table>> ToTable() override;
+  Result<TaggedRecordBatchGenerator> ScanBatchesAsync() override;
+  Result<EnumeratedRecordBatchGenerator> ScanBatchesUnorderedAsync() override;
 
  protected:
   /// \brief GetFragments returns an iterator over all Fragments in this scan.
@@ -311,6 +313,14 @@ Result<TaggedRecordBatchIterator> SyncScanner::ScanBatches() {
   });
 }
 
+Result<TaggedRecordBatchGenerator> SyncScanner::ScanBatchesAsync() {
+  return Status::NotImplemented("Asynchronous scanning is not supported by SyncScanner");
+}
+
+Result<EnumeratedRecordBatchGenerator> SyncScanner::ScanBatchesUnorderedAsync() {
+  return Status::NotImplemented("Asynchronous scanning is not supported by SyncScanner");
+}
+
 Result<FragmentIterator> SyncScanner::GetFragments() {
   if (fragment_ != nullptr) {
     return MakeVectorIterator(FragmentVector{fragment_});
@@ -374,7 +384,9 @@ class ARROW_DS_EXPORT AsyncScanner : public Scanner,
 
   Status Scan(std::function<Status(TaggedRecordBatch)> visitor) override;
   Result<TaggedRecordBatchIterator> ScanBatches() override;
+  Result<TaggedRecordBatchGenerator> ScanBatchesAsync() override;
   Result<EnumeratedRecordBatchIterator> ScanBatchesUnordered() override;
+  Result<EnumeratedRecordBatchGenerator> ScanBatchesUnorderedAsync() override;
   Result<std::shared_ptr<Table>> ToTable() override;
 
  private:
@@ -507,13 +519,24 @@ Result<std::shared_ptr<Table>> AsyncScanner::ToTable() {
   return table_fut.result();
 }
 
+Result<EnumeratedRecordBatchGenerator> AsyncScanner::ScanBatchesUnorderedAsync() {
+  return ScanBatchesUnorderedAsync(internal::GetCpuThreadPool());
+}
+
 Result<EnumeratedRecordBatchGenerator> AsyncScanner::ScanBatchesUnorderedAsync(
     internal::Executor* cpu_executor) {
   auto self = shared_from_this();
   ARROW_ASSIGN_OR_RAISE(auto fragment_gen, GetFragments());
   ARROW_ASSIGN_OR_RAISE(auto batch_gen_gen,
                         FragmentsToBatches(self, std::move(fragment_gen)));
-  return MakeConcatenatedGenerator(std::move(batch_gen_gen));
+  auto batch_gen_gen_readahead = MakeSerialReadaheadGenerator(
+      std::move(batch_gen_gen), scan_options_->fragment_readahead);
+  return MakeMergedGenerator(std::move(batch_gen_gen_readahead),
+                             scan_options_->fragment_readahead);
+}
+
+Result<TaggedRecordBatchGenerator> AsyncScanner::ScanBatchesAsync() {
+  return ScanBatchesAsync(internal::GetCpuThreadPool());
 }
 
 Result<TaggedRecordBatchGenerator> AsyncScanner::ScanBatchesAsync(
@@ -676,6 +699,15 @@ Status ScannerBuilder::UseThreads(bool use_threads) {
   return Status::OK();
 }
 
+Status ScannerBuilder::FragmentReadahead(int fragment_readahead) {
+  if (fragment_readahead <= 0) {
+    return Status::Invalid("FragmentReadahead must be greater than 0, got ",
+                           fragment_readahead);
+  }
+  scan_options_->fragment_readahead = fragment_readahead;
+  return Status::OK();
+}
+
 Status ScannerBuilder::UseAsync(bool use_async) {
   scan_options_->use_async = use_async;
   return Status::OK();
diff --git a/cpp/src/arrow/dataset/scanner.h b/cpp/src/arrow/dataset/scanner.h
index 50660aa03ac..79e5986a4de 100644
--- a/cpp/src/arrow/dataset/scanner.h
+++ b/cpp/src/arrow/dataset/scanner.h
@@ -269,6 +269,7 @@ class ARROW_DS_EXPORT Scanner {
   /// If the readahead queue fills up then I/O will pause until the calling thread catches
   /// up.
   virtual Result<TaggedRecordBatchIterator> ScanBatches() = 0;
+  virtual Result<TaggedRecordBatchGenerator> ScanBatchesAsync() = 0;
   /// \brief Scan the dataset into a stream of record batches.  Unlike ScanBatches this
   /// method may allow record batches to be returned out of order.  This allows for more
   /// efficient scanning: some fragments may be accessed more quickly than others (e.g.
@@ -277,6 +278,7 @@ class ARROW_DS_EXPORT Scanner {
   /// To make up for the out-of-order iteration each batch is further tagged with
   /// positional information.
   virtual Result<EnumeratedRecordBatchIterator> ScanBatchesUnordered();
+  virtual Result<EnumeratedRecordBatchGenerator> ScanBatchesUnorderedAsync() = 0;
   /// \brief A convenience to synchronously load the given rows by index.
   ///
   /// Will only consume as many batches as needed from ScanBatches().
@@ -350,6 +352,11 @@ class ARROW_DS_EXPORT ScannerBuilder {
   ///        ThreadPool found in ScanOptions;
   Status UseThreads(bool use_threads = true);
 
+  /// \brief Limit how many fragments the scanner will read at once
+  ///
+  /// Note: This is only enforced in "async" mode
+  Status FragmentReadahead(int fragment_readahead);
+
   /// \brief Indicate if the Scanner should run in experimental "async" mode
   ///
   /// This mode should have considerably better performance on high-latency or parallel
diff --git a/cpp/src/arrow/dataset/scanner_test.cc b/cpp/src/arrow/dataset/scanner_test.cc
index 4f8c822ea00..a83210fdd3b 100644
--- a/cpp/src/arrow/dataset/scanner_test.cc
+++ b/cpp/src/arrow/dataset/scanner_test.cc
@@ -29,6 +29,7 @@
 #include "arrow/dataset/test_util.h"
 #include "arrow/record_batch.h"
 #include "arrow/table.h"
+#include "arrow/testing/future_util.h"
 #include "arrow/testing/generator.h"
 #include "arrow/testing/gtest_util.h"
 #include "arrow/testing/util.h"
@@ -509,6 +510,288 @@ TEST_P(TestScanner, Head) {
 INSTANTIATE_TEST_SUITE_P(TestScannerThreading, TestScanner,
                          ::testing::ValuesIn(TestScannerParams::Values()));
 
+/// These ControlledXyz classes allow for controlling the order in which things are
+/// delivered so that we can test out of order resequencing.  The dataset allows
+/// batches to be delivered on any fragment.  When delivering batches a num_rows
+/// parameter is taken which can be used to differentiate batches.
+class ControlledFragment : public Fragment {
+ public:
+  explicit ControlledFragment(std::shared_ptr<Schema> schema)
+      : Fragment(literal(true), std::move(schema)) {}
+
+  Result<ScanTaskIterator> Scan(std::shared_ptr<ScanOptions> options) override {
+    return Status::NotImplemented(
+        "Not needed for testing.  Sync can only return things in-order.");
+  }
+  Result<std::shared_ptr<Schema>> ReadPhysicalSchemaImpl() override {
+    return physical_schema_;
+  }
+  std::string type_name() const override { return "scanner_test.cc::ControlledFragment"; }
+
+  Result<RecordBatchGenerator> ScanBatchesAsync(
+      const std::shared_ptr<ScanOptions>& options) override {
+    return record_batch_generator_;
+  };
+
+  void Finish() { ARROW_UNUSED(record_batch_generator_.producer().Close()); }
+  void DeliverBatch(uint32_t num_rows) {
+    auto batch = ConstantArrayGenerator::Zeroes(num_rows, physical_schema_);
+    record_batch_generator_.producer().Push(std::move(batch));
+  }
+
+ private:
+  PushGenerator<std::shared_ptr<RecordBatch>> record_batch_generator_;
+};
+
+// TODO(ARROW-8163) Add testing for fragments arriving out of order
+class ControlledDataset : public Dataset {
+ public:
+  explicit ControlledDataset(int num_fragments)
+      : Dataset(arrow::schema({field("i32", int32())})), fragments_() {
+    for (int i = 0; i < num_fragments; i++) {
+      fragments_.push_back(std::make_shared<ControlledFragment>(schema_));
+    }
+  }
+
+  std::string type_name() const override { return "scanner_test.cc::ControlledDataset"; }
+  Result<std::shared_ptr<Dataset>> ReplaceSchema(
+      std::shared_ptr<Schema> schema) const override {
+    return Status::NotImplemented("Should not be called by unit test");
+  }
+
+  void DeliverBatch(int fragment_index, int num_rows) {
+    fragments_[fragment_index]->DeliverBatch(num_rows);
+  }
+
+  void FinishFragment(int fragment_index) { fragments_[fragment_index]->Finish(); }
+
+ protected:
+  Result<FragmentIterator> GetFragmentsImpl(Expression predicate) override {
+    std::vector<std::shared_ptr<Fragment>> casted_fragments(fragments_.begin(),
+                                                            fragments_.end());
+    return MakeVectorIterator(std::move(casted_fragments));
+  }
+
+ private:
+  std::vector<std::shared_ptr<ControlledFragment>> fragments_;
+};
+
+constexpr int kNumFragments = 2;
+
+class TestReordering : public ::testing::Test {
+ public:
+  void SetUp() override { dataset_ = std::make_shared<ControlledDataset>(kNumFragments); }
+
+  // Given a vector of fragment indices (one per batch) return a vector
+  // (one per fragment) mapping fragment index to the last occurrence of that
+  // index in order
+  //
+  // This allows us to know when to mark a fragment as finished
+  std::vector<int> GetLastIndices(const std::vector<int>& order) {
+    std::vector<int> last_indices(kNumFragments);
+    for (std::size_t i = 0; i < kNumFragments; i++) {
+      auto last_p = std::find(order.rbegin(), order.rend(), static_cast<int>(i));
+      EXPECT_NE(last_p, order.rend());
+      last_indices[i] = static_cast<int>(std::distance(last_p, order.rend())) - 1;
+    }
+    return last_indices;
+  }
+
+  /// We buffer one item in order to enumerate it (technically this could be avoided if
+  /// delivering in order but easier to have a single code path).  We also can't deliver
+  /// items that don't come next.  These two facts make for some pretty complex logic
+  /// to determine when items are ready to be collected.
+  std::vector<TaggedRecordBatch> DeliverAndCollect(std::vector<int> order,
+                                                   TaggedRecordBatchGenerator gen) {
+    std::vector<TaggedRecordBatch> collected;
+    auto last_indices = GetLastIndices(order);
+    int num_fragments = static_cast<int>(last_indices.size());
+    std::vector<int> batches_seen_for_fragment(num_fragments);
+    auto current_fragment_index = 0;
+    auto seen_fragment = false;
+    for (std::size_t i = 0; i < order.size(); i++) {
+      auto fragment_index = order[i];
+      dataset_->DeliverBatch(fragment_index, static_cast<int>(i));
+      batches_seen_for_fragment[fragment_index]++;
+      if (static_cast<int>(i) == last_indices[fragment_index]) {
+        dataset_->FinishFragment(fragment_index);
+      }
+      if (current_fragment_index == fragment_index) {
+        if (seen_fragment) {
+          EXPECT_FINISHES_OK_AND_ASSIGN(auto next, gen());
+          collected.push_back(std::move(next));
+        } else {
+          seen_fragment = true;
+        }
+        if (static_cast<int>(i) == last_indices[fragment_index]) {
+          // Immediately collect your bonus fragment
+          EXPECT_FINISHES_OK_AND_ASSIGN(auto next, gen());
+          collected.push_back(std::move(next));
+          // Now collect any batches freed up that couldn't be delivered because they came
+          // from the wrong fragment
+          auto last_fragment_index = fragment_index;
+          fragment_index++;
+          seen_fragment = batches_seen_for_fragment[fragment_index] > 0;
+          while (fragment_index < num_fragments &&
+                 fragment_index != last_fragment_index) {
+            last_fragment_index = fragment_index;
+            for (int j = 0; j < batches_seen_for_fragment[fragment_index] - 1; j++) {
+              EXPECT_FINISHES_OK_AND_ASSIGN(auto next, gen());
+              collected.push_back(std::move(next));
+            }
+            if (static_cast<int>(i) >= last_indices[fragment_index]) {
+              EXPECT_FINISHES_OK_AND_ASSIGN(auto next, gen());
+              collected.push_back(std::move(next));
+              fragment_index++;
+              if (fragment_index < num_fragments) {
+                seen_fragment = batches_seen_for_fragment[fragment_index] > 0;
+              }
+            }
+          }
+        }
+      }
+    }
+    return collected;
+  }
+
+  struct FragmentStats {
+    int last_index;
+    bool seen;
+  };
+
+  std::vector<FragmentStats> GetFragmentStats(const std::vector<int>& order) {
+    auto last_indices = GetLastIndices(order);
+    std::vector<FragmentStats> fragment_stats;
+    for (std::size_t i = 0; i < last_indices.size(); i++) {
+      fragment_stats.push_back({last_indices[i], false});
+    }
+    return fragment_stats;
+  }
+
+  /// When data arrives out of order then we first have to buffer up 1 item in order to
+  /// know when the last item has arrived (so we can mark it as the last).  This means
+  /// sometimes we deliver an item and don't get one (first in a fragment) and sometimes
+  /// we deliver an item and we end up getting two (last in a fragment)
+  std::vector<EnumeratedRecordBatch> DeliverAndCollect(
+      std::vector<int> order, EnumeratedRecordBatchGenerator gen) {
+    std::vector<EnumeratedRecordBatch> collected;
+    auto fragment_stats = GetFragmentStats(order);
+    for (std::size_t i = 0; i < order.size(); i++) {
+      auto fragment_index = order[i];
+      dataset_->DeliverBatch(fragment_index, static_cast<int>(i));
+      if (static_cast<int>(i) == fragment_stats[fragment_index].last_index) {
+        dataset_->FinishFragment(fragment_index);
+        EXPECT_FINISHES_OK_AND_ASSIGN(auto next, gen());
+        collected.push_back(std::move(next));
+      }
+      if (!fragment_stats[fragment_index].seen) {
+        fragment_stats[fragment_index].seen = true;
+      } else {
+        EXPECT_FINISHES_OK_AND_ASSIGN(auto next, gen());
+        collected.push_back(std::move(next));
+      }
+    }
+    return collected;
+  }
+
+  std::shared_ptr<Scanner> MakeScanner(int fragment_readahead = 0) {
+    ScannerBuilder builder(dataset_);
+    // Reordering tests only make sense for async
+    ARROW_EXPECT_OK(builder.UseAsync(true));
+    if (fragment_readahead != 0) {
+      ARROW_EXPECT_OK(builder.FragmentReadahead(fragment_readahead));
+    }
+    EXPECT_OK_AND_ASSIGN(auto scanner, builder.Finish());
+    return scanner;
+  }
+
+  void AssertBatchesInOrder(const std::vector<TaggedRecordBatch>& batches,
+                            std::vector<int> expected_order) {
+    ASSERT_EQ(expected_order.size(), batches.size());
+    for (std::size_t i = 0; i < batches.size(); i++) {
+      ASSERT_EQ(expected_order[i], batches[i].record_batch->num_rows());
+    }
+  }
+
+  void AssertBatchesInOrder(const std::vector<EnumeratedRecordBatch>& batches,
+                            std::vector<int> expected_batch_indices,
+                            std::vector<int> expected_row_sizes) {
+    ASSERT_EQ(expected_batch_indices.size(), batches.size());
+    for (std::size_t i = 0; i < batches.size(); i++) {
+      ASSERT_EQ(expected_row_sizes[i], batches[i].record_batch.value->num_rows());
+      ASSERT_EQ(expected_batch_indices[i], batches[i].record_batch.index);
+    }
+  }
+
+  std::shared_ptr<ControlledDataset> dataset_;
+};
+
+TEST_F(TestReordering, ScanBatches) {
+  auto scanner = MakeScanner();
+  ASSERT_OK_AND_ASSIGN(auto batch_gen, scanner->ScanBatchesAsync());
+  auto collected = DeliverAndCollect({0, 0, 1, 1, 0}, std::move(batch_gen));
+  AssertBatchesInOrder(collected, {0, 1, 4, 2, 3});
+}
+
+TEST_F(TestReordering, ScanBatchesUnordered) {
+  auto scanner = MakeScanner();
+  ASSERT_OK_AND_ASSIGN(auto batch_gen, scanner->ScanBatchesUnorderedAsync());
+  auto collected = DeliverAndCollect({0, 0, 1, 1, 0}, std::move(batch_gen));
+  AssertBatchesInOrder(collected, {0, 0, 1, 1, 2}, {0, 2, 3, 1, 4});
+}
+
+struct BatchConsumer {
+  explicit BatchConsumer(EnumeratedRecordBatchGenerator generator)
+      : generator(generator), next() {}
+
+  void AssertCanConsume() {
+    if (!next.is_valid()) {
+      next = generator();
+    }
+    ASSERT_FINISHES_OK(next);
+    next = Future<EnumeratedRecordBatch>();
+  }
+
+  void AssertCannotConsume() {
+    if (!next.is_valid()) {
+      next = generator();
+    }
+    SleepABit();
+    ASSERT_FALSE(next.is_finished());
+  }
+
+  void AssertFinished() {
+    if (!next.is_valid()) {
+      next = generator();
+    }
+    ASSERT_FINISHES_OK_AND_ASSIGN(auto last, next);
+    ASSERT_TRUE(IsIterationEnd(last));
+  }
+
+  EnumeratedRecordBatchGenerator generator;
+  Future<EnumeratedRecordBatch> next;
+};
+
+TEST_F(TestReordering, FileReadahead) {
+  auto scanner = MakeScanner(/*fragment_readahead=*/1);
+  ASSERT_OK_AND_ASSIGN(auto batch_gen, scanner->ScanBatchesUnorderedAsync());
+  BatchConsumer consumer(std::move(batch_gen));
+  dataset_->DeliverBatch(0, 0);
+  dataset_->DeliverBatch(0, 1);
+  consumer.AssertCanConsume();
+  consumer.AssertCannotConsume();
+  dataset_->DeliverBatch(1, 0);
+  consumer.AssertCannotConsume();
+  dataset_->FinishFragment(1);
+  // Even though fragment 1 is finished we cannot read it because fragment_readahead
+  // is 1 so we should only be reading fragment 0
+  consumer.AssertCannotConsume();
+  dataset_->FinishFragment(0);
+  consumer.AssertCanConsume();
+  consumer.AssertCanConsume();
+  consumer.AssertFinished();
+}
+
 class TestScannerBuilder : public ::testing::Test {
   void SetUp() override {
     DatasetVector sources;
diff --git a/cpp/src/arrow/testing/future_util.h b/cpp/src/arrow/testing/future_util.h
index a61a9894fe3..0a20b5f4d57 100644
--- a/cpp/src/arrow/testing/future_util.h
+++ b/cpp/src/arrow/testing/future_util.h
@@ -67,6 +67,24 @@
     ASSERT_EQ(expected, _actual);                        \
   } while (0)
 
+#define EXPECT_FINISHES_IMPL(fut)                                   \
+  do {                                                              \
+    EXPECT_TRUE(fut.Wait(300));                                     \
+    if (!fut.is_finished()) {                                       \
+      ADD_FAILURE() << "Future did not finish in a timely fashion"; \
+    }                                                               \
+  } while (false)
+
+#define ON_FINISH_ASSIGN_OR_HANDLE_ERROR_IMPL(handle_error, future_name, lhs, rexpr) \
+  auto future_name = (rexpr);                                                        \
+  EXPECT_FINISHES_IMPL(future_name);                                                 \
+  handle_error(future_name.status());                                                \
+  EXPECT_OK_AND_ASSIGN(lhs, future_name.result());
+
+#define EXPECT_FINISHES_OK_AND_ASSIGN(lhs, rexpr) \
+  ON_FINISH_ASSIGN_OR_HANDLE_ERROR_IMPL(          \
+      ARROW_EXPECT_OK, ARROW_ASSIGN_OR_RAISE_NAME(_fut, __COUNTER__), lhs, rexpr);
+
 namespace arrow {
 
 template <typename T>

From 8c3363e25725974c3cdb6563d6320fb2726b0b89 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Wed, 28 Apr 2021 15:09:08 +0200
Subject: [PATCH 143/719] ARROW-12407: [Python][Dataset] Remove ScanTask
 bindings

For 5.0.0: remove Python bindings to ScanTask to avoid the deprecation warning at build time.

`scan()` is replaced with `scanner()` which takes the same arguments, but gives back the Scanner directly.

Closes #10062 from lidavidm/arrow-12407

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
---
 python/pyarrow/_dataset.pyx                  | 141 ++++---------------
 python/pyarrow/dataset.py                    |   1 -
 python/pyarrow/includes/libarrow_dataset.pxd |   3 +
 python/pyarrow/tests/test_dataset.py         |  35 ++---
 4 files changed, 44 insertions(+), 136 deletions(-)

diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx
index 619942840c4..44f016f5218 100644
--- a/python/pyarrow/_dataset.pyx
+++ b/python/pyarrow/_dataset.pyx
@@ -359,19 +359,12 @@ cdef class Dataset(_Weakrefable):
         for maybe_fragment in c_fragments:
             yield Fragment.wrap(GetResultValue(move(maybe_fragment)))
 
-    def _scanner(self, **kwargs):
-        return Scanner.from_dataset(self, **kwargs)
-
-    def scan(self, **kwargs):
+    def scanner(self, **kwargs):
         """Builds a scan operation against the dataset.
 
-        It produces a stream of ScanTasks which is meant to be a unit of work
-        to be dispatched. The tasks are not executed automatically, the user is
-        responsible to execute and dispatch the individual tasks, so custom
-        local task scheduling can be implemented.
-
-        .. deprecated:: 4.0.0
-           Use `to_batches` instead.
+        Data is not loaded immediately. Instead, this produces a Scanner,
+        which exposes further operations (e.g. loading all data as a
+        table, counting rows).
 
         Parameters
         ----------
@@ -408,7 +401,7 @@ cdef class Dataset(_Weakrefable):
 
         Returns
         -------
-        scan_tasks : iterator of ScanTask
+        scanner : Scanner
 
         Examples
         --------
@@ -427,21 +420,18 @@ cdef class Dataset(_Weakrefable):
 
         >>> dataset.scan(filter=ds.field("A") > 0)
         """
-        return self._scanner(**kwargs).scan()
+        return Scanner.from_dataset(self, **kwargs)
 
     def to_batches(self, **kwargs):
         """Read the dataset as materialized record batches.
 
-        Builds a scan operation against the dataset and sequentially executes
-        the ScanTasks as the returned generator gets consumed.
-
         See scan method parameters documentation.
 
         Returns
         -------
         record_batches : iterator of RecordBatch
         """
-        return self._scanner(**kwargs).to_batches()
+        return self.scanner(**kwargs).to_batches()
 
     def to_table(self, **kwargs):
         """Read the dataset to an arrow table.
@@ -455,7 +445,7 @@ cdef class Dataset(_Weakrefable):
         -------
         table : Table instance
         """
-        return self._scanner(**kwargs).to_table()
+        return self.scanner(**kwargs).to_table()
 
     def head(self, int num_rows, **kwargs):
         """Load the first N rows of the dataset.
@@ -466,7 +456,7 @@ cdef class Dataset(_Weakrefable):
         -------
         table : Table instance
         """
-        return self._scanner(**kwargs).head(num_rows)
+        return self.scanner(**kwargs).head(num_rows)
 
     @property
     def schema(self):
@@ -918,19 +908,12 @@ cdef class Fragment(_Weakrefable):
         """
         return Expression.wrap(self.fragment.partition_expression())
 
-    def _scanner(self, **kwargs):
-        return Scanner.from_fragment(self, **kwargs)
-
-    def scan(self, Schema schema=None, **kwargs):
+    def scanner(self, Schema schema=None, **kwargs):
         """Builds a scan operation against the dataset.
 
-        It produces a stream of ScanTasks which is meant to be a unit of work
-        to be dispatched. The tasks are not executed automatically, the user is
-        responsible to execute and dispatch the individual tasks, so custom
-        local task scheduling can be implemented.
-
-        .. deprecated:: 4.0.0
-           Use `to_batches` instead.
+        Data is not loaded immediately. Instead, this produces a Scanner,
+        which exposes further operations (e.g. loading all data as a
+        table, counting rows).
 
         Parameters
         ----------
@@ -971,9 +954,10 @@ cdef class Fragment(_Weakrefable):
 
         Returns
         -------
-        scan_tasks : iterator of ScanTask
+        scanner : Scanner
+
         """
-        return self._scanner(schema=schema, **kwargs).scan()
+        return Scanner.from_fragment(self, schema=schema, **kwargs)
 
     def to_batches(self, Schema schema=None, **kwargs):
         """Read the fragment as materialized record batches.
@@ -984,7 +968,7 @@ cdef class Fragment(_Weakrefable):
         -------
         record_batches : iterator of RecordBatch
         """
-        return self._scanner(schema=schema, **kwargs).to_batches()
+        return self.scanner(schema=schema, **kwargs).to_batches()
 
     def to_table(self, Schema schema=None, **kwargs):
         """Convert this Fragment into a Table.
@@ -998,7 +982,7 @@ cdef class Fragment(_Weakrefable):
         -------
         table : Table
         """
-        return self._scanner(schema=schema, **kwargs).to_table()
+        return self.scanner(schema=schema, **kwargs).to_table()
 
     def head(self, int num_rows, **kwargs):
         """Load the first N rows of the fragment.
@@ -1009,7 +993,7 @@ cdef class Fragment(_Weakrefable):
         -------
         table : Table instance
         """
-        return self._scanner(**kwargs).head(num_rows)
+        return self.scanner(**kwargs).head(num_rows)
 
 
 cdef class FileFragment(Fragment):
@@ -2548,53 +2532,6 @@ cdef class ParquetDatasetFactory(DatasetFactory):
         self.parquet_factory = <CParquetDatasetFactory*> sp.get()
 
 
-cdef class ScanTask(_Weakrefable):
-    """Read record batches from a range of a single data fragment.
-
-    A ScanTask is meant to be a unit of work to be dispatched.
-    """
-
-    cdef:
-        shared_ptr[CScanTask] wrapped
-        CScanTask* task
-
-    def __init__(self):
-        _forbid_instantiation(self.__class__, subclasses_instead=False)
-
-    cdef init(self, shared_ptr[CScanTask]& sp):
-        self.wrapped = sp
-        self.task = self.wrapped.get()
-
-    @staticmethod
-    cdef wrap(shared_ptr[CScanTask]& sp):
-        cdef ScanTask self = ScanTask.__new__(ScanTask)
-        self.init(sp)
-        return self
-
-    cdef inline shared_ptr[CScanTask] unwrap(self) nogil:
-        return self.wrapped
-
-    def execute(self):
-        """Iterate through sequence of materialized record batches.
-
-        Execution semantics are encapsulated in the particular ScanTask
-        implementation.
-
-        Returns
-        -------
-        record_batches : iterator of RecordBatch
-        """
-        # Return an explicit iterator object instead of using a
-        # generator so that this method is eagerly evaluated (a
-        # generator would mean no work gets done until the first
-        # iteration). This also works around a bug in Cython's
-        # generator.
-        cdef CRecordBatchIterator iterator
-        with nogil:
-            iterator = move(GetResultValue(self.task.Execute()))
-        return RecordBatchIterator.wrap(self, move(iterator))
-
-
 cdef class RecordBatchIterator(_Weakrefable):
     """An iterator over a sequence of record batches."""
     cdef:
@@ -2814,37 +2751,24 @@ cdef class Scanner(_Weakrefable):
         scanner = GetResultValue(builder.get().Finish())
         return Scanner.wrap(scanner)
 
-    def scan(self):
-        """Returns a stream of ScanTasks
-
-        The caller is responsible to dispatch/schedule said tasks. Tasks should
-        be safe to run in a concurrent fashion and outlive the iterator.
+    @property
+    def dataset_schema(self):
+        """The schema with which batches will be read from fragments."""
+        return pyarrow_wrap_schema(
+            self.scanner.options().get().dataset_schema)
 
-        .. deprecated:: 4.0.0
-           Use `to_batches` instead.
+    @property
+    def projected_schema(self):
+        """The materialized schema of the data, accounting for projections.
 
-        Returns
-        -------
-        scan_tasks : iterator of ScanTask
+        This is the schema of any data returned from the scanner.
         """
-        import warnings
-        warnings.warn("Scanner.scan is deprecated as of 4.0.0, "
-                      "please use Scanner.to_batches instead.",
-                      DeprecationWarning)
-        # Planned for removal in ARROW-11782
-        # Make this method eager so the warning appears immediately
-        return self._scan()
-
-    def _scan(self):
-        for maybe_task in GetResultValue(self.scanner.Scan()):
-            yield ScanTask.wrap(GetResultValue(move(maybe_task)))
+        return pyarrow_wrap_schema(
+            self.scanner.options().get().projected_schema)
 
     def to_batches(self):
         """Consume a Scanner in record batches.
 
-        Sequentially executes the ScanTasks as the returned generator gets
-        consumed.
-
         Returns
         -------
         record_batches : iterator of RecordBatch
@@ -2858,9 +2782,6 @@ cdef class Scanner(_Weakrefable):
     def scan_batches(self):
         """Consume a Scanner in record batches with corresponding fragments.
 
-        Sequentially executes the ScanTasks as the returned generator gets
-        consumed.
-
         Returns
         -------
         record_batches : iterator of TaggedRecordBatch
@@ -2970,7 +2891,7 @@ def _filesystemdataset_write(
     c_options.max_partitions = max_partitions
     c_options.basename_template = tobytes(basename_template)
 
-    scanner = data._scanner(use_threads=use_threads)
+    scanner = data.scanner(use_threads=use_threads)
 
     c_scanner = (<Scanner> scanner).unwrap()
     with nogil:
diff --git a/python/pyarrow/dataset.py b/python/pyarrow/dataset.py
index b3c142f6323..a5d457ced62 100644
--- a/python/pyarrow/dataset.py
+++ b/python/pyarrow/dataset.py
@@ -49,7 +49,6 @@
     PartitioningFactory,
     RowGroupInfo,
     Scanner,
-    ScanTask,
     TaggedRecordBatch,
     UnionDataset,
     UnionDatasetFactory,
diff --git a/python/pyarrow/includes/libarrow_dataset.pxd b/python/pyarrow/includes/libarrow_dataset.pxd
index 16f6c5c0183..82e1c8f13a2 100644
--- a/python/pyarrow/includes/libarrow_dataset.pxd
+++ b/python/pyarrow/includes/libarrow_dataset.pxd
@@ -59,6 +59,9 @@ cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil:
         @staticmethod
         shared_ptr[CScanOptions] Make(shared_ptr[CSchema] schema)
 
+        shared_ptr[CSchema] dataset_schema
+        shared_ptr[CSchema] projected_schema
+
     cdef cppclass CFragmentScanOptions "arrow::dataset::FragmentScanOptions":
         c_string type_name() const
 
diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py
index 7688cf78ac7..78bbbe2893c 100644
--- a/python/pyarrow/tests/test_dataset.py
+++ b/python/pyarrow/tests/test_dataset.py
@@ -21,7 +21,6 @@
 import pathlib
 import pickle
 import textwrap
-import threading
 
 import numpy as np
 import pytest
@@ -307,15 +306,13 @@ def test_dataset(dataset):
     # TODO(kszucs): test non-boolean Exprs for filter do raise
     expected_i64 = pa.array([0, 1, 2, 3, 4], type=pa.int64())
     expected_f64 = pa.array([0, 1, 2, 3, 4], type=pa.float64())
-    with pytest.deprecated_call():
-        dataset.scan()
 
     for batch in dataset.to_batches():
         assert isinstance(batch, pa.RecordBatch)
         assert batch.column(0).equals(expected_i64)
         assert batch.column(1).equals(expected_f64)
 
-    for batch in ds.Scanner.from_dataset(dataset).scan_batches():
+    for batch in dataset.scanner().scan_batches():
         assert isinstance(batch, ds.TaggedRecordBatch)
         assert isinstance(batch.fragment, ds.Fragment)
 
@@ -333,37 +330,25 @@ def test_dataset(dataset):
     assert sorted(result['key']) == ['xxx', 'yyy']
 
 
-def test_dataset_execute_iterator(dataset):
-    # ARROW-11596: this would segfault due to Cython raising
-    # StopIteration without holding the GIL. (Fixed on Cython master,
-    # post 3.0a6)
-    with pytest.deprecated_call():
-        tasks = dataset.scan()
-    task = next(tasks)
-    iterator = task.execute()
-    thread = threading.Thread(target=lambda: next(iterator))
-    thread.start()
-    thread.join()
-    with pytest.raises(StopIteration):
-        next(iterator)
-
-
 def test_scanner(dataset):
-    scanner = ds.Scanner.from_dataset(dataset,
-                                      memory_pool=pa.default_memory_pool())
+    scanner = dataset.scanner(memory_pool=pa.default_memory_pool())
     assert isinstance(scanner, ds.Scanner)
 
     with pytest.raises(pa.ArrowInvalid):
-        ds.Scanner.from_dataset(dataset, columns=['unknown'])
+        dataset.scanner(columns=['unknown'])
 
-    scanner = ds.Scanner.from_dataset(dataset, columns=['i64'],
-                                      memory_pool=pa.default_memory_pool())
+    scanner = dataset.scanner(columns=['i64'],
+                              memory_pool=pa.default_memory_pool())
+    assert scanner.dataset_schema == dataset.schema
+    assert scanner.projected_schema == pa.schema([("i64", pa.int64())])
 
     assert isinstance(scanner, ds.Scanner)
     for batch in scanner.to_batches():
+        assert batch.schema == scanner.projected_schema
         assert batch.num_columns == 1
 
     table = scanner.to_table()
+    assert table.schema == scanner.projected_schema
     for i in range(table.num_rows):
         indices = pa.array([i])
         assert table.take(indices) == scanner.take(indices)
@@ -671,7 +656,7 @@ def test_filesystem_factory(mockfs, paths_or_selector, pre_buffer):
     dataset = factory.finish()
     assert isinstance(dataset, ds.FileSystemDataset)
 
-    scanner = ds.Scanner.from_dataset(dataset)
+    scanner = dataset.scanner()
     expected_i64 = pa.array([0, 1, 2, 3, 4], type=pa.int64())
     expected_f64 = pa.array([0, 1, 2, 3, 4], type=pa.float64())
     expected_str = pa.DictionaryArray.from_arrays(

From 9218fe4d69b6bb5e60be41b0ba18f2dd30df41cf Mon Sep 17 00:00:00 2001
From: Matthew Topol <mtopol@factset.com>
Date: Wed, 28 Apr 2021 08:53:43 -0700
Subject: [PATCH 144/719] ARROW-12517: [Go][Flight] Expose app metadata in
 flight client and server

Adding a convenient way to expose the Application Metadata field in the arrow flight client and server for Go.

Closes #10142 from zeroshade/arrow-12517

Authored-by: Matthew Topol <mtopol@factset.com>
Signed-off-by: Micah Kornfield <emkornfield@gmail.com>
---
 go/arrow/flight/flight_test.go         | 63 ++++++++++++++++++++++++++
 go/arrow/flight/record_batch_reader.go | 54 ++++++++++++++++++++--
 go/arrow/flight/record_batch_writer.go | 23 +++++++++-
 3 files changed, 134 insertions(+), 6 deletions(-)

diff --git a/go/arrow/flight/flight_test.go b/go/arrow/flight/flight_test.go
index bd81892fdb9..61bbc3bbba5 100644
--- a/go/arrow/flight/flight_test.go
+++ b/go/arrow/flight/flight_test.go
@@ -19,6 +19,7 @@ package flight_test
 import (
 	"context"
 	"errors"
+	"fmt"
 	"io"
 	"testing"
 
@@ -311,3 +312,65 @@ func TestServer(t *testing.T) {
 		t.Fatalf("got %d, want %d", numRows, fi.TotalRecords)
 	}
 }
+
+type flightMetadataWriterServer struct{}
+
+func (f *flightMetadataWriterServer) DoGet(tkt *flight.Ticket, fs flight.FlightService_DoGetServer) error {
+	recs := arrdata.Records[string(tkt.GetTicket())]
+
+	w := flight.NewRecordWriter(fs, ipc.WithSchema(recs[0].Schema()))
+	defer w.Close()
+	for idx, r := range recs {
+		w.WriteWithAppMetadata(r, []byte(fmt.Sprintf("%d_%s", idx, string(tkt.GetTicket()))) /*metadata*/)
+	}
+	return nil
+}
+
+func TestFlightWithAppMetadata(t *testing.T) {
+	f := &flightMetadataWriterServer{}
+	s := flight.NewFlightServer(nil)
+	s.RegisterFlightService(&flight.FlightServiceService{DoGet: f.DoGet})
+	s.Init("localhost:0")
+
+	go s.Serve()
+	defer s.Shutdown()
+
+	client, err := flight.NewFlightClient(s.Addr().String(), nil, grpc.WithInsecure())
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer client.Close()
+
+	fdata, err := client.DoGet(context.Background(), &flight.Ticket{Ticket: []byte("primitives")})
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	r, err := flight.NewRecordReader(fdata)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	expected := arrdata.Records["primitives"]
+	idx := 0
+	for {
+		rec, err := r.Read()
+		if err != nil {
+			if err == io.EOF {
+				break
+			}
+			t.Fatal(err)
+		}
+
+		appMeta := r.LatestAppMetadata()
+		if !array.RecordEqual(expected[idx], rec) {
+			t.Errorf("flight data stream records for idx: %d don't match: \ngot = %#v\nwant = %#v", idx, rec, expected[idx])
+		}
+
+		exMeta := fmt.Sprintf("%d_primitives", idx)
+		if string(appMeta) != exMeta {
+			t.Errorf("flight data stream application metadata mismatch: got: %v, want: %v\n", string(appMeta), exMeta)
+		}
+		idx++
+	}
+}
diff --git a/go/arrow/flight/record_batch_reader.go b/go/arrow/flight/record_batch_reader.go
index 8e3490fac9f..582187ee2b0 100644
--- a/go/arrow/flight/record_batch_reader.go
+++ b/go/arrow/flight/record_batch_reader.go
@@ -24,6 +24,7 @@ import (
 	"github.com/apache/arrow/go/arrow/internal/debug"
 	"github.com/apache/arrow/go/arrow/ipc"
 	"github.com/apache/arrow/go/arrow/memory"
+	"golang.org/x/xerrors"
 )
 
 // DataStreamReader is an interface for receiving flight data messages on a stream
@@ -37,16 +38,23 @@ type dataMessageReader struct {
 
 	refCount int64
 	msg      *ipc.Message
-	err      error
+
+	lastAppMetadata []byte
 }
 
 func (d *dataMessageReader) Message() (*ipc.Message, error) {
 	fd, err := d.rdr.Recv()
 	if err != nil {
+		// clear the previous message in the error case
+		d.msg.Release()
+		d.msg = nil
+		d.lastAppMetadata = nil
 		return nil, err
 	}
 
-	return ipc.NewMessage(memory.NewBufferBytes(fd.DataHeader), memory.NewBufferBytes(fd.DataBody)), nil
+	d.lastAppMetadata = fd.AppMetadata
+	d.msg = ipc.NewMessage(memory.NewBufferBytes(fd.DataHeader), memory.NewBufferBytes(fd.DataBody))
+	return d.msg, nil
 }
 
 func (d *dataMessageReader) Retain() {
@@ -60,15 +68,53 @@ func (d *dataMessageReader) Release() {
 		if d.msg != nil {
 			d.msg.Release()
 			d.msg = nil
+			d.lastAppMetadata = nil
 		}
 	}
 }
 
+// Reader is an ipc.Reader which also keeps track of the metadata from
+// the FlightData messages as they come in, calling LatestAppMetadata
+// will return the metadata bytes from the most recently read message.
+type Reader struct {
+	*ipc.Reader
+	dmr *dataMessageReader
+}
+
+// Retain increases the reference count for the underlying message reader
+// and ipc.Reader which are utilized by this Reader.
+func (r *Reader) Retain() {
+	r.Reader.Retain()
+	r.dmr.Retain()
+}
+
+// Release reduces the reference count for the underlying message reader
+// and ipc.Reader, when the reference counts become zero, the allocated
+// memory is released for the stored record and metadata.
+func (r *Reader) Release() {
+	r.Reader.Release()
+	r.dmr.Release()
+}
+
+// LatestAppMetadata returns the bytes from the AppMetadata field of the
+// most recently read FlightData message that was processed by calling
+// the Next function. The metadata returned would correspond to the record
+// retrieved by calling Record().
+func (r *Reader) LatestAppMetadata() []byte {
+	return r.dmr.lastAppMetadata
+}
+
 // NewRecordReader constructs an ipc reader using the flight data stream reader
 // as the source of the ipc messages, opts passed will be passed to the underlying
 // ipc.Reader such as ipc.WithSchema and ipc.WithAllocator
-func NewRecordReader(r DataStreamReader, opts ...ipc.Option) (*ipc.Reader, error) {
-	return ipc.NewReaderFromMessageReader(&dataMessageReader{rdr: r}, opts...)
+func NewRecordReader(r DataStreamReader, opts ...ipc.Option) (*Reader, error) {
+	rdr := &Reader{dmr: &dataMessageReader{rdr: r}}
+	var err error
+	if rdr.Reader, err = ipc.NewReaderFromMessageReader(rdr.dmr, opts...); err != nil {
+		return nil, xerrors.Errorf("arrow/flight: could not create flight reader: %w", err)
+	}
+
+	return rdr, nil
 }
 
 // DeserializeSchema takes the schema bytes from FlightInfo or SchemaResult
diff --git a/go/arrow/flight/record_batch_writer.go b/go/arrow/flight/record_batch_writer.go
index 101fbcf16f5..3e4c4d805ac 100644
--- a/go/arrow/flight/record_batch_writer.go
+++ b/go/arrow/flight/record_batch_writer.go
@@ -20,6 +20,7 @@ import (
 	"bytes"
 
 	"github.com/apache/arrow/go/arrow"
+	"github.com/apache/arrow/go/arrow/array"
 	"github.com/apache/arrow/go/arrow/ipc"
 	"github.com/apache/arrow/go/arrow/memory"
 )
@@ -46,17 +47,35 @@ func (f *flightPayloadWriter) WritePayload(payload ipc.Payload) error {
 
 	payload.SerializeBody(&f.buf)
 	f.fd.DataBody = f.buf.Bytes()
+
 	return f.w.Send(&f.fd)
 }
 
 func (f *flightPayloadWriter) Close() error { return nil }
 
+// Writer is an ipc.Writer which also adds a WriteWithAppMetadata function
+// in order to allow adding AppMetadata to the FlightData messages which
+// are written.
+type Writer struct {
+	*ipc.Writer
+	pw *flightPayloadWriter
+}
+
+// WriteWithAppMetadata will write this record with the supplied application
+// metadata attached in the flightData message.
+func (w *Writer) WriteWithAppMetadata(rec array.Record, appMeta []byte) error {
+	w.pw.fd.AppMetadata = appMeta
+	defer func() { w.pw.fd.AppMetadata = nil }()
+	return w.Write(rec)
+}
+
 // NewRecordWriter can be used to construct a writer for arrow flight via
 // the grpc stream handler to write flight data objects and write
 // record batches to the stream. Options passed here will be passed to
 // ipc.NewWriter
-func NewRecordWriter(w DataStreamWriter, opts ...ipc.Option) *ipc.Writer {
-	return ipc.NewWriterWithPayloadWriter(&flightPayloadWriter{w: w}, opts...)
+func NewRecordWriter(w DataStreamWriter, opts ...ipc.Option) *Writer {
+	pw := &flightPayloadWriter{w: w}
+	return &Writer{Writer: ipc.NewWriterWithPayloadWriter(pw, opts...), pw: pw}
 }
 
 // SerializeSchema returns the serialized schema bytes for use in Arrow Flight

From 0d11014ee8e6ce408ddbbdfb788d901dd6c6374f Mon Sep 17 00:00:00 2001
From: Matthew Topol <mtopol@factset.com>
Date: Wed, 28 Apr 2021 08:56:55 -0700
Subject: [PATCH 145/719] ARROW-5640: [Go] Implement Arrow Map Array

Took it upon myself to implement the Map Array type for Golang and uncomment the tests appropriately.

Closes #10106 from zeroshade/maptype

Authored-by: Matthew Topol <mtopol@factset.com>
Signed-off-by: Micah Kornfield <emkornfield@gmail.com>
---
 dev/archery/archery/integration/datagen.py |  12 +-
 docs/source/status.rst                     |   2 +-
 go/arrow/array/array.go                    |   2 +-
 go/arrow/array/array_test.go               |  20 +-
 go/arrow/array/builder.go                  |   2 +
 go/arrow/array/compare.go                  |   8 +-
 go/arrow/array/map.go                      | 272 +++++++++
 go/arrow/array/map_test.go                 | 151 +++++
 go/arrow/datatype_nested.go                |  35 ++
 go/arrow/datatype_nested_test.go           |  70 +++
 go/arrow/example_test.go                   |  64 +++
 go/arrow/internal/arrdata/arrdata.go       | 150 +++++
 go/arrow/internal/arrjson/arrjson.go       |  48 ++
 go/arrow/internal/arrjson/arrjson_test.go  | 637 +++++++++++++++++++++
 go/arrow/ipc/file_reader.go                |  16 +
 go/arrow/ipc/metadata.go                   |  34 +-
 go/arrow/ipc/writer.go                     |  37 ++
 17 files changed, 1535 insertions(+), 25 deletions(-)
 create mode 100644 go/arrow/array/map.go
 create mode 100644 go/arrow/array/map_test.go

diff --git a/dev/archery/archery/integration/datagen.py b/dev/archery/archery/integration/datagen.py
index ec4969ede3c..8e63fe3be86 100644
--- a/dev/archery/archery/integration/datagen.py
+++ b/dev/archery/archery/integration/datagen.py
@@ -1519,12 +1519,12 @@ def _temp_path():
         .skip_category('JS'),
 
         generate_null_case([10, 0])
-        .skip_category('JS')   # TODO(ARROW-7900)
-        .skip_category('Go'),  # TODO(ARROW-7901)
+        .skip_category('Go')    # TODO(ARROW-7901)
+        .skip_category('JS'),   # TODO(ARROW-7900)
 
         generate_null_trivial_case([0, 0])
-        .skip_category('JS')   # TODO(ARROW-7900)
-        .skip_category('Go'),  # TODO(ARROW-7901)
+        .skip_category('Go')    # TODO(ARROW-7901)
+        .skip_category('JS'),   # TODO(ARROW-7900)
 
         generate_decimal128_case()
         .skip_category('Rust'),
@@ -1541,11 +1541,9 @@ def _temp_path():
         .skip_category('Rust'),
 
         generate_map_case()
-        .skip_category('Go')  # TODO(ARROW-5620): Map + Go
         .skip_category('Rust'),
 
         generate_non_canonical_map_case()
-        .skip_category('Go')     # TODO(ARROW-5620)
         .skip_category('Java')   # TODO(ARROW-8715)
         .skip_category('JS')     # TODO(ARROW-8716)
         .skip_category('Rust'),
@@ -1588,7 +1586,7 @@ def _temp_path():
         .skip_category('Rust'),
 
         generate_extension_case()
-        .skip_category('Go')
+        .skip_category('Go')    # TODO(ARROW-5385)
         .skip_category('JS')
         .skip_category('Rust'),
     ]
diff --git a/docs/source/status.rst b/docs/source/status.rst
index 176d35eb12b..a33908d9a13 100644
--- a/docs/source/status.rst
+++ b/docs/source/status.rst
@@ -79,7 +79,7 @@ Data Types
 +-------------------+-------+-------+-------+------------+-------+-------+-------+
 | Struct            | ✓     | ✓     | ✓     | ✓          |  ✓    |  ✓    | ✓     |
 +-------------------+-------+-------+-------+------------+-------+-------+-------+
-| Map               | ✓     | ✓     |       | ✓          |       |       | ✓     |
+| Map               | ✓     | ✓     | ✓     | ✓          |       |       | ✓     |
 +-------------------+-------+-------+-------+------------+-------+-------+-------+
 | Dense Union       | ✓     | ✓     |       |            |       |       | ✓     |
 +-------------------+-------+-------+-------+------------+-------+-------+-------+
diff --git a/go/arrow/array/array.go b/go/arrow/array/array.go
index 9cbaef9ff12..c191c76c7e9 100644
--- a/go/arrow/array/array.go
+++ b/go/arrow/array/array.go
@@ -197,7 +197,7 @@ func init() {
 		arrow.STRUCT:            func(data *Data) Interface { return NewStructData(data) },
 		arrow.UNION:             unsupportedArrayType,
 		arrow.DICTIONARY:        unsupportedArrayType,
-		arrow.MAP:               unsupportedArrayType,
+		arrow.MAP:               func(data *Data) Interface { return NewMapData(data) },
 		arrow.EXTENSION:         unsupportedArrayType,
 		arrow.FIXED_SIZE_LIST:   func(data *Data) Interface { return NewFixedSizeListData(data) },
 		arrow.DURATION:          func(data *Data) Interface { return NewDurationData(data) },
diff --git a/go/arrow/array/array_test.go b/go/arrow/array/array_test.go
index 48c238656b4..904cbd8088f 100644
--- a/go/arrow/array/array_test.go
+++ b/go/arrow/array/array_test.go
@@ -69,26 +69,32 @@ func TestMakeFromData(t *testing.T) {
 		{name: "decimal", d: &testDataType{arrow.DECIMAL}},
 
 		{name: "list", d: &testDataType{arrow.LIST}, child: []*array.Data{
-			array.NewData(&testDataType{arrow.INT64}, 0, make([]*memory.Buffer, 4), nil, 0, 0),
-			array.NewData(&testDataType{arrow.INT64}, 0, make([]*memory.Buffer, 4), nil, 0, 0),
+			array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */),
+			array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */),
 		}},
 
 		{name: "struct", d: &testDataType{arrow.STRUCT}},
 		{name: "struct", d: &testDataType{arrow.STRUCT}, child: []*array.Data{
-			array.NewData(&testDataType{arrow.INT64}, 0, make([]*memory.Buffer, 4), nil, 0, 0),
-			array.NewData(&testDataType{arrow.INT64}, 0, make([]*memory.Buffer, 4), nil, 0, 0),
+			array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */),
+			array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */),
 		}},
 
 		{name: "fixed_size_list", d: arrow.FixedSizeListOf(4, arrow.PrimitiveTypes.Int64), child: []*array.Data{
-			array.NewData(&testDataType{arrow.INT64}, 0, make([]*memory.Buffer, 4), nil, 0, 0),
-			array.NewData(&testDataType{arrow.INT64}, 0, make([]*memory.Buffer, 4), nil, 0, 0),
+			array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */),
+			array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */),
 		}},
 		{name: "duration", d: &testDataType{arrow.DURATION}},
 
+		{name: "map", d: &testDataType{arrow.MAP}, child: []*array.Data{
+			array.NewData(&testDataType{arrow.STRUCT}, 0 /* length */, make([]*memory.Buffer, 3 /*null bitmap, values, offsets*/), []*array.Data{
+				array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */),
+				array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */),
+			}, 0 /* nulls */, 0 /* offset */)},
+		},
+
 		// unsupported types
 		{name: "union", d: &testDataType{arrow.UNION}, expPanic: true, expError: "unsupported data type: UNION"},
 		{name: "dictionary", d: &testDataType{arrow.DICTIONARY}, expPanic: true, expError: "unsupported data type: DICTIONARY"},
-		{name: "map", d: &testDataType{arrow.Type(27)}, expPanic: true, expError: "unsupported data type: MAP"},
 		{name: "extension", d: &testDataType{arrow.Type(28)}, expPanic: true, expError: "unsupported data type: EXTENSION"},
 
 		// invalid types
diff --git a/go/arrow/array/builder.go b/go/arrow/array/builder.go
index 0066e1dd1bd..fcb8d7670cf 100644
--- a/go/arrow/array/builder.go
+++ b/go/arrow/array/builder.go
@@ -277,6 +277,8 @@ func NewBuilder(mem memory.Allocator, dtype arrow.DataType) Builder {
 	case arrow.UNION:
 	case arrow.DICTIONARY:
 	case arrow.MAP:
+		typ := dtype.(*arrow.MapType)
+		return NewMapBuilder(mem, typ.KeyType(), typ.ItemType(), typ.KeysSorted)
 	case arrow.EXTENSION:
 	case arrow.FIXED_SIZE_LIST:
 		typ := dtype.(*arrow.FixedSizeListType)
diff --git a/go/arrow/array/compare.go b/go/arrow/array/compare.go
index 537630dbffc..a02c5ac87be 100644
--- a/go/arrow/array/compare.go
+++ b/go/arrow/array/compare.go
@@ -164,7 +164,9 @@ func ArrayEqual(left, right Interface) bool {
 	case *Duration:
 		r := right.(*Duration)
 		return arrayEqualDuration(l, r)
-
+	case *Map:
+		r := right.(*Map)
+		return arrayEqualMap(l, r)
 	default:
 		panic(xerrors.Errorf("arrow/array: unknown array type %T", l))
 	}
@@ -353,7 +355,9 @@ func arrayApproxEqual(left, right Interface, opt equalOption) bool {
 	case *Duration:
 		r := right.(*Duration)
 		return arrayEqualDuration(l, r)
-
+	case *Map:
+		r := right.(*Map)
+		return arrayApproxEqualList(l.List, r.List, opt)
 	default:
 		panic(xerrors.Errorf("arrow/array: unknown array type %T", l))
 	}
diff --git a/go/arrow/array/map.go b/go/arrow/array/map.go
new file mode 100644
index 00000000000..16fc5e91bbd
--- /dev/null
+++ b/go/arrow/array/map.go
@@ -0,0 +1,272 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package array // import "github.com/apache/arrow/go/arrow/array"
+
+import (
+	"github.com/apache/arrow/go/arrow"
+	"github.com/apache/arrow/go/arrow/memory"
+)
+
+// Map represents an immutable sequence of Key/Value structs. It is a
+// logical type that is implemented as a List<Struct: key, value>.
+type Map struct {
+	*List
+	keys, items Interface
+}
+
+// NewMapData returns a new Map array value, from data
+func NewMapData(data *Data) *Map {
+	a := &Map{List: &List{}}
+	a.refCount = 1
+	a.setData(data)
+	return a
+}
+
+// KeysSorted checks the datatype that was used to construct this array and
+// returns the KeysSorted boolean value used to denote if the key array is
+// sorted for each list element.
+//
+// Important note: Nothing is enforced regarding the KeysSorted value, it is
+// solely a metadata field that should be set if keys within each value are sorted.
+// This value is not used at all in regards to comparisons / equality.
+func (a *Map) KeysSorted() bool { return a.DataType().(*arrow.MapType).KeysSorted }
+
+func (a *Map) validateData(data *Data) {
+	if len(data.childData) != 1 || data.childData[0] == nil {
+		panic("arrow/array: expected one child array for map array")
+	}
+
+	if data.childData[0].dtype.ID() != arrow.STRUCT {
+		panic("arrow/array: map array child should be struct type")
+	}
+
+	if data.childData[0].NullN() != 0 {
+		panic("arrow/array: map array child array should have no nulls")
+	}
+
+	if len(data.childData[0].childData) != 2 {
+		panic("arrow/array: map array child array should have two fields")
+	}
+
+	if data.childData[0].childData[0].NullN() != 0 {
+		panic("arrow/array: map array keys array should have no nulls")
+	}
+}
+
+func (a *Map) setData(data *Data) {
+	a.validateData(data)
+
+	a.List.setData(data)
+	a.keys = MakeFromData(data.childData[0].childData[0])
+	a.items = MakeFromData(data.childData[0].childData[1])
+}
+
+// Keys returns the full Array of Key values, equivalent to grabbing
+// the key field of the child struct.
+func (a *Map) Keys() Interface { return a.keys }
+
+// Items returns the full Array of Item values, equivalent to grabbing
+// the Value field (the second field) of the child struct.
+func (a *Map) Items() Interface { return a.items }
+
+// Retain increases the reference count by 1.
+// Retain may be called simultaneously from multiple goroutines.
+func (a *Map) Retain() {
+	a.List.Retain()
+	a.keys.Retain()
+	a.items.Retain()
+}
+
+// Release decreases the reference count by 1.
+// Release may be called simultaneously from multiple goroutines.
+// When the reference count goes to zero, the memory is freed.
+func (a *Map) Release() {
+	a.List.Release()
+	a.keys.Release()
+	a.items.Release()
+}
+
+func arrayEqualMap(left, right *Map) bool {
+	// since Map is implemented using a list, we can just use arrayEqualList
+	return arrayEqualList(left.List, right.List)
+}
+
+type MapBuilder struct {
+	listBuilder *ListBuilder
+
+	etype                   arrow.DataType
+	keytype, itemtype       arrow.DataType
+	keyBuilder, itemBuilder Builder
+	keysSorted              bool
+}
+
+// NewMapBuilder returns a builder, using the provided memory allocator.
+// The created Map builder will create a map array whose keys will be a non-nullable
+// array of type `keytype` and whose mapped items will be a nullable array of itemtype.
+//
+// KeysSorted is not enforced at all by the builder, it should only be set to true
+// building using keys in sorted order for each value. The KeysSorted value will just be
+// used when creating the DataType for the map.
+//
+// Example
+//
+// Simple example provided of converting a []map[string]int32 to an array.Map
+// by using a MapBuilder:
+//
+//   /* assume maplist == []map[string]int32 */
+//   bldr := array.NewMapBuilder(memory.DefaultAllocator, arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int32, false)
+//   defer bldr.Release()
+//   kb := bldr.KeyBuilder().(*array.StringBuilder)
+//   ib := bldr.ItemBuilder().(*array.Int32Builder)
+//   for _, m := range maplist {
+//       bldr.Append(true)
+//       for k, v := range m {
+//            kb.Append(k)
+//            ib.Append(v)
+//       }
+//   }
+//   maparr := bldr.NewMapArray()
+//   defer maparr.Release()
+//
+func NewMapBuilder(mem memory.Allocator, keytype, itemtype arrow.DataType, keysSorted bool) *MapBuilder {
+	etype := arrow.MapOf(keytype, itemtype)
+	etype.KeysSorted = keysSorted
+	listBldr := NewListBuilder(mem, etype.ValueType())
+	keyBldr := listBldr.ValueBuilder().(*StructBuilder).FieldBuilder(0)
+	keyBldr.Retain()
+	itemBldr := listBldr.ValueBuilder().(*StructBuilder).FieldBuilder(1)
+	itemBldr.Retain()
+	return &MapBuilder{
+		listBuilder: listBldr,
+		keyBuilder:  keyBldr,
+		itemBuilder: itemBldr,
+		etype:       etype,
+		keytype:     keytype,
+		itemtype:    itemtype,
+		keysSorted:  keysSorted,
+	}
+}
+
+// Retain increases the reference count by 1 for the sub-builders (list, key, item).
+// Retain may be called simultaneously from multiple goroutines.
+func (b *MapBuilder) Retain() {
+	b.listBuilder.Retain()
+	b.keyBuilder.Retain()
+	b.itemBuilder.Retain()
+}
+
+// Release decreases the reference count by 1 for the sub builders (list, key, item).
+func (b *MapBuilder) Release() {
+	b.listBuilder.Release()
+	b.keyBuilder.Release()
+	b.itemBuilder.Release()
+}
+
+// Len returns the current number of Maps that are in the builder
+func (b *MapBuilder) Len() int { return b.listBuilder.Len() }
+
+// Cap returns the total number of elements that can be stored
+// without allocating additional memory.
+func (b *MapBuilder) Cap() int { return b.listBuilder.Cap() }
+
+// NullN returns the number of null values in the array builder.
+func (b *MapBuilder) NullN() int { return b.listBuilder.NullN() }
+
+// Append adds a new Map element to the array, calling Append(false) is
+// equivalent to calling AppendNull.
+func (b *MapBuilder) Append(v bool) {
+	b.adjustStructBuilderLen()
+	b.listBuilder.Append(v)
+}
+
+// AppendNull adds a null map entry to the array.
+func (b *MapBuilder) AppendNull() {
+	b.Append(false)
+}
+
+// Reserve enough space for n maps
+func (b *MapBuilder) Reserve(n int) { b.listBuilder.Reserve(n) }
+
+// Resize adjust the space allocated by b to n map elements. If n is greater than
+// b.Cap(), additional memory will be allocated. If n is smaller, the allocated memory may be reduced.
+func (b *MapBuilder) Resize(n int) { b.listBuilder.Resize(n) }
+
+// AppendValues is for bulk appending a group of elements with offsets provided
+// and validity booleans provided.
+func (b *MapBuilder) AppendValues(offsets []int32, valid []bool) {
+	b.adjustStructBuilderLen()
+	b.listBuilder.AppendValues(offsets, valid)
+}
+
+func (b *MapBuilder) init(capacity int)                  { b.listBuilder.init(capacity) }
+func (b *MapBuilder) resize(newBits int, init func(int)) { b.listBuilder.resize(newBits, init) }
+
+func (b *MapBuilder) adjustStructBuilderLen() {
+	sb := b.listBuilder.ValueBuilder().(*StructBuilder)
+	if sb.Len() < b.keyBuilder.Len() {
+		valids := make([]bool, b.keyBuilder.Len()-sb.Len())
+		for i := range valids {
+			valids[i] = true
+		}
+		sb.AppendValues(valids)
+	}
+}
+
+// NewArray creates a new Map array from the memory buffers used by the builder, and
+// resets the builder so it can be used again to build a new Map array.
+func (b *MapBuilder) NewArray() Interface {
+	return b.NewMapArray()
+}
+
+// NewMapArray creates a new Map array from the memory buffers used by the builder, and
+// resets the builder so it can be used again to build a new Map array.
+func (b *MapBuilder) NewMapArray() (a *Map) {
+	data := b.newData()
+	defer data.Release()
+	a = NewMapData(data)
+	return
+}
+
+func (b *MapBuilder) newData() (data *Data) {
+	b.adjustStructBuilderLen()
+	values := b.listBuilder.NewListArray()
+	defer values.Release()
+
+	data = NewData(b.etype,
+		values.Len(), values.data.buffers,
+		values.data.childData, values.NullN(), 0)
+	return
+}
+
+// KeyBuilder returns a builder that can be used to populate the keys of the maps.
+func (b *MapBuilder) KeyBuilder() Builder { return b.keyBuilder }
+
+// ItemBuilder returns a builder that can be used to populate the values that the
+// keys point to.
+func (b *MapBuilder) ItemBuilder() Builder { return b.itemBuilder }
+
+// ValueBuilder can be used instead of separately using the Key/Item builders
+// to build the list as a List of Structs rather than building the keys/items
+// separately.
+func (b *MapBuilder) ValueBuilder() *StructBuilder {
+	return b.listBuilder.ValueBuilder().(*StructBuilder)
+}
+
+var (
+	_ Interface = (*Map)(nil)
+	_ Builder   = (*MapBuilder)(nil)
+)
diff --git a/go/arrow/array/map_test.go b/go/arrow/array/map_test.go
new file mode 100644
index 00000000000..9c961555822
--- /dev/null
+++ b/go/arrow/array/map_test.go
@@ -0,0 +1,151 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package array_test
+
+import (
+	"testing"
+
+	"github.com/apache/arrow/go/arrow"
+	"github.com/apache/arrow/go/arrow/array"
+	"github.com/apache/arrow/go/arrow/memory"
+	"github.com/stretchr/testify/assert"
+)
+
+func TestMapArray(t *testing.T) {
+	pool := memory.NewCheckedAllocator(memory.NewGoAllocator())
+	defer pool.AssertSize(t, 0)
+
+	var (
+		arr, equalArr, unequalArr *array.Map
+
+		equalValid     = []bool{true, true, true, true, true, true, true}
+		equalOffsets   = []int32{0, 1, 2, 5, 6, 7, 8, 10}
+		equalKeys      = []string{"a", "a", "a", "b", "c", "a", "a", "a", "a", "b"}
+		equalValues    = []int32{1, 2, 3, 4, 5, 2, 2, 2, 5, 6}
+		unequalValid   = []bool{true, true, true}
+		unequalOffsets = []int32{0, 1, 4, 7}
+		unequalKeys    = []string{"a", "a", "b", "c", "a", "b", "c"}
+		unequalValues  = []int32{1, 2, 2, 2, 3, 4, 5}
+	)
+
+	bldr := array.NewMapBuilder(pool, arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int32, false)
+	defer bldr.Release()
+
+	kb := bldr.KeyBuilder().(*array.StringBuilder)
+	ib := bldr.ItemBuilder().(*array.Int32Builder)
+
+	bldr.AppendValues(equalOffsets, equalValid)
+	for _, k := range equalKeys {
+		kb.Append(k)
+	}
+	ib.AppendValues(equalValues, nil)
+
+	assert.Equal(t, len(equalValid), bldr.Len())
+	assert.Zero(t, bldr.NullN())
+
+	arr = bldr.NewMapArray()
+	defer arr.Release()
+
+	bldr.AppendValues(equalOffsets, equalValid)
+	for _, k := range equalKeys {
+		kb.Append(k)
+	}
+	ib.AppendValues(equalValues, nil)
+
+	equalArr = bldr.NewMapArray()
+	defer equalArr.Release()
+
+	bldr.AppendValues(unequalOffsets, unequalValid)
+	for _, k := range unequalKeys {
+		kb.Append(k)
+	}
+	ib.AppendValues(unequalValues, nil)
+
+	unequalArr = bldr.NewMapArray()
+	defer unequalArr.Release()
+
+	assert.True(t, array.ArrayEqual(arr, arr))
+	assert.True(t, array.ArrayEqual(arr, equalArr))
+	assert.True(t, array.ArrayEqual(equalArr, arr))
+	assert.False(t, array.ArrayEqual(equalArr, unequalArr))
+	assert.False(t, array.ArrayEqual(unequalArr, equalArr))
+
+	assert.True(t, array.ArraySliceEqual(arr, 0, 1, unequalArr, 0, 1))
+	assert.False(t, array.ArraySliceEqual(arr, 0, 2, unequalArr, 0, 2))
+	assert.False(t, array.ArraySliceEqual(arr, 1, 2, unequalArr, 1, 2))
+	assert.True(t, array.ArraySliceEqual(arr, 2, 3, unequalArr, 2, 3))
+}
+
+func TestMapArrayBuildIntToInt(t *testing.T) {
+	pool := memory.NewCheckedAllocator(memory.NewGoAllocator())
+	defer pool.AssertSize(t, 0)
+
+	var (
+		dtype      = arrow.MapOf(arrow.PrimitiveTypes.Int16, arrow.PrimitiveTypes.Int16)
+		keys       = []int16{0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5}
+		items      = []int16{1, 1, 2, 3, 5, 8, -1, -1, 0, 1, -1, 2}
+		validItems = []bool{true, true, true, true, true, true, false, false, true, true, false, true}
+		offsets    = []int32{0, 6, 6, 12, 12}
+		validMaps  = []bool{true, false, true, true}
+	)
+
+	bldr := array.NewBuilder(pool, dtype).(*array.MapBuilder)
+	defer bldr.Release()
+
+	bldr.Reserve(len(validMaps))
+
+	kb := bldr.KeyBuilder().(*array.Int16Builder)
+	ib := bldr.ItemBuilder().(*array.Int16Builder)
+
+	bldr.Append(true)
+	kb.AppendValues(keys[:6], nil)
+	ib.AppendValues(items[:6], nil)
+
+	bldr.AppendNull()
+	bldr.Append(true)
+	kb.AppendValues(keys[6:], nil)
+	ib.AppendValues(items[6:], []bool{false, false, true, true, false, true})
+
+	bldr.Append(true)
+	arr := bldr.NewArray().(*array.Map)
+	defer arr.Release()
+
+	assert.Equal(t, arrow.MAP, arr.DataType().ID())
+	assert.EqualValues(t, len(validMaps), arr.Len())
+
+	for i, ex := range validMaps {
+		assert.Equal(t, ex, arr.IsValid(i))
+		assert.Equal(t, !ex, arr.IsNull(i))
+	}
+
+	assert.Equal(t, offsets, arr.Offsets())
+	assert.Equal(t, keys, arr.Keys().(*array.Int16).Int16Values())
+
+	itemArr := arr.Items().(*array.Int16)
+	for i, ex := range validItems {
+		if ex {
+			assert.True(t, itemArr.IsValid(i))
+			assert.False(t, itemArr.IsNull(i))
+			assert.Equal(t, items[i], itemArr.Value(i))
+		} else {
+			assert.False(t, itemArr.IsValid(i))
+			assert.True(t, itemArr.IsNull(i))
+		}
+	}
+
+	assert.Equal(t, "[{[0 1 2 3 4 5] [1 1 2 3 5 8]} (null) {[0 1 2 3 4 5] [(null) (null) 0 1 (null) 2]} {[] []}]", arr.String())
+}
diff --git a/go/arrow/datatype_nested.go b/go/arrow/datatype_nested.go
index c6173fe87cb..47ea77899ef 100644
--- a/go/arrow/datatype_nested.go
+++ b/go/arrow/datatype_nested.go
@@ -148,6 +148,40 @@ func (t *StructType) FieldByName(name string) (Field, bool) {
 	return t.fields[i], true
 }
 
+type MapType struct {
+	value      *ListType
+	KeysSorted bool
+}
+
+func MapOf(key, item DataType) *MapType {
+	if key == nil || item == nil {
+		panic("arrow: nil key or item type for MapType")
+	}
+
+	return &MapType{value: ListOf(StructOf(Field{Name: "key", Type: key}, Field{Name: "value", Type: item, Nullable: true}))}
+}
+
+func (*MapType) ID() Type     { return MAP }
+func (*MapType) Name() string { return "map" }
+
+func (t *MapType) String() string {
+	var o strings.Builder
+	o.WriteString(fmt.Sprintf("map<%s, %s",
+		t.value.Elem().(*StructType).Field(0).Type,
+		t.value.Elem().(*StructType).Field(1).Type))
+	if t.KeysSorted {
+		o.WriteString(", keys_sorted")
+	}
+	o.WriteString(">")
+	return o.String()
+}
+
+func (t *MapType) KeyField() Field        { return t.value.Elem().(*StructType).Field(0) }
+func (t *MapType) KeyType() DataType      { return t.KeyField().Type }
+func (t *MapType) ItemField() Field       { return t.value.Elem().(*StructType).Field(1) }
+func (t *MapType) ItemType() DataType     { return t.ItemField().Type }
+func (t *MapType) ValueType() *StructType { return t.value.Elem().(*StructType) }
+
 type Field struct {
 	Name     string   // Field name
 	Type     DataType // The field's data type
@@ -177,4 +211,5 @@ func (f Field) String() string {
 var (
 	_ DataType = (*ListType)(nil)
 	_ DataType = (*StructType)(nil)
+	_ DataType = (*MapType)(nil)
 )
diff --git a/go/arrow/datatype_nested_test.go b/go/arrow/datatype_nested_test.go
index 34b77373832..94c6a71d99a 100644
--- a/go/arrow/datatype_nested_test.go
+++ b/go/arrow/datatype_nested_test.go
@@ -354,3 +354,73 @@ func TestFixedSizeListOf(t *testing.T) {
 		})
 	}
 }
+
+func TestMapOf(t *testing.T) {
+	for _, tc := range []struct {
+		key, item DataType
+		want      DataType
+		str       string
+	}{
+		{
+			key:  BinaryTypes.String,
+			item: PrimitiveTypes.Uint8,
+			want: &MapType{value: ListOf(StructOf(
+				Field{Name: "key", Type: BinaryTypes.String},
+				Field{Name: "value", Type: PrimitiveTypes.Uint8, Nullable: true},
+			))},
+			str: "map<utf8, uint8>",
+		},
+		{
+			key:  BinaryTypes.String,
+			item: MapOf(PrimitiveTypes.Uint32, FixedWidthTypes.Date32),
+			want: &MapType{value: ListOf(StructOf(
+				Field{Name: "key", Type: BinaryTypes.String},
+				Field{Name: "value", Nullable: true,
+					Type: &MapType{value: ListOf(StructOf(
+						Field{Name: "key", Type: PrimitiveTypes.Uint32},
+						Field{Name: "value", Type: FixedWidthTypes.Date32, Nullable: true},
+					))}},
+			))},
+			str: "map<utf8, map<uint32, date32>>",
+		},
+	} {
+		t.Run("", func(t *testing.T) {
+			got := MapOf(tc.key, tc.item)
+			if !reflect.DeepEqual(got, tc.want) {
+				t.Fatalf("got=%#v, want=%#v", got, tc.want)
+			}
+
+			if got, want := got.ID(), MAP; got != want {
+				t.Fatalf("invalid ID. got=%v, want=%v", got, want)
+			}
+
+			if got, want := got.Name(), "map"; got != want {
+				t.Fatalf("invalid name. got=%q, want=%q", got, want)
+			}
+
+			if got, want := got.KeyField().Name, "key"; got != want {
+				t.Fatalf("invalid key field name. got=%q, want=%q", got, want)
+			}
+
+			if got, want := got.ItemField().Name, "value"; got != want {
+				t.Fatalf("invalid item field name. got=%q, want=%q", got, want)
+			}
+
+			if got, want := got.KeyType(), tc.key; got != want {
+				t.Fatalf("invalid key type. got=%q, want=%q", got, want)
+			}
+
+			if got, want := got.ItemType(), tc.item; got != want {
+				t.Fatalf("invalid item type. got=%q, want=%q", got, want)
+			}
+
+			if got, want := got.ValueType(), StructOf(got.KeyField(), got.ItemField()); !TypeEqual(got, want) {
+				t.Fatalf("invalid value type. got=%q, want=%q", got, want)
+			}
+
+			if got, want := got.String(), tc.str; got != want {
+				t.Fatalf("invalid String() result. got=%q, want=%q", got, want)
+			}
+		})
+	}
+}
diff --git a/go/arrow/example_test.go b/go/arrow/example_test.go
index 6413aeef649..71f86743664 100644
--- a/go/arrow/example_test.go
+++ b/go/arrow/example_test.go
@@ -593,3 +593,67 @@ func Example_table() {
 	// rec[3]["f1-i32"]: [16 17 18 19 20]
 	// rec[3]["f2-f64"]: [16 17 18 19 20]
 }
+
+// This example demonstrates how to create a Map Array.
+// The resulting array should be:
+//   [{["ab" "cd" "ef" "gh"] [1 2 3 4]} (null) {["ab" "cd" "ef" "gh"] [(null) 2 5 1]}]
+func Example_mapArray() {
+	pool := memory.NewGoAllocator()
+	mb := array.NewMapBuilder(pool, arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int16, false)
+	defer mb.Release()
+
+	kb := mb.KeyBuilder().(*array.StringBuilder)
+	ib := mb.ItemBuilder().(*array.Int16Builder)
+
+	keys := []string{"ab", "cd", "ef", "gh"}
+
+	mb.Append(true)
+	kb.AppendValues(keys, nil)
+	ib.AppendValues([]int16{1, 2, 3, 4}, nil)
+
+	mb.AppendNull()
+
+	mb.Append(true)
+	kb.AppendValues(keys, nil)
+	ib.AppendValues([]int16{-1, 2, 5, 1}, []bool{false, true, true, true})
+
+	arr := mb.NewMapArray()
+	defer arr.Release()
+
+	fmt.Printf("NullN() = %d\n", arr.NullN())
+	fmt.Printf("Len()   = %d\n", arr.Len())
+
+	offsets := arr.Offsets()
+	keyArr := arr.Keys().(*array.String)
+	itemArr := arr.Items().(*array.Int16)
+
+	for i := 0; i < arr.Len(); i++ {
+		if arr.IsNull(i) {
+			fmt.Printf("Map[%d] = (null)\n", i)
+			continue
+		}
+
+		fmt.Printf("Map[%d] = {", i)
+		for j := offsets[i]; j < offsets[i+1]; j++ {
+			if j != offsets[i] {
+				fmt.Printf(", ")
+			}
+			fmt.Printf("%v => ", keyArr.Value(int(j)))
+			if itemArr.IsValid(int(j)) {
+				fmt.Printf("%v", itemArr.Value(int(j)))
+			} else {
+				fmt.Printf("(null)")
+			}
+		}
+		fmt.Printf("}\n")
+	}
+	fmt.Printf("Map    = %v\n", arr)
+
+	// Output:
+	// NullN() = 1
+	// Len()   = 3
+	// Map[0] = {ab => 1, cd => 2, ef => 3, gh => 4}
+	// Map[1] = (null)
+	// Map[2] = {ab => (null), cd => 2, ef => 5, gh => 1}
+	// Map    = [{["ab" "cd" "ef" "gh"] [1 2 3 4]} (null) {["ab" "cd" "ef" "gh"] [(null) 2 5 1]}]
+}
diff --git a/go/arrow/internal/arrdata/arrdata.go b/go/arrow/internal/arrdata/arrdata.go
index 3c5154ab2ec..8f7f59f53b0 100644
--- a/go/arrow/internal/arrdata/arrdata.go
+++ b/go/arrow/internal/arrdata/arrdata.go
@@ -45,6 +45,7 @@ func init() {
 	Records["intervals"] = makeIntervalsRecords()
 	Records["durations"] = makeDurationsRecords()
 	Records["decimal128"] = makeDecimal128sRecords()
+	Records["maps"] = makeMapsRecords()
 
 	for k := range Records {
 		RecordNames = append(RecordNames, k)
@@ -690,6 +691,128 @@ func makeDecimal128sRecords() []array.Record {
 	return recs
 }
 
+func makeMapsRecords() []array.Record {
+	mem := memory.NewGoAllocator()
+	dtype := arrow.MapOf(arrow.PrimitiveTypes.Int32, arrow.BinaryTypes.String)
+	dtype.KeysSorted = true
+	schema := arrow.NewSchema([]arrow.Field{{Name: "map_int_utf8", Type: dtype, Nullable: true}}, nil)
+
+	mask := []bool{true, false, false, true, true}
+	chunks := [][]array.Interface{
+		{
+			mapOf(mem, dtype.KeysSorted, []array.Interface{
+				structOf(mem, dtype.ValueType(), [][]array.Interface{
+					{
+						arrayOf(mem, []int32{-1, -2, -3, -4, -5}, nil),
+						arrayOf(mem, []string{"111", "222", "333", "444", "555"}, mask[:5]),
+					},
+					{
+						arrayOf(mem, []int32{-1, -2, -3, -4, -5}, nil),
+						arrayOf(mem, []string{"1111", "1222", "1333", "1444", "1555"}, mask[:5]),
+					},
+					{
+						arrayOf(mem, []int32{-1, -2, -3, -4, -5}, nil),
+						arrayOf(mem, []string{"2111", "2222", "2333", "2444", "2555"}, mask[:5]),
+					},
+					{
+						arrayOf(mem, []int32{-1, -2, -3, -4, -5}, nil),
+						arrayOf(mem, []string{"3111", "3222", "3333", "3444", "3555"}, mask[:5]),
+					},
+					{
+						arrayOf(mem, []int32{-1, -2, -3, -4, -5}, nil),
+						arrayOf(mem, []string{"4111", "4222", "4333", "4444", "4555"}, mask[:5]),
+					},
+				}, nil),
+				structOf(mem, dtype.ValueType(), [][]array.Interface{
+					{
+						arrayOf(mem, []int32{1, 2, 3, 4, 5}, nil),
+						arrayOf(mem, []string{"-111", "-222", "-333", "-444", "-555"}, mask[:5]),
+					},
+					{
+						arrayOf(mem, []int32{1, 2, 3, 4, 5}, nil),
+						arrayOf(mem, []string{"-1111", "-1222", "-1333", "-1444", "-1555"}, mask[:5]),
+					},
+					{
+						arrayOf(mem, []int32{1, 2, 3, 4, 5}, nil),
+						arrayOf(mem, []string{"-2111", "-2222", "-2333", "-2444", "-2555"}, mask[:5]),
+					},
+					{
+						arrayOf(mem, []int32{1, 2, 3, 4, 5}, nil),
+						arrayOf(mem, []string{"-3111", "-3222", "-3333", "-3444", "-3555"}, mask[:5]),
+					},
+					{
+						arrayOf(mem, []int32{1, 2, 3, 4, 5}, nil),
+						arrayOf(mem, []string{"-4111", "-4222", "-4333", "-4444", "-4555"}, mask[:5]),
+					},
+				}, nil),
+			}, []bool{true, false, true, true, true}),
+		},
+		{
+			mapOf(mem, dtype.KeysSorted, []array.Interface{
+				structOf(mem, dtype.ValueType(), [][]array.Interface{
+					{
+						arrayOf(mem, []int32{1, 2, 3, 4, 5}, nil),
+						arrayOf(mem, []string{"-111", "-222", "-333", "-444", "-555"}, mask[:5]),
+					},
+					{
+						arrayOf(mem, []int32{1, 2, 3, 4, 5}, nil),
+						arrayOf(mem, []string{"-1111", "-1222", "-1333", "-1444", "-1555"}, mask[:5]),
+					},
+					{
+						arrayOf(mem, []int32{1, 2, 3, 4, 5}, nil),
+						arrayOf(mem, []string{"-2111", "-2222", "-2333", "-2444", "-2555"}, mask[:5]),
+					},
+					{
+						arrayOf(mem, []int32{1, 2, 3, 4, 5}, nil),
+						arrayOf(mem, []string{"-3111", "-3222", "-3333", "-3444", "-3555"}, mask[:5]),
+					},
+					{
+						arrayOf(mem, []int32{1, 2, 3, 4, 5}, nil),
+						arrayOf(mem, []string{"-4111", "-4222", "-4333", "-4444", "-4555"}, mask[:5]),
+					},
+				}, nil),
+				structOf(mem, dtype.ValueType(), [][]array.Interface{
+					{
+						arrayOf(mem, []int32{-1, -2, -3, -4, -5}, nil),
+						arrayOf(mem, []string{"111", "222", "333", "444", "555"}, mask[:5]),
+					},
+					{
+						arrayOf(mem, []int32{-1, -2, -3, -4, -5}, nil),
+						arrayOf(mem, []string{"1111", "1222", "1333", "1444", "1555"}, mask[:5]),
+					},
+					{
+						arrayOf(mem, []int32{-1, -2, -3, -4, -5}, nil),
+						arrayOf(mem, []string{"2111", "2222", "2333", "2444", "2555"}, mask[:5]),
+					},
+					{
+						arrayOf(mem, []int32{-1, -2, -3, -4, -5}, nil),
+						arrayOf(mem, []string{"3111", "3222", "3333", "3444", "3555"}, mask[:5]),
+					},
+					{
+						arrayOf(mem, []int32{-1, -2, -3, -4, -5}, nil),
+						arrayOf(mem, []string{"4111", "4222", "4333", "4444", "4555"}, mask[:5]),
+					},
+				}, nil),
+			}, []bool{true, false, true, true, true}),
+		},
+	}
+
+	defer func() {
+		for _, chunk := range chunks {
+			for _, col := range chunk {
+				col.Release()
+			}
+		}
+	}()
+
+	recs := make([]array.Record, len(chunks))
+	for i, chunk := range chunks {
+		recs[i] = array.NewRecord(schema, chunk, -1)
+	}
+
+	return recs
+}
+
 func arrayOf(mem memory.Allocator, a interface{}, valids []bool) array.Interface {
 	if mem == nil {
 		mem = memory.NewGoAllocator()
@@ -1050,6 +1173,33 @@ func structOf(mem memory.Allocator, dtype *arrow.StructType, fields [][]array.In
 	return bldr.NewStructArray()
 }
 
+func mapOf(mem memory.Allocator, sortedKeys bool, values []array.Interface, valids []bool) *array.Map {
+	if mem == nil {
+		mem = memory.NewGoAllocator()
+	}
+
+	pairType := values[0].DataType().(*arrow.StructType)
+	bldr := array.NewMapBuilder(mem, pairType.Field(0).Type, pairType.Field(1).Type, sortedKeys)
+	defer bldr.Release()
+
+	valid := func(i int) bool {
+		return valids[i]
+	}
+
+	if valids == nil {
+		valid = func(i int) bool { return true }
+	}
+
+	vb := bldr.ValueBuilder()
+	for i, value := range values {
+		bldr.Append(valid(i))
+		buildArray(vb.FieldBuilder(0), value.(*array.Struct).Field(0))
+		buildArray(vb.FieldBuilder(1), value.(*array.Struct).Field(1))
+	}
+
+	return bldr.NewMapArray()
+}
+
 func buildArray(bldr array.Builder, data array.Interface) {
 	defer data.Release()
 
diff --git a/go/arrow/internal/arrjson/arrjson.go b/go/arrow/internal/arrjson/arrjson.go
index 149ecc7e77b..731e78a5397 100644
--- a/go/arrow/internal/arrjson/arrjson.go
+++ b/go/arrow/internal/arrjson/arrjson.go
@@ -136,6 +136,8 @@ func (f FieldWrapper) MarshalJSON() ([]byte, error) {
 		}
 	case *arrow.ListType:
 		typ = nameJSON{"list"}
+	case *arrow.MapType:
+		typ = mapJSON{Name: "map", KeysSorted: dt.KeysSorted}
 	case *arrow.StructType:
 		typ = nameJSON{"struct"}
 	case *arrow.FixedSizeListType:
@@ -266,6 +268,14 @@ func (f *FieldWrapper) UnmarshalJSON(data []byte) error {
 		}
 	case "list":
 		f.arrowType = arrow.ListOf(f.Children[0].arrowType)
+	case "map":
+		t := mapJSON{}
+		if err := json.Unmarshal(f.Type, &t); err != nil {
+			return err
+		}
+		pairType := f.Children[0].arrowType
+		f.arrowType = arrow.MapOf(pairType.(*arrow.StructType).Field(0).Type, pairType.(*arrow.StructType).Field(1).Type)
+		f.arrowType.(*arrow.MapType).KeysSorted = t.KeysSorted
 	case "struct":
 		f.arrowType = arrow.StructOf(fieldsFromJSON(f.Children)...)
 	case "fixedsizebinary":
@@ -363,6 +373,11 @@ type byteWidthJSON struct {
 	ByteWidth int    `json:"byteWidth,omitempty"`
 }
 
+type mapJSON struct {
+	Name       string `json:"name"`
+	KeysSorted bool   `json:"keysSorted,omitempty"`
+}
+
 func schemaToJSON(schema *arrow.Schema) Schema {
 	return Schema{
 		Fields: fieldsToJSON(schema.Fields()),
@@ -389,6 +404,8 @@ func fieldsToJSON(fields []arrow.Field) []FieldWrapper {
 			o[i].Children = fieldsToJSON([]arrow.Field{{Name: "item", Type: dt.Elem(), Nullable: f.Nullable}})
 		case *arrow.StructType:
 			o[i].Children = fieldsToJSON(dt.Fields())
+		case *arrow.MapType:
+			o[i].Children = fieldsToJSON([]arrow.Field{{Name: "entries", Type: dt.ValueType()}})
 		}
 	}
 	return o
@@ -652,6 +669,25 @@ func arrayFromJSON(mem memory.Allocator, dt arrow.DataType, arr Array) array.Int
 		bldr.AppendValues(data, valids)
 		return bldr.NewArray()
 
+	case *arrow.MapType:
+		bldr := array.NewMapBuilder(mem, dt.KeyType(), dt.ItemType(), dt.KeysSorted)
+		defer bldr.Release()
+		valids := validsFromJSON(arr.Valids)
+		pairs := arrayFromJSON(mem, dt.ValueType(), arr.Children[0])
+		defer pairs.Release()
+		for i, v := range valids {
+			bldr.Append(v)
+			beg := int64(arr.Offset[i])
+			end := int64(arr.Offset[i+1])
+			slice := array.NewSlice(pairs, beg, end).(*array.Struct)
+			kb := bldr.KeyBuilder()
+			buildArray(kb, slice.Field(0))
+			ib := bldr.ItemBuilder()
+			buildArray(ib, slice.Field(1))
+			slice.Release()
+		}
+		return bldr.NewArray()
+
 	case *arrow.Date32Type:
 		bldr := array.NewDate32Builder(mem)
 		defer bldr.Release()
@@ -863,6 +899,18 @@ func arrayToJSON(field arrow.Field, arr array.Interface) Array {
 		}
 		return o
 
+	case *array.Map:
+		o := Array{
+			Name:   field.Name,
+			Count:  arr.Len(),
+			Valids: validsToJSON(arr),
+			Offset: arr.Offsets(),
+			Children: []Array{
+				arrayToJSON(arrow.Field{Name: "entries", Type: arr.DataType().(*arrow.MapType).ValueType()}, arr.ListValues()),
+			},
+		}
+		return o
+
 	case *array.FixedSizeList:
 		o := Array{
 			Name:   field.Name,
diff --git a/go/arrow/internal/arrjson/arrjson_test.go b/go/arrow/internal/arrjson/arrjson_test.go
index 3f29a540871..bace8324a53 100644
--- a/go/arrow/internal/arrjson/arrjson_test.go
+++ b/go/arrow/internal/arrjson/arrjson_test.go
@@ -40,6 +40,7 @@ func TestReadWrite(t *testing.T) {
 	wantJSONs["intervals"] = makeIntervalsWantJSONs()
 	wantJSONs["durations"] = makeDurationsWantJSONs()
 	wantJSONs["decimal128"] = makeDecimal128sWantJSONs()
+	wantJSONs["maps"] = makeMapsWantJSONs()
 
 	tempDir, err := ioutil.TempDir("", "go-arrow-read-write-")
 	if err != nil {
@@ -3185,3 +3186,639 @@ func makeDecimal128sWantJSONs() string {
   ]
 }`
 }
+
+func makeMapsWantJSONs() string {
+	return `{
+  "schema": {
+    "fields": [
+      {
+        "name": "map_int_utf8",
+        "type": {
+          "name": "map",
+          "keysSorted": true
+        },
+        "nullable": true,
+        "children": [
+          {
+            "name": "entries",
+            "type": {
+              "name": "struct"
+            },
+            "nullable": false,
+            "children": [
+              {
+                "name": "key",
+                "type": {
+                  "name": "int",
+                  "isSigned": true,
+                  "bitWidth": 32
+                },
+                "nullable": false,
+                "children": []
+              },
+              {
+                "name": "value",
+                "type": {
+                  "name": "utf8"
+                },
+                "nullable": true,
+                "children": []
+              }
+            ]
+          }
+        ]
+      }
+    ]
+  },
+  "batches": [
+    {
+      "count": 2,
+      "columns": [
+        {
+          "name": "map_int_utf8",
+          "count": 2,
+          "VALIDITY": [
+            1,
+            0
+          ],
+          "OFFSET": [
+            0,
+            25,
+            50
+          ],
+          "children": [
+            {
+              "name": "entries",
+              "count": 50,
+              "VALIDITY": [
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1
+              ],
+              "children": [
+                {
+                  "name": "key",
+                  "count": 50,
+                  "VALIDITY": [
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1
+                  ],
+                  "DATA": [
+                    -1,
+                    -2,
+                    -3,
+                    -4,
+                    -5,
+                    -1,
+                    -2,
+                    -3,
+                    -4,
+                    -5,
+                    -1,
+                    -2,
+                    -3,
+                    -4,
+                    -5,
+                    -1,
+                    -2,
+                    -3,
+                    -4,
+                    -5,
+                    -1,
+                    -2,
+                    -3,
+                    -4,
+                    -5,
+                    1,
+                    2,
+                    3,
+                    4,
+                    5,
+                    1,
+                    2,
+                    3,
+                    4,
+                    5,
+                    1,
+                    2,
+                    3,
+                    4,
+                    5,
+                    1,
+                    2,
+                    3,
+                    4,
+                    5,
+                    1,
+                    2,
+                    3,
+                    4,
+                    5
+                  ]
+                },
+                {
+                  "name": "value",
+                  "count": 50,
+                  "VALIDITY": [
+                    1,
+                    0,
+                    0,
+                    1,
+                    1,
+                    1,
+                    0,
+                    0,
+                    1,
+                    1,
+                    1,
+                    0,
+                    0,
+                    1,
+                    1,
+                    1,
+                    0,
+                    0,
+                    1,
+                    1,
+                    1,
+                    0,
+                    0,
+                    1,
+                    1,
+                    1,
+                    0,
+                    0,
+                    1,
+                    1,
+                    1,
+                    0,
+                    0,
+                    1,
+                    1,
+                    1,
+                    0,
+                    0,
+                    1,
+                    1,
+                    1,
+                    0,
+                    0,
+                    1,
+                    1,
+                    1,
+                    0,
+                    0,
+                    1,
+                    1
+                  ],
+                  "DATA": [
+                    "111",
+                    "",
+                    "",
+                    "444",
+                    "555",
+                    "1111",
+                    "",
+                    "",
+                    "1444",
+                    "1555",
+                    "2111",
+                    "",
+                    "",
+                    "2444",
+                    "2555",
+                    "3111",
+                    "",
+                    "",
+                    "3444",
+                    "3555",
+                    "4111",
+                    "",
+                    "",
+                    "4444",
+                    "4555",
+                    "-111",
+                    "",
+                    "",
+                    "-444",
+                    "-555",
+                    "-1111",
+                    "",
+                    "",
+                    "-1444",
+                    "-1555",
+                    "-2111",
+                    "",
+                    "",
+                    "-2444",
+                    "-2555",
+                    "-3111",
+                    "",
+                    "",
+                    "-3444",
+                    "-3555",
+                    "-4111",
+                    "",
+                    "",
+                    "-4444",
+                    "-4555"
+                  ]
+                }
+              ]
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "count": 2,
+      "columns": [
+        {
+          "name": "map_int_utf8",
+          "count": 2,
+          "VALIDITY": [
+            1,
+            0
+          ],
+          "OFFSET": [
+            0,
+            25,
+            50
+          ],
+          "children": [
+            {
+              "name": "entries",
+              "count": 50,
+              "VALIDITY": [
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                1
+              ],
+              "children": [
+                {
+                  "name": "key",
+                  "count": 50,
+                  "VALIDITY": [
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1
+                  ],
+                  "DATA": [
+                    1,
+                    2,
+                    3,
+                    4,
+                    5,
+                    1,
+                    2,
+                    3,
+                    4,
+                    5,
+                    1,
+                    2,
+                    3,
+                    4,
+                    5,
+                    1,
+                    2,
+                    3,
+                    4,
+                    5,
+                    1,
+                    2,
+                    3,
+                    4,
+                    5,
+                    -1,
+                    -2,
+                    -3,
+                    -4,
+                    -5,
+                    -1,
+                    -2,
+                    -3,
+                    -4,
+                    -5,
+                    -1,
+                    -2,
+                    -3,
+                    -4,
+                    -5,
+                    -1,
+                    -2,
+                    -3,
+                    -4,
+                    -5,
+                    -1,
+                    -2,
+                    -3,
+                    -4,
+                    -5
+                  ]
+                },
+                {
+                  "name": "value",
+                  "count": 50,
+                  "VALIDITY": [
+                    1,
+                    0,
+                    0,
+                    1,
+                    1,
+                    1,
+                    0,
+                    0,
+                    1,
+                    1,
+                    1,
+                    0,
+                    0,
+                    1,
+                    1,
+                    1,
+                    0,
+                    0,
+                    1,
+                    1,
+                    1,
+                    0,
+                    0,
+                    1,
+                    1,
+                    1,
+                    0,
+                    0,
+                    1,
+                    1,
+                    1,
+                    0,
+                    0,
+                    1,
+                    1,
+                    1,
+                    0,
+                    0,
+                    1,
+                    1,
+                    1,
+                    0,
+                    0,
+                    1,
+                    1,
+                    1,
+                    0,
+                    0,
+                    1,
+                    1
+                  ],
+                  "DATA": [
+                    "-111",
+                    "",
+                    "",
+                    "-444",
+                    "-555",
+                    "-1111",
+                    "",
+                    "",
+                    "-1444",
+                    "-1555",
+                    "-2111",
+                    "",
+                    "",
+                    "-2444",
+                    "-2555",
+                    "-3111",
+                    "",
+                    "",
+                    "-3444",
+                    "-3555",
+                    "-4111",
+                    "",
+                    "",
+                    "-4444",
+                    "-4555",
+                    "111",
+                    "",
+                    "",
+                    "444",
+                    "555",
+                    "1111",
+                    "",
+                    "",
+                    "1444",
+                    "1555",
+                    "2111",
+                    "",
+                    "",
+                    "2444",
+                    "2555",
+                    "3111",
+                    "",
+                    "",
+                    "3444",
+                    "3555",
+                    "4111",
+                    "",
+                    "",
+                    "4444",
+                    "4555"
+                  ]
+                }
+              ]
+            }
+          ]
+        }
+      ]
+    }
+  ]
+}`
+}
diff --git a/go/arrow/ipc/file_reader.go b/go/arrow/ipc/file_reader.go
index 3b38f1bf628..66bd58ab0bb 100644
--- a/go/arrow/ipc/file_reader.go
+++ b/go/arrow/ipc/file_reader.go
@@ -449,6 +449,9 @@ func (ctx *arrayLoaderContext) loadArray(dt arrow.DataType) array.Interface {
 	case *arrow.StructType:
 		return ctx.loadStruct(dt)
 
+	case *arrow.MapType:
+		return ctx.loadMap(dt)
+
 	default:
 		panic(xerrors.Errorf("array type %T not handled yet", dt))
 	}
@@ -525,6 +528,19 @@ func (ctx *arrayLoaderContext) loadFixedSizeBinary(dt *arrow.FixedSizeBinaryType
 	return array.MakeFromData(data)
 }
 
+func (ctx *arrayLoaderContext) loadMap(dt *arrow.MapType) array.Interface {
+	field, buffers := ctx.loadCommon(2)
+	buffers = append(buffers, ctx.buffer())
+
+	sub := ctx.loadChild(dt.ValueType())
+	defer sub.Release()
+
+	data := array.NewData(dt, int(field.Length()), buffers, []*array.Data{sub.Data()}, int(field.NullCount()), 0)
+	defer data.Release()
+
+	return array.NewMapData(data)
+}
+
 func (ctx *arrayLoaderContext) loadList(dt *arrow.ListType) array.Interface {
 	field, buffers := ctx.loadCommon(2)
 	buffers = append(buffers, ctx.buffer())
diff --git a/go/arrow/ipc/metadata.go b/go/arrow/ipc/metadata.go
index 5f335f10890..373e705e3e3 100644
--- a/go/arrow/ipc/metadata.go
+++ b/go/arrow/ipc/metadata.go
@@ -375,6 +375,13 @@ func (fv *fieldVisitor) visit(field arrow.Field) {
 		flatbuf.DurationAddUnit(fv.b, unit)
 		fv.offset = flatbuf.DurationEnd(fv.b)
 
+	case *arrow.MapType:
+		fv.dtype = flatbuf.TypeMap
+		fv.kids = append(fv.kids, fieldToFB(fv.b, arrow.Field{Name: "entries", Type: dt.ValueType()}, fv.memo))
+		flatbuf.MapStart(fv.b)
+		flatbuf.MapAddKeysSorted(fv.b, dt.KeysSorted)
+		fv.offset = flatbuf.MapEnd(fv.b)
+
 	default:
 		err := xerrors.Errorf("arrow/ipc: invalid data type %v", dt)
 		panic(err) // FIXME(sbinet): implement all data-types.
@@ -510,11 +517,6 @@ func typeFromFB(field *flatbuf.Field, children []arrow.Field, md arrow.Metadata)
 }
 
 func concreteTypeFromFB(typ flatbuf.Type, data flatbuffers.Table, children []arrow.Field) (arrow.DataType, error) {
-	var (
-		dt  arrow.DataType
-		err error
-	)
-
 	switch typ {
 	case flatbuf.TypeNONE:
 		return nil, xerrors.Errorf("arrow/ipc: Type metadata cannot be none")
@@ -593,12 +595,30 @@ func concreteTypeFromFB(typ flatbuf.Type, data flatbuffers.Table, children []arr
 		dt.Init(data.Bytes, data.Pos)
 		return durationFromFB(dt)
 
+	case flatbuf.TypeMap:
+		if len(children) != 1 {
+			return nil, xerrors.Errorf("arrow/ipc: Map must have exactly 1 child field")
+		}
+
+		if children[0].Nullable || children[0].Type.ID() != arrow.STRUCT || len(children[0].Type.(*arrow.StructType).Fields()) != 2 {
+			return nil, xerrors.Errorf("arrow/ipc: Map's key-item pairs must be non-nullable structs")
+		}
+
+		pairType := children[0].Type.(*arrow.StructType)
+		if pairType.Field(0).Nullable {
+			return nil, xerrors.Errorf("arrow/ipc: Map's keys must be non-nullable")
+		}
+
+		var dt flatbuf.Map
+		dt.Init(data.Bytes, data.Pos)
+		ret := arrow.MapOf(pairType.Field(0).Type, pairType.Field(1).Type)
+		ret.KeysSorted = dt.KeysSorted()
+		return ret, nil
+
 	default:
 		// FIXME(sbinet): implement all the other types.
 		panic(xerrors.Errorf("arrow/ipc: type %v not implemented", flatbuf.EnumNamesType[typ]))
 	}
-
-	return dt, err
 }
 
 func intFromFB(data flatbuf.Int) (arrow.DataType, error) {
diff --git a/go/arrow/ipc/writer.go b/go/arrow/ipc/writer.go
index c4c75722610..83dd7128c1f 100644
--- a/go/arrow/ipc/writer.go
+++ b/go/arrow/ipc/writer.go
@@ -449,6 +449,43 @@ func (w *recordEncoder) visit(p *Payload, arr array.Interface) error {
 		}
 		w.depth++
 
+	case *arrow.MapType:
+		arr := arr.(*array.Map)
+		voffsets, err := w.getZeroBasedValueOffsets(arr)
+		if err != nil {
+			return xerrors.Errorf("could not retrieve zero-based value offsets for array %T: %w", arr, err)
+		}
+		p.body = append(p.body, voffsets)
+
+		w.depth--
+		var (
+			values        = arr.ListValues()
+			mustRelease   = false
+			values_offset int64
+			values_length int64
+		)
+		defer func() {
+			if mustRelease {
+				values.Release()
+			}
+		}()
+
+		if voffsets != nil {
+			values_offset = int64(arr.Offsets()[0])
+			values_length = int64(arr.Offsets()[arr.Len()]) - values_offset
+		}
+
+		if len(arr.Offsets()) != 0 || values_length < int64(values.Len()) {
+			// must also slice the values
+			values = array.NewSlice(values, values_offset, values_length)
+			mustRelease = true
+		}
+		err = w.visit(p, values)
+
+		if err != nil {
+			return xerrors.Errorf("could not visit list element for array %T: %w", arr, err)
+		}
+		w.depth++
 	case *arrow.ListType:
 		arr := arr.(*array.List)
 		voffsets, err := w.getZeroBasedValueOffsets(arr)

From beb5d1849581a9a22f10cb213e4914eabc0915a2 Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Thu, 29 Apr 2021 06:13:19 +0900
Subject: [PATCH 146/719] ARROW-11499: [Release] Use Artifactory instead of
 Bintray

Closes #10172 from kou/release-binary-artifactory

Authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 ci/scripts/java_build.sh                      |  22 +-
 dev/release/.env.example                      |  20 +-
 dev/release/02-source-test.rb                 |   8 +-
 dev/release/02-source.sh                      |   8 +-
 dev/release/05-binary-upload.sh               |  13 -
 dev/release/binary-task.rb                    | 862 +++++++-----------
 dev/release/download_rc_binaries.py           |  95 +-
 dev/release/post-02-binary.sh                 |   8 +-
 dev/release/post-03-website.sh                |   8 +-
 dev/release/post-06-csharp.sh                 |   2 +-
 dev/release/verify-apt.sh                     |  29 +-
 .../verify-release-candidate-wheels.bat       |   2 +-
 dev/release/verify-release-candidate.sh       |  22 +-
 dev/release/verify-yum.sh                     |  26 +-
 .../yum/apache-arrow-release.spec.in          |   4 +-
 r/vignettes/developing.Rmd                    |   6 +-
 16 files changed, 450 insertions(+), 685 deletions(-)

diff --git a/ci/scripts/java_build.sh b/ci/scripts/java_build.sh
index b8a7f7ced74..886aaf0f2c9 100755
--- a/ci/scripts/java_build.sh
+++ b/ci/scripts/java_build.sh
@@ -25,50 +25,50 @@ with_docs=${3:-false}
 
 if [[ "$(uname -s)" == "Linux" ]] && [[ "$(uname -m)" == "s390x" ]]; then
   # Since some files for s390_64 are not available at maven central,
-  # download pre-build files from bintray and install them explicitly
+  # download pre-build files from Artifactory and install them explicitly
   mvn_install="mvn install:install-file"
   wget="wget"
-  bintray_base_url="https://dl.bintray.com/apache/arrow"
+  artifactory_base_url="https://apache.jfrog.io/artifactory/arrow"
 
-  bintray_dir="protoc-binary"
+  artifactory_dir="protoc-binary"
   group="com.google.protobuf"
   artifact="protoc"
   ver="3.7.1"
   classifier="linux-s390_64"
   extension="exe"
   target=${artifact}-${ver}-${classifier}.${extension}
-  ${wget} ${bintray_base_url}/${bintray_dir}/${ver}/${target}
+  ${wget} ${artifactory_base_url}/${artifactory_dir}/${ver}/${target}
   ${mvn_install} -DgroupId=${group} -DartifactId=${artifact} -Dversion=${ver} -Dclassifier=${classifier} -Dpackaging=${extension} -Dfile=$(pwd)/${target}
   # protoc requires libprotoc.so.18 libprotobuf.so.18
-  ${wget} ${bintray_base_url}/${bintray_dir}/${ver}/libprotoc.so.18
-  ${wget} ${bintray_base_url}/${bintray_dir}/${ver}/libprotobuf.so.18
+  ${wget} ${artifactory_base_url}/${artifactory_dir}/${ver}/libprotoc.so.18
+  ${wget} ${artifactory_base_url}/${artifactory_dir}/${ver}/libprotobuf.so.18
   mkdir -p ${ARROW_HOME}/lib
   cp lib*.so.18 ${ARROW_HOME}/lib
   export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${ARROW_HOME}/lib
 
-  bintray_dir="protoc-gen-grpc-java-binary"
+  artifactory_dir="protoc-gen-grpc-java-binary"
   group="io.grpc"
   artifact="protoc-gen-grpc-java"
   ver="1.30.2"
   classifier="linux-s390_64"
   extension="exe"
   target=${artifact}-${ver}-${classifier}.${extension}
-  ${wget} ${bintray_base_url}/${bintray_dir}/${ver}/${target}
+  ${wget} ${artifactory_base_url}/${artifactory_dir}/${ver}/${target}
   ${mvn_install} -DgroupId=${group} -DartifactId=${artifact} -Dversion=${ver} -Dclassifier=${classifier} -Dpackaging=${extension} -Dfile=$(pwd)/${target}
 
-  bintray_dir="netty-binary"
+  artifactory_dir="netty-binary"
   group="io.netty"
   artifact="netty-transport-native-unix-common"
   ver="4.1.48.Final"
   classifier="linux-s390_64"
   extension="jar"
   target=${artifact}-${ver}-${classifier}.${extension}
-  ${wget} ${bintray_base_url}/${bintray_dir}/${ver}/${target}
+  ${wget} ${artifactory_base_url}/${artifactory_dir}/${ver}/${target}
   ${mvn_install} -DgroupId=${group} -DartifactId=${artifact} -Dversion=${ver} -Dclassifier=${classifier} -Dpackaging=${extension} -Dfile=$(pwd)/${target}
   artifact="netty-transport-native-epoll"
   extension="jar"
   target=${artifact}-${ver}-${classifier}.${extension}
-  ${wget} ${bintray_base_url}/${bintray_dir}/${ver}/${target}
+  ${wget} ${artifactory_base_url}/${artifactory_dir}/${ver}/${target}
   ${mvn_install} -DgroupId=${group} -DartifactId=${artifact} -Dversion=${ver} -Dclassifier=${classifier} -Dpackaging=${extension} -Dfile=$(pwd)/${target}
 fi
 
diff --git a/dev/release/.env.example b/dev/release/.env.example
index 0126cdd3f29..50c8ec8e6d2 100644
--- a/dev/release/.env.example
+++ b/dev/release/.env.example
@@ -26,23 +26,7 @@
 # You must set this.
 #GPG_KEY_ID=08D3564B7C6A9CAFBFF6A66791D18FCF079F8007
 
-# The Bintray repository where artifacts are uploaded.
-# You can use your Bintray repository such as kou/arrow for test.
-BINTRAY_REPOSITORY=apache/arrow
-
-# The Bintray repository where released artifacts exist.
-# The released artifacts are used to build APT/Yum repository.
-# The Bintray repository isn't changed. (Download only. No upload.)
-#
-# Normally, you don't need to change this.
-SOURCE_BINTRAY_REPOSITORY=apache/arrow
-
-# The Bintray user name to upload artifacts to Bintray.
-#
-# You must set this.
-#BINTRAY_USER=kou
-
-# The Bintray API key to upload artifacts to Bintray.
+# The Artifactory API key to upload artifacts to Artifactory.
 #
 # You must set this.
-#BINTRAY_API_KEY=secret
+#ARTIFACTORY_API_KEY=secret
diff --git a/dev/release/02-source-test.rb b/dev/release/02-source-test.rb
index 7d92881f282..102ff706816 100644
--- a/dev/release/02-source-test.rb
+++ b/dev/release/02-source-test.rb
@@ -135,10 +135,10 @@ def test_vote
 [1]: https://issues.apache.org/jira/issues/?jql=project%20%3D%20ARROW%20AND%20status%20in%20%28Resolved%2C%20Closed%29%20AND%20fixVersion%20%3D%20#{@release_version}
 [2]: https://github.com/apache/arrow/tree/#{@current_commit}
 [3]: https://dist.apache.org/repos/dist/dev/arrow/apache-arrow-#{@release_version}-rc0
-[4]: https://bintray.com/apache/arrow/centos-rc/#{@release_version}-rc0
-[5]: https://bintray.com/apache/arrow/debian-rc/#{@release_version}-rc0
-[6]: https://bintray.com/apache/arrow/python-rc/#{@release_version}-rc0
-[7]: https://bintray.com/apache/arrow/ubuntu-rc/#{@release_version}-rc0
+[4]: https://apache.jfrog.io/artifactory/arrow/centos-rc/#{@release_version}-rc0
+[5]: https://apache.jfrog.io/artifactory/arrow/debian-rc/#{@release_version}-rc0
+[6]: https://apache.jfrog.io/artifactory/arrow/python-rc/#{@release_version}-rc0
+[7]: https://apache.jfrog.io/artifactory/arrow/ubuntu-rc/#{@release_version}-rc0
 [8]: https://github.com/apache/arrow/blob/#{@current_commit}/CHANGELOG.md
 [9]: https://cwiki.apache.org/confluence/display/ARROW/How+to+Verify+Release+Candidates
     VOTE
diff --git a/dev/release/02-source.sh b/dev/release/02-source.sh
index 89850e7543d..9809f4f5e66 100755
--- a/dev/release/02-source.sh
+++ b/dev/release/02-source.sh
@@ -151,10 +151,10 @@ The vote will be open for at least 72 hours.
 [1]: ${jira_url}/issues/?jql=${jql}
 [2]: https://github.com/apache/arrow/tree/${release_hash}
 [3]: ${rc_url}
-[4]: https://bintray.com/apache/arrow/centos-rc/${version}-rc${rc}
-[5]: https://bintray.com/apache/arrow/debian-rc/${version}-rc${rc}
-[6]: https://bintray.com/apache/arrow/python-rc/${version}-rc${rc}
-[7]: https://bintray.com/apache/arrow/ubuntu-rc/${version}-rc${rc}
+[4]: https://apache.jfrog.io/artifactory/arrow/centos-rc/${version}-rc${rc}
+[5]: https://apache.jfrog.io/artifactory/arrow/debian-rc/${version}-rc${rc}
+[6]: https://apache.jfrog.io/artifactory/arrow/python-rc/${version}-rc${rc}
+[7]: https://apache.jfrog.io/artifactory/arrow/ubuntu-rc/${version}-rc${rc}
 [8]: https://github.com/apache/arrow/blob/${release_hash}/CHANGELOG.md
 [9]: https://cwiki.apache.org/confluence/display/ARROW/How+to+Verify+Release+Candidates
 MAIL
diff --git a/dev/release/05-binary-upload.sh b/dev/release/05-binary-upload.sh
index 979e40c3c6f..61ac42b6fb5 100755
--- a/dev/release/05-binary-upload.sh
+++ b/dev/release/05-binary-upload.sh
@@ -51,9 +51,6 @@ fi
 
 cd "${SOURCE_DIR}"
 
-: ${BINTRAY_REPOSITORY_CUSTOM:=${BINTRAY_REPOSITORY:-}}
-: ${SOURCE_BINTRAY_REPOSITORY_CUSTOM:=${SOURCE_BINTRAY_REPOSITORY:-}}}
-
 if [ ! -f .env ]; then
   echo "You must create $(pwd)/.env"
   echo "You can use $(pwd)/.env.example as template"
@@ -61,14 +58,6 @@ if [ ! -f .env ]; then
 fi
 . .env
 
-if [ -n "${BINTRAY_REPOSITORY_CUSTOM}" ]; then
-  BINTRAY_REPOSITORY=${BINTRAY_REPOSITORY_CUSTOM}
-fi
-
-if [ -n "${SOURCE_BINTRAY_REPOSITORY_CUSTOM}" ]; then
-  SOURCE_BINTRAY_REPOSITORY=${SOURCE_BINTRAY_REPOSITORY_CUSTOM}
-fi
-
 . utils-binary.sh
 
 # By default upload all artifacts.
@@ -131,8 +120,6 @@ docker_run \
     "${rake_tasks[@]}" \
     APT_TARGETS=$(IFS=,; echo "${apt_targets[*]}") \
     ARTIFACTS_DIR="${tmp_dir}/artifacts" \
-    BINTRAY_REPOSITORY=${BINTRAY_REPOSITORY} \
     RC=${rc} \
-    SOURCE_BINTRAY_REPOSITORY=${SOURCE_BINTRAY_REPOSITORY} \
     VERSION=${version} \
     YUM_TARGETS=$(IFS=,; echo "${yum_targets[*]}")
diff --git a/dev/release/binary-task.rb b/dev/release/binary-task.rb
index 7e7561a7722..2e5c91b16f9 100644
--- a/dev/release/binary-task.rb
+++ b/dev/release/binary-task.rb
@@ -57,8 +57,8 @@ def join
     private
     def choose_n_workers(use_case)
       case use_case
-      when :bintray
-        # Too many workers cause Bintray error.
+      when :artifactory
+        # Too many workers cause Artifactory error.
         6
       when :gpg
         # Too many workers cause gpg-agent error.
@@ -242,7 +242,7 @@ def guess_terminal_width_from_env
     end
   end
 
-  class BintrayClient
+  class ArtifactoryClient
     class Error < StandardError
       attr_reader :request
       attr_reader :response
@@ -253,229 +253,209 @@ def initialize(request, response, message)
       end
     end
 
-    def initialize(options={})
-      @options = options
-      repository = @options[:repository]
-      @subject, @repository = repository.split("/", 2) if repository
-      @package = @options[:package]
-      @version = @options[:version]
-      @user = @options[:user]
-      @api_key = @options[:api_key]
+    def initialize(prefix, api_key)
+      @prefix = prefix
+      @api_key = api_key
+      @http = nil
+      restart
+    end
+
+    def restart
+      close
+      @http = start_http(build_url(""))
     end
 
-    def request(method, headers, *components, &block)
-      url = build_request_url(*components)
+    private def start_http(url, &block)
       http = Net::HTTP.new(url.host, url.port)
       http.set_debug_output($stderr) if ENV["DEBUG"]
       http.use_ssl = true
-      http.start do |http|
-        request = build_request(method, url, headers, &block)
-        http.request(request) do |response|
-          case response
-          when Net::HTTPSuccess
-            return JSON.parse(response.body)
-          else
-            message = "failed to request: "
-            message << "#{url}: #{request.method}: "
-            message << "#{response.message} #{response.code}:\n"
-            message << response.body
-            raise Error.new(request, response, message)
-          end
-        end
+      if block_given?
+        http.start(&block)
+      else
+        http
       end
     end
 
-    def repository
-      request(:get,
-              {},
-              "repos",
-              @subject,
-              @repository)
+    def close
+      return if @http.nil?
+      @http.finish if @http.started?
+      @http = nil
     end
 
-    def create_repository
-      request(:post,
-              {},
-              "repos",
-              @subject,
-              @repository) do
-        request = {
-          "name" => @repository,
-          "desc" => "Apache Arrow",
-        }
-        JSON.generate(request)
-      end
-    end
-
-    def ensure_repository
-      begin
-        repository
-      rescue Error => error
-        case error.response
-        when Net::HTTPNotFound
-          create_repository
+    def request(method, headers, path, body: nil, &block)
+      url = build_url(path)
+      request = build_request(method, url, headers, body: body)
+      if ENV["DRY_RUN"]
+        case request
+        when Net::HTTP::Get, Net::HTTP::Head
         else
-          raise
+          p [method, url]
+          return
         end
       end
+      request_internal(@http, request, &block)
     end
 
-    def package
-      request(:get,
-              {},
-              "packages",
-              @subject,
-              @repository,
-              @package)
-    end
-
-    def package_versions
-      begin
-        package["versions"]
-      rescue Error => error
-        case error.response
-        when Net::HTTPNotFound
-          []
+    private def request_internal(http, request, &block)
+      http.request(request) do |response|
+        case response
+        when Net::HTTPSuccess,
+             Net::HTTPNotModified
+          if block_given?
+            return yield(response)
+          else
+            response.read_body
+            return response
+          end
+        when Net::HTTPRedirection
+          redirected_url = URI(response["Location"])
+          redirected_request = Net::HTTP::Get.new(redirected_url, {})
+          start_http(redirected_url) do |redirected_http|
+            request_internal(redirected_http, redirected_request, &block)
+          end
         else
-          raise
+          message = "failed to request: "
+          message << "#{request.uri}: #{request.method}: "
+          message << "#{response.message} #{response.code}:\n"
+          message << response.body
+          raise Error.new(request, response, message)
         end
       end
     end
 
-    def create_package(description)
-      request(:post,
-              {},
-              "packages",
-              @subject,
-              @repository) do
-        request = {
-          "name" => @package,
-          "desc" => description,
-          "licenses" => ["Apache-2.0"],
-          "vcs_url" => "https://github.com/apache/arrow.git",
-          "website_url" => "https://arrow.apache.org/",
-          "issue_tracker_url" => "https://issues.apache.org/jira/browse/ARROW",
-          "github_repo" => "apache/arrow",
-          "public_download_numbers" => true,
-          "public_stats" => true,
-        }
-        JSON.generate(request)
+    def files
+      _files = []
+      directories = [""]
+      until directories.empty?
+        directory = directories.shift
+        list(directory).each do |path|
+          resolved_path = "#{directory}#{path}"
+          case path
+          when "../"
+          when /\/\z/
+            directories << resolved_path
+          else
+            _files << resolved_path
+          end
+        end
       end
+      _files
     end
 
-    def ensure_package(description)
-      begin
-        package
-      rescue Error => error
-        case error.response
-        when Net::HTTPNotFound
-          create_package(description)
-        else
-          raise
+    def list(path)
+      with_retry(3, build_url(path)) do
+        begin
+          request(:get, {}, path) do |response|
+            response.body.scan(/<a href="(.+?)"/).flatten
+          end
+        rescue Error => error
+          case error.response
+          when Net::HTTPNotFound
+            return []
+          else
+            raise
+          end
         end
       end
     end
 
-    def create_version(description)
-      request(:post,
-              {},
-              "packages",
-              @subject,
-              @repository,
-              @package,
-              "versions") do
-        request = {
-          "name" => @version,
-          "desc" => description,
-        }
-        JSON.generate(request)
+    def head(path)
+      with_retry(3, build_url(path)) do
+        request(:head, {}, path)
       end
     end
 
-    def ensure_version(version, description)
-      return if package["versions"].include?(version)
-      create_version(description)
-    end
-
-    def files
-      request(:get,
-              {},
-              "packages",
-              @subject,
-              @repository,
-              @package,
-              "versions",
-              @version,
-              "files")
+    def upload(path, destination_path)
+      with_retry(3, build_url(destination_path)) do
+        sha1 = Digest::SHA1.file(path).hexdigest
+        sha256 = Digest::SHA256.file(path).hexdigest
+        headers = {
+          "X-Artifactory-Last-Modified" => File.mtime(path).rfc2822,
+          "X-Checksum-Deploy" => "false",
+          "X-Checksum-Sha1" => sha1,
+          "X-Checksum-Sha256" => sha256,
+          "Content-Length" => File.size(path).to_s,
+          "Content-Type" => "application/octet-stream",
+        }
+        File.open(path, "rb") do |input|
+          request(:put, headers, destination_path, body: input)
+        end
+      end
     end
 
-    def upload(path, destination_path)
-      sha256 = Digest::SHA256.file(path).hexdigest
-      headers = {
-        "X-Bintray-Override" => "1",
-        "X-Bintray-Package" => @package,
-        "X-Bintray-Publish" => "1",
-        "X-Bintray-Version" => @version,
-        "X-Checksum-Sha2" => sha256,
-        "Content-Length" => File.size(path).to_s,
-      }
-      File.open(path, "rb") do |input|
-        request(:put,
-                headers,
-                "content",
-                @subject,
-                @repository,
-                destination_path) do
-          input
+    def download(path, output_path)
+      with_retry(5, build_url(path)) do
+        begin
+          begin
+            headers = {}
+            if File.exist?(output_path)
+              headers["If-Modified-Since"] = File.mtime(output_path).rfc2822
+            end
+            request(:get, headers, path) do |response|
+              case response
+              when Net::HTTPNotModified
+              else
+                File.open(output_path, "wb") do |output|
+                  response.read_body do |chunk|
+                    output.write(chunk)
+                  end
+                end
+                last_modified = response["Last-Modified"]
+                if last_modified
+                  FileUtils.touch(output_path,
+                                  mtime: Time.rfc2822(last_modified))
+                end
+              end
+            end
+          rescue Error => error
+            case error.response
+            when Net::HTTPNotFound
+              $stderr.puts(error.message)
+              return
+            else
+              raise
+            end
+          end
         end
+      rescue
+        FileUtils.rm_f(output_path)
+        raise
       end
     end
 
     def delete(path)
-      request(:delete,
-              {},
-              "content",
-              @subject,
-              @repository,
-              path)
+      with_retry(3, build_url(path)) do
+        request(:delete, {}, path)
+      end
     end
 
     private
-    def build_request_url(*components)
-      if components.last.is_a?(Hash)
-        parameters = components.pop
-      else
-        parameters = nil
-      end
-      path = components.join("/")
-      url = "https://bintray.com/api/v1/#{path}"
-      if parameters
-        separator = "?"
-        parameters.each do |key, value|
-          url << "#{separator}#{CGI.escape(key)}=#{CGI.escape(value)}"
-          separator = "&"
-        end
-      end
-      URI(url)
+    def build_url(path)
+      URI("https://apache.jfrog.io/artifactory/arrow/#{@prefix}/#{path}")
     end
 
-    def build_request(method, url, headers, &block)
+    def build_request(method, url, headers, body: nil)
+      need_auth = false
       case method
+      when :head
+        request = Net::HTTP::Head.new(url, headers)
       when :get
         request = Net::HTTP::Get.new(url, headers)
       when :post
+        need_auth = true
         request = Net::HTTP::Post.new(url, headers)
       when :put
+        need_auth = true
         request = Net::HTTP::Put.new(url, headers)
       when :delete
+        need_auth = true
         request = Net::HTTP::Delete.new(url, headers)
       else
         raise "unsupported HTTP method: #{method.inspect}"
       end
-      request.basic_auth(@user, @api_key) if @user and @api_key
-      if block_given?
-        request["Content-Type"] = "application/json"
-        body = yield
+      request["Connection"] = "Keep-Alive"
+      request["X-JFrog-Art-Api"] = @api_key if need_auth
+      if body
         if body.is_a?(String)
           request.body = body
         else
@@ -484,209 +464,112 @@ def build_request(method, url, headers, &block)
       end
       request
     end
-  end
 
-  module HashChekable
-    def same_hash?(path, sha256)
-      return false unless File.exist?(path)
-      Digest::SHA256.file(path).hexdigest == sha256
-    end
-  end
-
-  class BintrayDownloader
-    include HashChekable
-
-    def initialize(repository:,
-                   distribution:,
-                   version:,
-                   rc: nil,
-                   destination:,
-                   user:,
-                   api_key:)
-      @repository = repository
-      @distribution = distribution
-      @version = version
-      @rc = rc
-      @destination = destination
-      @user = user
-      @api_key = api_key
-    end
-
-    def download
-      client.ensure_repository
-
-      progress_label = "Downloading: #{package} #{full_version}"
-      progress_reporter = ProgressReporter.new(progress_label)
-      pool = ThreadPool.new(:bintray) do |path, output_path|
-        download_file(path, output_path)
-        progress_reporter.advance
-      end
-      target_files.each do |file|
-        path = file["path"]
-        path_without_package = path.split("/", 2)[1..-1].join("/")
-        output_path = "#{@destination}/#{path_without_package}"
-        yield(output_path)
-        sha256 = file["sha256"]
-        next if same_hash?(output_path, sha256)
-        output_dir = File.dirname(output_path)
-        FileUtils.mkdir_p(output_dir)
-        progress_reporter.increment_max
-        pool << [path, output_path]
-      end
-      pool.join
-      progress_reporter.finish
-    end
-
-    private
-    def package
-      if @rc
-        "#{@distribution}-rc"
-      else
-        @distribution
-      end
-    end
-
-    def full_version
-      if @rc
-        "#{@version}-rc#{@rc}"
-      else
-        @version
-      end
-    end
-
-    def client(options={})
-      default_options = {
-        repository: @repository,
-        package: package,
-        version: full_version,
-        user: @user,
-        api_key: @api_key,
-      }
-      BintrayClient.new(default_options.merge(options))
-    end
-
-    def target_files
-      begin
-        client.files
-      rescue BintrayClient::Error
-        []
-      end
-    end
-
-    def download_file(path, output_path)
-      max_n_retries = 5
+    def with_retry(max_n_retries, target)
       n_retries = 0
-      url = URI("https://dl.bintray.com/#{@repository}/#{path}")
       begin
-        download_url(url, output_path)
-      rescue OpenSSL::OpenSSLError,
+        yield
+      rescue Net::OpenTimeout,
+             OpenSSL::OpenSSLError,
              SocketError,
              SystemCallError,
              Timeout::Error => error
         n_retries += 1
         if n_retries <= max_n_retries
           $stderr.puts
-          $stderr.puts("Retry #{n_retries}: #{url}: " +
+          $stderr.puts("Retry #{n_retries}: #{target}: " +
                        "#{error.class}: #{error.message}")
+          restart
           retry
         else
           raise
         end
       end
     end
+  end
 
-    def download_url(url, output_path)
-      loop do
-        http = Net::HTTP.new(url.host, url.port)
-        http.set_debug_output($stderr) if ENV["DEBUG"]
-        http.use_ssl = true
-        http.start do |http|
-          request = Net::HTTP::Get.new(url)
-          http.request(request) do |response|
-            case response
-            when Net::HTTPSuccess
-              save_response(response, output_path)
-              return
-            when Net::HTTPRedirection
-              url = URI(response["Location"])
-            when Net::HTTPNotFound
-              $stderr.puts(build_download_error_message(url, response))
-              return
-            else
-              raise build_download_error_message(url, response)
-            end
-          end
+  class ArtifactoryClientPool
+    class << self
+      def open(prefix, api_key)
+        pool = new(prefix, api_key)
+        begin
+          yield(pool)
+        ensure
+          pool.close
         end
       end
     end
 
-    def save_response(response, output_path)
-      File.open(output_path, "wb") do |output|
-        response.read_body do |chunk|
-          output.print(chunk)
+    def initialize(prefix, api_key)
+      @prefix = prefix
+      @api_key = api_key
+      @mutex = Thread::Mutex.new
+      @clients = []
+    end
+
+    def pull
+      client = @mutex.synchronize do
+        if @clients.empty?
+          ArtifactoryClient.new(@prefix, @api_key)
+        else
+          @clients.pop
         end
       end
-      last_modified = response["Last-Modified"]
-      if last_modified
-        FileUtils.touch(output_path, mtime: Time.rfc2822(last_modified))
+      begin
+        yield(client)
+      ensure
+        release(client)
       end
     end
 
-    def build_download_error_message(url, response)
-      message = "failed to download: "
-      message << "#{url}: #{response.message} #{response.code}:\n"
-      message << response.body
-      message
+    def release(client)
+      @mutex.synchronize do
+        @clients << client
+      end
     end
-  end
 
-  class BintrayUploader
-    include HashChekable
+    def close
+      @clients.each(&:close)
+    end
+  end
 
-    def initialize(repository:,
-                   distribution:,
-                   distribution_label:,
-                   version:,
+  class ArtifactoryDownloader
+    def initialize(distribution:,
                    rc: nil,
-                   source:,
-                   destination_prefix: "",
-                   user:,
+                   prefix: "",
+                   destination:,
                    api_key:)
-      @repository = repository
       @distribution = distribution
-      @distribution_label = distribution_label
-      @version = version
       @rc = rc
-      @source = source
-      @destination_prefix = destination_prefix
-      @user = user
+      @prefix = prefix
+      @destination = destination
       @api_key = api_key
     end
 
-    def upload
-      client.ensure_repository
-      client.ensure_package(package_description)
-      client.ensure_version(full_version, version_description)
-
-      progress_label = "Uploading: #{package} #{full_version}"
+    def download
+      progress_label = "Downloading: #{package}"
       progress_reporter = ProgressReporter.new(progress_label)
-      pool = ThreadPool.new(:bintray) do |path, relative_path|
-        upload_file(path, relative_path)
-        progress_reporter.advance
-      end
-
-      files = existing_files
-      source = Pathname(@source)
-      source.glob("**/*") do |path|
-        next if path.directory?
-        destination_path =
-          "#{package}/#{@destination_prefix}#{path.relative_path_from(source)}"
-        file = files[destination_path]
-        next if file and same_hash?(path.to_s, file["sha256"])
-        progress_reporter.increment_max
-        pool << [path, destination_path]
+      prefix = "#{package}/#{@prefix}"
+      ArtifactoryClientPool.open(prefix, @api_key) do |client_pool|
+        thread_pool = ThreadPool.new(:artifactory) do |path, output_path|
+          client_pool.pull do |client|
+            client.download(path, output_path)
+          end
+          progress_reporter.advance
+        end
+        files = client_pool.pull do |client|
+          client.files
+        end
+        files.each do |path|
+          output_path = "#{@destination}/#{path}"
+          yield(output_path)
+          output_dir = File.dirname(output_path)
+          FileUtils.mkdir_p(output_dir)
+          progress_reporter.increment_max
+          thread_pool << [path, output_path]
+        end
+        thread_pool.join
       end
-      pool.join
       progress_reporter.finish
     end
 
@@ -698,88 +581,70 @@ def package
         @distribution
       end
     end
+  end
 
-    def full_version
-      if @rc
-        "#{@version}-rc#{@rc}"
-      else
-        @version
-      end
-    end
-
-    def package_description
-      if @rc
-        release_type = "RC"
-      else
-        release_type = "Release"
-      end
-      case @distribution
-      when "debian", "ubuntu"
-        "#{release_type} deb packages for #{@distribution_label}"
-      when "centos"
-        "#{release_type} RPM packages for #{@distribution_label}"
-      else
-        "#{release_type} binaries for #{@distribution_label}"
-      end
+  class ArtifactoryUploader
+    def initialize(distribution:,
+                   rc: nil,
+                   source:,
+                   destination_prefix: "",
+                   sync: false,
+                   api_key:)
+      @distribution = distribution
+      @rc = rc
+      @source = source
+      @destination_prefix = destination_prefix
+      @sync = sync
+      @api_key = api_key
     end
 
-    def version_description
-      if @rc
-        "Apache Arrow #{@version} RC#{@rc} for #{@distribution_label}"
-      else
-        "Apache Arrow #{@version} for #{@distribution_label}"
-      end
-    end
+    def upload
+      progress_label = "Uploading: #{package}"
+      progress_reporter = ProgressReporter.new(progress_label)
+      prefix = "#{package}/#{@destination_prefix}"
+      ArtifactoryClientPool.open(prefix, @api_key) do |client_pool|
+        if @sync
+          existing_files = client_pool.pull do |client|
+            client.files
+          end
+        else
+          existing_files = []
+        end
 
-    def client
-      BintrayClient.new(repository: @repository,
-                        package: package,
-                        version: full_version,
-                        user: @user,
-                        api_key: @api_key)
-    end
+        thread_pool = ThreadPool.new(:artifactory) do |path, relative_path|
+          client_pool.pull do |client|
+            client.upload(path, relative_path)
+          end
+          progress_reporter.advance
+        end
 
-    def existing_files
-      files = {}
-      client.files.each do |file|
-        files[file["path"]] = file
-      end
-      files
-    end
+        source = Pathname(@source)
+        source.glob("**/*") do |path|
+          next if path.directory?
+          destination_path = path.relative_path_from(source)
+          progress_reporter.increment_max
+          existing_files.delete(destination_path.to_s)
+          thread_pool << [path, destination_path]
+        end
+        thread_pool.join
 
-    def upload_file(path, destination_path)
-      max_n_retries = 3
-      n_retries = 0
-      begin
-        begin
-          client.upload(path, destination_path)
-        rescue BintrayClient::Error => error
-          case error.response
-          when Net::HTTPConflict
-            n_retries += 1
-            if n_retries <= max_n_retries
-              client.delete(destination_path)
-              retry
-            else
-              $stderr.puts(error)
+        if @sync
+          existing_files.each do |file|
+            client_pool.pull do |client|
+              client.delete(file)
             end
-          else
-            $stderr.puts(error)
           end
         end
-      rescue OpenSSL::OpenSSLError,
-             SocketError,
-             SystemCallError,
-             Timeout::Error => error
-        n_retries += 1
-        if n_retries <= max_n_retries
-          $stderr.puts
-          $stderr.puts("Retry #{n_retries}: #{path}: " +
-                       "#{error.class}: #{error.message}")
-          retry
-        else
-          raise
-        end
+      end
+      progress_reporter.finish
+    end
+
+    private
+    def package
+      if @rc
+        "#{@distribution}-rc"
+      else
+        @distribution
       end
     end
   end
@@ -824,22 +689,8 @@ def rpm_gpg_key_package_name(id)
     "gpg-pubkey-#{shorten_gpg_key_id(id).downcase}"
   end
 
-  def bintray_user
-    env_value("BINTRAY_USER")
-  end
-
-  def bintray_api_key
-    env_value("BINTRAY_API_KEY")
-  end
-
-  def bintray_repository
-    env_value("BINTRAY_REPOSITORY")
-  end
-
-  def source_bintray_repository
-    env_value("SOURCE_BINTRAY_REPOSITORY") do
-      bintray_repository
-    end
+  def artifactory_api_key
+    env_value("ARTIFACTORY_API_KEY")
   end
 
   def artifacts_dir
@@ -930,36 +781,27 @@ def sign_dir(label, dir)
 
   def download_distribution(distribution,
                             destination,
-                            with_source_repository: false)
+                            with_source_repository: false,
+                            prefix: "")
     existing_paths = {}
     Pathname(destination).glob("**/*") do |path|
       next if path.directory?
       existing_paths[path.to_s] = true
     end
     if with_source_repository
-      source_client = BintrayClient.new(repository: source_bintray_repository,
-                                        package: distribution,
-                                        user: bintray_user,
-                                        api_key: bintray_api_key)
-      source_client.package_versions[0, 10].each do |source_version|
-        downloader = BintrayDownloader.new(repository: source_bintray_repository,
-                                           distribution: distribution,
-                                           version: source_version,
-                                           destination: destination,
-                                           user: bintray_user,
-                                           api_key: bintray_api_key)
-        downloader.download do |output_path|
-          existing_paths.delete(output_path)
-        end
+      downloader = ArtifactoryDownloader.new(distribution: distribution,
+                                             prefix: prefix,
+                                             destination: destination,
+                                             api_key: artifactory_api_key)
+      downloader.download do |output_path|
+        existing_paths.delete(output_path)
       end
     end
-    downloader = BintrayDownloader.new(repository: bintray_repository,
-                                       distribution: distribution,
-                                       version: version,
-                                       rc: rc,
-                                       destination: destination,
-                                       user: bintray_user,
-                                       api_key: bintray_api_key)
+    downloader = ArtifactoryDownloader.new(distribution: distribution,
+                                           rc: rc,
+                                           prefix: prefix,
+                                           destination: destination,
+                                           api_key: artifactory_api_key)
     downloader.download do |output_path|
       existing_paths.delete(output_path)
     end
@@ -1149,14 +991,10 @@ def define_deb_tasks
         apt_distributions.each do |distribution|
           distribution_dir = "#{deb_dir}/#{distribution}"
           distribution_label = apt_distribution_label(distribution)
-          uploader = BintrayUploader.new(repository: bintray_repository,
-                                         distribution: distribution,
-                                         distribution_label: distribution_label,
-                                         version: version,
-                                         rc: rc,
-                                         source: distribution_dir,
-                                         user: bintray_user,
-                                         api_key: bintray_api_key)
+          uploader = ArtifactoryUploader.new(distribution: distribution,
+                                             rc: rc,
+                                             source: distribution_dir,
+                                             api_key: artifactory_api_key)
           uploader.upload
         end
       end
@@ -1335,15 +1173,11 @@ def define_apt_rc_tasks
           apt_distributions.each do |distribution|
             dists_dir = "#{apt_rc_repositories_dir}/#{distribution}/dists"
             distribution_label = apt_distribution_label(distribution)
-            uploader = BintrayUploader.new(repository: bintray_repository,
-                                           distribution: distribution,
-                                           distribution_label: distribution_label,
-                                           version: version,
-                                           rc: rc,
-                                           source: dists_dir,
-                                           destination_prefix: "dists/",
-                                           user: bintray_user,
-                                           api_key: bintray_api_key)
+            uploader = ArtifactoryUploader.new(distribution: distribution,
+                                               rc: rc,
+                                               source: dists_dir,
+                                               destination_prefix: "dists/",
+                                               api_key: artifactory_api_key)
             uploader.upload
           end
         end
@@ -1377,13 +1211,9 @@ def define_apt_release_tasks
           apt_distributions.each do |distribution|
             distribution_dir = "#{apt_release_repositories_dir}/#{distribution}"
             distribution_label = apt_distribution_label(distribution)
-            uploader = BintrayUploader.new(repository: bintray_repository,
-                                           distribution: distribution,
-                                           distribution_label: distribution_label,
-                                           version: version,
-                                           source: distribution_dir,
-                                           user: bintray_user,
-                                           api_key: bintray_api_key)
+            uploader = ArtifactoryUploader.new(distribution: distribution,
+                                               source: distribution_dir,
+                                               api_key: artifactory_api_key)
             uploader.upload
           end
         end
@@ -1602,14 +1432,10 @@ def define_rpm_tasks
         yum_distributions.each do |distribution|
           distribution_dir = "#{rpm_dir}/#{distribution}"
           distribution_label = yum_distribution_label(distribution)
-          uploader = BintrayUploader.new(repository: bintray_repository,
-                                         distribution: distribution,
-                                         distribution_label: distribution_label,
-                                         version: version,
-                                         rc: rc,
-                                         source: distribution_dir,
-                                         user: bintray_user,
-                                         api_key: bintray_api_key)
+          uploader = ArtifactoryUploader.new(distribution: distribution,
+                                             rc: rc,
+                                             source: distribution_dir,
+                                             api_key: artifactory_api_key)
           uploader.upload
         end
       end
@@ -1703,15 +1529,12 @@ def define_yum_rc_tasks
                 repodata_dir.relative_path_from(base_dir).to_s
               ].join("/")
               uploader =
-                BintrayUploader.new(repository: bintray_repository,
-                                    distribution: distribution,
-                                    distribution_label: distribution_label,
-                                    version: version,
-                                    rc: rc,
-                                    source: repodata_dir.to_s,
-                                    destination_prefix: "#{relative_dir}/",
-                                    user: bintray_user,
-                                    api_key: bintray_api_key)
+                ArtifactoryUploader.new(distribution: distribution,
+                                        rc: rc,
+                                        source: repodata_dir.to_s,
+                                        destination_prefix: relative_dir,
+                                        sync: true,
+                                        api_key: artifactory_api_key)
               uploader.upload
             end
           end
@@ -1746,13 +1569,9 @@ def define_yum_release_tasks
           yum_distributions.each do |distribution|
             distribution_dir = "#{yum_release_repositories_dir}/#{distribution}"
             distribution_label = yum_distribution_label(distribution)
-            uploader = BintrayUploader.new(repository: bintray_repository,
-                                           distribution: distribution,
-                                           distribution_label: distribution_label,
-                                           version: version,
-                                           source: distribution_dir,
-                                           user: bintray_user,
-                                           api_key: bintray_api_key)
+            uploader = ArtifactoryUploader.new(distribution: distribution,
+                                               source: distribution_dir,
+                                               api_key: artifactory_api_key)
             uploader.upload
           end
         end
@@ -1805,15 +1624,11 @@ def define_generic_data_rc_tasks(label,
 
         desc "Upload #{label} packages"
         task :upload do
-          uploader = BintrayUploader.new(repository: bintray_repository,
-                                         distribution: id.to_s,
-                                         distribution_label: label,
-                                         version: version,
-                                         rc: rc,
-                                         source: rc_dir,
-                                         destination_prefix: "#{full_version}/",
-                                         user: bintray_user,
-                                         api_key: bintray_api_key)
+          uploader = ArtifactoryUploader.new(distribution: id.to_s,
+                                             rc: rc,
+                                             source: rc_dir,
+                                             destination_prefix: "#{full_version}/",
+                                             api_key: artifactory_api_key)
           uploader.upload
         end
       end
@@ -1835,20 +1650,17 @@ def define_generic_data_release_tasks(label, id, release_dir)
       namespace :release do
         desc "Download RC #{label} packages"
         task :download => release_dir do
-          download_distribution(id.to_s, release_dir)
+          download_distribution(id.to_s,
+                                release_dir,
+                                prefix: "#{full_version}")
         end
 
         desc "Upload release #{label} packages"
         task :upload => release_dir do
-          packages_dir = "#{release_dir}/#{full_version}"
-          uploader = BintrayUploader.new(repository: bintray_repository,
-                                         distribution: id.to_s,
-                                         distribution_label: label,
-                                         version: version,
-                                         source: packages_dir,
-                                         destination_prefix: "#{version}/",
-                                         user: bintray_user,
-                                         api_key: bintray_api_key)
+          uploader = ArtifactoryUploader.new(distribution: id.to_s,
+                                             source: release_dir,
+                                             destination_prefix: "#{version}",
+                                             api_key: artifactory_api_key)
           uploader.upload
         end
       end
@@ -1893,11 +1705,11 @@ def define_summary_tasks
       task :rc do
         puts(<<-SUMMARY)
 Success! The release candidate binaries are available here:
-  https://bintray.com/#{bintray_repository}/debian-rc/#{full_version}
-  https://bintray.com/#{bintray_repository}/ubuntu-rc/#{full_version}
-  https://bintray.com/#{bintray_repository}/centos-rc/#{full_version}
-  https://bintray.com/#{bintray_repository}/python-rc/#{full_version}
-  https://bintray.com/#{bintray_repository}/nuget-rc/#{full_version}
+  https://apache.jfrog.io/artifactory/arrow/debian-rc/
+  https://apache.jfrog.io/artifactory/arrow/ubuntu-rc/
+  https://apache.jfrog.io/artifactory/arrow/centos-rc/
+  https://apache.jfrog.io/artifactory/arrow/python-rc/#{full_version}
+  https://apache.jfrog.io/artifactory/arrow/nuget-rc/#{full_version}
         SUMMARY
       end
 
@@ -1905,11 +1717,11 @@ def define_summary_tasks
       task :release do
         puts(<<-SUMMARY)
 Success! The release binaries are available here:
-  https://bintray.com/#{bintray_repository}/debian/#{version}
-  https://bintray.com/#{bintray_repository}/ubuntu/#{version}
-  https://bintray.com/#{bintray_repository}/centos/#{version}
-  https://bintray.com/#{bintray_repository}/python/#{version}
-  https://bintray.com/#{bintray_repository}/nuget/#{version}
+  https://apache.jfrog.io/arrow/debian/
+  https://apache.jfrog.io/arrow/ubuntu/
+  https://apache.jfrog.io/arrow/centos/
+  https://apache.jfrog.io/arrow/python/#{version}
+  https://apache.jfrog.io/arrow/nuget/#{version}
         SUMMARY
       end
     end
diff --git a/dev/release/download_rc_binaries.py b/dev/release/download_rc_binaries.py
index 5ed8ece7783..3e3d0f7d3a4 100755
--- a/dev/release/download_rc_binaries.py
+++ b/dev/release/download_rc_binaries.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-
 #
 # Licensed to the Apache Software Foundation (ASF) under one or more
 # contributor license agreements.  See the NOTICE file distributed with
@@ -15,36 +14,44 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
 
 import re
 
 import argparse
 import concurrent.futures as cf
 import functools
-import hashlib
-import json
 import os
 import subprocess
 import urllib.request
 
 
-BINTRAY_API_ROOT = "https://bintray.com/api/v1"
-BINTRAY_DL_ROOT = "https://dl.bintray.com"
-BINTRAY_REPO = os.getenv('BINTRAY_REPOSITORY', 'apache/arrow')
+ARTIFACTORY_ROOT = "https://apache.jfrog.io/artifactory/arrow"
 DEFAULT_PARALLEL_DOWNLOADS = 8
 
 
-class Bintray:
-
-    def __init__(self, repo=BINTRAY_REPO):
-        self.repo = repo
-
-    def get_file_list(self, package, version):
-        url = os.path.join(BINTRAY_API_ROOT, 'packages', self.repo, package,
-                           'versions', version, 'files')
-        request = urllib.request.urlopen(url).read()
-        return json.loads(request)
+class Artifactory:
+
+    def get_file_list(self, prefix):
+        def traverse(directory, files, directories):
+            url = f'{ARTIFACTORY_ROOT}/{directory}'
+            response = urllib.request.urlopen(url).read().decode()
+            paths = re.findall('<a href="(.+?)"', response)
+            for path in paths:
+                if path == '../':
+                    continue
+                resolved_path = f'{directory}{path}'
+                if path.endswith('/'):
+                    directories.append(resolved_path)
+                else:
+                    files.append(resolved_path)
+        files = []
+        if not prefix.endswith('/'):
+            prefix += '/'
+        directories = [prefix]
+        while len(directories) > 0:
+            directory = directories.pop()
+            traverse(directory, files, directories)
+        return files
 
     def download_files(self, files, dest=None, num_parallel=None,
                        re_match=None):
@@ -69,7 +76,7 @@ def download_files(self, files, dest=None, num_parallel=None,
 
         if re_match is not None:
             regex = re.compile(re_match)
-            files = [x for x in files if regex.match(x['path'])]
+            files = [x for x in files if regex.match(x)]
 
         if num_parallel == 1:
             for path in files:
@@ -81,40 +88,28 @@ def download_files(self, files, dest=None, num_parallel=None,
                 num_parallel
             )
 
-    def _download_file(self, dest, info):
-        relpath = info['path']
-
-        base, filename = os.path.split(relpath)
+    def _download_file(self, dest, path):
+        base, filename = os.path.split(path)
 
         dest_dir = os.path.join(dest, base)
         os.makedirs(dest_dir, exist_ok=True)
 
         dest_path = os.path.join(dest_dir, filename)
 
-        if os.path.exists(dest_path):
-            with open(dest_path, 'rb') as f:
-                sha256sum = hashlib.sha256(f.read()).hexdigest()
-            if sha256sum == info['sha256']:
-                print('Local file {} sha256 matches, skipping'
-                      .format(dest_path))
-                return
-            else:
-                print('Local file sha256 does not match, overwriting')
-
-        print("Downloading {} to {}".format(relpath, dest_path))
+        print("Downloading {} to {}".format(path, dest_path))
 
-        bintray_abspath = os.path.join(BINTRAY_DL_ROOT, self.repo, relpath)
+        url = f'{ARTIFACTORY_ROOT}/{path}'
 
         cmd = [
             'curl', '--fail', '--location', '--retry', '5',
-            '--output', dest_path, bintray_abspath
+            '--output', dest_path, url
         ]
         proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
         stdout, stderr = proc.communicate()
         if proc.returncode != 0:
             raise Exception("Downloading {} failed\nstdout: {}\nstderr: {}"
-                            .format(relpath, stdout, stderr))
+                            .format(path, stdout, stderr))
 
 
 def parallel_map_terminate_early(f, iterable, num_parallel):
@@ -131,12 +126,16 @@ def parallel_map_terminate_early(f, iterable, num_parallel):
                 raise e
 
 
-ARROW_PACKAGE_TYPES = ['centos', 'debian', 'nuget', 'python', 'ubuntu']
+ARROW_REPOSITORY_PACKAGE_TYPES = ['centos', 'debian', 'ubuntu']
+ARROW_STANDALONE_PACKAGE_TYPES = ['nuget', 'python']
+ARROW_PACKAGE_TYPES = \
+    ARROW_REPOSITORY_PACKAGE_TYPES + \
+    ARROW_STANDALONE_PACKAGE_TYPES
 
 
 def download_rc_binaries(version, rc_number, re_match=None, dest=None,
                          num_parallel=None, target_package_type=None):
-    bintray = Bintray()
+    artifactory = Artifactory()
 
     version_string = '{}-rc{}'.format(version, rc_number)
     if target_package_type:
@@ -144,10 +143,22 @@ def download_rc_binaries(version, rc_number, re_match=None, dest=None,
     else:
         package_types = ARROW_PACKAGE_TYPES
     for package_type in package_types:
-        files = bintray.get_file_list('{}-rc'.format(package_type),
-                                      version_string)
-        bintray.download_files(files, re_match=re_match, dest=dest,
-                               num_parallel=num_parallel)
+        if package_type in ARROW_REPOSITORY_PACKAGE_TYPES:
+            prefix = f'{package_type}-rc'
+        else:
+            prefix = f'{package_type}-rc/{version_string}'
+        files = artifactory.get_file_list(prefix)
+        if package_type in ARROW_REPOSITORY_PACKAGE_TYPES:
+            version_pattern = re.compile(r'\d+\.\d+\.\d+')
+
+            def is_old_release(path):
+                match = version_pattern.search(path)
+                if not match:
+                    return False
+                return match[0] != version
+            files = [x for x in files if not is_old_release(x)]
+        artifactory.download_files(files, re_match=re_match, dest=dest,
+                                   num_parallel=num_parallel)
 
 
 if __name__ == '__main__':
diff --git a/dev/release/post-02-binary.sh b/dev/release/post-02-binary.sh
index 6d3e19e0dca..e6aa08301df 100755
--- a/dev/release/post-02-binary.sh
+++ b/dev/release/post-02-binary.sh
@@ -32,8 +32,6 @@ rc=$2
 
 cd "${SOURCE_DIR}"
 
-: ${BINTRAY_REPOSITORY_CUSTOM:=${BINTRAY_REPOSITORY:-}}
-
 if [ ! -f .env ]; then
   echo "You must create $(pwd)/.env"
   echo "You can use $(pwd)/.env.example as template"
@@ -41,10 +39,6 @@ if [ ! -f .env ]; then
 fi
 . .env
 
-if [ -n "${BINTRAY_REPOSITORY_CUSTOM}" ]; then
-  BINTRAY_REPOSITORY=${BINTRAY_REPOSITORY_CUSTOM}
-fi
-
 . utils-binary.sh
 
 # By default deploy all artifacts.
@@ -86,10 +80,10 @@ mkdir -p "${tmp_dir}"
 docker_run \
   ./runner.sh \
   rake \
+    --trace \
     "${rake_tasks[@]}" \
     APT_TARGETS=$(IFS=,; echo "${apt_targets[*]}") \
     ARTIFACTS_DIR="${tmp_dir}/artifacts" \
-    BINTRAY_REPOSITORY=${BINTRAY_REPOSITORY} \
     RC=${rc} \
     VERSION=${version} \
     YUM_TARGETS=$(IFS=,; echo "${yum_targets[*]}")
diff --git a/dev/release/post-03-website.sh b/dev/release/post-03-website.sh
index b427142ea98..8cabf364b46 100755
--- a/dev/release/post-03-website.sh
+++ b/dev/release/post-03-website.sh
@@ -145,10 +145,10 @@ archery release changelog generate ${version} | \
 
 cat <<ANNOUNCE >> "${announce_file}"
 [1]: https://www.apache.org/dyn/closer.lua/arrow/arrow-${version}/
-[2]: https://bintray.com/apache/arrow/centos/${version}/
-[3]: https://bintray.com/apache/arrow/debian/${version}/
-[4]: https://bintray.com/apache/arrow/python/${version}/
-[5]: https://bintray.com/apache/arrow/ubuntu/${version}/
+[2]: https://apache.jfrog.io/artifactory/arrow/centos/
+[3]: https://apache.jfrog.io/artifactory/arrow/debian/
+[4]: https://apache.jfrog.io/artifactory/arrow/python/${version}/
+[5]: https://apache.jfrog.io/artifactory/arrow/ubuntu/
 [6]: https://github.com/apache/arrow/releases/tag/apache-arrow-${version}
 ANNOUNCE
 git add "${announce_file}"
diff --git a/dev/release/post-06-csharp.sh b/dev/release/post-06-csharp.sh
index e9572025ab5..84e0eec7c0f 100755
--- a/dev/release/post-06-csharp.sh
+++ b/dev/release/post-06-csharp.sh
@@ -47,7 +47,7 @@ for base_name in ${base_names[@]}; do
       --fail \
       --location \
       --remote-name \
-      https://apache.bintray.com/arrow/nuget/${version}/${path}
+      https://apache.jfrog.io/artifactory/nuget/${version}/${path}
   done
   dotnet nuget push \
     ${base_name}.nupkg \
diff --git a/dev/release/verify-apt.sh b/dev/release/verify-apt.sh
index bf7c0bd7050..e7b87a3a4da 100755
--- a/dev/release/verify-apt.sh
+++ b/dev/release/verify-apt.sh
@@ -21,12 +21,10 @@ set -exu
 
 if [ $# -lt 2 ]; then
   echo "Usage: $0 VERSION rc"
-  echo "       $0 VERSION rc BINTRAY_REPOSITORY"
   echo "       $0 VERSION release"
   echo "       $0 VERSION local"
   echo " e.g.: $0 0.13.0 rc           # Verify 0.13.0 RC"
   echo " e.g.: $0 0.13.0 release      # Verify 0.13.0"
-  echo " e.g.: $0 0.13.0 rc kszucs/arrow # Verify 0.13.0 RC at https://bintray.com/kszucs/arrow"
   echo " e.g.: $0 0.13.0-dev20210203 local # Verify 0.13.0-dev20210203 on local"
   exit 1
 fi
@@ -86,15 +84,9 @@ if [ "${TYPE}" = "local" ]; then
 else
   package_version="${VERSION}-1"
   apt_source_base_name="apache-arrow-apt-source-latest-${code_name}.deb"
-  if [ $# -eq 3 ]; then
-    curl \
-      --output "${apt_source_base_name}" \
-      "https://dl.bintray.com/$3/${distribution}-rc/${apt_source_base_name}"
-  else
-    curl \
-      --output "${apt_source_base_name}" \
-      "${artifactory_base_url}/${apt_source_base_name}"
-  fi
+  curl \
+    --output "${apt_source_base_name}" \
+    "${artifactory_base_url}/${apt_source_base_name}"
   apt install -y -V "./${apt_source_base_name}"
 fi
 
@@ -112,17 +104,10 @@ if [ "${TYPE}" = "local" ]; then
   fi
 else
   if [ "${TYPE}" = "rc" ]; then
-    if [ $# -eq 3 ]; then
-      sed \
-        -i"" \
-        -e "s,^URIs: .*/,URIs: https://dl.bintray.com/$3/${distribution}-rc/,g" \
-        /etc/apt/sources.list.d/apache-arrow.sources
-    else
-      sed \
-        -i"" \
-        -e "s,^URIs: \\(.*\\)/,URIs: \\1-rc/,g" \
-        /etc/apt/sources.list.d/apache-arrow.sources
-    fi
+    sed \
+      -i"" \
+      -e "s,^URIs: \\(.*\\)/,URIs: \\1-rc/,g" \
+      /etc/apt/sources.list.d/apache-arrow.sources
   fi
 fi
 
diff --git a/dev/release/verify-release-candidate-wheels.bat b/dev/release/verify-release-candidate-wheels.bat
index 2b57113a1bf..21a0e3a0bc3 100644
--- a/dev/release/verify-release-candidate-wheels.bat
+++ b/dev/release/verify-release-candidate-wheels.bat
@@ -87,7 +87,7 @@ call activate %CONDA_ENV_PATH%
 set WHEEL_FILENAME=pyarrow-%ARROW_VERSION%-cp%PY_VERSION_NO_PERIOD%-cp%PY_VERSION_NO_PERIOD%%ABI_TAG%-win_amd64.whl
 
 @rem Requires GNU Wget for Windows
-wget --no-check-certificate -O %WHEEL_FILENAME% https://bintray.com/apache/arrow/download_file?file_path=python-rc%%2F%ARROW_VERSION%-rc%RC_NUMBER%%%2F%WHEEL_FILENAME% || EXIT /B 1
+wget --no-check-certificate -O %WHEEL_FILENAME% https://apache.jfrog.io/artifactory/arrow/download_file?file_path=python-rc%%2F%ARROW_VERSION%-rc%RC_NUMBER%%%2F%WHEEL_FILENAME% || EXIT /B 1
 
 pip install %WHEEL_FILENAME% || EXIT /B 1
 
diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh
index fef231c8db3..c421e2f0be6 100755
--- a/dev/release/verify-release-candidate.sh
+++ b/dev/release/verify-release-candidate.sh
@@ -128,7 +128,7 @@ test_binary() {
   local download_dir=binaries
   mkdir -p ${download_dir}
 
-  python $SOURCE_DIR/download_rc_binaries.py $VERSION $RC_NUMBER \
+  ${PYTHON:-python} $SOURCE_DIR/download_rc_binaries.py $VERSION $RC_NUMBER \
          --dest=${download_dir}
 
   verify_dir_artifact_signatures ${download_dir}
@@ -163,8 +163,7 @@ test_apt() {
            "${target}" \
            /arrow/dev/release/verify-apt.sh \
            "${VERSION}" \
-           "rc" \
-           "${BINTRAY_REPOSITORY}"; then
+           "rc"; then
       echo "Failed to verify the APT repository for ${target}"
       exit 1
     fi
@@ -188,8 +187,7 @@ test_yum() {
            "${target}" \
            /arrow/dev/release/verify-yum.sh \
            "${VERSION}" \
-           "rc" \
-           "${BINTRAY_REPOSITORY}"; then
+           "rc"; then
       echo "Failed to verify the Yum repository for ${target}"
       exit 1
     fi
@@ -600,8 +598,6 @@ test_source_distribution() {
 }
 
 test_binary_distribution() {
-  : ${BINTRAY_REPOSITORY:=apache/arrow}
-
   if [ ${TEST_BINARY} -gt 0 ]; then
     test_binary
   fi
@@ -766,7 +762,17 @@ TEST_JS=$((${TEST_JS} + ${TEST_INTEGRATION_JS}))
 TEST_GO=$((${TEST_GO} + ${TEST_INTEGRATION_GO}))
 TEST_INTEGRATION=$((${TEST_INTEGRATION} + ${TEST_INTEGRATION_CPP} + ${TEST_INTEGRATION_JAVA} + ${TEST_INTEGRATION_JS} + ${TEST_INTEGRATION_GO}))
 
-NEED_MINICONDA=$((${TEST_CPP} + ${TEST_WHEELS} + ${TEST_BINARY} + ${TEST_INTEGRATION}))
+if [ "${ARTIFACT}" == "source" ]; then
+  NEED_MINICONDA=$((${TEST_CPP} + ${TEST_INTEGRATION}))
+elif [ "${ARTIFACT}" == "wheels" ]; then
+  NEED_MINICONDA=$((${TEST_WHEELS}))
+else
+  if [ -z "${PYTHON:-}" ]; then
+    NEED_MINICONDA=$((${TEST_BINARY}))
+  else
+    NEED_MINICONDA=0
+  fi
+fi
 
 : ${TEST_ARCHIVE:=apache-arrow-${VERSION}.tar.gz}
 case "${TEST_ARCHIVE}" in
diff --git a/dev/release/verify-yum.sh b/dev/release/verify-yum.sh
index 08cd907b461..b9c46c43898 100755
--- a/dev/release/verify-yum.sh
+++ b/dev/release/verify-yum.sh
@@ -21,12 +21,10 @@ set -exu
 
 if [ $# -lt 2 ]; then
   echo "Usage: $0 VERSION rc"
-  echo "       $0 VERSION rc BINTRAY_REPOSITORY"
   echo "       $0 VERSION release"
   echo "       $0 VERSION local"
   echo " e.g.: $0 0.13.0 rc           # Verify 0.13.0 RC"
   echo " e.g.: $0 0.13.0 release      # Verify 0.13.0"
-  echo " e.g.: $0 0.13.0 rc kszucs/arrow # Verify 0.13.0 RC at https://bintray.com/kszucs/arrow"
   echo " e.g.: $0 0.13.0-dev20210203 local # Verify 0.13.0-dev20210203 on local"
   exit 1
 fi
@@ -84,13 +82,8 @@ if [ "${TYPE}" = "local" ]; then
   ${install_command} "${release_path}"
 else
   package_version="${VERSION}"
-  if [ $# -eq 3 ]; then
-    ${install_command} \
-      https://dl.bintray.com/$3/centos-rc/${distribution_version}/apache-arrow-release-latest.rpm
-  else
-    ${install_command} \
-      ${artifactory_base_url}/${distribution_version}/apache-arrow-release-latest.rpm
-  fi
+  ${install_command} \
+    ${artifactory_base_url}/${distribution_version}/apache-arrow-release-latest.rpm
 fi
 
 if [ "${TYPE}" = "local" ]; then
@@ -104,17 +97,10 @@ if [ "${TYPE}" = "local" ]; then
   fi
 else
   if [ "${TYPE}" = "rc" ]; then
-    if [ $# -eq 3 ]; then
-      sed \
-        -i"" \
-        -e "s,baseurl=https://apache\.jfrog\.io/artifactory/arrow/centos/,baseurl=https://dl.bintray.com/$3/centos-rc/,g" \
-        /etc/yum.repos.d/Apache-Arrow.repo
-    else
-      sed \
-        -i"" \
-        -e "s,/centos/,/centos-rc/,g" \
-        /etc/yum.repos.d/Apache-Arrow.repo
-    fi
+    sed \
+      -i"" \
+      -e "s,/centos/,/centos-rc/,g" \
+      /etc/yum.repos.d/Apache-Arrow.repo
   fi
 fi
 
diff --git a/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in b/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in
index 9f546569e86..22eaa42341f 100644
--- a/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in
+++ b/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in
@@ -54,8 +54,8 @@ Apache Arrow release files.
 
 %build
 # We use distribution version explicitly because we can't use symbolic link
-# on Bintray. CentOS uses 7 and 8 but RHEL uses 7Server and 8Server
-# for $releasever. If we can use symbolic link on Bintray, we can use
+# on Artifactory CentOS uses 7 and 8 but RHEL uses 7Server and 8Server
+# for $releasever. If we can use symbolic link on Artifactory we can use
 # $releasever directly.
 distribution_version=$(cut -d: -f5 /etc/system-release-cpe)
 sed -i'' -e "s/\\\$releasever/${distribution_version}/g" Apache-Arrow.repo
diff --git a/r/vignettes/developing.Rmd b/r/vignettes/developing.Rmd
index efb2c80764c..dcda13098ef 100644
--- a/r/vignettes/developing.Rmd
+++ b/r/vignettes/developing.Rmd
@@ -336,11 +336,11 @@ brew install apache-arrow
 
 ```{bash, save=run & !sys_install & ubuntu, hide=TRUE}
 # Setup troubleshooting section
-# install a system-level arrow on macOS
+# install a system-level arrow on Ubuntu
 sudo apt update
 sudo apt install -y -V ca-certificates lsb-release wget
-wget https://apache.bintray.com/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-archive-keyring-latest-$(lsb_release --codename --short).deb
-sudo apt install -y -V ./apache-arrow-archive-keyring-latest-$(lsb_release --codename --short).deb
+wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
+sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
 sudo apt update
 sudo apt install -y -V libarrow-dev
 ```

From 939195183657daa2060970b6fcd1938eab53d44b Mon Sep 17 00:00:00 2001
From: Alessandro Molina <amol@turbogears.org>
Date: Thu, 29 Apr 2021 10:14:29 +0200
Subject: [PATCH 147/719] ARROW-12506: [Python] Improve modularity of pyarrow
 codebase: _hdfsio module

Second batch of changes related to making pyarrow build more modular. `hdfs-io` is no longer included in `pyarrow.lib` but has been separated to its own module.

This PR is based on https://github.com/apache/arrow/pull/10131

Closes #10159 from amol-/ARROW-12506-2

Authored-by: Alessandro Molina <amol@turbogears.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 python/CMakeLists.txt                       |  3 ++-
 python/pyarrow/__init__.py                  |  8 +++++---
 python/pyarrow/{io-hdfs.pxi => _hdfsio.pyx} | 10 ++++++++++
 python/pyarrow/hdfs.py                      |  4 ++--
 python/pyarrow/io.pxi                       |  1 +
 python/pyarrow/lib.pyx                      |  1 -
 python/setup.py                             |  1 +
 7 files changed, 21 insertions(+), 7 deletions(-)
 rename python/pyarrow/{io-hdfs.pxi => _hdfsio.pyx} (97%)

diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 3058431f0f3..3ed518d0109 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -387,10 +387,11 @@ endif()
 
 set(CYTHON_EXTENSIONS
     lib
-    _fs
     _compute
     _csv
     _feather
+    _fs
+    _hdfsio
     _json)
 
 set(LINK_LIBS arrow_shared arrow_python_shared)
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index adfd69c18b3..1488f5c42e8 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -164,15 +164,17 @@ def show_versions():
                          log_memory_allocations, jemalloc_set_decay_ms)
 
 # I/O
-from pyarrow.lib import (HdfsFile, NativeFile, PythonFile,
+from pyarrow.lib import (NativeFile, PythonFile,
                          BufferedInputStream, BufferedOutputStream,
                          CompressedInputStream, CompressedOutputStream,
                          TransformInputStream, transcoding_input_stream,
                          FixedSizeBufferWriter,
                          BufferReader, BufferOutputStream,
                          OSFile, MemoryMappedFile, memory_map,
-                         create_memory_map, have_libhdfs,
-                         MockOutputStream, input_stream, output_stream)
+                         create_memory_map, MockOutputStream,
+                         input_stream, output_stream)
+
+from pyarrow._hdfsio import HdfsFile, have_libhdfs
 
 from pyarrow.lib import (ChunkedArray, RecordBatch, Table, table,
                          concat_arrays, concat_tables)
diff --git a/python/pyarrow/io-hdfs.pxi b/python/pyarrow/_hdfsio.pyx
similarity index 97%
rename from python/pyarrow/io-hdfs.pxi
rename to python/pyarrow/_hdfsio.pyx
index 2cdb1b7bfda..b864f8a686a 100644
--- a/python/pyarrow/io-hdfs.pxi
+++ b/python/pyarrow/_hdfsio.pyx
@@ -18,6 +18,16 @@
 # ----------------------------------------------------------------------
 # HDFS IO implementation
 
+# cython: language_level = 3
+
+import re
+
+from pyarrow.lib cimport check_status, _Weakrefable, NativeFile
+from pyarrow.includes.common cimport *
+from pyarrow.includes.libarrow cimport *
+from pyarrow.includes.libarrow_fs cimport *
+from pyarrow.lib import frombytes, tobytes, ArrowIOError
+
 from queue import Queue, Empty as QueueEmpty, Full as QueueFull
 
 
diff --git a/python/pyarrow/hdfs.py b/python/pyarrow/hdfs.py
index c4daac9fd1a..56667bd5df5 100644
--- a/python/pyarrow/hdfs.py
+++ b/python/pyarrow/hdfs.py
@@ -23,10 +23,10 @@
 
 from pyarrow.util import implements, _DEPR_MSG
 from pyarrow.filesystem import FileSystem
-import pyarrow.lib as lib
+import pyarrow._hdfsio as _hdfsio
 
 
-class HadoopFileSystem(lib.HadoopFileSystem, FileSystem):
+class HadoopFileSystem(_hdfsio.HadoopFileSystem, FileSystem):
     """
     DEPRECATED: FileSystem interface for HDFS cluster.
 
diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi
index 3fc098478d6..9c501adcc2b 100644
--- a/python/pyarrow/io.pxi
+++ b/python/pyarrow/io.pxi
@@ -27,6 +27,7 @@ import threading
 import time
 import warnings
 from io import BufferedIOBase, IOBase, TextIOBase, UnsupportedOperation
+from queue import Queue, Empty as QueueEmpty
 
 from pyarrow.util import _is_path_like, _stringify_path
 
diff --git a/python/pyarrow/lib.pyx b/python/pyarrow/lib.pyx
index 1866d071121..191250b3d5b 100644
--- a/python/pyarrow/lib.pyx
+++ b/python/pyarrow/lib.pyx
@@ -140,7 +140,6 @@ include "tensor.pxi"
 
 # File IO
 include "io.pxi"
-include "io-hdfs.pxi"
 
 # IPC / Messaging
 include "ipc.pxi"
diff --git a/python/setup.py b/python/setup.py
index b4de5799d66..24d54809a42 100755
--- a/python/setup.py
+++ b/python/setup.py
@@ -203,6 +203,7 @@ def initialize_options(self):
         '_plasma',
         '_s3fs',
         '_hdfs',
+        '_hdfsio',
         'gandiva']
 
     def _run_cmake(self):

From 748b756da77ad02f9fe5f431646a16c953d3c94c Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Thu, 29 Apr 2021 14:26:43 +0200
Subject: [PATCH 148/719] ARROW-12543: [CI][Python] Fix test-conda-python-3.9
 build (gdb version conflict)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #10185 from jorisvandenbossche/ARROW-12543

Authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 ci/docker/conda-cpp.dockerfile | 1 -
 ci/docker/conda.dockerfile     | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/ci/docker/conda-cpp.dockerfile b/ci/docker/conda-cpp.dockerfile
index 1a5b87ef729..660ad821655 100644
--- a/ci/docker/conda-cpp.dockerfile
+++ b/ci/docker/conda-cpp.dockerfile
@@ -28,7 +28,6 @@ RUN conda install \
         --file arrow/ci/conda_env_gandiva.yml \
         compilers \
         doxygen \
-        gdb \
         valgrind && \
     conda clean --all
 
diff --git a/ci/docker/conda.dockerfile b/ci/docker/conda.dockerfile
index 94de009904a..3ea393d3fa9 100644
--- a/ci/docker/conda.dockerfile
+++ b/ci/docker/conda.dockerfile
@@ -25,7 +25,7 @@ ARG prefix=/opt/conda
 # install build essentials
 RUN export DEBIAN_FRONTEND=noninteractive && \
     apt-get update -y -q && \
-    apt-get install -y -q wget tzdata libc6-dbg \
+    apt-get install -y -q wget tzdata libc6-dbg gdb \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists/*
 

From 0a3cbd0b7337b464ef55619672727ebe6f65ad05 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Thu, 29 Apr 2021 16:08:34 +0200
Subject: [PATCH 149/719] ARROW-11633: [CI][Doc] Maven default skin not found
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Trying to fix documentation build via disabling mvn HTTP pooling.

Closes #10163 from kszucs/ARROW-11633

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 ci/scripts/java_build.sh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/ci/scripts/java_build.sh b/ci/scripts/java_build.sh
index 886aaf0f2c9..5ef150fd1e7 100755
--- a/ci/scripts/java_build.sh
+++ b/ci/scripts/java_build.sh
@@ -95,7 +95,8 @@ if [ "${ARROW_PLASMA}" = "ON" ]; then
 fi
 
 if [ "${with_docs}" == "true" ]; then
-  ${mvn} -Dcheckstyle.skip=true install site
+  # HTTP pooling is turned of to avoid download issues https://issues.apache.org/jira/browse/ARROW-11633
+  ${mvn} -Dcheckstyle.skip=true -Dhttp.keepAlive=false -Dmaven.wagon.http.pool=false install site
 fi
 
 popd

From 33ef1d67e5ae2c41868f0e1998d2e1aa87ada09f Mon Sep 17 00:00:00 2001
From: Alessandro Molina <amol@turbogears.org>
Date: Fri, 30 Apr 2021 06:23:39 +0900
Subject: [PATCH 150/719] ARROW-12589: [C++] Compiling on windows doesn't work
 when -DARROW_WITH_BACKTRACE=OFF

    [4/116] Building CXX object src/arrow/CMakeFiles/arrow_shared.dir/Unity/unity_17_cxx.cxx.obj
    FAILED: src/arrow/CMakeFiles/arrow_shared.dir/Unity/unity_17_cxx.cxx.obj
    C:/msys64/mingw64/bin/ccache.exe C:\msys64\mingw64\bin\c++.exe ...
    In file included from src/arrow/CMakeFiles/arrow_shared.dir/Unity/unity_17_cxx.cxx:3:
    C:/msys64/home/javan/arrow/arrow-master/cpp/src/arrow/filesystem/filesystem.cc:81:28: error: function 'std::ostream& arrow::fs::operator<<(std::ostream&, arrow::fs::FileType)' definition is marked dllimport
    81 | ARROW_EXPORT std::ostream& operator<<(std::ostream& os, FileType ftype) {
    |                            ^~~~~~~~
    [5/116] Building CXX object src/arrow/CMakeFiles/arrow_shared.dir/Unity/unity_2_cxx.cxx.obj

Closes #10188 from amol-/ARROW-12589

Authored-by: Alessandro Molina <amol@turbogears.org>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 cpp/src/arrow/CMakeLists.txt | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index 3623283f355..d95c3b55eff 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -515,11 +515,14 @@ if(ARROW_BUILD_STATIC AND WIN32)
   target_compile_definitions(arrow_static PUBLIC ARROW_STATIC)
 endif()
 
+foreach(LIB_TARGET ${ARROW_LIBRARIES})
+  target_compile_definitions(${LIB_TARGET} PRIVATE ARROW_EXPORTING)
+endforeach()
+
 if(ARROW_WITH_BACKTRACE)
   find_package(Backtrace)
 
   foreach(LIB_TARGET ${ARROW_LIBRARIES})
-    target_compile_definitions(${LIB_TARGET} PRIVATE ARROW_EXPORTING)
     if(Backtrace_FOUND AND ARROW_WITH_BACKTRACE)
       target_compile_definitions(${LIB_TARGET} PRIVATE ARROW_WITH_BACKTRACE)
     endif()

From c50176100cebbed9cd8cde300a45452b3ea45d08 Mon Sep 17 00:00:00 2001
From: Ian Cook <ianmcook@gmail.com>
Date: Thu, 29 Apr 2021 17:32:33 -0400
Subject: [PATCH 151/719] ARROW-12601: [R][Packaging] Fix pkg-config check in
 r/configure

Fixes false positives in a check that pkg-config is installed

Closes #10198 from ianmcook/ARROW-12601

Authored-by: Ian Cook <ianmcook@gmail.com>
Signed-off-by: Ian Cook <ianmcook@gmail.com>
---
 r/configure | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/r/configure b/r/configure
index f6c1013b474..2da2b9f0178 100755
--- a/r/configure
+++ b/r/configure
@@ -77,7 +77,7 @@ elif [ "$INCLUDE_DIR" ] && [ "$LIB_DIR" ]; then
 else
   # Use pkg-config if available and allowed
   pkg-config --version >/dev/null 2>&1
-  if [ "$ARROW_USE_PKG_CONFIG" != "false" ] && [ $? -eq 0 ]; then
+  if [ $? -eq 0 ] && [ "$ARROW_USE_PKG_CONFIG" != "false" ]; then
     PKGCONFIG_CFLAGS=`pkg-config --cflags --silence-errors ${PKG_CONFIG_NAME}`
     PKGCONFIG_LIBS=`pkg-config --libs-only-l --silence-errors ${PKG_CONFIG_NAME}`
     PKGCONFIG_DIRS=`pkg-config --libs-only-L --silence-errors ${PKG_CONFIG_NAME}`

From 7430bbdfcda5d23cf18d7222da8419eb594d9f3f Mon Sep 17 00:00:00 2001
From: Benjamin Kietzman <bengilgit@gmail.com>
Date: Thu, 29 Apr 2021 20:42:29 -0400
Subject: [PATCH 152/719] ARROW-11929: [C++][Dataset][Compute] Promote
 expression to the compute namespace

Moves Expression and its test and benchmark into the compute/exec/ directory. I haven't introduced an exec namespace.

Closes #10166 from bkietz/11929-Promote-Expression-to-the

Authored-by: Benjamin Kietzman <bengilgit@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 .../arrow/dataset_documentation_example.cc    | 21 ++---
 .../arrow/dataset_parquet_scan_example.cc     | 10 ++-
 cpp/src/arrow/CMakeLists.txt                  |  1 +
 cpp/src/arrow/compute/CMakeLists.txt          |  2 +
 cpp/src/arrow/compute/exec/CMakeLists.txt     | 22 +++++
 .../{dataset => compute/exec}/expression.cc   | 35 ++++++--
 .../{dataset => compute/exec}/expression.h    | 64 +++++++--------
 .../exec}/expression_benchmark.cc             | 31 ++++---
 .../exec}/expression_internal.h               |  6 +-
 .../exec}/expression_test.cc                  | 26 ++++--
 cpp/src/arrow/dataset/CMakeLists.txt          |  5 --
 cpp/src/arrow/dataset/api.h                   |  2 +-
 cpp/src/arrow/dataset/dataset.cc              | 17 ++--
 cpp/src/arrow/dataset/dataset.h               | 31 ++++---
 cpp/src/arrow/dataset/dataset_internal.h      | 14 ++--
 cpp/src/arrow/dataset/discovery.cc            |  2 +-
 cpp/src/arrow/dataset/discovery.h             |  6 +-
 cpp/src/arrow/dataset/file_base.cc            | 22 ++---
 cpp/src/arrow/dataset/file_base.h             | 16 ++--
 cpp/src/arrow/dataset/file_benchmark.cc       |  7 +-
 cpp/src/arrow/dataset/file_parquet.cc         | 44 +++++-----
 cpp/src/arrow/dataset/file_parquet.h          | 14 ++--
 cpp/src/arrow/dataset/file_parquet_test.cc    |  6 +-
 cpp/src/arrow/dataset/file_test.cc            | 10 +--
 cpp/src/arrow/dataset/partition.cc            | 34 ++++----
 cpp/src/arrow/dataset/partition.h             | 22 ++---
 cpp/src/arrow/dataset/partition_test.cc       | 30 +++----
 cpp/src/arrow/dataset/projector.h             |  2 +-
 cpp/src/arrow/dataset/scanner.cc              | 10 +--
 cpp/src/arrow/dataset/scanner.h               | 10 +--
 cpp/src/arrow/dataset/scanner_internal.h      | 38 +++++----
 cpp/src/arrow/dataset/scanner_test.cc         |  2 +-
 cpp/src/arrow/dataset/test_util.h             | 54 ++++++------
 cpp/src/arrow/dataset/type_fwd.h              |  3 +-
 python/pyarrow/includes/libarrow_dataset.pxd  | 27 +++---
 r/NAMESPACE                                   |  1 +
 r/R/arrowExports.R                            | 20 ++---
 r/R/expression.R                              | 10 +--
 r/man/FileFormat.Rd                           |  5 +-
 r/man/FragmentScanOptions.Rd                  | 11 +++
 r/man/arrow-package.Rd                        |  2 +
 r/src/arrowExports.cpp                        | 82 +++++++++----------
 r/src/arrow_types.h                           |  1 +
 r/src/dataset.cpp                             |  7 +-
 r/src/expression.cpp                          | 56 ++++++-------
 45 files changed, 473 insertions(+), 368 deletions(-)
 create mode 100644 cpp/src/arrow/compute/exec/CMakeLists.txt
 rename cpp/src/arrow/{dataset => compute/exec}/expression.cc (97%)
 rename cpp/src/arrow/{dataset => compute/exec}/expression.h (85%)
 rename cpp/src/arrow/{dataset => compute/exec}/expression_benchmark.cc (81%)
 rename cpp/src/arrow/{dataset => compute/exec}/expression_internal.h (99%)
 rename cpp/src/arrow/{dataset => compute/exec}/expression_test.cc (98%)

diff --git a/cpp/examples/arrow/dataset_documentation_example.cc b/cpp/examples/arrow/dataset_documentation_example.cc
index 6954460d413..0fb4ad2f627 100644
--- a/cpp/examples/arrow/dataset_documentation_example.cc
+++ b/cpp/examples/arrow/dataset_documentation_example.cc
@@ -20,9 +20,9 @@
 
 #include <arrow/api.h>
 #include <arrow/compute/cast.h>
+#include <arrow/compute/exec/expression.h>
 #include <arrow/dataset/dataset.h>
 #include <arrow/dataset/discovery.h>
-#include <arrow/dataset/expression.h>
 #include <arrow/dataset/file_base.h>
 #include <arrow/dataset/file_ipc.h>
 #include <arrow/dataset/file_parquet.h>
@@ -37,6 +37,7 @@
 
 namespace ds = arrow::dataset;
 namespace fs = arrow::fs;
+namespace cp = arrow::compute;
 
 #define ABORT_ON_FAILURE(expr)                     \
   do {                                             \
@@ -185,7 +186,7 @@ std::shared_ptr<arrow::Table> FilterAndSelectDataset(
   // Read specified columns with a row filter
   auto scan_builder = dataset->NewScan().ValueOrDie();
   ABORT_ON_FAILURE(scan_builder->Project({"b"}));
-  ABORT_ON_FAILURE(scan_builder->Filter(ds::less(ds::field_ref("b"), ds::literal(4))));
+  ABORT_ON_FAILURE(scan_builder->Filter(cp::less(cp::field_ref("b"), cp::literal(4))));
   auto scanner = scan_builder->Finish().ValueOrDie();
   return scanner->ToTable().ValueOrDie();
 }
@@ -210,12 +211,12 @@ std::shared_ptr<arrow::Table> ProjectDataset(
   ABORT_ON_FAILURE(scan_builder->Project(
       {
           // Leave column "a" as-is.
-          ds::field_ref("a"),
+          cp::field_ref("a"),
           // Cast column "b" to float32.
-          ds::call("cast", {ds::field_ref("b")},
+          cp::call("cast", {cp::field_ref("b")},
                    arrow::compute::CastOptions::Safe(arrow::float32())),
           // Derive a boolean column from "c".
-          ds::equal(ds::field_ref("c"), ds::literal(1)),
+          cp::equal(cp::field_ref("c"), cp::literal(1)),
       },
       {"a_renamed", "b_as_float32", "c_1"}));
   auto scanner = scan_builder->Finish().ValueOrDie();
@@ -239,15 +240,15 @@ std::shared_ptr<arrow::Table> SelectAndProjectDataset(
   // Read specified columns with a row filter
   auto scan_builder = dataset->NewScan().ValueOrDie();
   std::vector<std::string> names;
-  std::vector<ds::Expression> exprs;
+  std::vector<cp::Expression> exprs;
   // Read all the original columns.
   for (const auto& field : dataset->schema()->fields()) {
     names.push_back(field->name());
-    exprs.push_back(ds::field_ref(field->name()));
+    exprs.push_back(cp::field_ref(field->name()));
   }
   // Also derive a new column.
-  names.push_back("b_large");
-  exprs.push_back(ds::greater(ds::field_ref("b"), ds::literal(1)));
+  names.emplace_back("b_large");
+  exprs.push_back(cp::greater(cp::field_ref("b"), cp::literal(1)));
   ABORT_ON_FAILURE(scan_builder->Project(exprs, names));
   auto scanner = scan_builder->Finish().ValueOrDie();
   return scanner->ToTable().ValueOrDie();
@@ -295,7 +296,7 @@ std::shared_ptr<arrow::Table> FilterPartitionedDataset(
   // Filter based on the partition values. This will mean that we won't even read the
   // files whose partition expressions don't match the filter.
   ABORT_ON_FAILURE(
-      scan_builder->Filter(ds::equal(ds::field_ref("part"), ds::literal("b"))));
+      scan_builder->Filter(cp::equal(cp::field_ref("part"), cp::literal("b"))));
   auto scanner = scan_builder->Finish().ValueOrDie();
   return scanner->ToTable().ValueOrDie();
 }
diff --git a/cpp/examples/arrow/dataset_parquet_scan_example.cc b/cpp/examples/arrow/dataset_parquet_scan_example.cc
index 197ca5aa4c6..cd9b89fe380 100644
--- a/cpp/examples/arrow/dataset_parquet_scan_example.cc
+++ b/cpp/examples/arrow/dataset_parquet_scan_example.cc
@@ -16,9 +16,9 @@
 // under the License.
 
 #include <arrow/api.h>
+#include <arrow/compute/exec/expression.h>
 #include <arrow/dataset/dataset.h>
 #include <arrow/dataset/discovery.h>
-#include <arrow/dataset/expression.h>
 #include <arrow/dataset/file_base.h>
 #include <arrow/dataset/file_parquet.h>
 #include <arrow/dataset/scanner.h>
@@ -37,6 +37,8 @@ namespace fs = arrow::fs;
 
 namespace ds = arrow::dataset;
 
+namespace cp = arrow::compute;
+
 #define ABORT_ON_FAILURE(expr)                     \
   do {                                             \
     arrow::Status status_ = (expr);                \
@@ -60,8 +62,8 @@ struct Configuration {
 
   // Indicates the filter by which rows will be filtered. This optimization can
   // make use of partition information and/or file metadata if possible.
-  ds::Expression filter =
-      ds::greater(ds::field_ref("total_amount"), ds::literal(1000.0f));
+  cp::Expression filter =
+      cp::greater(cp::field_ref("total_amount"), cp::literal(1000.0f));
 
   ds::InspectOptions inspect_options{};
   ds::FinishOptions finish_options{};
@@ -146,7 +148,7 @@ std::shared_ptr<ds::Dataset> GetDatasetFromPath(
 
 std::shared_ptr<ds::Scanner> GetScannerFromDataset(std::shared_ptr<ds::Dataset> dataset,
                                                    std::vector<std::string> columns,
-                                                   ds::Expression filter,
+                                                   cp::Expression filter,
                                                    bool use_threads) {
   auto scanner_builder = dataset->NewScan().ValueOrDie();
 
diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index d95c3b55eff..62ea94b8d02 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -367,6 +367,7 @@ if(ARROW_COMPUTE)
               compute/api_vector.cc
               compute/cast.cc
               compute/exec.cc
+              compute/exec/expression.cc
               compute/function.cc
               compute/kernel.cc
               compute/registry.cc
diff --git a/cpp/src/arrow/compute/CMakeLists.txt b/cpp/src/arrow/compute/CMakeLists.txt
index e781dff90e2..897dc32f357 100644
--- a/cpp/src/arrow/compute/CMakeLists.txt
+++ b/cpp/src/arrow/compute/CMakeLists.txt
@@ -68,3 +68,5 @@ add_arrow_compute_test(internals_test
 add_arrow_benchmark(function_benchmark PREFIX "arrow-compute")
 
 add_subdirectory(kernels)
+
+add_subdirectory(exec)
diff --git a/cpp/src/arrow/compute/exec/CMakeLists.txt b/cpp/src/arrow/compute/exec/CMakeLists.txt
new file mode 100644
index 00000000000..a10c1dad469
--- /dev/null
+++ b/cpp/src/arrow/compute/exec/CMakeLists.txt
@@ -0,0 +1,22 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+arrow_install_all_headers("arrow/compute/exec")
+
+add_arrow_compute_test(expression_test PREFIX "arrow-compute")
+
+add_arrow_benchmark(expression_benchmark PREFIX "arrow-compute")
diff --git a/cpp/src/arrow/dataset/expression.cc b/cpp/src/arrow/compute/exec/expression.cc
similarity index 97%
rename from cpp/src/arrow/dataset/expression.cc
rename to cpp/src/arrow/compute/exec/expression.cc
index cc126fcc7fb..1f819cf3d04 100644
--- a/cpp/src/arrow/dataset/expression.cc
+++ b/cpp/src/arrow/compute/exec/expression.cc
@@ -15,15 +15,15 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "arrow/dataset/expression.h"
+#include "arrow/compute/exec/expression.h"
 
 #include <unordered_map>
 #include <unordered_set>
 
 #include "arrow/chunked_array.h"
 #include "arrow/compute/api_vector.h"
+#include "arrow/compute/exec/expression_internal.h"
 #include "arrow/compute/exec_internal.h"
-#include "arrow/dataset/expression_internal.h"
 #include "arrow/io/memory.h"
 #include "arrow/ipc/reader.h"
 #include "arrow/ipc/writer.h"
@@ -39,7 +39,7 @@ namespace arrow {
 using internal::checked_cast;
 using internal::checked_pointer_cast;
 
-namespace dataset {
+namespace compute {
 
 Expression::Expression(Call call) : impl_(std::make_shared<Impl>(std::move(call))) {}
 
@@ -198,7 +198,7 @@ std::string Expression::ToString() const {
 
   if (auto options = GetStrptimeOptions(*call)) {
     return out + "format=" + options->format +
-           ", unit=" + internal::ToString(options->unit) + ")";
+           ", unit=" + arrow::internal::ToString(options->unit) + ")";
   }
 
   return out + "{NON-REPRESENTABLE OPTIONS})";
@@ -304,8 +304,9 @@ size_t Expression::hash() const {
   }
 
   std::shared_ptr<std::atomic<size_t>> expected = nullptr;
-  internal::atomic_compare_exchange_strong(&const_cast<Call*>(call)->hash, &expected,
-                                           std::make_shared<std::atomic<size_t>>(out));
+  ::arrow::internal::atomic_compare_exchange_strong(
+      &const_cast<Call*>(call)->hash, &expected,
+      std::make_shared<std::atomic<size_t>>(out));
   return out;
 }
 
@@ -525,6 +526,23 @@ Result<Datum> ExecuteScalarExpression(const Expression& expr, const Datum& input
         "ExecuteScalarExpression cannot Execute non-scalar expression ", expr.ToString());
   }
 
+  if (input.kind() == Datum::TABLE) {
+    TableBatchReader reader(*input.table());
+    std::shared_ptr<RecordBatch> batch;
+
+    while (true) {
+      RETURN_NOT_OK(reader.ReadNext(&batch));
+      if (batch != nullptr) {
+        break;
+      }
+      ARROW_ASSIGN_OR_RAISE(Datum res, ExecuteScalarExpression(expr, batch));
+      if (res.is_scalar()) {
+        ARROW_ASSIGN_OR_RAISE(res, MakeArrayFromScalar(*res.scalar(), batch->num_rows(),
+                                                       exec_context->memory_pool()));
+      }
+    }
+  }
+
   if (auto lit = expr.literal()) return *lit;
 
   if (auto ref = expr.field_ref()) {
@@ -1156,7 +1174,8 @@ Result<Expression> Deserialize(std::shared_ptr<Buffer> buffer) {
 
     Result<std::shared_ptr<Scalar>> GetScalar(const std::string& i) {
       int32_t column_index;
-      if (!internal::ParseValue<Int32Type>(i.data(), i.length(), &column_index)) {
+      if (!::arrow::internal::ParseValue<Int32Type>(i.data(), i.length(),
+                                                    &column_index)) {
         return Status::Invalid("Couldn't parse column_index");
       }
       if (column_index >= batch_.num_columns()) {
@@ -1279,5 +1298,5 @@ Expression operator||(Expression lhs, Expression rhs) {
   return or_(std::move(lhs), std::move(rhs));
 }
 
-}  // namespace dataset
+}  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/dataset/expression.h b/cpp/src/arrow/compute/exec/expression.h
similarity index 85%
rename from cpp/src/arrow/dataset/expression.h
rename to cpp/src/arrow/compute/exec/expression.h
index 8bdcb4a0ffa..3d3ce99c257 100644
--- a/cpp/src/arrow/dataset/expression.h
+++ b/cpp/src/arrow/compute/exec/expression.h
@@ -34,14 +34,14 @@
 #include "arrow/util/variant.h"
 
 namespace arrow {
-namespace dataset {
+namespace compute {
 
 /// An unbound expression which maps a single Datum to another Datum.
 /// An expression is one of
 /// - A literal Datum.
 /// - A reference to a single (potentially nested) field of the input Datum.
 /// - A call to a compute function, with arguments specified by other Expressions.
-class ARROW_DS_EXPORT Expression {
+class ARROW_EXPORT Expression {
  public:
   struct Call {
     std::string function_name;
@@ -122,9 +122,9 @@ class ARROW_DS_EXPORT Expression {
   using Impl = util::Variant<Datum, Parameter, Call>;
   std::shared_ptr<Impl> impl_;
 
-  ARROW_DS_EXPORT friend bool Identical(const Expression& l, const Expression& r);
+  ARROW_EXPORT friend bool Identical(const Expression& l, const Expression& r);
 
-  ARROW_DS_EXPORT friend void PrintTo(const Expression&, std::ostream*);
+  ARROW_EXPORT friend void PrintTo(const Expression&, std::ostream*);
 };
 
 inline bool operator==(const Expression& l, const Expression& r) { return l.Equals(r); }
@@ -132,7 +132,7 @@ inline bool operator!=(const Expression& l, const Expression& r) { return !l.Equ
 
 // Factories
 
-ARROW_DS_EXPORT
+ARROW_EXPORT
 Expression literal(Datum lit);
 
 template <typename Arg>
@@ -140,10 +140,10 @@ Expression literal(Arg&& arg) {
   return literal(Datum(std::forward<Arg>(arg)));
 }
 
-ARROW_DS_EXPORT
+ARROW_EXPORT
 Expression field_ref(FieldRef ref);
 
-ARROW_DS_EXPORT
+ARROW_EXPORT
 Expression call(std::string function, std::vector<Expression> arguments,
                 std::shared_ptr<compute::FunctionOptions> options = NULLPTR);
 
@@ -156,11 +156,11 @@ Expression call(std::string function, std::vector<Expression> arguments,
 }
 
 /// Assemble a list of all fields referenced by an Expression at any depth.
-ARROW_DS_EXPORT
+ARROW_EXPORT
 std::vector<FieldRef> FieldsInExpression(const Expression&);
 
 /// Assemble a mapping from field references to known values.
-ARROW_DS_EXPORT
+ARROW_EXPORT
 Result<std::unordered_map<FieldRef, Datum, FieldRef::Hash>> ExtractKnownFieldValues(
     const Expression& guaranteed_true_predicate);
 
@@ -179,17 +179,17 @@ Result<std::unordered_map<FieldRef, Datum, FieldRef::Hash>> ExtractKnownFieldVal
 /// Weak canonicalization which establishes guarantees for subsequent passes. Even
 /// equivalent Expressions may result in different canonicalized expressions.
 /// TODO this could be a strong canonicalization
-ARROW_DS_EXPORT
+ARROW_EXPORT
 Result<Expression> Canonicalize(Expression, compute::ExecContext* = NULLPTR);
 
 /// Simplify Expressions based on literal arguments (for example, add(null, x) will always
 /// be null so replace the call with a null literal). Includes early evaluation of all
 /// calls whose arguments are entirely literal.
-ARROW_DS_EXPORT
+ARROW_EXPORT
 Result<Expression> FoldConstants(Expression);
 
 /// Simplify Expressions by replacing with known values of the fields which it references.
-ARROW_DS_EXPORT
+ARROW_EXPORT
 Result<Expression> ReplaceFieldsWithKnownValues(
     const std::unordered_map<FieldRef, Datum, FieldRef::Hash>& known_values, Expression);
 
@@ -197,7 +197,7 @@ Result<Expression> ReplaceFieldsWithKnownValues(
 /// a boolean expression which is guaranteed to evaluate to `true`. For example, this is
 /// used to remove redundant function calls from a filter expression or to replace a
 /// reference to a constant-value field with a literal.
-ARROW_DS_EXPORT
+ARROW_EXPORT
 Result<Expression> SimplifyWithGuarantee(Expression,
                                          const Expression& guaranteed_true_predicate);
 
@@ -207,44 +207,44 @@ Result<Expression> SimplifyWithGuarantee(Expression,
 
 /// Execute a scalar expression against the provided state and input Datum. This
 /// expression must be bound.
-ARROW_DS_EXPORT
+ARROW_EXPORT
 Result<Datum> ExecuteScalarExpression(const Expression&, const Datum& input,
                                       compute::ExecContext* = NULLPTR);
 
 // Serialization
 
-ARROW_DS_EXPORT
+ARROW_EXPORT
 Result<std::shared_ptr<Buffer>> Serialize(const Expression&);
 
-ARROW_DS_EXPORT
+ARROW_EXPORT
 Result<Expression> Deserialize(std::shared_ptr<Buffer>);
 
 // Convenience aliases for factories
 
-ARROW_DS_EXPORT Expression project(std::vector<Expression> values,
-                                   std::vector<std::string> names);
+ARROW_EXPORT Expression project(std::vector<Expression> values,
+                                std::vector<std::string> names);
 
-ARROW_DS_EXPORT Expression equal(Expression lhs, Expression rhs);
+ARROW_EXPORT Expression equal(Expression lhs, Expression rhs);
 
-ARROW_DS_EXPORT Expression not_equal(Expression lhs, Expression rhs);
+ARROW_EXPORT Expression not_equal(Expression lhs, Expression rhs);
 
-ARROW_DS_EXPORT Expression less(Expression lhs, Expression rhs);
+ARROW_EXPORT Expression less(Expression lhs, Expression rhs);
 
-ARROW_DS_EXPORT Expression less_equal(Expression lhs, Expression rhs);
+ARROW_EXPORT Expression less_equal(Expression lhs, Expression rhs);
 
-ARROW_DS_EXPORT Expression greater(Expression lhs, Expression rhs);
+ARROW_EXPORT Expression greater(Expression lhs, Expression rhs);
 
-ARROW_DS_EXPORT Expression greater_equal(Expression lhs, Expression rhs);
+ARROW_EXPORT Expression greater_equal(Expression lhs, Expression rhs);
 
-ARROW_DS_EXPORT Expression is_null(Expression lhs);
+ARROW_EXPORT Expression is_null(Expression lhs);
 
-ARROW_DS_EXPORT Expression is_valid(Expression lhs);
+ARROW_EXPORT Expression is_valid(Expression lhs);
 
-ARROW_DS_EXPORT Expression and_(Expression lhs, Expression rhs);
-ARROW_DS_EXPORT Expression and_(const std::vector<Expression>&);
-ARROW_DS_EXPORT Expression or_(Expression lhs, Expression rhs);
-ARROW_DS_EXPORT Expression or_(const std::vector<Expression>&);
-ARROW_DS_EXPORT Expression not_(Expression operand);
+ARROW_EXPORT Expression and_(Expression lhs, Expression rhs);
+ARROW_EXPORT Expression and_(const std::vector<Expression>&);
+ARROW_EXPORT Expression or_(Expression lhs, Expression rhs);
+ARROW_EXPORT Expression or_(const std::vector<Expression>&);
+ARROW_EXPORT Expression not_(Expression operand);
 
-}  // namespace dataset
+}  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/dataset/expression_benchmark.cc b/cpp/src/arrow/compute/exec/expression_benchmark.cc
similarity index 81%
rename from cpp/src/arrow/dataset/expression_benchmark.cc
rename to cpp/src/arrow/compute/exec/expression_benchmark.cc
index 24870f38c14..1899b7caab6 100644
--- a/cpp/src/arrow/dataset/expression_benchmark.cc
+++ b/cpp/src/arrow/compute/exec/expression_benchmark.cc
@@ -18,23 +18,16 @@
 #include "benchmark/benchmark.h"
 
 #include "arrow/compute/cast.h"
-#include "arrow/dataset/expression.h"
+#include "arrow/compute/exec/expression.h"
 #include "arrow/dataset/partition.h"
 #include "arrow/testing/gtest_util.h"
 #include "arrow/type.h"
 
 namespace arrow {
-namespace dataset {
+namespace compute {
 
-static Expression GetPartitionExpression(const std::string& path, bool infer_dictionary) {
-  auto options = HivePartitioningFactoryOptions();
-  options.infer_dictionary = infer_dictionary;
-  auto factory = HivePartitioning::MakeFactory(options);
-  ASSIGN_OR_ABORT(auto schema, factory->Inspect({path}));
-  ASSIGN_OR_ABORT(auto partitioning, factory->Finish(schema));
-  ASSIGN_OR_ABORT(auto expr, partitioning->Parse(path));
-  return expr;
-}
+std::shared_ptr<Scalar> ninety_nine_dict =
+    DictionaryScalar::Make(MakeScalar(0), ArrayFromJSON(int64(), "[99]"));
 
 // A benchmark of SimplifyWithGuarantee using expressions arising from partitioning.
 static void SimplifyFilterWithGuarantee(benchmark::State& state, Expression filter,
@@ -61,11 +54,15 @@ auto filter_cast_negative =
 auto filter_cast_positive =
     and_(equal(call("cast", {field_ref("a")}, to_int64), literal(99)),
          equal(call("cast", {field_ref("b")}, to_int64), literal(99)));
-// A fully simplified partition expression.
-auto guarantee = GetPartitionExpression("a=99/b=99", /*infer_dictionary=*/false);
-// A partition expression that uses dictionaries, which are inferred by default.
-auto guarantee_dictionary =
-    GetPartitionExpression("a=99/b=99", /*infer_dictionary=*/true);
+
+// An unencoded partition expression for "a=99/b=99".
+auto guarantee = and_(equal(field_ref("a"), literal(int64_t(99))),
+                      equal(field_ref("b"), literal(int64_t(99))));
+
+// A partition expression for "a=99/b=99" that uses dictionaries (inferred by default).
+auto guarantee_dictionary = and_(equal(field_ref("a"), literal(ninety_nine_dict)),
+                                 equal(field_ref("b"), literal(ninety_nine_dict)));
+
 // Negative queries (partition expressions that fail the filter)
 BENCHMARK_CAPTURE(SimplifyFilterWithGuarantee, negative_filter_simple_guarantee_simple,
                   filter_simple_negative, guarantee);
@@ -87,5 +84,5 @@ BENCHMARK_CAPTURE(SimplifyFilterWithGuarantee,
 BENCHMARK_CAPTURE(SimplifyFilterWithGuarantee, positive_filter_cast_guarantee_dictionary,
                   filter_cast_positive, guarantee_dictionary);
 
-}  // namespace dataset
+}  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/dataset/expression_internal.h b/cpp/src/arrow/compute/exec/expression_internal.h
similarity index 99%
rename from cpp/src/arrow/dataset/expression_internal.h
rename to cpp/src/arrow/compute/exec/expression_internal.h
index 24e60377f5a..7b0cc758f57 100644
--- a/cpp/src/arrow/dataset/expression_internal.h
+++ b/cpp/src/arrow/compute/exec/expression_internal.h
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "arrow/dataset/expression.h"
+#include "arrow/compute/exec/expression.h"
 
 #include <unordered_map>
 #include <unordered_set>
@@ -32,7 +32,7 @@ namespace arrow {
 
 using internal::checked_cast;
 
-namespace dataset {
+namespace compute {
 
 inline const Expression::Call* CallNotNull(const Expression& expr) {
   auto call = expr.call();
@@ -338,5 +338,5 @@ Result<Expression> Modify(Expression expr, const PreVisit& pre,
   return post_call(std::move(expr), nullptr);
 }
 
-}  // namespace dataset
+}  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/dataset/expression_test.cc b/cpp/src/arrow/compute/exec/expression_test.cc
similarity index 98%
rename from cpp/src/arrow/dataset/expression_test.cc
rename to cpp/src/arrow/compute/exec/expression_test.cc
index 2ab796b052f..ab3fbb4d196 100644
--- a/cpp/src/arrow/dataset/expression_test.cc
+++ b/cpp/src/arrow/compute/exec/expression_test.cc
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "arrow/dataset/expression.h"
+#include "arrow/compute/exec/expression.h"
 
 #include <cstdint>
 #include <memory>
@@ -26,9 +26,8 @@
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
 
+#include "arrow/compute/exec/expression_internal.h"
 #include "arrow/compute/registry.h"
-#include "arrow/dataset/expression_internal.h"
-#include "arrow/dataset/test_util.h"
 #include "arrow/testing/gtest_util.h"
 
 using testing::HasSubstr;
@@ -39,7 +38,24 @@ namespace arrow {
 using internal::checked_cast;
 using internal::checked_pointer_cast;
 
-namespace dataset {
+namespace compute {
+
+const std::shared_ptr<Schema> kBoringSchema = schema({
+    field("bool", boolean()),
+    field("i8", int8()),
+    field("i32", int32()),
+    field("i32_req", int32(), /*nullable=*/false),
+    field("u32", uint32()),
+    field("i64", int64()),
+    field("f32", float32()),
+    field("f32_req", float32(), /*nullable=*/false),
+    field("f64", float64()),
+    field("date64", date64()),
+    field("str", utf8()),
+    field("dict_str", dictionary(int32(), utf8())),
+    field("dict_i32", dictionary(int32(), int32())),
+    field("ts_ns", timestamp(TimeUnit::NANO)),
+});
 
 #define EXPECT_OK ARROW_EXPECT_OK
 
@@ -1278,5 +1294,5 @@ TEST(Projection, AugmentWithKnownValues) {
       }));
 }
 
-}  // namespace dataset
+}  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/dataset/CMakeLists.txt b/cpp/src/arrow/dataset/CMakeLists.txt
index 14acbc73a48..f2fde70305e 100644
--- a/cpp/src/arrow/dataset/CMakeLists.txt
+++ b/cpp/src/arrow/dataset/CMakeLists.txt
@@ -22,7 +22,6 @@ arrow_install_all_headers("arrow/dataset")
 set(ARROW_DATASET_SRCS
     dataset.cc
     discovery.cc
-    expression.cc
     file_base.cc
     file_ipc.cc
     partition.cc
@@ -106,7 +105,6 @@ endfunction()
 
 add_arrow_dataset_test(dataset_test)
 add_arrow_dataset_test(discovery_test)
-add_arrow_dataset_test(expression_test)
 add_arrow_dataset_test(file_ipc_test)
 add_arrow_dataset_test(file_test)
 add_arrow_dataset_test(partition_test)
@@ -121,14 +119,11 @@ if(ARROW_PARQUET)
 endif()
 
 if(ARROW_BUILD_BENCHMARKS)
-  add_arrow_benchmark(expression_benchmark PREFIX "arrow-dataset")
   add_arrow_benchmark(file_benchmark PREFIX "arrow-dataset")
 
   if(ARROW_BUILD_STATIC)
-    target_link_libraries(arrow-dataset-expression-benchmark PUBLIC arrow_dataset_static)
     target_link_libraries(arrow-dataset-file-benchmark PUBLIC arrow_dataset_static)
   else()
-    target_link_libraries(arrow-dataset-expression-benchmark PUBLIC arrow_dataset_shared)
     target_link_libraries(arrow-dataset-file-benchmark PUBLIC arrow_dataset_shared)
   endif()
 endif()
diff --git a/cpp/src/arrow/dataset/api.h b/cpp/src/arrow/dataset/api.h
index da9f5ed371e..f3672a0eff5 100644
--- a/cpp/src/arrow/dataset/api.h
+++ b/cpp/src/arrow/dataset/api.h
@@ -19,9 +19,9 @@
 
 #pragma once
 
+#include "arrow/compute/exec/expression.h"
 #include "arrow/dataset/dataset.h"
 #include "arrow/dataset/discovery.h"
-#include "arrow/dataset/expression.h"
 #include "arrow/dataset/file_base.h"
 #include "arrow/dataset/file_csv.h"
 #include "arrow/dataset/file_ipc.h"
diff --git a/cpp/src/arrow/dataset/dataset.cc b/cpp/src/arrow/dataset/dataset.cc
index 60d9bd73073..ab0600dd1a8 100644
--- a/cpp/src/arrow/dataset/dataset.cc
+++ b/cpp/src/arrow/dataset/dataset.cc
@@ -31,7 +31,7 @@
 namespace arrow {
 namespace dataset {
 
-Fragment::Fragment(Expression partition_expression,
+Fragment::Fragment(compute::Expression partition_expression,
                    std::shared_ptr<Schema> physical_schema)
     : partition_expression_(std::move(partition_expression)),
       physical_schema_(std::move(physical_schema)) {}
@@ -58,14 +58,14 @@ Result<std::shared_ptr<Schema>> InMemoryFragment::ReadPhysicalSchemaImpl() {
 
 InMemoryFragment::InMemoryFragment(std::shared_ptr<Schema> schema,
                                    RecordBatchVector record_batches,
-                                   Expression partition_expression)
+                                   compute::Expression partition_expression)
     : Fragment(std::move(partition_expression), std::move(schema)),
       record_batches_(std::move(record_batches)) {
   DCHECK_NE(physical_schema_, nullptr);
 }
 
 InMemoryFragment::InMemoryFragment(RecordBatchVector record_batches,
-                                   Expression partition_expression)
+                                   compute::Expression partition_expression)
     : Fragment(std::move(partition_expression), /*schema=*/nullptr),
       record_batches_(std::move(record_batches)) {
   // Order of argument evaluation is undefined, so compute physical_schema here
@@ -144,7 +144,7 @@ Result<RecordBatchGenerator> InMemoryFragment::ScanBatchesAsync(
                    options->batch_size);
 }
 
-Dataset::Dataset(std::shared_ptr<Schema> schema, Expression partition_expression)
+Dataset::Dataset(std::shared_ptr<Schema> schema, compute::Expression partition_expression)
     : schema_(std::move(schema)),
       partition_expression_(std::move(partition_expression)) {}
 
@@ -158,11 +158,10 @@ Result<std::shared_ptr<ScannerBuilder>> Dataset::NewScan() {
 }
 
 Result<FragmentIterator> Dataset::GetFragments() {
-  ARROW_ASSIGN_OR_RAISE(auto predicate, literal(true).Bind(*schema_));
-  return GetFragments(std::move(predicate));
+  return GetFragments(compute::literal(true));
 }
 
-Result<FragmentIterator> Dataset::GetFragments(Expression predicate) {
+Result<FragmentIterator> Dataset::GetFragments(compute::Expression predicate) {
   ARROW_ASSIGN_OR_RAISE(
       predicate, SimplifyWithGuarantee(std::move(predicate), partition_expression_));
   return predicate.IsSatisfiable() ? GetFragmentsImpl(std::move(predicate))
@@ -228,7 +227,7 @@ Result<std::shared_ptr<Dataset>> InMemoryDataset::ReplaceSchema(
   return std::make_shared<InMemoryDataset>(std::move(schema), get_batches_);
 }
 
-Result<FragmentIterator> InMemoryDataset::GetFragmentsImpl(Expression) {
+Result<FragmentIterator> InMemoryDataset::GetFragmentsImpl(compute::Expression) {
   auto schema = this->schema();
 
   auto create_fragment =
@@ -269,7 +268,7 @@ Result<std::shared_ptr<Dataset>> UnionDataset::ReplaceSchema(
       new UnionDataset(std::move(schema), std::move(children)));
 }
 
-Result<FragmentIterator> UnionDataset::GetFragmentsImpl(Expression predicate) {
+Result<FragmentIterator> UnionDataset::GetFragmentsImpl(compute::Expression predicate) {
   return GetFragmentsFromDatasets(children_, predicate);
 }
 
diff --git a/cpp/src/arrow/dataset/dataset.h b/cpp/src/arrow/dataset/dataset.h
index c5c22d731fc..40a60ffd48e 100644
--- a/cpp/src/arrow/dataset/dataset.h
+++ b/cpp/src/arrow/dataset/dataset.h
@@ -25,7 +25,7 @@
 #include <utility>
 #include <vector>
 
-#include "arrow/dataset/expression.h"
+#include "arrow/compute/exec/expression.h"
 #include "arrow/dataset/type_fwd.h"
 #include "arrow/dataset/visibility.h"
 #include "arrow/util/macros.h"
@@ -75,19 +75,21 @@ class ARROW_DS_EXPORT Fragment : public std::enable_shared_from_this<Fragment> {
 
   /// \brief An expression which evaluates to true for all data viewed by this
   /// Fragment.
-  const Expression& partition_expression() const { return partition_expression_; }
+  const compute::Expression& partition_expression() const {
+    return partition_expression_;
+  }
 
   virtual ~Fragment() = default;
 
  protected:
   Fragment() = default;
-  explicit Fragment(Expression partition_expression,
+  explicit Fragment(compute::Expression partition_expression,
                     std::shared_ptr<Schema> physical_schema);
 
   virtual Result<std::shared_ptr<Schema>> ReadPhysicalSchemaImpl() = 0;
 
   util::Mutex physical_schema_mutex_;
-  Expression partition_expression_ = literal(true);
+  compute::Expression partition_expression_ = compute::literal(true);
   std::shared_ptr<Schema> physical_schema_;
 };
 
@@ -115,8 +117,9 @@ class ARROW_DS_EXPORT FragmentScanOptions {
 class ARROW_DS_EXPORT InMemoryFragment : public Fragment {
  public:
   InMemoryFragment(std::shared_ptr<Schema> schema, RecordBatchVector record_batches,
-                   Expression = literal(true));
-  explicit InMemoryFragment(RecordBatchVector record_batches, Expression = literal(true));
+                   compute::Expression = compute::literal(true));
+  explicit InMemoryFragment(RecordBatchVector record_batches,
+                            compute::Expression = compute::literal(true));
 
   Result<ScanTaskIterator> Scan(std::shared_ptr<ScanOptions> options) override;
   Result<RecordBatchGenerator> ScanBatchesAsync(
@@ -144,14 +147,16 @@ class ARROW_DS_EXPORT Dataset : public std::enable_shared_from_this<Dataset> {
   Result<std::shared_ptr<ScannerBuilder>> NewScan();
 
   /// \brief GetFragments returns an iterator of Fragments given a predicate.
-  Result<FragmentIterator> GetFragments(Expression predicate);
+  Result<FragmentIterator> GetFragments(compute::Expression predicate);
   Result<FragmentIterator> GetFragments();
 
   const std::shared_ptr<Schema>& schema() const { return schema_; }
 
   /// \brief An expression which evaluates to true for all data viewed by this Dataset.
   /// May be null, which indicates no information is available.
-  const Expression& partition_expression() const { return partition_expression_; }
+  const compute::Expression& partition_expression() const {
+    return partition_expression_;
+  }
 
   /// \brief The name identifying the kind of Dataset
   virtual std::string type_name() const = 0;
@@ -168,12 +173,12 @@ class ARROW_DS_EXPORT Dataset : public std::enable_shared_from_this<Dataset> {
  protected:
   explicit Dataset(std::shared_ptr<Schema> schema) : schema_(std::move(schema)) {}
 
-  Dataset(std::shared_ptr<Schema> schema, Expression partition_expression);
+  Dataset(std::shared_ptr<Schema> schema, compute::Expression partition_expression);
 
-  virtual Result<FragmentIterator> GetFragmentsImpl(Expression predicate) = 0;
+  virtual Result<FragmentIterator> GetFragmentsImpl(compute::Expression predicate) = 0;
 
   std::shared_ptr<Schema> schema_;
-  Expression partition_expression_ = literal(true);
+  compute::Expression partition_expression_ = compute::literal(true);
 };
 
 /// \addtogroup dataset-implementations
@@ -209,7 +214,7 @@ class ARROW_DS_EXPORT InMemoryDataset : public Dataset {
       std::shared_ptr<Schema> schema) const override;
 
  protected:
-  Result<FragmentIterator> GetFragmentsImpl(Expression predicate) override;
+  Result<FragmentIterator> GetFragmentsImpl(compute::Expression predicate) override;
 
   std::shared_ptr<RecordBatchGenerator> get_batches_;
 };
@@ -233,7 +238,7 @@ class ARROW_DS_EXPORT UnionDataset : public Dataset {
       std::shared_ptr<Schema> schema) const override;
 
  protected:
-  Result<FragmentIterator> GetFragmentsImpl(Expression predicate) override;
+  Result<FragmentIterator> GetFragmentsImpl(compute::Expression predicate) override;
 
   explicit UnionDataset(std::shared_ptr<Schema> schema, DatasetVector children)
       : Dataset(std::move(schema)), children_(std::move(children)) {}
diff --git a/cpp/src/arrow/dataset/dataset_internal.h b/cpp/src/arrow/dataset/dataset_internal.h
index a5ac474754b..6527eac07dd 100644
--- a/cpp/src/arrow/dataset/dataset_internal.h
+++ b/cpp/src/arrow/dataset/dataset_internal.h
@@ -38,7 +38,7 @@ namespace dataset {
 /// \brief GetFragmentsFromDatasets transforms a vector<Dataset> into a
 /// flattened FragmentIterator.
 inline Result<FragmentIterator> GetFragmentsFromDatasets(const DatasetVector& datasets,
-                                                         Expression predicate) {
+                                                         compute::Expression predicate) {
   // Iterator<Dataset>
   auto datasets_it = MakeVectorIterator(datasets);
 
@@ -108,12 +108,13 @@ struct SubtreeImpl {
     expression_codes partition_expression;
   };
 
-  std::unordered_map<Expression, expression_code, Expression::Hash> expr_to_code_;
-  std::vector<Expression> code_to_expr_;
+  std::unordered_map<compute::Expression, expression_code, compute::Expression::Hash>
+      expr_to_code_;
+  std::vector<compute::Expression> code_to_expr_;
   std::unordered_set<expression_codes> subtree_exprs_;
 
   // Encode a subexpression (returning the existing code if possible).
-  expression_code GetOrInsert(const Expression& expr) {
+  expression_code GetOrInsert(const compute::Expression& expr) {
     auto next_code = static_cast<int>(expr_to_code_.size());
     auto it_success = expr_to_code_.emplace(expr, next_code);
 
@@ -124,7 +125,8 @@ struct SubtreeImpl {
   }
 
   // Encode an expression (recursively breaking up conjunction members if possible).
-  void EncodeConjunctionMembers(const Expression& expr, expression_codes* codes) {
+  void EncodeConjunctionMembers(const compute::Expression& expr,
+                                expression_codes* codes) {
     if (auto call = expr.call()) {
       if (call->function_name == "and_kleene") {
         // expr is a conjunction, encode its arguments
@@ -138,7 +140,7 @@ struct SubtreeImpl {
   }
 
   // Convert an encoded subtree or fragment back into an expression.
-  Expression GetSubtreeExpression(const Encoded& encoded_subtree) {
+  compute::Expression GetSubtreeExpression(const Encoded& encoded_subtree) {
     // Filters will already be simplified by all of a subtree's ancestors, so
     // we only need to simplify the filter by the trailing conjunction member
     // of each subtree.
diff --git a/cpp/src/arrow/dataset/discovery.cc b/cpp/src/arrow/dataset/discovery.cc
index a146bd6185e..70b6930bf2f 100644
--- a/cpp/src/arrow/dataset/discovery.cc
+++ b/cpp/src/arrow/dataset/discovery.cc
@@ -34,7 +34,7 @@
 namespace arrow {
 namespace dataset {
 
-DatasetFactory::DatasetFactory() : root_partition_(literal(true)) {}
+DatasetFactory::DatasetFactory() : root_partition_(compute::literal(true)) {}
 
 Result<std::shared_ptr<Schema>> DatasetFactory::Inspect(InspectOptions options) {
   ARROW_ASSIGN_OR_RAISE(auto schemas, InspectSchemas(std::move(options)));
diff --git a/cpp/src/arrow/dataset/discovery.h b/cpp/src/arrow/dataset/discovery.h
index cfe741a5d17..5559638448f 100644
--- a/cpp/src/arrow/dataset/discovery.h
+++ b/cpp/src/arrow/dataset/discovery.h
@@ -95,9 +95,9 @@ class ARROW_DS_EXPORT DatasetFactory {
   virtual Result<std::shared_ptr<Dataset>> Finish(FinishOptions options) = 0;
 
   /// \brief Optional root partition for the resulting Dataset.
-  const Expression& root_partition() const { return root_partition_; }
+  const compute::Expression& root_partition() const { return root_partition_; }
   /// \brief Set the root partition for the resulting Dataset.
-  Status SetRootPartition(Expression partition) {
+  Status SetRootPartition(compute::Expression partition) {
     root_partition_ = std::move(partition);
     return Status::OK();
   }
@@ -107,7 +107,7 @@ class ARROW_DS_EXPORT DatasetFactory {
  protected:
   DatasetFactory();
 
-  Expression root_partition_;
+  compute::Expression root_partition_;
 };
 
 /// @}
diff --git a/cpp/src/arrow/dataset/file_base.cc b/cpp/src/arrow/dataset/file_base.cc
index d02b094bb43..1f47fc8ae86 100644
--- a/cpp/src/arrow/dataset/file_base.cc
+++ b/cpp/src/arrow/dataset/file_base.cc
@@ -86,16 +86,17 @@ Result<std::shared_ptr<io::InputStream>> FileSource::OpenCompressed(
 
 Result<std::shared_ptr<FileFragment>> FileFormat::MakeFragment(
     FileSource source, std::shared_ptr<Schema> physical_schema) {
-  return MakeFragment(std::move(source), literal(true), std::move(physical_schema));
+  return MakeFragment(std::move(source), compute::literal(true),
+                      std::move(physical_schema));
 }
 
 Result<std::shared_ptr<FileFragment>> FileFormat::MakeFragment(
-    FileSource source, Expression partition_expression) {
+    FileSource source, compute::Expression partition_expression) {
   return MakeFragment(std::move(source), std::move(partition_expression), nullptr);
 }
 
 Result<std::shared_ptr<FileFragment>> FileFormat::MakeFragment(
-    FileSource source, Expression partition_expression,
+    FileSource source, compute::Expression partition_expression,
     std::shared_ptr<Schema> physical_schema) {
   return std::shared_ptr<FileFragment>(
       new FileFragment(std::move(source), shared_from_this(),
@@ -172,11 +173,11 @@ struct FileSystemDataset::FragmentSubtrees {
   // Forest for skipping fragments based on extracted subtree expressions
   Forest forest;
   // fragment indices and subtree expressions in forest order
-  std::vector<util::Variant<int, Expression>> fragments_and_subtrees;
+  std::vector<util::Variant<int, compute::Expression>> fragments_and_subtrees;
 };
 
 Result<std::shared_ptr<FileSystemDataset>> FileSystemDataset::Make(
-    std::shared_ptr<Schema> schema, Expression root_partition,
+    std::shared_ptr<Schema> schema, compute::Expression root_partition,
     std::shared_ptr<FileFormat> format, std::shared_ptr<fs::FileSystem> filesystem,
     std::vector<std::shared_ptr<FileFragment>> fragments) {
   std::shared_ptr<FileSystemDataset> out(
@@ -215,7 +216,7 @@ std::string FileSystemDataset::ToString() const {
     repr += "\n" + fragment->source().path();
 
     const auto& partition = fragment->partition_expression();
-    if (partition != literal(true)) {
+    if (partition != compute::literal(true)) {
       repr += ": " + partition.ToString();
     }
   }
@@ -264,15 +265,16 @@ void FileSystemDataset::SetupSubtreePruning() {
   });
 }
 
-Result<FragmentIterator> FileSystemDataset::GetFragmentsImpl(Expression predicate) {
-  if (predicate == literal(true)) {
+Result<FragmentIterator> FileSystemDataset::GetFragmentsImpl(
+    compute::Expression predicate) {
+  if (predicate == compute::literal(true)) {
     // trivial predicate; skip subtree pruning
     return MakeVectorIterator(FragmentVector(fragments_.begin(), fragments_.end()));
   }
 
   std::vector<int> fragment_indices;
 
-  std::vector<Expression> predicates{predicate};
+  std::vector<compute::Expression> predicates{predicate};
   RETURN_NOT_OK(subtrees_->forest.Visit(
       [&](Forest::Ref ref) -> Result<bool> {
         if (auto fragment_index =
@@ -282,7 +284,7 @@ Result<FragmentIterator> FileSystemDataset::GetFragmentsImpl(Expression predicat
         }
 
         const auto& subtree_expr =
-            util::get<Expression>(subtrees_->fragments_and_subtrees[ref.i]);
+            util::get<compute::Expression>(subtrees_->fragments_and_subtrees[ref.i]);
         ARROW_ASSIGN_OR_RAISE(auto simplified,
                               SimplifyWithGuarantee(predicates.back(), subtree_expr));
 
diff --git a/cpp/src/arrow/dataset/file_base.h b/cpp/src/arrow/dataset/file_base.h
index 08359881a20..41f7933aca9 100644
--- a/cpp/src/arrow/dataset/file_base.h
+++ b/cpp/src/arrow/dataset/file_base.h
@@ -158,12 +158,12 @@ class ARROW_DS_EXPORT FileFormat : public std::enable_shared_from_this<FileForma
 
   /// \brief Open a fragment
   virtual Result<std::shared_ptr<FileFragment>> MakeFragment(
-      FileSource source, Expression partition_expression,
+      FileSource source, compute::Expression partition_expression,
       std::shared_ptr<Schema> physical_schema);
 
   /// \brief Create a FileFragment for a FileSource.
-  Result<std::shared_ptr<FileFragment>> MakeFragment(FileSource source,
-                                                     Expression partition_expression);
+  Result<std::shared_ptr<FileFragment>> MakeFragment(
+      FileSource source, compute::Expression partition_expression);
 
   /// \brief Create a FileFragment for a FileSource.
   Result<std::shared_ptr<FileFragment>> MakeFragment(
@@ -193,7 +193,8 @@ class ARROW_DS_EXPORT FileFragment : public Fragment {
 
  protected:
   FileFragment(FileSource source, std::shared_ptr<FileFormat> format,
-               Expression partition_expression, std::shared_ptr<Schema> physical_schema)
+               compute::Expression partition_expression,
+               std::shared_ptr<Schema> physical_schema)
       : Fragment(std::move(partition_expression), std::move(physical_schema)),
         source_(std::move(source)),
         format_(std::move(format)) {}
@@ -226,7 +227,7 @@ class ARROW_DS_EXPORT FileSystemDataset : public Dataset {
   ///
   /// \return A constructed dataset.
   static Result<std::shared_ptr<FileSystemDataset>> Make(
-      std::shared_ptr<Schema> schema, Expression root_partition,
+      std::shared_ptr<Schema> schema, compute::Expression root_partition,
       std::shared_ptr<FileFormat> format, std::shared_ptr<fs::FileSystem> filesystem,
       std::vector<std::shared_ptr<FileFragment>> fragments);
 
@@ -258,10 +259,11 @@ class ARROW_DS_EXPORT FileSystemDataset : public Dataset {
   explicit FileSystemDataset(std::shared_ptr<Schema> schema)
       : Dataset(std::move(schema)) {}
 
-  FileSystemDataset(std::shared_ptr<Schema> schema, Expression partition_expression)
+  FileSystemDataset(std::shared_ptr<Schema> schema,
+                    compute::Expression partition_expression)
       : Dataset(std::move(schema), partition_expression) {}
 
-  Result<FragmentIterator> GetFragmentsImpl(Expression predicate) override;
+  Result<FragmentIterator> GetFragmentsImpl(compute::Expression predicate) override;
 
   void SetupSubtreePruning();
 
diff --git a/cpp/src/arrow/dataset/file_benchmark.cc b/cpp/src/arrow/dataset/file_benchmark.cc
index 238a83bdc1a..5caea18511d 100644
--- a/cpp/src/arrow/dataset/file_benchmark.cc
+++ b/cpp/src/arrow/dataset/file_benchmark.cc
@@ -17,8 +17,8 @@
 
 #include "benchmark/benchmark.h"
 
+#include "arrow/compute/exec/expression.h"
 #include "arrow/dataset/discovery.h"
-#include "arrow/dataset/expression.h"
 #include "arrow/dataset/file_base.h"
 #include "arrow/dataset/file_ipc.h"
 #include "arrow/dataset/partition.h"
@@ -62,7 +62,7 @@ static void GetAllFragments(benchmark::State& state) {
   }
 }
 
-static void GetFilteredFragments(benchmark::State& state, Expression filter) {
+static void GetFilteredFragments(benchmark::State& state, compute::Expression filter) {
   auto dataset = GetDataset();
   ASSERT_OK_AND_ASSIGN(filter, filter.Bind(*dataset->schema()));
   for (auto _ : state) {
@@ -71,6 +71,9 @@ static void GetFilteredFragments(benchmark::State& state, Expression filter) {
   }
 }
 
+using compute::field_ref;
+using compute::literal;
+
 BENCHMARK(GetAllFragments);
 // Drill down to a subtree.
 BENCHMARK_CAPTURE(GetFilteredFragments, single_dir, equal(field_ref("a"), literal(90)));
diff --git a/cpp/src/arrow/dataset/file_parquet.cc b/cpp/src/arrow/dataset/file_parquet.cc
index 497e4128fdf..8dbc18059b3 100644
--- a/cpp/src/arrow/dataset/file_parquet.cc
+++ b/cpp/src/arrow/dataset/file_parquet.cc
@@ -163,7 +163,7 @@ static Result<std::shared_ptr<SchemaManifest>> GetSchemaManifest(
   return manifest;
 }
 
-static util::optional<Expression> ColumnChunkStatisticsAsExpression(
+static util::optional<compute::Expression> ColumnChunkStatisticsAsExpression(
     const SchemaField& schema_field, const parquet::RowGroupMetaData& metadata) {
   // For the remaining of this function, failure to extract/parse statistics
   // are ignored by returning nullptr. The goal is two fold. First
@@ -182,11 +182,11 @@ static util::optional<Expression> ColumnChunkStatisticsAsExpression(
   }
 
   const auto& field = schema_field.field;
-  auto field_expr = field_ref(field->name());
+  auto field_expr = compute::field_ref(field->name());
 
   // Optimize for corner case where all values are nulls
   if (statistics->num_values() == statistics->null_count()) {
-    return equal(std::move(field_expr), literal(MakeNullScalar(field->type())));
+    return equal(std::move(field_expr), compute::literal(MakeNullScalar(field->type())));
   }
 
   std::shared_ptr<Scalar> min, max;
@@ -197,10 +197,13 @@ static util::optional<Expression> ColumnChunkStatisticsAsExpression(
   auto maybe_min = min->CastTo(field->type());
   auto maybe_max = max->CastTo(field->type());
   if (maybe_min.ok() && maybe_max.ok()) {
-    min = maybe_min.MoveValueUnsafe();
-    max = maybe_max.MoveValueUnsafe();
-    return and_(greater_equal(field_expr, literal(min)),
-                less_equal(field_expr, literal(max)));
+    auto lower_bound =
+        compute::greater_equal(field_expr, compute::literal(maybe_min.MoveValueUnsafe()));
+
+    auto upper_bound = compute::less_equal(std::move(field_expr),
+                                           compute::literal(maybe_max.MoveValueUnsafe()));
+
+    return compute::and_(std::move(lower_bound), std::move(upper_bound));
   }
 
   return util::nullopt;
@@ -383,7 +386,7 @@ Result<ScanTaskIterator> ParquetFileFormat::ScanFile(
 }
 
 Result<std::shared_ptr<ParquetFileFragment>> ParquetFileFormat::MakeFragment(
-    FileSource source, Expression partition_expression,
+    FileSource source, compute::Expression partition_expression,
     std::shared_ptr<Schema> physical_schema, std::vector<int> row_groups) {
   return std::shared_ptr<ParquetFileFragment>(new ParquetFileFragment(
       std::move(source), shared_from_this(), std::move(partition_expression),
@@ -391,7 +394,7 @@ Result<std::shared_ptr<ParquetFileFragment>> ParquetFileFormat::MakeFragment(
 }
 
 Result<std::shared_ptr<FileFragment>> ParquetFileFormat::MakeFragment(
-    FileSource source, Expression partition_expression,
+    FileSource source, compute::Expression partition_expression,
     std::shared_ptr<Schema> physical_schema) {
   return std::shared_ptr<FileFragment>(new ParquetFileFragment(
       std::move(source), shared_from_this(), std::move(partition_expression),
@@ -447,7 +450,7 @@ Status ParquetFileWriter::FinishInternal() { return parquet_writer_->Close(); }
 
 ParquetFileFragment::ParquetFileFragment(FileSource source,
                                          std::shared_ptr<FileFormat> format,
-                                         Expression partition_expression,
+                                         compute::Expression partition_expression,
                                          std::shared_ptr<Schema> physical_schema,
                                          util::optional<std::vector<int>> row_groups)
     : FileFragment(std::move(source), std::move(format), std::move(partition_expression),
@@ -494,7 +497,7 @@ Status ParquetFileFragment::SetMetadata(
   metadata_ = std::move(metadata);
   manifest_ = std::move(manifest);
 
-  statistics_expressions_.resize(row_groups_->size(), literal(true));
+  statistics_expressions_.resize(row_groups_->size(), compute::literal(true));
   statistics_expressions_complete_.resize(physical_schema_->num_fields(), false);
 
   for (int row_group : *row_groups_) {
@@ -509,7 +512,8 @@ Status ParquetFileFragment::SetMetadata(
   return Status::OK();
 }
 
-Result<FragmentVector> ParquetFileFragment::SplitByRowGroup(Expression predicate) {
+Result<FragmentVector> ParquetFileFragment::SplitByRowGroup(
+    compute::Expression predicate) {
   RETURN_NOT_OK(EnsureCompleteMetadata());
   ARROW_ASSIGN_OR_RAISE(auto row_groups, FilterRowGroups(predicate));
 
@@ -527,7 +531,8 @@ Result<FragmentVector> ParquetFileFragment::SplitByRowGroup(Expression predicate
   return fragments;
 }
 
-Result<std::shared_ptr<Fragment>> ParquetFileFragment::Subset(Expression predicate) {
+Result<std::shared_ptr<Fragment>> ParquetFileFragment::Subset(
+    compute::Expression predicate) {
   RETURN_NOT_OK(EnsureCompleteMetadata());
   ARROW_ASSIGN_OR_RAISE(auto row_groups, FilterRowGroups(predicate));
   return Subset(std::move(row_groups));
@@ -544,15 +549,16 @@ Result<std::shared_ptr<Fragment>> ParquetFileFragment::Subset(
   return new_fragment;
 }
 
-inline void FoldingAnd(Expression* l, Expression r) {
-  if (*l == literal(true)) {
+inline void FoldingAnd(compute::Expression* l, compute::Expression r) {
+  if (*l == compute::literal(true)) {
     *l = std::move(r);
   } else {
     *l = and_(std::move(*l), std::move(r));
   }
 }
 
-Result<std::vector<int>> ParquetFileFragment::FilterRowGroups(Expression predicate) {
+Result<std::vector<int>> ParquetFileFragment::FilterRowGroups(
+    compute::Expression predicate) {
   auto lock = physical_schema_mutex_.Lock();
 
   DCHECK_NE(metadata_, nullptr);
@@ -724,7 +730,7 @@ ParquetDatasetFactory::CollectParquetFragments(const Partitioning& partitioning)
 
     auto partition_expression =
         partitioning.Parse(StripPrefixAndFilename(path, options_.partition_base_dir))
-            .ValueOr(literal(true));
+            .ValueOr(compute::literal(true));
 
     ARROW_ASSIGN_OR_RAISE(
         auto fragment,
@@ -775,8 +781,8 @@ Result<std::shared_ptr<Dataset>> ParquetDatasetFactory::Finish(FinishOptions opt
   }
 
   ARROW_ASSIGN_OR_RAISE(auto fragments, CollectParquetFragments(*partitioning));
-  return FileSystemDataset::Make(std::move(schema), literal(true), format_, filesystem_,
-                                 std::move(fragments));
+  return FileSystemDataset::Make(std::move(schema), compute::literal(true), format_,
+                                 filesystem_, std::move(fragments));
 }
 
 }  // namespace dataset
diff --git a/cpp/src/arrow/dataset/file_parquet.h b/cpp/src/arrow/dataset/file_parquet.h
index 790e89c24c2..99e18337ad7 100644
--- a/cpp/src/arrow/dataset/file_parquet.h
+++ b/cpp/src/arrow/dataset/file_parquet.h
@@ -103,12 +103,12 @@ class ARROW_DS_EXPORT ParquetFileFormat : public FileFormat {
 
   /// \brief Create a Fragment targeting all RowGroups.
   Result<std::shared_ptr<FileFragment>> MakeFragment(
-      FileSource source, Expression partition_expression,
+      FileSource source, compute::Expression partition_expression,
       std::shared_ptr<Schema> physical_schema) override;
 
   /// \brief Create a Fragment, restricted to the specified row groups.
   Result<std::shared_ptr<ParquetFileFragment>> MakeFragment(
-      FileSource source, Expression partition_expression,
+      FileSource source, compute::Expression partition_expression,
       std::shared_ptr<Schema> physical_schema, std::vector<int> row_groups);
 
   /// \brief Return a FileReader on the given source.
@@ -136,7 +136,7 @@ class ARROW_DS_EXPORT ParquetFileFormat : public FileFormat {
 /// significant performance boost when scanning high latency file systems.
 class ARROW_DS_EXPORT ParquetFileFragment : public FileFragment {
  public:
-  Result<FragmentVector> SplitByRowGroup(Expression predicate);
+  Result<FragmentVector> SplitByRowGroup(compute::Expression predicate);
 
   /// \brief Return the RowGroups selected by this fragment.
   const std::vector<int>& row_groups() const {
@@ -152,12 +152,12 @@ class ARROW_DS_EXPORT ParquetFileFragment : public FileFragment {
   Status EnsureCompleteMetadata(parquet::arrow::FileReader* reader = NULLPTR);
 
   /// \brief Return fragment which selects a filtered subset of this fragment's RowGroups.
-  Result<std::shared_ptr<Fragment>> Subset(Expression predicate);
+  Result<std::shared_ptr<Fragment>> Subset(compute::Expression predicate);
   Result<std::shared_ptr<Fragment>> Subset(std::vector<int> row_group_ids);
 
  private:
   ParquetFileFragment(FileSource source, std::shared_ptr<FileFormat> format,
-                      Expression partition_expression,
+                      compute::Expression partition_expression,
                       std::shared_ptr<Schema> physical_schema,
                       util::optional<std::vector<int>> row_groups);
 
@@ -171,7 +171,7 @@ class ARROW_DS_EXPORT ParquetFileFragment : public FileFragment {
   }
 
   /// Return a filtered subset of row group indices.
-  Result<std::vector<int>> FilterRowGroups(Expression predicate);
+  Result<std::vector<int>> FilterRowGroups(compute::Expression predicate);
 
   ParquetFileFormat& parquet_format_;
 
@@ -179,7 +179,7 @@ class ARROW_DS_EXPORT ParquetFileFragment : public FileFragment {
   /// or util::nullopt if all row groups are selected.
   util::optional<std::vector<int>> row_groups_;
 
-  std::vector<Expression> statistics_expressions_;
+  std::vector<compute::Expression> statistics_expressions_;
   std::vector<bool> statistics_expressions_complete_;
   std::shared_ptr<parquet::FileMetaData> metadata_;
   std::shared_ptr<parquet::arrow::SchemaManifest> manifest_;
diff --git a/cpp/src/arrow/dataset/file_parquet_test.cc b/cpp/src/arrow/dataset/file_parquet_test.cc
index 04e61c30d41..599c6240c1c 100644
--- a/cpp/src/arrow/dataset/file_parquet_test.cc
+++ b/cpp/src/arrow/dataset/file_parquet_test.cc
@@ -157,7 +157,8 @@ class TestParquetFileFormat : public FileFormatFixtureMixin<ParquetFormatHelper>
   }
 
   void CountRowGroupsInFragment(const std::shared_ptr<Fragment>& fragment,
-                                std::vector<int> expected_row_groups, Expression filter) {
+                                std::vector<int> expected_row_groups,
+                                compute::Expression filter) {
     SetFilter(filter);
 
     auto parquet_fragment = checked_pointer_cast<ParquetFileFragment>(fragment);
@@ -271,7 +272,8 @@ class TestParquetFileFormatScan : public FileFormatScanMixin<ParquetFormatHelper
   }
 
   void CountRowGroupsInFragment(const std::shared_ptr<Fragment>& fragment,
-                                std::vector<int> expected_row_groups, Expression filter) {
+                                std::vector<int> expected_row_groups,
+                                compute::Expression filter) {
     SetFilter(filter);
 
     auto parquet_fragment = checked_pointer_cast<ParquetFileFragment>(fragment);
diff --git a/cpp/src/arrow/dataset/file_test.cc b/cpp/src/arrow/dataset/file_test.cc
index dbddb5b385b..0c8954e6b7b 100644
--- a/cpp/src/arrow/dataset/file_test.cc
+++ b/cpp/src/arrow/dataset/file_test.cc
@@ -170,7 +170,7 @@ TEST_F(TestFileSystemDataset, RootPartitionPruning) {
   auto root_partition = equal(field_ref("i32"), literal(5));
   MakeDataset({fs::File("a"), fs::File("b")}, root_partition);
 
-  auto GetFragments = [&](Expression filter) {
+  auto GetFragments = [&](compute::Expression filter) {
     return *dataset_->GetFragments(*filter.Bind(*dataset_->schema()));
   };
 
@@ -204,7 +204,7 @@ TEST_F(TestFileSystemDataset, TreePartitionPruning) {
       fs::Dir("CA"), fs::File("CA/San Francisco"), fs::File("CA/Franklin"),
   };
 
-  std::vector<Expression> partitions = {
+  std::vector<compute::Expression> partitions = {
       equal(field_ref("state"), literal("NY")),
 
       and_(equal(field_ref("state"), literal("NY")),
@@ -234,7 +234,7 @@ TEST_F(TestFileSystemDataset, TreePartitionPruning) {
   // Default filter should always return all data.
   AssertFragmentsAreFromPath(*dataset_->GetFragments(), all_cities);
 
-  auto GetFragments = [&](Expression filter) {
+  auto GetFragments = [&](compute::Expression filter) {
     return *dataset_->GetFragments(*filter.Bind(*dataset_->schema()));
   };
 
@@ -260,7 +260,7 @@ TEST_F(TestFileSystemDataset, FragmentPartitions) {
       fs::Dir("CA"), fs::File("CA/San Francisco"), fs::File("CA/Franklin"),
   };
 
-  std::vector<Expression> partitions = {
+  std::vector<compute::Expression> partitions = {
       equal(field_ref("state"), literal("NY")),
 
       and_(equal(field_ref("state"), literal("NY")),
@@ -566,7 +566,7 @@ TEST(Subtree, EncodeFragments) {
   auto encoded = tree.EncodeFragments(fragments);
   EXPECT_THAT(
       tree.code_to_expr_,
-      ContainerEq(std::vector<Expression>{
+      ContainerEq(std::vector<compute::Expression>{
           equal(field_ref("a"), literal("1")), equal(field_ref("b"), literal("2")),
           equal(field_ref("a"), literal("2")), equal(field_ref("b"), literal("3"))}));
   EXPECT_THAT(
diff --git a/cpp/src/arrow/dataset/partition.cc b/cpp/src/arrow/dataset/partition.cc
index 43ccd777cf2..f6e7b9a0d28 100644
--- a/cpp/src/arrow/dataset/partition.cc
+++ b/cpp/src/arrow/dataset/partition.cc
@@ -53,18 +53,18 @@ std::shared_ptr<Partitioning> Partitioning::Default() {
 
     std::string type_name() const override { return "default"; }
 
-    Result<Expression> Parse(const std::string& path) const override {
-      return literal(true);
+    Result<compute::Expression> Parse(const std::string& path) const override {
+      return compute::literal(true);
     }
 
-    Result<std::string> Format(const Expression& expr) const override {
+    Result<std::string> Format(const compute::Expression& expr) const override {
       return Status::NotImplemented("formatting paths from ", type_name(),
                                     " Partitioning");
     }
 
     Result<PartitionedBatches> Partition(
         const std::shared_ptr<RecordBatch>& batch) const override {
-      return PartitionedBatches{{batch}, {literal(true)}};
+      return PartitionedBatches{{batch}, {compute::literal(true)}};
     }
   };
 
@@ -103,7 +103,7 @@ Result<Partitioning::PartitionedBatches> KeyValuePartitioning::Partition(
 
   if (key_indices.empty()) {
     // no fields to group by; return the whole batch
-    return PartitionedBatches{{batch}, {literal(true)}};
+    return PartitionedBatches{{batch}, {compute::literal(true)}};
   }
 
   // assemble an ExecBatch of the key columns
@@ -132,14 +132,15 @@ Result<Partitioning::PartitionedBatches> KeyValuePartitioning::Partition(
   // assemble partition expressions from the unique keys
   out.expressions.resize(grouper->num_groups());
   for (uint32_t group = 0; group < grouper->num_groups(); ++group) {
-    std::vector<Expression> exprs(num_keys);
+    std::vector<compute::Expression> exprs(num_keys);
 
     for (int i = 0; i < num_keys; ++i) {
       ARROW_ASSIGN_OR_RAISE(auto val, unique_arrays[i]->GetScalar(group));
       const auto& name = batch->schema()->field(key_indices[i])->name();
 
-      exprs[i] = val->is_valid ? equal(field_ref(name), literal(std::move(val)))
-                               : is_null(field_ref(name));
+      exprs[i] = val->is_valid ? compute::equal(compute::field_ref(name),
+                                                compute::literal(std::move(val)))
+                               : compute::is_null(compute::field_ref(name));
     }
     out.expressions[group] = and_(std::move(exprs));
   }
@@ -157,10 +158,10 @@ Result<Partitioning::PartitionedBatches> KeyValuePartitioning::Partition(
   return out;
 }
 
-Result<Expression> KeyValuePartitioning::ConvertKey(const Key& key) const {
+Result<compute::Expression> KeyValuePartitioning::ConvertKey(const Key& key) const {
   ARROW_ASSIGN_OR_RAISE(auto match, FieldRef(key.name).FindOneOrNone(*schema_));
   if (match.empty()) {
-    return literal(true);
+    return compute::literal(true);
   }
 
   auto field_index = match[0];
@@ -169,7 +170,7 @@ Result<Expression> KeyValuePartitioning::ConvertKey(const Key& key) const {
   std::shared_ptr<Scalar> converted;
 
   if (!key.value.has_value()) {
-    return is_null(field_ref(field->name()));
+    return compute::is_null(compute::field_ref(field->name()));
   } else if (field->type()->id() == Type::DICTIONARY) {
     if (dictionaries_.empty() || dictionaries_[field_index] == nullptr) {
       return Status::Invalid("No dictionary provided for dictionary field ",
@@ -201,22 +202,23 @@ Result<Expression> KeyValuePartitioning::ConvertKey(const Key& key) const {
     ARROW_ASSIGN_OR_RAISE(converted, Scalar::Parse(field->type(), *key.value));
   }
 
-  return equal(field_ref(field->name()), literal(std::move(converted)));
+  return compute::equal(compute::field_ref(field->name()),
+                        compute::literal(std::move(converted)));
 }
 
-Result<Expression> KeyValuePartitioning::Parse(const std::string& path) const {
-  std::vector<Expression> expressions;
+Result<compute::Expression> KeyValuePartitioning::Parse(const std::string& path) const {
+  std::vector<compute::Expression> expressions;
 
   for (const Key& key : ParseKeys(path)) {
     ARROW_ASSIGN_OR_RAISE(auto expr, ConvertKey(key));
-    if (expr == literal(true)) continue;
+    if (expr == compute::literal(true)) continue;
     expressions.push_back(std::move(expr));
   }
 
   return and_(std::move(expressions));
 }
 
-Result<std::string> KeyValuePartitioning::Format(const Expression& expr) const {
+Result<std::string> KeyValuePartitioning::Format(const compute::Expression& expr) const {
   ScalarVector values{static_cast<size_t>(schema_->num_fields()), nullptr};
 
   ARROW_ASSIGN_OR_RAISE(auto known_values, ExtractKnownFieldValues(expr));
diff --git a/cpp/src/arrow/dataset/partition.h b/cpp/src/arrow/dataset/partition.h
index 6330725b7a6..36276e7a3b1 100644
--- a/cpp/src/arrow/dataset/partition.h
+++ b/cpp/src/arrow/dataset/partition.h
@@ -26,7 +26,7 @@
 #include <utility>
 #include <vector>
 
-#include "arrow/dataset/expression.h"
+#include "arrow/compute/exec/expression.h"
 #include "arrow/dataset/type_fwd.h"
 #include "arrow/dataset/visibility.h"
 #include "arrow/util/optional.h"
@@ -67,15 +67,15 @@ class ARROW_DS_EXPORT Partitioning {
   /// produce sub-batches which satisfy mutually exclusive Expressions.
   struct PartitionedBatches {
     RecordBatchVector batches;
-    std::vector<Expression> expressions;
+    std::vector<compute::Expression> expressions;
   };
   virtual Result<PartitionedBatches> Partition(
       const std::shared_ptr<RecordBatch>& batch) const = 0;
 
   /// \brief Parse a path into a partition expression
-  virtual Result<Expression> Parse(const std::string& path) const = 0;
+  virtual Result<compute::Expression> Parse(const std::string& path) const = 0;
 
-  virtual Result<std::string> Format(const Expression& expr) const = 0;
+  virtual Result<std::string> Format(const compute::Expression& expr) const = 0;
 
   /// \brief A default Partitioning which always yields scalar(true)
   static std::shared_ptr<Partitioning> Default();
@@ -142,9 +142,9 @@ class ARROW_DS_EXPORT KeyValuePartitioning : public Partitioning {
   Result<PartitionedBatches> Partition(
       const std::shared_ptr<RecordBatch>& batch) const override;
 
-  Result<Expression> Parse(const std::string& path) const override;
+  Result<compute::Expression> Parse(const std::string& path) const override;
 
-  Result<std::string> Format(const Expression& expr) const override;
+  Result<std::string> Format(const compute::Expression& expr) const override;
 
  protected:
   KeyValuePartitioning(std::shared_ptr<Schema> schema, ArrayVector dictionaries)
@@ -159,7 +159,7 @@ class ARROW_DS_EXPORT KeyValuePartitioning : public Partitioning {
   virtual Result<std::string> FormatValues(const ScalarVector& values) const = 0;
 
   /// Convert a Key to a full expression.
-  Result<Expression> ConvertKey(const Key& key) const;
+  Result<compute::Expression> ConvertKey(const Key& key) const;
 
   ArrayVector dictionaries_;
 };
@@ -234,9 +234,9 @@ class ARROW_DS_EXPORT HivePartitioning : public KeyValuePartitioning {
 /// \brief Implementation provided by lambda or other callable
 class ARROW_DS_EXPORT FunctionPartitioning : public Partitioning {
  public:
-  using ParseImpl = std::function<Result<Expression>(const std::string&)>;
+  using ParseImpl = std::function<Result<compute::Expression>(const std::string&)>;
 
-  using FormatImpl = std::function<Result<std::string>(const Expression&)>;
+  using FormatImpl = std::function<Result<std::string>(const compute::Expression&)>;
 
   FunctionPartitioning(std::shared_ptr<Schema> schema, ParseImpl parse_impl,
                        FormatImpl format_impl = NULLPTR, std::string name = "function")
@@ -247,11 +247,11 @@ class ARROW_DS_EXPORT FunctionPartitioning : public Partitioning {
 
   std::string type_name() const override { return name_; }
 
-  Result<Expression> Parse(const std::string& path) const override {
+  Result<compute::Expression> Parse(const std::string& path) const override {
     return parse_impl_(path);
   }
 
-  Result<std::string> Format(const Expression& expr) const override {
+  Result<std::string> Format(const compute::Expression& expr) const override {
     if (format_impl_) {
       return format_impl_(expr);
     }
diff --git a/cpp/src/arrow/dataset/partition_test.cc b/cpp/src/arrow/dataset/partition_test.cc
index 06c3cc67674..1c776f18329 100644
--- a/cpp/src/arrow/dataset/partition_test.cc
+++ b/cpp/src/arrow/dataset/partition_test.cc
@@ -46,17 +46,17 @@ class TestPartitioning : public ::testing::Test {
     ASSERT_RAISES(Invalid, partitioning_->Parse(path));
   }
 
-  void AssertParse(const std::string& path, Expression expected) {
+  void AssertParse(const std::string& path, compute::Expression expected) {
     ASSERT_OK_AND_ASSIGN(auto parsed, partitioning_->Parse(path));
     ASSERT_EQ(parsed, expected);
   }
 
   template <StatusCode code = StatusCode::Invalid>
-  void AssertFormatError(Expression expr) {
+  void AssertFormatError(compute::Expression expr) {
     ASSERT_EQ(partitioning_->Format(expr).status().code(), code);
   }
 
-  void AssertFormat(Expression expr, const std::string& expected) {
+  void AssertFormat(compute::Expression expr, const std::string& expected) {
     // formatted partition expressions are bound to the schema of the dataset being
     // written
     ASSERT_OK_AND_ASSIGN(auto formatted, partitioning_->Format(expr));
@@ -64,7 +64,8 @@ class TestPartitioning : public ::testing::Test {
 
     // ensure the formatted path round trips the relevant components of the partition
     // expression: roundtripped should be a subset of expr
-    ASSERT_OK_AND_ASSIGN(Expression roundtripped, partitioning_->Parse(formatted));
+    ASSERT_OK_AND_ASSIGN(compute::Expression roundtripped,
+                         partitioning_->Parse(formatted));
 
     ASSERT_OK_AND_ASSIGN(roundtripped, roundtripped.Bind(*written_schema_));
     ASSERT_OK_AND_ASSIGN(auto simplified, SimplifyWithGuarantee(roundtripped, expr));
@@ -81,7 +82,7 @@ class TestPartitioning : public ::testing::Test {
   void AssertPartition(const std::shared_ptr<Partitioning> partitioning,
                        const std::shared_ptr<RecordBatch> full_batch,
                        const RecordBatchVector& expected_batches,
-                       const std::vector<Expression>& expected_expressions) {
+                       const std::vector<compute::Expression>& expected_expressions) {
     ASSERT_OK_AND_ASSIGN(auto partition_results, partitioning->Partition(full_batch));
     std::shared_ptr<RecordBatch> rest = full_batch;
     ASSERT_EQ(partition_results.batches.size(), expected_batches.size());
@@ -91,7 +92,8 @@ class TestPartitioning : public ::testing::Test {
       std::shared_ptr<RecordBatch> actual_batch =
           partition_results.batches[partition_index];
       AssertBatchesEqual(*expected_batches[partition_index], *actual_batch);
-      Expression actual_expression = partition_results.expressions[partition_index];
+      compute::Expression actual_expression =
+          partition_results.expressions[partition_index];
       ASSERT_EQ(expected_expressions[partition_index], actual_expression);
     }
   }
@@ -101,7 +103,7 @@ class TestPartitioning : public ::testing::Test {
                        const std::string& record_batch_json,
                        const std::shared_ptr<Schema> partitioned_schema,
                        const std::vector<std::string>& expected_record_batch_strs,
-                       const std::vector<Expression>& expected_expressions) {
+                       const std::vector<compute::Expression>& expected_expressions) {
     auto record_batch = RecordBatchFromJSON(schema, record_batch_json);
     RecordBatchVector expected_batches;
     for (const auto& expected_record_batch_str : expected_record_batch_strs) {
@@ -161,7 +163,7 @@ TEST_F(TestPartitioning, Partition) {
       R"([{"c": 4}])",
   };
 
-  std::vector<Expression> expected_expressions = {
+  std::vector<compute::Expression> expected_expressions = {
       and_(equal(field_ref("a"), literal(3)), equal(field_ref("b"), literal("x"))),
       and_(equal(field_ref("a"), literal(1)), is_null(field_ref("b"))),
       and_(is_null(field_ref("a")), is_null(field_ref("b"))),
@@ -562,7 +564,7 @@ TEST_F(TestPartitioning, EtlThenHive) {
               field("hour", int8()), field("alpha", int32()), field("beta", float32())});
 
   partitioning_ = std::make_shared<FunctionPartitioning>(
-      schm, [&](const std::string& path) -> Result<Expression> {
+      schm, [&](const std::string& path) -> Result<compute::Expression> {
         auto segments = fs::internal::SplitAbstractPath(path);
         if (segments.size() < etl_fields.size() + alphabeta_fields.size()) {
           return Status::Invalid("path ", path, " can't be parsed");
@@ -604,8 +606,8 @@ TEST_F(TestPartitioning, Set) {
   // An adhoc partitioning which parses segments like "/x in [1 4 5]"
   // into (field_ref("x") == 1 or field_ref("x") == 4 or field_ref("x") == 5)
   partitioning_ = std::make_shared<FunctionPartitioning>(
-      schm, [&](const std::string& path) -> Result<Expression> {
-        std::vector<Expression> subexpressions;
+      schm, [&](const std::string& path) -> Result<compute::Expression> {
+        std::vector<compute::Expression> subexpressions;
         for (auto segment : fs::internal::SplitAbstractPath(path)) {
           std::smatch matches;
 
@@ -643,8 +645,8 @@ class RangePartitioning : public Partitioning {
 
   std::string type_name() const override { return "range"; }
 
-  Result<Expression> Parse(const std::string& path) const override {
-    std::vector<Expression> ranges;
+  Result<compute::Expression> Parse(const std::string& path) const override {
+    std::vector<compute::Expression> ranges;
 
     for (auto segment : fs::internal::SplitAbstractPath(path)) {
       auto key = HivePartitioning::ParseKey(segment, "");
@@ -688,7 +690,7 @@ class RangePartitioning : public Partitioning {
     return Status::OK();
   }
 
-  Result<std::string> Format(const Expression&) const override { return ""; }
+  Result<std::string> Format(const compute::Expression&) const override { return ""; }
   Result<PartitionedBatches> Partition(
       const std::shared_ptr<RecordBatch>&) const override {
     return Status::OK();
diff --git a/cpp/src/arrow/dataset/projector.h b/cpp/src/arrow/dataset/projector.h
index d3171fbfb3d..86d38f0af23 100644
--- a/cpp/src/arrow/dataset/projector.h
+++ b/cpp/src/arrow/dataset/projector.h
@@ -25,7 +25,7 @@
 namespace arrow {
 namespace dataset {
 
-// FIXME this is superceded by Expression::Bind
+// FIXME this is superceded by compute::Expression::Bind
 ARROW_DS_EXPORT Status CheckProjectable(const Schema& from, const Schema& to);
 
 }  // namespace dataset
diff --git a/cpp/src/arrow/dataset/scanner.cc b/cpp/src/arrow/dataset/scanner.cc
index 70f15294650..41fa7ec5c77 100644
--- a/cpp/src/arrow/dataset/scanner.cc
+++ b/cpp/src/arrow/dataset/scanner.cc
@@ -45,7 +45,7 @@ using FragmentGenerator = std::function<Future<std::shared_ptr<Fragment>>()>;
 std::vector<std::string> ScanOptions::MaterializedFields() const {
   std::vector<std::string> fields;
 
-  for (const Expression* expr : {&filter, &projection}) {
+  for (const compute::Expression* expr : {&filter, &projection}) {
     for (const FieldRef& ref : FieldsInExpression(*expr)) {
       DCHECK(ref.name());
       fields.push_back(*ref.name());
@@ -406,7 +406,7 @@ namespace {
 
 inline Result<EnumeratedRecordBatch> DoFilterAndProjectRecordBatchAsync(
     const std::shared_ptr<Scanner>& scanner, const EnumeratedRecordBatch& in) {
-  ARROW_ASSIGN_OR_RAISE(Expression simplified_filter,
+  ARROW_ASSIGN_OR_RAISE(compute::Expression simplified_filter,
                         SimplifyWithGuarantee(scanner->options()->filter,
                                               in.fragment.value->partition_expression()));
 
@@ -431,7 +431,7 @@ inline Result<EnumeratedRecordBatch> DoFilterAndProjectRecordBatchAsync(
                                   compute::FilterOptions::Defaults(), &exec_context));
   }
 
-  ARROW_ASSIGN_OR_RAISE(Expression simplified_projection,
+  ARROW_ASSIGN_OR_RAISE(compute::Expression simplified_projection,
                         SimplifyWithGuarantee(scanner->options()->projection,
                                               in.fragment.value->partition_expression()));
   ARROW_ASSIGN_OR_RAISE(
@@ -685,12 +685,12 @@ Status ScannerBuilder::Project(std::vector<std::string> columns) {
   return SetProjection(scan_options_.get(), std::move(columns));
 }
 
-Status ScannerBuilder::Project(std::vector<Expression> exprs,
+Status ScannerBuilder::Project(std::vector<compute::Expression> exprs,
                                std::vector<std::string> names) {
   return SetProjection(scan_options_.get(), std::move(exprs), std::move(names));
 }
 
-Status ScannerBuilder::Filter(const Expression& filter) {
+Status ScannerBuilder::Filter(const compute::Expression& filter) {
   return SetFilter(scan_options_.get(), filter);
 }
 
diff --git a/cpp/src/arrow/dataset/scanner.h b/cpp/src/arrow/dataset/scanner.h
index 79e5986a4de..15bd27ab4f3 100644
--- a/cpp/src/arrow/dataset/scanner.h
+++ b/cpp/src/arrow/dataset/scanner.h
@@ -25,8 +25,8 @@
 #include <utility>
 #include <vector>
 
+#include "arrow/compute/exec/expression.h"
 #include "arrow/dataset/dataset.h"
-#include "arrow/dataset/expression.h"
 #include "arrow/dataset/projector.h"
 #include "arrow/dataset/type_fwd.h"
 #include "arrow/dataset/visibility.h"
@@ -55,9 +55,9 @@ constexpr int32_t kDefaultFragmentReadahead = 8;
 /// Scan-specific options, which can be changed between scans of the same dataset.
 struct ARROW_DS_EXPORT ScanOptions {
   /// A row filter (which will be pushed down to partitioning/reading if supported).
-  Expression filter = literal(true);
+  compute::Expression filter = compute::literal(true);
   /// A projection expression (which can add/remove/rename columns).
-  Expression projection;
+  compute::Expression projection;
 
   /// Schema with which batches will be read from fragments. This is also known as the
   /// "reader schema" it will be used (for example) in constructing CSV file readers to
@@ -333,7 +333,7 @@ class ARROW_DS_EXPORT ScannerBuilder {
   ///
   /// \return Failure if any referenced column does not exists in the dataset's
   ///         Schema.
-  Status Project(std::vector<Expression> exprs, std::vector<std::string> names);
+  Status Project(std::vector<compute::Expression> exprs, std::vector<std::string> names);
 
   /// \brief Set the filter expression to return only rows matching the filter.
   ///
@@ -346,7 +346,7 @@ class ARROW_DS_EXPORT ScannerBuilder {
   ///
   /// \return Failure if any referenced columns does not exist in the dataset's
   ///         Schema.
-  Status Filter(const Expression& filter);
+  Status Filter(const compute::Expression& filter);
 
   /// \brief Indicate if the Scanner should make use of the available
   ///        ThreadPool found in ScanOptions;
diff --git a/cpp/src/arrow/dataset/scanner_internal.h b/cpp/src/arrow/dataset/scanner_internal.h
index 507bf82a735..56065d9983e 100644
--- a/cpp/src/arrow/dataset/scanner_internal.h
+++ b/cpp/src/arrow/dataset/scanner_internal.h
@@ -39,7 +39,8 @@ using internal::Executor;
 namespace dataset {
 
 inline Result<std::shared_ptr<RecordBatch>> FilterSingleBatch(
-    const std::shared_ptr<RecordBatch>& in, const Expression& filter, MemoryPool* pool) {
+    const std::shared_ptr<RecordBatch>& in, const compute::Expression& filter,
+    MemoryPool* pool) {
   compute::ExecContext exec_context{pool};
   ARROW_ASSIGN_OR_RAISE(Datum mask,
                         ExecuteScalarExpression(filter, Datum(in), &exec_context));
@@ -58,7 +59,8 @@ inline Result<std::shared_ptr<RecordBatch>> FilterSingleBatch(
   return filtered.record_batch();
 }
 
-inline RecordBatchIterator FilterRecordBatch(RecordBatchIterator it, Expression filter,
+inline RecordBatchIterator FilterRecordBatch(RecordBatchIterator it,
+                                             compute::Expression filter,
                                              MemoryPool* pool) {
   return MakeMaybeMapIterator(
       [=](std::shared_ptr<RecordBatch> in) -> Result<std::shared_ptr<RecordBatch>> {
@@ -68,7 +70,7 @@ inline RecordBatchIterator FilterRecordBatch(RecordBatchIterator it, Expression
 }
 
 inline Result<std::shared_ptr<RecordBatch>> ProjectSingleBatch(
-    const std::shared_ptr<RecordBatch>& in, const Expression& projection,
+    const std::shared_ptr<RecordBatch>& in, const compute::Expression& projection,
     MemoryPool* pool) {
   compute::ExecContext exec_context{pool};
   ARROW_ASSIGN_OR_RAISE(Datum projected,
@@ -88,7 +90,8 @@ inline Result<std::shared_ptr<RecordBatch>> ProjectSingleBatch(
 }
 
 inline RecordBatchIterator ProjectRecordBatch(RecordBatchIterator it,
-                                              Expression projection, MemoryPool* pool) {
+                                              compute::Expression projection,
+                                              MemoryPool* pool) {
   return MakeMaybeMapIterator(
       [=](std::shared_ptr<RecordBatch> in) -> Result<std::shared_ptr<RecordBatch>> {
         return ProjectSingleBatch(in, projection, pool);
@@ -98,7 +101,8 @@ inline RecordBatchIterator ProjectRecordBatch(RecordBatchIterator it,
 
 class FilterAndProjectScanTask : public ScanTask {
  public:
-  explicit FilterAndProjectScanTask(std::shared_ptr<ScanTask> task, Expression partition)
+  explicit FilterAndProjectScanTask(std::shared_ptr<ScanTask> task,
+                                    compute::Expression partition)
       : ScanTask(task->options(), task->fragment()),
         task_(std::move(task)),
         partition_(std::move(partition)) {}
@@ -106,10 +110,10 @@ class FilterAndProjectScanTask : public ScanTask {
   Result<RecordBatchIterator> Execute() override {
     ARROW_ASSIGN_OR_RAISE(auto it, task_->Execute());
 
-    ARROW_ASSIGN_OR_RAISE(Expression simplified_filter,
+    ARROW_ASSIGN_OR_RAISE(compute::Expression simplified_filter,
                           SimplifyWithGuarantee(options()->filter, partition_));
 
-    ARROW_ASSIGN_OR_RAISE(Expression simplified_projection,
+    ARROW_ASSIGN_OR_RAISE(compute::Expression simplified_projection,
                           SimplifyWithGuarantee(options()->projection, partition_));
 
     RecordBatchIterator filter_it =
@@ -122,10 +126,10 @@ class FilterAndProjectScanTask : public ScanTask {
   Result<RecordBatchIterator> ToFilteredAndProjectedIterator(
       const RecordBatchVector& rbs) {
     auto it = MakeVectorIterator(rbs);
-    ARROW_ASSIGN_OR_RAISE(Expression simplified_filter,
+    ARROW_ASSIGN_OR_RAISE(compute::Expression simplified_filter,
                           SimplifyWithGuarantee(options()->filter, partition_));
 
-    ARROW_ASSIGN_OR_RAISE(Expression simplified_projection,
+    ARROW_ASSIGN_OR_RAISE(compute::Expression simplified_projection,
                           SimplifyWithGuarantee(options()->projection, partition_));
 
     RecordBatchIterator filter_it =
@@ -137,10 +141,10 @@ class FilterAndProjectScanTask : public ScanTask {
 
   Result<std::shared_ptr<RecordBatch>> FilterAndProjectBatch(
       const std::shared_ptr<RecordBatch>& batch) {
-    ARROW_ASSIGN_OR_RAISE(Expression simplified_filter,
+    ARROW_ASSIGN_OR_RAISE(compute::Expression simplified_filter,
                           SimplifyWithGuarantee(options()->filter, partition_));
 
-    ARROW_ASSIGN_OR_RAISE(Expression simplified_projection,
+    ARROW_ASSIGN_OR_RAISE(compute::Expression simplified_projection,
                           SimplifyWithGuarantee(options()->projection, partition_));
     ARROW_ASSIGN_OR_RAISE(auto filtered,
                           FilterSingleBatch(batch, simplified_filter, options_->pool));
@@ -174,7 +178,7 @@ class FilterAndProjectScanTask : public ScanTask {
 
  private:
   std::shared_ptr<ScanTask> task_;
-  Expression partition_;
+  compute::Expression partition_;
 };
 
 /// \brief GetScanTaskIterator transforms an Iterator<Fragment> in a
@@ -209,7 +213,7 @@ inline Status NestedFieldRefsNotImplemented() {
   return Status::NotImplemented("Nested field references in scans.");
 }
 
-inline Status SetProjection(ScanOptions* options, const Expression& projection) {
+inline Status SetProjection(ScanOptions* options, const compute::Expression& projection) {
   ARROW_ASSIGN_OR_RAISE(options->projection, projection.Bind(*options->dataset_schema));
 
   if (options->projection.type()->id() != Type::STRUCT) {
@@ -223,7 +227,7 @@ inline Status SetProjection(ScanOptions* options, const Expression& projection)
   return Status::OK();
 }
 
-inline Status SetProjection(ScanOptions* options, std::vector<Expression> exprs,
+inline Status SetProjection(ScanOptions* options, std::vector<compute::Expression> exprs,
                             std::vector<std::string> names) {
   compute::ProjectOptions project_options{std::move(names)};
 
@@ -243,14 +247,14 @@ inline Status SetProjection(ScanOptions* options, std::vector<Expression> exprs,
 }
 
 inline Status SetProjection(ScanOptions* options, std::vector<std::string> names) {
-  std::vector<Expression> exprs(names.size());
+  std::vector<compute::Expression> exprs(names.size());
   for (size_t i = 0; i < exprs.size(); ++i) {
-    exprs[i] = field_ref(names[i]);
+    exprs[i] = compute::field_ref(names[i]);
   }
   return SetProjection(options, std::move(exprs), std::move(names));
 }
 
-inline Status SetFilter(ScanOptions* options, const Expression& filter) {
+inline Status SetFilter(ScanOptions* options, const compute::Expression& filter) {
   for (const auto& ref : FieldsInExpression(filter)) {
     if (!ref.name()) return NestedFieldRefsNotImplemented();
 
diff --git a/cpp/src/arrow/dataset/scanner_test.cc b/cpp/src/arrow/dataset/scanner_test.cc
index a83210fdd3b..17f4e079ae4 100644
--- a/cpp/src/arrow/dataset/scanner_test.cc
+++ b/cpp/src/arrow/dataset/scanner_test.cc
@@ -566,7 +566,7 @@ class ControlledDataset : public Dataset {
   void FinishFragment(int fragment_index) { fragments_[fragment_index]->Finish(); }
 
  protected:
-  Result<FragmentIterator> GetFragmentsImpl(Expression predicate) override {
+  Result<FragmentIterator> GetFragmentsImpl(compute::Expression predicate) override {
     std::vector<std::shared_ptr<Fragment>> casted_fragments(fragments_.begin(),
                                                             fragments_.end());
     return MakeVectorIterator(std::move(casted_fragments));
diff --git a/cpp/src/arrow/dataset/test_util.h b/cpp/src/arrow/dataset/test_util.h
index 0a65099ce07..83ae4bbf1e8 100644
--- a/cpp/src/arrow/dataset/test_util.h
+++ b/cpp/src/arrow/dataset/test_util.h
@@ -31,6 +31,7 @@
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
 
+#include "arrow/compute/exec/expression.h"
 #include "arrow/dataset/dataset_internal.h"
 #include "arrow/dataset/discovery.h"
 #include "arrow/dataset/file_base.h"
@@ -54,22 +55,22 @@
 namespace arrow {
 namespace dataset {
 
-const std::shared_ptr<Schema> kBoringSchema = schema({
-    field("bool", boolean()),
-    field("i8", int8()),
-    field("i32", int32()),
-    field("i32_req", int32(), /*nullable=*/false),
-    field("u32", uint32()),
-    field("i64", int64()),
-    field("f32", float32()),
-    field("f32_req", float32(), /*nullable=*/false),
-    field("f64", float64()),
-    field("date64", date64()),
-    field("str", utf8()),
-    field("dict_str", dictionary(int32(), utf8())),
-    field("dict_i32", dictionary(int32(), int32())),
-    field("ts_ns", timestamp(TimeUnit::NANO)),
-});
+using compute::call;
+using compute::field_ref;
+using compute::literal;
+
+using compute::and_;
+using compute::equal;
+using compute::greater;
+using compute::greater_equal;
+using compute::is_null;
+using compute::is_valid;
+using compute::less;
+using compute::less_equal;
+using compute::not_;
+using compute::not_equal;
+using compute::or_;
+using compute::project;
 
 using fs::internal::GetAbstractPathExtension;
 using internal::checked_cast;
@@ -125,13 +126,15 @@ class FragmentDataset : public Dataset {
  public:
   FragmentDataset(std::shared_ptr<Schema> schema, FragmentVector fragments)
       : Dataset(std::move(schema)), fragments_(std::move(fragments)) {}
+
   std::string type_name() const override { return "fragment"; }
+
   Result<std::shared_ptr<Dataset>> ReplaceSchema(std::shared_ptr<Schema>) const override {
     return Status::NotImplemented("");
   }
 
  protected:
-  Result<FragmentIterator> GetFragmentsImpl(Expression predicate) override {
+  Result<FragmentIterator> GetFragmentsImpl(compute::Expression predicate) override {
     return MakeVectorIterator(fragments_);
   }
   FragmentVector fragments_;
@@ -288,7 +291,7 @@ class DatasetFixtureMixin : public ::testing::Test {
     SetFilter(literal(true));
   }
 
-  void SetFilter(Expression filter) {
+  void SetFilter(compute::Expression filter) {
     ASSERT_OK_AND_ASSIGN(options_->filter, filter.Bind(*schema_));
   }
 
@@ -381,7 +384,7 @@ class FileFormatFixtureMixin : public ::testing::Test {
     ASSERT_OK(SetProjection(opts_.get(), opts_->dataset_schema->field_names()));
   }
 
-  void SetFilter(Expression filter) {
+  void SetFilter(compute::Expression filter) {
     ASSERT_OK_AND_ASSIGN(opts_->filter, filter.Bind(*opts_->dataset_schema));
   }
 
@@ -740,9 +743,9 @@ struct MakeFileSystemDatasetMixin {
   }
 
   void MakeDataset(const std::vector<fs::FileInfo>& infos,
-                   Expression root_partition = literal(true),
-                   std::vector<Expression> partitions = {},
-                   std::shared_ptr<Schema> s = kBoringSchema) {
+                   compute::Expression root_partition = literal(true),
+                   std::vector<compute::Expression> partitions = {},
+                   std::shared_ptr<Schema> s = schema({})) {
     auto n_fragments = infos.size();
     if (partitions.empty()) {
       partitions.resize(n_fragments, literal(true));
@@ -801,8 +804,9 @@ void AssertFragmentsAreFromPath(FragmentIterator it, std::vector<std::string> ex
               testing::UnorderedElementsAreArray(expected));
 }
 
-static std::vector<Expression> PartitionExpressionsOf(const FragmentVector& fragments) {
-  std::vector<Expression> partition_expressions;
+static std::vector<compute::Expression> PartitionExpressionsOf(
+    const FragmentVector& fragments) {
+  std::vector<compute::Expression> partition_expressions;
   std::transform(fragments.begin(), fragments.end(),
                  std::back_inserter(partition_expressions),
                  [](const std::shared_ptr<Fragment>& fragment) {
@@ -812,7 +816,7 @@ static std::vector<Expression> PartitionExpressionsOf(const FragmentVector& frag
 }
 
 void AssertFragmentsHavePartitionExpressions(std::shared_ptr<Dataset> dataset,
-                                             std::vector<Expression> expected) {
+                                             std::vector<compute::Expression> expected) {
   ASSERT_OK_AND_ASSIGN(auto fragment_it, dataset->GetFragments());
   for (auto& expr : expected) {
     ASSERT_OK_AND_ASSIGN(expr, expr.Bind(*dataset->schema()));
diff --git a/cpp/src/arrow/dataset/type_fwd.h b/cpp/src/arrow/dataset/type_fwd.h
index 6ba65a63afd..0a4148c49e1 100644
--- a/cpp/src/arrow/dataset/type_fwd.h
+++ b/cpp/src/arrow/dataset/type_fwd.h
@@ -30,6 +30,7 @@ namespace arrow {
 namespace compute {
 
 class ExecContext;
+class Expression;
 
 }  // namespace compute
 
@@ -73,8 +74,6 @@ class ParquetFragmentScanOptions;
 class ParquetFileWriter;
 class ParquetFileWriteOptions;
 
-class Expression;
-
 class Partitioning;
 class PartitioningFactory;
 class PartitioningOrFactory;
diff --git a/python/pyarrow/includes/libarrow_dataset.pxd b/python/pyarrow/includes/libarrow_dataset.pxd
index 82e1c8f13a2..bff1a2bbb54 100644
--- a/python/pyarrow/includes/libarrow_dataset.pxd
+++ b/python/pyarrow/includes/libarrow_dataset.pxd
@@ -32,28 +32,37 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
         pass
 
 
-cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil:
+cdef extern from "arrow/compute/exec/expression.h" \
+        namespace "arrow::compute" nogil:
 
-    cdef cppclass CExpression "arrow::dataset::Expression":
+    cdef cppclass CExpression "arrow::compute::Expression":
         c_bool Equals(const CExpression& other) const
         c_string ToString() const
         CResult[CExpression] Bind(const CSchema&)
 
     cdef CExpression CMakeScalarExpression \
-        "arrow::dataset::literal"(shared_ptr[CScalar] value)
+        "arrow::compute::literal"(shared_ptr[CScalar] value)
 
     cdef CExpression CMakeFieldExpression \
-        "arrow::dataset::field_ref"(c_string name)
+        "arrow::compute::field_ref"(c_string name)
 
     cdef CExpression CMakeCallExpression \
-        "arrow::dataset::call"(c_string function,
+        "arrow::compute::call"(c_string function,
                                vector[CExpression] arguments,
                                shared_ptr[CFunctionOptions] options)
 
     cdef CResult[shared_ptr[CBuffer]] CSerializeExpression \
-        "arrow::dataset::Serialize"(const CExpression&)
+        "arrow::compute::Serialize"(const CExpression&)
+
     cdef CResult[CExpression] CDeserializeExpression \
-        "arrow::dataset::Deserialize"(shared_ptr[CBuffer])
+        "arrow::compute::Deserialize"(shared_ptr[CBuffer])
+
+    cdef CResult[unordered_map[CFieldRef, CDatum, CFieldRefHash]] \
+        CExtractKnownFieldValues "arrow::compute::ExtractKnownFieldValues"(
+            const CExpression& partition_expression)
+
+
+cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil:
 
     cdef cppclass CScanOptions "arrow::dataset::ScanOptions":
         @staticmethod
@@ -331,10 +340,6 @@ cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil:
         shared_ptr[CPartitioning] partitioning() const
         shared_ptr[CPartitioningFactory] factory() const
 
-    cdef CResult[unordered_map[CFieldRef, CDatum, CFieldRefHash]] \
-        CExtractKnownFieldValues "arrow::dataset::ExtractKnownFieldValues"(
-            const CExpression& partition_expression)
-
     cdef cppclass CFileSystemFactoryOptions \
             "arrow::dataset::FileSystemFactoryOptions":
         CPartitioningOrFactory partitioning
diff --git a/r/NAMESPACE b/r/NAMESPACE
index 117e3de5c22..607177235e9 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -162,6 +162,7 @@ export(ParquetArrowReaderProperties)
 export(ParquetFileFormat)
 export(ParquetFileReader)
 export(ParquetFileWriter)
+export(ParquetFragmentScanOptions)
 export(ParquetVersionType)
 export(ParquetWriterProperties)
 export(Partitioning)
diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R
index 51cdcf85df0..b8d72c30ed6 100644
--- a/r/R/arrowExports.R
+++ b/r/R/arrowExports.R
@@ -756,24 +756,24 @@ FixedSizeListType__list_size <- function(type){
     .Call(`_arrow_FixedSizeListType__list_size`, type)
 }
 
-dataset___expr__call <- function(func_name, argument_list, options){
-    .Call(`_arrow_dataset___expr__call`, func_name, argument_list, options)
+compute___expr__call <- function(func_name, argument_list, options){
+    .Call(`_arrow_compute___expr__call`, func_name, argument_list, options)
 }
 
-dataset___expr__field_ref <- function(name){
-    .Call(`_arrow_dataset___expr__field_ref`, name)
+compute___expr__field_ref <- function(name){
+    .Call(`_arrow_compute___expr__field_ref`, name)
 }
 
-dataset___expr__get_field_ref_name <- function(ref){
-    .Call(`_arrow_dataset___expr__get_field_ref_name`, ref)
+compute___expr__get_field_ref_name <- function(x){
+    .Call(`_arrow_compute___expr__get_field_ref_name`, x)
 }
 
-dataset___expr__scalar <- function(x){
-    .Call(`_arrow_dataset___expr__scalar`, x)
+compute___expr__scalar <- function(x){
+    .Call(`_arrow_compute___expr__scalar`, x)
 }
 
-dataset___expr__ToString <- function(x){
-    .Call(`_arrow_dataset___expr__ToString`, x)
+compute___expr__ToString <- function(x){
+    .Call(`_arrow_compute___expr__ToString`, x)
 }
 
 ipc___WriteFeather__Table <- function(stream, table, version, chunk_size, compression, compression_level){
diff --git a/r/R/expression.R b/r/R/expression.R
index b3fdd52a5d0..1e104677d8b 100644
--- a/r/R/expression.R
+++ b/r/R/expression.R
@@ -253,7 +253,7 @@ print.array_expression <- function(x, ...) {
 #' @export
 Expression <- R6Class("Expression", inherit = ArrowObject,
   public = list(
-    ToString = function() dataset___expr__ToString(self),
+    ToString = function() compute___expr__ToString(self),
     cast = function(to_type, safe = TRUE, ...) {
       opts <- list(
         to_type = to_type,
@@ -265,7 +265,7 @@ Expression <- R6Class("Expression", inherit = ArrowObject,
     }
   ),
   active = list(
-    field_name = function() dataset___expr__get_field_ref_name(self)
+    field_name = function() compute___expr__get_field_ref_name(self)
   )
 )
 Expression$create <- function(function_name,
@@ -273,14 +273,14 @@ Expression$create <- function(function_name,
                               args = list(...),
                               options = empty_named_list()) {
   assert_that(is.string(function_name))
-  dataset___expr__call(function_name, args, options)
+  compute___expr__call(function_name, args, options)
 }
 Expression$field_ref <- function(name) {
   assert_that(is.string(name))
-  dataset___expr__field_ref(name)
+  compute___expr__field_ref(name)
 }
 Expression$scalar <- function(x) {
-  dataset___expr__scalar(Scalar$create(x))
+  compute___expr__scalar(Scalar$create(x))
 }
 
 build_dataset_expression <- function(FUN,
diff --git a/r/man/FileFormat.Rd b/r/man/FileFormat.Rd
index 795027e1f24..b8d4dc01bad 100644
--- a/r/man/FileFormat.Rd
+++ b/r/man/FileFormat.Rd
@@ -28,11 +28,8 @@ delimiter for text files
 
 `format = "parquet"``:
 \itemize{
-\item \code{use_buffered_stream}: Read files through buffered input streams rather than
-loading entire row groups at once. This may be enabled
-to reduce memory overhead. Disabled by default.
-\item \code{buffer_size}: Size of buffered stream, if enabled. Default is 8KB.
 \item \code{dict_columns}: Names of columns which should be read as dictionaries.
+\item Any Parquet options from \link{FragmentScanOptions}.
 }
 
 \code{format = "text"}: see \link{CsvParseOptions}. Note that you can specify them either
diff --git a/r/man/FragmentScanOptions.Rd b/r/man/FragmentScanOptions.Rd
index 8bafbb0b21c..103d0589505 100644
--- a/r/man/FragmentScanOptions.Rd
+++ b/r/man/FragmentScanOptions.Rd
@@ -3,6 +3,7 @@
 \name{FragmentScanOptions}
 \alias{FragmentScanOptions}
 \alias{CsvFragmentScanOptions}
+\alias{ParquetFragmentScanOptions}
 \title{Format-specific scan options}
 \description{
 A \code{FragmentScanOptions} holds options specific to a \code{FileFormat} and a scan
@@ -14,14 +15,24 @@ operation.
 \itemize{
 \item \code{format}: A string identifier of the file format. Currently supported values:
 \itemize{
+\item "parquet"
 \item "csv"/"text", aliases for the same format.
 }
 \item \code{...}: Additional format-specific options
 
+`format = "parquet"``:
+\itemize{
+\item \code{use_buffered_stream}: Read files through buffered input streams rather than
+loading entire row groups at once. This may be enabled
+to reduce memory overhead. Disabled by default.
+\item \code{buffer_size}: Size of buffered stream, if enabled. Default is 8KB.
+\item \code{pre_buffer}: Pre-buffer the raw Parquet data. This can improve performance
+on high-latency filesystems. Disabled by default.
 \code{format = "text"}: see \link{CsvConvertOptions}. Note that options can only be
 specified with the Arrow C++ library naming. Also, "block_size" from
 \link{CsvReadOptions} may be given.
 }
+}
 
 It returns the appropriate subclass of \code{FragmentScanOptions}
 (e.g. \code{CsvFragmentScanOptions}).
diff --git a/r/man/arrow-package.Rd b/r/man/arrow-package.Rd
index 0c19402a045..ca6d32a895a 100644
--- a/r/man/arrow-package.Rd
+++ b/r/man/arrow-package.Rd
@@ -26,6 +26,8 @@ Useful links:
 
 Authors:
 \itemize{
+  \item Ian Cook \email{ianmcook@gmail.com}
+  \item Jonathan Keane \email{jkeane@gmail.com}
   \item Romain François \email{romain@rstudio.com} (\href{https://orcid.org/0000-0002-2444-4226}{ORCID})
   \item Jeroen Ooms \email{jeroen@berkeley.edu}
   \item Apache Arrow \email{dev@arrow.apache.org} [copyright holder]
diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp
index c5ef6343ced..3feef14a873 100644
--- a/r/src/arrowExports.cpp
+++ b/r/src/arrowExports.cpp
@@ -1855,11 +1855,11 @@ extern "C" SEXP _arrow_dataset___ScannerBuilder__ProjectNames(SEXP sb_sexp, SEXP
 
 // dataset.cpp
 #if defined(ARROW_R_WITH_DATASET)
-void dataset___ScannerBuilder__ProjectExprs(const std::shared_ptr<ds::ScannerBuilder>& sb, const std::vector<std::shared_ptr<ds::Expression>>& exprs, const std::vector<std::string>& names);
+void dataset___ScannerBuilder__ProjectExprs(const std::shared_ptr<ds::ScannerBuilder>& sb, const std::vector<std::shared_ptr<compute::Expression>>& exprs, const std::vector<std::string>& names);
 extern "C" SEXP _arrow_dataset___ScannerBuilder__ProjectExprs(SEXP sb_sexp, SEXP exprs_sexp, SEXP names_sexp){
 BEGIN_CPP11
 	arrow::r::Input<const std::shared_ptr<ds::ScannerBuilder>&>::type sb(sb_sexp);
-	arrow::r::Input<const std::vector<std::shared_ptr<ds::Expression>>&>::type exprs(exprs_sexp);
+	arrow::r::Input<const std::vector<std::shared_ptr<compute::Expression>>&>::type exprs(exprs_sexp);
 	arrow::r::Input<const std::vector<std::string>&>::type names(names_sexp);
 	dataset___ScannerBuilder__ProjectExprs(sb, exprs, names);
 	return R_NilValue;
@@ -1873,11 +1873,11 @@ extern "C" SEXP _arrow_dataset___ScannerBuilder__ProjectExprs(SEXP sb_sexp, SEXP
 
 // dataset.cpp
 #if defined(ARROW_R_WITH_DATASET)
-void dataset___ScannerBuilder__Filter(const std::shared_ptr<ds::ScannerBuilder>& sb, const std::shared_ptr<ds::Expression>& expr);
+void dataset___ScannerBuilder__Filter(const std::shared_ptr<ds::ScannerBuilder>& sb, const std::shared_ptr<compute::Expression>& expr);
 extern "C" SEXP _arrow_dataset___ScannerBuilder__Filter(SEXP sb_sexp, SEXP expr_sexp){
 BEGIN_CPP11
 	arrow::r::Input<const std::shared_ptr<ds::ScannerBuilder>&>::type sb(sb_sexp);
-	arrow::r::Input<const std::shared_ptr<ds::Expression>&>::type expr(expr_sexp);
+	arrow::r::Input<const std::shared_ptr<compute::Expression>&>::type expr(expr_sexp);
 	dataset___ScannerBuilder__Filter(sb, expr);
 	return R_NilValue;
 END_CPP11
@@ -2927,79 +2927,79 @@ extern "C" SEXP _arrow_FixedSizeListType__list_size(SEXP type_sexp){
 #endif
 
 // expression.cpp
-#if defined(ARROW_R_WITH_DATASET)
-std::shared_ptr<ds::Expression> dataset___expr__call(std::string func_name, cpp11::list argument_list, cpp11::list options);
-extern "C" SEXP _arrow_dataset___expr__call(SEXP func_name_sexp, SEXP argument_list_sexp, SEXP options_sexp){
+#if defined(ARROW_R_WITH_ARROW)
+std::shared_ptr<compute::Expression> compute___expr__call(std::string func_name, cpp11::list argument_list, cpp11::list options);
+extern "C" SEXP _arrow_compute___expr__call(SEXP func_name_sexp, SEXP argument_list_sexp, SEXP options_sexp){
 BEGIN_CPP11
 	arrow::r::Input<std::string>::type func_name(func_name_sexp);
 	arrow::r::Input<cpp11::list>::type argument_list(argument_list_sexp);
 	arrow::r::Input<cpp11::list>::type options(options_sexp);
-	return cpp11::as_sexp(dataset___expr__call(func_name, argument_list, options));
+	return cpp11::as_sexp(compute___expr__call(func_name, argument_list, options));
 END_CPP11
 }
 #else
-extern "C" SEXP _arrow_dataset___expr__call(SEXP func_name_sexp, SEXP argument_list_sexp, SEXP options_sexp){
-	Rf_error("Cannot call dataset___expr__call(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+extern "C" SEXP _arrow_compute___expr__call(SEXP func_name_sexp, SEXP argument_list_sexp, SEXP options_sexp){
+	Rf_error("Cannot call compute___expr__call(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
 }
 #endif
 
 // expression.cpp
-#if defined(ARROW_R_WITH_DATASET)
-std::shared_ptr<ds::Expression> dataset___expr__field_ref(std::string name);
-extern "C" SEXP _arrow_dataset___expr__field_ref(SEXP name_sexp){
+#if defined(ARROW_R_WITH_ARROW)
+std::shared_ptr<compute::Expression> compute___expr__field_ref(std::string name);
+extern "C" SEXP _arrow_compute___expr__field_ref(SEXP name_sexp){
 BEGIN_CPP11
 	arrow::r::Input<std::string>::type name(name_sexp);
-	return cpp11::as_sexp(dataset___expr__field_ref(name));
+	return cpp11::as_sexp(compute___expr__field_ref(name));
 END_CPP11
 }
 #else
-extern "C" SEXP _arrow_dataset___expr__field_ref(SEXP name_sexp){
-	Rf_error("Cannot call dataset___expr__field_ref(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+extern "C" SEXP _arrow_compute___expr__field_ref(SEXP name_sexp){
+	Rf_error("Cannot call compute___expr__field_ref(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
 }
 #endif
 
 // expression.cpp
-#if defined(ARROW_R_WITH_DATASET)
-std::string dataset___expr__get_field_ref_name(const std::shared_ptr<ds::Expression>& ref);
-extern "C" SEXP _arrow_dataset___expr__get_field_ref_name(SEXP ref_sexp){
+#if defined(ARROW_R_WITH_ARROW)
+std::string compute___expr__get_field_ref_name(const std::shared_ptr<compute::Expression>& x);
+extern "C" SEXP _arrow_compute___expr__get_field_ref_name(SEXP x_sexp){
 BEGIN_CPP11
-	arrow::r::Input<const std::shared_ptr<ds::Expression>&>::type ref(ref_sexp);
-	return cpp11::as_sexp(dataset___expr__get_field_ref_name(ref));
+	arrow::r::Input<const std::shared_ptr<compute::Expression>&>::type x(x_sexp);
+	return cpp11::as_sexp(compute___expr__get_field_ref_name(x));
 END_CPP11
 }
 #else
-extern "C" SEXP _arrow_dataset___expr__get_field_ref_name(SEXP ref_sexp){
-	Rf_error("Cannot call dataset___expr__get_field_ref_name(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+extern "C" SEXP _arrow_compute___expr__get_field_ref_name(SEXP x_sexp){
+	Rf_error("Cannot call compute___expr__get_field_ref_name(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
 }
 #endif
 
 // expression.cpp
-#if defined(ARROW_R_WITH_DATASET)
-std::shared_ptr<ds::Expression> dataset___expr__scalar(const std::shared_ptr<arrow::Scalar>& x);
-extern "C" SEXP _arrow_dataset___expr__scalar(SEXP x_sexp){
+#if defined(ARROW_R_WITH_ARROW)
+std::shared_ptr<compute::Expression> compute___expr__scalar(const std::shared_ptr<arrow::Scalar>& x);
+extern "C" SEXP _arrow_compute___expr__scalar(SEXP x_sexp){
 BEGIN_CPP11
 	arrow::r::Input<const std::shared_ptr<arrow::Scalar>&>::type x(x_sexp);
-	return cpp11::as_sexp(dataset___expr__scalar(x));
+	return cpp11::as_sexp(compute___expr__scalar(x));
 END_CPP11
 }
 #else
-extern "C" SEXP _arrow_dataset___expr__scalar(SEXP x_sexp){
-	Rf_error("Cannot call dataset___expr__scalar(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+extern "C" SEXP _arrow_compute___expr__scalar(SEXP x_sexp){
+	Rf_error("Cannot call compute___expr__scalar(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
 }
 #endif
 
 // expression.cpp
-#if defined(ARROW_R_WITH_DATASET)
-std::string dataset___expr__ToString(const std::shared_ptr<ds::Expression>& x);
-extern "C" SEXP _arrow_dataset___expr__ToString(SEXP x_sexp){
+#if defined(ARROW_R_WITH_ARROW)
+std::string compute___expr__ToString(const std::shared_ptr<compute::Expression>& x);
+extern "C" SEXP _arrow_compute___expr__ToString(SEXP x_sexp){
 BEGIN_CPP11
-	arrow::r::Input<const std::shared_ptr<ds::Expression>&>::type x(x_sexp);
-	return cpp11::as_sexp(dataset___expr__ToString(x));
+	arrow::r::Input<const std::shared_ptr<compute::Expression>&>::type x(x_sexp);
+	return cpp11::as_sexp(compute___expr__ToString(x));
 END_CPP11
 }
 #else
-extern "C" SEXP _arrow_dataset___expr__ToString(SEXP x_sexp){
-	Rf_error("Cannot call dataset___expr__ToString(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+extern "C" SEXP _arrow_compute___expr__ToString(SEXP x_sexp){
+	Rf_error("Cannot call compute___expr__ToString(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
 }
 #endif
 
@@ -6793,11 +6793,11 @@ static const R_CallMethodDef CallEntries[] = {
 		{ "_arrow_FixedSizeListType__value_field", (DL_FUNC) &_arrow_FixedSizeListType__value_field, 1}, 
 		{ "_arrow_FixedSizeListType__value_type", (DL_FUNC) &_arrow_FixedSizeListType__value_type, 1}, 
 		{ "_arrow_FixedSizeListType__list_size", (DL_FUNC) &_arrow_FixedSizeListType__list_size, 1}, 
-		{ "_arrow_dataset___expr__call", (DL_FUNC) &_arrow_dataset___expr__call, 3}, 
-		{ "_arrow_dataset___expr__field_ref", (DL_FUNC) &_arrow_dataset___expr__field_ref, 1}, 
-		{ "_arrow_dataset___expr__get_field_ref_name", (DL_FUNC) &_arrow_dataset___expr__get_field_ref_name, 1}, 
-		{ "_arrow_dataset___expr__scalar", (DL_FUNC) &_arrow_dataset___expr__scalar, 1}, 
-		{ "_arrow_dataset___expr__ToString", (DL_FUNC) &_arrow_dataset___expr__ToString, 1}, 
+		{ "_arrow_compute___expr__call", (DL_FUNC) &_arrow_compute___expr__call, 3}, 
+		{ "_arrow_compute___expr__field_ref", (DL_FUNC) &_arrow_compute___expr__field_ref, 1}, 
+		{ "_arrow_compute___expr__get_field_ref_name", (DL_FUNC) &_arrow_compute___expr__get_field_ref_name, 1}, 
+		{ "_arrow_compute___expr__scalar", (DL_FUNC) &_arrow_compute___expr__scalar, 1}, 
+		{ "_arrow_compute___expr__ToString", (DL_FUNC) &_arrow_compute___expr__ToString, 1}, 
 		{ "_arrow_ipc___WriteFeather__Table", (DL_FUNC) &_arrow_ipc___WriteFeather__Table, 6}, 
 		{ "_arrow_ipc___feather___Reader__version", (DL_FUNC) &_arrow_ipc___feather___Reader__version, 1}, 
 		{ "_arrow_ipc___feather___Reader__Read", (DL_FUNC) &_arrow_ipc___feather___Reader__Read, 2}, 
diff --git a/r/src/arrow_types.h b/r/src/arrow_types.h
index b94ab764729..5f7c725ffec 100644
--- a/r/src/arrow_types.h
+++ b/r/src/arrow_types.h
@@ -55,6 +55,7 @@
 namespace ds = ::arrow::dataset;
 #endif
 
+namespace compute = ::arrow::compute;
 namespace fs = ::arrow::fs;
 
 SEXP ChunkedArray__as_vector(const std::shared_ptr<arrow::ChunkedArray>& chunked_array);
diff --git a/r/src/dataset.cpp b/r/src/dataset.cpp
index f4d7746eb10..7d8ccae6eee 100644
--- a/r/src/dataset.cpp
+++ b/r/src/dataset.cpp
@@ -31,6 +31,7 @@
 
 namespace ds = ::arrow::dataset;
 namespace fs = ::arrow::fs;
+namespace compute = ::arrow::compute;
 
 namespace cpp11 {
 
@@ -370,10 +371,10 @@ void dataset___ScannerBuilder__ProjectNames(const std::shared_ptr<ds::ScannerBui
 // [[dataset::export]]
 void dataset___ScannerBuilder__ProjectExprs(
     const std::shared_ptr<ds::ScannerBuilder>& sb,
-    const std::vector<std::shared_ptr<ds::Expression>>& exprs,
+    const std::vector<std::shared_ptr<compute::Expression>>& exprs,
     const std::vector<std::string>& names) {
   // We have shared_ptrs of expressions but need the Expressions
-  std::vector<ds::Expression> expressions;
+  std::vector<compute::Expression> expressions;
   for (auto expr : exprs) {
     expressions.push_back(*expr);
   }
@@ -382,7 +383,7 @@ void dataset___ScannerBuilder__ProjectExprs(
 
 // [[dataset::export]]
 void dataset___ScannerBuilder__Filter(const std::shared_ptr<ds::ScannerBuilder>& sb,
-                                      const std::shared_ptr<ds::Expression>& expr) {
+                                      const std::shared_ptr<compute::Expression>& expr) {
   StopIfNotOk(sb->Filter(*expr));
 }
 
diff --git a/r/src/expression.cpp b/r/src/expression.cpp
index 0e8fd52034d..798853edd72 100644
--- a/r/src/expression.cpp
+++ b/r/src/expression.cpp
@@ -17,54 +17,54 @@
 
 #include "./arrow_types.h"
 
-#if defined(ARROW_R_WITH_DATASET)
+#if defined(ARROW_R_WITH_ARROW)
 
 #include <arrow/compute/api_scalar.h>
-#include <arrow/dataset/api.h>
-namespace ds = ::arrow::dataset;
+#include <arrow/compute/exec/expression.h>
 
-std::shared_ptr<arrow::compute::FunctionOptions> make_compute_options(
-    std::string func_name, cpp11::list options);
+namespace compute = ::arrow::compute;
 
-// [[dataset::export]]
-std::shared_ptr<ds::Expression> dataset___expr__call(std::string func_name,
-                                                     cpp11::list argument_list,
-                                                     cpp11::list options) {
-  std::vector<ds::Expression> arguments;
+std::shared_ptr<compute::FunctionOptions> make_compute_options(std::string func_name,
+                                                               cpp11::list options);
+
+// [[arrow::export]]
+std::shared_ptr<compute::Expression> compute___expr__call(std::string func_name,
+                                                          cpp11::list argument_list,
+                                                          cpp11::list options) {
+  std::vector<compute::Expression> arguments;
   for (SEXP argument : argument_list) {
-    auto argument_ptr = cpp11::as_cpp<std::shared_ptr<ds::Expression>>(argument);
+    auto argument_ptr = cpp11::as_cpp<std::shared_ptr<compute::Expression>>(argument);
     arguments.push_back(*argument_ptr);
   }
 
   auto options_ptr = make_compute_options(func_name, options);
 
-  return std::make_shared<ds::Expression>(
-      ds::call(std::move(func_name), std::move(arguments), std::move(options_ptr)));
+  return std::make_shared<compute::Expression>(
+      compute::call(std::move(func_name), std::move(arguments), std::move(options_ptr)));
 }
 
-// [[dataset::export]]
-std::shared_ptr<ds::Expression> dataset___expr__field_ref(std::string name) {
-  return std::make_shared<ds::Expression>(ds::field_ref(std::move(name)));
+// [[arrow::export]]
+std::shared_ptr<compute::Expression> compute___expr__field_ref(std::string name) {
+  return std::make_shared<compute::Expression>(compute::field_ref(std::move(name)));
 }
 
-// [[dataset::export]]
-std::string dataset___expr__get_field_ref_name(
-    const std::shared_ptr<ds::Expression>& ref) {
-  auto field_ref = ref->field_ref();
-  if (field_ref == nullptr) {
-    return "";
+// [[arrow::export]]
+std::string compute___expr__get_field_ref_name(
+    const std::shared_ptr<compute::Expression>& x) {
+  if (auto field_ref = x->field_ref()) {
+    return *field_ref->name();
   }
-  return *field_ref->name();
+  return "";
 }
 
-// [[dataset::export]]
-std::shared_ptr<ds::Expression> dataset___expr__scalar(
+// [[arrow::export]]
+std::shared_ptr<compute::Expression> compute___expr__scalar(
     const std::shared_ptr<arrow::Scalar>& x) {
-  return std::make_shared<ds::Expression>(ds::literal(std::move(x)));
+  return std::make_shared<compute::Expression>(compute::literal(std::move(x)));
 }
 
-// [[dataset::export]]
-std::string dataset___expr__ToString(const std::shared_ptr<ds::Expression>& x) {
+// [[arrow::export]]
+std::string compute___expr__ToString(const std::shared_ptr<compute::Expression>& x) {
   return x->ToString();
 }
 

From 308e7e9a2861c23981e304e446a4d821112f49a9 Mon Sep 17 00:00:00 2001
From: ZMZ <zmz@yanhuangdata.com>
Date: Fri, 30 Apr 2021 16:26:33 +0800
Subject: [PATCH 153/719] support null in makeif

---
 cpp/src/gandiva/llvm_generator.cc  |  3 ++
 cpp/src/gandiva/tests/null_test.cc | 47 +++++++++++++++++++++++++-----
 2 files changed, 42 insertions(+), 8 deletions(-)

diff --git a/cpp/src/gandiva/llvm_generator.cc b/cpp/src/gandiva/llvm_generator.cc
index f7f1d464474..dcbe6c3384a 100644
--- a/cpp/src/gandiva/llvm_generator.cc
+++ b/cpp/src/gandiva/llvm_generator.cc
@@ -1145,6 +1145,9 @@ LValuePtr LLVMGenerator::Visitor::BuildIfElse(llvm::Value* condition,
   // Emit the merge block.
   builder->SetInsertPoint(merge_bb);
   auto llvm_type = types->IRType(result_type->id());
+  if (llvm_type == nullptr) {
+    return nullptr;
+  }
   llvm::PHINode* result_value = builder->CreatePHI(llvm_type, 2, "res_value");
   result_value->addIncoming(then_lvalue->data(), then_bb);
   result_value->addIncoming(else_lvalue->data(), else_bb);
diff --git a/cpp/src/gandiva/tests/null_test.cc b/cpp/src/gandiva/tests/null_test.cc
index e018ab0dff4..db67117c3ef 100644
--- a/cpp/src/gandiva/tests/null_test.cc
+++ b/cpp/src/gandiva/tests/null_test.cc
@@ -57,10 +57,7 @@ TEST_F(TestNull, TestSimple) {
   EXPECT_TRUE(status.ok()) << status.message();
 
   arrow::ArrayVector outputs;
-  auto nb = std::make_shared<arrow::NullBuilder>();
-  auto _ = nb->AppendNulls(4);
-  std::shared_ptr<arrow::NullArray> null_array;
-  _ = nb->Finish(&null_array);
+  auto null_array = std::make_shared<arrow::NullArray>(4);
   auto in_batch = arrow::RecordBatch::Make(schema, 4, {null_array});
   status = projector->Evaluate(*in_batch, pool_, &outputs);
   EXPECT_TRUE(status.ok());
@@ -106,10 +103,7 @@ TEST_F(TestNull, TestOps) {
   EXPECT_TRUE(status.ok()) << status.message();
 
   arrow::ArrayVector outputs;
-  auto nb = std::make_shared<arrow::NullBuilder>();
-  auto _ = nb->AppendNulls(4);
-  std::shared_ptr<arrow::NullArray> null_array;
-  _ = nb->Finish(&null_array);
+  auto null_array = std::make_shared<arrow::NullArray>(4);
   auto in_batch = arrow::RecordBatch::Make(schema, 4, {null_array});
   status = projector->Evaluate(*in_batch, pool_, &outputs);
   EXPECT_TRUE(status.ok());
@@ -125,4 +119,41 @@ TEST_F(TestNull, TestOps) {
   EXPECT_ARROW_ARRAY_EQUALS(exp_false, outputs.at(7));
 }
 
+TEST_F(TestNull, TestMakeIf) {
+  // schema for input fields
+  auto field_null = field("field_null", null());
+  auto schema = arrow::schema({field_null});
+
+  // output fields
+  auto res_1 = field("res1", null());
+  auto res_2 = field("res2", null());
+
+  auto null_node = TreeExprBuilder::MakeNull(null());
+  auto expr_1 = TreeExprBuilder::MakeExpression(
+      TreeExprBuilder::MakeIf(TreeExprBuilder::MakeLiteral(true), null_node, null_node,
+                              null()),
+      res_1);
+  auto expr_2 = TreeExprBuilder::MakeExpression(
+      TreeExprBuilder::MakeIf(TreeExprBuilder::MakeLiteral(false), null_node, null_node,
+                              null()),
+      res_2);
+
+  // Build a projector for the expressions.
+  std::shared_ptr<Projector> projector;
+  auto status =
+      Projector::Make(schema, {expr_1, expr_2}, TestConfiguration(), &projector);
+  EXPECT_TRUE(status.ok()) << status.message();
+
+  arrow::ArrayVector outputs;
+  auto null_array = std::make_shared<arrow::NullArray>(4);
+  auto in_batch = arrow::RecordBatch::Make(schema, 4, {null_array});
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  EXPECT_TRUE(status.ok());
+
+  // Validate results
+  for (auto& output : outputs) {
+    EXPECT_EQ(output->null_count(), 4);
+  }
+}
+
 }  // namespace gandiva

From 9da02ce4b2ba192fb51f5419511a0721a84ad5fe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Fri, 30 Apr 2021 13:56:23 +0200
Subject: [PATCH 154/719] ARROW-12600: [CI] Push docker images from crossbow
 tasks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Manually submitted build: https://github.com/ursacomputing/crossbow/runs/2467679252

Closes #10197 from kszucs/push-docker-images-from-crossbow-tasks

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 dev/tasks/docker-tests/github.linux.yml | 21 +++++++----
 dev/tasks/tasks.yml                     | 46 ++++++++++++++-----------
 2 files changed, 40 insertions(+), 27 deletions(-)

diff --git a/dev/tasks/docker-tests/github.linux.yml b/dev/tasks/docker-tests/github.linux.yml
index 255c9ac14c4..06837a88ded 100644
--- a/dev/tasks/docker-tests/github.linux.yml
+++ b/dev/tasks/docker-tests/github.linux.yml
@@ -23,6 +23,12 @@ jobs:
   test:
     name: Docker Test
     runs-on: ubuntu-latest
+  {% if env is defined %}
+    env:
+    {% for key, value in env.items() %}
+      {{ key }}: {{ value }}
+    {% endfor %}
+  {% endif %}
     steps:
       {{ macros.github_checkout_arrow()|indent }}
       {{ macros.github_install_archery()|indent }}
@@ -33,10 +39,11 @@ jobs:
 
       - name: Execute Docker Build
         shell: bash
-        {% if env is defined %}
-        env:
-        {% for key, value in env.items() %}
-          {{ key }}: {{ value }}
-        {% endfor %}
-        {% endif %}
-        run: archery docker run -e SETUPTOOLS_SCM_PRETEND_VERSION="{{ arrow.no_rc_version }}" {{ run }}
+        run: archery docker run -e SETUPTOOLS_SCM_PRETEND_VERSION="{{ arrow.no_rc_version }}" {{ flags|default("") }} {{ image }}
+
+    {% if arrow.branch == 'master' %}
+      {{ macros.github_login_dockerhub()|indent }}
+      - name: Push Docker Image
+        shell: bash
+        run: archery docker push {{ image }}
+    {% endif %}
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index 7c542536dbf..b074407cc63 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -1234,7 +1234,7 @@ tasks:
     ci: github
     template: docker-tests/github.linux.yml
     params:
-      run: {{ image }}
+      image: {{ image }}
 {% endfor %}
 
 {% for ubuntu_version in ["18.04", "20.04"] %}
@@ -1244,7 +1244,7 @@ tasks:
     params:
       env:
         UBUNTU: {{ ubuntu_version }}
-      run: ubuntu-cpp
+      image: ubuntu-cpp
 {% endfor %}
 
   test-debian-10-cpp:
@@ -1253,7 +1253,7 @@ tasks:
     params:
       env:
         DEBIAN: 10
-      run: debian-cpp
+      image: debian-cpp
 
   test-fedora-33-cpp:
     ci: github
@@ -1261,7 +1261,7 @@ tasks:
     params:
       env:
         FEDORA: 33
-      run: fedora-cpp
+      image: fedora-cpp
 
   test-ubuntu-18.04-cpp-release:
     ci: github
@@ -1269,7 +1269,8 @@ tasks:
     params:
       env:
         UBUNTU: 18.04
-      run: "-e ARROW_BUILD_TYPE=release ubuntu-cpp"
+      flags: "-e ARROW_BUILD_TYPE=release"
+      image: ubuntu-cpp
 
   test-ubuntu-18.04-cpp-static:
     ci: github
@@ -1277,7 +1278,8 @@ tasks:
     params:
       env:
         UBUNTU: 18.04
-      run: "-e ARROW_BUILD_SHARED=OFF -e ARROW_BUILD_STATIC=ON -e ARROW_TEST_LINKAGE=static ubuntu-cpp"
+      flags: "-e ARROW_BUILD_SHARED=OFF -e ARROW_BUILD_STATIC=ON -e ARROW_TEST_LINKAGE=static"
+      image: ubuntu-cpp
 
 {% for cpp_standard in [14, 17] %}
   test-ubuntu-20.04-cpp-{{ cpp_standard }}:
@@ -1286,7 +1288,8 @@ tasks:
     params:
       env:
         UBUNTU: 20.04
-      run: "-e CMAKE_ARGS=-DCMAKE_CXX_STANDARD={{ cpp_standard }} ubuntu-cpp"
+      flags: "-e CMAKE_ARGS=-DCMAKE_CXX_STANDARD={{ cpp_standard }}"
+      image: ubuntu-cpp
 {% endfor %}
 
   test-ubuntu-20.04-cpp-thread-sanitizer:
@@ -1296,7 +1299,7 @@ tasks:
       env:
         CLANG_TOOLS: 11
         UBUNTU: 20.04
-      run: ubuntu-cpp-thread-sanitizer
+      image: ubuntu-cpp-thread-sanitizer
 
 {% for python_version in ["3.6", "3.7", "3.8", "3.9"] %}
   test-conda-python-{{ python_version }}:
@@ -1305,7 +1308,7 @@ tasks:
     params:
       env:
         PYTHON: {{ python_version }}
-      run: conda-python
+      image: conda-python
 {% endfor %}
 
   test-conda-python-3.8-hypothesis:
@@ -1318,7 +1321,7 @@ tasks:
         PYTHON: 3.8
         # limit to execute hypothesis tests only
         PYTEST_ARGS: "-m hypothesis"
-      run: conda-python-pandas
+      image: conda-python-pandas
 
   test-debian-10-python-3:
     ci: azure
@@ -1464,11 +1467,10 @@ tasks:
         PYTHON: {{ python_version }}
         PANDAS: {{ pandas_version }}
     {% if cache_leaf %}
-      run: conda-python-pandas
-    {% else %}
       # use the latest pandas release, so prevent reusing any cached layers
-      run: --no-leaf-cache conda-python-pandas
+      flags: --no-leaf-cache
     {% endif %}
+      image: conda-python-pandas
 {% endfor %}
 
 {% for dask_version in ["latest", "master"] %}
@@ -1480,7 +1482,8 @@ tasks:
         PYTHON: 3.7
         DASK: {{ dask_version }}
       # use the latest dask release, so prevent reusing any cached layers
-      run: --no-leaf-cache conda-python-dask
+      flags: --no-leaf-cache
+      image: conda-python-dask
 {% endfor %}
 
 {% for turbodbc_version in ["latest", "master"] %}
@@ -1492,7 +1495,8 @@ tasks:
         PYTHON: 3.7
         TURBODBC: {{ turbodbc_version }}
       # use the latest turbodbc release, so prevent reusing any cached layers
-      run: --no-leaf-cache conda-python-turbodbc
+      flags: --no-leaf-cache
+      image: conda-python-turbodbc
 {% endfor %}
 
 {% for kartothek_version in ["latest", "master"] %}
@@ -1503,7 +1507,8 @@ tasks:
       env:
         PYTHON: 3.7
         KARTOTHEK: {{ kartothek_version }}
-      run: --no-leaf-cache conda-python-kartothek
+      flags: --no-leaf-cache
+      image: conda-python-kartothek
 {% endfor %}
 
 {% for hdfs_version in ["2.9.2", "3.2.1"] %}
@@ -1514,7 +1519,7 @@ tasks:
       env:
         PYTHON: 3.7
         HDFS: {{ hdfs_version }}
-      run: conda-python-hdfs
+      image: conda-python-hdfs
 {% endfor %}
 
 {% for python_version, spark_version, test_pyarrow_only in [("3.7", "branch-3.0", "true"),
@@ -1528,7 +1533,8 @@ tasks:
         SPARK: {{ spark_version }}
         TEST_PYARROW_ONLY: {{ test_pyarrow_only }}
       # use the branch-3.0 of spark, so prevent reusing any layers
-      run: --no-leaf-cache conda-python-spark
+      flags: --no-leaf-cache
+      image: conda-python-spark
 {% endfor %}
 
   test-conda-python-3.8-jpype:
@@ -1537,14 +1543,14 @@ tasks:
     params:
       env:
         PYTHON: 3.8
-      run: conda-python-jpype
+      image: conda-python-jpype
 
   # Remove the "skipped-" prefix in ARROW-8475
   skipped-test-conda-cpp-hiveserver2:
     ci: github
     template: docker-tests/github.linux.yml
     params:
-      run: conda-cpp-hiveserver2
+      image: conda-cpp-hiveserver2
 
 {% for kind in ["static", "static-system-dependency"] %}
   example-cpp-minimal-build-{{ kind }}:

From 73c4bd1afe99bbd1ea56471f03a83e802d470d6d Mon Sep 17 00:00:00 2001
From: Ying Zhou <yingzhou474@gmail.com>
Date: Fri, 30 Apr 2021 09:13:34 -0400
Subject: [PATCH 155/719] MINOR: Alter the feature matrix to indicate that the
 ORC writer is available in Arrow C++ Library

Closes #10182 from mathyingzhou/patch-1

Authored-by: Ying Zhou <yingzhou474@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 docs/source/status.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/status.rst b/docs/source/status.rst
index a33908d9a13..a06ef40968a 100644
--- a/docs/source/status.rst
+++ b/docs/source/status.rst
@@ -221,7 +221,7 @@ Third-Party Data Formats
 +-----------------------------+---------+---------+-------+------------+-------+---------+-------+
 | CSV                         | R       |         | R/W   |            |       | R/W     | R/W   |
 +-----------------------------+---------+---------+-------+------------+-------+---------+-------+
-| ORC                         | R       |         |       |            |       |         |       |
+| ORC                         | R/W     |         |       |            |       |         |       |
 +-----------------------------+---------+---------+-------+------------+-------+---------+-------+
 | Parquet                     | R/W     | R (2)   |       |            |       | R/W (1) |       |
 +-----------------------------+---------+---------+-------+------------+-------+---------+-------+

From d76909adbe8830eef8e3826dbba5c5dff38b0588 Mon Sep 17 00:00:00 2001
From: Jeroen Ooms <jeroenooms@gmail.com>
Date: Fri, 30 Apr 2021 15:09:28 -0500
Subject: [PATCH 156/719] ARROW-11926: [R] preparations for ucrt toolchains

Minimal changes to link to ucrt builds specifically once supported (and will be safely redundant until then).

Closes #10217 from jeroen/winucrt

Lead-authored-by: Jeroen Ooms <jeroenooms@gmail.com>
Co-authored-by: Neal Richardson <neal.p.richardson@gmail.com>
Signed-off-by: Jonathan Keane <jkeane@gmail.com>
---
 r/configure.win     |  2 +-
 r/src/Makevars.ucrt | 19 +++++++++++++++++++
 2 files changed, 20 insertions(+), 1 deletion(-)
 create mode 100644 r/src/Makevars.ucrt

diff --git a/r/configure.win b/r/configure.win
index d645834fac8..f31bf8f9cf4 100644
--- a/r/configure.win
+++ b/r/configure.win
@@ -50,7 +50,7 @@ AWS_LIBS="-laws-cpp-sdk-config -laws-cpp-sdk-transfer -laws-cpp-sdk-identity-man
 # NOTE: If you make changes to the libraries below, you should also change
 # ci/scripts/r_windows_build.sh and ci/scripts/PKGBUILD
 PKG_CFLAGS="-I${RWINLIB}/include -DARROW_STATIC -DPARQUET_STATIC -DARROW_DS_STATIC -DARROW_R_WITH_ARROW -DARROW_R_WITH_PARQUET -DARROW_R_WITH_DATASET"
-PKG_LIBS="-L${RWINLIB}/lib"'$(subst gcc,,$(COMPILED_BY))$(R_ARCH) '"-L${RWINLIB}/lib"'$(R_ARCH) '"-lparquet -larrow_dataset -larrow -larrow_bundled_dependencies -lutf8proc -lthrift -lsnappy -lz -lzstd -llz4 ${MIMALLOC_LIBS} ${OPENSSL_LIBS}"
+PKG_LIBS="-L${RWINLIB}/lib"'$(subst gcc,,$(COMPILED_BY))$(R_ARCH) '"-L${RWINLIB}/lib"'$(R_ARCH)$(CRT) '"-lparquet -larrow_dataset -larrow -larrow_bundled_dependencies -lutf8proc -lthrift -lsnappy -lz -lzstd -llz4 ${MIMALLOC_LIBS} ${OPENSSL_LIBS}"
 
 # S3 and re2 support only for Rtools40 (i.e. R >= 4.0)
 "${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe" -e 'R.version$major >= 4' | grep TRUE >/dev/null 2>&1
diff --git a/r/src/Makevars.ucrt b/r/src/Makevars.ucrt
new file mode 100644
index 00000000000..52488eb2b85
--- /dev/null
+++ b/r/src/Makevars.ucrt
@@ -0,0 +1,19 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+CRT=-ucrt
+include Makevars.win

From 2ece340a731765c530a5a274deaeefe156ac1da9 Mon Sep 17 00:00:00 2001
From: Kazuaki Ishizaki <ishizaki@jp.ibm.com>
Date: Sat, 1 May 2021 05:17:08 +0900
Subject: [PATCH 157/719] ARROW-10031: [CI][Java] Support Java benchmark in
 Archery

This PR supports Java benchmark in Ursabot. The implementation is based on [this suggestion](https://mail-archives.apache.org/mod_mbox/arrow-dev/202008.mbox/%3cCABNn7+q35j7QWsHJBX8omdewKT+F1p_M7r1_F6szs4dqc+Luyg@mail.gmail.com%3e)

Here are work items.
- [x] Support `--language=[cpp|java]` option in `diff`
- [x] Enable to build java binding
- [x] Enable to run Java benchmarks
- [x] Allows us to filter/select benchmarks
- [x] Enable to collect results
- [x] Apply the same changes to `run` and `list`

Closes #8210 from kiszk/ARROW-10031

Authored-by: Kazuaki Ishizaki <ishizaki@jp.ibm.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 dev/archery/archery/benchmark/jmh.py    | 201 +++++++++++++++++++++++
 dev/archery/archery/benchmark/runner.py | 169 ++++++++++++++++----
 dev/archery/archery/cli.py              | 155 ++++++++++++++----
 dev/archery/archery/lang/java.py        |  47 ++++++
 dev/archery/archery/utils/maven.py      | 204 ++++++++++++++++++++++++
 dev/archery/archery/utils/source.py     |   5 +
 java/performance/pom.xml                |  11 ++
 7 files changed, 723 insertions(+), 69 deletions(-)
 create mode 100644 dev/archery/archery/benchmark/jmh.py
 create mode 100644 dev/archery/archery/utils/maven.py

diff --git a/dev/archery/archery/benchmark/jmh.py b/dev/archery/archery/benchmark/jmh.py
new file mode 100644
index 00000000000..f531b6de163
--- /dev/null
+++ b/dev/archery/archery/benchmark/jmh.py
@@ -0,0 +1,201 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from itertools import filterfalse, groupby, tee
+import json
+import subprocess
+from tempfile import NamedTemporaryFile
+
+from .core import Benchmark
+from ..utils.command import Command
+from ..utils.maven import Maven
+
+
+def partition(pred, iterable):
+    # adapted from python's examples
+    t1, t2 = tee(iterable)
+    return list(filter(pred, t1)), list(filterfalse(pred, t2))
+
+
+class JavaMicrobenchmarkHarnessCommand(Command):
+    """ Run a Java Micro Benchmark Harness
+
+    This assumes the binary supports the standard command line options,
+    notably `-Dbenchmark_filter`
+    """
+
+    def __init__(self, build, benchmark_filter=None):
+        self.benchmark_filter = benchmark_filter
+        self.build = build
+        self.maven = Maven()
+
+    """ Extract benchmark names from output between "Benchmarks:" and "[INFO]".
+    Assume the following output:
+      ...
+      Benchmarks:
+      org.apache.arrow.vector.IntBenchmarks.setIntDirectly
+      ...
+      org.apache.arrow.vector.IntBenchmarks.setWithValueHolder
+      org.apache.arrow.vector.IntBenchmarks.setWithWriter
+      ...
+      [INFO]
+    """
+
+    def list_benchmarks(self):
+        argv = []
+        if self.benchmark_filter:
+            argv.append("-Dbenchmark.filter={}".format(self.benchmark_filter))
+        result = self.build.list(
+            *argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+        lists = []
+        benchmarks = False
+        for line in str.splitlines(result.stdout.decode("utf-8")):
+            if not benchmarks:
+                if line.startswith("Benchmarks:"):
+                    benchmarks = True
+            else:
+                if line.startswith("org.apache.arrow"):
+                    lists.append(line)
+                if line.startswith("[INFO]"):
+                    break
+        return lists
+
+    def results(self, repetitions):
+        with NamedTemporaryFile(suffix=".json") as out:
+            argv = ["-Dbenchmark.runs={}".format(repetitions),
+                    "-Dbenchmark.resultfile={}".format(out.name),
+                    "-Dbenchmark.resultformat=json"]
+            if self.benchmark_filter:
+                argv.append(
+                    "-Dbenchmark.filter={}".format(self.benchmark_filter)
+                )
+
+            self.build.benchmark(*argv, check=True)
+            return json.load(out)
+
+
+class JavaMicrobenchmarkHarnessObservation:
+    """ Represents one run of a single Java Microbenchmark Harness
+    """
+
+    def __init__(self, benchmark, primaryMetric,
+                 forks, warmupIterations, measurementIterations, **counters):
+        self.name = benchmark
+        self.primaryMetric = primaryMetric
+        self.score = primaryMetric["score"]
+        self.score_unit = primaryMetric["scoreUnit"]
+        self.forks = forks
+        self.warmups = warmupIterations
+        self.runs = measurementIterations
+        self.counters = {
+            "mode": counters["mode"],
+            "threads": counters["threads"],
+            "warmups": warmupIterations,
+            "warmupTime": counters["warmupTime"],
+            "measurements": measurementIterations,
+            "measurementTime": counters["measurementTime"],
+            "jvmArgs": counters["jvmArgs"]
+        }
+        self.reciprocal_value = True if self.score_unit.endswith(
+            "/op") else False
+        if self.score_unit.startswith("ops/"):
+            idx = self.score_unit.find("/")
+            self.normalizePerSec(self.score_unit[idx+1:])
+        elif self.score_unit.endswith("/op"):
+            idx = self.score_unit.find("/")
+            self.normalizePerSec(self.score_unit[:idx])
+        else:
+            self.normalizeFactor = 1
+
+    @property
+    def value(self):
+        """ Return the benchmark value."""
+        val = 1 / self.score if self.reciprocal_value else self.score
+        return val * self.normalizeFactor
+
+    def normalizePerSec(self, unit):
+        if unit == "ns":
+            self.normalizeFactor = 1000 * 1000 * 1000
+        elif unit == "us":
+            self.normalizeFactor = 1000 * 1000
+        elif unit == "ms":
+            self.normalizeFactor = 1000
+        elif unit == "min":
+            self.normalizeFactor = 1 / 60
+        elif unit == "hr":
+            self.normalizeFactor = 1 / (60 * 60)
+        elif unit == "day":
+            self.normalizeFactor = 1 / (60 * 60 * 24)
+        else:
+            self.normalizeFactor = 1
+
+    @property
+    def unit(self):
+        if self.score_unit.startswith("ops/"):
+            return "items_per_second"
+        elif self.score_unit.endswith("/op"):
+            return "items_per_second"
+        else:
+            return "?"
+
+    def __repr__(self):
+        return str(self.value)
+
+
+class JavaMicrobenchmarkHarness(Benchmark):
+    """ A set of JavaMicrobenchmarkHarnessObservations. """
+
+    def __init__(self, name, runs):
+        """ Initialize a JavaMicrobenchmarkHarness.
+
+        Parameters
+        ----------
+        name: str
+              Name of the benchmark
+        forks: int
+        warmups: int
+        runs: int
+        runs: list(JavaMicrobenchmarkHarnessObservation)
+              Repetitions of JavaMicrobenchmarkHarnessObservation run.
+
+        """
+        self.name = name
+        self.runs = sorted(runs, key=lambda b: b.value)
+        unit = self.runs[0].unit
+        time_unit = "N/A"
+        less_is_better = not unit.endswith("per_second")
+        values = [b.value for b in self.runs]
+        times = []
+        # Slight kludge to extract the UserCounters for each benchmark
+        counters = self.runs[0].counters
+        super().__init__(name, unit, less_is_better, values, time_unit, times,
+                         counters)
+
+    def __repr__(self):
+        return "JavaMicrobenchmark[name={},runs={}]".format(
+            self.name, self.runs)
+
+    @classmethod
+    def from_json(cls, payload):
+        def group_key(x):
+            return x.name
+
+        benchmarks = map(
+            lambda x: JavaMicrobenchmarkHarnessObservation(**x), payload)
+        groups = groupby(sorted(benchmarks, key=group_key), group_key)
+        return [cls(k, list(bs)) for k, bs in groups]
diff --git a/dev/archery/archery/benchmark/runner.py b/dev/archery/archery/benchmark/runner.py
index 5718bcaf108..fc6d354b180 100644
--- a/dev/archery/archery/benchmark/runner.py
+++ b/dev/archery/archery/benchmark/runner.py
@@ -22,8 +22,11 @@
 
 from .core import BenchmarkSuite
 from .google import GoogleBenchmarkCommand, GoogleBenchmark
+from .jmh import JavaMicrobenchmarkHarnessCommand, JavaMicrobenchmarkHarness
 from ..lang.cpp import CppCMakeDefinition, CppConfiguration
+from ..lang.java import JavaMavenDefinition, JavaConfiguration
 from ..utils.cmake import CMakeBuild
+from ..utils.maven import MavenBuild
 from ..utils.logger import logger
 
 
@@ -50,40 +53,8 @@ def suites(self):
 
     @staticmethod
     def from_rev_or_path(src, root, rev_or_path, cmake_conf, **kwargs):
-        """ Returns a BenchmarkRunner from a path or a git revision.
-
-        First, it checks if `rev_or_path` is a valid path (or string) of a json
-        object that can deserialize to a BenchmarkRunner. If so, it initialize
-        a StaticBenchmarkRunner from it. This allows memoizing the result of a
-        run in a file or a string.
-
-        Second, it checks if `rev_or_path` points to a valid CMake build
-        directory.  If so, it creates a CppBenchmarkRunner with this existing
-        CMakeBuild.
-
-        Otherwise, it assumes `rev_or_path` is a revision and clone/checkout
-        the given revision and create a fresh CMakeBuild.
-        """
-        build = None
-        if StaticBenchmarkRunner.is_json_result(rev_or_path):
-            return StaticBenchmarkRunner.from_json(rev_or_path, **kwargs)
-        elif CMakeBuild.is_build_dir(rev_or_path):
-            build = CMakeBuild.from_path(rev_or_path)
-            return CppBenchmarkRunner(build, **kwargs)
-        else:
-            # Revisions can references remote via the `/` character, ensure
-            # that the revision is path friendly
-            path_rev = rev_or_path.replace("/", "_")
-            root_rev = os.path.join(root, path_rev)
-            os.mkdir(root_rev)
-
-            clone_dir = os.path.join(root_rev, "arrow")
-            # Possibly checkout the sources at given revision, no need to
-            # perform cleanup on cloned repository as root_rev is reclaimed.
-            src_rev, _ = src.at_revision(rev_or_path, clone_dir)
-            cmake_def = CppCMakeDefinition(src_rev.cpp, cmake_conf)
-            build_dir = os.path.join(root_rev, "build")
-            return CppBenchmarkRunner(cmake_def.build(build_dir), **kwargs)
+        raise NotImplementedError(
+            "BenchmarkRunner must implement from_rev_or_path")
 
 
 class StaticBenchmarkRunner(BenchmarkRunner):
@@ -210,3 +181,133 @@ def suites(self):
                 continue
 
             yield suite
+
+    @staticmethod
+    def from_rev_or_path(src, root, rev_or_path, cmake_conf, **kwargs):
+        """ Returns a BenchmarkRunner from a path or a git revision.
+
+        First, it checks if `rev_or_path` is a valid path (or string) of a json
+        object that can deserialize to a BenchmarkRunner. If so, it initialize
+        a StaticBenchmarkRunner from it. This allows memoizing the result of a
+        run in a file or a string.
+
+        Second, it checks if `rev_or_path` points to a valid CMake build
+        directory.  If so, it creates a CppBenchmarkRunner with this existing
+        CMakeBuild.
+
+        Otherwise, it assumes `rev_or_path` is a revision and clone/checkout
+        the given revision and create a fresh CMakeBuild.
+        """
+        build = None
+        if StaticBenchmarkRunner.is_json_result(rev_or_path):
+            return StaticBenchmarkRunner.from_json(rev_or_path, **kwargs)
+        elif CMakeBuild.is_build_dir(rev_or_path):
+            build = CMakeBuild.from_path(rev_or_path)
+            return CppBenchmarkRunner(build, **kwargs)
+        else:
+            # Revisions can references remote via the `/` character, ensure
+            # that the revision is path friendly
+            path_rev = rev_or_path.replace("/", "_")
+            root_rev = os.path.join(root, path_rev)
+            os.mkdir(root_rev)
+
+            clone_dir = os.path.join(root_rev, "arrow")
+            # Possibly checkout the sources at given revision, no need to
+            # perform cleanup on cloned repository as root_rev is reclaimed.
+            src_rev, _ = src.at_revision(rev_or_path, clone_dir)
+            cmake_def = CppCMakeDefinition(src_rev.cpp, cmake_conf)
+            build_dir = os.path.join(root_rev, "build")
+            return CppBenchmarkRunner(cmake_def.build(build_dir), **kwargs)
+
+
+class JavaBenchmarkRunner(BenchmarkRunner):
+    """ Run suites for Java. """
+
+    # default repetitions is 5 for Java microbenchmark harness
+    def __init__(self, build, **kwargs):
+        """ Initialize a JavaBenchmarkRunner. """
+        self.build = build
+        super().__init__(**kwargs)
+
+    @staticmethod
+    def default_configuration(**kwargs):
+        """ Returns the default benchmark configuration. """
+        return JavaConfiguration(**kwargs)
+
+    def suite(self, name):
+        """ Returns the resulting benchmarks for a given suite. """
+        # update .m2 directory, which installs target jars
+        self.build.build()
+
+        suite_cmd = JavaMicrobenchmarkHarnessCommand(
+            self.build, self.benchmark_filter)
+
+        # Ensure there will be data
+        benchmark_names = suite_cmd.list_benchmarks()
+        if not benchmark_names:
+            return None
+
+        results = suite_cmd.results(repetitions=self.repetitions)
+        benchmarks = JavaMicrobenchmarkHarness.from_json(results)
+        return BenchmarkSuite(name, benchmarks)
+
+    @property
+    def list_benchmarks(self):
+        """ Returns all suite names """
+        # Ensure build is up-to-date to run benchmarks
+        self.build.build()
+
+        suite_cmd = JavaMicrobenchmarkHarnessCommand(self.build)
+        benchmark_names = suite_cmd.list_benchmarks()
+        for benchmark_name in benchmark_names:
+            yield "{}".format(benchmark_name)
+
+    @property
+    def suites(self):
+        """ Returns all suite for a runner. """
+        suite_name = "JavaBenchmark"
+        suite = self.suite(suite_name)
+
+        # Filter may exclude all benchmarks
+        if not suite:
+            logger.debug("Suite {} executed but no results"
+                         .format(suite_name))
+            return
+
+        yield suite
+
+    @staticmethod
+    def from_rev_or_path(src, root, rev_or_path, maven_conf, **kwargs):
+        """ Returns a BenchmarkRunner from a path or a git revision.
+
+        First, it checks if `rev_or_path` is a valid path (or string) of a json
+        object that can deserialize to a BenchmarkRunner. If so, it initialize
+        a StaticBenchmarkRunner from it. This allows memoizing the result of a
+        run in a file or a string.
+
+        Second, it checks if `rev_or_path` points to a valid Maven build
+        directory.  If so, it creates a JavaBenchmarkRunner with this existing
+        MavenBuild.
+
+        Otherwise, it assumes `rev_or_path` is a revision and clone/checkout
+        the given revision and create a fresh MavenBuild.
+        """
+        if StaticBenchmarkRunner.is_json_result(rev_or_path):
+            return StaticBenchmarkRunner.from_json(rev_or_path, **kwargs)
+        elif MavenBuild.is_build_dir(rev_or_path):
+            maven_def = JavaMavenDefinition(rev_or_path, maven_conf)
+            return JavaBenchmarkRunner(maven_def.build(rev_or_path), **kwargs)
+        else:
+            # Revisions can references remote via the `/` character, ensure
+            # that the revision is path friendly
+            path_rev = rev_or_path.replace("/", "_")
+            root_rev = os.path.join(root, path_rev)
+            os.mkdir(root_rev)
+
+            clone_dir = os.path.join(root_rev, "arrow")
+            # Possibly checkout the sources at given revision, no need to
+            # perform cleanup on cloned repository as root_rev is reclaimed.
+            src_rev, _ = src.at_revision(rev_or_path, clone_dir)
+            maven_def = JavaMavenDefinition(src_rev.java, maven_conf)
+            build_dir = os.path.join(root_rev, "arrow/java")
+            return JavaBenchmarkRunner(maven_def.build(build_dir), **kwargs)
diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py
index bcaddf1c795..06dd6b60370 100644
--- a/dev/archery/archery/cli.py
+++ b/dev/archery/archery/cli.py
@@ -27,7 +27,7 @@
 
 from .benchmark.codec import JsonEncoder
 from .benchmark.compare import RunnerComparator, DEFAULT_THRESHOLD
-from .benchmark.runner import BenchmarkRunner, CppBenchmarkRunner
+from .benchmark.runner import CppBenchmarkRunner, JavaBenchmarkRunner
 from .lang.cpp import CppCMakeDefinition, CppConfiguration
 from .utils.lint import linter, python_numpydoc, LintValidationException
 from .utils.logger import logger, ctx as log_ctx
@@ -120,6 +120,15 @@ def cpp_toolchain_options(cmd):
     return _apply_options(cmd, options)
 
 
+def java_toolchain_options(cmd):
+    options = [
+        click.option("--java-home", metavar="<java_home>",
+                     help="Path to Java Developers Kit."),
+        click.option("--java-options", help="java compiler options."),
+    ]
+    return _apply_options(cmd, options)
+
+
 def _apply_options(cmd, options):
     for option in options:
         cmd = option(cmd)
@@ -132,6 +141,7 @@ def _apply_options(cmd, options):
               help="Specify Arrow source directory")
 # toolchain
 @cpp_toolchain_options
+@java_toolchain_options
 @click.option("--build-type", default=None, type=build_type,
               help="CMake's CMAKE_BUILD_TYPE")
 @click.option("--warn-level", default="production", type=warn_level_type,
@@ -357,6 +367,11 @@ def benchmark(ctx):
 
 
 def benchmark_common_options(cmd):
+    def check_language(ctx, param, value):
+        if value not in {"cpp", "java"}:
+            raise click.BadParameter("cpp or java is supported now")
+        return value
+
     options = [
         click.option("--src", metavar="<arrow_src>", show_default=True,
                      default=None, callback=validate_arrow_sources,
@@ -367,11 +382,21 @@ def benchmark_common_options(cmd):
         click.option("--output", metavar="<output>",
                      type=click.File("w", encoding="utf8"), default="-",
                      help="Capture output result into file."),
+        click.option("--language", metavar="<lang>", type=str, default="cpp",
+                     show_default=True, callback=check_language,
+                     help="Specify target language for the benchmark"),
+        click.option("--build-extras", type=str, multiple=True,
+                     help="Extra flags/options to pass to mvn build. "
+                     "Can be stacked. For language=java"),
+        click.option("--benchmark-extras", type=str, multiple=True,
+                     help="Extra flags/options to pass to mvn benchmark. "
+                     "Can be stacked. For language=java"),
         click.option("--cmake-extras", type=str, multiple=True,
                      help="Extra flags/options to pass to cmake invocation. "
-                     "Can be stacked"),
+                     "Can be stacked. For language=cpp")
     ]
 
+    cmd = java_toolchain_options(cmd)
     cmd = cpp_toolchain_options(cmd)
     return _apply_options(cmd, options)
 
@@ -392,19 +417,33 @@ def benchmark_filter_options(cmd):
 @click.argument("rev_or_path", metavar="[<rev_or_path>]",
                 default="WORKSPACE", required=False)
 @benchmark_common_options
+@benchmark_filter_options
 @click.pass_context
 def benchmark_list(ctx, rev_or_path, src, preserve, output, cmake_extras,
-                   **kwargs):
+                   java_home, java_options, build_extras, benchmark_extras,
+                   language, **kwargs):
     """ List benchmark suite.
     """
     with tmpdir(preserve=preserve) as root:
         logger.debug("Running benchmark {}".format(rev_or_path))
 
-        conf = CppBenchmarkRunner.default_configuration(
-            cmake_extras=cmake_extras, **kwargs)
+        if language == "cpp":
+            conf = CppBenchmarkRunner.default_configuration(
+                cmake_extras=cmake_extras, **kwargs)
+
+            runner_base = CppBenchmarkRunner.from_rev_or_path(
+                src, root, rev_or_path, conf)
+
+        elif language == "java":
+            for key in {'cpp_package_prefix', 'cxx_flags', 'cxx', 'cc'}:
+                del kwargs[key]
+            conf = JavaBenchmarkRunner.default_configuration(
+                java_home=java_home, java_options=java_options,
+                build_extras=build_extras, benchmark_extras=benchmark_extras,
+                **kwargs)
 
-        runner_base = BenchmarkRunner.from_rev_or_path(
-            src, root, rev_or_path, conf)
+            runner_base = JavaBenchmarkRunner.from_rev_or_path(
+                src, root, rev_or_path, conf)
 
         for b in runner_base.list_benchmarks:
             click.echo(b, file=output)
@@ -415,12 +454,15 @@ def benchmark_list(ctx, rev_or_path, src, preserve, output, cmake_extras,
                 default="WORKSPACE", required=False)
 @benchmark_common_options
 @benchmark_filter_options
-@click.option("--repetitions", type=int, default=1, show_default=True,
+@click.option("--repetitions", type=int, default=-1,
               help=("Number of repetitions of each benchmark. Increasing "
-                    "may improve result precision."))
+                    "may improve result precision. "
+                    "[default: 1 for cpp, 5 for java"))
 @click.pass_context
 def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras,
-                  suite_filter, benchmark_filter, repetitions, **kwargs):
+                  java_home, java_options, build_extras, benchmark_extras,
+                  language, suite_filter, benchmark_filter, repetitions,
+                  **kwargs):
     """ Run benchmark suite.
 
     This command will run the benchmark suite for a single build. This is
@@ -456,13 +498,29 @@ def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras,
     with tmpdir(preserve=preserve) as root:
         logger.debug("Running benchmark {}".format(rev_or_path))
 
-        conf = CppBenchmarkRunner.default_configuration(
-            cmake_extras=cmake_extras, **kwargs)
-
-        runner_base = BenchmarkRunner.from_rev_or_path(
-            src, root, rev_or_path, conf,
-            repetitions=repetitions,
-            suite_filter=suite_filter, benchmark_filter=benchmark_filter)
+        if language == "cpp":
+            conf = CppBenchmarkRunner.default_configuration(
+                cmake_extras=cmake_extras, **kwargs)
+
+            repetitions = repetitions if repetitions != -1 else 1
+            runner_base = CppBenchmarkRunner.from_rev_or_path(
+                src, root, rev_or_path, conf,
+                repetitions=repetitions,
+                suite_filter=suite_filter, benchmark_filter=benchmark_filter)
+
+        elif language == "java":
+            for key in {'cpp_package_prefix', 'cxx_flags', 'cxx', 'cc'}:
+                del kwargs[key]
+            conf = JavaBenchmarkRunner.default_configuration(
+                java_home=java_home, java_options=java_options,
+                build_extras=build_extras, benchmark_extras=benchmark_extras,
+                **kwargs)
+
+            repetitions = repetitions if repetitions != -1 else 5
+            runner_base = JavaBenchmarkRunner.from_rev_or_path(
+                src, root, rev_or_path, conf,
+                repetitions=repetitions,
+                benchmark_filter=benchmark_filter)
 
         json.dump(runner_base, output, cls=JsonEncoder)
 
@@ -475,7 +533,8 @@ def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras,
               help="Regression failure threshold in percentage.")
 @click.option("--repetitions", type=int, default=1, show_default=True,
               help=("Number of repetitions of each benchmark. Increasing "
-                    "may improve result precision."))
+                    "may improve result precision. "
+                    "[default: 1 for cpp, 5 for java"))
 @click.option("--no-counters", type=BOOL, default=False, is_flag=True,
               help="Hide counters field in diff report.")
 @click.argument("contender", metavar="[<contender>",
@@ -483,8 +542,9 @@ def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras,
 @click.argument("baseline", metavar="[<baseline>]]", default="origin/master",
                 required=False)
 @click.pass_context
-def benchmark_diff(ctx, src, preserve, output, cmake_extras,
+def benchmark_diff(ctx, src, preserve, output, language, cmake_extras,
                    suite_filter, benchmark_filter, repetitions, no_counters,
+                   java_home, java_options, build_extras, benchmark_extras,
                    threshold, contender, baseline, **kwargs):
     """Compare (diff) benchmark runs.
 
@@ -560,26 +620,47 @@ def benchmark_diff(ctx, src, preserve, output, cmake_extras,
         logger.debug("Comparing {} (contender) with {} (baseline)"
                      .format(contender, baseline))
 
-        conf = CppBenchmarkRunner.default_configuration(
-            cmake_extras=cmake_extras, **kwargs)
-
-        runner_cont = BenchmarkRunner.from_rev_or_path(
-            src, root, contender, conf,
-            repetitions=repetitions,
-            suite_filter=suite_filter,
-            benchmark_filter=benchmark_filter)
-        runner_base = BenchmarkRunner.from_rev_or_path(
-            src, root, baseline, conf,
-            repetitions=repetitions,
-            suite_filter=suite_filter,
-            benchmark_filter=benchmark_filter)
+        if language == "cpp":
+            conf = CppBenchmarkRunner.default_configuration(
+                cmake_extras=cmake_extras, **kwargs)
+
+            repetitions = repetitions if repetitions != -1 else 1
+            runner_cont = CppBenchmarkRunner.from_rev_or_path(
+                src, root, contender, conf,
+                repetitions=repetitions,
+                suite_filter=suite_filter,
+                benchmark_filter=benchmark_filter)
+            runner_base = CppBenchmarkRunner.from_rev_or_path(
+                src, root, baseline, conf,
+                repetitions=repetitions,
+                suite_filter=suite_filter,
+                benchmark_filter=benchmark_filter)
+
+        elif language == "java":
+            for key in {'cpp_package_prefix', 'cxx_flags', 'cxx', 'cc'}:
+                del kwargs[key]
+            conf = JavaBenchmarkRunner.default_configuration(
+                java_home=java_home, java_options=java_options,
+                build_extras=build_extras, benchmark_extras=benchmark_extras,
+                **kwargs)
+
+            repetitions = repetitions if repetitions != -1 else 5
+            runner_cont = JavaBenchmarkRunner.from_rev_or_path(
+                src, root, contender, conf,
+                repetitions=repetitions,
+                benchmark_filter=benchmark_filter)
+            runner_base = JavaBenchmarkRunner.from_rev_or_path(
+                src, root, baseline, conf,
+                repetitions=repetitions,
+                benchmark_filter=benchmark_filter)
 
         runner_comp = RunnerComparator(runner_cont, runner_base, threshold)
 
         # TODO(kszucs): test that the output is properly formatted jsonlines
         comparisons_json = _get_comparisons_as_json(runner_comp.comparisons)
+        ren_counters = language == "java"
         formatted = _format_comparisons_with_pandas(comparisons_json,
-                                                    no_counters)
+                                                    no_counters, ren_counters)
         output.write(formatted)
         output.write('\n')
 
@@ -593,7 +674,8 @@ def _get_comparisons_as_json(comparisons):
     return buf.getvalue()
 
 
-def _format_comparisons_with_pandas(comparisons_json, no_counters):
+def _format_comparisons_with_pandas(comparisons_json, no_counters,
+                                    ren_counters):
     import pandas as pd
     df = pd.read_json(StringIO(comparisons_json), lines=True)
     # parse change % so we can sort by it
@@ -604,7 +686,10 @@ def _format_comparisons_with_pandas(comparisons_json, no_counters):
     if not no_counters:
         fields += ['counters']
 
-    df = df[fields].sort_values(by='change %', ascending=False)
+    df = df[fields]
+    if ren_counters:
+        df = df.rename(columns={'counters': 'configurations'})
+    df = df.sort_values(by='change %', ascending=False)
 
     def labelled(title, df):
         if len(df) == 0:
diff --git a/dev/archery/archery/lang/java.py b/dev/archery/archery/lang/java.py
index 24743b67fd7..bc169adf647 100644
--- a/dev/archery/archery/lang/java.py
+++ b/dev/archery/archery/lang/java.py
@@ -15,7 +15,10 @@
 # specific language governing permissions and limitations
 # under the License.
 
+import os
+
 from ..utils.command import Command, CommandStackMixin, default_bin
+from ..utils.maven import MavenDefinition
 
 
 class Java(Command):
@@ -28,3 +31,47 @@ def __init__(self, jar, *args, **kwargs):
         self.jar = jar
         self.argv = ("-jar", jar)
         Java.__init__(self, *args, **kwargs)
+
+
+class JavaConfiguration:
+    def __init__(self,
+
+                 # toolchain
+                 java_home=None, java_options=None,
+                 # build & benchmark
+                 build_extras=None, benchmark_extras=None):
+        self.java_home = java_home
+        self.java_options = java_options
+
+        self.build_extras = list(build_extras) if build_extras else []
+        self.benchmark_extras = list(
+            benchmark_extras) if benchmark_extras else []
+
+    @property
+    def build_definitions(self):
+        return self.build_extras
+
+    @property
+    def benchmark_definitions(self):
+        return self.benchmark_extras
+
+    @property
+    def environment(self):
+        env = os.environ.copy()
+
+        if self.java_home:
+            env["JAVA_HOME"] = self.java_home
+
+        if self.java_options:
+            env["JAVA_OPTIONS"] = self.java_options
+
+        return env
+
+
+class JavaMavenDefinition(MavenDefinition):
+    def __init__(self, source, conf, **kwargs):
+        self.configuration = conf
+        super().__init__(source, **kwargs,
+                         build_definitions=conf.build_definitions,
+                         benchmark_definitions=conf.benchmark_definitions,
+                         env=conf.environment)
diff --git a/dev/archery/archery/utils/maven.py b/dev/archery/archery/utils/maven.py
new file mode 100644
index 00000000000..96a3bf5bd99
--- /dev/null
+++ b/dev/archery/archery/utils/maven.py
@@ -0,0 +1,204 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+
+from .command import Command, default_bin
+
+
+class Maven(Command):
+    def __init__(self, maven_bin=None):
+        self.bin = default_bin(maven_bin, "mvn")
+
+
+maven = Maven()
+
+
+class MavenDefinition:
+    """ MavenDefinition captures the maven invocation arguments.
+
+    It allows creating build directories with the same definition, e.g.
+    ```
+    build_1 = maven_def.build("/tmp/build-1")
+    build_2 = maven_def.build("/tmp/build-2")
+
+    ...
+
+    build1.install()
+    build2.install()
+    """
+
+    def __init__(self, source, build_definitions=None,
+                 benchmark_definitions=None, env=None):
+        """ Initialize a MavenDefinition
+
+        Parameters
+        ----------
+        source : str
+                 Source directory where the top-level pom.xml is
+                 located. This is usually the root of the project.
+        build_definitions: list(str), optional
+        benchmark_definitions: list(str), optional
+        """
+        self.source = os.path.abspath(source)
+        self.build_definitions = build_definitions if build_definitions else []
+        self.benchmark_definitions =\
+            benchmark_definitions if benchmark_definitions else []
+        self.env = env
+
+    @property
+    def build_arguments(self):
+        """" Return the arguments to maven invocation for build. """
+        arguments = self.build_definitions + [
+            "-B", "-DskipTests", "-Drat.skip=true",
+            "-Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer."
+            "Slf4jMavenTransferListener=warn",
+            "-T", "2C", "install"
+        ]
+        return arguments
+
+    def build(self, build_dir, force=False, cmd_kwargs=None, **kwargs):
+        """ Invoke maven into a build directory.
+
+        Parameters
+        ----------
+        build_dir : str
+                    Directory in which the Maven build will be instantiated.
+        force : bool
+                not used now
+        """
+        if os.path.exists(build_dir):
+            # Extra safety to ensure we're deleting a build folder.
+            if not MavenBuild.is_build_dir(build_dir):
+                raise FileExistsError(
+                    "{} is not a maven build".format(build_dir)
+                )
+
+        cmd_kwargs = cmd_kwargs if cmd_kwargs else {}
+        assert MavenBuild.is_build_dir(build_dir)
+        maven(*self.build_arguments, cwd=build_dir, env=self.env, **cmd_kwargs)
+        return MavenBuild(build_dir, definition=self, **kwargs)
+
+    @property
+    def list_arguments(self):
+        """" Return the arguments to maven invocation for list """
+        arguments = [
+            "-Dskip.perf.benchmarks=false", "-Dbenchmark.list=-lp", "install"
+        ]
+        return arguments
+
+    @property
+    def benchmark_arguments(self):
+        """" Return the arguments to maven invocation for benchmark """
+        arguments = self.benchmark_definitions + [
+            "-Dskip.perf.benchmarks=false", "-Dbenchmark.fork=1",
+            "-Dbenchmark.jvmargs=\"-Darrow.enable_null_check_for_get=false "
+            "-Darrow.enable_unsafe_memory_access=true\"",
+            "install"
+        ]
+        return arguments
+
+    def __repr__(self):
+        return "MavenDefinition[source={}]".format(self.source)
+
+
+class MavenBuild(Maven):
+    """ MavenBuild represents a build directory initialized by maven.
+
+    The build instance can be used to build/test/install. It alleviates the
+    user to know which generator is used.
+    """
+
+    def __init__(self, build_dir, definition=None):
+        """ Initialize a MavenBuild.
+
+        The caller must ensure that maven was invoked in the build directory.
+
+        Parameters
+        ----------
+        definition : MavenDefinition
+                     The definition to build from.
+        build_dir : str
+                    The build directory to setup into.
+        """
+        assert MavenBuild.is_build_dir(build_dir)
+        super().__init__()
+        self.build_dir = os.path.abspath(build_dir)
+        self.definition = definition
+
+    @property
+    def binaries_dir(self):
+        return self.build_dir
+
+    def run(self, *argv, verbose=False, cwd=None, **kwargs):
+        extra = []
+        if verbose:
+            extra.append("-X")
+        if cwd is None:
+            cwd = self.build_dir
+        # Commands must be ran under the directory where pom.xml exists
+        return super().run(*extra, *argv, **kwargs, cwd=cwd)
+
+    def build(self, *argv, verbose=False, **kwargs):
+        definition_args = self.definition.build_arguments
+        cwd = self.binaries_dir
+        return self.run(*argv, *definition_args, verbose=verbose, cwd=cwd,
+                        env=self.definition.env, **kwargs)
+
+    def list(self, *argv, verbose=False, **kwargs):
+        definition_args = self.definition.list_arguments
+        cwd = self.binaries_dir + "/performance"
+        return self.run(*argv, *definition_args, verbose=verbose, cwd=cwd,
+                        env=self.definition.env, **kwargs)
+
+    def benchmark(self, *argv, verbose=False, **kwargs):
+        definition_args = self.definition.benchmark_arguments
+        cwd = self.binaries_dir + "/performance"
+        return self.run(*argv, *definition_args, verbose=verbose, cwd=cwd,
+                        env=self.definition.env, **kwargs)
+
+    @staticmethod
+    def is_build_dir(path):
+        """ Indicate if a path is Maven top directory.
+
+        This method only checks for the existence of paths and does not do any
+        validation whatsoever.
+        """
+        pom_xml = os.path.join(path, "pom.xml")
+        performance_dir = os.path.join(path, "performance")
+        return os.path.exists(pom_xml) and os.path.isdir(performance_dir)
+
+    @staticmethod
+    def from_path(path):
+        """ Instantiate a Maven from a path.
+
+        This is used to recover from an existing physical directory (created
+        with or without Maven).
+
+        Note that this method is not idempotent as the original definition will
+        be lost.
+        """
+        if not MavenBuild.is_build_dir(path):
+            raise ValueError("Not a valid MavenBuild path: {}".format(path))
+
+        return MavenBuild(path, definition=None)
+
+    def __repr__(self):
+        return ("MavenBuild["
+                "build = {},"
+                "definition = {}]".format(self.build_dir,
+                                          self.definition))
diff --git a/dev/archery/archery/utils/source.py b/dev/archery/archery/utils/source.py
index d30b4f152e5..f7e47a5a1b6 100644
--- a/dev/archery/archery/utils/source.py
+++ b/dev/archery/archery/utils/source.py
@@ -68,6 +68,11 @@ def dev(self):
         """ Returns the dev directory of an Arrow sources. """
         return self.path / "dev"
 
+    @property
+    def java(self):
+        """ Returns the java directory of an Arrow sources. """
+        return self.path / "java"
+
     @property
     def python(self):
         """ Returns the python directory of an Arrow sources. """
diff --git a/java/performance/pom.xml b/java/performance/pom.xml
index d9828988b28..d6a0b950d4a 100644
--- a/java/performance/pom.xml
+++ b/java/performance/pom.xml
@@ -99,8 +99,12 @@
         <skip.perf.benchmarks>true</skip.perf.benchmarks>
         <benchmark.filter>.*</benchmark.filter>
         <benchmark.forks>1</benchmark.forks>
+        <benchmark.jvmargs> </benchmark.jvmargs>
         <benchmark.warmups>5</benchmark.warmups>
         <benchmark.runs>5</benchmark.runs>
+        <benchmark.list> </benchmark.list>
+        <benchmark.resultfile>jmh-result.json</benchmark.resultfile>
+        <benchmark.resultformat>json</benchmark.resultformat>
     </properties>
 
     <build>
@@ -169,10 +173,17 @@
                         <argument>${benchmark.filter}</argument>
                         <argument>-f</argument>
                         <argument>${benchmark.forks}</argument>
+                        <argument>-jvmArgs</argument>
+                        <argument>${benchmark.jvmargs}</argument>
                         <argument>-wi</argument>
                         <argument>${benchmark.warmups}</argument>
                         <argument>-i</argument>
                         <argument>${benchmark.runs}</argument>
+                        <argument>${benchmark.list}</argument>
+                        <argument>-rff</argument>
+                        <argument>${benchmark.resultfile}</argument>
+                        <argument>-rf</argument>
+                        <argument>${benchmark.resultformat}</argument>
                     </arguments>
                 </configuration>
             </plugin>

From a7f2d81a77f1f8885d6190ac1615e107603dd9b2 Mon Sep 17 00:00:00 2001
From: Dominik Moritz <domoritz@gmail.com>
Date: Sat, 1 May 2021 06:22:23 +0900
Subject: [PATCH 158/719] ARROW-12528: [JS] Support typed arrays in Table.new

Now we can create an Arrow Table directly from typed arrays (not just from Arrow Vectors)

```ts
const t = Table.new({
  f32: new Float32Array([1, 2, 3]),
  i32: new Int32Array([1, 2, 3]),
  str: Utf8Vector.from(['foo', 'bar'])
})
```

Type of `t` inferred from the typed array (as well as the vector but that's not new).

```ts
Table<{
    f32: Float32;
    i32: Int32;
    str: Utf8;
}>
```

Closes #10151 from domoritz/dom/typed-arrays

Authored-by: Dominik Moritz <domoritz@gmail.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 js/src/interfaces.ts        | 14 +++++++++++
 js/src/table.ts             |  5 ++--
 js/src/util/args.ts         | 49 ++++++++++++++++++++++++++++++++++---
 js/src/vector/float.ts      |  2 +-
 js/src/vector/int.ts        |  2 +-
 js/test/unit/table-tests.ts | 18 +++++++++++++-
 js/test/unit/utils-tests.ts | 33 +++++++++++++++++++++++++
 7 files changed, 115 insertions(+), 8 deletions(-)
 create mode 100644 js/test/unit/utils-tests.ts

diff --git a/js/src/interfaces.ts b/js/src/interfaces.ts
index 8e16c4150e3..43977ca7af5 100644
--- a/js/src/interfaces.ts
+++ b/js/src/interfaces.ts
@@ -162,6 +162,20 @@ export type DataTypeCtor<T extends Type | DataType | VectorType = any> =
                            never
     ;
 
+/** @ignore */
+export type TypedArrayDataType<T extends Exclude<TypedArray, Uint8ClampedArray> | BigIntArray> =
+    T extends Int8Array ? type.Int8 :
+    T extends Int16Array ? type.Int16 :
+    T extends Int32Array ? type.Int32 :
+    T extends BigInt64Array ? type.Int64 :
+    T extends Uint8Array ? type.Uint8 :
+    T extends Uint16Array ? type.Uint16 :
+    T extends Uint32Array ? type.Uint32 :
+    T extends BigUint64Array ? type.Uint64 :
+    T extends Float32Array ? type.Float32 :
+    T extends Float64Array ? type.Float64 :
+    never;
+
 /** @ignore */
 type TypeToVector<T extends Type> = {
     [key: number               ]: vecs.Vector<any>                ;
diff --git a/js/src/table.ts b/js/src/table.ts
index 23f02b0a207..e94e1adf195 100644
--- a/js/src/table.ts
+++ b/js/src/table.ts
@@ -28,8 +28,9 @@ import { isPromise, isIterable, isAsyncIterable } from './util/compat';
 import { RecordBatchFileWriter, RecordBatchStreamWriter } from './ipc/writer';
 import { distributeColumnsIntoRecordBatches, distributeVectorsIntoRecordBatches } from './util/recordbatch';
 import { Vector, Chunked, StructVector, VectorBuilderOptions, VectorBuilderOptionsAsync } from './vector/index';
+import { TypedArray, TypedArrayDataType } from './interfaces';
 
-type VectorMap = { [key: string]: Vector };
+type VectorMap = { [key: string]: Vector | Exclude<TypedArray, Uint8ClampedArray> };
 type Fields<T extends { [key: string]: DataType }> = (keyof T)[] | Field<T[keyof T]>[];
 type ChildData<T extends { [key: string]: DataType }> = Data<T[keyof T]>[] | Vector<T[keyof T]>[];
 type Columns<T extends { [key: string]: DataType }> = Column<T[keyof T]>[] | Column<T[keyof T]>[][];
@@ -165,7 +166,7 @@ export class Table<T extends { [key: string]: DataType } = any>
      * 125,000 bytes (`((1e6 + 63) & ~63) >> 3`), or approx. `0.11MiB`
      */
     public static new<T extends { [key: string]: DataType } = any>(...columns: Columns<T>): Table<T>;
-    public static new<T extends VectorMap = any>(children: T): Table<{ [P in keyof T]: T[P]['type'] }>;
+    public static new<T extends VectorMap = any>(children: T): Table<{ [P in keyof T]: T[P] extends Vector ? T[P]['type'] : T[P] extends Exclude<TypedArray, Uint8ClampedArray> ? TypedArrayDataType<T[P]> : never}>;
     public static new<T extends { [key: string]: DataType } = any>(children: ChildData<T>, fields?: Fields<T>): Table<T>;
     /** @nocollapse */
     public static new(...cols: any[]) {
diff --git a/js/src/util/args.ts b/js/src/util/args.ts
index c9c9d111193..ff56f775dd3 100644
--- a/js/src/util/args.ts
+++ b/js/src/util/args.ts
@@ -19,13 +19,55 @@ import { Data } from '../data';
 import { Field } from '../schema';
 import { Column } from '../column';
 import { Vector } from '../vector';
-import { DataType } from '../type';
+import { DataType, Float32, Float64, FloatArray, IntArray, Int16, Int32, Int64, Int8, Uint16, Uint32, Uint64, Uint8 } from '../type';
 import { Chunked } from '../vector/chunked';
+import { BigIntArray, TypedArray as TypedArray_ } from '../interfaces';
+import { FloatArrayCtor } from '../vector/float';
+import { IntArrayCtor } from '../vector/int';
 
 type RecordBatchCtor = typeof import('../recordbatch').RecordBatch;
 
 const isArray = Array.isArray;
 
+type TypedArray = Exclude<TypedArray_ | BigIntArray, Uint8ClampedArray>;
+
+/** @ignore */
+export function isTypedArray(arr: any): arr is TypedArray {
+    return ArrayBuffer.isView(arr) && 'BYTES_PER_ELEMENT' in arr;
+}
+
+
+/** @ignore */
+type ArrayCtor = FloatArrayCtor | IntArrayCtor;
+
+/** @ignore */
+export function arrayTypeToDataType(ctor: ArrayCtor) {
+    switch (ctor) {
+        case Int8Array:         return Int8;
+        case Int16Array:        return Int16;
+        case Int32Array:        return Int32;
+        case BigInt64Array:     return Int64;
+        case Uint8Array:        return Uint8;
+        case Uint16Array:       return Uint16;
+        case Uint32Array:       return Uint32;
+        case BigUint64Array:    return Uint64;
+        case Float32Array:      return Float32;
+        case Float64Array:      return Float64;
+        default: return null;
+    }
+}
+
+/** @ignore */
+function vectorFromTypedArray(array: TypedArray): Vector {
+    const ArrowType = arrayTypeToDataType(array.constructor as ArrayCtor);
+    if (!ArrowType) {
+        throw new TypeError('Unrecognized Array input');
+    }
+    const type = new ArrowType();
+    const data = Data.new(type, 0, array.length, 0, [undefined, array as IntArray | FloatArray]);
+    return Vector.new(data);
+}
+
 /** @ignore */
 export const selectArgs = <T>(Ctor: any, vals: any[]) => _selectArgs(Ctor, vals, [], 0) as T[];
 /** @ignore */
@@ -34,6 +76,7 @@ export const selectColumnArgs = <T extends { [key: string]: DataType }>(args: an
     return values.map((x, i) =>
         x instanceof Column ? Column.new(x.field.clone(fields[i]), x) :
         x instanceof Vector ? Column.new(fields[i], x) as Column<T[keyof T]> :
+        isTypedArray(x)     ? Column.new(fields[i], vectorFromTypedArray(x)) as Column<T[keyof T]> :
                               Column.new(fields[i], [] as Vector<T[keyof T]>[]));
 };
 
@@ -108,7 +151,7 @@ function _selectColumnChildrenArgs<T extends Column>(Ctor: RecordBatchCtor, vals
 const toKeysAndValues = (xs: [any[], any[]], [k, v]: [any, any], i: number) => (xs[0][i] = k, xs[1][i] = v, xs);
 
 /** @ignore */
-function _selectFieldArgs<T extends { [key: string]: DataType }>(vals: any[], ret: [Field<T[keyof T]>[], Vector<T[keyof T]>[]]): [Field<T[keyof T]>[], (T[keyof T] | Vector<T[keyof T]>)[]] {
+function _selectFieldArgs<T extends { [key: string]: DataType }>(vals: any[], ret: [Field<T[keyof T]>[], (Vector<T[keyof T]> | TypedArray)[]]): [Field<T[keyof T]>[], (T[keyof T] | Vector<T[keyof T]> | TypedArray)[]] {
     let keys: any[];
     let n: number;
     switch (n = vals.length) {
@@ -117,7 +160,7 @@ function _selectFieldArgs<T extends { [key: string]: DataType }>(vals: any[], re
             keys = ret[0];
             if (!(vals[0])) { return ret; }
             if (isArray(vals[0])) { return _selectFieldArgs(vals[0], ret); }
-            if (!(vals[0] instanceof Data || vals[0] instanceof Vector || vals[0] instanceof DataType)) {
+            if (!(vals[0] instanceof Data || vals[0] instanceof Vector || isTypedArray(vals[0]) || vals[0] instanceof DataType)) {
                 [keys, vals] = Object.entries(vals[0]).reduce(toKeysAndValues, ret);
             }
             break;
diff --git a/js/src/vector/float.ts b/js/src/vector/float.ts
index 2e3151d9077..8260d2b27db 100644
--- a/js/src/vector/float.ts
+++ b/js/src/vector/float.ts
@@ -41,7 +41,7 @@ type FromInput<T extends Float, TNull = any> =
     VectorBuilderOptionsAsync<T, TNull> ;
 
 /** @ignore */
-type FloatArrayCtor = TypedArrayConstructor<FloatArray>;
+export type FloatArrayCtor = TypedArrayConstructor<FloatArray>;
 
 /** @ignore */
 export class FloatVector<T extends Float = Float> extends BaseVector<T> {
diff --git a/js/src/vector/int.ts b/js/src/vector/int.ts
index c12863297e6..dbfba58c9d9 100644
--- a/js/src/vector/int.ts
+++ b/js/src/vector/int.ts
@@ -51,7 +51,7 @@ type FromInput<T extends Int, TNull = any> =
 type FromArgs<T extends Int, TNull = any> = [FromInput<T, TNull>, boolean?];
 
 /** @ignore */
-type IntArrayCtor = TypedArrayConstructor<IntArray> | BigIntArrayConstructor<BigIntArray>;
+export type IntArrayCtor = TypedArrayConstructor<IntArray> | BigIntArrayConstructor<BigIntArray>;
 
 /** @ignore */
 export class IntVector<T extends Int = Int> extends BaseVector<T> {
diff --git a/js/test/unit/table-tests.ts b/js/test/unit/table-tests.ts
index 32b635f3bab..d863f7581a4 100644
--- a/js/test/unit/table-tests.ts
+++ b/js/test/unit/table-tests.ts
@@ -20,7 +20,7 @@ import {
     predicate,
     Data, Schema, Field, Table, RecordBatch, Column,
     Vector, Int32Vector, Float32Vector, Utf8Vector, DictionaryVector,
-    Struct, Float32, Int32, Dictionary, Utf8, Int8
+    Struct, Float32, Int32, Dictionary, Utf8, Int8, Type
 } from '../Arrow';
 import { arange } from './utils';
 
@@ -242,6 +242,22 @@ describe(`Table`, () => {
             expect(i32).toEqualVector(Int32Vector.from(i32s));
             expect(f32).toEqualVector(new Float32Vector(f32Expected));
         });
+
+        test(`creates a new Table from Typed Arrays`, () => {
+            let i32s = Int32Array.from({length: 10}, (_, i) => i);
+            let f32s = Float32Array.from({length: 10}, (_, i) => i);
+            const table = Table.new({ i32s, f32s });
+            const i32 = table.getColumn('i32s')!;
+            const f32 = table.getColumn('f32s')!;
+
+            expect(table).toHaveLength(10);
+            expect(i32).toHaveLength(10);
+            expect(f32).toHaveLength(10);
+            expect(i32.toArray()).toBeInstanceOf(Int32Array);
+            expect(f32.toArray()).toBeInstanceOf(Float32Array);
+            expect(i32.toArray()).toEqual(i32s);
+            expect(f32.toArray()).toEqual(f32s);
+        });
     });
 
     test(`Table.serialize() serializes sliced RecordBatches`, () => {
diff --git a/js/test/unit/utils-tests.ts b/js/test/unit/utils-tests.ts
new file mode 100644
index 00000000000..00553c4f1b4
--- /dev/null
+++ b/js/test/unit/utils-tests.ts
@@ -0,0 +1,33 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import { isTypedArray } from '../../src/util/args';
+
+
+describe('isTypedArray', () => {
+    test('works for typed arrays', () => {
+        expect(isTypedArray(new Int8Array())).toBeTruthy();
+        expect(isTypedArray(new Int32Array())).toBeTruthy();
+        expect(isTypedArray(new BigInt64Array())).toBeTruthy();
+    });
+
+    test('does not recognize arrays, buffers, or data views', () => {
+        expect(isTypedArray(new Array([1, 2, 3]))).toBeFalsy();
+        expect(isTypedArray(new ArrayBuffer(10))).toBeFalsy();
+        expect(isTypedArray(new DataView(new ArrayBuffer(10)))).toBeFalsy();
+    });
+});

From a9873574296c4c6ae1ca77147bfce8cbcf2b48c6 Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Sat, 1 May 2021 07:14:28 +0900
Subject: [PATCH 159/719] ARROW-12593: [Packaging][Ubuntu] Add support for
 Ubuntu 21.04

Closes #10192 from kou/packaging-ubuntu-21.04

Authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 dev/release/01-prepare-test.rb                |   4 +-
 dev/release/binary-task.rb                    |   1 +
 .../apt/ubuntu-hirsute/Dockerfile             |  41 ++
 .../linux-packages/apache-arrow/Rakefile      |   2 -
 .../apt/debian-bullseye/Dockerfile            |   4 +-
 .../apt/debian-bullseye/qemu-dummy-static     |  33 --
 .../apache-arrow/apt/debian-buster/Dockerfile |   2 -
 .../apt/debian-buster/qemu-dummy-static       |  33 --
 .../apache-arrow/apt/ubuntu-bionic/Dockerfile |   2 -
 .../apt/ubuntu-bionic/qemu-dummy-static       |  33 --
 .../apache-arrow/apt/ubuntu-focal/Dockerfile  |   2 -
 .../apt/ubuntu-focal/qemu-dummy-static        |  33 --
 .../apache-arrow/apt/ubuntu-groovy/Dockerfile |   4 +-
 .../apt/ubuntu-groovy/qemu-dummy-static       |  33 --
 .../apt/ubuntu-hirsute-arm64/from             |  18 +
 .../apt/ubuntu-hirsute/Dockerfile             |  82 +++
 .../apache-arrow/debian/control.in            |   2 +
 .../apache-arrow/yum/centos-7/Dockerfile      |   2 -
 .../yum/centos-7/qemu-dummy-static            |  33 --
 .../apache-arrow/yum/centos-8/Dockerfile      |   2 -
 .../yum/centos-8/qemu-dummy-static            |  33 --
 .../linux-packages/github.linux.amd64.yml     |   9 +-
 dev/tasks/linux-packages/package-task.rb      |   2 +
 .../linux-packages/travis.linux.arm64.yml     |   5 +
 dev/tasks/tasks.yml                           | 535 +-----------------
 25 files changed, 193 insertions(+), 757 deletions(-)
 create mode 100644 dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-hirsute/Dockerfile
 delete mode 100755 dev/tasks/linux-packages/apache-arrow/apt/debian-bullseye/qemu-dummy-static
 delete mode 100755 dev/tasks/linux-packages/apache-arrow/apt/debian-buster/qemu-dummy-static
 delete mode 100755 dev/tasks/linux-packages/apache-arrow/apt/ubuntu-bionic/qemu-dummy-static
 delete mode 100755 dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal/qemu-dummy-static
 delete mode 100755 dev/tasks/linux-packages/apache-arrow/apt/ubuntu-groovy/qemu-dummy-static
 create mode 100644 dev/tasks/linux-packages/apache-arrow/apt/ubuntu-hirsute-arm64/from
 create mode 100644 dev/tasks/linux-packages/apache-arrow/apt/ubuntu-hirsute/Dockerfile
 delete mode 100755 dev/tasks/linux-packages/apache-arrow/yum/centos-7/qemu-dummy-static
 delete mode 100755 dev/tasks/linux-packages/apache-arrow/yum/centos-8/qemu-dummy-static

diff --git a/dev/release/01-prepare-test.rb b/dev/release/01-prepare-test.rb
index 96be7d91ec9..007e4da040c 100644
--- a/dev/release/01-prepare-test.rb
+++ b/dev/release/01-prepare-test.rb
@@ -779,8 +779,8 @@ def test_deb_package_names
       },
       {
         sampled_diff: [
-          "-      - libarrow-glib#{@so_version}-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb",
-          "+      - libarrow-glib#{@next_so_version}-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb",
+          "-      - libarrow-dataset-glib#{@so_version}-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb",
+          "+      - libarrow-dataset-glib#{@next_so_version}-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb",
         ],
         path: "dev/tasks/tasks.yml",
       },
diff --git a/dev/release/binary-task.rb b/dev/release/binary-task.rb
index 2e5c91b16f9..0795f732bcd 100644
--- a/dev/release/binary-task.rb
+++ b/dev/release/binary-task.rb
@@ -868,6 +868,7 @@ def available_apt_targets
       ["ubuntu", "bionic", "main"],
       ["ubuntu", "focal", "main"],
       ["ubuntu", "groovy", "main"],
+      ["ubuntu", "hirsute", "main"],
     ]
   end
 
diff --git a/dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-hirsute/Dockerfile b/dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-hirsute/Dockerfile
new file mode 100644
index 00000000000..8b6fd7f0ec9
--- /dev/null
+++ b/dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-hirsute/Dockerfile
@@ -0,0 +1,41 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+FROM ubuntu:hirsute
+
+RUN \
+  echo "debconf debconf/frontend select Noninteractive" | \
+    debconf-set-selections
+
+RUN \
+  echo 'APT::Install-Recommends "false";' > \
+    /etc/apt/apt.conf.d/disable-install-recommends
+
+ARG DEBUG
+
+RUN \
+  quiet=$([ "${DEBUG}" = "yes" ] || echo "-qq") && \
+  apt update ${quiet} && \
+  apt install -y -V ${quiet} \
+    build-essential \
+    debhelper \
+    devscripts \
+    fakeroot \
+    gnupg \
+    lsb-release && \
+  apt clean && \
+  rm -rf /var/lib/apt/lists/*
diff --git a/dev/tasks/linux-packages/apache-arrow/Rakefile b/dev/tasks/linux-packages/apache-arrow/Rakefile
index d4848e417be..13d8fc81e90 100644
--- a/dev/tasks/linux-packages/apache-arrow/Rakefile
+++ b/dev/tasks/linux-packages/apache-arrow/Rakefile
@@ -84,8 +84,6 @@ class ApacheArrowPackageTask < PackageTask
   end
 
   def apt_arm64_cuda_available_target?(target)
-    # ubuntu-20.10 has navidia-cuda-toolkit but not libcuda1.
-    # ubuntu-21.04 may support this.
     false
   end
 
diff --git a/dev/tasks/linux-packages/apache-arrow/apt/debian-bullseye/Dockerfile b/dev/tasks/linux-packages/apache-arrow/apt/debian-bullseye/Dockerfile
index 761c44406e6..b35af302db8 100644
--- a/dev/tasks/linux-packages/apache-arrow/apt/debian-bullseye/Dockerfile
+++ b/dev/tasks/linux-packages/apache-arrow/apt/debian-bullseye/Dockerfile
@@ -18,8 +18,6 @@
 ARG FROM=debian:bullseye
 FROM ${FROM}
 
-COPY qemu-* /usr/bin/
-
 RUN \
   echo "debconf debconf/frontend select Noninteractive" | \
     debconf-set-selections
@@ -56,6 +54,8 @@ RUN \
     libgrpc++-dev \
     libgtest-dev \
     liblz4-dev \
+    libprotoc-dev \
+    libprotobuf-dev \
     libre2-dev \
     libsnappy-dev \
     libssl-dev \
diff --git a/dev/tasks/linux-packages/apache-arrow/apt/debian-bullseye/qemu-dummy-static b/dev/tasks/linux-packages/apache-arrow/apt/debian-bullseye/qemu-dummy-static
deleted file mode 100755
index c42e0962def..00000000000
--- a/dev/tasks/linux-packages/apache-arrow/apt/debian-bullseye/qemu-dummy-static
+++ /dev/null
@@ -1,33 +0,0 @@
-#!/bin/sh
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# Do nothing. This exists only for not requiring qemu-aarch64-static copy.
-# Recent Debian (buster or later) and Ubuntu (18.10 or later) on amd64 hosts or
-# arm64 host don't require qemu-aarch64-static in Docker image. But old Debian
-# and Ubuntu hosts on amd64 require qemu-aarch64-static in Docker image.
-#
-# We use "COPY qemu* /usr/bin/" in Dockerfile. If we don't put any "qemnu*",
-# the "COPY" is failed. It means that we always require "qemu*" even if we
-# use recent Debian/Ubuntu or arm64 host. If we have this dummy "qemu*" file,
-# the "COPY" isn't failed. It means that we can copy "qemu*" only when we
-# need.
-#
-# See also "script" in dev/tasks/linux-packages/azure.linux.arm64.yml.
-# Azure Pipelines uses old Ubuntu (18.04).
-# So we need to put "qemu-aarch64-static" into this directory.
diff --git a/dev/tasks/linux-packages/apache-arrow/apt/debian-buster/Dockerfile b/dev/tasks/linux-packages/apache-arrow/apt/debian-buster/Dockerfile
index a5c8456e87e..0c681ba7c2a 100644
--- a/dev/tasks/linux-packages/apache-arrow/apt/debian-buster/Dockerfile
+++ b/dev/tasks/linux-packages/apache-arrow/apt/debian-buster/Dockerfile
@@ -18,8 +18,6 @@
 ARG FROM=debian:buster
 FROM ${FROM}
 
-COPY qemu-* /usr/bin/
-
 RUN \
   echo "debconf debconf/frontend select Noninteractive" | \
     debconf-set-selections
diff --git a/dev/tasks/linux-packages/apache-arrow/apt/debian-buster/qemu-dummy-static b/dev/tasks/linux-packages/apache-arrow/apt/debian-buster/qemu-dummy-static
deleted file mode 100755
index c42e0962def..00000000000
--- a/dev/tasks/linux-packages/apache-arrow/apt/debian-buster/qemu-dummy-static
+++ /dev/null
@@ -1,33 +0,0 @@
-#!/bin/sh
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# Do nothing. This exists only for not requiring qemu-aarch64-static copy.
-# Recent Debian (buster or later) and Ubuntu (18.10 or later) on amd64 hosts or
-# arm64 host don't require qemu-aarch64-static in Docker image. But old Debian
-# and Ubuntu hosts on amd64 require qemu-aarch64-static in Docker image.
-#
-# We use "COPY qemu* /usr/bin/" in Dockerfile. If we don't put any "qemnu*",
-# the "COPY" is failed. It means that we always require "qemu*" even if we
-# use recent Debian/Ubuntu or arm64 host. If we have this dummy "qemu*" file,
-# the "COPY" isn't failed. It means that we can copy "qemu*" only when we
-# need.
-#
-# See also "script" in dev/tasks/linux-packages/azure.linux.arm64.yml.
-# Azure Pipelines uses old Ubuntu (18.04).
-# So we need to put "qemu-aarch64-static" into this directory.
diff --git a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-bionic/Dockerfile b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-bionic/Dockerfile
index 60be9295194..af5aac1ed86 100644
--- a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-bionic/Dockerfile
+++ b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-bionic/Dockerfile
@@ -18,8 +18,6 @@
 ARG FROM=ubuntu:bionic
 FROM ${FROM}
 
-COPY qemu-* /usr/bin/
-
 RUN \
   echo "debconf debconf/frontend select Noninteractive" | \
     debconf-set-selections
diff --git a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-bionic/qemu-dummy-static b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-bionic/qemu-dummy-static
deleted file mode 100755
index c42e0962def..00000000000
--- a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-bionic/qemu-dummy-static
+++ /dev/null
@@ -1,33 +0,0 @@
-#!/bin/sh
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# Do nothing. This exists only for not requiring qemu-aarch64-static copy.
-# Recent Debian (buster or later) and Ubuntu (18.10 or later) on amd64 hosts or
-# arm64 host don't require qemu-aarch64-static in Docker image. But old Debian
-# and Ubuntu hosts on amd64 require qemu-aarch64-static in Docker image.
-#
-# We use "COPY qemu* /usr/bin/" in Dockerfile. If we don't put any "qemnu*",
-# the "COPY" is failed. It means that we always require "qemu*" even if we
-# use recent Debian/Ubuntu or arm64 host. If we have this dummy "qemu*" file,
-# the "COPY" isn't failed. It means that we can copy "qemu*" only when we
-# need.
-#
-# See also "script" in dev/tasks/linux-packages/azure.linux.arm64.yml.
-# Azure Pipelines uses old Ubuntu (18.04).
-# So we need to put "qemu-aarch64-static" into this directory.
diff --git a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal/Dockerfile b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal/Dockerfile
index 8fe70edb6ad..c8f1794afe0 100644
--- a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal/Dockerfile
+++ b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal/Dockerfile
@@ -18,8 +18,6 @@
 ARG FROM=ubuntu:focal
 FROM ${FROM}
 
-COPY qemu-* /usr/bin/
-
 RUN \
   echo "debconf debconf/frontend select Noninteractive" | \
     debconf-set-selections
diff --git a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal/qemu-dummy-static b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal/qemu-dummy-static
deleted file mode 100755
index c42e0962def..00000000000
--- a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal/qemu-dummy-static
+++ /dev/null
@@ -1,33 +0,0 @@
-#!/bin/sh
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# Do nothing. This exists only for not requiring qemu-aarch64-static copy.
-# Recent Debian (buster or later) and Ubuntu (18.10 or later) on amd64 hosts or
-# arm64 host don't require qemu-aarch64-static in Docker image. But old Debian
-# and Ubuntu hosts on amd64 require qemu-aarch64-static in Docker image.
-#
-# We use "COPY qemu* /usr/bin/" in Dockerfile. If we don't put any "qemnu*",
-# the "COPY" is failed. It means that we always require "qemu*" even if we
-# use recent Debian/Ubuntu or arm64 host. If we have this dummy "qemu*" file,
-# the "COPY" isn't failed. It means that we can copy "qemu*" only when we
-# need.
-#
-# See also "script" in dev/tasks/linux-packages/azure.linux.arm64.yml.
-# Azure Pipelines uses old Ubuntu (18.04).
-# So we need to put "qemu-aarch64-static" into this directory.
diff --git a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-groovy/Dockerfile b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-groovy/Dockerfile
index 5209be29fb5..8d36a5f80ae 100644
--- a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-groovy/Dockerfile
+++ b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-groovy/Dockerfile
@@ -18,8 +18,6 @@
 ARG FROM=ubuntu:groovy
 FROM ${FROM}
 
-COPY qemu-* /usr/bin/
-
 RUN \
   echo "debconf debconf/frontend select Noninteractive" | \
     debconf-set-selections
@@ -53,6 +51,8 @@ RUN \
     libgrpc++-dev \
     libgtest-dev \
     liblz4-dev \
+    libprotoc-dev \
+    libprotobuf-dev \
     libre2-dev \
     libsnappy-dev \
     libssl-dev \
diff --git a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-groovy/qemu-dummy-static b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-groovy/qemu-dummy-static
deleted file mode 100755
index c42e0962def..00000000000
--- a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-groovy/qemu-dummy-static
+++ /dev/null
@@ -1,33 +0,0 @@
-#!/bin/sh
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# Do nothing. This exists only for not requiring qemu-aarch64-static copy.
-# Recent Debian (buster or later) and Ubuntu (18.10 or later) on amd64 hosts or
-# arm64 host don't require qemu-aarch64-static in Docker image. But old Debian
-# and Ubuntu hosts on amd64 require qemu-aarch64-static in Docker image.
-#
-# We use "COPY qemu* /usr/bin/" in Dockerfile. If we don't put any "qemnu*",
-# the "COPY" is failed. It means that we always require "qemu*" even if we
-# use recent Debian/Ubuntu or arm64 host. If we have this dummy "qemu*" file,
-# the "COPY" isn't failed. It means that we can copy "qemu*" only when we
-# need.
-#
-# See also "script" in dev/tasks/linux-packages/azure.linux.arm64.yml.
-# Azure Pipelines uses old Ubuntu (18.04).
-# So we need to put "qemu-aarch64-static" into this directory.
diff --git a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-hirsute-arm64/from b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-hirsute-arm64/from
new file mode 100644
index 00000000000..f19ea9022e5
--- /dev/null
+++ b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-hirsute-arm64/from
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+arm64v8/ubuntu:hirsute
diff --git a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-hirsute/Dockerfile b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-hirsute/Dockerfile
new file mode 100644
index 00000000000..0b8f2499b5b
--- /dev/null
+++ b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-hirsute/Dockerfile
@@ -0,0 +1,82 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG FROM=ubuntu:hirsute
+FROM ${FROM}
+
+RUN \
+  echo "debconf debconf/frontend select Noninteractive" | \
+    debconf-set-selections
+
+RUN \
+  echo 'APT::Install-Recommends "false";' > \
+    /etc/apt/apt.conf.d/disable-install-recommends
+
+ARG DEBUG
+RUN \
+  quiet=$([ "${DEBUG}" = "yes" ] || echo "-qq") && \
+  apt update ${quiet} && \
+  apt install -y -V ${quiet} \
+    build-essential \
+    ccache \
+    clang \
+    cmake \
+    debhelper \
+    devscripts \
+    git \
+    gtk-doc-tools \
+    libboost-filesystem-dev \
+    libboost-system-dev \
+    libbrotli-dev \
+    libbz2-dev \
+    libcurl4-openssl-dev \
+    libgirepository1.0-dev \
+    libglib2.0-doc \
+    libgmock-dev \
+    libgoogle-glog-dev \
+    libgrpc++-dev \
+    libgtest-dev \
+    liblz4-dev \
+    libprotoc-dev \
+    libprotobuf-dev \
+    libre2-dev \
+    libsnappy-dev \
+    libssl-dev \
+    libthrift-dev \
+    libutf8proc-dev \
+    libzstd-dev \
+    llvm-dev \
+    lsb-release \
+    ninja-build \
+    pkg-config \
+    protobuf-compiler-grpc \
+    python3-dev \
+    python3-numpy \
+    python3-pip \
+    python3-setuptools \
+    rapidjson-dev \
+    tzdata \
+    zlib1g-dev && \
+  if apt list | grep -q '^libcuda1'; then \
+    apt install -y -V ${quiet} nvidia-cuda-toolkit; \
+  else \
+    :; \
+  fi && \
+  apt clean && \
+  python3 -m pip install --no-use-pep517 meson && \
+  ln -s /usr/local/bin/meson /usr/bin/ && \
+  rm -rf /var/lib/apt/lists/*
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/control.in b/dev/tasks/linux-packages/apache-arrow/debian/control.in
index d9e08a20452..b846abd6f06 100644
--- a/dev/tasks/linux-packages/apache-arrow/debian/control.in
+++ b/dev/tasks/linux-packages/apache-arrow/debian/control.in
@@ -18,6 +18,8 @@ Build-Depends:
 @USE_SYSTEM_GRPC@  libgrpc++-dev,
   libgtest-dev,
   liblz4-dev,
+@USE_SYSTEM_GRPC@  libprotoc-dev,
+@USE_SYSTEM_GRPC@  libprotobuf-dev,
   libre2-dev,
   libsnappy-dev,
   libssl-dev,
diff --git a/dev/tasks/linux-packages/apache-arrow/yum/centos-7/Dockerfile b/dev/tasks/linux-packages/apache-arrow/yum/centos-7/Dockerfile
index 8c6c9d66d25..d4c56a50235 100644
--- a/dev/tasks/linux-packages/apache-arrow/yum/centos-7/Dockerfile
+++ b/dev/tasks/linux-packages/apache-arrow/yum/centos-7/Dockerfile
@@ -18,8 +18,6 @@
 ARG FROM=centos:7
 FROM ${FROM}
 
-COPY qemu-* /usr/bin/
-
 ARG DEBUG
 
 RUN \
diff --git a/dev/tasks/linux-packages/apache-arrow/yum/centos-7/qemu-dummy-static b/dev/tasks/linux-packages/apache-arrow/yum/centos-7/qemu-dummy-static
deleted file mode 100755
index c42e0962def..00000000000
--- a/dev/tasks/linux-packages/apache-arrow/yum/centos-7/qemu-dummy-static
+++ /dev/null
@@ -1,33 +0,0 @@
-#!/bin/sh
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# Do nothing. This exists only for not requiring qemu-aarch64-static copy.
-# Recent Debian (buster or later) and Ubuntu (18.10 or later) on amd64 hosts or
-# arm64 host don't require qemu-aarch64-static in Docker image. But old Debian
-# and Ubuntu hosts on amd64 require qemu-aarch64-static in Docker image.
-#
-# We use "COPY qemu* /usr/bin/" in Dockerfile. If we don't put any "qemnu*",
-# the "COPY" is failed. It means that we always require "qemu*" even if we
-# use recent Debian/Ubuntu or arm64 host. If we have this dummy "qemu*" file,
-# the "COPY" isn't failed. It means that we can copy "qemu*" only when we
-# need.
-#
-# See also "script" in dev/tasks/linux-packages/azure.linux.arm64.yml.
-# Azure Pipelines uses old Ubuntu (18.04).
-# So we need to put "qemu-aarch64-static" into this directory.
diff --git a/dev/tasks/linux-packages/apache-arrow/yum/centos-8/Dockerfile b/dev/tasks/linux-packages/apache-arrow/yum/centos-8/Dockerfile
index 66c435c333d..bb30de16b6b 100644
--- a/dev/tasks/linux-packages/apache-arrow/yum/centos-8/Dockerfile
+++ b/dev/tasks/linux-packages/apache-arrow/yum/centos-8/Dockerfile
@@ -18,8 +18,6 @@
 ARG FROM=centos:8
 FROM ${FROM}
 
-COPY qemu-* /usr/bin/
-
 ARG DEBUG
 
 RUN \
diff --git a/dev/tasks/linux-packages/apache-arrow/yum/centos-8/qemu-dummy-static b/dev/tasks/linux-packages/apache-arrow/yum/centos-8/qemu-dummy-static
deleted file mode 100755
index c42e0962def..00000000000
--- a/dev/tasks/linux-packages/apache-arrow/yum/centos-8/qemu-dummy-static
+++ /dev/null
@@ -1,33 +0,0 @@
-#!/bin/sh
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# Do nothing. This exists only for not requiring qemu-aarch64-static copy.
-# Recent Debian (buster or later) and Ubuntu (18.10 or later) on amd64 hosts or
-# arm64 host don't require qemu-aarch64-static in Docker image. But old Debian
-# and Ubuntu hosts on amd64 require qemu-aarch64-static in Docker image.
-#
-# We use "COPY qemu* /usr/bin/" in Dockerfile. If we don't put any "qemnu*",
-# the "COPY" is failed. It means that we always require "qemu*" even if we
-# use recent Debian/Ubuntu or arm64 host. If we have this dummy "qemu*" file,
-# the "COPY" isn't failed. It means that we can copy "qemu*" only when we
-# need.
-#
-# See also "script" in dev/tasks/linux-packages/azure.linux.arm64.yml.
-# Azure Pipelines uses old Ubuntu (18.04).
-# So we need to put "qemu-aarch64-static" into this directory.
diff --git a/dev/tasks/linux-packages/github.linux.amd64.yml b/dev/tasks/linux-packages/github.linux.amd64.yml
index 4fa056c18c7..9cd54748361 100644
--- a/dev/tasks/linux-packages/github.linux.amd64.yml
+++ b/dev/tasks/linux-packages/github.linux.amd64.yml
@@ -22,6 +22,13 @@
 jobs:
   package:
     name: Package
+    # We can't use Ubuntu 20.04 because it doesn't ship neither
+    # createrepo nor createrepo_c. We'll be able to use Ubuntu
+    # 22.04. It will ship createrepo_c. Or we can build createrepo_c
+    # from source like we do in travis.linux.arm64.yml.
+    #
+    # Note that createrepo or createrepo_c is only needed to test Yum
+    # repository.
     runs-on: ubuntu-18.04
     steps:
       {{ macros.github_checkout_arrow()|indent }}
@@ -30,7 +37,7 @@ jobs:
       - name: Set up Ruby
         uses: ruby/setup-ruby@v1
         with:
-          ruby-version: '2.6'
+          ruby-version: '3.0'
       - name: Free Up Disk Space
         shell: bash
         run: arrow/ci/scripts/util_cleanup.sh
diff --git a/dev/tasks/linux-packages/package-task.rb b/dev/tasks/linux-packages/package-task.rb
index 59f34593501..b3227bf84b5 100644
--- a/dev/tasks/linux-packages/package-task.rb
+++ b/dev/tasks/linux-packages/package-task.rb
@@ -257,6 +257,8 @@ def apt_targets_default
       # "ubuntu-focal-arm64",
       "ubuntu-groovy",
       # "ubuntu-groovy-arm64",
+      "ubuntu-hirsute",
+      # "ubuntu-hirsute-arm64",
     ]
   end
 
diff --git a/dev/tasks/linux-packages/travis.linux.arm64.yml b/dev/tasks/linux-packages/travis.linux.arm64.yml
index 6078942e737..b3f7ec75d2a 100644
--- a/dev/tasks/linux-packages/travis.linux.arm64.yml
+++ b/dev/tasks/linux-packages/travis.linux.arm64.yml
@@ -37,6 +37,10 @@ addons:
       - rake
       - rpm
 
+      # https://bugs.launchpad.net/ubuntu/+source/glibc/+bug/1916485
+      # We need to use runc 1.0.0~rc93 or later from focal-updated.
+      - runc
+
       # To build createrepo_c from source.
       # We can remove them when we can install createrepo_c package
       - cmake
@@ -52,6 +56,7 @@ addons:
       - libzstd-dev
       - pkg-config
       - zlib1g-dev
+    update: true
 
 services:
   - docker
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index b074407cc63..d48da7dc114 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -389,11 +389,27 @@ tasks:
 
 {############################## Linux PKGS ####################################}
 
-  debian-buster-amd64:
+{% for target in ["debian-buster",
+                  "debian-bullseye",
+                  "ubuntu-bionic",
+                  "ubuntu-focal",
+                  "ubuntu-groovy",
+                  "ubuntu-hirsute"] %}
+  {% for architecture in ["amd64", "arm64"] %}
+  {{ target }}-{{ architecture }}:
+    {% if architecture == "amd64" %}
     ci: github
     template: linux-packages/github.linux.amd64.yml
+    {% else %}
+    ci: travis
+    template: linux-packages/travis.linux.arm64.yml
+    {% endif %}
     params:
-      target: "debian-buster"
+    {% if architecture == "amd64" %}
+      target: "{{ target }}"
+    {% else %}
+      target: "{{ target }}-arm64"
+    {% endif %}
       task_namespace: "apt"
       upload_extensions:
         - .ddeb
@@ -402,6 +418,7 @@ tasks:
         - .dsc
         - .orig.tar.gz
     artifacts:
+    {% if architecture == "amd64" %}
       - apache-arrow-apt-source_{no_rc_version}-1.debian.tar.xz
       - apache-arrow-apt-source_{no_rc_version}-1.dsc
       - apache-arrow-apt-source_{no_rc_version}-1_all.deb
@@ -409,82 +426,11 @@ tasks:
       - apache-arrow_{no_rc_version}-1.debian.tar.xz
       - apache-arrow_{no_rc_version}-1.dsc
       - apache-arrow_{no_rc_version}.orig.tar.gz
-      - gir1.2-arrow-1.0_{no_rc_version}-1_[a-z0-9]+.deb
-      - gir1.2-arrow-cuda-1.0_{no_rc_version}-1_[a-z0-9]+.deb
-      - gir1.2-gandiva-1.0_{no_rc_version}-1_[a-z0-9]+.deb
-      - gir1.2-parquet-1.0_{no_rc_version}-1_[a-z0-9]+.deb
-      - gir1.2-plasma-1.0_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-glib500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-cuda-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-cuda-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-cuda-glib500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-cuda-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-cuda500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-cuda500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset-glib500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-dataset-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-dataset500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-flight-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-flight500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-flight500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-python-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-python500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-python500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva-glib500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libgandiva-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libgandiva500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet-glib500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libparquet-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libparquet500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libplasma-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libplasma-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libplasma-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libplasma-glib500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libplasma-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libplasma500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libplasma500_{no_rc_version}-1_[a-z0-9]+.deb
-      - plasma-store-server-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - plasma-store-server_{no_rc_version}-1_[a-z0-9]+.deb
-
-  debian-buster-arm64:
-    ci: travis
-    template: linux-packages/travis.linux.arm64.yml
-    params:
-      target: "debian-buster-arm64"
-      task_namespace: "apt"
-      upload_extensions:
-        - .ddeb
-        - .deb
-        - .debian.tar.xz
-        - .dsc
-        - .orig.tar.gz
-    artifacts:
-      - apache-arrow_{no_rc_version}-1.debian.tar.xz
-      - apache-arrow_{no_rc_version}-1.dsc
-      - apache-arrow_{no_rc_version}.orig.tar.gz
+    {% endif %}
       - gir1.2-arrow-1.0_{no_rc_version}-1_[a-z0-9]+.deb
       - gir1.2-arrow-dataset-1.0_{no_rc_version}-1_[a-z0-9]+.deb
       - gir1.2-gandiva-1.0_{no_rc_version}-1_[a-z0-9]+.deb
       - gir1.2-parquet-1.0_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-dataset-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-dataset-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-dataset-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
@@ -492,6 +438,7 @@ tasks:
       - libarrow-dataset-glib500_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-dataset500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
       - libarrow-dataset500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-flight-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-flight500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
       - libarrow-flight500_{no_rc_version}-1_[a-z0-9]+.deb
@@ -500,6 +447,9 @@ tasks:
       - libarrow-glib500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
       - libarrow-glib500_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-python-dev_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-python-flight-dev_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-python-flight500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libarrow-python-flight500_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-python500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
       - libarrow-python500_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
@@ -518,72 +468,15 @@ tasks:
       - libparquet-glib500_{no_rc_version}-1_[a-z0-9]+.deb
       - libparquet500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
       - libparquet500_{no_rc_version}-1_[a-z0-9]+.deb
-
-  debian-bullseye-amd64:
-    ci: github
-    template: linux-packages/github.linux.amd64.yml
-    params:
-      target: "debian-bullseye"
-      task_namespace: "apt"
-      upload_extensions:
-        - .ddeb
-        - .deb
-        - .debian.tar.xz
-        - .dsc
-        - .orig.tar.gz
-    artifacts:
-      - apache-arrow-apt-source_{no_rc_version}-1.debian.tar.xz
-      - apache-arrow-apt-source_{no_rc_version}-1.dsc
-      - apache-arrow-apt-source_{no_rc_version}-1_all.deb
-      - apache-arrow-apt-source_{no_rc_version}.orig.tar.gz
-      - apache-arrow_{no_rc_version}-1.debian.tar.xz
-      - apache-arrow_{no_rc_version}-1.dsc
-      - apache-arrow_{no_rc_version}.orig.tar.gz
-      - gir1.2-arrow-1.0_{no_rc_version}-1_[a-z0-9]+.deb
+    {% if architecture == "amd64" %}
       - gir1.2-arrow-cuda-1.0_{no_rc_version}-1_[a-z0-9]+.deb
-      - gir1.2-gandiva-1.0_{no_rc_version}-1_[a-z0-9]+.deb
-      - gir1.2-parquet-1.0_{no_rc_version}-1_[a-z0-9]+.deb
       - gir1.2-plasma-1.0_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-glib500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-glib500_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-cuda-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-cuda-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-cuda-glib500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
       - libarrow-cuda-glib500_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-cuda500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
       - libarrow-cuda500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset-glib500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-dataset-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-dataset500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-flight-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-flight500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-flight500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-python-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-python500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-python500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva-glib500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libgandiva-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libgandiva500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet-glib500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libparquet-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libparquet500_{no_rc_version}-1_[a-z0-9]+.deb
       - libplasma-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libplasma-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libplasma-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
@@ -593,381 +486,9 @@ tasks:
       - libplasma500_{no_rc_version}-1_[a-z0-9]+.deb
       - plasma-store-server-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
       - plasma-store-server_{no_rc_version}-1_[a-z0-9]+.deb
-
-  debian-bullseye-arm64:
-    ci: travis
-    template: linux-packages/travis.linux.arm64.yml
-    params:
-      target: "debian-bullseye-arm64"
-      task_namespace: "apt"
-      upload_extensions:
-        - .ddeb
-        - .deb
-        - .debian.tar.xz
-        - .dsc
-        - .orig.tar.gz
-    artifacts:
-      - apache-arrow_{no_rc_version}-1.debian.tar.xz
-      - apache-arrow_{no_rc_version}-1.dsc
-      - apache-arrow_{no_rc_version}.orig.tar.gz
-      - gir1.2-arrow-1.0_{no_rc_version}-1_[a-z0-9]+.deb
-      - gir1.2-arrow-dataset-1.0_{no_rc_version}-1_[a-z0-9]+.deb
-      - gir1.2-gandiva-1.0_{no_rc_version}-1_[a-z0-9]+.deb
-      - gir1.2-parquet-1.0_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset-glib500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-dataset-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-dataset500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-flight-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-flight500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-flight500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-glib500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-python-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-python500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-python500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva-glib500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libgandiva-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libgandiva500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet-glib500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libparquet-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libparquet500_{no_rc_version}-1_[a-z0-9]+.deb
-
-  ubuntu-bionic-amd64:
-    ci: github
-    template: linux-packages/github.linux.amd64.yml
-    params:
-      target: "ubuntu-bionic"
-      task_namespace: "apt"
-      env:
-        UBUNTU: 18.04
-      upload_extensions:
-        - .ddeb
-        - .deb
-        - .debian.tar.xz
-        - .dsc
-        - .orig.tar.gz
-    artifacts:
-      - apache-arrow-apt-source_{no_rc_version}-1.debian.tar.xz
-      - apache-arrow-apt-source_{no_rc_version}-1.dsc
-      - apache-arrow-apt-source_{no_rc_version}-1_all.deb
-      - apache-arrow-apt-source_{no_rc_version}.orig.tar.gz
-      - apache-arrow_{no_rc_version}-1.debian.tar.xz
-      - apache-arrow_{no_rc_version}-1.dsc
-      - apache-arrow_{no_rc_version}.orig.tar.gz
-      - gir1.2-arrow-1.0_{no_rc_version}-1_[a-z0-9]+.deb
-      - gir1.2-arrow-cuda-1.0_{no_rc_version}-1_[a-z0-9]+.deb
-      - gir1.2-gandiva-1.0_{no_rc_version}-1_[a-z0-9]+.deb
-      - gir1.2-parquet-1.0_{no_rc_version}-1_[a-z0-9]+.deb
-      - gir1.2-plasma-1.0_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-cuda-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-cuda-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-cuda-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-cuda500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-flight-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-flight500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-python-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-python500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libplasma-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libplasma-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libplasma-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libplasma-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libplasma500_{no_rc_version}-1_[a-z0-9]+.deb
-      - plasma-store-server-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - plasma-store-server_{no_rc_version}-1_[a-z0-9]+.deb
-
-  ubuntu-bionic-arm64:
-    ci: travis
-    template: linux-packages/travis.linux.arm64.yml
-    params:
-      target: "ubuntu-bionic-arm64"
-      task_namespace: "apt"
-      upload_extensions:
-        - .ddeb
-        - .deb
-        - .debian.tar.xz
-        - .dsc
-        - .orig.tar.gz
-    artifacts:
-      - apache-arrow_{no_rc_version}-1.debian.tar.xz
-      - apache-arrow_{no_rc_version}-1.dsc
-      - apache-arrow_{no_rc_version}.orig.tar.gz
-      - gir1.2-arrow-1.0_{no_rc_version}-1_[a-z0-9]+.deb
-      - gir1.2-arrow-dataset-1.0_{no_rc_version}-1_[a-z0-9]+.deb
-      - gir1.2-gandiva-1.0_{no_rc_version}-1_[a-z0-9]+.deb
-      - gir1.2-parquet-1.0_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-flight-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-flight500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-python-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-python500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet500_{no_rc_version}-1_[a-z0-9]+.deb
-
-  ubuntu-focal-amd64:
-    ci: github
-    template: linux-packages/github.linux.amd64.yml
-    params:
-      target: "ubuntu-focal"
-      task_namespace: "apt"
-      upload_extensions:
-        - .ddeb
-        - .deb
-        - .debian.tar.xz
-        - .dsc
-        - .orig.tar.gz
-    artifacts:
-      - apache-arrow-apt-source_{no_rc_version}-1.debian.tar.xz
-      - apache-arrow-apt-source_{no_rc_version}-1.dsc
-      - apache-arrow-apt-source_{no_rc_version}-1_all.deb
-      - apache-arrow-apt-source_{no_rc_version}.orig.tar.gz
-      - apache-arrow_{no_rc_version}-1.debian.tar.xz
-      - apache-arrow_{no_rc_version}-1.dsc
-      - apache-arrow_{no_rc_version}.orig.tar.gz
-      - gir1.2-arrow-1.0_{no_rc_version}-1_[a-z0-9]+.deb
-      - gir1.2-arrow-cuda-1.0_{no_rc_version}-1_[a-z0-9]+.deb
-      - gir1.2-gandiva-1.0_{no_rc_version}-1_[a-z0-9]+.deb
-      - gir1.2-parquet-1.0_{no_rc_version}-1_[a-z0-9]+.deb
-      - gir1.2-plasma-1.0_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-cuda-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-cuda-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-cuda-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-cuda500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-flight-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-flight500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-python-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-python500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libplasma-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libplasma-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libplasma-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libplasma-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libplasma500_{no_rc_version}-1_[a-z0-9]+.deb
-      - plasma-store-server-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - plasma-store-server_{no_rc_version}-1_[a-z0-9]+.deb
-
-  ubuntu-focal-arm64:
-    ci: travis
-    template: linux-packages/travis.linux.arm64.yml
-    params:
-      target: "ubuntu-focal-arm64"
-      task_namespace: "apt"
-      upload_extensions:
-        - .ddeb
-        - .deb
-        - .debian.tar.xz
-        - .dsc
-        - .orig.tar.gz
-    artifacts:
-      - apache-arrow_{no_rc_version}-1.debian.tar.xz
-      - apache-arrow_{no_rc_version}-1.dsc
-      - apache-arrow_{no_rc_version}.orig.tar.gz
-      - gir1.2-arrow-1.0_{no_rc_version}-1_[a-z0-9]+.deb
-      - gir1.2-arrow-dataset-1.0_{no_rc_version}-1_[a-z0-9]+.deb
-      - gir1.2-gandiva-1.0_{no_rc_version}-1_[a-z0-9]+.deb
-      - gir1.2-parquet-1.0_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-flight-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-flight500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-python-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-python500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet500_{no_rc_version}-1_[a-z0-9]+.deb
-
-  ubuntu-groovy-amd64:
-    ci: github
-    template: linux-packages/github.linux.amd64.yml
-    params:
-      target: "ubuntu-groovy"
-      task_namespace: "apt"
-      upload_extensions:
-        - .ddeb
-        - .deb
-        - .debian.tar.xz
-        - .dsc
-        - .orig.tar.gz
-    artifacts:
-      - apache-arrow-apt-source_{no_rc_version}-1.debian.tar.xz
-      - apache-arrow-apt-source_{no_rc_version}-1.dsc
-      - apache-arrow-apt-source_{no_rc_version}-1_all.deb
-      - apache-arrow-apt-source_{no_rc_version}.orig.tar.gz
-      - apache-arrow_{no_rc_version}-1.debian.tar.xz
-      - apache-arrow_{no_rc_version}-1.dsc
-      - apache-arrow_{no_rc_version}.orig.tar.gz
-      - gir1.2-arrow-1.0_{no_rc_version}-1_[a-z0-9]+.deb
-      - gir1.2-arrow-cuda-1.0_{no_rc_version}-1_[a-z0-9]+.deb
-      - gir1.2-gandiva-1.0_{no_rc_version}-1_[a-z0-9]+.deb
-      - gir1.2-parquet-1.0_{no_rc_version}-1_[a-z0-9]+.deb
-      - gir1.2-plasma-1.0_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-cuda-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-cuda-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-cuda-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-cuda500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-flight-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-flight500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-python-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-python500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libplasma-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libplasma-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libplasma-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libplasma-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libplasma500_{no_rc_version}-1_[a-z0-9]+.deb
-      - plasma-store-server-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - plasma-store-server_{no_rc_version}-1_[a-z0-9]+.deb
-
-  ubuntu-groovy-arm64:
-    ci: travis
-    template: linux-packages/travis.linux.arm64.yml
-    params:
-      target: "ubuntu-groovy-arm64"
-      task_namespace: "apt"
-      upload_extensions:
-        - .ddeb
-        - .deb
-        - .debian.tar.xz
-        - .dsc
-        - .orig.tar.gz
-    artifacts:
-      - apache-arrow_{no_rc_version}-1.debian.tar.xz
-      - apache-arrow_{no_rc_version}-1.dsc
-      - apache-arrow_{no_rc_version}.orig.tar.gz
-      - gir1.2-arrow-1.0_{no_rc_version}-1_[a-z0-9]+.deb
-      - gir1.2-arrow-dataset-1.0_{no_rc_version}-1_[a-z0-9]+.deb
-      - gir1.2-gandiva-1.0_{no_rc_version}-1_[a-z0-9]+.deb
-      - gir1.2-parquet-1.0_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-flight-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-flight500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-python-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-python500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet500_{no_rc_version}-1_[a-z0-9]+.deb
+    {% endif %}
+  {% endfor %}
+{% endfor %}
 
   centos-7-amd64:
     ci: github

From bf187e4bbe2c87e57929740068521863a9a21ffc Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Sat, 1 May 2021 07:21:35 +0900
Subject: [PATCH 160/719] ARROW-12605: [Documentation] Update line numbers in
 cpp/dataset.rst

Use the start-after/end-before properties so we aren't hardcoding (as many) line numbers in the documentation.

Closes #10209 from lidavidm/arrow-12605

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 .../arrow/dataset_documentation_example.cc    | 18 ++++++++
 docs/source/cpp/dataset.rst                   | 46 ++++++++++++-------
 2 files changed, 48 insertions(+), 16 deletions(-)

diff --git a/cpp/examples/arrow/dataset_documentation_example.cc b/cpp/examples/arrow/dataset_documentation_example.cc
index 0fb4ad2f627..1aac66d4a6c 100644
--- a/cpp/examples/arrow/dataset_documentation_example.cc
+++ b/cpp/examples/arrow/dataset_documentation_example.cc
@@ -48,6 +48,7 @@ namespace cp = arrow::compute;
     }                                              \
   } while (0);
 
+// (Doc section: Reading Datasets)
 // Generate some data for the rest of this example.
 std::shared_ptr<arrow::Table> CreateTable() {
   auto schema =
@@ -84,7 +85,9 @@ std::string CreateExampleParquetDataset(const std::shared_ptr<fs::FileSystem>& f
       *table->Slice(5), arrow::default_memory_pool(), output, /*chunk_size=*/2048));
   return base_path;
 }
+// (Doc section: Reading Datasets)
 
+// (Doc section: Reading different file formats)
 // Set up a dataset by writing two Feather files.
 std::string CreateExampleFeatherDataset(const std::shared_ptr<fs::FileSystem>& filesystem,
                                         const std::string& root_path) {
@@ -103,7 +106,9 @@ std::string CreateExampleFeatherDataset(const std::shared_ptr<fs::FileSystem>& f
   ABORT_ON_FAILURE(writer->Close());
   return base_path;
 }
+// (Doc section: Reading different file formats)
 
+// (Doc section: Reading and writing partitioned data)
 // Set up a dataset by writing files with partitioning
 std::string CreateExampleParquetHivePartitionedDataset(
     const std::shared_ptr<fs::FileSystem>& filesystem, const std::string& root_path) {
@@ -148,7 +153,9 @@ std::string CreateExampleParquetHivePartitionedDataset(
   ABORT_ON_FAILURE(ds::FileSystemDataset::Write(write_options, scanner));
   return base_path;
 }
+// (Doc section: Reading and writing partitioned data)
 
+// (Doc section: Dataset discovery)
 // Read the whole dataset with the given format, without partitioning.
 std::shared_ptr<arrow::Table> ScanWholeDataset(
     const std::shared_ptr<fs::FileSystem>& filesystem,
@@ -169,7 +176,9 @@ std::shared_ptr<arrow::Table> ScanWholeDataset(
   auto scanner = scan_builder->Finish().ValueOrDie();
   return scanner->ToTable().ValueOrDie();
 }
+// (Doc section: Dataset discovery)
 
+// (Doc section: Filtering data)
 // Read a dataset, but select only column "b" and only rows where b < 4.
 //
 // This is useful when you only want a few columns from a dataset. Where possible,
@@ -190,7 +199,9 @@ std::shared_ptr<arrow::Table> FilterAndSelectDataset(
   auto scanner = scan_builder->Finish().ValueOrDie();
   return scanner->ToTable().ValueOrDie();
 }
+// (Doc section: Filtering data)
 
+// (Doc section: Projecting columns)
 // Read a dataset, but with column projection.
 //
 // This is useful to derive new columns from existing data. For example, here we
@@ -222,7 +233,9 @@ std::shared_ptr<arrow::Table> ProjectDataset(
   auto scanner = scan_builder->Finish().ValueOrDie();
   return scanner->ToTable().ValueOrDie();
 }
+// (Doc section: Projecting columns)
 
+// (Doc section: Projecting columns #2)
 // Read a dataset, but with column projection.
 //
 // This time, we read all original columns plus one derived column. This simply combines
@@ -253,7 +266,9 @@ std::shared_ptr<arrow::Table> SelectAndProjectDataset(
   auto scanner = scan_builder->Finish().ValueOrDie();
   return scanner->ToTable().ValueOrDie();
 }
+// (Doc section: Projecting columns #2)
 
+// (Doc section: Reading and writing partitioned data #2)
 // Read an entire dataset, but with partitioning information.
 std::shared_ptr<arrow::Table> ScanPartitionedDataset(
     const std::shared_ptr<fs::FileSystem>& filesystem,
@@ -278,7 +293,9 @@ std::shared_ptr<arrow::Table> ScanPartitionedDataset(
   auto scanner = scan_builder->Finish().ValueOrDie();
   return scanner->ToTable().ValueOrDie();
 }
+// (Doc section: Reading and writing partitioned data #2)
 
+// (Doc section: Reading and writing partitioned data #3)
 // Read an entire dataset, but with partitioning information. Also, filter the dataset on
 // the partition values.
 std::shared_ptr<arrow::Table> FilterPartitionedDataset(
@@ -300,6 +317,7 @@ std::shared_ptr<arrow::Table> FilterPartitionedDataset(
   auto scanner = scan_builder->Finish().ValueOrDie();
   return scanner->ToTable().ValueOrDie();
 }
+// (Doc section: Reading and writing partitioned data #3)
 
 int main(int argc, char** argv) {
   if (argc < 3) {
diff --git a/docs/source/cpp/dataset.rst b/docs/source/cpp/dataset.rst
index be33e892c2f..8c16592e4d9 100644
--- a/docs/source/cpp/dataset.rst
+++ b/docs/source/cpp/dataset.rst
@@ -52,7 +52,8 @@ of a directory with two parquet files:
 
 .. literalinclude:: ../../../cpp/examples/arrow/dataset_documentation_example.cc
    :language: cpp
-   :lines: 50-85
+   :start-after: (Doc section: Reading Datasets)
+   :end-before: (Doc section: Reading Datasets)
    :linenos:
    :lineno-match:
 
@@ -68,7 +69,8 @@ given a base directory path:
 
 .. literalinclude:: ../../../cpp/examples/arrow/dataset_documentation_example.cc
    :language: cpp
-   :lines: 151-165
+   :start-after: (Doc section: Dataset discovery)
+   :end-before: (Doc section: Dataset discovery)
    :emphasize-lines: 6-11
    :linenos:
    :lineno-match:
@@ -103,7 +105,8 @@ method:
 
 .. literalinclude:: ../../../cpp/examples/arrow/dataset_documentation_example.cc
    :language: cpp
-   :lines: 151-170
+   :start-after: (Doc section: Dataset discovery)
+   :end-before: (Doc section: Dataset discovery)
    :emphasize-lines: 16-19
    :linenos:
    :lineno-match:
@@ -126,16 +129,19 @@ If we save the table as Feather files instead of Parquet files:
 
 .. literalinclude:: ../../../cpp/examples/arrow/dataset_documentation_example.cc
    :language: cpp
-   :lines: 87-104
+   :start-after: (Doc section: Reading different file formats)
+   :end-before: (Doc section: Reading different file formats)
    :linenos:
    :lineno-match:
 
 …then we can read the Feather file by passing an :class:`arrow::dataset::IpcFileFormat`:
 
-.. literalinclude:: ../../../cpp/examples/arrow/dataset_documentation_example.cc
-   :language: cpp
-   :lines: 318,334
-   :linenos:
+.. code-block:: cpp
+
+    auto format = std::make_shared<ds::ParquetFileFormat>();
+    // ...
+    auto factory = ds::FileSystemDatasetFactory::Make(filesystem, selector, format, options)
+                       .ValueOrDie();
 
 Customizing file formats
 ~~~~~~~~~~~~~~~~~~~~~~~~
@@ -169,7 +175,8 @@ which columns to read:
 
 .. literalinclude:: ../../../cpp/examples/arrow/dataset_documentation_example.cc
    :language: cpp
-   :lines: 172-191
+   :start-after: (Doc section: Filtering data)
+   :end-before: (Doc section: Filtering data)
    :emphasize-lines: 16
    :linenos:
    :lineno-match:
@@ -184,7 +191,8 @@ reduce the amount of I/O needed.
 
 .. literalinclude:: ../../../cpp/examples/arrow/dataset_documentation_example.cc
    :language: cpp
-   :lines: 172-191
+   :start-after: (Doc section: Filtering data)
+   :end-before: (Doc section: Filtering data)
    :emphasize-lines: 17
    :linenos:
    :lineno-match:
@@ -204,7 +212,8 @@ and a vector of names for the columns:
 
 .. literalinclude:: ../../../cpp/examples/arrow/dataset_documentation_example.cc
    :language: cpp
-   :lines: 193-223
+   :start-after: (Doc section: Projecting columns)
+   :end-before: (Doc section: Projecting columns)
    :emphasize-lines: 18-28
    :linenos:
    :lineno-match:
@@ -216,7 +225,8 @@ dataset schema:
 
 .. literalinclude:: ../../../cpp/examples/arrow/dataset_documentation_example.cc
    :language: cpp
-   :lines: 225-254
+   :start-after: (Doc section: Projecting columns #2)
+   :end-before: (Doc section: Projecting columns #2)
    :emphasize-lines: 17-27
    :linenos:
    :lineno-match:
@@ -268,7 +278,8 @@ writing functionality.
 
 .. literalinclude:: ../../../cpp/examples/arrow/dataset_documentation_example.cc
    :language: cpp
-   :lines: 106-149
+   :start-after: (Doc section: Reading and writing partitioned data)
+   :end-before: (Doc section: Reading and writing partitioned data)
    :emphasize-lines: 25-42
    :linenos:
    :lineno-match:
@@ -282,7 +293,8 @@ partitioning scheme:
 
 .. literalinclude:: ../../../cpp/examples/arrow/dataset_documentation_example.cc
    :language: cpp
-   :lines: 256-279
+   :start-after: (Doc section: Reading and writing partitioned data #2)
+   :end-before: (Doc section: Reading and writing partitioned data #2)
    :emphasize-lines: 7,9-11
    :linenos:
    :lineno-match:
@@ -316,7 +328,8 @@ altogether if they do not match the filter:
 
 .. literalinclude:: ../../../cpp/examples/arrow/dataset_documentation_example.cc
    :language: cpp
-   :lines: 281-301
+   :start-after: (Doc section: Reading and writing partitioned data #3)
+   :end-before: (Doc section: Reading and writing partitioned data #3)
    :emphasize-lines: 15-18
    :linenos:
    :lineno-match:
@@ -377,7 +390,8 @@ disk which was used in the rest of the example:
 
 .. literalinclude:: ../../../cpp/examples/arrow/dataset_documentation_example.cc
    :language: cpp
-   :lines: 106-149
+   :start-after: Reading and writing partitioned data
+   :end-before: Reading and writing partitioned data
    :emphasize-lines: 24-28
    :linenos:
    :lineno-match:

From eec855dcd613afbfb637f7f56cdc68f88b842c52 Mon Sep 17 00:00:00 2001
From: Zachary Blackwood <zachary.blackwood@dtn.com>
Date: Sat, 1 May 2021 07:47:24 +0900
Subject: [PATCH 161/719] MINOR: [Python] Fix documentation to
 allow_truncated_timestamps

This surprised me when trying to use this flag to avoid this exception:

```python
df.to_parquet("test.parquet")
*** pyarrow.lib.ArrowInvalid: Casting from timestamp[ns, tz=UTC] to timestamp[us] would lose data:
1520751599999999999

df.to_parquet("test.parquet", coerce_timestamps='us')
*** pyarrow.lib.ArrowInvalid: Casting from timestamp[ns, tz=UTC] to timestamp[us] would lose data:
1520751599999999999

df.to_parquet("test.parquet", allow_truncated_timestamps=True)
*** pyarrow.lib.ArrowInvalid: Casting from timestamp[ns, tz=UTC] to timestamp[us] would lose data:
1520751599999999999

df.to_parquet("test.parquet", coerce_timestamps='us', allow_truncated_timestamps=True)
# Succeeded without exception
```

Closes #10211 from blackary/fix-allow-truncated-timestamps-doc

Authored-by: Zachary Blackwood <zachary.blackwood@dtn.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 python/pyarrow/parquet.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/python/pyarrow/parquet.py b/python/pyarrow/parquet.py
index 4564740bc83..88683d95013 100644
--- a/python/pyarrow/parquet.py
+++ b/python/pyarrow/parquet.py
@@ -510,7 +510,9 @@ def _sanitize_table(table, new_schema, flavor):
 allow_truncated_timestamps : bool, default False
     Allow loss of data when coercing timestamps to a particular
     resolution. E.g. if microsecond or nanosecond data is lost when coercing to
-    'ms', do not raise an exception.
+    'ms', do not raise an exception. Passing ``allow_truncated_timestamp=True``
+    will NOT result in the truncation exception being ignored unless
+    ``coerce_timestamps`` is not None.
 compression : str or dict
     Specify the compression codec, either on a general basis or per-column.
     Valid values: {'NONE', 'SNAPPY', 'GZIP', 'BROTLI', 'LZ4', 'ZSTD'}.

From 2746266addddf71d20a4fe49381497b894c4d15c Mon Sep 17 00:00:00 2001
From: Anthony Louis <anthony@simbioseventures.com>
Date: Sat, 1 May 2021 08:02:27 +0900
Subject: [PATCH 162/719] ARROW-12591: [Java][Gandiva] Create single Gandiva
 jar for MacOS and Linux

Today, there are two different tasks that generate Gandiva's jars for MacOS and Linux. The objective is to create a single jar with the shared lib for the two operating systems.

Closes #10189 from anthonylouisbsb/feature/create-single-jar-macos-linux

Authored-by: Anthony Louis <anthony@simbioseventures.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 dev/tasks/gandiva-jars/build-java.sh          |  38 +-----
 .../gandiva-jars/check-shared-dependencies.sh |  58 +++++++++
 dev/tasks/gandiva-jars/github.linux.yml       |  47 --------
 dev/tasks/gandiva-jars/github.osx.yml         |  46 --------
 dev/tasks/gandiva-jars/github.yml             | 111 ++++++++++++++++++
 dev/tasks/tasks.yml                           |  10 +-
 6 files changed, 175 insertions(+), 135 deletions(-)
 create mode 100755 dev/tasks/gandiva-jars/check-shared-dependencies.sh
 delete mode 100644 dev/tasks/gandiva-jars/github.linux.yml
 delete mode 100644 dev/tasks/gandiva-jars/github.osx.yml
 create mode 100644 dev/tasks/gandiva-jars/github.yml

diff --git a/dev/tasks/gandiva-jars/build-java.sh b/dev/tasks/gandiva-jars/build-java.sh
index 7dec07115a3..79af606d3d0 100755
--- a/dev/tasks/gandiva-jars/build-java.sh
+++ b/dev/tasks/gandiva-jars/build-java.sh
@@ -22,43 +22,13 @@ set -e
 CPP_BUILD_DIR=$GITHUB_WORKSPACE/arrow/dist/
 
 pushd java
-  if [[ $OS_NAME == "linux" ]]; then
-    SO_DEP=ldd
-    GANDIVA_LIB="$CPP_BUILD_DIR"libgandiva_jni.so
-    WHITELIST=(linux-vdso libz librt libdl libpthread libstdc++ libm libgcc_s libc ld-linux-x86-64)
-  else
-    SO_DEP="otool -L"
-    GANDIVA_LIB="$CPP_BUILD_DIR"libgandiva_jni.dylib
-    WHITELIST=(libgandiva_jni libz libncurses libSystem libc++)
-  fi
-
-  # print the shared library dependencies
-  eval "$SO_DEP" "$GANDIVA_LIB"
-
-  if [[ $CHECK_SHARED_DEPENDENCIES ]] ; then
-    # exit if any shared library not in whitelisted set is found
-    echo "Checking shared dependencies"
-    while read -r line
-    do
-      found=false
-      for item in "${WHITELIST[@]}"
-      do
-        if [[ "$line" == *"$item"* ]] ; then
-            found=true
-        fi
-      done
-      if [[ "$found" == false ]] ; then
-        echo "Unexpected shared dependency found"
-        exit 1
-      fi
-    done < <(eval "$SO_DEP" "$GANDIVA_LIB" | awk '{print $1}')
-  fi
-
   # build the entire project
   mvn clean install -q -DskipTests -P arrow-jni -Darrow.cpp.build.dir=$CPP_BUILD_DIR
   # test only gandiva
   mvn test -q -P arrow-jni -pl gandiva -Dgandiva.cpp.build.dir=$CPP_BUILD_DIR
 
-  # copy the jars to distribution folder
-  find gandiva/target/ -name "*.jar" -not -name "*tests*" -exec cp  {} $CPP_BUILD_DIR \;
+  if [[ $COPY_JAR_TO_DISTRIBUTION_FOLDER ]] ; then
+    # copy the jars to distribution folder
+    find gandiva/target/ -name "*.jar" -not -name "*tests*" -exec cp  {} $CPP_BUILD_DIR \;
+  fi
 popd
diff --git a/dev/tasks/gandiva-jars/check-shared-dependencies.sh b/dev/tasks/gandiva-jars/check-shared-dependencies.sh
new file mode 100755
index 00000000000..ce93ff57183
--- /dev/null
+++ b/dev/tasks/gandiva-jars/check-shared-dependencies.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e
+
+CPP_BUILD_DIR=$GITHUB_WORKSPACE/arrow/dist/
+
+if [[ $OS_NAME == "linux" ]]; then
+  SO_DEP=ldd
+  GANDIVA_LIB="$CPP_BUILD_DIR"libgandiva_jni.so
+  WHITELIST=(linux-vdso libz librt libdl libpthread libstdc++ libm libgcc_s libc ld-linux-x86-64)
+else
+  SO_DEP="otool -L"
+  GANDIVA_LIB="$CPP_BUILD_DIR"libgandiva_jni.dylib
+  WHITELIST=(libgandiva_jni libz libncurses libSystem libc++)
+fi
+
+# print the shared library dependencies
+$SO_DEP "$GANDIVA_LIB" | tee dependencies_temp_file.txt 
+
+if [[ $CHECK_SHARED_DEPENDENCIES ]] ; then
+  # exit if any shared library not in whitelisted set is found
+  echo "Checking shared dependencies"
+
+  awk '{print $1}' dependencies_temp_file.txt | \
+  while read -r line
+  do
+    found=false
+    
+    for item in "${WHITELIST[@]}"
+    do
+    if [[ "$line" == *"$item"* ]] ; then
+        found=true
+    fi
+    done
+
+    if [[ "$found" == false ]] ; then
+      echo "Unexpected shared dependency found $line"
+      exit 1
+    fi
+  done
+fi
\ No newline at end of file
diff --git a/dev/tasks/gandiva-jars/github.linux.yml b/dev/tasks/gandiva-jars/github.linux.yml
deleted file mode 100644
index aabcdbee0ef..00000000000
--- a/dev/tasks/gandiva-jars/github.linux.yml
+++ /dev/null
@@ -1,47 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-{% import 'macros.jinja' as macros with context %}
-
-{{ macros.github_header() }}
-
-jobs:
-  package:
-    name: Package Gandiva
-    runs-on: ubuntu-18.04
-    steps:
-      - name: Checkout Arrow
-        run: |
-          git clone --no-checkout {{ arrow.remote }} arrow
-          git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
-          if [ $CROSSBOW_USE_COMMIT_ID = true ]; then git -C arrow checkout {{ arrow.head }}; else git -C arrow checkout FETCH_HEAD; fi
-          git -C arrow submodule update --init --recursive
-      - name: Build Gandiva
-        run: |
-          python3 -VV
-          cd arrow
-          mkdir -p dist
-          export CC="gcc-4.9" CXX="g++-4.9"
-          ulimit -c unlimited -S
-          set -e
-          docker run -v $PWD:/arrow quay.io/anthonylouisbsb/arrow:gandivadocker /arrow/dev/tasks/gandiva-jars/build-cpp-linux.sh
-          dev/tasks/gandiva-jars/build-java.sh
-        env:
-          OS_NAME: "linux"
-          CHECK_SHARED_DEPENDENCIES: true
-
-      {{ macros.github_upload_releases("arrow/dist/*.jar")|indent }}
diff --git a/dev/tasks/gandiva-jars/github.osx.yml b/dev/tasks/gandiva-jars/github.osx.yml
deleted file mode 100644
index 3dd6fe46bb6..00000000000
--- a/dev/tasks/gandiva-jars/github.osx.yml
+++ /dev/null
@@ -1,46 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-{% import 'macros.jinja' as macros with context %}
-
-{{ macros.github_header() }}
-
-jobs:
-  package:
-    name: Package Gandiva
-    runs-on: macos-latest
-    steps:
-      - name: Checkout Arrow
-        run: |
-          git clone --no-checkout {{ arrow.remote }} arrow
-          git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
-          if [ $CROSSBOW_USE_COMMIT_ID = true ]; then git -C arrow checkout {{ arrow.head }}; else git -C arrow checkout FETCH_HEAD; fi
-          git -C arrow submodule update --init --recursive
-      - name: Build Gandiva
-        run: |
-          cd arrow
-          mkdir -p dist
-          export ARROW_TEST_DATA=$PWD/testing/data
-          set -e
-          dev/tasks/gandiva-jars/build-cpp-osx.sh
-          dev/tasks/gandiva-jars/build-java.sh
-        env:
-          OS_NAME: "osx"
-          CHECK_SHARED_DEPENDENCIES: true
-          MACOSX_DEPLOYMENT_TARGET: "10.11"
-
-      {{ macros.github_upload_releases("arrow/dist/*.jar")|indent }}
diff --git a/dev/tasks/gandiva-jars/github.yml b/dev/tasks/gandiva-jars/github.yml
new file mode 100644
index 00000000000..a1ac093c47b
--- /dev/null
+++ b/dev/tasks/gandiva-jars/github.yml
@@ -0,0 +1,111 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+{% import 'macros.jinja' as macros with context %}
+
+{{ macros.github_header() }}
+
+jobs:
+  build-cpp-ubuntu:
+    name: Build C++ Gandiva Libs Ubuntu
+    runs-on: ubuntu-18.04
+    steps:
+      - name: Checkout Arrow
+        run: |
+          git clone --no-checkout {{ arrow.remote }} arrow
+          git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
+          if [ $CROSSBOW_USE_COMMIT_ID = true ]; then git -C arrow checkout {{ arrow.head }}; else git -C arrow checkout FETCH_HEAD; fi
+          git -C arrow submodule update --init --recursive
+      - name: Build Gandiva
+        run: |
+          python3 -VV
+          cd arrow
+          mkdir -p dist
+          export CC="gcc-4.9" CXX="g++-4.9"
+          ulimit -c unlimited -S
+          set -e
+          docker run -v $PWD:/arrow quay.io/anthonylouisbsb/arrow:gandivadocker /arrow/dev/tasks/gandiva-jars/build-cpp-linux.sh
+          dev/tasks/gandiva-jars/check-shared-dependencies.sh
+        env:
+          OS_NAME: "linux"
+          CHECK_SHARED_DEPENDENCIES: true
+      - name: Upload Artifacts
+        uses: actions/upload-artifact@v2
+        with:
+          name: ubuntu-shared-lib
+          path: arrow/dist/libgandiva_jni.so
+  build-cpp-macos:
+    name: Build C++ Gandiva Libs MacOS
+    runs-on: macos-latest
+    steps:
+      - name: Checkout Arrow
+        run: |
+          git clone --no-checkout {{ arrow.remote }} arrow
+          git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
+          if [ $CROSSBOW_USE_COMMIT_ID = true ]; then git -C arrow checkout {{ arrow.head }}; else git -C arrow checkout FETCH_HEAD; fi
+          git -C arrow submodule update --init --recursive
+      - name: Build Gandiva
+        run: |
+          cd arrow
+          mkdir -p dist
+          export ARROW_TEST_DATA=$PWD/testing/data
+          set -e
+          dev/tasks/gandiva-jars/build-cpp-osx.sh
+          dev/tasks/gandiva-jars/check-shared-dependencies.sh
+        env:
+          OS_NAME: "osx"
+          CHECK_SHARED_DEPENDENCIES: true
+          MACOSX_DEPLOYMENT_TARGET: "10.11"
+      - name: Upload Artifacts
+        uses: actions/upload-artifact@v2
+        with:
+          name: macos-shared-lib
+          path: arrow/dist/libgandiva_jni.dylib
+  package-jar:
+    name: Build Gandiva Jar
+    runs-on: macos-latest
+    needs: [build-cpp-macos, build-cpp-ubuntu]
+    steps:
+      - name: Checkout Arrow
+        run: |
+          git clone --no-checkout {{ arrow.remote }} arrow
+          git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
+          if [ $CROSSBOW_USE_COMMIT_ID = true ]; then git -C arrow checkout {{ arrow.head }}; else git -C arrow checkout FETCH_HEAD; fi
+          git -C arrow submodule update --init --recursive
+          mkdir -p arrow/dist
+      - name: Download Linux Gandiva Library
+        uses: actions/download-artifact@v2
+        with:
+          name: ubuntu-shared-lib
+          path: arrow/dist
+      - name: Download MacOS Gandiva Library
+        uses: actions/download-artifact@v2
+        with:
+          name: macos-shared-lib
+          path: arrow/dist
+      - name: Build Gandiva Jar
+        run: |
+          cd arrow
+          export ARROW_TEST_DATA=$PWD/testing/data
+          set -e
+          dev/tasks/gandiva-jars/build-java.sh
+        env:
+          OS_NAME: "osx"
+          COPY_JAR_TO_DISTRIBUTION_FOLDER: true
+          MACOSX_DEPLOYMENT_TARGET: "10.11"
+  
+      {{ macros.github_upload_releases("arrow/dist/*.jar")|indent }}
\ No newline at end of file
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index d48da7dc114..c0a9fe69d26 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -658,15 +658,9 @@ tasks:
 
   ############################## Gandiva Tasks ################################
 
-  gandiva-jar-ubuntu:
+  gandiva-jar:
     ci: github
-    template: gandiva-jars/github.linux.yml
-    artifacts:
-      - arrow-gandiva-{no_rc_version}-SNAPSHOT.jar
-
-  gandiva-jar-osx:
-    ci: github
-    template: gandiva-jars/github.osx.yml
+    template: gandiva-jars/github.yml
     artifacts:
       - arrow-gandiva-{no_rc_version}-SNAPSHOT.jar
 

From 9808f914c4e10f1edcb20ffb6065f97bf9a79093 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Sat, 1 May 2021 22:26:15 +0800
Subject: [PATCH 163/719] ARROW-12581: [C++][FlightRPC] Allow benchmarking
 DoPut with a data file

Closes #10212 from lidavidm/arrow-12581

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Yibo Cai <yibo.cai@arm.com>
---
 cpp/src/arrow/flight/flight_benchmark.cc | 98 ++++++++++++++++--------
 1 file changed, 68 insertions(+), 30 deletions(-)

diff --git a/cpp/src/arrow/flight/flight_benchmark.cc b/cpp/src/arrow/flight/flight_benchmark.cc
index f9f60c40c91..288f03c1055 100644
--- a/cpp/src/arrow/flight/flight_benchmark.cc
+++ b/cpp/src/arrow/flight/flight_benchmark.cc
@@ -23,6 +23,7 @@
 
 #include <gflags/gflags.h>
 
+#include "arrow/io/file.h"
 #include "arrow/io/memory.h"
 #include "arrow/ipc/api.h"
 #include "arrow/record_batch.h"
@@ -58,6 +59,9 @@ DEFINE_string(compression, "",
               "Leave blank to disable compression.\n"
               "E.g., \"zstd\":   zstd with default compression level.\n"
               "      \"zstd:7\": zstd with compression leve = 7.\n");
+DEFINE_string(
+    data_file, "",
+    "Instead of random data, use data from the given IPC file. Only affects -test_put.");
 
 namespace perf = arrow::flight::perf;
 
@@ -164,29 +168,35 @@ arrow::Result<PerformanceResult> RunDoGetTest(FlightClient* client,
   return PerformanceResult{num_batches, num_records, num_bytes};
 }
 
-arrow::Result<PerformanceResult> RunDoPutTest(FlightClient* client,
-                                              const FlightCallOptions& call_options,
-                                              const perf::Token& token,
-                                              const FlightEndpoint& endpoint,
-                                              PerformanceStats* stats) {
-  std::unique_ptr<FlightStreamWriter> writer;
-  std::unique_ptr<FlightMetadataReader> reader;
+struct SizedBatch {
+  std::shared_ptr<arrow::RecordBatch> batch;
+  int64_t bytes;
+};
+
+arrow::Result<std::vector<SizedBatch>> GetPutData(const perf::Token& token) {
+  if (!FLAGS_data_file.empty()) {
+    ARROW_ASSIGN_OR_RAISE(auto file, arrow::io::ReadableFile::Open(FLAGS_data_file));
+    ARROW_ASSIGN_OR_RAISE(auto reader,
+                          arrow::ipc::RecordBatchFileReader::Open(std::move(file)));
+    std::vector<SizedBatch> batches(reader->num_record_batches());
+    for (int i = 0; i < reader->num_record_batches(); i++) {
+      ARROW_ASSIGN_OR_RAISE(batches[i].batch, reader->ReadRecordBatch(i));
+      RETURN_NOT_OK(arrow::ipc::GetRecordBatchSize(*batches[i].batch, &batches[i].bytes));
+    }
+    return batches;
+  }
+
   std::shared_ptr<Schema> schema =
       arrow::schema({field("a", int64()), field("b", int64()), field("c", int64()),
                      field("d", int64())});
-  RETURN_NOT_OK(
-      client->DoPut(call_options, FlightDescriptor{}, schema, &writer, &reader));
 
   // This is hard-coded for right now, 4 columns each with int64
   const int bytes_per_record = 32;
 
-  int64_t num_bytes = 0;
-  int64_t num_records = 0;
-  int64_t num_batches = 0;
-
   std::shared_ptr<ResizableBuffer> buffer;
   std::vector<std::shared_ptr<Array>> arrays;
 
+  const int64_t total_records = token.definition().records_per_stream();
   const int32_t length = token.definition().records_per_batch();
   const int32_t ncolumns = 4;
   for (int i = 0; i < ncolumns; ++i) {
@@ -197,32 +207,55 @@ arrow::Result<PerformanceResult> RunDoPutTest(FlightClient* client,
   }
 
   std::shared_ptr<RecordBatch> batch = RecordBatch::Make(schema, length, arrays);
+  std::vector<SizedBatch> batches;
 
   int64_t records_sent = 0;
-  const int64_t total_records = token.definition().records_per_stream();
-  StopWatch timer;
   while (records_sent < total_records) {
     if (records_sent + length > total_records) {
       const int last_length = total_records - records_sent;
-      RETURN_NOT_OK(writer->WriteRecordBatch(*(batch->Slice(0, last_length))));
-      num_records += last_length;
       // Hard-coded
-      num_bytes += last_length * bytes_per_record;
+      batches.push_back(SizedBatch{batch->Slice(0, last_length),
+                                   /*bytes=*/last_length * bytes_per_record});
       records_sent += last_length;
     } else {
-      timer.Start();
-      RETURN_NOT_OK(writer->WriteRecordBatch(*batch));
-      stats->AddLatency(timer.Stop());
-      num_records += length;
       // Hard-coded
-      num_bytes += length * bytes_per_record;
+      batches.push_back(SizedBatch{batch, /*bytes=*/length * bytes_per_record});
       records_sent += length;
     }
-    ++num_batches;
   }
+  return batches;
+}
 
+arrow::Result<PerformanceResult> RunDoPutTest(FlightClient* client,
+                                              const FlightCallOptions& call_options,
+                                              const perf::Token& token,
+                                              const FlightEndpoint& endpoint,
+                                              PerformanceStats* stats) {
+  ARROW_ASSIGN_OR_RAISE(const auto batches, GetPutData(token));
+  StopWatch timer;
+  int64_t num_records = 0;
+  int64_t num_bytes = 0;
+  std::unique_ptr<FlightStreamWriter> writer;
+  std::unique_ptr<FlightMetadataReader> reader;
+  RETURN_NOT_OK(client->DoPut(call_options, FlightDescriptor{},
+                              batches[0].batch->schema(), &writer, &reader));
+  for (size_t i = 0; i < batches.size(); i++) {
+    auto batch = batches[i];
+    auto is_last = i == (batches.size() - 1);
+    if (is_last) {
+      RETURN_NOT_OK(writer->WriteRecordBatch(*batch.batch));
+      num_records += batch.batch->num_rows();
+      num_bytes += batch.bytes;
+    } else {
+      timer.Start();
+      RETURN_NOT_OK(writer->WriteRecordBatch(*batch.batch));
+      stats->AddLatency(timer.Stop());
+      num_records += batch.batch->num_rows();
+      num_bytes += batch.bytes;
+    }
+  }
   RETURN_NOT_OK(writer->Close());
-  return PerformanceResult{num_batches, num_records, num_bytes};
+  return PerformanceResult{static_cast<int64_t>(batches.size()), num_records, num_bytes};
 }
 
 Status DoSinglePerfRun(FlightClient* client, const FlightCallOptions& call_options,
@@ -283,12 +316,13 @@ Status DoSinglePerfRun(FlightClient* client, const FlightCallOptions& call_optio
     RETURN_NOT_OK(task.status());
   }
 
-  // Check that number of rows read / written is as expected
-  int64_t records_for_run = stats->total_records - start_total_records;
-  if (records_for_run != static_cast<int64_t>(plan->total_records())) {
-    return Status::Invalid("Did not consume expected number of records");
+  if (FLAGS_data_file.empty()) {
+    // Check that number of rows read / written is as expected
+    int64_t records_for_run = stats->total_records - start_total_records;
+    if (records_for_run != static_cast<int64_t>(plan->total_records())) {
+      return Status::Invalid("Did not consume expected number of records");
+    }
   }
-
   return Status::OK();
 }
 
@@ -381,6 +415,10 @@ int main(int argc, char** argv) {
 
     call_options.write_options.codec = std::move(codec);
   }
+  if (!FLAGS_data_file.empty() && !FLAGS_test_put) {
+    std::cerr << "A data file can only be specified with \"-test_put\"" << std::endl;
+    return 1;
+  }
 
   std::unique_ptr<arrow::flight::TestServer> server;
   arrow::flight::Location location;

From 4363fefe46dc357a9013f0f4bcdc235e1e2e8124 Mon Sep 17 00:00:00 2001
From: Stephen <sgates786@gmail.com>
Date: Sat, 1 May 2021 12:11:11 -0500
Subject: [PATCH 164/719] [MINOR][Julia] Fix typo in docs

---
 julia/Arrow/docs/src/manual.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/julia/Arrow/docs/src/manual.md b/julia/Arrow/docs/src/manual.md
index 8f49f14f302..b819a1b3b73 100644
--- a/julia/Arrow/docs/src/manual.md
+++ b/julia/Arrow/docs/src/manual.md
@@ -2,7 +2,7 @@
 
 The goal of this documentation is to provide a brief introduction to the arrow data format, then provide a walk-through of the functionality provided in the Arrow.jl Julia package, with an aim to expose a little of the machinery "under the hood" to help explain how things work and how that influences real-world use-cases for the arrow data format.
 
-The best place to learn about the Apache arrow project is [the website itself](https://arrow.apache.org/), specifically the data format [specification](https://arrow.apache.org/docs/format/Columnar.html). Put briefly, the arrow project provides a formal speficiation for how columnar, "table" data can be laid out efficiently in memory to standardize and maximize the ability to share data across languages/platforms. In the current [apache/arrow GitHub repository](https://github.com/apache/arrow), language implementations exist for C++, Java, Go, Javascript, Rust, to name a few. Other database vendors and data processing frameworks/applications have also built support for the arrow format, allowing for a wide breadth of possibility for applications to "speak the data language" of arrow.
+The best place to learn about the Apache arrow project is [the website itself](https://arrow.apache.org/), specifically the data format [specification](https://arrow.apache.org/docs/format/Columnar.html). Put briefly, the arrow project provides a formal specification for how columnar, "table" data can be laid out efficiently in memory to standardize and maximize the ability to share data across languages/platforms. In the current [apache/arrow GitHub repository](https://github.com/apache/arrow), language implementations exist for C++, Java, Go, Javascript, Rust, to name a few. Other database vendors and data processing frameworks/applications have also built support for the arrow format, allowing for a wide breadth of possibility for applications to "speak the data language" of arrow.
 
 The [Arrow.jl](https://github.com/JuliaData/Arrow.jl) Julia package is another implementation, allowing the ability to both read and write data in the arrow format. As a data format, arrow specifies an exact memory layout to be used for columnar table data, and as such, "reading" involves custom Julia objects ([`Arrow.Table`](@ref) and [`Arrow.Stream`](@ref)), which read the *metadata* of an "arrow memory blob", then *wrap* the array data contained therein, having learned the type and size, amongst other properties, from the metadata. Let's take a closer look at what this "reading" of arrow memory really means/looks like.
 

From 06c751b8d8b0e692b80ff3402eb5f7375f4ec6ca Mon Sep 17 00:00:00 2001
From: "Jorge C. Leitao" <jorgecarleitao@gmail.com>
Date: Mon, 3 May 2021 13:58:09 +0200
Subject: [PATCH 165/719] ARROW-12444: [Rust] Remove rust
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Removes rust from this repository and uses git clone to fetch the arrow-rs repo for integration tests.

Each commit is an independent change.

Note: Rust implementations have moved to:
* https://github.com/apache/arrow-rs
* https://github.com/apache/arrow-datafusion

Closes #10096 from jorgecarleitao/rm-rust

Authored-by: Jorge C. Leitao <jorgecarleitao@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 .env                                          |     1 -
 .github/workflows/cancel.yml                  |     7 -
 .github/workflows/dev.yml                     |     2 +-
 .github/workflows/dev_pr/labeler.yml          |    11 -
 .github/workflows/integration.yml             |     5 +
 .github/workflows/rust.yml                    |   480 -
 .pre-commit-config.yaml                       |     8 -
 README.md                                     |     2 +-
 ci/detect-changes.py                          |     7 +-
 ci/docker/linux-apt-lint.dockerfile           |     9 -
 dev/archery/archery/cli.py                    |     1 -
 dev/archery/archery/lang/rust.py              |    23 -
 dev/archery/archery/utils/lint.py             |    20 +-
 dev/archery/archery/utils/source.py           |     5 -
 dev/release/01-prepare-test.rb                |   226 -
 dev/release/post-07-rust.sh                   |    74 -
 dev/release/rat_exclude_files.txt             |     6 -
 dev/release/utils-prepare.sh                  |    19 -
 docs/source/developers/contributing.rst       |     2 +-
 ...atlab_interface_for_apache_arrow_design.md |     2 +-
 rust/.gitignore                               |     5 -
 rust/Cargo.toml                               |    34 -
 rust/README.md                                |   186 -
 rust/arrow-flight/Cargo.toml                  |    45 -
 rust/arrow-flight/README.md                   |    29 -
 rust/arrow-flight/build.rs                    |    54 -
 rust/arrow-flight/examples/server.rs          |   131 -
 .../arrow-flight/src/arrow.flight.protocol.rs |  1039 --
 rust/arrow-flight/src/lib.rs                  |    20 -
 rust/arrow-flight/src/utils.rs                |   167 -
 .../.cargo/config                             |    22 -
 .../.gitignore                                |     2 -
 .../Cargo.toml                                |    38 -
 .../README.md                                 |    57 -
 .../pyproject.toml                            |    20 -
 .../src/lib.rs                                |   188 -
 .../tests/test_sql.py                         |    99 -
 rust/arrow/Cargo.toml                         |   151 -
 rust/arrow/README.md                          |   206 -
 rust/arrow/benches/aggregate_kernels.rs       |    67 -
 rust/arrow/benches/arithmetic_kernels.rs      |   103 -
 rust/arrow/benches/array_from_vec.rs          |   120 -
 rust/arrow/benches/array_slice.rs             |    52 -
 rust/arrow/benches/bit_length_kernel.rs       |    46 -
 rust/arrow/benches/boolean_kernels.rs         |    51 -
 rust/arrow/benches/buffer_bit_ops.rs          |    59 -
 rust/arrow/benches/buffer_create.rs           |   190 -
 rust/arrow/benches/builder.rs                 |   116 -
 rust/arrow/benches/cast_kernels.rs            |   185 -
 rust/arrow/benches/comparison_kernels.rs      |   201 -
 rust/arrow/benches/concatenate_kernel.rs      |    66 -
 rust/arrow/benches/csv_writer.rs              |    70 -
 rust/arrow/benches/equal.rs                   |    57 -
 rust/arrow/benches/filter_kernels.rs          |   106 -
 rust/arrow/benches/json_reader.rs             |   112 -
 rust/arrow/benches/length_kernel.rs           |    47 -
 rust/arrow/benches/mutable_array.rs           |    60 -
 rust/arrow/benches/sort_kernel.rs             |   121 -
 rust/arrow/benches/take_kernels.rs            |   128 -
 rust/arrow/build.rs                           |    26 -
 rust/arrow/examples/builders.rs               |   131 -
 rust/arrow/examples/dynamic_types.rs          |   101 -
 rust/arrow/examples/read_csv.rs               |    43 -
 rust/arrow/examples/read_csv_infer_schema.rs  |    36 -
 rust/arrow/examples/tensor_builder.rs         |    67 -
 rust/arrow/format-0ed34c83.patch              |   220 -
 rust/arrow/regen.sh                           |   157 -
 rust/arrow/src/alloc/alignment.rs             |   119 -
 rust/arrow/src/alloc/mod.rs                   |   136 -
 rust/arrow/src/alloc/types.rs                 |    71 -
 rust/arrow/src/arch/avx512.rs                 |    73 -
 rust/arrow/src/arch/mod.rs                    |    22 -
 rust/arrow/src/array/array.rs                 |   640 -
 rust/arrow/src/array/array_binary.rs          |  1157 --
 rust/arrow/src/array/array_boolean.rs         |   291 -
 rust/arrow/src/array/array_dictionary.rs      |   408 -
 rust/arrow/src/array/array_list.rs            |  1056 --
 rust/arrow/src/array/array_primitive.rs       |   942 --
 rust/arrow/src/array/array_string.rs          |   528 -
 rust/arrow/src/array/array_struct.rs          |   531 -
 rust/arrow/src/array/array_union.rs           |   831 --
 rust/arrow/src/array/builder.rs               |  3171 ----
 rust/arrow/src/array/cast.rs                  |    84 -
 rust/arrow/src/array/data.rs                  |   679 -
 rust/arrow/src/array/equal/boolean.rs         |    93 -
 rust/arrow/src/array/equal/decimal.rs         |    76 -
 rust/arrow/src/array/equal/dictionary.rs      |    82 -
 rust/arrow/src/array/equal/fixed_binary.rs    |    76 -
 rust/arrow/src/array/equal/fixed_list.rs      |    80 -
 rust/arrow/src/array/equal/list.rs            |   172 -
 rust/arrow/src/array/equal/mod.rs             |  1277 --
 rust/arrow/src/array/equal/null.rs            |    31 -
 rust/arrow/src/array/equal/primitive.rs       |    73 -
 rust/arrow/src/array/equal/structure.rs       |    90 -
 rust/arrow/src/array/equal/utils.rs           |   264 -
 rust/arrow/src/array/equal/variable_size.rs   |   110 -
 rust/arrow/src/array/equal_json.rs            |  1113 --
 rust/arrow/src/array/ffi.rs                   |   168 -
 rust/arrow/src/array/iterator.rs              |   527 -
 rust/arrow/src/array/mod.rs                   |   283 -
 rust/arrow/src/array/null.rs                  |   155 -
 rust/arrow/src/array/ord.rs                   |   310 -
 rust/arrow/src/array/raw_pointer.rs           |    64 -
 rust/arrow/src/array/transform/boolean.rs     |    45 -
 .../arrow/src/array/transform/fixed_binary.rs |    65 -
 rust/arrow/src/array/transform/list.rs        |    99 -
 rust/arrow/src/array/transform/mod.rs         |  1206 --
 rust/arrow/src/array/transform/null.rs        |    26 -
 rust/arrow/src/array/transform/primitive.rs   |    40 -
 rust/arrow/src/array/transform/structure.rs   |    67 -
 rust/arrow/src/array/transform/utils.rs       |    76 -
 .../src/array/transform/variable_size.rs      |   105 -
 rust/arrow/src/bitmap.rs                      |   157 -
 rust/arrow/src/buffer/immutable.rs            |   541 -
 rust/arrow/src/buffer/mod.rs                  |    69 -
 rust/arrow/src/buffer/mutable.rs              |   749 -
 rust/arrow/src/buffer/ops.rs                  |   429 -
 rust/arrow/src/bytes.rs                       |   159 -
 rust/arrow/src/compute/kernels/aggregate.rs   |   975 --
 rust/arrow/src/compute/kernels/arithmetic.rs  |  1009 --
 rust/arrow/src/compute/kernels/arity.rs       |    74 -
 rust/arrow/src/compute/kernels/boolean.rs     |  1146 --
 rust/arrow/src/compute/kernels/cast.rs        |  3843 -----
 rust/arrow/src/compute/kernels/cast_utils.rs  |   299 -
 rust/arrow/src/compute/kernels/comparison.rs  |  1619 --
 rust/arrow/src/compute/kernels/concat.rs      |   387 -
 rust/arrow/src/compute/kernels/filter.rs      |   584 -
 rust/arrow/src/compute/kernels/length.rs      |   385 -
 rust/arrow/src/compute/kernels/limit.rs       |   200 -
 rust/arrow/src/compute/kernels/mod.rs         |    37 -
 rust/arrow/src/compute/kernels/regexp.rs      |   160 -
 rust/arrow/src/compute/kernels/sort.rs        |  2246 ---
 rust/arrow/src/compute/kernels/substring.rs   |   269 -
 rust/arrow/src/compute/kernels/take.rs        |  1621 --
 rust/arrow/src/compute/kernels/temporal.rs    |   187 -
 rust/arrow/src/compute/kernels/window.rs      |   109 -
 rust/arrow/src/compute/kernels/zip.rs         |    87 -
 rust/arrow/src/compute/mod.rs                 |    36 -
 rust/arrow/src/compute/util.rs                |   463 -
 rust/arrow/src/csv/mod.rs                     |    27 -
 rust/arrow/src/csv/reader.rs                  |  1291 --
 rust/arrow/src/csv/writer.rs                  |   651 -
 rust/arrow/src/datatypes/datatype.rs          |   477 -
 rust/arrow/src/datatypes/field.rs             |   541 -
 rust/arrow/src/datatypes/mod.rs               |  1241 --
 rust/arrow/src/datatypes/native.rs            |   333 -
 rust/arrow/src/datatypes/numeric.rs           |   534 -
 rust/arrow/src/datatypes/schema.rs            |   337 -
 rust/arrow/src/datatypes/types.rs             |   185 -
 rust/arrow/src/error.rs                       |   134 -
 rust/arrow/src/ffi.rs                         |   997 --
 rust/arrow/src/ipc/convert.rs                 |   871 --
 rust/arrow/src/ipc/gen/File.rs                |   491 -
 rust/arrow/src/ipc/gen/Message.rs             |  1346 --
 rust/arrow/src/ipc/gen/Schema.rs              |  4586 ------
 rust/arrow/src/ipc/gen/SparseTensor.rs        |  1902 ---
 rust/arrow/src/ipc/gen/Tensor.rs              |   913 --
 rust/arrow/src/ipc/gen/mod.rs                 |    31 -
 rust/arrow/src/ipc/mod.rs                     |    39 -
 rust/arrow/src/ipc/reader.rs                  |  1160 --
 rust/arrow/src/ipc/writer.rs                  |  1160 --
 rust/arrow/src/json/mod.rs                    |    27 -
 rust/arrow/src/json/reader.rs                 |  2949 ----
 rust/arrow/src/json/writer.rs                 |  1206 --
 rust/arrow/src/lib.rs                         |   162 -
 rust/arrow/src/record_batch.rs                |   434 -
 rust/arrow/src/temporal_conversions.rs        |   151 -
 rust/arrow/src/tensor.rs                      |   495 -
 rust/arrow/src/util/bench_util.rs             |   155 -
 rust/arrow/src/util/bit_chunk_iterator.rs     |   257 -
 rust/arrow/src/util/bit_util.rs               |   322 -
 rust/arrow/src/util/data_gen.rs               |   347 -
 rust/arrow/src/util/display.rs                |   298 -
 rust/arrow/src/util/integration_util.rs       |   957 --
 rust/arrow/src/util/mod.rs                    |    31 -
 rust/arrow/src/util/pretty.rs                 |   421 -
 rust/arrow/src/util/serialization.rs          |    33 -
 rust/arrow/src/util/string_writer.rs          |   105 -
 rust/arrow/src/util/test_util.rs              |   211 -
 rust/arrow/src/util/trusted_len.rs            |    82 -
 rust/arrow/src/zz_memory_check.rs             |    31 -
 rust/arrow/test/data/arrays.json              |     3 -
 rust/arrow/test/data/basic.json               |    12 -
 rust/arrow/test/data/basic_nulls.json         |    12 -
 rust/arrow/test/data/integration.json         |   808 -
 .../test/data/list_string_dict_nested.json    |     3 -
 .../data/list_string_dict_nested_nulls.json   |     3 -
 rust/arrow/test/data/mixed_arrays.json        |     4 -
 rust/arrow/test/data/mixed_arrays.json.gz     |   Bin 141 -> 0 bytes
 rust/arrow/test/data/nested_structs.json      |     4 -
 rust/arrow/test/data/null_test.csv            |     6 -
 rust/arrow/test/data/uk_cities.csv            |    37 -
 .../test/data/uk_cities_with_headers.csv      |    38 -
 rust/arrow/test/data/various_types.csv        |     6 -
 .../arrow/test/data/various_types_invalid.csv |     6 -
 rust/ballista/.dockerignore                   |    18 -
 rust/ballista/README.md                       |    64 -
 rust/ballista/dev/build-rust-base.sh          |    21 -
 rust/ballista/dev/build-rust.sh               |    24 -
 rust/ballista/dev/integration-tests.sh        |    28 -
 rust/ballista/docker/README.md                |    29 -
 rust/ballista/docker/rust-base.dockerfile     |    99 -
 rust/ballista/docker/rust.dockerfile          |    71 -
 rust/ballista/docs/README.md                  |    37 -
 rust/ballista/docs/architecture.md            |    75 -
 rust/ballista/docs/dev-env-rust.md            |    38 -
 rust/ballista/docs/images/query-execution.png |   Bin 11378 -> 0 bytes
 rust/ballista/docs/integration-testing.md     |    32 -
 rust/ballista/docs/release-process.md         |    68 -
 rust/ballista/docs/rust-docker.md             |    66 -
 rust/ballista/docs/user-guide/.gitignore      |     2 -
 rust/ballista/docs/user-guide/README.md       |    36 -
 rust/ballista/docs/user-guide/book.toml       |    23 -
 rust/ballista/docs/user-guide/src/SUMMARY.md  |    30 -
 .../docs/user-guide/src/client-rust.md        |    22 -
 rust/ballista/docs/user-guide/src/clients.md  |    22 -
 .../docs/user-guide/src/configuration.md      |    32 -
 .../docs/user-guide/src/deployment.md         |    26 -
 .../docs/user-guide/src/docker-compose.md     |    55 -
 rust/ballista/docs/user-guide/src/faq.md      |    31 -
 .../src/img/ballista-architecture.png         |   Bin 21225 -> 0 bytes
 .../docs/user-guide/src/introduction.md       |    52 -
 .../docs/user-guide/src/kubernetes.md         |   216 -
 .../docs/user-guide/src/standalone.md         |    92 -
 rust/ballista/rust/.dockerignore              |    23 -
 rust/ballista/rust/.gitignore                 |     2 -
 rust/ballista/rust/Cargo.toml                 |    30 -
 .../rust/benchmarks/tpch/.dockerignore        |    25 -
 rust/ballista/rust/benchmarks/tpch/.gitignore |     1 -
 rust/ballista/rust/benchmarks/tpch/Cargo.toml |    36 -
 rust/ballista/rust/benchmarks/tpch/README.md  |   104 -
 .../rust/benchmarks/tpch/docker-compose.yaml  |    62 -
 .../rust/benchmarks/tpch/entrypoint.sh        |    22 -
 .../rust/benchmarks/tpch/queries/q1.sql       |    21 -
 .../rust/benchmarks/tpch/queries/q10.sql      |    31 -
 .../rust/benchmarks/tpch/queries/q11.sql      |    27 -
 .../rust/benchmarks/tpch/queries/q12.sql      |    30 -
 .../rust/benchmarks/tpch/queries/q13.sql      |    20 -
 .../rust/benchmarks/tpch/queries/q14.sql      |    13 -
 .../rust/benchmarks/tpch/queries/q16.sql      |    30 -
 .../rust/benchmarks/tpch/queries/q17.sql      |    17 -
 .../rust/benchmarks/tpch/queries/q18.sql      |    32 -
 .../rust/benchmarks/tpch/queries/q19.sql      |    35 -
 .../rust/benchmarks/tpch/queries/q2.sql       |    43 -
 .../rust/benchmarks/tpch/queries/q20.sql      |    37 -
 .../rust/benchmarks/tpch/queries/q21.sql      |    39 -
 .../rust/benchmarks/tpch/queries/q22.sql      |    37 -
 .../rust/benchmarks/tpch/queries/q3.sql       |    22 -
 .../rust/benchmarks/tpch/queries/q4.sql       |    21 -
 .../rust/benchmarks/tpch/queries/q5.sql       |    24 -
 .../rust/benchmarks/tpch/queries/q6.sql       |     9 -
 .../rust/benchmarks/tpch/queries/q7.sql       |    39 -
 .../rust/benchmarks/tpch/queries/q8.sql       |    37 -
 .../rust/benchmarks/tpch/queries/q9.sql       |    32 -
 rust/ballista/rust/benchmarks/tpch/run.sh     |    25 -
 .../ballista/rust/benchmarks/tpch/src/main.rs |   360 -
 .../ballista/rust/benchmarks/tpch/tpch-gen.sh |    33 -
 .../rust/benchmarks/tpch/tpchgen.dockerfile   |    32 -
 rust/ballista/rust/client/Cargo.toml          |    35 -
 rust/ballista/rust/client/README.md           |    22 -
 .../rust/client/src/columnar_batch.rs         |   167 -
 rust/ballista/rust/client/src/context.rs      |   400 -
 rust/ballista/rust/client/src/lib.rs          |    20 -
 rust/ballista/rust/client/src/prelude.rs      |    23 -
 rust/ballista/rust/core/Cargo.toml            |    50 -
 rust/ballista/rust/core/README.md             |    21 -
 rust/ballista/rust/core/build.rs              |    26 -
 rust/ballista/rust/core/proto/ballista.proto  |   824 -
 rust/ballista/rust/core/src/client.rs         |   224 -
 rust/ballista/rust/core/src/datasource.rs     |    72 -
 rust/ballista/rust/core/src/error.rs          |   172 -
 .../rust/core/src/execution_plans/mod.rs      |    27 -
 .../core/src/execution_plans/query_stage.rs   |    92 -
 .../src/execution_plans/shuffle_reader.rs     |   106 -
 .../src/execution_plans/unresolved_shuffle.rs |   101 -
 rust/ballista/rust/core/src/lib.rs            |    34 -
 rust/ballista/rust/core/src/memory_stream.rs  |    93 -
 .../core/src/serde/logical_plan/from_proto.rs |  1200 --
 .../rust/core/src/serde/logical_plan/mod.rs   |   929 --
 .../core/src/serde/logical_plan/to_proto.rs   |  1233 --
 rust/ballista/rust/core/src/serde/mod.rs      |    69 -
 .../src/serde/physical_plan/from_proto.rs     |   398 -
 .../rust/core/src/serde/physical_plan/mod.rs  |   178 -
 .../core/src/serde/physical_plan/to_proto.rs  |   556 -
 .../core/src/serde/scheduler/from_proto.rs    |   124 -
 .../rust/core/src/serde/scheduler/mod.rs      |   262 -
 .../rust/core/src/serde/scheduler/to_proto.rs |    90 -
 rust/ballista/rust/core/src/utils.rs          |   327 -
 rust/ballista/rust/executor/Cargo.toml        |    59 -
 rust/ballista/rust/executor/README.md         |    31 -
 rust/ballista/rust/executor/build.rs          |    24 -
 .../examples/example_executor_config.toml     |    22 -
 .../rust/executor/executor_config_spec.toml   |    79 -
 rust/ballista/rust/executor/src/collect.rs    |   127 -
 .../rust/executor/src/execution_loop.rs       |   172 -
 .../rust/executor/src/flight_service.rs       |   374 -
 rust/ballista/rust/executor/src/lib.rs        |    52 -
 rust/ballista/rust/executor/src/main.rs       |   176 -
 rust/ballista/rust/scheduler/Cargo.toml       |    66 -
 rust/ballista/rust/scheduler/README.md        |    51 -
 rust/ballista/rust/scheduler/build.rs         |    24 -
 .../rust/scheduler/scheduler_config_spec.toml |    60 -
 .../rust/scheduler/src/api/handlers.rs        |    55 -
 rust/ballista/rust/scheduler/src/api/mod.rs   |    87 -
 rust/ballista/rust/scheduler/src/lib.rs       |   490 -
 rust/ballista/rust/scheduler/src/main.rs      |   156 -
 rust/ballista/rust/scheduler/src/planner.rs   |   494 -
 .../ballista/rust/scheduler/src/state/etcd.rs |   205 -
 rust/ballista/rust/scheduler/src/state/mod.rs |   880 --
 .../rust/scheduler/src/state/standalone.rs    |   228 -
 .../ballista/rust/scheduler/src/test_utils.rs |   148 -
 .../scheduler/testdata/customer/customer.tbl  |    10 -
 .../testdata/lineitem/partition0.tbl          |    10 -
 .../testdata/lineitem/partition1.tbl          |    10 -
 .../rust/scheduler/testdata/nation/nation.tbl |    10 -
 .../rust/scheduler/testdata/orders/orders.tbl |    10 -
 .../rust/scheduler/testdata/part/part.tbl     |    10 -
 .../scheduler/testdata/partsupp/partsupp.tbl  |    10 -
 .../rust/scheduler/testdata/region/region.tbl |     5 -
 .../scheduler/testdata/supplier/supplier.tbl  |    10 -
 rust/ballista/ui/scheduler/.gitignore         |    23 -
 rust/ballista/ui/scheduler/README.md          |    45 -
 rust/ballista/ui/scheduler/index.d.ts         |    18 -
 rust/ballista/ui/scheduler/package.json       |    58 -
 rust/ballista/ui/scheduler/public/favicon.ico |   Bin 3870 -> 0 bytes
 rust/ballista/ui/scheduler/public/index.html  |    62 -
 rust/ballista/ui/scheduler/public/logo192.png |   Bin 5347 -> 0 bytes
 rust/ballista/ui/scheduler/public/logo512.png |   Bin 9664 -> 0 bytes
 .../ui/scheduler/public/manifest.json         |    25 -
 rust/ballista/ui/scheduler/public/robots.txt  |    20 -
 .../ui/scheduler/react-table-config.d.ts      |   137 -
 rust/ballista/ui/scheduler/src/App.css        |    18 -
 rust/ballista/ui/scheduler/src/App.test.tsx   |    26 -
 rust/ballista/ui/scheduler/src/App.tsx        |    97 -
 .../ui/scheduler/src/components/DataTable.tsx |   131 -
 .../ui/scheduler/src/components/Empty.tsx     |    36 -
 .../ui/scheduler/src/components/Footer.tsx    |    28 -
 .../ui/scheduler/src/components/Header.tsx    |    82 -
 .../ui/scheduler/src/components/NodesList.tsx |    71 -
 .../scheduler/src/components/QueriesList.tsx  |   115 -
 .../ui/scheduler/src/components/Summary.tsx   |    89 -
 .../ui/scheduler/src/components/logo.svg      |    25 -
 rust/ballista/ui/scheduler/src/index.css      |    32 -
 rust/ballista/ui/scheduler/src/index.tsx      |    38 -
 .../ui/scheduler/src/react-app-env.d.ts       |    18 -
 .../ui/scheduler/src/reportWebVitals.ts       |    32 -
 rust/ballista/ui/scheduler/src/setupTests.ts  |    22 -
 rust/ballista/ui/scheduler/tsconfig.json      |    28 -
 rust/ballista/ui/scheduler/yarn.lock          | 12431 ----------------
 rust/benchmarks/Cargo.toml                    |    42 -
 rust/benchmarks/README.md                     |   120 -
 rust/benchmarks/src/bin/nyctaxi.rs            |   151 -
 rust/benchmarks/src/bin/tpch.rs               |  1692 ---
 rust/datafusion-examples/Cargo.toml           |    39 -
 rust/datafusion-examples/examples/README.md   |    28 -
 rust/datafusion-examples/examples/csv_sql.rs  |    52 -
 .../datafusion-examples/examples/dataframe.rs |    47 -
 .../examples/dataframe_in_memory.rs           |    67 -
 .../examples/flight_client.rs                 |    79 -
 .../examples/flight_server.rs                 |   213 -
 .../examples/parquet_sql.rs                   |    50 -
 .../examples/simple_udaf.rs                   |   170 -
 .../examples/simple_udf.rs                    |   151 -
 rust/datafusion/Cargo.toml                    |    99 -
 rust/datafusion/DEVELOPERS.md                 |    92 -
 rust/datafusion/Dockerfile                    |    25 -
 rust/datafusion/README.md                     |   356 -
 .../datafusion/benches/aggregate_query_sql.rs |   248 -
 rust/datafusion/benches/filter_query_sql.rs   |    91 -
 rust/datafusion/benches/math_query_sql.rs     |   111 -
 rust/datafusion/benches/scalar.rs             |    30 -
 .../benches/sort_limit_query_sql.rs           |   148 -
 rust/datafusion/docs/cli.md                   |    95 -
 .../docs/images/DataFusion-Logo-Dark.png      |   Bin 20134 -> 0 bytes
 .../docs/images/DataFusion-Logo-Dark.svg      |     1 -
 .../docs/images/DataFusion-Logo-Light.png     |   Bin 19102 -> 0 bytes
 .../docs/images/DataFusion-Logo-Light.svg     |     1 -
 rust/datafusion/src/bin/main.rs               |    25 -
 rust/datafusion/src/bin/repl.rs               |   140 -
 rust/datafusion/src/catalog/catalog.rs        |   139 -
 .../src/catalog/information_schema.rs         |   492 -
 rust/datafusion/src/catalog/mod.rs            |   146 -
 rust/datafusion/src/catalog/schema.rs         |   104 -
 rust/datafusion/src/dataframe.rs              |   286 -
 rust/datafusion/src/datasource/csv.rs         |   144 -
 rust/datafusion/src/datasource/datasource.rs  |   103 -
 rust/datafusion/src/datasource/empty.rs       |    80 -
 rust/datafusion/src/datasource/memory.rs      |   472 -
 rust/datafusion/src/datasource/mod.rs         |    28 -
 rust/datafusion/src/datasource/parquet.rs     |   373 -
 rust/datafusion/src/error.rs                  |   120 -
 rust/datafusion/src/execution/context.rs      |  3123 ----
 .../src/execution/dataframe_impl.rs           |   374 -
 rust/datafusion/src/execution/mod.rs          |    21 -
 rust/datafusion/src/lib.rs                    |   211 -
 rust/datafusion/src/logical_plan/builder.rs   |   595 -
 rust/datafusion/src/logical_plan/dfschema.rs  |   521 -
 rust/datafusion/src/logical_plan/display.rs   |   270 -
 rust/datafusion/src/logical_plan/expr.rs      |  1505 --
 rust/datafusion/src/logical_plan/extension.rs |    79 -
 rust/datafusion/src/logical_plan/mod.rs       |    50 -
 rust/datafusion/src/logical_plan/operators.rs |   135 -
 rust/datafusion/src/logical_plan/plan.rs      |  1095 --
 rust/datafusion/src/logical_plan/registry.rs  |    34 -
 .../src/optimizer/constant_folding.rs         |   591 -
 .../src/optimizer/filter_push_down.rs         |  1021 --
 .../src/optimizer/hash_build_probe_order.rs   |   257 -
 .../src/optimizer/limit_push_down.rs          |   252 -
 rust/datafusion/src/optimizer/mod.rs          |    27 -
 rust/datafusion/src/optimizer/optimizer.rs    |    32 -
 .../src/optimizer/projection_push_down.rs     |   542 -
 rust/datafusion/src/optimizer/utils.rs        |   489 -
 .../physical_optimizer/coalesce_batches.rs    |    88 -
 .../src/physical_optimizer/merge_exec.rs      |    74 -
 rust/datafusion/src/physical_optimizer/mod.rs |    24 -
 .../src/physical_optimizer/optimizer.rs       |    39 -
 .../src/physical_optimizer/repartition.rs     |   186 -
 .../src/physical_plan/aggregates.rs           |   258 -
 .../src/physical_plan/array_expressions.rs    |   127 -
 .../src/physical_plan/coalesce_batches.rs     |   316 -
 rust/datafusion/src/physical_plan/common.rs   |   104 -
 .../src/physical_plan/crypto_expressions.rs   |   198 -
 rust/datafusion/src/physical_plan/csv.rs      |   401 -
 .../src/physical_plan/datetime_expressions.rs |   559 -
 .../src/physical_plan/distinct_expressions.rs |   557 -
 rust/datafusion/src/physical_plan/empty.rs    |   186 -
 rust/datafusion/src/physical_plan/explain.rs  |   125 -
 .../src/physical_plan/expressions/average.rs  |   293 -
 .../src/physical_plan/expressions/binary.rs   |  1101 --
 .../src/physical_plan/expressions/case.rs     |   597 -
 .../src/physical_plan/expressions/cast.rs     |   301 -
 .../src/physical_plan/expressions/coercion.rs |   208 -
 .../src/physical_plan/expressions/column.rs   |    86 -
 .../src/physical_plan/expressions/count.rs    |   235 -
 .../src/physical_plan/expressions/in_list.rs  |   458 -
 .../physical_plan/expressions/is_not_null.rs  |   119 -
 .../src/physical_plan/expressions/is_null.rs  |   119 -
 .../src/physical_plan/expressions/literal.rs  |   108 -
 .../src/physical_plan/expressions/min_max.rs  |   655 -
 .../src/physical_plan/expressions/mod.rs      |   135 -
 .../src/physical_plan/expressions/negative.rs |   133 -
 .../src/physical_plan/expressions/not.rs      |   158 -
 .../src/physical_plan/expressions/nullif.rs   |   188 -
 .../src/physical_plan/expressions/sum.rs      |   373 -
 .../src/physical_plan/expressions/try_cast.rs |   247 -
 rust/datafusion/src/physical_plan/filter.rs   |   240 -
 .../datafusion/src/physical_plan/functions.rs |  3767 -----
 .../src/physical_plan/group_scalar.rs         |   212 -
 .../src/physical_plan/hash_aggregate.rs       |  1395 --
 .../datafusion/src/physical_plan/hash_join.rs |  1265 --
 .../src/physical_plan/hash_utils.rs           |   201 -
 rust/datafusion/src/physical_plan/limit.rs    |   338 -
 .../src/physical_plan/math_expressions.rs     |   118 -
 rust/datafusion/src/physical_plan/memory.rs   |   161 -
 rust/datafusion/src/physical_plan/merge.rs    |   225 -
 rust/datafusion/src/physical_plan/mod.rs      |   369 -
 rust/datafusion/src/physical_plan/parquet.rs  |  1535 --
 rust/datafusion/src/physical_plan/planner.rs  |  1106 --
 .../src/physical_plan/projection.rs           |   232 -
 .../src/physical_plan/regex_expressions.rs    |   172 -
 .../src/physical_plan/repartition.rs          |   461 -
 rust/datafusion/src/physical_plan/sort.rs     |   478 -
 .../src/physical_plan/string_expressions.rs   |   595 -
 .../src/physical_plan/type_coercion.rs        |   361 -
 rust/datafusion/src/physical_plan/udaf.rs     |   168 -
 rust/datafusion/src/physical_plan/udf.rs      |   112 -
 .../src/physical_plan/unicode_expressions.rs  |   532 -
 rust/datafusion/src/physical_plan/union.rs    |   143 -
 rust/datafusion/src/prelude.rs                |    37 -
 rust/datafusion/src/scalar.rs                 |   821 -
 rust/datafusion/src/sql/mod.rs                |    23 -
 rust/datafusion/src/sql/parser.rs             |   380 -
 rust/datafusion/src/sql/planner.rs            |  2723 ----
 rust/datafusion/src/sql/utils.rs              |   376 -
 rust/datafusion/src/test/exec.rs              |   102 -
 rust/datafusion/src/test/mod.rs               |   346 -
 rust/datafusion/src/test/user_defined.rs      |    76 -
 rust/datafusion/src/test/variable.rs          |    58 -
 rust/datafusion/src/variable/mod.rs           |    36 -
 rust/datafusion/tests/aggregate_simple.csv    |    16 -
 rust/datafusion/tests/custom_sources.rs       |   200 -
 rust/datafusion/tests/customer.csv            |     4 -
 rust/datafusion/tests/dataframe.rs            |    79 -
 rust/datafusion/tests/example.csv             |     2 -
 .../tests/provider_filter_pushdown.rs         |   177 -
 rust/datafusion/tests/sql.rs                  |  2707 ----
 rust/datafusion/tests/user_defined_plan.rs    |   512 -
 rust/integration-testing/Cargo.toml           |    45 -
 rust/integration-testing/README.md            |    30 -
 .../src/bin/arrow-file-to-stream.rs           |    43 -
 .../src/bin/arrow-json-integration-test.rs    |   180 -
 .../src/bin/arrow-stream-to-file.rs           |    34 -
 .../src/bin/flight-test-integration-client.rs |    62 -
 .../src/bin/flight-test-integration-server.rs |    55 -
 .../src/flight_client_scenarios.rs            |    20 -
 .../auth_basic_proto.rs                       |   109 -
 .../integration_test.rs                       |   271 -
 .../src/flight_client_scenarios/middleware.rs |    83 -
 .../src/flight_server_scenarios.rs            |    48 -
 .../auth_basic_proto.rs                       |   225 -
 .../integration_test.rs                       |   385 -
 .../src/flight_server_scenarios/middleware.rs |   150 -
 rust/integration-testing/src/lib.rs           |   601 -
 rust/parquet/Cargo.toml                       |    78 -
 rust/parquet/README.md                        |   126 -
 rust/parquet/benches/arrow_writer.rs          |   202 -
 rust/parquet/build.rs                         |    43 -
 rust/parquet/src/arrow/array_reader.rs        |  2530 ----
 rust/parquet/src/arrow/arrow_reader.rs        |   671 -
 rust/parquet/src/arrow/arrow_writer.rs        |  1402 --
 rust/parquet/src/arrow/converter.rs           |   454 -
 rust/parquet/src/arrow/levels.rs              |  1411 --
 rust/parquet/src/arrow/mod.rs                 |    69 -
 rust/parquet/src/arrow/record_reader.rs       |   794 -
 rust/parquet/src/arrow/schema.rs              |  1945 ---
 rust/parquet/src/basic.rs                     |  1969 ---
 rust/parquet/src/bin/parquet-read.rs          |   128 -
 rust/parquet/src/bin/parquet-rowcount.rs      |    87 -
 rust/parquet/src/bin/parquet-schema.rs        |   104 -
 rust/parquet/src/column/mod.rs                |   124 -
 rust/parquet/src/column/page.rs               |   306 -
 rust/parquet/src/column/reader.rs             |  1356 --
 rust/parquet/src/column/writer.rs             |  1908 ---
 rust/parquet/src/compression.rs               |   393 -
 rust/parquet/src/data_type.rs                 |  1358 --
 rust/parquet/src/encodings/decoding.rs        |  1387 --
 rust/parquet/src/encodings/encoding.rs        |  1334 --
 rust/parquet/src/encodings/levels.rs          |   563 -
 rust/parquet/src/encodings/mod.rs             |    21 -
 rust/parquet/src/encodings/rle.rs             |   831 --
 rust/parquet/src/errors.rs                    |   146 -
 rust/parquet/src/file/footer.rs               |   263 -
 rust/parquet/src/file/metadata.rs             |   789 -
 rust/parquet/src/file/mod.rs                  |   110 -
 rust/parquet/src/file/properties.rs           |   679 -
 rust/parquet/src/file/reader.rs               |   206 -
 rust/parquet/src/file/serialized_reader.rs    |   771 -
 rust/parquet/src/file/statistics.rs           |   664 -
 rust/parquet/src/file/writer.rs               |  1164 --
 rust/parquet/src/lib.rs                       |    57 -
 rust/parquet/src/record/api.rs                |  1846 ---
 rust/parquet/src/record/mod.rs                |    28 -
 rust/parquet/src/record/reader.rs             |  1667 ---
 rust/parquet/src/record/record_writer.rs      |    26 -
 rust/parquet/src/record/triplet.rs            |   561 -
 rust/parquet/src/schema/mod.rs                |    67 -
 rust/parquet/src/schema/parser.rs             |  1241 --
 rust/parquet/src/schema/printer.rs            |   827 -
 rust/parquet/src/schema/types.rs              |  2078 ---
 rust/parquet/src/schema/visitor.rs            |   240 -
 rust/parquet/src/util/bit_packing.rs          |  3662 -----
 rust/parquet/src/util/bit_util.rs             |  1143 --
 rust/parquet/src/util/cursor.rs               |   260 -
 rust/parquet/src/util/hash_util.rs            |   172 -
 rust/parquet/src/util/io.rs                   |   329 -
 rust/parquet/src/util/memory.rs               |   532 -
 rust/parquet/src/util/mod.rs                  |    27 -
 .../parquet/src/util/test_common/file_util.rs |    73 -
 rust/parquet/src/util/test_common/mod.rs      |    33 -
 .../parquet/src/util/test_common/page_util.rs |   313 -
 rust/parquet/src/util/test_common/rand_gen.rs |   139 -
 rust/parquet/tests/custom_writer.rs           |   100 -
 rust/parquet_derive/Cargo.toml                |    42 -
 rust/parquet_derive/README.md                 |    98 -
 rust/parquet_derive/src/lib.rs                |   126 -
 rust/parquet_derive/src/parquet_field.rs      |   920 --
 rust/parquet_derive_test/Cargo.toml           |    32 -
 rust/parquet_derive_test/src/lib.rs           |   131 -
 rust/pre-commit.sh                            |    88 -
 rust/rustfmt.toml                             |    24 -
 570 files changed, 12 insertions(+), 194031 deletions(-)
 delete mode 100644 .github/workflows/rust.yml
 delete mode 100644 dev/archery/archery/lang/rust.py
 delete mode 100755 dev/release/post-07-rust.sh
 delete mode 100644 rust/.gitignore
 delete mode 100644 rust/Cargo.toml
 delete mode 100644 rust/README.md
 delete mode 100644 rust/arrow-flight/Cargo.toml
 delete mode 100644 rust/arrow-flight/README.md
 delete mode 100644 rust/arrow-flight/build.rs
 delete mode 100644 rust/arrow-flight/examples/server.rs
 delete mode 100644 rust/arrow-flight/src/arrow.flight.protocol.rs
 delete mode 100644 rust/arrow-flight/src/lib.rs
 delete mode 100644 rust/arrow-flight/src/utils.rs
 delete mode 100644 rust/arrow-pyarrow-integration-testing/.cargo/config
 delete mode 100644 rust/arrow-pyarrow-integration-testing/.gitignore
 delete mode 100644 rust/arrow-pyarrow-integration-testing/Cargo.toml
 delete mode 100644 rust/arrow-pyarrow-integration-testing/README.md
 delete mode 100644 rust/arrow-pyarrow-integration-testing/pyproject.toml
 delete mode 100644 rust/arrow-pyarrow-integration-testing/src/lib.rs
 delete mode 100644 rust/arrow-pyarrow-integration-testing/tests/test_sql.py
 delete mode 100644 rust/arrow/Cargo.toml
 delete mode 100644 rust/arrow/README.md
 delete mode 100644 rust/arrow/benches/aggregate_kernels.rs
 delete mode 100644 rust/arrow/benches/arithmetic_kernels.rs
 delete mode 100644 rust/arrow/benches/array_from_vec.rs
 delete mode 100644 rust/arrow/benches/array_slice.rs
 delete mode 100644 rust/arrow/benches/bit_length_kernel.rs
 delete mode 100644 rust/arrow/benches/boolean_kernels.rs
 delete mode 100644 rust/arrow/benches/buffer_bit_ops.rs
 delete mode 100644 rust/arrow/benches/buffer_create.rs
 delete mode 100644 rust/arrow/benches/builder.rs
 delete mode 100644 rust/arrow/benches/cast_kernels.rs
 delete mode 100644 rust/arrow/benches/comparison_kernels.rs
 delete mode 100644 rust/arrow/benches/concatenate_kernel.rs
 delete mode 100644 rust/arrow/benches/csv_writer.rs
 delete mode 100644 rust/arrow/benches/equal.rs
 delete mode 100644 rust/arrow/benches/filter_kernels.rs
 delete mode 100644 rust/arrow/benches/json_reader.rs
 delete mode 100644 rust/arrow/benches/length_kernel.rs
 delete mode 100644 rust/arrow/benches/mutable_array.rs
 delete mode 100644 rust/arrow/benches/sort_kernel.rs
 delete mode 100644 rust/arrow/benches/take_kernels.rs
 delete mode 100644 rust/arrow/build.rs
 delete mode 100644 rust/arrow/examples/builders.rs
 delete mode 100644 rust/arrow/examples/dynamic_types.rs
 delete mode 100644 rust/arrow/examples/read_csv.rs
 delete mode 100644 rust/arrow/examples/read_csv_infer_schema.rs
 delete mode 100644 rust/arrow/examples/tensor_builder.rs
 delete mode 100644 rust/arrow/format-0ed34c83.patch
 delete mode 100755 rust/arrow/regen.sh
 delete mode 100644 rust/arrow/src/alloc/alignment.rs
 delete mode 100644 rust/arrow/src/alloc/mod.rs
 delete mode 100644 rust/arrow/src/alloc/types.rs
 delete mode 100644 rust/arrow/src/arch/avx512.rs
 delete mode 100644 rust/arrow/src/arch/mod.rs
 delete mode 100644 rust/arrow/src/array/array.rs
 delete mode 100644 rust/arrow/src/array/array_binary.rs
 delete mode 100644 rust/arrow/src/array/array_boolean.rs
 delete mode 100644 rust/arrow/src/array/array_dictionary.rs
 delete mode 100644 rust/arrow/src/array/array_list.rs
 delete mode 100644 rust/arrow/src/array/array_primitive.rs
 delete mode 100644 rust/arrow/src/array/array_string.rs
 delete mode 100644 rust/arrow/src/array/array_struct.rs
 delete mode 100644 rust/arrow/src/array/array_union.rs
 delete mode 100644 rust/arrow/src/array/builder.rs
 delete mode 100644 rust/arrow/src/array/cast.rs
 delete mode 100644 rust/arrow/src/array/data.rs
 delete mode 100644 rust/arrow/src/array/equal/boolean.rs
 delete mode 100644 rust/arrow/src/array/equal/decimal.rs
 delete mode 100644 rust/arrow/src/array/equal/dictionary.rs
 delete mode 100644 rust/arrow/src/array/equal/fixed_binary.rs
 delete mode 100644 rust/arrow/src/array/equal/fixed_list.rs
 delete mode 100644 rust/arrow/src/array/equal/list.rs
 delete mode 100644 rust/arrow/src/array/equal/mod.rs
 delete mode 100644 rust/arrow/src/array/equal/null.rs
 delete mode 100644 rust/arrow/src/array/equal/primitive.rs
 delete mode 100644 rust/arrow/src/array/equal/structure.rs
 delete mode 100644 rust/arrow/src/array/equal/utils.rs
 delete mode 100644 rust/arrow/src/array/equal/variable_size.rs
 delete mode 100644 rust/arrow/src/array/equal_json.rs
 delete mode 100644 rust/arrow/src/array/ffi.rs
 delete mode 100644 rust/arrow/src/array/iterator.rs
 delete mode 100644 rust/arrow/src/array/mod.rs
 delete mode 100644 rust/arrow/src/array/null.rs
 delete mode 100644 rust/arrow/src/array/ord.rs
 delete mode 100644 rust/arrow/src/array/raw_pointer.rs
 delete mode 100644 rust/arrow/src/array/transform/boolean.rs
 delete mode 100644 rust/arrow/src/array/transform/fixed_binary.rs
 delete mode 100644 rust/arrow/src/array/transform/list.rs
 delete mode 100644 rust/arrow/src/array/transform/mod.rs
 delete mode 100644 rust/arrow/src/array/transform/null.rs
 delete mode 100644 rust/arrow/src/array/transform/primitive.rs
 delete mode 100644 rust/arrow/src/array/transform/structure.rs
 delete mode 100644 rust/arrow/src/array/transform/utils.rs
 delete mode 100644 rust/arrow/src/array/transform/variable_size.rs
 delete mode 100644 rust/arrow/src/bitmap.rs
 delete mode 100644 rust/arrow/src/buffer/immutable.rs
 delete mode 100644 rust/arrow/src/buffer/mod.rs
 delete mode 100644 rust/arrow/src/buffer/mutable.rs
 delete mode 100644 rust/arrow/src/buffer/ops.rs
 delete mode 100644 rust/arrow/src/bytes.rs
 delete mode 100644 rust/arrow/src/compute/kernels/aggregate.rs
 delete mode 100644 rust/arrow/src/compute/kernels/arithmetic.rs
 delete mode 100644 rust/arrow/src/compute/kernels/arity.rs
 delete mode 100644 rust/arrow/src/compute/kernels/boolean.rs
 delete mode 100644 rust/arrow/src/compute/kernels/cast.rs
 delete mode 100644 rust/arrow/src/compute/kernels/cast_utils.rs
 delete mode 100644 rust/arrow/src/compute/kernels/comparison.rs
 delete mode 100644 rust/arrow/src/compute/kernels/concat.rs
 delete mode 100644 rust/arrow/src/compute/kernels/filter.rs
 delete mode 100644 rust/arrow/src/compute/kernels/length.rs
 delete mode 100644 rust/arrow/src/compute/kernels/limit.rs
 delete mode 100644 rust/arrow/src/compute/kernels/mod.rs
 delete mode 100644 rust/arrow/src/compute/kernels/regexp.rs
 delete mode 100644 rust/arrow/src/compute/kernels/sort.rs
 delete mode 100644 rust/arrow/src/compute/kernels/substring.rs
 delete mode 100644 rust/arrow/src/compute/kernels/take.rs
 delete mode 100644 rust/arrow/src/compute/kernels/temporal.rs
 delete mode 100644 rust/arrow/src/compute/kernels/window.rs
 delete mode 100644 rust/arrow/src/compute/kernels/zip.rs
 delete mode 100644 rust/arrow/src/compute/mod.rs
 delete mode 100644 rust/arrow/src/compute/util.rs
 delete mode 100644 rust/arrow/src/csv/mod.rs
 delete mode 100644 rust/arrow/src/csv/reader.rs
 delete mode 100644 rust/arrow/src/csv/writer.rs
 delete mode 100644 rust/arrow/src/datatypes/datatype.rs
 delete mode 100644 rust/arrow/src/datatypes/field.rs
 delete mode 100644 rust/arrow/src/datatypes/mod.rs
 delete mode 100644 rust/arrow/src/datatypes/native.rs
 delete mode 100644 rust/arrow/src/datatypes/numeric.rs
 delete mode 100644 rust/arrow/src/datatypes/schema.rs
 delete mode 100644 rust/arrow/src/datatypes/types.rs
 delete mode 100644 rust/arrow/src/error.rs
 delete mode 100644 rust/arrow/src/ffi.rs
 delete mode 100644 rust/arrow/src/ipc/convert.rs
 delete mode 100644 rust/arrow/src/ipc/gen/File.rs
 delete mode 100644 rust/arrow/src/ipc/gen/Message.rs
 delete mode 100644 rust/arrow/src/ipc/gen/Schema.rs
 delete mode 100644 rust/arrow/src/ipc/gen/SparseTensor.rs
 delete mode 100644 rust/arrow/src/ipc/gen/Tensor.rs
 delete mode 100644 rust/arrow/src/ipc/gen/mod.rs
 delete mode 100644 rust/arrow/src/ipc/mod.rs
 delete mode 100644 rust/arrow/src/ipc/reader.rs
 delete mode 100644 rust/arrow/src/ipc/writer.rs
 delete mode 100644 rust/arrow/src/json/mod.rs
 delete mode 100644 rust/arrow/src/json/reader.rs
 delete mode 100644 rust/arrow/src/json/writer.rs
 delete mode 100644 rust/arrow/src/lib.rs
 delete mode 100644 rust/arrow/src/record_batch.rs
 delete mode 100644 rust/arrow/src/temporal_conversions.rs
 delete mode 100644 rust/arrow/src/tensor.rs
 delete mode 100644 rust/arrow/src/util/bench_util.rs
 delete mode 100644 rust/arrow/src/util/bit_chunk_iterator.rs
 delete mode 100644 rust/arrow/src/util/bit_util.rs
 delete mode 100644 rust/arrow/src/util/data_gen.rs
 delete mode 100644 rust/arrow/src/util/display.rs
 delete mode 100644 rust/arrow/src/util/integration_util.rs
 delete mode 100644 rust/arrow/src/util/mod.rs
 delete mode 100644 rust/arrow/src/util/pretty.rs
 delete mode 100644 rust/arrow/src/util/serialization.rs
 delete mode 100644 rust/arrow/src/util/string_writer.rs
 delete mode 100644 rust/arrow/src/util/test_util.rs
 delete mode 100644 rust/arrow/src/util/trusted_len.rs
 delete mode 100644 rust/arrow/src/zz_memory_check.rs
 delete mode 100644 rust/arrow/test/data/arrays.json
 delete mode 100644 rust/arrow/test/data/basic.json
 delete mode 100644 rust/arrow/test/data/basic_nulls.json
 delete mode 100644 rust/arrow/test/data/integration.json
 delete mode 100644 rust/arrow/test/data/list_string_dict_nested.json
 delete mode 100644 rust/arrow/test/data/list_string_dict_nested_nulls.json
 delete mode 100644 rust/arrow/test/data/mixed_arrays.json
 delete mode 100644 rust/arrow/test/data/mixed_arrays.json.gz
 delete mode 100644 rust/arrow/test/data/nested_structs.json
 delete mode 100644 rust/arrow/test/data/null_test.csv
 delete mode 100644 rust/arrow/test/data/uk_cities.csv
 delete mode 100644 rust/arrow/test/data/uk_cities_with_headers.csv
 delete mode 100644 rust/arrow/test/data/various_types.csv
 delete mode 100644 rust/arrow/test/data/various_types_invalid.csv
 delete mode 100644 rust/ballista/.dockerignore
 delete mode 100644 rust/ballista/README.md
 delete mode 100755 rust/ballista/dev/build-rust-base.sh
 delete mode 100755 rust/ballista/dev/build-rust.sh
 delete mode 100755 rust/ballista/dev/integration-tests.sh
 delete mode 100644 rust/ballista/docker/README.md
 delete mode 100644 rust/ballista/docker/rust-base.dockerfile
 delete mode 100644 rust/ballista/docker/rust.dockerfile
 delete mode 100644 rust/ballista/docs/README.md
 delete mode 100644 rust/ballista/docs/architecture.md
 delete mode 100644 rust/ballista/docs/dev-env-rust.md
 delete mode 100644 rust/ballista/docs/images/query-execution.png
 delete mode 100644 rust/ballista/docs/integration-testing.md
 delete mode 100644 rust/ballista/docs/release-process.md
 delete mode 100644 rust/ballista/docs/rust-docker.md
 delete mode 100644 rust/ballista/docs/user-guide/.gitignore
 delete mode 100644 rust/ballista/docs/user-guide/README.md
 delete mode 100644 rust/ballista/docs/user-guide/book.toml
 delete mode 100644 rust/ballista/docs/user-guide/src/SUMMARY.md
 delete mode 100644 rust/ballista/docs/user-guide/src/client-rust.md
 delete mode 100644 rust/ballista/docs/user-guide/src/clients.md
 delete mode 100644 rust/ballista/docs/user-guide/src/configuration.md
 delete mode 100644 rust/ballista/docs/user-guide/src/deployment.md
 delete mode 100644 rust/ballista/docs/user-guide/src/docker-compose.md
 delete mode 100644 rust/ballista/docs/user-guide/src/faq.md
 delete mode 100644 rust/ballista/docs/user-guide/src/img/ballista-architecture.png
 delete mode 100644 rust/ballista/docs/user-guide/src/introduction.md
 delete mode 100644 rust/ballista/docs/user-guide/src/kubernetes.md
 delete mode 100644 rust/ballista/docs/user-guide/src/standalone.md
 delete mode 100644 rust/ballista/rust/.dockerignore
 delete mode 100644 rust/ballista/rust/.gitignore
 delete mode 100644 rust/ballista/rust/Cargo.toml
 delete mode 100644 rust/ballista/rust/benchmarks/tpch/.dockerignore
 delete mode 100644 rust/ballista/rust/benchmarks/tpch/.gitignore
 delete mode 100644 rust/ballista/rust/benchmarks/tpch/Cargo.toml
 delete mode 100644 rust/ballista/rust/benchmarks/tpch/README.md
 delete mode 100644 rust/ballista/rust/benchmarks/tpch/docker-compose.yaml
 delete mode 100755 rust/ballista/rust/benchmarks/tpch/entrypoint.sh
 delete mode 100644 rust/ballista/rust/benchmarks/tpch/queries/q1.sql
 delete mode 100644 rust/ballista/rust/benchmarks/tpch/queries/q10.sql
 delete mode 100644 rust/ballista/rust/benchmarks/tpch/queries/q11.sql
 delete mode 100644 rust/ballista/rust/benchmarks/tpch/queries/q12.sql
 delete mode 100644 rust/ballista/rust/benchmarks/tpch/queries/q13.sql
 delete mode 100644 rust/ballista/rust/benchmarks/tpch/queries/q14.sql
 delete mode 100644 rust/ballista/rust/benchmarks/tpch/queries/q16.sql
 delete mode 100644 rust/ballista/rust/benchmarks/tpch/queries/q17.sql
 delete mode 100644 rust/ballista/rust/benchmarks/tpch/queries/q18.sql
 delete mode 100644 rust/ballista/rust/benchmarks/tpch/queries/q19.sql
 delete mode 100644 rust/ballista/rust/benchmarks/tpch/queries/q2.sql
 delete mode 100644 rust/ballista/rust/benchmarks/tpch/queries/q20.sql
 delete mode 100644 rust/ballista/rust/benchmarks/tpch/queries/q21.sql
 delete mode 100644 rust/ballista/rust/benchmarks/tpch/queries/q22.sql
 delete mode 100644 rust/ballista/rust/benchmarks/tpch/queries/q3.sql
 delete mode 100644 rust/ballista/rust/benchmarks/tpch/queries/q4.sql
 delete mode 100644 rust/ballista/rust/benchmarks/tpch/queries/q5.sql
 delete mode 100644 rust/ballista/rust/benchmarks/tpch/queries/q6.sql
 delete mode 100644 rust/ballista/rust/benchmarks/tpch/queries/q7.sql
 delete mode 100644 rust/ballista/rust/benchmarks/tpch/queries/q8.sql
 delete mode 100644 rust/ballista/rust/benchmarks/tpch/queries/q9.sql
 delete mode 100755 rust/ballista/rust/benchmarks/tpch/run.sh
 delete mode 100644 rust/ballista/rust/benchmarks/tpch/src/main.rs
 delete mode 100755 rust/ballista/rust/benchmarks/tpch/tpch-gen.sh
 delete mode 100644 rust/ballista/rust/benchmarks/tpch/tpchgen.dockerfile
 delete mode 100644 rust/ballista/rust/client/Cargo.toml
 delete mode 100644 rust/ballista/rust/client/README.md
 delete mode 100644 rust/ballista/rust/client/src/columnar_batch.rs
 delete mode 100644 rust/ballista/rust/client/src/context.rs
 delete mode 100644 rust/ballista/rust/client/src/lib.rs
 delete mode 100644 rust/ballista/rust/client/src/prelude.rs
 delete mode 100644 rust/ballista/rust/core/Cargo.toml
 delete mode 100644 rust/ballista/rust/core/README.md
 delete mode 100644 rust/ballista/rust/core/build.rs
 delete mode 100644 rust/ballista/rust/core/proto/ballista.proto
 delete mode 100644 rust/ballista/rust/core/src/client.rs
 delete mode 100644 rust/ballista/rust/core/src/datasource.rs
 delete mode 100644 rust/ballista/rust/core/src/error.rs
 delete mode 100644 rust/ballista/rust/core/src/execution_plans/mod.rs
 delete mode 100644 rust/ballista/rust/core/src/execution_plans/query_stage.rs
 delete mode 100644 rust/ballista/rust/core/src/execution_plans/shuffle_reader.rs
 delete mode 100644 rust/ballista/rust/core/src/execution_plans/unresolved_shuffle.rs
 delete mode 100644 rust/ballista/rust/core/src/lib.rs
 delete mode 100644 rust/ballista/rust/core/src/memory_stream.rs
 delete mode 100644 rust/ballista/rust/core/src/serde/logical_plan/from_proto.rs
 delete mode 100644 rust/ballista/rust/core/src/serde/logical_plan/mod.rs
 delete mode 100644 rust/ballista/rust/core/src/serde/logical_plan/to_proto.rs
 delete mode 100644 rust/ballista/rust/core/src/serde/mod.rs
 delete mode 100644 rust/ballista/rust/core/src/serde/physical_plan/from_proto.rs
 delete mode 100644 rust/ballista/rust/core/src/serde/physical_plan/mod.rs
 delete mode 100644 rust/ballista/rust/core/src/serde/physical_plan/to_proto.rs
 delete mode 100644 rust/ballista/rust/core/src/serde/scheduler/from_proto.rs
 delete mode 100644 rust/ballista/rust/core/src/serde/scheduler/mod.rs
 delete mode 100644 rust/ballista/rust/core/src/serde/scheduler/to_proto.rs
 delete mode 100644 rust/ballista/rust/core/src/utils.rs
 delete mode 100644 rust/ballista/rust/executor/Cargo.toml
 delete mode 100644 rust/ballista/rust/executor/README.md
 delete mode 100644 rust/ballista/rust/executor/build.rs
 delete mode 100644 rust/ballista/rust/executor/examples/example_executor_config.toml
 delete mode 100644 rust/ballista/rust/executor/executor_config_spec.toml
 delete mode 100644 rust/ballista/rust/executor/src/collect.rs
 delete mode 100644 rust/ballista/rust/executor/src/execution_loop.rs
 delete mode 100644 rust/ballista/rust/executor/src/flight_service.rs
 delete mode 100644 rust/ballista/rust/executor/src/lib.rs
 delete mode 100644 rust/ballista/rust/executor/src/main.rs
 delete mode 100644 rust/ballista/rust/scheduler/Cargo.toml
 delete mode 100644 rust/ballista/rust/scheduler/README.md
 delete mode 100644 rust/ballista/rust/scheduler/build.rs
 delete mode 100644 rust/ballista/rust/scheduler/scheduler_config_spec.toml
 delete mode 100644 rust/ballista/rust/scheduler/src/api/handlers.rs
 delete mode 100644 rust/ballista/rust/scheduler/src/api/mod.rs
 delete mode 100644 rust/ballista/rust/scheduler/src/lib.rs
 delete mode 100644 rust/ballista/rust/scheduler/src/main.rs
 delete mode 100644 rust/ballista/rust/scheduler/src/planner.rs
 delete mode 100644 rust/ballista/rust/scheduler/src/state/etcd.rs
 delete mode 100644 rust/ballista/rust/scheduler/src/state/mod.rs
 delete mode 100644 rust/ballista/rust/scheduler/src/state/standalone.rs
 delete mode 100644 rust/ballista/rust/scheduler/src/test_utils.rs
 delete mode 100644 rust/ballista/rust/scheduler/testdata/customer/customer.tbl
 delete mode 100644 rust/ballista/rust/scheduler/testdata/lineitem/partition0.tbl
 delete mode 100644 rust/ballista/rust/scheduler/testdata/lineitem/partition1.tbl
 delete mode 100644 rust/ballista/rust/scheduler/testdata/nation/nation.tbl
 delete mode 100644 rust/ballista/rust/scheduler/testdata/orders/orders.tbl
 delete mode 100644 rust/ballista/rust/scheduler/testdata/part/part.tbl
 delete mode 100644 rust/ballista/rust/scheduler/testdata/partsupp/partsupp.tbl
 delete mode 100644 rust/ballista/rust/scheduler/testdata/region/region.tbl
 delete mode 100644 rust/ballista/rust/scheduler/testdata/supplier/supplier.tbl
 delete mode 100644 rust/ballista/ui/scheduler/.gitignore
 delete mode 100644 rust/ballista/ui/scheduler/README.md
 delete mode 100644 rust/ballista/ui/scheduler/index.d.ts
 delete mode 100644 rust/ballista/ui/scheduler/package.json
 delete mode 100644 rust/ballista/ui/scheduler/public/favicon.ico
 delete mode 100644 rust/ballista/ui/scheduler/public/index.html
 delete mode 100644 rust/ballista/ui/scheduler/public/logo192.png
 delete mode 100644 rust/ballista/ui/scheduler/public/logo512.png
 delete mode 100644 rust/ballista/ui/scheduler/public/manifest.json
 delete mode 100644 rust/ballista/ui/scheduler/public/robots.txt
 delete mode 100644 rust/ballista/ui/scheduler/react-table-config.d.ts
 delete mode 100644 rust/ballista/ui/scheduler/src/App.css
 delete mode 100644 rust/ballista/ui/scheduler/src/App.test.tsx
 delete mode 100644 rust/ballista/ui/scheduler/src/App.tsx
 delete mode 100644 rust/ballista/ui/scheduler/src/components/DataTable.tsx
 delete mode 100644 rust/ballista/ui/scheduler/src/components/Empty.tsx
 delete mode 100644 rust/ballista/ui/scheduler/src/components/Footer.tsx
 delete mode 100644 rust/ballista/ui/scheduler/src/components/Header.tsx
 delete mode 100644 rust/ballista/ui/scheduler/src/components/NodesList.tsx
 delete mode 100644 rust/ballista/ui/scheduler/src/components/QueriesList.tsx
 delete mode 100644 rust/ballista/ui/scheduler/src/components/Summary.tsx
 delete mode 100644 rust/ballista/ui/scheduler/src/components/logo.svg
 delete mode 100644 rust/ballista/ui/scheduler/src/index.css
 delete mode 100644 rust/ballista/ui/scheduler/src/index.tsx
 delete mode 100644 rust/ballista/ui/scheduler/src/react-app-env.d.ts
 delete mode 100644 rust/ballista/ui/scheduler/src/reportWebVitals.ts
 delete mode 100644 rust/ballista/ui/scheduler/src/setupTests.ts
 delete mode 100644 rust/ballista/ui/scheduler/tsconfig.json
 delete mode 100644 rust/ballista/ui/scheduler/yarn.lock
 delete mode 100644 rust/benchmarks/Cargo.toml
 delete mode 100644 rust/benchmarks/README.md
 delete mode 100644 rust/benchmarks/src/bin/nyctaxi.rs
 delete mode 100644 rust/benchmarks/src/bin/tpch.rs
 delete mode 100644 rust/datafusion-examples/Cargo.toml
 delete mode 100644 rust/datafusion-examples/examples/README.md
 delete mode 100644 rust/datafusion-examples/examples/csv_sql.rs
 delete mode 100644 rust/datafusion-examples/examples/dataframe.rs
 delete mode 100644 rust/datafusion-examples/examples/dataframe_in_memory.rs
 delete mode 100644 rust/datafusion-examples/examples/flight_client.rs
 delete mode 100644 rust/datafusion-examples/examples/flight_server.rs
 delete mode 100644 rust/datafusion-examples/examples/parquet_sql.rs
 delete mode 100644 rust/datafusion-examples/examples/simple_udaf.rs
 delete mode 100644 rust/datafusion-examples/examples/simple_udf.rs
 delete mode 100644 rust/datafusion/Cargo.toml
 delete mode 100644 rust/datafusion/DEVELOPERS.md
 delete mode 100644 rust/datafusion/Dockerfile
 delete mode 100644 rust/datafusion/README.md
 delete mode 100644 rust/datafusion/benches/aggregate_query_sql.rs
 delete mode 100644 rust/datafusion/benches/filter_query_sql.rs
 delete mode 100644 rust/datafusion/benches/math_query_sql.rs
 delete mode 100644 rust/datafusion/benches/scalar.rs
 delete mode 100644 rust/datafusion/benches/sort_limit_query_sql.rs
 delete mode 100644 rust/datafusion/docs/cli.md
 delete mode 100644 rust/datafusion/docs/images/DataFusion-Logo-Dark.png
 delete mode 100644 rust/datafusion/docs/images/DataFusion-Logo-Dark.svg
 delete mode 100644 rust/datafusion/docs/images/DataFusion-Logo-Light.png
 delete mode 100644 rust/datafusion/docs/images/DataFusion-Logo-Light.svg
 delete mode 100644 rust/datafusion/src/bin/main.rs
 delete mode 100644 rust/datafusion/src/bin/repl.rs
 delete mode 100644 rust/datafusion/src/catalog/catalog.rs
 delete mode 100644 rust/datafusion/src/catalog/information_schema.rs
 delete mode 100644 rust/datafusion/src/catalog/mod.rs
 delete mode 100644 rust/datafusion/src/catalog/schema.rs
 delete mode 100644 rust/datafusion/src/dataframe.rs
 delete mode 100644 rust/datafusion/src/datasource/csv.rs
 delete mode 100644 rust/datafusion/src/datasource/datasource.rs
 delete mode 100644 rust/datafusion/src/datasource/empty.rs
 delete mode 100644 rust/datafusion/src/datasource/memory.rs
 delete mode 100644 rust/datafusion/src/datasource/mod.rs
 delete mode 100644 rust/datafusion/src/datasource/parquet.rs
 delete mode 100644 rust/datafusion/src/error.rs
 delete mode 100644 rust/datafusion/src/execution/context.rs
 delete mode 100644 rust/datafusion/src/execution/dataframe_impl.rs
 delete mode 100644 rust/datafusion/src/execution/mod.rs
 delete mode 100644 rust/datafusion/src/lib.rs
 delete mode 100644 rust/datafusion/src/logical_plan/builder.rs
 delete mode 100644 rust/datafusion/src/logical_plan/dfschema.rs
 delete mode 100644 rust/datafusion/src/logical_plan/display.rs
 delete mode 100644 rust/datafusion/src/logical_plan/expr.rs
 delete mode 100644 rust/datafusion/src/logical_plan/extension.rs
 delete mode 100644 rust/datafusion/src/logical_plan/mod.rs
 delete mode 100644 rust/datafusion/src/logical_plan/operators.rs
 delete mode 100644 rust/datafusion/src/logical_plan/plan.rs
 delete mode 100644 rust/datafusion/src/logical_plan/registry.rs
 delete mode 100644 rust/datafusion/src/optimizer/constant_folding.rs
 delete mode 100644 rust/datafusion/src/optimizer/filter_push_down.rs
 delete mode 100644 rust/datafusion/src/optimizer/hash_build_probe_order.rs
 delete mode 100644 rust/datafusion/src/optimizer/limit_push_down.rs
 delete mode 100644 rust/datafusion/src/optimizer/mod.rs
 delete mode 100644 rust/datafusion/src/optimizer/optimizer.rs
 delete mode 100644 rust/datafusion/src/optimizer/projection_push_down.rs
 delete mode 100644 rust/datafusion/src/optimizer/utils.rs
 delete mode 100644 rust/datafusion/src/physical_optimizer/coalesce_batches.rs
 delete mode 100644 rust/datafusion/src/physical_optimizer/merge_exec.rs
 delete mode 100644 rust/datafusion/src/physical_optimizer/mod.rs
 delete mode 100644 rust/datafusion/src/physical_optimizer/optimizer.rs
 delete mode 100644 rust/datafusion/src/physical_optimizer/repartition.rs
 delete mode 100644 rust/datafusion/src/physical_plan/aggregates.rs
 delete mode 100644 rust/datafusion/src/physical_plan/array_expressions.rs
 delete mode 100644 rust/datafusion/src/physical_plan/coalesce_batches.rs
 delete mode 100644 rust/datafusion/src/physical_plan/common.rs
 delete mode 100644 rust/datafusion/src/physical_plan/crypto_expressions.rs
 delete mode 100644 rust/datafusion/src/physical_plan/csv.rs
 delete mode 100644 rust/datafusion/src/physical_plan/datetime_expressions.rs
 delete mode 100644 rust/datafusion/src/physical_plan/distinct_expressions.rs
 delete mode 100644 rust/datafusion/src/physical_plan/empty.rs
 delete mode 100644 rust/datafusion/src/physical_plan/explain.rs
 delete mode 100644 rust/datafusion/src/physical_plan/expressions/average.rs
 delete mode 100644 rust/datafusion/src/physical_plan/expressions/binary.rs
 delete mode 100644 rust/datafusion/src/physical_plan/expressions/case.rs
 delete mode 100644 rust/datafusion/src/physical_plan/expressions/cast.rs
 delete mode 100644 rust/datafusion/src/physical_plan/expressions/coercion.rs
 delete mode 100644 rust/datafusion/src/physical_plan/expressions/column.rs
 delete mode 100644 rust/datafusion/src/physical_plan/expressions/count.rs
 delete mode 100644 rust/datafusion/src/physical_plan/expressions/in_list.rs
 delete mode 100644 rust/datafusion/src/physical_plan/expressions/is_not_null.rs
 delete mode 100644 rust/datafusion/src/physical_plan/expressions/is_null.rs
 delete mode 100644 rust/datafusion/src/physical_plan/expressions/literal.rs
 delete mode 100644 rust/datafusion/src/physical_plan/expressions/min_max.rs
 delete mode 100644 rust/datafusion/src/physical_plan/expressions/mod.rs
 delete mode 100644 rust/datafusion/src/physical_plan/expressions/negative.rs
 delete mode 100644 rust/datafusion/src/physical_plan/expressions/not.rs
 delete mode 100644 rust/datafusion/src/physical_plan/expressions/nullif.rs
 delete mode 100644 rust/datafusion/src/physical_plan/expressions/sum.rs
 delete mode 100644 rust/datafusion/src/physical_plan/expressions/try_cast.rs
 delete mode 100644 rust/datafusion/src/physical_plan/filter.rs
 delete mode 100644 rust/datafusion/src/physical_plan/functions.rs
 delete mode 100644 rust/datafusion/src/physical_plan/group_scalar.rs
 delete mode 100644 rust/datafusion/src/physical_plan/hash_aggregate.rs
 delete mode 100644 rust/datafusion/src/physical_plan/hash_join.rs
 delete mode 100644 rust/datafusion/src/physical_plan/hash_utils.rs
 delete mode 100644 rust/datafusion/src/physical_plan/limit.rs
 delete mode 100644 rust/datafusion/src/physical_plan/math_expressions.rs
 delete mode 100644 rust/datafusion/src/physical_plan/memory.rs
 delete mode 100644 rust/datafusion/src/physical_plan/merge.rs
 delete mode 100644 rust/datafusion/src/physical_plan/mod.rs
 delete mode 100644 rust/datafusion/src/physical_plan/parquet.rs
 delete mode 100644 rust/datafusion/src/physical_plan/planner.rs
 delete mode 100644 rust/datafusion/src/physical_plan/projection.rs
 delete mode 100644 rust/datafusion/src/physical_plan/regex_expressions.rs
 delete mode 100644 rust/datafusion/src/physical_plan/repartition.rs
 delete mode 100644 rust/datafusion/src/physical_plan/sort.rs
 delete mode 100644 rust/datafusion/src/physical_plan/string_expressions.rs
 delete mode 100644 rust/datafusion/src/physical_plan/type_coercion.rs
 delete mode 100644 rust/datafusion/src/physical_plan/udaf.rs
 delete mode 100644 rust/datafusion/src/physical_plan/udf.rs
 delete mode 100644 rust/datafusion/src/physical_plan/unicode_expressions.rs
 delete mode 100644 rust/datafusion/src/physical_plan/union.rs
 delete mode 100644 rust/datafusion/src/prelude.rs
 delete mode 100644 rust/datafusion/src/scalar.rs
 delete mode 100644 rust/datafusion/src/sql/mod.rs
 delete mode 100644 rust/datafusion/src/sql/parser.rs
 delete mode 100644 rust/datafusion/src/sql/planner.rs
 delete mode 100644 rust/datafusion/src/sql/utils.rs
 delete mode 100644 rust/datafusion/src/test/exec.rs
 delete mode 100644 rust/datafusion/src/test/mod.rs
 delete mode 100644 rust/datafusion/src/test/user_defined.rs
 delete mode 100644 rust/datafusion/src/test/variable.rs
 delete mode 100644 rust/datafusion/src/variable/mod.rs
 delete mode 100644 rust/datafusion/tests/aggregate_simple.csv
 delete mode 100644 rust/datafusion/tests/custom_sources.rs
 delete mode 100644 rust/datafusion/tests/customer.csv
 delete mode 100644 rust/datafusion/tests/dataframe.rs
 delete mode 100644 rust/datafusion/tests/example.csv
 delete mode 100644 rust/datafusion/tests/provider_filter_pushdown.rs
 delete mode 100644 rust/datafusion/tests/sql.rs
 delete mode 100644 rust/datafusion/tests/user_defined_plan.rs
 delete mode 100644 rust/integration-testing/Cargo.toml
 delete mode 100644 rust/integration-testing/README.md
 delete mode 100644 rust/integration-testing/src/bin/arrow-file-to-stream.rs
 delete mode 100644 rust/integration-testing/src/bin/arrow-json-integration-test.rs
 delete mode 100644 rust/integration-testing/src/bin/arrow-stream-to-file.rs
 delete mode 100644 rust/integration-testing/src/bin/flight-test-integration-client.rs
 delete mode 100644 rust/integration-testing/src/bin/flight-test-integration-server.rs
 delete mode 100644 rust/integration-testing/src/flight_client_scenarios.rs
 delete mode 100644 rust/integration-testing/src/flight_client_scenarios/auth_basic_proto.rs
 delete mode 100644 rust/integration-testing/src/flight_client_scenarios/integration_test.rs
 delete mode 100644 rust/integration-testing/src/flight_client_scenarios/middleware.rs
 delete mode 100644 rust/integration-testing/src/flight_server_scenarios.rs
 delete mode 100644 rust/integration-testing/src/flight_server_scenarios/auth_basic_proto.rs
 delete mode 100644 rust/integration-testing/src/flight_server_scenarios/integration_test.rs
 delete mode 100644 rust/integration-testing/src/flight_server_scenarios/middleware.rs
 delete mode 100644 rust/integration-testing/src/lib.rs
 delete mode 100644 rust/parquet/Cargo.toml
 delete mode 100644 rust/parquet/README.md
 delete mode 100644 rust/parquet/benches/arrow_writer.rs
 delete mode 100644 rust/parquet/build.rs
 delete mode 100644 rust/parquet/src/arrow/array_reader.rs
 delete mode 100644 rust/parquet/src/arrow/arrow_reader.rs
 delete mode 100644 rust/parquet/src/arrow/arrow_writer.rs
 delete mode 100644 rust/parquet/src/arrow/converter.rs
 delete mode 100644 rust/parquet/src/arrow/levels.rs
 delete mode 100644 rust/parquet/src/arrow/mod.rs
 delete mode 100644 rust/parquet/src/arrow/record_reader.rs
 delete mode 100644 rust/parquet/src/arrow/schema.rs
 delete mode 100644 rust/parquet/src/basic.rs
 delete mode 100644 rust/parquet/src/bin/parquet-read.rs
 delete mode 100644 rust/parquet/src/bin/parquet-rowcount.rs
 delete mode 100644 rust/parquet/src/bin/parquet-schema.rs
 delete mode 100644 rust/parquet/src/column/mod.rs
 delete mode 100644 rust/parquet/src/column/page.rs
 delete mode 100644 rust/parquet/src/column/reader.rs
 delete mode 100644 rust/parquet/src/column/writer.rs
 delete mode 100644 rust/parquet/src/compression.rs
 delete mode 100644 rust/parquet/src/data_type.rs
 delete mode 100644 rust/parquet/src/encodings/decoding.rs
 delete mode 100644 rust/parquet/src/encodings/encoding.rs
 delete mode 100644 rust/parquet/src/encodings/levels.rs
 delete mode 100644 rust/parquet/src/encodings/mod.rs
 delete mode 100644 rust/parquet/src/encodings/rle.rs
 delete mode 100644 rust/parquet/src/errors.rs
 delete mode 100644 rust/parquet/src/file/footer.rs
 delete mode 100644 rust/parquet/src/file/metadata.rs
 delete mode 100644 rust/parquet/src/file/mod.rs
 delete mode 100644 rust/parquet/src/file/properties.rs
 delete mode 100644 rust/parquet/src/file/reader.rs
 delete mode 100644 rust/parquet/src/file/serialized_reader.rs
 delete mode 100644 rust/parquet/src/file/statistics.rs
 delete mode 100644 rust/parquet/src/file/writer.rs
 delete mode 100644 rust/parquet/src/lib.rs
 delete mode 100644 rust/parquet/src/record/api.rs
 delete mode 100644 rust/parquet/src/record/mod.rs
 delete mode 100644 rust/parquet/src/record/reader.rs
 delete mode 100644 rust/parquet/src/record/record_writer.rs
 delete mode 100644 rust/parquet/src/record/triplet.rs
 delete mode 100644 rust/parquet/src/schema/mod.rs
 delete mode 100644 rust/parquet/src/schema/parser.rs
 delete mode 100644 rust/parquet/src/schema/printer.rs
 delete mode 100644 rust/parquet/src/schema/types.rs
 delete mode 100644 rust/parquet/src/schema/visitor.rs
 delete mode 100644 rust/parquet/src/util/bit_packing.rs
 delete mode 100644 rust/parquet/src/util/bit_util.rs
 delete mode 100644 rust/parquet/src/util/cursor.rs
 delete mode 100644 rust/parquet/src/util/hash_util.rs
 delete mode 100644 rust/parquet/src/util/io.rs
 delete mode 100644 rust/parquet/src/util/memory.rs
 delete mode 100644 rust/parquet/src/util/mod.rs
 delete mode 100644 rust/parquet/src/util/test_common/file_util.rs
 delete mode 100644 rust/parquet/src/util/test_common/mod.rs
 delete mode 100644 rust/parquet/src/util/test_common/page_util.rs
 delete mode 100644 rust/parquet/src/util/test_common/rand_gen.rs
 delete mode 100644 rust/parquet/tests/custom_writer.rs
 delete mode 100644 rust/parquet_derive/Cargo.toml
 delete mode 100644 rust/parquet_derive/README.md
 delete mode 100644 rust/parquet_derive/src/lib.rs
 delete mode 100644 rust/parquet_derive/src/parquet_field.rs
 delete mode 100644 rust/parquet_derive_test/Cargo.toml
 delete mode 100644 rust/parquet_derive_test/src/lib.rs
 delete mode 100755 rust/pre-commit.sh
 delete mode 100644 rust/rustfmt.toml

diff --git a/.env b/.env
index 587430579f9..510e11d9568 100644
--- a/.env
+++ b/.env
@@ -47,7 +47,6 @@ FEDORA=33
 PYTHON=3.6
 LLVM=12
 CLANG_TOOLS=8
-RUST=nightly-2021-03-24
 GO=1.15
 NODE=14
 MAVEN=3.5.4
diff --git a/.github/workflows/cancel.yml b/.github/workflows/cancel.yml
index 3049ae706b0..da668bcdc3b 100644
--- a/.github/workflows/cancel.yml
+++ b/.github/workflows/cancel.yml
@@ -115,10 +115,3 @@ jobs:
           token: ${{ secrets.GITHUB_TOKEN }}
           workflowFileName: ruby.yml
           skipEventTypes: '["push", "schedule"]'
-      - uses: potiuk/cancel-workflow-runs@master
-        name: "Cancel stale Rust runs"
-        with:
-          cancelMode: allDuplicates
-          token: ${{ secrets.GITHUB_TOKEN }}
-          workflowFileName: rust.yml
-          skipEventTypes: '["push", "schedule"]'
diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml
index d1b01848004..fdbb53c29da 100644
--- a/.github/workflows/dev.yml
+++ b/.github/workflows/dev.yml
@@ -29,7 +29,7 @@ env:
 jobs:
 
   lint:
-    name: Lint C++, Python, R, Rust, Docker, RAT
+    name: Lint C++, Python, R, Docker, RAT
     runs-on: ubuntu-latest
     if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
     steps:
diff --git a/.github/workflows/dev_pr/labeler.yml b/.github/workflows/dev_pr/labeler.yml
index 098e1bad7f4..235b5918902 100644
--- a/.github/workflows/dev_pr/labeler.yml
+++ b/.github/workflows/dev_pr/labeler.yml
@@ -45,19 +45,9 @@ lang-R:
 lang-ruby:
   - ruby/**/*
 
-lang-rust:
-  - rust/**/*
-
-datafusion:
-  - rust/datafusion/**/*
-
-ballista:
-  - rust/ballista/**/*
-
 flight:
   - cpp/src/arrow/flight/**/*
   - r/R/flight.*
-  - rust/arrow-flight/**/*
   - python/pyarrow/*flight.*
 
 gandiva:
@@ -71,4 +61,3 @@ parquet:
   - cpp/src/parquet/**/*
   - r/R/parquet.*
   - ruby/red-parquet/**/*
-  - rust/parquet*/**/*
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index a4f97be3b9c..fb41f36caf8 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -62,6 +62,11 @@ jobs:
           fetch-depth: 0
       - name: Fetch Submodules and Tags
         run: ci/scripts/util_checkout.sh
+      - name: Checkout Arrow Rust
+        uses: actions/checkout@v2
+        with:
+          repository: apache/arrow-rs
+          path: rust
       - name: Free Up Disk Space
         run: ci/scripts/util_cleanup.sh
       - name: Cache Docker Volumes
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
deleted file mode 100644
index 9c0a4ea72f1..00000000000
--- a/.github/workflows/rust.yml
+++ /dev/null
@@ -1,480 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-name: Rust
-
-on:
-  push:
-    paths:
-      - '.github/workflows/rust.yml'
-      - 'rust/**'
-      - 'format/Flight.proto'
-  pull_request:
-    paths:
-      - '.github/workflows/rust.yml'
-      - 'rust/**'
-      - 'format/Flight.proto'
-
-jobs:
-
-  # build the library, a compilation step used by multiple steps below
-  linux-build-lib:
-    name: Build Libraries on AMD64 Rust ${{ matrix.rust }}
-    runs-on: ubuntu-latest
-    timeout-minutes: 40
-    strategy:
-      matrix:
-        arch: [amd64]
-        rust: [stable]
-    container:
-      image: ${{ matrix.arch }}/rust
-      env:
-        # Disable full debug symbol generation to speed up CI build and keep memory down
-        # "1" means line tables only, which is useful for panic tracebacks.
-        RUSTFLAGS: "-C debuginfo=1"
-    steps:
-      - uses: actions/checkout@v2
-      - name: Cache Cargo
-        uses: actions/cache@v2
-        with:
-          # these represent dependencies downloaded by cargo
-          # and thus do not depend on the OS, arch nor rust version.
-          path: /github/home/.cargo
-          key: cargo-cache-
-      - name: Cache Rust dependencies
-        uses: actions/cache@v2
-        with:
-          # these represent compiled steps of both dependencies and arrow
-          # and thus are specific for a particular OS, arch and rust version.
-          path: /github/home/target
-          key: ${{ runner.os }}-${{ matrix.arch }}-target-cache-${{ matrix.rust }}-
-      - name: Setup Rust toolchain
-        run: |
-          rustup toolchain install ${{ matrix.rust }}
-          rustup default ${{ matrix.rust }}
-          rustup component add rustfmt
-      - name: Build Workspace
-        run: |
-          export CARGO_HOME="/github/home/.cargo"
-          export CARGO_TARGET_DIR="/github/home/target"
-          cd rust
-          cargo build
-      # Ballista is currently not part of the main workspace so requires a separate build step
-      - name: Build Ballista
-        run: |
-          export CARGO_HOME="/github/home/.cargo"
-          export CARGO_TARGET_DIR="/github/home/target"
-          cd rust/ballista/rust
-          # snmalloc requires cmake so build without default features
-          cargo build --no-default-features
-
-  # test the crate
-  linux-test:
-    name: Test Workspace on AMD64 Rust ${{ matrix.rust }}
-    needs: [linux-build-lib]
-    runs-on: ubuntu-latest
-    timeout-minutes: 40
-    strategy:
-      matrix:
-        arch: [amd64]
-        rust: [stable]
-    container:
-      image: ${{ matrix.arch }}/rust
-      env:
-        # Disable full debug symbol generation to speed up CI build and keep memory down
-        # "1" means line tables only, which is useful for panic tracebacks.
-        RUSTFLAGS: "-C debuginfo=1"
-        ARROW_TEST_DATA: /__w/arrow/arrow/testing/data
-        PARQUET_TEST_DATA: /__w/arrow/arrow/cpp/submodules/parquet-testing/data
-    steps:
-      - uses: actions/checkout@v2
-        with:
-          submodules: true
-      - name: Cache Cargo
-        uses: actions/cache@v2
-        with:
-          path: /github/home/.cargo
-          # this key equals the ones on `linux-build-lib` for re-use
-          key: cargo-cache-
-      - name: Cache Rust dependencies
-        uses: actions/cache@v2
-        with:
-          path: /github/home/target
-          # this key equals the ones on `linux-build-lib` for re-use
-          key: ${{ runner.os }}-${{ matrix.arch }}-target-cache-${{ matrix.rust }}
-      - name: Setup Rust toolchain
-        run: |
-          rustup toolchain install ${{ matrix.rust }}
-          rustup default ${{ matrix.rust }}
-          rustup component add rustfmt
-      - name: Run tests
-        run: |
-          export CARGO_HOME="/github/home/.cargo"
-          export CARGO_TARGET_DIR="/github/home/target"
-          cd rust
-          # run tests on all workspace members with default feature list
-          cargo test
-          # test datafusion examples
-          cd datafusion-examples
-          cargo test --no-default-features
-          cargo run --example csv_sql
-          cargo run --example parquet_sql
-          cd ..
-          cd arrow
-          # re-run tests on arrow workspace with additional features
-          cargo test --features=prettyprint
-          cargo run --example builders
-          cargo run --example dynamic_types
-          cargo run --example read_csv
-          cargo run --example read_csv_infer_schema
-      # Ballista is currently not part of the main workspace so requires a separate test step
-      - name: Run Ballista tests
-        run: |
-          export CARGO_HOME="/github/home/.cargo"
-          export CARGO_TARGET_DIR="/github/home/target"
-          cd rust/ballista/rust
-          # snmalloc requires cmake so build without default features
-          cargo test --no-default-features
-
-  # test the --features "simd" of the arrow crate. This requires nightly.
-  linux-test-simd:
-    name: Test SIMD on AMD64 Rust ${{ matrix.rust }}
-    runs-on: ubuntu-latest
-    timeout-minutes: 40
-    strategy:
-      matrix:
-        arch: [amd64]
-        rust: [nightly-2021-03-24]
-    container:
-      image: ${{ matrix.arch }}/rust
-      env:
-        # Disable full debug symbol generation to speed up CI build and keep memory down
-        # "1" means line tables only, which is useful for panic tracebacks.
-        RUSTFLAGS: "-C debuginfo=1"
-        ARROW_TEST_DATA: /__w/arrow/arrow/testing/data
-    steps:
-      - uses: actions/checkout@v2
-        with:
-          submodules: true
-      - name: Cache Cargo
-        uses: actions/cache@v2
-        with:
-          path: /github/home/.cargo
-          # this key equals the ones on `linux-build-lib` for re-use
-          key: cargo-cache-
-      - name: Cache Rust dependencies
-        uses: actions/cache@v2
-        with:
-          path: /github/home/target
-          # this key equals the ones on `linux-build-lib` for re-use
-          key: ${{ runner.os }}-${{ matrix.arch }}-target-cache-${{ matrix.rust }}
-      - name: Setup Rust toolchain
-        run: |
-          rustup toolchain install ${{ matrix.rust }}
-          rustup default ${{ matrix.rust }}
-          rustup component add rustfmt
-      - name: Run tests
-        run: |
-          export CARGO_HOME="/github/home/.cargo"
-          export CARGO_TARGET_DIR="/github/home/target"
-          cd rust/arrow
-          cargo test --features "simd"
-
-  windows-and-macos:
-    name: Test on ${{ matrix.os }} Rust ${{ matrix.rust }}
-    runs-on: ${{ matrix.os }}
-    timeout-minutes: 40
-    strategy:
-      matrix:
-        os: [windows-latest, macos-latest]
-        rust: [stable]
-    steps:
-      - uses: actions/checkout@v2
-        with:
-          submodules: true
-      # TODO: this won't cache anything, which is expensive. Setup this action
-      # with a OS-dependent path.
-      - name: Setup Rust toolchain
-        run: |
-          rustup toolchain install ${{ matrix.rust }}
-          rustup default ${{ matrix.rust }}
-          rustup component add rustfmt
-      - name: Run tests
-        shell: bash
-        run: |
-          export ARROW_TEST_DATA=$(pwd)/testing/data
-          export PARQUET_TEST_DATA=$(pwd)/cpp/submodules/parquet-testing/data
-          # do not produce debug symbols to keep memory usage down
-          export RUSTFLAGS="-C debuginfo=0"
-          cd rust
-          cargo test
-
-  clippy:
-    name: Clippy
-    needs: [linux-build-lib]
-    runs-on: ubuntu-latest
-    timeout-minutes: 40
-    strategy:
-      matrix:
-        arch: [amd64]
-        rust: [stable]
-    container:
-      image: ${{ matrix.arch }}/rust
-      env:
-        # Disable full debug symbol generation to speed up CI build and keep memory down
-        # "1" means line tables only, which is useful for panic tracebacks.
-        RUSTFLAGS: "-C debuginfo=1"
-    steps:
-      - uses: actions/checkout@v2
-        with:
-          submodules: true
-      - name: Cache Cargo
-        uses: actions/cache@v2
-        with:
-          path: /github/home/.cargo
-          # this key equals the ones on `linux-build-lib` for re-use
-          key: cargo-cache-
-      - name: Cache Rust dependencies
-        uses: actions/cache@v2
-        with:
-          path: /github/home/target
-          # this key equals the ones on `linux-build-lib` for re-use
-          key: ${{ runner.os }}-${{ matrix.arch }}-target-cache-${{ matrix.rust }}
-      - name: Setup Rust toolchain
-        run: |
-          rustup toolchain install ${{ matrix.rust }}
-          rustup default ${{ matrix.rust }}
-          rustup component add rustfmt clippy
-      - name: Run clippy
-        run: |
-          export CARGO_HOME="/github/home/.cargo"
-          export CARGO_TARGET_DIR="/github/home/target"
-          cd rust
-          cargo clippy --all-targets --workspace -- -D warnings -A clippy::redundant_field_names
-
-  miri-checks:
-    name: MIRI
-    runs-on: ubuntu-latest
-    timeout-minutes: 40
-    strategy:
-      matrix:
-        arch: [amd64]
-        rust: [nightly-2021-03-24]
-    steps:
-      - uses: actions/checkout@v2
-        with:
-          submodules: true
-      - uses: actions/cache@v2
-        with:
-          path: |
-            ~/.cargo/registry
-            ~/.cargo/git
-            target
-          key: ${{ runner.os }}-cargo-miri-${{ hashFiles('**/Cargo.lock') }}
-      - name: Setup Rust toolchain
-        run: |
-          rustup toolchain install ${{ matrix.rust }}
-          rustup default ${{ matrix.rust }}
-          rustup component add rustfmt clippy miri
-      - name: Run Miri Checks
-        env:
-          RUST_BACKTRACE: full
-          RUST_LOG: 'trace'
-        run: |
-          export MIRIFLAGS="-Zmiri-disable-isolation"
-          cd rust
-          cargo miri setup
-          cargo clean
-          # Ignore MIRI errors until we can get a clean run
-          cargo miri test || true
-
-  coverage:
-    name: Coverage
-    runs-on: ubuntu-latest
-    timeout-minutes: 40
-    strategy:
-      matrix:
-        arch: [amd64]
-        rust: [stable]
-    steps:
-      - uses: actions/checkout@v2
-        with:
-          submodules: true
-      - name: Cache Cargo
-        uses: actions/cache@v2
-        with:
-          path: /home/runner/.cargo
-          # this key is not equal because the user is different than on a container (runner vs github)
-          key: cargo-coverage-cache-
-      - name: Cache Rust dependencies
-        uses: actions/cache@v2
-        with:
-          path: /home/runner/target
-          # this key is not equal because coverage uses different compilation flags.
-          key: ${{ runner.os }}-${{ matrix.arch }}-target-coverage-cache-${{ matrix.rust }}-
-      - name: Run coverage
-        run: |
-          export CARGO_HOME="/home/runner/.cargo"
-          export CARGO_TARGET_DIR="/home/runner/target"
-
-          export ARROW_TEST_DATA=$(pwd)/testing/data
-          export PARQUET_TEST_DATA=$(pwd)/cpp/submodules/parquet-testing/data
-
-          # 2020-11-15: There is a cargo-tarpaulin regression in 0.17.0
-          # see https://github.com/xd009642/tarpaulin/issues/618
-          cargo install --version 0.16.0 cargo-tarpaulin
-          cd rust
-          cargo tarpaulin --out Xml
-      - name: Report coverage
-        continue-on-error: true
-        run: bash <(curl -s https://codecov.io/bash)
-
-  # test FFI against the C-Data interface exposed by pyarrow
-  pyarrow-integration-test:
-    name: Test Pyarrow C Data Interface
-    runs-on: ubuntu-latest
-    timeout-minutes: 40
-    strategy:
-      matrix:
-        rust: [stable]
-    steps:
-      - uses: actions/checkout@v2
-        with:
-          submodules: true
-      - name: Setup Rust toolchain
-        run: |
-          rustup toolchain install ${{ matrix.rust }}
-          rustup default ${{ matrix.rust }}
-          rustup component add rustfmt clippy
-      - name: Cache Cargo
-        uses: actions/cache@v2
-        with:
-          path: /home/runner/.cargo
-          key: cargo-maturin-cache-
-      - name: Cache Rust dependencies
-        uses: actions/cache@v2
-        with:
-          path: /home/runner/target
-          # this key is not equal because maturin uses different compilation flags.
-          key: ${{ runner.os }}-${{ matrix.arch }}-target-maturin-cache-${{ matrix.rust }}-
-      - uses: actions/setup-python@v2
-        with:
-          python-version: '3.7'
-      - name: Install Python dependencies
-        run: python -m pip install --upgrade pip setuptools wheel
-      - name: Run tests
-        run: |
-          export CARGO_HOME="/home/runner/.cargo"
-          export CARGO_TARGET_DIR="/home/runner/target"
-
-          cd rust/arrow-pyarrow-integration-testing
-
-          python -m venv venv
-          source venv/bin/activate
-
-          pip install maturin==0.8.2 toml==0.10.1 pyarrow==1.0.0
-          maturin develop
-          python -m unittest discover tests
-
-  # test the arrow crate builds against wasm32 in stable rust
-  wasm32-build:
-    name: Build wasm32 on AMD64 Rust ${{ matrix.rust }}
-    runs-on: ubuntu-latest
-    timeout-minutes: 40
-    strategy:
-      matrix:
-        arch: [amd64]
-        rust: [nightly-2021-03-24]
-    container:
-      image: ${{ matrix.arch }}/rust
-      env:
-        # Disable full debug symbol generation to speed up CI build and keep memory down
-        # "1" means line tables only, which is useful for panic tracebacks.
-        RUSTFLAGS: "-C debuginfo=1"
-        ARROW_TEST_DATA: /__w/arrow/arrow/testing/data
-        PARQUET_TEST_DATA: /__w/arrow/arrow/cpp/submodules/parquet-testing/data
-    steps:
-      - uses: actions/checkout@v2
-        with:
-          submodules: true
-      - name: Cache Cargo
-        uses: actions/cache@v2
-        with:
-          path: /github/home/.cargo
-          # this key equals the ones on `linux-build-lib` for re-use
-          key: cargo-cache-
-      - name: Cache Rust dependencies
-        uses: actions/cache@v2
-        with:
-          path: /github/home/target
-          key: ${{ runner.os }}-${{ matrix.arch }}-target-wasm32-cache-${{ matrix.rust }}
-      - name: Setup Rust toolchain
-        run: |
-          rustup toolchain install ${{ matrix.rust }}
-          rustup override set ${{ matrix.rust }}
-          rustup component add rustfmt
-          rustup target add wasm32-unknown-unknown
-      - name: Build arrow crate
-        run: |
-          export CARGO_HOME="/github/home/.cargo"
-          export CARGO_TARGET_DIR="/github/home/target"
-          cd rust/arrow
-          cargo build --target wasm32-unknown-unknown
-
-  # test the projects can build without default features
-  default-build:
-    name: Check No Defaults on AMD64 Rust ${{ matrix.rust }}
-    runs-on: ubuntu-latest
-    timeout-minutes: 40
-    strategy:
-      matrix:
-        arch: [amd64]
-        rust: [stable]
-    container:
-      image: ${{ matrix.arch }}/rust
-      env:
-        # Disable full debug symbol generation to speed up CI build and keep memory down
-        # "1" means line tables only, which is useful for panic tracebacks.
-        RUSTFLAGS: "-C debuginfo=1"
-        ARROW_TEST_DATA: /__w/arrow/arrow/testing/data
-        PARQUET_TEST_DATA: /__w/arrow/arrow/cpp/submodules/parquet-testing/data
-    steps:
-      - uses: actions/checkout@v2
-        with:
-          submodules: true
-      - name: Cache Cargo
-        uses: actions/cache@v2
-        with:
-          path: /github/home/.cargo
-          # this key equals the ones on `linux-build-lib` for re-use
-          key: cargo-cache-
-      - name: Cache Rust dependencies
-        uses: actions/cache@v2
-        with:
-          path: /github/home/target
-          key: ${{ runner.os }}-${{ matrix.arch }}-target-wasm32-cache-${{ matrix.rust }}
-      - name: Setup Rust toolchain
-        run: |
-          rustup toolchain install ${{ matrix.rust }}
-          rustup override set ${{ matrix.rust }}
-          rustup component add rustfmt
-      - name: Build arrow crate
-        run: |
-          export CARGO_HOME="/github/home/.cargo"
-          export CARGO_TARGET_DIR="/github/home/target"
-          cd rust/arrow
-          cargo check --all-targets --no-default-features
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 9d2d2d81d68..8b5a24476d8 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -29,14 +29,6 @@ repos:
         entry: bash -c "git archive HEAD --prefix=apache-arrow/ --output=arrow-src.tar && ./dev/release/run-rat.sh arrow-src.tar"
         always_run: true
         pass_filenames: false
-      - id: rustfmt
-        name: Rust Format
-        language: system
-        entry: bash -c "cd rust && cargo +stable fmt --all -- --check"
-        files: ^rust/.*\.rs$
-        types:
-          - file
-          - rust
       - id: cmake-format
         name: CMake Format
         language: python
diff --git a/README.md b/README.md
index e9e13537cc9..efe63e1b269 100644
--- a/README.md
+++ b/README.md
@@ -52,7 +52,7 @@ Major components of the project include:
  - [Python libraries](https://github.com/apache/arrow/tree/master/python)
  - [R libraries](https://github.com/apache/arrow/tree/master/r)
  - [Ruby libraries](https://github.com/apache/arrow/tree/master/ruby)
- - [Rust libraries](https://github.com/apache/arrow/tree/master/rust)
+ - [Rust libraries](https://github.com/apache/arrow-rs)
 
 Arrow is an [Apache Software Foundation](https://www.apache.org) project. Learn more at
 [arrow.apache.org](https://arrow.apache.org).
diff --git a/ci/detect-changes.py b/ci/detect-changes.py
index c32f6e040dd..14e71ed48ce 100644
--- a/ci/detect-changes.py
+++ b/ci/detect-changes.py
@@ -140,7 +140,7 @@ def list_github_actions_affected_files():
 
 
 LANGUAGE_TOPICS = ['c_glib', 'cpp', 'docs', 'go', 'java', 'js', 'python',
-                   'r', 'ruby', 'rust', 'csharp']
+                   'r', 'ruby', 'csharp']
 
 ALL_TOPICS = LANGUAGE_TOPICS + ['integration', 'dev']
 
@@ -161,7 +161,7 @@ def list_github_actions_affected_files():
 }
 
 COMPONENTS = {'cpp', 'java', 'c_glib', 'r', 'ruby', 'integration', 'js',
-              'rust', 'csharp', 'go', 'docs', 'python', 'dev'}
+              'csharp', 'go', 'docs', 'python', 'dev'}
 
 
 def get_affected_topics(affected_files):
@@ -298,7 +298,6 @@ def test_get_affected_topics():
         'python': True,
         'r': True,
         'ruby': True,
-        'rust': False,
         'csharp': False,
         'integration': True,
         'dev': False
@@ -315,7 +314,6 @@ def test_get_affected_topics():
         'python': True,
         'r': True,
         'ruby': True,
-        'rust': True,
         'csharp': True,
         'integration': True,
         'dev': False
@@ -332,7 +330,6 @@ def test_get_affected_topics():
         'python': True,
         'r': True,
         'ruby': True,
-        'rust': True,
         'csharp': True,
         'integration': True,
         'dev': True,
diff --git a/ci/docker/linux-apt-lint.dockerfile b/ci/docker/linux-apt-lint.dockerfile
index 66538919c49..c711c4c883c 100644
--- a/ci/docker/linux-apt-lint.dockerfile
+++ b/ci/docker/linux-apt-lint.dockerfile
@@ -45,15 +45,6 @@ COPY --from=hadolint /bin/hadolint /usr/bin/hadolint
 COPY ci/scripts/install_iwyu.sh /arrow/ci/scripts/
 RUN arrow/ci/scripts/install_iwyu.sh /tmp/iwyu /usr/local ${clang_tools}
 
-# Rust linter
-ARG rust=nightly-2021-03-24
-RUN curl https://sh.rustup.rs -sSf | \
-    sh -s -- --default-toolchain stable -y
-ENV PATH /root/.cargo/bin:$PATH
-RUN rustup install ${rust} && \
-    rustup default ${rust} && \
-    rustup component add rustfmt
-
 # Use python3 by default in scripts
 RUN ln -s /usr/bin/python3 /usr/local/bin/python && \
     ln -s /usr/bin/pip3 /usr/local/bin/pip
diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py
index 06dd6b60370..5329e0abbe2 100644
--- a/dev/archery/archery/cli.py
+++ b/dev/archery/archery/cli.py
@@ -282,7 +282,6 @@ def build(ctx, src, build_dir, force, targets, **kwargs):
     LintCheck('rat',
               "Check all sources files for license texts via Apache RAT."),
     LintCheck('r', "Lint R files."),
-    LintCheck('rust', "Lint Rust files."),
     LintCheck('docker', "Lint Dockerfiles with hadolint."),
 ]
 
diff --git a/dev/archery/archery/lang/rust.py b/dev/archery/archery/lang/rust.py
deleted file mode 100644
index b1d765b7d52..00000000000
--- a/dev/archery/archery/lang/rust.py
+++ /dev/null
@@ -1,23 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from ..utils.command import Command, default_bin
-
-
-class Cargo(Command):
-    def __init__(self, cargo_bin=None):
-        self.bin = default_bin(cargo_bin, "cargo")
diff --git a/dev/archery/archery/utils/lint.py b/dev/archery/archery/utils/lint.py
index 3b94d0139c0..0b0e8b46948 100644
--- a/dev/archery/archery/utils/lint.py
+++ b/dev/archery/archery/utils/lint.py
@@ -26,7 +26,6 @@
 from .git import git
 from .logger import logger
 from ..lang.cpp import CppCMakeDefinition, CppConfiguration
-from ..lang.rust import Cargo
 from ..lang.python import Autopep8, Flake8, NumpyDoc
 from .rat import Rat, exclusion_from_globs
 from .tmpdir import tmpdir
@@ -292,20 +291,6 @@ def r_linter(src):
     yield LintResult.from_cmd(Bash().run(r_lint_sh, check=False))
 
 
-def rust_linter(src):
-    """Run Rust linter."""
-    logger.info("Running Rust linter")
-    cargo = Cargo()
-
-    if not cargo.available:
-        logger.error("Rust linter requested but cargo executable not found.")
-        return
-
-    yield LintResult.from_cmd(cargo.run("+stable", "fmt", "--all", "--",
-                                        "--check", cwd=src.rust,
-                                        check=False))
-
-
 class Hadolint(Command):
     def __init__(self, hadolint_bin=None):
         self.bin = default_bin(hadolint_bin, "hadolint")
@@ -341,7 +326,7 @@ def docker_linter(src):
 def linter(src, fix=False, *, clang_format=False, cpplint=False,
            clang_tidy=False, iwyu=False, iwyu_all=False,
            python=False, numpydoc=False, cmake_format=False, rat=False,
-           r=False, rust=False, docker=False):
+           r=False, docker=False):
     """Run all linters."""
     with tmpdir(prefix="arrow-lint-") as root:
         build_dir = os.path.join(root, "cpp-build")
@@ -375,9 +360,6 @@ def linter(src, fix=False, *, clang_format=False, cpplint=False,
         if r:
             results.extend(r_linter(src))
 
-        if rust:
-            results.extend(rust_linter(src))
-
         if docker:
             results.extend(docker_linter(src))
 
diff --git a/dev/archery/archery/utils/source.py b/dev/archery/archery/utils/source.py
index f7e47a5a1b6..1080cb75d67 100644
--- a/dev/archery/archery/utils/source.py
+++ b/dev/archery/archery/utils/source.py
@@ -88,11 +88,6 @@ def r(self):
         """ Returns the r directory of an Arrow sources. """
         return self.path / "r"
 
-    @property
-    def rust(self):
-        """ Returns the rust directory of an Arrow sources. """
-        return self.path / "rust"
-
     @property
     def git_backed(self):
         """ Indicate if the sources are backed by git. """
diff --git a/dev/release/01-prepare-test.rb b/dev/release/01-prepare-test.rb
index 007e4da040c..3cc5418df0f 100644
--- a/dev/release/01-prepare-test.rb
+++ b/dev/release/01-prepare-test.rb
@@ -324,119 +324,6 @@ def test_version_pre_tag
                         "+  VERSION = \"#{@release_version}\""],
                      ],
                    },
-                   {
-                     path: "rust/arrow-flight/Cargo.toml",
-                     hunks: [
-                       ["-version = \"#{@snapshot_version}\"",
-                        "+version = \"#{@release_version}\""],
-                       ["-arrow = { path = \"../arrow\", version = \"#{@snapshot_version}\" }",
-                        "+arrow = { path = \"../arrow\", version = \"#{@release_version}\" }"],
-                     ],
-                   },
-                   {
-                     path: "rust/arrow-pyarrow-integration-testing/Cargo.toml",
-                     hunks: [
-                       ["-version = \"#{@snapshot_version}\"",
-                        "+version = \"#{@release_version}\""],
-                       ["-arrow = { path = \"../arrow\", version = \"#{@snapshot_version}\" }",
-                        "+arrow = { path = \"../arrow\", version = \"#{@release_version}\" }"],
-                     ],
-                   },
-                   {
-                     path: "rust/arrow/Cargo.toml",
-                     hunks: [
-                       ["-version = \"#{@snapshot_version}\"",
-                        "+version = \"#{@release_version}\""],
-                     ],
-                   },
-                   {
-                     path: "rust/benchmarks/Cargo.toml",
-                     hunks: [
-                       ["-version = \"#{@snapshot_version}\"",
-                        "+version = \"#{@release_version}\""],
-                     ],
-                   },
-                   {
-                     path: "rust/datafusion-examples/Cargo.toml",
-                     hunks: [
-                       ["-version = \"#{@snapshot_version}\"",
-                        "+version = \"#{@release_version}\""],
-                     ],
-                   },
-                   {
-                     path: "rust/datafusion/Cargo.toml",
-                     hunks: [
-                       ["-version = \"#{@snapshot_version}\"",
-                        "+version = \"#{@release_version}\""],
-                       ["-arrow = { path = \"../arrow\", version = \"#{@snapshot_version}\", features = [\"prettyprint\"] }",
-                        "-parquet = { path = \"../parquet\", version = \"#{@snapshot_version}\", features = [\"arrow\"] }",
-                        "+arrow = { path = \"../arrow\", version = \"#{@release_version}\", features = [\"prettyprint\"] }",
-                        "+parquet = { path = \"../parquet\", version = \"#{@release_version}\", features = [\"arrow\"] }"],
-                     ],
-                   },
-                   {
-                     path: "rust/datafusion/README.md",
-                     hunks: [
-                       ["-datafusion = \"#{@snapshot_version}\"",
-                        "+datafusion = \"#{@release_version}\""],
-                     ],
-                   },
-                   {
-                     path: "rust/integration-testing/Cargo.toml",
-                     hunks: [
-                       ["-version = \"#{@snapshot_version}\"",
-                        "+version = \"#{@release_version}\""],
-                     ],
-                   },
-                   {
-                     path: "rust/parquet/Cargo.toml",
-                     hunks: [
-                       ["-version = \"#{@snapshot_version}\"",
-                        "+version = \"#{@release_version}\""],
-                       ["-arrow = { path = \"../arrow\", version = \"#{@snapshot_version}\", optional = true }",
-                        "+arrow = { path = \"../arrow\", version = \"#{@release_version}\", optional = true }"],
-                       ["-arrow = { path = \"../arrow\", version = \"#{@snapshot_version}\" }",
-                        "+arrow = { path = \"../arrow\", version = \"#{@release_version}\" }"],
-                     ],
-                   },
-                   {
-                     path: "rust/parquet/README.md",
-                     hunks: [
-                       ["-parquet = \"#{@snapshot_version}\"",
-                        "+parquet = \"#{@release_version}\""],
-                       ["-See [crate documentation](https://docs.rs/crate/parquet/#{@snapshot_version}) on available API.",
-                        "+See [crate documentation](https://docs.rs/crate/parquet/#{@release_version}) on available API."],
-                     ],
-                   },
-                   {
-                     path: "rust/parquet_derive/Cargo.toml",
-                     hunks: [
-                       ["-version = \"#{@snapshot_version}\"",
-                        "+version = \"#{@release_version}\""],
-                       ["-parquet = { path = \"../parquet\", version = \"#{@snapshot_version}\" }",
-                        "+parquet = { path = \"../parquet\", version = \"#{@release_version}\" }"],
-                     ],
-                   },
-                   {
-                     path: "rust/parquet_derive/README.md",
-                     hunks: [
-                       ["-parquet = \"#{@snapshot_version}\"",
-                        "-parquet_derive = \"#{@snapshot_version}\"",
-                        "+parquet = \"#{@release_version}\"",
-                        "+parquet_derive = \"#{@release_version}\""],
-                     ],
-                   },
-                   {
-                     path: "rust/parquet_derive_test/Cargo.toml",
-                     hunks: [
-                       ["-version = \"#{@snapshot_version}\"",
-                        "+version = \"#{@release_version}\""],
-                       ["-parquet = { path = \"../parquet\", version = \"#{@snapshot_version}\" }",
-                        "-parquet_derive = { path = \"../parquet_derive\", version = \"#{@snapshot_version}\" }",
-                        "+parquet = { path = \"../parquet\", version = \"#{@release_version}\" }",
-                        "+parquet_derive = { path = \"../parquet_derive\", version = \"#{@release_version}\" }"],
-                     ],
-                   },
                  ],
                  parse_patch(git("log", "-n", "1", "-p")))
   end
@@ -633,119 +520,6 @@ def test_version_post_tag
                         "+  VERSION = \"#{@next_snapshot_version}\""],
                      ],
                    },
-                   {
-                     path: "rust/arrow-flight/Cargo.toml",
-                     hunks: [
-                       ["-version = \"#{@release_version}\"",
-                        "+version = \"#{@next_snapshot_version}\""],
-                       ["-arrow = { path = \"../arrow\", version = \"#{@release_version}\" }",
-                        "+arrow = { path = \"../arrow\", version = \"#{@next_snapshot_version}\" }"],
-                     ],
-                   },
-                   {
-                     path: "rust/arrow-pyarrow-integration-testing/Cargo.toml",
-                     hunks: [
-                       ["-version = \"#{@release_version}\"",
-                        "+version = \"#{@next_snapshot_version}\""],
-                       ["-arrow = { path = \"../arrow\", version = \"#{@release_version}\" }",
-                        "+arrow = { path = \"../arrow\", version = \"#{@next_snapshot_version}\" }"],
-                     ],
-                   },
-                   {
-                     path: "rust/arrow/Cargo.toml",
-                     hunks: [
-                       ["-version = \"#{@release_version}\"",
-                        "+version = \"#{@next_snapshot_version}\""],
-                     ],
-                   },
-                   {
-                     path: "rust/benchmarks/Cargo.toml",
-                     hunks: [
-                       ["-version = \"#{@release_version}\"",
-                        "+version = \"#{@next_snapshot_version}\""],
-                     ],
-                   },
-                   {
-                     path: "rust/datafusion-examples/Cargo.toml",
-                     hunks: [
-                       ["-version = \"#{@release_version}\"",
-                        "+version = \"#{@next_snapshot_version}\""],
-                     ],
-                   },
-                   {
-                     path: "rust/datafusion/Cargo.toml",
-                     hunks: [
-                       ["-version = \"#{@release_version}\"",
-                        "+version = \"#{@next_snapshot_version}\""],
-                       ["-arrow = { path = \"../arrow\", version = \"#{@release_version}\", features = [\"prettyprint\"] }",
-                        "-parquet = { path = \"../parquet\", version = \"#{@release_version}\", features = [\"arrow\"] }",
-                        "+arrow = { path = \"../arrow\", version = \"#{@next_snapshot_version}\", features = [\"prettyprint\"] }",
-                        "+parquet = { path = \"../parquet\", version = \"#{@next_snapshot_version}\", features = [\"arrow\"] }"],
-                     ],
-                   },
-                   {
-                     path: "rust/datafusion/README.md",
-                     hunks: [
-                       ["-datafusion = \"#{@release_version}\"",
-                        "+datafusion = \"#{@next_snapshot_version}\""],
-                     ],
-                   },
-                   {
-                     path: "rust/integration-testing/Cargo.toml",
-                     hunks: [
-                       ["-version = \"#{@release_version}\"",
-                        "+version = \"#{@next_snapshot_version}\""],
-                     ],
-                   },
-                   {
-                     path: "rust/parquet/Cargo.toml",
-                     hunks: [
-                       ["-version = \"#{@release_version}\"",
-                        "+version = \"#{@next_snapshot_version}\""],
-                       ["-arrow = { path = \"../arrow\", version = \"#{@release_version}\", optional = true }",
-                        "+arrow = { path = \"../arrow\", version = \"#{@next_snapshot_version}\", optional = true }"],
-                       ["-arrow = { path = \"../arrow\", version = \"#{@release_version}\" }",
-                        "+arrow = { path = \"../arrow\", version = \"#{@next_snapshot_version}\" }"],
-                     ],
-                   },
-                   {
-                     path: "rust/parquet/README.md",
-                     hunks: [
-                       ["-parquet = \"#{@release_version}\"",
-                        "+parquet = \"#{@next_snapshot_version}\""],
-                       ["-See [crate documentation](https://docs.rs/crate/parquet/#{@release_version}) on available API.",
-                        "+See [crate documentation](https://docs.rs/crate/parquet/#{@next_snapshot_version}) on available API."],
-                     ],
-                   },
-                   {
-                     path: "rust/parquet_derive/Cargo.toml",
-                     hunks: [
-                       ["-version = \"#{@release_version}\"",
-                        "+version = \"#{@next_snapshot_version}\""],
-                       ["-parquet = { path = \"../parquet\", version = \"#{@release_version}\" }",
-                        "+parquet = { path = \"../parquet\", version = \"#{@next_snapshot_version}\" }"],
-                     ],
-                   },
-                   {
-                     path: "rust/parquet_derive/README.md",
-                     hunks: [
-                       ["-parquet = \"#{@release_version}\"",
-                        "-parquet_derive = \"#{@release_version}\"",
-                        "+parquet = \"#{@next_snapshot_version}\"",
-                        "+parquet_derive = \"#{@next_snapshot_version}\""],
-                     ],
-                   },
-                   {
-                     path: "rust/parquet_derive_test/Cargo.toml",
-                     hunks: [
-                       ["-version = \"#{@release_version}\"",
-                        "+version = \"#{@next_snapshot_version}\""],
-                       ["-parquet = { path = \"../parquet\", version = \"#{@release_version}\" }",
-                        "-parquet_derive = { path = \"../parquet_derive\", version = \"#{@release_version}\" }",
-                        "+parquet = { path = \"../parquet\", version = \"#{@next_snapshot_version}\" }",
-                        "+parquet_derive = { path = \"../parquet_derive\", version = \"#{@next_snapshot_version}\" }"],
-                     ],
-                   },
                  ],
                  parse_patch(git("log", "-n", "1", "-p")))
   end
diff --git a/dev/release/post-07-rust.sh b/dev/release/post-07-rust.sh
deleted file mode 100755
index 3c94607565f..00000000000
--- a/dev/release/post-07-rust.sh
+++ /dev/null
@@ -1,74 +0,0 @@
-#!/bin/bash
-# -*- indent-tabs-mode: nil; sh-indentation: 2; sh-basic-offset: 2 -*-
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-set -e
-set -o pipefail
-
-SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-
-if [ "$#" -ne 1 ]; then
-  echo "Usage: $0 <version>"
-  exit
-fi
-
-version=$1
-
-: ${INSTALL_RUST:=no}
-
-if [ "${INSTALL_RUST}" == "yes" ]; then
-  export RUSTUP_HOME="$(pwd)/release-rustup"
-  export CARGO_HOME="${RUSTUP_HOME}"
-  rm -rf "${RUSTUP_HOME}"
-  curl https://sh.rustup.rs -sSf | sh -s -- -y --no-modify-path
-  export PATH="${RUSTUP_HOME}/bin:$PATH"
-  source "${RUSTUP_HOME}/env"
-  rustup default stable
-  cargo login
-fi
-
-archive_name=apache-arrow-${version}
-tar_gz=${archive_name}.tar.gz
-rm -f ${tar_gz}
-curl \
-  --remote-name \
-  --fail \
-  https://downloads.apache.org/arrow/arrow-${version}/${tar_gz}
-rm -rf ${archive_name}
-tar xf ${tar_gz}
-modules=()
-for cargo_toml in ${archive_name}/rust/*/Cargo.toml; do
-  module_dir=$(dirname ${cargo_toml})
-  pushd ${module_dir}
-  cargo publish --allow-dirty
-  modules+=($(basename ${module_dir}))
-  popd
-done
-popd
-rm -rf ${archive_name}
-rm -f ${tar_gz}
-
-if [ "${INSTALL_RUST}" == "yes" ]; then
-  rm -rf "${RUSTUP_HOME}"
-fi
-
-echo "Success! The released packages are available here:"
-for module in ${modules[@]}; do
-  echo "  https://crates.io/crates/${module}/${version}"
-done
diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt
index a50d729a7d5..13e431ceb8d 100644
--- a/dev/release/rat_exclude_files.txt
+++ b/dev/release/rat_exclude_files.txt
@@ -190,9 +190,6 @@ r/inst/include/cpp11.hpp
 r/inst/include/cpp11/*.hpp
 .gitattributes
 ruby/red-arrow/.yardopts
-rust/arrow/test/data/*.csv
-rust/rust-toolchain
-rust/arrow-flight/src/arrow.flight.protocol.rs
 julia/Arrow/Project.toml
 julia/Arrow/README.md
 julia/Arrow/docs/Manifest.toml
@@ -202,6 +199,3 @@ julia/Arrow/docs/mkdocs.yml
 julia/Arrow/docs/src/index.md
 julia/Arrow/docs/src/manual.md
 julia/Arrow/docs/src/reference.md
-rust/ballista/rust/benchmarks/tpch/queries/q*.sql
-rust/ballista/rust/scheduler/testdata/*
-rust/ballista/ui/scheduler/yarn.lock
diff --git a/dev/release/utils-prepare.sh b/dev/release/utils-prepare.sh
index a1c884125a8..93ddb18b77c 100644
--- a/dev/release/utils-prepare.sh
+++ b/dev/release/utils-prepare.sh
@@ -145,23 +145,4 @@ update_versions() {
   rm -f */*/*/version.rb.bak
   git add */*/*/version.rb
   popd
-
-  pushd "${ARROW_DIR}/rust"
-  sed -i.bak -E \
-    -e "s/^version = \".+\"/version = \"${version}\"/g" \
-    -e "s/^(arrow = .* version = )\".*\"(( .*)|(, features = .*)|(, optional = .*))$/\\1\"${version}\"\\2/g" \
-    -e "s/^(arrow-flight = .* version = )\".+\"( .*)/\\1\"${version}\"\\2/g" \
-    -e "s/^(parquet = .* version = )\".*\"(( .*)|(, features = .*))$/\\1\"${version}\"\\2/g" \
-    -e "s/^(parquet_derive = .* version = )\".*\"(( .*)|(, features = .*))$/\\1\"${version}\"\\2/g" \
-    */Cargo.toml
-  rm -f */Cargo.toml.bak
-  git add */Cargo.toml
-
-  sed -i.bak -E \
-    -e "s/^([^ ]+) = \".+\"/\\1 = \"${version}\"/g" \
-    -e "s,docs\.rs/crate/([^/]+)/[^)]+,docs.rs/crate/\\1/${version},g" \
-    */README.md
-  rm -f */README.md.bak
-  git add */README.md
-  popd
 }
diff --git a/docs/source/developers/contributing.rst b/docs/source/developers/contributing.rst
index 9aecf8a6915..e75d2c6336f 100644
--- a/docs/source/developers/contributing.rst
+++ b/docs/source/developers/contributing.rst
@@ -215,7 +215,7 @@ in the end. To make the review process smooth for everyone, try to
   for maintainers to accept.
 * Add new unit tests for your code.
 * Follow the style guides for the part(s) of the project you're modifying.
-  Some languages (C++, Python, and Rust, for example) run a lint check in
+  Some languages (C++ and Python, for example) run a lint check in
   continuous integration. For all languages, see their respective developer
   documentation and READMEs for style guidance. In general, try to make it look
   as if the codebase has a single author, and emulate any conventions you see,
diff --git a/matlab/doc/matlab_interface_for_apache_arrow_design.md b/matlab/doc/matlab_interface_for_apache_arrow_design.md
index de2bb13c39d..5d64c8e85bb 100644
--- a/matlab/doc/matlab_interface_for_apache_arrow_design.md
+++ b/matlab/doc/matlab_interface_for_apache_arrow_design.md
@@ -362,5 +362,5 @@ The table below provides a high-level roadmap for the development of specific ca
 [Add-On Explorer]: https://www.mathworks.com/help/matlab/matlab_env/get-add-ons.html
 [JavaScript user]: https://github.com/apache/arrow/tree/master/js
 [`apache-arrow` package via the `npm` package manager]: https://www.npmjs.com/package/apache-arrow
-[Rust user]: https://github.com/apache/arrow/tree/master/rust
+[Rust user]: https://github.com/apache/arrow-rs
 [`arrow` crate via the `cargo` package manager]: https://crates.io/crates/arrow
diff --git a/rust/.gitignore b/rust/.gitignore
deleted file mode 100644
index 389f4ab254b..00000000000
--- a/rust/.gitignore
+++ /dev/null
@@ -1,5 +0,0 @@
-Cargo.lock
-target
-rusty-tags.vi
-.history
-.flatbuffers/
diff --git a/rust/Cargo.toml b/rust/Cargo.toml
deleted file mode 100644
index de26f87c778..00000000000
--- a/rust/Cargo.toml
+++ /dev/null
@@ -1,34 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-[workspace]
-members = [
-        "arrow",
-        "parquet",
-        "parquet_derive",
-        "parquet_derive_test",
-        "datafusion",
-        "datafusion-examples",
-        "arrow-flight",
-        "integration-testing",
-	"benchmarks",
-]
-
-# this package is excluded because it requires different compilation flags, thereby significantly changing
-# how it is compiled within the workspace, causing the whole workspace to be compiled from scratch
-# this way, this is a stand-alone package that compiles independently of the others.
-exclude = ["arrow-pyarrow-integration-testing", "ballista"]
diff --git a/rust/README.md b/rust/README.md
deleted file mode 100644
index 7fdef29bcdb..00000000000
--- a/rust/README.md
+++ /dev/null
@@ -1,186 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-# Native Rust implementation of Apache Arrow
-
-[![Coverage Status](https://codecov.io/gh/apache/arrow/rust/branch/master/graph/badge.svg)](https://codecov.io/gh/apache/arrow?branch=master)
-
-Welcome to the implementation of Arrow, the popular in-memory columnar format, in [Rust](https://www.rust-lang.org/).
-
-This part of the Arrow project is divided in 4 main components:
-
-| Crate     | Description | Documentation |
-|-----------|-------------|---------------|
-|Arrow        | Core functionality (memory layout, arrays, low level computations) | [(README)](arrow/README.md) |
-|Parquet      | Parquet support | [(README)](parquet/README.md) |
-|Arrow-flight | Arrow data between processes | [(README)](arrow-flight/README.md) |
-|DataFusion   | In-memory query engine with SQL support | [(README)](datafusion/README.md) |
-|Ballista     | Distributed query execution | [(README)](ballista/README.md) |
-
-Independently, they support a vast array of functionality for in-memory computations.
-
-Together, they allow users to write an SQL query or a `DataFrame` (using the `datafusion` crate), run it against a parquet file (using the `parquet` crate), evaluate it in-memory using Arrow's columnar format (using the `arrow` crate), and send to another process (using the `arrow-flight` crate).
-
-Generally speaking, the `arrow` crate offers functionality to develop code that uses Arrow arrays, and `datafusion` offers most operations typically found in SQL, with the notable exceptions of:
-
-* `join`
-* `window` functions
-
-There are too many features to enumerate here, but some notable mentions:
-
-* `Arrow` implements all formats in the specification except certain dictionaries
-* `Arrow` supports SIMD operations to some of its vertical operations
-* `DataFusion` supports `async` execution
-* `DataFusion` supports user-defined functions, aggregates, and whole execution nodes
-
-You can find more details about each crate in their respective READMEs.
-
-## Arrow Rust Community
-
-We use the official [ASF Slack](https://s.apache.org/slack-invite) for informal discussions and coordination. This is 
-a great place to meet other contributors and get guidance on where to contribute. Join us in the `arrow-rust` channel.
-
-We use [ASF JIRA](https://issues.apache.org/jira/secure/Dashboard.jspa) as the system of record for new features
-and bug fixes and this plays a critical role in the release process.
-
-For design discussions we generally collaborate on Google documents and file a JIRA linking to the document.
-
-There is also a bi-weekly Rust-specific sync call for the Arrow Rust community. This is hosted on Google Meet
-at https://meet.google.com/ctp-yujs-aee on alternate Wednesday's at 09:00 US/Pacific, 12:00 US/Eastern. During 
-US daylight savings time this corresponds to 16:00 UTC and at other times this is 17:00 UTC.
-
-## Developer's guide to Arrow Rust
-
-### How to compile
-
-This is a standard cargo project with workspaces. To build it, you need to have `rust` and `cargo`:
-
-```bash
-cd /rust && cargo build
-```
-
-You can also use rust's official docker image:
-
-```bash
-docker run --rm -v $(pwd)/rust:/rust -it rust /bin/bash -c "cd /rust && cargo build"
-```
-
-The command above assumes that are in the root directory of the project, not in the same
-directory as this README.md.
-
-You can also compile specific workspaces:
-
-```bash
-cd /rust/arrow && cargo build
-```
-
-### Git Submodules
-
-Before running tests and examples, it is necessary to set up the local development environment.
-
-The tests rely on test data that is contained in git submodules.
-
-To pull down this data run the following:
-
-```bash
-git submodule update --init
-```
-
-This populates data in two git submodules:
-
-- `../cpp/submodules/parquet_testing/data` (sourced from https://github.com/apache/parquet-testing.git)
-- `../testing` (sourced from https://github.com/apache/arrow-testing)
-
-By default, `cargo test` will look for these directories at their
-standard location. The following environment variables can be used to override the location:
-
-```bash
-# Optionaly specify a different location for test data
-export PARQUET_TEST_DATA=$(cd ../cpp/submodules/parquet-testing/data; pwd)
-export ARROW_TEST_DATA=$(cd ../testing/data; pwd)
-```
-
-From here on, this is a pure Rust project and `cargo` can be used to run tests, benchmarks, docs and examples as usual.
-
-
-### Running the tests
-
-Run tests using the Rust standard `cargo test` command:
-
-```bash
-# run all tests.
-cargo test
-
-
-# run only tests for the arrow crate
-cargo test -p arrow
-```
-
-## Code Formatting
-
-Our CI uses `rustfmt` to check code formatting. Before submitting a
-PR be sure to run the following and check for lint issues:
-
-```bash
-cargo +stable fmt --all -- --check
-```
-
-## Clippy Lints
-
-We recommend using `clippy` for checking lints during development. While we do not yet enforce `clippy` checks, we recommend not introducing new `clippy` errors or warnings.
-
-Run the following to check for clippy lints.
-
-```
-cargo clippy
-```
-
-If you use Visual Studio Code with the `rust-analyzer` plugin, you can enable `clippy` to run each time you save a file. See https://users.rust-lang.org/t/how-to-use-clippy-in-vs-code-with-rust-analyzer/41881.
-
-One of the concerns with `clippy` is that it often produces a lot of false positives, or that some recommendations may hurt readability. We do not have a policy of which lints are ignored, but if you disagree with a `clippy` lint, you may disable the lint and briefly justify it.
-
-Search for `allow(clippy::` in the codebase to identify lints that are ignored/allowed. We currently prefer ignoring lints on the lowest unit possible.
-* If you are introducing a line that returns a lint warning or error, you may disable the lint on that line.
-* If you have several lints on a function or module, you may disable the lint on the function or module.
-* If a lint is pervasive across multiple modules, you may disable it at the crate level.
-
-## Git Pre-Commit Hook
-
-We can use [git pre-commit hook](https://git-scm.com/book/en/v2/Customizing-Git-Git-Hooks) to automate various kinds of git pre-commit checking/formatting.
-
-Suppose you are in the root directory of the project.
-
-First check if the file already exists:
-
-```bash
-ls -l .git/hooks/pre-commit
-```
-
-If the file already exists, to avoid mistakenly **overriding**, you MAY have to check
-the link source or file content. Else if not exist, let's safely soft link [pre-commit.sh](pre-commit.sh) as file `.git/hooks/pre-commit`:
-
-```
-ln -s  ../../rust/pre-commit.sh .git/hooks/pre-commit
-```
-
-If sometimes you want to commit without checking, just run `git commit` with `--no-verify`:
-
-```bash
-git commit --no-verify -m "... commit message ..."
-```
diff --git a/rust/arrow-flight/Cargo.toml b/rust/arrow-flight/Cargo.toml
deleted file mode 100644
index de6aa832315..00000000000
--- a/rust/arrow-flight/Cargo.toml
+++ /dev/null
@@ -1,45 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-[package]
-name = "arrow-flight"
-description = "Apache Arrow Flight"
-version = "5.0.0-SNAPSHOT"
-edition = "2018"
-authors = ["Apache Arrow <dev@arrow.apache.org>"]
-homepage = "https://github.com/apache/arrow"
-repository = "https://github.com/apache/arrow"
-license = "Apache-2.0"
-
-[dependencies]
-arrow = { path = "../arrow", version = "5.0.0-SNAPSHOT" }
-tonic = "0.4"
-bytes = "1"
-prost = "0.7"
-prost-derive = "0.7"
-tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread"] }
-futures = { version = "0.3", default-features = false, features = ["alloc"]}
-
-[build-dependencies]
-tonic-build = "0.4"
-# Pin specific version of the tonic-build dependencies to avoid auto-generated
-# (and checked in) arrow.flight.protocol.rs from changing
-proc-macro2 = "=1.0.24"
-
-#[lib]
-#name = "flight"
-#path = "src/lib.rs"
diff --git a/rust/arrow-flight/README.md b/rust/arrow-flight/README.md
deleted file mode 100644
index ba63f65bc48..00000000000
--- a/rust/arrow-flight/README.md
+++ /dev/null
@@ -1,29 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-# Apache Arrow Flight
-
-Apache Arrow Flight is a gRPC based protocol for exchanging Arrow data between processes. See the blog post [Introducing Apache Arrow Flight: A Framework for Fast Data Transport](https://arrow.apache.org/blog/2019/10/13/introducing-arrow-flight/) for more information.
-
-This crate simply provides the Rust implementation of the [Flight.proto](../../format/Flight.proto) gRPC protocol and provides an example that demonstrates how to build a Flight server implemented with Tonic.
-
-Note that building a Flight server also requires an implementation of Arrow IPC which is based on the Flatbuffers serialization framework. The Rust implementation of Arrow IPC is not yet complete although the generated Flatbuffers code is available as part of the core Arrow crate.
-
-
-
diff --git a/rust/arrow-flight/build.rs b/rust/arrow-flight/build.rs
deleted file mode 100644
index ca232551455..00000000000
--- a/rust/arrow-flight/build.rs
+++ /dev/null
@@ -1,54 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::{
-    env,
-    fs::OpenOptions,
-    io::{Read, Write},
-    path::Path,
-};
-
-fn main() -> Result<(), Box<dyn std::error::Error>> {
-    // avoid rerunning build if the file has not changed
-    println!("cargo:rerun-if-changed=../../format/Flight.proto");
-
-    // override the build location, in order to check in the changes to proto files
-    env::set_var("OUT_DIR", "src");
-
-    // The current working directory can vary depending on how the project is being
-    // built or released so we build an absolute path to the proto file
-    let path = Path::new("../../format/Flight.proto");
-    if path.exists() {
-        tonic_build::compile_protos("../../format/Flight.proto")?;
-        // read file contents to string
-        let mut file = OpenOptions::new()
-            .read(true)
-            .open("src/arrow.flight.protocol.rs")?;
-        let mut buffer = String::new();
-        file.read_to_string(&mut buffer)?;
-        // append warning that file was auto-generate
-        let mut file = OpenOptions::new()
-            .write(true)
-            .truncate(true)
-            .open("src/arrow.flight.protocol.rs")?;
-        file.write_all("// This file was automatically generated through the build.rs script, and should not be edited.\n\n".as_bytes())?;
-        file.write_all(buffer.as_bytes())?;
-    }
-
-    // As the proto file is checked in, the build should not fail if the file is not found
-    Ok(())
-}
diff --git a/rust/arrow-flight/examples/server.rs b/rust/arrow-flight/examples/server.rs
deleted file mode 100644
index 75d05378710..00000000000
--- a/rust/arrow-flight/examples/server.rs
+++ /dev/null
@@ -1,131 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::pin::Pin;
-
-use futures::Stream;
-use tonic::transport::Server;
-use tonic::{Request, Response, Status, Streaming};
-
-use arrow_flight::{
-    flight_service_server::FlightService, flight_service_server::FlightServiceServer,
-    Action, ActionType, Criteria, Empty, FlightData, FlightDescriptor, FlightInfo,
-    HandshakeRequest, HandshakeResponse, PutResult, SchemaResult, Ticket,
-};
-
-#[derive(Clone)]
-pub struct FlightServiceImpl {}
-
-#[tonic::async_trait]
-impl FlightService for FlightServiceImpl {
-    type HandshakeStream = Pin<
-        Box<dyn Stream<Item = Result<HandshakeResponse, Status>> + Send + Sync + 'static>,
-    >;
-    type ListFlightsStream =
-        Pin<Box<dyn Stream<Item = Result<FlightInfo, Status>> + Send + Sync + 'static>>;
-    type DoGetStream =
-        Pin<Box<dyn Stream<Item = Result<FlightData, Status>> + Send + Sync + 'static>>;
-    type DoPutStream =
-        Pin<Box<dyn Stream<Item = Result<PutResult, Status>> + Send + Sync + 'static>>;
-    type DoActionStream = Pin<
-        Box<
-            dyn Stream<Item = Result<arrow_flight::Result, Status>>
-                + Send
-                + Sync
-                + 'static,
-        >,
-    >;
-    type ListActionsStream =
-        Pin<Box<dyn Stream<Item = Result<ActionType, Status>> + Send + Sync + 'static>>;
-    type DoExchangeStream =
-        Pin<Box<dyn Stream<Item = Result<FlightData, Status>> + Send + Sync + 'static>>;
-
-    async fn handshake(
-        &self,
-        _request: Request<Streaming<HandshakeRequest>>,
-    ) -> Result<Response<Self::HandshakeStream>, Status> {
-        Err(Status::unimplemented("Not yet implemented"))
-    }
-
-    async fn list_flights(
-        &self,
-        _request: Request<Criteria>,
-    ) -> Result<Response<Self::ListFlightsStream>, Status> {
-        Err(Status::unimplemented("Not yet implemented"))
-    }
-
-    async fn get_flight_info(
-        &self,
-        _request: Request<FlightDescriptor>,
-    ) -> Result<Response<FlightInfo>, Status> {
-        Err(Status::unimplemented("Not yet implemented"))
-    }
-
-    async fn get_schema(
-        &self,
-        _request: Request<FlightDescriptor>,
-    ) -> Result<Response<SchemaResult>, Status> {
-        Err(Status::unimplemented("Not yet implemented"))
-    }
-
-    async fn do_get(
-        &self,
-        _request: Request<Ticket>,
-    ) -> Result<Response<Self::DoGetStream>, Status> {
-        Err(Status::unimplemented("Not yet implemented"))
-    }
-
-    async fn do_put(
-        &self,
-        _request: Request<Streaming<FlightData>>,
-    ) -> Result<Response<Self::DoPutStream>, Status> {
-        Err(Status::unimplemented("Not yet implemented"))
-    }
-
-    async fn do_action(
-        &self,
-        _request: Request<Action>,
-    ) -> Result<Response<Self::DoActionStream>, Status> {
-        Err(Status::unimplemented("Not yet implemented"))
-    }
-
-    async fn list_actions(
-        &self,
-        _request: Request<Empty>,
-    ) -> Result<Response<Self::ListActionsStream>, Status> {
-        Err(Status::unimplemented("Not yet implemented"))
-    }
-
-    async fn do_exchange(
-        &self,
-        _request: Request<Streaming<FlightData>>,
-    ) -> Result<Response<Self::DoExchangeStream>, Status> {
-        Err(Status::unimplemented("Not yet implemented"))
-    }
-}
-
-#[tokio::main]
-async fn main() -> Result<(), Box<dyn std::error::Error>> {
-    let addr = "[::1]:50051".parse()?;
-    let service = FlightServiceImpl {};
-
-    let svc = FlightServiceServer::new(service);
-
-    Server::builder().add_service(svc).serve(addr).await?;
-
-    Ok(())
-}
diff --git a/rust/arrow-flight/src/arrow.flight.protocol.rs b/rust/arrow-flight/src/arrow.flight.protocol.rs
deleted file mode 100644
index 5fce526ff6e..00000000000
--- a/rust/arrow-flight/src/arrow.flight.protocol.rs
+++ /dev/null
@@ -1,1039 +0,0 @@
-// This file was automatically generated through the build.rs script, and should not be edited.
-
-///
-/// The request that a client provides to a server on handshake.
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct HandshakeRequest {
-    ///
-    /// A defined protocol version
-    #[prost(uint64, tag = "1")]
-    pub protocol_version: u64,
-    ///
-    /// Arbitrary auth/handshake info.
-    #[prost(bytes = "vec", tag = "2")]
-    pub payload: ::prost::alloc::vec::Vec<u8>,
-}
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct HandshakeResponse {
-    ///
-    /// A defined protocol version
-    #[prost(uint64, tag = "1")]
-    pub protocol_version: u64,
-    ///
-    /// Arbitrary auth/handshake info.
-    #[prost(bytes = "vec", tag = "2")]
-    pub payload: ::prost::alloc::vec::Vec<u8>,
-}
-///
-/// A message for doing simple auth.
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct BasicAuth {
-    #[prost(string, tag = "2")]
-    pub username: ::prost::alloc::string::String,
-    #[prost(string, tag = "3")]
-    pub password: ::prost::alloc::string::String,
-}
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct Empty {}
-///
-/// Describes an available action, including both the name used for execution
-/// along with a short description of the purpose of the action.
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct ActionType {
-    #[prost(string, tag = "1")]
-    pub r#type: ::prost::alloc::string::String,
-    #[prost(string, tag = "2")]
-    pub description: ::prost::alloc::string::String,
-}
-///
-/// A service specific expression that can be used to return a limited set
-/// of available Arrow Flight streams.
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct Criteria {
-    #[prost(bytes = "vec", tag = "1")]
-    pub expression: ::prost::alloc::vec::Vec<u8>,
-}
-///
-/// An opaque action specific for the service.
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct Action {
-    #[prost(string, tag = "1")]
-    pub r#type: ::prost::alloc::string::String,
-    #[prost(bytes = "vec", tag = "2")]
-    pub body: ::prost::alloc::vec::Vec<u8>,
-}
-///
-/// An opaque result returned after executing an action.
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct Result {
-    #[prost(bytes = "vec", tag = "1")]
-    pub body: ::prost::alloc::vec::Vec<u8>,
-}
-///
-/// Wrap the result of a getSchema call
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct SchemaResult {
-    /// schema of the dataset as described in Schema.fbs::Schema.
-    #[prost(bytes = "vec", tag = "1")]
-    pub schema: ::prost::alloc::vec::Vec<u8>,
-}
-///
-/// The name or tag for a Flight. May be used as a way to retrieve or generate
-/// a flight or be used to expose a set of previously defined flights.
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct FlightDescriptor {
-    #[prost(enumeration = "flight_descriptor::DescriptorType", tag = "1")]
-    pub r#type: i32,
-    ///
-    /// Opaque value used to express a command. Should only be defined when
-    /// type = CMD.
-    #[prost(bytes = "vec", tag = "2")]
-    pub cmd: ::prost::alloc::vec::Vec<u8>,
-    ///
-    /// List of strings identifying a particular dataset. Should only be defined
-    /// when type = PATH.
-    #[prost(string, repeated, tag = "3")]
-    pub path: ::prost::alloc::vec::Vec<::prost::alloc::string::String>,
-}
-/// Nested message and enum types in `FlightDescriptor`.
-pub mod flight_descriptor {
-    ///
-    /// Describes what type of descriptor is defined.
-    #[derive(
-        Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration,
-    )]
-    #[repr(i32)]
-    pub enum DescriptorType {
-        /// Protobuf pattern, not used.
-        Unknown = 0,
-        ///
-        /// A named path that identifies a dataset. A path is composed of a string
-        /// or list of strings describing a particular dataset. This is conceptually
-        ///  similar to a path inside a filesystem.
-        Path = 1,
-        ///
-        /// An opaque command to generate a dataset.
-        Cmd = 2,
-    }
-}
-///
-/// The access coordinates for retrieval of a dataset. With a FlightInfo, a
-/// consumer is able to determine how to retrieve a dataset.
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct FlightInfo {
-    /// schema of the dataset as described in Schema.fbs::Schema.
-    #[prost(bytes = "vec", tag = "1")]
-    pub schema: ::prost::alloc::vec::Vec<u8>,
-    ///
-    /// The descriptor associated with this info.
-    #[prost(message, optional, tag = "2")]
-    pub flight_descriptor: ::core::option::Option<FlightDescriptor>,
-    ///
-    /// A list of endpoints associated with the flight. To consume the whole
-    /// flight, all endpoints must be consumed.
-    #[prost(message, repeated, tag = "3")]
-    pub endpoint: ::prost::alloc::vec::Vec<FlightEndpoint>,
-    /// Set these to -1 if unknown.
-    #[prost(int64, tag = "4")]
-    pub total_records: i64,
-    #[prost(int64, tag = "5")]
-    pub total_bytes: i64,
-}
-///
-/// A particular stream or split associated with a flight.
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct FlightEndpoint {
-    ///
-    /// Token used to retrieve this stream.
-    #[prost(message, optional, tag = "1")]
-    pub ticket: ::core::option::Option<Ticket>,
-    ///
-    /// A list of URIs where this ticket can be redeemed. If the list is
-    /// empty, the expectation is that the ticket can only be redeemed on the
-    /// current service where the ticket was generated.
-    #[prost(message, repeated, tag = "2")]
-    pub location: ::prost::alloc::vec::Vec<Location>,
-}
-///
-/// A location where a Flight service will accept retrieval of a particular
-/// stream given a ticket.
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct Location {
-    #[prost(string, tag = "1")]
-    pub uri: ::prost::alloc::string::String,
-}
-///
-/// An opaque identifier that the service can use to retrieve a particular
-/// portion of a stream.
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct Ticket {
-    #[prost(bytes = "vec", tag = "1")]
-    pub ticket: ::prost::alloc::vec::Vec<u8>,
-}
-///
-/// A batch of Arrow data as part of a stream of batches.
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct FlightData {
-    ///
-    /// The descriptor of the data. This is only relevant when a client is
-    /// starting a new DoPut stream.
-    #[prost(message, optional, tag = "1")]
-    pub flight_descriptor: ::core::option::Option<FlightDescriptor>,
-    ///
-    /// Header for message data as described in Message.fbs::Message.
-    #[prost(bytes = "vec", tag = "2")]
-    pub data_header: ::prost::alloc::vec::Vec<u8>,
-    ///
-    /// Application-defined metadata.
-    #[prost(bytes = "vec", tag = "3")]
-    pub app_metadata: ::prost::alloc::vec::Vec<u8>,
-    ///
-    /// The actual batch of Arrow data. Preferably handled with minimal-copies
-    /// coming last in the definition to help with sidecar patterns (it is
-    /// expected that some implementations will fetch this field off the wire
-    /// with specialized code to avoid extra memory copies).
-    #[prost(bytes = "vec", tag = "1000")]
-    pub data_body: ::prost::alloc::vec::Vec<u8>,
-}
-///*
-/// The response message associated with the submission of a DoPut.
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct PutResult {
-    #[prost(bytes = "vec", tag = "1")]
-    pub app_metadata: ::prost::alloc::vec::Vec<u8>,
-}
-#[doc = r" Generated client implementations."]
-pub mod flight_service_client {
-    #![allow(unused_variables, dead_code, missing_docs)]
-    use tonic::codegen::*;
-    #[doc = ""]
-    #[doc = " A flight service is an endpoint for retrieving or storing Arrow data. A"]
-    #[doc = " flight service can expose one or more predefined endpoints that can be"]
-    #[doc = " accessed using the Arrow Flight Protocol. Additionally, a flight service"]
-    #[doc = " can expose a set of actions that are available."]
-    pub struct FlightServiceClient<T> {
-        inner: tonic::client::Grpc<T>,
-    }
-    impl FlightServiceClient<tonic::transport::Channel> {
-        #[doc = r" Attempt to create a new client by connecting to a given endpoint."]
-        pub async fn connect<D>(dst: D) -> Result<Self, tonic::transport::Error>
-        where
-            D: std::convert::TryInto<tonic::transport::Endpoint>,
-            D::Error: Into<StdError>,
-        {
-            let conn = tonic::transport::Endpoint::new(dst)?.connect().await?;
-            Ok(Self::new(conn))
-        }
-    }
-    impl<T> FlightServiceClient<T>
-    where
-        T: tonic::client::GrpcService<tonic::body::BoxBody>,
-        T::ResponseBody: Body + HttpBody + Send + 'static,
-        T::Error: Into<StdError>,
-        <T::ResponseBody as HttpBody>::Error: Into<StdError> + Send,
-    {
-        pub fn new(inner: T) -> Self {
-            let inner = tonic::client::Grpc::new(inner);
-            Self { inner }
-        }
-        pub fn with_interceptor(
-            inner: T,
-            interceptor: impl Into<tonic::Interceptor>,
-        ) -> Self {
-            let inner = tonic::client::Grpc::with_interceptor(inner, interceptor);
-            Self { inner }
-        }
-        #[doc = ""]
-        #[doc = " Handshake between client and server. Depending on the server, the"]
-        #[doc = " handshake may be required to determine the token that should be used for"]
-        #[doc = " future operations. Both request and response are streams to allow multiple"]
-        #[doc = " round-trips depending on auth mechanism."]
-        pub async fn handshake(
-            &mut self,
-            request: impl tonic::IntoStreamingRequest<Message = super::HandshakeRequest>,
-        ) -> Result<
-            tonic::Response<tonic::codec::Streaming<super::HandshakeResponse>>,
-            tonic::Status,
-        > {
-            self.inner.ready().await.map_err(|e| {
-                tonic::Status::new(
-                    tonic::Code::Unknown,
-                    format!("Service was not ready: {}", e.into()),
-                )
-            })?;
-            let codec = tonic::codec::ProstCodec::default();
-            let path = http::uri::PathAndQuery::from_static(
-                "/arrow.flight.protocol.FlightService/Handshake",
-            );
-            self.inner
-                .streaming(request.into_streaming_request(), path, codec)
-                .await
-        }
-        #[doc = ""]
-        #[doc = " Get a list of available streams given a particular criteria. Most flight"]
-        #[doc = " services will expose one or more streams that are readily available for"]
-        #[doc = " retrieval. This api allows listing the streams available for"]
-        #[doc = " consumption. A user can also provide a criteria. The criteria can limit"]
-        #[doc = " the subset of streams that can be listed via this interface. Each flight"]
-        #[doc = " service allows its own definition of how to consume criteria."]
-        pub async fn list_flights(
-            &mut self,
-            request: impl tonic::IntoRequest<super::Criteria>,
-        ) -> Result<
-            tonic::Response<tonic::codec::Streaming<super::FlightInfo>>,
-            tonic::Status,
-        > {
-            self.inner.ready().await.map_err(|e| {
-                tonic::Status::new(
-                    tonic::Code::Unknown,
-                    format!("Service was not ready: {}", e.into()),
-                )
-            })?;
-            let codec = tonic::codec::ProstCodec::default();
-            let path = http::uri::PathAndQuery::from_static(
-                "/arrow.flight.protocol.FlightService/ListFlights",
-            );
-            self.inner
-                .server_streaming(request.into_request(), path, codec)
-                .await
-        }
-        #[doc = ""]
-        #[doc = " For a given FlightDescriptor, get information about how the flight can be"]
-        #[doc = " consumed. This is a useful interface if the consumer of the interface"]
-        #[doc = " already can identify the specific flight to consume. This interface can"]
-        #[doc = " also allow a consumer to generate a flight stream through a specified"]
-        #[doc = " descriptor. For example, a flight descriptor might be something that"]
-        #[doc = " includes a SQL statement or a Pickled Python operation that will be"]
-        #[doc = " executed. In those cases, the descriptor will not be previously available"]
-        #[doc = " within the list of available streams provided by ListFlights but will be"]
-        #[doc = " available for consumption for the duration defined by the specific flight"]
-        #[doc = " service."]
-        pub async fn get_flight_info(
-            &mut self,
-            request: impl tonic::IntoRequest<super::FlightDescriptor>,
-        ) -> Result<tonic::Response<super::FlightInfo>, tonic::Status> {
-            self.inner.ready().await.map_err(|e| {
-                tonic::Status::new(
-                    tonic::Code::Unknown,
-                    format!("Service was not ready: {}", e.into()),
-                )
-            })?;
-            let codec = tonic::codec::ProstCodec::default();
-            let path = http::uri::PathAndQuery::from_static(
-                "/arrow.flight.protocol.FlightService/GetFlightInfo",
-            );
-            self.inner.unary(request.into_request(), path, codec).await
-        }
-        #[doc = ""]
-        #[doc = " For a given FlightDescriptor, get the Schema as described in Schema.fbs::Schema"]
-        #[doc = " This is used when a consumer needs the Schema of flight stream. Similar to"]
-        #[doc = " GetFlightInfo this interface may generate a new flight that was not previously"]
-        #[doc = " available in ListFlights."]
-        pub async fn get_schema(
-            &mut self,
-            request: impl tonic::IntoRequest<super::FlightDescriptor>,
-        ) -> Result<tonic::Response<super::SchemaResult>, tonic::Status> {
-            self.inner.ready().await.map_err(|e| {
-                tonic::Status::new(
-                    tonic::Code::Unknown,
-                    format!("Service was not ready: {}", e.into()),
-                )
-            })?;
-            let codec = tonic::codec::ProstCodec::default();
-            let path = http::uri::PathAndQuery::from_static(
-                "/arrow.flight.protocol.FlightService/GetSchema",
-            );
-            self.inner.unary(request.into_request(), path, codec).await
-        }
-        #[doc = ""]
-        #[doc = " Retrieve a single stream associated with a particular descriptor"]
-        #[doc = " associated with the referenced ticket. A Flight can be composed of one or"]
-        #[doc = " more streams where each stream can be retrieved using a separate opaque"]
-        #[doc = " ticket that the flight service uses for managing a collection of streams."]
-        pub async fn do_get(
-            &mut self,
-            request: impl tonic::IntoRequest<super::Ticket>,
-        ) -> Result<
-            tonic::Response<tonic::codec::Streaming<super::FlightData>>,
-            tonic::Status,
-        > {
-            self.inner.ready().await.map_err(|e| {
-                tonic::Status::new(
-                    tonic::Code::Unknown,
-                    format!("Service was not ready: {}", e.into()),
-                )
-            })?;
-            let codec = tonic::codec::ProstCodec::default();
-            let path = http::uri::PathAndQuery::from_static(
-                "/arrow.flight.protocol.FlightService/DoGet",
-            );
-            self.inner
-                .server_streaming(request.into_request(), path, codec)
-                .await
-        }
-        #[doc = ""]
-        #[doc = " Push a stream to the flight service associated with a particular"]
-        #[doc = " flight stream. This allows a client of a flight service to upload a stream"]
-        #[doc = " of data. Depending on the particular flight service, a client consumer"]
-        #[doc = " could be allowed to upload a single stream per descriptor or an unlimited"]
-        #[doc = " number. In the latter, the service might implement a 'seal' action that"]
-        #[doc = " can be applied to a descriptor once all streams are uploaded."]
-        pub async fn do_put(
-            &mut self,
-            request: impl tonic::IntoStreamingRequest<Message = super::FlightData>,
-        ) -> Result<
-            tonic::Response<tonic::codec::Streaming<super::PutResult>>,
-            tonic::Status,
-        > {
-            self.inner.ready().await.map_err(|e| {
-                tonic::Status::new(
-                    tonic::Code::Unknown,
-                    format!("Service was not ready: {}", e.into()),
-                )
-            })?;
-            let codec = tonic::codec::ProstCodec::default();
-            let path = http::uri::PathAndQuery::from_static(
-                "/arrow.flight.protocol.FlightService/DoPut",
-            );
-            self.inner
-                .streaming(request.into_streaming_request(), path, codec)
-                .await
-        }
-        #[doc = ""]
-        #[doc = " Open a bidirectional data channel for a given descriptor. This"]
-        #[doc = " allows clients to send and receive arbitrary Arrow data and"]
-        #[doc = " application-specific metadata in a single logical stream. In"]
-        #[doc = " contrast to DoGet/DoPut, this is more suited for clients"]
-        #[doc = " offloading computation (rather than storage) to a Flight service."]
-        pub async fn do_exchange(
-            &mut self,
-            request: impl tonic::IntoStreamingRequest<Message = super::FlightData>,
-        ) -> Result<
-            tonic::Response<tonic::codec::Streaming<super::FlightData>>,
-            tonic::Status,
-        > {
-            self.inner.ready().await.map_err(|e| {
-                tonic::Status::new(
-                    tonic::Code::Unknown,
-                    format!("Service was not ready: {}", e.into()),
-                )
-            })?;
-            let codec = tonic::codec::ProstCodec::default();
-            let path = http::uri::PathAndQuery::from_static(
-                "/arrow.flight.protocol.FlightService/DoExchange",
-            );
-            self.inner
-                .streaming(request.into_streaming_request(), path, codec)
-                .await
-        }
-        #[doc = ""]
-        #[doc = " Flight services can support an arbitrary number of simple actions in"]
-        #[doc = " addition to the possible ListFlights, GetFlightInfo, DoGet, DoPut"]
-        #[doc = " operations that are potentially available. DoAction allows a flight client"]
-        #[doc = " to do a specific action against a flight service. An action includes"]
-        #[doc = " opaque request and response objects that are specific to the type action"]
-        #[doc = " being undertaken."]
-        pub async fn do_action(
-            &mut self,
-            request: impl tonic::IntoRequest<super::Action>,
-        ) -> Result<tonic::Response<tonic::codec::Streaming<super::Result>>, tonic::Status>
-        {
-            self.inner.ready().await.map_err(|e| {
-                tonic::Status::new(
-                    tonic::Code::Unknown,
-                    format!("Service was not ready: {}", e.into()),
-                )
-            })?;
-            let codec = tonic::codec::ProstCodec::default();
-            let path = http::uri::PathAndQuery::from_static(
-                "/arrow.flight.protocol.FlightService/DoAction",
-            );
-            self.inner
-                .server_streaming(request.into_request(), path, codec)
-                .await
-        }
-        #[doc = ""]
-        #[doc = " A flight service exposes all of the available action types that it has"]
-        #[doc = " along with descriptions. This allows different flight consumers to"]
-        #[doc = " understand the capabilities of the flight service."]
-        pub async fn list_actions(
-            &mut self,
-            request: impl tonic::IntoRequest<super::Empty>,
-        ) -> Result<
-            tonic::Response<tonic::codec::Streaming<super::ActionType>>,
-            tonic::Status,
-        > {
-            self.inner.ready().await.map_err(|e| {
-                tonic::Status::new(
-                    tonic::Code::Unknown,
-                    format!("Service was not ready: {}", e.into()),
-                )
-            })?;
-            let codec = tonic::codec::ProstCodec::default();
-            let path = http::uri::PathAndQuery::from_static(
-                "/arrow.flight.protocol.FlightService/ListActions",
-            );
-            self.inner
-                .server_streaming(request.into_request(), path, codec)
-                .await
-        }
-    }
-    impl<T: Clone> Clone for FlightServiceClient<T> {
-        fn clone(&self) -> Self {
-            Self {
-                inner: self.inner.clone(),
-            }
-        }
-    }
-    impl<T> std::fmt::Debug for FlightServiceClient<T> {
-        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-            write!(f, "FlightServiceClient {{ ... }}")
-        }
-    }
-}
-#[doc = r" Generated server implementations."]
-pub mod flight_service_server {
-    #![allow(unused_variables, dead_code, missing_docs)]
-    use tonic::codegen::*;
-    #[doc = "Generated trait containing gRPC methods that should be implemented for use with FlightServiceServer."]
-    #[async_trait]
-    pub trait FlightService: Send + Sync + 'static {
-        #[doc = "Server streaming response type for the Handshake method."]
-        type HandshakeStream: futures_core::Stream<Item = Result<super::HandshakeResponse, tonic::Status>>
-            + Send
-            + Sync
-            + 'static;
-        #[doc = ""]
-        #[doc = " Handshake between client and server. Depending on the server, the"]
-        #[doc = " handshake may be required to determine the token that should be used for"]
-        #[doc = " future operations. Both request and response are streams to allow multiple"]
-        #[doc = " round-trips depending on auth mechanism."]
-        async fn handshake(
-            &self,
-            request: tonic::Request<tonic::Streaming<super::HandshakeRequest>>,
-        ) -> Result<tonic::Response<Self::HandshakeStream>, tonic::Status>;
-        #[doc = "Server streaming response type for the ListFlights method."]
-        type ListFlightsStream: futures_core::Stream<Item = Result<super::FlightInfo, tonic::Status>>
-            + Send
-            + Sync
-            + 'static;
-        #[doc = ""]
-        #[doc = " Get a list of available streams given a particular criteria. Most flight"]
-        #[doc = " services will expose one or more streams that are readily available for"]
-        #[doc = " retrieval. This api allows listing the streams available for"]
-        #[doc = " consumption. A user can also provide a criteria. The criteria can limit"]
-        #[doc = " the subset of streams that can be listed via this interface. Each flight"]
-        #[doc = " service allows its own definition of how to consume criteria."]
-        async fn list_flights(
-            &self,
-            request: tonic::Request<super::Criteria>,
-        ) -> Result<tonic::Response<Self::ListFlightsStream>, tonic::Status>;
-        #[doc = ""]
-        #[doc = " For a given FlightDescriptor, get information about how the flight can be"]
-        #[doc = " consumed. This is a useful interface if the consumer of the interface"]
-        #[doc = " already can identify the specific flight to consume. This interface can"]
-        #[doc = " also allow a consumer to generate a flight stream through a specified"]
-        #[doc = " descriptor. For example, a flight descriptor might be something that"]
-        #[doc = " includes a SQL statement or a Pickled Python operation that will be"]
-        #[doc = " executed. In those cases, the descriptor will not be previously available"]
-        #[doc = " within the list of available streams provided by ListFlights but will be"]
-        #[doc = " available for consumption for the duration defined by the specific flight"]
-        #[doc = " service."]
-        async fn get_flight_info(
-            &self,
-            request: tonic::Request<super::FlightDescriptor>,
-        ) -> Result<tonic::Response<super::FlightInfo>, tonic::Status>;
-        #[doc = ""]
-        #[doc = " For a given FlightDescriptor, get the Schema as described in Schema.fbs::Schema"]
-        #[doc = " This is used when a consumer needs the Schema of flight stream. Similar to"]
-        #[doc = " GetFlightInfo this interface may generate a new flight that was not previously"]
-        #[doc = " available in ListFlights."]
-        async fn get_schema(
-            &self,
-            request: tonic::Request<super::FlightDescriptor>,
-        ) -> Result<tonic::Response<super::SchemaResult>, tonic::Status>;
-        #[doc = "Server streaming response type for the DoGet method."]
-        type DoGetStream: futures_core::Stream<Item = Result<super::FlightData, tonic::Status>>
-            + Send
-            + Sync
-            + 'static;
-        #[doc = ""]
-        #[doc = " Retrieve a single stream associated with a particular descriptor"]
-        #[doc = " associated with the referenced ticket. A Flight can be composed of one or"]
-        #[doc = " more streams where each stream can be retrieved using a separate opaque"]
-        #[doc = " ticket that the flight service uses for managing a collection of streams."]
-        async fn do_get(
-            &self,
-            request: tonic::Request<super::Ticket>,
-        ) -> Result<tonic::Response<Self::DoGetStream>, tonic::Status>;
-        #[doc = "Server streaming response type for the DoPut method."]
-        type DoPutStream: futures_core::Stream<Item = Result<super::PutResult, tonic::Status>>
-            + Send
-            + Sync
-            + 'static;
-        #[doc = ""]
-        #[doc = " Push a stream to the flight service associated with a particular"]
-        #[doc = " flight stream. This allows a client of a flight service to upload a stream"]
-        #[doc = " of data. Depending on the particular flight service, a client consumer"]
-        #[doc = " could be allowed to upload a single stream per descriptor or an unlimited"]
-        #[doc = " number. In the latter, the service might implement a 'seal' action that"]
-        #[doc = " can be applied to a descriptor once all streams are uploaded."]
-        async fn do_put(
-            &self,
-            request: tonic::Request<tonic::Streaming<super::FlightData>>,
-        ) -> Result<tonic::Response<Self::DoPutStream>, tonic::Status>;
-        #[doc = "Server streaming response type for the DoExchange method."]
-        type DoExchangeStream: futures_core::Stream<Item = Result<super::FlightData, tonic::Status>>
-            + Send
-            + Sync
-            + 'static;
-        #[doc = ""]
-        #[doc = " Open a bidirectional data channel for a given descriptor. This"]
-        #[doc = " allows clients to send and receive arbitrary Arrow data and"]
-        #[doc = " application-specific metadata in a single logical stream. In"]
-        #[doc = " contrast to DoGet/DoPut, this is more suited for clients"]
-        #[doc = " offloading computation (rather than storage) to a Flight service."]
-        async fn do_exchange(
-            &self,
-            request: tonic::Request<tonic::Streaming<super::FlightData>>,
-        ) -> Result<tonic::Response<Self::DoExchangeStream>, tonic::Status>;
-        #[doc = "Server streaming response type for the DoAction method."]
-        type DoActionStream: futures_core::Stream<Item = Result<super::Result, tonic::Status>>
-            + Send
-            + Sync
-            + 'static;
-        #[doc = ""]
-        #[doc = " Flight services can support an arbitrary number of simple actions in"]
-        #[doc = " addition to the possible ListFlights, GetFlightInfo, DoGet, DoPut"]
-        #[doc = " operations that are potentially available. DoAction allows a flight client"]
-        #[doc = " to do a specific action against a flight service. An action includes"]
-        #[doc = " opaque request and response objects that are specific to the type action"]
-        #[doc = " being undertaken."]
-        async fn do_action(
-            &self,
-            request: tonic::Request<super::Action>,
-        ) -> Result<tonic::Response<Self::DoActionStream>, tonic::Status>;
-        #[doc = "Server streaming response type for the ListActions method."]
-        type ListActionsStream: futures_core::Stream<Item = Result<super::ActionType, tonic::Status>>
-            + Send
-            + Sync
-            + 'static;
-        #[doc = ""]
-        #[doc = " A flight service exposes all of the available action types that it has"]
-        #[doc = " along with descriptions. This allows different flight consumers to"]
-        #[doc = " understand the capabilities of the flight service."]
-        async fn list_actions(
-            &self,
-            request: tonic::Request<super::Empty>,
-        ) -> Result<tonic::Response<Self::ListActionsStream>, tonic::Status>;
-    }
-    #[doc = ""]
-    #[doc = " A flight service is an endpoint for retrieving or storing Arrow data. A"]
-    #[doc = " flight service can expose one or more predefined endpoints that can be"]
-    #[doc = " accessed using the Arrow Flight Protocol. Additionally, a flight service"]
-    #[doc = " can expose a set of actions that are available."]
-    #[derive(Debug)]
-    pub struct FlightServiceServer<T: FlightService> {
-        inner: _Inner<T>,
-    }
-    struct _Inner<T>(Arc<T>, Option<tonic::Interceptor>);
-    impl<T: FlightService> FlightServiceServer<T> {
-        pub fn new(inner: T) -> Self {
-            let inner = Arc::new(inner);
-            let inner = _Inner(inner, None);
-            Self { inner }
-        }
-        pub fn with_interceptor(
-            inner: T,
-            interceptor: impl Into<tonic::Interceptor>,
-        ) -> Self {
-            let inner = Arc::new(inner);
-            let inner = _Inner(inner, Some(interceptor.into()));
-            Self { inner }
-        }
-    }
-    impl<T, B> Service<http::Request<B>> for FlightServiceServer<T>
-    where
-        T: FlightService,
-        B: HttpBody + Send + Sync + 'static,
-        B::Error: Into<StdError> + Send + 'static,
-    {
-        type Response = http::Response<tonic::body::BoxBody>;
-        type Error = Never;
-        type Future = BoxFuture<Self::Response, Self::Error>;
-        fn poll_ready(&mut self, _cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
-            Poll::Ready(Ok(()))
-        }
-        fn call(&mut self, req: http::Request<B>) -> Self::Future {
-            let inner = self.inner.clone();
-            match req.uri().path() {
-                "/arrow.flight.protocol.FlightService/Handshake" => {
-                    #[allow(non_camel_case_types)]
-                    struct HandshakeSvc<T: FlightService>(pub Arc<T>);
-                    impl<T: FlightService>
-                        tonic::server::StreamingService<super::HandshakeRequest>
-                        for HandshakeSvc<T>
-                    {
-                        type Response = super::HandshakeResponse;
-                        type ResponseStream = T::HandshakeStream;
-                        type Future = BoxFuture<
-                            tonic::Response<Self::ResponseStream>,
-                            tonic::Status,
-                        >;
-                        fn call(
-                            &mut self,
-                            request: tonic::Request<
-                                tonic::Streaming<super::HandshakeRequest>,
-                            >,
-                        ) -> Self::Future {
-                            let inner = self.0.clone();
-                            let fut = async move { (*inner).handshake(request).await };
-                            Box::pin(fut)
-                        }
-                    }
-                    let inner = self.inner.clone();
-                    let fut = async move {
-                        let interceptor = inner.1;
-                        let inner = inner.0;
-                        let method = HandshakeSvc(inner);
-                        let codec = tonic::codec::ProstCodec::default();
-                        let mut grpc = if let Some(interceptor) = interceptor {
-                            tonic::server::Grpc::with_interceptor(codec, interceptor)
-                        } else {
-                            tonic::server::Grpc::new(codec)
-                        };
-                        let res = grpc.streaming(method, req).await;
-                        Ok(res)
-                    };
-                    Box::pin(fut)
-                }
-                "/arrow.flight.protocol.FlightService/ListFlights" => {
-                    #[allow(non_camel_case_types)]
-                    struct ListFlightsSvc<T: FlightService>(pub Arc<T>);
-                    impl<T: FlightService>
-                        tonic::server::ServerStreamingService<super::Criteria>
-                        for ListFlightsSvc<T>
-                    {
-                        type Response = super::FlightInfo;
-                        type ResponseStream = T::ListFlightsStream;
-                        type Future = BoxFuture<
-                            tonic::Response<Self::ResponseStream>,
-                            tonic::Status,
-                        >;
-                        fn call(
-                            &mut self,
-                            request: tonic::Request<super::Criteria>,
-                        ) -> Self::Future {
-                            let inner = self.0.clone();
-                            let fut = async move { (*inner).list_flights(request).await };
-                            Box::pin(fut)
-                        }
-                    }
-                    let inner = self.inner.clone();
-                    let fut = async move {
-                        let interceptor = inner.1;
-                        let inner = inner.0;
-                        let method = ListFlightsSvc(inner);
-                        let codec = tonic::codec::ProstCodec::default();
-                        let mut grpc = if let Some(interceptor) = interceptor {
-                            tonic::server::Grpc::with_interceptor(codec, interceptor)
-                        } else {
-                            tonic::server::Grpc::new(codec)
-                        };
-                        let res = grpc.server_streaming(method, req).await;
-                        Ok(res)
-                    };
-                    Box::pin(fut)
-                }
-                "/arrow.flight.protocol.FlightService/GetFlightInfo" => {
-                    #[allow(non_camel_case_types)]
-                    struct GetFlightInfoSvc<T: FlightService>(pub Arc<T>);
-                    impl<T: FlightService>
-                        tonic::server::UnaryService<super::FlightDescriptor>
-                        for GetFlightInfoSvc<T>
-                    {
-                        type Response = super::FlightInfo;
-                        type Future =
-                            BoxFuture<tonic::Response<Self::Response>, tonic::Status>;
-                        fn call(
-                            &mut self,
-                            request: tonic::Request<super::FlightDescriptor>,
-                        ) -> Self::Future {
-                            let inner = self.0.clone();
-                            let fut =
-                                async move { (*inner).get_flight_info(request).await };
-                            Box::pin(fut)
-                        }
-                    }
-                    let inner = self.inner.clone();
-                    let fut = async move {
-                        let interceptor = inner.1.clone();
-                        let inner = inner.0;
-                        let method = GetFlightInfoSvc(inner);
-                        let codec = tonic::codec::ProstCodec::default();
-                        let mut grpc = if let Some(interceptor) = interceptor {
-                            tonic::server::Grpc::with_interceptor(codec, interceptor)
-                        } else {
-                            tonic::server::Grpc::new(codec)
-                        };
-                        let res = grpc.unary(method, req).await;
-                        Ok(res)
-                    };
-                    Box::pin(fut)
-                }
-                "/arrow.flight.protocol.FlightService/GetSchema" => {
-                    #[allow(non_camel_case_types)]
-                    struct GetSchemaSvc<T: FlightService>(pub Arc<T>);
-                    impl<T: FlightService>
-                        tonic::server::UnaryService<super::FlightDescriptor>
-                        for GetSchemaSvc<T>
-                    {
-                        type Response = super::SchemaResult;
-                        type Future =
-                            BoxFuture<tonic::Response<Self::Response>, tonic::Status>;
-                        fn call(
-                            &mut self,
-                            request: tonic::Request<super::FlightDescriptor>,
-                        ) -> Self::Future {
-                            let inner = self.0.clone();
-                            let fut = async move { (*inner).get_schema(request).await };
-                            Box::pin(fut)
-                        }
-                    }
-                    let inner = self.inner.clone();
-                    let fut = async move {
-                        let interceptor = inner.1.clone();
-                        let inner = inner.0;
-                        let method = GetSchemaSvc(inner);
-                        let codec = tonic::codec::ProstCodec::default();
-                        let mut grpc = if let Some(interceptor) = interceptor {
-                            tonic::server::Grpc::with_interceptor(codec, interceptor)
-                        } else {
-                            tonic::server::Grpc::new(codec)
-                        };
-                        let res = grpc.unary(method, req).await;
-                        Ok(res)
-                    };
-                    Box::pin(fut)
-                }
-                "/arrow.flight.protocol.FlightService/DoGet" => {
-                    #[allow(non_camel_case_types)]
-                    struct DoGetSvc<T: FlightService>(pub Arc<T>);
-                    impl<T: FlightService>
-                        tonic::server::ServerStreamingService<super::Ticket>
-                        for DoGetSvc<T>
-                    {
-                        type Response = super::FlightData;
-                        type ResponseStream = T::DoGetStream;
-                        type Future = BoxFuture<
-                            tonic::Response<Self::ResponseStream>,
-                            tonic::Status,
-                        >;
-                        fn call(
-                            &mut self,
-                            request: tonic::Request<super::Ticket>,
-                        ) -> Self::Future {
-                            let inner = self.0.clone();
-                            let fut = async move { (*inner).do_get(request).await };
-                            Box::pin(fut)
-                        }
-                    }
-                    let inner = self.inner.clone();
-                    let fut = async move {
-                        let interceptor = inner.1;
-                        let inner = inner.0;
-                        let method = DoGetSvc(inner);
-                        let codec = tonic::codec::ProstCodec::default();
-                        let mut grpc = if let Some(interceptor) = interceptor {
-                            tonic::server::Grpc::with_interceptor(codec, interceptor)
-                        } else {
-                            tonic::server::Grpc::new(codec)
-                        };
-                        let res = grpc.server_streaming(method, req).await;
-                        Ok(res)
-                    };
-                    Box::pin(fut)
-                }
-                "/arrow.flight.protocol.FlightService/DoPut" => {
-                    #[allow(non_camel_case_types)]
-                    struct DoPutSvc<T: FlightService>(pub Arc<T>);
-                    impl<T: FlightService>
-                        tonic::server::StreamingService<super::FlightData>
-                        for DoPutSvc<T>
-                    {
-                        type Response = super::PutResult;
-                        type ResponseStream = T::DoPutStream;
-                        type Future = BoxFuture<
-                            tonic::Response<Self::ResponseStream>,
-                            tonic::Status,
-                        >;
-                        fn call(
-                            &mut self,
-                            request: tonic::Request<tonic::Streaming<super::FlightData>>,
-                        ) -> Self::Future {
-                            let inner = self.0.clone();
-                            let fut = async move { (*inner).do_put(request).await };
-                            Box::pin(fut)
-                        }
-                    }
-                    let inner = self.inner.clone();
-                    let fut = async move {
-                        let interceptor = inner.1;
-                        let inner = inner.0;
-                        let method = DoPutSvc(inner);
-                        let codec = tonic::codec::ProstCodec::default();
-                        let mut grpc = if let Some(interceptor) = interceptor {
-                            tonic::server::Grpc::with_interceptor(codec, interceptor)
-                        } else {
-                            tonic::server::Grpc::new(codec)
-                        };
-                        let res = grpc.streaming(method, req).await;
-                        Ok(res)
-                    };
-                    Box::pin(fut)
-                }
-                "/arrow.flight.protocol.FlightService/DoExchange" => {
-                    #[allow(non_camel_case_types)]
-                    struct DoExchangeSvc<T: FlightService>(pub Arc<T>);
-                    impl<T: FlightService>
-                        tonic::server::StreamingService<super::FlightData>
-                        for DoExchangeSvc<T>
-                    {
-                        type Response = super::FlightData;
-                        type ResponseStream = T::DoExchangeStream;
-                        type Future = BoxFuture<
-                            tonic::Response<Self::ResponseStream>,
-                            tonic::Status,
-                        >;
-                        fn call(
-                            &mut self,
-                            request: tonic::Request<tonic::Streaming<super::FlightData>>,
-                        ) -> Self::Future {
-                            let inner = self.0.clone();
-                            let fut = async move { (*inner).do_exchange(request).await };
-                            Box::pin(fut)
-                        }
-                    }
-                    let inner = self.inner.clone();
-                    let fut = async move {
-                        let interceptor = inner.1;
-                        let inner = inner.0;
-                        let method = DoExchangeSvc(inner);
-                        let codec = tonic::codec::ProstCodec::default();
-                        let mut grpc = if let Some(interceptor) = interceptor {
-                            tonic::server::Grpc::with_interceptor(codec, interceptor)
-                        } else {
-                            tonic::server::Grpc::new(codec)
-                        };
-                        let res = grpc.streaming(method, req).await;
-                        Ok(res)
-                    };
-                    Box::pin(fut)
-                }
-                "/arrow.flight.protocol.FlightService/DoAction" => {
-                    #[allow(non_camel_case_types)]
-                    struct DoActionSvc<T: FlightService>(pub Arc<T>);
-                    impl<T: FlightService>
-                        tonic::server::ServerStreamingService<super::Action>
-                        for DoActionSvc<T>
-                    {
-                        type Response = super::Result;
-                        type ResponseStream = T::DoActionStream;
-                        type Future = BoxFuture<
-                            tonic::Response<Self::ResponseStream>,
-                            tonic::Status,
-                        >;
-                        fn call(
-                            &mut self,
-                            request: tonic::Request<super::Action>,
-                        ) -> Self::Future {
-                            let inner = self.0.clone();
-                            let fut = async move { (*inner).do_action(request).await };
-                            Box::pin(fut)
-                        }
-                    }
-                    let inner = self.inner.clone();
-                    let fut = async move {
-                        let interceptor = inner.1;
-                        let inner = inner.0;
-                        let method = DoActionSvc(inner);
-                        let codec = tonic::codec::ProstCodec::default();
-                        let mut grpc = if let Some(interceptor) = interceptor {
-                            tonic::server::Grpc::with_interceptor(codec, interceptor)
-                        } else {
-                            tonic::server::Grpc::new(codec)
-                        };
-                        let res = grpc.server_streaming(method, req).await;
-                        Ok(res)
-                    };
-                    Box::pin(fut)
-                }
-                "/arrow.flight.protocol.FlightService/ListActions" => {
-                    #[allow(non_camel_case_types)]
-                    struct ListActionsSvc<T: FlightService>(pub Arc<T>);
-                    impl<T: FlightService>
-                        tonic::server::ServerStreamingService<super::Empty>
-                        for ListActionsSvc<T>
-                    {
-                        type Response = super::ActionType;
-                        type ResponseStream = T::ListActionsStream;
-                        type Future = BoxFuture<
-                            tonic::Response<Self::ResponseStream>,
-                            tonic::Status,
-                        >;
-                        fn call(
-                            &mut self,
-                            request: tonic::Request<super::Empty>,
-                        ) -> Self::Future {
-                            let inner = self.0.clone();
-                            let fut = async move { (*inner).list_actions(request).await };
-                            Box::pin(fut)
-                        }
-                    }
-                    let inner = self.inner.clone();
-                    let fut = async move {
-                        let interceptor = inner.1;
-                        let inner = inner.0;
-                        let method = ListActionsSvc(inner);
-                        let codec = tonic::codec::ProstCodec::default();
-                        let mut grpc = if let Some(interceptor) = interceptor {
-                            tonic::server::Grpc::with_interceptor(codec, interceptor)
-                        } else {
-                            tonic::server::Grpc::new(codec)
-                        };
-                        let res = grpc.server_streaming(method, req).await;
-                        Ok(res)
-                    };
-                    Box::pin(fut)
-                }
-                _ => Box::pin(async move {
-                    Ok(http::Response::builder()
-                        .status(200)
-                        .header("grpc-status", "12")
-                        .header("content-type", "application/grpc")
-                        .body(tonic::body::BoxBody::empty())
-                        .unwrap())
-                }),
-            }
-        }
-    }
-    impl<T: FlightService> Clone for FlightServiceServer<T> {
-        fn clone(&self) -> Self {
-            let inner = self.inner.clone();
-            Self { inner }
-        }
-    }
-    impl<T: FlightService> Clone for _Inner<T> {
-        fn clone(&self) -> Self {
-            Self(self.0.clone(), self.1.clone())
-        }
-    }
-    impl<T: std::fmt::Debug> std::fmt::Debug for _Inner<T> {
-        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-            write!(f, "{:?}", self.0)
-        }
-    }
-    impl<T: FlightService> tonic::transport::NamedService for FlightServiceServer<T> {
-        const NAME: &'static str = "arrow.flight.protocol.FlightService";
-    }
-}
diff --git a/rust/arrow-flight/src/lib.rs b/rust/arrow-flight/src/lib.rs
deleted file mode 100644
index 6af2e748678..00000000000
--- a/rust/arrow-flight/src/lib.rs
+++ /dev/null
@@ -1,20 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-include!("arrow.flight.protocol.rs");
-
-pub mod utils;
diff --git a/rust/arrow-flight/src/utils.rs b/rust/arrow-flight/src/utils.rs
deleted file mode 100644
index 659668c0baf..00000000000
--- a/rust/arrow-flight/src/utils.rs
+++ /dev/null
@@ -1,167 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Utilities to assist with reading and writing Arrow data as Flight messages
-
-use std::convert::TryFrom;
-
-use crate::{FlightData, SchemaResult};
-
-use arrow::array::ArrayRef;
-use arrow::datatypes::{Schema, SchemaRef};
-use arrow::error::{ArrowError, Result};
-use arrow::ipc::{convert, reader, writer, writer::EncodedData, writer::IpcWriteOptions};
-use arrow::record_batch::RecordBatch;
-
-/// Convert a `RecordBatch` to a vector of `FlightData` representing the bytes of the dictionaries
-/// and a `FlightData` representing the bytes of the batch's values
-pub fn flight_data_from_arrow_batch(
-    batch: &RecordBatch,
-    options: &IpcWriteOptions,
-) -> (Vec<FlightData>, FlightData) {
-    let data_gen = writer::IpcDataGenerator::default();
-    let mut dictionary_tracker = writer::DictionaryTracker::new(false);
-
-    let (encoded_dictionaries, encoded_batch) = data_gen
-        .encoded_batch(batch, &mut dictionary_tracker, &options)
-        .expect("DictionaryTracker configured above to not error on replacement");
-
-    let flight_dictionaries = encoded_dictionaries.into_iter().map(Into::into).collect();
-    let flight_batch = encoded_batch.into();
-
-    (flight_dictionaries, flight_batch)
-}
-
-impl From<EncodedData> for FlightData {
-    fn from(data: EncodedData) -> Self {
-        FlightData {
-            data_header: data.ipc_message,
-            data_body: data.arrow_data,
-            ..Default::default()
-        }
-    }
-}
-
-/// Convert a `Schema` to `SchemaResult` by converting to an IPC message
-pub fn flight_schema_from_arrow_schema(
-    schema: &Schema,
-    options: &IpcWriteOptions,
-) -> SchemaResult {
-    SchemaResult {
-        schema: flight_schema_as_flatbuffer(schema, options),
-    }
-}
-
-/// Convert a `Schema` to `FlightData` by converting to an IPC message
-pub fn flight_data_from_arrow_schema(
-    schema: &Schema,
-    options: &IpcWriteOptions,
-) -> FlightData {
-    let data_header = flight_schema_as_flatbuffer(schema, options);
-    FlightData {
-        data_header,
-        ..Default::default()
-    }
-}
-
-/// Convert a `Schema` to bytes in the format expected in `FlightInfo.schema`
-pub fn ipc_message_from_arrow_schema(
-    arrow_schema: &Schema,
-    options: &IpcWriteOptions,
-) -> Result<Vec<u8>> {
-    let encoded_data = flight_schema_as_encoded_data(arrow_schema, options);
-
-    let mut schema = vec![];
-    arrow::ipc::writer::write_message(&mut schema, encoded_data, options)?;
-    Ok(schema)
-}
-
-fn flight_schema_as_flatbuffer(
-    arrow_schema: &Schema,
-    options: &IpcWriteOptions,
-) -> Vec<u8> {
-    let encoded_data = flight_schema_as_encoded_data(arrow_schema, options);
-    encoded_data.ipc_message
-}
-
-fn flight_schema_as_encoded_data(
-    arrow_schema: &Schema,
-    options: &IpcWriteOptions,
-) -> EncodedData {
-    let data_gen = writer::IpcDataGenerator::default();
-    data_gen.schema_to_bytes(arrow_schema, options)
-}
-
-/// Try convert `FlightData` into an Arrow Schema
-///
-/// Returns an error if the `FlightData` header is not a valid IPC schema
-impl TryFrom<&FlightData> for Schema {
-    type Error = ArrowError;
-    fn try_from(data: &FlightData) -> Result<Self> {
-        convert::schema_from_bytes(&data.data_header[..]).map_err(|err| {
-            ArrowError::ParseError(format!(
-                "Unable to convert flight data to Arrow schema: {}",
-                err
-            ))
-        })
-    }
-}
-
-/// Try convert `SchemaResult` into an Arrow Schema
-///
-/// Returns an error if the `FlightData` header is not a valid IPC schema
-impl TryFrom<&SchemaResult> for Schema {
-    type Error = ArrowError;
-    fn try_from(data: &SchemaResult) -> Result<Self> {
-        convert::schema_from_bytes(&data.schema[..]).map_err(|err| {
-            ArrowError::ParseError(format!(
-                "Unable to convert schema result to Arrow schema: {}",
-                err
-            ))
-        })
-    }
-}
-
-/// Convert a FlightData message to a RecordBatch
-pub fn flight_data_to_arrow_batch(
-    data: &FlightData,
-    schema: SchemaRef,
-    dictionaries_by_field: &[Option<ArrayRef>],
-) -> Result<RecordBatch> {
-    // check that the data_header is a record batch message
-    let message = arrow::ipc::root_as_message(&data.data_header[..]).map_err(|err| {
-        ArrowError::ParseError(format!("Unable to get root as message: {:?}", err))
-    })?;
-
-    message
-        .header_as_record_batch()
-        .ok_or_else(|| {
-            ArrowError::ParseError(
-                "Unable to convert flight data header to a record batch".to_string(),
-            )
-        })
-        .map(|batch| {
-            reader::read_record_batch(
-                &data.data_body,
-                batch,
-                schema,
-                &dictionaries_by_field,
-            )
-        })?
-}
-
-// TODO: add more explicit conversion that exposes flight descriptor and metadata options
diff --git a/rust/arrow-pyarrow-integration-testing/.cargo/config b/rust/arrow-pyarrow-integration-testing/.cargo/config
deleted file mode 100644
index a127967f66c..00000000000
--- a/rust/arrow-pyarrow-integration-testing/.cargo/config
+++ /dev/null
@@ -1,22 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-[target.x86_64-apple-darwin]
-rustflags = [
-  "-C", "link-arg=-undefined",
-  "-C", "link-arg=dynamic_lookup",
-]
\ No newline at end of file
diff --git a/rust/arrow-pyarrow-integration-testing/.gitignore b/rust/arrow-pyarrow-integration-testing/.gitignore
deleted file mode 100644
index 82adb58b4d6..00000000000
--- a/rust/arrow-pyarrow-integration-testing/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-__pycache__
-venv
diff --git a/rust/arrow-pyarrow-integration-testing/Cargo.toml b/rust/arrow-pyarrow-integration-testing/Cargo.toml
deleted file mode 100644
index f95458dbcb5..00000000000
--- a/rust/arrow-pyarrow-integration-testing/Cargo.toml
+++ /dev/null
@@ -1,38 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-[package]
-name = "arrow-pyarrow-integration-testing"
-description = ""
-version = "5.0.0-SNAPSHOT"
-homepage = "https://github.com/apache/arrow"
-repository = "https://github.com/apache/arrow"
-authors = ["Apache Arrow <dev@arrow.apache.org>"]
-license = "Apache-2.0"
-keywords = [ "arrow" ]
-edition = "2018"
-
-[lib]
-name = "arrow_pyarrow_integration_testing"
-crate-type = ["cdylib"]
-
-[dependencies]
-arrow = { path = "../arrow", version = "5.0.0-SNAPSHOT" }
-pyo3 = { version = "0.12.1", features = ["extension-module"] }
-
-[package.metadata.maturin]
-requires-dist = ["pyarrow>=1"]
diff --git a/rust/arrow-pyarrow-integration-testing/README.md b/rust/arrow-pyarrow-integration-testing/README.md
deleted file mode 100644
index 7e78aa9ec70..00000000000
--- a/rust/arrow-pyarrow-integration-testing/README.md
+++ /dev/null
@@ -1,57 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-# Arrow c integration
-
-This is a Rust crate that tests compatibility between Rust's Arrow implementation and PyArrow.
-
-Note that this crate uses two languages and an external ABI:
-* `Rust`
-* `Python`
-* C ABI privately exposed by `Pyarrow`.
-
-## Basic idea
-
-Pyarrow exposes a C ABI to convert arrow arrays from and to its C implementation, see [here](https://arrow.apache.org/docs/format/CDataInterface.html).
-
-This package uses the equivalent struct in Rust (`arrow::array::ArrowArray`), and verifies that
-we can use pyarrow's interface to move pointers from and to Rust.
-
-## Relevant literature
-
-* [Arrow's CDataInterface](https://arrow.apache.org/docs/format/CDataInterface.html)
-* [Rust's FFI](https://doc.rust-lang.org/nomicon/ffi.html)
-* [Pyarrow private binds](https://github.com/apache/arrow/blob/ae1d24efcc3f1ac2a876d8d9f544a34eb04ae874/python/pyarrow/array.pxi#L1226)
-* [PyO3](https://docs.rs/pyo3/0.12.1/pyo3/index.html)
-
-## How to develop
-
-```bash
-# prepare development environment (used to build wheel / install in development)
-python -m venv venv
-venv/bin/pip install maturin==0.8.2 toml==0.10.1 pyarrow==1.0.0
-```
-
-Whenever rust code changes (your changes or via git pull):
-
-```bash
-source venv/bin/activate
-maturin develop
-python -m unittest discover tests
-```
diff --git a/rust/arrow-pyarrow-integration-testing/pyproject.toml b/rust/arrow-pyarrow-integration-testing/pyproject.toml
deleted file mode 100644
index 27480690e06..00000000000
--- a/rust/arrow-pyarrow-integration-testing/pyproject.toml
+++ /dev/null
@@ -1,20 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-[build-system]
-requires = ["maturin"]
-build-backend = "maturin"
diff --git a/rust/arrow-pyarrow-integration-testing/src/lib.rs b/rust/arrow-pyarrow-integration-testing/src/lib.rs
deleted file mode 100644
index 5b5462d9c15..00000000000
--- a/rust/arrow-pyarrow-integration-testing/src/lib.rs
+++ /dev/null
@@ -1,188 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! This library demonstrates a minimal usage of Rust's C data interface to pass
-//! arrays from and to Python.
-
-use std::error;
-use std::fmt;
-use std::sync::Arc;
-
-use pyo3::exceptions::PyOSError;
-use pyo3::wrap_pyfunction;
-use pyo3::{libc::uintptr_t, prelude::*};
-
-use arrow::array::{make_array_from_raw, ArrayRef, Int64Array};
-use arrow::compute::kernels;
-use arrow::error::ArrowError;
-use arrow::ffi;
-
-/// an error that bridges ArrowError with a Python error
-#[derive(Debug)]
-enum PyO3ArrowError {
-    ArrowError(ArrowError),
-}
-
-impl fmt::Display for PyO3ArrowError {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        match *self {
-            PyO3ArrowError::ArrowError(ref e) => e.fmt(f),
-        }
-    }
-}
-
-impl error::Error for PyO3ArrowError {
-    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
-        match *self {
-            // The cause is the underlying implementation error type. Is implicitly
-            // cast to the trait object `&error::Error`. This works because the
-            // underlying type already implements the `Error` trait.
-            PyO3ArrowError::ArrowError(ref e) => Some(e),
-        }
-    }
-}
-
-impl From<ArrowError> for PyO3ArrowError {
-    fn from(err: ArrowError) -> PyO3ArrowError {
-        PyO3ArrowError::ArrowError(err)
-    }
-}
-
-impl From<PyO3ArrowError> for PyErr {
-    fn from(err: PyO3ArrowError) -> PyErr {
-        PyOSError::new_err(err.to_string())
-    }
-}
-
-fn to_rust(ob: PyObject, py: Python) -> PyResult<ArrayRef> {
-    // prepare a pointer to receive the Array struct
-    let (array_pointer, schema_pointer) =
-        ffi::ArrowArray::into_raw(unsafe { ffi::ArrowArray::empty() });
-
-    // make the conversion through PyArrow's private API
-    // this changes the pointer's memory and is thus unsafe. In particular, `_export_to_c` can go out of bounds
-    ob.call_method1(
-        py,
-        "_export_to_c",
-        (array_pointer as uintptr_t, schema_pointer as uintptr_t),
-    )?;
-
-    let array = unsafe { make_array_from_raw(array_pointer, schema_pointer) }
-        .map_err(|e| PyO3ArrowError::from(e))?;
-    Ok(array)
-}
-
-fn to_py(array: ArrayRef, py: Python) -> PyResult<PyObject> {
-    let (array_pointer, schema_pointer) =
-        array.to_raw().map_err(|e| PyO3ArrowError::from(e))?;
-
-    let pa = py.import("pyarrow")?;
-
-    let array = pa.getattr("Array")?.call_method1(
-        "_import_from_c",
-        (array_pointer as uintptr_t, schema_pointer as uintptr_t),
-    )?;
-    Ok(array.to_object(py))
-}
-
-/// Returns `array + array` of an int64 array.
-#[pyfunction]
-fn double(array: PyObject, py: Python) -> PyResult<PyObject> {
-    // import
-    let array = to_rust(array, py)?;
-
-    // perform some operation
-    let array =
-        array
-            .as_any()
-            .downcast_ref::<Int64Array>()
-            .ok_or(PyO3ArrowError::ArrowError(ArrowError::ParseError(
-                "Expects an int64".to_string(),
-            )))?;
-    let array =
-        kernels::arithmetic::add(&array, &array).map_err(|e| PyO3ArrowError::from(e))?;
-    let array = Arc::new(array);
-
-    // export
-    to_py(array, py)
-}
-
-/// calls a lambda function that receives and returns an array
-/// whose result must be the array multiplied by two
-#[pyfunction]
-fn double_py(lambda: PyObject, py: Python) -> PyResult<bool> {
-    // create
-    let array = Arc::new(Int64Array::from(vec![Some(1), None, Some(3)]));
-    let expected = Arc::new(Int64Array::from(vec![Some(2), None, Some(6)])) as ArrayRef;
-
-    // to py
-    let array = to_py(array, py)?;
-
-    let array = lambda.call1(py, (array,))?;
-
-    let array = to_rust(array, py)?;
-
-    Ok(array == expected)
-}
-
-/// Returns the substring
-#[pyfunction]
-fn substring(array: PyObject, start: i64, py: Python) -> PyResult<PyObject> {
-    // import
-    let array = to_rust(array, py)?;
-
-    // substring
-    let array = kernels::substring::substring(array.as_ref(), start, &None)
-        .map_err(|e| PyO3ArrowError::from(e))?;
-
-    // export
-    to_py(array, py)
-}
-
-/// Returns the concatenate
-#[pyfunction]
-fn concatenate(array: PyObject, py: Python) -> PyResult<PyObject> {
-    // import
-    let array = to_rust(array, py)?;
-
-    // concat
-    let array = kernels::concat::concat(&[array.as_ref(), array.as_ref()])
-        .map_err(|e| PyO3ArrowError::from(e))?;
-
-    // export
-    to_py(array, py)
-}
-
-/// Converts to rust and back to python
-#[pyfunction]
-fn round_trip(array: PyObject, py: Python) -> PyResult<PyObject> {
-    // import
-    let array = to_rust(array, py)?;
-
-    // export
-    to_py(array, py)
-}
-
-#[pymodule]
-fn arrow_pyarrow_integration_testing(_py: Python, m: &PyModule) -> PyResult<()> {
-    m.add_wrapped(wrap_pyfunction!(double))?;
-    m.add_wrapped(wrap_pyfunction!(double_py))?;
-    m.add_wrapped(wrap_pyfunction!(substring))?;
-    m.add_wrapped(wrap_pyfunction!(concatenate))?;
-    m.add_wrapped(wrap_pyfunction!(round_trip))?;
-    Ok(())
-}
diff --git a/rust/arrow-pyarrow-integration-testing/tests/test_sql.py b/rust/arrow-pyarrow-integration-testing/tests/test_sql.py
deleted file mode 100644
index c0de382057c..00000000000
--- a/rust/arrow-pyarrow-integration-testing/tests/test_sql.py
+++ /dev/null
@@ -1,99 +0,0 @@
-# -*- coding: utf-8 -*-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import unittest
-
-import pyarrow
-import arrow_pyarrow_integration_testing
-
-
-class TestCase(unittest.TestCase):
-    def test_primitive_python(self):
-        """
-        Python -> Rust -> Python
-        """
-        old_allocated = pyarrow.total_allocated_bytes()
-        a = pyarrow.array([1, 2, 3])
-        b = arrow_pyarrow_integration_testing.double(a)
-        self.assertEqual(b, pyarrow.array([2, 4, 6]))
-        del a
-        del b
-        # No leak of C++ memory
-        self.assertEqual(old_allocated, pyarrow.total_allocated_bytes())
-
-    def test_primitive_rust(self):
-        """
-        Rust -> Python -> Rust
-        """
-        old_allocated = pyarrow.total_allocated_bytes()
-
-        def double(array):
-            array = array.to_pylist()
-            return pyarrow.array([x * 2 if x is not None else None for x in array])
-
-        is_correct = arrow_pyarrow_integration_testing.double_py(double)
-        self.assertTrue(is_correct)
-        # No leak of C++ memory
-        self.assertEqual(old_allocated, pyarrow.total_allocated_bytes())
-
-    def test_string_python(self):
-        """
-        Python -> Rust -> Python
-        """
-        old_allocated = pyarrow.total_allocated_bytes()
-        a = pyarrow.array(["a", None, "ccc"])
-        b = arrow_pyarrow_integration_testing.substring(a, 1)
-        self.assertEqual(b, pyarrow.array(["", None, "cc"]))
-        del a
-        del b
-        # No leak of C++ memory
-        self.assertEqual(old_allocated, pyarrow.total_allocated_bytes())
-
-    def test_time32_python(self):
-        """
-        Python -> Rust -> Python
-        """
-        old_allocated = pyarrow.total_allocated_bytes()
-        a = pyarrow.array([None, 1, 2], pyarrow.time32('s'))
-        b = arrow_pyarrow_integration_testing.concatenate(a)
-        expected = pyarrow.array([None, 1, 2] + [None, 1, 2], pyarrow.time32('s'))
-        self.assertEqual(b, expected)
-        del a
-        del b
-        del expected
-        # No leak of C++ memory
-        self.assertEqual(old_allocated, pyarrow.total_allocated_bytes())
-
-    def test_list_array(self):
-        """
-        Python -> Rust -> Python
-        """
-        old_allocated = pyarrow.total_allocated_bytes()
-        a = pyarrow.array([[], None, [1, 2], [4, 5, 6]], pyarrow.list_(pyarrow.int64()))
-        b = arrow_pyarrow_integration_testing.round_trip(a)
-
-        b.validate(full=True)
-        assert a.to_pylist() == b.to_pylist()
-        assert a.type == b.type
-        del a
-        del b
-        # No leak of C++ memory
-        self.assertEqual(old_allocated, pyarrow.total_allocated_bytes())
-
-
-
diff --git a/rust/arrow/Cargo.toml b/rust/arrow/Cargo.toml
deleted file mode 100644
index ac3b72e57b0..00000000000
--- a/rust/arrow/Cargo.toml
+++ /dev/null
@@ -1,151 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-[package]
-name = "arrow"
-version = "5.0.0-SNAPSHOT"
-description = "Rust implementation of Apache Arrow"
-homepage = "https://github.com/apache/arrow"
-repository = "https://github.com/apache/arrow"
-authors = ["Apache Arrow <dev@arrow.apache.org>"]
-license = "Apache-2.0"
-keywords = [ "arrow" ]
-include = [
-    "benches/*.rs",
-    "src/**/*.rs",
-    "Cargo.toml",
-]
-edition = "2018"
-
-[lib]
-name = "arrow"
-path = "src/lib.rs"
-
-[dependencies]
-serde = { version = "1.0", features = ["rc"] }
-serde_derive = "1.0"
-serde_json = { version = "1.0", features = ["preserve_order"] }
-indexmap = "1.6"
-rand = "0.7"
-csv = "1.1"
-num = "0.3"
-regex = "1.3"
-lazy_static = "1.4"
-packed_simd = { version = "0.3.4", optional = true, package = "packed_simd_2" }
-chrono = "0.4"
-flatbuffers = "^0.8"
-hex = "0.4"
-prettytable-rs = { version = "0.8.0", optional = true }
-lexical-core = "^0.7"
-
-[features]
-default = []
-avx512 = []
-simd = ["packed_simd"]
-prettyprint = ["prettytable-rs"]
-# this is only intended to be used in single-threaded programs: it verifies that
-# all allocated memory is being released (no memory leaks).
-# See README for details
-memory-check = []
-
-[dev-dependencies]
-criterion = "0.3"
-flate2 = "1"
-tempfile = "3"
-
-[build-dependencies]
-cfg_aliases = "0.1"
-
-[[bench]]
-name = "aggregate_kernels"
-harness = false
-
-[[bench]]
-name = "array_from_vec"
-harness = false
-
-[[bench]]
-name = "builder"
-harness = false
-
-[[bench]]
-name = "buffer_bit_ops"
-harness = false
-
-[[bench]]
-name = "boolean_kernels"
-harness = false
-
-[[bench]]
-name = "arithmetic_kernels"
-harness = false
-
-[[bench]]
-name = "cast_kernels"
-harness = false
-
-[[bench]]
-name = "comparison_kernels"
-harness = false
-
-[[bench]]
-name = "filter_kernels"
-harness = false
-
-[[bench]]
-name = "take_kernels"
-harness = false
-
-[[bench]]
-name = "length_kernel"
-harness = false
-
-[[bench]]
-name = "bit_length_kernel"
-harness = false
-
-[[bench]]
-name = "sort_kernel"
-harness = false
-
-[[bench]]
-name = "csv_writer"
-harness = false
-
-[[bench]]
-name = "json_reader"
-harness = false
-
-[[bench]]
-name = "equal"
-harness = false
-
-[[bench]]
-name = "array_slice"
-harness = false
-
-[[bench]]
-name = "concatenate_kernel"
-harness = false
-
-[[bench]]
-name = "mutable_array"
-harness = false
-
-[[bench]]
-name = "buffer_create"
-harness = false
diff --git a/rust/arrow/README.md b/rust/arrow/README.md
deleted file mode 100644
index 674c3fc6c8b..00000000000
--- a/rust/arrow/README.md
+++ /dev/null
@@ -1,206 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-# Native Rust implementation of Apache Arrow
-
-This crate contains a native Rust implementation of the [Arrow columnar format](https://arrow.apache.org/docs/format/Columnar.html).
-
-## Developer's guide
-
-Common information for all Rust libraries in this project, including
-testing, code formatting, and lints, can be found in the main Arrow
-Rust [README.md](../README.md).
-
-Please refer to [lib.rs](src/lib.rs) for an introduction to this
-specific crate and its current functionality.
-
-### How to check memory allocations
-
-This crate heavily uses `unsafe` due to how memory is allocated in cache lines.
-We have a small tool to verify that this crate does not leak memory (beyond what the compiler already does)
-
-Run it with
-
-```bash
-cargo test --features memory-check --lib -- --test-threads 1
-```
-
-This runs all unit-tests on a single thread and counts all allocations and de-allocations.
-
-## Examples
-
-The examples folder shows how to construct some different types of Arrow
-arrays, including dynamic arrays created at runtime.
-
-Examples can be run using the `cargo run --example` command. For example:
-
-```bash
-cargo run --example builders
-cargo run --example dynamic_types
-cargo run --example read_csv
-```
-
-## IPC
-
-The expected flatc version is 1.12.0+, built from [flatbuffers](https://github.com/google/flatbuffers)
-master at fixed commit ID, by regen.sh.
-
-The IPC flatbuffer code was generated by running this command from the root of the project:
-
-```bash
-./regen.sh
-```
-
-The above script will run the `flatc` compiler and perform some adjustments to the source code:
-
-- Replace `type__` with `type_`
-- Remove `org::apache::arrow::flatbuffers` namespace
-- Add includes to each generated file
-
-## Features
-
-Arrow uses the following features:
-
-* `simd` - Arrow uses the [packed_simd](https://crates.io/crates/packed_simd) crate to optimize many of the
- implementations in the [compute](https://github.com/apache/arrow/tree/master/rust/arrow/src/compute)
- module using SIMD intrinsics. These optimizations are turned *off* by default.
- If the `simd` feature is enabled, an unstable version of Rust is required (we test with `nightly-2021-03-24`)
-* `flight` which contains useful functions to convert between the Flight wire format and Arrow data
-* `prettyprint` which is a utility for printing record batches
-
-Other than `simd` all the other features are enabled by default. Disabling `prettyprint` might be necessary in order to
-compile Arrow to the `wasm32-unknown-unknown` WASM target.
-
-## Guidelines in usage of `unsafe`
-
-[`unsafe`](https://doc.rust-lang.org/book/ch19-01-unsafe-rust.html) has a high maintenance cost because debugging and testing it is difficult, time consuming, often requires external tools (e.g. `valgrind`), and requires a higher-than-usual attention to details. Undefined behavior is particularly difficult to identify and test, and usage of `unsafe` is the [primary cause of undefined behavior](https://doc.rust-lang.org/reference/behavior-considered-undefined.html) in a program written in Rust.
-For two real world examples of where `unsafe` has consumed time in the past in this project see [#8545](https://github.com/apache/arrow/pull/8645) and [8829](https://github.com/apache/arrow/pull/8829)
-This crate only accepts the usage of `unsafe` code upon careful consideration, and strives to avoid it to the largest possible extent.
-
-### When can `unsafe` be used?
-
-Generally, `unsafe` should only be used when a `safe` counterpart is not available and there is no `safe` way to achieve additional performance in that area. The following is a summary of the current components of the crate that require `unsafe`:
-
-* alloc, dealloc and realloc of buffers along cache lines
-* Interpreting bytes as certain rust types, for access, representation and compute
-* Foreign interfaces (C data interface)
-* Inter-process communication (IPC)
-* SIMD
-* Performance (e.g. omit bounds checks, use of pointers to avoid bound checks)
-
-#### cache-line aligned memory management
-
-The arrow format recommends storing buffers aligned with cache lines, and this crate adopts this behavior.
-However, Rust's global allocator does not allocate memory aligned with cache-lines. As such, many of the low-level operations related to memory management require `unsafe`.
-
-#### Interpreting bytes
-
-The arrow format is specified in bytes (`u8`), which can be logically represented as certain types
-depending on the `DataType`.
-For many operations, such as access, representation, numerical computation and string manipulation,
-it is often necessary to interpret bytes as other physical types (e.g. `i32`).
-
-Usage of `unsafe` for the purpose of interpreting bytes in their corresponding type (according to the arrow specification) is allowed. Specifically, the pointer to the byte slice must be aligned to the type that it intends to represent and the length of the slice is a multiple of the size of the target type of the transmutation.
-
-#### FFI
-
-The arrow format declares an ABI for zero-copy from and to libraries that implement the specification
-(foreign interfaces). In Rust, receiving and sending pointers via FFI requires usage of `unsafe` due to
-the impossibility of the compiler to derive the invariants (such as lifetime, null pointers, and pointer alignment) from the source code alone as they are part of the FFI contract.
-
-#### IPC
-
-The arrow format declares a IPC protocol, which this crate supports. IPC is equivalent to a FFI in that the rust compiler can't reason about the contract's invariants.
-
-#### SIMD
-
-The API provided by the `packed_simd` library is currently `unsafe`. However, SIMD offers a significant performance improvement over non-SIMD operations.
-
-#### Performance
-
-Some operations are significantly faster when `unsafe` is used.
-
-A common usage of `unsafe` is to offer an API to access the `i`th element of an array (e.g. `UInt32Array`).
-This requires accessing the values buffer e.g. `array.buffers()[0]`, picking the slice
-`[i * size_of<i32>(), (i + 1) * size_of<i32>()]`, and then transmuting it to `i32`. In safe Rust,
-this operation requires boundary checks that are detrimental to performance.
-
-Usage of `unsafe` for performance reasons is justified only when all other alternatives have been exhausted and the performance benefits are sufficiently large (e.g. >~10%).
-
-### Considerations when introducing `unsafe`
-
-Usage of `unsafe` in this crate *must*:
-
-* not expose a public API as `safe` when there are necessary invariants for that API to be defined behavior.
-* have code documentation for why `safe` is not used / possible
-* have code documentation about which invariant the user needs to enforce to ensure [soundness](https://rust-lang.github.io/unsafe-code-guidelines/glossary.html#soundness-of-code--of-a-library), or which
-* invariant is being preserved.
-* if applicable, use `debug_assert`s to relevant invariants (e.g. bound checks)
-
-Example of code documentation:
-
-```rust
-// JUSTIFICATION
-//  Benefit
-//      Describe the benefit of using unsafe. E.g.
-//      "30% performance degradation if the safe counterpart is used, see bench X."
-//  Soundness
-//      Describe why the code remains sound (according to the definition of rust's unsafe code guidelines). E.g.
-//      "We bounded check these values at initialization and the array is immutable."
-let ... = unsafe { ... };
-```
-
-When adding this documentation to existing code that is not sound and cannot trivially be fixed, we should file
-specific JIRA issues and reference them in these code comments. For example:
-
-```rust
-//  Soundness
-//      This is not sound because .... see https://issues.apache.org/jira/browse/ARROW-nnnnn
-```
-
-# Publishing to crates.io
-
-An Arrow committer can publish this crate after an official project release has
-been made to crates.io using the following instructions.
-
-Follow [these
-instructions](https://doc.rust-lang.org/cargo/reference/publishing.html) to
-create an account and login to crates.io before asking to be added as an owner
-of the [arrow crate](https://crates.io/crates/arrow).
-
-Checkout the tag for the version to be released. For example:
-
-```bash
-git checkout apache-arrow-0.11.0
-```
-
-If the Cargo.toml in this tag already contains `version = "0.11.0"` (as it
-should) then the crate can be published with the following command:
-
-```bash
-cargo publish
-```
-
-If the Cargo.toml does not have the correct version then it will be necessary
-to modify it manually. Since there is now a modified file locally that is not
-committed to GitHub it will be necessary to use the following command.
-
-```bash
-cargo publish --allow-dirty
-```
diff --git a/rust/arrow/benches/aggregate_kernels.rs b/rust/arrow/benches/aggregate_kernels.rs
deleted file mode 100644
index 1724b7349c5..00000000000
--- a/rust/arrow/benches/aggregate_kernels.rs
+++ /dev/null
@@ -1,67 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#[macro_use]
-extern crate criterion;
-use criterion::Criterion;
-
-extern crate arrow;
-
-use arrow::compute::kernels::aggregate::*;
-use arrow::util::bench_util::*;
-use arrow::{array::*, datatypes::Float32Type};
-
-fn bench_sum(arr_a: &Float32Array) {
-    criterion::black_box(sum(&arr_a).unwrap());
-}
-
-fn bench_min(arr_a: &Float32Array) {
-    criterion::black_box(min(&arr_a).unwrap());
-}
-
-fn bench_max(arr_a: &Float32Array) {
-    criterion::black_box(max(&arr_a).unwrap());
-}
-
-fn bench_min_string(arr_a: &StringArray) {
-    criterion::black_box(min_string(&arr_a).unwrap());
-}
-
-fn add_benchmark(c: &mut Criterion) {
-    let arr_a = create_primitive_array::<Float32Type>(512, 0.0);
-
-    c.bench_function("sum 512", |b| b.iter(|| bench_sum(&arr_a)));
-    c.bench_function("min 512", |b| b.iter(|| bench_min(&arr_a)));
-    c.bench_function("max 512", |b| b.iter(|| bench_max(&arr_a)));
-
-    let arr_a = create_primitive_array::<Float32Type>(512, 0.5);
-
-    c.bench_function("sum nulls 512", |b| b.iter(|| bench_sum(&arr_a)));
-    c.bench_function("min nulls 512", |b| b.iter(|| bench_min(&arr_a)));
-    c.bench_function("max nulls 512", |b| b.iter(|| bench_max(&arr_a)));
-
-    let arr_b = create_string_array::<i32>(512, 0.0);
-    c.bench_function("min string 512", |b| b.iter(|| bench_min_string(&arr_b)));
-
-    let arr_b = create_string_array::<i32>(512, 0.5);
-    c.bench_function("min nulls string 512", |b| {
-        b.iter(|| bench_min_string(&arr_b))
-    });
-}
-
-criterion_group!(benches, add_benchmark);
-criterion_main!(benches);
diff --git a/rust/arrow/benches/arithmetic_kernels.rs b/rust/arrow/benches/arithmetic_kernels.rs
deleted file mode 100644
index 721157e2846..00000000000
--- a/rust/arrow/benches/arithmetic_kernels.rs
+++ /dev/null
@@ -1,103 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#[macro_use]
-extern crate criterion;
-use criterion::Criterion;
-use rand::Rng;
-
-use std::sync::Arc;
-
-extern crate arrow;
-
-use arrow::compute::kernels::limit::*;
-use arrow::util::bench_util::*;
-use arrow::{array::*, datatypes::Float32Type};
-use arrow::{compute::kernels::arithmetic::*, util::test_util::seedable_rng};
-
-fn create_array(size: usize, with_nulls: bool) -> ArrayRef {
-    let null_density = if with_nulls { 0.5 } else { 0.0 };
-    let array = create_primitive_array::<Float32Type>(size, null_density);
-    Arc::new(array)
-}
-
-fn bench_add(arr_a: &ArrayRef, arr_b: &ArrayRef) {
-    let arr_a = arr_a.as_any().downcast_ref::<Float32Array>().unwrap();
-    let arr_b = arr_b.as_any().downcast_ref::<Float32Array>().unwrap();
-    criterion::black_box(add(arr_a, arr_b).unwrap());
-}
-
-fn bench_subtract(arr_a: &ArrayRef, arr_b: &ArrayRef) {
-    let arr_a = arr_a.as_any().downcast_ref::<Float32Array>().unwrap();
-    let arr_b = arr_b.as_any().downcast_ref::<Float32Array>().unwrap();
-    criterion::black_box(subtract(&arr_a, &arr_b).unwrap());
-}
-
-fn bench_multiply(arr_a: &ArrayRef, arr_b: &ArrayRef) {
-    let arr_a = arr_a.as_any().downcast_ref::<Float32Array>().unwrap();
-    let arr_b = arr_b.as_any().downcast_ref::<Float32Array>().unwrap();
-    criterion::black_box(multiply(&arr_a, &arr_b).unwrap());
-}
-
-fn bench_divide(arr_a: &ArrayRef, arr_b: &ArrayRef) {
-    let arr_a = arr_a.as_any().downcast_ref::<Float32Array>().unwrap();
-    let arr_b = arr_b.as_any().downcast_ref::<Float32Array>().unwrap();
-    criterion::black_box(divide(&arr_a, &arr_b).unwrap());
-}
-
-fn bench_divide_scalar(array: &ArrayRef, divisor: f32) {
-    let array = array.as_any().downcast_ref::<Float32Array>().unwrap();
-    criterion::black_box(divide_scalar(&array, divisor).unwrap());
-}
-
-fn bench_limit(arr_a: &ArrayRef, max: usize) {
-    criterion::black_box(limit(arr_a, max));
-}
-
-fn add_benchmark(c: &mut Criterion) {
-    let arr_a = create_array(512, false);
-    let arr_b = create_array(512, false);
-    let scalar = seedable_rng().gen();
-
-    c.bench_function("add 512", |b| b.iter(|| bench_add(&arr_a, &arr_b)));
-    c.bench_function("subtract 512", |b| {
-        b.iter(|| bench_subtract(&arr_a, &arr_b))
-    });
-    c.bench_function("multiply 512", |b| {
-        b.iter(|| bench_multiply(&arr_a, &arr_b))
-    });
-    c.bench_function("divide 512", |b| b.iter(|| bench_divide(&arr_a, &arr_b)));
-    c.bench_function("divide_scalar 512", |b| {
-        b.iter(|| bench_divide_scalar(&arr_a, scalar))
-    });
-    c.bench_function("limit 512, 512", |b| b.iter(|| bench_limit(&arr_a, 512)));
-
-    let arr_a_nulls = create_array(512, false);
-    let arr_b_nulls = create_array(512, false);
-    c.bench_function("add_nulls_512", |b| {
-        b.iter(|| bench_add(&arr_a_nulls, &arr_b_nulls))
-    });
-    c.bench_function("divide_nulls_512", |b| {
-        b.iter(|| bench_divide(&arr_a_nulls, &arr_b_nulls))
-    });
-    c.bench_function("divide_scalar_nulls_512", |b| {
-        b.iter(|| bench_divide_scalar(&arr_a_nulls, scalar))
-    });
-}
-
-criterion_group!(benches, add_benchmark);
-criterion_main!(benches);
diff --git a/rust/arrow/benches/array_from_vec.rs b/rust/arrow/benches/array_from_vec.rs
deleted file mode 100644
index 7740c6bc34e..00000000000
--- a/rust/arrow/benches/array_from_vec.rs
+++ /dev/null
@@ -1,120 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#[macro_use]
-extern crate criterion;
-use criterion::Criterion;
-
-extern crate arrow;
-
-use arrow::array::*;
-use arrow::buffer::Buffer;
-use arrow::datatypes::*;
-use std::{convert::TryFrom, sync::Arc};
-
-fn array_from_vec(n: usize) {
-    let mut v: Vec<u8> = Vec::with_capacity(n);
-    for i in 0..n {
-        v.push((i & 0xffff) as u8);
-    }
-    let arr_data = ArrayDataBuilder::new(DataType::Int32)
-        .add_buffer(Buffer::from(v))
-        .build();
-    criterion::black_box(Int32Array::from(arr_data));
-}
-
-fn array_string_from_vec(n: usize) {
-    let mut v: Vec<Option<&str>> = Vec::with_capacity(n);
-    for i in 0..n {
-        if i % 2 == 0 {
-            v.push(Some("hello world"));
-        } else {
-            v.push(None);
-        }
-    }
-    criterion::black_box(StringArray::from(v));
-}
-
-fn struct_array_values(
-    n: usize,
-) -> (
-    &'static str,
-    Vec<Option<&'static str>>,
-    &'static str,
-    Vec<Option<i32>>,
-) {
-    let mut strings: Vec<Option<&str>> = Vec::with_capacity(n);
-    let mut ints: Vec<Option<i32>> = Vec::with_capacity(n);
-    for _ in 0..n / 4 {
-        strings.extend_from_slice(&[Some("joe"), None, None, Some("mark")]);
-        ints.extend_from_slice(&[Some(1), Some(2), None, Some(4)]);
-    }
-    ("f1", strings, "f2", ints)
-}
-
-fn struct_array_from_vec(
-    field1: &str,
-    strings: &[Option<&str>],
-    field2: &str,
-    ints: &[Option<i32>],
-) {
-    let strings: ArrayRef = Arc::new(StringArray::from(strings.to_owned()));
-    let ints: ArrayRef = Arc::new(Int32Array::from(ints.to_owned()));
-
-    criterion::black_box(
-        StructArray::try_from(vec![(field1, strings), (field2, ints)]).unwrap(),
-    );
-}
-
-fn criterion_benchmark(c: &mut Criterion) {
-    c.bench_function("array_from_vec 128", |b| b.iter(|| array_from_vec(128)));
-    c.bench_function("array_from_vec 256", |b| b.iter(|| array_from_vec(256)));
-    c.bench_function("array_from_vec 512", |b| b.iter(|| array_from_vec(512)));
-
-    c.bench_function("array_string_from_vec 128", |b| {
-        b.iter(|| array_string_from_vec(128))
-    });
-    c.bench_function("array_string_from_vec 256", |b| {
-        b.iter(|| array_string_from_vec(256))
-    });
-    c.bench_function("array_string_from_vec 512", |b| {
-        b.iter(|| array_string_from_vec(512))
-    });
-
-    let (field1, strings, field2, ints) = struct_array_values(128);
-    c.bench_function("struct_array_from_vec 128", |b| {
-        b.iter(|| struct_array_from_vec(&field1, &strings, &field2, &ints))
-    });
-
-    let (field1, strings, field2, ints) = struct_array_values(256);
-    c.bench_function("struct_array_from_vec 256", |b| {
-        b.iter(|| struct_array_from_vec(&field1, &strings, &field2, &ints))
-    });
-
-    let (field1, strings, field2, ints) = struct_array_values(512);
-    c.bench_function("struct_array_from_vec 512", |b| {
-        b.iter(|| struct_array_from_vec(&field1, &strings, &field2, &ints))
-    });
-
-    let (field1, strings, field2, ints) = struct_array_values(1024);
-    c.bench_function("struct_array_from_vec 1024", |b| {
-        b.iter(|| struct_array_from_vec(&field1, &strings, &field2, &ints))
-    });
-}
-
-criterion_group!(benches, criterion_benchmark);
-criterion_main!(benches);
diff --git a/rust/arrow/benches/array_slice.rs b/rust/arrow/benches/array_slice.rs
deleted file mode 100644
index a535c80d217..00000000000
--- a/rust/arrow/benches/array_slice.rs
+++ /dev/null
@@ -1,52 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#[macro_use]
-extern crate criterion;
-use criterion::Criterion;
-
-extern crate arrow;
-
-use arrow::array::*;
-use std::sync::Arc;
-
-fn create_array_slice(array: &ArrayRef, length: usize) -> ArrayRef {
-    array.slice(0, length)
-}
-
-fn create_array_with_nulls(size: usize) -> ArrayRef {
-    let array: Float64Array = (0..size)
-        .map(|i| if i % 2 == 0 { Some(1.0) } else { None })
-        .collect();
-    Arc::new(array)
-}
-
-fn array_slice_benchmark(c: &mut Criterion) {
-    let array = create_array_with_nulls(4096);
-    c.bench_function("array_slice 128", |b| {
-        b.iter(|| create_array_slice(&array, 128))
-    });
-    c.bench_function("array_slice 512", |b| {
-        b.iter(|| create_array_slice(&array, 512))
-    });
-    c.bench_function("array_slice 2048", |b| {
-        b.iter(|| create_array_slice(&array, 2048))
-    });
-}
-
-criterion_group!(benches, array_slice_benchmark);
-criterion_main!(benches);
diff --git a/rust/arrow/benches/bit_length_kernel.rs b/rust/arrow/benches/bit_length_kernel.rs
deleted file mode 100644
index 51d31345712..00000000000
--- a/rust/arrow/benches/bit_length_kernel.rs
+++ /dev/null
@@ -1,46 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#[macro_use]
-extern crate criterion;
-use criterion::Criterion;
-
-extern crate arrow;
-
-use arrow::{array::*, compute::kernels::length::bit_length};
-
-fn bench_bit_length(array: &StringArray) {
-    criterion::black_box(bit_length(array).unwrap());
-}
-
-fn add_benchmark(c: &mut Criterion) {
-    fn double_vec<T: Clone>(v: Vec<T>) -> Vec<T> {
-        [&v[..], &v[..]].concat()
-    }
-
-    // double ["hello", " ", "world", "!"] 10 times
-    let mut values = vec!["one", "on", "o", ""];
-    for _ in 0..10 {
-        values = double_vec(values);
-    }
-    let array = StringArray::from(values);
-
-    c.bench_function("bit_length", |b| b.iter(|| bench_bit_length(&array)));
-}
-
-criterion_group!(benches, add_benchmark);
-criterion_main!(benches);
diff --git a/rust/arrow/benches/boolean_kernels.rs b/rust/arrow/benches/boolean_kernels.rs
deleted file mode 100644
index 6559c4e4caf..00000000000
--- a/rust/arrow/benches/boolean_kernels.rs
+++ /dev/null
@@ -1,51 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#[macro_use]
-extern crate criterion;
-use criterion::Criterion;
-
-use arrow::util::bench_util::create_boolean_array;
-
-extern crate arrow;
-
-use arrow::array::*;
-use arrow::compute::kernels::boolean as boolean_kernels;
-
-fn bench_and(lhs: &BooleanArray, rhs: &BooleanArray) {
-    criterion::black_box(boolean_kernels::and(lhs, rhs).unwrap());
-}
-
-fn bench_or(lhs: &BooleanArray, rhs: &BooleanArray) {
-    criterion::black_box(boolean_kernels::or(lhs, rhs).unwrap());
-}
-
-fn bench_not(array: &BooleanArray) {
-    criterion::black_box(boolean_kernels::not(&array).unwrap());
-}
-
-fn add_benchmark(c: &mut Criterion) {
-    let size = 2usize.pow(15);
-    let array1 = create_boolean_array(size, 0.0, 0.5);
-    let array2 = create_boolean_array(size, 0.0, 0.5);
-    c.bench_function("and", |b| b.iter(|| bench_and(&array1, &array2)));
-    c.bench_function("or", |b| b.iter(|| bench_or(&array1, &array2)));
-    c.bench_function("not", |b| b.iter(|| bench_not(&array1)));
-}
-
-criterion_group!(benches, add_benchmark);
-criterion_main!(benches);
diff --git a/rust/arrow/benches/buffer_bit_ops.rs b/rust/arrow/benches/buffer_bit_ops.rs
deleted file mode 100644
index 063f39c9272..00000000000
--- a/rust/arrow/benches/buffer_bit_ops.rs
+++ /dev/null
@@ -1,59 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#[macro_use]
-extern crate criterion;
-use criterion::Criterion;
-
-extern crate arrow;
-
-use arrow::buffer::{Buffer, MutableBuffer};
-
-///  Helper function to create arrays
-fn create_buffer(size: usize) -> Buffer {
-    let mut result = MutableBuffer::new(size).with_bitset(size, false);
-
-    for i in 0..size {
-        result.as_slice_mut()[i] = 0b01010101 << i << (i % 4);
-    }
-
-    result.into()
-}
-
-fn bench_buffer_and(left: &Buffer, right: &Buffer) {
-    criterion::black_box((left & right).unwrap());
-}
-
-fn bench_buffer_or(left: &Buffer, right: &Buffer) {
-    criterion::black_box((left | right).unwrap());
-}
-
-fn bit_ops_benchmark(c: &mut Criterion) {
-    let left = create_buffer(512 * 10);
-    let right = create_buffer(512 * 10);
-
-    c.bench_function("buffer_bit_ops and", |b| {
-        b.iter(|| bench_buffer_and(&left, &right))
-    });
-
-    c.bench_function("buffer_bit_ops or", |b| {
-        b.iter(|| bench_buffer_or(&left, &right))
-    });
-}
-
-criterion_group!(benches, bit_ops_benchmark);
-criterion_main!(benches);
diff --git a/rust/arrow/benches/buffer_create.rs b/rust/arrow/benches/buffer_create.rs
deleted file mode 100644
index d628e031ce6..00000000000
--- a/rust/arrow/benches/buffer_create.rs
+++ /dev/null
@@ -1,190 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#[macro_use]
-extern crate criterion;
-use arrow::util::test_util::seedable_rng;
-use criterion::Criterion;
-use rand::distributions::Uniform;
-use rand::Rng;
-
-extern crate arrow;
-
-use arrow::{
-    buffer::{Buffer, MutableBuffer},
-    datatypes::ToByteSlice,
-};
-
-fn mutable_buffer_from_iter(data: &[Vec<bool>]) -> Vec<Buffer> {
-    criterion::black_box(
-        data.iter()
-            .map(|vec| vec.iter().copied().collect::<MutableBuffer>().into())
-            .collect::<Vec<_>>(),
-    )
-}
-
-fn buffer_from_iter(data: &[Vec<bool>]) -> Vec<Buffer> {
-    criterion::black_box(
-        data.iter()
-            .map(|vec| vec.iter().copied().collect::<Buffer>())
-            .collect::<Vec<_>>(),
-    )
-}
-
-fn mutable_buffer_iter_bitset(data: &[Vec<bool>]) -> Vec<Buffer> {
-    criterion::black_box({
-        data.iter()
-            .map(|datum| {
-                let mut result = MutableBuffer::new((data.len() + 7) / 8)
-                    .with_bitset(datum.len(), false);
-                for (i, value) in datum.iter().enumerate() {
-                    if *value {
-                        unsafe {
-                            arrow::util::bit_util::set_bit_raw(result.as_mut_ptr(), i);
-                        }
-                    }
-                }
-                result.into()
-            })
-            .collect::<Vec<_>>()
-    })
-}
-
-fn mutable_iter_extend_from_slice(data: &[Vec<u32>], capacity: usize) -> Buffer {
-    criterion::black_box({
-        let mut result = MutableBuffer::new(capacity);
-
-        data.iter().for_each(|vec| {
-            vec.iter()
-                .for_each(|elem| result.extend_from_slice(elem.to_byte_slice()))
-        });
-
-        result.into()
-    })
-}
-
-fn mutable_buffer(data: &[Vec<u32>], capacity: usize) -> Buffer {
-    criterion::black_box({
-        let mut result = MutableBuffer::new(capacity);
-
-        data.iter().for_each(|vec| result.extend_from_slice(vec));
-
-        result.into()
-    })
-}
-
-fn mutable_buffer_extend(data: &[Vec<u32>], capacity: usize) -> Buffer {
-    criterion::black_box({
-        let mut result = MutableBuffer::new(capacity);
-
-        data.iter()
-            .for_each(|vec| result.extend(vec.iter().copied()));
-
-        result.into()
-    })
-}
-
-fn from_slice(data: &[Vec<u32>], capacity: usize) -> Buffer {
-    criterion::black_box({
-        let mut a = Vec::<u32>::with_capacity(capacity);
-
-        data.iter().for_each(|vec| a.extend(vec));
-
-        Buffer::from(a.to_byte_slice())
-    })
-}
-
-fn create_data(size: usize) -> Vec<Vec<u32>> {
-    let rng = &mut seedable_rng();
-    let range = Uniform::new(0, 33);
-
-    (0..size)
-        .map(|_| {
-            let size = rng.sample(range);
-            seedable_rng()
-                .sample_iter(&range)
-                .take(size as usize)
-                .collect()
-        })
-        .collect()
-}
-
-fn create_data_bool(size: usize) -> Vec<Vec<bool>> {
-    let rng = &mut seedable_rng();
-    let range = Uniform::new(0, 33);
-
-    (0..size)
-        .map(|_| {
-            let size = rng.sample(range);
-            seedable_rng()
-                .sample_iter(&range)
-                .take(size as usize)
-                .map(|x| x > 15)
-                .collect()
-        })
-        .collect()
-}
-fn benchmark(c: &mut Criterion) {
-    let size = 2usize.pow(15);
-    let data = create_data(size);
-
-    let bool_data = create_data_bool(size);
-    let cap = data.iter().map(|i| i.len()).sum();
-    let byte_cap = cap * std::mem::size_of::<u32>();
-
-    c.bench_function("mutable iter extend_from_slice", |b| {
-        b.iter(|| {
-            mutable_iter_extend_from_slice(
-                criterion::black_box(&data),
-                criterion::black_box(0),
-            )
-        })
-    });
-    c.bench_function("mutable", |b| {
-        b.iter(|| mutable_buffer(criterion::black_box(&data), criterion::black_box(0)))
-    });
-
-    c.bench_function("mutable extend", |b| {
-        b.iter(|| mutable_buffer_extend(&data, 0))
-    });
-
-    c.bench_function("mutable prepared", |b| {
-        b.iter(|| {
-            mutable_buffer(criterion::black_box(&data), criterion::black_box(byte_cap))
-        })
-    });
-
-    c.bench_function("from_slice", |b| {
-        b.iter(|| from_slice(criterion::black_box(&data), criterion::black_box(0)))
-    });
-    c.bench_function("from_slice prepared", |b| {
-        b.iter(|| from_slice(criterion::black_box(&data), criterion::black_box(cap)))
-    });
-
-    c.bench_function("MutableBuffer iter bitset", |b| {
-        b.iter(|| mutable_buffer_iter_bitset(criterion::black_box(&bool_data)))
-    });
-    c.bench_function("MutableBuffer::from_iter bool", |b| {
-        b.iter(|| mutable_buffer_from_iter(criterion::black_box(&bool_data)))
-    });
-    c.bench_function("Buffer::from_iter bool", |b| {
-        b.iter(|| buffer_from_iter(criterion::black_box(&bool_data)))
-    });
-}
-
-criterion_group!(benches, benchmark);
-criterion_main!(benches);
diff --git a/rust/arrow/benches/builder.rs b/rust/arrow/benches/builder.rs
deleted file mode 100644
index fd9f319e397..00000000000
--- a/rust/arrow/benches/builder.rs
+++ /dev/null
@@ -1,116 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-extern crate arrow;
-extern crate criterion;
-extern crate rand;
-
-use std::mem::size_of;
-
-use criterion::*;
-use rand::distributions::Standard;
-
-use arrow::array::*;
-use arrow::util::test_util::seedable_rng;
-use rand::Rng;
-
-// Build arrays with 512k elements.
-const BATCH_SIZE: usize = 8 << 10;
-const NUM_BATCHES: usize = 64;
-
-fn bench_primitive(c: &mut Criterion) {
-    let data: [i64; BATCH_SIZE] = [100; BATCH_SIZE];
-
-    let mut group = c.benchmark_group("bench_primitive");
-    group.throughput(Throughput::Bytes(
-        ((data.len() * NUM_BATCHES * size_of::<i64>()) as u32).into(),
-    ));
-    group.bench_function("bench_primitive", |b| {
-        b.iter(|| {
-            let mut builder = Int64Builder::new(64);
-            for _ in 0..NUM_BATCHES {
-                let _ = black_box(builder.append_slice(&data[..]));
-            }
-            black_box(builder.finish());
-        })
-    });
-    group.finish();
-}
-
-fn bench_primitive_nulls(c: &mut Criterion) {
-    let mut group = c.benchmark_group("bench_primitive_nulls");
-    group.bench_function("bench_primitive_nulls", |b| {
-        b.iter(|| {
-            let mut builder = UInt8Builder::new(64);
-            for _ in 0..NUM_BATCHES * BATCH_SIZE {
-                let _ = black_box(builder.append_null());
-            }
-            black_box(builder.finish());
-        })
-    });
-    group.finish();
-}
-
-fn bench_bool(c: &mut Criterion) {
-    let data: Vec<bool> = seedable_rng()
-        .sample_iter(&Standard)
-        .take(BATCH_SIZE)
-        .collect();
-    let data_len = data.len();
-
-    let mut group = c.benchmark_group("bench_bool");
-    group.throughput(Throughput::Bytes(
-        ((data_len * NUM_BATCHES * size_of::<bool>()) as u32).into(),
-    ));
-    group.bench_function("bench_bool", |b| {
-        b.iter(|| {
-            let mut builder = BooleanBuilder::new(64);
-            for _ in 0..NUM_BATCHES {
-                let _ = black_box(builder.append_slice(&data[..]));
-            }
-            black_box(builder.finish());
-        })
-    });
-    group.finish();
-}
-
-fn bench_string(c: &mut Criterion) {
-    const SAMPLE_STRING: &str = "sample string";
-    let mut group = c.benchmark_group("bench_primitive");
-    group.throughput(Throughput::Bytes(
-        ((BATCH_SIZE * NUM_BATCHES * SAMPLE_STRING.len()) as u32).into(),
-    ));
-    group.bench_function("bench_string", |b| {
-        b.iter(|| {
-            let mut builder = StringBuilder::new(64);
-            for _ in 0..NUM_BATCHES * BATCH_SIZE {
-                let _ = black_box(builder.append_value(SAMPLE_STRING));
-            }
-            black_box(builder.finish());
-        })
-    });
-    group.finish();
-}
-
-criterion_group!(
-    benches,
-    bench_primitive,
-    bench_primitive_nulls,
-    bench_bool,
-    bench_string
-);
-criterion_main!(benches);
diff --git a/rust/arrow/benches/cast_kernels.rs b/rust/arrow/benches/cast_kernels.rs
deleted file mode 100644
index d164e1facfd..00000000000
--- a/rust/arrow/benches/cast_kernels.rs
+++ /dev/null
@@ -1,185 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#[macro_use]
-extern crate criterion;
-use criterion::Criterion;
-use rand::distributions::{Distribution, Standard, Uniform};
-use rand::Rng;
-
-use std::sync::Arc;
-
-extern crate arrow;
-
-use arrow::array::*;
-use arrow::compute::cast;
-use arrow::datatypes::*;
-use arrow::util::bench_util::*;
-use arrow::util::test_util::seedable_rng;
-
-fn build_array<T: ArrowPrimitiveType>(size: usize) -> ArrayRef
-where
-    Standard: Distribution<T::Native>,
-{
-    let array = create_primitive_array::<T>(size, 0.1);
-    Arc::new(array)
-}
-
-fn build_utf8_date_array(size: usize, with_nulls: bool) -> ArrayRef {
-    use chrono::NaiveDate;
-
-    // use random numbers to avoid spurious compiler optimizations wrt to branching
-    let mut rng = seedable_rng();
-    let mut builder = StringBuilder::new(size);
-    let range = Uniform::new(0, 737776);
-
-    for _ in 0..size {
-        if with_nulls && rng.gen::<f32>() > 0.8 {
-            builder.append_null().unwrap();
-        } else {
-            let string = NaiveDate::from_num_days_from_ce(rng.sample(range))
-                .format("%Y-%m-%d")
-                .to_string();
-            builder.append_value(&string).unwrap();
-        }
-    }
-    Arc::new(builder.finish())
-}
-
-fn build_utf8_date_time_array(size: usize, with_nulls: bool) -> ArrayRef {
-    use chrono::NaiveDateTime;
-
-    // use random numbers to avoid spurious compiler optimizations wrt to branching
-    let mut rng = seedable_rng();
-    let mut builder = StringBuilder::new(size);
-    let range = Uniform::new(0, 1608071414123);
-
-    for _ in 0..size {
-        if with_nulls && rng.gen::<f32>() > 0.8 {
-            builder.append_null().unwrap();
-        } else {
-            let string = NaiveDateTime::from_timestamp(rng.sample(range), 0)
-                .format("%Y-%m-%dT%H:%M:%S")
-                .to_string();
-            builder.append_value(&string).unwrap();
-        }
-    }
-    Arc::new(builder.finish())
-}
-
-// cast array from specified primitive array type to desired data type
-fn cast_array(array: &ArrayRef, to_type: DataType) {
-    criterion::black_box(cast(array, &to_type).unwrap());
-}
-
-fn add_benchmark(c: &mut Criterion) {
-    let i32_array = build_array::<Int32Type>(512);
-    let i64_array = build_array::<Int64Type>(512);
-    let f32_array = build_array::<Float32Type>(512);
-    let f32_utf8_array = cast(&build_array::<Float32Type>(512), &DataType::Utf8).unwrap();
-
-    let f64_array = build_array::<Float64Type>(512);
-    let date64_array = build_array::<Date64Type>(512);
-    let date32_array = build_array::<Date32Type>(512);
-    let time32s_array = build_array::<Time32SecondType>(512);
-    let time64ns_array = build_array::<Time64NanosecondType>(512);
-    let time_ns_array = build_array::<TimestampNanosecondType>(512);
-    let time_ms_array = build_array::<TimestampMillisecondType>(512);
-    let utf8_date_array = build_utf8_date_array(512, true);
-    let utf8_date_time_array = build_utf8_date_time_array(512, true);
-
-    c.bench_function("cast int32 to int32 512", |b| {
-        b.iter(|| cast_array(&i32_array, DataType::Int32))
-    });
-    c.bench_function("cast int32 to uint32 512", |b| {
-        b.iter(|| cast_array(&i32_array, DataType::UInt32))
-    });
-    c.bench_function("cast int32 to float32 512", |b| {
-        b.iter(|| cast_array(&i32_array, DataType::Float32))
-    });
-    c.bench_function("cast int32 to float64 512", |b| {
-        b.iter(|| cast_array(&i32_array, DataType::Float64))
-    });
-    c.bench_function("cast int32 to int64 512", |b| {
-        b.iter(|| cast_array(&i32_array, DataType::Int64))
-    });
-    c.bench_function("cast float32 to int32 512", |b| {
-        b.iter(|| cast_array(&f32_array, DataType::Int32))
-    });
-    c.bench_function("cast float64 to float32 512", |b| {
-        b.iter(|| cast_array(&f64_array, DataType::Float32))
-    });
-    c.bench_function("cast float64 to uint64 512", |b| {
-        b.iter(|| cast_array(&f64_array, DataType::UInt64))
-    });
-    c.bench_function("cast int64 to int32 512", |b| {
-        b.iter(|| cast_array(&i64_array, DataType::Int32))
-    });
-    c.bench_function("cast date64 to date32 512", |b| {
-        b.iter(|| cast_array(&date64_array, DataType::Date32))
-    });
-    c.bench_function("cast date32 to date64 512", |b| {
-        b.iter(|| cast_array(&date32_array, DataType::Date64))
-    });
-    c.bench_function("cast time32s to time32ms 512", |b| {
-        b.iter(|| cast_array(&time32s_array, DataType::Time32(TimeUnit::Millisecond)))
-    });
-    c.bench_function("cast time32s to time64us 512", |b| {
-        b.iter(|| cast_array(&time32s_array, DataType::Time64(TimeUnit::Microsecond)))
-    });
-    c.bench_function("cast time64ns to time32s 512", |b| {
-        b.iter(|| cast_array(&time64ns_array, DataType::Time32(TimeUnit::Second)))
-    });
-    c.bench_function("cast timestamp_ns to timestamp_s 512", |b| {
-        b.iter(|| {
-            cast_array(
-                &time_ns_array,
-                DataType::Timestamp(TimeUnit::Nanosecond, None),
-            )
-        })
-    });
-    c.bench_function("cast timestamp_ms to timestamp_ns 512", |b| {
-        b.iter(|| {
-            cast_array(
-                &time_ms_array,
-                DataType::Timestamp(TimeUnit::Nanosecond, None),
-            )
-        })
-    });
-    c.bench_function("cast utf8 to f32", |b| {
-        b.iter(|| cast_array(&f32_utf8_array, DataType::Float32))
-    });
-    c.bench_function("cast i64 to string 512", |b| {
-        b.iter(|| cast_array(&i64_array, DataType::Utf8))
-    });
-    c.bench_function("cast f32 to string 512", |b| {
-        b.iter(|| cast_array(&f32_array, DataType::Utf8))
-    });
-
-    c.bench_function("cast timestamp_ms to i64 512", |b| {
-        b.iter(|| cast_array(&time_ms_array, DataType::Int64))
-    });
-    c.bench_function("cast utf8 to date32 512", |b| {
-        b.iter(|| cast_array(&utf8_date_array, DataType::Date32))
-    });
-    c.bench_function("cast utf8 to date64 512", |b| {
-        b.iter(|| cast_array(&utf8_date_time_array, DataType::Date64))
-    });
-}
-
-criterion_group!(benches, add_benchmark);
-criterion_main!(benches);
diff --git a/rust/arrow/benches/comparison_kernels.rs b/rust/arrow/benches/comparison_kernels.rs
deleted file mode 100644
index a3df556efcf..00000000000
--- a/rust/arrow/benches/comparison_kernels.rs
+++ /dev/null
@@ -1,201 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#[macro_use]
-extern crate criterion;
-use criterion::Criterion;
-
-extern crate arrow;
-
-use arrow::compute::*;
-use arrow::datatypes::ArrowNumericType;
-use arrow::util::bench_util::*;
-use arrow::{array::*, datatypes::Float32Type};
-
-fn bench_eq<T>(arr_a: &PrimitiveArray<T>, arr_b: &PrimitiveArray<T>)
-where
-    T: ArrowNumericType,
-{
-    eq(criterion::black_box(arr_a), criterion::black_box(arr_b)).unwrap();
-}
-
-fn bench_eq_scalar<T>(arr_a: &PrimitiveArray<T>, value_b: T::Native)
-where
-    T: ArrowNumericType,
-{
-    eq_scalar(criterion::black_box(arr_a), criterion::black_box(value_b)).unwrap();
-}
-
-fn bench_neq<T>(arr_a: &PrimitiveArray<T>, arr_b: &PrimitiveArray<T>)
-where
-    T: ArrowNumericType,
-{
-    neq(criterion::black_box(arr_a), criterion::black_box(arr_b)).unwrap();
-}
-
-fn bench_neq_scalar<T>(arr_a: &PrimitiveArray<T>, value_b: T::Native)
-where
-    T: ArrowNumericType,
-{
-    neq_scalar(criterion::black_box(arr_a), criterion::black_box(value_b)).unwrap();
-}
-
-fn bench_lt<T>(arr_a: &PrimitiveArray<T>, arr_b: &PrimitiveArray<T>)
-where
-    T: ArrowNumericType,
-{
-    lt(criterion::black_box(arr_a), criterion::black_box(arr_b)).unwrap();
-}
-
-fn bench_lt_scalar<T>(arr_a: &PrimitiveArray<T>, value_b: T::Native)
-where
-    T: ArrowNumericType,
-{
-    lt_scalar(criterion::black_box(arr_a), criterion::black_box(value_b)).unwrap();
-}
-
-fn bench_lt_eq<T>(arr_a: &PrimitiveArray<T>, arr_b: &PrimitiveArray<T>)
-where
-    T: ArrowNumericType,
-{
-    lt_eq(criterion::black_box(arr_a), criterion::black_box(arr_b)).unwrap();
-}
-
-fn bench_lt_eq_scalar<T>(arr_a: &PrimitiveArray<T>, value_b: T::Native)
-where
-    T: ArrowNumericType,
-{
-    lt_eq_scalar(criterion::black_box(arr_a), criterion::black_box(value_b)).unwrap();
-}
-
-fn bench_gt<T>(arr_a: &PrimitiveArray<T>, arr_b: &PrimitiveArray<T>)
-where
-    T: ArrowNumericType,
-{
-    gt(criterion::black_box(arr_a), criterion::black_box(arr_b)).unwrap();
-}
-
-fn bench_gt_scalar<T>(arr_a: &PrimitiveArray<T>, value_b: T::Native)
-where
-    T: ArrowNumericType,
-{
-    gt_scalar(criterion::black_box(arr_a), criterion::black_box(value_b)).unwrap();
-}
-
-fn bench_gt_eq<T>(arr_a: &PrimitiveArray<T>, arr_b: &PrimitiveArray<T>)
-where
-    T: ArrowNumericType,
-{
-    gt_eq(criterion::black_box(arr_a), criterion::black_box(arr_b)).unwrap();
-}
-
-fn bench_gt_eq_scalar<T>(arr_a: &PrimitiveArray<T>, value_b: T::Native)
-where
-    T: ArrowNumericType,
-{
-    gt_eq_scalar(criterion::black_box(arr_a), criterion::black_box(value_b)).unwrap();
-}
-
-fn bench_like_utf8_scalar(arr_a: &StringArray, value_b: &str) {
-    like_utf8_scalar(criterion::black_box(arr_a), criterion::black_box(value_b)).unwrap();
-}
-
-fn bench_nlike_utf8_scalar(arr_a: &StringArray, value_b: &str) {
-    nlike_utf8_scalar(criterion::black_box(arr_a), criterion::black_box(value_b))
-        .unwrap();
-}
-
-fn add_benchmark(c: &mut Criterion) {
-    let size = 65536;
-    let arr_a = create_primitive_array_with_seed::<Float32Type>(size, 0.0, 42);
-    let arr_b = create_primitive_array_with_seed::<Float32Type>(size, 0.0, 43);
-
-    let arr_string = create_string_array::<i32>(size, 0.0);
-
-    c.bench_function("eq Float32", |b| b.iter(|| bench_eq(&arr_a, &arr_b)));
-    c.bench_function("eq scalar Float32", |b| {
-        b.iter(|| bench_eq_scalar(&arr_a, 1.0))
-    });
-
-    c.bench_function("neq Float32", |b| b.iter(|| bench_neq(&arr_a, &arr_b)));
-    c.bench_function("neq scalar Float32", |b| {
-        b.iter(|| bench_neq_scalar(&arr_a, 1.0))
-    });
-
-    c.bench_function("lt Float32", |b| b.iter(|| bench_lt(&arr_a, &arr_b)));
-    c.bench_function("lt scalar Float32", |b| {
-        b.iter(|| bench_lt_scalar(&arr_a, 1.0))
-    });
-
-    c.bench_function("lt_eq Float32", |b| b.iter(|| bench_lt_eq(&arr_a, &arr_b)));
-    c.bench_function("lt_eq scalar Float32", |b| {
-        b.iter(|| bench_lt_eq_scalar(&arr_a, 1.0))
-    });
-
-    c.bench_function("gt Float32", |b| b.iter(|| bench_gt(&arr_a, &arr_b)));
-    c.bench_function("gt scalar Float32", |b| {
-        b.iter(|| bench_gt_scalar(&arr_a, 1.0))
-    });
-
-    c.bench_function("gt_eq Float32", |b| b.iter(|| bench_gt_eq(&arr_a, &arr_b)));
-    c.bench_function("gt_eq scalar Float32", |b| {
-        b.iter(|| bench_gt_eq_scalar(&arr_a, 1.0))
-    });
-
-    c.bench_function("like_utf8 scalar equals", |b| {
-        b.iter(|| bench_like_utf8_scalar(&arr_string, "xxxx"))
-    });
-
-    c.bench_function("like_utf8 scalar contains", |b| {
-        b.iter(|| bench_like_utf8_scalar(&arr_string, "%xxxx%"))
-    });
-
-    c.bench_function("like_utf8 scalar ends with", |b| {
-        b.iter(|| bench_like_utf8_scalar(&arr_string, "xxxx%"))
-    });
-
-    c.bench_function("like_utf8 scalar starts with", |b| {
-        b.iter(|| bench_like_utf8_scalar(&arr_string, "%xxxx"))
-    });
-
-    c.bench_function("like_utf8 scalar complex", |b| {
-        b.iter(|| bench_like_utf8_scalar(&arr_string, "%xx_xx%xxx"))
-    });
-
-    c.bench_function("nlike_utf8 scalar equals", |b| {
-        b.iter(|| bench_nlike_utf8_scalar(&arr_string, "xxxx"))
-    });
-
-    c.bench_function("nlike_utf8 scalar contains", |b| {
-        b.iter(|| bench_nlike_utf8_scalar(&arr_string, "%xxxx%"))
-    });
-
-    c.bench_function("nlike_utf8 scalar ends with", |b| {
-        b.iter(|| bench_nlike_utf8_scalar(&arr_string, "xxxx%"))
-    });
-
-    c.bench_function("nlike_utf8 scalar starts with", |b| {
-        b.iter(|| bench_nlike_utf8_scalar(&arr_string, "%xxxx"))
-    });
-
-    c.bench_function("nlike_utf8 scalar complex", |b| {
-        b.iter(|| bench_nlike_utf8_scalar(&arr_string, "%xx_xx%xxx"))
-    });
-}
-
-criterion_group!(benches, add_benchmark);
-criterion_main!(benches);
diff --git a/rust/arrow/benches/concatenate_kernel.rs b/rust/arrow/benches/concatenate_kernel.rs
deleted file mode 100644
index 3fff2abd179..00000000000
--- a/rust/arrow/benches/concatenate_kernel.rs
+++ /dev/null
@@ -1,66 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#[macro_use]
-extern crate criterion;
-use criterion::Criterion;
-
-extern crate arrow;
-
-use arrow::array::*;
-use arrow::compute::concat;
-use arrow::datatypes::*;
-use arrow::util::bench_util::*;
-
-fn bench_concat(v1: &dyn Array, v2: &dyn Array) {
-    criterion::black_box(concat(&[v1, v2]).unwrap());
-}
-
-fn bench_concat_arrays(arrays: &[&dyn Array]) {
-    criterion::black_box(concat(arrays).unwrap());
-}
-
-fn add_benchmark(c: &mut Criterion) {
-    let v1 = create_primitive_array::<Int32Type>(1024, 0.0);
-    let v2 = create_primitive_array::<Int32Type>(1024, 0.0);
-    c.bench_function("concat i32 1024", |b| b.iter(|| bench_concat(&v1, &v2)));
-
-    let v1 = create_primitive_array::<Int32Type>(1024, 0.5);
-    let v2 = create_primitive_array::<Int32Type>(1024, 0.5);
-    c.bench_function("concat i32 nulls 1024", |b| {
-        b.iter(|| bench_concat(&v1, &v2))
-    });
-
-    let small_array = create_primitive_array::<Int32Type>(4, 0.0);
-    let arrays: Vec<_> = (0..1024).map(|_| &small_array as &dyn Array).collect();
-    c.bench_function("concat 1024 arrays i32 4", |b| {
-        b.iter(|| bench_concat_arrays(&arrays))
-    });
-
-    let v1 = create_string_array::<i32>(1024, 0.0);
-    let v2 = create_string_array::<i32>(1024, 0.0);
-    c.bench_function("concat str 1024", |b| b.iter(|| bench_concat(&v1, &v2)));
-
-    let v1 = create_string_array::<i32>(1024, 0.5);
-    let v2 = create_string_array::<i32>(1024, 0.5);
-    c.bench_function("concat str nulls 1024", |b| {
-        b.iter(|| bench_concat(&v1, &v2))
-    });
-}
-
-criterion_group!(benches, add_benchmark);
-criterion_main!(benches);
diff --git a/rust/arrow/benches/csv_writer.rs b/rust/arrow/benches/csv_writer.rs
deleted file mode 100644
index 9b018530938..00000000000
--- a/rust/arrow/benches/csv_writer.rs
+++ /dev/null
@@ -1,70 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-extern crate arrow;
-extern crate criterion;
-
-use criterion::*;
-
-use arrow::array::*;
-use arrow::csv;
-use arrow::datatypes::*;
-use arrow::record_batch::RecordBatch;
-use std::fs::File;
-use std::sync::Arc;
-
-fn record_batches_to_csv() {
-    let schema = Schema::new(vec![
-        Field::new("c1", DataType::Utf8, false),
-        Field::new("c2", DataType::Float64, true),
-        Field::new("c3", DataType::UInt32, false),
-        Field::new("c3", DataType::Boolean, true),
-    ]);
-
-    let c1 = StringArray::from(vec![
-        "Lorem ipsum dolor sit amet",
-        "consectetur adipiscing elit",
-        "sed do eiusmod tempor",
-    ]);
-    let c2 = PrimitiveArray::<Float64Type>::from(vec![
-        Some(123.564532),
-        None,
-        Some(-556132.25),
-    ]);
-    let c3 = PrimitiveArray::<UInt32Type>::from(vec![3, 2, 1]);
-    let c4 = BooleanArray::from(vec![Some(true), Some(false), None]);
-
-    let b = RecordBatch::try_new(
-        Arc::new(schema),
-        vec![Arc::new(c1), Arc::new(c2), Arc::new(c3), Arc::new(c4)],
-    )
-    .unwrap();
-    let file = File::create("target/bench_write_csv.csv").unwrap();
-    let mut writer = csv::Writer::new(file);
-    let batches = vec![&b, &b, &b, &b, &b, &b, &b, &b, &b, &b, &b];
-    #[allow(clippy::unit_arg)]
-    criterion::black_box(for batch in batches {
-        writer.write(batch).unwrap()
-    });
-}
-
-fn criterion_benchmark(c: &mut Criterion) {
-    c.bench_function("record_batches_to_csv", |b| b.iter(record_batches_to_csv));
-}
-
-criterion_group!(benches, criterion_benchmark);
-criterion_main!(benches);
diff --git a/rust/arrow/benches/equal.rs b/rust/arrow/benches/equal.rs
deleted file mode 100644
index af535506e86..00000000000
--- a/rust/arrow/benches/equal.rs
+++ /dev/null
@@ -1,57 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// Allowed because we use `arr == arr` in benchmarks
-#![allow(clippy::eq_op)]
-
-#[macro_use]
-extern crate criterion;
-use criterion::Criterion;
-
-extern crate arrow;
-
-use arrow::util::bench_util::*;
-use arrow::{array::*, datatypes::Float32Type};
-
-fn bench_equal<A: Array + PartialEq<A>>(arr_a: &A) {
-    criterion::black_box(arr_a == arr_a);
-}
-
-fn add_benchmark(c: &mut Criterion) {
-    let arr_a = create_primitive_array::<Float32Type>(512, 0.0);
-    c.bench_function("equal_512", |b| b.iter(|| bench_equal(&arr_a)));
-
-    let arr_a_nulls = create_primitive_array::<Float32Type>(512, 0.5);
-    c.bench_function("equal_nulls_512", |b| b.iter(|| bench_equal(&arr_a_nulls)));
-
-    let arr_a = create_string_array::<i32>(512, 0.0);
-    c.bench_function("equal_string_512", |b| b.iter(|| bench_equal(&arr_a)));
-
-    let arr_a_nulls = create_string_array::<i32>(512, 0.5);
-    c.bench_function("equal_string_nulls_512", |b| {
-        b.iter(|| bench_equal(&arr_a_nulls))
-    });
-
-    let arr_a = create_boolean_array(512, 0.0, 0.5);
-    c.bench_function("equal_bool_512", |b| b.iter(|| bench_equal(&arr_a)));
-
-    let arr_a = create_boolean_array(513, 0.0, 0.5);
-    c.bench_function("equal_bool_513", |b| b.iter(|| bench_equal(&arr_a)));
-}
-
-criterion_group!(benches, add_benchmark);
-criterion_main!(benches);
diff --git a/rust/arrow/benches/filter_kernels.rs b/rust/arrow/benches/filter_kernels.rs
deleted file mode 100644
index ca317b4676c..00000000000
--- a/rust/arrow/benches/filter_kernels.rs
+++ /dev/null
@@ -1,106 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-extern crate arrow;
-
-use arrow::compute::Filter;
-use arrow::util::bench_util::*;
-
-use arrow::array::*;
-use arrow::compute::{build_filter, filter};
-use arrow::datatypes::{Float32Type, UInt8Type};
-
-use criterion::{criterion_group, criterion_main, Criterion};
-
-fn bench_filter(data_array: &dyn Array, filter_array: &BooleanArray) {
-    criterion::black_box(filter(data_array, filter_array).unwrap());
-}
-
-fn bench_built_filter<'a>(filter: &Filter<'a>, data: &impl Array) {
-    criterion::black_box(filter(&data.data()));
-}
-
-fn add_benchmark(c: &mut Criterion) {
-    let size = 65536;
-    let filter_array = create_boolean_array(size, 0.0, 0.5);
-    let dense_filter_array = create_boolean_array(size, 0.0, 1.0 - 1.0 / 1024.0);
-    let sparse_filter_array = create_boolean_array(size, 0.0, 1.0 / 1024.0);
-
-    let filter = build_filter(&filter_array).unwrap();
-    let dense_filter = build_filter(&dense_filter_array).unwrap();
-    let sparse_filter = build_filter(&sparse_filter_array).unwrap();
-
-    let data_array = create_primitive_array::<UInt8Type>(size, 0.0);
-
-    c.bench_function("filter u8", |b| {
-        b.iter(|| bench_filter(&data_array, &filter_array))
-    });
-    c.bench_function("filter u8 high selectivity", |b| {
-        b.iter(|| bench_filter(&data_array, &dense_filter_array))
-    });
-    c.bench_function("filter u8 low selectivity", |b| {
-        b.iter(|| bench_filter(&data_array, &sparse_filter_array))
-    });
-
-    c.bench_function("filter context u8", |b| {
-        b.iter(|| bench_built_filter(&filter, &data_array))
-    });
-    c.bench_function("filter context u8 high selectivity", |b| {
-        b.iter(|| bench_built_filter(&dense_filter, &data_array))
-    });
-    c.bench_function("filter context u8 low selectivity", |b| {
-        b.iter(|| bench_built_filter(&sparse_filter, &data_array))
-    });
-
-    let data_array = create_primitive_array::<UInt8Type>(size, 0.5);
-    c.bench_function("filter context u8 w NULLs", |b| {
-        b.iter(|| bench_built_filter(&filter, &data_array))
-    });
-    c.bench_function("filter context u8 w NULLs high selectivity", |b| {
-        b.iter(|| bench_built_filter(&dense_filter, &data_array))
-    });
-    c.bench_function("filter context u8 w NULLs low selectivity", |b| {
-        b.iter(|| bench_built_filter(&sparse_filter, &data_array))
-    });
-
-    let data_array = create_primitive_array::<Float32Type>(size, 0.5);
-    c.bench_function("filter f32", |b| {
-        b.iter(|| bench_filter(&data_array, &filter_array))
-    });
-    c.bench_function("filter context f32", |b| {
-        b.iter(|| bench_built_filter(&filter, &data_array))
-    });
-    c.bench_function("filter context f32 high selectivity", |b| {
-        b.iter(|| bench_built_filter(&dense_filter, &data_array))
-    });
-    c.bench_function("filter context f32 low selectivity", |b| {
-        b.iter(|| bench_built_filter(&sparse_filter, &data_array))
-    });
-
-    let data_array = create_string_array::<i32>(size, 0.5);
-    c.bench_function("filter context string", |b| {
-        b.iter(|| bench_built_filter(&filter, &data_array))
-    });
-    c.bench_function("filter context string high selectivity", |b| {
-        b.iter(|| bench_built_filter(&dense_filter, &data_array))
-    });
-    c.bench_function("filter context string low selectivity", |b| {
-        b.iter(|| bench_built_filter(&sparse_filter, &data_array))
-    });
-}
-
-criterion_group!(benches, add_benchmark);
-criterion_main!(benches);
diff --git a/rust/arrow/benches/json_reader.rs b/rust/arrow/benches/json_reader.rs
deleted file mode 100644
index ef3ddf0537b..00000000000
--- a/rust/arrow/benches/json_reader.rs
+++ /dev/null
@@ -1,112 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-extern crate arrow;
-extern crate criterion;
-
-use criterion::*;
-
-use arrow::datatypes::*;
-use arrow::json::ReaderBuilder;
-use std::io::Cursor;
-use std::sync::Arc;
-
-fn json_primitive_to_record_batch() {
-    let schema = Arc::new(Schema::new(vec![
-        Field::new("c1", DataType::Utf8, true),
-        Field::new("c2", DataType::Float64, true),
-        Field::new("c3", DataType::UInt32, true),
-        Field::new("c4", DataType::Boolean, true),
-    ]));
-    let builder = ReaderBuilder::new().with_schema(schema).with_batch_size(64);
-    let json_content = r#"
-        {"c1": "eleven", "c2": 6.2222222225, "c3": 5.0, "c4": false}
-        {"c1": "twelve", "c2": -55555555555555.2, "c3": 3}
-        {"c1": null, "c2": 3, "c3": 125, "c4": null}
-        {"c2": -35, "c3": 100.0, "c4": true}
-        {"c1": "fifteen", "c2": null, "c4": true}
-        {"c1": "eleven", "c2": 6.2222222225, "c3": 5.0, "c4": false}
-        {"c1": "twelve", "c2": -55555555555555.2, "c3": 3}
-        {"c1": null, "c2": 3, "c3": 125, "c4": null}
-        {"c2": -35, "c3": 100.0, "c4": true}
-        {"c1": "fifteen", "c2": null, "c4": true}
-        "#;
-    let cursor = Cursor::new(json_content);
-    let mut reader = builder.build(cursor).unwrap();
-    #[allow(clippy::unit_arg)]
-    criterion::black_box({
-        reader.next().unwrap();
-    });
-}
-
-fn json_list_primitive_to_record_batch() {
-    let schema = Arc::new(Schema::new(vec![
-        Field::new(
-            "c1",
-            DataType::List(Box::new(Field::new("item", DataType::Utf8, true))),
-            true,
-        ),
-        Field::new(
-            "c2",
-            DataType::List(Box::new(Field::new("item", DataType::Float64, true))),
-            true,
-        ),
-        Field::new(
-            "c3",
-            DataType::List(Box::new(Field::new("item", DataType::UInt32, true))),
-            true,
-        ),
-        Field::new(
-            "c4",
-            DataType::List(Box::new(Field::new("item", DataType::Boolean, true))),
-            true,
-        ),
-    ]));
-    let builder = ReaderBuilder::new().with_schema(schema).with_batch_size(64);
-    let json_content = r#"
-        {"c1": ["eleven"], "c2": [6.2222222225, -3.2, null], "c3": [5.0, 6], "c4": [false, true]}
-        {"c1": ["twelve"], "c2": [-55555555555555.2, 12500000.0], "c3": [3, 4, 5]}
-        {"c1": null, "c2": [3], "c3": [125, 127, 129], "c4": [null, false, true]}
-        {"c2": [-35], "c3": [100.0, 200.0], "c4": null}
-        {"c1": ["fifteen"], "c2": [null, 2.1, 1.5, -3], "c4": [true, false, null]}
-        {"c1": ["fifteen"], "c2": [], "c4": [true, false, null]}
-        {"c1": ["eleven"], "c2": [6.2222222225, -3.2, null], "c3": [5.0, 6], "c4": [false, true]}
-        {"c1": ["twelve"], "c2": [-55555555555555.2, 12500000.0], "c3": [3, 4, 5]}
-        {"c1": null, "c2": [3], "c3": [125, 127, 129], "c4": [null, false, true]}
-        {"c2": [-35], "c3": [100.0, 200.0], "c4": null}
-        {"c1": ["fifteen"], "c2": [null, 2.1, 1.5, -3], "c4": [true, false, null]}
-        {"c1": ["fifteen"], "c2": [], "c4": [true, false, null]}
-        "#;
-    let cursor = Cursor::new(json_content);
-    let mut reader = builder.build(cursor).unwrap();
-    #[allow(clippy::unit_arg)]
-    criterion::black_box({
-        reader.next().unwrap();
-    });
-}
-
-fn criterion_benchmark(c: &mut Criterion) {
-    c.bench_function("json_primitive_to_record_batch", |b| {
-        b.iter(json_primitive_to_record_batch)
-    });
-    c.bench_function("json_list_primitive_to_record_batch", |b| {
-        b.iter(json_list_primitive_to_record_batch)
-    });
-}
-
-criterion_group!(benches, criterion_benchmark);
-criterion_main!(benches);
diff --git a/rust/arrow/benches/length_kernel.rs b/rust/arrow/benches/length_kernel.rs
deleted file mode 100644
index b70f6374f8f..00000000000
--- a/rust/arrow/benches/length_kernel.rs
+++ /dev/null
@@ -1,47 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#[macro_use]
-extern crate criterion;
-use criterion::Criterion;
-
-extern crate arrow;
-
-use arrow::array::*;
-use arrow::compute::kernels::length::length;
-
-fn bench_length(array: &StringArray) {
-    criterion::black_box(length(array).unwrap());
-}
-
-fn add_benchmark(c: &mut Criterion) {
-    fn double_vec<T: Clone>(v: Vec<T>) -> Vec<T> {
-        [&v[..], &v[..]].concat()
-    }
-
-    // double ["hello", " ", "world", "!"] 10 times
-    let mut values = vec!["one", "on", "o", ""];
-    for _ in 0..10 {
-        values = double_vec(values);
-    }
-    let array = StringArray::from(values);
-
-    c.bench_function("length", |b| b.iter(|| bench_length(&array)));
-}
-
-criterion_group!(benches, add_benchmark);
-criterion_main!(benches);
diff --git a/rust/arrow/benches/mutable_array.rs b/rust/arrow/benches/mutable_array.rs
deleted file mode 100644
index 52da38a1d54..00000000000
--- a/rust/arrow/benches/mutable_array.rs
+++ /dev/null
@@ -1,60 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#[macro_use]
-extern crate criterion;
-use criterion::Criterion;
-
-use rand::Rng;
-
-extern crate arrow;
-
-use arrow::util::test_util::seedable_rng;
-use arrow::{array::*, util::bench_util::create_string_array};
-
-fn create_slices(size: usize) -> Vec<(usize, usize)> {
-    let rng = &mut seedable_rng();
-
-    (0..size)
-        .map(|_| {
-            let start = rng.gen_range(0, size / 2);
-            let end = rng.gen_range(start + 1, size);
-            (start, end)
-        })
-        .collect()
-}
-
-fn bench<T: Array>(v1: &T, slices: &[(usize, usize)]) {
-    let mut mutable = MutableArrayData::new(vec![v1.data_ref()], false, 5);
-    for (start, end) in slices {
-        mutable.extend(0, *start, *end)
-    }
-    mutable.freeze();
-}
-
-fn add_benchmark(c: &mut Criterion) {
-    let v1 = create_string_array::<i32>(1024, 0.0);
-    let v2 = create_slices(1024);
-    c.bench_function("mutable str 1024", |b| b.iter(|| bench(&v1, &v2)));
-
-    let v1 = create_string_array::<i32>(1024, 0.5);
-    let v2 = create_slices(1024);
-    c.bench_function("mutable str nulls 1024", |b| b.iter(|| bench(&v1, &v2)));
-}
-
-criterion_group!(benches, add_benchmark);
-criterion_main!(benches);
diff --git a/rust/arrow/benches/sort_kernel.rs b/rust/arrow/benches/sort_kernel.rs
deleted file mode 100644
index 74dc0ceae18..00000000000
--- a/rust/arrow/benches/sort_kernel.rs
+++ /dev/null
@@ -1,121 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#[macro_use]
-extern crate criterion;
-use criterion::Criterion;
-
-use std::sync::Arc;
-
-extern crate arrow;
-
-use arrow::compute::kernels::sort::{lexsort, SortColumn};
-use arrow::util::bench_util::*;
-use arrow::{array::*, datatypes::Float32Type};
-
-fn create_array(size: usize, with_nulls: bool) -> ArrayRef {
-    let null_density = if with_nulls { 0.5 } else { 0.0 };
-    let array = create_primitive_array::<Float32Type>(size, null_density);
-    Arc::new(array)
-}
-
-fn bench_sort(arr_a: &ArrayRef, array_b: &ArrayRef, limit: Option<usize>) {
-    let columns = vec![
-        SortColumn {
-            values: arr_a.clone(),
-            options: None,
-        },
-        SortColumn {
-            values: array_b.clone(),
-            options: None,
-        },
-    ];
-
-    criterion::black_box(lexsort(&columns, limit).unwrap());
-}
-
-fn add_benchmark(c: &mut Criterion) {
-    let arr_a = create_array(2u64.pow(10) as usize, false);
-    let arr_b = create_array(2u64.pow(10) as usize, false);
-
-    c.bench_function("sort 2^10", |b| b.iter(|| bench_sort(&arr_a, &arr_b, None)));
-
-    let arr_a = create_array(2u64.pow(12) as usize, false);
-    let arr_b = create_array(2u64.pow(12) as usize, false);
-
-    c.bench_function("sort 2^12", |b| b.iter(|| bench_sort(&arr_a, &arr_b, None)));
-
-    let arr_a = create_array(2u64.pow(10) as usize, true);
-    let arr_b = create_array(2u64.pow(10) as usize, true);
-
-    c.bench_function("sort nulls 2^10", |b| {
-        b.iter(|| bench_sort(&arr_a, &arr_b, None))
-    });
-
-    let arr_a = create_array(2u64.pow(12) as usize, true);
-    let arr_b = create_array(2u64.pow(12) as usize, true);
-
-    c.bench_function("sort nulls 2^12", |b| {
-        b.iter(|| bench_sort(&arr_a, &arr_b, None))
-    });
-
-    // with limit
-    {
-        let arr_a = create_array(2u64.pow(12) as usize, false);
-        let arr_b = create_array(2u64.pow(12) as usize, false);
-        c.bench_function("sort 2^12 limit 10", |b| {
-            b.iter(|| bench_sort(&arr_a, &arr_b, Some(10)))
-        });
-
-        let arr_a = create_array(2u64.pow(12) as usize, false);
-        let arr_b = create_array(2u64.pow(12) as usize, false);
-        c.bench_function("sort 2^12 limit 100", |b| {
-            b.iter(|| bench_sort(&arr_a, &arr_b, Some(100)))
-        });
-
-        let arr_a = create_array(2u64.pow(12) as usize, false);
-        let arr_b = create_array(2u64.pow(12) as usize, false);
-        c.bench_function("sort 2^12 limit 1000", |b| {
-            b.iter(|| bench_sort(&arr_a, &arr_b, Some(1000)))
-        });
-
-        let arr_a = create_array(2u64.pow(12) as usize, false);
-        let arr_b = create_array(2u64.pow(12) as usize, false);
-        c.bench_function("sort 2^12 limit 2^12", |b| {
-            b.iter(|| bench_sort(&arr_a, &arr_b, Some(2u64.pow(12) as usize)))
-        });
-
-        let arr_a = create_array(2u64.pow(12) as usize, true);
-        let arr_b = create_array(2u64.pow(12) as usize, true);
-
-        c.bench_function("sort nulls 2^12 limit 10", |b| {
-            b.iter(|| bench_sort(&arr_a, &arr_b, Some(10)))
-        });
-        c.bench_function("sort nulls 2^12 limit 100", |b| {
-            b.iter(|| bench_sort(&arr_a, &arr_b, Some(100)))
-        });
-        c.bench_function("sort nulls 2^12 limit 1000", |b| {
-            b.iter(|| bench_sort(&arr_a, &arr_b, Some(1000)))
-        });
-        c.bench_function("sort nulls 2^12 limit 2^12", |b| {
-            b.iter(|| bench_sort(&arr_a, &arr_b, Some(2u64.pow(12) as usize)))
-        });
-    }
-}
-
-criterion_group!(benches, add_benchmark);
-criterion_main!(benches);
diff --git a/rust/arrow/benches/take_kernels.rs b/rust/arrow/benches/take_kernels.rs
deleted file mode 100644
index 2853eb5d476..00000000000
--- a/rust/arrow/benches/take_kernels.rs
+++ /dev/null
@@ -1,128 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#[macro_use]
-extern crate criterion;
-use criterion::Criterion;
-
-use rand::Rng;
-
-extern crate arrow;
-
-use arrow::compute::take;
-use arrow::datatypes::*;
-use arrow::util::test_util::seedable_rng;
-use arrow::{array::*, util::bench_util::*};
-
-fn create_random_index(size: usize, null_density: f32) -> UInt32Array {
-    let mut rng = seedable_rng();
-    let mut builder = UInt32Builder::new(size);
-    for _ in 0..size {
-        if rng.gen::<f32>() < null_density {
-            builder.append_null().unwrap()
-        } else {
-            let value = rng.gen_range::<u32, _, _>(0u32, size as u32);
-            builder.append_value(value).unwrap();
-        }
-    }
-    builder.finish()
-}
-
-fn bench_take(values: &dyn Array, indices: &UInt32Array) {
-    criterion::black_box(take(values, &indices, None).unwrap());
-}
-
-fn add_benchmark(c: &mut Criterion) {
-    let values = create_primitive_array::<Int32Type>(512, 0.0);
-    let indices = create_random_index(512, 0.0);
-    c.bench_function("take i32 512", |b| b.iter(|| bench_take(&values, &indices)));
-    let values = create_primitive_array::<Int32Type>(1024, 0.0);
-    let indices = create_random_index(1024, 0.0);
-    c.bench_function("take i32 1024", |b| {
-        b.iter(|| bench_take(&values, &indices))
-    });
-
-    let indices = create_random_index(512, 0.5);
-    c.bench_function("take i32 nulls 512", |b| {
-        b.iter(|| bench_take(&values, &indices))
-    });
-    let values = create_primitive_array::<Int32Type>(1024, 0.0);
-    let indices = create_random_index(1024, 0.5);
-    c.bench_function("take i32 nulls 1024", |b| {
-        b.iter(|| bench_take(&values, &indices))
-    });
-
-    let values = create_boolean_array(512, 0.0, 0.5);
-    let indices = create_random_index(512, 0.0);
-    c.bench_function("take bool 512", |b| {
-        b.iter(|| bench_take(&values, &indices))
-    });
-    let values = create_boolean_array(1024, 0.0, 0.5);
-    let indices = create_random_index(1024, 0.0);
-    c.bench_function("take bool 1024", |b| {
-        b.iter(|| bench_take(&values, &indices))
-    });
-
-    let values = create_boolean_array(512, 0.0, 0.5);
-    let indices = create_random_index(512, 0.5);
-    c.bench_function("take bool nulls 512", |b| {
-        b.iter(|| bench_take(&values, &indices))
-    });
-    let values = create_boolean_array(1024, 0.0, 0.5);
-    let indices = create_random_index(1024, 0.5);
-    c.bench_function("take bool nulls 1024", |b| {
-        b.iter(|| bench_take(&values, &indices))
-    });
-
-    let values = create_string_array::<i32>(512, 0.0);
-    let indices = create_random_index(512, 0.0);
-    c.bench_function("take str 512", |b| b.iter(|| bench_take(&values, &indices)));
-
-    let values = create_string_array::<i32>(1024, 0.0);
-    let indices = create_random_index(1024, 0.0);
-    c.bench_function("take str 1024", |b| {
-        b.iter(|| bench_take(&values, &indices))
-    });
-
-    let values = create_string_array::<i32>(512, 0.0);
-    let indices = create_random_index(512, 0.5);
-    c.bench_function("take str null indices 512", |b| {
-        b.iter(|| bench_take(&values, &indices))
-    });
-
-    let values = create_string_array::<i32>(1024, 0.0);
-    let indices = create_random_index(1024, 0.5);
-    c.bench_function("take str null indices 1024", |b| {
-        b.iter(|| bench_take(&values, &indices))
-    });
-
-    let values = create_string_array::<i32>(1024, 0.5);
-
-    let indices = create_random_index(1024, 0.0);
-    c.bench_function("take str null values 1024", |b| {
-        b.iter(|| bench_take(&values, &indices))
-    });
-
-    let values = create_string_array::<i32>(1024, 0.5);
-    let indices = create_random_index(1024, 0.5);
-    c.bench_function("take str null values null indices 1024", |b| {
-        b.iter(|| bench_take(&values, &indices))
-    });
-}
-
-criterion_group!(benches, add_benchmark);
-criterion_main!(benches);
diff --git a/rust/arrow/build.rs b/rust/arrow/build.rs
deleted file mode 100644
index 2e3a711533c..00000000000
--- a/rust/arrow/build.rs
+++ /dev/null
@@ -1,26 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use cfg_aliases::cfg_aliases;
-
-fn main() {
-    println!("cargo:rerun-if-changed=build.rs");
-    // Setup cfg aliases
-    cfg_aliases! {
-        simd: { all(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"), feature = "simd") },
-    }
-}
diff --git a/rust/arrow/examples/builders.rs b/rust/arrow/examples/builders.rs
deleted file mode 100644
index 61cce0ed97a..00000000000
--- a/rust/arrow/examples/builders.rs
+++ /dev/null
@@ -1,131 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-///! Many builders are available to easily create different types of arrow arrays
-extern crate arrow;
-
-use std::sync::Arc;
-
-use arrow::array::{
-    Array, ArrayData, BooleanArray, Int32Array, Int32Builder, ListArray, PrimitiveArray,
-    StringArray, StructArray,
-};
-use arrow::buffer::Buffer;
-use arrow::datatypes::{DataType, Date64Type, Field, Time64NanosecondType, ToByteSlice};
-
-fn main() {
-    // Primitive Arrays
-    //
-    // Primitive arrays are arrays of fixed-width primitive types (bool, u8, u16, u32,
-    // u64, i8, i16, i32, i64, f32, f64)
-
-    // Create a new builder with a capacity of 100
-    let mut primitive_array_builder = Int32Builder::new(100);
-
-    // Append an individual primitive value
-    primitive_array_builder.append_value(55).unwrap();
-
-    // Append a null value
-    primitive_array_builder.append_null().unwrap();
-
-    // Append a slice of primitive values
-    primitive_array_builder.append_slice(&[39, 89, 12]).unwrap();
-
-    // Append lots of values
-    primitive_array_builder.append_null().unwrap();
-    primitive_array_builder
-        .append_slice(&(25..50).collect::<Vec<i32>>())
-        .unwrap();
-
-    // Build the `PrimitiveArray`
-    let primitive_array = primitive_array_builder.finish();
-    // Long arrays will have an ellipsis printed in the middle
-    println!("{:?}", primitive_array);
-
-    // Arrays can also be built from `Vec<Option<T>>`. `None`
-    // represents a null value in the array.
-    let date_array: PrimitiveArray<Date64Type> =
-        vec![Some(1550902545147), None, Some(1550902545147)].into();
-    println!("{:?}", date_array);
-
-    let time_array: PrimitiveArray<Time64NanosecondType> =
-        (0..100).collect::<Vec<i64>>().into();
-    println!("{:?}", time_array);
-
-    // We can build arrays directly from the underlying buffers.
-
-    // BinaryArrays are arrays of byte arrays, where each byte array
-    // is a slice of an underlying buffer.
-
-    // Array data: ["hello", null, "parquet"]
-    let values: [u8; 12] = [
-        b'h', b'e', b'l', b'l', b'o', b'p', b'a', b'r', b'q', b'u', b'e', b't',
-    ];
-    let offsets: [i32; 4] = [0, 5, 5, 12];
-
-    let array_data = ArrayData::builder(DataType::Utf8)
-        .len(3)
-        .add_buffer(Buffer::from(offsets.to_byte_slice()))
-        .add_buffer(Buffer::from(&values[..]))
-        .null_bit_buffer(Buffer::from([0b00000101]))
-        .build();
-    let binary_array = StringArray::from(array_data);
-    println!("{:?}", binary_array);
-
-    // ListArrays are similar to ByteArrays: they are arrays of other
-    // arrays, where each child array is a slice of the underlying
-    // buffer.
-    let value_data = ArrayData::builder(DataType::Int32)
-        .len(8)
-        .add_buffer(Buffer::from(&[0, 1, 2, 3, 4, 5, 6, 7].to_byte_slice()))
-        .build();
-
-    // Construct a buffer for value offsets, for the nested array:
-    //  [[0, 1, 2], [3, 4, 5], [6, 7]]
-    let value_offsets = Buffer::from(&[0, 3, 6, 8].to_byte_slice());
-
-    // Construct a list array from the above two
-    let list_data_type =
-        DataType::List(Box::new(Field::new("item", DataType::Int32, false)));
-    let list_data = ArrayData::builder(list_data_type)
-        .len(3)
-        .add_buffer(value_offsets)
-        .add_child_data(value_data)
-        .build();
-    let list_array = ListArray::from(list_data);
-
-    println!("{:?}", list_array);
-
-    // StructArrays are arrays of tuples, where each tuple element is
-    // from a child array. (In other words, they're like zipping
-    // multiple columns into one and giving each subcolumn a label.)
-
-    // StructArrays can be constructed using the StructArray::from
-    // helper, which takes the underlying arrays and field types.
-    let struct_array = StructArray::from(vec![
-        (
-            Field::new("b", DataType::Boolean, false),
-            Arc::new(BooleanArray::from(vec![false, false, true, true]))
-                as Arc<dyn Array>,
-        ),
-        (
-            Field::new("c", DataType::Int32, false),
-            Arc::new(Int32Array::from(vec![42, 28, 19, 31])),
-        ),
-    ]);
-    println!("{:?}", struct_array);
-}
diff --git a/rust/arrow/examples/dynamic_types.rs b/rust/arrow/examples/dynamic_types.rs
deleted file mode 100644
index 58e41560e23..00000000000
--- a/rust/arrow/examples/dynamic_types.rs
+++ /dev/null
@@ -1,101 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-///! This example demonstrates dealing with mixed types dynamically at runtime
-use std::sync::Arc;
-
-extern crate arrow;
-
-use arrow::array::*;
-use arrow::datatypes::*;
-use arrow::error::Result;
-use arrow::record_batch::*;
-
-fn main() -> Result<()> {
-    // define schema
-    let schema = Schema::new(vec![
-        Field::new("id", DataType::Int32, false),
-        Field::new(
-            "nested",
-            DataType::Struct(vec![
-                Field::new("a", DataType::Utf8, false),
-                Field::new("b", DataType::Float64, false),
-                Field::new("c", DataType::Float64, false),
-            ]),
-            false,
-        ),
-    ]);
-
-    // create some data
-    let id = Int32Array::from(vec![1, 2, 3, 4, 5]);
-
-    let nested = StructArray::from(vec![
-        (
-            Field::new("a", DataType::Utf8, false),
-            Arc::new(StringArray::from(vec!["a", "b", "c", "d", "e"])) as Arc<dyn Array>,
-        ),
-        (
-            Field::new("b", DataType::Float64, false),
-            Arc::new(Float64Array::from(vec![1.1, 2.2, 3.3, 4.4, 5.5])),
-        ),
-        (
-            Field::new("c", DataType::Float64, false),
-            Arc::new(Float64Array::from(vec![2.2, 3.3, 4.4, 5.5, 6.6])),
-        ),
-    ]);
-
-    // build a record batch
-    let batch =
-        RecordBatch::try_new(Arc::new(schema), vec![Arc::new(id), Arc::new(nested)])?;
-
-    process(&batch);
-    Ok(())
-}
-
-/// Create a new batch by performing a projection of id, nested.c
-fn process(batch: &RecordBatch) {
-    let id = batch.column(0);
-    let nested = batch
-        .column(1)
-        .as_any()
-        .downcast_ref::<StructArray>()
-        .unwrap();
-
-    let _nested_b = nested
-        .column(1)
-        .as_any()
-        .downcast_ref::<Float64Array>()
-        .unwrap();
-    let nested_c: &Float64Array = nested
-        .column(2)
-        .as_any()
-        .downcast_ref::<Float64Array>()
-        .unwrap();
-
-    let projected_schema = Schema::new(vec![
-        Field::new("id", DataType::Int32, false),
-        Field::new("sum", DataType::Float64, false),
-    ]);
-
-    let _ = RecordBatch::try_new(
-        Arc::new(projected_schema),
-        vec![
-            id.clone(), // NOTE: this is cloning the Arc not the array data
-            Arc::new(Float64Array::from(nested_c.data().clone())),
-        ],
-    );
-}
diff --git a/rust/arrow/examples/read_csv.rs b/rust/arrow/examples/read_csv.rs
deleted file mode 100644
index 9e2b9c34c86..00000000000
--- a/rust/arrow/examples/read_csv.rs
+++ /dev/null
@@ -1,43 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-extern crate arrow;
-
-use std::fs::File;
-use std::sync::Arc;
-
-use arrow::csv;
-use arrow::datatypes::{DataType, Field, Schema};
-#[cfg(feature = "prettyprint")]
-use arrow::util::pretty::print_batches;
-
-fn main() {
-    let schema = Schema::new(vec![
-        Field::new("city", DataType::Utf8, false),
-        Field::new("lat", DataType::Float64, false),
-        Field::new("lng", DataType::Float64, false),
-    ]);
-
-    let file = File::open("test/data/uk_cities.csv").unwrap();
-
-    let mut csv = csv::Reader::new(file, Arc::new(schema), false, None, 1024, None, None);
-    let _batch = csv.next().unwrap().unwrap();
-    #[cfg(feature = "prettyprint")]
-    {
-        print_batches(&[_batch]).unwrap();
-    }
-}
diff --git a/rust/arrow/examples/read_csv_infer_schema.rs b/rust/arrow/examples/read_csv_infer_schema.rs
deleted file mode 100644
index 93253e72cff..00000000000
--- a/rust/arrow/examples/read_csv_infer_schema.rs
+++ /dev/null
@@ -1,36 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-extern crate arrow;
-
-use arrow::csv;
-#[cfg(feature = "prettyprint")]
-use arrow::util::pretty::print_batches;
-use std::fs::File;
-
-fn main() {
-    let file = File::open("test/data/uk_cities_with_headers.csv").unwrap();
-    let builder = csv::ReaderBuilder::new()
-        .has_header(true)
-        .infer_schema(Some(100));
-    let mut csv = builder.build(file).unwrap();
-    let _batch = csv.next().unwrap().unwrap();
-    #[cfg(feature = "prettyprint")]
-    {
-        print_batches(&[_batch]).unwrap();
-    }
-}
diff --git a/rust/arrow/examples/tensor_builder.rs b/rust/arrow/examples/tensor_builder.rs
deleted file mode 100644
index 1ef53920e04..00000000000
--- a/rust/arrow/examples/tensor_builder.rs
+++ /dev/null
@@ -1,67 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-///! Tensor builder example
-extern crate arrow;
-
-use arrow::array::*; //{Int32BufferBuilder, Float32BufferBuilder};
-use arrow::buffer::Buffer;
-use arrow::datatypes::ToByteSlice;
-use arrow::error::Result;
-use arrow::tensor::{Float32Tensor, Int32Tensor};
-
-fn main() -> Result<()> {
-    // Building a tensor using the buffer builder for Int32
-    // The buffer builder will pad the appended numbers
-    // to match the required size for each buffer
-    let mut builder = Int32BufferBuilder::new(16);
-    for i in 0..16 {
-        builder.append(i);
-    }
-    let buf = builder.finish();
-
-    // When building a tensor the buffer and shape are required
-    // The new function will estimate the expected stride for the
-    // storage data
-    let tensor = Int32Tensor::try_new(buf, Some(vec![2, 8]), None, None)?;
-    println!("Int32 Tensor");
-    println!("{:?}", tensor);
-
-    // Creating a tensor using float type buffer builder
-    let mut builder = Float32BufferBuilder::new(4);
-    builder.append(1.0);
-    builder.append(2.0);
-    builder.append(3.0);
-    builder.append(4.0);
-    let buf = builder.finish();
-
-    // When building the tensor the buffer and shape are necessary
-    // The new function will estimate the expected stride for the
-    // storage data
-    let tensor = Float32Tensor::try_new(buf, Some(vec![2, 2]), None, None)?;
-    println!("\nFloat32 Tensor");
-    println!("{:?}", tensor);
-
-    // In order to build a tensor from an array the function to_byte_slice add the
-    // required padding to the elements in the array.
-    let buf = Buffer::from(&[0, 1, 2, 3, 4, 5, 6, 7, 9, 10].to_byte_slice());
-    let tensor = Int32Tensor::try_new(buf, Some(vec![2, 5]), None, None)?;
-    println!("\nInt32 Tensor");
-    println!("{:?}", tensor);
-
-    Ok(())
-}
diff --git a/rust/arrow/format-0ed34c83.patch b/rust/arrow/format-0ed34c83.patch
deleted file mode 100644
index 5da0a0c51f0..00000000000
--- a/rust/arrow/format-0ed34c83.patch
+++ /dev/null
@@ -1,220 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-diff --git a/format/Message.fbs b/format/Message.fbs
-index 1a7e0dfff..f1c18d765 100644
---- a/format/Message.fbs
-+++ b/format/Message.fbs
-@@ -28,7 +28,7 @@ namespace org.apache.arrow.flatbuf;
- /// Metadata about a field at some level of a nested type tree (but not
- /// its children).
- ///
--/// For example, a List<Int16> with values [[1, 2, 3], null, [4], [5, 6], null]
-+/// For example, a List<Int16> with values `[[1, 2, 3], null, [4], [5, 6], null]`
- /// would have {length: 5, null_count: 2} for its List node, and {length: 6,
- /// null_count: 0} for its Int16 node, as separate FieldNode structs
- struct FieldNode {
-diff --git a/format/Schema.fbs b/format/Schema.fbs
-index 3b37e5d85..3b00dd478 100644
---- a/format/Schema.fbs
-+++ b/format/Schema.fbs
-@@ -110,10 +110,11 @@ table FixedSizeList {
- /// not enforced.
- ///
- /// Map
-+/// ```text
- ///   - child[0] entries: Struct
- ///     - child[0] key: K
- ///     - child[1] value: V
--///
-+/// ```
- /// Neither the "entries" field nor the "key" field may be nullable.
- ///
- /// The metadata is structured so that Arrow systems without special handling
-@@ -129,7 +130,7 @@ enum UnionMode:short { Sparse, Dense }
- /// A union is a complex type with children in Field
- /// By default ids in the type vector refer to the offsets in the children
- /// optionally typeIds provides an indirection between the child offset and the type id
--/// for each child typeIds[offset] is the id used in the type vector
-+/// for each child `typeIds[offset]` is the id used in the type vector
- table Union {
-   mode: UnionMode;
-   typeIds: [ int ]; // optional, describes typeid of each child.
-diff --git a/format/SparseTensor.fbs b/format/SparseTensor.fbs
-index 3fe8a7582..a6fd2f9e7 100644
---- a/format/SparseTensor.fbs
-+++ b/format/SparseTensor.fbs
-@@ -37,21 +37,21 @@ namespace org.apache.arrow.flatbuf;
- ///
- /// For example, let X be a 2x3x4x5 tensor, and it has the following
- /// 6 non-zero values:
--///
-+/// ```text
- ///   X[0, 1, 2, 0] := 1
- ///   X[1, 1, 2, 3] := 2
- ///   X[0, 2, 1, 0] := 3
- ///   X[0, 1, 3, 0] := 4
- ///   X[0, 1, 2, 1] := 5
- ///   X[1, 2, 0, 4] := 6
--///
-+/// ```
- /// In COO format, the index matrix of X is the following 4x6 matrix:
--///
-+/// ```text
- ///   [[0, 0, 0, 0, 1, 1],
- ///    [1, 1, 1, 2, 1, 2],
- ///    [2, 2, 3, 1, 2, 0],
- ///    [0, 1, 0, 0, 3, 4]]
--///
-+/// ```
- /// When isCanonical is true, the indices is sorted in lexicographical order
- /// (row-major order), and it does not have duplicated entries.  Otherwise,
- /// the indices may not be sorted, or may have duplicated entries.
-@@ -86,26 +86,27 @@ table SparseMatrixIndexCSX {
- 
-   /// indptrBuffer stores the location and size of indptr array that
-   /// represents the range of the rows.
--  /// The i-th row spans from indptr[i] to indptr[i+1] in the data.
-+  /// The i-th row spans from `indptr[i]` to `indptr[i+1]` in the data.
-   /// The length of this array is 1 + (the number of rows), and the type
-   /// of index value is long.
-   ///
-   /// For example, let X be the following 6x4 matrix:
--  ///
-+  /// ```text
-   ///   X := [[0, 1, 2, 0],
-   ///         [0, 0, 3, 0],
-   ///         [0, 4, 0, 5],
-   ///         [0, 0, 0, 0],
-   ///         [6, 0, 7, 8],
-   ///         [0, 9, 0, 0]].
--  ///
-+  /// ```
-   /// The array of non-zero values in X is:
--  ///
-+  /// ```text
-   ///   values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9].
--  ///
-+  /// ```
-   /// And the indptr of X is:
--  ///
-+  /// ```text
-   ///   indptr(X) = [0, 2, 3, 5, 5, 8, 10].
-+  /// ```
-   indptrBuffer: Buffer (required);
- 
-   /// The type of values in indicesBuffer
-@@ -116,9 +117,9 @@ table SparseMatrixIndexCSX {
-   /// The type of index value is long.
-   ///
-   /// For example, the indices of the above X is:
--  ///
-+  /// ```text
-   ///   indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1].
--  ///
-+  /// ```
-   /// Note that the indices are sorted in lexicographical order for each row.
-   indicesBuffer: Buffer (required);
- }
-@@ -126,7 +127,7 @@ table SparseMatrixIndexCSX {
- /// Compressed Sparse Fiber (CSF) sparse tensor index.
- table SparseTensorIndexCSF {
-   /// CSF is a generalization of compressed sparse row (CSR) index.
--  /// See [smith2017knl]: http://shaden.io/pub-files/smith2017knl.pdf
-+  /// See [smith2017knl](http://shaden.io/pub-files/smith2017knl.pdf)
-   ///
-   /// CSF index recursively compresses each dimension of a tensor into a set
-   /// of prefix trees. Each path from a root to leaf forms one tensor
-@@ -135,7 +136,7 @@ table SparseTensorIndexCSF {
-   ///
-   /// For example, let X be a 2x3x4x5 tensor and let it have the following
-   /// 8 non-zero values:
--  ///
-+  /// ```text
-   ///   X[0, 0, 0, 1] := 1
-   ///   X[0, 0, 0, 2] := 2
-   ///   X[0, 1, 0, 0] := 3
-@@ -144,9 +145,9 @@ table SparseTensorIndexCSF {
-   ///   X[1, 1, 1, 0] := 6
-   ///   X[1, 1, 1, 1] := 7
-   ///   X[1, 1, 1, 2] := 8
--  ///
-+  /// ```
-   /// As a prefix tree this would be represented as:
--  ///
-+  /// ```text
-   ///         0          1
-   ///        / \         |
-   ///       0   1        1
-@@ -154,24 +155,24 @@ table SparseTensorIndexCSF {
-   ///     0   0   1      1
-   ///    /|  /|   |    /| |
-   ///   1 2 0 2   0   0 1 2
--
-+  /// ```
-   /// The type of values in indptrBuffers
-   indptrType: Int (required);
- 
-   /// indptrBuffers stores the sparsity structure.
-   /// Each two consecutive dimensions in a tensor correspond to a buffer in
--  /// indptrBuffers. A pair of consecutive values at indptrBuffers[dim][i]
--  /// and indptrBuffers[dim][i + 1] signify a range of nodes in
--  /// indicesBuffers[dim + 1] who are children of indicesBuffers[dim][i] node.
-+  /// indptrBuffers. A pair of consecutive values at `indptrBuffers[dim][i]`
-+  /// and `indptrBuffers[dim][i + 1]` signify a range of nodes in
-+  /// `indicesBuffers[dim + 1]` who are children of `indicesBuffers[dim][i]` node.
-   ///
-   /// For example, the indptrBuffers for the above X is:
--  ///
-+  /// ```text
-   ///   indptrBuffer(X) = [
-   ///                       [0, 2, 3],
-   ///                       [0, 1, 3, 4],
-   ///                       [0, 2, 4, 5, 8]
-   ///                     ].
--  ///
-+  /// ```
-   indptrBuffers: [Buffer] (required);
- 
-   /// The type of values in indicesBuffers
-@@ -180,22 +181,22 @@ table SparseTensorIndexCSF {
-   /// indicesBuffers stores values of nodes.
-   /// Each tensor dimension corresponds to a buffer in indicesBuffers.
-   /// For example, the indicesBuffers for the above X is:
--  ///
-+  /// ```text
-   ///   indicesBuffer(X) = [
-   ///                        [0, 1],
-   ///                        [0, 1, 1],
-   ///                        [0, 0, 1, 1],
-   ///                        [1, 2, 0, 2, 0, 0, 1, 2]
-   ///                      ].
--  ///
-+  /// ```
-   indicesBuffers: [Buffer] (required);
- 
-   /// axisOrder stores the sequence in which dimensions were traversed to
-   /// produce the prefix tree.
-   /// For example, the axisOrder for the above X is:
--  ///
-+  /// ```text
-   ///   axisOrder(X) = [0, 1, 2, 3].
--  ///
-+  /// ```
-   axisOrder: [int] (required);
- }
-
diff --git a/rust/arrow/regen.sh b/rust/arrow/regen.sh
deleted file mode 100755
index 9d384b6b63b..00000000000
--- a/rust/arrow/regen.sh
+++ /dev/null
@@ -1,157 +0,0 @@
-#!/bin/bash -e
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
-
-# Change to the toplevel Rust directory
-pushd $DIR/../../
-
-echo "Build flatc from source ..."
-
-FB_URL="https://github.com/google/flatbuffers"
-# https://github.com/google/flatbuffers/pull/6393
-FB_COMMIT="408cf5802415e1dea65fef7489a6c2f3740fb381"
-FB_DIR="rust/arrow/.flatbuffers"
-FLATC="$FB_DIR/bazel-bin/flatc"
-
-if [ -z $(which bazel) ]; then
-    echo "bazel is required to build flatc"
-    exit 1
-fi
-
-echo "Bazel version: $(bazel version | head -1 | awk -F':' '{print $2}')"
-
-if [ ! -e $FB_DIR ]; then
-    echo "git clone $FB_URL ..."
-    git clone -b master --no-tag --depth 1 $FB_URL $FB_DIR
-else
-    echo "git pull $FB_URL ..."
-    git -C $FB_DIR pull
-fi
-
-echo "hard reset to $FB_COMMIT"
-git -C $FB_DIR reset --hard $FB_COMMIT
-
-pushd $FB_DIR
-echo "run: bazel build :flatc ..."
-bazel build :flatc
-popd
-
-FB_PATCH="rust/arrow/format-0ed34c83.patch"
-echo "Patch flatbuffer files with ${FB_PATCH} for cargo doc"
-echo "NOTE: the patch MAY need update in case of changes in format/*.fbs"
-git apply --check ${FB_PATCH} && git apply ${FB_PATCH}
-
-# Execute the code generation:
-$FLATC --filename-suffix "" --rust -o rust/arrow/src/ipc/gen/ format/*.fbs
-
-# Reset changes to format/
-git checkout -- format
-
-# Now the files are wrongly named so we have to change that.
-popd
-pushd $DIR/src/ipc/gen
-
-PREFIX=$(cat <<'HEREDOC'
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#![allow(dead_code)]
-#![allow(unused_imports)]
-
-use std::{cmp::Ordering, mem};
-use flatbuffers::EndianScalar;
-
-HEREDOC
-)
-
-SCHEMA_IMPORT="\nuse crate::ipc::gen::Schema::*;"
-SPARSE_TENSOR_IMPORT="\nuse crate::ipc::gen::SparseTensor::*;"
-TENSOR_IMPORT="\nuse crate::ipc::gen::Tensor::*;"
-
-# For flatbuffer(1.12.0+), remove: use crate::${name}::\*;
-names=("File" "Message" "Schema" "SparseTensor" "Tensor")
-
-# Remove all generated lines we don't need
-for f in `ls *.rs`; do
-    if [[ $f == "mod.rs" ]]; then
-        continue
-    fi
-
-    echo "Modifying: $f"
-    sed -i '' '/extern crate flatbuffers;/d' $f
-    sed -i '' '/use self::flatbuffers::EndianScalar;/d' $f
-    sed -i '' '/\#\[allow(unused_imports, dead_code)\]/d' $f
-    sed -i '' '/pub mod org {/d' $f
-    sed -i '' '/pub mod apache {/d' $f
-    sed -i '' '/pub mod arrow {/d' $f
-    sed -i '' '/pub mod flatbuf {/d' $f
-    sed -i '' '/}  \/\/ pub mod flatbuf/d' $f
-    sed -i '' '/}  \/\/ pub mod arrow/d' $f
-    sed -i '' '/}  \/\/ pub mod apache/d' $f
-    sed -i '' '/}  \/\/ pub mod org/d' $f
-    sed -i '' '/use std::mem;/d' $f
-    sed -i '' '/use std::cmp::Ordering;/d' $f
-
-    # required by flatc 1.12.0+
-    sed -i '' "/\#\!\[allow(unused_imports, dead_code)\]/d" $f
-    for name in ${names[@]}; do
-        sed -i '' "/use crate::${name}::\*;/d" $f
-        sed -i '' "s/use self::flatbuffers::Verifiable;/use flatbuffers::Verifiable;/g" $f
-    done
-
-    # Replace all occurrences of "type__" with "type_", "TYPE__" with "TYPE_".
-    sed -i '' 's/type__/type_/g' $f
-    sed -i '' 's/TYPE__/TYPE_/g' $f
-
-    # Some files need prefixes
-    if [[ $f == "File.rs" ]]; then 
-        # Now prefix the file with the static contents
-        echo -e "${PREFIX}" "${SCHEMA_IMPORT}" | cat - $f > temp && mv temp $f
-    elif [[ $f == "Message.rs" ]]; then
-        echo -e "${PREFIX}" "${SCHEMA_IMPORT}" "${SPARSE_TENSOR_IMPORT}" "${TENSOR_IMPORT}" | cat - $f > temp && mv temp $f
-    elif [[ $f == "SparseTensor.rs" ]]; then
-        echo -e "${PREFIX}" "${SCHEMA_IMPORT}" "${TENSOR_IMPORT}" | cat - $f > temp && mv temp $f
-    elif [[ $f == "Tensor.rs" ]]; then
-        echo -e "${PREFIX}" "${SCHEMA_IMPORT}" | cat - $f > temp && mv temp $f
-    else
-        echo "${PREFIX}" | cat - $f > temp && mv temp $f
-    fi
-done
-
-# Return back to base directory
-popd
-cargo +stable fmt -- src/ipc/gen/*
-
-echo "DONE!"
-echo "Please run 'cargo doc' and 'cargo test' with nightly and stable, "
-echo "and fix possible errors or warnings!"
diff --git a/rust/arrow/src/alloc/alignment.rs b/rust/arrow/src/alloc/alignment.rs
deleted file mode 100644
index dbf4602f83a..00000000000
--- a/rust/arrow/src/alloc/alignment.rs
+++ /dev/null
@@ -1,119 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// NOTE: Below code is written for spatial/temporal prefetcher optimizations. Memory allocation
-// should align well with usage pattern of cache access and block sizes on layers of storage levels from
-// registers to non-volatile memory. These alignments are all cache aware alignments incorporated
-// from [cuneiform](https://crates.io/crates/cuneiform) crate. This approach mimicks Intel TBB's
-// cache_aligned_allocator which exploits cache locality and minimizes prefetch signals
-// resulting in less round trip time between the layers of storage.
-// For further info: https://software.intel.com/en-us/node/506094
-
-// 32-bit architecture and things other than netburst microarchitecture are using 64 bytes.
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "x86")]
-pub const ALIGNMENT: usize = 1 << 6;
-
-// Intel x86_64:
-// L2D streamer from L1:
-// Loads data or instructions from memory to the second-level cache. To use the streamer,
-// organize the data or instructions in blocks of 128 bytes, aligned on 128 bytes.
-// - https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "x86_64")]
-pub const ALIGNMENT: usize = 1 << 7;
-
-// 24Kc:
-// Data Line Size
-// - https://s3-eu-west-1.amazonaws.com/downloads-mips/documents/MD00346-2B-24K-DTS-04.00.pdf
-// - https://gitlab.e.foundation/e/devices/samsung/n7100/stable_android_kernel_samsung_smdk4412/commit/2dbac10263b2f3c561de68b4c369bc679352ccee
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "mips")]
-pub const ALIGNMENT: usize = 1 << 5;
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "mips64")]
-pub const ALIGNMENT: usize = 1 << 5;
-
-// Defaults for powerpc
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "powerpc")]
-pub const ALIGNMENT: usize = 1 << 5;
-
-// Defaults for the ppc 64
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "powerpc64")]
-pub const ALIGNMENT: usize = 1 << 6;
-
-// e.g.: sifive
-// - https://github.com/torvalds/linux/blob/master/Documentation/devicetree/bindings/riscv/sifive-l2-cache.txt#L41
-// in general all of them are the same.
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "riscv")]
-pub const ALIGNMENT: usize = 1 << 6;
-
-// This size is same across all hardware for this architecture.
-// - https://docs.huihoo.com/doxygen/linux/kernel/3.7/arch_2s390_2include_2asm_2cache_8h.html
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "s390x")]
-pub const ALIGNMENT: usize = 1 << 8;
-
-// This size is same across all hardware for this architecture.
-// - https://docs.huihoo.com/doxygen/linux/kernel/3.7/arch_2sparc_2include_2asm_2cache_8h.html#a9400cc2ba37e33279bdbc510a6311fb4
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "sparc")]
-pub const ALIGNMENT: usize = 1 << 5;
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "sparc64")]
-pub const ALIGNMENT: usize = 1 << 6;
-
-// On ARM cache line sizes are fixed. both v6 and v7.
-// Need to add board specific or platform specific things later.
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "thumbv6")]
-pub const ALIGNMENT: usize = 1 << 5;
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "thumbv7")]
-pub const ALIGNMENT: usize = 1 << 5;
-
-// Operating Systems cache size determines this.
-// Currently no way to determine this without runtime inference.
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "wasm32")]
-pub const ALIGNMENT: usize = 1 << 6;
-
-// Same as v6 and v7.
-// List goes like that:
-// Cortex A, M, R, ARM v7, v7-M, Krait and NeoverseN uses this size.
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "arm")]
-pub const ALIGNMENT: usize = 1 << 5;
-
-// Combined from 4 sectors. Volta says 128.
-// Prevent chunk optimizations better to go to the default size.
-// If you have smaller data with less padded functionality then use 32 with force option.
-// - https://devtalk.nvidia.com/default/topic/803600/variable-cache-line-width-/
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "nvptx")]
-pub const ALIGNMENT: usize = 1 << 7;
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "nvptx64")]
-pub const ALIGNMENT: usize = 1 << 7;
-
-// This size is same across all hardware for this architecture.
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "aarch64")]
-pub const ALIGNMENT: usize = 1 << 6;
diff --git a/rust/arrow/src/alloc/mod.rs b/rust/arrow/src/alloc/mod.rs
deleted file mode 100644
index a225d32dd82..00000000000
--- a/rust/arrow/src/alloc/mod.rs
+++ /dev/null
@@ -1,136 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Defines memory-related functions, such as allocate/deallocate/reallocate memory
-//! regions, cache and allocation alignments.
-
-use std::mem::size_of;
-use std::ptr::NonNull;
-use std::{
-    alloc::{handle_alloc_error, Layout},
-    sync::atomic::AtomicIsize,
-};
-
-mod alignment;
-mod types;
-
-pub use alignment::ALIGNMENT;
-pub use types::NativeType;
-
-// If this number is not zero after all objects have been `drop`, there is a memory leak
-pub static mut ALLOCATIONS: AtomicIsize = AtomicIsize::new(0);
-
-#[inline]
-unsafe fn null_pointer<T: NativeType>() -> NonNull<T> {
-    NonNull::new_unchecked(ALIGNMENT as *mut T)
-}
-
-/// Allocates a cache-aligned memory region of `size` bytes with uninitialized values.
-/// This is more performant than using [allocate_aligned_zeroed] when all bytes will have
-/// an unknown or non-zero value and is semantically similar to `malloc`.
-pub fn allocate_aligned<T: NativeType>(size: usize) -> NonNull<T> {
-    unsafe {
-        if size == 0 {
-            null_pointer()
-        } else {
-            let size = size * size_of::<T>();
-            ALLOCATIONS.fetch_add(size as isize, std::sync::atomic::Ordering::SeqCst);
-
-            let layout = Layout::from_size_align_unchecked(size, ALIGNMENT);
-            let raw_ptr = std::alloc::alloc(layout) as *mut T;
-            NonNull::new(raw_ptr).unwrap_or_else(|| handle_alloc_error(layout))
-        }
-    }
-}
-
-/// Allocates a cache-aligned memory region of `size` bytes with `0` on all of them.
-/// This is more performant than using [allocate_aligned] and setting all bytes to zero
-/// and is semantically similar to `calloc`.
-pub fn allocate_aligned_zeroed<T: NativeType>(size: usize) -> NonNull<T> {
-    unsafe {
-        if size == 0 {
-            null_pointer()
-        } else {
-            let size = size * size_of::<T>();
-            ALLOCATIONS.fetch_add(size as isize, std::sync::atomic::Ordering::SeqCst);
-
-            let layout = Layout::from_size_align_unchecked(size, ALIGNMENT);
-            let raw_ptr = std::alloc::alloc_zeroed(layout) as *mut T;
-            NonNull::new(raw_ptr).unwrap_or_else(|| handle_alloc_error(layout))
-        }
-    }
-}
-
-/// # Safety
-///
-/// This function is unsafe because undefined behavior can result if the caller does not ensure all
-/// of the following:
-///
-/// * ptr must denote a block of memory currently allocated via this allocator,
-///
-/// * size must be the same size that was used to allocate that block of memory,
-pub unsafe fn free_aligned<T: NativeType>(ptr: NonNull<T>, size: usize) {
-    if ptr != null_pointer() {
-        let size = size * size_of::<T>();
-        ALLOCATIONS.fetch_sub(size as isize, std::sync::atomic::Ordering::SeqCst);
-        std::alloc::dealloc(
-            ptr.as_ptr() as *mut u8,
-            Layout::from_size_align_unchecked(size, ALIGNMENT),
-        );
-    }
-}
-
-/// # Safety
-///
-/// This function is unsafe because undefined behavior can result if the caller does not ensure all
-/// of the following:
-///
-/// * ptr must be currently allocated via this allocator,
-///
-/// * new_size must be greater than zero.
-///
-/// * new_size, when rounded up to the nearest multiple of [ALIGNMENT], must not overflow (i.e.,
-/// the rounded value must be less than usize::MAX).
-pub unsafe fn reallocate<T: NativeType>(
-    ptr: NonNull<T>,
-    old_size: usize,
-    new_size: usize,
-) -> NonNull<T> {
-    let old_size = old_size * size_of::<T>();
-    let new_size = new_size * size_of::<T>();
-    if ptr == null_pointer() {
-        return allocate_aligned(new_size);
-    }
-
-    if new_size == 0 {
-        free_aligned(ptr, old_size);
-        return null_pointer();
-    }
-
-    ALLOCATIONS.fetch_add(
-        new_size as isize - old_size as isize,
-        std::sync::atomic::Ordering::SeqCst,
-    );
-    let raw_ptr = std::alloc::realloc(
-        ptr.as_ptr() as *mut u8,
-        Layout::from_size_align_unchecked(old_size, ALIGNMENT),
-        new_size,
-    ) as *mut T;
-    NonNull::new(raw_ptr).unwrap_or_else(|| {
-        handle_alloc_error(Layout::from_size_align_unchecked(new_size, ALIGNMENT))
-    })
-}
diff --git a/rust/arrow/src/alloc/types.rs b/rust/arrow/src/alloc/types.rs
deleted file mode 100644
index c1f0ef99580..00000000000
--- a/rust/arrow/src/alloc/types.rs
+++ /dev/null
@@ -1,71 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use crate::datatypes::DataType;
-
-/// A type that Rust's custom allocator knows how to allocate and deallocate.
-/// This is implemented for all Arrow's physical types whose in-memory representation
-/// matches Rust's physical types. Consider this trait sealed.
-/// # Safety
-/// Do not implement this trait.
-pub unsafe trait NativeType:
-    Sized + Copy + std::fmt::Debug + std::fmt::Display + PartialEq + Default + Sized + 'static
-{
-    type Bytes: AsRef<[u8]>;
-
-    /// Whether a DataType is a valid type for this physical representation.
-    fn is_valid(data_type: &DataType) -> bool;
-
-    /// How this type represents itself as bytes in little endianess.
-    /// This is used for IPC, where data is communicated with a specific endianess.
-    fn to_le_bytes(&self) -> Self::Bytes;
-}
-
-macro_rules! create_native {
-    ($native_ty:ty,$($impl_pattern:pat)|+) => {
-        unsafe impl NativeType for $native_ty {
-            type Bytes = [u8; std::mem::size_of::<Self>()];
-
-            #[inline]
-            fn to_le_bytes(&self) -> Self::Bytes {
-                Self::to_le_bytes(*self)
-            }
-
-            #[inline]
-            fn is_valid(data_type: &DataType) -> bool {
-                matches!(data_type, $($impl_pattern)|+)
-            }
-        }
-    };
-}
-
-create_native!(u8, DataType::UInt8);
-create_native!(u16, DataType::UInt16);
-create_native!(u32, DataType::UInt32);
-create_native!(u64, DataType::UInt64);
-create_native!(i8, DataType::Int8);
-create_native!(i16, DataType::Int16);
-create_native!(
-    i32,
-    DataType::Int32 | DataType::Date32 | DataType::Time32(_)
-);
-create_native!(
-    i64,
-    DataType::Int64 | DataType::Date64 | DataType::Time64(_) | DataType::Timestamp(_, _)
-);
-create_native!(f32, DataType::Float32);
-create_native!(f64, DataType::Float64);
diff --git a/rust/arrow/src/arch/avx512.rs b/rust/arrow/src/arch/avx512.rs
deleted file mode 100644
index 264532f3594..00000000000
--- a/rust/arrow/src/arch/avx512.rs
+++ /dev/null
@@ -1,73 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-pub(crate) const AVX512_U8X64_LANES: usize = 64;
-
-#[target_feature(enable = "avx512f")]
-pub(crate) unsafe fn avx512_bin_and(left: &[u8], right: &[u8], res: &mut [u8]) {
-    use core::arch::x86_64::{__m512i, _mm512_and_si512, _mm512_loadu_epi64};
-
-    let l: __m512i = _mm512_loadu_epi64(left.as_ptr() as *const _);
-    let r: __m512i = _mm512_loadu_epi64(right.as_ptr() as *const _);
-    let f = _mm512_and_si512(l, r);
-    let s = &f as *const __m512i as *const u8;
-    let d = res.get_unchecked_mut(0) as *mut _ as *mut u8;
-    std::ptr::copy_nonoverlapping(s, d, std::mem::size_of::<__m512i>());
-}
-
-#[target_feature(enable = "avx512f")]
-pub(crate) unsafe fn avx512_bin_or(left: &[u8], right: &[u8], res: &mut [u8]) {
-    use core::arch::x86_64::{__m512i, _mm512_loadu_epi64, _mm512_or_si512};
-
-    let l: __m512i = _mm512_loadu_epi64(left.as_ptr() as *const _);
-    let r: __m512i = _mm512_loadu_epi64(right.as_ptr() as *const _);
-    let f = _mm512_or_si512(l, r);
-    let s = &f as *const __m512i as *const u8;
-    let d = res.get_unchecked_mut(0) as *mut _ as *mut u8;
-    std::ptr::copy_nonoverlapping(s, d, std::mem::size_of::<__m512i>());
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_bitwise_and_avx512() {
-        let buf1 = [0b00110011u8; 64];
-        let buf2 = [0b11110000u8; 64];
-        let mut buf3 = [0b00000000; 64];
-        unsafe {
-            avx512_bin_and(&buf1, &buf2, &mut buf3);
-        };
-        for i in buf3.iter() {
-            assert_eq!(&0b00110000u8, i);
-        }
-    }
-
-    #[test]
-    fn test_bitwise_or_avx512() {
-        let buf1 = [0b00010011u8; 64];
-        let buf2 = [0b11100000u8; 64];
-        let mut buf3 = [0b00000000; 64];
-        unsafe {
-            avx512_bin_or(&buf1, &buf2, &mut buf3);
-        };
-        for i in buf3.iter() {
-            assert_eq!(&0b11110011u8, i);
-        }
-    }
-}
diff --git a/rust/arrow/src/arch/mod.rs b/rust/arrow/src/arch/mod.rs
deleted file mode 100644
index 56d8f4c0e2c..00000000000
--- a/rust/arrow/src/arch/mod.rs
+++ /dev/null
@@ -1,22 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-///
-/// Arch module contains architecture specific code.
-/// Be aware that not all machines have these specific operations available.
-#[cfg(all(target_arch = "x86_64", feature = "avx512"))]
-pub(crate) mod avx512;
diff --git a/rust/arrow/src/array/array.rs b/rust/arrow/src/array/array.rs
deleted file mode 100644
index 95a3117417e..00000000000
--- a/rust/arrow/src/array/array.rs
+++ /dev/null
@@ -1,640 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::fmt;
-use std::sync::Arc;
-use std::{any::Any, convert::TryFrom};
-
-use super::*;
-use crate::array::equal_json::JsonEqual;
-use crate::buffer::{Buffer, MutableBuffer};
-use crate::error::Result;
-use crate::ffi;
-
-/// Trait for dealing with different types of array at runtime when the type of the
-/// array is not known in advance.
-pub trait Array: fmt::Debug + Send + Sync + JsonEqual {
-    /// Returns the array as [`Any`](std::any::Any) so that it can be
-    /// downcasted to a specific implementation.
-    ///
-    /// # Example:
-    ///
-    /// ```
-    /// use std::sync::Arc;
-    /// use arrow::array::Int32Array;
-    /// use arrow::datatypes::{Schema, Field, DataType};
-    /// use arrow::record_batch::RecordBatch;
-    ///
-    /// # fn main() -> arrow::error::Result<()> {
-    /// let id = Int32Array::from(vec![1, 2, 3, 4, 5]);
-    /// let batch = RecordBatch::try_new(
-    ///     Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)])),
-    ///     vec![Arc::new(id)]
-    /// )?;
-    ///
-    /// let int32array = batch
-    ///     .column(0)
-    ///     .as_any()
-    ///     .downcast_ref::<Int32Array>()
-    ///     .expect("Failed to downcast");
-    /// # Ok(())
-    /// # }
-    /// ```
-    fn as_any(&self) -> &Any;
-
-    /// Returns a reference to the underlying data of this array.
-    fn data(&self) -> &ArrayData;
-
-    /// Returns a reference-counted pointer to the underlying data of this array.
-    fn data_ref(&self) -> &ArrayData {
-        self.data()
-    }
-
-    /// Returns a reference to the [`DataType`](crate::datatypes::DataType) of this array.
-    ///
-    /// # Example:
-    ///
-    /// ```
-    /// use arrow::datatypes::DataType;
-    /// use arrow::array::{Array, Int32Array};
-    ///
-    /// let array = Int32Array::from(vec![1, 2, 3, 4, 5]);
-    ///
-    /// assert_eq!(*array.data_type(), DataType::Int32);
-    /// ```
-    fn data_type(&self) -> &DataType {
-        self.data_ref().data_type()
-    }
-
-    /// Returns a zero-copy slice of this array with the indicated offset and length.
-    ///
-    /// # Example:
-    ///
-    /// ```
-    /// use arrow::array::{Array, Int32Array};
-    ///
-    /// let array = Int32Array::from(vec![1, 2, 3, 4, 5]);
-    /// // Make slice over the values [2, 3, 4]
-    /// let array_slice = array.slice(1, 3);
-    ///
-    /// assert_eq!(array_slice.as_ref(), &Int32Array::from(vec![2, 3, 4]));
-    /// ```
-    fn slice(&self, offset: usize, length: usize) -> ArrayRef {
-        make_array(self.data_ref().slice(offset, length))
-    }
-
-    /// Returns the length (i.e., number of elements) of this array.
-    ///
-    /// # Example:
-    ///
-    /// ```
-    /// use arrow::array::{Array, Int32Array};
-    ///
-    /// let array = Int32Array::from(vec![1, 2, 3, 4, 5]);
-    ///
-    /// assert_eq!(array.len(), 5);
-    /// ```
-    fn len(&self) -> usize {
-        self.data_ref().len()
-    }
-
-    /// Returns whether this array is empty.
-    ///
-    /// # Example:
-    ///
-    /// ```
-    /// use arrow::array::{Array, Int32Array};
-    ///
-    /// let array = Int32Array::from(vec![1, 2, 3, 4, 5]);
-    ///
-    /// assert_eq!(array.is_empty(), false);
-    /// ```
-    fn is_empty(&self) -> bool {
-        self.data_ref().is_empty()
-    }
-
-    /// Returns the offset into the underlying data used by this array(-slice).
-    /// Note that the underlying data can be shared by many arrays.
-    /// This defaults to `0`.
-    ///
-    /// # Example:
-    ///
-    /// ```
-    /// use arrow::array::{Array, Int32Array};
-    ///
-    /// let array = Int32Array::from(vec![1, 2, 3, 4, 5]);
-    /// // Make slice over the values [2, 3, 4]
-    /// let array_slice = array.slice(1, 3);
-    ///
-    /// assert_eq!(array.offset(), 0);
-    /// assert_eq!(array_slice.offset(), 1);
-    /// ```
-    fn offset(&self) -> usize {
-        self.data_ref().offset()
-    }
-
-    /// Returns whether the element at `index` is null.
-    /// When using this function on a slice, the index is relative to the slice.
-    ///
-    /// # Example:
-    ///
-    /// ```
-    /// use arrow::array::{Array, Int32Array};
-    ///
-    /// let array = Int32Array::from(vec![Some(1), None]);
-    ///
-    /// assert_eq!(array.is_null(0), false);
-    /// assert_eq!(array.is_null(1), true);
-    /// ```
-    fn is_null(&self, index: usize) -> bool {
-        self.data_ref().is_null(index)
-    }
-
-    /// Returns whether the element at `index` is not null.
-    /// When using this function on a slice, the index is relative to the slice.
-    ///
-    /// # Example:
-    ///
-    /// ```
-    /// use arrow::array::{Array, Int32Array};
-    ///
-    /// let array = Int32Array::from(vec![Some(1), None]);
-    ///
-    /// assert_eq!(array.is_valid(0), true);
-    /// assert_eq!(array.is_valid(1), false);
-    /// ```
-    fn is_valid(&self, index: usize) -> bool {
-        self.data_ref().is_valid(index)
-    }
-
-    /// Returns the total number of null values in this array.
-    ///
-    /// # Example:
-    ///
-    /// ```
-    /// use arrow::array::{Array, Int32Array};
-    ///
-    /// // Construct an array with values [1, NULL, NULL]
-    /// let array = Int32Array::from(vec![Some(1), None, None]);
-    ///
-    /// assert_eq!(array.null_count(), 2);
-    /// ```
-    fn null_count(&self) -> usize {
-        self.data_ref().null_count()
-    }
-
-    /// Returns the total number of bytes of memory occupied by the buffers owned by this array.
-    fn get_buffer_memory_size(&self) -> usize;
-
-    /// Returns the total number of bytes of memory occupied physically by this array.
-    fn get_array_memory_size(&self) -> usize;
-
-    /// returns two pointers that represent this array in the C Data Interface (FFI)
-    fn to_raw(
-        &self,
-    ) -> Result<(*const ffi::FFI_ArrowArray, *const ffi::FFI_ArrowSchema)> {
-        let data = self.data().clone();
-        let array = ffi::ArrowArray::try_from(data)?;
-        Ok(ffi::ArrowArray::into_raw(array))
-    }
-}
-
-/// A reference-counted reference to a generic `Array`.
-pub type ArrayRef = Arc<Array>;
-
-/// Constructs an array using the input `data`.
-/// Returns a reference-counted `Array` instance.
-pub fn make_array(data: ArrayData) -> ArrayRef {
-    match data.data_type() {
-        DataType::Boolean => Arc::new(BooleanArray::from(data)) as ArrayRef,
-        DataType::Int8 => Arc::new(Int8Array::from(data)) as ArrayRef,
-        DataType::Int16 => Arc::new(Int16Array::from(data)) as ArrayRef,
-        DataType::Int32 => Arc::new(Int32Array::from(data)) as ArrayRef,
-        DataType::Int64 => Arc::new(Int64Array::from(data)) as ArrayRef,
-        DataType::UInt8 => Arc::new(UInt8Array::from(data)) as ArrayRef,
-        DataType::UInt16 => Arc::new(UInt16Array::from(data)) as ArrayRef,
-        DataType::UInt32 => Arc::new(UInt32Array::from(data)) as ArrayRef,
-        DataType::UInt64 => Arc::new(UInt64Array::from(data)) as ArrayRef,
-        DataType::Float16 => panic!("Float16 datatype not supported"),
-        DataType::Float32 => Arc::new(Float32Array::from(data)) as ArrayRef,
-        DataType::Float64 => Arc::new(Float64Array::from(data)) as ArrayRef,
-        DataType::Date32 => Arc::new(Date32Array::from(data)) as ArrayRef,
-        DataType::Date64 => Arc::new(Date64Array::from(data)) as ArrayRef,
-        DataType::Time32(TimeUnit::Second) => {
-            Arc::new(Time32SecondArray::from(data)) as ArrayRef
-        }
-        DataType::Time32(TimeUnit::Millisecond) => {
-            Arc::new(Time32MillisecondArray::from(data)) as ArrayRef
-        }
-        DataType::Time64(TimeUnit::Microsecond) => {
-            Arc::new(Time64MicrosecondArray::from(data)) as ArrayRef
-        }
-        DataType::Time64(TimeUnit::Nanosecond) => {
-            Arc::new(Time64NanosecondArray::from(data)) as ArrayRef
-        }
-        DataType::Timestamp(TimeUnit::Second, _) => {
-            Arc::new(TimestampSecondArray::from(data)) as ArrayRef
-        }
-        DataType::Timestamp(TimeUnit::Millisecond, _) => {
-            Arc::new(TimestampMillisecondArray::from(data)) as ArrayRef
-        }
-        DataType::Timestamp(TimeUnit::Microsecond, _) => {
-            Arc::new(TimestampMicrosecondArray::from(data)) as ArrayRef
-        }
-        DataType::Timestamp(TimeUnit::Nanosecond, _) => {
-            Arc::new(TimestampNanosecondArray::from(data)) as ArrayRef
-        }
-        DataType::Interval(IntervalUnit::YearMonth) => {
-            Arc::new(IntervalYearMonthArray::from(data)) as ArrayRef
-        }
-        DataType::Interval(IntervalUnit::DayTime) => {
-            Arc::new(IntervalDayTimeArray::from(data)) as ArrayRef
-        }
-        DataType::Duration(TimeUnit::Second) => {
-            Arc::new(DurationSecondArray::from(data)) as ArrayRef
-        }
-        DataType::Duration(TimeUnit::Millisecond) => {
-            Arc::new(DurationMillisecondArray::from(data)) as ArrayRef
-        }
-        DataType::Duration(TimeUnit::Microsecond) => {
-            Arc::new(DurationMicrosecondArray::from(data)) as ArrayRef
-        }
-        DataType::Duration(TimeUnit::Nanosecond) => {
-            Arc::new(DurationNanosecondArray::from(data)) as ArrayRef
-        }
-        DataType::Binary => Arc::new(BinaryArray::from(data)) as ArrayRef,
-        DataType::LargeBinary => Arc::new(LargeBinaryArray::from(data)) as ArrayRef,
-        DataType::FixedSizeBinary(_) => {
-            Arc::new(FixedSizeBinaryArray::from(data)) as ArrayRef
-        }
-        DataType::Utf8 => Arc::new(StringArray::from(data)) as ArrayRef,
-        DataType::LargeUtf8 => Arc::new(LargeStringArray::from(data)) as ArrayRef,
-        DataType::List(_) => Arc::new(ListArray::from(data)) as ArrayRef,
-        DataType::LargeList(_) => Arc::new(LargeListArray::from(data)) as ArrayRef,
-        DataType::Struct(_) => Arc::new(StructArray::from(data)) as ArrayRef,
-        DataType::Union(_) => Arc::new(UnionArray::from(data)) as ArrayRef,
-        DataType::FixedSizeList(_, _) => {
-            Arc::new(FixedSizeListArray::from(data)) as ArrayRef
-        }
-        DataType::Dictionary(ref key_type, _) => match key_type.as_ref() {
-            DataType::Int8 => {
-                Arc::new(DictionaryArray::<Int8Type>::from(data)) as ArrayRef
-            }
-            DataType::Int16 => {
-                Arc::new(DictionaryArray::<Int16Type>::from(data)) as ArrayRef
-            }
-            DataType::Int32 => {
-                Arc::new(DictionaryArray::<Int32Type>::from(data)) as ArrayRef
-            }
-            DataType::Int64 => {
-                Arc::new(DictionaryArray::<Int64Type>::from(data)) as ArrayRef
-            }
-            DataType::UInt8 => {
-                Arc::new(DictionaryArray::<UInt8Type>::from(data)) as ArrayRef
-            }
-            DataType::UInt16 => {
-                Arc::new(DictionaryArray::<UInt16Type>::from(data)) as ArrayRef
-            }
-            DataType::UInt32 => {
-                Arc::new(DictionaryArray::<UInt32Type>::from(data)) as ArrayRef
-            }
-            DataType::UInt64 => {
-                Arc::new(DictionaryArray::<UInt64Type>::from(data)) as ArrayRef
-            }
-            dt => panic!("Unexpected dictionary key type {:?}", dt),
-        },
-        DataType::Null => Arc::new(NullArray::from(data)) as ArrayRef,
-        DataType::Decimal(_, _) => Arc::new(DecimalArray::from(data)) as ArrayRef,
-        dt => panic!("Unexpected data type {:?}", dt),
-    }
-}
-
-/// Creates a new empty array
-pub fn new_empty_array(data_type: &DataType) -> ArrayRef {
-    let data = ArrayData::new_empty(data_type);
-    make_array(data)
-}
-/// Creates a new array of `data_type` of length `length` filled entirely of `NULL` values
-pub fn new_null_array(data_type: &DataType, length: usize) -> ArrayRef {
-    // context: https://github.com/apache/arrow/pull/9469#discussion_r574761687
-    match data_type {
-        DataType::Null => Arc::new(NullArray::new(length)),
-        DataType::Boolean => {
-            let null_buf: Buffer = MutableBuffer::new_null(length).into();
-            make_array(ArrayData::new(
-                data_type.clone(),
-                length,
-                Some(length),
-                Some(null_buf.clone()),
-                0,
-                vec![null_buf],
-                vec![],
-            ))
-        }
-        DataType::Int8 => new_null_sized_array::<Int8Type>(data_type, length),
-        DataType::UInt8 => new_null_sized_array::<UInt8Type>(data_type, length),
-        DataType::Int16 => new_null_sized_array::<Int16Type>(data_type, length),
-        DataType::UInt16 => new_null_sized_array::<UInt16Type>(data_type, length),
-        DataType::Float16 => unreachable!(),
-        DataType::Int32 => new_null_sized_array::<Int32Type>(data_type, length),
-        DataType::UInt32 => new_null_sized_array::<UInt32Type>(data_type, length),
-        DataType::Float32 => new_null_sized_array::<Float32Type>(data_type, length),
-        DataType::Date32 => new_null_sized_array::<Date32Type>(data_type, length),
-        // expanding this into Date23{unit}Type results in needless branching
-        DataType::Time32(_) => new_null_sized_array::<Int32Type>(data_type, length),
-        DataType::Int64 => new_null_sized_array::<Int64Type>(data_type, length),
-        DataType::UInt64 => new_null_sized_array::<UInt64Type>(data_type, length),
-        DataType::Float64 => new_null_sized_array::<Float64Type>(data_type, length),
-        DataType::Date64 => new_null_sized_array::<Date64Type>(data_type, length),
-        // expanding this into Timestamp{unit}Type results in needless branching
-        DataType::Timestamp(_, _) => new_null_sized_array::<Int64Type>(data_type, length),
-        DataType::Time64(_) => new_null_sized_array::<Int64Type>(data_type, length),
-        DataType::Duration(_) => new_null_sized_array::<Int64Type>(data_type, length),
-        DataType::Interval(unit) => match unit {
-            IntervalUnit::YearMonth => {
-                new_null_sized_array::<IntervalYearMonthType>(data_type, length)
-            }
-            IntervalUnit::DayTime => {
-                new_null_sized_array::<IntervalDayTimeType>(data_type, length)
-            }
-        },
-        DataType::FixedSizeBinary(value_len) => make_array(ArrayData::new(
-            data_type.clone(),
-            length,
-            Some(length),
-            Some(MutableBuffer::new_null(length).into()),
-            0,
-            vec![Buffer::from(vec![0u8; *value_len as usize * length])],
-            vec![],
-        )),
-        DataType::Binary | DataType::Utf8 => {
-            new_null_binary_array::<i32>(data_type, length)
-        }
-        DataType::LargeBinary | DataType::LargeUtf8 => {
-            new_null_binary_array::<i64>(data_type, length)
-        }
-        DataType::List(field) => {
-            new_null_list_array::<i32>(data_type, field.data_type(), length)
-        }
-        DataType::LargeList(field) => {
-            new_null_list_array::<i64>(data_type, field.data_type(), length)
-        }
-        DataType::FixedSizeList(field, value_len) => make_array(ArrayData::new(
-            data_type.clone(),
-            length,
-            Some(length),
-            Some(MutableBuffer::new_null(length).into()),
-            0,
-            vec![],
-            vec![
-                new_null_array(field.data_type(), *value_len as usize * length)
-                    .data()
-                    .clone(),
-            ],
-        )),
-        DataType::Struct(fields) => make_array(ArrayData::new(
-            data_type.clone(),
-            length,
-            Some(length),
-            Some(MutableBuffer::new_null(length).into()),
-            0,
-            vec![],
-            fields
-                .iter()
-                .map(|field| ArrayData::new_empty(field.data_type()))
-                .collect(),
-        )),
-        DataType::Union(_) => {
-            unimplemented!("Creating null Union array not yet supported")
-        }
-        DataType::Dictionary(key, value) => {
-            let keys = new_null_array(key, length);
-            let keys = keys.data();
-
-            make_array(ArrayData::new(
-                data_type.clone(),
-                length,
-                Some(length),
-                keys.null_buffer().cloned(),
-                0,
-                keys.buffers().into(),
-                vec![new_empty_array(value.as_ref()).data().clone()],
-            ))
-        }
-        DataType::Decimal(_, _) => {
-            unimplemented!("Creating null Decimal array not yet supported")
-        }
-    }
-}
-
-#[inline]
-fn new_null_list_array<OffsetSize: OffsetSizeTrait>(
-    data_type: &DataType,
-    child_data_type: &DataType,
-    length: usize,
-) -> ArrayRef {
-    make_array(ArrayData::new(
-        data_type.clone(),
-        length,
-        Some(length),
-        Some(MutableBuffer::new_null(length).into()),
-        0,
-        vec![Buffer::from(
-            vec![OffsetSize::zero(); length + 1].to_byte_slice(),
-        )],
-        vec![ArrayData::new_empty(child_data_type)],
-    ))
-}
-
-#[inline]
-fn new_null_binary_array<OffsetSize: OffsetSizeTrait>(
-    data_type: &DataType,
-    length: usize,
-) -> ArrayRef {
-    make_array(ArrayData::new(
-        data_type.clone(),
-        length,
-        Some(length),
-        Some(MutableBuffer::new_null(length).into()),
-        0,
-        vec![
-            Buffer::from(vec![OffsetSize::zero(); length + 1].to_byte_slice()),
-            MutableBuffer::new(0).into(),
-        ],
-        vec![],
-    ))
-}
-
-#[inline]
-fn new_null_sized_array<T: ArrowPrimitiveType>(
-    data_type: &DataType,
-    length: usize,
-) -> ArrayRef {
-    make_array(ArrayData::new(
-        data_type.clone(),
-        length,
-        Some(length),
-        Some(MutableBuffer::new_null(length).into()),
-        0,
-        vec![Buffer::from(vec![0u8; length * T::get_byte_width()])],
-        vec![],
-    ))
-}
-
-/// Creates a new array from two FFI pointers. Used to import arrays from the C Data Interface
-/// # Safety
-/// Assumes that these pointers represent valid C Data Interfaces, both in memory
-/// representation and lifetime via the `release` mechanism.
-pub unsafe fn make_array_from_raw(
-    array: *const ffi::FFI_ArrowArray,
-    schema: *const ffi::FFI_ArrowSchema,
-) -> Result<ArrayRef> {
-    let array = ffi::ArrowArray::try_from_raw(array, schema)?;
-    let data = ArrayData::try_from(array)?;
-    Ok(make_array(data))
-}
-// Helper function for printing potentially long arrays.
-pub(super) fn print_long_array<A, F>(
-    array: &A,
-    f: &mut fmt::Formatter,
-    print_item: F,
-) -> fmt::Result
-where
-    A: Array,
-    F: Fn(&A, usize, &mut fmt::Formatter) -> fmt::Result,
-{
-    let head = std::cmp::min(10, array.len());
-
-    for i in 0..head {
-        if array.is_null(i) {
-            writeln!(f, "  null,")?;
-        } else {
-            write!(f, "  ")?;
-            print_item(&array, i, f)?;
-            writeln!(f, ",")?;
-        }
-    }
-    if array.len() > 10 {
-        if array.len() > 20 {
-            writeln!(f, "  ...{} elements...,", array.len() - 20)?;
-        }
-
-        let tail = std::cmp::max(head, array.len() - 10);
-
-        for i in tail..array.len() {
-            if array.is_null(i) {
-                writeln!(f, "  null,")?;
-            } else {
-                write!(f, "  ")?;
-                print_item(&array, i, f)?;
-                writeln!(f, ",")?;
-            }
-        }
-    }
-    Ok(())
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    #[test]
-    fn test_empty_primitive() {
-        let array = new_empty_array(&DataType::Int32);
-        let a = array.as_any().downcast_ref::<Int32Array>().unwrap();
-        assert_eq!(a.len(), 0);
-        let expected: &[i32] = &[];
-        assert_eq!(a.values(), expected);
-    }
-
-    #[test]
-    fn test_empty_variable_sized() {
-        let array = new_empty_array(&DataType::Utf8);
-        let a = array.as_any().downcast_ref::<StringArray>().unwrap();
-        assert_eq!(a.len(), 0);
-        assert_eq!(a.value_offsets()[0], 0i32);
-    }
-
-    #[test]
-    fn test_empty_list_primitive() {
-        let data_type =
-            DataType::List(Box::new(Field::new("item", DataType::Int32, false)));
-        let array = new_empty_array(&data_type);
-        let a = array.as_any().downcast_ref::<ListArray>().unwrap();
-        assert_eq!(a.len(), 0);
-        assert_eq!(a.value_offsets()[0], 0i32);
-    }
-
-    #[test]
-    fn test_null_boolean() {
-        let array = new_null_array(&DataType::Boolean, 9);
-        let a = array.as_any().downcast_ref::<BooleanArray>().unwrap();
-        assert_eq!(a.len(), 9);
-        for i in 0..9 {
-            assert!(a.is_null(i));
-        }
-    }
-
-    #[test]
-    fn test_null_primitive() {
-        let array = new_null_array(&DataType::Int32, 9);
-        let a = array.as_any().downcast_ref::<Int32Array>().unwrap();
-        assert_eq!(a.len(), 9);
-        for i in 0..9 {
-            assert!(a.is_null(i));
-        }
-    }
-
-    #[test]
-    fn test_null_variable_sized() {
-        let array = new_null_array(&DataType::Utf8, 9);
-        let a = array.as_any().downcast_ref::<StringArray>().unwrap();
-        assert_eq!(a.len(), 9);
-        assert_eq!(a.value_offsets()[9], 0i32);
-        for i in 0..9 {
-            assert!(a.is_null(i));
-        }
-    }
-
-    #[test]
-    fn test_null_list_primitive() {
-        let data_type =
-            DataType::List(Box::new(Field::new("item", DataType::Int32, true)));
-        let array = new_null_array(&data_type, 9);
-        let a = array.as_any().downcast_ref::<ListArray>().unwrap();
-        assert_eq!(a.len(), 9);
-        assert_eq!(a.value_offsets()[9], 0i32);
-        for i in 0..9 {
-            assert!(a.is_null(i));
-        }
-    }
-
-    #[test]
-    fn test_null_dictionary() {
-        let values = vec![None, None, None, None, None, None, None, None, None]
-            as Vec<Option<&str>>;
-
-        let array: DictionaryArray<Int8Type> = values.into_iter().collect();
-        let array = Arc::new(array) as ArrayRef;
-
-        let null_array = new_null_array(array.data_type(), 9);
-        assert_eq!(&array, &null_array);
-        assert_eq!(
-            array.data().buffers()[0].len(),
-            null_array.data().buffers()[0].len()
-        );
-    }
-}
diff --git a/rust/arrow/src/array/array_binary.rs b/rust/arrow/src/array/array_binary.rs
deleted file mode 100644
index bd04afa4c1f..00000000000
--- a/rust/arrow/src/array/array_binary.rs
+++ /dev/null
@@ -1,1157 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::convert::{From, TryInto};
-use std::fmt;
-use std::mem;
-use std::{any::Any, iter::FromIterator};
-
-use super::{
-    array::print_long_array, raw_pointer::RawPtrBox, Array, ArrayData,
-    FixedSizeListArray, GenericBinaryIter, GenericListArray, OffsetSizeTrait,
-};
-use crate::buffer::Buffer;
-use crate::error::ArrowError;
-use crate::util::bit_util;
-use crate::{buffer::MutableBuffer, datatypes::DataType};
-
-/// Like OffsetSizeTrait, but specialized for Binary
-// This allow us to expose a constant datatype for the GenericBinaryArray
-pub trait BinaryOffsetSizeTrait: OffsetSizeTrait {
-    const DATA_TYPE: DataType;
-}
-
-impl BinaryOffsetSizeTrait for i32 {
-    const DATA_TYPE: DataType = DataType::Binary;
-}
-
-impl BinaryOffsetSizeTrait for i64 {
-    const DATA_TYPE: DataType = DataType::LargeBinary;
-}
-
-pub struct GenericBinaryArray<OffsetSize: BinaryOffsetSizeTrait> {
-    data: ArrayData,
-    value_offsets: RawPtrBox<OffsetSize>,
-    value_data: RawPtrBox<u8>,
-}
-
-impl<OffsetSize: BinaryOffsetSizeTrait> GenericBinaryArray<OffsetSize> {
-    /// Returns the length for value at index `i`.
-    #[inline]
-    pub fn value_length(&self, i: usize) -> OffsetSize {
-        let offsets = self.value_offsets();
-        offsets[i + 1] - offsets[i]
-    }
-
-    /// Returns a clone of the value data buffer
-    pub fn value_data(&self) -> Buffer {
-        self.data.buffers()[1].clone()
-    }
-
-    /// Returns the offset values in the offsets buffer
-    #[inline]
-    pub fn value_offsets(&self) -> &[OffsetSize] {
-        // Soundness
-        //     pointer alignment & location is ensured by RawPtrBox
-        //     buffer bounds/offset is ensured by the ArrayData instance.
-        unsafe {
-            std::slice::from_raw_parts(
-                self.value_offsets.as_ptr().add(self.data.offset()),
-                self.len() + 1,
-            )
-        }
-    }
-
-    /// Returns the element at index `i` as bytes slice
-    /// # Safety
-    /// Caller is responsible for ensuring that the index is within the bounds of the array
-    pub unsafe fn value_unchecked(&self, i: usize) -> &[u8] {
-        let end = *self.value_offsets().get_unchecked(i + 1);
-        let start = *self.value_offsets().get_unchecked(i);
-
-        // Soundness
-        // pointer alignment & location is ensured by RawPtrBox
-        // buffer bounds/offset is ensured by the value_offset invariants
-
-        // Safety of `to_isize().unwrap()`
-        // `start` and `end` are &OffsetSize, which is a generic type that implements the
-        // OffsetSizeTrait. Currently, only i32 and i64 implement OffsetSizeTrait,
-        // both of which should cleanly cast to isize on an architecture that supports
-        // 32/64-bit offsets
-        std::slice::from_raw_parts(
-            self.value_data.as_ptr().offset(start.to_isize().unwrap()),
-            (end - start).to_usize().unwrap(),
-        )
-    }
-
-    /// Returns the element at index `i` as bytes slice
-    pub fn value(&self, i: usize) -> &[u8] {
-        assert!(i < self.data.len(), "BinaryArray out of bounds access");
-        //Soundness: length checked above, offset buffer length is 1 larger than logical array length
-        let end = unsafe { self.value_offsets().get_unchecked(i + 1) };
-        let start = unsafe { self.value_offsets().get_unchecked(i) };
-
-        // Soundness
-        // pointer alignment & location is ensured by RawPtrBox
-        // buffer bounds/offset is ensured by the value_offset invariants
-
-        // Safety of `to_isize().unwrap()`
-        // `start` and `end` are &OffsetSize, which is a generic type that implements the
-        // OffsetSizeTrait. Currently, only i32 and i64 implement OffsetSizeTrait,
-        // both of which should cleanly cast to isize on an architecture that supports
-        // 32/64-bit offsets
-        unsafe {
-            std::slice::from_raw_parts(
-                self.value_data.as_ptr().offset(start.to_isize().unwrap()),
-                (*end - *start).to_usize().unwrap(),
-            )
-        }
-    }
-
-    /// Creates a [GenericBinaryArray] from a vector of byte slices
-    pub fn from_vec(v: Vec<&[u8]>) -> Self {
-        let mut offsets = Vec::with_capacity(v.len() + 1);
-        let mut values = Vec::new();
-        let mut length_so_far: OffsetSize = OffsetSize::zero();
-        offsets.push(length_so_far);
-        for s in &v {
-            length_so_far += OffsetSize::from_usize(s.len()).unwrap();
-            offsets.push(length_so_far);
-            values.extend_from_slice(s);
-        }
-        let array_data = ArrayData::builder(OffsetSize::DATA_TYPE)
-            .len(v.len())
-            .add_buffer(Buffer::from_slice_ref(&offsets))
-            .add_buffer(Buffer::from_slice_ref(&values))
-            .build();
-        GenericBinaryArray::<OffsetSize>::from(array_data)
-    }
-
-    /// Creates a [GenericBinaryArray] from a vector of Optional (null) byte slices
-    pub fn from_opt_vec(v: Vec<Option<&[u8]>>) -> Self {
-        v.into_iter().collect()
-    }
-
-    fn from_list(v: GenericListArray<OffsetSize>) -> Self {
-        assert_eq!(
-            v.data_ref().child_data()[0].child_data().len(),
-            0,
-            "BinaryArray can only be created from list array of u8 values \
-             (i.e. List<PrimitiveArray<u8>>)."
-        );
-        assert_eq!(
-            v.data_ref().child_data()[0].data_type(),
-            &DataType::UInt8,
-            "BinaryArray can only be created from List<u8> arrays, mismatched data types."
-        );
-
-        let mut builder = ArrayData::builder(OffsetSize::DATA_TYPE)
-            .len(v.len())
-            .add_buffer(v.data_ref().buffers()[0].clone())
-            .add_buffer(v.data_ref().child_data()[0].buffers()[0].clone());
-        if let Some(bitmap) = v.data_ref().null_bitmap() {
-            builder = builder.null_bit_buffer(bitmap.bits.clone())
-        }
-
-        let data = builder.build();
-        Self::from(data)
-    }
-}
-
-impl<'a, T: BinaryOffsetSizeTrait> GenericBinaryArray<T> {
-    /// constructs a new iterator
-    pub fn iter(&'a self) -> GenericBinaryIter<'a, T> {
-        GenericBinaryIter::<'a, T>::new(&self)
-    }
-}
-
-impl<OffsetSize: BinaryOffsetSizeTrait> fmt::Debug for GenericBinaryArray<OffsetSize> {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        let prefix = if OffsetSize::is_large() { "Large" } else { "" };
-
-        write!(f, "{}BinaryArray\n[\n", prefix)?;
-        print_long_array(self, f, |array, index, f| {
-            fmt::Debug::fmt(&array.value(index), f)
-        })?;
-        write!(f, "]")
-    }
-}
-
-impl<OffsetSize: BinaryOffsetSizeTrait> Array for GenericBinaryArray<OffsetSize> {
-    fn as_any(&self) -> &Any {
-        self
-    }
-
-    fn data(&self) -> &ArrayData {
-        &self.data
-    }
-
-    /// Returns the total number of bytes of memory occupied by the buffers owned by this [$name].
-    fn get_buffer_memory_size(&self) -> usize {
-        self.data.get_buffer_memory_size()
-    }
-
-    /// Returns the total number of bytes of memory occupied physically by this [$name].
-    fn get_array_memory_size(&self) -> usize {
-        self.data.get_array_memory_size() + mem::size_of_val(self)
-    }
-}
-
-impl<OffsetSize: BinaryOffsetSizeTrait> From<ArrayData>
-    for GenericBinaryArray<OffsetSize>
-{
-    fn from(data: ArrayData) -> Self {
-        assert_eq!(
-            data.data_type(),
-            &<OffsetSize as BinaryOffsetSizeTrait>::DATA_TYPE,
-            "[Large]BinaryArray expects Datatype::[Large]Binary"
-        );
-        assert_eq!(
-            data.buffers().len(),
-            2,
-            "BinaryArray data should contain 2 buffers only (offsets and values)"
-        );
-        let offsets = data.buffers()[0].as_ptr();
-        let values = data.buffers()[1].as_ptr();
-        Self {
-            data,
-            value_offsets: unsafe { RawPtrBox::new(offsets) },
-            value_data: unsafe { RawPtrBox::new(values) },
-        }
-    }
-}
-
-impl<Ptr, OffsetSize: BinaryOffsetSizeTrait> FromIterator<Option<Ptr>>
-    for GenericBinaryArray<OffsetSize>
-where
-    Ptr: AsRef<[u8]>,
-{
-    fn from_iter<I: IntoIterator<Item = Option<Ptr>>>(iter: I) -> Self {
-        let iter = iter.into_iter();
-        let (_, data_len) = iter.size_hint();
-        let data_len = data_len.expect("Iterator must be sized"); // panic if no upper bound.
-
-        let mut offsets = Vec::with_capacity(data_len + 1);
-        let mut values = Vec::new();
-        let mut null_buf = MutableBuffer::new_null(data_len);
-        let mut length_so_far: OffsetSize = OffsetSize::zero();
-        offsets.push(length_so_far);
-
-        {
-            let null_slice = null_buf.as_slice_mut();
-
-            for (i, s) in iter.enumerate() {
-                if let Some(s) = s {
-                    let s = s.as_ref();
-                    bit_util::set_bit(null_slice, i);
-                    length_so_far += OffsetSize::from_usize(s.len()).unwrap();
-                    values.extend_from_slice(s);
-                }
-                // always add an element in offsets
-                offsets.push(length_so_far);
-            }
-        }
-
-        // calculate actual data_len, which may be different from the iterator's upper bound
-        let data_len = offsets.len() - 1;
-        let array_data = ArrayData::builder(OffsetSize::DATA_TYPE)
-            .len(data_len)
-            .add_buffer(Buffer::from_slice_ref(&offsets))
-            .add_buffer(Buffer::from_slice_ref(&values))
-            .null_bit_buffer(null_buf.into())
-            .build();
-        Self::from(array_data)
-    }
-}
-
-/// An array where each element is a byte whose maximum length is represented by a i32.
-pub type BinaryArray = GenericBinaryArray<i32>;
-
-/// An array where each element is a byte whose maximum length is represented by a i64.
-pub type LargeBinaryArray = GenericBinaryArray<i64>;
-
-impl<'a, T: BinaryOffsetSizeTrait> IntoIterator for &'a GenericBinaryArray<T> {
-    type Item = Option<&'a [u8]>;
-    type IntoIter = GenericBinaryIter<'a, T>;
-
-    fn into_iter(self) -> Self::IntoIter {
-        GenericBinaryIter::<'a, T>::new(self)
-    }
-}
-
-impl<OffsetSize: BinaryOffsetSizeTrait> From<Vec<Option<&[u8]>>>
-    for GenericBinaryArray<OffsetSize>
-{
-    fn from(v: Vec<Option<&[u8]>>) -> Self {
-        GenericBinaryArray::<OffsetSize>::from_opt_vec(v)
-    }
-}
-
-impl<OffsetSize: BinaryOffsetSizeTrait> From<Vec<&[u8]>>
-    for GenericBinaryArray<OffsetSize>
-{
-    fn from(v: Vec<&[u8]>) -> Self {
-        GenericBinaryArray::<OffsetSize>::from_vec(v)
-    }
-}
-
-impl<T: BinaryOffsetSizeTrait> From<GenericListArray<T>> for GenericBinaryArray<T> {
-    fn from(v: GenericListArray<T>) -> Self {
-        GenericBinaryArray::<T>::from_list(v)
-    }
-}
-
-/// A type of `FixedSizeListArray` whose elements are binaries.
-pub struct FixedSizeBinaryArray {
-    data: ArrayData,
-    value_data: RawPtrBox<u8>,
-    length: i32,
-}
-
-impl FixedSizeBinaryArray {
-    /// Returns the element at index `i` as a byte slice.
-    pub fn value(&self, i: usize) -> &[u8] {
-        assert!(
-            i < self.data.len(),
-            "FixedSizeBinaryArray out of bounds access"
-        );
-        let offset = i.checked_add(self.data.offset()).unwrap();
-        unsafe {
-            let pos = self.value_offset_at(offset);
-            std::slice::from_raw_parts(
-                self.value_data.as_ptr().offset(pos as isize),
-                (self.value_offset_at(offset + 1) - pos) as usize,
-            )
-        }
-    }
-
-    /// Returns the offset for the element at index `i`.
-    ///
-    /// Note this doesn't do any bound checking, for performance reason.
-    #[inline]
-    pub fn value_offset(&self, i: usize) -> i32 {
-        self.value_offset_at(self.data.offset() + i)
-    }
-
-    /// Returns the length for an element.
-    ///
-    /// All elements have the same length as the array is a fixed size.
-    #[inline]
-    pub fn value_length(&self) -> i32 {
-        self.length
-    }
-
-    /// Returns a clone of the value data buffer
-    pub fn value_data(&self) -> Buffer {
-        self.data.buffers()[0].clone()
-    }
-
-    /// Create an array from an iterable argument of sparse byte slices.
-    /// Sparsity means that items returned by the iterator are optional, i.e input argument can
-    /// contain `None` items.
-    ///
-    /// # Examles
-    ///
-    /// ```
-    /// use arrow::array::FixedSizeBinaryArray;
-    /// let input_arg = vec![
-    ///     None,
-    ///     Some(vec![7, 8]),
-    ///     Some(vec![9, 10]),
-    ///     None,
-    ///     Some(vec![13, 14]),
-    ///     None,
-    /// ];
-    /// let array = FixedSizeBinaryArray::try_from_sparse_iter(input_arg.into_iter()).unwrap();
-    /// ```
-    ///
-    /// # Errors
-    ///
-    /// Returns error if argument has length zero, or sizes of nested slices don't match.
-    pub fn try_from_sparse_iter<T, U>(mut iter: T) -> Result<Self, ArrowError>
-    where
-        T: Iterator<Item = Option<U>>,
-        U: AsRef<[u8]>,
-    {
-        let mut len = 0;
-        let mut size = None;
-        let mut byte = 0;
-        let mut null_buf = MutableBuffer::from_len_zeroed(0);
-        let mut buffer = MutableBuffer::from_len_zeroed(0);
-        let mut prepend = 0;
-        iter.try_for_each(|item| -> Result<(), ArrowError> {
-            // extend null bitmask by one byte per each 8 items
-            if byte == 0 {
-                null_buf.push(0u8);
-                byte = 8;
-            }
-            byte -= 1;
-
-            if let Some(slice) = item {
-                let slice = slice.as_ref();
-                if let Some(size) = size {
-                    if size != slice.len() {
-                        return Err(ArrowError::InvalidArgumentError(format!(
-                            "Nested array size mismatch: one is {}, and the other is {}",
-                            size,
-                            slice.len()
-                        )));
-                    }
-                } else {
-                    size = Some(slice.len());
-                    buffer.extend_zeros(slice.len() * prepend);
-                }
-                bit_util::set_bit(null_buf.as_slice_mut(), len);
-                buffer.extend_from_slice(slice);
-            } else if let Some(size) = size {
-                buffer.extend_zeros(size);
-            } else {
-                prepend += 1;
-            }
-
-            len += 1;
-
-            Ok(())
-        })?;
-
-        if len == 0 {
-            return Err(ArrowError::InvalidArgumentError(
-                "Input iterable argument has no data".to_owned(),
-            ));
-        }
-
-        let size = size.unwrap_or(0);
-        let array_data = ArrayData::new(
-            DataType::FixedSizeBinary(size as i32),
-            len,
-            None,
-            Some(null_buf.into()),
-            0,
-            vec![buffer.into()],
-            vec![],
-        );
-        Ok(FixedSizeBinaryArray::from(array_data))
-    }
-
-    /// Create an array from an iterable argument of byte slices.
-    ///
-    /// # Examles
-    ///
-    /// ```
-    /// use arrow::array::FixedSizeBinaryArray;
-    /// let input_arg = vec![
-    ///     vec![1, 2],
-    ///     vec![3, 4],
-    ///     vec![5, 6],
-    /// ];
-    /// let array = FixedSizeBinaryArray::try_from_iter(input_arg.into_iter()).unwrap();
-    /// ```
-    ///
-    /// # Errors
-    ///
-    /// Returns error if argument has length zero, or sizes of nested slices don't match.
-    pub fn try_from_iter<T, U>(mut iter: T) -> Result<Self, ArrowError>
-    where
-        T: Iterator<Item = U>,
-        U: AsRef<[u8]>,
-    {
-        let mut len = 0;
-        let mut size = None;
-        let mut buffer = MutableBuffer::from_len_zeroed(0);
-        iter.try_for_each(|item| -> Result<(), ArrowError> {
-            let slice = item.as_ref();
-            if let Some(size) = size {
-                if size != slice.len() {
-                    return Err(ArrowError::InvalidArgumentError(format!(
-                        "Nested array size mismatch: one is {}, and the other is {}",
-                        size,
-                        slice.len()
-                    )));
-                }
-            } else {
-                size = Some(slice.len());
-            }
-            buffer.extend_from_slice(slice);
-
-            len += 1;
-
-            Ok(())
-        })?;
-
-        if len == 0 {
-            return Err(ArrowError::InvalidArgumentError(
-                "Input iterable argument has no data".to_owned(),
-            ));
-        }
-
-        let size = size.unwrap_or(0);
-        let array_data = ArrayData::builder(DataType::FixedSizeBinary(size as i32))
-            .len(len)
-            .add_buffer(buffer.into())
-            .build();
-        Ok(FixedSizeBinaryArray::from(array_data))
-    }
-
-    #[inline]
-    fn value_offset_at(&self, i: usize) -> i32 {
-        self.length * i as i32
-    }
-}
-
-impl From<ArrayData> for FixedSizeBinaryArray {
-    fn from(data: ArrayData) -> Self {
-        assert_eq!(
-            data.buffers().len(),
-            1,
-            "FixedSizeBinaryArray data should contain 1 buffer only (values)"
-        );
-        let value_data = data.buffers()[0].as_ptr();
-        let length = match data.data_type() {
-            DataType::FixedSizeBinary(len) => *len,
-            _ => panic!("Expected data type to be FixedSizeBinary"),
-        };
-        Self {
-            data,
-            value_data: unsafe { RawPtrBox::new(value_data) },
-            length,
-        }
-    }
-}
-
-/// Creates a `FixedSizeBinaryArray` from `FixedSizeList<u8>` array
-impl From<FixedSizeListArray> for FixedSizeBinaryArray {
-    fn from(v: FixedSizeListArray) -> Self {
-        assert_eq!(
-            v.data_ref().child_data()[0].child_data().len(),
-            0,
-            "FixedSizeBinaryArray can only be created from list array of u8 values \
-             (i.e. FixedSizeList<PrimitiveArray<u8>>)."
-        );
-        assert_eq!(
-            v.data_ref().child_data()[0].data_type(),
-            &DataType::UInt8,
-            "FixedSizeBinaryArray can only be created from FixedSizeList<u8> arrays, mismatched data types."
-        );
-
-        let mut builder = ArrayData::builder(DataType::FixedSizeBinary(v.value_length()))
-            .len(v.len())
-            .add_buffer(v.data_ref().child_data()[0].buffers()[0].clone());
-        if let Some(bitmap) = v.data_ref().null_bitmap() {
-            builder = builder.null_bit_buffer(bitmap.bits.clone())
-        }
-
-        let data = builder.build();
-        Self::from(data)
-    }
-}
-
-impl fmt::Debug for FixedSizeBinaryArray {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "FixedSizeBinaryArray<{}>\n[\n", self.value_length())?;
-        print_long_array(self, f, |array, index, f| {
-            fmt::Debug::fmt(&array.value(index), f)
-        })?;
-        write!(f, "]")
-    }
-}
-
-impl Array for FixedSizeBinaryArray {
-    fn as_any(&self) -> &Any {
-        self
-    }
-
-    fn data(&self) -> &ArrayData {
-        &self.data
-    }
-
-    /// Returns the total number of bytes of memory occupied by the buffers owned by this [FixedSizeBinaryArray].
-    fn get_buffer_memory_size(&self) -> usize {
-        self.data.get_buffer_memory_size()
-    }
-
-    /// Returns the total number of bytes of memory occupied physically by this [FixedSizeBinaryArray].
-    fn get_array_memory_size(&self) -> usize {
-        self.data.get_array_memory_size() + mem::size_of_val(self)
-    }
-}
-
-/// A type of `DecimalArray` whose elements are binaries.
-pub struct DecimalArray {
-    data: ArrayData,
-    value_data: RawPtrBox<u8>,
-    precision: usize,
-    scale: usize,
-    length: i32,
-}
-
-impl DecimalArray {
-    /// Returns the element at index `i` as i128.
-    pub fn value(&self, i: usize) -> i128 {
-        assert!(i < self.data.len(), "DecimalArray out of bounds access");
-        let offset = i.checked_add(self.data.offset()).unwrap();
-        let raw_val = unsafe {
-            let pos = self.value_offset_at(offset);
-            std::slice::from_raw_parts(
-                self.value_data.as_ptr().offset(pos as isize),
-                (self.value_offset_at(offset + 1) - pos) as usize,
-            )
-        };
-        let as_array = raw_val.try_into();
-        match as_array {
-            Ok(v) if raw_val.len() == 16 => i128::from_le_bytes(v),
-            _ => panic!("DecimalArray elements are not 128bit integers."),
-        }
-    }
-
-    /// Returns the offset for the element at index `i`.
-    ///
-    /// Note this doesn't do any bound checking, for performance reason.
-    #[inline]
-    pub fn value_offset(&self, i: usize) -> i32 {
-        self.value_offset_at(self.data.offset() + i)
-    }
-
-    /// Returns the length for an element.
-    ///
-    /// All elements have the same length as the array is a fixed size.
-    #[inline]
-    pub fn value_length(&self) -> i32 {
-        self.length
-    }
-
-    /// Returns a clone of the value data buffer
-    pub fn value_data(&self) -> Buffer {
-        self.data.buffers()[0].clone()
-    }
-
-    #[inline]
-    fn value_offset_at(&self, i: usize) -> i32 {
-        self.length * i as i32
-    }
-
-    pub fn from_fixed_size_list_array(
-        v: FixedSizeListArray,
-        precision: usize,
-        scale: usize,
-    ) -> Self {
-        assert_eq!(
-            v.data_ref().child_data()[0].child_data().len(),
-            0,
-            "DecimalArray can only be created from list array of u8 values \
-             (i.e. FixedSizeList<PrimitiveArray<u8>>)."
-        );
-        assert_eq!(
-            v.data_ref().child_data()[0].data_type(),
-            &DataType::UInt8,
-            "DecimalArray can only be created from FixedSizeList<u8> arrays, mismatched data types."
-        );
-
-        let mut builder = ArrayData::builder(DataType::Decimal(precision, scale))
-            .len(v.len())
-            .add_buffer(v.data_ref().child_data()[0].buffers()[0].clone());
-        if let Some(bitmap) = v.data_ref().null_bitmap() {
-            builder = builder.null_bit_buffer(bitmap.bits.clone())
-        }
-
-        let data = builder.build();
-        Self::from(data)
-    }
-    pub fn precision(&self) -> usize {
-        self.precision
-    }
-
-    pub fn scale(&self) -> usize {
-        self.scale
-    }
-}
-
-impl From<ArrayData> for DecimalArray {
-    fn from(data: ArrayData) -> Self {
-        assert_eq!(
-            data.buffers().len(),
-            1,
-            "DecimalArray data should contain 1 buffer only (values)"
-        );
-        let values = data.buffers()[0].as_ptr();
-        let (precision, scale) = match data.data_type() {
-            DataType::Decimal(precision, scale) => (*precision, *scale),
-            _ => panic!("Expected data type to be Decimal"),
-        };
-        let length = 16;
-        Self {
-            data,
-            value_data: unsafe { RawPtrBox::new(values) },
-            precision,
-            scale,
-            length,
-        }
-    }
-}
-
-impl fmt::Debug for DecimalArray {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "DecimalArray<{}, {}>\n[\n", self.precision, self.scale)?;
-        print_long_array(self, f, |array, index, f| {
-            fmt::Debug::fmt(&array.value(index), f)
-        })?;
-        write!(f, "]")
-    }
-}
-
-impl Array for DecimalArray {
-    fn as_any(&self) -> &Any {
-        self
-    }
-
-    fn data(&self) -> &ArrayData {
-        &self.data
-    }
-
-    /// Returns the total number of bytes of memory occupied by the buffers owned by this [DecimalArray].
-    fn get_buffer_memory_size(&self) -> usize {
-        self.data.get_buffer_memory_size()
-    }
-
-    /// Returns the total number of bytes of memory occupied physically by this [DecimalArray].
-    fn get_array_memory_size(&self) -> usize {
-        self.data.get_array_memory_size() + mem::size_of_val(self)
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use crate::{
-        array::{LargeListArray, ListArray},
-        datatypes::Field,
-    };
-
-    use super::*;
-
-    #[test]
-    fn test_binary_array() {
-        let values: [u8; 12] = [
-            b'h', b'e', b'l', b'l', b'o', b'p', b'a', b'r', b'q', b'u', b'e', b't',
-        ];
-        let offsets: [i32; 4] = [0, 5, 5, 12];
-
-        // Array data: ["hello", "", "parquet"]
-        let array_data = ArrayData::builder(DataType::Binary)
-            .len(3)
-            .add_buffer(Buffer::from_slice_ref(&offsets))
-            .add_buffer(Buffer::from_slice_ref(&values))
-            .build();
-        let binary_array = BinaryArray::from(array_data);
-        assert_eq!(3, binary_array.len());
-        assert_eq!(0, binary_array.null_count());
-        assert_eq!([b'h', b'e', b'l', b'l', b'o'], binary_array.value(0));
-        assert_eq!([b'h', b'e', b'l', b'l', b'o'], unsafe {
-            binary_array.value_unchecked(0)
-        });
-        assert_eq!([] as [u8; 0], binary_array.value(1));
-        assert_eq!([] as [u8; 0], unsafe { binary_array.value_unchecked(1) });
-        assert_eq!(
-            [b'p', b'a', b'r', b'q', b'u', b'e', b't'],
-            binary_array.value(2)
-        );
-        assert_eq!([b'p', b'a', b'r', b'q', b'u', b'e', b't'], unsafe {
-            binary_array.value_unchecked(2)
-        });
-        assert_eq!(5, binary_array.value_offsets()[2]);
-        assert_eq!(7, binary_array.value_length(2));
-        for i in 0..3 {
-            assert!(binary_array.is_valid(i));
-            assert!(!binary_array.is_null(i));
-        }
-
-        // Test binary array with offset
-        let array_data = ArrayData::builder(DataType::Binary)
-            .len(4)
-            .offset(1)
-            .add_buffer(Buffer::from_slice_ref(&offsets))
-            .add_buffer(Buffer::from_slice_ref(&values))
-            .build();
-        let binary_array = BinaryArray::from(array_data);
-        assert_eq!(
-            [b'p', b'a', b'r', b'q', b'u', b'e', b't'],
-            binary_array.value(1)
-        );
-        assert_eq!(5, binary_array.value_offsets()[0]);
-        assert_eq!(0, binary_array.value_length(0));
-        assert_eq!(5, binary_array.value_offsets()[1]);
-        assert_eq!(7, binary_array.value_length(1));
-    }
-
-    #[test]
-    fn test_large_binary_array() {
-        let values: [u8; 12] = [
-            b'h', b'e', b'l', b'l', b'o', b'p', b'a', b'r', b'q', b'u', b'e', b't',
-        ];
-        let offsets: [i64; 4] = [0, 5, 5, 12];
-
-        // Array data: ["hello", "", "parquet"]
-        let array_data = ArrayData::builder(DataType::LargeBinary)
-            .len(3)
-            .add_buffer(Buffer::from_slice_ref(&offsets))
-            .add_buffer(Buffer::from_slice_ref(&values))
-            .build();
-        let binary_array = LargeBinaryArray::from(array_data);
-        assert_eq!(3, binary_array.len());
-        assert_eq!(0, binary_array.null_count());
-        assert_eq!([b'h', b'e', b'l', b'l', b'o'], binary_array.value(0));
-        assert_eq!([b'h', b'e', b'l', b'l', b'o'], unsafe {
-            binary_array.value_unchecked(0)
-        });
-        assert_eq!([] as [u8; 0], binary_array.value(1));
-        assert_eq!([] as [u8; 0], unsafe { binary_array.value_unchecked(1) });
-        assert_eq!(
-            [b'p', b'a', b'r', b'q', b'u', b'e', b't'],
-            binary_array.value(2)
-        );
-        assert_eq!([b'p', b'a', b'r', b'q', b'u', b'e', b't'], unsafe {
-            binary_array.value_unchecked(2)
-        });
-        assert_eq!(5, binary_array.value_offsets()[2]);
-        assert_eq!(7, binary_array.value_length(2));
-        for i in 0..3 {
-            assert!(binary_array.is_valid(i));
-            assert!(!binary_array.is_null(i));
-        }
-
-        // Test binary array with offset
-        let array_data = ArrayData::builder(DataType::LargeBinary)
-            .len(4)
-            .offset(1)
-            .add_buffer(Buffer::from_slice_ref(&offsets))
-            .add_buffer(Buffer::from_slice_ref(&values))
-            .build();
-        let binary_array = LargeBinaryArray::from(array_data);
-        assert_eq!(
-            [b'p', b'a', b'r', b'q', b'u', b'e', b't'],
-            binary_array.value(1)
-        );
-        assert_eq!([b'p', b'a', b'r', b'q', b'u', b'e', b't'], unsafe {
-            binary_array.value_unchecked(1)
-        });
-        assert_eq!(5, binary_array.value_offsets()[0]);
-        assert_eq!(0, binary_array.value_length(0));
-        assert_eq!(5, binary_array.value_offsets()[1]);
-        assert_eq!(7, binary_array.value_length(1));
-    }
-
-    #[test]
-    fn test_binary_array_from_list_array() {
-        let values: [u8; 12] = [
-            b'h', b'e', b'l', b'l', b'o', b'p', b'a', b'r', b'q', b'u', b'e', b't',
-        ];
-        let values_data = ArrayData::builder(DataType::UInt8)
-            .len(12)
-            .add_buffer(Buffer::from(&values[..]))
-            .build();
-        let offsets: [i32; 4] = [0, 5, 5, 12];
-
-        // Array data: ["hello", "", "parquet"]
-        let array_data1 = ArrayData::builder(DataType::Binary)
-            .len(3)
-            .add_buffer(Buffer::from_slice_ref(&offsets))
-            .add_buffer(Buffer::from_slice_ref(&values))
-            .build();
-        let binary_array1 = BinaryArray::from(array_data1);
-
-        let data_type =
-            DataType::List(Box::new(Field::new("item", DataType::UInt8, false)));
-        let array_data2 = ArrayData::builder(data_type)
-            .len(3)
-            .add_buffer(Buffer::from_slice_ref(&offsets))
-            .add_child_data(values_data)
-            .build();
-        let list_array = ListArray::from(array_data2);
-        let binary_array2 = BinaryArray::from(list_array);
-
-        assert_eq!(2, binary_array2.data().buffers().len());
-        assert_eq!(0, binary_array2.data().child_data().len());
-
-        assert_eq!(binary_array1.len(), binary_array2.len());
-        assert_eq!(binary_array1.null_count(), binary_array2.null_count());
-        assert_eq!(binary_array1.value_offsets(), binary_array2.value_offsets());
-        for i in 0..binary_array1.len() {
-            assert_eq!(binary_array1.value(i), binary_array2.value(i));
-            assert_eq!(binary_array1.value(i), unsafe {
-                binary_array2.value_unchecked(i)
-            });
-            assert_eq!(binary_array1.value_length(i), binary_array2.value_length(i));
-        }
-    }
-
-    #[test]
-    fn test_large_binary_array_from_list_array() {
-        let values: [u8; 12] = [
-            b'h', b'e', b'l', b'l', b'o', b'p', b'a', b'r', b'q', b'u', b'e', b't',
-        ];
-        let values_data = ArrayData::builder(DataType::UInt8)
-            .len(12)
-            .add_buffer(Buffer::from(&values[..]))
-            .build();
-        let offsets: [i64; 4] = [0, 5, 5, 12];
-
-        // Array data: ["hello", "", "parquet"]
-        let array_data1 = ArrayData::builder(DataType::LargeBinary)
-            .len(3)
-            .add_buffer(Buffer::from_slice_ref(&offsets))
-            .add_buffer(Buffer::from_slice_ref(&values))
-            .build();
-        let binary_array1 = LargeBinaryArray::from(array_data1);
-
-        let data_type =
-            DataType::LargeList(Box::new(Field::new("item", DataType::UInt8, false)));
-        let array_data2 = ArrayData::builder(data_type)
-            .len(3)
-            .add_buffer(Buffer::from_slice_ref(&offsets))
-            .add_child_data(values_data)
-            .build();
-        let list_array = LargeListArray::from(array_data2);
-        let binary_array2 = LargeBinaryArray::from(list_array);
-
-        assert_eq!(2, binary_array2.data().buffers().len());
-        assert_eq!(0, binary_array2.data().child_data().len());
-
-        assert_eq!(binary_array1.len(), binary_array2.len());
-        assert_eq!(binary_array1.null_count(), binary_array2.null_count());
-        assert_eq!(binary_array1.value_offsets(), binary_array2.value_offsets());
-        for i in 0..binary_array1.len() {
-            assert_eq!(binary_array1.value(i), binary_array2.value(i));
-            assert_eq!(binary_array1.value(i), unsafe {
-                binary_array2.value_unchecked(i)
-            });
-            assert_eq!(binary_array1.value_length(i), binary_array2.value_length(i));
-        }
-    }
-
-    fn test_generic_binary_array_from_opt_vec<T: BinaryOffsetSizeTrait>() {
-        let values: Vec<Option<&[u8]>> =
-            vec![Some(b"one"), Some(b"two"), None, Some(b""), Some(b"three")];
-        let array = GenericBinaryArray::<T>::from_opt_vec(values);
-        assert_eq!(array.len(), 5);
-        assert_eq!(array.value(0), b"one");
-        assert_eq!(array.value(1), b"two");
-        assert_eq!(array.value(3), b"");
-        assert_eq!(array.value(4), b"three");
-        assert_eq!(array.is_null(0), false);
-        assert_eq!(array.is_null(1), false);
-        assert_eq!(array.is_null(2), true);
-        assert_eq!(array.is_null(3), false);
-        assert_eq!(array.is_null(4), false);
-    }
-
-    #[test]
-    fn test_large_binary_array_from_opt_vec() {
-        test_generic_binary_array_from_opt_vec::<i64>()
-    }
-
-    #[test]
-    fn test_binary_array_from_opt_vec() {
-        test_generic_binary_array_from_opt_vec::<i32>()
-    }
-
-    #[test]
-    fn test_binary_array_from_unbound_iter() {
-        // iterator that doesn't declare (upper) size bound
-        let value_iter = (0..)
-            .scan(0usize, |pos, i| {
-                if *pos < 10 {
-                    *pos += 1;
-                    Some(Some(format!("value {}", i)))
-                } else {
-                    // actually returns up to 10 values
-                    None
-                }
-            })
-            // limited using take()
-            .take(100);
-
-        let (_, upper_size_bound) = value_iter.size_hint();
-        // the upper bound, defined by take above, is 100
-        assert_eq!(upper_size_bound, Some(100));
-        let binary_array: BinaryArray = value_iter.collect();
-        // but the actual number of items in the array should be 10
-        assert_eq!(binary_array.len(), 10);
-    }
-
-    #[test]
-    #[should_panic(
-        expected = "assertion failed: `(left == right)`\n  left: `UInt32`,\n \
-                    right: `UInt8`: BinaryArray can only be created from List<u8> arrays, \
-                    mismatched data types."
-    )]
-    fn test_binary_array_from_incorrect_list_array() {
-        let values: [u32; 12] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
-        let values_data = ArrayData::builder(DataType::UInt32)
-            .len(12)
-            .add_buffer(Buffer::from_slice_ref(&values))
-            .build();
-        let offsets: [i32; 4] = [0, 5, 5, 12];
-
-        let data_type =
-            DataType::List(Box::new(Field::new("item", DataType::UInt32, false)));
-        let array_data = ArrayData::builder(data_type)
-            .len(3)
-            .add_buffer(Buffer::from_slice_ref(&offsets))
-            .add_child_data(values_data)
-            .build();
-        let list_array = ListArray::from(array_data);
-        BinaryArray::from(list_array);
-    }
-
-    #[test]
-    fn test_fixed_size_binary_array() {
-        let values: [u8; 15] = *b"hellotherearrow";
-
-        let array_data = ArrayData::builder(DataType::FixedSizeBinary(5))
-            .len(3)
-            .add_buffer(Buffer::from(&values[..]))
-            .build();
-        let fixed_size_binary_array = FixedSizeBinaryArray::from(array_data);
-        assert_eq!(3, fixed_size_binary_array.len());
-        assert_eq!(0, fixed_size_binary_array.null_count());
-        assert_eq!(
-            [b'h', b'e', b'l', b'l', b'o'],
-            fixed_size_binary_array.value(0)
-        );
-        assert_eq!(
-            [b't', b'h', b'e', b'r', b'e'],
-            fixed_size_binary_array.value(1)
-        );
-        assert_eq!(
-            [b'a', b'r', b'r', b'o', b'w'],
-            fixed_size_binary_array.value(2)
-        );
-        assert_eq!(5, fixed_size_binary_array.value_length());
-        assert_eq!(10, fixed_size_binary_array.value_offset(2));
-        for i in 0..3 {
-            assert!(fixed_size_binary_array.is_valid(i));
-            assert!(!fixed_size_binary_array.is_null(i));
-        }
-
-        // Test binary array with offset
-        let array_data = ArrayData::builder(DataType::FixedSizeBinary(5))
-            .len(2)
-            .offset(1)
-            .add_buffer(Buffer::from(&values[..]))
-            .build();
-        let fixed_size_binary_array = FixedSizeBinaryArray::from(array_data);
-        assert_eq!(
-            [b't', b'h', b'e', b'r', b'e'],
-            fixed_size_binary_array.value(0)
-        );
-        assert_eq!(
-            [b'a', b'r', b'r', b'o', b'w'],
-            fixed_size_binary_array.value(1)
-        );
-        assert_eq!(2, fixed_size_binary_array.len());
-        assert_eq!(5, fixed_size_binary_array.value_offset(0));
-        assert_eq!(5, fixed_size_binary_array.value_length());
-        assert_eq!(10, fixed_size_binary_array.value_offset(1));
-    }
-
-    #[test]
-    #[should_panic(
-        expected = "FixedSizeBinaryArray can only be created from list array of u8 values \
-                    (i.e. FixedSizeList<PrimitiveArray<u8>>)."
-    )]
-    fn test_fixed_size_binary_array_from_incorrect_list_array() {
-        let values: [u32; 12] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
-        let values_data = ArrayData::builder(DataType::UInt32)
-            .len(12)
-            .add_buffer(Buffer::from_slice_ref(&values))
-            .add_child_data(ArrayData::builder(DataType::Boolean).build())
-            .build();
-
-        let array_data = ArrayData::builder(DataType::FixedSizeList(
-            Box::new(Field::new("item", DataType::Binary, false)),
-            4,
-        ))
-        .len(3)
-        .add_child_data(values_data)
-        .build();
-        let list_array = FixedSizeListArray::from(array_data);
-        FixedSizeBinaryArray::from(list_array);
-    }
-
-    #[test]
-    #[should_panic(expected = "BinaryArray out of bounds access")]
-    fn test_binary_array_get_value_index_out_of_bound() {
-        let values: [u8; 12] =
-            [104, 101, 108, 108, 111, 112, 97, 114, 113, 117, 101, 116];
-        let offsets: [i32; 4] = [0, 5, 5, 12];
-        let array_data = ArrayData::builder(DataType::Binary)
-            .len(3)
-            .add_buffer(Buffer::from_slice_ref(&offsets))
-            .add_buffer(Buffer::from_slice_ref(&values))
-            .build();
-        let binary_array = BinaryArray::from(array_data);
-        binary_array.value(4);
-    }
-
-    #[test]
-    fn test_binary_array_fmt_debug() {
-        let values: [u8; 15] = *b"hellotherearrow";
-
-        let array_data = ArrayData::builder(DataType::FixedSizeBinary(5))
-            .len(3)
-            .add_buffer(Buffer::from(&values[..]))
-            .build();
-        let arr = FixedSizeBinaryArray::from(array_data);
-        assert_eq!(
-            "FixedSizeBinaryArray<5>\n[\n  [104, 101, 108, 108, 111],\n  [116, 104, 101, 114, 101],\n  [97, 114, 114, 111, 119],\n]",
-            format!("{:?}", arr)
-        );
-    }
-
-    #[test]
-    fn test_decimal_array() {
-        // let val_8887: [u8; 16] = [192, 219, 180, 17, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
-        // let val_neg_8887: [u8; 16] = [64, 36, 75, 238, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255];
-        let values: [u8; 32] = [
-            192, 219, 180, 17, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 36, 75, 238, 253,
-            255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
-        ];
-        let array_data = ArrayData::builder(DataType::Decimal(23, 6))
-            .len(2)
-            .add_buffer(Buffer::from(&values[..]))
-            .build();
-        let decimal_array = DecimalArray::from(array_data);
-        assert_eq!(8_887_000_000, decimal_array.value(0));
-        assert_eq!(-8_887_000_000, decimal_array.value(1));
-        assert_eq!(16, decimal_array.value_length());
-    }
-
-    #[test]
-    fn test_decimal_array_fmt_debug() {
-        let values: [u8; 32] = [
-            192, 219, 180, 17, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 36, 75, 238, 253,
-            255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
-        ];
-        let array_data = ArrayData::builder(DataType::Decimal(23, 6))
-            .len(2)
-            .add_buffer(Buffer::from(&values[..]))
-            .build();
-        let arr = DecimalArray::from(array_data);
-        assert_eq!(
-            "DecimalArray<23, 6>\n[\n  8887000000,\n  -8887000000,\n]",
-            format!("{:?}", arr)
-        );
-    }
-}
diff --git a/rust/arrow/src/array/array_boolean.rs b/rust/arrow/src/array/array_boolean.rs
deleted file mode 100644
index 67af85d167f..00000000000
--- a/rust/arrow/src/array/array_boolean.rs
+++ /dev/null
@@ -1,291 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::borrow::Borrow;
-use std::convert::From;
-use std::iter::{FromIterator, IntoIterator};
-use std::mem;
-use std::{any::Any, fmt};
-
-use super::*;
-use super::{array::print_long_array, raw_pointer::RawPtrBox};
-use crate::buffer::{Buffer, MutableBuffer};
-use crate::util::bit_util;
-
-/// Array of bools
-pub struct BooleanArray {
-    data: ArrayData,
-    /// Pointer to the value array. The lifetime of this must be <= to the value buffer
-    /// stored in `data`, so it's safe to store.
-    raw_values: RawPtrBox<u8>,
-}
-
-impl fmt::Debug for BooleanArray {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "BooleanArray\n[\n")?;
-        print_long_array(self, f, |array, index, f| {
-            fmt::Debug::fmt(&array.value(index), f)
-        })?;
-        write!(f, "]")
-    }
-}
-
-impl BooleanArray {
-    /// Returns the length of this array.
-    pub fn len(&self) -> usize {
-        self.data.len()
-    }
-
-    /// Returns whether this array is empty.
-    pub fn is_empty(&self) -> bool {
-        self.data.is_empty()
-    }
-
-    // Returns a new boolean array builder
-    pub fn builder(capacity: usize) -> BooleanBuilder {
-        BooleanBuilder::new(capacity)
-    }
-
-    /// Returns a `Buffer` holding all the values of this array.
-    ///
-    /// Note this doesn't take the offset of this array into account.
-    pub fn values(&self) -> &Buffer {
-        &self.data.buffers()[0]
-    }
-
-    /// Returns the boolean value at index `i`.
-    ///
-    /// # Safety
-    /// This doesn't check bounds, the caller must ensure that index < self.len()
-    pub unsafe fn value_unchecked(&self, i: usize) -> bool {
-        let offset = i + self.offset();
-        bit_util::get_bit_raw(self.raw_values.as_ptr(), offset)
-    }
-
-    /// Returns the boolean value at index `i`.
-    ///
-    /// Note this doesn't do any bound checking, for performance reason.
-    pub fn value(&self, i: usize) -> bool {
-        debug_assert!(i < self.len());
-        unsafe { self.value_unchecked(i) }
-    }
-}
-
-impl Array for BooleanArray {
-    fn as_any(&self) -> &Any {
-        self
-    }
-
-    fn data(&self) -> &ArrayData {
-        &self.data
-    }
-
-    /// Returns the total number of bytes of memory occupied by the buffers owned by this [BooleanArray].
-    fn get_buffer_memory_size(&self) -> usize {
-        self.data.get_buffer_memory_size()
-    }
-
-    /// Returns the total number of bytes of memory occupied physically by this [BooleanArray].
-    fn get_array_memory_size(&self) -> usize {
-        self.data.get_array_memory_size() + mem::size_of_val(self)
-    }
-}
-
-impl From<Vec<bool>> for BooleanArray {
-    fn from(data: Vec<bool>) -> Self {
-        let mut mut_buf = MutableBuffer::new_null(data.len());
-        {
-            let mut_slice = mut_buf.as_slice_mut();
-            for (i, b) in data.iter().enumerate() {
-                if *b {
-                    bit_util::set_bit(mut_slice, i);
-                }
-            }
-        }
-        let array_data = ArrayData::builder(DataType::Boolean)
-            .len(data.len())
-            .add_buffer(mut_buf.into())
-            .build();
-        BooleanArray::from(array_data)
-    }
-}
-
-impl From<Vec<Option<bool>>> for BooleanArray {
-    fn from(data: Vec<Option<bool>>) -> Self {
-        BooleanArray::from_iter(data.iter())
-    }
-}
-
-impl From<ArrayData> for BooleanArray {
-    fn from(data: ArrayData) -> Self {
-        assert_eq!(
-            data.buffers().len(),
-            1,
-            "BooleanArray data should contain a single buffer only (values buffer)"
-        );
-        let ptr = data.buffers()[0].as_ptr();
-        Self {
-            data,
-            raw_values: unsafe { RawPtrBox::new(ptr) },
-        }
-    }
-}
-
-impl<'a> IntoIterator for &'a BooleanArray {
-    type Item = Option<bool>;
-    type IntoIter = BooleanIter<'a>;
-
-    fn into_iter(self) -> Self::IntoIter {
-        BooleanIter::<'a>::new(self)
-    }
-}
-
-impl<'a> BooleanArray {
-    /// constructs a new iterator
-    pub fn iter(&'a self) -> BooleanIter<'a> {
-        BooleanIter::<'a>::new(&self)
-    }
-}
-
-impl<Ptr: Borrow<Option<bool>>> FromIterator<Ptr> for BooleanArray {
-    fn from_iter<I: IntoIterator<Item = Ptr>>(iter: I) -> Self {
-        let iter = iter.into_iter();
-        let (_, data_len) = iter.size_hint();
-        let data_len = data_len.expect("Iterator must be sized"); // panic if no upper bound.
-
-        let num_bytes = bit_util::ceil(data_len, 8);
-        let mut null_buf = MutableBuffer::from_len_zeroed(num_bytes);
-        let mut val_buf = MutableBuffer::from_len_zeroed(num_bytes);
-
-        let data = val_buf.as_slice_mut();
-
-        let null_slice = null_buf.as_slice_mut();
-        iter.enumerate().for_each(|(i, item)| {
-            if let Some(a) = item.borrow() {
-                bit_util::set_bit(null_slice, i);
-                if *a {
-                    bit_util::set_bit(data, i);
-                }
-            }
-        });
-
-        let data = ArrayData::new(
-            DataType::Boolean,
-            data_len,
-            None,
-            Some(null_buf.into()),
-            0,
-            vec![val_buf.into()],
-            vec![],
-        );
-        BooleanArray::from(data)
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    use crate::buffer::Buffer;
-    use crate::datatypes::DataType;
-
-    #[test]
-    fn test_boolean_fmt_debug() {
-        let arr = BooleanArray::from(vec![true, false, false]);
-        assert_eq!(
-            "BooleanArray\n[\n  true,\n  false,\n  false,\n]",
-            format!("{:?}", arr)
-        );
-    }
-
-    #[test]
-    fn test_boolean_with_null_fmt_debug() {
-        let mut builder = BooleanArray::builder(3);
-        builder.append_value(true).unwrap();
-        builder.append_null().unwrap();
-        builder.append_value(false).unwrap();
-        let arr = builder.finish();
-        assert_eq!(
-            "BooleanArray\n[\n  true,\n  null,\n  false,\n]",
-            format!("{:?}", arr)
-        );
-    }
-
-    #[test]
-    fn test_boolean_array_from_vec() {
-        let buf = Buffer::from([10_u8]);
-        let arr = BooleanArray::from(vec![false, true, false, true]);
-        assert_eq!(&buf, arr.values());
-        assert_eq!(4, arr.len());
-        assert_eq!(0, arr.offset());
-        assert_eq!(0, arr.null_count());
-        for i in 0..4 {
-            assert!(!arr.is_null(i));
-            assert!(arr.is_valid(i));
-            assert_eq!(i == 1 || i == 3, arr.value(i), "failed at {}", i)
-        }
-    }
-
-    #[test]
-    fn test_boolean_array_from_vec_option() {
-        let buf = Buffer::from([10_u8]);
-        let arr = BooleanArray::from(vec![Some(false), Some(true), None, Some(true)]);
-        assert_eq!(&buf, arr.values());
-        assert_eq!(4, arr.len());
-        assert_eq!(0, arr.offset());
-        assert_eq!(1, arr.null_count());
-        for i in 0..4 {
-            if i == 2 {
-                assert!(arr.is_null(i));
-                assert!(!arr.is_valid(i));
-            } else {
-                assert!(!arr.is_null(i));
-                assert!(arr.is_valid(i));
-                assert_eq!(i == 1 || i == 3, arr.value(i), "failed at {}", i)
-            }
-        }
-    }
-
-    #[test]
-    fn test_boolean_array_builder() {
-        // Test building a boolean array with ArrayData builder and offset
-        // 000011011
-        let buf = Buffer::from([27_u8]);
-        let buf2 = buf.clone();
-        let data = ArrayData::builder(DataType::Boolean)
-            .len(5)
-            .offset(2)
-            .add_buffer(buf)
-            .build();
-        let arr = BooleanArray::from(data);
-        assert_eq!(&buf2, arr.values());
-        assert_eq!(5, arr.len());
-        assert_eq!(2, arr.offset());
-        assert_eq!(0, arr.null_count());
-        for i in 0..3 {
-            assert_eq!(i != 0, arr.value(i), "failed at {}", i);
-        }
-    }
-
-    #[test]
-    #[should_panic(expected = "BooleanArray data should contain a single buffer only \
-                               (values buffer)")]
-    fn test_boolean_array_invalid_buffer_len() {
-        let data = ArrayData::builder(DataType::Boolean).len(5).build();
-        BooleanArray::from(data);
-    }
-}
diff --git a/rust/arrow/src/array/array_dictionary.rs b/rust/arrow/src/array/array_dictionary.rs
deleted file mode 100644
index 5948658157e..00000000000
--- a/rust/arrow/src/array/array_dictionary.rs
+++ /dev/null
@@ -1,408 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::any::Any;
-use std::fmt;
-use std::iter::IntoIterator;
-use std::mem;
-use std::{convert::From, iter::FromIterator};
-
-use super::{
-    make_array, Array, ArrayData, ArrayRef, PrimitiveArray, PrimitiveBuilder,
-    StringArray, StringBuilder, StringDictionaryBuilder,
-};
-use crate::datatypes::ArrowNativeType;
-use crate::datatypes::{ArrowDictionaryKeyType, ArrowPrimitiveType, DataType};
-
-/// A dictionary array where each element is a single value indexed by an integer key.
-/// This is mostly used to represent strings or a limited set of primitive types as integers,
-/// for example when doing NLP analysis or representing chromosomes by name.
-///
-/// Example **with nullable** data:
-///
-/// ```
-/// use arrow::array::{DictionaryArray, Int8Array};
-/// use arrow::datatypes::Int8Type;
-/// let test = vec!["a", "a", "b", "c"];
-/// let array : DictionaryArray<Int8Type> = test.iter().map(|&x| if x == "b" {None} else {Some(x)}).collect();
-/// assert_eq!(array.keys(), &Int8Array::from(vec![Some(0), Some(0), None, Some(1)]));
-/// ```
-///
-/// Example **without nullable** data:
-///
-/// ```
-/// use arrow::array::{DictionaryArray, Int8Array};
-/// use arrow::datatypes::Int8Type;
-/// let test = vec!["a", "a", "b", "c"];
-/// let array : DictionaryArray<Int8Type> = test.into_iter().collect();
-/// assert_eq!(array.keys(), &Int8Array::from(vec![0, 0, 1, 2]));
-/// ```
-pub struct DictionaryArray<K: ArrowPrimitiveType> {
-    /// Data of this dictionary. Note that this is _not_ compatible with the C Data interface,
-    /// as, in the current implementation, `values` below are the first child of this struct.
-    data: ArrayData,
-
-    /// The keys of this dictionary. These are constructed from the buffer and null bitmap
-    /// of `data`.
-    /// Also, note that these do not correspond to the true values of this array. Rather, they map
-    /// to the real values.
-    keys: PrimitiveArray<K>,
-
-    /// Array of dictionary values (can by any DataType).
-    values: ArrayRef,
-
-    /// Values are ordered.
-    is_ordered: bool,
-}
-
-impl<'a, K: ArrowPrimitiveType> DictionaryArray<K> {
-    /// Return an iterator to the keys of this dictionary.
-    pub fn keys(&self) -> &PrimitiveArray<K> {
-        &self.keys
-    }
-
-    /// Returns an array view of the keys of this dictionary
-    pub fn keys_array(&self) -> PrimitiveArray<K> {
-        let data = self.data_ref();
-        let keys_data = ArrayData::new(
-            K::DATA_TYPE,
-            data.len(),
-            Some(data.null_count()),
-            data.null_buffer().cloned(),
-            data.offset(),
-            data.buffers().to_vec(),
-            vec![],
-        );
-        PrimitiveArray::<K>::from(keys_data)
-    }
-
-    /// Returns the lookup key by doing reverse dictionary lookup
-    pub fn lookup_key(&self, value: &str) -> Option<K::Native> {
-        let rd_buf: &StringArray =
-            self.values.as_any().downcast_ref::<StringArray>().unwrap();
-
-        (0..rd_buf.len())
-            .position(|i| rd_buf.value(i) == value)
-            .map(K::Native::from_usize)
-            .flatten()
-    }
-
-    /// Returns an `ArrayRef` to the dictionary values.
-    pub fn values(&self) -> ArrayRef {
-        self.values.clone()
-    }
-
-    /// Returns a clone of the value type of this list.
-    pub fn value_type(&self) -> DataType {
-        self.values.data_ref().data_type().clone()
-    }
-
-    /// The length of the dictionary is the length of the keys array.
-    pub fn len(&self) -> usize {
-        self.keys.len()
-    }
-
-    /// Whether this dictionary is empty
-    pub fn is_empty(&self) -> bool {
-        self.keys.is_empty()
-    }
-
-    // Currently exists for compatibility purposes with Arrow IPC.
-    pub fn is_ordered(&self) -> bool {
-        self.is_ordered
-    }
-}
-
-/// Constructs a `DictionaryArray` from an array data reference.
-impl<T: ArrowPrimitiveType> From<ArrayData> for DictionaryArray<T> {
-    fn from(data: ArrayData) -> Self {
-        assert_eq!(
-            data.buffers().len(),
-            1,
-            "DictionaryArray data should contain a single buffer only (keys)."
-        );
-        assert_eq!(
-            data.child_data().len(),
-            1,
-            "DictionaryArray should contain a single child array (values)."
-        );
-
-        if let DataType::Dictionary(key_data_type, _) = data.data_type() {
-            if key_data_type.as_ref() != &T::DATA_TYPE {
-                panic!("DictionaryArray's data type must match.")
-            };
-            // create a zero-copy of the keys' data
-            let keys = PrimitiveArray::<T>::from(ArrayData::new(
-                T::DATA_TYPE,
-                data.len(),
-                Some(data.null_count()),
-                data.null_buffer().cloned(),
-                data.offset(),
-                data.buffers().to_vec(),
-                vec![],
-            ));
-            let values = make_array(data.child_data()[0].clone());
-            Self {
-                data,
-                keys,
-                values,
-                is_ordered: false,
-            }
-        } else {
-            panic!("DictionaryArray must have Dictionary data type.")
-        }
-    }
-}
-
-/// Constructs a `DictionaryArray` from an iterator of optional strings.
-impl<'a, T: ArrowPrimitiveType + ArrowDictionaryKeyType> FromIterator<Option<&'a str>>
-    for DictionaryArray<T>
-{
-    fn from_iter<I: IntoIterator<Item = Option<&'a str>>>(iter: I) -> Self {
-        let it = iter.into_iter();
-        let (lower, _) = it.size_hint();
-        let key_builder = PrimitiveBuilder::<T>::new(lower);
-        let value_builder = StringBuilder::new(256);
-        let mut builder = StringDictionaryBuilder::new(key_builder, value_builder);
-        it.for_each(|i| {
-            if let Some(i) = i {
-                // Note: impl ... for Result<DictionaryArray<T>> fails with
-                // error[E0117]: only traits defined in the current crate can be implemented for arbitrary types
-                builder
-                    .append(i)
-                    .expect("Unable to append a value to a dictionary array.");
-            } else {
-                builder
-                    .append_null()
-                    .expect("Unable to append a null value to a dictionary array.");
-            }
-        });
-
-        builder.finish()
-    }
-}
-
-/// Constructs a `DictionaryArray` from an iterator of strings.
-impl<'a, T: ArrowPrimitiveType + ArrowDictionaryKeyType> FromIterator<&'a str>
-    for DictionaryArray<T>
-{
-    fn from_iter<I: IntoIterator<Item = &'a str>>(iter: I) -> Self {
-        let it = iter.into_iter();
-        let (lower, _) = it.size_hint();
-        let key_builder = PrimitiveBuilder::<T>::new(lower);
-        let value_builder = StringBuilder::new(256);
-        let mut builder = StringDictionaryBuilder::new(key_builder, value_builder);
-        it.for_each(|i| {
-            builder
-                .append(i)
-                .expect("Unable to append a value to a dictionary array.");
-        });
-
-        builder.finish()
-    }
-}
-
-impl<T: ArrowPrimitiveType> Array for DictionaryArray<T> {
-    fn as_any(&self) -> &Any {
-        self
-    }
-
-    fn data(&self) -> &ArrayData {
-        &self.data
-    }
-
-    fn get_buffer_memory_size(&self) -> usize {
-        // Since both `keys` and `values` derive (are references from) `data`, we only need to account for `data`.
-        self.data.get_buffer_memory_size()
-    }
-
-    fn get_array_memory_size(&self) -> usize {
-        self.data.get_array_memory_size()
-            + self.keys.get_array_memory_size()
-            + self.values.get_array_memory_size()
-            + mem::size_of_val(self)
-    }
-}
-
-impl<T: ArrowPrimitiveType> fmt::Debug for DictionaryArray<T> {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        writeln!(
-            f,
-            "DictionaryArray {{keys: {:?} values: {:?}}}",
-            self.keys, self.values
-        )
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    use crate::{
-        array::Int16Array,
-        datatypes::{Int32Type, Int8Type, UInt32Type, UInt8Type},
-    };
-    use crate::{
-        array::Int16DictionaryArray, array::PrimitiveDictionaryBuilder,
-        datatypes::DataType,
-    };
-    use crate::{buffer::Buffer, datatypes::ToByteSlice};
-
-    #[test]
-    fn test_dictionary_array() {
-        // Construct a value array
-        let value_data = ArrayData::builder(DataType::Int8)
-            .len(8)
-            .add_buffer(Buffer::from(
-                &[10_i8, 11, 12, 13, 14, 15, 16, 17].to_byte_slice(),
-            ))
-            .build();
-
-        // Construct a buffer for value offsets, for the nested array:
-        let keys = Buffer::from(&[2_i16, 3, 4].to_byte_slice());
-
-        // Construct a dictionary array from the above two
-        let key_type = DataType::Int16;
-        let value_type = DataType::Int8;
-        let dict_data_type =
-            DataType::Dictionary(Box::new(key_type), Box::new(value_type));
-        let dict_data = ArrayData::builder(dict_data_type.clone())
-            .len(3)
-            .add_buffer(keys.clone())
-            .add_child_data(value_data.clone())
-            .build();
-        let dict_array = Int16DictionaryArray::from(dict_data);
-
-        let values = dict_array.values();
-        assert_eq!(&value_data, values.data());
-        assert_eq!(DataType::Int8, dict_array.value_type());
-        assert_eq!(3, dict_array.len());
-
-        // Null count only makes sense in terms of the component arrays.
-        assert_eq!(0, dict_array.null_count());
-        assert_eq!(0, dict_array.values().null_count());
-        assert_eq!(dict_array.keys(), &Int16Array::from(vec![2_i16, 3, 4]));
-
-        // Now test with a non-zero offset
-        let dict_data = ArrayData::builder(dict_data_type)
-            .len(2)
-            .offset(1)
-            .add_buffer(keys)
-            .add_child_data(value_data.clone())
-            .build();
-        let dict_array = Int16DictionaryArray::from(dict_data);
-
-        let values = dict_array.values();
-        assert_eq!(&value_data, values.data());
-        assert_eq!(DataType::Int8, dict_array.value_type());
-        assert_eq!(2, dict_array.len());
-        assert_eq!(dict_array.keys(), &Int16Array::from(vec![3_i16, 4]));
-    }
-
-    #[test]
-    fn test_dictionary_array_fmt_debug() {
-        let key_builder = PrimitiveBuilder::<UInt8Type>::new(3);
-        let value_builder = PrimitiveBuilder::<UInt32Type>::new(2);
-        let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder);
-        builder.append(12345678).unwrap();
-        builder.append_null().unwrap();
-        builder.append(22345678).unwrap();
-        let array = builder.finish();
-        assert_eq!(
-            "DictionaryArray {keys: PrimitiveArray<UInt8>\n[\n  0,\n  null,\n  1,\n] values: PrimitiveArray<UInt32>\n[\n  12345678,\n  22345678,\n]}\n",
-            format!("{:?}", array)
-        );
-
-        let key_builder = PrimitiveBuilder::<UInt8Type>::new(20);
-        let value_builder = PrimitiveBuilder::<UInt32Type>::new(2);
-        let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder);
-        for _ in 0..20 {
-            builder.append(1).unwrap();
-        }
-        let array = builder.finish();
-        assert_eq!(
-            "DictionaryArray {keys: PrimitiveArray<UInt8>\n[\n  0,\n  0,\n  0,\n  0,\n  0,\n  0,\n  0,\n  0,\n  0,\n  0,\n  0,\n  0,\n  0,\n  0,\n  0,\n  0,\n  0,\n  0,\n  0,\n  0,\n] values: PrimitiveArray<UInt32>\n[\n  1,\n]}\n",
-            format!("{:?}", array)
-        );
-    }
-
-    #[test]
-    fn test_dictionary_array_from_iter() {
-        let test = vec!["a", "a", "b", "c"];
-        let array: DictionaryArray<Int8Type> = test
-            .iter()
-            .map(|&x| if x == "b" { None } else { Some(x) })
-            .collect();
-        assert_eq!(
-            "DictionaryArray {keys: PrimitiveArray<Int8>\n[\n  0,\n  0,\n  null,\n  1,\n] values: StringArray\n[\n  \"a\",\n  \"c\",\n]}\n",
-            format!("{:?}", array)
-        );
-
-        let array: DictionaryArray<Int8Type> = test.into_iter().collect();
-        assert_eq!(
-            "DictionaryArray {keys: PrimitiveArray<Int8>\n[\n  0,\n  0,\n  1,\n  2,\n] values: StringArray\n[\n  \"a\",\n  \"b\",\n  \"c\",\n]}\n",
-            format!("{:?}", array)
-        );
-    }
-
-    #[test]
-    fn test_dictionary_array_reverse_lookup_key() {
-        let test = vec!["a", "a", "b", "c"];
-        let array: DictionaryArray<Int8Type> = test.into_iter().collect();
-
-        assert_eq!(array.lookup_key("c"), Some(2));
-
-        // Direction of building a dictionary is the iterator direction
-        let test = vec!["t3", "t3", "t2", "t2", "t1", "t3", "t4", "t1", "t0"];
-        let array: DictionaryArray<Int8Type> = test.into_iter().collect();
-
-        assert_eq!(array.lookup_key("t1"), Some(2));
-        assert_eq!(array.lookup_key("non-existent"), None);
-    }
-
-    #[test]
-    fn test_dictionary_keys_as_primitive_array() {
-        let test = vec!["a", "b", "c", "a"];
-        let array: DictionaryArray<Int8Type> = test.into_iter().collect();
-
-        let keys = array.keys_array();
-        assert_eq!(&DataType::Int8, keys.data_type());
-        assert_eq!(0, keys.null_count());
-        assert_eq!(&[0, 1, 2, 0], keys.values());
-    }
-
-    #[test]
-    fn test_dictionary_keys_as_primitive_array_with_null() {
-        let test = vec![Some("a"), None, Some("b"), None, None, Some("a")];
-        let array: DictionaryArray<Int32Type> = test.into_iter().collect();
-
-        let keys = array.keys_array();
-        assert_eq!(&DataType::Int32, keys.data_type());
-        assert_eq!(3, keys.null_count());
-
-        assert_eq!(true, keys.is_valid(0));
-        assert_eq!(false, keys.is_valid(1));
-        assert_eq!(true, keys.is_valid(2));
-        assert_eq!(false, keys.is_valid(3));
-        assert_eq!(false, keys.is_valid(4));
-        assert_eq!(true, keys.is_valid(5));
-
-        assert_eq!(0, keys.value(0));
-        assert_eq!(1, keys.value(2));
-        assert_eq!(0, keys.value(5));
-    }
-}
diff --git a/rust/arrow/src/array/array_list.rs b/rust/arrow/src/array/array_list.rs
deleted file mode 100644
index 0e334631adf..00000000000
--- a/rust/arrow/src/array/array_list.rs
+++ /dev/null
@@ -1,1056 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::any::Any;
-use std::fmt;
-use std::mem;
-
-use num::Num;
-
-use super::{
-    array::print_long_array, make_array, raw_pointer::RawPtrBox, Array, ArrayData,
-    ArrayRef, BooleanBufferBuilder, GenericListArrayIter, PrimitiveArray,
-};
-use crate::{
-    buffer::MutableBuffer,
-    datatypes::{ArrowNativeType, ArrowPrimitiveType, DataType, Field},
-    error::ArrowError,
-};
-
-/// trait declaring an offset size, relevant for i32 vs i64 array types.
-pub trait OffsetSizeTrait: ArrowNativeType + Num + Ord + std::ops::AddAssign {
-    fn is_large() -> bool;
-}
-
-impl OffsetSizeTrait for i32 {
-    #[inline]
-    fn is_large() -> bool {
-        false
-    }
-}
-
-impl OffsetSizeTrait for i64 {
-    #[inline]
-    fn is_large() -> bool {
-        true
-    }
-}
-
-pub struct GenericListArray<OffsetSize> {
-    data: ArrayData,
-    values: ArrayRef,
-    value_offsets: RawPtrBox<OffsetSize>,
-}
-
-impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
-    /// Returns a reference to the values of this list.
-    pub fn values(&self) -> ArrayRef {
-        self.values.clone()
-    }
-
-    /// Returns a clone of the value type of this list.
-    pub fn value_type(&self) -> DataType {
-        self.values.data_ref().data_type().clone()
-    }
-
-    /// Returns ith value of this list array.
-    /// # Safety
-    /// Caller must ensure that the index is within the array bounds
-    pub unsafe fn value_unchecked(&self, i: usize) -> ArrayRef {
-        let end = *self.value_offsets().get_unchecked(i + 1);
-        let start = *self.value_offsets().get_unchecked(i);
-        self.values
-            .slice(start.to_usize().unwrap(), (end - start).to_usize().unwrap())
-    }
-
-    /// Returns ith value of this list array.
-    pub fn value(&self, i: usize) -> ArrayRef {
-        let end = self.value_offsets()[i + 1];
-        let start = self.value_offsets()[i];
-        self.values
-            .slice(start.to_usize().unwrap(), (end - start).to_usize().unwrap())
-    }
-
-    /// Returns the offset values in the offsets buffer
-    #[inline]
-    pub fn value_offsets(&self) -> &[OffsetSize] {
-        // Soundness
-        //     pointer alignment & location is ensured by RawPtrBox
-        //     buffer bounds/offset is ensured by the ArrayData instance.
-        unsafe {
-            std::slice::from_raw_parts(
-                self.value_offsets.as_ptr().add(self.data.offset()),
-                self.len() + 1,
-            )
-        }
-    }
-
-    /// Returns the length for value at index `i`.
-    #[inline]
-    pub fn value_length(&self, i: usize) -> OffsetSize {
-        let offsets = self.value_offsets();
-        offsets[i + 1] - offsets[i]
-    }
-
-    /// constructs a new iterator
-    pub fn iter<'a>(&'a self) -> GenericListArrayIter<'a, OffsetSize> {
-        GenericListArrayIter::<'a, OffsetSize>::new(&self)
-    }
-
-    #[inline]
-    fn get_type(data_type: &DataType) -> Option<&DataType> {
-        if OffsetSize::is_large() {
-            if let DataType::LargeList(child) = data_type {
-                Some(child.data_type())
-            } else {
-                None
-            }
-        } else if let DataType::List(child) = data_type {
-            Some(child.data_type())
-        } else {
-            None
-        }
-    }
-
-    /// Creates a [`GenericListArray`] from an iterator of primitive values
-    /// # Example
-    /// ```
-    /// # use arrow::array::ListArray;
-    /// # use arrow::datatypes::Int32Type;
-    /// let data = vec![
-    ///    Some(vec![Some(0), Some(1), Some(2)]),
-    ///    None,
-    ///    Some(vec![Some(3), None, Some(5)]),
-    ///    Some(vec![Some(6), Some(7)]),
-    /// ];
-    /// let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data);
-    /// println!("{:?}", list_array);
-    /// ```
-    pub fn from_iter_primitive<T, P, I>(iter: I) -> Self
-    where
-        T: ArrowPrimitiveType,
-        P: AsRef<[Option<<T as ArrowPrimitiveType>::Native>]>
-            + IntoIterator<Item = Option<<T as ArrowPrimitiveType>::Native>>,
-        I: IntoIterator<Item = Option<P>>,
-    {
-        let iterator = iter.into_iter();
-        let (lower, _) = iterator.size_hint();
-
-        let mut offsets =
-            MutableBuffer::new((lower + 1) * std::mem::size_of::<OffsetSize>());
-        let mut length_so_far = OffsetSize::zero();
-        offsets.push(length_so_far);
-
-        let mut null_buf = BooleanBufferBuilder::new(lower);
-
-        let values: PrimitiveArray<T> = iterator
-            .filter_map(|maybe_slice| {
-                // regardless of whether the item is Some, the offsets and null buffers must be updated.
-                match &maybe_slice {
-                    Some(x) => {
-                        length_so_far +=
-                            OffsetSize::from_usize(x.as_ref().len()).unwrap();
-                        null_buf.append(true);
-                    }
-                    None => null_buf.append(false),
-                };
-                offsets.push(length_so_far);
-                maybe_slice
-            })
-            .flatten()
-            .collect();
-
-        let field = Box::new(Field::new("item", T::DATA_TYPE, true));
-        let data_type = if OffsetSize::is_large() {
-            DataType::LargeList(field)
-        } else {
-            DataType::List(field)
-        };
-        let data = ArrayData::builder(data_type)
-            .len(null_buf.len())
-            .add_buffer(offsets.into())
-            .add_child_data(values.data().clone())
-            .null_bit_buffer(null_buf.into())
-            .build();
-        Self::from(data)
-    }
-}
-
-impl<OffsetSize: OffsetSizeTrait> From<ArrayData> for GenericListArray<OffsetSize> {
-    fn from(data: ArrayData) -> Self {
-        Self::try_new_from_array_data(data).expect(
-            "Expected infallable creation of GenericListArray from ArrayDataRef failed",
-        )
-    }
-}
-
-impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
-    fn try_new_from_array_data(data: ArrayData) -> Result<Self, ArrowError> {
-        if data.buffers().len() != 1 {
-            return Err(ArrowError::InvalidArgumentError(
-                format!("ListArray data should contain a single buffer only (value offsets), had {}",
-                        data.len())));
-        }
-
-        if data.child_data().len() != 1 {
-            return Err(ArrowError::InvalidArgumentError(format!(
-                "ListArray should contain a single child array (values array), had {}",
-                data.child_data().len()
-            )));
-        }
-
-        let values = data.child_data()[0].clone();
-
-        if let Some(child_data_type) = Self::get_type(data.data_type()) {
-            if values.data_type() != child_data_type {
-                return Err(ArrowError::InvalidArgumentError(format!(
-                    "[Large]ListArray's child datatype {:?} does not \
-                             correspond to the List's datatype {:?}",
-                    values.data_type(),
-                    child_data_type
-                )));
-            }
-        } else {
-            return Err(ArrowError::InvalidArgumentError(format!(
-                "[Large]ListArray's datatype must be [Large]ListArray(). It is {:?}",
-                data.data_type()
-            )));
-        }
-
-        let values = make_array(values);
-        let value_offsets = data.buffers()[0].as_ptr();
-
-        let value_offsets = unsafe { RawPtrBox::<OffsetSize>::new(value_offsets) };
-        unsafe {
-            if !(*value_offsets.as_ptr().offset(0)).is_zero() {
-                return Err(ArrowError::InvalidArgumentError(String::from(
-                    "offsets do not start at zero",
-                )));
-            }
-        }
-        Ok(Self {
-            data,
-            values,
-            value_offsets,
-        })
-    }
-}
-
-impl<OffsetSize: 'static + OffsetSizeTrait> Array for GenericListArray<OffsetSize> {
-    fn as_any(&self) -> &Any {
-        self
-    }
-
-    fn data(&self) -> &ArrayData {
-        &self.data
-    }
-
-    /// Returns the total number of bytes of memory occupied by the buffers owned by this [ListArray].
-    fn get_buffer_memory_size(&self) -> usize {
-        self.data.get_buffer_memory_size()
-    }
-
-    /// Returns the total number of bytes of memory occupied physically by this [ListArray].
-    fn get_array_memory_size(&self) -> usize {
-        self.data.get_array_memory_size() + mem::size_of_val(self)
-    }
-}
-
-impl<OffsetSize: OffsetSizeTrait> fmt::Debug for GenericListArray<OffsetSize> {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        let prefix = if OffsetSize::is_large() { "Large" } else { "" };
-
-        write!(f, "{}ListArray\n[\n", prefix)?;
-        print_long_array(self, f, |array, index, f| {
-            fmt::Debug::fmt(&array.value(index), f)
-        })?;
-        write!(f, "]")
-    }
-}
-
-/// A list array where each element is a variable-sized sequence of values with the same
-/// type whose memory offsets between elements are represented by a i32.
-pub type ListArray = GenericListArray<i32>;
-
-/// A list array where each element is a variable-sized sequence of values with the same
-/// type whose memory offsets between elements are represented by a i64.
-pub type LargeListArray = GenericListArray<i64>;
-
-/// A list array where each element is a fixed-size sequence of values with the same
-/// type whose maximum length is represented by a i32.
-pub struct FixedSizeListArray {
-    data: ArrayData,
-    values: ArrayRef,
-    length: i32,
-}
-
-impl FixedSizeListArray {
-    /// Returns a reference to the values of this list.
-    pub fn values(&self) -> ArrayRef {
-        self.values.clone()
-    }
-
-    /// Returns a clone of the value type of this list.
-    pub fn value_type(&self) -> DataType {
-        self.values.data_ref().data_type().clone()
-    }
-
-    /// Returns ith value of this list array.
-    pub fn value(&self, i: usize) -> ArrayRef {
-        self.values
-            .slice(self.value_offset(i) as usize, self.value_length() as usize)
-    }
-
-    /// Returns the offset for value at index `i`.
-    ///
-    /// Note this doesn't do any bound checking, for performance reason.
-    #[inline]
-    pub fn value_offset(&self, i: usize) -> i32 {
-        self.value_offset_at(self.data.offset() + i)
-    }
-
-    /// Returns the length for value at index `i`.
-    ///
-    /// Note this doesn't do any bound checking, for performance reason.
-    #[inline]
-    pub const fn value_length(&self) -> i32 {
-        self.length
-    }
-
-    #[inline]
-    const fn value_offset_at(&self, i: usize) -> i32 {
-        i as i32 * self.length
-    }
-}
-
-impl From<ArrayData> for FixedSizeListArray {
-    fn from(data: ArrayData) -> Self {
-        assert_eq!(
-            data.buffers().len(),
-            0,
-            "FixedSizeListArray data should not contain a buffer for value offsets"
-        );
-        assert_eq!(
-            data.child_data().len(),
-            1,
-            "FixedSizeListArray should contain a single child array (values array)"
-        );
-        let values = make_array(data.child_data()[0].clone());
-        let length = match data.data_type() {
-            DataType::FixedSizeList(_, len) => {
-                if *len > 0 {
-                    // check that child data is multiple of length
-                    assert_eq!(
-                        values.len() % *len as usize,
-                        0,
-                        "FixedSizeListArray child array length should be a multiple of {}",
-                        len
-                    );
-                }
-
-                *len
-            }
-            _ => {
-                panic!("FixedSizeListArray data should contain a FixedSizeList data type")
-            }
-        };
-        Self {
-            data,
-            values,
-            length,
-        }
-    }
-}
-
-impl Array for FixedSizeListArray {
-    fn as_any(&self) -> &Any {
-        self
-    }
-
-    fn data(&self) -> &ArrayData {
-        &self.data
-    }
-
-    /// Returns the total number of bytes of memory occupied by the buffers owned by this [FixedSizeListArray].
-    fn get_buffer_memory_size(&self) -> usize {
-        self.data.get_buffer_memory_size() + self.values().get_buffer_memory_size()
-    }
-
-    /// Returns the total number of bytes of memory occupied physically by this [FixedSizeListArray].
-    fn get_array_memory_size(&self) -> usize {
-        self.data.get_array_memory_size()
-            + self.values().get_array_memory_size()
-            + mem::size_of_val(self)
-    }
-}
-
-impl fmt::Debug for FixedSizeListArray {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "FixedSizeListArray<{}>\n[\n", self.value_length())?;
-        print_long_array(self, f, |array, index, f| {
-            fmt::Debug::fmt(&array.value(index), f)
-        })?;
-        write!(f, "]")
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use crate::{
-        alloc,
-        array::ArrayData,
-        array::Int32Array,
-        buffer::Buffer,
-        datatypes::Field,
-        datatypes::{Int32Type, ToByteSlice},
-        util::bit_util,
-    };
-
-    use super::*;
-
-    fn create_from_buffers() -> ListArray {
-        // Construct a value array
-        let value_data = ArrayData::builder(DataType::Int32)
-            .len(8)
-            .add_buffer(Buffer::from(&[0, 1, 2, 3, 4, 5, 6, 7].to_byte_slice()))
-            .build();
-
-        // Construct a buffer for value offsets, for the nested array:
-        //  [[0, 1, 2], [3, 4, 5], [6, 7]]
-        let value_offsets = Buffer::from(&[0, 3, 6, 8].to_byte_slice());
-
-        // Construct a list array from the above two
-        let list_data_type =
-            DataType::List(Box::new(Field::new("item", DataType::Int32, true)));
-        let list_data = ArrayData::builder(list_data_type)
-            .len(3)
-            .add_buffer(value_offsets)
-            .add_child_data(value_data)
-            .build();
-        ListArray::from(list_data)
-    }
-
-    #[test]
-    fn test_from_iter_primitive() {
-        let data = vec![
-            Some(vec![Some(0), Some(1), Some(2)]),
-            Some(vec![Some(3), Some(4), Some(5)]),
-            Some(vec![Some(6), Some(7)]),
-        ];
-        let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data);
-
-        let another = create_from_buffers();
-        assert_eq!(list_array, another)
-    }
-
-    #[test]
-    fn test_list_array() {
-        // Construct a value array
-        let value_data = ArrayData::builder(DataType::Int32)
-            .len(8)
-            .add_buffer(Buffer::from_slice_ref(&[0, 1, 2, 3, 4, 5, 6, 7]))
-            .build();
-
-        // Construct a buffer for value offsets, for the nested array:
-        //  [[0, 1, 2], [3, 4, 5], [6, 7]]
-        let value_offsets = Buffer::from_slice_ref(&[0, 3, 6, 8]);
-
-        // Construct a list array from the above two
-        let list_data_type =
-            DataType::List(Box::new(Field::new("item", DataType::Int32, false)));
-        let list_data = ArrayData::builder(list_data_type.clone())
-            .len(3)
-            .add_buffer(value_offsets.clone())
-            .add_child_data(value_data.clone())
-            .build();
-        let list_array = ListArray::from(list_data);
-
-        let values = list_array.values();
-        assert_eq!(&value_data, values.data());
-        assert_eq!(DataType::Int32, list_array.value_type());
-        assert_eq!(3, list_array.len());
-        assert_eq!(0, list_array.null_count());
-        assert_eq!(6, list_array.value_offsets()[2]);
-        assert_eq!(2, list_array.value_length(2));
-        assert_eq!(
-            0,
-            list_array
-                .value(0)
-                .as_any()
-                .downcast_ref::<Int32Array>()
-                .unwrap()
-                .value(0)
-        );
-        assert_eq!(
-            0,
-            unsafe { list_array.value_unchecked(0) }
-                .as_any()
-                .downcast_ref::<Int32Array>()
-                .unwrap()
-                .value(0)
-        );
-        for i in 0..3 {
-            assert!(list_array.is_valid(i));
-            assert!(!list_array.is_null(i));
-        }
-
-        // Now test with a non-zero offset
-        let list_data = ArrayData::builder(list_data_type)
-            .len(3)
-            .offset(1)
-            .add_buffer(value_offsets)
-            .add_child_data(value_data.clone())
-            .build();
-        let list_array = ListArray::from(list_data);
-
-        let values = list_array.values();
-        assert_eq!(&value_data, values.data());
-        assert_eq!(DataType::Int32, list_array.value_type());
-        assert_eq!(3, list_array.len());
-        assert_eq!(0, list_array.null_count());
-        assert_eq!(6, list_array.value_offsets()[1]);
-        assert_eq!(2, list_array.value_length(1));
-        assert_eq!(
-            3,
-            list_array
-                .value(0)
-                .as_any()
-                .downcast_ref::<Int32Array>()
-                .unwrap()
-                .value(0)
-        );
-        assert_eq!(
-            3,
-            unsafe { list_array.value_unchecked(0) }
-                .as_any()
-                .downcast_ref::<Int32Array>()
-                .unwrap()
-                .value(0)
-        );
-    }
-
-    #[test]
-    fn test_large_list_array() {
-        // Construct a value array
-        let value_data = ArrayData::builder(DataType::Int32)
-            .len(8)
-            .add_buffer(Buffer::from_slice_ref(&[0, 1, 2, 3, 4, 5, 6, 7]))
-            .build();
-
-        // Construct a buffer for value offsets, for the nested array:
-        //  [[0, 1, 2], [3, 4, 5], [6, 7]]
-        let value_offsets = Buffer::from_slice_ref(&[0i64, 3, 6, 8]);
-
-        // Construct a list array from the above two
-        let list_data_type =
-            DataType::LargeList(Box::new(Field::new("item", DataType::Int32, false)));
-        let list_data = ArrayData::builder(list_data_type.clone())
-            .len(3)
-            .add_buffer(value_offsets.clone())
-            .add_child_data(value_data.clone())
-            .build();
-        let list_array = LargeListArray::from(list_data);
-
-        let values = list_array.values();
-        assert_eq!(&value_data, values.data());
-        assert_eq!(DataType::Int32, list_array.value_type());
-        assert_eq!(3, list_array.len());
-        assert_eq!(0, list_array.null_count());
-        assert_eq!(6, list_array.value_offsets()[2]);
-        assert_eq!(2, list_array.value_length(2));
-        assert_eq!(
-            0,
-            list_array
-                .value(0)
-                .as_any()
-                .downcast_ref::<Int32Array>()
-                .unwrap()
-                .value(0)
-        );
-        assert_eq!(
-            0,
-            unsafe { list_array.value_unchecked(0) }
-                .as_any()
-                .downcast_ref::<Int32Array>()
-                .unwrap()
-                .value(0)
-        );
-        for i in 0..3 {
-            assert!(list_array.is_valid(i));
-            assert!(!list_array.is_null(i));
-        }
-
-        // Now test with a non-zero offset
-        let list_data = ArrayData::builder(list_data_type)
-            .len(3)
-            .offset(1)
-            .add_buffer(value_offsets)
-            .add_child_data(value_data.clone())
-            .build();
-        let list_array = LargeListArray::from(list_data);
-
-        let values = list_array.values();
-        assert_eq!(&value_data, values.data());
-        assert_eq!(DataType::Int32, list_array.value_type());
-        assert_eq!(3, list_array.len());
-        assert_eq!(0, list_array.null_count());
-        assert_eq!(6, list_array.value_offsets()[1]);
-        assert_eq!(2, list_array.value_length(1));
-        assert_eq!(
-            3,
-            list_array
-                .value(0)
-                .as_any()
-                .downcast_ref::<Int32Array>()
-                .unwrap()
-                .value(0)
-        );
-        assert_eq!(
-            3,
-            unsafe { list_array.value_unchecked(0) }
-                .as_any()
-                .downcast_ref::<Int32Array>()
-                .unwrap()
-                .value(0)
-        );
-    }
-
-    #[test]
-    fn test_fixed_size_list_array() {
-        // Construct a value array
-        let value_data = ArrayData::builder(DataType::Int32)
-            .len(9)
-            .add_buffer(Buffer::from_slice_ref(&[0, 1, 2, 3, 4, 5, 6, 7, 8]))
-            .build();
-
-        // Construct a list array from the above two
-        let list_data_type = DataType::FixedSizeList(
-            Box::new(Field::new("item", DataType::Int32, false)),
-            3,
-        );
-        let list_data = ArrayData::builder(list_data_type.clone())
-            .len(3)
-            .add_child_data(value_data.clone())
-            .build();
-        let list_array = FixedSizeListArray::from(list_data);
-
-        let values = list_array.values();
-        assert_eq!(&value_data, values.data());
-        assert_eq!(DataType::Int32, list_array.value_type());
-        assert_eq!(3, list_array.len());
-        assert_eq!(0, list_array.null_count());
-        assert_eq!(6, list_array.value_offset(2));
-        assert_eq!(3, list_array.value_length());
-        assert_eq!(
-            0,
-            list_array
-                .value(0)
-                .as_any()
-                .downcast_ref::<Int32Array>()
-                .unwrap()
-                .value(0)
-        );
-        for i in 0..3 {
-            assert!(list_array.is_valid(i));
-            assert!(!list_array.is_null(i));
-        }
-
-        // Now test with a non-zero offset
-        let list_data = ArrayData::builder(list_data_type)
-            .len(3)
-            .offset(1)
-            .add_child_data(value_data.clone())
-            .build();
-        let list_array = FixedSizeListArray::from(list_data);
-
-        let values = list_array.values();
-        assert_eq!(&value_data, values.data());
-        assert_eq!(DataType::Int32, list_array.value_type());
-        assert_eq!(3, list_array.len());
-        assert_eq!(0, list_array.null_count());
-        assert_eq!(
-            3,
-            list_array
-                .value(0)
-                .as_any()
-                .downcast_ref::<Int32Array>()
-                .unwrap()
-                .value(0)
-        );
-        assert_eq!(6, list_array.value_offset(1));
-        assert_eq!(3, list_array.value_length());
-    }
-
-    #[test]
-    #[should_panic(
-        expected = "FixedSizeListArray child array length should be a multiple of 3"
-    )]
-    fn test_fixed_size_list_array_unequal_children() {
-        // Construct a value array
-        let value_data = ArrayData::builder(DataType::Int32)
-            .len(8)
-            .add_buffer(Buffer::from_slice_ref(&[0, 1, 2, 3, 4, 5, 6, 7]))
-            .build();
-
-        // Construct a list array from the above two
-        let list_data_type = DataType::FixedSizeList(
-            Box::new(Field::new("item", DataType::Int32, false)),
-            3,
-        );
-        let list_data = ArrayData::builder(list_data_type)
-            .len(3)
-            .add_child_data(value_data)
-            .build();
-        FixedSizeListArray::from(list_data);
-    }
-
-    #[test]
-    fn test_list_array_slice() {
-        // Construct a value array
-        let value_data = ArrayData::builder(DataType::Int32)
-            .len(10)
-            .add_buffer(Buffer::from_slice_ref(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
-            .build();
-
-        // Construct a buffer for value offsets, for the nested array:
-        //  [[0, 1], null, null, [2, 3], [4, 5], null, [6, 7, 8], null, [9]]
-        let value_offsets = Buffer::from_slice_ref(&[0, 2, 2, 2, 4, 6, 6, 9, 9, 10]);
-        // 01011001 00000001
-        let mut null_bits: [u8; 2] = [0; 2];
-        bit_util::set_bit(&mut null_bits, 0);
-        bit_util::set_bit(&mut null_bits, 3);
-        bit_util::set_bit(&mut null_bits, 4);
-        bit_util::set_bit(&mut null_bits, 6);
-        bit_util::set_bit(&mut null_bits, 8);
-
-        // Construct a list array from the above two
-        let list_data_type =
-            DataType::List(Box::new(Field::new("item", DataType::Int32, false)));
-        let list_data = ArrayData::builder(list_data_type)
-            .len(9)
-            .add_buffer(value_offsets)
-            .add_child_data(value_data.clone())
-            .null_bit_buffer(Buffer::from(null_bits))
-            .build();
-        let list_array = ListArray::from(list_data);
-
-        let values = list_array.values();
-        assert_eq!(&value_data, values.data());
-        assert_eq!(DataType::Int32, list_array.value_type());
-        assert_eq!(9, list_array.len());
-        assert_eq!(4, list_array.null_count());
-        assert_eq!(2, list_array.value_offsets()[3]);
-        assert_eq!(2, list_array.value_length(3));
-
-        let sliced_array = list_array.slice(1, 6);
-        assert_eq!(6, sliced_array.len());
-        assert_eq!(1, sliced_array.offset());
-        assert_eq!(3, sliced_array.null_count());
-
-        for i in 0..sliced_array.len() {
-            if bit_util::get_bit(&null_bits, sliced_array.offset() + i) {
-                assert!(sliced_array.is_valid(i));
-            } else {
-                assert!(sliced_array.is_null(i));
-            }
-        }
-
-        // Check offset and length for each non-null value.
-        let sliced_list_array =
-            sliced_array.as_any().downcast_ref::<ListArray>().unwrap();
-        assert_eq!(2, sliced_list_array.value_offsets()[2]);
-        assert_eq!(2, sliced_list_array.value_length(2));
-        assert_eq!(4, sliced_list_array.value_offsets()[3]);
-        assert_eq!(2, sliced_list_array.value_length(3));
-        assert_eq!(6, sliced_list_array.value_offsets()[5]);
-        assert_eq!(3, sliced_list_array.value_length(5));
-    }
-
-    #[test]
-    fn test_large_list_array_slice() {
-        // Construct a value array
-        let value_data = ArrayData::builder(DataType::Int32)
-            .len(10)
-            .add_buffer(Buffer::from_slice_ref(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
-            .build();
-
-        // Construct a buffer for value offsets, for the nested array:
-        //  [[0, 1], null, null, [2, 3], [4, 5], null, [6, 7, 8], null, [9]]
-        let value_offsets = Buffer::from_slice_ref(&[0i64, 2, 2, 2, 4, 6, 6, 9, 9, 10]);
-        // 01011001 00000001
-        let mut null_bits: [u8; 2] = [0; 2];
-        bit_util::set_bit(&mut null_bits, 0);
-        bit_util::set_bit(&mut null_bits, 3);
-        bit_util::set_bit(&mut null_bits, 4);
-        bit_util::set_bit(&mut null_bits, 6);
-        bit_util::set_bit(&mut null_bits, 8);
-
-        // Construct a list array from the above two
-        let list_data_type =
-            DataType::LargeList(Box::new(Field::new("item", DataType::Int32, false)));
-        let list_data = ArrayData::builder(list_data_type)
-            .len(9)
-            .add_buffer(value_offsets)
-            .add_child_data(value_data.clone())
-            .null_bit_buffer(Buffer::from(null_bits))
-            .build();
-        let list_array = LargeListArray::from(list_data);
-
-        let values = list_array.values();
-        assert_eq!(&value_data, values.data());
-        assert_eq!(DataType::Int32, list_array.value_type());
-        assert_eq!(9, list_array.len());
-        assert_eq!(4, list_array.null_count());
-        assert_eq!(2, list_array.value_offsets()[3]);
-        assert_eq!(2, list_array.value_length(3));
-
-        let sliced_array = list_array.slice(1, 6);
-        assert_eq!(6, sliced_array.len());
-        assert_eq!(1, sliced_array.offset());
-        assert_eq!(3, sliced_array.null_count());
-
-        for i in 0..sliced_array.len() {
-            if bit_util::get_bit(&null_bits, sliced_array.offset() + i) {
-                assert!(sliced_array.is_valid(i));
-            } else {
-                assert!(sliced_array.is_null(i));
-            }
-        }
-
-        // Check offset and length for each non-null value.
-        let sliced_list_array = sliced_array
-            .as_any()
-            .downcast_ref::<LargeListArray>()
-            .unwrap();
-        assert_eq!(2, sliced_list_array.value_offsets()[2]);
-        assert_eq!(2, sliced_list_array.value_length(2));
-        assert_eq!(4, sliced_list_array.value_offsets()[3]);
-        assert_eq!(2, sliced_list_array.value_length(3));
-        assert_eq!(6, sliced_list_array.value_offsets()[5]);
-        assert_eq!(3, sliced_list_array.value_length(5));
-    }
-
-    #[test]
-    #[should_panic(expected = "index out of bounds: the len is 10 but the index is 11")]
-    fn test_list_array_index_out_of_bound() {
-        // Construct a value array
-        let value_data = ArrayData::builder(DataType::Int32)
-            .len(10)
-            .add_buffer(Buffer::from_slice_ref(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
-            .build();
-
-        // Construct a buffer for value offsets, for the nested array:
-        //  [[0, 1], null, null, [2, 3], [4, 5], null, [6, 7, 8], null, [9]]
-        let value_offsets = Buffer::from_slice_ref(&[0i64, 2, 2, 2, 4, 6, 6, 9, 9, 10]);
-        // 01011001 00000001
-        let mut null_bits: [u8; 2] = [0; 2];
-        bit_util::set_bit(&mut null_bits, 0);
-        bit_util::set_bit(&mut null_bits, 3);
-        bit_util::set_bit(&mut null_bits, 4);
-        bit_util::set_bit(&mut null_bits, 6);
-        bit_util::set_bit(&mut null_bits, 8);
-
-        // Construct a list array from the above two
-        let list_data_type =
-            DataType::LargeList(Box::new(Field::new("item", DataType::Int32, false)));
-        let list_data = ArrayData::builder(list_data_type)
-            .len(9)
-            .add_buffer(value_offsets)
-            .add_child_data(value_data)
-            .null_bit_buffer(Buffer::from(null_bits))
-            .build();
-        let list_array = LargeListArray::from(list_data);
-        assert_eq!(9, list_array.len());
-
-        list_array.value(10);
-    }
-
-    #[test]
-    fn test_fixed_size_list_array_slice() {
-        // Construct a value array
-        let value_data = ArrayData::builder(DataType::Int32)
-            .len(10)
-            .add_buffer(Buffer::from_slice_ref(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
-            .build();
-
-        // Set null buts for the nested array:
-        //  [[0, 1], null, null, [6, 7], [8, 9]]
-        // 01011001 00000001
-        let mut null_bits: [u8; 1] = [0; 1];
-        bit_util::set_bit(&mut null_bits, 0);
-        bit_util::set_bit(&mut null_bits, 3);
-        bit_util::set_bit(&mut null_bits, 4);
-
-        // Construct a fixed size list array from the above two
-        let list_data_type = DataType::FixedSizeList(
-            Box::new(Field::new("item", DataType::Int32, false)),
-            2,
-        );
-        let list_data = ArrayData::builder(list_data_type)
-            .len(5)
-            .add_child_data(value_data.clone())
-            .null_bit_buffer(Buffer::from(null_bits))
-            .build();
-        let list_array = FixedSizeListArray::from(list_data);
-
-        let values = list_array.values();
-        assert_eq!(&value_data, values.data());
-        assert_eq!(DataType::Int32, list_array.value_type());
-        assert_eq!(5, list_array.len());
-        assert_eq!(2, list_array.null_count());
-        assert_eq!(6, list_array.value_offset(3));
-        assert_eq!(2, list_array.value_length());
-
-        let sliced_array = list_array.slice(1, 4);
-        assert_eq!(4, sliced_array.len());
-        assert_eq!(1, sliced_array.offset());
-        assert_eq!(2, sliced_array.null_count());
-
-        for i in 0..sliced_array.len() {
-            if bit_util::get_bit(&null_bits, sliced_array.offset() + i) {
-                assert!(sliced_array.is_valid(i));
-            } else {
-                assert!(sliced_array.is_null(i));
-            }
-        }
-
-        // Check offset and length for each non-null value.
-        let sliced_list_array = sliced_array
-            .as_any()
-            .downcast_ref::<FixedSizeListArray>()
-            .unwrap();
-        assert_eq!(2, sliced_list_array.value_length());
-        assert_eq!(6, sliced_list_array.value_offset(2));
-        assert_eq!(8, sliced_list_array.value_offset(3));
-    }
-
-    #[test]
-    #[should_panic(expected = "assertion failed: (offset + length) <= self.len()")]
-    fn test_fixed_size_list_array_index_out_of_bound() {
-        // Construct a value array
-        let value_data = ArrayData::builder(DataType::Int32)
-            .len(10)
-            .add_buffer(Buffer::from_slice_ref(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
-            .build();
-
-        // Set null buts for the nested array:
-        //  [[0, 1], null, null, [6, 7], [8, 9]]
-        // 01011001 00000001
-        let mut null_bits: [u8; 1] = [0; 1];
-        bit_util::set_bit(&mut null_bits, 0);
-        bit_util::set_bit(&mut null_bits, 3);
-        bit_util::set_bit(&mut null_bits, 4);
-
-        // Construct a fixed size list array from the above two
-        let list_data_type = DataType::FixedSizeList(
-            Box::new(Field::new("item", DataType::Int32, false)),
-            2,
-        );
-        let list_data = ArrayData::builder(list_data_type)
-            .len(5)
-            .add_child_data(value_data)
-            .null_bit_buffer(Buffer::from(null_bits))
-            .build();
-        let list_array = FixedSizeListArray::from(list_data);
-
-        list_array.value(10);
-    }
-
-    #[test]
-    #[should_panic(
-        expected = "ListArray data should contain a single buffer only (value offsets)"
-    )]
-    fn test_list_array_invalid_buffer_len() {
-        let value_data = ArrayData::builder(DataType::Int32)
-            .len(8)
-            .add_buffer(Buffer::from_slice_ref(&[0, 1, 2, 3, 4, 5, 6, 7]))
-            .build();
-        let list_data_type =
-            DataType::List(Box::new(Field::new("item", DataType::Int32, false)));
-        let list_data = ArrayData::builder(list_data_type)
-            .len(3)
-            .add_child_data(value_data)
-            .build();
-        ListArray::from(list_data);
-    }
-
-    #[test]
-    #[should_panic(
-        expected = "ListArray should contain a single child array (values array)"
-    )]
-    fn test_list_array_invalid_child_array_len() {
-        let value_offsets = Buffer::from_slice_ref(&[0, 2, 5, 7]);
-        let list_data_type =
-            DataType::List(Box::new(Field::new("item", DataType::Int32, false)));
-        let list_data = ArrayData::builder(list_data_type)
-            .len(3)
-            .add_buffer(value_offsets)
-            .build();
-        ListArray::from(list_data);
-    }
-
-    #[test]
-    #[should_panic(expected = "offsets do not start at zero")]
-    fn test_list_array_invalid_value_offset_start() {
-        let value_data = ArrayData::builder(DataType::Int32)
-            .len(8)
-            .add_buffer(Buffer::from_slice_ref(&[0, 1, 2, 3, 4, 5, 6, 7]))
-            .build();
-
-        let value_offsets = Buffer::from_slice_ref(&[2, 2, 5, 7]);
-
-        let list_data_type =
-            DataType::List(Box::new(Field::new("item", DataType::Int32, false)));
-        let list_data = ArrayData::builder(list_data_type)
-            .len(3)
-            .add_buffer(value_offsets)
-            .add_child_data(value_data)
-            .build();
-        ListArray::from(list_data);
-    }
-
-    #[test]
-    #[should_panic(expected = "memory is not aligned")]
-    fn test_primitive_array_alignment() {
-        let ptr = alloc::allocate_aligned::<u8>(8);
-        let buf = unsafe { Buffer::from_raw_parts(ptr, 8, 8) };
-        let buf2 = buf.slice(1);
-        let array_data = ArrayData::builder(DataType::Int32).add_buffer(buf2).build();
-        Int32Array::from(array_data);
-    }
-
-    #[test]
-    #[should_panic(expected = "memory is not aligned")]
-    fn test_list_array_alignment() {
-        let ptr = alloc::allocate_aligned::<u8>(8);
-        let buf = unsafe { Buffer::from_raw_parts(ptr, 8, 8) };
-        let buf2 = buf.slice(1);
-
-        let values: [i32; 8] = [0; 8];
-        let value_data = ArrayData::builder(DataType::Int32)
-            .add_buffer(Buffer::from_slice_ref(&values))
-            .build();
-
-        let list_data_type =
-            DataType::List(Box::new(Field::new("item", DataType::Int32, false)));
-        let list_data = ArrayData::builder(list_data_type)
-            .add_buffer(buf2)
-            .add_child_data(value_data)
-            .build();
-        ListArray::from(list_data);
-    }
-}
diff --git a/rust/arrow/src/array/array_primitive.rs b/rust/arrow/src/array/array_primitive.rs
deleted file mode 100644
index d2b3b6686d9..00000000000
--- a/rust/arrow/src/array/array_primitive.rs
+++ /dev/null
@@ -1,942 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::any::Any;
-use std::borrow::Borrow;
-use std::convert::From;
-use std::fmt;
-use std::iter::{FromIterator, IntoIterator};
-use std::mem;
-
-use chrono::{prelude::*, Duration};
-
-use super::array::print_long_array;
-use super::raw_pointer::RawPtrBox;
-use super::*;
-use crate::temporal_conversions;
-use crate::util::bit_util;
-use crate::{
-    buffer::{Buffer, MutableBuffer},
-    util::trusted_len_unzip,
-};
-
-/// Number of seconds in a day
-const SECONDS_IN_DAY: i64 = 86_400;
-/// Number of milliseconds in a second
-const MILLISECONDS: i64 = 1_000;
-/// Number of microseconds in a second
-const MICROSECONDS: i64 = 1_000_000;
-/// Number of nanoseconds in a second
-const NANOSECONDS: i64 = 1_000_000_000;
-
-/// Array whose elements are of primitive types.
-pub struct PrimitiveArray<T: ArrowPrimitiveType> {
-    /// Underlying ArrayData
-    /// # Safety
-    /// must have exactly one buffer, aligned to type T
-    data: ArrayData,
-    /// Pointer to the value array. The lifetime of this must be <= to the value buffer
-    /// stored in `data`, so it's safe to store.
-    /// # Safety
-    /// raw_values must have a value equivalent to `data.buffers()[0].raw_data()`
-    /// raw_values must have alignment for type T::NativeType
-    raw_values: RawPtrBox<T::Native>,
-}
-
-impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
-    /// Returns the length of this array.
-    #[inline]
-    pub fn len(&self) -> usize {
-        self.data.len()
-    }
-
-    /// Returns whether this array is empty.
-    pub fn is_empty(&self) -> bool {
-        self.data.is_empty()
-    }
-
-    /// Returns a slice of the values of this array
-    #[inline]
-    pub fn values(&self) -> &[T::Native] {
-        // Soundness
-        //     raw_values alignment & location is ensured by fn from(ArrayDataRef)
-        //     buffer bounds/offset is ensured by the ArrayData instance.
-        unsafe {
-            std::slice::from_raw_parts(
-                self.raw_values.as_ptr().add(self.data.offset()),
-                self.len(),
-            )
-        }
-    }
-
-    // Returns a new primitive array builder
-    pub fn builder(capacity: usize) -> PrimitiveBuilder<T> {
-        PrimitiveBuilder::<T>::new(capacity)
-    }
-
-    /// Returns the primitive value at index `i`.
-    ///
-    /// # Safety
-    ///
-    /// caller must ensure that the passed in offset is less than the array len()
-    pub unsafe fn value_unchecked(&self, i: usize) -> T::Native {
-        let offset = i + self.offset();
-        *self.raw_values.as_ptr().add(offset)
-    }
-
-    /// Returns the primitive value at index `i`.
-    ///
-    /// Note this doesn't do any bound checking, for performance reason.
-    /// # Safety
-    /// caller must ensure that the passed in offset is less than the array len()
-    pub fn value(&self, i: usize) -> T::Native {
-        debug_assert!(i < self.len());
-        unsafe { self.value_unchecked(i) }
-    }
-
-    /// Creates a PrimitiveArray based on an iterator of values without nulls
-    pub fn from_iter_values<I: IntoIterator<Item = T::Native>>(iter: I) -> Self {
-        let val_buf: Buffer = iter.into_iter().collect();
-        let data = ArrayData::new(
-            T::DATA_TYPE,
-            val_buf.len() / mem::size_of::<<T as ArrowPrimitiveType>::Native>(),
-            None,
-            None,
-            0,
-            vec![val_buf],
-            vec![],
-        );
-        PrimitiveArray::from(data)
-    }
-
-    /// Creates a PrimitiveArray based on a constant value with `count` elements
-    pub fn from_value(value: T::Native, count: usize) -> Self {
-        // # Safety: length is known
-        let val_buf = unsafe { Buffer::from_trusted_len_iter((0..count).map(|_| value)) };
-        let data = ArrayData::new(
-            T::DATA_TYPE,
-            val_buf.len() / mem::size_of::<<T as ArrowPrimitiveType>::Native>(),
-            None,
-            None,
-            0,
-            vec![val_buf],
-            vec![],
-        );
-        PrimitiveArray::from(data)
-    }
-}
-
-impl<T: ArrowPrimitiveType> Array for PrimitiveArray<T> {
-    fn as_any(&self) -> &Any {
-        self
-    }
-
-    fn data(&self) -> &ArrayData {
-        &self.data
-    }
-
-    /// Returns the total number of bytes of memory occupied by the buffers owned by this [PrimitiveArray].
-    fn get_buffer_memory_size(&self) -> usize {
-        self.data.get_buffer_memory_size()
-    }
-
-    /// Returns the total number of bytes of memory occupied physically by this [PrimitiveArray].
-    fn get_array_memory_size(&self) -> usize {
-        self.data.get_array_memory_size() + mem::size_of::<RawPtrBox<T::Native>>()
-    }
-}
-
-fn as_datetime<T: ArrowPrimitiveType>(v: i64) -> Option<NaiveDateTime> {
-    match T::DATA_TYPE {
-        DataType::Date32 => Some(temporal_conversions::date32_to_datetime(v as i32)),
-        DataType::Date64 => Some(temporal_conversions::date64_to_datetime(v)),
-        DataType::Time32(_) | DataType::Time64(_) => None,
-        DataType::Timestamp(unit, _) => match unit {
-            TimeUnit::Second => Some(temporal_conversions::timestamp_s_to_datetime(v)),
-            TimeUnit::Millisecond => {
-                Some(temporal_conversions::timestamp_ms_to_datetime(v))
-            }
-            TimeUnit::Microsecond => {
-                Some(temporal_conversions::timestamp_us_to_datetime(v))
-            }
-            TimeUnit::Nanosecond => {
-                Some(temporal_conversions::timestamp_ns_to_datetime(v))
-            }
-        },
-        // interval is not yet fully documented [ARROW-3097]
-        DataType::Interval(_) => None,
-        _ => None,
-    }
-}
-
-fn as_date<T: ArrowPrimitiveType>(v: i64) -> Option<NaiveDate> {
-    as_datetime::<T>(v).map(|datetime| datetime.date())
-}
-
-fn as_time<T: ArrowPrimitiveType>(v: i64) -> Option<NaiveTime> {
-    match T::DATA_TYPE {
-        DataType::Time32(unit) => {
-            // safe to immediately cast to u32 as `self.value(i)` is positive i32
-            let v = v as u32;
-            match unit {
-                TimeUnit::Second => Some(temporal_conversions::time32s_to_time(v as i32)),
-                TimeUnit::Millisecond => {
-                    Some(temporal_conversions::time32ms_to_time(v as i32))
-                }
-                _ => None,
-            }
-        }
-        DataType::Time64(unit) => match unit {
-            TimeUnit::Microsecond => Some(temporal_conversions::time64us_to_time(v)),
-            TimeUnit::Nanosecond => Some(temporal_conversions::time64ns_to_time(v)),
-            _ => None,
-        },
-        DataType::Timestamp(_, _) => as_datetime::<T>(v).map(|datetime| datetime.time()),
-        DataType::Date32 | DataType::Date64 => Some(NaiveTime::from_hms(0, 0, 0)),
-        DataType::Interval(_) => None,
-        _ => None,
-    }
-}
-
-fn as_duration<T: ArrowPrimitiveType>(v: i64) -> Option<Duration> {
-    match T::DATA_TYPE {
-        DataType::Duration(unit) => match unit {
-            TimeUnit::Second => Some(temporal_conversions::duration_s_to_duration(v)),
-            TimeUnit::Millisecond => {
-                Some(temporal_conversions::duration_ms_to_duration(v))
-            }
-            TimeUnit::Microsecond => {
-                Some(temporal_conversions::duration_us_to_duration(v))
-            }
-            TimeUnit::Nanosecond => {
-                Some(temporal_conversions::duration_ns_to_duration(v))
-            }
-        },
-        _ => None,
-    }
-}
-
-impl<T: ArrowTemporalType + ArrowNumericType> PrimitiveArray<T>
-where
-    i64: std::convert::From<T::Native>,
-{
-    /// Returns value as a chrono `NaiveDateTime`, handling time resolution
-    ///
-    /// If a data type cannot be converted to `NaiveDateTime`, a `None` is returned.
-    /// A valid value is expected, thus the user should first check for validity.
-    pub fn value_as_datetime(&self, i: usize) -> Option<NaiveDateTime> {
-        as_datetime::<T>(i64::from(self.value(i)))
-    }
-
-    /// Returns value as a chrono `NaiveDate` by using `Self::datetime()`
-    ///
-    /// If a data type cannot be converted to `NaiveDate`, a `None` is returned
-    pub fn value_as_date(&self, i: usize) -> Option<NaiveDate> {
-        self.value_as_datetime(i).map(|datetime| datetime.date())
-    }
-
-    /// Returns a value as a chrono `NaiveTime`
-    ///
-    /// `Date32` and `Date64` return UTC midnight as they do not have time resolution
-    pub fn value_as_time(&self, i: usize) -> Option<NaiveTime> {
-        as_time::<T>(i64::from(self.value(i)))
-    }
-
-    /// Returns a value as a chrono `Duration`
-    ///
-    /// If a data type cannot be converted to `Duration`, a `None` is returned
-    pub fn value_as_duration(&self, i: usize) -> Option<Duration> {
-        as_duration::<T>(i64::from(self.value(i)))
-    }
-}
-
-impl<T: ArrowPrimitiveType> fmt::Debug for PrimitiveArray<T> {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "PrimitiveArray<{:?}>\n[\n", T::DATA_TYPE)?;
-        print_long_array(self, f, |array, index, f| match T::DATA_TYPE {
-            DataType::Date32 | DataType::Date64 => {
-                let v = self.value(index).to_isize().unwrap() as i64;
-                match as_date::<T>(v) {
-                    Some(date) => write!(f, "{:?}", date),
-                    None => write!(f, "null"),
-                }
-            }
-            DataType::Time32(_) | DataType::Time64(_) => {
-                let v = self.value(index).to_isize().unwrap() as i64;
-                match as_time::<T>(v) {
-                    Some(time) => write!(f, "{:?}", time),
-                    None => write!(f, "null"),
-                }
-            }
-            DataType::Timestamp(_, _) => {
-                let v = self.value(index).to_isize().unwrap() as i64;
-                match as_datetime::<T>(v) {
-                    Some(datetime) => write!(f, "{:?}", datetime),
-                    None => write!(f, "null"),
-                }
-            }
-            _ => fmt::Debug::fmt(&array.value(index), f),
-        })?;
-        write!(f, "]")
-    }
-}
-
-impl<'a, T: ArrowPrimitiveType> IntoIterator for &'a PrimitiveArray<T> {
-    type Item = Option<<T as ArrowPrimitiveType>::Native>;
-    type IntoIter = PrimitiveIter<'a, T>;
-
-    fn into_iter(self) -> Self::IntoIter {
-        PrimitiveIter::<'a, T>::new(self)
-    }
-}
-
-impl<'a, T: ArrowPrimitiveType> PrimitiveArray<T> {
-    /// constructs a new iterator
-    pub fn iter(&'a self) -> PrimitiveIter<'a, T> {
-        PrimitiveIter::<'a, T>::new(&self)
-    }
-}
-
-impl<T: ArrowPrimitiveType, Ptr: Borrow<Option<<T as ArrowPrimitiveType>::Native>>>
-    FromIterator<Ptr> for PrimitiveArray<T>
-{
-    fn from_iter<I: IntoIterator<Item = Ptr>>(iter: I) -> Self {
-        let iter = iter.into_iter();
-        let (lower, _) = iter.size_hint();
-
-        let mut null_buf = BooleanBufferBuilder::new(lower);
-
-        let buffer: Buffer = iter
-            .map(|item| {
-                if let Some(a) = item.borrow() {
-                    null_buf.append(true);
-                    *a
-                } else {
-                    null_buf.append(false);
-                    // this ensures that null items on the buffer are not arbitrary.
-                    // This is important because falible operations can use null values (e.g. a vectorized "add")
-                    // which may panic (e.g. overflow if the number on the slots happen to be very large).
-                    T::Native::default()
-                }
-            })
-            .collect();
-
-        let data = ArrayData::new(
-            T::DATA_TYPE,
-            null_buf.len(),
-            None,
-            Some(null_buf.into()),
-            0,
-            vec![buffer],
-            vec![],
-        );
-        PrimitiveArray::from(data)
-    }
-}
-
-impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
-    /// Creates a [`PrimitiveArray`] from an iterator of trusted length.
-    /// # Safety
-    /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
-    /// I.e. that `size_hint().1` correctly reports its length.
-    #[inline]
-    pub unsafe fn from_trusted_len_iter<I, P>(iter: I) -> Self
-    where
-        P: std::borrow::Borrow<Option<<T as ArrowPrimitiveType>::Native>>,
-        I: IntoIterator<Item = P>,
-    {
-        let iterator = iter.into_iter();
-        let (_, upper) = iterator.size_hint();
-        let len = upper.expect("trusted_len_unzip requires an upper limit");
-
-        let (null, buffer) = trusted_len_unzip(iterator);
-
-        let data =
-            ArrayData::new(T::DATA_TYPE, len, None, Some(null), 0, vec![buffer], vec![]);
-        PrimitiveArray::from(data)
-    }
-}
-
-// TODO: the macro is needed here because we'd get "conflicting implementations" error
-// otherwise with both `From<Vec<T::Native>>` and `From<Vec<Option<T::Native>>>`.
-// We should revisit this in future.
-macro_rules! def_numeric_from_vec {
-    ( $ty:ident ) => {
-        impl From<Vec<<$ty as ArrowPrimitiveType>::Native>> for PrimitiveArray<$ty> {
-            fn from(data: Vec<<$ty as ArrowPrimitiveType>::Native>) -> Self {
-                let array_data = ArrayData::builder($ty::DATA_TYPE)
-                    .len(data.len())
-                    .add_buffer(Buffer::from_slice_ref(&data))
-                    .build();
-                PrimitiveArray::from(array_data)
-            }
-        }
-
-        // Constructs a primitive array from a vector. Should only be used for testing.
-        impl From<Vec<Option<<$ty as ArrowPrimitiveType>::Native>>>
-            for PrimitiveArray<$ty>
-        {
-            fn from(data: Vec<Option<<$ty as ArrowPrimitiveType>::Native>>) -> Self {
-                PrimitiveArray::from_iter(data.iter())
-            }
-        }
-    };
-}
-
-def_numeric_from_vec!(Int8Type);
-def_numeric_from_vec!(Int16Type);
-def_numeric_from_vec!(Int32Type);
-def_numeric_from_vec!(Int64Type);
-def_numeric_from_vec!(UInt8Type);
-def_numeric_from_vec!(UInt16Type);
-def_numeric_from_vec!(UInt32Type);
-def_numeric_from_vec!(UInt64Type);
-def_numeric_from_vec!(Float32Type);
-def_numeric_from_vec!(Float64Type);
-
-def_numeric_from_vec!(Date32Type);
-def_numeric_from_vec!(Date64Type);
-def_numeric_from_vec!(Time32SecondType);
-def_numeric_from_vec!(Time32MillisecondType);
-def_numeric_from_vec!(Time64MicrosecondType);
-def_numeric_from_vec!(Time64NanosecondType);
-def_numeric_from_vec!(IntervalYearMonthType);
-def_numeric_from_vec!(IntervalDayTimeType);
-def_numeric_from_vec!(DurationSecondType);
-def_numeric_from_vec!(DurationMillisecondType);
-def_numeric_from_vec!(DurationMicrosecondType);
-def_numeric_from_vec!(DurationNanosecondType);
-def_numeric_from_vec!(TimestampSecondType);
-def_numeric_from_vec!(TimestampMillisecondType);
-def_numeric_from_vec!(TimestampMicrosecondType);
-def_numeric_from_vec!(TimestampNanosecondType);
-
-impl<T: ArrowTimestampType> PrimitiveArray<T> {
-    /// Construct a timestamp array from a vec of i64 values and an optional timezone
-    pub fn from_vec(data: Vec<i64>, timezone: Option<String>) -> Self {
-        let array_data =
-            ArrayData::builder(DataType::Timestamp(T::get_time_unit(), timezone))
-                .len(data.len())
-                .add_buffer(Buffer::from_slice_ref(&data))
-                .build();
-        PrimitiveArray::from(array_data)
-    }
-}
-
-impl<T: ArrowTimestampType> PrimitiveArray<T> {
-    /// Construct a timestamp array from a vec of Option<i64> values and an optional timezone
-    pub fn from_opt_vec(data: Vec<Option<i64>>, timezone: Option<String>) -> Self {
-        // TODO: duplicated from def_numeric_from_vec! macro, it looks possible to convert to generic
-        let data_len = data.len();
-        let mut null_buf = MutableBuffer::new_null(data_len);
-        let mut val_buf = MutableBuffer::new(data_len * mem::size_of::<i64>());
-
-        {
-            let null_slice = null_buf.as_slice_mut();
-            for (i, v) in data.iter().enumerate() {
-                if let Some(n) = v {
-                    bit_util::set_bit(null_slice, i);
-                    val_buf.push(*n);
-                } else {
-                    val_buf.push(0i64);
-                }
-            }
-        }
-
-        let array_data =
-            ArrayData::builder(DataType::Timestamp(T::get_time_unit(), timezone))
-                .len(data_len)
-                .add_buffer(val_buf.into())
-                .null_bit_buffer(null_buf.into())
-                .build();
-        PrimitiveArray::from(array_data)
-    }
-}
-
-/// Constructs a `PrimitiveArray` from an array data reference.
-impl<T: ArrowPrimitiveType> From<ArrayData> for PrimitiveArray<T> {
-    fn from(data: ArrayData) -> Self {
-        assert_eq!(
-            data.buffers().len(),
-            1,
-            "PrimitiveArray data should contain a single buffer only (values buffer)"
-        );
-
-        let ptr = data.buffers()[0].as_ptr();
-        Self {
-            data,
-            raw_values: unsafe { RawPtrBox::new(ptr) },
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    use std::thread;
-
-    use crate::buffer::Buffer;
-    use crate::datatypes::DataType;
-
-    #[test]
-    fn test_primitive_array_from_vec() {
-        let buf = Buffer::from_slice_ref(&[0, 1, 2, 3, 4]);
-        let arr = Int32Array::from(vec![0, 1, 2, 3, 4]);
-        assert_eq!(buf, arr.data.buffers()[0]);
-        assert_eq!(5, arr.len());
-        assert_eq!(0, arr.offset());
-        assert_eq!(0, arr.null_count());
-        for i in 0..5 {
-            assert!(!arr.is_null(i));
-            assert!(arr.is_valid(i));
-            assert_eq!(i as i32, arr.value(i));
-        }
-
-        assert_eq!(64, arr.get_buffer_memory_size());
-        assert_eq!(136, arr.get_array_memory_size());
-    }
-
-    #[test]
-    fn test_primitive_array_from_vec_option() {
-        // Test building a primitive array with null values
-        let arr = Int32Array::from(vec![Some(0), None, Some(2), None, Some(4)]);
-        assert_eq!(5, arr.len());
-        assert_eq!(0, arr.offset());
-        assert_eq!(2, arr.null_count());
-        for i in 0..5 {
-            if i % 2 == 0 {
-                assert!(!arr.is_null(i));
-                assert!(arr.is_valid(i));
-                assert_eq!(i as i32, arr.value(i));
-            } else {
-                assert!(arr.is_null(i));
-                assert!(!arr.is_valid(i));
-            }
-        }
-
-        assert_eq!(128, arr.get_buffer_memory_size());
-        assert_eq!(216, arr.get_array_memory_size());
-    }
-
-    #[test]
-    fn test_date64_array_from_vec_option() {
-        // Test building a primitive array with null values
-        // we use Int32 and Int64 as a backing array, so all Int32 and Int64 conventions
-        // work
-        let arr: PrimitiveArray<Date64Type> =
-            vec![Some(1550902545147), None, Some(1550902545147)].into();
-        assert_eq!(3, arr.len());
-        assert_eq!(0, arr.offset());
-        assert_eq!(1, arr.null_count());
-        for i in 0..3 {
-            if i % 2 == 0 {
-                assert!(!arr.is_null(i));
-                assert!(arr.is_valid(i));
-                assert_eq!(1550902545147, arr.value(i));
-                // roundtrip to and from datetime
-                assert_eq!(
-                    1550902545147,
-                    arr.value_as_datetime(i).unwrap().timestamp_millis()
-                );
-            } else {
-                assert!(arr.is_null(i));
-                assert!(!arr.is_valid(i));
-            }
-        }
-    }
-
-    #[test]
-    fn test_time32_millisecond_array_from_vec() {
-        // 1:        00:00:00.001
-        // 37800005: 10:30:00.005
-        // 86399210: 23:59:59.210
-        let arr: PrimitiveArray<Time32MillisecondType> =
-            vec![1, 37_800_005, 86_399_210].into();
-        assert_eq!(3, arr.len());
-        assert_eq!(0, arr.offset());
-        assert_eq!(0, arr.null_count());
-        let formatted = vec!["00:00:00.001", "10:30:00.005", "23:59:59.210"];
-        for (i, formatted) in formatted.iter().enumerate().take(3) {
-            // check that we can't create dates or datetimes from time instances
-            assert_eq!(None, arr.value_as_datetime(i));
-            assert_eq!(None, arr.value_as_date(i));
-            let time = arr.value_as_time(i).unwrap();
-            assert_eq!(*formatted, time.format("%H:%M:%S%.3f").to_string());
-        }
-    }
-
-    #[test]
-    fn test_time64_nanosecond_array_from_vec() {
-        // Test building a primitive array with null values
-        // we use Int32 and Int64 as a backing array, so all Int32 and Int64 conventions
-        // work
-
-        // 1e6:        00:00:00.001
-        // 37800005e6: 10:30:00.005
-        // 86399210e6: 23:59:59.210
-        let arr: PrimitiveArray<Time64NanosecondType> =
-            vec![1_000_000, 37_800_005_000_000, 86_399_210_000_000].into();
-        assert_eq!(3, arr.len());
-        assert_eq!(0, arr.offset());
-        assert_eq!(0, arr.null_count());
-        let formatted = vec!["00:00:00.001", "10:30:00.005", "23:59:59.210"];
-        for (i, item) in formatted.iter().enumerate().take(3) {
-            // check that we can't create dates or datetimes from time instances
-            assert_eq!(None, arr.value_as_datetime(i));
-            assert_eq!(None, arr.value_as_date(i));
-            let time = arr.value_as_time(i).unwrap();
-            assert_eq!(*item, time.format("%H:%M:%S%.3f").to_string());
-        }
-    }
-
-    #[test]
-    fn test_interval_array_from_vec() {
-        // intervals are currently not treated specially, but are Int32 and Int64 arrays
-        let arr = IntervalYearMonthArray::from(vec![Some(1), None, Some(-5)]);
-        assert_eq!(3, arr.len());
-        assert_eq!(0, arr.offset());
-        assert_eq!(1, arr.null_count());
-        assert_eq!(1, arr.value(0));
-        assert_eq!(1, arr.values()[0]);
-        assert!(arr.is_null(1));
-        assert_eq!(-5, arr.value(2));
-        assert_eq!(-5, arr.values()[2]);
-
-        // a day_time interval contains days and milliseconds, but we do not yet have accessors for the values
-        let arr = IntervalDayTimeArray::from(vec![Some(1), None, Some(-5)]);
-        assert_eq!(3, arr.len());
-        assert_eq!(0, arr.offset());
-        assert_eq!(1, arr.null_count());
-        assert_eq!(1, arr.value(0));
-        assert_eq!(1, arr.values()[0]);
-        assert!(arr.is_null(1));
-        assert_eq!(-5, arr.value(2));
-        assert_eq!(-5, arr.values()[2]);
-    }
-
-    #[test]
-    fn test_duration_array_from_vec() {
-        let arr = DurationSecondArray::from(vec![Some(1), None, Some(-5)]);
-        assert_eq!(3, arr.len());
-        assert_eq!(0, arr.offset());
-        assert_eq!(1, arr.null_count());
-        assert_eq!(1, arr.value(0));
-        assert_eq!(1, arr.values()[0]);
-        assert!(arr.is_null(1));
-        assert_eq!(-5, arr.value(2));
-        assert_eq!(-5, arr.values()[2]);
-
-        let arr = DurationMillisecondArray::from(vec![Some(1), None, Some(-5)]);
-        assert_eq!(3, arr.len());
-        assert_eq!(0, arr.offset());
-        assert_eq!(1, arr.null_count());
-        assert_eq!(1, arr.value(0));
-        assert_eq!(1, arr.values()[0]);
-        assert!(arr.is_null(1));
-        assert_eq!(-5, arr.value(2));
-        assert_eq!(-5, arr.values()[2]);
-
-        let arr = DurationMicrosecondArray::from(vec![Some(1), None, Some(-5)]);
-        assert_eq!(3, arr.len());
-        assert_eq!(0, arr.offset());
-        assert_eq!(1, arr.null_count());
-        assert_eq!(1, arr.value(0));
-        assert_eq!(1, arr.values()[0]);
-        assert!(arr.is_null(1));
-        assert_eq!(-5, arr.value(2));
-        assert_eq!(-5, arr.values()[2]);
-
-        let arr = DurationNanosecondArray::from(vec![Some(1), None, Some(-5)]);
-        assert_eq!(3, arr.len());
-        assert_eq!(0, arr.offset());
-        assert_eq!(1, arr.null_count());
-        assert_eq!(1, arr.value(0));
-        assert_eq!(1, arr.values()[0]);
-        assert!(arr.is_null(1));
-        assert_eq!(-5, arr.value(2));
-        assert_eq!(-5, arr.values()[2]);
-    }
-
-    #[test]
-    fn test_timestamp_array_from_vec() {
-        let arr = TimestampSecondArray::from_vec(vec![1, -5], None);
-        assert_eq!(2, arr.len());
-        assert_eq!(0, arr.offset());
-        assert_eq!(0, arr.null_count());
-        assert_eq!(1, arr.value(0));
-        assert_eq!(-5, arr.value(1));
-        assert_eq!(&[1, -5], arr.values());
-
-        let arr = TimestampMillisecondArray::from_vec(vec![1, -5], None);
-        assert_eq!(2, arr.len());
-        assert_eq!(0, arr.offset());
-        assert_eq!(0, arr.null_count());
-        assert_eq!(1, arr.value(0));
-        assert_eq!(-5, arr.value(1));
-        assert_eq!(&[1, -5], arr.values());
-
-        let arr = TimestampMicrosecondArray::from_vec(vec![1, -5], None);
-        assert_eq!(2, arr.len());
-        assert_eq!(0, arr.offset());
-        assert_eq!(0, arr.null_count());
-        assert_eq!(1, arr.value(0));
-        assert_eq!(-5, arr.value(1));
-        assert_eq!(&[1, -5], arr.values());
-
-        let arr = TimestampNanosecondArray::from_vec(vec![1, -5], None);
-        assert_eq!(2, arr.len());
-        assert_eq!(0, arr.offset());
-        assert_eq!(0, arr.null_count());
-        assert_eq!(1, arr.value(0));
-        assert_eq!(-5, arr.value(1));
-        assert_eq!(&[1, -5], arr.values());
-    }
-
-    #[test]
-    fn test_primitive_array_slice() {
-        let arr = Int32Array::from(vec![
-            Some(0),
-            None,
-            Some(2),
-            None,
-            Some(4),
-            Some(5),
-            Some(6),
-            None,
-            None,
-        ]);
-        assert_eq!(9, arr.len());
-        assert_eq!(0, arr.offset());
-        assert_eq!(4, arr.null_count());
-
-        let arr2 = arr.slice(2, 5);
-        assert_eq!(5, arr2.len());
-        assert_eq!(2, arr2.offset());
-        assert_eq!(1, arr2.null_count());
-
-        for i in 0..arr2.len() {
-            assert_eq!(i == 1, arr2.is_null(i));
-            assert_eq!(i != 1, arr2.is_valid(i));
-        }
-        let int_arr2 = arr2.as_any().downcast_ref::<Int32Array>().unwrap();
-        assert_eq!(2, int_arr2.values()[0]);
-        assert_eq!(&[4, 5, 6], &int_arr2.values()[2..5]);
-
-        let arr3 = arr2.slice(2, 3);
-        assert_eq!(3, arr3.len());
-        assert_eq!(4, arr3.offset());
-        assert_eq!(0, arr3.null_count());
-
-        let int_arr3 = arr3.as_any().downcast_ref::<Int32Array>().unwrap();
-        assert_eq!(&[4, 5, 6], int_arr3.values());
-        assert_eq!(4, int_arr3.value(0));
-        assert_eq!(5, int_arr3.value(1));
-        assert_eq!(6, int_arr3.value(2));
-    }
-
-    #[test]
-    fn test_boolean_array_slice() {
-        let arr = BooleanArray::from(vec![
-            Some(true),
-            None,
-            Some(false),
-            None,
-            Some(true),
-            Some(false),
-            Some(true),
-            Some(false),
-            None,
-            Some(true),
-        ]);
-
-        assert_eq!(10, arr.len());
-        assert_eq!(0, arr.offset());
-        assert_eq!(3, arr.null_count());
-
-        let arr2 = arr.slice(3, 5);
-        assert_eq!(5, arr2.len());
-        assert_eq!(3, arr2.offset());
-        assert_eq!(1, arr2.null_count());
-
-        let bool_arr = arr2.as_any().downcast_ref::<BooleanArray>().unwrap();
-
-        assert_eq!(false, bool_arr.is_valid(0));
-
-        assert_eq!(true, bool_arr.is_valid(1));
-        assert_eq!(true, bool_arr.value(1));
-
-        assert_eq!(true, bool_arr.is_valid(2));
-        assert_eq!(false, bool_arr.value(2));
-
-        assert_eq!(true, bool_arr.is_valid(3));
-        assert_eq!(true, bool_arr.value(3));
-
-        assert_eq!(true, bool_arr.is_valid(4));
-        assert_eq!(false, bool_arr.value(4));
-    }
-
-    #[test]
-    fn test_int32_fmt_debug() {
-        let arr = Int32Array::from(vec![0, 1, 2, 3, 4]);
-        assert_eq!(
-            "PrimitiveArray<Int32>\n[\n  0,\n  1,\n  2,\n  3,\n  4,\n]",
-            format!("{:?}", arr)
-        );
-    }
-
-    #[test]
-    fn test_fmt_debug_up_to_20_elements() {
-        (1..=20).for_each(|i| {
-            let values = (0..i).collect::<Vec<i16>>();
-            let array_expected = format!(
-                "PrimitiveArray<Int16>\n[\n{}\n]",
-                values
-                    .iter()
-                    .map(|v| { format!("  {},", v) })
-                    .collect::<Vec<String>>()
-                    .join("\n")
-            );
-            let array = Int16Array::from(values);
-
-            assert_eq!(array_expected, format!("{:?}", array));
-        })
-    }
-
-    #[test]
-    fn test_int32_with_null_fmt_debug() {
-        let mut builder = Int32Array::builder(3);
-        builder.append_slice(&[0, 1]).unwrap();
-        builder.append_null().unwrap();
-        builder.append_slice(&[3, 4]).unwrap();
-        let arr = builder.finish();
-        assert_eq!(
-            "PrimitiveArray<Int32>\n[\n  0,\n  1,\n  null,\n  3,\n  4,\n]",
-            format!("{:?}", arr)
-        );
-    }
-
-    #[test]
-    fn test_timestamp_fmt_debug() {
-        let arr: PrimitiveArray<TimestampMillisecondType> =
-            TimestampMillisecondArray::from_vec(
-                vec![1546214400000, 1546214400000, -1546214400000],
-                None,
-            );
-        assert_eq!(
-            "PrimitiveArray<Timestamp(Millisecond, None)>\n[\n  2018-12-31T00:00:00,\n  2018-12-31T00:00:00,\n  1921-01-02T00:00:00,\n]",
-            format!("{:?}", arr)
-        );
-    }
-
-    #[test]
-    fn test_date32_fmt_debug() {
-        let arr: PrimitiveArray<Date32Type> = vec![12356, 13548, -365].into();
-        assert_eq!(
-            "PrimitiveArray<Date32>\n[\n  2003-10-31,\n  2007-02-04,\n  1969-01-01,\n]",
-            format!("{:?}", arr)
-        );
-    }
-
-    #[test]
-    fn test_time32second_fmt_debug() {
-        let arr: PrimitiveArray<Time32SecondType> = vec![7201, 60054].into();
-        assert_eq!(
-            "PrimitiveArray<Time32(Second)>\n[\n  02:00:01,\n  16:40:54,\n]",
-            format!("{:?}", arr)
-        );
-    }
-
-    #[test]
-    #[should_panic(expected = "invalid time")]
-    fn test_time32second_invalid_neg() {
-        // The panic should come from chrono, not from arrow
-        let arr: PrimitiveArray<Time32SecondType> = vec![-7201, -60054].into();
-        println!("{:?}", arr);
-    }
-
-    #[test]
-    fn test_primitive_array_builder() {
-        // Test building a primitive array with ArrayData builder and offset
-        let buf = Buffer::from_slice_ref(&[0, 1, 2, 3, 4]);
-        let buf2 = buf.clone();
-        let data = ArrayData::builder(DataType::Int32)
-            .len(5)
-            .offset(2)
-            .add_buffer(buf)
-            .build();
-        let arr = Int32Array::from(data);
-        assert_eq!(buf2, arr.data.buffers()[0]);
-        assert_eq!(5, arr.len());
-        assert_eq!(0, arr.null_count());
-        for i in 0..3 {
-            assert_eq!((i + 2) as i32, arr.value(i));
-        }
-    }
-
-    #[test]
-    fn test_primitive_from_iter_values() {
-        // Test building a primitive array with from_iter_values
-        let arr: PrimitiveArray<Int32Type> = PrimitiveArray::from_iter_values(0..10);
-        assert_eq!(10, arr.len());
-        assert_eq!(0, arr.null_count());
-        for i in 0..10i32 {
-            assert_eq!(i, arr.value(i as usize));
-        }
-    }
-
-    #[test]
-    fn test_primitive_array_from_unbound_iter() {
-        // iterator that doesn't declare (upper) size bound
-        let value_iter = (0..)
-            .scan(0usize, |pos, i| {
-                if *pos < 10 {
-                    *pos += 1;
-                    Some(Some(i))
-                } else {
-                    // actually returns up to 10 values
-                    None
-                }
-            })
-            // limited using take()
-            .take(100);
-
-        let (_, upper_size_bound) = value_iter.size_hint();
-        // the upper bound, defined by take above, is 100
-        assert_eq!(upper_size_bound, Some(100));
-        let primitive_array: PrimitiveArray<Int32Type> = value_iter.collect();
-        // but the actual number of items in the array should be 10
-        assert_eq!(primitive_array.len(), 10);
-    }
-
-    #[test]
-    #[should_panic(expected = "PrimitiveArray data should contain a single buffer only \
-                               (values buffer)")]
-    fn test_primitive_array_invalid_buffer_len() {
-        let data = ArrayData::builder(DataType::Int32).len(5).build();
-        Int32Array::from(data);
-    }
-
-    #[test]
-    fn test_access_array_concurrently() {
-        let a = Int32Array::from(vec![5, 6, 7, 8, 9]);
-        let ret = thread::spawn(move || a.value(3)).join();
-
-        assert!(ret.is_ok());
-        assert_eq!(8, ret.ok().unwrap());
-    }
-}
diff --git a/rust/arrow/src/array/array_string.rs b/rust/arrow/src/array/array_string.rs
deleted file mode 100644
index 0519148e6f4..00000000000
--- a/rust/arrow/src/array/array_string.rs
+++ /dev/null
@@ -1,528 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::convert::From;
-use std::fmt;
-use std::mem;
-use std::{any::Any, iter::FromIterator};
-
-use super::{
-    array::print_long_array, raw_pointer::RawPtrBox, Array, ArrayData, GenericListArray,
-    GenericStringIter, OffsetSizeTrait,
-};
-use crate::buffer::Buffer;
-use crate::util::bit_util;
-use crate::{buffer::MutableBuffer, datatypes::DataType};
-
-/// Like OffsetSizeTrait, but specialized for Strings
-// This allow us to expose a constant datatype for the GenericStringArray
-pub trait StringOffsetSizeTrait: OffsetSizeTrait {
-    const DATA_TYPE: DataType;
-}
-
-impl StringOffsetSizeTrait for i32 {
-    const DATA_TYPE: DataType = DataType::Utf8;
-}
-
-impl StringOffsetSizeTrait for i64 {
-    const DATA_TYPE: DataType = DataType::LargeUtf8;
-}
-
-/// Generic struct for \[Large\]StringArray
-pub struct GenericStringArray<OffsetSize: StringOffsetSizeTrait> {
-    data: ArrayData,
-    value_offsets: RawPtrBox<OffsetSize>,
-    value_data: RawPtrBox<u8>,
-}
-
-impl<OffsetSize: StringOffsetSizeTrait> GenericStringArray<OffsetSize> {
-    /// Returns the length for the element at index `i`.
-    #[inline]
-    pub fn value_length(&self, i: usize) -> OffsetSize {
-        let offsets = self.value_offsets();
-        offsets[i + 1] - offsets[i]
-    }
-
-    /// Returns the offset values in the offsets buffer
-    #[inline]
-    pub fn value_offsets(&self) -> &[OffsetSize] {
-        // Soundness
-        //     pointer alignment & location is ensured by RawPtrBox
-        //     buffer bounds/offset is ensured by the ArrayData instance.
-        unsafe {
-            std::slice::from_raw_parts(
-                self.value_offsets.as_ptr().add(self.data.offset()),
-                self.len() + 1,
-            )
-        }
-    }
-
-    /// Returns a clone of the value data buffer
-    pub fn value_data(&self) -> Buffer {
-        self.data.buffers()[1].clone()
-    }
-
-    /// Returns the element at index
-    /// # Safety
-    /// caller is responsible for ensuring that index is within the array bounds
-    pub unsafe fn value_unchecked(&self, i: usize) -> &str {
-        let end = self.value_offsets().get_unchecked(i + 1);
-        let start = self.value_offsets().get_unchecked(i);
-
-        // Soundness
-        // pointer alignment & location is ensured by RawPtrBox
-        // buffer bounds/offset is ensured by the value_offset invariants
-        // ISSUE: utf-8 well formedness is not checked
-
-        // Safety of `to_isize().unwrap()`
-        // `start` and `end` are &OffsetSize, which is a generic type that implements the
-        // OffsetSizeTrait. Currently, only i32 and i64 implement OffsetSizeTrait,
-        // both of which should cleanly cast to isize on an architecture that supports
-        // 32/64-bit offsets
-        let slice = std::slice::from_raw_parts(
-            self.value_data.as_ptr().offset(start.to_isize().unwrap()),
-            (*end - *start).to_usize().unwrap(),
-        );
-        std::str::from_utf8_unchecked(slice)
-    }
-
-    /// Returns the element at index `i` as &str
-    pub fn value(&self, i: usize) -> &str {
-        assert!(i < self.data.len(), "StringArray out of bounds access");
-        //Soundness: length checked above, offset buffer length is 1 larger than logical array length
-        let end = unsafe { self.value_offsets().get_unchecked(i + 1) };
-        let start = unsafe { self.value_offsets().get_unchecked(i) };
-
-        // Soundness
-        // pointer alignment & location is ensured by RawPtrBox
-        // buffer bounds/offset is ensured by the value_offset invariants
-        // ISSUE: utf-8 well formedness is not checked
-        unsafe {
-            // Safety of `to_isize().unwrap()`
-            // `start` and `end` are &OffsetSize, which is a generic type that implements the
-            // OffsetSizeTrait. Currently, only i32 and i64 implement OffsetSizeTrait,
-            // both of which should cleanly cast to isize on an architecture that supports
-            // 32/64-bit offsets
-            let slice = std::slice::from_raw_parts(
-                self.value_data.as_ptr().offset(start.to_isize().unwrap()),
-                (*end - *start).to_usize().unwrap(),
-            );
-            std::str::from_utf8_unchecked(slice)
-        }
-    }
-
-    fn from_list(v: GenericListArray<OffsetSize>) -> Self {
-        assert_eq!(
-            v.data().child_data()[0].child_data().len(),
-            0,
-            "StringArray can only be created from list array of u8 values \
-             (i.e. List<PrimitiveArray<u8>>)."
-        );
-        assert_eq!(
-            v.data().child_data()[0].data_type(),
-            &DataType::UInt8,
-            "StringArray can only be created from List<u8> arrays, mismatched data types."
-        );
-
-        let mut builder = ArrayData::builder(OffsetSize::DATA_TYPE)
-            .len(v.len())
-            .add_buffer(v.data().buffers()[0].clone())
-            .add_buffer(v.data().child_data()[0].buffers()[0].clone());
-        if let Some(bitmap) = v.data().null_bitmap() {
-            builder = builder.null_bit_buffer(bitmap.bits.clone())
-        }
-
-        let data = builder.build();
-        Self::from(data)
-    }
-
-    pub(crate) fn from_vec(v: Vec<&str>) -> Self {
-        let mut offsets =
-            MutableBuffer::new((v.len() + 1) * std::mem::size_of::<OffsetSize>());
-        let mut values = MutableBuffer::new(0);
-
-        let mut length_so_far = OffsetSize::zero();
-        offsets.push(length_so_far);
-
-        for s in &v {
-            length_so_far += OffsetSize::from_usize(s.len()).unwrap();
-            offsets.push(length_so_far);
-            values.extend_from_slice(s.as_bytes());
-        }
-        let array_data = ArrayData::builder(OffsetSize::DATA_TYPE)
-            .len(v.len())
-            .add_buffer(offsets.into())
-            .add_buffer(values.into())
-            .build();
-        Self::from(array_data)
-    }
-
-    pub(crate) fn from_opt_vec(v: Vec<Option<&str>>) -> Self {
-        v.into_iter().collect()
-    }
-
-    /// Creates a `GenericStringArray` based on an iterator of values without nulls
-    pub fn from_iter_values<Ptr, I: IntoIterator<Item = Ptr>>(iter: I) -> Self
-    where
-        Ptr: AsRef<str>,
-    {
-        let iter = iter.into_iter();
-        let (_, data_len) = iter.size_hint();
-        let data_len = data_len.expect("Iterator must be sized"); // panic if no upper bound.
-
-        let mut offsets =
-            MutableBuffer::new((data_len + 1) * std::mem::size_of::<OffsetSize>());
-        let mut values = MutableBuffer::new(0);
-
-        let mut length_so_far = OffsetSize::zero();
-        offsets.push(length_so_far);
-
-        for i in iter {
-            let s = i.as_ref();
-            length_so_far += OffsetSize::from_usize(s.len()).unwrap();
-            offsets.push(length_so_far);
-            values.extend_from_slice(s.as_bytes());
-        }
-        let array_data = ArrayData::builder(OffsetSize::DATA_TYPE)
-            .len(data_len)
-            .add_buffer(offsets.into())
-            .add_buffer(values.into())
-            .build();
-        Self::from(array_data)
-    }
-}
-
-impl<'a, Ptr, OffsetSize: StringOffsetSizeTrait> FromIterator<Option<Ptr>>
-    for GenericStringArray<OffsetSize>
-where
-    Ptr: AsRef<str>,
-{
-    fn from_iter<I: IntoIterator<Item = Option<Ptr>>>(iter: I) -> Self {
-        let iter = iter.into_iter();
-        let (_, data_len) = iter.size_hint();
-        let data_len = data_len.expect("Iterator must be sized"); // panic if no upper bound.
-
-        let offset_size = std::mem::size_of::<OffsetSize>();
-        let mut offsets = MutableBuffer::new((data_len + 1) * offset_size);
-        let mut values = MutableBuffer::new(0);
-        let mut null_buf = MutableBuffer::new_null(data_len);
-        let null_slice = null_buf.as_slice_mut();
-        let mut length_so_far = OffsetSize::zero();
-        offsets.push(length_so_far);
-
-        for (i, s) in iter.enumerate() {
-            let value_bytes = if let Some(ref s) = s {
-                // set null bit
-                bit_util::set_bit(null_slice, i);
-                let s_bytes = s.as_ref().as_bytes();
-                length_so_far += OffsetSize::from_usize(s_bytes.len()).unwrap();
-                s_bytes
-            } else {
-                b""
-            };
-            values.extend_from_slice(value_bytes);
-            offsets.push(length_so_far);
-        }
-
-        // calculate actual data_len, which may be different from the iterator's upper bound
-        let data_len = (offsets.len() / offset_size) - 1;
-        let array_data = ArrayData::builder(OffsetSize::DATA_TYPE)
-            .len(data_len)
-            .add_buffer(offsets.into())
-            .add_buffer(values.into())
-            .null_bit_buffer(null_buf.into())
-            .build();
-        Self::from(array_data)
-    }
-}
-
-impl<'a, T: StringOffsetSizeTrait> IntoIterator for &'a GenericStringArray<T> {
-    type Item = Option<&'a str>;
-    type IntoIter = GenericStringIter<'a, T>;
-
-    fn into_iter(self) -> Self::IntoIter {
-        GenericStringIter::<'a, T>::new(self)
-    }
-}
-
-impl<'a, T: StringOffsetSizeTrait> GenericStringArray<T> {
-    /// constructs a new iterator
-    pub fn iter(&'a self) -> GenericStringIter<'a, T> {
-        GenericStringIter::<'a, T>::new(&self)
-    }
-}
-
-impl<OffsetSize: StringOffsetSizeTrait> fmt::Debug for GenericStringArray<OffsetSize> {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        let prefix = if OffsetSize::is_large() { "Large" } else { "" };
-
-        write!(f, "{}StringArray\n[\n", prefix)?;
-        print_long_array(self, f, |array, index, f| {
-            fmt::Debug::fmt(&array.value(index), f)
-        })?;
-        write!(f, "]")
-    }
-}
-
-impl<OffsetSize: StringOffsetSizeTrait> Array for GenericStringArray<OffsetSize> {
-    fn as_any(&self) -> &Any {
-        self
-    }
-
-    fn data(&self) -> &ArrayData {
-        &self.data
-    }
-
-    /// Returns the total number of bytes of memory occupied by the buffers owned by this [$name].
-    fn get_buffer_memory_size(&self) -> usize {
-        self.data.get_buffer_memory_size()
-    }
-
-    /// Returns the total number of bytes of memory occupied physically by this [$name].
-    fn get_array_memory_size(&self) -> usize {
-        self.data.get_array_memory_size() + mem::size_of_val(self)
-    }
-}
-
-impl<OffsetSize: StringOffsetSizeTrait> From<ArrayData>
-    for GenericStringArray<OffsetSize>
-{
-    fn from(data: ArrayData) -> Self {
-        assert_eq!(
-            data.data_type(),
-            &<OffsetSize as StringOffsetSizeTrait>::DATA_TYPE,
-            "[Large]StringArray expects Datatype::[Large]Utf8"
-        );
-        assert_eq!(
-            data.buffers().len(),
-            2,
-            "StringArray data should contain 2 buffers only (offsets and values)"
-        );
-        let offsets = data.buffers()[0].as_ptr();
-        let values = data.buffers()[1].as_ptr();
-        Self {
-            data,
-            value_offsets: unsafe { RawPtrBox::new(offsets) },
-            value_data: unsafe { RawPtrBox::new(values) },
-        }
-    }
-}
-
-impl<OffsetSize: StringOffsetSizeTrait> From<Vec<Option<&str>>>
-    for GenericStringArray<OffsetSize>
-{
-    fn from(v: Vec<Option<&str>>) -> Self {
-        GenericStringArray::<OffsetSize>::from_opt_vec(v)
-    }
-}
-
-impl<OffsetSize: StringOffsetSizeTrait> From<Vec<&str>>
-    for GenericStringArray<OffsetSize>
-{
-    fn from(v: Vec<&str>) -> Self {
-        GenericStringArray::<OffsetSize>::from_vec(v)
-    }
-}
-
-/// An array where each element is a variable-sized sequence of bytes representing a string
-/// whose maximum length (in bytes) is represented by a i32.
-pub type StringArray = GenericStringArray<i32>;
-
-/// An array where each element is a variable-sized sequence of bytes representing a string
-/// whose maximum length (in bytes) is represented by a i64.
-pub type LargeStringArray = GenericStringArray<i64>;
-
-impl<T: StringOffsetSizeTrait> From<GenericListArray<T>> for GenericStringArray<T> {
-    fn from(v: GenericListArray<T>) -> Self {
-        GenericStringArray::<T>::from_list(v)
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use crate::array::{ListBuilder, StringBuilder};
-
-    use super::*;
-
-    #[test]
-    fn test_string_array_from_u8_slice() {
-        let values: Vec<&str> = vec!["hello", "", "parquet"];
-
-        // Array data: ["hello", "", "parquet"]
-        let string_array = StringArray::from(values);
-
-        assert_eq!(3, string_array.len());
-        assert_eq!(0, string_array.null_count());
-        assert_eq!("hello", string_array.value(0));
-        assert_eq!("hello", unsafe { string_array.value_unchecked(0) });
-        assert_eq!("", string_array.value(1));
-        assert_eq!("", unsafe { string_array.value_unchecked(1) });
-        assert_eq!("parquet", string_array.value(2));
-        assert_eq!("parquet", unsafe { string_array.value_unchecked(2) });
-        assert_eq!(5, string_array.value_offsets()[2]);
-        assert_eq!(7, string_array.value_length(2));
-        for i in 0..3 {
-            assert!(string_array.is_valid(i));
-            assert!(!string_array.is_null(i));
-        }
-    }
-
-    #[test]
-    #[should_panic(expected = "[Large]StringArray expects Datatype::[Large]Utf8")]
-    fn test_string_array_from_int() {
-        let array = LargeStringArray::from(vec!["a", "b"]);
-        StringArray::from(array.data().clone());
-    }
-
-    #[test]
-    fn test_large_string_array_from_u8_slice() {
-        let values: Vec<&str> = vec!["hello", "", "parquet"];
-
-        // Array data: ["hello", "", "parquet"]
-        let string_array = LargeStringArray::from(values);
-
-        assert_eq!(3, string_array.len());
-        assert_eq!(0, string_array.null_count());
-        assert_eq!("hello", string_array.value(0));
-        assert_eq!("hello", unsafe { string_array.value_unchecked(0) });
-        assert_eq!("", string_array.value(1));
-        assert_eq!("", unsafe { string_array.value_unchecked(1) });
-        assert_eq!("parquet", string_array.value(2));
-        assert_eq!("parquet", unsafe { string_array.value_unchecked(2) });
-        assert_eq!(5, string_array.value_offsets()[2]);
-        assert_eq!(7, string_array.value_length(2));
-        for i in 0..3 {
-            assert!(string_array.is_valid(i));
-            assert!(!string_array.is_null(i));
-        }
-    }
-
-    #[test]
-    fn test_nested_string_array() {
-        let string_builder = StringBuilder::new(3);
-        let mut list_of_string_builder = ListBuilder::new(string_builder);
-
-        list_of_string_builder.values().append_value("foo").unwrap();
-        list_of_string_builder.values().append_value("bar").unwrap();
-        list_of_string_builder.append(true).unwrap();
-
-        list_of_string_builder
-            .values()
-            .append_value("foobar")
-            .unwrap();
-        list_of_string_builder.append(true).unwrap();
-        let list_of_strings = list_of_string_builder.finish();
-
-        assert_eq!(list_of_strings.len(), 2);
-
-        let first_slot = list_of_strings.value(0);
-        let first_list = first_slot.as_any().downcast_ref::<StringArray>().unwrap();
-        assert_eq!(first_list.len(), 2);
-        assert_eq!(first_list.value(0), "foo");
-        assert_eq!(unsafe { first_list.value_unchecked(0) }, "foo");
-        assert_eq!(first_list.value(1), "bar");
-        assert_eq!(unsafe { first_list.value_unchecked(1) }, "bar");
-
-        let second_slot = list_of_strings.value(1);
-        let second_list = second_slot.as_any().downcast_ref::<StringArray>().unwrap();
-        assert_eq!(second_list.len(), 1);
-        assert_eq!(second_list.value(0), "foobar");
-        assert_eq!(unsafe { second_list.value_unchecked(0) }, "foobar");
-    }
-
-    #[test]
-    #[should_panic(expected = "StringArray out of bounds access")]
-    fn test_string_array_get_value_index_out_of_bound() {
-        let values: [u8; 12] = [
-            b'h', b'e', b'l', b'l', b'o', b'p', b'a', b'r', b'q', b'u', b'e', b't',
-        ];
-        let offsets: [i32; 4] = [0, 5, 5, 12];
-        let array_data = ArrayData::builder(DataType::Utf8)
-            .len(3)
-            .add_buffer(Buffer::from_slice_ref(&offsets))
-            .add_buffer(Buffer::from_slice_ref(&values))
-            .build();
-        let string_array = StringArray::from(array_data);
-        string_array.value(4);
-    }
-
-    #[test]
-    fn test_string_array_fmt_debug() {
-        let arr: StringArray = vec!["hello", "arrow"].into();
-        assert_eq!(
-            "StringArray\n[\n  \"hello\",\n  \"arrow\",\n]",
-            format!("{:?}", arr)
-        );
-    }
-
-    #[test]
-    fn test_large_string_array_fmt_debug() {
-        let arr: LargeStringArray = vec!["hello", "arrow"].into();
-        assert_eq!(
-            "LargeStringArray\n[\n  \"hello\",\n  \"arrow\",\n]",
-            format!("{:?}", arr)
-        );
-    }
-
-    #[test]
-    fn test_string_array_from_iter() {
-        let data = vec![Some("hello"), None, Some("arrow")];
-        // from Vec<Option<&str>>
-        let array1 = StringArray::from(data.clone());
-        // from Iterator<Option<&str>>
-        let array2: StringArray = data.clone().into_iter().collect();
-        // from Iterator<Option<String>>
-        let array3: StringArray =
-            data.into_iter().map(|x| x.map(|s| s.to_string())).collect();
-
-        assert_eq!(array1, array2);
-        assert_eq!(array2, array3);
-    }
-
-    #[test]
-    fn test_string_array_from_iter_values() {
-        let data = vec!["hello", "hello2"];
-        let array1 = StringArray::from_iter_values(data.iter());
-
-        assert_eq!(array1.value(0), "hello");
-        assert_eq!(array1.value(1), "hello2");
-    }
-
-    #[test]
-    fn test_string_array_from_unbound_iter() {
-        // iterator that doesn't declare (upper) size bound
-        let string_iter = (0..)
-            .scan(0usize, |pos, i| {
-                if *pos < 10 {
-                    *pos += 1;
-                    Some(Some(format!("value {}", i)))
-                } else {
-                    // actually returns up to 10 values
-                    None
-                }
-            })
-            // limited using take()
-            .take(100);
-
-        let (_, upper_size_bound) = string_iter.size_hint();
-        // the upper bound, defined by take above, is 100
-        assert_eq!(upper_size_bound, Some(100));
-        let string_array: StringArray = string_iter.collect();
-        // but the actual number of items in the array should be 10
-        assert_eq!(string_array.len(), 10);
-    }
-}
diff --git a/rust/arrow/src/array/array_struct.rs b/rust/arrow/src/array/array_struct.rs
deleted file mode 100644
index 59ee527e5f8..00000000000
--- a/rust/arrow/src/array/array_struct.rs
+++ /dev/null
@@ -1,531 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::any::Any;
-use std::convert::{From, TryFrom};
-use std::fmt;
-use std::iter::IntoIterator;
-use std::mem;
-
-use super::{make_array, Array, ArrayData, ArrayRef};
-use crate::datatypes::DataType;
-use crate::error::{ArrowError, Result};
-use crate::{
-    buffer::{buffer_bin_or, Buffer},
-    datatypes::Field,
-};
-
-/// A nested array type where each child (called *field*) is represented by a separate
-/// array.
-pub struct StructArray {
-    data: ArrayData,
-    pub(crate) boxed_fields: Vec<ArrayRef>,
-}
-
-impl StructArray {
-    /// Returns the field at `pos`.
-    pub fn column(&self, pos: usize) -> &ArrayRef {
-        &self.boxed_fields[pos]
-    }
-
-    /// Return the number of fields in this struct array
-    pub fn num_columns(&self) -> usize {
-        self.boxed_fields.len()
-    }
-
-    /// Returns the fields of the struct array
-    pub fn columns(&self) -> Vec<&ArrayRef> {
-        self.boxed_fields.iter().collect()
-    }
-
-    /// Returns child array refs of the struct array
-    pub fn columns_ref(&self) -> Vec<ArrayRef> {
-        self.boxed_fields.clone()
-    }
-
-    /// Return field names in this struct array
-    pub fn column_names(&self) -> Vec<&str> {
-        match self.data.data_type() {
-            DataType::Struct(fields) => fields
-                .iter()
-                .map(|f| f.name().as_str())
-                .collect::<Vec<&str>>(),
-            _ => unreachable!("Struct array's data type is not struct!"),
-        }
-    }
-
-    /// Return child array whose field name equals to column_name
-    ///
-    /// Note: A schema can currently have duplicate field names, in which case
-    /// the first field will always be selected.
-    /// This issue will be addressed in [ARROW-11178](https://issues.apache.org/jira/browse/ARROW-11178)
-    pub fn column_by_name(&self, column_name: &str) -> Option<&ArrayRef> {
-        self.column_names()
-            .iter()
-            .position(|c| c == &column_name)
-            .map(|pos| self.column(pos))
-    }
-}
-
-impl From<ArrayData> for StructArray {
-    fn from(data: ArrayData) -> Self {
-        let mut boxed_fields = vec![];
-        for cd in data.child_data() {
-            let child_data = if data.offset() != 0 || data.len() != cd.len() {
-                cd.slice(data.offset(), data.len())
-            } else {
-                cd.clone()
-            };
-            boxed_fields.push(make_array(child_data));
-        }
-        Self { data, boxed_fields }
-    }
-}
-
-impl TryFrom<Vec<(&str, ArrayRef)>> for StructArray {
-    type Error = ArrowError;
-
-    /// builds a StructArray from a vector of names and arrays.
-    /// This errors if the values have a different length.
-    /// An entry is set to Null when all values are null.
-    fn try_from(values: Vec<(&str, ArrayRef)>) -> Result<Self> {
-        let values_len = values.len();
-
-        // these will be populated
-        let mut fields = Vec::with_capacity(values_len);
-        let mut child_data = Vec::with_capacity(values_len);
-
-        // len: the size of the arrays.
-        let mut len: Option<usize> = None;
-        // null: the null mask of the arrays.
-        let mut null: Option<Buffer> = None;
-        for (field_name, array) in values {
-            let child_datum = array.data();
-            let child_datum_len = child_datum.len();
-            if let Some(len) = len {
-                if len != child_datum_len {
-                    return Err(ArrowError::InvalidArgumentError(
-                        format!("Array of field \"{}\" has length {}, but previous elements have length {}.
-                        All arrays in every entry in a struct array must have the same length.", field_name, child_datum_len, len)
-                    ));
-                }
-            } else {
-                len = Some(child_datum_len)
-            }
-            child_data.push(child_datum.clone());
-            fields.push(Field::new(
-                field_name,
-                array.data_type().clone(),
-                child_datum.null_buffer().is_some(),
-            ));
-
-            if let Some(child_null_buffer) = child_datum.null_buffer() {
-                let child_datum_offset = child_datum.offset();
-
-                null = Some(if let Some(null_buffer) = &null {
-                    buffer_bin_or(
-                        null_buffer,
-                        0,
-                        child_null_buffer,
-                        child_datum_offset,
-                        child_datum_len,
-                    )
-                } else {
-                    child_null_buffer.bit_slice(child_datum_offset, child_datum_len)
-                });
-            } else if null.is_some() {
-                // when one of the fields has no nulls, them there is no null in the array
-                null = None;
-            }
-        }
-        let len = len.unwrap();
-
-        let mut builder = ArrayData::builder(DataType::Struct(fields))
-            .len(len)
-            .child_data(child_data);
-        if let Some(null_buffer) = null {
-            builder = builder.null_bit_buffer(null_buffer);
-        }
-
-        Ok(StructArray::from(builder.build()))
-    }
-}
-
-impl Array for StructArray {
-    fn as_any(&self) -> &Any {
-        self
-    }
-
-    fn data(&self) -> &ArrayData {
-        &self.data
-    }
-
-    /// Returns the length (i.e., number of elements) of this array
-    fn len(&self) -> usize {
-        self.data_ref().len()
-    }
-
-    /// Returns the total number of bytes of memory occupied by the buffers owned by this [StructArray].
-    fn get_buffer_memory_size(&self) -> usize {
-        self.data.get_buffer_memory_size()
-    }
-
-    /// Returns the total number of bytes of memory occupied physically by this [StructArray].
-    fn get_array_memory_size(&self) -> usize {
-        self.data.get_array_memory_size() + mem::size_of_val(self)
-    }
-}
-
-impl From<Vec<(Field, ArrayRef)>> for StructArray {
-    fn from(v: Vec<(Field, ArrayRef)>) -> Self {
-        let (field_types, field_values): (Vec<_>, Vec<_>) = v.into_iter().unzip();
-
-        // Check the length of the child arrays
-        let length = field_values[0].len();
-        for i in 1..field_values.len() {
-            assert_eq!(
-                length,
-                field_values[i].len(),
-                "all child arrays of a StructArray must have the same length"
-            );
-            assert_eq!(
-                field_types[i].data_type(),
-                field_values[i].data().data_type(),
-                "the field data types must match the array data in a StructArray"
-            )
-        }
-
-        let data = ArrayData::builder(DataType::Struct(field_types))
-            .child_data(field_values.into_iter().map(|a| a.data().clone()).collect())
-            .len(length)
-            .build();
-        Self::from(data)
-    }
-}
-
-impl fmt::Debug for StructArray {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "StructArray\n[\n")?;
-        for (child_index, name) in self.column_names().iter().enumerate() {
-            let column = self.column(child_index);
-            writeln!(
-                f,
-                "-- child {}: \"{}\" ({:?})",
-                child_index,
-                name,
-                column.data_type()
-            )?;
-            fmt::Debug::fmt(column, f)?;
-            writeln!(f)?;
-        }
-        write!(f, "]")
-    }
-}
-
-impl From<(Vec<(Field, ArrayRef)>, Buffer)> for StructArray {
-    fn from(pair: (Vec<(Field, ArrayRef)>, Buffer)) -> Self {
-        let (field_types, field_values): (Vec<_>, Vec<_>) = pair.0.into_iter().unzip();
-
-        // Check the length of the child arrays
-        let length = field_values[0].len();
-        for i in 1..field_values.len() {
-            assert_eq!(
-                length,
-                field_values[i].len(),
-                "all child arrays of a StructArray must have the same length"
-            );
-            assert_eq!(
-                field_types[i].data_type(),
-                field_values[i].data().data_type(),
-                "the field data types must match the array data in a StructArray"
-            )
-        }
-
-        let data = ArrayData::builder(DataType::Struct(field_types))
-            .null_bit_buffer(pair.1)
-            .child_data(field_values.into_iter().map(|a| a.data().clone()).collect())
-            .len(length)
-            .build();
-        Self::from(data)
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    use std::sync::Arc;
-
-    use crate::{
-        array::BooleanArray, array::Float32Array, array::Float64Array, array::Int32Array,
-        array::StringArray, bitmap::Bitmap,
-    };
-    use crate::{
-        array::Int64Array,
-        datatypes::{DataType, Field},
-    };
-    use crate::{buffer::Buffer, datatypes::ToByteSlice};
-
-    #[test]
-    fn test_struct_array_builder() {
-        let array = BooleanArray::from(vec![false, false, true, true]);
-        let boolean_data = array.data();
-        let array = Int64Array::from(vec![42, 28, 19, 31]);
-        let int_data = array.data();
-
-        let fields = vec![
-            Field::new("a", DataType::Boolean, false),
-            Field::new("b", DataType::Int64, false),
-        ];
-        let struct_array_data = ArrayData::builder(DataType::Struct(fields))
-            .len(4)
-            .add_child_data(boolean_data.clone())
-            .add_child_data(int_data.clone())
-            .build();
-        let struct_array = StructArray::from(struct_array_data);
-
-        assert_eq!(boolean_data, struct_array.column(0).data());
-        assert_eq!(int_data, struct_array.column(1).data());
-    }
-
-    #[test]
-    fn test_struct_array_from() {
-        let boolean = Arc::new(BooleanArray::from(vec![false, false, true, true]));
-        let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));
-
-        let struct_array = StructArray::from(vec![
-            (
-                Field::new("b", DataType::Boolean, false),
-                boolean.clone() as ArrayRef,
-            ),
-            (
-                Field::new("c", DataType::Int32, false),
-                int.clone() as ArrayRef,
-            ),
-        ]);
-        assert_eq!(struct_array.column(0).as_ref(), boolean.as_ref());
-        assert_eq!(struct_array.column(1).as_ref(), int.as_ref());
-        assert_eq!(4, struct_array.len());
-        assert_eq!(0, struct_array.null_count());
-        assert_eq!(0, struct_array.offset());
-    }
-
-    /// validates that the in-memory representation follows [the spec](https://arrow.apache.org/docs/format/Columnar.html#struct-layout)
-    #[test]
-    fn test_struct_array_from_vec() {
-        let strings: ArrayRef = Arc::new(StringArray::from(vec![
-            Some("joe"),
-            None,
-            None,
-            Some("mark"),
-        ]));
-        let ints: ArrayRef =
-            Arc::new(Int32Array::from(vec![Some(1), Some(2), None, Some(4)]));
-
-        let arr =
-            StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())])
-                .unwrap();
-
-        let struct_data = arr.data();
-        assert_eq!(4, struct_data.len());
-        assert_eq!(1, struct_data.null_count());
-        assert_eq!(
-            // 00001011
-            &Some(Bitmap::from(Buffer::from(&[11_u8]))),
-            struct_data.null_bitmap()
-        );
-
-        let expected_string_data = ArrayData::builder(DataType::Utf8)
-            .len(4)
-            .null_bit_buffer(Buffer::from(&[9_u8]))
-            .add_buffer(Buffer::from(&[0, 3, 3, 3, 7].to_byte_slice()))
-            .add_buffer(Buffer::from(b"joemark"))
-            .build();
-
-        let expected_int_data = ArrayData::builder(DataType::Int32)
-            .len(4)
-            .null_bit_buffer(Buffer::from(&[11_u8]))
-            .add_buffer(Buffer::from(&[1, 2, 0, 4].to_byte_slice()))
-            .build();
-
-        assert_eq!(&expected_string_data, arr.column(0).data());
-
-        // TODO: implement equality for ArrayData
-        assert_eq!(expected_int_data.len(), arr.column(1).data().len());
-        assert_eq!(
-            expected_int_data.null_count(),
-            arr.column(1).data().null_count()
-        );
-        assert_eq!(
-            expected_int_data.null_bitmap(),
-            arr.column(1).data().null_bitmap()
-        );
-        let expected_value_buf = expected_int_data.buffers()[0].clone();
-        let actual_value_buf = arr.column(1).data().buffers()[0].clone();
-        for i in 0..expected_int_data.len() {
-            if !expected_int_data.is_null(i) {
-                assert_eq!(
-                    expected_value_buf.as_slice()[i * 4..(i + 1) * 4],
-                    actual_value_buf.as_slice()[i * 4..(i + 1) * 4]
-                );
-            }
-        }
-    }
-
-    #[test]
-    fn test_struct_array_from_vec_error() {
-        let strings: ArrayRef = Arc::new(StringArray::from(vec![
-            Some("joe"),
-            None,
-            None,
-            // 3 elements, not 4
-        ]));
-        let ints: ArrayRef =
-            Arc::new(Int32Array::from(vec![Some(1), Some(2), None, Some(4)]));
-
-        let arr =
-            StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())]);
-
-        match arr {
-            Err(ArrowError::InvalidArgumentError(e)) => {
-                assert!(e.starts_with("Array of field \"f2\" has length 4, but previous elements have length 3."));
-            }
-            _ => panic!("This test got an unexpected error type"),
-        };
-    }
-
-    #[test]
-    #[should_panic(
-        expected = "the field data types must match the array data in a StructArray"
-    )]
-    fn test_struct_array_from_mismatched_types() {
-        StructArray::from(vec![
-            (
-                Field::new("b", DataType::Int16, false),
-                Arc::new(BooleanArray::from(vec![false, false, true, true]))
-                    as Arc<Array>,
-            ),
-            (
-                Field::new("c", DataType::Utf8, false),
-                Arc::new(Int32Array::from(vec![42, 28, 19, 31])),
-            ),
-        ]);
-    }
-
-    #[test]
-    fn test_struct_array_slice() {
-        let boolean_data = ArrayData::builder(DataType::Boolean)
-            .len(5)
-            .add_buffer(Buffer::from([0b00010000]))
-            .null_bit_buffer(Buffer::from([0b00010001]))
-            .build();
-        let int_data = ArrayData::builder(DataType::Int32)
-            .len(5)
-            .add_buffer(Buffer::from([0, 28, 42, 0, 0].to_byte_slice()))
-            .null_bit_buffer(Buffer::from([0b00000110]))
-            .build();
-
-        let mut field_types = vec![];
-        field_types.push(Field::new("a", DataType::Boolean, false));
-        field_types.push(Field::new("b", DataType::Int32, false));
-        let struct_array_data = ArrayData::builder(DataType::Struct(field_types))
-            .len(5)
-            .add_child_data(boolean_data.clone())
-            .add_child_data(int_data.clone())
-            .null_bit_buffer(Buffer::from([0b00010111]))
-            .build();
-        let struct_array = StructArray::from(struct_array_data);
-
-        assert_eq!(5, struct_array.len());
-        assert_eq!(1, struct_array.null_count());
-        assert!(struct_array.is_valid(0));
-        assert!(struct_array.is_valid(1));
-        assert!(struct_array.is_valid(2));
-        assert!(struct_array.is_null(3));
-        assert!(struct_array.is_valid(4));
-        assert_eq!(&boolean_data, struct_array.column(0).data());
-        assert_eq!(&int_data, struct_array.column(1).data());
-
-        let c0 = struct_array.column(0);
-        let c0 = c0.as_any().downcast_ref::<BooleanArray>().unwrap();
-        assert_eq!(5, c0.len());
-        assert_eq!(3, c0.null_count());
-        assert!(c0.is_valid(0));
-        assert_eq!(false, c0.value(0));
-        assert!(c0.is_null(1));
-        assert!(c0.is_null(2));
-        assert!(c0.is_null(3));
-        assert!(c0.is_valid(4));
-        assert_eq!(true, c0.value(4));
-
-        let c1 = struct_array.column(1);
-        let c1 = c1.as_any().downcast_ref::<Int32Array>().unwrap();
-        assert_eq!(5, c1.len());
-        assert_eq!(3, c1.null_count());
-        assert!(c1.is_null(0));
-        assert!(c1.is_valid(1));
-        assert_eq!(28, c1.value(1));
-        assert!(c1.is_valid(2));
-        assert_eq!(42, c1.value(2));
-        assert!(c1.is_null(3));
-        assert!(c1.is_null(4));
-
-        let sliced_array = struct_array.slice(2, 3);
-        let sliced_array = sliced_array.as_any().downcast_ref::<StructArray>().unwrap();
-        assert_eq!(3, sliced_array.len());
-        assert_eq!(2, sliced_array.offset());
-        assert_eq!(1, sliced_array.null_count());
-        assert!(sliced_array.is_valid(0));
-        assert!(sliced_array.is_null(1));
-        assert!(sliced_array.is_valid(2));
-
-        let sliced_c0 = sliced_array.column(0);
-        let sliced_c0 = sliced_c0.as_any().downcast_ref::<BooleanArray>().unwrap();
-        assert_eq!(3, sliced_c0.len());
-        assert_eq!(2, sliced_c0.offset());
-        assert!(sliced_c0.is_null(0));
-        assert!(sliced_c0.is_null(1));
-        assert!(sliced_c0.is_valid(2));
-        assert_eq!(true, sliced_c0.value(2));
-
-        let sliced_c1 = sliced_array.column(1);
-        let sliced_c1 = sliced_c1.as_any().downcast_ref::<Int32Array>().unwrap();
-        assert_eq!(3, sliced_c1.len());
-        assert_eq!(2, sliced_c1.offset());
-        assert!(sliced_c1.is_valid(0));
-        assert_eq!(42, sliced_c1.value(0));
-        assert!(sliced_c1.is_null(1));
-        assert!(sliced_c1.is_null(2));
-    }
-
-    #[test]
-    #[should_panic(
-        expected = "all child arrays of a StructArray must have the same length"
-    )]
-    fn test_invalid_struct_child_array_lengths() {
-        StructArray::from(vec![
-            (
-                Field::new("b", DataType::Float32, false),
-                Arc::new(Float32Array::from(vec![1.1])) as Arc<Array>,
-            ),
-            (
-                Field::new("c", DataType::Float64, false),
-                Arc::new(Float64Array::from(vec![2.2, 3.3])),
-            ),
-        ]);
-    }
-}
diff --git a/rust/arrow/src/array/array_union.rs b/rust/arrow/src/array/array_union.rs
deleted file mode 100644
index 083d5bba15b..00000000000
--- a/rust/arrow/src/array/array_union.rs
+++ /dev/null
@@ -1,831 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Contains the `UnionArray` type.
-//!
-//! Each slot in a `UnionArray` can have a value chosen from a number of types.  Each of the
-//! possible types are named like the fields of a [`StructArray`](crate::array::StructArray).
-//! A `UnionArray` can have two possible memory layouts, "dense" or "sparse".  For more information
-//! on please see the [specification](https://arrow.apache.org/docs/format/Columnar.html#union-layout).
-//!
-//! Builders are provided for `UnionArray`'s involving primitive types.  `UnionArray`'s of nested
-//! types are also supported but not via `UnionBuilder`, see the tests for examples.
-//!
-//! # Example: Dense Memory Layout
-//!
-//! ```
-//! use arrow::array::UnionBuilder;
-//! use arrow::datatypes::{Float64Type, Int32Type};
-//!
-//! # fn main() -> arrow::error::Result<()> {
-//! let mut builder = UnionBuilder::new_dense(3);
-//! builder.append::<Int32Type>("a", 1).unwrap();
-//! builder.append::<Float64Type>("b", 3.0).unwrap();
-//! builder.append::<Int32Type>("a", 4).unwrap();
-//! let union = builder.build().unwrap();
-//!
-//! assert_eq!(union.type_id(0), 0_i8);
-//! assert_eq!(union.type_id(1), 1_i8);
-//! assert_eq!(union.type_id(2), 0_i8);
-//!
-//! assert_eq!(union.value_offset(0), 0_i32);
-//! assert_eq!(union.value_offset(1), 0_i32);
-//! assert_eq!(union.value_offset(2), 1_i32);
-//!
-//! # Ok(())
-//! # }
-//! ```
-//!
-//! # Example: Sparse Memory Layout
-//! ```
-//! use arrow::array::UnionBuilder;
-//! use arrow::datatypes::{Float64Type, Int32Type};
-//!
-//! # fn main() -> arrow::error::Result<()> {
-//! let mut builder = UnionBuilder::new_sparse(3);
-//! builder.append::<Int32Type>("a", 1).unwrap();
-//! builder.append::<Float64Type>("b", 3.0).unwrap();
-//! builder.append::<Int32Type>("a", 4).unwrap();
-//! let union = builder.build().unwrap();
-//!
-//! assert_eq!(union.type_id(0), 0_i8);
-//! assert_eq!(union.type_id(1), 1_i8);
-//! assert_eq!(union.type_id(2), 0_i8);
-//!
-//! assert_eq!(union.value_offset(0), 0_i32);
-//! assert_eq!(union.value_offset(1), 1_i32);
-//! assert_eq!(union.value_offset(2), 2_i32);
-//!
-//! # Ok(())
-//! # }
-//! ```
-use crate::array::{data::count_nulls, make_array, Array, ArrayData, ArrayRef};
-use crate::buffer::Buffer;
-use crate::datatypes::*;
-use crate::error::{ArrowError, Result};
-
-use core::fmt;
-use std::any::Any;
-use std::mem;
-use std::mem::size_of;
-
-/// An Array that can represent slots of varying types.
-pub struct UnionArray {
-    data: ArrayData,
-    boxed_fields: Vec<ArrayRef>,
-}
-
-impl UnionArray {
-    /// Creates a new `UnionArray`.
-    ///
-    /// Accepts type ids, child arrays and optionally offsets (for dense unions) to create
-    /// a new `UnionArray`.  This method makes no attempt to validate the data provided by the
-    /// caller and assumes that each of the components are correct and consistent with each other.
-    /// See `try_new` for an alternative that validates the data provided.
-    ///
-    /// # Data Consistency
-    ///
-    /// The `type_ids` `Buffer` should contain `i8` values.  These values should be greater than
-    /// zero and must be less than the number of children provided in `child_arrays`.  These values
-    /// are used to index into the `child_arrays`.
-    ///
-    /// The `value_offsets` `Buffer` is only provided in the case of a dense union, sparse unions
-    /// should use `None`.  If provided the `value_offsets` `Buffer` should contain `i32` values.
-    /// These values should be greater than zero and must be less than the length of the overall
-    /// array.
-    ///
-    /// In both cases above we use signed integer types to maintain compatibility with other
-    /// Arrow implementations.
-    ///
-    /// In both of the cases above we are accepting `Buffer`'s which are assumed to be representing
-    /// `i8` and `i32` values respectively.  `Buffer` objects are untyped and no attempt is made
-    /// to ensure that the data provided is valid.
-    pub fn new(
-        type_ids: Buffer,
-        value_offsets: Option<Buffer>,
-        child_arrays: Vec<(Field, ArrayRef)>,
-        bitmap_data: Option<Buffer>,
-    ) -> Self {
-        let (field_types, field_values): (Vec<_>, Vec<_>) =
-            child_arrays.into_iter().unzip();
-        let len = type_ids.len();
-        let mut builder = ArrayData::builder(DataType::Union(field_types))
-            .add_buffer(type_ids)
-            .child_data(field_values.into_iter().map(|a| a.data().clone()).collect())
-            .len(len);
-        if let Some(bitmap) = bitmap_data {
-            builder = builder.null_bit_buffer(bitmap)
-        }
-        let data = match value_offsets {
-            Some(b) => builder.add_buffer(b).build(),
-            None => builder.build(),
-        };
-        Self::from(data)
-    }
-    /// Attempts to create a new `UnionArray` and validates the inputs provided.
-    pub fn try_new(
-        type_ids: Buffer,
-        value_offsets: Option<Buffer>,
-        child_arrays: Vec<(Field, ArrayRef)>,
-        bitmap: Option<Buffer>,
-    ) -> Result<Self> {
-        if let Some(b) = &value_offsets {
-            let nulls = count_nulls(bitmap.as_ref(), 0, type_ids.len());
-            if ((type_ids.len() - nulls) * 4) != b.len() {
-                return Err(ArrowError::InvalidArgumentError(
-                    "Type Ids and Offsets represent a different number of array slots."
-                        .to_string(),
-                ));
-            }
-        }
-
-        // Check the type_ids
-        let type_id_slice: &[i8] = unsafe { type_ids.typed_data() };
-        let invalid_type_ids = type_id_slice
-            .iter()
-            .filter(|i| *i < &0)
-            .collect::<Vec<&i8>>();
-        if !invalid_type_ids.is_empty() {
-            return Err(ArrowError::InvalidArgumentError(format!(
-                "Type Ids must be positive and cannot be greater than the number of \
-                child arrays, found:\n{:?}",
-                invalid_type_ids
-            )));
-        }
-
-        // Check the value offsets if provided
-        if let Some(offset_buffer) = &value_offsets {
-            let max_len = type_ids.len() as i32;
-            let offsets_slice: &[i32] = unsafe { offset_buffer.typed_data() };
-            let invalid_offsets = offsets_slice
-                .iter()
-                .filter(|i| *i < &0 || *i > &max_len)
-                .collect::<Vec<&i32>>();
-            if !invalid_offsets.is_empty() {
-                return Err(ArrowError::InvalidArgumentError(format!(
-                    "Offsets must be positive and within the length of the Array, \
-                    found:\n{:?}",
-                    invalid_offsets
-                )));
-            }
-        }
-
-        Ok(Self::new(type_ids, value_offsets, child_arrays, bitmap))
-    }
-
-    /// Accesses the child array for `type_id`.
-    ///
-    /// # Panics
-    ///
-    /// Panics if the `type_id` provided is less than zero or greater than the number of types
-    /// in the `Union`.
-    pub fn child(&self, type_id: i8) -> ArrayRef {
-        assert!(0 <= type_id);
-        assert!((type_id as usize) < self.boxed_fields.len());
-        self.boxed_fields[type_id as usize].clone()
-    }
-
-    /// Returns the `type_id` for the array slot at `index`.
-    ///
-    /// # Panics
-    ///
-    /// Panics if `index` is greater than the length of the array.
-    pub fn type_id(&self, index: usize) -> i8 {
-        assert!(index - self.offset() < self.len());
-        self.data().buffers()[0].as_slice()[index] as i8
-    }
-
-    /// Returns the offset into the underlying values array for the array slot at `index`.
-    ///
-    /// # Panics
-    ///
-    /// Panics if `index` is greater than the length of the array.
-    pub fn value_offset(&self, index: usize) -> i32 {
-        assert!(index - self.offset() < self.len());
-        if self.is_dense() {
-            // In format v4 unions had their own validity bitmap and offsets are compressed by omitting null values
-            // Starting with v5 unions don't have a validity bitmap and it's possible to directly index into the offsets buffer
-            let valid_slots = match self.data.null_buffer() {
-                Some(b) => b.count_set_bits_offset(0, index),
-                None => index,
-            };
-            self.data().buffers()[1].as_slice()[valid_slots * size_of::<i32>()] as i32
-        } else {
-            index as i32
-        }
-    }
-
-    /// Returns the array's value at `index`.
-    ///
-    /// # Panics
-    ///
-    /// Panics if `index` is greater than the length of the array.
-    pub fn value(&self, index: usize) -> ArrayRef {
-        let type_id = self.type_id(self.offset() + index);
-        let value_offset = self.value_offset(self.offset() + index) as usize;
-        let child_data = self.boxed_fields[type_id as usize].clone();
-        child_data.slice(value_offset, 1)
-    }
-
-    /// Returns the names of the types in the union.
-    pub fn type_names(&self) -> Vec<&str> {
-        match self.data.data_type() {
-            DataType::Union(fields) => fields
-                .iter()
-                .map(|f| f.name().as_str())
-                .collect::<Vec<&str>>(),
-            _ => unreachable!("Union array's data type is not a union!"),
-        }
-    }
-
-    /// Returns whether the `UnionArray` is dense (or sparse if `false`).
-    fn is_dense(&self) -> bool {
-        self.data().buffers().len() == 2
-    }
-}
-
-impl From<ArrayData> for UnionArray {
-    fn from(data: ArrayData) -> Self {
-        let mut boxed_fields = vec![];
-        for cd in data.child_data() {
-            boxed_fields.push(make_array(cd.clone()));
-        }
-        Self { data, boxed_fields }
-    }
-}
-
-impl Array for UnionArray {
-    fn as_any(&self) -> &Any {
-        self
-    }
-
-    fn data(&self) -> &ArrayData {
-        &self.data
-    }
-
-    /// Returns the total number of bytes of memory occupied by the buffers owned by this [UnionArray].
-    fn get_buffer_memory_size(&self) -> usize {
-        let mut size = self.data.get_buffer_memory_size();
-        for field in &self.boxed_fields {
-            size += field.get_buffer_memory_size();
-        }
-        size
-    }
-
-    /// Returns the total number of bytes of memory occupied physically by this [UnionArray].
-    fn get_array_memory_size(&self) -> usize {
-        let mut size = self.data.get_array_memory_size();
-        size += mem::size_of_val(self) - mem::size_of_val(&self.boxed_fields);
-        for field in &self.boxed_fields {
-            size += field.get_array_memory_size();
-        }
-        size
-    }
-}
-
-impl fmt::Debug for UnionArray {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        let header = if self.is_dense() {
-            "UnionArray(Dense)\n["
-        } else {
-            "UnionArray(Sparse)\n["
-        };
-        writeln!(f, "{}", header)?;
-
-        writeln!(f, "-- type id buffer:")?;
-        writeln!(f, "{:?}", self.data().buffers()[0])?;
-
-        if self.is_dense() {
-            writeln!(f, "-- offsets buffer:")?;
-            writeln!(f, "{:?}", self.data().buffers()[1])?;
-        }
-
-        for (child_index, name) in self.type_names().iter().enumerate() {
-            let column = &self.boxed_fields[child_index];
-            writeln!(
-                f,
-                "-- child {}: \"{}\" ({:?})",
-                child_index,
-                *name,
-                column.data_type()
-            )?;
-            fmt::Debug::fmt(column, f)?;
-            writeln!(f)?;
-        }
-        writeln!(f, "]")
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    use std::sync::Arc;
-
-    use crate::array::*;
-    use crate::buffer::Buffer;
-    use crate::datatypes::{DataType, Field};
-
-    #[test]
-    fn test_dense_i32() {
-        let mut builder = UnionBuilder::new_dense(7);
-        builder.append::<Int32Type>("a", 1).unwrap();
-        builder.append::<Int32Type>("b", 2).unwrap();
-        builder.append::<Int32Type>("c", 3).unwrap();
-        builder.append::<Int32Type>("a", 4).unwrap();
-        builder.append::<Int32Type>("c", 5).unwrap();
-        builder.append::<Int32Type>("a", 6).unwrap();
-        builder.append::<Int32Type>("b", 7).unwrap();
-        let union = builder.build().unwrap();
-
-        let expected_type_ids = vec![0_i8, 1, 2, 0, 2, 0, 1];
-        let expected_value_offsets = vec![0_i32, 0, 0, 1, 1, 2, 1];
-        let expected_array_values = [1_i32, 2, 3, 4, 5, 6, 7];
-
-        // Check type ids
-        assert_eq!(
-            union.data().buffers()[0],
-            Buffer::from_slice_ref(&expected_type_ids)
-        );
-        for (i, id) in expected_type_ids.iter().enumerate() {
-            assert_eq!(id, &union.type_id(i));
-        }
-
-        // Check offsets
-        assert_eq!(
-            union.data().buffers()[1],
-            Buffer::from_slice_ref(&expected_value_offsets)
-        );
-        for (i, id) in expected_value_offsets.iter().enumerate() {
-            assert_eq!(&union.value_offset(i), id);
-        }
-
-        // Check data
-        assert_eq!(
-            union.data().child_data()[0].buffers()[0],
-            Buffer::from_slice_ref(&[1_i32, 4, 6])
-        );
-        assert_eq!(
-            union.data().child_data()[1].buffers()[0],
-            Buffer::from_slice_ref(&[2_i32, 7])
-        );
-        assert_eq!(
-            union.data().child_data()[2].buffers()[0],
-            Buffer::from_slice_ref(&[3_i32, 5]),
-        );
-
-        assert_eq!(expected_array_values.len(), union.len());
-        for (i, expected_value) in expected_array_values.iter().enumerate() {
-            assert_eq!(false, union.is_null(i));
-            let slot = union.value(i);
-            let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
-            assert_eq!(slot.len(), 1);
-            let value = slot.value(0);
-            assert_eq!(expected_value, &value);
-        }
-    }
-
-    #[test]
-    fn test_dense_mixed() {
-        let mut builder = UnionBuilder::new_dense(7);
-        builder.append::<Int32Type>("a", 1).unwrap();
-        builder.append::<Int64Type>("c", 3).unwrap();
-        builder.append::<Int32Type>("a", 4).unwrap();
-        builder.append::<Int64Type>("c", 5).unwrap();
-        builder.append::<Int32Type>("a", 6).unwrap();
-        let union = builder.build().unwrap();
-
-        assert_eq!(5, union.len());
-        for i in 0..union.len() {
-            let slot = union.value(i);
-            assert_eq!(false, union.is_null(i));
-            match i {
-                0 => {
-                    let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
-                    assert_eq!(slot.len(), 1);
-                    let value = slot.value(0);
-                    assert_eq!(1_i32, value);
-                }
-                1 => {
-                    let slot = slot.as_any().downcast_ref::<Int64Array>().unwrap();
-                    assert_eq!(slot.len(), 1);
-                    let value = slot.value(0);
-                    assert_eq!(3_i64, value);
-                }
-                2 => {
-                    let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
-                    assert_eq!(slot.len(), 1);
-                    let value = slot.value(0);
-                    assert_eq!(4_i32, value);
-                }
-                3 => {
-                    let slot = slot.as_any().downcast_ref::<Int64Array>().unwrap();
-                    assert_eq!(slot.len(), 1);
-                    let value = slot.value(0);
-                    assert_eq!(5_i64, value);
-                }
-                4 => {
-                    let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
-                    assert_eq!(slot.len(), 1);
-                    let value = slot.value(0);
-                    assert_eq!(6_i32, value);
-                }
-                _ => unreachable!(),
-            }
-        }
-    }
-
-    #[test]
-    fn test_dense_mixed_with_nulls() {
-        let mut builder = UnionBuilder::new_dense(7);
-        builder.append::<Int32Type>("a", 1).unwrap();
-        builder.append::<Int64Type>("c", 3).unwrap();
-        builder.append::<Int32Type>("a", 10).unwrap();
-        builder.append_null().unwrap();
-        builder.append::<Int32Type>("a", 6).unwrap();
-        let union = builder.build().unwrap();
-
-        assert_eq!(5, union.len());
-        for i in 0..union.len() {
-            let slot = union.value(i);
-            match i {
-                0 => {
-                    let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
-                    assert_eq!(false, union.is_null(i));
-                    assert_eq!(slot.len(), 1);
-                    let value = slot.value(0);
-                    assert_eq!(1_i32, value);
-                }
-                1 => {
-                    let slot = slot.as_any().downcast_ref::<Int64Array>().unwrap();
-                    assert_eq!(false, union.is_null(i));
-                    assert_eq!(slot.len(), 1);
-                    let value = slot.value(0);
-                    assert_eq!(3_i64, value);
-                }
-                2 => {
-                    let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
-                    assert_eq!(false, union.is_null(i));
-                    assert_eq!(slot.len(), 1);
-                    let value = slot.value(0);
-                    assert_eq!(10_i32, value);
-                }
-                3 => assert!(union.is_null(i)),
-                4 => {
-                    let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
-                    assert_eq!(false, union.is_null(i));
-                    assert_eq!(slot.len(), 1);
-                    let value = slot.value(0);
-                    assert_eq!(6_i32, value);
-                }
-                _ => unreachable!(),
-            }
-        }
-    }
-
-    #[test]
-    fn test_dense_mixed_with_nulls_and_offset() {
-        let mut builder = UnionBuilder::new_dense(7);
-        builder.append::<Int32Type>("a", 1).unwrap();
-        builder.append::<Int64Type>("c", 3).unwrap();
-        builder.append::<Int32Type>("a", 10).unwrap();
-        builder.append_null().unwrap();
-        builder.append::<Int32Type>("a", 6).unwrap();
-        let union = builder.build().unwrap();
-
-        let slice = union.slice(2, 3);
-        let new_union = slice.as_any().downcast_ref::<UnionArray>().unwrap();
-
-        assert_eq!(3, new_union.len());
-        for i in 0..new_union.len() {
-            let slot = new_union.value(i);
-            match i {
-                0 => {
-                    let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
-                    assert_eq!(false, union.is_null(i));
-                    assert_eq!(slot.len(), 1);
-                    let value = slot.value(0);
-                    assert_eq!(10_i32, value);
-                }
-                1 => assert!(new_union.is_null(i)),
-                2 => {
-                    let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
-                    assert_eq!(false, union.is_null(i));
-                    assert_eq!(slot.len(), 1);
-                    let value = slot.value(0);
-                    assert_eq!(6_i32, value);
-                }
-                _ => unreachable!(),
-            }
-        }
-    }
-
-    #[test]
-    fn test_dense_mixed_with_str() {
-        let string_array = StringArray::from(vec!["foo", "bar", "baz"]);
-        let int_array = Int32Array::from(vec![5, 6]);
-        let float_array = Float64Array::from(vec![10.0]);
-
-        let type_ids = [1_i8, 0, 0, 2, 0, 1];
-        let value_offsets = [0_i32, 0, 1, 0, 2, 1];
-
-        let type_id_buffer = Buffer::from_slice_ref(&type_ids);
-        let value_offsets_buffer = Buffer::from_slice_ref(&value_offsets);
-
-        let mut children: Vec<(Field, Arc<Array>)> = Vec::new();
-        children.push((
-            Field::new("A", DataType::Utf8, false),
-            Arc::new(string_array),
-        ));
-        children.push((Field::new("B", DataType::Int32, false), Arc::new(int_array)));
-        children.push((
-            Field::new("C", DataType::Float64, false),
-            Arc::new(float_array),
-        ));
-        let array = UnionArray::try_new(
-            type_id_buffer,
-            Some(value_offsets_buffer),
-            children,
-            None,
-        )
-        .unwrap();
-
-        // Check type ids
-        assert_eq!(Buffer::from_slice_ref(&type_ids), array.data().buffers()[0]);
-        for (i, id) in type_ids.iter().enumerate() {
-            assert_eq!(id, &array.type_id(i));
-        }
-
-        // Check offsets
-        assert_eq!(
-            Buffer::from_slice_ref(&value_offsets),
-            array.data().buffers()[1]
-        );
-        for (i, id) in value_offsets.iter().enumerate() {
-            assert_eq!(id, &array.value_offset(i));
-        }
-
-        // Check values
-        assert_eq!(6, array.len());
-
-        let slot = array.value(0);
-        let value = slot.as_any().downcast_ref::<Int32Array>().unwrap().value(0);
-        assert_eq!(5, value);
-
-        let slot = array.value(1);
-        let value = slot
-            .as_any()
-            .downcast_ref::<StringArray>()
-            .unwrap()
-            .value(0);
-        assert_eq!("foo", value);
-
-        let slot = array.value(2);
-        let value = slot
-            .as_any()
-            .downcast_ref::<StringArray>()
-            .unwrap()
-            .value(0);
-        assert_eq!("bar", value);
-
-        let slot = array.value(3);
-        let value = slot
-            .as_any()
-            .downcast_ref::<Float64Array>()
-            .unwrap()
-            .value(0);
-        assert!(10.0 - value < f64::EPSILON);
-
-        let slot = array.value(4);
-        let value = slot
-            .as_any()
-            .downcast_ref::<StringArray>()
-            .unwrap()
-            .value(0);
-        assert_eq!("baz", value);
-
-        let slot = array.value(5);
-        let value = slot.as_any().downcast_ref::<Int32Array>().unwrap().value(0);
-        assert_eq!(6, value);
-    }
-
-    #[test]
-    fn test_sparse_i32() {
-        let mut builder = UnionBuilder::new_sparse(7);
-        builder.append::<Int32Type>("a", 1).unwrap();
-        builder.append::<Int32Type>("b", 2).unwrap();
-        builder.append::<Int32Type>("c", 3).unwrap();
-        builder.append::<Int32Type>("a", 4).unwrap();
-        builder.append::<Int32Type>("c", 5).unwrap();
-        builder.append::<Int32Type>("a", 6).unwrap();
-        builder.append::<Int32Type>("b", 7).unwrap();
-        let union = builder.build().unwrap();
-
-        let expected_type_ids = vec![0_i8, 1, 2, 0, 2, 0, 1];
-        let expected_array_values = [1_i32, 2, 3, 4, 5, 6, 7];
-
-        // Check type ids
-        assert_eq!(
-            Buffer::from_slice_ref(&expected_type_ids),
-            union.data().buffers()[0]
-        );
-        for (i, id) in expected_type_ids.iter().enumerate() {
-            assert_eq!(id, &union.type_id(i));
-        }
-
-        // Check offsets, sparse union should only have a single buffer
-        assert_eq!(union.data().buffers().len(), 1);
-
-        // Check data
-        assert_eq!(
-            union.data().child_data()[0].buffers()[0],
-            Buffer::from_slice_ref(&[1_i32, 0, 0, 4, 0, 6, 0]),
-        );
-        assert_eq!(
-            Buffer::from_slice_ref(&[0_i32, 2_i32, 0, 0, 0, 0, 7]),
-            union.data().child_data()[1].buffers()[0]
-        );
-        assert_eq!(
-            Buffer::from_slice_ref(&[0_i32, 0, 3_i32, 0, 5, 0, 0]),
-            union.data().child_data()[2].buffers()[0]
-        );
-
-        assert_eq!(expected_array_values.len(), union.len());
-        for (i, expected_value) in expected_array_values.iter().enumerate() {
-            assert_eq!(false, union.is_null(i));
-            let slot = union.value(i);
-            let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
-            assert_eq!(slot.len(), 1);
-            let value = slot.value(0);
-            assert_eq!(expected_value, &value);
-        }
-    }
-
-    #[test]
-    fn test_sparse_mixed() {
-        let mut builder = UnionBuilder::new_sparse(5);
-        builder.append::<Int32Type>("a", 1).unwrap();
-        builder.append::<Float64Type>("c", 3.0).unwrap();
-        builder.append::<Int32Type>("a", 4).unwrap();
-        builder.append::<Float64Type>("c", 5.0).unwrap();
-        builder.append::<Int32Type>("a", 6).unwrap();
-        let union = builder.build().unwrap();
-
-        let expected_type_ids = vec![0_i8, 1, 0, 1, 0];
-
-        // Check type ids
-        assert_eq!(
-            Buffer::from_slice_ref(&expected_type_ids),
-            union.data().buffers()[0]
-        );
-        for (i, id) in expected_type_ids.iter().enumerate() {
-            assert_eq!(id, &union.type_id(i));
-        }
-
-        // Check offsets, sparse union should only have a single buffer, i.e. no offsets
-        assert_eq!(union.data().buffers().len(), 1);
-
-        for i in 0..union.len() {
-            let slot = union.value(i);
-            assert_eq!(false, union.is_null(i));
-            match i {
-                0 => {
-                    let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
-                    assert_eq!(slot.len(), 1);
-                    let value = slot.value(0);
-                    assert_eq!(1_i32, value);
-                }
-                1 => {
-                    let slot = slot.as_any().downcast_ref::<Float64Array>().unwrap();
-                    assert_eq!(slot.len(), 1);
-                    let value = slot.value(0);
-                    assert!(value - 3_f64 < f64::EPSILON);
-                }
-                2 => {
-                    let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
-                    assert_eq!(slot.len(), 1);
-                    let value = slot.value(0);
-                    assert_eq!(4_i32, value);
-                }
-                3 => {
-                    let slot = slot.as_any().downcast_ref::<Float64Array>().unwrap();
-                    assert_eq!(slot.len(), 1);
-                    let value = slot.value(0);
-                    assert!(5_f64 - value < f64::EPSILON);
-                }
-                4 => {
-                    let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
-                    assert_eq!(slot.len(), 1);
-                    let value = slot.value(0);
-                    assert_eq!(6_i32, value);
-                }
-                _ => unreachable!(),
-            }
-        }
-    }
-
-    #[test]
-    fn test_sparse_mixed_with_nulls() {
-        let mut builder = UnionBuilder::new_sparse(5);
-        builder.append::<Int32Type>("a", 1).unwrap();
-        builder.append_null().unwrap();
-        builder.append::<Float64Type>("c", 3.0).unwrap();
-        builder.append::<Int32Type>("a", 4).unwrap();
-        let union = builder.build().unwrap();
-
-        let expected_type_ids = vec![0_i8, 0, 1, 0];
-
-        // Check type ids
-        assert_eq!(
-            Buffer::from_slice_ref(&expected_type_ids),
-            union.data().buffers()[0]
-        );
-        for (i, id) in expected_type_ids.iter().enumerate() {
-            assert_eq!(id, &union.type_id(i));
-        }
-
-        // Check offsets, sparse union should only have a single buffer, i.e. no offsets
-        assert_eq!(union.data().buffers().len(), 1);
-
-        for i in 0..union.len() {
-            let slot = union.value(i);
-            match i {
-                0 => {
-                    let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
-                    assert_eq!(false, union.is_null(i));
-                    assert_eq!(slot.len(), 1);
-                    let value = slot.value(0);
-                    assert_eq!(1_i32, value);
-                }
-                1 => assert!(union.is_null(i)),
-                2 => {
-                    let slot = slot.as_any().downcast_ref::<Float64Array>().unwrap();
-                    assert_eq!(false, union.is_null(i));
-                    assert_eq!(slot.len(), 1);
-                    let value = slot.value(0);
-                    assert!(value - 3_f64 < f64::EPSILON);
-                }
-                3 => {
-                    let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
-                    assert_eq!(false, union.is_null(i));
-                    assert_eq!(slot.len(), 1);
-                    let value = slot.value(0);
-                    assert_eq!(4_i32, value);
-                }
-                _ => unreachable!(),
-            }
-        }
-    }
-
-    #[test]
-    fn test_sparse_mixed_with_nulls_and_offset() {
-        let mut builder = UnionBuilder::new_sparse(5);
-        builder.append::<Int32Type>("a", 1).unwrap();
-        builder.append_null().unwrap();
-        builder.append::<Float64Type>("c", 3.0).unwrap();
-        builder.append_null().unwrap();
-        builder.append::<Int32Type>("a", 4).unwrap();
-        let union = builder.build().unwrap();
-
-        let slice = union.slice(1, 4);
-        let new_union = slice.as_any().downcast_ref::<UnionArray>().unwrap();
-
-        assert_eq!(4, new_union.len());
-        for i in 0..new_union.len() {
-            let slot = new_union.value(i);
-            match i {
-                0 => assert!(new_union.is_null(i)),
-                1 => {
-                    let slot = slot.as_any().downcast_ref::<Float64Array>().unwrap();
-                    assert_eq!(false, new_union.is_null(i));
-                    assert_eq!(slot.len(), 1);
-                    let value = slot.value(0);
-                    assert!(value - 3_f64 < f64::EPSILON);
-                }
-                2 => assert!(new_union.is_null(i)),
-                3 => {
-                    let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
-                    assert_eq!(false, new_union.is_null(i));
-                    assert_eq!(slot.len(), 1);
-                    let value = slot.value(0);
-                    assert_eq!(4_i32, value);
-                }
-                _ => unreachable!(),
-            }
-        }
-    }
-}
diff --git a/rust/arrow/src/array/builder.rs b/rust/arrow/src/array/builder.rs
deleted file mode 100644
index 38df92ebb46..00000000000
--- a/rust/arrow/src/array/builder.rs
+++ /dev/null
@@ -1,3171 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Defines a [`BufferBuilder`](crate::array::BufferBuilder) capable
-//! of creating a [`Buffer`](crate::buffer::Buffer) which can be used
-//! as an internal buffer in an [`ArrayData`](crate::array::ArrayData)
-//! object.
-
-use std::any::Any;
-use std::collections::HashMap;
-use std::fmt;
-use std::marker::PhantomData;
-use std::mem;
-use std::sync::Arc;
-
-use crate::array::*;
-use crate::buffer::{Buffer, MutableBuffer};
-use crate::datatypes::*;
-use crate::error::{ArrowError, Result};
-use crate::util::bit_util;
-
-///  Converts a `MutableBuffer` to a `BufferBuilder<T>`.
-///
-/// `slots` is the number of array slots currently represented in the `MutableBuffer`.
-pub(crate) fn mutable_buffer_to_builder<T: ArrowNativeType>(
-    mutable_buffer: MutableBuffer,
-    slots: usize,
-) -> BufferBuilder<T> {
-    BufferBuilder::<T> {
-        buffer: mutable_buffer,
-        len: slots,
-        _marker: PhantomData,
-    }
-}
-
-///  Converts a `BufferBuilder<T>` into its underlying `MutableBuffer`.
-///
-/// `From` is not implemented because associated type bounds are unstable.
-pub(crate) fn builder_to_mutable_buffer<T: ArrowNativeType>(
-    builder: BufferBuilder<T>,
-) -> MutableBuffer {
-    builder.buffer
-}
-
-/// Builder for creating a [`Buffer`](crate::buffer::Buffer) object.
-///
-/// A [`Buffer`](crate::buffer::Buffer) is the underlying data
-/// structure of Arrow's [`Arrays`](crate::array::Array).
-///
-/// For all supported types, there are type definitions for the
-/// generic version of `BufferBuilder<T>`, e.g. `UInt8BufferBuilder`.
-///
-/// # Example:
-///
-/// ```
-/// use arrow::array::UInt8BufferBuilder;
-///
-/// # fn main() -> arrow::error::Result<()> {
-/// let mut builder = UInt8BufferBuilder::new(100);
-/// builder.append_slice(&[42, 43, 44]);
-/// builder.append(45);
-/// let buffer = builder.finish();
-///
-/// assert_eq!(unsafe { buffer.typed_data::<u8>() }, &[42, 43, 44, 45]);
-/// # Ok(())
-/// # }
-/// ```
-#[derive(Debug)]
-pub struct BufferBuilder<T: ArrowNativeType> {
-    buffer: MutableBuffer,
-    len: usize,
-    _marker: PhantomData<T>,
-}
-
-impl<T: ArrowNativeType> BufferBuilder<T> {
-    /// Creates a new builder with initial capacity for _at least_ `capacity`
-    /// elements of type `T`.
-    ///
-    /// The capacity can later be manually adjusted with the
-    /// [`reserve()`](BufferBuilder::reserve) method.
-    /// Also the
-    /// [`append()`](BufferBuilder::append),
-    /// [`append_slice()`](BufferBuilder::append_slice) and
-    /// [`advance()`](BufferBuilder::advance)
-    /// methods automatically increase the capacity if needed.
-    ///
-    /// # Example:
-    ///
-    /// ```
-    /// use arrow::array::UInt8BufferBuilder;
-    ///
-    /// let mut builder = UInt8BufferBuilder::new(10);
-    ///
-    /// assert!(builder.capacity() >= 10);
-    /// ```
-    #[inline]
-    pub fn new(capacity: usize) -> Self {
-        let buffer = MutableBuffer::new(capacity * mem::size_of::<T>());
-
-        Self {
-            buffer,
-            len: 0,
-            _marker: PhantomData,
-        }
-    }
-
-    /// Returns the current number of array elements in the internal buffer.
-    ///
-    /// # Example:
-    ///
-    /// ```
-    /// use arrow::array::UInt8BufferBuilder;
-    ///
-    /// let mut builder = UInt8BufferBuilder::new(10);
-    /// builder.append(42);
-    ///
-    /// assert_eq!(builder.len(), 1);
-    /// ```
-    pub fn len(&self) -> usize {
-        self.len
-    }
-
-    /// Returns whether the internal buffer is empty.
-    ///
-    /// # Example:
-    ///
-    /// ```
-    /// use arrow::array::UInt8BufferBuilder;
-    ///
-    /// let mut builder = UInt8BufferBuilder::new(10);
-    /// builder.append(42);
-    ///
-    /// assert_eq!(builder.is_empty(), false);
-    /// ```
-    pub fn is_empty(&self) -> bool {
-        self.len == 0
-    }
-
-    /// Returns the actual capacity (number of elements) of the internal buffer.
-    ///
-    /// Note: the internal capacity returned by this method might be larger than
-    /// what you'd expect after setting the capacity in the `new()` or `reserve()`
-    /// functions.
-    pub fn capacity(&self) -> usize {
-        let byte_capacity = self.buffer.capacity();
-        byte_capacity / std::mem::size_of::<T>()
-    }
-
-    /// Increases the number of elements in the internal buffer by `n`
-    /// and resizes the buffer as needed.
-    ///
-    /// The values of the newly added elements are 0.
-    /// This method is usually used when appending `NULL` values to the buffer
-    /// as they still require physical memory space.
-    ///
-    /// # Example:
-    ///
-    /// ```
-    /// use arrow::array::UInt8BufferBuilder;
-    ///
-    /// let mut builder = UInt8BufferBuilder::new(10);
-    /// builder.advance(2);
-    ///
-    /// assert_eq!(builder.len(), 2);
-    /// ```
-    #[inline]
-    pub fn advance(&mut self, i: usize) {
-        let new_buffer_len = (self.len + i) * mem::size_of::<T>();
-        self.buffer.resize(new_buffer_len, 0);
-        self.len += i;
-    }
-
-    /// Reserves memory for _at least_ `n` more elements of type `T`.
-    ///
-    /// # Example:
-    ///
-    /// ```
-    /// use arrow::array::UInt8BufferBuilder;
-    ///
-    /// let mut builder = UInt8BufferBuilder::new(10);
-    /// builder.reserve(10);
-    ///
-    /// assert!(builder.capacity() >= 20);
-    /// ```
-    #[inline]
-    pub fn reserve(&mut self, n: usize) {
-        self.buffer.reserve(n * mem::size_of::<T>());
-    }
-
-    /// Appends a value of type `T` into the builder,
-    /// growing the internal buffer as needed.
-    ///
-    /// # Example:
-    ///
-    /// ```
-    /// use arrow::array::UInt8BufferBuilder;
-    ///
-    /// let mut builder = UInt8BufferBuilder::new(10);
-    /// builder.append(42);
-    ///
-    /// assert_eq!(builder.len(), 1);
-    /// ```
-    #[inline]
-    pub fn append(&mut self, v: T) {
-        self.reserve(1);
-        self.buffer.push(v);
-        self.len += 1;
-    }
-
-    /// Appends a value of type `T` into the builder N times,
-    /// growing the internal buffer as needed.
-    ///
-    /// # Example:
-    ///
-    /// ```
-    /// use arrow::array::UInt8BufferBuilder;
-    ///
-    /// let mut builder = UInt8BufferBuilder::new(10);
-    /// builder.append_n(10, 42);
-    ///
-    /// assert_eq!(builder.len(), 10);
-    /// ```
-    #[inline]
-    pub fn append_n(&mut self, n: usize, v: T) {
-        self.reserve(n);
-        for _ in 0..n {
-            self.buffer.push(v);
-        }
-        self.len += n;
-    }
-
-    /// Appends a slice of type `T`, growing the internal buffer as needed.
-    ///
-    /// # Example:
-    ///
-    /// ```
-    /// use arrow::array::UInt8BufferBuilder;
-    ///
-    /// let mut builder = UInt8BufferBuilder::new(10);
-    /// builder.append_slice(&[42, 44, 46]);
-    ///
-    /// assert_eq!(builder.len(), 3);
-    /// ```
-    #[inline]
-    pub fn append_slice(&mut self, slice: &[T]) {
-        self.buffer.extend_from_slice(slice);
-        self.len += slice.len();
-    }
-
-    /// Resets this builder and returns an immutable [`Buffer`](crate::buffer::Buffer).
-    ///
-    /// # Example:
-    ///
-    /// ```
-    /// use arrow::array::UInt8BufferBuilder;
-    ///
-    /// let mut builder = UInt8BufferBuilder::new(10);
-    /// builder.append_slice(&[42, 44, 46]);
-    ///
-    /// let buffer = builder.finish();
-    ///
-    /// assert_eq!(unsafe { buffer.typed_data::<u8>() }, &[42, 44, 46]);
-    /// ```
-    #[inline]
-    pub fn finish(&mut self) -> Buffer {
-        let buf = std::mem::replace(&mut self.buffer, MutableBuffer::new(0));
-        self.len = 0;
-        buf.into()
-    }
-}
-
-#[derive(Debug)]
-pub struct BooleanBufferBuilder {
-    buffer: MutableBuffer,
-    len: usize,
-}
-
-impl BooleanBufferBuilder {
-    #[inline]
-    pub fn new(capacity: usize) -> Self {
-        let byte_capacity = bit_util::ceil(capacity, 8);
-        let buffer = MutableBuffer::from_len_zeroed(byte_capacity);
-        Self { buffer, len: 0 }
-    }
-
-    #[inline]
-    pub fn len(&self) -> usize {
-        self.len
-    }
-
-    #[inline]
-    pub fn is_empty(&self) -> bool {
-        self.len == 0
-    }
-
-    #[inline]
-    pub fn capacity(&self) -> usize {
-        self.buffer.capacity() * 8
-    }
-
-    #[inline]
-    pub fn advance(&mut self, additional: usize) {
-        let new_len = self.len + additional;
-        let new_len_bytes = bit_util::ceil(new_len, 8);
-        if new_len_bytes > self.buffer.len() {
-            self.buffer.resize(new_len_bytes, 0);
-        }
-        self.len = new_len;
-    }
-
-    /// Reserve space to at least `additional` new bits.
-    /// Capacity will be `>= self.len() + additional`.
-    /// New bytes are uninitialized and reading them is undefined behavior.
-    #[inline]
-    pub fn reserve(&mut self, additional: usize) {
-        let capacity = self.len + additional;
-        if capacity > self.capacity() {
-            // convert differential to bytes
-            let additional = bit_util::ceil(capacity, 8) - self.buffer.len();
-            self.buffer.reserve(additional);
-        }
-    }
-
-    #[inline]
-    pub fn append(&mut self, v: bool) {
-        self.advance(1);
-        if v {
-            unsafe { bit_util::set_bit_raw(self.buffer.as_mut_ptr(), self.len - 1) };
-        }
-    }
-
-    #[inline]
-    pub fn append_n(&mut self, additional: usize, v: bool) {
-        self.advance(additional);
-        if additional > 0 && v {
-            let offset = self.len() - additional;
-            (0..additional).for_each(|i| unsafe {
-                bit_util::set_bit_raw(self.buffer.as_mut_ptr(), offset + i)
-            })
-        }
-    }
-
-    #[inline]
-    pub fn append_slice(&mut self, slice: &[bool]) {
-        let additional = slice.len();
-        self.advance(additional);
-
-        let offset = self.len() - additional;
-        for (i, v) in slice.iter().enumerate() {
-            if *v {
-                unsafe { bit_util::set_bit_raw(self.buffer.as_mut_ptr(), offset + i) }
-            }
-        }
-    }
-
-    #[inline]
-    pub fn finish(&mut self) -> Buffer {
-        let buf = std::mem::replace(&mut self.buffer, MutableBuffer::new(0));
-        self.len = 0;
-        buf.into()
-    }
-}
-
-impl From<BooleanBufferBuilder> for Buffer {
-    #[inline]
-    fn from(builder: BooleanBufferBuilder) -> Self {
-        builder.buffer.into()
-    }
-}
-
-/// Trait for dealing with different array builders at runtime
-pub trait ArrayBuilder: Any {
-    /// Returns the number of array slots in the builder
-    fn len(&self) -> usize;
-
-    /// Returns whether number of array slots is zero
-    fn is_empty(&self) -> bool;
-
-    /// Builds the array
-    fn finish(&mut self) -> ArrayRef;
-
-    /// Returns the builder as a non-mutable `Any` reference.
-    ///
-    /// This is most useful when one wants to call non-mutable APIs on a specific builder
-    /// type. In this case, one can first cast this into a `Any`, and then use
-    /// `downcast_ref` to get a reference on the specific builder.
-    fn as_any(&self) -> &Any;
-
-    /// Returns the builder as a mutable `Any` reference.
-    ///
-    /// This is most useful when one wants to call mutable APIs on a specific builder
-    /// type. In this case, one can first cast this into a `Any`, and then use
-    /// `downcast_mut` to get a reference on the specific builder.
-    fn as_any_mut(&mut self) -> &mut Any;
-
-    /// Returns the boxed builder as a box of `Any`.
-    fn into_box_any(self: Box<Self>) -> Box<Any>;
-}
-
-///  Array builder for fixed-width primitive types
-#[derive(Debug)]
-pub struct BooleanBuilder {
-    values_builder: BooleanBufferBuilder,
-    bitmap_builder: BooleanBufferBuilder,
-}
-
-impl BooleanBuilder {
-    /// Creates a new primitive array builder
-    pub fn new(capacity: usize) -> Self {
-        Self {
-            values_builder: BooleanBufferBuilder::new(capacity),
-            bitmap_builder: BooleanBufferBuilder::new(capacity),
-        }
-    }
-
-    /// Returns the capacity of this builder measured in slots of type `T`
-    pub fn capacity(&self) -> usize {
-        self.values_builder.capacity()
-    }
-
-    /// Appends a value of type `T` into the builder
-    #[inline]
-    pub fn append_value(&mut self, v: bool) -> Result<()> {
-        self.bitmap_builder.append(true);
-        self.values_builder.append(v);
-        Ok(())
-    }
-
-    /// Appends a null slot into the builder
-    #[inline]
-    pub fn append_null(&mut self) -> Result<()> {
-        self.bitmap_builder.append(false);
-        self.values_builder.advance(1);
-        Ok(())
-    }
-
-    /// Appends an `Option<T>` into the builder
-    #[inline]
-    pub fn append_option(&mut self, v: Option<bool>) -> Result<()> {
-        match v {
-            None => self.append_null()?,
-            Some(v) => self.append_value(v)?,
-        };
-        Ok(())
-    }
-
-    /// Appends a slice of type `T` into the builder
-    #[inline]
-    pub fn append_slice(&mut self, v: &[bool]) -> Result<()> {
-        self.bitmap_builder.append_n(v.len(), true);
-        self.values_builder.append_slice(v);
-        Ok(())
-    }
-
-    /// Appends values from a slice of type `T` and a validity boolean slice
-    #[inline]
-    pub fn append_values(&mut self, values: &[bool], is_valid: &[bool]) -> Result<()> {
-        if values.len() != is_valid.len() {
-            return Err(ArrowError::InvalidArgumentError(
-                "Value and validity lengths must be equal".to_string(),
-            ));
-        }
-        self.bitmap_builder.append_slice(is_valid);
-        self.values_builder.append_slice(values);
-        Ok(())
-    }
-
-    /// Builds the [BooleanArray] and reset this builder.
-    pub fn finish(&mut self) -> BooleanArray {
-        let len = self.len();
-        let null_bit_buffer = self.bitmap_builder.finish();
-        let null_count = len - null_bit_buffer.count_set_bits();
-        let mut builder = ArrayData::builder(DataType::Boolean)
-            .len(len)
-            .add_buffer(self.values_builder.finish());
-        if null_count > 0 {
-            builder = builder.null_bit_buffer(null_bit_buffer);
-        }
-        let data = builder.build();
-        BooleanArray::from(data)
-    }
-}
-
-impl ArrayBuilder for BooleanBuilder {
-    /// Returns the builder as a non-mutable `Any` reference.
-    fn as_any(&self) -> &Any {
-        self
-    }
-
-    /// Returns the builder as a mutable `Any` reference.
-    fn as_any_mut(&mut self) -> &mut Any {
-        self
-    }
-
-    /// Returns the boxed builder as a box of `Any`.
-    fn into_box_any(self: Box<Self>) -> Box<Any> {
-        self
-    }
-
-    /// Returns the number of array slots in the builder
-    fn len(&self) -> usize {
-        self.values_builder.len
-    }
-
-    /// Returns whether the number of array slots is zero
-    fn is_empty(&self) -> bool {
-        self.values_builder.is_empty()
-    }
-
-    /// Builds the array and reset this builder.
-    fn finish(&mut self) -> ArrayRef {
-        Arc::new(self.finish())
-    }
-}
-
-///  Array builder for fixed-width primitive types
-#[derive(Debug)]
-pub struct PrimitiveBuilder<T: ArrowPrimitiveType> {
-    values_builder: BufferBuilder<T::Native>,
-    /// We only materialize the builder when we add `false`.
-    /// This optimization is **very** important for performance of `StringBuilder`.
-    bitmap_builder: Option<BooleanBufferBuilder>,
-}
-
-impl<T: ArrowPrimitiveType> ArrayBuilder for PrimitiveBuilder<T> {
-    /// Returns the builder as a non-mutable `Any` reference.
-    fn as_any(&self) -> &Any {
-        self
-    }
-
-    /// Returns the builder as a mutable `Any` reference.
-    fn as_any_mut(&mut self) -> &mut Any {
-        self
-    }
-
-    /// Returns the boxed builder as a box of `Any`.
-    fn into_box_any(self: Box<Self>) -> Box<Any> {
-        self
-    }
-
-    /// Returns the number of array slots in the builder
-    fn len(&self) -> usize {
-        self.values_builder.len
-    }
-
-    /// Returns whether the number of array slots is zero
-    fn is_empty(&self) -> bool {
-        self.values_builder.is_empty()
-    }
-
-    /// Builds the array and reset this builder.
-    fn finish(&mut self) -> ArrayRef {
-        Arc::new(self.finish())
-    }
-}
-
-impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
-    /// Creates a new primitive array builder
-    pub fn new(capacity: usize) -> Self {
-        Self {
-            values_builder: BufferBuilder::<T::Native>::new(capacity),
-            bitmap_builder: None,
-        }
-    }
-
-    /// Returns the capacity of this builder measured in slots of type `T`
-    pub fn capacity(&self) -> usize {
-        self.values_builder.capacity()
-    }
-
-    /// Appends a value of type `T` into the builder
-    #[inline]
-    pub fn append_value(&mut self, v: T::Native) -> Result<()> {
-        if let Some(b) = self.bitmap_builder.as_mut() {
-            b.append(true);
-        }
-        self.values_builder.append(v);
-        Ok(())
-    }
-
-    /// Appends a null slot into the builder
-    #[inline]
-    pub fn append_null(&mut self) -> Result<()> {
-        self.materialize_bitmap_builder();
-        self.bitmap_builder.as_mut().unwrap().append(false);
-        self.values_builder.advance(1);
-        Ok(())
-    }
-
-    /// Appends an `Option<T>` into the builder
-    #[inline]
-    pub fn append_option(&mut self, v: Option<T::Native>) -> Result<()> {
-        match v {
-            None => self.append_null()?,
-            Some(v) => self.append_value(v)?,
-        };
-        Ok(())
-    }
-
-    /// Appends a slice of type `T` into the builder
-    #[inline]
-    pub fn append_slice(&mut self, v: &[T::Native]) -> Result<()> {
-        if let Some(b) = self.bitmap_builder.as_mut() {
-            b.append_n(v.len(), true);
-        }
-        self.values_builder.append_slice(v);
-        Ok(())
-    }
-
-    /// Appends values from a slice of type `T` and a validity boolean slice
-    #[inline]
-    pub fn append_values(
-        &mut self,
-        values: &[T::Native],
-        is_valid: &[bool],
-    ) -> Result<()> {
-        if values.len() != is_valid.len() {
-            return Err(ArrowError::InvalidArgumentError(
-                "Value and validity lengths must be equal".to_string(),
-            ));
-        }
-        if is_valid.iter().any(|v| !*v) {
-            self.materialize_bitmap_builder();
-        }
-        if let Some(b) = self.bitmap_builder.as_mut() {
-            b.append_slice(is_valid);
-        }
-        self.values_builder.append_slice(values);
-        Ok(())
-    }
-
-    /// Builds the `PrimitiveArray` and reset this builder.
-    pub fn finish(&mut self) -> PrimitiveArray<T> {
-        let len = self.len();
-        let null_bit_buffer = self.bitmap_builder.as_mut().map(|b| b.finish());
-        let null_count = len
-            - null_bit_buffer
-                .as_ref()
-                .map(|b| b.count_set_bits())
-                .unwrap_or(len);
-        let mut builder = ArrayData::builder(T::DATA_TYPE)
-            .len(len)
-            .add_buffer(self.values_builder.finish());
-        if null_count > 0 {
-            builder = builder.null_bit_buffer(null_bit_buffer.unwrap());
-        }
-        let data = builder.build();
-        PrimitiveArray::<T>::from(data)
-    }
-
-    /// Builds the `DictionaryArray` and reset this builder.
-    pub fn finish_dict(&mut self, values: ArrayRef) -> DictionaryArray<T> {
-        let len = self.len();
-        let null_bit_buffer = self.bitmap_builder.as_mut().map(|b| b.finish());
-        let null_count = len
-            - null_bit_buffer
-                .as_ref()
-                .map(|b| b.count_set_bits())
-                .unwrap_or(len);
-        let data_type = DataType::Dictionary(
-            Box::new(T::DATA_TYPE),
-            Box::new(values.data_type().clone()),
-        );
-        let mut builder = ArrayData::builder(data_type)
-            .len(len)
-            .add_buffer(self.values_builder.finish());
-        if null_count > 0 {
-            builder = builder.null_bit_buffer(null_bit_buffer.unwrap());
-        }
-        builder = builder.add_child_data(values.data().clone());
-        DictionaryArray::<T>::from(builder.build())
-    }
-
-    fn materialize_bitmap_builder(&mut self) {
-        if self.bitmap_builder.is_some() {
-            return;
-        }
-        let mut b = BooleanBufferBuilder::new(0);
-        b.reserve(self.values_builder.capacity());
-        b.append_n(self.values_builder.len, true);
-        self.bitmap_builder = Some(b);
-    }
-}
-
-///  Array builder for `ListArray`
-#[derive(Debug)]
-pub struct GenericListBuilder<OffsetSize: OffsetSizeTrait, T: ArrayBuilder> {
-    offsets_builder: BufferBuilder<OffsetSize>,
-    bitmap_builder: BooleanBufferBuilder,
-    values_builder: T,
-    len: OffsetSize,
-}
-
-impl<OffsetSize: OffsetSizeTrait, T: ArrayBuilder> GenericListBuilder<OffsetSize, T> {
-    /// Creates a new `ListArrayBuilder` from a given values array builder
-    pub fn new(values_builder: T) -> Self {
-        let capacity = values_builder.len();
-        Self::with_capacity(values_builder, capacity)
-    }
-
-    /// Creates a new `ListArrayBuilder` from a given values array builder
-    /// `capacity` is the number of items to pre-allocate space for in this builder
-    pub fn with_capacity(values_builder: T, capacity: usize) -> Self {
-        let mut offsets_builder = BufferBuilder::<OffsetSize>::new(capacity + 1);
-        let len = OffsetSize::zero();
-        offsets_builder.append(len);
-        Self {
-            offsets_builder,
-            bitmap_builder: BooleanBufferBuilder::new(capacity),
-            values_builder,
-            len,
-        }
-    }
-}
-
-impl<OffsetSize: OffsetSizeTrait, T: ArrayBuilder> ArrayBuilder
-    for GenericListBuilder<OffsetSize, T>
-where
-    T: 'static,
-{
-    /// Returns the builder as a non-mutable `Any` reference.
-    fn as_any(&self) -> &Any {
-        self
-    }
-
-    /// Returns the builder as a mutable `Any` reference.
-    fn as_any_mut(&mut self) -> &mut Any {
-        self
-    }
-
-    /// Returns the boxed builder as a box of `Any`.
-    fn into_box_any(self: Box<Self>) -> Box<Any> {
-        self
-    }
-
-    /// Returns the number of array slots in the builder
-    fn len(&self) -> usize {
-        self.len.to_usize().unwrap()
-    }
-
-    /// Returns whether the number of array slots is zero
-    fn is_empty(&self) -> bool {
-        self.len == OffsetSize::zero()
-    }
-
-    /// Builds the array and reset this builder.
-    fn finish(&mut self) -> ArrayRef {
-        Arc::new(self.finish())
-    }
-}
-
-impl<OffsetSize: OffsetSizeTrait, T: ArrayBuilder> GenericListBuilder<OffsetSize, T>
-where
-    T: 'static,
-{
-    /// Returns the child array builder as a mutable reference.
-    ///
-    /// This mutable reference can be used to append values into the child array builder,
-    /// but you must call `append` to delimit each distinct list value.
-    pub fn values(&mut self) -> &mut T {
-        &mut self.values_builder
-    }
-
-    /// Finish the current variable-length list array slot
-    #[inline]
-    pub fn append(&mut self, is_valid: bool) -> Result<()> {
-        self.offsets_builder
-            .append(OffsetSize::from_usize(self.values_builder.len()).unwrap());
-        self.bitmap_builder.append(is_valid);
-        self.len += OffsetSize::one();
-        Ok(())
-    }
-
-    /// Builds the `ListArray` and reset this builder.
-    pub fn finish(&mut self) -> GenericListArray<OffsetSize> {
-        let len = self.len();
-        self.len = OffsetSize::zero();
-        let values_arr = self
-            .values_builder
-            .as_any_mut()
-            .downcast_mut::<T>()
-            .unwrap()
-            .finish();
-        let values_data = values_arr.data();
-
-        let offset_buffer = self.offsets_builder.finish();
-        let null_bit_buffer = self.bitmap_builder.finish();
-        self.offsets_builder.append(self.len);
-        let field = Box::new(Field::new(
-            "item",
-            values_data.data_type().clone(),
-            true, // TODO: find a consistent way of getting this
-        ));
-        let data_type = if OffsetSize::is_large() {
-            DataType::LargeList(field)
-        } else {
-            DataType::List(field)
-        };
-        let data = ArrayData::builder(data_type)
-            .len(len)
-            .add_buffer(offset_buffer)
-            .add_child_data(values_data.clone())
-            .null_bit_buffer(null_bit_buffer)
-            .build();
-
-        GenericListArray::<OffsetSize>::from(data)
-    }
-}
-
-pub type ListBuilder<T> = GenericListBuilder<i32, T>;
-pub type LargeListBuilder<T> = GenericListBuilder<i64, T>;
-
-///  Array builder for `ListArray`
-#[derive(Debug)]
-pub struct FixedSizeListBuilder<T: ArrayBuilder> {
-    bitmap_builder: BooleanBufferBuilder,
-    values_builder: T,
-    len: usize,
-    list_len: i32,
-}
-
-impl<T: ArrayBuilder> FixedSizeListBuilder<T> {
-    /// Creates a new `FixedSizeListBuilder` from a given values array builder
-    /// `length` is the number of values within each array
-    pub fn new(values_builder: T, length: i32) -> Self {
-        let capacity = values_builder.len();
-        Self::with_capacity(values_builder, length, capacity)
-    }
-
-    /// Creates a new `FixedSizeListBuilder` from a given values array builder
-    /// `length` is the number of values within each array
-    /// `capacity` is the number of items to pre-allocate space for in this builder
-    pub fn with_capacity(values_builder: T, length: i32, capacity: usize) -> Self {
-        let mut offsets_builder = Int32BufferBuilder::new(capacity + 1);
-        offsets_builder.append(0);
-        Self {
-            bitmap_builder: BooleanBufferBuilder::new(capacity),
-            values_builder,
-            len: 0,
-            list_len: length,
-        }
-    }
-}
-
-impl<T: ArrayBuilder> ArrayBuilder for FixedSizeListBuilder<T>
-where
-    T: 'static,
-{
-    /// Returns the builder as a non-mutable `Any` reference.
-    fn as_any(&self) -> &Any {
-        self
-    }
-
-    /// Returns the builder as a mutable `Any` reference.
-    fn as_any_mut(&mut self) -> &mut Any {
-        self
-    }
-
-    /// Returns the boxed builder as a box of `Any`.
-    fn into_box_any(self: Box<Self>) -> Box<Any> {
-        self
-    }
-
-    /// Returns the number of array slots in the builder
-    fn len(&self) -> usize {
-        self.len
-    }
-
-    /// Returns whether the number of array slots is zero
-    fn is_empty(&self) -> bool {
-        self.len == 0
-    }
-
-    /// Builds the array and reset this builder.
-    fn finish(&mut self) -> ArrayRef {
-        Arc::new(self.finish())
-    }
-}
-
-impl<T: ArrayBuilder> FixedSizeListBuilder<T>
-where
-    T: 'static,
-{
-    /// Returns the child array builder as a mutable reference.
-    ///
-    /// This mutable reference can be used to append values into the child array builder,
-    /// but you must call `append` to delimit each distinct list value.
-    pub fn values(&mut self) -> &mut T {
-        &mut self.values_builder
-    }
-
-    pub fn value_length(&self) -> i32 {
-        self.list_len
-    }
-
-    /// Finish the current variable-length list array slot
-    #[inline]
-    pub fn append(&mut self, is_valid: bool) -> Result<()> {
-        self.bitmap_builder.append(is_valid);
-        self.len += 1;
-        Ok(())
-    }
-
-    /// Builds the `FixedSizeListBuilder` and reset this builder.
-    pub fn finish(&mut self) -> FixedSizeListArray {
-        let len = self.len();
-        self.len = 0;
-        let values_arr = self
-            .values_builder
-            .as_any_mut()
-            .downcast_mut::<T>()
-            .unwrap()
-            .finish();
-        let values_data = values_arr.data();
-
-        // check that values_data length is multiple of len if we have data
-        if len != 0 {
-            assert!(
-                values_data.len() / len == self.list_len as usize,
-                "Values of FixedSizeList must have equal lengths, values have length {} and list has {}",
-                values_data.len() / len,
-                self.list_len
-            );
-        }
-
-        let null_bit_buffer = self.bitmap_builder.finish();
-        let data = ArrayData::builder(DataType::FixedSizeList(
-            Box::new(Field::new("item", values_data.data_type().clone(), true)),
-            self.list_len,
-        ))
-        .len(len)
-        .add_child_data(values_data.clone())
-        .null_bit_buffer(null_bit_buffer)
-        .build();
-
-        FixedSizeListArray::from(data)
-    }
-}
-
-///  Array builder for `BinaryArray`
-#[derive(Debug)]
-pub struct GenericBinaryBuilder<OffsetSize: OffsetSizeTrait> {
-    builder: GenericListBuilder<OffsetSize, UInt8Builder>,
-}
-
-pub type BinaryBuilder = GenericBinaryBuilder<i32>;
-pub type LargeBinaryBuilder = GenericBinaryBuilder<i64>;
-
-#[derive(Debug)]
-pub struct GenericStringBuilder<OffsetSize: OffsetSizeTrait> {
-    builder: GenericListBuilder<OffsetSize, UInt8Builder>,
-}
-
-pub type StringBuilder = GenericStringBuilder<i32>;
-pub type LargeStringBuilder = GenericStringBuilder<i64>;
-
-#[derive(Debug)]
-pub struct FixedSizeBinaryBuilder {
-    builder: FixedSizeListBuilder<UInt8Builder>,
-}
-
-#[derive(Debug)]
-pub struct DecimalBuilder {
-    builder: FixedSizeListBuilder<UInt8Builder>,
-    precision: usize,
-    scale: usize,
-}
-
-impl<OffsetSize: BinaryOffsetSizeTrait> ArrayBuilder
-    for GenericBinaryBuilder<OffsetSize>
-{
-    /// Returns the builder as a non-mutable `Any` reference.
-    fn as_any(&self) -> &Any {
-        self
-    }
-
-    /// Returns the builder as a mutable `Any` reference.
-    fn as_any_mut(&mut self) -> &mut Any {
-        self
-    }
-
-    /// Returns the boxed builder as a box of `Any`.
-    fn into_box_any(self: Box<Self>) -> Box<Any> {
-        self
-    }
-
-    /// Returns the number of array slots in the builder
-    fn len(&self) -> usize {
-        self.builder.len()
-    }
-
-    /// Returns whether the number of array slots is zero
-    fn is_empty(&self) -> bool {
-        self.builder.is_empty()
-    }
-
-    /// Builds the array and reset this builder.
-    fn finish(&mut self) -> ArrayRef {
-        Arc::new(self.finish())
-    }
-}
-
-impl<OffsetSize: StringOffsetSizeTrait> ArrayBuilder
-    for GenericStringBuilder<OffsetSize>
-{
-    /// Returns the builder as a non-mutable `Any` reference.
-    fn as_any(&self) -> &Any {
-        self
-    }
-
-    /// Returns the builder as a mutable `Any` reference.
-    fn as_any_mut(&mut self) -> &mut Any {
-        self
-    }
-
-    /// Returns the boxed builder as a box of `Any`.
-    fn into_box_any(self: Box<Self>) -> Box<Any> {
-        self
-    }
-
-    /// Returns the number of array slots in the builder
-    fn len(&self) -> usize {
-        self.builder.len()
-    }
-
-    /// Returns whether the number of array slots is zero
-    fn is_empty(&self) -> bool {
-        self.builder.is_empty()
-    }
-
-    /// Builds the array and reset this builder.
-    fn finish(&mut self) -> ArrayRef {
-        let a = GenericStringBuilder::<OffsetSize>::finish(self);
-        Arc::new(a)
-    }
-}
-
-impl ArrayBuilder for FixedSizeBinaryBuilder {
-    /// Returns the builder as a non-mutable `Any` reference.
-    fn as_any(&self) -> &Any {
-        self
-    }
-
-    /// Returns the builder as a mutable `Any` reference.
-    fn as_any_mut(&mut self) -> &mut Any {
-        self
-    }
-
-    /// Returns the boxed builder as a box of `Any`.
-    fn into_box_any(self: Box<Self>) -> Box<Any> {
-        self
-    }
-
-    /// Returns the number of array slots in the builder
-    fn len(&self) -> usize {
-        self.builder.len()
-    }
-
-    /// Returns whether the number of array slots is zero
-    fn is_empty(&self) -> bool {
-        self.builder.is_empty()
-    }
-
-    /// Builds the array and reset this builder.
-    fn finish(&mut self) -> ArrayRef {
-        Arc::new(self.finish())
-    }
-}
-
-impl ArrayBuilder for DecimalBuilder {
-    /// Returns the builder as a non-mutable `Any` reference.
-    fn as_any(&self) -> &Any {
-        self
-    }
-
-    /// Returns the builder as a mutable `Any` reference.
-    fn as_any_mut(&mut self) -> &mut Any {
-        self
-    }
-
-    /// Returns the boxed builder as a box of `Any`.
-    fn into_box_any(self: Box<Self>) -> Box<Any> {
-        self
-    }
-
-    /// Returns the number of array slots in the builder
-    fn len(&self) -> usize {
-        self.builder.len()
-    }
-
-    /// Returns whether the number of array slots is zero
-    fn is_empty(&self) -> bool {
-        self.builder.is_empty()
-    }
-
-    /// Builds the array and reset this builder.
-    fn finish(&mut self) -> ArrayRef {
-        Arc::new(self.finish())
-    }
-}
-
-impl<OffsetSize: BinaryOffsetSizeTrait> GenericBinaryBuilder<OffsetSize> {
-    /// Creates a new `GenericBinaryBuilder`, `capacity` is the number of bytes in the values
-    /// array
-    pub fn new(capacity: usize) -> Self {
-        let values_builder = UInt8Builder::new(capacity);
-        Self {
-            builder: GenericListBuilder::new(values_builder),
-        }
-    }
-
-    /// Appends a single byte value into the builder's values array.
-    ///
-    /// Note, when appending individual byte values you must call `append` to delimit each
-    /// distinct list value.
-    #[inline]
-    pub fn append_byte(&mut self, value: u8) -> Result<()> {
-        self.builder.values().append_value(value)?;
-        Ok(())
-    }
-
-    /// Appends a byte slice into the builder.
-    ///
-    /// Automatically calls the `append` method to delimit the slice appended in as a
-    /// distinct array element.
-    #[inline]
-    pub fn append_value(&mut self, value: impl AsRef<[u8]>) -> Result<()> {
-        self.builder.values().append_slice(value.as_ref())?;
-        self.builder.append(true)?;
-        Ok(())
-    }
-
-    /// Finish the current variable-length list array slot.
-    #[inline]
-    pub fn append(&mut self, is_valid: bool) -> Result<()> {
-        self.builder.append(is_valid)
-    }
-
-    /// Append a null value to the array.
-    #[inline]
-    pub fn append_null(&mut self) -> Result<()> {
-        self.append(false)
-    }
-
-    /// Builds the `BinaryArray` and reset this builder.
-    pub fn finish(&mut self) -> GenericBinaryArray<OffsetSize> {
-        GenericBinaryArray::<OffsetSize>::from(self.builder.finish())
-    }
-}
-
-impl<OffsetSize: StringOffsetSizeTrait> GenericStringBuilder<OffsetSize> {
-    /// Creates a new `StringBuilder`,
-    /// `capacity` is the number of bytes of string data to pre-allocate space for in this builder
-    pub fn new(capacity: usize) -> Self {
-        let values_builder = UInt8Builder::new(capacity);
-        Self {
-            builder: GenericListBuilder::new(values_builder),
-        }
-    }
-
-    /// Creates a new `StringBuilder`,
-    /// `data_capacity` is the number of bytes of string data to pre-allocate space for in this builder
-    /// `item_capacity` is the number of items to pre-allocate space for in this builder
-    pub fn with_capacity(item_capacity: usize, data_capacity: usize) -> Self {
-        let values_builder = UInt8Builder::new(data_capacity);
-        Self {
-            builder: GenericListBuilder::with_capacity(values_builder, item_capacity),
-        }
-    }
-
-    /// Appends a string into the builder.
-    ///
-    /// Automatically calls the `append` method to delimit the string appended in as a
-    /// distinct array element.
-    #[inline]
-    pub fn append_value(&mut self, value: impl AsRef<str>) -> Result<()> {
-        self.builder
-            .values()
-            .append_slice(value.as_ref().as_bytes())?;
-        self.builder.append(true)?;
-        Ok(())
-    }
-
-    /// Finish the current variable-length list array slot.
-    #[inline]
-    pub fn append(&mut self, is_valid: bool) -> Result<()> {
-        self.builder.append(is_valid)
-    }
-
-    /// Append a null value to the array.
-    #[inline]
-    pub fn append_null(&mut self) -> Result<()> {
-        self.append(false)
-    }
-
-    /// Builds the `StringArray` and reset this builder.
-    pub fn finish(&mut self) -> GenericStringArray<OffsetSize> {
-        GenericStringArray::<OffsetSize>::from(self.builder.finish())
-    }
-}
-
-impl FixedSizeBinaryBuilder {
-    /// Creates a new `BinaryBuilder`, `capacity` is the number of bytes in the values
-    /// array
-    pub fn new(capacity: usize, byte_width: i32) -> Self {
-        let values_builder = UInt8Builder::new(capacity);
-        Self {
-            builder: FixedSizeListBuilder::new(values_builder, byte_width),
-        }
-    }
-
-    /// Appends a byte slice into the builder.
-    ///
-    /// Automatically calls the `append` method to delimit the slice appended in as a
-    /// distinct array element.
-    #[inline]
-    pub fn append_value(&mut self, value: impl AsRef<[u8]>) -> Result<()> {
-        if self.builder.value_length() != value.as_ref().len() as i32 {
-            return Err(ArrowError::InvalidArgumentError(
-                "Byte slice does not have the same length as FixedSizeBinaryBuilder value lengths".to_string()
-            ));
-        }
-        self.builder.values().append_slice(value.as_ref())?;
-        self.builder.append(true)
-    }
-
-    /// Append a null value to the array.
-    #[inline]
-    pub fn append_null(&mut self) -> Result<()> {
-        let length: usize = self.builder.value_length() as usize;
-        self.builder.values().append_slice(&vec![0u8; length][..])?;
-        self.builder.append(false)
-    }
-
-    /// Builds the `FixedSizeBinaryArray` and reset this builder.
-    pub fn finish(&mut self) -> FixedSizeBinaryArray {
-        FixedSizeBinaryArray::from(self.builder.finish())
-    }
-}
-
-impl DecimalBuilder {
-    /// Creates a new `BinaryBuilder`, `capacity` is the number of bytes in the values
-    /// array
-    pub fn new(capacity: usize, precision: usize, scale: usize) -> Self {
-        let values_builder = UInt8Builder::new(capacity);
-        let byte_width = 16;
-        Self {
-            builder: FixedSizeListBuilder::new(values_builder, byte_width),
-            precision,
-            scale,
-        }
-    }
-
-    /// Appends a byte slice into the builder.
-    ///
-    /// Automatically calls the `append` method to delimit the slice appended in as a
-    /// distinct array element.
-    #[inline]
-    pub fn append_value(&mut self, value: i128) -> Result<()> {
-        let value_as_bytes = Self::from_i128_to_fixed_size_bytes(
-            value,
-            self.builder.value_length() as usize,
-        )?;
-        if self.builder.value_length() != value_as_bytes.len() as i32 {
-            return Err(ArrowError::InvalidArgumentError(
-                "Byte slice does not have the same length as DecimalBuilder value lengths".to_string()
-            ));
-        }
-        self.builder
-            .values()
-            .append_slice(value_as_bytes.as_slice())?;
-        self.builder.append(true)
-    }
-
-    fn from_i128_to_fixed_size_bytes(v: i128, size: usize) -> Result<Vec<u8>> {
-        if size > 16 {
-            return Err(ArrowError::InvalidArgumentError(
-                "DecimalBuilder only supports values up to 16 bytes.".to_string(),
-            ));
-        }
-        let res = v.to_le_bytes();
-        let start_byte = 16 - size;
-        Ok(res[start_byte..16].to_vec())
-    }
-
-    /// Append a null value to the array.
-    #[inline]
-    pub fn append_null(&mut self) -> Result<()> {
-        let length: usize = self.builder.value_length() as usize;
-        self.builder.values().append_slice(&vec![0u8; length][..])?;
-        self.builder.append(false)
-    }
-
-    /// Builds the `DecimalArray` and reset this builder.
-    pub fn finish(&mut self) -> DecimalArray {
-        DecimalArray::from_fixed_size_list_array(
-            self.builder.finish(),
-            self.precision,
-            self.scale,
-        )
-    }
-}
-
-/// Array builder for Struct types.
-///
-/// Note that callers should make sure that methods of all the child field builders are
-/// properly called to maintain the consistency of the data structure.
-pub struct StructBuilder {
-    fields: Vec<Field>,
-    field_builders: Vec<Box<ArrayBuilder>>,
-    bitmap_builder: BooleanBufferBuilder,
-    len: usize,
-}
-
-impl fmt::Debug for StructBuilder {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        f.debug_struct("StructBuilder")
-            .field("fields", &self.fields)
-            .field("bitmap_builder", &self.bitmap_builder)
-            .field("len", &self.len)
-            .finish()
-    }
-}
-
-impl ArrayBuilder for StructBuilder {
-    /// Returns the number of array slots in the builder.
-    ///
-    /// Note that this always return the first child field builder's length, and it is
-    /// the caller's responsibility to maintain the consistency that all the child field
-    /// builder should have the equal number of elements.
-    fn len(&self) -> usize {
-        self.len
-    }
-
-    /// Returns whether the number of array slots is zero
-    fn is_empty(&self) -> bool {
-        self.len == 0
-    }
-
-    /// Builds the array.
-    fn finish(&mut self) -> ArrayRef {
-        Arc::new(self.finish())
-    }
-
-    /// Returns the builder as a non-mutable `Any` reference.
-    ///
-    /// This is most useful when one wants to call non-mutable APIs on a specific builder
-    /// type. In this case, one can first cast this into a `Any`, and then use
-    /// `downcast_ref` to get a reference on the specific builder.
-    fn as_any(&self) -> &Any {
-        self
-    }
-
-    /// Returns the builder as a mutable `Any` reference.
-    ///
-    /// This is most useful when one wants to call mutable APIs on a specific builder
-    /// type. In this case, one can first cast this into a `Any`, and then use
-    /// `downcast_mut` to get a reference on the specific builder.
-    fn as_any_mut(&mut self) -> &mut Any {
-        self
-    }
-
-    /// Returns the boxed builder as a box of `Any`.
-    fn into_box_any(self: Box<Self>) -> Box<Any> {
-        self
-    }
-}
-
-/// Returns a builder with capacity `capacity` that corresponds to the datatype `DataType`
-/// This function is useful to construct arrays from an arbitrary vectors with known/expected
-/// schema.
-pub fn make_builder(datatype: &DataType, capacity: usize) -> Box<ArrayBuilder> {
-    match datatype {
-        DataType::Null => unimplemented!(),
-        DataType::Boolean => Box::new(BooleanBuilder::new(capacity)),
-        DataType::Int8 => Box::new(Int8Builder::new(capacity)),
-        DataType::Int16 => Box::new(Int16Builder::new(capacity)),
-        DataType::Int32 => Box::new(Int32Builder::new(capacity)),
-        DataType::Int64 => Box::new(Int64Builder::new(capacity)),
-        DataType::UInt8 => Box::new(UInt8Builder::new(capacity)),
-        DataType::UInt16 => Box::new(UInt16Builder::new(capacity)),
-        DataType::UInt32 => Box::new(UInt32Builder::new(capacity)),
-        DataType::UInt64 => Box::new(UInt64Builder::new(capacity)),
-        DataType::Float32 => Box::new(Float32Builder::new(capacity)),
-        DataType::Float64 => Box::new(Float64Builder::new(capacity)),
-        DataType::Binary => Box::new(BinaryBuilder::new(capacity)),
-        DataType::FixedSizeBinary(len) => {
-            Box::new(FixedSizeBinaryBuilder::new(capacity, *len))
-        }
-        DataType::Decimal(precision, scale) => {
-            Box::new(DecimalBuilder::new(capacity, *precision, *scale))
-        }
-        DataType::Utf8 => Box::new(StringBuilder::new(capacity)),
-        DataType::Date32 => Box::new(Date32Builder::new(capacity)),
-        DataType::Date64 => Box::new(Date64Builder::new(capacity)),
-        DataType::Time32(TimeUnit::Second) => {
-            Box::new(Time32SecondBuilder::new(capacity))
-        }
-        DataType::Time32(TimeUnit::Millisecond) => {
-            Box::new(Time32MillisecondBuilder::new(capacity))
-        }
-        DataType::Time64(TimeUnit::Microsecond) => {
-            Box::new(Time64MicrosecondBuilder::new(capacity))
-        }
-        DataType::Time64(TimeUnit::Nanosecond) => {
-            Box::new(Time64NanosecondBuilder::new(capacity))
-        }
-        DataType::Timestamp(TimeUnit::Second, _) => {
-            Box::new(TimestampSecondBuilder::new(capacity))
-        }
-        DataType::Timestamp(TimeUnit::Millisecond, _) => {
-            Box::new(TimestampMillisecondBuilder::new(capacity))
-        }
-        DataType::Timestamp(TimeUnit::Microsecond, _) => {
-            Box::new(TimestampMicrosecondBuilder::new(capacity))
-        }
-        DataType::Timestamp(TimeUnit::Nanosecond, _) => {
-            Box::new(TimestampNanosecondBuilder::new(capacity))
-        }
-        DataType::Interval(IntervalUnit::YearMonth) => {
-            Box::new(IntervalYearMonthBuilder::new(capacity))
-        }
-        DataType::Interval(IntervalUnit::DayTime) => {
-            Box::new(IntervalDayTimeBuilder::new(capacity))
-        }
-        DataType::Duration(TimeUnit::Second) => {
-            Box::new(DurationSecondBuilder::new(capacity))
-        }
-        DataType::Duration(TimeUnit::Millisecond) => {
-            Box::new(DurationMillisecondBuilder::new(capacity))
-        }
-        DataType::Duration(TimeUnit::Microsecond) => {
-            Box::new(DurationMicrosecondBuilder::new(capacity))
-        }
-        DataType::Duration(TimeUnit::Nanosecond) => {
-            Box::new(DurationNanosecondBuilder::new(capacity))
-        }
-        DataType::Struct(fields) => {
-            Box::new(StructBuilder::from_fields(fields.clone(), capacity))
-        }
-        t => panic!("Data type {:?} is not currently supported", t),
-    }
-}
-
-impl StructBuilder {
-    pub fn new(fields: Vec<Field>, field_builders: Vec<Box<ArrayBuilder>>) -> Self {
-        Self {
-            fields,
-            field_builders,
-            bitmap_builder: BooleanBufferBuilder::new(0),
-            len: 0,
-        }
-    }
-
-    pub fn from_fields(fields: Vec<Field>, capacity: usize) -> Self {
-        let mut builders = Vec::with_capacity(fields.len());
-        for field in &fields {
-            builders.push(make_builder(field.data_type(), capacity));
-        }
-        Self::new(fields, builders)
-    }
-
-    /// Returns a mutable reference to the child field builder at index `i`.
-    /// Result will be `None` if the input type `T` provided doesn't match the actual
-    /// field builder's type.
-    pub fn field_builder<T: ArrayBuilder>(&mut self, i: usize) -> Option<&mut T> {
-        self.field_builders[i].as_any_mut().downcast_mut::<T>()
-    }
-
-    /// Returns the number of fields for the struct this builder is building.
-    pub fn num_fields(&self) -> usize {
-        self.field_builders.len()
-    }
-
-    /// Appends an element (either null or non-null) to the struct. The actual elements
-    /// should be appended for each child sub-array in a consistent way.
-    #[inline]
-    pub fn append(&mut self, is_valid: bool) -> Result<()> {
-        self.bitmap_builder.append(is_valid);
-        self.len += 1;
-        Ok(())
-    }
-
-    /// Appends a null element to the struct.
-    #[inline]
-    pub fn append_null(&mut self) -> Result<()> {
-        self.append(false)
-    }
-
-    /// Builds the `StructArray` and reset this builder.
-    pub fn finish(&mut self) -> StructArray {
-        let mut child_data = Vec::with_capacity(self.field_builders.len());
-        for f in &mut self.field_builders {
-            let arr = f.finish();
-            child_data.push(arr.data().clone());
-        }
-
-        let null_bit_buffer = self.bitmap_builder.finish();
-        let null_count = self.len - null_bit_buffer.count_set_bits();
-        let mut builder = ArrayData::builder(DataType::Struct(self.fields.clone()))
-            .len(self.len)
-            .child_data(child_data);
-        if null_count > 0 {
-            builder = builder.null_bit_buffer(null_bit_buffer);
-        }
-
-        self.len = 0;
-
-        StructArray::from(builder.build())
-    }
-}
-
-/// `FieldData` is a helper struct to track the state of the fields in the `UnionBuilder`.
-#[derive(Debug)]
-struct FieldData {
-    /// The type id for this field
-    type_id: i8,
-    /// The Arrow data type represented in the `values_buffer`, which is untyped
-    data_type: DataType,
-    /// A buffer containing the values for this field in raw bytes
-    values_buffer: Option<MutableBuffer>,
-    ///  The number of array slots represented by the buffer
-    slots: usize,
-    /// A builder for the bitmap if required (for Sparse Unions)
-    bitmap_builder: Option<BooleanBufferBuilder>,
-}
-
-impl FieldData {
-    /// Creates a new `FieldData`.
-    fn new(
-        type_id: i8,
-        data_type: DataType,
-        bitmap_builder: Option<BooleanBufferBuilder>,
-    ) -> Self {
-        Self {
-            type_id,
-            data_type,
-            values_buffer: Some(MutableBuffer::new(1)),
-            slots: 0,
-            bitmap_builder,
-        }
-    }
-
-    /// Appends a single value to this `FieldData`'s `values_buffer`.
-    #[allow(clippy::unnecessary_wraps)]
-    fn append_to_values_buffer<T: ArrowPrimitiveType>(
-        &mut self,
-        v: T::Native,
-    ) -> Result<()> {
-        let values_buffer = self
-            .values_buffer
-            .take()
-            .expect("Values buffer was never created");
-        let mut builder: BufferBuilder<T::Native> =
-            mutable_buffer_to_builder(values_buffer, self.slots);
-        builder.append(v);
-        let mutable_buffer = builder_to_mutable_buffer(builder);
-        self.values_buffer = Some(mutable_buffer);
-
-        self.slots += 1;
-        if let Some(b) = &mut self.bitmap_builder {
-            b.append(true)
-        };
-        Ok(())
-    }
-
-    /// Appends a null to this `FieldData`.
-    #[allow(clippy::unnecessary_wraps)]
-    fn append_null<T: ArrowPrimitiveType>(&mut self) -> Result<()> {
-        if let Some(b) = &mut self.bitmap_builder {
-            let values_buffer = self
-                .values_buffer
-                .take()
-                .expect("Values buffer was never created");
-            let mut builder: BufferBuilder<T::Native> =
-                mutable_buffer_to_builder(values_buffer, self.slots);
-            builder.advance(1);
-            let mutable_buffer = builder_to_mutable_buffer(builder);
-            self.values_buffer = Some(mutable_buffer);
-            self.slots += 1;
-            b.append(false);
-        };
-        Ok(())
-    }
-
-    /// Appends a null to this `FieldData` when the type is not known at compile time.
-    ///
-    /// As the main `append` method of `UnionBuilder` is generic, we need a way to append null
-    /// slots to the fields that are not being appended to in the case of sparse unions.  This
-    /// method solves this problem by appending dynamically based on `DataType`.
-    ///
-    /// Note, this method does **not** update the length of the `UnionArray` (this is done by the
-    /// main append operation) and assumes that it is called from a method that is generic over `T`
-    /// where `T` satisfies the bound `ArrowPrimitiveType`.
-    fn append_null_dynamic(&mut self) -> Result<()> {
-        match self.data_type {
-            DataType::Null => unimplemented!(),
-            DataType::Int8 => self.append_null::<Int8Type>()?,
-            DataType::Int16 => self.append_null::<Int16Type>()?,
-            DataType::Int32
-            | DataType::Date32
-            | DataType::Time32(_)
-            | DataType::Interval(IntervalUnit::YearMonth) => {
-                self.append_null::<Int32Type>()?
-            }
-            DataType::Int64
-            | DataType::Timestamp(_, _)
-            | DataType::Date64
-            | DataType::Time64(_)
-            | DataType::Interval(IntervalUnit::DayTime)
-            | DataType::Duration(_) => self.append_null::<Int64Type>()?,
-            DataType::UInt8 => self.append_null::<UInt8Type>()?,
-            DataType::UInt16 => self.append_null::<UInt16Type>()?,
-            DataType::UInt32 => self.append_null::<UInt32Type>()?,
-            DataType::UInt64 => self.append_null::<UInt64Type>()?,
-            DataType::Float32 => self.append_null::<Float32Type>()?,
-            DataType::Float64 => self.append_null::<Float64Type>()?,
-            _ => unreachable!("All cases of types that satisfy the trait bounds over T are covered above."),
-        };
-        Ok(())
-    }
-}
-
-/// Builder type for creating a new `UnionArray`.
-#[derive(Debug)]
-pub struct UnionBuilder {
-    /// The current number of slots in the array
-    len: usize,
-    /// Maps field names to `FieldData` instances which track the builders for that field
-    fields: HashMap<String, FieldData>,
-    /// Builder to keep track of type ids
-    type_id_builder: Int8BufferBuilder,
-    /// Builder to keep track of offsets (`None` for sparse unions)
-    value_offset_builder: Option<Int32BufferBuilder>,
-    /// Optional builder for null slots
-    bitmap_builder: Option<BooleanBufferBuilder>,
-}
-
-impl UnionBuilder {
-    /// Creates a new dense array builder.
-    pub fn new_dense(capacity: usize) -> Self {
-        Self {
-            len: 0,
-            fields: HashMap::default(),
-            type_id_builder: Int8BufferBuilder::new(capacity),
-            value_offset_builder: Some(Int32BufferBuilder::new(capacity)),
-            bitmap_builder: None,
-        }
-    }
-
-    /// Creates a new sparse array builder.
-    pub fn new_sparse(capacity: usize) -> Self {
-        Self {
-            len: 0,
-            fields: HashMap::default(),
-            type_id_builder: Int8BufferBuilder::new(capacity),
-            value_offset_builder: None,
-            bitmap_builder: None,
-        }
-    }
-
-    /// Appends a null to this builder.
-    #[inline]
-    pub fn append_null(&mut self) -> Result<()> {
-        if self.bitmap_builder.is_none() {
-            let mut builder = BooleanBufferBuilder::new(self.len + 1);
-            for _ in 0..self.len {
-                builder.append(true);
-            }
-            self.bitmap_builder = Some(builder)
-        }
-        self.bitmap_builder
-            .as_mut()
-            .expect("Cannot be None")
-            .append(false);
-
-        self.type_id_builder.append(i8::default());
-
-        // Handle sparse union
-        if self.value_offset_builder.is_none() {
-            for (_, fd) in self.fields.iter_mut() {
-                fd.append_null_dynamic()?;
-            }
-        }
-        self.len += 1;
-        Ok(())
-    }
-
-    /// Appends a value to this builder.
-    #[inline]
-    pub fn append<T: ArrowPrimitiveType>(
-        &mut self,
-        type_name: &str,
-        v: T::Native,
-    ) -> Result<()> {
-        let type_name = type_name.to_string();
-
-        let mut field_data = match self.fields.remove(&type_name) {
-            Some(data) => data,
-            None => match self.value_offset_builder {
-                Some(_) => FieldData::new(self.fields.len() as i8, T::DATA_TYPE, None),
-                None => {
-                    let mut fd = FieldData::new(
-                        self.fields.len() as i8,
-                        T::DATA_TYPE,
-                        Some(BooleanBufferBuilder::new(1)),
-                    );
-                    for _ in 0..self.len {
-                        fd.append_null::<T>()?;
-                    }
-                    fd
-                }
-            },
-        };
-        self.type_id_builder.append(field_data.type_id);
-
-        match &mut self.value_offset_builder {
-            // Dense Union
-            Some(offset_builder) => {
-                offset_builder.append(field_data.slots as i32);
-            }
-            // Sparse Union
-            None => {
-                for (name, fd) in self.fields.iter_mut() {
-                    if name != &type_name {
-                        fd.append_null_dynamic()?;
-                    }
-                }
-            }
-        }
-        field_data.append_to_values_buffer::<T>(v)?;
-        self.fields.insert(type_name, field_data);
-
-        // Update the bitmap builder if it exists
-        if let Some(b) = &mut self.bitmap_builder {
-            b.append(true);
-        }
-        self.len += 1;
-        Ok(())
-    }
-
-    /// Builds this builder creating a new `UnionArray`.
-    pub fn build(mut self) -> Result<UnionArray> {
-        let type_id_buffer = self.type_id_builder.finish();
-        let value_offsets_buffer = self.value_offset_builder.map(|mut b| b.finish());
-        let mut children = Vec::new();
-        for (
-            name,
-            FieldData {
-                type_id,
-                data_type,
-                values_buffer,
-                slots,
-                bitmap_builder,
-            },
-        ) in self.fields.into_iter()
-        {
-            let buffer = values_buffer
-                .expect("The `values_buffer` should only ever be None inside the `append` method.")
-                .into();
-            let arr_data_builder = ArrayDataBuilder::new(data_type.clone())
-                .add_buffer(buffer)
-                .len(slots);
-            //                .build();
-            let arr_data_ref = match bitmap_builder {
-                Some(mut bb) => arr_data_builder.null_bit_buffer(bb.finish()).build(),
-                None => arr_data_builder.build(),
-            };
-            let array_ref = make_array(arr_data_ref);
-            children.push((type_id, (Field::new(&name, data_type, false), array_ref)))
-        }
-
-        children.sort_by(|a, b| {
-            a.0.partial_cmp(&b.0)
-                .expect("This will never be None as type ids are always i8 values.")
-        });
-        let children: Vec<_> = children.into_iter().map(|(_, b)| b).collect();
-        let bitmap = self.bitmap_builder.map(|mut b| b.finish());
-
-        UnionArray::try_new(type_id_buffer, value_offsets_buffer, children, bitmap)
-    }
-}
-
-/// Array builder for `DictionaryArray`. For example to map a set of byte indices
-/// to f32 values. Note that the use of a `HashMap` here will not scale to very large
-/// arrays or result in an ordered dictionary.
-#[derive(Debug)]
-pub struct PrimitiveDictionaryBuilder<K, V>
-where
-    K: ArrowPrimitiveType,
-    V: ArrowPrimitiveType,
-{
-    keys_builder: PrimitiveBuilder<K>,
-    values_builder: PrimitiveBuilder<V>,
-    map: HashMap<Box<[u8]>, K::Native>,
-}
-
-impl<K, V> PrimitiveDictionaryBuilder<K, V>
-where
-    K: ArrowPrimitiveType,
-    V: ArrowPrimitiveType,
-{
-    /// Creates a new `PrimitiveDictionaryBuilder` from a keys builder and a value builder.
-    pub fn new(
-        keys_builder: PrimitiveBuilder<K>,
-        values_builder: PrimitiveBuilder<V>,
-    ) -> Self {
-        Self {
-            keys_builder,
-            values_builder,
-            map: HashMap::new(),
-        }
-    }
-}
-
-impl<K, V> ArrayBuilder for PrimitiveDictionaryBuilder<K, V>
-where
-    K: ArrowPrimitiveType,
-    V: ArrowPrimitiveType,
-{
-    /// Returns the builder as an non-mutable `Any` reference.
-    fn as_any(&self) -> &Any {
-        self
-    }
-
-    /// Returns the builder as an mutable `Any` reference.
-    fn as_any_mut(&mut self) -> &mut Any {
-        self
-    }
-
-    /// Returns the boxed builder as a box of `Any`.
-    fn into_box_any(self: Box<Self>) -> Box<Any> {
-        self
-    }
-
-    /// Returns the number of array slots in the builder
-    fn len(&self) -> usize {
-        self.keys_builder.len()
-    }
-
-    /// Returns whether the number of array slots is zero
-    fn is_empty(&self) -> bool {
-        self.keys_builder.is_empty()
-    }
-
-    /// Builds the array and reset this builder.
-    fn finish(&mut self) -> ArrayRef {
-        Arc::new(self.finish())
-    }
-}
-
-impl<K, V> PrimitiveDictionaryBuilder<K, V>
-where
-    K: ArrowPrimitiveType,
-    V: ArrowPrimitiveType,
-{
-    /// Append a primitive value to the array. Return an existing index
-    /// if already present in the values array or a new index if the
-    /// value is appended to the values array.
-    #[inline]
-    pub fn append(&mut self, value: V::Native) -> Result<K::Native> {
-        if let Some(&key) = self.map.get(value.to_byte_slice()) {
-            // Append existing value.
-            self.keys_builder.append_value(key)?;
-            Ok(key)
-        } else {
-            // Append new value.
-            let key = K::Native::from_usize(self.values_builder.len())
-                .ok_or(ArrowError::DictionaryKeyOverflowError)?;
-            self.values_builder.append_value(value)?;
-            self.keys_builder.append_value(key as K::Native)?;
-            self.map.insert(value.to_byte_slice().into(), key);
-            Ok(key)
-        }
-    }
-
-    #[inline]
-    pub fn append_null(&mut self) -> Result<()> {
-        self.keys_builder.append_null()
-    }
-
-    /// Builds the `DictionaryArray` and reset this builder.
-    pub fn finish(&mut self) -> DictionaryArray<K> {
-        self.map.clear();
-        let value_ref: ArrayRef = Arc::new(self.values_builder.finish());
-        self.keys_builder.finish_dict(value_ref)
-    }
-}
-
-/// Array builder for `DictionaryArray` that stores Strings. For example to map a set of byte indices
-/// to String values. Note that the use of a `HashMap` here will not scale to very large
-/// arrays or result in an ordered dictionary.
-///
-/// ```
-/// use arrow::{
-///   array::{
-///     Int8Array, StringArray,
-///     PrimitiveBuilder, StringBuilder, StringDictionaryBuilder,
-///   },
-///   datatypes::Int8Type,
-/// };
-///
-/// // Create a dictionary array indexed by bytes whose values are Strings.
-/// // It can thus hold up to 256 distinct string values.
-///
-/// let key_builder = PrimitiveBuilder::<Int8Type>::new(100);
-/// let value_builder = StringBuilder::new(100);
-/// let mut builder = StringDictionaryBuilder::new(key_builder, value_builder);
-///
-/// // The builder builds the dictionary value by value
-/// builder.append("abc").unwrap();
-/// builder.append_null().unwrap();
-/// builder.append("def").unwrap();
-/// builder.append("def").unwrap();
-/// builder.append("abc").unwrap();
-/// let array = builder.finish();
-///
-/// assert_eq!(
-///   array.keys(),
-///   &Int8Array::from(vec![Some(0), None, Some(1), Some(1), Some(0)])
-/// );
-///
-/// // Values are polymorphic and so require a downcast.
-/// let av = array.values();
-/// let ava: &StringArray = av.as_any().downcast_ref::<StringArray>().unwrap();
-///
-/// assert_eq!(ava.value(0), "abc");
-/// assert_eq!(ava.value(1), "def");
-///
-/// ```
-#[derive(Debug)]
-pub struct StringDictionaryBuilder<K>
-where
-    K: ArrowDictionaryKeyType,
-{
-    keys_builder: PrimitiveBuilder<K>,
-    values_builder: StringBuilder,
-    map: HashMap<Box<[u8]>, K::Native>,
-}
-
-impl<K> StringDictionaryBuilder<K>
-where
-    K: ArrowDictionaryKeyType,
-{
-    /// Creates a new `StringDictionaryBuilder` from a keys builder and a value builder.
-    pub fn new(keys_builder: PrimitiveBuilder<K>, values_builder: StringBuilder) -> Self {
-        Self {
-            keys_builder,
-            values_builder,
-            map: HashMap::new(),
-        }
-    }
-
-    /// Creates a new `StringDictionaryBuilder` from a keys builder and a dictionary
-    /// which is initialized with the given values.
-    /// The indices of those dictionary values are used as keys.
-    ///
-    /// # Example
-    ///
-    /// ```
-    /// use arrow::datatypes::Int16Type;
-    /// use arrow::array::{StringArray, StringDictionaryBuilder, PrimitiveBuilder, Int16Array};
-    /// use std::convert::TryFrom;
-    ///
-    /// let dictionary_values = StringArray::from(vec![None, Some("abc"), Some("def")]);
-    ///
-    /// let mut builder = StringDictionaryBuilder::new_with_dictionary(PrimitiveBuilder::<Int16Type>::new(3), &dictionary_values).unwrap();
-    /// builder.append("def").unwrap();
-    /// builder.append_null().unwrap();
-    /// builder.append("abc").unwrap();
-    ///
-    /// let dictionary_array = builder.finish();
-    ///
-    /// let keys = dictionary_array.keys();
-    ///
-    /// assert_eq!(keys, &Int16Array::from(vec![Some(2), None, Some(1)]));
-    /// ```
-    pub fn new_with_dictionary(
-        keys_builder: PrimitiveBuilder<K>,
-        dictionary_values: &StringArray,
-    ) -> Result<Self> {
-        let dict_len = dictionary_values.len();
-        let mut values_builder =
-            StringBuilder::with_capacity(dict_len, dictionary_values.value_data().len());
-        let mut map: HashMap<Box<[u8]>, K::Native> = HashMap::with_capacity(dict_len);
-        for i in 0..dict_len {
-            if dictionary_values.is_valid(i) {
-                let value = dictionary_values.value(i);
-                map.insert(
-                    value.as_bytes().into(),
-                    K::Native::from_usize(i)
-                        .ok_or(ArrowError::DictionaryKeyOverflowError)?,
-                );
-                values_builder.append_value(value)?;
-            } else {
-                values_builder.append_null()?;
-            }
-        }
-        Ok(Self {
-            keys_builder,
-            values_builder,
-            map,
-        })
-    }
-}
-
-impl<K> ArrayBuilder for StringDictionaryBuilder<K>
-where
-    K: ArrowDictionaryKeyType,
-{
-    /// Returns the builder as an non-mutable `Any` reference.
-    fn as_any(&self) -> &Any {
-        self
-    }
-
-    /// Returns the builder as an mutable `Any` reference.
-    fn as_any_mut(&mut self) -> &mut Any {
-        self
-    }
-
-    /// Returns the boxed builder as a box of `Any`.
-    fn into_box_any(self: Box<Self>) -> Box<Any> {
-        self
-    }
-
-    /// Returns the number of array slots in the builder
-    fn len(&self) -> usize {
-        self.keys_builder.len()
-    }
-
-    /// Returns whether the number of array slots is zero
-    fn is_empty(&self) -> bool {
-        self.keys_builder.is_empty()
-    }
-
-    /// Builds the array and reset this builder.
-    fn finish(&mut self) -> ArrayRef {
-        Arc::new(self.finish())
-    }
-}
-
-impl<K> StringDictionaryBuilder<K>
-where
-    K: ArrowDictionaryKeyType,
-{
-    /// Append a primitive value to the array. Return an existing index
-    /// if already present in the values array or a new index if the
-    /// value is appended to the values array.
-    pub fn append(&mut self, value: impl AsRef<str>) -> Result<K::Native> {
-        if let Some(&key) = self.map.get(value.as_ref().as_bytes()) {
-            // Append existing value.
-            self.keys_builder.append_value(key)?;
-            Ok(key)
-        } else {
-            // Append new value.
-            let key = K::Native::from_usize(self.values_builder.len())
-                .ok_or(ArrowError::DictionaryKeyOverflowError)?;
-            self.values_builder.append_value(value.as_ref())?;
-            self.keys_builder.append_value(key as K::Native)?;
-            self.map.insert(value.as_ref().as_bytes().into(), key);
-            Ok(key)
-        }
-    }
-
-    #[inline]
-    pub fn append_null(&mut self) -> Result<()> {
-        self.keys_builder.append_null()
-    }
-
-    /// Builds the `DictionaryArray` and reset this builder.
-    pub fn finish(&mut self) -> DictionaryArray<K> {
-        self.map.clear();
-        let value_ref: ArrayRef = Arc::new(self.values_builder.finish());
-        self.keys_builder.finish_dict(value_ref)
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    use crate::array::Array;
-    use crate::bitmap::Bitmap;
-
-    #[test]
-    fn test_builder_i32_empty() {
-        let mut b = Int32BufferBuilder::new(5);
-        assert_eq!(0, b.len());
-        assert_eq!(16, b.capacity());
-        let a = b.finish();
-        assert_eq!(0, a.len());
-    }
-
-    #[test]
-    fn test_builder_i32_alloc_zero_bytes() {
-        let mut b = Int32BufferBuilder::new(0);
-        b.append(123);
-        let a = b.finish();
-        assert_eq!(4, a.len());
-    }
-
-    #[test]
-    fn test_builder_i32() {
-        let mut b = Int32BufferBuilder::new(5);
-        for i in 0..5 {
-            b.append(i);
-        }
-        assert_eq!(16, b.capacity());
-        let a = b.finish();
-        assert_eq!(20, a.len());
-    }
-
-    #[test]
-    fn test_builder_i32_grow_buffer() {
-        let mut b = Int32BufferBuilder::new(2);
-        assert_eq!(16, b.capacity());
-        for i in 0..20 {
-            b.append(i);
-        }
-        assert_eq!(32, b.capacity());
-        let a = b.finish();
-        assert_eq!(80, a.len());
-    }
-
-    #[test]
-    fn test_builder_finish() {
-        let mut b = Int32BufferBuilder::new(5);
-        assert_eq!(16, b.capacity());
-        for i in 0..10 {
-            b.append(i);
-        }
-        let mut a = b.finish();
-        assert_eq!(40, a.len());
-        assert_eq!(0, b.len());
-        assert_eq!(0, b.capacity());
-
-        // Try build another buffer after cleaning up.
-        for i in 0..20 {
-            b.append(i)
-        }
-        assert_eq!(32, b.capacity());
-        a = b.finish();
-        assert_eq!(80, a.len());
-    }
-
-    #[test]
-    fn test_reserve() {
-        let mut b = UInt8BufferBuilder::new(2);
-        assert_eq!(64, b.capacity());
-        b.reserve(64);
-        assert_eq!(64, b.capacity());
-        b.reserve(65);
-        assert_eq!(128, b.capacity());
-
-        let mut b = Int32BufferBuilder::new(2);
-        assert_eq!(16, b.capacity());
-        b.reserve(16);
-        assert_eq!(16, b.capacity());
-        b.reserve(17);
-        assert_eq!(32, b.capacity());
-    }
-
-    #[test]
-    fn test_append_slice() {
-        let mut b = UInt8BufferBuilder::new(0);
-        b.append_slice(b"Hello, ");
-        b.append_slice(b"World!");
-        let buffer = b.finish();
-        assert_eq!(13, buffer.len());
-
-        let mut b = Int32BufferBuilder::new(0);
-        b.append_slice(&[32, 54]);
-        let buffer = b.finish();
-        assert_eq!(8, buffer.len());
-    }
-
-    #[test]
-    fn test_append_values() -> Result<()> {
-        let mut a = Int8Builder::new(0);
-        a.append_value(1)?;
-        a.append_null()?;
-        a.append_value(-2)?;
-        assert_eq!(a.len(), 3);
-
-        // append values
-        let values = &[1, 2, 3, 4];
-        let is_valid = &[true, true, false, true];
-        a.append_values(values, is_valid)?;
-
-        assert_eq!(a.len(), 7);
-        let array = a.finish();
-        assert_eq!(array.value(0), 1);
-        assert_eq!(array.is_null(1), true);
-        assert_eq!(array.value(2), -2);
-        assert_eq!(array.value(3), 1);
-        assert_eq!(array.value(4), 2);
-        assert_eq!(array.is_null(5), true);
-        assert_eq!(array.value(6), 4);
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_write_bytes() {
-        let mut b = BooleanBufferBuilder::new(4);
-        b.append(false);
-        b.append(true);
-        b.append(false);
-        b.append(true);
-        assert_eq!(4, b.len());
-        assert_eq!(512, b.capacity());
-        let buffer = b.finish();
-        assert_eq!(1, buffer.len());
-
-        let mut b = BooleanBufferBuilder::new(4);
-        b.append_slice(&[false, true, false, true]);
-        assert_eq!(4, b.len());
-        assert_eq!(512, b.capacity());
-        let buffer = b.finish();
-        assert_eq!(1, buffer.len());
-    }
-
-    #[test]
-    fn test_boolean_array_builder_append_slice() {
-        let arr1 =
-            BooleanArray::from(vec![Some(true), Some(false), None, None, Some(false)]);
-
-        let mut builder = BooleanArray::builder(0);
-        builder.append_slice(&[true, false]).unwrap();
-        builder.append_null().unwrap();
-        builder.append_null().unwrap();
-        builder.append_value(false).unwrap();
-        let arr2 = builder.finish();
-
-        assert_eq!(arr1, arr2);
-    }
-
-    #[test]
-    fn test_boolean_array_builder_append_slice_large() {
-        let arr1 = BooleanArray::from(vec![true; 513]);
-
-        let mut builder = BooleanArray::builder(512);
-        builder.append_slice(&[true; 513]).unwrap();
-        let arr2 = builder.finish();
-
-        assert_eq!(arr1, arr2);
-    }
-
-    #[test]
-    fn test_boolean_builder_increases_buffer_len() {
-        // 00000010 01001000
-        let buf = Buffer::from([72_u8, 2_u8]);
-        let mut builder = BooleanBufferBuilder::new(8);
-
-        for i in 0..16 {
-            if i == 3 || i == 6 || i == 9 {
-                builder.append(true);
-            } else {
-                builder.append(false);
-            }
-        }
-        let buf2 = builder.finish();
-
-        assert_eq!(buf.len(), buf2.len());
-        assert_eq!(buf.as_slice(), buf2.as_slice());
-    }
-
-    #[test]
-    fn test_primitive_array_builder_i32() {
-        let mut builder = Int32Array::builder(5);
-        for i in 0..5 {
-            builder.append_value(i).unwrap();
-        }
-        let arr = builder.finish();
-        assert_eq!(5, arr.len());
-        assert_eq!(0, arr.offset());
-        assert_eq!(0, arr.null_count());
-        for i in 0..5 {
-            assert!(!arr.is_null(i));
-            assert!(arr.is_valid(i));
-            assert_eq!(i as i32, arr.value(i));
-        }
-    }
-
-    #[test]
-    fn test_primitive_array_builder_date32() {
-        let mut builder = Date32Array::builder(5);
-        for i in 0..5 {
-            builder.append_value(i).unwrap();
-        }
-        let arr = builder.finish();
-        assert_eq!(5, arr.len());
-        assert_eq!(0, arr.offset());
-        assert_eq!(0, arr.null_count());
-        for i in 0..5 {
-            assert!(!arr.is_null(i));
-            assert!(arr.is_valid(i));
-            assert_eq!(i as i32, arr.value(i));
-        }
-    }
-
-    #[test]
-    fn test_primitive_array_builder_timestamp_second() {
-        let mut builder = TimestampSecondArray::builder(5);
-        for i in 0..5 {
-            builder.append_value(i).unwrap();
-        }
-        let arr = builder.finish();
-        assert_eq!(5, arr.len());
-        assert_eq!(0, arr.offset());
-        assert_eq!(0, arr.null_count());
-        for i in 0..5 {
-            assert!(!arr.is_null(i));
-            assert!(arr.is_valid(i));
-            assert_eq!(i as i64, arr.value(i));
-        }
-    }
-
-    #[test]
-    fn test_primitive_array_builder_bool() {
-        // 00000010 01001000
-        let buf = Buffer::from([72_u8, 2_u8]);
-        let mut builder = BooleanArray::builder(10);
-        for i in 0..10 {
-            if i == 3 || i == 6 || i == 9 {
-                builder.append_value(true).unwrap();
-            } else {
-                builder.append_value(false).unwrap();
-            }
-        }
-
-        let arr = builder.finish();
-        assert_eq!(&buf, arr.values());
-        assert_eq!(10, arr.len());
-        assert_eq!(0, arr.offset());
-        assert_eq!(0, arr.null_count());
-        for i in 0..10 {
-            assert!(!arr.is_null(i));
-            assert!(arr.is_valid(i));
-            assert_eq!(i == 3 || i == 6 || i == 9, arr.value(i), "failed at {}", i)
-        }
-    }
-
-    #[test]
-    fn test_primitive_array_builder_append_option() {
-        let arr1 = Int32Array::from(vec![Some(0), None, Some(2), None, Some(4)]);
-
-        let mut builder = Int32Array::builder(5);
-        builder.append_option(Some(0)).unwrap();
-        builder.append_option(None).unwrap();
-        builder.append_option(Some(2)).unwrap();
-        builder.append_option(None).unwrap();
-        builder.append_option(Some(4)).unwrap();
-        let arr2 = builder.finish();
-
-        assert_eq!(arr1.len(), arr2.len());
-        assert_eq!(arr1.offset(), arr2.offset());
-        assert_eq!(arr1.null_count(), arr2.null_count());
-        for i in 0..5 {
-            assert_eq!(arr1.is_null(i), arr2.is_null(i));
-            assert_eq!(arr1.is_valid(i), arr2.is_valid(i));
-            if arr1.is_valid(i) {
-                assert_eq!(arr1.value(i), arr2.value(i));
-            }
-        }
-    }
-
-    #[test]
-    fn test_primitive_array_builder_append_null() {
-        let arr1 = Int32Array::from(vec![Some(0), Some(2), None, None, Some(4)]);
-
-        let mut builder = Int32Array::builder(5);
-        builder.append_value(0).unwrap();
-        builder.append_value(2).unwrap();
-        builder.append_null().unwrap();
-        builder.append_null().unwrap();
-        builder.append_value(4).unwrap();
-        let arr2 = builder.finish();
-
-        assert_eq!(arr1.len(), arr2.len());
-        assert_eq!(arr1.offset(), arr2.offset());
-        assert_eq!(arr1.null_count(), arr2.null_count());
-        for i in 0..5 {
-            assert_eq!(arr1.is_null(i), arr2.is_null(i));
-            assert_eq!(arr1.is_valid(i), arr2.is_valid(i));
-            if arr1.is_valid(i) {
-                assert_eq!(arr1.value(i), arr2.value(i));
-            }
-        }
-    }
-
-    #[test]
-    fn test_primitive_array_builder_append_slice() {
-        let arr1 = Int32Array::from(vec![Some(0), Some(2), None, None, Some(4)]);
-
-        let mut builder = Int32Array::builder(5);
-        builder.append_slice(&[0, 2]).unwrap();
-        builder.append_null().unwrap();
-        builder.append_null().unwrap();
-        builder.append_value(4).unwrap();
-        let arr2 = builder.finish();
-
-        assert_eq!(arr1.len(), arr2.len());
-        assert_eq!(arr1.offset(), arr2.offset());
-        assert_eq!(arr1.null_count(), arr2.null_count());
-        for i in 0..5 {
-            assert_eq!(arr1.is_null(i), arr2.is_null(i));
-            assert_eq!(arr1.is_valid(i), arr2.is_valid(i));
-            if arr1.is_valid(i) {
-                assert_eq!(arr1.value(i), arr2.value(i));
-            }
-        }
-    }
-
-    #[test]
-    fn test_primitive_array_builder_finish() {
-        let mut builder = Int32Builder::new(5);
-        builder.append_slice(&[2, 4, 6, 8]).unwrap();
-        let mut arr = builder.finish();
-        assert_eq!(4, arr.len());
-        assert_eq!(0, builder.len());
-
-        builder.append_slice(&[1, 3, 5, 7, 9]).unwrap();
-        arr = builder.finish();
-        assert_eq!(5, arr.len());
-        assert_eq!(0, builder.len());
-    }
-
-    #[test]
-    fn test_list_array_builder() {
-        let values_builder = Int32Builder::new(10);
-        let mut builder = ListBuilder::new(values_builder);
-
-        //  [[0, 1, 2], [3, 4, 5], [6, 7]]
-        builder.values().append_value(0).unwrap();
-        builder.values().append_value(1).unwrap();
-        builder.values().append_value(2).unwrap();
-        builder.append(true).unwrap();
-        builder.values().append_value(3).unwrap();
-        builder.values().append_value(4).unwrap();
-        builder.values().append_value(5).unwrap();
-        builder.append(true).unwrap();
-        builder.values().append_value(6).unwrap();
-        builder.values().append_value(7).unwrap();
-        builder.append(true).unwrap();
-        let list_array = builder.finish();
-
-        let values = list_array.values().data().buffers()[0].clone();
-        assert_eq!(Buffer::from_slice_ref(&[0, 1, 2, 3, 4, 5, 6, 7]), values);
-        assert_eq!(
-            Buffer::from_slice_ref(&[0, 3, 6, 8]),
-            list_array.data().buffers()[0].clone()
-        );
-        assert_eq!(DataType::Int32, list_array.value_type());
-        assert_eq!(3, list_array.len());
-        assert_eq!(0, list_array.null_count());
-        assert_eq!(6, list_array.value_offsets()[2]);
-        assert_eq!(2, list_array.value_length(2));
-        for i in 0..3 {
-            assert!(list_array.is_valid(i));
-            assert!(!list_array.is_null(i));
-        }
-    }
-
-    #[test]
-    fn test_large_list_array_builder() {
-        let values_builder = Int32Builder::new(10);
-        let mut builder = LargeListBuilder::new(values_builder);
-
-        //  [[0, 1, 2], [3, 4, 5], [6, 7]]
-        builder.values().append_value(0).unwrap();
-        builder.values().append_value(1).unwrap();
-        builder.values().append_value(2).unwrap();
-        builder.append(true).unwrap();
-        builder.values().append_value(3).unwrap();
-        builder.values().append_value(4).unwrap();
-        builder.values().append_value(5).unwrap();
-        builder.append(true).unwrap();
-        builder.values().append_value(6).unwrap();
-        builder.values().append_value(7).unwrap();
-        builder.append(true).unwrap();
-        let list_array = builder.finish();
-
-        let values = list_array.values().data().buffers()[0].clone();
-        assert_eq!(Buffer::from_slice_ref(&[0, 1, 2, 3, 4, 5, 6, 7]), values);
-        assert_eq!(
-            Buffer::from_slice_ref(&[0i64, 3, 6, 8]),
-            list_array.data().buffers()[0].clone()
-        );
-        assert_eq!(DataType::Int32, list_array.value_type());
-        assert_eq!(3, list_array.len());
-        assert_eq!(0, list_array.null_count());
-        assert_eq!(6, list_array.value_offsets()[2]);
-        assert_eq!(2, list_array.value_length(2));
-        for i in 0..3 {
-            assert!(list_array.is_valid(i));
-            assert!(!list_array.is_null(i));
-        }
-    }
-
-    #[test]
-    fn test_list_array_builder_nulls() {
-        let values_builder = Int32Builder::new(10);
-        let mut builder = ListBuilder::new(values_builder);
-
-        //  [[0, 1, 2], null, [3, null, 5], [6, 7]]
-        builder.values().append_value(0).unwrap();
-        builder.values().append_value(1).unwrap();
-        builder.values().append_value(2).unwrap();
-        builder.append(true).unwrap();
-        builder.append(false).unwrap();
-        builder.values().append_value(3).unwrap();
-        builder.values().append_null().unwrap();
-        builder.values().append_value(5).unwrap();
-        builder.append(true).unwrap();
-        builder.values().append_value(6).unwrap();
-        builder.values().append_value(7).unwrap();
-        builder.append(true).unwrap();
-        let list_array = builder.finish();
-
-        assert_eq!(DataType::Int32, list_array.value_type());
-        assert_eq!(4, list_array.len());
-        assert_eq!(1, list_array.null_count());
-        assert_eq!(3, list_array.value_offsets()[2]);
-        assert_eq!(3, list_array.value_length(2));
-    }
-
-    #[test]
-    fn test_large_list_array_builder_nulls() {
-        let values_builder = Int32Builder::new(10);
-        let mut builder = LargeListBuilder::new(values_builder);
-
-        //  [[0, 1, 2], null, [3, null, 5], [6, 7]]
-        builder.values().append_value(0).unwrap();
-        builder.values().append_value(1).unwrap();
-        builder.values().append_value(2).unwrap();
-        builder.append(true).unwrap();
-        builder.append(false).unwrap();
-        builder.values().append_value(3).unwrap();
-        builder.values().append_null().unwrap();
-        builder.values().append_value(5).unwrap();
-        builder.append(true).unwrap();
-        builder.values().append_value(6).unwrap();
-        builder.values().append_value(7).unwrap();
-        builder.append(true).unwrap();
-        let list_array = builder.finish();
-
-        assert_eq!(DataType::Int32, list_array.value_type());
-        assert_eq!(4, list_array.len());
-        assert_eq!(1, list_array.null_count());
-        assert_eq!(3, list_array.value_offsets()[2]);
-        assert_eq!(3, list_array.value_length(2));
-    }
-
-    #[test]
-    fn test_fixed_size_list_array_builder() {
-        let values_builder = Int32Builder::new(10);
-        let mut builder = FixedSizeListBuilder::new(values_builder, 3);
-
-        //  [[0, 1, 2], null, [3, null, 5], [6, 7, null]]
-        builder.values().append_value(0).unwrap();
-        builder.values().append_value(1).unwrap();
-        builder.values().append_value(2).unwrap();
-        builder.append(true).unwrap();
-        builder.values().append_null().unwrap();
-        builder.values().append_null().unwrap();
-        builder.values().append_null().unwrap();
-        builder.append(false).unwrap();
-        builder.values().append_value(3).unwrap();
-        builder.values().append_null().unwrap();
-        builder.values().append_value(5).unwrap();
-        builder.append(true).unwrap();
-        builder.values().append_value(6).unwrap();
-        builder.values().append_value(7).unwrap();
-        builder.values().append_null().unwrap();
-        builder.append(true).unwrap();
-        let list_array = builder.finish();
-
-        assert_eq!(DataType::Int32, list_array.value_type());
-        assert_eq!(4, list_array.len());
-        assert_eq!(1, list_array.null_count());
-        assert_eq!(6, list_array.value_offset(2));
-        assert_eq!(3, list_array.value_length());
-    }
-
-    #[test]
-    fn test_list_array_builder_finish() {
-        let values_builder = Int32Array::builder(5);
-        let mut builder = ListBuilder::new(values_builder);
-
-        builder.values().append_slice(&[1, 2, 3]).unwrap();
-        builder.append(true).unwrap();
-        builder.values().append_slice(&[4, 5, 6]).unwrap();
-        builder.append(true).unwrap();
-
-        let mut arr = builder.finish();
-        assert_eq!(2, arr.len());
-        assert_eq!(0, builder.len());
-
-        builder.values().append_slice(&[7, 8, 9]).unwrap();
-        builder.append(true).unwrap();
-        arr = builder.finish();
-        assert_eq!(1, arr.len());
-        assert_eq!(0, builder.len());
-    }
-
-    #[test]
-    fn test_fixed_size_list_array_builder_empty() {
-        let values_builder = Int32Array::builder(5);
-        let mut builder = FixedSizeListBuilder::new(values_builder, 3);
-
-        let arr = builder.finish();
-        assert_eq!(0, arr.len());
-        assert_eq!(0, builder.len());
-    }
-
-    #[test]
-    fn test_fixed_size_list_array_builder_finish() {
-        let values_builder = Int32Array::builder(5);
-        let mut builder = FixedSizeListBuilder::new(values_builder, 3);
-
-        builder.values().append_slice(&[1, 2, 3]).unwrap();
-        builder.append(true).unwrap();
-        builder.values().append_slice(&[4, 5, 6]).unwrap();
-        builder.append(true).unwrap();
-
-        let mut arr = builder.finish();
-        assert_eq!(2, arr.len());
-        assert_eq!(0, builder.len());
-
-        builder.values().append_slice(&[7, 8, 9]).unwrap();
-        builder.append(true).unwrap();
-        arr = builder.finish();
-        assert_eq!(1, arr.len());
-        assert_eq!(0, builder.len());
-    }
-
-    #[test]
-    fn test_list_list_array_builder() {
-        let primitive_builder = Int32Builder::new(10);
-        let values_builder = ListBuilder::new(primitive_builder);
-        let mut builder = ListBuilder::new(values_builder);
-
-        //  [[[1, 2], [3, 4]], [[5, 6, 7], null, [8]], null, [[9, 10]]]
-        builder.values().values().append_value(1).unwrap();
-        builder.values().values().append_value(2).unwrap();
-        builder.values().append(true).unwrap();
-        builder.values().values().append_value(3).unwrap();
-        builder.values().values().append_value(4).unwrap();
-        builder.values().append(true).unwrap();
-        builder.append(true).unwrap();
-
-        builder.values().values().append_value(5).unwrap();
-        builder.values().values().append_value(6).unwrap();
-        builder.values().values().append_value(7).unwrap();
-        builder.values().append(true).unwrap();
-        builder.values().append(false).unwrap();
-        builder.values().values().append_value(8).unwrap();
-        builder.values().append(true).unwrap();
-        builder.append(true).unwrap();
-
-        builder.append(false).unwrap();
-
-        builder.values().values().append_value(9).unwrap();
-        builder.values().values().append_value(10).unwrap();
-        builder.values().append(true).unwrap();
-        builder.append(true).unwrap();
-
-        let list_array = builder.finish();
-
-        assert_eq!(4, list_array.len());
-        assert_eq!(1, list_array.null_count());
-        assert_eq!(
-            Buffer::from_slice_ref(&[0, 2, 5, 5, 6]),
-            list_array.data().buffers()[0].clone()
-        );
-
-        assert_eq!(6, list_array.values().data().len());
-        assert_eq!(1, list_array.values().data().null_count());
-        assert_eq!(
-            Buffer::from_slice_ref(&[0, 2, 4, 7, 7, 8, 10]),
-            list_array.values().data().buffers()[0].clone()
-        );
-
-        assert_eq!(10, list_array.values().data().child_data()[0].len());
-        assert_eq!(0, list_array.values().data().child_data()[0].null_count());
-        assert_eq!(
-            Buffer::from_slice_ref(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
-            list_array.values().data().child_data()[0].buffers()[0].clone()
-        );
-    }
-
-    #[test]
-    fn test_binary_array_builder() {
-        let mut builder = BinaryBuilder::new(20);
-
-        builder.append_byte(b'h').unwrap();
-        builder.append_byte(b'e').unwrap();
-        builder.append_byte(b'l').unwrap();
-        builder.append_byte(b'l').unwrap();
-        builder.append_byte(b'o').unwrap();
-        builder.append(true).unwrap();
-        builder.append(true).unwrap();
-        builder.append_byte(b'w').unwrap();
-        builder.append_byte(b'o').unwrap();
-        builder.append_byte(b'r').unwrap();
-        builder.append_byte(b'l').unwrap();
-        builder.append_byte(b'd').unwrap();
-        builder.append(true).unwrap();
-
-        let binary_array = builder.finish();
-
-        assert_eq!(3, binary_array.len());
-        assert_eq!(0, binary_array.null_count());
-        assert_eq!([b'h', b'e', b'l', b'l', b'o'], binary_array.value(0));
-        assert_eq!([] as [u8; 0], binary_array.value(1));
-        assert_eq!([b'w', b'o', b'r', b'l', b'd'], binary_array.value(2));
-        assert_eq!(5, binary_array.value_offsets()[2]);
-        assert_eq!(5, binary_array.value_length(2));
-    }
-
-    #[test]
-    fn test_large_binary_array_builder() {
-        let mut builder = LargeBinaryBuilder::new(20);
-
-        builder.append_byte(b'h').unwrap();
-        builder.append_byte(b'e').unwrap();
-        builder.append_byte(b'l').unwrap();
-        builder.append_byte(b'l').unwrap();
-        builder.append_byte(b'o').unwrap();
-        builder.append(true).unwrap();
-        builder.append(true).unwrap();
-        builder.append_byte(b'w').unwrap();
-        builder.append_byte(b'o').unwrap();
-        builder.append_byte(b'r').unwrap();
-        builder.append_byte(b'l').unwrap();
-        builder.append_byte(b'd').unwrap();
-        builder.append(true).unwrap();
-
-        let binary_array = builder.finish();
-
-        assert_eq!(3, binary_array.len());
-        assert_eq!(0, binary_array.null_count());
-        assert_eq!([b'h', b'e', b'l', b'l', b'o'], binary_array.value(0));
-        assert_eq!([] as [u8; 0], binary_array.value(1));
-        assert_eq!([b'w', b'o', b'r', b'l', b'd'], binary_array.value(2));
-        assert_eq!(5, binary_array.value_offsets()[2]);
-        assert_eq!(5, binary_array.value_length(2));
-    }
-
-    #[test]
-    fn test_string_array_builder() {
-        let mut builder = StringBuilder::new(20);
-
-        builder.append_value("hello").unwrap();
-        builder.append(true).unwrap();
-        builder.append_value("world").unwrap();
-
-        let string_array = builder.finish();
-
-        assert_eq!(3, string_array.len());
-        assert_eq!(0, string_array.null_count());
-        assert_eq!("hello", string_array.value(0));
-        assert_eq!("", string_array.value(1));
-        assert_eq!("world", string_array.value(2));
-        assert_eq!(5, string_array.value_offsets()[2]);
-        assert_eq!(5, string_array.value_length(2));
-    }
-
-    #[test]
-    fn test_fixed_size_binary_builder() {
-        let mut builder = FixedSizeBinaryBuilder::new(15, 5);
-
-        //  [b"hello", null, "arrow"]
-        builder.append_value(b"hello").unwrap();
-        builder.append_null().unwrap();
-        builder.append_value(b"arrow").unwrap();
-        let fixed_size_binary_array: FixedSizeBinaryArray = builder.finish();
-
-        assert_eq!(
-            &DataType::FixedSizeBinary(5),
-            fixed_size_binary_array.data_type()
-        );
-        assert_eq!(3, fixed_size_binary_array.len());
-        assert_eq!(1, fixed_size_binary_array.null_count());
-        assert_eq!(10, fixed_size_binary_array.value_offset(2));
-        assert_eq!(5, fixed_size_binary_array.value_length());
-    }
-
-    #[test]
-    fn test_decimal_builder() {
-        let mut builder = DecimalBuilder::new(30, 23, 6);
-
-        builder.append_value(8_887_000_000).unwrap();
-        builder.append_null().unwrap();
-        builder.append_value(-8_887_000_000).unwrap();
-        let decimal_array: DecimalArray = builder.finish();
-
-        assert_eq!(&DataType::Decimal(23, 6), decimal_array.data_type());
-        assert_eq!(3, decimal_array.len());
-        assert_eq!(1, decimal_array.null_count());
-        assert_eq!(32, decimal_array.value_offset(2));
-        assert_eq!(16, decimal_array.value_length());
-    }
-
-    #[test]
-    fn test_string_array_builder_finish() {
-        let mut builder = StringBuilder::new(10);
-
-        builder.append_value("hello").unwrap();
-        builder.append_value("world").unwrap();
-
-        let mut arr = builder.finish();
-        assert_eq!(2, arr.len());
-        assert_eq!(0, builder.len());
-
-        builder.append_value("arrow").unwrap();
-        arr = builder.finish();
-        assert_eq!(1, arr.len());
-        assert_eq!(0, builder.len());
-    }
-
-    #[test]
-    fn test_string_array_builder_append_string() {
-        let mut builder = StringBuilder::new(20);
-
-        let var = "hello".to_owned();
-        builder.append_value(&var).unwrap();
-        builder.append(true).unwrap();
-        builder.append_value("world").unwrap();
-
-        let string_array = builder.finish();
-
-        assert_eq!(3, string_array.len());
-        assert_eq!(0, string_array.null_count());
-        assert_eq!("hello", string_array.value(0));
-        assert_eq!("", string_array.value(1));
-        assert_eq!("world", string_array.value(2));
-        assert_eq!(5, string_array.value_offsets()[2]);
-        assert_eq!(5, string_array.value_length(2));
-    }
-
-    #[test]
-    fn test_struct_array_builder() {
-        let string_builder = StringBuilder::new(4);
-        let int_builder = Int32Builder::new(4);
-
-        let mut fields = Vec::new();
-        let mut field_builders = Vec::new();
-        fields.push(Field::new("f1", DataType::Utf8, false));
-        field_builders.push(Box::new(string_builder) as Box<ArrayBuilder>);
-        fields.push(Field::new("f2", DataType::Int32, false));
-        field_builders.push(Box::new(int_builder) as Box<ArrayBuilder>);
-
-        let mut builder = StructBuilder::new(fields, field_builders);
-        assert_eq!(2, builder.num_fields());
-
-        let string_builder = builder
-            .field_builder::<StringBuilder>(0)
-            .expect("builder at field 0 should be string builder");
-        string_builder.append_value("joe").unwrap();
-        string_builder.append_null().unwrap();
-        string_builder.append_null().unwrap();
-        string_builder.append_value("mark").unwrap();
-
-        let int_builder = builder
-            .field_builder::<Int32Builder>(1)
-            .expect("builder at field 1 should be int builder");
-        int_builder.append_value(1).unwrap();
-        int_builder.append_value(2).unwrap();
-        int_builder.append_null().unwrap();
-        int_builder.append_value(4).unwrap();
-
-        builder.append(true).unwrap();
-        builder.append(true).unwrap();
-        builder.append_null().unwrap();
-        builder.append(true).unwrap();
-
-        let arr = builder.finish();
-
-        let struct_data = arr.data();
-        assert_eq!(4, struct_data.len());
-        assert_eq!(1, struct_data.null_count());
-        assert_eq!(
-            &Some(Bitmap::from(Buffer::from(&[11_u8]))),
-            struct_data.null_bitmap()
-        );
-
-        let expected_string_data = ArrayData::builder(DataType::Utf8)
-            .len(4)
-            .null_bit_buffer(Buffer::from(&[9_u8]))
-            .add_buffer(Buffer::from_slice_ref(&[0, 3, 3, 3, 7]))
-            .add_buffer(Buffer::from_slice_ref(b"joemark"))
-            .build();
-
-        let expected_int_data = ArrayData::builder(DataType::Int32)
-            .len(4)
-            .null_bit_buffer(Buffer::from_slice_ref(&[11_u8]))
-            .add_buffer(Buffer::from_slice_ref(&[1, 2, 0, 4]))
-            .build();
-
-        assert_eq!(&expected_string_data, arr.column(0).data());
-
-        // TODO: implement equality for ArrayData
-        assert_eq!(expected_int_data.len(), arr.column(1).data().len());
-        assert_eq!(
-            expected_int_data.null_count(),
-            arr.column(1).data().null_count()
-        );
-        assert_eq!(
-            expected_int_data.null_bitmap(),
-            arr.column(1).data().null_bitmap()
-        );
-        let expected_value_buf = expected_int_data.buffers()[0].clone();
-        let actual_value_buf = arr.column(1).data().buffers()[0].clone();
-        for i in 0..expected_int_data.len() {
-            if !expected_int_data.is_null(i) {
-                assert_eq!(
-                    expected_value_buf.as_slice()[i * 4..(i + 1) * 4],
-                    actual_value_buf.as_slice()[i * 4..(i + 1) * 4]
-                );
-            }
-        }
-    }
-
-    #[test]
-    fn test_struct_array_builder_finish() {
-        let int_builder = Int32Builder::new(10);
-        let bool_builder = BooleanBuilder::new(10);
-
-        let mut fields = Vec::new();
-        let mut field_builders = Vec::new();
-        fields.push(Field::new("f1", DataType::Int32, false));
-        field_builders.push(Box::new(int_builder) as Box<ArrayBuilder>);
-        fields.push(Field::new("f2", DataType::Boolean, false));
-        field_builders.push(Box::new(bool_builder) as Box<ArrayBuilder>);
-
-        let mut builder = StructBuilder::new(fields, field_builders);
-        builder
-            .field_builder::<Int32Builder>(0)
-            .unwrap()
-            .append_slice(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
-            .unwrap();
-        builder
-            .field_builder::<BooleanBuilder>(1)
-            .unwrap()
-            .append_slice(&[
-                false, true, false, true, false, true, false, true, false, true,
-            ])
-            .unwrap();
-
-        // Append slot values - all are valid.
-        for _ in 0..10 {
-            assert!(builder.append(true).is_ok())
-        }
-
-        assert_eq!(10, builder.len());
-
-        let arr = builder.finish();
-
-        assert_eq!(10, arr.len());
-        assert_eq!(0, builder.len());
-
-        builder
-            .field_builder::<Int32Builder>(0)
-            .unwrap()
-            .append_slice(&[1, 3, 5, 7, 9])
-            .unwrap();
-        builder
-            .field_builder::<BooleanBuilder>(1)
-            .unwrap()
-            .append_slice(&[false, true, false, true, false])
-            .unwrap();
-
-        // Append slot values - all are valid.
-        for _ in 0..5 {
-            assert!(builder.append(true).is_ok())
-        }
-
-        assert_eq!(5, builder.len());
-
-        let arr = builder.finish();
-
-        assert_eq!(5, arr.len());
-        assert_eq!(0, builder.len());
-    }
-
-    #[test]
-    fn test_struct_array_builder_from_schema() {
-        let mut fields = Vec::new();
-        fields.push(Field::new("f1", DataType::Float32, false));
-        fields.push(Field::new("f2", DataType::Utf8, false));
-        let mut sub_fields = Vec::new();
-        sub_fields.push(Field::new("g1", DataType::Int32, false));
-        sub_fields.push(Field::new("g2", DataType::Boolean, false));
-        let struct_type = DataType::Struct(sub_fields);
-        fields.push(Field::new("f3", struct_type, false));
-
-        let mut builder = StructBuilder::from_fields(fields, 5);
-        assert_eq!(3, builder.num_fields());
-        assert!(builder.field_builder::<Float32Builder>(0).is_some());
-        assert!(builder.field_builder::<StringBuilder>(1).is_some());
-        assert!(builder.field_builder::<StructBuilder>(2).is_some());
-    }
-
-    #[test]
-    #[should_panic(
-        expected = "Data type List(Field { name: \"item\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: None }) is not currently supported"
-    )]
-    fn test_struct_array_builder_from_schema_unsupported_type() {
-        let mut fields = Vec::new();
-        fields.push(Field::new("f1", DataType::Int16, false));
-        let list_type =
-            DataType::List(Box::new(Field::new("item", DataType::Int64, true)));
-        fields.push(Field::new("f2", list_type, false));
-
-        let _ = StructBuilder::from_fields(fields, 5);
-    }
-
-    #[test]
-    fn test_struct_array_builder_field_builder_type_mismatch() {
-        let int_builder = Int32Builder::new(10);
-
-        let mut fields = Vec::new();
-        let mut field_builders = Vec::new();
-        fields.push(Field::new("f1", DataType::Int32, false));
-        field_builders.push(Box::new(int_builder) as Box<ArrayBuilder>);
-
-        let mut builder = StructBuilder::new(fields, field_builders);
-        assert!(builder.field_builder::<BinaryBuilder>(0).is_none());
-    }
-
-    #[test]
-    fn test_primitive_dictionary_builder() {
-        let key_builder = PrimitiveBuilder::<UInt8Type>::new(3);
-        let value_builder = PrimitiveBuilder::<UInt32Type>::new(2);
-        let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder);
-        builder.append(12345678).unwrap();
-        builder.append_null().unwrap();
-        builder.append(22345678).unwrap();
-        let array = builder.finish();
-
-        assert_eq!(
-            array.keys(),
-            &UInt8Array::from(vec![Some(0), None, Some(1)])
-        );
-
-        // Values are polymorphic and so require a downcast.
-        let av = array.values();
-        let ava: &UInt32Array = av.as_any().downcast_ref::<UInt32Array>().unwrap();
-        let avs: &[u32] = ava.values();
-
-        assert_eq!(array.is_null(0), false);
-        assert_eq!(array.is_null(1), true);
-        assert_eq!(array.is_null(2), false);
-
-        assert_eq!(avs, &[12345678, 22345678]);
-    }
-
-    #[test]
-    fn test_string_dictionary_builder() {
-        let key_builder = PrimitiveBuilder::<Int8Type>::new(5);
-        let value_builder = StringBuilder::new(2);
-        let mut builder = StringDictionaryBuilder::new(key_builder, value_builder);
-        builder.append("abc").unwrap();
-        builder.append_null().unwrap();
-        builder.append("def").unwrap();
-        builder.append("def").unwrap();
-        builder.append("abc").unwrap();
-        let array = builder.finish();
-
-        assert_eq!(
-            array.keys(),
-            &Int8Array::from(vec![Some(0), None, Some(1), Some(1), Some(0)])
-        );
-
-        // Values are polymorphic and so require a downcast.
-        let av = array.values();
-        let ava: &StringArray = av.as_any().downcast_ref::<StringArray>().unwrap();
-
-        assert_eq!(ava.value(0), "abc");
-        assert_eq!(ava.value(1), "def");
-    }
-
-    #[test]
-    fn test_string_dictionary_builder_with_existing_dictionary() {
-        let dictionary = StringArray::from(vec![None, Some("def"), Some("abc")]);
-
-        let key_builder = PrimitiveBuilder::<Int8Type>::new(6);
-        let mut builder =
-            StringDictionaryBuilder::new_with_dictionary(key_builder, &dictionary)
-                .unwrap();
-        builder.append("abc").unwrap();
-        builder.append_null().unwrap();
-        builder.append("def").unwrap();
-        builder.append("def").unwrap();
-        builder.append("abc").unwrap();
-        builder.append("ghi").unwrap();
-        let array = builder.finish();
-
-        assert_eq!(
-            array.keys(),
-            &Int8Array::from(vec![Some(2), None, Some(1), Some(1), Some(2), Some(3)])
-        );
-
-        // Values are polymorphic and so require a downcast.
-        let av = array.values();
-        let ava: &StringArray = av.as_any().downcast_ref::<StringArray>().unwrap();
-
-        assert_eq!(ava.is_valid(0), false);
-        assert_eq!(ava.value(1), "def");
-        assert_eq!(ava.value(2), "abc");
-        assert_eq!(ava.value(3), "ghi");
-    }
-
-    #[test]
-    fn test_string_dictionary_builder_with_reserved_null_value() {
-        let dictionary: Vec<Option<&str>> = vec![None];
-        let dictionary = StringArray::from(dictionary);
-
-        let key_builder = PrimitiveBuilder::<Int16Type>::new(4);
-        let mut builder =
-            StringDictionaryBuilder::new_with_dictionary(key_builder, &dictionary)
-                .unwrap();
-        builder.append("abc").unwrap();
-        builder.append_null().unwrap();
-        builder.append("def").unwrap();
-        builder.append("abc").unwrap();
-        let array = builder.finish();
-
-        assert_eq!(array.is_null(1), true);
-        assert_eq!(array.is_valid(1), false);
-
-        let keys = array.keys_array();
-
-        assert_eq!(keys.value(0), 1);
-        assert_eq!(keys.is_null(1), true);
-        // zero initialization is currently guaranteed by Buffer allocation and resizing
-        assert_eq!(keys.value(1), 0);
-        assert_eq!(keys.value(2), 2);
-        assert_eq!(keys.value(3), 1);
-    }
-
-    #[test]
-    #[should_panic(expected = "DictionaryKeyOverflowError")]
-    fn test_primitive_dictionary_overflow() {
-        let key_builder = PrimitiveBuilder::<UInt8Type>::new(257);
-        let value_builder = PrimitiveBuilder::<UInt32Type>::new(257);
-        let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder);
-        // 256 unique keys.
-        for i in 0..256 {
-            builder.append(i + 1000).unwrap();
-        }
-        // Special error if the key overflows (256th entry)
-        builder.append(1257).unwrap();
-    }
-}
diff --git a/rust/arrow/src/array/cast.rs b/rust/arrow/src/array/cast.rs
deleted file mode 100644
index 0477f2831f9..00000000000
--- a/rust/arrow/src/array/cast.rs
+++ /dev/null
@@ -1,84 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Defines helper functions for force Array type downcast
-
-use crate::array::*;
-use crate::datatypes::*;
-
-/// Force downcast ArrayRef to PrimitiveArray<T>
-pub fn as_primitive_array<T>(arr: &ArrayRef) -> &PrimitiveArray<T>
-where
-    T: ArrowPrimitiveType,
-{
-    arr.as_any()
-        .downcast_ref::<PrimitiveArray<T>>()
-        .expect("Unable to downcast to primitive array")
-}
-
-/// Force downcast ArrayRef to DictionaryArray<T>
-pub fn as_dictionary_array<T>(arr: &ArrayRef) -> &DictionaryArray<T>
-where
-    T: ArrowDictionaryKeyType,
-{
-    arr.as_any()
-        .downcast_ref::<DictionaryArray<T>>()
-        .expect("Unable to downcast to dictionary array")
-}
-
-#[doc = "Force downcast ArrayRef to GenericListArray"]
-pub fn as_generic_list_array<S: OffsetSizeTrait>(arr: &ArrayRef) -> &GenericListArray<S> {
-    arr.as_any()
-        .downcast_ref::<GenericListArray<S>>()
-        .expect("Unable to downcast to list array")
-}
-
-#[doc = "Force downcast ArrayRef to ListArray"]
-#[inline]
-pub fn as_list_array(arr: &ArrayRef) -> &ListArray {
-    as_generic_list_array::<i32>(arr)
-}
-
-#[doc = "Force downcast ArrayRef to LargeListArray"]
-#[inline]
-pub fn as_large_list_array(arr: &ArrayRef) -> &LargeListArray {
-    as_generic_list_array::<i64>(arr)
-}
-
-macro_rules! array_downcast_fn {
-    ($name: ident, $arrty: ty, $arrty_str:expr) => {
-        #[doc = "Force downcast ArrayRef to "]
-        #[doc = $arrty_str]
-        pub fn $name(arr: &ArrayRef) -> &$arrty {
-            arr.as_any().downcast_ref::<$arrty>().expect(concat!(
-                "Unable to downcast to typed array through ",
-                stringify!($name)
-            ))
-        }
-    };
-
-    // use recursive macro to generate dynamic doc string for a given array type
-    ($name: ident, $arrty: ty) => {
-        array_downcast_fn!($name, $arrty, stringify!($arrty));
-    };
-}
-
-array_downcast_fn!(as_string_array, StringArray);
-array_downcast_fn!(as_largestring_array, LargeStringArray);
-array_downcast_fn!(as_boolean_array, BooleanArray);
-array_downcast_fn!(as_null_array, NullArray);
-array_downcast_fn!(as_struct_array, StructArray);
diff --git a/rust/arrow/src/array/data.rs b/rust/arrow/src/array/data.rs
deleted file mode 100644
index 7ae3858e35c..00000000000
--- a/rust/arrow/src/array/data.rs
+++ /dev/null
@@ -1,679 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Contains `ArrayData`, a generic representation of Arrow array data which encapsulates
-//! common attributes and operations for Arrow array.
-
-use std::mem;
-use std::sync::Arc;
-
-use crate::datatypes::{DataType, IntervalUnit};
-use crate::{bitmap::Bitmap, datatypes::ArrowNativeType};
-use crate::{
-    buffer::{Buffer, MutableBuffer},
-    util::bit_util,
-};
-
-use super::equal::equal;
-
-#[inline]
-pub(crate) fn count_nulls(
-    null_bit_buffer: Option<&Buffer>,
-    offset: usize,
-    len: usize,
-) -> usize {
-    if let Some(buf) = null_bit_buffer {
-        len.checked_sub(buf.count_set_bits_offset(offset, len))
-            .unwrap()
-    } else {
-        0
-    }
-}
-
-/// creates 2 [`MutableBuffer`]s with a given `capacity` (in slots).
-#[inline]
-pub(crate) fn new_buffers(data_type: &DataType, capacity: usize) -> [MutableBuffer; 2] {
-    let empty_buffer = MutableBuffer::new(0);
-    match data_type {
-        DataType::Null => [empty_buffer, MutableBuffer::new(0)],
-        DataType::Boolean => {
-            let bytes = bit_util::ceil(capacity, 8);
-            let buffer = MutableBuffer::new(bytes);
-            [buffer, empty_buffer]
-        }
-        DataType::UInt8 => [
-            MutableBuffer::new(capacity * mem::size_of::<u8>()),
-            empty_buffer,
-        ],
-        DataType::UInt16 => [
-            MutableBuffer::new(capacity * mem::size_of::<u16>()),
-            empty_buffer,
-        ],
-        DataType::UInt32 => [
-            MutableBuffer::new(capacity * mem::size_of::<u32>()),
-            empty_buffer,
-        ],
-        DataType::UInt64 => [
-            MutableBuffer::new(capacity * mem::size_of::<u64>()),
-            empty_buffer,
-        ],
-        DataType::Int8 => [
-            MutableBuffer::new(capacity * mem::size_of::<i8>()),
-            empty_buffer,
-        ],
-        DataType::Int16 => [
-            MutableBuffer::new(capacity * mem::size_of::<i16>()),
-            empty_buffer,
-        ],
-        DataType::Int32 => [
-            MutableBuffer::new(capacity * mem::size_of::<i32>()),
-            empty_buffer,
-        ],
-        DataType::Int64 => [
-            MutableBuffer::new(capacity * mem::size_of::<i64>()),
-            empty_buffer,
-        ],
-        DataType::Float32 => [
-            MutableBuffer::new(capacity * mem::size_of::<f32>()),
-            empty_buffer,
-        ],
-        DataType::Float64 => [
-            MutableBuffer::new(capacity * mem::size_of::<f64>()),
-            empty_buffer,
-        ],
-        DataType::Date32 | DataType::Time32(_) => [
-            MutableBuffer::new(capacity * mem::size_of::<i32>()),
-            empty_buffer,
-        ],
-        DataType::Date64
-        | DataType::Time64(_)
-        | DataType::Duration(_)
-        | DataType::Timestamp(_, _) => [
-            MutableBuffer::new(capacity * mem::size_of::<i64>()),
-            empty_buffer,
-        ],
-        DataType::Interval(IntervalUnit::YearMonth) => [
-            MutableBuffer::new(capacity * mem::size_of::<i32>()),
-            empty_buffer,
-        ],
-        DataType::Interval(IntervalUnit::DayTime) => [
-            MutableBuffer::new(capacity * mem::size_of::<i64>()),
-            empty_buffer,
-        ],
-        DataType::Utf8 | DataType::Binary => {
-            let mut buffer = MutableBuffer::new((1 + capacity) * mem::size_of::<i32>());
-            // safety: `unsafe` code assumes that this buffer is initialized with one element
-            buffer.push(0i32);
-            [buffer, MutableBuffer::new(capacity * mem::size_of::<u8>())]
-        }
-        DataType::LargeUtf8 | DataType::LargeBinary => {
-            let mut buffer = MutableBuffer::new((1 + capacity) * mem::size_of::<i64>());
-            // safety: `unsafe` code assumes that this buffer is initialized with one element
-            buffer.push(0i64);
-            [buffer, MutableBuffer::new(capacity * mem::size_of::<u8>())]
-        }
-        DataType::List(_) => {
-            // offset buffer always starts with a zero
-            let mut buffer = MutableBuffer::new((1 + capacity) * mem::size_of::<i32>());
-            buffer.push(0i32);
-            [buffer, empty_buffer]
-        }
-        DataType::LargeList(_) => {
-            // offset buffer always starts with a zero
-            let mut buffer = MutableBuffer::new((1 + capacity) * mem::size_of::<i64>());
-            buffer.push(0i64);
-            [buffer, empty_buffer]
-        }
-        DataType::FixedSizeBinary(size) => {
-            [MutableBuffer::new(capacity * *size as usize), empty_buffer]
-        }
-        DataType::Dictionary(child_data_type, _) => match child_data_type.as_ref() {
-            DataType::UInt8 => [
-                MutableBuffer::new(capacity * mem::size_of::<u8>()),
-                empty_buffer,
-            ],
-            DataType::UInt16 => [
-                MutableBuffer::new(capacity * mem::size_of::<u16>()),
-                empty_buffer,
-            ],
-            DataType::UInt32 => [
-                MutableBuffer::new(capacity * mem::size_of::<u32>()),
-                empty_buffer,
-            ],
-            DataType::UInt64 => [
-                MutableBuffer::new(capacity * mem::size_of::<u64>()),
-                empty_buffer,
-            ],
-            DataType::Int8 => [
-                MutableBuffer::new(capacity * mem::size_of::<i8>()),
-                empty_buffer,
-            ],
-            DataType::Int16 => [
-                MutableBuffer::new(capacity * mem::size_of::<i16>()),
-                empty_buffer,
-            ],
-            DataType::Int32 => [
-                MutableBuffer::new(capacity * mem::size_of::<i32>()),
-                empty_buffer,
-            ],
-            DataType::Int64 => [
-                MutableBuffer::new(capacity * mem::size_of::<i64>()),
-                empty_buffer,
-            ],
-            _ => unreachable!(),
-        },
-        DataType::Float16 => unreachable!(),
-        DataType::FixedSizeList(_, _) | DataType::Struct(_) => {
-            [empty_buffer, MutableBuffer::new(0)]
-        }
-        DataType::Decimal(_, _) => [
-            MutableBuffer::new(capacity * mem::size_of::<u8>()),
-            empty_buffer,
-        ],
-        DataType::Union(_) => unimplemented!(),
-    }
-}
-
-/// Maps 2 [`MutableBuffer`]s into a vector of [Buffer]s whose size depends on `data_type`.
-#[inline]
-pub(crate) fn into_buffers(
-    data_type: &DataType,
-    buffer1: MutableBuffer,
-    buffer2: MutableBuffer,
-) -> Vec<Buffer> {
-    match data_type {
-        DataType::Null | DataType::Struct(_) => vec![],
-        DataType::Utf8
-        | DataType::Binary
-        | DataType::LargeUtf8
-        | DataType::LargeBinary => vec![buffer1.into(), buffer2.into()],
-        _ => vec![buffer1.into()],
-    }
-}
-
-/// An generic representation of Arrow array data which encapsulates common attributes and
-/// operations for Arrow array. Specific operations for different arrays types (e.g.,
-/// primitive, list, struct) are implemented in `Array`.
-#[derive(Debug, Clone)]
-pub struct ArrayData {
-    /// The data type for this array data
-    data_type: DataType,
-
-    /// The number of elements in this array data
-    len: usize,
-
-    /// The number of null elements in this array data
-    null_count: usize,
-
-    /// The offset into this array data, in number of items
-    offset: usize,
-
-    /// The buffers for this array data. Note that depending on the array types, this
-    /// could hold different kinds of buffers (e.g., value buffer, value offset buffer)
-    /// at different positions.
-    buffers: Vec<Buffer>,
-
-    /// The child(ren) of this array. Only non-empty for nested types, currently
-    /// `ListArray` and `StructArray`.
-    child_data: Vec<ArrayData>,
-
-    /// The null bitmap. A `None` value for this indicates all values are non-null in
-    /// this array.
-    null_bitmap: Option<Bitmap>,
-}
-
-pub type ArrayDataRef = Arc<ArrayData>;
-
-impl ArrayData {
-    pub fn new(
-        data_type: DataType,
-        len: usize,
-        null_count: Option<usize>,
-        null_bit_buffer: Option<Buffer>,
-        offset: usize,
-        buffers: Vec<Buffer>,
-        child_data: Vec<ArrayData>,
-    ) -> Self {
-        let null_count = match null_count {
-            None => count_nulls(null_bit_buffer.as_ref(), offset, len),
-            Some(null_count) => null_count,
-        };
-        let null_bitmap = null_bit_buffer.map(Bitmap::from);
-        Self {
-            data_type,
-            len,
-            null_count,
-            offset,
-            buffers,
-            child_data,
-            null_bitmap,
-        }
-    }
-
-    /// Returns a builder to construct a `ArrayData` instance.
-    #[inline]
-    pub const fn builder(data_type: DataType) -> ArrayDataBuilder {
-        ArrayDataBuilder::new(data_type)
-    }
-
-    /// Returns a reference to the data type of this array data
-    #[inline]
-    pub const fn data_type(&self) -> &DataType {
-        &self.data_type
-    }
-
-    /// Returns a slice of buffers for this array data
-    pub fn buffers(&self) -> &[Buffer] {
-        &self.buffers[..]
-    }
-
-    /// Returns a slice of children data arrays
-    pub fn child_data(&self) -> &[ArrayData] {
-        &self.child_data[..]
-    }
-
-    /// Returns whether the element at index `i` is null
-    pub fn is_null(&self, i: usize) -> bool {
-        if let Some(ref b) = self.null_bitmap {
-            return !b.is_set(self.offset + i);
-        }
-        false
-    }
-
-    /// Returns a reference to the null bitmap of this array data
-    #[inline]
-    pub const fn null_bitmap(&self) -> &Option<Bitmap> {
-        &self.null_bitmap
-    }
-
-    /// Returns a reference to the null buffer of this array data.
-    pub fn null_buffer(&self) -> Option<&Buffer> {
-        self.null_bitmap().as_ref().map(|b| b.buffer_ref())
-    }
-
-    /// Returns whether the element at index `i` is not null
-    pub fn is_valid(&self, i: usize) -> bool {
-        if let Some(ref b) = self.null_bitmap {
-            return b.is_set(self.offset + i);
-        }
-        true
-    }
-
-    /// Returns the length (i.e., number of elements) of this array
-    #[inline]
-    pub const fn len(&self) -> usize {
-        self.len
-    }
-
-    // Returns whether array data is empty
-    #[inline]
-    pub const fn is_empty(&self) -> bool {
-        self.len == 0
-    }
-
-    /// Returns the offset of this array
-    #[inline]
-    pub const fn offset(&self) -> usize {
-        self.offset
-    }
-
-    /// Returns the total number of nulls in this array
-    #[inline]
-    pub const fn null_count(&self) -> usize {
-        self.null_count
-    }
-
-    /// Returns the total number of bytes of memory occupied by the buffers owned by this [ArrayData].
-    pub fn get_buffer_memory_size(&self) -> usize {
-        let mut size = 0;
-        for buffer in &self.buffers {
-            size += buffer.capacity();
-        }
-        if let Some(bitmap) = &self.null_bitmap {
-            size += bitmap.get_buffer_memory_size()
-        }
-        for child in &self.child_data {
-            size += child.get_buffer_memory_size();
-        }
-        size
-    }
-
-    /// Returns the total number of bytes of memory occupied physically by this [ArrayData].
-    pub fn get_array_memory_size(&self) -> usize {
-        let mut size = 0;
-        // Calculate size of the fields that don't have [get_array_memory_size] method internally.
-        size += mem::size_of_val(self)
-            - mem::size_of_val(&self.buffers)
-            - mem::size_of_val(&self.null_bitmap)
-            - mem::size_of_val(&self.child_data);
-
-        // Calculate rest of the fields top down which contain actual data
-        for buffer in &self.buffers {
-            size += mem::size_of_val(&buffer);
-            size += buffer.capacity();
-        }
-        if let Some(bitmap) = &self.null_bitmap {
-            size += bitmap.get_array_memory_size()
-        }
-        for child in &self.child_data {
-            size += child.get_array_memory_size();
-        }
-
-        size
-    }
-
-    /// Creates a zero-copy slice of itself. This creates a new [ArrayData]
-    /// with a different offset, len and a shifted null bitmap.
-    ///
-    /// # Panics
-    ///
-    /// Panics if `offset + length > self.len()`.
-    pub fn slice(&self, offset: usize, length: usize) -> ArrayData {
-        assert!((offset + length) <= self.len());
-
-        let mut new_data = self.clone();
-
-        new_data.len = length;
-        new_data.offset = offset + self.offset;
-
-        new_data.null_count =
-            count_nulls(new_data.null_buffer(), new_data.offset, new_data.len);
-
-        new_data
-    }
-
-    /// Returns the `buffer` as a slice of type `T` starting at self.offset
-    /// # Panics
-    /// This function panics if:
-    /// * the buffer is not byte-aligned with type T, or
-    /// * the datatype is `Boolean` (it corresponds to a bit-packed buffer where the offset is not applicable)
-    #[inline]
-    pub(crate) fn buffer<T: ArrowNativeType>(&self, buffer: usize) -> &[T] {
-        let values = unsafe { self.buffers[buffer].as_slice().align_to::<T>() };
-        if !values.0.is_empty() || !values.2.is_empty() {
-            panic!("The buffer is not byte-aligned with its interpretation")
-        };
-        assert_ne!(self.data_type, DataType::Boolean);
-        &values.1[self.offset..]
-    }
-
-    /// Returns a new empty [ArrayData] valid for `data_type`.
-    pub(super) fn new_empty(data_type: &DataType) -> Self {
-        let buffers = new_buffers(data_type, 0);
-        let [buffer1, buffer2] = buffers;
-        let buffers = into_buffers(data_type, buffer1, buffer2);
-
-        let child_data = match data_type {
-            DataType::Null
-            | DataType::Boolean
-            | DataType::UInt8
-            | DataType::UInt16
-            | DataType::UInt32
-            | DataType::UInt64
-            | DataType::Int8
-            | DataType::Int16
-            | DataType::Int32
-            | DataType::Int64
-            | DataType::Float32
-            | DataType::Float64
-            | DataType::Date32
-            | DataType::Date64
-            | DataType::Time32(_)
-            | DataType::Time64(_)
-            | DataType::Duration(_)
-            | DataType::Timestamp(_, _)
-            | DataType::Utf8
-            | DataType::Binary
-            | DataType::LargeUtf8
-            | DataType::LargeBinary
-            | DataType::Interval(_)
-            | DataType::FixedSizeBinary(_)
-            | DataType::Decimal(_, _) => vec![],
-            DataType::List(field) => {
-                vec![Self::new_empty(field.data_type())]
-            }
-            DataType::FixedSizeList(field, _) => {
-                vec![Self::new_empty(field.data_type())]
-            }
-            DataType::LargeList(field) => {
-                vec![Self::new_empty(field.data_type())]
-            }
-            DataType::Struct(fields) => fields
-                .iter()
-                .map(|field| Self::new_empty(field.data_type()))
-                .collect(),
-            DataType::Union(_) => unimplemented!(),
-            DataType::Dictionary(_, data_type) => {
-                vec![Self::new_empty(data_type)]
-            }
-            DataType::Float16 => unreachable!(),
-        };
-
-        Self::new(data_type.clone(), 0, Some(0), None, 0, buffers, child_data)
-    }
-}
-
-impl PartialEq for ArrayData {
-    fn eq(&self, other: &Self) -> bool {
-        equal(self, other)
-    }
-}
-
-/// Builder for `ArrayData` type
-#[derive(Debug)]
-pub struct ArrayDataBuilder {
-    data_type: DataType,
-    len: usize,
-    null_count: Option<usize>,
-    null_bit_buffer: Option<Buffer>,
-    offset: usize,
-    buffers: Vec<Buffer>,
-    child_data: Vec<ArrayData>,
-}
-
-impl ArrayDataBuilder {
-    #[inline]
-    pub const fn new(data_type: DataType) -> Self {
-        Self {
-            data_type,
-            len: 0,
-            null_count: None,
-            null_bit_buffer: None,
-            offset: 0,
-            buffers: vec![],
-            child_data: vec![],
-        }
-    }
-
-    #[inline]
-    pub const fn len(mut self, n: usize) -> Self {
-        self.len = n;
-        self
-    }
-
-    pub fn null_bit_buffer(mut self, buf: Buffer) -> Self {
-        self.null_bit_buffer = Some(buf);
-        self
-    }
-
-    #[inline]
-    pub const fn offset(mut self, n: usize) -> Self {
-        self.offset = n;
-        self
-    }
-
-    pub fn buffers(mut self, v: Vec<Buffer>) -> Self {
-        self.buffers = v;
-        self
-    }
-
-    pub fn add_buffer(mut self, b: Buffer) -> Self {
-        self.buffers.push(b);
-        self
-    }
-
-    pub fn child_data(mut self, v: Vec<ArrayData>) -> Self {
-        self.child_data = v;
-        self
-    }
-
-    pub fn add_child_data(mut self, r: ArrayData) -> Self {
-        self.child_data.push(r);
-        self
-    }
-
-    pub fn build(self) -> ArrayData {
-        ArrayData::new(
-            self.data_type,
-            self.len,
-            self.null_count,
-            self.null_bit_buffer,
-            self.offset,
-            self.buffers,
-            self.child_data,
-        )
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    use crate::buffer::Buffer;
-    use crate::util::bit_util;
-
-    #[test]
-    fn test_new() {
-        let arr_data =
-            ArrayData::new(DataType::Boolean, 10, Some(1), None, 2, vec![], vec![]);
-        assert_eq!(10, arr_data.len());
-        assert_eq!(1, arr_data.null_count());
-        assert_eq!(2, arr_data.offset());
-        assert_eq!(0, arr_data.buffers().len());
-        assert_eq!(0, arr_data.child_data().len());
-    }
-
-    #[test]
-    fn test_builder() {
-        let child_arr_data = ArrayData::new(
-            DataType::Int32,
-            5,
-            Some(0),
-            None,
-            0,
-            vec![Buffer::from_slice_ref(&[1i32, 2, 3, 4, 5])],
-            vec![],
-        );
-        let v = vec![0, 1, 2, 3];
-        let b1 = Buffer::from(&v[..]);
-        let arr_data = ArrayData::builder(DataType::Int32)
-            .len(20)
-            .offset(5)
-            .add_buffer(b1)
-            .null_bit_buffer(Buffer::from(vec![
-                0b01011111, 0b10110101, 0b01100011, 0b00011110,
-            ]))
-            .add_child_data(child_arr_data.clone())
-            .build();
-
-        assert_eq!(20, arr_data.len());
-        assert_eq!(10, arr_data.null_count());
-        assert_eq!(5, arr_data.offset());
-        assert_eq!(1, arr_data.buffers().len());
-        assert_eq!(&[0, 1, 2, 3], arr_data.buffers()[0].as_slice());
-        assert_eq!(1, arr_data.child_data().len());
-        assert_eq!(child_arr_data, arr_data.child_data()[0]);
-    }
-
-    #[test]
-    fn test_null_count() {
-        let mut bit_v: [u8; 2] = [0; 2];
-        bit_util::set_bit(&mut bit_v, 0);
-        bit_util::set_bit(&mut bit_v, 3);
-        bit_util::set_bit(&mut bit_v, 10);
-        let arr_data = ArrayData::builder(DataType::Int32)
-            .len(16)
-            .null_bit_buffer(Buffer::from(bit_v))
-            .build();
-        assert_eq!(13, arr_data.null_count());
-
-        // Test with offset
-        let mut bit_v: [u8; 2] = [0; 2];
-        bit_util::set_bit(&mut bit_v, 0);
-        bit_util::set_bit(&mut bit_v, 3);
-        bit_util::set_bit(&mut bit_v, 10);
-        let arr_data = ArrayData::builder(DataType::Int32)
-            .len(12)
-            .offset(2)
-            .null_bit_buffer(Buffer::from(bit_v))
-            .build();
-        assert_eq!(10, arr_data.null_count());
-    }
-
-    #[test]
-    fn test_null_buffer_ref() {
-        let mut bit_v: [u8; 2] = [0; 2];
-        bit_util::set_bit(&mut bit_v, 0);
-        bit_util::set_bit(&mut bit_v, 3);
-        bit_util::set_bit(&mut bit_v, 10);
-        let arr_data = ArrayData::builder(DataType::Int32)
-            .len(16)
-            .null_bit_buffer(Buffer::from(bit_v))
-            .build();
-        assert!(arr_data.null_buffer().is_some());
-        assert_eq!(&bit_v, arr_data.null_buffer().unwrap().as_slice());
-    }
-
-    #[test]
-    fn test_slice() {
-        let mut bit_v: [u8; 2] = [0; 2];
-        bit_util::set_bit(&mut bit_v, 0);
-        bit_util::set_bit(&mut bit_v, 3);
-        bit_util::set_bit(&mut bit_v, 10);
-        let data = ArrayData::builder(DataType::Int32)
-            .len(16)
-            .null_bit_buffer(Buffer::from(bit_v))
-            .build();
-        let new_data = data.slice(1, 15);
-        assert_eq!(data.len() - 1, new_data.len());
-        assert_eq!(1, new_data.offset());
-        assert_eq!(data.null_count(), new_data.null_count());
-
-        // slice of a slice (removes one null)
-        let new_data = new_data.slice(1, 14);
-        assert_eq!(data.len() - 2, new_data.len());
-        assert_eq!(2, new_data.offset());
-        assert_eq!(data.null_count() - 1, new_data.null_count());
-    }
-
-    #[test]
-    fn test_equality() {
-        let int_data = ArrayData::builder(DataType::Int32).build();
-        let float_data = ArrayData::builder(DataType::Float32).build();
-        assert_ne!(int_data, float_data);
-    }
-
-    #[test]
-    fn test_count_nulls() {
-        let null_buffer = Some(Buffer::from(vec![0b00010110, 0b10011111]));
-        let count = count_nulls(null_buffer.as_ref(), 0, 16);
-        assert_eq!(count, 7);
-
-        let count = count_nulls(null_buffer.as_ref(), 4, 8);
-        assert_eq!(count, 3);
-    }
-}
diff --git a/rust/arrow/src/array/equal/boolean.rs b/rust/arrow/src/array/equal/boolean.rs
deleted file mode 100644
index 35c9786e49f..00000000000
--- a/rust/arrow/src/array/equal/boolean.rs
+++ /dev/null
@@ -1,93 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use crate::array::{data::count_nulls, ArrayData};
-use crate::buffer::Buffer;
-use crate::util::bit_util::get_bit;
-
-use super::utils::{equal_bits, equal_len};
-
-pub(super) fn boolean_equal(
-    lhs: &ArrayData,
-    rhs: &ArrayData,
-    lhs_nulls: Option<&Buffer>,
-    rhs_nulls: Option<&Buffer>,
-    mut lhs_start: usize,
-    mut rhs_start: usize,
-    mut len: usize,
-) -> bool {
-    let lhs_values = lhs.buffers()[0].as_slice();
-    let rhs_values = rhs.buffers()[0].as_slice();
-
-    let lhs_null_count = count_nulls(lhs_nulls, lhs_start, len);
-    let rhs_null_count = count_nulls(rhs_nulls, rhs_start, len);
-
-    if lhs_null_count == 0 && rhs_null_count == 0 {
-        // Optimize performance for starting offset at u8 boundary.
-        if lhs_start % 8 == 0 && rhs_start % 8 == 0 {
-            let quot = len / 8;
-            if quot > 0
-                && !equal_len(
-                    lhs_values,
-                    rhs_values,
-                    lhs_start / 8 + lhs.offset(),
-                    rhs_start / 8 + rhs.offset(),
-                    quot,
-                )
-            {
-                return false;
-            }
-
-            // Calculate for suffix bits.
-            let rem = len % 8;
-            if rem == 0 {
-                return true;
-            } else {
-                let aligned_bits = len - rem;
-                lhs_start += aligned_bits;
-                rhs_start += aligned_bits;
-                len = rem
-            }
-        }
-
-        equal_bits(
-            lhs_values,
-            rhs_values,
-            lhs_start + lhs.offset(),
-            rhs_start + rhs.offset(),
-            len,
-        )
-    } else {
-        // get a ref of the null buffer bytes, to use in testing for nullness
-        let lhs_null_bytes = lhs_nulls.as_ref().unwrap().as_slice();
-        let rhs_null_bytes = rhs_nulls.as_ref().unwrap().as_slice();
-
-        let lhs_start = lhs.offset() + lhs_start;
-        let rhs_start = rhs.offset() + rhs_start;
-
-        (0..len).all(|i| {
-            let lhs_pos = lhs_start + i;
-            let rhs_pos = rhs_start + i;
-            let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos);
-            let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos);
-
-            lhs_is_null
-                || (lhs_is_null == rhs_is_null)
-                    && equal_bits(lhs_values, rhs_values, lhs_pos, rhs_pos, 1)
-        })
-    }
-}
diff --git a/rust/arrow/src/array/equal/decimal.rs b/rust/arrow/src/array/equal/decimal.rs
deleted file mode 100644
index 1ee6ec9b543..00000000000
--- a/rust/arrow/src/array/equal/decimal.rs
+++ /dev/null
@@ -1,76 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use crate::array::{data::count_nulls, ArrayData};
-use crate::buffer::Buffer;
-use crate::datatypes::DataType;
-use crate::util::bit_util::get_bit;
-
-use super::utils::equal_len;
-
-pub(super) fn decimal_equal(
-    lhs: &ArrayData,
-    rhs: &ArrayData,
-    lhs_nulls: Option<&Buffer>,
-    rhs_nulls: Option<&Buffer>,
-    lhs_start: usize,
-    rhs_start: usize,
-    len: usize,
-) -> bool {
-    let size = match lhs.data_type() {
-        DataType::Decimal(_, _) => 16,
-        _ => unreachable!(),
-    };
-
-    let lhs_values = &lhs.buffers()[0].as_slice()[lhs.offset() * size..];
-    let rhs_values = &rhs.buffers()[0].as_slice()[rhs.offset() * size..];
-
-    let lhs_null_count = count_nulls(lhs_nulls, lhs_start, len);
-    let rhs_null_count = count_nulls(rhs_nulls, rhs_start, len);
-
-    if lhs_null_count == 0 && rhs_null_count == 0 {
-        equal_len(
-            lhs_values,
-            rhs_values,
-            size * lhs_start,
-            size * rhs_start,
-            size * len,
-        )
-    } else {
-        // get a ref of the null buffer bytes, to use in testing for nullness
-        let lhs_null_bytes = lhs_nulls.as_ref().unwrap().as_slice();
-        let rhs_null_bytes = rhs_nulls.as_ref().unwrap().as_slice();
-        // with nulls, we need to compare item by item whenever it is not null
-        (0..len).all(|i| {
-            let lhs_pos = lhs_start + i;
-            let rhs_pos = rhs_start + i;
-
-            let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos + lhs.offset());
-            let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos + rhs.offset());
-
-            lhs_is_null
-                || (lhs_is_null == rhs_is_null)
-                    && equal_len(
-                        lhs_values,
-                        rhs_values,
-                        lhs_pos * size,
-                        rhs_pos * size,
-                        size, // 1 * size since we are comparing a single entry
-                    )
-        })
-    }
-}
diff --git a/rust/arrow/src/array/equal/dictionary.rs b/rust/arrow/src/array/equal/dictionary.rs
deleted file mode 100644
index 22add2494d2..00000000000
--- a/rust/arrow/src/array/equal/dictionary.rs
+++ /dev/null
@@ -1,82 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use crate::array::{data::count_nulls, ArrayData};
-use crate::buffer::Buffer;
-use crate::datatypes::ArrowNativeType;
-use crate::util::bit_util::get_bit;
-
-use super::equal_range;
-
-pub(super) fn dictionary_equal<T: ArrowNativeType>(
-    lhs: &ArrayData,
-    rhs: &ArrayData,
-    lhs_nulls: Option<&Buffer>,
-    rhs_nulls: Option<&Buffer>,
-    lhs_start: usize,
-    rhs_start: usize,
-    len: usize,
-) -> bool {
-    let lhs_keys = lhs.buffer::<T>(0);
-    let rhs_keys = rhs.buffer::<T>(0);
-
-    let lhs_values = &lhs.child_data()[0];
-    let rhs_values = &rhs.child_data()[0];
-
-    let lhs_null_count = count_nulls(lhs_nulls, lhs_start, len);
-    let rhs_null_count = count_nulls(rhs_nulls, rhs_start, len);
-
-    if lhs_null_count == 0 && rhs_null_count == 0 {
-        (0..len).all(|i| {
-            let lhs_pos = lhs_start + i;
-            let rhs_pos = rhs_start + i;
-
-            equal_range(
-                lhs_values,
-                rhs_values,
-                lhs_values.null_buffer(),
-                rhs_values.null_buffer(),
-                lhs_keys[lhs_pos].to_usize().unwrap(),
-                rhs_keys[rhs_pos].to_usize().unwrap(),
-                1,
-            )
-        })
-    } else {
-        // get a ref of the null buffer bytes, to use in testing for nullness
-        let lhs_null_bytes = lhs_nulls.as_ref().unwrap().as_slice();
-        let rhs_null_bytes = rhs_nulls.as_ref().unwrap().as_slice();
-        (0..len).all(|i| {
-            let lhs_pos = lhs_start + i;
-            let rhs_pos = rhs_start + i;
-
-            let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos + lhs.offset());
-            let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos + rhs.offset());
-
-            lhs_is_null
-                || (lhs_is_null == rhs_is_null)
-                    && equal_range(
-                        lhs_values,
-                        rhs_values,
-                        lhs_values.null_buffer(),
-                        rhs_values.null_buffer(),
-                        lhs_keys[lhs_pos].to_usize().unwrap(),
-                        rhs_keys[rhs_pos].to_usize().unwrap(),
-                        1,
-                    )
-        })
-    }
-}
diff --git a/rust/arrow/src/array/equal/fixed_binary.rs b/rust/arrow/src/array/equal/fixed_binary.rs
deleted file mode 100644
index 5f8f93232d5..00000000000
--- a/rust/arrow/src/array/equal/fixed_binary.rs
+++ /dev/null
@@ -1,76 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use crate::array::{data::count_nulls, ArrayData};
-use crate::buffer::Buffer;
-use crate::datatypes::DataType;
-use crate::util::bit_util::get_bit;
-
-use super::utils::equal_len;
-
-pub(super) fn fixed_binary_equal(
-    lhs: &ArrayData,
-    rhs: &ArrayData,
-    lhs_nulls: Option<&Buffer>,
-    rhs_nulls: Option<&Buffer>,
-    lhs_start: usize,
-    rhs_start: usize,
-    len: usize,
-) -> bool {
-    let size = match lhs.data_type() {
-        DataType::FixedSizeBinary(i) => *i as usize,
-        _ => unreachable!(),
-    };
-
-    let lhs_values = &lhs.buffers()[0].as_slice()[lhs.offset() * size..];
-    let rhs_values = &rhs.buffers()[0].as_slice()[rhs.offset() * size..];
-
-    let lhs_null_count = count_nulls(lhs_nulls, lhs_start, len);
-    let rhs_null_count = count_nulls(rhs_nulls, rhs_start, len);
-
-    if lhs_null_count == 0 && rhs_null_count == 0 {
-        equal_len(
-            lhs_values,
-            rhs_values,
-            size * lhs_start,
-            size * rhs_start,
-            size * len,
-        )
-    } else {
-        // get a ref of the null buffer bytes, to use in testing for nullness
-        let lhs_null_bytes = lhs_nulls.as_ref().unwrap().as_slice();
-        let rhs_null_bytes = rhs_nulls.as_ref().unwrap().as_slice();
-        // with nulls, we need to compare item by item whenever it is not null
-        (0..len).all(|i| {
-            let lhs_pos = lhs_start + i;
-            let rhs_pos = rhs_start + i;
-
-            let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos + lhs.offset());
-            let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos + rhs.offset());
-
-            lhs_is_null
-                || (lhs_is_null == rhs_is_null)
-                    && equal_len(
-                        lhs_values,
-                        rhs_values,
-                        lhs_pos * size,
-                        rhs_pos * size,
-                        size, // 1 * size since we are comparing a single entry
-                    )
-        })
-    }
-}
diff --git a/rust/arrow/src/array/equal/fixed_list.rs b/rust/arrow/src/array/equal/fixed_list.rs
deleted file mode 100644
index e708a06efcd..00000000000
--- a/rust/arrow/src/array/equal/fixed_list.rs
+++ /dev/null
@@ -1,80 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use crate::array::{data::count_nulls, ArrayData};
-use crate::buffer::Buffer;
-use crate::datatypes::DataType;
-use crate::util::bit_util::get_bit;
-
-use super::equal_range;
-
-pub(super) fn fixed_list_equal(
-    lhs: &ArrayData,
-    rhs: &ArrayData,
-    lhs_nulls: Option<&Buffer>,
-    rhs_nulls: Option<&Buffer>,
-    lhs_start: usize,
-    rhs_start: usize,
-    len: usize,
-) -> bool {
-    let size = match lhs.data_type() {
-        DataType::FixedSizeList(_, i) => *i as usize,
-        _ => unreachable!(),
-    };
-
-    let lhs_values = &lhs.child_data()[0];
-    let rhs_values = &rhs.child_data()[0];
-
-    let lhs_null_count = count_nulls(lhs_nulls, lhs_start, len);
-    let rhs_null_count = count_nulls(rhs_nulls, rhs_start, len);
-
-    if lhs_null_count == 0 && rhs_null_count == 0 {
-        equal_range(
-            lhs_values,
-            rhs_values,
-            lhs_values.null_buffer(),
-            rhs_values.null_buffer(),
-            size * lhs_start,
-            size * rhs_start,
-            size * len,
-        )
-    } else {
-        // get a ref of the null buffer bytes, to use in testing for nullness
-        let lhs_null_bytes = lhs_nulls.as_ref().unwrap().as_slice();
-        let rhs_null_bytes = rhs_nulls.as_ref().unwrap().as_slice();
-        // with nulls, we need to compare item by item whenever it is not null
-        (0..len).all(|i| {
-            let lhs_pos = lhs_start + i;
-            let rhs_pos = rhs_start + i;
-
-            let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos + lhs.offset());
-            let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos + rhs.offset());
-
-            lhs_is_null
-                || (lhs_is_null == rhs_is_null)
-                    && equal_range(
-                        lhs_values,
-                        rhs_values,
-                        lhs_values.null_buffer(),
-                        rhs_values.null_buffer(),
-                        lhs_pos * size,
-                        rhs_pos * size,
-                        size, // 1 * size since we are comparing a single entry
-                    )
-        })
-    }
-}
diff --git a/rust/arrow/src/array/equal/list.rs b/rust/arrow/src/array/equal/list.rs
deleted file mode 100644
index 331cdc7c614..00000000000
--- a/rust/arrow/src/array/equal/list.rs
+++ /dev/null
@@ -1,172 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use crate::{
-    array::ArrayData,
-    array::{data::count_nulls, OffsetSizeTrait},
-    buffer::Buffer,
-    util::bit_util::get_bit,
-};
-
-use super::{equal_range, utils::child_logical_null_buffer};
-
-fn lengths_equal<T: OffsetSizeTrait>(lhs: &[T], rhs: &[T]) -> bool {
-    // invariant from `base_equal`
-    debug_assert_eq!(lhs.len(), rhs.len());
-
-    if lhs.is_empty() {
-        return true;
-    }
-
-    if lhs[0] == T::zero() && rhs[0] == T::zero() {
-        return lhs == rhs;
-    };
-
-    // The expensive case, e.g.
-    // [0, 2, 4, 6, 9] == [4, 6, 8, 10, 13]
-    lhs.windows(2)
-        .zip(rhs.windows(2))
-        .all(|(lhs_offsets, rhs_offsets)| {
-            // length of left == length of right
-            (lhs_offsets[1] - lhs_offsets[0]) == (rhs_offsets[1] - rhs_offsets[0])
-        })
-}
-
-#[allow(clippy::too_many_arguments)]
-#[inline]
-fn offset_value_equal<T: OffsetSizeTrait>(
-    lhs_values: &ArrayData,
-    rhs_values: &ArrayData,
-    lhs_nulls: Option<&Buffer>,
-    rhs_nulls: Option<&Buffer>,
-    lhs_offsets: &[T],
-    rhs_offsets: &[T],
-    lhs_pos: usize,
-    rhs_pos: usize,
-    len: usize,
-) -> bool {
-    let lhs_start = lhs_offsets[lhs_pos].to_usize().unwrap();
-    let rhs_start = rhs_offsets[rhs_pos].to_usize().unwrap();
-    let lhs_len = lhs_offsets[lhs_pos + len] - lhs_offsets[lhs_pos];
-    let rhs_len = rhs_offsets[rhs_pos + len] - rhs_offsets[rhs_pos];
-
-    lhs_len == rhs_len
-        && equal_range(
-            lhs_values,
-            rhs_values,
-            lhs_nulls,
-            rhs_nulls,
-            lhs_start,
-            rhs_start,
-            lhs_len.to_usize().unwrap(),
-        )
-}
-
-pub(super) fn list_equal<T: OffsetSizeTrait>(
-    lhs: &ArrayData,
-    rhs: &ArrayData,
-    lhs_nulls: Option<&Buffer>,
-    rhs_nulls: Option<&Buffer>,
-    lhs_start: usize,
-    rhs_start: usize,
-    len: usize,
-) -> bool {
-    let lhs_offsets = lhs.buffer::<T>(0);
-    let rhs_offsets = rhs.buffer::<T>(0);
-
-    // There is an edge-case where a n-length list that has 0 children, results in panics.
-    // For example; an array with offsets [0, 0, 0, 0, 0] has 4 slots, but will have
-    // no valid children.
-    // Under logical equality, the child null bitmap will be an empty buffer, as there are
-    // no child values. This causes panics when trying to count set bits.
-    //
-    // We caught this by chance from an accidental test-case, but due to the nature of this
-    // crash only occuring on list equality checks, we are adding a check here, instead of
-    // on the buffer/bitmap utilities, as a length check would incur a penalty for almost all
-    // other use-cases.
-    //
-    // The solution is to check the number of child values from offsets, and return `true` if
-    // they = 0. Empty arrays are equal, so this is correct.
-    //
-    // It's unlikely that one would create a n-length list array with no values, where n > 0,
-    // however, one is more likely to slice into a list array and get a region that has 0
-    // child values.
-    // The test that triggered this behaviour had [4, 4] as a slice of 1 value slot.
-    let lhs_child_length = lhs_offsets.get(len).unwrap().to_usize().unwrap()
-        - lhs_offsets.first().unwrap().to_usize().unwrap();
-    let rhs_child_length = rhs_offsets.get(len).unwrap().to_usize().unwrap()
-        - rhs_offsets.first().unwrap().to_usize().unwrap();
-
-    if lhs_child_length == 0 && lhs_child_length == rhs_child_length {
-        return true;
-    }
-
-    let lhs_values = &lhs.child_data()[0];
-    let rhs_values = &rhs.child_data()[0];
-
-    let lhs_null_count = count_nulls(lhs_nulls, lhs_start, len);
-    let rhs_null_count = count_nulls(rhs_nulls, rhs_start, len);
-
-    // compute the child logical bitmap
-    let child_lhs_nulls =
-        child_logical_null_buffer(lhs, lhs_nulls, lhs.child_data().get(0).unwrap());
-    let child_rhs_nulls =
-        child_logical_null_buffer(rhs, rhs_nulls, rhs.child_data().get(0).unwrap());
-
-    if lhs_null_count == 0 && rhs_null_count == 0 {
-        lengths_equal(
-            &lhs_offsets[lhs_start..lhs_start + len],
-            &rhs_offsets[rhs_start..rhs_start + len],
-        ) && equal_range(
-            lhs_values,
-            rhs_values,
-            child_lhs_nulls.as_ref(),
-            child_rhs_nulls.as_ref(),
-            lhs_offsets[lhs_start].to_usize().unwrap(),
-            rhs_offsets[rhs_start].to_usize().unwrap(),
-            (lhs_offsets[len] - lhs_offsets[lhs_start])
-                .to_usize()
-                .unwrap(),
-        )
-    } else {
-        // get a ref of the parent null buffer bytes, to use in testing for nullness
-        let lhs_null_bytes = lhs_nulls.unwrap().as_slice();
-        let rhs_null_bytes = rhs_nulls.unwrap().as_slice();
-        // with nulls, we need to compare item by item whenever it is not null
-        (0..len).all(|i| {
-            let lhs_pos = lhs_start + i;
-            let rhs_pos = rhs_start + i;
-
-            let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos + lhs.offset());
-            let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos + rhs.offset());
-
-            lhs_is_null
-                || (lhs_is_null == rhs_is_null)
-                    && offset_value_equal::<T>(
-                        lhs_values,
-                        rhs_values,
-                        child_lhs_nulls.as_ref(),
-                        child_rhs_nulls.as_ref(),
-                        lhs_offsets,
-                        rhs_offsets,
-                        lhs_pos,
-                        rhs_pos,
-                        1,
-                    )
-        })
-    }
-}
diff --git a/rust/arrow/src/array/equal/mod.rs b/rust/arrow/src/array/equal/mod.rs
deleted file mode 100644
index 0924fc193a6..00000000000
--- a/rust/arrow/src/array/equal/mod.rs
+++ /dev/null
@@ -1,1277 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Module containing functionality to compute array equality.
-//! This module uses [ArrayData] and does not
-//! depend on dynamic casting of `Array`.
-
-use super::{
-    Array, ArrayData, BinaryOffsetSizeTrait, BooleanArray, DecimalArray,
-    FixedSizeBinaryArray, FixedSizeListArray, GenericBinaryArray, GenericListArray,
-    GenericStringArray, NullArray, OffsetSizeTrait, PrimitiveArray,
-    StringOffsetSizeTrait, StructArray,
-};
-
-use crate::{
-    buffer::Buffer,
-    datatypes::{ArrowPrimitiveType, DataType, IntervalUnit},
-};
-
-mod boolean;
-mod decimal;
-mod dictionary;
-mod fixed_binary;
-mod fixed_list;
-mod list;
-mod null;
-mod primitive;
-mod structure;
-mod utils;
-mod variable_size;
-
-// these methods assume the same type, len and null count.
-// For this reason, they are not exposed and are instead used
-// to build the generic functions below (`equal_range` and `equal`).
-use boolean::boolean_equal;
-use decimal::decimal_equal;
-use dictionary::dictionary_equal;
-use fixed_binary::fixed_binary_equal;
-use fixed_list::fixed_list_equal;
-use list::list_equal;
-use null::null_equal;
-use primitive::primitive_equal;
-use structure::struct_equal;
-use variable_size::variable_sized_equal;
-
-impl PartialEq for dyn Array {
-    fn eq(&self, other: &Self) -> bool {
-        equal(self.data(), other.data())
-    }
-}
-
-impl<T: Array> PartialEq<T> for dyn Array {
-    fn eq(&self, other: &T) -> bool {
-        equal(self.data(), other.data())
-    }
-}
-
-impl PartialEq for NullArray {
-    fn eq(&self, other: &NullArray) -> bool {
-        equal(self.data(), other.data())
-    }
-}
-
-impl<T: ArrowPrimitiveType> PartialEq for PrimitiveArray<T> {
-    fn eq(&self, other: &PrimitiveArray<T>) -> bool {
-        equal(self.data(), other.data())
-    }
-}
-
-impl PartialEq for BooleanArray {
-    fn eq(&self, other: &BooleanArray) -> bool {
-        equal(self.data(), other.data())
-    }
-}
-
-impl<OffsetSize: StringOffsetSizeTrait> PartialEq for GenericStringArray<OffsetSize> {
-    fn eq(&self, other: &Self) -> bool {
-        equal(self.data(), other.data())
-    }
-}
-
-impl<OffsetSize: BinaryOffsetSizeTrait> PartialEq for GenericBinaryArray<OffsetSize> {
-    fn eq(&self, other: &Self) -> bool {
-        equal(self.data(), other.data())
-    }
-}
-
-impl PartialEq for FixedSizeBinaryArray {
-    fn eq(&self, other: &Self) -> bool {
-        equal(self.data(), other.data())
-    }
-}
-
-impl PartialEq for DecimalArray {
-    fn eq(&self, other: &Self) -> bool {
-        equal(self.data(), other.data())
-    }
-}
-
-impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericListArray<OffsetSize> {
-    fn eq(&self, other: &Self) -> bool {
-        equal(self.data(), other.data())
-    }
-}
-
-impl PartialEq for FixedSizeListArray {
-    fn eq(&self, other: &Self) -> bool {
-        equal(self.data(), other.data())
-    }
-}
-
-impl PartialEq for StructArray {
-    fn eq(&self, other: &Self) -> bool {
-        equal(self.data(), other.data())
-    }
-}
-
-/// Compares the values of two [ArrayData] starting at `lhs_start` and `rhs_start` respectively
-/// for `len` slots. The null buffers `lhs_nulls` and `rhs_nulls` inherit parent nullability.
-///
-/// If an array is a child of a struct or list, the array's nulls have to be merged with the parent.
-/// This then affects the null count of the array, thus the merged nulls are passed separately
-/// as `lhs_nulls` and `rhs_nulls` variables to functions.
-/// The nulls are merged with a bitwise AND, and null counts are recomputed where necessary.
-#[inline]
-fn equal_values(
-    lhs: &ArrayData,
-    rhs: &ArrayData,
-    lhs_nulls: Option<&Buffer>,
-    rhs_nulls: Option<&Buffer>,
-    lhs_start: usize,
-    rhs_start: usize,
-    len: usize,
-) -> bool {
-    match lhs.data_type() {
-        DataType::Null => null_equal(lhs, rhs, lhs_start, rhs_start, len),
-        DataType::Boolean => {
-            boolean_equal(lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len)
-        }
-        DataType::UInt8 => primitive_equal::<u8>(
-            lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
-        ),
-        DataType::UInt16 => primitive_equal::<u16>(
-            lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
-        ),
-        DataType::UInt32 => primitive_equal::<u32>(
-            lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
-        ),
-        DataType::UInt64 => primitive_equal::<u64>(
-            lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
-        ),
-        DataType::Int8 => primitive_equal::<i8>(
-            lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
-        ),
-        DataType::Int16 => primitive_equal::<i16>(
-            lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
-        ),
-        DataType::Int32 => primitive_equal::<i32>(
-            lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
-        ),
-        DataType::Int64 => primitive_equal::<i64>(
-            lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
-        ),
-        DataType::Float32 => primitive_equal::<f32>(
-            lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
-        ),
-        DataType::Float64 => primitive_equal::<f64>(
-            lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
-        ),
-        DataType::Date32
-        | DataType::Time32(_)
-        | DataType::Interval(IntervalUnit::YearMonth) => primitive_equal::<i32>(
-            lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
-        ),
-        DataType::Date64
-        | DataType::Interval(IntervalUnit::DayTime)
-        | DataType::Time64(_)
-        | DataType::Timestamp(_, _)
-        | DataType::Duration(_) => primitive_equal::<i64>(
-            lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
-        ),
-        DataType::Utf8 | DataType::Binary => variable_sized_equal::<i32>(
-            lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
-        ),
-        DataType::LargeUtf8 | DataType::LargeBinary => variable_sized_equal::<i64>(
-            lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
-        ),
-        DataType::FixedSizeBinary(_) => {
-            fixed_binary_equal(lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len)
-        }
-        DataType::Decimal(_, _) => {
-            decimal_equal(lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len)
-        }
-        DataType::List(_) => {
-            list_equal::<i32>(lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len)
-        }
-        DataType::LargeList(_) => {
-            list_equal::<i64>(lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len)
-        }
-        DataType::FixedSizeList(_, _) => {
-            fixed_list_equal(lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len)
-        }
-        DataType::Struct(_) => {
-            struct_equal(lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len)
-        }
-        DataType::Union(_) => unimplemented!("See ARROW-8576"),
-        DataType::Dictionary(data_type, _) => match data_type.as_ref() {
-            DataType::Int8 => dictionary_equal::<i8>(
-                lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
-            ),
-            DataType::Int16 => dictionary_equal::<i16>(
-                lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
-            ),
-            DataType::Int32 => dictionary_equal::<i32>(
-                lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
-            ),
-            DataType::Int64 => dictionary_equal::<i64>(
-                lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
-            ),
-            DataType::UInt8 => dictionary_equal::<u8>(
-                lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
-            ),
-            DataType::UInt16 => dictionary_equal::<u16>(
-                lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
-            ),
-            DataType::UInt32 => dictionary_equal::<u32>(
-                lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
-            ),
-            DataType::UInt64 => dictionary_equal::<u64>(
-                lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len,
-            ),
-            _ => unreachable!(),
-        },
-        DataType::Float16 => unreachable!(),
-    }
-}
-
-fn equal_range(
-    lhs: &ArrayData,
-    rhs: &ArrayData,
-    lhs_nulls: Option<&Buffer>,
-    rhs_nulls: Option<&Buffer>,
-    lhs_start: usize,
-    rhs_start: usize,
-    len: usize,
-) -> bool {
-    utils::base_equal(lhs, rhs)
-        && utils::equal_nulls(lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len)
-        && equal_values(lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len)
-}
-
-/// Logically compares two [ArrayData].
-/// Two arrays are logically equal if and only if:
-/// * their data types are equal
-/// * their lengths are equal
-/// * their null counts are equal
-/// * their null bitmaps are equal
-/// * each of their items are equal
-/// two items are equal when their in-memory representation is physically equal (i.e. same bit content).
-/// The physical comparison depend on the data type.
-/// # Panics
-/// This function may panic whenever any of the [ArrayData] does not follow the Arrow specification.
-/// (e.g. wrong number of buffers, buffer `len` does not correspond to the declared `len`)
-pub fn equal(lhs: &ArrayData, rhs: &ArrayData) -> bool {
-    let lhs_nulls = lhs.null_buffer();
-    let rhs_nulls = rhs.null_buffer();
-    utils::base_equal(lhs, rhs)
-        && lhs.null_count() == rhs.null_count()
-        && utils::equal_nulls(lhs, rhs, lhs_nulls, rhs_nulls, 0, 0, lhs.len())
-        && equal_values(lhs, rhs, lhs_nulls, rhs_nulls, 0, 0, lhs.len())
-}
-
-#[cfg(test)]
-mod tests {
-    use std::convert::TryFrom;
-    use std::sync::Arc;
-
-    use crate::array::{
-        array::Array, ArrayDataBuilder, ArrayRef, BinaryOffsetSizeTrait, BooleanArray,
-        DecimalBuilder, FixedSizeBinaryBuilder, FixedSizeListBuilder, GenericBinaryArray,
-        Int32Builder, ListBuilder, NullArray, PrimitiveBuilder, StringArray,
-        StringDictionaryBuilder, StringOffsetSizeTrait, StructArray,
-    };
-    use crate::array::{GenericStringArray, Int32Array};
-    use crate::buffer::Buffer;
-    use crate::datatypes::{Field, Int16Type, ToByteSlice};
-
-    use super::*;
-
-    #[test]
-    fn test_null_equal() {
-        let a = NullArray::new(12);
-        let a = a.data();
-        let b = NullArray::new(12);
-        let b = b.data();
-        test_equal(&a, &b, true);
-
-        let b = NullArray::new(10);
-        let b = b.data();
-        test_equal(&a, &b, false);
-
-        // Test the case where offset != 0
-
-        let a_slice = a.slice(2, 3);
-        let b_slice = b.slice(1, 3);
-        test_equal(&a_slice, &b_slice, true);
-
-        let a_slice = a.slice(5, 4);
-        let b_slice = b.slice(3, 3);
-        test_equal(&a_slice, &b_slice, false);
-    }
-
-    #[test]
-    fn test_boolean_equal() {
-        let a = BooleanArray::from(vec![false, false, true]);
-        let a = a.data();
-        let b = BooleanArray::from(vec![false, false, true]);
-        let b = b.data();
-        test_equal(&a, &b, true);
-
-        let b = BooleanArray::from(vec![false, false, false]);
-        let b = b.data();
-        test_equal(&a, &b, false);
-    }
-
-    #[test]
-    fn test_boolean_equal_nulls() {
-        let a = BooleanArray::from(vec![Some(false), None, None, Some(true)]);
-        let a = a.data();
-        let b = BooleanArray::from(vec![Some(false), None, None, Some(true)]);
-        let b = b.data();
-        test_equal(&a, &b, true);
-
-        let b = BooleanArray::from(vec![None, None, None, Some(true)]);
-        let b = b.data();
-        test_equal(&a, &b, false);
-
-        let b = BooleanArray::from(vec![Some(true), None, None, Some(true)]);
-        let b = b.data();
-        test_equal(&a, &b, false);
-    }
-
-    #[test]
-    fn test_boolean_equal_offset() {
-        let a = BooleanArray::from(vec![false, true, false, true, false, false, true]);
-        let a = a.data();
-        let b =
-            BooleanArray::from(vec![true, false, false, false, true, false, true, true]);
-        let b = b.data();
-        assert_eq!(equal(a, b), false);
-        assert_eq!(equal(b, a), false);
-
-        let a_slice = a.slice(2, 3);
-        let b_slice = b.slice(3, 3);
-        assert_eq!(equal(&a_slice, &b_slice), true);
-        assert_eq!(equal(&b_slice, &a_slice), true);
-
-        let a_slice = a.slice(3, 4);
-        let b_slice = b.slice(4, 4);
-        assert_eq!(equal(&a_slice, &b_slice), false);
-        assert_eq!(equal(&b_slice, &a_slice), false);
-
-        // Test the optimization cases where null_count == 0 and starts at 0 and len >= size_of(u8)
-
-        // Elements fill in `u8`'s exactly.
-        let mut vector = vec![false, false, true, true, true, true, true, true];
-        let a = BooleanArray::from(vector.clone());
-        let a = a.data();
-        let b = BooleanArray::from(vector.clone());
-        let b = b.data();
-        test_equal(&a, &b, true);
-
-        // Elements fill in `u8`s + suffix bits.
-        vector.push(true);
-        let a = BooleanArray::from(vector.clone());
-        let a = a.data();
-        let b = BooleanArray::from(vector);
-        let b = b.data();
-        test_equal(&a, &b, true);
-    }
-
-    #[test]
-    fn test_primitive() {
-        let cases = vec![
-            (
-                vec![Some(1), Some(2), Some(3)],
-                vec![Some(1), Some(2), Some(3)],
-                true,
-            ),
-            (
-                vec![Some(1), Some(2), Some(3)],
-                vec![Some(1), Some(2), Some(4)],
-                false,
-            ),
-            (
-                vec![Some(1), Some(2), None],
-                vec![Some(1), Some(2), None],
-                true,
-            ),
-            (
-                vec![Some(1), None, Some(3)],
-                vec![Some(1), Some(2), None],
-                false,
-            ),
-            (
-                vec![Some(1), None, None],
-                vec![Some(1), Some(2), None],
-                false,
-            ),
-        ];
-
-        for (lhs, rhs, expected) in cases {
-            let lhs = Int32Array::from(lhs);
-            let lhs = lhs.data();
-            let rhs = Int32Array::from(rhs);
-            let rhs = rhs.data();
-            test_equal(&lhs, &rhs, expected);
-        }
-    }
-
-    #[test]
-    fn test_primitive_slice() {
-        let cases = vec![
-            (
-                vec![Some(1), Some(2), Some(3)],
-                (0, 1),
-                vec![Some(1), Some(2), Some(3)],
-                (0, 1),
-                true,
-            ),
-            (
-                vec![Some(1), Some(2), Some(3)],
-                (1, 1),
-                vec![Some(1), Some(2), Some(3)],
-                (2, 1),
-                false,
-            ),
-            (
-                vec![Some(1), Some(2), None],
-                (1, 1),
-                vec![Some(1), None, Some(2)],
-                (2, 1),
-                true,
-            ),
-            (
-                vec![None, Some(2), None],
-                (1, 1),
-                vec![None, None, Some(2)],
-                (2, 1),
-                true,
-            ),
-            (
-                vec![Some(1), None, Some(2), None, Some(3)],
-                (2, 2),
-                vec![None, Some(2), None, Some(3)],
-                (1, 2),
-                true,
-            ),
-        ];
-
-        for (lhs, slice_lhs, rhs, slice_rhs, expected) in cases {
-            let lhs = Int32Array::from(lhs);
-            let lhs = lhs.data();
-            let lhs = lhs.slice(slice_lhs.0, slice_lhs.1);
-            let rhs = Int32Array::from(rhs);
-            let rhs = rhs.data();
-            let rhs = rhs.slice(slice_rhs.0, slice_rhs.1);
-
-            test_equal(&lhs, &rhs, expected);
-        }
-    }
-
-    fn test_equal(lhs: &ArrayData, rhs: &ArrayData, expected: bool) {
-        // equality is symmetric
-        assert_eq!(equal(lhs, lhs), true, "\n{:?}\n{:?}", lhs, lhs);
-        assert_eq!(equal(rhs, rhs), true, "\n{:?}\n{:?}", rhs, rhs);
-
-        assert_eq!(equal(lhs, rhs), expected, "\n{:?}\n{:?}", lhs, rhs);
-        assert_eq!(equal(rhs, lhs), expected, "\n{:?}\n{:?}", rhs, lhs);
-    }
-
-    fn binary_cases() -> Vec<(Vec<Option<String>>, Vec<Option<String>>, bool)> {
-        let base = vec![
-            Some("hello".to_owned()),
-            None,
-            None,
-            Some("world".to_owned()),
-            None,
-            None,
-        ];
-        let not_base = vec![
-            Some("hello".to_owned()),
-            Some("foo".to_owned()),
-            None,
-            Some("world".to_owned()),
-            None,
-            None,
-        ];
-        vec![
-            (
-                vec![Some("hello".to_owned()), Some("world".to_owned())],
-                vec![Some("hello".to_owned()), Some("world".to_owned())],
-                true,
-            ),
-            (
-                vec![Some("hello".to_owned()), Some("world".to_owned())],
-                vec![Some("hello".to_owned()), Some("arrow".to_owned())],
-                false,
-            ),
-            (base.clone(), base.clone(), true),
-            (base, not_base, false),
-        ]
-    }
-
-    fn test_generic_string_equal<OffsetSize: StringOffsetSizeTrait>() {
-        let cases = binary_cases();
-
-        for (lhs, rhs, expected) in cases {
-            let lhs = lhs.iter().map(|x| x.as_deref()).collect();
-            let rhs = rhs.iter().map(|x| x.as_deref()).collect();
-            let lhs = GenericStringArray::<OffsetSize>::from_opt_vec(lhs);
-            let lhs = lhs.data();
-            let rhs = GenericStringArray::<OffsetSize>::from_opt_vec(rhs);
-            let rhs = rhs.data();
-            test_equal(lhs, rhs, expected);
-        }
-    }
-
-    #[test]
-    fn test_string_equal() {
-        test_generic_string_equal::<i32>()
-    }
-
-    #[test]
-    fn test_large_string_equal() {
-        test_generic_string_equal::<i64>()
-    }
-
-    fn test_generic_binary_equal<OffsetSize: BinaryOffsetSizeTrait>() {
-        let cases = binary_cases();
-
-        for (lhs, rhs, expected) in cases {
-            let lhs = lhs
-                .iter()
-                .map(|x| x.as_deref().map(|x| x.as_bytes()))
-                .collect();
-            let rhs = rhs
-                .iter()
-                .map(|x| x.as_deref().map(|x| x.as_bytes()))
-                .collect();
-            let lhs = GenericBinaryArray::<OffsetSize>::from_opt_vec(lhs);
-            let lhs = lhs.data();
-            let rhs = GenericBinaryArray::<OffsetSize>::from_opt_vec(rhs);
-            let rhs = rhs.data();
-            test_equal(lhs, rhs, expected);
-        }
-    }
-
-    #[test]
-    fn test_binary_equal() {
-        test_generic_binary_equal::<i32>()
-    }
-
-    #[test]
-    fn test_large_binary_equal() {
-        test_generic_binary_equal::<i64>()
-    }
-
-    #[test]
-    fn test_string_offset() {
-        let a = StringArray::from(vec![Some("a"), None, Some("b")]);
-        let a = a.data();
-        let a = a.slice(2, 1);
-        let b = StringArray::from(vec![Some("b")]);
-        let b = b.data();
-
-        test_equal(&a, &b, true);
-    }
-
-    #[test]
-    fn test_string_offset_larger() {
-        let a = StringArray::from(vec![Some("a"), None, Some("b"), None, Some("c")]);
-        let a = a.data();
-        let b = StringArray::from(vec![None, Some("b"), None, Some("c")]);
-        let b = b.data();
-
-        test_equal(&a.slice(2, 2), &b.slice(0, 2), false);
-        test_equal(&a.slice(2, 2), &b.slice(1, 2), true);
-        test_equal(&a.slice(2, 2), &b.slice(2, 2), false);
-    }
-
-    #[test]
-    fn test_null() {
-        let a = NullArray::new(2);
-        let a = a.data();
-        let b = NullArray::new(2);
-        let b = b.data();
-        test_equal(&a, &b, true);
-
-        let b = NullArray::new(1);
-        let b = b.data();
-        test_equal(&a, &b, false);
-    }
-
-    fn create_list_array<U: AsRef<[i32]>, T: AsRef<[Option<U>]>>(data: T) -> ArrayData {
-        let mut builder = ListBuilder::new(Int32Builder::new(10));
-        for d in data.as_ref() {
-            if let Some(v) = d {
-                builder.values().append_slice(v.as_ref()).unwrap();
-                builder.append(true).unwrap()
-            } else {
-                builder.append(false).unwrap()
-            }
-        }
-        builder.finish().data().clone()
-    }
-
-    #[test]
-    fn test_list_equal() {
-        let a = create_list_array(&[Some(&[1, 2, 3]), Some(&[4, 5, 6])]);
-        let b = create_list_array(&[Some(&[1, 2, 3]), Some(&[4, 5, 6])]);
-        test_equal(&a, &b, true);
-
-        let b = create_list_array(&[Some(&[1, 2, 3]), Some(&[4, 5, 7])]);
-        test_equal(&a, &b, false);
-    }
-
-    // Test the case where null_count > 0
-    #[test]
-    fn test_list_null() {
-        let a =
-            create_list_array(&[Some(&[1, 2]), None, None, Some(&[3, 4]), None, None]);
-        let b =
-            create_list_array(&[Some(&[1, 2]), None, None, Some(&[3, 4]), None, None]);
-        test_equal(&a, &b, true);
-
-        let b = create_list_array(&[
-            Some(&[1, 2]),
-            None,
-            Some(&[5, 6]),
-            Some(&[3, 4]),
-            None,
-            None,
-        ]);
-        test_equal(&a, &b, false);
-
-        let b =
-            create_list_array(&[Some(&[1, 2]), None, None, Some(&[3, 5]), None, None]);
-        test_equal(&a, &b, false);
-
-        // a list where the nullness of values is determined by the list's bitmap
-        let c_values = Int32Array::from(vec![1, 2, -1, -2, 3, 4, -3, -4]);
-        let c = ArrayDataBuilder::new(DataType::List(Box::new(Field::new(
-            "item",
-            DataType::Int32,
-            true,
-        ))))
-        .len(6)
-        .add_buffer(Buffer::from(vec![0i32, 2, 3, 4, 6, 7, 8].to_byte_slice()))
-        .add_child_data(c_values.data().clone())
-        .null_bit_buffer(Buffer::from(vec![0b00001001]))
-        .build();
-
-        let d_values = Int32Array::from(vec![
-            Some(1),
-            Some(2),
-            None,
-            None,
-            Some(3),
-            Some(4),
-            None,
-            None,
-        ]);
-        let d = ArrayDataBuilder::new(DataType::List(Box::new(Field::new(
-            "item",
-            DataType::Int32,
-            true,
-        ))))
-        .len(6)
-        .add_buffer(Buffer::from(vec![0i32, 2, 3, 4, 6, 7, 8].to_byte_slice()))
-        .add_child_data(d_values.data().clone())
-        .null_bit_buffer(Buffer::from(vec![0b00001001]))
-        .build();
-        test_equal(&c, &d, true);
-    }
-
-    // Test the case where offset != 0
-    #[test]
-    fn test_list_offsets() {
-        let a =
-            create_list_array(&[Some(&[1, 2]), None, None, Some(&[3, 4]), None, None]);
-        let b =
-            create_list_array(&[Some(&[1, 2]), None, None, Some(&[3, 5]), None, None]);
-
-        let a_slice = a.slice(0, 3);
-        let b_slice = b.slice(0, 3);
-        test_equal(&a_slice, &b_slice, true);
-
-        let a_slice = a.slice(0, 5);
-        let b_slice = b.slice(0, 5);
-        test_equal(&a_slice, &b_slice, false);
-
-        let a_slice = a.slice(4, 1);
-        let b_slice = b.slice(4, 1);
-        test_equal(&a_slice, &b_slice, true);
-    }
-
-    fn create_fixed_size_binary_array<U: AsRef<[u8]>, T: AsRef<[Option<U>]>>(
-        data: T,
-    ) -> ArrayData {
-        let mut builder = FixedSizeBinaryBuilder::new(15, 5);
-
-        for d in data.as_ref() {
-            if let Some(v) = d {
-                builder.append_value(v.as_ref()).unwrap();
-            } else {
-                builder.append_null().unwrap();
-            }
-        }
-        builder.finish().data().clone()
-    }
-
-    #[test]
-    fn test_fixed_size_binary_equal() {
-        let a = create_fixed_size_binary_array(&[Some(b"hello"), Some(b"world")]);
-        let b = create_fixed_size_binary_array(&[Some(b"hello"), Some(b"world")]);
-        test_equal(&a, &b, true);
-
-        let b = create_fixed_size_binary_array(&[Some(b"hello"), Some(b"arrow")]);
-        test_equal(&a, &b, false);
-    }
-
-    // Test the case where null_count > 0
-    #[test]
-    fn test_fixed_size_binary_null() {
-        let a = create_fixed_size_binary_array(&[Some(b"hello"), None, Some(b"world")]);
-        let b = create_fixed_size_binary_array(&[Some(b"hello"), None, Some(b"world")]);
-        test_equal(&a, &b, true);
-
-        let b = create_fixed_size_binary_array(&[Some(b"hello"), Some(b"world"), None]);
-        test_equal(&a, &b, false);
-
-        let b = create_fixed_size_binary_array(&[Some(b"hello"), None, Some(b"arrow")]);
-        test_equal(&a, &b, false);
-    }
-
-    #[test]
-    fn test_fixed_size_binary_offsets() {
-        // Test the case where offset != 0
-        let a = create_fixed_size_binary_array(&[
-            Some(b"hello"),
-            None,
-            None,
-            Some(b"world"),
-            None,
-            None,
-        ]);
-        let b = create_fixed_size_binary_array(&[
-            Some(b"hello"),
-            None,
-            None,
-            Some(b"arrow"),
-            None,
-            None,
-        ]);
-
-        let a_slice = a.slice(0, 3);
-        let b_slice = b.slice(0, 3);
-        test_equal(&a_slice, &b_slice, true);
-
-        let a_slice = a.slice(0, 5);
-        let b_slice = b.slice(0, 5);
-        test_equal(&a_slice, &b_slice, false);
-
-        let a_slice = a.slice(4, 1);
-        let b_slice = b.slice(4, 1);
-        test_equal(&a_slice, &b_slice, true);
-
-        let a_slice = a.slice(3, 1);
-        let b_slice = b.slice(3, 1);
-        test_equal(&a_slice, &b_slice, false);
-    }
-
-    fn create_decimal_array(data: &[Option<i128>]) -> ArrayData {
-        let mut builder = DecimalBuilder::new(20, 23, 6);
-
-        for d in data {
-            if let Some(v) = d {
-                builder.append_value(*v).unwrap();
-            } else {
-                builder.append_null().unwrap();
-            }
-        }
-        builder.finish().data().clone()
-    }
-
-    #[test]
-    fn test_decimal_equal() {
-        let a = create_decimal_array(&[Some(8_887_000_000), Some(-8_887_000_000)]);
-        let b = create_decimal_array(&[Some(8_887_000_000), Some(-8_887_000_000)]);
-        test_equal(&a, &b, true);
-
-        let b = create_decimal_array(&[Some(15_887_000_000), Some(-8_887_000_000)]);
-        test_equal(&a, &b, false);
-    }
-
-    // Test the case where null_count > 0
-    #[test]
-    fn test_decimal_null() {
-        let a = create_decimal_array(&[Some(8_887_000_000), None, Some(-8_887_000_000)]);
-        let b = create_decimal_array(&[Some(8_887_000_000), None, Some(-8_887_000_000)]);
-        test_equal(&a, &b, true);
-
-        let b = create_decimal_array(&[Some(8_887_000_000), Some(-8_887_000_000), None]);
-        test_equal(&a, &b, false);
-
-        let b = create_decimal_array(&[Some(15_887_000_000), None, Some(-8_887_000_000)]);
-        test_equal(&a, &b, false);
-    }
-
-    #[test]
-    fn test_decimal_offsets() {
-        // Test the case where offset != 0
-        let a = create_decimal_array(&[
-            Some(8_887_000_000),
-            None,
-            None,
-            Some(-8_887_000_000),
-            None,
-            None,
-        ]);
-        let b = create_decimal_array(&[
-            None,
-            Some(8_887_000_000),
-            None,
-            None,
-            Some(15_887_000_000),
-            None,
-            None,
-        ]);
-
-        let a_slice = a.slice(0, 3);
-        let b_slice = b.slice(1, 3);
-        test_equal(&a_slice, &b_slice, true);
-
-        let a_slice = a.slice(0, 5);
-        let b_slice = b.slice(1, 5);
-        test_equal(&a_slice, &b_slice, false);
-
-        let a_slice = a.slice(4, 1);
-        let b_slice = b.slice(5, 1);
-        test_equal(&a_slice, &b_slice, true);
-
-        let a_slice = a.slice(3, 3);
-        let b_slice = b.slice(4, 3);
-        test_equal(&a_slice, &b_slice, false);
-
-        let a_slice = a.slice(1, 3);
-        let b_slice = b.slice(2, 3);
-        test_equal(&a_slice, &b_slice, false);
-
-        let b = create_decimal_array(&[
-            None,
-            None,
-            None,
-            Some(-8_887_000_000),
-            Some(-3_000),
-            None,
-        ]);
-        let a_slice = a.slice(1, 3);
-        let b_slice = b.slice(1, 3);
-        test_equal(&a_slice, &b_slice, true);
-    }
-
-    /// Create a fixed size list of 2 value lengths
-    fn create_fixed_size_list_array<U: AsRef<[i32]>, T: AsRef<[Option<U>]>>(
-        data: T,
-    ) -> ArrayData {
-        let mut builder = FixedSizeListBuilder::new(Int32Builder::new(10), 3);
-
-        for d in data.as_ref() {
-            if let Some(v) = d {
-                builder.values().append_slice(v.as_ref()).unwrap();
-                builder.append(true).unwrap()
-            } else {
-                for _ in 0..builder.value_length() {
-                    builder.values().append_null().unwrap();
-                }
-                builder.append(false).unwrap()
-            }
-        }
-        builder.finish().data().clone()
-    }
-
-    #[test]
-    fn test_fixed_size_list_equal() {
-        let a = create_fixed_size_list_array(&[Some(&[1, 2, 3]), Some(&[4, 5, 6])]);
-        let b = create_fixed_size_list_array(&[Some(&[1, 2, 3]), Some(&[4, 5, 6])]);
-        test_equal(&a, &b, true);
-
-        let b = create_fixed_size_list_array(&[Some(&[1, 2, 3]), Some(&[4, 5, 7])]);
-        test_equal(&a, &b, false);
-    }
-
-    // Test the case where null_count > 0
-    #[test]
-    fn test_fixed_list_null() {
-        let a = create_fixed_size_list_array(&[
-            Some(&[1, 2, 3]),
-            None,
-            None,
-            Some(&[4, 5, 6]),
-            None,
-            None,
-        ]);
-        let b = create_fixed_size_list_array(&[
-            Some(&[1, 2, 3]),
-            None,
-            None,
-            Some(&[4, 5, 6]),
-            None,
-            None,
-        ]);
-        test_equal(&a, &b, true);
-
-        let b = create_fixed_size_list_array(&[
-            Some(&[1, 2, 3]),
-            None,
-            Some(&[7, 8, 9]),
-            Some(&[4, 5, 6]),
-            None,
-            None,
-        ]);
-        test_equal(&a, &b, false);
-
-        let b = create_fixed_size_list_array(&[
-            Some(&[1, 2, 3]),
-            None,
-            None,
-            Some(&[3, 6, 9]),
-            None,
-            None,
-        ]);
-        test_equal(&a, &b, false);
-    }
-
-    #[test]
-    fn test_fixed_list_offsets() {
-        // Test the case where offset != 0
-        let a = create_fixed_size_list_array(&[
-            Some(&[1, 2, 3]),
-            None,
-            None,
-            Some(&[4, 5, 6]),
-            None,
-            None,
-        ]);
-        let b = create_fixed_size_list_array(&[
-            Some(&[1, 2, 3]),
-            None,
-            None,
-            Some(&[3, 6, 9]),
-            None,
-            None,
-        ]);
-
-        let a_slice = a.slice(0, 3);
-        let b_slice = b.slice(0, 3);
-        test_equal(&a_slice, &b_slice, true);
-
-        let a_slice = a.slice(0, 5);
-        let b_slice = b.slice(0, 5);
-        test_equal(&a_slice, &b_slice, false);
-
-        let a_slice = a.slice(4, 1);
-        let b_slice = b.slice(4, 1);
-        test_equal(&a_slice, &b_slice, true);
-    }
-
-    #[test]
-    fn test_struct_equal() {
-        let strings: ArrayRef = Arc::new(StringArray::from(vec![
-            Some("joe"),
-            None,
-            None,
-            Some("mark"),
-            Some("doe"),
-        ]));
-        let ints: ArrayRef = Arc::new(Int32Array::from(vec![
-            Some(1),
-            Some(2),
-            None,
-            Some(4),
-            Some(5),
-        ]));
-
-        let a =
-            StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())])
-                .unwrap();
-        let a = a.data();
-
-        let b = StructArray::try_from(vec![("f1", strings), ("f2", ints)]).unwrap();
-        let b = b.data();
-
-        test_equal(&a, &b, true);
-    }
-
-    #[test]
-    fn test_struct_equal_null() {
-        let strings: ArrayRef = Arc::new(StringArray::from(vec![
-            Some("joe"),
-            None,
-            None,
-            Some("mark"),
-            Some("doe"),
-        ]));
-        let ints: ArrayRef = Arc::new(Int32Array::from(vec![
-            Some(1),
-            Some(2),
-            None,
-            Some(4),
-            Some(5),
-        ]));
-        let ints_non_null: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3, 4, 0]));
-
-        let a = ArrayData::builder(DataType::Struct(vec![
-            Field::new("f1", DataType::Utf8, true),
-            Field::new("f2", DataType::Int32, true),
-        ]))
-        .null_bit_buffer(Buffer::from(vec![0b00001011]))
-        .len(5)
-        .add_child_data(strings.data_ref().clone())
-        .add_child_data(ints.data_ref().clone())
-        .build();
-        let a = crate::array::make_array(a);
-
-        let b = ArrayData::builder(DataType::Struct(vec![
-            Field::new("f1", DataType::Utf8, true),
-            Field::new("f2", DataType::Int32, true),
-        ]))
-        .null_bit_buffer(Buffer::from(vec![0b00001011]))
-        .len(5)
-        .add_child_data(strings.data_ref().clone())
-        .add_child_data(ints_non_null.data_ref().clone())
-        .build();
-        let b = crate::array::make_array(b);
-
-        test_equal(a.data_ref(), b.data_ref(), true);
-
-        // test with arrays that are not equal
-        let c_ints_non_null: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3, 0, 4]));
-        let c = ArrayData::builder(DataType::Struct(vec![
-            Field::new("f1", DataType::Utf8, true),
-            Field::new("f2", DataType::Int32, true),
-        ]))
-        .null_bit_buffer(Buffer::from(vec![0b00001011]))
-        .len(5)
-        .add_child_data(strings.data_ref().clone())
-        .add_child_data(c_ints_non_null.data_ref().clone())
-        .build();
-        let c = crate::array::make_array(c);
-
-        test_equal(a.data_ref(), c.data_ref(), false);
-
-        // test a nested struct
-        let a = ArrayData::builder(DataType::Struct(vec![Field::new(
-            "f3",
-            a.data_type().clone(),
-            true,
-        )]))
-        .null_bit_buffer(Buffer::from(vec![0b00011110]))
-        .len(5)
-        .add_child_data(a.data_ref().clone())
-        .build();
-        let a = crate::array::make_array(a);
-
-        // reconstruct b, but with different data where the first struct is null
-        let strings: ArrayRef = Arc::new(StringArray::from(vec![
-            Some("joanne"), // difference
-            None,
-            None,
-            Some("mark"),
-            Some("doe"),
-        ]));
-        let b = ArrayData::builder(DataType::Struct(vec![
-            Field::new("f1", DataType::Utf8, true),
-            Field::new("f2", DataType::Int32, true),
-        ]))
-        .null_bit_buffer(Buffer::from(vec![0b00001011]))
-        .len(5)
-        .add_child_data(strings.data_ref().clone())
-        .add_child_data(ints_non_null.data_ref().clone())
-        .build();
-
-        let b = ArrayData::builder(DataType::Struct(vec![Field::new(
-            "f3",
-            b.data_type().clone(),
-            true,
-        )]))
-        .null_bit_buffer(Buffer::from(vec![0b00011110]))
-        .len(5)
-        .add_child_data(b)
-        .build();
-        let b = crate::array::make_array(b);
-
-        test_equal(a.data_ref(), b.data_ref(), true);
-    }
-
-    #[test]
-    fn test_struct_equal_null_variable_size() {
-        // the string arrays differ, but where the struct array is null
-        let strings1: ArrayRef = Arc::new(StringArray::from(vec![
-            Some("joe"),
-            None,
-            None,
-            Some("mark"),
-            Some("doel"),
-        ]));
-        let strings2: ArrayRef = Arc::new(StringArray::from(vec![
-            Some("joel"),
-            None,
-            None,
-            Some("mark"),
-            Some("doe"),
-        ]));
-
-        let a = ArrayData::builder(DataType::Struct(vec![Field::new(
-            "f1",
-            DataType::Utf8,
-            true,
-        )]))
-        .null_bit_buffer(Buffer::from(vec![0b00001010]))
-        .len(5)
-        .add_child_data(strings1.data_ref().clone())
-        .build();
-        let a = crate::array::make_array(a);
-
-        let b = ArrayData::builder(DataType::Struct(vec![Field::new(
-            "f1",
-            DataType::Utf8,
-            true,
-        )]))
-        .null_bit_buffer(Buffer::from(vec![0b00001010]))
-        .len(5)
-        .add_child_data(strings2.data_ref().clone())
-        .build();
-        let b = crate::array::make_array(b);
-
-        test_equal(a.data_ref(), b.data_ref(), true);
-
-        // test with arrays that are not equal
-        let strings3: ArrayRef = Arc::new(StringArray::from(vec![
-            Some("mark"),
-            None,
-            None,
-            Some("doe"),
-            Some("joe"),
-        ]));
-        let c = ArrayData::builder(DataType::Struct(vec![Field::new(
-            "f1",
-            DataType::Utf8,
-            true,
-        )]))
-        .null_bit_buffer(Buffer::from(vec![0b00001011]))
-        .len(5)
-        .add_child_data(strings3.data_ref().clone())
-        .build();
-        let c = crate::array::make_array(c);
-
-        test_equal(a.data_ref(), c.data_ref(), false);
-    }
-
-    fn create_dictionary_array(values: &[&str], keys: &[Option<&str>]) -> ArrayData {
-        let values = StringArray::from(values.to_vec());
-        let mut builder = StringDictionaryBuilder::new_with_dictionary(
-            PrimitiveBuilder::<Int16Type>::new(3),
-            &values,
-        )
-        .unwrap();
-        for key in keys {
-            if let Some(v) = key {
-                builder.append(v).unwrap();
-            } else {
-                builder.append_null().unwrap()
-            }
-        }
-        builder.finish().data().clone()
-    }
-
-    #[test]
-    fn test_dictionary_equal() {
-        // (a, b, c), (1, 2, 1, 3) => (a, b, a, c)
-        let a = create_dictionary_array(
-            &["a", "b", "c"],
-            &[Some("a"), Some("b"), Some("a"), Some("c")],
-        );
-        // different representation (values and keys are swapped), same result
-        let b = create_dictionary_array(
-            &["a", "c", "b"],
-            &[Some("a"), Some("b"), Some("a"), Some("c")],
-        );
-        test_equal(&a, &b, true);
-
-        // different len
-        let b =
-            create_dictionary_array(&["a", "c", "b"], &[Some("a"), Some("b"), Some("a")]);
-        test_equal(&a, &b, false);
-
-        // different key
-        let b = create_dictionary_array(
-            &["a", "c", "b"],
-            &[Some("a"), Some("b"), Some("a"), Some("a")],
-        );
-        test_equal(&a, &b, false);
-
-        // different values, same keys
-        let b = create_dictionary_array(
-            &["a", "b", "d"],
-            &[Some("a"), Some("b"), Some("a"), Some("d")],
-        );
-        test_equal(&a, &b, false);
-    }
-
-    #[test]
-    fn test_dictionary_equal_null() {
-        // (a, b, c), (1, 2, 1, 3) => (a, b, a, c)
-        let a = create_dictionary_array(
-            &["a", "b", "c"],
-            &[Some("a"), None, Some("a"), Some("c")],
-        );
-
-        // equal to self
-        test_equal(&a, &a, true);
-
-        // different representation (values and keys are swapped), same result
-        let b = create_dictionary_array(
-            &["a", "c", "b"],
-            &[Some("a"), None, Some("a"), Some("c")],
-        );
-        test_equal(&a, &b, true);
-
-        // different null position
-        let b = create_dictionary_array(
-            &["a", "c", "b"],
-            &[Some("a"), Some("b"), Some("a"), None],
-        );
-        test_equal(&a, &b, false);
-
-        // different key
-        let b = create_dictionary_array(
-            &["a", "c", "b"],
-            &[Some("a"), None, Some("a"), Some("a")],
-        );
-        test_equal(&a, &b, false);
-
-        // different values, same keys
-        let b = create_dictionary_array(
-            &["a", "b", "d"],
-            &[Some("a"), None, Some("a"), Some("d")],
-        );
-        test_equal(&a, &b, false);
-    }
-}
diff --git a/rust/arrow/src/array/equal/null.rs b/rust/arrow/src/array/equal/null.rs
deleted file mode 100644
index f287a382507..00000000000
--- a/rust/arrow/src/array/equal/null.rs
+++ /dev/null
@@ -1,31 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use crate::array::ArrayData;
-
-#[inline]
-pub(super) fn null_equal(
-    _lhs: &ArrayData,
-    _rhs: &ArrayData,
-    _lhs_start: usize,
-    _rhs_start: usize,
-    _len: usize,
-) -> bool {
-    // a null buffer's range is always true, as every element is by definition equal (to null).
-    // We only need to compare data_types
-    true
-}
diff --git a/rust/arrow/src/array/equal/primitive.rs b/rust/arrow/src/array/equal/primitive.rs
deleted file mode 100644
index db7587915c8..00000000000
--- a/rust/arrow/src/array/equal/primitive.rs
+++ /dev/null
@@ -1,73 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::mem::size_of;
-
-use crate::array::{data::count_nulls, ArrayData};
-use crate::buffer::Buffer;
-use crate::util::bit_util::get_bit;
-
-use super::utils::equal_len;
-
-pub(super) fn primitive_equal<T>(
-    lhs: &ArrayData,
-    rhs: &ArrayData,
-    lhs_nulls: Option<&Buffer>,
-    rhs_nulls: Option<&Buffer>,
-    lhs_start: usize,
-    rhs_start: usize,
-    len: usize,
-) -> bool {
-    let byte_width = size_of::<T>();
-    let lhs_values = &lhs.buffers()[0].as_slice()[lhs.offset() * byte_width..];
-    let rhs_values = &rhs.buffers()[0].as_slice()[rhs.offset() * byte_width..];
-
-    let lhs_null_count = count_nulls(lhs_nulls, lhs_start, len);
-    let rhs_null_count = count_nulls(rhs_nulls, rhs_start, len);
-
-    if lhs_null_count == 0 && rhs_null_count == 0 {
-        // without nulls, we just need to compare slices
-        equal_len(
-            lhs_values,
-            rhs_values,
-            lhs_start * byte_width,
-            rhs_start * byte_width,
-            len * byte_width,
-        )
-    } else {
-        // get a ref of the null buffer bytes, to use in testing for nullness
-        let lhs_null_bytes = lhs_nulls.as_ref().unwrap().as_slice();
-        let rhs_null_bytes = rhs_nulls.as_ref().unwrap().as_slice();
-        // with nulls, we need to compare item by item whenever it is not null
-        (0..len).all(|i| {
-            let lhs_pos = lhs_start + i;
-            let rhs_pos = rhs_start + i;
-            let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos + lhs.offset());
-            let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos + rhs.offset());
-
-            lhs_is_null
-                || (lhs_is_null == rhs_is_null)
-                    && equal_len(
-                        lhs_values,
-                        rhs_values,
-                        lhs_pos * byte_width,
-                        rhs_pos * byte_width,
-                        byte_width, // 1 * byte_width since we are comparing a single entry
-                    )
-        })
-    }
-}
diff --git a/rust/arrow/src/array/equal/structure.rs b/rust/arrow/src/array/equal/structure.rs
deleted file mode 100644
index b3cc4029e9e..00000000000
--- a/rust/arrow/src/array/equal/structure.rs
+++ /dev/null
@@ -1,90 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use crate::{
-    array::data::count_nulls, array::ArrayData, buffer::Buffer, util::bit_util::get_bit,
-};
-
-use super::{equal_range, utils::child_logical_null_buffer};
-
-/// Compares the values of two [ArrayData] starting at `lhs_start` and `rhs_start` respectively
-/// for `len` slots. The null buffers `lhs_nulls` and `rhs_nulls` inherit parent nullability.
-///
-/// If an array is a child of a struct or list, the array's nulls have to be merged with the parent.
-/// This then affects the null count of the array, thus the merged nulls are passed separately
-/// as `lhs_nulls` and `rhs_nulls` variables to functions.
-/// The nulls are merged with a bitwise AND, and null counts are recomputed where necessary.
-fn equal_values(
-    lhs: &ArrayData,
-    rhs: &ArrayData,
-    lhs_nulls: Option<&Buffer>,
-    rhs_nulls: Option<&Buffer>,
-    lhs_start: usize,
-    rhs_start: usize,
-    len: usize,
-) -> bool {
-    lhs.child_data()
-        .iter()
-        .zip(rhs.child_data())
-        .all(|(lhs_values, rhs_values)| {
-            // merge the null data
-            let lhs_merged_nulls = child_logical_null_buffer(lhs, lhs_nulls, lhs_values);
-            let rhs_merged_nulls = child_logical_null_buffer(rhs, rhs_nulls, rhs_values);
-            equal_range(
-                lhs_values,
-                rhs_values,
-                lhs_merged_nulls.as_ref(),
-                rhs_merged_nulls.as_ref(),
-                lhs_start,
-                rhs_start,
-                len,
-            )
-        })
-}
-
-pub(super) fn struct_equal(
-    lhs: &ArrayData,
-    rhs: &ArrayData,
-    lhs_nulls: Option<&Buffer>,
-    rhs_nulls: Option<&Buffer>,
-    lhs_start: usize,
-    rhs_start: usize,
-    len: usize,
-) -> bool {
-    // we have to recalculate null counts from the null buffers
-    let lhs_null_count = count_nulls(lhs_nulls, lhs_start, len);
-    let rhs_null_count = count_nulls(rhs_nulls, rhs_start, len);
-    if lhs_null_count == 0 && rhs_null_count == 0 {
-        equal_values(lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len)
-    } else {
-        // get a ref of the null buffer bytes, to use in testing for nullness
-        let lhs_null_bytes = lhs_nulls.as_ref().unwrap().as_slice();
-        let rhs_null_bytes = rhs_nulls.as_ref().unwrap().as_slice();
-        // with nulls, we need to compare item by item whenever it is not null
-        (0..len).all(|i| {
-            let lhs_pos = lhs_start + i;
-            let rhs_pos = rhs_start + i;
-            // if both struct and child had no null buffers,
-            let lhs_is_null = !get_bit(lhs_null_bytes, lhs_pos + lhs.offset());
-            let rhs_is_null = !get_bit(rhs_null_bytes, rhs_pos + rhs.offset());
-
-            lhs_is_null
-                || (lhs_is_null == rhs_is_null)
-                    && equal_values(lhs, rhs, lhs_nulls, rhs_nulls, lhs_pos, rhs_pos, 1)
-        })
-    }
-}
diff --git a/rust/arrow/src/array/equal/utils.rs b/rust/arrow/src/array/equal/utils.rs
deleted file mode 100644
index d0108d23649..00000000000
--- a/rust/arrow/src/array/equal/utils.rs
+++ /dev/null
@@ -1,264 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use crate::array::{data::count_nulls, ArrayData, OffsetSizeTrait};
-use crate::bitmap::Bitmap;
-use crate::buffer::{Buffer, MutableBuffer};
-use crate::datatypes::DataType;
-use crate::util::bit_util;
-
-// whether bits along the positions are equal
-// `lhs_start`, `rhs_start` and `len` are _measured in bits_.
-#[inline]
-pub(super) fn equal_bits(
-    lhs_values: &[u8],
-    rhs_values: &[u8],
-    lhs_start: usize,
-    rhs_start: usize,
-    len: usize,
-) -> bool {
-    (0..len).all(|i| {
-        bit_util::get_bit(lhs_values, lhs_start + i)
-            == bit_util::get_bit(rhs_values, rhs_start + i)
-    })
-}
-
-#[inline]
-pub(super) fn equal_nulls(
-    lhs: &ArrayData,
-    rhs: &ArrayData,
-    lhs_nulls: Option<&Buffer>,
-    rhs_nulls: Option<&Buffer>,
-    lhs_start: usize,
-    rhs_start: usize,
-    len: usize,
-) -> bool {
-    let lhs_null_count = count_nulls(lhs_nulls, lhs_start, len);
-    let rhs_null_count = count_nulls(rhs_nulls, rhs_start, len);
-    if lhs_null_count > 0 || rhs_null_count > 0 {
-        let lhs_values = lhs_nulls.unwrap().as_slice();
-        let rhs_values = rhs_nulls.unwrap().as_slice();
-        equal_bits(
-            lhs_values,
-            rhs_values,
-            lhs_start + lhs.offset(),
-            rhs_start + rhs.offset(),
-            len,
-        )
-    } else {
-        true
-    }
-}
-
-#[inline]
-pub(super) fn base_equal(lhs: &ArrayData, rhs: &ArrayData) -> bool {
-    lhs.data_type() == rhs.data_type() && lhs.len() == rhs.len()
-}
-
-// whether the two memory regions are equal
-#[inline]
-pub(super) fn equal_len(
-    lhs_values: &[u8],
-    rhs_values: &[u8],
-    lhs_start: usize,
-    rhs_start: usize,
-    len: usize,
-) -> bool {
-    lhs_values[lhs_start..(lhs_start + len)] == rhs_values[rhs_start..(rhs_start + len)]
-}
-
-/// Computes the logical validity bitmap of the array data using the
-/// parent's array data. The parent should be a list or struct, else
-/// the logical bitmap of the array is returned unaltered.
-///
-/// Parent data is passed along with the parent's logical bitmap, as
-/// nested arrays could have a logical bitmap different to the physical
-/// one on the `ArrayData`.
-pub(super) fn child_logical_null_buffer(
-    parent_data: &ArrayData,
-    logical_null_buffer: Option<&Buffer>,
-    child_data: &ArrayData,
-) -> Option<Buffer> {
-    let parent_len = parent_data.len();
-    let parent_bitmap = logical_null_buffer
-        .cloned()
-        .map(Bitmap::from)
-        .unwrap_or_else(|| {
-            let ceil = bit_util::ceil(parent_len, 8);
-            Bitmap::from(Buffer::from(vec![0b11111111; ceil]))
-        });
-    let self_null_bitmap = child_data.null_bitmap().clone().unwrap_or_else(|| {
-        let ceil = bit_util::ceil(child_data.len(), 8);
-        Bitmap::from(Buffer::from(vec![0b11111111; ceil]))
-    });
-    match parent_data.data_type() {
-        DataType::List(_) => Some(logical_list_bitmap::<i32>(
-            parent_data,
-            parent_bitmap,
-            self_null_bitmap,
-        )),
-        DataType::LargeList(_) => Some(logical_list_bitmap::<i64>(
-            parent_data,
-            parent_bitmap,
-            self_null_bitmap,
-        )),
-        DataType::FixedSizeList(_, len) => {
-            let len = *len as usize;
-            let array_offset = parent_data.offset();
-            let bitmap_len = bit_util::ceil(parent_len * len, 8);
-            let mut buffer = MutableBuffer::from_len_zeroed(bitmap_len);
-            let mut null_slice = buffer.as_slice_mut();
-            (array_offset..parent_len + array_offset).for_each(|index| {
-                let start = index * len;
-                let end = start + len;
-                let mask = parent_bitmap.is_set(index);
-                (start..end).for_each(|child_index| {
-                    if mask && self_null_bitmap.is_set(child_index) {
-                        bit_util::set_bit(&mut null_slice, child_index);
-                    }
-                });
-            });
-            Some(buffer.into())
-        }
-        DataType::Struct(_) => {
-            // Arrow implementations are free to pad data, which can result in null buffers not
-            // having the same length.
-            // Rust bitwise comparisons will return an error if left AND right is performed on
-            // buffers of different length.
-            // This might be a valid case during integration testing, where we read Arrow arrays
-            // from IPC data, which has padding.
-            //
-            // We first perform a bitwise comparison, and if there is an error, we revert to a
-            // slower method that indexes into the buffers one-by-one.
-            let result = &parent_bitmap & &self_null_bitmap;
-            if let Ok(bitmap) = result {
-                return Some(bitmap.bits);
-            }
-            // slow path
-            let array_offset = parent_data.offset();
-            let mut buffer = MutableBuffer::new_null(parent_len);
-            let mut null_slice = buffer.as_slice_mut();
-            (0..parent_len).for_each(|index| {
-                if parent_bitmap.is_set(index + array_offset)
-                    && self_null_bitmap.is_set(index + array_offset)
-                {
-                    bit_util::set_bit(&mut null_slice, index);
-                }
-            });
-            Some(buffer.into())
-        }
-        DataType::Union(_) => {
-            unimplemented!("Logical equality not yet implemented for union arrays")
-        }
-        DataType::Dictionary(_, _) => {
-            unimplemented!("Logical equality not yet implemented for nested dictionaries")
-        }
-        data_type => panic!("Data type {:?} is not a supported nested type", data_type),
-    }
-}
-
-// Calculate a list child's logical bitmap/buffer
-#[inline]
-fn logical_list_bitmap<OffsetSize: OffsetSizeTrait>(
-    parent_data: &ArrayData,
-    parent_bitmap: Bitmap,
-    child_bitmap: Bitmap,
-) -> Buffer {
-    let offsets = parent_data.buffer::<OffsetSize>(0);
-    let offset_start = offsets.first().unwrap().to_usize().unwrap();
-    let offset_len = offsets.get(parent_data.len()).unwrap().to_usize().unwrap();
-    let mut buffer = MutableBuffer::new_null(offset_len - offset_start);
-    let mut null_slice = buffer.as_slice_mut();
-
-    offsets
-        .windows(2)
-        .enumerate()
-        .take(offset_len - offset_start)
-        .for_each(|(index, window)| {
-            let start = window[0].to_usize().unwrap();
-            let end = window[1].to_usize().unwrap();
-            let mask = parent_bitmap.is_set(index);
-            (start..end).for_each(|child_index| {
-                if mask && child_bitmap.is_set(child_index) {
-                    bit_util::set_bit(&mut null_slice, child_index - offset_start);
-                }
-            });
-        });
-    buffer.into()
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    use crate::datatypes::{Field, ToByteSlice};
-
-    #[test]
-    fn test_logical_null_buffer() {
-        let child_data = ArrayData::builder(DataType::Int32)
-            .len(11)
-            .add_buffer(Buffer::from(
-                vec![1i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11].to_byte_slice(),
-            ))
-            .build();
-
-        let data = ArrayData::builder(DataType::List(Box::new(Field::new(
-            "item",
-            DataType::Int32,
-            false,
-        ))))
-        .len(7)
-        .add_buffer(Buffer::from(vec![0, 0, 3, 5, 6, 9, 10, 11].to_byte_slice()))
-        .null_bit_buffer(Buffer::from(vec![0b01011010]))
-        .add_child_data(child_data.clone())
-        .build();
-
-        // Get the child logical null buffer. The child is non-nullable, but because the list has nulls,
-        // we expect the child to logically have some nulls, inherited from the parent:
-        // [1, 2, 3, null, null, 6, 7, 8, 9, null, 11]
-        let nulls = child_logical_null_buffer(
-            &data,
-            data.null_buffer(),
-            data.child_data().get(0).unwrap(),
-        );
-        let expected = Some(Buffer::from(vec![0b11100111, 0b00000101]));
-        assert_eq!(nulls, expected);
-
-        // test with offset
-        let data = ArrayData::builder(DataType::List(Box::new(Field::new(
-            "item",
-            DataType::Int32,
-            false,
-        ))))
-        .len(4)
-        .offset(3)
-        .add_buffer(Buffer::from(vec![0, 0, 3, 5, 6, 9, 10, 11].to_byte_slice()))
-        // the null_bit_buffer doesn't have an offset, i.e. cleared the 3 offset bits 0b[---]01011[010]
-        .null_bit_buffer(Buffer::from(vec![0b00001011]))
-        .add_child_data(child_data)
-        .build();
-
-        let nulls = child_logical_null_buffer(
-            &data,
-            data.null_buffer(),
-            data.child_data().get(0).unwrap(),
-        );
-
-        let expected = Some(Buffer::from(vec![0b00101111]));
-        assert_eq!(nulls, expected);
-    }
-}
diff --git a/rust/arrow/src/array/equal/variable_size.rs b/rust/arrow/src/array/equal/variable_size.rs
deleted file mode 100644
index ecb3bc2a3c2..00000000000
--- a/rust/arrow/src/array/equal/variable_size.rs
+++ /dev/null
@@ -1,110 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use crate::buffer::Buffer;
-use crate::util::bit_util::get_bit;
-use crate::{
-    array::data::count_nulls,
-    array::{ArrayData, OffsetSizeTrait},
-};
-
-use super::utils::equal_len;
-
-fn offset_value_equal<T: OffsetSizeTrait>(
-    lhs_values: &[u8],
-    rhs_values: &[u8],
-    lhs_offsets: &[T],
-    rhs_offsets: &[T],
-    lhs_pos: usize,
-    rhs_pos: usize,
-    len: usize,
-) -> bool {
-    let lhs_start = lhs_offsets[lhs_pos].to_usize().unwrap();
-    let rhs_start = rhs_offsets[rhs_pos].to_usize().unwrap();
-    let lhs_len = lhs_offsets[lhs_pos + len] - lhs_offsets[lhs_pos];
-    let rhs_len = rhs_offsets[rhs_pos + len] - rhs_offsets[rhs_pos];
-
-    lhs_len == rhs_len
-        && equal_len(
-            lhs_values,
-            rhs_values,
-            lhs_start,
-            rhs_start,
-            lhs_len.to_usize().unwrap(),
-        )
-}
-
-pub(super) fn variable_sized_equal<T: OffsetSizeTrait>(
-    lhs: &ArrayData,
-    rhs: &ArrayData,
-    lhs_nulls: Option<&Buffer>,
-    rhs_nulls: Option<&Buffer>,
-    lhs_start: usize,
-    rhs_start: usize,
-    len: usize,
-) -> bool {
-    let lhs_offsets = lhs.buffer::<T>(0);
-    let rhs_offsets = rhs.buffer::<T>(0);
-
-    // the offsets of the `ArrayData` are ignored as they are only applied to the offset buffer.
-    let lhs_values = lhs.buffers()[1].as_slice();
-    let rhs_values = rhs.buffers()[1].as_slice();
-
-    let lhs_null_count = count_nulls(lhs_nulls, lhs_start, len);
-    let rhs_null_count = count_nulls(rhs_nulls, rhs_start, len);
-
-    if lhs_null_count == 0
-        && rhs_null_count == 0
-        && !lhs_values.is_empty()
-        && !rhs_values.is_empty()
-    {
-        offset_value_equal(
-            lhs_values,
-            rhs_values,
-            lhs_offsets,
-            rhs_offsets,
-            lhs_start,
-            rhs_start,
-            len,
-        )
-    } else {
-        (0..len).all(|i| {
-            let lhs_pos = lhs_start + i;
-            let rhs_pos = rhs_start + i;
-
-            // the null bits can still be `None`, so we don't unwrap
-            let lhs_is_null = !lhs_nulls
-                .map(|v| get_bit(v.as_slice(), lhs.offset() + lhs_pos))
-                .unwrap_or(false);
-            let rhs_is_null = !rhs_nulls
-                .map(|v| get_bit(v.as_slice(), rhs.offset() + rhs_pos))
-                .unwrap_or(false);
-
-            lhs_is_null
-                || (lhs_is_null == rhs_is_null)
-                    && offset_value_equal(
-                        lhs_values,
-                        rhs_values,
-                        lhs_offsets,
-                        rhs_offsets,
-                        lhs_pos,
-                        rhs_pos,
-                        1,
-                    )
-        })
-    }
-}
diff --git a/rust/arrow/src/array/equal_json.rs b/rust/arrow/src/array/equal_json.rs
deleted file mode 100644
index 043174b9ac8..00000000000
--- a/rust/arrow/src/array/equal_json.rs
+++ /dev/null
@@ -1,1113 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use super::*;
-use crate::datatypes::*;
-use array::Array;
-use hex::FromHex;
-use serde_json::value::Value::{Null as JNull, Object, String as JString};
-use serde_json::Value;
-
-/// Trait for comparing arrow array with json array
-pub trait JsonEqual {
-    /// Checks whether arrow array equals to json array.
-    fn equals_json(&self, json: &[&Value]) -> bool;
-
-    /// Checks whether arrow array equals to json array.
-    fn equals_json_values(&self, json: &[Value]) -> bool {
-        let refs = json.iter().collect::<Vec<&Value>>();
-
-        self.equals_json(&refs)
-    }
-}
-
-/// Implement array equals for numeric type
-impl<T: ArrowPrimitiveType> JsonEqual for PrimitiveArray<T> {
-    fn equals_json(&self, json: &[&Value]) -> bool {
-        self.len() == json.len()
-            && (0..self.len()).all(|i| match json[i] {
-                Value::Null => self.is_null(i),
-                v => {
-                    self.is_valid(i)
-                        && Some(v) == self.value(i).into_json_value().as_ref()
-                }
-            })
-    }
-}
-
-/// Implement array equals for numeric type
-impl JsonEqual for BooleanArray {
-    fn equals_json(&self, json: &[&Value]) -> bool {
-        self.len() == json.len()
-            && (0..self.len()).all(|i| match json[i] {
-                Value::Null => self.is_null(i),
-                v => {
-                    self.is_valid(i)
-                        && Some(v) == self.value(i).into_json_value().as_ref()
-                }
-            })
-    }
-}
-
-impl<T: ArrowPrimitiveType> PartialEq<Value> for PrimitiveArray<T> {
-    fn eq(&self, json: &Value) -> bool {
-        match json {
-            Value::Array(array) => self.equals_json_values(&array),
-            _ => false,
-        }
-    }
-}
-
-impl<T: ArrowPrimitiveType> PartialEq<PrimitiveArray<T>> for Value {
-    fn eq(&self, arrow: &PrimitiveArray<T>) -> bool {
-        match self {
-            Value::Array(array) => arrow.equals_json_values(&array),
-            _ => false,
-        }
-    }
-}
-
-impl<OffsetSize: OffsetSizeTrait> JsonEqual for GenericListArray<OffsetSize> {
-    fn equals_json(&self, json: &[&Value]) -> bool {
-        if self.len() != json.len() {
-            return false;
-        }
-
-        (0..self.len()).all(|i| match json[i] {
-            Value::Array(v) => self.is_valid(i) && self.value(i).equals_json_values(v),
-            Value::Null => self.is_null(i) || self.value_length(i).is_zero(),
-            _ => false,
-        })
-    }
-}
-
-impl<OffsetSize: OffsetSizeTrait> PartialEq<Value> for GenericListArray<OffsetSize> {
-    fn eq(&self, json: &Value) -> bool {
-        match json {
-            Value::Array(json_array) => self.equals_json_values(json_array),
-            _ => false,
-        }
-    }
-}
-
-impl<OffsetSize: OffsetSizeTrait> PartialEq<GenericListArray<OffsetSize>> for Value {
-    fn eq(&self, arrow: &GenericListArray<OffsetSize>) -> bool {
-        match self {
-            Value::Array(json_array) => arrow.equals_json_values(json_array),
-            _ => false,
-        }
-    }
-}
-
-impl<T: ArrowPrimitiveType> JsonEqual for DictionaryArray<T> {
-    fn equals_json(&self, json: &[&Value]) -> bool {
-        // todo: this is wrong: we must test the values also
-        self.keys().equals_json(json)
-    }
-}
-
-impl<T: ArrowPrimitiveType> PartialEq<Value> for DictionaryArray<T> {
-    fn eq(&self, json: &Value) -> bool {
-        match json {
-            Value::Array(json_array) => self.equals_json_values(json_array),
-            _ => false,
-        }
-    }
-}
-
-impl<T: ArrowPrimitiveType> PartialEq<DictionaryArray<T>> for Value {
-    fn eq(&self, arrow: &DictionaryArray<T>) -> bool {
-        match self {
-            Value::Array(json_array) => arrow.equals_json_values(json_array),
-            _ => false,
-        }
-    }
-}
-
-impl JsonEqual for FixedSizeListArray {
-    fn equals_json(&self, json: &[&Value]) -> bool {
-        if self.len() != json.len() {
-            return false;
-        }
-
-        (0..self.len()).all(|i| match json[i] {
-            Value::Array(v) => self.is_valid(i) && self.value(i).equals_json_values(v),
-            Value::Null => self.is_null(i) || self.value_length() == 0,
-            _ => false,
-        })
-    }
-}
-
-impl PartialEq<Value> for FixedSizeListArray {
-    fn eq(&self, json: &Value) -> bool {
-        match json {
-            Value::Array(json_array) => self.equals_json_values(json_array),
-            _ => false,
-        }
-    }
-}
-
-impl PartialEq<FixedSizeListArray> for Value {
-    fn eq(&self, arrow: &FixedSizeListArray) -> bool {
-        match self {
-            Value::Array(json_array) => arrow.equals_json_values(json_array),
-            _ => false,
-        }
-    }
-}
-
-impl JsonEqual for StructArray {
-    fn equals_json(&self, json: &[&Value]) -> bool {
-        if self.len() != json.len() {
-            return false;
-        }
-
-        let all_object = json.iter().all(|v| matches!(v, Object(_) | JNull));
-
-        if !all_object {
-            return false;
-        }
-
-        for column_name in self.column_names() {
-            let json_values = json
-                .iter()
-                .map(|obj| obj.get(column_name).unwrap_or(&Value::Null))
-                .collect::<Vec<&Value>>();
-
-            if !self
-                .column_by_name(column_name)
-                .map(|arr| arr.equals_json(&json_values))
-                .unwrap_or(false)
-            {
-                return false;
-            }
-        }
-
-        true
-    }
-}
-
-impl PartialEq<Value> for StructArray {
-    fn eq(&self, json: &Value) -> bool {
-        match json {
-            Value::Array(json_array) => self.equals_json_values(&json_array),
-            _ => false,
-        }
-    }
-}
-
-impl PartialEq<StructArray> for Value {
-    fn eq(&self, arrow: &StructArray) -> bool {
-        match self {
-            Value::Array(json_array) => arrow.equals_json_values(&json_array),
-            _ => false,
-        }
-    }
-}
-
-impl<OffsetSize: BinaryOffsetSizeTrait> JsonEqual for GenericBinaryArray<OffsetSize> {
-    fn equals_json(&self, json: &[&Value]) -> bool {
-        if self.len() != json.len() {
-            return false;
-        }
-
-        (0..self.len()).all(|i| match json[i] {
-            JString(s) => {
-                // binary data is sometimes hex encoded, this checks if bytes are equal,
-                // and if not converting to hex is attempted
-                self.is_valid(i)
-                    && (s.as_str().as_bytes() == self.value(i)
-                        || Vec::from_hex(s.as_str()) == Ok(self.value(i).to_vec()))
-            }
-            JNull => self.is_null(i),
-            _ => false,
-        })
-    }
-}
-
-impl<OffsetSize: BinaryOffsetSizeTrait> PartialEq<Value>
-    for GenericBinaryArray<OffsetSize>
-{
-    fn eq(&self, json: &Value) -> bool {
-        match json {
-            Value::Array(json_array) => self.equals_json_values(&json_array),
-            _ => false,
-        }
-    }
-}
-
-impl<OffsetSize: BinaryOffsetSizeTrait> PartialEq<GenericBinaryArray<OffsetSize>>
-    for Value
-{
-    fn eq(&self, arrow: &GenericBinaryArray<OffsetSize>) -> bool {
-        match self {
-            Value::Array(json_array) => arrow.equals_json_values(&json_array),
-            _ => false,
-        }
-    }
-}
-
-impl<OffsetSize: StringOffsetSizeTrait> JsonEqual for GenericStringArray<OffsetSize> {
-    fn equals_json(&self, json: &[&Value]) -> bool {
-        if self.len() != json.len() {
-            return false;
-        }
-
-        (0..self.len()).all(|i| match json[i] {
-            JString(s) => self.is_valid(i) && s.as_str() == self.value(i),
-            JNull => self.is_null(i),
-            _ => false,
-        })
-    }
-}
-
-impl<OffsetSize: StringOffsetSizeTrait> PartialEq<Value>
-    for GenericStringArray<OffsetSize>
-{
-    fn eq(&self, json: &Value) -> bool {
-        match json {
-            Value::Array(json_array) => self.equals_json_values(&json_array),
-            _ => false,
-        }
-    }
-}
-
-impl<OffsetSize: StringOffsetSizeTrait> PartialEq<GenericStringArray<OffsetSize>>
-    for Value
-{
-    fn eq(&self, arrow: &GenericStringArray<OffsetSize>) -> bool {
-        match self {
-            Value::Array(json_array) => arrow.equals_json_values(&json_array),
-            _ => false,
-        }
-    }
-}
-
-impl JsonEqual for FixedSizeBinaryArray {
-    fn equals_json(&self, json: &[&Value]) -> bool {
-        if self.len() != json.len() {
-            return false;
-        }
-
-        (0..self.len()).all(|i| match json[i] {
-            JString(s) => {
-                // binary data is sometimes hex encoded, this checks if bytes are equal,
-                // and if not converting to hex is attempted
-                self.is_valid(i)
-                    && (s.as_str().as_bytes() == self.value(i)
-                        || Vec::from_hex(s.as_str()) == Ok(self.value(i).to_vec()))
-            }
-            JNull => self.is_null(i),
-            _ => false,
-        })
-    }
-}
-
-impl PartialEq<Value> for FixedSizeBinaryArray {
-    fn eq(&self, json: &Value) -> bool {
-        match json {
-            Value::Array(json_array) => self.equals_json_values(&json_array),
-            _ => false,
-        }
-    }
-}
-
-impl PartialEq<FixedSizeBinaryArray> for Value {
-    fn eq(&self, arrow: &FixedSizeBinaryArray) -> bool {
-        match self {
-            Value::Array(json_array) => arrow.equals_json_values(&json_array),
-            _ => false,
-        }
-    }
-}
-
-impl JsonEqual for DecimalArray {
-    fn equals_json(&self, json: &[&Value]) -> bool {
-        if self.len() != json.len() {
-            return false;
-        }
-
-        (0..self.len()).all(|i| match json[i] {
-            JString(s) => {
-                self.is_valid(i)
-                    && (s
-                        .parse::<i128>()
-                        .map_or_else(|_| false, |v| v == self.value(i)))
-            }
-            JNull => self.is_null(i),
-            _ => false,
-        })
-    }
-}
-
-impl PartialEq<Value> for DecimalArray {
-    fn eq(&self, json: &Value) -> bool {
-        match json {
-            Value::Array(json_array) => self.equals_json_values(&json_array),
-            _ => false,
-        }
-    }
-}
-
-impl PartialEq<DecimalArray> for Value {
-    fn eq(&self, arrow: &DecimalArray) -> bool {
-        match self {
-            Value::Array(json_array) => arrow.equals_json_values(&json_array),
-            _ => false,
-        }
-    }
-}
-
-impl JsonEqual for UnionArray {
-    fn equals_json(&self, _json: &[&Value]) -> bool {
-        unimplemented!(
-            "Added to allow UnionArray to implement the Array trait: see ARROW-8547"
-        )
-    }
-}
-
-impl JsonEqual for NullArray {
-    fn equals_json(&self, json: &[&Value]) -> bool {
-        if self.len() != json.len() {
-            return false;
-        }
-
-        // all JSON values must be nulls
-        json.iter().all(|&v| v == &JNull)
-    }
-}
-
-impl PartialEq<NullArray> for Value {
-    fn eq(&self, arrow: &NullArray) -> bool {
-        match self {
-            Value::Array(json_array) => arrow.equals_json_values(&json_array),
-            _ => false,
-        }
-    }
-}
-
-impl PartialEq<Value> for NullArray {
-    fn eq(&self, json: &Value) -> bool {
-        match json {
-            Value::Array(json_array) => self.equals_json_values(&json_array),
-            _ => false,
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    use crate::error::Result;
-    use std::{convert::TryFrom, sync::Arc};
-
-    fn create_list_array<U: AsRef<[i32]>, T: AsRef<[Option<U>]>>(
-        builder: &mut ListBuilder<Int32Builder>,
-        data: T,
-    ) -> Result<ListArray> {
-        for d in data.as_ref() {
-            if let Some(v) = d {
-                builder.values().append_slice(v.as_ref())?;
-                builder.append(true)?
-            } else {
-                builder.append(false)?
-            }
-        }
-        Ok(builder.finish())
-    }
-
-    /// Create a fixed size list of 2 value lengths
-    fn create_fixed_size_list_array<U: AsRef<[i32]>, T: AsRef<[Option<U>]>>(
-        builder: &mut FixedSizeListBuilder<Int32Builder>,
-        data: T,
-    ) -> Result<FixedSizeListArray> {
-        for d in data.as_ref() {
-            if let Some(v) = d {
-                builder.values().append_slice(v.as_ref())?;
-                builder.append(true)?
-            } else {
-                for _ in 0..builder.value_length() {
-                    builder.values().append_null()?;
-                }
-                builder.append(false)?
-            }
-        }
-        Ok(builder.finish())
-    }
-
-    #[test]
-    fn test_primitive_json_equal() {
-        // Test equaled array
-        let arrow_array = Int32Array::from(vec![Some(1), None, Some(2), Some(3)]);
-        let json_array: Value = serde_json::from_str(
-            r#"
-            [
-                1, null, 2, 3
-            ]
-        "#,
-        )
-        .unwrap();
-        assert!(arrow_array.eq(&json_array));
-        assert!(json_array.eq(&arrow_array));
-
-        // Test unequaled array
-        let arrow_array = Int32Array::from(vec![Some(1), None, Some(2), Some(3)]);
-        let json_array: Value = serde_json::from_str(
-            r#"
-            [
-                1, 1, 2, 3
-            ]
-        "#,
-        )
-        .unwrap();
-        assert!(arrow_array.ne(&json_array));
-        assert!(json_array.ne(&arrow_array));
-
-        // Test unequal length case
-        let arrow_array = Int32Array::from(vec![Some(1), None, Some(2), Some(3)]);
-        let json_array: Value = serde_json::from_str(
-            r#"
-            [
-                1, 1
-            ]
-        "#,
-        )
-        .unwrap();
-        assert!(arrow_array.ne(&json_array));
-        assert!(json_array.ne(&arrow_array));
-
-        // Test not json array type case
-        let arrow_array = Int32Array::from(vec![Some(1), None, Some(2), Some(3)]);
-        let json_array: Value = serde_json::from_str(
-            r#"
-            {
-               "a": 1
-            }
-        "#,
-        )
-        .unwrap();
-        assert!(arrow_array.ne(&json_array));
-        assert!(json_array.ne(&arrow_array));
-    }
-
-    #[test]
-    fn test_list_json_equal() {
-        // Test equal case
-        let arrow_array = create_list_array(
-            &mut ListBuilder::new(Int32Builder::new(10)),
-            &[Some(&[1, 2, 3]), None, Some(&[4, 5, 6])],
-        )
-        .unwrap();
-        let json_array: Value = serde_json::from_str(
-            r#"
-            [
-                [1, 2, 3],
-                null,
-                [4, 5, 6]
-            ]
-        "#,
-        )
-        .unwrap();
-        assert!(arrow_array.eq(&json_array));
-        assert!(json_array.eq(&arrow_array));
-
-        // Test unequal case
-        let arrow_array = create_list_array(
-            &mut ListBuilder::new(Int32Builder::new(10)),
-            &[Some(&[1, 2, 3]), None, Some(&[4, 5, 6])],
-        )
-        .unwrap();
-        let json_array: Value = serde_json::from_str(
-            r#"
-            [
-                [1, 2, 3],
-                [7, 8],
-                [4, 5, 6]
-            ]
-        "#,
-        )
-        .unwrap();
-        assert!(arrow_array.ne(&json_array));
-        assert!(json_array.ne(&arrow_array));
-
-        // Test incorrect type case
-        let arrow_array = create_list_array(
-            &mut ListBuilder::new(Int32Builder::new(10)),
-            &[Some(&[1, 2, 3]), None, Some(&[4, 5, 6])],
-        )
-        .unwrap();
-        let json_array: Value = serde_json::from_str(
-            r#"
-            {
-               "a": 1
-            }
-        "#,
-        )
-        .unwrap();
-        assert!(arrow_array.ne(&json_array));
-        assert!(json_array.ne(&arrow_array));
-    }
-
-    #[test]
-    fn test_fixed_size_list_json_equal() {
-        // Test equal case
-        let arrow_array = create_fixed_size_list_array(
-            &mut FixedSizeListBuilder::new(Int32Builder::new(10), 3),
-            &[Some(&[1, 2, 3]), None, Some(&[4, 5, 6])],
-        )
-        .unwrap();
-        let json_array: Value = serde_json::from_str(
-            r#"
-            [
-                [1, 2, 3],
-                null,
-                [4, 5, 6]
-            ]
-        "#,
-        )
-        .unwrap();
-        println!("{:?}", arrow_array);
-        println!("{:?}", json_array);
-        assert!(arrow_array.eq(&json_array));
-        assert!(json_array.eq(&arrow_array));
-
-        // Test unequal case
-        let arrow_array = create_fixed_size_list_array(
-            &mut FixedSizeListBuilder::new(Int32Builder::new(10), 3),
-            &[Some(&[1, 2, 3]), None, Some(&[4, 5, 6])],
-        )
-        .unwrap();
-        let json_array: Value = serde_json::from_str(
-            r#"
-            [
-                [1, 2, 3],
-                [7, 8, 9],
-                [4, 5, 6]
-            ]
-        "#,
-        )
-        .unwrap();
-        assert!(arrow_array.ne(&json_array));
-        assert!(json_array.ne(&arrow_array));
-
-        // Test incorrect type case
-        let arrow_array = create_fixed_size_list_array(
-            &mut FixedSizeListBuilder::new(Int32Builder::new(10), 3),
-            &[Some(&[1, 2, 3]), None, Some(&[4, 5, 6])],
-        )
-        .unwrap();
-        let json_array: Value = serde_json::from_str(
-            r#"
-            {
-               "a": 1
-            }
-        "#,
-        )
-        .unwrap();
-        assert!(arrow_array.ne(&json_array));
-        assert!(json_array.ne(&arrow_array));
-    }
-
-    #[test]
-    fn test_string_json_equal() {
-        // Test the equal case
-        let arrow_array =
-            StringArray::from(vec![Some("hello"), None, None, Some("world"), None, None]);
-        let json_array: Value = serde_json::from_str(
-            r#"
-            [
-                "hello",
-                null,
-                null,
-                "world",
-                null,
-                null
-            ]
-        "#,
-        )
-        .unwrap();
-        assert!(arrow_array.eq(&json_array));
-        assert!(json_array.eq(&arrow_array));
-
-        // Test unequal case
-        let arrow_array =
-            StringArray::from(vec![Some("hello"), None, None, Some("world"), None, None]);
-        let json_array: Value = serde_json::from_str(
-            r#"
-            [
-                "hello",
-                null,
-                null,
-                "arrow",
-                null,
-                null
-            ]
-        "#,
-        )
-        .unwrap();
-        assert!(arrow_array.ne(&json_array));
-        assert!(json_array.ne(&arrow_array));
-
-        // Test unequal length case
-        let arrow_array =
-            StringArray::from(vec![Some("hello"), None, None, Some("world"), None]);
-        let json_array: Value = serde_json::from_str(
-            r#"
-            [
-                "hello",
-                null,
-                null,
-                "arrow",
-                null,
-                null
-            ]
-        "#,
-        )
-        .unwrap();
-        assert!(arrow_array.ne(&json_array));
-        assert!(json_array.ne(&arrow_array));
-
-        // Test incorrect type case
-        let arrow_array =
-            StringArray::from(vec![Some("hello"), None, None, Some("world"), None]);
-        let json_array: Value = serde_json::from_str(
-            r#"
-            {
-                "a": 1
-            }
-        "#,
-        )
-        .unwrap();
-        assert!(arrow_array.ne(&json_array));
-        assert!(json_array.ne(&arrow_array));
-
-        // Test incorrect value type case
-        let arrow_array =
-            StringArray::from(vec![Some("hello"), None, None, Some("world"), None]);
-        let json_array: Value = serde_json::from_str(
-            r#"
-            [
-                "hello",
-                null,
-                null,
-                1,
-                null,
-                null
-            ]
-        "#,
-        )
-        .unwrap();
-        assert!(arrow_array.ne(&json_array));
-        assert!(json_array.ne(&arrow_array));
-    }
-
-    #[test]
-    fn test_binary_json_equal() {
-        // Test the equal case
-        let mut builder = BinaryBuilder::new(6);
-        builder.append_value(b"hello").unwrap();
-        builder.append_null().unwrap();
-        builder.append_null().unwrap();
-        builder.append_value(b"world").unwrap();
-        builder.append_null().unwrap();
-        builder.append_null().unwrap();
-        let arrow_array = builder.finish();
-        let json_array: Value = serde_json::from_str(
-            r#"
-            [
-                "hello",
-                null,
-                null,
-                "world",
-                null,
-                null
-            ]
-        "#,
-        )
-        .unwrap();
-        assert!(arrow_array.eq(&json_array));
-        assert!(json_array.eq(&arrow_array));
-
-        // Test unequal case
-        let arrow_array =
-            StringArray::from(vec![Some("hello"), None, None, Some("world"), None, None]);
-        let json_array: Value = serde_json::from_str(
-            r#"
-            [
-                "hello",
-                null,
-                null,
-                "arrow",
-                null,
-                null
-            ]
-        "#,
-        )
-        .unwrap();
-        assert!(arrow_array.ne(&json_array));
-        assert!(json_array.ne(&arrow_array));
-
-        // Test unequal length case
-        let arrow_array =
-            StringArray::from(vec![Some("hello"), None, None, Some("world"), None]);
-        let json_array: Value = serde_json::from_str(
-            r#"
-            [
-                "hello",
-                null,
-                null,
-                "arrow",
-                null,
-                null
-            ]
-        "#,
-        )
-        .unwrap();
-        assert!(arrow_array.ne(&json_array));
-        assert!(json_array.ne(&arrow_array));
-
-        // Test incorrect type case
-        let arrow_array =
-            StringArray::from(vec![Some("hello"), None, None, Some("world"), None]);
-        let json_array: Value = serde_json::from_str(
-            r#"
-            {
-                "a": 1
-            }
-        "#,
-        )
-        .unwrap();
-        assert!(arrow_array.ne(&json_array));
-        assert!(json_array.ne(&arrow_array));
-
-        // Test incorrect value type case
-        let arrow_array =
-            StringArray::from(vec![Some("hello"), None, None, Some("world"), None]);
-        let json_array: Value = serde_json::from_str(
-            r#"
-            [
-                "hello",
-                null,
-                null,
-                1,
-                null,
-                null
-            ]
-        "#,
-        )
-        .unwrap();
-        assert!(arrow_array.ne(&json_array));
-        assert!(json_array.ne(&arrow_array));
-    }
-
-    #[test]
-    fn test_fixed_size_binary_json_equal() {
-        // Test the equal case
-        let mut builder = FixedSizeBinaryBuilder::new(15, 5);
-        builder.append_value(b"hello").unwrap();
-        builder.append_null().unwrap();
-        builder.append_value(b"world").unwrap();
-        let arrow_array: FixedSizeBinaryArray = builder.finish();
-        let json_array: Value = serde_json::from_str(
-            r#"
-            [
-                "hello",
-                null,
-                "world"
-            ]
-        "#,
-        )
-        .unwrap();
-        assert!(arrow_array.eq(&json_array));
-        assert!(json_array.eq(&arrow_array));
-
-        // Test unequal case
-        builder.append_value(b"hello").unwrap();
-        builder.append_null().unwrap();
-        builder.append_value(b"world").unwrap();
-        let arrow_array: FixedSizeBinaryArray = builder.finish();
-        let json_array: Value = serde_json::from_str(
-            r#"
-            [
-                "hello",
-                null,
-                "arrow"
-            ]
-        "#,
-        )
-        .unwrap();
-        assert!(arrow_array.ne(&json_array));
-        assert!(json_array.ne(&arrow_array));
-
-        // Test unequal length case
-        let json_array: Value = serde_json::from_str(
-            r#"
-            [
-                "hello",
-                null,
-                null,
-                "world"
-            ]
-        "#,
-        )
-        .unwrap();
-        assert!(arrow_array.ne(&json_array));
-        assert!(json_array.ne(&arrow_array));
-
-        // Test incorrect type case
-        let json_array: Value = serde_json::from_str(
-            r#"
-            {
-                "a": 1
-            }
-        "#,
-        )
-        .unwrap();
-        assert!(arrow_array.ne(&json_array));
-        assert!(json_array.ne(&arrow_array));
-
-        // Test incorrect value type case
-        let json_array: Value = serde_json::from_str(
-            r#"
-            [
-                "hello",
-                null,
-                1
-            ]
-        "#,
-        )
-        .unwrap();
-        assert!(arrow_array.ne(&json_array));
-        assert!(json_array.ne(&arrow_array));
-    }
-
-    #[test]
-    fn test_decimal_json_equal() {
-        // Test the equal case
-        let mut builder = DecimalBuilder::new(30, 23, 6);
-        builder.append_value(1_000).unwrap();
-        builder.append_null().unwrap();
-        builder.append_value(-250).unwrap();
-        let arrow_array: DecimalArray = builder.finish();
-        let json_array: Value = serde_json::from_str(
-            r#"
-            [
-                "1000",
-                null,
-                "-250"
-            ]
-        "#,
-        )
-        .unwrap();
-        assert!(arrow_array.eq(&json_array));
-        assert!(json_array.eq(&arrow_array));
-
-        // Test unequal case
-        builder.append_value(1_000).unwrap();
-        builder.append_null().unwrap();
-        builder.append_value(55).unwrap();
-        let arrow_array: DecimalArray = builder.finish();
-        let json_array: Value = serde_json::from_str(
-            r#"
-            [
-                "1000",
-                null,
-                "-250"
-            ]
-        "#,
-        )
-        .unwrap();
-        assert!(arrow_array.ne(&json_array));
-        assert!(json_array.ne(&arrow_array));
-
-        // Test unequal length case
-        let json_array: Value = serde_json::from_str(
-            r#"
-            [
-                "1000",
-                null,
-                null,
-                "55"
-            ]
-        "#,
-        )
-        .unwrap();
-        assert!(arrow_array.ne(&json_array));
-        assert!(json_array.ne(&arrow_array));
-
-        // Test incorrect type case
-        let json_array: Value = serde_json::from_str(
-            r#"
-            {
-                "a": 1
-            }
-        "#,
-        )
-        .unwrap();
-        assert!(arrow_array.ne(&json_array));
-        assert!(json_array.ne(&arrow_array));
-
-        // Test incorrect value type case
-        let json_array: Value = serde_json::from_str(
-            r#"
-            [
-                "hello",
-                null,
-                1
-            ]
-        "#,
-        )
-        .unwrap();
-        assert!(arrow_array.ne(&json_array));
-        assert!(json_array.ne(&arrow_array));
-    }
-
-    #[test]
-    fn test_struct_json_equal() {
-        let strings: ArrayRef = Arc::new(StringArray::from(vec![
-            Some("joe"),
-            None,
-            None,
-            Some("mark"),
-            Some("doe"),
-        ]));
-        let ints: ArrayRef = Arc::new(Int32Array::from(vec![
-            Some(1),
-            Some(2),
-            None,
-            Some(4),
-            Some(5),
-        ]));
-
-        let arrow_array =
-            StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())])
-                .unwrap();
-
-        let json_array: Value = serde_json::from_str(
-            r#"
-            [
-              {
-                "f1": "joe",
-                "f2": 1
-              },
-              {
-                "f2": 2
-              },
-              null,
-              {
-                "f1": "mark",
-                "f2": 4
-              },
-              {
-                "f1": "doe",
-                "f2": 5
-              }
-            ]
-        "#,
-        )
-        .unwrap();
-        assert!(arrow_array.eq(&json_array));
-        assert!(json_array.eq(&arrow_array));
-
-        // Test unequal length case
-        let json_array: Value = serde_json::from_str(
-            r#"
-            [
-              {
-                "f1": "joe",
-                "f2": 1
-              },
-              {
-                "f2": 2
-              },
-              null,
-              {
-                "f1": "mark",
-                "f2": 4
-              }
-            ]
-        "#,
-        )
-        .unwrap();
-        assert!(arrow_array.ne(&json_array));
-        assert!(json_array.ne(&arrow_array));
-
-        // Test incorrect type case
-        let json_array: Value = serde_json::from_str(
-            r#"
-              {
-                "f1": "joe",
-                "f2": 1
-              }
-        "#,
-        )
-        .unwrap();
-        assert!(arrow_array.ne(&json_array));
-        assert!(json_array.ne(&arrow_array));
-
-        // Test not all object case
-        let json_array: Value = serde_json::from_str(
-            r#"
-            [
-              {
-                "f1": "joe",
-                "f2": 1
-              },
-              2,
-              null,
-              {
-                "f1": "mark",
-                "f2": 4
-              }
-            ]
-        "#,
-        )
-        .unwrap();
-        assert!(arrow_array.ne(&json_array));
-        assert!(json_array.ne(&arrow_array));
-    }
-
-    #[test]
-    fn test_null_json_equal() {
-        // Test equaled array
-        let arrow_array = NullArray::new(4);
-        let json_array: Value = serde_json::from_str(
-            r#"
-            [
-                null, null, null, null
-            ]
-        "#,
-        )
-        .unwrap();
-        assert!(arrow_array.eq(&json_array));
-        assert!(json_array.eq(&arrow_array));
-
-        // Test unequaled array
-        let arrow_array = NullArray::new(2);
-        let json_array: Value = serde_json::from_str(
-            r#"
-            [
-                null, null, null
-            ]
-        "#,
-        )
-        .unwrap();
-        assert!(arrow_array.ne(&json_array));
-        assert!(json_array.ne(&arrow_array));
-    }
-}
diff --git a/rust/arrow/src/array/ffi.rs b/rust/arrow/src/array/ffi.rs
deleted file mode 100644
index 450685bf522..00000000000
--- a/rust/arrow/src/array/ffi.rs
+++ /dev/null
@@ -1,168 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Contains functionality to load an ArrayData from the C Data Interface
-
-use std::convert::TryFrom;
-
-use crate::{
-    error::{ArrowError, Result},
-    ffi,
-};
-
-use super::ArrayData;
-use crate::datatypes::DataType;
-use crate::ffi::ArrowArray;
-
-impl TryFrom<ffi::ArrowArray> for ArrayData {
-    type Error = ArrowError;
-
-    fn try_from(value: ffi::ArrowArray) -> Result<Self> {
-        let child_data = value.children()?;
-
-        let child_type = if !child_data.is_empty() {
-            Some(child_data[0].data_type().clone())
-        } else {
-            None
-        };
-
-        let data_type = value.data_type(child_type)?;
-
-        let len = value.len();
-        let offset = value.offset();
-        let null_count = value.null_count();
-        let buffers = value.buffers()?;
-        let null_bit_buffer = value.null_bit_buffer();
-
-        Ok(ArrayData::new(
-            data_type,
-            len,
-            Some(null_count),
-            null_bit_buffer,
-            offset,
-            buffers,
-            child_data,
-        ))
-    }
-}
-
-impl TryFrom<ArrayData> for ffi::ArrowArray {
-    type Error = ArrowError;
-
-    fn try_from(value: ArrayData) -> Result<Self> {
-        // If parent is nullable, then children also must be nullable
-        // so we pass this nullable to the creation of hte child data
-        let nullable = match value.data_type() {
-            DataType::List(field) => field.is_nullable(),
-            DataType::LargeList(field) => field.is_nullable(),
-            _ => false,
-        };
-
-        let len = value.len();
-        let offset = value.offset() as usize;
-        let null_count = value.null_count();
-        let buffers = value.buffers().to_vec();
-        let null_buffer = value.null_buffer().cloned();
-        let child_data = value
-            .child_data()
-            .iter()
-            .map(|arr| {
-                let len = arr.len();
-                let offset = arr.offset() as usize;
-                let null_count = arr.null_count();
-                let buffers = arr.buffers().to_vec();
-                let null_buffer = arr.null_buffer().cloned();
-
-                // Note: the nullable comes from the parent data.
-                unsafe {
-                    ArrowArray::try_new(
-                        arr.data_type(),
-                        len,
-                        null_count,
-                        null_buffer,
-                        offset,
-                        buffers,
-                        vec![],
-                        nullable,
-                    )
-                    .expect("infallible")
-                }
-            })
-            .collect::<Vec<_>>();
-
-        unsafe {
-            ffi::ArrowArray::try_new(
-                value.data_type(),
-                len,
-                null_count,
-                null_buffer,
-                offset,
-                buffers,
-                child_data,
-                nullable,
-            )
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use crate::error::Result;
-    use crate::{
-        array::{Array, ArrayData, Int64Array, UInt32Array, UInt64Array},
-        ffi::ArrowArray,
-    };
-    use std::convert::TryFrom;
-
-    fn test_round_trip(expected: &ArrayData) -> Result<()> {
-        // create a `ArrowArray` from the data.
-        let d1 = ArrowArray::try_from(expected.clone())?;
-
-        // here we export the array as 2 pointers. We would have no control over ownership if it was not for
-        // the release mechanism.
-        let (array, schema) = ArrowArray::into_raw(d1);
-
-        // simulate an external consumer by being the consumer
-        let d1 = unsafe { ArrowArray::try_from_raw(array, schema) }?;
-
-        let result = &ArrayData::try_from(d1)?;
-
-        assert_eq!(result, expected);
-        Ok(())
-    }
-
-    #[test]
-    fn test_u32() -> Result<()> {
-        let array = UInt32Array::from(vec![Some(2), None, Some(1), None]);
-        let data = array.data();
-        test_round_trip(data)
-    }
-
-    #[test]
-    fn test_u64() -> Result<()> {
-        let array = UInt64Array::from(vec![Some(2), None, Some(1), None]);
-        let data = array.data();
-        test_round_trip(data)
-    }
-
-    #[test]
-    fn test_i64() -> Result<()> {
-        let array = Int64Array::from(vec![Some(2), None, Some(1), None]);
-        let data = array.data();
-        test_round_trip(data)
-    }
-}
diff --git a/rust/arrow/src/array/iterator.rs b/rust/arrow/src/array/iterator.rs
deleted file mode 100644
index d97aa16744c..00000000000
--- a/rust/arrow/src/array/iterator.rs
+++ /dev/null
@@ -1,527 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use crate::datatypes::ArrowPrimitiveType;
-
-use super::{
-    Array, ArrayRef, BinaryOffsetSizeTrait, BooleanArray, GenericBinaryArray,
-    GenericListArray, GenericStringArray, OffsetSizeTrait, PrimitiveArray,
-    StringOffsetSizeTrait,
-};
-
-/// an iterator that returns Some(T) or None, that can be used on any PrimitiveArray
-// Note: This implementation is based on std's [Vec]s' [IntoIter].
-#[derive(Debug)]
-pub struct PrimitiveIter<'a, T: ArrowPrimitiveType> {
-    array: &'a PrimitiveArray<T>,
-    current: usize,
-    current_end: usize,
-}
-
-impl<'a, T: ArrowPrimitiveType> PrimitiveIter<'a, T> {
-    /// create a new iterator
-    pub fn new(array: &'a PrimitiveArray<T>) -> Self {
-        PrimitiveIter::<T> {
-            array,
-            current: 0,
-            current_end: array.len(),
-        }
-    }
-}
-
-impl<'a, T: ArrowPrimitiveType> std::iter::Iterator for PrimitiveIter<'a, T> {
-    type Item = Option<T::Native>;
-
-    #[inline]
-    fn next(&mut self) -> Option<Self::Item> {
-        if self.current == self.current_end {
-            None
-        } else if self.array.is_null(self.current) {
-            self.current += 1;
-            Some(None)
-        } else {
-            let old = self.current;
-            self.current += 1;
-            // Safety:
-            // we just checked bounds in `self.current_end == self.current`
-            // this is safe on the premise that this struct is initialized with
-            // current = array.len()
-            // and that current_end is ever only decremented
-            unsafe { Some(Some(self.array.value_unchecked(old))) }
-        }
-    }
-
-    fn size_hint(&self) -> (usize, Option<usize>) {
-        (
-            self.array.len() - self.current,
-            Some(self.array.len() - self.current),
-        )
-    }
-}
-
-impl<'a, T: ArrowPrimitiveType> std::iter::DoubleEndedIterator for PrimitiveIter<'a, T> {
-    fn next_back(&mut self) -> Option<Self::Item> {
-        if self.current_end == self.current {
-            None
-        } else {
-            self.current_end -= 1;
-            Some(if self.array.is_null(self.current_end) {
-                None
-            } else {
-                // Safety:
-                // we just checked bounds in `self.current_end == self.current`
-                // this is safe on the premise that this struct is initialized with
-                // current = array.len()
-                // and that current_end is ever only decremented
-                unsafe { Some(self.array.value_unchecked(self.current_end)) }
-            })
-        }
-    }
-}
-
-/// all arrays have known size.
-impl<'a, T: ArrowPrimitiveType> std::iter::ExactSizeIterator for PrimitiveIter<'a, T> {}
-
-/// an iterator that returns Some(bool) or None.
-// Note: This implementation is based on std's [Vec]s' [IntoIter].
-#[derive(Debug)]
-pub struct BooleanIter<'a> {
-    array: &'a BooleanArray,
-    current: usize,
-    current_end: usize,
-}
-
-impl<'a> BooleanIter<'a> {
-    /// create a new iterator
-    pub fn new(array: &'a BooleanArray) -> Self {
-        BooleanIter {
-            array,
-            current: 0,
-            current_end: array.len(),
-        }
-    }
-}
-
-impl<'a> std::iter::Iterator for BooleanIter<'a> {
-    type Item = Option<bool>;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        if self.current == self.current_end {
-            None
-        } else if self.array.is_null(self.current) {
-            self.current += 1;
-            Some(None)
-        } else {
-            let old = self.current;
-            self.current += 1;
-            // Safety:
-            // we just checked bounds in `self.current_end == self.current`
-            // this is safe on the premise that this struct is initialized with
-            // current = array.len()
-            // and that current_end is ever only decremented
-            unsafe { Some(Some(self.array.value_unchecked(old))) }
-        }
-    }
-
-    fn size_hint(&self) -> (usize, Option<usize>) {
-        (
-            self.array.len() - self.current,
-            Some(self.array.len() - self.current),
-        )
-    }
-}
-
-impl<'a> std::iter::DoubleEndedIterator for BooleanIter<'a> {
-    fn next_back(&mut self) -> Option<Self::Item> {
-        if self.current_end == self.current {
-            None
-        } else {
-            self.current_end -= 1;
-            Some(if self.array.is_null(self.current_end) {
-                None
-            } else {
-                // Safety:
-                // we just checked bounds in `self.current_end == self.current`
-                // this is safe on the premise that this struct is initialized with
-                // current = array.len()
-                // and that current_end is ever only decremented
-                unsafe { Some(self.array.value_unchecked(self.current_end)) }
-            })
-        }
-    }
-}
-
-/// all arrays have known size.
-impl<'a> std::iter::ExactSizeIterator for BooleanIter<'a> {}
-
-/// an iterator that returns `Some(&str)` or `None`, for string arrays
-#[derive(Debug)]
-pub struct GenericStringIter<'a, T>
-where
-    T: StringOffsetSizeTrait,
-{
-    array: &'a GenericStringArray<T>,
-    current: usize,
-    current_end: usize,
-}
-
-impl<'a, T: StringOffsetSizeTrait> GenericStringIter<'a, T> {
-    /// create a new iterator
-    pub fn new(array: &'a GenericStringArray<T>) -> Self {
-        GenericStringIter::<T> {
-            array,
-            current: 0,
-            current_end: array.len(),
-        }
-    }
-}
-
-impl<'a, T: StringOffsetSizeTrait> std::iter::Iterator for GenericStringIter<'a, T> {
-    type Item = Option<&'a str>;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        let i = self.current;
-        if i >= self.current_end {
-            None
-        } else if self.array.is_null(i) {
-            self.current += 1;
-            Some(None)
-        } else {
-            self.current += 1;
-            // Safety:
-            // we just checked bounds in `self.current_end == self.current`
-            // this is safe on the premise that this struct is initialized with
-            // current = array.len()
-            // and that current_end is ever only decremented
-            unsafe { Some(Some(self.array.value_unchecked(i))) }
-        }
-    }
-
-    fn size_hint(&self) -> (usize, Option<usize>) {
-        (
-            self.current_end - self.current,
-            Some(self.current_end - self.current),
-        )
-    }
-}
-
-impl<'a, T: StringOffsetSizeTrait> std::iter::DoubleEndedIterator
-    for GenericStringIter<'a, T>
-{
-    fn next_back(&mut self) -> Option<Self::Item> {
-        if self.current_end == self.current {
-            None
-        } else {
-            self.current_end -= 1;
-            Some(if self.array.is_null(self.current_end) {
-                None
-            } else {
-                // Safety:
-                // we just checked bounds in `self.current_end == self.current`
-                // this is safe on the premise that this struct is initialized with
-                // current = array.len()
-                // and that current_end is ever only decremented
-                unsafe { Some(self.array.value_unchecked(self.current_end)) }
-            })
-        }
-    }
-}
-
-/// all arrays have known size.
-impl<'a, T: StringOffsetSizeTrait> std::iter::ExactSizeIterator
-    for GenericStringIter<'a, T>
-{
-}
-
-/// an iterator that returns `Some(&[u8])` or `None`, for binary arrays
-#[derive(Debug)]
-pub struct GenericBinaryIter<'a, T>
-where
-    T: BinaryOffsetSizeTrait,
-{
-    array: &'a GenericBinaryArray<T>,
-    current: usize,
-    current_end: usize,
-}
-
-impl<'a, T: BinaryOffsetSizeTrait> GenericBinaryIter<'a, T> {
-    /// create a new iterator
-    pub fn new(array: &'a GenericBinaryArray<T>) -> Self {
-        GenericBinaryIter::<T> {
-            array,
-            current: 0,
-            current_end: array.len(),
-        }
-    }
-}
-
-impl<'a, T: BinaryOffsetSizeTrait> std::iter::Iterator for GenericBinaryIter<'a, T> {
-    type Item = Option<&'a [u8]>;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        let i = self.current;
-        if i >= self.current_end {
-            None
-        } else if self.array.is_null(i) {
-            self.current += 1;
-            Some(None)
-        } else {
-            self.current += 1;
-            // Safety:
-            // we just checked bounds in `self.current_end == self.current`
-            // this is safe on the premise that this struct is initialized with
-            // current = array.len()
-            // and that current_end is ever only decremented
-            unsafe { Some(Some(self.array.value_unchecked(i))) }
-        }
-    }
-
-    fn size_hint(&self) -> (usize, Option<usize>) {
-        (
-            self.current_end - self.current,
-            Some(self.current_end - self.current),
-        )
-    }
-}
-
-impl<'a, T: BinaryOffsetSizeTrait> std::iter::DoubleEndedIterator
-    for GenericBinaryIter<'a, T>
-{
-    fn next_back(&mut self) -> Option<Self::Item> {
-        if self.current_end == self.current {
-            None
-        } else {
-            self.current_end -= 1;
-            Some(if self.array.is_null(self.current_end) {
-                None
-            } else {
-                // Safety:
-                // we just checked bounds in `self.current_end == self.current`
-                // this is safe on the premise that this struct is initialized with
-                // current = array.len()
-                // and that current_end is ever only decremented
-                unsafe { Some(self.array.value_unchecked(self.current_end)) }
-            })
-        }
-    }
-}
-
-/// all arrays have known size.
-impl<'a, T: BinaryOffsetSizeTrait> std::iter::ExactSizeIterator
-    for GenericBinaryIter<'a, T>
-{
-}
-
-#[derive(Debug)]
-pub struct GenericListArrayIter<'a, S>
-where
-    S: OffsetSizeTrait,
-{
-    array: &'a GenericListArray<S>,
-    current: usize,
-    current_end: usize,
-}
-
-impl<'a, S: OffsetSizeTrait> GenericListArrayIter<'a, S> {
-    pub fn new(array: &'a GenericListArray<S>) -> Self {
-        GenericListArrayIter::<S> {
-            array,
-            current: 0,
-            current_end: array.len(),
-        }
-    }
-}
-
-impl<'a, S: OffsetSizeTrait> std::iter::Iterator for GenericListArrayIter<'a, S> {
-    type Item = Option<ArrayRef>;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        let i = self.current;
-        if i >= self.current_end {
-            None
-        } else if self.array.is_null(i) {
-            self.current += 1;
-            Some(None)
-        } else {
-            self.current += 1;
-            // Safety:
-            // we just checked bounds in `self.current_end == self.current`
-            // this is safe on the premise that this struct is initialized with
-            // current = array.len()
-            // and that current_end is ever only decremented
-            unsafe { Some(Some(self.array.value_unchecked(i))) }
-        }
-    }
-
-    fn size_hint(&self) -> (usize, Option<usize>) {
-        (
-            self.current_end - self.current,
-            Some(self.current_end - self.current),
-        )
-    }
-}
-
-impl<'a, S: OffsetSizeTrait> std::iter::DoubleEndedIterator
-    for GenericListArrayIter<'a, S>
-{
-    fn next_back(&mut self) -> Option<Self::Item> {
-        if self.current_end == self.current {
-            None
-        } else {
-            self.current_end -= 1;
-            Some(if self.array.is_null(self.current_end) {
-                None
-            } else {
-                // Safety:
-                // we just checked bounds in `self.current_end == self.current`
-                // this is safe on the premise that this struct is initialized with
-                // current = array.len()
-                // and that current_end is ever only decremented
-                unsafe { Some(self.array.value_unchecked(self.current_end)) }
-            })
-        }
-    }
-}
-
-/// all arrays have known size.
-impl<'a, S: OffsetSizeTrait> std::iter::ExactSizeIterator
-    for GenericListArrayIter<'a, S>
-{
-}
-
-#[cfg(test)]
-mod tests {
-    use std::sync::Arc;
-
-    use crate::array::{ArrayRef, BinaryArray, BooleanArray, Int32Array, StringArray};
-
-    #[test]
-    fn test_primitive_array_iter_round_trip() {
-        let array = Int32Array::from(vec![Some(0), None, Some(2), None, Some(4)]);
-        let array = Arc::new(array) as ArrayRef;
-
-        let array = array.as_any().downcast_ref::<Int32Array>().unwrap();
-
-        // to and from iter, with a +1
-        let result: Int32Array = array.iter().map(|e| e.map(|e| e + 1)).collect();
-
-        let expected = Int32Array::from(vec![Some(1), None, Some(3), None, Some(5)]);
-        assert_eq!(result, expected);
-
-        // check if DoubleEndedIterator is implemented
-        let result: Int32Array = array.iter().rev().collect();
-        let rev_array = Int32Array::from(vec![Some(4), None, Some(2), None, Some(0)]);
-        assert_eq!(result, rev_array);
-        // check if ExactSizeIterator is implemented
-        let _ = array.iter().rposition(|opt_b| opt_b == Some(1));
-    }
-
-    #[test]
-    fn test_double_ended() {
-        let array = Int32Array::from(vec![Some(0), None, Some(2), None, Some(4)]);
-        let mut a = array.iter();
-        assert_eq!(a.next(), Some(Some(0)));
-        assert_eq!(a.next(), Some(None));
-        assert_eq!(a.next_back(), Some(Some(4)));
-        assert_eq!(a.next_back(), Some(None));
-        assert_eq!(a.next_back(), Some(Some(2)));
-        // the two sides have met: None is returned by both
-        assert_eq!(a.next_back(), None);
-        assert_eq!(a.next(), None);
-    }
-
-    #[test]
-    fn test_string_array_iter_round_trip() {
-        let array =
-            StringArray::from(vec![Some("a"), None, Some("aaa"), None, Some("aaaaa")]);
-        let array = Arc::new(array) as ArrayRef;
-
-        let array = array.as_any().downcast_ref::<StringArray>().unwrap();
-
-        // to and from iter, with a +1
-        let result: StringArray = array
-            .iter()
-            .map(|e| {
-                e.map(|e| {
-                    let mut a = e.to_string();
-                    a.push('b');
-                    a
-                })
-            })
-            .collect();
-
-        let expected =
-            StringArray::from(vec![Some("ab"), None, Some("aaab"), None, Some("aaaaab")]);
-        assert_eq!(result, expected);
-
-        // check if DoubleEndedIterator is implemented
-        let result: StringArray = array.iter().rev().collect();
-        let rev_array =
-            StringArray::from(vec![Some("aaaaa"), None, Some("aaa"), None, Some("a")]);
-        assert_eq!(result, rev_array);
-        // check if ExactSizeIterator is implemented
-        let _ = array.iter().rposition(|opt_b| opt_b == Some("a"));
-    }
-
-    #[test]
-    fn test_binary_array_iter_round_trip() {
-        let array = BinaryArray::from(vec![
-            Some(b"a" as &[u8]),
-            None,
-            Some(b"aaa"),
-            None,
-            Some(b"aaaaa"),
-        ]);
-
-        // to and from iter
-        let result: BinaryArray = array.iter().collect();
-
-        assert_eq!(result, array);
-
-        // check if DoubleEndedIterator is implemented
-        let result: BinaryArray = array.iter().rev().collect();
-        let rev_array = BinaryArray::from(vec![
-            Some(b"aaaaa" as &[u8]),
-            None,
-            Some(b"aaa"),
-            None,
-            Some(b"a"),
-        ]);
-        assert_eq!(result, rev_array);
-
-        // check if ExactSizeIterator is implemented
-        let _ = array.iter().rposition(|opt_b| opt_b == Some(&[9]));
-    }
-
-    #[test]
-    fn test_boolean_array_iter_round_trip() {
-        let array = BooleanArray::from(vec![Some(true), None, Some(false)]);
-
-        // to and from iter
-        let result: BooleanArray = array.iter().collect();
-
-        assert_eq!(result, array);
-
-        // check if DoubleEndedIterator is implemented
-        let result: BooleanArray = array.iter().rev().collect();
-        let rev_array = BooleanArray::from(vec![Some(false), None, Some(true)]);
-        assert_eq!(result, rev_array);
-
-        // check if ExactSizeIterator is implemented
-        let _ = array.iter().rposition(|opt_b| opt_b == Some(true));
-    }
-}
diff --git a/rust/arrow/src/array/mod.rs b/rust/arrow/src/array/mod.rs
deleted file mode 100644
index 65cf30832e2..00000000000
--- a/rust/arrow/src/array/mod.rs
+++ /dev/null
@@ -1,283 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! The central type in Apache Arrow are arrays, represented
-//! by the [`Array` trait](crate::array::Array).
-//! An array represents a known-length sequence of values all
-//! having the same type.
-//!
-//! Internally, those values are represented by one or several
-//! [buffers](crate::buffer::Buffer), the number and meaning
-//! of which depend on the array’s data type, as documented in
-//! [the Arrow data layout specification](https://arrow.apache.org/docs/format/Columnar.html).
-//! For example, the type `Int16Array` represents an Apache
-//! Arrow array of 16-bit integers.
-//!
-//! Those buffers consist of the value data itself and an
-//! optional [bitmap buffer](crate::bitmap::Bitmap) that
-//! indicates which array entries are null values.
-//! The bitmap buffer can be entirely omitted if the array is
-//! known to have zero null values.
-//!
-//! There are concrete implementations of this trait for each
-//! data type, that help you access individual values of the
-//! array.
-//!
-//! # Building an Array
-//!
-//! Arrow's `Arrays` are immutable, but there is the trait
-//! [`ArrayBuilder`](crate::array::ArrayBuilder)
-//! that helps you with constructing new `Arrays`. As with the
-//! `Array` trait, there are builder implementations for all
-//! concrete array types.
-//!
-//! # Example
-//! ```
-//! extern crate arrow;
-//!
-//! use arrow::array::Int16Array;
-//!
-//! // Create a new builder with a capacity of 100
-//! let mut builder = Int16Array::builder(100);
-//!
-//! // Append a single primitive value
-//! builder.append_value(1).unwrap();
-//!
-//! // Append a null value
-//! builder.append_null().unwrap();
-//!
-//! // Append a slice of primitive values
-//! builder.append_slice(&[2, 3, 4]).unwrap();
-//!
-//! // Build the array
-//! let array = builder.finish();
-//!
-//! assert_eq!(
-//!     5,
-//!     array.len(),
-//!     "The array has 5 values, counting the null value"
-//! );
-//!
-//! assert_eq!(2, array.value(2), "Get the value with index 2");
-//!
-//! assert_eq!(
-//!     &array.values()[3..5],
-//!     &[3, 4],
-//!     "Get slice of len 2 starting at idx 3"
-//! )
-//! ```
-
-#[allow(clippy::module_inception)]
-mod array;
-mod array_binary;
-mod array_boolean;
-mod array_dictionary;
-mod array_list;
-mod array_primitive;
-mod array_string;
-mod array_struct;
-mod array_union;
-mod builder;
-mod cast;
-mod data;
-mod equal;
-mod equal_json;
-mod ffi;
-mod iterator;
-mod null;
-mod ord;
-mod raw_pointer;
-mod transform;
-
-use crate::datatypes::*;
-
-// --------------------- Array & ArrayData ---------------------
-
-pub use self::array::Array;
-pub use self::array::ArrayRef;
-pub use self::data::ArrayData;
-pub use self::data::ArrayDataBuilder;
-pub use self::data::ArrayDataRef;
-
-pub use self::array_binary::BinaryArray;
-pub use self::array_binary::DecimalArray;
-pub use self::array_binary::FixedSizeBinaryArray;
-pub use self::array_binary::LargeBinaryArray;
-pub use self::array_boolean::BooleanArray;
-pub use self::array_dictionary::DictionaryArray;
-pub use self::array_list::FixedSizeListArray;
-pub use self::array_list::LargeListArray;
-pub use self::array_list::ListArray;
-pub use self::array_primitive::PrimitiveArray;
-pub use self::array_string::LargeStringArray;
-pub use self::array_string::StringArray;
-pub use self::array_struct::StructArray;
-pub use self::array_union::UnionArray;
-pub use self::null::NullArray;
-
-pub use self::array::make_array;
-pub use self::array::new_empty_array;
-pub use self::array::new_null_array;
-
-pub type Int8Array = PrimitiveArray<Int8Type>;
-pub type Int16Array = PrimitiveArray<Int16Type>;
-pub type Int32Array = PrimitiveArray<Int32Type>;
-pub type Int64Array = PrimitiveArray<Int64Type>;
-pub type UInt8Array = PrimitiveArray<UInt8Type>;
-pub type UInt16Array = PrimitiveArray<UInt16Type>;
-pub type UInt32Array = PrimitiveArray<UInt32Type>;
-pub type UInt64Array = PrimitiveArray<UInt64Type>;
-pub type Float32Array = PrimitiveArray<Float32Type>;
-pub type Float64Array = PrimitiveArray<Float64Type>;
-
-pub type Int8DictionaryArray = DictionaryArray<Int8Type>;
-pub type Int16DictionaryArray = DictionaryArray<Int16Type>;
-pub type Int32DictionaryArray = DictionaryArray<Int32Type>;
-pub type Int64DictionaryArray = DictionaryArray<Int64Type>;
-pub type UInt8DictionaryArray = DictionaryArray<UInt8Type>;
-pub type UInt16DictionaryArray = DictionaryArray<UInt16Type>;
-pub type UInt32DictionaryArray = DictionaryArray<UInt32Type>;
-pub type UInt64DictionaryArray = DictionaryArray<UInt64Type>;
-
-pub type TimestampSecondArray = PrimitiveArray<TimestampSecondType>;
-pub type TimestampMillisecondArray = PrimitiveArray<TimestampMillisecondType>;
-pub type TimestampMicrosecondArray = PrimitiveArray<TimestampMicrosecondType>;
-pub type TimestampNanosecondArray = PrimitiveArray<TimestampNanosecondType>;
-pub type Date32Array = PrimitiveArray<Date32Type>;
-pub type Date64Array = PrimitiveArray<Date64Type>;
-pub type Time32SecondArray = PrimitiveArray<Time32SecondType>;
-pub type Time32MillisecondArray = PrimitiveArray<Time32MillisecondType>;
-pub type Time64MicrosecondArray = PrimitiveArray<Time64MicrosecondType>;
-pub type Time64NanosecondArray = PrimitiveArray<Time64NanosecondType>;
-pub type IntervalYearMonthArray = PrimitiveArray<IntervalYearMonthType>;
-pub type IntervalDayTimeArray = PrimitiveArray<IntervalDayTimeType>;
-pub type DurationSecondArray = PrimitiveArray<DurationSecondType>;
-pub type DurationMillisecondArray = PrimitiveArray<DurationMillisecondType>;
-pub type DurationMicrosecondArray = PrimitiveArray<DurationMicrosecondType>;
-pub type DurationNanosecondArray = PrimitiveArray<DurationNanosecondType>;
-
-pub use self::array_binary::BinaryOffsetSizeTrait;
-pub use self::array_binary::GenericBinaryArray;
-pub use self::array_list::GenericListArray;
-pub use self::array_list::OffsetSizeTrait;
-pub use self::array_string::GenericStringArray;
-pub use self::array_string::StringOffsetSizeTrait;
-
-// --------------------- Array Builder ---------------------
-
-pub use self::builder::BooleanBufferBuilder;
-pub use self::builder::BufferBuilder;
-
-pub type Int8BufferBuilder = BufferBuilder<i8>;
-pub type Int16BufferBuilder = BufferBuilder<i16>;
-pub type Int32BufferBuilder = BufferBuilder<i32>;
-pub type Int64BufferBuilder = BufferBuilder<i64>;
-pub type UInt8BufferBuilder = BufferBuilder<u8>;
-pub type UInt16BufferBuilder = BufferBuilder<u16>;
-pub type UInt32BufferBuilder = BufferBuilder<u32>;
-pub type UInt64BufferBuilder = BufferBuilder<u64>;
-pub type Float32BufferBuilder = BufferBuilder<f32>;
-pub type Float64BufferBuilder = BufferBuilder<f64>;
-
-pub type TimestampSecondBufferBuilder = BufferBuilder<TimestampSecondType>;
-pub type TimestampMillisecondBufferBuilder = BufferBuilder<TimestampMillisecondType>;
-pub type TimestampMicrosecondBufferBuilder = BufferBuilder<TimestampMicrosecondType>;
-pub type TimestampNanosecondBufferBuilder = BufferBuilder<TimestampNanosecondType>;
-pub type Date32BufferBuilder = BufferBuilder<Date32Type>;
-pub type Date64BufferBuilder = BufferBuilder<Date64Type>;
-pub type Time32SecondBufferBuilder = BufferBuilder<Time32SecondType>;
-pub type Time32MillisecondBufferBuilder = BufferBuilder<Time32MillisecondType>;
-pub type Time64MicrosecondBufferBuilder = BufferBuilder<Time64MicrosecondType>;
-pub type Time64NanosecondBufferBuilder = BufferBuilder<Time64NanosecondType>;
-pub type IntervalYearMonthBufferBuilder = BufferBuilder<IntervalYearMonthType>;
-pub type IntervalDayTimeBufferBuilder = BufferBuilder<IntervalDayTimeType>;
-pub type DurationSecondBufferBuilder = BufferBuilder<DurationSecondType>;
-pub type DurationMillisecondBufferBuilder = BufferBuilder<DurationMillisecondType>;
-pub type DurationMicrosecondBufferBuilder = BufferBuilder<DurationMicrosecondType>;
-pub type DurationNanosecondBufferBuilder = BufferBuilder<DurationNanosecondType>;
-
-pub use self::builder::ArrayBuilder;
-pub use self::builder::BinaryBuilder;
-pub use self::builder::BooleanBuilder;
-pub use self::builder::DecimalBuilder;
-pub use self::builder::FixedSizeBinaryBuilder;
-pub use self::builder::FixedSizeListBuilder;
-pub use self::builder::GenericStringBuilder;
-pub use self::builder::LargeBinaryBuilder;
-pub use self::builder::LargeListBuilder;
-pub use self::builder::LargeStringBuilder;
-pub use self::builder::ListBuilder;
-pub use self::builder::PrimitiveBuilder;
-pub use self::builder::PrimitiveDictionaryBuilder;
-pub use self::builder::StringBuilder;
-pub use self::builder::StringDictionaryBuilder;
-pub use self::builder::StructBuilder;
-pub use self::builder::UnionBuilder;
-
-pub type Int8Builder = PrimitiveBuilder<Int8Type>;
-pub type Int16Builder = PrimitiveBuilder<Int16Type>;
-pub type Int32Builder = PrimitiveBuilder<Int32Type>;
-pub type Int64Builder = PrimitiveBuilder<Int64Type>;
-pub type UInt8Builder = PrimitiveBuilder<UInt8Type>;
-pub type UInt16Builder = PrimitiveBuilder<UInt16Type>;
-pub type UInt32Builder = PrimitiveBuilder<UInt32Type>;
-pub type UInt64Builder = PrimitiveBuilder<UInt64Type>;
-pub type Float32Builder = PrimitiveBuilder<Float32Type>;
-pub type Float64Builder = PrimitiveBuilder<Float64Type>;
-
-pub type TimestampSecondBuilder = PrimitiveBuilder<TimestampSecondType>;
-pub type TimestampMillisecondBuilder = PrimitiveBuilder<TimestampMillisecondType>;
-pub type TimestampMicrosecondBuilder = PrimitiveBuilder<TimestampMicrosecondType>;
-pub type TimestampNanosecondBuilder = PrimitiveBuilder<TimestampNanosecondType>;
-pub type Date32Builder = PrimitiveBuilder<Date32Type>;
-pub type Date64Builder = PrimitiveBuilder<Date64Type>;
-pub type Time32SecondBuilder = PrimitiveBuilder<Time32SecondType>;
-pub type Time32MillisecondBuilder = PrimitiveBuilder<Time32MillisecondType>;
-pub type Time64MicrosecondBuilder = PrimitiveBuilder<Time64MicrosecondType>;
-pub type Time64NanosecondBuilder = PrimitiveBuilder<Time64NanosecondType>;
-pub type IntervalYearMonthBuilder = PrimitiveBuilder<IntervalYearMonthType>;
-pub type IntervalDayTimeBuilder = PrimitiveBuilder<IntervalDayTimeType>;
-pub type DurationSecondBuilder = PrimitiveBuilder<DurationSecondType>;
-pub type DurationMillisecondBuilder = PrimitiveBuilder<DurationMillisecondType>;
-pub type DurationMicrosecondBuilder = PrimitiveBuilder<DurationMicrosecondType>;
-pub type DurationNanosecondBuilder = PrimitiveBuilder<DurationNanosecondType>;
-
-pub use self::transform::MutableArrayData;
-
-// --------------------- Array Iterator ---------------------
-
-pub use self::iterator::*;
-
-// --------------------- Array Equality ---------------------
-
-pub use self::equal_json::JsonEqual;
-
-// --------------------- Array's values comparison ---------------------
-
-pub use self::ord::{build_compare, DynComparator};
-
-// --------------------- Array downcast helper functions ---------------------
-
-pub use self::cast::{
-    as_boolean_array, as_dictionary_array, as_generic_list_array, as_large_list_array,
-    as_largestring_array, as_list_array, as_null_array, as_primitive_array,
-    as_string_array, as_struct_array,
-};
-
-// ------------------------------ C Data Interface ---------------------------
-
-pub use self::array::make_array_from_raw;
diff --git a/rust/arrow/src/array/null.rs b/rust/arrow/src/array/null.rs
deleted file mode 100644
index 8e95bb00ed1..00000000000
--- a/rust/arrow/src/array/null.rs
+++ /dev/null
@@ -1,155 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Contains the `NullArray` type.
-//!
-//! A `NullArray` is a simplified array where all values are null.
-//!
-//! # Example: Create an array
-//!
-//! ```
-//! use arrow::array::{Array, NullArray};
-//!
-//! # fn main() -> arrow::error::Result<()> {
-//! let array = NullArray::new(10);
-//!
-//! assert_eq!(array.len(), 10);
-//! assert_eq!(array.null_count(), 10);
-//!
-//! # Ok(())
-//! # }
-//! ```
-
-use std::any::Any;
-use std::fmt;
-use std::mem;
-
-use crate::array::{Array, ArrayData};
-use crate::datatypes::*;
-
-/// An Array where all elements are nulls
-pub struct NullArray {
-    data: ArrayData,
-}
-
-impl NullArray {
-    /// Create a new null array of the specified length
-    pub fn new(length: usize) -> Self {
-        let array_data = ArrayData::builder(DataType::Null).len(length).build();
-        NullArray::from(array_data)
-    }
-}
-
-impl Array for NullArray {
-    fn as_any(&self) -> &Any {
-        self
-    }
-
-    fn data(&self) -> &ArrayData {
-        &self.data
-    }
-
-    /// Returns whether the element at `index` is null.
-    /// All elements of a `NullArray` are always null.
-    fn is_null(&self, _index: usize) -> bool {
-        true
-    }
-
-    /// Returns whether the element at `index` is valid.
-    /// All elements of a `NullArray` are always invalid.
-    fn is_valid(&self, _index: usize) -> bool {
-        false
-    }
-
-    /// Returns the total number of null values in this array.
-    /// The null count of a `NullArray` always equals its length.
-    fn null_count(&self) -> usize {
-        self.data_ref().len()
-    }
-
-    /// Returns the total number of bytes of memory occupied by the buffers owned by this [NullArray].
-    fn get_buffer_memory_size(&self) -> usize {
-        self.data.get_buffer_memory_size()
-    }
-
-    /// Returns the total number of bytes of memory occupied physically by this [NullArray].
-    fn get_array_memory_size(&self) -> usize {
-        mem::size_of_val(self)
-    }
-}
-
-impl From<ArrayData> for NullArray {
-    fn from(data: ArrayData) -> Self {
-        assert_eq!(
-            data.data_type(),
-            &DataType::Null,
-            "NullArray data type should be Null"
-        );
-        assert_eq!(
-            data.buffers().len(),
-            0,
-            "NullArray data should contain 0 buffers"
-        );
-        assert!(
-            data.null_buffer().is_none(),
-            "NullArray data should not contain a null buffer, as no buffers are required"
-        );
-        Self { data }
-    }
-}
-
-impl fmt::Debug for NullArray {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "NullArray({})", self.len())
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_null_array() {
-        let null_arr = NullArray::new(32);
-
-        assert_eq!(null_arr.len(), 32);
-        assert_eq!(null_arr.null_count(), 32);
-        assert_eq!(null_arr.is_valid(0), false);
-
-        assert_eq!(0, null_arr.get_buffer_memory_size());
-        assert_eq!(
-            null_arr.get_buffer_memory_size() + std::mem::size_of::<NullArray>(),
-            null_arr.get_array_memory_size()
-        );
-    }
-
-    #[test]
-    fn test_null_array_slice() {
-        let array1 = NullArray::new(32);
-
-        let array2 = array1.slice(8, 16);
-        assert_eq!(array2.len(), 16);
-        assert_eq!(array2.null_count(), 16);
-        assert_eq!(array2.offset(), 8);
-    }
-
-    #[test]
-    fn test_debug_null_array() {
-        let array = NullArray::new(1024 * 1024);
-        assert_eq!(format!("{:?}", array), "NullArray(1048576)");
-    }
-}
diff --git a/rust/arrow/src/array/ord.rs b/rust/arrow/src/array/ord.rs
deleted file mode 100644
index efd68b12264..00000000000
--- a/rust/arrow/src/array/ord.rs
+++ /dev/null
@@ -1,310 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Contains functions and function factories to compare arrays.
-
-use std::cmp::Ordering;
-
-use crate::array::*;
-use crate::datatypes::TimeUnit;
-use crate::datatypes::*;
-use crate::error::{ArrowError, Result};
-
-use num::Float;
-
-/// Compare the values at two arbitrary indices in two arrays.
-pub type DynComparator<'a> = Box<dyn Fn(usize, usize) -> Ordering + 'a>;
-
-/// compares two floats, placing NaNs at last
-fn cmp_nans_last<T: Float>(a: &T, b: &T) -> Ordering {
-    match (a.is_nan(), b.is_nan()) {
-        (true, true) => Ordering::Equal,
-        (true, false) => Ordering::Greater,
-        (false, true) => Ordering::Less,
-        _ => a.partial_cmp(b).unwrap(),
-    }
-}
-
-fn compare_primitives<'a, T: ArrowPrimitiveType>(
-    left: &'a Array,
-    right: &'a Array,
-) -> DynComparator<'a>
-where
-    T::Native: Ord,
-{
-    let left = left.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
-    let right = right.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
-    Box::new(move |i, j| left.value(i).cmp(&right.value(j)))
-}
-
-fn compare_boolean<'a>(left: &'a Array, right: &'a Array) -> DynComparator<'a> {
-    let left = left.as_any().downcast_ref::<BooleanArray>().unwrap();
-    let right = right.as_any().downcast_ref::<BooleanArray>().unwrap();
-    Box::new(move |i, j| left.value(i).cmp(&right.value(j)))
-}
-
-fn compare_float<'a, T: ArrowPrimitiveType>(
-    left: &'a Array,
-    right: &'a Array,
-) -> DynComparator<'a>
-where
-    T::Native: Float,
-{
-    let left = left.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
-    let right = right.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
-    Box::new(move |i, j| cmp_nans_last(&left.value(i), &right.value(j)))
-}
-
-fn compare_string<'a, T>(left: &'a Array, right: &'a Array) -> DynComparator<'a>
-where
-    T: StringOffsetSizeTrait,
-{
-    let left = left
-        .as_any()
-        .downcast_ref::<GenericStringArray<T>>()
-        .unwrap();
-    let right = right
-        .as_any()
-        .downcast_ref::<GenericStringArray<T>>()
-        .unwrap();
-    Box::new(move |i, j| left.value(i).cmp(&right.value(j)))
-}
-
-fn compare_dict_string<'a, T>(left: &'a Array, right: &'a Array) -> DynComparator<'a>
-where
-    T: ArrowDictionaryKeyType,
-{
-    let left = left.as_any().downcast_ref::<DictionaryArray<T>>().unwrap();
-    let right = right.as_any().downcast_ref::<DictionaryArray<T>>().unwrap();
-    let left_keys = left.keys_array();
-    let right_keys = right.keys_array();
-
-    let left_values = StringArray::from(left.values().data().clone());
-    let right_values = StringArray::from(left.values().data().clone());
-
-    Box::new(move |i: usize, j: usize| {
-        let key_left = left_keys.value(i).to_usize().unwrap();
-        let key_right = right_keys.value(j).to_usize().unwrap();
-        let left = left_values.value(key_left);
-        let right = right_values.value(key_right);
-        left.cmp(&right)
-    })
-}
-
-/// returns a comparison function that compares two values at two different positions
-/// between the two arrays.
-/// The arrays' types must be equal.
-/// # Example
-/// ```
-/// use arrow::array::{build_compare, Int32Array};
-///
-/// # fn main() -> arrow::error::Result<()> {
-/// let array1 = Int32Array::from(vec![1, 2]);
-/// let array2 = Int32Array::from(vec![3, 4]);
-///
-/// let cmp = build_compare(&array1, &array2)?;
-///
-/// // 1 (index 0 of array1) is smaller than 4 (index 1 of array2)
-/// assert_eq!(std::cmp::Ordering::Less, (cmp)(0, 1));
-/// # Ok(())
-/// # }
-/// ```
-// This is a factory of comparisons.
-// The lifetime 'a enforces that we cannot use the closure beyond any of the array's lifetime.
-pub fn build_compare<'a>(left: &'a Array, right: &'a Array) -> Result<DynComparator<'a>> {
-    use DataType::*;
-    use IntervalUnit::*;
-    use TimeUnit::*;
-    Ok(match (left.data_type(), right.data_type()) {
-        (a, b) if a != b => {
-            return Err(ArrowError::InvalidArgumentError(
-                "Can't compare arrays of different types".to_string(),
-            ));
-        }
-        (Boolean, Boolean) => compare_boolean(left, right),
-        (UInt8, UInt8) => compare_primitives::<UInt8Type>(left, right),
-        (UInt16, UInt16) => compare_primitives::<UInt16Type>(left, right),
-        (UInt32, UInt32) => compare_primitives::<UInt32Type>(left, right),
-        (UInt64, UInt64) => compare_primitives::<UInt64Type>(left, right),
-        (Int8, Int8) => compare_primitives::<Int8Type>(left, right),
-        (Int16, Int16) => compare_primitives::<Int16Type>(left, right),
-        (Int32, Int32) => compare_primitives::<Int32Type>(left, right),
-        (Int64, Int64) => compare_primitives::<Int64Type>(left, right),
-        (Float32, Float32) => compare_float::<Float32Type>(left, right),
-        (Float64, Float64) => compare_float::<Float64Type>(left, right),
-        (Date32, Date32) => compare_primitives::<Date32Type>(left, right),
-        (Date64, Date64) => compare_primitives::<Date64Type>(left, right),
-        (Time32(Second), Time32(Second)) => {
-            compare_primitives::<Time32SecondType>(left, right)
-        }
-        (Time32(Millisecond), Time32(Millisecond)) => {
-            compare_primitives::<Time32MillisecondType>(left, right)
-        }
-        (Time64(Microsecond), Time64(Microsecond)) => {
-            compare_primitives::<Time64MicrosecondType>(left, right)
-        }
-        (Time64(Nanosecond), Time64(Nanosecond)) => {
-            compare_primitives::<Time64NanosecondType>(left, right)
-        }
-        (Timestamp(Second, _), Timestamp(Second, _)) => {
-            compare_primitives::<TimestampSecondType>(left, right)
-        }
-        (Timestamp(Millisecond, _), Timestamp(Millisecond, _)) => {
-            compare_primitives::<TimestampMillisecondType>(left, right)
-        }
-        (Timestamp(Microsecond, _), Timestamp(Microsecond, _)) => {
-            compare_primitives::<TimestampMicrosecondType>(left, right)
-        }
-        (Timestamp(Nanosecond, _), Timestamp(Nanosecond, _)) => {
-            compare_primitives::<TimestampNanosecondType>(left, right)
-        }
-        (Interval(YearMonth), Interval(YearMonth)) => {
-            compare_primitives::<IntervalYearMonthType>(left, right)
-        }
-        (Interval(DayTime), Interval(DayTime)) => {
-            compare_primitives::<IntervalDayTimeType>(left, right)
-        }
-        (Duration(Second), Duration(Second)) => {
-            compare_primitives::<DurationSecondType>(left, right)
-        }
-        (Duration(Millisecond), Duration(Millisecond)) => {
-            compare_primitives::<DurationMillisecondType>(left, right)
-        }
-        (Duration(Microsecond), Duration(Microsecond)) => {
-            compare_primitives::<DurationMicrosecondType>(left, right)
-        }
-        (Duration(Nanosecond), Duration(Nanosecond)) => {
-            compare_primitives::<DurationNanosecondType>(left, right)
-        }
-        (Utf8, Utf8) => compare_string::<i32>(left, right),
-        (LargeUtf8, LargeUtf8) => compare_string::<i64>(left, right),
-        (
-            Dictionary(key_type_lhs, value_type_lhs),
-            Dictionary(key_type_rhs, value_type_rhs),
-        ) => {
-            if value_type_lhs.as_ref() != &DataType::Utf8
-                || value_type_rhs.as_ref() != &DataType::Utf8
-            {
-                return Err(ArrowError::InvalidArgumentError(
-                    "Arrow still does not support comparisons of non-string dictionary arrays"
-                        .to_string(),
-                ));
-            }
-            match (key_type_lhs.as_ref(), key_type_rhs.as_ref()) {
-                (a, b) if a != b => {
-                    return Err(ArrowError::InvalidArgumentError(
-                        "Can't compare arrays of different types".to_string(),
-                    ));
-                }
-                (UInt8, UInt8) => compare_dict_string::<UInt8Type>(left, right),
-                (UInt16, UInt16) => compare_dict_string::<UInt16Type>(left, right),
-                (UInt32, UInt32) => compare_dict_string::<UInt32Type>(left, right),
-                (UInt64, UInt64) => compare_dict_string::<UInt64Type>(left, right),
-                (Int8, Int8) => compare_dict_string::<Int8Type>(left, right),
-                (Int16, Int16) => compare_dict_string::<Int16Type>(left, right),
-                (Int32, Int32) => compare_dict_string::<Int32Type>(left, right),
-                (Int64, Int64) => compare_dict_string::<Int64Type>(left, right),
-                (lhs, _) => {
-                    return Err(ArrowError::InvalidArgumentError(format!(
-                        "Dictionaries do not support keys of type {:?}",
-                        lhs
-                    )))
-                }
-            }
-        }
-        (lhs, _) => {
-            return Err(ArrowError::InvalidArgumentError(format!(
-                "The data type type {:?} has no natural order",
-                lhs
-            )))
-        }
-    })
-}
-
-#[cfg(test)]
-pub mod tests {
-    use super::*;
-    use crate::array::{Float64Array, Int32Array};
-    use crate::error::Result;
-    use std::cmp::Ordering;
-    use std::iter::FromIterator;
-
-    #[test]
-    fn test_i32() -> Result<()> {
-        let array = Int32Array::from(vec![1, 2]);
-
-        let cmp = build_compare(&array, &array)?;
-
-        assert_eq!(Ordering::Less, (cmp)(0, 1));
-        Ok(())
-    }
-
-    #[test]
-    fn test_i32_i32() -> Result<()> {
-        let array1 = Int32Array::from(vec![1]);
-        let array2 = Int32Array::from(vec![2]);
-
-        let cmp = build_compare(&array1, &array2)?;
-
-        assert_eq!(Ordering::Less, (cmp)(0, 0));
-        Ok(())
-    }
-
-    #[test]
-    fn test_f64() -> Result<()> {
-        let array = Float64Array::from(vec![1.0, 2.0]);
-
-        let cmp = build_compare(&array, &array)?;
-
-        assert_eq!(Ordering::Less, (cmp)(0, 1));
-        Ok(())
-    }
-
-    #[test]
-    fn test_f64_nan() -> Result<()> {
-        let array = Float64Array::from(vec![1.0, f64::NAN]);
-
-        let cmp = build_compare(&array, &array)?;
-
-        assert_eq!(Ordering::Less, (cmp)(0, 1));
-        Ok(())
-    }
-
-    #[test]
-    fn test_f64_zeros() -> Result<()> {
-        let array = Float64Array::from(vec![-0.0, 0.0]);
-
-        let cmp = build_compare(&array, &array)?;
-
-        assert_eq!(Ordering::Equal, (cmp)(0, 1));
-        assert_eq!(Ordering::Equal, (cmp)(1, 0));
-        Ok(())
-    }
-
-    #[test]
-    fn test_dict() -> Result<()> {
-        let data = vec!["a", "b", "c", "a", "a", "c", "c"];
-        let array = DictionaryArray::<Int16Type>::from_iter(data.into_iter());
-
-        let cmp = build_compare(&array, &array)?;
-
-        assert_eq!(Ordering::Less, (cmp)(0, 1));
-        assert_eq!(Ordering::Equal, (cmp)(3, 4));
-        assert_eq!(Ordering::Greater, (cmp)(2, 3));
-        Ok(())
-    }
-}
diff --git a/rust/arrow/src/array/raw_pointer.rs b/rust/arrow/src/array/raw_pointer.rs
deleted file mode 100644
index 185e1cbe98a..00000000000
--- a/rust/arrow/src/array/raw_pointer.rs
+++ /dev/null
@@ -1,64 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::ptr::NonNull;
-
-/// This struct is highly `unsafe` and offers the possibility to self-reference a [arrow::Buffer] from [arrow::array::ArrayData].
-/// as a pointer to the beginning of its contents.
-pub(super) struct RawPtrBox<T> {
-    ptr: NonNull<T>,
-}
-
-impl<T> RawPtrBox<T> {
-    /// # Safety
-    /// The user must guarantee that:
-    /// * the contents where `ptr` points to are never `moved`. This is guaranteed when they are Pinned.
-    /// * the lifetime of this struct does not outlive the lifetime of `ptr`.
-    /// Failure to fulfill any the above conditions results in undefined behavior.
-    /// # Panic
-    /// This function panics if:
-    /// * `ptr` is null
-    /// * `ptr` is not aligned to a slice of type `T`. This is guaranteed if it was built from a slice of type `T`.
-    pub(super) unsafe fn new(ptr: *const u8) -> Self {
-        let ptr = NonNull::new(ptr as *mut u8).expect("Pointer cannot be null");
-        assert_eq!(
-            ptr.as_ptr().align_offset(std::mem::align_of::<T>()),
-            0,
-            "memory is not aligned"
-        );
-        Self { ptr: ptr.cast() }
-    }
-
-    pub(super) fn as_ptr(&self) -> *const T {
-        self.ptr.as_ptr()
-    }
-}
-
-unsafe impl<T> Send for RawPtrBox<T> {}
-unsafe impl<T> Sync for RawPtrBox<T> {}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    #[should_panic(expected = "memory is not aligned")]
-    fn test_primitive_array_alignment() {
-        let bytes = vec![0u8, 1u8];
-        unsafe { RawPtrBox::<u64>::new(bytes.as_ptr().offset(1)) };
-    }
-}
diff --git a/rust/arrow/src/array/transform/boolean.rs b/rust/arrow/src/array/transform/boolean.rs
deleted file mode 100644
index 18291497173..00000000000
--- a/rust/arrow/src/array/transform/boolean.rs
+++ /dev/null
@@ -1,45 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use crate::array::ArrayData;
-
-use super::{
-    Extend, _MutableArrayData,
-    utils::{resize_for_bits, set_bits},
-};
-
-pub(super) fn build_extend(array: &ArrayData) -> Extend {
-    let values = array.buffers()[0].as_slice();
-    Box::new(
-        move |mutable: &mut _MutableArrayData, _, start: usize, len: usize| {
-            let buffer = &mut mutable.buffer1;
-            resize_for_bits(buffer, mutable.len + len);
-            set_bits(
-                &mut buffer.as_slice_mut(),
-                values,
-                mutable.len,
-                array.offset() + start,
-                len,
-            );
-        },
-    )
-}
-
-pub(super) fn extend_nulls(mutable: &mut _MutableArrayData, len: usize) {
-    let buffer = &mut mutable.buffer1;
-    resize_for_bits(buffer, mutable.len + len);
-}
diff --git a/rust/arrow/src/array/transform/fixed_binary.rs b/rust/arrow/src/array/transform/fixed_binary.rs
deleted file mode 100644
index 36952d46a4d..00000000000
--- a/rust/arrow/src/array/transform/fixed_binary.rs
+++ /dev/null
@@ -1,65 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use crate::{array::ArrayData, datatypes::DataType};
-
-use super::{Extend, _MutableArrayData};
-
-pub(super) fn build_extend(array: &ArrayData) -> Extend {
-    let size = match array.data_type() {
-        DataType::FixedSizeBinary(i) => *i as usize,
-        _ => unreachable!(),
-    };
-
-    let values = &array.buffers()[0].as_slice()[array.offset() * size..];
-    if array.null_count() == 0 {
-        // fast case where we can copy regions without null issues
-        Box::new(
-            move |mutable: &mut _MutableArrayData, _, start: usize, len: usize| {
-                let buffer = &mut mutable.buffer1;
-                buffer.extend_from_slice(&values[start * size..(start + len) * size]);
-            },
-        )
-    } else {
-        Box::new(
-            move |mutable: &mut _MutableArrayData, _, start: usize, len: usize| {
-                // nulls present: append item by item, ignoring null entries
-                let values_buffer = &mut mutable.buffer1;
-
-                (start..start + len).for_each(|i| {
-                    if array.is_valid(i) {
-                        // append value
-                        let bytes = &values[i * size..(i + 1) * size];
-                        values_buffer.extend_from_slice(bytes);
-                    } else {
-                        values_buffer.extend_zeros(size);
-                    }
-                })
-            },
-        )
-    }
-}
-
-pub(super) fn extend_nulls(mutable: &mut _MutableArrayData, len: usize) {
-    let size = match mutable.data_type {
-        DataType::FixedSizeBinary(i) => i as usize,
-        _ => unreachable!(),
-    };
-
-    let values_buffer = &mut mutable.buffer1;
-    values_buffer.extend_zeros(len * size);
-}
diff --git a/rust/arrow/src/array/transform/list.rs b/rust/arrow/src/array/transform/list.rs
deleted file mode 100644
index 8eb2bd1778d..00000000000
--- a/rust/arrow/src/array/transform/list.rs
+++ /dev/null
@@ -1,99 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use crate::array::{ArrayData, OffsetSizeTrait};
-
-use super::{
-    Extend, _MutableArrayData,
-    utils::{extend_offsets, get_last_offset},
-};
-
-pub(super) fn build_extend<T: OffsetSizeTrait>(array: &ArrayData) -> Extend {
-    let offsets = array.buffer::<T>(0);
-    if array.null_count() == 0 {
-        // fast case where we can copy regions without nullability checks
-        Box::new(
-            move |mutable: &mut _MutableArrayData,
-                  index: usize,
-                  start: usize,
-                  len: usize| {
-                let offset_buffer = &mut mutable.buffer1;
-
-                // this is safe due to how offset is built. See details on `get_last_offset`
-                let last_offset: T = unsafe { get_last_offset(offset_buffer) };
-
-                // offsets
-                extend_offsets::<T>(
-                    offset_buffer,
-                    last_offset,
-                    &offsets[start..start + len + 1],
-                );
-
-                mutable.child_data[0].extend(
-                    index,
-                    offsets[start].to_usize().unwrap(),
-                    offsets[start + len].to_usize().unwrap(),
-                )
-            },
-        )
-    } else {
-        // nulls present: append item by item, ignoring null entries
-        Box::new(
-            move |mutable: &mut _MutableArrayData,
-                  index: usize,
-                  start: usize,
-                  len: usize| {
-                let offset_buffer = &mut mutable.buffer1;
-
-                // this is safe due to how offset is built. See details on `get_last_offset`
-                let mut last_offset: T = unsafe { get_last_offset(offset_buffer) };
-
-                let delta_len = array.len() - array.null_count();
-                offset_buffer.reserve(delta_len * std::mem::size_of::<T>());
-
-                let child = &mut mutable.child_data[0];
-                (start..start + len).for_each(|i| {
-                    if array.is_valid(i) {
-                        // compute the new offset
-                        last_offset += offsets[i + 1] - offsets[i];
-
-                        // append value
-                        child.extend(
-                            index,
-                            offsets[i].to_usize().unwrap(),
-                            offsets[i + 1].to_usize().unwrap(),
-                        );
-                    }
-                    // append offset
-                    offset_buffer.push(last_offset);
-                })
-            },
-        )
-    }
-}
-
-pub(super) fn extend_nulls<T: OffsetSizeTrait>(
-    mutable: &mut _MutableArrayData,
-    len: usize,
-) {
-    let offset_buffer = &mut mutable.buffer1;
-
-    // this is safe due to how offset is built. See details on `get_last_offset`
-    let last_offset: T = unsafe { get_last_offset(offset_buffer) };
-
-    (0..len).for_each(|_| offset_buffer.push(last_offset))
-}
diff --git a/rust/arrow/src/array/transform/mod.rs b/rust/arrow/src/array/transform/mod.rs
deleted file mode 100644
index 4dc7b56d1c3..00000000000
--- a/rust/arrow/src/array/transform/mod.rs
+++ /dev/null
@@ -1,1206 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use crate::{buffer::MutableBuffer, datatypes::DataType, util::bit_util};
-
-use super::{
-    data::{into_buffers, new_buffers},
-    ArrayData,
-};
-
-mod boolean;
-mod fixed_binary;
-mod list;
-mod null;
-mod primitive;
-mod structure;
-mod utils;
-mod variable_size;
-
-type ExtendNullBits<'a> = Box<Fn(&mut _MutableArrayData, usize, usize) + 'a>;
-// function that extends `[start..start+len]` to the mutable array.
-// this is dynamic because different data_types influence how buffers and childs are extended.
-type Extend<'a> = Box<Fn(&mut _MutableArrayData, usize, usize, usize) + 'a>;
-
-type ExtendNulls = Box<Fn(&mut _MutableArrayData, usize) -> ()>;
-
-/// A mutable [ArrayData] that knows how to freeze itself into an [ArrayData].
-/// This is just a data container.
-#[derive(Debug)]
-struct _MutableArrayData<'a> {
-    pub data_type: DataType,
-    pub null_count: usize,
-
-    pub len: usize,
-    pub null_buffer: MutableBuffer,
-
-    // arrow specification only allows up to 3 buffers (2 ignoring the nulls above).
-    // Thus, we place them in the stack to avoid bound checks and greater data locality.
-    pub buffer1: MutableBuffer,
-    pub buffer2: MutableBuffer,
-    pub child_data: Vec<MutableArrayData<'a>>,
-}
-
-impl<'a> _MutableArrayData<'a> {
-    fn freeze(self, dictionary: Option<ArrayData>) -> ArrayData {
-        let buffers = into_buffers(&self.data_type, self.buffer1, self.buffer2);
-
-        let child_data = match self.data_type {
-            DataType::Dictionary(_, _) => vec![dictionary.unwrap()],
-            _ => {
-                let mut child_data = Vec::with_capacity(self.child_data.len());
-                for child in self.child_data {
-                    child_data.push(child.freeze());
-                }
-                child_data
-            }
-        };
-        ArrayData::new(
-            self.data_type,
-            self.len,
-            Some(self.null_count),
-            if self.null_count > 0 {
-                Some(self.null_buffer.into())
-            } else {
-                None
-            },
-            0,
-            buffers,
-            child_data,
-        )
-    }
-}
-
-fn build_extend_null_bits(array: &ArrayData, use_nulls: bool) -> ExtendNullBits {
-    if let Some(bitmap) = array.null_bitmap() {
-        let bytes = bitmap.bits.as_slice();
-        Box::new(move |mutable, start, len| {
-            utils::resize_for_bits(&mut mutable.null_buffer, mutable.len + len);
-            mutable.null_count += utils::set_bits(
-                mutable.null_buffer.as_slice_mut(),
-                bytes,
-                mutable.len,
-                array.offset() + start,
-                len,
-            );
-        })
-    } else if use_nulls {
-        Box::new(|mutable, _, len| {
-            utils::resize_for_bits(&mut mutable.null_buffer, mutable.len + len);
-            let write_data = mutable.null_buffer.as_slice_mut();
-            let offset = mutable.len;
-            (0..len).for_each(|i| {
-                bit_util::set_bit(write_data, offset + i);
-            });
-        })
-    } else {
-        Box::new(|_, _, _| {})
-    }
-}
-
-/// Struct to efficiently and interactively create an [ArrayData] from an existing [ArrayData] by
-/// copying chunks.
-/// The main use case of this struct is to perform unary operations to arrays of arbitrary types, such as `filter` and `take`.
-/// # Example:
-///
-/// ```
-/// use arrow::{array::{Int32Array, Array, MutableArrayData}};
-///
-/// let array = Int32Array::from(vec![1, 2, 3, 4, 5]);
-/// let array = array.data();
-/// // Create a new `MutableArrayData` from an array and with a capacity of 4.
-/// // Capacity here is equivalent to `Vec::with_capacity`
-/// let arrays = vec![array];
-/// let mut mutable = MutableArrayData::new(arrays, false, 4);
-/// mutable.extend(0, 1, 3); // extend from the slice [1..3], [2,3]
-/// mutable.extend(0, 0, 3); // extend from the slice [0..3], [1,2,3]
-/// // `.freeze()` to convert `MutableArrayData` into a `ArrayData`.
-/// let new_array = Int32Array::from(mutable.freeze());
-/// assert_eq!(Int32Array::from(vec![2, 3, 1, 2, 3]), new_array);
-/// ```
-pub struct MutableArrayData<'a> {
-    arrays: Vec<&'a ArrayData>,
-    // The attributes in [_MutableArrayData] cannot be in [MutableArrayData] due to
-    // mutability invariants (interior mutability):
-    // [MutableArrayData] contains a function that can only mutate [_MutableArrayData], not
-    // [MutableArrayData] itself
-    data: _MutableArrayData<'a>,
-
-    // the child data of the `Array` in Dictionary arrays.
-    // This is not stored in `MutableArrayData` because these values constant and only needed
-    // at the end, when freezing [_MutableArrayData].
-    dictionary: Option<ArrayData>,
-
-    // function used to extend values from arrays. This function's lifetime is bound to the array
-    // because it reads values from it.
-    extend_values: Vec<Extend<'a>>,
-    // function used to extend nulls from arrays. This function's lifetime is bound to the array
-    // because it reads nulls from it.
-    extend_null_bits: Vec<ExtendNullBits<'a>>,
-
-    // function used to extend nulls.
-    // this is independent of the arrays and therefore has no lifetime.
-    extend_nulls: ExtendNulls,
-}
-
-impl<'a> std::fmt::Debug for MutableArrayData<'a> {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        // ignores the closures.
-        f.debug_struct("MutableArrayData")
-            .field("data", &self.data)
-            .finish()
-    }
-}
-
-fn build_extend(array: &ArrayData) -> Extend {
-    use crate::datatypes::*;
-    match array.data_type() {
-        DataType::Null => null::build_extend(array),
-        DataType::Boolean => boolean::build_extend(array),
-        DataType::UInt8 => primitive::build_extend::<u8>(array),
-        DataType::UInt16 => primitive::build_extend::<u16>(array),
-        DataType::UInt32 => primitive::build_extend::<u32>(array),
-        DataType::UInt64 => primitive::build_extend::<u64>(array),
-        DataType::Int8 => primitive::build_extend::<i8>(array),
-        DataType::Int16 => primitive::build_extend::<i16>(array),
-        DataType::Int32 => primitive::build_extend::<i32>(array),
-        DataType::Int64 => primitive::build_extend::<i64>(array),
-        DataType::Float32 => primitive::build_extend::<f32>(array),
-        DataType::Float64 => primitive::build_extend::<f64>(array),
-        DataType::Date32
-        | DataType::Time32(_)
-        | DataType::Interval(IntervalUnit::YearMonth) => {
-            primitive::build_extend::<i32>(array)
-        }
-        DataType::Date64
-        | DataType::Time64(_)
-        | DataType::Timestamp(_, _)
-        | DataType::Duration(_)
-        | DataType::Interval(IntervalUnit::DayTime) => {
-            primitive::build_extend::<i64>(array)
-        }
-        DataType::Utf8 | DataType::Binary => variable_size::build_extend::<i32>(array),
-        DataType::LargeUtf8 | DataType::LargeBinary => {
-            variable_size::build_extend::<i64>(array)
-        }
-        DataType::List(_) => list::build_extend::<i32>(array),
-        DataType::LargeList(_) => list::build_extend::<i64>(array),
-        DataType::Dictionary(child_data_type, _) => match child_data_type.as_ref() {
-            DataType::UInt8 => primitive::build_extend::<u8>(array),
-            DataType::UInt16 => primitive::build_extend::<u16>(array),
-            DataType::UInt32 => primitive::build_extend::<u32>(array),
-            DataType::UInt64 => primitive::build_extend::<u64>(array),
-            DataType::Int8 => primitive::build_extend::<i8>(array),
-            DataType::Int16 => primitive::build_extend::<i16>(array),
-            DataType::Int32 => primitive::build_extend::<i32>(array),
-            DataType::Int64 => primitive::build_extend::<i64>(array),
-            _ => unreachable!(),
-        },
-        DataType::Struct(_) => structure::build_extend(array),
-        DataType::FixedSizeBinary(_) => fixed_binary::build_extend(array),
-        DataType::Float16 => unreachable!(),
-        /*
-        DataType::FixedSizeList(_, _) => {}
-        DataType::Union(_) => {}
-        */
-        _ => todo!("Take and filter operations still not supported for this datatype"),
-    }
-}
-
-fn build_extend_nulls(data_type: &DataType) -> ExtendNulls {
-    use crate::datatypes::*;
-    Box::new(match data_type {
-        DataType::Null => null::extend_nulls,
-        DataType::Boolean => boolean::extend_nulls,
-        DataType::UInt8 => primitive::extend_nulls::<u8>,
-        DataType::UInt16 => primitive::extend_nulls::<u16>,
-        DataType::UInt32 => primitive::extend_nulls::<u32>,
-        DataType::UInt64 => primitive::extend_nulls::<u64>,
-        DataType::Int8 => primitive::extend_nulls::<i8>,
-        DataType::Int16 => primitive::extend_nulls::<i16>,
-        DataType::Int32 => primitive::extend_nulls::<i32>,
-        DataType::Int64 => primitive::extend_nulls::<i64>,
-        DataType::Float32 => primitive::extend_nulls::<f32>,
-        DataType::Float64 => primitive::extend_nulls::<f64>,
-        DataType::Date32
-        | DataType::Time32(_)
-        | DataType::Interval(IntervalUnit::YearMonth) => primitive::extend_nulls::<i32>,
-        DataType::Date64
-        | DataType::Time64(_)
-        | DataType::Timestamp(_, _)
-        | DataType::Duration(_)
-        | DataType::Interval(IntervalUnit::DayTime) => primitive::extend_nulls::<i64>,
-        DataType::Utf8 | DataType::Binary => variable_size::extend_nulls::<i32>,
-        DataType::LargeUtf8 | DataType::LargeBinary => variable_size::extend_nulls::<i64>,
-        DataType::List(_) => list::extend_nulls::<i32>,
-        DataType::LargeList(_) => list::extend_nulls::<i64>,
-        DataType::Dictionary(child_data_type, _) => match child_data_type.as_ref() {
-            DataType::UInt8 => primitive::extend_nulls::<u8>,
-            DataType::UInt16 => primitive::extend_nulls::<u16>,
-            DataType::UInt32 => primitive::extend_nulls::<u32>,
-            DataType::UInt64 => primitive::extend_nulls::<u64>,
-            DataType::Int8 => primitive::extend_nulls::<i8>,
-            DataType::Int16 => primitive::extend_nulls::<i16>,
-            DataType::Int32 => primitive::extend_nulls::<i32>,
-            DataType::Int64 => primitive::extend_nulls::<i64>,
-            _ => unreachable!(),
-        },
-        DataType::Struct(_) => structure::extend_nulls,
-        DataType::FixedSizeBinary(_) => fixed_binary::extend_nulls,
-        DataType::Float16 => unreachable!(),
-        /*
-        DataType::FixedSizeList(_, _) => {}
-        DataType::Union(_) => {}
-        */
-        _ => todo!("Take and filter operations still not supported for this datatype"),
-    })
-}
-
-impl<'a> MutableArrayData<'a> {
-    /// returns a new [MutableArrayData] with capacity to `capacity` slots and specialized to create an
-    /// [ArrayData] from multiple `arrays`.
-    ///
-    /// `use_nulls` is a flag used to optimize insertions. It should be `false` if the only source of nulls
-    /// are the arrays themselves and `true` if the user plans to call [MutableArrayData::extend_nulls].
-    /// In other words, if `use_nulls` is `false`, calling [MutableArrayData::extend_nulls] should not be used.
-    pub fn new(arrays: Vec<&'a ArrayData>, mut use_nulls: bool, capacity: usize) -> Self {
-        let data_type = arrays[0].data_type();
-        use crate::datatypes::*;
-
-        // if any of the arrays has nulls, insertions from any array requires setting bits
-        // as there is at least one array with nulls.
-        if arrays.iter().any(|array| array.null_count() > 0) {
-            use_nulls = true;
-        };
-
-        let [buffer1, buffer2] = new_buffers(data_type, capacity);
-
-        let child_data = match &data_type {
-            DataType::Null
-            | DataType::Boolean
-            | DataType::UInt8
-            | DataType::UInt16
-            | DataType::UInt32
-            | DataType::UInt64
-            | DataType::Int8
-            | DataType::Int16
-            | DataType::Int32
-            | DataType::Int64
-            | DataType::Float32
-            | DataType::Float64
-            | DataType::Date32
-            | DataType::Date64
-            | DataType::Time32(_)
-            | DataType::Time64(_)
-            | DataType::Duration(_)
-            | DataType::Timestamp(_, _)
-            | DataType::Utf8
-            | DataType::Binary
-            | DataType::LargeUtf8
-            | DataType::LargeBinary
-            | DataType::Interval(_)
-            | DataType::FixedSizeBinary(_) => vec![],
-            DataType::List(_) | DataType::LargeList(_) => {
-                let childs = arrays
-                    .iter()
-                    .map(|array| &array.child_data()[0])
-                    .collect::<Vec<_>>();
-                vec![MutableArrayData::new(childs, use_nulls, capacity)]
-            }
-            // the dictionary type just appends keys and clones the values.
-            DataType::Dictionary(_, _) => vec![],
-            DataType::Float16 => unreachable!(),
-            DataType::Struct(fields) => (0..fields.len())
-                .map(|i| {
-                    let child_arrays = arrays
-                        .iter()
-                        .map(|array| &array.child_data()[i])
-                        .collect::<Vec<_>>();
-                    MutableArrayData::new(child_arrays, use_nulls, capacity)
-                })
-                .collect::<Vec<_>>(),
-            _ => {
-                todo!("Take and filter operations still not supported for this datatype")
-            }
-        };
-
-        let dictionary = match &data_type {
-            DataType::Dictionary(_, _) => Some(arrays[0].child_data()[0].clone()),
-            _ => None,
-        };
-
-        let extend_nulls = build_extend_nulls(data_type);
-
-        let extend_null_bits = arrays
-            .iter()
-            .map(|array| build_extend_null_bits(array, use_nulls))
-            .collect();
-
-        let null_bytes = bit_util::ceil(capacity, 8);
-        let null_buffer = MutableBuffer::from_len_zeroed(null_bytes);
-
-        let extend_values = arrays.iter().map(|array| build_extend(array)).collect();
-
-        let data = _MutableArrayData {
-            data_type: data_type.clone(),
-            len: 0,
-            null_count: 0,
-            null_buffer,
-            buffer1,
-            buffer2,
-            child_data,
-        };
-        Self {
-            arrays,
-            data,
-            dictionary,
-            extend_values,
-            extend_null_bits,
-            extend_nulls,
-        }
-    }
-
-    /// Extends this [MutableArrayData] with elements from the bounded [ArrayData] at `start`
-    /// and for a size of `len`.
-    /// # Panic
-    /// This function panics if the range is out of bounds, i.e. if `start + len >= array.len()`.
-    pub fn extend(&mut self, index: usize, start: usize, end: usize) {
-        let len = end - start;
-        (self.extend_null_bits[index])(&mut self.data, start, len);
-        (self.extend_values[index])(&mut self.data, index, start, len);
-        self.data.len += len;
-    }
-
-    /// Extends this [MutableArrayData] with null elements, disregarding the bound arrays
-    pub fn extend_nulls(&mut self, len: usize) {
-        self.data.null_count += len;
-        (self.extend_nulls)(&mut self.data, len);
-        self.data.len += len;
-    }
-
-    /// Creates a [ArrayData] from the pushed regions up to this point, consuming `self`.
-    pub fn freeze(self) -> ArrayData {
-        self.data.freeze(self.dictionary)
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use std::{convert::TryFrom, sync::Arc};
-
-    use super::*;
-
-    use crate::{
-        array::{
-            Array, ArrayData, ArrayRef, BooleanArray, DictionaryArray,
-            FixedSizeBinaryArray, Int16Array, Int16Type, Int32Array, Int64Array,
-            Int64Builder, ListBuilder, NullArray, PrimitiveBuilder, StringArray,
-            StringDictionaryBuilder, StructArray, UInt8Array,
-        },
-        buffer::Buffer,
-        datatypes::Field,
-    };
-    use crate::{
-        array::{ListArray, StringBuilder},
-        error::Result,
-    };
-
-    /// tests extending from a primitive array w/ offset nor nulls
-    #[test]
-    fn test_primitive() {
-        let b = UInt8Array::from(vec![Some(1), Some(2), Some(3)]);
-        let arrays = vec![b.data()];
-        let mut a = MutableArrayData::new(arrays, false, 3);
-        a.extend(0, 0, 2);
-        let result = a.freeze();
-        let array = UInt8Array::from(result);
-        let expected = UInt8Array::from(vec![Some(1), Some(2)]);
-        assert_eq!(array, expected);
-    }
-
-    /// tests extending from a primitive array with offset w/ nulls
-    #[test]
-    fn test_primitive_offset() {
-        let b = UInt8Array::from(vec![Some(1), Some(2), Some(3)]);
-        let b = b.slice(1, 2);
-        let arrays = vec![b.data()];
-        let mut a = MutableArrayData::new(arrays, false, 2);
-        a.extend(0, 0, 2);
-        let result = a.freeze();
-        let array = UInt8Array::from(result);
-        let expected = UInt8Array::from(vec![Some(2), Some(3)]);
-        assert_eq!(array, expected);
-    }
-
-    /// tests extending from a primitive array with offset and nulls
-    #[test]
-    fn test_primitive_null_offset() {
-        let b = UInt8Array::from(vec![Some(1), None, Some(3)]);
-        let b = b.slice(1, 2);
-        let arrays = vec![b.data()];
-        let mut a = MutableArrayData::new(arrays, false, 2);
-        a.extend(0, 0, 2);
-        let result = a.freeze();
-        let array = UInt8Array::from(result);
-        let expected = UInt8Array::from(vec![None, Some(3)]);
-        assert_eq!(array, expected);
-    }
-
-    #[test]
-    fn test_primitive_null_offset_nulls() {
-        let b = UInt8Array::from(vec![Some(1), Some(2), Some(3)]);
-        let b = b.slice(1, 2);
-        let arrays = vec![b.data()];
-        let mut a = MutableArrayData::new(arrays, true, 2);
-        a.extend(0, 0, 2);
-        a.extend_nulls(3);
-        a.extend(0, 1, 2);
-        let result = a.freeze();
-        let array = UInt8Array::from(result);
-        let expected =
-            UInt8Array::from(vec![Some(2), Some(3), None, None, None, Some(3)]);
-        assert_eq!(array, expected);
-    }
-
-    #[test]
-    fn test_list_null_offset() -> Result<()> {
-        let int_builder = Int64Builder::new(24);
-        let mut builder = ListBuilder::<Int64Builder>::new(int_builder);
-        builder.values().append_slice(&[1, 2, 3])?;
-        builder.append(true)?;
-        builder.values().append_slice(&[4, 5])?;
-        builder.append(true)?;
-        builder.values().append_slice(&[6, 7, 8])?;
-        builder.append(true)?;
-        let array = builder.finish();
-        let arrays = vec![array.data()];
-
-        let mut mutable = MutableArrayData::new(arrays, false, 0);
-        mutable.extend(0, 0, 1);
-
-        let result = mutable.freeze();
-        let array = ListArray::from(result);
-
-        let int_builder = Int64Builder::new(24);
-        let mut builder = ListBuilder::<Int64Builder>::new(int_builder);
-        builder.values().append_slice(&[1, 2, 3])?;
-        builder.append(true)?;
-        let expected = builder.finish();
-
-        assert_eq!(array, expected);
-
-        Ok(())
-    }
-
-    /// tests extending from a variable-sized (strings and binary) array w/ offset with nulls
-    #[test]
-    fn test_variable_sized_nulls() {
-        let array = StringArray::from(vec![Some("a"), Some("bc"), None, Some("defh")]);
-        let arrays = vec![array.data()];
-
-        let mut mutable = MutableArrayData::new(arrays, false, 0);
-
-        mutable.extend(0, 1, 3);
-
-        let result = mutable.freeze();
-        let result = StringArray::from(result);
-
-        let expected = StringArray::from(vec![Some("bc"), None]);
-        assert_eq!(result, expected);
-    }
-
-    /// tests extending from a variable-sized (strings and binary) array
-    /// with an offset and nulls
-    #[test]
-    fn test_variable_sized_offsets() {
-        let array = StringArray::from(vec![Some("a"), Some("bc"), None, Some("defh")]);
-        let array = array.slice(1, 3);
-
-        let arrays = vec![array.data()];
-
-        let mut mutable = MutableArrayData::new(arrays, false, 0);
-
-        mutable.extend(0, 0, 3);
-
-        let result = mutable.freeze();
-        let result = StringArray::from(result);
-
-        let expected = StringArray::from(vec![Some("bc"), None, Some("defh")]);
-        assert_eq!(result, expected);
-    }
-
-    #[test]
-    fn test_string_offsets() {
-        let array = StringArray::from(vec![Some("a"), Some("bc"), None, Some("defh")]);
-        let array = array.slice(1, 3);
-
-        let arrays = vec![array.data()];
-
-        let mut mutable = MutableArrayData::new(arrays, false, 0);
-
-        mutable.extend(0, 0, 3);
-
-        let result = mutable.freeze();
-        let result = StringArray::from(result);
-
-        let expected = StringArray::from(vec![Some("bc"), None, Some("defh")]);
-        assert_eq!(result, expected);
-    }
-
-    #[test]
-    fn test_multiple_with_nulls() {
-        let array1 = StringArray::from(vec!["hello", "world"]);
-        let array2 = StringArray::from(vec![Some("1"), None]);
-
-        let arrays = vec![array1.data(), array2.data()];
-
-        let mut mutable = MutableArrayData::new(arrays, false, 5);
-
-        mutable.extend(0, 0, 2);
-        mutable.extend(1, 0, 2);
-
-        let result = mutable.freeze();
-        let result = StringArray::from(result);
-
-        let expected =
-            StringArray::from(vec![Some("hello"), Some("world"), Some("1"), None]);
-        assert_eq!(result, expected);
-    }
-
-    #[test]
-    fn test_string_null_offset_nulls() {
-        let array = StringArray::from(vec![Some("a"), Some("bc"), None, Some("defh")]);
-        let array = array.slice(1, 3);
-
-        let arrays = vec![array.data()];
-
-        let mut mutable = MutableArrayData::new(arrays, true, 0);
-
-        mutable.extend(0, 1, 3);
-        mutable.extend_nulls(1);
-
-        let result = mutable.freeze();
-        let result = StringArray::from(result);
-
-        let expected = StringArray::from(vec![None, Some("defh"), None]);
-        assert_eq!(result, expected);
-    }
-
-    #[test]
-    fn test_bool() {
-        let array = BooleanArray::from(vec![Some(false), Some(true), None, Some(false)]);
-        let arrays = vec![array.data()];
-
-        let mut mutable = MutableArrayData::new(arrays, false, 0);
-
-        mutable.extend(0, 1, 3);
-
-        let result = mutable.freeze();
-        let result = BooleanArray::from(result);
-
-        let expected = BooleanArray::from(vec![Some(true), None]);
-        assert_eq!(result, expected);
-    }
-
-    #[test]
-    fn test_null() {
-        let array1 = NullArray::new(10);
-        let array2 = NullArray::new(5);
-        let arrays = vec![array1.data(), array2.data()];
-
-        let mut mutable = MutableArrayData::new(arrays, false, 0);
-
-        mutable.extend(0, 1, 3);
-        mutable.extend(1, 0, 1);
-
-        let result = mutable.freeze();
-        let result = NullArray::from(result);
-
-        let expected = NullArray::new(3);
-        assert_eq!(result, expected);
-    }
-
-    fn create_dictionary_array(values: &[&str], keys: &[Option<&str>]) -> ArrayData {
-        let values = StringArray::from(values.to_vec());
-        let mut builder = StringDictionaryBuilder::new_with_dictionary(
-            PrimitiveBuilder::<Int16Type>::new(3),
-            &values,
-        )
-        .unwrap();
-        for key in keys {
-            if let Some(v) = key {
-                builder.append(v).unwrap();
-            } else {
-                builder.append_null().unwrap()
-            }
-        }
-        builder.finish().data().clone()
-    }
-
-    #[test]
-    fn test_dictionary() {
-        // (a, b, c), (0, 1, 0, 2) => (a, b, a, c)
-        let array = create_dictionary_array(
-            &["a", "b", "c"],
-            &[Some("a"), Some("b"), None, Some("c")],
-        );
-        let arrays = vec![&array];
-
-        let mut mutable = MutableArrayData::new(arrays, false, 0);
-
-        mutable.extend(0, 1, 3);
-
-        let result = mutable.freeze();
-        let result = DictionaryArray::from(result);
-
-        let expected = Int16Array::from(vec![Some(1), None]);
-        assert_eq!(result.keys(), &expected);
-    }
-
-    #[test]
-    fn test_struct() {
-        let strings: ArrayRef = Arc::new(StringArray::from(vec![
-            Some("joe"),
-            None,
-            None,
-            Some("mark"),
-            Some("doe"),
-        ]));
-        let ints: ArrayRef = Arc::new(Int32Array::from(vec![
-            Some(1),
-            Some(2),
-            Some(3),
-            Some(4),
-            Some(5),
-        ]));
-
-        let array =
-            StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())])
-                .unwrap();
-        let arrays = vec![array.data()];
-        let mut mutable = MutableArrayData::new(arrays, false, 0);
-
-        mutable.extend(0, 1, 3);
-        let data = mutable.freeze();
-        let array = StructArray::from(data);
-
-        let expected = StructArray::try_from(vec![
-            ("f1", strings.slice(1, 2)),
-            ("f2", ints.slice(1, 2)),
-        ])
-        .unwrap();
-        assert_eq!(array, expected)
-    }
-
-    #[test]
-    fn test_struct_offset() {
-        let strings: ArrayRef = Arc::new(StringArray::from(vec![
-            Some("joe"),
-            None,
-            None,
-            Some("mark"),
-            Some("doe"),
-        ]));
-        let ints: ArrayRef = Arc::new(Int32Array::from(vec![
-            Some(1),
-            Some(2),
-            Some(3),
-            Some(4),
-            Some(5),
-        ]));
-
-        let array =
-            StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())])
-                .unwrap()
-                .slice(1, 3);
-        let arrays = vec![array.data()];
-        let mut mutable = MutableArrayData::new(arrays, false, 0);
-
-        mutable.extend(0, 1, 3);
-        let data = mutable.freeze();
-        let array = StructArray::from(data);
-
-        let expected_strings: ArrayRef =
-            Arc::new(StringArray::from(vec![None, Some("mark")]));
-        let expected = StructArray::try_from(vec![
-            ("f1", expected_strings),
-            ("f2", ints.slice(2, 2)),
-        ])
-        .unwrap();
-
-        assert_eq!(array, expected);
-    }
-
-    #[test]
-    fn test_struct_nulls() {
-        let strings: ArrayRef = Arc::new(StringArray::from(vec![
-            Some("joe"),
-            None,
-            None,
-            Some("mark"),
-            Some("doe"),
-        ]));
-        let ints: ArrayRef = Arc::new(Int32Array::from(vec![
-            Some(1),
-            Some(2),
-            None,
-            Some(4),
-            Some(5),
-        ]));
-
-        let array =
-            StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())])
-                .unwrap();
-        let arrays = vec![array.data()];
-
-        let mut mutable = MutableArrayData::new(arrays, false, 0);
-
-        mutable.extend(0, 1, 3);
-        let data = mutable.freeze();
-        let array = StructArray::from(data);
-
-        let expected_string = Arc::new(StringArray::from(vec![None, None])) as ArrayRef;
-        let expected_int = Arc::new(Int32Array::from(vec![Some(2), None])) as ArrayRef;
-
-        let expected =
-            StructArray::try_from(vec![("f1", expected_string), ("f2", expected_int)])
-                .unwrap();
-        assert_eq!(array, expected)
-    }
-
-    #[test]
-    fn test_struct_many() {
-        let strings: ArrayRef = Arc::new(StringArray::from(vec![
-            Some("joe"),
-            None,
-            None,
-            Some("mark"),
-            Some("doe"),
-        ]));
-        let ints: ArrayRef = Arc::new(Int32Array::from(vec![
-            Some(1),
-            Some(2),
-            None,
-            Some(4),
-            Some(5),
-        ]));
-
-        let array =
-            StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())])
-                .unwrap();
-        let arrays = vec![array.data(), array.data()];
-        let mut mutable = MutableArrayData::new(arrays, false, 0);
-
-        mutable.extend(0, 1, 3);
-        mutable.extend(1, 0, 2);
-        let data = mutable.freeze();
-        let array = StructArray::from(data);
-
-        let expected_string =
-            Arc::new(StringArray::from(vec![None, None, Some("joe"), None])) as ArrayRef;
-        let expected_int =
-            Arc::new(Int32Array::from(vec![Some(2), None, Some(1), Some(2)])) as ArrayRef;
-
-        let expected =
-            StructArray::try_from(vec![("f1", expected_string), ("f2", expected_int)])
-                .unwrap();
-        assert_eq!(array, expected)
-    }
-
-    #[test]
-    fn test_binary_fixed_sized_offsets() {
-        let array = FixedSizeBinaryArray::try_from_iter(
-            vec![vec![0, 0], vec![0, 1], vec![0, 2]].into_iter(),
-        )
-        .expect("Failed to create FixedSizeBinaryArray from iterable");
-        let array = array.slice(1, 2);
-        // = [[0, 1], [0, 2]] due to the offset = 1
-
-        let arrays = vec![array.data()];
-
-        let mut mutable = MutableArrayData::new(arrays, false, 0);
-
-        mutable.extend(0, 1, 2);
-        mutable.extend(0, 0, 1);
-
-        let result = mutable.freeze();
-        let result = FixedSizeBinaryArray::from(result);
-
-        let expected =
-            FixedSizeBinaryArray::try_from_iter(vec![vec![0, 2], vec![0, 1]].into_iter())
-                .expect("Failed to create FixedSizeBinaryArray from iterable");
-        assert_eq!(result, expected);
-    }
-
-    #[test]
-    fn test_list_append() -> Result<()> {
-        let mut builder = ListBuilder::<Int64Builder>::new(Int64Builder::new(24));
-        builder.values().append_slice(&[1, 2, 3])?;
-        builder.append(true)?;
-        builder.values().append_slice(&[4, 5])?;
-        builder.append(true)?;
-        builder.values().append_slice(&[6, 7, 8])?;
-        builder.values().append_slice(&[9, 10, 11])?;
-        builder.append(true)?;
-        let a = builder.finish();
-
-        let a_builder = Int64Builder::new(24);
-        let mut a_builder = ListBuilder::<Int64Builder>::new(a_builder);
-        a_builder.values().append_slice(&[12, 13])?;
-        a_builder.append(true)?;
-        a_builder.append(true)?;
-        a_builder.values().append_slice(&[14, 15])?;
-        a_builder.append(true)?;
-        let b = a_builder.finish();
-
-        let c = b.slice(1, 2);
-
-        let mut mutable =
-            MutableArrayData::new(vec![a.data(), b.data(), c.data()], false, 1);
-        mutable.extend(0, 0, a.len());
-        mutable.extend(1, 0, b.len());
-        mutable.extend(2, 0, c.len());
-
-        let finished = mutable.freeze();
-
-        let expected_int_array = Int64Array::from(vec![
-            Some(1),
-            Some(2),
-            Some(3),
-            Some(4),
-            Some(5),
-            Some(6),
-            Some(7),
-            Some(8),
-            Some(9),
-            Some(10),
-            Some(11),
-            // append first array
-            Some(12),
-            Some(13),
-            Some(14),
-            Some(15),
-            // append second array
-            Some(14),
-            Some(15),
-        ]);
-        let list_value_offsets =
-            Buffer::from_slice_ref(&[0i32, 3, 5, 11, 13, 13, 15, 15, 17]);
-        let expected_list_data = ArrayData::new(
-            DataType::List(Box::new(Field::new("item", DataType::Int64, true))),
-            8,
-            None,
-            None,
-            0,
-            vec![list_value_offsets],
-            vec![expected_int_array.data().clone()],
-        );
-        assert_eq!(finished, expected_list_data);
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_list_nulls_append() -> Result<()> {
-        let mut builder = ListBuilder::<Int64Builder>::new(Int64Builder::new(32));
-        builder.values().append_slice(&[1, 2, 3])?;
-        builder.append(true)?;
-        builder.values().append_slice(&[4, 5])?;
-        builder.append(true)?;
-        builder.append(false)?;
-        builder.values().append_slice(&[6, 7, 8])?;
-        builder.values().append_null()?;
-        builder.values().append_null()?;
-        builder.values().append_slice(&[9, 10, 11])?;
-        builder.append(true)?;
-        let a = builder.finish();
-        let a = a.data();
-
-        let mut builder = ListBuilder::<Int64Builder>::new(Int64Builder::new(32));
-        builder.values().append_slice(&[12, 13])?;
-        builder.append(true)?;
-        builder.append(false)?;
-        builder.append(true)?;
-        builder.values().append_null()?;
-        builder.values().append_null()?;
-        builder.values().append_slice(&[14, 15])?;
-        builder.append(true)?;
-        let b = builder.finish();
-        let b = b.data();
-        let c = b.slice(1, 2);
-        let d = b.slice(2, 2);
-
-        let mut mutable = MutableArrayData::new(vec![a, b, &c, &d], false, 10);
-
-        mutable.extend(0, 0, a.len());
-        mutable.extend(1, 0, b.len());
-        mutable.extend(2, 0, c.len());
-        mutable.extend(3, 0, d.len());
-        let result = mutable.freeze();
-
-        let expected_int_array = Int64Array::from(vec![
-            Some(1),
-            Some(2),
-            Some(3),
-            Some(4),
-            Some(5),
-            Some(6),
-            Some(7),
-            Some(8),
-            None,
-            None,
-            Some(9),
-            Some(10),
-            Some(11),
-            // second array
-            Some(12),
-            Some(13),
-            None,
-            None,
-            Some(14),
-            Some(15),
-            // slice(1, 2) results in no values added
-            None,
-            None,
-            Some(14),
-            Some(15),
-        ]);
-        let list_value_offsets =
-            Buffer::from_slice_ref(&[0, 3, 5, 5, 13, 15, 15, 15, 19, 19, 19, 19, 23]);
-        let expected_list_data = ArrayData::new(
-            DataType::List(Box::new(Field::new("item", DataType::Int64, true))),
-            12,
-            None,
-            Some(Buffer::from(&[0b11011011, 0b1110])),
-            0,
-            vec![list_value_offsets],
-            vec![expected_int_array.data().clone()],
-        );
-        assert_eq!(result, expected_list_data);
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_list_of_strings_append() -> Result<()> {
-        // [["alpha", "beta", None]]
-        let mut builder = ListBuilder::new(StringBuilder::new(32));
-        builder.values().append_value("Hello")?;
-        builder.values().append_value("Arrow")?;
-        builder.values().append_null()?;
-        builder.append(true)?;
-        let a = builder.finish();
-
-        // [["alpha", "beta"], [None], ["gamma", "delta", None]]
-        let mut builder = ListBuilder::new(StringBuilder::new(32));
-        builder.values().append_value("alpha")?;
-        builder.values().append_value("beta")?;
-        builder.append(true)?;
-        builder.values().append_null()?;
-        builder.append(true)?;
-        builder.values().append_value("gamma")?;
-        builder.values().append_value("delta")?;
-        builder.values().append_null()?;
-        builder.append(true)?;
-        let b = builder.finish();
-
-        let mut mutable = MutableArrayData::new(vec![a.data(), b.data()], false, 10);
-
-        mutable.extend(0, 0, a.len());
-        mutable.extend(1, 0, b.len());
-        mutable.extend(1, 1, 3);
-        mutable.extend(1, 0, 0);
-        let result = mutable.freeze();
-
-        let expected_string_array = StringArray::from(vec![
-            // extend a[0..a.len()]
-            // a[0]
-            Some("Hello"),
-            Some("Arrow"),
-            None,
-            // extend b[0..b.len()]
-            // b[0]
-            Some("alpha"),
-            Some("beta"),
-            // b[1]
-            None,
-            // b[2]
-            Some("gamma"),
-            Some("delta"),
-            None,
-            // extend b[1..3]
-            // b[1]
-            None,
-            // b[2]
-            Some("gamma"),
-            Some("delta"),
-            None,
-            // extend b[0..0]
-        ]);
-        let list_value_offsets = Buffer::from_slice_ref(&[0, 3, 5, 6, 9, 10, 13]);
-        let expected_list_data = ArrayData::new(
-            DataType::List(Box::new(Field::new("item", DataType::Utf8, true))),
-            6,
-            None,
-            None,
-            0,
-            vec![list_value_offsets],
-            vec![expected_string_array.data().clone()],
-        );
-        assert_eq!(result, expected_list_data);
-        Ok(())
-    }
-
-    #[test]
-    fn test_fixed_size_binary_append() {
-        let a = vec![Some(vec![1, 2]), Some(vec![3, 4]), Some(vec![5, 6])];
-        let a = FixedSizeBinaryArray::try_from_sparse_iter(a.into_iter())
-            .expect("Failed to create FixedSizeBinaryArray from iterable");
-
-        let b = vec![
-            None,
-            Some(vec![7, 8]),
-            Some(vec![9, 10]),
-            None,
-            Some(vec![13, 14]),
-            None,
-        ];
-        let b = FixedSizeBinaryArray::try_from_sparse_iter(b.into_iter())
-            .expect("Failed to create FixedSizeBinaryArray from iterable");
-
-        let mut mutable = MutableArrayData::new(vec![a.data(), b.data()], false, 10);
-
-        mutable.extend(0, 0, a.len());
-        mutable.extend(1, 0, b.len());
-        mutable.extend(1, 1, 4);
-        mutable.extend(1, 2, 3);
-        mutable.extend(1, 5, 5);
-        let result = mutable.freeze();
-
-        let expected = vec![
-            // a
-            Some(vec![1, 2]),
-            Some(vec![3, 4]),
-            Some(vec![5, 6]),
-            // b
-            None,
-            Some(vec![7, 8]),
-            Some(vec![9, 10]),
-            None,
-            Some(vec![13, 14]),
-            None,
-            // b[1..4]
-            Some(vec![7, 8]),
-            Some(vec![9, 10]),
-            None,
-            // b[2..3]
-            Some(vec![9, 10]),
-            // b[4..4]
-        ];
-        let expected = FixedSizeBinaryArray::try_from_sparse_iter(expected.into_iter())
-            .expect("Failed to create FixedSizeBinaryArray from iterable");
-        assert_eq!(&result, expected.data());
-    }
-
-    /*
-    // this is an old test used on a meanwhile removed dead code
-    // that is still useful when `MutableArrayData` supports fixed-size lists.
-    #[test]
-    fn test_fixed_size_list_append() -> Result<()> {
-        let int_builder = UInt16Builder::new(64);
-        let mut builder = FixedSizeListBuilder::<UInt16Builder>::new(int_builder, 2);
-        builder.values().append_slice(&[1, 2])?;
-        builder.append(true)?;
-        builder.values().append_slice(&[3, 4])?;
-        builder.append(false)?;
-        builder.values().append_slice(&[5, 6])?;
-        builder.append(true)?;
-
-        let a_builder = UInt16Builder::new(64);
-        let mut a_builder = FixedSizeListBuilder::<UInt16Builder>::new(a_builder, 2);
-        a_builder.values().append_slice(&[7, 8])?;
-        a_builder.append(true)?;
-        a_builder.values().append_slice(&[9, 10])?;
-        a_builder.append(true)?;
-        a_builder.values().append_slice(&[11, 12])?;
-        a_builder.append(false)?;
-        a_builder.values().append_slice(&[13, 14])?;
-        a_builder.append(true)?;
-        a_builder.values().append_null()?;
-        a_builder.values().append_null()?;
-        a_builder.append(true)?;
-        let a = a_builder.finish();
-
-        // append array
-        builder.append_data(&[
-            a.data(),
-            a.slice(1, 3).data(),
-            a.slice(2, 1).data(),
-            a.slice(5, 0).data(),
-        ])?;
-        let finished = builder.finish();
-
-        let expected_int_array = UInt16Array::from(vec![
-            Some(1),
-            Some(2),
-            Some(3),
-            Some(4),
-            Some(5),
-            Some(6),
-            // append first array
-            Some(7),
-            Some(8),
-            Some(9),
-            Some(10),
-            Some(11),
-            Some(12),
-            Some(13),
-            Some(14),
-            None,
-            None,
-            // append slice(1, 3)
-            Some(9),
-            Some(10),
-            Some(11),
-            Some(12),
-            Some(13),
-            Some(14),
-            // append slice(2, 1)
-            Some(11),
-            Some(12),
-        ]);
-        let expected_list_data = ArrayData::new(
-            DataType::FixedSizeList(
-                Box::new(Field::new("item", DataType::UInt16, true)),
-                2,
-            ),
-            12,
-            None,
-            None,
-            0,
-            vec![],
-            vec![expected_int_array.data()],
-        );
-        let expected_list =
-            FixedSizeListArray::from(Arc::new(expected_list_data) as ArrayData);
-        assert_eq!(&expected_list.values(), &finished.values());
-        assert_eq!(expected_list.len(), finished.len());
-
-        Ok(())
-    }
-    */
-}
diff --git a/rust/arrow/src/array/transform/null.rs b/rust/arrow/src/array/transform/null.rs
deleted file mode 100644
index e1335e17971..00000000000
--- a/rust/arrow/src/array/transform/null.rs
+++ /dev/null
@@ -1,26 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use crate::array::ArrayData;
-
-use super::{Extend, _MutableArrayData};
-
-pub(super) fn build_extend(_: &ArrayData) -> Extend {
-    Box::new(move |_, _, _, _| {})
-}
-
-pub(super) fn extend_nulls(_: &mut _MutableArrayData, _: usize) {}
diff --git a/rust/arrow/src/array/transform/primitive.rs b/rust/arrow/src/array/transform/primitive.rs
deleted file mode 100644
index 032bb4a8779..00000000000
--- a/rust/arrow/src/array/transform/primitive.rs
+++ /dev/null
@@ -1,40 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::mem::size_of;
-
-use crate::{array::ArrayData, datatypes::ArrowNativeType};
-
-use super::{Extend, _MutableArrayData};
-
-pub(super) fn build_extend<T: ArrowNativeType>(array: &ArrayData) -> Extend {
-    let values = array.buffer::<T>(0);
-    Box::new(
-        move |mutable: &mut _MutableArrayData, _, start: usize, len: usize| {
-            mutable
-                .buffer1
-                .extend_from_slice(&values[start..start + len]);
-        },
-    )
-}
-
-pub(super) fn extend_nulls<T: ArrowNativeType>(
-    mutable: &mut _MutableArrayData,
-    len: usize,
-) {
-    mutable.buffer1.extend_zeros(len * size_of::<T>());
-}
diff --git a/rust/arrow/src/array/transform/structure.rs b/rust/arrow/src/array/transform/structure.rs
deleted file mode 100644
index c019f5ac6a9..00000000000
--- a/rust/arrow/src/array/transform/structure.rs
+++ /dev/null
@@ -1,67 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use crate::array::ArrayData;
-
-use super::{Extend, _MutableArrayData};
-
-pub(super) fn build_extend(array: &ArrayData) -> Extend {
-    if array.null_count() == 0 {
-        Box::new(
-            move |mutable: &mut _MutableArrayData,
-                  index: usize,
-                  start: usize,
-                  len: usize| {
-                mutable.child_data.iter_mut().for_each(|child| {
-                    child.extend(
-                        index,
-                        array.offset() + start,
-                        array.offset() + start + len,
-                    )
-                })
-            },
-        )
-    } else {
-        Box::new(
-            move |mutable: &mut _MutableArrayData,
-                  index: usize,
-                  start: usize,
-                  len: usize| {
-                (array.offset() + start..array.offset() + start + len).for_each(|i| {
-                    if array.is_valid(i) {
-                        mutable
-                            .child_data
-                            .iter_mut()
-                            .for_each(|child| child.extend(index, i, i + 1))
-                    } else {
-                        mutable
-                            .child_data
-                            .iter_mut()
-                            .for_each(|child| child.extend_nulls(1))
-                    }
-                })
-            },
-        )
-    }
-}
-
-pub(super) fn extend_nulls(mutable: &mut _MutableArrayData, len: usize) {
-    mutable
-        .child_data
-        .iter_mut()
-        .for_each(|child| child.extend_nulls(len))
-}
diff --git a/rust/arrow/src/array/transform/utils.rs b/rust/arrow/src/array/transform/utils.rs
deleted file mode 100644
index 8c718c70c17..00000000000
--- a/rust/arrow/src/array/transform/utils.rs
+++ /dev/null
@@ -1,76 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use crate::{array::OffsetSizeTrait, buffer::MutableBuffer, util::bit_util};
-
-/// extends the `buffer` to be able to hold `len` bits, setting all bits of the new size to zero.
-#[inline]
-pub(super) fn resize_for_bits(buffer: &mut MutableBuffer, len: usize) {
-    let needed_bytes = bit_util::ceil(len, 8);
-    if buffer.len() < needed_bytes {
-        buffer.resize(needed_bytes, 0);
-    }
-}
-
-/// sets all bits on `write_data` on the range `[offset_write..offset_write+len]` to be equal to the
-/// bits on `data` on the range `[offset_read..offset_read+len]`
-pub(super) fn set_bits(
-    write_data: &mut [u8],
-    data: &[u8],
-    offset_write: usize,
-    offset_read: usize,
-    len: usize,
-) -> usize {
-    let mut count = 0;
-    (0..len).for_each(|i| {
-        if bit_util::get_bit(data, offset_read + i) {
-            bit_util::set_bit(write_data, offset_write + i);
-        } else {
-            count += 1;
-        }
-    });
-    count
-}
-
-pub(super) fn extend_offsets<T: OffsetSizeTrait>(
-    buffer: &mut MutableBuffer,
-    mut last_offset: T,
-    offsets: &[T],
-) {
-    buffer.reserve(offsets.len() * std::mem::size_of::<T>());
-    offsets.windows(2).for_each(|offsets| {
-        // compute the new offset
-        let length = offsets[1] - offsets[0];
-        last_offset += length;
-        buffer.push(last_offset);
-    });
-}
-
-#[inline]
-pub(super) unsafe fn get_last_offset<T: OffsetSizeTrait>(
-    offset_buffer: &MutableBuffer,
-) -> T {
-    // JUSTIFICATION
-    //  Benefit
-    //      20% performance improvement extend of variable sized arrays (see bench `mutable_array`)
-    //  Soundness
-    //      * offset buffer is always extended in slices of T and aligned accordingly.
-    //      * Buffer[0] is initialized with one element, 0, and thus `mutable_offsets.len() - 1` is always valid.
-    let (prefix, offsets, suffix) = offset_buffer.as_slice().align_to::<T>();
-    debug_assert!(prefix.is_empty() && suffix.is_empty());
-    *offsets.get_unchecked(offsets.len() - 1)
-}
diff --git a/rust/arrow/src/array/transform/variable_size.rs b/rust/arrow/src/array/transform/variable_size.rs
deleted file mode 100644
index c9304dbca20..00000000000
--- a/rust/arrow/src/array/transform/variable_size.rs
+++ /dev/null
@@ -1,105 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use crate::{
-    array::{ArrayData, OffsetSizeTrait},
-    buffer::MutableBuffer,
-};
-
-use super::{
-    Extend, _MutableArrayData,
-    utils::{extend_offsets, get_last_offset},
-};
-
-#[inline]
-fn extend_offset_values<T: OffsetSizeTrait>(
-    buffer: &mut MutableBuffer,
-    offsets: &[T],
-    values: &[u8],
-    start: usize,
-    len: usize,
-) {
-    let start_values = offsets[start].to_usize().unwrap();
-    let end_values = offsets[start + len].to_usize().unwrap();
-    let new_values = &values[start_values..end_values];
-    buffer.extend_from_slice(new_values);
-}
-
-pub(super) fn build_extend<T: OffsetSizeTrait>(array: &ArrayData) -> Extend {
-    let offsets = array.buffer::<T>(0);
-    let values = array.buffers()[1].as_slice();
-    if array.null_count() == 0 {
-        // fast case where we can copy regions without null issues
-        Box::new(
-            move |mutable: &mut _MutableArrayData, _, start: usize, len: usize| {
-                let offset_buffer = &mut mutable.buffer1;
-                let values_buffer = &mut mutable.buffer2;
-
-                // this is safe due to how offset is built. See details on `get_last_offset`
-                let last_offset = unsafe { get_last_offset(offset_buffer) };
-
-                extend_offsets::<T>(
-                    offset_buffer,
-                    last_offset,
-                    &offsets[start..start + len + 1],
-                );
-                // values
-                extend_offset_values::<T>(values_buffer, offsets, values, start, len);
-            },
-        )
-    } else {
-        Box::new(
-            move |mutable: &mut _MutableArrayData, _, start: usize, len: usize| {
-                let offset_buffer = &mut mutable.buffer1;
-                let values_buffer = &mut mutable.buffer2;
-
-                // this is safe due to how offset is built. See details on `get_last_offset`
-                let mut last_offset: T = unsafe { get_last_offset(offset_buffer) };
-
-                // nulls present: append item by item, ignoring null entries
-                offset_buffer.reserve(len * std::mem::size_of::<T>());
-
-                (start..start + len).for_each(|i| {
-                    if array.is_valid(i) {
-                        // compute the new offset
-                        let length = offsets[i + 1] - offsets[i];
-                        last_offset += length;
-
-                        // append value
-                        let bytes = &values[offsets[i].to_usize().unwrap()
-                            ..offsets[i + 1].to_usize().unwrap()];
-                        values_buffer.extend_from_slice(bytes);
-                    }
-                    // offsets are always present
-                    offset_buffer.push(last_offset);
-                })
-            },
-        )
-    }
-}
-
-pub(super) fn extend_nulls<T: OffsetSizeTrait>(
-    mutable: &mut _MutableArrayData,
-    len: usize,
-) {
-    let offset_buffer = &mut mutable.buffer1;
-
-    // this is safe due to how offset is built. See details on `get_last_offset`
-    let last_offset: T = unsafe { get_last_offset(offset_buffer) };
-
-    (0..len).for_each(|_| offset_buffer.push(last_offset))
-}
diff --git a/rust/arrow/src/bitmap.rs b/rust/arrow/src/bitmap.rs
deleted file mode 100644
index b977f550999..00000000000
--- a/rust/arrow/src/bitmap.rs
+++ /dev/null
@@ -1,157 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Defines a bitmap, which is used to track which values in an Arrow array are null.
-//! This is called a "validity bitmap" in the Arrow documentation.
-
-use crate::buffer::Buffer;
-use crate::error::Result;
-use crate::util::bit_util;
-use std::mem;
-
-use std::ops::{BitAnd, BitOr};
-
-#[derive(Debug, Clone)]
-pub struct Bitmap {
-    pub(crate) bits: Buffer,
-}
-
-impl Bitmap {
-    pub fn new(num_bits: usize) -> Self {
-        let num_bytes = num_bits / 8 + if num_bits % 8 > 0 { 1 } else { 0 };
-        let r = num_bytes % 64;
-        let len = if r == 0 {
-            num_bytes
-        } else {
-            num_bytes + 64 - r
-        };
-        Bitmap {
-            bits: Buffer::from(&vec![0xFF; len]),
-        }
-    }
-
-    pub fn len(&self) -> usize {
-        self.bits.len()
-    }
-
-    pub fn is_empty(&self) -> bool {
-        self.bits.is_empty()
-    }
-
-    pub fn is_set(&self, i: usize) -> bool {
-        assert!(i < (self.bits.len() << 3));
-        unsafe { bit_util::get_bit_raw(self.bits.as_ptr(), i) }
-    }
-
-    pub fn buffer_ref(&self) -> &Buffer {
-        &self.bits
-    }
-
-    pub fn into_buffer(self) -> Buffer {
-        self.bits
-    }
-
-    /// Returns the total number of bytes of memory occupied by the buffers owned by this [Bitmap].
-    pub fn get_buffer_memory_size(&self) -> usize {
-        self.bits.capacity()
-    }
-
-    /// Returns the total number of bytes of memory occupied physically by this [Bitmap].
-    pub fn get_array_memory_size(&self) -> usize {
-        self.bits.capacity() + mem::size_of_val(self)
-    }
-}
-
-impl<'a, 'b> BitAnd<&'b Bitmap> for &'a Bitmap {
-    type Output = Result<Bitmap>;
-
-    fn bitand(self, rhs: &'b Bitmap) -> Result<Bitmap> {
-        Ok(Bitmap::from((&self.bits & &rhs.bits)?))
-    }
-}
-
-impl<'a, 'b> BitOr<&'b Bitmap> for &'a Bitmap {
-    type Output = Result<Bitmap>;
-
-    fn bitor(self, rhs: &'b Bitmap) -> Result<Bitmap> {
-        Ok(Bitmap::from((&self.bits | &rhs.bits)?))
-    }
-}
-
-impl From<Buffer> for Bitmap {
-    fn from(buf: Buffer) -> Self {
-        Self { bits: buf }
-    }
-}
-
-impl PartialEq for Bitmap {
-    fn eq(&self, other: &Self) -> bool {
-        // buffer equality considers capacity, but here we want to only compare
-        // actual data contents
-        let self_len = self.bits.len();
-        let other_len = other.bits.len();
-        if self_len != other_len {
-            return false;
-        }
-        self.bits.as_slice()[..self_len] == other.bits.as_slice()[..self_len]
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_bitmap_length() {
-        assert_eq!(64, Bitmap::new(63 * 8).len());
-        assert_eq!(64, Bitmap::new(64 * 8).len());
-        assert_eq!(128, Bitmap::new(65 * 8).len());
-    }
-
-    #[test]
-    fn test_bitwise_and() {
-        let bitmap1 = Bitmap::from(Buffer::from([0b01101010]));
-        let bitmap2 = Bitmap::from(Buffer::from([0b01001110]));
-        assert_eq!(
-            Bitmap::from(Buffer::from([0b01001010])),
-            (&bitmap1 & &bitmap2).unwrap()
-        );
-    }
-
-    #[test]
-    fn test_bitwise_or() {
-        let bitmap1 = Bitmap::from(Buffer::from([0b01101010]));
-        let bitmap2 = Bitmap::from(Buffer::from([0b01001110]));
-        assert_eq!(
-            Bitmap::from(Buffer::from([0b01101110])),
-            (&bitmap1 | &bitmap2).unwrap()
-        );
-    }
-
-    #[test]
-    fn test_bitmap_is_set() {
-        let bitmap = Bitmap::from(Buffer::from([0b01001010]));
-        assert_eq!(false, bitmap.is_set(0));
-        assert_eq!(true, bitmap.is_set(1));
-        assert_eq!(false, bitmap.is_set(2));
-        assert_eq!(true, bitmap.is_set(3));
-        assert_eq!(false, bitmap.is_set(4));
-        assert_eq!(false, bitmap.is_set(5));
-        assert_eq!(true, bitmap.is_set(6));
-        assert_eq!(false, bitmap.is_set(7));
-    }
-}
diff --git a/rust/arrow/src/buffer/immutable.rs b/rust/arrow/src/buffer/immutable.rs
deleted file mode 100644
index cd6a2a3c130..00000000000
--- a/rust/arrow/src/buffer/immutable.rs
+++ /dev/null
@@ -1,541 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::fmt::Debug;
-use std::iter::FromIterator;
-use std::ptr::NonNull;
-use std::sync::Arc;
-use std::{convert::AsRef, usize};
-
-use crate::util::bit_chunk_iterator::BitChunks;
-use crate::{
-    bytes::{Bytes, Deallocation},
-    datatypes::ArrowNativeType,
-    ffi,
-};
-
-use super::ops::bitwise_unary_op_helper;
-use super::MutableBuffer;
-
-/// Buffer represents a contiguous memory region that can be shared with other buffers and across
-/// thread boundaries.
-#[derive(Clone, PartialEq, Debug)]
-pub struct Buffer {
-    /// the internal byte buffer.
-    data: Arc<Bytes>,
-
-    /// The offset into the buffer.
-    offset: usize,
-}
-
-impl Buffer {
-    /// Auxiliary method to create a new Buffer
-    #[inline]
-    pub fn from_bytes(bytes: Bytes) -> Self {
-        Buffer {
-            data: Arc::new(bytes),
-            offset: 0,
-        }
-    }
-
-    /// Initializes a [Buffer] from a slice of items.
-    pub fn from_slice_ref<U: ArrowNativeType, T: AsRef<[U]>>(items: &T) -> Self {
-        let slice = items.as_ref();
-        let len = slice.len();
-        let mut buffer = MutableBuffer::with_capacity(len);
-        buffer.extend_from_slice(slice);
-        buffer.into()
-    }
-
-    /// Creates a buffer from an existing memory region (must already be byte-aligned), this
-    /// `Buffer` will free this piece of memory when dropped.
-    ///
-    /// # Arguments
-    ///
-    /// * `ptr` - Pointer to raw parts
-    /// * `len` - Length of raw parts in **bytes**
-    /// * `capacity` - Total allocated memory for the pointer `ptr`, in **bytes**
-    ///
-    /// # Safety
-    ///
-    /// This function is unsafe as there is no guarantee that the given pointer is valid for `len`
-    /// bytes. If the `ptr` and `capacity` come from a `Buffer`, then this is guaranteed.
-    pub unsafe fn from_raw_parts(ptr: NonNull<u8>, len: usize, capacity: usize) -> Self {
-        assert!(len <= capacity);
-        Buffer::build_with_arguments(ptr, len, Deallocation::Native(capacity))
-    }
-
-    /// Creates a buffer from an existing memory region (must already be byte-aligned), this
-    /// `Buffer` **does not** free this piece of memory when dropped.
-    ///
-    /// # Arguments
-    ///
-    /// * `ptr` - Pointer to raw parts
-    /// * `len` - Length of raw parts in **bytes**
-    /// * `data` - An [ffi::FFI_ArrowArray] with the data
-    ///
-    /// # Safety
-    ///
-    /// This function is unsafe as there is no guarantee that the given pointer is valid for `len`
-    /// bytes and that the foreign deallocator frees the region.
-    pub unsafe fn from_unowned(
-        ptr: NonNull<u8>,
-        len: usize,
-        data: Arc<ffi::FFI_ArrowArray>,
-    ) -> Self {
-        Buffer::build_with_arguments(ptr, len, Deallocation::Foreign(data))
-    }
-
-    /// Auxiliary method to create a new Buffer
-    unsafe fn build_with_arguments(
-        ptr: NonNull<u8>,
-        len: usize,
-        deallocation: Deallocation,
-    ) -> Self {
-        let bytes = Bytes::new(ptr, len, deallocation);
-        Buffer {
-            data: Arc::new(bytes),
-            offset: 0,
-        }
-    }
-
-    /// Returns the number of bytes in the buffer
-    pub fn len(&self) -> usize {
-        self.data.len() - self.offset
-    }
-
-    /// Returns the capacity of this buffer.
-    /// For exernally owned buffers, this returns zero
-    pub fn capacity(&self) -> usize {
-        self.data.capacity()
-    }
-
-    /// Returns whether the buffer is empty.
-    pub fn is_empty(&self) -> bool {
-        self.data.len() - self.offset == 0
-    }
-
-    /// Returns the byte slice stored in this buffer
-    pub fn as_slice(&self) -> &[u8] {
-        &self.data[self.offset..]
-    }
-
-    /// Returns a new [Buffer] that is a slice of this buffer starting at `offset`.
-    /// Doing so allows the same memory region to be shared between buffers.
-    /// # Panics
-    /// Panics iff `offset` is larger than `len`.
-    pub fn slice(&self, offset: usize) -> Self {
-        assert!(
-            offset <= self.len(),
-            "the offset of the new Buffer cannot exceed the existing length"
-        );
-        Self {
-            data: self.data.clone(),
-            offset: self.offset + offset,
-        }
-    }
-
-    /// Returns a pointer to the start of this buffer.
-    ///
-    /// Note that this should be used cautiously, and the returned pointer should not be
-    /// stored anywhere, to avoid dangling pointers.
-    pub fn as_ptr(&self) -> *const u8 {
-        unsafe { self.data.ptr().as_ptr().add(self.offset) }
-    }
-
-    /// View buffer as typed slice.
-    ///
-    /// # Safety
-    ///
-    /// `ArrowNativeType` is public so that it can be used as a trait bound for other public
-    /// components, such as the `ToByteSlice` trait.  However, this means that it can be
-    /// implemented by user defined types, which it is not intended for.
-    ///
-    /// Also `typed_data::<bool>` is unsafe as `0x00` and `0x01` are the only valid values for
-    /// `bool` in Rust.  However, `bool` arrays in Arrow are bit-packed which breaks this condition.
-    /// View buffer as typed slice.
-    pub unsafe fn typed_data<T: ArrowNativeType + num::Num>(&self) -> &[T] {
-        // JUSTIFICATION
-        //  Benefit
-        //      Many of the buffers represent specific types, and consumers of `Buffer` often need to re-interpret them.
-        //  Soundness
-        //      * The pointer is non-null by construction
-        //      * alignment asserted below.
-        let (prefix, offsets, suffix) = self.as_slice().align_to::<T>();
-        assert!(prefix.is_empty() && suffix.is_empty());
-        offsets
-    }
-
-    /// Returns a slice of this buffer starting at a certain bit offset.
-    /// If the offset is byte-aligned the returned buffer is a shallow clone,
-    /// otherwise a new buffer is allocated and filled with a copy of the bits in the range.
-    pub fn bit_slice(&self, offset: usize, len: usize) -> Self {
-        if offset % 8 == 0 && len % 8 == 0 {
-            return self.slice(offset / 8);
-        }
-
-        bitwise_unary_op_helper(&self, offset, len, |a| a)
-    }
-
-    /// Returns a `BitChunks` instance which can be used to iterate over this buffers bits
-    /// in larger chunks and starting at arbitrary bit offsets.
-    /// Note that both `offset` and `length` are measured in bits.
-    pub fn bit_chunks(&self, offset: usize, len: usize) -> BitChunks {
-        BitChunks::new(&self.as_slice(), offset, len)
-    }
-
-    /// Returns the number of 1-bits in this buffer.
-    pub fn count_set_bits(&self) -> usize {
-        let len_in_bits = self.len() * 8;
-        // self.offset is already taken into consideration by the bit_chunks implementation
-        self.count_set_bits_offset(0, len_in_bits)
-    }
-
-    /// Returns the number of 1-bits in this buffer, starting from `offset` with `length` bits
-    /// inspected. Note that both `offset` and `length` are measured in bits.
-    pub fn count_set_bits_offset(&self, offset: usize, len: usize) -> usize {
-        let chunks = self.bit_chunks(offset, len);
-        let mut count = chunks.iter().map(|c| c.count_ones() as usize).sum();
-        count += chunks.remainder_bits().count_ones() as usize;
-
-        count
-    }
-}
-
-/// Creating a `Buffer` instance by copying the memory from a `AsRef<[u8]>` into a newly
-/// allocated memory region.
-impl<T: AsRef<[u8]>> From<T> for Buffer {
-    fn from(p: T) -> Self {
-        // allocate aligned memory buffer
-        let slice = p.as_ref();
-        let len = slice.len();
-        let mut buffer = MutableBuffer::new(len);
-        buffer.extend_from_slice(slice);
-        buffer.into()
-    }
-}
-
-/// Creating a `Buffer` instance by storing the boolean values into the buffer
-impl std::iter::FromIterator<bool> for Buffer {
-    fn from_iter<I>(iter: I) -> Self
-    where
-        I: IntoIterator<Item = bool>,
-    {
-        MutableBuffer::from_iter(iter).into()
-    }
-}
-
-impl std::ops::Deref for Buffer {
-    type Target = [u8];
-
-    fn deref(&self) -> &[u8] {
-        unsafe { std::slice::from_raw_parts(self.as_ptr(), self.len()) }
-    }
-}
-
-unsafe impl Sync for Buffer {}
-unsafe impl Send for Buffer {}
-
-impl From<MutableBuffer> for Buffer {
-    #[inline]
-    fn from(buffer: MutableBuffer) -> Self {
-        buffer.into_buffer()
-    }
-}
-
-impl Buffer {
-    /// Creates a [`Buffer`] from an [`Iterator`] with a trusted (upper) length.
-    /// Prefer this to `collect` whenever possible, as it is ~60% faster.
-    /// # Example
-    /// ```
-    /// # use arrow::buffer::Buffer;
-    /// let v = vec![1u32];
-    /// let iter = v.iter().map(|x| x * 2);
-    /// let buffer = unsafe { Buffer::from_trusted_len_iter(iter) };
-    /// assert_eq!(buffer.len(), 4) // u32 has 4 bytes
-    /// ```
-    /// # Safety
-    /// This method assumes that the iterator's size is correct and is undefined behavior
-    /// to use it on an iterator that reports an incorrect length.
-    // This implementation is required for two reasons:
-    // 1. there is no trait `TrustedLen` in stable rust and therefore
-    //    we can't specialize `extend` for `TrustedLen` like `Vec` does.
-    // 2. `from_trusted_len_iter` is faster.
-    #[inline]
-    pub unsafe fn from_trusted_len_iter<T: ArrowNativeType, I: Iterator<Item = T>>(
-        iterator: I,
-    ) -> Self {
-        MutableBuffer::from_trusted_len_iter(iterator).into()
-    }
-
-    /// Creates a [`Buffer`] from an [`Iterator`] with a trusted (upper) length or errors
-    /// if any of the items of the iterator is an error.
-    /// Prefer this to `collect` whenever possible, as it is ~60% faster.
-    /// # Safety
-    /// This method assumes that the iterator's size is correct and is undefined behavior
-    /// to use it on an iterator that reports an incorrect length.
-    #[inline]
-    pub unsafe fn try_from_trusted_len_iter<
-        E,
-        T: ArrowNativeType,
-        I: Iterator<Item = std::result::Result<T, E>>,
-    >(
-        iterator: I,
-    ) -> std::result::Result<Self, E> {
-        Ok(MutableBuffer::try_from_trusted_len_iter(iterator)?.into())
-    }
-}
-
-impl<T: ArrowNativeType> FromIterator<T> for Buffer {
-    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
-        let mut iterator = iter.into_iter();
-        let size = std::mem::size_of::<T>();
-
-        // first iteration, which will likely reserve sufficient space for the buffer.
-        let mut buffer = match iterator.next() {
-            None => MutableBuffer::new(0),
-            Some(element) => {
-                let (lower, _) = iterator.size_hint();
-                let mut buffer = MutableBuffer::new(lower.saturating_add(1) * size);
-                unsafe {
-                    std::ptr::write(buffer.as_mut_ptr() as *mut T, element);
-                    buffer.set_len(size);
-                }
-                buffer
-            }
-        };
-
-        buffer.extend_from_iter(iterator);
-        buffer.into()
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use std::thread;
-
-    use super::*;
-
-    #[test]
-    fn test_buffer_data_equality() {
-        let buf1 = Buffer::from(&[0, 1, 2, 3, 4]);
-        let buf2 = Buffer::from(&[0, 1, 2, 3, 4]);
-        assert_eq!(buf1, buf2);
-
-        // slice with same offset should still preserve equality
-        let buf3 = buf1.slice(2);
-        assert_ne!(buf1, buf3);
-        let buf4 = buf2.slice(2);
-        assert_eq!(buf3, buf4);
-
-        // Different capacities should still preserve equality
-        let mut buf2 = MutableBuffer::new(65);
-        buf2.extend_from_slice(&[0u8, 1, 2, 3, 4]);
-
-        let buf2 = buf2.into();
-        assert_eq!(buf1, buf2);
-
-        // unequal because of different elements
-        let buf2 = Buffer::from(&[0, 0, 2, 3, 4]);
-        assert_ne!(buf1, buf2);
-
-        // unequal because of different length
-        let buf2 = Buffer::from(&[0, 1, 2, 3]);
-        assert_ne!(buf1, buf2);
-    }
-
-    #[test]
-    fn test_from_raw_parts() {
-        let buf = Buffer::from(&[0, 1, 2, 3, 4]);
-        assert_eq!(5, buf.len());
-        assert!(!buf.as_ptr().is_null());
-        assert_eq!([0, 1, 2, 3, 4], buf.as_slice());
-    }
-
-    #[test]
-    fn test_from_vec() {
-        let buf = Buffer::from(&[0, 1, 2, 3, 4]);
-        assert_eq!(5, buf.len());
-        assert!(!buf.as_ptr().is_null());
-        assert_eq!([0, 1, 2, 3, 4], buf.as_slice());
-    }
-
-    #[test]
-    fn test_copy() {
-        let buf = Buffer::from(&[0, 1, 2, 3, 4]);
-        let buf2 = buf;
-        assert_eq!(5, buf2.len());
-        assert_eq!(64, buf2.capacity());
-        assert!(!buf2.as_ptr().is_null());
-        assert_eq!([0, 1, 2, 3, 4], buf2.as_slice());
-    }
-
-    #[test]
-    fn test_slice() {
-        let buf = Buffer::from(&[2, 4, 6, 8, 10]);
-        let buf2 = buf.slice(2);
-
-        assert_eq!([6, 8, 10], buf2.as_slice());
-        assert_eq!(3, buf2.len());
-        assert_eq!(unsafe { buf.as_ptr().offset(2) }, buf2.as_ptr());
-
-        let buf3 = buf2.slice(1);
-        assert_eq!([8, 10], buf3.as_slice());
-        assert_eq!(2, buf3.len());
-        assert_eq!(unsafe { buf.as_ptr().offset(3) }, buf3.as_ptr());
-
-        let buf4 = buf.slice(5);
-        let empty_slice: [u8; 0] = [];
-        assert_eq!(empty_slice, buf4.as_slice());
-        assert_eq!(0, buf4.len());
-        assert!(buf4.is_empty());
-        assert_eq!(buf2.slice(2).as_slice(), &[10]);
-    }
-
-    #[test]
-    #[should_panic(
-        expected = "the offset of the new Buffer cannot exceed the existing length"
-    )]
-    fn test_slice_offset_out_of_bound() {
-        let buf = Buffer::from(&[2, 4, 6, 8, 10]);
-        buf.slice(6);
-    }
-
-    #[test]
-    fn test_access_concurrently() {
-        let buffer = Buffer::from(vec![1, 2, 3, 4, 5]);
-        let buffer2 = buffer.clone();
-        assert_eq!([1, 2, 3, 4, 5], buffer.as_slice());
-
-        let buffer_copy = thread::spawn(move || {
-            // access buffer in another thread.
-            buffer
-        })
-        .join();
-
-        assert!(buffer_copy.is_ok());
-        assert_eq!(buffer2, buffer_copy.ok().unwrap());
-    }
-
-    macro_rules! check_as_typed_data {
-        ($input: expr, $native_t: ty) => {{
-            let buffer = Buffer::from_slice_ref($input);
-            let slice: &[$native_t] = unsafe { buffer.typed_data::<$native_t>() };
-            assert_eq!($input, slice);
-        }};
-    }
-
-    #[test]
-    #[allow(clippy::float_cmp)]
-    fn test_as_typed_data() {
-        check_as_typed_data!(&[1i8, 3i8, 6i8], i8);
-        check_as_typed_data!(&[1u8, 3u8, 6u8], u8);
-        check_as_typed_data!(&[1i16, 3i16, 6i16], i16);
-        check_as_typed_data!(&[1i32, 3i32, 6i32], i32);
-        check_as_typed_data!(&[1i64, 3i64, 6i64], i64);
-        check_as_typed_data!(&[1u16, 3u16, 6u16], u16);
-        check_as_typed_data!(&[1u32, 3u32, 6u32], u32);
-        check_as_typed_data!(&[1u64, 3u64, 6u64], u64);
-        check_as_typed_data!(&[1f32, 3f32, 6f32], f32);
-        check_as_typed_data!(&[1f64, 3f64, 6f64], f64);
-    }
-
-    #[test]
-    fn test_count_bits() {
-        assert_eq!(0, Buffer::from(&[0b00000000]).count_set_bits());
-        assert_eq!(8, Buffer::from(&[0b11111111]).count_set_bits());
-        assert_eq!(3, Buffer::from(&[0b00001101]).count_set_bits());
-        assert_eq!(6, Buffer::from(&[0b01001001, 0b01010010]).count_set_bits());
-        assert_eq!(16, Buffer::from(&[0b11111111, 0b11111111]).count_set_bits());
-    }
-
-    #[test]
-    fn test_count_bits_slice() {
-        assert_eq!(
-            0,
-            Buffer::from(&[0b11111111, 0b00000000])
-                .slice(1)
-                .count_set_bits()
-        );
-        assert_eq!(
-            8,
-            Buffer::from(&[0b11111111, 0b11111111])
-                .slice(1)
-                .count_set_bits()
-        );
-        assert_eq!(
-            3,
-            Buffer::from(&[0b11111111, 0b11111111, 0b00001101])
-                .slice(2)
-                .count_set_bits()
-        );
-        assert_eq!(
-            6,
-            Buffer::from(&[0b11111111, 0b01001001, 0b01010010])
-                .slice(1)
-                .count_set_bits()
-        );
-        assert_eq!(
-            16,
-            Buffer::from(&[0b11111111, 0b11111111, 0b11111111, 0b11111111])
-                .slice(2)
-                .count_set_bits()
-        );
-    }
-
-    #[test]
-    fn test_count_bits_offset_slice() {
-        assert_eq!(8, Buffer::from(&[0b11111111]).count_set_bits_offset(0, 8));
-        assert_eq!(3, Buffer::from(&[0b11111111]).count_set_bits_offset(0, 3));
-        assert_eq!(5, Buffer::from(&[0b11111111]).count_set_bits_offset(3, 5));
-        assert_eq!(1, Buffer::from(&[0b11111111]).count_set_bits_offset(3, 1));
-        assert_eq!(0, Buffer::from(&[0b11111111]).count_set_bits_offset(8, 0));
-        assert_eq!(2, Buffer::from(&[0b01010101]).count_set_bits_offset(0, 3));
-        assert_eq!(
-            16,
-            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(0, 16)
-        );
-        assert_eq!(
-            10,
-            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(0, 10)
-        );
-        assert_eq!(
-            10,
-            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(3, 10)
-        );
-        assert_eq!(
-            8,
-            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(8, 8)
-        );
-        assert_eq!(
-            5,
-            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(11, 5)
-        );
-        assert_eq!(
-            0,
-            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(16, 0)
-        );
-        assert_eq!(
-            2,
-            Buffer::from(&[0b01101101, 0b10101010]).count_set_bits_offset(7, 5)
-        );
-        assert_eq!(
-            4,
-            Buffer::from(&[0b01101101, 0b10101010]).count_set_bits_offset(7, 9)
-        );
-    }
-}
diff --git a/rust/arrow/src/buffer/mod.rs b/rust/arrow/src/buffer/mod.rs
deleted file mode 100644
index cc5c63b1c37..00000000000
--- a/rust/arrow/src/buffer/mod.rs
+++ /dev/null
@@ -1,69 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! This module contains two main structs: [Buffer] and [MutableBuffer]. A buffer represents
-//! a contiguous memory region that can be shared via `offsets`.
-
-mod immutable;
-pub use immutable::*;
-mod mutable;
-pub use mutable::*;
-mod ops;
-pub(super) use ops::*;
-
-use crate::error::{ArrowError, Result};
-use std::ops::{BitAnd, BitOr, Not};
-
-impl<'a, 'b> BitAnd<&'b Buffer> for &'a Buffer {
-    type Output = Result<Buffer>;
-
-    fn bitand(self, rhs: &'b Buffer) -> Result<Buffer> {
-        if self.len() != rhs.len() {
-            return Err(ArrowError::ComputeError(
-                "Buffers must be the same size to apply Bitwise AND.".to_string(),
-            ));
-        }
-
-        let len_in_bits = self.len() * 8;
-        Ok(buffer_bin_and(&self, 0, &rhs, 0, len_in_bits))
-    }
-}
-
-impl<'a, 'b> BitOr<&'b Buffer> for &'a Buffer {
-    type Output = Result<Buffer>;
-
-    fn bitor(self, rhs: &'b Buffer) -> Result<Buffer> {
-        if self.len() != rhs.len() {
-            return Err(ArrowError::ComputeError(
-                "Buffers must be the same size to apply Bitwise OR.".to_string(),
-            ));
-        }
-
-        let len_in_bits = self.len() * 8;
-
-        Ok(buffer_bin_or(&self, 0, &rhs, 0, len_in_bits))
-    }
-}
-
-impl Not for &Buffer {
-    type Output = Buffer;
-
-    fn not(self) -> Buffer {
-        let len_in_bits = self.len() * 8;
-        buffer_unary_not(&self, 0, len_in_bits)
-    }
-}
diff --git a/rust/arrow/src/buffer/mutable.rs b/rust/arrow/src/buffer/mutable.rs
deleted file mode 100644
index d7fd5b9d200..00000000000
--- a/rust/arrow/src/buffer/mutable.rs
+++ /dev/null
@@ -1,749 +0,0 @@
-use std::ptr::NonNull;
-
-use crate::{
-    alloc,
-    bytes::{Bytes, Deallocation},
-    datatypes::{ArrowNativeType, ToByteSlice},
-    util::bit_util,
-};
-
-use super::Buffer;
-
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-/// A [`MutableBuffer`] is Arrow's interface to build a [`Buffer`] out of items or slices of items.
-/// [`Buffer`]s created from [`MutableBuffer`] (via `into`) are guaranteed to have its pointer aligned
-/// along cache lines and in multiple of 64 bytes.
-/// Use [MutableBuffer::push] to insert an item, [MutableBuffer::extend_from_slice]
-/// to insert many items, and `into` to convert it to [`Buffer`].
-/// # Example
-/// ```
-/// # use arrow::buffer::{Buffer, MutableBuffer};
-/// let mut buffer = MutableBuffer::new(0);
-/// buffer.push(256u32);
-/// buffer.extend_from_slice(&[1u32]);
-/// let buffer: Buffer = buffer.into();
-/// assert_eq!(buffer.as_slice(), &[0u8, 1, 0, 0, 1, 0, 0, 0])
-/// ```
-#[derive(Debug)]
-pub struct MutableBuffer {
-    // dangling iff capacity = 0
-    data: NonNull<u8>,
-    // invariant: len <= capacity
-    len: usize,
-    capacity: usize,
-}
-
-impl MutableBuffer {
-    /// Allocate a new [MutableBuffer] with initial capacity to be at least `capacity`.
-    #[inline]
-    pub fn new(capacity: usize) -> Self {
-        Self::with_capacity(capacity)
-    }
-
-    /// Allocate a new [MutableBuffer] with initial capacity to be at least `capacity`.
-    #[inline]
-    pub fn with_capacity(capacity: usize) -> Self {
-        let capacity = bit_util::round_upto_multiple_of_64(capacity);
-        let ptr = alloc::allocate_aligned(capacity);
-        Self {
-            data: ptr,
-            len: 0,
-            capacity,
-        }
-    }
-
-    /// Allocates a new [MutableBuffer] with `len` and capacity to be at least `len` where
-    /// all bytes are guaranteed to be `0u8`.
-    /// # Example
-    /// ```
-    /// # use arrow::buffer::{Buffer, MutableBuffer};
-    /// let mut buffer = MutableBuffer::from_len_zeroed(127);
-    /// assert_eq!(buffer.len(), 127);
-    /// assert!(buffer.capacity() >= 127);
-    /// let data = buffer.as_slice_mut();
-    /// assert_eq!(data[126], 0u8);
-    /// ```
-    pub fn from_len_zeroed(len: usize) -> Self {
-        let new_capacity = bit_util::round_upto_multiple_of_64(len);
-        let ptr = alloc::allocate_aligned_zeroed(new_capacity);
-        Self {
-            data: ptr,
-            len,
-            capacity: new_capacity,
-        }
-    }
-
-    /// creates a new [MutableBuffer] with capacity and length capable of holding `len` bits.
-    /// This is useful to create a buffer for packed bitmaps.
-    pub fn new_null(len: usize) -> Self {
-        let num_bytes = bit_util::ceil(len, 8);
-        MutableBuffer::from_len_zeroed(num_bytes)
-    }
-
-    /// Set the bits in the range of `[0, end)` to 0 (if `val` is false), or 1 (if `val`
-    /// is true). Also extend the length of this buffer to be `end`.
-    ///
-    /// This is useful when one wants to clear (or set) the bits and then manipulate
-    /// the buffer directly (e.g., modifying the buffer by holding a mutable reference
-    /// from `data_mut()`).
-    pub fn with_bitset(mut self, end: usize, val: bool) -> Self {
-        assert!(end <= self.capacity);
-        let v = if val { 255 } else { 0 };
-        unsafe {
-            std::ptr::write_bytes(self.data.as_ptr(), v, end);
-            self.len = end;
-        }
-        self
-    }
-
-    /// Ensure that `count` bytes from `start` contain zero bits
-    ///
-    /// This is used to initialize the bits in a buffer, however, it has no impact on the
-    /// `len` of the buffer and so can be used to initialize the memory region from
-    /// `len` to `capacity`.
-    pub fn set_null_bits(&mut self, start: usize, count: usize) {
-        assert!(start + count <= self.capacity);
-        unsafe {
-            std::ptr::write_bytes(self.data.as_ptr().add(start), 0, count);
-        }
-    }
-
-    /// Ensures that this buffer has at least `self.len + additional` bytes. This re-allocates iff
-    /// `self.len + additional > capacity`.
-    /// # Example
-    /// ```
-    /// # use arrow::buffer::{Buffer, MutableBuffer};
-    /// let mut buffer = MutableBuffer::new(0);
-    /// buffer.reserve(253); // allocates for the first time
-    /// (0..253u8).for_each(|i| buffer.push(i)); // no reallocation
-    /// let buffer: Buffer = buffer.into();
-    /// assert_eq!(buffer.len(), 253);
-    /// ```
-    // For performance reasons, this must be inlined so that the `if` is executed inside the caller, and not as an extra call that just
-    // exits.
-    #[inline(always)]
-    pub fn reserve(&mut self, additional: usize) {
-        let required_cap = self.len + additional;
-        if required_cap > self.capacity {
-            // JUSTIFICATION
-            //  Benefit
-            //      necessity
-            //  Soundness
-            //      `self.data` is valid for `self.capacity`.
-            let (ptr, new_capacity) =
-                unsafe { reallocate(self.data, self.capacity, required_cap) };
-            self.data = ptr;
-            self.capacity = new_capacity;
-        }
-    }
-
-    /// Resizes the buffer, either truncating its contents (with no change in capacity), or
-    /// growing it (potentially reallocating it) and writing `value` in the newly available bytes.
-    /// # Example
-    /// ```
-    /// # use arrow::buffer::{Buffer, MutableBuffer};
-    /// let mut buffer = MutableBuffer::new(0);
-    /// buffer.resize(253, 2); // allocates for the first time
-    /// assert_eq!(buffer.as_slice()[252], 2u8);
-    /// ```
-    // For performance reasons, this must be inlined so that the `if` is executed inside the caller, and not as an extra call that just
-    // exits.
-    #[inline(always)]
-    pub fn resize(&mut self, new_len: usize, value: u8) {
-        if new_len > self.len {
-            let diff = new_len - self.len;
-            self.reserve(diff);
-            // write the value
-            unsafe { self.data.as_ptr().add(self.len).write_bytes(value, diff) };
-        }
-        // this truncates the buffer when new_len < self.len
-        self.len = new_len;
-    }
-
-    /// Returns whether this buffer is empty or not.
-    #[inline]
-    pub const fn is_empty(&self) -> bool {
-        self.len == 0
-    }
-
-    /// Returns the length (the number of bytes written) in this buffer.
-    /// The invariant `buffer.len() <= buffer.capacity()` is always upheld.
-    #[inline]
-    pub const fn len(&self) -> usize {
-        self.len
-    }
-
-    /// Returns the total capacity in this buffer.
-    /// The invariant `buffer.len() <= buffer.capacity()` is always upheld.
-    #[inline]
-    pub const fn capacity(&self) -> usize {
-        self.capacity
-    }
-
-    /// Clear all existing data from this buffer.
-    pub fn clear(&mut self) {
-        self.len = 0
-    }
-
-    /// Returns the data stored in this buffer as a slice.
-    pub fn as_slice(&self) -> &[u8] {
-        self
-    }
-
-    /// Returns the data stored in this buffer as a mutable slice.
-    pub fn as_slice_mut(&mut self) -> &mut [u8] {
-        self
-    }
-
-    /// Returns a raw pointer to this buffer's internal memory
-    /// This pointer is guaranteed to be aligned along cache-lines.
-    #[inline]
-    pub const fn as_ptr(&self) -> *const u8 {
-        self.data.as_ptr()
-    }
-
-    /// Returns a mutable raw pointer to this buffer's internal memory
-    /// This pointer is guaranteed to be aligned along cache-lines.
-    #[inline]
-    pub fn as_mut_ptr(&mut self) -> *mut u8 {
-        self.data.as_ptr()
-    }
-
-    #[deprecated(
-        since = "2.0.0",
-        note = "This method is deprecated in favour of `into` from the trait `Into`."
-    )]
-    /// Freezes this buffer and return an immutable version of it.
-    pub fn freeze(self) -> Buffer {
-        self.into_buffer()
-    }
-
-    #[inline]
-    pub(super) fn into_buffer(self) -> Buffer {
-        let bytes = unsafe {
-            Bytes::new(self.data, self.len, Deallocation::Native(self.capacity))
-        };
-        std::mem::forget(self);
-        Buffer::from_bytes(bytes)
-    }
-
-    /// View this buffer asa slice of a specific type.
-    /// # Safety
-    /// This function must only be used when this buffer was extended with items of type `T`.
-    /// Failure to do so results in undefined behavior.
-    pub fn typed_data_mut<T: ArrowNativeType>(&mut self) -> &mut [T] {
-        unsafe {
-            let (prefix, offsets, suffix) = self.as_slice_mut().align_to_mut::<T>();
-            assert!(prefix.is_empty() && suffix.is_empty());
-            offsets
-        }
-    }
-
-    /// Extends this buffer from a slice of items that can be represented in bytes, increasing its capacity if needed.
-    /// # Example
-    /// ```
-    /// # use arrow::buffer::MutableBuffer;
-    /// let mut buffer = MutableBuffer::new(0);
-    /// buffer.extend_from_slice(&[2u32, 0]);
-    /// assert_eq!(buffer.len(), 8) // u32 has 4 bytes
-    /// ```
-    #[inline]
-    pub fn extend_from_slice<T: ToByteSlice>(&mut self, items: &[T]) {
-        let len = items.len();
-        let additional = len * std::mem::size_of::<T>();
-        self.reserve(additional);
-        unsafe {
-            let dst = self.data.as_ptr().add(self.len);
-            let src = items.as_ptr() as *const u8;
-            std::ptr::copy_nonoverlapping(src, dst, additional)
-        }
-        self.len += additional;
-    }
-
-    /// Extends the buffer with a new item, increasing its capacity if needed.
-    /// # Example
-    /// ```
-    /// # use arrow::buffer::MutableBuffer;
-    /// let mut buffer = MutableBuffer::new(0);
-    /// buffer.push(256u32);
-    /// assert_eq!(buffer.len(), 4) // u32 has 4 bytes
-    /// ```
-    #[inline]
-    pub fn push<T: ToByteSlice>(&mut self, item: T) {
-        let additional = std::mem::size_of::<T>();
-        self.reserve(additional);
-        unsafe {
-            let dst = self.data.as_ptr().add(self.len) as *mut T;
-            std::ptr::write(dst, item);
-        }
-        self.len += additional;
-    }
-
-    /// Extends the buffer with a new item, without checking for sufficient capacity
-    /// Safety
-    /// Caller must ensure that the capacity()-len()>=size_of<T>()
-    #[inline]
-    unsafe fn push_unchecked<T: ToByteSlice>(&mut self, item: T) {
-        let additional = std::mem::size_of::<T>();
-        let dst = self.data.as_ptr().add(self.len) as *mut T;
-        std::ptr::write(dst, item);
-        self.len += additional;
-    }
-
-    /// Extends the buffer by `additional` bytes equal to `0u8`, incrementing its capacity if needed.
-    #[inline]
-    pub fn extend_zeros(&mut self, additional: usize) {
-        self.resize(self.len + additional, 0);
-    }
-
-    /// # Safety
-    /// The caller must ensure that the buffer was properly initialized up to `len`.
-    #[inline]
-    pub(crate) unsafe fn set_len(&mut self, len: usize) {
-        assert!(len <= self.capacity());
-        self.len = len;
-    }
-}
-
-/// # Safety
-/// `ptr` must be allocated for `old_capacity`.
-#[inline]
-unsafe fn reallocate(
-    ptr: NonNull<u8>,
-    old_capacity: usize,
-    new_capacity: usize,
-) -> (NonNull<u8>, usize) {
-    let new_capacity = bit_util::round_upto_multiple_of_64(new_capacity);
-    let new_capacity = std::cmp::max(new_capacity, old_capacity * 2);
-    let ptr = alloc::reallocate(ptr, old_capacity, new_capacity);
-    (ptr, new_capacity)
-}
-
-impl<A: ArrowNativeType> Extend<A> for MutableBuffer {
-    #[inline]
-    fn extend<T: IntoIterator<Item = A>>(&mut self, iter: T) {
-        let iterator = iter.into_iter();
-        self.extend_from_iter(iterator)
-    }
-}
-
-impl MutableBuffer {
-    #[inline]
-    pub(super) fn extend_from_iter<T: ArrowNativeType, I: Iterator<Item = T>>(
-        &mut self,
-        mut iterator: I,
-    ) {
-        let size = std::mem::size_of::<T>();
-        let (lower, _) = iterator.size_hint();
-        let additional = lower * size;
-        self.reserve(additional);
-
-        // this is necessary because of https://github.com/rust-lang/rust/issues/32155
-        let mut len = SetLenOnDrop::new(&mut self.len);
-        let mut dst = unsafe { self.data.as_ptr().add(len.local_len) as *mut T };
-        let capacity = self.capacity;
-
-        while len.local_len + size <= capacity {
-            if let Some(item) = iterator.next() {
-                unsafe {
-                    std::ptr::write(dst, item);
-                    dst = dst.add(1);
-                }
-                len.local_len += size;
-            } else {
-                break;
-            }
-        }
-        drop(len);
-
-        iterator.for_each(|item| self.push(item));
-    }
-
-    /// Creates a [`MutableBuffer`] from an [`Iterator`] with a trusted (upper) length.
-    /// Prefer this to `collect` whenever possible, as it is faster ~60% faster.
-    /// # Example
-    /// ```
-    /// # use arrow::buffer::MutableBuffer;
-    /// let v = vec![1u32];
-    /// let iter = v.iter().map(|x| x * 2);
-    /// let buffer = unsafe { MutableBuffer::from_trusted_len_iter(iter) };
-    /// assert_eq!(buffer.len(), 4) // u32 has 4 bytes
-    /// ```
-    /// # Safety
-    /// This method assumes that the iterator's size is correct and is undefined behavior
-    /// to use it on an iterator that reports an incorrect length.
-    // This implementation is required for two reasons:
-    // 1. there is no trait `TrustedLen` in stable rust and therefore
-    //    we can't specialize `extend` for `TrustedLen` like `Vec` does.
-    // 2. `from_trusted_len_iter` is faster.
-    #[inline]
-    pub unsafe fn from_trusted_len_iter<T: ArrowNativeType, I: Iterator<Item = T>>(
-        iterator: I,
-    ) -> Self {
-        let (_, upper) = iterator.size_hint();
-        let upper = upper.expect("from_trusted_len_iter requires an upper limit");
-        let len = upper * std::mem::size_of::<T>();
-
-        let mut buffer = MutableBuffer::new(len);
-
-        let mut dst = buffer.data.as_ptr() as *mut T;
-        for item in iterator {
-            // note how there is no reserve here (compared with `extend_from_iter`)
-            std::ptr::write(dst, item);
-            dst = dst.add(1);
-        }
-        assert_eq!(
-            dst.offset_from(buffer.data.as_ptr() as *mut T) as usize,
-            upper,
-            "Trusted iterator length was not accurately reported"
-        );
-        buffer.len = len;
-        buffer
-    }
-
-    /// Creates a [`MutableBuffer`] from a boolean [`Iterator`] with a trusted (upper) length.
-    /// # use arrow::buffer::MutableBuffer;
-    /// # Example
-    /// ```
-    /// # use arrow::buffer::MutableBuffer;
-    /// let v = vec![false, true, false];
-    /// let iter = v.iter().map(|x| *x || true);
-    /// let buffer = unsafe { MutableBuffer::from_trusted_len_iter_bool(iter) };
-    /// assert_eq!(buffer.len(), 1) // 3 booleans have 1 byte
-    /// ```
-    /// # Safety
-    /// This method assumes that the iterator's size is correct and is undefined behavior
-    /// to use it on an iterator that reports an incorrect length.
-    // This implementation is required for two reasons:
-    // 1. there is no trait `TrustedLen` in stable rust and therefore
-    //    we can't specialize `extend` for `TrustedLen` like `Vec` does.
-    // 2. `from_trusted_len_iter_bool` is faster.
-    #[inline]
-    pub unsafe fn from_trusted_len_iter_bool<I: Iterator<Item = bool>>(
-        mut iterator: I,
-    ) -> Self {
-        let (_, upper) = iterator.size_hint();
-        let upper = upper.expect("from_trusted_len_iter requires an upper limit");
-
-        let mut result = {
-            let byte_capacity: usize = upper.saturating_add(7) / 8;
-            MutableBuffer::new(byte_capacity)
-        };
-
-        'a: loop {
-            let mut byte_accum: u8 = 0;
-            let mut mask: u8 = 1;
-
-            //collect (up to) 8 bits into a byte
-            while mask != 0 {
-                if let Some(value) = iterator.next() {
-                    byte_accum |= match value {
-                        true => mask,
-                        false => 0,
-                    };
-                    mask <<= 1;
-                } else {
-                    if mask != 1 {
-                        // Add last byte
-                        result.push_unchecked(byte_accum);
-                    }
-                    break 'a;
-                }
-            }
-
-            // Soundness: from_trusted_len
-            result.push_unchecked(byte_accum);
-        }
-        result
-    }
-
-    /// Creates a [`MutableBuffer`] from an [`Iterator`] with a trusted (upper) length or errors
-    /// if any of the items of the iterator is an error.
-    /// Prefer this to `collect` whenever possible, as it is faster ~60% faster.
-    /// # Safety
-    /// This method assumes that the iterator's size is correct and is undefined behavior
-    /// to use it on an iterator that reports an incorrect length.
-    #[inline]
-    pub unsafe fn try_from_trusted_len_iter<
-        E,
-        T: ArrowNativeType,
-        I: Iterator<Item = std::result::Result<T, E>>,
-    >(
-        iterator: I,
-    ) -> std::result::Result<Self, E> {
-        let (_, upper) = iterator.size_hint();
-        let upper = upper.expect("try_from_trusted_len_iter requires an upper limit");
-        let len = upper * std::mem::size_of::<T>();
-
-        let mut buffer = MutableBuffer::new(len);
-
-        let mut dst = buffer.data.as_ptr() as *mut T;
-        for item in iterator {
-            // note how there is no reserve here (compared with `extend_from_iter`)
-            std::ptr::write(dst, item?);
-            dst = dst.add(1);
-        }
-        assert_eq!(
-            dst.offset_from(buffer.data.as_ptr() as *mut T) as usize,
-            upper,
-            "Trusted iterator length was not accurately reported"
-        );
-        buffer.len = len;
-        Ok(buffer)
-    }
-}
-
-impl std::ops::Deref for MutableBuffer {
-    type Target = [u8];
-
-    fn deref(&self) -> &[u8] {
-        unsafe { std::slice::from_raw_parts(self.as_ptr(), self.len) }
-    }
-}
-
-impl std::ops::DerefMut for MutableBuffer {
-    fn deref_mut(&mut self) -> &mut [u8] {
-        unsafe { std::slice::from_raw_parts_mut(self.as_mut_ptr(), self.len) }
-    }
-}
-
-impl Drop for MutableBuffer {
-    fn drop(&mut self) {
-        unsafe { alloc::free_aligned(self.data, self.capacity) };
-    }
-}
-
-impl PartialEq for MutableBuffer {
-    fn eq(&self, other: &MutableBuffer) -> bool {
-        if self.len != other.len {
-            return false;
-        }
-        if self.capacity != other.capacity {
-            return false;
-        }
-        self.as_slice() == other.as_slice()
-    }
-}
-
-unsafe impl Sync for MutableBuffer {}
-unsafe impl Send for MutableBuffer {}
-
-struct SetLenOnDrop<'a> {
-    len: &'a mut usize,
-    local_len: usize,
-}
-
-impl<'a> SetLenOnDrop<'a> {
-    #[inline]
-    fn new(len: &'a mut usize) -> Self {
-        SetLenOnDrop {
-            local_len: *len,
-            len,
-        }
-    }
-}
-
-impl Drop for SetLenOnDrop<'_> {
-    #[inline]
-    fn drop(&mut self) {
-        *self.len = self.local_len;
-    }
-}
-
-/// Creating a `MutableBuffer` instance by setting bits according to the boolean values
-impl std::iter::FromIterator<bool> for MutableBuffer {
-    fn from_iter<I>(iter: I) -> Self
-    where
-        I: IntoIterator<Item = bool>,
-    {
-        let mut iterator = iter.into_iter();
-        let mut result = {
-            let byte_capacity: usize = iterator.size_hint().0.saturating_add(7) / 8;
-            MutableBuffer::new(byte_capacity)
-        };
-
-        loop {
-            let mut exhausted = false;
-            let mut byte_accum: u8 = 0;
-            let mut mask: u8 = 1;
-
-            //collect (up to) 8 bits into a byte
-            while mask != 0 {
-                if let Some(value) = iterator.next() {
-                    byte_accum |= match value {
-                        true => mask,
-                        false => 0,
-                    };
-                    mask <<= 1;
-                } else {
-                    exhausted = true;
-                    break;
-                }
-            }
-
-            // break if the iterator was exhausted before it provided a bool for this byte
-            if exhausted && mask == 1 {
-                break;
-            }
-
-            //ensure we have capacity to write the byte
-            if result.len() == result.capacity() {
-                //no capacity for new byte, allocate 1 byte more (plus however many more the iterator advertises)
-                let additional_byte_capacity = 1usize.saturating_add(
-                    iterator.size_hint().0.saturating_add(7) / 8, //convert bit count to byte count, rounding up
-                );
-                result.reserve(additional_byte_capacity)
-            }
-
-            // Soundness: capacity was allocated above
-            unsafe { result.push_unchecked(byte_accum) };
-            if exhausted {
-                break;
-            }
-        }
-        result
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_mutable_new() {
-        let buf = MutableBuffer::new(63);
-        assert_eq!(64, buf.capacity());
-        assert_eq!(0, buf.len());
-        assert!(buf.is_empty());
-    }
-
-    #[test]
-    fn test_mutable_extend_from_slice() {
-        let mut buf = MutableBuffer::new(100);
-        buf.extend_from_slice(b"hello");
-        assert_eq!(5, buf.len());
-        assert_eq!(b"hello", buf.as_slice());
-
-        buf.extend_from_slice(b" world");
-        assert_eq!(11, buf.len());
-        assert_eq!(b"hello world", buf.as_slice());
-
-        buf.clear();
-        assert_eq!(0, buf.len());
-        buf.extend_from_slice(b"hello arrow");
-        assert_eq!(11, buf.len());
-        assert_eq!(b"hello arrow", buf.as_slice());
-    }
-
-    #[test]
-    fn mutable_extend_from_iter() {
-        let mut buf = MutableBuffer::new(0);
-        buf.extend(vec![1u32, 2]);
-        assert_eq!(8, buf.len());
-        assert_eq!(&[1u8, 0, 0, 0, 2, 0, 0, 0], buf.as_slice());
-
-        buf.extend(vec![3u32, 4]);
-        assert_eq!(16, buf.len());
-        assert_eq!(
-            &[1u8, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0],
-            buf.as_slice()
-        );
-    }
-
-    #[test]
-    fn test_from_trusted_len_iter() {
-        let iter = vec![1u32, 2].into_iter();
-        let buf = unsafe { Buffer::from_trusted_len_iter(iter) };
-        assert_eq!(8, buf.len());
-        assert_eq!(&[1u8, 0, 0, 0, 2, 0, 0, 0], buf.as_slice());
-    }
-
-    #[test]
-    fn test_mutable_reserve() {
-        let mut buf = MutableBuffer::new(1);
-        assert_eq!(64, buf.capacity());
-
-        // Reserving a smaller capacity should have no effect.
-        buf.reserve(10);
-        assert_eq!(64, buf.capacity());
-
-        buf.reserve(80);
-        assert_eq!(128, buf.capacity());
-
-        buf.reserve(129);
-        assert_eq!(256, buf.capacity());
-    }
-
-    #[test]
-    fn test_mutable_resize() {
-        let mut buf = MutableBuffer::new(1);
-        assert_eq!(64, buf.capacity());
-        assert_eq!(0, buf.len());
-
-        buf.resize(20, 0);
-        assert_eq!(64, buf.capacity());
-        assert_eq!(20, buf.len());
-
-        buf.resize(10, 0);
-        assert_eq!(64, buf.capacity());
-        assert_eq!(10, buf.len());
-
-        buf.resize(100, 0);
-        assert_eq!(128, buf.capacity());
-        assert_eq!(100, buf.len());
-
-        buf.resize(30, 0);
-        assert_eq!(128, buf.capacity());
-        assert_eq!(30, buf.len());
-
-        buf.resize(0, 0);
-        assert_eq!(128, buf.capacity());
-        assert_eq!(0, buf.len());
-    }
-
-    #[test]
-    fn test_mutable_into() {
-        let mut buf = MutableBuffer::new(1);
-        buf.extend_from_slice(b"aaaa bbbb cccc dddd");
-        assert_eq!(19, buf.len());
-        assert_eq!(64, buf.capacity());
-        assert_eq!(b"aaaa bbbb cccc dddd", buf.as_slice());
-
-        let immutable_buf: Buffer = buf.into();
-        assert_eq!(19, immutable_buf.len());
-        assert_eq!(64, immutable_buf.capacity());
-        assert_eq!(b"aaaa bbbb cccc dddd", immutable_buf.as_slice());
-    }
-
-    #[test]
-    fn test_mutable_equal() {
-        let mut buf = MutableBuffer::new(1);
-        let mut buf2 = MutableBuffer::new(1);
-
-        buf.extend_from_slice(&[0xaa]);
-        buf2.extend_from_slice(&[0xaa, 0xbb]);
-        assert!(buf != buf2);
-
-        buf.extend_from_slice(&[0xbb]);
-        assert_eq!(buf, buf2);
-
-        buf2.reserve(65);
-        assert!(buf != buf2);
-    }
-}
diff --git a/rust/arrow/src/buffer/ops.rs b/rust/arrow/src/buffer/ops.rs
deleted file mode 100644
index fbcb9510944..00000000000
--- a/rust/arrow/src/buffer/ops.rs
+++ /dev/null
@@ -1,429 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#[cfg(feature = "simd")]
-use crate::util::bit_util;
-#[cfg(feature = "simd")]
-use packed_simd::u8x64;
-
-#[cfg(feature = "avx512")]
-use crate::arch::avx512::*;
-use crate::util::bit_util::ceil;
-#[cfg(any(feature = "simd", feature = "avx512"))]
-use std::borrow::BorrowMut;
-
-use super::{Buffer, MutableBuffer};
-
-/// Apply a bitwise operation `simd_op` / `scalar_op` to two inputs using simd instructions and return the result as a Buffer.
-/// The `simd_op` functions gets applied on chunks of 64 bytes (512 bits) at a time
-/// and the `scalar_op` gets applied to remaining bytes.
-/// Contrary to the non-simd version `bitwise_bin_op_helper`, the offset and length is specified in bytes
-/// and this version does not support operations starting at arbitrary bit offsets.
-#[cfg(simd)]
-pub fn bitwise_bin_op_simd_helper<F_SIMD, F_SCALAR>(
-    left: &Buffer,
-    left_offset: usize,
-    right: &Buffer,
-    right_offset: usize,
-    len: usize,
-    simd_op: F_SIMD,
-    scalar_op: F_SCALAR,
-) -> Buffer
-where
-    F_SIMD: Fn(u8x64, u8x64) -> u8x64,
-    F_SCALAR: Fn(u8, u8) -> u8,
-{
-    let mut result = MutableBuffer::new(len).with_bitset(len, false);
-    let lanes = u8x64::lanes();
-
-    let mut left_chunks = left.as_slice()[left_offset..].chunks_exact(lanes);
-    let mut right_chunks = right.as_slice()[right_offset..].chunks_exact(lanes);
-    let mut result_chunks = result.as_slice_mut().chunks_exact_mut(lanes);
-
-    result_chunks
-        .borrow_mut()
-        .zip(left_chunks.borrow_mut().zip(right_chunks.borrow_mut()))
-        .for_each(|(res, (left, right))| {
-            unsafe { bit_util::bitwise_bin_op_simd(&left, &right, res, &simd_op) };
-        });
-
-    result_chunks
-        .into_remainder()
-        .iter_mut()
-        .zip(
-            left_chunks
-                .remainder()
-                .iter()
-                .zip(right_chunks.remainder().iter()),
-        )
-        .for_each(|(res, (left, right))| {
-            *res = scalar_op(*left, *right);
-        });
-
-    result.into()
-}
-
-/// Apply a bitwise operation `simd_op` / `scalar_op` to one input using simd instructions and return the result as a Buffer.
-/// The `simd_op` functions gets applied on chunks of 64 bytes (512 bits) at a time
-/// and the `scalar_op` gets applied to remaining bytes.
-/// Contrary to the non-simd version `bitwise_unary_op_helper`, the offset and length is specified in bytes
-/// and this version does not support operations starting at arbitrary bit offsets.
-#[cfg(simd)]
-pub fn bitwise_unary_op_simd_helper<F_SIMD, F_SCALAR>(
-    left: &Buffer,
-    left_offset: usize,
-    len: usize,
-    simd_op: F_SIMD,
-    scalar_op: F_SCALAR,
-) -> Buffer
-where
-    F_SIMD: Fn(u8x64) -> u8x64,
-    F_SCALAR: Fn(u8) -> u8,
-{
-    let mut result = MutableBuffer::new(len).with_bitset(len, false);
-    let lanes = u8x64::lanes();
-
-    let mut left_chunks = left.as_slice()[left_offset..].chunks_exact(lanes);
-    let mut result_chunks = result.as_slice_mut().chunks_exact_mut(lanes);
-
-    result_chunks
-        .borrow_mut()
-        .zip(left_chunks.borrow_mut())
-        .for_each(|(res, left)| unsafe {
-            let data_simd = u8x64::from_slice_unaligned_unchecked(left);
-            let simd_result = simd_op(data_simd);
-            simd_result.write_to_slice_unaligned_unchecked(res);
-        });
-
-    result_chunks
-        .into_remainder()
-        .iter_mut()
-        .zip(left_chunks.remainder().iter())
-        .for_each(|(res, left)| {
-            *res = scalar_op(*left);
-        });
-
-    result.into()
-}
-
-/// Apply a bitwise operation `op` to two inputs and return the result as a Buffer.
-/// The inputs are treated as bitmaps, meaning that offsets and length are specified in number of bits.
-pub fn bitwise_bin_op_helper<F>(
-    left: &Buffer,
-    left_offset_in_bits: usize,
-    right: &Buffer,
-    right_offset_in_bits: usize,
-    len_in_bits: usize,
-    op: F,
-) -> Buffer
-where
-    F: Fn(u64, u64) -> u64,
-{
-    let left_chunks = left.bit_chunks(left_offset_in_bits, len_in_bits);
-    let right_chunks = right.bit_chunks(right_offset_in_bits, len_in_bits);
-
-    let chunks = left_chunks
-        .iter()
-        .zip(right_chunks.iter())
-        .map(|(left, right)| op(left, right));
-    // Soundness: `BitChunks` is a trusted len iterator
-    let mut buffer = unsafe { MutableBuffer::from_trusted_len_iter(chunks) };
-
-    let remainder_bytes = ceil(left_chunks.remainder_len(), 8);
-    let rem = op(left_chunks.remainder_bits(), right_chunks.remainder_bits());
-    // we are counting its starting from the least significant bit, to to_le_bytes should be correct
-    let rem = &rem.to_le_bytes()[0..remainder_bytes];
-    buffer.extend_from_slice(rem);
-
-    buffer.into()
-}
-
-/// Apply a bitwise operation `op` to one input and return the result as a Buffer.
-/// The input is treated as a bitmap, meaning that offset and length are specified in number of bits.
-pub fn bitwise_unary_op_helper<F>(
-    left: &Buffer,
-    offset_in_bits: usize,
-    len_in_bits: usize,
-    op: F,
-) -> Buffer
-where
-    F: Fn(u64) -> u64,
-{
-    // reserve capacity and set length so we can get a typed view of u64 chunks
-    let mut result =
-        MutableBuffer::new(ceil(len_in_bits, 8)).with_bitset(len_in_bits / 64 * 8, false);
-
-    let left_chunks = left.bit_chunks(offset_in_bits, len_in_bits);
-    let result_chunks = result.typed_data_mut::<u64>().iter_mut();
-
-    result_chunks
-        .zip(left_chunks.iter())
-        .for_each(|(res, left)| {
-            *res = op(left);
-        });
-
-    let remainder_bytes = ceil(left_chunks.remainder_len(), 8);
-    let rem = op(left_chunks.remainder_bits());
-    // we are counting its starting from the least significant bit, to to_le_bytes should be correct
-    let rem = &rem.to_le_bytes()[0..remainder_bytes];
-    result.extend_from_slice(rem);
-
-    result.into()
-}
-
-#[cfg(all(target_arch = "x86_64", feature = "avx512"))]
-pub fn buffer_bin_and(
-    left: &Buffer,
-    left_offset_in_bits: usize,
-    right: &Buffer,
-    right_offset_in_bits: usize,
-    len_in_bits: usize,
-) -> Buffer {
-    if left_offset_in_bits % 8 == 0
-        && right_offset_in_bits % 8 == 0
-        && len_in_bits % 8 == 0
-    {
-        let len = len_in_bits / 8;
-        let left_offset = left_offset_in_bits / 8;
-        let right_offset = right_offset_in_bits / 8;
-
-        let mut result = MutableBuffer::new(len).with_bitset(len, false);
-
-        let mut left_chunks =
-            left.as_slice()[left_offset..].chunks_exact(AVX512_U8X64_LANES);
-        let mut right_chunks =
-            right.as_slice()[right_offset..].chunks_exact(AVX512_U8X64_LANES);
-        let mut result_chunks =
-            result.as_slice_mut().chunks_exact_mut(AVX512_U8X64_LANES);
-
-        result_chunks
-            .borrow_mut()
-            .zip(left_chunks.borrow_mut().zip(right_chunks.borrow_mut()))
-            .for_each(|(res, (left, right))| unsafe {
-                avx512_bin_and(left, right, res);
-            });
-
-        result_chunks
-            .into_remainder()
-            .iter_mut()
-            .zip(
-                left_chunks
-                    .remainder()
-                    .iter()
-                    .zip(right_chunks.remainder().iter()),
-            )
-            .for_each(|(res, (left, right))| {
-                *res = *left & *right;
-            });
-
-        result.into()
-    } else {
-        bitwise_bin_op_helper(
-            &left,
-            left_offset_in_bits,
-            right,
-            right_offset_in_bits,
-            len_in_bits,
-            |a, b| a & b,
-        )
-    }
-}
-
-#[cfg(all(feature = "simd", not(feature = "avx512")))]
-pub fn buffer_bin_and(
-    left: &Buffer,
-    left_offset_in_bits: usize,
-    right: &Buffer,
-    right_offset_in_bits: usize,
-    len_in_bits: usize,
-) -> Buffer {
-    if left_offset_in_bits % 8 == 0
-        && right_offset_in_bits % 8 == 0
-        && len_in_bits % 8 == 0
-    {
-        bitwise_bin_op_simd_helper(
-            &left,
-            left_offset_in_bits / 8,
-            &right,
-            right_offset_in_bits / 8,
-            len_in_bits / 8,
-            |a, b| a & b,
-            |a, b| a & b,
-        )
-    } else {
-        bitwise_bin_op_helper(
-            &left,
-            left_offset_in_bits,
-            right,
-            right_offset_in_bits,
-            len_in_bits,
-            |a, b| a & b,
-        )
-    }
-}
-
-// Note: do not target specific features like x86 without considering
-// other targets like wasm32, as those would fail to build
-#[cfg(all(not(any(feature = "simd", feature = "avx512"))))]
-pub fn buffer_bin_and(
-    left: &Buffer,
-    left_offset_in_bits: usize,
-    right: &Buffer,
-    right_offset_in_bits: usize,
-    len_in_bits: usize,
-) -> Buffer {
-    bitwise_bin_op_helper(
-        &left,
-        left_offset_in_bits,
-        right,
-        right_offset_in_bits,
-        len_in_bits,
-        |a, b| a & b,
-    )
-}
-
-#[cfg(all(target_arch = "x86_64", feature = "avx512"))]
-pub fn buffer_bin_or(
-    left: &Buffer,
-    left_offset_in_bits: usize,
-    right: &Buffer,
-    right_offset_in_bits: usize,
-    len_in_bits: usize,
-) -> Buffer {
-    if left_offset_in_bits % 8 == 0
-        && right_offset_in_bits % 8 == 0
-        && len_in_bits % 8 == 0
-    {
-        let len = len_in_bits / 8;
-        let left_offset = left_offset_in_bits / 8;
-        let right_offset = right_offset_in_bits / 8;
-
-        let mut result = MutableBuffer::new(len).with_bitset(len, false);
-
-        let mut left_chunks =
-            left.as_slice()[left_offset..].chunks_exact(AVX512_U8X64_LANES);
-        let mut right_chunks =
-            right.as_slice()[right_offset..].chunks_exact(AVX512_U8X64_LANES);
-        let mut result_chunks =
-            result.as_slice_mut().chunks_exact_mut(AVX512_U8X64_LANES);
-
-        result_chunks
-            .borrow_mut()
-            .zip(left_chunks.borrow_mut().zip(right_chunks.borrow_mut()))
-            .for_each(|(res, (left, right))| unsafe {
-                avx512_bin_or(left, right, res);
-            });
-
-        result_chunks
-            .into_remainder()
-            .iter_mut()
-            .zip(
-                left_chunks
-                    .remainder()
-                    .iter()
-                    .zip(right_chunks.remainder().iter()),
-            )
-            .for_each(|(res, (left, right))| {
-                *res = *left | *right;
-            });
-
-        result.into()
-    } else {
-        bitwise_bin_op_helper(
-            &left,
-            left_offset_in_bits,
-            right,
-            right_offset_in_bits,
-            len_in_bits,
-            |a, b| a | b,
-        )
-    }
-}
-
-#[cfg(all(feature = "simd", not(feature = "avx512")))]
-pub fn buffer_bin_or(
-    left: &Buffer,
-    left_offset_in_bits: usize,
-    right: &Buffer,
-    right_offset_in_bits: usize,
-    len_in_bits: usize,
-) -> Buffer {
-    if left_offset_in_bits % 8 == 0
-        && right_offset_in_bits % 8 == 0
-        && len_in_bits % 8 == 0
-    {
-        bitwise_bin_op_simd_helper(
-            &left,
-            left_offset_in_bits / 8,
-            &right,
-            right_offset_in_bits / 8,
-            len_in_bits / 8,
-            |a, b| a | b,
-            |a, b| a | b,
-        )
-    } else {
-        bitwise_bin_op_helper(
-            &left,
-            left_offset_in_bits,
-            right,
-            right_offset_in_bits,
-            len_in_bits,
-            |a, b| a | b,
-        )
-    }
-}
-
-#[cfg(all(not(any(feature = "simd", feature = "avx512"))))]
-pub fn buffer_bin_or(
-    left: &Buffer,
-    left_offset_in_bits: usize,
-    right: &Buffer,
-    right_offset_in_bits: usize,
-    len_in_bits: usize,
-) -> Buffer {
-    bitwise_bin_op_helper(
-        &left,
-        left_offset_in_bits,
-        right,
-        right_offset_in_bits,
-        len_in_bits,
-        |a, b| a | b,
-    )
-}
-
-pub fn buffer_unary_not(
-    left: &Buffer,
-    offset_in_bits: usize,
-    len_in_bits: usize,
-) -> Buffer {
-    // SIMD implementation if available and byte-aligned
-    #[cfg(simd)]
-    if offset_in_bits % 8 == 0 && len_in_bits % 8 == 0 {
-        return bitwise_unary_op_simd_helper(
-            &left,
-            offset_in_bits / 8,
-            len_in_bits / 8,
-            |a| !a,
-            |a| !a,
-        );
-    }
-    // Default implementation
-    #[allow(unreachable_code)]
-    {
-        bitwise_unary_op_helper(&left, offset_in_bits, len_in_bits, |a| !a)
-    }
-}
diff --git a/rust/arrow/src/bytes.rs b/rust/arrow/src/bytes.rs
deleted file mode 100644
index 38fa4439b42..00000000000
--- a/rust/arrow/src/bytes.rs
+++ /dev/null
@@ -1,159 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! This module contains an implementation of a contiguous immutable memory region that knows
-//! how to de-allocate itself, [`Bytes`].
-//! Note that this is a low-level functionality of this crate.
-
-use core::slice;
-use std::ptr::NonNull;
-use std::sync::Arc;
-use std::{fmt::Debug, fmt::Formatter};
-
-use crate::{alloc, ffi};
-
-/// Mode of deallocating memory regions
-pub enum Deallocation {
-    /// Native deallocation, using Rust deallocator with Arrow-specific memory aligment
-    Native(usize),
-    /// Foreign interface, via a callback
-    Foreign(Arc<ffi::FFI_ArrowArray>),
-}
-
-impl Debug for Deallocation {
-    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
-        match self {
-            Deallocation::Native(capacity) => {
-                write!(f, "Deallocation::Native {{ capacity: {} }}", capacity)
-            }
-            Deallocation::Foreign(_) => {
-                write!(f, "Deallocation::Foreign {{ capacity: unknown }}")
-            }
-        }
-    }
-}
-
-/// A continuous, fixed-size, immutable memory region that knows how to de-allocate itself.
-/// This structs' API is inspired by the `bytes::Bytes`, but it is not limited to using rust's
-/// global allocator nor u8 aligmnent.
-///
-/// In the most common case, this buffer is allocated using [`allocate_aligned`](memory::allocate_aligned)
-/// and deallocated accordingly [`free_aligned`](memory::free_aligned).
-/// When the region is allocated by an foreign allocator, [Deallocation::Foreign], this calls the
-/// foreign deallocator to deallocate the region when it is no longer needed.
-pub struct Bytes {
-    /// The raw pointer to be begining of the region
-    ptr: NonNull<u8>,
-
-    /// The number of bytes visible to this region. This is always smaller than its capacity (when avaliable).
-    len: usize,
-
-    /// how to deallocate this region
-    deallocation: Deallocation,
-}
-
-impl Bytes {
-    /// Takes ownership of an allocated memory region,
-    ///
-    /// # Arguments
-    ///
-    /// * `ptr` - Pointer to raw parts
-    /// * `len` - Length of raw parts in **bytes**
-    /// * `capacity` - Total allocated memory for the pointer `ptr`, in **bytes**
-    ///
-    /// # Safety
-    ///
-    /// This function is unsafe as there is no guarantee that the given pointer is valid for `len`
-    /// bytes. If the `ptr` and `capacity` come from a `Buffer`, then this is guaranteed.
-    #[inline]
-    pub unsafe fn new(
-        ptr: std::ptr::NonNull<u8>,
-        len: usize,
-        deallocation: Deallocation,
-    ) -> Bytes {
-        Bytes {
-            ptr,
-            len,
-            deallocation,
-        }
-    }
-
-    fn as_slice(&self) -> &[u8] {
-        self
-    }
-
-    #[inline]
-    pub fn len(&self) -> usize {
-        self.len
-    }
-
-    #[inline]
-    pub fn is_empty(&self) -> bool {
-        self.len == 0
-    }
-
-    #[inline]
-    pub fn ptr(&self) -> NonNull<u8> {
-        self.ptr
-    }
-
-    pub fn capacity(&self) -> usize {
-        match self.deallocation {
-            Deallocation::Native(capacity) => capacity,
-            // we cannot determine this in general,
-            // and thus we state that this is externally-owned memory
-            Deallocation::Foreign(_) => 0,
-        }
-    }
-}
-
-impl Drop for Bytes {
-    #[inline]
-    fn drop(&mut self) {
-        match &self.deallocation {
-            Deallocation::Native(capacity) => {
-                unsafe { alloc::free_aligned::<u8>(self.ptr, *capacity) };
-            }
-            // foreign interface knows how to deallocate itself.
-            Deallocation::Foreign(_) => (),
-        }
-    }
-}
-
-impl std::ops::Deref for Bytes {
-    type Target = [u8];
-
-    fn deref(&self) -> &[u8] {
-        unsafe { slice::from_raw_parts(self.ptr.as_ptr(), self.len) }
-    }
-}
-
-impl PartialEq for Bytes {
-    fn eq(&self, other: &Bytes) -> bool {
-        self.as_slice() == other.as_slice()
-    }
-}
-
-impl Debug for Bytes {
-    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
-        write!(f, "Bytes {{ ptr: {:?}, len: {}, data: ", self.ptr, self.len,)?;
-
-        f.debug_list().entries(self.iter()).finish()?;
-
-        write!(f, " }}")
-    }
-}
diff --git a/rust/arrow/src/compute/kernels/aggregate.rs b/rust/arrow/src/compute/kernels/aggregate.rs
deleted file mode 100644
index d0e3f22f541..00000000000
--- a/rust/arrow/src/compute/kernels/aggregate.rs
+++ /dev/null
@@ -1,975 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Defines aggregations over Arrow arrays.
-
-use std::ops::Add;
-
-use crate::array::{
-    Array, BooleanArray, GenericStringArray, PrimitiveArray, StringOffsetSizeTrait,
-};
-use crate::datatypes::{ArrowNativeType, ArrowNumericType};
-
-/// Generic test for NaN, the optimizer should be able to remove this for integer types.
-#[inline]
-fn is_nan<T: ArrowNativeType + PartialOrd + Copy>(a: T) -> bool {
-    #[allow(clippy::eq_op)]
-    !(a == a)
-}
-
-/// Helper macro to perform min/max of strings
-fn min_max_string<T: StringOffsetSizeTrait, F: Fn(&str, &str) -> bool>(
-    array: &GenericStringArray<T>,
-    cmp: F,
-) -> Option<&str> {
-    let null_count = array.null_count();
-
-    if null_count == array.len() {
-        return None;
-    }
-    let data = array.data();
-    let mut n;
-    if null_count == 0 {
-        n = array.value(0);
-        for i in 1..data.len() {
-            let item = array.value(i);
-            if cmp(&n, item) {
-                n = item;
-            }
-        }
-    } else {
-        n = "";
-        let mut has_value = false;
-
-        for i in 0..data.len() {
-            let item = array.value(i);
-            if data.is_valid(i) && (!has_value || cmp(&n, item)) {
-                has_value = true;
-                n = item;
-            }
-        }
-    }
-    Some(n)
-}
-
-/// Returns the minimum value in the array, according to the natural order.
-/// For floating point arrays any NaN values are considered to be greater than any other non-null value
-#[cfg(not(simd))]
-pub fn min<T>(array: &PrimitiveArray<T>) -> Option<T::Native>
-where
-    T: ArrowNumericType,
-    T::Native: ArrowNativeType,
-{
-    min_max_helper(array, |a, b| (is_nan(*a) & !is_nan(*b)) || a > b)
-}
-
-/// Returns the maximum value in the array, according to the natural order.
-/// For floating point arrays any NaN values are considered to be greater than any other non-null value
-#[cfg(not(simd))]
-pub fn max<T>(array: &PrimitiveArray<T>) -> Option<T::Native>
-where
-    T: ArrowNumericType,
-    T::Native: ArrowNativeType,
-{
-    min_max_helper(array, |a, b| (!is_nan(*a) & is_nan(*b)) || a < b)
-}
-
-/// Returns the maximum value in the string array, according to the natural order.
-pub fn max_string<T: StringOffsetSizeTrait>(
-    array: &GenericStringArray<T>,
-) -> Option<&str> {
-    min_max_string(array, |a, b| a < b)
-}
-
-/// Returns the minimum value in the string array, according to the natural order.
-pub fn min_string<T: StringOffsetSizeTrait>(
-    array: &GenericStringArray<T>,
-) -> Option<&str> {
-    min_max_string(array, |a, b| a > b)
-}
-
-/// Helper function to perform min/max lambda function on values from a numeric array.
-fn min_max_helper<T, F>(array: &PrimitiveArray<T>, cmp: F) -> Option<T::Native>
-where
-    T: ArrowNumericType,
-    F: Fn(&T::Native, &T::Native) -> bool,
-{
-    let null_count = array.null_count();
-
-    // Includes case array.len() == 0
-    if null_count == array.len() {
-        return None;
-    }
-
-    let data = array.data();
-    let m = array.values();
-    let mut n;
-
-    if null_count == 0 {
-        // optimized path for arrays without null values
-        n = m[1..]
-            .iter()
-            .fold(m[0], |max, item| if cmp(&max, item) { *item } else { max });
-    } else {
-        n = T::default_value();
-        let mut has_value = false;
-        for (i, item) in m.iter().enumerate() {
-            if data.is_valid(i) && (!has_value || cmp(&n, item)) {
-                has_value = true;
-                n = *item
-            }
-        }
-    }
-    Some(n)
-}
-
-/// Returns the minimum value in the boolean array.
-///
-/// ```
-/// use arrow::{
-///   array::BooleanArray,
-///   compute::min_boolean,
-/// };
-///
-/// let a = BooleanArray::from(vec![Some(true), None, Some(false)]);
-/// assert_eq!(min_boolean(&a), Some(false))
-/// ```
-pub fn min_boolean(array: &BooleanArray) -> Option<bool> {
-    // short circuit if all nulls / zero length array
-    if array.null_count() == array.len() {
-        return None;
-    }
-
-    // Note the min bool is false (0), so short circuit as soon as we see it
-    array
-        .iter()
-        .find(|&b| b == Some(false))
-        .flatten()
-        .or(Some(true))
-}
-
-/// Returns the maximum value in the boolean array
-///
-/// ```
-/// use arrow::{
-///   array::BooleanArray,
-///   compute::max_boolean,
-/// };
-///
-/// let a = BooleanArray::from(vec![Some(true), None, Some(false)]);
-/// assert_eq!(max_boolean(&a), Some(true))
-/// ```
-pub fn max_boolean(array: &BooleanArray) -> Option<bool> {
-    // short circuit if all nulls / zero length array
-    if array.null_count() == array.len() {
-        return None;
-    }
-
-    // Note the max bool is true (1), so short circuit as soon as we see it
-    array
-        .iter()
-        .find(|&b| b == Some(true))
-        .flatten()
-        .or(Some(false))
-}
-
-/// Returns the sum of values in the array.
-///
-/// Returns `None` if the array is empty or only contains null values.
-#[cfg(not(simd))]
-pub fn sum<T>(array: &PrimitiveArray<T>) -> Option<T::Native>
-where
-    T: ArrowNumericType,
-    T::Native: Add<Output = T::Native>,
-{
-    let null_count = array.null_count();
-
-    if null_count == array.len() {
-        return None;
-    }
-
-    let data: &[T::Native] = array.values();
-
-    match array.data().null_buffer() {
-        None => {
-            let sum = data.iter().fold(T::default_value(), |accumulator, value| {
-                accumulator + *value
-            });
-
-            Some(sum)
-        }
-        Some(buffer) => {
-            let mut sum = T::default_value();
-            let data_chunks = data.chunks_exact(64);
-            let remainder = data_chunks.remainder();
-
-            let bit_chunks = buffer.bit_chunks(array.offset(), array.len());
-            data_chunks
-                .zip(bit_chunks.iter())
-                .for_each(|(chunk, mask)| {
-                    // index_mask has value 1 << i in the loop
-                    let mut index_mask = 1;
-                    chunk.iter().for_each(|value| {
-                        if (mask & index_mask) != 0 {
-                            sum = sum + *value;
-                        }
-                        index_mask <<= 1;
-                    });
-                });
-
-            let remainder_bits = bit_chunks.remainder_bits();
-
-            remainder.iter().enumerate().for_each(|(i, value)| {
-                if remainder_bits & (1 << i) != 0 {
-                    sum = sum + *value;
-                }
-            });
-
-            Some(sum)
-        }
-    }
-}
-
-#[cfg(simd)]
-mod simd {
-    use super::is_nan;
-    use crate::array::{Array, PrimitiveArray};
-    use crate::datatypes::ArrowNumericType;
-    use std::marker::PhantomData;
-    use std::ops::Add;
-
-    pub(super) trait SimdAggregate<T: ArrowNumericType> {
-        type ScalarAccumulator;
-        type SimdAccumulator;
-
-        /// Returns the accumulator for aggregating scalar values
-        fn init_accumulator_scalar() -> Self::ScalarAccumulator;
-
-        /// Returns the accumulator for aggregating simd chunks of values
-        fn init_accumulator_chunk() -> Self::SimdAccumulator;
-
-        /// Updates the accumulator with the values of one chunk
-        fn accumulate_chunk_non_null(
-            accumulator: &mut Self::SimdAccumulator,
-            chunk: T::Simd,
-        );
-
-        /// Updates the accumulator with the values of one chunk according to the given vector mask
-        fn accumulate_chunk_nullable(
-            accumulator: &mut Self::SimdAccumulator,
-            chunk: T::Simd,
-            mask: T::SimdMask,
-        );
-
-        /// Updates the accumulator with one value
-        fn accumulate_scalar(accumulator: &mut Self::ScalarAccumulator, value: T::Native);
-
-        /// Reduces the vector lanes of the simd accumulator and the scalar accumulator to a single value
-        fn reduce(
-            simd_accumulator: Self::SimdAccumulator,
-            scalar_accumulator: Self::ScalarAccumulator,
-        ) -> Option<T::Native>;
-    }
-
-    pub(super) struct SumAggregate<T: ArrowNumericType> {
-        phantom: PhantomData<T>,
-    }
-
-    impl<T: ArrowNumericType> SimdAggregate<T> for SumAggregate<T>
-    where
-        T::Native: Add<Output = T::Native>,
-    {
-        type ScalarAccumulator = T::Native;
-        type SimdAccumulator = T::Simd;
-
-        fn init_accumulator_scalar() -> Self::ScalarAccumulator {
-            T::default_value()
-        }
-
-        fn init_accumulator_chunk() -> Self::SimdAccumulator {
-            T::init(Self::init_accumulator_scalar())
-        }
-
-        fn accumulate_chunk_non_null(accumulator: &mut T::Simd, chunk: T::Simd) {
-            *accumulator = *accumulator + chunk;
-        }
-
-        fn accumulate_chunk_nullable(
-            accumulator: &mut T::Simd,
-            chunk: T::Simd,
-            vecmask: T::SimdMask,
-        ) {
-            let zero = T::init(T::default_value());
-            let blended = T::mask_select(vecmask, chunk, zero);
-
-            *accumulator = *accumulator + blended;
-        }
-
-        fn accumulate_scalar(accumulator: &mut T::Native, value: T::Native) {
-            *accumulator = *accumulator + value
-        }
-
-        fn reduce(
-            simd_accumulator: Self::SimdAccumulator,
-            scalar_accumulator: Self::ScalarAccumulator,
-        ) -> Option<T::Native> {
-            // we can't use T::lanes() as the slice len because it is not const,
-            // instead always reserve the maximum number of lanes
-            let mut tmp = [T::default_value(); 64];
-            let slice = &mut tmp[0..T::lanes()];
-            T::write(simd_accumulator, slice);
-
-            let mut reduced = Self::init_accumulator_scalar();
-            slice
-                .iter()
-                .for_each(|value| Self::accumulate_scalar(&mut reduced, *value));
-
-            Self::accumulate_scalar(&mut reduced, scalar_accumulator);
-
-            // result can not be None because we checked earlier for the null count
-            Some(reduced)
-        }
-    }
-
-    pub(super) struct MinAggregate<T: ArrowNumericType> {
-        phantom: PhantomData<T>,
-    }
-
-    impl<T: ArrowNumericType> SimdAggregate<T> for MinAggregate<T>
-    where
-        T::Native: PartialOrd,
-    {
-        type ScalarAccumulator = (T::Native, bool);
-        type SimdAccumulator = (T::Simd, T::SimdMask);
-
-        fn init_accumulator_scalar() -> Self::ScalarAccumulator {
-            (T::default_value(), false)
-        }
-
-        fn init_accumulator_chunk() -> Self::SimdAccumulator {
-            (T::init(T::default_value()), T::mask_init(false))
-        }
-
-        fn accumulate_chunk_non_null(
-            accumulator: &mut Self::SimdAccumulator,
-            chunk: T::Simd,
-        ) {
-            let acc_is_nan = !T::eq(accumulator.0, accumulator.0);
-            let is_lt = acc_is_nan | T::lt(chunk, accumulator.0);
-            let first_or_lt = !accumulator.1 | is_lt;
-
-            accumulator.0 = T::mask_select(first_or_lt, chunk, accumulator.0);
-            accumulator.1 = T::mask_init(true);
-        }
-
-        fn accumulate_chunk_nullable(
-            accumulator: &mut Self::SimdAccumulator,
-            chunk: T::Simd,
-            vecmask: T::SimdMask,
-        ) {
-            let acc_is_nan = !T::eq(accumulator.0, accumulator.0);
-            let is_lt = vecmask & (acc_is_nan | T::lt(chunk, accumulator.0));
-            let first_or_lt = !accumulator.1 | is_lt;
-
-            accumulator.0 = T::mask_select(first_or_lt, chunk, accumulator.0);
-            accumulator.1 |= vecmask;
-        }
-
-        fn accumulate_scalar(
-            accumulator: &mut Self::ScalarAccumulator,
-            value: T::Native,
-        ) {
-            if !accumulator.1 {
-                accumulator.0 = value;
-            } else {
-                let acc_is_nan = is_nan(accumulator.0);
-                if acc_is_nan || value < accumulator.0 {
-                    accumulator.0 = value
-                }
-            }
-            accumulator.1 = true
-        }
-
-        fn reduce(
-            simd_accumulator: Self::SimdAccumulator,
-            scalar_accumulator: Self::ScalarAccumulator,
-        ) -> Option<T::Native> {
-            // we can't use T::lanes() as the slice len because it is not const,
-            // instead always reserve the maximum number of lanes
-            let mut tmp = [T::default_value(); 64];
-            let slice = &mut tmp[0..T::lanes()];
-            T::write(simd_accumulator.0, slice);
-
-            let mut reduced = Self::init_accumulator_scalar();
-            slice
-                .iter()
-                .enumerate()
-                .filter(|(i, _value)| T::mask_get(&simd_accumulator.1, *i))
-                .for_each(|(_i, value)| Self::accumulate_scalar(&mut reduced, *value));
-
-            if scalar_accumulator.1 {
-                Self::accumulate_scalar(&mut reduced, scalar_accumulator.0);
-            }
-
-            if reduced.1 {
-                Some(reduced.0)
-            } else {
-                None
-            }
-        }
-    }
-
-    pub(super) struct MaxAggregate<T: ArrowNumericType> {
-        phantom: PhantomData<T>,
-    }
-
-    impl<T: ArrowNumericType> SimdAggregate<T> for MaxAggregate<T>
-    where
-        T::Native: PartialOrd,
-    {
-        type ScalarAccumulator = (T::Native, bool);
-        type SimdAccumulator = (T::Simd, T::SimdMask);
-
-        fn init_accumulator_scalar() -> Self::ScalarAccumulator {
-            (T::default_value(), false)
-        }
-
-        fn init_accumulator_chunk() -> Self::SimdAccumulator {
-            (T::init(T::default_value()), T::mask_init(false))
-        }
-
-        fn accumulate_chunk_non_null(
-            accumulator: &mut Self::SimdAccumulator,
-            chunk: T::Simd,
-        ) {
-            let chunk_is_nan = !T::eq(chunk, chunk);
-            let is_gt = chunk_is_nan | T::gt(chunk, accumulator.0);
-            let first_or_gt = !accumulator.1 | is_gt;
-
-            accumulator.0 = T::mask_select(first_or_gt, chunk, accumulator.0);
-            accumulator.1 = T::mask_init(true);
-        }
-
-        fn accumulate_chunk_nullable(
-            accumulator: &mut Self::SimdAccumulator,
-            chunk: T::Simd,
-            vecmask: T::SimdMask,
-        ) {
-            let chunk_is_nan = !T::eq(chunk, chunk);
-            let is_gt = vecmask & (chunk_is_nan | T::gt(chunk, accumulator.0));
-            let first_or_gt = !accumulator.1 | is_gt;
-
-            accumulator.0 = T::mask_select(first_or_gt, chunk, accumulator.0);
-            accumulator.1 |= vecmask;
-        }
-
-        fn accumulate_scalar(
-            accumulator: &mut Self::ScalarAccumulator,
-            value: T::Native,
-        ) {
-            if !accumulator.1 {
-                accumulator.0 = value;
-            } else {
-                let value_is_nan = is_nan(value);
-                if value_is_nan || value > accumulator.0 {
-                    accumulator.0 = value
-                }
-            }
-            accumulator.1 = true;
-        }
-
-        fn reduce(
-            simd_accumulator: Self::SimdAccumulator,
-            scalar_accumulator: Self::ScalarAccumulator,
-        ) -> Option<T::Native> {
-            // we can't use T::lanes() as the slice len because it is not const,
-            // instead always reserve the maximum number of lanes
-            let mut tmp = [T::default_value(); 64];
-            let slice = &mut tmp[0..T::lanes()];
-            T::write(simd_accumulator.0, slice);
-
-            let mut reduced = Self::init_accumulator_scalar();
-            slice
-                .iter()
-                .enumerate()
-                .filter(|(i, _value)| T::mask_get(&simd_accumulator.1, *i))
-                .for_each(|(_i, value)| Self::accumulate_scalar(&mut reduced, *value));
-
-            if scalar_accumulator.1 {
-                Self::accumulate_scalar(&mut reduced, scalar_accumulator.0);
-            }
-
-            if reduced.1 {
-                Some(reduced.0)
-            } else {
-                None
-            }
-        }
-    }
-
-    pub(super) fn simd_aggregation<T: ArrowNumericType, A: SimdAggregate<T>>(
-        array: &PrimitiveArray<T>,
-    ) -> Option<T::Native> {
-        let null_count = array.null_count();
-
-        if null_count == array.len() {
-            return None;
-        }
-
-        let data: &[T::Native] = array.values();
-
-        let mut chunk_acc = A::init_accumulator_chunk();
-        let mut rem_acc = A::init_accumulator_scalar();
-
-        match array.data().null_buffer() {
-            None => {
-                let data_chunks = data.chunks_exact(64);
-                let remainder = data_chunks.remainder();
-
-                data_chunks.for_each(|chunk| {
-                    chunk.chunks_exact(T::lanes()).for_each(|chunk| {
-                        let chunk = T::load(&chunk);
-                        A::accumulate_chunk_non_null(&mut chunk_acc, chunk);
-                    });
-                });
-
-                remainder.iter().for_each(|value| {
-                    A::accumulate_scalar(&mut rem_acc, *value);
-                });
-            }
-            Some(buffer) => {
-                // process data in chunks of 64 elements since we also get 64 bits of validity information at a time
-                let data_chunks = data.chunks_exact(64);
-                let remainder = data_chunks.remainder();
-
-                let bit_chunks = buffer.bit_chunks(array.offset(), array.len());
-                let remainder_bits = bit_chunks.remainder_bits();
-
-                data_chunks.zip(bit_chunks).for_each(|(chunk, mut mask)| {
-                    // split chunks further into slices corresponding to the vector length
-                    // the compiler is able to unroll this inner loop and remove bounds checks
-                    // since the outer chunk size (64) is always a multiple of the number of lanes
-                    chunk.chunks_exact(T::lanes()).for_each(|chunk| {
-                        let vecmask = T::mask_from_u64(mask);
-                        let chunk = T::load(&chunk);
-
-                        A::accumulate_chunk_nullable(&mut chunk_acc, chunk, vecmask);
-
-                        // skip the shift and avoid overflow for u8 type, which uses 64 lanes.
-                        mask >>= T::lanes() % 64;
-                    });
-                });
-
-                remainder.iter().enumerate().for_each(|(i, value)| {
-                    if remainder_bits & (1 << i) != 0 {
-                        A::accumulate_scalar(&mut rem_acc, *value)
-                    }
-                });
-            }
-        }
-
-        A::reduce(chunk_acc, rem_acc)
-    }
-}
-
-/// Returns the sum of values in the array.
-///
-/// Returns `None` if the array is empty or only contains null values.
-#[cfg(simd)]
-pub fn sum<T: ArrowNumericType>(array: &PrimitiveArray<T>) -> Option<T::Native>
-where
-    T::Native: Add<Output = T::Native>,
-{
-    use simd::*;
-
-    simd::simd_aggregation::<T, SumAggregate<T>>(&array)
-}
-
-#[cfg(simd)]
-/// Returns the minimum value in the array, according to the natural order.
-/// For floating point arrays any NaN values are considered to be greater than any other non-null value
-pub fn min<T: ArrowNumericType>(array: &PrimitiveArray<T>) -> Option<T::Native>
-where
-    T::Native: PartialOrd,
-{
-    use simd::*;
-
-    simd::simd_aggregation::<T, MinAggregate<T>>(&array)
-}
-
-#[cfg(simd)]
-/// Returns the maximum value in the array, according to the natural order.
-/// For floating point arrays any NaN values are considered to be greater than any other non-null value
-pub fn max<T: ArrowNumericType>(array: &PrimitiveArray<T>) -> Option<T::Native>
-where
-    T::Native: PartialOrd,
-{
-    use simd::*;
-
-    simd::simd_aggregation::<T, MaxAggregate<T>>(&array)
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::array::*;
-    use crate::compute::add;
-
-    #[test]
-    fn test_primitive_array_sum() {
-        let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
-        assert_eq!(15, sum(&a).unwrap());
-    }
-
-    #[test]
-    fn test_primitive_array_float_sum() {
-        let a = Float64Array::from(vec![1.1, 2.2, 3.3, 4.4, 5.5]);
-        assert!(16.5 - sum(&a).unwrap() < f64::EPSILON);
-    }
-
-    #[test]
-    fn test_primitive_array_sum_with_nulls() {
-        let a = Int32Array::from(vec![None, Some(2), Some(3), None, Some(5)]);
-        assert_eq!(10, sum(&a).unwrap());
-    }
-
-    #[test]
-    fn test_primitive_array_sum_all_nulls() {
-        let a = Int32Array::from(vec![None, None, None]);
-        assert_eq!(None, sum(&a));
-    }
-
-    #[test]
-    fn test_primitive_array_sum_large_64() {
-        let a: Int64Array = (1..=100)
-            .map(|i| if i % 3 == 0 { Some(i) } else { None })
-            .collect();
-        let b: Int64Array = (1..=100)
-            .map(|i| if i % 3 == 0 { Some(0) } else { Some(i) })
-            .collect();
-        // create an array that actually has non-zero values at the invalid indices
-        let c = add(&a, &b).unwrap();
-        assert_eq!(Some((1..=100).filter(|i| i % 3 == 0).sum()), sum(&c));
-    }
-
-    #[test]
-    fn test_primitive_array_sum_large_32() {
-        let a: Int32Array = (1..=100)
-            .map(|i| if i % 3 == 0 { Some(i) } else { None })
-            .collect();
-        let b: Int32Array = (1..=100)
-            .map(|i| if i % 3 == 0 { Some(0) } else { Some(i) })
-            .collect();
-        // create an array that actually has non-zero values at the invalid indices
-        let c = add(&a, &b).unwrap();
-        assert_eq!(Some((1..=100).filter(|i| i % 3 == 0).sum()), sum(&c));
-    }
-
-    #[test]
-    fn test_primitive_array_sum_large_16() {
-        let a: Int16Array = (1..=100)
-            .map(|i| if i % 3 == 0 { Some(i) } else { None })
-            .collect();
-        let b: Int16Array = (1..=100)
-            .map(|i| if i % 3 == 0 { Some(0) } else { Some(i) })
-            .collect();
-        // create an array that actually has non-zero values at the invalid indices
-        let c = add(&a, &b).unwrap();
-        assert_eq!(Some((1..=100).filter(|i| i % 3 == 0).sum()), sum(&c));
-    }
-
-    #[test]
-    fn test_primitive_array_sum_large_8() {
-        // include fewer values than other large tests so the result does not overflow the u8
-        let a: UInt8Array = (1..=100)
-            .map(|i| if i % 33 == 0 { Some(i) } else { None })
-            .collect();
-        let b: UInt8Array = (1..=100)
-            .map(|i| if i % 33 == 0 { Some(0) } else { Some(i) })
-            .collect();
-        // create an array that actually has non-zero values at the invalid indices
-        let c = add(&a, &b).unwrap();
-        assert_eq!(Some((1..=100).filter(|i| i % 33 == 0).sum()), sum(&c));
-    }
-
-    #[test]
-    fn test_primitive_array_min_max() {
-        let a = Int32Array::from(vec![5, 6, 7, 8, 9]);
-        assert_eq!(5, min(&a).unwrap());
-        assert_eq!(9, max(&a).unwrap());
-    }
-
-    #[test]
-    fn test_primitive_array_min_max_with_nulls() {
-        let a = Int32Array::from(vec![Some(5), None, None, Some(8), Some(9)]);
-        assert_eq!(5, min(&a).unwrap());
-        assert_eq!(9, max(&a).unwrap());
-    }
-
-    #[test]
-    fn test_primitive_min_max_1() {
-        let a = Int32Array::from(vec![None, None, Some(5), Some(2)]);
-        assert_eq!(Some(2), min(&a));
-        assert_eq!(Some(5), max(&a));
-    }
-
-    #[test]
-    fn test_primitive_min_max_float_large_nonnull_array() {
-        let a: Float64Array = (0..256).map(|i| Some((i + 1) as f64)).collect();
-        // min/max are on boundaries of chunked data
-        assert_eq!(Some(1.0), min(&a));
-        assert_eq!(Some(256.0), max(&a));
-
-        // max is last value in remainder after chunking
-        let a: Float64Array = (0..255).map(|i| Some((i + 1) as f64)).collect();
-        assert_eq!(Some(255.0), max(&a));
-
-        // max is first value in remainder after chunking
-        let a: Float64Array = (0..257).map(|i| Some((i + 1) as f64)).collect();
-        assert_eq!(Some(257.0), max(&a));
-    }
-
-    #[test]
-    fn test_primitive_min_max_float_large_nullable_array() {
-        let a: Float64Array = (0..256)
-            .map(|i| {
-                if (i + 1) % 3 == 0 {
-                    None
-                } else {
-                    Some((i + 1) as f64)
-                }
-            })
-            .collect();
-        // min/max are on boundaries of chunked data
-        assert_eq!(Some(1.0), min(&a));
-        assert_eq!(Some(256.0), max(&a));
-
-        let a: Float64Array = (0..256)
-            .map(|i| {
-                if i == 0 || i == 255 {
-                    None
-                } else {
-                    Some((i + 1) as f64)
-                }
-            })
-            .collect();
-        // boundaries of chunked data are null
-        assert_eq!(Some(2.0), min(&a));
-        assert_eq!(Some(255.0), max(&a));
-
-        let a: Float64Array = (0..256)
-            .map(|i| if i != 100 { None } else { Some((i) as f64) })
-            .collect();
-        // a single non-null value somewhere in the middle
-        assert_eq!(Some(100.0), min(&a));
-        assert_eq!(Some(100.0), max(&a));
-
-        // max is last value in remainder after chunking
-        let a: Float64Array = (0..255).map(|i| Some((i + 1) as f64)).collect();
-        assert_eq!(Some(255.0), max(&a));
-
-        // max is first value in remainder after chunking
-        let a: Float64Array = (0..257).map(|i| Some((i + 1) as f64)).collect();
-        assert_eq!(Some(257.0), max(&a));
-    }
-
-    #[test]
-    fn test_primitive_min_max_float_edge_cases() {
-        let a: Float64Array = (0..100).map(|_| Some(f64::NEG_INFINITY)).collect();
-        assert_eq!(Some(f64::NEG_INFINITY), min(&a));
-        assert_eq!(Some(f64::NEG_INFINITY), max(&a));
-
-        let a: Float64Array = (0..100).map(|_| Some(f64::MIN)).collect();
-        assert_eq!(Some(f64::MIN), min(&a));
-        assert_eq!(Some(f64::MIN), max(&a));
-
-        let a: Float64Array = (0..100).map(|_| Some(f64::MAX)).collect();
-        assert_eq!(Some(f64::MAX), min(&a));
-        assert_eq!(Some(f64::MAX), max(&a));
-
-        let a: Float64Array = (0..100).map(|_| Some(f64::INFINITY)).collect();
-        assert_eq!(Some(f64::INFINITY), min(&a));
-        assert_eq!(Some(f64::INFINITY), max(&a));
-    }
-
-    #[test]
-    fn test_primitive_min_max_float_all_nans_non_null() {
-        let a: Float64Array = (0..100).map(|_| Some(f64::NAN)).collect();
-        assert!(max(&a).unwrap().is_nan());
-        assert!(min(&a).unwrap().is_nan());
-    }
-
-    #[test]
-    fn test_primitive_min_max_float_first_nan_nonnull() {
-        let a: Float64Array = (0..100)
-            .map(|i| {
-                if i == 0 {
-                    Some(f64::NAN)
-                } else {
-                    Some(i as f64)
-                }
-            })
-            .collect();
-        assert_eq!(Some(1.0), min(&a));
-        assert!(max(&a).unwrap().is_nan());
-    }
-
-    #[test]
-    fn test_primitive_min_max_float_last_nan_nonnull() {
-        let a: Float64Array = (0..100)
-            .map(|i| {
-                if i == 99 {
-                    Some(f64::NAN)
-                } else {
-                    Some((i + 1) as f64)
-                }
-            })
-            .collect();
-        assert_eq!(Some(1.0), min(&a));
-        assert!(max(&a).unwrap().is_nan());
-    }
-
-    #[test]
-    fn test_primitive_min_max_float_first_nan_nullable() {
-        let a: Float64Array = (0..100)
-            .map(|i| {
-                if i == 0 {
-                    Some(f64::NAN)
-                } else if i % 2 == 0 {
-                    None
-                } else {
-                    Some(i as f64)
-                }
-            })
-            .collect();
-        assert_eq!(Some(1.0), min(&a));
-        assert!(max(&a).unwrap().is_nan());
-    }
-
-    #[test]
-    fn test_primitive_min_max_float_last_nan_nullable() {
-        let a: Float64Array = (0..100)
-            .map(|i| {
-                if i == 99 {
-                    Some(f64::NAN)
-                } else if i % 2 == 0 {
-                    None
-                } else {
-                    Some(i as f64)
-                }
-            })
-            .collect();
-        assert_eq!(Some(1.0), min(&a));
-        assert!(max(&a).unwrap().is_nan());
-    }
-
-    #[test]
-    fn test_primitive_min_max_float_inf_and_nans() {
-        let a: Float64Array = (0..100)
-            .map(|i| {
-                let x = match i % 10 {
-                    0 => f64::NEG_INFINITY,
-                    1 => f64::MIN,
-                    2 => f64::MAX,
-                    4 => f64::INFINITY,
-                    5 => f64::NAN,
-                    _ => i as f64,
-                };
-                Some(x)
-            })
-            .collect();
-        assert_eq!(Some(f64::NEG_INFINITY), min(&a));
-        assert!(max(&a).unwrap().is_nan());
-    }
-
-    #[test]
-    fn test_string_min_max_with_nulls() {
-        let a = StringArray::from(vec![Some("b"), None, None, Some("a"), Some("c")]);
-        assert_eq!("a", min_string(&a).unwrap());
-        assert_eq!("c", max_string(&a).unwrap());
-    }
-
-    #[test]
-    fn test_string_min_max_all_nulls() {
-        let a = StringArray::from(vec![None, None]);
-        assert_eq!(None, min_string(&a));
-        assert_eq!(None, max_string(&a));
-    }
-
-    #[test]
-    fn test_string_min_max_1() {
-        let a = StringArray::from(vec![None, None, Some("b"), Some("a")]);
-        assert_eq!(Some("a"), min_string(&a));
-        assert_eq!(Some("b"), max_string(&a));
-    }
-
-    #[test]
-    fn test_boolean_min_max_empty() {
-        let a = BooleanArray::from(vec![] as Vec<Option<bool>>);
-        assert_eq!(None, min_boolean(&a));
-        assert_eq!(None, max_boolean(&a));
-    }
-
-    #[test]
-    fn test_boolean_min_max_all_null() {
-        let a = BooleanArray::from(vec![None, None]);
-        assert_eq!(None, min_boolean(&a));
-        assert_eq!(None, max_boolean(&a));
-    }
-
-    #[test]
-    fn test_boolean_min_max_no_null() {
-        let a = BooleanArray::from(vec![Some(true), Some(false), Some(true)]);
-        assert_eq!(Some(false), min_boolean(&a));
-        assert_eq!(Some(true), max_boolean(&a));
-    }
-
-    #[test]
-    fn test_boolean_min_max() {
-        let a = BooleanArray::from(vec![Some(true), Some(true), None, Some(false), None]);
-        assert_eq!(Some(false), min_boolean(&a));
-        assert_eq!(Some(true), max_boolean(&a));
-
-        let a = BooleanArray::from(vec![None, Some(true), None, Some(false), None]);
-        assert_eq!(Some(false), min_boolean(&a));
-        assert_eq!(Some(true), max_boolean(&a));
-
-        let a =
-            BooleanArray::from(vec![Some(false), Some(true), None, Some(false), None]);
-        assert_eq!(Some(false), min_boolean(&a));
-        assert_eq!(Some(true), max_boolean(&a));
-    }
-
-    #[test]
-    fn test_boolean_min_max_smaller() {
-        let a = BooleanArray::from(vec![Some(false)]);
-        assert_eq!(Some(false), min_boolean(&a));
-        assert_eq!(Some(false), max_boolean(&a));
-
-        let a = BooleanArray::from(vec![None, Some(false)]);
-        assert_eq!(Some(false), min_boolean(&a));
-        assert_eq!(Some(false), max_boolean(&a));
-
-        let a = BooleanArray::from(vec![None, Some(true)]);
-        assert_eq!(Some(true), min_boolean(&a));
-        assert_eq!(Some(true), max_boolean(&a));
-
-        let a = BooleanArray::from(vec![Some(true)]);
-        assert_eq!(Some(true), min_boolean(&a));
-        assert_eq!(Some(true), max_boolean(&a));
-    }
-}
diff --git a/rust/arrow/src/compute/kernels/arithmetic.rs b/rust/arrow/src/compute/kernels/arithmetic.rs
deleted file mode 100644
index d7aadf144d4..00000000000
--- a/rust/arrow/src/compute/kernels/arithmetic.rs
+++ /dev/null
@@ -1,1009 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Defines basic arithmetic kernels for `PrimitiveArrays`.
-//!
-//! These kernels can leverage SIMD if available on your system.  Currently no runtime
-//! detection is provided, you should enable the specific SIMD intrinsics using
-//! `RUSTFLAGS="-C target-feature=+avx2"` for example.  See the documentation
-//! [here](https://doc.rust-lang.org/stable/core/arch/) for more information.
-
-use std::ops::{Add, Div, Mul, Neg, Sub};
-
-use num::{One, Zero};
-
-use crate::buffer::Buffer;
-#[cfg(simd)]
-use crate::buffer::MutableBuffer;
-#[cfg(not(simd))]
-use crate::compute::kernels::arity::unary;
-use crate::compute::util::combine_option_bitmap;
-use crate::datatypes;
-use crate::datatypes::ArrowNumericType;
-use crate::error::{ArrowError, Result};
-use crate::{array::*, util::bit_util};
-use num::traits::Pow;
-#[cfg(simd)]
-use std::borrow::BorrowMut;
-#[cfg(simd)]
-use std::slice::{ChunksExact, ChunksExactMut};
-
-/// SIMD vectorized version of `unary_math_op` above specialized for signed numerical values.
-#[cfg(simd)]
-fn simd_signed_unary_math_op<T, SIMD_OP, SCALAR_OP>(
-    array: &PrimitiveArray<T>,
-    simd_op: SIMD_OP,
-    scalar_op: SCALAR_OP,
-) -> Result<PrimitiveArray<T>>
-where
-    T: datatypes::ArrowSignedNumericType,
-    SIMD_OP: Fn(T::SignedSimd) -> T::SignedSimd,
-    SCALAR_OP: Fn(T::Native) -> T::Native,
-{
-    let lanes = T::lanes();
-    let buffer_size = array.len() * std::mem::size_of::<T::Native>();
-    let mut result = MutableBuffer::new(buffer_size).with_bitset(buffer_size, false);
-
-    let mut result_chunks = result.typed_data_mut().chunks_exact_mut(lanes);
-    let mut array_chunks = array.values().chunks_exact(lanes);
-
-    result_chunks
-        .borrow_mut()
-        .zip(array_chunks.borrow_mut())
-        .for_each(|(result_slice, input_slice)| {
-            let simd_input = T::load_signed(input_slice);
-            let simd_result = T::signed_unary_op(simd_input, &simd_op);
-            T::write_signed(simd_result, result_slice);
-        });
-
-    let result_remainder = result_chunks.into_remainder();
-    let array_remainder = array_chunks.remainder();
-
-    result_remainder.into_iter().zip(array_remainder).for_each(
-        |(scalar_result, scalar_input)| {
-            *scalar_result = scalar_op(*scalar_input);
-        },
-    );
-
-    let data = ArrayData::new(
-        T::DATA_TYPE,
-        array.len(),
-        None,
-        array.data_ref().null_buffer().cloned(),
-        0,
-        vec![result.into()],
-        vec![],
-    );
-    Ok(PrimitiveArray::<T>::from(data))
-}
-
-#[cfg(simd)]
-fn simd_float_unary_math_op<T, SIMD_OP, SCALAR_OP>(
-    array: &PrimitiveArray<T>,
-    simd_op: SIMD_OP,
-    scalar_op: SCALAR_OP,
-) -> Result<PrimitiveArray<T>>
-where
-    T: datatypes::ArrowFloatNumericType,
-    SIMD_OP: Fn(T::Simd) -> T::Simd,
-    SCALAR_OP: Fn(T::Native) -> T::Native,
-{
-    let lanes = T::lanes();
-    let buffer_size = array.len() * std::mem::size_of::<T::Native>();
-
-    let mut result = MutableBuffer::new(buffer_size).with_bitset(buffer_size, false);
-
-    let mut result_chunks = result.typed_data_mut().chunks_exact_mut(lanes);
-    let mut array_chunks = array.values().chunks_exact(lanes);
-
-    result_chunks
-        .borrow_mut()
-        .zip(array_chunks.borrow_mut())
-        .for_each(|(result_slice, input_slice)| {
-            let simd_input = T::load(input_slice);
-            let simd_result = T::unary_op(simd_input, &simd_op);
-            T::write(simd_result, result_slice);
-        });
-
-    let result_remainder = result_chunks.into_remainder();
-    let array_remainder = array_chunks.remainder();
-
-    result_remainder.into_iter().zip(array_remainder).for_each(
-        |(scalar_result, scalar_input)| {
-            *scalar_result = scalar_op(*scalar_input);
-        },
-    );
-
-    let data = ArrayData::new(
-        T::DATA_TYPE,
-        array.len(),
-        None,
-        array.data_ref().null_buffer().cloned(),
-        0,
-        vec![result.into()],
-        vec![],
-    );
-    Ok(PrimitiveArray::<T>::from(data))
-}
-
-/// Helper function to perform math lambda function on values from two arrays. If either
-/// left or right value is null then the output value is also null, so `1 + null` is
-/// `null`.
-///
-/// # Errors
-///
-/// This function errors if the arrays have different lengths
-pub fn math_op<T, F>(
-    left: &PrimitiveArray<T>,
-    right: &PrimitiveArray<T>,
-    op: F,
-) -> Result<PrimitiveArray<T>>
-where
-    T: ArrowNumericType,
-    F: Fn(T::Native, T::Native) -> T::Native,
-{
-    if left.len() != right.len() {
-        return Err(ArrowError::ComputeError(
-            "Cannot perform math operation on arrays of different length".to_string(),
-        ));
-    }
-
-    let null_bit_buffer =
-        combine_option_bitmap(left.data_ref(), right.data_ref(), left.len())?;
-
-    let values = left
-        .values()
-        .iter()
-        .zip(right.values().iter())
-        .map(|(l, r)| op(*l, *r));
-    // JUSTIFICATION
-    //  Benefit
-    //      ~60% speedup
-    //  Soundness
-    //      `values` is an iterator with a known size.
-    let buffer = unsafe { Buffer::from_trusted_len_iter(values) };
-
-    let data = ArrayData::new(
-        T::DATA_TYPE,
-        left.len(),
-        None,
-        null_bit_buffer,
-        0,
-        vec![buffer],
-        vec![],
-    );
-    Ok(PrimitiveArray::<T>::from(data))
-}
-
-/// Helper function to divide two arrays.
-///
-/// # Errors
-///
-/// This function errors if:
-/// * the arrays have different lengths
-/// * a division by zero is found
-fn math_divide<T>(
-    left: &PrimitiveArray<T>,
-    right: &PrimitiveArray<T>,
-) -> Result<PrimitiveArray<T>>
-where
-    T: ArrowNumericType,
-    T::Native: Div<Output = T::Native> + Zero,
-{
-    if left.len() != right.len() {
-        return Err(ArrowError::ComputeError(
-            "Cannot perform math operation on arrays of different length".to_string(),
-        ));
-    }
-
-    let null_bit_buffer =
-        combine_option_bitmap(left.data_ref(), right.data_ref(), left.len())?;
-
-    let buffer = if let Some(b) = &null_bit_buffer {
-        let values = left.values().iter().zip(right.values()).enumerate().map(
-            |(i, (left, right))| {
-                let is_valid = unsafe { bit_util::get_bit_raw(b.as_ptr(), i) };
-                if is_valid {
-                    if right.is_zero() {
-                        Err(ArrowError::DivideByZero)
-                    } else {
-                        Ok(*left / *right)
-                    }
-                } else {
-                    Ok(T::default_value())
-                }
-            },
-        );
-        unsafe { Buffer::try_from_trusted_len_iter(values) }
-    } else {
-        // no value is null
-        let values = left
-            .values()
-            .iter()
-            .zip(right.values())
-            .map(|(left, right)| {
-                if right.is_zero() {
-                    Err(ArrowError::DivideByZero)
-                } else {
-                    Ok(*left / *right)
-                }
-            });
-        unsafe { Buffer::try_from_trusted_len_iter(values) }
-    }?;
-
-    let data = ArrayData::new(
-        T::DATA_TYPE,
-        left.len(),
-        None,
-        null_bit_buffer,
-        0,
-        vec![buffer],
-        vec![],
-    );
-    Ok(PrimitiveArray::<T>::from(data))
-}
-
-/// Scalar-divisor version of `math_divide`.
-fn math_divide_scalar<T>(
-    array: &PrimitiveArray<T>,
-    divisor: T::Native,
-) -> Result<PrimitiveArray<T>>
-where
-    T: ArrowNumericType,
-    T::Native: Div<Output = T::Native> + Zero,
-{
-    if divisor.is_zero() {
-        return Err(ArrowError::DivideByZero);
-    }
-
-    let values = array.values().iter().map(|value| *value / divisor);
-    let buffer = unsafe { Buffer::from_trusted_len_iter(values) };
-
-    let data = ArrayData::new(
-        T::DATA_TYPE,
-        array.len(),
-        None,
-        array.data_ref().null_buffer().cloned(),
-        0,
-        vec![buffer],
-        vec![],
-    );
-    Ok(PrimitiveArray::<T>::from(data))
-}
-
-/// SIMD vectorized version of `math_op` above.
-#[cfg(simd)]
-fn simd_math_op<T, SIMD_OP, SCALAR_OP>(
-    left: &PrimitiveArray<T>,
-    right: &PrimitiveArray<T>,
-    simd_op: SIMD_OP,
-    scalar_op: SCALAR_OP,
-) -> Result<PrimitiveArray<T>>
-where
-    T: ArrowNumericType,
-    SIMD_OP: Fn(T::Simd, T::Simd) -> T::Simd,
-    SCALAR_OP: Fn(T::Native, T::Native) -> T::Native,
-{
-    if left.len() != right.len() {
-        return Err(ArrowError::ComputeError(
-            "Cannot perform math operation on arrays of different length".to_string(),
-        ));
-    }
-
-    let null_bit_buffer =
-        combine_option_bitmap(left.data_ref(), right.data_ref(), left.len())?;
-
-    let lanes = T::lanes();
-    let buffer_size = left.len() * std::mem::size_of::<T::Native>();
-    let mut result = MutableBuffer::new(buffer_size).with_bitset(buffer_size, false);
-
-    let mut result_chunks = result.typed_data_mut().chunks_exact_mut(lanes);
-    let mut left_chunks = left.values().chunks_exact(lanes);
-    let mut right_chunks = right.values().chunks_exact(lanes);
-
-    result_chunks
-        .borrow_mut()
-        .zip(left_chunks.borrow_mut().zip(right_chunks.borrow_mut()))
-        .for_each(|(result_slice, (left_slice, right_slice))| {
-            let simd_left = T::load(left_slice);
-            let simd_right = T::load(right_slice);
-            let simd_result = T::bin_op(simd_left, simd_right, &simd_op);
-            T::write(simd_result, result_slice);
-        });
-
-    let result_remainder = result_chunks.into_remainder();
-    let left_remainder = left_chunks.remainder();
-    let right_remainder = right_chunks.remainder();
-
-    result_remainder
-        .iter_mut()
-        .zip(left_remainder.iter().zip(right_remainder.iter()))
-        .for_each(|(scalar_result, (scalar_left, scalar_right))| {
-            *scalar_result = scalar_op(*scalar_left, *scalar_right);
-        });
-
-    let data = ArrayData::new(
-        T::DATA_TYPE,
-        left.len(),
-        None,
-        null_bit_buffer,
-        0,
-        vec![result.into()],
-        vec![],
-    );
-    Ok(PrimitiveArray::<T>::from(data))
-}
-
-/// SIMD vectorized implementation of `left / right`.
-/// If any of the lanes marked as valid in `valid_mask` are `0` then an `ArrowError::DivideByZero`
-/// is returned. The contents of no-valid lanes are undefined.
-#[cfg(simd)]
-#[inline]
-fn simd_checked_divide<T: ArrowNumericType>(
-    valid_mask: Option<u64>,
-    left: T::Simd,
-    right: T::Simd,
-) -> Result<T::Simd>
-where
-    T::Native: One + Zero,
-{
-    let zero = T::init(T::Native::zero());
-    let one = T::init(T::Native::one());
-
-    let right_no_invalid_zeros = match valid_mask {
-        Some(mask) => {
-            let simd_mask = T::mask_from_u64(mask);
-            // select `1` for invalid lanes, which will be a no-op during division later
-            T::mask_select(simd_mask, right, one)
-        }
-        None => right,
-    };
-
-    let zero_mask = T::eq(right_no_invalid_zeros, zero);
-
-    if T::mask_any(zero_mask) {
-        Err(ArrowError::DivideByZero)
-    } else {
-        Ok(T::bin_op(left, right_no_invalid_zeros, |a, b| a / b))
-    }
-}
-
-/// Scalar implementation of `left / right` for the remainder elements after complete chunks have been processed using SIMD.
-/// If any of the values marked as valid in `valid_mask` are `0` then an `ArrowError::DivideByZero` is returned.
-#[cfg(simd)]
-#[inline]
-fn simd_checked_divide_remainder<T: ArrowNumericType>(
-    valid_mask: Option<u64>,
-    left_chunks: ChunksExact<T::Native>,
-    right_chunks: ChunksExact<T::Native>,
-    result_chunks: ChunksExactMut<T::Native>,
-) -> Result<()>
-where
-    T::Native: Zero + Div<Output = T::Native>,
-{
-    let result_remainder = result_chunks.into_remainder();
-    let left_remainder = left_chunks.remainder();
-    let right_remainder = right_chunks.remainder();
-
-    result_remainder
-        .iter_mut()
-        .zip(left_remainder.iter().zip(right_remainder.iter()))
-        .enumerate()
-        .try_for_each(|(i, (result_scalar, (left_scalar, right_scalar)))| {
-            if valid_mask.map(|mask| mask & (1 << i) != 0).unwrap_or(true) {
-                if *right_scalar == T::Native::zero() {
-                    return Err(ArrowError::DivideByZero);
-                }
-                *result_scalar = *left_scalar / *right_scalar;
-            }
-            Ok(())
-        })?;
-
-    Ok(())
-}
-
-/// Scalar-divisor version of `simd_checked_divide_remainder`.
-#[cfg(simd)]
-#[inline]
-fn simd_checked_divide_scalar_remainder<T: ArrowNumericType>(
-    array_chunks: ChunksExact<T::Native>,
-    divisor: T::Native,
-    result_chunks: ChunksExactMut<T::Native>,
-) -> Result<()>
-where
-    T::Native: Zero + Div<Output = T::Native>,
-{
-    if divisor.is_zero() {
-        return Err(ArrowError::DivideByZero);
-    }
-
-    let result_remainder = result_chunks.into_remainder();
-    let array_remainder = array_chunks.remainder();
-
-    result_remainder
-        .iter_mut()
-        .zip(array_remainder.iter())
-        .for_each(|(result_scalar, array_scalar)| {
-            *result_scalar = *array_scalar / divisor;
-        });
-
-    Ok(())
-}
-
-/// SIMD vectorized version of `divide`.
-///
-/// The divide kernels need their own implementation as there is a need to handle situations
-/// where a divide by `0` occurs.  This is complicated by `NULL` slots and padding.
-#[cfg(simd)]
-fn simd_divide<T>(
-    left: &PrimitiveArray<T>,
-    right: &PrimitiveArray<T>,
-) -> Result<PrimitiveArray<T>>
-where
-    T: ArrowNumericType,
-    T::Native: One + Zero + Div<Output = T::Native>,
-{
-    if left.len() != right.len() {
-        return Err(ArrowError::ComputeError(
-            "Cannot perform math operation on arrays of different length".to_string(),
-        ));
-    }
-
-    // Create the combined `Bitmap`
-    let null_bit_buffer =
-        combine_option_bitmap(left.data_ref(), right.data_ref(), left.len())?;
-
-    let lanes = T::lanes();
-    let buffer_size = left.len() * std::mem::size_of::<T::Native>();
-    let mut result = MutableBuffer::new(buffer_size).with_bitset(buffer_size, false);
-
-    match &null_bit_buffer {
-        Some(b) => {
-            // combine_option_bitmap returns a slice or new buffer starting at 0
-            let valid_chunks = b.bit_chunks(0, left.len());
-
-            // process data in chunks of 64 elements since we also get 64 bits of validity information at a time
-            let mut result_chunks = result.typed_data_mut().chunks_exact_mut(64);
-            let mut left_chunks = left.values().chunks_exact(64);
-            let mut right_chunks = right.values().chunks_exact(64);
-
-            valid_chunks
-                .iter()
-                .zip(
-                    result_chunks
-                        .borrow_mut()
-                        .zip(left_chunks.borrow_mut().zip(right_chunks.borrow_mut())),
-                )
-                .try_for_each(
-                    |(mut mask, (result_slice, (left_slice, right_slice)))| {
-                        // split chunks further into slices corresponding to the vector length
-                        // the compiler is able to unroll this inner loop and remove bounds checks
-                        // since the outer chunk size (64) is always a multiple of the number of lanes
-                        result_slice
-                            .chunks_exact_mut(lanes)
-                            .zip(left_slice.chunks_exact(lanes).zip(right_slice.chunks_exact(lanes)))
-                            .try_for_each(|(result_slice, (left_slice, right_slice))| -> Result<()> {
-                                let simd_left = T::load(left_slice);
-                                let simd_right = T::load(right_slice);
-
-                                let simd_result = simd_checked_divide::<T>(Some(mask), simd_left, simd_right)?;
-
-                                T::write(simd_result, result_slice);
-
-                                // skip the shift and avoid overflow for u8 type, which uses 64 lanes.
-                                mask >>= T::lanes() % 64;
-
-                                Ok(())
-                            })
-                    },
-                )?;
-
-            let valid_remainder = valid_chunks.remainder_bits();
-
-            simd_checked_divide_remainder::<T>(
-                Some(valid_remainder),
-                left_chunks,
-                right_chunks,
-                result_chunks,
-            )?;
-        }
-        None => {
-            let mut result_chunks = result.typed_data_mut().chunks_exact_mut(lanes);
-            let mut left_chunks = left.values().chunks_exact(lanes);
-            let mut right_chunks = right.values().chunks_exact(lanes);
-
-            result_chunks
-                .borrow_mut()
-                .zip(left_chunks.borrow_mut().zip(right_chunks.borrow_mut()))
-                .try_for_each(
-                    |(result_slice, (left_slice, right_slice))| -> Result<()> {
-                        let simd_left = T::load(left_slice);
-                        let simd_right = T::load(right_slice);
-
-                        let simd_result =
-                            simd_checked_divide::<T>(None, simd_left, simd_right)?;
-
-                        T::write(simd_result, result_slice);
-
-                        Ok(())
-                    },
-                )?;
-
-            simd_checked_divide_remainder::<T>(
-                None,
-                left_chunks,
-                right_chunks,
-                result_chunks,
-            )?;
-        }
-    }
-
-    let data = ArrayData::new(
-        T::DATA_TYPE,
-        left.len(),
-        None,
-        null_bit_buffer,
-        0,
-        vec![result.into()],
-        vec![],
-    );
-    Ok(PrimitiveArray::<T>::from(data))
-}
-
-/// SIMD vectorized version of `divide_scalar`.
-#[cfg(simd)]
-fn simd_divide_scalar<T>(
-    array: &PrimitiveArray<T>,
-    divisor: T::Native,
-) -> Result<PrimitiveArray<T>>
-where
-    T: ArrowNumericType,
-    T::Native: One + Zero + Div<Output = T::Native>,
-{
-    if divisor.is_zero() {
-        return Err(ArrowError::DivideByZero);
-    }
-
-    let lanes = T::lanes();
-    let buffer_size = array.len() * std::mem::size_of::<T::Native>();
-    let mut result = MutableBuffer::new(buffer_size).with_bitset(buffer_size, false);
-
-    let mut result_chunks = result.typed_data_mut().chunks_exact_mut(lanes);
-    let mut array_chunks = array.values().chunks_exact(lanes);
-
-    result_chunks
-        .borrow_mut()
-        .zip(array_chunks.borrow_mut())
-        .for_each(|(result_slice, array_slice)| {
-            let simd_left = T::load(array_slice);
-            let simd_right = T::init(divisor);
-
-            let simd_result = T::bin_op(simd_left, simd_right, |a, b| a / b);
-            T::write(simd_result, result_slice);
-        });
-
-    simd_checked_divide_scalar_remainder::<T>(array_chunks, divisor, result_chunks)?;
-
-    let data = ArrayData::new(
-        T::DATA_TYPE,
-        array.len(),
-        None,
-        array.data_ref().null_buffer().cloned(),
-        0,
-        vec![result.into()],
-        vec![],
-    );
-    Ok(PrimitiveArray::<T>::from(data))
-}
-
-/// Perform `left + right` operation on two arrays. If either left or right value is null
-/// then the result is also null.
-pub fn add<T>(
-    left: &PrimitiveArray<T>,
-    right: &PrimitiveArray<T>,
-) -> Result<PrimitiveArray<T>>
-where
-    T: ArrowNumericType,
-    T::Native: Add<Output = T::Native>
-        + Sub<Output = T::Native>
-        + Mul<Output = T::Native>
-        + Div<Output = T::Native>
-        + Zero,
-{
-    #[cfg(simd)]
-    return simd_math_op(&left, &right, |a, b| a + b, |a, b| a + b);
-    #[cfg(not(simd))]
-    return math_op(left, right, |a, b| a + b);
-}
-
-/// Perform `left - right` operation on two arrays. If either left or right value is null
-/// then the result is also null.
-pub fn subtract<T>(
-    left: &PrimitiveArray<T>,
-    right: &PrimitiveArray<T>,
-) -> Result<PrimitiveArray<T>>
-where
-    T: datatypes::ArrowNumericType,
-    T::Native: Add<Output = T::Native>
-        + Sub<Output = T::Native>
-        + Mul<Output = T::Native>
-        + Div<Output = T::Native>
-        + Zero,
-{
-    #[cfg(simd)]
-    return simd_math_op(&left, &right, |a, b| a - b, |a, b| a - b);
-    #[cfg(not(simd))]
-    return math_op(left, right, |a, b| a - b);
-}
-
-/// Perform `-` operation on an array. If value is null then the result is also null.
-pub fn negate<T>(array: &PrimitiveArray<T>) -> Result<PrimitiveArray<T>>
-where
-    T: datatypes::ArrowSignedNumericType,
-    T::Native: Neg<Output = T::Native>,
-{
-    #[cfg(simd)]
-    return simd_signed_unary_math_op(array, |x| -x, |x| -x);
-    #[cfg(not(simd))]
-    return Ok(unary(array, |x| -x));
-}
-
-/// Raise array with floating point values to the power of a scalar.
-pub fn powf_scalar<T>(
-    array: &PrimitiveArray<T>,
-    raise: T::Native,
-) -> Result<PrimitiveArray<T>>
-where
-    T: datatypes::ArrowFloatNumericType,
-    T::Native: Pow<T::Native, Output = T::Native>,
-{
-    #[cfg(simd)]
-    {
-        let raise_vector = T::init(raise);
-        return simd_float_unary_math_op(
-            array,
-            |x| T::pow(x, raise_vector),
-            |x| x.pow(raise),
-        );
-    }
-    #[cfg(not(simd))]
-    return Ok(unary(array, |x| x.pow(raise)));
-}
-
-/// Perform `left * right` operation on two arrays. If either left or right value is null
-/// then the result is also null.
-pub fn multiply<T>(
-    left: &PrimitiveArray<T>,
-    right: &PrimitiveArray<T>,
-) -> Result<PrimitiveArray<T>>
-where
-    T: datatypes::ArrowNumericType,
-    T::Native: Add<Output = T::Native>
-        + Sub<Output = T::Native>
-        + Mul<Output = T::Native>
-        + Div<Output = T::Native>
-        + Zero,
-{
-    #[cfg(simd)]
-    return simd_math_op(&left, &right, |a, b| a * b, |a, b| a * b);
-    #[cfg(not(simd))]
-    return math_op(left, right, |a, b| a * b);
-}
-
-/// Perform `left / right` operation on two arrays. If either left or right value is null
-/// then the result is also null. If any right hand value is zero then the result of this
-/// operation will be `Err(ArrowError::DivideByZero)`.
-pub fn divide<T>(
-    left: &PrimitiveArray<T>,
-    right: &PrimitiveArray<T>,
-) -> Result<PrimitiveArray<T>>
-where
-    T: datatypes::ArrowNumericType,
-    T::Native: Add<Output = T::Native>
-        + Sub<Output = T::Native>
-        + Mul<Output = T::Native>
-        + Div<Output = T::Native>
-        + Zero
-        + One,
-{
-    #[cfg(simd)]
-    return simd_divide(&left, &right);
-    #[cfg(not(simd))]
-    return math_divide(&left, &right);
-}
-
-/// Divide every value in an array by a scalar. If any value in the array is null then the
-/// result is also null. If the scalar is zero then the result of this operation will be
-/// `Err(ArrowError::DivideByZero)`.
-pub fn divide_scalar<T>(
-    array: &PrimitiveArray<T>,
-    divisor: T::Native,
-) -> Result<PrimitiveArray<T>>
-where
-    T: datatypes::ArrowNumericType,
-    T::Native: Add<Output = T::Native>
-        + Sub<Output = T::Native>
-        + Mul<Output = T::Native>
-        + Div<Output = T::Native>
-        + Zero
-        + One,
-{
-    #[cfg(simd)]
-    return simd_divide_scalar(&array, divisor);
-    #[cfg(not(simd))]
-    return math_divide_scalar(&array, divisor);
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::array::Int32Array;
-
-    #[test]
-    fn test_primitive_array_add() {
-        let a = Int32Array::from(vec![5, 6, 7, 8, 9]);
-        let b = Int32Array::from(vec![6, 7, 8, 9, 8]);
-        let c = add(&a, &b).unwrap();
-        assert_eq!(11, c.value(0));
-        assert_eq!(13, c.value(1));
-        assert_eq!(15, c.value(2));
-        assert_eq!(17, c.value(3));
-        assert_eq!(17, c.value(4));
-    }
-
-    #[test]
-    fn test_primitive_array_add_sliced() {
-        let a = Int32Array::from(vec![0, 0, 0, 5, 6, 7, 8, 9, 0]);
-        let b = Int32Array::from(vec![0, 0, 0, 6, 7, 8, 9, 8, 0]);
-        let a = a.slice(3, 5);
-        let b = b.slice(3, 5);
-        let a = a.as_any().downcast_ref::<Int32Array>().unwrap();
-        let b = b.as_any().downcast_ref::<Int32Array>().unwrap();
-
-        assert_eq!(5, a.value(0));
-        assert_eq!(6, b.value(0));
-
-        let c = add(&a, &b).unwrap();
-        assert_eq!(5, c.len());
-        assert_eq!(11, c.value(0));
-        assert_eq!(13, c.value(1));
-        assert_eq!(15, c.value(2));
-        assert_eq!(17, c.value(3));
-        assert_eq!(17, c.value(4));
-    }
-
-    #[test]
-    fn test_primitive_array_add_mismatched_length() {
-        let a = Int32Array::from(vec![5, 6, 7, 8, 9]);
-        let b = Int32Array::from(vec![6, 7, 8]);
-        let e = add(&a, &b)
-            .err()
-            .expect("should have failed due to different lengths");
-        assert_eq!(
-            "ComputeError(\"Cannot perform math operation on arrays of different length\")",
-            format!("{:?}", e)
-        );
-    }
-
-    #[test]
-    fn test_primitive_array_subtract() {
-        let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
-        let b = Int32Array::from(vec![5, 4, 3, 2, 1]);
-        let c = subtract(&a, &b).unwrap();
-        assert_eq!(-4, c.value(0));
-        assert_eq!(-2, c.value(1));
-        assert_eq!(0, c.value(2));
-        assert_eq!(2, c.value(3));
-        assert_eq!(4, c.value(4));
-    }
-
-    #[test]
-    fn test_primitive_array_multiply() {
-        let a = Int32Array::from(vec![5, 6, 7, 8, 9]);
-        let b = Int32Array::from(vec![6, 7, 8, 9, 8]);
-        let c = multiply(&a, &b).unwrap();
-        assert_eq!(30, c.value(0));
-        assert_eq!(42, c.value(1));
-        assert_eq!(56, c.value(2));
-        assert_eq!(72, c.value(3));
-        assert_eq!(72, c.value(4));
-    }
-
-    #[test]
-    fn test_primitive_array_divide() {
-        let a = Int32Array::from(vec![15, 15, 8, 1, 9]);
-        let b = Int32Array::from(vec![5, 6, 8, 9, 1]);
-        let c = divide(&a, &b).unwrap();
-        assert_eq!(3, c.value(0));
-        assert_eq!(2, c.value(1));
-        assert_eq!(1, c.value(2));
-        assert_eq!(0, c.value(3));
-        assert_eq!(9, c.value(4));
-    }
-
-    #[test]
-    fn test_primitive_array_divide_scalar() {
-        let a = Int32Array::from(vec![15, 14, 9, 8, 1]);
-        let b = 3;
-        let c = divide_scalar(&a, b).unwrap();
-        let expected = Int32Array::from(vec![5, 4, 3, 2, 0]);
-        assert_eq!(c, expected);
-    }
-
-    #[test]
-    fn test_primitive_array_divide_sliced() {
-        let a = Int32Array::from(vec![0, 0, 0, 15, 15, 8, 1, 9, 0]);
-        let b = Int32Array::from(vec![0, 0, 0, 5, 6, 8, 9, 1, 0]);
-        let a = a.slice(3, 5);
-        let b = b.slice(3, 5);
-        let a = a.as_any().downcast_ref::<Int32Array>().unwrap();
-        let b = b.as_any().downcast_ref::<Int32Array>().unwrap();
-
-        let c = divide(&a, &b).unwrap();
-        assert_eq!(5, c.len());
-        assert_eq!(3, c.value(0));
-        assert_eq!(2, c.value(1));
-        assert_eq!(1, c.value(2));
-        assert_eq!(0, c.value(3));
-        assert_eq!(9, c.value(4));
-    }
-
-    #[test]
-    fn test_primitive_array_divide_with_nulls() {
-        let a = Int32Array::from(vec![Some(15), None, Some(8), Some(1), Some(9), None]);
-        let b = Int32Array::from(vec![Some(5), Some(6), Some(8), Some(9), None, None]);
-        let c = divide(&a, &b).unwrap();
-        assert_eq!(3, c.value(0));
-        assert_eq!(true, c.is_null(1));
-        assert_eq!(1, c.value(2));
-        assert_eq!(0, c.value(3));
-        assert_eq!(true, c.is_null(4));
-        assert_eq!(true, c.is_null(5));
-    }
-
-    #[test]
-    fn test_primitive_array_divide_scalar_with_nulls() {
-        let a = Int32Array::from(vec![Some(15), None, Some(8), Some(1), Some(9), None]);
-        let b = 3;
-        let c = divide_scalar(&a, b).unwrap();
-        let expected =
-            Int32Array::from(vec![Some(5), None, Some(2), Some(0), Some(3), None]);
-        assert_eq!(c, expected);
-    }
-
-    #[test]
-    fn test_primitive_array_divide_with_nulls_sliced() {
-        let a = Int32Array::from(vec![
-            None,
-            None,
-            None,
-            None,
-            None,
-            None,
-            None,
-            None,
-            Some(15),
-            None,
-            Some(8),
-            Some(1),
-            Some(9),
-            None,
-            None,
-        ]);
-        let b = Int32Array::from(vec![
-            None,
-            None,
-            None,
-            None,
-            None,
-            None,
-            None,
-            None,
-            Some(5),
-            Some(6),
-            Some(8),
-            Some(9),
-            None,
-            None,
-            None,
-        ]);
-
-        let a = a.slice(8, 6);
-        let a = a.as_any().downcast_ref::<Int32Array>().unwrap();
-
-        let b = b.slice(8, 6);
-        let b = b.as_any().downcast_ref::<Int32Array>().unwrap();
-
-        let c = divide(&a, &b).unwrap();
-        assert_eq!(6, c.len());
-        assert_eq!(3, c.value(0));
-        assert_eq!(true, c.is_null(1));
-        assert_eq!(1, c.value(2));
-        assert_eq!(0, c.value(3));
-        assert_eq!(true, c.is_null(4));
-        assert_eq!(true, c.is_null(5));
-    }
-
-    #[test]
-    #[should_panic(expected = "DivideByZero")]
-    fn test_primitive_array_divide_by_zero() {
-        let a = Int32Array::from(vec![15]);
-        let b = Int32Array::from(vec![0]);
-        divide(&a, &b).unwrap();
-    }
-
-    #[test]
-    fn test_primitive_array_divide_f64() {
-        let a = Float64Array::from(vec![15.0, 15.0, 8.0]);
-        let b = Float64Array::from(vec![5.0, 6.0, 8.0]);
-        let c = divide(&a, &b).unwrap();
-        assert!(3.0 - c.value(0) < f64::EPSILON);
-        assert!(2.5 - c.value(1) < f64::EPSILON);
-        assert!(1.0 - c.value(2) < f64::EPSILON);
-    }
-
-    #[test]
-    fn test_primitive_array_add_with_nulls() {
-        let a = Int32Array::from(vec![Some(5), None, Some(7), None]);
-        let b = Int32Array::from(vec![None, None, Some(6), Some(7)]);
-        let c = add(&a, &b).unwrap();
-        assert_eq!(true, c.is_null(0));
-        assert_eq!(true, c.is_null(1));
-        assert_eq!(false, c.is_null(2));
-        assert_eq!(true, c.is_null(3));
-        assert_eq!(13, c.value(2));
-    }
-
-    #[test]
-    fn test_primitive_array_negate() {
-        let a: Int64Array = (0..100).into_iter().map(Some).collect();
-        let actual = negate(&a).unwrap();
-        let expected: Int64Array = (0..100).into_iter().map(|i| Some(-i)).collect();
-        assert_eq!(expected, actual);
-    }
-
-    #[test]
-    fn test_arithmetic_kernel_should_not_rely_on_padding() {
-        let a: UInt8Array = (0..128_u8).into_iter().map(Some).collect();
-        let a = a.slice(63, 65);
-        let a = a.as_any().downcast_ref::<UInt8Array>().unwrap();
-
-        let b: UInt8Array = (0..128_u8).into_iter().map(Some).collect();
-        let b = b.slice(63, 65);
-        let b = b.as_any().downcast_ref::<UInt8Array>().unwrap();
-
-        let actual = add(&a, &b).unwrap();
-        let actual: Vec<Option<u8>> = actual.iter().collect();
-        let expected: Vec<Option<u8>> = (63..63_u8 + 65_u8)
-            .into_iter()
-            .map(|i| Some(i + i))
-            .collect();
-        assert_eq!(expected, actual);
-    }
-
-    #[test]
-    fn test_primitive_array_raise_power_scalar() {
-        let a = Float64Array::from(vec![1.0, 2.0, 3.0]);
-        let actual = powf_scalar(&a, 2.0).unwrap();
-        let expected = Float64Array::from(vec![1.0, 4.0, 9.0]);
-        assert_eq!(expected, actual);
-        let a = Float64Array::from(vec![Some(1.0), None, Some(3.0)]);
-        let actual = powf_scalar(&a, 2.0).unwrap();
-        let expected = Float64Array::from(vec![Some(1.0), None, Some(9.0)]);
-        assert_eq!(expected, actual);
-    }
-}
diff --git a/rust/arrow/src/compute/kernels/arity.rs b/rust/arrow/src/compute/kernels/arity.rs
deleted file mode 100644
index 4aa7f3d6e5d..00000000000
--- a/rust/arrow/src/compute/kernels/arity.rs
+++ /dev/null
@@ -1,74 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Defines kernels suitable to perform operations to primitive arrays.
-
-use crate::array::{Array, ArrayData, PrimitiveArray};
-use crate::buffer::Buffer;
-use crate::datatypes::ArrowPrimitiveType;
-
-#[inline]
-fn into_primitive_array_data<I: ArrowPrimitiveType, O: ArrowPrimitiveType>(
-    array: &PrimitiveArray<I>,
-    buffer: Buffer,
-) -> ArrayData {
-    ArrayData::new(
-        O::DATA_TYPE,
-        array.len(),
-        None,
-        array.data_ref().null_buffer().cloned(),
-        0,
-        vec![buffer],
-        vec![],
-    )
-}
-
-/// Applies an unary and infalible function to a primitive array.
-/// This is the fastest way to perform an operation on a primitive array when
-/// the benefits of a vectorized operation outweights the cost of branching nulls and non-nulls.
-/// # Implementation
-/// This will apply the function for all values, including those on null slots.
-/// This implies that the operation must be infalible for any value of the corresponding type
-/// or this function may panic.
-/// # Example
-/// ```rust
-/// # use arrow::array::Int32Array;
-/// # use arrow::datatypes::Int32Type;
-/// # use arrow::compute::kernels::arity::unary;
-/// # fn main() {
-/// let array = Int32Array::from(vec![Some(5), Some(7), None]);
-/// let c = unary::<_, _, Int32Type>(&array, |x| x * 2 + 1);
-/// assert_eq!(c, Int32Array::from(vec![Some(11), Some(15), None]));
-/// # }
-/// ```
-pub fn unary<I, F, O>(array: &PrimitiveArray<I>, op: F) -> PrimitiveArray<O>
-where
-    I: ArrowPrimitiveType,
-    O: ArrowPrimitiveType,
-    F: Fn(I::Native) -> O::Native,
-{
-    let values = array.values().iter().map(|v| op(*v));
-    // JUSTIFICATION
-    //  Benefit
-    //      ~60% speedup
-    //  Soundness
-    //      `values` is an iterator with a known size because arrays are sized.
-    let buffer = unsafe { Buffer::from_trusted_len_iter(values) };
-
-    let data = into_primitive_array_data::<_, O>(array, buffer);
-    PrimitiveArray::<O>::from(data)
-}
diff --git a/rust/arrow/src/compute/kernels/boolean.rs b/rust/arrow/src/compute/kernels/boolean.rs
deleted file mode 100644
index e1d5592d423..00000000000
--- a/rust/arrow/src/compute/kernels/boolean.rs
+++ /dev/null
@@ -1,1146 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Defines boolean kernels on Arrow `BooleanArray`'s, e.g. `AND`, `OR` and `NOT`.
-//!
-//! These kernels can leverage SIMD if available on your system.  Currently no runtime
-//! detection is provided, you should enable the specific SIMD intrinsics using
-//! `RUSTFLAGS="-C target-feature=+avx2"` for example.  See the documentation
-//! [here](https://doc.rust-lang.org/stable/core/arch/) for more information.
-
-use std::ops::Not;
-
-use crate::array::{Array, ArrayData, BooleanArray, PrimitiveArray};
-use crate::buffer::{
-    buffer_bin_and, buffer_bin_or, buffer_unary_not, Buffer, MutableBuffer,
-};
-use crate::compute::util::combine_option_bitmap;
-use crate::datatypes::{ArrowNumericType, DataType};
-use crate::error::{ArrowError, Result};
-use crate::util::bit_util::{ceil, round_upto_multiple_of_64};
-use core::iter;
-use lexical_core::Integer;
-
-fn binary_boolean_kleene_kernel<F>(
-    left: &BooleanArray,
-    right: &BooleanArray,
-    op: F,
-) -> Result<BooleanArray>
-where
-    F: Fn(u64, u64, u64, u64) -> (u64, u64),
-{
-    if left.len() != right.len() {
-        return Err(ArrowError::ComputeError(
-            "Cannot perform bitwise operation on arrays of different length".to_string(),
-        ));
-    }
-
-    // length and offset of boolean array is measured in bits
-    let len = left.len();
-
-    // result length measured in bytes (incl. remainder)
-    let mut result_len = round_upto_multiple_of_64(len) / 8;
-    // The iterator that applies the kleene_op closure always chains an additional iteration
-    // for the remainder chunk, even without a remainder. If the remainder is absent
-    // (length % 64 == 0), kleene_op would resize the result buffers (value_buffer and
-    // valid_buffer) to store 8 additional bytes, because result_len wouldn't include a remainder
-    // chunk. The resizing is unnecessary and expensive. We can prevent it by adding 8 bytes to
-    // result_len here. Nonetheless, all bits of these 8 bytes will be 0.
-    if len % 64 == 0 {
-        result_len += 8;
-    }
-
-    let mut value_buffer = MutableBuffer::new(result_len);
-    let mut valid_buffer = MutableBuffer::new(result_len);
-
-    let kleene_op = |((left_data, left_valid), (right_data, right_valid)): (
-        (u64, u64),
-        (u64, u64),
-    )| {
-        let left_true = left_valid & left_data;
-        let left_false = left_valid & !left_data;
-
-        let right_true = right_valid & right_data;
-        let right_false = right_valid & !right_data;
-
-        let (value, valid) = op(left_true, left_false, right_true, right_false);
-
-        value_buffer.extend_from_slice(&[value]);
-        valid_buffer.extend_from_slice(&[valid]);
-    };
-
-    let left_offset = left.offset();
-    let right_offset = right.offset();
-
-    let left_buffer = left.values();
-    let right_buffer = right.values();
-
-    let left_chunks = left_buffer.bit_chunks(left_offset, len);
-    let right_chunks = right_buffer.bit_chunks(right_offset, len);
-
-    let left_rem = left_chunks.remainder_bits();
-    let right_rem = right_chunks.remainder_bits();
-
-    let opt_left_valid_chunks_and_rem = left
-        .data_ref()
-        .null_buffer()
-        .map(|b| b.bit_chunks(left_offset, len))
-        .map(|chunks| (chunks.iter(), chunks.remainder_bits()));
-    let opt_right_valid_chunks_and_rem = right
-        .data_ref()
-        .null_buffer()
-        .map(|b| b.bit_chunks(right_offset, len))
-        .map(|chunks| (chunks.iter(), chunks.remainder_bits()));
-
-    match (
-        opt_left_valid_chunks_and_rem,
-        opt_right_valid_chunks_and_rem,
-    ) {
-        (
-            Some((left_valid_chunks, left_valid_rem)),
-            Some((right_valid_chunks, right_valid_rem)),
-        ) => {
-            left_chunks
-                .iter()
-                .zip(left_valid_chunks)
-                .zip(right_chunks.iter().zip(right_valid_chunks))
-                .chain(iter::once((
-                    (left_rem, left_valid_rem),
-                    (right_rem, right_valid_rem),
-                )))
-                .for_each(kleene_op);
-        }
-        (Some((left_valid_chunks, left_valid_rem)), None) => {
-            left_chunks
-                .iter()
-                .zip(left_valid_chunks)
-                .zip(right_chunks.iter().zip(iter::repeat(u64::MAX)))
-                .chain(iter::once((
-                    (left_rem, left_valid_rem),
-                    (right_rem, u64::MAX),
-                )))
-                .for_each(kleene_op);
-        }
-        (None, Some((right_valid_chunks, right_valid_rem))) => {
-            left_chunks
-                .iter()
-                .zip(iter::repeat(u64::MAX))
-                .zip(right_chunks.iter().zip(right_valid_chunks))
-                .chain(iter::once((
-                    (left_rem, u64::MAX),
-                    (right_rem, right_valid_rem),
-                )))
-                .for_each(kleene_op);
-        }
-        (None, None) => {
-            left_chunks
-                .iter()
-                .zip(iter::repeat(u64::MAX))
-                .zip(right_chunks.iter().zip(iter::repeat(u64::MAX)))
-                .chain(iter::once(((left_rem, u64::MAX), (right_rem, u64::MAX))))
-                .for_each(kleene_op);
-        }
-    };
-
-    let bool_buffer: Buffer = value_buffer.into();
-    let bool_valid_buffer: Buffer = valid_buffer.into();
-
-    let array_data = ArrayData::new(
-        DataType::Boolean,
-        len,
-        None,
-        Some(bool_valid_buffer),
-        left_offset,
-        vec![bool_buffer],
-        vec![],
-    );
-
-    Ok(BooleanArray::from(array_data))
-}
-
-/// Helper function to implement binary kernels
-fn binary_boolean_kernel<F>(
-    left: &BooleanArray,
-    right: &BooleanArray,
-    op: F,
-) -> Result<BooleanArray>
-where
-    F: Fn(&Buffer, usize, &Buffer, usize, usize) -> Buffer,
-{
-    if left.len() != right.len() {
-        return Err(ArrowError::ComputeError(
-            "Cannot perform bitwise operation on arrays of different length".to_string(),
-        ));
-    }
-
-    let len = left.len();
-
-    let left_data = left.data_ref();
-    let right_data = right.data_ref();
-    let null_bit_buffer = combine_option_bitmap(&left_data, &right_data, len)?;
-
-    let left_buffer = &left_data.buffers()[0];
-    let right_buffer = &right_data.buffers()[0];
-    let left_offset = left.offset();
-    let right_offset = right.offset();
-
-    let values = op(&left_buffer, left_offset, &right_buffer, right_offset, len);
-
-    let data = ArrayData::new(
-        DataType::Boolean,
-        len,
-        None,
-        null_bit_buffer,
-        0,
-        vec![values],
-        vec![],
-    );
-    Ok(BooleanArray::from(data))
-}
-
-/// Performs `AND` operation on two arrays. If either left or right value is null then the
-/// result is also null.
-/// # Error
-/// This function errors when the arrays have different lengths.
-/// # Example
-/// ```rust
-/// use arrow::array::BooleanArray;
-/// use arrow::error::Result;
-/// use arrow::compute::kernels::boolean::and;
-/// # fn main() -> Result<()> {
-/// let a = BooleanArray::from(vec![Some(false), Some(true), None]);
-/// let b = BooleanArray::from(vec![Some(true), Some(true), Some(false)]);
-/// let and_ab = and(&a, &b)?;
-/// assert_eq!(and_ab, BooleanArray::from(vec![Some(false), Some(true), None]));
-/// # Ok(())
-/// # }
-/// ```
-pub fn and(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArray> {
-    binary_boolean_kernel(&left, &right, buffer_bin_and)
-}
-
-/// Logical 'and' boolean values with Kleene logic
-///
-/// # Behavior
-///
-/// This function behaves as follows with nulls:
-///
-/// * `true` and `null` = `null`
-/// * `null` and `true` = `null`
-/// * `false` and `null` = `false`
-/// * `null` and `false` = `false`
-/// * `null` and `null` = `null`
-///
-/// In other words, in this context a null value really means \"unknown\",
-/// and an unknown value 'and' false is always false.
-/// For a different null behavior, see function \"and\".
-///
-/// # Example
-///
-/// ```rust
-/// use arrow::array::BooleanArray;
-/// use arrow::error::Result;
-/// use arrow::compute::kernels::boolean::and_kleene;
-/// # fn main() -> Result<()> {
-/// let a = BooleanArray::from(vec![Some(true), Some(false), None]);
-/// let b = BooleanArray::from(vec![None, None, None]);
-/// let and_ab = and_kleene(&a, &b)?;
-/// assert_eq!(and_ab, BooleanArray::from(vec![None, Some(false), None]));
-/// # Ok(())
-/// # }
-/// ```
-///
-/// # Fails
-///
-/// If the operands have different lengths
-pub fn and_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArray> {
-    if left.null_count().is_zero() && right.null_count().is_zero() {
-        return and(left, right);
-    }
-
-    let op = |left_true, left_false, right_true, right_false| {
-        (
-            left_true & right_true,
-            left_false | right_false | (left_true & right_true),
-        )
-    };
-
-    binary_boolean_kleene_kernel(left, right, op)
-}
-
-/// Performs `OR` operation on two arrays. If either left or right value is null then the
-/// result is also null.
-/// # Error
-/// This function errors when the arrays have different lengths.
-/// # Example
-/// ```rust
-/// use arrow::array::BooleanArray;
-/// use arrow::error::Result;
-/// use arrow::compute::kernels::boolean::or;
-/// # fn main() -> Result<()> {
-/// let a = BooleanArray::from(vec![Some(false), Some(true), None]);
-/// let b = BooleanArray::from(vec![Some(true), Some(true), Some(false)]);
-/// let or_ab = or(&a, &b)?;
-/// assert_eq!(or_ab, BooleanArray::from(vec![Some(true), Some(true), None]));
-/// # Ok(())
-/// # }
-/// ```
-pub fn or(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArray> {
-    binary_boolean_kernel(&left, &right, buffer_bin_or)
-}
-
-/// Logical 'or' boolean values with Kleene logic
-///
-/// # Behavior
-///
-/// This function behaves as follows with nulls:
-///
-/// * `true` or `null` = `true`
-/// * `null` or `true` = `true`
-/// * `false` or `null` = `null`
-/// * `null` or `false` = `null`
-/// * `null` or `null` = `null`
-///
-/// In other words, in this context a null value really means \"unknown\",
-/// and an unknown value 'or' true is always true.
-/// For a different null behavior, see function \"or\".
-///
-/// # Example
-///
-/// ```rust
-/// use arrow::array::BooleanArray;
-/// use arrow::error::Result;
-/// use arrow::compute::kernels::boolean::or_kleene;
-/// # fn main() -> Result<()> {
-/// let a = BooleanArray::from(vec![Some(true), Some(false), None]);
-/// let b = BooleanArray::from(vec![None, None, None]);
-/// let or_ab = or_kleene(&a, &b)?;
-/// assert_eq!(or_ab, BooleanArray::from(vec![Some(true), None, None]));
-/// # Ok(())
-/// # }
-/// ```
-///
-/// # Fails
-///
-/// If the operands have different lengths
-pub fn or_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArray> {
-    if left.null_count().is_zero() && right.null_count().is_zero() {
-        return or(left, right);
-    }
-
-    let op = |left_true, left_false, right_true, right_false| {
-        (
-            left_true | right_true,
-            left_true | right_true | (left_false & right_false),
-        )
-    };
-
-    binary_boolean_kleene_kernel(left, right, op)
-}
-
-/// Performs unary `NOT` operation on an arrays. If value is null then the result is also
-/// null.
-/// # Error
-/// This function never errors. It returns an error for consistency.
-/// # Example
-/// ```rust
-/// use arrow::array::BooleanArray;
-/// use arrow::error::Result;
-/// use arrow::compute::kernels::boolean::not;
-/// # fn main() -> Result<()> {
-/// let a = BooleanArray::from(vec![Some(false), Some(true), None]);
-/// let not_a = not(&a)?;
-/// assert_eq!(not_a, BooleanArray::from(vec![Some(true), Some(false), None]));
-/// # Ok(())
-/// # }
-/// ```
-pub fn not(left: &BooleanArray) -> Result<BooleanArray> {
-    let left_offset = left.offset();
-    let len = left.len();
-
-    let data = left.data_ref();
-    let null_bit_buffer = data
-        .null_bitmap()
-        .as_ref()
-        .map(|b| b.bits.slice(left_offset));
-
-    let values = buffer_unary_not(&data.buffers()[0], left_offset, len);
-
-    let data = ArrayData::new(
-        DataType::Boolean,
-        len,
-        None,
-        null_bit_buffer,
-        0,
-        vec![values],
-        vec![],
-    );
-    Ok(BooleanArray::from(data))
-}
-
-/// Returns a non-null [BooleanArray] with whether each value of the array is null.
-/// # Error
-/// This function never errors.
-/// # Example
-/// ```rust
-/// # use arrow::error::Result;
-/// use arrow::array::BooleanArray;
-/// use arrow::compute::kernels::boolean::is_null;
-/// # fn main() -> Result<()> {
-/// let a = BooleanArray::from(vec![Some(false), Some(true), None]);
-/// let a_is_null = is_null(&a)?;
-/// assert_eq!(a_is_null, BooleanArray::from(vec![false, false, true]));
-/// # Ok(())
-/// # }
-/// ```
-pub fn is_null(input: &Array) -> Result<BooleanArray> {
-    let len = input.len();
-
-    let output = match input.data_ref().null_buffer() {
-        None => {
-            let len_bytes = ceil(len, 8);
-            MutableBuffer::from_len_zeroed(len_bytes).into()
-        }
-        Some(buffer) => buffer_unary_not(buffer, input.offset(), len),
-    };
-
-    let data =
-        ArrayData::new(DataType::Boolean, len, None, None, 0, vec![output], vec![]);
-
-    Ok(BooleanArray::from(data))
-}
-
-/// Returns a non-null [BooleanArray] with whether each value of the array is not null.
-/// # Error
-/// This function never errors.
-/// # Example
-/// ```rust
-/// # use arrow::error::Result;
-/// use arrow::array::BooleanArray;
-/// use arrow::compute::kernels::boolean::is_not_null;
-/// # fn main() -> Result<()> {
-/// let a = BooleanArray::from(vec![Some(false), Some(true), None]);
-/// let a_is_not_null = is_not_null(&a)?;
-/// assert_eq!(a_is_not_null, BooleanArray::from(vec![true, true, false]));
-/// # Ok(())
-/// # }
-/// ```
-pub fn is_not_null(input: &Array) -> Result<BooleanArray> {
-    let len = input.len();
-
-    let output = match input.data_ref().null_buffer() {
-        None => {
-            let len_bytes = ceil(len, 8);
-            MutableBuffer::new(len_bytes)
-                .with_bitset(len_bytes, true)
-                .into()
-        }
-        Some(buffer) => buffer.bit_slice(input.offset(), len),
-    };
-
-    let data =
-        ArrayData::new(DataType::Boolean, len, None, None, 0, vec![output], vec![]);
-
-    Ok(BooleanArray::from(data))
-}
-
-/// Copies original array, setting null bit to true if a secondary comparison boolean array is set to true.
-/// Typically used to implement NULLIF.
-// NOTE: For now this only supports Primitive Arrays.  Although the code could be made generic, the issue
-// is that currently the bitmap operations result in a final bitmap which is aligned to bit 0, and thus
-// the left array's data needs to be sliced to a new offset, and for non-primitive arrays shifting the
-// data might be too complicated.   In the future, to avoid shifting left array's data, we could instead
-// shift the final bitbuffer to the right, prepending with 0's instead.
-pub fn nullif<T>(
-    left: &PrimitiveArray<T>,
-    right: &BooleanArray,
-) -> Result<PrimitiveArray<T>>
-where
-    T: ArrowNumericType,
-{
-    if left.len() != right.len() {
-        return Err(ArrowError::ComputeError(
-            "Cannot perform comparison operation on arrays of different length"
-                .to_string(),
-        ));
-    }
-    let left_data = left.data();
-    let right_data = right.data();
-
-    // If left has no bitmap, create a new one with all values set for nullity op later
-    // left=0 (null)   right=null       output bitmap=null
-    // left=0          right=1          output bitmap=null
-    // left=1 (set)    right=null       output bitmap=set   (passthrough)
-    // left=1          right=1 & comp=true    output bitmap=null
-    // left=1          right=1 & comp=false   output bitmap=set
-    //
-    // Thus: result = left null bitmap & (!right_values | !right_bitmap)
-    //              OR left null bitmap & !(right_values & right_bitmap)
-    //
-    // Do the right expression !(right_values & right_bitmap) first since there are two steps
-    // TRICK: convert BooleanArray buffer as a bitmap for faster operation
-    let right_combo_buffer = match right.data().null_bitmap() {
-        Some(right_bitmap) => {
-            // NOTE: right values and bitmaps are combined and stay at bit offset right.offset()
-            (right.values() & &right_bitmap.bits).ok().map(|b| b.not())
-        }
-        None => Some(!right.values()),
-    };
-
-    // AND of original left null bitmap with right expression
-    // Here we take care of the possible offsets of the left and right arrays all at once.
-    let modified_null_buffer = match left_data.null_bitmap() {
-        Some(left_null_bitmap) => match right_combo_buffer {
-            Some(rcb) => Some(buffer_bin_and(
-                &left_null_bitmap.bits,
-                left_data.offset(),
-                &rcb,
-                right_data.offset(),
-                left_data.len(),
-            )),
-            None => Some(
-                left_null_bitmap
-                    .bits
-                    .bit_slice(left_data.offset(), left.len()),
-            ),
-        },
-        None => right_combo_buffer
-            .map(|rcb| rcb.bit_slice(right_data.offset(), right_data.len())),
-    };
-
-    // Align/shift left data on offset as needed, since new bitmaps are shifted and aligned to 0 already
-    // NOTE: this probably only works for primitive arrays.
-    let data_buffers = if left.offset() == 0 {
-        left_data.buffers().to_vec()
-    } else {
-        // Shift each data buffer by type's bit_width * offset.
-        left_data
-            .buffers()
-            .iter()
-            .map(|buf| buf.slice(left.offset() * T::get_byte_width()))
-            .collect::<Vec<_>>()
-    };
-
-    // Construct new array with same values but modified null bitmap
-    // TODO: shift data buffer as needed
-    let data = ArrayData::new(
-        T::DATA_TYPE,
-        left.len(),
-        None, // force new to compute the number of null bits
-        modified_null_buffer,
-        0, // No need for offset since left data has been shifted
-        data_buffers,
-        left_data.child_data().to_vec(),
-    );
-    Ok(PrimitiveArray::<T>::from(data))
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::array::{ArrayRef, Int32Array};
-    use std::sync::Arc;
-
-    #[test]
-    fn test_bool_array_and() {
-        let a = BooleanArray::from(vec![false, false, true, true]);
-        let b = BooleanArray::from(vec![false, true, false, true]);
-        let c = and(&a, &b).unwrap();
-
-        let expected = BooleanArray::from(vec![false, false, false, true]);
-
-        assert_eq!(c, expected);
-    }
-
-    #[test]
-    fn test_bool_array_or() {
-        let a = BooleanArray::from(vec![false, false, true, true]);
-        let b = BooleanArray::from(vec![false, true, false, true]);
-        let c = or(&a, &b).unwrap();
-
-        let expected = BooleanArray::from(vec![false, true, true, true]);
-
-        assert_eq!(c, expected);
-    }
-
-    #[test]
-    fn test_bool_array_or_nulls() {
-        let a = BooleanArray::from(vec![
-            None,
-            None,
-            None,
-            Some(false),
-            Some(false),
-            Some(false),
-            Some(true),
-            Some(true),
-            Some(true),
-        ]);
-        let b = BooleanArray::from(vec![
-            None,
-            Some(false),
-            Some(true),
-            None,
-            Some(false),
-            Some(true),
-            None,
-            Some(false),
-            Some(true),
-        ]);
-        let c = or(&a, &b).unwrap();
-
-        let expected = BooleanArray::from(vec![
-            None,
-            None,
-            None,
-            None,
-            Some(false),
-            Some(true),
-            None,
-            Some(true),
-            Some(true),
-        ]);
-
-        assert_eq!(c, expected);
-    }
-
-    #[test]
-    fn test_binary_boolean_kleene_kernel() {
-        // the kleene kernel is based on chunking and we want to also create
-        // cases, where the number of values is not a multiple of 64
-        for &value in [true, false].iter() {
-            for &is_valid in [true, false].iter() {
-                for &n in [0usize, 1, 63, 64, 65, 127, 128].iter() {
-                    let a = BooleanArray::from(vec![Some(true); n]);
-                    let b = BooleanArray::from(vec![None; n]);
-
-                    let result = binary_boolean_kleene_kernel(&a, &b, |_, _, _, _| {
-                        let tmp_value = if value { u64::MAX } else { 0 };
-                        let tmp_is_valid = if is_valid { u64::MAX } else { 0 };
-                        (tmp_value, tmp_is_valid)
-                    })
-                    .unwrap();
-
-                    assert_eq!(result.len(), n);
-                    (0..n).for_each(|idx| {
-                        assert_eq!(value, result.value(idx));
-                        assert_eq!(is_valid, result.is_valid(idx));
-                    });
-                }
-            }
-        }
-    }
-
-    #[test]
-    fn test_boolean_array_kleene_no_remainder() {
-        let n = 1024;
-        let a = BooleanArray::from(vec![true; n]);
-        let b = BooleanArray::from(vec![None; n]);
-        let result = or_kleene(&a, &b).unwrap();
-
-        assert_eq!(result, a);
-    }
-
-    #[test]
-    fn test_bool_array_and_kleene_nulls() {
-        let a = BooleanArray::from(vec![
-            None,
-            None,
-            None,
-            Some(false),
-            Some(false),
-            Some(false),
-            Some(true),
-            Some(true),
-            Some(true),
-        ]);
-        let b = BooleanArray::from(vec![
-            None,
-            Some(false),
-            Some(true),
-            None,
-            Some(false),
-            Some(true),
-            None,
-            Some(false),
-            Some(true),
-        ]);
-        let c = and_kleene(&a, &b).unwrap();
-
-        let expected = BooleanArray::from(vec![
-            None,
-            Some(false),
-            None,
-            Some(false),
-            Some(false),
-            Some(false),
-            None,
-            Some(false),
-            Some(true),
-        ]);
-
-        assert_eq!(c, expected);
-    }
-
-    #[test]
-    fn test_bool_array_or_kleene_nulls() {
-        let a = BooleanArray::from(vec![
-            None,
-            None,
-            None,
-            Some(false),
-            Some(false),
-            Some(false),
-            Some(true),
-            Some(true),
-            Some(true),
-        ]);
-        let b = BooleanArray::from(vec![
-            None,
-            Some(false),
-            Some(true),
-            None,
-            Some(false),
-            Some(true),
-            None,
-            Some(false),
-            Some(true),
-        ]);
-        let c = or_kleene(&a, &b).unwrap();
-
-        let expected = BooleanArray::from(vec![
-            None,
-            None,
-            Some(true),
-            None,
-            Some(false),
-            Some(true),
-            Some(true),
-            Some(true),
-            Some(true),
-        ]);
-
-        assert_eq!(c, expected);
-    }
-
-    #[test]
-    fn test_bool_array_or_kleene_right_sided_nulls() {
-        let a = BooleanArray::from(vec![false, false, false, true, true, true]);
-
-        // ensure null bitmap of a is absent
-        assert!(a.data_ref().null_bitmap().is_none());
-
-        let b = BooleanArray::from(vec![
-            Some(true),
-            Some(false),
-            None,
-            Some(true),
-            Some(false),
-            None,
-        ]);
-
-        // ensure null bitmap of b is present
-        assert!(b.data_ref().null_bitmap().is_some());
-
-        let c = or_kleene(&a, &b).unwrap();
-
-        let expected = BooleanArray::from(vec![
-            Some(true),
-            Some(false),
-            None,
-            Some(true),
-            Some(true),
-            Some(true),
-        ]);
-
-        assert_eq!(c, expected);
-    }
-
-    #[test]
-    fn test_bool_array_or_kleene_left_sided_nulls() {
-        let a = BooleanArray::from(vec![
-            Some(true),
-            Some(false),
-            None,
-            Some(true),
-            Some(false),
-            None,
-        ]);
-
-        // ensure null bitmap of b is absent
-        assert!(a.data_ref().null_bitmap().is_some());
-
-        let b = BooleanArray::from(vec![false, false, false, true, true, true]);
-
-        // ensure null bitmap of a is present
-        assert!(b.data_ref().null_bitmap().is_none());
-
-        let c = or_kleene(&a, &b).unwrap();
-
-        let expected = BooleanArray::from(vec![
-            Some(true),
-            Some(false),
-            None,
-            Some(true),
-            Some(true),
-            Some(true),
-        ]);
-
-        assert_eq!(c, expected);
-    }
-
-    #[test]
-    fn test_bool_array_not() {
-        let a = BooleanArray::from(vec![false, true]);
-        let c = not(&a).unwrap();
-
-        let expected = BooleanArray::from(vec![true, false]);
-
-        assert_eq!(c, expected);
-    }
-
-    #[test]
-    fn test_bool_array_and_nulls() {
-        let a = BooleanArray::from(vec![
-            None,
-            None,
-            None,
-            Some(false),
-            Some(false),
-            Some(false),
-            Some(true),
-            Some(true),
-            Some(true),
-        ]);
-        let b = BooleanArray::from(vec![
-            None,
-            Some(false),
-            Some(true),
-            None,
-            Some(false),
-            Some(true),
-            None,
-            Some(false),
-            Some(true),
-        ]);
-        let c = and(&a, &b).unwrap();
-
-        let expected = BooleanArray::from(vec![
-            None,
-            None,
-            None,
-            None,
-            Some(false),
-            Some(false),
-            None,
-            Some(false),
-            Some(true),
-        ]);
-
-        assert_eq!(c, expected);
-    }
-
-    #[test]
-    fn test_bool_array_and_sliced_same_offset() {
-        let a = BooleanArray::from(vec![
-            false, false, false, false, false, false, false, false, false, false, true,
-            true,
-        ]);
-        let b = BooleanArray::from(vec![
-            false, false, false, false, false, false, false, false, false, true, false,
-            true,
-        ]);
-
-        let a = a.slice(8, 4);
-        let a = a.as_any().downcast_ref::<BooleanArray>().unwrap();
-        let b = b.slice(8, 4);
-        let b = b.as_any().downcast_ref::<BooleanArray>().unwrap();
-
-        let c = and(&a, &b).unwrap();
-
-        let expected = BooleanArray::from(vec![false, false, false, true]);
-
-        assert_eq!(expected, c);
-    }
-
-    #[test]
-    fn test_bool_array_and_sliced_same_offset_mod8() {
-        let a = BooleanArray::from(vec![
-            false, false, true, true, false, false, false, false, false, false, false,
-            false,
-        ]);
-        let b = BooleanArray::from(vec![
-            false, false, false, false, false, false, false, false, false, true, false,
-            true,
-        ]);
-
-        let a = a.slice(0, 4);
-        let a = a.as_any().downcast_ref::<BooleanArray>().unwrap();
-        let b = b.slice(8, 4);
-        let b = b.as_any().downcast_ref::<BooleanArray>().unwrap();
-
-        let c = and(&a, &b).unwrap();
-
-        let expected = BooleanArray::from(vec![false, false, false, true]);
-
-        assert_eq!(expected, c);
-    }
-
-    #[test]
-    fn test_bool_array_and_sliced_offset1() {
-        let a = BooleanArray::from(vec![
-            false, false, false, false, false, false, false, false, false, false, true,
-            true,
-        ]);
-        let b = BooleanArray::from(vec![false, true, false, true]);
-
-        let a = a.slice(8, 4);
-        let a = a.as_any().downcast_ref::<BooleanArray>().unwrap();
-
-        let c = and(&a, &b).unwrap();
-
-        let expected = BooleanArray::from(vec![false, false, false, true]);
-
-        assert_eq!(expected, c);
-    }
-
-    #[test]
-    fn test_bool_array_and_sliced_offset2() {
-        let a = BooleanArray::from(vec![false, false, true, true]);
-        let b = BooleanArray::from(vec![
-            false, false, false, false, false, false, false, false, false, true, false,
-            true,
-        ]);
-
-        let b = b.slice(8, 4);
-        let b = b.as_any().downcast_ref::<BooleanArray>().unwrap();
-
-        let c = and(&a, &b).unwrap();
-
-        let expected = BooleanArray::from(vec![false, false, false, true]);
-
-        assert_eq!(expected, c);
-    }
-
-    #[test]
-    fn test_bool_array_and_nulls_offset() {
-        let a = BooleanArray::from(vec![None, Some(false), Some(true), None, Some(true)]);
-        let a = a.slice(1, 4);
-        let a = a.as_any().downcast_ref::<BooleanArray>().unwrap();
-
-        let b = BooleanArray::from(vec![
-            None,
-            None,
-            Some(true),
-            Some(false),
-            Some(true),
-            Some(true),
-        ]);
-
-        let b = b.slice(2, 4);
-        let b = b.as_any().downcast_ref::<BooleanArray>().unwrap();
-
-        let c = and(&a, &b).unwrap();
-
-        let expected =
-            BooleanArray::from(vec![Some(false), Some(false), None, Some(true)]);
-
-        assert_eq!(expected, c);
-    }
-
-    #[test]
-    fn test_nonnull_array_is_null() {
-        let a: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3, 4]));
-
-        let res = is_null(a.as_ref()).unwrap();
-
-        let expected = BooleanArray::from(vec![false, false, false, false]);
-
-        assert_eq!(expected, res);
-        assert_eq!(&None, res.data_ref().null_bitmap());
-    }
-
-    #[test]
-    fn test_nonnull_array_with_offset_is_null() {
-        let a = Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 7, 6, 5, 4, 3, 2, 1]);
-        let a = a.slice(8, 4);
-
-        let res = is_null(a.as_ref()).unwrap();
-
-        let expected = BooleanArray::from(vec![false, false, false, false]);
-
-        assert_eq!(expected, res);
-        assert_eq!(&None, res.data_ref().null_bitmap());
-    }
-
-    #[test]
-    fn test_nonnull_array_is_not_null() {
-        let a = Int32Array::from(vec![1, 2, 3, 4]);
-
-        let res = is_not_null(&a).unwrap();
-
-        let expected = BooleanArray::from(vec![true, true, true, true]);
-
-        assert_eq!(expected, res);
-        assert_eq!(&None, res.data_ref().null_bitmap());
-    }
-
-    #[test]
-    fn test_nonnull_array_with_offset_is_not_null() {
-        let a = Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 7, 6, 5, 4, 3, 2, 1]);
-        let a = a.slice(8, 4);
-
-        let res = is_not_null(a.as_ref()).unwrap();
-
-        let expected = BooleanArray::from(vec![true, true, true, true]);
-
-        assert_eq!(expected, res);
-        assert_eq!(&None, res.data_ref().null_bitmap());
-    }
-
-    #[test]
-    fn test_nullable_array_is_null() {
-        let a = Int32Array::from(vec![Some(1), None, Some(3), None]);
-
-        let res = is_null(&a).unwrap();
-
-        let expected = BooleanArray::from(vec![false, true, false, true]);
-
-        assert_eq!(expected, res);
-        assert_eq!(&None, res.data_ref().null_bitmap());
-    }
-
-    #[test]
-    fn test_nullable_array_with_offset_is_null() {
-        let a = Int32Array::from(vec![
-            None,
-            None,
-            None,
-            None,
-            None,
-            None,
-            None,
-            None,
-            // offset 8, previous None values are skipped by the slice
-            Some(1),
-            None,
-            Some(2),
-            None,
-            Some(3),
-            Some(4),
-            None,
-            None,
-        ]);
-        let a = a.slice(8, 4);
-
-        let res = is_null(a.as_ref()).unwrap();
-
-        let expected = BooleanArray::from(vec![false, true, false, true]);
-
-        assert_eq!(expected, res);
-        assert_eq!(&None, res.data_ref().null_bitmap());
-    }
-
-    #[test]
-    fn test_nullable_array_is_not_null() {
-        let a = Int32Array::from(vec![Some(1), None, Some(3), None]);
-
-        let res = is_not_null(&a).unwrap();
-
-        let expected = BooleanArray::from(vec![true, false, true, false]);
-
-        assert_eq!(expected, res);
-        assert_eq!(&None, res.data_ref().null_bitmap());
-    }
-
-    #[test]
-    fn test_nullable_array_with_offset_is_not_null() {
-        let a = Int32Array::from(vec![
-            None,
-            None,
-            None,
-            None,
-            None,
-            None,
-            None,
-            None,
-            // offset 8, previous None values are skipped by the slice
-            Some(1),
-            None,
-            Some(2),
-            None,
-            Some(3),
-            Some(4),
-            None,
-            None,
-        ]);
-        let a = a.slice(8, 4);
-
-        let res = is_not_null(a.as_ref()).unwrap();
-
-        let expected = BooleanArray::from(vec![true, false, true, false]);
-
-        assert_eq!(expected, res);
-        assert_eq!(&None, res.data_ref().null_bitmap());
-    }
-
-    #[test]
-    fn test_nullif_int_array() {
-        let a = Int32Array::from(vec![Some(15), None, Some(8), Some(1), Some(9)]);
-        let comp =
-            BooleanArray::from(vec![Some(false), None, Some(true), Some(false), None]);
-        let res = nullif(&a, &comp).unwrap();
-
-        let expected = Int32Array::from(vec![
-            Some(15),
-            None,
-            None, // comp true, slot 2 turned into null
-            Some(1),
-            // Even though comp array / right is null, should still pass through original value
-            // comp true, slot 2 turned into null
-            Some(9),
-        ]);
-
-        assert_eq!(expected, res);
-    }
-
-    #[test]
-    fn test_nullif_int_array_offset() {
-        let a = Int32Array::from(vec![None, Some(15), Some(8), Some(1), Some(9)]);
-        let a = a.slice(1, 3); // Some(15), Some(8), Some(1)
-        let a = a.as_any().downcast_ref::<Int32Array>().unwrap();
-        let comp = BooleanArray::from(vec![
-            Some(false),
-            Some(false),
-            Some(false),
-            None,
-            Some(true),
-            Some(false),
-            None,
-        ]);
-        let comp = comp.slice(2, 3); // Some(false), None, Some(true)
-        let comp = comp.as_any().downcast_ref::<BooleanArray>().unwrap();
-        let res = nullif(&a, &comp).unwrap();
-
-        let expected = Int32Array::from(vec![
-            Some(15), // False => keep it
-            Some(8),  // None => keep it
-            None,     // true => None
-        ]);
-        assert_eq!(&expected, &res)
-    }
-}
diff --git a/rust/arrow/src/compute/kernels/cast.rs b/rust/arrow/src/compute/kernels/cast.rs
deleted file mode 100644
index de1516b0768..00000000000
--- a/rust/arrow/src/compute/kernels/cast.rs
+++ /dev/null
@@ -1,3843 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Defines cast kernels for `ArrayRef`, to convert `Array`s between
-//! supported datatypes.
-//!
-//! Example:
-//!
-//! ```
-//! use arrow::array::*;
-//! use arrow::compute::cast;
-//! use arrow::datatypes::DataType;
-//! use std::sync::Arc;
-//!
-//! let a = Int32Array::from(vec![5, 6, 7]);
-//! let array = Arc::new(a) as ArrayRef;
-//! let b = cast(&array, &DataType::Float64).unwrap();
-//! let c = b.as_any().downcast_ref::<Float64Array>().unwrap();
-//! assert_eq!(5.0, c.value(0));
-//! assert_eq!(6.0, c.value(1));
-//! assert_eq!(7.0, c.value(2));
-//! ```
-
-use std::str;
-use std::sync::Arc;
-
-use crate::buffer::MutableBuffer;
-use crate::compute::kernels::arithmetic::{divide, multiply};
-use crate::compute::kernels::arity::unary;
-use crate::compute::kernels::cast_utils::string_to_timestamp_nanos;
-use crate::datatypes::*;
-use crate::error::{ArrowError, Result};
-use crate::{array::*, compute::take};
-use crate::{buffer::Buffer, util::serialization::lexical_to_string};
-use num::{NumCast, ToPrimitive};
-
-/// CastOptions provides a way to override the default cast behaviors
-#[derive(Debug)]
-pub struct CastOptions {
-    /// how to handle cast failures, either return NULL (safe=true) or return ERR (safe=false)
-    pub safe: bool,
-}
-
-pub const DEFAULT_CAST_OPTIONS: CastOptions = CastOptions { safe: true };
-
-/// Return true if a value of type `from_type` can be cast into a
-/// value of `to_type`. Note that such as cast may be lossy.
-///
-/// If this function returns true to stay consistent with the `cast` kernel below.
-pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool {
-    use self::DataType::*;
-    if from_type == to_type {
-        return true;
-    }
-
-    match (from_type, to_type) {
-        (Struct(_), _) => false,
-        (_, Struct(_)) => false,
-        (LargeList(list_from), LargeList(list_to)) => {
-            can_cast_types(list_from.data_type(), list_to.data_type())
-        }
-        (List(list_from), List(list_to)) => {
-            can_cast_types(list_from.data_type(), list_to.data_type())
-        }
-        (List(list_from), LargeList(list_to)) => {
-            list_from.data_type() == list_to.data_type()
-        }
-        (List(_), _) => false,
-        (_, List(list_to)) => can_cast_types(from_type, list_to.data_type()),
-        (_, LargeList(list_to)) => can_cast_types(from_type, list_to.data_type()),
-        (Dictionary(_, from_value_type), Dictionary(_, to_value_type)) => {
-            can_cast_types(from_value_type, to_value_type)
-        }
-        (Dictionary(_, value_type), _) => can_cast_types(value_type, to_type),
-        (_, Dictionary(_, value_type)) => can_cast_types(from_type, value_type),
-
-        (_, Boolean) => DataType::is_numeric(from_type),
-        (Boolean, _) => DataType::is_numeric(to_type) || to_type == &Utf8,
-
-        (Utf8, LargeUtf8) => true,
-        (LargeUtf8, Utf8) => true,
-        (Utf8, Date32) => true,
-        (Utf8, Date64) => true,
-        (Utf8, Timestamp(TimeUnit::Nanosecond, None)) => true,
-        (Utf8, _) => DataType::is_numeric(to_type),
-        (LargeUtf8, Date32) => true,
-        (LargeUtf8, Date64) => true,
-        (LargeUtf8, Timestamp(TimeUnit::Nanosecond, None)) => true,
-        (LargeUtf8, _) => DataType::is_numeric(to_type),
-        (_, Utf8) | (_, LargeUtf8) => {
-            DataType::is_numeric(from_type) || from_type == &Binary
-        }
-
-        // start numeric casts
-        (UInt8, UInt16) => true,
-        (UInt8, UInt32) => true,
-        (UInt8, UInt64) => true,
-        (UInt8, Int8) => true,
-        (UInt8, Int16) => true,
-        (UInt8, Int32) => true,
-        (UInt8, Int64) => true,
-        (UInt8, Float32) => true,
-        (UInt8, Float64) => true,
-
-        (UInt16, UInt8) => true,
-        (UInt16, UInt32) => true,
-        (UInt16, UInt64) => true,
-        (UInt16, Int8) => true,
-        (UInt16, Int16) => true,
-        (UInt16, Int32) => true,
-        (UInt16, Int64) => true,
-        (UInt16, Float32) => true,
-        (UInt16, Float64) => true,
-
-        (UInt32, UInt8) => true,
-        (UInt32, UInt16) => true,
-        (UInt32, UInt64) => true,
-        (UInt32, Int8) => true,
-        (UInt32, Int16) => true,
-        (UInt32, Int32) => true,
-        (UInt32, Int64) => true,
-        (UInt32, Float32) => true,
-        (UInt32, Float64) => true,
-
-        (UInt64, UInt8) => true,
-        (UInt64, UInt16) => true,
-        (UInt64, UInt32) => true,
-        (UInt64, Int8) => true,
-        (UInt64, Int16) => true,
-        (UInt64, Int32) => true,
-        (UInt64, Int64) => true,
-        (UInt64, Float32) => true,
-        (UInt64, Float64) => true,
-
-        (Int8, UInt8) => true,
-        (Int8, UInt16) => true,
-        (Int8, UInt32) => true,
-        (Int8, UInt64) => true,
-        (Int8, Int16) => true,
-        (Int8, Int32) => true,
-        (Int8, Int64) => true,
-        (Int8, Float32) => true,
-        (Int8, Float64) => true,
-
-        (Int16, UInt8) => true,
-        (Int16, UInt16) => true,
-        (Int16, UInt32) => true,
-        (Int16, UInt64) => true,
-        (Int16, Int8) => true,
-        (Int16, Int32) => true,
-        (Int16, Int64) => true,
-        (Int16, Float32) => true,
-        (Int16, Float64) => true,
-
-        (Int32, UInt8) => true,
-        (Int32, UInt16) => true,
-        (Int32, UInt32) => true,
-        (Int32, UInt64) => true,
-        (Int32, Int8) => true,
-        (Int32, Int16) => true,
-        (Int32, Int64) => true,
-        (Int32, Float32) => true,
-        (Int32, Float64) => true,
-
-        (Int64, UInt8) => true,
-        (Int64, UInt16) => true,
-        (Int64, UInt32) => true,
-        (Int64, UInt64) => true,
-        (Int64, Int8) => true,
-        (Int64, Int16) => true,
-        (Int64, Int32) => true,
-        (Int64, Float32) => true,
-        (Int64, Float64) => true,
-
-        (Float32, UInt8) => true,
-        (Float32, UInt16) => true,
-        (Float32, UInt32) => true,
-        (Float32, UInt64) => true,
-        (Float32, Int8) => true,
-        (Float32, Int16) => true,
-        (Float32, Int32) => true,
-        (Float32, Int64) => true,
-        (Float32, Float64) => true,
-
-        (Float64, UInt8) => true,
-        (Float64, UInt16) => true,
-        (Float64, UInt32) => true,
-        (Float64, UInt64) => true,
-        (Float64, Int8) => true,
-        (Float64, Int16) => true,
-        (Float64, Int32) => true,
-        (Float64, Int64) => true,
-        (Float64, Float32) => true,
-        // end numeric casts
-
-        // temporal casts
-        (Int32, Date32) => true,
-        (Int32, Date64) => true,
-        (Int32, Time32(_)) => true,
-        (Date32, Int32) => true,
-        (Date32, Int64) => true,
-        (Time32(_), Int32) => true,
-        (Int64, Date64) => true,
-        (Int64, Date32) => true,
-        (Int64, Time64(_)) => true,
-        (Date64, Int64) => true,
-        (Date64, Int32) => true,
-        (Time64(_), Int64) => true,
-        (Date32, Date64) => true,
-        (Date64, Date32) => true,
-        (Time32(TimeUnit::Second), Time32(TimeUnit::Millisecond)) => true,
-        (Time32(TimeUnit::Millisecond), Time32(TimeUnit::Second)) => true,
-        (Time32(_), Time64(_)) => true,
-        (Time64(TimeUnit::Microsecond), Time64(TimeUnit::Nanosecond)) => true,
-        (Time64(TimeUnit::Nanosecond), Time64(TimeUnit::Microsecond)) => true,
-        (Time64(_), Time32(to_unit)) => {
-            matches!(to_unit, TimeUnit::Second | TimeUnit::Millisecond)
-        }
-        (Timestamp(_, _), Int64) => true,
-        (Int64, Timestamp(_, _)) => true,
-        (Timestamp(_, _), Timestamp(_, _)) => true,
-        (Timestamp(_, _), Date32) => true,
-        (Timestamp(_, _), Date64) => true,
-        // date64 to timestamp might not make sense,
-        (Int64, Duration(_)) => true,
-        (Null, Int32) => true,
-        (_, _) => false,
-    }
-}
-
-/// Cast `array` to the provided data type and return a new Array with
-/// type `to_type`, if possible.
-///
-/// Behavior:
-/// * Boolean to Utf8: `true` => '1', `false` => `0`
-/// * Utf8 to numeric: strings that can't be parsed to numbers return null, float strings
-///   in integer casts return null
-/// * Numeric to boolean: 0 returns `false`, any other value returns `true`
-/// * List to List: the underlying data type is cast
-/// * Primitive to List: a list array with 1 value per slot is created
-/// * Date32 and Date64: precision lost when going to higher interval
-/// * Time32 and Time64: precision lost when going to higher interval
-/// * Timestamp and Date{32|64}: precision lost when going to higher interval
-/// * Temporal to/from backing primitive: zero-copy with data type change
-///
-/// Unsupported Casts
-/// * To or from `StructArray`
-/// * List to primitive
-/// * Utf8 to boolean
-/// * Interval and duration
-pub fn cast(array: &ArrayRef, to_type: &DataType) -> Result<ArrayRef> {
-    cast_with_options(array, to_type, &DEFAULT_CAST_OPTIONS)
-}
-
-/// Cast `array` to the provided data type and return a new Array with
-/// type `to_type`, if possible. It accepts `CastOptions` to allow consumers
-/// to configure cast behavior.
-///
-/// Behavior:
-/// * Boolean to Utf8: `true` => '1', `false` => `0`
-/// * Utf8 to numeric: strings that can't be parsed to numbers return null, float strings
-///   in integer casts return null
-/// * Numeric to boolean: 0 returns `false`, any other value returns `true`
-/// * List to List: the underlying data type is cast
-/// * Primitive to List: a list array with 1 value per slot is created
-/// * Date32 and Date64: precision lost when going to higher interval
-/// * Time32 and Time64: precision lost when going to higher interval
-/// * Timestamp and Date{32|64}: precision lost when going to higher interval
-/// * Temporal to/from backing primitive: zero-copy with data type change
-///
-/// Unsupported Casts
-/// * To or from `StructArray`
-/// * List to primitive
-/// * Utf8 to boolean
-/// * Interval and duration
-pub fn cast_with_options(
-    array: &ArrayRef,
-    to_type: &DataType,
-    cast_options: &CastOptions,
-) -> Result<ArrayRef> {
-    use DataType::*;
-    let from_type = array.data_type();
-
-    // clone array if types are the same
-    if from_type == to_type {
-        return Ok(array.clone());
-    }
-    match (from_type, to_type) {
-        (Struct(_), _) => Err(ArrowError::CastError(
-            "Cannot cast from struct to other types".to_string(),
-        )),
-        (_, Struct(_)) => Err(ArrowError::CastError(
-            "Cannot cast to struct from other types".to_string(),
-        )),
-        (List(_), List(ref to)) => {
-            cast_list_inner::<i32>(array, to, to_type, cast_options)
-        }
-        (LargeList(_), LargeList(ref to)) => {
-            cast_list_inner::<i64>(array, to, to_type, cast_options)
-        }
-        (List(list_from), LargeList(list_to)) => {
-            if list_to.data_type() != list_from.data_type() {
-                Err(ArrowError::CastError(
-                    "cannot cast list to large-list with different child data".into(),
-                ))
-            } else {
-                cast_list_container::<i32, i64>(&**array, cast_options)
-            }
-        }
-        (LargeList(list_from), List(list_to)) => {
-            if list_to.data_type() != list_from.data_type() {
-                Err(ArrowError::CastError(
-                    "cannot cast large-list to list with different child data".into(),
-                ))
-            } else {
-                cast_list_container::<i64, i32>(&**array, cast_options)
-            }
-        }
-        (List(_), _) => Err(ArrowError::CastError(
-            "Cannot cast list to non-list data types".to_string(),
-        )),
-        (_, List(ref to)) => {
-            cast_primitive_to_list::<i32>(array, to, to_type, cast_options)
-        }
-        (_, LargeList(ref to)) => {
-            cast_primitive_to_list::<i64>(array, to, to_type, cast_options)
-        }
-        (Dictionary(index_type, _), _) => match **index_type {
-            DataType::Int8 => dictionary_cast::<Int8Type>(array, to_type, cast_options),
-            DataType::Int16 => dictionary_cast::<Int16Type>(array, to_type, cast_options),
-            DataType::Int32 => dictionary_cast::<Int32Type>(array, to_type, cast_options),
-            DataType::Int64 => dictionary_cast::<Int64Type>(array, to_type, cast_options),
-            DataType::UInt8 => dictionary_cast::<UInt8Type>(array, to_type, cast_options),
-            DataType::UInt16 => {
-                dictionary_cast::<UInt16Type>(array, to_type, cast_options)
-            }
-            DataType::UInt32 => {
-                dictionary_cast::<UInt32Type>(array, to_type, cast_options)
-            }
-            DataType::UInt64 => {
-                dictionary_cast::<UInt64Type>(array, to_type, cast_options)
-            }
-            _ => Err(ArrowError::CastError(format!(
-                "Casting from dictionary type {:?} to {:?} not supported",
-                from_type, to_type,
-            ))),
-        },
-        (_, Dictionary(index_type, value_type)) => match **index_type {
-            DataType::Int8 => {
-                cast_to_dictionary::<Int8Type>(array, value_type, cast_options)
-            }
-            DataType::Int16 => {
-                cast_to_dictionary::<Int16Type>(array, value_type, cast_options)
-            }
-            DataType::Int32 => {
-                cast_to_dictionary::<Int32Type>(array, value_type, cast_options)
-            }
-            DataType::Int64 => {
-                cast_to_dictionary::<Int64Type>(array, value_type, cast_options)
-            }
-            DataType::UInt8 => {
-                cast_to_dictionary::<UInt8Type>(array, value_type, cast_options)
-            }
-            DataType::UInt16 => {
-                cast_to_dictionary::<UInt16Type>(array, value_type, cast_options)
-            }
-            DataType::UInt32 => {
-                cast_to_dictionary::<UInt32Type>(array, value_type, cast_options)
-            }
-            DataType::UInt64 => {
-                cast_to_dictionary::<UInt64Type>(array, value_type, cast_options)
-            }
-            _ => Err(ArrowError::CastError(format!(
-                "Casting from type {:?} to dictionary type {:?} not supported",
-                from_type, to_type,
-            ))),
-        },
-        (_, Boolean) => match from_type {
-            UInt8 => cast_numeric_to_bool::<UInt8Type>(array),
-            UInt16 => cast_numeric_to_bool::<UInt16Type>(array),
-            UInt32 => cast_numeric_to_bool::<UInt32Type>(array),
-            UInt64 => cast_numeric_to_bool::<UInt64Type>(array),
-            Int8 => cast_numeric_to_bool::<Int8Type>(array),
-            Int16 => cast_numeric_to_bool::<Int16Type>(array),
-            Int32 => cast_numeric_to_bool::<Int32Type>(array),
-            Int64 => cast_numeric_to_bool::<Int64Type>(array),
-            Float32 => cast_numeric_to_bool::<Float32Type>(array),
-            Float64 => cast_numeric_to_bool::<Float64Type>(array),
-            Utf8 => Err(ArrowError::CastError(format!(
-                "Casting from {:?} to {:?} not supported",
-                from_type, to_type,
-            ))),
-            _ => Err(ArrowError::CastError(format!(
-                "Casting from {:?} to {:?} not supported",
-                from_type, to_type,
-            ))),
-        },
-        (Boolean, _) => match to_type {
-            UInt8 => cast_bool_to_numeric::<UInt8Type>(array, cast_options),
-            UInt16 => cast_bool_to_numeric::<UInt16Type>(array, cast_options),
-            UInt32 => cast_bool_to_numeric::<UInt32Type>(array, cast_options),
-            UInt64 => cast_bool_to_numeric::<UInt64Type>(array, cast_options),
-            Int8 => cast_bool_to_numeric::<Int8Type>(array, cast_options),
-            Int16 => cast_bool_to_numeric::<Int16Type>(array, cast_options),
-            Int32 => cast_bool_to_numeric::<Int32Type>(array, cast_options),
-            Int64 => cast_bool_to_numeric::<Int64Type>(array, cast_options),
-            Float32 => cast_bool_to_numeric::<Float32Type>(array, cast_options),
-            Float64 => cast_bool_to_numeric::<Float64Type>(array, cast_options),
-            Utf8 => {
-                let array = array.as_any().downcast_ref::<BooleanArray>().unwrap();
-                Ok(Arc::new(
-                    array
-                        .iter()
-                        .map(|value| value.map(|value| if value { "1" } else { "0" }))
-                        .collect::<StringArray>(),
-                ))
-            }
-            _ => Err(ArrowError::CastError(format!(
-                "Casting from {:?} to {:?} not supported",
-                from_type, to_type,
-            ))),
-        },
-        (Utf8, _) => match to_type {
-            LargeUtf8 => cast_str_container::<i32, i64>(&**array),
-            UInt8 => cast_string_to_numeric::<UInt8Type, i32>(array, cast_options),
-            UInt16 => cast_string_to_numeric::<UInt16Type, i32>(array, cast_options),
-            UInt32 => cast_string_to_numeric::<UInt32Type, i32>(array, cast_options),
-            UInt64 => cast_string_to_numeric::<UInt64Type, i32>(array, cast_options),
-            Int8 => cast_string_to_numeric::<Int8Type, i32>(array, cast_options),
-            Int16 => cast_string_to_numeric::<Int16Type, i32>(array, cast_options),
-            Int32 => cast_string_to_numeric::<Int32Type, i32>(array, cast_options),
-            Int64 => cast_string_to_numeric::<Int64Type, i32>(array, cast_options),
-            Float32 => cast_string_to_numeric::<Float32Type, i32>(array, cast_options),
-            Float64 => cast_string_to_numeric::<Float64Type, i32>(array, cast_options),
-            Date32 => cast_string_to_date32::<i32>(&**array, cast_options),
-            Date64 => cast_string_to_date64::<i32>(&**array, cast_options),
-            Timestamp(TimeUnit::Nanosecond, None) => {
-                cast_string_to_timestamp_ns::<i32>(&**array, cast_options)
-            }
-            _ => Err(ArrowError::CastError(format!(
-                "Casting from {:?} to {:?} not supported",
-                from_type, to_type,
-            ))),
-        },
-        (_, Utf8) => match from_type {
-            LargeUtf8 => cast_str_container::<i64, i32>(&**array),
-            UInt8 => cast_numeric_to_string::<UInt8Type, i32>(array),
-            UInt16 => cast_numeric_to_string::<UInt16Type, i32>(array),
-            UInt32 => cast_numeric_to_string::<UInt32Type, i32>(array),
-            UInt64 => cast_numeric_to_string::<UInt64Type, i32>(array),
-            Int8 => cast_numeric_to_string::<Int8Type, i32>(array),
-            Int16 => cast_numeric_to_string::<Int16Type, i32>(array),
-            Int32 => cast_numeric_to_string::<Int32Type, i32>(array),
-            Int64 => cast_numeric_to_string::<Int64Type, i32>(array),
-            Float32 => cast_numeric_to_string::<Float32Type, i32>(array),
-            Float64 => cast_numeric_to_string::<Float64Type, i32>(array),
-            Binary => {
-                let array = array.as_any().downcast_ref::<BinaryArray>().unwrap();
-                Ok(Arc::new(
-                    array
-                        .iter()
-                        .map(|maybe_value| match maybe_value {
-                            Some(value) => {
-                                let result = str::from_utf8(value);
-                                if cast_options.safe {
-                                    Ok(result.ok())
-                                } else {
-                                    Some(result.map_err(|_| {
-                                        ArrowError::CastError(
-                                            "Cannot cast binary to string".to_string(),
-                                        )
-                                    }))
-                                    .transpose()
-                                }
-                            }
-                            None => Ok(None),
-                        })
-                        .collect::<Result<StringArray>>()?,
-                ))
-            }
-            _ => Err(ArrowError::CastError(format!(
-                "Casting from {:?} to {:?} not supported",
-                from_type, to_type,
-            ))),
-        },
-        (_, LargeUtf8) => match from_type {
-            UInt8 => cast_numeric_to_string::<UInt8Type, i64>(array),
-            UInt16 => cast_numeric_to_string::<UInt16Type, i64>(array),
-            UInt32 => cast_numeric_to_string::<UInt32Type, i64>(array),
-            UInt64 => cast_numeric_to_string::<UInt64Type, i64>(array),
-            Int8 => cast_numeric_to_string::<Int8Type, i64>(array),
-            Int16 => cast_numeric_to_string::<Int16Type, i64>(array),
-            Int32 => cast_numeric_to_string::<Int32Type, i64>(array),
-            Int64 => cast_numeric_to_string::<Int64Type, i64>(array),
-            Float32 => cast_numeric_to_string::<Float32Type, i64>(array),
-            Float64 => cast_numeric_to_string::<Float64Type, i64>(array),
-            Binary => {
-                let array = array.as_any().downcast_ref::<BinaryArray>().unwrap();
-                Ok(Arc::new(
-                    array
-                        .iter()
-                        .map(|maybe_value| match maybe_value {
-                            Some(value) => {
-                                let result = str::from_utf8(value);
-                                if cast_options.safe {
-                                    Ok(result.ok())
-                                } else {
-                                    Some(result.map_err(|_| {
-                                        ArrowError::CastError(
-                                            "Cannot cast binary to string".to_string(),
-                                        )
-                                    }))
-                                    .transpose()
-                                }
-                            }
-                            None => Ok(None),
-                        })
-                        .collect::<Result<LargeStringArray>>()?,
-                ))
-            }
-            _ => Err(ArrowError::CastError(format!(
-                "Casting from {:?} to {:?} not supported",
-                from_type, to_type,
-            ))),
-        },
-        (LargeUtf8, _) => match to_type {
-            UInt8 => cast_string_to_numeric::<UInt8Type, i64>(array, cast_options),
-            UInt16 => cast_string_to_numeric::<UInt16Type, i64>(array, cast_options),
-            UInt32 => cast_string_to_numeric::<UInt32Type, i64>(array, cast_options),
-            UInt64 => cast_string_to_numeric::<UInt64Type, i64>(array, cast_options),
-            Int8 => cast_string_to_numeric::<Int8Type, i64>(array, cast_options),
-            Int16 => cast_string_to_numeric::<Int16Type, i64>(array, cast_options),
-            Int32 => cast_string_to_numeric::<Int32Type, i64>(array, cast_options),
-            Int64 => cast_string_to_numeric::<Int64Type, i64>(array, cast_options),
-            Float32 => cast_string_to_numeric::<Float32Type, i64>(array, cast_options),
-            Float64 => cast_string_to_numeric::<Float64Type, i64>(array, cast_options),
-            Date32 => cast_string_to_date32::<i64>(&**array, cast_options),
-            Date64 => cast_string_to_date64::<i64>(&**array, cast_options),
-            Timestamp(TimeUnit::Nanosecond, None) => {
-                cast_string_to_timestamp_ns::<i64>(&**array, cast_options)
-            }
-            _ => Err(ArrowError::CastError(format!(
-                "Casting from {:?} to {:?} not supported",
-                from_type, to_type,
-            ))),
-        },
-
-        // start numeric casts
-        (UInt8, UInt16) => cast_numeric_arrays::<UInt8Type, UInt16Type>(array),
-        (UInt8, UInt32) => cast_numeric_arrays::<UInt8Type, UInt32Type>(array),
-        (UInt8, UInt64) => cast_numeric_arrays::<UInt8Type, UInt64Type>(array),
-        (UInt8, Int8) => cast_numeric_arrays::<UInt8Type, Int8Type>(array),
-        (UInt8, Int16) => cast_numeric_arrays::<UInt8Type, Int16Type>(array),
-        (UInt8, Int32) => cast_numeric_arrays::<UInt8Type, Int32Type>(array),
-        (UInt8, Int64) => cast_numeric_arrays::<UInt8Type, Int64Type>(array),
-        (UInt8, Float32) => cast_numeric_arrays::<UInt8Type, Float32Type>(array),
-        (UInt8, Float64) => cast_numeric_arrays::<UInt8Type, Float64Type>(array),
-
-        (UInt16, UInt8) => cast_numeric_arrays::<UInt16Type, UInt8Type>(array),
-        (UInt16, UInt32) => cast_numeric_arrays::<UInt16Type, UInt32Type>(array),
-        (UInt16, UInt64) => cast_numeric_arrays::<UInt16Type, UInt64Type>(array),
-        (UInt16, Int8) => cast_numeric_arrays::<UInt16Type, Int8Type>(array),
-        (UInt16, Int16) => cast_numeric_arrays::<UInt16Type, Int16Type>(array),
-        (UInt16, Int32) => cast_numeric_arrays::<UInt16Type, Int32Type>(array),
-        (UInt16, Int64) => cast_numeric_arrays::<UInt16Type, Int64Type>(array),
-        (UInt16, Float32) => cast_numeric_arrays::<UInt16Type, Float32Type>(array),
-        (UInt16, Float64) => cast_numeric_arrays::<UInt16Type, Float64Type>(array),
-
-        (UInt32, UInt8) => cast_numeric_arrays::<UInt32Type, UInt8Type>(array),
-        (UInt32, UInt16) => cast_numeric_arrays::<UInt32Type, UInt16Type>(array),
-        (UInt32, UInt64) => cast_numeric_arrays::<UInt32Type, UInt64Type>(array),
-        (UInt32, Int8) => cast_numeric_arrays::<UInt32Type, Int8Type>(array),
-        (UInt32, Int16) => cast_numeric_arrays::<UInt32Type, Int16Type>(array),
-        (UInt32, Int32) => cast_numeric_arrays::<UInt32Type, Int32Type>(array),
-        (UInt32, Int64) => cast_numeric_arrays::<UInt32Type, Int64Type>(array),
-        (UInt32, Float32) => cast_numeric_arrays::<UInt32Type, Float32Type>(array),
-        (UInt32, Float64) => cast_numeric_arrays::<UInt32Type, Float64Type>(array),
-
-        (UInt64, UInt8) => cast_numeric_arrays::<UInt64Type, UInt8Type>(array),
-        (UInt64, UInt16) => cast_numeric_arrays::<UInt64Type, UInt16Type>(array),
-        (UInt64, UInt32) => cast_numeric_arrays::<UInt64Type, UInt32Type>(array),
-        (UInt64, Int8) => cast_numeric_arrays::<UInt64Type, Int8Type>(array),
-        (UInt64, Int16) => cast_numeric_arrays::<UInt64Type, Int16Type>(array),
-        (UInt64, Int32) => cast_numeric_arrays::<UInt64Type, Int32Type>(array),
-        (UInt64, Int64) => cast_numeric_arrays::<UInt64Type, Int64Type>(array),
-        (UInt64, Float32) => cast_numeric_arrays::<UInt64Type, Float32Type>(array),
-        (UInt64, Float64) => cast_numeric_arrays::<UInt64Type, Float64Type>(array),
-
-        (Int8, UInt8) => cast_numeric_arrays::<Int8Type, UInt8Type>(array),
-        (Int8, UInt16) => cast_numeric_arrays::<Int8Type, UInt16Type>(array),
-        (Int8, UInt32) => cast_numeric_arrays::<Int8Type, UInt32Type>(array),
-        (Int8, UInt64) => cast_numeric_arrays::<Int8Type, UInt64Type>(array),
-        (Int8, Int16) => cast_numeric_arrays::<Int8Type, Int16Type>(array),
-        (Int8, Int32) => cast_numeric_arrays::<Int8Type, Int32Type>(array),
-        (Int8, Int64) => cast_numeric_arrays::<Int8Type, Int64Type>(array),
-        (Int8, Float32) => cast_numeric_arrays::<Int8Type, Float32Type>(array),
-        (Int8, Float64) => cast_numeric_arrays::<Int8Type, Float64Type>(array),
-
-        (Int16, UInt8) => cast_numeric_arrays::<Int16Type, UInt8Type>(array),
-        (Int16, UInt16) => cast_numeric_arrays::<Int16Type, UInt16Type>(array),
-        (Int16, UInt32) => cast_numeric_arrays::<Int16Type, UInt32Type>(array),
-        (Int16, UInt64) => cast_numeric_arrays::<Int16Type, UInt64Type>(array),
-        (Int16, Int8) => cast_numeric_arrays::<Int16Type, Int8Type>(array),
-        (Int16, Int32) => cast_numeric_arrays::<Int16Type, Int32Type>(array),
-        (Int16, Int64) => cast_numeric_arrays::<Int16Type, Int64Type>(array),
-        (Int16, Float32) => cast_numeric_arrays::<Int16Type, Float32Type>(array),
-        (Int16, Float64) => cast_numeric_arrays::<Int16Type, Float64Type>(array),
-
-        (Int32, UInt8) => cast_numeric_arrays::<Int32Type, UInt8Type>(array),
-        (Int32, UInt16) => cast_numeric_arrays::<Int32Type, UInt16Type>(array),
-        (Int32, UInt32) => cast_numeric_arrays::<Int32Type, UInt32Type>(array),
-        (Int32, UInt64) => cast_numeric_arrays::<Int32Type, UInt64Type>(array),
-        (Int32, Int8) => cast_numeric_arrays::<Int32Type, Int8Type>(array),
-        (Int32, Int16) => cast_numeric_arrays::<Int32Type, Int16Type>(array),
-        (Int32, Int64) => cast_numeric_arrays::<Int32Type, Int64Type>(array),
-        (Int32, Float32) => cast_numeric_arrays::<Int32Type, Float32Type>(array),
-        (Int32, Float64) => cast_numeric_arrays::<Int32Type, Float64Type>(array),
-
-        (Int64, UInt8) => cast_numeric_arrays::<Int64Type, UInt8Type>(array),
-        (Int64, UInt16) => cast_numeric_arrays::<Int64Type, UInt16Type>(array),
-        (Int64, UInt32) => cast_numeric_arrays::<Int64Type, UInt32Type>(array),
-        (Int64, UInt64) => cast_numeric_arrays::<Int64Type, UInt64Type>(array),
-        (Int64, Int8) => cast_numeric_arrays::<Int64Type, Int8Type>(array),
-        (Int64, Int16) => cast_numeric_arrays::<Int64Type, Int16Type>(array),
-        (Int64, Int32) => cast_numeric_arrays::<Int64Type, Int32Type>(array),
-        (Int64, Float32) => cast_numeric_arrays::<Int64Type, Float32Type>(array),
-        (Int64, Float64) => cast_numeric_arrays::<Int64Type, Float64Type>(array),
-
-        (Float32, UInt8) => cast_numeric_arrays::<Float32Type, UInt8Type>(array),
-        (Float32, UInt16) => cast_numeric_arrays::<Float32Type, UInt16Type>(array),
-        (Float32, UInt32) => cast_numeric_arrays::<Float32Type, UInt32Type>(array),
-        (Float32, UInt64) => cast_numeric_arrays::<Float32Type, UInt64Type>(array),
-        (Float32, Int8) => cast_numeric_arrays::<Float32Type, Int8Type>(array),
-        (Float32, Int16) => cast_numeric_arrays::<Float32Type, Int16Type>(array),
-        (Float32, Int32) => cast_numeric_arrays::<Float32Type, Int32Type>(array),
-        (Float32, Int64) => cast_numeric_arrays::<Float32Type, Int64Type>(array),
-        (Float32, Float64) => cast_numeric_arrays::<Float32Type, Float64Type>(array),
-
-        (Float64, UInt8) => cast_numeric_arrays::<Float64Type, UInt8Type>(array),
-        (Float64, UInt16) => cast_numeric_arrays::<Float64Type, UInt16Type>(array),
-        (Float64, UInt32) => cast_numeric_arrays::<Float64Type, UInt32Type>(array),
-        (Float64, UInt64) => cast_numeric_arrays::<Float64Type, UInt64Type>(array),
-        (Float64, Int8) => cast_numeric_arrays::<Float64Type, Int8Type>(array),
-        (Float64, Int16) => cast_numeric_arrays::<Float64Type, Int16Type>(array),
-        (Float64, Int32) => cast_numeric_arrays::<Float64Type, Int32Type>(array),
-        (Float64, Int64) => cast_numeric_arrays::<Float64Type, Int64Type>(array),
-        (Float64, Float32) => cast_numeric_arrays::<Float64Type, Float32Type>(array),
-        // end numeric casts
-
-        // temporal casts
-        (Int32, Date32) => cast_array_data::<Date32Type>(array, to_type.clone()),
-        (Int32, Date64) => cast_with_options(
-            &cast_with_options(array, &DataType::Date32, &cast_options)?,
-            &DataType::Date64,
-            &cast_options,
-        ),
-        (Int32, Time32(TimeUnit::Second)) => {
-            cast_array_data::<Time32SecondType>(array, to_type.clone())
-        }
-        (Int32, Time32(TimeUnit::Millisecond)) => {
-            cast_array_data::<Time32MillisecondType>(array, to_type.clone())
-        }
-        // No support for microsecond/nanosecond with i32
-        (Date32, Int32) => cast_array_data::<Int32Type>(array, to_type.clone()),
-        (Date32, Int64) => cast_with_options(
-            &cast_with_options(array, &DataType::Int32, cast_options)?,
-            &DataType::Int64,
-            &cast_options,
-        ),
-        (Time32(_), Int32) => cast_array_data::<Int32Type>(array, to_type.clone()),
-        (Int64, Date64) => cast_array_data::<Date64Type>(array, to_type.clone()),
-        (Int64, Date32) => cast_with_options(
-            &cast_with_options(array, &DataType::Int32, &cast_options)?,
-            &DataType::Date32,
-            &cast_options,
-        ),
-        // No support for second/milliseconds with i64
-        (Int64, Time64(TimeUnit::Microsecond)) => {
-            cast_array_data::<Time64MicrosecondType>(array, to_type.clone())
-        }
-        (Int64, Time64(TimeUnit::Nanosecond)) => {
-            cast_array_data::<Time64NanosecondType>(array, to_type.clone())
-        }
-
-        (Date64, Int64) => cast_array_data::<Int64Type>(array, to_type.clone()),
-        (Date64, Int32) => cast_with_options(
-            &cast_with_options(array, &DataType::Int64, &cast_options)?,
-            &DataType::Int32,
-            &cast_options,
-        ),
-        (Time64(_), Int64) => cast_array_data::<Int64Type>(array, to_type.clone()),
-        (Date32, Date64) => {
-            let date_array = array.as_any().downcast_ref::<Date32Array>().unwrap();
-
-            let values =
-                unary::<_, _, Date64Type>(date_array, |x| x as i64 * MILLISECONDS_IN_DAY);
-
-            Ok(Arc::new(values) as ArrayRef)
-        }
-        (Date64, Date32) => {
-            let date_array = array.as_any().downcast_ref::<Date64Array>().unwrap();
-
-            let values = unary::<_, _, Date32Type>(date_array, |x| {
-                (x / MILLISECONDS_IN_DAY) as i32
-            });
-
-            Ok(Arc::new(values) as ArrayRef)
-        }
-        (Time32(TimeUnit::Second), Time32(TimeUnit::Millisecond)) => {
-            let time_array = array.as_any().downcast_ref::<Time32SecondArray>().unwrap();
-
-            let values = unary::<_, _, Time32MillisecondType>(time_array, |x| {
-                x * MILLISECONDS as i32
-            });
-
-            Ok(Arc::new(values) as ArrayRef)
-        }
-        (Time32(TimeUnit::Millisecond), Time32(TimeUnit::Second)) => {
-            let time_array = array
-                .as_any()
-                .downcast_ref::<Time32MillisecondArray>()
-                .unwrap();
-
-            let values = unary::<_, _, Time32SecondType>(time_array, |x| {
-                x / (MILLISECONDS as i32)
-            });
-
-            Ok(Arc::new(values) as ArrayRef)
-        }
-        //(Time32(TimeUnit::Second), Time64(_)) => {},
-        (Time32(from_unit), Time64(to_unit)) => {
-            let time_array = Int32Array::from(array.data().clone());
-            // note: (numeric_cast + SIMD multiply) is faster than (cast & multiply)
-            let c: Int64Array = numeric_cast(&time_array);
-            let from_size = time_unit_multiple(&from_unit);
-            let to_size = time_unit_multiple(&to_unit);
-            // from is only smaller than to if 64milli/64second don't exist
-            let mult = Int64Array::from(vec![to_size / from_size; array.len()]);
-            let converted = multiply(&c, &mult)?;
-            let array_ref = Arc::new(converted) as ArrayRef;
-            use TimeUnit::*;
-            match to_unit {
-                Microsecond => cast_array_data::<TimestampMicrosecondType>(
-                    &array_ref,
-                    to_type.clone(),
-                ),
-                Nanosecond => cast_array_data::<TimestampNanosecondType>(
-                    &array_ref,
-                    to_type.clone(),
-                ),
-                _ => unreachable!("array type not supported"),
-            }
-        }
-        (Time64(TimeUnit::Microsecond), Time64(TimeUnit::Nanosecond)) => {
-            let time_array = array
-                .as_any()
-                .downcast_ref::<Time64MicrosecondArray>()
-                .unwrap();
-
-            let values =
-                unary::<_, _, Time64NanosecondType>(time_array, |x| x * MILLISECONDS);
-            Ok(Arc::new(values) as ArrayRef)
-        }
-        (Time64(TimeUnit::Nanosecond), Time64(TimeUnit::Microsecond)) => {
-            let time_array = array
-                .as_any()
-                .downcast_ref::<Time64NanosecondArray>()
-                .unwrap();
-
-            let values =
-                unary::<_, _, Time64MicrosecondType>(time_array, |x| x / MILLISECONDS);
-            Ok(Arc::new(values) as ArrayRef)
-        }
-        (Time64(from_unit), Time32(to_unit)) => {
-            let time_array = Int64Array::from(array.data().clone());
-            let from_size = time_unit_multiple(&from_unit);
-            let to_size = time_unit_multiple(&to_unit);
-            let divisor = from_size / to_size;
-            match to_unit {
-                TimeUnit::Second => {
-                    let values = unary::<_, _, Time32SecondType>(&time_array, |x| {
-                        (x as i64 / divisor) as i32
-                    });
-                    Ok(Arc::new(values) as ArrayRef)
-                }
-                TimeUnit::Millisecond => {
-                    let values = unary::<_, _, Time32MillisecondType>(&time_array, |x| {
-                        (x as i64 / divisor) as i32
-                    });
-                    Ok(Arc::new(values) as ArrayRef)
-                }
-                _ => unreachable!("array type not supported"),
-            }
-        }
-        (Timestamp(_, _), Int64) => cast_array_data::<Int64Type>(array, to_type.clone()),
-        (Int64, Timestamp(to_unit, _)) => {
-            use TimeUnit::*;
-            match to_unit {
-                Second => cast_array_data::<TimestampSecondType>(array, to_type.clone()),
-                Millisecond => {
-                    cast_array_data::<TimestampMillisecondType>(array, to_type.clone())
-                }
-                Microsecond => {
-                    cast_array_data::<TimestampMicrosecondType>(array, to_type.clone())
-                }
-                Nanosecond => {
-                    cast_array_data::<TimestampNanosecondType>(array, to_type.clone())
-                }
-            }
-        }
-        (Timestamp(from_unit, _), Timestamp(to_unit, _)) => {
-            let time_array = Int64Array::from(array.data().clone());
-            let from_size = time_unit_multiple(&from_unit);
-            let to_size = time_unit_multiple(&to_unit);
-            // we either divide or multiply, depending on size of each unit
-            // units are never the same when the types are the same
-            let converted = if from_size >= to_size {
-                divide(
-                    &time_array,
-                    &Int64Array::from(vec![from_size / to_size; array.len()]),
-                )?
-            } else {
-                multiply(
-                    &time_array,
-                    &Int64Array::from(vec![to_size / from_size; array.len()]),
-                )?
-            };
-            let array_ref = Arc::new(converted) as ArrayRef;
-            use TimeUnit::*;
-            match to_unit {
-                Second => {
-                    cast_array_data::<TimestampSecondType>(&array_ref, to_type.clone())
-                }
-                Millisecond => cast_array_data::<TimestampMillisecondType>(
-                    &array_ref,
-                    to_type.clone(),
-                ),
-                Microsecond => cast_array_data::<TimestampMicrosecondType>(
-                    &array_ref,
-                    to_type.clone(),
-                ),
-                Nanosecond => cast_array_data::<TimestampNanosecondType>(
-                    &array_ref,
-                    to_type.clone(),
-                ),
-            }
-        }
-        (Timestamp(from_unit, _), Date32) => {
-            let time_array = Int64Array::from(array.data().clone());
-            let from_size = time_unit_multiple(&from_unit) * SECONDS_IN_DAY;
-            let mut b = Date32Builder::new(array.len());
-            for i in 0..array.len() {
-                if array.is_null(i) {
-                    b.append_null()?;
-                } else {
-                    b.append_value((time_array.value(i) / from_size) as i32)?;
-                }
-            }
-
-            Ok(Arc::new(b.finish()) as ArrayRef)
-        }
-        (Timestamp(from_unit, _), Date64) => {
-            let from_size = time_unit_multiple(&from_unit);
-            let to_size = MILLISECONDS;
-
-            // Scale time_array by (to_size / from_size) using a
-            // single integer operation, but need to avoid integer
-            // math rounding down to zero
-
-            match to_size.cmp(&from_size) {
-                std::cmp::Ordering::Less => {
-                    let time_array = Date64Array::from(array.data().clone());
-                    Ok(Arc::new(divide(
-                        &time_array,
-                        &Date64Array::from(vec![from_size / to_size; array.len()]),
-                    )?) as ArrayRef)
-                }
-                std::cmp::Ordering::Equal => {
-                    cast_array_data::<Date64Type>(array, to_type.clone())
-                }
-                std::cmp::Ordering::Greater => {
-                    let time_array = Date64Array::from(array.data().clone());
-                    Ok(Arc::new(multiply(
-                        &time_array,
-                        &Date64Array::from(vec![to_size / from_size; array.len()]),
-                    )?) as ArrayRef)
-                }
-            }
-        }
-        // date64 to timestamp might not make sense,
-        (Int64, Duration(to_unit)) => {
-            use TimeUnit::*;
-            match to_unit {
-                Second => cast_array_data::<DurationSecondType>(array, to_type.clone()),
-                Millisecond => {
-                    cast_array_data::<DurationMillisecondType>(array, to_type.clone())
-                }
-                Microsecond => {
-                    cast_array_data::<DurationMicrosecondType>(array, to_type.clone())
-                }
-                Nanosecond => {
-                    cast_array_data::<DurationNanosecondType>(array, to_type.clone())
-                }
-            }
-        }
-
-        // null to primitive/flat types
-        (Null, Int32) => Ok(Arc::new(Int32Array::from(vec![None; array.len()]))),
-
-        (_, _) => Err(ArrowError::CastError(format!(
-            "Casting from {:?} to {:?} not supported",
-            from_type, to_type,
-        ))),
-    }
-}
-
-/// Get the time unit as a multiple of a second
-const fn time_unit_multiple(unit: &TimeUnit) -> i64 {
-    match unit {
-        TimeUnit::Second => 1,
-        TimeUnit::Millisecond => MILLISECONDS,
-        TimeUnit::Microsecond => MICROSECONDS,
-        TimeUnit::Nanosecond => NANOSECONDS,
-    }
-}
-
-/// Number of seconds in a day
-const SECONDS_IN_DAY: i64 = 86_400;
-/// Number of milliseconds in a second
-const MILLISECONDS: i64 = 1_000;
-/// Number of microseconds in a second
-const MICROSECONDS: i64 = 1_000_000;
-/// Number of nanoseconds in a second
-const NANOSECONDS: i64 = 1_000_000_000;
-/// Number of milliseconds in a day
-const MILLISECONDS_IN_DAY: i64 = SECONDS_IN_DAY * MILLISECONDS;
-/// Number of days between 0001-01-01 and 1970-01-01
-const EPOCH_DAYS_FROM_CE: i32 = 719_163;
-
-/// Cast an array by changing its array_data type to the desired type
-///
-/// Arrays should have the same primitive data type, otherwise this should fail.
-/// We do not perform this check on primitive data types as we only use this
-/// function internally, where it is guaranteed to be infallible.
-#[allow(clippy::unnecessary_wraps)]
-fn cast_array_data<TO>(array: &ArrayRef, to_type: DataType) -> Result<ArrayRef>
-where
-    TO: ArrowNumericType,
-{
-    let data = ArrayData::new(
-        to_type,
-        array.len(),
-        Some(array.null_count()),
-        array.data().null_bitmap().clone().map(|bitmap| bitmap.bits),
-        array.data().offset(),
-        array.data().buffers().to_vec(),
-        vec![],
-    );
-    Ok(Arc::new(PrimitiveArray::<TO>::from(data)) as ArrayRef)
-}
-
-/// Convert Array into a PrimitiveArray of type, and apply numeric cast
-#[allow(clippy::unnecessary_wraps)]
-fn cast_numeric_arrays<FROM, TO>(from: &ArrayRef) -> Result<ArrayRef>
-where
-    FROM: ArrowNumericType,
-    TO: ArrowNumericType,
-    FROM::Native: num::NumCast,
-    TO::Native: num::NumCast,
-{
-    Ok(Arc::new(numeric_cast::<FROM, TO>(
-        from.as_any()
-            .downcast_ref::<PrimitiveArray<FROM>>()
-            .unwrap(),
-    )))
-}
-
-/// Natural cast between numeric types
-fn numeric_cast<T, R>(from: &PrimitiveArray<T>) -> PrimitiveArray<R>
-where
-    T: ArrowNumericType,
-    R: ArrowNumericType,
-    T::Native: num::NumCast,
-    R::Native: num::NumCast,
-{
-    let iter = from
-        .iter()
-        .map(|v| v.and_then(num::cast::cast::<T::Native, R::Native>));
-    // Soundness:
-    //  The iterator is trustedLen because it comes from an `PrimitiveArray`.
-    unsafe { PrimitiveArray::<R>::from_trusted_len_iter(iter) }
-}
-
-/// Cast numeric types to Utf8
-#[allow(clippy::unnecessary_wraps)]
-fn cast_numeric_to_string<FROM, OffsetSize>(array: &ArrayRef) -> Result<ArrayRef>
-where
-    FROM: ArrowNumericType,
-    FROM::Native: lexical_core::ToLexical,
-    OffsetSize: StringOffsetSizeTrait,
-{
-    Ok(Arc::new(numeric_to_string_cast::<FROM, OffsetSize>(
-        array
-            .as_any()
-            .downcast_ref::<PrimitiveArray<FROM>>()
-            .unwrap(),
-    )))
-}
-
-fn numeric_to_string_cast<T, OffsetSize>(
-    from: &PrimitiveArray<T>,
-) -> GenericStringArray<OffsetSize>
-where
-    T: ArrowPrimitiveType + ArrowNumericType,
-    T::Native: lexical_core::ToLexical,
-    OffsetSize: StringOffsetSizeTrait,
-{
-    from.iter()
-        .map(|maybe_value| maybe_value.map(lexical_to_string))
-        .collect()
-}
-
-/// Cast numeric types to Utf8
-#[allow(clippy::unnecessary_wraps)]
-fn cast_string_to_numeric<T, Offset: StringOffsetSizeTrait>(
-    from: &ArrayRef,
-    cast_options: &CastOptions,
-) -> Result<ArrayRef>
-where
-    T: ArrowNumericType,
-    <T as ArrowPrimitiveType>::Native: lexical_core::FromLexical,
-{
-    Ok(Arc::new(string_to_numeric_cast::<T, Offset>(
-        from.as_any()
-            .downcast_ref::<GenericStringArray<Offset>>()
-            .unwrap(),
-        cast_options,
-    )?))
-}
-
-fn string_to_numeric_cast<T, Offset: StringOffsetSizeTrait>(
-    from: &GenericStringArray<Offset>,
-    cast_options: &CastOptions,
-) -> Result<PrimitiveArray<T>>
-where
-    T: ArrowNumericType,
-    <T as ArrowPrimitiveType>::Native: lexical_core::FromLexical,
-{
-    if cast_options.safe {
-        let iter = (0..from.len()).map(|i| {
-            if from.is_null(i) {
-                None
-            } else {
-                lexical_core::parse(from.value(i).as_bytes()).ok()
-            }
-        });
-        // Benefit:
-        //     20% performance improvement
-        // Soundness:
-        //     The iterator is trustedLen because it comes from an `StringArray`.
-        Ok(unsafe { PrimitiveArray::<T>::from_trusted_len_iter(iter) })
-    } else {
-        let vec = (0..from.len())
-            .map(|i| {
-                if from.is_null(i) {
-                    Ok(None)
-                } else {
-                    let string = from.value(i);
-                    let result = lexical_core::parse(string.as_bytes());
-                    Some(result.map_err(|_| {
-                        ArrowError::CastError(format!(
-                            "Cannot cast string '{}' to value of {} type",
-                            string,
-                            std::any::type_name::<T>()
-                        ))
-                    }))
-                    .transpose()
-                }
-            })
-            .collect::<Result<Vec<_>>>()?;
-        // Benefit:
-        //     20% performance improvement
-        // Soundness:
-        //     The iterator is trustedLen because it comes from an `StringArray`.
-        Ok(unsafe { PrimitiveArray::<T>::from_trusted_len_iter(vec.iter()) })
-    }
-}
-
-/// Casts generic string arrays to Date32Array
-#[allow(clippy::unnecessary_wraps)]
-fn cast_string_to_date32<Offset: StringOffsetSizeTrait>(
-    array: &dyn Array,
-    cast_options: &CastOptions,
-) -> Result<ArrayRef> {
-    use chrono::Datelike;
-    let string_array = array
-        .as_any()
-        .downcast_ref::<GenericStringArray<Offset>>()
-        .unwrap();
-
-    let array = if cast_options.safe {
-        let iter = (0..string_array.len()).map(|i| {
-            if string_array.is_null(i) {
-                None
-            } else {
-                string_array
-                    .value(i)
-                    .parse::<chrono::NaiveDate>()
-                    .map(|date| date.num_days_from_ce() - EPOCH_DAYS_FROM_CE)
-                    .ok()
-            }
-        });
-
-        // Benefit:
-        //     20% performance improvement
-        // Soundness:
-        //     The iterator is trustedLen because it comes from an `StringArray`.
-        unsafe { Date32Array::from_trusted_len_iter(iter) }
-    } else {
-        let vec = (0..string_array.len())
-            .map(|i| {
-                if string_array.is_null(i) {
-                    Ok(None)
-                } else {
-                    let string = string_array
-                        .value(i);
-
-                    let result = string
-                        .parse::<chrono::NaiveDate>()
-                        .map(|date| date.num_days_from_ce() - EPOCH_DAYS_FROM_CE);
-
-                    Some(result.map_err(|_| {
-                        ArrowError::CastError(
-                            format!("Cannot cast string '{}' to value of arrow::datatypes::types::Date32Type type", string),
-                        )
-                    }))
-                        .transpose()
-                }
-            })
-            .collect::<Result<Vec<Option<i32>>>>()?;
-
-        // Benefit:
-        //     20% performance improvement
-        // Soundness:
-        //     The iterator is trustedLen because it comes from an `StringArray`.
-        unsafe { Date32Array::from_trusted_len_iter(vec.iter()) }
-    };
-
-    Ok(Arc::new(array) as ArrayRef)
-}
-
-/// Casts generic string arrays to Date64Array
-#[allow(clippy::unnecessary_wraps)]
-fn cast_string_to_date64<Offset: StringOffsetSizeTrait>(
-    array: &dyn Array,
-    cast_options: &CastOptions,
-) -> Result<ArrayRef> {
-    let string_array = array
-        .as_any()
-        .downcast_ref::<GenericStringArray<Offset>>()
-        .unwrap();
-
-    let array = if cast_options.safe {
-        let iter = (0..string_array.len()).map(|i| {
-            if string_array.is_null(i) {
-                None
-            } else {
-                string_array
-                    .value(i)
-                    .parse::<chrono::NaiveDateTime>()
-                    .map(|datetime| datetime.timestamp_millis())
-                    .ok()
-            }
-        });
-
-        // Benefit:
-        //     20% performance improvement
-        // Soundness:
-        //     The iterator is trustedLen because it comes from an `StringArray`.
-        unsafe { Date64Array::from_trusted_len_iter(iter) }
-    } else {
-        let vec = (0..string_array.len())
-            .map(|i| {
-                if string_array.is_null(i) {
-                    Ok(None)
-                } else {
-                let string = string_array
-                        .value(i);
-
-                    let result = string
-                        .parse::<chrono::NaiveDateTime>()
-                        .map(|datetime| datetime.timestamp_millis());
-
-                    Some(result.map_err(|_| {
-                        ArrowError::CastError(
-                            format!("Cannot cast string '{}' to value of arrow::datatypes::types::Date64Type type", string),
-                        )
-                    }))
-                        .transpose()
-                }
-            })
-            .collect::<Result<Vec<Option<i64>>>>()?;
-
-        // Benefit:
-        //     20% performance improvement
-        // Soundness:
-        //     The iterator is trustedLen because it comes from an `StringArray`.
-        unsafe { Date64Array::from_trusted_len_iter(vec.iter()) }
-    };
-
-    Ok(Arc::new(array) as ArrayRef)
-}
-
-/// Casts generic string arrays to TimeStampNanosecondArray
-#[allow(clippy::unnecessary_wraps)]
-fn cast_string_to_timestamp_ns<Offset: StringOffsetSizeTrait>(
-    array: &dyn Array,
-    cast_options: &CastOptions,
-) -> Result<ArrayRef> {
-    let string_array = array
-        .as_any()
-        .downcast_ref::<GenericStringArray<Offset>>()
-        .unwrap();
-
-    let array = if cast_options.safe {
-        let iter = (0..string_array.len()).map(|i| {
-            if string_array.is_null(i) {
-                None
-            } else {
-                string_to_timestamp_nanos(string_array.value(i)).ok()
-            }
-        });
-        // Benefit:
-        //     20% performance improvement
-        // Soundness:
-        //     The iterator is trustedLen because it comes from an `StringArray`.
-        unsafe { TimestampNanosecondArray::from_trusted_len_iter(iter) }
-    } else {
-        let vec = (0..string_array.len())
-            .map(|i| {
-                if string_array.is_null(i) {
-                    Ok(None)
-                } else {
-                    let result = string_to_timestamp_nanos(string_array.value(i));
-                    Some(result).transpose()
-                }
-            })
-            .collect::<Result<Vec<Option<i64>>>>()?;
-
-        // Benefit:
-        //     20% performance improvement
-        // Soundness:
-        //     The iterator is trustedLen because it comes from an `StringArray`.
-        unsafe { TimestampNanosecondArray::from_trusted_len_iter(vec.iter()) }
-    };
-
-    Ok(Arc::new(array) as ArrayRef)
-}
-
-/// Cast numeric types to Boolean
-///
-/// Any zero value returns `false` while non-zero returns `true`
-fn cast_numeric_to_bool<FROM>(from: &ArrayRef) -> Result<ArrayRef>
-where
-    FROM: ArrowNumericType,
-{
-    numeric_to_bool_cast::<FROM>(
-        from.as_any()
-            .downcast_ref::<PrimitiveArray<FROM>>()
-            .unwrap(),
-    )
-    .map(|to| Arc::new(to) as ArrayRef)
-}
-
-fn numeric_to_bool_cast<T>(from: &PrimitiveArray<T>) -> Result<BooleanArray>
-where
-    T: ArrowPrimitiveType + ArrowNumericType,
-{
-    let mut b = BooleanBuilder::new(from.len());
-
-    for i in 0..from.len() {
-        if from.is_null(i) {
-            b.append_null()?;
-        } else if from.value(i) != T::default_value() {
-            b.append_value(true)?;
-        } else {
-            b.append_value(false)?;
-        }
-    }
-
-    Ok(b.finish())
-}
-
-/// Cast Boolean types to numeric
-///
-/// `false` returns 0 while `true` returns 1
-#[allow(clippy::unnecessary_wraps)]
-fn cast_bool_to_numeric<TO>(
-    from: &ArrayRef,
-    cast_options: &CastOptions,
-) -> Result<ArrayRef>
-where
-    TO: ArrowNumericType,
-    TO::Native: num::cast::NumCast,
-{
-    Ok(Arc::new(bool_to_numeric_cast::<TO>(
-        from.as_any().downcast_ref::<BooleanArray>().unwrap(),
-        cast_options,
-    )))
-}
-
-fn bool_to_numeric_cast<T>(
-    from: &BooleanArray,
-    _cast_options: &CastOptions,
-) -> PrimitiveArray<T>
-where
-    T: ArrowNumericType,
-    T::Native: num::NumCast,
-{
-    let iter = (0..from.len()).map(|i| {
-        if from.is_null(i) {
-            None
-        } else if from.value(i) {
-            // a workaround to cast a primitive to T::Native, infallible
-            num::cast::cast(1)
-        } else {
-            Some(T::default_value())
-        }
-    });
-    // Benefit:
-    //     20% performance improvement
-    // Soundness:
-    //     The iterator is trustedLen because it comes from a Range
-    unsafe { PrimitiveArray::<T>::from_trusted_len_iter(iter) }
-}
-
-/// Attempts to cast an `ArrayDictionary` with index type K into
-/// `to_type` for supported types.
-///
-/// K is the key type
-fn dictionary_cast<K: ArrowDictionaryKeyType>(
-    array: &ArrayRef,
-    to_type: &DataType,
-    cast_options: &CastOptions,
-) -> Result<ArrayRef> {
-    use DataType::*;
-
-    match to_type {
-        Dictionary(to_index_type, to_value_type) => {
-            let dict_array = array
-                .as_any()
-                .downcast_ref::<DictionaryArray<K>>()
-                .ok_or_else(|| {
-                    ArrowError::ComputeError(
-                        "Internal Error: Cannot cast dictionary to DictionaryArray of expected type".to_string(),
-                    )
-                })?;
-
-            let keys_array: ArrayRef = Arc::new(dict_array.keys_array());
-            let values_array: ArrayRef = dict_array.values();
-            let cast_keys = cast_with_options(&keys_array, to_index_type, &cast_options)?;
-            let cast_values =
-                cast_with_options(&values_array, to_value_type, &cast_options)?;
-
-            // Failure to cast keys (because they don't fit in the
-            // target type) results in NULL values;
-            if cast_keys.null_count() > keys_array.null_count() {
-                return Err(ArrowError::ComputeError(format!(
-                    "Could not convert {} dictionary indexes from {:?} to {:?}",
-                    cast_keys.null_count() - keys_array.null_count(),
-                    keys_array.data_type(),
-                    to_index_type
-                )));
-            }
-
-            // keys are data, child_data is values (dictionary)
-            let data = ArrayData::new(
-                to_type.clone(),
-                cast_keys.len(),
-                Some(cast_keys.null_count()),
-                cast_keys
-                    .data()
-                    .null_bitmap()
-                    .clone()
-                    .map(|bitmap| bitmap.bits),
-                cast_keys.data().offset(),
-                cast_keys.data().buffers().to_vec(),
-                vec![cast_values.data().clone()],
-            );
-
-            // create the appropriate array type
-            let new_array: ArrayRef = match **to_index_type {
-                Int8 => Arc::new(DictionaryArray::<Int8Type>::from(data)),
-                Int16 => Arc::new(DictionaryArray::<Int16Type>::from(data)),
-                Int32 => Arc::new(DictionaryArray::<Int32Type>::from(data)),
-                Int64 => Arc::new(DictionaryArray::<Int64Type>::from(data)),
-                UInt8 => Arc::new(DictionaryArray::<UInt8Type>::from(data)),
-                UInt16 => Arc::new(DictionaryArray::<UInt16Type>::from(data)),
-                UInt32 => Arc::new(DictionaryArray::<UInt32Type>::from(data)),
-                UInt64 => Arc::new(DictionaryArray::<UInt64Type>::from(data)),
-                _ => {
-                    return Err(ArrowError::CastError(format!(
-                        "Unsupported type {:?} for dictionary index",
-                        to_index_type
-                    )))
-                }
-            };
-
-            Ok(new_array)
-        }
-        _ => unpack_dictionary::<K>(array, to_type, cast_options),
-    }
-}
-
-// Unpack a dictionary where the keys are of type <K> into a flattened array of type to_type
-fn unpack_dictionary<K>(
-    array: &ArrayRef,
-    to_type: &DataType,
-    cast_options: &CastOptions,
-) -> Result<ArrayRef>
-where
-    K: ArrowDictionaryKeyType,
-{
-    let dict_array = array
-        .as_any()
-        .downcast_ref::<DictionaryArray<K>>()
-        .ok_or_else(|| {
-            ArrowError::ComputeError(
-                "Internal Error: Cannot cast dictionary to DictionaryArray of expected type".to_string(),
-            )
-        })?;
-
-    // attempt to cast the dict values to the target type
-    // use the take kernel to expand out the dictionary
-    let cast_dict_values =
-        cast_with_options(&dict_array.values(), to_type, cast_options)?;
-
-    // Note take requires first casting the indices to u32
-    let keys_array: ArrayRef = Arc::new(dict_array.keys_array());
-    let indicies = cast_with_options(&keys_array, &DataType::UInt32, cast_options)?;
-    let u32_indicies =
-        indicies
-            .as_any()
-            .downcast_ref::<UInt32Array>()
-            .ok_or_else(|| {
-                ArrowError::ComputeError(
-                    "Internal Error: Cannot cast dict indices to UInt32".to_string(),
-                )
-            })?;
-
-    take(cast_dict_values.as_ref(), u32_indicies, None)
-}
-
-/// Attempts to encode an array into an `ArrayDictionary` with index
-/// type K and value (dictionary) type value_type
-///
-/// K is the key type
-fn cast_to_dictionary<K: ArrowDictionaryKeyType>(
-    array: &ArrayRef,
-    dict_value_type: &DataType,
-    cast_options: &CastOptions,
-) -> Result<ArrayRef> {
-    use DataType::*;
-
-    match *dict_value_type {
-        Int8 => pack_numeric_to_dictionary::<K, Int8Type>(
-            array,
-            dict_value_type,
-            cast_options,
-        ),
-        Int16 => pack_numeric_to_dictionary::<K, Int16Type>(
-            array,
-            dict_value_type,
-            cast_options,
-        ),
-        Int32 => pack_numeric_to_dictionary::<K, Int32Type>(
-            array,
-            dict_value_type,
-            cast_options,
-        ),
-        Int64 => pack_numeric_to_dictionary::<K, Int64Type>(
-            array,
-            dict_value_type,
-            cast_options,
-        ),
-        UInt8 => pack_numeric_to_dictionary::<K, UInt8Type>(
-            array,
-            dict_value_type,
-            cast_options,
-        ),
-        UInt16 => pack_numeric_to_dictionary::<K, UInt16Type>(
-            array,
-            dict_value_type,
-            cast_options,
-        ),
-        UInt32 => pack_numeric_to_dictionary::<K, UInt32Type>(
-            array,
-            dict_value_type,
-            cast_options,
-        ),
-        UInt64 => pack_numeric_to_dictionary::<K, UInt64Type>(
-            array,
-            dict_value_type,
-            cast_options,
-        ),
-        Utf8 => pack_string_to_dictionary::<K>(array, cast_options),
-        _ => Err(ArrowError::CastError(format!(
-            "Unsupported output type for dictionary packing: {:?}",
-            dict_value_type
-        ))),
-    }
-}
-
-// Packs the data from the primitive array of type <V> to a
-// DictionaryArray with keys of type K and values of value_type V
-fn pack_numeric_to_dictionary<K, V>(
-    array: &ArrayRef,
-    dict_value_type: &DataType,
-    cast_options: &CastOptions,
-) -> Result<ArrayRef>
-where
-    K: ArrowDictionaryKeyType,
-    V: ArrowNumericType,
-{
-    // attempt to cast the source array values to the target value type (the dictionary values type)
-    let cast_values = cast_with_options(array, &dict_value_type, cast_options)?;
-    let values = cast_values
-        .as_any()
-        .downcast_ref::<PrimitiveArray<V>>()
-        .unwrap();
-
-    let keys_builder = PrimitiveBuilder::<K>::new(values.len());
-    let values_builder = PrimitiveBuilder::<V>::new(values.len());
-    let mut b = PrimitiveDictionaryBuilder::new(keys_builder, values_builder);
-
-    // copy each element one at a time
-    for i in 0..values.len() {
-        if values.is_null(i) {
-            b.append_null()?;
-        } else {
-            b.append(values.value(i))?;
-        }
-    }
-    Ok(Arc::new(b.finish()))
-}
-
-// Packs the data as a StringDictionaryArray, if possible, with the
-// key types of K
-fn pack_string_to_dictionary<K>(
-    array: &ArrayRef,
-    cast_options: &CastOptions,
-) -> Result<ArrayRef>
-where
-    K: ArrowDictionaryKeyType,
-{
-    let cast_values = cast_with_options(array, &DataType::Utf8, cast_options)?;
-    let values = cast_values.as_any().downcast_ref::<StringArray>().unwrap();
-
-    let keys_builder = PrimitiveBuilder::<K>::new(values.len());
-    let values_builder = StringBuilder::new(values.len());
-    let mut b = StringDictionaryBuilder::new(keys_builder, values_builder);
-
-    // copy each element one at a time
-    for i in 0..values.len() {
-        if values.is_null(i) {
-            b.append_null()?;
-        } else {
-            b.append(values.value(i))?;
-        }
-    }
-    Ok(Arc::new(b.finish()))
-}
-
-/// Helper function that takes a primitive array and casts to a (generic) list array.
-fn cast_primitive_to_list<OffsetSize: OffsetSizeTrait + NumCast>(
-    array: &ArrayRef,
-    to: &Field,
-    to_type: &DataType,
-    cast_options: &CastOptions,
-) -> Result<ArrayRef> {
-    // cast primitive to list's primitive
-    let cast_array = cast_with_options(array, to.data_type(), cast_options)?;
-    // create offsets, where if array.len() = 2, we have [0,1,2]
-    // Safety:
-    // Length of range can be trusted.
-    // Note: could not yet create a generic range in stable Rust.
-    let offsets = unsafe {
-        MutableBuffer::from_trusted_len_iter(
-            (0..=array.len()).map(|i| OffsetSize::from(i).expect("integer")),
-        )
-    };
-
-    let list_data = ArrayData::new(
-        to_type.clone(),
-        array.len(),
-        Some(cast_array.null_count()),
-        cast_array
-            .data()
-            .null_bitmap()
-            .clone()
-            .map(|bitmap| bitmap.bits),
-        0,
-        vec![offsets.into()],
-        vec![cast_array.data().clone()],
-    );
-    let list_array =
-        Arc::new(GenericListArray::<OffsetSize>::from(list_data)) as ArrayRef;
-
-    Ok(list_array)
-}
-
-/// Helper function that takes an Generic list container and casts the inner datatype.
-fn cast_list_inner<OffsetSize: OffsetSizeTrait>(
-    array: &Arc<dyn Array>,
-    to: &Field,
-    to_type: &DataType,
-    cast_options: &CastOptions,
-) -> Result<ArrayRef> {
-    let data = array.data_ref();
-    let underlying_array = make_array(data.child_data()[0].clone());
-    let cast_array = cast_with_options(&underlying_array, to.data_type(), cast_options)?;
-    let array_data = ArrayData::new(
-        to_type.clone(),
-        array.len(),
-        Some(cast_array.null_count()),
-        cast_array
-            .data()
-            .null_bitmap()
-            .clone()
-            .map(|bitmap| bitmap.bits),
-        array.offset(),
-        // reuse offset buffer
-        data.buffers().to_vec(),
-        vec![cast_array.data().clone()],
-    );
-    let list = GenericListArray::<OffsetSize>::from(array_data);
-    Ok(Arc::new(list) as ArrayRef)
-}
-
-/// Helper function to cast from `Utf8` to `LargeUtf8` and vice versa. If the `LargeUtf8` is too large for
-/// a `Utf8` array it will return an Error.
-fn cast_str_container<OffsetSizeFrom, OffsetSizeTo>(array: &dyn Array) -> Result<ArrayRef>
-where
-    OffsetSizeFrom: StringOffsetSizeTrait + ToPrimitive,
-    OffsetSizeTo: StringOffsetSizeTrait + NumCast + ArrowNativeType,
-{
-    let str_array = array
-        .as_any()
-        .downcast_ref::<GenericStringArray<OffsetSizeFrom>>()
-        .unwrap();
-    let list_data = array.data();
-    let str_values_buf = str_array.value_data();
-
-    let offsets = unsafe { list_data.buffers()[0].typed_data::<OffsetSizeFrom>() };
-
-    let mut offset_builder = BufferBuilder::<OffsetSizeTo>::new(offsets.len());
-    offsets.iter().try_for_each::<_, Result<_>>(|offset| {
-        let offset = OffsetSizeTo::from(*offset).ok_or_else(|| {
-            ArrowError::ComputeError(
-                "large-utf8 array too large to cast to utf8-array".into(),
-            )
-        })?;
-        offset_builder.append(offset);
-        Ok(())
-    })?;
-
-    let offset_buffer = offset_builder.finish();
-
-    let dtype = if matches!(std::mem::size_of::<OffsetSizeTo>(), 8) {
-        DataType::LargeUtf8
-    } else {
-        DataType::Utf8
-    };
-
-    let mut builder = ArrayData::builder(dtype)
-        .len(array.len())
-        .add_buffer(offset_buffer)
-        .add_buffer(str_values_buf);
-
-    if let Some(buf) = list_data.null_buffer() {
-        builder = builder.null_bit_buffer(buf.clone())
-    }
-    let data = builder.build();
-    Ok(Arc::new(GenericStringArray::<OffsetSizeTo>::from(data)))
-}
-
-/// Cast the container type of List/Largelist array but not the inner types.
-/// This function can leave the value data intact and only has to cast the offset dtypes.
-fn cast_list_container<OffsetSizeFrom, OffsetSizeTo>(
-    array: &dyn Array,
-    _cast_options: &CastOptions,
-) -> Result<ArrayRef>
-where
-    OffsetSizeFrom: OffsetSizeTrait + ToPrimitive,
-    OffsetSizeTo: OffsetSizeTrait + NumCast,
-{
-    let data = array.data_ref();
-    // the value data stored by the list
-    let value_data = data.child_data()[0].clone();
-
-    let out_dtype = match array.data_type() {
-        DataType::List(value_type) => {
-            assert_eq!(
-                std::mem::size_of::<OffsetSizeFrom>(),
-                std::mem::size_of::<i32>()
-            );
-            assert_eq!(
-                std::mem::size_of::<OffsetSizeTo>(),
-                std::mem::size_of::<i64>()
-            );
-            DataType::LargeList(value_type.clone())
-        }
-        DataType::LargeList(value_type) => {
-            assert_eq!(
-                std::mem::size_of::<OffsetSizeFrom>(),
-                std::mem::size_of::<i64>()
-            );
-            assert_eq!(
-                std::mem::size_of::<OffsetSizeTo>(),
-                std::mem::size_of::<i32>()
-            );
-            if value_data.len() > i32::MAX as usize {
-                return Err(ArrowError::ComputeError(
-                    "LargeList too large to cast to List".into(),
-                ));
-            }
-            DataType::List(value_type.clone())
-        }
-        // implementation error
-        _ => unreachable!(),
-    };
-
-    let offsets = data.buffer::<OffsetSizeFrom>(0);
-
-    let iter = offsets.iter().map(|idx| {
-        let idx: OffsetSizeTo = NumCast::from(*idx).unwrap();
-        idx
-    });
-
-    // SAFETY
-    //      A slice produces a trusted length iterator
-    let offset_buffer = unsafe { Buffer::from_trusted_len_iter(iter) };
-
-    // wrap up
-    let mut builder = ArrayData::builder(out_dtype)
-        .len(array.len())
-        .add_buffer(offset_buffer)
-        .add_child_data(value_data);
-
-    if let Some(buf) = data.null_buffer() {
-        builder = builder.null_bit_buffer(buf.clone())
-    }
-    let data = builder.build();
-    Ok(make_array(data))
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::{buffer::Buffer, util::display::array_value_to_string};
-
-    #[test]
-    fn test_cast_i32_to_f64() {
-        let a = Int32Array::from(vec![5, 6, 7, 8, 9]);
-        let array = Arc::new(a) as ArrayRef;
-        let b = cast(&array, &DataType::Float64).unwrap();
-        let c = b.as_any().downcast_ref::<Float64Array>().unwrap();
-        assert!(5.0 - c.value(0) < f64::EPSILON);
-        assert!(6.0 - c.value(1) < f64::EPSILON);
-        assert!(7.0 - c.value(2) < f64::EPSILON);
-        assert!(8.0 - c.value(3) < f64::EPSILON);
-        assert!(9.0 - c.value(4) < f64::EPSILON);
-    }
-
-    #[test]
-    fn test_cast_i32_to_u8() {
-        let a = Int32Array::from(vec![-5, 6, -7, 8, 100000000]);
-        let array = Arc::new(a) as ArrayRef;
-        let b = cast(&array, &DataType::UInt8).unwrap();
-        let c = b.as_any().downcast_ref::<UInt8Array>().unwrap();
-        assert_eq!(false, c.is_valid(0));
-        assert_eq!(6, c.value(1));
-        assert_eq!(false, c.is_valid(2));
-        assert_eq!(8, c.value(3));
-        // overflows return None
-        assert_eq!(false, c.is_valid(4));
-    }
-
-    #[test]
-    fn test_cast_i32_to_u8_sliced() {
-        let a = Int32Array::from(vec![-5, 6, -7, 8, 100000000]);
-        let array = Arc::new(a) as ArrayRef;
-        assert_eq!(0, array.offset());
-        let array = array.slice(2, 3);
-        assert_eq!(2, array.offset());
-        let b = cast(&array, &DataType::UInt8).unwrap();
-        assert_eq!(3, b.len());
-        assert_eq!(0, b.offset());
-        let c = b.as_any().downcast_ref::<UInt8Array>().unwrap();
-        assert_eq!(false, c.is_valid(0));
-        assert_eq!(8, c.value(1));
-        // overflows return None
-        assert_eq!(false, c.is_valid(2));
-    }
-
-    #[test]
-    fn test_cast_i32_to_i32() {
-        let a = Int32Array::from(vec![5, 6, 7, 8, 9]);
-        let array = Arc::new(a) as ArrayRef;
-        let b = cast(&array, &DataType::Int32).unwrap();
-        let c = b.as_any().downcast_ref::<Int32Array>().unwrap();
-        assert_eq!(5, c.value(0));
-        assert_eq!(6, c.value(1));
-        assert_eq!(7, c.value(2));
-        assert_eq!(8, c.value(3));
-        assert_eq!(9, c.value(4));
-    }
-
-    #[test]
-    fn test_cast_i32_to_list_i32() {
-        let a = Int32Array::from(vec![5, 6, 7, 8, 9]);
-        let array = Arc::new(a) as ArrayRef;
-        let b = cast(
-            &array,
-            &DataType::List(Box::new(Field::new("item", DataType::Int32, true))),
-        )
-        .unwrap();
-        assert_eq!(5, b.len());
-        let arr = b.as_any().downcast_ref::<ListArray>().unwrap();
-        assert_eq!(&[0, 1, 2, 3, 4, 5], arr.value_offsets());
-        assert_eq!(1, arr.value_length(0));
-        assert_eq!(1, arr.value_length(1));
-        assert_eq!(1, arr.value_length(2));
-        assert_eq!(1, arr.value_length(3));
-        assert_eq!(1, arr.value_length(4));
-        let values = arr.values();
-        let c = values.as_any().downcast_ref::<Int32Array>().unwrap();
-        assert_eq!(5, c.value(0));
-        assert_eq!(6, c.value(1));
-        assert_eq!(7, c.value(2));
-        assert_eq!(8, c.value(3));
-        assert_eq!(9, c.value(4));
-    }
-
-    #[test]
-    fn test_cast_i32_to_list_i32_nullable() {
-        let a = Int32Array::from(vec![Some(5), None, Some(7), Some(8), Some(9)]);
-        let array = Arc::new(a) as ArrayRef;
-        let b = cast(
-            &array,
-            &DataType::List(Box::new(Field::new("item", DataType::Int32, true))),
-        )
-        .unwrap();
-        assert_eq!(5, b.len());
-        assert_eq!(1, b.null_count());
-        let arr = b.as_any().downcast_ref::<ListArray>().unwrap();
-        assert_eq!(&[0, 1, 2, 3, 4, 5], arr.value_offsets());
-        assert_eq!(1, arr.value_length(0));
-        assert_eq!(1, arr.value_length(1));
-        assert_eq!(1, arr.value_length(2));
-        assert_eq!(1, arr.value_length(3));
-        assert_eq!(1, arr.value_length(4));
-        let values = arr.values();
-        let c = values.as_any().downcast_ref::<Int32Array>().unwrap();
-        assert_eq!(1, c.null_count());
-        assert_eq!(5, c.value(0));
-        assert_eq!(false, c.is_valid(1));
-        assert_eq!(7, c.value(2));
-        assert_eq!(8, c.value(3));
-        assert_eq!(9, c.value(4));
-    }
-
-    #[test]
-    fn test_cast_i32_to_list_f64_nullable_sliced() {
-        let a = Int32Array::from(vec![Some(5), None, Some(7), Some(8), None, Some(10)]);
-        let array = Arc::new(a) as ArrayRef;
-        let array = array.slice(2, 4);
-        let b = cast(
-            &array,
-            &DataType::List(Box::new(Field::new("item", DataType::Float64, true))),
-        )
-        .unwrap();
-        assert_eq!(4, b.len());
-        assert_eq!(1, b.null_count());
-        let arr = b.as_any().downcast_ref::<ListArray>().unwrap();
-        assert_eq!(&[0, 1, 2, 3, 4], arr.value_offsets());
-        assert_eq!(1, arr.value_length(0));
-        assert_eq!(1, arr.value_length(1));
-        assert_eq!(1, arr.value_length(2));
-        assert_eq!(1, arr.value_length(3));
-        let values = arr.values();
-        let c = values.as_any().downcast_ref::<Float64Array>().unwrap();
-        assert_eq!(1, c.null_count());
-        assert!(7.0 - c.value(0) < f64::EPSILON);
-        assert!(8.0 - c.value(1) < f64::EPSILON);
-        assert_eq!(false, c.is_valid(2));
-        assert!(10.0 - c.value(3) < f64::EPSILON);
-    }
-
-    #[test]
-    fn test_cast_utf8_to_i32() {
-        let a = StringArray::from(vec!["5", "6", "seven", "8", "9.1"]);
-        let array = Arc::new(a) as ArrayRef;
-        let b = cast(&array, &DataType::Int32).unwrap();
-        let c = b.as_any().downcast_ref::<Int32Array>().unwrap();
-        assert_eq!(5, c.value(0));
-        assert_eq!(6, c.value(1));
-        assert_eq!(false, c.is_valid(2));
-        assert_eq!(8, c.value(3));
-        assert_eq!(false, c.is_valid(4));
-    }
-
-    #[test]
-    fn test_cast_with_options_utf8_to_i32() {
-        let a = StringArray::from(vec!["5", "6", "seven", "8", "9.1"]);
-        let array = Arc::new(a) as ArrayRef;
-        let result =
-            cast_with_options(&array, &DataType::Int32, &CastOptions { safe: false });
-        match result {
-            Ok(_) => panic!("expected error"),
-            Err(e) => {
-                assert!(e.to_string().contains(
-                    "Cast error: Cannot cast string 'seven' to value of arrow::datatypes::types::Int32Type type"
-                ))
-            }
-        }
-    }
-
-    #[test]
-    fn test_cast_bool_to_i32() {
-        let a = BooleanArray::from(vec![Some(true), Some(false), None]);
-        let array = Arc::new(a) as ArrayRef;
-        let b = cast(&array, &DataType::Int32).unwrap();
-        let c = b.as_any().downcast_ref::<Int32Array>().unwrap();
-        assert_eq!(1, c.value(0));
-        assert_eq!(0, c.value(1));
-        assert_eq!(false, c.is_valid(2));
-    }
-
-    #[test]
-    fn test_cast_bool_to_f64() {
-        let a = BooleanArray::from(vec![Some(true), Some(false), None]);
-        let array = Arc::new(a) as ArrayRef;
-        let b = cast(&array, &DataType::Float64).unwrap();
-        let c = b.as_any().downcast_ref::<Float64Array>().unwrap();
-        assert!(1.0 - c.value(0) < f64::EPSILON);
-        assert!(0.0 - c.value(1) < f64::EPSILON);
-        assert_eq!(false, c.is_valid(2));
-    }
-
-    #[test]
-    #[should_panic(
-        expected = "Casting from Int32 to Timestamp(Microsecond, None) not supported"
-    )]
-    fn test_cast_int32_to_timestamp() {
-        let a = Int32Array::from(vec![Some(2), Some(10), None]);
-        let array = Arc::new(a) as ArrayRef;
-        cast(&array, &DataType::Timestamp(TimeUnit::Microsecond, None)).unwrap();
-    }
-
-    #[test]
-    fn test_cast_list_i32_to_list_u16() {
-        // Construct a value array
-        let value_data = Int32Array::from(vec![0, 0, 0, -1, -2, -1, 2, 100000000])
-            .data()
-            .clone();
-
-        let value_offsets = Buffer::from_slice_ref(&[0, 3, 6, 8]);
-
-        // Construct a list array from the above two
-        let list_data_type =
-            DataType::List(Box::new(Field::new("item", DataType::Int32, true)));
-        let list_data = ArrayData::builder(list_data_type)
-            .len(3)
-            .add_buffer(value_offsets)
-            .add_child_data(value_data)
-            .build();
-        let list_array = Arc::new(ListArray::from(list_data)) as ArrayRef;
-
-        let cast_array = cast(
-            &list_array,
-            &DataType::List(Box::new(Field::new("item", DataType::UInt16, true))),
-        )
-        .unwrap();
-        // 3 negative values should get lost when casting to unsigned,
-        // 1 value should overflow
-        assert_eq!(4, cast_array.null_count());
-        // offsets should be the same
-        assert_eq!(
-            list_array.data().buffers().to_vec(),
-            cast_array.data().buffers().to_vec()
-        );
-        let array = cast_array
-            .as_ref()
-            .as_any()
-            .downcast_ref::<ListArray>()
-            .unwrap();
-        assert_eq!(DataType::UInt16, array.value_type());
-        assert_eq!(4, array.values().null_count());
-        assert_eq!(3, array.value_length(0));
-        assert_eq!(3, array.value_length(1));
-        assert_eq!(2, array.value_length(2));
-        let values = array.values();
-        let u16arr = values.as_any().downcast_ref::<UInt16Array>().unwrap();
-        assert_eq!(8, u16arr.len());
-        assert_eq!(4, u16arr.null_count());
-
-        assert_eq!(0, u16arr.value(0));
-        assert_eq!(0, u16arr.value(1));
-        assert_eq!(0, u16arr.value(2));
-        assert_eq!(false, u16arr.is_valid(3));
-        assert_eq!(false, u16arr.is_valid(4));
-        assert_eq!(false, u16arr.is_valid(5));
-        assert_eq!(2, u16arr.value(6));
-        assert_eq!(false, u16arr.is_valid(7));
-    }
-
-    #[test]
-    #[should_panic(
-        expected = "Casting from Int32 to Timestamp(Microsecond, None) not supported"
-    )]
-    fn test_cast_list_i32_to_list_timestamp() {
-        // Construct a value array
-        let value_data = Int32Array::from(vec![0, 0, 0, -1, -2, -1, 2, 8, 100000000])
-            .data()
-            .clone();
-
-        let value_offsets = Buffer::from_slice_ref(&[0, 3, 6, 9]);
-
-        // Construct a list array from the above two
-        let list_data_type =
-            DataType::List(Box::new(Field::new("item", DataType::Int32, true)));
-        let list_data = ArrayData::builder(list_data_type)
-            .len(3)
-            .add_buffer(value_offsets)
-            .add_child_data(value_data)
-            .build();
-        let list_array = Arc::new(ListArray::from(list_data)) as ArrayRef;
-
-        cast(
-            &list_array,
-            &DataType::List(Box::new(Field::new(
-                "item",
-                DataType::Timestamp(TimeUnit::Microsecond, None),
-                true,
-            ))),
-        )
-        .unwrap();
-    }
-
-    #[test]
-    fn test_cast_date32_to_date64() {
-        let a = Date32Array::from(vec![10000, 17890]);
-        let array = Arc::new(a) as ArrayRef;
-        let b = cast(&array, &DataType::Date64).unwrap();
-        let c = b.as_any().downcast_ref::<Date64Array>().unwrap();
-        assert_eq!(864000000000, c.value(0));
-        assert_eq!(1545696000000, c.value(1));
-    }
-
-    #[test]
-    fn test_cast_date64_to_date32() {
-        let a = Date64Array::from(vec![Some(864000000005), Some(1545696000001), None]);
-        let array = Arc::new(a) as ArrayRef;
-        let b = cast(&array, &DataType::Date32).unwrap();
-        let c = b.as_any().downcast_ref::<Date32Array>().unwrap();
-        assert_eq!(10000, c.value(0));
-        assert_eq!(17890, c.value(1));
-        assert!(c.is_null(2));
-    }
-
-    #[test]
-    fn test_cast_string_to_timestamp() {
-        let a1 = Arc::new(StringArray::from(vec![
-            Some("2020-09-08T12:00:00+00:00"),
-            Some("Not a valid date"),
-            None,
-        ])) as ArrayRef;
-        let a2 = Arc::new(LargeStringArray::from(vec![
-            Some("2020-09-08T12:00:00+00:00"),
-            Some("Not a valid date"),
-            None,
-        ])) as ArrayRef;
-        for array in &[a1, a2] {
-            let b =
-                cast(array, &DataType::Timestamp(TimeUnit::Nanosecond, None)).unwrap();
-            let c = b
-                .as_any()
-                .downcast_ref::<TimestampNanosecondArray>()
-                .unwrap();
-            assert_eq!(1599566400000000000, c.value(0));
-            assert!(c.is_null(1));
-            assert!(c.is_null(2));
-        }
-    }
-
-    #[test]
-    fn test_cast_date32_to_int32() {
-        let a = Date32Array::from(vec![10000, 17890]);
-        let array = Arc::new(a) as ArrayRef;
-        let b = cast(&array, &DataType::Int32).unwrap();
-        let c = b.as_any().downcast_ref::<Int32Array>().unwrap();
-        assert_eq!(10000, c.value(0));
-        assert_eq!(17890, c.value(1));
-    }
-
-    #[test]
-    fn test_cast_int32_to_date32() {
-        let a = Int32Array::from(vec![10000, 17890]);
-        let array = Arc::new(a) as ArrayRef;
-        let b = cast(&array, &DataType::Date32).unwrap();
-        let c = b.as_any().downcast_ref::<Date32Array>().unwrap();
-        assert_eq!(10000, c.value(0));
-        assert_eq!(17890, c.value(1));
-    }
-
-    #[test]
-    fn test_cast_timestamp_to_date32() {
-        let a = TimestampMillisecondArray::from_opt_vec(
-            vec![Some(864000000005), Some(1545696000001), None],
-            Some(String::from("UTC")),
-        );
-        let array = Arc::new(a) as ArrayRef;
-        let b = cast(&array, &DataType::Date32).unwrap();
-        let c = b.as_any().downcast_ref::<Date32Array>().unwrap();
-        assert_eq!(10000, c.value(0));
-        assert_eq!(17890, c.value(1));
-        assert!(c.is_null(2));
-    }
-
-    #[test]
-    fn test_cast_timestamp_to_date64() {
-        let a = TimestampMillisecondArray::from_opt_vec(
-            vec![Some(864000000005), Some(1545696000001), None],
-            None,
-        );
-        let array = Arc::new(a) as ArrayRef;
-        let b = cast(&array, &DataType::Date64).unwrap();
-        let c = b.as_any().downcast_ref::<Date64Array>().unwrap();
-        assert_eq!(864000000005, c.value(0));
-        assert_eq!(1545696000001, c.value(1));
-        assert!(c.is_null(2));
-    }
-
-    #[test]
-    fn test_cast_timestamp_to_i64() {
-        let a = TimestampMillisecondArray::from_opt_vec(
-            vec![Some(864000000005), Some(1545696000001), None],
-            Some("UTC".to_string()),
-        );
-        let array = Arc::new(a) as ArrayRef;
-        let b = cast(&array, &DataType::Int64).unwrap();
-        let c = b.as_any().downcast_ref::<Int64Array>().unwrap();
-        assert_eq!(&DataType::Int64, c.data_type());
-        assert_eq!(864000000005, c.value(0));
-        assert_eq!(1545696000001, c.value(1));
-        assert!(c.is_null(2));
-    }
-
-    #[test]
-    fn test_cast_between_timestamps() {
-        let a = TimestampMillisecondArray::from_opt_vec(
-            vec![Some(864000003005), Some(1545696002001), None],
-            None,
-        );
-        let array = Arc::new(a) as ArrayRef;
-        let b = cast(&array, &DataType::Timestamp(TimeUnit::Second, None)).unwrap();
-        let c = b.as_any().downcast_ref::<TimestampSecondArray>().unwrap();
-        assert_eq!(864000003, c.value(0));
-        assert_eq!(1545696002, c.value(1));
-        assert!(c.is_null(2));
-    }
-
-    #[test]
-    fn test_cast_to_strings() {
-        let a = Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef;
-        let out = cast(&a, &DataType::Utf8).unwrap();
-        let out = out
-            .as_any()
-            .downcast_ref::<StringArray>()
-            .unwrap()
-            .into_iter()
-            .collect::<Vec<_>>();
-        assert_eq!(out, vec![Some("1"), Some("2"), Some("3")]);
-        let out = cast(&a, &DataType::LargeUtf8).unwrap();
-        let out = out
-            .as_any()
-            .downcast_ref::<LargeStringArray>()
-            .unwrap()
-            .into_iter()
-            .collect::<Vec<_>>();
-        assert_eq!(out, vec![Some("1"), Some("2"), Some("3")]);
-    }
-
-    #[test]
-    fn test_str_to_str_casts() {
-        for data in vec![
-            vec![Some("foo"), Some("bar"), Some("ham")],
-            vec![Some("foo"), None, Some("bar")],
-        ] {
-            let a = Arc::new(LargeStringArray::from(data.clone())) as ArrayRef;
-            let to = cast(&a, &DataType::Utf8).unwrap();
-            let expect = a
-                .as_any()
-                .downcast_ref::<LargeStringArray>()
-                .unwrap()
-                .into_iter()
-                .collect::<Vec<_>>();
-            let out = to
-                .as_any()
-                .downcast_ref::<StringArray>()
-                .unwrap()
-                .into_iter()
-                .collect::<Vec<_>>();
-            assert_eq!(expect, out);
-
-            let a = Arc::new(StringArray::from(data)) as ArrayRef;
-            let to = cast(&a, &DataType::LargeUtf8).unwrap();
-            let expect = a
-                .as_any()
-                .downcast_ref::<StringArray>()
-                .unwrap()
-                .into_iter()
-                .collect::<Vec<_>>();
-            let out = to
-                .as_any()
-                .downcast_ref::<LargeStringArray>()
-                .unwrap()
-                .into_iter()
-                .collect::<Vec<_>>();
-            assert_eq!(expect, out);
-        }
-    }
-
-    #[test]
-    fn test_cast_from_f64() {
-        let f64_values: Vec<f64> = vec![
-            std::i64::MIN as f64,
-            std::i32::MIN as f64,
-            std::i16::MIN as f64,
-            std::i8::MIN as f64,
-            0_f64,
-            std::u8::MAX as f64,
-            std::u16::MAX as f64,
-            std::u32::MAX as f64,
-            std::u64::MAX as f64,
-        ];
-        let f64_array: ArrayRef = Arc::new(Float64Array::from(f64_values));
-
-        let f64_expected = vec![
-            "-9223372036854776000.0",
-            "-2147483648.0",
-            "-32768.0",
-            "-128.0",
-            "0.0",
-            "255.0",
-            "65535.0",
-            "4294967295.0",
-            "18446744073709552000.0",
-        ];
-        assert_eq!(
-            f64_expected,
-            get_cast_values::<Float64Type>(&f64_array, &DataType::Float64)
-        );
-
-        let f32_expected = vec![
-            "-9223372000000000000.0",
-            "-2147483600.0",
-            "-32768.0",
-            "-128.0",
-            "0.0",
-            "255.0",
-            "65535.0",
-            "4294967300.0",
-            "18446744000000000000.0",
-        ];
-        assert_eq!(
-            f32_expected,
-            get_cast_values::<Float32Type>(&f64_array, &DataType::Float32)
-        );
-
-        let i64_expected = vec![
-            "-9223372036854775808",
-            "-2147483648",
-            "-32768",
-            "-128",
-            "0",
-            "255",
-            "65535",
-            "4294967295",
-            "null",
-        ];
-        assert_eq!(
-            i64_expected,
-            get_cast_values::<Int64Type>(&f64_array, &DataType::Int64)
-        );
-
-        let i32_expected = vec![
-            "null",
-            "-2147483648",
-            "-32768",
-            "-128",
-            "0",
-            "255",
-            "65535",
-            "null",
-            "null",
-        ];
-        assert_eq!(
-            i32_expected,
-            get_cast_values::<Int32Type>(&f64_array, &DataType::Int32)
-        );
-
-        let i16_expected = vec![
-            "null", "null", "-32768", "-128", "0", "255", "null", "null", "null",
-        ];
-        assert_eq!(
-            i16_expected,
-            get_cast_values::<Int16Type>(&f64_array, &DataType::Int16)
-        );
-
-        let i8_expected = vec![
-            "null", "null", "null", "-128", "0", "null", "null", "null", "null",
-        ];
-        assert_eq!(
-            i8_expected,
-            get_cast_values::<Int8Type>(&f64_array, &DataType::Int8)
-        );
-
-        let u64_expected = vec![
-            "null",
-            "null",
-            "null",
-            "null",
-            "0",
-            "255",
-            "65535",
-            "4294967295",
-            "null",
-        ];
-        assert_eq!(
-            u64_expected,
-            get_cast_values::<UInt64Type>(&f64_array, &DataType::UInt64)
-        );
-
-        let u32_expected = vec![
-            "null",
-            "null",
-            "null",
-            "null",
-            "0",
-            "255",
-            "65535",
-            "4294967295",
-            "null",
-        ];
-        assert_eq!(
-            u32_expected,
-            get_cast_values::<UInt32Type>(&f64_array, &DataType::UInt32)
-        );
-
-        let u16_expected = vec![
-            "null", "null", "null", "null", "0", "255", "65535", "null", "null",
-        ];
-        assert_eq!(
-            u16_expected,
-            get_cast_values::<UInt16Type>(&f64_array, &DataType::UInt16)
-        );
-
-        let u8_expected = vec![
-            "null", "null", "null", "null", "0", "255", "null", "null", "null",
-        ];
-        assert_eq!(
-            u8_expected,
-            get_cast_values::<UInt8Type>(&f64_array, &DataType::UInt8)
-        );
-    }
-
-    #[test]
-    fn test_cast_from_f32() {
-        let f32_values: Vec<f32> = vec![
-            std::i32::MIN as f32,
-            std::i32::MIN as f32,
-            std::i16::MIN as f32,
-            std::i8::MIN as f32,
-            0_f32,
-            std::u8::MAX as f32,
-            std::u16::MAX as f32,
-            std::u32::MAX as f32,
-            std::u32::MAX as f32,
-        ];
-        let f32_array: ArrayRef = Arc::new(Float32Array::from(f32_values));
-
-        let f64_expected = vec![
-            "-2147483648.0",
-            "-2147483648.0",
-            "-32768.0",
-            "-128.0",
-            "0.0",
-            "255.0",
-            "65535.0",
-            "4294967296.0",
-            "4294967296.0",
-        ];
-        assert_eq!(
-            f64_expected,
-            get_cast_values::<Float64Type>(&f32_array, &DataType::Float64)
-        );
-
-        let f32_expected = vec![
-            "-2147483600.0",
-            "-2147483600.0",
-            "-32768.0",
-            "-128.0",
-            "0.0",
-            "255.0",
-            "65535.0",
-            "4294967300.0",
-            "4294967300.0",
-        ];
-        assert_eq!(
-            f32_expected,
-            get_cast_values::<Float32Type>(&f32_array, &DataType::Float32)
-        );
-
-        let i64_expected = vec![
-            "-2147483648",
-            "-2147483648",
-            "-32768",
-            "-128",
-            "0",
-            "255",
-            "65535",
-            "4294967296",
-            "4294967296",
-        ];
-        assert_eq!(
-            i64_expected,
-            get_cast_values::<Int64Type>(&f32_array, &DataType::Int64)
-        );
-
-        let i32_expected = vec![
-            "-2147483648",
-            "-2147483648",
-            "-32768",
-            "-128",
-            "0",
-            "255",
-            "65535",
-            "null",
-            "null",
-        ];
-        assert_eq!(
-            i32_expected,
-            get_cast_values::<Int32Type>(&f32_array, &DataType::Int32)
-        );
-
-        let i16_expected = vec![
-            "null", "null", "-32768", "-128", "0", "255", "null", "null", "null",
-        ];
-        assert_eq!(
-            i16_expected,
-            get_cast_values::<Int16Type>(&f32_array, &DataType::Int16)
-        );
-
-        let i8_expected = vec![
-            "null", "null", "null", "-128", "0", "null", "null", "null", "null",
-        ];
-        assert_eq!(
-            i8_expected,
-            get_cast_values::<Int8Type>(&f32_array, &DataType::Int8)
-        );
-
-        let u64_expected = vec![
-            "null",
-            "null",
-            "null",
-            "null",
-            "0",
-            "255",
-            "65535",
-            "4294967296",
-            "4294967296",
-        ];
-        assert_eq!(
-            u64_expected,
-            get_cast_values::<UInt64Type>(&f32_array, &DataType::UInt64)
-        );
-
-        let u32_expected = vec![
-            "null", "null", "null", "null", "0", "255", "65535", "null", "null",
-        ];
-        assert_eq!(
-            u32_expected,
-            get_cast_values::<UInt32Type>(&f32_array, &DataType::UInt32)
-        );
-
-        let u16_expected = vec![
-            "null", "null", "null", "null", "0", "255", "65535", "null", "null",
-        ];
-        assert_eq!(
-            u16_expected,
-            get_cast_values::<UInt16Type>(&f32_array, &DataType::UInt16)
-        );
-
-        let u8_expected = vec![
-            "null", "null", "null", "null", "0", "255", "null", "null", "null",
-        ];
-        assert_eq!(
-            u8_expected,
-            get_cast_values::<UInt8Type>(&f32_array, &DataType::UInt8)
-        );
-    }
-
-    #[test]
-    fn test_cast_from_uint64() {
-        let u64_values: Vec<u64> = vec![
-            0,
-            std::u8::MAX as u64,
-            std::u16::MAX as u64,
-            std::u32::MAX as u64,
-            std::u64::MAX,
-        ];
-        let u64_array: ArrayRef = Arc::new(UInt64Array::from(u64_values));
-
-        let f64_expected = vec![
-            "0.0",
-            "255.0",
-            "65535.0",
-            "4294967295.0",
-            "18446744073709552000.0",
-        ];
-        assert_eq!(
-            f64_expected,
-            get_cast_values::<Float64Type>(&u64_array, &DataType::Float64)
-        );
-
-        let f32_expected = vec![
-            "0.0",
-            "255.0",
-            "65535.0",
-            "4294967300.0",
-            "18446744000000000000.0",
-        ];
-        assert_eq!(
-            f32_expected,
-            get_cast_values::<Float32Type>(&u64_array, &DataType::Float32)
-        );
-
-        let i64_expected = vec!["0", "255", "65535", "4294967295", "null"];
-        assert_eq!(
-            i64_expected,
-            get_cast_values::<Int64Type>(&u64_array, &DataType::Int64)
-        );
-
-        let i32_expected = vec!["0", "255", "65535", "null", "null"];
-        assert_eq!(
-            i32_expected,
-            get_cast_values::<Int32Type>(&u64_array, &DataType::Int32)
-        );
-
-        let i16_expected = vec!["0", "255", "null", "null", "null"];
-        assert_eq!(
-            i16_expected,
-            get_cast_values::<Int16Type>(&u64_array, &DataType::Int16)
-        );
-
-        let i8_expected = vec!["0", "null", "null", "null", "null"];
-        assert_eq!(
-            i8_expected,
-            get_cast_values::<Int8Type>(&u64_array, &DataType::Int8)
-        );
-
-        let u64_expected =
-            vec!["0", "255", "65535", "4294967295", "18446744073709551615"];
-        assert_eq!(
-            u64_expected,
-            get_cast_values::<UInt64Type>(&u64_array, &DataType::UInt64)
-        );
-
-        let u32_expected = vec!["0", "255", "65535", "4294967295", "null"];
-        assert_eq!(
-            u32_expected,
-            get_cast_values::<UInt32Type>(&u64_array, &DataType::UInt32)
-        );
-
-        let u16_expected = vec!["0", "255", "65535", "null", "null"];
-        assert_eq!(
-            u16_expected,
-            get_cast_values::<UInt16Type>(&u64_array, &DataType::UInt16)
-        );
-
-        let u8_expected = vec!["0", "255", "null", "null", "null"];
-        assert_eq!(
-            u8_expected,
-            get_cast_values::<UInt8Type>(&u64_array, &DataType::UInt8)
-        );
-    }
-
-    #[test]
-    fn test_cast_from_uint32() {
-        let u32_values: Vec<u32> = vec![
-            0,
-            std::u8::MAX as u32,
-            std::u16::MAX as u32,
-            std::u32::MAX as u32,
-        ];
-        let u32_array: ArrayRef = Arc::new(UInt32Array::from(u32_values));
-
-        let f64_expected = vec!["0.0", "255.0", "65535.0", "4294967295.0"];
-        assert_eq!(
-            f64_expected,
-            get_cast_values::<Float64Type>(&u32_array, &DataType::Float64)
-        );
-
-        let f32_expected = vec!["0.0", "255.0", "65535.0", "4294967300.0"];
-        assert_eq!(
-            f32_expected,
-            get_cast_values::<Float32Type>(&u32_array, &DataType::Float32)
-        );
-
-        let i64_expected = vec!["0", "255", "65535", "4294967295"];
-        assert_eq!(
-            i64_expected,
-            get_cast_values::<Int64Type>(&u32_array, &DataType::Int64)
-        );
-
-        let i32_expected = vec!["0", "255", "65535", "null"];
-        assert_eq!(
-            i32_expected,
-            get_cast_values::<Int32Type>(&u32_array, &DataType::Int32)
-        );
-
-        let i16_expected = vec!["0", "255", "null", "null"];
-        assert_eq!(
-            i16_expected,
-            get_cast_values::<Int16Type>(&u32_array, &DataType::Int16)
-        );
-
-        let i8_expected = vec!["0", "null", "null", "null"];
-        assert_eq!(
-            i8_expected,
-            get_cast_values::<Int8Type>(&u32_array, &DataType::Int8)
-        );
-
-        let u64_expected = vec!["0", "255", "65535", "4294967295"];
-        assert_eq!(
-            u64_expected,
-            get_cast_values::<UInt64Type>(&u32_array, &DataType::UInt64)
-        );
-
-        let u32_expected = vec!["0", "255", "65535", "4294967295"];
-        assert_eq!(
-            u32_expected,
-            get_cast_values::<UInt32Type>(&u32_array, &DataType::UInt32)
-        );
-
-        let u16_expected = vec!["0", "255", "65535", "null"];
-        assert_eq!(
-            u16_expected,
-            get_cast_values::<UInt16Type>(&u32_array, &DataType::UInt16)
-        );
-
-        let u8_expected = vec!["0", "255", "null", "null"];
-        assert_eq!(
-            u8_expected,
-            get_cast_values::<UInt8Type>(&u32_array, &DataType::UInt8)
-        );
-    }
-
-    #[test]
-    fn test_cast_from_uint16() {
-        let u16_values: Vec<u16> = vec![0, std::u8::MAX as u16, std::u16::MAX as u16];
-        let u16_array: ArrayRef = Arc::new(UInt16Array::from(u16_values));
-
-        let f64_expected = vec!["0.0", "255.0", "65535.0"];
-        assert_eq!(
-            f64_expected,
-            get_cast_values::<Float64Type>(&u16_array, &DataType::Float64)
-        );
-
-        let f32_expected = vec!["0.0", "255.0", "65535.0"];
-        assert_eq!(
-            f32_expected,
-            get_cast_values::<Float32Type>(&u16_array, &DataType::Float32)
-        );
-
-        let i64_expected = vec!["0", "255", "65535"];
-        assert_eq!(
-            i64_expected,
-            get_cast_values::<Int64Type>(&u16_array, &DataType::Int64)
-        );
-
-        let i32_expected = vec!["0", "255", "65535"];
-        assert_eq!(
-            i32_expected,
-            get_cast_values::<Int32Type>(&u16_array, &DataType::Int32)
-        );
-
-        let i16_expected = vec!["0", "255", "null"];
-        assert_eq!(
-            i16_expected,
-            get_cast_values::<Int16Type>(&u16_array, &DataType::Int16)
-        );
-
-        let i8_expected = vec!["0", "null", "null"];
-        assert_eq!(
-            i8_expected,
-            get_cast_values::<Int8Type>(&u16_array, &DataType::Int8)
-        );
-
-        let u64_expected = vec!["0", "255", "65535"];
-        assert_eq!(
-            u64_expected,
-            get_cast_values::<UInt64Type>(&u16_array, &DataType::UInt64)
-        );
-
-        let u32_expected = vec!["0", "255", "65535"];
-        assert_eq!(
-            u32_expected,
-            get_cast_values::<UInt32Type>(&u16_array, &DataType::UInt32)
-        );
-
-        let u16_expected = vec!["0", "255", "65535"];
-        assert_eq!(
-            u16_expected,
-            get_cast_values::<UInt16Type>(&u16_array, &DataType::UInt16)
-        );
-
-        let u8_expected = vec!["0", "255", "null"];
-        assert_eq!(
-            u8_expected,
-            get_cast_values::<UInt8Type>(&u16_array, &DataType::UInt8)
-        );
-    }
-
-    #[test]
-    fn test_cast_from_uint8() {
-        let u8_values: Vec<u8> = vec![0, std::u8::MAX];
-        let u8_array: ArrayRef = Arc::new(UInt8Array::from(u8_values));
-
-        let f64_expected = vec!["0.0", "255.0"];
-        assert_eq!(
-            f64_expected,
-            get_cast_values::<Float64Type>(&u8_array, &DataType::Float64)
-        );
-
-        let f32_expected = vec!["0.0", "255.0"];
-        assert_eq!(
-            f32_expected,
-            get_cast_values::<Float32Type>(&u8_array, &DataType::Float32)
-        );
-
-        let i64_expected = vec!["0", "255"];
-        assert_eq!(
-            i64_expected,
-            get_cast_values::<Int64Type>(&u8_array, &DataType::Int64)
-        );
-
-        let i32_expected = vec!["0", "255"];
-        assert_eq!(
-            i32_expected,
-            get_cast_values::<Int32Type>(&u8_array, &DataType::Int32)
-        );
-
-        let i16_expected = vec!["0", "255"];
-        assert_eq!(
-            i16_expected,
-            get_cast_values::<Int16Type>(&u8_array, &DataType::Int16)
-        );
-
-        let i8_expected = vec!["0", "null"];
-        assert_eq!(
-            i8_expected,
-            get_cast_values::<Int8Type>(&u8_array, &DataType::Int8)
-        );
-
-        let u64_expected = vec!["0", "255"];
-        assert_eq!(
-            u64_expected,
-            get_cast_values::<UInt64Type>(&u8_array, &DataType::UInt64)
-        );
-
-        let u32_expected = vec!["0", "255"];
-        assert_eq!(
-            u32_expected,
-            get_cast_values::<UInt32Type>(&u8_array, &DataType::UInt32)
-        );
-
-        let u16_expected = vec!["0", "255"];
-        assert_eq!(
-            u16_expected,
-            get_cast_values::<UInt16Type>(&u8_array, &DataType::UInt16)
-        );
-
-        let u8_expected = vec!["0", "255"];
-        assert_eq!(
-            u8_expected,
-            get_cast_values::<UInt8Type>(&u8_array, &DataType::UInt8)
-        );
-    }
-
-    #[test]
-    fn test_cast_from_int64() {
-        let i64_values: Vec<i64> = vec![
-            std::i64::MIN,
-            std::i32::MIN as i64,
-            std::i16::MIN as i64,
-            std::i8::MIN as i64,
-            0,
-            std::i8::MAX as i64,
-            std::i16::MAX as i64,
-            std::i32::MAX as i64,
-            std::i64::MAX,
-        ];
-        let i64_array: ArrayRef = Arc::new(Int64Array::from(i64_values));
-
-        let f64_expected = vec![
-            "-9223372036854776000.0",
-            "-2147483648.0",
-            "-32768.0",
-            "-128.0",
-            "0.0",
-            "127.0",
-            "32767.0",
-            "2147483647.0",
-            "9223372036854776000.0",
-        ];
-        assert_eq!(
-            f64_expected,
-            get_cast_values::<Float64Type>(&i64_array, &DataType::Float64)
-        );
-
-        let f32_expected = vec![
-            "-9223372000000000000.0",
-            "-2147483600.0",
-            "-32768.0",
-            "-128.0",
-            "0.0",
-            "127.0",
-            "32767.0",
-            "2147483600.0",
-            "9223372000000000000.0",
-        ];
-        assert_eq!(
-            f32_expected,
-            get_cast_values::<Float32Type>(&i64_array, &DataType::Float32)
-        );
-
-        let i64_expected = vec![
-            "-9223372036854775808",
-            "-2147483648",
-            "-32768",
-            "-128",
-            "0",
-            "127",
-            "32767",
-            "2147483647",
-            "9223372036854775807",
-        ];
-        assert_eq!(
-            i64_expected,
-            get_cast_values::<Int64Type>(&i64_array, &DataType::Int64)
-        );
-
-        let i32_expected = vec![
-            "null",
-            "-2147483648",
-            "-32768",
-            "-128",
-            "0",
-            "127",
-            "32767",
-            "2147483647",
-            "null",
-        ];
-        assert_eq!(
-            i32_expected,
-            get_cast_values::<Int32Type>(&i64_array, &DataType::Int32)
-        );
-
-        assert_eq!(
-            i32_expected,
-            get_cast_values::<Date32Type>(&i64_array, &DataType::Date32)
-        );
-
-        let i16_expected = vec![
-            "null", "null", "-32768", "-128", "0", "127", "32767", "null", "null",
-        ];
-        assert_eq!(
-            i16_expected,
-            get_cast_values::<Int16Type>(&i64_array, &DataType::Int16)
-        );
-
-        let i8_expected = vec![
-            "null", "null", "null", "-128", "0", "127", "null", "null", "null",
-        ];
-        assert_eq!(
-            i8_expected,
-            get_cast_values::<Int8Type>(&i64_array, &DataType::Int8)
-        );
-
-        let u64_expected = vec![
-            "null",
-            "null",
-            "null",
-            "null",
-            "0",
-            "127",
-            "32767",
-            "2147483647",
-            "9223372036854775807",
-        ];
-        assert_eq!(
-            u64_expected,
-            get_cast_values::<UInt64Type>(&i64_array, &DataType::UInt64)
-        );
-
-        let u32_expected = vec![
-            "null",
-            "null",
-            "null",
-            "null",
-            "0",
-            "127",
-            "32767",
-            "2147483647",
-            "null",
-        ];
-        assert_eq!(
-            u32_expected,
-            get_cast_values::<UInt32Type>(&i64_array, &DataType::UInt32)
-        );
-
-        let u16_expected = vec![
-            "null", "null", "null", "null", "0", "127", "32767", "null", "null",
-        ];
-        assert_eq!(
-            u16_expected,
-            get_cast_values::<UInt16Type>(&i64_array, &DataType::UInt16)
-        );
-
-        let u8_expected = vec![
-            "null", "null", "null", "null", "0", "127", "null", "null", "null",
-        ];
-        assert_eq!(
-            u8_expected,
-            get_cast_values::<UInt8Type>(&i64_array, &DataType::UInt8)
-        );
-    }
-
-    #[test]
-    fn test_cast_from_int32() {
-        let i32_values: Vec<i32> = vec![
-            std::i32::MIN as i32,
-            std::i16::MIN as i32,
-            std::i8::MIN as i32,
-            0,
-            std::i8::MAX as i32,
-            std::i16::MAX as i32,
-            std::i32::MAX as i32,
-        ];
-        let i32_array: ArrayRef = Arc::new(Int32Array::from(i32_values));
-
-        let f64_expected = vec![
-            "-2147483648.0",
-            "-32768.0",
-            "-128.0",
-            "0.0",
-            "127.0",
-            "32767.0",
-            "2147483647.0",
-        ];
-        assert_eq!(
-            f64_expected,
-            get_cast_values::<Float64Type>(&i32_array, &DataType::Float64)
-        );
-
-        let f32_expected = vec![
-            "-2147483600.0",
-            "-32768.0",
-            "-128.0",
-            "0.0",
-            "127.0",
-            "32767.0",
-            "2147483600.0",
-        ];
-        assert_eq!(
-            f32_expected,
-            get_cast_values::<Float32Type>(&i32_array, &DataType::Float32)
-        );
-
-        let i16_expected = vec!["null", "-32768", "-128", "0", "127", "32767", "null"];
-        assert_eq!(
-            i16_expected,
-            get_cast_values::<Int16Type>(&i32_array, &DataType::Int16)
-        );
-
-        let i8_expected = vec!["null", "null", "-128", "0", "127", "null", "null"];
-        assert_eq!(
-            i8_expected,
-            get_cast_values::<Int8Type>(&i32_array, &DataType::Int8)
-        );
-
-        let u64_expected =
-            vec!["null", "null", "null", "0", "127", "32767", "2147483647"];
-        assert_eq!(
-            u64_expected,
-            get_cast_values::<UInt64Type>(&i32_array, &DataType::UInt64)
-        );
-
-        let u32_expected =
-            vec!["null", "null", "null", "0", "127", "32767", "2147483647"];
-        assert_eq!(
-            u32_expected,
-            get_cast_values::<UInt32Type>(&i32_array, &DataType::UInt32)
-        );
-
-        let u16_expected = vec!["null", "null", "null", "0", "127", "32767", "null"];
-        assert_eq!(
-            u16_expected,
-            get_cast_values::<UInt16Type>(&i32_array, &DataType::UInt16)
-        );
-
-        let u8_expected = vec!["null", "null", "null", "0", "127", "null", "null"];
-        assert_eq!(
-            u8_expected,
-            get_cast_values::<UInt8Type>(&i32_array, &DataType::UInt8)
-        );
-
-        // The date32 to date64 cast increases the numerical values in order to keep the same dates.
-        let i64_expected = vec![
-            "-185542587187200000",
-            "-2831155200000",
-            "-11059200000",
-            "0",
-            "10972800000",
-            "2831068800000",
-            "185542587100800000",
-        ];
-        assert_eq!(
-            i64_expected,
-            get_cast_values::<Date64Type>(&i32_array, &DataType::Date64)
-        );
-    }
-
-    #[test]
-    fn test_cast_from_int16() {
-        let i16_values: Vec<i16> = vec![
-            std::i16::MIN,
-            std::i8::MIN as i16,
-            0,
-            std::i8::MAX as i16,
-            std::i16::MAX,
-        ];
-        let i16_array: ArrayRef = Arc::new(Int16Array::from(i16_values));
-
-        let f64_expected = vec!["-32768.0", "-128.0", "0.0", "127.0", "32767.0"];
-        assert_eq!(
-            f64_expected,
-            get_cast_values::<Float64Type>(&i16_array, &DataType::Float64)
-        );
-
-        let f32_expected = vec!["-32768.0", "-128.0", "0.0", "127.0", "32767.0"];
-        assert_eq!(
-            f32_expected,
-            get_cast_values::<Float32Type>(&i16_array, &DataType::Float32)
-        );
-
-        let i64_expected = vec!["-32768", "-128", "0", "127", "32767"];
-        assert_eq!(
-            i64_expected,
-            get_cast_values::<Int64Type>(&i16_array, &DataType::Int64)
-        );
-
-        let i32_expected = vec!["-32768", "-128", "0", "127", "32767"];
-        assert_eq!(
-            i32_expected,
-            get_cast_values::<Int32Type>(&i16_array, &DataType::Int32)
-        );
-
-        let i16_expected = vec!["-32768", "-128", "0", "127", "32767"];
-        assert_eq!(
-            i16_expected,
-            get_cast_values::<Int16Type>(&i16_array, &DataType::Int16)
-        );
-
-        let i8_expected = vec!["null", "-128", "0", "127", "null"];
-        assert_eq!(
-            i8_expected,
-            get_cast_values::<Int8Type>(&i16_array, &DataType::Int8)
-        );
-
-        let u64_expected = vec!["null", "null", "0", "127", "32767"];
-        assert_eq!(
-            u64_expected,
-            get_cast_values::<UInt64Type>(&i16_array, &DataType::UInt64)
-        );
-
-        let u32_expected = vec!["null", "null", "0", "127", "32767"];
-        assert_eq!(
-            u32_expected,
-            get_cast_values::<UInt32Type>(&i16_array, &DataType::UInt32)
-        );
-
-        let u16_expected = vec!["null", "null", "0", "127", "32767"];
-        assert_eq!(
-            u16_expected,
-            get_cast_values::<UInt16Type>(&i16_array, &DataType::UInt16)
-        );
-
-        let u8_expected = vec!["null", "null", "0", "127", "null"];
-        assert_eq!(
-            u8_expected,
-            get_cast_values::<UInt8Type>(&i16_array, &DataType::UInt8)
-        );
-    }
-
-    #[test]
-    fn test_cast_from_date32() {
-        let i32_values: Vec<i32> = vec![
-            std::i32::MIN as i32,
-            std::i16::MIN as i32,
-            std::i8::MIN as i32,
-            0,
-            std::i8::MAX as i32,
-            std::i16::MAX as i32,
-            std::i32::MAX as i32,
-        ];
-        let date32_array: ArrayRef = Arc::new(Date32Array::from(i32_values));
-
-        let i64_expected = vec![
-            "-2147483648",
-            "-32768",
-            "-128",
-            "0",
-            "127",
-            "32767",
-            "2147483647",
-        ];
-        assert_eq!(
-            i64_expected,
-            get_cast_values::<Int64Type>(&date32_array, &DataType::Int64)
-        );
-    }
-
-    #[test]
-    fn test_cast_from_int8() {
-        let i8_values: Vec<i8> = vec![std::i8::MIN, 0, std::i8::MAX];
-        let i8_array: ArrayRef = Arc::new(Int8Array::from(i8_values));
-
-        let f64_expected = vec!["-128.0", "0.0", "127.0"];
-        assert_eq!(
-            f64_expected,
-            get_cast_values::<Float64Type>(&i8_array, &DataType::Float64)
-        );
-
-        let f32_expected = vec!["-128.0", "0.0", "127.0"];
-        assert_eq!(
-            f32_expected,
-            get_cast_values::<Float32Type>(&i8_array, &DataType::Float32)
-        );
-
-        let i64_expected = vec!["-128", "0", "127"];
-        assert_eq!(
-            i64_expected,
-            get_cast_values::<Int64Type>(&i8_array, &DataType::Int64)
-        );
-
-        let i32_expected = vec!["-128", "0", "127"];
-        assert_eq!(
-            i32_expected,
-            get_cast_values::<Int32Type>(&i8_array, &DataType::Int32)
-        );
-
-        let i16_expected = vec!["-128", "0", "127"];
-        assert_eq!(
-            i16_expected,
-            get_cast_values::<Int16Type>(&i8_array, &DataType::Int16)
-        );
-
-        let i8_expected = vec!["-128", "0", "127"];
-        assert_eq!(
-            i8_expected,
-            get_cast_values::<Int8Type>(&i8_array, &DataType::Int8)
-        );
-
-        let u64_expected = vec!["null", "0", "127"];
-        assert_eq!(
-            u64_expected,
-            get_cast_values::<UInt64Type>(&i8_array, &DataType::UInt64)
-        );
-
-        let u32_expected = vec!["null", "0", "127"];
-        assert_eq!(
-            u32_expected,
-            get_cast_values::<UInt32Type>(&i8_array, &DataType::UInt32)
-        );
-
-        let u16_expected = vec!["null", "0", "127"];
-        assert_eq!(
-            u16_expected,
-            get_cast_values::<UInt16Type>(&i8_array, &DataType::UInt16)
-        );
-
-        let u8_expected = vec!["null", "0", "127"];
-        assert_eq!(
-            u8_expected,
-            get_cast_values::<UInt8Type>(&i8_array, &DataType::UInt8)
-        );
-    }
-
-    /// Convert `array` into a vector of strings by casting to data type dt
-    fn get_cast_values<T>(array: &ArrayRef, dt: &DataType) -> Vec<String>
-    where
-        T: ArrowNumericType,
-    {
-        let c = cast(&array, dt).unwrap();
-        let a = c.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
-        let mut v: Vec<String> = vec![];
-        for i in 0..array.len() {
-            if a.is_null(i) {
-                v.push("null".to_string())
-            } else {
-                v.push(format!("{:?}", a.value(i)));
-            }
-        }
-        v
-    }
-
-    #[test]
-    fn test_cast_utf8_dict() {
-        // FROM a dictionary with of Utf8 values
-        use DataType::*;
-
-        let keys_builder = PrimitiveBuilder::<Int8Type>::new(10);
-        let values_builder = StringBuilder::new(10);
-        let mut builder = StringDictionaryBuilder::new(keys_builder, values_builder);
-        builder.append("one").unwrap();
-        builder.append_null().unwrap();
-        builder.append("three").unwrap();
-        let array: ArrayRef = Arc::new(builder.finish());
-
-        let expected = vec!["one", "null", "three"];
-
-        // Test casting TO StringArray
-        let cast_type = Utf8;
-        let cast_array = cast(&array, &cast_type).expect("cast to UTF-8 failed");
-        assert_eq!(cast_array.data_type(), &cast_type);
-        assert_eq!(array_to_strings(&cast_array), expected);
-
-        // Test casting TO Dictionary (with different index sizes)
-
-        let cast_type = Dictionary(Box::new(Int16), Box::new(Utf8));
-        let cast_array = cast(&array, &cast_type).expect("cast failed");
-        assert_eq!(cast_array.data_type(), &cast_type);
-        assert_eq!(array_to_strings(&cast_array), expected);
-
-        let cast_type = Dictionary(Box::new(Int32), Box::new(Utf8));
-        let cast_array = cast(&array, &cast_type).expect("cast failed");
-        assert_eq!(cast_array.data_type(), &cast_type);
-        assert_eq!(array_to_strings(&cast_array), expected);
-
-        let cast_type = Dictionary(Box::new(Int64), Box::new(Utf8));
-        let cast_array = cast(&array, &cast_type).expect("cast failed");
-        assert_eq!(cast_array.data_type(), &cast_type);
-        assert_eq!(array_to_strings(&cast_array), expected);
-
-        let cast_type = Dictionary(Box::new(UInt8), Box::new(Utf8));
-        let cast_array = cast(&array, &cast_type).expect("cast failed");
-        assert_eq!(cast_array.data_type(), &cast_type);
-        assert_eq!(array_to_strings(&cast_array), expected);
-
-        let cast_type = Dictionary(Box::new(UInt16), Box::new(Utf8));
-        let cast_array = cast(&array, &cast_type).expect("cast failed");
-        assert_eq!(cast_array.data_type(), &cast_type);
-        assert_eq!(array_to_strings(&cast_array), expected);
-
-        let cast_type = Dictionary(Box::new(UInt32), Box::new(Utf8));
-        let cast_array = cast(&array, &cast_type).expect("cast failed");
-        assert_eq!(cast_array.data_type(), &cast_type);
-        assert_eq!(array_to_strings(&cast_array), expected);
-
-        let cast_type = Dictionary(Box::new(UInt64), Box::new(Utf8));
-        let cast_array = cast(&array, &cast_type).expect("cast failed");
-        assert_eq!(cast_array.data_type(), &cast_type);
-        assert_eq!(array_to_strings(&cast_array), expected);
-    }
-
-    #[test]
-    fn test_cast_dict_to_dict_bad_index_value_primitive() {
-        use DataType::*;
-        // test converting from an array that has indexes of a type
-        // that are out of bounds for a particular other kind of
-        // index.
-
-        let keys_builder = PrimitiveBuilder::<Int32Type>::new(10);
-        let values_builder = PrimitiveBuilder::<Int64Type>::new(10);
-        let mut builder = PrimitiveDictionaryBuilder::new(keys_builder, values_builder);
-
-        // add 200 distinct values (which can be stored by a
-        // dictionary indexed by int32, but not a dictionary indexed
-        // with int8)
-        for i in 0..200 {
-            builder.append(i).unwrap();
-        }
-        let array: ArrayRef = Arc::new(builder.finish());
-
-        let cast_type = Dictionary(Box::new(Int8), Box::new(Utf8));
-        let res = cast(&array, &cast_type);
-        assert!(res.is_err());
-        let actual_error = format!("{:?}", res);
-        let expected_error = "Could not convert 72 dictionary indexes from Int32 to Int8";
-        assert!(
-            actual_error.contains(expected_error),
-            "did not find expected error '{}' in actual error '{}'",
-            actual_error,
-            expected_error
-        );
-    }
-
-    #[test]
-    fn test_cast_dict_to_dict_bad_index_value_utf8() {
-        use DataType::*;
-        // Same test as test_cast_dict_to_dict_bad_index_value but use
-        // string values (and encode the expected behavior here);
-
-        let keys_builder = PrimitiveBuilder::<Int32Type>::new(10);
-        let values_builder = StringBuilder::new(10);
-        let mut builder = StringDictionaryBuilder::new(keys_builder, values_builder);
-
-        // add 200 distinct values (which can be stored by a
-        // dictionary indexed by int32, but not a dictionary indexed
-        // with int8)
-        for i in 0..200 {
-            let val = format!("val{}", i);
-            builder.append(&val).unwrap();
-        }
-        let array: ArrayRef = Arc::new(builder.finish());
-
-        let cast_type = Dictionary(Box::new(Int8), Box::new(Utf8));
-        let res = cast(&array, &cast_type);
-        assert!(res.is_err());
-        let actual_error = format!("{:?}", res);
-        let expected_error = "Could not convert 72 dictionary indexes from Int32 to Int8";
-        assert!(
-            actual_error.contains(expected_error),
-            "did not find expected error '{}' in actual error '{}'",
-            actual_error,
-            expected_error
-        );
-    }
-
-    #[test]
-    fn test_cast_primitive_dict() {
-        // FROM a dictionary with of INT32 values
-        use DataType::*;
-
-        let keys_builder = PrimitiveBuilder::<Int8Type>::new(10);
-        let values_builder = PrimitiveBuilder::<Int32Type>::new(10);
-        let mut builder = PrimitiveDictionaryBuilder::new(keys_builder, values_builder);
-        builder.append(1).unwrap();
-        builder.append_null().unwrap();
-        builder.append(3).unwrap();
-        let array: ArrayRef = Arc::new(builder.finish());
-
-        let expected = vec!["1", "null", "3"];
-
-        // Test casting TO PrimitiveArray, different dictionary type
-        let cast_array = cast(&array, &Utf8).expect("cast to UTF-8 failed");
-        assert_eq!(array_to_strings(&cast_array), expected);
-        assert_eq!(cast_array.data_type(), &Utf8);
-
-        let cast_array = cast(&array, &Int64).expect("cast to int64 failed");
-        assert_eq!(array_to_strings(&cast_array), expected);
-        assert_eq!(cast_array.data_type(), &Int64);
-    }
-
-    #[test]
-    fn test_cast_primitive_array_to_dict() {
-        use DataType::*;
-
-        let mut builder = PrimitiveBuilder::<Int32Type>::new(10);
-        builder.append_value(1).unwrap();
-        builder.append_null().unwrap();
-        builder.append_value(3).unwrap();
-        let array: ArrayRef = Arc::new(builder.finish());
-
-        let expected = vec!["1", "null", "3"];
-
-        // Cast to a dictionary (same value type, Int32)
-        let cast_type = Dictionary(Box::new(UInt8), Box::new(Int32));
-        let cast_array = cast(&array, &cast_type).expect("cast failed");
-        assert_eq!(cast_array.data_type(), &cast_type);
-        assert_eq!(array_to_strings(&cast_array), expected);
-
-        // Cast to a dictionary (different value type, Int8)
-        let cast_type = Dictionary(Box::new(UInt8), Box::new(Int8));
-        let cast_array = cast(&array, &cast_type).expect("cast failed");
-        assert_eq!(cast_array.data_type(), &cast_type);
-        assert_eq!(array_to_strings(&cast_array), expected);
-    }
-
-    #[test]
-    fn test_cast_string_array_to_dict() {
-        use DataType::*;
-
-        let array = Arc::new(StringArray::from(vec![Some("one"), None, Some("three")]))
-            as ArrayRef;
-
-        let expected = vec!["one", "null", "three"];
-
-        // Cast to a dictionary (same value type, Utf8)
-        let cast_type = Dictionary(Box::new(UInt8), Box::new(Utf8));
-        let cast_array = cast(&array, &cast_type).expect("cast failed");
-        assert_eq!(cast_array.data_type(), &cast_type);
-        assert_eq!(array_to_strings(&cast_array), expected);
-    }
-
-    #[test]
-    fn test_cast_null_array_to_int32() {
-        let array = Arc::new(NullArray::new(6)) as ArrayRef;
-
-        let expected = Int32Array::from(vec![None; 6]);
-
-        // Cast to a dictionary (same value type, Utf8)
-        let cast_type = DataType::Int32;
-        let cast_array = cast(&array, &cast_type).expect("cast failed");
-        let cast_array = as_primitive_array::<Int32Type>(&cast_array);
-        assert_eq!(cast_array.data_type(), &cast_type);
-        assert_eq!(cast_array, &expected);
-    }
-
-    /// Print the `DictionaryArray` `array` as a vector of strings
-    fn array_to_strings(array: &ArrayRef) -> Vec<String> {
-        (0..array.len())
-            .map(|i| {
-                if array.is_null(i) {
-                    "null".to_string()
-                } else {
-                    array_value_to_string(array, i).expect("Convert array to String")
-                }
-            })
-            .collect()
-    }
-
-    #[test]
-    fn test_cast_utf8_to_date32() {
-        use chrono::NaiveDate;
-        let from_ymd = chrono::NaiveDate::from_ymd;
-        let since = chrono::NaiveDate::signed_duration_since;
-
-        let a = StringArray::from(vec![
-            "2000-01-01",          // valid date with leading 0s
-            "2000-2-2",            // valid date without leading 0s
-            "2000-00-00",          // invalid month and day
-            "2000-01-01T12:00:00", // date + time is invalid
-            "2000",                // just a year is invalid
-        ]);
-        let array = Arc::new(a) as ArrayRef;
-        let b = cast(&array, &DataType::Date32).unwrap();
-        let c = b.as_any().downcast_ref::<Date32Array>().unwrap();
-
-        // test valid inputs
-        let date_value = since(NaiveDate::from_ymd(2000, 1, 1), from_ymd(1970, 1, 1))
-            .num_days() as i32;
-        assert_eq!(true, c.is_valid(0)); // "2000-01-01"
-        assert_eq!(date_value, c.value(0));
-
-        let date_value = since(NaiveDate::from_ymd(2000, 2, 2), from_ymd(1970, 1, 1))
-            .num_days() as i32;
-        assert_eq!(true, c.is_valid(1)); // "2000-2-2"
-        assert_eq!(date_value, c.value(1));
-
-        // test invalid inputs
-        assert_eq!(false, c.is_valid(2)); // "2000-00-00"
-        assert_eq!(false, c.is_valid(3)); // "2000-01-01T12:00:00"
-        assert_eq!(false, c.is_valid(4)); // "2000"
-    }
-
-    #[test]
-    fn test_cast_utf8_to_date64() {
-        let a = StringArray::from(vec![
-            "2000-01-01T12:00:00", // date + time valid
-            "2020-12-15T12:34:56", // date + time valid
-            "2020-2-2T12:34:56",   // valid date time without leading 0s
-            "2000-00-00T12:00:00", // invalid month and day
-            "2000-01-01 12:00:00", // missing the 'T'
-            "2000-01-01",          // just a date is invalid
-        ]);
-        let array = Arc::new(a) as ArrayRef;
-        let b = cast(&array, &DataType::Date64).unwrap();
-        let c = b.as_any().downcast_ref::<Date64Array>().unwrap();
-
-        // test valid inputs
-        assert_eq!(true, c.is_valid(0)); // "2000-01-01T12:00:00"
-        assert_eq!(946728000000, c.value(0));
-        assert_eq!(true, c.is_valid(1)); // "2020-12-15T12:34:56"
-        assert_eq!(1608035696000, c.value(1));
-        assert_eq!(true, c.is_valid(2)); // "2020-2-2T12:34:56"
-        assert_eq!(1580646896000, c.value(2));
-
-        // test invalid inputs
-        assert_eq!(false, c.is_valid(3)); // "2000-00-00T12:00:00"
-        assert_eq!(false, c.is_valid(4)); // "2000-01-01 12:00:00"
-        assert_eq!(false, c.is_valid(5)); // "2000-01-01"
-    }
-
-    #[test]
-    fn test_can_cast_types() {
-        // this function attempts to ensure that can_cast_types stays
-        // in sync with cast.  It simply tries all combinations of
-        // types and makes sure that if `can_cast_types` returns
-        // true, so does `cast`
-
-        let all_types = get_all_types();
-
-        for array in get_arrays_of_all_types() {
-            for to_type in &all_types {
-                println!("Test casting {:?} --> {:?}", array.data_type(), to_type);
-                let cast_result = cast(&array, &to_type);
-                let reported_cast_ability = can_cast_types(array.data_type(), to_type);
-
-                // check for mismatch
-                match (cast_result, reported_cast_ability) {
-                    (Ok(_), false) => {
-                        panic!("Was able to cast array {:?} from {:?} to {:?} but can_cast_types reported false",
-                               array, array.data_type(), to_type)
-                    }
-                    (Err(e), true) => {
-                        panic!("Was not able to cast array {:?} from {:?} to {:?} but can_cast_types reported true. \
-                                Error was {:?}",
-                               array, array.data_type(), to_type, e)
-                    }
-                    // otherwise it was a match
-                    _ => {}
-                };
-            }
-        }
-    }
-
-    #[test]
-    fn test_cast_list_containers() {
-        // large-list to list
-        let array = Arc::new(make_large_list_array()) as ArrayRef;
-        let list_array = cast(
-            &array,
-            &DataType::List(Box::new(Field::new("", DataType::Int32, false))),
-        )
-        .unwrap();
-        let actual = list_array.as_any().downcast_ref::<ListArray>().unwrap();
-        let expected = array.as_any().downcast_ref::<LargeListArray>().unwrap();
-
-        assert_eq!(&expected.value(0), &actual.value(0));
-        assert_eq!(&expected.value(1), &actual.value(1));
-        assert_eq!(&expected.value(2), &actual.value(2));
-
-        // list to large-list
-        let array = Arc::new(make_list_array()) as ArrayRef;
-        let large_list_array = cast(
-            &array,
-            &DataType::LargeList(Box::new(Field::new("", DataType::Int32, false))),
-        )
-        .unwrap();
-        let actual = large_list_array
-            .as_any()
-            .downcast_ref::<LargeListArray>()
-            .unwrap();
-        let expected = array.as_any().downcast_ref::<ListArray>().unwrap();
-
-        assert_eq!(&expected.value(0), &actual.value(0));
-        assert_eq!(&expected.value(1), &actual.value(1));
-        assert_eq!(&expected.value(2), &actual.value(2));
-    }
-
-    /// Create instances of arrays with varying types for cast tests
-    fn get_arrays_of_all_types() -> Vec<ArrayRef> {
-        let tz_name = String::from("America/New_York");
-        let binary_data: Vec<&[u8]> = vec![b"foo", b"bar"];
-        vec![
-            Arc::new(BinaryArray::from(binary_data.clone())),
-            Arc::new(LargeBinaryArray::from(binary_data.clone())),
-            make_dictionary_primitive::<Int8Type>(),
-            make_dictionary_primitive::<Int16Type>(),
-            make_dictionary_primitive::<Int32Type>(),
-            make_dictionary_primitive::<Int64Type>(),
-            make_dictionary_primitive::<UInt8Type>(),
-            make_dictionary_primitive::<UInt16Type>(),
-            make_dictionary_primitive::<UInt32Type>(),
-            make_dictionary_primitive::<UInt64Type>(),
-            make_dictionary_utf8::<Int8Type>(),
-            make_dictionary_utf8::<Int16Type>(),
-            make_dictionary_utf8::<Int32Type>(),
-            make_dictionary_utf8::<Int64Type>(),
-            make_dictionary_utf8::<UInt8Type>(),
-            make_dictionary_utf8::<UInt16Type>(),
-            make_dictionary_utf8::<UInt32Type>(),
-            make_dictionary_utf8::<UInt64Type>(),
-            Arc::new(make_list_array()),
-            Arc::new(make_large_list_array()),
-            Arc::new(make_fixed_size_list_array()),
-            Arc::new(make_fixed_size_binary_array()),
-            Arc::new(StructArray::from(vec![
-                (
-                    Field::new("a", DataType::Boolean, false),
-                    Arc::new(BooleanArray::from(vec![false, false, true, true]))
-                        as Arc<Array>,
-                ),
-                (
-                    Field::new("b", DataType::Int32, false),
-                    Arc::new(Int32Array::from(vec![42, 28, 19, 31])),
-                ),
-            ])),
-            //Arc::new(make_union_array()),
-            Arc::new(NullArray::new(10)),
-            Arc::new(StringArray::from(vec!["foo", "bar"])),
-            Arc::new(LargeStringArray::from(vec!["foo", "bar"])),
-            Arc::new(BooleanArray::from(vec![true, false])),
-            Arc::new(Int8Array::from(vec![1, 2])),
-            Arc::new(Int16Array::from(vec![1, 2])),
-            Arc::new(Int32Array::from(vec![1, 2])),
-            Arc::new(Int64Array::from(vec![1, 2])),
-            Arc::new(UInt8Array::from(vec![1, 2])),
-            Arc::new(UInt16Array::from(vec![1, 2])),
-            Arc::new(UInt32Array::from(vec![1, 2])),
-            Arc::new(UInt64Array::from(vec![1, 2])),
-            Arc::new(Float32Array::from(vec![1.0, 2.0])),
-            Arc::new(Float64Array::from(vec![1.0, 2.0])),
-            Arc::new(TimestampSecondArray::from_vec(vec![1000, 2000], None)),
-            Arc::new(TimestampMillisecondArray::from_vec(vec![1000, 2000], None)),
-            Arc::new(TimestampMicrosecondArray::from_vec(vec![1000, 2000], None)),
-            Arc::new(TimestampNanosecondArray::from_vec(vec![1000, 2000], None)),
-            Arc::new(TimestampSecondArray::from_vec(
-                vec![1000, 2000],
-                Some(tz_name.clone()),
-            )),
-            Arc::new(TimestampMillisecondArray::from_vec(
-                vec![1000, 2000],
-                Some(tz_name.clone()),
-            )),
-            Arc::new(TimestampMicrosecondArray::from_vec(
-                vec![1000, 2000],
-                Some(tz_name.clone()),
-            )),
-            Arc::new(TimestampNanosecondArray::from_vec(
-                vec![1000, 2000],
-                Some(tz_name),
-            )),
-            Arc::new(Date32Array::from(vec![1000, 2000])),
-            Arc::new(Date64Array::from(vec![1000, 2000])),
-            Arc::new(Time32SecondArray::from(vec![1000, 2000])),
-            Arc::new(Time32MillisecondArray::from(vec![1000, 2000])),
-            Arc::new(Time64MicrosecondArray::from(vec![1000, 2000])),
-            Arc::new(Time64NanosecondArray::from(vec![1000, 2000])),
-            Arc::new(IntervalYearMonthArray::from(vec![1000, 2000])),
-            Arc::new(IntervalDayTimeArray::from(vec![1000, 2000])),
-            Arc::new(DurationSecondArray::from(vec![1000, 2000])),
-            Arc::new(DurationMillisecondArray::from(vec![1000, 2000])),
-            Arc::new(DurationMicrosecondArray::from(vec![1000, 2000])),
-            Arc::new(DurationNanosecondArray::from(vec![1000, 2000])),
-        ]
-    }
-
-    fn make_list_array() -> ListArray {
-        // Construct a value array
-        let value_data = ArrayData::builder(DataType::Int32)
-            .len(8)
-            .add_buffer(Buffer::from_slice_ref(&[0, 1, 2, 3, 4, 5, 6, 7]))
-            .build();
-
-        // Construct a buffer for value offsets, for the nested array:
-        //  [[0, 1, 2], [3, 4, 5], [6, 7]]
-        let value_offsets = Buffer::from_slice_ref(&[0, 3, 6, 8]);
-
-        // Construct a list array from the above two
-        let list_data_type =
-            DataType::List(Box::new(Field::new("item", DataType::Int32, true)));
-        let list_data = ArrayData::builder(list_data_type)
-            .len(3)
-            .add_buffer(value_offsets)
-            .add_child_data(value_data)
-            .build();
-        ListArray::from(list_data)
-    }
-
-    fn make_large_list_array() -> LargeListArray {
-        // Construct a value array
-        let value_data = ArrayData::builder(DataType::Int32)
-            .len(8)
-            .add_buffer(Buffer::from_slice_ref(&[0, 1, 2, 3, 4, 5, 6, 7]))
-            .build();
-
-        // Construct a buffer for value offsets, for the nested array:
-        //  [[0, 1, 2], [3, 4, 5], [6, 7]]
-        let value_offsets = Buffer::from_slice_ref(&[0i64, 3, 6, 8]);
-
-        // Construct a list array from the above two
-        let list_data_type =
-            DataType::LargeList(Box::new(Field::new("item", DataType::Int32, true)));
-        let list_data = ArrayData::builder(list_data_type)
-            .len(3)
-            .add_buffer(value_offsets)
-            .add_child_data(value_data)
-            .build();
-        LargeListArray::from(list_data)
-    }
-
-    fn make_fixed_size_list_array() -> FixedSizeListArray {
-        // Construct a value array
-        let value_data = ArrayData::builder(DataType::Int32)
-            .len(10)
-            .add_buffer(Buffer::from_slice_ref(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
-            .build();
-
-        // Construct a fixed size list array from the above two
-        let list_data_type = DataType::FixedSizeList(
-            Box::new(Field::new("item", DataType::Int32, true)),
-            2,
-        );
-        let list_data = ArrayData::builder(list_data_type)
-            .len(5)
-            .add_child_data(value_data)
-            .build();
-        FixedSizeListArray::from(list_data)
-    }
-
-    fn make_fixed_size_binary_array() -> FixedSizeBinaryArray {
-        let values: [u8; 15] = *b"hellotherearrow";
-
-        let array_data = ArrayData::builder(DataType::FixedSizeBinary(5))
-            .len(3)
-            .add_buffer(Buffer::from(&values[..]))
-            .build();
-        FixedSizeBinaryArray::from(array_data)
-    }
-
-    fn make_union_array() -> UnionArray {
-        let mut builder = UnionBuilder::new_dense(7);
-        builder.append::<Int32Type>("a", 1).unwrap();
-        builder.append::<Int64Type>("b", 2).unwrap();
-        builder.build().unwrap()
-    }
-
-    /// Creates a dictionary with primitive dictionary values, and keys of type K
-    fn make_dictionary_primitive<K: ArrowDictionaryKeyType>() -> ArrayRef {
-        let keys_builder = PrimitiveBuilder::<K>::new(2);
-        // Pick Int32 arbitrarily for dictionary values
-        let values_builder = PrimitiveBuilder::<Int32Type>::new(2);
-        let mut b = PrimitiveDictionaryBuilder::new(keys_builder, values_builder);
-        b.append(1).unwrap();
-        b.append(2).unwrap();
-        Arc::new(b.finish())
-    }
-
-    /// Creates a dictionary with utf8 values, and keys of type K
-    fn make_dictionary_utf8<K: ArrowDictionaryKeyType>() -> ArrayRef {
-        let keys_builder = PrimitiveBuilder::<K>::new(2);
-        // Pick Int32 arbitrarily for dictionary values
-        let values_builder = StringBuilder::new(2);
-        let mut b = StringDictionaryBuilder::new(keys_builder, values_builder);
-        b.append("foo").unwrap();
-        b.append("bar").unwrap();
-        Arc::new(b.finish())
-    }
-
-    // Get a selection of datatypes to try and cast to
-    fn get_all_types() -> Vec<DataType> {
-        use DataType::*;
-        let tz_name = String::from("America/New_York");
-
-        vec![
-            Null,
-            Boolean,
-            Int8,
-            Int16,
-            Int32,
-            UInt64,
-            UInt8,
-            UInt16,
-            UInt32,
-            UInt64,
-            Float16,
-            Float32,
-            Float64,
-            Timestamp(TimeUnit::Second, None),
-            Timestamp(TimeUnit::Millisecond, None),
-            Timestamp(TimeUnit::Microsecond, None),
-            Timestamp(TimeUnit::Nanosecond, None),
-            Timestamp(TimeUnit::Second, Some(tz_name.clone())),
-            Timestamp(TimeUnit::Millisecond, Some(tz_name.clone())),
-            Timestamp(TimeUnit::Microsecond, Some(tz_name.clone())),
-            Timestamp(TimeUnit::Nanosecond, Some(tz_name)),
-            Date32,
-            Date64,
-            Time32(TimeUnit::Second),
-            Time32(TimeUnit::Millisecond),
-            Time64(TimeUnit::Microsecond),
-            Time64(TimeUnit::Nanosecond),
-            Duration(TimeUnit::Second),
-            Duration(TimeUnit::Millisecond),
-            Duration(TimeUnit::Microsecond),
-            Duration(TimeUnit::Nanosecond),
-            Interval(IntervalUnit::YearMonth),
-            Interval(IntervalUnit::DayTime),
-            Binary,
-            FixedSizeBinary(10),
-            LargeBinary,
-            Utf8,
-            LargeUtf8,
-            List(Box::new(Field::new("item", DataType::Int8, true))),
-            List(Box::new(Field::new("item", DataType::Utf8, true))),
-            FixedSizeList(Box::new(Field::new("item", DataType::Int8, true)), 10),
-            FixedSizeList(Box::new(Field::new("item", DataType::Utf8, false)), 10),
-            LargeList(Box::new(Field::new("item", DataType::Int8, true))),
-            LargeList(Box::new(Field::new("item", DataType::Utf8, false))),
-            Struct(vec![
-                Field::new("f1", DataType::Int32, false),
-                Field::new("f2", DataType::Utf8, true),
-            ]),
-            Union(vec![
-                Field::new("f1", DataType::Int32, false),
-                Field::new("f2", DataType::Utf8, true),
-            ]),
-            Dictionary(Box::new(DataType::Int8), Box::new(DataType::Int32)),
-            Dictionary(Box::new(DataType::Int16), Box::new(DataType::Utf8)),
-            Dictionary(Box::new(DataType::UInt32), Box::new(DataType::Utf8)),
-        ]
-    }
-}
diff --git a/rust/arrow/src/compute/kernels/cast_utils.rs b/rust/arrow/src/compute/kernels/cast_utils.rs
deleted file mode 100644
index a06bf421ea4..00000000000
--- a/rust/arrow/src/compute/kernels/cast_utils.rs
+++ /dev/null
@@ -1,299 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use crate::error::{ArrowError, Result};
-use chrono::{prelude::*, LocalResult};
-
-/// Accepts a string in RFC3339 / ISO8601 standard format and some
-/// variants and converts it to a nanosecond precision timestamp.
-///
-/// Implements the `to_timestamp` function to convert a string to a
-/// timestamp, following the model of spark SQL’s to_`timestamp`.
-///
-/// In addition to RFC3339 / ISO8601 standard timestamps, it also
-/// accepts strings that use a space ` ` to separate the date and time
-/// as well as strings that have no explicit timezone offset.
-///
-/// Examples of accepted inputs:
-/// * `1997-01-31T09:26:56.123Z`        # RCF3339
-/// * `1997-01-31T09:26:56.123-05:00`   # RCF3339
-/// * `1997-01-31 09:26:56.123-05:00`   # close to RCF3339 but with a space rather than T
-/// * `1997-01-31T09:26:56.123`         # close to RCF3339 but no timezone offset specified
-/// * `1997-01-31 09:26:56.123`         # close to RCF3339 but uses a space and no timezone offset
-/// * `1997-01-31 09:26:56`             # close to RCF3339, no fractional seconds
-//
-/// Internally, this function uses the `chrono` library for the
-/// datetime parsing
-///
-/// We hope to extend this function in the future with a second
-/// parameter to specifying the format string.
-///
-/// ## Timestamp Precision
-///
-/// Function uses the maximum precision timestamps supported by
-/// Arrow (nanoseconds stored as a 64-bit integer) timestamps. This
-/// means the range of dates that timestamps can represent is ~1677 AD
-/// to 2262 AM
-///
-///
-/// ## Timezone / Offset Handling
-///
-/// Numerical values of timestamps are stored compared to offset UTC.
-///
-/// This function intertprets strings without an explicit time zone as
-/// timestamps with offsets of the local time on the machine
-///
-/// For example, `1997-01-31 09:26:56.123Z` is interpreted as UTC, as
-/// it has an explicit timezone specifier (“Z” for Zulu/UTC)
-///
-/// `1997-01-31T09:26:56.123` is interpreted as a local timestamp in
-/// the timezone of the machine. For example, if
-/// the system timezone is set to Americas/New_York (UTC-5) the
-/// timestamp will be interpreted as though it were
-/// `1997-01-31T09:26:56.123-05:00`
-#[inline]
-pub fn string_to_timestamp_nanos(s: &str) -> Result<i64> {
-    // Fast path:  RFC3339 timestamp (with a T)
-    // Example: 2020-09-08T13:42:29.190855Z
-    if let Ok(ts) = DateTime::parse_from_rfc3339(s) {
-        return Ok(ts.timestamp_nanos());
-    }
-
-    // Implement quasi-RFC3339 support by trying to parse the
-    // timestamp with various other format specifiers to to support
-    // separating the date and time with a space ' ' rather than 'T' to be
-    // (more) compatible with Apache Spark SQL
-
-    // timezone offset, using ' ' as a separator
-    // Example: 2020-09-08 13:42:29.190855-05:00
-    if let Ok(ts) = DateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S%.f%:z") {
-        return Ok(ts.timestamp_nanos());
-    }
-
-    // with an explicit Z, using ' ' as a separator
-    // Example: 2020-09-08 13:42:29Z
-    if let Ok(ts) = Utc.datetime_from_str(s, "%Y-%m-%d %H:%M:%S%.fZ") {
-        return Ok(ts.timestamp_nanos());
-    }
-
-    // Support timestamps without an explicit timezone offset, again
-    // to be compatible with what Apache Spark SQL does.
-
-    // without a timezone specifier as a local time, using T as a separator
-    // Example: 2020-09-08T13:42:29.190855
-    if let Ok(ts) = NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S.%f") {
-        return naive_datetime_to_timestamp(s, ts);
-    }
-
-    // without a timezone specifier as a local time, using T as a
-    // separator, no fractional seconds
-    // Example: 2020-09-08T13:42:29
-    if let Ok(ts) = NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S") {
-        return naive_datetime_to_timestamp(s, ts);
-    }
-
-    // without a timezone specifier as a local time, using ' ' as a separator
-    // Example: 2020-09-08 13:42:29.190855
-    if let Ok(ts) = NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S.%f") {
-        return naive_datetime_to_timestamp(s, ts);
-    }
-
-    // without a timezone specifier as a local time, using ' ' as a
-    // separator, no fractional seconds
-    // Example: 2020-09-08 13:42:29
-    if let Ok(ts) = NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S") {
-        return naive_datetime_to_timestamp(s, ts);
-    }
-
-    // Note we don't pass along the error message from the underlying
-    // chrono parsing because we tried several different format
-    // strings and we don't know which the user was trying to
-    // match. Ths any of the specific error messages is likely to be
-    // be more confusing than helpful
-    Err(ArrowError::CastError(format!(
-        "Error parsing '{}' as timestamp",
-        s
-    )))
-}
-
-/// Converts the naive datetime (which has no specific timezone) to a
-/// nanosecond epoch timestamp relative to UTC.
-fn naive_datetime_to_timestamp(s: &str, datetime: NaiveDateTime) -> Result<i64> {
-    let l = Local {};
-
-    match l.from_local_datetime(&datetime) {
-        LocalResult::None => Err(ArrowError::CastError(format!(
-            "Error parsing '{}' as timestamp: local time representation is invalid",
-            s
-        ))),
-        LocalResult::Single(local_datetime) => {
-            Ok(local_datetime.with_timezone(&Utc).timestamp_nanos())
-        }
-        // Ambiguous times can happen if the timestamp is exactly when
-        // a daylight savings time transition occurs, for example, and
-        // so the datetime could validly be said to be in two
-        // potential offsets. However, since we are about to convert
-        // to UTC anyways, we can pick one arbitrarily
-        LocalResult::Ambiguous(local_datetime, _) => {
-            Ok(local_datetime.with_timezone(&Utc).timestamp_nanos())
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn string_to_timestamp_timezone() -> Result<()> {
-        // Explicit timezone
-        assert_eq!(
-            1599572549190855000,
-            parse_timestamp("2020-09-08T13:42:29.190855+00:00")?
-        );
-        assert_eq!(
-            1599572549190855000,
-            parse_timestamp("2020-09-08T13:42:29.190855Z")?
-        );
-        assert_eq!(
-            1599572549000000000,
-            parse_timestamp("2020-09-08T13:42:29Z")?
-        ); // no fractional part
-        assert_eq!(
-            1599590549190855000,
-            parse_timestamp("2020-09-08T13:42:29.190855-05:00")?
-        );
-        Ok(())
-    }
-
-    #[test]
-    fn string_to_timestamp_timezone_space() -> Result<()> {
-        // Ensure space rather than T between time and date is accepted
-        assert_eq!(
-            1599572549190855000,
-            parse_timestamp("2020-09-08 13:42:29.190855+00:00")?
-        );
-        assert_eq!(
-            1599572549190855000,
-            parse_timestamp("2020-09-08 13:42:29.190855Z")?
-        );
-        assert_eq!(
-            1599572549000000000,
-            parse_timestamp("2020-09-08 13:42:29Z")?
-        ); // no fractional part
-        assert_eq!(
-            1599590549190855000,
-            parse_timestamp("2020-09-08 13:42:29.190855-05:00")?
-        );
-        Ok(())
-    }
-
-    /// Interprets a naive_datetime (with no explicit timzone offset)
-    /// using the local timezone and returns the timestamp in UTC (0
-    /// offset)
-    fn naive_datetime_to_timestamp(naive_datetime: &NaiveDateTime) -> i64 {
-        // Note: Use chrono APIs that are different than
-        // naive_datetime_to_timestamp to compute the utc offset to
-        // try and double check the logic
-        let utc_offset_secs = match Local.offset_from_local_datetime(&naive_datetime) {
-            LocalResult::Single(local_offset) => {
-                local_offset.fix().local_minus_utc() as i64
-            }
-            _ => panic!("Unexpected failure converting to local datetime"),
-        };
-        let utc_offset_nanos = utc_offset_secs * 1_000_000_000;
-        naive_datetime.timestamp_nanos() - utc_offset_nanos
-    }
-
-    #[test]
-    fn string_to_timestamp_no_timezone() -> Result<()> {
-        // This test is designed to succeed in regardless of the local
-        // timezone the test machine is running. Thus it is still
-        // somewhat suceptable to bugs in the use of chrono
-        let naive_datetime = NaiveDateTime::new(
-            NaiveDate::from_ymd(2020, 9, 8),
-            NaiveTime::from_hms_nano(13, 42, 29, 190855),
-        );
-
-        // Ensure both T and ' ' variants work
-        assert_eq!(
-            naive_datetime_to_timestamp(&naive_datetime),
-            parse_timestamp("2020-09-08T13:42:29.190855")?
-        );
-
-        assert_eq!(
-            naive_datetime_to_timestamp(&naive_datetime),
-            parse_timestamp("2020-09-08 13:42:29.190855")?
-        );
-
-        // Also ensure that parsing timestamps with no fractional
-        // second part works as well
-        let naive_datetime_whole_secs = NaiveDateTime::new(
-            NaiveDate::from_ymd(2020, 9, 8),
-            NaiveTime::from_hms(13, 42, 29),
-        );
-
-        // Ensure both T and ' ' variants work
-        assert_eq!(
-            naive_datetime_to_timestamp(&naive_datetime_whole_secs),
-            parse_timestamp("2020-09-08T13:42:29")?
-        );
-
-        assert_eq!(
-            naive_datetime_to_timestamp(&naive_datetime_whole_secs),
-            parse_timestamp("2020-09-08 13:42:29")?
-        );
-
-        Ok(())
-    }
-
-    #[test]
-    fn string_to_timestamp_invalid() {
-        // Test parsing invalid formats
-
-        // It would be nice to make these messages better
-        expect_timestamp_parse_error("", "Error parsing '' as timestamp");
-        expect_timestamp_parse_error("SS", "Error parsing 'SS' as timestamp");
-        expect_timestamp_parse_error(
-            "Wed, 18 Feb 2015 23:16:09 GMT",
-            "Error parsing 'Wed, 18 Feb 2015 23:16:09 GMT' as timestamp",
-        );
-    }
-
-    // Parse a timestamp to timestamp int with a useful human readable error message
-    fn parse_timestamp(s: &str) -> Result<i64> {
-        let result = string_to_timestamp_nanos(s);
-        if let Err(e) = &result {
-            eprintln!("Error parsing timestamp '{}': {:?}", s, e);
-        }
-        result
-    }
-
-    fn expect_timestamp_parse_error(s: &str, expected_err: &str) {
-        match string_to_timestamp_nanos(s) {
-            Ok(v) => panic!(
-                "Expected error '{}' while parsing '{}', but parsed {} instead",
-                expected_err, s, v
-            ),
-            Err(e) => {
-                assert!(e.to_string().contains(expected_err),
-                        "Can not find expected error '{}' while parsing '{}'. Actual error '{}'",
-                        expected_err, s, e);
-            }
-        }
-    }
-}
diff --git a/rust/arrow/src/compute/kernels/comparison.rs b/rust/arrow/src/compute/kernels/comparison.rs
deleted file mode 100644
index a770ede21dc..00000000000
--- a/rust/arrow/src/compute/kernels/comparison.rs
+++ /dev/null
@@ -1,1619 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Defines basic comparison kernels for [`PrimitiveArray`]s.
-//!
-//! These kernels can leverage SIMD if available on your system.  Currently no runtime
-//! detection is provided, you should enable the specific SIMD intrinsics using
-//! `RUSTFLAGS="-C target-feature=+avx2"` for example.  See the documentation
-//! [here](https://doc.rust-lang.org/stable/core/arch/) for more information.
-
-use regex::Regex;
-use std::collections::HashMap;
-
-use crate::array::*;
-use crate::buffer::{Buffer, MutableBuffer};
-use crate::compute::util::combine_option_bitmap;
-use crate::datatypes::{ArrowNumericType, DataType};
-use crate::error::{ArrowError, Result};
-use crate::util::bit_util;
-
-/// Helper function to perform boolean lambda function on values from two arrays, this
-/// version does not attempt to use SIMD.
-macro_rules! compare_op {
-    ($left: expr, $right:expr, $op:expr) => {{
-        if $left.len() != $right.len() {
-            return Err(ArrowError::ComputeError(
-                "Cannot perform comparison operation on arrays of different length"
-                    .to_string(),
-            ));
-        }
-
-        let null_bit_buffer =
-            combine_option_bitmap($left.data_ref(), $right.data_ref(), $left.len())?;
-
-        let comparison = (0..$left.len()).map(|i| $op($left.value(i), $right.value(i)));
-        // same size as $left.len() and $right.len()
-        let buffer = unsafe { MutableBuffer::from_trusted_len_iter_bool(comparison) };
-
-        let data = ArrayData::new(
-            DataType::Boolean,
-            $left.len(),
-            None,
-            null_bit_buffer,
-            0,
-            vec![Buffer::from(buffer)],
-            vec![],
-        );
-        Ok(BooleanArray::from(data))
-    }};
-}
-
-macro_rules! compare_op_primitive {
-    ($left: expr, $right:expr, $op:expr) => {{
-        if $left.len() != $right.len() {
-            return Err(ArrowError::ComputeError(
-                "Cannot perform comparison operation on arrays of different length"
-                    .to_string(),
-            ));
-        }
-
-        let null_bit_buffer =
-            combine_option_bitmap($left.data_ref(), $right.data_ref(), $left.len())?;
-
-        let mut values = MutableBuffer::from_len_zeroed(($left.len() + 7) / 8);
-        let lhs_chunks_iter = $left.values().chunks_exact(8);
-        let lhs_remainder = lhs_chunks_iter.remainder();
-        let rhs_chunks_iter = $right.values().chunks_exact(8);
-        let rhs_remainder = rhs_chunks_iter.remainder();
-        let chunks = $left.len() / 8;
-
-        values[..chunks]
-            .iter_mut()
-            .zip(lhs_chunks_iter)
-            .zip(rhs_chunks_iter)
-            .for_each(|((byte, lhs), rhs)| {
-                lhs.iter()
-                    .zip(rhs.iter())
-                    .enumerate()
-                    .for_each(|(i, (&lhs, &rhs))| {
-                        *byte |= if $op(lhs, rhs) { 1 << i } else { 0 };
-                    });
-            });
-
-        if !lhs_remainder.is_empty() {
-            let last = &mut values[chunks];
-            lhs_remainder
-                .iter()
-                .zip(rhs_remainder.iter())
-                .enumerate()
-                .for_each(|(i, (&lhs, &rhs))| {
-                    *last |= if $op(lhs, rhs) { 1 << i } else { 0 };
-                });
-        };
-        let data = ArrayData::new(
-            DataType::Boolean,
-            $left.len(),
-            None,
-            null_bit_buffer,
-            0,
-            vec![Buffer::from(values)],
-            vec![],
-        );
-        Ok(BooleanArray::from(data))
-    }};
-}
-
-macro_rules! compare_op_scalar {
-    ($left: expr, $right:expr, $op:expr) => {{
-        let null_bit_buffer = $left.data().null_buffer().cloned();
-
-        let comparison = (0..$left.len()).map(|i| $op($left.value(i), $right));
-        // same as $left.len()
-        let buffer = unsafe { MutableBuffer::from_trusted_len_iter_bool(comparison) };
-
-        let data = ArrayData::new(
-            DataType::Boolean,
-            $left.len(),
-            None,
-            null_bit_buffer,
-            0,
-            vec![Buffer::from(buffer)],
-            vec![],
-        );
-        Ok(BooleanArray::from(data))
-    }};
-}
-
-macro_rules! compare_op_scalar_primitive {
-    ($left: expr, $right:expr, $op:expr) => {{
-        let null_bit_buffer = $left.data().null_buffer().cloned();
-
-        let mut values = MutableBuffer::from_len_zeroed(($left.len() + 7) / 8);
-        let lhs_chunks_iter = $left.values().chunks_exact(8);
-        let lhs_remainder = lhs_chunks_iter.remainder();
-        let chunks = $left.len() / 8;
-
-        values[..chunks]
-            .iter_mut()
-            .zip(lhs_chunks_iter)
-            .for_each(|(byte, chunk)| {
-                chunk.iter().enumerate().for_each(|(i, &c_i)| {
-                    *byte |= if $op(c_i, $right) { 1 << i } else { 0 };
-                });
-            });
-        if !lhs_remainder.is_empty() {
-            let last = &mut values[chunks];
-            lhs_remainder.iter().enumerate().for_each(|(i, &lhs)| {
-                *last |= if $op(lhs, $right) { 1 << i } else { 0 };
-            });
-        };
-
-        let data = ArrayData::new(
-            DataType::Boolean,
-            $left.len(),
-            None,
-            null_bit_buffer,
-            0,
-            vec![Buffer::from(values)],
-            vec![],
-        );
-        Ok(BooleanArray::from(data))
-    }};
-}
-
-/// Evaluate `op(left, right)` for [`PrimitiveArray`]s using a specified
-/// comparison function.
-pub fn no_simd_compare_op<T, F>(
-    left: &PrimitiveArray<T>,
-    right: &PrimitiveArray<T>,
-    op: F,
-) -> Result<BooleanArray>
-where
-    T: ArrowNumericType,
-    F: Fn(T::Native, T::Native) -> bool,
-{
-    compare_op_primitive!(left, right, op)
-}
-
-/// Evaluate `op(left, right)` for [`PrimitiveArray`] and scalar using
-/// a specified comparison function.
-pub fn no_simd_compare_op_scalar<T, F>(
-    left: &PrimitiveArray<T>,
-    right: T::Native,
-    op: F,
-) -> Result<BooleanArray>
-where
-    T: ArrowNumericType,
-    F: Fn(T::Native, T::Native) -> bool,
-{
-    compare_op_scalar_primitive!(left, right, op)
-}
-
-/// Perform SQL `left LIKE right` operation on [`StringArray`] / [`LargeStringArray`].
-///
-/// There are two wildcards supported with the LIKE operator:
-///
-/// 1. `%` - The percent sign represents zero, one, or multiple characters
-/// 2. `_` - The underscore represents a single character
-///
-/// For example:
-/// ```
-/// use arrow::array::{StringArray, BooleanArray};
-/// use arrow::compute::like_utf8;
-///
-/// let strings = StringArray::from(vec!["Arrow", "Arrow", "Arrow", "Ar"]);
-/// let patterns = StringArray::from(vec!["A%", "B%", "A.", "A."]);
-///
-/// let result = like_utf8(&strings, &patterns).unwrap();
-/// assert_eq!(result, BooleanArray::from(vec![true, false, false, true]));
-/// ```
-pub fn like_utf8<OffsetSize: StringOffsetSizeTrait>(
-    left: &GenericStringArray<OffsetSize>,
-    right: &GenericStringArray<OffsetSize>,
-) -> Result<BooleanArray> {
-    let mut map = HashMap::new();
-    if left.len() != right.len() {
-        return Err(ArrowError::ComputeError(
-            "Cannot perform comparison operation on arrays of different length"
-                .to_string(),
-        ));
-    }
-
-    let null_bit_buffer =
-        combine_option_bitmap(left.data_ref(), right.data_ref(), left.len())?;
-
-    let mut result = BooleanBufferBuilder::new(left.len());
-    for i in 0..left.len() {
-        let haystack = left.value(i);
-        let pat = right.value(i);
-        let re = if let Some(ref regex) = map.get(pat) {
-            regex
-        } else {
-            let re_pattern = pat.replace("%", ".*").replace("_", ".");
-            let re = Regex::new(&format!("^{}$", re_pattern)).map_err(|e| {
-                ArrowError::ComputeError(format!(
-                    "Unable to build regex from LIKE pattern: {}",
-                    e
-                ))
-            })?;
-            map.insert(pat, re);
-            map.get(pat).unwrap()
-        };
-
-        result.append(re.is_match(haystack));
-    }
-
-    let data = ArrayData::new(
-        DataType::Boolean,
-        left.len(),
-        None,
-        null_bit_buffer,
-        0,
-        vec![result.finish()],
-        vec![],
-    );
-    Ok(BooleanArray::from(data))
-}
-
-fn is_like_pattern(c: char) -> bool {
-    c == '%' || c == '_'
-}
-
-/// Perform SQL `left LIKE right` operation on [`StringArray`] /
-/// [`LargeStringArray`] and a scalar.
-///
-/// See the documentation on [`like_utf8`] for more details.
-pub fn like_utf8_scalar<OffsetSize: StringOffsetSizeTrait>(
-    left: &GenericStringArray<OffsetSize>,
-    right: &str,
-) -> Result<BooleanArray> {
-    let null_bit_buffer = left.data().null_buffer().cloned();
-    let bytes = bit_util::ceil(left.len(), 8);
-    let mut bool_buf = MutableBuffer::from_len_zeroed(bytes);
-    let bool_slice = bool_buf.as_slice_mut();
-
-    if !right.contains(is_like_pattern) {
-        // fast path, can use equals
-        for i in 0..left.len() {
-            if left.value(i) == right {
-                bit_util::set_bit(bool_slice, i);
-            }
-        }
-    } else if right.ends_with('%') && !right[..right.len() - 1].contains(is_like_pattern)
-    {
-        // fast path, can use starts_with
-        let starts_with = &right[..right.len() - 1];
-        for i in 0..left.len() {
-            if left.value(i).starts_with(starts_with) {
-                bit_util::set_bit(bool_slice, i);
-            }
-        }
-    } else if right.starts_with('%') && !right[1..].contains(is_like_pattern) {
-        // fast path, can use ends_with
-        let ends_with = &right[1..];
-        for i in 0..left.len() {
-            if left.value(i).ends_with(ends_with) {
-                bit_util::set_bit(bool_slice, i);
-            }
-        }
-    } else {
-        let re_pattern = right.replace("%", ".*").replace("_", ".");
-        let re = Regex::new(&format!("^{}$", re_pattern)).map_err(|e| {
-            ArrowError::ComputeError(format!(
-                "Unable to build regex from LIKE pattern: {}",
-                e
-            ))
-        })?;
-
-        for i in 0..left.len() {
-            let haystack = left.value(i);
-            if re.is_match(haystack) {
-                bit_util::set_bit(bool_slice, i);
-            }
-        }
-    };
-
-    let data = ArrayData::new(
-        DataType::Boolean,
-        left.len(),
-        None,
-        null_bit_buffer,
-        0,
-        vec![bool_buf.into()],
-        vec![],
-    );
-    Ok(BooleanArray::from(data))
-}
-
-/// Perform SQL `left NOT LIKE right` operation on [`StringArray`] /
-/// [`LargeStringArray`].
-///
-/// See the documentation on [`like_utf8`] for more details.
-pub fn nlike_utf8<OffsetSize: StringOffsetSizeTrait>(
-    left: &GenericStringArray<OffsetSize>,
-    right: &GenericStringArray<OffsetSize>,
-) -> Result<BooleanArray> {
-    let mut map = HashMap::new();
-    if left.len() != right.len() {
-        return Err(ArrowError::ComputeError(
-            "Cannot perform comparison operation on arrays of different length"
-                .to_string(),
-        ));
-    }
-
-    let null_bit_buffer =
-        combine_option_bitmap(left.data_ref(), right.data_ref(), left.len())?;
-
-    let mut result = BooleanBufferBuilder::new(left.len());
-    for i in 0..left.len() {
-        let haystack = left.value(i);
-        let pat = right.value(i);
-        let re = if let Some(ref regex) = map.get(pat) {
-            regex
-        } else {
-            let re_pattern = pat.replace("%", ".*").replace("_", ".");
-            let re = Regex::new(&format!("^{}$", re_pattern)).map_err(|e| {
-                ArrowError::ComputeError(format!(
-                    "Unable to build regex from LIKE pattern: {}",
-                    e
-                ))
-            })?;
-            map.insert(pat, re);
-            map.get(pat).unwrap()
-        };
-
-        result.append(!re.is_match(haystack));
-    }
-
-    let data = ArrayData::new(
-        DataType::Boolean,
-        left.len(),
-        None,
-        null_bit_buffer,
-        0,
-        vec![result.finish()],
-        vec![],
-    );
-    Ok(BooleanArray::from(data))
-}
-
-/// Perform SQL `left NOT LIKE right` operation on [`StringArray`] /
-/// [`LargeStringArray`] and a scalar.
-///
-/// See the documentation on [`like_utf8`] for more details.
-pub fn nlike_utf8_scalar<OffsetSize: StringOffsetSizeTrait>(
-    left: &GenericStringArray<OffsetSize>,
-    right: &str,
-) -> Result<BooleanArray> {
-    let null_bit_buffer = left.data().null_buffer().cloned();
-    let mut result = BooleanBufferBuilder::new(left.len());
-
-    if !right.contains(is_like_pattern) {
-        // fast path, can use equals
-        for i in 0..left.len() {
-            result.append(left.value(i) != right);
-        }
-    } else if right.ends_with('%') && !right[..right.len() - 1].contains(is_like_pattern)
-    {
-        // fast path, can use ends_with
-        for i in 0..left.len() {
-            result.append(!left.value(i).starts_with(&right[..right.len() - 1]));
-        }
-    } else if right.starts_with('%') && !right[1..].contains(is_like_pattern) {
-        // fast path, can use starts_with
-        for i in 0..left.len() {
-            result.append(!left.value(i).ends_with(&right[1..]));
-        }
-    } else {
-        let re_pattern = right.replace("%", ".*").replace("_", ".");
-        let re = Regex::new(&format!("^{}$", re_pattern)).map_err(|e| {
-            ArrowError::ComputeError(format!(
-                "Unable to build regex from LIKE pattern: {}",
-                e
-            ))
-        })?;
-        for i in 0..left.len() {
-            let haystack = left.value(i);
-            result.append(!re.is_match(haystack));
-        }
-    }
-
-    let data = ArrayData::new(
-        DataType::Boolean,
-        left.len(),
-        None,
-        null_bit_buffer,
-        0,
-        vec![result.finish()],
-        vec![],
-    );
-    Ok(BooleanArray::from(data))
-}
-
-/// Perform `left == right` operation on [`StringArray`] / [`LargeStringArray`].
-pub fn eq_utf8<OffsetSize: StringOffsetSizeTrait>(
-    left: &GenericStringArray<OffsetSize>,
-    right: &GenericStringArray<OffsetSize>,
-) -> Result<BooleanArray> {
-    compare_op!(left, right, |a, b| a == b)
-}
-
-/// Perform `left == right` operation on [`StringArray`] / [`LargeStringArray`] and a scalar.
-pub fn eq_utf8_scalar<OffsetSize: StringOffsetSizeTrait>(
-    left: &GenericStringArray<OffsetSize>,
-    right: &str,
-) -> Result<BooleanArray> {
-    compare_op_scalar!(left, right, |a, b| a == b)
-}
-
-/// Perform `left != right` operation on [`StringArray`] / [`LargeStringArray`].
-pub fn neq_utf8<OffsetSize: StringOffsetSizeTrait>(
-    left: &GenericStringArray<OffsetSize>,
-    right: &GenericStringArray<OffsetSize>,
-) -> Result<BooleanArray> {
-    compare_op!(left, right, |a, b| a != b)
-}
-
-/// Perform `left != right` operation on [`StringArray`] / [`LargeStringArray`] and a scalar.
-pub fn neq_utf8_scalar<OffsetSize: StringOffsetSizeTrait>(
-    left: &GenericStringArray<OffsetSize>,
-    right: &str,
-) -> Result<BooleanArray> {
-    compare_op_scalar!(left, right, |a, b| a != b)
-}
-
-/// Perform `left < right` operation on [`StringArray`] / [`LargeStringArray`].
-pub fn lt_utf8<OffsetSize: StringOffsetSizeTrait>(
-    left: &GenericStringArray<OffsetSize>,
-    right: &GenericStringArray<OffsetSize>,
-) -> Result<BooleanArray> {
-    compare_op!(left, right, |a, b| a < b)
-}
-
-/// Perform `left < right` operation on [`StringArray`] / [`LargeStringArray`] and a scalar.
-pub fn lt_utf8_scalar<OffsetSize: StringOffsetSizeTrait>(
-    left: &GenericStringArray<OffsetSize>,
-    right: &str,
-) -> Result<BooleanArray> {
-    compare_op_scalar!(left, right, |a, b| a < b)
-}
-
-/// Perform `left <= right` operation on [`StringArray`] / [`LargeStringArray`].
-pub fn lt_eq_utf8<OffsetSize: StringOffsetSizeTrait>(
-    left: &GenericStringArray<OffsetSize>,
-    right: &GenericStringArray<OffsetSize>,
-) -> Result<BooleanArray> {
-    compare_op!(left, right, |a, b| a <= b)
-}
-
-/// Perform `left <= right` operation on [`StringArray`] / [`LargeStringArray`] and a scalar.
-pub fn lt_eq_utf8_scalar<OffsetSize: StringOffsetSizeTrait>(
-    left: &GenericStringArray<OffsetSize>,
-    right: &str,
-) -> Result<BooleanArray> {
-    compare_op_scalar!(left, right, |a, b| a <= b)
-}
-
-/// Perform `left > right` operation on [`StringArray`] / [`LargeStringArray`].
-pub fn gt_utf8<OffsetSize: StringOffsetSizeTrait>(
-    left: &GenericStringArray<OffsetSize>,
-    right: &GenericStringArray<OffsetSize>,
-) -> Result<BooleanArray> {
-    compare_op!(left, right, |a, b| a > b)
-}
-
-/// Perform `left > right` operation on [`StringArray`] / [`LargeStringArray`] and a scalar.
-pub fn gt_utf8_scalar<OffsetSize: StringOffsetSizeTrait>(
-    left: &GenericStringArray<OffsetSize>,
-    right: &str,
-) -> Result<BooleanArray> {
-    compare_op_scalar!(left, right, |a, b| a > b)
-}
-
-/// Perform `left >= right` operation on [`StringArray`] / [`LargeStringArray`].
-pub fn gt_eq_utf8<OffsetSize: StringOffsetSizeTrait>(
-    left: &GenericStringArray<OffsetSize>,
-    right: &GenericStringArray<OffsetSize>,
-) -> Result<BooleanArray> {
-    compare_op!(left, right, |a, b| a >= b)
-}
-
-/// Perform `left >= right` operation on [`StringArray`] / [`LargeStringArray`] and a scalar.
-pub fn gt_eq_utf8_scalar<OffsetSize: StringOffsetSizeTrait>(
-    left: &GenericStringArray<OffsetSize>,
-    right: &str,
-) -> Result<BooleanArray> {
-    compare_op_scalar!(left, right, |a, b| a >= b)
-}
-
-/// Helper function to perform boolean lambda function on values from two arrays using
-/// SIMD.
-#[cfg(simd)]
-fn simd_compare_op<T, SIMD_OP, SCALAR_OP>(
-    left: &PrimitiveArray<T>,
-    right: &PrimitiveArray<T>,
-    simd_op: SIMD_OP,
-    scalar_op: SCALAR_OP,
-) -> Result<BooleanArray>
-where
-    T: ArrowNumericType,
-    SIMD_OP: Fn(T::Simd, T::Simd) -> T::SimdMask,
-    SCALAR_OP: Fn(T::Native, T::Native) -> bool,
-{
-    use std::borrow::BorrowMut;
-
-    let len = left.len();
-    if len != right.len() {
-        return Err(ArrowError::ComputeError(
-            "Cannot perform comparison operation on arrays of different length"
-                .to_string(),
-        ));
-    }
-
-    let null_bit_buffer = combine_option_bitmap(left.data_ref(), right.data_ref(), len)?;
-
-    let lanes = T::lanes();
-    let buffer_size = bit_util::ceil(len, 8);
-    let mut result = MutableBuffer::new(buffer_size).with_bitset(buffer_size, false);
-
-    // this is currently the case for all our datatypes and allows us to always append full bytes
-    assert!(
-        lanes % 8 == 0,
-        "Number of vector lanes must be multiple of 8"
-    );
-    let mut left_chunks = left.values().chunks_exact(lanes);
-    let mut right_chunks = right.values().chunks_exact(lanes);
-
-    let result_remainder = left_chunks
-        .borrow_mut()
-        .zip(right_chunks.borrow_mut())
-        .fold(
-            result.typed_data_mut(),
-            |result_slice, (left_slice, right_slice)| {
-                let simd_left = T::load(left_slice);
-                let simd_right = T::load(right_slice);
-                let simd_result = simd_op(simd_left, simd_right);
-
-                let bitmask = T::mask_to_u64(&simd_result);
-                let bytes = bitmask.to_le_bytes();
-                &result_slice[0..lanes / 8].copy_from_slice(&bytes[0..lanes / 8]);
-
-                &mut result_slice[lanes / 8..]
-            },
-        );
-
-    let left_remainder = left_chunks.remainder();
-    let right_remainder = right_chunks.remainder();
-
-    assert_eq!(left_remainder.len(), right_remainder.len());
-
-    let remainder_bitmask = left_remainder
-        .iter()
-        .zip(right_remainder.iter())
-        .enumerate()
-        .fold(0_u64, |mut mask, (i, (scalar_left, scalar_right))| {
-            let bit = if scalar_op(*scalar_left, *scalar_right) {
-                1_u64
-            } else {
-                0_u64
-            };
-            mask |= bit << i;
-            mask
-        });
-    let remainder_mask_as_bytes =
-        &remainder_bitmask.to_le_bytes()[0..bit_util::ceil(left_remainder.len(), 8)];
-    result_remainder.copy_from_slice(remainder_mask_as_bytes);
-
-    let data = ArrayData::new(
-        DataType::Boolean,
-        len,
-        None,
-        null_bit_buffer,
-        0,
-        vec![result.into()],
-        vec![],
-    );
-    Ok(BooleanArray::from(data))
-}
-
-/// Helper function to perform boolean lambda function on values from an array and a scalar value using
-/// SIMD.
-#[cfg(simd)]
-fn simd_compare_op_scalar<T, SIMD_OP, SCALAR_OP>(
-    left: &PrimitiveArray<T>,
-    right: T::Native,
-    simd_op: SIMD_OP,
-    scalar_op: SCALAR_OP,
-) -> Result<BooleanArray>
-where
-    T: ArrowNumericType,
-    SIMD_OP: Fn(T::Simd, T::Simd) -> T::SimdMask,
-    SCALAR_OP: Fn(T::Native, T::Native) -> bool,
-{
-    use std::borrow::BorrowMut;
-
-    let len = left.len();
-
-    let lanes = T::lanes();
-    let buffer_size = bit_util::ceil(len, 8);
-    let mut result = MutableBuffer::new(buffer_size).with_bitset(buffer_size, false);
-
-    // this is currently the case for all our datatypes and allows us to always append full bytes
-    assert!(
-        lanes % 8 == 0,
-        "Number of vector lanes must be multiple of 8"
-    );
-    let mut left_chunks = left.values().chunks_exact(lanes);
-    let simd_right = T::init(right);
-
-    let result_remainder = left_chunks.borrow_mut().fold(
-        result.typed_data_mut(),
-        |result_slice, left_slice| {
-            let simd_left = T::load(left_slice);
-            let simd_result = simd_op(simd_left, simd_right);
-
-            let bitmask = T::mask_to_u64(&simd_result);
-            let bytes = bitmask.to_le_bytes();
-            &result_slice[0..lanes / 8].copy_from_slice(&bytes[0..lanes / 8]);
-
-            &mut result_slice[lanes / 8..]
-        },
-    );
-
-    let left_remainder = left_chunks.remainder();
-
-    let remainder_bitmask =
-        left_remainder
-            .iter()
-            .enumerate()
-            .fold(0_u64, |mut mask, (i, scalar_left)| {
-                let bit = if scalar_op(*scalar_left, right) {
-                    1_u64
-                } else {
-                    0_u64
-                };
-                mask |= bit << i;
-                mask
-            });
-    let remainder_mask_as_bytes =
-        &remainder_bitmask.to_le_bytes()[0..bit_util::ceil(left_remainder.len(), 8)];
-    result_remainder.copy_from_slice(remainder_mask_as_bytes);
-
-    let null_bit_buffer = left
-        .data_ref()
-        .null_buffer()
-        .map(|b| b.bit_slice(left.offset(), left.len()));
-
-    // null count is the same as in the input since the right side of the scalar comparison cannot be null
-    let null_count = left.null_count();
-
-    let data = ArrayData::new(
-        DataType::Boolean,
-        len,
-        Some(null_count),
-        null_bit_buffer,
-        0,
-        vec![result.into()],
-        vec![],
-    );
-    Ok(BooleanArray::from(data))
-}
-
-/// Perform `left == right` operation on two arrays.
-pub fn eq<T>(left: &PrimitiveArray<T>, right: &PrimitiveArray<T>) -> Result<BooleanArray>
-where
-    T: ArrowNumericType,
-{
-    #[cfg(simd)]
-    return simd_compare_op(left, right, T::eq, |a, b| a == b);
-    #[cfg(not(simd))]
-    return compare_op!(left, right, |a, b| a == b);
-}
-
-/// Perform `left == right` operation on an array and a scalar value.
-pub fn eq_scalar<T>(left: &PrimitiveArray<T>, right: T::Native) -> Result<BooleanArray>
-where
-    T: ArrowNumericType,
-{
-    #[cfg(simd)]
-    return simd_compare_op_scalar(left, right, T::eq, |a, b| a == b);
-    #[cfg(not(simd))]
-    return compare_op_scalar!(left, right, |a, b| a == b);
-}
-
-/// Perform `left != right` operation on two arrays.
-pub fn neq<T>(left: &PrimitiveArray<T>, right: &PrimitiveArray<T>) -> Result<BooleanArray>
-where
-    T: ArrowNumericType,
-{
-    #[cfg(simd)]
-    return simd_compare_op(left, right, T::ne, |a, b| a != b);
-    #[cfg(not(simd))]
-    return compare_op!(left, right, |a, b| a != b);
-}
-
-/// Perform `left != right` operation on an array and a scalar value.
-pub fn neq_scalar<T>(left: &PrimitiveArray<T>, right: T::Native) -> Result<BooleanArray>
-where
-    T: ArrowNumericType,
-{
-    #[cfg(simd)]
-    return simd_compare_op_scalar(left, right, T::ne, |a, b| a != b);
-    #[cfg(not(simd))]
-    return compare_op_scalar!(left, right, |a, b| a != b);
-}
-
-/// Perform `left < right` operation on two arrays. Null values are less than non-null
-/// values.
-pub fn lt<T>(left: &PrimitiveArray<T>, right: &PrimitiveArray<T>) -> Result<BooleanArray>
-where
-    T: ArrowNumericType,
-{
-    #[cfg(simd)]
-    return simd_compare_op(left, right, T::lt, |a, b| a < b);
-    #[cfg(not(simd))]
-    return compare_op!(left, right, |a, b| a < b);
-}
-
-/// Perform `left < right` operation on an array and a scalar value.
-/// Null values are less than non-null values.
-pub fn lt_scalar<T>(left: &PrimitiveArray<T>, right: T::Native) -> Result<BooleanArray>
-where
-    T: ArrowNumericType,
-{
-    #[cfg(simd)]
-    return simd_compare_op_scalar(left, right, T::lt, |a, b| a < b);
-    #[cfg(not(simd))]
-    return compare_op_scalar!(left, right, |a, b| a < b);
-}
-
-/// Perform `left <= right` operation on two arrays. Null values are less than non-null
-/// values.
-pub fn lt_eq<T>(
-    left: &PrimitiveArray<T>,
-    right: &PrimitiveArray<T>,
-) -> Result<BooleanArray>
-where
-    T: ArrowNumericType,
-{
-    #[cfg(simd)]
-    return simd_compare_op(left, right, T::le, |a, b| a <= b);
-    #[cfg(not(simd))]
-    return compare_op!(left, right, |a, b| a <= b);
-}
-
-/// Perform `left <= right` operation on an array and a scalar value.
-/// Null values are less than non-null values.
-pub fn lt_eq_scalar<T>(left: &PrimitiveArray<T>, right: T::Native) -> Result<BooleanArray>
-where
-    T: ArrowNumericType,
-{
-    #[cfg(simd)]
-    return simd_compare_op_scalar(left, right, T::le, |a, b| a <= b);
-    #[cfg(not(simd))]
-    return compare_op_scalar!(left, right, |a, b| a <= b);
-}
-
-/// Perform `left > right` operation on two arrays. Non-null values are greater than null
-/// values.
-pub fn gt<T>(left: &PrimitiveArray<T>, right: &PrimitiveArray<T>) -> Result<BooleanArray>
-where
-    T: ArrowNumericType,
-{
-    #[cfg(simd)]
-    return simd_compare_op(left, right, T::gt, |a, b| a > b);
-    #[cfg(not(simd))]
-    return compare_op!(left, right, |a, b| a > b);
-}
-
-/// Perform `left > right` operation on an array and a scalar value.
-/// Non-null values are greater than null values.
-pub fn gt_scalar<T>(left: &PrimitiveArray<T>, right: T::Native) -> Result<BooleanArray>
-where
-    T: ArrowNumericType,
-{
-    #[cfg(simd)]
-    return simd_compare_op_scalar(left, right, T::gt, |a, b| a > b);
-    #[cfg(not(simd))]
-    return compare_op_scalar!(left, right, |a, b| a > b);
-}
-
-/// Perform `left >= right` operation on two arrays. Non-null values are greater than null
-/// values.
-pub fn gt_eq<T>(
-    left: &PrimitiveArray<T>,
-    right: &PrimitiveArray<T>,
-) -> Result<BooleanArray>
-where
-    T: ArrowNumericType,
-{
-    #[cfg(simd)]
-    return simd_compare_op(left, right, T::ge, |a, b| a >= b);
-    #[cfg(not(simd))]
-    return compare_op!(left, right, |a, b| a >= b);
-}
-
-/// Perform `left >= right` operation on an array and a scalar value.
-/// Non-null values are greater than null values.
-pub fn gt_eq_scalar<T>(left: &PrimitiveArray<T>, right: T::Native) -> Result<BooleanArray>
-where
-    T: ArrowNumericType,
-{
-    #[cfg(simd)]
-    return simd_compare_op_scalar(left, right, T::ge, |a, b| a >= b);
-    #[cfg(not(simd))]
-    return compare_op_scalar!(left, right, |a, b| a >= b);
-}
-
-/// Checks if a [`GenericListArray`] contains a value in the [`PrimitiveArray`]
-pub fn contains<T, OffsetSize>(
-    left: &PrimitiveArray<T>,
-    right: &GenericListArray<OffsetSize>,
-) -> Result<BooleanArray>
-where
-    T: ArrowNumericType,
-    OffsetSize: OffsetSizeTrait,
-{
-    let left_len = left.len();
-    if left_len != right.len() {
-        return Err(ArrowError::ComputeError(
-            "Cannot perform comparison operation on arrays of different length"
-                .to_string(),
-        ));
-    }
-
-    let num_bytes = bit_util::ceil(left_len, 8);
-
-    let not_both_null_bit_buffer =
-        match combine_option_bitmap(left.data_ref(), right.data_ref(), left_len)? {
-            Some(buff) => buff,
-            None => new_all_set_buffer(num_bytes),
-        };
-    let not_both_null_bitmap = not_both_null_bit_buffer.as_slice();
-
-    let mut bool_buf = MutableBuffer::from_len_zeroed(num_bytes);
-    let bool_slice = bool_buf.as_slice_mut();
-
-    // if both array slots are valid, check if list contains primitive
-    for i in 0..left_len {
-        if bit_util::get_bit(not_both_null_bitmap, i) {
-            let list = right.value(i);
-            let list = list.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
-
-            for j in 0..list.len() {
-                if list.is_valid(j) && (left.value(i) == list.value(j)) {
-                    bit_util::set_bit(bool_slice, i);
-                    continue;
-                }
-            }
-        }
-    }
-
-    let data = ArrayData::new(
-        DataType::Boolean,
-        left.len(),
-        None,
-        None,
-        0,
-        vec![bool_buf.into()],
-        vec![],
-    );
-    Ok(BooleanArray::from(data))
-}
-
-/// Checks if a [`GenericListArray`] contains a value in the [`GenericStringArray`]
-pub fn contains_utf8<OffsetSize>(
-    left: &GenericStringArray<OffsetSize>,
-    right: &ListArray,
-) -> Result<BooleanArray>
-where
-    OffsetSize: StringOffsetSizeTrait,
-{
-    let left_len = left.len();
-    if left_len != right.len() {
-        return Err(ArrowError::ComputeError(
-            "Cannot perform comparison operation on arrays of different length"
-                .to_string(),
-        ));
-    }
-
-    let num_bytes = bit_util::ceil(left_len, 8);
-
-    let not_both_null_bit_buffer =
-        match combine_option_bitmap(left.data_ref(), right.data_ref(), left_len)? {
-            Some(buff) => buff,
-            None => new_all_set_buffer(num_bytes),
-        };
-    let not_both_null_bitmap = not_both_null_bit_buffer.as_slice();
-
-    let mut bool_buf = MutableBuffer::from_len_zeroed(num_bytes);
-    let bool_slice = &mut bool_buf;
-
-    for i in 0..left_len {
-        // contains(null, null) = false
-        if bit_util::get_bit(not_both_null_bitmap, i) {
-            let list = right.value(i);
-            let list = list
-                .as_any()
-                .downcast_ref::<GenericStringArray<OffsetSize>>()
-                .unwrap();
-
-            for j in 0..list.len() {
-                if list.is_valid(j) && (left.value(i) == list.value(j)) {
-                    bit_util::set_bit(bool_slice, i);
-                    continue;
-                }
-            }
-        }
-    }
-
-    let data = ArrayData::new(
-        DataType::Boolean,
-        left.len(),
-        None,
-        None,
-        0,
-        vec![bool_buf.into()],
-        vec![],
-    );
-    Ok(BooleanArray::from(data))
-}
-
-// create a buffer and fill it with valid bits
-#[inline]
-fn new_all_set_buffer(len: usize) -> Buffer {
-    let buffer = MutableBuffer::new(len);
-    let buffer = buffer.with_bitset(len, true);
-
-    buffer.into()
-}
-
-// disable wrapping inside literal vectors used for test data and assertions
-#[rustfmt::skip::macros(vec)]
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::datatypes::Int8Type;
-    use crate::{array::Int32Array, array::Int64Array, datatypes::Field};
-
-    /// Evaluate `KERNEL` with two vectors as inputs and assert against the expected output.
-    /// `A_VEC` and `B_VEC` can be of type `Vec<i64>` or `Vec<Option<i64>>`.
-    /// `EXPECTED` can be either `Vec<bool>` or `Vec<Option<bool>>`.
-    /// The main reason for this macro is that inputs and outputs align nicely after `cargo fmt`.
-    macro_rules! cmp_i64 {
-        ($KERNEL:ident, $A_VEC:expr, $B_VEC:expr, $EXPECTED:expr) => {
-            let a = Int64Array::from($A_VEC);
-            let b = Int64Array::from($B_VEC);
-            let c = $KERNEL(&a, &b).unwrap();
-            assert_eq!(BooleanArray::from($EXPECTED), c);
-        };
-    }
-
-    /// Evaluate `KERNEL` with one vectors and one scalar as inputs and assert against the expected output.
-    /// `A_VEC` can be of type `Vec<i64>` or `Vec<Option<i64>>`.
-    /// `EXPECTED` can be either `Vec<bool>` or `Vec<Option<bool>>`.
-    /// The main reason for this macro is that inputs and outputs align nicely after `cargo fmt`.
-    macro_rules! cmp_i64_scalar {
-        ($KERNEL:ident, $A_VEC:expr, $B:literal, $EXPECTED:expr) => {
-            let a = Int64Array::from($A_VEC);
-            let c = $KERNEL(&a, $B).unwrap();
-            assert_eq!(BooleanArray::from($EXPECTED), c);
-        };
-    }
-
-    #[test]
-    fn test_primitive_array_eq() {
-        cmp_i64!(
-            eq,
-            vec![8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
-            vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
-            vec![false, false, true, false, false, false, false, true, false, false]
-        );
-    }
-
-    #[test]
-    fn test_primitive_array_eq_scalar() {
-        cmp_i64_scalar!(
-            eq_scalar,
-            vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
-            8,
-            vec![false, false, true, false, false, false, false, true, false, false]
-        );
-    }
-
-    #[test]
-    fn test_primitive_array_eq_with_slice() {
-        let a = Int32Array::from(vec![6, 7, 8, 8, 10]);
-        let b = Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
-        let b_slice = b.slice(5, 5);
-        let c = b_slice.as_any().downcast_ref().unwrap();
-        let d = eq(&c, &a).unwrap();
-        assert_eq!(true, d.value(0));
-        assert_eq!(true, d.value(1));
-        assert_eq!(true, d.value(2));
-        assert_eq!(false, d.value(3));
-        assert_eq!(true, d.value(4));
-    }
-
-    #[test]
-    fn test_primitive_array_neq() {
-        cmp_i64!(
-            neq,
-            vec![8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
-            vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
-            vec![true, true, false, true, true, true, true, false, true, true]
-        );
-    }
-
-    #[test]
-    fn test_primitive_array_neq_scalar() {
-        cmp_i64_scalar!(
-            neq_scalar,
-            vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
-            8,
-            vec![true, true, false, true, true, true, true, false, true, true]
-        );
-    }
-
-    #[test]
-    fn test_primitive_array_lt() {
-        cmp_i64!(
-            lt,
-            vec![8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
-            vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
-            vec![false, false, false, true, true, false, false, false, true, true]
-        );
-    }
-
-    #[test]
-    fn test_primitive_array_lt_scalar() {
-        cmp_i64_scalar!(
-            lt_scalar,
-            vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
-            8,
-            vec![true, true, false, false, false, true, true, false, false, false]
-        );
-    }
-
-    #[test]
-    fn test_primitive_array_lt_nulls() {
-        cmp_i64!(
-            lt,
-            vec![None, None, Some(1), Some(1), None, None, Some(2), Some(2),],
-            vec![None, Some(1), None, Some(1), None, Some(3), None, Some(3),],
-            vec![None, None, None, Some(false), None, None, None, Some(true)]
-        );
-    }
-
-    #[test]
-    fn test_primitive_array_lt_scalar_nulls() {
-        cmp_i64_scalar!(
-            lt_scalar,
-            vec![None, Some(1), Some(2), Some(3), None, Some(1), Some(2), Some(3), Some(2), None],
-            2,
-            vec![None, Some(true), Some(false), Some(false), None, Some(true), Some(false), Some(false), Some(false), None]
-        );
-    }
-
-    #[test]
-    fn test_primitive_array_lt_eq() {
-        cmp_i64!(
-            lt_eq,
-            vec![8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
-            vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
-            vec![false, false, true, true, true, false, false, true, true, true]
-        );
-    }
-
-    #[test]
-    fn test_primitive_array_lt_eq_scalar() {
-        cmp_i64_scalar!(
-            lt_eq_scalar,
-            vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
-            8,
-            vec![true, true, true, false, false, true, true, true, false, false]
-        );
-    }
-
-    #[test]
-    fn test_primitive_array_lt_eq_nulls() {
-        cmp_i64!(
-            lt_eq,
-            vec![None, None, Some(1), None, None, Some(1), None, None, Some(1)],
-            vec![None, Some(1), Some(0), None, Some(1), Some(2), None, None, Some(3)],
-            vec![None, None, Some(false), None, None, Some(true), None, None, Some(true)]
-        );
-    }
-
-    #[test]
-    fn test_primitive_array_lt_eq_scalar_nulls() {
-        cmp_i64_scalar!(
-            lt_eq_scalar,
-            vec![None, Some(1), Some(2), None, Some(1), Some(2), None, Some(1), Some(2)],
-            1,
-            vec![None, Some(true), Some(false), None, Some(true), Some(false), None, Some(true), Some(false)]
-        );
-    }
-
-    #[test]
-    fn test_primitive_array_gt() {
-        cmp_i64!(
-            gt,
-            vec![8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
-            vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
-            vec![true, true, false, false, false, true, true, false, false, false]
-        );
-    }
-
-    #[test]
-    fn test_primitive_array_gt_scalar() {
-        cmp_i64_scalar!(
-            gt_scalar,
-            vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
-            8,
-            vec![false, false, false, true, true, false, false, false, true, true]
-        );
-    }
-
-    #[test]
-    fn test_primitive_array_gt_nulls() {
-        cmp_i64!(
-            gt,
-            vec![None, None, Some(1), None, None, Some(2), None, None, Some(3)],
-            vec![None, Some(1), Some(1), None, Some(1), Some(1), None, Some(1), Some(1)],
-            vec![None, None, Some(false), None, None, Some(true), None, None, Some(true)]
-        );
-    }
-
-    #[test]
-    fn test_primitive_array_gt_scalar_nulls() {
-        cmp_i64_scalar!(
-            gt_scalar,
-            vec![None, Some(1), Some(2), None, Some(1), Some(2), None, Some(1), Some(2)],
-            1,
-            vec![None, Some(false), Some(true), None, Some(false), Some(true), None, Some(false), Some(true)]
-        );
-    }
-
-    #[test]
-    fn test_primitive_array_gt_eq() {
-        cmp_i64!(
-            gt_eq,
-            vec![8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
-            vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
-            vec![true, true, true, false, false, true, true, true, false, false]
-        );
-    }
-
-    #[test]
-    fn test_primitive_array_gt_eq_scalar() {
-        cmp_i64_scalar!(
-            gt_eq_scalar,
-            vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
-            8,
-            vec![false, false, true, true, true, false, false, true, true, true]
-        );
-    }
-
-    #[test]
-    fn test_primitive_array_gt_eq_nulls() {
-        cmp_i64!(
-            gt_eq,
-            vec![None, None, Some(1), None, Some(1), Some(2), None, None, Some(1)],
-            vec![None, Some(1), None, None, Some(1), Some(1), None, Some(2), Some(2)],
-            vec![None, None, None, None, Some(true), Some(true), None, None, Some(false)]
-        );
-    }
-
-    #[test]
-    fn test_primitive_array_gt_eq_scalar_nulls() {
-        cmp_i64_scalar!(
-            gt_eq_scalar,
-            vec![None, Some(1), Some(2), None, Some(2), Some(3), None, Some(3), Some(4)],
-            2,
-            vec![None, Some(false), Some(true), None, Some(true), Some(true), None, Some(true), Some(true)]
-        );
-    }
-
-    #[test]
-    fn test_primitive_array_compare_slice() {
-        let a: Int32Array = (0..100).map(Some).collect();
-        let a = a.slice(50, 50);
-        let a = a.as_any().downcast_ref::<Int32Array>().unwrap();
-        let b: Int32Array = (100..200).map(Some).collect();
-        let b = b.slice(50, 50);
-        let b = b.as_any().downcast_ref::<Int32Array>().unwrap();
-        let actual = lt(&a, &b).unwrap();
-        let expected: BooleanArray = (0..50).map(|_| Some(true)).collect();
-        assert_eq!(expected, actual);
-    }
-
-    #[test]
-    fn test_primitive_array_compare_scalar_slice() {
-        let a: Int32Array = (0..100).map(Some).collect();
-        let a = a.slice(50, 50);
-        let a = a.as_any().downcast_ref::<Int32Array>().unwrap();
-        let actual = lt_scalar(&a, 200).unwrap();
-        let expected: BooleanArray = (0..50).map(|_| Some(true)).collect();
-        assert_eq!(expected, actual);
-    }
-
-    #[test]
-    fn test_length_of_result_buffer() {
-        // `item_count` is chosen to not be a multiple of the number of SIMD lanes for this
-        // type (`Int8Type`), 64.
-        let item_count = 130;
-
-        let select_mask: BooleanArray = vec![true; item_count].into();
-
-        let array_a: PrimitiveArray<Int8Type> = vec![1; item_count].into();
-        let array_b: PrimitiveArray<Int8Type> = vec![2; item_count].into();
-        let result_mask = gt_eq(&array_a, &array_b).unwrap();
-
-        assert_eq!(
-            result_mask.data().buffers()[0].len(),
-            select_mask.data().buffers()[0].len()
-        );
-    }
-
-    // Expected behaviour:
-    // contains(1, [1, 2, null]) = true
-    // contains(3, [1, 2, null]) = false
-    // contains(null, [1, 2, null]) = false
-    // contains(null, null) = false
-    #[test]
-    fn test_contains() {
-        let value_data = Int32Array::from(vec![
-            Some(0),
-            Some(1),
-            Some(2),
-            Some(3),
-            Some(4),
-            Some(5),
-            Some(6),
-            None,
-            Some(7),
-        ])
-        .data()
-        .clone();
-        let value_offsets = Buffer::from_slice_ref(&[0i64, 3, 6, 6, 9]);
-        let list_data_type =
-            DataType::LargeList(Box::new(Field::new("item", DataType::Int32, true)));
-        let list_data = ArrayData::builder(list_data_type)
-            .len(4)
-            .add_buffer(value_offsets)
-            .add_child_data(value_data)
-            .null_bit_buffer(Buffer::from([0b00001011]))
-            .build();
-
-        //  [[0, 1, 2], [3, 4, 5], null, [6, null, 7]]
-        let list_array = LargeListArray::from(list_data);
-
-        let nulls = Int32Array::from(vec![None, None, None, None]);
-        let nulls_result = contains(&nulls, &list_array).unwrap();
-        assert_eq!(
-            nulls_result
-                .as_any()
-                .downcast_ref::<BooleanArray>()
-                .unwrap(),
-            &BooleanArray::from(vec![false, false, false, false]),
-        );
-
-        let values = Int32Array::from(vec![Some(0), Some(0), Some(0), Some(0)]);
-        let values_result = contains(&values, &list_array).unwrap();
-        assert_eq!(
-            values_result
-                .as_any()
-                .downcast_ref::<BooleanArray>()
-                .unwrap(),
-            &BooleanArray::from(vec![true, false, false, false]),
-        );
-    }
-
-    // Expected behaviour:
-    // contains("ab", ["ab", "cd", null]) = true
-    // contains("ef", ["ab", "cd", null]) = false
-    // contains(null, ["ab", "cd", null]) = false
-    // contains(null, null) = false
-    #[test]
-    fn test_contains_utf8() {
-        let values_builder = StringBuilder::new(10);
-        let mut builder = ListBuilder::new(values_builder);
-
-        builder.values().append_value("Lorem").unwrap();
-        builder.values().append_value("ipsum").unwrap();
-        builder.values().append_null().unwrap();
-        builder.append(true).unwrap();
-        builder.values().append_value("sit").unwrap();
-        builder.values().append_value("amet").unwrap();
-        builder.values().append_value("Lorem").unwrap();
-        builder.append(true).unwrap();
-        builder.append(false).unwrap();
-        builder.values().append_value("ipsum").unwrap();
-        builder.append(true).unwrap();
-
-        //  [["Lorem", "ipsum", null], ["sit", "amet", "Lorem"], null, ["ipsum"]]
-        // value_offsets = [0, 3, 6, 6]
-        let list_array = builder.finish();
-
-        let nulls = StringArray::from(vec![None, None, None, None]);
-        let nulls_result = contains_utf8(&nulls, &list_array).unwrap();
-        assert_eq!(
-            nulls_result
-                .as_any()
-                .downcast_ref::<BooleanArray>()
-                .unwrap(),
-            &BooleanArray::from(vec![false, false, false, false]),
-        );
-
-        let values = StringArray::from(vec![
-            Some("Lorem"),
-            Some("Lorem"),
-            Some("Lorem"),
-            Some("Lorem"),
-        ]);
-        let values_result = contains_utf8(&values, &list_array).unwrap();
-        assert_eq!(
-            values_result
-                .as_any()
-                .downcast_ref::<BooleanArray>()
-                .unwrap(),
-            &BooleanArray::from(vec![true, true, false, false]),
-        );
-    }
-
-    macro_rules! test_utf8 {
-        ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
-            #[test]
-            fn $test_name() {
-                let left = StringArray::from($left);
-                let right = StringArray::from($right);
-                let res = $op(&left, &right).unwrap();
-                let expected = $expected;
-                assert_eq!(expected.len(), res.len());
-                for i in 0..res.len() {
-                    let v = res.value(i);
-                    assert_eq!(v, expected[i]);
-                }
-            }
-        };
-    }
-
-    macro_rules! test_utf8_scalar {
-        ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
-            #[test]
-            fn $test_name() {
-                let left = StringArray::from($left);
-                let res = $op(&left, $right).unwrap();
-                let expected = $expected;
-                assert_eq!(expected.len(), res.len());
-                for i in 0..res.len() {
-                    let v = res.value(i);
-                    assert_eq!(
-                        v,
-                        expected[i],
-                        "unexpected result when comparing {} at position {} to {} ",
-                        left.value(i),
-                        i,
-                        $right
-                    );
-                }
-
-                let left = LargeStringArray::from($left);
-                let res = $op(&left, $right).unwrap();
-                let expected = $expected;
-                assert_eq!(expected.len(), res.len());
-                for i in 0..res.len() {
-                    let v = res.value(i);
-                    assert_eq!(
-                        v,
-                        expected[i],
-                        "unexpected result when comparing {} at position {} to {} ",
-                        left.value(i),
-                        i,
-                        $right
-                    );
-                }
-            }
-        };
-    }
-
-    test_utf8!(
-        test_utf8_array_like,
-        vec!["arrow", "arrow", "arrow", "arrow", "arrow", "arrows", "arrow"],
-        vec!["arrow", "ar%", "%ro%", "foo", "arr", "arrow_", "arrow_"],
-        like_utf8,
-        vec![true, true, true, false, false, true, false]
-    );
-
-    test_utf8_scalar!(
-        test_utf8_array_like_scalar,
-        vec!["arrow", "parquet", "datafusion", "flight"],
-        "%ar%",
-        like_utf8_scalar,
-        vec![true, true, false, false]
-    );
-    test_utf8_scalar!(
-        test_utf8_array_like_scalar_start,
-        vec!["arrow", "parrow", "arrows", "arr"],
-        "arrow%",
-        like_utf8_scalar,
-        vec![true, false, true, false]
-    );
-
-    test_utf8_scalar!(
-        test_utf8_array_like_scalar_end,
-        vec!["arrow", "parrow", "arrows", "arr"],
-        "%arrow",
-        like_utf8_scalar,
-        vec![true, true, false, false]
-    );
-
-    test_utf8_scalar!(
-        test_utf8_array_like_scalar_equals,
-        vec!["arrow", "parrow", "arrows", "arr"],
-        "arrow",
-        like_utf8_scalar,
-        vec![true, false, false, false]
-    );
-
-    test_utf8_scalar!(
-        test_utf8_array_like_scalar_one,
-        vec!["arrow", "arrows", "parrow", "arr"],
-        "arrow_",
-        like_utf8_scalar,
-        vec![false, true, false, false]
-    );
-
-    test_utf8!(
-        test_utf8_array_nlike,
-        vec!["arrow", "arrow", "arrow", "arrow", "arrow", "arrows", "arrow"],
-        vec!["arrow", "ar%", "%ro%", "foo", "arr", "arrow_", "arrow_"],
-        nlike_utf8,
-        vec![false, false, false, true, true, false, true]
-    );
-    test_utf8_scalar!(
-        test_utf8_array_nlike_scalar,
-        vec!["arrow", "parquet", "datafusion", "flight"],
-        "%ar%",
-        nlike_utf8_scalar,
-        vec![false, false, true, true]
-    );
-
-    test_utf8!(
-        test_utf8_array_eq,
-        vec!["arrow", "arrow", "arrow", "arrow"],
-        vec!["arrow", "parquet", "datafusion", "flight"],
-        eq_utf8,
-        vec![true, false, false, false]
-    );
-    test_utf8_scalar!(
-        test_utf8_array_eq_scalar,
-        vec!["arrow", "parquet", "datafusion", "flight"],
-        "arrow",
-        eq_utf8_scalar,
-        vec![true, false, false, false]
-    );
-
-    test_utf8_scalar!(
-        test_utf8_array_nlike_scalar_start,
-        vec!["arrow", "parrow", "arrows", "arr"],
-        "arrow%",
-        nlike_utf8_scalar,
-        vec![false, true, false, true]
-    );
-
-    test_utf8_scalar!(
-        test_utf8_array_nlike_scalar_end,
-        vec!["arrow", "parrow", "arrows", "arr"],
-        "%arrow",
-        nlike_utf8_scalar,
-        vec![false, false, true, true]
-    );
-
-    test_utf8_scalar!(
-        test_utf8_array_nlike_scalar_equals,
-        vec!["arrow", "parrow", "arrows", "arr"],
-        "arrow",
-        nlike_utf8_scalar,
-        vec![false, true, true, true]
-    );
-
-    test_utf8_scalar!(
-        test_utf8_array_nlike_scalar_one,
-        vec!["arrow", "arrows", "parrow", "arr"],
-        "arrow_",
-        nlike_utf8_scalar,
-        vec![true, false, true, true]
-    );
-
-    test_utf8!(
-        test_utf8_array_neq,
-        vec!["arrow", "arrow", "arrow", "arrow"],
-        vec!["arrow", "parquet", "datafusion", "flight"],
-        neq_utf8,
-        vec![false, true, true, true]
-    );
-    test_utf8_scalar!(
-        test_utf8_array_neq_scalar,
-        vec!["arrow", "parquet", "datafusion", "flight"],
-        "arrow",
-        neq_utf8_scalar,
-        vec![false, true, true, true]
-    );
-
-    test_utf8!(
-        test_utf8_array_lt,
-        vec!["arrow", "datafusion", "flight", "parquet"],
-        vec!["flight", "flight", "flight", "flight"],
-        lt_utf8,
-        vec![true, true, false, false]
-    );
-    test_utf8_scalar!(
-        test_utf8_array_lt_scalar,
-        vec!["arrow", "datafusion", "flight", "parquet"],
-        "flight",
-        lt_utf8_scalar,
-        vec![true, true, false, false]
-    );
-
-    test_utf8!(
-        test_utf8_array_lt_eq,
-        vec!["arrow", "datafusion", "flight", "parquet"],
-        vec!["flight", "flight", "flight", "flight"],
-        lt_eq_utf8,
-        vec![true, true, true, false]
-    );
-    test_utf8_scalar!(
-        test_utf8_array_lt_eq_scalar,
-        vec!["arrow", "datafusion", "flight", "parquet"],
-        "flight",
-        lt_eq_utf8_scalar,
-        vec![true, true, true, false]
-    );
-
-    test_utf8!(
-        test_utf8_array_gt,
-        vec!["arrow", "datafusion", "flight", "parquet"],
-        vec!["flight", "flight", "flight", "flight"],
-        gt_utf8,
-        vec![false, false, false, true]
-    );
-    test_utf8_scalar!(
-        test_utf8_array_gt_scalar,
-        vec!["arrow", "datafusion", "flight", "parquet"],
-        "flight",
-        gt_utf8_scalar,
-        vec![false, false, false, true]
-    );
-
-    test_utf8!(
-        test_utf8_array_gt_eq,
-        vec!["arrow", "datafusion", "flight", "parquet"],
-        vec!["flight", "flight", "flight", "flight"],
-        gt_eq_utf8,
-        vec![false, false, true, true]
-    );
-    test_utf8_scalar!(
-        test_utf8_array_gt_eq_scalar,
-        vec!["arrow", "datafusion", "flight", "parquet"],
-        "flight",
-        gt_eq_utf8_scalar,
-        vec![false, false, true, true]
-    );
-}
diff --git a/rust/arrow/src/compute/kernels/concat.rs b/rust/arrow/src/compute/kernels/concat.rs
deleted file mode 100644
index 32880286a72..00000000000
--- a/rust/arrow/src/compute/kernels/concat.rs
+++ /dev/null
@@ -1,387 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Defines concat kernel for `ArrayRef`
-//!
-//! Example:
-//!
-//! ```
-//! use arrow::array::{ArrayRef, StringArray};
-//! use arrow::compute::concat;
-//!
-//! let arr = concat(&[
-//!     &StringArray::from(vec!["hello", "world"]),
-//!     &StringArray::from(vec!["!"]),
-//! ]).unwrap();
-//! assert_eq!(arr.len(), 3);
-//! ```
-
-use crate::array::*;
-use crate::error::{ArrowError, Result};
-
-/// Concatenate multiple [Array] of the same type into a single [ArrayRef].
-pub fn concat(arrays: &[&Array]) -> Result<ArrayRef> {
-    if arrays.is_empty() {
-        return Err(ArrowError::ComputeError(
-            "concat requires input of at least one array".to_string(),
-        ));
-    }
-
-    if arrays
-        .iter()
-        .any(|array| array.data_type() != arrays[0].data_type())
-    {
-        return Err(ArrowError::InvalidArgumentError(
-            "It is not possible to concatenate arrays of different data types."
-                .to_string(),
-        ));
-    }
-
-    let lengths = arrays.iter().map(|array| array.len()).collect::<Vec<_>>();
-    let capacity = lengths.iter().sum();
-
-    let arrays = arrays.iter().map(|a| a.data()).collect::<Vec<_>>();
-
-    let mut mutable = MutableArrayData::new(arrays, false, capacity);
-
-    for (i, len) in lengths.iter().enumerate() {
-        mutable.extend(i, 0, *len)
-    }
-
-    Ok(make_array(mutable.freeze()))
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::datatypes::*;
-    use std::sync::Arc;
-
-    #[test]
-    fn test_concat_empty_vec() {
-        let re = concat(&[]);
-        assert!(re.is_err());
-    }
-
-    #[test]
-    fn test_concat_incompatible_datatypes() {
-        let re = concat(&[
-            &PrimitiveArray::<Int64Type>::from(vec![Some(-1), Some(2), None]),
-            &StringArray::from(vec![Some("hello"), Some("bar"), Some("world")]),
-        ]);
-        assert!(re.is_err());
-    }
-
-    #[test]
-    fn test_concat_string_arrays() -> Result<()> {
-        let arr = concat(&[
-            &StringArray::from(vec!["hello", "world"]),
-            &StringArray::from(vec!["2", "3", "4"]),
-            &StringArray::from(vec![Some("foo"), Some("bar"), None, Some("baz")]),
-        ])?;
-
-        let expected_output = Arc::new(StringArray::from(vec![
-            Some("hello"),
-            Some("world"),
-            Some("2"),
-            Some("3"),
-            Some("4"),
-            Some("foo"),
-            Some("bar"),
-            None,
-            Some("baz"),
-        ])) as ArrayRef;
-
-        assert_eq!(&arr, &expected_output);
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_concat_primitive_arrays() -> Result<()> {
-        let arr = concat(&[
-            &PrimitiveArray::<Int64Type>::from(vec![
-                Some(-1),
-                Some(-1),
-                Some(2),
-                None,
-                None,
-            ]),
-            &PrimitiveArray::<Int64Type>::from(vec![
-                Some(101),
-                Some(102),
-                Some(103),
-                None,
-            ]),
-            &PrimitiveArray::<Int64Type>::from(vec![Some(256), Some(512), Some(1024)]),
-        ])?;
-
-        let expected_output = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
-            Some(-1),
-            Some(-1),
-            Some(2),
-            None,
-            None,
-            Some(101),
-            Some(102),
-            Some(103),
-            None,
-            Some(256),
-            Some(512),
-            Some(1024),
-        ])) as ArrayRef;
-
-        assert_eq!(&arr, &expected_output);
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_concat_primitive_array_slices() -> Result<()> {
-        let input_1 = PrimitiveArray::<Int64Type>::from(vec![
-            Some(-1),
-            Some(-1),
-            Some(2),
-            None,
-            None,
-        ])
-        .slice(1, 3);
-
-        let input_2 = PrimitiveArray::<Int64Type>::from(vec![
-            Some(101),
-            Some(102),
-            Some(103),
-            None,
-        ])
-        .slice(1, 3);
-        let arr = concat(&[input_1.as_ref(), input_2.as_ref()])?;
-
-        let expected_output = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
-            Some(-1),
-            Some(2),
-            None,
-            Some(102),
-            Some(103),
-            None,
-        ])) as ArrayRef;
-
-        assert_eq!(&arr, &expected_output);
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_concat_boolean_primitive_arrays() -> Result<()> {
-        let arr = concat(&[
-            &BooleanArray::from(vec![
-                Some(true),
-                Some(true),
-                Some(false),
-                None,
-                None,
-                Some(false),
-            ]),
-            &BooleanArray::from(vec![None, Some(false), Some(true), Some(false)]),
-        ])?;
-
-        let expected_output = Arc::new(BooleanArray::from(vec![
-            Some(true),
-            Some(true),
-            Some(false),
-            None,
-            None,
-            Some(false),
-            None,
-            Some(false),
-            Some(true),
-            Some(false),
-        ])) as ArrayRef;
-
-        assert_eq!(&arr, &expected_output);
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_concat_primitive_list_arrays() -> Result<()> {
-        let list1 = vec![
-            Some(vec![Some(-1), Some(-1), Some(2), None, None]),
-            Some(vec![]),
-            None,
-            Some(vec![Some(10)]),
-        ];
-        let list1_array =
-            ListArray::from_iter_primitive::<Int64Type, _, _>(list1.clone());
-
-        let list2 = vec![
-            None,
-            Some(vec![Some(100), None, Some(101)]),
-            Some(vec![Some(102)]),
-        ];
-        let list2_array =
-            ListArray::from_iter_primitive::<Int64Type, _, _>(list2.clone());
-
-        let list3 = vec![Some(vec![Some(1000), Some(1001)])];
-        let list3_array =
-            ListArray::from_iter_primitive::<Int64Type, _, _>(list3.clone());
-
-        let array_result = concat(&[&list1_array, &list2_array, &list3_array])?;
-
-        let expected = list1
-            .into_iter()
-            .chain(list2.into_iter())
-            .chain(list3.into_iter());
-        let array_expected = ListArray::from_iter_primitive::<Int64Type, _, _>(expected);
-
-        assert_eq!(array_result.as_ref(), &array_expected as &dyn Array);
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_concat_struct_arrays() -> Result<()> {
-        let field = Field::new("field", DataType::Int64, true);
-        let input_primitive_1: ArrayRef =
-            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
-                Some(-1),
-                Some(-1),
-                Some(2),
-                None,
-                None,
-            ]));
-        let input_struct_1 = StructArray::from(vec![(field.clone(), input_primitive_1)]);
-
-        let input_primitive_2: ArrayRef =
-            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
-                Some(101),
-                Some(102),
-                Some(103),
-                None,
-            ]));
-        let input_struct_2 = StructArray::from(vec![(field.clone(), input_primitive_2)]);
-
-        let input_primitive_3: ArrayRef =
-            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
-                Some(256),
-                Some(512),
-                Some(1024),
-            ]));
-        let input_struct_3 = StructArray::from(vec![(field, input_primitive_3)]);
-
-        let arr = concat(&[&input_struct_1, &input_struct_2, &input_struct_3])?;
-
-        let expected_primitive_output = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
-            Some(-1),
-            Some(-1),
-            Some(2),
-            None,
-            None,
-            Some(101),
-            Some(102),
-            Some(103),
-            None,
-            Some(256),
-            Some(512),
-            Some(1024),
-        ])) as ArrayRef;
-
-        let actual_primitive = arr
-            .as_any()
-            .downcast_ref::<StructArray>()
-            .unwrap()
-            .column(0);
-        assert_eq!(actual_primitive, &expected_primitive_output);
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_concat_struct_array_slices() -> Result<()> {
-        let field = Field::new("field", DataType::Int64, true);
-        let input_primitive_1: ArrayRef =
-            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
-                Some(-1),
-                Some(-1),
-                Some(2),
-                None,
-                None,
-            ]));
-        let input_struct_1 = StructArray::from(vec![(field.clone(), input_primitive_1)]);
-
-        let input_primitive_2: ArrayRef =
-            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
-                Some(101),
-                Some(102),
-                Some(103),
-                None,
-            ]));
-        let input_struct_2 = StructArray::from(vec![(field, input_primitive_2)]);
-
-        let arr = concat(&[
-            input_struct_1.slice(1, 3).as_ref(),
-            input_struct_2.slice(1, 2).as_ref(),
-        ])?;
-
-        let expected_primitive_output = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
-            Some(-1),
-            Some(2),
-            None,
-            Some(102),
-            Some(103),
-        ])) as ArrayRef;
-
-        let actual_primitive = arr
-            .as_any()
-            .downcast_ref::<StructArray>()
-            .unwrap()
-            .column(0);
-        assert_eq!(actual_primitive, &expected_primitive_output);
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_string_array_slices() -> Result<()> {
-        let input_1 = StringArray::from(vec!["hello", "A", "B", "C"]);
-        let input_2 = StringArray::from(vec!["world", "D", "E", "Z"]);
-
-        let arr = concat(&[input_1.slice(1, 3).as_ref(), input_2.slice(1, 2).as_ref()])?;
-
-        let expected_output = StringArray::from(vec!["A", "B", "C", "D", "E"]);
-
-        let actual_output = arr.as_any().downcast_ref::<StringArray>().unwrap();
-        assert_eq!(actual_output, &expected_output);
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_string_array_with_null_slices() -> Result<()> {
-        let input_1 = StringArray::from(vec![Some("hello"), None, Some("A"), Some("C")]);
-        let input_2 = StringArray::from(vec![None, Some("world"), Some("D"), None]);
-
-        let arr = concat(&[input_1.slice(1, 3).as_ref(), input_2.slice(1, 2).as_ref()])?;
-
-        let expected_output =
-            StringArray::from(vec![None, Some("A"), Some("C"), Some("world"), Some("D")]);
-
-        let actual_output = arr.as_any().downcast_ref::<StringArray>().unwrap();
-        assert_eq!(actual_output, &expected_output);
-
-        Ok(())
-    }
-}
diff --git a/rust/arrow/src/compute/kernels/filter.rs b/rust/arrow/src/compute/kernels/filter.rs
deleted file mode 100644
index 68feb0a546e..00000000000
--- a/rust/arrow/src/compute/kernels/filter.rs
+++ /dev/null
@@ -1,584 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Defines miscellaneous array kernels.
-
-use crate::error::Result;
-use crate::record_batch::RecordBatch;
-use crate::{array::*, util::bit_chunk_iterator::BitChunkIterator};
-use std::iter::Enumerate;
-
-/// Function that can filter arbitrary arrays
-pub type Filter<'a> = Box<Fn(&ArrayData) -> ArrayData + 'a>;
-
-/// Internal state of [SlicesIterator]
-#[derive(Debug, PartialEq)]
-enum State {
-    // it is iterating over bits of a mask (`u64`, steps of size of 1 slot)
-    Bits(u64),
-    // it is iterating over chunks (steps of size of 64 slots)
-    Chunks,
-    // it is iterating over the remainding bits (steps of size of 1 slot)
-    Remainder,
-    // nothing more to iterate.
-    Finish,
-}
-
-/// An iterator of `(usize, usize)` each representing an interval `[start,end[` whose
-/// slots of a [BooleanArray] are true. Each interval corresponds to a contiguous region of memory to be
-/// "taken" from an array to be filtered.
-#[derive(Debug)]
-pub(crate) struct SlicesIterator<'a> {
-    iter: Enumerate<BitChunkIterator<'a>>,
-    state: State,
-    filter_count: usize,
-    remainder_mask: u64,
-    remainder_len: usize,
-    chunk_len: usize,
-    len: usize,
-    start: usize,
-    on_region: bool,
-    current_chunk: usize,
-    current_bit: usize,
-}
-
-impl<'a> SlicesIterator<'a> {
-    pub(crate) fn new(filter: &'a BooleanArray) -> Self {
-        let values = &filter.data_ref().buffers()[0];
-
-        // this operation is performed before iteration
-        // because it is fast and allows reserving all the needed memory
-        let filter_count = values.count_set_bits_offset(filter.offset(), filter.len());
-
-        let chunks = values.bit_chunks(filter.offset(), filter.len());
-
-        Self {
-            iter: chunks.iter().enumerate(),
-            state: State::Chunks,
-            filter_count,
-            remainder_len: chunks.remainder_len(),
-            chunk_len: chunks.chunk_len(),
-            remainder_mask: chunks.remainder_bits(),
-            len: 0,
-            start: 0,
-            on_region: false,
-            current_chunk: 0,
-            current_bit: 0,
-        }
-    }
-
-    #[inline]
-    fn current_start(&self) -> usize {
-        self.current_chunk * 64 + self.current_bit
-    }
-
-    #[inline]
-    fn iterate_bits(&mut self, mask: u64, max: usize) -> Option<(usize, usize)> {
-        while self.current_bit < max {
-            if (mask & (1 << self.current_bit)) != 0 {
-                if !self.on_region {
-                    self.start = self.current_start();
-                    self.on_region = true;
-                }
-                self.len += 1;
-            } else if self.on_region {
-                let result = (self.start, self.start + self.len);
-                self.len = 0;
-                self.on_region = false;
-                self.current_bit += 1;
-                return Some(result);
-            }
-            self.current_bit += 1;
-        }
-        self.current_bit = 0;
-        None
-    }
-
-    /// iterates over chunks.
-    #[inline]
-    fn iterate_chunks(&mut self) -> Option<(usize, usize)> {
-        while let Some((i, mask)) = self.iter.next() {
-            self.current_chunk = i;
-            if mask == 0 {
-                if self.on_region {
-                    let result = (self.start, self.start + self.len);
-                    self.len = 0;
-                    self.on_region = false;
-                    return Some(result);
-                }
-            } else if mask == 18446744073709551615u64 {
-                // = !0u64
-                if !self.on_region {
-                    self.start = self.current_start();
-                    self.on_region = true;
-                }
-                self.len += 64;
-            } else {
-                // there is a chunk that has a non-trivial mask => iterate over bits.
-                self.state = State::Bits(mask);
-                return None;
-            }
-        }
-        // no more chunks => start iterating over the remainder
-        self.current_chunk = self.chunk_len;
-        self.state = State::Remainder;
-        None
-    }
-}
-
-impl<'a> Iterator for SlicesIterator<'a> {
-    type Item = (usize, usize);
-
-    fn next(&mut self) -> Option<Self::Item> {
-        match self.state {
-            State::Chunks => {
-                match self.iterate_chunks() {
-                    None => {
-                        // iterating over chunks does not yield any new slice => continue to the next
-                        self.current_bit = 0;
-                        self.next()
-                    }
-                    other => other,
-                }
-            }
-            State::Bits(mask) => {
-                match self.iterate_bits(mask, 64) {
-                    None => {
-                        // iterating over bits does not yield any new slice => change back
-                        // to chunks and continue to the next
-                        self.state = State::Chunks;
-                        self.next()
-                    }
-                    other => other,
-                }
-            }
-            State::Remainder => {
-                match self.iterate_bits(self.remainder_mask, self.remainder_len) {
-                    None => {
-                        self.state = State::Finish;
-                        if self.on_region {
-                            Some((self.start, self.start + self.len))
-                        } else {
-                            None
-                        }
-                    }
-                    other => other,
-                }
-            }
-            State::Finish => None,
-        }
-    }
-}
-
-/// Returns a prepared function optimized to filter multiple arrays.
-/// Creating this function requires time, but using it is faster than [filter] when the
-/// same filter needs to be applied to multiple arrays (e.g. a multi-column `RecordBatch`).
-/// WARNING: the nulls of `filter` are ignored and the value on its slot is considered.
-/// Therefore, it is considered undefined behavior to pass `filter` with null values.
-pub fn build_filter(filter: &BooleanArray) -> Result<Filter> {
-    let iter = SlicesIterator::new(filter);
-    let filter_count = iter.filter_count;
-    let chunks = iter.collect::<Vec<_>>();
-
-    Ok(Box::new(move |array: &ArrayData| {
-        let mut mutable = MutableArrayData::new(vec![array], false, filter_count);
-        chunks
-            .iter()
-            .for_each(|(start, end)| mutable.extend(0, *start, *end));
-        mutable.freeze()
-    }))
-}
-
-/// Filters an [Array], returning elements matching the filter (i.e. where the values are true).
-/// WARNING: the nulls of `filter` are ignored and the value on its slot is considered.
-/// Therefore, it is considered undefined behavior to pass `filter` with null values.
-/// # Example
-/// ```rust
-/// # use arrow::array::{Int32Array, BooleanArray};
-/// # use arrow::error::Result;
-/// # use arrow::compute::kernels::filter::filter;
-/// # fn main() -> Result<()> {
-/// let array = Int32Array::from(vec![5, 6, 7, 8, 9]);
-/// let filter_array = BooleanArray::from(vec![true, false, false, true, false]);
-/// let c = filter(&array, &filter_array)?;
-/// let c = c.as_any().downcast_ref::<Int32Array>().unwrap();
-/// assert_eq!(c, &Int32Array::from(vec![5, 8]));
-/// # Ok(())
-/// # }
-/// ```
-pub fn filter(array: &Array, filter: &BooleanArray) -> Result<ArrayRef> {
-    let iter = SlicesIterator::new(filter);
-
-    let mut mutable =
-        MutableArrayData::new(vec![array.data_ref()], false, iter.filter_count);
-    iter.for_each(|(start, end)| mutable.extend(0, start, end));
-    let data = mutable.freeze();
-    Ok(make_array(data))
-}
-
-/// Returns a new [RecordBatch] with arrays containing only values matching the filter.
-/// WARNING: the nulls of `filter` are ignored and the value on its slot is considered.
-/// Therefore, it is considered undefined behavior to pass `filter` with null values.
-pub fn filter_record_batch(
-    record_batch: &RecordBatch,
-    filter: &BooleanArray,
-) -> Result<RecordBatch> {
-    let filter = build_filter(filter)?;
-    let filtered_arrays = record_batch
-        .columns()
-        .iter()
-        .map(|a| make_array(filter(&a.data())))
-        .collect();
-    RecordBatch::try_new(record_batch.schema(), filtered_arrays)
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::{
-        buffer::Buffer,
-        datatypes::{DataType, Field},
-    };
-
-    macro_rules! def_temporal_test {
-        ($test:ident, $array_type: ident, $data: expr) => {
-            #[test]
-            fn $test() {
-                let a = $data;
-                let b = BooleanArray::from(vec![true, false, true, false]);
-                let c = filter(&a, &b).unwrap();
-                let d = c.as_ref().as_any().downcast_ref::<$array_type>().unwrap();
-                assert_eq!(2, d.len());
-                assert_eq!(1, d.value(0));
-                assert_eq!(3, d.value(1));
-            }
-        };
-    }
-
-    def_temporal_test!(
-        test_filter_date32,
-        Date32Array,
-        Date32Array::from(vec![1, 2, 3, 4])
-    );
-    def_temporal_test!(
-        test_filter_date64,
-        Date64Array,
-        Date64Array::from(vec![1, 2, 3, 4])
-    );
-    def_temporal_test!(
-        test_filter_time32_second,
-        Time32SecondArray,
-        Time32SecondArray::from(vec![1, 2, 3, 4])
-    );
-    def_temporal_test!(
-        test_filter_time32_millisecond,
-        Time32MillisecondArray,
-        Time32MillisecondArray::from(vec![1, 2, 3, 4])
-    );
-    def_temporal_test!(
-        test_filter_time64_microsecond,
-        Time64MicrosecondArray,
-        Time64MicrosecondArray::from(vec![1, 2, 3, 4])
-    );
-    def_temporal_test!(
-        test_filter_time64_nanosecond,
-        Time64NanosecondArray,
-        Time64NanosecondArray::from(vec![1, 2, 3, 4])
-    );
-    def_temporal_test!(
-        test_filter_duration_second,
-        DurationSecondArray,
-        DurationSecondArray::from(vec![1, 2, 3, 4])
-    );
-    def_temporal_test!(
-        test_filter_duration_millisecond,
-        DurationMillisecondArray,
-        DurationMillisecondArray::from(vec![1, 2, 3, 4])
-    );
-    def_temporal_test!(
-        test_filter_duration_microsecond,
-        DurationMicrosecondArray,
-        DurationMicrosecondArray::from(vec![1, 2, 3, 4])
-    );
-    def_temporal_test!(
-        test_filter_duration_nanosecond,
-        DurationNanosecondArray,
-        DurationNanosecondArray::from(vec![1, 2, 3, 4])
-    );
-    def_temporal_test!(
-        test_filter_timestamp_second,
-        TimestampSecondArray,
-        TimestampSecondArray::from_vec(vec![1, 2, 3, 4], None)
-    );
-    def_temporal_test!(
-        test_filter_timestamp_millisecond,
-        TimestampMillisecondArray,
-        TimestampMillisecondArray::from_vec(vec![1, 2, 3, 4], None)
-    );
-    def_temporal_test!(
-        test_filter_timestamp_microsecond,
-        TimestampMicrosecondArray,
-        TimestampMicrosecondArray::from_vec(vec![1, 2, 3, 4], None)
-    );
-    def_temporal_test!(
-        test_filter_timestamp_nanosecond,
-        TimestampNanosecondArray,
-        TimestampNanosecondArray::from_vec(vec![1, 2, 3, 4], None)
-    );
-
-    #[test]
-    fn test_filter_array_slice() {
-        let a_slice = Int32Array::from(vec![5, 6, 7, 8, 9]).slice(1, 4);
-        let a = a_slice.as_ref();
-        let b = BooleanArray::from(vec![true, false, false, true]);
-        // filtering with sliced filter array is not currently supported
-        // let b_slice = BooleanArray::from(vec![true, false, false, true, false]).slice(1, 4);
-        // let b = b_slice.as_any().downcast_ref().unwrap();
-        let c = filter(a, &b).unwrap();
-        let d = c.as_ref().as_any().downcast_ref::<Int32Array>().unwrap();
-        assert_eq!(2, d.len());
-        assert_eq!(6, d.value(0));
-        assert_eq!(9, d.value(1));
-    }
-
-    #[test]
-    fn test_filter_array_low_density() {
-        // this test exercises the all 0's branch of the filter algorithm
-        let mut data_values = (1..=65).collect::<Vec<i32>>();
-        let mut filter_values =
-            (1..=65).map(|i| matches!(i % 65, 0)).collect::<Vec<bool>>();
-        // set up two more values after the batch
-        data_values.extend_from_slice(&[66, 67]);
-        filter_values.extend_from_slice(&[false, true]);
-        let a = Int32Array::from(data_values);
-        let b = BooleanArray::from(filter_values);
-        let c = filter(&a, &b).unwrap();
-        let d = c.as_ref().as_any().downcast_ref::<Int32Array>().unwrap();
-        assert_eq!(2, d.len());
-        assert_eq!(65, d.value(0));
-        assert_eq!(67, d.value(1));
-    }
-
-    #[test]
-    fn test_filter_array_high_density() {
-        // this test exercises the all 1's branch of the filter algorithm
-        let mut data_values = (1..=65).map(Some).collect::<Vec<_>>();
-        let mut filter_values = (1..=65)
-            .map(|i| !matches!(i % 65, 0))
-            .collect::<Vec<bool>>();
-        // set second data value to null
-        data_values[1] = None;
-        // set up two more values after the batch
-        data_values.extend_from_slice(&[Some(66), None, Some(67), None]);
-        filter_values.extend_from_slice(&[false, true, true, true]);
-        let a = Int32Array::from(data_values);
-        let b = BooleanArray::from(filter_values);
-        let c = filter(&a, &b).unwrap();
-        let d = c.as_ref().as_any().downcast_ref::<Int32Array>().unwrap();
-        assert_eq!(67, d.len());
-        assert_eq!(3, d.null_count());
-        assert_eq!(1, d.value(0));
-        assert_eq!(true, d.is_null(1));
-        assert_eq!(64, d.value(63));
-        assert_eq!(true, d.is_null(64));
-        assert_eq!(67, d.value(65));
-    }
-
-    #[test]
-    fn test_filter_string_array_simple() {
-        let a = StringArray::from(vec!["hello", " ", "world", "!"]);
-        let b = BooleanArray::from(vec![true, false, true, false]);
-        let c = filter(&a, &b).unwrap();
-        let d = c.as_ref().as_any().downcast_ref::<StringArray>().unwrap();
-        assert_eq!(2, d.len());
-        assert_eq!("hello", d.value(0));
-        assert_eq!("world", d.value(1));
-    }
-
-    #[test]
-    fn test_filter_primative_array_with_null() {
-        let a = Int32Array::from(vec![Some(5), None]);
-        let b = BooleanArray::from(vec![false, true]);
-        let c = filter(&a, &b).unwrap();
-        let d = c.as_ref().as_any().downcast_ref::<Int32Array>().unwrap();
-        assert_eq!(1, d.len());
-        assert_eq!(true, d.is_null(0));
-    }
-
-    #[test]
-    fn test_filter_string_array_with_null() {
-        let a = StringArray::from(vec![Some("hello"), None, Some("world"), None]);
-        let b = BooleanArray::from(vec![true, false, false, true]);
-        let c = filter(&a, &b).unwrap();
-        let d = c.as_ref().as_any().downcast_ref::<StringArray>().unwrap();
-        assert_eq!(2, d.len());
-        assert_eq!("hello", d.value(0));
-        assert_eq!(false, d.is_null(0));
-        assert_eq!(true, d.is_null(1));
-    }
-
-    #[test]
-    fn test_filter_binary_array_with_null() {
-        let data: Vec<Option<&[u8]>> = vec![Some(b"hello"), None, Some(b"world"), None];
-        let a = BinaryArray::from(data);
-        let b = BooleanArray::from(vec![true, false, false, true]);
-        let c = filter(&a, &b).unwrap();
-        let d = c.as_ref().as_any().downcast_ref::<BinaryArray>().unwrap();
-        assert_eq!(2, d.len());
-        assert_eq!(b"hello", d.value(0));
-        assert_eq!(false, d.is_null(0));
-        assert_eq!(true, d.is_null(1));
-    }
-
-    #[test]
-    fn test_filter_array_slice_with_null() {
-        let a_slice =
-            Int32Array::from(vec![Some(5), None, Some(7), Some(8), Some(9)]).slice(1, 4);
-        let a = a_slice.as_ref();
-        let b = BooleanArray::from(vec![true, false, false, true]);
-        // filtering with sliced filter array is not currently supported
-        // let b_slice = BooleanArray::from(vec![true, false, false, true, false]).slice(1, 4);
-        // let b = b_slice.as_any().downcast_ref().unwrap();
-        let c = filter(a, &b).unwrap();
-        let d = c.as_ref().as_any().downcast_ref::<Int32Array>().unwrap();
-        assert_eq!(2, d.len());
-        assert_eq!(true, d.is_null(0));
-        assert_eq!(false, d.is_null(1));
-        assert_eq!(9, d.value(1));
-    }
-
-    #[test]
-    fn test_filter_dictionary_array() {
-        let values = vec![Some("hello"), None, Some("world"), Some("!")];
-        let a: Int8DictionaryArray = values.iter().copied().collect();
-        let b = BooleanArray::from(vec![false, true, true, false]);
-        let c = filter(&a, &b).unwrap();
-        let d = c
-            .as_ref()
-            .as_any()
-            .downcast_ref::<Int8DictionaryArray>()
-            .unwrap();
-        let value_array = d.values();
-        let values = value_array.as_any().downcast_ref::<StringArray>().unwrap();
-        // values are cloned in the filtered dictionary array
-        assert_eq!(3, values.len());
-        // but keys are filtered
-        assert_eq!(2, d.len());
-        assert_eq!(true, d.is_null(0));
-        assert_eq!("world", values.value(d.keys().value(1) as usize));
-    }
-
-    #[test]
-    fn test_filter_string_array_with_negated_boolean_array() {
-        let a = StringArray::from(vec!["hello", " ", "world", "!"]);
-        let mut bb = BooleanBuilder::new(2);
-        bb.append_value(false).unwrap();
-        bb.append_value(true).unwrap();
-        bb.append_value(false).unwrap();
-        bb.append_value(true).unwrap();
-        let b = bb.finish();
-        let b = crate::compute::not(&b).unwrap();
-
-        let c = filter(&a, &b).unwrap();
-        let d = c.as_ref().as_any().downcast_ref::<StringArray>().unwrap();
-        assert_eq!(2, d.len());
-        assert_eq!("hello", d.value(0));
-        assert_eq!("world", d.value(1));
-    }
-
-    #[test]
-    fn test_filter_list_array() {
-        let value_data = ArrayData::builder(DataType::Int32)
-            .len(8)
-            .add_buffer(Buffer::from_slice_ref(&[0, 1, 2, 3, 4, 5, 6, 7]))
-            .build();
-
-        let value_offsets = Buffer::from_slice_ref(&[0i64, 3, 6, 8, 8]);
-
-        let list_data_type =
-            DataType::LargeList(Box::new(Field::new("item", DataType::Int32, false)));
-        let list_data = ArrayData::builder(list_data_type)
-            .len(4)
-            .add_buffer(value_offsets)
-            .add_child_data(value_data)
-            .null_bit_buffer(Buffer::from([0b00000111]))
-            .build();
-
-        //  a = [[0, 1, 2], [3, 4, 5], [6, 7], null]
-        let a = LargeListArray::from(list_data);
-        let b = BooleanArray::from(vec![false, true, false, true]);
-        let result = filter(&a, &b).unwrap();
-
-        // expected: [[3, 4, 5], null]
-        let value_data = ArrayData::builder(DataType::Int32)
-            .len(3)
-            .add_buffer(Buffer::from_slice_ref(&[3, 4, 5]))
-            .build();
-
-        let value_offsets = Buffer::from_slice_ref(&[0i64, 3, 3]);
-
-        let list_data_type =
-            DataType::LargeList(Box::new(Field::new("item", DataType::Int32, false)));
-        let expected = ArrayData::builder(list_data_type)
-            .len(2)
-            .add_buffer(value_offsets)
-            .add_child_data(value_data)
-            .null_bit_buffer(Buffer::from([0b00000001]))
-            .build();
-
-        assert_eq!(&make_array(expected), &result);
-    }
-
-    #[test]
-    fn test_slice_iterator_bits() {
-        let filter_values = (0..64).map(|i| i == 1).collect::<Vec<bool>>();
-        let filter = BooleanArray::from(filter_values);
-
-        let iter = SlicesIterator::new(&filter);
-        let filter_count = iter.filter_count;
-        let chunks = iter.collect::<Vec<_>>();
-
-        assert_eq!(chunks, vec![(1, 2)]);
-        assert_eq!(filter_count, 1);
-    }
-
-    #[test]
-    fn test_slice_iterator_bits1() {
-        let filter_values = (0..64).map(|i| i != 1).collect::<Vec<bool>>();
-        let filter = BooleanArray::from(filter_values);
-
-        let iter = SlicesIterator::new(&filter);
-        let filter_count = iter.filter_count;
-        let chunks = iter.collect::<Vec<_>>();
-
-        assert_eq!(chunks, vec![(0, 1), (2, 64)]);
-        assert_eq!(filter_count, 64 - 1);
-    }
-
-    #[test]
-    fn test_slice_iterator_chunk_and_bits() {
-        let filter_values = (0..130).map(|i| i % 62 != 0).collect::<Vec<bool>>();
-        let filter = BooleanArray::from(filter_values);
-
-        let iter = SlicesIterator::new(&filter);
-        let filter_count = iter.filter_count;
-        let chunks = iter.collect::<Vec<_>>();
-
-        assert_eq!(chunks, vec![(1, 62), (63, 124), (125, 130)]);
-        assert_eq!(filter_count, 61 + 61 + 5);
-    }
-}
diff --git a/rust/arrow/src/compute/kernels/length.rs b/rust/arrow/src/compute/kernels/length.rs
deleted file mode 100644
index 4d704d27078..00000000000
--- a/rust/arrow/src/compute/kernels/length.rs
+++ /dev/null
@@ -1,385 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Defines kernel for length of a string array
-
-use crate::{
-    array::*,
-    buffer::Buffer,
-    datatypes::{ArrowNativeType, ArrowPrimitiveType},
-};
-use crate::{
-    datatypes::{DataType, Int32Type, Int64Type},
-    error::{ArrowError, Result},
-};
-
-fn unary_offsets_string<O, F>(
-    array: &GenericStringArray<O>,
-    data_type: DataType,
-    op: F,
-) -> ArrayRef
-where
-    O: StringOffsetSizeTrait + ArrowNativeType,
-    F: Fn(O) -> O,
-{
-    // note: offsets are stored as u8, but they can be interpreted as OffsetSize
-    let offsets = &array.data_ref().buffers()[0];
-    // this is a 30% improvement over iterating over u8s and building OffsetSize, which
-    // justifies the usage of `unsafe`.
-    let slice: &[O] = &unsafe { offsets.typed_data::<O>() }[array.offset()..];
-
-    let lengths = slice.windows(2).map(|offset| op(offset[1] - offset[0]));
-
-    // JUSTIFICATION
-    //  Benefit
-    //      ~60% speedup
-    //  Soundness
-    //      `values` is an iterator with a known size.
-    let buffer = unsafe { Buffer::from_trusted_len_iter(lengths) };
-
-    let null_bit_buffer = array
-        .data_ref()
-        .null_bitmap()
-        .as_ref()
-        .map(|b| b.bits.clone());
-
-    let data = ArrayData::new(
-        data_type,
-        array.len(),
-        None,
-        null_bit_buffer,
-        0,
-        vec![buffer],
-        vec![],
-    );
-    make_array(data)
-}
-
-fn octet_length<O: StringOffsetSizeTrait, T: ArrowPrimitiveType>(
-    array: &dyn Array,
-) -> ArrayRef
-where
-    T::Native: StringOffsetSizeTrait,
-{
-    let array = array
-        .as_any()
-        .downcast_ref::<GenericStringArray<O>>()
-        .unwrap();
-    unary_offsets_string::<O, _>(array, T::DATA_TYPE, |x| x)
-}
-
-fn bit_length_impl<O: StringOffsetSizeTrait, T: ArrowPrimitiveType>(
-    array: &dyn Array,
-) -> ArrayRef
-where
-    T::Native: StringOffsetSizeTrait,
-{
-    let array = array
-        .as_any()
-        .downcast_ref::<GenericStringArray<O>>()
-        .unwrap();
-    let bits_in_bytes = O::from_usize(8).unwrap();
-    unary_offsets_string::<O, _>(array, T::DATA_TYPE, |x| x * bits_in_bytes)
-}
-
-/// Returns an array of Int32/Int64 denoting the number of bytes in each string in the array.
-///
-/// * this only accepts StringArray/Utf8 and LargeString/LargeUtf8
-/// * length of null is null.
-/// * length is in number of bytes
-pub fn length(array: &Array) -> Result<ArrayRef> {
-    match array.data_type() {
-        DataType::Utf8 => Ok(octet_length::<i32, Int32Type>(array)),
-        DataType::LargeUtf8 => Ok(octet_length::<i64, Int64Type>(array)),
-        _ => Err(ArrowError::ComputeError(format!(
-            "length not supported for {:?}",
-            array.data_type()
-        ))),
-    }
-}
-
-/// Returns an array of Int32/Int64 denoting the number of bits in each string in the array.
-///
-/// * this only accepts StringArray/Utf8 and LargeString/LargeUtf8
-/// * bit_length of null is null.
-/// * bit_length is in number of bits
-pub fn bit_length(array: &Array) -> Result<ArrayRef> {
-    match array.data_type() {
-        DataType::Utf8 => Ok(bit_length_impl::<i32, Int32Type>(array)),
-        DataType::LargeUtf8 => Ok(bit_length_impl::<i64, Int64Type>(array)),
-        _ => Err(ArrowError::ComputeError(format!(
-            "bit_length not supported for {:?}",
-            array.data_type()
-        ))),
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    fn length_cases() -> Vec<(Vec<&'static str>, usize, Vec<i32>)> {
-        fn double_vec<T: Clone>(v: Vec<T>) -> Vec<T> {
-            [&v[..], &v[..]].concat()
-        }
-
-        // a large array
-        let mut values = vec!["one", "on", "o", ""];
-        let mut expected = vec![3, 2, 1, 0];
-        for _ in 0..10 {
-            values = double_vec(values);
-            expected = double_vec(expected);
-        }
-
-        vec![
-            (vec!["hello", " ", "world"], 3, vec![5, 1, 5]),
-            (vec!["hello", " ", "world", "!"], 4, vec![5, 1, 5, 1]),
-            (vec!["💖"], 1, vec![4]),
-            (values, 4096, expected),
-        ]
-    }
-
-    #[test]
-    fn length_test_string() -> Result<()> {
-        length_cases()
-            .into_iter()
-            .try_for_each(|(input, len, expected)| {
-                let array = StringArray::from(input);
-                let result = length(&array)?;
-                assert_eq!(len, result.len());
-                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
-                expected.iter().enumerate().for_each(|(i, value)| {
-                    assert_eq!(*value, result.value(i));
-                });
-                Ok(())
-            })
-    }
-
-    #[test]
-    fn length_test_large_string() -> Result<()> {
-        length_cases()
-            .into_iter()
-            .try_for_each(|(input, len, expected)| {
-                let array = LargeStringArray::from(input);
-                let result = length(&array)?;
-                assert_eq!(len, result.len());
-                let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
-                expected.iter().enumerate().for_each(|(i, value)| {
-                    assert_eq!(*value as i64, result.value(i));
-                });
-                Ok(())
-            })
-    }
-
-    fn length_null_cases() -> Vec<(Vec<Option<&'static str>>, usize, Vec<Option<i32>>)> {
-        vec![(
-            vec![Some("one"), None, Some("three"), Some("four")],
-            4,
-            vec![Some(3), None, Some(5), Some(4)],
-        )]
-    }
-
-    #[test]
-    fn length_null_string() -> Result<()> {
-        length_null_cases()
-            .into_iter()
-            .try_for_each(|(input, len, expected)| {
-                let array = StringArray::from(input);
-                let result = length(&array)?;
-                assert_eq!(len, result.len());
-                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
-
-                let expected: Int32Array = expected.into();
-                assert_eq!(expected.data(), result.data());
-                Ok(())
-            })
-    }
-
-    #[test]
-    fn length_null_large_string() -> Result<()> {
-        length_null_cases()
-            .into_iter()
-            .try_for_each(|(input, len, expected)| {
-                let array = LargeStringArray::from(input);
-                let result = length(&array)?;
-                assert_eq!(len, result.len());
-                let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
-
-                // convert to i64
-                let expected: Int64Array = expected
-                    .iter()
-                    .map(|e| e.map(|e| e as i64))
-                    .collect::<Vec<_>>()
-                    .into();
-                assert_eq!(expected.data(), result.data());
-                Ok(())
-            })
-    }
-
-    /// Tests that length is not valid for u64.
-    #[test]
-    fn length_wrong_type() {
-        let array: UInt64Array = vec![1u64].into();
-
-        assert!(length(&array).is_err());
-    }
-
-    /// Tests with an offset
-    #[test]
-    fn length_offsets() -> Result<()> {
-        let a = StringArray::from(vec!["hello", " ", "world"]);
-        let b = make_array(
-            ArrayData::builder(DataType::Utf8)
-                .len(2)
-                .offset(1)
-                .buffers(a.data_ref().buffers().to_vec())
-                .build(),
-        );
-        let result = length(b.as_ref())?;
-
-        let expected = Int32Array::from(vec![1, 5]);
-        assert_eq!(expected.data(), result.data());
-
-        Ok(())
-    }
-
-    fn bit_length_cases() -> Vec<(Vec<&'static str>, usize, Vec<i32>)> {
-        fn double_vec<T: Clone>(v: Vec<T>) -> Vec<T> {
-            [&v[..], &v[..]].concat()
-        }
-
-        // a large array
-        let mut values = vec!["one", "on", "o", ""];
-        let mut expected = vec![24, 16, 8, 0];
-        for _ in 0..10 {
-            values = double_vec(values);
-            expected = double_vec(expected);
-        }
-
-        vec![
-            (vec!["hello", " ", "world", "!"], 4, vec![40, 8, 40, 8]),
-            (vec!["💖"], 1, vec![32]),
-            (vec!["josé"], 1, vec![40]),
-            (values, 4096, expected),
-        ]
-    }
-
-    #[test]
-    fn bit_length_test_string() -> Result<()> {
-        bit_length_cases()
-            .into_iter()
-            .try_for_each(|(input, len, expected)| {
-                let array = StringArray::from(input);
-                let result = bit_length(&array)?;
-                assert_eq!(len, result.len());
-                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
-                expected.iter().enumerate().for_each(|(i, value)| {
-                    assert_eq!(*value, result.value(i));
-                });
-                Ok(())
-            })
-    }
-
-    #[test]
-    fn bit_length_test_large_string() -> Result<()> {
-        bit_length_cases()
-            .into_iter()
-            .try_for_each(|(input, len, expected)| {
-                let array = LargeStringArray::from(input);
-                let result = bit_length(&array)?;
-                assert_eq!(len, result.len());
-                let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
-                expected.iter().enumerate().for_each(|(i, value)| {
-                    assert_eq!(*value as i64, result.value(i));
-                });
-                Ok(())
-            })
-    }
-
-    fn bit_length_null_cases() -> Vec<(Vec<Option<&'static str>>, usize, Vec<Option<i32>>)>
-    {
-        vec![(
-            vec![Some("one"), None, Some("three"), Some("four")],
-            4,
-            vec![Some(24), None, Some(40), Some(32)],
-        )]
-    }
-
-    #[test]
-    fn bit_length_null_string() -> Result<()> {
-        bit_length_null_cases()
-            .into_iter()
-            .try_for_each(|(input, len, expected)| {
-                let array = StringArray::from(input);
-                let result = bit_length(&array)?;
-                assert_eq!(len, result.len());
-                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
-
-                let expected: Int32Array = expected.into();
-                assert_eq!(expected.data(), result.data());
-                Ok(())
-            })
-    }
-
-    #[test]
-    fn bit_length_null_large_string() -> Result<()> {
-        bit_length_null_cases()
-            .into_iter()
-            .try_for_each(|(input, len, expected)| {
-                let array = LargeStringArray::from(input);
-                let result = bit_length(&array)?;
-                assert_eq!(len, result.len());
-                let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
-
-                // convert to i64
-                let expected: Int64Array = expected
-                    .iter()
-                    .map(|e| e.map(|e| e as i64))
-                    .collect::<Vec<_>>()
-                    .into();
-                assert_eq!(expected.data(), result.data());
-                Ok(())
-            })
-    }
-
-    /// Tests that bit_length is not valid for u64.
-    #[test]
-    fn bit_length_wrong_type() {
-        let array: UInt64Array = vec![1u64].into();
-
-        assert!(bit_length(&array).is_err());
-    }
-
-    /// Tests with an offset
-    #[test]
-    fn bit_length_offsets() -> Result<()> {
-        let a = StringArray::from(vec!["hello", " ", "world"]);
-        let b = make_array(
-            ArrayData::builder(DataType::Utf8)
-                .len(2)
-                .offset(1)
-                .buffers(a.data_ref().buffers().to_vec())
-                .build(),
-        );
-        let result = bit_length(b.as_ref())?;
-
-        let expected = Int32Array::from(vec![8, 40]);
-        assert_eq!(expected.data(), result.data());
-
-        Ok(())
-    }
-}
diff --git a/rust/arrow/src/compute/kernels/limit.rs b/rust/arrow/src/compute/kernels/limit.rs
deleted file mode 100644
index 4b4b08572a2..00000000000
--- a/rust/arrow/src/compute/kernels/limit.rs
+++ /dev/null
@@ -1,200 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Defines miscellaneous array kernels.
-
-use crate::array::ArrayRef;
-
-/// Returns the array, taking only the number of elements specified
-///
-/// Limit performs a zero-copy slice of the array, and is a convenience method on slice
-/// where:
-/// * it performs a bounds-check on the array
-/// * it slices from offset 0
-pub fn limit(array: &ArrayRef, num_elements: usize) -> ArrayRef {
-    let lim = num_elements.min(array.len());
-    array.slice(0, lim)
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::array::*;
-    use crate::buffer::Buffer;
-    use crate::datatypes::{DataType, Field};
-    use crate::util::bit_util;
-
-    use std::sync::Arc;
-
-    #[test]
-    fn test_limit_array() {
-        let a: ArrayRef = Arc::new(Int32Array::from(vec![5, 6, 7, 8, 9]));
-        let b = limit(&a, 3);
-        let c = b.as_ref().as_any().downcast_ref::<Int32Array>().unwrap();
-        assert_eq!(3, c.len());
-        assert_eq!(5, c.value(0));
-        assert_eq!(6, c.value(1));
-        assert_eq!(7, c.value(2));
-    }
-
-    #[test]
-    fn test_limit_string_array() {
-        let a: ArrayRef = Arc::new(StringArray::from(vec!["hello", " ", "world", "!"]));
-        let b = limit(&a, 2);
-        let c = b.as_ref().as_any().downcast_ref::<StringArray>().unwrap();
-        assert_eq!(2, c.len());
-        assert_eq!("hello", c.value(0));
-        assert_eq!(" ", c.value(1));
-    }
-
-    #[test]
-    fn test_limit_array_with_null() {
-        let a: ArrayRef = Arc::new(Int32Array::from(vec![None, Some(5)]));
-        let b = limit(&a, 1);
-        let c = b.as_ref().as_any().downcast_ref::<Int32Array>().unwrap();
-        assert_eq!(1, c.len());
-        assert_eq!(true, c.is_null(0));
-    }
-
-    #[test]
-    fn test_limit_array_with_limit_too_large() {
-        let a = Int32Array::from(vec![5, 6, 7, 8, 9]);
-        let a_ref: ArrayRef = Arc::new(a);
-        let b = limit(&a_ref, 6);
-        let c = b.as_ref().as_any().downcast_ref::<Int32Array>().unwrap();
-
-        assert_eq!(5, c.len());
-        assert_eq!(5, c.value(0));
-        assert_eq!(6, c.value(1));
-        assert_eq!(7, c.value(2));
-        assert_eq!(8, c.value(3));
-        assert_eq!(9, c.value(4));
-    }
-
-    #[test]
-    fn test_list_array_limit() {
-        // adapted from crate::array::test::test_list_array_slice
-        // Construct a value array
-        let value_data = ArrayData::builder(DataType::Int32)
-            .len(10)
-            .add_buffer(Buffer::from_slice_ref(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
-            .build();
-
-        // Construct a buffer for value offsets, for the nested array:
-        //  [[0, 1], null, [2, 3], null, [4, 5], null, [6, 7, 8], null, [9]]
-        let value_offsets = Buffer::from_slice_ref(&[0, 2, 2, 4, 4, 6, 6, 9, 9, 10]);
-        // 01010101 00000001
-        let mut null_bits: [u8; 2] = [0; 2];
-        bit_util::set_bit(&mut null_bits, 0);
-        bit_util::set_bit(&mut null_bits, 2);
-        bit_util::set_bit(&mut null_bits, 4);
-        bit_util::set_bit(&mut null_bits, 6);
-        bit_util::set_bit(&mut null_bits, 8);
-
-        // Construct a list array from the above two
-        let list_data_type =
-            DataType::List(Box::new(Field::new("item", DataType::Int32, false)));
-        let list_data = ArrayData::builder(list_data_type)
-            .len(9)
-            .add_buffer(value_offsets)
-            .add_child_data(value_data)
-            .null_bit_buffer(Buffer::from(null_bits))
-            .build();
-        let list_array: ArrayRef = Arc::new(ListArray::from(list_data));
-
-        let limit_array = limit(&list_array, 6);
-        assert_eq!(6, limit_array.len());
-        assert_eq!(0, limit_array.offset());
-        assert_eq!(3, limit_array.null_count());
-
-        // Check offset and length for each non-null value.
-        let limit_array: &ListArray =
-            limit_array.as_any().downcast_ref::<ListArray>().unwrap();
-
-        for i in 0..limit_array.len() {
-            let offset = limit_array.value_offsets()[i];
-            let length = limit_array.value_length(i);
-            if i % 2 == 0 {
-                assert_eq!(2, length);
-                assert_eq!(i as i32, offset);
-            } else {
-                assert_eq!(0, length);
-            }
-        }
-    }
-
-    #[test]
-    fn test_struct_array_limit() {
-        // adapted from crate::array::test::test_struct_array_slice
-        let boolean_data = ArrayData::builder(DataType::Boolean)
-            .len(5)
-            .add_buffer(Buffer::from([0b00010000]))
-            .null_bit_buffer(Buffer::from([0b00010001]))
-            .build();
-        let int_data = ArrayData::builder(DataType::Int32)
-            .len(5)
-            .add_buffer(Buffer::from_slice_ref(&[0, 28, 42, 0, 0]))
-            .null_bit_buffer(Buffer::from([0b00000110]))
-            .build();
-
-        let mut field_types = vec![];
-        field_types.push(Field::new("a", DataType::Boolean, false));
-        field_types.push(Field::new("b", DataType::Int32, false));
-        let struct_array_data = ArrayData::builder(DataType::Struct(field_types))
-            .len(5)
-            .add_child_data(boolean_data.clone())
-            .add_child_data(int_data.clone())
-            .null_bit_buffer(Buffer::from([0b00010111]))
-            .build();
-        let struct_array = StructArray::from(struct_array_data);
-
-        assert_eq!(5, struct_array.len());
-        assert_eq!(1, struct_array.null_count());
-        assert_eq!(&boolean_data, struct_array.column(0).data());
-        assert_eq!(&int_data, struct_array.column(1).data());
-
-        let array: ArrayRef = Arc::new(struct_array);
-
-        let sliced_array = limit(&array, 3);
-        let sliced_array = sliced_array.as_any().downcast_ref::<StructArray>().unwrap();
-        assert_eq!(3, sliced_array.len());
-        assert_eq!(0, sliced_array.offset());
-        assert_eq!(0, sliced_array.null_count());
-        assert!(sliced_array.is_valid(0));
-        assert!(sliced_array.is_valid(1));
-        assert!(sliced_array.is_valid(2));
-
-        let sliced_c0 = sliced_array.column(0);
-        let sliced_c0 = sliced_c0.as_any().downcast_ref::<BooleanArray>().unwrap();
-        assert_eq!(3, sliced_c0.len());
-        assert_eq!(0, sliced_c0.offset());
-        assert_eq!(2, sliced_c0.null_count());
-        assert!(sliced_c0.is_valid(0));
-        assert!(sliced_c0.is_null(1));
-        assert!(sliced_c0.is_null(2));
-        assert_eq!(false, sliced_c0.value(0));
-
-        let sliced_c1 = sliced_array.column(1);
-        let sliced_c1 = sliced_c1.as_any().downcast_ref::<Int32Array>().unwrap();
-        assert_eq!(3, sliced_c1.len());
-        assert_eq!(0, sliced_c1.offset());
-        assert_eq!(1, sliced_c1.null_count());
-        assert!(sliced_c1.is_null(0));
-        assert_eq!(28, sliced_c1.value(1));
-        assert_eq!(42, sliced_c1.value(2));
-    }
-}
diff --git a/rust/arrow/src/compute/kernels/mod.rs b/rust/arrow/src/compute/kernels/mod.rs
deleted file mode 100644
index 862f55fe2f2..00000000000
--- a/rust/arrow/src/compute/kernels/mod.rs
+++ /dev/null
@@ -1,37 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Computation kernels on Arrow Arrays
-
-pub mod aggregate;
-pub mod arithmetic;
-pub mod arity;
-pub mod boolean;
-pub mod cast;
-pub mod cast_utils;
-pub mod comparison;
-pub mod concat;
-pub mod filter;
-pub mod length;
-pub mod limit;
-pub mod regexp;
-pub mod sort;
-pub mod substring;
-pub mod take;
-pub mod temporal;
-pub mod window;
-pub mod zip;
diff --git a/rust/arrow/src/compute/kernels/regexp.rs b/rust/arrow/src/compute/kernels/regexp.rs
deleted file mode 100644
index 446d71d9f4a..00000000000
--- a/rust/arrow/src/compute/kernels/regexp.rs
+++ /dev/null
@@ -1,160 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Defines kernel to extract substrings based on a regular
-//! expression of a \[Large\]StringArray
-
-use crate::array::{
-    ArrayRef, GenericStringArray, GenericStringBuilder, ListBuilder,
-    StringOffsetSizeTrait,
-};
-use crate::error::{ArrowError, Result};
-use std::collections::HashMap;
-
-use std::sync::Arc;
-
-use regex::Regex;
-
-/// Extract all groups matched by a regular expression for a given String array.
-pub fn regexp_match<OffsetSize: StringOffsetSizeTrait>(
-    array: &GenericStringArray<OffsetSize>,
-    regex_array: &GenericStringArray<OffsetSize>,
-    flags_array: Option<&GenericStringArray<OffsetSize>>,
-) -> Result<ArrayRef> {
-    let mut patterns: HashMap<String, Regex> = HashMap::new();
-    let builder: GenericStringBuilder<OffsetSize> = GenericStringBuilder::new(0);
-    let mut list_builder = ListBuilder::new(builder);
-
-    let complete_pattern = match flags_array {
-        Some(flags) => Box::new(regex_array.iter().zip(flags.iter()).map(
-            |(pattern, flags)| {
-                pattern.map(|pattern| match flags {
-                    Some(value) => format!("(?{}){}", value, pattern),
-                    None => pattern.to_string(),
-                })
-            },
-        )) as Box<dyn Iterator<Item = Option<String>>>,
-        None => Box::new(
-            regex_array
-                .iter()
-                .map(|pattern| pattern.map(|pattern| pattern.to_string())),
-        ),
-    };
-    array
-        .iter()
-        .zip(complete_pattern)
-        .map(|(value, pattern)| {
-            match (value, pattern) {
-                // Required for Postgres compatibility:
-                // SELECT regexp_match('foobarbequebaz', ''); = {""}
-                (Some(_), Some(pattern)) if pattern == *"" => {
-                    list_builder.values().append_value("")?;
-                    list_builder.append(true)?;
-                }
-                (Some(value), Some(pattern)) => {
-                    let existing_pattern = patterns.get(&pattern);
-                    let re = match existing_pattern {
-                        Some(re) => re.clone(),
-                        None => {
-                            let re = Regex::new(pattern.as_str()).map_err(|e| {
-                                ArrowError::ComputeError(format!(
-                                    "Regular expression did not compile: {:?}",
-                                    e
-                                ))
-                            })?;
-                            patterns.insert(pattern, re.clone());
-                            re
-                        }
-                    };
-                    match re.captures(value) {
-                        Some(caps) => {
-                            for m in caps.iter().skip(1) {
-                                if let Some(v) = m {
-                                    list_builder.values().append_value(v.as_str())?;
-                                }
-                            }
-                            list_builder.append(true)?
-                        }
-                        None => list_builder.append(false)?,
-                    }
-                }
-                _ => list_builder.append(false)?,
-            }
-            Ok(())
-        })
-        .collect::<Result<Vec<()>>>()?;
-    Ok(Arc::new(list_builder.finish()))
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::array::{ListArray, StringArray};
-
-    #[test]
-    fn match_single_group() -> Result<()> {
-        let values = vec![
-            Some("abc-005-def"),
-            Some("X-7-5"),
-            Some("X545"),
-            None,
-            Some("foobarbequebaz"),
-            Some("foobarbequebaz"),
-        ];
-        let array = StringArray::from(values);
-        let mut pattern_values = vec![r".*-(\d*)-.*"; 4];
-        pattern_values.push(r"(bar)(bequ1e)");
-        pattern_values.push("");
-        let pattern = StringArray::from(pattern_values);
-        let actual = regexp_match(&array, &pattern, None)?;
-        let elem_builder: GenericStringBuilder<i32> = GenericStringBuilder::new(0);
-        let mut expected_builder = ListBuilder::new(elem_builder);
-        expected_builder.values().append_value("005")?;
-        expected_builder.append(true)?;
-        expected_builder.values().append_value("7")?;
-        expected_builder.append(true)?;
-        expected_builder.append(false)?;
-        expected_builder.append(false)?;
-        expected_builder.append(false)?;
-        expected_builder.values().append_value("")?;
-        expected_builder.append(true)?;
-        let expected = expected_builder.finish();
-        let result = actual.as_any().downcast_ref::<ListArray>().unwrap();
-        assert_eq!(&expected, result);
-        Ok(())
-    }
-
-    #[test]
-    fn match_single_group_with_flags() -> Result<()> {
-        let values = vec![Some("abc-005-def"), Some("X-7-5"), Some("X545"), None];
-        let array = StringArray::from(values);
-        let pattern = StringArray::from(vec![r"x.*-(\d*)-.*"; 4]);
-        let flags = StringArray::from(vec!["i"; 4]);
-        let actual = regexp_match(&array, &pattern, Some(&flags))?;
-        let elem_builder: GenericStringBuilder<i32> = GenericStringBuilder::new(0);
-        let mut expected_builder = ListBuilder::new(elem_builder);
-        expected_builder.append(false)?;
-        expected_builder.values().append_value("7")?;
-        expected_builder.append(true)?;
-        expected_builder.append(false)?;
-        expected_builder.append(false)?;
-        let expected = expected_builder.finish();
-        let result = actual.as_any().downcast_ref::<ListArray>().unwrap();
-        assert_eq!(&expected, result);
-        Ok(())
-    }
-}
diff --git a/rust/arrow/src/compute/kernels/sort.rs b/rust/arrow/src/compute/kernels/sort.rs
deleted file mode 100644
index bf8eda353e6..00000000000
--- a/rust/arrow/src/compute/kernels/sort.rs
+++ /dev/null
@@ -1,2246 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Defines sort kernel for `ArrayRef`
-
-use std::cmp::Ordering;
-
-use crate::array::*;
-use crate::buffer::MutableBuffer;
-use crate::compute::take;
-use crate::datatypes::*;
-use crate::error::{ArrowError, Result};
-
-use TimeUnit::*;
-
-/// Sort the `ArrayRef` using `SortOptions`.
-///
-/// Performs a stable sort on values and indices. Nulls are ordered according to the `nulls_first` flag in `options`.
-/// Floats are sorted using IEEE 754 totalOrder
-///
-/// Returns an `ArrowError::ComputeError(String)` if the array type is either unsupported by `sort_to_indices` or `take`.
-///
-/// # Example
-/// ```rust
-/// # use std::sync::Arc;
-/// # use arrow::array::{Int32Array, ArrayRef};
-/// # use arrow::error::Result;
-/// # use arrow::compute::kernels::sort::sort;
-/// # fn main() -> Result<()> {
-/// let array: ArrayRef = Arc::new(Int32Array::from(vec![5, 4, 3, 2, 1]));
-/// let sorted_array = sort(&array, None).unwrap();
-/// let sorted_array = sorted_array.as_any().downcast_ref::<Int32Array>().unwrap();
-/// assert_eq!(sorted_array, &Int32Array::from(vec![1, 2, 3, 4, 5]));
-/// # Ok(())
-/// # }
-/// ```
-pub fn sort(values: &ArrayRef, options: Option<SortOptions>) -> Result<ArrayRef> {
-    let indices = sort_to_indices(values, options, None)?;
-    take(values.as_ref(), &indices, None)
-}
-
-/// Sort the `ArrayRef` partially.
-///
-/// If `limit` is specified, the resulting array will contain only
-/// first `limit` in the sort order. Any data data after the limit
-/// will be discarded.
-///
-/// Note: this is an unstable_sort, meaning it may not preserve the
-/// order of equal elements.
-///
-/// # Example
-/// ```rust
-/// # use std::sync::Arc;
-/// # use arrow::array::{Int32Array, ArrayRef};
-/// # use arrow::error::Result;
-/// # use arrow::compute::kernels::sort::{sort_limit, SortOptions};
-/// # fn main() -> Result<()> {
-/// let array: ArrayRef = Arc::new(Int32Array::from(vec![5, 4, 3, 2, 1]));
-///
-/// // Find the the top 2 items
-/// let sorted_array = sort_limit(&array, None, Some(2)).unwrap();
-/// let sorted_array = sorted_array.as_any().downcast_ref::<Int32Array>().unwrap();
-/// assert_eq!(sorted_array, &Int32Array::from(vec![1, 2]));
-///
-/// // Find the bottom top 2 items
-/// let options = Some(SortOptions {
-///                  descending: true,
-///                  ..Default::default()
-///               });
-/// let sorted_array = sort_limit(&array, options, Some(2)).unwrap();
-/// let sorted_array = sorted_array.as_any().downcast_ref::<Int32Array>().unwrap();
-/// assert_eq!(sorted_array, &Int32Array::from(vec![5, 4]));
-/// # Ok(())
-/// # }
-/// ```
-pub fn sort_limit(
-    values: &ArrayRef,
-    options: Option<SortOptions>,
-    limit: Option<usize>,
-) -> Result<ArrayRef> {
-    let indices = sort_to_indices(values, options, limit)?;
-    take(values.as_ref(), &indices, None)
-}
-
-#[inline]
-fn sort_by<T, F>(array: &mut [T], limit: usize, cmp: F)
-where
-    F: FnMut(&T, &T) -> Ordering,
-{
-    if array.len() == limit {
-        array.sort_by(cmp);
-    } else {
-        partial_sort(array, limit, cmp);
-    }
-}
-
-// implements comparison using IEEE 754 total ordering for f32
-// Original implementation from https://doc.rust-lang.org/std/primitive.f64.html#method.total_cmp
-// TODO to change to use std when it becomes stable
-fn total_cmp_32(l: f32, r: f32) -> std::cmp::Ordering {
-    let mut left = l.to_bits() as i32;
-    let mut right = r.to_bits() as i32;
-
-    left ^= (((left >> 31) as u32) >> 1) as i32;
-    right ^= (((right >> 31) as u32) >> 1) as i32;
-
-    left.cmp(&right)
-}
-
-// implements comparison using IEEE 754 total ordering for f64
-// Original implementation from https://doc.rust-lang.org/std/primitive.f64.html#method.total_cmp
-// TODO to change to use std when it becomes stable
-fn total_cmp_64(l: f64, r: f64) -> std::cmp::Ordering {
-    let mut left = l.to_bits() as i64;
-    let mut right = r.to_bits() as i64;
-
-    left ^= (((left >> 63) as u64) >> 1) as i64;
-    right ^= (((right >> 63) as u64) >> 1) as i64;
-
-    left.cmp(&right)
-}
-
-fn cmp<T>(l: T, r: T) -> std::cmp::Ordering
-where
-    T: Ord,
-{
-    l.cmp(&r)
-}
-
-// partition indices into valid and null indices
-fn partition_validity(array: &ArrayRef) -> (Vec<u32>, Vec<u32>) {
-    match array.null_count() {
-        // faster path
-        0 => ((0..(array.len() as u32)).collect(), vec![]),
-        _ => {
-            let indices = 0..(array.len() as u32);
-            indices.partition(|index| array.is_valid(*index as usize))
-        }
-    }
-}
-
-/// Sort elements from `ArrayRef` into an unsigned integer (`UInt32Array`) of indices.
-/// For floating point arrays any NaN values are considered to be greater than any other non-null value
-/// limit is an option for partial_sort
-pub fn sort_to_indices(
-    values: &ArrayRef,
-    options: Option<SortOptions>,
-    limit: Option<usize>,
-) -> Result<UInt32Array> {
-    let options = options.unwrap_or_default();
-
-    let (v, n) = partition_validity(values);
-
-    match values.data_type() {
-        DataType::Boolean => sort_boolean(values, v, n, &options, limit),
-        DataType::Int8 => {
-            sort_primitive::<Int8Type, _>(values, v, n, cmp, &options, limit)
-        }
-        DataType::Int16 => {
-            sort_primitive::<Int16Type, _>(values, v, n, cmp, &options, limit)
-        }
-        DataType::Int32 => {
-            sort_primitive::<Int32Type, _>(values, v, n, cmp, &options, limit)
-        }
-        DataType::Int64 => {
-            sort_primitive::<Int64Type, _>(values, v, n, cmp, &options, limit)
-        }
-        DataType::UInt8 => {
-            sort_primitive::<UInt8Type, _>(values, v, n, cmp, &options, limit)
-        }
-        DataType::UInt16 => {
-            sort_primitive::<UInt16Type, _>(values, v, n, cmp, &options, limit)
-        }
-        DataType::UInt32 => {
-            sort_primitive::<UInt32Type, _>(values, v, n, cmp, &options, limit)
-        }
-        DataType::UInt64 => {
-            sort_primitive::<UInt64Type, _>(values, v, n, cmp, &options, limit)
-        }
-        DataType::Float32 => {
-            sort_primitive::<Float32Type, _>(values, v, n, total_cmp_32, &options, limit)
-        }
-        DataType::Float64 => {
-            sort_primitive::<Float64Type, _>(values, v, n, total_cmp_64, &options, limit)
-        }
-        DataType::Date32 => {
-            sort_primitive::<Date32Type, _>(values, v, n, cmp, &options, limit)
-        }
-        DataType::Date64 => {
-            sort_primitive::<Date64Type, _>(values, v, n, cmp, &options, limit)
-        }
-        DataType::Time32(Second) => {
-            sort_primitive::<Time32SecondType, _>(values, v, n, cmp, &options, limit)
-        }
-        DataType::Time32(Millisecond) => {
-            sort_primitive::<Time32MillisecondType, _>(values, v, n, cmp, &options, limit)
-        }
-        DataType::Time64(Microsecond) => {
-            sort_primitive::<Time64MicrosecondType, _>(values, v, n, cmp, &options, limit)
-        }
-        DataType::Time64(Nanosecond) => {
-            sort_primitive::<Time64NanosecondType, _>(values, v, n, cmp, &options, limit)
-        }
-        DataType::Timestamp(Second, _) => {
-            sort_primitive::<TimestampSecondType, _>(values, v, n, cmp, &options, limit)
-        }
-        DataType::Timestamp(Millisecond, _) => {
-            sort_primitive::<TimestampMillisecondType, _>(
-                values, v, n, cmp, &options, limit,
-            )
-        }
-        DataType::Timestamp(Microsecond, _) => {
-            sort_primitive::<TimestampMicrosecondType, _>(
-                values, v, n, cmp, &options, limit,
-            )
-        }
-        DataType::Timestamp(Nanosecond, _) => {
-            sort_primitive::<TimestampNanosecondType, _>(
-                values, v, n, cmp, &options, limit,
-            )
-        }
-        DataType::Interval(IntervalUnit::YearMonth) => {
-            sort_primitive::<IntervalYearMonthType, _>(values, v, n, cmp, &options, limit)
-        }
-        DataType::Interval(IntervalUnit::DayTime) => {
-            sort_primitive::<IntervalDayTimeType, _>(values, v, n, cmp, &options, limit)
-        }
-        DataType::Duration(TimeUnit::Second) => {
-            sort_primitive::<DurationSecondType, _>(values, v, n, cmp, &options, limit)
-        }
-        DataType::Duration(TimeUnit::Millisecond) => {
-            sort_primitive::<DurationMillisecondType, _>(
-                values, v, n, cmp, &options, limit,
-            )
-        }
-        DataType::Duration(TimeUnit::Microsecond) => {
-            sort_primitive::<DurationMicrosecondType, _>(
-                values, v, n, cmp, &options, limit,
-            )
-        }
-        DataType::Duration(TimeUnit::Nanosecond) => {
-            sort_primitive::<DurationNanosecondType, _>(
-                values, v, n, cmp, &options, limit,
-            )
-        }
-        DataType::Utf8 => sort_string(values, v, n, &options, limit),
-        DataType::List(field) => match field.data_type() {
-            DataType::Int8 => sort_list::<i32, Int8Type>(values, v, n, &options, limit),
-            DataType::Int16 => sort_list::<i32, Int16Type>(values, v, n, &options, limit),
-            DataType::Int32 => sort_list::<i32, Int32Type>(values, v, n, &options, limit),
-            DataType::Int64 => sort_list::<i32, Int64Type>(values, v, n, &options, limit),
-            DataType::UInt8 => sort_list::<i32, UInt8Type>(values, v, n, &options, limit),
-            DataType::UInt16 => {
-                sort_list::<i32, UInt16Type>(values, v, n, &options, limit)
-            }
-            DataType::UInt32 => {
-                sort_list::<i32, UInt32Type>(values, v, n, &options, limit)
-            }
-            DataType::UInt64 => {
-                sort_list::<i32, UInt64Type>(values, v, n, &options, limit)
-            }
-            t => Err(ArrowError::ComputeError(format!(
-                "Sort not supported for list type {:?}",
-                t
-            ))),
-        },
-        DataType::LargeList(field) => match field.data_type() {
-            DataType::Int8 => sort_list::<i64, Int8Type>(values, v, n, &options, limit),
-            DataType::Int16 => sort_list::<i64, Int16Type>(values, v, n, &options, limit),
-            DataType::Int32 => sort_list::<i64, Int32Type>(values, v, n, &options, limit),
-            DataType::Int64 => sort_list::<i64, Int64Type>(values, v, n, &options, limit),
-            DataType::UInt8 => sort_list::<i64, UInt8Type>(values, v, n, &options, limit),
-            DataType::UInt16 => {
-                sort_list::<i64, UInt16Type>(values, v, n, &options, limit)
-            }
-            DataType::UInt32 => {
-                sort_list::<i64, UInt32Type>(values, v, n, &options, limit)
-            }
-            DataType::UInt64 => {
-                sort_list::<i64, UInt64Type>(values, v, n, &options, limit)
-            }
-            t => Err(ArrowError::ComputeError(format!(
-                "Sort not supported for list type {:?}",
-                t
-            ))),
-        },
-        DataType::FixedSizeList(field, _) => match field.data_type() {
-            DataType::Int8 => sort_list::<i32, Int8Type>(values, v, n, &options, limit),
-            DataType::Int16 => sort_list::<i32, Int16Type>(values, v, n, &options, limit),
-            DataType::Int32 => sort_list::<i32, Int32Type>(values, v, n, &options, limit),
-            DataType::Int64 => sort_list::<i32, Int64Type>(values, v, n, &options, limit),
-            DataType::UInt8 => sort_list::<i32, UInt8Type>(values, v, n, &options, limit),
-            DataType::UInt16 => {
-                sort_list::<i32, UInt16Type>(values, v, n, &options, limit)
-            }
-            DataType::UInt32 => {
-                sort_list::<i32, UInt32Type>(values, v, n, &options, limit)
-            }
-            DataType::UInt64 => {
-                sort_list::<i32, UInt64Type>(values, v, n, &options, limit)
-            }
-            t => Err(ArrowError::ComputeError(format!(
-                "Sort not supported for list type {:?}",
-                t
-            ))),
-        },
-        DataType::Dictionary(key_type, value_type)
-            if *value_type.as_ref() == DataType::Utf8 =>
-        {
-            match key_type.as_ref() {
-                DataType::Int8 => {
-                    sort_string_dictionary::<Int8Type>(values, v, n, &options, limit)
-                }
-                DataType::Int16 => {
-                    sort_string_dictionary::<Int16Type>(values, v, n, &options, limit)
-                }
-                DataType::Int32 => {
-                    sort_string_dictionary::<Int32Type>(values, v, n, &options, limit)
-                }
-                DataType::Int64 => {
-                    sort_string_dictionary::<Int64Type>(values, v, n, &options, limit)
-                }
-                DataType::UInt8 => {
-                    sort_string_dictionary::<UInt8Type>(values, v, n, &options, limit)
-                }
-                DataType::UInt16 => {
-                    sort_string_dictionary::<UInt16Type>(values, v, n, &options, limit)
-                }
-                DataType::UInt32 => {
-                    sort_string_dictionary::<UInt32Type>(values, v, n, &options, limit)
-                }
-                DataType::UInt64 => {
-                    sort_string_dictionary::<UInt64Type>(values, v, n, &options, limit)
-                }
-                t => Err(ArrowError::ComputeError(format!(
-                    "Sort not supported for dictionary key type {:?}",
-                    t
-                ))),
-            }
-        }
-        t => Err(ArrowError::ComputeError(format!(
-            "Sort not supported for data type {:?}",
-            t
-        ))),
-    }
-}
-
-/// Options that define how sort kernels should behave
-#[derive(Clone, Copy, Debug)]
-pub struct SortOptions {
-    /// Whether to sort in descending order
-    pub descending: bool,
-    /// Whether to sort nulls first
-    pub nulls_first: bool,
-}
-
-impl Default for SortOptions {
-    fn default() -> Self {
-        Self {
-            descending: false,
-            // default to nulls first to match spark's behavior
-            nulls_first: true,
-        }
-    }
-}
-
-/// Sort primitive values
-#[allow(clippy::unnecessary_wraps)]
-fn sort_boolean(
-    values: &ArrayRef,
-    value_indices: Vec<u32>,
-    null_indices: Vec<u32>,
-    options: &SortOptions,
-    limit: Option<usize>,
-) -> Result<UInt32Array> {
-    let values = values
-        .as_any()
-        .downcast_ref::<BooleanArray>()
-        .expect("Unable to downcast to boolean array");
-    let descending = options.descending;
-
-    // create tuples that are used for sorting
-    let mut valids = value_indices
-        .into_iter()
-        .map(|index| (index, values.value(index as usize)))
-        .collect::<Vec<(u32, bool)>>();
-
-    let mut nulls = null_indices;
-
-    let valids_len = valids.len();
-    let nulls_len = nulls.len();
-
-    let mut len = values.len();
-    if let Some(limit) = limit {
-        len = limit.min(len);
-    }
-    if !descending {
-        sort_by(&mut valids, len - nulls_len, |a, b| cmp(a.1, b.1));
-    } else {
-        sort_by(&mut valids, len - nulls_len, |a, b| cmp(a.1, b.1).reverse());
-        // reverse to keep a stable ordering
-        nulls.reverse();
-    }
-
-    // collect results directly into a buffer instead of a vec to avoid another aligned allocation
-    let mut result = MutableBuffer::new(values.len() * std::mem::size_of::<u32>());
-    // sets len to capacity so we can access the whole buffer as a typed slice
-    result.resize(values.len() * std::mem::size_of::<u32>(), 0);
-    let result_slice: &mut [u32] = result.typed_data_mut();
-
-    debug_assert_eq!(result_slice.len(), nulls_len + valids_len);
-
-    if options.nulls_first {
-        let size = nulls_len.min(len);
-        result_slice[0..nulls_len.min(len)].copy_from_slice(&nulls);
-        if nulls_len < len {
-            insert_valid_values(result_slice, nulls_len, &valids[0..len - size]);
-        }
-    } else {
-        // nulls last
-        let size = valids.len().min(len);
-        insert_valid_values(result_slice, 0, &valids[0..size]);
-        if len > size {
-            result_slice[valids_len..].copy_from_slice(&nulls[0..(len - valids_len)]);
-        }
-    }
-
-    let result_data = ArrayData::new(
-        DataType::UInt32,
-        len,
-        Some(0),
-        None,
-        0,
-        vec![result.into()],
-        vec![],
-    );
-
-    Ok(UInt32Array::from(result_data))
-}
-
-/// Sort primitive values
-#[allow(clippy::unnecessary_wraps)]
-fn sort_primitive<T, F>(
-    values: &ArrayRef,
-    value_indices: Vec<u32>,
-    null_indices: Vec<u32>,
-    cmp: F,
-    options: &SortOptions,
-    limit: Option<usize>,
-) -> Result<UInt32Array>
-where
-    T: ArrowPrimitiveType,
-    T::Native: std::cmp::PartialOrd,
-    F: Fn(T::Native, T::Native) -> std::cmp::Ordering,
-{
-    let values = as_primitive_array::<T>(values);
-    let descending = options.descending;
-
-    // create tuples that are used for sorting
-    let mut valids = value_indices
-        .into_iter()
-        .map(|index| (index, values.value(index as usize)))
-        .collect::<Vec<(u32, T::Native)>>();
-
-    let mut nulls = null_indices;
-
-    let valids_len = valids.len();
-    let nulls_len = nulls.len();
-    let mut len = values.len();
-
-    if let Some(limit) = limit {
-        len = limit.min(len);
-    }
-    if !descending {
-        sort_by(&mut valids, len - nulls_len, |a, b| cmp(a.1, b.1));
-    } else {
-        sort_by(&mut valids, len - nulls_len, |a, b| cmp(a.1, b.1).reverse());
-        // reverse to keep a stable ordering
-        nulls.reverse();
-    }
-
-    // collect results directly into a buffer instead of a vec to avoid another aligned allocation
-    let mut result = MutableBuffer::new(values.len() * std::mem::size_of::<u32>());
-    // sets len to capacity so we can access the whole buffer as a typed slice
-    result.resize(values.len() * std::mem::size_of::<u32>(), 0);
-    let result_slice: &mut [u32] = result.typed_data_mut();
-
-    debug_assert_eq!(result_slice.len(), nulls_len + valids_len);
-
-    if options.nulls_first {
-        let size = nulls_len.min(len);
-        result_slice[0..nulls_len.min(len)].copy_from_slice(&nulls);
-        if nulls_len < len {
-            insert_valid_values(result_slice, nulls_len, &valids[0..len - size]);
-        }
-    } else {
-        // nulls last
-        let size = valids.len().min(len);
-        insert_valid_values(result_slice, 0, &valids[0..size]);
-        if len > size {
-            result_slice[valids_len..].copy_from_slice(&nulls[0..(len - valids_len)]);
-        }
-    }
-
-    let result_data = ArrayData::new(
-        DataType::UInt32,
-        len,
-        Some(0),
-        None,
-        0,
-        vec![result.into()],
-        vec![],
-    );
-
-    Ok(UInt32Array::from(result_data))
-}
-
-// insert valid and nan values in the correct order depending on the descending flag
-fn insert_valid_values<T>(result_slice: &mut [u32], offset: usize, valids: &[(u32, T)]) {
-    let valids_len = valids.len();
-    // helper to append the index part of the valid tuples
-    let append_valids = move |dst_slice: &mut [u32]| {
-        debug_assert_eq!(dst_slice.len(), valids_len);
-        dst_slice
-            .iter_mut()
-            .zip(valids.iter())
-            .for_each(|(dst, src)| *dst = src.0)
-    };
-
-    append_valids(&mut result_slice[offset..offset + valids.len()]);
-}
-
-/// Sort strings
-fn sort_string(
-    values: &ArrayRef,
-    value_indices: Vec<u32>,
-    null_indices: Vec<u32>,
-    options: &SortOptions,
-    limit: Option<usize>,
-) -> Result<UInt32Array> {
-    let values = as_string_array(values);
-
-    sort_string_helper(
-        values,
-        value_indices,
-        null_indices,
-        options,
-        limit,
-        |array, idx| array.value(idx as usize),
-    )
-}
-
-/// Sort dictionary encoded strings
-fn sort_string_dictionary<T: ArrowDictionaryKeyType>(
-    values: &ArrayRef,
-    value_indices: Vec<u32>,
-    null_indices: Vec<u32>,
-    options: &SortOptions,
-    limit: Option<usize>,
-) -> Result<UInt32Array> {
-    let values: &DictionaryArray<T> = as_dictionary_array::<T>(values);
-
-    let keys: &PrimitiveArray<T> = &values.keys_array();
-
-    let dict = values.values();
-    let dict: &StringArray = as_string_array(&dict);
-
-    sort_string_helper(
-        keys,
-        value_indices,
-        null_indices,
-        options,
-        limit,
-        |array: &PrimitiveArray<T>, idx| -> &str {
-            let key: T::Native = array.value(idx as usize);
-            dict.value(key.to_usize().unwrap())
-        },
-    )
-}
-
-/// shared implementation between dictionary encoded and plain string arrays
-#[inline]
-#[allow(clippy::unnecessary_wraps)]
-fn sort_string_helper<'a, A: Array, F>(
-    values: &'a A,
-    value_indices: Vec<u32>,
-    null_indices: Vec<u32>,
-    options: &SortOptions,
-    limit: Option<usize>,
-    value_fn: F,
-) -> Result<UInt32Array>
-where
-    F: Fn(&'a A, u32) -> &str,
-{
-    let mut valids = value_indices
-        .into_iter()
-        .map(|index| (index, value_fn(&values, index)))
-        .collect::<Vec<(u32, &str)>>();
-    let mut nulls = null_indices;
-    let descending = options.descending;
-    let mut len = values.len();
-    let nulls_len = nulls.len();
-
-    if let Some(limit) = limit {
-        len = limit.min(len);
-    }
-    if !descending {
-        sort_by(&mut valids, len - nulls_len, |a, b| cmp(a.1, b.1));
-    } else {
-        sort_by(&mut valids, len - nulls_len, |a, b| cmp(a.1, b.1).reverse());
-        // reverse to keep a stable ordering
-        nulls.reverse();
-    }
-    // collect the order of valid tuplies
-    let mut valid_indices: Vec<u32> = valids.iter().map(|tuple| tuple.0).collect();
-
-    if options.nulls_first {
-        nulls.append(&mut valid_indices);
-        nulls.truncate(len);
-        return Ok(UInt32Array::from(nulls));
-    }
-
-    // no need to sort nulls as they are in the correct order already
-    valid_indices.append(&mut nulls);
-    valid_indices.truncate(len);
-    Ok(UInt32Array::from(valid_indices))
-}
-
-#[allow(clippy::unnecessary_wraps)]
-fn sort_list<S, T>(
-    values: &ArrayRef,
-    value_indices: Vec<u32>,
-    mut null_indices: Vec<u32>,
-    options: &SortOptions,
-    limit: Option<usize>,
-) -> Result<UInt32Array>
-where
-    S: OffsetSizeTrait,
-    T: ArrowPrimitiveType,
-    T::Native: std::cmp::PartialOrd,
-{
-    let mut valids: Vec<(u32, ArrayRef)> = values
-        .as_any()
-        .downcast_ref::<FixedSizeListArray>()
-        .map_or_else(
-            || {
-                let values = as_generic_list_array::<S>(values);
-                value_indices
-                    .iter()
-                    .copied()
-                    .map(|index| (index, values.value(index as usize)))
-                    .collect()
-            },
-            |values| {
-                value_indices
-                    .iter()
-                    .copied()
-                    .map(|index| (index, values.value(index as usize)))
-                    .collect()
-            },
-        );
-
-    let mut len = values.len();
-    let nulls_len = null_indices.len();
-    let descending = options.descending;
-
-    if let Some(limit) = limit {
-        len = limit.min(len);
-    }
-    if !descending {
-        sort_by(&mut valids, len - nulls_len, |a, b| {
-            cmp_array(a.1.as_ref(), b.1.as_ref())
-        });
-    } else {
-        sort_by(&mut valids, len - nulls_len, |a, b| {
-            cmp_array(a.1.as_ref(), b.1.as_ref()).reverse()
-        });
-        // reverse to keep a stable ordering
-        null_indices.reverse();
-    }
-
-    let mut valid_indices: Vec<u32> = valids.iter().map(|tuple| tuple.0).collect();
-    if options.nulls_first {
-        null_indices.append(&mut valid_indices);
-        null_indices.truncate(len);
-        return Ok(UInt32Array::from(null_indices));
-    }
-
-    valid_indices.append(&mut null_indices);
-    valid_indices.truncate(len);
-    Ok(UInt32Array::from(valid_indices))
-}
-
-/// Compare two `Array`s based on the ordering defined in [ord](crate::array::ord).
-fn cmp_array(a: &Array, b: &Array) -> Ordering {
-    let cmp_op = build_compare(a, b).unwrap();
-    let length = a.len().max(b.len());
-
-    for i in 0..length {
-        let result = cmp_op(i, i);
-        if result != Ordering::Equal {
-            return result;
-        }
-    }
-    Ordering::Equal
-}
-
-/// One column to be used in lexicographical sort
-#[derive(Clone, Debug)]
-pub struct SortColumn {
-    pub values: ArrayRef,
-    pub options: Option<SortOptions>,
-}
-
-/// Sort a list of `ArrayRef` using `SortOptions` provided for each array.
-///
-/// Performs a stable lexicographical sort on values and indices.
-///
-/// Returns an `ArrowError::ComputeError(String)` if any of the array type is either unsupported by
-/// `lexsort_to_indices` or `take`.
-///
-/// Example:
-///
-/// ```
-/// use std::convert::From;
-/// use std::sync::Arc;
-/// use arrow::array::{ArrayRef, StringArray, PrimitiveArray, as_primitive_array};
-/// use arrow::compute::kernels::sort::{SortColumn, SortOptions, lexsort};
-/// use arrow::datatypes::Int64Type;
-///
-/// let sorted_columns = lexsort(&vec![
-///     SortColumn {
-///         values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
-///             None,
-///             Some(-2),
-///             Some(89),
-///             Some(-64),
-///             Some(101),
-///         ])) as ArrayRef,
-///         options: None,
-///     },
-///     SortColumn {
-///         values: Arc::new(StringArray::from(vec![
-///             Some("hello"),
-///             Some("world"),
-///             Some(","),
-///             Some("foobar"),
-///             Some("!"),
-///         ])) as ArrayRef,
-///         options: Some(SortOptions {
-///             descending: true,
-///             nulls_first: false,
-///         }),
-///     },
-/// ], None).unwrap();
-///
-/// assert_eq!(as_primitive_array::<Int64Type>(&sorted_columns[0]).value(1), -64);
-/// assert!(sorted_columns[0].is_null(0));
-/// ```
-pub fn lexsort(columns: &[SortColumn], limit: Option<usize>) -> Result<Vec<ArrayRef>> {
-    let indices = lexsort_to_indices(columns, limit)?;
-    columns
-        .iter()
-        .map(|c| take(c.values.as_ref(), &indices, None))
-        .collect()
-}
-
-/// Sort elements lexicographically from a list of `ArrayRef` into an unsigned integer
-/// (`UInt32Array`) of indices.
-pub fn lexsort_to_indices(
-    columns: &[SortColumn],
-    limit: Option<usize>,
-) -> Result<UInt32Array> {
-    if columns.is_empty() {
-        return Err(ArrowError::InvalidArgumentError(
-            "Sort requires at least one column".to_string(),
-        ));
-    }
-    if columns.len() == 1 {
-        // fallback to non-lexical sort
-        let column = &columns[0];
-        return sort_to_indices(&column.values, column.options, limit);
-    }
-
-    let row_count = columns[0].values.len();
-    if columns.iter().any(|item| item.values.len() != row_count) {
-        return Err(ArrowError::ComputeError(
-            "lexical sort columns have different row counts".to_string(),
-        ));
-    };
-
-    // map to data and DynComparator
-    let flat_columns = columns
-        .iter()
-        .map(
-            |column| -> Result<(&ArrayData, DynComparator, SortOptions)> {
-                // flatten and convert build comparators
-                // use ArrayData for is_valid checks later to avoid dynamic call
-                let values = column.values.as_ref();
-                let data = values.data_ref();
-                Ok((
-                    data,
-                    build_compare(values, values)?,
-                    column.options.unwrap_or_default(),
-                ))
-            },
-        )
-        .collect::<Result<Vec<(&ArrayData, DynComparator, SortOptions)>>>()?;
-
-    let lex_comparator = |a_idx: &usize, b_idx: &usize| -> Ordering {
-        for (data, comparator, sort_option) in flat_columns.iter() {
-            match (data.is_valid(*a_idx), data.is_valid(*b_idx)) {
-                (true, true) => {
-                    match (comparator)(*a_idx, *b_idx) {
-                        // equal, move on to next column
-                        Ordering::Equal => continue,
-                        order => {
-                            if sort_option.descending {
-                                return order.reverse();
-                            } else {
-                                return order;
-                            }
-                        }
-                    }
-                }
-                (false, true) => {
-                    return if sort_option.nulls_first {
-                        Ordering::Less
-                    } else {
-                        Ordering::Greater
-                    };
-                }
-                (true, false) => {
-                    return if sort_option.nulls_first {
-                        Ordering::Greater
-                    } else {
-                        Ordering::Less
-                    };
-                }
-                // equal, move on to next column
-                (false, false) => continue,
-            }
-        }
-
-        Ordering::Equal
-    };
-
-    let mut value_indices = (0..row_count).collect::<Vec<usize>>();
-    let mut len = value_indices.len();
-
-    if let Some(limit) = limit {
-        len = limit.min(len);
-    }
-    sort_by(&mut value_indices, len, lex_comparator);
-
-    Ok(UInt32Array::from(
-        (&value_indices)[0..len]
-            .iter()
-            .map(|i| *i as u32)
-            .collect::<Vec<u32>>(),
-    ))
-}
-
-/// It's unstable_sort, may not preserve the order of equal elements
-pub fn partial_sort<T, F>(v: &mut [T], limit: usize, mut is_less: F)
-where
-    F: FnMut(&T, &T) -> Ordering,
-{
-    let (before, _mid, _after) = v.select_nth_unstable_by(limit, &mut is_less);
-    before.sort_unstable_by(is_less);
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::compute::util::tests::{
-        build_fixed_size_list_nullable, build_generic_list_nullable,
-    };
-    use rand::rngs::StdRng;
-    use rand::{Rng, RngCore, SeedableRng};
-    use std::convert::TryFrom;
-    use std::iter::FromIterator;
-    use std::sync::Arc;
-
-    fn test_sort_to_indices_boolean_arrays(
-        data: Vec<Option<bool>>,
-        options: Option<SortOptions>,
-        limit: Option<usize>,
-        expected_data: Vec<u32>,
-    ) {
-        let output = BooleanArray::from(data);
-        let expected = UInt32Array::from(expected_data);
-        let output =
-            sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
-        assert_eq!(output, expected)
-    }
-
-    fn test_sort_to_indices_primitive_arrays<T>(
-        data: Vec<Option<T::Native>>,
-        options: Option<SortOptions>,
-        limit: Option<usize>,
-        expected_data: Vec<u32>,
-    ) where
-        T: ArrowPrimitiveType,
-        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
-    {
-        let output = PrimitiveArray::<T>::from(data);
-        let expected = UInt32Array::from(expected_data);
-        let output =
-            sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
-        assert_eq!(output, expected)
-    }
-
-    fn test_sort_primitive_arrays<T>(
-        data: Vec<Option<T::Native>>,
-        options: Option<SortOptions>,
-        limit: Option<usize>,
-        expected_data: Vec<Option<T::Native>>,
-    ) where
-        T: ArrowPrimitiveType,
-        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
-    {
-        let output = PrimitiveArray::<T>::from(data);
-        let expected = Arc::new(PrimitiveArray::<T>::from(expected_data)) as ArrayRef;
-        let output = match limit {
-            Some(_) => {
-                sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap()
-            }
-            _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
-        };
-        assert_eq!(&output, &expected)
-    }
-
-    fn test_sort_to_indices_string_arrays(
-        data: Vec<Option<&str>>,
-        options: Option<SortOptions>,
-        limit: Option<usize>,
-        expected_data: Vec<u32>,
-    ) {
-        let output = StringArray::from(data);
-        let expected = UInt32Array::from(expected_data);
-        let output =
-            sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
-        assert_eq!(output, expected)
-    }
-
-    fn test_sort_string_arrays(
-        data: Vec<Option<&str>>,
-        options: Option<SortOptions>,
-        limit: Option<usize>,
-        expected_data: Vec<Option<&str>>,
-    ) {
-        let output = StringArray::from(data);
-        let expected = Arc::new(StringArray::from(expected_data)) as ArrayRef;
-        let output = match limit {
-            Some(_) => {
-                sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap()
-            }
-            _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
-        };
-        assert_eq!(&output, &expected)
-    }
-
-    fn test_sort_string_dict_arrays<T: ArrowDictionaryKeyType>(
-        data: Vec<Option<&str>>,
-        options: Option<SortOptions>,
-        limit: Option<usize>,
-        expected_data: Vec<Option<&str>>,
-    ) {
-        let array = DictionaryArray::<T>::from_iter(data.into_iter());
-        let array_values = array.values();
-        let dict = array_values
-            .as_any()
-            .downcast_ref::<StringArray>()
-            .expect("Unable to get dictionary values");
-
-        let sorted = match limit {
-            Some(_) => {
-                sort_limit(&(Arc::new(array) as ArrayRef), options, limit).unwrap()
-            }
-            _ => sort(&(Arc::new(array) as ArrayRef), options).unwrap(),
-        };
-        let sorted = sorted
-            .as_any()
-            .downcast_ref::<DictionaryArray<T>>()
-            .unwrap();
-        let sorted_values = sorted.values();
-        let sorted_dict = sorted_values
-            .as_any()
-            .downcast_ref::<StringArray>()
-            .expect("Unable to get dictionary values");
-        let sorted_keys = sorted.keys_array();
-
-        assert_eq!(sorted_dict, dict);
-
-        let sorted_strings = StringArray::try_from(
-            (0..sorted.len())
-                .map(|i| {
-                    if sorted.is_valid(i) {
-                        Some(sorted_dict.value(sorted_keys.value(i).to_usize().unwrap()))
-                    } else {
-                        None
-                    }
-                })
-                .collect::<Vec<Option<&str>>>(),
-        )
-        .expect("Unable to create string array from dictionary");
-        let expected =
-            StringArray::try_from(expected_data).expect("Unable to create string array");
-
-        assert_eq!(sorted_strings, expected)
-    }
-
-    fn test_sort_list_arrays<T>(
-        data: Vec<Option<Vec<Option<T::Native>>>>,
-        options: Option<SortOptions>,
-        limit: Option<usize>,
-        expected_data: Vec<Option<Vec<Option<T::Native>>>>,
-        fixed_length: Option<i32>,
-    ) where
-        T: ArrowPrimitiveType,
-        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
-    {
-        // for FixedSizedList
-        if let Some(length) = fixed_length {
-            let input = Arc::new(build_fixed_size_list_nullable(data.clone(), length));
-            let sorted = match limit {
-                Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
-                _ => sort(&(input as ArrayRef), options).unwrap(),
-            };
-            let expected = Arc::new(build_fixed_size_list_nullable(
-                expected_data.clone(),
-                length,
-            )) as ArrayRef;
-
-            assert_eq!(&sorted, &expected);
-        }
-
-        // for List
-        let input = Arc::new(build_generic_list_nullable::<i32, T>(data.clone()));
-        let sorted = match limit {
-            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
-            _ => sort(&(input as ArrayRef), options).unwrap(),
-        };
-        let expected =
-            Arc::new(build_generic_list_nullable::<i32, T>(expected_data.clone()))
-                as ArrayRef;
-
-        assert_eq!(&sorted, &expected);
-
-        // for LargeList
-        let input = Arc::new(build_generic_list_nullable::<i64, T>(data));
-        let sorted = match limit {
-            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
-            _ => sort(&(input as ArrayRef), options).unwrap(),
-        };
-        let expected =
-            Arc::new(build_generic_list_nullable::<i64, T>(expected_data)) as ArrayRef;
-
-        assert_eq!(&sorted, &expected);
-    }
-
-    fn test_lex_sort_arrays(
-        input: Vec<SortColumn>,
-        expected_output: Vec<ArrayRef>,
-        limit: Option<usize>,
-    ) {
-        let sorted = lexsort(&input, limit).unwrap();
-
-        for (result, expected) in sorted.iter().zip(expected_output.iter()) {
-            assert_eq!(result, expected);
-        }
-    }
-
-    #[test]
-    fn test_sort_to_indices_primitives() {
-        test_sort_to_indices_primitive_arrays::<Int8Type>(
-            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
-            None,
-            None,
-            vec![0, 5, 3, 1, 4, 2],
-        );
-        test_sort_to_indices_primitive_arrays::<Int16Type>(
-            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
-            None,
-            None,
-            vec![0, 5, 3, 1, 4, 2],
-        );
-        test_sort_to_indices_primitive_arrays::<Int32Type>(
-            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
-            None,
-            None,
-            vec![0, 5, 3, 1, 4, 2],
-        );
-        test_sort_to_indices_primitive_arrays::<Int64Type>(
-            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
-            None,
-            None,
-            vec![0, 5, 3, 1, 4, 2],
-        );
-        test_sort_to_indices_primitive_arrays::<Float32Type>(
-            vec![
-                None,
-                Some(-0.05),
-                Some(2.225),
-                Some(-1.01),
-                Some(-0.05),
-                None,
-            ],
-            None,
-            None,
-            vec![0, 5, 3, 1, 4, 2],
-        );
-        test_sort_to_indices_primitive_arrays::<Float64Type>(
-            vec![
-                None,
-                Some(-0.05),
-                Some(2.225),
-                Some(-1.01),
-                Some(-0.05),
-                None,
-            ],
-            None,
-            None,
-            vec![0, 5, 3, 1, 4, 2],
-        );
-
-        // descending
-        test_sort_to_indices_primitive_arrays::<Int8Type>(
-            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
-            Some(SortOptions {
-                descending: true,
-                nulls_first: false,
-            }),
-            None,
-            vec![2, 1, 4, 3, 5, 0], // [2, 4, 1, 3, 5, 0]
-        );
-
-        test_sort_to_indices_primitive_arrays::<Int16Type>(
-            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
-            Some(SortOptions {
-                descending: true,
-                nulls_first: false,
-            }),
-            None,
-            vec![2, 1, 4, 3, 5, 0],
-        );
-
-        test_sort_to_indices_primitive_arrays::<Int32Type>(
-            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
-            Some(SortOptions {
-                descending: true,
-                nulls_first: false,
-            }),
-            None,
-            vec![2, 1, 4, 3, 5, 0],
-        );
-
-        test_sort_to_indices_primitive_arrays::<Int64Type>(
-            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
-            Some(SortOptions {
-                descending: true,
-                nulls_first: false,
-            }),
-            None,
-            vec![2, 1, 4, 3, 5, 0],
-        );
-
-        test_sort_to_indices_primitive_arrays::<Float32Type>(
-            vec![
-                None,
-                Some(0.005),
-                Some(20.22),
-                Some(-10.3),
-                Some(0.005),
-                None,
-            ],
-            Some(SortOptions {
-                descending: true,
-                nulls_first: false,
-            }),
-            None,
-            vec![2, 1, 4, 3, 5, 0],
-        );
-
-        test_sort_to_indices_primitive_arrays::<Float64Type>(
-            vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
-            Some(SortOptions {
-                descending: true,
-                nulls_first: false,
-            }),
-            None,
-            vec![2, 1, 4, 3, 5, 0],
-        );
-
-        // descending, nulls first
-        test_sort_to_indices_primitive_arrays::<Int8Type>(
-            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
-            Some(SortOptions {
-                descending: true,
-                nulls_first: true,
-            }),
-            None,
-            vec![5, 0, 2, 1, 4, 3], // [5, 0, 2, 4, 1, 3]
-        );
-
-        test_sort_to_indices_primitive_arrays::<Int16Type>(
-            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
-            Some(SortOptions {
-                descending: true,
-                nulls_first: true,
-            }),
-            None,
-            vec![5, 0, 2, 1, 4, 3], // [5, 0, 2, 4, 1, 3]
-        );
-
-        test_sort_to_indices_primitive_arrays::<Int32Type>(
-            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
-            Some(SortOptions {
-                descending: true,
-                nulls_first: true,
-            }),
-            None,
-            vec![5, 0, 2, 1, 4, 3],
-        );
-
-        test_sort_to_indices_primitive_arrays::<Int64Type>(
-            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
-            Some(SortOptions {
-                descending: true,
-                nulls_first: true,
-            }),
-            None,
-            vec![5, 0, 2, 1, 4, 3],
-        );
-
-        test_sort_to_indices_primitive_arrays::<Float32Type>(
-            vec![None, Some(0.1), Some(0.2), Some(-1.3), Some(0.01), None],
-            Some(SortOptions {
-                descending: true,
-                nulls_first: true,
-            }),
-            None,
-            vec![5, 0, 2, 1, 4, 3],
-        );
-
-        test_sort_to_indices_primitive_arrays::<Float64Type>(
-            vec![None, Some(10.1), Some(100.2), Some(-1.3), Some(10.01), None],
-            Some(SortOptions {
-                descending: true,
-                nulls_first: true,
-            }),
-            None,
-            vec![5, 0, 2, 1, 4, 3],
-        );
-    }
-
-    #[test]
-    fn test_sort_boolean() {
-        // boolean
-        test_sort_to_indices_boolean_arrays(
-            vec![None, Some(false), Some(true), Some(true), Some(false), None],
-            None,
-            None,
-            vec![0, 5, 1, 4, 2, 3],
-        );
-
-        // boolean, descending
-        test_sort_to_indices_boolean_arrays(
-            vec![None, Some(false), Some(true), Some(true), Some(false), None],
-            Some(SortOptions {
-                descending: true,
-                nulls_first: false,
-            }),
-            None,
-            vec![2, 3, 1, 4, 5, 0],
-        );
-
-        // boolean, descending, nulls first
-        test_sort_to_indices_boolean_arrays(
-            vec![None, Some(false), Some(true), Some(true), Some(false), None],
-            Some(SortOptions {
-                descending: true,
-                nulls_first: true,
-            }),
-            None,
-            vec![5, 0, 2, 3, 1, 4],
-        );
-
-        // boolean, descending, nulls first, limit
-        test_sort_to_indices_boolean_arrays(
-            vec![None, Some(false), Some(true), Some(true), Some(false), None],
-            Some(SortOptions {
-                descending: true,
-                nulls_first: true,
-            }),
-            Some(3),
-            vec![5, 0, 2],
-        );
-    }
-
-    #[test]
-    fn test_sort_primitives() {
-        // default case
-        test_sort_primitive_arrays::<UInt8Type>(
-            vec![None, Some(3), Some(5), Some(2), Some(3), None],
-            None,
-            None,
-            vec![None, None, Some(2), Some(3), Some(3), Some(5)],
-        );
-        test_sort_primitive_arrays::<UInt16Type>(
-            vec![None, Some(3), Some(5), Some(2), Some(3), None],
-            None,
-            None,
-            vec![None, None, Some(2), Some(3), Some(3), Some(5)],
-        );
-        test_sort_primitive_arrays::<UInt32Type>(
-            vec![None, Some(3), Some(5), Some(2), Some(3), None],
-            None,
-            None,
-            vec![None, None, Some(2), Some(3), Some(3), Some(5)],
-        );
-        test_sort_primitive_arrays::<UInt64Type>(
-            vec![None, Some(3), Some(5), Some(2), Some(3), None],
-            None,
-            None,
-            vec![None, None, Some(2), Some(3), Some(3), Some(5)],
-        );
-
-        // descending
-        test_sort_primitive_arrays::<Int8Type>(
-            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
-            Some(SortOptions {
-                descending: true,
-                nulls_first: false,
-            }),
-            None,
-            vec![Some(2), Some(0), Some(0), Some(-1), None, None],
-        );
-        test_sort_primitive_arrays::<Int16Type>(
-            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
-            Some(SortOptions {
-                descending: true,
-                nulls_first: false,
-            }),
-            None,
-            vec![Some(2), Some(0), Some(0), Some(-1), None, None],
-        );
-        test_sort_primitive_arrays::<Int32Type>(
-            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
-            Some(SortOptions {
-                descending: true,
-                nulls_first: false,
-            }),
-            None,
-            vec![Some(2), Some(0), Some(0), Some(-1), None, None],
-        );
-        test_sort_primitive_arrays::<Int16Type>(
-            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
-            Some(SortOptions {
-                descending: true,
-                nulls_first: false,
-            }),
-            None,
-            vec![Some(2), Some(0), Some(0), Some(-1), None, None],
-        );
-
-        // descending, nulls first
-        test_sort_primitive_arrays::<Int8Type>(
-            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
-            Some(SortOptions {
-                descending: true,
-                nulls_first: true,
-            }),
-            None,
-            vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
-        );
-        test_sort_primitive_arrays::<Int16Type>(
-            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
-            Some(SortOptions {
-                descending: true,
-                nulls_first: true,
-            }),
-            None,
-            vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
-        );
-        test_sort_primitive_arrays::<Int32Type>(
-            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
-            Some(SortOptions {
-                descending: true,
-                nulls_first: true,
-            }),
-            None,
-            vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
-        );
-        test_sort_primitive_arrays::<Int64Type>(
-            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
-            Some(SortOptions {
-                descending: true,
-                nulls_first: true,
-            }),
-            None,
-            vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
-        );
-
-        test_sort_primitive_arrays::<Int64Type>(
-            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
-            Some(SortOptions {
-                descending: true,
-                nulls_first: true,
-            }),
-            Some(3),
-            vec![None, None, Some(2)],
-        );
-
-        test_sort_primitive_arrays::<Float32Type>(
-            vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
-            Some(SortOptions {
-                descending: true,
-                nulls_first: true,
-            }),
-            None,
-            vec![None, None, Some(2.0), Some(0.0), Some(0.0), Some(-1.0)],
-        );
-        test_sort_primitive_arrays::<Float64Type>(
-            vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(f64::NAN), None],
-            Some(SortOptions {
-                descending: true,
-                nulls_first: true,
-            }),
-            None,
-            vec![None, None, Some(f64::NAN), Some(2.0), Some(0.0), Some(-1.0)],
-        );
-        test_sort_primitive_arrays::<Float64Type>(
-            vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
-            Some(SortOptions {
-                descending: true,
-                nulls_first: true,
-            }),
-            None,
-            vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
-        );
-
-        // int8 nulls first
-        test_sort_primitive_arrays::<Int8Type>(
-            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
-            Some(SortOptions {
-                descending: false,
-                nulls_first: true,
-            }),
-            None,
-            vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
-        );
-        test_sort_primitive_arrays::<Int16Type>(
-            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
-            Some(SortOptions {
-                descending: false,
-                nulls_first: true,
-            }),
-            None,
-            vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
-        );
-        test_sort_primitive_arrays::<Int32Type>(
-            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
-            Some(SortOptions {
-                descending: false,
-                nulls_first: true,
-            }),
-            None,
-            vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
-        );
-        test_sort_primitive_arrays::<Int64Type>(
-            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
-            Some(SortOptions {
-                descending: false,
-                nulls_first: true,
-            }),
-            None,
-            vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
-        );
-        test_sort_primitive_arrays::<Float32Type>(
-            vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
-            Some(SortOptions {
-                descending: false,
-                nulls_first: true,
-            }),
-            None,
-            vec![None, None, Some(-1.0), Some(0.0), Some(0.0), Some(2.0)],
-        );
-        test_sort_primitive_arrays::<Float64Type>(
-            vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(f64::NAN), None],
-            Some(SortOptions {
-                descending: false,
-                nulls_first: true,
-            }),
-            None,
-            vec![None, None, Some(-1.0), Some(0.0), Some(2.0), Some(f64::NAN)],
-        );
-        test_sort_primitive_arrays::<Float64Type>(
-            vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
-            Some(SortOptions {
-                descending: false,
-                nulls_first: true,
-            }),
-            None,
-            vec![Some(1.0), Some(f64::NAN), Some(f64::NAN), Some(f64::NAN)],
-        );
-
-        // limit
-        test_sort_primitive_arrays::<Float64Type>(
-            vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
-            Some(SortOptions {
-                descending: false,
-                nulls_first: true,
-            }),
-            Some(2),
-            vec![Some(1.0), Some(f64::NAN)],
-        );
-
-        // limit with actual value
-        test_sort_primitive_arrays::<Float64Type>(
-            vec![Some(2.0), Some(4.0), Some(3.0), Some(1.0)],
-            Some(SortOptions {
-                descending: false,
-                nulls_first: true,
-            }),
-            Some(3),
-            vec![Some(1.0), Some(2.0), Some(3.0)],
-        );
-    }
-
-    #[test]
-    fn test_sort_to_indices_strings() {
-        test_sort_to_indices_string_arrays(
-            vec![
-                None,
-                Some("bad"),
-                Some("sad"),
-                None,
-                Some("glad"),
-                Some("-ad"),
-            ],
-            None,
-            None,
-            vec![0, 3, 5, 1, 4, 2],
-        );
-
-        test_sort_to_indices_string_arrays(
-            vec![
-                None,
-                Some("bad"),
-                Some("sad"),
-                None,
-                Some("glad"),
-                Some("-ad"),
-            ],
-            Some(SortOptions {
-                descending: true,
-                nulls_first: false,
-            }),
-            None,
-            vec![2, 4, 1, 5, 3, 0],
-        );
-
-        test_sort_to_indices_string_arrays(
-            vec![
-                None,
-                Some("bad"),
-                Some("sad"),
-                None,
-                Some("glad"),
-                Some("-ad"),
-            ],
-            Some(SortOptions {
-                descending: false,
-                nulls_first: true,
-            }),
-            None,
-            vec![0, 3, 5, 1, 4, 2],
-        );
-
-        test_sort_to_indices_string_arrays(
-            vec![
-                None,
-                Some("bad"),
-                Some("sad"),
-                None,
-                Some("glad"),
-                Some("-ad"),
-            ],
-            Some(SortOptions {
-                descending: true,
-                nulls_first: true,
-            }),
-            None,
-            vec![3, 0, 2, 4, 1, 5],
-        );
-
-        test_sort_to_indices_string_arrays(
-            vec![
-                None,
-                Some("bad"),
-                Some("sad"),
-                None,
-                Some("glad"),
-                Some("-ad"),
-            ],
-            Some(SortOptions {
-                descending: true,
-                nulls_first: true,
-            }),
-            Some(3),
-            vec![3, 0, 2],
-        );
-    }
-
-    #[test]
-    fn test_sort_strings() {
-        test_sort_string_arrays(
-            vec![
-                None,
-                Some("bad"),
-                Some("sad"),
-                None,
-                Some("glad"),
-                Some("-ad"),
-            ],
-            None,
-            None,
-            vec![
-                None,
-                None,
-                Some("-ad"),
-                Some("bad"),
-                Some("glad"),
-                Some("sad"),
-            ],
-        );
-
-        test_sort_string_arrays(
-            vec![
-                None,
-                Some("bad"),
-                Some("sad"),
-                None,
-                Some("glad"),
-                Some("-ad"),
-            ],
-            Some(SortOptions {
-                descending: true,
-                nulls_first: false,
-            }),
-            None,
-            vec![
-                Some("sad"),
-                Some("glad"),
-                Some("bad"),
-                Some("-ad"),
-                None,
-                None,
-            ],
-        );
-
-        test_sort_string_arrays(
-            vec![
-                None,
-                Some("bad"),
-                Some("sad"),
-                None,
-                Some("glad"),
-                Some("-ad"),
-            ],
-            Some(SortOptions {
-                descending: false,
-                nulls_first: true,
-            }),
-            None,
-            vec![
-                None,
-                None,
-                Some("-ad"),
-                Some("bad"),
-                Some("glad"),
-                Some("sad"),
-            ],
-        );
-
-        test_sort_string_arrays(
-            vec![
-                None,
-                Some("bad"),
-                Some("sad"),
-                None,
-                Some("glad"),
-                Some("-ad"),
-            ],
-            Some(SortOptions {
-                descending: true,
-                nulls_first: true,
-            }),
-            None,
-            vec![
-                None,
-                None,
-                Some("sad"),
-                Some("glad"),
-                Some("bad"),
-                Some("-ad"),
-            ],
-        );
-
-        test_sort_string_arrays(
-            vec![
-                None,
-                Some("bad"),
-                Some("sad"),
-                None,
-                Some("glad"),
-                Some("-ad"),
-            ],
-            Some(SortOptions {
-                descending: true,
-                nulls_first: true,
-            }),
-            Some(3),
-            vec![None, None, Some("sad")],
-        );
-    }
-
-    #[test]
-    fn test_sort_string_dicts() {
-        test_sort_string_dict_arrays::<Int8Type>(
-            vec![
-                None,
-                Some("bad"),
-                Some("sad"),
-                None,
-                Some("glad"),
-                Some("-ad"),
-            ],
-            None,
-            None,
-            vec![
-                None,
-                None,
-                Some("-ad"),
-                Some("bad"),
-                Some("glad"),
-                Some("sad"),
-            ],
-        );
-
-        test_sort_string_dict_arrays::<Int16Type>(
-            vec![
-                None,
-                Some("bad"),
-                Some("sad"),
-                None,
-                Some("glad"),
-                Some("-ad"),
-            ],
-            Some(SortOptions {
-                descending: true,
-                nulls_first: false,
-            }),
-            None,
-            vec![
-                Some("sad"),
-                Some("glad"),
-                Some("bad"),
-                Some("-ad"),
-                None,
-                None,
-            ],
-        );
-
-        test_sort_string_dict_arrays::<Int32Type>(
-            vec![
-                None,
-                Some("bad"),
-                Some("sad"),
-                None,
-                Some("glad"),
-                Some("-ad"),
-            ],
-            Some(SortOptions {
-                descending: false,
-                nulls_first: true,
-            }),
-            None,
-            vec![
-                None,
-                None,
-                Some("-ad"),
-                Some("bad"),
-                Some("glad"),
-                Some("sad"),
-            ],
-        );
-
-        test_sort_string_dict_arrays::<Int16Type>(
-            vec![
-                None,
-                Some("bad"),
-                Some("sad"),
-                None,
-                Some("glad"),
-                Some("-ad"),
-            ],
-            Some(SortOptions {
-                descending: true,
-                nulls_first: true,
-            }),
-            None,
-            vec![
-                None,
-                None,
-                Some("sad"),
-                Some("glad"),
-                Some("bad"),
-                Some("-ad"),
-            ],
-        );
-
-        test_sort_string_dict_arrays::<Int16Type>(
-            vec![
-                None,
-                Some("bad"),
-                Some("sad"),
-                None,
-                Some("glad"),
-                Some("-ad"),
-            ],
-            Some(SortOptions {
-                descending: true,
-                nulls_first: true,
-            }),
-            Some(3),
-            vec![None, None, Some("sad")],
-        );
-    }
-
-    #[test]
-    fn test_sort_list() {
-        test_sort_list_arrays::<Int8Type>(
-            vec![
-                Some(vec![Some(1)]),
-                Some(vec![Some(4)]),
-                Some(vec![Some(2)]),
-                Some(vec![Some(3)]),
-            ],
-            Some(SortOptions {
-                descending: false,
-                nulls_first: false,
-            }),
-            None,
-            vec![
-                Some(vec![Some(1)]),
-                Some(vec![Some(2)]),
-                Some(vec![Some(3)]),
-                Some(vec![Some(4)]),
-            ],
-            Some(1),
-        );
-
-        test_sort_list_arrays::<Int32Type>(
-            vec![
-                Some(vec![Some(1), Some(0)]),
-                Some(vec![Some(4), Some(3), Some(2), Some(1)]),
-                Some(vec![Some(2), Some(3), Some(4)]),
-                Some(vec![Some(3), Some(3), Some(3), Some(3)]),
-                Some(vec![Some(1), Some(1)]),
-            ],
-            Some(SortOptions {
-                descending: false,
-                nulls_first: false,
-            }),
-            None,
-            vec![
-                Some(vec![Some(1), Some(0)]),
-                Some(vec![Some(1), Some(1)]),
-                Some(vec![Some(2), Some(3), Some(4)]),
-                Some(vec![Some(3), Some(3), Some(3), Some(3)]),
-                Some(vec![Some(4), Some(3), Some(2), Some(1)]),
-            ],
-            None,
-        );
-
-        test_sort_list_arrays::<Int32Type>(
-            vec![
-                None,
-                Some(vec![Some(4), None, Some(2)]),
-                Some(vec![Some(2), Some(3), Some(4)]),
-                None,
-                Some(vec![Some(3), Some(3), None]),
-            ],
-            Some(SortOptions {
-                descending: false,
-                nulls_first: false,
-            }),
-            None,
-            vec![
-                Some(vec![Some(2), Some(3), Some(4)]),
-                Some(vec![Some(3), Some(3), None]),
-                Some(vec![Some(4), None, Some(2)]),
-                None,
-                None,
-            ],
-            Some(3),
-        );
-
-        test_sort_list_arrays::<Int32Type>(
-            vec![
-                Some(vec![Some(1), Some(0)]),
-                Some(vec![Some(4), Some(3), Some(2), Some(1)]),
-                Some(vec![Some(2), Some(3), Some(4)]),
-                Some(vec![Some(3), Some(3), Some(3), Some(3)]),
-                Some(vec![Some(1), Some(1)]),
-            ],
-            Some(SortOptions {
-                descending: false,
-                nulls_first: false,
-            }),
-            Some(2),
-            vec![Some(vec![Some(1), Some(0)]), Some(vec![Some(1), Some(1)])],
-            None,
-        );
-    }
-
-    #[test]
-    fn test_lex_sort_single_column() {
-        let input = vec![SortColumn {
-            values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
-                Some(17),
-                Some(2),
-                Some(-1),
-                Some(0),
-            ])) as ArrayRef,
-            options: None,
-        }];
-        let expected = vec![Arc::new(PrimitiveArray::<Int64Type>::from(vec![
-            Some(-1),
-            Some(0),
-            Some(2),
-            Some(17),
-        ])) as ArrayRef];
-        test_lex_sort_arrays(input.clone(), expected, None);
-
-        let expected = vec![Arc::new(PrimitiveArray::<Int64Type>::from(vec![
-            Some(-1),
-            Some(0),
-            Some(2),
-        ])) as ArrayRef];
-        test_lex_sort_arrays(input, expected, Some(3));
-    }
-
-    #[test]
-    fn test_lex_sort_unaligned_rows() {
-        let input = vec![
-            SortColumn {
-                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![None, Some(-1)]))
-                    as ArrayRef,
-                options: None,
-            },
-            SortColumn {
-                values: Arc::new(StringArray::from(vec![Some("foo")])) as ArrayRef,
-                options: None,
-            },
-        ];
-        assert!(
-            lexsort(&input, None).is_err(),
-            "lexsort should reject columns with different row counts"
-        );
-    }
-
-    #[test]
-    fn test_lex_sort_mixed_types() {
-        let input = vec![
-            SortColumn {
-                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
-                    Some(0),
-                    Some(2),
-                    Some(-1),
-                    Some(0),
-                ])) as ArrayRef,
-                options: None,
-            },
-            SortColumn {
-                values: Arc::new(PrimitiveArray::<UInt32Type>::from(vec![
-                    Some(101),
-                    Some(8),
-                    Some(7),
-                    Some(102),
-                ])) as ArrayRef,
-                options: None,
-            },
-            SortColumn {
-                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
-                    Some(-1),
-                    Some(-2),
-                    Some(-3),
-                    Some(-4),
-                ])) as ArrayRef,
-                options: None,
-            },
-        ];
-        let expected = vec![
-            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
-                Some(-1),
-                Some(0),
-                Some(0),
-                Some(2),
-            ])) as ArrayRef,
-            Arc::new(PrimitiveArray::<UInt32Type>::from(vec![
-                Some(7),
-                Some(101),
-                Some(102),
-                Some(8),
-            ])) as ArrayRef,
-            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
-                Some(-3),
-                Some(-1),
-                Some(-4),
-                Some(-2),
-            ])) as ArrayRef,
-        ];
-        test_lex_sort_arrays(input, expected, None);
-
-        // test mix of string and in64 with option
-        let input = vec![
-            SortColumn {
-                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
-                    Some(0),
-                    Some(2),
-                    Some(-1),
-                    Some(0),
-                ])) as ArrayRef,
-                options: Some(SortOptions {
-                    descending: true,
-                    nulls_first: true,
-                }),
-            },
-            SortColumn {
-                values: Arc::new(StringArray::from(vec![
-                    Some("foo"),
-                    Some("9"),
-                    Some("7"),
-                    Some("bar"),
-                ])) as ArrayRef,
-                options: Some(SortOptions {
-                    descending: true,
-                    nulls_first: true,
-                }),
-            },
-        ];
-        let expected = vec![
-            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
-                Some(2),
-                Some(0),
-                Some(0),
-                Some(-1),
-            ])) as ArrayRef,
-            Arc::new(StringArray::from(vec![
-                Some("9"),
-                Some("foo"),
-                Some("bar"),
-                Some("7"),
-            ])) as ArrayRef,
-        ];
-        test_lex_sort_arrays(input, expected, None);
-
-        // test sort with nulls first
-        let input = vec![
-            SortColumn {
-                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
-                    None,
-                    Some(-1),
-                    Some(2),
-                    None,
-                ])) as ArrayRef,
-                options: Some(SortOptions {
-                    descending: true,
-                    nulls_first: true,
-                }),
-            },
-            SortColumn {
-                values: Arc::new(StringArray::from(vec![
-                    Some("foo"),
-                    Some("world"),
-                    Some("hello"),
-                    None,
-                ])) as ArrayRef,
-                options: Some(SortOptions {
-                    descending: true,
-                    nulls_first: true,
-                }),
-            },
-        ];
-        let expected = vec![
-            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
-                None,
-                None,
-                Some(2),
-                Some(-1),
-            ])) as ArrayRef,
-            Arc::new(StringArray::from(vec![
-                None,
-                Some("foo"),
-                Some("hello"),
-                Some("world"),
-            ])) as ArrayRef,
-        ];
-        test_lex_sort_arrays(input, expected, None);
-
-        // test sort with nulls last
-        let input = vec![
-            SortColumn {
-                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
-                    None,
-                    Some(-1),
-                    Some(2),
-                    None,
-                ])) as ArrayRef,
-                options: Some(SortOptions {
-                    descending: true,
-                    nulls_first: false,
-                }),
-            },
-            SortColumn {
-                values: Arc::new(StringArray::from(vec![
-                    Some("foo"),
-                    Some("world"),
-                    Some("hello"),
-                    None,
-                ])) as ArrayRef,
-                options: Some(SortOptions {
-                    descending: true,
-                    nulls_first: false,
-                }),
-            },
-        ];
-        let expected = vec![
-            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
-                Some(2),
-                Some(-1),
-                None,
-                None,
-            ])) as ArrayRef,
-            Arc::new(StringArray::from(vec![
-                Some("hello"),
-                Some("world"),
-                Some("foo"),
-                None,
-            ])) as ArrayRef,
-        ];
-        test_lex_sort_arrays(input, expected, None);
-
-        // test sort with opposite options
-        let input = vec![
-            SortColumn {
-                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
-                    None,
-                    Some(-1),
-                    Some(2),
-                    Some(-1),
-                    None,
-                ])) as ArrayRef,
-                options: Some(SortOptions {
-                    descending: false,
-                    nulls_first: false,
-                }),
-            },
-            SortColumn {
-                values: Arc::new(StringArray::from(vec![
-                    Some("foo"),
-                    Some("bar"),
-                    Some("world"),
-                    Some("hello"),
-                    None,
-                ])) as ArrayRef,
-                options: Some(SortOptions {
-                    descending: true,
-                    nulls_first: true,
-                }),
-            },
-        ];
-        let expected = vec![
-            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
-                Some(-1),
-                Some(-1),
-                Some(2),
-                None,
-                None,
-            ])) as ArrayRef,
-            Arc::new(StringArray::from(vec![
-                Some("hello"),
-                Some("bar"),
-                Some("world"),
-                None,
-                Some("foo"),
-            ])) as ArrayRef,
-        ];
-        test_lex_sort_arrays(input, expected, None);
-    }
-
-    #[test]
-    fn test_partial_sort() {
-        let mut before: Vec<&str> = vec![
-            "a", "cat", "mat", "on", "sat", "the", "xxx", "xxxx", "fdadfdsf",
-        ];
-        let mut d = before.clone();
-        d.sort_unstable();
-
-        for last in 0..before.len() {
-            partial_sort(&mut before, last, |a, b| a.cmp(b));
-            assert_eq!(&d[0..last], &before.as_slice()[0..last]);
-        }
-    }
-
-    #[test]
-    fn test_partial_rand_sort() {
-        let size = 1000u32;
-        let mut rng = StdRng::seed_from_u64(42);
-        let mut before: Vec<u32> = (0..size).map(|_| rng.gen::<u32>()).collect();
-        let mut d = before.clone();
-        let last = (rng.next_u32() % size) as usize;
-        d.sort_unstable();
-
-        partial_sort(&mut before, last, |a, b| a.cmp(b));
-        assert_eq!(&d[0..last], &before[0..last]);
-    }
-}
diff --git a/rust/arrow/src/compute/kernels/substring.rs b/rust/arrow/src/compute/kernels/substring.rs
deleted file mode 100644
index d9956b89687..00000000000
--- a/rust/arrow/src/compute/kernels/substring.rs
+++ /dev/null
@@ -1,269 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Defines kernel to extract a substring of a \[Large\]StringArray
-
-use crate::{array::*, buffer::Buffer};
-use crate::{
-    datatypes::DataType,
-    error::{ArrowError, Result},
-};
-
-#[allow(clippy::unnecessary_wraps)]
-fn generic_substring<OffsetSize: StringOffsetSizeTrait>(
-    array: &GenericStringArray<OffsetSize>,
-    start: OffsetSize,
-    length: &Option<OffsetSize>,
-) -> Result<ArrayRef> {
-    // compute current offsets
-    let offsets = array.data_ref().clone().buffers()[0].clone();
-    let offsets: &[OffsetSize] = unsafe { offsets.typed_data::<OffsetSize>() };
-
-    // compute null bitmap (copy)
-    let null_bit_buffer = array.data_ref().null_buffer().cloned();
-
-    // compute values
-    let values = &array.data_ref().buffers()[1];
-    let data = values.as_slice();
-
-    let mut new_values = Vec::new(); // we have no way to estimate how much this will be.
-    let mut new_offsets: Vec<OffsetSize> = Vec::with_capacity(array.len() + 1);
-
-    let mut length_so_far = OffsetSize::zero();
-    new_offsets.push(length_so_far);
-    (0..array.len()).for_each(|i| {
-        // the length of this entry
-        let length_i: OffsetSize = offsets[i + 1] - offsets[i];
-        // compute where we should start slicing this entry
-        let start = offsets[i]
-            + if start >= OffsetSize::zero() {
-                start
-            } else {
-                length_i + start
-            };
-
-        let start = start.max(offsets[i]).min(offsets[i + 1]);
-        // compute the length of the slice
-        let length: OffsetSize = length
-            .unwrap_or(length_i)
-            // .max(0) is not needed as it is guaranteed
-            .min(offsets[i + 1] - start); // so we do not go beyond this entry
-
-        length_so_far += length;
-
-        new_offsets.push(length_so_far);
-
-        // we need usize for ranges
-        let start = start.to_usize().unwrap();
-        let length = length.to_usize().unwrap();
-
-        new_values.extend_from_slice(&data[start..start + length]);
-    });
-
-    let data = ArrayData::new(
-        <OffsetSize as StringOffsetSizeTrait>::DATA_TYPE,
-        array.len(),
-        None,
-        null_bit_buffer,
-        0,
-        vec![
-            Buffer::from_slice_ref(&new_offsets),
-            Buffer::from_slice_ref(&new_values),
-        ],
-        vec![],
-    );
-    Ok(make_array(data))
-}
-
-/// Returns an ArrayRef with a substring starting from `start` and with optional length `length` of each of the elements in `array`.
-/// `start` can be negative, in which case the start counts from the end of the string.
-/// this function errors when the passed array is not a \[Large\]String array.
-pub fn substring(array: &Array, start: i64, length: &Option<u64>) -> Result<ArrayRef> {
-    match array.data_type() {
-        DataType::LargeUtf8 => generic_substring(
-            array
-                .as_any()
-                .downcast_ref::<LargeStringArray>()
-                .expect("A large string is expected"),
-            start,
-            &length.map(|e| e as i64),
-        ),
-        DataType::Utf8 => generic_substring(
-            array
-                .as_any()
-                .downcast_ref::<StringArray>()
-                .expect("A string is expected"),
-            start as i32,
-            &length.map(|e| e as i32),
-        ),
-        _ => Err(ArrowError::ComputeError(format!(
-            "substring does not support type {:?}",
-            array.data_type()
-        ))),
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    fn with_nulls<T: 'static + Array + PartialEq + From<Vec<Option<&'static str>>>>(
-    ) -> Result<()> {
-        let cases = vec![
-            // identity
-            (
-                vec![Some("hello"), None, Some("word")],
-                0,
-                None,
-                vec![Some("hello"), None, Some("word")],
-            ),
-            // 0 length -> Nothing
-            (
-                vec![Some("hello"), None, Some("word")],
-                0,
-                Some(0),
-                vec![Some(""), None, Some("")],
-            ),
-            // high start -> Nothing
-            (
-                vec![Some("hello"), None, Some("word")],
-                1000,
-                Some(0),
-                vec![Some(""), None, Some("")],
-            ),
-            // high negative start -> identity
-            (
-                vec![Some("hello"), None, Some("word")],
-                -1000,
-                None,
-                vec![Some("hello"), None, Some("word")],
-            ),
-            // high length -> identity
-            (
-                vec![Some("hello"), None, Some("word")],
-                0,
-                Some(1000),
-                vec![Some("hello"), None, Some("word")],
-            ),
-        ];
-
-        cases.into_iter().try_for_each::<_, Result<()>>(
-            |(array, start, length, expected)| {
-                let array = T::from(array);
-                let result: ArrayRef = substring(&array, start, &length)?;
-                assert_eq!(array.len(), result.len());
-
-                let result = result.as_any().downcast_ref::<T>().unwrap();
-                let expected = T::from(expected);
-                assert_eq!(&expected, result);
-                Ok(())
-            },
-        )?;
-
-        Ok(())
-    }
-
-    #[test]
-    fn with_nulls_string() -> Result<()> {
-        with_nulls::<StringArray>()
-    }
-
-    #[test]
-    fn with_nulls_large_string() -> Result<()> {
-        with_nulls::<LargeStringArray>()
-    }
-
-    fn without_nulls<T: 'static + Array + PartialEq + From<Vec<Option<&'static str>>>>(
-    ) -> Result<()> {
-        let cases = vec![
-            // increase start
-            (
-                vec!["hello", "", "word"],
-                0,
-                None,
-                vec!["hello", "", "word"],
-            ),
-            (vec!["hello", "", "word"], 1, None, vec!["ello", "", "ord"]),
-            (vec!["hello", "", "word"], 2, None, vec!["llo", "", "rd"]),
-            (vec!["hello", "", "word"], 3, None, vec!["lo", "", "d"]),
-            (vec!["hello", "", "word"], 10, None, vec!["", "", ""]),
-            // increase start negatively
-            (vec!["hello", "", "word"], -1, None, vec!["o", "", "d"]),
-            (vec!["hello", "", "word"], -2, None, vec!["lo", "", "rd"]),
-            (vec!["hello", "", "word"], -3, None, vec!["llo", "", "ord"]),
-            (
-                vec!["hello", "", "word"],
-                -10,
-                None,
-                vec!["hello", "", "word"],
-            ),
-            // increase length
-            (vec!["hello", "", "word"], 1, Some(1), vec!["e", "", "o"]),
-            (vec!["hello", "", "word"], 1, Some(2), vec!["el", "", "or"]),
-            (
-                vec!["hello", "", "word"],
-                1,
-                Some(3),
-                vec!["ell", "", "ord"],
-            ),
-            (
-                vec!["hello", "", "word"],
-                1,
-                Some(4),
-                vec!["ello", "", "ord"],
-            ),
-            (vec!["hello", "", "word"], -3, Some(1), vec!["l", "", "o"]),
-            (vec!["hello", "", "word"], -3, Some(2), vec!["ll", "", "or"]),
-            (
-                vec!["hello", "", "word"],
-                -3,
-                Some(3),
-                vec!["llo", "", "ord"],
-            ),
-            (
-                vec!["hello", "", "word"],
-                -3,
-                Some(4),
-                vec!["llo", "", "ord"],
-            ),
-        ];
-
-        cases.into_iter().try_for_each::<_, Result<()>>(
-            |(array, start, length, expected)| {
-                let array = StringArray::from(array);
-                let result = substring(&array, start, &length)?;
-                assert_eq!(array.len(), result.len());
-                let result = result.as_any().downcast_ref::<StringArray>().unwrap();
-                let expected = StringArray::from(expected);
-                assert_eq!(&expected, result,);
-                Ok(())
-            },
-        )?;
-
-        Ok(())
-    }
-
-    #[test]
-    fn without_nulls_string() -> Result<()> {
-        without_nulls::<StringArray>()
-    }
-
-    #[test]
-    fn without_nulls_large_string() -> Result<()> {
-        without_nulls::<LargeStringArray>()
-    }
-}
diff --git a/rust/arrow/src/compute/kernels/take.rs b/rust/arrow/src/compute/kernels/take.rs
deleted file mode 100644
index 0217573dc5d..00000000000
--- a/rust/arrow/src/compute/kernels/take.rs
+++ /dev/null
@@ -1,1621 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Defines take kernel for [Array]
-
-use std::{ops::AddAssign, sync::Arc};
-
-use crate::buffer::{Buffer, MutableBuffer};
-use crate::compute::util::{
-    take_value_indices_from_fixed_size_list, take_value_indices_from_list,
-};
-use crate::datatypes::*;
-use crate::error::{ArrowError, Result};
-use crate::util::bit_util;
-use crate::{array::*, buffer::buffer_bin_and};
-
-use num::{ToPrimitive, Zero};
-use TimeUnit::*;
-
-macro_rules! downcast_take {
-    ($type: ty, $values: expr, $indices: expr) => {{
-        let values = $values
-            .as_any()
-            .downcast_ref::<PrimitiveArray<$type>>()
-            .expect("Unable to downcast to a primitive array");
-        Ok(Arc::new(take_primitive::<$type, _>(&values, $indices)?))
-    }};
-}
-
-macro_rules! downcast_dict_take {
-    ($type: ty, $values: expr, $indices: expr) => {{
-        let values = $values
-            .as_any()
-            .downcast_ref::<DictionaryArray<$type>>()
-            .expect("Unable to downcast to a dictionary array");
-        Ok(Arc::new(take_dict::<$type, _>(values, $indices)?))
-    }};
-}
-
-/// Take elements by index from [Array], creating a new [Array] from those indexes.
-///
-/// # Errors
-/// This function errors whenever:
-/// * An index cannot be casted to `usize` (typically 32 bit architectures)
-/// * An index is out of bounds and `options` is set to check bounds.
-/// # Safety
-/// When `options` is not set to check bounds (default), taking indexes after `len` is undefined behavior.
-/// # Examples
-/// ```
-/// use arrow::array::{StringArray, UInt32Array};
-/// use arrow::error::Result;
-/// use arrow::compute::take;
-/// # fn main() -> Result<()> {
-/// let values = StringArray::from(vec!["zero", "one", "two"]);
-///
-/// // Take items at index 2, and 1:
-/// let indices = UInt32Array::from(vec![2, 1]);
-/// let taken = take(&values, &indices, None)?;
-/// let taken = taken.as_any().downcast_ref::<StringArray>().unwrap();
-///
-/// assert_eq!(*taken, StringArray::from(vec!["two", "one"]));
-/// # Ok(())
-/// # }
-/// ```
-pub fn take<IndexType>(
-    values: &Array,
-    indices: &PrimitiveArray<IndexType>,
-    options: Option<TakeOptions>,
-) -> Result<ArrayRef>
-where
-    IndexType: ArrowNumericType,
-    IndexType::Native: ToPrimitive,
-{
-    take_impl(values, indices, options)
-}
-
-fn take_impl<IndexType>(
-    values: &Array,
-    indices: &PrimitiveArray<IndexType>,
-    options: Option<TakeOptions>,
-) -> Result<ArrayRef>
-where
-    IndexType: ArrowNumericType,
-    IndexType::Native: ToPrimitive,
-{
-    let options = options.unwrap_or_default();
-    if options.check_bounds {
-        let len = values.len();
-        for i in 0..indices.len() {
-            if indices.is_valid(i) {
-                let ix = ToPrimitive::to_usize(&indices.value(i)).ok_or_else(|| {
-                    ArrowError::ComputeError("Cast to usize failed".to_string())
-                })?;
-                if ix >= len {
-                    return Err(ArrowError::ComputeError(
-                    format!("Array index out of bounds, cannot get item at index {} from {} entries", ix, len))
-                );
-                }
-            }
-        }
-    }
-    match values.data_type() {
-        DataType::Boolean => {
-            let values = values.as_any().downcast_ref::<BooleanArray>().unwrap();
-            Ok(Arc::new(take_boolean(values, indices)?))
-        }
-        DataType::Int8 => downcast_take!(Int8Type, values, indices),
-        DataType::Int16 => downcast_take!(Int16Type, values, indices),
-        DataType::Int32 => downcast_take!(Int32Type, values, indices),
-        DataType::Int64 => downcast_take!(Int64Type, values, indices),
-        DataType::UInt8 => downcast_take!(UInt8Type, values, indices),
-        DataType::UInt16 => downcast_take!(UInt16Type, values, indices),
-        DataType::UInt32 => downcast_take!(UInt32Type, values, indices),
-        DataType::UInt64 => downcast_take!(UInt64Type, values, indices),
-        DataType::Float32 => downcast_take!(Float32Type, values, indices),
-        DataType::Float64 => downcast_take!(Float64Type, values, indices),
-        DataType::Date32 => downcast_take!(Date32Type, values, indices),
-        DataType::Date64 => downcast_take!(Date64Type, values, indices),
-        DataType::Time32(Second) => downcast_take!(Time32SecondType, values, indices),
-        DataType::Time32(Millisecond) => {
-            downcast_take!(Time32MillisecondType, values, indices)
-        }
-        DataType::Time64(Microsecond) => {
-            downcast_take!(Time64MicrosecondType, values, indices)
-        }
-        DataType::Time64(Nanosecond) => {
-            downcast_take!(Time64NanosecondType, values, indices)
-        }
-        DataType::Timestamp(Second, _) => {
-            downcast_take!(TimestampSecondType, values, indices)
-        }
-        DataType::Timestamp(Millisecond, _) => {
-            downcast_take!(TimestampMillisecondType, values, indices)
-        }
-        DataType::Timestamp(Microsecond, _) => {
-            downcast_take!(TimestampMicrosecondType, values, indices)
-        }
-        DataType::Timestamp(Nanosecond, _) => {
-            downcast_take!(TimestampNanosecondType, values, indices)
-        }
-        DataType::Interval(IntervalUnit::YearMonth) => {
-            downcast_take!(IntervalYearMonthType, values, indices)
-        }
-        DataType::Interval(IntervalUnit::DayTime) => {
-            downcast_take!(IntervalDayTimeType, values, indices)
-        }
-        DataType::Duration(TimeUnit::Second) => {
-            downcast_take!(DurationSecondType, values, indices)
-        }
-        DataType::Duration(TimeUnit::Millisecond) => {
-            downcast_take!(DurationMillisecondType, values, indices)
-        }
-        DataType::Duration(TimeUnit::Microsecond) => {
-            downcast_take!(DurationMicrosecondType, values, indices)
-        }
-        DataType::Duration(TimeUnit::Nanosecond) => {
-            downcast_take!(DurationNanosecondType, values, indices)
-        }
-        DataType::Utf8 => {
-            let values = values
-                .as_any()
-                .downcast_ref::<GenericStringArray<i32>>()
-                .unwrap();
-            Ok(Arc::new(take_string::<i32, _>(values, indices)?))
-        }
-        DataType::LargeUtf8 => {
-            let values = values
-                .as_any()
-                .downcast_ref::<GenericStringArray<i64>>()
-                .unwrap();
-            Ok(Arc::new(take_string::<i64, _>(values, indices)?))
-        }
-        DataType::List(_) => {
-            let values = values
-                .as_any()
-                .downcast_ref::<GenericListArray<i32>>()
-                .unwrap();
-            Ok(Arc::new(take_list::<_, Int32Type>(values, indices)?))
-        }
-        DataType::LargeList(_) => {
-            let values = values
-                .as_any()
-                .downcast_ref::<GenericListArray<i64>>()
-                .unwrap();
-            Ok(Arc::new(take_list::<_, Int64Type>(values, indices)?))
-        }
-        DataType::FixedSizeList(_, length) => {
-            let values = values
-                .as_any()
-                .downcast_ref::<FixedSizeListArray>()
-                .unwrap();
-            Ok(Arc::new(take_fixed_size_list(
-                values,
-                indices,
-                *length as u32,
-            )?))
-        }
-        DataType::Struct(fields) => {
-            let struct_: &StructArray =
-                values.as_any().downcast_ref::<StructArray>().unwrap();
-            let arrays: Result<Vec<ArrayRef>> = struct_
-                .columns()
-                .iter()
-                .map(|a| take_impl(a.as_ref(), indices, Some(options.clone())))
-                .collect();
-            let arrays = arrays?;
-            let pairs: Vec<(Field, ArrayRef)> =
-                fields.clone().into_iter().zip(arrays).collect();
-            Ok(Arc::new(StructArray::from(pairs)) as ArrayRef)
-        }
-        DataType::Dictionary(key_type, _) => match key_type.as_ref() {
-            DataType::Int8 => downcast_dict_take!(Int8Type, values, indices),
-            DataType::Int16 => downcast_dict_take!(Int16Type, values, indices),
-            DataType::Int32 => downcast_dict_take!(Int32Type, values, indices),
-            DataType::Int64 => downcast_dict_take!(Int64Type, values, indices),
-            DataType::UInt8 => downcast_dict_take!(UInt8Type, values, indices),
-            DataType::UInt16 => downcast_dict_take!(UInt16Type, values, indices),
-            DataType::UInt32 => downcast_dict_take!(UInt32Type, values, indices),
-            DataType::UInt64 => downcast_dict_take!(UInt64Type, values, indices),
-            t => unimplemented!("Take not supported for dictionary key type {:?}", t),
-        },
-        t => unimplemented!("Take not supported for data type {:?}", t),
-    }
-}
-
-/// Options that define how `take` should behave
-#[derive(Clone, Debug)]
-pub struct TakeOptions {
-    /// Perform bounds check before taking indices from values.
-    /// If enabled, an `ArrowError` is returned if the indices are out of bounds.
-    /// If not enabled, and indices exceed bounds, the kernel will panic.
-    pub check_bounds: bool,
-}
-
-impl Default for TakeOptions {
-    fn default() -> Self {
-        Self {
-            check_bounds: false,
-        }
-    }
-}
-
-#[inline(always)]
-fn maybe_usize<I: ArrowPrimitiveType>(index: I::Native) -> Result<usize> {
-    index
-        .to_usize()
-        .ok_or_else(|| ArrowError::ComputeError("Cast to usize failed".to_string()))
-}
-
-// take implementation when neither values nor indices contain nulls
-fn take_no_nulls<T, I>(
-    values: &[T::Native],
-    indices: &[I::Native],
-) -> Result<(Buffer, Option<Buffer>)>
-where
-    T: ArrowPrimitiveType,
-    I: ArrowNumericType,
-{
-    let values = indices
-        .iter()
-        .map(|index| Result::Ok(values[maybe_usize::<I>(*index)?]));
-    // Soundness: `slice.map` is `TrustedLen`.
-    let buffer = unsafe { Buffer::try_from_trusted_len_iter(values)? };
-
-    Ok((buffer, None))
-}
-
-// take implementation when only values contain nulls
-fn take_values_nulls<T, I>(
-    values: &PrimitiveArray<T>,
-    indices: &[I::Native],
-) -> Result<(Buffer, Option<Buffer>)>
-where
-    T: ArrowPrimitiveType,
-    I: ArrowNumericType,
-    I::Native: ToPrimitive,
-{
-    let num_bytes = bit_util::ceil(indices.len(), 8);
-    let mut nulls = MutableBuffer::new(num_bytes).with_bitset(num_bytes, true);
-    let null_slice = nulls.as_slice_mut();
-    let mut null_count = 0;
-
-    let values_values = values.values();
-
-    let values = indices.iter().enumerate().map(|(i, index)| {
-        let index = maybe_usize::<I>(*index)?;
-        if values.is_null(index) {
-            null_count += 1;
-            bit_util::unset_bit(null_slice, i);
-        }
-        Result::Ok(values_values[index])
-    });
-    // Soundness: `slice.map` is `TrustedLen`.
-    let buffer = unsafe { Buffer::try_from_trusted_len_iter(values)? };
-
-    let nulls = if null_count == 0 {
-        // if only non-null values were taken
-        None
-    } else {
-        Some(nulls.into())
-    };
-
-    Ok((buffer, nulls))
-}
-
-// take implementation when only indices contain nulls
-fn take_indices_nulls<T, I>(
-    values: &[T::Native],
-    indices: &PrimitiveArray<I>,
-) -> Result<(Buffer, Option<Buffer>)>
-where
-    T: ArrowPrimitiveType,
-    I: ArrowNumericType,
-    I::Native: ToPrimitive,
-{
-    let values = indices.values().iter().map(|index| {
-        let index = maybe_usize::<I>(*index)?;
-        Result::Ok(match values.get(index) {
-            Some(value) => *value,
-            None => {
-                if indices.is_null(index) {
-                    T::Native::default()
-                } else {
-                    panic!("Out-of-bounds index {}", index)
-                }
-            }
-        })
-    });
-
-    // Soundness: `slice.map` is `TrustedLen`.
-    let buffer = unsafe { Buffer::try_from_trusted_len_iter(values)? };
-
-    Ok((buffer, indices.data_ref().null_buffer().cloned()))
-}
-
-// take implementation when both values and indices contain nulls
-fn take_values_indices_nulls<T, I>(
-    values: &PrimitiveArray<T>,
-    indices: &PrimitiveArray<I>,
-) -> Result<(Buffer, Option<Buffer>)>
-where
-    T: ArrowPrimitiveType,
-    I: ArrowNumericType,
-    I::Native: ToPrimitive,
-{
-    let num_bytes = bit_util::ceil(indices.len(), 8);
-    let mut nulls = MutableBuffer::new(num_bytes).with_bitset(num_bytes, true);
-    let null_slice = nulls.as_slice_mut();
-    let mut null_count = 0;
-
-    let values_values = values.values();
-    let values = indices.iter().enumerate().map(|(i, index)| match index {
-        Some(index) => {
-            let index = maybe_usize::<I>(index)?;
-            if values.is_null(index) {
-                null_count += 1;
-                bit_util::unset_bit(null_slice, i);
-            }
-            Result::Ok(values_values[index])
-        }
-        None => {
-            null_count += 1;
-            bit_util::unset_bit(null_slice, i);
-            Ok(T::Native::default())
-        }
-    });
-    // Soundness: `slice.map` is `TrustedLen`.
-    let buffer = unsafe { Buffer::try_from_trusted_len_iter(values)? };
-
-    let nulls = if null_count == 0 {
-        // if only non-null values were taken
-        None
-    } else {
-        Some(nulls.into())
-    };
-
-    Ok((buffer, nulls))
-}
-
-/// `take` implementation for all primitive arrays
-///
-/// This checks if an `indices` slot is populated, and gets the value from `values`
-///  as the populated index.
-/// If the `indices` slot is null, a null value is returned.
-/// For example, given:
-///     values:  [1, 2, 3, null, 5]
-///     indices: [0, null, 4, 3]
-/// The result is: [1 (slot 0), null (null slot), 5 (slot 4), null (slot 3)]
-fn take_primitive<T, I>(
-    values: &PrimitiveArray<T>,
-    indices: &PrimitiveArray<I>,
-) -> Result<PrimitiveArray<T>>
-where
-    T: ArrowPrimitiveType,
-    I: ArrowNumericType,
-    I::Native: ToPrimitive,
-{
-    let indices_has_nulls = indices.null_count() > 0;
-    let values_has_nulls = values.null_count() > 0;
-    // note: this function should only panic when "an index is not null and out of bounds".
-    // if the index is null, its value is undefined and therefore we should not read from it.
-
-    let (buffer, nulls) = match (values_has_nulls, indices_has_nulls) {
-        (false, false) => {
-            // * no nulls
-            // * all `indices.values()` are valid
-            take_no_nulls::<T, I>(values.values(), indices.values())?
-        }
-        (true, false) => {
-            // * nulls come from `values` alone
-            // * all `indices.values()` are valid
-            take_values_nulls::<T, I>(values, indices.values())?
-        }
-        (false, true) => {
-            // in this branch it is unsound to read and use `index.values()`,
-            // as doing so is UB when they come from a null slot.
-            take_indices_nulls::<T, I>(values.values(), indices)?
-        }
-        (true, true) => {
-            // in this branch it is unsound to read and use `index.values()`,
-            // as doing so is UB when they come from a null slot.
-            take_values_indices_nulls::<T, I>(values, indices)?
-        }
-    };
-
-    let data = ArrayData::new(
-        T::DATA_TYPE,
-        indices.len(),
-        None,
-        nulls,
-        0,
-        vec![buffer],
-        vec![],
-    );
-    Ok(PrimitiveArray::<T>::from(data))
-}
-
-/// `take` implementation for boolean arrays
-fn take_boolean<IndexType>(
-    values: &BooleanArray,
-    indices: &PrimitiveArray<IndexType>,
-) -> Result<BooleanArray>
-where
-    IndexType: ArrowNumericType,
-    IndexType::Native: ToPrimitive,
-{
-    let data_len = indices.len();
-
-    let num_byte = bit_util::ceil(data_len, 8);
-    let mut val_buf = MutableBuffer::from_len_zeroed(num_byte);
-
-    let val_slice = val_buf.as_slice_mut();
-
-    let null_count = values.null_count();
-
-    let nulls;
-    if null_count == 0 {
-        (0..data_len).try_for_each::<_, Result<()>>(|i| {
-            let index = ToPrimitive::to_usize(&indices.value(i)).ok_or_else(|| {
-                ArrowError::ComputeError("Cast to usize failed".to_string())
-            })?;
-
-            if values.value(index) {
-                bit_util::set_bit(val_slice, i);
-            }
-
-            Ok(())
-        })?;
-
-        nulls = indices.data_ref().null_buffer().cloned();
-    } else {
-        let mut null_buf = MutableBuffer::new(num_byte).with_bitset(num_byte, true);
-        let null_slice = null_buf.as_slice_mut();
-
-        (0..data_len).try_for_each::<_, Result<()>>(|i| {
-            let index = ToPrimitive::to_usize(&indices.value(i)).ok_or_else(|| {
-                ArrowError::ComputeError("Cast to usize failed".to_string())
-            })?;
-
-            if values.is_null(index) {
-                bit_util::unset_bit(null_slice, i);
-            } else if values.value(index) {
-                bit_util::set_bit(val_slice, i);
-            }
-
-            Ok(())
-        })?;
-
-        nulls = match indices.data_ref().null_buffer() {
-            Some(buffer) => Some(buffer_bin_and(
-                buffer,
-                0,
-                &null_buf.into(),
-                0,
-                indices.len(),
-            )),
-            None => Some(null_buf.into()),
-        };
-    }
-
-    let data = ArrayData::new(
-        DataType::Boolean,
-        indices.len(),
-        None,
-        nulls,
-        0,
-        vec![val_buf.into()],
-        vec![],
-    );
-    Ok(BooleanArray::from(data))
-}
-
-/// `take` implementation for string arrays
-fn take_string<OffsetSize, IndexType>(
-    array: &GenericStringArray<OffsetSize>,
-    indices: &PrimitiveArray<IndexType>,
-) -> Result<GenericStringArray<OffsetSize>>
-where
-    OffsetSize: Zero + AddAssign + StringOffsetSizeTrait,
-    IndexType: ArrowNumericType,
-    IndexType::Native: ToPrimitive,
-{
-    let data_len = indices.len();
-
-    let bytes_offset = (data_len + 1) * std::mem::size_of::<OffsetSize>();
-    let mut offsets_buffer = MutableBuffer::from_len_zeroed(bytes_offset);
-
-    let offsets = offsets_buffer.typed_data_mut();
-    let mut values = MutableBuffer::new(0);
-    let mut length_so_far = OffsetSize::zero();
-    offsets[0] = length_so_far;
-
-    let nulls;
-    if array.null_count() == 0 && indices.null_count() == 0 {
-        for (i, offset) in offsets.iter_mut().skip(1).enumerate() {
-            let index = ToPrimitive::to_usize(&indices.value(i)).ok_or_else(|| {
-                ArrowError::ComputeError("Cast to usize failed".to_string())
-            })?;
-
-            let s = array.value(index);
-
-            length_so_far += OffsetSize::from_usize(s.len()).unwrap();
-            values.extend_from_slice(s.as_bytes());
-            *offset = length_so_far;
-        }
-        nulls = None
-    } else if indices.null_count() == 0 {
-        let num_bytes = bit_util::ceil(data_len, 8);
-
-        let mut null_buf = MutableBuffer::new(num_bytes).with_bitset(num_bytes, true);
-        let null_slice = null_buf.as_slice_mut();
-
-        for (i, offset) in offsets.iter_mut().skip(1).enumerate() {
-            let index = ToPrimitive::to_usize(&indices.value(i)).ok_or_else(|| {
-                ArrowError::ComputeError("Cast to usize failed".to_string())
-            })?;
-
-            if array.is_valid(index) {
-                let s = array.value(index);
-
-                length_so_far += OffsetSize::from_usize(s.len()).unwrap();
-                values.extend_from_slice(s.as_bytes());
-            } else {
-                bit_util::unset_bit(null_slice, i);
-            }
-            *offset = length_so_far;
-        }
-        nulls = Some(null_buf.into());
-    } else if array.null_count() == 0 {
-        for (i, offset) in offsets.iter_mut().skip(1).enumerate() {
-            if indices.is_valid(i) {
-                let index =
-                    ToPrimitive::to_usize(&indices.value(i)).ok_or_else(|| {
-                        ArrowError::ComputeError("Cast to usize failed".to_string())
-                    })?;
-
-                let s = array.value(index);
-
-                length_so_far += OffsetSize::from_usize(s.len()).unwrap();
-                values.extend_from_slice(s.as_bytes());
-            }
-            *offset = length_so_far;
-        }
-        nulls = indices.data_ref().null_buffer().cloned();
-    } else {
-        let num_bytes = bit_util::ceil(data_len, 8);
-
-        let mut null_buf = MutableBuffer::new(num_bytes).with_bitset(num_bytes, true);
-        let null_slice = null_buf.as_slice_mut();
-
-        for (i, offset) in offsets.iter_mut().skip(1).enumerate() {
-            let index = ToPrimitive::to_usize(&indices.value(i)).ok_or_else(|| {
-                ArrowError::ComputeError("Cast to usize failed".to_string())
-            })?;
-
-            if array.is_valid(index) && indices.is_valid(i) {
-                let s = array.value(index);
-
-                length_so_far += OffsetSize::from_usize(s.len()).unwrap();
-                values.extend_from_slice(s.as_bytes());
-            } else {
-                // set null bit
-                bit_util::unset_bit(null_slice, i);
-            }
-            *offset = length_so_far;
-        }
-
-        nulls = match indices.data_ref().null_buffer() {
-            Some(buffer) => {
-                Some(buffer_bin_and(buffer, 0, &null_buf.into(), 0, data_len))
-            }
-            None => Some(null_buf.into()),
-        };
-    }
-
-    let mut data = ArrayData::builder(<OffsetSize as StringOffsetSizeTrait>::DATA_TYPE)
-        .len(data_len)
-        .add_buffer(offsets_buffer.into())
-        .add_buffer(values.into());
-    if let Some(null_buffer) = nulls {
-        data = data.null_bit_buffer(null_buffer);
-    }
-    Ok(GenericStringArray::<OffsetSize>::from(data.build()))
-}
-
-/// `take` implementation for list arrays
-///
-/// Calculates the index and indexed offset for the inner array,
-/// applying `take` on the inner array, then reconstructing a list array
-/// with the indexed offsets
-fn take_list<IndexType, OffsetType>(
-    values: &GenericListArray<OffsetType::Native>,
-    indices: &PrimitiveArray<IndexType>,
-) -> Result<GenericListArray<OffsetType::Native>>
-where
-    IndexType: ArrowNumericType,
-    IndexType::Native: ToPrimitive,
-    OffsetType: ArrowNumericType,
-    OffsetType::Native: ToPrimitive + OffsetSizeTrait,
-    PrimitiveArray<OffsetType>: From<Vec<Option<OffsetType::Native>>>,
-{
-    // TODO: Some optimizations can be done here such as if it is
-    // taking the whole list or a contiguous sublist
-    let (list_indices, offsets) =
-        take_value_indices_from_list::<IndexType, OffsetType>(values, indices)?;
-
-    let taken = take_impl::<OffsetType>(values.values().as_ref(), &list_indices, None)?;
-    // determine null count and null buffer, which are a function of `values` and `indices`
-    let mut null_count = 0;
-    let num_bytes = bit_util::ceil(indices.len(), 8);
-    let mut null_buf = MutableBuffer::new(num_bytes).with_bitset(num_bytes, true);
-    {
-        let null_slice = null_buf.as_slice_mut();
-        offsets[..].windows(2).enumerate().for_each(
-            |(i, window): (usize, &[OffsetType::Native])| {
-                if window[0] == window[1] {
-                    // offsets are equal, slot is null
-                    bit_util::unset_bit(null_slice, i);
-                    null_count += 1;
-                }
-            },
-        );
-    }
-    let value_offsets = Buffer::from_slice_ref(&offsets);
-    // create a new list with taken data and computed null information
-    let list_data = ArrayDataBuilder::new(values.data_type().clone())
-        .len(indices.len())
-        .null_bit_buffer(null_buf.into())
-        .offset(0)
-        .add_child_data(taken.data().clone())
-        .add_buffer(value_offsets)
-        .build();
-    Ok(GenericListArray::<OffsetType::Native>::from(list_data))
-}
-
-/// `take` implementation for `FixedSizeListArray`
-///
-/// Calculates the index and indexed offset for the inner array,
-/// applying `take` on the inner array, then reconstructing a list array
-/// with the indexed offsets
-fn take_fixed_size_list<IndexType>(
-    values: &FixedSizeListArray,
-    indices: &PrimitiveArray<IndexType>,
-    length: <UInt32Type as ArrowPrimitiveType>::Native,
-) -> Result<FixedSizeListArray>
-where
-    IndexType: ArrowNumericType,
-    IndexType::Native: ToPrimitive,
-{
-    let list_indices = take_value_indices_from_fixed_size_list(values, indices, length)?;
-    let taken = take_impl::<UInt32Type>(values.values().as_ref(), &list_indices, None)?;
-
-    // determine null count and null buffer, which are a function of `values` and `indices`
-    let num_bytes = bit_util::ceil(indices.len(), 8);
-    let mut null_buf = MutableBuffer::new(num_bytes).with_bitset(num_bytes, true);
-    let null_slice = null_buf.as_slice_mut();
-
-    for i in 0..indices.len() {
-        let index = ToPrimitive::to_usize(&indices.value(i)).ok_or_else(|| {
-            ArrowError::ComputeError("Cast to usize failed".to_string())
-        })?;
-        if !indices.is_valid(i) || values.is_null(index) {
-            bit_util::unset_bit(null_slice, i);
-        }
-    }
-
-    let list_data = ArrayDataBuilder::new(values.data_type().clone())
-        .len(indices.len())
-        .null_bit_buffer(null_buf.into())
-        .offset(0)
-        .add_child_data(taken.data().clone())
-        .build();
-
-    Ok(FixedSizeListArray::from(list_data))
-}
-
-/// `take` implementation for dictionary arrays
-///
-/// applies `take` to the keys of the dictionary array and returns a new dictionary array
-/// with the same dictionary values and reordered keys
-fn take_dict<T, I>(
-    values: &DictionaryArray<T>,
-    indices: &PrimitiveArray<I>,
-) -> Result<DictionaryArray<T>>
-where
-    T: ArrowPrimitiveType,
-    T::Native: num::Num,
-    I: ArrowNumericType,
-    I::Native: ToPrimitive,
-{
-    let new_keys = take_primitive::<T, I>(&values.keys_array(), indices)?;
-    let new_keys_data = new_keys.data_ref();
-
-    let data = ArrayData::new(
-        values.data_type().clone(),
-        new_keys.len(),
-        Some(new_keys_data.null_count()),
-        new_keys_data.null_buffer().cloned(),
-        0,
-        new_keys_data.buffers().to_vec(),
-        values.data().child_data().to_vec(),
-    );
-
-    Ok(DictionaryArray::<T>::from(data))
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::compute::util::tests::build_fixed_size_list_nullable;
-
-    fn test_take_boolean_arrays(
-        data: Vec<Option<bool>>,
-        index: &UInt32Array,
-        options: Option<TakeOptions>,
-        expected_data: Vec<Option<bool>>,
-    ) {
-        let output = BooleanArray::from(data);
-        let expected = Arc::new(BooleanArray::from(expected_data)) as ArrayRef;
-        let output = take(&output, index, options).unwrap();
-        assert_eq!(&output, &expected)
-    }
-
-    fn test_take_primitive_arrays<T>(
-        data: Vec<Option<T::Native>>,
-        index: &UInt32Array,
-        options: Option<TakeOptions>,
-        expected_data: Vec<Option<T::Native>>,
-    ) -> Result<()>
-    where
-        T: ArrowPrimitiveType,
-        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
-    {
-        let output = PrimitiveArray::<T>::from(data);
-        let expected = Arc::new(PrimitiveArray::<T>::from(expected_data)) as ArrayRef;
-        let output = take(&output, index, options)?;
-        assert_eq!(&output, &expected);
-        Ok(())
-    }
-
-    fn test_take_impl_primitive_arrays<T, I>(
-        data: Vec<Option<T::Native>>,
-        index: &PrimitiveArray<I>,
-        options: Option<TakeOptions>,
-        expected_data: Vec<Option<T::Native>>,
-    ) where
-        T: ArrowPrimitiveType,
-        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
-        I: ArrowNumericType,
-        I::Native: ToPrimitive,
-    {
-        let output = PrimitiveArray::<T>::from(data);
-        let expected = PrimitiveArray::<T>::from(expected_data);
-        let output = take_impl(&output, index, options).unwrap();
-        let output = output.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
-        assert_eq!(output, &expected)
-    }
-
-    // create a simple struct for testing purposes
-    fn create_test_struct() -> StructArray {
-        let boolean_data = BooleanArray::from(vec![true, false, false, true])
-            .data()
-            .clone();
-        let int_data = Int32Array::from(vec![42, 28, 19, 31]).data().clone();
-        let mut field_types = vec![];
-        field_types.push(Field::new("a", DataType::Boolean, true));
-        field_types.push(Field::new("b", DataType::Int32, true));
-        let struct_array_data = ArrayData::builder(DataType::Struct(field_types))
-            .len(4)
-            .add_child_data(boolean_data)
-            .add_child_data(int_data)
-            .build();
-        StructArray::from(struct_array_data)
-    }
-
-    #[test]
-    fn test_take_primitive_non_null_indices() {
-        let index = UInt32Array::from(vec![0, 5, 3, 1, 4, 2]);
-        test_take_primitive_arrays::<Int8Type>(
-            vec![None, Some(3), Some(5), Some(2), Some(3), None],
-            &index,
-            None,
-            vec![None, None, Some(2), Some(3), Some(3), Some(5)],
-        )
-        .unwrap();
-    }
-
-    #[test]
-    fn test_take_primitive_non_null_values() {
-        let index = UInt32Array::from(vec![Some(3), None, Some(1), Some(3), Some(2)]);
-        test_take_primitive_arrays::<Int8Type>(
-            vec![Some(0), Some(1), Some(2), Some(3), Some(4)],
-            &index,
-            None,
-            vec![Some(3), None, Some(1), Some(3), Some(2)],
-        )
-        .unwrap();
-    }
-
-    #[test]
-    fn test_take_primitive_non_null() {
-        let index = UInt32Array::from(vec![0, 5, 3, 1, 4, 2]);
-        test_take_primitive_arrays::<Int8Type>(
-            vec![Some(0), Some(3), Some(5), Some(2), Some(3), Some(1)],
-            &index,
-            None,
-            vec![Some(0), Some(1), Some(2), Some(3), Some(3), Some(5)],
-        )
-        .unwrap();
-    }
-
-    #[test]
-    fn test_take_primitive() {
-        let index = UInt32Array::from(vec![Some(3), None, Some(1), Some(3), Some(2)]);
-
-        // int8
-        test_take_primitive_arrays::<Int8Type>(
-            vec![Some(0), None, Some(2), Some(3), None],
-            &index,
-            None,
-            vec![Some(3), None, None, Some(3), Some(2)],
-        )
-        .unwrap();
-
-        // int16
-        test_take_primitive_arrays::<Int16Type>(
-            vec![Some(0), None, Some(2), Some(3), None],
-            &index,
-            None,
-            vec![Some(3), None, None, Some(3), Some(2)],
-        )
-        .unwrap();
-
-        // int32
-        test_take_primitive_arrays::<Int32Type>(
-            vec![Some(0), None, Some(2), Some(3), None],
-            &index,
-            None,
-            vec![Some(3), None, None, Some(3), Some(2)],
-        )
-        .unwrap();
-
-        // int64
-        test_take_primitive_arrays::<Int64Type>(
-            vec![Some(0), None, Some(2), Some(3), None],
-            &index,
-            None,
-            vec![Some(3), None, None, Some(3), Some(2)],
-        )
-        .unwrap();
-
-        // uint8
-        test_take_primitive_arrays::<UInt8Type>(
-            vec![Some(0), None, Some(2), Some(3), None],
-            &index,
-            None,
-            vec![Some(3), None, None, Some(3), Some(2)],
-        )
-        .unwrap();
-
-        // uint16
-        test_take_primitive_arrays::<UInt16Type>(
-            vec![Some(0), None, Some(2), Some(3), None],
-            &index,
-            None,
-            vec![Some(3), None, None, Some(3), Some(2)],
-        )
-        .unwrap();
-
-        // uint32
-        test_take_primitive_arrays::<UInt32Type>(
-            vec![Some(0), None, Some(2), Some(3), None],
-            &index,
-            None,
-            vec![Some(3), None, None, Some(3), Some(2)],
-        )
-        .unwrap();
-
-        // int64
-        test_take_primitive_arrays::<Int64Type>(
-            vec![Some(0), None, Some(2), Some(-15), None],
-            &index,
-            None,
-            vec![Some(-15), None, None, Some(-15), Some(2)],
-        )
-        .unwrap();
-
-        // interval_year_month
-        test_take_primitive_arrays::<IntervalYearMonthType>(
-            vec![Some(0), None, Some(2), Some(-15), None],
-            &index,
-            None,
-            vec![Some(-15), None, None, Some(-15), Some(2)],
-        )
-        .unwrap();
-
-        // interval_day_time
-        test_take_primitive_arrays::<IntervalDayTimeType>(
-            vec![Some(0), None, Some(2), Some(-15), None],
-            &index,
-            None,
-            vec![Some(-15), None, None, Some(-15), Some(2)],
-        )
-        .unwrap();
-
-        // duration_second
-        test_take_primitive_arrays::<DurationSecondType>(
-            vec![Some(0), None, Some(2), Some(-15), None],
-            &index,
-            None,
-            vec![Some(-15), None, None, Some(-15), Some(2)],
-        )
-        .unwrap();
-
-        // duration_millisecond
-        test_take_primitive_arrays::<DurationMillisecondType>(
-            vec![Some(0), None, Some(2), Some(-15), None],
-            &index,
-            None,
-            vec![Some(-15), None, None, Some(-15), Some(2)],
-        )
-        .unwrap();
-
-        // duration_microsecond
-        test_take_primitive_arrays::<DurationMicrosecondType>(
-            vec![Some(0), None, Some(2), Some(-15), None],
-            &index,
-            None,
-            vec![Some(-15), None, None, Some(-15), Some(2)],
-        )
-        .unwrap();
-
-        // duration_nanosecond
-        test_take_primitive_arrays::<DurationNanosecondType>(
-            vec![Some(0), None, Some(2), Some(-15), None],
-            &index,
-            None,
-            vec![Some(-15), None, None, Some(-15), Some(2)],
-        )
-        .unwrap();
-
-        // float32
-        test_take_primitive_arrays::<Float32Type>(
-            vec![Some(0.0), None, Some(2.21), Some(-3.1), None],
-            &index,
-            None,
-            vec![Some(-3.1), None, None, Some(-3.1), Some(2.21)],
-        )
-        .unwrap();
-
-        // float64
-        test_take_primitive_arrays::<Float64Type>(
-            vec![Some(0.0), None, Some(2.21), Some(-3.1), None],
-            &index,
-            None,
-            vec![Some(-3.1), None, None, Some(-3.1), Some(2.21)],
-        )
-        .unwrap();
-    }
-
-    #[test]
-    fn test_take_impl_primitive_with_int64_indices() {
-        let index = Int64Array::from(vec![Some(3), None, Some(1), Some(3), Some(2)]);
-
-        // int16
-        test_take_impl_primitive_arrays::<Int16Type, Int64Type>(
-            vec![Some(0), None, Some(2), Some(3), None],
-            &index,
-            None,
-            vec![Some(3), None, None, Some(3), Some(2)],
-        );
-
-        // int64
-        test_take_impl_primitive_arrays::<Int64Type, Int64Type>(
-            vec![Some(0), None, Some(2), Some(-15), None],
-            &index,
-            None,
-            vec![Some(-15), None, None, Some(-15), Some(2)],
-        );
-
-        // uint64
-        test_take_impl_primitive_arrays::<UInt64Type, Int64Type>(
-            vec![Some(0), None, Some(2), Some(3), None],
-            &index,
-            None,
-            vec![Some(3), None, None, Some(3), Some(2)],
-        );
-
-        // duration_millisecond
-        test_take_impl_primitive_arrays::<DurationMillisecondType, Int64Type>(
-            vec![Some(0), None, Some(2), Some(-15), None],
-            &index,
-            None,
-            vec![Some(-15), None, None, Some(-15), Some(2)],
-        );
-
-        // float32
-        test_take_impl_primitive_arrays::<Float32Type, Int64Type>(
-            vec![Some(0.0), None, Some(2.21), Some(-3.1), None],
-            &index,
-            None,
-            vec![Some(-3.1), None, None, Some(-3.1), Some(2.21)],
-        );
-    }
-
-    #[test]
-    fn test_take_impl_primitive_with_uint8_indices() {
-        let index = UInt8Array::from(vec![Some(3), None, Some(1), Some(3), Some(2)]);
-
-        // int16
-        test_take_impl_primitive_arrays::<Int16Type, UInt8Type>(
-            vec![Some(0), None, Some(2), Some(3), None],
-            &index,
-            None,
-            vec![Some(3), None, None, Some(3), Some(2)],
-        );
-
-        // duration_millisecond
-        test_take_impl_primitive_arrays::<DurationMillisecondType, UInt8Type>(
-            vec![Some(0), None, Some(2), Some(-15), None],
-            &index,
-            None,
-            vec![Some(-15), None, None, Some(-15), Some(2)],
-        );
-
-        // float32
-        test_take_impl_primitive_arrays::<Float32Type, UInt8Type>(
-            vec![Some(0.0), None, Some(2.21), Some(-3.1), None],
-            &index,
-            None,
-            vec![Some(-3.1), None, None, Some(-3.1), Some(2.21)],
-        );
-    }
-
-    #[test]
-    fn test_take_primitive_bool() {
-        let index = UInt32Array::from(vec![Some(3), None, Some(1), Some(3), Some(2)]);
-        // boolean
-        test_take_boolean_arrays(
-            vec![Some(false), None, Some(true), Some(false), None],
-            &index,
-            None,
-            vec![Some(false), None, None, Some(false), Some(true)],
-        );
-    }
-
-    fn _test_take_string<'a, K: 'static>()
-    where
-        K: Array + PartialEq + From<Vec<Option<&'a str>>>,
-    {
-        let index = UInt32Array::from(vec![Some(3), None, Some(1), Some(3), Some(4)]);
-
-        let array = K::from(vec![
-            Some("one"),
-            None,
-            Some("three"),
-            Some("four"),
-            Some("five"),
-        ]);
-        let actual = take(&array, &index, None).unwrap();
-        assert_eq!(actual.len(), index.len());
-
-        let actual = actual.as_any().downcast_ref::<K>().unwrap();
-
-        let expected =
-            K::from(vec![Some("four"), None, None, Some("four"), Some("five")]);
-
-        assert_eq!(actual, &expected);
-    }
-
-    #[test]
-    fn test_take_string() {
-        _test_take_string::<StringArray>()
-    }
-
-    #[test]
-    fn test_take_large_string() {
-        _test_take_string::<LargeStringArray>()
-    }
-
-    macro_rules! test_take_list {
-        ($offset_type:ty, $list_data_type:ident, $list_array_type:ident) => {{
-            // Construct a value array, [[0,0,0], [-1,-2,-1], [2,3]]
-            let value_data = Int32Array::from(vec![0, 0, 0, -1, -2, -1, 2, 3])
-                .data()
-                .clone();
-            // Construct offsets
-            let value_offsets: [$offset_type; 4] = [0, 3, 6, 8];
-            let value_offsets = Buffer::from_slice_ref(&value_offsets);
-            // Construct a list array from the above two
-            let list_data_type = DataType::$list_data_type(Box::new(Field::new(
-                "item",
-                DataType::Int32,
-                false,
-            )));
-            let list_data = ArrayData::builder(list_data_type.clone())
-                .len(3)
-                .add_buffer(value_offsets)
-                .add_child_data(value_data)
-                .build();
-            let list_array = $list_array_type::from(list_data);
-
-            // index returns: [[2,3], null, [-1,-2,-1], [2,3], [0,0,0]]
-            let index = UInt32Array::from(vec![Some(2), None, Some(1), Some(2), Some(0)]);
-
-            let a = take(&list_array, &index, None).unwrap();
-            let a: &$list_array_type =
-                a.as_any().downcast_ref::<$list_array_type>().unwrap();
-
-            // construct a value array with expected results:
-            // [[2,3], null, [-1,-2,-1], [2,3], [0,0,0]]
-            let expected_data = Int32Array::from(vec![
-                Some(2),
-                Some(3),
-                Some(-1),
-                Some(-2),
-                Some(-1),
-                Some(2),
-                Some(3),
-                Some(0),
-                Some(0),
-                Some(0),
-            ])
-            .data()
-            .clone();
-            // construct offsets
-            let expected_offsets: [$offset_type; 6] = [0, 2, 2, 5, 7, 10];
-            let expected_offsets = Buffer::from_slice_ref(&expected_offsets);
-            // construct list array from the two
-            let expected_list_data = ArrayData::builder(list_data_type)
-                .len(5)
-                // null buffer remains the same as only the indices have nulls
-                .null_bit_buffer(
-                    index.data().null_bitmap().as_ref().unwrap().bits.clone(),
-                )
-                .add_buffer(expected_offsets)
-                .add_child_data(expected_data)
-                .build();
-            let expected_list_array = $list_array_type::from(expected_list_data);
-
-            assert_eq!(a, &expected_list_array);
-        }};
-    }
-
-    macro_rules! test_take_list_with_value_nulls {
-        ($offset_type:ty, $list_data_type:ident, $list_array_type:ident) => {{
-            // Construct a value array, [[0,null,0], [-1,-2,3], [null], [5,null]]
-            let value_data = Int32Array::from(vec![
-                Some(0),
-                None,
-                Some(0),
-                Some(-1),
-                Some(-2),
-                Some(3),
-                None,
-                Some(5),
-                None,
-            ])
-            .data()
-            .clone();
-            // Construct offsets
-            let value_offsets: [$offset_type; 5] = [0, 3, 6, 7, 9];
-            let value_offsets = Buffer::from_slice_ref(&value_offsets);
-            // Construct a list array from the above two
-            let list_data_type = DataType::$list_data_type(Box::new(Field::new(
-                "item",
-                DataType::Int32,
-                false,
-            )));
-            let list_data = ArrayData::builder(list_data_type.clone())
-                .len(4)
-                .add_buffer(value_offsets)
-                .null_bit_buffer(Buffer::from([0b10111101, 0b00000000]))
-                .add_child_data(value_data)
-                .build();
-            let list_array = $list_array_type::from(list_data);
-
-            // index returns: [[null], null, [-1,-2,3], [2,null], [0,null,0]]
-            let index = UInt32Array::from(vec![Some(2), None, Some(1), Some(3), Some(0)]);
-
-            let a = take(&list_array, &index, None).unwrap();
-            let a: &$list_array_type =
-                a.as_any().downcast_ref::<$list_array_type>().unwrap();
-
-            // construct a value array with expected results:
-            // [[null], null, [-1,-2,3], [5,null], [0,null,0]]
-            let expected_data = Int32Array::from(vec![
-                None,
-                Some(-1),
-                Some(-2),
-                Some(3),
-                Some(5),
-                None,
-                Some(0),
-                None,
-                Some(0),
-            ])
-            .data()
-            .clone();
-            // construct offsets
-            let expected_offsets: [$offset_type; 6] = [0, 1, 1, 4, 6, 9];
-            let expected_offsets = Buffer::from_slice_ref(&expected_offsets);
-            // construct list array from the two
-            let expected_list_data = ArrayData::builder(list_data_type)
-                .len(5)
-                // null buffer remains the same as only the indices have nulls
-                .null_bit_buffer(
-                    index.data().null_bitmap().as_ref().unwrap().bits.clone(),
-                )
-                .add_buffer(expected_offsets)
-                .add_child_data(expected_data)
-                .build();
-            let expected_list_array = $list_array_type::from(expected_list_data);
-
-            assert_eq!(a, &expected_list_array);
-        }};
-    }
-
-    macro_rules! test_take_list_with_nulls {
-        ($offset_type:ty, $list_data_type:ident, $list_array_type:ident) => {{
-            // Construct a value array, [[0,null,0], [-1,-2,3], null, [5,null]]
-            let value_data = Int32Array::from(vec![
-                Some(0),
-                None,
-                Some(0),
-                Some(-1),
-                Some(-2),
-                Some(3),
-                Some(5),
-                None,
-            ])
-            .data()
-            .clone();
-            // Construct offsets
-            let value_offsets: [$offset_type; 5] = [0, 3, 6, 6, 8];
-            let value_offsets = Buffer::from_slice_ref(&value_offsets);
-            // Construct a list array from the above two
-            let list_data_type = DataType::$list_data_type(Box::new(Field::new(
-                "item",
-                DataType::Int32,
-                false,
-            )));
-            let list_data = ArrayData::builder(list_data_type.clone())
-                .len(4)
-                .add_buffer(value_offsets)
-                .null_bit_buffer(Buffer::from([0b01111101]))
-                .add_child_data(value_data)
-                .build();
-            let list_array = $list_array_type::from(list_data);
-
-            // index returns: [null, null, [-1,-2,3], [5,null], [0,null,0]]
-            let index = UInt32Array::from(vec![Some(2), None, Some(1), Some(3), Some(0)]);
-
-            let a = take(&list_array, &index, None).unwrap();
-            let a: &$list_array_type =
-                a.as_any().downcast_ref::<$list_array_type>().unwrap();
-
-            // construct a value array with expected results:
-            // [null, null, [-1,-2,3], [5,null], [0,null,0]]
-            let expected_data = Int32Array::from(vec![
-                Some(-1),
-                Some(-2),
-                Some(3),
-                Some(5),
-                None,
-                Some(0),
-                None,
-                Some(0),
-            ])
-            .data()
-            .clone();
-            // construct offsets
-            let expected_offsets: [$offset_type; 6] = [0, 0, 0, 3, 5, 8];
-            let expected_offsets = Buffer::from_slice_ref(&expected_offsets);
-            // construct list array from the two
-            let mut null_bits: [u8; 1] = [0; 1];
-            bit_util::set_bit(&mut null_bits, 2);
-            bit_util::set_bit(&mut null_bits, 3);
-            bit_util::set_bit(&mut null_bits, 4);
-            let expected_list_data = ArrayData::builder(list_data_type)
-                .len(5)
-                // null buffer must be recalculated as both values and indices have nulls
-                .null_bit_buffer(Buffer::from(null_bits))
-                .add_buffer(expected_offsets)
-                .add_child_data(expected_data)
-                .build();
-            let expected_list_array = $list_array_type::from(expected_list_data);
-
-            assert_eq!(a, &expected_list_array);
-        }};
-    }
-
-    fn do_take_fixed_size_list_test<T>(
-        length: <Int32Type as ArrowPrimitiveType>::Native,
-        input_data: Vec<Option<Vec<Option<T::Native>>>>,
-        indices: Vec<<UInt32Type as ArrowPrimitiveType>::Native>,
-        expected_data: Vec<Option<Vec<Option<T::Native>>>>,
-    ) where
-        T: ArrowPrimitiveType,
-        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
-    {
-        let indices = UInt32Array::from(indices);
-
-        let input_array = build_fixed_size_list_nullable::<T>(input_data, length);
-
-        let output = take_fixed_size_list(&input_array, &indices, length as u32).unwrap();
-
-        let expected = build_fixed_size_list_nullable::<T>(expected_data, length);
-
-        assert_eq!(&output, &expected)
-    }
-
-    #[test]
-    fn test_take_list() {
-        test_take_list!(i32, List, ListArray);
-    }
-
-    #[test]
-    fn test_take_large_list() {
-        test_take_list!(i64, LargeList, LargeListArray);
-    }
-
-    #[test]
-    fn test_take_list_with_value_nulls() {
-        test_take_list_with_value_nulls!(i32, List, ListArray);
-    }
-
-    #[test]
-    fn test_take_large_list_with_value_nulls() {
-        test_take_list_with_value_nulls!(i64, LargeList, LargeListArray);
-    }
-
-    #[test]
-    fn test_test_take_list_with_nulls() {
-        test_take_list_with_nulls!(i32, List, ListArray);
-    }
-
-    #[test]
-    fn test_test_take_large_list_with_nulls() {
-        test_take_list_with_nulls!(i64, LargeList, LargeListArray);
-    }
-
-    #[test]
-    fn test_take_fixed_size_list() {
-        do_take_fixed_size_list_test::<Int32Type>(
-            3,
-            vec![
-                Some(vec![None, Some(1), Some(2)]),
-                Some(vec![Some(3), Some(4), None]),
-                Some(vec![Some(6), Some(7), Some(8)]),
-            ],
-            vec![2, 1, 0],
-            vec![
-                Some(vec![Some(6), Some(7), Some(8)]),
-                Some(vec![Some(3), Some(4), None]),
-                Some(vec![None, Some(1), Some(2)]),
-            ],
-        );
-
-        do_take_fixed_size_list_test::<UInt8Type>(
-            1,
-            vec![
-                Some(vec![Some(1)]),
-                Some(vec![Some(2)]),
-                Some(vec![Some(3)]),
-                Some(vec![Some(4)]),
-                Some(vec![Some(5)]),
-                Some(vec![Some(6)]),
-                Some(vec![Some(7)]),
-                Some(vec![Some(8)]),
-            ],
-            vec![2, 7, 0],
-            vec![
-                Some(vec![Some(3)]),
-                Some(vec![Some(8)]),
-                Some(vec![Some(1)]),
-            ],
-        );
-
-        do_take_fixed_size_list_test::<UInt64Type>(
-            3,
-            vec![
-                Some(vec![Some(10), Some(11), Some(12)]),
-                Some(vec![Some(13), Some(14), Some(15)]),
-                None,
-                Some(vec![Some(16), Some(17), Some(18)]),
-            ],
-            vec![3, 2, 1, 2, 0],
-            vec![
-                Some(vec![Some(16), Some(17), Some(18)]),
-                None,
-                Some(vec![Some(13), Some(14), Some(15)]),
-                None,
-                Some(vec![Some(10), Some(11), Some(12)]),
-            ],
-        );
-    }
-
-    #[test]
-    #[should_panic(expected = "index out of bounds: the len is 4 but the index is 1000")]
-    fn test_take_list_out_of_bounds() {
-        // Construct a value array, [[0,0,0], [-1,-2,-1], [2,3]]
-        let value_data = Int32Array::from(vec![0, 0, 0, -1, -2, -1, 2, 3])
-            .data()
-            .clone();
-        // Construct offsets
-        let value_offsets = Buffer::from_slice_ref(&[0, 3, 6, 8]);
-        // Construct a list array from the above two
-        let list_data_type =
-            DataType::List(Box::new(Field::new("item", DataType::Int32, false)));
-        let list_data = ArrayData::builder(list_data_type)
-            .len(3)
-            .add_buffer(value_offsets)
-            .add_child_data(value_data)
-            .build();
-        let list_array = ListArray::from(list_data);
-
-        let index = UInt32Array::from(vec![1000]);
-
-        // A panic is expected here since we have not supplied the check_bounds
-        // option.
-        take(&list_array, &index, None).unwrap();
-    }
-
-    #[test]
-    fn test_take_struct() {
-        let array = create_test_struct();
-
-        let index = UInt32Array::from(vec![0, 3, 1, 0, 2]);
-        let a = take(&array, &index, None).unwrap();
-        let a: &StructArray = a.as_any().downcast_ref::<StructArray>().unwrap();
-        assert_eq!(index.len(), a.len());
-        assert_eq!(0, a.null_count());
-
-        let expected_bool_data = BooleanArray::from(vec![true, true, false, true, false])
-            .data()
-            .clone();
-        let expected_int_data = Int32Array::from(vec![42, 31, 28, 42, 19]).data().clone();
-        let mut field_types = vec![];
-        field_types.push(Field::new("a", DataType::Boolean, true));
-        field_types.push(Field::new("b", DataType::Int32, true));
-        let struct_array_data = ArrayData::builder(DataType::Struct(field_types))
-            .len(5)
-            .add_child_data(expected_bool_data)
-            .add_child_data(expected_int_data)
-            .build();
-        let struct_array = StructArray::from(struct_array_data);
-
-        assert_eq!(a, &struct_array);
-    }
-
-    #[test]
-    fn test_take_struct_with_nulls() {
-        let array = create_test_struct();
-
-        let index = UInt32Array::from(vec![None, Some(3), Some(1), None, Some(0)]);
-        let a = take(&array, &index, None).unwrap();
-        let a: &StructArray = a.as_any().downcast_ref::<StructArray>().unwrap();
-        assert_eq!(index.len(), a.len());
-        assert_eq!(0, a.null_count());
-
-        let expected_bool_data =
-            BooleanArray::from(vec![None, Some(true), Some(false), None, Some(true)])
-                .data()
-                .clone();
-        let expected_int_data =
-            Int32Array::from(vec![None, Some(31), Some(28), None, Some(42)])
-                .data()
-                .clone();
-
-        let mut field_types = vec![];
-        field_types.push(Field::new("a", DataType::Boolean, true));
-        field_types.push(Field::new("b", DataType::Int32, true));
-        let struct_array_data = ArrayData::builder(DataType::Struct(field_types))
-            .len(5)
-            // TODO: see https://issues.apache.org/jira/browse/ARROW-5408 for why count != 2
-            .add_child_data(expected_bool_data)
-            .add_child_data(expected_int_data)
-            .build();
-        let struct_array = StructArray::from(struct_array_data);
-        assert_eq!(a, &struct_array);
-    }
-
-    #[test]
-    fn test_take_out_of_bounds() {
-        let index = UInt32Array::from(vec![Some(3), None, Some(1), Some(3), Some(6)]);
-        let take_opt = TakeOptions { check_bounds: true };
-
-        // int64
-        let result = test_take_primitive_arrays::<Int64Type>(
-            vec![Some(0), None, Some(2), Some(3), None],
-            &index,
-            Some(take_opt),
-            vec![None],
-        );
-        assert!(result.is_err());
-    }
-
-    #[test]
-    #[should_panic(expected = "index out of bounds: the len is 4 but the index is 1000")]
-    fn test_take_out_of_bounds_panic() {
-        let index = UInt32Array::from(vec![Some(1000)]);
-
-        test_take_primitive_arrays::<Int64Type>(
-            vec![Some(0), Some(1), Some(2), Some(3)],
-            &index,
-            None,
-            vec![None],
-        )
-        .unwrap();
-    }
-
-    #[test]
-    fn test_take_dict() {
-        let keys_builder = Int16Builder::new(8);
-        let values_builder = StringBuilder::new(4);
-
-        let mut dict_builder = StringDictionaryBuilder::new(keys_builder, values_builder);
-
-        dict_builder.append("foo").unwrap();
-        dict_builder.append("bar").unwrap();
-        dict_builder.append("").unwrap();
-        dict_builder.append_null().unwrap();
-        dict_builder.append("foo").unwrap();
-        dict_builder.append("bar").unwrap();
-        dict_builder.append("bar").unwrap();
-        dict_builder.append("foo").unwrap();
-
-        let array = dict_builder.finish();
-        let dict_values = array.values().clone();
-        let dict_values = dict_values.as_any().downcast_ref::<StringArray>().unwrap();
-
-        let indices = UInt32Array::from(vec![
-            Some(0), // first "foo"
-            Some(7), // last "foo"
-            None,    // null index should return null
-            Some(5), // second "bar"
-            Some(6), // another "bar"
-            Some(2), // empty string
-            Some(3), // input is null at this index
-        ]);
-
-        let result = take(&array, &indices, None).unwrap();
-        let result = result
-            .as_any()
-            .downcast_ref::<DictionaryArray<Int16Type>>()
-            .unwrap();
-
-        let result_values: StringArray = result.values().data().clone().into();
-
-        // dictionary values should stay the same
-        let expected_values = StringArray::from(vec!["foo", "bar", ""]);
-        assert_eq!(&expected_values, dict_values);
-        assert_eq!(&expected_values, &result_values);
-
-        let expected_keys = Int16Array::from(vec![
-            Some(0),
-            Some(0),
-            None,
-            Some(1),
-            Some(1),
-            Some(2),
-            None,
-        ]);
-        assert_eq!(result.keys(), &expected_keys);
-    }
-}
diff --git a/rust/arrow/src/compute/kernels/temporal.rs b/rust/arrow/src/compute/kernels/temporal.rs
deleted file mode 100644
index 63e412990fd..00000000000
--- a/rust/arrow/src/compute/kernels/temporal.rs
+++ /dev/null
@@ -1,187 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Defines temporal kernels for time and date related functions.
-
-use chrono::{Datelike, Timelike};
-
-use crate::array::*;
-use crate::datatypes::*;
-use crate::error::{ArrowError, Result};
-/// Extracts the hours of a given temporal array as an array of integers
-pub fn hour<T>(array: &PrimitiveArray<T>) -> Result<Int32Array>
-where
-    T: ArrowTemporalType + ArrowNumericType,
-    i64: std::convert::From<T::Native>,
-{
-    let mut b = Int32Builder::new(array.len());
-    match array.data_type() {
-        &DataType::Time32(_) | &DataType::Time64(_) => {
-            for i in 0..array.len() {
-                if array.is_null(i) {
-                    b.append_null()?;
-                } else {
-                    match array.value_as_time(i) {
-                        Some(time) => b.append_value(time.hour() as i32)?,
-                        None => b.append_null()?,
-                    };
-                }
-            }
-        }
-        &DataType::Date32 | &DataType::Date64 | &DataType::Timestamp(_, _) => {
-            for i in 0..array.len() {
-                if array.is_null(i) {
-                    b.append_null()?;
-                } else {
-                    match array.value_as_datetime(i) {
-                        Some(dt) => b.append_value(dt.hour() as i32)?,
-                        None => b.append_null()?,
-                    }
-                }
-            }
-        }
-        dt => {
-            return {
-                Err(ArrowError::ComputeError(format!(
-                    "hour does not support type {:?}",
-                    dt
-                )))
-            }
-        }
-    }
-
-    Ok(b.finish())
-}
-
-/// Extracts the years of a given temporal array as an array of integers
-pub fn year<T>(array: &PrimitiveArray<T>) -> Result<Int32Array>
-where
-    T: ArrowTemporalType + ArrowNumericType,
-    i64: std::convert::From<T::Native>,
-{
-    let mut b = Int32Builder::new(array.len());
-    match array.data_type() {
-        &DataType::Date32 | &DataType::Date64 | &DataType::Timestamp(_, _) => {
-            for i in 0..array.len() {
-                if array.is_null(i) {
-                    b.append_null()?;
-                } else {
-                    match array.value_as_datetime(i) {
-                        Some(dt) => b.append_value(dt.year() as i32)?,
-                        None => b.append_null()?,
-                    }
-                }
-            }
-        }
-        dt => {
-            return {
-                Err(ArrowError::ComputeError(format!(
-                    "year does not support type {:?}",
-                    dt
-                )))
-            }
-        }
-    }
-
-    Ok(b.finish())
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_temporal_array_date64_hour() {
-        let a: PrimitiveArray<Date64Type> =
-            vec![Some(1514764800000), None, Some(1550636625000)].into();
-
-        let b = hour(&a).unwrap();
-        assert_eq!(0, b.value(0));
-        assert_eq!(false, b.is_valid(1));
-        assert_eq!(4, b.value(2));
-    }
-
-    #[test]
-    fn test_temporal_array_date32_hour() {
-        let a: PrimitiveArray<Date32Type> = vec![Some(15147), None, Some(15148)].into();
-
-        let b = hour(&a).unwrap();
-        assert_eq!(0, b.value(0));
-        assert_eq!(false, b.is_valid(1));
-        assert_eq!(0, b.value(2));
-    }
-
-    #[test]
-    fn test_temporal_array_time32_second_hour() {
-        let a: PrimitiveArray<Time32SecondType> = vec![37800, 86339].into();
-
-        let b = hour(&a).unwrap();
-        assert_eq!(10, b.value(0));
-        assert_eq!(23, b.value(1));
-    }
-
-    #[test]
-    fn test_temporal_array_time64_micro_hour() {
-        let a: PrimitiveArray<Time64MicrosecondType> =
-            vec![37800000000, 86339000000].into();
-
-        let b = hour(&a).unwrap();
-        assert_eq!(10, b.value(0));
-        assert_eq!(23, b.value(1));
-    }
-
-    #[test]
-    fn test_temporal_array_timestamp_micro_hour() {
-        let a: TimestampMicrosecondArray = vec![37800000000, 86339000000].into();
-
-        let b = hour(&a).unwrap();
-        assert_eq!(10, b.value(0));
-        assert_eq!(23, b.value(1));
-    }
-
-    #[test]
-    fn test_temporal_array_date64_year() {
-        let a: PrimitiveArray<Date64Type> =
-            vec![Some(1514764800000), None, Some(1550636625000)].into();
-
-        let b = year(&a).unwrap();
-        assert_eq!(2018, b.value(0));
-        assert_eq!(false, b.is_valid(1));
-        assert_eq!(2019, b.value(2));
-    }
-
-    #[test]
-    fn test_temporal_array_date32_year() {
-        let a: PrimitiveArray<Date32Type> = vec![Some(15147), None, Some(15448)].into();
-
-        let b = year(&a).unwrap();
-        assert_eq!(2011, b.value(0));
-        assert_eq!(false, b.is_valid(1));
-        assert_eq!(2012, b.value(2));
-    }
-
-    #[test]
-    fn test_temporal_array_timestamp_micro_year() {
-        let a: TimestampMicrosecondArray =
-            vec![Some(1612025847000000), None, Some(1722015847000000)].into();
-
-        let b = year(&a).unwrap();
-        assert_eq!(2021, b.value(0));
-        assert_eq!(false, b.is_valid(1));
-        assert_eq!(2024, b.value(2));
-    }
-}
diff --git a/rust/arrow/src/compute/kernels/window.rs b/rust/arrow/src/compute/kernels/window.rs
deleted file mode 100644
index 82e712c3079..00000000000
--- a/rust/arrow/src/compute/kernels/window.rs
+++ /dev/null
@@ -1,109 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Defines windowing functions, like `shift`ing
-
-use crate::compute::concat;
-use num::{abs, clamp};
-
-use crate::{
-    array::{make_array, ArrayData, PrimitiveArray},
-    datatypes::ArrowPrimitiveType,
-    error::Result,
-};
-use crate::{
-    array::{Array, ArrayRef},
-    buffer::MutableBuffer,
-};
-
-/// Shifts array by defined number of items (to left or right)
-/// A positive value for `offset` shifts the array to the right
-/// a negative value shifts the array to the left.
-/// # Examples
-/// ```
-/// use arrow::array::Int32Array;
-/// use arrow::error::Result;
-/// use arrow::compute::shift;
-///
-/// let a: Int32Array = vec![Some(1), None, Some(4)].into();
-/// // shift array 1 element to the right
-/// let res = shift(&a, 1).unwrap();
-/// let expected: Int32Array = vec![None, Some(1), None].into();
-/// assert_eq!(res.as_ref(), &expected)
-/// ```
-pub fn shift<T>(values: &PrimitiveArray<T>, offset: i64) -> Result<ArrayRef>
-where
-    T: ArrowPrimitiveType,
-{
-    // Compute slice
-    let slice_offset = clamp(-offset, 0, values.len() as i64) as usize;
-    let length = values.len() - abs(offset) as usize;
-    let slice = values.slice(slice_offset, length);
-
-    // Generate array with remaining `null` items
-    let nulls = abs(offset as i64) as usize;
-
-    let mut null_array = MutableBuffer::new(nulls);
-    let mut null_data = MutableBuffer::new(nulls * T::get_byte_width());
-    null_array.extend_zeros(nulls);
-    null_data.extend_zeros(nulls * T::get_byte_width());
-
-    let null_data = ArrayData::new(
-        T::DATA_TYPE,
-        nulls as usize,
-        Some(nulls),
-        Some(null_array.into()),
-        0,
-        vec![null_data.into()],
-        vec![],
-    );
-
-    // Concatenate both arrays, add nulls after if shift > 0 else before
-    let null_arr = make_array(null_data);
-    if offset > 0 {
-        concat(&[null_arr.as_ref(), slice.as_ref()])
-    } else {
-        concat(&[slice.as_ref(), null_arr.as_ref()])
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use crate::array::Int32Array;
-
-    use super::*;
-
-    #[test]
-    fn test_shift_neg() {
-        let a: Int32Array = vec![Some(1), None, Some(4)].into();
-        let res = shift(&a, -1).unwrap();
-
-        let expected: Int32Array = vec![None, Some(4), None].into();
-
-        assert_eq!(res.as_ref(), &expected);
-    }
-
-    #[test]
-    fn test_shift_pos() {
-        let a: Int32Array = vec![Some(1), None, Some(4)].into();
-        let res = shift(&a, 1).unwrap();
-
-        let expected: Int32Array = vec![None, Some(1), None].into();
-
-        assert_eq!(res.as_ref(), &expected);
-    }
-}
diff --git a/rust/arrow/src/compute/kernels/zip.rs b/rust/arrow/src/compute/kernels/zip.rs
deleted file mode 100644
index 0ee8e47bede..00000000000
--- a/rust/arrow/src/compute/kernels/zip.rs
+++ /dev/null
@@ -1,87 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use crate::array::*;
-use crate::compute::SlicesIterator;
-use crate::error::{ArrowError, Result};
-
-/// Zip two arrays by some boolean mask. Where the mask evaluates `true` values of `truthy`
-/// are taken, where the mask evaluates `false` values of `falsy` are taken.
-///
-/// # Arguments
-/// * `mask` - Boolean values used to determine from which array to take the values.
-/// * `truthy` - Values of this array are taken if mask evaluates `true`
-/// * `falsy` - Values of this array are taken if mask evaluates `false`
-pub fn zip(
-    mask: &BooleanArray,
-    truthy: &dyn Array,
-    falsy: &dyn Array,
-) -> Result<ArrayRef> {
-    if truthy.data_type() != falsy.data_type() {
-        return Err(ArrowError::InvalidArgumentError(
-            "arguments need to have the same data type".into(),
-        ));
-    }
-    if truthy.len() != falsy.len() || falsy.len() != mask.len() {
-        return Err(ArrowError::InvalidArgumentError(
-            "all arrays should have the same length".into(),
-        ));
-    }
-    let falsy = falsy.data();
-    let truthy = truthy.data();
-
-    let mut mutable = MutableArrayData::new(vec![&*truthy, &*falsy], false, truthy.len());
-
-    // the SlicesIterator slices only the true values. So the gaps left by this iterator we need to
-    // fill with falsy values
-
-    // keep track of how much is filled
-    let mut filled = 0;
-
-    SlicesIterator::new(mask).for_each(|(start, end)| {
-        // the gap needs to be filled with falsy values
-        if start > filled {
-            mutable.extend(1, filled, start);
-        }
-        // fill with truthy values
-        mutable.extend(0, start, end);
-        filled = end;
-    });
-    // the remaining part is falsy
-    if filled < truthy.len() {
-        mutable.extend(1, filled, truthy.len());
-    }
-
-    let data = mutable.freeze();
-    Ok(make_array(data))
-}
-
-#[cfg(test)]
-mod test {
-    use super::*;
-
-    #[test]
-    fn test_zip_kernel() {
-        let a = Int32Array::from(vec![Some(5), None, Some(7), None, Some(1)]);
-        let b = Int32Array::from(vec![None, Some(3), Some(6), Some(7), Some(3)]);
-        let mask = BooleanArray::from(vec![true, true, false, false, true]);
-        let out = zip(&mask, &a, &b).unwrap();
-        let actual = out.as_any().downcast_ref::<Int32Array>().unwrap();
-        let expected = Int32Array::from(vec![Some(5), None, Some(6), Some(7), Some(1)]);
-        assert_eq!(actual, &expected);
-    }
-}
diff --git a/rust/arrow/src/compute/mod.rs b/rust/arrow/src/compute/mod.rs
deleted file mode 100644
index be1aa277ca4..00000000000
--- a/rust/arrow/src/compute/mod.rs
+++ /dev/null
@@ -1,36 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Computation kernels on Arrow Arrays
-
-pub mod kernels;
-
-mod util;
-
-pub use self::kernels::aggregate::*;
-pub use self::kernels::arithmetic::*;
-pub use self::kernels::boolean::*;
-pub use self::kernels::cast::*;
-pub use self::kernels::comparison::*;
-pub use self::kernels::concat::*;
-pub use self::kernels::filter::*;
-pub use self::kernels::limit::*;
-pub use self::kernels::regexp::*;
-pub use self::kernels::sort::*;
-pub use self::kernels::take::*;
-pub use self::kernels::temporal::*;
-pub use self::kernels::window::*;
diff --git a/rust/arrow/src/compute/util.rs b/rust/arrow/src/compute/util.rs
deleted file mode 100644
index 56de5948301..00000000000
--- a/rust/arrow/src/compute/util.rs
+++ /dev/null
@@ -1,463 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Common utilities for computation kernels.
-
-use crate::array::*;
-use crate::buffer::{buffer_bin_and, buffer_bin_or, Buffer};
-use crate::datatypes::*;
-use crate::error::{ArrowError, Result};
-use num::{One, ToPrimitive, Zero};
-use std::ops::Add;
-
-/// Combines the null bitmaps of two arrays using a bitwise `and` operation.
-///
-/// This function is useful when implementing operations on higher level arrays.
-#[allow(clippy::unnecessary_wraps)]
-pub(super) fn combine_option_bitmap(
-    left_data: &ArrayData,
-    right_data: &ArrayData,
-    len_in_bits: usize,
-) -> Result<Option<Buffer>> {
-    let left_offset_in_bits = left_data.offset();
-    let right_offset_in_bits = right_data.offset();
-
-    let left = left_data.null_buffer();
-    let right = right_data.null_buffer();
-
-    match left {
-        None => match right {
-            None => Ok(None),
-            Some(r) => Ok(Some(r.bit_slice(right_offset_in_bits, len_in_bits))),
-        },
-        Some(l) => match right {
-            None => Ok(Some(l.bit_slice(left_offset_in_bits, len_in_bits))),
-
-            Some(r) => Ok(Some(buffer_bin_and(
-                &l,
-                left_offset_in_bits,
-                &r,
-                right_offset_in_bits,
-                len_in_bits,
-            ))),
-        },
-    }
-}
-
-/// Compares the null bitmaps of two arrays using a bitwise `or` operation.
-///
-/// This function is useful when implementing operations on higher level arrays.
-#[allow(clippy::unnecessary_wraps)]
-pub(super) fn compare_option_bitmap(
-    left_data: &ArrayData,
-    right_data: &ArrayData,
-    len_in_bits: usize,
-) -> Result<Option<Buffer>> {
-    let left_offset_in_bits = left_data.offset();
-    let right_offset_in_bits = right_data.offset();
-
-    let left = left_data.null_buffer();
-    let right = right_data.null_buffer();
-
-    match left {
-        None => match right {
-            None => Ok(None),
-            Some(r) => Ok(Some(r.bit_slice(right_offset_in_bits, len_in_bits))),
-        },
-        Some(l) => match right {
-            None => Ok(Some(l.bit_slice(left_offset_in_bits, len_in_bits))),
-
-            Some(r) => Ok(Some(buffer_bin_or(
-                &l,
-                left_offset_in_bits,
-                &r,
-                right_offset_in_bits,
-                len_in_bits,
-            ))),
-        },
-    }
-}
-
-/// Takes/filters a list array's inner data using the offsets of the list array.
-///
-/// Where a list array has indices `[0,2,5,10]`, taking indices of `[2,0]` returns
-/// an array of the indices `[5..10, 0..2]` and offsets `[0,5,7]` (5 elements and 2
-/// elements)
-pub(super) fn take_value_indices_from_list<IndexType, OffsetType>(
-    list: &GenericListArray<OffsetType::Native>,
-    indices: &PrimitiveArray<IndexType>,
-) -> Result<(PrimitiveArray<OffsetType>, Vec<OffsetType::Native>)>
-where
-    IndexType: ArrowNumericType,
-    IndexType::Native: ToPrimitive,
-    OffsetType: ArrowNumericType,
-    OffsetType::Native: OffsetSizeTrait + Add + Zero + One,
-    PrimitiveArray<OffsetType>: From<Vec<Option<OffsetType::Native>>>,
-{
-    // TODO: benchmark this function, there might be a faster unsafe alternative
-    let offsets: &[OffsetType::Native] = list.value_offsets();
-
-    let mut new_offsets = Vec::with_capacity(indices.len());
-    let mut values = Vec::new();
-    let mut current_offset = OffsetType::Native::zero();
-    // add first offset
-    new_offsets.push(OffsetType::Native::zero());
-    // compute the value indices, and set offsets accordingly
-    for i in 0..indices.len() {
-        if indices.is_valid(i) {
-            let ix = ToPrimitive::to_usize(&indices.value(i)).ok_or_else(|| {
-                ArrowError::ComputeError("Cast to usize failed".to_string())
-            })?;
-            let start = offsets[ix];
-            let end = offsets[ix + 1];
-            current_offset += end - start;
-            new_offsets.push(current_offset);
-
-            let mut curr = start;
-
-            // if start == end, this slot is empty
-            while curr < end {
-                values.push(Some(curr));
-                curr += OffsetType::Native::one();
-            }
-        } else {
-            new_offsets.push(current_offset);
-        }
-    }
-
-    Ok((PrimitiveArray::<OffsetType>::from(values), new_offsets))
-}
-
-/// Takes/filters a fixed size list array's inner data using the offsets of the list array.
-pub(super) fn take_value_indices_from_fixed_size_list<IndexType>(
-    list: &FixedSizeListArray,
-    indices: &PrimitiveArray<IndexType>,
-    length: <UInt32Type as ArrowPrimitiveType>::Native,
-) -> Result<PrimitiveArray<UInt32Type>>
-where
-    IndexType: ArrowNumericType,
-    IndexType::Native: ToPrimitive,
-{
-    let mut values = vec![];
-
-    for i in 0..indices.len() {
-        if indices.is_valid(i) {
-            let index = ToPrimitive::to_usize(&indices.value(i)).ok_or_else(|| {
-                ArrowError::ComputeError("Cast to usize failed".to_string())
-            })?;
-            let start =
-                list.value_offset(index) as <UInt32Type as ArrowPrimitiveType>::Native;
-
-            values.extend(start..start + length);
-        }
-    }
-
-    Ok(PrimitiveArray::<UInt32Type>::from(values))
-}
-
-#[cfg(test)]
-pub(super) mod tests {
-    use super::*;
-
-    use std::sync::Arc;
-
-    use crate::datatypes::DataType;
-    use crate::util::bit_util;
-    use crate::{array::ArrayData, buffer::MutableBuffer};
-
-    fn make_data_with_null_bit_buffer(
-        len: usize,
-        offset: usize,
-        null_bit_buffer: Option<Buffer>,
-    ) -> Arc<ArrayData> {
-        // empty vec for buffers and children is not really correct, but for these tests we only care about the null bitmap
-        Arc::new(ArrayData::new(
-            DataType::UInt8,
-            len,
-            None,
-            null_bit_buffer,
-            offset,
-            vec![],
-            vec![],
-        ))
-    }
-
-    #[test]
-    fn test_combine_option_bitmap() {
-        let none_bitmap = make_data_with_null_bit_buffer(8, 0, None);
-        let some_bitmap =
-            make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b01001010])));
-        let inverse_bitmap =
-            make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b10110101])));
-        assert_eq!(
-            None,
-            combine_option_bitmap(&none_bitmap, &none_bitmap, 8).unwrap()
-        );
-        assert_eq!(
-            Some(Buffer::from([0b01001010])),
-            combine_option_bitmap(&some_bitmap, &none_bitmap, 8).unwrap()
-        );
-        assert_eq!(
-            Some(Buffer::from([0b01001010])),
-            combine_option_bitmap(&none_bitmap, &some_bitmap, 8,).unwrap()
-        );
-        assert_eq!(
-            Some(Buffer::from([0b01001010])),
-            combine_option_bitmap(&some_bitmap, &some_bitmap, 8,).unwrap()
-        );
-        assert_eq!(
-            Some(Buffer::from([0b0])),
-            combine_option_bitmap(&some_bitmap, &inverse_bitmap, 8,).unwrap()
-        );
-    }
-
-    #[test]
-    fn test_compare_option_bitmap() {
-        let none_bitmap = make_data_with_null_bit_buffer(8, 0, None);
-        let some_bitmap =
-            make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b01001010])));
-        let inverse_bitmap =
-            make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b10110101])));
-        assert_eq!(
-            None,
-            compare_option_bitmap(&none_bitmap, &none_bitmap, 8).unwrap()
-        );
-        assert_eq!(
-            Some(Buffer::from([0b01001010])),
-            compare_option_bitmap(&some_bitmap, &none_bitmap, 8).unwrap()
-        );
-        assert_eq!(
-            Some(Buffer::from([0b01001010])),
-            compare_option_bitmap(&none_bitmap, &some_bitmap, 8,).unwrap()
-        );
-        assert_eq!(
-            Some(Buffer::from([0b01001010])),
-            compare_option_bitmap(&some_bitmap, &some_bitmap, 8,).unwrap()
-        );
-        assert_eq!(
-            Some(Buffer::from([0b11111111])),
-            compare_option_bitmap(&some_bitmap, &inverse_bitmap, 8,).unwrap()
-        );
-    }
-
-    pub(crate) fn build_generic_list<S, T>(
-        data: Vec<Option<Vec<T::Native>>>,
-    ) -> GenericListArray<S>
-    where
-        S: OffsetSizeTrait + 'static,
-        T: ArrowPrimitiveType,
-        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
-    {
-        let data = data
-            .into_iter()
-            .map(|subarray| {
-                subarray.map(|item| {
-                    item.into_iter()
-                        .map(Some)
-                        .collect::<Vec<Option<T::Native>>>()
-                })
-            })
-            .collect();
-        build_generic_list_nullable(data)
-    }
-
-    pub(crate) fn build_generic_list_nullable<S, T>(
-        data: Vec<Option<Vec<Option<T::Native>>>>,
-    ) -> GenericListArray<S>
-    where
-        S: OffsetSizeTrait + 'static,
-        T: ArrowPrimitiveType,
-        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
-    {
-        use std::any::TypeId;
-
-        let mut offset = vec![0];
-        let mut values = vec![];
-
-        let list_len = data.len();
-        let num_bytes = bit_util::ceil(list_len, 8);
-        let mut list_null_count = 0;
-        let mut list_bitmap = MutableBuffer::new(num_bytes).with_bitset(num_bytes, true);
-        for (idx, array) in data.into_iter().enumerate() {
-            if let Some(mut array) = array {
-                values.append(&mut array);
-            } else {
-                list_null_count += 1;
-                bit_util::unset_bit(&mut list_bitmap.as_slice_mut(), idx);
-            }
-            offset.push(values.len() as i64);
-        }
-
-        let value_data = PrimitiveArray::<T>::from(values).data().clone();
-        let (list_data_type, value_offsets) = if TypeId::of::<S>() == TypeId::of::<i32>()
-        {
-            (
-                DataType::List(Box::new(Field::new(
-                    "item",
-                    T::DATA_TYPE,
-                    list_null_count == 0,
-                ))),
-                Buffer::from_slice_ref(
-                    &offset.into_iter().map(|x| x as i32).collect::<Vec<i32>>(),
-                ),
-            )
-        } else if TypeId::of::<S>() == TypeId::of::<i64>() {
-            (
-                DataType::LargeList(Box::new(Field::new(
-                    "item",
-                    T::DATA_TYPE,
-                    list_null_count == 0,
-                ))),
-                Buffer::from_slice_ref(&offset),
-            )
-        } else {
-            unreachable!()
-        };
-
-        let list_data = ArrayData::builder(list_data_type)
-            .len(list_len)
-            .null_bit_buffer(list_bitmap.into())
-            .add_buffer(value_offsets)
-            .add_child_data(value_data)
-            .build();
-
-        GenericListArray::<S>::from(list_data)
-    }
-
-    pub(crate) fn build_fixed_size_list<T>(
-        data: Vec<Option<Vec<T::Native>>>,
-        length: <Int32Type as ArrowPrimitiveType>::Native,
-    ) -> FixedSizeListArray
-    where
-        T: ArrowPrimitiveType,
-        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
-    {
-        let data = data
-            .into_iter()
-            .map(|subarray| {
-                subarray.map(|item| {
-                    item.into_iter()
-                        .map(Some)
-                        .collect::<Vec<Option<T::Native>>>()
-                })
-            })
-            .collect();
-        build_fixed_size_list_nullable(data, length)
-    }
-
-    pub(crate) fn build_fixed_size_list_nullable<T>(
-        list_values: Vec<Option<Vec<Option<T::Native>>>>,
-        length: <Int32Type as ArrowPrimitiveType>::Native,
-    ) -> FixedSizeListArray
-    where
-        T: ArrowPrimitiveType,
-        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
-    {
-        let mut values = vec![];
-        let mut list_null_count = 0;
-        let list_len = list_values.len();
-
-        let num_bytes = bit_util::ceil(list_len, 8);
-        let mut list_bitmap = MutableBuffer::new(num_bytes).with_bitset(num_bytes, true);
-        for (idx, list_element) in list_values.into_iter().enumerate() {
-            if let Some(items) = list_element {
-                // every sub-array should have the same length
-                debug_assert_eq!(length as usize, items.len());
-
-                values.extend(items.into_iter());
-            } else {
-                list_null_count += 1;
-                bit_util::unset_bit(&mut list_bitmap.as_slice_mut(), idx);
-                values.extend(vec![None; length as usize].into_iter());
-            }
-        }
-
-        let list_data_type = DataType::FixedSizeList(
-            Box::new(Field::new("item", T::DATA_TYPE, list_null_count == 0)),
-            length,
-        );
-
-        let child_data = PrimitiveArray::<T>::from(values).data().clone();
-
-        let list_data = ArrayData::builder(list_data_type)
-            .len(list_len)
-            .null_bit_buffer(list_bitmap.into())
-            .add_child_data(child_data)
-            .build();
-
-        FixedSizeListArray::from(list_data)
-    }
-
-    #[test]
-    fn test_take_value_index_from_list() {
-        let list = build_generic_list::<i32, Int32Type>(vec![
-            Some(vec![0, 1]),
-            Some(vec![2, 3, 4]),
-            Some(vec![5, 6, 7, 8, 9]),
-        ]);
-        let indices = UInt32Array::from(vec![2, 0]);
-
-        let (indexed, offsets) = take_value_indices_from_list(&list, &indices).unwrap();
-
-        assert_eq!(indexed, Int32Array::from(vec![5, 6, 7, 8, 9, 0, 1]));
-        assert_eq!(offsets, vec![0, 5, 7]);
-    }
-
-    #[test]
-    fn test_take_value_index_from_large_list() {
-        let list = build_generic_list::<i64, Int32Type>(vec![
-            Some(vec![0, 1]),
-            Some(vec![2, 3, 4]),
-            Some(vec![5, 6, 7, 8, 9]),
-        ]);
-        let indices = UInt32Array::from(vec![2, 0]);
-
-        let (indexed, offsets) =
-            take_value_indices_from_list::<_, Int64Type>(&list, &indices).unwrap();
-
-        assert_eq!(indexed, Int64Array::from(vec![5, 6, 7, 8, 9, 0, 1]));
-        assert_eq!(offsets, vec![0, 5, 7]);
-    }
-
-    #[test]
-    fn test_take_value_index_from_fixed_list() {
-        let list = build_fixed_size_list_nullable::<Int32Type>(
-            vec![
-                Some(vec![Some(1), Some(2), None]),
-                Some(vec![Some(4), None, Some(6)]),
-                None,
-                Some(vec![None, Some(8), Some(9)]),
-            ],
-            3,
-        );
-
-        let indices = UInt32Array::from(vec![2, 1, 0]);
-        let indexed =
-            take_value_indices_from_fixed_size_list(&list, &indices, 3).unwrap();
-
-        assert_eq!(indexed, UInt32Array::from(vec![6, 7, 8, 3, 4, 5, 0, 1, 2]));
-
-        let indices = UInt32Array::from(vec![3, 2, 1, 2, 0]);
-        let indexed =
-            take_value_indices_from_fixed_size_list(&list, &indices, 3).unwrap();
-
-        assert_eq!(
-            indexed,
-            UInt32Array::from(vec![9, 10, 11, 6, 7, 8, 3, 4, 5, 6, 7, 8, 0, 1, 2])
-        );
-    }
-}
diff --git a/rust/arrow/src/csv/mod.rs b/rust/arrow/src/csv/mod.rs
deleted file mode 100644
index ffe82f33580..00000000000
--- a/rust/arrow/src/csv/mod.rs
+++ /dev/null
@@ -1,27 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Transfer data between the Arrow memory format and CSV (comma-separated values).
-
-pub mod reader;
-pub mod writer;
-
-pub use self::reader::infer_schema_from_files;
-pub use self::reader::Reader;
-pub use self::reader::ReaderBuilder;
-pub use self::writer::Writer;
-pub use self::writer::WriterBuilder;
diff --git a/rust/arrow/src/csv/reader.rs b/rust/arrow/src/csv/reader.rs
deleted file mode 100644
index 985c88b4978..00000000000
--- a/rust/arrow/src/csv/reader.rs
+++ /dev/null
@@ -1,1291 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! CSV Reader
-//!
-//! This CSV reader allows CSV files to be read into the Arrow memory model. Records are
-//! loaded in batches and are then converted from row-based data to columnar data.
-//!
-//! Example:
-//!
-//! ```
-//! use arrow::csv;
-//! use arrow::datatypes::{DataType, Field, Schema};
-//! use std::fs::File;
-//! use std::sync::Arc;
-//!
-//! let schema = Schema::new(vec![
-//!     Field::new("city", DataType::Utf8, false),
-//!     Field::new("lat", DataType::Float64, false),
-//!     Field::new("lng", DataType::Float64, false),
-//! ]);
-//!
-//! let file = File::open("test/data/uk_cities.csv").unwrap();
-//!
-//! let mut csv = csv::Reader::new(file, Arc::new(schema), false, None, 1024, None, None);
-//! let batch = csv.next().unwrap().unwrap();
-//! ```
-
-use core::cmp::min;
-use lazy_static::lazy_static;
-use regex::{Regex, RegexBuilder};
-use std::collections::HashSet;
-use std::fmt;
-use std::fs::File;
-use std::io::{Read, Seek, SeekFrom};
-use std::sync::Arc;
-
-use csv as csv_crate;
-
-use crate::array::{ArrayRef, BooleanArray, PrimitiveArray, StringArray};
-use crate::datatypes::*;
-use crate::error::{ArrowError, Result};
-use crate::record_batch::RecordBatch;
-
-use self::csv_crate::{ByteRecord, StringRecord};
-
-lazy_static! {
-    static ref DECIMAL_RE: Regex = Regex::new(r"^-?(\d+\.\d+)$").unwrap();
-    static ref INTEGER_RE: Regex = Regex::new(r"^-?(\d+)$").unwrap();
-    static ref BOOLEAN_RE: Regex = RegexBuilder::new(r"^(true)$|^(false)$")
-        .case_insensitive(true)
-        .build()
-        .unwrap();
-    static ref DATE_RE: Regex = Regex::new(r"^\d{4}-\d\d-\d\d$").unwrap();
-    static ref DATETIME_RE: Regex =
-        Regex::new(r"^\d{4}-\d\d-\d\dT\d\d:\d\d:\d\d$").unwrap();
-}
-
-/// Infer the data type of a record
-fn infer_field_schema(string: &str) -> DataType {
-    // when quoting is enabled in the reader, these quotes aren't escaped, we default to
-    // Utf8 for them
-    if string.starts_with('"') {
-        return DataType::Utf8;
-    }
-    // match regex in a particular order
-    if BOOLEAN_RE.is_match(string) {
-        DataType::Boolean
-    } else if DECIMAL_RE.is_match(string) {
-        DataType::Float64
-    } else if INTEGER_RE.is_match(string) {
-        DataType::Int64
-    } else if DATETIME_RE.is_match(string) {
-        DataType::Date64
-    } else if DATE_RE.is_match(string) {
-        DataType::Date32
-    } else {
-        DataType::Utf8
-    }
-}
-
-/// Infer the schema of a CSV file by reading through the first n records of the file,
-/// with `max_read_records` controlling the maximum number of records to read.
-///
-/// If `max_read_records` is not set, the whole file is read to infer its schema.
-///
-/// Return infered schema and number of records used for inference. This function does not change
-/// reader cursor offset.
-pub fn infer_file_schema<R: Read + Seek>(
-    reader: &mut R,
-    delimiter: u8,
-    max_read_records: Option<usize>,
-    has_header: bool,
-) -> Result<(Schema, usize)> {
-    let saved_offset = reader.seek(SeekFrom::Current(0))?;
-
-    let (schema, records_count) =
-        infer_reader_schema(reader, delimiter, max_read_records, has_header)?;
-
-    // return the reader seek back to the start
-    reader.seek(SeekFrom::Start(saved_offset))?;
-
-    Ok((schema, records_count))
-}
-
-/// Infer schema of CSV records provided by struct that implements `Read` trait.
-///
-/// `max_read_records` controlling the maximum number of records to read. If `max_read_records` is
-/// not set, all records are read to infer the schema.
-///
-/// Return infered schema and number of records used for inference.
-pub fn infer_reader_schema<R: Read>(
-    reader: &mut R,
-    delimiter: u8,
-    max_read_records: Option<usize>,
-    has_header: bool,
-) -> Result<(Schema, usize)> {
-    let mut csv_reader = csv_crate::ReaderBuilder::new()
-        .delimiter(delimiter)
-        .from_reader(reader);
-
-    // get or create header names
-    // when has_header is false, creates default column names with column_ prefix
-    let headers: Vec<String> = if has_header {
-        let headers = &csv_reader.headers()?.clone();
-        headers.iter().map(|s| s.to_string()).collect()
-    } else {
-        let first_record_count = &csv_reader.headers()?.len();
-        (0..*first_record_count)
-            .map(|i| format!("column_{}", i + 1))
-            .collect()
-    };
-
-    let header_length = headers.len();
-    // keep track of inferred field types
-    let mut column_types: Vec<HashSet<DataType>> = vec![HashSet::new(); header_length];
-    // keep track of columns with nulls
-    let mut nulls: Vec<bool> = vec![false; header_length];
-
-    let mut records_count = 0;
-    let mut fields = vec![];
-
-    let mut record = StringRecord::new();
-    let max_records = max_read_records.unwrap_or(usize::MAX);
-    while records_count < max_records {
-        if !csv_reader.read_record(&mut record)? {
-            break;
-        }
-        records_count += 1;
-
-        for i in 0..header_length {
-            if let Some(string) = record.get(i) {
-                if string.is_empty() {
-                    nulls[i] = true;
-                } else {
-                    column_types[i].insert(infer_field_schema(string));
-                }
-            }
-        }
-    }
-
-    // build schema from inference results
-    for i in 0..header_length {
-        let possibilities = &column_types[i];
-        let has_nulls = nulls[i];
-        let field_name = &headers[i];
-
-        // determine data type based on possible types
-        // if there are incompatible types, use DataType::Utf8
-        match possibilities.len() {
-            1 => {
-                for dtype in possibilities.iter() {
-                    fields.push(Field::new(&field_name, dtype.clone(), has_nulls));
-                }
-            }
-            2 => {
-                if possibilities.contains(&DataType::Int64)
-                    && possibilities.contains(&DataType::Float64)
-                {
-                    // we have an integer and double, fall down to double
-                    fields.push(Field::new(&field_name, DataType::Float64, has_nulls));
-                } else {
-                    // default to Utf8 for conflicting datatypes (e.g bool and int)
-                    fields.push(Field::new(&field_name, DataType::Utf8, has_nulls));
-                }
-            }
-            _ => fields.push(Field::new(&field_name, DataType::Utf8, has_nulls)),
-        }
-    }
-
-    Ok((Schema::new(fields), records_count))
-}
-
-/// Infer schema from a list of CSV files by reading through first n records
-/// with `max_read_records` controlling the maximum number of records to read.
-///
-/// Files will be read in the given order untill n records have been reached.
-///
-/// If `max_read_records` is not set, all files will be read fully to infer the schema.
-pub fn infer_schema_from_files(
-    files: &[String],
-    delimiter: u8,
-    max_read_records: Option<usize>,
-    has_header: bool,
-) -> Result<Schema> {
-    let mut schemas = vec![];
-    let mut records_to_read = max_read_records.unwrap_or(std::usize::MAX);
-
-    for fname in files.iter() {
-        let (schema, records_read) = infer_file_schema(
-            &mut File::open(fname)?,
-            delimiter,
-            Some(records_to_read),
-            has_header,
-        )?;
-        if records_read == 0 {
-            continue;
-        }
-        schemas.push(schema.clone());
-        records_to_read -= records_read;
-        if records_to_read == 0 {
-            break;
-        }
-    }
-
-    Schema::try_merge(schemas)
-}
-
-// optional bounds of the reader, of the form (min line, max line).
-type Bounds = Option<(usize, usize)>;
-
-/// CSV file reader
-pub struct Reader<R: Read> {
-    /// Explicit schema for the CSV file
-    schema: SchemaRef,
-    /// Optional projection for which columns to load (zero-based column indices)
-    projection: Option<Vec<usize>>,
-    /// File reader
-    reader: csv_crate::Reader<R>,
-    /// Current line number
-    line_number: usize,
-    /// Maximum number of rows to read
-    end: usize,
-    /// Number of records per batch
-    batch_size: usize,
-    /// Vector that can hold the `StringRecord`s of the batches
-    batch_records: Vec<StringRecord>,
-}
-
-impl<R> fmt::Debug for Reader<R>
-where
-    R: Read,
-{
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        f.debug_struct("Reader")
-            .field("schema", &self.schema)
-            .field("projection", &self.projection)
-            .field("line_number", &self.line_number)
-            .finish()
-    }
-}
-
-impl<R: Read> Reader<R> {
-    /// Create a new CsvReader from any value that implements the `Read` trait.
-    ///
-    /// If reading a `File` or an input that supports `std::io::Read` and `std::io::Seek`;
-    /// you can customise the Reader, such as to enable schema inference, use
-    /// `ReaderBuilder`.
-    pub fn new(
-        reader: R,
-        schema: SchemaRef,
-        has_header: bool,
-        delimiter: Option<u8>,
-        batch_size: usize,
-        bounds: Bounds,
-        projection: Option<Vec<usize>>,
-    ) -> Self {
-        Self::from_reader(
-            reader, schema, has_header, delimiter, batch_size, bounds, projection,
-        )
-    }
-
-    /// Returns the schema of the reader, useful for getting the schema without reading
-    /// record batches
-    pub fn schema(&self) -> SchemaRef {
-        match &self.projection {
-            Some(projection) => {
-                let fields = self.schema.fields();
-                let projected_fields: Vec<Field> =
-                    projection.iter().map(|i| fields[*i].clone()).collect();
-
-                Arc::new(Schema::new(projected_fields))
-            }
-            None => self.schema.clone(),
-        }
-    }
-
-    /// Create a new CsvReader from a Reader
-    ///
-    /// This constructor allows you more flexibility in what records are processed by the
-    /// csv reader.
-    pub fn from_reader(
-        reader: R,
-        schema: SchemaRef,
-        has_header: bool,
-        delimiter: Option<u8>,
-        batch_size: usize,
-        bounds: Bounds,
-        projection: Option<Vec<usize>>,
-    ) -> Self {
-        let mut reader_builder = csv_crate::ReaderBuilder::new();
-        reader_builder.has_headers(has_header);
-
-        if let Some(c) = delimiter {
-            reader_builder.delimiter(c);
-        }
-
-        let mut csv_reader = reader_builder.from_reader(reader);
-
-        let (start, end) = match bounds {
-            None => (0, usize::MAX),
-            Some((start, end)) => (start, end),
-        };
-
-        // First we will skip `start` rows
-        // note that this skips by iteration. This is because in general it is not possible
-        // to seek in CSV. However, skiping still saves the burden of creating arrow arrays,
-        // which is a slow operation that scales with the number of columns
-
-        let mut record = ByteRecord::new();
-        // Skip first start items
-        for _ in 0..start {
-            let res = csv_reader.read_byte_record(&mut record);
-            if !res.unwrap_or(false) {
-                break;
-            }
-        }
-
-        // Initialize batch_records with StringRecords so they
-        // can be reused accross batches
-        let mut batch_records = Vec::with_capacity(batch_size);
-        batch_records.resize_with(batch_size, Default::default);
-
-        Self {
-            schema,
-            projection,
-            reader: csv_reader,
-            line_number: if has_header { start + 1 } else { start },
-            batch_size,
-            end,
-            batch_records,
-        }
-    }
-}
-
-impl<R: Read> Iterator for Reader<R> {
-    type Item = Result<RecordBatch>;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        let remaining = self.end - self.line_number;
-
-        let mut read_records = 0;
-        for i in 0..min(self.batch_size, remaining) {
-            match self.reader.read_record(&mut self.batch_records[i]) {
-                Ok(true) => {
-                    read_records += 1;
-                }
-                Ok(false) => break,
-                Err(e) => {
-                    return Some(Err(ArrowError::ParseError(format!(
-                        "Error parsing line {}: {:?}",
-                        self.line_number + i,
-                        e
-                    ))))
-                }
-            }
-        }
-
-        // return early if no data was loaded
-        if read_records == 0 {
-            return None;
-        }
-
-        // parse the batches into a RecordBatch
-        let result = parse(
-            &self.batch_records[..read_records],
-            &self.schema.fields(),
-            Some(self.schema.metadata.clone()),
-            &self.projection,
-            self.line_number,
-        );
-
-        self.line_number += read_records;
-
-        Some(result)
-    }
-}
-
-/// parses a slice of [csv_crate::StringRecord] into a [array::record_batch::RecordBatch].
-fn parse(
-    rows: &[StringRecord],
-    fields: &[Field],
-    metadata: Option<std::collections::HashMap<String, String>>,
-    projection: &Option<Vec<usize>>,
-    line_number: usize,
-) -> Result<RecordBatch> {
-    let projection: Vec<usize> = match projection {
-        Some(ref v) => v.clone(),
-        None => fields.iter().enumerate().map(|(i, _)| i).collect(),
-    };
-
-    let arrays: Result<Vec<ArrayRef>> = projection
-        .iter()
-        .map(|i| {
-            let i = *i;
-            let field = &fields[i];
-            match field.data_type() {
-                &DataType::Boolean => build_boolean_array(line_number, rows, i),
-                &DataType::Int8 => {
-                    build_primitive_array::<Int8Type>(line_number, rows, i)
-                }
-                &DataType::Int16 => {
-                    build_primitive_array::<Int16Type>(line_number, rows, i)
-                }
-                &DataType::Int32 => {
-                    build_primitive_array::<Int32Type>(line_number, rows, i)
-                }
-                &DataType::Int64 => {
-                    build_primitive_array::<Int64Type>(line_number, rows, i)
-                }
-                &DataType::UInt8 => {
-                    build_primitive_array::<UInt8Type>(line_number, rows, i)
-                }
-                &DataType::UInt16 => {
-                    build_primitive_array::<UInt16Type>(line_number, rows, i)
-                }
-                &DataType::UInt32 => {
-                    build_primitive_array::<UInt32Type>(line_number, rows, i)
-                }
-                &DataType::UInt64 => {
-                    build_primitive_array::<UInt64Type>(line_number, rows, i)
-                }
-                &DataType::Float32 => {
-                    build_primitive_array::<Float32Type>(line_number, rows, i)
-                }
-                &DataType::Float64 => {
-                    build_primitive_array::<Float64Type>(line_number, rows, i)
-                }
-                &DataType::Date32 => {
-                    build_primitive_array::<Date32Type>(line_number, rows, i)
-                }
-                &DataType::Date64 => {
-                    build_primitive_array::<Date64Type>(line_number, rows, i)
-                }
-                &DataType::Timestamp(TimeUnit::Microsecond, _) => {
-                    build_primitive_array::<TimestampMicrosecondType>(
-                        line_number,
-                        rows,
-                        i,
-                    )
-                }
-                &DataType::Timestamp(TimeUnit::Nanosecond, _) => {
-                    build_primitive_array::<TimestampNanosecondType>(line_number, rows, i)
-                }
-                &DataType::Utf8 => Ok(Arc::new(
-                    rows.iter().map(|row| row.get(i)).collect::<StringArray>(),
-                ) as ArrayRef),
-                other => Err(ArrowError::ParseError(format!(
-                    "Unsupported data type {:?}",
-                    other
-                ))),
-            }
-        })
-        .collect();
-
-    let projected_fields: Vec<Field> =
-        projection.iter().map(|i| fields[*i].clone()).collect();
-
-    let projected_schema = Arc::new(match metadata {
-        None => Schema::new(projected_fields),
-        Some(metadata) => Schema::new_with_metadata(projected_fields, metadata),
-    });
-
-    arrays.and_then(|arr| RecordBatch::try_new(projected_schema, arr))
-}
-
-/// Specialized parsing implementations
-trait Parser: ArrowPrimitiveType {
-    fn parse(string: &str) -> Option<Self::Native> {
-        string.parse::<Self::Native>().ok()
-    }
-}
-
-impl Parser for Float32Type {
-    fn parse(string: &str) -> Option<f32> {
-        lexical_core::parse(string.as_bytes()).ok()
-    }
-}
-impl Parser for Float64Type {
-    fn parse(string: &str) -> Option<f64> {
-        lexical_core::parse(string.as_bytes()).ok()
-    }
-}
-
-impl Parser for UInt64Type {}
-
-impl Parser for UInt32Type {}
-
-impl Parser for UInt16Type {}
-
-impl Parser for UInt8Type {}
-
-impl Parser for Int64Type {}
-
-impl Parser for Int32Type {}
-
-impl Parser for Int16Type {}
-
-impl Parser for Int8Type {}
-
-/// Number of days between 0001-01-01 and 1970-01-01
-const EPOCH_DAYS_FROM_CE: i32 = 719_163;
-
-impl Parser for Date32Type {
-    fn parse(string: &str) -> Option<i32> {
-        use chrono::Datelike;
-
-        match Self::DATA_TYPE {
-            DataType::Date32 => {
-                let date = string.parse::<chrono::NaiveDate>().ok()?;
-                Self::Native::from_i32(date.num_days_from_ce() - EPOCH_DAYS_FROM_CE)
-            }
-            _ => None,
-        }
-    }
-}
-
-impl Parser for Date64Type {
-    fn parse(string: &str) -> Option<i64> {
-        match Self::DATA_TYPE {
-            DataType::Date64 => {
-                let date_time = string.parse::<chrono::NaiveDateTime>().ok()?;
-                Self::Native::from_i64(date_time.timestamp_millis())
-            }
-            _ => None,
-        }
-    }
-}
-
-impl Parser for TimestampNanosecondType {
-    fn parse(string: &str) -> Option<i64> {
-        match Self::DATA_TYPE {
-            DataType::Timestamp(TimeUnit::Nanosecond, None) => {
-                let date_time = string.parse::<chrono::NaiveDateTime>().ok()?;
-                Self::Native::from_i64(date_time.timestamp_nanos())
-            }
-            _ => None,
-        }
-    }
-}
-
-impl Parser for TimestampMicrosecondType {
-    fn parse(string: &str) -> Option<i64> {
-        match Self::DATA_TYPE {
-            DataType::Timestamp(TimeUnit::Microsecond, None) => {
-                let date_time = string.parse::<chrono::NaiveDateTime>().ok()?;
-                Self::Native::from_i64(date_time.timestamp_nanos() / 1000)
-            }
-            _ => None,
-        }
-    }
-}
-
-fn parse_item<T: Parser>(string: &str) -> Option<T::Native> {
-    T::parse(string)
-}
-
-fn parse_bool(string: &str) -> Option<bool> {
-    if string.eq_ignore_ascii_case("false") {
-        Some(false)
-    } else if string.eq_ignore_ascii_case("true") {
-        Some(true)
-    } else {
-        None
-    }
-}
-
-// parses a specific column (col_idx) into an Arrow Array.
-fn build_primitive_array<T: ArrowPrimitiveType + Parser>(
-    line_number: usize,
-    rows: &[StringRecord],
-    col_idx: usize,
-) -> Result<ArrayRef> {
-    rows.iter()
-        .enumerate()
-        .map(|(row_index, row)| {
-            match row.get(col_idx) {
-                Some(s) => {
-                    if s.is_empty() {
-                        return Ok(None);
-                    }
-
-                    let parsed = parse_item::<T>(s);
-                    match parsed {
-                        Some(e) => Ok(Some(e)),
-                        None => Err(ArrowError::ParseError(format!(
-                            // TODO: we should surface the underlying error here.
-                            "Error while parsing value {} for column {} at line {}",
-                            s,
-                            col_idx,
-                            line_number + row_index
-                        ))),
-                    }
-                }
-                None => Ok(None),
-            }
-        })
-        .collect::<Result<PrimitiveArray<T>>>()
-        .map(|e| Arc::new(e) as ArrayRef)
-}
-
-// parses a specific column (col_idx) into an Arrow Array.
-fn build_boolean_array(
-    line_number: usize,
-    rows: &[StringRecord],
-    col_idx: usize,
-) -> Result<ArrayRef> {
-    rows.iter()
-        .enumerate()
-        .map(|(row_index, row)| {
-            match row.get(col_idx) {
-                Some(s) => {
-                    if s.is_empty() {
-                        return Ok(None);
-                    }
-
-                    let parsed = parse_bool(s);
-                    match parsed {
-                        Some(e) => Ok(Some(e)),
-                        None => Err(ArrowError::ParseError(format!(
-                            // TODO: we should surface the underlying error here.
-                            "Error while parsing value {} for column {} at line {}",
-                            s,
-                            col_idx,
-                            line_number + row_index
-                        ))),
-                    }
-                }
-                None => Ok(None),
-            }
-        })
-        .collect::<Result<BooleanArray>>()
-        .map(|e| Arc::new(e) as ArrayRef)
-}
-
-/// CSV file reader builder
-#[derive(Debug)]
-pub struct ReaderBuilder {
-    /// Optional schema for the CSV file
-    ///
-    /// If the schema is not supplied, the reader will try to infer the schema
-    /// based on the CSV structure.
-    schema: Option<SchemaRef>,
-    /// Whether the file has headers or not
-    ///
-    /// If schema inference is run on a file with no headers, default column names
-    /// are created.
-    has_header: bool,
-    /// An optional column delimiter. Defaults to `b','`
-    delimiter: Option<u8>,
-    /// Optional maximum number of records to read during schema inference
-    ///
-    /// If a number is not provided, all the records are read.
-    max_records: Option<usize>,
-    /// Batch size (number of records to load each time)
-    ///
-    /// The default batch size when using the `ReaderBuilder` is 1024 records
-    batch_size: usize,
-    /// The bounds over which to scan the reader. `None` starts from 0 and runs until EOF.
-    bounds: Bounds,
-    /// Optional projection for which columns to load (zero-based column indices)
-    projection: Option<Vec<usize>>,
-}
-
-impl Default for ReaderBuilder {
-    fn default() -> Self {
-        Self {
-            schema: None,
-            has_header: false,
-            delimiter: None,
-            max_records: None,
-            batch_size: 1024,
-            bounds: None,
-            projection: None,
-        }
-    }
-}
-
-impl ReaderBuilder {
-    /// Create a new builder for configuring CSV parsing options.
-    ///
-    /// To convert a builder into a reader, call `ReaderBuilder::build`
-    ///
-    /// # Example
-    ///
-    /// ```
-    /// extern crate arrow;
-    ///
-    /// use arrow::csv;
-    /// use std::fs::File;
-    ///
-    /// fn example() -> csv::Reader<File> {
-    ///     let file = File::open("test/data/uk_cities_with_headers.csv").unwrap();
-    ///
-    ///     // create a builder, inferring the schema with the first 100 records
-    ///     let builder = csv::ReaderBuilder::new().infer_schema(Some(100));
-    ///
-    ///     let reader = builder.build(file).unwrap();
-    ///
-    ///     reader
-    /// }
-    /// ```
-    pub fn new() -> ReaderBuilder {
-        ReaderBuilder::default()
-    }
-
-    /// Set the CSV file's schema
-    pub fn with_schema(mut self, schema: SchemaRef) -> Self {
-        self.schema = Some(schema);
-        self
-    }
-
-    /// Set whether the CSV file has headers
-    pub fn has_header(mut self, has_header: bool) -> Self {
-        self.has_header = has_header;
-        self
-    }
-
-    /// Set the CSV file's column delimiter as a byte character
-    pub fn with_delimiter(mut self, delimiter: u8) -> Self {
-        self.delimiter = Some(delimiter);
-        self
-    }
-
-    /// Set the CSV reader to infer the schema of the file
-    pub fn infer_schema(mut self, max_records: Option<usize>) -> Self {
-        // remove any schema that is set
-        self.schema = None;
-        self.max_records = max_records;
-        self
-    }
-
-    /// Set the batch size (number of records to load at one time)
-    pub fn with_batch_size(mut self, batch_size: usize) -> Self {
-        self.batch_size = batch_size;
-        self
-    }
-
-    /// Set the reader's column projection
-    pub fn with_projection(mut self, projection: Vec<usize>) -> Self {
-        self.projection = Some(projection);
-        self
-    }
-
-    /// Create a new `Reader` from the `ReaderBuilder`
-    pub fn build<R: Read + Seek>(self, mut reader: R) -> Result<Reader<R>> {
-        // check if schema should be inferred
-        let delimiter = self.delimiter.unwrap_or(b',');
-        let schema = match self.schema {
-            Some(schema) => schema,
-            None => {
-                let (inferred_schema, _) = infer_file_schema(
-                    &mut reader,
-                    delimiter,
-                    self.max_records,
-                    self.has_header,
-                )?;
-
-                Arc::new(inferred_schema)
-            }
-        };
-        Ok(Reader::from_reader(
-            reader,
-            schema,
-            self.has_header,
-            self.delimiter,
-            self.batch_size,
-            None,
-            self.projection.clone(),
-        ))
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    use std::fs::File;
-    use std::io::{Cursor, Write};
-    use tempfile::NamedTempFile;
-
-    use crate::array::*;
-    use crate::datatypes::Field;
-
-    #[test]
-    fn test_csv() {
-        let schema = Schema::new(vec![
-            Field::new("city", DataType::Utf8, false),
-            Field::new("lat", DataType::Float64, false),
-            Field::new("lng", DataType::Float64, false),
-        ]);
-
-        let file = File::open("test/data/uk_cities.csv").unwrap();
-
-        let mut csv = Reader::new(
-            file,
-            Arc::new(schema.clone()),
-            false,
-            None,
-            1024,
-            None,
-            None,
-        );
-        assert_eq!(Arc::new(schema), csv.schema());
-        let batch = csv.next().unwrap().unwrap();
-        assert_eq!(37, batch.num_rows());
-        assert_eq!(3, batch.num_columns());
-
-        // access data from a primitive array
-        let lat = batch
-            .column(1)
-            .as_any()
-            .downcast_ref::<Float64Array>()
-            .unwrap();
-        assert!(57.653484 - lat.value(0) < f64::EPSILON);
-
-        // access data from a string array (ListArray<u8>)
-        let city = batch
-            .column(0)
-            .as_any()
-            .downcast_ref::<StringArray>()
-            .unwrap();
-
-        assert_eq!("Aberdeen, Aberdeen City, UK", city.value(13));
-    }
-
-    #[test]
-    fn test_csv_schema_metadata() {
-        let mut metadata = std::collections::HashMap::new();
-        metadata.insert("foo".to_owned(), "bar".to_owned());
-        let schema = Schema::new_with_metadata(
-            vec![
-                Field::new("city", DataType::Utf8, false),
-                Field::new("lat", DataType::Float64, false),
-                Field::new("lng", DataType::Float64, false),
-            ],
-            metadata.clone(),
-        );
-
-        let file = File::open("test/data/uk_cities.csv").unwrap();
-
-        let mut csv = Reader::new(
-            file,
-            Arc::new(schema.clone()),
-            false,
-            None,
-            1024,
-            None,
-            None,
-        );
-        assert_eq!(Arc::new(schema), csv.schema());
-        let batch = csv.next().unwrap().unwrap();
-        assert_eq!(37, batch.num_rows());
-        assert_eq!(3, batch.num_columns());
-
-        assert_eq!(&metadata, batch.schema().metadata());
-    }
-
-    #[test]
-    fn test_csv_from_buf_reader() {
-        let schema = Schema::new(vec![
-            Field::new("city", DataType::Utf8, false),
-            Field::new("lat", DataType::Float64, false),
-            Field::new("lng", DataType::Float64, false),
-        ]);
-
-        let file_with_headers =
-            File::open("test/data/uk_cities_with_headers.csv").unwrap();
-        let file_without_headers = File::open("test/data/uk_cities.csv").unwrap();
-        let both_files = file_with_headers
-            .chain(Cursor::new("\n".to_string()))
-            .chain(file_without_headers);
-        let mut csv = Reader::from_reader(
-            both_files,
-            Arc::new(schema),
-            true,
-            None,
-            1024,
-            None,
-            None,
-        );
-        let batch = csv.next().unwrap().unwrap();
-        assert_eq!(74, batch.num_rows());
-        assert_eq!(3, batch.num_columns());
-    }
-
-    #[test]
-    fn test_csv_with_schema_inference() {
-        let file = File::open("test/data/uk_cities_with_headers.csv").unwrap();
-
-        let builder = ReaderBuilder::new().has_header(true).infer_schema(None);
-
-        let mut csv = builder.build(file).unwrap();
-        let expected_schema = Schema::new(vec![
-            Field::new("city", DataType::Utf8, false),
-            Field::new("lat", DataType::Float64, false),
-            Field::new("lng", DataType::Float64, false),
-        ]);
-        assert_eq!(Arc::new(expected_schema), csv.schema());
-        let batch = csv.next().unwrap().unwrap();
-        assert_eq!(37, batch.num_rows());
-        assert_eq!(3, batch.num_columns());
-
-        // access data from a primitive array
-        let lat = batch
-            .column(1)
-            .as_any()
-            .downcast_ref::<Float64Array>()
-            .unwrap();
-        assert!(57.653484 - lat.value(0) < f64::EPSILON);
-
-        // access data from a string array (ListArray<u8>)
-        let city = batch
-            .column(0)
-            .as_any()
-            .downcast_ref::<StringArray>()
-            .unwrap();
-
-        assert_eq!("Aberdeen, Aberdeen City, UK", city.value(13));
-    }
-
-    #[test]
-    fn test_csv_with_schema_inference_no_headers() {
-        let file = File::open("test/data/uk_cities.csv").unwrap();
-
-        let builder = ReaderBuilder::new().infer_schema(None);
-
-        let mut csv = builder.build(file).unwrap();
-
-        // csv field names should be 'column_{number}'
-        let schema = csv.schema();
-        assert_eq!("column_1", schema.field(0).name());
-        assert_eq!("column_2", schema.field(1).name());
-        assert_eq!("column_3", schema.field(2).name());
-        let batch = csv.next().unwrap().unwrap();
-        let batch_schema = batch.schema();
-
-        assert_eq!(schema, batch_schema);
-        assert_eq!(37, batch.num_rows());
-        assert_eq!(3, batch.num_columns());
-
-        // access data from a primitive array
-        let lat = batch
-            .column(1)
-            .as_any()
-            .downcast_ref::<Float64Array>()
-            .unwrap();
-        assert!(57.653484 - lat.value(0) < f64::EPSILON);
-
-        // access data from a string array (ListArray<u8>)
-        let city = batch
-            .column(0)
-            .as_any()
-            .downcast_ref::<StringArray>()
-            .unwrap();
-
-        assert_eq!("Aberdeen, Aberdeen City, UK", city.value(13));
-    }
-
-    #[test]
-    fn test_csv_with_projection() {
-        let schema = Schema::new(vec![
-            Field::new("city", DataType::Utf8, false),
-            Field::new("lat", DataType::Float64, false),
-            Field::new("lng", DataType::Float64, false),
-        ]);
-
-        let file = File::open("test/data/uk_cities.csv").unwrap();
-
-        let mut csv = Reader::new(
-            file,
-            Arc::new(schema),
-            false,
-            None,
-            1024,
-            None,
-            Some(vec![0, 1]),
-        );
-        let projected_schema = Arc::new(Schema::new(vec![
-            Field::new("city", DataType::Utf8, false),
-            Field::new("lat", DataType::Float64, false),
-        ]));
-        assert_eq!(projected_schema, csv.schema());
-        let batch = csv.next().unwrap().unwrap();
-        assert_eq!(projected_schema, batch.schema());
-        assert_eq!(37, batch.num_rows());
-        assert_eq!(2, batch.num_columns());
-    }
-
-    #[test]
-    fn test_nulls() {
-        let schema = Schema::new(vec![
-            Field::new("c_int", DataType::UInt64, false),
-            Field::new("c_float", DataType::Float32, false),
-            Field::new("c_string", DataType::Utf8, false),
-        ]);
-
-        let file = File::open("test/data/null_test.csv").unwrap();
-
-        let mut csv = Reader::new(file, Arc::new(schema), true, None, 1024, None, None);
-        let batch = csv.next().unwrap().unwrap();
-
-        assert_eq!(false, batch.column(1).is_null(0));
-        assert_eq!(false, batch.column(1).is_null(1));
-        assert_eq!(true, batch.column(1).is_null(2));
-        assert_eq!(false, batch.column(1).is_null(3));
-        assert_eq!(false, batch.column(1).is_null(4));
-    }
-
-    #[test]
-    fn test_nulls_with_inference() {
-        let file = File::open("test/data/various_types.csv").unwrap();
-
-        let builder = ReaderBuilder::new()
-            .infer_schema(None)
-            .has_header(true)
-            .with_delimiter(b'|')
-            .with_batch_size(512)
-            .with_projection(vec![0, 1, 2, 3, 4, 5]);
-
-        let mut csv = builder.build(file).unwrap();
-        let batch = csv.next().unwrap().unwrap();
-
-        assert_eq!(5, batch.num_rows());
-        assert_eq!(6, batch.num_columns());
-
-        let schema = batch.schema();
-
-        assert_eq!(&DataType::Int64, schema.field(0).data_type());
-        assert_eq!(&DataType::Float64, schema.field(1).data_type());
-        assert_eq!(&DataType::Float64, schema.field(2).data_type());
-        assert_eq!(&DataType::Boolean, schema.field(3).data_type());
-        assert_eq!(&DataType::Date32, schema.field(4).data_type());
-        assert_eq!(&DataType::Date64, schema.field(5).data_type());
-
-        let names: Vec<&str> =
-            schema.fields().iter().map(|x| x.name().as_str()).collect();
-        assert_eq!(
-            names,
-            vec![
-                "c_int",
-                "c_float",
-                "c_string",
-                "c_bool",
-                "c_date",
-                "c_datetime"
-            ]
-        );
-
-        assert_eq!(false, schema.field(0).is_nullable());
-        assert_eq!(true, schema.field(1).is_nullable());
-        assert_eq!(true, schema.field(2).is_nullable());
-        assert_eq!(false, schema.field(3).is_nullable());
-        assert_eq!(true, schema.field(4).is_nullable());
-        assert_eq!(true, schema.field(5).is_nullable());
-
-        assert_eq!(false, batch.column(1).is_null(0));
-        assert_eq!(false, batch.column(1).is_null(1));
-        assert_eq!(true, batch.column(1).is_null(2));
-        assert_eq!(false, batch.column(1).is_null(3));
-        assert_eq!(false, batch.column(1).is_null(4));
-    }
-
-    #[test]
-    fn test_parse_invalid_csv() {
-        let file = File::open("test/data/various_types_invalid.csv").unwrap();
-
-        let schema = Schema::new(vec![
-            Field::new("c_int", DataType::UInt64, false),
-            Field::new("c_float", DataType::Float32, false),
-            Field::new("c_string", DataType::Utf8, false),
-            Field::new("c_bool", DataType::Boolean, false),
-        ]);
-
-        let builder = ReaderBuilder::new()
-            .with_schema(Arc::new(schema))
-            .has_header(true)
-            .with_delimiter(b'|')
-            .with_batch_size(512)
-            .with_projection(vec![0, 1, 2, 3]);
-
-        let mut csv = builder.build(file).unwrap();
-        match csv.next() {
-            Some(e) => match e {
-                Err(e) => assert_eq!(
-                    "ParseError(\"Error while parsing value 4.x4 for column 1 at line 4\")",
-                    format!("{:?}", e)
-                ),
-                Ok(_) => panic!("should have failed"),
-            },
-            None => panic!("should have failed"),
-        }
-    }
-
-    #[test]
-    fn test_infer_field_schema() {
-        assert_eq!(infer_field_schema("A"), DataType::Utf8);
-        assert_eq!(infer_field_schema("\"123\""), DataType::Utf8);
-        assert_eq!(infer_field_schema("10"), DataType::Int64);
-        assert_eq!(infer_field_schema("10.2"), DataType::Float64);
-        assert_eq!(infer_field_schema("true"), DataType::Boolean);
-        assert_eq!(infer_field_schema("false"), DataType::Boolean);
-        assert_eq!(infer_field_schema("2020-11-08"), DataType::Date32);
-        assert_eq!(infer_field_schema("2020-11-08T14:20:01"), DataType::Date64);
-    }
-
-    #[test]
-    fn parse_date32() {
-        assert_eq!(parse_item::<Date32Type>("1970-01-01").unwrap(), 0);
-        assert_eq!(parse_item::<Date32Type>("2020-03-15").unwrap(), 18336);
-        assert_eq!(parse_item::<Date32Type>("1945-05-08").unwrap(), -9004);
-    }
-
-    #[test]
-    fn parse_date64() {
-        assert_eq!(parse_item::<Date64Type>("1970-01-01T00:00:00").unwrap(), 0);
-        assert_eq!(
-            parse_item::<Date64Type>("2018-11-13T17:11:10").unwrap(),
-            1542129070000
-        );
-        assert_eq!(
-            parse_item::<Date64Type>("2018-11-13T17:11:10.011").unwrap(),
-            1542129070011
-        );
-        assert_eq!(
-            parse_item::<Date64Type>("1900-02-28T12:34:56").unwrap(),
-            -2203932304000
-        );
-    }
-
-    #[test]
-    fn test_infer_schema_from_multiple_files() -> Result<()> {
-        let mut csv1 = NamedTempFile::new()?;
-        let mut csv2 = NamedTempFile::new()?;
-        let csv3 = NamedTempFile::new()?; // empty csv file should be skipped
-        let mut csv4 = NamedTempFile::new()?;
-        writeln!(csv1, "c1,c2,c3")?;
-        writeln!(csv1, "1,\"foo\",0.5")?;
-        writeln!(csv1, "3,\"bar\",1")?;
-        // reading csv2 will set c2 to optional
-        writeln!(csv2, "c1,c2,c3,c4")?;
-        writeln!(csv2, "10,,3.14,true")?;
-        // reading csv4 will set c3 to optional
-        writeln!(csv4, "c1,c2,c3")?;
-        writeln!(csv4, "10,\"foo\",")?;
-
-        let schema = infer_schema_from_files(
-            &[
-                csv3.path().to_str().unwrap().to_string(),
-                csv1.path().to_str().unwrap().to_string(),
-                csv2.path().to_str().unwrap().to_string(),
-                csv4.path().to_str().unwrap().to_string(),
-            ],
-            b',',
-            Some(3), // only csv1 and csv2 should be read
-            true,
-        )?;
-
-        assert_eq!(schema.fields().len(), 4);
-        assert_eq!(false, schema.field(0).is_nullable());
-        assert_eq!(true, schema.field(1).is_nullable());
-        assert_eq!(false, schema.field(2).is_nullable());
-        assert_eq!(false, schema.field(3).is_nullable());
-
-        assert_eq!(&DataType::Int64, schema.field(0).data_type());
-        assert_eq!(&DataType::Utf8, schema.field(1).data_type());
-        assert_eq!(&DataType::Float64, schema.field(2).data_type());
-        assert_eq!(&DataType::Boolean, schema.field(3).data_type());
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_bounded() {
-        let schema = Schema::new(vec![Field::new("int", DataType::UInt32, false)]);
-        let data = vec![
-            vec!["0"],
-            vec!["1"],
-            vec!["2"],
-            vec!["3"],
-            vec!["4"],
-            vec!["5"],
-            vec!["6"],
-        ];
-
-        let data = data
-            .iter()
-            .map(|x| x.join(","))
-            .collect::<Vec<_>>()
-            .join("\n");
-        let data = data.as_bytes();
-
-        let reader = std::io::Cursor::new(data);
-
-        let mut csv = Reader::new(
-            reader,
-            Arc::new(schema),
-            false,
-            None,
-            2,
-            // starting at row 2 and up to row 6.
-            Some((2, 6)),
-            Some(vec![0]),
-        );
-
-        let batch = csv.next().unwrap().unwrap();
-        let a = batch.column(0);
-        let a = a.as_any().downcast_ref::<UInt32Array>().unwrap();
-        assert_eq!(a, &UInt32Array::from(vec![2, 3]));
-
-        let batch = csv.next().unwrap().unwrap();
-        let a = batch.column(0);
-        let a = a.as_any().downcast_ref::<UInt32Array>().unwrap();
-        assert_eq!(a, &UInt32Array::from(vec![4, 5]));
-
-        assert!(csv.next().is_none());
-    }
-
-    #[test]
-    fn test_parsing_bool() {
-        // Encode the expected behavior of boolean parsing
-        assert_eq!(Some(true), parse_bool("true"));
-        assert_eq!(Some(true), parse_bool("tRUe"));
-        assert_eq!(Some(true), parse_bool("True"));
-        assert_eq!(Some(true), parse_bool("TRUE"));
-        assert_eq!(None, parse_bool("t"));
-        assert_eq!(None, parse_bool("T"));
-        assert_eq!(None, parse_bool(""));
-
-        assert_eq!(Some(false), parse_bool("false"));
-        assert_eq!(Some(false), parse_bool("fALse"));
-        assert_eq!(Some(false), parse_bool("False"));
-        assert_eq!(Some(false), parse_bool("FALSE"));
-        assert_eq!(None, parse_bool("f"));
-        assert_eq!(None, parse_bool("F"));
-        assert_eq!(None, parse_bool(""));
-    }
-
-    #[test]
-    fn test_parsing_float() {
-        assert_eq!(Some(12.34), parse_item::<Float64Type>("12.34"));
-        assert_eq!(Some(-12.34), parse_item::<Float64Type>("-12.34"));
-        assert_eq!(Some(12.0), parse_item::<Float64Type>("12"));
-        assert_eq!(Some(0.0), parse_item::<Float64Type>("0"));
-        assert!(parse_item::<Float64Type>("nan").unwrap().is_nan());
-        assert!(parse_item::<Float64Type>("NaN").unwrap().is_nan());
-        assert!(parse_item::<Float64Type>("inf").unwrap().is_infinite());
-        assert!(parse_item::<Float64Type>("inf").unwrap().is_sign_positive());
-        assert!(parse_item::<Float64Type>("-inf").unwrap().is_infinite());
-        assert!(parse_item::<Float64Type>("-inf")
-            .unwrap()
-            .is_sign_negative());
-        assert_eq!(None, parse_item::<Float64Type>(""));
-        assert_eq!(None, parse_item::<Float64Type>("dd"));
-        assert_eq!(None, parse_item::<Float64Type>("12.34.56"));
-    }
-}
diff --git a/rust/arrow/src/csv/writer.rs b/rust/arrow/src/csv/writer.rs
deleted file mode 100644
index e9d8565b2a5..00000000000
--- a/rust/arrow/src/csv/writer.rs
+++ /dev/null
@@ -1,651 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! CSV Writer
-//!
-//! This CSV writer allows Arrow data (in record batches) to be written as CSV files.
-//! The writer does not support writing `ListArray` and `StructArray`.
-//!
-//! Example:
-//!
-//! ```
-//! use arrow::array::*;
-//! use arrow::csv;
-//! use arrow::datatypes::*;
-//! use arrow::record_batch::RecordBatch;
-//! use arrow::util::test_util::get_temp_file;
-//! use std::fs::File;
-//! use std::sync::Arc;
-//!
-//! let schema = Schema::new(vec![
-//!     Field::new("c1", DataType::Utf8, false),
-//!     Field::new("c2", DataType::Float64, true),
-//!     Field::new("c3", DataType::UInt32, false),
-//!     Field::new("c3", DataType::Boolean, true),
-//! ]);
-//! let c1 = StringArray::from(vec![
-//!     "Lorem ipsum dolor sit amet",
-//!     "consectetur adipiscing elit",
-//!     "sed do eiusmod tempor",
-//! ]);
-//! let c2 = PrimitiveArray::<Float64Type>::from(vec![
-//!     Some(123.564532),
-//!     None,
-//!     Some(-556132.25),
-//! ]);
-//! let c3 = PrimitiveArray::<UInt32Type>::from(vec![3, 2, 1]);
-//! let c4 = BooleanArray::from(vec![Some(true), Some(false), None]);
-//!
-//! let batch = RecordBatch::try_new(
-//!     Arc::new(schema),
-//!     vec![Arc::new(c1), Arc::new(c2), Arc::new(c3), Arc::new(c4)],
-//! )
-//! .unwrap();
-//!
-//! let file = get_temp_file("out.csv", &[]);
-//!
-//! let mut writer = csv::Writer::new(file);
-//! let batches = vec![&batch, &batch];
-//! for batch in batches {
-//!     writer.write(batch).unwrap();
-//! }
-//! ```
-
-use csv as csv_crate;
-
-use std::io::Write;
-
-use crate::datatypes::*;
-use crate::error::{ArrowError, Result};
-use crate::record_batch::RecordBatch;
-use crate::{array::*, util::serialization::lexical_to_string};
-const DEFAULT_DATE_FORMAT: &str = "%F";
-const DEFAULT_TIME_FORMAT: &str = "%T";
-const DEFAULT_TIMESTAMP_FORMAT: &str = "%FT%H:%M:%S.%9f";
-
-fn write_primitive_value<T>(array: &ArrayRef, i: usize) -> String
-where
-    T: ArrowNumericType,
-    T::Native: lexical_core::ToLexical,
-{
-    let c = array.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
-    lexical_to_string(c.value(i))
-}
-
-/// A CSV writer
-#[derive(Debug)]
-pub struct Writer<W: Write> {
-    /// The object to write to
-    writer: csv_crate::Writer<W>,
-    /// Column delimiter. Defaults to `b','`
-    delimiter: u8,
-    /// Whether file should be written with headers. Defaults to `true`
-    has_headers: bool,
-    /// The date format for date arrays
-    date_format: String,
-    /// The datetime format for datetime arrays
-    datetime_format: String,
-    /// The timestamp format for timestamp arrays
-    timestamp_format: String,
-    /// The time format for time arrays
-    time_format: String,
-    /// Is the beginning-of-writer
-    beginning: bool,
-}
-
-impl<W: Write> Writer<W> {
-    /// Create a new CsvWriter from a writable object, with default options
-    pub fn new(writer: W) -> Self {
-        let delimiter = b',';
-        let mut builder = csv_crate::WriterBuilder::new();
-        let writer = builder.delimiter(delimiter).from_writer(writer);
-        Writer {
-            writer,
-            delimiter,
-            has_headers: true,
-            date_format: DEFAULT_DATE_FORMAT.to_string(),
-            datetime_format: DEFAULT_TIMESTAMP_FORMAT.to_string(),
-            time_format: DEFAULT_TIME_FORMAT.to_string(),
-            timestamp_format: DEFAULT_TIMESTAMP_FORMAT.to_string(),
-            beginning: true,
-        }
-    }
-
-    /// Convert a record to a string vector
-    fn convert(
-        &self,
-        batch: &RecordBatch,
-        row_index: usize,
-        buffer: &mut [String],
-    ) -> Result<()> {
-        // TODO: it'd be more efficient if we could create `record: Vec<&[u8]>
-        for (col_index, item) in buffer.iter_mut().enumerate() {
-            let col = batch.column(col_index);
-            if col.is_null(row_index) {
-                // write an empty value
-                *item = "".to_string();
-                continue;
-            }
-            let string = match col.data_type() {
-                DataType::Float64 => write_primitive_value::<Float64Type>(col, row_index),
-                DataType::Float32 => write_primitive_value::<Float32Type>(col, row_index),
-                DataType::Int8 => write_primitive_value::<Int8Type>(col, row_index),
-                DataType::Int16 => write_primitive_value::<Int16Type>(col, row_index),
-                DataType::Int32 => write_primitive_value::<Int32Type>(col, row_index),
-                DataType::Int64 => write_primitive_value::<Int64Type>(col, row_index),
-                DataType::UInt8 => write_primitive_value::<UInt8Type>(col, row_index),
-                DataType::UInt16 => write_primitive_value::<UInt16Type>(col, row_index),
-                DataType::UInt32 => write_primitive_value::<UInt32Type>(col, row_index),
-                DataType::UInt64 => write_primitive_value::<UInt64Type>(col, row_index),
-                DataType::Boolean => {
-                    let c = col.as_any().downcast_ref::<BooleanArray>().unwrap();
-                    c.value(row_index).to_string()
-                }
-                DataType::Utf8 => {
-                    let c = col.as_any().downcast_ref::<StringArray>().unwrap();
-                    c.value(row_index).to_owned()
-                }
-                DataType::LargeUtf8 => {
-                    let c = col.as_any().downcast_ref::<LargeStringArray>().unwrap();
-                    c.value(row_index).to_owned()
-                }
-                DataType::Date32 => {
-                    let c = col.as_any().downcast_ref::<Date32Array>().unwrap();
-                    c.value_as_date(row_index)
-                        .unwrap()
-                        .format(&self.date_format)
-                        .to_string()
-                }
-                DataType::Date64 => {
-                    let c = col.as_any().downcast_ref::<Date64Array>().unwrap();
-                    c.value_as_datetime(row_index)
-                        .unwrap()
-                        .format(&self.datetime_format)
-                        .to_string()
-                }
-                DataType::Time32(TimeUnit::Second) => {
-                    let c = col.as_any().downcast_ref::<Time32SecondArray>().unwrap();
-                    c.value_as_time(row_index)
-                        .unwrap()
-                        .format(&self.time_format)
-                        .to_string()
-                }
-                DataType::Time32(TimeUnit::Millisecond) => {
-                    let c = col
-                        .as_any()
-                        .downcast_ref::<Time32MillisecondArray>()
-                        .unwrap();
-                    c.value_as_time(row_index)
-                        .unwrap()
-                        .format(&self.time_format)
-                        .to_string()
-                }
-                DataType::Time64(TimeUnit::Microsecond) => {
-                    let c = col
-                        .as_any()
-                        .downcast_ref::<Time64MicrosecondArray>()
-                        .unwrap();
-                    c.value_as_time(row_index)
-                        .unwrap()
-                        .format(&self.time_format)
-                        .to_string()
-                }
-                DataType::Time64(TimeUnit::Nanosecond) => {
-                    let c = col
-                        .as_any()
-                        .downcast_ref::<Time64NanosecondArray>()
-                        .unwrap();
-                    c.value_as_time(row_index)
-                        .unwrap()
-                        .format(&self.time_format)
-                        .to_string()
-                }
-                DataType::Timestamp(time_unit, _) => {
-                    use TimeUnit::*;
-                    let datetime = match time_unit {
-                        Second => col
-                            .as_any()
-                            .downcast_ref::<TimestampSecondArray>()
-                            .unwrap()
-                            .value_as_datetime(row_index)
-                            .unwrap(),
-                        Millisecond => col
-                            .as_any()
-                            .downcast_ref::<TimestampMillisecondArray>()
-                            .unwrap()
-                            .value_as_datetime(row_index)
-                            .unwrap(),
-                        Microsecond => col
-                            .as_any()
-                            .downcast_ref::<TimestampMicrosecondArray>()
-                            .unwrap()
-                            .value_as_datetime(row_index)
-                            .unwrap(),
-                        Nanosecond => col
-                            .as_any()
-                            .downcast_ref::<TimestampNanosecondArray>()
-                            .unwrap()
-                            .value_as_datetime(row_index)
-                            .unwrap(),
-                    };
-                    format!("{}", datetime.format(&self.timestamp_format))
-                }
-                t => {
-                    // List and Struct arrays not supported by the writer, any
-                    // other type needs to be implemented
-                    return Err(ArrowError::CsvError(format!(
-                        "CSV Writer does not support {:?} data type",
-                        t
-                    )));
-                }
-            };
-            *item = string;
-        }
-        Ok(())
-    }
-
-    /// Write a vector of record batches to a writable object
-    pub fn write(&mut self, batch: &RecordBatch) -> Result<()> {
-        let num_columns = batch.num_columns();
-        if self.beginning {
-            if self.has_headers {
-                let mut headers: Vec<String> = Vec::with_capacity(num_columns);
-                batch
-                    .schema()
-                    .fields()
-                    .iter()
-                    .for_each(|field| headers.push(field.name().to_string()));
-                self.writer.write_record(&headers[..])?;
-            }
-            self.beginning = false;
-        }
-
-        let mut buffer = vec!["".to_string(); batch.num_columns()];
-
-        for row_index in 0..batch.num_rows() {
-            self.convert(batch, row_index, &mut buffer)?;
-            self.writer.write_record(&buffer)?;
-        }
-        self.writer.flush()?;
-
-        Ok(())
-    }
-}
-
-/// A CSV writer builder
-#[derive(Debug)]
-pub struct WriterBuilder {
-    /// Optional column delimiter. Defaults to `b','`
-    delimiter: Option<u8>,
-    /// Whether to write column names as file headers. Defaults to `true`
-    has_headers: bool,
-    /// Optional date format for date arrays
-    date_format: Option<String>,
-    /// Optional datetime format for datetime arrays
-    datetime_format: Option<String>,
-    /// Optional timestamp format for timestamp arrays
-    timestamp_format: Option<String>,
-    /// Optional time format for time arrays
-    time_format: Option<String>,
-}
-
-impl Default for WriterBuilder {
-    fn default() -> Self {
-        Self {
-            has_headers: true,
-            delimiter: None,
-            date_format: Some(DEFAULT_DATE_FORMAT.to_string()),
-            datetime_format: Some(DEFAULT_TIMESTAMP_FORMAT.to_string()),
-            time_format: Some(DEFAULT_TIME_FORMAT.to_string()),
-            timestamp_format: Some(DEFAULT_TIMESTAMP_FORMAT.to_string()),
-        }
-    }
-}
-
-impl WriterBuilder {
-    /// Create a new builder for configuring CSV writing options.
-    ///
-    /// To convert a builder into a writer, call `WriterBuilder::build`
-    ///
-    /// # Example
-    ///
-    /// ```
-    /// extern crate arrow;
-    ///
-    /// use arrow::csv;
-    /// use std::fs::File;
-    ///
-    /// fn example() -> csv::Writer<File> {
-    ///     let file = File::create("target/out.csv").unwrap();
-    ///
-    ///     // create a builder that doesn't write headers
-    ///     let builder = csv::WriterBuilder::new().has_headers(false);
-    ///     let writer = builder.build(file);
-    ///
-    ///     writer
-    /// }
-    /// ```
-    pub fn new() -> Self {
-        Self::default()
-    }
-
-    /// Set whether to write headers
-    pub fn has_headers(mut self, has_headers: bool) -> Self {
-        self.has_headers = has_headers;
-        self
-    }
-
-    /// Set the CSV file's column delimiter as a byte character
-    pub fn with_delimiter(mut self, delimiter: u8) -> Self {
-        self.delimiter = Some(delimiter);
-        self
-    }
-
-    /// Set the CSV file's date format
-    pub fn with_date_format(mut self, format: String) -> Self {
-        self.date_format = Some(format);
-        self
-    }
-
-    /// Set the CSV file's time format
-    pub fn with_time_format(mut self, format: String) -> Self {
-        self.time_format = Some(format);
-        self
-    }
-
-    /// Set the CSV file's timestamp format
-    pub fn with_timestamp_format(mut self, format: String) -> Self {
-        self.timestamp_format = Some(format);
-        self
-    }
-
-    /// Create a new `Writer`
-    pub fn build<W: Write>(self, writer: W) -> Writer<W> {
-        let delimiter = self.delimiter.unwrap_or(b',');
-        let mut builder = csv_crate::WriterBuilder::new();
-        let writer = builder.delimiter(delimiter).from_writer(writer);
-        Writer {
-            writer,
-            delimiter,
-            has_headers: self.has_headers,
-            date_format: self
-                .date_format
-                .unwrap_or_else(|| DEFAULT_DATE_FORMAT.to_string()),
-            datetime_format: self
-                .datetime_format
-                .unwrap_or_else(|| DEFAULT_TIMESTAMP_FORMAT.to_string()),
-            time_format: self
-                .time_format
-                .unwrap_or_else(|| DEFAULT_TIME_FORMAT.to_string()),
-            timestamp_format: self
-                .timestamp_format
-                .unwrap_or_else(|| DEFAULT_TIMESTAMP_FORMAT.to_string()),
-            beginning: true,
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    use crate::csv::Reader;
-    use crate::datatypes::{Field, Schema};
-    use crate::util::string_writer::StringWriter;
-    use crate::util::test_util::get_temp_file;
-    use std::fs::File;
-    use std::io::{Cursor, Read};
-    use std::sync::Arc;
-
-    #[test]
-    fn test_write_csv() {
-        let schema = Schema::new(vec![
-            Field::new("c1", DataType::Utf8, false),
-            Field::new("c2", DataType::Float64, true),
-            Field::new("c3", DataType::UInt32, false),
-            Field::new("c4", DataType::Boolean, true),
-            Field::new("c5", DataType::Timestamp(TimeUnit::Millisecond, None), true),
-            Field::new("c6", DataType::Time32(TimeUnit::Second), false),
-        ]);
-
-        let c1 = StringArray::from(vec![
-            "Lorem ipsum dolor sit amet",
-            "consectetur adipiscing elit",
-            "sed do eiusmod tempor",
-        ]);
-        let c2 = PrimitiveArray::<Float64Type>::from(vec![
-            Some(123.564532),
-            None,
-            Some(-556132.25),
-        ]);
-        let c3 = PrimitiveArray::<UInt32Type>::from(vec![3, 2, 1]);
-        let c4 = BooleanArray::from(vec![Some(true), Some(false), None]);
-        let c5 = TimestampMillisecondArray::from_opt_vec(
-            vec![None, Some(1555584887378), Some(1555555555555)],
-            None,
-        );
-        let c6 = Time32SecondArray::from(vec![1234, 24680, 85563]);
-
-        let batch = RecordBatch::try_new(
-            Arc::new(schema),
-            vec![
-                Arc::new(c1),
-                Arc::new(c2),
-                Arc::new(c3),
-                Arc::new(c4),
-                Arc::new(c5),
-                Arc::new(c6),
-            ],
-        )
-        .unwrap();
-
-        let file = get_temp_file("columns.csv", &[]);
-
-        let mut writer = Writer::new(file);
-        let batches = vec![&batch, &batch];
-        for batch in batches {
-            writer.write(batch).unwrap();
-        }
-        // check that file was written successfully
-        let mut file = File::open("target/debug/testdata/columns.csv").unwrap();
-        let mut buffer: Vec<u8> = vec![];
-        file.read_to_end(&mut buffer).unwrap();
-
-        assert_eq!(
-            r#"c1,c2,c3,c4,c5,c6
-Lorem ipsum dolor sit amet,123.564532,3,true,,00:20:34
-consectetur adipiscing elit,,2,false,2019-04-18T10:54:47.378000000,06:51:20
-sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03
-Lorem ipsum dolor sit amet,123.564532,3,true,,00:20:34
-consectetur adipiscing elit,,2,false,2019-04-18T10:54:47.378000000,06:51:20
-sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03
-"#
-            .to_string(),
-            String::from_utf8(buffer).unwrap()
-        );
-    }
-
-    #[test]
-    fn test_write_csv_custom_options() {
-        let schema = Schema::new(vec![
-            Field::new("c1", DataType::Utf8, false),
-            Field::new("c2", DataType::Float64, true),
-            Field::new("c3", DataType::UInt32, false),
-            Field::new("c4", DataType::Boolean, true),
-            Field::new("c6", DataType::Time32(TimeUnit::Second), false),
-        ]);
-
-        let c1 = StringArray::from(vec![
-            "Lorem ipsum dolor sit amet",
-            "consectetur adipiscing elit",
-            "sed do eiusmod tempor",
-        ]);
-        let c2 = PrimitiveArray::<Float64Type>::from(vec![
-            Some(123.564532),
-            None,
-            Some(-556132.25),
-        ]);
-        let c3 = PrimitiveArray::<UInt32Type>::from(vec![3, 2, 1]);
-        let c4 = BooleanArray::from(vec![Some(true), Some(false), None]);
-        let c6 = Time32SecondArray::from(vec![1234, 24680, 85563]);
-
-        let batch = RecordBatch::try_new(
-            Arc::new(schema),
-            vec![
-                Arc::new(c1),
-                Arc::new(c2),
-                Arc::new(c3),
-                Arc::new(c4),
-                Arc::new(c6),
-            ],
-        )
-        .unwrap();
-
-        let file = get_temp_file("custom_options.csv", &[]);
-
-        let builder = WriterBuilder::new()
-            .has_headers(false)
-            .with_delimiter(b'|')
-            .with_time_format("%r".to_string());
-        let mut writer = builder.build(file);
-        let batches = vec![&batch];
-        for batch in batches {
-            writer.write(batch).unwrap();
-        }
-
-        // check that file was written successfully
-        let mut file = File::open("target/debug/testdata/custom_options.csv").unwrap();
-        let mut buffer: Vec<u8> = vec![];
-        file.read_to_end(&mut buffer).unwrap();
-
-        assert_eq!(
-            "Lorem ipsum dolor sit amet|123.564532|3|true|12:20:34 AM\nconsectetur adipiscing elit||2|false|06:51:20 AM\nsed do eiusmod tempor|-556132.25|1||11:46:03 PM\n"
-            .to_string(),
-            String::from_utf8(buffer).unwrap()
-        );
-    }
-
-    #[test]
-    fn test_export_csv_string() {
-        let schema = Schema::new(vec![
-            Field::new("c1", DataType::Utf8, false),
-            Field::new("c2", DataType::Float64, true),
-            Field::new("c3", DataType::UInt32, false),
-            Field::new("c4", DataType::Boolean, true),
-            Field::new("c5", DataType::Timestamp(TimeUnit::Millisecond, None), true),
-            Field::new("c6", DataType::Time32(TimeUnit::Second), false),
-        ]);
-
-        let c1 = StringArray::from(vec![
-            "Lorem ipsum dolor sit amet",
-            "consectetur adipiscing elit",
-            "sed do eiusmod tempor",
-        ]);
-        let c2 = PrimitiveArray::<Float64Type>::from(vec![
-            Some(123.564532),
-            None,
-            Some(-556132.25),
-        ]);
-        let c3 = PrimitiveArray::<UInt32Type>::from(vec![3, 2, 1]);
-        let c4 = BooleanArray::from(vec![Some(true), Some(false), None]);
-        let c5 = TimestampMillisecondArray::from_opt_vec(
-            vec![None, Some(1555584887378), Some(1555555555555)],
-            None,
-        );
-        let c6 = Time32SecondArray::from(vec![1234, 24680, 85563]);
-
-        let batch = RecordBatch::try_new(
-            Arc::new(schema),
-            vec![
-                Arc::new(c1),
-                Arc::new(c2),
-                Arc::new(c3),
-                Arc::new(c4),
-                Arc::new(c5),
-                Arc::new(c6),
-            ],
-        )
-        .unwrap();
-
-        let sw = StringWriter::new();
-        let mut writer = Writer::new(sw);
-        let batches = vec![&batch, &batch];
-        for batch in batches {
-            writer.write(batch).unwrap();
-        }
-
-        let left = "c1,c2,c3,c4,c5,c6
-Lorem ipsum dolor sit amet,123.564532,3,true,,00:20:34
-consectetur adipiscing elit,,2,false,2019-04-18T10:54:47.378000000,06:51:20
-sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03
-Lorem ipsum dolor sit amet,123.564532,3,true,,00:20:34
-consectetur adipiscing elit,,2,false,2019-04-18T10:54:47.378000000,06:51:20
-sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03\n";
-        let right = writer.writer.into_inner().map(|s| s.to_string());
-        assert_eq!(Some(left.to_string()), right.ok());
-    }
-
-    #[test]
-    fn test_conversion_consistency() {
-        // test if we can serialize and deserialize whilst retaining the same type information/ precision
-
-        let schema = Schema::new(vec![
-            Field::new("c1", DataType::Date32, false),
-            Field::new("c2", DataType::Date64, false),
-        ]);
-
-        let c1 = Date32Array::from(vec![3, 2, 1]);
-        let c2 = Date64Array::from(vec![3, 2, 1]);
-
-        let batch = RecordBatch::try_new(
-            Arc::new(schema.clone()),
-            vec![Arc::new(c1), Arc::new(c2)],
-        )
-        .unwrap();
-
-        let builder = WriterBuilder::new().has_headers(false);
-
-        let mut buf: Cursor<Vec<u8>> = Default::default();
-        // drop the writer early to release the borrow.
-        {
-            let mut writer = builder.build(&mut buf);
-            writer.write(&batch).unwrap();
-        }
-        buf.set_position(0);
-
-        let mut reader = Reader::new(
-            buf,
-            Arc::new(schema),
-            false,
-            None,
-            3,
-            // starting at row 2 and up to row 6.
-            None,
-            None,
-        );
-        let rb = reader.next().unwrap().unwrap();
-        let c1 = rb.column(0).as_any().downcast_ref::<Date32Array>().unwrap();
-        let c2 = rb.column(1).as_any().downcast_ref::<Date64Array>().unwrap();
-
-        let actual = c1.into_iter().collect::<Vec<_>>();
-        let expected = vec![Some(3), Some(2), Some(1)];
-        assert_eq!(actual, expected);
-        let actual = c2.into_iter().collect::<Vec<_>>();
-        let expected = vec![Some(3), Some(2), Some(1)];
-        assert_eq!(actual, expected);
-    }
-}
diff --git a/rust/arrow/src/datatypes/datatype.rs b/rust/arrow/src/datatypes/datatype.rs
deleted file mode 100644
index 122cbdd5e47..00000000000
--- a/rust/arrow/src/datatypes/datatype.rs
+++ /dev/null
@@ -1,477 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::fmt;
-
-use serde_derive::{Deserialize, Serialize};
-use serde_json::{json, Value, Value::String as VString};
-
-use crate::error::{ArrowError, Result};
-
-use super::Field;
-
-/// The set of datatypes that are supported by this implementation of Apache Arrow.
-///
-/// The Arrow specification on data types includes some more types.
-/// See also [`Schema.fbs`](https://github.com/apache/arrow/blob/master/format/Schema.fbs)
-/// for Arrow's specification.
-///
-/// The variants of this enum include primitive fixed size types as well as parametric or
-/// nested types.
-/// Currently the Rust implementation supports the following  nested types:
-///  - `List<T>`
-///  - `Struct<T, U, V, ...>`
-///
-/// Nested types can themselves be nested within other arrays.
-/// For more information on these types please see
-/// [the physical memory layout of Apache Arrow](https://arrow.apache.org/docs/format/Columnar.html#physical-memory-layout).
-#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
-pub enum DataType {
-    /// Null type
-    Null,
-    /// A boolean datatype representing the values `true` and `false`.
-    Boolean,
-    /// A signed 8-bit integer.
-    Int8,
-    /// A signed 16-bit integer.
-    Int16,
-    /// A signed 32-bit integer.
-    Int32,
-    /// A signed 64-bit integer.
-    Int64,
-    /// An unsigned 8-bit integer.
-    UInt8,
-    /// An unsigned 16-bit integer.
-    UInt16,
-    /// An unsigned 32-bit integer.
-    UInt32,
-    /// An unsigned 64-bit integer.
-    UInt64,
-    /// A 16-bit floating point number.
-    Float16,
-    /// A 32-bit floating point number.
-    Float32,
-    /// A 64-bit floating point number.
-    Float64,
-    /// A timestamp with an optional timezone.
-    ///
-    /// Time is measured as a Unix epoch, counting the seconds from
-    /// 00:00:00.000 on 1 January 1970, excluding leap seconds,
-    /// as a 64-bit integer.
-    ///
-    /// The time zone is a string indicating the name of a time zone, one of:
-    ///
-    /// * As used in the Olson time zone database (the "tz database" or
-    ///   "tzdata"), such as "America/New_York"
-    /// * An absolute time zone offset of the form +XX:XX or -XX:XX, such as +07:30
-    Timestamp(TimeUnit, Option<String>),
-    /// A 32-bit date representing the elapsed time since UNIX epoch (1970-01-01)
-    /// in days (32 bits).
-    Date32,
-    /// A 64-bit date representing the elapsed time since UNIX epoch (1970-01-01)
-    /// in milliseconds (64 bits). Values are evenly divisible by 86400000.
-    Date64,
-    /// A 32-bit time representing the elapsed time since midnight in the unit of `TimeUnit`.
-    Time32(TimeUnit),
-    /// A 64-bit time representing the elapsed time since midnight in the unit of `TimeUnit`.
-    Time64(TimeUnit),
-    /// Measure of elapsed time in either seconds, milliseconds, microseconds or nanoseconds.
-    Duration(TimeUnit),
-    /// A "calendar" interval which models types that don't necessarily
-    /// have a precise duration without the context of a base timestamp (e.g.
-    /// days can differ in length during day light savings time transitions).
-    Interval(IntervalUnit),
-    /// Opaque binary data of variable length.
-    Binary,
-    /// Opaque binary data of fixed size.
-    /// Enum parameter specifies the number of bytes per value.
-    FixedSizeBinary(i32),
-    /// Opaque binary data of variable length and 64-bit offsets.
-    LargeBinary,
-    /// A variable-length string in Unicode with UTF-8 encoding.
-    Utf8,
-    /// A variable-length string in Unicode with UFT-8 encoding and 64-bit offsets.
-    LargeUtf8,
-    /// A list of some logical data type with variable length.
-    List(Box<Field>),
-    /// A list of some logical data type with fixed length.
-    FixedSizeList(Box<Field>, i32),
-    /// A list of some logical data type with variable length and 64-bit offsets.
-    LargeList(Box<Field>),
-    /// A nested datatype that contains a number of sub-fields.
-    Struct(Vec<Field>),
-    /// A nested datatype that can represent slots of differing types.
-    Union(Vec<Field>),
-    /// A dictionary encoded array (`key_type`, `value_type`), where
-    /// each array element is an index of `key_type` into an
-    /// associated dictionary of `value_type`.
-    ///
-    /// Dictionary arrays are used to store columns of `value_type`
-    /// that contain many repeated values using less memory, but with
-    /// a higher CPU overhead for some operations.
-    ///
-    /// This type mostly used to represent low cardinality string
-    /// arrays or a limited set of primitive types as integers.
-    Dictionary(Box<DataType>, Box<DataType>),
-    /// Decimal value with precision and scale
-    Decimal(usize, usize),
-}
-
-/// An absolute length of time in seconds, milliseconds, microseconds or nanoseconds.
-#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
-pub enum TimeUnit {
-    /// Time in seconds.
-    Second,
-    /// Time in milliseconds.
-    Millisecond,
-    /// Time in microseconds.
-    Microsecond,
-    /// Time in nanoseconds.
-    Nanosecond,
-}
-
-/// YEAR_MONTH or DAY_TIME interval in SQL style.
-#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
-pub enum IntervalUnit {
-    /// Indicates the number of elapsed whole months, stored as 4-byte integers.
-    YearMonth,
-    /// Indicates the number of elapsed days and milliseconds,
-    /// stored as 2 contiguous 32-bit integers (days, milliseconds) (8-bytes in total).
-    DayTime,
-}
-
-impl fmt::Display for DataType {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "{:?}", self)
-    }
-}
-
-impl DataType {
-    /// Parse a data type from a JSON representation.
-    pub(crate) fn from(json: &Value) -> Result<DataType> {
-        let default_field = Field::new("", DataType::Boolean, true);
-        match *json {
-            Value::Object(ref map) => match map.get("name") {
-                Some(s) if s == "null" => Ok(DataType::Null),
-                Some(s) if s == "bool" => Ok(DataType::Boolean),
-                Some(s) if s == "binary" => Ok(DataType::Binary),
-                Some(s) if s == "largebinary" => Ok(DataType::LargeBinary),
-                Some(s) if s == "utf8" => Ok(DataType::Utf8),
-                Some(s) if s == "largeutf8" => Ok(DataType::LargeUtf8),
-                Some(s) if s == "fixedsizebinary" => {
-                    // return a list with any type as its child isn't defined in the map
-                    if let Some(Value::Number(size)) = map.get("byteWidth") {
-                        Ok(DataType::FixedSizeBinary(size.as_i64().unwrap() as i32))
-                    } else {
-                        Err(ArrowError::ParseError(
-                            "Expecting a byteWidth for fixedsizebinary".to_string(),
-                        ))
-                    }
-                }
-                Some(s) if s == "decimal" => {
-                    // return a list with any type as its child isn't defined in the map
-                    let precision = match map.get("precision") {
-                        Some(p) => Ok(p.as_u64().unwrap() as usize),
-                        None => Err(ArrowError::ParseError(
-                            "Expecting a precision for decimal".to_string(),
-                        )),
-                    };
-                    let scale = match map.get("scale") {
-                        Some(s) => Ok(s.as_u64().unwrap() as usize),
-                        _ => Err(ArrowError::ParseError(
-                            "Expecting a scale for decimal".to_string(),
-                        )),
-                    };
-
-                    Ok(DataType::Decimal(precision?, scale?))
-                }
-                Some(s) if s == "floatingpoint" => match map.get("precision") {
-                    Some(p) if p == "HALF" => Ok(DataType::Float16),
-                    Some(p) if p == "SINGLE" => Ok(DataType::Float32),
-                    Some(p) if p == "DOUBLE" => Ok(DataType::Float64),
-                    _ => Err(ArrowError::ParseError(
-                        "floatingpoint precision missing or invalid".to_string(),
-                    )),
-                },
-                Some(s) if s == "timestamp" => {
-                    let unit = match map.get("unit") {
-                        Some(p) if p == "SECOND" => Ok(TimeUnit::Second),
-                        Some(p) if p == "MILLISECOND" => Ok(TimeUnit::Millisecond),
-                        Some(p) if p == "MICROSECOND" => Ok(TimeUnit::Microsecond),
-                        Some(p) if p == "NANOSECOND" => Ok(TimeUnit::Nanosecond),
-                        _ => Err(ArrowError::ParseError(
-                            "timestamp unit missing or invalid".to_string(),
-                        )),
-                    };
-                    let tz = match map.get("timezone") {
-                        None => Ok(None),
-                        Some(VString(tz)) => Ok(Some(tz.clone())),
-                        _ => Err(ArrowError::ParseError(
-                            "timezone must be a string".to_string(),
-                        )),
-                    };
-                    Ok(DataType::Timestamp(unit?, tz?))
-                }
-                Some(s) if s == "date" => match map.get("unit") {
-                    Some(p) if p == "DAY" => Ok(DataType::Date32),
-                    Some(p) if p == "MILLISECOND" => Ok(DataType::Date64),
-                    _ => Err(ArrowError::ParseError(
-                        "date unit missing or invalid".to_string(),
-                    )),
-                },
-                Some(s) if s == "time" => {
-                    let unit = match map.get("unit") {
-                        Some(p) if p == "SECOND" => Ok(TimeUnit::Second),
-                        Some(p) if p == "MILLISECOND" => Ok(TimeUnit::Millisecond),
-                        Some(p) if p == "MICROSECOND" => Ok(TimeUnit::Microsecond),
-                        Some(p) if p == "NANOSECOND" => Ok(TimeUnit::Nanosecond),
-                        _ => Err(ArrowError::ParseError(
-                            "time unit missing or invalid".to_string(),
-                        )),
-                    };
-                    match map.get("bitWidth") {
-                        Some(p) if p == 32 => Ok(DataType::Time32(unit?)),
-                        Some(p) if p == 64 => Ok(DataType::Time64(unit?)),
-                        _ => Err(ArrowError::ParseError(
-                            "time bitWidth missing or invalid".to_string(),
-                        )),
-                    }
-                }
-                Some(s) if s == "duration" => match map.get("unit") {
-                    Some(p) if p == "SECOND" => Ok(DataType::Duration(TimeUnit::Second)),
-                    Some(p) if p == "MILLISECOND" => {
-                        Ok(DataType::Duration(TimeUnit::Millisecond))
-                    }
-                    Some(p) if p == "MICROSECOND" => {
-                        Ok(DataType::Duration(TimeUnit::Microsecond))
-                    }
-                    Some(p) if p == "NANOSECOND" => {
-                        Ok(DataType::Duration(TimeUnit::Nanosecond))
-                    }
-                    _ => Err(ArrowError::ParseError(
-                        "time unit missing or invalid".to_string(),
-                    )),
-                },
-                Some(s) if s == "interval" => match map.get("unit") {
-                    Some(p) if p == "DAY_TIME" => {
-                        Ok(DataType::Interval(IntervalUnit::DayTime))
-                    }
-                    Some(p) if p == "YEAR_MONTH" => {
-                        Ok(DataType::Interval(IntervalUnit::YearMonth))
-                    }
-                    _ => Err(ArrowError::ParseError(
-                        "interval unit missing or invalid".to_string(),
-                    )),
-                },
-                Some(s) if s == "int" => match map.get("isSigned") {
-                    Some(&Value::Bool(true)) => match map.get("bitWidth") {
-                        Some(&Value::Number(ref n)) => match n.as_u64() {
-                            Some(8) => Ok(DataType::Int8),
-                            Some(16) => Ok(DataType::Int16),
-                            Some(32) => Ok(DataType::Int32),
-                            Some(64) => Ok(DataType::Int64),
-                            _ => Err(ArrowError::ParseError(
-                                "int bitWidth missing or invalid".to_string(),
-                            )),
-                        },
-                        _ => Err(ArrowError::ParseError(
-                            "int bitWidth missing or invalid".to_string(),
-                        )),
-                    },
-                    Some(&Value::Bool(false)) => match map.get("bitWidth") {
-                        Some(&Value::Number(ref n)) => match n.as_u64() {
-                            Some(8) => Ok(DataType::UInt8),
-                            Some(16) => Ok(DataType::UInt16),
-                            Some(32) => Ok(DataType::UInt32),
-                            Some(64) => Ok(DataType::UInt64),
-                            _ => Err(ArrowError::ParseError(
-                                "int bitWidth missing or invalid".to_string(),
-                            )),
-                        },
-                        _ => Err(ArrowError::ParseError(
-                            "int bitWidth missing or invalid".to_string(),
-                        )),
-                    },
-                    _ => Err(ArrowError::ParseError(
-                        "int signed missing or invalid".to_string(),
-                    )),
-                },
-                Some(s) if s == "list" => {
-                    // return a list with any type as its child isn't defined in the map
-                    Ok(DataType::List(Box::new(default_field)))
-                }
-                Some(s) if s == "largelist" => {
-                    // return a largelist with any type as its child isn't defined in the map
-                    Ok(DataType::LargeList(Box::new(default_field)))
-                }
-                Some(s) if s == "fixedsizelist" => {
-                    // return a list with any type as its child isn't defined in the map
-                    if let Some(Value::Number(size)) = map.get("listSize") {
-                        Ok(DataType::FixedSizeList(
-                            Box::new(default_field),
-                            size.as_i64().unwrap() as i32,
-                        ))
-                    } else {
-                        Err(ArrowError::ParseError(
-                            "Expecting a listSize for fixedsizelist".to_string(),
-                        ))
-                    }
-                }
-                Some(s) if s == "struct" => {
-                    // return an empty `struct` type as its children aren't defined in the map
-                    Ok(DataType::Struct(vec![]))
-                }
-                Some(other) => Err(ArrowError::ParseError(format!(
-                    "invalid or unsupported type name: {} in {:?}",
-                    other, json
-                ))),
-                None => Err(ArrowError::ParseError("type name missing".to_string())),
-            },
-            _ => Err(ArrowError::ParseError(
-                "invalid json value type".to_string(),
-            )),
-        }
-    }
-
-    /// Generate a JSON representation of the data type.
-    pub fn to_json(&self) -> Value {
-        match self {
-            DataType::Null => json!({"name": "null"}),
-            DataType::Boolean => json!({"name": "bool"}),
-            DataType::Int8 => json!({"name": "int", "bitWidth": 8, "isSigned": true}),
-            DataType::Int16 => json!({"name": "int", "bitWidth": 16, "isSigned": true}),
-            DataType::Int32 => json!({"name": "int", "bitWidth": 32, "isSigned": true}),
-            DataType::Int64 => json!({"name": "int", "bitWidth": 64, "isSigned": true}),
-            DataType::UInt8 => json!({"name": "int", "bitWidth": 8, "isSigned": false}),
-            DataType::UInt16 => json!({"name": "int", "bitWidth": 16, "isSigned": false}),
-            DataType::UInt32 => json!({"name": "int", "bitWidth": 32, "isSigned": false}),
-            DataType::UInt64 => json!({"name": "int", "bitWidth": 64, "isSigned": false}),
-            DataType::Float16 => json!({"name": "floatingpoint", "precision": "HALF"}),
-            DataType::Float32 => json!({"name": "floatingpoint", "precision": "SINGLE"}),
-            DataType::Float64 => json!({"name": "floatingpoint", "precision": "DOUBLE"}),
-            DataType::Utf8 => json!({"name": "utf8"}),
-            DataType::LargeUtf8 => json!({"name": "largeutf8"}),
-            DataType::Binary => json!({"name": "binary"}),
-            DataType::LargeBinary => json!({"name": "largebinary"}),
-            DataType::FixedSizeBinary(byte_width) => {
-                json!({"name": "fixedsizebinary", "byteWidth": byte_width})
-            }
-            DataType::Struct(_) => json!({"name": "struct"}),
-            DataType::Union(_) => json!({"name": "union"}),
-            DataType::List(_) => json!({ "name": "list"}),
-            DataType::LargeList(_) => json!({ "name": "largelist"}),
-            DataType::FixedSizeList(_, length) => {
-                json!({"name":"fixedsizelist", "listSize": length})
-            }
-            DataType::Time32(unit) => {
-                json!({"name": "time", "bitWidth": 32, "unit": match unit {
-                    TimeUnit::Second => "SECOND",
-                    TimeUnit::Millisecond => "MILLISECOND",
-                    TimeUnit::Microsecond => "MICROSECOND",
-                    TimeUnit::Nanosecond => "NANOSECOND",
-                }})
-            }
-            DataType::Time64(unit) => {
-                json!({"name": "time", "bitWidth": 64, "unit": match unit {
-                    TimeUnit::Second => "SECOND",
-                    TimeUnit::Millisecond => "MILLISECOND",
-                    TimeUnit::Microsecond => "MICROSECOND",
-                    TimeUnit::Nanosecond => "NANOSECOND",
-                }})
-            }
-            DataType::Date32 => {
-                json!({"name": "date", "unit": "DAY"})
-            }
-            DataType::Date64 => {
-                json!({"name": "date", "unit": "MILLISECOND"})
-            }
-            DataType::Timestamp(unit, None) => {
-                json!({"name": "timestamp", "unit": match unit {
-                    TimeUnit::Second => "SECOND",
-                    TimeUnit::Millisecond => "MILLISECOND",
-                    TimeUnit::Microsecond => "MICROSECOND",
-                    TimeUnit::Nanosecond => "NANOSECOND",
-                }})
-            }
-            DataType::Timestamp(unit, Some(tz)) => {
-                json!({"name": "timestamp", "unit": match unit {
-                    TimeUnit::Second => "SECOND",
-                    TimeUnit::Millisecond => "MILLISECOND",
-                    TimeUnit::Microsecond => "MICROSECOND",
-                    TimeUnit::Nanosecond => "NANOSECOND",
-                }, "timezone": tz})
-            }
-            DataType::Interval(unit) => json!({"name": "interval", "unit": match unit {
-                IntervalUnit::YearMonth => "YEAR_MONTH",
-                IntervalUnit::DayTime => "DAY_TIME",
-            }}),
-            DataType::Duration(unit) => json!({"name": "duration", "unit": match unit {
-                TimeUnit::Second => "SECOND",
-                TimeUnit::Millisecond => "MILLISECOND",
-                TimeUnit::Microsecond => "MICROSECOND",
-                TimeUnit::Nanosecond => "NANOSECOND",
-            }}),
-            DataType::Dictionary(_, _) => json!({ "name": "dictionary"}),
-            DataType::Decimal(precision, scale) => {
-                json!({"name": "decimal", "precision": precision, "scale": scale})
-            }
-        }
-    }
-
-    /// Returns true if this type is numeric: (UInt*, Unit*, or Float*).
-    pub fn is_numeric(t: &DataType) -> bool {
-        use DataType::*;
-        matches!(
-            t,
-            UInt8
-                | UInt16
-                | UInt32
-                | UInt64
-                | Int8
-                | Int16
-                | Int32
-                | Int64
-                | Float32
-                | Float64
-        )
-    }
-
-    /// Compares the datatype with another, ignoring nested field names
-    /// and metadata.
-    pub(crate) fn equals_datatype(&self, other: &DataType) -> bool {
-        match (&self, other) {
-            (DataType::List(a), DataType::List(b))
-            | (DataType::LargeList(a), DataType::LargeList(b)) => {
-                a.is_nullable() == b.is_nullable()
-                    && a.data_type().equals_datatype(b.data_type())
-            }
-            (DataType::FixedSizeList(a, a_size), DataType::FixedSizeList(b, b_size)) => {
-                a_size == b_size
-                    && a.is_nullable() == b.is_nullable()
-                    && a.data_type().equals_datatype(b.data_type())
-            }
-            (DataType::Struct(a), DataType::Struct(b)) => {
-                a.len() == b.len()
-                    && a.iter().zip(b).all(|(a, b)| {
-                        a.is_nullable() == b.is_nullable()
-                            && a.data_type().equals_datatype(b.data_type())
-                    })
-            }
-            _ => self == other,
-        }
-    }
-}
diff --git a/rust/arrow/src/datatypes/field.rs b/rust/arrow/src/datatypes/field.rs
deleted file mode 100644
index a471f12ef95..00000000000
--- a/rust/arrow/src/datatypes/field.rs
+++ /dev/null
@@ -1,541 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::collections::BTreeMap;
-
-use serde_derive::{Deserialize, Serialize};
-use serde_json::{json, Value};
-
-use crate::error::{ArrowError, Result};
-
-use super::DataType;
-
-/// Contains the meta-data for a single relative type.
-///
-/// The `Schema` object is an ordered collection of `Field` objects.
-#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
-pub struct Field {
-    name: String,
-    data_type: DataType,
-    nullable: bool,
-    dict_id: i64,
-    dict_is_ordered: bool,
-    /// A map of key-value pairs containing additional custom meta data.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    metadata: Option<BTreeMap<String, String>>,
-}
-
-impl Field {
-    /// Creates a new field
-    pub fn new(name: &str, data_type: DataType, nullable: bool) -> Self {
-        Field {
-            name: name.to_string(),
-            data_type,
-            nullable,
-            dict_id: 0,
-            dict_is_ordered: false,
-            metadata: None,
-        }
-    }
-
-    /// Creates a new field
-    pub fn new_dict(
-        name: &str,
-        data_type: DataType,
-        nullable: bool,
-        dict_id: i64,
-        dict_is_ordered: bool,
-    ) -> Self {
-        Field {
-            name: name.to_string(),
-            data_type,
-            nullable,
-            dict_id,
-            dict_is_ordered,
-            metadata: None,
-        }
-    }
-
-    /// Sets the `Field`'s optional custom metadata.
-    /// The metadata is set as `None` for empty map.
-    #[inline]
-    pub fn set_metadata(&mut self, metadata: Option<BTreeMap<String, String>>) {
-        // To make serde happy, convert Some(empty_map) to None.
-        self.metadata = None;
-        if let Some(v) = metadata {
-            if !v.is_empty() {
-                self.metadata = Some(v);
-            }
-        }
-    }
-
-    /// Returns the immutable reference to the `Field`'s optional custom metadata.
-    #[inline]
-    pub const fn metadata(&self) -> &Option<BTreeMap<String, String>> {
-        &self.metadata
-    }
-
-    /// Returns an immutable reference to the `Field`'s name.
-    #[inline]
-    pub const fn name(&self) -> &String {
-        &self.name
-    }
-
-    /// Returns an immutable reference to the `Field`'s  data-type.
-    #[inline]
-    pub const fn data_type(&self) -> &DataType {
-        &self.data_type
-    }
-
-    /// Indicates whether this `Field` supports null values.
-    #[inline]
-    pub const fn is_nullable(&self) -> bool {
-        self.nullable
-    }
-
-    /// Returns the dictionary ID, if this is a dictionary type.
-    #[inline]
-    pub const fn dict_id(&self) -> Option<i64> {
-        match self.data_type {
-            DataType::Dictionary(_, _) => Some(self.dict_id),
-            _ => None,
-        }
-    }
-
-    /// Returns whether this `Field`'s dictionary is ordered, if this is a dictionary type.
-    #[inline]
-    pub const fn dict_is_ordered(&self) -> Option<bool> {
-        match self.data_type {
-            DataType::Dictionary(_, _) => Some(self.dict_is_ordered),
-            _ => None,
-        }
-    }
-
-    /// Parse a `Field` definition from a JSON representation.
-    pub fn from(json: &Value) -> Result<Self> {
-        match *json {
-            Value::Object(ref map) => {
-                let name = match map.get("name") {
-                    Some(&Value::String(ref name)) => name.to_string(),
-                    _ => {
-                        return Err(ArrowError::ParseError(
-                            "Field missing 'name' attribute".to_string(),
-                        ));
-                    }
-                };
-                let nullable = match map.get("nullable") {
-                    Some(&Value::Bool(b)) => b,
-                    _ => {
-                        return Err(ArrowError::ParseError(
-                            "Field missing 'nullable' attribute".to_string(),
-                        ));
-                    }
-                };
-                let data_type = match map.get("type") {
-                    Some(t) => DataType::from(t)?,
-                    _ => {
-                        return Err(ArrowError::ParseError(
-                            "Field missing 'type' attribute".to_string(),
-                        ));
-                    }
-                };
-
-                // Referenced example file: testing/data/arrow-ipc-stream/integration/1.0.0-littleendian/generated_custom_metadata.json.gz
-                let metadata = match map.get("metadata") {
-                    Some(&Value::Array(ref values)) => {
-                        let mut res: BTreeMap<String, String> = BTreeMap::new();
-                        for value in values {
-                            match value.as_object() {
-                                Some(map) => {
-                                    if map.len() != 2 {
-                                        return Err(ArrowError::ParseError(
-                                            "Field 'metadata' must have exact two entries for each key-value map".to_string(),
-                                        ));
-                                    }
-                                    if let (Some(k), Some(v)) =
-                                        (map.get("key"), map.get("value"))
-                                    {
-                                        if let (Some(k_str), Some(v_str)) =
-                                            (k.as_str(), v.as_str())
-                                        {
-                                            res.insert(
-                                                k_str.to_string().clone(),
-                                                v_str.to_string().clone(),
-                                            );
-                                        } else {
-                                            return Err(ArrowError::ParseError("Field 'metadata' must have map value of string type".to_string()));
-                                        }
-                                    } else {
-                                        return Err(ArrowError::ParseError("Field 'metadata' lacks map keys named \"key\" or \"value\"".to_string()));
-                                    }
-                                }
-                                _ => {
-                                    return Err(ArrowError::ParseError(
-                                        "Field 'metadata' contains non-object key-value pair".to_string(),
-                                    ));
-                                }
-                            }
-                        }
-                        Some(res)
-                    }
-                    // We also support map format, because Schema's metadata supports this.
-                    // See https://github.com/apache/arrow/pull/5907
-                    Some(&Value::Object(ref values)) => {
-                        let mut res: BTreeMap<String, String> = BTreeMap::new();
-                        for (k, v) in values {
-                            if let Some(str_value) = v.as_str() {
-                                res.insert(k.clone(), str_value.to_string().clone());
-                            } else {
-                                return Err(ArrowError::ParseError(
-                                    format!("Field 'metadata' contains non-string value for key {}", k),
-                                ));
-                            }
-                        }
-                        Some(res)
-                    }
-                    Some(_) => {
-                        return Err(ArrowError::ParseError(
-                            "Field `metadata` is not json array".to_string(),
-                        ));
-                    }
-                    _ => None,
-                };
-
-                // if data_type is a struct or list, get its children
-                let data_type = match data_type {
-                    DataType::List(_)
-                    | DataType::LargeList(_)
-                    | DataType::FixedSizeList(_, _) => match map.get("children") {
-                        Some(Value::Array(values)) => {
-                            if values.len() != 1 {
-                                return Err(ArrowError::ParseError(
-                                    "Field 'children' must have one element for a list data type".to_string(),
-                                ));
-                            }
-                            match data_type {
-                                    DataType::List(_) => {
-                                        DataType::List(Box::new(Self::from(&values[0])?))
-                                    }
-                                    DataType::LargeList(_) => {
-                                        DataType::LargeList(Box::new(Self::from(&values[0])?))
-                                    }
-                                    DataType::FixedSizeList(_, int) => DataType::FixedSizeList(
-                                        Box::new(Self::from(&values[0])?),
-                                        int,
-                                    ),
-                                    _ => unreachable!(
-                                        "Data type should be a list, largelist or fixedsizelist"
-                                    ),
-                                }
-                        }
-                        Some(_) => {
-                            return Err(ArrowError::ParseError(
-                                "Field 'children' must be an array".to_string(),
-                            ))
-                        }
-                        None => {
-                            return Err(ArrowError::ParseError(
-                                "Field missing 'children' attribute".to_string(),
-                            ));
-                        }
-                    },
-                    DataType::Struct(mut fields) => match map.get("children") {
-                        Some(Value::Array(values)) => {
-                            let struct_fields: Result<Vec<Field>> =
-                                values.iter().map(|v| Field::from(v)).collect();
-                            fields.append(&mut struct_fields?);
-                            DataType::Struct(fields)
-                        }
-                        Some(_) => {
-                            return Err(ArrowError::ParseError(
-                                "Field 'children' must be an array".to_string(),
-                            ))
-                        }
-                        None => {
-                            return Err(ArrowError::ParseError(
-                                "Field missing 'children' attribute".to_string(),
-                            ));
-                        }
-                    },
-                    _ => data_type,
-                };
-
-                let mut dict_id = 0;
-                let mut dict_is_ordered = false;
-
-                let data_type = match map.get("dictionary") {
-                    Some(dictionary) => {
-                        let index_type = match dictionary.get("indexType") {
-                            Some(t) => DataType::from(t)?,
-                            _ => {
-                                return Err(ArrowError::ParseError(
-                                    "Field missing 'indexType' attribute".to_string(),
-                                ));
-                            }
-                        };
-                        dict_id = match dictionary.get("id") {
-                            Some(Value::Number(n)) => n.as_i64().unwrap(),
-                            _ => {
-                                return Err(ArrowError::ParseError(
-                                    "Field missing 'id' attribute".to_string(),
-                                ));
-                            }
-                        };
-                        dict_is_ordered = match dictionary.get("isOrdered") {
-                            Some(&Value::Bool(n)) => n,
-                            _ => {
-                                return Err(ArrowError::ParseError(
-                                    "Field missing 'isOrdered' attribute".to_string(),
-                                ));
-                            }
-                        };
-                        DataType::Dictionary(Box::new(index_type), Box::new(data_type))
-                    }
-                    _ => data_type,
-                };
-                Ok(Field {
-                    name,
-                    data_type,
-                    nullable,
-                    dict_id,
-                    dict_is_ordered,
-                    metadata,
-                })
-            }
-            _ => Err(ArrowError::ParseError(
-                "Invalid json value type for field".to_string(),
-            )),
-        }
-    }
-
-    /// Generate a JSON representation of the `Field`.
-    pub fn to_json(&self) -> Value {
-        let children: Vec<Value> = match self.data_type() {
-            DataType::Struct(fields) => fields.iter().map(|f| f.to_json()).collect(),
-            DataType::List(field) => vec![field.to_json()],
-            DataType::LargeList(field) => vec![field.to_json()],
-            DataType::FixedSizeList(field, _) => vec![field.to_json()],
-            _ => vec![],
-        };
-        match self.data_type() {
-            DataType::Dictionary(ref index_type, ref value_type) => json!({
-                "name": self.name,
-                "nullable": self.nullable,
-                "type": value_type.to_json(),
-                "children": children,
-                "dictionary": {
-                    "id": self.dict_id,
-                    "indexType": index_type.to_json(),
-                    "isOrdered": self.dict_is_ordered
-                }
-            }),
-            _ => json!({
-                "name": self.name,
-                "nullable": self.nullable,
-                "type": self.data_type.to_json(),
-                "children": children
-            }),
-        }
-    }
-
-    /// Merge field into self if it is compatible. Struct will be merged recursively.
-    /// NOTE: `self` may be updated to unexpected state in case of merge failure.
-    ///
-    /// Example:
-    ///
-    /// ```
-    /// use arrow::datatypes::*;
-    ///
-    /// let mut field = Field::new("c1", DataType::Int64, false);
-    /// assert!(field.try_merge(&Field::new("c1", DataType::Int64, true)).is_ok());
-    /// assert!(field.is_nullable());
-    /// ```
-    pub fn try_merge(&mut self, from: &Field) -> Result<()> {
-        // merge metadata
-        match (self.metadata(), from.metadata()) {
-            (Some(self_metadata), Some(from_metadata)) => {
-                let mut merged = self_metadata.clone();
-                for (key, from_value) in from_metadata {
-                    if let Some(self_value) = self_metadata.get(key) {
-                        if self_value != from_value {
-                            return Err(ArrowError::SchemaError(format!(
-                                "Fail to merge field due to conflicting metadata data value for key {}", key),
-                            ));
-                        }
-                    } else {
-                        merged.insert(key.clone(), from_value.clone());
-                    }
-                }
-                self.set_metadata(Some(merged));
-            }
-            (None, Some(from_metadata)) => {
-                self.set_metadata(Some(from_metadata.clone()));
-            }
-            _ => {}
-        }
-        if from.dict_id != self.dict_id {
-            return Err(ArrowError::SchemaError(
-                "Fail to merge schema Field due to conflicting dict_id".to_string(),
-            ));
-        }
-        if from.dict_is_ordered != self.dict_is_ordered {
-            return Err(ArrowError::SchemaError(
-                "Fail to merge schema Field due to conflicting dict_is_ordered"
-                    .to_string(),
-            ));
-        }
-        match &mut self.data_type {
-            DataType::Struct(nested_fields) => match &from.data_type {
-                DataType::Struct(from_nested_fields) => {
-                    for from_field in from_nested_fields {
-                        let mut is_new_field = true;
-                        for self_field in nested_fields.iter_mut() {
-                            if self_field.name != from_field.name {
-                                continue;
-                            }
-                            is_new_field = false;
-                            self_field.try_merge(&from_field)?;
-                        }
-                        if is_new_field {
-                            nested_fields.push(from_field.clone());
-                        }
-                    }
-                }
-                _ => {
-                    return Err(ArrowError::SchemaError(
-                        "Fail to merge schema Field due to conflicting datatype"
-                            .to_string(),
-                    ));
-                }
-            },
-            DataType::Union(nested_fields) => match &from.data_type {
-                DataType::Union(from_nested_fields) => {
-                    for from_field in from_nested_fields {
-                        let mut is_new_field = true;
-                        for self_field in nested_fields.iter_mut() {
-                            if from_field == self_field {
-                                is_new_field = false;
-                                break;
-                            }
-                        }
-                        if is_new_field {
-                            nested_fields.push(from_field.clone());
-                        }
-                    }
-                }
-                _ => {
-                    return Err(ArrowError::SchemaError(
-                        "Fail to merge schema Field due to conflicting datatype"
-                            .to_string(),
-                    ));
-                }
-            },
-            DataType::Null
-            | DataType::Boolean
-            | DataType::Int8
-            | DataType::Int16
-            | DataType::Int32
-            | DataType::Int64
-            | DataType::UInt8
-            | DataType::UInt16
-            | DataType::UInt32
-            | DataType::UInt64
-            | DataType::Float16
-            | DataType::Float32
-            | DataType::Float64
-            | DataType::Timestamp(_, _)
-            | DataType::Date32
-            | DataType::Date64
-            | DataType::Time32(_)
-            | DataType::Time64(_)
-            | DataType::Duration(_)
-            | DataType::Binary
-            | DataType::LargeBinary
-            | DataType::Interval(_)
-            | DataType::LargeList(_)
-            | DataType::List(_)
-            | DataType::Dictionary(_, _)
-            | DataType::FixedSizeList(_, _)
-            | DataType::FixedSizeBinary(_)
-            | DataType::Utf8
-            | DataType::LargeUtf8
-            | DataType::Decimal(_, _) => {
-                if self.data_type != from.data_type {
-                    return Err(ArrowError::SchemaError(
-                        "Fail to merge schema Field due to conflicting datatype"
-                            .to_string(),
-                    ));
-                }
-            }
-        }
-        if from.nullable {
-            self.nullable = from.nullable;
-        }
-
-        Ok(())
-    }
-
-    /// Check to see if `self` is a superset of `other` field. Superset is defined as:
-    ///
-    /// * if nullability doesn't match, self needs to be nullable
-    /// * self.metadata is a superset of other.metadata
-    /// * all other fields are equal
-    pub fn contains(&self, other: &Field) -> bool {
-        if self.name != other.name
-            || self.data_type != other.data_type
-            || self.dict_id != other.dict_id
-            || self.dict_is_ordered != other.dict_is_ordered
-        {
-            return false;
-        }
-
-        if self.nullable != other.nullable && !self.nullable {
-            return false;
-        }
-
-        // make sure self.metadata is a superset of other.metadata
-        match (&self.metadata, &other.metadata) {
-            (None, Some(_)) => {
-                return false;
-            }
-            (Some(self_meta), Some(other_meta)) => {
-                for (k, v) in other_meta.iter() {
-                    match self_meta.get(k) {
-                        Some(s) => {
-                            if s != v {
-                                return false;
-                            }
-                        }
-                        None => {
-                            return false;
-                        }
-                    }
-                }
-            }
-            _ => {}
-        }
-
-        true
-    }
-}
-
-// TODO: improve display with crate https://crates.io/crates/derive_more ?
-impl std::fmt::Display for Field {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        write!(f, "{:?}", self)
-    }
-}
diff --git a/rust/arrow/src/datatypes/mod.rs b/rust/arrow/src/datatypes/mod.rs
deleted file mode 100644
index 175b50b0177..00000000000
--- a/rust/arrow/src/datatypes/mod.rs
+++ /dev/null
@@ -1,1241 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Defines the logical data types of Arrow arrays.
-//!
-//! The most important things you might be looking for are:
-//!  * [`Schema`](crate::datatypes::Schema) to describe a schema.
-//!  * [`Field`](crate::datatypes::Field) to describe one field within a schema.
-//!  * [`DataType`](crate::datatypes::DataType) to describe the type of a field.
-
-use std::sync::Arc;
-
-mod native;
-pub use native::*;
-mod field;
-pub use field::*;
-mod schema;
-pub use schema::*;
-mod numeric;
-pub use numeric::*;
-mod types;
-pub use types::*;
-mod datatype;
-pub use datatype::*;
-
-/// A reference-counted reference to a [`Schema`](crate::datatypes::Schema).
-pub type SchemaRef = Arc<Schema>;
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::error::Result;
-    use serde_json::Value::{Bool, Number as VNumber};
-    use serde_json::{Number, Value};
-    use std::{
-        collections::{BTreeMap, HashMap},
-        f32::NAN,
-    };
-
-    #[test]
-    fn test_list_datatype_equality() {
-        // tests that list type equality is checked while ignoring list names
-        let list_a = DataType::List(Box::new(Field::new("item", DataType::Int32, true)));
-        let list_b = DataType::List(Box::new(Field::new("array", DataType::Int32, true)));
-        let list_c = DataType::List(Box::new(Field::new("item", DataType::Int32, false)));
-        let list_d = DataType::List(Box::new(Field::new("item", DataType::UInt32, true)));
-        assert!(list_a.equals_datatype(&list_b));
-        assert!(!list_a.equals_datatype(&list_c));
-        assert!(!list_b.equals_datatype(&list_c));
-        assert!(!list_a.equals_datatype(&list_d));
-
-        let list_e =
-            DataType::FixedSizeList(Box::new(Field::new("item", list_a, false)), 3);
-        let list_f =
-            DataType::FixedSizeList(Box::new(Field::new("array", list_b, false)), 3);
-        let list_g = DataType::FixedSizeList(
-            Box::new(Field::new("item", DataType::FixedSizeBinary(3), true)),
-            3,
-        );
-        assert!(list_e.equals_datatype(&list_f));
-        assert!(!list_e.equals_datatype(&list_g));
-        assert!(!list_f.equals_datatype(&list_g));
-
-        let list_h = DataType::Struct(vec![Field::new("f1", list_e, true)]);
-        let list_i = DataType::Struct(vec![Field::new("f1", list_f.clone(), true)]);
-        let list_j = DataType::Struct(vec![Field::new("f1", list_f.clone(), false)]);
-        let list_k = DataType::Struct(vec![
-            Field::new("f1", list_f.clone(), false),
-            Field::new("f2", list_g.clone(), false),
-            Field::new("f3", DataType::Utf8, true),
-        ]);
-        let list_l = DataType::Struct(vec![
-            Field::new("ff1", list_f.clone(), false),
-            Field::new("ff2", list_g.clone(), false),
-            Field::new("ff3", DataType::LargeUtf8, true),
-        ]);
-        let list_m = DataType::Struct(vec![
-            Field::new("ff1", list_f, false),
-            Field::new("ff2", list_g, false),
-            Field::new("ff3", DataType::Utf8, true),
-        ]);
-        assert!(list_h.equals_datatype(&list_i));
-        assert!(!list_h.equals_datatype(&list_j));
-        assert!(!list_k.equals_datatype(&list_l));
-        assert!(list_k.equals_datatype(&list_m));
-    }
-
-    #[test]
-    fn create_struct_type() {
-        let _person = DataType::Struct(vec![
-            Field::new("first_name", DataType::Utf8, false),
-            Field::new("last_name", DataType::Utf8, false),
-            Field::new(
-                "address",
-                DataType::Struct(vec![
-                    Field::new("street", DataType::Utf8, false),
-                    Field::new("zip", DataType::UInt16, false),
-                ]),
-                false,
-            ),
-        ]);
-    }
-
-    #[test]
-    fn serde_struct_type() {
-        let kv_array = [("k".to_string(), "v".to_string())];
-        let field_metadata: BTreeMap<String, String> = kv_array.iter().cloned().collect();
-
-        // Non-empty map: should be converted as JSON obj { ... }
-        let mut first_name = Field::new("first_name", DataType::Utf8, false);
-        first_name.set_metadata(Some(field_metadata));
-
-        // Empty map: should be omitted.
-        let mut last_name = Field::new("last_name", DataType::Utf8, false);
-        last_name.set_metadata(Some(BTreeMap::default()));
-
-        let person = DataType::Struct(vec![
-            first_name,
-            last_name,
-            Field::new(
-                "address",
-                DataType::Struct(vec![
-                    Field::new("street", DataType::Utf8, false),
-                    Field::new("zip", DataType::UInt16, false),
-                ]),
-                false,
-            ),
-        ]);
-
-        let serialized = serde_json::to_string(&person).unwrap();
-
-        // NOTE that this is testing the default (derived) serialization format, not the
-        // JSON format specified in metadata.md
-
-        assert_eq!(
-            "{\"Struct\":[\
-             {\"name\":\"first_name\",\"data_type\":\"Utf8\",\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false,\"metadata\":{\"k\":\"v\"}},\
-             {\"name\":\"last_name\",\"data_type\":\"Utf8\",\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false},\
-             {\"name\":\"address\",\"data_type\":{\"Struct\":\
-             [{\"name\":\"street\",\"data_type\":\"Utf8\",\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false},\
-             {\"name\":\"zip\",\"data_type\":\"UInt16\",\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false}\
-             ]},\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false}]}",
-            serialized
-        );
-
-        let deserialized = serde_json::from_str(&serialized).unwrap();
-
-        assert_eq!(person, deserialized);
-    }
-
-    #[test]
-    fn struct_field_to_json() {
-        let f = Field::new(
-            "address",
-            DataType::Struct(vec![
-                Field::new("street", DataType::Utf8, false),
-                Field::new("zip", DataType::UInt16, false),
-            ]),
-            false,
-        );
-        let value: Value = serde_json::from_str(
-            r#"{
-                "name": "address",
-                "nullable": false,
-                "type": {
-                    "name": "struct"
-                },
-                "children": [
-                    {
-                        "name": "street",
-                        "nullable": false,
-                        "type": {
-                            "name": "utf8"
-                        },
-                        "children": []
-                    },
-                    {
-                        "name": "zip",
-                        "nullable": false,
-                        "type": {
-                            "name": "int",
-                            "bitWidth": 16,
-                            "isSigned": false
-                        },
-                        "children": []
-                    }
-                ]
-            }"#,
-        )
-        .unwrap();
-        assert_eq!(value, f.to_json());
-    }
-
-    #[test]
-    fn primitive_field_to_json() {
-        let f = Field::new("first_name", DataType::Utf8, false);
-        let value: Value = serde_json::from_str(
-            r#"{
-                "name": "first_name",
-                "nullable": false,
-                "type": {
-                    "name": "utf8"
-                },
-                "children": []
-            }"#,
-        )
-        .unwrap();
-        assert_eq!(value, f.to_json());
-    }
-    #[test]
-    fn parse_struct_from_json() {
-        let json = r#"
-        {
-            "name": "address",
-            "type": {
-                "name": "struct"
-            },
-            "nullable": false,
-            "children": [
-                {
-                    "name": "street",
-                    "type": {
-                    "name": "utf8"
-                    },
-                    "nullable": false,
-                    "children": []
-                },
-                {
-                    "name": "zip",
-                    "type": {
-                    "name": "int",
-                    "isSigned": false,
-                    "bitWidth": 16
-                    },
-                    "nullable": false,
-                    "children": []
-                }
-            ]
-        }
-        "#;
-        let value: Value = serde_json::from_str(json).unwrap();
-        let dt = Field::from(&value).unwrap();
-
-        let expected = Field::new(
-            "address",
-            DataType::Struct(vec![
-                Field::new("street", DataType::Utf8, false),
-                Field::new("zip", DataType::UInt16, false),
-            ]),
-            false,
-        );
-
-        assert_eq!(expected, dt);
-    }
-
-    #[test]
-    fn parse_utf8_from_json() {
-        let json = "{\"name\":\"utf8\"}";
-        let value: Value = serde_json::from_str(json).unwrap();
-        let dt = DataType::from(&value).unwrap();
-        assert_eq!(DataType::Utf8, dt);
-    }
-
-    #[test]
-    fn parse_int32_from_json() {
-        let json = "{\"name\": \"int\", \"isSigned\": true, \"bitWidth\": 32}";
-        let value: Value = serde_json::from_str(json).unwrap();
-        let dt = DataType::from(&value).unwrap();
-        assert_eq!(DataType::Int32, dt);
-    }
-
-    #[test]
-    fn schema_json() {
-        // Add some custom metadata
-        let metadata: HashMap<String, String> =
-            [("Key".to_string(), "Value".to_string())]
-                .iter()
-                .cloned()
-                .collect();
-
-        let schema = Schema::new_with_metadata(
-            vec![
-                Field::new("c1", DataType::Utf8, false),
-                Field::new("c2", DataType::Binary, false),
-                Field::new("c3", DataType::FixedSizeBinary(3), false),
-                Field::new("c4", DataType::Boolean, false),
-                Field::new("c5", DataType::Date32, false),
-                Field::new("c6", DataType::Date64, false),
-                Field::new("c7", DataType::Time32(TimeUnit::Second), false),
-                Field::new("c8", DataType::Time32(TimeUnit::Millisecond), false),
-                Field::new("c9", DataType::Time32(TimeUnit::Microsecond), false),
-                Field::new("c10", DataType::Time32(TimeUnit::Nanosecond), false),
-                Field::new("c11", DataType::Time64(TimeUnit::Second), false),
-                Field::new("c12", DataType::Time64(TimeUnit::Millisecond), false),
-                Field::new("c13", DataType::Time64(TimeUnit::Microsecond), false),
-                Field::new("c14", DataType::Time64(TimeUnit::Nanosecond), false),
-                Field::new("c15", DataType::Timestamp(TimeUnit::Second, None), false),
-                Field::new(
-                    "c16",
-                    DataType::Timestamp(TimeUnit::Millisecond, Some("UTC".to_string())),
-                    false,
-                ),
-                Field::new(
-                    "c17",
-                    DataType::Timestamp(
-                        TimeUnit::Microsecond,
-                        Some("Africa/Johannesburg".to_string()),
-                    ),
-                    false,
-                ),
-                Field::new(
-                    "c18",
-                    DataType::Timestamp(TimeUnit::Nanosecond, None),
-                    false,
-                ),
-                Field::new("c19", DataType::Interval(IntervalUnit::DayTime), false),
-                Field::new("c20", DataType::Interval(IntervalUnit::YearMonth), false),
-                Field::new(
-                    "c21",
-                    DataType::List(Box::new(Field::new("item", DataType::Boolean, true))),
-                    false,
-                ),
-                Field::new(
-                    "c22",
-                    DataType::FixedSizeList(
-                        Box::new(Field::new("bools", DataType::Boolean, false)),
-                        5,
-                    ),
-                    false,
-                ),
-                Field::new(
-                    "c23",
-                    DataType::List(Box::new(Field::new(
-                        "inner_list",
-                        DataType::List(Box::new(Field::new(
-                            "struct",
-                            DataType::Struct(vec![]),
-                            true,
-                        ))),
-                        false,
-                    ))),
-                    true,
-                ),
-                Field::new(
-                    "c24",
-                    DataType::Struct(vec![
-                        Field::new("a", DataType::Utf8, false),
-                        Field::new("b", DataType::UInt16, false),
-                    ]),
-                    false,
-                ),
-                Field::new("c25", DataType::Interval(IntervalUnit::YearMonth), true),
-                Field::new("c26", DataType::Interval(IntervalUnit::DayTime), true),
-                Field::new("c27", DataType::Duration(TimeUnit::Second), false),
-                Field::new("c28", DataType::Duration(TimeUnit::Millisecond), false),
-                Field::new("c29", DataType::Duration(TimeUnit::Microsecond), false),
-                Field::new("c30", DataType::Duration(TimeUnit::Nanosecond), false),
-                Field::new_dict(
-                    "c31",
-                    DataType::Dictionary(
-                        Box::new(DataType::Int32),
-                        Box::new(DataType::Utf8),
-                    ),
-                    true,
-                    123,
-                    true,
-                ),
-                Field::new("c32", DataType::LargeBinary, true),
-                Field::new("c33", DataType::LargeUtf8, true),
-                Field::new(
-                    "c34",
-                    DataType::LargeList(Box::new(Field::new(
-                        "inner_large_list",
-                        DataType::LargeList(Box::new(Field::new(
-                            "struct",
-                            DataType::Struct(vec![]),
-                            false,
-                        ))),
-                        true,
-                    ))),
-                    true,
-                ),
-            ],
-            metadata,
-        );
-
-        let expected = schema.to_json();
-        let json = r#"{
-                "fields": [
-                    {
-                        "name": "c1",
-                        "nullable": false,
-                        "type": {
-                            "name": "utf8"
-                        },
-                        "children": []
-                    },
-                    {
-                        "name": "c2",
-                        "nullable": false,
-                        "type": {
-                            "name": "binary"
-                        },
-                        "children": []
-                    },
-                    {
-                        "name": "c3",
-                        "nullable": false,
-                        "type": {
-                            "name": "fixedsizebinary",
-                            "byteWidth": 3
-                        },
-                        "children": []
-                    },
-                    {
-                        "name": "c4",
-                        "nullable": false,
-                        "type": {
-                            "name": "bool"
-                        },
-                        "children": []
-                    },
-                    {
-                        "name": "c5",
-                        "nullable": false,
-                        "type": {
-                            "name": "date",
-                            "unit": "DAY"
-                        },
-                        "children": []
-                    },
-                    {
-                        "name": "c6",
-                        "nullable": false,
-                        "type": {
-                            "name": "date",
-                            "unit": "MILLISECOND"
-                        },
-                        "children": []
-                    },
-                    {
-                        "name": "c7",
-                        "nullable": false,
-                        "type": {
-                            "name": "time",
-                            "bitWidth": 32,
-                            "unit": "SECOND"
-                        },
-                        "children": []
-                    },
-                    {
-                        "name": "c8",
-                        "nullable": false,
-                        "type": {
-                            "name": "time",
-                            "bitWidth": 32,
-                            "unit": "MILLISECOND"
-                        },
-                        "children": []
-                    },
-                    {
-                        "name": "c9",
-                        "nullable": false,
-                        "type": {
-                            "name": "time",
-                            "bitWidth": 32,
-                            "unit": "MICROSECOND"
-                        },
-                        "children": []
-                    },
-                    {
-                        "name": "c10",
-                        "nullable": false,
-                        "type": {
-                            "name": "time",
-                            "bitWidth": 32,
-                            "unit": "NANOSECOND"
-                        },
-                        "children": []
-                    },
-                    {
-                        "name": "c11",
-                        "nullable": false,
-                        "type": {
-                            "name": "time",
-                            "bitWidth": 64,
-                            "unit": "SECOND"
-                        },
-                        "children": []
-                    },
-                    {
-                        "name": "c12",
-                        "nullable": false,
-                        "type": {
-                            "name": "time",
-                            "bitWidth": 64,
-                            "unit": "MILLISECOND"
-                        },
-                        "children": []
-                    },
-                    {
-                        "name": "c13",
-                        "nullable": false,
-                        "type": {
-                            "name": "time",
-                            "bitWidth": 64,
-                            "unit": "MICROSECOND"
-                        },
-                        "children": []
-                    },
-                    {
-                        "name": "c14",
-                        "nullable": false,
-                        "type": {
-                            "name": "time",
-                            "bitWidth": 64,
-                            "unit": "NANOSECOND"
-                        },
-                        "children": []
-                    },
-                    {
-                        "name": "c15",
-                        "nullable": false,
-                        "type": {
-                            "name": "timestamp",
-                            "unit": "SECOND"
-                        },
-                        "children": []
-                    },
-                    {
-                        "name": "c16",
-                        "nullable": false,
-                        "type": {
-                            "name": "timestamp",
-                            "unit": "MILLISECOND",
-                            "timezone": "UTC"
-                        },
-                        "children": []
-                    },
-                    {
-                        "name": "c17",
-                        "nullable": false,
-                        "type": {
-                            "name": "timestamp",
-                            "unit": "MICROSECOND",
-                            "timezone": "Africa/Johannesburg"
-                        },
-                        "children": []
-                    },
-                    {
-                        "name": "c18",
-                        "nullable": false,
-                        "type": {
-                            "name": "timestamp",
-                            "unit": "NANOSECOND"
-                        },
-                        "children": []
-                    },
-                    {
-                        "name": "c19",
-                        "nullable": false,
-                        "type": {
-                            "name": "interval",
-                            "unit": "DAY_TIME"
-                        },
-                        "children": []
-                    },
-                    {
-                        "name": "c20",
-                        "nullable": false,
-                        "type": {
-                            "name": "interval",
-                            "unit": "YEAR_MONTH"
-                        },
-                        "children": []
-                    },
-                    {
-                        "name": "c21",
-                        "nullable": false,
-                        "type": {
-                            "name": "list"
-                        },
-                        "children": [
-                            {
-                                "name": "item",
-                                "nullable": true,
-                                "type": {
-                                    "name": "bool"
-                                },
-                                "children": []
-                            }
-                        ]
-                    },
-                    {
-                        "name": "c22",
-                        "nullable": false,
-                        "type": {
-                            "name": "fixedsizelist",
-                            "listSize": 5
-                        },
-                        "children": [
-                            {
-                                "name": "bools",
-                                "nullable": false,
-                                "type": {
-                                    "name": "bool"
-                                },
-                                "children": []
-                            }
-                        ]
-                    },
-                    {
-                        "name": "c23",
-                        "nullable": true,
-                        "type": {
-                            "name": "list"
-                        },
-                        "children": [
-                            {
-                                "name": "inner_list",
-                                "nullable": false,
-                                "type": {
-                                    "name": "list"
-                                },
-                                "children": [
-                                    {
-                                        "name": "struct",
-                                        "nullable": true,
-                                        "type": {
-                                            "name": "struct"
-                                        },
-                                        "children": []
-                                    }
-                                ]
-                            }
-                        ]
-                    },
-                    {
-                        "name": "c24",
-                        "nullable": false,
-                        "type": {
-                            "name": "struct"
-                        },
-                        "children": [
-                            {
-                                "name": "a",
-                                "nullable": false,
-                                "type": {
-                                    "name": "utf8"
-                                },
-                                "children": []
-                            },
-                            {
-                                "name": "b",
-                                "nullable": false,
-                                "type": {
-                                    "name": "int",
-                                    "bitWidth": 16,
-                                    "isSigned": false
-                                },
-                                "children": []
-                            }
-                        ]
-                    },
-                    {
-                        "name": "c25",
-                        "nullable": true,
-                        "type": {
-                            "name": "interval",
-                            "unit": "YEAR_MONTH"
-                        },
-                        "children": []
-                    },
-                    {
-                        "name": "c26",
-                        "nullable": true,
-                        "type": {
-                            "name": "interval",
-                            "unit": "DAY_TIME"
-                        },
-                        "children": []
-                    },
-                    {
-                        "name": "c27",
-                        "nullable": false,
-                        "type": {
-                            "name": "duration",
-                            "unit": "SECOND"
-                        },
-                        "children": []
-                    },
-                    {
-                        "name": "c28",
-                        "nullable": false,
-                        "type": {
-                            "name": "duration",
-                            "unit": "MILLISECOND"
-                        },
-                        "children": []
-                    },
-                    {
-                        "name": "c29",
-                        "nullable": false,
-                        "type": {
-                            "name": "duration",
-                            "unit": "MICROSECOND"
-                        },
-                        "children": []
-                    },
-                    {
-                        "name": "c30",
-                        "nullable": false,
-                        "type": {
-                            "name": "duration",
-                            "unit": "NANOSECOND"
-                        },
-                        "children": []
-                    },
-                    {
-                        "name": "c31",
-                        "nullable": true,
-                        "children": [],
-                        "type": {
-                          "name": "utf8"
-                        },
-                        "dictionary": {
-                          "id": 123,
-                          "indexType": {
-                            "name": "int",
-                            "bitWidth": 32,
-                            "isSigned": true
-                          },
-                          "isOrdered": true
-                        }
-                    },
-                    {
-                        "name": "c32",
-                        "nullable": true,
-                        "type": {
-                          "name": "largebinary"
-                        },
-                        "children": []
-                    },
-                    {
-                        "name": "c33",
-                        "nullable": true,
-                        "type": {
-                          "name": "largeutf8"
-                        },
-                        "children": []
-                    },
-                    {
-                        "name": "c34",
-                        "nullable": true,
-                        "type": {
-                          "name": "largelist"
-                        },
-                        "children": [
-                            {
-                                "name": "inner_large_list",
-                                "nullable": true,
-                                "type": {
-                                    "name": "largelist"
-                                },
-                                "children": [
-                                    {
-                                        "name": "struct",
-                                        "nullable": false,
-                                        "type": {
-                                            "name": "struct"
-                                        },
-                                        "children": []
-                                    }
-                                ]
-                            }
-                        ]
-                    }
-                ],
-                "metadata" : {
-                    "Key": "Value"
-                }
-            }"#;
-        let value: Value = serde_json::from_str(&json).unwrap();
-        assert_eq!(expected, value);
-
-        // convert back to a schema
-        let value: Value = serde_json::from_str(&json).unwrap();
-        let schema2 = Schema::from(&value).unwrap();
-
-        assert_eq!(schema, schema2);
-
-        // Check that empty metadata produces empty value in JSON and can be parsed
-        let json = r#"{
-                "fields": [
-                    {
-                        "name": "c1",
-                        "nullable": false,
-                        "type": {
-                            "name": "utf8"
-                        },
-                        "children": []
-                    }
-                ],
-                "metadata": {}
-            }"#;
-        let value: Value = serde_json::from_str(&json).unwrap();
-        let schema = Schema::from(&value).unwrap();
-        assert!(schema.metadata.is_empty());
-
-        // Check that metadata field is not required in the JSON.
-        let json = r#"{
-                "fields": [
-                    {
-                        "name": "c1",
-                        "nullable": false,
-                        "type": {
-                            "name": "utf8"
-                        },
-                        "children": []
-                    }
-                ]
-            }"#;
-        let value: Value = serde_json::from_str(&json).unwrap();
-        let schema = Schema::from(&value).unwrap();
-        assert!(schema.metadata.is_empty());
-    }
-
-    #[test]
-    fn create_schema_string() {
-        let schema = person_schema();
-        assert_eq!(schema.to_string(),
-        "Field { name: \"first_name\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: Some({\"k\": \"v\"}) }, \
-        Field { name: \"last_name\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: None }, \
-        Field { name: \"address\", data_type: Struct([\
-            Field { name: \"street\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: None }, \
-            Field { name: \"zip\", data_type: UInt16, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: None }\
-        ]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: None }, \
-        Field { name: \"interests\", data_type: Dictionary(Int32, Utf8), nullable: true, dict_id: 123, dict_is_ordered: true, metadata: None }")
-    }
-
-    #[test]
-    fn schema_field_accessors() {
-        let schema = person_schema();
-
-        // test schema accessors
-        assert_eq!(schema.fields().len(), 4);
-
-        // test field accessors
-        let first_name = &schema.fields()[0];
-        assert_eq!(first_name.name(), "first_name");
-        assert_eq!(first_name.data_type(), &DataType::Utf8);
-        assert_eq!(first_name.is_nullable(), false);
-        assert_eq!(first_name.dict_id(), None);
-        assert_eq!(first_name.dict_is_ordered(), None);
-
-        let metadata = first_name.metadata();
-        assert!(metadata.is_some());
-        let md = metadata.as_ref().unwrap();
-        assert_eq!(md.len(), 1);
-        let key = md.get("k");
-        assert!(key.is_some());
-        assert_eq!(key.unwrap(), "v");
-
-        let interests = &schema.fields()[3];
-        assert_eq!(interests.name(), "interests");
-        assert_eq!(
-            interests.data_type(),
-            &DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8))
-        );
-        assert_eq!(interests.dict_id(), Some(123));
-        assert_eq!(interests.dict_is_ordered(), Some(true));
-    }
-
-    #[test]
-    #[should_panic(
-        expected = "Unable to get field named \\\"nickname\\\". Valid fields: [\\\"first_name\\\", \\\"last_name\\\", \\\"address\\\", \\\"interests\\\"]"
-    )]
-    fn schema_index_of() {
-        let schema = person_schema();
-        assert_eq!(schema.index_of("first_name").unwrap(), 0);
-        assert_eq!(schema.index_of("last_name").unwrap(), 1);
-        schema.index_of("nickname").unwrap();
-    }
-
-    #[test]
-    #[should_panic(
-        expected = "Unable to get field named \\\"nickname\\\". Valid fields: [\\\"first_name\\\", \\\"last_name\\\", \\\"address\\\", \\\"interests\\\"]"
-    )]
-    fn schema_field_with_name() {
-        let schema = person_schema();
-        assert_eq!(
-            schema.field_with_name("first_name").unwrap().name(),
-            "first_name"
-        );
-        assert_eq!(
-            schema.field_with_name("last_name").unwrap().name(),
-            "last_name"
-        );
-        schema.field_with_name("nickname").unwrap();
-    }
-
-    #[test]
-    fn schema_field_with_dict_id() {
-        let schema = person_schema();
-
-        let fields_dict_123: Vec<_> = schema
-            .fields_with_dict_id(123)
-            .iter()
-            .map(|f| f.name())
-            .collect();
-        assert_eq!(fields_dict_123, vec!["interests"]);
-
-        assert!(schema.fields_with_dict_id(456).is_empty());
-    }
-
-    #[test]
-    fn schema_equality() {
-        let schema1 = Schema::new(vec![
-            Field::new("c1", DataType::Utf8, false),
-            Field::new("c2", DataType::Float64, true),
-            Field::new("c3", DataType::LargeBinary, true),
-        ]);
-        let schema2 = Schema::new(vec![
-            Field::new("c1", DataType::Utf8, false),
-            Field::new("c2", DataType::Float64, true),
-            Field::new("c3", DataType::LargeBinary, true),
-        ]);
-
-        assert_eq!(schema1, schema2);
-
-        let schema3 = Schema::new(vec![
-            Field::new("c1", DataType::Utf8, false),
-            Field::new("c2", DataType::Float32, true),
-        ]);
-        let schema4 = Schema::new(vec![
-            Field::new("C1", DataType::Utf8, false),
-            Field::new("C2", DataType::Float64, true),
-        ]);
-
-        assert!(schema1 != schema3);
-        assert!(schema1 != schema4);
-        assert!(schema2 != schema3);
-        assert!(schema2 != schema4);
-        assert!(schema3 != schema4);
-
-        let mut f = Field::new("c1", DataType::Utf8, false);
-        f.set_metadata(Some(
-            [("foo".to_string(), "bar".to_string())]
-                .iter()
-                .cloned()
-                .collect(),
-        ));
-        let schema5 = Schema::new(vec![
-            f,
-            Field::new("c2", DataType::Float64, true),
-            Field::new("c3", DataType::LargeBinary, true),
-        ]);
-        assert!(schema1 != schema5);
-    }
-
-    #[test]
-    fn test_arrow_native_type_to_json() {
-        assert_eq!(Some(Bool(true)), true.into_json_value());
-        assert_eq!(Some(VNumber(Number::from(1))), 1i8.into_json_value());
-        assert_eq!(Some(VNumber(Number::from(1))), 1i16.into_json_value());
-        assert_eq!(Some(VNumber(Number::from(1))), 1i32.into_json_value());
-        assert_eq!(Some(VNumber(Number::from(1))), 1i64.into_json_value());
-        assert_eq!(Some(VNumber(Number::from(1))), 1u8.into_json_value());
-        assert_eq!(Some(VNumber(Number::from(1))), 1u16.into_json_value());
-        assert_eq!(Some(VNumber(Number::from(1))), 1u32.into_json_value());
-        assert_eq!(Some(VNumber(Number::from(1))), 1u64.into_json_value());
-        assert_eq!(
-            Some(VNumber(Number::from_f64(0.01f64).unwrap())),
-            0.01.into_json_value()
-        );
-        assert_eq!(
-            Some(VNumber(Number::from_f64(0.01f64).unwrap())),
-            0.01f64.into_json_value()
-        );
-        assert_eq!(None, NAN.into_json_value());
-    }
-
-    fn person_schema() -> Schema {
-        let kv_array = [("k".to_string(), "v".to_string())];
-        let field_metadata: BTreeMap<String, String> = kv_array.iter().cloned().collect();
-        let mut first_name = Field::new("first_name", DataType::Utf8, false);
-        first_name.set_metadata(Some(field_metadata));
-
-        Schema::new(vec![
-            first_name,
-            Field::new("last_name", DataType::Utf8, false),
-            Field::new(
-                "address",
-                DataType::Struct(vec![
-                    Field::new("street", DataType::Utf8, false),
-                    Field::new("zip", DataType::UInt16, false),
-                ]),
-                false,
-            ),
-            Field::new_dict(
-                "interests",
-                DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
-                true,
-                123,
-                true,
-            ),
-        ])
-    }
-
-    #[test]
-    fn test_try_merge_field_with_metadata() {
-        // 1. Different values for the same key should cause error.
-        let metadata1: BTreeMap<String, String> =
-            [("foo".to_string(), "bar".to_string())]
-                .iter()
-                .cloned()
-                .collect();
-        let mut f1 = Field::new("first_name", DataType::Utf8, false);
-        f1.set_metadata(Some(metadata1));
-
-        let metadata2: BTreeMap<String, String> =
-            [("foo".to_string(), "baz".to_string())]
-                .iter()
-                .cloned()
-                .collect();
-        let mut f2 = Field::new("first_name", DataType::Utf8, false);
-        f2.set_metadata(Some(metadata2));
-
-        assert!(
-            Schema::try_merge(vec![Schema::new(vec![f1]), Schema::new(vec![f2])])
-                .is_err()
-        );
-
-        // 2. None + Some
-        let mut f1 = Field::new("first_name", DataType::Utf8, false);
-        let metadata2: BTreeMap<String, String> =
-            [("missing".to_string(), "value".to_string())]
-                .iter()
-                .cloned()
-                .collect();
-        let mut f2 = Field::new("first_name", DataType::Utf8, false);
-        f2.set_metadata(Some(metadata2));
-
-        assert!(f1.try_merge(&f2).is_ok());
-        assert!(f1.metadata().is_some());
-        assert_eq!(
-            f1.metadata().as_ref().unwrap(),
-            f2.metadata().as_ref().unwrap()
-        );
-
-        // 3. Some + Some
-        let mut f1 = Field::new("first_name", DataType::Utf8, false);
-        f1.set_metadata(Some(
-            [("foo".to_string(), "bar".to_string())]
-                .iter()
-                .cloned()
-                .collect(),
-        ));
-        let mut f2 = Field::new("first_name", DataType::Utf8, false);
-        f2.set_metadata(Some(
-            [("foo2".to_string(), "bar2".to_string())]
-                .iter()
-                .cloned()
-                .collect(),
-        ));
-
-        assert!(f1.try_merge(&f2).is_ok());
-        assert!(f1.metadata().is_some());
-        assert_eq!(
-            f1.metadata().clone().unwrap(),
-            [
-                ("foo".to_string(), "bar".to_string()),
-                ("foo2".to_string(), "bar2".to_string())
-            ]
-            .iter()
-            .cloned()
-            .collect()
-        );
-
-        // 4. Some + None.
-        let mut f1 = Field::new("first_name", DataType::Utf8, false);
-        f1.set_metadata(Some(
-            [("foo".to_string(), "bar".to_string())]
-                .iter()
-                .cloned()
-                .collect(),
-        ));
-        let f2 = Field::new("first_name", DataType::Utf8, false);
-        assert!(f1.try_merge(&f2).is_ok());
-        assert!(f1.metadata().is_some());
-        assert_eq!(
-            f1.metadata().clone().unwrap(),
-            [("foo".to_string(), "bar".to_string())]
-                .iter()
-                .cloned()
-                .collect()
-        );
-
-        // 5. None + None.
-        let mut f1 = Field::new("first_name", DataType::Utf8, false);
-        let f2 = Field::new("first_name", DataType::Utf8, false);
-        assert!(f1.try_merge(&f2).is_ok());
-        assert!(f1.metadata().is_none());
-    }
-
-    #[test]
-    fn test_schema_merge() -> Result<()> {
-        let merged = Schema::try_merge(vec![
-            Schema::new(vec![
-                Field::new("first_name", DataType::Utf8, false),
-                Field::new("last_name", DataType::Utf8, false),
-                Field::new(
-                    "address",
-                    DataType::Struct(vec![Field::new("zip", DataType::UInt16, false)]),
-                    false,
-                ),
-            ]),
-            Schema::new_with_metadata(
-                vec![
-                    // nullable merge
-                    Field::new("last_name", DataType::Utf8, true),
-                    Field::new(
-                        "address",
-                        DataType::Struct(vec![
-                            // add new nested field
-                            Field::new("street", DataType::Utf8, false),
-                            // nullable merge on nested field
-                            Field::new("zip", DataType::UInt16, true),
-                        ]),
-                        false,
-                    ),
-                    // new field
-                    Field::new("number", DataType::Utf8, true),
-                ],
-                [("foo".to_string(), "bar".to_string())]
-                    .iter()
-                    .cloned()
-                    .collect::<HashMap<String, String>>(),
-            ),
-        ])?;
-
-        assert_eq!(
-            merged,
-            Schema::new_with_metadata(
-                vec![
-                    Field::new("first_name", DataType::Utf8, false),
-                    Field::new("last_name", DataType::Utf8, true),
-                    Field::new(
-                        "address",
-                        DataType::Struct(vec![
-                            Field::new("zip", DataType::UInt16, true),
-                            Field::new("street", DataType::Utf8, false),
-                        ]),
-                        false,
-                    ),
-                    Field::new("number", DataType::Utf8, true),
-                ],
-                [("foo".to_string(), "bar".to_string())]
-                    .iter()
-                    .cloned()
-                    .collect::<HashMap<String, String>>()
-            )
-        );
-
-        // support merge union fields
-        assert_eq!(
-            Schema::try_merge(vec![
-                Schema::new(vec![Field::new(
-                    "c1",
-                    DataType::Union(vec![
-                        Field::new("c11", DataType::Utf8, true),
-                        Field::new("c12", DataType::Utf8, true),
-                    ]),
-                    false
-                ),]),
-                Schema::new(vec![Field::new(
-                    "c1",
-                    DataType::Union(vec![
-                        Field::new("c12", DataType::Utf8, true),
-                        Field::new("c13", DataType::Time64(TimeUnit::Second), true),
-                    ]),
-                    false
-                ),])
-            ])?,
-            Schema::new(vec![Field::new(
-                "c1",
-                DataType::Union(vec![
-                    Field::new("c11", DataType::Utf8, true),
-                    Field::new("c12", DataType::Utf8, true),
-                    Field::new("c13", DataType::Time64(TimeUnit::Second), true),
-                ]),
-                false
-            ),]),
-        );
-
-        // incompatible field should throw error
-        assert!(Schema::try_merge(vec![
-            Schema::new(vec![
-                Field::new("first_name", DataType::Utf8, false),
-                Field::new("last_name", DataType::Utf8, false),
-            ]),
-            Schema::new(vec![Field::new("last_name", DataType::Int64, false),])
-        ])
-        .is_err());
-
-        // incompatible metadata should throw error
-        assert!(Schema::try_merge(vec![
-            Schema::new_with_metadata(
-                vec![Field::new("first_name", DataType::Utf8, false)],
-                [("foo".to_string(), "bar".to_string()),]
-                    .iter()
-                    .cloned()
-                    .collect::<HashMap<String, String>>()
-            ),
-            Schema::new_with_metadata(
-                vec![Field::new("last_name", DataType::Utf8, false)],
-                [("foo".to_string(), "baz".to_string()),]
-                    .iter()
-                    .cloned()
-                    .collect::<HashMap<String, String>>()
-            )
-        ])
-        .is_err());
-
-        Ok(())
-    }
-}
diff --git a/rust/arrow/src/datatypes/native.rs b/rust/arrow/src/datatypes/native.rs
deleted file mode 100644
index 6e8cf892237..00000000000
--- a/rust/arrow/src/datatypes/native.rs
+++ /dev/null
@@ -1,333 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use serde_json::{Number, Value};
-
-use super::DataType;
-
-/// Trait declaring any type that is serializable to JSON. This includes all primitive types (bool, i32, etc.).
-pub trait JsonSerializable: 'static {
-    fn into_json_value(self) -> Option<Value>;
-}
-
-/// Trait expressing a Rust type that has the same in-memory representation
-/// as Arrow. This includes `i16`, `f32`, but excludes `bool` (which in arrow is represented in bits).
-/// In little endian machines, types that implement [`ArrowNativeType`] can be memcopied to arrow buffers
-/// as is.
-pub trait ArrowNativeType:
-    std::fmt::Debug
-    + Send
-    + Sync
-    + Copy
-    + PartialOrd
-    + std::str::FromStr
-    + Default
-    + JsonSerializable
-{
-    /// Convert native type from usize.
-    #[inline]
-    fn from_usize(_: usize) -> Option<Self> {
-        None
-    }
-
-    /// Convert native type to usize.
-    #[inline]
-    fn to_usize(&self) -> Option<usize> {
-        None
-    }
-
-    /// Convert native type to isize.
-    #[inline]
-    fn to_isize(&self) -> Option<isize> {
-        None
-    }
-
-    /// Convert native type from i32.
-    #[inline]
-    fn from_i32(_: i32) -> Option<Self> {
-        None
-    }
-
-    /// Convert native type from i64.
-    #[inline]
-    fn from_i64(_: i64) -> Option<Self> {
-        None
-    }
-}
-
-/// Trait bridging the dynamic-typed nature of Arrow (via [`DataType`]) with the
-/// static-typed nature of rust types ([`ArrowNativeType`]) for all types that implement [`ArrowNativeType`].
-pub trait ArrowPrimitiveType: 'static {
-    /// Corresponding Rust native type for the primitive type.
-    type Native: ArrowNativeType;
-
-    /// the corresponding Arrow data type of this primitive type.
-    const DATA_TYPE: DataType;
-
-    /// Returns the byte width of this primitive type.
-    fn get_byte_width() -> usize {
-        std::mem::size_of::<Self::Native>()
-    }
-
-    /// Returns a default value of this primitive type.
-    ///
-    /// This is useful for aggregate array ops like `sum()`, `mean()`.
-    fn default_value() -> Self::Native {
-        Default::default()
-    }
-}
-
-impl JsonSerializable for bool {
-    fn into_json_value(self) -> Option<Value> {
-        Some(self.into())
-    }
-}
-
-impl JsonSerializable for i8 {
-    fn into_json_value(self) -> Option<Value> {
-        Some(self.into())
-    }
-}
-
-impl ArrowNativeType for i8 {
-    #[inline]
-    fn from_usize(v: usize) -> Option<Self> {
-        num::FromPrimitive::from_usize(v)
-    }
-
-    #[inline]
-    fn to_usize(&self) -> Option<usize> {
-        num::ToPrimitive::to_usize(self)
-    }
-
-    #[inline]
-    fn to_isize(&self) -> Option<isize> {
-        num::ToPrimitive::to_isize(self)
-    }
-}
-
-impl JsonSerializable for i16 {
-    fn into_json_value(self) -> Option<Value> {
-        Some(self.into())
-    }
-}
-
-impl ArrowNativeType for i16 {
-    #[inline]
-    fn from_usize(v: usize) -> Option<Self> {
-        num::FromPrimitive::from_usize(v)
-    }
-
-    #[inline]
-    fn to_usize(&self) -> Option<usize> {
-        num::ToPrimitive::to_usize(self)
-    }
-
-    #[inline]
-    fn to_isize(&self) -> Option<isize> {
-        num::ToPrimitive::to_isize(self)
-    }
-}
-
-impl JsonSerializable for i32 {
-    fn into_json_value(self) -> Option<Value> {
-        Some(self.into())
-    }
-}
-
-impl ArrowNativeType for i32 {
-    #[inline]
-    fn from_usize(v: usize) -> Option<Self> {
-        num::FromPrimitive::from_usize(v)
-    }
-
-    #[inline]
-    fn to_usize(&self) -> Option<usize> {
-        num::ToPrimitive::to_usize(self)
-    }
-
-    #[inline]
-    fn to_isize(&self) -> Option<isize> {
-        num::ToPrimitive::to_isize(self)
-    }
-
-    /// Convert native type from i32.
-    #[inline]
-    fn from_i32(val: i32) -> Option<Self> {
-        Some(val)
-    }
-}
-
-impl JsonSerializable for i64 {
-    fn into_json_value(self) -> Option<Value> {
-        Some(Value::Number(Number::from(self)))
-    }
-}
-
-impl ArrowNativeType for i64 {
-    #[inline]
-    fn from_usize(v: usize) -> Option<Self> {
-        num::FromPrimitive::from_usize(v)
-    }
-
-    #[inline]
-    fn to_usize(&self) -> Option<usize> {
-        num::ToPrimitive::to_usize(self)
-    }
-
-    #[inline]
-    fn to_isize(&self) -> Option<isize> {
-        num::ToPrimitive::to_isize(self)
-    }
-
-    /// Convert native type from i64.
-    #[inline]
-    fn from_i64(val: i64) -> Option<Self> {
-        Some(val)
-    }
-}
-
-impl JsonSerializable for u8 {
-    fn into_json_value(self) -> Option<Value> {
-        Some(self.into())
-    }
-}
-
-impl ArrowNativeType for u8 {
-    #[inline]
-    fn from_usize(v: usize) -> Option<Self> {
-        num::FromPrimitive::from_usize(v)
-    }
-
-    #[inline]
-    fn to_usize(&self) -> Option<usize> {
-        num::ToPrimitive::to_usize(self)
-    }
-
-    #[inline]
-    fn to_isize(&self) -> Option<isize> {
-        num::ToPrimitive::to_isize(self)
-    }
-}
-
-impl JsonSerializable for u16 {
-    fn into_json_value(self) -> Option<Value> {
-        Some(self.into())
-    }
-}
-
-impl ArrowNativeType for u16 {
-    #[inline]
-    fn from_usize(v: usize) -> Option<Self> {
-        num::FromPrimitive::from_usize(v)
-    }
-
-    #[inline]
-    fn to_usize(&self) -> Option<usize> {
-        num::ToPrimitive::to_usize(self)
-    }
-
-    #[inline]
-    fn to_isize(&self) -> Option<isize> {
-        num::ToPrimitive::to_isize(self)
-    }
-}
-
-impl JsonSerializable for u32 {
-    fn into_json_value(self) -> Option<Value> {
-        Some(self.into())
-    }
-}
-
-impl ArrowNativeType for u32 {
-    #[inline]
-    fn from_usize(v: usize) -> Option<Self> {
-        num::FromPrimitive::from_usize(v)
-    }
-
-    #[inline]
-    fn to_usize(&self) -> Option<usize> {
-        num::ToPrimitive::to_usize(self)
-    }
-
-    #[inline]
-    fn to_isize(&self) -> Option<isize> {
-        num::ToPrimitive::to_isize(self)
-    }
-}
-
-impl JsonSerializable for u64 {
-    fn into_json_value(self) -> Option<Value> {
-        Some(self.into())
-    }
-}
-
-impl ArrowNativeType for u64 {
-    #[inline]
-    fn from_usize(v: usize) -> Option<Self> {
-        num::FromPrimitive::from_usize(v)
-    }
-
-    #[inline]
-    fn to_usize(&self) -> Option<usize> {
-        num::ToPrimitive::to_usize(self)
-    }
-
-    #[inline]
-    fn to_isize(&self) -> Option<isize> {
-        num::ToPrimitive::to_isize(self)
-    }
-}
-
-impl JsonSerializable for f32 {
-    fn into_json_value(self) -> Option<Value> {
-        Number::from_f64(f64::round(self as f64 * 1000.0) / 1000.0).map(Value::Number)
-    }
-}
-
-impl JsonSerializable for f64 {
-    fn into_json_value(self) -> Option<Value> {
-        Number::from_f64(self).map(Value::Number)
-    }
-}
-
-impl ArrowNativeType for f32 {}
-impl ArrowNativeType for f64 {}
-
-/// Allows conversion from supported Arrow types to a byte slice.
-pub trait ToByteSlice {
-    /// Converts this instance into a byte slice
-    fn to_byte_slice(&self) -> &[u8];
-}
-
-impl<T: ArrowNativeType> ToByteSlice for [T] {
-    #[inline]
-    fn to_byte_slice(&self) -> &[u8] {
-        let raw_ptr = self.as_ptr() as *const T as *const u8;
-        unsafe {
-            std::slice::from_raw_parts(raw_ptr, self.len() * std::mem::size_of::<T>())
-        }
-    }
-}
-
-impl<T: ArrowNativeType> ToByteSlice for T {
-    #[inline]
-    fn to_byte_slice(&self) -> &[u8] {
-        let raw_ptr = self as *const T as *const u8;
-        unsafe { std::slice::from_raw_parts(raw_ptr, std::mem::size_of::<T>()) }
-    }
-}
diff --git a/rust/arrow/src/datatypes/numeric.rs b/rust/arrow/src/datatypes/numeric.rs
deleted file mode 100644
index 0046398122b..00000000000
--- a/rust/arrow/src/datatypes/numeric.rs
+++ /dev/null
@@ -1,534 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#[cfg(feature = "simd")]
-use packed_simd::*;
-#[cfg(feature = "simd")]
-use std::ops::{Add, BitAnd, BitAndAssign, BitOr, BitOrAssign, Div, Mul, Neg, Not, Sub};
-
-use super::*;
-
-/// A subtype of primitive type that represents numeric values.
-///
-/// SIMD operations are defined in this trait if available on the target system.
-#[cfg(simd)]
-pub trait ArrowNumericType: ArrowPrimitiveType
-where
-    Self::Simd: Add<Output = Self::Simd>
-        + Sub<Output = Self::Simd>
-        + Mul<Output = Self::Simd>
-        + Div<Output = Self::Simd>
-        + Copy,
-    Self::SimdMask: BitAnd<Output = Self::SimdMask>
-        + BitOr<Output = Self::SimdMask>
-        + BitAndAssign
-        + BitOrAssign
-        + Not<Output = Self::SimdMask>
-        + Copy,
-{
-    /// Defines the SIMD type that should be used for this numeric type
-    type Simd;
-
-    /// Defines the SIMD Mask type that should be used for this numeric type
-    type SimdMask;
-
-    /// The number of SIMD lanes available
-    fn lanes() -> usize;
-
-    /// Initializes a SIMD register to a constant value
-    fn init(value: Self::Native) -> Self::Simd;
-
-    /// Loads a slice into a SIMD register
-    fn load(slice: &[Self::Native]) -> Self::Simd;
-
-    /// Creates a new SIMD mask for this SIMD type filling it with `value`
-    fn mask_init(value: bool) -> Self::SimdMask;
-
-    /// Creates a new SIMD mask for this SIMD type from the lower-most bits of the given `mask`.
-    /// The number of bits used corresponds to the number of lanes of this type
-    fn mask_from_u64(mask: u64) -> Self::SimdMask;
-
-    /// Creates a bitmask from the given SIMD mask.
-    /// Each bit corresponds to one vector lane, starting with the least-significant bit.
-    fn mask_to_u64(mask: &Self::SimdMask) -> u64;
-
-    /// Gets the value of a single lane in a SIMD mask
-    fn mask_get(mask: &Self::SimdMask, idx: usize) -> bool;
-
-    /// Sets the value of a single lane of a SIMD mask
-    fn mask_set(mask: Self::SimdMask, idx: usize, value: bool) -> Self::SimdMask;
-
-    /// Selects elements of `a` and `b` using `mask`
-    fn mask_select(mask: Self::SimdMask, a: Self::Simd, b: Self::Simd) -> Self::Simd;
-
-    /// Returns `true` if any of the lanes in the mask are `true`
-    fn mask_any(mask: Self::SimdMask) -> bool;
-
-    /// Performs a SIMD binary operation
-    fn bin_op<F: Fn(Self::Simd, Self::Simd) -> Self::Simd>(
-        left: Self::Simd,
-        right: Self::Simd,
-        op: F,
-    ) -> Self::Simd;
-
-    /// SIMD version of equal
-    fn eq(left: Self::Simd, right: Self::Simd) -> Self::SimdMask;
-
-    /// SIMD version of not equal
-    fn ne(left: Self::Simd, right: Self::Simd) -> Self::SimdMask;
-
-    /// SIMD version of less than
-    fn lt(left: Self::Simd, right: Self::Simd) -> Self::SimdMask;
-
-    /// SIMD version of less than or equal to
-    fn le(left: Self::Simd, right: Self::Simd) -> Self::SimdMask;
-
-    /// SIMD version of greater than
-    fn gt(left: Self::Simd, right: Self::Simd) -> Self::SimdMask;
-
-    /// SIMD version of greater than or equal to
-    fn ge(left: Self::Simd, right: Self::Simd) -> Self::SimdMask;
-
-    /// Writes a SIMD result back to a slice
-    fn write(simd_result: Self::Simd, slice: &mut [Self::Native]);
-
-    fn unary_op<F: Fn(Self::Simd) -> Self::Simd>(a: Self::Simd, op: F) -> Self::Simd;
-}
-
-#[cfg(not(simd))]
-pub trait ArrowNumericType: ArrowPrimitiveType {}
-
-macro_rules! make_numeric_type {
-    ($impl_ty:ty, $native_ty:ty, $simd_ty:ident, $simd_mask_ty:ident) => {
-        #[cfg(simd)]
-        impl ArrowNumericType for $impl_ty {
-            type Simd = $simd_ty;
-
-            type SimdMask = $simd_mask_ty;
-
-            #[inline]
-            fn lanes() -> usize {
-                Self::Simd::lanes()
-            }
-
-            #[inline]
-            fn init(value: Self::Native) -> Self::Simd {
-                Self::Simd::splat(value)
-            }
-
-            #[inline]
-            fn load(slice: &[Self::Native]) -> Self::Simd {
-                unsafe { Self::Simd::from_slice_unaligned_unchecked(slice) }
-            }
-
-            #[inline]
-            fn mask_init(value: bool) -> Self::SimdMask {
-                Self::SimdMask::splat(value)
-            }
-
-            #[inline]
-            fn mask_from_u64(mask: u64) -> Self::SimdMask {
-                // this match will get removed by the compiler since the number of lanes is known at
-                // compile-time for each concrete numeric type
-                match Self::lanes() {
-                    8 => {
-                        // the bit position in each lane indicates the index of that lane
-                        let vecidx = i64x8::new(1, 2, 4, 8, 16, 32, 64, 128);
-
-                        // broadcast the lowermost 8 bits of mask to each lane
-                        let vecmask = i64x8::splat((mask & 0xFF) as i64);
-                        // compute whether the bit corresponding to each lanes index is set
-                        let vecmask = (vecidx & vecmask).eq(vecidx);
-
-                        // transmute is necessary because the different match arms return different
-                        // mask types, at runtime only one of those expressions will exist per type,
-                        // with the type being equal to `SimdMask`.
-                        unsafe { std::mem::transmute(vecmask) }
-                    }
-                    16 => {
-                        // same general logic as for 8 lanes, extended to 16 bits
-                        let vecidx = i32x16::new(
-                            1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096,
-                            8192, 16384, 32768,
-                        );
-
-                        let vecmask = i32x16::splat((mask & 0xFFFF) as i32);
-                        let vecmask = (vecidx & vecmask).eq(vecidx);
-
-                        unsafe { std::mem::transmute(vecmask) }
-                    }
-                    32 => {
-                        // compute two separate m32x16 vector masks from  from the lower-most 32 bits of `mask`
-                        // and then combine them into one m16x32 vector mask by writing and reading a temporary
-                        let tmp = &mut [0_i16; 32];
-
-                        let vecidx = i32x16::new(
-                            1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096,
-                            8192, 16384, 32768,
-                        );
-
-                        let vecmask = i32x16::splat((mask & 0xFFFF) as i32);
-                        let vecmask = (vecidx & vecmask).eq(vecidx);
-
-                        i16x16::from_cast(vecmask)
-                            .write_to_slice_unaligned(&mut tmp[0..16]);
-
-                        let vecmask = i32x16::splat(((mask >> 16) & 0xFFFF) as i32);
-                        let vecmask = (vecidx & vecmask).eq(vecidx);
-
-                        i16x16::from_cast(vecmask)
-                            .write_to_slice_unaligned(&mut tmp[16..32]);
-
-                        unsafe { std::mem::transmute(i16x32::from_slice_unaligned(tmp)) }
-                    }
-                    64 => {
-                        // compute four m32x16 vector masks from  from all 64 bits of `mask`
-                        // and convert them into one m8x64 vector mask by writing and reading a temporary
-                        let tmp = &mut [0_i8; 64];
-
-                        let vecidx = i32x16::new(
-                            1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096,
-                            8192, 16384, 32768,
-                        );
-
-                        let vecmask = i32x16::splat((mask & 0xFFFF) as i32);
-                        let vecmask = (vecidx & vecmask).eq(vecidx);
-
-                        i8x16::from_cast(vecmask)
-                            .write_to_slice_unaligned(&mut tmp[0..16]);
-
-                        let vecmask = i32x16::splat(((mask >> 16) & 0xFFFF) as i32);
-                        let vecmask = (vecidx & vecmask).eq(vecidx);
-
-                        i8x16::from_cast(vecmask)
-                            .write_to_slice_unaligned(&mut tmp[16..32]);
-
-                        let vecmask = i32x16::splat(((mask >> 32) & 0xFFFF) as i32);
-                        let vecmask = (vecidx & vecmask).eq(vecidx);
-
-                        i8x16::from_cast(vecmask)
-                            .write_to_slice_unaligned(&mut tmp[32..48]);
-
-                        let vecmask = i32x16::splat(((mask >> 48) & 0xFFFF) as i32);
-                        let vecmask = (vecidx & vecmask).eq(vecidx);
-
-                        i8x16::from_cast(vecmask)
-                            .write_to_slice_unaligned(&mut tmp[48..64]);
-
-                        unsafe { std::mem::transmute(i8x64::from_slice_unaligned(tmp)) }
-                    }
-                    _ => panic!("Invalid number of vector lanes"),
-                }
-            }
-
-            #[inline]
-            fn mask_to_u64(mask: &Self::SimdMask) -> u64 {
-                mask.bitmask() as u64
-            }
-
-            #[inline]
-            fn mask_get(mask: &Self::SimdMask, idx: usize) -> bool {
-                unsafe { mask.extract_unchecked(idx) }
-            }
-
-            #[inline]
-            fn mask_set(mask: Self::SimdMask, idx: usize, value: bool) -> Self::SimdMask {
-                unsafe { mask.replace_unchecked(idx, value) }
-            }
-
-            /// Selects elements of `a` and `b` using `mask`
-            #[inline]
-            fn mask_select(
-                mask: Self::SimdMask,
-                a: Self::Simd,
-                b: Self::Simd,
-            ) -> Self::Simd {
-                mask.select(a, b)
-            }
-
-            #[inline]
-            fn mask_any(mask: Self::SimdMask) -> bool {
-                mask.any()
-            }
-
-            #[inline]
-            fn bin_op<F: Fn(Self::Simd, Self::Simd) -> Self::Simd>(
-                left: Self::Simd,
-                right: Self::Simd,
-                op: F,
-            ) -> Self::Simd {
-                op(left, right)
-            }
-
-            #[inline]
-            fn eq(left: Self::Simd, right: Self::Simd) -> Self::SimdMask {
-                left.eq(right)
-            }
-
-            #[inline]
-            fn ne(left: Self::Simd, right: Self::Simd) -> Self::SimdMask {
-                left.ne(right)
-            }
-
-            #[inline]
-            fn lt(left: Self::Simd, right: Self::Simd) -> Self::SimdMask {
-                left.lt(right)
-            }
-
-            #[inline]
-            fn le(left: Self::Simd, right: Self::Simd) -> Self::SimdMask {
-                left.le(right)
-            }
-
-            #[inline]
-            fn gt(left: Self::Simd, right: Self::Simd) -> Self::SimdMask {
-                left.gt(right)
-            }
-
-            #[inline]
-            fn ge(left: Self::Simd, right: Self::Simd) -> Self::SimdMask {
-                left.ge(right)
-            }
-
-            #[inline]
-            fn write(simd_result: Self::Simd, slice: &mut [Self::Native]) {
-                unsafe { simd_result.write_to_slice_unaligned_unchecked(slice) };
-            }
-
-            #[inline]
-            fn unary_op<F: Fn(Self::Simd) -> Self::Simd>(
-                a: Self::Simd,
-                op: F,
-            ) -> Self::Simd {
-                op(a)
-            }
-        }
-
-        #[cfg(not(simd))]
-        impl ArrowNumericType for $impl_ty {}
-    };
-}
-
-make_numeric_type!(Int8Type, i8, i8x64, m8x64);
-make_numeric_type!(Int16Type, i16, i16x32, m16x32);
-make_numeric_type!(Int32Type, i32, i32x16, m32x16);
-make_numeric_type!(Int64Type, i64, i64x8, m64x8);
-make_numeric_type!(UInt8Type, u8, u8x64, m8x64);
-make_numeric_type!(UInt16Type, u16, u16x32, m16x32);
-make_numeric_type!(UInt32Type, u32, u32x16, m32x16);
-make_numeric_type!(UInt64Type, u64, u64x8, m64x8);
-make_numeric_type!(Float32Type, f32, f32x16, m32x16);
-make_numeric_type!(Float64Type, f64, f64x8, m64x8);
-
-make_numeric_type!(TimestampSecondType, i64, i64x8, m64x8);
-make_numeric_type!(TimestampMillisecondType, i64, i64x8, m64x8);
-make_numeric_type!(TimestampMicrosecondType, i64, i64x8, m64x8);
-make_numeric_type!(TimestampNanosecondType, i64, i64x8, m64x8);
-make_numeric_type!(Date32Type, i32, i32x16, m32x16);
-make_numeric_type!(Date64Type, i64, i64x8, m64x8);
-make_numeric_type!(Time32SecondType, i32, i32x16, m32x16);
-make_numeric_type!(Time32MillisecondType, i32, i32x16, m32x16);
-make_numeric_type!(Time64MicrosecondType, i64, i64x8, m64x8);
-make_numeric_type!(Time64NanosecondType, i64, i64x8, m64x8);
-make_numeric_type!(IntervalYearMonthType, i32, i32x16, m32x16);
-make_numeric_type!(IntervalDayTimeType, i64, i64x8, m64x8);
-make_numeric_type!(DurationSecondType, i64, i64x8, m64x8);
-make_numeric_type!(DurationMillisecondType, i64, i64x8, m64x8);
-make_numeric_type!(DurationMicrosecondType, i64, i64x8, m64x8);
-make_numeric_type!(DurationNanosecondType, i64, i64x8, m64x8);
-
-/// A subtype of primitive type that represents signed numeric values.
-///
-/// SIMD operations are defined in this trait if available on the target system.
-#[cfg(simd)]
-pub trait ArrowSignedNumericType: ArrowNumericType
-where
-    Self::SignedSimd: Neg<Output = Self::SignedSimd>,
-{
-    /// Defines the SIMD type that should be used for this numeric type
-    type SignedSimd;
-
-    /// Loads a slice of signed numeric type into a SIMD register
-    fn load_signed(slice: &[Self::Native]) -> Self::SignedSimd;
-
-    /// Performs a SIMD unary operation on signed numeric type
-    fn signed_unary_op<F: Fn(Self::SignedSimd) -> Self::SignedSimd>(
-        a: Self::SignedSimd,
-        op: F,
-    ) -> Self::SignedSimd;
-
-    /// Writes a signed SIMD result back to a slice
-    fn write_signed(simd_result: Self::SignedSimd, slice: &mut [Self::Native]);
-}
-
-#[cfg(not(simd))]
-pub trait ArrowSignedNumericType: ArrowNumericType
-where
-    Self::Native: std::ops::Neg<Output = Self::Native>,
-{
-}
-
-macro_rules! make_signed_numeric_type {
-    ($impl_ty:ty, $simd_ty:ident) => {
-        #[cfg(simd)]
-        impl ArrowSignedNumericType for $impl_ty {
-            type SignedSimd = $simd_ty;
-
-            #[inline]
-            fn load_signed(slice: &[Self::Native]) -> Self::SignedSimd {
-                unsafe { Self::SignedSimd::from_slice_unaligned_unchecked(slice) }
-            }
-
-            #[inline]
-            fn signed_unary_op<F: Fn(Self::SignedSimd) -> Self::SignedSimd>(
-                a: Self::SignedSimd,
-                op: F,
-            ) -> Self::SignedSimd {
-                op(a)
-            }
-
-            #[inline]
-            fn write_signed(simd_result: Self::SignedSimd, slice: &mut [Self::Native]) {
-                unsafe { simd_result.write_to_slice_unaligned_unchecked(slice) };
-            }
-        }
-
-        #[cfg(not(simd))]
-        impl ArrowSignedNumericType for $impl_ty {}
-    };
-}
-
-make_signed_numeric_type!(Int8Type, i8x64);
-make_signed_numeric_type!(Int16Type, i16x32);
-make_signed_numeric_type!(Int32Type, i32x16);
-make_signed_numeric_type!(Int64Type, i64x8);
-make_signed_numeric_type!(Float32Type, f32x16);
-make_signed_numeric_type!(Float64Type, f64x8);
-
-#[cfg(simd)]
-pub trait ArrowFloatNumericType: ArrowNumericType {
-    fn pow(base: Self::Simd, raise: Self::Simd) -> Self::Simd;
-}
-
-#[cfg(not(simd))]
-pub trait ArrowFloatNumericType: ArrowNumericType {}
-
-macro_rules! make_float_numeric_type {
-    ($impl_ty:ty, $simd_ty:ident) => {
-        #[cfg(simd)]
-        impl ArrowFloatNumericType for $impl_ty {
-            #[inline]
-            fn pow(base: Self::Simd, raise: Self::Simd) -> Self::Simd {
-                base.powf(raise)
-            }
-        }
-
-        #[cfg(not(simd))]
-        impl ArrowFloatNumericType for $impl_ty {}
-    };
-}
-
-make_float_numeric_type!(Float32Type, f32x16);
-make_float_numeric_type!(Float64Type, f64x8);
-
-#[cfg(all(test, simd_x86))]
-mod tests {
-    use crate::datatypes::{
-        ArrowNumericType, Float32Type, Float64Type, Int32Type, Int64Type, Int8Type,
-        UInt16Type,
-    };
-    use packed_simd::*;
-    use FromCast;
-
-    /// calculate the expected mask by iterating over all bits
-    macro_rules! expected_mask {
-        ($T:ty, $MASK:expr) => {{
-            let mask = $MASK;
-            // simd width of all types is currently 64 bytes -> 512 bits
-            let lanes = 64 / std::mem::size_of::<$T>();
-            // translate each set bit into a value of all ones (-1) of the correct type
-            (0..lanes)
-                .map(|i| (if (mask & (1 << i)) != 0 { -1 } else { 0 }))
-                .collect::<Vec<$T>>()
-        }};
-    }
-
-    #[test]
-    fn test_mask_f64() {
-        let mask = 0b10101010;
-        let actual = Float64Type::mask_from_u64(mask);
-        let expected = expected_mask!(i64, mask);
-        let expected = m64x8::from_cast(i64x8::from_slice_unaligned(expected.as_slice()));
-
-        assert_eq!(expected, actual);
-    }
-
-    #[test]
-    fn test_mask_u64() {
-        let mask = 0b01010101;
-        let actual = Int64Type::mask_from_u64(mask);
-        let expected = expected_mask!(i64, mask);
-        let expected = m64x8::from_cast(i64x8::from_slice_unaligned(expected.as_slice()));
-
-        assert_eq!(expected, actual);
-    }
-
-    #[test]
-    fn test_mask_f32() {
-        let mask = 0b10101010_10101010;
-        let actual = Float32Type::mask_from_u64(mask);
-        let expected = expected_mask!(i32, mask);
-        let expected =
-            m32x16::from_cast(i32x16::from_slice_unaligned(expected.as_slice()));
-
-        assert_eq!(expected, actual);
-    }
-
-    #[test]
-    fn test_mask_i32() {
-        let mask = 0b01010101_01010101;
-        let actual = Int32Type::mask_from_u64(mask);
-        let expected = expected_mask!(i32, mask);
-        let expected =
-            m32x16::from_cast(i32x16::from_slice_unaligned(expected.as_slice()));
-
-        assert_eq!(expected, actual);
-    }
-
-    #[test]
-    fn test_mask_u16() {
-        let mask = 0b01010101_01010101_10101010_10101010;
-        let actual = UInt16Type::mask_from_u64(mask);
-        let expected = expected_mask!(i16, mask);
-        dbg!(&expected);
-        let expected =
-            m16x32::from_cast(i16x32::from_slice_unaligned(expected.as_slice()));
-
-        assert_eq!(expected, actual);
-    }
-
-    #[test]
-    fn test_mask_i8() {
-        let mask =
-            0b01010101_01010101_10101010_10101010_01010101_01010101_10101010_10101010;
-        let actual = Int8Type::mask_from_u64(mask);
-        let expected = expected_mask!(i8, mask);
-        let expected = m8x64::from_cast(i8x64::from_slice_unaligned(expected.as_slice()));
-
-        assert_eq!(expected, actual);
-    }
-}
diff --git a/rust/arrow/src/datatypes/schema.rs b/rust/arrow/src/datatypes/schema.rs
deleted file mode 100644
index ad89b29cacd..00000000000
--- a/rust/arrow/src/datatypes/schema.rs
+++ /dev/null
@@ -1,337 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::collections::HashMap;
-use std::default::Default;
-use std::fmt;
-
-use serde_derive::{Deserialize, Serialize};
-use serde_json::{json, Value};
-
-use crate::error::{ArrowError, Result};
-
-use super::Field;
-
-/// Describes the meta-data of an ordered sequence of relative types.
-///
-/// Note that this information is only part of the meta-data and not part of the physical
-/// memory layout.
-#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
-pub struct Schema {
-    pub(crate) fields: Vec<Field>,
-    /// A map of key-value pairs containing additional meta data.
-    #[serde(skip_serializing_if = "HashMap::is_empty")]
-    pub(crate) metadata: HashMap<String, String>,
-}
-
-impl Schema {
-    /// Creates an empty `Schema`
-    pub fn empty() -> Self {
-        Self {
-            fields: vec![],
-            metadata: HashMap::new(),
-        }
-    }
-
-    /// Creates a new `Schema` from a sequence of `Field` values.
-    ///
-    /// # Example
-    ///
-    /// ```
-    /// # extern crate arrow;
-    /// # use arrow::datatypes::{Field, DataType, Schema};
-    /// let field_a = Field::new("a", DataType::Int64, false);
-    /// let field_b = Field::new("b", DataType::Boolean, false);
-    ///
-    /// let schema = Schema::new(vec![field_a, field_b]);
-    /// ```
-    pub fn new(fields: Vec<Field>) -> Self {
-        Self::new_with_metadata(fields, HashMap::new())
-    }
-
-    /// Creates a new `Schema` from a sequence of `Field` values
-    /// and adds additional metadata in form of key value pairs.
-    ///
-    /// # Example
-    ///
-    /// ```
-    /// # extern crate arrow;
-    /// # use arrow::datatypes::{Field, DataType, Schema};
-    /// # use std::collections::HashMap;
-    /// let field_a = Field::new("a", DataType::Int64, false);
-    /// let field_b = Field::new("b", DataType::Boolean, false);
-    ///
-    /// let mut metadata: HashMap<String, String> = HashMap::new();
-    /// metadata.insert("row_count".to_string(), "100".to_string());
-    ///
-    /// let schema = Schema::new_with_metadata(vec![field_a, field_b], metadata);
-    /// ```
-    #[inline]
-    pub const fn new_with_metadata(
-        fields: Vec<Field>,
-        metadata: HashMap<String, String>,
-    ) -> Self {
-        Self { fields, metadata }
-    }
-
-    /// Merge schema into self if it is compatible. Struct fields will be merged recursively.
-    ///
-    /// Example:
-    ///
-    /// ```
-    /// use arrow::datatypes::*;
-    ///
-    /// let merged = Schema::try_merge(vec![
-    ///     Schema::new(vec![
-    ///         Field::new("c1", DataType::Int64, false),
-    ///         Field::new("c2", DataType::Utf8, false),
-    ///     ]),
-    ///     Schema::new(vec![
-    ///         Field::new("c1", DataType::Int64, true),
-    ///         Field::new("c2", DataType::Utf8, false),
-    ///         Field::new("c3", DataType::Utf8, false),
-    ///     ]),
-    /// ]).unwrap();
-    ///
-    /// assert_eq!(
-    ///     merged,
-    ///     Schema::new(vec![
-    ///         Field::new("c1", DataType::Int64, true),
-    ///         Field::new("c2", DataType::Utf8, false),
-    ///         Field::new("c3", DataType::Utf8, false),
-    ///     ]),
-    /// );
-    /// ```
-    pub fn try_merge(schemas: impl IntoIterator<Item = Self>) -> Result<Self> {
-        schemas
-            .into_iter()
-            .try_fold(Self::empty(), |mut merged, schema| {
-                let Schema { metadata, fields } = schema;
-                for (key, value) in metadata.into_iter() {
-                    // merge metadata
-                    if let Some(old_val) = merged.metadata.get(&key) {
-                        if old_val != &value {
-                            return Err(ArrowError::SchemaError(
-                                "Fail to merge schema due to conflicting metadata."
-                                    .to_string(),
-                            ));
-                        }
-                    }
-                    merged.metadata.insert(key, value);
-                }
-                // merge fields
-                for field in fields.into_iter() {
-                    let mut new_field = true;
-                    for merged_field in &mut merged.fields {
-                        if field.name() != merged_field.name() {
-                            continue;
-                        }
-                        new_field = false;
-                        merged_field.try_merge(&field)?
-                    }
-                    // found a new field, add to field list
-                    if new_field {
-                        merged.fields.push(field);
-                    }
-                }
-                Ok(merged)
-            })
-    }
-
-    /// Returns an immutable reference of the vector of `Field` instances.
-    #[inline]
-    pub const fn fields(&self) -> &Vec<Field> {
-        &self.fields
-    }
-
-    /// Returns an immutable reference of a specific `Field` instance selected using an
-    /// offset within the internal `fields` vector.
-    pub fn field(&self, i: usize) -> &Field {
-        &self.fields[i]
-    }
-
-    /// Returns an immutable reference of a specific `Field` instance selected by name.
-    pub fn field_with_name(&self, name: &str) -> Result<&Field> {
-        Ok(&self.fields[self.index_of(name)?])
-    }
-
-    /// Returns a vector of immutable references to all `Field` instances selected by
-    /// the dictionary ID they use.
-    pub fn fields_with_dict_id(&self, dict_id: i64) -> Vec<&Field> {
-        self.fields
-            .iter()
-            .filter(|f| f.dict_id() == Some(dict_id))
-            .collect()
-    }
-
-    /// Find the index of the column with the given name.
-    pub fn index_of(&self, name: &str) -> Result<usize> {
-        for i in 0..self.fields.len() {
-            if self.fields[i].name() == name {
-                return Ok(i);
-            }
-        }
-        let valid_fields: Vec<String> =
-            self.fields.iter().map(|f| f.name().clone()).collect();
-        Err(ArrowError::InvalidArgumentError(format!(
-            "Unable to get field named \"{}\". Valid fields: {:?}",
-            name, valid_fields
-        )))
-    }
-
-    /// Returns an immutable reference to the Map of custom metadata key-value pairs.
-    #[inline]
-    pub const fn metadata(&self) -> &HashMap<String, String> {
-        &self.metadata
-    }
-
-    /// Look up a column by name and return a immutable reference to the column along with
-    /// its index.
-    pub fn column_with_name(&self, name: &str) -> Option<(usize, &Field)> {
-        self.fields
-            .iter()
-            .enumerate()
-            .find(|&(_, c)| c.name() == name)
-    }
-
-    /// Generate a JSON representation of the `Schema`.
-    pub fn to_json(&self) -> Value {
-        json!({
-            "fields": self.fields.iter().map(|field| field.to_json()).collect::<Vec<Value>>(),
-            "metadata": serde_json::to_value(&self.metadata).unwrap()
-        })
-    }
-
-    /// Parse a `Schema` definition from a JSON representation.
-    pub fn from(json: &Value) -> Result<Self> {
-        match *json {
-            Value::Object(ref schema) => {
-                let fields = if let Some(Value::Array(fields)) = schema.get("fields") {
-                    fields
-                        .iter()
-                        .map(|f| Field::from(f))
-                        .collect::<Result<_>>()?
-                } else {
-                    return Err(ArrowError::ParseError(
-                        "Schema fields should be an array".to_string(),
-                    ));
-                };
-
-                let metadata = if let Some(value) = schema.get("metadata") {
-                    Self::from_metadata(value)?
-                } else {
-                    HashMap::default()
-                };
-
-                Ok(Self { fields, metadata })
-            }
-            _ => Err(ArrowError::ParseError(
-                "Invalid json value type for schema".to_string(),
-            )),
-        }
-    }
-
-    /// Parse a `metadata` definition from a JSON representation.
-    /// The JSON can either be an Object or an Array of Objects.
-    fn from_metadata(json: &Value) -> Result<HashMap<String, String>> {
-        match json {
-            Value::Array(_) => {
-                let mut hashmap = HashMap::new();
-                let values: Vec<MetadataKeyValue> = serde_json::from_value(json.clone())
-                    .map_err(|_| {
-                        ArrowError::JsonError(
-                            "Unable to parse object into key-value pair".to_string(),
-                        )
-                    })?;
-                for meta in values {
-                    hashmap.insert(meta.key.clone(), meta.value);
-                }
-                Ok(hashmap)
-            }
-            Value::Object(md) => md
-                .iter()
-                .map(|(k, v)| {
-                    if let Value::String(v) = v {
-                        Ok((k.to_string(), v.to_string()))
-                    } else {
-                        Err(ArrowError::ParseError(
-                            "metadata `value` field must be a string".to_string(),
-                        ))
-                    }
-                })
-                .collect::<Result<_>>(),
-            _ => Err(ArrowError::ParseError(
-                "`metadata` field must be an object".to_string(),
-            )),
-        }
-    }
-
-    /// Check to see if `self` is a superset of `other` schema. Here are the comparision rules:
-    ///
-    /// * `self` and `other` should contain the same number of fields
-    /// * for every field `f` in `other`, the field in `self` with corresponding index should be a
-    /// superset of `f`.
-    /// * self.metadata is a superset of other.metadata
-    ///
-    /// In other words, any record conforms to `other` should also conform to `self`.
-    pub fn contains(&self, other: &Schema) -> bool {
-        if self.fields.len() != other.fields.len() {
-            return false;
-        }
-
-        for (i, field) in other.fields.iter().enumerate() {
-            if !self.fields[i].contains(field) {
-                return false;
-            }
-        }
-
-        // make sure self.metadata is a superset of other.metadata
-        for (k, v) in &other.metadata {
-            match self.metadata.get(k) {
-                Some(s) => {
-                    if s != v {
-                        return false;
-                    }
-                }
-                None => {
-                    return false;
-                }
-            }
-        }
-
-        true
-    }
-}
-
-impl fmt::Display for Schema {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        f.write_str(
-            &self
-                .fields
-                .iter()
-                .map(|c| c.to_string())
-                .collect::<Vec<String>>()
-                .join(", "),
-        )
-    }
-}
-
-#[derive(Deserialize)]
-struct MetadataKeyValue {
-    key: String,
-    value: String,
-}
diff --git a/rust/arrow/src/datatypes/types.rs b/rust/arrow/src/datatypes/types.rs
deleted file mode 100644
index 30c9aae8956..00000000000
--- a/rust/arrow/src/datatypes/types.rs
+++ /dev/null
@@ -1,185 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use super::{ArrowPrimitiveType, DataType, IntervalUnit, TimeUnit};
-
-// BooleanType is special: its bit-width is not the size of the primitive type, and its `index`
-// operation assumes bit-packing.
-#[derive(Debug)]
-pub struct BooleanType {}
-
-impl BooleanType {
-    pub const DATA_TYPE: DataType = DataType::Boolean;
-}
-
-macro_rules! make_type {
-    ($name:ident, $native_ty:ty, $data_ty:expr) => {
-        #[derive(Debug)]
-        pub struct $name {}
-
-        impl ArrowPrimitiveType for $name {
-            type Native = $native_ty;
-            const DATA_TYPE: DataType = $data_ty;
-        }
-    };
-}
-
-make_type!(Int8Type, i8, DataType::Int8);
-make_type!(Int16Type, i16, DataType::Int16);
-make_type!(Int32Type, i32, DataType::Int32);
-make_type!(Int64Type, i64, DataType::Int64);
-make_type!(UInt8Type, u8, DataType::UInt8);
-make_type!(UInt16Type, u16, DataType::UInt16);
-make_type!(UInt32Type, u32, DataType::UInt32);
-make_type!(UInt64Type, u64, DataType::UInt64);
-make_type!(Float32Type, f32, DataType::Float32);
-make_type!(Float64Type, f64, DataType::Float64);
-make_type!(
-    TimestampSecondType,
-    i64,
-    DataType::Timestamp(TimeUnit::Second, None)
-);
-make_type!(
-    TimestampMillisecondType,
-    i64,
-    DataType::Timestamp(TimeUnit::Millisecond, None)
-);
-make_type!(
-    TimestampMicrosecondType,
-    i64,
-    DataType::Timestamp(TimeUnit::Microsecond, None)
-);
-make_type!(
-    TimestampNanosecondType,
-    i64,
-    DataType::Timestamp(TimeUnit::Nanosecond, None)
-);
-make_type!(Date32Type, i32, DataType::Date32);
-make_type!(Date64Type, i64, DataType::Date64);
-make_type!(Time32SecondType, i32, DataType::Time32(TimeUnit::Second));
-make_type!(
-    Time32MillisecondType,
-    i32,
-    DataType::Time32(TimeUnit::Millisecond)
-);
-make_type!(
-    Time64MicrosecondType,
-    i64,
-    DataType::Time64(TimeUnit::Microsecond)
-);
-make_type!(
-    Time64NanosecondType,
-    i64,
-    DataType::Time64(TimeUnit::Nanosecond)
-);
-make_type!(
-    IntervalYearMonthType,
-    i32,
-    DataType::Interval(IntervalUnit::YearMonth)
-);
-make_type!(
-    IntervalDayTimeType,
-    i64,
-    DataType::Interval(IntervalUnit::DayTime)
-);
-make_type!(
-    DurationSecondType,
-    i64,
-    DataType::Duration(TimeUnit::Second)
-);
-make_type!(
-    DurationMillisecondType,
-    i64,
-    DataType::Duration(TimeUnit::Millisecond)
-);
-make_type!(
-    DurationMicrosecondType,
-    i64,
-    DataType::Duration(TimeUnit::Microsecond)
-);
-make_type!(
-    DurationNanosecondType,
-    i64,
-    DataType::Duration(TimeUnit::Nanosecond)
-);
-
-/// A subtype of primitive type that represents legal dictionary keys.
-/// See <https://arrow.apache.org/docs/format/Columnar.html>
-pub trait ArrowDictionaryKeyType: ArrowPrimitiveType {}
-
-impl ArrowDictionaryKeyType for Int8Type {}
-
-impl ArrowDictionaryKeyType for Int16Type {}
-
-impl ArrowDictionaryKeyType for Int32Type {}
-
-impl ArrowDictionaryKeyType for Int64Type {}
-
-impl ArrowDictionaryKeyType for UInt8Type {}
-
-impl ArrowDictionaryKeyType for UInt16Type {}
-
-impl ArrowDictionaryKeyType for UInt32Type {}
-
-impl ArrowDictionaryKeyType for UInt64Type {}
-
-/// A subtype of primitive type that represents temporal values.
-pub trait ArrowTemporalType: ArrowPrimitiveType {}
-
-impl ArrowTemporalType for TimestampSecondType {}
-impl ArrowTemporalType for TimestampMillisecondType {}
-impl ArrowTemporalType for TimestampMicrosecondType {}
-impl ArrowTemporalType for TimestampNanosecondType {}
-impl ArrowTemporalType for Date32Type {}
-impl ArrowTemporalType for Date64Type {}
-impl ArrowTemporalType for Time32SecondType {}
-impl ArrowTemporalType for Time32MillisecondType {}
-impl ArrowTemporalType for Time64MicrosecondType {}
-impl ArrowTemporalType for Time64NanosecondType {}
-// impl ArrowTemporalType for IntervalYearMonthType {}
-// impl ArrowTemporalType for IntervalDayTimeType {}
-impl ArrowTemporalType for DurationSecondType {}
-impl ArrowTemporalType for DurationMillisecondType {}
-impl ArrowTemporalType for DurationMicrosecondType {}
-impl ArrowTemporalType for DurationNanosecondType {}
-
-/// A timestamp type allows us to create array builders that take a timestamp.
-pub trait ArrowTimestampType: ArrowTemporalType {
-    /// Returns the `TimeUnit` of this timestamp.
-    fn get_time_unit() -> TimeUnit;
-}
-
-impl ArrowTimestampType for TimestampSecondType {
-    fn get_time_unit() -> TimeUnit {
-        TimeUnit::Second
-    }
-}
-impl ArrowTimestampType for TimestampMillisecondType {
-    fn get_time_unit() -> TimeUnit {
-        TimeUnit::Millisecond
-    }
-}
-impl ArrowTimestampType for TimestampMicrosecondType {
-    fn get_time_unit() -> TimeUnit {
-        TimeUnit::Microsecond
-    }
-}
-impl ArrowTimestampType for TimestampNanosecondType {
-    fn get_time_unit() -> TimeUnit {
-        TimeUnit::Nanosecond
-    }
-}
diff --git a/rust/arrow/src/error.rs b/rust/arrow/src/error.rs
deleted file mode 100644
index 6bfa077f4ab..00000000000
--- a/rust/arrow/src/error.rs
+++ /dev/null
@@ -1,134 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Defines `ArrowError` for representing failures in various Arrow operations.
-use std::fmt::{Debug, Display, Formatter};
-use std::io::Write;
-
-use csv as csv_crate;
-use std::error::Error;
-
-/// Many different operations in the `arrow` crate return this error type.
-#[derive(Debug)]
-pub enum ArrowError {
-    /// Returned when functionality is not yet available.
-    NotYetImplemented(String),
-    ExternalError(Box<dyn Error + Send + Sync>),
-    CastError(String),
-    MemoryError(String),
-    ParseError(String),
-    SchemaError(String),
-    ComputeError(String),
-    DivideByZero,
-    CsvError(String),
-    JsonError(String),
-    IoError(String),
-    InvalidArgumentError(String),
-    ParquetError(String),
-    /// Error during import or export to/from the C Data Interface
-    CDataInterface(String),
-    DictionaryKeyOverflowError,
-}
-
-impl ArrowError {
-    /// Wraps an external error in an `ArrowError`.
-    pub fn from_external_error(
-        error: Box<dyn ::std::error::Error + Send + Sync>,
-    ) -> Self {
-        Self::ExternalError(error)
-    }
-}
-
-impl From<::std::io::Error> for ArrowError {
-    fn from(error: std::io::Error) -> Self {
-        ArrowError::IoError(error.to_string())
-    }
-}
-
-impl From<csv_crate::Error> for ArrowError {
-    fn from(error: csv_crate::Error) -> Self {
-        match error.kind() {
-            csv_crate::ErrorKind::Io(error) => ArrowError::CsvError(error.to_string()),
-            csv_crate::ErrorKind::Utf8 { pos: _, err } => ArrowError::CsvError(format!(
-                "Encountered UTF-8 error while reading CSV file: {}",
-                err.to_string()
-            )),
-            csv_crate::ErrorKind::UnequalLengths {
-                expected_len, len, ..
-            } => ArrowError::CsvError(format!(
-                "Encountered unequal lengths between records on CSV file. Expected {} \
-                 records, found {} records",
-                len, expected_len
-            )),
-            _ => ArrowError::CsvError("Error reading CSV file".to_string()),
-        }
-    }
-}
-
-impl From<::std::string::FromUtf8Error> for ArrowError {
-    fn from(error: std::string::FromUtf8Error) -> Self {
-        ArrowError::ParseError(error.to_string())
-    }
-}
-
-impl From<serde_json::Error> for ArrowError {
-    fn from(error: serde_json::Error) -> Self {
-        ArrowError::JsonError(error.to_string())
-    }
-}
-
-impl<W: Write> From<::std::io::IntoInnerError<W>> for ArrowError {
-    fn from(error: std::io::IntoInnerError<W>) -> Self {
-        ArrowError::IoError(error.to_string())
-    }
-}
-
-impl Display for ArrowError {
-    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        match self {
-            ArrowError::NotYetImplemented(source) => {
-                write!(f, "Not yet implemented: {}", &source)
-            }
-            ArrowError::ExternalError(source) => write!(f, "External error: {}", &source),
-            ArrowError::CastError(desc) => write!(f, "Cast error: {}", desc),
-            ArrowError::MemoryError(desc) => write!(f, "Memory error: {}", desc),
-            ArrowError::ParseError(desc) => write!(f, "Parser error: {}", desc),
-            ArrowError::SchemaError(desc) => write!(f, "Schema error: {}", desc),
-            ArrowError::ComputeError(desc) => write!(f, "Compute error: {}", desc),
-            ArrowError::DivideByZero => write!(f, "Divide by zero error"),
-            ArrowError::CsvError(desc) => write!(f, "Csv error: {}", desc),
-            ArrowError::JsonError(desc) => write!(f, "Json error: {}", desc),
-            ArrowError::IoError(desc) => write!(f, "Io error: {}", desc),
-            ArrowError::InvalidArgumentError(desc) => {
-                write!(f, "Invalid argument error: {}", desc)
-            }
-            ArrowError::ParquetError(desc) => {
-                write!(f, "Parquet argument error: {}", desc)
-            }
-            ArrowError::CDataInterface(desc) => {
-                write!(f, "C Data interface error: {}", desc)
-            }
-            ArrowError::DictionaryKeyOverflowError => {
-                write!(f, "Dictionary key bigger than the key type")
-            }
-        }
-    }
-}
-
-impl Error for ArrowError {}
-
-pub type Result<T> = std::result::Result<T, ArrowError>;
diff --git a/rust/arrow/src/ffi.rs b/rust/arrow/src/ffi.rs
deleted file mode 100644
index 3a6d031ebd8..00000000000
--- a/rust/arrow/src/ffi.rs
+++ /dev/null
@@ -1,997 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Contains declarations to bind to the [C Data Interface](https://arrow.apache.org/docs/format/CDataInterface.html).
-//!
-//! Generally, this module is divided in two main interfaces:
-//! One interface maps C ABI to native Rust types, i.e. convert c-pointers, c_char, to native rust.
-//! This is handled by [FFI_ArrowSchema] and [FFI_ArrowArray].
-//!
-//! The second interface maps native Rust types to the Rust-specific implementation of Arrow such as `format` to `Datatype`,
-//! `Buffer`, etc. This is handled by `ArrowArray`.
-//!
-//! ```rust
-//! # use std::sync::Arc;
-//! # use arrow::array::{Int32Array, Array, ArrayData, make_array_from_raw};
-//! # use arrow::error::{Result, ArrowError};
-//! # use arrow::compute::kernels::arithmetic;
-//! # use std::convert::TryFrom;
-//! # fn main() -> Result<()> {
-//! // create an array natively
-//! let array = Int32Array::from(vec![Some(1), None, Some(3)]);
-//!
-//! // export it
-//! let (array_ptr, schema_ptr) = array.to_raw()?;
-//!
-//! // consumed and used by something else...
-//!
-//! // import it
-//! let array = unsafe { make_array_from_raw(array_ptr, schema_ptr)? };
-//!
-//! // perform some operation
-//! let array = array.as_any().downcast_ref::<Int32Array>().ok_or(
-//!     ArrowError::ParseError("Expects an int32".to_string()),
-//! )?;
-//! let array = arithmetic::add(&array, &array)?;
-//!
-//! // verify
-//! assert_eq!(array, Int32Array::from(vec![Some(2), None, Some(6)]));
-//!
-//! // (drop/release)
-//! Ok(())
-//! }
-//! ```
-
-/*
-# Design:
-
-Main assumptions:
-* A memory region is deallocated according it its own release mechanism.
-* Rust shares memory regions between arrays.
-* A memory region should be deallocated when no-one is using it.
-
-The design of this module is as follows:
-
-`ArrowArray` contains two `Arc`s, one per ABI-compatible `struct`, each containing data
-according to the C Data Interface. These Arcs are used for ref counting of the structs
-within Rust and lifetime management.
-
-Each ABI-compatible `struct` knowns how to `drop` itself, calling `release`.
-
-To import an array, unsafely create an `ArrowArray` from two pointers using [ArrowArray::try_from_raw].
-To export an array, create an `ArrowArray` using [ArrowArray::try_new].
-*/
-
-use std::{
-    convert::TryFrom,
-    ffi::CStr,
-    ffi::CString,
-    iter,
-    mem::{size_of, ManuallyDrop},
-    os::raw::c_char,
-    ptr::{self, NonNull},
-    sync::Arc,
-};
-
-use crate::array::ArrayData;
-use crate::buffer::Buffer;
-use crate::datatypes::{DataType, Field, TimeUnit};
-use crate::error::{ArrowError, Result};
-use crate::util::bit_util;
-
-/// ABI-compatible struct for `ArrowSchema` from C Data Interface
-/// See <https://arrow.apache.org/docs/format/CDataInterface.html#structure-definitions>
-/// This was created by bindgen
-#[repr(C)]
-#[derive(Debug)]
-pub struct FFI_ArrowSchema {
-    format: *const ::std::os::raw::c_char,
-    name: *const ::std::os::raw::c_char,
-    metadata: *const ::std::os::raw::c_char,
-    flags: i64,
-    n_children: i64,
-    children: *mut *mut FFI_ArrowSchema,
-    dictionary: *mut FFI_ArrowSchema,
-    release: ::std::option::Option<unsafe extern "C" fn(arg1: *mut FFI_ArrowSchema)>,
-    private_data: *mut ::std::os::raw::c_void,
-}
-
-// callback used to drop [FFI_ArrowSchema] when it is exported.
-unsafe extern "C" fn release_schema(schema: *mut FFI_ArrowSchema) {
-    let schema = &mut *schema;
-
-    // take ownership back to release it.
-    CString::from_raw(schema.format as *mut std::os::raw::c_char);
-
-    schema.release = None;
-}
-
-struct SchemaPrivateData {
-    children: Box<[*mut FFI_ArrowSchema]>,
-}
-
-impl FFI_ArrowSchema {
-    /// create a new [FFI_ArrowSchema] from a format.
-    fn new(
-        format: &str,
-        children: Vec<*mut FFI_ArrowSchema>,
-        nullable: bool,
-    ) -> FFI_ArrowSchema {
-        let children = children.into_boxed_slice();
-        let n_children = children.len() as i64;
-        let children_ptr = children.as_ptr() as *mut *mut FFI_ArrowSchema;
-
-        let flags = if nullable { 2 } else { 0 };
-
-        let private_data = Box::new(SchemaPrivateData { children });
-        // <https://arrow.apache.org/docs/format/CDataInterface.html#c.ArrowSchema>
-        FFI_ArrowSchema {
-            format: CString::new(format).unwrap().into_raw(),
-            // For child data a non null string is expected and is called item
-            name: CString::new("item").unwrap().into_raw(),
-            metadata: std::ptr::null_mut(),
-            flags,
-            n_children,
-            children: children_ptr,
-            dictionary: std::ptr::null_mut(),
-            release: Some(release_schema),
-            private_data: Box::into_raw(private_data) as *mut ::std::os::raw::c_void,
-        }
-    }
-
-    /// create an empty [FFI_ArrowSchema]
-    fn empty() -> Self {
-        Self {
-            format: std::ptr::null_mut(),
-            name: std::ptr::null_mut(),
-            metadata: std::ptr::null_mut(),
-            flags: 0,
-            n_children: 0,
-            children: ptr::null_mut(),
-            dictionary: std::ptr::null_mut(),
-            release: None,
-            private_data: std::ptr::null_mut(),
-        }
-    }
-
-    /// returns the format of this schema.
-    pub fn format(&self) -> &str {
-        unsafe { CStr::from_ptr(self.format) }
-            .to_str()
-            .expect("The external API has a non-utf8 as format")
-    }
-}
-
-impl Drop for FFI_ArrowSchema {
-    fn drop(&mut self) {
-        match self.release {
-            None => (),
-            Some(release) => unsafe { release(self) },
-        };
-    }
-}
-
-/// maps a DataType `format` to a [DataType](arrow::datatypes::DataType).
-/// See https://arrow.apache.org/docs/format/CDataInterface.html#data-type-description-format-strings
-fn to_datatype(
-    format: &str,
-    child_type: Option<DataType>,
-    schema: &FFI_ArrowSchema,
-) -> Result<DataType> {
-    Ok(match format {
-        "n" => DataType::Null,
-        "b" => DataType::Boolean,
-        "c" => DataType::Int8,
-        "C" => DataType::UInt8,
-        "s" => DataType::Int16,
-        "S" => DataType::UInt16,
-        "i" => DataType::Int32,
-        "I" => DataType::UInt32,
-        "l" => DataType::Int64,
-        "L" => DataType::UInt64,
-        "e" => DataType::Float16,
-        "f" => DataType::Float32,
-        "g" => DataType::Float64,
-        "z" => DataType::Binary,
-        "Z" => DataType::LargeBinary,
-        "u" => DataType::Utf8,
-        "U" => DataType::LargeUtf8,
-        "tdD" => DataType::Date32,
-        "tdm" => DataType::Date64,
-        "tts" => DataType::Time32(TimeUnit::Second),
-        "ttm" => DataType::Time32(TimeUnit::Millisecond),
-        "ttu" => DataType::Time64(TimeUnit::Microsecond),
-        "ttn" => DataType::Time64(TimeUnit::Nanosecond),
-
-        // Note: The datatype null will only be created when called from ArrowArray::buffer_len
-        // at that point the child data is not yet known, but it is also not required to determine
-        // the buffer length of the list arrays.
-        "+l" => {
-            let nullable = schema.flags == 2;
-            // Safety
-            // Should be set as this is expected from the C FFI definition
-            debug_assert!(!schema.name.is_null());
-            let name = unsafe { CString::from_raw(schema.name as *mut c_char) }
-                .into_string()
-                .unwrap();
-            // prevent a double free
-            let name = ManuallyDrop::new(name);
-            DataType::List(Box::new(Field::new(
-                &name,
-                child_type.unwrap_or(DataType::Null),
-                nullable,
-            )))
-        }
-        "+L" => {
-            let nullable = schema.flags == 2;
-            // Safety
-            // Should be set as this is expected from the C FFI definition
-            debug_assert!(!schema.name.is_null());
-            let name = unsafe { CString::from_raw(schema.name as *mut c_char) }
-                .into_string()
-                .unwrap();
-            // prevent a double free
-            let name = ManuallyDrop::new(name);
-            DataType::LargeList(Box::new(Field::new(
-                &name,
-                child_type.unwrap_or(DataType::Null),
-                nullable,
-            )))
-        }
-        dt => {
-            return Err(ArrowError::CDataInterface(format!(
-                "The datatype \"{}\" is not supported in the Rust implementation",
-                dt
-            )))
-        }
-    })
-}
-
-/// the inverse of [to_datatype]
-fn from_datatype(datatype: &DataType) -> Result<String> {
-    Ok(match datatype {
-        DataType::Null => "n",
-        DataType::Boolean => "b",
-        DataType::Int8 => "c",
-        DataType::UInt8 => "C",
-        DataType::Int16 => "s",
-        DataType::UInt16 => "S",
-        DataType::Int32 => "i",
-        DataType::UInt32 => "I",
-        DataType::Int64 => "l",
-        DataType::UInt64 => "L",
-        DataType::Float16 => "e",
-        DataType::Float32 => "f",
-        DataType::Float64 => "g",
-        DataType::Binary => "z",
-        DataType::LargeBinary => "Z",
-        DataType::Utf8 => "u",
-        DataType::LargeUtf8 => "U",
-        DataType::Date32 => "tdD",
-        DataType::Date64 => "tdm",
-        DataType::Time32(TimeUnit::Second) => "tts",
-        DataType::Time32(TimeUnit::Millisecond) => "ttm",
-        DataType::Time64(TimeUnit::Microsecond) => "ttu",
-        DataType::Time64(TimeUnit::Nanosecond) => "ttn",
-        DataType::List(_) => "+l",
-        DataType::LargeList(_) => "+L",
-        z => {
-            return Err(ArrowError::CDataInterface(format!(
-                "The datatype \"{:?}\" is still not supported in Rust implementation",
-                z
-            )))
-        }
-    }
-    .to_string())
-}
-
-// returns the number of bits that buffer `i` (in the C data interface) is expected to have.
-// This is set by the Arrow specification
-fn bit_width(data_type: &DataType, i: usize) -> Result<usize> {
-    Ok(match (data_type, i) {
-        // the null buffer is bit sized
-        (_, 0) => 1,
-        // primitive types first buffer's size is given by the native types
-        (DataType::Boolean, 1) => 1,
-        (DataType::UInt8, 1) => size_of::<u8>() * 8,
-        (DataType::UInt16, 1) => size_of::<u16>() * 8,
-        (DataType::UInt32, 1) => size_of::<u32>() * 8,
-        (DataType::UInt64, 1) => size_of::<u64>() * 8,
-        (DataType::Int8, 1) => size_of::<i8>() * 8,
-        (DataType::Int16, 1) => size_of::<i16>() * 8,
-        (DataType::Int32, 1) | (DataType::Date32, 1) | (DataType::Time32(_), 1) => size_of::<i32>() * 8,
-        (DataType::Int64, 1) | (DataType::Date64, 1) | (DataType::Time64(_), 1) => size_of::<i64>() * 8,
-        (DataType::Float32, 1) => size_of::<f32>() * 8,
-        (DataType::Float64, 1) => size_of::<f64>() * 8,
-        // primitive types have a single buffer
-        (DataType::Boolean, _) |
-        (DataType::UInt8, _) |
-        (DataType::UInt16, _) |
-        (DataType::UInt32, _) |
-        (DataType::UInt64, _) |
-        (DataType::Int8, _) |
-        (DataType::Int16, _) |
-        (DataType::Int32, _) | (DataType::Date32, _) | (DataType::Time32(_), _) |
-        (DataType::Int64, _) | (DataType::Date64, _) | (DataType::Time64(_), _) |
-        (DataType::Float32, _) |
-        (DataType::Float64, _) => {
-            return Err(ArrowError::CDataInterface(format!(
-                "The datatype \"{:?}\" expects 2 buffers, but requested {}. Please verify that the C data interface is correctly implemented.",
-                data_type, i
-            )))
-        }
-        // Variable-sized binaries: have two buffers.
-        // "small": first buffer is i32, second is in bytes
-        (DataType::Utf8, 1) | (DataType::Binary, 1) | (DataType::List(_), 1) => size_of::<i32>() * 8,
-        (DataType::Utf8, 2) | (DataType::Binary, 2) | (DataType::List(_), 2) => size_of::<u8>() * 8,
-        (DataType::Utf8, _) | (DataType::Binary, _) | (DataType::List(_), _)=> {
-            return Err(ArrowError::CDataInterface(format!(
-                "The datatype \"{:?}\" expects 3 buffers, but requested {}. Please verify that the C data interface is correctly implemented.",
-                data_type, i
-            )))
-        }
-        // Variable-sized binaries: have two buffers.
-        // LargeUtf8: first buffer is i64, second is in bytes
-        (DataType::LargeUtf8, 1) | (DataType::LargeBinary, 1) | (DataType::LargeList(_), 1) => size_of::<i64>() * 8,
-        (DataType::LargeUtf8, 2) | (DataType::LargeBinary, 2) | (DataType::LargeList(_), 2)=> size_of::<u8>() * 8,
-        (DataType::LargeUtf8, _) | (DataType::LargeBinary, _) | (DataType::LargeList(_), _)=> {
-            return Err(ArrowError::CDataInterface(format!(
-                "The datatype \"{:?}\" expects 3 buffers, but requested {}. Please verify that the C data interface is correctly implemented.",
-                data_type, i
-            )))
-        }
-        _ => {
-            return Err(ArrowError::CDataInterface(format!(
-                "The datatype \"{:?}\" is still not supported in Rust implementation",
-                data_type
-            )))
-        }
-    })
-}
-
-/// ABI-compatible struct for ArrowArray from C Data Interface
-/// See <https://arrow.apache.org/docs/format/CDataInterface.html#structure-definitions>
-/// This was created by bindgen
-#[repr(C)]
-#[derive(Debug)]
-pub struct FFI_ArrowArray {
-    pub(crate) length: i64,
-    pub(crate) null_count: i64,
-    pub(crate) offset: i64,
-    pub(crate) n_buffers: i64,
-    pub(crate) n_children: i64,
-    pub(crate) buffers: *mut *const ::std::os::raw::c_void,
-    children: *mut *mut FFI_ArrowArray,
-    dictionary: *mut FFI_ArrowArray,
-    release: ::std::option::Option<unsafe extern "C" fn(arg1: *mut FFI_ArrowArray)>,
-    // When exported, this MUST contain everything that is owned by this array.
-    // for example, any buffer pointed to in `buffers` must be here, as well as the `buffers` pointer
-    // itself.
-    // In other words, everything in [FFI_ArrowArray] must be owned by `private_data` and can assume
-    // that they do not outlive `private_data`.
-    private_data: *mut ::std::os::raw::c_void,
-}
-
-// callback used to drop [FFI_ArrowArray] when it is exported
-unsafe extern "C" fn release_array(array: *mut FFI_ArrowArray) {
-    if array.is_null() {
-        return;
-    }
-    let array = &mut *array;
-    // take ownership of `private_data`, therefore dropping it
-    Box::from_raw(array.private_data as *mut PrivateData);
-
-    array.release = None;
-}
-
-struct PrivateData {
-    buffers: Vec<Option<Buffer>>,
-    buffers_ptr: Box<[*const std::os::raw::c_void]>,
-    children: Box<[*mut FFI_ArrowArray]>,
-}
-
-impl FFI_ArrowArray {
-    /// creates a new `FFI_ArrowArray` from existing data.
-    /// # Safety
-    /// This method releases `buffers`. Consumers of this struct *must* call `release` before
-    /// releasing this struct, or contents in `buffers` leak.
-    unsafe fn new(
-        length: i64,
-        null_count: i64,
-        offset: i64,
-        n_buffers: i64,
-        buffers: Vec<Option<Buffer>>,
-        children: Vec<*mut FFI_ArrowArray>,
-    ) -> Self {
-        let buffers_ptr = buffers
-            .iter()
-            .map(|maybe_buffer| match maybe_buffer {
-                // note that `raw_data` takes into account the buffer's offset
-                Some(b) => b.as_ptr() as *const std::os::raw::c_void,
-                None => std::ptr::null(),
-            })
-            .collect::<Box<[_]>>();
-        let pointer = buffers_ptr.as_ptr() as *mut *const std::ffi::c_void;
-
-        let children = children.into_boxed_slice();
-        let children_ptr = children.as_ptr() as *mut *mut FFI_ArrowArray;
-        let n_children = children.len() as i64;
-
-        // create the private data owning everything.
-        // any other data must be added here, e.g. via a struct, to track lifetime.
-        let private_data = Box::new(PrivateData {
-            buffers,
-            buffers_ptr,
-            children,
-        });
-
-        Self {
-            length,
-            null_count,
-            offset,
-            n_buffers,
-            n_children,
-            buffers: pointer,
-            children: children_ptr,
-            dictionary: std::ptr::null_mut(),
-            release: Some(release_array),
-            private_data: Box::into_raw(private_data) as *mut ::std::os::raw::c_void,
-        }
-    }
-
-    // create an empty `FFI_ArrowArray`, which can be used to import data into
-    fn empty() -> Self {
-        Self {
-            length: 0,
-            null_count: 0,
-            offset: 0,
-            n_buffers: 0,
-            n_children: 0,
-            buffers: std::ptr::null_mut(),
-            children: std::ptr::null_mut(),
-            dictionary: std::ptr::null_mut(),
-            release: None,
-            private_data: std::ptr::null_mut(),
-        }
-    }
-}
-
-/// returns a new buffer corresponding to the index `i` of the FFI array. It may not exist (null pointer).
-/// `bits` is the number of bits that the native type of this buffer has.
-/// The size of the buffer will be `ceil(self.length * bits, 8)`.
-/// # Panic
-/// This function panics if `i` is larger or equal to `n_buffers`.
-/// # Safety
-/// This function assumes that `ceil(self.length * bits, 8)` is the size of the buffer
-unsafe fn create_buffer(
-    array: Arc<FFI_ArrowArray>,
-    index: usize,
-    len: usize,
-) -> Option<Buffer> {
-    if array.buffers.is_null() {
-        return None;
-    }
-    let buffers = array.buffers as *mut *const u8;
-
-    assert!(index < array.n_buffers as usize);
-    let ptr = *buffers.add(index);
-
-    NonNull::new(ptr as *mut u8).map(|ptr| Buffer::from_unowned(ptr, len, array))
-}
-
-unsafe fn create_child_arrays(
-    array: Arc<FFI_ArrowArray>,
-    schema: Arc<FFI_ArrowSchema>,
-) -> Result<Vec<ArrayData>> {
-    (0..array.n_children as usize)
-        .map(|i| {
-            let arr_ptr = *array.children.add(i);
-            let schema_ptr = *schema.children.add(i);
-            let arrow_arr = ArrowArray::try_from_raw(
-                arr_ptr as *const FFI_ArrowArray,
-                schema_ptr as *const FFI_ArrowSchema,
-            )?;
-            ArrayData::try_from(arrow_arr)
-        })
-        .collect()
-}
-
-impl Drop for FFI_ArrowArray {
-    fn drop(&mut self) {
-        match self.release {
-            None => (),
-            Some(release) => unsafe { release(self) },
-        };
-    }
-}
-
-/// Struct used to move an Array from and to the C Data Interface.
-/// Its main responsibility is to expose functionality that requires
-/// both [FFI_ArrowArray] and [FFI_ArrowSchema].
-///
-/// This struct has two main paths:
-///
-/// ## Import from the C Data Interface
-/// * [ArrowArray::empty] to allocate memory to be filled by an external call
-/// * [ArrowArray::try_from_raw] to consume two non-null allocated pointers
-/// ## Export to the C Data Interface
-/// * [ArrowArray::try_new] to create a new [ArrowArray] from Rust-specific information
-/// * [ArrowArray::into_raw] to expose two pointers for [FFI_ArrowArray] and [FFI_ArrowSchema].
-///
-/// # Safety
-/// Whoever creates this struct is responsible for releasing their resources. Specifically,
-/// consumers *must* call [ArrowArray::into_raw] and take ownership of the individual pointers,
-/// calling [FFI_ArrowArray::release] and [FFI_ArrowSchema::release] accordingly.
-///
-/// Furthermore, this struct assumes that the incoming data agrees with the C data interface.
-#[derive(Debug)]
-pub struct ArrowArray {
-    // these are ref-counted because they can be shared by multiple buffers.
-    array: Arc<FFI_ArrowArray>,
-    schema: Arc<FFI_ArrowSchema>,
-}
-
-impl ArrowArray {
-    /// creates a new `ArrowArray`. This is used to export to the C Data Interface.
-    /// # Safety
-    /// See safety of [ArrowArray]
-    #[allow(clippy::too_many_arguments)]
-    pub unsafe fn try_new(
-        data_type: &DataType,
-        len: usize,
-        null_count: usize,
-        null_buffer: Option<Buffer>,
-        offset: usize,
-        buffers: Vec<Buffer>,
-        child_data: Vec<ArrowArray>,
-        nullable: bool,
-    ) -> Result<Self> {
-        let format = from_datatype(data_type)?;
-        // * insert the null buffer at the start
-        // * make all others `Option<Buffer>`.
-        let new_buffers = iter::once(null_buffer)
-            .chain(buffers.iter().map(|b| Some(b.clone())))
-            .collect::<Vec<_>>();
-
-        let mut ffi_arrow_arrays = Vec::with_capacity(child_data.len());
-        let mut ffi_arrow_schemas = Vec::with_capacity(child_data.len());
-
-        child_data.into_iter().for_each(|arrow_arr| {
-            let (arr, schema) = ArrowArray::into_raw(arrow_arr);
-            ffi_arrow_arrays.push(arr as *mut FFI_ArrowArray);
-            ffi_arrow_schemas.push(schema as *mut FFI_ArrowSchema);
-        });
-
-        let schema = Arc::new(FFI_ArrowSchema::new(&format, ffi_arrow_schemas, nullable));
-        let array = Arc::new(FFI_ArrowArray::new(
-            len as i64,
-            null_count as i64,
-            offset as i64,
-            new_buffers.len() as i64,
-            new_buffers,
-            ffi_arrow_arrays,
-        ));
-
-        Ok(ArrowArray { array, schema })
-    }
-
-    /// creates a new [ArrowArray] from two pointers. Used to import from the C Data Interface.
-    /// # Safety
-    /// See safety of [ArrowArray]
-    /// # Error
-    /// Errors if any of the pointers is null
-    pub unsafe fn try_from_raw(
-        array: *const FFI_ArrowArray,
-        schema: *const FFI_ArrowSchema,
-    ) -> Result<Self> {
-        if array.is_null() || schema.is_null() {
-            return Err(ArrowError::MemoryError(
-                "At least one of the pointers passed to `try_from_raw` is null"
-                    .to_string(),
-            ));
-        };
-        Ok(Self {
-            array: Arc::from_raw(array as *mut FFI_ArrowArray),
-            schema: Arc::from_raw(schema as *mut FFI_ArrowSchema),
-        })
-    }
-
-    /// creates a new empty [ArrowArray]. Used to import from the C Data Interface.
-    /// # Safety
-    /// See safety of [ArrowArray]
-    pub unsafe fn empty() -> Self {
-        let schema = Arc::new(FFI_ArrowSchema::empty());
-        let array = Arc::new(FFI_ArrowArray::empty());
-        ArrowArray { array, schema }
-    }
-
-    /// exports [ArrowArray] to the C Data Interface
-    pub fn into_raw(this: ArrowArray) -> (*const FFI_ArrowArray, *const FFI_ArrowSchema) {
-        (Arc::into_raw(this.array), Arc::into_raw(this.schema))
-    }
-
-    /// returns the null bit buffer.
-    /// Rust implementation uses a buffer that is not part of the array of buffers.
-    /// The C Data interface's null buffer is part of the array of buffers.
-    pub fn null_bit_buffer(&self) -> Option<Buffer> {
-        // similar to `self.buffer_len(0)`, but without `Result`.
-        let buffer_len = bit_util::ceil(self.array.length as usize, 8);
-
-        unsafe { create_buffer(self.array.clone(), 0, buffer_len) }
-    }
-
-    /// Returns the length, in bytes, of the buffer `i` (indexed according to the C data interface)
-    // Rust implementation uses fixed-sized buffers, which require knowledge of their `len`.
-    // for variable-sized buffers, such as the second buffer of a stringArray, we need
-    // to fetch offset buffer's len to build the second buffer.
-    fn buffer_len(&self, i: usize) -> Result<usize> {
-        // Inner type is not important for buffer length.
-        let data_type = &self.data_type(None)?;
-
-        Ok(match (data_type, i) {
-            (DataType::Utf8, 1)
-            | (DataType::LargeUtf8, 1)
-            | (DataType::Binary, 1)
-            | (DataType::LargeBinary, 1)
-            | (DataType::List(_), 1)
-            | (DataType::LargeList(_), 1) => {
-                // the len of the offset buffer (buffer 1) equals length + 1
-                let bits = bit_width(data_type, i)?;
-                debug_assert_eq!(bits % 8, 0);
-                (self.array.length as usize + 1) * (bits / 8)
-            }
-            (DataType::Utf8, 2) | (DataType::Binary, 2) | (DataType::List(_), 2) => {
-                // the len of the data buffer (buffer 2) equals the last value of the offset buffer (buffer 1)
-                let len = self.buffer_len(1)?;
-                // first buffer is the null buffer => add(1)
-                // we assume that pointer is aligned for `i32`, as Utf8 uses `i32` offsets.
-                #[allow(clippy::cast_ptr_alignment)]
-                let offset_buffer = unsafe {
-                    *(self.array.buffers as *mut *const u8).add(1) as *const i32
-                };
-                // get last offset
-                (unsafe { *offset_buffer.add(len / size_of::<i32>() - 1) }) as usize
-            }
-            (DataType::LargeUtf8, 2)
-            | (DataType::LargeBinary, 2)
-            | (DataType::LargeList(_), 2) => {
-                // the len of the data buffer (buffer 2) equals the last value of the offset buffer (buffer 1)
-                let len = self.buffer_len(1)?;
-                // first buffer is the null buffer => add(1)
-                // we assume that pointer is aligned for `i64`, as Large uses `i64` offsets.
-                #[allow(clippy::cast_ptr_alignment)]
-                let offset_buffer = unsafe {
-                    *(self.array.buffers as *mut *const u8).add(1) as *const i64
-                };
-                // get last offset
-                (unsafe { *offset_buffer.add(len / size_of::<i64>() - 1) }) as usize
-            }
-            // buffer len of primitive types
-            _ => {
-                let bits = bit_width(data_type, i)?;
-                bit_util::ceil(self.array.length as usize * bits, 8)
-            }
-        })
-    }
-
-    /// returns all buffers, as organized by Rust (i.e. null buffer is skipped)
-    pub fn buffers(&self) -> Result<Vec<Buffer>> {
-        (0..self.array.n_buffers - 1)
-            .map(|index| {
-                // + 1: skip null buffer
-                let index = (index + 1) as usize;
-
-                let len = self.buffer_len(index)?;
-
-                unsafe { create_buffer(self.array.clone(), index, len) }.ok_or_else(
-                    || {
-                        ArrowError::CDataInterface(format!(
-                            "The external buffer at position {} is null.",
-                            index - 1
-                        ))
-                    },
-                )
-            })
-            .collect()
-    }
-
-    /// returns the child data of this array
-    pub fn children(&self) -> Result<Vec<ArrayData>> {
-        unsafe { create_child_arrays(self.array.clone(), self.schema.clone()) }
-    }
-
-    /// the length of the array
-    pub fn len(&self) -> usize {
-        self.array.length as usize
-    }
-
-    /// whether the array is empty
-    pub fn is_empty(&self) -> bool {
-        self.array.length == 0
-    }
-
-    /// the offset of the array
-    pub fn offset(&self) -> usize {
-        self.array.offset as usize
-    }
-
-    /// the null count of the array
-    pub fn null_count(&self) -> usize {
-        self.array.null_count as usize
-    }
-
-    /// the data_type as declared in the schema
-    pub fn data_type(&self, child_type: Option<DataType>) -> Result<DataType> {
-        to_datatype(self.schema.format(), child_type, self.schema.as_ref())
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::array::{
-        make_array, Array, ArrayData, BinaryOffsetSizeTrait, BooleanArray,
-        GenericBinaryArray, GenericListArray, GenericStringArray, Int32Array,
-        OffsetSizeTrait, StringOffsetSizeTrait, Time32MillisecondArray,
-    };
-    use crate::compute::kernels;
-    use crate::datatypes::Field;
-    use std::convert::TryFrom;
-    use std::iter::FromIterator;
-
-    #[test]
-    fn test_round_trip() -> Result<()> {
-        // create an array natively
-        let array = Int32Array::from(vec![1, 2, 3]);
-
-        // export it
-        let array = ArrowArray::try_from(array.data().clone())?;
-
-        // (simulate consumer) import it
-        let data = ArrayData::try_from(array)?;
-        let array = make_array(data);
-
-        // perform some operation
-        let array = array.as_any().downcast_ref::<Int32Array>().unwrap();
-        let array = kernels::arithmetic::add(&array, &array).unwrap();
-
-        // verify
-        assert_eq!(array, Int32Array::from(vec![2, 4, 6]));
-
-        // (drop/release)
-        Ok(())
-    }
-    // case with nulls is tested in the docs, through the example on this module.
-
-    fn test_generic_string<Offset: StringOffsetSizeTrait>() -> Result<()> {
-        // create an array natively
-        let array =
-            GenericStringArray::<Offset>::from(vec![Some("a"), None, Some("aaa")]);
-
-        // export it
-        let array = ArrowArray::try_from(array.data().clone())?;
-
-        // (simulate consumer) import it
-        let data = ArrayData::try_from(array)?;
-        let array = make_array(data);
-
-        // perform some operation
-        let array = kernels::concat::concat(&[array.as_ref(), array.as_ref()]).unwrap();
-        let array = array
-            .as_any()
-            .downcast_ref::<GenericStringArray<Offset>>()
-            .unwrap();
-
-        // verify
-        let expected = GenericStringArray::<Offset>::from(vec![
-            Some("a"),
-            None,
-            Some("aaa"),
-            Some("a"),
-            None,
-            Some("aaa"),
-        ]);
-        assert_eq!(array, &expected);
-
-        // (drop/release)
-        Ok(())
-    }
-
-    #[test]
-    fn test_string() -> Result<()> {
-        test_generic_string::<i32>()
-    }
-
-    #[test]
-    fn test_large_string() -> Result<()> {
-        test_generic_string::<i64>()
-    }
-
-    fn test_generic_list<Offset: OffsetSizeTrait>() -> Result<()> {
-        // Construct a value array
-        let value_data = ArrayData::builder(DataType::Int32)
-            .len(8)
-            .add_buffer(Buffer::from_slice_ref(&[0, 1, 2, 3, 4, 5, 6, 7]))
-            .build();
-
-        // Construct a buffer for value offsets, for the nested array:
-        //  [[0, 1, 2], [3, 4, 5], [6, 7]]
-        let value_offsets = Buffer::from_iter(
-            [0usize, 3, 6, 8]
-                .iter()
-                .map(|i| Offset::from_usize(*i).unwrap()),
-        );
-
-        // Construct a list array from the above two
-        let list_data_type = match std::mem::size_of::<Offset>() {
-            4 => DataType::List(Box::new(Field::new("item", DataType::Int32, false))),
-            _ => {
-                DataType::LargeList(Box::new(Field::new("item", DataType::Int32, false)))
-            }
-        };
-
-        let list_data = ArrayData::builder(list_data_type)
-            .len(3)
-            .add_buffer(value_offsets)
-            .add_child_data(value_data)
-            .build();
-
-        // create an array natively
-        let array = GenericListArray::<Offset>::from(list_data.clone());
-
-        // export it
-        let array = ArrowArray::try_from(array.data().clone())?;
-
-        // (simulate consumer) import it
-        let data = ArrayData::try_from(array)?;
-        let array = make_array(data);
-
-        // downcast
-        let array = array
-            .as_any()
-            .downcast_ref::<GenericListArray<Offset>>()
-            .unwrap();
-
-        dbg!(&array);
-
-        // verify
-        let expected = GenericListArray::<Offset>::from(list_data);
-        assert_eq!(&array.value(0), &expected.value(0));
-        assert_eq!(&array.value(1), &expected.value(1));
-        assert_eq!(&array.value(2), &expected.value(2));
-
-        // (drop/release)
-        Ok(())
-    }
-
-    #[test]
-    fn test_list() -> Result<()> {
-        test_generic_list::<i32>()
-    }
-
-    #[test]
-    fn test_large_list() -> Result<()> {
-        test_generic_list::<i64>()
-    }
-
-    fn test_generic_binary<Offset: BinaryOffsetSizeTrait>() -> Result<()> {
-        // create an array natively
-        let array: Vec<Option<&[u8]>> = vec![Some(b"a"), None, Some(b"aaa")];
-        let array = GenericBinaryArray::<Offset>::from(array);
-
-        // export it
-        let array = ArrowArray::try_from(array.data().clone())?;
-
-        // (simulate consumer) import it
-        let data = ArrayData::try_from(array)?;
-        let array = make_array(data);
-
-        // perform some operation
-        let array = kernels::concat::concat(&[array.as_ref(), array.as_ref()]).unwrap();
-        let array = array
-            .as_any()
-            .downcast_ref::<GenericBinaryArray<Offset>>()
-            .unwrap();
-
-        // verify
-        let expected: Vec<Option<&[u8]>> = vec![
-            Some(b"a"),
-            None,
-            Some(b"aaa"),
-            Some(b"a"),
-            None,
-            Some(b"aaa"),
-        ];
-        let expected = GenericBinaryArray::<Offset>::from(expected);
-        assert_eq!(array, &expected);
-
-        // (drop/release)
-        Ok(())
-    }
-
-    #[test]
-    fn test_binary() -> Result<()> {
-        test_generic_binary::<i32>()
-    }
-
-    #[test]
-    fn test_large_binary() -> Result<()> {
-        test_generic_binary::<i64>()
-    }
-
-    #[test]
-    fn test_bool() -> Result<()> {
-        // create an array natively
-        let array = BooleanArray::from(vec![None, Some(true), Some(false)]);
-
-        // export it
-        let array = ArrowArray::try_from(array.data().clone())?;
-
-        // (simulate consumer) import it
-        let data = ArrayData::try_from(array)?;
-        let array = make_array(data);
-
-        // perform some operation
-        let array = array.as_any().downcast_ref::<BooleanArray>().unwrap();
-        let array = kernels::boolean::not(&array)?;
-
-        // verify
-        assert_eq!(
-            array,
-            BooleanArray::from(vec![None, Some(false), Some(true)])
-        );
-
-        // (drop/release)
-        Ok(())
-    }
-
-    #[test]
-    fn test_time32() -> Result<()> {
-        // create an array natively
-        let array = Time32MillisecondArray::from(vec![None, Some(1), Some(2)]);
-
-        // export it
-        let array = ArrowArray::try_from(array.data().clone())?;
-
-        // (simulate consumer) import it
-        let data = ArrayData::try_from(array)?;
-        let array = make_array(data);
-
-        // perform some operation
-        let array = kernels::concat::concat(&[array.as_ref(), array.as_ref()]).unwrap();
-        let array = array
-            .as_any()
-            .downcast_ref::<Time32MillisecondArray>()
-            .unwrap();
-
-        // verify
-        assert_eq!(
-            array,
-            &Time32MillisecondArray::from(vec![
-                None,
-                Some(1),
-                Some(2),
-                None,
-                Some(1),
-                Some(2)
-            ])
-        );
-
-        // (drop/release)
-        Ok(())
-    }
-}
diff --git a/rust/arrow/src/ipc/convert.rs b/rust/arrow/src/ipc/convert.rs
deleted file mode 100644
index 59d4d0b9089..00000000000
--- a/rust/arrow/src/ipc/convert.rs
+++ /dev/null
@@ -1,871 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Utilities for converting between IPC types and native Arrow types
-
-use crate::datatypes::{DataType, Field, IntervalUnit, Schema, TimeUnit};
-use crate::error::{ArrowError, Result};
-use crate::ipc;
-
-use flatbuffers::{
-    FlatBufferBuilder, ForwardsUOffset, UnionWIPOffset, Vector, WIPOffset,
-};
-use std::collections::{BTreeMap, HashMap};
-
-use DataType::*;
-
-/// Serialize a schema in IPC format
-pub fn schema_to_fb(schema: &Schema) -> FlatBufferBuilder {
-    let mut fbb = FlatBufferBuilder::new();
-
-    let root = schema_to_fb_offset(&mut fbb, schema);
-
-    fbb.finish(root, None);
-
-    fbb
-}
-
-pub fn schema_to_fb_offset<'a>(
-    fbb: &mut FlatBufferBuilder<'a>,
-    schema: &Schema,
-) -> WIPOffset<ipc::Schema<'a>> {
-    let mut fields = vec![];
-    for field in schema.fields() {
-        let fb_field = build_field(fbb, field);
-        fields.push(fb_field);
-    }
-
-    let mut custom_metadata = vec![];
-    for (k, v) in schema.metadata() {
-        let fb_key_name = fbb.create_string(k.as_str());
-        let fb_val_name = fbb.create_string(v.as_str());
-
-        let mut kv_builder = ipc::KeyValueBuilder::new(fbb);
-        kv_builder.add_key(fb_key_name);
-        kv_builder.add_value(fb_val_name);
-        custom_metadata.push(kv_builder.finish());
-    }
-
-    let fb_field_list = fbb.create_vector(&fields);
-    let fb_metadata_list = fbb.create_vector(&custom_metadata);
-
-    let mut builder = ipc::SchemaBuilder::new(fbb);
-    builder.add_fields(fb_field_list);
-    builder.add_custom_metadata(fb_metadata_list);
-    builder.finish()
-}
-
-/// Convert an IPC Field to Arrow Field
-impl<'a> From<ipc::Field<'a>> for Field {
-    fn from(field: ipc::Field) -> Field {
-        let mut arrow_field = if let Some(dictionary) = field.dictionary() {
-            Field::new_dict(
-                field.name().unwrap(),
-                get_data_type(field, true),
-                field.nullable(),
-                dictionary.id(),
-                dictionary.isOrdered(),
-            )
-        } else {
-            Field::new(
-                field.name().unwrap(),
-                get_data_type(field, true),
-                field.nullable(),
-            )
-        };
-
-        let mut metadata = None;
-        if let Some(list) = field.custom_metadata() {
-            let mut metadata_map = BTreeMap::default();
-            for kv in list {
-                if let (Some(k), Some(v)) = (kv.key(), kv.value()) {
-                    metadata_map.insert(k.to_string(), v.to_string());
-                }
-            }
-            metadata = Some(metadata_map);
-        }
-
-        arrow_field.set_metadata(metadata);
-        arrow_field
-    }
-}
-
-/// Deserialize a Schema table from IPC format to Schema data type
-pub fn fb_to_schema(fb: ipc::Schema) -> Schema {
-    let mut fields: Vec<Field> = vec![];
-    let c_fields = fb.fields().unwrap();
-    let len = c_fields.len();
-    for i in 0..len {
-        let c_field: ipc::Field = c_fields.get(i);
-        match c_field.type_type() {
-            ipc::Type::Decimal if fb.endianness() == ipc::Endianness::Big => {
-                unimplemented!("Big Endian is not supported for Decimal!")
-            }
-            _ => (),
-        };
-        fields.push(c_field.into());
-    }
-
-    let mut metadata: HashMap<String, String> = HashMap::default();
-    if let Some(md_fields) = fb.custom_metadata() {
-        let len = md_fields.len();
-        for i in 0..len {
-            let kv = md_fields.get(i);
-            let k_str = kv.key();
-            let v_str = kv.value();
-            if let Some(k) = k_str {
-                if let Some(v) = v_str {
-                    metadata.insert(k.to_string(), v.to_string());
-                }
-            }
-        }
-    }
-    Schema::new_with_metadata(fields, metadata)
-}
-
-/// Deserialize an IPC message into a schema
-pub fn schema_from_bytes(bytes: &[u8]) -> Result<Schema> {
-    if let Ok(ipc) = ipc::root_as_message(bytes) {
-        if let Some(schema) = ipc.header_as_schema().map(fb_to_schema) {
-            Ok(schema)
-        } else {
-            Err(ArrowError::IoError(
-                "Unable to get head as schema".to_string(),
-            ))
-        }
-    } else {
-        Err(ArrowError::IoError(
-            "Unable to get root as message".to_string(),
-        ))
-    }
-}
-
-/// Get the Arrow data type from the flatbuffer Field table
-pub(crate) fn get_data_type(field: ipc::Field, may_be_dictionary: bool) -> DataType {
-    if let Some(dictionary) = field.dictionary() {
-        if may_be_dictionary {
-            let int = dictionary.indexType().unwrap();
-            let index_type = match (int.bitWidth(), int.is_signed()) {
-                (8, true) => DataType::Int8,
-                (8, false) => DataType::UInt8,
-                (16, true) => DataType::Int16,
-                (16, false) => DataType::UInt16,
-                (32, true) => DataType::Int32,
-                (32, false) => DataType::UInt32,
-                (64, true) => DataType::Int64,
-                (64, false) => DataType::UInt64,
-                _ => panic!("Unexpected bitwidth and signed"),
-            };
-            return DataType::Dictionary(
-                Box::new(index_type),
-                Box::new(get_data_type(field, false)),
-            );
-        }
-    }
-
-    match field.type_type() {
-        ipc::Type::Null => DataType::Null,
-        ipc::Type::Bool => DataType::Boolean,
-        ipc::Type::Int => {
-            let int = field.type_as_int().unwrap();
-            match (int.bitWidth(), int.is_signed()) {
-                (8, true) => DataType::Int8,
-                (8, false) => DataType::UInt8,
-                (16, true) => DataType::Int16,
-                (16, false) => DataType::UInt16,
-                (32, true) => DataType::Int32,
-                (32, false) => DataType::UInt32,
-                (64, true) => DataType::Int64,
-                (64, false) => DataType::UInt64,
-                z => panic!(
-                    "Int type with bit width of {} and signed of {} not supported",
-                    z.0, z.1
-                ),
-            }
-        }
-        ipc::Type::Binary => DataType::Binary,
-        ipc::Type::LargeBinary => DataType::LargeBinary,
-        ipc::Type::Utf8 => DataType::Utf8,
-        ipc::Type::LargeUtf8 => DataType::LargeUtf8,
-        ipc::Type::FixedSizeBinary => {
-            let fsb = field.type_as_fixed_size_binary().unwrap();
-            DataType::FixedSizeBinary(fsb.byteWidth())
-        }
-        ipc::Type::FloatingPoint => {
-            let float = field.type_as_floating_point().unwrap();
-            match float.precision() {
-                ipc::Precision::HALF => DataType::Float16,
-                ipc::Precision::SINGLE => DataType::Float32,
-                ipc::Precision::DOUBLE => DataType::Float64,
-                z => panic!("FloatingPoint type with precision of {:?} not supported", z),
-            }
-        }
-        ipc::Type::Date => {
-            let date = field.type_as_date().unwrap();
-            match date.unit() {
-                ipc::DateUnit::DAY => DataType::Date32,
-                ipc::DateUnit::MILLISECOND => DataType::Date64,
-                z => panic!("Date type with unit of {:?} not supported", z),
-            }
-        }
-        ipc::Type::Time => {
-            let time = field.type_as_time().unwrap();
-            match (time.bitWidth(), time.unit()) {
-                (32, ipc::TimeUnit::SECOND) => DataType::Time32(TimeUnit::Second),
-                (32, ipc::TimeUnit::MILLISECOND) => {
-                    DataType::Time32(TimeUnit::Millisecond)
-                }
-                (64, ipc::TimeUnit::MICROSECOND) => {
-                    DataType::Time64(TimeUnit::Microsecond)
-                }
-                (64, ipc::TimeUnit::NANOSECOND) => DataType::Time64(TimeUnit::Nanosecond),
-                z => panic!(
-                    "Time type with bit width of {} and unit of {:?} not supported",
-                    z.0, z.1
-                ),
-            }
-        }
-        ipc::Type::Timestamp => {
-            let timestamp = field.type_as_timestamp().unwrap();
-            let timezone: Option<String> = timestamp.timezone().map(|tz| tz.to_string());
-            match timestamp.unit() {
-                ipc::TimeUnit::SECOND => DataType::Timestamp(TimeUnit::Second, timezone),
-                ipc::TimeUnit::MILLISECOND => {
-                    DataType::Timestamp(TimeUnit::Millisecond, timezone)
-                }
-                ipc::TimeUnit::MICROSECOND => {
-                    DataType::Timestamp(TimeUnit::Microsecond, timezone)
-                }
-                ipc::TimeUnit::NANOSECOND => {
-                    DataType::Timestamp(TimeUnit::Nanosecond, timezone)
-                }
-                z => panic!("Timestamp type with unit of {:?} not supported", z),
-            }
-        }
-        ipc::Type::Interval => {
-            let interval = field.type_as_interval().unwrap();
-            match interval.unit() {
-                ipc::IntervalUnit::YEAR_MONTH => {
-                    DataType::Interval(IntervalUnit::YearMonth)
-                }
-                ipc::IntervalUnit::DAY_TIME => DataType::Interval(IntervalUnit::DayTime),
-                z => panic!("Interval type with unit of {:?} unsupported", z),
-            }
-        }
-        ipc::Type::Duration => {
-            let duration = field.type_as_duration().unwrap();
-            match duration.unit() {
-                ipc::TimeUnit::SECOND => DataType::Duration(TimeUnit::Second),
-                ipc::TimeUnit::MILLISECOND => DataType::Duration(TimeUnit::Millisecond),
-                ipc::TimeUnit::MICROSECOND => DataType::Duration(TimeUnit::Microsecond),
-                ipc::TimeUnit::NANOSECOND => DataType::Duration(TimeUnit::Nanosecond),
-                z => panic!("Duration type with unit of {:?} unsupported", z),
-            }
-        }
-        ipc::Type::List => {
-            let children = field.children().unwrap();
-            if children.len() != 1 {
-                panic!("expect a list to have one child")
-            }
-            DataType::List(Box::new(children.get(0).into()))
-        }
-        ipc::Type::LargeList => {
-            let children = field.children().unwrap();
-            if children.len() != 1 {
-                panic!("expect a large list to have one child")
-            }
-            DataType::LargeList(Box::new(children.get(0).into()))
-        }
-        ipc::Type::FixedSizeList => {
-            let children = field.children().unwrap();
-            if children.len() != 1 {
-                panic!("expect a list to have one child")
-            }
-            let fsl = field.type_as_fixed_size_list().unwrap();
-            DataType::FixedSizeList(Box::new(children.get(0).into()), fsl.listSize())
-        }
-        ipc::Type::Struct_ => {
-            let mut fields = vec![];
-            if let Some(children) = field.children() {
-                for i in 0..children.len() {
-                    fields.push(children.get(i).into());
-                }
-            };
-
-            DataType::Struct(fields)
-        }
-        ipc::Type::Decimal => {
-            let fsb = field.type_as_decimal().unwrap();
-            DataType::Decimal(fsb.precision() as usize, fsb.scale() as usize)
-        }
-        t => unimplemented!("Type {:?} not supported", t),
-    }
-}
-
-pub(crate) struct FBFieldType<'b> {
-    pub(crate) type_type: ipc::Type,
-    pub(crate) type_: WIPOffset<UnionWIPOffset>,
-    pub(crate) children: Option<WIPOffset<Vector<'b, ForwardsUOffset<ipc::Field<'b>>>>>,
-}
-
-/// Create an IPC Field from an Arrow Field
-pub(crate) fn build_field<'a>(
-    fbb: &mut FlatBufferBuilder<'a>,
-    field: &Field,
-) -> WIPOffset<ipc::Field<'a>> {
-    // Optional custom metadata.
-    let mut fb_metadata = None;
-    if let Some(metadata) = field.metadata() {
-        if !metadata.is_empty() {
-            let mut kv_vec = vec![];
-            for (k, v) in metadata {
-                let kv_args = ipc::KeyValueArgs {
-                    key: Some(fbb.create_string(k.as_str())),
-                    value: Some(fbb.create_string(v.as_str())),
-                };
-                let kv_offset = ipc::KeyValue::create(fbb, &kv_args);
-                kv_vec.push(kv_offset);
-            }
-            fb_metadata = Some(fbb.create_vector(&kv_vec));
-        }
-    };
-
-    let fb_field_name = fbb.create_string(field.name().as_str());
-    let field_type = get_fb_field_type(field.data_type(), field.is_nullable(), fbb);
-
-    let fb_dictionary = if let Dictionary(index_type, _) = field.data_type() {
-        Some(get_fb_dictionary(
-            index_type,
-            field
-                .dict_id()
-                .expect("All Dictionary types have `dict_id`"),
-            field
-                .dict_is_ordered()
-                .expect("All Dictionary types have `dict_is_ordered`"),
-            fbb,
-        ))
-    } else {
-        None
-    };
-
-    let mut field_builder = ipc::FieldBuilder::new(fbb);
-    field_builder.add_name(fb_field_name);
-    if let Some(dictionary) = fb_dictionary {
-        field_builder.add_dictionary(dictionary)
-    }
-    field_builder.add_type_type(field_type.type_type);
-    field_builder.add_nullable(field.is_nullable());
-    match field_type.children {
-        None => {}
-        Some(children) => field_builder.add_children(children),
-    };
-    field_builder.add_type_(field_type.type_);
-
-    if let Some(fb_metadata) = fb_metadata {
-        field_builder.add_custom_metadata(fb_metadata);
-    }
-
-    field_builder.finish()
-}
-
-/// Get the IPC type of a data type
-pub(crate) fn get_fb_field_type<'a>(
-    data_type: &DataType,
-    is_nullable: bool,
-    fbb: &mut FlatBufferBuilder<'a>,
-) -> FBFieldType<'a> {
-    // some IPC implementations expect an empty list for child data, instead of a null value.
-    // An empty field list is thus returned for primitive types
-    let empty_fields: Vec<WIPOffset<ipc::Field>> = vec![];
-    match data_type {
-        Null => FBFieldType {
-            type_type: ipc::Type::Null,
-            type_: ipc::NullBuilder::new(fbb).finish().as_union_value(),
-            children: Some(fbb.create_vector(&empty_fields[..])),
-        },
-        Boolean => FBFieldType {
-            type_type: ipc::Type::Bool,
-            type_: ipc::BoolBuilder::new(fbb).finish().as_union_value(),
-            children: Some(fbb.create_vector(&empty_fields[..])),
-        },
-        UInt8 | UInt16 | UInt32 | UInt64 => {
-            let children = fbb.create_vector(&empty_fields[..]);
-            let mut builder = ipc::IntBuilder::new(fbb);
-            builder.add_is_signed(false);
-            match data_type {
-                UInt8 => builder.add_bitWidth(8),
-                UInt16 => builder.add_bitWidth(16),
-                UInt32 => builder.add_bitWidth(32),
-                UInt64 => builder.add_bitWidth(64),
-                _ => {}
-            };
-            FBFieldType {
-                type_type: ipc::Type::Int,
-                type_: builder.finish().as_union_value(),
-                children: Some(children),
-            }
-        }
-        Int8 | Int16 | Int32 | Int64 => {
-            let children = fbb.create_vector(&empty_fields[..]);
-            let mut builder = ipc::IntBuilder::new(fbb);
-            builder.add_is_signed(true);
-            match data_type {
-                Int8 => builder.add_bitWidth(8),
-                Int16 => builder.add_bitWidth(16),
-                Int32 => builder.add_bitWidth(32),
-                Int64 => builder.add_bitWidth(64),
-                _ => {}
-            };
-            FBFieldType {
-                type_type: ipc::Type::Int,
-                type_: builder.finish().as_union_value(),
-                children: Some(children),
-            }
-        }
-        Float16 | Float32 | Float64 => {
-            let children = fbb.create_vector(&empty_fields[..]);
-            let mut builder = ipc::FloatingPointBuilder::new(fbb);
-            match data_type {
-                Float16 => builder.add_precision(ipc::Precision::HALF),
-                Float32 => builder.add_precision(ipc::Precision::SINGLE),
-                Float64 => builder.add_precision(ipc::Precision::DOUBLE),
-                _ => {}
-            };
-            FBFieldType {
-                type_type: ipc::Type::FloatingPoint,
-                type_: builder.finish().as_union_value(),
-                children: Some(children),
-            }
-        }
-        Binary => FBFieldType {
-            type_type: ipc::Type::Binary,
-            type_: ipc::BinaryBuilder::new(fbb).finish().as_union_value(),
-            children: Some(fbb.create_vector(&empty_fields[..])),
-        },
-        LargeBinary => FBFieldType {
-            type_type: ipc::Type::LargeBinary,
-            type_: ipc::LargeBinaryBuilder::new(fbb).finish().as_union_value(),
-            children: Some(fbb.create_vector(&empty_fields[..])),
-        },
-        Utf8 => FBFieldType {
-            type_type: ipc::Type::Utf8,
-            type_: ipc::Utf8Builder::new(fbb).finish().as_union_value(),
-            children: Some(fbb.create_vector(&empty_fields[..])),
-        },
-        LargeUtf8 => FBFieldType {
-            type_type: ipc::Type::LargeUtf8,
-            type_: ipc::LargeUtf8Builder::new(fbb).finish().as_union_value(),
-            children: Some(fbb.create_vector(&empty_fields[..])),
-        },
-        FixedSizeBinary(len) => {
-            let mut builder = ipc::FixedSizeBinaryBuilder::new(fbb);
-            builder.add_byteWidth(*len as i32);
-            FBFieldType {
-                type_type: ipc::Type::FixedSizeBinary,
-                type_: builder.finish().as_union_value(),
-                children: Some(fbb.create_vector(&empty_fields[..])),
-            }
-        }
-        Date32 => {
-            let mut builder = ipc::DateBuilder::new(fbb);
-            builder.add_unit(ipc::DateUnit::DAY);
-            FBFieldType {
-                type_type: ipc::Type::Date,
-                type_: builder.finish().as_union_value(),
-                children: Some(fbb.create_vector(&empty_fields[..])),
-            }
-        }
-        Date64 => {
-            let mut builder = ipc::DateBuilder::new(fbb);
-            builder.add_unit(ipc::DateUnit::MILLISECOND);
-            FBFieldType {
-                type_type: ipc::Type::Date,
-                type_: builder.finish().as_union_value(),
-                children: Some(fbb.create_vector(&empty_fields[..])),
-            }
-        }
-        Time32(unit) | Time64(unit) => {
-            let mut builder = ipc::TimeBuilder::new(fbb);
-            match unit {
-                TimeUnit::Second => {
-                    builder.add_bitWidth(32);
-                    builder.add_unit(ipc::TimeUnit::SECOND);
-                }
-                TimeUnit::Millisecond => {
-                    builder.add_bitWidth(32);
-                    builder.add_unit(ipc::TimeUnit::MILLISECOND);
-                }
-                TimeUnit::Microsecond => {
-                    builder.add_bitWidth(64);
-                    builder.add_unit(ipc::TimeUnit::MICROSECOND);
-                }
-                TimeUnit::Nanosecond => {
-                    builder.add_bitWidth(64);
-                    builder.add_unit(ipc::TimeUnit::NANOSECOND);
-                }
-            }
-            FBFieldType {
-                type_type: ipc::Type::Time,
-                type_: builder.finish().as_union_value(),
-                children: Some(fbb.create_vector(&empty_fields[..])),
-            }
-        }
-        Timestamp(unit, tz) => {
-            let tz = tz.clone().unwrap_or_else(String::new);
-            let tz_str = fbb.create_string(tz.as_str());
-            let mut builder = ipc::TimestampBuilder::new(fbb);
-            let time_unit = match unit {
-                TimeUnit::Second => ipc::TimeUnit::SECOND,
-                TimeUnit::Millisecond => ipc::TimeUnit::MILLISECOND,
-                TimeUnit::Microsecond => ipc::TimeUnit::MICROSECOND,
-                TimeUnit::Nanosecond => ipc::TimeUnit::NANOSECOND,
-            };
-            builder.add_unit(time_unit);
-            if !tz.is_empty() {
-                builder.add_timezone(tz_str);
-            }
-            FBFieldType {
-                type_type: ipc::Type::Timestamp,
-                type_: builder.finish().as_union_value(),
-                children: Some(fbb.create_vector(&empty_fields[..])),
-            }
-        }
-        Interval(unit) => {
-            let mut builder = ipc::IntervalBuilder::new(fbb);
-            let interval_unit = match unit {
-                IntervalUnit::YearMonth => ipc::IntervalUnit::YEAR_MONTH,
-                IntervalUnit::DayTime => ipc::IntervalUnit::DAY_TIME,
-            };
-            builder.add_unit(interval_unit);
-            FBFieldType {
-                type_type: ipc::Type::Interval,
-                type_: builder.finish().as_union_value(),
-                children: Some(fbb.create_vector(&empty_fields[..])),
-            }
-        }
-        Duration(unit) => {
-            let mut builder = ipc::DurationBuilder::new(fbb);
-            let time_unit = match unit {
-                TimeUnit::Second => ipc::TimeUnit::SECOND,
-                TimeUnit::Millisecond => ipc::TimeUnit::MILLISECOND,
-                TimeUnit::Microsecond => ipc::TimeUnit::MICROSECOND,
-                TimeUnit::Nanosecond => ipc::TimeUnit::NANOSECOND,
-            };
-            builder.add_unit(time_unit);
-            FBFieldType {
-                type_type: ipc::Type::Duration,
-                type_: builder.finish().as_union_value(),
-                children: Some(fbb.create_vector(&empty_fields[..])),
-            }
-        }
-        List(ref list_type) => {
-            let child = build_field(fbb, list_type);
-            FBFieldType {
-                type_type: ipc::Type::List,
-                type_: ipc::ListBuilder::new(fbb).finish().as_union_value(),
-                children: Some(fbb.create_vector(&[child])),
-            }
-        }
-        LargeList(ref list_type) => {
-            let child = build_field(fbb, list_type);
-            FBFieldType {
-                type_type: ipc::Type::LargeList,
-                type_: ipc::LargeListBuilder::new(fbb).finish().as_union_value(),
-                children: Some(fbb.create_vector(&[child])),
-            }
-        }
-        FixedSizeList(ref list_type, len) => {
-            let child = build_field(fbb, list_type);
-            let mut builder = ipc::FixedSizeListBuilder::new(fbb);
-            builder.add_listSize(*len as i32);
-            FBFieldType {
-                type_type: ipc::Type::FixedSizeList,
-                type_: builder.finish().as_union_value(),
-                children: Some(fbb.create_vector(&[child])),
-            }
-        }
-        Struct(fields) => {
-            // struct's fields are children
-            let mut children = vec![];
-            for field in fields {
-                let inner_types =
-                    get_fb_field_type(field.data_type(), field.is_nullable(), fbb);
-                let field_name = fbb.create_string(field.name());
-                children.push(ipc::Field::create(
-                    fbb,
-                    &ipc::FieldArgs {
-                        name: Some(field_name),
-                        nullable: field.is_nullable(),
-                        type_type: inner_types.type_type,
-                        type_: Some(inner_types.type_),
-                        dictionary: None,
-                        children: inner_types.children,
-                        custom_metadata: None,
-                    },
-                ));
-            }
-            FBFieldType {
-                type_type: ipc::Type::Struct_,
-                type_: ipc::Struct_Builder::new(fbb).finish().as_union_value(),
-                children: Some(fbb.create_vector(&children[..])),
-            }
-        }
-        Dictionary(_, value_type) => {
-            // In this library, the dictionary "type" is a logical construct. Here we
-            // pass through to the value type, as we've already captured the index
-            // type in the DictionaryEncoding metadata in the parent field
-            get_fb_field_type(value_type, is_nullable, fbb)
-        }
-        Decimal(precision, scale) => {
-            let mut builder = ipc::DecimalBuilder::new(fbb);
-            builder.add_precision(*precision as i32);
-            builder.add_scale(*scale as i32);
-            builder.add_bitWidth(128);
-            FBFieldType {
-                type_type: ipc::Type::Decimal,
-                type_: builder.finish().as_union_value(),
-                children: Some(fbb.create_vector(&empty_fields[..])),
-            }
-        }
-        t => unimplemented!("Type {:?} not supported", t),
-    }
-}
-
-/// Create an IPC dictionary encoding
-pub(crate) fn get_fb_dictionary<'a>(
-    index_type: &DataType,
-    dict_id: i64,
-    dict_is_ordered: bool,
-    fbb: &mut FlatBufferBuilder<'a>,
-) -> WIPOffset<ipc::DictionaryEncoding<'a>> {
-    // We assume that the dictionary index type (as an integer) has already been
-    // validated elsewhere, and can safely assume we are dealing with integers
-    let mut index_builder = ipc::IntBuilder::new(fbb);
-
-    match *index_type {
-        Int8 | Int16 | Int32 | Int64 => index_builder.add_is_signed(true),
-        UInt8 | UInt16 | UInt32 | UInt64 => index_builder.add_is_signed(false),
-        _ => {}
-    }
-
-    match *index_type {
-        Int8 | UInt8 => index_builder.add_bitWidth(8),
-        Int16 | UInt16 => index_builder.add_bitWidth(16),
-        Int32 | UInt32 => index_builder.add_bitWidth(32),
-        Int64 | UInt64 => index_builder.add_bitWidth(64),
-        _ => {}
-    }
-
-    let index_builder = index_builder.finish();
-
-    let mut builder = ipc::DictionaryEncodingBuilder::new(fbb);
-    builder.add_id(dict_id);
-    builder.add_indexType(index_builder);
-    builder.add_isOrdered(dict_is_ordered);
-
-    builder.finish()
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::datatypes::{DataType, Field, Schema};
-
-    #[test]
-    fn convert_schema_round_trip() {
-        let md: HashMap<String, String> = [("Key".to_string(), "value".to_string())]
-            .iter()
-            .cloned()
-            .collect();
-        let field_md: BTreeMap<String, String> = [("k".to_string(), "v".to_string())]
-            .iter()
-            .cloned()
-            .collect();
-        let schema = Schema::new_with_metadata(
-            vec![
-                {
-                    let mut f = Field::new("uint8", DataType::UInt8, false);
-                    f.set_metadata(Some(field_md));
-                    f
-                },
-                Field::new("uint16", DataType::UInt16, true),
-                Field::new("uint32", DataType::UInt32, false),
-                Field::new("uint64", DataType::UInt64, true),
-                Field::new("int8", DataType::Int8, true),
-                Field::new("int16", DataType::Int16, false),
-                Field::new("int32", DataType::Int32, true),
-                Field::new("int64", DataType::Int64, false),
-                Field::new("float16", DataType::Float16, true),
-                Field::new("float32", DataType::Float32, false),
-                Field::new("float64", DataType::Float64, true),
-                Field::new("null", DataType::Null, false),
-                Field::new("bool", DataType::Boolean, false),
-                Field::new("date32", DataType::Date32, false),
-                Field::new("date64", DataType::Date64, true),
-                Field::new("time32[s]", DataType::Time32(TimeUnit::Second), true),
-                Field::new("time32[ms]", DataType::Time32(TimeUnit::Millisecond), false),
-                Field::new("time64[us]", DataType::Time64(TimeUnit::Microsecond), false),
-                Field::new("time64[ns]", DataType::Time64(TimeUnit::Nanosecond), true),
-                Field::new(
-                    "timestamp[s]",
-                    DataType::Timestamp(TimeUnit::Second, None),
-                    false,
-                ),
-                Field::new(
-                    "timestamp[ms]",
-                    DataType::Timestamp(TimeUnit::Millisecond, None),
-                    true,
-                ),
-                Field::new(
-                    "timestamp[us]",
-                    DataType::Timestamp(
-                        TimeUnit::Microsecond,
-                        Some("Africa/Johannesburg".to_string()),
-                    ),
-                    false,
-                ),
-                Field::new(
-                    "timestamp[ns]",
-                    DataType::Timestamp(TimeUnit::Nanosecond, None),
-                    true,
-                ),
-                Field::new(
-                    "interval[ym]",
-                    DataType::Interval(IntervalUnit::YearMonth),
-                    true,
-                ),
-                Field::new(
-                    "interval[dt]",
-                    DataType::Interval(IntervalUnit::DayTime),
-                    true,
-                ),
-                Field::new("utf8", DataType::Utf8, false),
-                Field::new("binary", DataType::Binary, false),
-                Field::new(
-                    "list[u8]",
-                    DataType::List(Box::new(Field::new("item", DataType::UInt8, false))),
-                    true,
-                ),
-                Field::new(
-                    "list[struct<float32, int32, bool>]",
-                    DataType::List(Box::new(Field::new(
-                        "struct",
-                        DataType::Struct(vec![
-                            Field::new("float32", DataType::UInt8, false),
-                            Field::new("int32", DataType::Int32, true),
-                            Field::new("bool", DataType::Boolean, true),
-                        ]),
-                        true,
-                    ))),
-                    false,
-                ),
-                Field::new(
-                    "struct<int64, list[struct<date32, list[struct<>]>]>",
-                    DataType::Struct(vec![
-                        Field::new("int64", DataType::Int64, true),
-                        Field::new(
-                            "list[struct<date32, list[struct<>]>]",
-                            DataType::List(Box::new(Field::new(
-                                "struct",
-                                DataType::Struct(vec![
-                                    Field::new("date32", DataType::Date32, true),
-                                    Field::new(
-                                        "list[struct<>]",
-                                        DataType::List(Box::new(Field::new(
-                                            "struct",
-                                            DataType::Struct(vec![]),
-                                            false,
-                                        ))),
-                                        false,
-                                    ),
-                                ]),
-                                false,
-                            ))),
-                            false,
-                        ),
-                    ]),
-                    false,
-                ),
-                Field::new("struct<>", DataType::Struct(vec![]), true),
-                Field::new_dict(
-                    "dictionary<int32, utf8>",
-                    DataType::Dictionary(
-                        Box::new(DataType::Int32),
-                        Box::new(DataType::Utf8),
-                    ),
-                    true,
-                    123,
-                    true,
-                ),
-                Field::new_dict(
-                    "dictionary<uint8, uint32>",
-                    DataType::Dictionary(
-                        Box::new(DataType::UInt8),
-                        Box::new(DataType::UInt32),
-                    ),
-                    true,
-                    123,
-                    true,
-                ),
-                Field::new("decimal<usize, usize>", DataType::Decimal(10, 6), false),
-            ],
-            md,
-        );
-
-        let fb = schema_to_fb(&schema);
-
-        // read back fields
-        let ipc = ipc::root_as_schema(fb.finished_data()).unwrap();
-        let schema2 = fb_to_schema(ipc);
-        assert_eq!(schema, schema2);
-    }
-
-    #[test]
-    fn schema_from_bytes() {
-        // bytes of a schema generated from python (0.14.0), saved as an `ipc::Message`.
-        // the schema is: Field("field1", DataType::UInt32, false)
-        let bytes: Vec<u8> = vec![
-            16, 0, 0, 0, 0, 0, 10, 0, 12, 0, 6, 0, 5, 0, 8, 0, 10, 0, 0, 0, 0, 1, 3, 0,
-            12, 0, 0, 0, 8, 0, 8, 0, 0, 0, 4, 0, 8, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 20,
-            0, 0, 0, 16, 0, 20, 0, 8, 0, 0, 0, 7, 0, 12, 0, 0, 0, 16, 0, 16, 0, 0, 0, 0,
-            0, 0, 2, 32, 0, 0, 0, 20, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 8, 0,
-            4, 0, 6, 0, 0, 0, 32, 0, 0, 0, 6, 0, 0, 0, 102, 105, 101, 108, 100, 49, 0, 0,
-            0, 0, 0, 0,
-        ];
-        let ipc = ipc::root_as_message(&bytes[..]).unwrap();
-        let schema = ipc.header_as_schema().unwrap();
-
-        // a message generated from Rust, same as the Python one
-        let bytes: Vec<u8> = vec![
-            16, 0, 0, 0, 0, 0, 10, 0, 14, 0, 12, 0, 11, 0, 4, 0, 10, 0, 0, 0, 20, 0, 0,
-            0, 0, 0, 0, 1, 3, 0, 10, 0, 12, 0, 0, 0, 8, 0, 4, 0, 10, 0, 0, 0, 8, 0, 0, 0,
-            8, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 16, 0, 0, 0, 12, 0, 18, 0, 12, 0, 0, 0,
-            11, 0, 4, 0, 12, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 2, 20, 0, 0, 0, 0, 0, 6, 0,
-            8, 0, 4, 0, 6, 0, 0, 0, 32, 0, 0, 0, 6, 0, 0, 0, 102, 105, 101, 108, 100, 49,
-            0, 0,
-        ];
-        let ipc2 = ipc::root_as_message(&bytes[..]).unwrap();
-        let schema2 = ipc.header_as_schema().unwrap();
-
-        assert_eq!(schema, schema2);
-        assert_eq!(ipc.version(), ipc2.version());
-        assert_eq!(ipc.header_type(), ipc2.header_type());
-        assert_eq!(ipc.bodyLength(), ipc2.bodyLength());
-        assert!(ipc.custom_metadata().is_none());
-        assert!(ipc2.custom_metadata().is_none());
-    }
-}
diff --git a/rust/arrow/src/ipc/gen/File.rs b/rust/arrow/src/ipc/gen/File.rs
deleted file mode 100644
index 04cbc644137..00000000000
--- a/rust/arrow/src/ipc/gen/File.rs
+++ /dev/null
@@ -1,491 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#![allow(dead_code)]
-#![allow(unused_imports)]
-
-use crate::ipc::gen::Schema::*;
-use flatbuffers::EndianScalar;
-use std::{cmp::Ordering, mem};
-// automatically generated by the FlatBuffers compiler, do not modify
-
-// struct Block, aligned to 8
-#[repr(transparent)]
-#[derive(Clone, Copy, PartialEq)]
-pub struct Block(pub [u8; 24]);
-impl std::fmt::Debug for Block {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        f.debug_struct("Block")
-            .field("offset", &self.offset())
-            .field("metaDataLength", &self.metaDataLength())
-            .field("bodyLength", &self.bodyLength())
-            .finish()
-    }
-}
-
-impl flatbuffers::SimpleToVerifyInSlice for Block {}
-impl flatbuffers::SafeSliceAccess for Block {}
-impl<'a> flatbuffers::Follow<'a> for Block {
-    type Inner = &'a Block;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        <&'a Block>::follow(buf, loc)
-    }
-}
-impl<'a> flatbuffers::Follow<'a> for &'a Block {
-    type Inner = &'a Block;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        flatbuffers::follow_cast_ref::<Block>(buf, loc)
-    }
-}
-impl<'b> flatbuffers::Push for Block {
-    type Output = Block;
-    #[inline]
-    fn push(&self, dst: &mut [u8], _rest: &[u8]) {
-        let src = unsafe {
-            ::std::slice::from_raw_parts(self as *const Block as *const u8, Self::size())
-        };
-        dst.copy_from_slice(src);
-    }
-}
-impl<'b> flatbuffers::Push for &'b Block {
-    type Output = Block;
-
-    #[inline]
-    fn push(&self, dst: &mut [u8], _rest: &[u8]) {
-        let src = unsafe {
-            ::std::slice::from_raw_parts(*self as *const Block as *const u8, Self::size())
-        };
-        dst.copy_from_slice(src);
-    }
-}
-
-impl<'a> flatbuffers::Verifiable for Block {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        v.in_buffer::<Self>(pos)
-    }
-}
-impl Block {
-    #[allow(clippy::too_many_arguments)]
-    pub fn new(offset: i64, metaDataLength: i32, bodyLength: i64) -> Self {
-        let mut s = Self([0; 24]);
-        s.set_offset(offset);
-        s.set_metaDataLength(metaDataLength);
-        s.set_bodyLength(bodyLength);
-        s
-    }
-
-    /// Index to the start of the RecordBlock (note this is past the Message header)
-    pub fn offset(&self) -> i64 {
-        let mut mem = core::mem::MaybeUninit::<i64>::uninit();
-        unsafe {
-            core::ptr::copy_nonoverlapping(
-                self.0[0..].as_ptr(),
-                mem.as_mut_ptr() as *mut u8,
-                core::mem::size_of::<i64>(),
-            );
-            mem.assume_init()
-        }
-        .from_little_endian()
-    }
-
-    pub fn set_offset(&mut self, x: i64) {
-        let x_le = x.to_little_endian();
-        unsafe {
-            core::ptr::copy_nonoverlapping(
-                &x_le as *const i64 as *const u8,
-                self.0[0..].as_mut_ptr(),
-                core::mem::size_of::<i64>(),
-            );
-        }
-    }
-
-    /// Length of the metadata
-    pub fn metaDataLength(&self) -> i32 {
-        let mut mem = core::mem::MaybeUninit::<i32>::uninit();
-        unsafe {
-            core::ptr::copy_nonoverlapping(
-                self.0[8..].as_ptr(),
-                mem.as_mut_ptr() as *mut u8,
-                core::mem::size_of::<i32>(),
-            );
-            mem.assume_init()
-        }
-        .from_little_endian()
-    }
-
-    pub fn set_metaDataLength(&mut self, x: i32) {
-        let x_le = x.to_little_endian();
-        unsafe {
-            core::ptr::copy_nonoverlapping(
-                &x_le as *const i32 as *const u8,
-                self.0[8..].as_mut_ptr(),
-                core::mem::size_of::<i32>(),
-            );
-        }
-    }
-
-    /// Length of the data (this is aligned so there can be a gap between this and
-    /// the metadata).
-    pub fn bodyLength(&self) -> i64 {
-        let mut mem = core::mem::MaybeUninit::<i64>::uninit();
-        unsafe {
-            core::ptr::copy_nonoverlapping(
-                self.0[16..].as_ptr(),
-                mem.as_mut_ptr() as *mut u8,
-                core::mem::size_of::<i64>(),
-            );
-            mem.assume_init()
-        }
-        .from_little_endian()
-    }
-
-    pub fn set_bodyLength(&mut self, x: i64) {
-        let x_le = x.to_little_endian();
-        unsafe {
-            core::ptr::copy_nonoverlapping(
-                &x_le as *const i64 as *const u8,
-                self.0[16..].as_mut_ptr(),
-                core::mem::size_of::<i64>(),
-            );
-        }
-    }
-}
-
-pub enum FooterOffset {}
-#[derive(Copy, Clone, PartialEq)]
-
-/// ----------------------------------------------------------------------
-/// Arrow File metadata
-///
-pub struct Footer<'a> {
-    pub _tab: flatbuffers::Table<'a>,
-}
-
-impl<'a> flatbuffers::Follow<'a> for Footer<'a> {
-    type Inner = Footer<'a>;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        Self {
-            _tab: flatbuffers::Table { buf, loc },
-        }
-    }
-}
-
-impl<'a> Footer<'a> {
-    #[inline]
-    pub fn init_from_table(table: flatbuffers::Table<'a>) -> Self {
-        Footer { _tab: table }
-    }
-    #[allow(unused_mut)]
-    pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>(
-        _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>,
-        args: &'args FooterArgs<'args>,
-    ) -> flatbuffers::WIPOffset<Footer<'bldr>> {
-        let mut builder = FooterBuilder::new(_fbb);
-        if let Some(x) = args.custom_metadata {
-            builder.add_custom_metadata(x);
-        }
-        if let Some(x) = args.recordBatches {
-            builder.add_recordBatches(x);
-        }
-        if let Some(x) = args.dictionaries {
-            builder.add_dictionaries(x);
-        }
-        if let Some(x) = args.schema {
-            builder.add_schema(x);
-        }
-        builder.add_version(args.version);
-        builder.finish()
-    }
-
-    pub const VT_VERSION: flatbuffers::VOffsetT = 4;
-    pub const VT_SCHEMA: flatbuffers::VOffsetT = 6;
-    pub const VT_DICTIONARIES: flatbuffers::VOffsetT = 8;
-    pub const VT_RECORDBATCHES: flatbuffers::VOffsetT = 10;
-    pub const VT_CUSTOM_METADATA: flatbuffers::VOffsetT = 12;
-
-    #[inline]
-    pub fn version(&self) -> MetadataVersion {
-        self._tab
-            .get::<MetadataVersion>(Footer::VT_VERSION, Some(MetadataVersion::V1))
-            .unwrap()
-    }
-    #[inline]
-    pub fn schema(&self) -> Option<Schema<'a>> {
-        self._tab
-            .get::<flatbuffers::ForwardsUOffset<Schema>>(Footer::VT_SCHEMA, None)
-    }
-    #[inline]
-    pub fn dictionaries(&self) -> Option<&'a [Block]> {
-        self._tab
-            .get::<flatbuffers::ForwardsUOffset<flatbuffers::Vector<'a, Block>>>(
-                Footer::VT_DICTIONARIES,
-                None,
-            )
-            .map(|v| v.safe_slice())
-    }
-    #[inline]
-    pub fn recordBatches(&self) -> Option<&'a [Block]> {
-        self._tab
-            .get::<flatbuffers::ForwardsUOffset<flatbuffers::Vector<'a, Block>>>(
-                Footer::VT_RECORDBATCHES,
-                None,
-            )
-            .map(|v| v.safe_slice())
-    }
-    /// User-defined metadata
-    #[inline]
-    pub fn custom_metadata(
-        &self,
-    ) -> Option<flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<KeyValue<'a>>>> {
-        self._tab.get::<flatbuffers::ForwardsUOffset<
-            flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<KeyValue>>,
-        >>(Footer::VT_CUSTOM_METADATA, None)
-    }
-}
-
-impl flatbuffers::Verifiable for Footer<'_> {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        v.visit_table(pos)?
-            .visit_field::<MetadataVersion>(&"version", Self::VT_VERSION, false)?
-            .visit_field::<flatbuffers::ForwardsUOffset<Schema>>(
-                &"schema",
-                Self::VT_SCHEMA,
-                false,
-            )?
-            .visit_field::<flatbuffers::ForwardsUOffset<flatbuffers::Vector<'_, Block>>>(
-                &"dictionaries",
-                Self::VT_DICTIONARIES,
-                false,
-            )?
-            .visit_field::<flatbuffers::ForwardsUOffset<flatbuffers::Vector<'_, Block>>>(
-                &"recordBatches",
-                Self::VT_RECORDBATCHES,
-                false,
-            )?
-            .visit_field::<flatbuffers::ForwardsUOffset<
-                flatbuffers::Vector<'_, flatbuffers::ForwardsUOffset<KeyValue>>,
-            >>(&"custom_metadata", Self::VT_CUSTOM_METADATA, false)?
-            .finish();
-        Ok(())
-    }
-}
-pub struct FooterArgs<'a> {
-    pub version: MetadataVersion,
-    pub schema: Option<flatbuffers::WIPOffset<Schema<'a>>>,
-    pub dictionaries: Option<flatbuffers::WIPOffset<flatbuffers::Vector<'a, Block>>>,
-    pub recordBatches: Option<flatbuffers::WIPOffset<flatbuffers::Vector<'a, Block>>>,
-    pub custom_metadata: Option<
-        flatbuffers::WIPOffset<
-            flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<KeyValue<'a>>>,
-        >,
-    >,
-}
-impl<'a> Default for FooterArgs<'a> {
-    #[inline]
-    fn default() -> Self {
-        FooterArgs {
-            version: MetadataVersion::V1,
-            schema: None,
-            dictionaries: None,
-            recordBatches: None,
-            custom_metadata: None,
-        }
-    }
-}
-pub struct FooterBuilder<'a: 'b, 'b> {
-    fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    start_: flatbuffers::WIPOffset<flatbuffers::TableUnfinishedWIPOffset>,
-}
-impl<'a: 'b, 'b> FooterBuilder<'a, 'b> {
-    #[inline]
-    pub fn add_version(&mut self, version: MetadataVersion) {
-        self.fbb_.push_slot::<MetadataVersion>(
-            Footer::VT_VERSION,
-            version,
-            MetadataVersion::V1,
-        );
-    }
-    #[inline]
-    pub fn add_schema(&mut self, schema: flatbuffers::WIPOffset<Schema<'b>>) {
-        self.fbb_
-            .push_slot_always::<flatbuffers::WIPOffset<Schema>>(
-                Footer::VT_SCHEMA,
-                schema,
-            );
-    }
-    #[inline]
-    pub fn add_dictionaries(
-        &mut self,
-        dictionaries: flatbuffers::WIPOffset<flatbuffers::Vector<'b, Block>>,
-    ) {
-        self.fbb_.push_slot_always::<flatbuffers::WIPOffset<_>>(
-            Footer::VT_DICTIONARIES,
-            dictionaries,
-        );
-    }
-    #[inline]
-    pub fn add_recordBatches(
-        &mut self,
-        recordBatches: flatbuffers::WIPOffset<flatbuffers::Vector<'b, Block>>,
-    ) {
-        self.fbb_.push_slot_always::<flatbuffers::WIPOffset<_>>(
-            Footer::VT_RECORDBATCHES,
-            recordBatches,
-        );
-    }
-    #[inline]
-    pub fn add_custom_metadata(
-        &mut self,
-        custom_metadata: flatbuffers::WIPOffset<
-            flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<KeyValue<'b>>>,
-        >,
-    ) {
-        self.fbb_.push_slot_always::<flatbuffers::WIPOffset<_>>(
-            Footer::VT_CUSTOM_METADATA,
-            custom_metadata,
-        );
-    }
-    #[inline]
-    pub fn new(
-        _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    ) -> FooterBuilder<'a, 'b> {
-        let start = _fbb.start_table();
-        FooterBuilder {
-            fbb_: _fbb,
-            start_: start,
-        }
-    }
-    #[inline]
-    pub fn finish(self) -> flatbuffers::WIPOffset<Footer<'a>> {
-        let o = self.fbb_.end_table(self.start_);
-        flatbuffers::WIPOffset::new(o.value())
-    }
-}
-
-impl std::fmt::Debug for Footer<'_> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let mut ds = f.debug_struct("Footer");
-        ds.field("version", &self.version());
-        ds.field("schema", &self.schema());
-        ds.field("dictionaries", &self.dictionaries());
-        ds.field("recordBatches", &self.recordBatches());
-        ds.field("custom_metadata", &self.custom_metadata());
-        ds.finish()
-    }
-}
-#[inline]
-#[deprecated(since = "2.0.0", note = "Deprecated in favor of `root_as...` methods.")]
-pub fn get_root_as_footer<'a>(buf: &'a [u8]) -> Footer<'a> {
-    unsafe { flatbuffers::root_unchecked::<Footer<'a>>(buf) }
-}
-
-#[inline]
-#[deprecated(since = "2.0.0", note = "Deprecated in favor of `root_as...` methods.")]
-pub fn get_size_prefixed_root_as_footer<'a>(buf: &'a [u8]) -> Footer<'a> {
-    unsafe { flatbuffers::size_prefixed_root_unchecked::<Footer<'a>>(buf) }
-}
-
-#[inline]
-/// Verifies that a buffer of bytes contains a `Footer`
-/// and returns it.
-/// Note that verification is still experimental and may not
-/// catch every error, or be maximally performant. For the
-/// previous, unchecked, behavior use
-/// `root_as_footer_unchecked`.
-pub fn root_as_footer(buf: &[u8]) -> Result<Footer, flatbuffers::InvalidFlatbuffer> {
-    flatbuffers::root::<Footer>(buf)
-}
-#[inline]
-/// Verifies that a buffer of bytes contains a size prefixed
-/// `Footer` and returns it.
-/// Note that verification is still experimental and may not
-/// catch every error, or be maximally performant. For the
-/// previous, unchecked, behavior use
-/// `size_prefixed_root_as_footer_unchecked`.
-pub fn size_prefixed_root_as_footer(
-    buf: &[u8],
-) -> Result<Footer, flatbuffers::InvalidFlatbuffer> {
-    flatbuffers::size_prefixed_root::<Footer>(buf)
-}
-#[inline]
-/// Verifies, with the given options, that a buffer of bytes
-/// contains a `Footer` and returns it.
-/// Note that verification is still experimental and may not
-/// catch every error, or be maximally performant. For the
-/// previous, unchecked, behavior use
-/// `root_as_footer_unchecked`.
-pub fn root_as_footer_with_opts<'b, 'o>(
-    opts: &'o flatbuffers::VerifierOptions,
-    buf: &'b [u8],
-) -> Result<Footer<'b>, flatbuffers::InvalidFlatbuffer> {
-    flatbuffers::root_with_opts::<Footer<'b>>(opts, buf)
-}
-#[inline]
-/// Verifies, with the given verifier options, that a buffer of
-/// bytes contains a size prefixed `Footer` and returns
-/// it. Note that verification is still experimental and may not
-/// catch every error, or be maximally performant. For the
-/// previous, unchecked, behavior use
-/// `root_as_footer_unchecked`.
-pub fn size_prefixed_root_as_footer_with_opts<'b, 'o>(
-    opts: &'o flatbuffers::VerifierOptions,
-    buf: &'b [u8],
-) -> Result<Footer<'b>, flatbuffers::InvalidFlatbuffer> {
-    flatbuffers::size_prefixed_root_with_opts::<Footer<'b>>(opts, buf)
-}
-#[inline]
-/// Assumes, without verification, that a buffer of bytes contains a Footer and returns it.
-/// # Safety
-/// Callers must trust the given bytes do indeed contain a valid `Footer`.
-pub unsafe fn root_as_footer_unchecked(buf: &[u8]) -> Footer {
-    flatbuffers::root_unchecked::<Footer>(buf)
-}
-#[inline]
-/// Assumes, without verification, that a buffer of bytes contains a size prefixed Footer and returns it.
-/// # Safety
-/// Callers must trust the given bytes do indeed contain a valid size prefixed `Footer`.
-pub unsafe fn size_prefixed_root_as_footer_unchecked(buf: &[u8]) -> Footer {
-    flatbuffers::size_prefixed_root_unchecked::<Footer>(buf)
-}
-#[inline]
-pub fn finish_footer_buffer<'a, 'b>(
-    fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    root: flatbuffers::WIPOffset<Footer<'a>>,
-) {
-    fbb.finish(root, None);
-}
-
-#[inline]
-pub fn finish_size_prefixed_footer_buffer<'a, 'b>(
-    fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    root: flatbuffers::WIPOffset<Footer<'a>>,
-) {
-    fbb.finish_size_prefixed(root, None);
-}
diff --git a/rust/arrow/src/ipc/gen/Message.rs b/rust/arrow/src/ipc/gen/Message.rs
deleted file mode 100644
index 7903844a1fe..00000000000
--- a/rust/arrow/src/ipc/gen/Message.rs
+++ /dev/null
@@ -1,1346 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#![allow(dead_code)]
-#![allow(unused_imports)]
-
-use crate::ipc::gen::Schema::*;
-use crate::ipc::gen::SparseTensor::*;
-use crate::ipc::gen::Tensor::*;
-use flatbuffers::EndianScalar;
-use std::{cmp::Ordering, mem};
-// automatically generated by the FlatBuffers compiler, do not modify
-
-#[deprecated(
-    since = "2.0.0",
-    note = "Use associated constants instead. This will no longer be generated in 2021."
-)]
-pub const ENUM_MIN_COMPRESSION_TYPE: i8 = 0;
-#[deprecated(
-    since = "2.0.0",
-    note = "Use associated constants instead. This will no longer be generated in 2021."
-)]
-pub const ENUM_MAX_COMPRESSION_TYPE: i8 = 1;
-#[deprecated(
-    since = "2.0.0",
-    note = "Use associated constants instead. This will no longer be generated in 2021."
-)]
-#[allow(non_camel_case_types)]
-pub const ENUM_VALUES_COMPRESSION_TYPE: [CompressionType; 2] =
-    [CompressionType::LZ4_FRAME, CompressionType::ZSTD];
-
-#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
-#[repr(transparent)]
-pub struct CompressionType(pub i8);
-#[allow(non_upper_case_globals)]
-impl CompressionType {
-    pub const LZ4_FRAME: Self = Self(0);
-    pub const ZSTD: Self = Self(1);
-
-    pub const ENUM_MIN: i8 = 0;
-    pub const ENUM_MAX: i8 = 1;
-    pub const ENUM_VALUES: &'static [Self] = &[Self::LZ4_FRAME, Self::ZSTD];
-    /// Returns the variant's name or "" if unknown.
-    pub fn variant_name(self) -> Option<&'static str> {
-        match self {
-            Self::LZ4_FRAME => Some("LZ4_FRAME"),
-            Self::ZSTD => Some("ZSTD"),
-            _ => None,
-        }
-    }
-}
-impl std::fmt::Debug for CompressionType {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        if let Some(name) = self.variant_name() {
-            f.write_str(name)
-        } else {
-            f.write_fmt(format_args!("<UNKNOWN {:?}>", self.0))
-        }
-    }
-}
-impl<'a> flatbuffers::Follow<'a> for CompressionType {
-    type Inner = Self;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        let b = flatbuffers::read_scalar_at::<i8>(buf, loc);
-        Self(b)
-    }
-}
-
-impl flatbuffers::Push for CompressionType {
-    type Output = CompressionType;
-    #[inline]
-    fn push(&self, dst: &mut [u8], _rest: &[u8]) {
-        flatbuffers::emplace_scalar::<i8>(dst, self.0);
-    }
-}
-
-impl flatbuffers::EndianScalar for CompressionType {
-    #[inline]
-    fn to_little_endian(self) -> Self {
-        let b = i8::to_le(self.0);
-        Self(b)
-    }
-    #[inline]
-    fn from_little_endian(self) -> Self {
-        let b = i8::from_le(self.0);
-        Self(b)
-    }
-}
-
-impl<'a> flatbuffers::Verifiable for CompressionType {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        i8::run_verifier(v, pos)
-    }
-}
-
-impl flatbuffers::SimpleToVerifyInSlice for CompressionType {}
-#[deprecated(
-    since = "2.0.0",
-    note = "Use associated constants instead. This will no longer be generated in 2021."
-)]
-pub const ENUM_MIN_BODY_COMPRESSION_METHOD: i8 = 0;
-#[deprecated(
-    since = "2.0.0",
-    note = "Use associated constants instead. This will no longer be generated in 2021."
-)]
-pub const ENUM_MAX_BODY_COMPRESSION_METHOD: i8 = 0;
-#[deprecated(
-    since = "2.0.0",
-    note = "Use associated constants instead. This will no longer be generated in 2021."
-)]
-#[allow(non_camel_case_types)]
-pub const ENUM_VALUES_BODY_COMPRESSION_METHOD: [BodyCompressionMethod; 1] =
-    [BodyCompressionMethod::BUFFER];
-
-/// Provided for forward compatibility in case we need to support different
-/// strategies for compressing the IPC message body (like whole-body
-/// compression rather than buffer-level) in the future
-#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
-#[repr(transparent)]
-pub struct BodyCompressionMethod(pub i8);
-#[allow(non_upper_case_globals)]
-impl BodyCompressionMethod {
-    /// Each constituent buffer is first compressed with the indicated
-    /// compressor, and then written with the uncompressed length in the first 8
-    /// bytes as a 64-bit little-endian signed integer followed by the compressed
-    /// buffer bytes (and then padding as required by the protocol). The
-    /// uncompressed length may be set to -1 to indicate that the data that
-    /// follows is not compressed, which can be useful for cases where
-    /// compression does not yield appreciable savings.
-    pub const BUFFER: Self = Self(0);
-
-    pub const ENUM_MIN: i8 = 0;
-    pub const ENUM_MAX: i8 = 0;
-    pub const ENUM_VALUES: &'static [Self] = &[Self::BUFFER];
-    /// Returns the variant's name or "" if unknown.
-    pub fn variant_name(self) -> Option<&'static str> {
-        match self {
-            Self::BUFFER => Some("BUFFER"),
-            _ => None,
-        }
-    }
-}
-impl std::fmt::Debug for BodyCompressionMethod {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        if let Some(name) = self.variant_name() {
-            f.write_str(name)
-        } else {
-            f.write_fmt(format_args!("<UNKNOWN {:?}>", self.0))
-        }
-    }
-}
-impl<'a> flatbuffers::Follow<'a> for BodyCompressionMethod {
-    type Inner = Self;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        let b = flatbuffers::read_scalar_at::<i8>(buf, loc);
-        Self(b)
-    }
-}
-
-impl flatbuffers::Push for BodyCompressionMethod {
-    type Output = BodyCompressionMethod;
-    #[inline]
-    fn push(&self, dst: &mut [u8], _rest: &[u8]) {
-        flatbuffers::emplace_scalar::<i8>(dst, self.0);
-    }
-}
-
-impl flatbuffers::EndianScalar for BodyCompressionMethod {
-    #[inline]
-    fn to_little_endian(self) -> Self {
-        let b = i8::to_le(self.0);
-        Self(b)
-    }
-    #[inline]
-    fn from_little_endian(self) -> Self {
-        let b = i8::from_le(self.0);
-        Self(b)
-    }
-}
-
-impl<'a> flatbuffers::Verifiable for BodyCompressionMethod {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        i8::run_verifier(v, pos)
-    }
-}
-
-impl flatbuffers::SimpleToVerifyInSlice for BodyCompressionMethod {}
-#[deprecated(
-    since = "2.0.0",
-    note = "Use associated constants instead. This will no longer be generated in 2021."
-)]
-pub const ENUM_MIN_MESSAGE_HEADER: u8 = 0;
-#[deprecated(
-    since = "2.0.0",
-    note = "Use associated constants instead. This will no longer be generated in 2021."
-)]
-pub const ENUM_MAX_MESSAGE_HEADER: u8 = 5;
-#[deprecated(
-    since = "2.0.0",
-    note = "Use associated constants instead. This will no longer be generated in 2021."
-)]
-#[allow(non_camel_case_types)]
-pub const ENUM_VALUES_MESSAGE_HEADER: [MessageHeader; 6] = [
-    MessageHeader::NONE,
-    MessageHeader::Schema,
-    MessageHeader::DictionaryBatch,
-    MessageHeader::RecordBatch,
-    MessageHeader::Tensor,
-    MessageHeader::SparseTensor,
-];
-
-/// ----------------------------------------------------------------------
-/// The root Message type
-/// This union enables us to easily send different message types without
-/// redundant storage, and in the future we can easily add new message types.
-///
-/// Arrow implementations do not need to implement all of the message types,
-/// which may include experimental metadata types. For maximum compatibility,
-/// it is best to send data using RecordBatch
-#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
-#[repr(transparent)]
-pub struct MessageHeader(pub u8);
-#[allow(non_upper_case_globals)]
-impl MessageHeader {
-    pub const NONE: Self = Self(0);
-    pub const Schema: Self = Self(1);
-    pub const DictionaryBatch: Self = Self(2);
-    pub const RecordBatch: Self = Self(3);
-    pub const Tensor: Self = Self(4);
-    pub const SparseTensor: Self = Self(5);
-
-    pub const ENUM_MIN: u8 = 0;
-    pub const ENUM_MAX: u8 = 5;
-    pub const ENUM_VALUES: &'static [Self] = &[
-        Self::NONE,
-        Self::Schema,
-        Self::DictionaryBatch,
-        Self::RecordBatch,
-        Self::Tensor,
-        Self::SparseTensor,
-    ];
-    /// Returns the variant's name or "" if unknown.
-    pub fn variant_name(self) -> Option<&'static str> {
-        match self {
-            Self::NONE => Some("NONE"),
-            Self::Schema => Some("Schema"),
-            Self::DictionaryBatch => Some("DictionaryBatch"),
-            Self::RecordBatch => Some("RecordBatch"),
-            Self::Tensor => Some("Tensor"),
-            Self::SparseTensor => Some("SparseTensor"),
-            _ => None,
-        }
-    }
-}
-impl std::fmt::Debug for MessageHeader {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        if let Some(name) = self.variant_name() {
-            f.write_str(name)
-        } else {
-            f.write_fmt(format_args!("<UNKNOWN {:?}>", self.0))
-        }
-    }
-}
-pub struct MessageHeaderUnionTableOffset {}
-impl<'a> flatbuffers::Follow<'a> for MessageHeader {
-    type Inner = Self;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        let b = flatbuffers::read_scalar_at::<u8>(buf, loc);
-        Self(b)
-    }
-}
-
-impl flatbuffers::Push for MessageHeader {
-    type Output = MessageHeader;
-    #[inline]
-    fn push(&self, dst: &mut [u8], _rest: &[u8]) {
-        flatbuffers::emplace_scalar::<u8>(dst, self.0);
-    }
-}
-
-impl flatbuffers::EndianScalar for MessageHeader {
-    #[inline]
-    fn to_little_endian(self) -> Self {
-        let b = u8::to_le(self.0);
-        Self(b)
-    }
-    #[inline]
-    fn from_little_endian(self) -> Self {
-        let b = u8::from_le(self.0);
-        Self(b)
-    }
-}
-
-impl<'a> flatbuffers::Verifiable for MessageHeader {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        u8::run_verifier(v, pos)
-    }
-}
-
-impl flatbuffers::SimpleToVerifyInSlice for MessageHeader {}
-/// ----------------------------------------------------------------------
-/// Data structures for describing a table row batch (a collection of
-/// equal-length Arrow arrays)
-/// Metadata about a field at some level of a nested type tree (but not
-/// its children).
-///
-/// For example, a List<Int16> with values `[[1, 2, 3], null, [4], [5, 6], null]`
-/// would have {length: 5, null_count: 2} for its List node, and {length: 6,
-/// null_count: 0} for its Int16 node, as separate FieldNode structs
-// struct FieldNode, aligned to 8
-#[repr(transparent)]
-#[derive(Clone, Copy, PartialEq)]
-pub struct FieldNode(pub [u8; 16]);
-impl std::fmt::Debug for FieldNode {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        f.debug_struct("FieldNode")
-            .field("length", &self.length())
-            .field("null_count", &self.null_count())
-            .finish()
-    }
-}
-
-impl flatbuffers::SimpleToVerifyInSlice for FieldNode {}
-impl flatbuffers::SafeSliceAccess for FieldNode {}
-impl<'a> flatbuffers::Follow<'a> for FieldNode {
-    type Inner = &'a FieldNode;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        <&'a FieldNode>::follow(buf, loc)
-    }
-}
-impl<'a> flatbuffers::Follow<'a> for &'a FieldNode {
-    type Inner = &'a FieldNode;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        flatbuffers::follow_cast_ref::<FieldNode>(buf, loc)
-    }
-}
-impl<'b> flatbuffers::Push for FieldNode {
-    type Output = FieldNode;
-    #[inline]
-    fn push(&self, dst: &mut [u8], _rest: &[u8]) {
-        let src = unsafe {
-            ::std::slice::from_raw_parts(
-                self as *const FieldNode as *const u8,
-                Self::size(),
-            )
-        };
-        dst.copy_from_slice(src);
-    }
-}
-impl<'b> flatbuffers::Push for &'b FieldNode {
-    type Output = FieldNode;
-
-    #[inline]
-    fn push(&self, dst: &mut [u8], _rest: &[u8]) {
-        let src = unsafe {
-            ::std::slice::from_raw_parts(
-                *self as *const FieldNode as *const u8,
-                Self::size(),
-            )
-        };
-        dst.copy_from_slice(src);
-    }
-}
-
-impl<'a> flatbuffers::Verifiable for FieldNode {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        v.in_buffer::<Self>(pos)
-    }
-}
-impl FieldNode {
-    #[allow(clippy::too_many_arguments)]
-    pub fn new(length: i64, null_count: i64) -> Self {
-        let mut s = Self([0; 16]);
-        s.set_length(length);
-        s.set_null_count(null_count);
-        s
-    }
-
-    /// The number of value slots in the Arrow array at this level of a nested
-    /// tree
-    pub fn length(&self) -> i64 {
-        let mut mem = core::mem::MaybeUninit::<i64>::uninit();
-        unsafe {
-            core::ptr::copy_nonoverlapping(
-                self.0[0..].as_ptr(),
-                mem.as_mut_ptr() as *mut u8,
-                core::mem::size_of::<i64>(),
-            );
-            mem.assume_init()
-        }
-        .from_little_endian()
-    }
-
-    pub fn set_length(&mut self, x: i64) {
-        let x_le = x.to_little_endian();
-        unsafe {
-            core::ptr::copy_nonoverlapping(
-                &x_le as *const i64 as *const u8,
-                self.0[0..].as_mut_ptr(),
-                core::mem::size_of::<i64>(),
-            );
-        }
-    }
-
-    /// The number of observed nulls. Fields with null_count == 0 may choose not
-    /// to write their physical validity bitmap out as a materialized buffer,
-    /// instead setting the length of the bitmap buffer to 0.
-    pub fn null_count(&self) -> i64 {
-        let mut mem = core::mem::MaybeUninit::<i64>::uninit();
-        unsafe {
-            core::ptr::copy_nonoverlapping(
-                self.0[8..].as_ptr(),
-                mem.as_mut_ptr() as *mut u8,
-                core::mem::size_of::<i64>(),
-            );
-            mem.assume_init()
-        }
-        .from_little_endian()
-    }
-
-    pub fn set_null_count(&mut self, x: i64) {
-        let x_le = x.to_little_endian();
-        unsafe {
-            core::ptr::copy_nonoverlapping(
-                &x_le as *const i64 as *const u8,
-                self.0[8..].as_mut_ptr(),
-                core::mem::size_of::<i64>(),
-            );
-        }
-    }
-}
-
-pub enum BodyCompressionOffset {}
-#[derive(Copy, Clone, PartialEq)]
-
-/// Optional compression for the memory buffers constituting IPC message
-/// bodies. Intended for use with RecordBatch but could be used for other
-/// message types
-pub struct BodyCompression<'a> {
-    pub _tab: flatbuffers::Table<'a>,
-}
-
-impl<'a> flatbuffers::Follow<'a> for BodyCompression<'a> {
-    type Inner = BodyCompression<'a>;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        Self {
-            _tab: flatbuffers::Table { buf, loc },
-        }
-    }
-}
-
-impl<'a> BodyCompression<'a> {
-    #[inline]
-    pub fn init_from_table(table: flatbuffers::Table<'a>) -> Self {
-        BodyCompression { _tab: table }
-    }
-    #[allow(unused_mut)]
-    pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>(
-        _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>,
-        args: &'args BodyCompressionArgs,
-    ) -> flatbuffers::WIPOffset<BodyCompression<'bldr>> {
-        let mut builder = BodyCompressionBuilder::new(_fbb);
-        builder.add_method(args.method);
-        builder.add_codec(args.codec);
-        builder.finish()
-    }
-
-    pub const VT_CODEC: flatbuffers::VOffsetT = 4;
-    pub const VT_METHOD: flatbuffers::VOffsetT = 6;
-
-    /// Compressor library
-    #[inline]
-    pub fn codec(&self) -> CompressionType {
-        self._tab
-            .get::<CompressionType>(
-                BodyCompression::VT_CODEC,
-                Some(CompressionType::LZ4_FRAME),
-            )
-            .unwrap()
-    }
-    /// Indicates the way the record batch body was compressed
-    #[inline]
-    pub fn method(&self) -> BodyCompressionMethod {
-        self._tab
-            .get::<BodyCompressionMethod>(
-                BodyCompression::VT_METHOD,
-                Some(BodyCompressionMethod::BUFFER),
-            )
-            .unwrap()
-    }
-}
-
-impl flatbuffers::Verifiable for BodyCompression<'_> {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        v.visit_table(pos)?
-            .visit_field::<CompressionType>(&"codec", Self::VT_CODEC, false)?
-            .visit_field::<BodyCompressionMethod>(&"method", Self::VT_METHOD, false)?
-            .finish();
-        Ok(())
-    }
-}
-pub struct BodyCompressionArgs {
-    pub codec: CompressionType,
-    pub method: BodyCompressionMethod,
-}
-impl<'a> Default for BodyCompressionArgs {
-    #[inline]
-    fn default() -> Self {
-        BodyCompressionArgs {
-            codec: CompressionType::LZ4_FRAME,
-            method: BodyCompressionMethod::BUFFER,
-        }
-    }
-}
-pub struct BodyCompressionBuilder<'a: 'b, 'b> {
-    fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    start_: flatbuffers::WIPOffset<flatbuffers::TableUnfinishedWIPOffset>,
-}
-impl<'a: 'b, 'b> BodyCompressionBuilder<'a, 'b> {
-    #[inline]
-    pub fn add_codec(&mut self, codec: CompressionType) {
-        self.fbb_.push_slot::<CompressionType>(
-            BodyCompression::VT_CODEC,
-            codec,
-            CompressionType::LZ4_FRAME,
-        );
-    }
-    #[inline]
-    pub fn add_method(&mut self, method: BodyCompressionMethod) {
-        self.fbb_.push_slot::<BodyCompressionMethod>(
-            BodyCompression::VT_METHOD,
-            method,
-            BodyCompressionMethod::BUFFER,
-        );
-    }
-    #[inline]
-    pub fn new(
-        _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    ) -> BodyCompressionBuilder<'a, 'b> {
-        let start = _fbb.start_table();
-        BodyCompressionBuilder {
-            fbb_: _fbb,
-            start_: start,
-        }
-    }
-    #[inline]
-    pub fn finish(self) -> flatbuffers::WIPOffset<BodyCompression<'a>> {
-        let o = self.fbb_.end_table(self.start_);
-        flatbuffers::WIPOffset::new(o.value())
-    }
-}
-
-impl std::fmt::Debug for BodyCompression<'_> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let mut ds = f.debug_struct("BodyCompression");
-        ds.field("codec", &self.codec());
-        ds.field("method", &self.method());
-        ds.finish()
-    }
-}
-pub enum RecordBatchOffset {}
-#[derive(Copy, Clone, PartialEq)]
-
-/// A data header describing the shared memory layout of a "record" or "row"
-/// batch. Some systems call this a "row batch" internally and others a "record
-/// batch".
-pub struct RecordBatch<'a> {
-    pub _tab: flatbuffers::Table<'a>,
-}
-
-impl<'a> flatbuffers::Follow<'a> for RecordBatch<'a> {
-    type Inner = RecordBatch<'a>;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        Self {
-            _tab: flatbuffers::Table { buf, loc },
-        }
-    }
-}
-
-impl<'a> RecordBatch<'a> {
-    #[inline]
-    pub fn init_from_table(table: flatbuffers::Table<'a>) -> Self {
-        RecordBatch { _tab: table }
-    }
-    #[allow(unused_mut)]
-    pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>(
-        _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>,
-        args: &'args RecordBatchArgs<'args>,
-    ) -> flatbuffers::WIPOffset<RecordBatch<'bldr>> {
-        let mut builder = RecordBatchBuilder::new(_fbb);
-        builder.add_length(args.length);
-        if let Some(x) = args.compression {
-            builder.add_compression(x);
-        }
-        if let Some(x) = args.buffers {
-            builder.add_buffers(x);
-        }
-        if let Some(x) = args.nodes {
-            builder.add_nodes(x);
-        }
-        builder.finish()
-    }
-
-    pub const VT_LENGTH: flatbuffers::VOffsetT = 4;
-    pub const VT_NODES: flatbuffers::VOffsetT = 6;
-    pub const VT_BUFFERS: flatbuffers::VOffsetT = 8;
-    pub const VT_COMPRESSION: flatbuffers::VOffsetT = 10;
-
-    /// number of records / rows. The arrays in the batch should all have this
-    /// length
-    #[inline]
-    pub fn length(&self) -> i64 {
-        self._tab
-            .get::<i64>(RecordBatch::VT_LENGTH, Some(0))
-            .unwrap()
-    }
-    /// Nodes correspond to the pre-ordered flattened logical schema
-    #[inline]
-    pub fn nodes(&self) -> Option<&'a [FieldNode]> {
-        self._tab
-            .get::<flatbuffers::ForwardsUOffset<flatbuffers::Vector<'a, FieldNode>>>(
-                RecordBatch::VT_NODES,
-                None,
-            )
-            .map(|v| v.safe_slice())
-    }
-    /// Buffers correspond to the pre-ordered flattened buffer tree
-    ///
-    /// The number of buffers appended to this list depends on the schema. For
-    /// example, most primitive arrays will have 2 buffers, 1 for the validity
-    /// bitmap and 1 for the values. For struct arrays, there will only be a
-    /// single buffer for the validity (nulls) bitmap
-    #[inline]
-    pub fn buffers(&self) -> Option<&'a [Buffer]> {
-        self._tab
-            .get::<flatbuffers::ForwardsUOffset<flatbuffers::Vector<'a, Buffer>>>(
-                RecordBatch::VT_BUFFERS,
-                None,
-            )
-            .map(|v| v.safe_slice())
-    }
-    /// Optional compression of the message body
-    #[inline]
-    pub fn compression(&self) -> Option<BodyCompression<'a>> {
-        self._tab
-            .get::<flatbuffers::ForwardsUOffset<BodyCompression>>(
-                RecordBatch::VT_COMPRESSION,
-                None,
-            )
-    }
-}
-
-impl flatbuffers::Verifiable for RecordBatch<'_> {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        v.visit_table(pos)?
-     .visit_field::<i64>(&"length", Self::VT_LENGTH, false)?
-     .visit_field::<flatbuffers::ForwardsUOffset<flatbuffers::Vector<'_, FieldNode>>>(&"nodes", Self::VT_NODES, false)?
-     .visit_field::<flatbuffers::ForwardsUOffset<flatbuffers::Vector<'_, Buffer>>>(&"buffers", Self::VT_BUFFERS, false)?
-     .visit_field::<flatbuffers::ForwardsUOffset<BodyCompression>>(&"compression", Self::VT_COMPRESSION, false)?
-     .finish();
-        Ok(())
-    }
-}
-pub struct RecordBatchArgs<'a> {
-    pub length: i64,
-    pub nodes: Option<flatbuffers::WIPOffset<flatbuffers::Vector<'a, FieldNode>>>,
-    pub buffers: Option<flatbuffers::WIPOffset<flatbuffers::Vector<'a, Buffer>>>,
-    pub compression: Option<flatbuffers::WIPOffset<BodyCompression<'a>>>,
-}
-impl<'a> Default for RecordBatchArgs<'a> {
-    #[inline]
-    fn default() -> Self {
-        RecordBatchArgs {
-            length: 0,
-            nodes: None,
-            buffers: None,
-            compression: None,
-        }
-    }
-}
-pub struct RecordBatchBuilder<'a: 'b, 'b> {
-    fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    start_: flatbuffers::WIPOffset<flatbuffers::TableUnfinishedWIPOffset>,
-}
-impl<'a: 'b, 'b> RecordBatchBuilder<'a, 'b> {
-    #[inline]
-    pub fn add_length(&mut self, length: i64) {
-        self.fbb_
-            .push_slot::<i64>(RecordBatch::VT_LENGTH, length, 0);
-    }
-    #[inline]
-    pub fn add_nodes(
-        &mut self,
-        nodes: flatbuffers::WIPOffset<flatbuffers::Vector<'b, FieldNode>>,
-    ) {
-        self.fbb_
-            .push_slot_always::<flatbuffers::WIPOffset<_>>(RecordBatch::VT_NODES, nodes);
-    }
-    #[inline]
-    pub fn add_buffers(
-        &mut self,
-        buffers: flatbuffers::WIPOffset<flatbuffers::Vector<'b, Buffer>>,
-    ) {
-        self.fbb_.push_slot_always::<flatbuffers::WIPOffset<_>>(
-            RecordBatch::VT_BUFFERS,
-            buffers,
-        );
-    }
-    #[inline]
-    pub fn add_compression(
-        &mut self,
-        compression: flatbuffers::WIPOffset<BodyCompression<'b>>,
-    ) {
-        self.fbb_
-            .push_slot_always::<flatbuffers::WIPOffset<BodyCompression>>(
-                RecordBatch::VT_COMPRESSION,
-                compression,
-            );
-    }
-    #[inline]
-    pub fn new(
-        _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    ) -> RecordBatchBuilder<'a, 'b> {
-        let start = _fbb.start_table();
-        RecordBatchBuilder {
-            fbb_: _fbb,
-            start_: start,
-        }
-    }
-    #[inline]
-    pub fn finish(self) -> flatbuffers::WIPOffset<RecordBatch<'a>> {
-        let o = self.fbb_.end_table(self.start_);
-        flatbuffers::WIPOffset::new(o.value())
-    }
-}
-
-impl std::fmt::Debug for RecordBatch<'_> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let mut ds = f.debug_struct("RecordBatch");
-        ds.field("length", &self.length());
-        ds.field("nodes", &self.nodes());
-        ds.field("buffers", &self.buffers());
-        ds.field("compression", &self.compression());
-        ds.finish()
-    }
-}
-pub enum DictionaryBatchOffset {}
-#[derive(Copy, Clone, PartialEq)]
-
-/// For sending dictionary encoding information. Any Field can be
-/// dictionary-encoded, but in this case none of its children may be
-/// dictionary-encoded.
-/// There is one vector / column per dictionary, but that vector / column
-/// may be spread across multiple dictionary batches by using the isDelta
-/// flag
-pub struct DictionaryBatch<'a> {
-    pub _tab: flatbuffers::Table<'a>,
-}
-
-impl<'a> flatbuffers::Follow<'a> for DictionaryBatch<'a> {
-    type Inner = DictionaryBatch<'a>;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        Self {
-            _tab: flatbuffers::Table { buf, loc },
-        }
-    }
-}
-
-impl<'a> DictionaryBatch<'a> {
-    #[inline]
-    pub fn init_from_table(table: flatbuffers::Table<'a>) -> Self {
-        DictionaryBatch { _tab: table }
-    }
-    #[allow(unused_mut)]
-    pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>(
-        _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>,
-        args: &'args DictionaryBatchArgs<'args>,
-    ) -> flatbuffers::WIPOffset<DictionaryBatch<'bldr>> {
-        let mut builder = DictionaryBatchBuilder::new(_fbb);
-        builder.add_id(args.id);
-        if let Some(x) = args.data {
-            builder.add_data(x);
-        }
-        builder.add_isDelta(args.isDelta);
-        builder.finish()
-    }
-
-    pub const VT_ID: flatbuffers::VOffsetT = 4;
-    pub const VT_DATA: flatbuffers::VOffsetT = 6;
-    pub const VT_ISDELTA: flatbuffers::VOffsetT = 8;
-
-    #[inline]
-    pub fn id(&self) -> i64 {
-        self._tab
-            .get::<i64>(DictionaryBatch::VT_ID, Some(0))
-            .unwrap()
-    }
-    #[inline]
-    pub fn data(&self) -> Option<RecordBatch<'a>> {
-        self._tab.get::<flatbuffers::ForwardsUOffset<RecordBatch>>(
-            DictionaryBatch::VT_DATA,
-            None,
-        )
-    }
-    /// If isDelta is true the values in the dictionary are to be appended to a
-    /// dictionary with the indicated id. If isDelta is false this dictionary
-    /// should replace the existing dictionary.
-    #[inline]
-    pub fn isDelta(&self) -> bool {
-        self._tab
-            .get::<bool>(DictionaryBatch::VT_ISDELTA, Some(false))
-            .unwrap()
-    }
-}
-
-impl flatbuffers::Verifiable for DictionaryBatch<'_> {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        v.visit_table(pos)?
-            .visit_field::<i64>(&"id", Self::VT_ID, false)?
-            .visit_field::<flatbuffers::ForwardsUOffset<RecordBatch>>(
-                &"data",
-                Self::VT_DATA,
-                false,
-            )?
-            .visit_field::<bool>(&"isDelta", Self::VT_ISDELTA, false)?
-            .finish();
-        Ok(())
-    }
-}
-pub struct DictionaryBatchArgs<'a> {
-    pub id: i64,
-    pub data: Option<flatbuffers::WIPOffset<RecordBatch<'a>>>,
-    pub isDelta: bool,
-}
-impl<'a> Default for DictionaryBatchArgs<'a> {
-    #[inline]
-    fn default() -> Self {
-        DictionaryBatchArgs {
-            id: 0,
-            data: None,
-            isDelta: false,
-        }
-    }
-}
-pub struct DictionaryBatchBuilder<'a: 'b, 'b> {
-    fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    start_: flatbuffers::WIPOffset<flatbuffers::TableUnfinishedWIPOffset>,
-}
-impl<'a: 'b, 'b> DictionaryBatchBuilder<'a, 'b> {
-    #[inline]
-    pub fn add_id(&mut self, id: i64) {
-        self.fbb_.push_slot::<i64>(DictionaryBatch::VT_ID, id, 0);
-    }
-    #[inline]
-    pub fn add_data(&mut self, data: flatbuffers::WIPOffset<RecordBatch<'b>>) {
-        self.fbb_
-            .push_slot_always::<flatbuffers::WIPOffset<RecordBatch>>(
-                DictionaryBatch::VT_DATA,
-                data,
-            );
-    }
-    #[inline]
-    pub fn add_isDelta(&mut self, isDelta: bool) {
-        self.fbb_
-            .push_slot::<bool>(DictionaryBatch::VT_ISDELTA, isDelta, false);
-    }
-    #[inline]
-    pub fn new(
-        _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    ) -> DictionaryBatchBuilder<'a, 'b> {
-        let start = _fbb.start_table();
-        DictionaryBatchBuilder {
-            fbb_: _fbb,
-            start_: start,
-        }
-    }
-    #[inline]
-    pub fn finish(self) -> flatbuffers::WIPOffset<DictionaryBatch<'a>> {
-        let o = self.fbb_.end_table(self.start_);
-        flatbuffers::WIPOffset::new(o.value())
-    }
-}
-
-impl std::fmt::Debug for DictionaryBatch<'_> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let mut ds = f.debug_struct("DictionaryBatch");
-        ds.field("id", &self.id());
-        ds.field("data", &self.data());
-        ds.field("isDelta", &self.isDelta());
-        ds.finish()
-    }
-}
-pub enum MessageOffset {}
-#[derive(Copy, Clone, PartialEq)]
-
-pub struct Message<'a> {
-    pub _tab: flatbuffers::Table<'a>,
-}
-
-impl<'a> flatbuffers::Follow<'a> for Message<'a> {
-    type Inner = Message<'a>;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        Self {
-            _tab: flatbuffers::Table { buf, loc },
-        }
-    }
-}
-
-impl<'a> Message<'a> {
-    #[inline]
-    pub fn init_from_table(table: flatbuffers::Table<'a>) -> Self {
-        Message { _tab: table }
-    }
-    #[allow(unused_mut)]
-    pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>(
-        _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>,
-        args: &'args MessageArgs<'args>,
-    ) -> flatbuffers::WIPOffset<Message<'bldr>> {
-        let mut builder = MessageBuilder::new(_fbb);
-        builder.add_bodyLength(args.bodyLength);
-        if let Some(x) = args.custom_metadata {
-            builder.add_custom_metadata(x);
-        }
-        if let Some(x) = args.header {
-            builder.add_header(x);
-        }
-        builder.add_version(args.version);
-        builder.add_header_type(args.header_type);
-        builder.finish()
-    }
-
-    pub const VT_VERSION: flatbuffers::VOffsetT = 4;
-    pub const VT_HEADER_TYPE: flatbuffers::VOffsetT = 6;
-    pub const VT_HEADER: flatbuffers::VOffsetT = 8;
-    pub const VT_BODYLENGTH: flatbuffers::VOffsetT = 10;
-    pub const VT_CUSTOM_METADATA: flatbuffers::VOffsetT = 12;
-
-    #[inline]
-    pub fn version(&self) -> MetadataVersion {
-        self._tab
-            .get::<MetadataVersion>(Message::VT_VERSION, Some(MetadataVersion::V1))
-            .unwrap()
-    }
-    #[inline]
-    pub fn header_type(&self) -> MessageHeader {
-        self._tab
-            .get::<MessageHeader>(Message::VT_HEADER_TYPE, Some(MessageHeader::NONE))
-            .unwrap()
-    }
-    #[inline]
-    pub fn header(&self) -> Option<flatbuffers::Table<'a>> {
-        self._tab
-            .get::<flatbuffers::ForwardsUOffset<flatbuffers::Table<'a>>>(
-                Message::VT_HEADER,
-                None,
-            )
-    }
-    #[inline]
-    pub fn bodyLength(&self) -> i64 {
-        self._tab
-            .get::<i64>(Message::VT_BODYLENGTH, Some(0))
-            .unwrap()
-    }
-    #[inline]
-    pub fn custom_metadata(
-        &self,
-    ) -> Option<flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<KeyValue<'a>>>> {
-        self._tab.get::<flatbuffers::ForwardsUOffset<
-            flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<KeyValue>>,
-        >>(Message::VT_CUSTOM_METADATA, None)
-    }
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn header_as_schema(&self) -> Option<Schema<'a>> {
-        if self.header_type() == MessageHeader::Schema {
-            self.header().map(Schema::init_from_table)
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn header_as_dictionary_batch(&self) -> Option<DictionaryBatch<'a>> {
-        if self.header_type() == MessageHeader::DictionaryBatch {
-            self.header().map(DictionaryBatch::init_from_table)
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn header_as_record_batch(&self) -> Option<RecordBatch<'a>> {
-        if self.header_type() == MessageHeader::RecordBatch {
-            self.header().map(RecordBatch::init_from_table)
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn header_as_tensor(&self) -> Option<Tensor<'a>> {
-        if self.header_type() == MessageHeader::Tensor {
-            self.header().map(Tensor::init_from_table)
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn header_as_sparse_tensor(&self) -> Option<SparseTensor<'a>> {
-        if self.header_type() == MessageHeader::SparseTensor {
-            self.header().map(SparseTensor::init_from_table)
-        } else {
-            None
-        }
-    }
-}
-
-impl flatbuffers::Verifiable for Message<'_> {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        v.visit_table(pos)?
-     .visit_field::<MetadataVersion>(&"version", Self::VT_VERSION, false)?
-     .visit_union::<MessageHeader, _>(&"header_type", Self::VT_HEADER_TYPE, &"header", Self::VT_HEADER, false, |key, v, pos| {
-        match key {
-          MessageHeader::Schema => v.verify_union_variant::<flatbuffers::ForwardsUOffset<Schema>>("MessageHeader::Schema", pos),
-          MessageHeader::DictionaryBatch => v.verify_union_variant::<flatbuffers::ForwardsUOffset<DictionaryBatch>>("MessageHeader::DictionaryBatch", pos),
-          MessageHeader::RecordBatch => v.verify_union_variant::<flatbuffers::ForwardsUOffset<RecordBatch>>("MessageHeader::RecordBatch", pos),
-          MessageHeader::Tensor => v.verify_union_variant::<flatbuffers::ForwardsUOffset<Tensor>>("MessageHeader::Tensor", pos),
-          MessageHeader::SparseTensor => v.verify_union_variant::<flatbuffers::ForwardsUOffset<SparseTensor>>("MessageHeader::SparseTensor", pos),
-          _ => Ok(()),
-        }
-     })?
-     .visit_field::<i64>(&"bodyLength", Self::VT_BODYLENGTH, false)?
-     .visit_field::<flatbuffers::ForwardsUOffset<flatbuffers::Vector<'_, flatbuffers::ForwardsUOffset<KeyValue>>>>(&"custom_metadata", Self::VT_CUSTOM_METADATA, false)?
-     .finish();
-        Ok(())
-    }
-}
-pub struct MessageArgs<'a> {
-    pub version: MetadataVersion,
-    pub header_type: MessageHeader,
-    pub header: Option<flatbuffers::WIPOffset<flatbuffers::UnionWIPOffset>>,
-    pub bodyLength: i64,
-    pub custom_metadata: Option<
-        flatbuffers::WIPOffset<
-            flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<KeyValue<'a>>>,
-        >,
-    >,
-}
-impl<'a> Default for MessageArgs<'a> {
-    #[inline]
-    fn default() -> Self {
-        MessageArgs {
-            version: MetadataVersion::V1,
-            header_type: MessageHeader::NONE,
-            header: None,
-            bodyLength: 0,
-            custom_metadata: None,
-        }
-    }
-}
-pub struct MessageBuilder<'a: 'b, 'b> {
-    fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    start_: flatbuffers::WIPOffset<flatbuffers::TableUnfinishedWIPOffset>,
-}
-impl<'a: 'b, 'b> MessageBuilder<'a, 'b> {
-    #[inline]
-    pub fn add_version(&mut self, version: MetadataVersion) {
-        self.fbb_.push_slot::<MetadataVersion>(
-            Message::VT_VERSION,
-            version,
-            MetadataVersion::V1,
-        );
-    }
-    #[inline]
-    pub fn add_header_type(&mut self, header_type: MessageHeader) {
-        self.fbb_.push_slot::<MessageHeader>(
-            Message::VT_HEADER_TYPE,
-            header_type,
-            MessageHeader::NONE,
-        );
-    }
-    #[inline]
-    pub fn add_header(
-        &mut self,
-        header: flatbuffers::WIPOffset<flatbuffers::UnionWIPOffset>,
-    ) {
-        self.fbb_
-            .push_slot_always::<flatbuffers::WIPOffset<_>>(Message::VT_HEADER, header);
-    }
-    #[inline]
-    pub fn add_bodyLength(&mut self, bodyLength: i64) {
-        self.fbb_
-            .push_slot::<i64>(Message::VT_BODYLENGTH, bodyLength, 0);
-    }
-    #[inline]
-    pub fn add_custom_metadata(
-        &mut self,
-        custom_metadata: flatbuffers::WIPOffset<
-            flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<KeyValue<'b>>>,
-        >,
-    ) {
-        self.fbb_.push_slot_always::<flatbuffers::WIPOffset<_>>(
-            Message::VT_CUSTOM_METADATA,
-            custom_metadata,
-        );
-    }
-    #[inline]
-    pub fn new(
-        _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    ) -> MessageBuilder<'a, 'b> {
-        let start = _fbb.start_table();
-        MessageBuilder {
-            fbb_: _fbb,
-            start_: start,
-        }
-    }
-    #[inline]
-    pub fn finish(self) -> flatbuffers::WIPOffset<Message<'a>> {
-        let o = self.fbb_.end_table(self.start_);
-        flatbuffers::WIPOffset::new(o.value())
-    }
-}
-
-impl std::fmt::Debug for Message<'_> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let mut ds = f.debug_struct("Message");
-        ds.field("version", &self.version());
-        ds.field("header_type", &self.header_type());
-        match self.header_type() {
-            MessageHeader::Schema => {
-                if let Some(x) = self.header_as_schema() {
-                    ds.field("header", &x)
-                } else {
-                    ds.field(
-                        "header",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            MessageHeader::DictionaryBatch => {
-                if let Some(x) = self.header_as_dictionary_batch() {
-                    ds.field("header", &x)
-                } else {
-                    ds.field(
-                        "header",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            MessageHeader::RecordBatch => {
-                if let Some(x) = self.header_as_record_batch() {
-                    ds.field("header", &x)
-                } else {
-                    ds.field(
-                        "header",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            MessageHeader::Tensor => {
-                if let Some(x) = self.header_as_tensor() {
-                    ds.field("header", &x)
-                } else {
-                    ds.field(
-                        "header",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            MessageHeader::SparseTensor => {
-                if let Some(x) = self.header_as_sparse_tensor() {
-                    ds.field("header", &x)
-                } else {
-                    ds.field(
-                        "header",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            _ => {
-                let x: Option<()> = None;
-                ds.field("header", &x)
-            }
-        };
-        ds.field("bodyLength", &self.bodyLength());
-        ds.field("custom_metadata", &self.custom_metadata());
-        ds.finish()
-    }
-}
-#[inline]
-#[deprecated(since = "2.0.0", note = "Deprecated in favor of `root_as...` methods.")]
-pub fn get_root_as_message<'a>(buf: &'a [u8]) -> Message<'a> {
-    unsafe { flatbuffers::root_unchecked::<Message<'a>>(buf) }
-}
-
-#[inline]
-#[deprecated(since = "2.0.0", note = "Deprecated in favor of `root_as...` methods.")]
-pub fn get_size_prefixed_root_as_message<'a>(buf: &'a [u8]) -> Message<'a> {
-    unsafe { flatbuffers::size_prefixed_root_unchecked::<Message<'a>>(buf) }
-}
-
-#[inline]
-/// Verifies that a buffer of bytes contains a `Message`
-/// and returns it.
-/// Note that verification is still experimental and may not
-/// catch every error, or be maximally performant. For the
-/// previous, unchecked, behavior use
-/// `root_as_message_unchecked`.
-pub fn root_as_message(buf: &[u8]) -> Result<Message, flatbuffers::InvalidFlatbuffer> {
-    flatbuffers::root::<Message>(buf)
-}
-#[inline]
-/// Verifies that a buffer of bytes contains a size prefixed
-/// `Message` and returns it.
-/// Note that verification is still experimental and may not
-/// catch every error, or be maximally performant. For the
-/// previous, unchecked, behavior use
-/// `size_prefixed_root_as_message_unchecked`.
-pub fn size_prefixed_root_as_message(
-    buf: &[u8],
-) -> Result<Message, flatbuffers::InvalidFlatbuffer> {
-    flatbuffers::size_prefixed_root::<Message>(buf)
-}
-#[inline]
-/// Verifies, with the given options, that a buffer of bytes
-/// contains a `Message` and returns it.
-/// Note that verification is still experimental and may not
-/// catch every error, or be maximally performant. For the
-/// previous, unchecked, behavior use
-/// `root_as_message_unchecked`.
-pub fn root_as_message_with_opts<'b, 'o>(
-    opts: &'o flatbuffers::VerifierOptions,
-    buf: &'b [u8],
-) -> Result<Message<'b>, flatbuffers::InvalidFlatbuffer> {
-    flatbuffers::root_with_opts::<Message<'b>>(opts, buf)
-}
-#[inline]
-/// Verifies, with the given verifier options, that a buffer of
-/// bytes contains a size prefixed `Message` and returns
-/// it. Note that verification is still experimental and may not
-/// catch every error, or be maximally performant. For the
-/// previous, unchecked, behavior use
-/// `root_as_message_unchecked`.
-pub fn size_prefixed_root_as_message_with_opts<'b, 'o>(
-    opts: &'o flatbuffers::VerifierOptions,
-    buf: &'b [u8],
-) -> Result<Message<'b>, flatbuffers::InvalidFlatbuffer> {
-    flatbuffers::size_prefixed_root_with_opts::<Message<'b>>(opts, buf)
-}
-#[inline]
-/// Assumes, without verification, that a buffer of bytes contains a Message and returns it.
-/// # Safety
-/// Callers must trust the given bytes do indeed contain a valid `Message`.
-pub unsafe fn root_as_message_unchecked(buf: &[u8]) -> Message {
-    flatbuffers::root_unchecked::<Message>(buf)
-}
-#[inline]
-/// Assumes, without verification, that a buffer of bytes contains a size prefixed Message and returns it.
-/// # Safety
-/// Callers must trust the given bytes do indeed contain a valid size prefixed `Message`.
-pub unsafe fn size_prefixed_root_as_message_unchecked(buf: &[u8]) -> Message {
-    flatbuffers::size_prefixed_root_unchecked::<Message>(buf)
-}
-#[inline]
-pub fn finish_message_buffer<'a, 'b>(
-    fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    root: flatbuffers::WIPOffset<Message<'a>>,
-) {
-    fbb.finish(root, None);
-}
-
-#[inline]
-pub fn finish_size_prefixed_message_buffer<'a, 'b>(
-    fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    root: flatbuffers::WIPOffset<Message<'a>>,
-) {
-    fbb.finish_size_prefixed(root, None);
-}
diff --git a/rust/arrow/src/ipc/gen/Schema.rs b/rust/arrow/src/ipc/gen/Schema.rs
deleted file mode 100644
index f37f9206cb7..00000000000
--- a/rust/arrow/src/ipc/gen/Schema.rs
+++ /dev/null
@@ -1,4586 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#![allow(dead_code)]
-#![allow(unused_imports)]
-
-use flatbuffers::EndianScalar;
-use std::{cmp::Ordering, mem};
-// automatically generated by the FlatBuffers compiler, do not modify
-
-#[deprecated(
-    since = "2.0.0",
-    note = "Use associated constants instead. This will no longer be generated in 2021."
-)]
-pub const ENUM_MIN_METADATA_VERSION: i16 = 0;
-#[deprecated(
-    since = "2.0.0",
-    note = "Use associated constants instead. This will no longer be generated in 2021."
-)]
-pub const ENUM_MAX_METADATA_VERSION: i16 = 4;
-#[deprecated(
-    since = "2.0.0",
-    note = "Use associated constants instead. This will no longer be generated in 2021."
-)]
-#[allow(non_camel_case_types)]
-pub const ENUM_VALUES_METADATA_VERSION: [MetadataVersion; 5] = [
-    MetadataVersion::V1,
-    MetadataVersion::V2,
-    MetadataVersion::V3,
-    MetadataVersion::V4,
-    MetadataVersion::V5,
-];
-
-#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
-#[repr(transparent)]
-pub struct MetadataVersion(pub i16);
-#[allow(non_upper_case_globals)]
-impl MetadataVersion {
-    /// 0.1.0 (October 2016).
-    pub const V1: Self = Self(0);
-    /// 0.2.0 (February 2017). Non-backwards compatible with V1.
-    pub const V2: Self = Self(1);
-    /// 0.3.0 -> 0.7.1 (May - December 2017). Non-backwards compatible with V2.
-    pub const V3: Self = Self(2);
-    /// >= 0.8.0 (December 2017). Non-backwards compatible with V3.
-    pub const V4: Self = Self(3);
-    /// >= 1.0.0 (July 2020. Backwards compatible with V4 (V5 readers can read V4
-    /// metadata and IPC messages). Implementations are recommended to provide a
-    /// V4 compatibility mode with V5 format changes disabled.
-    ///
-    /// Incompatible changes between V4 and V5:
-    /// - Union buffer layout has changed. In V5, Unions don't have a validity
-    ///   bitmap buffer.
-    pub const V5: Self = Self(4);
-
-    pub const ENUM_MIN: i16 = 0;
-    pub const ENUM_MAX: i16 = 4;
-    pub const ENUM_VALUES: &'static [Self] =
-        &[Self::V1, Self::V2, Self::V3, Self::V4, Self::V5];
-    /// Returns the variant's name or "" if unknown.
-    pub fn variant_name(self) -> Option<&'static str> {
-        match self {
-            Self::V1 => Some("V1"),
-            Self::V2 => Some("V2"),
-            Self::V3 => Some("V3"),
-            Self::V4 => Some("V4"),
-            Self::V5 => Some("V5"),
-            _ => None,
-        }
-    }
-}
-impl std::fmt::Debug for MetadataVersion {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        if let Some(name) = self.variant_name() {
-            f.write_str(name)
-        } else {
-            f.write_fmt(format_args!("<UNKNOWN {:?}>", self.0))
-        }
-    }
-}
-impl<'a> flatbuffers::Follow<'a> for MetadataVersion {
-    type Inner = Self;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        let b = flatbuffers::read_scalar_at::<i16>(buf, loc);
-        Self(b)
-    }
-}
-
-impl flatbuffers::Push for MetadataVersion {
-    type Output = MetadataVersion;
-    #[inline]
-    fn push(&self, dst: &mut [u8], _rest: &[u8]) {
-        flatbuffers::emplace_scalar::<i16>(dst, self.0);
-    }
-}
-
-impl flatbuffers::EndianScalar for MetadataVersion {
-    #[inline]
-    fn to_little_endian(self) -> Self {
-        let b = i16::to_le(self.0);
-        Self(b)
-    }
-    #[inline]
-    fn from_little_endian(self) -> Self {
-        let b = i16::from_le(self.0);
-        Self(b)
-    }
-}
-
-impl<'a> flatbuffers::Verifiable for MetadataVersion {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        i16::run_verifier(v, pos)
-    }
-}
-
-impl flatbuffers::SimpleToVerifyInSlice for MetadataVersion {}
-#[deprecated(
-    since = "2.0.0",
-    note = "Use associated constants instead. This will no longer be generated in 2021."
-)]
-pub const ENUM_MIN_FEATURE: i64 = 0;
-#[deprecated(
-    since = "2.0.0",
-    note = "Use associated constants instead. This will no longer be generated in 2021."
-)]
-pub const ENUM_MAX_FEATURE: i64 = 2;
-#[deprecated(
-    since = "2.0.0",
-    note = "Use associated constants instead. This will no longer be generated in 2021."
-)]
-#[allow(non_camel_case_types)]
-pub const ENUM_VALUES_FEATURE: [Feature; 3] = [
-    Feature::UNUSED,
-    Feature::DICTIONARY_REPLACEMENT,
-    Feature::COMPRESSED_BODY,
-];
-
-/// Represents Arrow Features that might not have full support
-/// within implementations. This is intended to be used in
-/// two scenarios:
-///  1.  A mechanism for readers of Arrow Streams
-///      and files to understand that the stream or file makes
-///      use of a feature that isn't supported or unknown to
-///      the implementation (and therefore can meet the Arrow
-///      forward compatibility guarantees).
-///  2.  A means of negotiating between a client and server
-///      what features a stream is allowed to use. The enums
-///      values here are intented to represent higher level
-///      features, additional details maybe negotiated
-///      with key-value pairs specific to the protocol.
-///
-/// Enums added to this list should be assigned power-of-two values
-/// to facilitate exchanging and comparing bitmaps for supported
-/// features.
-#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
-#[repr(transparent)]
-pub struct Feature(pub i64);
-#[allow(non_upper_case_globals)]
-impl Feature {
-    /// Needed to make flatbuffers happy.
-    pub const UNUSED: Self = Self(0);
-    /// The stream makes use of multiple full dictionaries with the
-    /// same ID and assumes clients implement dictionary replacement
-    /// correctly.
-    pub const DICTIONARY_REPLACEMENT: Self = Self(1);
-    /// The stream makes use of compressed bodies as described
-    /// in Message.fbs.
-    pub const COMPRESSED_BODY: Self = Self(2);
-
-    pub const ENUM_MIN: i64 = 0;
-    pub const ENUM_MAX: i64 = 2;
-    pub const ENUM_VALUES: &'static [Self] = &[
-        Self::UNUSED,
-        Self::DICTIONARY_REPLACEMENT,
-        Self::COMPRESSED_BODY,
-    ];
-    /// Returns the variant's name or "" if unknown.
-    pub fn variant_name(self) -> Option<&'static str> {
-        match self {
-            Self::UNUSED => Some("UNUSED"),
-            Self::DICTIONARY_REPLACEMENT => Some("DICTIONARY_REPLACEMENT"),
-            Self::COMPRESSED_BODY => Some("COMPRESSED_BODY"),
-            _ => None,
-        }
-    }
-}
-impl std::fmt::Debug for Feature {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        if let Some(name) = self.variant_name() {
-            f.write_str(name)
-        } else {
-            f.write_fmt(format_args!("<UNKNOWN {:?}>", self.0))
-        }
-    }
-}
-impl<'a> flatbuffers::Follow<'a> for Feature {
-    type Inner = Self;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        let b = flatbuffers::read_scalar_at::<i64>(buf, loc);
-        Self(b)
-    }
-}
-
-impl flatbuffers::Push for Feature {
-    type Output = Feature;
-    #[inline]
-    fn push(&self, dst: &mut [u8], _rest: &[u8]) {
-        flatbuffers::emplace_scalar::<i64>(dst, self.0);
-    }
-}
-
-impl flatbuffers::EndianScalar for Feature {
-    #[inline]
-    fn to_little_endian(self) -> Self {
-        let b = i64::to_le(self.0);
-        Self(b)
-    }
-    #[inline]
-    fn from_little_endian(self) -> Self {
-        let b = i64::from_le(self.0);
-        Self(b)
-    }
-}
-
-impl<'a> flatbuffers::Verifiable for Feature {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        i64::run_verifier(v, pos)
-    }
-}
-
-impl flatbuffers::SimpleToVerifyInSlice for Feature {}
-#[deprecated(
-    since = "2.0.0",
-    note = "Use associated constants instead. This will no longer be generated in 2021."
-)]
-pub const ENUM_MIN_UNION_MODE: i16 = 0;
-#[deprecated(
-    since = "2.0.0",
-    note = "Use associated constants instead. This will no longer be generated in 2021."
-)]
-pub const ENUM_MAX_UNION_MODE: i16 = 1;
-#[deprecated(
-    since = "2.0.0",
-    note = "Use associated constants instead. This will no longer be generated in 2021."
-)]
-#[allow(non_camel_case_types)]
-pub const ENUM_VALUES_UNION_MODE: [UnionMode; 2] = [UnionMode::Sparse, UnionMode::Dense];
-
-#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
-#[repr(transparent)]
-pub struct UnionMode(pub i16);
-#[allow(non_upper_case_globals)]
-impl UnionMode {
-    pub const Sparse: Self = Self(0);
-    pub const Dense: Self = Self(1);
-
-    pub const ENUM_MIN: i16 = 0;
-    pub const ENUM_MAX: i16 = 1;
-    pub const ENUM_VALUES: &'static [Self] = &[Self::Sparse, Self::Dense];
-    /// Returns the variant's name or "" if unknown.
-    pub fn variant_name(self) -> Option<&'static str> {
-        match self {
-            Self::Sparse => Some("Sparse"),
-            Self::Dense => Some("Dense"),
-            _ => None,
-        }
-    }
-}
-impl std::fmt::Debug for UnionMode {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        if let Some(name) = self.variant_name() {
-            f.write_str(name)
-        } else {
-            f.write_fmt(format_args!("<UNKNOWN {:?}>", self.0))
-        }
-    }
-}
-impl<'a> flatbuffers::Follow<'a> for UnionMode {
-    type Inner = Self;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        let b = flatbuffers::read_scalar_at::<i16>(buf, loc);
-        Self(b)
-    }
-}
-
-impl flatbuffers::Push for UnionMode {
-    type Output = UnionMode;
-    #[inline]
-    fn push(&self, dst: &mut [u8], _rest: &[u8]) {
-        flatbuffers::emplace_scalar::<i16>(dst, self.0);
-    }
-}
-
-impl flatbuffers::EndianScalar for UnionMode {
-    #[inline]
-    fn to_little_endian(self) -> Self {
-        let b = i16::to_le(self.0);
-        Self(b)
-    }
-    #[inline]
-    fn from_little_endian(self) -> Self {
-        let b = i16::from_le(self.0);
-        Self(b)
-    }
-}
-
-impl<'a> flatbuffers::Verifiable for UnionMode {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        i16::run_verifier(v, pos)
-    }
-}
-
-impl flatbuffers::SimpleToVerifyInSlice for UnionMode {}
-#[deprecated(
-    since = "2.0.0",
-    note = "Use associated constants instead. This will no longer be generated in 2021."
-)]
-pub const ENUM_MIN_PRECISION: i16 = 0;
-#[deprecated(
-    since = "2.0.0",
-    note = "Use associated constants instead. This will no longer be generated in 2021."
-)]
-pub const ENUM_MAX_PRECISION: i16 = 2;
-#[deprecated(
-    since = "2.0.0",
-    note = "Use associated constants instead. This will no longer be generated in 2021."
-)]
-#[allow(non_camel_case_types)]
-pub const ENUM_VALUES_PRECISION: [Precision; 3] =
-    [Precision::HALF, Precision::SINGLE, Precision::DOUBLE];
-
-#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
-#[repr(transparent)]
-pub struct Precision(pub i16);
-#[allow(non_upper_case_globals)]
-impl Precision {
-    pub const HALF: Self = Self(0);
-    pub const SINGLE: Self = Self(1);
-    pub const DOUBLE: Self = Self(2);
-
-    pub const ENUM_MIN: i16 = 0;
-    pub const ENUM_MAX: i16 = 2;
-    pub const ENUM_VALUES: &'static [Self] = &[Self::HALF, Self::SINGLE, Self::DOUBLE];
-    /// Returns the variant's name or "" if unknown.
-    pub fn variant_name(self) -> Option<&'static str> {
-        match self {
-            Self::HALF => Some("HALF"),
-            Self::SINGLE => Some("SINGLE"),
-            Self::DOUBLE => Some("DOUBLE"),
-            _ => None,
-        }
-    }
-}
-impl std::fmt::Debug for Precision {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        if let Some(name) = self.variant_name() {
-            f.write_str(name)
-        } else {
-            f.write_fmt(format_args!("<UNKNOWN {:?}>", self.0))
-        }
-    }
-}
-impl<'a> flatbuffers::Follow<'a> for Precision {
-    type Inner = Self;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        let b = flatbuffers::read_scalar_at::<i16>(buf, loc);
-        Self(b)
-    }
-}
-
-impl flatbuffers::Push for Precision {
-    type Output = Precision;
-    #[inline]
-    fn push(&self, dst: &mut [u8], _rest: &[u8]) {
-        flatbuffers::emplace_scalar::<i16>(dst, self.0);
-    }
-}
-
-impl flatbuffers::EndianScalar for Precision {
-    #[inline]
-    fn to_little_endian(self) -> Self {
-        let b = i16::to_le(self.0);
-        Self(b)
-    }
-    #[inline]
-    fn from_little_endian(self) -> Self {
-        let b = i16::from_le(self.0);
-        Self(b)
-    }
-}
-
-impl<'a> flatbuffers::Verifiable for Precision {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        i16::run_verifier(v, pos)
-    }
-}
-
-impl flatbuffers::SimpleToVerifyInSlice for Precision {}
-#[deprecated(
-    since = "2.0.0",
-    note = "Use associated constants instead. This will no longer be generated in 2021."
-)]
-pub const ENUM_MIN_DATE_UNIT: i16 = 0;
-#[deprecated(
-    since = "2.0.0",
-    note = "Use associated constants instead. This will no longer be generated in 2021."
-)]
-pub const ENUM_MAX_DATE_UNIT: i16 = 1;
-#[deprecated(
-    since = "2.0.0",
-    note = "Use associated constants instead. This will no longer be generated in 2021."
-)]
-#[allow(non_camel_case_types)]
-pub const ENUM_VALUES_DATE_UNIT: [DateUnit; 2] = [DateUnit::DAY, DateUnit::MILLISECOND];
-
-#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
-#[repr(transparent)]
-pub struct DateUnit(pub i16);
-#[allow(non_upper_case_globals)]
-impl DateUnit {
-    pub const DAY: Self = Self(0);
-    pub const MILLISECOND: Self = Self(1);
-
-    pub const ENUM_MIN: i16 = 0;
-    pub const ENUM_MAX: i16 = 1;
-    pub const ENUM_VALUES: &'static [Self] = &[Self::DAY, Self::MILLISECOND];
-    /// Returns the variant's name or "" if unknown.
-    pub fn variant_name(self) -> Option<&'static str> {
-        match self {
-            Self::DAY => Some("DAY"),
-            Self::MILLISECOND => Some("MILLISECOND"),
-            _ => None,
-        }
-    }
-}
-impl std::fmt::Debug for DateUnit {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        if let Some(name) = self.variant_name() {
-            f.write_str(name)
-        } else {
-            f.write_fmt(format_args!("<UNKNOWN {:?}>", self.0))
-        }
-    }
-}
-impl<'a> flatbuffers::Follow<'a> for DateUnit {
-    type Inner = Self;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        let b = flatbuffers::read_scalar_at::<i16>(buf, loc);
-        Self(b)
-    }
-}
-
-impl flatbuffers::Push for DateUnit {
-    type Output = DateUnit;
-    #[inline]
-    fn push(&self, dst: &mut [u8], _rest: &[u8]) {
-        flatbuffers::emplace_scalar::<i16>(dst, self.0);
-    }
-}
-
-impl flatbuffers::EndianScalar for DateUnit {
-    #[inline]
-    fn to_little_endian(self) -> Self {
-        let b = i16::to_le(self.0);
-        Self(b)
-    }
-    #[inline]
-    fn from_little_endian(self) -> Self {
-        let b = i16::from_le(self.0);
-        Self(b)
-    }
-}
-
-impl<'a> flatbuffers::Verifiable for DateUnit {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        i16::run_verifier(v, pos)
-    }
-}
-
-impl flatbuffers::SimpleToVerifyInSlice for DateUnit {}
-#[deprecated(
-    since = "2.0.0",
-    note = "Use associated constants instead. This will no longer be generated in 2021."
-)]
-pub const ENUM_MIN_TIME_UNIT: i16 = 0;
-#[deprecated(
-    since = "2.0.0",
-    note = "Use associated constants instead. This will no longer be generated in 2021."
-)]
-pub const ENUM_MAX_TIME_UNIT: i16 = 3;
-#[deprecated(
-    since = "2.0.0",
-    note = "Use associated constants instead. This will no longer be generated in 2021."
-)]
-#[allow(non_camel_case_types)]
-pub const ENUM_VALUES_TIME_UNIT: [TimeUnit; 4] = [
-    TimeUnit::SECOND,
-    TimeUnit::MILLISECOND,
-    TimeUnit::MICROSECOND,
-    TimeUnit::NANOSECOND,
-];
-
-#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
-#[repr(transparent)]
-pub struct TimeUnit(pub i16);
-#[allow(non_upper_case_globals)]
-impl TimeUnit {
-    pub const SECOND: Self = Self(0);
-    pub const MILLISECOND: Self = Self(1);
-    pub const MICROSECOND: Self = Self(2);
-    pub const NANOSECOND: Self = Self(3);
-
-    pub const ENUM_MIN: i16 = 0;
-    pub const ENUM_MAX: i16 = 3;
-    pub const ENUM_VALUES: &'static [Self] = &[
-        Self::SECOND,
-        Self::MILLISECOND,
-        Self::MICROSECOND,
-        Self::NANOSECOND,
-    ];
-    /// Returns the variant's name or "" if unknown.
-    pub fn variant_name(self) -> Option<&'static str> {
-        match self {
-            Self::SECOND => Some("SECOND"),
-            Self::MILLISECOND => Some("MILLISECOND"),
-            Self::MICROSECOND => Some("MICROSECOND"),
-            Self::NANOSECOND => Some("NANOSECOND"),
-            _ => None,
-        }
-    }
-}
-impl std::fmt::Debug for TimeUnit {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        if let Some(name) = self.variant_name() {
-            f.write_str(name)
-        } else {
-            f.write_fmt(format_args!("<UNKNOWN {:?}>", self.0))
-        }
-    }
-}
-impl<'a> flatbuffers::Follow<'a> for TimeUnit {
-    type Inner = Self;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        let b = flatbuffers::read_scalar_at::<i16>(buf, loc);
-        Self(b)
-    }
-}
-
-impl flatbuffers::Push for TimeUnit {
-    type Output = TimeUnit;
-    #[inline]
-    fn push(&self, dst: &mut [u8], _rest: &[u8]) {
-        flatbuffers::emplace_scalar::<i16>(dst, self.0);
-    }
-}
-
-impl flatbuffers::EndianScalar for TimeUnit {
-    #[inline]
-    fn to_little_endian(self) -> Self {
-        let b = i16::to_le(self.0);
-        Self(b)
-    }
-    #[inline]
-    fn from_little_endian(self) -> Self {
-        let b = i16::from_le(self.0);
-        Self(b)
-    }
-}
-
-impl<'a> flatbuffers::Verifiable for TimeUnit {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        i16::run_verifier(v, pos)
-    }
-}
-
-impl flatbuffers::SimpleToVerifyInSlice for TimeUnit {}
-#[deprecated(
-    since = "2.0.0",
-    note = "Use associated constants instead. This will no longer be generated in 2021."
-)]
-pub const ENUM_MIN_INTERVAL_UNIT: i16 = 0;
-#[deprecated(
-    since = "2.0.0",
-    note = "Use associated constants instead. This will no longer be generated in 2021."
-)]
-pub const ENUM_MAX_INTERVAL_UNIT: i16 = 1;
-#[deprecated(
-    since = "2.0.0",
-    note = "Use associated constants instead. This will no longer be generated in 2021."
-)]
-#[allow(non_camel_case_types)]
-pub const ENUM_VALUES_INTERVAL_UNIT: [IntervalUnit; 2] =
-    [IntervalUnit::YEAR_MONTH, IntervalUnit::DAY_TIME];
-
-#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
-#[repr(transparent)]
-pub struct IntervalUnit(pub i16);
-#[allow(non_upper_case_globals)]
-impl IntervalUnit {
-    pub const YEAR_MONTH: Self = Self(0);
-    pub const DAY_TIME: Self = Self(1);
-
-    pub const ENUM_MIN: i16 = 0;
-    pub const ENUM_MAX: i16 = 1;
-    pub const ENUM_VALUES: &'static [Self] = &[Self::YEAR_MONTH, Self::DAY_TIME];
-    /// Returns the variant's name or "" if unknown.
-    pub fn variant_name(self) -> Option<&'static str> {
-        match self {
-            Self::YEAR_MONTH => Some("YEAR_MONTH"),
-            Self::DAY_TIME => Some("DAY_TIME"),
-            _ => None,
-        }
-    }
-}
-impl std::fmt::Debug for IntervalUnit {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        if let Some(name) = self.variant_name() {
-            f.write_str(name)
-        } else {
-            f.write_fmt(format_args!("<UNKNOWN {:?}>", self.0))
-        }
-    }
-}
-impl<'a> flatbuffers::Follow<'a> for IntervalUnit {
-    type Inner = Self;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        let b = flatbuffers::read_scalar_at::<i16>(buf, loc);
-        Self(b)
-    }
-}
-
-impl flatbuffers::Push for IntervalUnit {
-    type Output = IntervalUnit;
-    #[inline]
-    fn push(&self, dst: &mut [u8], _rest: &[u8]) {
-        flatbuffers::emplace_scalar::<i16>(dst, self.0);
-    }
-}
-
-impl flatbuffers::EndianScalar for IntervalUnit {
-    #[inline]
-    fn to_little_endian(self) -> Self {
-        let b = i16::to_le(self.0);
-        Self(b)
-    }
-    #[inline]
-    fn from_little_endian(self) -> Self {
-        let b = i16::from_le(self.0);
-        Self(b)
-    }
-}
-
-impl<'a> flatbuffers::Verifiable for IntervalUnit {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        i16::run_verifier(v, pos)
-    }
-}
-
-impl flatbuffers::SimpleToVerifyInSlice for IntervalUnit {}
-#[deprecated(
-    since = "2.0.0",
-    note = "Use associated constants instead. This will no longer be generated in 2021."
-)]
-pub const ENUM_MIN_TYPE: u8 = 0;
-#[deprecated(
-    since = "2.0.0",
-    note = "Use associated constants instead. This will no longer be generated in 2021."
-)]
-pub const ENUM_MAX_TYPE: u8 = 21;
-#[deprecated(
-    since = "2.0.0",
-    note = "Use associated constants instead. This will no longer be generated in 2021."
-)]
-#[allow(non_camel_case_types)]
-pub const ENUM_VALUES_TYPE: [Type; 22] = [
-    Type::NONE,
-    Type::Null,
-    Type::Int,
-    Type::FloatingPoint,
-    Type::Binary,
-    Type::Utf8,
-    Type::Bool,
-    Type::Decimal,
-    Type::Date,
-    Type::Time,
-    Type::Timestamp,
-    Type::Interval,
-    Type::List,
-    Type::Struct_,
-    Type::Union,
-    Type::FixedSizeBinary,
-    Type::FixedSizeList,
-    Type::Map,
-    Type::Duration,
-    Type::LargeBinary,
-    Type::LargeUtf8,
-    Type::LargeList,
-];
-
-/// ----------------------------------------------------------------------
-/// Top-level Type value, enabling extensible type-specific metadata. We can
-/// add new logical types to Type without breaking backwards compatibility
-#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
-#[repr(transparent)]
-pub struct Type(pub u8);
-#[allow(non_upper_case_globals)]
-impl Type {
-    pub const NONE: Self = Self(0);
-    pub const Null: Self = Self(1);
-    pub const Int: Self = Self(2);
-    pub const FloatingPoint: Self = Self(3);
-    pub const Binary: Self = Self(4);
-    pub const Utf8: Self = Self(5);
-    pub const Bool: Self = Self(6);
-    pub const Decimal: Self = Self(7);
-    pub const Date: Self = Self(8);
-    pub const Time: Self = Self(9);
-    pub const Timestamp: Self = Self(10);
-    pub const Interval: Self = Self(11);
-    pub const List: Self = Self(12);
-    pub const Struct_: Self = Self(13);
-    pub const Union: Self = Self(14);
-    pub const FixedSizeBinary: Self = Self(15);
-    pub const FixedSizeList: Self = Self(16);
-    pub const Map: Self = Self(17);
-    pub const Duration: Self = Self(18);
-    pub const LargeBinary: Self = Self(19);
-    pub const LargeUtf8: Self = Self(20);
-    pub const LargeList: Self = Self(21);
-
-    pub const ENUM_MIN: u8 = 0;
-    pub const ENUM_MAX: u8 = 21;
-    pub const ENUM_VALUES: &'static [Self] = &[
-        Self::NONE,
-        Self::Null,
-        Self::Int,
-        Self::FloatingPoint,
-        Self::Binary,
-        Self::Utf8,
-        Self::Bool,
-        Self::Decimal,
-        Self::Date,
-        Self::Time,
-        Self::Timestamp,
-        Self::Interval,
-        Self::List,
-        Self::Struct_,
-        Self::Union,
-        Self::FixedSizeBinary,
-        Self::FixedSizeList,
-        Self::Map,
-        Self::Duration,
-        Self::LargeBinary,
-        Self::LargeUtf8,
-        Self::LargeList,
-    ];
-    /// Returns the variant's name or "" if unknown.
-    pub fn variant_name(self) -> Option<&'static str> {
-        match self {
-            Self::NONE => Some("NONE"),
-            Self::Null => Some("Null"),
-            Self::Int => Some("Int"),
-            Self::FloatingPoint => Some("FloatingPoint"),
-            Self::Binary => Some("Binary"),
-            Self::Utf8 => Some("Utf8"),
-            Self::Bool => Some("Bool"),
-            Self::Decimal => Some("Decimal"),
-            Self::Date => Some("Date"),
-            Self::Time => Some("Time"),
-            Self::Timestamp => Some("Timestamp"),
-            Self::Interval => Some("Interval"),
-            Self::List => Some("List"),
-            Self::Struct_ => Some("Struct_"),
-            Self::Union => Some("Union"),
-            Self::FixedSizeBinary => Some("FixedSizeBinary"),
-            Self::FixedSizeList => Some("FixedSizeList"),
-            Self::Map => Some("Map"),
-            Self::Duration => Some("Duration"),
-            Self::LargeBinary => Some("LargeBinary"),
-            Self::LargeUtf8 => Some("LargeUtf8"),
-            Self::LargeList => Some("LargeList"),
-            _ => None,
-        }
-    }
-}
-impl std::fmt::Debug for Type {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        if let Some(name) = self.variant_name() {
-            f.write_str(name)
-        } else {
-            f.write_fmt(format_args!("<UNKNOWN {:?}>", self.0))
-        }
-    }
-}
-pub struct TypeUnionTableOffset {}
-impl<'a> flatbuffers::Follow<'a> for Type {
-    type Inner = Self;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        let b = flatbuffers::read_scalar_at::<u8>(buf, loc);
-        Self(b)
-    }
-}
-
-impl flatbuffers::Push for Type {
-    type Output = Type;
-    #[inline]
-    fn push(&self, dst: &mut [u8], _rest: &[u8]) {
-        flatbuffers::emplace_scalar::<u8>(dst, self.0);
-    }
-}
-
-impl flatbuffers::EndianScalar for Type {
-    #[inline]
-    fn to_little_endian(self) -> Self {
-        let b = u8::to_le(self.0);
-        Self(b)
-    }
-    #[inline]
-    fn from_little_endian(self) -> Self {
-        let b = u8::from_le(self.0);
-        Self(b)
-    }
-}
-
-impl<'a> flatbuffers::Verifiable for Type {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        u8::run_verifier(v, pos)
-    }
-}
-
-impl flatbuffers::SimpleToVerifyInSlice for Type {}
-#[deprecated(
-    since = "2.0.0",
-    note = "Use associated constants instead. This will no longer be generated in 2021."
-)]
-pub const ENUM_MIN_DICTIONARY_KIND: i16 = 0;
-#[deprecated(
-    since = "2.0.0",
-    note = "Use associated constants instead. This will no longer be generated in 2021."
-)]
-pub const ENUM_MAX_DICTIONARY_KIND: i16 = 0;
-#[deprecated(
-    since = "2.0.0",
-    note = "Use associated constants instead. This will no longer be generated in 2021."
-)]
-#[allow(non_camel_case_types)]
-pub const ENUM_VALUES_DICTIONARY_KIND: [DictionaryKind; 1] = [DictionaryKind::DenseArray];
-
-/// ----------------------------------------------------------------------
-/// Dictionary encoding metadata
-/// Maintained for forwards compatibility, in the future
-/// Dictionaries might be explicit maps between integers and values
-/// allowing for non-contiguous index values
-#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
-#[repr(transparent)]
-pub struct DictionaryKind(pub i16);
-#[allow(non_upper_case_globals)]
-impl DictionaryKind {
-    pub const DenseArray: Self = Self(0);
-
-    pub const ENUM_MIN: i16 = 0;
-    pub const ENUM_MAX: i16 = 0;
-    pub const ENUM_VALUES: &'static [Self] = &[Self::DenseArray];
-    /// Returns the variant's name or "" if unknown.
-    pub fn variant_name(self) -> Option<&'static str> {
-        match self {
-            Self::DenseArray => Some("DenseArray"),
-            _ => None,
-        }
-    }
-}
-impl std::fmt::Debug for DictionaryKind {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        if let Some(name) = self.variant_name() {
-            f.write_str(name)
-        } else {
-            f.write_fmt(format_args!("<UNKNOWN {:?}>", self.0))
-        }
-    }
-}
-impl<'a> flatbuffers::Follow<'a> for DictionaryKind {
-    type Inner = Self;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        let b = flatbuffers::read_scalar_at::<i16>(buf, loc);
-        Self(b)
-    }
-}
-
-impl flatbuffers::Push for DictionaryKind {
-    type Output = DictionaryKind;
-    #[inline]
-    fn push(&self, dst: &mut [u8], _rest: &[u8]) {
-        flatbuffers::emplace_scalar::<i16>(dst, self.0);
-    }
-}
-
-impl flatbuffers::EndianScalar for DictionaryKind {
-    #[inline]
-    fn to_little_endian(self) -> Self {
-        let b = i16::to_le(self.0);
-        Self(b)
-    }
-    #[inline]
-    fn from_little_endian(self) -> Self {
-        let b = i16::from_le(self.0);
-        Self(b)
-    }
-}
-
-impl<'a> flatbuffers::Verifiable for DictionaryKind {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        i16::run_verifier(v, pos)
-    }
-}
-
-impl flatbuffers::SimpleToVerifyInSlice for DictionaryKind {}
-#[deprecated(
-    since = "2.0.0",
-    note = "Use associated constants instead. This will no longer be generated in 2021."
-)]
-pub const ENUM_MIN_ENDIANNESS: i16 = 0;
-#[deprecated(
-    since = "2.0.0",
-    note = "Use associated constants instead. This will no longer be generated in 2021."
-)]
-pub const ENUM_MAX_ENDIANNESS: i16 = 1;
-#[deprecated(
-    since = "2.0.0",
-    note = "Use associated constants instead. This will no longer be generated in 2021."
-)]
-#[allow(non_camel_case_types)]
-pub const ENUM_VALUES_ENDIANNESS: [Endianness; 2] = [Endianness::Little, Endianness::Big];
-
-/// ----------------------------------------------------------------------
-/// Endianness of the platform producing the data
-#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
-#[repr(transparent)]
-pub struct Endianness(pub i16);
-#[allow(non_upper_case_globals)]
-impl Endianness {
-    pub const Little: Self = Self(0);
-    pub const Big: Self = Self(1);
-
-    pub const ENUM_MIN: i16 = 0;
-    pub const ENUM_MAX: i16 = 1;
-    pub const ENUM_VALUES: &'static [Self] = &[Self::Little, Self::Big];
-    /// Returns the variant's name or "" if unknown.
-    pub fn variant_name(self) -> Option<&'static str> {
-        match self {
-            Self::Little => Some("Little"),
-            Self::Big => Some("Big"),
-            _ => None,
-        }
-    }
-}
-impl std::fmt::Debug for Endianness {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        if let Some(name) = self.variant_name() {
-            f.write_str(name)
-        } else {
-            f.write_fmt(format_args!("<UNKNOWN {:?}>", self.0))
-        }
-    }
-}
-impl<'a> flatbuffers::Follow<'a> for Endianness {
-    type Inner = Self;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        let b = flatbuffers::read_scalar_at::<i16>(buf, loc);
-        Self(b)
-    }
-}
-
-impl flatbuffers::Push for Endianness {
-    type Output = Endianness;
-    #[inline]
-    fn push(&self, dst: &mut [u8], _rest: &[u8]) {
-        flatbuffers::emplace_scalar::<i16>(dst, self.0);
-    }
-}
-
-impl flatbuffers::EndianScalar for Endianness {
-    #[inline]
-    fn to_little_endian(self) -> Self {
-        let b = i16::to_le(self.0);
-        Self(b)
-    }
-    #[inline]
-    fn from_little_endian(self) -> Self {
-        let b = i16::from_le(self.0);
-        Self(b)
-    }
-}
-
-impl<'a> flatbuffers::Verifiable for Endianness {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        i16::run_verifier(v, pos)
-    }
-}
-
-impl flatbuffers::SimpleToVerifyInSlice for Endianness {}
-/// ----------------------------------------------------------------------
-/// A Buffer represents a single contiguous memory segment
-// struct Buffer, aligned to 8
-#[repr(transparent)]
-#[derive(Clone, Copy, PartialEq)]
-pub struct Buffer(pub [u8; 16]);
-impl std::fmt::Debug for Buffer {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        f.debug_struct("Buffer")
-            .field("offset", &self.offset())
-            .field("length", &self.length())
-            .finish()
-    }
-}
-
-impl flatbuffers::SimpleToVerifyInSlice for Buffer {}
-impl flatbuffers::SafeSliceAccess for Buffer {}
-impl<'a> flatbuffers::Follow<'a> for Buffer {
-    type Inner = &'a Buffer;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        <&'a Buffer>::follow(buf, loc)
-    }
-}
-impl<'a> flatbuffers::Follow<'a> for &'a Buffer {
-    type Inner = &'a Buffer;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        flatbuffers::follow_cast_ref::<Buffer>(buf, loc)
-    }
-}
-impl<'b> flatbuffers::Push for Buffer {
-    type Output = Buffer;
-    #[inline]
-    fn push(&self, dst: &mut [u8], _rest: &[u8]) {
-        let src = unsafe {
-            ::std::slice::from_raw_parts(self as *const Buffer as *const u8, Self::size())
-        };
-        dst.copy_from_slice(src);
-    }
-}
-impl<'b> flatbuffers::Push for &'b Buffer {
-    type Output = Buffer;
-
-    #[inline]
-    fn push(&self, dst: &mut [u8], _rest: &[u8]) {
-        let src = unsafe {
-            ::std::slice::from_raw_parts(
-                *self as *const Buffer as *const u8,
-                Self::size(),
-            )
-        };
-        dst.copy_from_slice(src);
-    }
-}
-
-impl<'a> flatbuffers::Verifiable for Buffer {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        v.in_buffer::<Self>(pos)
-    }
-}
-impl Buffer {
-    #[allow(clippy::too_many_arguments)]
-    pub fn new(offset: i64, length: i64) -> Self {
-        let mut s = Self([0; 16]);
-        s.set_offset(offset);
-        s.set_length(length);
-        s
-    }
-
-    /// The relative offset into the shared memory page where the bytes for this
-    /// buffer starts
-    pub fn offset(&self) -> i64 {
-        let mut mem = core::mem::MaybeUninit::<i64>::uninit();
-        unsafe {
-            core::ptr::copy_nonoverlapping(
-                self.0[0..].as_ptr(),
-                mem.as_mut_ptr() as *mut u8,
-                core::mem::size_of::<i64>(),
-            );
-            mem.assume_init()
-        }
-        .from_little_endian()
-    }
-
-    pub fn set_offset(&mut self, x: i64) {
-        let x_le = x.to_little_endian();
-        unsafe {
-            core::ptr::copy_nonoverlapping(
-                &x_le as *const i64 as *const u8,
-                self.0[0..].as_mut_ptr(),
-                core::mem::size_of::<i64>(),
-            );
-        }
-    }
-
-    /// The absolute length (in bytes) of the memory buffer. The memory is found
-    /// from offset (inclusive) to offset + length (non-inclusive). When building
-    /// messages using the encapsulated IPC message, padding bytes may be written
-    /// after a buffer, but such padding bytes do not need to be accounted for in
-    /// the size here.
-    pub fn length(&self) -> i64 {
-        let mut mem = core::mem::MaybeUninit::<i64>::uninit();
-        unsafe {
-            core::ptr::copy_nonoverlapping(
-                self.0[8..].as_ptr(),
-                mem.as_mut_ptr() as *mut u8,
-                core::mem::size_of::<i64>(),
-            );
-            mem.assume_init()
-        }
-        .from_little_endian()
-    }
-
-    pub fn set_length(&mut self, x: i64) {
-        let x_le = x.to_little_endian();
-        unsafe {
-            core::ptr::copy_nonoverlapping(
-                &x_le as *const i64 as *const u8,
-                self.0[8..].as_mut_ptr(),
-                core::mem::size_of::<i64>(),
-            );
-        }
-    }
-}
-
-pub enum NullOffset {}
-#[derive(Copy, Clone, PartialEq)]
-
-/// These are stored in the flatbuffer in the Type union below
-pub struct Null<'a> {
-    pub _tab: flatbuffers::Table<'a>,
-}
-
-impl<'a> flatbuffers::Follow<'a> for Null<'a> {
-    type Inner = Null<'a>;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        Self {
-            _tab: flatbuffers::Table { buf, loc },
-        }
-    }
-}
-
-impl<'a> Null<'a> {
-    #[inline]
-    pub fn init_from_table(table: flatbuffers::Table<'a>) -> Self {
-        Null { _tab: table }
-    }
-    #[allow(unused_mut)]
-    pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>(
-        _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>,
-        _args: &'args NullArgs,
-    ) -> flatbuffers::WIPOffset<Null<'bldr>> {
-        let mut builder = NullBuilder::new(_fbb);
-        builder.finish()
-    }
-}
-
-impl flatbuffers::Verifiable for Null<'_> {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        v.visit_table(pos)?.finish();
-        Ok(())
-    }
-}
-pub struct NullArgs {}
-impl<'a> Default for NullArgs {
-    #[inline]
-    fn default() -> Self {
-        NullArgs {}
-    }
-}
-pub struct NullBuilder<'a: 'b, 'b> {
-    fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    start_: flatbuffers::WIPOffset<flatbuffers::TableUnfinishedWIPOffset>,
-}
-impl<'a: 'b, 'b> NullBuilder<'a, 'b> {
-    #[inline]
-    pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>) -> NullBuilder<'a, 'b> {
-        let start = _fbb.start_table();
-        NullBuilder {
-            fbb_: _fbb,
-            start_: start,
-        }
-    }
-    #[inline]
-    pub fn finish(self) -> flatbuffers::WIPOffset<Null<'a>> {
-        let o = self.fbb_.end_table(self.start_);
-        flatbuffers::WIPOffset::new(o.value())
-    }
-}
-
-impl std::fmt::Debug for Null<'_> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let mut ds = f.debug_struct("Null");
-        ds.finish()
-    }
-}
-pub enum Struct_Offset {}
-#[derive(Copy, Clone, PartialEq)]
-
-/// A Struct_ in the flatbuffer metadata is the same as an Arrow Struct
-/// (according to the physical memory layout). We used Struct_ here as
-/// Struct is a reserved word in Flatbuffers
-pub struct Struct_<'a> {
-    pub _tab: flatbuffers::Table<'a>,
-}
-
-impl<'a> flatbuffers::Follow<'a> for Struct_<'a> {
-    type Inner = Struct_<'a>;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        Self {
-            _tab: flatbuffers::Table { buf, loc },
-        }
-    }
-}
-
-impl<'a> Struct_<'a> {
-    #[inline]
-    pub fn init_from_table(table: flatbuffers::Table<'a>) -> Self {
-        Struct_ { _tab: table }
-    }
-    #[allow(unused_mut)]
-    pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>(
-        _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>,
-        _args: &'args Struct_Args,
-    ) -> flatbuffers::WIPOffset<Struct_<'bldr>> {
-        let mut builder = Struct_Builder::new(_fbb);
-        builder.finish()
-    }
-}
-
-impl flatbuffers::Verifiable for Struct_<'_> {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        v.visit_table(pos)?.finish();
-        Ok(())
-    }
-}
-pub struct Struct_Args {}
-impl<'a> Default for Struct_Args {
-    #[inline]
-    fn default() -> Self {
-        Struct_Args {}
-    }
-}
-pub struct Struct_Builder<'a: 'b, 'b> {
-    fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    start_: flatbuffers::WIPOffset<flatbuffers::TableUnfinishedWIPOffset>,
-}
-impl<'a: 'b, 'b> Struct_Builder<'a, 'b> {
-    #[inline]
-    pub fn new(
-        _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    ) -> Struct_Builder<'a, 'b> {
-        let start = _fbb.start_table();
-        Struct_Builder {
-            fbb_: _fbb,
-            start_: start,
-        }
-    }
-    #[inline]
-    pub fn finish(self) -> flatbuffers::WIPOffset<Struct_<'a>> {
-        let o = self.fbb_.end_table(self.start_);
-        flatbuffers::WIPOffset::new(o.value())
-    }
-}
-
-impl std::fmt::Debug for Struct_<'_> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let mut ds = f.debug_struct("Struct_");
-        ds.finish()
-    }
-}
-pub enum ListOffset {}
-#[derive(Copy, Clone, PartialEq)]
-
-pub struct List<'a> {
-    pub _tab: flatbuffers::Table<'a>,
-}
-
-impl<'a> flatbuffers::Follow<'a> for List<'a> {
-    type Inner = List<'a>;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        Self {
-            _tab: flatbuffers::Table { buf, loc },
-        }
-    }
-}
-
-impl<'a> List<'a> {
-    #[inline]
-    pub fn init_from_table(table: flatbuffers::Table<'a>) -> Self {
-        List { _tab: table }
-    }
-    #[allow(unused_mut)]
-    pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>(
-        _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>,
-        _args: &'args ListArgs,
-    ) -> flatbuffers::WIPOffset<List<'bldr>> {
-        let mut builder = ListBuilder::new(_fbb);
-        builder.finish()
-    }
-}
-
-impl flatbuffers::Verifiable for List<'_> {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        v.visit_table(pos)?.finish();
-        Ok(())
-    }
-}
-pub struct ListArgs {}
-impl<'a> Default for ListArgs {
-    #[inline]
-    fn default() -> Self {
-        ListArgs {}
-    }
-}
-pub struct ListBuilder<'a: 'b, 'b> {
-    fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    start_: flatbuffers::WIPOffset<flatbuffers::TableUnfinishedWIPOffset>,
-}
-impl<'a: 'b, 'b> ListBuilder<'a, 'b> {
-    #[inline]
-    pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>) -> ListBuilder<'a, 'b> {
-        let start = _fbb.start_table();
-        ListBuilder {
-            fbb_: _fbb,
-            start_: start,
-        }
-    }
-    #[inline]
-    pub fn finish(self) -> flatbuffers::WIPOffset<List<'a>> {
-        let o = self.fbb_.end_table(self.start_);
-        flatbuffers::WIPOffset::new(o.value())
-    }
-}
-
-impl std::fmt::Debug for List<'_> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let mut ds = f.debug_struct("List");
-        ds.finish()
-    }
-}
-pub enum LargeListOffset {}
-#[derive(Copy, Clone, PartialEq)]
-
-/// Same as List, but with 64-bit offsets, allowing to represent
-/// extremely large data values.
-pub struct LargeList<'a> {
-    pub _tab: flatbuffers::Table<'a>,
-}
-
-impl<'a> flatbuffers::Follow<'a> for LargeList<'a> {
-    type Inner = LargeList<'a>;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        Self {
-            _tab: flatbuffers::Table { buf, loc },
-        }
-    }
-}
-
-impl<'a> LargeList<'a> {
-    #[inline]
-    pub fn init_from_table(table: flatbuffers::Table<'a>) -> Self {
-        LargeList { _tab: table }
-    }
-    #[allow(unused_mut)]
-    pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>(
-        _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>,
-        _args: &'args LargeListArgs,
-    ) -> flatbuffers::WIPOffset<LargeList<'bldr>> {
-        let mut builder = LargeListBuilder::new(_fbb);
-        builder.finish()
-    }
-}
-
-impl flatbuffers::Verifiable for LargeList<'_> {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        v.visit_table(pos)?.finish();
-        Ok(())
-    }
-}
-pub struct LargeListArgs {}
-impl<'a> Default for LargeListArgs {
-    #[inline]
-    fn default() -> Self {
-        LargeListArgs {}
-    }
-}
-pub struct LargeListBuilder<'a: 'b, 'b> {
-    fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    start_: flatbuffers::WIPOffset<flatbuffers::TableUnfinishedWIPOffset>,
-}
-impl<'a: 'b, 'b> LargeListBuilder<'a, 'b> {
-    #[inline]
-    pub fn new(
-        _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    ) -> LargeListBuilder<'a, 'b> {
-        let start = _fbb.start_table();
-        LargeListBuilder {
-            fbb_: _fbb,
-            start_: start,
-        }
-    }
-    #[inline]
-    pub fn finish(self) -> flatbuffers::WIPOffset<LargeList<'a>> {
-        let o = self.fbb_.end_table(self.start_);
-        flatbuffers::WIPOffset::new(o.value())
-    }
-}
-
-impl std::fmt::Debug for LargeList<'_> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let mut ds = f.debug_struct("LargeList");
-        ds.finish()
-    }
-}
-pub enum FixedSizeListOffset {}
-#[derive(Copy, Clone, PartialEq)]
-
-pub struct FixedSizeList<'a> {
-    pub _tab: flatbuffers::Table<'a>,
-}
-
-impl<'a> flatbuffers::Follow<'a> for FixedSizeList<'a> {
-    type Inner = FixedSizeList<'a>;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        Self {
-            _tab: flatbuffers::Table { buf, loc },
-        }
-    }
-}
-
-impl<'a> FixedSizeList<'a> {
-    #[inline]
-    pub fn init_from_table(table: flatbuffers::Table<'a>) -> Self {
-        FixedSizeList { _tab: table }
-    }
-    #[allow(unused_mut)]
-    pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>(
-        _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>,
-        args: &'args FixedSizeListArgs,
-    ) -> flatbuffers::WIPOffset<FixedSizeList<'bldr>> {
-        let mut builder = FixedSizeListBuilder::new(_fbb);
-        builder.add_listSize(args.listSize);
-        builder.finish()
-    }
-
-    pub const VT_LISTSIZE: flatbuffers::VOffsetT = 4;
-
-    /// Number of list items per value
-    #[inline]
-    pub fn listSize(&self) -> i32 {
-        self._tab
-            .get::<i32>(FixedSizeList::VT_LISTSIZE, Some(0))
-            .unwrap()
-    }
-}
-
-impl flatbuffers::Verifiable for FixedSizeList<'_> {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        v.visit_table(pos)?
-            .visit_field::<i32>(&"listSize", Self::VT_LISTSIZE, false)?
-            .finish();
-        Ok(())
-    }
-}
-pub struct FixedSizeListArgs {
-    pub listSize: i32,
-}
-impl<'a> Default for FixedSizeListArgs {
-    #[inline]
-    fn default() -> Self {
-        FixedSizeListArgs { listSize: 0 }
-    }
-}
-pub struct FixedSizeListBuilder<'a: 'b, 'b> {
-    fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    start_: flatbuffers::WIPOffset<flatbuffers::TableUnfinishedWIPOffset>,
-}
-impl<'a: 'b, 'b> FixedSizeListBuilder<'a, 'b> {
-    #[inline]
-    pub fn add_listSize(&mut self, listSize: i32) {
-        self.fbb_
-            .push_slot::<i32>(FixedSizeList::VT_LISTSIZE, listSize, 0);
-    }
-    #[inline]
-    pub fn new(
-        _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    ) -> FixedSizeListBuilder<'a, 'b> {
-        let start = _fbb.start_table();
-        FixedSizeListBuilder {
-            fbb_: _fbb,
-            start_: start,
-        }
-    }
-    #[inline]
-    pub fn finish(self) -> flatbuffers::WIPOffset<FixedSizeList<'a>> {
-        let o = self.fbb_.end_table(self.start_);
-        flatbuffers::WIPOffset::new(o.value())
-    }
-}
-
-impl std::fmt::Debug for FixedSizeList<'_> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let mut ds = f.debug_struct("FixedSizeList");
-        ds.field("listSize", &self.listSize());
-        ds.finish()
-    }
-}
-pub enum MapOffset {}
-#[derive(Copy, Clone, PartialEq)]
-
-/// A Map is a logical nested type that is represented as
-///
-/// List<entries: Struct<key: K, value: V>>
-///
-/// In this layout, the keys and values are each respectively contiguous. We do
-/// not constrain the key and value types, so the application is responsible
-/// for ensuring that the keys are hashable and unique. Whether the keys are sorted
-/// may be set in the metadata for this field.
-///
-/// In a field with Map type, the field has a child Struct field, which then
-/// has two children: key type and the second the value type. The names of the
-/// child fields may be respectively "entries", "key", and "value", but this is
-/// not enforced.
-///
-/// Map
-/// ```text
-///   - child[0] entries: Struct
-///     - child[0] key: K
-///     - child[1] value: V
-/// ```
-/// Neither the "entries" field nor the "key" field may be nullable.
-///
-/// The metadata is structured so that Arrow systems without special handling
-/// for Map can make Map an alias for List. The "layout" attribute for the Map
-/// field must have the same contents as a List.
-pub struct Map<'a> {
-    pub _tab: flatbuffers::Table<'a>,
-}
-
-impl<'a> flatbuffers::Follow<'a> for Map<'a> {
-    type Inner = Map<'a>;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        Self {
-            _tab: flatbuffers::Table { buf, loc },
-        }
-    }
-}
-
-impl<'a> Map<'a> {
-    #[inline]
-    pub fn init_from_table(table: flatbuffers::Table<'a>) -> Self {
-        Map { _tab: table }
-    }
-    #[allow(unused_mut)]
-    pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>(
-        _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>,
-        args: &'args MapArgs,
-    ) -> flatbuffers::WIPOffset<Map<'bldr>> {
-        let mut builder = MapBuilder::new(_fbb);
-        builder.add_keysSorted(args.keysSorted);
-        builder.finish()
-    }
-
-    pub const VT_KEYSSORTED: flatbuffers::VOffsetT = 4;
-
-    /// Set to true if the keys within each value are sorted
-    #[inline]
-    pub fn keysSorted(&self) -> bool {
-        self._tab
-            .get::<bool>(Map::VT_KEYSSORTED, Some(false))
-            .unwrap()
-    }
-}
-
-impl flatbuffers::Verifiable for Map<'_> {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        v.visit_table(pos)?
-            .visit_field::<bool>(&"keysSorted", Self::VT_KEYSSORTED, false)?
-            .finish();
-        Ok(())
-    }
-}
-pub struct MapArgs {
-    pub keysSorted: bool,
-}
-impl<'a> Default for MapArgs {
-    #[inline]
-    fn default() -> Self {
-        MapArgs { keysSorted: false }
-    }
-}
-pub struct MapBuilder<'a: 'b, 'b> {
-    fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    start_: flatbuffers::WIPOffset<flatbuffers::TableUnfinishedWIPOffset>,
-}
-impl<'a: 'b, 'b> MapBuilder<'a, 'b> {
-    #[inline]
-    pub fn add_keysSorted(&mut self, keysSorted: bool) {
-        self.fbb_
-            .push_slot::<bool>(Map::VT_KEYSSORTED, keysSorted, false);
-    }
-    #[inline]
-    pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>) -> MapBuilder<'a, 'b> {
-        let start = _fbb.start_table();
-        MapBuilder {
-            fbb_: _fbb,
-            start_: start,
-        }
-    }
-    #[inline]
-    pub fn finish(self) -> flatbuffers::WIPOffset<Map<'a>> {
-        let o = self.fbb_.end_table(self.start_);
-        flatbuffers::WIPOffset::new(o.value())
-    }
-}
-
-impl std::fmt::Debug for Map<'_> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let mut ds = f.debug_struct("Map");
-        ds.field("keysSorted", &self.keysSorted());
-        ds.finish()
-    }
-}
-pub enum UnionOffset {}
-#[derive(Copy, Clone, PartialEq)]
-
-/// A union is a complex type with children in Field
-/// By default ids in the type vector refer to the offsets in the children
-/// optionally typeIds provides an indirection between the child offset and the type id
-/// for each child `typeIds[offset]` is the id used in the type vector
-pub struct Union<'a> {
-    pub _tab: flatbuffers::Table<'a>,
-}
-
-impl<'a> flatbuffers::Follow<'a> for Union<'a> {
-    type Inner = Union<'a>;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        Self {
-            _tab: flatbuffers::Table { buf, loc },
-        }
-    }
-}
-
-impl<'a> Union<'a> {
-    #[inline]
-    pub fn init_from_table(table: flatbuffers::Table<'a>) -> Self {
-        Union { _tab: table }
-    }
-    #[allow(unused_mut)]
-    pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>(
-        _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>,
-        args: &'args UnionArgs<'args>,
-    ) -> flatbuffers::WIPOffset<Union<'bldr>> {
-        let mut builder = UnionBuilder::new(_fbb);
-        if let Some(x) = args.typeIds {
-            builder.add_typeIds(x);
-        }
-        builder.add_mode(args.mode);
-        builder.finish()
-    }
-
-    pub const VT_MODE: flatbuffers::VOffsetT = 4;
-    pub const VT_TYPEIDS: flatbuffers::VOffsetT = 6;
-
-    #[inline]
-    pub fn mode(&self) -> UnionMode {
-        self._tab
-            .get::<UnionMode>(Union::VT_MODE, Some(UnionMode::Sparse))
-            .unwrap()
-    }
-    #[inline]
-    pub fn typeIds(&self) -> Option<flatbuffers::Vector<'a, i32>> {
-        self._tab
-            .get::<flatbuffers::ForwardsUOffset<flatbuffers::Vector<'a, i32>>>(
-                Union::VT_TYPEIDS,
-                None,
-            )
-    }
-}
-
-impl flatbuffers::Verifiable for Union<'_> {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        v.visit_table(pos)?
-            .visit_field::<UnionMode>(&"mode", Self::VT_MODE, false)?
-            .visit_field::<flatbuffers::ForwardsUOffset<flatbuffers::Vector<'_, i32>>>(
-                &"typeIds",
-                Self::VT_TYPEIDS,
-                false,
-            )?
-            .finish();
-        Ok(())
-    }
-}
-pub struct UnionArgs<'a> {
-    pub mode: UnionMode,
-    pub typeIds: Option<flatbuffers::WIPOffset<flatbuffers::Vector<'a, i32>>>,
-}
-impl<'a> Default for UnionArgs<'a> {
-    #[inline]
-    fn default() -> Self {
-        UnionArgs {
-            mode: UnionMode::Sparse,
-            typeIds: None,
-        }
-    }
-}
-pub struct UnionBuilder<'a: 'b, 'b> {
-    fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    start_: flatbuffers::WIPOffset<flatbuffers::TableUnfinishedWIPOffset>,
-}
-impl<'a: 'b, 'b> UnionBuilder<'a, 'b> {
-    #[inline]
-    pub fn add_mode(&mut self, mode: UnionMode) {
-        self.fbb_
-            .push_slot::<UnionMode>(Union::VT_MODE, mode, UnionMode::Sparse);
-    }
-    #[inline]
-    pub fn add_typeIds(
-        &mut self,
-        typeIds: flatbuffers::WIPOffset<flatbuffers::Vector<'b, i32>>,
-    ) {
-        self.fbb_
-            .push_slot_always::<flatbuffers::WIPOffset<_>>(Union::VT_TYPEIDS, typeIds);
-    }
-    #[inline]
-    pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>) -> UnionBuilder<'a, 'b> {
-        let start = _fbb.start_table();
-        UnionBuilder {
-            fbb_: _fbb,
-            start_: start,
-        }
-    }
-    #[inline]
-    pub fn finish(self) -> flatbuffers::WIPOffset<Union<'a>> {
-        let o = self.fbb_.end_table(self.start_);
-        flatbuffers::WIPOffset::new(o.value())
-    }
-}
-
-impl std::fmt::Debug for Union<'_> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let mut ds = f.debug_struct("Union");
-        ds.field("mode", &self.mode());
-        ds.field("typeIds", &self.typeIds());
-        ds.finish()
-    }
-}
-pub enum IntOffset {}
-#[derive(Copy, Clone, PartialEq)]
-
-pub struct Int<'a> {
-    pub _tab: flatbuffers::Table<'a>,
-}
-
-impl<'a> flatbuffers::Follow<'a> for Int<'a> {
-    type Inner = Int<'a>;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        Self {
-            _tab: flatbuffers::Table { buf, loc },
-        }
-    }
-}
-
-impl<'a> Int<'a> {
-    #[inline]
-    pub fn init_from_table(table: flatbuffers::Table<'a>) -> Self {
-        Int { _tab: table }
-    }
-    #[allow(unused_mut)]
-    pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>(
-        _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>,
-        args: &'args IntArgs,
-    ) -> flatbuffers::WIPOffset<Int<'bldr>> {
-        let mut builder = IntBuilder::new(_fbb);
-        builder.add_bitWidth(args.bitWidth);
-        builder.add_is_signed(args.is_signed);
-        builder.finish()
-    }
-
-    pub const VT_BITWIDTH: flatbuffers::VOffsetT = 4;
-    pub const VT_IS_SIGNED: flatbuffers::VOffsetT = 6;
-
-    #[inline]
-    pub fn bitWidth(&self) -> i32 {
-        self._tab.get::<i32>(Int::VT_BITWIDTH, Some(0)).unwrap()
-    }
-    #[inline]
-    pub fn is_signed(&self) -> bool {
-        self._tab
-            .get::<bool>(Int::VT_IS_SIGNED, Some(false))
-            .unwrap()
-    }
-}
-
-impl flatbuffers::Verifiable for Int<'_> {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        v.visit_table(pos)?
-            .visit_field::<i32>(&"bitWidth", Self::VT_BITWIDTH, false)?
-            .visit_field::<bool>(&"is_signed", Self::VT_IS_SIGNED, false)?
-            .finish();
-        Ok(())
-    }
-}
-pub struct IntArgs {
-    pub bitWidth: i32,
-    pub is_signed: bool,
-}
-impl<'a> Default for IntArgs {
-    #[inline]
-    fn default() -> Self {
-        IntArgs {
-            bitWidth: 0,
-            is_signed: false,
-        }
-    }
-}
-pub struct IntBuilder<'a: 'b, 'b> {
-    fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    start_: flatbuffers::WIPOffset<flatbuffers::TableUnfinishedWIPOffset>,
-}
-impl<'a: 'b, 'b> IntBuilder<'a, 'b> {
-    #[inline]
-    pub fn add_bitWidth(&mut self, bitWidth: i32) {
-        self.fbb_.push_slot::<i32>(Int::VT_BITWIDTH, bitWidth, 0);
-    }
-    #[inline]
-    pub fn add_is_signed(&mut self, is_signed: bool) {
-        self.fbb_
-            .push_slot::<bool>(Int::VT_IS_SIGNED, is_signed, false);
-    }
-    #[inline]
-    pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>) -> IntBuilder<'a, 'b> {
-        let start = _fbb.start_table();
-        IntBuilder {
-            fbb_: _fbb,
-            start_: start,
-        }
-    }
-    #[inline]
-    pub fn finish(self) -> flatbuffers::WIPOffset<Int<'a>> {
-        let o = self.fbb_.end_table(self.start_);
-        flatbuffers::WIPOffset::new(o.value())
-    }
-}
-
-impl std::fmt::Debug for Int<'_> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let mut ds = f.debug_struct("Int");
-        ds.field("bitWidth", &self.bitWidth());
-        ds.field("is_signed", &self.is_signed());
-        ds.finish()
-    }
-}
-pub enum FloatingPointOffset {}
-#[derive(Copy, Clone, PartialEq)]
-
-pub struct FloatingPoint<'a> {
-    pub _tab: flatbuffers::Table<'a>,
-}
-
-impl<'a> flatbuffers::Follow<'a> for FloatingPoint<'a> {
-    type Inner = FloatingPoint<'a>;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        Self {
-            _tab: flatbuffers::Table { buf, loc },
-        }
-    }
-}
-
-impl<'a> FloatingPoint<'a> {
-    #[inline]
-    pub fn init_from_table(table: flatbuffers::Table<'a>) -> Self {
-        FloatingPoint { _tab: table }
-    }
-    #[allow(unused_mut)]
-    pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>(
-        _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>,
-        args: &'args FloatingPointArgs,
-    ) -> flatbuffers::WIPOffset<FloatingPoint<'bldr>> {
-        let mut builder = FloatingPointBuilder::new(_fbb);
-        builder.add_precision(args.precision);
-        builder.finish()
-    }
-
-    pub const VT_PRECISION: flatbuffers::VOffsetT = 4;
-
-    #[inline]
-    pub fn precision(&self) -> Precision {
-        self._tab
-            .get::<Precision>(FloatingPoint::VT_PRECISION, Some(Precision::HALF))
-            .unwrap()
-    }
-}
-
-impl flatbuffers::Verifiable for FloatingPoint<'_> {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        v.visit_table(pos)?
-            .visit_field::<Precision>(&"precision", Self::VT_PRECISION, false)?
-            .finish();
-        Ok(())
-    }
-}
-pub struct FloatingPointArgs {
-    pub precision: Precision,
-}
-impl<'a> Default for FloatingPointArgs {
-    #[inline]
-    fn default() -> Self {
-        FloatingPointArgs {
-            precision: Precision::HALF,
-        }
-    }
-}
-pub struct FloatingPointBuilder<'a: 'b, 'b> {
-    fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    start_: flatbuffers::WIPOffset<flatbuffers::TableUnfinishedWIPOffset>,
-}
-impl<'a: 'b, 'b> FloatingPointBuilder<'a, 'b> {
-    #[inline]
-    pub fn add_precision(&mut self, precision: Precision) {
-        self.fbb_.push_slot::<Precision>(
-            FloatingPoint::VT_PRECISION,
-            precision,
-            Precision::HALF,
-        );
-    }
-    #[inline]
-    pub fn new(
-        _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    ) -> FloatingPointBuilder<'a, 'b> {
-        let start = _fbb.start_table();
-        FloatingPointBuilder {
-            fbb_: _fbb,
-            start_: start,
-        }
-    }
-    #[inline]
-    pub fn finish(self) -> flatbuffers::WIPOffset<FloatingPoint<'a>> {
-        let o = self.fbb_.end_table(self.start_);
-        flatbuffers::WIPOffset::new(o.value())
-    }
-}
-
-impl std::fmt::Debug for FloatingPoint<'_> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let mut ds = f.debug_struct("FloatingPoint");
-        ds.field("precision", &self.precision());
-        ds.finish()
-    }
-}
-pub enum Utf8Offset {}
-#[derive(Copy, Clone, PartialEq)]
-
-/// Unicode with UTF-8 encoding
-pub struct Utf8<'a> {
-    pub _tab: flatbuffers::Table<'a>,
-}
-
-impl<'a> flatbuffers::Follow<'a> for Utf8<'a> {
-    type Inner = Utf8<'a>;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        Self {
-            _tab: flatbuffers::Table { buf, loc },
-        }
-    }
-}
-
-impl<'a> Utf8<'a> {
-    #[inline]
-    pub fn init_from_table(table: flatbuffers::Table<'a>) -> Self {
-        Utf8 { _tab: table }
-    }
-    #[allow(unused_mut)]
-    pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>(
-        _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>,
-        _args: &'args Utf8Args,
-    ) -> flatbuffers::WIPOffset<Utf8<'bldr>> {
-        let mut builder = Utf8Builder::new(_fbb);
-        builder.finish()
-    }
-}
-
-impl flatbuffers::Verifiable for Utf8<'_> {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        v.visit_table(pos)?.finish();
-        Ok(())
-    }
-}
-pub struct Utf8Args {}
-impl<'a> Default for Utf8Args {
-    #[inline]
-    fn default() -> Self {
-        Utf8Args {}
-    }
-}
-pub struct Utf8Builder<'a: 'b, 'b> {
-    fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    start_: flatbuffers::WIPOffset<flatbuffers::TableUnfinishedWIPOffset>,
-}
-impl<'a: 'b, 'b> Utf8Builder<'a, 'b> {
-    #[inline]
-    pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>) -> Utf8Builder<'a, 'b> {
-        let start = _fbb.start_table();
-        Utf8Builder {
-            fbb_: _fbb,
-            start_: start,
-        }
-    }
-    #[inline]
-    pub fn finish(self) -> flatbuffers::WIPOffset<Utf8<'a>> {
-        let o = self.fbb_.end_table(self.start_);
-        flatbuffers::WIPOffset::new(o.value())
-    }
-}
-
-impl std::fmt::Debug for Utf8<'_> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let mut ds = f.debug_struct("Utf8");
-        ds.finish()
-    }
-}
-pub enum BinaryOffset {}
-#[derive(Copy, Clone, PartialEq)]
-
-/// Opaque binary data
-pub struct Binary<'a> {
-    pub _tab: flatbuffers::Table<'a>,
-}
-
-impl<'a> flatbuffers::Follow<'a> for Binary<'a> {
-    type Inner = Binary<'a>;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        Self {
-            _tab: flatbuffers::Table { buf, loc },
-        }
-    }
-}
-
-impl<'a> Binary<'a> {
-    #[inline]
-    pub fn init_from_table(table: flatbuffers::Table<'a>) -> Self {
-        Binary { _tab: table }
-    }
-    #[allow(unused_mut)]
-    pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>(
-        _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>,
-        _args: &'args BinaryArgs,
-    ) -> flatbuffers::WIPOffset<Binary<'bldr>> {
-        let mut builder = BinaryBuilder::new(_fbb);
-        builder.finish()
-    }
-}
-
-impl flatbuffers::Verifiable for Binary<'_> {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        v.visit_table(pos)?.finish();
-        Ok(())
-    }
-}
-pub struct BinaryArgs {}
-impl<'a> Default for BinaryArgs {
-    #[inline]
-    fn default() -> Self {
-        BinaryArgs {}
-    }
-}
-pub struct BinaryBuilder<'a: 'b, 'b> {
-    fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    start_: flatbuffers::WIPOffset<flatbuffers::TableUnfinishedWIPOffset>,
-}
-impl<'a: 'b, 'b> BinaryBuilder<'a, 'b> {
-    #[inline]
-    pub fn new(
-        _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    ) -> BinaryBuilder<'a, 'b> {
-        let start = _fbb.start_table();
-        BinaryBuilder {
-            fbb_: _fbb,
-            start_: start,
-        }
-    }
-    #[inline]
-    pub fn finish(self) -> flatbuffers::WIPOffset<Binary<'a>> {
-        let o = self.fbb_.end_table(self.start_);
-        flatbuffers::WIPOffset::new(o.value())
-    }
-}
-
-impl std::fmt::Debug for Binary<'_> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let mut ds = f.debug_struct("Binary");
-        ds.finish()
-    }
-}
-pub enum LargeUtf8Offset {}
-#[derive(Copy, Clone, PartialEq)]
-
-/// Same as Utf8, but with 64-bit offsets, allowing to represent
-/// extremely large data values.
-pub struct LargeUtf8<'a> {
-    pub _tab: flatbuffers::Table<'a>,
-}
-
-impl<'a> flatbuffers::Follow<'a> for LargeUtf8<'a> {
-    type Inner = LargeUtf8<'a>;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        Self {
-            _tab: flatbuffers::Table { buf, loc },
-        }
-    }
-}
-
-impl<'a> LargeUtf8<'a> {
-    #[inline]
-    pub fn init_from_table(table: flatbuffers::Table<'a>) -> Self {
-        LargeUtf8 { _tab: table }
-    }
-    #[allow(unused_mut)]
-    pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>(
-        _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>,
-        _args: &'args LargeUtf8Args,
-    ) -> flatbuffers::WIPOffset<LargeUtf8<'bldr>> {
-        let mut builder = LargeUtf8Builder::new(_fbb);
-        builder.finish()
-    }
-}
-
-impl flatbuffers::Verifiable for LargeUtf8<'_> {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        v.visit_table(pos)?.finish();
-        Ok(())
-    }
-}
-pub struct LargeUtf8Args {}
-impl<'a> Default for LargeUtf8Args {
-    #[inline]
-    fn default() -> Self {
-        LargeUtf8Args {}
-    }
-}
-pub struct LargeUtf8Builder<'a: 'b, 'b> {
-    fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    start_: flatbuffers::WIPOffset<flatbuffers::TableUnfinishedWIPOffset>,
-}
-impl<'a: 'b, 'b> LargeUtf8Builder<'a, 'b> {
-    #[inline]
-    pub fn new(
-        _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    ) -> LargeUtf8Builder<'a, 'b> {
-        let start = _fbb.start_table();
-        LargeUtf8Builder {
-            fbb_: _fbb,
-            start_: start,
-        }
-    }
-    #[inline]
-    pub fn finish(self) -> flatbuffers::WIPOffset<LargeUtf8<'a>> {
-        let o = self.fbb_.end_table(self.start_);
-        flatbuffers::WIPOffset::new(o.value())
-    }
-}
-
-impl std::fmt::Debug for LargeUtf8<'_> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let mut ds = f.debug_struct("LargeUtf8");
-        ds.finish()
-    }
-}
-pub enum LargeBinaryOffset {}
-#[derive(Copy, Clone, PartialEq)]
-
-/// Same as Binary, but with 64-bit offsets, allowing to represent
-/// extremely large data values.
-pub struct LargeBinary<'a> {
-    pub _tab: flatbuffers::Table<'a>,
-}
-
-impl<'a> flatbuffers::Follow<'a> for LargeBinary<'a> {
-    type Inner = LargeBinary<'a>;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        Self {
-            _tab: flatbuffers::Table { buf, loc },
-        }
-    }
-}
-
-impl<'a> LargeBinary<'a> {
-    #[inline]
-    pub fn init_from_table(table: flatbuffers::Table<'a>) -> Self {
-        LargeBinary { _tab: table }
-    }
-    #[allow(unused_mut)]
-    pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>(
-        _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>,
-        _args: &'args LargeBinaryArgs,
-    ) -> flatbuffers::WIPOffset<LargeBinary<'bldr>> {
-        let mut builder = LargeBinaryBuilder::new(_fbb);
-        builder.finish()
-    }
-}
-
-impl flatbuffers::Verifiable for LargeBinary<'_> {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        v.visit_table(pos)?.finish();
-        Ok(())
-    }
-}
-pub struct LargeBinaryArgs {}
-impl<'a> Default for LargeBinaryArgs {
-    #[inline]
-    fn default() -> Self {
-        LargeBinaryArgs {}
-    }
-}
-pub struct LargeBinaryBuilder<'a: 'b, 'b> {
-    fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    start_: flatbuffers::WIPOffset<flatbuffers::TableUnfinishedWIPOffset>,
-}
-impl<'a: 'b, 'b> LargeBinaryBuilder<'a, 'b> {
-    #[inline]
-    pub fn new(
-        _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    ) -> LargeBinaryBuilder<'a, 'b> {
-        let start = _fbb.start_table();
-        LargeBinaryBuilder {
-            fbb_: _fbb,
-            start_: start,
-        }
-    }
-    #[inline]
-    pub fn finish(self) -> flatbuffers::WIPOffset<LargeBinary<'a>> {
-        let o = self.fbb_.end_table(self.start_);
-        flatbuffers::WIPOffset::new(o.value())
-    }
-}
-
-impl std::fmt::Debug for LargeBinary<'_> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let mut ds = f.debug_struct("LargeBinary");
-        ds.finish()
-    }
-}
-pub enum FixedSizeBinaryOffset {}
-#[derive(Copy, Clone, PartialEq)]
-
-pub struct FixedSizeBinary<'a> {
-    pub _tab: flatbuffers::Table<'a>,
-}
-
-impl<'a> flatbuffers::Follow<'a> for FixedSizeBinary<'a> {
-    type Inner = FixedSizeBinary<'a>;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        Self {
-            _tab: flatbuffers::Table { buf, loc },
-        }
-    }
-}
-
-impl<'a> FixedSizeBinary<'a> {
-    #[inline]
-    pub fn init_from_table(table: flatbuffers::Table<'a>) -> Self {
-        FixedSizeBinary { _tab: table }
-    }
-    #[allow(unused_mut)]
-    pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>(
-        _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>,
-        args: &'args FixedSizeBinaryArgs,
-    ) -> flatbuffers::WIPOffset<FixedSizeBinary<'bldr>> {
-        let mut builder = FixedSizeBinaryBuilder::new(_fbb);
-        builder.add_byteWidth(args.byteWidth);
-        builder.finish()
-    }
-
-    pub const VT_BYTEWIDTH: flatbuffers::VOffsetT = 4;
-
-    /// Number of bytes per value
-    #[inline]
-    pub fn byteWidth(&self) -> i32 {
-        self._tab
-            .get::<i32>(FixedSizeBinary::VT_BYTEWIDTH, Some(0))
-            .unwrap()
-    }
-}
-
-impl flatbuffers::Verifiable for FixedSizeBinary<'_> {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        v.visit_table(pos)?
-            .visit_field::<i32>(&"byteWidth", Self::VT_BYTEWIDTH, false)?
-            .finish();
-        Ok(())
-    }
-}
-pub struct FixedSizeBinaryArgs {
-    pub byteWidth: i32,
-}
-impl<'a> Default for FixedSizeBinaryArgs {
-    #[inline]
-    fn default() -> Self {
-        FixedSizeBinaryArgs { byteWidth: 0 }
-    }
-}
-pub struct FixedSizeBinaryBuilder<'a: 'b, 'b> {
-    fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    start_: flatbuffers::WIPOffset<flatbuffers::TableUnfinishedWIPOffset>,
-}
-impl<'a: 'b, 'b> FixedSizeBinaryBuilder<'a, 'b> {
-    #[inline]
-    pub fn add_byteWidth(&mut self, byteWidth: i32) {
-        self.fbb_
-            .push_slot::<i32>(FixedSizeBinary::VT_BYTEWIDTH, byteWidth, 0);
-    }
-    #[inline]
-    pub fn new(
-        _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    ) -> FixedSizeBinaryBuilder<'a, 'b> {
-        let start = _fbb.start_table();
-        FixedSizeBinaryBuilder {
-            fbb_: _fbb,
-            start_: start,
-        }
-    }
-    #[inline]
-    pub fn finish(self) -> flatbuffers::WIPOffset<FixedSizeBinary<'a>> {
-        let o = self.fbb_.end_table(self.start_);
-        flatbuffers::WIPOffset::new(o.value())
-    }
-}
-
-impl std::fmt::Debug for FixedSizeBinary<'_> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let mut ds = f.debug_struct("FixedSizeBinary");
-        ds.field("byteWidth", &self.byteWidth());
-        ds.finish()
-    }
-}
-pub enum BoolOffset {}
-#[derive(Copy, Clone, PartialEq)]
-
-pub struct Bool<'a> {
-    pub _tab: flatbuffers::Table<'a>,
-}
-
-impl<'a> flatbuffers::Follow<'a> for Bool<'a> {
-    type Inner = Bool<'a>;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        Self {
-            _tab: flatbuffers::Table { buf, loc },
-        }
-    }
-}
-
-impl<'a> Bool<'a> {
-    #[inline]
-    pub fn init_from_table(table: flatbuffers::Table<'a>) -> Self {
-        Bool { _tab: table }
-    }
-    #[allow(unused_mut)]
-    pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>(
-        _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>,
-        _args: &'args BoolArgs,
-    ) -> flatbuffers::WIPOffset<Bool<'bldr>> {
-        let mut builder = BoolBuilder::new(_fbb);
-        builder.finish()
-    }
-}
-
-impl flatbuffers::Verifiable for Bool<'_> {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        v.visit_table(pos)?.finish();
-        Ok(())
-    }
-}
-pub struct BoolArgs {}
-impl<'a> Default for BoolArgs {
-    #[inline]
-    fn default() -> Self {
-        BoolArgs {}
-    }
-}
-pub struct BoolBuilder<'a: 'b, 'b> {
-    fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    start_: flatbuffers::WIPOffset<flatbuffers::TableUnfinishedWIPOffset>,
-}
-impl<'a: 'b, 'b> BoolBuilder<'a, 'b> {
-    #[inline]
-    pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>) -> BoolBuilder<'a, 'b> {
-        let start = _fbb.start_table();
-        BoolBuilder {
-            fbb_: _fbb,
-            start_: start,
-        }
-    }
-    #[inline]
-    pub fn finish(self) -> flatbuffers::WIPOffset<Bool<'a>> {
-        let o = self.fbb_.end_table(self.start_);
-        flatbuffers::WIPOffset::new(o.value())
-    }
-}
-
-impl std::fmt::Debug for Bool<'_> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let mut ds = f.debug_struct("Bool");
-        ds.finish()
-    }
-}
-pub enum DecimalOffset {}
-#[derive(Copy, Clone, PartialEq)]
-
-/// Exact decimal value represented as an integer value in two's
-/// complement. Currently only 128-bit (16-byte) and 256-bit (32-byte) integers
-/// are used. The representation uses the endianness indicated
-/// in the Schema.
-pub struct Decimal<'a> {
-    pub _tab: flatbuffers::Table<'a>,
-}
-
-impl<'a> flatbuffers::Follow<'a> for Decimal<'a> {
-    type Inner = Decimal<'a>;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        Self {
-            _tab: flatbuffers::Table { buf, loc },
-        }
-    }
-}
-
-impl<'a> Decimal<'a> {
-    #[inline]
-    pub fn init_from_table(table: flatbuffers::Table<'a>) -> Self {
-        Decimal { _tab: table }
-    }
-    #[allow(unused_mut)]
-    pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>(
-        _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>,
-        args: &'args DecimalArgs,
-    ) -> flatbuffers::WIPOffset<Decimal<'bldr>> {
-        let mut builder = DecimalBuilder::new(_fbb);
-        builder.add_bitWidth(args.bitWidth);
-        builder.add_scale(args.scale);
-        builder.add_precision(args.precision);
-        builder.finish()
-    }
-
-    pub const VT_PRECISION: flatbuffers::VOffsetT = 4;
-    pub const VT_SCALE: flatbuffers::VOffsetT = 6;
-    pub const VT_BITWIDTH: flatbuffers::VOffsetT = 8;
-
-    /// Total number of decimal digits
-    #[inline]
-    pub fn precision(&self) -> i32 {
-        self._tab
-            .get::<i32>(Decimal::VT_PRECISION, Some(0))
-            .unwrap()
-    }
-    /// Number of digits after the decimal point "."
-    #[inline]
-    pub fn scale(&self) -> i32 {
-        self._tab.get::<i32>(Decimal::VT_SCALE, Some(0)).unwrap()
-    }
-    /// Number of bits per value. The only accepted widths are 128 and 256.
-    /// We use bitWidth for consistency with Int::bitWidth.
-    #[inline]
-    pub fn bitWidth(&self) -> i32 {
-        self._tab
-            .get::<i32>(Decimal::VT_BITWIDTH, Some(128))
-            .unwrap()
-    }
-}
-
-impl flatbuffers::Verifiable for Decimal<'_> {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        v.visit_table(pos)?
-            .visit_field::<i32>(&"precision", Self::VT_PRECISION, false)?
-            .visit_field::<i32>(&"scale", Self::VT_SCALE, false)?
-            .visit_field::<i32>(&"bitWidth", Self::VT_BITWIDTH, false)?
-            .finish();
-        Ok(())
-    }
-}
-pub struct DecimalArgs {
-    pub precision: i32,
-    pub scale: i32,
-    pub bitWidth: i32,
-}
-impl<'a> Default for DecimalArgs {
-    #[inline]
-    fn default() -> Self {
-        DecimalArgs {
-            precision: 0,
-            scale: 0,
-            bitWidth: 128,
-        }
-    }
-}
-pub struct DecimalBuilder<'a: 'b, 'b> {
-    fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    start_: flatbuffers::WIPOffset<flatbuffers::TableUnfinishedWIPOffset>,
-}
-impl<'a: 'b, 'b> DecimalBuilder<'a, 'b> {
-    #[inline]
-    pub fn add_precision(&mut self, precision: i32) {
-        self.fbb_
-            .push_slot::<i32>(Decimal::VT_PRECISION, precision, 0);
-    }
-    #[inline]
-    pub fn add_scale(&mut self, scale: i32) {
-        self.fbb_.push_slot::<i32>(Decimal::VT_SCALE, scale, 0);
-    }
-    #[inline]
-    pub fn add_bitWidth(&mut self, bitWidth: i32) {
-        self.fbb_
-            .push_slot::<i32>(Decimal::VT_BITWIDTH, bitWidth, 128);
-    }
-    #[inline]
-    pub fn new(
-        _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    ) -> DecimalBuilder<'a, 'b> {
-        let start = _fbb.start_table();
-        DecimalBuilder {
-            fbb_: _fbb,
-            start_: start,
-        }
-    }
-    #[inline]
-    pub fn finish(self) -> flatbuffers::WIPOffset<Decimal<'a>> {
-        let o = self.fbb_.end_table(self.start_);
-        flatbuffers::WIPOffset::new(o.value())
-    }
-}
-
-impl std::fmt::Debug for Decimal<'_> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let mut ds = f.debug_struct("Decimal");
-        ds.field("precision", &self.precision());
-        ds.field("scale", &self.scale());
-        ds.field("bitWidth", &self.bitWidth());
-        ds.finish()
-    }
-}
-pub enum DateOffset {}
-#[derive(Copy, Clone, PartialEq)]
-
-/// Date is either a 32-bit or 64-bit type representing elapsed time since UNIX
-/// epoch (1970-01-01), stored in either of two units:
-///
-/// * Milliseconds (64 bits) indicating UNIX time elapsed since the epoch (no
-///   leap seconds), where the values are evenly divisible by 86400000
-/// * Days (32 bits) since the UNIX epoch
-pub struct Date<'a> {
-    pub _tab: flatbuffers::Table<'a>,
-}
-
-impl<'a> flatbuffers::Follow<'a> for Date<'a> {
-    type Inner = Date<'a>;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        Self {
-            _tab: flatbuffers::Table { buf, loc },
-        }
-    }
-}
-
-impl<'a> Date<'a> {
-    #[inline]
-    pub fn init_from_table(table: flatbuffers::Table<'a>) -> Self {
-        Date { _tab: table }
-    }
-    #[allow(unused_mut)]
-    pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>(
-        _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>,
-        args: &'args DateArgs,
-    ) -> flatbuffers::WIPOffset<Date<'bldr>> {
-        let mut builder = DateBuilder::new(_fbb);
-        builder.add_unit(args.unit);
-        builder.finish()
-    }
-
-    pub const VT_UNIT: flatbuffers::VOffsetT = 4;
-
-    #[inline]
-    pub fn unit(&self) -> DateUnit {
-        self._tab
-            .get::<DateUnit>(Date::VT_UNIT, Some(DateUnit::MILLISECOND))
-            .unwrap()
-    }
-}
-
-impl flatbuffers::Verifiable for Date<'_> {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        v.visit_table(pos)?
-            .visit_field::<DateUnit>(&"unit", Self::VT_UNIT, false)?
-            .finish();
-        Ok(())
-    }
-}
-pub struct DateArgs {
-    pub unit: DateUnit,
-}
-impl<'a> Default for DateArgs {
-    #[inline]
-    fn default() -> Self {
-        DateArgs {
-            unit: DateUnit::MILLISECOND,
-        }
-    }
-}
-pub struct DateBuilder<'a: 'b, 'b> {
-    fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    start_: flatbuffers::WIPOffset<flatbuffers::TableUnfinishedWIPOffset>,
-}
-impl<'a: 'b, 'b> DateBuilder<'a, 'b> {
-    #[inline]
-    pub fn add_unit(&mut self, unit: DateUnit) {
-        self.fbb_
-            .push_slot::<DateUnit>(Date::VT_UNIT, unit, DateUnit::MILLISECOND);
-    }
-    #[inline]
-    pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>) -> DateBuilder<'a, 'b> {
-        let start = _fbb.start_table();
-        DateBuilder {
-            fbb_: _fbb,
-            start_: start,
-        }
-    }
-    #[inline]
-    pub fn finish(self) -> flatbuffers::WIPOffset<Date<'a>> {
-        let o = self.fbb_.end_table(self.start_);
-        flatbuffers::WIPOffset::new(o.value())
-    }
-}
-
-impl std::fmt::Debug for Date<'_> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let mut ds = f.debug_struct("Date");
-        ds.field("unit", &self.unit());
-        ds.finish()
-    }
-}
-pub enum TimeOffset {}
-#[derive(Copy, Clone, PartialEq)]
-
-/// Time type. The physical storage type depends on the unit
-/// - SECOND and MILLISECOND: 32 bits
-/// - MICROSECOND and NANOSECOND: 64 bits
-pub struct Time<'a> {
-    pub _tab: flatbuffers::Table<'a>,
-}
-
-impl<'a> flatbuffers::Follow<'a> for Time<'a> {
-    type Inner = Time<'a>;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        Self {
-            _tab: flatbuffers::Table { buf, loc },
-        }
-    }
-}
-
-impl<'a> Time<'a> {
-    #[inline]
-    pub fn init_from_table(table: flatbuffers::Table<'a>) -> Self {
-        Time { _tab: table }
-    }
-    #[allow(unused_mut)]
-    pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>(
-        _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>,
-        args: &'args TimeArgs,
-    ) -> flatbuffers::WIPOffset<Time<'bldr>> {
-        let mut builder = TimeBuilder::new(_fbb);
-        builder.add_bitWidth(args.bitWidth);
-        builder.add_unit(args.unit);
-        builder.finish()
-    }
-
-    pub const VT_UNIT: flatbuffers::VOffsetT = 4;
-    pub const VT_BITWIDTH: flatbuffers::VOffsetT = 6;
-
-    #[inline]
-    pub fn unit(&self) -> TimeUnit {
-        self._tab
-            .get::<TimeUnit>(Time::VT_UNIT, Some(TimeUnit::MILLISECOND))
-            .unwrap()
-    }
-    #[inline]
-    pub fn bitWidth(&self) -> i32 {
-        self._tab.get::<i32>(Time::VT_BITWIDTH, Some(32)).unwrap()
-    }
-}
-
-impl flatbuffers::Verifiable for Time<'_> {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        v.visit_table(pos)?
-            .visit_field::<TimeUnit>(&"unit", Self::VT_UNIT, false)?
-            .visit_field::<i32>(&"bitWidth", Self::VT_BITWIDTH, false)?
-            .finish();
-        Ok(())
-    }
-}
-pub struct TimeArgs {
-    pub unit: TimeUnit,
-    pub bitWidth: i32,
-}
-impl<'a> Default for TimeArgs {
-    #[inline]
-    fn default() -> Self {
-        TimeArgs {
-            unit: TimeUnit::MILLISECOND,
-            bitWidth: 32,
-        }
-    }
-}
-pub struct TimeBuilder<'a: 'b, 'b> {
-    fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    start_: flatbuffers::WIPOffset<flatbuffers::TableUnfinishedWIPOffset>,
-}
-impl<'a: 'b, 'b> TimeBuilder<'a, 'b> {
-    #[inline]
-    pub fn add_unit(&mut self, unit: TimeUnit) {
-        self.fbb_
-            .push_slot::<TimeUnit>(Time::VT_UNIT, unit, TimeUnit::MILLISECOND);
-    }
-    #[inline]
-    pub fn add_bitWidth(&mut self, bitWidth: i32) {
-        self.fbb_.push_slot::<i32>(Time::VT_BITWIDTH, bitWidth, 32);
-    }
-    #[inline]
-    pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>) -> TimeBuilder<'a, 'b> {
-        let start = _fbb.start_table();
-        TimeBuilder {
-            fbb_: _fbb,
-            start_: start,
-        }
-    }
-    #[inline]
-    pub fn finish(self) -> flatbuffers::WIPOffset<Time<'a>> {
-        let o = self.fbb_.end_table(self.start_);
-        flatbuffers::WIPOffset::new(o.value())
-    }
-}
-
-impl std::fmt::Debug for Time<'_> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let mut ds = f.debug_struct("Time");
-        ds.field("unit", &self.unit());
-        ds.field("bitWidth", &self.bitWidth());
-        ds.finish()
-    }
-}
-pub enum TimestampOffset {}
-#[derive(Copy, Clone, PartialEq)]
-
-/// Time elapsed from the Unix epoch, 00:00:00.000 on 1 January 1970, excluding
-/// leap seconds, as a 64-bit integer. Note that UNIX time does not include
-/// leap seconds.
-///
-/// The Timestamp metadata supports both "time zone naive" and "time zone
-/// aware" timestamps. Read about the timezone attribute for more detail
-pub struct Timestamp<'a> {
-    pub _tab: flatbuffers::Table<'a>,
-}
-
-impl<'a> flatbuffers::Follow<'a> for Timestamp<'a> {
-    type Inner = Timestamp<'a>;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        Self {
-            _tab: flatbuffers::Table { buf, loc },
-        }
-    }
-}
-
-impl<'a> Timestamp<'a> {
-    #[inline]
-    pub fn init_from_table(table: flatbuffers::Table<'a>) -> Self {
-        Timestamp { _tab: table }
-    }
-    #[allow(unused_mut)]
-    pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>(
-        _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>,
-        args: &'args TimestampArgs<'args>,
-    ) -> flatbuffers::WIPOffset<Timestamp<'bldr>> {
-        let mut builder = TimestampBuilder::new(_fbb);
-        if let Some(x) = args.timezone {
-            builder.add_timezone(x);
-        }
-        builder.add_unit(args.unit);
-        builder.finish()
-    }
-
-    pub const VT_UNIT: flatbuffers::VOffsetT = 4;
-    pub const VT_TIMEZONE: flatbuffers::VOffsetT = 6;
-
-    #[inline]
-    pub fn unit(&self) -> TimeUnit {
-        self._tab
-            .get::<TimeUnit>(Timestamp::VT_UNIT, Some(TimeUnit::SECOND))
-            .unwrap()
-    }
-    /// The time zone is a string indicating the name of a time zone, one of:
-    ///
-    /// * As used in the Olson time zone database (the "tz database" or
-    ///   "tzdata"), such as "America/New_York"
-    /// * An absolute time zone offset of the form +XX:XX or -XX:XX, such as +07:30
-    ///
-    /// Whether a timezone string is present indicates different semantics about
-    /// the data:
-    ///
-    /// * If the time zone is null or equal to an empty string, the data is "time
-    ///   zone naive" and shall be displayed *as is* to the user, not localized
-    ///   to the locale of the user. This data can be though of as UTC but
-    ///   without having "UTC" as the time zone, it is not considered to be
-    ///   localized to any time zone
-    ///
-    /// * If the time zone is set to a valid value, values can be displayed as
-    ///   "localized" to that time zone, even though the underlying 64-bit
-    ///   integers are identical to the same data stored in UTC. Converting
-    ///   between time zones is a metadata-only operation and does not change the
-    ///   underlying values
-    #[inline]
-    pub fn timezone(&self) -> Option<&'a str> {
-        self._tab
-            .get::<flatbuffers::ForwardsUOffset<&str>>(Timestamp::VT_TIMEZONE, None)
-    }
-}
-
-impl flatbuffers::Verifiable for Timestamp<'_> {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        v.visit_table(pos)?
-            .visit_field::<TimeUnit>(&"unit", Self::VT_UNIT, false)?
-            .visit_field::<flatbuffers::ForwardsUOffset<&str>>(
-                &"timezone",
-                Self::VT_TIMEZONE,
-                false,
-            )?
-            .finish();
-        Ok(())
-    }
-}
-pub struct TimestampArgs<'a> {
-    pub unit: TimeUnit,
-    pub timezone: Option<flatbuffers::WIPOffset<&'a str>>,
-}
-impl<'a> Default for TimestampArgs<'a> {
-    #[inline]
-    fn default() -> Self {
-        TimestampArgs {
-            unit: TimeUnit::SECOND,
-            timezone: None,
-        }
-    }
-}
-pub struct TimestampBuilder<'a: 'b, 'b> {
-    fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    start_: flatbuffers::WIPOffset<flatbuffers::TableUnfinishedWIPOffset>,
-}
-impl<'a: 'b, 'b> TimestampBuilder<'a, 'b> {
-    #[inline]
-    pub fn add_unit(&mut self, unit: TimeUnit) {
-        self.fbb_
-            .push_slot::<TimeUnit>(Timestamp::VT_UNIT, unit, TimeUnit::SECOND);
-    }
-    #[inline]
-    pub fn add_timezone(&mut self, timezone: flatbuffers::WIPOffset<&'b str>) {
-        self.fbb_.push_slot_always::<flatbuffers::WIPOffset<_>>(
-            Timestamp::VT_TIMEZONE,
-            timezone,
-        );
-    }
-    #[inline]
-    pub fn new(
-        _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    ) -> TimestampBuilder<'a, 'b> {
-        let start = _fbb.start_table();
-        TimestampBuilder {
-            fbb_: _fbb,
-            start_: start,
-        }
-    }
-    #[inline]
-    pub fn finish(self) -> flatbuffers::WIPOffset<Timestamp<'a>> {
-        let o = self.fbb_.end_table(self.start_);
-        flatbuffers::WIPOffset::new(o.value())
-    }
-}
-
-impl std::fmt::Debug for Timestamp<'_> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let mut ds = f.debug_struct("Timestamp");
-        ds.field("unit", &self.unit());
-        ds.field("timezone", &self.timezone());
-        ds.finish()
-    }
-}
-pub enum IntervalOffset {}
-#[derive(Copy, Clone, PartialEq)]
-
-pub struct Interval<'a> {
-    pub _tab: flatbuffers::Table<'a>,
-}
-
-impl<'a> flatbuffers::Follow<'a> for Interval<'a> {
-    type Inner = Interval<'a>;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        Self {
-            _tab: flatbuffers::Table { buf, loc },
-        }
-    }
-}
-
-impl<'a> Interval<'a> {
-    #[inline]
-    pub fn init_from_table(table: flatbuffers::Table<'a>) -> Self {
-        Interval { _tab: table }
-    }
-    #[allow(unused_mut)]
-    pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>(
-        _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>,
-        args: &'args IntervalArgs,
-    ) -> flatbuffers::WIPOffset<Interval<'bldr>> {
-        let mut builder = IntervalBuilder::new(_fbb);
-        builder.add_unit(args.unit);
-        builder.finish()
-    }
-
-    pub const VT_UNIT: flatbuffers::VOffsetT = 4;
-
-    #[inline]
-    pub fn unit(&self) -> IntervalUnit {
-        self._tab
-            .get::<IntervalUnit>(Interval::VT_UNIT, Some(IntervalUnit::YEAR_MONTH))
-            .unwrap()
-    }
-}
-
-impl flatbuffers::Verifiable for Interval<'_> {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        v.visit_table(pos)?
-            .visit_field::<IntervalUnit>(&"unit", Self::VT_UNIT, false)?
-            .finish();
-        Ok(())
-    }
-}
-pub struct IntervalArgs {
-    pub unit: IntervalUnit,
-}
-impl<'a> Default for IntervalArgs {
-    #[inline]
-    fn default() -> Self {
-        IntervalArgs {
-            unit: IntervalUnit::YEAR_MONTH,
-        }
-    }
-}
-pub struct IntervalBuilder<'a: 'b, 'b> {
-    fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    start_: flatbuffers::WIPOffset<flatbuffers::TableUnfinishedWIPOffset>,
-}
-impl<'a: 'b, 'b> IntervalBuilder<'a, 'b> {
-    #[inline]
-    pub fn add_unit(&mut self, unit: IntervalUnit) {
-        self.fbb_.push_slot::<IntervalUnit>(
-            Interval::VT_UNIT,
-            unit,
-            IntervalUnit::YEAR_MONTH,
-        );
-    }
-    #[inline]
-    pub fn new(
-        _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    ) -> IntervalBuilder<'a, 'b> {
-        let start = _fbb.start_table();
-        IntervalBuilder {
-            fbb_: _fbb,
-            start_: start,
-        }
-    }
-    #[inline]
-    pub fn finish(self) -> flatbuffers::WIPOffset<Interval<'a>> {
-        let o = self.fbb_.end_table(self.start_);
-        flatbuffers::WIPOffset::new(o.value())
-    }
-}
-
-impl std::fmt::Debug for Interval<'_> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let mut ds = f.debug_struct("Interval");
-        ds.field("unit", &self.unit());
-        ds.finish()
-    }
-}
-pub enum DurationOffset {}
-#[derive(Copy, Clone, PartialEq)]
-
-pub struct Duration<'a> {
-    pub _tab: flatbuffers::Table<'a>,
-}
-
-impl<'a> flatbuffers::Follow<'a> for Duration<'a> {
-    type Inner = Duration<'a>;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        Self {
-            _tab: flatbuffers::Table { buf, loc },
-        }
-    }
-}
-
-impl<'a> Duration<'a> {
-    #[inline]
-    pub fn init_from_table(table: flatbuffers::Table<'a>) -> Self {
-        Duration { _tab: table }
-    }
-    #[allow(unused_mut)]
-    pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>(
-        _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>,
-        args: &'args DurationArgs,
-    ) -> flatbuffers::WIPOffset<Duration<'bldr>> {
-        let mut builder = DurationBuilder::new(_fbb);
-        builder.add_unit(args.unit);
-        builder.finish()
-    }
-
-    pub const VT_UNIT: flatbuffers::VOffsetT = 4;
-
-    #[inline]
-    pub fn unit(&self) -> TimeUnit {
-        self._tab
-            .get::<TimeUnit>(Duration::VT_UNIT, Some(TimeUnit::MILLISECOND))
-            .unwrap()
-    }
-}
-
-impl flatbuffers::Verifiable for Duration<'_> {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        v.visit_table(pos)?
-            .visit_field::<TimeUnit>(&"unit", Self::VT_UNIT, false)?
-            .finish();
-        Ok(())
-    }
-}
-pub struct DurationArgs {
-    pub unit: TimeUnit,
-}
-impl<'a> Default for DurationArgs {
-    #[inline]
-    fn default() -> Self {
-        DurationArgs {
-            unit: TimeUnit::MILLISECOND,
-        }
-    }
-}
-pub struct DurationBuilder<'a: 'b, 'b> {
-    fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    start_: flatbuffers::WIPOffset<flatbuffers::TableUnfinishedWIPOffset>,
-}
-impl<'a: 'b, 'b> DurationBuilder<'a, 'b> {
-    #[inline]
-    pub fn add_unit(&mut self, unit: TimeUnit) {
-        self.fbb_
-            .push_slot::<TimeUnit>(Duration::VT_UNIT, unit, TimeUnit::MILLISECOND);
-    }
-    #[inline]
-    pub fn new(
-        _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    ) -> DurationBuilder<'a, 'b> {
-        let start = _fbb.start_table();
-        DurationBuilder {
-            fbb_: _fbb,
-            start_: start,
-        }
-    }
-    #[inline]
-    pub fn finish(self) -> flatbuffers::WIPOffset<Duration<'a>> {
-        let o = self.fbb_.end_table(self.start_);
-        flatbuffers::WIPOffset::new(o.value())
-    }
-}
-
-impl std::fmt::Debug for Duration<'_> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let mut ds = f.debug_struct("Duration");
-        ds.field("unit", &self.unit());
-        ds.finish()
-    }
-}
-pub enum KeyValueOffset {}
-#[derive(Copy, Clone, PartialEq)]
-
-/// ----------------------------------------------------------------------
-/// user defined key value pairs to add custom metadata to arrow
-/// key namespacing is the responsibility of the user
-pub struct KeyValue<'a> {
-    pub _tab: flatbuffers::Table<'a>,
-}
-
-impl<'a> flatbuffers::Follow<'a> for KeyValue<'a> {
-    type Inner = KeyValue<'a>;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        Self {
-            _tab: flatbuffers::Table { buf, loc },
-        }
-    }
-}
-
-impl<'a> KeyValue<'a> {
-    #[inline]
-    pub fn init_from_table(table: flatbuffers::Table<'a>) -> Self {
-        KeyValue { _tab: table }
-    }
-    #[allow(unused_mut)]
-    pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>(
-        _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>,
-        args: &'args KeyValueArgs<'args>,
-    ) -> flatbuffers::WIPOffset<KeyValue<'bldr>> {
-        let mut builder = KeyValueBuilder::new(_fbb);
-        if let Some(x) = args.value {
-            builder.add_value(x);
-        }
-        if let Some(x) = args.key {
-            builder.add_key(x);
-        }
-        builder.finish()
-    }
-
-    pub const VT_KEY: flatbuffers::VOffsetT = 4;
-    pub const VT_VALUE: flatbuffers::VOffsetT = 6;
-
-    #[inline]
-    pub fn key(&self) -> Option<&'a str> {
-        self._tab
-            .get::<flatbuffers::ForwardsUOffset<&str>>(KeyValue::VT_KEY, None)
-    }
-    #[inline]
-    pub fn value(&self) -> Option<&'a str> {
-        self._tab
-            .get::<flatbuffers::ForwardsUOffset<&str>>(KeyValue::VT_VALUE, None)
-    }
-}
-
-impl flatbuffers::Verifiable for KeyValue<'_> {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        v.visit_table(pos)?
-            .visit_field::<flatbuffers::ForwardsUOffset<&str>>(
-                &"key",
-                Self::VT_KEY,
-                false,
-            )?
-            .visit_field::<flatbuffers::ForwardsUOffset<&str>>(
-                &"value",
-                Self::VT_VALUE,
-                false,
-            )?
-            .finish();
-        Ok(())
-    }
-}
-pub struct KeyValueArgs<'a> {
-    pub key: Option<flatbuffers::WIPOffset<&'a str>>,
-    pub value: Option<flatbuffers::WIPOffset<&'a str>>,
-}
-impl<'a> Default for KeyValueArgs<'a> {
-    #[inline]
-    fn default() -> Self {
-        KeyValueArgs {
-            key: None,
-            value: None,
-        }
-    }
-}
-pub struct KeyValueBuilder<'a: 'b, 'b> {
-    fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    start_: flatbuffers::WIPOffset<flatbuffers::TableUnfinishedWIPOffset>,
-}
-impl<'a: 'b, 'b> KeyValueBuilder<'a, 'b> {
-    #[inline]
-    pub fn add_key(&mut self, key: flatbuffers::WIPOffset<&'b str>) {
-        self.fbb_
-            .push_slot_always::<flatbuffers::WIPOffset<_>>(KeyValue::VT_KEY, key);
-    }
-    #[inline]
-    pub fn add_value(&mut self, value: flatbuffers::WIPOffset<&'b str>) {
-        self.fbb_
-            .push_slot_always::<flatbuffers::WIPOffset<_>>(KeyValue::VT_VALUE, value);
-    }
-    #[inline]
-    pub fn new(
-        _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    ) -> KeyValueBuilder<'a, 'b> {
-        let start = _fbb.start_table();
-        KeyValueBuilder {
-            fbb_: _fbb,
-            start_: start,
-        }
-    }
-    #[inline]
-    pub fn finish(self) -> flatbuffers::WIPOffset<KeyValue<'a>> {
-        let o = self.fbb_.end_table(self.start_);
-        flatbuffers::WIPOffset::new(o.value())
-    }
-}
-
-impl std::fmt::Debug for KeyValue<'_> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let mut ds = f.debug_struct("KeyValue");
-        ds.field("key", &self.key());
-        ds.field("value", &self.value());
-        ds.finish()
-    }
-}
-pub enum DictionaryEncodingOffset {}
-#[derive(Copy, Clone, PartialEq)]
-
-pub struct DictionaryEncoding<'a> {
-    pub _tab: flatbuffers::Table<'a>,
-}
-
-impl<'a> flatbuffers::Follow<'a> for DictionaryEncoding<'a> {
-    type Inner = DictionaryEncoding<'a>;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        Self {
-            _tab: flatbuffers::Table { buf, loc },
-        }
-    }
-}
-
-impl<'a> DictionaryEncoding<'a> {
-    #[inline]
-    pub fn init_from_table(table: flatbuffers::Table<'a>) -> Self {
-        DictionaryEncoding { _tab: table }
-    }
-    #[allow(unused_mut)]
-    pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>(
-        _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>,
-        args: &'args DictionaryEncodingArgs<'args>,
-    ) -> flatbuffers::WIPOffset<DictionaryEncoding<'bldr>> {
-        let mut builder = DictionaryEncodingBuilder::new(_fbb);
-        builder.add_id(args.id);
-        if let Some(x) = args.indexType {
-            builder.add_indexType(x);
-        }
-        builder.add_dictionaryKind(args.dictionaryKind);
-        builder.add_isOrdered(args.isOrdered);
-        builder.finish()
-    }
-
-    pub const VT_ID: flatbuffers::VOffsetT = 4;
-    pub const VT_INDEXTYPE: flatbuffers::VOffsetT = 6;
-    pub const VT_ISORDERED: flatbuffers::VOffsetT = 8;
-    pub const VT_DICTIONARYKIND: flatbuffers::VOffsetT = 10;
-
-    /// The known dictionary id in the application where this data is used. In
-    /// the file or streaming formats, the dictionary ids are found in the
-    /// DictionaryBatch messages
-    #[inline]
-    pub fn id(&self) -> i64 {
-        self._tab
-            .get::<i64>(DictionaryEncoding::VT_ID, Some(0))
-            .unwrap()
-    }
-    /// The dictionary indices are constrained to be non-negative integers. If
-    /// this field is null, the indices must be signed int32. To maximize
-    /// cross-language compatibility and performance, implementations are
-    /// recommended to prefer signed integer types over unsigned integer types
-    /// and to avoid uint64 indices unless they are required by an application.
-    #[inline]
-    pub fn indexType(&self) -> Option<Int<'a>> {
-        self._tab.get::<flatbuffers::ForwardsUOffset<Int>>(
-            DictionaryEncoding::VT_INDEXTYPE,
-            None,
-        )
-    }
-    /// By default, dictionaries are not ordered, or the order does not have
-    /// semantic meaning. In some statistical, applications, dictionary-encoding
-    /// is used to represent ordered categorical data, and we provide a way to
-    /// preserve that metadata here
-    #[inline]
-    pub fn isOrdered(&self) -> bool {
-        self._tab
-            .get::<bool>(DictionaryEncoding::VT_ISORDERED, Some(false))
-            .unwrap()
-    }
-    #[inline]
-    pub fn dictionaryKind(&self) -> DictionaryKind {
-        self._tab
-            .get::<DictionaryKind>(
-                DictionaryEncoding::VT_DICTIONARYKIND,
-                Some(DictionaryKind::DenseArray),
-            )
-            .unwrap()
-    }
-}
-
-impl flatbuffers::Verifiable for DictionaryEncoding<'_> {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        v.visit_table(pos)?
-            .visit_field::<i64>(&"id", Self::VT_ID, false)?
-            .visit_field::<flatbuffers::ForwardsUOffset<Int>>(
-                &"indexType",
-                Self::VT_INDEXTYPE,
-                false,
-            )?
-            .visit_field::<bool>(&"isOrdered", Self::VT_ISORDERED, false)?
-            .visit_field::<DictionaryKind>(
-                &"dictionaryKind",
-                Self::VT_DICTIONARYKIND,
-                false,
-            )?
-            .finish();
-        Ok(())
-    }
-}
-pub struct DictionaryEncodingArgs<'a> {
-    pub id: i64,
-    pub indexType: Option<flatbuffers::WIPOffset<Int<'a>>>,
-    pub isOrdered: bool,
-    pub dictionaryKind: DictionaryKind,
-}
-impl<'a> Default for DictionaryEncodingArgs<'a> {
-    #[inline]
-    fn default() -> Self {
-        DictionaryEncodingArgs {
-            id: 0,
-            indexType: None,
-            isOrdered: false,
-            dictionaryKind: DictionaryKind::DenseArray,
-        }
-    }
-}
-pub struct DictionaryEncodingBuilder<'a: 'b, 'b> {
-    fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    start_: flatbuffers::WIPOffset<flatbuffers::TableUnfinishedWIPOffset>,
-}
-impl<'a: 'b, 'b> DictionaryEncodingBuilder<'a, 'b> {
-    #[inline]
-    pub fn add_id(&mut self, id: i64) {
-        self.fbb_.push_slot::<i64>(DictionaryEncoding::VT_ID, id, 0);
-    }
-    #[inline]
-    pub fn add_indexType(&mut self, indexType: flatbuffers::WIPOffset<Int<'b>>) {
-        self.fbb_.push_slot_always::<flatbuffers::WIPOffset<Int>>(
-            DictionaryEncoding::VT_INDEXTYPE,
-            indexType,
-        );
-    }
-    #[inline]
-    pub fn add_isOrdered(&mut self, isOrdered: bool) {
-        self.fbb_
-            .push_slot::<bool>(DictionaryEncoding::VT_ISORDERED, isOrdered, false);
-    }
-    #[inline]
-    pub fn add_dictionaryKind(&mut self, dictionaryKind: DictionaryKind) {
-        self.fbb_.push_slot::<DictionaryKind>(
-            DictionaryEncoding::VT_DICTIONARYKIND,
-            dictionaryKind,
-            DictionaryKind::DenseArray,
-        );
-    }
-    #[inline]
-    pub fn new(
-        _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    ) -> DictionaryEncodingBuilder<'a, 'b> {
-        let start = _fbb.start_table();
-        DictionaryEncodingBuilder {
-            fbb_: _fbb,
-            start_: start,
-        }
-    }
-    #[inline]
-    pub fn finish(self) -> flatbuffers::WIPOffset<DictionaryEncoding<'a>> {
-        let o = self.fbb_.end_table(self.start_);
-        flatbuffers::WIPOffset::new(o.value())
-    }
-}
-
-impl std::fmt::Debug for DictionaryEncoding<'_> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let mut ds = f.debug_struct("DictionaryEncoding");
-        ds.field("id", &self.id());
-        ds.field("indexType", &self.indexType());
-        ds.field("isOrdered", &self.isOrdered());
-        ds.field("dictionaryKind", &self.dictionaryKind());
-        ds.finish()
-    }
-}
-pub enum FieldOffset {}
-#[derive(Copy, Clone, PartialEq)]
-
-/// ----------------------------------------------------------------------
-/// A field represents a named column in a record / row batch or child of a
-/// nested type.
-pub struct Field<'a> {
-    pub _tab: flatbuffers::Table<'a>,
-}
-
-impl<'a> flatbuffers::Follow<'a> for Field<'a> {
-    type Inner = Field<'a>;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        Self {
-            _tab: flatbuffers::Table { buf, loc },
-        }
-    }
-}
-
-impl<'a> Field<'a> {
-    #[inline]
-    pub fn init_from_table(table: flatbuffers::Table<'a>) -> Self {
-        Field { _tab: table }
-    }
-    #[allow(unused_mut)]
-    pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>(
-        _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>,
-        args: &'args FieldArgs<'args>,
-    ) -> flatbuffers::WIPOffset<Field<'bldr>> {
-        let mut builder = FieldBuilder::new(_fbb);
-        if let Some(x) = args.custom_metadata {
-            builder.add_custom_metadata(x);
-        }
-        if let Some(x) = args.children {
-            builder.add_children(x);
-        }
-        if let Some(x) = args.dictionary {
-            builder.add_dictionary(x);
-        }
-        if let Some(x) = args.type_ {
-            builder.add_type_(x);
-        }
-        if let Some(x) = args.name {
-            builder.add_name(x);
-        }
-        builder.add_type_type(args.type_type);
-        builder.add_nullable(args.nullable);
-        builder.finish()
-    }
-
-    pub const VT_NAME: flatbuffers::VOffsetT = 4;
-    pub const VT_NULLABLE: flatbuffers::VOffsetT = 6;
-    pub const VT_TYPE_TYPE: flatbuffers::VOffsetT = 8;
-    pub const VT_TYPE_: flatbuffers::VOffsetT = 10;
-    pub const VT_DICTIONARY: flatbuffers::VOffsetT = 12;
-    pub const VT_CHILDREN: flatbuffers::VOffsetT = 14;
-    pub const VT_CUSTOM_METADATA: flatbuffers::VOffsetT = 16;
-
-    /// Name is not required, in i.e. a List
-    #[inline]
-    pub fn name(&self) -> Option<&'a str> {
-        self._tab
-            .get::<flatbuffers::ForwardsUOffset<&str>>(Field::VT_NAME, None)
-    }
-    /// Whether or not this field can contain nulls. Should be true in general.
-    #[inline]
-    pub fn nullable(&self) -> bool {
-        self._tab
-            .get::<bool>(Field::VT_NULLABLE, Some(false))
-            .unwrap()
-    }
-    #[inline]
-    pub fn type_type(&self) -> Type {
-        self._tab
-            .get::<Type>(Field::VT_TYPE_TYPE, Some(Type::NONE))
-            .unwrap()
-    }
-    /// This is the type of the decoded value if the field is dictionary encoded.
-    #[inline]
-    pub fn type_(&self) -> Option<flatbuffers::Table<'a>> {
-        self._tab
-            .get::<flatbuffers::ForwardsUOffset<flatbuffers::Table<'a>>>(
-                Field::VT_TYPE_,
-                None,
-            )
-    }
-    /// Present only if the field is dictionary encoded.
-    #[inline]
-    pub fn dictionary(&self) -> Option<DictionaryEncoding<'a>> {
-        self._tab
-            .get::<flatbuffers::ForwardsUOffset<DictionaryEncoding>>(
-                Field::VT_DICTIONARY,
-                None,
-            )
-    }
-    /// children apply only to nested data types like Struct, List and Union. For
-    /// primitive types children will have length 0.
-    #[inline]
-    pub fn children(
-        &self,
-    ) -> Option<flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<Field<'a>>>> {
-        self._tab.get::<flatbuffers::ForwardsUOffset<
-            flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<Field>>,
-        >>(Field::VT_CHILDREN, None)
-    }
-    /// User-defined metadata
-    #[inline]
-    pub fn custom_metadata(
-        &self,
-    ) -> Option<flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<KeyValue<'a>>>> {
-        self._tab.get::<flatbuffers::ForwardsUOffset<
-            flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<KeyValue>>,
-        >>(Field::VT_CUSTOM_METADATA, None)
-    }
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_null(&self) -> Option<Null<'a>> {
-        if self.type_type() == Type::Null {
-            self.type_().map(Null::init_from_table)
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_int(&self) -> Option<Int<'a>> {
-        if self.type_type() == Type::Int {
-            self.type_().map(Int::init_from_table)
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_floating_point(&self) -> Option<FloatingPoint<'a>> {
-        if self.type_type() == Type::FloatingPoint {
-            self.type_().map(FloatingPoint::init_from_table)
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_binary(&self) -> Option<Binary<'a>> {
-        if self.type_type() == Type::Binary {
-            self.type_().map(Binary::init_from_table)
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_utf_8(&self) -> Option<Utf8<'a>> {
-        if self.type_type() == Type::Utf8 {
-            self.type_().map(Utf8::init_from_table)
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_bool(&self) -> Option<Bool<'a>> {
-        if self.type_type() == Type::Bool {
-            self.type_().map(Bool::init_from_table)
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_decimal(&self) -> Option<Decimal<'a>> {
-        if self.type_type() == Type::Decimal {
-            self.type_().map(Decimal::init_from_table)
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_date(&self) -> Option<Date<'a>> {
-        if self.type_type() == Type::Date {
-            self.type_().map(Date::init_from_table)
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_time(&self) -> Option<Time<'a>> {
-        if self.type_type() == Type::Time {
-            self.type_().map(Time::init_from_table)
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_timestamp(&self) -> Option<Timestamp<'a>> {
-        if self.type_type() == Type::Timestamp {
-            self.type_().map(Timestamp::init_from_table)
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_interval(&self) -> Option<Interval<'a>> {
-        if self.type_type() == Type::Interval {
-            self.type_().map(Interval::init_from_table)
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_list(&self) -> Option<List<'a>> {
-        if self.type_type() == Type::List {
-            self.type_().map(List::init_from_table)
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_struct_(&self) -> Option<Struct_<'a>> {
-        if self.type_type() == Type::Struct_ {
-            self.type_().map(Struct_::init_from_table)
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_union(&self) -> Option<Union<'a>> {
-        if self.type_type() == Type::Union {
-            self.type_().map(Union::init_from_table)
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_fixed_size_binary(&self) -> Option<FixedSizeBinary<'a>> {
-        if self.type_type() == Type::FixedSizeBinary {
-            self.type_().map(FixedSizeBinary::init_from_table)
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_fixed_size_list(&self) -> Option<FixedSizeList<'a>> {
-        if self.type_type() == Type::FixedSizeList {
-            self.type_().map(FixedSizeList::init_from_table)
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_map(&self) -> Option<Map<'a>> {
-        if self.type_type() == Type::Map {
-            self.type_().map(Map::init_from_table)
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_duration(&self) -> Option<Duration<'a>> {
-        if self.type_type() == Type::Duration {
-            self.type_().map(Duration::init_from_table)
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_large_binary(&self) -> Option<LargeBinary<'a>> {
-        if self.type_type() == Type::LargeBinary {
-            self.type_().map(LargeBinary::init_from_table)
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_large_utf_8(&self) -> Option<LargeUtf8<'a>> {
-        if self.type_type() == Type::LargeUtf8 {
-            self.type_().map(LargeUtf8::init_from_table)
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_large_list(&self) -> Option<LargeList<'a>> {
-        if self.type_type() == Type::LargeList {
-            self.type_().map(LargeList::init_from_table)
-        } else {
-            None
-        }
-    }
-}
-
-impl flatbuffers::Verifiable for Field<'_> {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        v.visit_table(pos)?
-     .visit_field::<flatbuffers::ForwardsUOffset<&str>>(&"name", Self::VT_NAME, false)?
-     .visit_field::<bool>(&"nullable", Self::VT_NULLABLE, false)?
-     .visit_union::<Type, _>(&"type_type", Self::VT_TYPE_TYPE, &"type_", Self::VT_TYPE_, false, |key, v, pos| {
-        match key {
-          Type::Null => v.verify_union_variant::<flatbuffers::ForwardsUOffset<Null>>("Type::Null", pos),
-          Type::Int => v.verify_union_variant::<flatbuffers::ForwardsUOffset<Int>>("Type::Int", pos),
-          Type::FloatingPoint => v.verify_union_variant::<flatbuffers::ForwardsUOffset<FloatingPoint>>("Type::FloatingPoint", pos),
-          Type::Binary => v.verify_union_variant::<flatbuffers::ForwardsUOffset<Binary>>("Type::Binary", pos),
-          Type::Utf8 => v.verify_union_variant::<flatbuffers::ForwardsUOffset<Utf8>>("Type::Utf8", pos),
-          Type::Bool => v.verify_union_variant::<flatbuffers::ForwardsUOffset<Bool>>("Type::Bool", pos),
-          Type::Decimal => v.verify_union_variant::<flatbuffers::ForwardsUOffset<Decimal>>("Type::Decimal", pos),
-          Type::Date => v.verify_union_variant::<flatbuffers::ForwardsUOffset<Date>>("Type::Date", pos),
-          Type::Time => v.verify_union_variant::<flatbuffers::ForwardsUOffset<Time>>("Type::Time", pos),
-          Type::Timestamp => v.verify_union_variant::<flatbuffers::ForwardsUOffset<Timestamp>>("Type::Timestamp", pos),
-          Type::Interval => v.verify_union_variant::<flatbuffers::ForwardsUOffset<Interval>>("Type::Interval", pos),
-          Type::List => v.verify_union_variant::<flatbuffers::ForwardsUOffset<List>>("Type::List", pos),
-          Type::Struct_ => v.verify_union_variant::<flatbuffers::ForwardsUOffset<Struct_>>("Type::Struct_", pos),
-          Type::Union => v.verify_union_variant::<flatbuffers::ForwardsUOffset<Union>>("Type::Union", pos),
-          Type::FixedSizeBinary => v.verify_union_variant::<flatbuffers::ForwardsUOffset<FixedSizeBinary>>("Type::FixedSizeBinary", pos),
-          Type::FixedSizeList => v.verify_union_variant::<flatbuffers::ForwardsUOffset<FixedSizeList>>("Type::FixedSizeList", pos),
-          Type::Map => v.verify_union_variant::<flatbuffers::ForwardsUOffset<Map>>("Type::Map", pos),
-          Type::Duration => v.verify_union_variant::<flatbuffers::ForwardsUOffset<Duration>>("Type::Duration", pos),
-          Type::LargeBinary => v.verify_union_variant::<flatbuffers::ForwardsUOffset<LargeBinary>>("Type::LargeBinary", pos),
-          Type::LargeUtf8 => v.verify_union_variant::<flatbuffers::ForwardsUOffset<LargeUtf8>>("Type::LargeUtf8", pos),
-          Type::LargeList => v.verify_union_variant::<flatbuffers::ForwardsUOffset<LargeList>>("Type::LargeList", pos),
-          _ => Ok(()),
-        }
-     })?
-     .visit_field::<flatbuffers::ForwardsUOffset<DictionaryEncoding>>(&"dictionary", Self::VT_DICTIONARY, false)?
-     .visit_field::<flatbuffers::ForwardsUOffset<flatbuffers::Vector<'_, flatbuffers::ForwardsUOffset<Field>>>>(&"children", Self::VT_CHILDREN, false)?
-     .visit_field::<flatbuffers::ForwardsUOffset<flatbuffers::Vector<'_, flatbuffers::ForwardsUOffset<KeyValue>>>>(&"custom_metadata", Self::VT_CUSTOM_METADATA, false)?
-     .finish();
-        Ok(())
-    }
-}
-pub struct FieldArgs<'a> {
-    pub name: Option<flatbuffers::WIPOffset<&'a str>>,
-    pub nullable: bool,
-    pub type_type: Type,
-    pub type_: Option<flatbuffers::WIPOffset<flatbuffers::UnionWIPOffset>>,
-    pub dictionary: Option<flatbuffers::WIPOffset<DictionaryEncoding<'a>>>,
-    pub children: Option<
-        flatbuffers::WIPOffset<
-            flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<Field<'a>>>,
-        >,
-    >,
-    pub custom_metadata: Option<
-        flatbuffers::WIPOffset<
-            flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<KeyValue<'a>>>,
-        >,
-    >,
-}
-impl<'a> Default for FieldArgs<'a> {
-    #[inline]
-    fn default() -> Self {
-        FieldArgs {
-            name: None,
-            nullable: false,
-            type_type: Type::NONE,
-            type_: None,
-            dictionary: None,
-            children: None,
-            custom_metadata: None,
-        }
-    }
-}
-pub struct FieldBuilder<'a: 'b, 'b> {
-    fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    start_: flatbuffers::WIPOffset<flatbuffers::TableUnfinishedWIPOffset>,
-}
-impl<'a: 'b, 'b> FieldBuilder<'a, 'b> {
-    #[inline]
-    pub fn add_name(&mut self, name: flatbuffers::WIPOffset<&'b str>) {
-        self.fbb_
-            .push_slot_always::<flatbuffers::WIPOffset<_>>(Field::VT_NAME, name);
-    }
-    #[inline]
-    pub fn add_nullable(&mut self, nullable: bool) {
-        self.fbb_
-            .push_slot::<bool>(Field::VT_NULLABLE, nullable, false);
-    }
-    #[inline]
-    pub fn add_type_type(&mut self, type_type: Type) {
-        self.fbb_
-            .push_slot::<Type>(Field::VT_TYPE_TYPE, type_type, Type::NONE);
-    }
-    #[inline]
-    pub fn add_type_(
-        &mut self,
-        type_: flatbuffers::WIPOffset<flatbuffers::UnionWIPOffset>,
-    ) {
-        self.fbb_
-            .push_slot_always::<flatbuffers::WIPOffset<_>>(Field::VT_TYPE_, type_);
-    }
-    #[inline]
-    pub fn add_dictionary(
-        &mut self,
-        dictionary: flatbuffers::WIPOffset<DictionaryEncoding<'b>>,
-    ) {
-        self.fbb_
-            .push_slot_always::<flatbuffers::WIPOffset<DictionaryEncoding>>(
-                Field::VT_DICTIONARY,
-                dictionary,
-            );
-    }
-    #[inline]
-    pub fn add_children(
-        &mut self,
-        children: flatbuffers::WIPOffset<
-            flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<Field<'b>>>,
-        >,
-    ) {
-        self.fbb_
-            .push_slot_always::<flatbuffers::WIPOffset<_>>(Field::VT_CHILDREN, children);
-    }
-    #[inline]
-    pub fn add_custom_metadata(
-        &mut self,
-        custom_metadata: flatbuffers::WIPOffset<
-            flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<KeyValue<'b>>>,
-        >,
-    ) {
-        self.fbb_.push_slot_always::<flatbuffers::WIPOffset<_>>(
-            Field::VT_CUSTOM_METADATA,
-            custom_metadata,
-        );
-    }
-    #[inline]
-    pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>) -> FieldBuilder<'a, 'b> {
-        let start = _fbb.start_table();
-        FieldBuilder {
-            fbb_: _fbb,
-            start_: start,
-        }
-    }
-    #[inline]
-    pub fn finish(self) -> flatbuffers::WIPOffset<Field<'a>> {
-        let o = self.fbb_.end_table(self.start_);
-        flatbuffers::WIPOffset::new(o.value())
-    }
-}
-
-impl std::fmt::Debug for Field<'_> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let mut ds = f.debug_struct("Field");
-        ds.field("name", &self.name());
-        ds.field("nullable", &self.nullable());
-        ds.field("type_type", &self.type_type());
-        match self.type_type() {
-            Type::Null => {
-                if let Some(x) = self.type_as_null() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::Int => {
-                if let Some(x) = self.type_as_int() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::FloatingPoint => {
-                if let Some(x) = self.type_as_floating_point() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::Binary => {
-                if let Some(x) = self.type_as_binary() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::Utf8 => {
-                if let Some(x) = self.type_as_utf_8() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::Bool => {
-                if let Some(x) = self.type_as_bool() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::Decimal => {
-                if let Some(x) = self.type_as_decimal() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::Date => {
-                if let Some(x) = self.type_as_date() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::Time => {
-                if let Some(x) = self.type_as_time() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::Timestamp => {
-                if let Some(x) = self.type_as_timestamp() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::Interval => {
-                if let Some(x) = self.type_as_interval() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::List => {
-                if let Some(x) = self.type_as_list() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::Struct_ => {
-                if let Some(x) = self.type_as_struct_() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::Union => {
-                if let Some(x) = self.type_as_union() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::FixedSizeBinary => {
-                if let Some(x) = self.type_as_fixed_size_binary() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::FixedSizeList => {
-                if let Some(x) = self.type_as_fixed_size_list() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::Map => {
-                if let Some(x) = self.type_as_map() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::Duration => {
-                if let Some(x) = self.type_as_duration() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::LargeBinary => {
-                if let Some(x) = self.type_as_large_binary() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::LargeUtf8 => {
-                if let Some(x) = self.type_as_large_utf_8() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::LargeList => {
-                if let Some(x) = self.type_as_large_list() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            _ => {
-                let x: Option<()> = None;
-                ds.field("type_", &x)
-            }
-        };
-        ds.field("dictionary", &self.dictionary());
-        ds.field("children", &self.children());
-        ds.field("custom_metadata", &self.custom_metadata());
-        ds.finish()
-    }
-}
-pub enum SchemaOffset {}
-#[derive(Copy, Clone, PartialEq)]
-
-/// ----------------------------------------------------------------------
-/// A Schema describes the columns in a row batch
-pub struct Schema<'a> {
-    pub _tab: flatbuffers::Table<'a>,
-}
-
-impl<'a> flatbuffers::Follow<'a> for Schema<'a> {
-    type Inner = Schema<'a>;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        Self {
-            _tab: flatbuffers::Table { buf, loc },
-        }
-    }
-}
-
-impl<'a> Schema<'a> {
-    #[inline]
-    pub fn init_from_table(table: flatbuffers::Table<'a>) -> Self {
-        Schema { _tab: table }
-    }
-    #[allow(unused_mut)]
-    pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>(
-        _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>,
-        args: &'args SchemaArgs<'args>,
-    ) -> flatbuffers::WIPOffset<Schema<'bldr>> {
-        let mut builder = SchemaBuilder::new(_fbb);
-        if let Some(x) = args.features {
-            builder.add_features(x);
-        }
-        if let Some(x) = args.custom_metadata {
-            builder.add_custom_metadata(x);
-        }
-        if let Some(x) = args.fields {
-            builder.add_fields(x);
-        }
-        builder.add_endianness(args.endianness);
-        builder.finish()
-    }
-
-    pub const VT_ENDIANNESS: flatbuffers::VOffsetT = 4;
-    pub const VT_FIELDS: flatbuffers::VOffsetT = 6;
-    pub const VT_CUSTOM_METADATA: flatbuffers::VOffsetT = 8;
-    pub const VT_FEATURES: flatbuffers::VOffsetT = 10;
-
-    /// endianness of the buffer
-    /// it is Little Endian by default
-    /// if endianness doesn't match the underlying system then the vectors need to be converted
-    #[inline]
-    pub fn endianness(&self) -> Endianness {
-        self._tab
-            .get::<Endianness>(Schema::VT_ENDIANNESS, Some(Endianness::Little))
-            .unwrap()
-    }
-    #[inline]
-    pub fn fields(
-        &self,
-    ) -> Option<flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<Field<'a>>>> {
-        self._tab.get::<flatbuffers::ForwardsUOffset<
-            flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<Field>>,
-        >>(Schema::VT_FIELDS, None)
-    }
-    #[inline]
-    pub fn custom_metadata(
-        &self,
-    ) -> Option<flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<KeyValue<'a>>>> {
-        self._tab.get::<flatbuffers::ForwardsUOffset<
-            flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<KeyValue>>,
-        >>(Schema::VT_CUSTOM_METADATA, None)
-    }
-    /// Features used in the stream/file.
-    #[inline]
-    pub fn features(&self) -> Option<flatbuffers::Vector<'a, Feature>> {
-        self._tab
-            .get::<flatbuffers::ForwardsUOffset<flatbuffers::Vector<'a, Feature>>>(
-                Schema::VT_FEATURES,
-                None,
-            )
-    }
-}
-
-impl flatbuffers::Verifiable for Schema<'_> {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        v.visit_table(pos)?
-     .visit_field::<Endianness>(&"endianness", Self::VT_ENDIANNESS, false)?
-     .visit_field::<flatbuffers::ForwardsUOffset<flatbuffers::Vector<'_, flatbuffers::ForwardsUOffset<Field>>>>(&"fields", Self::VT_FIELDS, false)?
-     .visit_field::<flatbuffers::ForwardsUOffset<flatbuffers::Vector<'_, flatbuffers::ForwardsUOffset<KeyValue>>>>(&"custom_metadata", Self::VT_CUSTOM_METADATA, false)?
-     .visit_field::<flatbuffers::ForwardsUOffset<flatbuffers::Vector<'_, Feature>>>(&"features", Self::VT_FEATURES, false)?
-     .finish();
-        Ok(())
-    }
-}
-pub struct SchemaArgs<'a> {
-    pub endianness: Endianness,
-    pub fields: Option<
-        flatbuffers::WIPOffset<
-            flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<Field<'a>>>,
-        >,
-    >,
-    pub custom_metadata: Option<
-        flatbuffers::WIPOffset<
-            flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<KeyValue<'a>>>,
-        >,
-    >,
-    pub features: Option<flatbuffers::WIPOffset<flatbuffers::Vector<'a, Feature>>>,
-}
-impl<'a> Default for SchemaArgs<'a> {
-    #[inline]
-    fn default() -> Self {
-        SchemaArgs {
-            endianness: Endianness::Little,
-            fields: None,
-            custom_metadata: None,
-            features: None,
-        }
-    }
-}
-pub struct SchemaBuilder<'a: 'b, 'b> {
-    fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    start_: flatbuffers::WIPOffset<flatbuffers::TableUnfinishedWIPOffset>,
-}
-impl<'a: 'b, 'b> SchemaBuilder<'a, 'b> {
-    #[inline]
-    pub fn add_endianness(&mut self, endianness: Endianness) {
-        self.fbb_.push_slot::<Endianness>(
-            Schema::VT_ENDIANNESS,
-            endianness,
-            Endianness::Little,
-        );
-    }
-    #[inline]
-    pub fn add_fields(
-        &mut self,
-        fields: flatbuffers::WIPOffset<
-            flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<Field<'b>>>,
-        >,
-    ) {
-        self.fbb_
-            .push_slot_always::<flatbuffers::WIPOffset<_>>(Schema::VT_FIELDS, fields);
-    }
-    #[inline]
-    pub fn add_custom_metadata(
-        &mut self,
-        custom_metadata: flatbuffers::WIPOffset<
-            flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<KeyValue<'b>>>,
-        >,
-    ) {
-        self.fbb_.push_slot_always::<flatbuffers::WIPOffset<_>>(
-            Schema::VT_CUSTOM_METADATA,
-            custom_metadata,
-        );
-    }
-    #[inline]
-    pub fn add_features(
-        &mut self,
-        features: flatbuffers::WIPOffset<flatbuffers::Vector<'b, Feature>>,
-    ) {
-        self.fbb_
-            .push_slot_always::<flatbuffers::WIPOffset<_>>(Schema::VT_FEATURES, features);
-    }
-    #[inline]
-    pub fn new(
-        _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    ) -> SchemaBuilder<'a, 'b> {
-        let start = _fbb.start_table();
-        SchemaBuilder {
-            fbb_: _fbb,
-            start_: start,
-        }
-    }
-    #[inline]
-    pub fn finish(self) -> flatbuffers::WIPOffset<Schema<'a>> {
-        let o = self.fbb_.end_table(self.start_);
-        flatbuffers::WIPOffset::new(o.value())
-    }
-}
-
-impl std::fmt::Debug for Schema<'_> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let mut ds = f.debug_struct("Schema");
-        ds.field("endianness", &self.endianness());
-        ds.field("fields", &self.fields());
-        ds.field("custom_metadata", &self.custom_metadata());
-        ds.field("features", &self.features());
-        ds.finish()
-    }
-}
-#[inline]
-#[deprecated(since = "2.0.0", note = "Deprecated in favor of `root_as...` methods.")]
-pub fn get_root_as_schema<'a>(buf: &'a [u8]) -> Schema<'a> {
-    unsafe { flatbuffers::root_unchecked::<Schema<'a>>(buf) }
-}
-
-#[inline]
-#[deprecated(since = "2.0.0", note = "Deprecated in favor of `root_as...` methods.")]
-pub fn get_size_prefixed_root_as_schema<'a>(buf: &'a [u8]) -> Schema<'a> {
-    unsafe { flatbuffers::size_prefixed_root_unchecked::<Schema<'a>>(buf) }
-}
-
-#[inline]
-/// Verifies that a buffer of bytes contains a `Schema`
-/// and returns it.
-/// Note that verification is still experimental and may not
-/// catch every error, or be maximally performant. For the
-/// previous, unchecked, behavior use
-/// `root_as_schema_unchecked`.
-pub fn root_as_schema(buf: &[u8]) -> Result<Schema, flatbuffers::InvalidFlatbuffer> {
-    flatbuffers::root::<Schema>(buf)
-}
-#[inline]
-/// Verifies that a buffer of bytes contains a size prefixed
-/// `Schema` and returns it.
-/// Note that verification is still experimental and may not
-/// catch every error, or be maximally performant. For the
-/// previous, unchecked, behavior use
-/// `size_prefixed_root_as_schema_unchecked`.
-pub fn size_prefixed_root_as_schema(
-    buf: &[u8],
-) -> Result<Schema, flatbuffers::InvalidFlatbuffer> {
-    flatbuffers::size_prefixed_root::<Schema>(buf)
-}
-#[inline]
-/// Verifies, with the given options, that a buffer of bytes
-/// contains a `Schema` and returns it.
-/// Note that verification is still experimental and may not
-/// catch every error, or be maximally performant. For the
-/// previous, unchecked, behavior use
-/// `root_as_schema_unchecked`.
-pub fn root_as_schema_with_opts<'b, 'o>(
-    opts: &'o flatbuffers::VerifierOptions,
-    buf: &'b [u8],
-) -> Result<Schema<'b>, flatbuffers::InvalidFlatbuffer> {
-    flatbuffers::root_with_opts::<Schema<'b>>(opts, buf)
-}
-#[inline]
-/// Verifies, with the given verifier options, that a buffer of
-/// bytes contains a size prefixed `Schema` and returns
-/// it. Note that verification is still experimental and may not
-/// catch every error, or be maximally performant. For the
-/// previous, unchecked, behavior use
-/// `root_as_schema_unchecked`.
-pub fn size_prefixed_root_as_schema_with_opts<'b, 'o>(
-    opts: &'o flatbuffers::VerifierOptions,
-    buf: &'b [u8],
-) -> Result<Schema<'b>, flatbuffers::InvalidFlatbuffer> {
-    flatbuffers::size_prefixed_root_with_opts::<Schema<'b>>(opts, buf)
-}
-#[inline]
-/// Assumes, without verification, that a buffer of bytes contains a Schema and returns it.
-/// # Safety
-/// Callers must trust the given bytes do indeed contain a valid `Schema`.
-pub unsafe fn root_as_schema_unchecked(buf: &[u8]) -> Schema {
-    flatbuffers::root_unchecked::<Schema>(buf)
-}
-#[inline]
-/// Assumes, without verification, that a buffer of bytes contains a size prefixed Schema and returns it.
-/// # Safety
-/// Callers must trust the given bytes do indeed contain a valid size prefixed `Schema`.
-pub unsafe fn size_prefixed_root_as_schema_unchecked(buf: &[u8]) -> Schema {
-    flatbuffers::size_prefixed_root_unchecked::<Schema>(buf)
-}
-#[inline]
-pub fn finish_schema_buffer<'a, 'b>(
-    fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    root: flatbuffers::WIPOffset<Schema<'a>>,
-) {
-    fbb.finish(root, None);
-}
-
-#[inline]
-pub fn finish_size_prefixed_schema_buffer<'a, 'b>(
-    fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    root: flatbuffers::WIPOffset<Schema<'a>>,
-) {
-    fbb.finish_size_prefixed(root, None);
-}
diff --git a/rust/arrow/src/ipc/gen/SparseTensor.rs b/rust/arrow/src/ipc/gen/SparseTensor.rs
deleted file mode 100644
index 5d12d4e3627..00000000000
--- a/rust/arrow/src/ipc/gen/SparseTensor.rs
+++ /dev/null
@@ -1,1902 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#![allow(dead_code)]
-#![allow(unused_imports)]
-
-use crate::ipc::gen::Schema::*;
-use crate::ipc::gen::Tensor::*;
-use flatbuffers::EndianScalar;
-use std::{cmp::Ordering, mem};
-// automatically generated by the FlatBuffers compiler, do not modify
-
-#[deprecated(
-    since = "2.0.0",
-    note = "Use associated constants instead. This will no longer be generated in 2021."
-)]
-pub const ENUM_MIN_SPARSE_MATRIX_COMPRESSED_AXIS: i16 = 0;
-#[deprecated(
-    since = "2.0.0",
-    note = "Use associated constants instead. This will no longer be generated in 2021."
-)]
-pub const ENUM_MAX_SPARSE_MATRIX_COMPRESSED_AXIS: i16 = 1;
-#[deprecated(
-    since = "2.0.0",
-    note = "Use associated constants instead. This will no longer be generated in 2021."
-)]
-#[allow(non_camel_case_types)]
-pub const ENUM_VALUES_SPARSE_MATRIX_COMPRESSED_AXIS: [SparseMatrixCompressedAxis; 2] = [
-    SparseMatrixCompressedAxis::Row,
-    SparseMatrixCompressedAxis::Column,
-];
-
-#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
-#[repr(transparent)]
-pub struct SparseMatrixCompressedAxis(pub i16);
-#[allow(non_upper_case_globals)]
-impl SparseMatrixCompressedAxis {
-    pub const Row: Self = Self(0);
-    pub const Column: Self = Self(1);
-
-    pub const ENUM_MIN: i16 = 0;
-    pub const ENUM_MAX: i16 = 1;
-    pub const ENUM_VALUES: &'static [Self] = &[Self::Row, Self::Column];
-    /// Returns the variant's name or "" if unknown.
-    pub fn variant_name(self) -> Option<&'static str> {
-        match self {
-            Self::Row => Some("Row"),
-            Self::Column => Some("Column"),
-            _ => None,
-        }
-    }
-}
-impl std::fmt::Debug for SparseMatrixCompressedAxis {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        if let Some(name) = self.variant_name() {
-            f.write_str(name)
-        } else {
-            f.write_fmt(format_args!("<UNKNOWN {:?}>", self.0))
-        }
-    }
-}
-impl<'a> flatbuffers::Follow<'a> for SparseMatrixCompressedAxis {
-    type Inner = Self;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        let b = flatbuffers::read_scalar_at::<i16>(buf, loc);
-        Self(b)
-    }
-}
-
-impl flatbuffers::Push for SparseMatrixCompressedAxis {
-    type Output = SparseMatrixCompressedAxis;
-    #[inline]
-    fn push(&self, dst: &mut [u8], _rest: &[u8]) {
-        flatbuffers::emplace_scalar::<i16>(dst, self.0);
-    }
-}
-
-impl flatbuffers::EndianScalar for SparseMatrixCompressedAxis {
-    #[inline]
-    fn to_little_endian(self) -> Self {
-        let b = i16::to_le(self.0);
-        Self(b)
-    }
-    #[inline]
-    fn from_little_endian(self) -> Self {
-        let b = i16::from_le(self.0);
-        Self(b)
-    }
-}
-
-impl<'a> flatbuffers::Verifiable for SparseMatrixCompressedAxis {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        i16::run_verifier(v, pos)
-    }
-}
-
-impl flatbuffers::SimpleToVerifyInSlice for SparseMatrixCompressedAxis {}
-#[deprecated(
-    since = "2.0.0",
-    note = "Use associated constants instead. This will no longer be generated in 2021."
-)]
-pub const ENUM_MIN_SPARSE_TENSOR_INDEX: u8 = 0;
-#[deprecated(
-    since = "2.0.0",
-    note = "Use associated constants instead. This will no longer be generated in 2021."
-)]
-pub const ENUM_MAX_SPARSE_TENSOR_INDEX: u8 = 3;
-#[deprecated(
-    since = "2.0.0",
-    note = "Use associated constants instead. This will no longer be generated in 2021."
-)]
-#[allow(non_camel_case_types)]
-pub const ENUM_VALUES_SPARSE_TENSOR_INDEX: [SparseTensorIndex; 4] = [
-    SparseTensorIndex::NONE,
-    SparseTensorIndex::SparseTensorIndexCOO,
-    SparseTensorIndex::SparseMatrixIndexCSX,
-    SparseTensorIndex::SparseTensorIndexCSF,
-];
-
-#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
-#[repr(transparent)]
-pub struct SparseTensorIndex(pub u8);
-#[allow(non_upper_case_globals)]
-impl SparseTensorIndex {
-    pub const NONE: Self = Self(0);
-    pub const SparseTensorIndexCOO: Self = Self(1);
-    pub const SparseMatrixIndexCSX: Self = Self(2);
-    pub const SparseTensorIndexCSF: Self = Self(3);
-
-    pub const ENUM_MIN: u8 = 0;
-    pub const ENUM_MAX: u8 = 3;
-    pub const ENUM_VALUES: &'static [Self] = &[
-        Self::NONE,
-        Self::SparseTensorIndexCOO,
-        Self::SparseMatrixIndexCSX,
-        Self::SparseTensorIndexCSF,
-    ];
-    /// Returns the variant's name or "" if unknown.
-    pub fn variant_name(self) -> Option<&'static str> {
-        match self {
-            Self::NONE => Some("NONE"),
-            Self::SparseTensorIndexCOO => Some("SparseTensorIndexCOO"),
-            Self::SparseMatrixIndexCSX => Some("SparseMatrixIndexCSX"),
-            Self::SparseTensorIndexCSF => Some("SparseTensorIndexCSF"),
-            _ => None,
-        }
-    }
-}
-impl std::fmt::Debug for SparseTensorIndex {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        if let Some(name) = self.variant_name() {
-            f.write_str(name)
-        } else {
-            f.write_fmt(format_args!("<UNKNOWN {:?}>", self.0))
-        }
-    }
-}
-pub struct SparseTensorIndexUnionTableOffset {}
-impl<'a> flatbuffers::Follow<'a> for SparseTensorIndex {
-    type Inner = Self;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        let b = flatbuffers::read_scalar_at::<u8>(buf, loc);
-        Self(b)
-    }
-}
-
-impl flatbuffers::Push for SparseTensorIndex {
-    type Output = SparseTensorIndex;
-    #[inline]
-    fn push(&self, dst: &mut [u8], _rest: &[u8]) {
-        flatbuffers::emplace_scalar::<u8>(dst, self.0);
-    }
-}
-
-impl flatbuffers::EndianScalar for SparseTensorIndex {
-    #[inline]
-    fn to_little_endian(self) -> Self {
-        let b = u8::to_le(self.0);
-        Self(b)
-    }
-    #[inline]
-    fn from_little_endian(self) -> Self {
-        let b = u8::from_le(self.0);
-        Self(b)
-    }
-}
-
-impl<'a> flatbuffers::Verifiable for SparseTensorIndex {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        u8::run_verifier(v, pos)
-    }
-}
-
-impl flatbuffers::SimpleToVerifyInSlice for SparseTensorIndex {}
-pub enum SparseTensorIndexCOOOffset {}
-#[derive(Copy, Clone, PartialEq)]
-
-/// ----------------------------------------------------------------------
-/// EXPERIMENTAL: Data structures for sparse tensors
-/// Coordinate (COO) format of sparse tensor index.
-///
-/// COO's index list are represented as a NxM matrix,
-/// where N is the number of non-zero values,
-/// and M is the number of dimensions of a sparse tensor.
-///
-/// indicesBuffer stores the location and size of the data of this indices
-/// matrix.  The value type and the stride of the indices matrix is
-/// specified in indicesType and indicesStrides fields.
-///
-/// For example, let X be a 2x3x4x5 tensor, and it has the following
-/// 6 non-zero values:
-/// ```text
-///   X[0, 1, 2, 0] := 1
-///   X[1, 1, 2, 3] := 2
-///   X[0, 2, 1, 0] := 3
-///   X[0, 1, 3, 0] := 4
-///   X[0, 1, 2, 1] := 5
-///   X[1, 2, 0, 4] := 6
-/// ```
-/// In COO format, the index matrix of X is the following 4x6 matrix:
-/// ```text
-///   [[0, 0, 0, 0, 1, 1],
-///    [1, 1, 1, 2, 1, 2],
-///    [2, 2, 3, 1, 2, 0],
-///    [0, 1, 0, 0, 3, 4]]
-/// ```
-/// When isCanonical is true, the indices is sorted in lexicographical order
-/// (row-major order), and it does not have duplicated entries.  Otherwise,
-/// the indices may not be sorted, or may have duplicated entries.
-pub struct SparseTensorIndexCOO<'a> {
-    pub _tab: flatbuffers::Table<'a>,
-}
-
-impl<'a> flatbuffers::Follow<'a> for SparseTensorIndexCOO<'a> {
-    type Inner = SparseTensorIndexCOO<'a>;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        Self {
-            _tab: flatbuffers::Table { buf, loc },
-        }
-    }
-}
-
-impl<'a> SparseTensorIndexCOO<'a> {
-    #[inline]
-    pub fn init_from_table(table: flatbuffers::Table<'a>) -> Self {
-        SparseTensorIndexCOO { _tab: table }
-    }
-    #[allow(unused_mut)]
-    pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>(
-        _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>,
-        args: &'args SparseTensorIndexCOOArgs<'args>,
-    ) -> flatbuffers::WIPOffset<SparseTensorIndexCOO<'bldr>> {
-        let mut builder = SparseTensorIndexCOOBuilder::new(_fbb);
-        if let Some(x) = args.indicesBuffer {
-            builder.add_indicesBuffer(x);
-        }
-        if let Some(x) = args.indicesStrides {
-            builder.add_indicesStrides(x);
-        }
-        if let Some(x) = args.indicesType {
-            builder.add_indicesType(x);
-        }
-        builder.add_isCanonical(args.isCanonical);
-        builder.finish()
-    }
-
-    pub const VT_INDICESTYPE: flatbuffers::VOffsetT = 4;
-    pub const VT_INDICESSTRIDES: flatbuffers::VOffsetT = 6;
-    pub const VT_INDICESBUFFER: flatbuffers::VOffsetT = 8;
-    pub const VT_ISCANONICAL: flatbuffers::VOffsetT = 10;
-
-    /// The type of values in indicesBuffer
-    #[inline]
-    pub fn indicesType(&self) -> Int<'a> {
-        self._tab
-            .get::<flatbuffers::ForwardsUOffset<Int>>(
-                SparseTensorIndexCOO::VT_INDICESTYPE,
-                None,
-            )
-            .unwrap()
-    }
-    /// Non-negative byte offsets to advance one value cell along each dimension
-    /// If omitted, default to row-major order (C-like).
-    #[inline]
-    pub fn indicesStrides(&self) -> Option<flatbuffers::Vector<'a, i64>> {
-        self._tab
-            .get::<flatbuffers::ForwardsUOffset<flatbuffers::Vector<'a, i64>>>(
-                SparseTensorIndexCOO::VT_INDICESSTRIDES,
-                None,
-            )
-    }
-    /// The location and size of the indices matrix's data
-    #[inline]
-    pub fn indicesBuffer(&self) -> &'a Buffer {
-        self._tab
-            .get::<Buffer>(SparseTensorIndexCOO::VT_INDICESBUFFER, None)
-            .unwrap()
-    }
-    /// This flag is true if and only if the indices matrix is sorted in
-    /// row-major order, and does not have duplicated entries.
-    /// This sort order is the same as of Tensorflow's SparseTensor,
-    /// but it is inverse order of SciPy's canonical coo_matrix
-    /// (SciPy employs column-major order for its coo_matrix).
-    #[inline]
-    pub fn isCanonical(&self) -> bool {
-        self._tab
-            .get::<bool>(SparseTensorIndexCOO::VT_ISCANONICAL, Some(false))
-            .unwrap()
-    }
-}
-
-impl flatbuffers::Verifiable for SparseTensorIndexCOO<'_> {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        v.visit_table(pos)?
-            .visit_field::<flatbuffers::ForwardsUOffset<Int>>(
-                &"indicesType",
-                Self::VT_INDICESTYPE,
-                true,
-            )?
-            .visit_field::<flatbuffers::ForwardsUOffset<flatbuffers::Vector<'_, i64>>>(
-                &"indicesStrides",
-                Self::VT_INDICESSTRIDES,
-                false,
-            )?
-            .visit_field::<Buffer>(&"indicesBuffer", Self::VT_INDICESBUFFER, true)?
-            .visit_field::<bool>(&"isCanonical", Self::VT_ISCANONICAL, false)?
-            .finish();
-        Ok(())
-    }
-}
-pub struct SparseTensorIndexCOOArgs<'a> {
-    pub indicesType: Option<flatbuffers::WIPOffset<Int<'a>>>,
-    pub indicesStrides: Option<flatbuffers::WIPOffset<flatbuffers::Vector<'a, i64>>>,
-    pub indicesBuffer: Option<&'a Buffer>,
-    pub isCanonical: bool,
-}
-impl<'a> Default for SparseTensorIndexCOOArgs<'a> {
-    #[inline]
-    fn default() -> Self {
-        SparseTensorIndexCOOArgs {
-            indicesType: None, // required field
-            indicesStrides: None,
-            indicesBuffer: None, // required field
-            isCanonical: false,
-        }
-    }
-}
-pub struct SparseTensorIndexCOOBuilder<'a: 'b, 'b> {
-    fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    start_: flatbuffers::WIPOffset<flatbuffers::TableUnfinishedWIPOffset>,
-}
-impl<'a: 'b, 'b> SparseTensorIndexCOOBuilder<'a, 'b> {
-    #[inline]
-    pub fn add_indicesType(&mut self, indicesType: flatbuffers::WIPOffset<Int<'b>>) {
-        self.fbb_.push_slot_always::<flatbuffers::WIPOffset<Int>>(
-            SparseTensorIndexCOO::VT_INDICESTYPE,
-            indicesType,
-        );
-    }
-    #[inline]
-    pub fn add_indicesStrides(
-        &mut self,
-        indicesStrides: flatbuffers::WIPOffset<flatbuffers::Vector<'b, i64>>,
-    ) {
-        self.fbb_.push_slot_always::<flatbuffers::WIPOffset<_>>(
-            SparseTensorIndexCOO::VT_INDICESSTRIDES,
-            indicesStrides,
-        );
-    }
-    #[inline]
-    pub fn add_indicesBuffer(&mut self, indicesBuffer: &Buffer) {
-        self.fbb_.push_slot_always::<&Buffer>(
-            SparseTensorIndexCOO::VT_INDICESBUFFER,
-            indicesBuffer,
-        );
-    }
-    #[inline]
-    pub fn add_isCanonical(&mut self, isCanonical: bool) {
-        self.fbb_.push_slot::<bool>(
-            SparseTensorIndexCOO::VT_ISCANONICAL,
-            isCanonical,
-            false,
-        );
-    }
-    #[inline]
-    pub fn new(
-        _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    ) -> SparseTensorIndexCOOBuilder<'a, 'b> {
-        let start = _fbb.start_table();
-        SparseTensorIndexCOOBuilder {
-            fbb_: _fbb,
-            start_: start,
-        }
-    }
-    #[inline]
-    pub fn finish(self) -> flatbuffers::WIPOffset<SparseTensorIndexCOO<'a>> {
-        let o = self.fbb_.end_table(self.start_);
-        self.fbb_
-            .required(o, SparseTensorIndexCOO::VT_INDICESTYPE, "indicesType");
-        self.fbb_
-            .required(o, SparseTensorIndexCOO::VT_INDICESBUFFER, "indicesBuffer");
-        flatbuffers::WIPOffset::new(o.value())
-    }
-}
-
-impl std::fmt::Debug for SparseTensorIndexCOO<'_> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let mut ds = f.debug_struct("SparseTensorIndexCOO");
-        ds.field("indicesType", &self.indicesType());
-        ds.field("indicesStrides", &self.indicesStrides());
-        ds.field("indicesBuffer", &self.indicesBuffer());
-        ds.field("isCanonical", &self.isCanonical());
-        ds.finish()
-    }
-}
-pub enum SparseMatrixIndexCSXOffset {}
-#[derive(Copy, Clone, PartialEq)]
-
-/// Compressed Sparse format, that is matrix-specific.
-pub struct SparseMatrixIndexCSX<'a> {
-    pub _tab: flatbuffers::Table<'a>,
-}
-
-impl<'a> flatbuffers::Follow<'a> for SparseMatrixIndexCSX<'a> {
-    type Inner = SparseMatrixIndexCSX<'a>;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        Self {
-            _tab: flatbuffers::Table { buf, loc },
-        }
-    }
-}
-
-impl<'a> SparseMatrixIndexCSX<'a> {
-    #[inline]
-    pub fn init_from_table(table: flatbuffers::Table<'a>) -> Self {
-        SparseMatrixIndexCSX { _tab: table }
-    }
-    #[allow(unused_mut)]
-    pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>(
-        _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>,
-        args: &'args SparseMatrixIndexCSXArgs<'args>,
-    ) -> flatbuffers::WIPOffset<SparseMatrixIndexCSX<'bldr>> {
-        let mut builder = SparseMatrixIndexCSXBuilder::new(_fbb);
-        if let Some(x) = args.indicesBuffer {
-            builder.add_indicesBuffer(x);
-        }
-        if let Some(x) = args.indicesType {
-            builder.add_indicesType(x);
-        }
-        if let Some(x) = args.indptrBuffer {
-            builder.add_indptrBuffer(x);
-        }
-        if let Some(x) = args.indptrType {
-            builder.add_indptrType(x);
-        }
-        builder.add_compressedAxis(args.compressedAxis);
-        builder.finish()
-    }
-
-    pub const VT_COMPRESSEDAXIS: flatbuffers::VOffsetT = 4;
-    pub const VT_INDPTRTYPE: flatbuffers::VOffsetT = 6;
-    pub const VT_INDPTRBUFFER: flatbuffers::VOffsetT = 8;
-    pub const VT_INDICESTYPE: flatbuffers::VOffsetT = 10;
-    pub const VT_INDICESBUFFER: flatbuffers::VOffsetT = 12;
-
-    /// Which axis, row or column, is compressed
-    #[inline]
-    pub fn compressedAxis(&self) -> SparseMatrixCompressedAxis {
-        self._tab
-            .get::<SparseMatrixCompressedAxis>(
-                SparseMatrixIndexCSX::VT_COMPRESSEDAXIS,
-                Some(SparseMatrixCompressedAxis::Row),
-            )
-            .unwrap()
-    }
-    /// The type of values in indptrBuffer
-    #[inline]
-    pub fn indptrType(&self) -> Int<'a> {
-        self._tab
-            .get::<flatbuffers::ForwardsUOffset<Int>>(
-                SparseMatrixIndexCSX::VT_INDPTRTYPE,
-                None,
-            )
-            .unwrap()
-    }
-    /// indptrBuffer stores the location and size of indptr array that
-    /// represents the range of the rows.
-    /// The i-th row spans from `indptr[i]` to `indptr[i+1]` in the data.
-    /// The length of this array is 1 + (the number of rows), and the type
-    /// of index value is long.
-    ///
-    /// For example, let X be the following 6x4 matrix:
-    /// ```text
-    ///   X := [[0, 1, 2, 0],
-    ///         [0, 0, 3, 0],
-    ///         [0, 4, 0, 5],
-    ///         [0, 0, 0, 0],
-    ///         [6, 0, 7, 8],
-    ///         [0, 9, 0, 0]].
-    /// ```
-    /// The array of non-zero values in X is:
-    /// ```text
-    ///   values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9].
-    /// ```
-    /// And the indptr of X is:
-    /// ```text
-    ///   indptr(X) = [0, 2, 3, 5, 5, 8, 10].
-    /// ```
-    #[inline]
-    pub fn indptrBuffer(&self) -> &'a Buffer {
-        self._tab
-            .get::<Buffer>(SparseMatrixIndexCSX::VT_INDPTRBUFFER, None)
-            .unwrap()
-    }
-    /// The type of values in indicesBuffer
-    #[inline]
-    pub fn indicesType(&self) -> Int<'a> {
-        self._tab
-            .get::<flatbuffers::ForwardsUOffset<Int>>(
-                SparseMatrixIndexCSX::VT_INDICESTYPE,
-                None,
-            )
-            .unwrap()
-    }
-    /// indicesBuffer stores the location and size of the array that
-    /// contains the column indices of the corresponding non-zero values.
-    /// The type of index value is long.
-    ///
-    /// For example, the indices of the above X is:
-    /// ```text
-    ///   indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1].
-    /// ```
-    /// Note that the indices are sorted in lexicographical order for each row.
-    #[inline]
-    pub fn indicesBuffer(&self) -> &'a Buffer {
-        self._tab
-            .get::<Buffer>(SparseMatrixIndexCSX::VT_INDICESBUFFER, None)
-            .unwrap()
-    }
-}
-
-impl flatbuffers::Verifiable for SparseMatrixIndexCSX<'_> {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        v.visit_table(pos)?
-            .visit_field::<SparseMatrixCompressedAxis>(
-                &"compressedAxis",
-                Self::VT_COMPRESSEDAXIS,
-                false,
-            )?
-            .visit_field::<flatbuffers::ForwardsUOffset<Int>>(
-                &"indptrType",
-                Self::VT_INDPTRTYPE,
-                true,
-            )?
-            .visit_field::<Buffer>(&"indptrBuffer", Self::VT_INDPTRBUFFER, true)?
-            .visit_field::<flatbuffers::ForwardsUOffset<Int>>(
-                &"indicesType",
-                Self::VT_INDICESTYPE,
-                true,
-            )?
-            .visit_field::<Buffer>(&"indicesBuffer", Self::VT_INDICESBUFFER, true)?
-            .finish();
-        Ok(())
-    }
-}
-pub struct SparseMatrixIndexCSXArgs<'a> {
-    pub compressedAxis: SparseMatrixCompressedAxis,
-    pub indptrType: Option<flatbuffers::WIPOffset<Int<'a>>>,
-    pub indptrBuffer: Option<&'a Buffer>,
-    pub indicesType: Option<flatbuffers::WIPOffset<Int<'a>>>,
-    pub indicesBuffer: Option<&'a Buffer>,
-}
-impl<'a> Default for SparseMatrixIndexCSXArgs<'a> {
-    #[inline]
-    fn default() -> Self {
-        SparseMatrixIndexCSXArgs {
-            compressedAxis: SparseMatrixCompressedAxis::Row,
-            indptrType: None,    // required field
-            indptrBuffer: None,  // required field
-            indicesType: None,   // required field
-            indicesBuffer: None, // required field
-        }
-    }
-}
-pub struct SparseMatrixIndexCSXBuilder<'a: 'b, 'b> {
-    fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    start_: flatbuffers::WIPOffset<flatbuffers::TableUnfinishedWIPOffset>,
-}
-impl<'a: 'b, 'b> SparseMatrixIndexCSXBuilder<'a, 'b> {
-    #[inline]
-    pub fn add_compressedAxis(&mut self, compressedAxis: SparseMatrixCompressedAxis) {
-        self.fbb_.push_slot::<SparseMatrixCompressedAxis>(
-            SparseMatrixIndexCSX::VT_COMPRESSEDAXIS,
-            compressedAxis,
-            SparseMatrixCompressedAxis::Row,
-        );
-    }
-    #[inline]
-    pub fn add_indptrType(&mut self, indptrType: flatbuffers::WIPOffset<Int<'b>>) {
-        self.fbb_.push_slot_always::<flatbuffers::WIPOffset<Int>>(
-            SparseMatrixIndexCSX::VT_INDPTRTYPE,
-            indptrType,
-        );
-    }
-    #[inline]
-    pub fn add_indptrBuffer(&mut self, indptrBuffer: &Buffer) {
-        self.fbb_.push_slot_always::<&Buffer>(
-            SparseMatrixIndexCSX::VT_INDPTRBUFFER,
-            indptrBuffer,
-        );
-    }
-    #[inline]
-    pub fn add_indicesType(&mut self, indicesType: flatbuffers::WIPOffset<Int<'b>>) {
-        self.fbb_.push_slot_always::<flatbuffers::WIPOffset<Int>>(
-            SparseMatrixIndexCSX::VT_INDICESTYPE,
-            indicesType,
-        );
-    }
-    #[inline]
-    pub fn add_indicesBuffer(&mut self, indicesBuffer: &Buffer) {
-        self.fbb_.push_slot_always::<&Buffer>(
-            SparseMatrixIndexCSX::VT_INDICESBUFFER,
-            indicesBuffer,
-        );
-    }
-    #[inline]
-    pub fn new(
-        _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    ) -> SparseMatrixIndexCSXBuilder<'a, 'b> {
-        let start = _fbb.start_table();
-        SparseMatrixIndexCSXBuilder {
-            fbb_: _fbb,
-            start_: start,
-        }
-    }
-    #[inline]
-    pub fn finish(self) -> flatbuffers::WIPOffset<SparseMatrixIndexCSX<'a>> {
-        let o = self.fbb_.end_table(self.start_);
-        self.fbb_
-            .required(o, SparseMatrixIndexCSX::VT_INDPTRTYPE, "indptrType");
-        self.fbb_
-            .required(o, SparseMatrixIndexCSX::VT_INDPTRBUFFER, "indptrBuffer");
-        self.fbb_
-            .required(o, SparseMatrixIndexCSX::VT_INDICESTYPE, "indicesType");
-        self.fbb_
-            .required(o, SparseMatrixIndexCSX::VT_INDICESBUFFER, "indicesBuffer");
-        flatbuffers::WIPOffset::new(o.value())
-    }
-}
-
-impl std::fmt::Debug for SparseMatrixIndexCSX<'_> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let mut ds = f.debug_struct("SparseMatrixIndexCSX");
-        ds.field("compressedAxis", &self.compressedAxis());
-        ds.field("indptrType", &self.indptrType());
-        ds.field("indptrBuffer", &self.indptrBuffer());
-        ds.field("indicesType", &self.indicesType());
-        ds.field("indicesBuffer", &self.indicesBuffer());
-        ds.finish()
-    }
-}
-pub enum SparseTensorIndexCSFOffset {}
-#[derive(Copy, Clone, PartialEq)]
-
-/// Compressed Sparse Fiber (CSF) sparse tensor index.
-pub struct SparseTensorIndexCSF<'a> {
-    pub _tab: flatbuffers::Table<'a>,
-}
-
-impl<'a> flatbuffers::Follow<'a> for SparseTensorIndexCSF<'a> {
-    type Inner = SparseTensorIndexCSF<'a>;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        Self {
-            _tab: flatbuffers::Table { buf, loc },
-        }
-    }
-}
-
-impl<'a> SparseTensorIndexCSF<'a> {
-    #[inline]
-    pub fn init_from_table(table: flatbuffers::Table<'a>) -> Self {
-        SparseTensorIndexCSF { _tab: table }
-    }
-    #[allow(unused_mut)]
-    pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>(
-        _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>,
-        args: &'args SparseTensorIndexCSFArgs<'args>,
-    ) -> flatbuffers::WIPOffset<SparseTensorIndexCSF<'bldr>> {
-        let mut builder = SparseTensorIndexCSFBuilder::new(_fbb);
-        if let Some(x) = args.axisOrder {
-            builder.add_axisOrder(x);
-        }
-        if let Some(x) = args.indicesBuffers {
-            builder.add_indicesBuffers(x);
-        }
-        if let Some(x) = args.indicesType {
-            builder.add_indicesType(x);
-        }
-        if let Some(x) = args.indptrBuffers {
-            builder.add_indptrBuffers(x);
-        }
-        if let Some(x) = args.indptrType {
-            builder.add_indptrType(x);
-        }
-        builder.finish()
-    }
-
-    pub const VT_INDPTRTYPE: flatbuffers::VOffsetT = 4;
-    pub const VT_INDPTRBUFFERS: flatbuffers::VOffsetT = 6;
-    pub const VT_INDICESTYPE: flatbuffers::VOffsetT = 8;
-    pub const VT_INDICESBUFFERS: flatbuffers::VOffsetT = 10;
-    pub const VT_AXISORDER: flatbuffers::VOffsetT = 12;
-
-    /// CSF is a generalization of compressed sparse row (CSR) index.
-    /// See [smith2017knl](http://shaden.io/pub-files/smith2017knl.pdf)
-    ///
-    /// CSF index recursively compresses each dimension of a tensor into a set
-    /// of prefix trees. Each path from a root to leaf forms one tensor
-    /// non-zero index. CSF is implemented with two arrays of buffers and one
-    /// arrays of integers.
-    ///
-    /// For example, let X be a 2x3x4x5 tensor and let it have the following
-    /// 8 non-zero values:
-    /// ```text
-    ///   X[0, 0, 0, 1] := 1
-    ///   X[0, 0, 0, 2] := 2
-    ///   X[0, 1, 0, 0] := 3
-    ///   X[0, 1, 0, 2] := 4
-    ///   X[0, 1, 1, 0] := 5
-    ///   X[1, 1, 1, 0] := 6
-    ///   X[1, 1, 1, 1] := 7
-    ///   X[1, 1, 1, 2] := 8
-    /// ```
-    /// As a prefix tree this would be represented as:
-    /// ```text
-    ///         0          1
-    ///        / \         |
-    ///       0   1        1
-    ///      /   / \       |
-    ///     0   0   1      1
-    ///    /|  /|   |    /| |
-    ///   1 2 0 2   0   0 1 2
-    /// ```
-    /// The type of values in indptrBuffers
-    #[inline]
-    pub fn indptrType(&self) -> Int<'a> {
-        self._tab
-            .get::<flatbuffers::ForwardsUOffset<Int>>(
-                SparseTensorIndexCSF::VT_INDPTRTYPE,
-                None,
-            )
-            .unwrap()
-    }
-    /// indptrBuffers stores the sparsity structure.
-    /// Each two consecutive dimensions in a tensor correspond to a buffer in
-    /// indptrBuffers. A pair of consecutive values at `indptrBuffers[dim][i]`
-    /// and `indptrBuffers[dim][i + 1]` signify a range of nodes in
-    /// `indicesBuffers[dim + 1]` who are children of `indicesBuffers[dim][i]` node.
-    ///
-    /// For example, the indptrBuffers for the above X is:
-    /// ```text
-    ///   indptrBuffer(X) = [
-    ///                       [0, 2, 3],
-    ///                       [0, 1, 3, 4],
-    ///                       [0, 2, 4, 5, 8]
-    ///                     ].
-    /// ```
-    #[inline]
-    pub fn indptrBuffers(&self) -> &'a [Buffer] {
-        self._tab
-            .get::<flatbuffers::ForwardsUOffset<flatbuffers::Vector<'a, Buffer>>>(
-                SparseTensorIndexCSF::VT_INDPTRBUFFERS,
-                None,
-            )
-            .map(|v| v.safe_slice())
-            .unwrap()
-    }
-    /// The type of values in indicesBuffers
-    #[inline]
-    pub fn indicesType(&self) -> Int<'a> {
-        self._tab
-            .get::<flatbuffers::ForwardsUOffset<Int>>(
-                SparseTensorIndexCSF::VT_INDICESTYPE,
-                None,
-            )
-            .unwrap()
-    }
-    /// indicesBuffers stores values of nodes.
-    /// Each tensor dimension corresponds to a buffer in indicesBuffers.
-    /// For example, the indicesBuffers for the above X is:
-    /// ```text
-    ///   indicesBuffer(X) = [
-    ///                        [0, 1],
-    ///                        [0, 1, 1],
-    ///                        [0, 0, 1, 1],
-    ///                        [1, 2, 0, 2, 0, 0, 1, 2]
-    ///                      ].
-    /// ```
-    #[inline]
-    pub fn indicesBuffers(&self) -> &'a [Buffer] {
-        self._tab
-            .get::<flatbuffers::ForwardsUOffset<flatbuffers::Vector<'a, Buffer>>>(
-                SparseTensorIndexCSF::VT_INDICESBUFFERS,
-                None,
-            )
-            .map(|v| v.safe_slice())
-            .unwrap()
-    }
-    /// axisOrder stores the sequence in which dimensions were traversed to
-    /// produce the prefix tree.
-    /// For example, the axisOrder for the above X is:
-    /// ```text
-    ///   axisOrder(X) = [0, 1, 2, 3].
-    /// ```
-    #[inline]
-    pub fn axisOrder(&self) -> flatbuffers::Vector<'a, i32> {
-        self._tab
-            .get::<flatbuffers::ForwardsUOffset<flatbuffers::Vector<'a, i32>>>(
-                SparseTensorIndexCSF::VT_AXISORDER,
-                None,
-            )
-            .unwrap()
-    }
-}
-
-impl flatbuffers::Verifiable for SparseTensorIndexCSF<'_> {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        v.visit_table(pos)?
-            .visit_field::<flatbuffers::ForwardsUOffset<Int>>(
-                &"indptrType",
-                Self::VT_INDPTRTYPE,
-                true,
-            )?
-            .visit_field::<flatbuffers::ForwardsUOffset<flatbuffers::Vector<'_, Buffer>>>(
-                &"indptrBuffers",
-                Self::VT_INDPTRBUFFERS,
-                true,
-            )?
-            .visit_field::<flatbuffers::ForwardsUOffset<Int>>(
-                &"indicesType",
-                Self::VT_INDICESTYPE,
-                true,
-            )?
-            .visit_field::<flatbuffers::ForwardsUOffset<flatbuffers::Vector<'_, Buffer>>>(
-                &"indicesBuffers",
-                Self::VT_INDICESBUFFERS,
-                true,
-            )?
-            .visit_field::<flatbuffers::ForwardsUOffset<flatbuffers::Vector<'_, i32>>>(
-                &"axisOrder",
-                Self::VT_AXISORDER,
-                true,
-            )?
-            .finish();
-        Ok(())
-    }
-}
-pub struct SparseTensorIndexCSFArgs<'a> {
-    pub indptrType: Option<flatbuffers::WIPOffset<Int<'a>>>,
-    pub indptrBuffers: Option<flatbuffers::WIPOffset<flatbuffers::Vector<'a, Buffer>>>,
-    pub indicesType: Option<flatbuffers::WIPOffset<Int<'a>>>,
-    pub indicesBuffers: Option<flatbuffers::WIPOffset<flatbuffers::Vector<'a, Buffer>>>,
-    pub axisOrder: Option<flatbuffers::WIPOffset<flatbuffers::Vector<'a, i32>>>,
-}
-impl<'a> Default for SparseTensorIndexCSFArgs<'a> {
-    #[inline]
-    fn default() -> Self {
-        SparseTensorIndexCSFArgs {
-            indptrType: None,     // required field
-            indptrBuffers: None,  // required field
-            indicesType: None,    // required field
-            indicesBuffers: None, // required field
-            axisOrder: None,      // required field
-        }
-    }
-}
-pub struct SparseTensorIndexCSFBuilder<'a: 'b, 'b> {
-    fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    start_: flatbuffers::WIPOffset<flatbuffers::TableUnfinishedWIPOffset>,
-}
-impl<'a: 'b, 'b> SparseTensorIndexCSFBuilder<'a, 'b> {
-    #[inline]
-    pub fn add_indptrType(&mut self, indptrType: flatbuffers::WIPOffset<Int<'b>>) {
-        self.fbb_.push_slot_always::<flatbuffers::WIPOffset<Int>>(
-            SparseTensorIndexCSF::VT_INDPTRTYPE,
-            indptrType,
-        );
-    }
-    #[inline]
-    pub fn add_indptrBuffers(
-        &mut self,
-        indptrBuffers: flatbuffers::WIPOffset<flatbuffers::Vector<'b, Buffer>>,
-    ) {
-        self.fbb_.push_slot_always::<flatbuffers::WIPOffset<_>>(
-            SparseTensorIndexCSF::VT_INDPTRBUFFERS,
-            indptrBuffers,
-        );
-    }
-    #[inline]
-    pub fn add_indicesType(&mut self, indicesType: flatbuffers::WIPOffset<Int<'b>>) {
-        self.fbb_.push_slot_always::<flatbuffers::WIPOffset<Int>>(
-            SparseTensorIndexCSF::VT_INDICESTYPE,
-            indicesType,
-        );
-    }
-    #[inline]
-    pub fn add_indicesBuffers(
-        &mut self,
-        indicesBuffers: flatbuffers::WIPOffset<flatbuffers::Vector<'b, Buffer>>,
-    ) {
-        self.fbb_.push_slot_always::<flatbuffers::WIPOffset<_>>(
-            SparseTensorIndexCSF::VT_INDICESBUFFERS,
-            indicesBuffers,
-        );
-    }
-    #[inline]
-    pub fn add_axisOrder(
-        &mut self,
-        axisOrder: flatbuffers::WIPOffset<flatbuffers::Vector<'b, i32>>,
-    ) {
-        self.fbb_.push_slot_always::<flatbuffers::WIPOffset<_>>(
-            SparseTensorIndexCSF::VT_AXISORDER,
-            axisOrder,
-        );
-    }
-    #[inline]
-    pub fn new(
-        _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    ) -> SparseTensorIndexCSFBuilder<'a, 'b> {
-        let start = _fbb.start_table();
-        SparseTensorIndexCSFBuilder {
-            fbb_: _fbb,
-            start_: start,
-        }
-    }
-    #[inline]
-    pub fn finish(self) -> flatbuffers::WIPOffset<SparseTensorIndexCSF<'a>> {
-        let o = self.fbb_.end_table(self.start_);
-        self.fbb_
-            .required(o, SparseTensorIndexCSF::VT_INDPTRTYPE, "indptrType");
-        self.fbb_
-            .required(o, SparseTensorIndexCSF::VT_INDPTRBUFFERS, "indptrBuffers");
-        self.fbb_
-            .required(o, SparseTensorIndexCSF::VT_INDICESTYPE, "indicesType");
-        self.fbb_
-            .required(o, SparseTensorIndexCSF::VT_INDICESBUFFERS, "indicesBuffers");
-        self.fbb_
-            .required(o, SparseTensorIndexCSF::VT_AXISORDER, "axisOrder");
-        flatbuffers::WIPOffset::new(o.value())
-    }
-}
-
-impl std::fmt::Debug for SparseTensorIndexCSF<'_> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let mut ds = f.debug_struct("SparseTensorIndexCSF");
-        ds.field("indptrType", &self.indptrType());
-        ds.field("indptrBuffers", &self.indptrBuffers());
-        ds.field("indicesType", &self.indicesType());
-        ds.field("indicesBuffers", &self.indicesBuffers());
-        ds.field("axisOrder", &self.axisOrder());
-        ds.finish()
-    }
-}
-pub enum SparseTensorOffset {}
-#[derive(Copy, Clone, PartialEq)]
-
-pub struct SparseTensor<'a> {
-    pub _tab: flatbuffers::Table<'a>,
-}
-
-impl<'a> flatbuffers::Follow<'a> for SparseTensor<'a> {
-    type Inner = SparseTensor<'a>;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        Self {
-            _tab: flatbuffers::Table { buf, loc },
-        }
-    }
-}
-
-impl<'a> SparseTensor<'a> {
-    #[inline]
-    pub fn init_from_table(table: flatbuffers::Table<'a>) -> Self {
-        SparseTensor { _tab: table }
-    }
-    #[allow(unused_mut)]
-    pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>(
-        _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>,
-        args: &'args SparseTensorArgs<'args>,
-    ) -> flatbuffers::WIPOffset<SparseTensor<'bldr>> {
-        let mut builder = SparseTensorBuilder::new(_fbb);
-        builder.add_non_zero_length(args.non_zero_length);
-        if let Some(x) = args.data {
-            builder.add_data(x);
-        }
-        if let Some(x) = args.sparseIndex {
-            builder.add_sparseIndex(x);
-        }
-        if let Some(x) = args.shape {
-            builder.add_shape(x);
-        }
-        if let Some(x) = args.type_ {
-            builder.add_type_(x);
-        }
-        builder.add_sparseIndex_type(args.sparseIndex_type);
-        builder.add_type_type(args.type_type);
-        builder.finish()
-    }
-
-    pub const VT_TYPE_TYPE: flatbuffers::VOffsetT = 4;
-    pub const VT_TYPE_: flatbuffers::VOffsetT = 6;
-    pub const VT_SHAPE: flatbuffers::VOffsetT = 8;
-    pub const VT_NON_ZERO_LENGTH: flatbuffers::VOffsetT = 10;
-    pub const VT_SPARSEINDEX_TYPE: flatbuffers::VOffsetT = 12;
-    pub const VT_SPARSEINDEX: flatbuffers::VOffsetT = 14;
-    pub const VT_DATA: flatbuffers::VOffsetT = 16;
-
-    #[inline]
-    pub fn type_type(&self) -> Type {
-        self._tab
-            .get::<Type>(SparseTensor::VT_TYPE_TYPE, Some(Type::NONE))
-            .unwrap()
-    }
-    /// The type of data contained in a value cell.
-    /// Currently only fixed-width value types are supported,
-    /// no strings or nested types.
-    #[inline]
-    pub fn type_(&self) -> flatbuffers::Table<'a> {
-        self._tab
-            .get::<flatbuffers::ForwardsUOffset<flatbuffers::Table<'a>>>(
-                SparseTensor::VT_TYPE_,
-                None,
-            )
-            .unwrap()
-    }
-    /// The dimensions of the tensor, optionally named.
-    #[inline]
-    pub fn shape(
-        &self,
-    ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<TensorDim<'a>>> {
-        self._tab
-            .get::<flatbuffers::ForwardsUOffset<
-                flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<TensorDim>>,
-            >>(SparseTensor::VT_SHAPE, None)
-            .unwrap()
-    }
-    /// The number of non-zero values in a sparse tensor.
-    #[inline]
-    pub fn non_zero_length(&self) -> i64 {
-        self._tab
-            .get::<i64>(SparseTensor::VT_NON_ZERO_LENGTH, Some(0))
-            .unwrap()
-    }
-    #[inline]
-    pub fn sparseIndex_type(&self) -> SparseTensorIndex {
-        self._tab
-            .get::<SparseTensorIndex>(
-                SparseTensor::VT_SPARSEINDEX_TYPE,
-                Some(SparseTensorIndex::NONE),
-            )
-            .unwrap()
-    }
-    /// Sparse tensor index
-    #[inline]
-    pub fn sparseIndex(&self) -> flatbuffers::Table<'a> {
-        self._tab
-            .get::<flatbuffers::ForwardsUOffset<flatbuffers::Table<'a>>>(
-                SparseTensor::VT_SPARSEINDEX,
-                None,
-            )
-            .unwrap()
-    }
-    /// The location and size of the tensor's data
-    #[inline]
-    pub fn data(&self) -> &'a Buffer {
-        self._tab
-            .get::<Buffer>(SparseTensor::VT_DATA, None)
-            .unwrap()
-    }
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_null(&self) -> Option<Null<'a>> {
-        if self.type_type() == Type::Null {
-            let u = self.type_();
-            Some(Null::init_from_table(u))
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_int(&self) -> Option<Int<'a>> {
-        if self.type_type() == Type::Int {
-            let u = self.type_();
-            Some(Int::init_from_table(u))
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_floating_point(&self) -> Option<FloatingPoint<'a>> {
-        if self.type_type() == Type::FloatingPoint {
-            let u = self.type_();
-            Some(FloatingPoint::init_from_table(u))
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_binary(&self) -> Option<Binary<'a>> {
-        if self.type_type() == Type::Binary {
-            let u = self.type_();
-            Some(Binary::init_from_table(u))
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_utf_8(&self) -> Option<Utf8<'a>> {
-        if self.type_type() == Type::Utf8 {
-            let u = self.type_();
-            Some(Utf8::init_from_table(u))
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_bool(&self) -> Option<Bool<'a>> {
-        if self.type_type() == Type::Bool {
-            let u = self.type_();
-            Some(Bool::init_from_table(u))
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_decimal(&self) -> Option<Decimal<'a>> {
-        if self.type_type() == Type::Decimal {
-            let u = self.type_();
-            Some(Decimal::init_from_table(u))
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_date(&self) -> Option<Date<'a>> {
-        if self.type_type() == Type::Date {
-            let u = self.type_();
-            Some(Date::init_from_table(u))
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_time(&self) -> Option<Time<'a>> {
-        if self.type_type() == Type::Time {
-            let u = self.type_();
-            Some(Time::init_from_table(u))
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_timestamp(&self) -> Option<Timestamp<'a>> {
-        if self.type_type() == Type::Timestamp {
-            let u = self.type_();
-            Some(Timestamp::init_from_table(u))
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_interval(&self) -> Option<Interval<'a>> {
-        if self.type_type() == Type::Interval {
-            let u = self.type_();
-            Some(Interval::init_from_table(u))
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_list(&self) -> Option<List<'a>> {
-        if self.type_type() == Type::List {
-            let u = self.type_();
-            Some(List::init_from_table(u))
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_struct_(&self) -> Option<Struct_<'a>> {
-        if self.type_type() == Type::Struct_ {
-            let u = self.type_();
-            Some(Struct_::init_from_table(u))
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_union(&self) -> Option<Union<'a>> {
-        if self.type_type() == Type::Union {
-            let u = self.type_();
-            Some(Union::init_from_table(u))
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_fixed_size_binary(&self) -> Option<FixedSizeBinary<'a>> {
-        if self.type_type() == Type::FixedSizeBinary {
-            let u = self.type_();
-            Some(FixedSizeBinary::init_from_table(u))
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_fixed_size_list(&self) -> Option<FixedSizeList<'a>> {
-        if self.type_type() == Type::FixedSizeList {
-            let u = self.type_();
-            Some(FixedSizeList::init_from_table(u))
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_map(&self) -> Option<Map<'a>> {
-        if self.type_type() == Type::Map {
-            let u = self.type_();
-            Some(Map::init_from_table(u))
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_duration(&self) -> Option<Duration<'a>> {
-        if self.type_type() == Type::Duration {
-            let u = self.type_();
-            Some(Duration::init_from_table(u))
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_large_binary(&self) -> Option<LargeBinary<'a>> {
-        if self.type_type() == Type::LargeBinary {
-            let u = self.type_();
-            Some(LargeBinary::init_from_table(u))
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_large_utf_8(&self) -> Option<LargeUtf8<'a>> {
-        if self.type_type() == Type::LargeUtf8 {
-            let u = self.type_();
-            Some(LargeUtf8::init_from_table(u))
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_large_list(&self) -> Option<LargeList<'a>> {
-        if self.type_type() == Type::LargeList {
-            let u = self.type_();
-            Some(LargeList::init_from_table(u))
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn sparseIndex_as_sparse_tensor_index_coo(
-        &self,
-    ) -> Option<SparseTensorIndexCOO<'a>> {
-        if self.sparseIndex_type() == SparseTensorIndex::SparseTensorIndexCOO {
-            let u = self.sparseIndex();
-            Some(SparseTensorIndexCOO::init_from_table(u))
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn sparseIndex_as_sparse_matrix_index_csx(
-        &self,
-    ) -> Option<SparseMatrixIndexCSX<'a>> {
-        if self.sparseIndex_type() == SparseTensorIndex::SparseMatrixIndexCSX {
-            let u = self.sparseIndex();
-            Some(SparseMatrixIndexCSX::init_from_table(u))
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn sparseIndex_as_sparse_tensor_index_csf(
-        &self,
-    ) -> Option<SparseTensorIndexCSF<'a>> {
-        if self.sparseIndex_type() == SparseTensorIndex::SparseTensorIndexCSF {
-            let u = self.sparseIndex();
-            Some(SparseTensorIndexCSF::init_from_table(u))
-        } else {
-            None
-        }
-    }
-}
-
-impl flatbuffers::Verifiable for SparseTensor<'_> {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        v.visit_table(pos)?
-     .visit_union::<Type, _>(&"type_type", Self::VT_TYPE_TYPE, &"type_", Self::VT_TYPE_, true, |key, v, pos| {
-        match key {
-          Type::Null => v.verify_union_variant::<flatbuffers::ForwardsUOffset<Null>>("Type::Null", pos),
-          Type::Int => v.verify_union_variant::<flatbuffers::ForwardsUOffset<Int>>("Type::Int", pos),
-          Type::FloatingPoint => v.verify_union_variant::<flatbuffers::ForwardsUOffset<FloatingPoint>>("Type::FloatingPoint", pos),
-          Type::Binary => v.verify_union_variant::<flatbuffers::ForwardsUOffset<Binary>>("Type::Binary", pos),
-          Type::Utf8 => v.verify_union_variant::<flatbuffers::ForwardsUOffset<Utf8>>("Type::Utf8", pos),
-          Type::Bool => v.verify_union_variant::<flatbuffers::ForwardsUOffset<Bool>>("Type::Bool", pos),
-          Type::Decimal => v.verify_union_variant::<flatbuffers::ForwardsUOffset<Decimal>>("Type::Decimal", pos),
-          Type::Date => v.verify_union_variant::<flatbuffers::ForwardsUOffset<Date>>("Type::Date", pos),
-          Type::Time => v.verify_union_variant::<flatbuffers::ForwardsUOffset<Time>>("Type::Time", pos),
-          Type::Timestamp => v.verify_union_variant::<flatbuffers::ForwardsUOffset<Timestamp>>("Type::Timestamp", pos),
-          Type::Interval => v.verify_union_variant::<flatbuffers::ForwardsUOffset<Interval>>("Type::Interval", pos),
-          Type::List => v.verify_union_variant::<flatbuffers::ForwardsUOffset<List>>("Type::List", pos),
-          Type::Struct_ => v.verify_union_variant::<flatbuffers::ForwardsUOffset<Struct_>>("Type::Struct_", pos),
-          Type::Union => v.verify_union_variant::<flatbuffers::ForwardsUOffset<Union>>("Type::Union", pos),
-          Type::FixedSizeBinary => v.verify_union_variant::<flatbuffers::ForwardsUOffset<FixedSizeBinary>>("Type::FixedSizeBinary", pos),
-          Type::FixedSizeList => v.verify_union_variant::<flatbuffers::ForwardsUOffset<FixedSizeList>>("Type::FixedSizeList", pos),
-          Type::Map => v.verify_union_variant::<flatbuffers::ForwardsUOffset<Map>>("Type::Map", pos),
-          Type::Duration => v.verify_union_variant::<flatbuffers::ForwardsUOffset<Duration>>("Type::Duration", pos),
-          Type::LargeBinary => v.verify_union_variant::<flatbuffers::ForwardsUOffset<LargeBinary>>("Type::LargeBinary", pos),
-          Type::LargeUtf8 => v.verify_union_variant::<flatbuffers::ForwardsUOffset<LargeUtf8>>("Type::LargeUtf8", pos),
-          Type::LargeList => v.verify_union_variant::<flatbuffers::ForwardsUOffset<LargeList>>("Type::LargeList", pos),
-          _ => Ok(()),
-        }
-     })?
-     .visit_field::<flatbuffers::ForwardsUOffset<flatbuffers::Vector<'_, flatbuffers::ForwardsUOffset<TensorDim>>>>(&"shape", Self::VT_SHAPE, true)?
-     .visit_field::<i64>(&"non_zero_length", Self::VT_NON_ZERO_LENGTH, false)?
-     .visit_union::<SparseTensorIndex, _>(&"sparseIndex_type", Self::VT_SPARSEINDEX_TYPE, &"sparseIndex", Self::VT_SPARSEINDEX, true, |key, v, pos| {
-        match key {
-          SparseTensorIndex::SparseTensorIndexCOO => v.verify_union_variant::<flatbuffers::ForwardsUOffset<SparseTensorIndexCOO>>("SparseTensorIndex::SparseTensorIndexCOO", pos),
-          SparseTensorIndex::SparseMatrixIndexCSX => v.verify_union_variant::<flatbuffers::ForwardsUOffset<SparseMatrixIndexCSX>>("SparseTensorIndex::SparseMatrixIndexCSX", pos),
-          SparseTensorIndex::SparseTensorIndexCSF => v.verify_union_variant::<flatbuffers::ForwardsUOffset<SparseTensorIndexCSF>>("SparseTensorIndex::SparseTensorIndexCSF", pos),
-          _ => Ok(()),
-        }
-     })?
-     .visit_field::<Buffer>(&"data", Self::VT_DATA, true)?
-     .finish();
-        Ok(())
-    }
-}
-pub struct SparseTensorArgs<'a> {
-    pub type_type: Type,
-    pub type_: Option<flatbuffers::WIPOffset<flatbuffers::UnionWIPOffset>>,
-    pub shape: Option<
-        flatbuffers::WIPOffset<
-            flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<TensorDim<'a>>>,
-        >,
-    >,
-    pub non_zero_length: i64,
-    pub sparseIndex_type: SparseTensorIndex,
-    pub sparseIndex: Option<flatbuffers::WIPOffset<flatbuffers::UnionWIPOffset>>,
-    pub data: Option<&'a Buffer>,
-}
-impl<'a> Default for SparseTensorArgs<'a> {
-    #[inline]
-    fn default() -> Self {
-        SparseTensorArgs {
-            type_type: Type::NONE,
-            type_: None, // required field
-            shape: None, // required field
-            non_zero_length: 0,
-            sparseIndex_type: SparseTensorIndex::NONE,
-            sparseIndex: None, // required field
-            data: None,        // required field
-        }
-    }
-}
-pub struct SparseTensorBuilder<'a: 'b, 'b> {
-    fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    start_: flatbuffers::WIPOffset<flatbuffers::TableUnfinishedWIPOffset>,
-}
-impl<'a: 'b, 'b> SparseTensorBuilder<'a, 'b> {
-    #[inline]
-    pub fn add_type_type(&mut self, type_type: Type) {
-        self.fbb_
-            .push_slot::<Type>(SparseTensor::VT_TYPE_TYPE, type_type, Type::NONE);
-    }
-    #[inline]
-    pub fn add_type_(
-        &mut self,
-        type_: flatbuffers::WIPOffset<flatbuffers::UnionWIPOffset>,
-    ) {
-        self.fbb_
-            .push_slot_always::<flatbuffers::WIPOffset<_>>(SparseTensor::VT_TYPE_, type_);
-    }
-    #[inline]
-    pub fn add_shape(
-        &mut self,
-        shape: flatbuffers::WIPOffset<
-            flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<TensorDim<'b>>>,
-        >,
-    ) {
-        self.fbb_
-            .push_slot_always::<flatbuffers::WIPOffset<_>>(SparseTensor::VT_SHAPE, shape);
-    }
-    #[inline]
-    pub fn add_non_zero_length(&mut self, non_zero_length: i64) {
-        self.fbb_
-            .push_slot::<i64>(SparseTensor::VT_NON_ZERO_LENGTH, non_zero_length, 0);
-    }
-    #[inline]
-    pub fn add_sparseIndex_type(&mut self, sparseIndex_type: SparseTensorIndex) {
-        self.fbb_.push_slot::<SparseTensorIndex>(
-            SparseTensor::VT_SPARSEINDEX_TYPE,
-            sparseIndex_type,
-            SparseTensorIndex::NONE,
-        );
-    }
-    #[inline]
-    pub fn add_sparseIndex(
-        &mut self,
-        sparseIndex: flatbuffers::WIPOffset<flatbuffers::UnionWIPOffset>,
-    ) {
-        self.fbb_.push_slot_always::<flatbuffers::WIPOffset<_>>(
-            SparseTensor::VT_SPARSEINDEX,
-            sparseIndex,
-        );
-    }
-    #[inline]
-    pub fn add_data(&mut self, data: &Buffer) {
-        self.fbb_
-            .push_slot_always::<&Buffer>(SparseTensor::VT_DATA, data);
-    }
-    #[inline]
-    pub fn new(
-        _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    ) -> SparseTensorBuilder<'a, 'b> {
-        let start = _fbb.start_table();
-        SparseTensorBuilder {
-            fbb_: _fbb,
-            start_: start,
-        }
-    }
-    #[inline]
-    pub fn finish(self) -> flatbuffers::WIPOffset<SparseTensor<'a>> {
-        let o = self.fbb_.end_table(self.start_);
-        self.fbb_.required(o, SparseTensor::VT_TYPE_, "type_");
-        self.fbb_.required(o, SparseTensor::VT_SHAPE, "shape");
-        self.fbb_
-            .required(o, SparseTensor::VT_SPARSEINDEX, "sparseIndex");
-        self.fbb_.required(o, SparseTensor::VT_DATA, "data");
-        flatbuffers::WIPOffset::new(o.value())
-    }
-}
-
-impl std::fmt::Debug for SparseTensor<'_> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let mut ds = f.debug_struct("SparseTensor");
-        ds.field("type_type", &self.type_type());
-        match self.type_type() {
-            Type::Null => {
-                if let Some(x) = self.type_as_null() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::Int => {
-                if let Some(x) = self.type_as_int() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::FloatingPoint => {
-                if let Some(x) = self.type_as_floating_point() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::Binary => {
-                if let Some(x) = self.type_as_binary() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::Utf8 => {
-                if let Some(x) = self.type_as_utf_8() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::Bool => {
-                if let Some(x) = self.type_as_bool() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::Decimal => {
-                if let Some(x) = self.type_as_decimal() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::Date => {
-                if let Some(x) = self.type_as_date() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::Time => {
-                if let Some(x) = self.type_as_time() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::Timestamp => {
-                if let Some(x) = self.type_as_timestamp() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::Interval => {
-                if let Some(x) = self.type_as_interval() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::List => {
-                if let Some(x) = self.type_as_list() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::Struct_ => {
-                if let Some(x) = self.type_as_struct_() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::Union => {
-                if let Some(x) = self.type_as_union() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::FixedSizeBinary => {
-                if let Some(x) = self.type_as_fixed_size_binary() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::FixedSizeList => {
-                if let Some(x) = self.type_as_fixed_size_list() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::Map => {
-                if let Some(x) = self.type_as_map() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::Duration => {
-                if let Some(x) = self.type_as_duration() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::LargeBinary => {
-                if let Some(x) = self.type_as_large_binary() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::LargeUtf8 => {
-                if let Some(x) = self.type_as_large_utf_8() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::LargeList => {
-                if let Some(x) = self.type_as_large_list() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            _ => {
-                let x: Option<()> = None;
-                ds.field("type_", &x)
-            }
-        };
-        ds.field("shape", &self.shape());
-        ds.field("non_zero_length", &self.non_zero_length());
-        ds.field("sparseIndex_type", &self.sparseIndex_type());
-        match self.sparseIndex_type() {
-            SparseTensorIndex::SparseTensorIndexCOO => {
-                if let Some(x) = self.sparseIndex_as_sparse_tensor_index_coo() {
-                    ds.field("sparseIndex", &x)
-                } else {
-                    ds.field(
-                        "sparseIndex",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            SparseTensorIndex::SparseMatrixIndexCSX => {
-                if let Some(x) = self.sparseIndex_as_sparse_matrix_index_csx() {
-                    ds.field("sparseIndex", &x)
-                } else {
-                    ds.field(
-                        "sparseIndex",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            SparseTensorIndex::SparseTensorIndexCSF => {
-                if let Some(x) = self.sparseIndex_as_sparse_tensor_index_csf() {
-                    ds.field("sparseIndex", &x)
-                } else {
-                    ds.field(
-                        "sparseIndex",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            _ => {
-                let x: Option<()> = None;
-                ds.field("sparseIndex", &x)
-            }
-        };
-        ds.field("data", &self.data());
-        ds.finish()
-    }
-}
-#[inline]
-#[deprecated(since = "2.0.0", note = "Deprecated in favor of `root_as...` methods.")]
-pub fn get_root_as_sparse_tensor<'a>(buf: &'a [u8]) -> SparseTensor<'a> {
-    unsafe { flatbuffers::root_unchecked::<SparseTensor<'a>>(buf) }
-}
-
-#[inline]
-#[deprecated(since = "2.0.0", note = "Deprecated in favor of `root_as...` methods.")]
-pub fn get_size_prefixed_root_as_sparse_tensor<'a>(buf: &'a [u8]) -> SparseTensor<'a> {
-    unsafe { flatbuffers::size_prefixed_root_unchecked::<SparseTensor<'a>>(buf) }
-}
-
-#[inline]
-/// Verifies that a buffer of bytes contains a `SparseTensor`
-/// and returns it.
-/// Note that verification is still experimental and may not
-/// catch every error, or be maximally performant. For the
-/// previous, unchecked, behavior use
-/// `root_as_sparse_tensor_unchecked`.
-pub fn root_as_sparse_tensor(
-    buf: &[u8],
-) -> Result<SparseTensor, flatbuffers::InvalidFlatbuffer> {
-    flatbuffers::root::<SparseTensor>(buf)
-}
-#[inline]
-/// Verifies that a buffer of bytes contains a size prefixed
-/// `SparseTensor` and returns it.
-/// Note that verification is still experimental and may not
-/// catch every error, or be maximally performant. For the
-/// previous, unchecked, behavior use
-/// `size_prefixed_root_as_sparse_tensor_unchecked`.
-pub fn size_prefixed_root_as_sparse_tensor(
-    buf: &[u8],
-) -> Result<SparseTensor, flatbuffers::InvalidFlatbuffer> {
-    flatbuffers::size_prefixed_root::<SparseTensor>(buf)
-}
-#[inline]
-/// Verifies, with the given options, that a buffer of bytes
-/// contains a `SparseTensor` and returns it.
-/// Note that verification is still experimental and may not
-/// catch every error, or be maximally performant. For the
-/// previous, unchecked, behavior use
-/// `root_as_sparse_tensor_unchecked`.
-pub fn root_as_sparse_tensor_with_opts<'b, 'o>(
-    opts: &'o flatbuffers::VerifierOptions,
-    buf: &'b [u8],
-) -> Result<SparseTensor<'b>, flatbuffers::InvalidFlatbuffer> {
-    flatbuffers::root_with_opts::<SparseTensor<'b>>(opts, buf)
-}
-#[inline]
-/// Verifies, with the given verifier options, that a buffer of
-/// bytes contains a size prefixed `SparseTensor` and returns
-/// it. Note that verification is still experimental and may not
-/// catch every error, or be maximally performant. For the
-/// previous, unchecked, behavior use
-/// `root_as_sparse_tensor_unchecked`.
-pub fn size_prefixed_root_as_sparse_tensor_with_opts<'b, 'o>(
-    opts: &'o flatbuffers::VerifierOptions,
-    buf: &'b [u8],
-) -> Result<SparseTensor<'b>, flatbuffers::InvalidFlatbuffer> {
-    flatbuffers::size_prefixed_root_with_opts::<SparseTensor<'b>>(opts, buf)
-}
-#[inline]
-/// Assumes, without verification, that a buffer of bytes contains a SparseTensor and returns it.
-/// # Safety
-/// Callers must trust the given bytes do indeed contain a valid `SparseTensor`.
-pub unsafe fn root_as_sparse_tensor_unchecked(buf: &[u8]) -> SparseTensor {
-    flatbuffers::root_unchecked::<SparseTensor>(buf)
-}
-#[inline]
-/// Assumes, without verification, that a buffer of bytes contains a size prefixed SparseTensor and returns it.
-/// # Safety
-/// Callers must trust the given bytes do indeed contain a valid size prefixed `SparseTensor`.
-pub unsafe fn size_prefixed_root_as_sparse_tensor_unchecked(buf: &[u8]) -> SparseTensor {
-    flatbuffers::size_prefixed_root_unchecked::<SparseTensor>(buf)
-}
-#[inline]
-pub fn finish_sparse_tensor_buffer<'a, 'b>(
-    fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    root: flatbuffers::WIPOffset<SparseTensor<'a>>,
-) {
-    fbb.finish(root, None);
-}
-
-#[inline]
-pub fn finish_size_prefixed_sparse_tensor_buffer<'a, 'b>(
-    fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    root: flatbuffers::WIPOffset<SparseTensor<'a>>,
-) {
-    fbb.finish_size_prefixed(root, None);
-}
diff --git a/rust/arrow/src/ipc/gen/Tensor.rs b/rust/arrow/src/ipc/gen/Tensor.rs
deleted file mode 100644
index 120636eaf1f..00000000000
--- a/rust/arrow/src/ipc/gen/Tensor.rs
+++ /dev/null
@@ -1,913 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#![allow(dead_code)]
-#![allow(unused_imports)]
-
-use crate::ipc::gen::Schema::*;
-use flatbuffers::EndianScalar;
-use std::{cmp::Ordering, mem};
-// automatically generated by the FlatBuffers compiler, do not modify
-
-pub enum TensorDimOffset {}
-#[derive(Copy, Clone, PartialEq)]
-
-/// ----------------------------------------------------------------------
-/// Data structures for dense tensors
-/// Shape data for a single axis in a tensor
-pub struct TensorDim<'a> {
-    pub _tab: flatbuffers::Table<'a>,
-}
-
-impl<'a> flatbuffers::Follow<'a> for TensorDim<'a> {
-    type Inner = TensorDim<'a>;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        Self {
-            _tab: flatbuffers::Table { buf, loc },
-        }
-    }
-}
-
-impl<'a> TensorDim<'a> {
-    #[inline]
-    pub fn init_from_table(table: flatbuffers::Table<'a>) -> Self {
-        TensorDim { _tab: table }
-    }
-    #[allow(unused_mut)]
-    pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>(
-        _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>,
-        args: &'args TensorDimArgs<'args>,
-    ) -> flatbuffers::WIPOffset<TensorDim<'bldr>> {
-        let mut builder = TensorDimBuilder::new(_fbb);
-        builder.add_size_(args.size_);
-        if let Some(x) = args.name {
-            builder.add_name(x);
-        }
-        builder.finish()
-    }
-
-    pub const VT_SIZE_: flatbuffers::VOffsetT = 4;
-    pub const VT_NAME: flatbuffers::VOffsetT = 6;
-
-    /// Length of dimension
-    #[inline]
-    pub fn size_(&self) -> i64 {
-        self._tab.get::<i64>(TensorDim::VT_SIZE_, Some(0)).unwrap()
-    }
-    /// Name of the dimension, optional
-    #[inline]
-    pub fn name(&self) -> Option<&'a str> {
-        self._tab
-            .get::<flatbuffers::ForwardsUOffset<&str>>(TensorDim::VT_NAME, None)
-    }
-}
-
-impl flatbuffers::Verifiable for TensorDim<'_> {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        v.visit_table(pos)?
-            .visit_field::<i64>(&"size_", Self::VT_SIZE_, false)?
-            .visit_field::<flatbuffers::ForwardsUOffset<&str>>(
-                &"name",
-                Self::VT_NAME,
-                false,
-            )?
-            .finish();
-        Ok(())
-    }
-}
-pub struct TensorDimArgs<'a> {
-    pub size_: i64,
-    pub name: Option<flatbuffers::WIPOffset<&'a str>>,
-}
-impl<'a> Default for TensorDimArgs<'a> {
-    #[inline]
-    fn default() -> Self {
-        TensorDimArgs {
-            size_: 0,
-            name: None,
-        }
-    }
-}
-pub struct TensorDimBuilder<'a: 'b, 'b> {
-    fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    start_: flatbuffers::WIPOffset<flatbuffers::TableUnfinishedWIPOffset>,
-}
-impl<'a: 'b, 'b> TensorDimBuilder<'a, 'b> {
-    #[inline]
-    pub fn add_size_(&mut self, size_: i64) {
-        self.fbb_.push_slot::<i64>(TensorDim::VT_SIZE_, size_, 0);
-    }
-    #[inline]
-    pub fn add_name(&mut self, name: flatbuffers::WIPOffset<&'b str>) {
-        self.fbb_
-            .push_slot_always::<flatbuffers::WIPOffset<_>>(TensorDim::VT_NAME, name);
-    }
-    #[inline]
-    pub fn new(
-        _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    ) -> TensorDimBuilder<'a, 'b> {
-        let start = _fbb.start_table();
-        TensorDimBuilder {
-            fbb_: _fbb,
-            start_: start,
-        }
-    }
-    #[inline]
-    pub fn finish(self) -> flatbuffers::WIPOffset<TensorDim<'a>> {
-        let o = self.fbb_.end_table(self.start_);
-        flatbuffers::WIPOffset::new(o.value())
-    }
-}
-
-impl std::fmt::Debug for TensorDim<'_> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let mut ds = f.debug_struct("TensorDim");
-        ds.field("size_", &self.size_());
-        ds.field("name", &self.name());
-        ds.finish()
-    }
-}
-pub enum TensorOffset {}
-#[derive(Copy, Clone, PartialEq)]
-
-pub struct Tensor<'a> {
-    pub _tab: flatbuffers::Table<'a>,
-}
-
-impl<'a> flatbuffers::Follow<'a> for Tensor<'a> {
-    type Inner = Tensor<'a>;
-    #[inline]
-    fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        Self {
-            _tab: flatbuffers::Table { buf, loc },
-        }
-    }
-}
-
-impl<'a> Tensor<'a> {
-    #[inline]
-    pub fn init_from_table(table: flatbuffers::Table<'a>) -> Self {
-        Tensor { _tab: table }
-    }
-    #[allow(unused_mut)]
-    pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>(
-        _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>,
-        args: &'args TensorArgs<'args>,
-    ) -> flatbuffers::WIPOffset<Tensor<'bldr>> {
-        let mut builder = TensorBuilder::new(_fbb);
-        if let Some(x) = args.data {
-            builder.add_data(x);
-        }
-        if let Some(x) = args.strides {
-            builder.add_strides(x);
-        }
-        if let Some(x) = args.shape {
-            builder.add_shape(x);
-        }
-        if let Some(x) = args.type_ {
-            builder.add_type_(x);
-        }
-        builder.add_type_type(args.type_type);
-        builder.finish()
-    }
-
-    pub const VT_TYPE_TYPE: flatbuffers::VOffsetT = 4;
-    pub const VT_TYPE_: flatbuffers::VOffsetT = 6;
-    pub const VT_SHAPE: flatbuffers::VOffsetT = 8;
-    pub const VT_STRIDES: flatbuffers::VOffsetT = 10;
-    pub const VT_DATA: flatbuffers::VOffsetT = 12;
-
-    #[inline]
-    pub fn type_type(&self) -> Type {
-        self._tab
-            .get::<Type>(Tensor::VT_TYPE_TYPE, Some(Type::NONE))
-            .unwrap()
-    }
-    /// The type of data contained in a value cell. Currently only fixed-width
-    /// value types are supported, no strings or nested types
-    #[inline]
-    pub fn type_(&self) -> flatbuffers::Table<'a> {
-        self._tab
-            .get::<flatbuffers::ForwardsUOffset<flatbuffers::Table<'a>>>(
-                Tensor::VT_TYPE_,
-                None,
-            )
-            .unwrap()
-    }
-    /// The dimensions of the tensor, optionally named
-    #[inline]
-    pub fn shape(
-        &self,
-    ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<TensorDim<'a>>> {
-        self._tab
-            .get::<flatbuffers::ForwardsUOffset<
-                flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<TensorDim>>,
-            >>(Tensor::VT_SHAPE, None)
-            .unwrap()
-    }
-    /// Non-negative byte offsets to advance one value cell along each dimension
-    /// If omitted, default to row-major order (C-like).
-    #[inline]
-    pub fn strides(&self) -> Option<flatbuffers::Vector<'a, i64>> {
-        self._tab
-            .get::<flatbuffers::ForwardsUOffset<flatbuffers::Vector<'a, i64>>>(
-                Tensor::VT_STRIDES,
-                None,
-            )
-    }
-    /// The location and size of the tensor's data
-    #[inline]
-    pub fn data(&self) -> &'a Buffer {
-        self._tab.get::<Buffer>(Tensor::VT_DATA, None).unwrap()
-    }
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_null(&self) -> Option<Null<'a>> {
-        if self.type_type() == Type::Null {
-            let u = self.type_();
-            Some(Null::init_from_table(u))
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_int(&self) -> Option<Int<'a>> {
-        if self.type_type() == Type::Int {
-            let u = self.type_();
-            Some(Int::init_from_table(u))
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_floating_point(&self) -> Option<FloatingPoint<'a>> {
-        if self.type_type() == Type::FloatingPoint {
-            let u = self.type_();
-            Some(FloatingPoint::init_from_table(u))
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_binary(&self) -> Option<Binary<'a>> {
-        if self.type_type() == Type::Binary {
-            let u = self.type_();
-            Some(Binary::init_from_table(u))
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_utf_8(&self) -> Option<Utf8<'a>> {
-        if self.type_type() == Type::Utf8 {
-            let u = self.type_();
-            Some(Utf8::init_from_table(u))
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_bool(&self) -> Option<Bool<'a>> {
-        if self.type_type() == Type::Bool {
-            let u = self.type_();
-            Some(Bool::init_from_table(u))
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_decimal(&self) -> Option<Decimal<'a>> {
-        if self.type_type() == Type::Decimal {
-            let u = self.type_();
-            Some(Decimal::init_from_table(u))
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_date(&self) -> Option<Date<'a>> {
-        if self.type_type() == Type::Date {
-            let u = self.type_();
-            Some(Date::init_from_table(u))
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_time(&self) -> Option<Time<'a>> {
-        if self.type_type() == Type::Time {
-            let u = self.type_();
-            Some(Time::init_from_table(u))
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_timestamp(&self) -> Option<Timestamp<'a>> {
-        if self.type_type() == Type::Timestamp {
-            let u = self.type_();
-            Some(Timestamp::init_from_table(u))
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_interval(&self) -> Option<Interval<'a>> {
-        if self.type_type() == Type::Interval {
-            let u = self.type_();
-            Some(Interval::init_from_table(u))
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_list(&self) -> Option<List<'a>> {
-        if self.type_type() == Type::List {
-            let u = self.type_();
-            Some(List::init_from_table(u))
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_struct_(&self) -> Option<Struct_<'a>> {
-        if self.type_type() == Type::Struct_ {
-            let u = self.type_();
-            Some(Struct_::init_from_table(u))
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_union(&self) -> Option<Union<'a>> {
-        if self.type_type() == Type::Union {
-            let u = self.type_();
-            Some(Union::init_from_table(u))
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_fixed_size_binary(&self) -> Option<FixedSizeBinary<'a>> {
-        if self.type_type() == Type::FixedSizeBinary {
-            let u = self.type_();
-            Some(FixedSizeBinary::init_from_table(u))
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_fixed_size_list(&self) -> Option<FixedSizeList<'a>> {
-        if self.type_type() == Type::FixedSizeList {
-            let u = self.type_();
-            Some(FixedSizeList::init_from_table(u))
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_map(&self) -> Option<Map<'a>> {
-        if self.type_type() == Type::Map {
-            let u = self.type_();
-            Some(Map::init_from_table(u))
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_duration(&self) -> Option<Duration<'a>> {
-        if self.type_type() == Type::Duration {
-            let u = self.type_();
-            Some(Duration::init_from_table(u))
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_large_binary(&self) -> Option<LargeBinary<'a>> {
-        if self.type_type() == Type::LargeBinary {
-            let u = self.type_();
-            Some(LargeBinary::init_from_table(u))
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_large_utf_8(&self) -> Option<LargeUtf8<'a>> {
-        if self.type_type() == Type::LargeUtf8 {
-            let u = self.type_();
-            Some(LargeUtf8::init_from_table(u))
-        } else {
-            None
-        }
-    }
-
-    #[inline]
-    #[allow(non_snake_case)]
-    pub fn type_as_large_list(&self) -> Option<LargeList<'a>> {
-        if self.type_type() == Type::LargeList {
-            let u = self.type_();
-            Some(LargeList::init_from_table(u))
-        } else {
-            None
-        }
-    }
-}
-
-impl flatbuffers::Verifiable for Tensor<'_> {
-    #[inline]
-    fn run_verifier(
-        v: &mut flatbuffers::Verifier,
-        pos: usize,
-    ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
-        use flatbuffers::Verifiable;
-        v.visit_table(pos)?
-     .visit_union::<Type, _>(&"type_type", Self::VT_TYPE_TYPE, &"type_", Self::VT_TYPE_, true, |key, v, pos| {
-        match key {
-          Type::Null => v.verify_union_variant::<flatbuffers::ForwardsUOffset<Null>>("Type::Null", pos),
-          Type::Int => v.verify_union_variant::<flatbuffers::ForwardsUOffset<Int>>("Type::Int", pos),
-          Type::FloatingPoint => v.verify_union_variant::<flatbuffers::ForwardsUOffset<FloatingPoint>>("Type::FloatingPoint", pos),
-          Type::Binary => v.verify_union_variant::<flatbuffers::ForwardsUOffset<Binary>>("Type::Binary", pos),
-          Type::Utf8 => v.verify_union_variant::<flatbuffers::ForwardsUOffset<Utf8>>("Type::Utf8", pos),
-          Type::Bool => v.verify_union_variant::<flatbuffers::ForwardsUOffset<Bool>>("Type::Bool", pos),
-          Type::Decimal => v.verify_union_variant::<flatbuffers::ForwardsUOffset<Decimal>>("Type::Decimal", pos),
-          Type::Date => v.verify_union_variant::<flatbuffers::ForwardsUOffset<Date>>("Type::Date", pos),
-          Type::Time => v.verify_union_variant::<flatbuffers::ForwardsUOffset<Time>>("Type::Time", pos),
-          Type::Timestamp => v.verify_union_variant::<flatbuffers::ForwardsUOffset<Timestamp>>("Type::Timestamp", pos),
-          Type::Interval => v.verify_union_variant::<flatbuffers::ForwardsUOffset<Interval>>("Type::Interval", pos),
-          Type::List => v.verify_union_variant::<flatbuffers::ForwardsUOffset<List>>("Type::List", pos),
-          Type::Struct_ => v.verify_union_variant::<flatbuffers::ForwardsUOffset<Struct_>>("Type::Struct_", pos),
-          Type::Union => v.verify_union_variant::<flatbuffers::ForwardsUOffset<Union>>("Type::Union", pos),
-          Type::FixedSizeBinary => v.verify_union_variant::<flatbuffers::ForwardsUOffset<FixedSizeBinary>>("Type::FixedSizeBinary", pos),
-          Type::FixedSizeList => v.verify_union_variant::<flatbuffers::ForwardsUOffset<FixedSizeList>>("Type::FixedSizeList", pos),
-          Type::Map => v.verify_union_variant::<flatbuffers::ForwardsUOffset<Map>>("Type::Map", pos),
-          Type::Duration => v.verify_union_variant::<flatbuffers::ForwardsUOffset<Duration>>("Type::Duration", pos),
-          Type::LargeBinary => v.verify_union_variant::<flatbuffers::ForwardsUOffset<LargeBinary>>("Type::LargeBinary", pos),
-          Type::LargeUtf8 => v.verify_union_variant::<flatbuffers::ForwardsUOffset<LargeUtf8>>("Type::LargeUtf8", pos),
-          Type::LargeList => v.verify_union_variant::<flatbuffers::ForwardsUOffset<LargeList>>("Type::LargeList", pos),
-          _ => Ok(()),
-        }
-     })?
-     .visit_field::<flatbuffers::ForwardsUOffset<flatbuffers::Vector<'_, flatbuffers::ForwardsUOffset<TensorDim>>>>(&"shape", Self::VT_SHAPE, true)?
-     .visit_field::<flatbuffers::ForwardsUOffset<flatbuffers::Vector<'_, i64>>>(&"strides", Self::VT_STRIDES, false)?
-     .visit_field::<Buffer>(&"data", Self::VT_DATA, true)?
-     .finish();
-        Ok(())
-    }
-}
-pub struct TensorArgs<'a> {
-    pub type_type: Type,
-    pub type_: Option<flatbuffers::WIPOffset<flatbuffers::UnionWIPOffset>>,
-    pub shape: Option<
-        flatbuffers::WIPOffset<
-            flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<TensorDim<'a>>>,
-        >,
-    >,
-    pub strides: Option<flatbuffers::WIPOffset<flatbuffers::Vector<'a, i64>>>,
-    pub data: Option<&'a Buffer>,
-}
-impl<'a> Default for TensorArgs<'a> {
-    #[inline]
-    fn default() -> Self {
-        TensorArgs {
-            type_type: Type::NONE,
-            type_: None, // required field
-            shape: None, // required field
-            strides: None,
-            data: None, // required field
-        }
-    }
-}
-pub struct TensorBuilder<'a: 'b, 'b> {
-    fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    start_: flatbuffers::WIPOffset<flatbuffers::TableUnfinishedWIPOffset>,
-}
-impl<'a: 'b, 'b> TensorBuilder<'a, 'b> {
-    #[inline]
-    pub fn add_type_type(&mut self, type_type: Type) {
-        self.fbb_
-            .push_slot::<Type>(Tensor::VT_TYPE_TYPE, type_type, Type::NONE);
-    }
-    #[inline]
-    pub fn add_type_(
-        &mut self,
-        type_: flatbuffers::WIPOffset<flatbuffers::UnionWIPOffset>,
-    ) {
-        self.fbb_
-            .push_slot_always::<flatbuffers::WIPOffset<_>>(Tensor::VT_TYPE_, type_);
-    }
-    #[inline]
-    pub fn add_shape(
-        &mut self,
-        shape: flatbuffers::WIPOffset<
-            flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<TensorDim<'b>>>,
-        >,
-    ) {
-        self.fbb_
-            .push_slot_always::<flatbuffers::WIPOffset<_>>(Tensor::VT_SHAPE, shape);
-    }
-    #[inline]
-    pub fn add_strides(
-        &mut self,
-        strides: flatbuffers::WIPOffset<flatbuffers::Vector<'b, i64>>,
-    ) {
-        self.fbb_
-            .push_slot_always::<flatbuffers::WIPOffset<_>>(Tensor::VT_STRIDES, strides);
-    }
-    #[inline]
-    pub fn add_data(&mut self, data: &Buffer) {
-        self.fbb_.push_slot_always::<&Buffer>(Tensor::VT_DATA, data);
-    }
-    #[inline]
-    pub fn new(
-        _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    ) -> TensorBuilder<'a, 'b> {
-        let start = _fbb.start_table();
-        TensorBuilder {
-            fbb_: _fbb,
-            start_: start,
-        }
-    }
-    #[inline]
-    pub fn finish(self) -> flatbuffers::WIPOffset<Tensor<'a>> {
-        let o = self.fbb_.end_table(self.start_);
-        self.fbb_.required(o, Tensor::VT_TYPE_, "type_");
-        self.fbb_.required(o, Tensor::VT_SHAPE, "shape");
-        self.fbb_.required(o, Tensor::VT_DATA, "data");
-        flatbuffers::WIPOffset::new(o.value())
-    }
-}
-
-impl std::fmt::Debug for Tensor<'_> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let mut ds = f.debug_struct("Tensor");
-        ds.field("type_type", &self.type_type());
-        match self.type_type() {
-            Type::Null => {
-                if let Some(x) = self.type_as_null() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::Int => {
-                if let Some(x) = self.type_as_int() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::FloatingPoint => {
-                if let Some(x) = self.type_as_floating_point() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::Binary => {
-                if let Some(x) = self.type_as_binary() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::Utf8 => {
-                if let Some(x) = self.type_as_utf_8() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::Bool => {
-                if let Some(x) = self.type_as_bool() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::Decimal => {
-                if let Some(x) = self.type_as_decimal() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::Date => {
-                if let Some(x) = self.type_as_date() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::Time => {
-                if let Some(x) = self.type_as_time() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::Timestamp => {
-                if let Some(x) = self.type_as_timestamp() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::Interval => {
-                if let Some(x) = self.type_as_interval() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::List => {
-                if let Some(x) = self.type_as_list() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::Struct_ => {
-                if let Some(x) = self.type_as_struct_() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::Union => {
-                if let Some(x) = self.type_as_union() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::FixedSizeBinary => {
-                if let Some(x) = self.type_as_fixed_size_binary() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::FixedSizeList => {
-                if let Some(x) = self.type_as_fixed_size_list() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::Map => {
-                if let Some(x) = self.type_as_map() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::Duration => {
-                if let Some(x) = self.type_as_duration() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::LargeBinary => {
-                if let Some(x) = self.type_as_large_binary() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::LargeUtf8 => {
-                if let Some(x) = self.type_as_large_utf_8() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            Type::LargeList => {
-                if let Some(x) = self.type_as_large_list() {
-                    ds.field("type_", &x)
-                } else {
-                    ds.field(
-                        "type_",
-                        &"InvalidFlatbuffer: Union discriminant does not match value.",
-                    )
-                }
-            }
-            _ => {
-                let x: Option<()> = None;
-                ds.field("type_", &x)
-            }
-        };
-        ds.field("shape", &self.shape());
-        ds.field("strides", &self.strides());
-        ds.field("data", &self.data());
-        ds.finish()
-    }
-}
-#[inline]
-#[deprecated(since = "2.0.0", note = "Deprecated in favor of `root_as...` methods.")]
-pub fn get_root_as_tensor<'a>(buf: &'a [u8]) -> Tensor<'a> {
-    unsafe { flatbuffers::root_unchecked::<Tensor<'a>>(buf) }
-}
-
-#[inline]
-#[deprecated(since = "2.0.0", note = "Deprecated in favor of `root_as...` methods.")]
-pub fn get_size_prefixed_root_as_tensor<'a>(buf: &'a [u8]) -> Tensor<'a> {
-    unsafe { flatbuffers::size_prefixed_root_unchecked::<Tensor<'a>>(buf) }
-}
-
-#[inline]
-/// Verifies that a buffer of bytes contains a `Tensor`
-/// and returns it.
-/// Note that verification is still experimental and may not
-/// catch every error, or be maximally performant. For the
-/// previous, unchecked, behavior use
-/// `root_as_tensor_unchecked`.
-pub fn root_as_tensor(buf: &[u8]) -> Result<Tensor, flatbuffers::InvalidFlatbuffer> {
-    flatbuffers::root::<Tensor>(buf)
-}
-#[inline]
-/// Verifies that a buffer of bytes contains a size prefixed
-/// `Tensor` and returns it.
-/// Note that verification is still experimental and may not
-/// catch every error, or be maximally performant. For the
-/// previous, unchecked, behavior use
-/// `size_prefixed_root_as_tensor_unchecked`.
-pub fn size_prefixed_root_as_tensor(
-    buf: &[u8],
-) -> Result<Tensor, flatbuffers::InvalidFlatbuffer> {
-    flatbuffers::size_prefixed_root::<Tensor>(buf)
-}
-#[inline]
-/// Verifies, with the given options, that a buffer of bytes
-/// contains a `Tensor` and returns it.
-/// Note that verification is still experimental and may not
-/// catch every error, or be maximally performant. For the
-/// previous, unchecked, behavior use
-/// `root_as_tensor_unchecked`.
-pub fn root_as_tensor_with_opts<'b, 'o>(
-    opts: &'o flatbuffers::VerifierOptions,
-    buf: &'b [u8],
-) -> Result<Tensor<'b>, flatbuffers::InvalidFlatbuffer> {
-    flatbuffers::root_with_opts::<Tensor<'b>>(opts, buf)
-}
-#[inline]
-/// Verifies, with the given verifier options, that a buffer of
-/// bytes contains a size prefixed `Tensor` and returns
-/// it. Note that verification is still experimental and may not
-/// catch every error, or be maximally performant. For the
-/// previous, unchecked, behavior use
-/// `root_as_tensor_unchecked`.
-pub fn size_prefixed_root_as_tensor_with_opts<'b, 'o>(
-    opts: &'o flatbuffers::VerifierOptions,
-    buf: &'b [u8],
-) -> Result<Tensor<'b>, flatbuffers::InvalidFlatbuffer> {
-    flatbuffers::size_prefixed_root_with_opts::<Tensor<'b>>(opts, buf)
-}
-#[inline]
-/// Assumes, without verification, that a buffer of bytes contains a Tensor and returns it.
-/// # Safety
-/// Callers must trust the given bytes do indeed contain a valid `Tensor`.
-pub unsafe fn root_as_tensor_unchecked(buf: &[u8]) -> Tensor {
-    flatbuffers::root_unchecked::<Tensor>(buf)
-}
-#[inline]
-/// Assumes, without verification, that a buffer of bytes contains a size prefixed Tensor and returns it.
-/// # Safety
-/// Callers must trust the given bytes do indeed contain a valid size prefixed `Tensor`.
-pub unsafe fn size_prefixed_root_as_tensor_unchecked(buf: &[u8]) -> Tensor {
-    flatbuffers::size_prefixed_root_unchecked::<Tensor>(buf)
-}
-#[inline]
-pub fn finish_tensor_buffer<'a, 'b>(
-    fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    root: flatbuffers::WIPOffset<Tensor<'a>>,
-) {
-    fbb.finish(root, None);
-}
-
-#[inline]
-pub fn finish_size_prefixed_tensor_buffer<'a, 'b>(
-    fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>,
-    root: flatbuffers::WIPOffset<Tensor<'a>>,
-) {
-    fbb.finish_size_prefixed(root, None);
-}
diff --git a/rust/arrow/src/ipc/gen/mod.rs b/rust/arrow/src/ipc/gen/mod.rs
deleted file mode 100644
index ceeb6b2c5c7..00000000000
--- a/rust/arrow/src/ipc/gen/mod.rs
+++ /dev/null
@@ -1,31 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Generated code
-
-#![allow(non_snake_case)]
-
-#[allow(clippy::all)]
-pub mod File;
-#[allow(clippy::all)]
-pub mod Message;
-#[allow(clippy::all)]
-pub mod Schema;
-#[allow(clippy::all)]
-pub mod SparseTensor;
-#[allow(clippy::all)]
-pub mod Tensor;
diff --git a/rust/arrow/src/ipc/mod.rs b/rust/arrow/src/ipc/mod.rs
deleted file mode 100644
index a2d7103aacf..00000000000
--- a/rust/arrow/src/ipc/mod.rs
+++ /dev/null
@@ -1,39 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// TODO: (vcq): Protobuf codegen is not generating Debug impls.
-#![allow(missing_debug_implementations)]
-
-pub mod convert;
-pub mod reader;
-pub mod writer;
-
-#[allow(clippy::redundant_closure)]
-#[allow(clippy::needless_lifetimes)]
-#[allow(clippy::extra_unused_lifetimes)]
-#[allow(clippy::redundant_static_lifetimes)]
-#[allow(clippy::redundant_field_names)]
-pub mod gen;
-
-pub use self::gen::File::*;
-pub use self::gen::Message::*;
-pub use self::gen::Schema::*;
-pub use self::gen::SparseTensor::*;
-pub use self::gen::Tensor::*;
-
-const ARROW_MAGIC: [u8; 6] = [b'A', b'R', b'R', b'O', b'W', b'1'];
-const CONTINUATION_MARKER: [u8; 4] = [0xff; 4];
diff --git a/rust/arrow/src/ipc/reader.rs b/rust/arrow/src/ipc/reader.rs
deleted file mode 100644
index 3c893cdf2ff..00000000000
--- a/rust/arrow/src/ipc/reader.rs
+++ /dev/null
@@ -1,1160 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Arrow IPC File and Stream Readers
-//!
-//! The `FileReader` and `StreamReader` have similar interfaces,
-//! however the `FileReader` expects a reader that supports `Seek`ing
-
-use std::collections::HashMap;
-use std::io::{BufReader, Read, Seek, SeekFrom};
-use std::sync::Arc;
-
-use crate::array::*;
-use crate::buffer::Buffer;
-use crate::compute::cast;
-use crate::datatypes::{DataType, Field, IntervalUnit, Schema, SchemaRef};
-use crate::error::{ArrowError, Result};
-use crate::ipc;
-use crate::record_batch::{RecordBatch, RecordBatchReader};
-
-use ipc::CONTINUATION_MARKER;
-use DataType::*;
-
-/// Read a buffer based on offset and length
-fn read_buffer(buf: &ipc::Buffer, a_data: &[u8]) -> Buffer {
-    let start_offset = buf.offset() as usize;
-    let end_offset = start_offset + buf.length() as usize;
-    let buf_data = &a_data[start_offset..end_offset];
-    Buffer::from(&buf_data)
-}
-
-/// Coordinates reading arrays based on data types.
-///
-/// Notes:
-/// * In the IPC format, null buffers are always set, but may be empty. We discard them if an array has 0 nulls
-/// * Numeric values inside list arrays are often stored as 64-bit values regardless of their data type size.
-///   We thus:
-///     - check if the bit width of non-64-bit numbers is 64, and
-///     - read the buffer as 64-bit (signed integer or float), and
-///     - cast the 64-bit array to the appropriate data type
-fn create_array(
-    nodes: &[ipc::FieldNode],
-    data_type: &DataType,
-    data: &[u8],
-    buffers: &[ipc::Buffer],
-    dictionaries: &[Option<ArrayRef>],
-    mut node_index: usize,
-    mut buffer_index: usize,
-) -> (ArrayRef, usize, usize) {
-    use DataType::*;
-    let array = match data_type {
-        Utf8 | Binary | LargeBinary | LargeUtf8 => {
-            let array = create_primitive_array(
-                &nodes[node_index],
-                data_type,
-                buffers[buffer_index..buffer_index + 3]
-                    .iter()
-                    .map(|buf| read_buffer(buf, data))
-                    .collect(),
-            );
-            node_index += 1;
-            buffer_index += 3;
-            array
-        }
-        FixedSizeBinary(_) => {
-            let array = create_primitive_array(
-                &nodes[node_index],
-                data_type,
-                buffers[buffer_index..buffer_index + 2]
-                    .iter()
-                    .map(|buf| read_buffer(buf, data))
-                    .collect(),
-            );
-            node_index += 1;
-            buffer_index += 2;
-            array
-        }
-        List(ref list_field) | LargeList(ref list_field) => {
-            let list_node = &nodes[node_index];
-            let list_buffers: Vec<Buffer> = buffers[buffer_index..buffer_index + 2]
-                .iter()
-                .map(|buf| read_buffer(buf, data))
-                .collect();
-            node_index += 1;
-            buffer_index += 2;
-            let triple = create_array(
-                nodes,
-                list_field.data_type(),
-                data,
-                buffers,
-                dictionaries,
-                node_index,
-                buffer_index,
-            );
-            node_index = triple.1;
-            buffer_index = triple.2;
-
-            create_list_array(list_node, data_type, &list_buffers[..], triple.0)
-        }
-        FixedSizeList(ref list_field, _) => {
-            let list_node = &nodes[node_index];
-            let list_buffers: Vec<Buffer> = buffers[buffer_index..=buffer_index]
-                .iter()
-                .map(|buf| read_buffer(buf, data))
-                .collect();
-            node_index += 1;
-            buffer_index += 1;
-            let triple = create_array(
-                nodes,
-                list_field.data_type(),
-                data,
-                buffers,
-                dictionaries,
-                node_index,
-                buffer_index,
-            );
-            node_index = triple.1;
-            buffer_index = triple.2;
-
-            create_list_array(list_node, data_type, &list_buffers[..], triple.0)
-        }
-        Struct(struct_fields) => {
-            let struct_node = &nodes[node_index];
-            let null_buffer: Buffer = read_buffer(&buffers[buffer_index], data);
-            node_index += 1;
-            buffer_index += 1;
-
-            // read the arrays for each field
-            let mut struct_arrays = vec![];
-            // TODO investigate whether just knowing the number of buffers could
-            // still work
-            for struct_field in struct_fields {
-                let triple = create_array(
-                    nodes,
-                    struct_field.data_type(),
-                    data,
-                    buffers,
-                    dictionaries,
-                    node_index,
-                    buffer_index,
-                );
-                node_index = triple.1;
-                buffer_index = triple.2;
-                struct_arrays.push((struct_field.clone(), triple.0));
-            }
-            let null_count = struct_node.null_count() as usize;
-            let struct_array = if null_count > 0 {
-                // create struct array from fields, arrays and null data
-                StructArray::from((struct_arrays, null_buffer))
-            } else {
-                StructArray::from(struct_arrays)
-            };
-            Arc::new(struct_array)
-        }
-        // Create dictionary array from RecordBatch
-        Dictionary(_, _) => {
-            let index_node = &nodes[node_index];
-            let index_buffers: Vec<Buffer> = buffers[buffer_index..buffer_index + 2]
-                .iter()
-                .map(|buf| read_buffer(buf, data))
-                .collect();
-            let value_array = dictionaries[node_index].clone().unwrap();
-            node_index += 1;
-            buffer_index += 2;
-
-            create_dictionary_array(
-                index_node,
-                data_type,
-                &index_buffers[..],
-                value_array,
-            )
-        }
-        Null => {
-            let length = nodes[node_index].length() as usize;
-            let data = ArrayData::builder(data_type.clone())
-                .len(length)
-                .offset(0)
-                .build();
-            node_index += 1;
-            // no buffer increases
-            make_array(data)
-        }
-        _ => {
-            let array = create_primitive_array(
-                &nodes[node_index],
-                data_type,
-                buffers[buffer_index..buffer_index + 2]
-                    .iter()
-                    .map(|buf| read_buffer(buf, data))
-                    .collect(),
-            );
-            node_index += 1;
-            buffer_index += 2;
-            array
-        }
-    };
-    (array, node_index, buffer_index)
-}
-
-/// Reads the correct number of buffers based on data type and null_count, and creates a
-/// primitive array ref
-fn create_primitive_array(
-    field_node: &ipc::FieldNode,
-    data_type: &DataType,
-    buffers: Vec<Buffer>,
-) -> ArrayRef {
-    let length = field_node.length() as usize;
-    let null_count = field_node.null_count() as usize;
-    let array_data = match data_type {
-        Utf8 | Binary | LargeBinary | LargeUtf8 => {
-            // read 3 buffers
-            let mut builder = ArrayData::builder(data_type.clone())
-                .len(length)
-                .buffers(buffers[1..3].to_vec())
-                .offset(0);
-            if null_count > 0 {
-                builder = builder.null_bit_buffer(buffers[0].clone())
-            }
-            builder.build()
-        }
-        FixedSizeBinary(_) => {
-            // read 3 buffers
-            let mut builder = ArrayData::builder(data_type.clone())
-                .len(length)
-                .buffers(buffers[1..2].to_vec())
-                .offset(0);
-            if null_count > 0 {
-                builder = builder.null_bit_buffer(buffers[0].clone())
-            }
-            builder.build()
-        }
-        Int8
-        | Int16
-        | Int32
-        | UInt8
-        | UInt16
-        | UInt32
-        | Time32(_)
-        | Date32
-        | Interval(IntervalUnit::YearMonth) => {
-            if buffers[1].len() / 8 == length && length != 1 {
-                // interpret as a signed i64, and cast appropriately
-                let mut builder = ArrayData::builder(DataType::Int64)
-                    .len(length)
-                    .buffers(buffers[1..].to_vec())
-                    .offset(0);
-                if null_count > 0 {
-                    builder = builder.null_bit_buffer(buffers[0].clone())
-                }
-                let values = Arc::new(Int64Array::from(builder.build())) as ArrayRef;
-                // this cast is infallible, the unwrap is safe
-                let casted = cast(&values, data_type).unwrap();
-                casted.data().clone()
-            } else {
-                let mut builder = ArrayData::builder(data_type.clone())
-                    .len(length)
-                    .buffers(buffers[1..].to_vec())
-                    .offset(0);
-                if null_count > 0 {
-                    builder = builder.null_bit_buffer(buffers[0].clone())
-                }
-                builder.build()
-            }
-        }
-        Float32 => {
-            if buffers[1].len() / 8 == length && length != 1 {
-                // interpret as a f64, and cast appropriately
-                let mut builder = ArrayData::builder(DataType::Float64)
-                    .len(length)
-                    .buffers(buffers[1..].to_vec())
-                    .offset(0);
-                if null_count > 0 {
-                    builder = builder.null_bit_buffer(buffers[0].clone())
-                }
-                let values = Arc::new(Float64Array::from(builder.build())) as ArrayRef;
-                // this cast is infallible, the unwrap is safe
-                let casted = cast(&values, data_type).unwrap();
-                casted.data().clone()
-            } else {
-                let mut builder = ArrayData::builder(data_type.clone())
-                    .len(length)
-                    .buffers(buffers[1..].to_vec())
-                    .offset(0);
-                if null_count > 0 {
-                    builder = builder.null_bit_buffer(buffers[0].clone())
-                }
-                builder.build()
-            }
-        }
-        Boolean
-        | Int64
-        | UInt64
-        | Float64
-        | Time64(_)
-        | Timestamp(_, _)
-        | Date64
-        | Duration(_)
-        | Interval(IntervalUnit::DayTime) => {
-            let mut builder = ArrayData::builder(data_type.clone())
-                .len(length)
-                .buffers(buffers[1..].to_vec())
-                .offset(0);
-            if null_count > 0 {
-                builder = builder.null_bit_buffer(buffers[0].clone())
-            }
-            builder.build()
-        }
-        Decimal(_, _) => {
-            // read 3 buffers
-            let mut builder = ArrayData::builder(data_type.clone())
-                .len(length)
-                .buffers(buffers[1..2].to_vec())
-                .offset(0);
-            if null_count > 0 {
-                builder = builder.null_bit_buffer(buffers[0].clone())
-            }
-            builder.build()
-        }
-        t => panic!("Data type {:?} either unsupported or not primitive", t),
-    };
-
-    make_array(array_data)
-}
-
-/// Reads the correct number of buffers based on list type and null_count, and creates a
-/// list array ref
-fn create_list_array(
-    field_node: &ipc::FieldNode,
-    data_type: &DataType,
-    buffers: &[Buffer],
-    child_array: ArrayRef,
-) -> ArrayRef {
-    if let DataType::List(_) = *data_type {
-        let null_count = field_node.null_count() as usize;
-        let mut builder = ArrayData::builder(data_type.clone())
-            .len(field_node.length() as usize)
-            .buffers(buffers[1..2].to_vec())
-            .offset(0)
-            .child_data(vec![child_array.data().clone()]);
-        if null_count > 0 {
-            builder = builder.null_bit_buffer(buffers[0].clone())
-        }
-        make_array(builder.build())
-    } else if let DataType::LargeList(_) = *data_type {
-        let null_count = field_node.null_count() as usize;
-        let mut builder = ArrayData::builder(data_type.clone())
-            .len(field_node.length() as usize)
-            .buffers(buffers[1..2].to_vec())
-            .offset(0)
-            .child_data(vec![child_array.data().clone()]);
-        if null_count > 0 {
-            builder = builder.null_bit_buffer(buffers[0].clone())
-        }
-        make_array(builder.build())
-    } else if let DataType::FixedSizeList(_, _) = *data_type {
-        let null_count = field_node.null_count() as usize;
-        let mut builder = ArrayData::builder(data_type.clone())
-            .len(field_node.length() as usize)
-            .buffers(buffers[1..1].to_vec())
-            .offset(0)
-            .child_data(vec![child_array.data().clone()]);
-        if null_count > 0 {
-            builder = builder.null_bit_buffer(buffers[0].clone())
-        }
-        make_array(builder.build())
-    } else {
-        panic!("Cannot create list array from {:?}", data_type)
-    }
-}
-
-/// Reads the correct number of buffers based on list type and null_count, and creates a
-/// list array ref
-fn create_dictionary_array(
-    field_node: &ipc::FieldNode,
-    data_type: &DataType,
-    buffers: &[Buffer],
-    value_array: ArrayRef,
-) -> ArrayRef {
-    if let DataType::Dictionary(_, _) = *data_type {
-        let null_count = field_node.null_count() as usize;
-        let mut builder = ArrayData::builder(data_type.clone())
-            .len(field_node.length() as usize)
-            .buffers(buffers[1..2].to_vec())
-            .offset(0)
-            .child_data(vec![value_array.data().clone()]);
-        if null_count > 0 {
-            builder = builder.null_bit_buffer(buffers[0].clone())
-        }
-        make_array(builder.build())
-    } else {
-        unreachable!("Cannot create dictionary array from {:?}", data_type)
-    }
-}
-
-/// Creates a record batch from binary data using the `ipc::RecordBatch` indexes and the `Schema`
-pub fn read_record_batch(
-    buf: &[u8],
-    batch: ipc::RecordBatch,
-    schema: SchemaRef,
-    dictionaries: &[Option<ArrayRef>],
-) -> Result<RecordBatch> {
-    let buffers = batch.buffers().ok_or_else(|| {
-        ArrowError::IoError("Unable to get buffers from IPC RecordBatch".to_string())
-    })?;
-    let field_nodes = batch.nodes().ok_or_else(|| {
-        ArrowError::IoError("Unable to get field nodes from IPC RecordBatch".to_string())
-    })?;
-    // keep track of buffer and node index, the functions that create arrays mutate these
-    let mut buffer_index = 0;
-    let mut node_index = 0;
-    let mut arrays = vec![];
-
-    // keep track of index as lists require more than one node
-    for field in schema.fields() {
-        let triple = create_array(
-            field_nodes,
-            field.data_type(),
-            &buf,
-            buffers,
-            dictionaries,
-            node_index,
-            buffer_index,
-        );
-        node_index = triple.1;
-        buffer_index = triple.2;
-        arrays.push(triple.0);
-    }
-
-    RecordBatch::try_new(schema, arrays)
-}
-
-/// Read the dictionary from the buffer and provided metadata,
-/// updating the `dictionaries_by_field` with the resulting dictionary
-pub fn read_dictionary(
-    buf: &[u8],
-    batch: ipc::DictionaryBatch,
-    schema: &Schema,
-    dictionaries_by_field: &mut [Option<ArrayRef>],
-) -> Result<()> {
-    if batch.isDelta() {
-        return Err(ArrowError::IoError(
-            "delta dictionary batches not supported".to_string(),
-        ));
-    }
-
-    let id = batch.id();
-    let fields_using_this_dictionary = schema.fields_with_dict_id(id);
-    let first_field = fields_using_this_dictionary.first().ok_or_else(|| {
-        ArrowError::InvalidArgumentError("dictionary id not found in schema".to_string())
-    })?;
-
-    // As the dictionary batch does not contain the type of the
-    // values array, we need to retrieve this from the schema.
-    // Get an array representing this dictionary's values.
-    let dictionary_values: ArrayRef = match first_field.data_type() {
-        DataType::Dictionary(_, ref value_type) => {
-            // Make a fake schema for the dictionary batch.
-            let schema = Schema {
-                fields: vec![Field::new("", value_type.as_ref().clone(), false)],
-                metadata: HashMap::new(),
-            };
-            // Read a single column
-            let record_batch = read_record_batch(
-                &buf,
-                batch.data().unwrap(),
-                Arc::new(schema),
-                &dictionaries_by_field,
-            )?;
-            Some(record_batch.column(0).clone())
-        }
-        _ => None,
-    }
-    .ok_or_else(|| {
-        ArrowError::InvalidArgumentError("dictionary id not found in schema".to_string())
-    })?;
-
-    // for all fields with this dictionary id, update the dictionaries vector
-    // in the reader. Note that a dictionary batch may be shared between many fields.
-    // We don't currently record the isOrdered field. This could be general
-    // attributes of arrays.
-    for (i, field) in schema.fields().iter().enumerate() {
-        if field.dict_id() == Some(id) {
-            // Add (possibly multiple) array refs to the dictionaries array.
-            dictionaries_by_field[i] = Some(dictionary_values.clone());
-        }
-    }
-
-    Ok(())
-}
-
-/// Arrow File reader
-pub struct FileReader<R: Read + Seek> {
-    /// Buffered file reader that supports reading and seeking
-    reader: BufReader<R>,
-
-    /// The schema that is read from the file header
-    schema: SchemaRef,
-
-    /// The blocks in the file
-    ///
-    /// A block indicates the regions in the file to read to get data
-    blocks: Vec<ipc::Block>,
-
-    /// A counter to keep track of the current block that should be read
-    current_block: usize,
-
-    /// The total number of blocks, which may contain record batches and other types
-    total_blocks: usize,
-
-    /// Optional dictionaries for each schema field.
-    ///
-    /// Dictionaries may be appended to in the streaming format.
-    dictionaries_by_field: Vec<Option<ArrayRef>>,
-
-    /// Metadata version
-    metadata_version: ipc::MetadataVersion,
-}
-
-impl<R: Read + Seek> FileReader<R> {
-    /// Try to create a new file reader
-    ///
-    /// Returns errors if the file does not meet the Arrow Format header and footer
-    /// requirements
-    pub fn try_new(reader: R) -> Result<Self> {
-        let mut reader = BufReader::new(reader);
-        // check if header and footer contain correct magic bytes
-        let mut magic_buffer: [u8; 6] = [0; 6];
-        reader.read_exact(&mut magic_buffer)?;
-        if magic_buffer != super::ARROW_MAGIC {
-            return Err(ArrowError::IoError(
-                "Arrow file does not contain correct header".to_string(),
-            ));
-        }
-        reader.seek(SeekFrom::End(-6))?;
-        reader.read_exact(&mut magic_buffer)?;
-        if magic_buffer != super::ARROW_MAGIC {
-            return Err(ArrowError::IoError(
-                "Arrow file does not contain correct footer".to_string(),
-            ));
-        }
-        // read footer length
-        let mut footer_size: [u8; 4] = [0; 4];
-        reader.seek(SeekFrom::End(-10))?;
-        reader.read_exact(&mut footer_size)?;
-        let footer_len = i32::from_le_bytes(footer_size);
-
-        // read footer
-        let mut footer_data = vec![0; footer_len as usize];
-        reader.seek(SeekFrom::End(-10 - footer_len as i64))?;
-        reader.read_exact(&mut footer_data)?;
-
-        let footer = ipc::root_as_footer(&footer_data[..]).map_err(|err| {
-            ArrowError::IoError(format!("Unable to get root as footer: {:?}", err))
-        })?;
-
-        let blocks = footer.recordBatches().ok_or_else(|| {
-            ArrowError::IoError(
-                "Unable to get record batches from IPC Footer".to_string(),
-            )
-        })?;
-
-        let total_blocks = blocks.len();
-
-        let ipc_schema = footer.schema().unwrap();
-        let schema = ipc::convert::fb_to_schema(ipc_schema);
-
-        // Create an array of optional dictionary value arrays, one per field.
-        let mut dictionaries_by_field = vec![None; schema.fields().len()];
-        for block in footer.dictionaries().unwrap() {
-            // read length from end of offset
-            let mut message_size: [u8; 4] = [0; 4];
-            reader.seek(SeekFrom::Start(block.offset() as u64))?;
-            reader.read_exact(&mut message_size)?;
-            let footer_len = if message_size == CONTINUATION_MARKER {
-                reader.read_exact(&mut message_size)?;
-                i32::from_le_bytes(message_size)
-            } else {
-                i32::from_le_bytes(message_size)
-            };
-
-            let mut block_data = vec![0; footer_len as usize];
-
-            reader.read_exact(&mut block_data)?;
-
-            let message = ipc::root_as_message(&block_data[..]).map_err(|err| {
-                ArrowError::IoError(format!("Unable to get root as message: {:?}", err))
-            })?;
-
-            match message.header_type() {
-                ipc::MessageHeader::DictionaryBatch => {
-                    let batch = message.header_as_dictionary_batch().unwrap();
-
-                    // read the block that makes up the dictionary batch into a buffer
-                    let mut buf = vec![0; block.bodyLength() as usize];
-                    reader.seek(SeekFrom::Start(
-                        block.offset() as u64 + block.metaDataLength() as u64,
-                    ))?;
-                    reader.read_exact(&mut buf)?;
-
-                    read_dictionary(&buf, batch, &schema, &mut dictionaries_by_field)?;
-                }
-                t => {
-                    return Err(ArrowError::IoError(format!(
-                        "Expecting DictionaryBatch in dictionary blocks, found {:?}.",
-                        t
-                    )));
-                }
-            };
-        }
-
-        Ok(Self {
-            reader,
-            schema: Arc::new(schema),
-            blocks: blocks.to_vec(),
-            current_block: 0,
-            total_blocks,
-            dictionaries_by_field,
-            metadata_version: footer.version(),
-        })
-    }
-
-    /// Return the number of batches in the file
-    pub fn num_batches(&self) -> usize {
-        self.total_blocks
-    }
-
-    /// Return the schema of the file
-    pub fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-
-    /// Read a specific record batch
-    ///
-    /// Sets the current block to the index, allowing random reads
-    pub fn set_index(&mut self, index: usize) -> Result<()> {
-        if index >= self.total_blocks {
-            Err(ArrowError::IoError(format!(
-                "Cannot set batch to index {} from {} total batches",
-                index, self.total_blocks
-            )))
-        } else {
-            self.current_block = index;
-            Ok(())
-        }
-    }
-
-    fn maybe_next(&mut self) -> Result<Option<RecordBatch>> {
-        let block = self.blocks[self.current_block];
-        self.current_block += 1;
-
-        // read length
-        self.reader.seek(SeekFrom::Start(block.offset() as u64))?;
-        let mut meta_buf = [0; 4];
-        self.reader.read_exact(&mut meta_buf)?;
-        if meta_buf == CONTINUATION_MARKER {
-            // continuation marker encountered, read message next
-            self.reader.read_exact(&mut meta_buf)?;
-        }
-        let meta_len = i32::from_le_bytes(meta_buf);
-
-        let mut block_data = vec![0; meta_len as usize];
-        self.reader.read_exact(&mut block_data)?;
-
-        let message = ipc::root_as_message(&block_data[..]).map_err(|err| {
-            ArrowError::IoError(format!("Unable to get root as footer: {:?}", err))
-        })?;
-
-        // some old test data's footer metadata is not set, so we account for that
-        if self.metadata_version != ipc::MetadataVersion::V1
-            && message.version() != self.metadata_version
-        {
-            return Err(ArrowError::IoError(
-                "Could not read IPC message as metadata versions mismatch".to_string(),
-            ));
-        }
-
-        match message.header_type() {
-            ipc::MessageHeader::Schema => Err(ArrowError::IoError(
-                "Not expecting a schema when messages are read".to_string(),
-            )),
-            ipc::MessageHeader::RecordBatch => {
-                let batch = message.header_as_record_batch().ok_or_else(|| {
-                    ArrowError::IoError(
-                        "Unable to read IPC message as record batch".to_string(),
-                    )
-                })?;
-                // read the block that makes up the record batch into a buffer
-                let mut buf = vec![0; block.bodyLength() as usize];
-                self.reader.seek(SeekFrom::Start(
-                    block.offset() as u64 + block.metaDataLength() as u64,
-                ))?;
-                self.reader.read_exact(&mut buf)?;
-
-                read_record_batch(
-                    &buf,
-                    batch,
-                    self.schema(),
-                    &self.dictionaries_by_field,
-                ).map(Some)
-            }
-            ipc::MessageHeader::NONE => {
-                Ok(None)
-            }
-            t => Err(ArrowError::IoError(format!(
-                "Reading types other than record batches not yet supported, unable to read {:?}", t
-            ))),
-        }
-    }
-}
-
-impl<R: Read + Seek> Iterator for FileReader<R> {
-    type Item = Result<RecordBatch>;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        // get current block
-        if self.current_block < self.total_blocks {
-            self.maybe_next().transpose()
-        } else {
-            None
-        }
-    }
-}
-
-impl<R: Read + Seek> RecordBatchReader for FileReader<R> {
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-}
-
-/// Arrow Stream reader
-pub struct StreamReader<R: Read> {
-    /// Buffered stream reader
-    reader: BufReader<R>,
-
-    /// The schema that is read from the stream's first message
-    schema: SchemaRef,
-
-    /// Optional dictionaries for each schema field.
-    ///
-    /// Dictionaries may be appended to in the streaming format.
-    dictionaries_by_field: Vec<Option<ArrayRef>>,
-
-    /// An indicator of whether the stream is complete.
-    ///
-    /// This value is set to `true` the first time the reader's `next()` returns `None`.
-    finished: bool,
-}
-
-impl<R: Read> StreamReader<R> {
-    /// Try to create a new stream reader
-    ///
-    /// The first message in the stream is the schema, the reader will fail if it does not
-    /// encounter a schema.
-    /// To check if the reader is done, use `is_finished(self)`
-    pub fn try_new(reader: R) -> Result<Self> {
-        let mut reader = BufReader::new(reader);
-        // determine metadata length
-        let mut meta_size: [u8; 4] = [0; 4];
-        reader.read_exact(&mut meta_size)?;
-        let meta_len = {
-            // If a continuation marker is encountered, skip over it and read
-            // the size from the next four bytes.
-            if meta_size == CONTINUATION_MARKER {
-                reader.read_exact(&mut meta_size)?;
-            }
-            i32::from_le_bytes(meta_size)
-        };
-
-        let mut meta_buffer = vec![0; meta_len as usize];
-        reader.read_exact(&mut meta_buffer)?;
-
-        let message = ipc::root_as_message(meta_buffer.as_slice()).map_err(|err| {
-            ArrowError::IoError(format!("Unable to get root as message: {:?}", err))
-        })?;
-        // message header is a Schema, so read it
-        let ipc_schema: ipc::Schema = message.header_as_schema().ok_or_else(|| {
-            ArrowError::IoError("Unable to read IPC message as schema".to_string())
-        })?;
-        let schema = ipc::convert::fb_to_schema(ipc_schema);
-
-        // Create an array of optional dictionary value arrays, one per field.
-        let dictionaries_by_field = vec![None; schema.fields().len()];
-
-        Ok(Self {
-            reader,
-            schema: Arc::new(schema),
-            finished: false,
-            dictionaries_by_field,
-        })
-    }
-
-    /// Return the schema of the stream
-    pub fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-
-    /// Check if the stream is finished
-    pub fn is_finished(&self) -> bool {
-        self.finished
-    }
-
-    fn maybe_next(&mut self) -> Result<Option<RecordBatch>> {
-        if self.finished {
-            return Ok(None);
-        }
-        // determine metadata length
-        let mut meta_size: [u8; 4] = [0; 4];
-
-        match self.reader.read_exact(&mut meta_size) {
-            Ok(()) => (),
-            Err(e) => {
-                return if e.kind() == std::io::ErrorKind::UnexpectedEof {
-                    // Handle EOF without the "0xFFFFFFFF 0x00000000"
-                    // valid according to:
-                    // https://arrow.apache.org/docs/format/Columnar.html#ipc-streaming-format
-                    self.finished = true;
-                    Ok(None)
-                } else {
-                    Err(ArrowError::from(e))
-                };
-            }
-        }
-
-        let meta_len = {
-            // If a continuation marker is encountered, skip over it and read
-            // the size from the next four bytes.
-            if meta_size == CONTINUATION_MARKER {
-                self.reader.read_exact(&mut meta_size)?;
-            }
-            i32::from_le_bytes(meta_size)
-        };
-
-        if meta_len == 0 {
-            // the stream has ended, mark the reader as finished
-            self.finished = true;
-            return Ok(None);
-        }
-
-        let mut meta_buffer = vec![0; meta_len as usize];
-        self.reader.read_exact(&mut meta_buffer)?;
-
-        let vecs = &meta_buffer.to_vec();
-        let message = ipc::root_as_message(vecs).map_err(|err| {
-            ArrowError::IoError(format!("Unable to get root as message: {:?}", err))
-        })?;
-
-        match message.header_type() {
-            ipc::MessageHeader::Schema => Err(ArrowError::IoError(
-                "Not expecting a schema when messages are read".to_string(),
-            )),
-            ipc::MessageHeader::RecordBatch => {
-                let batch = message.header_as_record_batch().ok_or_else(|| {
-                    ArrowError::IoError(
-                        "Unable to read IPC message as record batch".to_string(),
-                    )
-                })?;
-                // read the block that makes up the record batch into a buffer
-                let mut buf = vec![0; message.bodyLength() as usize];
-                self.reader.read_exact(&mut buf)?;
-
-                read_record_batch(&buf, batch, self.schema(), &self.dictionaries_by_field).map(Some)
-            }
-            ipc::MessageHeader::DictionaryBatch => {
-                let batch = message.header_as_dictionary_batch().ok_or_else(|| {
-                    ArrowError::IoError(
-                        "Unable to read IPC message as dictionary batch".to_string(),
-                    )
-                })?;
-                // read the block that makes up the dictionary batch into a buffer
-                let mut buf = vec![0; message.bodyLength() as usize];
-                self.reader.read_exact(&mut buf)?;
-
-                read_dictionary(
-                    &buf, batch, &self.schema, &mut self.dictionaries_by_field
-                )?;
-
-                // read the next message until we encounter a RecordBatch
-                self.maybe_next()
-            }
-            ipc::MessageHeader::NONE => {
-                Ok(None)
-            }
-            t => Err(ArrowError::IoError(
-                format!("Reading types other than record batches not yet supported, unable to read {:?} ", t)
-            )),
-        }
-    }
-}
-
-impl<R: Read> Iterator for StreamReader<R> {
-    type Item = Result<RecordBatch>;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        self.maybe_next().transpose()
-    }
-}
-
-impl<R: Read> RecordBatchReader for StreamReader<R> {
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    use std::fs::File;
-
-    use flate2::read::GzDecoder;
-
-    use crate::util::integration_util::*;
-
-    #[test]
-    fn read_generated_files_014() {
-        let testdata = crate::util::test_util::arrow_test_data();
-        let version = "0.14.1";
-        // the test is repetitive, thus we can read all supported files at once
-        let paths = vec![
-            "generated_interval",
-            "generated_datetime",
-            "generated_dictionary",
-            "generated_nested",
-            "generated_primitive_no_batches",
-            "generated_primitive_zerolength",
-            "generated_primitive",
-            "generated_decimal",
-        ];
-        paths.iter().for_each(|path| {
-            let file = File::open(format!(
-                "{}/arrow-ipc-stream/integration/{}/{}.arrow_file",
-                testdata, version, path
-            ))
-            .unwrap();
-
-            let mut reader = FileReader::try_new(file).unwrap();
-
-            // read expected JSON output
-            let arrow_json = read_gzip_json(version, path);
-            assert!(arrow_json.equals_reader(&mut reader));
-        });
-    }
-
-    #[test]
-    #[should_panic(expected = "Big Endian is not supported for Decimal!")]
-    fn read_decimal_be_file_should_panic() {
-        let testdata = crate::util::test_util::arrow_test_data();
-        let file = File::open(format!(
-                "{}/arrow-ipc-stream/integration/1.0.0-bigendian/generated_decimal.arrow_file",
-                testdata
-            ))
-            .unwrap();
-        FileReader::try_new(file).unwrap();
-    }
-
-    #[test]
-    fn read_generated_be_files_should_work() {
-        // complementary to the previous test
-        let testdata = crate::util::test_util::arrow_test_data();
-        let paths = vec![
-            "generated_interval",
-            "generated_datetime",
-            "generated_dictionary",
-            "generated_nested",
-            "generated_null_trivial",
-            "generated_null",
-            "generated_primitive_no_batches",
-            "generated_primitive_zerolength",
-            "generated_primitive",
-        ];
-        paths.iter().for_each(|path| {
-            let file = File::open(format!(
-                "{}/arrow-ipc-stream/integration/1.0.0-bigendian/{}.arrow_file",
-                testdata, path
-            ))
-            .unwrap();
-
-            FileReader::try_new(file).unwrap();
-        });
-    }
-
-    #[test]
-    fn read_generated_streams_014() {
-        let testdata = crate::util::test_util::arrow_test_data();
-        let version = "0.14.1";
-        // the test is repetitive, thus we can read all supported files at once
-        let paths = vec![
-            "generated_interval",
-            "generated_datetime",
-            "generated_dictionary",
-            "generated_nested",
-            "generated_primitive_no_batches",
-            "generated_primitive_zerolength",
-            "generated_primitive",
-            "generated_decimal",
-        ];
-        paths.iter().for_each(|path| {
-            let file = File::open(format!(
-                "{}/arrow-ipc-stream/integration/{}/{}.stream",
-                testdata, version, path
-            ))
-            .unwrap();
-
-            let mut reader = StreamReader::try_new(file).unwrap();
-
-            // read expected JSON output
-            let arrow_json = read_gzip_json(version, path);
-            assert!(arrow_json.equals_reader(&mut reader));
-            // the next batch must be empty
-            assert!(reader.next().is_none());
-            // the stream must indicate that it's finished
-            assert!(reader.is_finished());
-        });
-    }
-
-    #[test]
-    fn read_generated_files_100() {
-        let testdata = crate::util::test_util::arrow_test_data();
-        let version = "1.0.0-littleendian";
-        // the test is repetitive, thus we can read all supported files at once
-        let paths = vec![
-            "generated_interval",
-            "generated_datetime",
-            "generated_dictionary",
-            "generated_nested",
-            "generated_null_trivial",
-            "generated_null",
-            "generated_primitive_no_batches",
-            "generated_primitive_zerolength",
-            "generated_primitive",
-        ];
-        paths.iter().for_each(|path| {
-            let file = File::open(format!(
-                "{}/arrow-ipc-stream/integration/{}/{}.arrow_file",
-                testdata, version, path
-            ))
-            .unwrap();
-
-            let mut reader = FileReader::try_new(file).unwrap();
-
-            // read expected JSON output
-            let arrow_json = read_gzip_json(version, path);
-            assert!(arrow_json.equals_reader(&mut reader));
-        });
-    }
-
-    #[test]
-    fn read_generated_streams_100() {
-        let testdata = crate::util::test_util::arrow_test_data();
-        let version = "1.0.0-littleendian";
-        // the test is repetitive, thus we can read all supported files at once
-        let paths = vec![
-            "generated_interval",
-            "generated_datetime",
-            "generated_dictionary",
-            "generated_nested",
-            "generated_null_trivial",
-            "generated_null",
-            "generated_primitive_no_batches",
-            "generated_primitive_zerolength",
-            "generated_primitive",
-        ];
-        paths.iter().for_each(|path| {
-            let file = File::open(format!(
-                "{}/arrow-ipc-stream/integration/{}/{}.stream",
-                testdata, version, path
-            ))
-            .unwrap();
-
-            let mut reader = StreamReader::try_new(file).unwrap();
-
-            // read expected JSON output
-            let arrow_json = read_gzip_json(version, path);
-            assert!(arrow_json.equals_reader(&mut reader));
-            // the next batch must be empty
-            assert!(reader.next().is_none());
-            // the stream must indicate that it's finished
-            assert!(reader.is_finished());
-        });
-    }
-
-    #[test]
-    fn test_arrow_single_float_row() {
-        let schema = Schema::new(vec![
-            Field::new("a", DataType::Float32, false),
-            Field::new("b", DataType::Float32, false),
-            Field::new("c", DataType::Int32, false),
-            Field::new("d", DataType::Int32, false),
-        ]);
-        let arrays = vec![
-            Arc::new(Float32Array::from(vec![1.23])) as ArrayRef,
-            Arc::new(Float32Array::from(vec![-6.50])) as ArrayRef,
-            Arc::new(Int32Array::from(vec![2])) as ArrayRef,
-            Arc::new(Int32Array::from(vec![1])) as ArrayRef,
-        ];
-        let batch = RecordBatch::try_new(Arc::new(schema.clone()), arrays).unwrap();
-        // create stream writer
-        let file = File::create("target/debug/testdata/float.stream").unwrap();
-        let mut stream_writer =
-            crate::ipc::writer::StreamWriter::try_new(file, &schema).unwrap();
-        stream_writer.write(&batch).unwrap();
-        stream_writer.finish().unwrap();
-
-        // read stream back
-        let file = File::open("target/debug/testdata/float.stream").unwrap();
-        let reader = StreamReader::try_new(file).unwrap();
-
-        reader.for_each(|batch| {
-            let batch = batch.unwrap();
-            assert!(
-                batch
-                    .column(0)
-                    .as_any()
-                    .downcast_ref::<Float32Array>()
-                    .unwrap()
-                    .value(0)
-                    != 0.0
-            );
-            assert!(
-                batch
-                    .column(1)
-                    .as_any()
-                    .downcast_ref::<Float32Array>()
-                    .unwrap()
-                    .value(0)
-                    != 0.0
-            );
-        })
-    }
-
-    /// Read gzipped JSON file
-    fn read_gzip_json(version: &str, path: &str) -> ArrowJson {
-        let testdata = crate::util::test_util::arrow_test_data();
-        let file = File::open(format!(
-            "{}/arrow-ipc-stream/integration/{}/{}.json.gz",
-            testdata, version, path
-        ))
-        .unwrap();
-        let mut gz = GzDecoder::new(&file);
-        let mut s = String::new();
-        gz.read_to_string(&mut s).unwrap();
-        // convert to Arrow JSON
-        let arrow_json: ArrowJson = serde_json::from_str(&s).unwrap();
-        arrow_json
-    }
-}
diff --git a/rust/arrow/src/ipc/writer.rs b/rust/arrow/src/ipc/writer.rs
deleted file mode 100644
index a6df7b8a1eb..00000000000
--- a/rust/arrow/src/ipc/writer.rs
+++ /dev/null
@@ -1,1160 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Arrow IPC File and Stream Writers
-//!
-//! The `FileWriter` and `StreamWriter` have similar interfaces,
-//! however the `FileWriter` expects a reader that supports `Seek`ing
-
-use std::collections::HashMap;
-use std::io::{BufWriter, Write};
-
-use flatbuffers::FlatBufferBuilder;
-
-use crate::array::{ArrayData, ArrayRef};
-use crate::buffer::{Buffer, MutableBuffer};
-use crate::datatypes::*;
-use crate::error::{ArrowError, Result};
-use crate::ipc;
-use crate::record_batch::RecordBatch;
-use crate::util::bit_util;
-
-use ipc::CONTINUATION_MARKER;
-
-/// IPC write options used to control the behaviour of the writer
-#[derive(Debug)]
-pub struct IpcWriteOptions {
-    /// Write padding after memory buffers to this multiple of bytes.
-    /// Generally 8 or 64, defaults to 8
-    alignment: usize,
-    /// The legacy format is for releases before 0.15.0, and uses metadata V4
-    write_legacy_ipc_format: bool,
-    /// The metadata version to write. The Rust IPC writer supports V4+
-    ///
-    /// *Default versions per crate*
-    ///
-    /// When creating the default IpcWriteOptions, the following metadata versions are used:
-    ///
-    /// version 2.0.0: V4, with legacy format enabled
-    /// version 4.0.0: V5
-    metadata_version: ipc::MetadataVersion,
-}
-
-impl IpcWriteOptions {
-    /// Try create IpcWriteOptions, checking for incompatible settings
-    pub fn try_new(
-        alignment: usize,
-        write_legacy_ipc_format: bool,
-        metadata_version: ipc::MetadataVersion,
-    ) -> Result<Self> {
-        if alignment == 0 || alignment % 8 != 0 {
-            return Err(ArrowError::InvalidArgumentError(
-                "Alignment should be greater than 0 and be a multiple of 8".to_string(),
-            ));
-        }
-        match metadata_version {
-            ipc::MetadataVersion::V1
-            | ipc::MetadataVersion::V2
-            | ipc::MetadataVersion::V3 => Err(ArrowError::InvalidArgumentError(
-                "Writing IPC metadata version 3 and lower not supported".to_string(),
-            )),
-            ipc::MetadataVersion::V4 => Ok(Self {
-                alignment,
-                write_legacy_ipc_format,
-                metadata_version,
-            }),
-            ipc::MetadataVersion::V5 => {
-                if write_legacy_ipc_format {
-                    Err(ArrowError::InvalidArgumentError(
-                        "Legacy IPC format only supported on metadata version 4"
-                            .to_string(),
-                    ))
-                } else {
-                    Ok(Self {
-                        alignment,
-                        write_legacy_ipc_format,
-                        metadata_version,
-                    })
-                }
-            }
-            z => panic!("Unsupported ipc::MetadataVersion {:?}", z),
-        }
-    }
-}
-
-impl Default for IpcWriteOptions {
-    fn default() -> Self {
-        Self {
-            alignment: 8,
-            write_legacy_ipc_format: false,
-            metadata_version: ipc::MetadataVersion::V5,
-        }
-    }
-}
-
-#[derive(Debug, Default)]
-pub struct IpcDataGenerator {}
-
-impl IpcDataGenerator {
-    pub fn schema_to_bytes(
-        &self,
-        schema: &Schema,
-        write_options: &IpcWriteOptions,
-    ) -> EncodedData {
-        let mut fbb = FlatBufferBuilder::new();
-        let schema = {
-            let fb = ipc::convert::schema_to_fb_offset(&mut fbb, schema);
-            fb.as_union_value()
-        };
-
-        let mut message = ipc::MessageBuilder::new(&mut fbb);
-        message.add_version(write_options.metadata_version);
-        message.add_header_type(ipc::MessageHeader::Schema);
-        message.add_bodyLength(0);
-        message.add_header(schema);
-        // TODO: custom metadata
-        let data = message.finish();
-        fbb.finish(data, None);
-
-        let data = fbb.finished_data();
-        EncodedData {
-            ipc_message: data.to_vec(),
-            arrow_data: vec![],
-        }
-    }
-
-    pub fn encoded_batch(
-        &self,
-        batch: &RecordBatch,
-        dictionary_tracker: &mut DictionaryTracker,
-        write_options: &IpcWriteOptions,
-    ) -> Result<(Vec<EncodedData>, EncodedData)> {
-        // TODO: handle nested dictionaries
-        let schema = batch.schema();
-        let mut encoded_dictionaries = Vec::with_capacity(schema.fields().len());
-
-        for (i, field) in schema.fields().iter().enumerate() {
-            let column = batch.column(i);
-
-            if let DataType::Dictionary(_key_type, _value_type) = column.data_type() {
-                let dict_id = field
-                    .dict_id()
-                    .expect("All Dictionary types have `dict_id`");
-                let dict_data = column.data();
-                let dict_values = &dict_data.child_data()[0];
-
-                let emit = dictionary_tracker.insert(dict_id, column)?;
-
-                if emit {
-                    encoded_dictionaries.push(self.dictionary_batch_to_bytes(
-                        dict_id,
-                        dict_values,
-                        write_options,
-                    ));
-                }
-            }
-        }
-
-        let encoded_message = self.record_batch_to_bytes(batch, write_options);
-
-        Ok((encoded_dictionaries, encoded_message))
-    }
-
-    /// Write a `RecordBatch` into two sets of bytes, one for the header (ipc::Message) and the
-    /// other for the batch's data
-    fn record_batch_to_bytes(
-        &self,
-        batch: &RecordBatch,
-        write_options: &IpcWriteOptions,
-    ) -> EncodedData {
-        let mut fbb = FlatBufferBuilder::new();
-
-        let mut nodes: Vec<ipc::FieldNode> = vec![];
-        let mut buffers: Vec<ipc::Buffer> = vec![];
-        let mut arrow_data: Vec<u8> = vec![];
-        let mut offset = 0;
-        for array in batch.columns() {
-            let array_data = array.data();
-            offset = write_array_data(
-                &array_data,
-                &mut buffers,
-                &mut arrow_data,
-                &mut nodes,
-                offset,
-                array.len(),
-                array.null_count(),
-            );
-        }
-
-        // write data
-        let buffers = fbb.create_vector(&buffers);
-        let nodes = fbb.create_vector(&nodes);
-
-        let root = {
-            let mut batch_builder = ipc::RecordBatchBuilder::new(&mut fbb);
-            batch_builder.add_length(batch.num_rows() as i64);
-            batch_builder.add_nodes(nodes);
-            batch_builder.add_buffers(buffers);
-            let b = batch_builder.finish();
-            b.as_union_value()
-        };
-        // create an ipc::Message
-        let mut message = ipc::MessageBuilder::new(&mut fbb);
-        message.add_version(write_options.metadata_version);
-        message.add_header_type(ipc::MessageHeader::RecordBatch);
-        message.add_bodyLength(arrow_data.len() as i64);
-        message.add_header(root);
-        let root = message.finish();
-        fbb.finish(root, None);
-        let finished_data = fbb.finished_data();
-
-        EncodedData {
-            ipc_message: finished_data.to_vec(),
-            arrow_data,
-        }
-    }
-
-    /// Write dictionary values into two sets of bytes, one for the header (ipc::Message) and the
-    /// other for the data
-    fn dictionary_batch_to_bytes(
-        &self,
-        dict_id: i64,
-        array_data: &ArrayData,
-        write_options: &IpcWriteOptions,
-    ) -> EncodedData {
-        let mut fbb = FlatBufferBuilder::new();
-
-        let mut nodes: Vec<ipc::FieldNode> = vec![];
-        let mut buffers: Vec<ipc::Buffer> = vec![];
-        let mut arrow_data: Vec<u8> = vec![];
-
-        write_array_data(
-            &array_data,
-            &mut buffers,
-            &mut arrow_data,
-            &mut nodes,
-            0,
-            array_data.len(),
-            array_data.null_count(),
-        );
-
-        // write data
-        let buffers = fbb.create_vector(&buffers);
-        let nodes = fbb.create_vector(&nodes);
-
-        let root = {
-            let mut batch_builder = ipc::RecordBatchBuilder::new(&mut fbb);
-            batch_builder.add_length(array_data.len() as i64);
-            batch_builder.add_nodes(nodes);
-            batch_builder.add_buffers(buffers);
-            batch_builder.finish()
-        };
-
-        let root = {
-            let mut batch_builder = ipc::DictionaryBatchBuilder::new(&mut fbb);
-            batch_builder.add_id(dict_id);
-            batch_builder.add_data(root);
-            batch_builder.finish().as_union_value()
-        };
-
-        let root = {
-            let mut message_builder = ipc::MessageBuilder::new(&mut fbb);
-            message_builder.add_version(write_options.metadata_version);
-            message_builder.add_header_type(ipc::MessageHeader::DictionaryBatch);
-            message_builder.add_bodyLength(arrow_data.len() as i64);
-            message_builder.add_header(root);
-            message_builder.finish()
-        };
-
-        fbb.finish(root, None);
-        let finished_data = fbb.finished_data();
-
-        EncodedData {
-            ipc_message: finished_data.to_vec(),
-            arrow_data,
-        }
-    }
-}
-
-/// Keeps track of dictionaries that have been written, to avoid emitting the same dictionary
-/// multiple times. Can optionally error if an update to an existing dictionary is attempted, which
-/// isn't allowed in the `FileWriter`.
-pub struct DictionaryTracker {
-    written: HashMap<i64, ArrayRef>,
-    error_on_replacement: bool,
-}
-
-impl DictionaryTracker {
-    pub fn new(error_on_replacement: bool) -> Self {
-        Self {
-            written: HashMap::new(),
-            error_on_replacement,
-        }
-    }
-
-    /// Keep track of the dictionary with the given ID and values. Behavior:
-    ///
-    /// * If this ID has been written already and has the same data, return `Ok(false)` to indicate
-    ///   that the dictionary was not actually inserted (because it's already been seen).
-    /// * If this ID has been written already but with different data, and this tracker is
-    ///   configured to return an error, return an error.
-    /// * If the tracker has not been configured to error on replacement or this dictionary
-    ///   has never been seen before, return `Ok(true)` to indicate that the dictionary was just
-    ///   inserted.
-    pub fn insert(&mut self, dict_id: i64, column: &ArrayRef) -> Result<bool> {
-        let dict_data = column.data();
-        let dict_values = &dict_data.child_data()[0];
-
-        // If a dictionary with this id was already emitted, check if it was the same.
-        if let Some(last) = self.written.get(&dict_id) {
-            if last.data().child_data()[0] == *dict_values {
-                // Same dictionary values => no need to emit it again
-                return Ok(false);
-            } else if self.error_on_replacement {
-                return Err(ArrowError::InvalidArgumentError(
-                    "Dictionary replacement detected when writing IPC file format. \
-                     Arrow IPC files only support a single dictionary for a given field \
-                     across all batches."
-                        .to_string(),
-                ));
-            }
-        }
-
-        self.written.insert(dict_id, column.clone());
-        Ok(true)
-    }
-}
-
-pub struct FileWriter<W: Write> {
-    /// The object to write to
-    writer: BufWriter<W>,
-    /// IPC write options
-    write_options: IpcWriteOptions,
-    /// A reference to the schema, used in validating record batches
-    schema: Schema,
-    /// The number of bytes between each block of bytes, as an offset for random access
-    block_offsets: usize,
-    /// Dictionary blocks that will be written as part of the IPC footer
-    dictionary_blocks: Vec<ipc::Block>,
-    /// Record blocks that will be written as part of the IPC footer
-    record_blocks: Vec<ipc::Block>,
-    /// Whether the writer footer has been written, and the writer is finished
-    finished: bool,
-    /// Keeps track of dictionaries that have been written
-    dictionary_tracker: DictionaryTracker,
-
-    data_gen: IpcDataGenerator,
-}
-
-impl<W: Write> FileWriter<W> {
-    /// Try create a new writer, with the schema written as part of the header
-    pub fn try_new(writer: W, schema: &Schema) -> Result<Self> {
-        let write_options = IpcWriteOptions::default();
-        Self::try_new_with_options(writer, schema, write_options)
-    }
-
-    /// Try create a new writer with IpcWriteOptions
-    pub fn try_new_with_options(
-        writer: W,
-        schema: &Schema,
-        write_options: IpcWriteOptions,
-    ) -> Result<Self> {
-        let data_gen = IpcDataGenerator::default();
-        let mut writer = BufWriter::new(writer);
-        // write magic to header
-        writer.write_all(&super::ARROW_MAGIC[..])?;
-        // create an 8-byte boundary after the header
-        writer.write_all(&[0, 0])?;
-        // write the schema, set the written bytes to the schema + header
-        let encoded_message = data_gen.schema_to_bytes(schema, &write_options);
-        let (meta, data) = write_message(&mut writer, encoded_message, &write_options)?;
-        Ok(Self {
-            writer,
-            write_options,
-            schema: schema.clone(),
-            block_offsets: meta + data + 8,
-            dictionary_blocks: vec![],
-            record_blocks: vec![],
-            finished: false,
-            dictionary_tracker: DictionaryTracker::new(true),
-            data_gen,
-        })
-    }
-
-    /// Write a record batch to the file
-    pub fn write(&mut self, batch: &RecordBatch) -> Result<()> {
-        if self.finished {
-            return Err(ArrowError::IoError(
-                "Cannot write record batch to file writer as it is closed".to_string(),
-            ));
-        }
-
-        let (encoded_dictionaries, encoded_message) = self.data_gen.encoded_batch(
-            batch,
-            &mut self.dictionary_tracker,
-            &self.write_options,
-        )?;
-
-        for encoded_dictionary in encoded_dictionaries {
-            let (meta, data) =
-                write_message(&mut self.writer, encoded_dictionary, &self.write_options)?;
-
-            let block =
-                ipc::Block::new(self.block_offsets as i64, meta as i32, data as i64);
-            self.dictionary_blocks.push(block);
-            self.block_offsets += meta + data;
-        }
-
-        let (meta, data) =
-            write_message(&mut self.writer, encoded_message, &self.write_options)?;
-        // add a record block for the footer
-        let block = ipc::Block::new(
-            self.block_offsets as i64,
-            meta as i32, // TODO: is this still applicable?
-            data as i64,
-        );
-        self.record_blocks.push(block);
-        self.block_offsets += meta + data;
-        Ok(())
-    }
-
-    /// Write footer and closing tag, then mark the writer as done
-    pub fn finish(&mut self) -> Result<()> {
-        if self.finished {
-            return Err(ArrowError::IoError(
-                "Cannot write footer to file writer as it is closed".to_string(),
-            ));
-        }
-
-        // write EOS
-        write_continuation(&mut self.writer, &self.write_options, 0)?;
-
-        let mut fbb = FlatBufferBuilder::new();
-        let dictionaries = fbb.create_vector(&self.dictionary_blocks);
-        let record_batches = fbb.create_vector(&self.record_blocks);
-        let schema = ipc::convert::schema_to_fb_offset(&mut fbb, &self.schema);
-
-        let root = {
-            let mut footer_builder = ipc::FooterBuilder::new(&mut fbb);
-            footer_builder.add_version(self.write_options.metadata_version);
-            footer_builder.add_schema(schema);
-            footer_builder.add_dictionaries(dictionaries);
-            footer_builder.add_recordBatches(record_batches);
-            footer_builder.finish()
-        };
-        fbb.finish(root, None);
-        let footer_data = fbb.finished_data();
-        self.writer.write_all(footer_data)?;
-        self.writer
-            .write_all(&(footer_data.len() as i32).to_le_bytes())?;
-        self.writer.write_all(&super::ARROW_MAGIC)?;
-        self.writer.flush()?;
-        self.finished = true;
-
-        Ok(())
-    }
-}
-
-pub struct StreamWriter<W: Write> {
-    /// The object to write to
-    writer: BufWriter<W>,
-    /// IPC write options
-    write_options: IpcWriteOptions,
-    /// A reference to the schema, used in validating record batches
-    schema: Schema,
-    /// Whether the writer footer has been written, and the writer is finished
-    finished: bool,
-    /// Keeps track of dictionaries that have been written
-    dictionary_tracker: DictionaryTracker,
-
-    data_gen: IpcDataGenerator,
-}
-
-impl<W: Write> StreamWriter<W> {
-    /// Try create a new writer, with the schema written as part of the header
-    pub fn try_new(writer: W, schema: &Schema) -> Result<Self> {
-        let write_options = IpcWriteOptions::default();
-        Self::try_new_with_options(writer, schema, write_options)
-    }
-
-    pub fn try_new_with_options(
-        writer: W,
-        schema: &Schema,
-        write_options: IpcWriteOptions,
-    ) -> Result<Self> {
-        let data_gen = IpcDataGenerator::default();
-        let mut writer = BufWriter::new(writer);
-        // write the schema, set the written bytes to the schema
-        let encoded_message = data_gen.schema_to_bytes(schema, &write_options);
-        write_message(&mut writer, encoded_message, &write_options)?;
-        Ok(Self {
-            writer,
-            write_options,
-            schema: schema.clone(),
-            finished: false,
-            dictionary_tracker: DictionaryTracker::new(false),
-            data_gen,
-        })
-    }
-
-    /// Write a record batch to the stream
-    pub fn write(&mut self, batch: &RecordBatch) -> Result<()> {
-        if self.finished {
-            return Err(ArrowError::IoError(
-                "Cannot write record batch to stream writer as it is closed".to_string(),
-            ));
-        }
-
-        let (encoded_dictionaries, encoded_message) = self
-            .data_gen
-            .encoded_batch(batch, &mut self.dictionary_tracker, &self.write_options)
-            .expect("StreamWriter is configured to not error on dictionary replacement");
-
-        for encoded_dictionary in encoded_dictionaries {
-            write_message(&mut self.writer, encoded_dictionary, &self.write_options)?;
-        }
-
-        write_message(&mut self.writer, encoded_message, &self.write_options)?;
-        Ok(())
-    }
-
-    /// Write continuation bytes, and mark the stream as done
-    pub fn finish(&mut self) -> Result<()> {
-        if self.finished {
-            return Err(ArrowError::IoError(
-                "Cannot write footer to stream writer as it is closed".to_string(),
-            ));
-        }
-
-        write_continuation(&mut self.writer, &self.write_options, 0)?;
-
-        self.finished = true;
-
-        Ok(())
-    }
-
-    /// Unwraps the BufWriter housed in StreamWriter.writer, returning the underlying
-    /// writer
-    ///
-    /// The buffer is flushed and the StreamWriter is finished before returning the
-    /// writer.
-    ///
-    /// # Errors
-    ///
-    /// An ['Err'] may be returned if an error occurs while finishing the StreamWriter
-    /// or while flushing the buffer.
-    ///
-    /// # Example
-    ///
-    /// ```
-    /// # use arrow::datatypes::Schema;
-    /// # use arrow::ipc::writer::StreamWriter;
-    /// # use arrow::error::ArrowError;
-    /// # fn main() -> Result<(), ArrowError> {
-    /// // The result we expect from an empty schema
-    /// let expected = vec![
-    ///     255, 255, 255, 255,  64,   0,   0,   0,
-    ///      16,   0,   0,   0,   0,   0,  10,   0,
-    ///      14,   0,  12,   0,  11,   0,   4,   0,
-    ///      10,   0,   0,   0,  20,   0,   0,   0,
-    ///       0,   0,   0,   1,   4,   0,  10,   0,
-    ///      12,   0,   0,   0,   8,   0,   4,   0,
-    ///      10,   0,   0,   0,   8,   0,   0,   0,
-    ///       8,   0,   0,   0,   0,   0,   0,   0,
-    ///       0,   0,   0,   0,   0,   0,   0,   0,
-    ///     255, 255, 255, 255,   0,   0,   0,   0
-    /// ];
-    ///
-    /// let schema = Schema::new(vec![]);
-    /// let buffer: Vec<u8> = Vec::new();
-    /// let stream_writer = StreamWriter::try_new(buffer, &schema)?;
-    ///
-    /// assert_eq!(stream_writer.into_inner()?, expected);
-    /// # Ok(())
-    /// # }
-    /// ```
-    pub fn into_inner(mut self) -> Result<W> {
-        if !self.finished {
-            self.finish()?;
-        }
-        self.writer.into_inner().map_err(ArrowError::from)
-    }
-}
-
-/// Stores the encoded data, which is an ipc::Message, and optional Arrow data
-pub struct EncodedData {
-    /// An encoded ipc::Message
-    pub ipc_message: Vec<u8>,
-    /// Arrow buffers to be written, should be an empty vec for schema messages
-    pub arrow_data: Vec<u8>,
-}
-/// Write a message's IPC data and buffers, returning metadata and buffer data lengths written
-pub fn write_message<W: Write>(
-    mut writer: W,
-    encoded: EncodedData,
-    write_options: &IpcWriteOptions,
-) -> Result<(usize, usize)> {
-    let arrow_data_len = encoded.arrow_data.len();
-    if arrow_data_len % 8 != 0 {
-        return Err(ArrowError::MemoryError(
-            "Arrow data not aligned".to_string(),
-        ));
-    }
-
-    let a = write_options.alignment - 1;
-    let buffer = encoded.ipc_message;
-    let flatbuf_size = buffer.len();
-    let prefix_size = if write_options.write_legacy_ipc_format {
-        4
-    } else {
-        8
-    };
-    let aligned_size = (flatbuf_size + prefix_size + a) & !a;
-    let padding_bytes = aligned_size - flatbuf_size - prefix_size;
-
-    write_continuation(
-        &mut writer,
-        &write_options,
-        (aligned_size - prefix_size) as i32,
-    )?;
-
-    // write the flatbuf
-    if flatbuf_size > 0 {
-        writer.write_all(&buffer)?;
-    }
-    // write padding
-    writer.write_all(&vec![0; padding_bytes])?;
-
-    // write arrow data
-    let body_len = if arrow_data_len > 0 {
-        write_body_buffers(&mut writer, &encoded.arrow_data)?
-    } else {
-        0
-    };
-
-    Ok((aligned_size, body_len))
-}
-
-fn write_body_buffers<W: Write>(mut writer: W, data: &[u8]) -> Result<usize> {
-    let len = data.len() as u32;
-    let pad_len = pad_to_8(len) as u32;
-    let total_len = len + pad_len;
-
-    // write body buffer
-    writer.write_all(data)?;
-    if pad_len > 0 {
-        writer.write_all(&vec![0u8; pad_len as usize][..])?;
-    }
-
-    writer.flush()?;
-    Ok(total_len as usize)
-}
-
-/// Write a record batch to the writer, writing the message size before the message
-/// if the record batch is being written to a stream
-fn write_continuation<W: Write>(
-    mut writer: W,
-    write_options: &IpcWriteOptions,
-    total_len: i32,
-) -> Result<usize> {
-    let mut written = 8;
-
-    // the version of the writer determines whether continuation markers should be added
-    match write_options.metadata_version {
-        ipc::MetadataVersion::V1
-        | ipc::MetadataVersion::V2
-        | ipc::MetadataVersion::V3 => {
-            unreachable!("Options with the metadata version cannot be created")
-        }
-        ipc::MetadataVersion::V4 => {
-            if !write_options.write_legacy_ipc_format {
-                // v0.15.0 format
-                writer.write_all(&CONTINUATION_MARKER)?;
-                written = 4;
-            }
-            writer.write_all(&total_len.to_le_bytes()[..])?;
-        }
-        ipc::MetadataVersion::V5 => {
-            // write continuation marker and message length
-            writer.write_all(&CONTINUATION_MARKER)?;
-            writer.write_all(&total_len.to_le_bytes()[..])?;
-        }
-        z => panic!("Unsupported ipc::MetadataVersion {:?}", z),
-    };
-
-    writer.flush()?;
-
-    Ok(written)
-}
-
-/// Write array data to a vector of bytes
-fn write_array_data(
-    array_data: &ArrayData,
-    mut buffers: &mut Vec<ipc::Buffer>,
-    mut arrow_data: &mut Vec<u8>,
-    mut nodes: &mut Vec<ipc::FieldNode>,
-    offset: i64,
-    num_rows: usize,
-    null_count: usize,
-) -> i64 {
-    let mut offset = offset;
-    nodes.push(ipc::FieldNode::new(num_rows as i64, null_count as i64));
-    // NullArray does not have any buffers, thus the null buffer is not generated
-    if array_data.data_type() != &DataType::Null {
-        // write null buffer if exists
-        let null_buffer = match array_data.null_buffer() {
-            None => {
-                // create a buffer and fill it with valid bits
-                let num_bytes = bit_util::ceil(num_rows, 8);
-                let buffer = MutableBuffer::new(num_bytes);
-                let buffer = buffer.with_bitset(num_bytes, true);
-                buffer.into()
-            }
-            Some(buffer) => buffer.clone(),
-        };
-
-        offset = write_buffer(&null_buffer, &mut buffers, &mut arrow_data, offset);
-    }
-
-    array_data.buffers().iter().for_each(|buffer| {
-        offset = write_buffer(buffer, &mut buffers, &mut arrow_data, offset);
-    });
-
-    if !matches!(array_data.data_type(), DataType::Dictionary(_, _)) {
-        // recursively write out nested structures
-        array_data.child_data().iter().for_each(|data_ref| {
-            // write the nested data (e.g list data)
-            offset = write_array_data(
-                data_ref,
-                &mut buffers,
-                &mut arrow_data,
-                &mut nodes,
-                offset,
-                data_ref.len(),
-                data_ref.null_count(),
-            );
-        });
-    }
-
-    offset
-}
-
-/// Write a buffer to a vector of bytes, and add its ipc::Buffer to a vector
-fn write_buffer(
-    buffer: &Buffer,
-    buffers: &mut Vec<ipc::Buffer>,
-    arrow_data: &mut Vec<u8>,
-    offset: i64,
-) -> i64 {
-    let len = buffer.len();
-    let pad_len = pad_to_8(len as u32);
-    let total_len: i64 = (len + pad_len) as i64;
-    // assert_eq!(len % 8, 0, "Buffer width not a multiple of 8 bytes");
-    buffers.push(ipc::Buffer::new(offset, total_len));
-    arrow_data.extend_from_slice(buffer.as_slice());
-    arrow_data.extend_from_slice(&vec![0u8; pad_len][..]);
-    offset + total_len
-}
-
-/// Calculate an 8-byte boundary and return the number of bytes needed to pad to 8 bytes
-#[inline]
-fn pad_to_8(len: u32) -> usize {
-    (((len + 7) & !7) - len) as usize
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    use std::fs::File;
-    use std::io::Read;
-    use std::sync::Arc;
-
-    use flate2::read::GzDecoder;
-    use ipc::MetadataVersion;
-
-    use crate::array::*;
-    use crate::datatypes::Field;
-    use crate::ipc::reader::*;
-    use crate::util::integration_util::*;
-
-    #[test]
-    fn test_write_file() {
-        let schema = Schema::new(vec![Field::new("field1", DataType::UInt32, false)]);
-        let values: Vec<Option<u32>> = vec![
-            Some(999),
-            None,
-            Some(235),
-            Some(123),
-            None,
-            None,
-            None,
-            None,
-            None,
-        ];
-        let array1 = UInt32Array::from(values);
-        let batch = RecordBatch::try_new(
-            Arc::new(schema.clone()),
-            vec![Arc::new(array1) as ArrayRef],
-        )
-        .unwrap();
-        {
-            let file = File::create("target/debug/testdata/arrow.arrow_file").unwrap();
-            let mut writer = FileWriter::try_new(file, &schema).unwrap();
-
-            writer.write(&batch).unwrap();
-            writer.finish().unwrap();
-        }
-
-        {
-            let file =
-                File::open(format!("target/debug/testdata/{}.arrow_file", "arrow"))
-                    .unwrap();
-            let mut reader = FileReader::try_new(file).unwrap();
-            while let Some(Ok(read_batch)) = reader.next() {
-                read_batch
-                    .columns()
-                    .iter()
-                    .zip(batch.columns())
-                    .for_each(|(a, b)| {
-                        assert_eq!(a.data_type(), b.data_type());
-                        assert_eq!(a.len(), b.len());
-                        assert_eq!(a.null_count(), b.null_count());
-                    });
-            }
-        }
-    }
-
-    fn write_null_file(options: IpcWriteOptions, suffix: &str) {
-        let schema = Schema::new(vec![
-            Field::new("nulls", DataType::Null, true),
-            Field::new("int32s", DataType::Int32, false),
-            Field::new("nulls2", DataType::Null, false),
-            Field::new("f64s", DataType::Float64, false),
-        ]);
-        let array1 = NullArray::new(32);
-        let array2 = Int32Array::from(vec![1; 32]);
-        let array3 = NullArray::new(32);
-        let array4 = Float64Array::from(vec![std::f64::NAN; 32]);
-        let batch = RecordBatch::try_new(
-            Arc::new(schema.clone()),
-            vec![
-                Arc::new(array1) as ArrayRef,
-                Arc::new(array2) as ArrayRef,
-                Arc::new(array3) as ArrayRef,
-                Arc::new(array4) as ArrayRef,
-            ],
-        )
-        .unwrap();
-        let file_name = format!("target/debug/testdata/nulls_{}.arrow_file", suffix);
-        {
-            let file = File::create(&file_name).unwrap();
-            let mut writer =
-                FileWriter::try_new_with_options(file, &schema, options).unwrap();
-
-            writer.write(&batch).unwrap();
-            writer.finish().unwrap();
-        }
-
-        {
-            let file = File::open(&file_name).unwrap();
-            let reader = FileReader::try_new(file).unwrap();
-            reader.for_each(|maybe_batch| {
-                maybe_batch
-                    .unwrap()
-                    .columns()
-                    .iter()
-                    .zip(batch.columns())
-                    .for_each(|(a, b)| {
-                        assert_eq!(a.data_type(), b.data_type());
-                        assert_eq!(a.len(), b.len());
-                        assert_eq!(a.null_count(), b.null_count());
-                    });
-            });
-        }
-    }
-    #[test]
-    fn test_write_null_file_v4() {
-        write_null_file(
-            IpcWriteOptions::try_new(8, false, MetadataVersion::V4).unwrap(),
-            "v4_a8",
-        );
-        write_null_file(
-            IpcWriteOptions::try_new(8, true, MetadataVersion::V4).unwrap(),
-            "v4_a8l",
-        );
-        write_null_file(
-            IpcWriteOptions::try_new(64, false, MetadataVersion::V4).unwrap(),
-            "v4_a64",
-        );
-        write_null_file(
-            IpcWriteOptions::try_new(64, true, MetadataVersion::V4).unwrap(),
-            "v4_a64l",
-        );
-    }
-
-    #[test]
-    fn test_write_null_file_v5() {
-        write_null_file(
-            IpcWriteOptions::try_new(8, false, MetadataVersion::V5).unwrap(),
-            "v5_a8",
-        );
-        write_null_file(
-            IpcWriteOptions::try_new(64, false, MetadataVersion::V5).unwrap(),
-            "v5_a64",
-        );
-    }
-
-    #[test]
-    fn read_and_rewrite_generated_files_014() {
-        let testdata = crate::util::test_util::arrow_test_data();
-        let version = "0.14.1";
-        // the test is repetitive, thus we can read all supported files at once
-        let paths = vec![
-            "generated_interval",
-            "generated_datetime",
-            "generated_dictionary",
-            "generated_nested",
-            "generated_primitive_no_batches",
-            "generated_primitive_zerolength",
-            "generated_primitive",
-            "generated_decimal",
-        ];
-        paths.iter().for_each(|path| {
-            let file = File::open(format!(
-                "{}/arrow-ipc-stream/integration/{}/{}.arrow_file",
-                testdata, version, path
-            ))
-            .unwrap();
-
-            let mut reader = FileReader::try_new(file).unwrap();
-
-            // read and rewrite the file to a temp location
-            {
-                let file = File::create(format!(
-                    "target/debug/testdata/{}-{}.arrow_file",
-                    version, path
-                ))
-                .unwrap();
-                let mut writer = FileWriter::try_new(file, &reader.schema()).unwrap();
-                while let Some(Ok(batch)) = reader.next() {
-                    writer.write(&batch).unwrap();
-                }
-                writer.finish().unwrap();
-            }
-
-            let file = File::open(format!(
-                "target/debug/testdata/{}-{}.arrow_file",
-                version, path
-            ))
-            .unwrap();
-            let mut reader = FileReader::try_new(file).unwrap();
-
-            // read expected JSON output
-            let arrow_json = read_gzip_json(version, path);
-            assert!(arrow_json.equals_reader(&mut reader));
-        });
-    }
-
-    #[test]
-    fn read_and_rewrite_generated_streams_014() {
-        let testdata = crate::util::test_util::arrow_test_data();
-        let version = "0.14.1";
-        // the test is repetitive, thus we can read all supported files at once
-        let paths = vec![
-            "generated_interval",
-            "generated_datetime",
-            "generated_dictionary",
-            "generated_nested",
-            "generated_primitive_no_batches",
-            "generated_primitive_zerolength",
-            "generated_primitive",
-            "generated_decimal",
-        ];
-        paths.iter().for_each(|path| {
-            let file = File::open(format!(
-                "{}/arrow-ipc-stream/integration/{}/{}.stream",
-                testdata, version, path
-            ))
-            .unwrap();
-
-            let reader = StreamReader::try_new(file).unwrap();
-
-            // read and rewrite the stream to a temp location
-            {
-                let file = File::create(format!(
-                    "target/debug/testdata/{}-{}.stream",
-                    version, path
-                ))
-                .unwrap();
-                let mut writer = StreamWriter::try_new(file, &reader.schema()).unwrap();
-                reader.for_each(|batch| {
-                    writer.write(&batch.unwrap()).unwrap();
-                });
-                writer.finish().unwrap();
-            }
-
-            let file =
-                File::open(format!("target/debug/testdata/{}-{}.stream", version, path))
-                    .unwrap();
-            let mut reader = StreamReader::try_new(file).unwrap();
-
-            // read expected JSON output
-            let arrow_json = read_gzip_json(version, path);
-            assert!(arrow_json.equals_reader(&mut reader));
-        });
-    }
-
-    #[test]
-    fn read_and_rewrite_generated_files_100() {
-        let testdata = crate::util::test_util::arrow_test_data();
-        let version = "1.0.0-littleendian";
-        // the test is repetitive, thus we can read all supported files at once
-        let paths = vec![
-            "generated_custom_metadata",
-            "generated_datetime",
-            "generated_dictionary_unsigned",
-            "generated_dictionary",
-            // "generated_duplicate_fieldnames",
-            "generated_interval",
-            "generated_nested",
-            // "generated_nested_large_offsets",
-            "generated_null_trivial",
-            "generated_null",
-            "generated_primitive_large_offsets",
-            "generated_primitive_no_batches",
-            "generated_primitive_zerolength",
-            "generated_primitive",
-            // "generated_recursive_nested",
-        ];
-        paths.iter().for_each(|path| {
-            let file = File::open(format!(
-                "{}/arrow-ipc-stream/integration/{}/{}.arrow_file",
-                testdata, version, path
-            ))
-            .unwrap();
-
-            let mut reader = FileReader::try_new(file).unwrap();
-
-            // read and rewrite the file to a temp location
-            {
-                let file = File::create(format!(
-                    "target/debug/testdata/{}-{}.arrow_file",
-                    version, path
-                ))
-                .unwrap();
-                // write IPC version 5
-                let options =
-                    IpcWriteOptions::try_new(8, false, ipc::MetadataVersion::V5).unwrap();
-                let mut writer =
-                    FileWriter::try_new_with_options(file, &reader.schema(), options)
-                        .unwrap();
-                while let Some(Ok(batch)) = reader.next() {
-                    writer.write(&batch).unwrap();
-                }
-                writer.finish().unwrap();
-            }
-
-            let file = File::open(format!(
-                "target/debug/testdata/{}-{}.arrow_file",
-                version, path
-            ))
-            .unwrap();
-            let mut reader = FileReader::try_new(file).unwrap();
-
-            // read expected JSON output
-            let arrow_json = read_gzip_json(version, path);
-            assert!(arrow_json.equals_reader(&mut reader));
-        });
-    }
-
-    #[test]
-    fn read_and_rewrite_generated_streams_100() {
-        let testdata = crate::util::test_util::arrow_test_data();
-        let version = "1.0.0-littleendian";
-        // the test is repetitive, thus we can read all supported files at once
-        let paths = vec![
-            "generated_custom_metadata",
-            "generated_datetime",
-            "generated_dictionary_unsigned",
-            "generated_dictionary",
-            // "generated_duplicate_fieldnames",
-            "generated_interval",
-            "generated_nested",
-            // "generated_nested_large_offsets",
-            "generated_null_trivial",
-            "generated_null",
-            "generated_primitive_large_offsets",
-            "generated_primitive_no_batches",
-            "generated_primitive_zerolength",
-            "generated_primitive",
-            // "generated_recursive_nested",
-        ];
-        paths.iter().for_each(|path| {
-            let file = File::open(format!(
-                "{}/arrow-ipc-stream/integration/{}/{}.stream",
-                testdata, version, path
-            ))
-            .unwrap();
-
-            let reader = StreamReader::try_new(file).unwrap();
-
-            // read and rewrite the stream to a temp location
-            {
-                let file = File::create(format!(
-                    "target/debug/testdata/{}-{}.stream",
-                    version, path
-                ))
-                .unwrap();
-                let options =
-                    IpcWriteOptions::try_new(8, false, ipc::MetadataVersion::V5).unwrap();
-                let mut writer =
-                    StreamWriter::try_new_with_options(file, &reader.schema(), options)
-                        .unwrap();
-                reader.for_each(|batch| {
-                    writer.write(&batch.unwrap()).unwrap();
-                });
-                writer.finish().unwrap();
-            }
-
-            let file =
-                File::open(format!("target/debug/testdata/{}-{}.stream", version, path))
-                    .unwrap();
-            let mut reader = StreamReader::try_new(file).unwrap();
-
-            // read expected JSON output
-            let arrow_json = read_gzip_json(version, path);
-            assert!(arrow_json.equals_reader(&mut reader));
-        });
-    }
-
-    /// Read gzipped JSON file
-    fn read_gzip_json(version: &str, path: &str) -> ArrowJson {
-        let testdata = crate::util::test_util::arrow_test_data();
-        let file = File::open(format!(
-            "{}/arrow-ipc-stream/integration/{}/{}.json.gz",
-            testdata, version, path
-        ))
-        .unwrap();
-        let mut gz = GzDecoder::new(&file);
-        let mut s = String::new();
-        gz.read_to_string(&mut s).unwrap();
-        // convert to Arrow JSON
-        let arrow_json: ArrowJson = serde_json::from_str(&s).unwrap();
-        arrow_json
-    }
-}
diff --git a/rust/arrow/src/json/mod.rs b/rust/arrow/src/json/mod.rs
deleted file mode 100644
index 6b3df188a47..00000000000
--- a/rust/arrow/src/json/mod.rs
+++ /dev/null
@@ -1,27 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Transfer data between the Arrow memory format and JSON
-//! line-delimited records. See the module level documentation for the
-//! [`reader`] and [`writer`] for usage examples.
-
-pub mod reader;
-pub mod writer;
-
-pub use self::reader::Reader;
-pub use self::reader::ReaderBuilder;
-pub use self::writer::{ArrayWriter, LineDelimitedWriter, Writer};
diff --git a/rust/arrow/src/json/reader.rs b/rust/arrow/src/json/reader.rs
deleted file mode 100644
index 31c496c9293..00000000000
--- a/rust/arrow/src/json/reader.rs
+++ /dev/null
@@ -1,2949 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! # JSON Reader
-//!
-//! This JSON reader allows JSON line-delimited files to be read into the Arrow memory
-//! model. Records are loaded in batches and are then converted from row-based data to
-//! columnar data.
-//!
-//! Example:
-//!
-//! ```
-//! use arrow::datatypes::{DataType, Field, Schema};
-//! use arrow::json;
-//! use std::fs::File;
-//! use std::io::BufReader;
-//! use std::sync::Arc;
-//!
-//! let schema = Schema::new(vec![
-//!     Field::new("a", DataType::Float64, false),
-//!     Field::new("b", DataType::Float64, false),
-//!     Field::new("c", DataType::Float64, false),
-//! ]);
-//!
-//! let file = File::open("test/data/basic.json").unwrap();
-//!
-//! let mut json = json::Reader::new(BufReader::new(file), Arc::new(schema), 1024, None);
-//! let batch = json.next().unwrap().unwrap();
-//! ```
-
-use std::io::{BufRead, BufReader, Read, Seek, SeekFrom};
-use std::iter::FromIterator;
-use std::sync::Arc;
-
-use indexmap::map::IndexMap as HashMap;
-use indexmap::set::IndexSet as HashSet;
-use serde_json::{map::Map as JsonMap, Value};
-
-use crate::buffer::MutableBuffer;
-use crate::datatypes::*;
-use crate::error::{ArrowError, Result};
-use crate::record_batch::RecordBatch;
-use crate::util::bit_util;
-use crate::{array::*, buffer::Buffer};
-
-#[derive(Debug, Clone)]
-enum InferredType {
-    Scalar(HashSet<DataType>),
-    Array(Box<InferredType>),
-    Object(HashMap<String, InferredType>),
-    Any,
-}
-
-impl InferredType {
-    fn merge(&mut self, other: InferredType) -> Result<()> {
-        match (self, other) {
-            (InferredType::Array(s), InferredType::Array(o)) => {
-                s.merge(*o)?;
-            }
-            (InferredType::Scalar(self_hs), InferredType::Scalar(other_hs)) => {
-                other_hs.into_iter().for_each(|v| {
-                    self_hs.insert(v);
-                });
-            }
-            (InferredType::Object(self_map), InferredType::Object(other_map)) => {
-                for (k, v) in other_map {
-                    self_map.entry(k).or_insert(InferredType::Any).merge(v)?;
-                }
-            }
-            (s @ InferredType::Any, v) => {
-                *s = v;
-            }
-            (_, InferredType::Any) => {}
-            // convert a scalar type to a single-item scalar array type.
-            (
-                InferredType::Array(self_inner_type),
-                other_scalar @ InferredType::Scalar(_),
-            ) => {
-                self_inner_type.merge(other_scalar)?;
-            }
-            (s @ InferredType::Scalar(_), InferredType::Array(mut other_inner_type)) => {
-                other_inner_type.merge(s.clone())?;
-                *s = InferredType::Array(other_inner_type);
-            }
-            // incompatible types
-            (s, o) => {
-                return Err(ArrowError::JsonError(format!(
-                    "Incompatible type found during schema inference: {:?} v.s. {:?}",
-                    s, o,
-                )));
-            }
-        }
-
-        Ok(())
-    }
-}
-
-/// Coerce data type during inference
-///
-/// * `Int64` and `Float64` should be `Float64`
-/// * Lists and scalars are coerced to a list of a compatible scalar
-/// * All other types are coerced to `Utf8`
-fn coerce_data_type(dt: Vec<&DataType>) -> DataType {
-    let mut dt_iter = dt.into_iter().cloned();
-    let dt_init = dt_iter.next().unwrap_or(DataType::Utf8);
-
-    dt_iter.fold(dt_init, |l, r| match (l, r) {
-        (DataType::Boolean, DataType::Boolean) => DataType::Boolean,
-        (DataType::Int64, DataType::Int64) => DataType::Int64,
-        (DataType::Float64, DataType::Float64)
-        | (DataType::Float64, DataType::Int64)
-        | (DataType::Int64, DataType::Float64) => DataType::Float64,
-        (DataType::List(l), DataType::List(r)) => DataType::List(Box::new(Field::new(
-            "item",
-            coerce_data_type(vec![l.data_type(), r.data_type()]),
-            true,
-        ))),
-        // coerce scalar and scalar array into scalar array
-        (DataType::List(e), not_list) | (not_list, DataType::List(e)) => {
-            DataType::List(Box::new(Field::new(
-                "item",
-                coerce_data_type(vec![e.data_type(), &not_list]),
-                true,
-            )))
-        }
-        _ => DataType::Utf8,
-    })
-}
-
-fn generate_datatype(t: &InferredType) -> Result<DataType> {
-    Ok(match t {
-        InferredType::Scalar(hs) => coerce_data_type(hs.iter().collect()),
-        InferredType::Object(spec) => DataType::Struct(generate_fields(spec)?),
-        InferredType::Array(ele_type) => DataType::List(Box::new(Field::new(
-            "item",
-            generate_datatype(ele_type)?,
-            true,
-        ))),
-        InferredType::Any => DataType::Null,
-    })
-}
-
-fn generate_fields(spec: &HashMap<String, InferredType>) -> Result<Vec<Field>> {
-    spec.iter()
-        .map(|(k, types)| Ok(Field::new(k, generate_datatype(types)?, true)))
-        .collect()
-}
-
-/// Generate schema from JSON field names and inferred data types
-fn generate_schema(spec: HashMap<String, InferredType>) -> Result<Schema> {
-    Ok(Schema::new(generate_fields(&spec)?))
-}
-
-/// JSON file reader that produces a serde_json::Value iterator from a Read trait
-///
-/// # Example
-///
-/// ```
-/// use std::fs::File;
-/// use std::io::BufReader;
-/// use arrow::json::reader::ValueIter;
-///
-/// let mut reader =
-///     BufReader::new(File::open("test/data/mixed_arrays.json").unwrap());
-/// let mut value_reader = ValueIter::new(&mut reader, None);
-/// for value in value_reader {
-///     println!("JSON value: {}", value.unwrap());
-/// }
-/// ```
-#[derive(Debug)]
-pub struct ValueIter<'a, R: Read> {
-    reader: &'a mut BufReader<R>,
-    max_read_records: Option<usize>,
-    record_count: usize,
-    // reuse line buffer to avoid allocation on each record
-    line_buf: String,
-}
-
-impl<'a, R: Read> ValueIter<'a, R> {
-    pub fn new(reader: &'a mut BufReader<R>, max_read_records: Option<usize>) -> Self {
-        Self {
-            reader,
-            max_read_records,
-            record_count: 0,
-            line_buf: String::new(),
-        }
-    }
-}
-
-impl<'a, R: Read> Iterator for ValueIter<'a, R> {
-    type Item = Result<Value>;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        if let Some(max) = self.max_read_records {
-            if self.record_count >= max {
-                return None;
-            }
-        }
-
-        loop {
-            self.line_buf.truncate(0);
-            match self.reader.read_line(&mut self.line_buf) {
-                Ok(0) => {
-                    // read_line returns 0 when stream reached EOF
-                    return None;
-                }
-                Err(e) => {
-                    return Some(Err(ArrowError::JsonError(format!(
-                        "Failed to read JSON record: {}",
-                        e
-                    ))));
-                }
-                _ => {
-                    let trimmed_s = self.line_buf.trim();
-                    if trimmed_s.is_empty() {
-                        // ignore empty lines
-                        continue;
-                    }
-
-                    self.record_count += 1;
-                    return Some(serde_json::from_str(trimmed_s).map_err(|e| {
-                        ArrowError::JsonError(format!("Not valid JSON: {}", e))
-                    }));
-                }
-            }
-        }
-    }
-}
-
-/// Infer the fields of a JSON file by reading the first n records of the file, with
-/// `max_read_records` controlling the maximum number of records to read.
-///
-/// If `max_read_records` is not set, the whole file is read to infer its field types.
-///
-/// Contrary to [`infer_json_schema`], this function will seek back to the start of the `reader`.
-/// That way, the `reader` can be used immediately afterwards to create a [`Reader`].
-///
-/// # Examples
-/// ```
-/// use std::fs::File;
-/// use std::io::BufReader;
-/// use arrow::json::reader::infer_json_schema_from_seekable;
-///
-/// let file = File::open("test/data/mixed_arrays.json").unwrap();
-/// // file's cursor's offset at 0
-/// let mut reader = BufReader::new(file);
-/// let inferred_schema = infer_json_schema_from_seekable(&mut reader, None).unwrap();
-/// // file's cursor's offset automatically set at 0
-/// ```
-pub fn infer_json_schema_from_seekable<R: Read + Seek>(
-    reader: &mut BufReader<R>,
-    max_read_records: Option<usize>,
-) -> Result<Schema> {
-    let schema = infer_json_schema(reader, max_read_records);
-    // return the reader seek back to the start
-    reader.seek(SeekFrom::Start(0))?;
-
-    schema
-}
-
-/// Infer the fields of a JSON file by reading the first n records of the buffer, with
-/// `max_read_records` controlling the maximum number of records to read.
-///
-/// If `max_read_records` is not set, the whole file is read to infer its field types.
-///
-/// This function will not seek back to the start of the `reader`. The user has to manage the
-/// original file's cursor. This function is useful when the `reader`'s cursor is not available
-/// (does not implement [`Seek`]), such is the case for compressed streams decoders.
-///
-/// # Examples
-/// ```
-/// use std::fs::File;
-/// use std::io::{BufReader, SeekFrom, Seek};
-/// use flate2::read::GzDecoder;
-/// use arrow::json::reader::infer_json_schema;
-///
-/// let mut file = File::open("test/data/mixed_arrays.json.gz").unwrap();
-///
-/// // file's cursor's offset at 0
-/// let mut reader = BufReader::new(GzDecoder::new(&file));
-/// let inferred_schema = infer_json_schema(&mut reader, None).unwrap();
-/// // cursor's offset at end of file
-///
-/// // seek back to start so that the original file is usable again
-/// file.seek(SeekFrom::Start(0)).unwrap();
-/// ```
-pub fn infer_json_schema<R: Read>(
-    reader: &mut BufReader<R>,
-    max_read_records: Option<usize>,
-) -> Result<Schema> {
-    infer_json_schema_from_iterator(ValueIter::new(reader, max_read_records))
-}
-
-fn set_object_scalar_field_type(
-    field_types: &mut HashMap<String, InferredType>,
-    key: &str,
-    ftype: DataType,
-) -> Result<()> {
-    if !field_types.contains_key(key) {
-        field_types.insert(key.to_string(), InferredType::Scalar(HashSet::new()));
-    }
-
-    match field_types.get_mut(key).unwrap() {
-        InferredType::Scalar(hs) => {
-            hs.insert(ftype);
-            Ok(())
-        }
-        // in case of column contains both scalar type and scalar array type, we convert type of
-        // this column to scalar array.
-        scalar_array @ InferredType::Array(_) => {
-            let mut hs = HashSet::new();
-            hs.insert(ftype);
-            scalar_array.merge(InferredType::Scalar(hs))?;
-            Ok(())
-        }
-        t => Err(ArrowError::JsonError(format!(
-            "Expected scalar or scalar array JSON type, found: {:?}",
-            t,
-        ))),
-    }
-}
-
-fn infer_scalar_array_type(array: &[Value]) -> Result<InferredType> {
-    let mut hs = HashSet::new();
-
-    for v in array {
-        match v {
-            Value::Null => {}
-            Value::Number(n) => {
-                if n.is_i64() {
-                    hs.insert(DataType::Int64);
-                } else {
-                    hs.insert(DataType::Float64);
-                }
-            }
-            Value::Bool(_) => {
-                hs.insert(DataType::Boolean);
-            }
-            Value::String(_) => {
-                hs.insert(DataType::Utf8);
-            }
-            Value::Array(_) | Value::Object(_) => {
-                return Err(ArrowError::JsonError(format!(
-                    "Expected scalar value for scalar array, got: {:?}",
-                    v
-                )));
-            }
-        }
-    }
-
-    Ok(InferredType::Scalar(hs))
-}
-
-fn infer_nested_array_type(array: &[Value]) -> Result<InferredType> {
-    let mut inner_ele_type = InferredType::Any;
-
-    for v in array {
-        match v {
-            Value::Array(inner_array) => {
-                inner_ele_type.merge(infer_array_element_type(inner_array)?)?;
-            }
-            x => {
-                return Err(ArrowError::JsonError(format!(
-                    "Got non array element in nested array: {:?}",
-                    x
-                )));
-            }
-        }
-    }
-
-    Ok(InferredType::Array(Box::new(inner_ele_type)))
-}
-
-fn infer_struct_array_type(array: &[Value]) -> Result<InferredType> {
-    let mut field_types = HashMap::new();
-
-    for v in array {
-        match v {
-            Value::Object(map) => {
-                collect_field_types_from_object(&mut field_types, map)?;
-            }
-            _ => {
-                return Err(ArrowError::JsonError(format!(
-                    "Expected struct value for struct array, got: {:?}",
-                    v
-                )));
-            }
-        }
-    }
-
-    Ok(InferredType::Object(field_types))
-}
-
-fn infer_array_element_type(array: &[Value]) -> Result<InferredType> {
-    match array.iter().take(1).next() {
-        None => Ok(InferredType::Any), // empty array, return any type that can be updated later
-        Some(a) => match a {
-            Value::Array(_) => infer_nested_array_type(array),
-            Value::Object(_) => infer_struct_array_type(array),
-            _ => infer_scalar_array_type(array),
-        },
-    }
-}
-
-fn collect_field_types_from_object(
-    field_types: &mut HashMap<String, InferredType>,
-    map: &JsonMap<String, Value>,
-) -> Result<()> {
-    for (k, v) in map {
-        match v {
-            Value::Array(array) => {
-                let ele_type = infer_array_element_type(array)?;
-
-                if !field_types.contains_key(k) {
-                    match ele_type {
-                        InferredType::Scalar(_) => {
-                            field_types.insert(
-                                k.to_string(),
-                                InferredType::Array(Box::new(InferredType::Scalar(
-                                    HashSet::new(),
-                                ))),
-                            );
-                        }
-                        InferredType::Object(_) => {
-                            field_types.insert(
-                                k.to_string(),
-                                InferredType::Array(Box::new(InferredType::Object(
-                                    HashMap::new(),
-                                ))),
-                            );
-                        }
-                        InferredType::Any | InferredType::Array(_) => {
-                            // set inner type to any for nested array as well
-                            // so it can be updated properly from subsequent type merges
-                            field_types.insert(
-                                k.to_string(),
-                                InferredType::Array(Box::new(InferredType::Any)),
-                            );
-                        }
-                    }
-                }
-
-                match field_types.get_mut(k).unwrap() {
-                    InferredType::Array(inner_type) => {
-                        inner_type.merge(ele_type)?;
-                    }
-                    // in case of column contains both scalar type and scalar array type, we
-                    // convert type of this column to scalar array.
-                    field_type @ InferredType::Scalar(_) => {
-                        field_type.merge(ele_type)?;
-                        *field_type = InferredType::Array(Box::new(field_type.clone()));
-                    }
-                    t => {
-                        return Err(ArrowError::JsonError(format!(
-                            "Expected array json type, found: {:?}",
-                            t,
-                        )));
-                    }
-                }
-            }
-            Value::Bool(_) => {
-                set_object_scalar_field_type(field_types, k, DataType::Boolean)?;
-            }
-            Value::Null => {
-                // do nothing, we treat json as nullable by default when
-                // inferring
-            }
-            Value::Number(n) => {
-                if n.is_f64() {
-                    set_object_scalar_field_type(field_types, k, DataType::Float64)?;
-                } else {
-                    // default to i64
-                    set_object_scalar_field_type(field_types, k, DataType::Int64)?;
-                }
-            }
-            Value::String(_) => {
-                set_object_scalar_field_type(field_types, k, DataType::Utf8)?;
-            }
-            Value::Object(inner_map) => {
-                if !field_types.contains_key(k) {
-                    field_types
-                        .insert(k.to_string(), InferredType::Object(HashMap::new()));
-                }
-                match field_types.get_mut(k).unwrap() {
-                    InferredType::Object(inner_field_types) => {
-                        collect_field_types_from_object(inner_field_types, inner_map)?;
-                    }
-                    t => {
-                        return Err(ArrowError::JsonError(format!(
-                            "Expected object json type, found: {:?}",
-                            t,
-                        )));
-                    }
-                }
-            }
-        }
-    }
-
-    Ok(())
-}
-
-/// Infer the fields of a JSON file by reading all items from the JSON Value Iterator.
-///
-/// The following type coercion logic is implemented:
-/// * `Int64` and `Float64` are converted to `Float64`
-/// * Lists and scalars are coerced to a list of a compatible scalar
-/// * All other cases are coerced to `Utf8` (String)
-///
-/// Note that the above coercion logic is different from what Spark has, where it would default to
-/// String type in case of List and Scalar values appeared in the same field.
-///
-/// The reason we diverge here is because we don't have utilities to deal with JSON data once it's
-/// interpreted as Strings. We should match Spark's behavior once we added more JSON parsing
-/// kernels in the future.
-pub fn infer_json_schema_from_iterator<I>(value_iter: I) -> Result<Schema>
-where
-    I: Iterator<Item = Result<Value>>,
-{
-    let mut field_types: HashMap<String, InferredType> = HashMap::new();
-
-    for record in value_iter {
-        match record? {
-            Value::Object(map) => {
-                collect_field_types_from_object(&mut field_types, &map)?;
-            }
-            value => {
-                return Err(ArrowError::JsonError(format!(
-                    "Expected JSON record to be an object, found {:?}",
-                    value
-                )));
-            }
-        };
-    }
-
-    generate_schema(field_types)
-}
-
-/// JSON values to Arrow record batch decoder. Decoder's next_batch method takes a JSON Value
-/// iterator as input and outputs Arrow record batch.
-///
-/// # Examples
-/// ```
-/// use arrow::json::reader::{Decoder, ValueIter, infer_json_schema};
-/// use std::fs::File;
-/// use std::io::{BufReader, Seek, SeekFrom};
-/// use std::sync::Arc;
-///
-/// let mut reader =
-///     BufReader::new(File::open("test/data/mixed_arrays.json").unwrap());
-/// let inferred_schema = infer_json_schema(&mut reader, None).unwrap();
-/// let batch_size = 1024;
-/// let decoder = Decoder::new(Arc::new(inferred_schema), batch_size, None);
-///
-/// // seek back to start so that the original file is usable again
-/// reader.seek(SeekFrom::Start(0)).unwrap();
-/// let mut value_reader = ValueIter::new(&mut reader, None);
-/// let batch = decoder.next_batch(&mut value_reader).unwrap().unwrap();
-/// assert_eq!(4, batch.num_rows());
-/// assert_eq!(4, batch.num_columns());
-/// ```
-#[derive(Debug)]
-pub struct Decoder {
-    /// Explicit schema for the JSON file
-    schema: SchemaRef,
-    /// Optional projection for which columns to load (case-sensitive names)
-    projection: Option<Vec<String>>,
-    /// Batch size (number of records to load each time)
-    batch_size: usize,
-}
-
-impl Decoder {
-    /// Create a new JSON decoder from any value that implements the `Iterator<Item=Result<Value>>`
-    /// trait.
-    pub fn new(
-        schema: SchemaRef,
-        batch_size: usize,
-        projection: Option<Vec<String>>,
-    ) -> Self {
-        Self {
-            schema,
-            projection,
-            batch_size,
-        }
-    }
-
-    /// Returns the schema of the reader, useful for getting the schema without reading
-    /// record batches
-    pub fn schema(&self) -> SchemaRef {
-        match &self.projection {
-            Some(projection) => {
-                let fields = self.schema.fields();
-                let projected_fields: Vec<Field> = fields
-                    .iter()
-                    .filter_map(|field| {
-                        if projection.contains(field.name()) {
-                            Some(field.clone())
-                        } else {
-                            None
-                        }
-                    })
-                    .collect();
-
-                Arc::new(Schema::new(projected_fields))
-            }
-            None => self.schema.clone(),
-        }
-    }
-
-    /// Read the next batch of records
-    pub fn next_batch<I>(&self, value_iter: &mut I) -> Result<Option<RecordBatch>>
-    where
-        I: Iterator<Item = Result<Value>>,
-    {
-        let mut rows: Vec<Value> = Vec::with_capacity(self.batch_size);
-
-        for value in value_iter.by_ref().take(self.batch_size) {
-            let v = value?;
-            match v {
-                Value::Object(_) => rows.push(v),
-                _ => {
-                    return Err(ArrowError::JsonError(format!(
-                        "Row needs to be of type object, got: {:?}",
-                        v
-                    )));
-                }
-            }
-        }
-        if rows.is_empty() {
-            // reached end of file
-            return Ok(None);
-        }
-
-        let rows = &rows[..];
-        let projection = self.projection.clone().unwrap_or_else(Vec::new);
-        let arrays = self.build_struct_array(rows, self.schema.fields(), &projection);
-
-        let projected_fields: Vec<Field> = if projection.is_empty() {
-            self.schema.fields().to_vec()
-        } else {
-            projection
-                .iter()
-                .map(|name| self.schema.column_with_name(name))
-                .filter_map(|c| c)
-                .map(|(_, field)| field.clone())
-                .collect()
-        };
-
-        let projected_schema = Arc::new(Schema::new(projected_fields));
-
-        arrays.and_then(|arr| RecordBatch::try_new(projected_schema, arr).map(Some))
-    }
-
-    fn build_wrapped_list_array(
-        &self,
-        rows: &[Value],
-        col_name: &str,
-        key_type: &DataType,
-    ) -> Result<ArrayRef> {
-        match *key_type {
-            DataType::Int8 => {
-                let dtype = DataType::Dictionary(
-                    Box::new(DataType::Int8),
-                    Box::new(DataType::Utf8),
-                );
-                self.list_array_string_array_builder::<Int8Type>(&dtype, col_name, rows)
-            }
-            DataType::Int16 => {
-                let dtype = DataType::Dictionary(
-                    Box::new(DataType::Int16),
-                    Box::new(DataType::Utf8),
-                );
-                self.list_array_string_array_builder::<Int16Type>(&dtype, col_name, rows)
-            }
-            DataType::Int32 => {
-                let dtype = DataType::Dictionary(
-                    Box::new(DataType::Int32),
-                    Box::new(DataType::Utf8),
-                );
-                self.list_array_string_array_builder::<Int32Type>(&dtype, col_name, rows)
-            }
-            DataType::Int64 => {
-                let dtype = DataType::Dictionary(
-                    Box::new(DataType::Int64),
-                    Box::new(DataType::Utf8),
-                );
-                self.list_array_string_array_builder::<Int64Type>(&dtype, col_name, rows)
-            }
-            DataType::UInt8 => {
-                let dtype = DataType::Dictionary(
-                    Box::new(DataType::UInt8),
-                    Box::new(DataType::Utf8),
-                );
-                self.list_array_string_array_builder::<UInt8Type>(&dtype, col_name, rows)
-            }
-            DataType::UInt16 => {
-                let dtype = DataType::Dictionary(
-                    Box::new(DataType::UInt16),
-                    Box::new(DataType::Utf8),
-                );
-                self.list_array_string_array_builder::<UInt16Type>(&dtype, col_name, rows)
-            }
-            DataType::UInt32 => {
-                let dtype = DataType::Dictionary(
-                    Box::new(DataType::UInt32),
-                    Box::new(DataType::Utf8),
-                );
-                self.list_array_string_array_builder::<UInt32Type>(&dtype, col_name, rows)
-            }
-            DataType::UInt64 => {
-                let dtype = DataType::Dictionary(
-                    Box::new(DataType::UInt64),
-                    Box::new(DataType::Utf8),
-                );
-                self.list_array_string_array_builder::<UInt64Type>(&dtype, col_name, rows)
-            }
-            ref e => Err(ArrowError::JsonError(format!(
-                "Data type is currently not supported for dictionaries in list : {:?}",
-                e
-            ))),
-        }
-    }
-
-    #[inline(always)]
-    fn list_array_string_array_builder<DICT_TY>(
-        &self,
-        data_type: &DataType,
-        col_name: &str,
-        rows: &[Value],
-    ) -> Result<ArrayRef>
-    where
-        DICT_TY: ArrowPrimitiveType + ArrowDictionaryKeyType,
-    {
-        let mut builder: Box<dyn ArrayBuilder> = match data_type {
-            DataType::Utf8 => {
-                let values_builder = StringBuilder::new(rows.len() * 5);
-                Box::new(ListBuilder::new(values_builder))
-            }
-            DataType::Dictionary(_, _) => {
-                let values_builder =
-                    self.build_string_dictionary_builder::<DICT_TY>(rows.len() * 5)?;
-                Box::new(ListBuilder::new(values_builder))
-            }
-            e => {
-                return Err(ArrowError::JsonError(format!(
-                    "Nested list data builder type is not supported: {:?}",
-                    e
-                )))
-            }
-        };
-
-        for row in rows {
-            if let Some(value) = row.get(col_name) {
-                // value can be an array or a scalar
-                let vals: Vec<Option<String>> = if let Value::String(v) = value {
-                    vec![Some(v.to_string())]
-                } else if let Value::Array(n) = value {
-                    n.iter()
-                        .map(|v: &Value| {
-                            if v.is_string() {
-                                Some(v.as_str().unwrap().to_string())
-                            } else if v.is_array() || v.is_object() || v.is_null() {
-                                // implicitly drop nested values
-                                // TODO support deep-nesting
-                                None
-                            } else {
-                                Some(v.to_string())
-                            }
-                        })
-                        .collect()
-                } else if let Value::Null = value {
-                    vec![None]
-                } else if !value.is_object() {
-                    vec![Some(value.to_string())]
-                } else {
-                    return Err(ArrowError::JsonError(
-                        "Only scalars are currently supported in JSON arrays".to_string(),
-                    ));
-                };
-
-                // TODO: ARROW-10335: APIs of dictionary arrays and others are different. Unify
-                // them.
-                match data_type {
-                    DataType::Utf8 => {
-                        let builder = builder
-                            .as_any_mut()
-                            .downcast_mut::<ListBuilder<StringBuilder>>()
-                            .ok_or_else(||ArrowError::JsonError(
-                                "Cast failed for ListBuilder<StringBuilder> during nested data parsing".to_string(),
-                            ))?;
-                        for val in vals {
-                            if let Some(v) = val {
-                                builder.values().append_value(&v)?
-                            } else {
-                                builder.values().append_null()?
-                            };
-                        }
-
-                        // Append to the list
-                        builder.append(true)?;
-                    }
-                    DataType::Dictionary(_, _) => {
-                        let builder = builder.as_any_mut().downcast_mut::<ListBuilder<StringDictionaryBuilder<DICT_TY>>>().ok_or_else(||ArrowError::JsonError(
-                            "Cast failed for ListBuilder<StringDictionaryBuilder> during nested data parsing".to_string(),
-                        ))?;
-                        for val in vals {
-                            if let Some(v) = val {
-                                let _ = builder.values().append(&v)?;
-                            } else {
-                                builder.values().append_null()?
-                            };
-                        }
-
-                        // Append to the list
-                        builder.append(true)?;
-                    }
-                    e => {
-                        return Err(ArrowError::JsonError(format!(
-                            "Nested list data builder type is not supported: {:?}",
-                            e
-                        )))
-                    }
-                }
-            }
-        }
-
-        Ok(builder.finish() as ArrayRef)
-    }
-
-    #[inline(always)]
-    #[allow(clippy::unnecessary_wraps)]
-    fn build_string_dictionary_builder<T>(
-        &self,
-        row_len: usize,
-    ) -> Result<StringDictionaryBuilder<T>>
-    where
-        T: ArrowPrimitiveType + ArrowDictionaryKeyType,
-    {
-        let key_builder = PrimitiveBuilder::<T>::new(row_len);
-        let values_builder = StringBuilder::new(row_len * 5);
-        Ok(StringDictionaryBuilder::new(key_builder, values_builder))
-    }
-
-    #[inline(always)]
-    fn build_string_dictionary_array(
-        &self,
-        rows: &[Value],
-        col_name: &str,
-        key_type: &DataType,
-        value_type: &DataType,
-    ) -> Result<ArrayRef> {
-        if let DataType::Utf8 = *value_type {
-            match *key_type {
-                DataType::Int8 => self.build_dictionary_array::<Int8Type>(rows, col_name),
-                DataType::Int16 => {
-                    self.build_dictionary_array::<Int16Type>(rows, col_name)
-                }
-                DataType::Int32 => {
-                    self.build_dictionary_array::<Int32Type>(rows, col_name)
-                }
-                DataType::Int64 => {
-                    self.build_dictionary_array::<Int64Type>(rows, col_name)
-                }
-                DataType::UInt8 => {
-                    self.build_dictionary_array::<UInt8Type>(rows, col_name)
-                }
-                DataType::UInt16 => {
-                    self.build_dictionary_array::<UInt16Type>(rows, col_name)
-                }
-                DataType::UInt32 => {
-                    self.build_dictionary_array::<UInt32Type>(rows, col_name)
-                }
-                DataType::UInt64 => {
-                    self.build_dictionary_array::<UInt64Type>(rows, col_name)
-                }
-                _ => Err(ArrowError::JsonError(
-                    "unsupported dictionary key type".to_string(),
-                )),
-            }
-        } else {
-            Err(ArrowError::JsonError(
-                "dictionary types other than UTF-8 not yet supported".to_string(),
-            ))
-        }
-    }
-
-    fn build_boolean_array(&self, rows: &[Value], col_name: &str) -> Result<ArrayRef> {
-        let mut builder = BooleanBuilder::new(rows.len());
-        for row in rows {
-            if let Some(value) = row.get(&col_name) {
-                if let Some(boolean) = value.as_bool() {
-                    builder.append_value(boolean)?
-                } else {
-                    builder.append_null()?;
-                }
-            } else {
-                builder.append_null()?;
-            }
-        }
-        Ok(Arc::new(builder.finish()))
-    }
-
-    #[allow(clippy::unnecessary_wraps)]
-    fn build_primitive_array<T: ArrowPrimitiveType>(
-        &self,
-        rows: &[Value],
-        col_name: &str,
-    ) -> Result<ArrayRef>
-    where
-        T: ArrowNumericType,
-        T::Native: num::NumCast,
-    {
-        Ok(Arc::new(
-            rows.iter()
-                .map(|row| {
-                    row.get(&col_name)
-                        .and_then(|value| value.as_f64())
-                        .and_then(num::cast::cast)
-                })
-                .collect::<PrimitiveArray<T>>(),
-        ))
-    }
-
-    /// Build a nested GenericListArray from a list of unnested `Value`s
-    fn build_nested_list_array<OffsetSize: OffsetSizeTrait>(
-        &self,
-        rows: &[Value],
-        list_field: &Field,
-    ) -> Result<ArrayRef> {
-        // build list offsets
-        let mut cur_offset = OffsetSize::zero();
-        let list_len = rows.len();
-        let num_list_bytes = bit_util::ceil(list_len, 8);
-        let mut offsets = Vec::with_capacity(list_len + 1);
-        let mut list_nulls = MutableBuffer::from_len_zeroed(num_list_bytes);
-        let list_nulls = list_nulls.as_slice_mut();
-        offsets.push(cur_offset);
-        rows.iter().enumerate().for_each(|(i, v)| {
-            if let Value::Array(a) = v {
-                cur_offset += OffsetSize::from_usize(a.len()).unwrap();
-                bit_util::set_bit(list_nulls, i);
-            } else if let Value::Null = v {
-                // value is null, not incremented
-            } else {
-                cur_offset += OffsetSize::one();
-            }
-            offsets.push(cur_offset);
-        });
-        let valid_len = cur_offset.to_usize().unwrap();
-        let array_data = match list_field.data_type() {
-            DataType::Null => NullArray::new(valid_len).data().clone(),
-            DataType::Boolean => {
-                let num_bytes = bit_util::ceil(valid_len, 8);
-                let mut bool_values = MutableBuffer::from_len_zeroed(num_bytes);
-                let mut bool_nulls =
-                    MutableBuffer::new(num_bytes).with_bitset(num_bytes, true);
-                let mut curr_index = 0;
-                rows.iter().for_each(|v| {
-                    if let Value::Array(vs) = v {
-                        vs.iter().for_each(|value| {
-                            if let Value::Bool(child) = value {
-                                // if valid boolean, append value
-                                if *child {
-                                    bit_util::set_bit(
-                                        bool_values.as_slice_mut(),
-                                        curr_index,
-                                    );
-                                }
-                            } else {
-                                // null slot
-                                bit_util::unset_bit(
-                                    bool_nulls.as_slice_mut(),
-                                    curr_index,
-                                );
-                            }
-                            curr_index += 1;
-                        });
-                    }
-                });
-                ArrayData::builder(list_field.data_type().clone())
-                    .len(valid_len)
-                    .add_buffer(bool_values.into())
-                    .null_bit_buffer(bool_nulls.into())
-                    .build()
-            }
-            DataType::Int8 => self.read_primitive_list_values::<Int8Type>(rows),
-            DataType::Int16 => self.read_primitive_list_values::<Int16Type>(rows),
-            DataType::Int32 => self.read_primitive_list_values::<Int32Type>(rows),
-            DataType::Int64 => self.read_primitive_list_values::<Int64Type>(rows),
-            DataType::UInt8 => self.read_primitive_list_values::<UInt8Type>(rows),
-            DataType::UInt16 => self.read_primitive_list_values::<UInt16Type>(rows),
-            DataType::UInt32 => self.read_primitive_list_values::<UInt32Type>(rows),
-            DataType::UInt64 => self.read_primitive_list_values::<UInt64Type>(rows),
-            DataType::Float16 => {
-                return Err(ArrowError::JsonError("Float16 not supported".to_string()))
-            }
-            DataType::Float32 => self.read_primitive_list_values::<Float32Type>(rows),
-            DataType::Float64 => self.read_primitive_list_values::<Float64Type>(rows),
-            DataType::Timestamp(_, _)
-            | DataType::Date32
-            | DataType::Date64
-            | DataType::Time32(_)
-            | DataType::Time64(_) => {
-                return Err(ArrowError::JsonError(
-                    "Temporal types are not yet supported, see ARROW-4803".to_string(),
-                ))
-            }
-            DataType::Utf8 => {
-                StringArray::from_iter(flatten_json_string_values(rows).into_iter())
-                    .data()
-                    .clone()
-            }
-            DataType::LargeUtf8 => {
-                LargeStringArray::from_iter(flatten_json_string_values(rows).into_iter())
-                    .data()
-                    .clone()
-            }
-            DataType::List(field) => {
-                let child = self
-                    .build_nested_list_array::<i32>(&flatten_json_values(rows), field)?;
-                child.data().clone()
-            }
-            DataType::LargeList(field) => {
-                let child = self
-                    .build_nested_list_array::<i64>(&flatten_json_values(rows), field)?;
-                child.data().clone()
-            }
-            DataType::Struct(fields) => {
-                // extract list values, with non-lists converted to Value::Null
-                let array_item_count = rows
-                    .iter()
-                    .map(|row| match row {
-                        Value::Array(values) => values.len(),
-                        _ => 1,
-                    })
-                    .sum();
-                let num_bytes = bit_util::ceil(array_item_count, 8);
-                let mut null_buffer = MutableBuffer::from_len_zeroed(num_bytes);
-                let mut struct_index = 0;
-                let rows: Vec<Value> = rows
-                    .iter()
-                    .flat_map(|row| {
-                        if let Value::Array(values) = row {
-                            values.iter().for_each(|_| {
-                                bit_util::set_bit(
-                                    null_buffer.as_slice_mut(),
-                                    struct_index,
-                                );
-                                struct_index += 1;
-                            });
-                            values.clone()
-                        } else {
-                            struct_index += 1;
-                            vec![Value::Null]
-                        }
-                    })
-                    .collect();
-                let arrays =
-                    self.build_struct_array(rows.as_slice(), fields.as_slice(), &[])?;
-                let data_type = DataType::Struct(fields.clone());
-                let buf = null_buffer.into();
-                ArrayDataBuilder::new(data_type)
-                    .len(rows.len())
-                    .null_bit_buffer(buf)
-                    .child_data(arrays.into_iter().map(|a| a.data().clone()).collect())
-                    .build()
-            }
-            datatype => {
-                return Err(ArrowError::JsonError(format!(
-                    "Nested list of {:?} not supported",
-                    datatype
-                )));
-            }
-        };
-        // build list
-        let list_data = ArrayData::builder(DataType::List(Box::new(list_field.clone())))
-            .len(list_len)
-            .add_buffer(Buffer::from_slice_ref(&offsets))
-            .add_child_data(array_data)
-            .null_bit_buffer(list_nulls.into())
-            .build();
-        Ok(Arc::new(GenericListArray::<OffsetSize>::from(list_data)))
-    }
-
-    /// Builds the child values of a `StructArray`, falling short of constructing the StructArray.
-    /// The function does not construct the StructArray as some callers would want the child arrays.
-    ///
-    /// *Note*: The function is recursive, and will read nested structs.
-    ///
-    /// If `projection` is not empty, then all values are returned. The first level of projection
-    /// occurs at the `RecordBatch` level. No further projection currently occurs, but would be
-    /// useful if plucking values from a struct, e.g. getting `a.b.c.e` from `a.b.c.{d, e}`.
-    fn build_struct_array(
-        &self,
-        rows: &[Value],
-        struct_fields: &[Field],
-        projection: &[String],
-    ) -> Result<Vec<ArrayRef>> {
-        let arrays: Result<Vec<ArrayRef>> = struct_fields
-            .iter()
-            .filter(|field| projection.is_empty() || projection.contains(field.name()))
-            .map(|field| {
-                match field.data_type() {
-                    DataType::Null => {
-                        Ok(Arc::new(NullArray::new(rows.len())) as ArrayRef)
-                    }
-                    DataType::Boolean => self.build_boolean_array(rows, field.name()),
-                    DataType::Float64 => {
-                        self.build_primitive_array::<Float64Type>(rows, field.name())
-                    }
-                    DataType::Float32 => {
-                        self.build_primitive_array::<Float32Type>(rows, field.name())
-                    }
-                    DataType::Int64 => {
-                        self.build_primitive_array::<Int64Type>(rows, field.name())
-                    }
-                    DataType::Int32 => {
-                        self.build_primitive_array::<Int32Type>(rows, field.name())
-                    }
-                    DataType::Int16 => {
-                        self.build_primitive_array::<Int16Type>(rows, field.name())
-                    }
-                    DataType::Int8 => {
-                        self.build_primitive_array::<Int8Type>(rows, field.name())
-                    }
-                    DataType::UInt64 => {
-                        self.build_primitive_array::<UInt64Type>(rows, field.name())
-                    }
-                    DataType::UInt32 => {
-                        self.build_primitive_array::<UInt32Type>(rows, field.name())
-                    }
-                    DataType::UInt16 => {
-                        self.build_primitive_array::<UInt16Type>(rows, field.name())
-                    }
-                    DataType::UInt8 => {
-                        self.build_primitive_array::<UInt8Type>(rows, field.name())
-                    }
-                    // TODO: this is incomplete
-                    DataType::Timestamp(unit, _) => match unit {
-                        TimeUnit::Second => self
-                            .build_primitive_array::<TimestampSecondType>(
-                                rows,
-                                field.name(),
-                            ),
-                        TimeUnit::Microsecond => self
-                            .build_primitive_array::<TimestampMicrosecondType>(
-                                rows,
-                                field.name(),
-                            ),
-                        TimeUnit::Millisecond => self
-                            .build_primitive_array::<TimestampMillisecondType>(
-                                rows,
-                                field.name(),
-                            ),
-                        TimeUnit::Nanosecond => self
-                            .build_primitive_array::<TimestampNanosecondType>(
-                                rows,
-                                field.name(),
-                            ),
-                    },
-                    DataType::Date64 => {
-                        self.build_primitive_array::<Date64Type>(rows, field.name())
-                    }
-                    DataType::Date32 => {
-                        self.build_primitive_array::<Date32Type>(rows, field.name())
-                    }
-                    DataType::Time64(unit) => match unit {
-                        TimeUnit::Microsecond => self
-                            .build_primitive_array::<Time64MicrosecondType>(
-                                rows,
-                                field.name(),
-                            ),
-                        TimeUnit::Nanosecond => self
-                            .build_primitive_array::<Time64NanosecondType>(
-                                rows,
-                                field.name(),
-                            ),
-                        t => Err(ArrowError::JsonError(format!(
-                            "TimeUnit {:?} not supported with Time64",
-                            t
-                        ))),
-                    },
-                    DataType::Time32(unit) => match unit {
-                        TimeUnit::Second => self
-                            .build_primitive_array::<Time32SecondType>(
-                                rows,
-                                field.name(),
-                            ),
-                        TimeUnit::Millisecond => self
-                            .build_primitive_array::<Time32MillisecondType>(
-                                rows,
-                                field.name(),
-                            ),
-                        t => Err(ArrowError::JsonError(format!(
-                            "TimeUnit {:?} not supported with Time32",
-                            t
-                        ))),
-                    },
-                    DataType::Utf8 => Ok(Arc::new(
-                        rows.iter()
-                            .map(|row| {
-                                let maybe_value = row.get(field.name());
-                                maybe_value.and_then(|value| value.as_str())
-                            })
-                            .collect::<StringArray>(),
-                    ) as ArrayRef),
-                    DataType::List(ref list_field) => {
-                        match list_field.data_type() {
-                            DataType::Dictionary(ref key_ty, _) => {
-                                self.build_wrapped_list_array(rows, field.name(), key_ty)
-                            }
-                            _ => {
-                                // extract rows by name
-                                let extracted_rows = rows
-                                    .iter()
-                                    .map(|row| {
-                                        row.get(field.name())
-                                            .cloned()
-                                            .unwrap_or(Value::Null)
-                                    })
-                                    .collect::<Vec<Value>>();
-                                self.build_nested_list_array::<i32>(
-                                    extracted_rows.as_slice(),
-                                    list_field,
-                                )
-                            }
-                        }
-                    }
-                    DataType::Dictionary(ref key_ty, ref val_ty) => self
-                        .build_string_dictionary_array(
-                            rows,
-                            field.name(),
-                            key_ty,
-                            val_ty,
-                        ),
-                    DataType::Struct(fields) => {
-                        let len = rows.len();
-                        let num_bytes = bit_util::ceil(len, 8);
-                        let mut null_buffer = MutableBuffer::from_len_zeroed(num_bytes);
-                        let struct_rows = rows
-                            .iter()
-                            .enumerate()
-                            .map(|(i, row)| {
-                                (
-                                    i,
-                                    row.as_object()
-                                        .map(|v| v.get(field.name()))
-                                        .flatten(),
-                                )
-                            })
-                            .map(|(i, v)| match v {
-                                // we want the field as an object, if it's not, we treat as null
-                                Some(Value::Object(value)) => {
-                                    bit_util::set_bit(null_buffer.as_slice_mut(), i);
-                                    Value::Object(value.clone())
-                                }
-                                _ => Value::Object(Default::default()),
-                            })
-                            .collect::<Vec<Value>>();
-                        let arrays =
-                            self.build_struct_array(&struct_rows, fields, &[])?;
-                        // construct a struct array's data in order to set null buffer
-                        let data_type = DataType::Struct(fields.clone());
-                        let data = ArrayDataBuilder::new(data_type)
-                            .len(len)
-                            .null_bit_buffer(null_buffer.into())
-                            .child_data(
-                                arrays.into_iter().map(|a| a.data().clone()).collect(),
-                            )
-                            .build();
-                        Ok(make_array(data))
-                    }
-                    _ => Err(ArrowError::JsonError(format!(
-                        "{:?} type is not supported",
-                        field.data_type()
-                    ))),
-                }
-            })
-            .collect();
-        arrays
-    }
-
-    #[inline(always)]
-    fn build_dictionary_array<T>(
-        &self,
-        rows: &[Value],
-        col_name: &str,
-    ) -> Result<ArrayRef>
-    where
-        T::Native: num::NumCast,
-        T: ArrowPrimitiveType + ArrowDictionaryKeyType,
-    {
-        let mut builder: StringDictionaryBuilder<T> =
-            self.build_string_dictionary_builder(rows.len())?;
-        for row in rows {
-            if let Some(value) = row.get(&col_name) {
-                if let Some(str_v) = value.as_str() {
-                    builder.append(str_v).map(drop)?
-                } else {
-                    builder.append_null()?
-                }
-            } else {
-                builder.append_null()?
-            }
-        }
-        Ok(Arc::new(builder.finish()) as ArrayRef)
-    }
-
-    /// Read the primitive list's values into ArrayData
-    fn read_primitive_list_values<T>(&self, rows: &[Value]) -> ArrayData
-    where
-        T: ArrowPrimitiveType + ArrowNumericType,
-        T::Native: num::NumCast,
-    {
-        let values = rows
-            .iter()
-            .flat_map(|row| {
-                // read values from list
-                if let Value::Array(values) = row {
-                    values
-                        .iter()
-                        .map(|value| {
-                            let v: Option<T::Native> =
-                                value.as_f64().and_then(num::cast::cast);
-                            v
-                        })
-                        .collect::<Vec<Option<T::Native>>>()
-                } else if let Value::Number(value) = row {
-                    // handle the scalar number case
-                    let v: Option<T::Native> = value.as_f64().and_then(num::cast::cast);
-                    v.map(|v| vec![Some(v)]).unwrap_or_default()
-                } else {
-                    vec![]
-                }
-            })
-            .collect::<Vec<Option<T::Native>>>();
-        let array = PrimitiveArray::<T>::from_iter(values.iter());
-        array.data().clone()
-    }
-}
-
-/// Reads a JSON value as a string, regardless of its type.
-/// This is useful if the expected datatype is a string, in which case we preserve
-/// all the values regardless of they type.
-///
-/// Applying `value.to_string()` unfortunately results in an escaped string, which
-/// is not what we want.
-#[inline(always)]
-fn json_value_as_string(value: &Value) -> Option<String> {
-    match value {
-        Value::Null => None,
-        Value::String(string) => Some(string.clone()),
-        _ => Some(value.to_string()),
-    }
-}
-
-/// Flattens a list of JSON values, by flattening lists, and treating all other values as
-/// single-value lists.
-/// This is used to read into nested lists (list of list, list of struct) and non-dictionary lists.
-#[inline]
-fn flatten_json_values(values: &[Value]) -> Vec<Value> {
-    values
-        .iter()
-        .flat_map(|row| {
-            if let Value::Array(values) = row {
-                values.clone()
-            } else if let Value::Null = row {
-                vec![Value::Null]
-            } else {
-                // we interpret a scalar as a single-value list to minimise data loss
-                vec![row.clone()]
-            }
-        })
-        .collect()
-}
-
-/// Flattens a list into string values, dropping Value::Null in the process.
-/// This is useful for interpreting any JSON array as string, dropping nulls.
-/// See `json_value_as_string`.
-#[inline]
-fn flatten_json_string_values(values: &[Value]) -> Vec<Option<String>> {
-    values
-        .iter()
-        .flat_map(|row| {
-            if let Value::Array(values) = row {
-                values
-                    .iter()
-                    .map(json_value_as_string)
-                    .collect::<Vec<Option<_>>>()
-            } else if let Value::Null = row {
-                vec![]
-            } else {
-                vec![json_value_as_string(row)]
-            }
-        })
-        .collect::<Vec<Option<_>>>()
-}
-/// JSON file reader
-#[derive(Debug)]
-pub struct Reader<R: Read> {
-    reader: BufReader<R>,
-    /// JSON value decoder
-    decoder: Decoder,
-}
-
-impl<R: Read> Reader<R> {
-    /// Create a new JSON Reader from any value that implements the `Read` trait.
-    ///
-    /// If reading a `File`, you can customise the Reader, such as to enable schema
-    /// inference, use `ReaderBuilder`.
-    pub fn new(
-        reader: R,
-        schema: SchemaRef,
-        batch_size: usize,
-        projection: Option<Vec<String>>,
-    ) -> Self {
-        Self::from_buf_reader(BufReader::new(reader), schema, batch_size, projection)
-    }
-
-    /// Create a new JSON Reader from a `BufReader<R: Read>`
-    ///
-    /// To customize the schema, such as to enable schema inference, use `ReaderBuilder`
-    pub fn from_buf_reader(
-        reader: BufReader<R>,
-        schema: SchemaRef,
-        batch_size: usize,
-        projection: Option<Vec<String>>,
-    ) -> Self {
-        Self {
-            reader,
-            decoder: Decoder::new(schema, batch_size, projection),
-        }
-    }
-
-    /// Returns the schema of the reader, useful for getting the schema without reading
-    /// record batches
-    pub fn schema(&self) -> SchemaRef {
-        self.decoder.schema()
-    }
-
-    /// Read the next batch of records
-    #[allow(clippy::should_implement_trait)]
-    pub fn next(&mut self) -> Result<Option<RecordBatch>> {
-        self.decoder
-            .next_batch(&mut ValueIter::new(&mut self.reader, None))
-    }
-}
-
-/// JSON file reader builder
-#[derive(Debug)]
-pub struct ReaderBuilder {
-    /// Optional schema for the JSON file
-    ///
-    /// If the schema is not supplied, the reader will try to infer the schema
-    /// based on the JSON structure.
-    schema: Option<SchemaRef>,
-    /// Optional maximum number of records to read during schema inference
-    ///
-    /// If a number is not provided, all the records are read.
-    max_records: Option<usize>,
-    /// Batch size (number of records to load each time)
-    ///
-    /// The default batch size when using the `ReaderBuilder` is 1024 records
-    batch_size: usize,
-    /// Optional projection for which columns to load (zero-based column indices)
-    projection: Option<Vec<String>>,
-}
-
-impl Default for ReaderBuilder {
-    fn default() -> Self {
-        Self {
-            schema: None,
-            max_records: None,
-            batch_size: 1024,
-            projection: None,
-        }
-    }
-}
-
-impl ReaderBuilder {
-    /// Create a new builder for configuring JSON parsing options.
-    ///
-    /// To convert a builder into a reader, call `Reader::from_builder`
-    ///
-    /// # Example
-    ///
-    /// ```
-    /// extern crate arrow;
-    ///
-    /// use arrow::json;
-    /// use std::fs::File;
-    ///
-    /// fn example() -> json::Reader<File> {
-    ///     let file = File::open("test/data/basic.json").unwrap();
-    ///
-    ///     // create a builder, inferring the schema with the first 100 records
-    ///     let builder = json::ReaderBuilder::new().infer_schema(Some(100));
-    ///
-    ///     let reader = builder.build::<File>(file).unwrap();
-    ///
-    ///     reader
-    /// }
-    /// ```
-    pub fn new() -> Self {
-        Self::default()
-    }
-
-    /// Set the JSON file's schema
-    pub fn with_schema(mut self, schema: SchemaRef) -> Self {
-        self.schema = Some(schema);
-        self
-    }
-
-    /// Set the JSON reader to infer the schema of the file
-    pub fn infer_schema(mut self, max_records: Option<usize>) -> Self {
-        // remove any schema that is set
-        self.schema = None;
-        self.max_records = max_records;
-        self
-    }
-
-    /// Set the batch size (number of records to load at one time)
-    pub fn with_batch_size(mut self, batch_size: usize) -> Self {
-        self.batch_size = batch_size;
-        self
-    }
-
-    /// Set the reader's column projection
-    pub fn with_projection(mut self, projection: Vec<String>) -> Self {
-        self.projection = Some(projection);
-        self
-    }
-
-    /// Create a new `Reader` from the `ReaderBuilder`
-    pub fn build<R>(self, source: R) -> Result<Reader<R>>
-    where
-        R: Read + Seek,
-    {
-        let mut buf_reader = BufReader::new(source);
-
-        // check if schema should be inferred
-        let schema = match self.schema {
-            Some(schema) => schema,
-            None => Arc::new(infer_json_schema_from_seekable(
-                &mut buf_reader,
-                self.max_records,
-            )?),
-        };
-
-        Ok(Reader::from_buf_reader(
-            buf_reader,
-            schema,
-            self.batch_size,
-            self.projection,
-        ))
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use crate::{
-        buffer::Buffer,
-        datatypes::DataType::{Dictionary, List},
-    };
-
-    use super::*;
-    use flate2::read::GzDecoder;
-    use std::fs::File;
-    use std::io::Cursor;
-
-    #[test]
-    fn test_json_basic() {
-        let builder = ReaderBuilder::new().infer_schema(None).with_batch_size(64);
-        let mut reader: Reader<File> = builder
-            .build::<File>(File::open("test/data/basic.json").unwrap())
-            .unwrap();
-        let batch = reader.next().unwrap().unwrap();
-
-        assert_eq!(4, batch.num_columns());
-        assert_eq!(12, batch.num_rows());
-
-        let schema = reader.schema();
-        let batch_schema = batch.schema();
-        assert_eq!(schema, batch_schema);
-
-        let a = schema.column_with_name("a").unwrap();
-        assert_eq!(0, a.0);
-        assert_eq!(&DataType::Int64, a.1.data_type());
-        let b = schema.column_with_name("b").unwrap();
-        assert_eq!(1, b.0);
-        assert_eq!(&DataType::Float64, b.1.data_type());
-        let c = schema.column_with_name("c").unwrap();
-        assert_eq!(2, c.0);
-        assert_eq!(&DataType::Boolean, c.1.data_type());
-        let d = schema.column_with_name("d").unwrap();
-        assert_eq!(3, d.0);
-        assert_eq!(&DataType::Utf8, d.1.data_type());
-
-        let aa = batch
-            .column(a.0)
-            .as_any()
-            .downcast_ref::<Int64Array>()
-            .unwrap();
-        assert_eq!(1, aa.value(0));
-        assert_eq!(-10, aa.value(1));
-        let bb = batch
-            .column(b.0)
-            .as_any()
-            .downcast_ref::<Float64Array>()
-            .unwrap();
-        assert!(2.0 - bb.value(0) < f64::EPSILON);
-        assert!(-3.5 - bb.value(1) < f64::EPSILON);
-        let cc = batch
-            .column(c.0)
-            .as_any()
-            .downcast_ref::<BooleanArray>()
-            .unwrap();
-        assert_eq!(false, cc.value(0));
-        assert_eq!(true, cc.value(10));
-        let dd = batch
-            .column(d.0)
-            .as_any()
-            .downcast_ref::<StringArray>()
-            .unwrap();
-        assert_eq!("4", dd.value(0));
-        assert_eq!("text", dd.value(8));
-    }
-
-    #[test]
-    fn test_json_basic_with_nulls() {
-        let builder = ReaderBuilder::new().infer_schema(None).with_batch_size(64);
-        let mut reader: Reader<File> = builder
-            .build::<File>(File::open("test/data/basic_nulls.json").unwrap())
-            .unwrap();
-        let batch = reader.next().unwrap().unwrap();
-
-        assert_eq!(4, batch.num_columns());
-        assert_eq!(12, batch.num_rows());
-
-        let schema = reader.schema();
-        let batch_schema = batch.schema();
-        assert_eq!(schema, batch_schema);
-
-        let a = schema.column_with_name("a").unwrap();
-        assert_eq!(&DataType::Int64, a.1.data_type());
-        let b = schema.column_with_name("b").unwrap();
-        assert_eq!(&DataType::Float64, b.1.data_type());
-        let c = schema.column_with_name("c").unwrap();
-        assert_eq!(&DataType::Boolean, c.1.data_type());
-        let d = schema.column_with_name("d").unwrap();
-        assert_eq!(&DataType::Utf8, d.1.data_type());
-
-        let aa = batch
-            .column(a.0)
-            .as_any()
-            .downcast_ref::<Int64Array>()
-            .unwrap();
-        assert_eq!(true, aa.is_valid(0));
-        assert_eq!(false, aa.is_valid(1));
-        assert_eq!(false, aa.is_valid(11));
-        let bb = batch
-            .column(b.0)
-            .as_any()
-            .downcast_ref::<Float64Array>()
-            .unwrap();
-        assert_eq!(true, bb.is_valid(0));
-        assert_eq!(false, bb.is_valid(2));
-        assert_eq!(false, bb.is_valid(11));
-        let cc = batch
-            .column(c.0)
-            .as_any()
-            .downcast_ref::<BooleanArray>()
-            .unwrap();
-        assert_eq!(true, cc.is_valid(0));
-        assert_eq!(false, cc.is_valid(4));
-        assert_eq!(false, cc.is_valid(11));
-        let dd = batch
-            .column(d.0)
-            .as_any()
-            .downcast_ref::<StringArray>()
-            .unwrap();
-        assert_eq!(false, dd.is_valid(0));
-        assert_eq!(true, dd.is_valid(1));
-        assert_eq!(false, dd.is_valid(4));
-        assert_eq!(false, dd.is_valid(11));
-    }
-
-    #[test]
-    fn test_json_basic_schema() {
-        let schema = Schema::new(vec![
-            Field::new("a", DataType::Int32, false),
-            Field::new("b", DataType::Float32, false),
-            Field::new("c", DataType::Boolean, false),
-            Field::new("d", DataType::Utf8, false),
-        ]);
-
-        let mut reader: Reader<File> = Reader::new(
-            File::open("test/data/basic.json").unwrap(),
-            Arc::new(schema.clone()),
-            1024,
-            None,
-        );
-        let reader_schema = reader.schema();
-        assert_eq!(reader_schema, Arc::new(schema));
-        let batch = reader.next().unwrap().unwrap();
-
-        assert_eq!(4, batch.num_columns());
-        assert_eq!(12, batch.num_rows());
-
-        let schema = batch.schema();
-
-        let a = schema.column_with_name("a").unwrap();
-        assert_eq!(&DataType::Int32, a.1.data_type());
-        let b = schema.column_with_name("b").unwrap();
-        assert_eq!(&DataType::Float32, b.1.data_type());
-        let c = schema.column_with_name("c").unwrap();
-        assert_eq!(&DataType::Boolean, c.1.data_type());
-        let d = schema.column_with_name("d").unwrap();
-        assert_eq!(&DataType::Utf8, d.1.data_type());
-
-        let aa = batch
-            .column(a.0)
-            .as_any()
-            .downcast_ref::<Int32Array>()
-            .unwrap();
-        assert_eq!(1, aa.value(0));
-        // test that a 64bit value is returned as null due to overflowing
-        assert_eq!(false, aa.is_valid(11));
-        let bb = batch
-            .column(b.0)
-            .as_any()
-            .downcast_ref::<Float32Array>()
-            .unwrap();
-        assert!(2.0 - bb.value(0) < f32::EPSILON);
-        assert!(-3.5 - bb.value(1) < f32::EPSILON);
-    }
-
-    #[test]
-    fn test_json_basic_schema_projection() {
-        // We test implicit and explicit projection:
-        // Implicit: omitting fields from a schema
-        // Explicit: supplying a vec of fields to take
-        let schema = Schema::new(vec![
-            Field::new("a", DataType::Int32, false),
-            Field::new("b", DataType::Float32, false),
-            Field::new("c", DataType::Boolean, false),
-        ]);
-
-        let mut reader: Reader<File> = Reader::new(
-            File::open("test/data/basic.json").unwrap(),
-            Arc::new(schema),
-            1024,
-            Some(vec!["a".to_string(), "c".to_string()]),
-        );
-        let reader_schema = reader.schema();
-        let expected_schema = Arc::new(Schema::new(vec![
-            Field::new("a", DataType::Int32, false),
-            Field::new("c", DataType::Boolean, false),
-        ]));
-        assert_eq!(reader_schema, expected_schema);
-
-        let batch = reader.next().unwrap().unwrap();
-
-        assert_eq!(2, batch.num_columns());
-        assert_eq!(2, batch.schema().fields().len());
-        assert_eq!(12, batch.num_rows());
-
-        let schema = batch.schema();
-        assert_eq!(reader_schema, schema);
-
-        let a = schema.column_with_name("a").unwrap();
-        assert_eq!(0, a.0);
-        assert_eq!(&DataType::Int32, a.1.data_type());
-        let c = schema.column_with_name("c").unwrap();
-        assert_eq!(1, c.0);
-        assert_eq!(&DataType::Boolean, c.1.data_type());
-    }
-
-    #[test]
-    fn test_json_arrays() {
-        let builder = ReaderBuilder::new().infer_schema(None).with_batch_size(64);
-        let mut reader: Reader<File> = builder
-            .build::<File>(File::open("test/data/arrays.json").unwrap())
-            .unwrap();
-        let batch = reader.next().unwrap().unwrap();
-
-        assert_eq!(4, batch.num_columns());
-        assert_eq!(3, batch.num_rows());
-
-        let schema = batch.schema();
-
-        let a = schema.column_with_name("a").unwrap();
-        assert_eq!(&DataType::Int64, a.1.data_type());
-        let b = schema.column_with_name("b").unwrap();
-        assert_eq!(
-            &DataType::List(Box::new(Field::new("item", DataType::Float64, true))),
-            b.1.data_type()
-        );
-        let c = schema.column_with_name("c").unwrap();
-        assert_eq!(
-            &DataType::List(Box::new(Field::new("item", DataType::Boolean, true))),
-            c.1.data_type()
-        );
-        let d = schema.column_with_name("d").unwrap();
-        assert_eq!(&DataType::Utf8, d.1.data_type());
-
-        let aa = batch
-            .column(a.0)
-            .as_any()
-            .downcast_ref::<Int64Array>()
-            .unwrap();
-        assert_eq!(1, aa.value(0));
-        assert_eq!(-10, aa.value(1));
-        let bb = batch
-            .column(b.0)
-            .as_any()
-            .downcast_ref::<ListArray>()
-            .unwrap();
-        let bb = bb.values();
-        let bb = bb.as_any().downcast_ref::<Float64Array>().unwrap();
-        assert_eq!(9, bb.len());
-        assert!(2.0 - bb.value(0) < f64::EPSILON);
-        assert!(-6.1 - bb.value(5) < f64::EPSILON);
-        assert_eq!(false, bb.is_valid(7));
-
-        let cc = batch
-            .column(c.0)
-            .as_any()
-            .downcast_ref::<ListArray>()
-            .unwrap();
-        let cc = cc.values();
-        let cc = cc.as_any().downcast_ref::<BooleanArray>().unwrap();
-        assert_eq!(6, cc.len());
-        assert_eq!(false, cc.value(0));
-        assert_eq!(false, cc.value(4));
-        assert_eq!(false, cc.is_valid(5));
-    }
-
-    #[test]
-    fn test_invalid_json_infer_schema() {
-        let re = infer_json_schema_from_seekable(
-            &mut BufReader::new(
-                File::open("test/data/uk_cities_with_headers.csv").unwrap(),
-            ),
-            None,
-        );
-        assert_eq!(
-            re.err().unwrap().to_string(),
-            "Json error: Not valid JSON: expected value at line 1 column 1",
-        );
-    }
-
-    #[test]
-    fn test_invalid_json_read_record() {
-        let schema = Arc::new(Schema::new(vec![Field::new(
-            "a",
-            DataType::Struct(vec![Field::new("a", DataType::Utf8, true)]),
-            true,
-        )]));
-        let builder = ReaderBuilder::new().with_schema(schema).with_batch_size(64);
-        let mut reader: Reader<File> = builder
-            .build::<File>(File::open("test/data/uk_cities_with_headers.csv").unwrap())
-            .unwrap();
-        assert_eq!(
-            reader.next().err().unwrap().to_string(),
-            "Json error: Not valid JSON: expected value at line 1 column 1",
-        );
-    }
-
-    #[test]
-    fn test_coersion_scalar_and_list() {
-        use crate::datatypes::DataType::*;
-
-        assert_eq!(
-            List(Box::new(Field::new("item", Float64, true))),
-            coerce_data_type(vec![
-                &Float64,
-                &List(Box::new(Field::new("item", Float64, true)))
-            ])
-        );
-        assert_eq!(
-            List(Box::new(Field::new("item", Float64, true))),
-            coerce_data_type(vec![
-                &Float64,
-                &List(Box::new(Field::new("item", Int64, true)))
-            ])
-        );
-        assert_eq!(
-            List(Box::new(Field::new("item", Int64, true))),
-            coerce_data_type(vec![
-                &Int64,
-                &List(Box::new(Field::new("item", Int64, true)))
-            ])
-        );
-        // boolean and number are incompatible, return utf8
-        assert_eq!(
-            List(Box::new(Field::new("item", Utf8, true))),
-            coerce_data_type(vec![
-                &Boolean,
-                &List(Box::new(Field::new("item", Float64, true)))
-            ])
-        );
-    }
-
-    #[test]
-    fn test_mixed_json_arrays() {
-        let builder = ReaderBuilder::new().infer_schema(None).with_batch_size(64);
-        let mut reader: Reader<File> = builder
-            .build::<File>(File::open("test/data/mixed_arrays.json").unwrap())
-            .unwrap();
-        let batch = reader.next().unwrap().unwrap();
-
-        let mut file = File::open("test/data/mixed_arrays.json.gz").unwrap();
-        let mut reader = BufReader::new(GzDecoder::new(&file));
-        let schema = infer_json_schema(&mut reader, None).unwrap();
-        file.seek(SeekFrom::Start(0)).unwrap();
-
-        let reader = BufReader::new(GzDecoder::new(&file));
-        let mut reader = Reader::from_buf_reader(reader, Arc::new(schema), 64, None);
-        let batch_gz = reader.next().unwrap().unwrap();
-
-        for batch in vec![batch, batch_gz] {
-            assert_eq!(4, batch.num_columns());
-            assert_eq!(4, batch.num_rows());
-
-            let schema = batch.schema();
-
-            let a = schema.column_with_name("a").unwrap();
-            assert_eq!(&DataType::Int64, a.1.data_type());
-            let b = schema.column_with_name("b").unwrap();
-            assert_eq!(
-                &DataType::List(Box::new(Field::new("item", DataType::Float64, true))),
-                b.1.data_type()
-            );
-            let c = schema.column_with_name("c").unwrap();
-            assert_eq!(
-                &DataType::List(Box::new(Field::new("item", DataType::Boolean, true))),
-                c.1.data_type()
-            );
-            let d = schema.column_with_name("d").unwrap();
-            assert_eq!(
-                &DataType::List(Box::new(Field::new("item", DataType::Utf8, true))),
-                d.1.data_type()
-            );
-
-            let bb = batch
-                .column(b.0)
-                .as_any()
-                .downcast_ref::<ListArray>()
-                .unwrap();
-            let bb = bb.values();
-            let bb = bb.as_any().downcast_ref::<Float64Array>().unwrap();
-            assert_eq!(10, bb.len());
-            assert!(4.0 - bb.value(9) < f64::EPSILON);
-
-            let cc = batch
-                .column(c.0)
-                .as_any()
-                .downcast_ref::<ListArray>()
-                .unwrap();
-            // test that the list offsets are correct
-            assert_eq!(
-                cc.data().buffers()[0],
-                Buffer::from_slice_ref(&[0i32, 2, 2, 4, 5])
-            );
-            let cc = cc.values();
-            let cc = cc.as_any().downcast_ref::<BooleanArray>().unwrap();
-            let cc_expected = BooleanArray::from(vec![
-                Some(false),
-                Some(true),
-                Some(false),
-                None,
-                Some(false),
-            ]);
-            assert_eq!(cc.data_ref(), cc_expected.data_ref());
-
-            let dd: &ListArray = batch
-                .column(d.0)
-                .as_any()
-                .downcast_ref::<ListArray>()
-                .unwrap();
-            // test that the list offsets are correct
-            assert_eq!(
-                dd.data().buffers()[0],
-                Buffer::from_slice_ref(&[0i32, 1, 1, 2, 6])
-            );
-            let dd = dd.values();
-            let dd = dd.as_any().downcast_ref::<StringArray>().unwrap();
-            // values are 6 because a `d: null` is treated as a null slot
-            // and a list's null slot can be omitted from the child (i.e. same offset)
-            assert_eq!(6, dd.len());
-            assert_eq!("text", dd.value(1));
-            assert_eq!("1", dd.value(2));
-            assert_eq!("false", dd.value(3));
-            assert_eq!("array", dd.value(4));
-            assert_eq!("2.4", dd.value(5));
-        }
-    }
-
-    #[test]
-    fn test_nested_struct_json_arrays() {
-        let c_field = Field::new(
-            "c",
-            DataType::Struct(vec![Field::new("d", DataType::Utf8, true)]),
-            true,
-        );
-        let a_field = Field::new(
-            "a",
-            DataType::Struct(vec![
-                Field::new("b", DataType::Boolean, true),
-                c_field.clone(),
-            ]),
-            true,
-        );
-        let schema = Arc::new(Schema::new(vec![a_field.clone()]));
-        let builder = ReaderBuilder::new().with_schema(schema).with_batch_size(64);
-        let mut reader: Reader<File> = builder
-            .build::<File>(File::open("test/data/nested_structs.json").unwrap())
-            .unwrap();
-
-        // build expected output
-        let d = StringArray::from(vec![Some("text"), None, Some("text"), None]);
-        let c = ArrayDataBuilder::new(c_field.data_type().clone())
-            .len(4)
-            .add_child_data(d.data().clone())
-            .null_bit_buffer(Buffer::from(vec![0b00000101]))
-            .build();
-        let b = BooleanArray::from(vec![Some(true), Some(false), Some(true), None]);
-        let a = ArrayDataBuilder::new(a_field.data_type().clone())
-            .len(4)
-            .add_child_data(b.data().clone())
-            .add_child_data(c)
-            .null_bit_buffer(Buffer::from(vec![0b00000111]))
-            .build();
-        let expected = make_array(a);
-
-        // compare `a` with result from json reader
-        let batch = reader.next().unwrap().unwrap();
-        let read = batch.column(0);
-        assert!(
-            expected.data_ref() == read.data_ref(),
-            "{:?} != {:?}",
-            expected.data(),
-            read.data(),
-        );
-    }
-
-    #[test]
-    fn test_nested_list_json_arrays() {
-        let c_field = Field::new(
-            "c",
-            DataType::Struct(vec![Field::new("d", DataType::Utf8, true)]),
-            true,
-        );
-        let a_struct_field = Field::new(
-            "a",
-            DataType::Struct(vec![
-                Field::new("b", DataType::Boolean, true),
-                c_field.clone(),
-            ]),
-            true,
-        );
-        let a_field =
-            Field::new("a", DataType::List(Box::new(a_struct_field.clone())), true);
-        let schema = Arc::new(Schema::new(vec![a_field.clone()]));
-        let builder = ReaderBuilder::new().with_schema(schema).with_batch_size(64);
-        let json_content = r#"
-        {"a": [{"b": true, "c": {"d": "a_text"}}, {"b": false, "c": {"d": "b_text"}}]}
-        {"a": [{"b": false, "c": null}]}
-        {"a": [{"b": true, "c": {"d": "c_text"}}, {"b": null, "c": {"d": "d_text"}}, {"b": true, "c": {"d": null}}]}
-        {"a": null}
-        {"a": []}
-        "#;
-        let mut reader = builder.build(Cursor::new(json_content)).unwrap();
-
-        // build expected output
-        let d = StringArray::from(vec![
-            Some("a_text"),
-            Some("b_text"),
-            None,
-            Some("c_text"),
-            Some("d_text"),
-            None,
-            None,
-        ]);
-        let c = ArrayDataBuilder::new(c_field.data_type().clone())
-            .len(7)
-            .add_child_data(d.data().clone())
-            .null_bit_buffer(Buffer::from(vec![0b00111011]))
-            .build();
-        let b = BooleanArray::from(vec![
-            Some(true),
-            Some(false),
-            Some(false),
-            Some(true),
-            None,
-            Some(true),
-            None,
-        ]);
-        let a = ArrayDataBuilder::new(a_struct_field.data_type().clone())
-            .len(7)
-            .add_child_data(b.data().clone())
-            .add_child_data(c.clone())
-            .null_bit_buffer(Buffer::from(vec![0b00111111]))
-            .build();
-        let a_list = ArrayDataBuilder::new(a_field.data_type().clone())
-            .len(5)
-            .add_buffer(Buffer::from_slice_ref(&[0i32, 2, 3, 6, 6, 6]))
-            .add_child_data(a)
-            .null_bit_buffer(Buffer::from(vec![0b00010111]))
-            .build();
-        let expected = make_array(a_list);
-
-        // compare `a` with result from json reader
-        let batch = reader.next().unwrap().unwrap();
-        let read = batch.column(0);
-        assert_eq!(read.len(), 5);
-        // compare the arrays the long way around, to better detect differences
-        let read: &ListArray = read.as_any().downcast_ref::<ListArray>().unwrap();
-        let expected = expected.as_any().downcast_ref::<ListArray>().unwrap();
-        assert_eq!(
-            read.data().buffers()[0],
-            Buffer::from_slice_ref(&[0i32, 2, 3, 6, 6, 6])
-        );
-        // compare list null buffers
-        assert_eq!(read.data().null_buffer(), expected.data().null_buffer());
-        // build struct from list
-        let struct_values = read.values();
-        let struct_array: &StructArray = struct_values
-            .as_any()
-            .downcast_ref::<StructArray>()
-            .unwrap();
-        let expected_struct_values = expected.values();
-        let expected_struct_array = expected_struct_values
-            .as_any()
-            .downcast_ref::<StructArray>()
-            .unwrap();
-
-        assert_eq!(7, struct_array.len());
-        assert_eq!(1, struct_array.null_count());
-        assert_eq!(7, expected_struct_array.len());
-        assert_eq!(1, expected_struct_array.null_count());
-        // test struct's nulls
-        assert_eq!(
-            struct_array.data().null_buffer(),
-            expected_struct_array.data().null_buffer()
-        );
-        // test struct's fields
-        let read_b = struct_array.column(0);
-        assert_eq!(b.data_ref(), read_b.data_ref());
-        let read_c = struct_array.column(1);
-        assert_eq!(&c, read_c.data_ref());
-        let read_c: &StructArray = read_c.as_any().downcast_ref::<StructArray>().unwrap();
-        let read_d = read_c.column(0);
-        assert_eq!(d.data_ref(), read_d.data_ref());
-
-        assert_eq!(read.data_ref(), expected.data_ref());
-    }
-
-    #[test]
-    fn test_dictionary_from_json_basic_with_nulls() {
-        let schema = Schema::new(vec![Field::new(
-            "d",
-            Dictionary(Box::new(DataType::Int16), Box::new(DataType::Utf8)),
-            true,
-        )]);
-        let builder = ReaderBuilder::new()
-            .with_schema(Arc::new(schema))
-            .with_batch_size(64);
-        let mut reader: Reader<File> = builder
-            .build::<File>(File::open("test/data/basic_nulls.json").unwrap())
-            .unwrap();
-        let batch = reader.next().unwrap().unwrap();
-
-        assert_eq!(1, batch.num_columns());
-        assert_eq!(12, batch.num_rows());
-
-        let schema = reader.schema();
-        let batch_schema = batch.schema();
-        assert_eq!(schema, batch_schema);
-
-        let d = schema.column_with_name("d").unwrap();
-        assert_eq!(
-            &Dictionary(Box::new(DataType::Int16), Box::new(DataType::Utf8)),
-            d.1.data_type()
-        );
-
-        let dd = batch
-            .column(d.0)
-            .as_any()
-            .downcast_ref::<DictionaryArray<Int16Type>>()
-            .unwrap();
-        assert_eq!(false, dd.is_valid(0));
-        assert_eq!(true, dd.is_valid(1));
-        assert_eq!(true, dd.is_valid(2));
-        assert_eq!(false, dd.is_valid(11));
-
-        assert_eq!(
-            dd.keys(),
-            &Int16Array::from(vec![
-                None,
-                Some(0),
-                Some(1),
-                Some(0),
-                None,
-                None,
-                Some(0),
-                None,
-                Some(1),
-                Some(0),
-                Some(0),
-                None
-            ])
-        );
-    }
-
-    #[test]
-    fn test_dictionary_from_json_int8() {
-        let schema = Schema::new(vec![Field::new(
-            "d",
-            Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8)),
-            true,
-        )]);
-        let builder = ReaderBuilder::new()
-            .with_schema(Arc::new(schema))
-            .with_batch_size(64);
-        let mut reader: Reader<File> = builder
-            .build::<File>(File::open("test/data/basic_nulls.json").unwrap())
-            .unwrap();
-        let batch = reader.next().unwrap().unwrap();
-
-        assert_eq!(1, batch.num_columns());
-        assert_eq!(12, batch.num_rows());
-
-        let schema = reader.schema();
-        let batch_schema = batch.schema();
-        assert_eq!(schema, batch_schema);
-
-        let d = schema.column_with_name("d").unwrap();
-        assert_eq!(
-            &Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8)),
-            d.1.data_type()
-        );
-    }
-
-    #[test]
-    fn test_dictionary_from_json_int32() {
-        let schema = Schema::new(vec![Field::new(
-            "d",
-            Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
-            true,
-        )]);
-        let builder = ReaderBuilder::new()
-            .with_schema(Arc::new(schema))
-            .with_batch_size(64);
-        let mut reader: Reader<File> = builder
-            .build::<File>(File::open("test/data/basic_nulls.json").unwrap())
-            .unwrap();
-        let batch = reader.next().unwrap().unwrap();
-
-        assert_eq!(1, batch.num_columns());
-        assert_eq!(12, batch.num_rows());
-
-        let schema = reader.schema();
-        let batch_schema = batch.schema();
-        assert_eq!(schema, batch_schema);
-
-        let d = schema.column_with_name("d").unwrap();
-        assert_eq!(
-            &Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
-            d.1.data_type()
-        );
-    }
-
-    #[test]
-    fn test_dictionary_from_json_int64() {
-        let schema = Schema::new(vec![Field::new(
-            "d",
-            Dictionary(Box::new(DataType::Int64), Box::new(DataType::Utf8)),
-            true,
-        )]);
-        let builder = ReaderBuilder::new()
-            .with_schema(Arc::new(schema))
-            .with_batch_size(64);
-        let mut reader: Reader<File> = builder
-            .build::<File>(File::open("test/data/basic_nulls.json").unwrap())
-            .unwrap();
-        let batch = reader.next().unwrap().unwrap();
-
-        assert_eq!(1, batch.num_columns());
-        assert_eq!(12, batch.num_rows());
-
-        let schema = reader.schema();
-        let batch_schema = batch.schema();
-        assert_eq!(schema, batch_schema);
-
-        let d = schema.column_with_name("d").unwrap();
-        assert_eq!(
-            &Dictionary(Box::new(DataType::Int64), Box::new(DataType::Utf8)),
-            d.1.data_type()
-        );
-    }
-
-    #[test]
-    fn test_skip_empty_lines() {
-        let builder = ReaderBuilder::new().infer_schema(None).with_batch_size(64);
-        let json_content = "
-        {\"a\": 1}
-
-        {\"a\": 2}
-
-        {\"a\": 3}";
-        let mut reader = builder.build(Cursor::new(json_content)).unwrap();
-        let batch = reader.next().unwrap().unwrap();
-
-        assert_eq!(1, batch.num_columns());
-        assert_eq!(3, batch.num_rows());
-
-        let schema = reader.schema();
-        let c = schema.column_with_name("a").unwrap();
-        assert_eq!(&DataType::Int64, c.1.data_type());
-    }
-
-    #[test]
-    fn test_row_type_validation() {
-        let builder = ReaderBuilder::new().infer_schema(None).with_batch_size(64);
-        let json_content = "
-        [1, \"hello\"]
-        \"world\"";
-        let re = builder.build(Cursor::new(json_content));
-        assert_eq!(
-            re.err().unwrap().to_string(),
-            r#"Json error: Expected JSON record to be an object, found Array([Number(1), String("hello")])"#,
-        );
-    }
-
-    #[test]
-    fn test_list_of_string_dictionary_from_json() {
-        let schema = Schema::new(vec![Field::new(
-            "events",
-            List(Box::new(Field::new(
-                "item",
-                Dictionary(Box::new(DataType::UInt64), Box::new(DataType::Utf8)),
-                true,
-            ))),
-            true,
-        )]);
-        let builder = ReaderBuilder::new()
-            .with_schema(Arc::new(schema))
-            .with_batch_size(64);
-        let mut reader: Reader<File> = builder
-            .build::<File>(File::open("test/data/list_string_dict_nested.json").unwrap())
-            .unwrap();
-        let batch = reader.next().unwrap().unwrap();
-
-        assert_eq!(1, batch.num_columns());
-        assert_eq!(3, batch.num_rows());
-
-        let schema = reader.schema();
-        let batch_schema = batch.schema();
-        assert_eq!(schema, batch_schema);
-
-        let events = schema.column_with_name("events").unwrap();
-        assert_eq!(
-            &List(Box::new(Field::new(
-                "item",
-                Dictionary(Box::new(DataType::UInt64), Box::new(DataType::Utf8)),
-                true
-            ))),
-            events.1.data_type()
-        );
-
-        let evs_list = batch
-            .column(events.0)
-            .as_any()
-            .downcast_ref::<ListArray>()
-            .unwrap();
-        let evs_list = evs_list.values();
-        let evs_list = evs_list
-            .as_any()
-            .downcast_ref::<DictionaryArray<UInt64Type>>()
-            .unwrap();
-        assert_eq!(6, evs_list.len());
-        assert_eq!(true, evs_list.is_valid(1));
-        assert_eq!(DataType::Utf8, evs_list.value_type());
-
-        // dict from the events list
-        let dict_el = evs_list.values();
-        let dict_el = dict_el.as_any().downcast_ref::<StringArray>().unwrap();
-        assert_eq!(3, dict_el.len());
-        assert_eq!("Elect Leader", dict_el.value(0));
-        assert_eq!("Do Ballot", dict_el.value(1));
-        assert_eq!("Send Data", dict_el.value(2));
-    }
-
-    #[test]
-    fn test_list_of_string_dictionary_from_json_with_nulls() {
-        let schema = Schema::new(vec![Field::new(
-            "events",
-            List(Box::new(Field::new(
-                "item",
-                Dictionary(Box::new(DataType::UInt64), Box::new(DataType::Utf8)),
-                true,
-            ))),
-            true,
-        )]);
-        let builder = ReaderBuilder::new()
-            .with_schema(Arc::new(schema))
-            .with_batch_size(64);
-        let mut reader: Reader<File> = builder
-            .build::<File>(
-                File::open("test/data/list_string_dict_nested_nulls.json").unwrap(),
-            )
-            .unwrap();
-        let batch = reader.next().unwrap().unwrap();
-
-        assert_eq!(1, batch.num_columns());
-        assert_eq!(3, batch.num_rows());
-
-        let schema = reader.schema();
-        let batch_schema = batch.schema();
-        assert_eq!(schema, batch_schema);
-
-        let events = schema.column_with_name("events").unwrap();
-        assert_eq!(
-            &List(Box::new(Field::new(
-                "item",
-                Dictionary(Box::new(DataType::UInt64), Box::new(DataType::Utf8)),
-                true
-            ))),
-            events.1.data_type()
-        );
-
-        let evs_list = batch
-            .column(events.0)
-            .as_any()
-            .downcast_ref::<ListArray>()
-            .unwrap();
-        let evs_list = evs_list.values();
-        let evs_list = evs_list
-            .as_any()
-            .downcast_ref::<DictionaryArray<UInt64Type>>()
-            .unwrap();
-        assert_eq!(8, evs_list.len());
-        assert_eq!(true, evs_list.is_valid(1));
-        assert_eq!(DataType::Utf8, evs_list.value_type());
-
-        // dict from the events list
-        let dict_el = evs_list.values();
-        let dict_el = dict_el.as_any().downcast_ref::<StringArray>().unwrap();
-        assert_eq!(2, evs_list.null_count());
-        assert_eq!(3, dict_el.len());
-        assert_eq!("Elect Leader", dict_el.value(0));
-        assert_eq!("Do Ballot", dict_el.value(1));
-        assert_eq!("Send Data", dict_el.value(2));
-    }
-
-    #[test]
-    fn test_dictionary_from_json_uint8() {
-        let schema = Schema::new(vec![Field::new(
-            "d",
-            Dictionary(Box::new(DataType::UInt8), Box::new(DataType::Utf8)),
-            true,
-        )]);
-        let builder = ReaderBuilder::new()
-            .with_schema(Arc::new(schema))
-            .with_batch_size(64);
-        let mut reader: Reader<File> = builder
-            .build::<File>(File::open("test/data/basic_nulls.json").unwrap())
-            .unwrap();
-        let batch = reader.next().unwrap().unwrap();
-
-        assert_eq!(1, batch.num_columns());
-        assert_eq!(12, batch.num_rows());
-
-        let schema = reader.schema();
-        let batch_schema = batch.schema();
-        assert_eq!(schema, batch_schema);
-
-        let d = schema.column_with_name("d").unwrap();
-        assert_eq!(
-            &Dictionary(Box::new(DataType::UInt8), Box::new(DataType::Utf8)),
-            d.1.data_type()
-        );
-    }
-
-    #[test]
-    fn test_dictionary_from_json_uint32() {
-        let schema = Schema::new(vec![Field::new(
-            "d",
-            Dictionary(Box::new(DataType::UInt32), Box::new(DataType::Utf8)),
-            true,
-        )]);
-        let builder = ReaderBuilder::new()
-            .with_schema(Arc::new(schema))
-            .with_batch_size(64);
-        let mut reader: Reader<File> = builder
-            .build::<File>(File::open("test/data/basic_nulls.json").unwrap())
-            .unwrap();
-        let batch = reader.next().unwrap().unwrap();
-
-        assert_eq!(1, batch.num_columns());
-        assert_eq!(12, batch.num_rows());
-
-        let schema = reader.schema();
-        let batch_schema = batch.schema();
-        assert_eq!(schema, batch_schema);
-
-        let d = schema.column_with_name("d").unwrap();
-        assert_eq!(
-            &Dictionary(Box::new(DataType::UInt32), Box::new(DataType::Utf8)),
-            d.1.data_type()
-        );
-    }
-
-    #[test]
-    fn test_dictionary_from_json_uint64() {
-        let schema = Schema::new(vec![Field::new(
-            "d",
-            Dictionary(Box::new(DataType::UInt64), Box::new(DataType::Utf8)),
-            true,
-        )]);
-        let builder = ReaderBuilder::new()
-            .with_schema(Arc::new(schema))
-            .with_batch_size(64);
-        let mut reader: Reader<File> = builder
-            .build::<File>(File::open("test/data/basic_nulls.json").unwrap())
-            .unwrap();
-        let batch = reader.next().unwrap().unwrap();
-
-        assert_eq!(1, batch.num_columns());
-        assert_eq!(12, batch.num_rows());
-
-        let schema = reader.schema();
-        let batch_schema = batch.schema();
-        assert_eq!(schema, batch_schema);
-
-        let d = schema.column_with_name("d").unwrap();
-        assert_eq!(
-            &Dictionary(Box::new(DataType::UInt64), Box::new(DataType::Utf8)),
-            d.1.data_type()
-        );
-    }
-
-    #[test]
-    fn test_with_multiple_batches() {
-        let builder = ReaderBuilder::new()
-            .infer_schema(Some(4))
-            .with_batch_size(5);
-        let mut reader: Reader<File> = builder
-            .build::<File>(File::open("test/data/basic_nulls.json").unwrap())
-            .unwrap();
-
-        let mut num_records = Vec::new();
-        while let Some(rb) = reader.next().unwrap() {
-            num_records.push(rb.num_rows());
-        }
-
-        assert_eq!(vec![5, 5, 2], num_records);
-    }
-
-    #[test]
-    fn test_json_infer_schema() {
-        let schema = Schema::new(vec![
-            Field::new("a", DataType::Int64, true),
-            Field::new(
-                "b",
-                DataType::List(Box::new(Field::new("item", DataType::Float64, true))),
-                true,
-            ),
-            Field::new(
-                "c",
-                DataType::List(Box::new(Field::new("item", DataType::Boolean, true))),
-                true,
-            ),
-            Field::new(
-                "d",
-                DataType::List(Box::new(Field::new("item", DataType::Utf8, true))),
-                true,
-            ),
-        ]);
-
-        let mut reader =
-            BufReader::new(File::open("test/data/mixed_arrays.json").unwrap());
-        let inferred_schema = infer_json_schema_from_seekable(&mut reader, None).unwrap();
-
-        assert_eq!(inferred_schema, schema);
-
-        let file = File::open("test/data/mixed_arrays.json.gz").unwrap();
-        let mut reader = BufReader::new(GzDecoder::new(&file));
-        let inferred_schema = infer_json_schema(&mut reader, None).unwrap();
-
-        assert_eq!(inferred_schema, schema);
-    }
-
-    #[test]
-    fn test_json_infer_schema_nested_structs() {
-        let schema = Schema::new(vec![
-            Field::new(
-                "c1",
-                DataType::Struct(vec![
-                    Field::new("a", DataType::Boolean, true),
-                    Field::new(
-                        "b",
-                        DataType::Struct(vec![Field::new("c", DataType::Utf8, true)]),
-                        true,
-                    ),
-                ]),
-                true,
-            ),
-            Field::new("c2", DataType::Int64, true),
-            Field::new("c3", DataType::Utf8, true),
-        ]);
-
-        let inferred_schema = infer_json_schema_from_iterator(
-            vec![
-                Ok(serde_json::json!({"c1": {"a": true, "b": {"c": "text"}}, "c2": 1})),
-                Ok(serde_json::json!({"c1": {"a": false, "b": null}, "c2": 0})),
-                Ok(serde_json::json!({"c1": {"a": true, "b": {"c": "text"}}, "c3": "ok"})),
-            ]
-            .into_iter(),
-        )
-        .unwrap();
-
-        assert_eq!(inferred_schema, schema);
-    }
-
-    #[test]
-    fn test_json_infer_schema_struct_in_list() {
-        let schema = Schema::new(vec![
-            Field::new(
-                "c1",
-                DataType::List(Box::new(Field::new(
-                    "item",
-                    DataType::Struct(vec![
-                        Field::new("a", DataType::Utf8, true),
-                        Field::new("b", DataType::Int64, true),
-                        Field::new("c", DataType::Boolean, true),
-                    ]),
-                    true,
-                ))),
-                true,
-            ),
-            Field::new("c2", DataType::Float64, true),
-            Field::new(
-                "c3",
-                // empty json array's inner types are inferred as null
-                DataType::List(Box::new(Field::new("item", DataType::Null, true))),
-                true,
-            ),
-        ]);
-
-        let inferred_schema = infer_json_schema_from_iterator(
-            vec![
-                Ok(serde_json::json!({
-                    "c1": [{"a": "foo", "b": 100}], "c2": 1, "c3": [],
-                })),
-                Ok(serde_json::json!({
-                    "c1": [{"a": "bar", "b": 2}, {"a": "foo", "c": true}], "c2": 0, "c3": [],
-                })),
-                Ok(serde_json::json!({"c1": [], "c2": 0.5, "c3": []})),
-            ]
-            .into_iter(),
-        )
-        .unwrap();
-
-        assert_eq!(inferred_schema, schema);
-    }
-
-    #[test]
-    fn test_json_infer_schema_nested_list() {
-        let schema = Schema::new(vec![
-            Field::new(
-                "c1",
-                DataType::List(Box::new(Field::new(
-                    "item",
-                    DataType::List(Box::new(Field::new("item", DataType::Utf8, true))),
-                    true,
-                ))),
-                true,
-            ),
-            Field::new("c2", DataType::Float64, true),
-        ]);
-
-        let inferred_schema = infer_json_schema_from_iterator(
-            vec![
-                Ok(serde_json::json!({
-                    "c1": [],
-                    "c2": 12,
-                })),
-                Ok(serde_json::json!({
-                    "c1": [["a", "b"], ["c"]],
-                })),
-                Ok(serde_json::json!({
-                    "c1": [["foo"]],
-                    "c2": 0.11,
-                })),
-            ]
-            .into_iter(),
-        )
-        .unwrap();
-
-        assert_eq!(inferred_schema, schema);
-    }
-
-    #[test]
-    fn test_timestamp_from_json_seconds() {
-        let schema = Schema::new(vec![Field::new(
-            "a",
-            DataType::Timestamp(TimeUnit::Second, None),
-            true,
-        )]);
-
-        let builder = ReaderBuilder::new()
-            .with_schema(Arc::new(schema))
-            .with_batch_size(64);
-        let mut reader: Reader<File> = builder
-            .build::<File>(File::open("test/data/basic_nulls.json").unwrap())
-            .unwrap();
-        let batch = reader.next().unwrap().unwrap();
-
-        assert_eq!(1, batch.num_columns());
-        assert_eq!(12, batch.num_rows());
-
-        let schema = reader.schema();
-        let batch_schema = batch.schema();
-        assert_eq!(schema, batch_schema);
-
-        let a = schema.column_with_name("a").unwrap();
-        assert_eq!(
-            &DataType::Timestamp(TimeUnit::Second, None),
-            a.1.data_type()
-        );
-
-        let aa = batch
-            .column(a.0)
-            .as_any()
-            .downcast_ref::<TimestampSecondArray>()
-            .unwrap();
-        assert_eq!(true, aa.is_valid(0));
-        assert_eq!(false, aa.is_valid(1));
-        assert_eq!(false, aa.is_valid(2));
-        assert_eq!(1, aa.value(0));
-        assert_eq!(1, aa.value(3));
-        assert_eq!(5, aa.value(7));
-    }
-
-    #[test]
-    fn test_timestamp_from_json_milliseconds() {
-        let schema = Schema::new(vec![Field::new(
-            "a",
-            DataType::Timestamp(TimeUnit::Millisecond, None),
-            true,
-        )]);
-
-        let builder = ReaderBuilder::new()
-            .with_schema(Arc::new(schema))
-            .with_batch_size(64);
-        let mut reader: Reader<File> = builder
-            .build::<File>(File::open("test/data/basic_nulls.json").unwrap())
-            .unwrap();
-        let batch = reader.next().unwrap().unwrap();
-
-        assert_eq!(1, batch.num_columns());
-        assert_eq!(12, batch.num_rows());
-
-        let schema = reader.schema();
-        let batch_schema = batch.schema();
-        assert_eq!(schema, batch_schema);
-
-        let a = schema.column_with_name("a").unwrap();
-        assert_eq!(
-            &DataType::Timestamp(TimeUnit::Millisecond, None),
-            a.1.data_type()
-        );
-
-        let aa = batch
-            .column(a.0)
-            .as_any()
-            .downcast_ref::<TimestampMillisecondArray>()
-            .unwrap();
-        assert_eq!(true, aa.is_valid(0));
-        assert_eq!(false, aa.is_valid(1));
-        assert_eq!(false, aa.is_valid(2));
-        assert_eq!(1, aa.value(0));
-        assert_eq!(1, aa.value(3));
-        assert_eq!(5, aa.value(7));
-    }
-
-    #[test]
-    fn test_date_from_json_milliseconds() {
-        let schema = Schema::new(vec![Field::new("a", DataType::Date64, true)]);
-
-        let builder = ReaderBuilder::new()
-            .with_schema(Arc::new(schema))
-            .with_batch_size(64);
-        let mut reader: Reader<File> = builder
-            .build::<File>(File::open("test/data/basic_nulls.json").unwrap())
-            .unwrap();
-        let batch = reader.next().unwrap().unwrap();
-
-        assert_eq!(1, batch.num_columns());
-        assert_eq!(12, batch.num_rows());
-
-        let schema = reader.schema();
-        let batch_schema = batch.schema();
-        assert_eq!(schema, batch_schema);
-
-        let a = schema.column_with_name("a").unwrap();
-        assert_eq!(&DataType::Date64, a.1.data_type());
-
-        let aa = batch
-            .column(a.0)
-            .as_any()
-            .downcast_ref::<Date64Array>()
-            .unwrap();
-        assert_eq!(true, aa.is_valid(0));
-        assert_eq!(false, aa.is_valid(1));
-        assert_eq!(false, aa.is_valid(2));
-        assert_eq!(1, aa.value(0));
-        assert_eq!(1, aa.value(3));
-        assert_eq!(5, aa.value(7));
-    }
-
-    #[test]
-    fn test_time_from_json_nanoseconds() {
-        let schema = Schema::new(vec![Field::new(
-            "a",
-            DataType::Time64(TimeUnit::Nanosecond),
-            true,
-        )]);
-
-        let builder = ReaderBuilder::new()
-            .with_schema(Arc::new(schema))
-            .with_batch_size(64);
-        let mut reader: Reader<File> = builder
-            .build::<File>(File::open("test/data/basic_nulls.json").unwrap())
-            .unwrap();
-        let batch = reader.next().unwrap().unwrap();
-
-        assert_eq!(1, batch.num_columns());
-        assert_eq!(12, batch.num_rows());
-
-        let schema = reader.schema();
-        let batch_schema = batch.schema();
-        assert_eq!(schema, batch_schema);
-
-        let a = schema.column_with_name("a").unwrap();
-        assert_eq!(&DataType::Time64(TimeUnit::Nanosecond), a.1.data_type());
-
-        let aa = batch
-            .column(a.0)
-            .as_any()
-            .downcast_ref::<Time64NanosecondArray>()
-            .unwrap();
-        assert_eq!(true, aa.is_valid(0));
-        assert_eq!(false, aa.is_valid(1));
-        assert_eq!(false, aa.is_valid(2));
-        assert_eq!(1, aa.value(0));
-        assert_eq!(1, aa.value(3));
-        assert_eq!(5, aa.value(7));
-    }
-
-    #[test]
-    fn test_json_read_nested_list() {
-        let schema = Schema::new(vec![Field::new(
-            "c1",
-            DataType::List(Box::new(Field::new(
-                "item",
-                DataType::List(Box::new(Field::new("item", DataType::Utf8, true))),
-                true,
-            ))),
-            true,
-        )]);
-
-        let decoder = Decoder::new(Arc::new(schema), 1024, None);
-        let batch = decoder
-            .next_batch(
-                &mut vec![
-                    Ok(serde_json::json!({
-                        "c1": [],
-                    })),
-                    Ok(serde_json::json!({
-                        "c1": [["a", "b"], ["c"], ["e", "f"], ["g"], ["h"], ["i"], ["j"], ["k"]],
-                    })),
-                    Ok(serde_json::json!({
-                        "c1": [["foo"], ["bar"]],
-                    })),
-                ]
-                .into_iter(),
-            )
-            .unwrap()
-            .unwrap();
-
-        assert_eq!(batch.num_columns(), 1);
-        assert_eq!(batch.num_rows(), 3);
-    }
-
-    #[test]
-    fn test_json_read_list_of_structs() {
-        let schema = Schema::new(vec![Field::new(
-            "c1",
-            DataType::List(Box::new(Field::new(
-                "item",
-                DataType::Struct(vec![Field::new("a", DataType::Int64, true)]),
-                true,
-            ))),
-            true,
-        )]);
-
-        let decoder = Decoder::new(Arc::new(schema), 1024, None);
-        let batch = decoder
-            .next_batch(
-                // NOTE: total struct element count needs to be greater than
-                // bit_util::ceil(array_count, 8) to test validity bit buffer length calculation
-                // logic
-                &mut vec![
-                    Ok(serde_json::json!({
-                        "c1": [{"a": 1}],
-                    })),
-                    Ok(serde_json::json!({
-                        "c1": [{"a": 2}, {"a": 3}, {"a": 4}, {"a": 5}, {"a": 6}, {"a": 7}],
-                    })),
-                    Ok(serde_json::json!({
-                        "c1": [{"a": 10}, {"a": 11}],
-                    })),
-                ]
-                .into_iter(),
-            )
-            .unwrap()
-            .unwrap();
-
-        assert_eq!(batch.num_columns(), 1);
-        assert_eq!(batch.num_rows(), 3);
-    }
-}
diff --git a/rust/arrow/src/json/writer.rs b/rust/arrow/src/json/writer.rs
deleted file mode 100644
index 27c1ff138aa..00000000000
--- a/rust/arrow/src/json/writer.rs
+++ /dev/null
@@ -1,1206 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! # JSON Writer
-//!
-//! This JSON writer converts Arrow [`RecordBatch`]es into arrays of
-//! JSON objects or JSON formatted byte streams.
-//!
-//! ## Writing JSON Objects
-//!
-//! To serialize [`RecordBatch`]es into array of
-//! [JSON](https://docs.serde.rs/serde_json/) objects, use
-//! [`record_batches_to_json_rows`]:
-//!
-//! ```
-//! use std::sync::Arc;
-//!
-//! use arrow::array::Int32Array;
-//! use arrow::datatypes::{DataType, Field, Schema};
-//! use arrow::json;
-//! use arrow::record_batch::RecordBatch;
-//!
-//! let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]);
-//! let a = Int32Array::from(vec![1, 2, 3]);
-//! let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a)]).unwrap();
-//!
-//! let json_rows = json::writer::record_batches_to_json_rows(&[batch]);
-//! assert_eq!(
-//!     serde_json::Value::Object(json_rows[1].clone()),
-//!     serde_json::json!({"a": 2}),
-//! );
-//! ```
-//!
-//! ## Writing JSON formatted byte streams
-//!
-//! To serialize [`RecordBatch`]es into line-delimited JSON bytes, use
-//! [`LineDelimitedWriter`]:
-//!
-//! ```
-//! use std::sync::Arc;
-//!
-//! use arrow::array::Int32Array;
-//! use arrow::datatypes::{DataType, Field, Schema};
-//! use arrow::json;
-//! use arrow::record_batch::RecordBatch;
-//!
-//! let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]);
-//! let a = Int32Array::from(vec![1, 2, 3]);
-//! let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a)]).unwrap();
-//!
-//! // Write the record batch out as JSON
-//! let buf = Vec::new();
-//! let mut writer = json::LineDelimitedWriter::new(buf);
-//! writer.write_batches(&vec![batch]).unwrap();
-//! writer.finish().unwrap();
-//!
-//! // Get the underlying buffer back,
-//! let buf = writer.into_inner();
-//! assert_eq!(r#"{"a":1}
-//! {"a":2}
-//! {"a":3}
-//!"#, String::from_utf8(buf).unwrap())
-//! ```
-//!
-//! To serialize [`RecordBatch`]es into a well formed JSON array, use
-//! [`ArrayWriter`]:
-//!
-//! ```
-//! use std::sync::Arc;
-//!
-//! use arrow::array::Int32Array;
-//! use arrow::datatypes::{DataType, Field, Schema};
-//! use arrow::json;
-//! use arrow::record_batch::RecordBatch;
-//!
-//! let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]);
-//! let a = Int32Array::from(vec![1, 2, 3]);
-//! let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a)]).unwrap();
-//!
-//! // Write the record batch out as a JSON array
-//! let buf = Vec::new();
-//! let mut writer = json::ArrayWriter::new(buf);
-//! writer.write_batches(&vec![batch]).unwrap();
-//! writer.finish().unwrap();
-//!
-//! // Get the underlying buffer back,
-//! let buf = writer.into_inner();
-//! assert_eq!(r#"[{"a":1},{"a":2},{"a":3}]"#, String::from_utf8(buf).unwrap())
-//! ```
-
-use std::iter;
-use std::{fmt::Debug, io::Write};
-
-use serde_json::map::Map as JsonMap;
-use serde_json::Value;
-
-use crate::array::*;
-use crate::datatypes::*;
-use crate::error::Result;
-use crate::record_batch::RecordBatch;
-
-fn primitive_array_to_json<T: ArrowPrimitiveType>(array: &ArrayRef) -> Vec<Value> {
-    as_primitive_array::<T>(array)
-        .iter()
-        .map(|maybe_value| match maybe_value {
-            Some(v) => v.into_json_value().unwrap_or(Value::Null),
-            None => Value::Null,
-        })
-        .collect()
-}
-
-fn struct_array_to_jsonmap_array(
-    array: &StructArray,
-    row_count: usize,
-) -> Vec<JsonMap<String, Value>> {
-    let inner_col_names = array.column_names();
-
-    let mut inner_objs = iter::repeat(JsonMap::new())
-        .take(row_count)
-        .collect::<Vec<JsonMap<String, Value>>>();
-
-    array
-        .columns()
-        .iter()
-        .enumerate()
-        .for_each(|(j, struct_col)| {
-            set_column_for_json_rows(
-                &mut inner_objs,
-                row_count,
-                struct_col,
-                inner_col_names[j],
-            );
-        });
-
-    inner_objs
-}
-
-/// Converts an arrow [`ArrayRef`] into a `Vec` of Serde JSON [`serde_json::Value`]'s
-pub fn array_to_json_array(array: &ArrayRef) -> Vec<Value> {
-    match array.data_type() {
-        DataType::Null => iter::repeat(Value::Null).take(array.len()).collect(),
-        DataType::Boolean => as_boolean_array(array)
-            .iter()
-            .map(|maybe_value| match maybe_value {
-                Some(v) => v.into(),
-                None => Value::Null,
-            })
-            .collect(),
-
-        DataType::Utf8 => as_string_array(array)
-            .iter()
-            .map(|maybe_value| match maybe_value {
-                Some(v) => v.into(),
-                None => Value::Null,
-            })
-            .collect(),
-        DataType::Int8 => primitive_array_to_json::<Int8Type>(array),
-        DataType::Int16 => primitive_array_to_json::<Int16Type>(array),
-        DataType::Int32 => primitive_array_to_json::<Int32Type>(array),
-        DataType::Int64 => primitive_array_to_json::<Int64Type>(array),
-        DataType::UInt8 => primitive_array_to_json::<UInt8Type>(array),
-        DataType::UInt16 => primitive_array_to_json::<UInt16Type>(array),
-        DataType::UInt32 => primitive_array_to_json::<UInt32Type>(array),
-        DataType::UInt64 => primitive_array_to_json::<UInt64Type>(array),
-        DataType::Float32 => primitive_array_to_json::<Float32Type>(array),
-        DataType::Float64 => primitive_array_to_json::<Float64Type>(array),
-        DataType::List(_) => as_list_array(array)
-            .iter()
-            .map(|maybe_value| match maybe_value {
-                Some(v) => Value::Array(array_to_json_array(&v)),
-                None => Value::Null,
-            })
-            .collect(),
-        DataType::LargeList(_) => as_large_list_array(array)
-            .iter()
-            .map(|maybe_value| match maybe_value {
-                Some(v) => Value::Array(array_to_json_array(&v)),
-                None => Value::Null,
-            })
-            .collect(),
-        DataType::Struct(_) => {
-            let jsonmaps =
-                struct_array_to_jsonmap_array(as_struct_array(array), array.len());
-            jsonmaps.into_iter().map(Value::Object).collect()
-        }
-        _ => {
-            panic!(
-                "Unsupported datatype for array conversion: {:#?}",
-                array.data_type()
-            );
-        }
-    }
-}
-
-macro_rules! set_column_by_array_type {
-    ($cast_fn:ident, $col_name:ident, $rows:ident, $array:ident, $row_count:ident) => {
-        let arr = $cast_fn($array);
-        $rows.iter_mut().zip(arr.iter()).take($row_count).for_each(
-            |(row, maybe_value)| {
-                if let Some(v) = maybe_value {
-                    row.insert($col_name.to_string(), v.into());
-                }
-            },
-        );
-    };
-}
-
-macro_rules! set_temporal_column_by_array_type {
-    ($array_type:ident, $col_name:ident, $rows:ident, $array:ident, $row_count:ident, $cast_fn:ident) => {
-        let arr = $array.as_any().downcast_ref::<$array_type>().unwrap();
-
-        $rows
-            .iter_mut()
-            .enumerate()
-            .take($row_count)
-            .for_each(|(i, row)| {
-                if !arr.is_null(i) {
-                    if let Some(v) = arr.$cast_fn(i) {
-                        row.insert($col_name.to_string(), v.to_string().into());
-                    }
-                }
-            });
-    };
-}
-
-fn set_column_by_primitive_type<T: ArrowPrimitiveType>(
-    rows: &mut [JsonMap<String, Value>],
-    row_count: usize,
-    array: &ArrayRef,
-    col_name: &str,
-) {
-    let primitive_arr = as_primitive_array::<T>(array);
-
-    rows.iter_mut()
-        .zip(primitive_arr.iter())
-        .take(row_count)
-        .for_each(|(row, maybe_value)| {
-            // when value is null, we simply skip setting the key
-            if let Some(j) = maybe_value.and_then(|v| v.into_json_value()) {
-                row.insert(col_name.to_string(), j);
-            }
-        });
-}
-
-fn set_column_for_json_rows(
-    rows: &mut [JsonMap<String, Value>],
-    row_count: usize,
-    array: &ArrayRef,
-    col_name: &str,
-) {
-    match array.data_type() {
-        DataType::Int8 => {
-            set_column_by_primitive_type::<Int8Type>(rows, row_count, array, col_name)
-        }
-        DataType::Int16 => {
-            set_column_by_primitive_type::<Int16Type>(rows, row_count, array, col_name)
-        }
-        DataType::Int32 => {
-            set_column_by_primitive_type::<Int32Type>(rows, row_count, array, col_name)
-        }
-        DataType::Int64 => {
-            set_column_by_primitive_type::<Int64Type>(rows, row_count, array, col_name)
-        }
-        DataType::UInt8 => {
-            set_column_by_primitive_type::<UInt8Type>(rows, row_count, array, col_name)
-        }
-        DataType::UInt16 => {
-            set_column_by_primitive_type::<UInt16Type>(rows, row_count, array, col_name)
-        }
-        DataType::UInt32 => {
-            set_column_by_primitive_type::<UInt32Type>(rows, row_count, array, col_name)
-        }
-        DataType::UInt64 => {
-            set_column_by_primitive_type::<UInt64Type>(rows, row_count, array, col_name)
-        }
-        DataType::Float32 => {
-            set_column_by_primitive_type::<Float32Type>(rows, row_count, array, col_name)
-        }
-        DataType::Float64 => {
-            set_column_by_primitive_type::<Float64Type>(rows, row_count, array, col_name)
-        }
-        DataType::Null => {
-            // when value is null, we simply skip setting the key
-        }
-        DataType::Boolean => {
-            set_column_by_array_type!(as_boolean_array, col_name, rows, array, row_count);
-        }
-        DataType::Utf8 => {
-            set_column_by_array_type!(as_string_array, col_name, rows, array, row_count);
-        }
-        DataType::Date32 => {
-            set_temporal_column_by_array_type!(
-                Date32Array,
-                col_name,
-                rows,
-                array,
-                row_count,
-                value_as_date
-            );
-        }
-        DataType::Date64 => {
-            set_temporal_column_by_array_type!(
-                Date64Array,
-                col_name,
-                rows,
-                array,
-                row_count,
-                value_as_date
-            );
-        }
-        DataType::Timestamp(TimeUnit::Second, _) => {
-            set_temporal_column_by_array_type!(
-                TimestampSecondArray,
-                col_name,
-                rows,
-                array,
-                row_count,
-                value_as_datetime
-            );
-        }
-        DataType::Timestamp(TimeUnit::Millisecond, _) => {
-            set_temporal_column_by_array_type!(
-                TimestampMillisecondArray,
-                col_name,
-                rows,
-                array,
-                row_count,
-                value_as_datetime
-            );
-        }
-        DataType::Timestamp(TimeUnit::Microsecond, _) => {
-            set_temporal_column_by_array_type!(
-                TimestampMicrosecondArray,
-                col_name,
-                rows,
-                array,
-                row_count,
-                value_as_datetime
-            );
-        }
-        DataType::Timestamp(TimeUnit::Nanosecond, _) => {
-            set_temporal_column_by_array_type!(
-                TimestampNanosecondArray,
-                col_name,
-                rows,
-                array,
-                row_count,
-                value_as_datetime
-            );
-        }
-        DataType::Time32(TimeUnit::Second) => {
-            set_temporal_column_by_array_type!(
-                Time32SecondArray,
-                col_name,
-                rows,
-                array,
-                row_count,
-                value_as_time
-            );
-        }
-        DataType::Time32(TimeUnit::Millisecond) => {
-            set_temporal_column_by_array_type!(
-                Time32MillisecondArray,
-                col_name,
-                rows,
-                array,
-                row_count,
-                value_as_time
-            );
-        }
-        DataType::Time64(TimeUnit::Microsecond) => {
-            set_temporal_column_by_array_type!(
-                Time64MicrosecondArray,
-                col_name,
-                rows,
-                array,
-                row_count,
-                value_as_time
-            );
-        }
-        DataType::Time64(TimeUnit::Nanosecond) => {
-            set_temporal_column_by_array_type!(
-                Time64NanosecondArray,
-                col_name,
-                rows,
-                array,
-                row_count,
-                value_as_time
-            );
-        }
-        DataType::Duration(TimeUnit::Second) => {
-            set_temporal_column_by_array_type!(
-                DurationSecondArray,
-                col_name,
-                rows,
-                array,
-                row_count,
-                value_as_duration
-            );
-        }
-        DataType::Duration(TimeUnit::Millisecond) => {
-            set_temporal_column_by_array_type!(
-                DurationMillisecondArray,
-                col_name,
-                rows,
-                array,
-                row_count,
-                value_as_duration
-            );
-        }
-        DataType::Duration(TimeUnit::Microsecond) => {
-            set_temporal_column_by_array_type!(
-                DurationMicrosecondArray,
-                col_name,
-                rows,
-                array,
-                row_count,
-                value_as_duration
-            );
-        }
-        DataType::Duration(TimeUnit::Nanosecond) => {
-            set_temporal_column_by_array_type!(
-                DurationNanosecondArray,
-                col_name,
-                rows,
-                array,
-                row_count,
-                value_as_duration
-            );
-        }
-        DataType::Struct(_) => {
-            let inner_objs =
-                struct_array_to_jsonmap_array(as_struct_array(array), row_count);
-            rows.iter_mut()
-                .take(row_count)
-                .zip(inner_objs.into_iter())
-                .for_each(|(row, obj)| {
-                    row.insert(col_name.to_string(), Value::Object(obj));
-                });
-        }
-        DataType::List(_) => {
-            let listarr = as_list_array(array);
-            rows.iter_mut()
-                .zip(listarr.iter())
-                .take(row_count)
-                .for_each(|(row, maybe_value)| {
-                    if let Some(v) = maybe_value {
-                        row.insert(
-                            col_name.to_string(),
-                            Value::Array(array_to_json_array(&v)),
-                        );
-                    }
-                });
-        }
-        DataType::LargeList(_) => {
-            let listarr = as_large_list_array(array);
-            rows.iter_mut()
-                .zip(listarr.iter())
-                .take(row_count)
-                .for_each(|(row, maybe_value)| {
-                    if let Some(v) = maybe_value {
-                        row.insert(
-                            col_name.to_string(),
-                            Value::Array(array_to_json_array(&v)),
-                        );
-                    }
-                });
-        }
-        _ => {
-            panic!("Unsupported datatype: {:#?}", array.data_type());
-        }
-    }
-}
-
-/// Converts an arrow [`RecordBatch`] into a `Vec` of Serde JSON
-/// [`JsonMap`]s (objects)
-pub fn record_batches_to_json_rows(
-    batches: &[RecordBatch],
-) -> Vec<JsonMap<String, Value>> {
-    let mut rows: Vec<JsonMap<String, Value>> = iter::repeat(JsonMap::new())
-        .take(batches.iter().map(|b| b.num_rows()).sum())
-        .collect();
-
-    if !rows.is_empty() {
-        let schema = batches[0].schema();
-        let mut base = 0;
-        batches.iter().for_each(|batch| {
-            let row_count = batch.num_rows();
-            batch.columns().iter().enumerate().for_each(|(j, col)| {
-                let col_name = schema.field(j).name();
-                set_column_for_json_rows(&mut rows[base..], row_count, col, col_name);
-            });
-            base += row_count;
-        });
-    }
-
-    rows
-}
-
-/// This trait defines how to format a sequence of JSON objects to a
-/// byte stream.
-pub trait JsonFormat: Debug + Default {
-    #[inline]
-    /// write any bytes needed at the start of the file to the writer
-    fn start_stream<W: Write>(&self, _writer: &mut W) -> Result<()> {
-        Ok(())
-    }
-
-    #[inline]
-    /// write any bytes needed for the start of each row
-    fn start_row<W: Write>(&self, _writer: &mut W, _is_first_row: bool) -> Result<()> {
-        Ok(())
-    }
-
-    #[inline]
-    /// write any bytes needed for the end of each row
-    fn end_row<W: Write>(&self, _writer: &mut W) -> Result<()> {
-        Ok(())
-    }
-
-    /// write any bytes needed for the start of each row
-    fn end_stream<W: Write>(&self, _writer: &mut W) -> Result<()> {
-        Ok(())
-    }
-}
-
-/// Produces JSON output with one record per line. For example
-///
-/// ```json
-/// {"foo":1}
-/// {"bar":1}
-///
-/// ```
-#[derive(Debug, Default)]
-pub struct LineDelimited {}
-
-impl JsonFormat for LineDelimited {
-    fn end_row<W: Write>(&self, writer: &mut W) -> Result<()> {
-        writer.write_all(b"\n")?;
-        Ok(())
-    }
-}
-
-/// Produces JSON output as a single JSON array. For example
-///
-/// ```json
-/// [{"foo":1},{"bar":1}]
-/// ```
-#[derive(Debug, Default)]
-pub struct JsonArray {}
-
-impl JsonFormat for JsonArray {
-    fn start_stream<W: Write>(&self, writer: &mut W) -> Result<()> {
-        writer.write_all(b"[")?;
-        Ok(())
-    }
-
-    fn start_row<W: Write>(&self, writer: &mut W, is_first_row: bool) -> Result<()> {
-        if !is_first_row {
-            writer.write_all(b",")?;
-        }
-        Ok(())
-    }
-
-    fn end_stream<W: Write>(&self, writer: &mut W) -> Result<()> {
-        writer.write_all(b"]")?;
-        Ok(())
-    }
-}
-
-/// A JSON writer which serializes [`RecordBatch`]es to newline delimited JSON objects
-pub type LineDelimitedWriter<W> = Writer<W, LineDelimited>;
-
-/// A JSON writer which serializes [`RecordBatch`]es to JSON arrays
-pub type ArrayWriter<W> = Writer<W, JsonArray>;
-
-/// A JSON writer which serializes [`RecordBatch`]es to a stream of
-/// `u8` encoded JSON objects. See the module level documentation for
-/// detailed usage and examples. The specific format of the stream is
-/// controlled by the [`JsonFormat`] type parameter.
-#[derive(Debug)]
-pub struct Writer<W, F>
-where
-    W: Write,
-    F: JsonFormat,
-{
-    /// Underlying writer to use to write bytes
-    writer: W,
-
-    /// Has the writer output any records yet?
-    started: bool,
-
-    /// Is the writer finished?
-    finished: bool,
-
-    /// Determines how the byte stream is formatted
-    format: F,
-}
-
-impl<W, F> Writer<W, F>
-where
-    W: Write,
-    F: JsonFormat,
-{
-    /// Construct a new writer
-    pub fn new(writer: W) -> Self {
-        Self {
-            writer,
-            started: false,
-            finished: false,
-            format: F::default(),
-        }
-    }
-
-    /// Write a single JSON row to the output writer
-    pub fn write_row(&mut self, row: &Value) -> Result<()> {
-        let is_first_row = !self.started;
-        if !self.started {
-            self.format.start_stream(&mut self.writer)?;
-            self.started = true;
-        }
-
-        self.format.start_row(&mut self.writer, is_first_row)?;
-        self.writer.write_all(&serde_json::to_vec(row)?)?;
-        self.format.end_row(&mut self.writer)?;
-        Ok(())
-    }
-
-    /// Convert the [`RecordBatch`] into JSON rows, and write them to the output
-    pub fn write_batches(&mut self, batches: &[RecordBatch]) -> Result<()> {
-        for row in record_batches_to_json_rows(batches) {
-            self.write_row(&Value::Object(row))?;
-        }
-        Ok(())
-    }
-
-    /// Finishes the output stream. This function must be called after
-    /// all record batches have been produced. (e.g. producing the final `']'` if writing
-    /// arrays.
-    pub fn finish(&mut self) -> Result<()> {
-        if self.started && !self.finished {
-            self.format.end_stream(&mut self.writer)?;
-            self.finished = true;
-        }
-        Ok(())
-    }
-
-    /// Unwraps this `Writer<W>`, returning the underlying writer
-    pub fn into_inner(self) -> W {
-        self.writer
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use std::convert::TryFrom;
-    use std::fs::{read_to_string, File};
-    use std::sync::Arc;
-
-    use serde_json::json;
-
-    use crate::buffer::*;
-    use crate::json::reader::*;
-
-    use super::*;
-
-    #[test]
-    fn write_simple_rows() {
-        let schema = Schema::new(vec![
-            Field::new("c1", DataType::Int32, false),
-            Field::new("c2", DataType::Utf8, false),
-        ]);
-
-        let a = Int32Array::from(vec![Some(1), Some(2), Some(3), None, Some(5)]);
-        let b = StringArray::from(vec![Some("a"), Some("b"), Some("c"), Some("d"), None]);
-
-        let batch =
-            RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a), Arc::new(b)])
-                .unwrap();
-
-        let mut buf = Vec::new();
-        {
-            let mut writer = LineDelimitedWriter::new(&mut buf);
-            writer.write_batches(&[batch]).unwrap();
-        }
-
-        assert_eq!(
-            String::from_utf8(buf).unwrap(),
-            r#"{"c1":1,"c2":"a"}
-{"c1":2,"c2":"b"}
-{"c1":3,"c2":"c"}
-{"c2":"d"}
-{"c1":5}
-"#
-        );
-    }
-
-    #[test]
-    fn write_timestamps() {
-        let ts_string = "2018-11-13T17:11:10.011375885995";
-        let ts_nanos = ts_string
-            .parse::<chrono::NaiveDateTime>()
-            .unwrap()
-            .timestamp_nanos();
-        let ts_micros = ts_nanos / 1000;
-        let ts_millis = ts_micros / 1000;
-        let ts_secs = ts_millis / 1000;
-
-        let arr_nanos =
-            TimestampNanosecondArray::from_opt_vec(vec![Some(ts_nanos), None], None);
-        let arr_micros =
-            TimestampMicrosecondArray::from_opt_vec(vec![Some(ts_micros), None], None);
-        let arr_millis =
-            TimestampMillisecondArray::from_opt_vec(vec![Some(ts_millis), None], None);
-        let arr_secs =
-            TimestampSecondArray::from_opt_vec(vec![Some(ts_secs), None], None);
-        let arr_names = StringArray::from(vec![Some("a"), Some("b")]);
-
-        let schema = Schema::new(vec![
-            Field::new("nanos", arr_nanos.data_type().clone(), false),
-            Field::new("micros", arr_micros.data_type().clone(), false),
-            Field::new("millis", arr_millis.data_type().clone(), false),
-            Field::new("secs", arr_secs.data_type().clone(), false),
-            Field::new("name", arr_names.data_type().clone(), false),
-        ]);
-        let schema = Arc::new(schema);
-
-        let batch = RecordBatch::try_new(
-            schema,
-            vec![
-                Arc::new(arr_nanos),
-                Arc::new(arr_micros),
-                Arc::new(arr_millis),
-                Arc::new(arr_secs),
-                Arc::new(arr_names),
-            ],
-        )
-        .unwrap();
-
-        let mut buf = Vec::new();
-        {
-            let mut writer = LineDelimitedWriter::new(&mut buf);
-            writer.write_batches(&[batch]).unwrap();
-        }
-
-        assert_eq!(
-            String::from_utf8(buf).unwrap(),
-            r#"{"nanos":"2018-11-13 17:11:10.011375885","micros":"2018-11-13 17:11:10.011375","millis":"2018-11-13 17:11:10.011","secs":"2018-11-13 17:11:10","name":"a"}
-{"name":"b"}
-"#
-        );
-    }
-
-    #[test]
-    fn write_dates() {
-        let ts_string = "2018-11-13T17:11:10.011375885995";
-        let ts_millis = ts_string
-            .parse::<chrono::NaiveDateTime>()
-            .unwrap()
-            .timestamp_millis();
-
-        let arr_date32 = Date32Array::from(vec![
-            Some(i32::try_from(ts_millis / 1000 / (60 * 60 * 24)).unwrap()),
-            None,
-        ]);
-        let arr_date64 = Date64Array::from(vec![Some(ts_millis), None]);
-        let arr_names = StringArray::from(vec![Some("a"), Some("b")]);
-
-        let schema = Schema::new(vec![
-            Field::new("date32", arr_date32.data_type().clone(), false),
-            Field::new("date64", arr_date64.data_type().clone(), false),
-            Field::new("name", arr_names.data_type().clone(), false),
-        ]);
-        let schema = Arc::new(schema);
-
-        let batch = RecordBatch::try_new(
-            schema,
-            vec![
-                Arc::new(arr_date32),
-                Arc::new(arr_date64),
-                Arc::new(arr_names),
-            ],
-        )
-        .unwrap();
-
-        let mut buf = Vec::new();
-        {
-            let mut writer = LineDelimitedWriter::new(&mut buf);
-            writer.write_batches(&[batch]).unwrap();
-        }
-
-        assert_eq!(
-            String::from_utf8(buf).unwrap(),
-            r#"{"date32":"2018-11-13","date64":"2018-11-13","name":"a"}
-{"name":"b"}
-"#
-        );
-    }
-
-    #[test]
-    fn write_times() {
-        let arr_time32sec = Time32SecondArray::from(vec![Some(120), None]);
-        let arr_time32msec = Time32MillisecondArray::from(vec![Some(120), None]);
-        let arr_time64usec = Time64MicrosecondArray::from(vec![Some(120), None]);
-        let arr_time64nsec = Time64NanosecondArray::from(vec![Some(120), None]);
-        let arr_names = StringArray::from(vec![Some("a"), Some("b")]);
-
-        let schema = Schema::new(vec![
-            Field::new("time32sec", arr_time32sec.data_type().clone(), false),
-            Field::new("time32msec", arr_time32msec.data_type().clone(), false),
-            Field::new("time64usec", arr_time64usec.data_type().clone(), false),
-            Field::new("time64nsec", arr_time64nsec.data_type().clone(), false),
-            Field::new("name", arr_names.data_type().clone(), false),
-        ]);
-        let schema = Arc::new(schema);
-
-        let batch = RecordBatch::try_new(
-            schema,
-            vec![
-                Arc::new(arr_time32sec),
-                Arc::new(arr_time32msec),
-                Arc::new(arr_time64usec),
-                Arc::new(arr_time64nsec),
-                Arc::new(arr_names),
-            ],
-        )
-        .unwrap();
-
-        let mut buf = Vec::new();
-        {
-            let mut writer = LineDelimitedWriter::new(&mut buf);
-            writer.write_batches(&[batch]).unwrap();
-        }
-
-        assert_eq!(
-            String::from_utf8(buf).unwrap(),
-            r#"{"time32sec":"00:02:00","time32msec":"00:00:00.120","time64usec":"00:00:00.000120","time64nsec":"00:00:00.000000120","name":"a"}
-{"name":"b"}
-"#
-        );
-    }
-
-    #[test]
-    fn write_durations() {
-        let arr_durationsec = DurationSecondArray::from(vec![Some(120), None]);
-        let arr_durationmsec = DurationMillisecondArray::from(vec![Some(120), None]);
-        let arr_durationusec = DurationMicrosecondArray::from(vec![Some(120), None]);
-        let arr_durationnsec = DurationNanosecondArray::from(vec![Some(120), None]);
-        let arr_names = StringArray::from(vec![Some("a"), Some("b")]);
-
-        let schema = Schema::new(vec![
-            Field::new("duration_sec", arr_durationsec.data_type().clone(), false),
-            Field::new("duration_msec", arr_durationmsec.data_type().clone(), false),
-            Field::new("duration_usec", arr_durationusec.data_type().clone(), false),
-            Field::new("duration_nsec", arr_durationnsec.data_type().clone(), false),
-            Field::new("name", arr_names.data_type().clone(), false),
-        ]);
-        let schema = Arc::new(schema);
-
-        let batch = RecordBatch::try_new(
-            schema,
-            vec![
-                Arc::new(arr_durationsec),
-                Arc::new(arr_durationmsec),
-                Arc::new(arr_durationusec),
-                Arc::new(arr_durationnsec),
-                Arc::new(arr_names),
-            ],
-        )
-        .unwrap();
-
-        let mut buf = Vec::new();
-        {
-            let mut writer = LineDelimitedWriter::new(&mut buf);
-            writer.write_batches(&[batch]).unwrap();
-        }
-
-        assert_eq!(
-            String::from_utf8(buf).unwrap(),
-            r#"{"duration_sec":"PT120S","duration_msec":"PT0.120S","duration_usec":"PT0.000120S","duration_nsec":"PT0.000000120S","name":"a"}
-{"name":"b"}
-"#
-        );
-    }
-
-    #[test]
-    fn write_nested_structs() {
-        let schema = Schema::new(vec![
-            Field::new(
-                "c1",
-                DataType::Struct(vec![
-                    Field::new("c11", DataType::Int32, false),
-                    Field::new(
-                        "c12",
-                        DataType::Struct(vec![Field::new("c121", DataType::Utf8, false)]),
-                        false,
-                    ),
-                ]),
-                false,
-            ),
-            Field::new("c2", DataType::Utf8, false),
-        ]);
-
-        let c1 = StructArray::from(vec![
-            (
-                Field::new("c11", DataType::Int32, false),
-                Arc::new(Int32Array::from(vec![Some(1), None, Some(5)])) as ArrayRef,
-            ),
-            (
-                Field::new(
-                    "c12",
-                    DataType::Struct(vec![Field::new("c121", DataType::Utf8, false)]),
-                    false,
-                ),
-                Arc::new(StructArray::from(vec![(
-                    Field::new("c121", DataType::Utf8, false),
-                    Arc::new(StringArray::from(vec![Some("e"), Some("f"), Some("g")]))
-                        as ArrayRef,
-                )])) as ArrayRef,
-            ),
-        ]);
-        let c2 = StringArray::from(vec![Some("a"), Some("b"), Some("c")]);
-
-        let batch =
-            RecordBatch::try_new(Arc::new(schema), vec![Arc::new(c1), Arc::new(c2)])
-                .unwrap();
-
-        let mut buf = Vec::new();
-        {
-            let mut writer = LineDelimitedWriter::new(&mut buf);
-            writer.write_batches(&[batch]).unwrap();
-        }
-
-        assert_eq!(
-            String::from_utf8(buf).unwrap(),
-            r#"{"c1":{"c11":1,"c12":{"c121":"e"}},"c2":"a"}
-{"c1":{"c12":{"c121":"f"}},"c2":"b"}
-{"c1":{"c11":5,"c12":{"c121":"g"}},"c2":"c"}
-"#
-        );
-    }
-
-    #[test]
-    fn write_struct_with_list_field() {
-        let field_c1 = Field::new(
-            "c1",
-            DataType::List(Box::new(Field::new("c_list", DataType::Utf8, false))),
-            false,
-        );
-        let field_c2 = Field::new("c2", DataType::Int32, false);
-        let schema = Schema::new(vec![field_c1.clone(), field_c2]);
-
-        let a_values = StringArray::from(vec!["a", "a1", "b", "c", "d", "e"]);
-        // list column rows: ["a", "a1"], ["b"], ["c"], ["d"], ["e"]
-        let a_value_offsets = Buffer::from(&[0, 2, 3, 4, 5, 6].to_byte_slice());
-        let a_list_data = ArrayData::builder(field_c1.data_type().clone())
-            .len(5)
-            .add_buffer(a_value_offsets)
-            .add_child_data(a_values.data().clone())
-            .null_bit_buffer(Buffer::from(vec![0b00011111]))
-            .build();
-        let a = ListArray::from(a_list_data);
-
-        let b = Int32Array::from(vec![1, 2, 3, 4, 5]);
-
-        let batch =
-            RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a), Arc::new(b)])
-                .unwrap();
-
-        let mut buf = Vec::new();
-        {
-            let mut writer = LineDelimitedWriter::new(&mut buf);
-            writer.write_batches(&[batch]).unwrap();
-        }
-
-        assert_eq!(
-            String::from_utf8(buf).unwrap(),
-            r#"{"c1":["a","a1"],"c2":1}
-{"c1":["b"],"c2":2}
-{"c1":["c"],"c2":3}
-{"c1":["d"],"c2":4}
-{"c1":["e"],"c2":5}
-"#
-        );
-    }
-
-    #[test]
-    fn write_nested_list() {
-        let list_inner_type = Field::new(
-            "a",
-            DataType::List(Box::new(Field::new("b", DataType::Int32, false))),
-            false,
-        );
-        let field_c1 = Field::new(
-            "c1",
-            DataType::List(Box::new(list_inner_type.clone())),
-            false,
-        );
-        let field_c2 = Field::new("c2", DataType::Utf8, false);
-        let schema = Schema::new(vec![field_c1.clone(), field_c2]);
-
-        // list column rows: [[1, 2], [3]], [], [[4, 5, 6]]
-        let a_values = Int32Array::from(vec![1, 2, 3, 4, 5, 6]);
-
-        let a_value_offsets = Buffer::from(&[0, 2, 3, 6].to_byte_slice());
-        // Construct a list array from the above two
-        let a_list_data = ArrayData::builder(list_inner_type.data_type().clone())
-            .len(3)
-            .add_buffer(a_value_offsets)
-            .null_bit_buffer(Buffer::from(vec![0b00000111]))
-            .add_child_data(a_values.data().clone())
-            .build();
-
-        let c1_value_offsets = Buffer::from(&[0, 2, 2, 3].to_byte_slice());
-        let c1_list_data = ArrayData::builder(field_c1.data_type().clone())
-            .len(3)
-            .add_buffer(c1_value_offsets)
-            .add_child_data(a_list_data)
-            .build();
-
-        let c1 = ListArray::from(c1_list_data);
-        let c2 = StringArray::from(vec![Some("foo"), Some("bar"), None]);
-
-        let batch =
-            RecordBatch::try_new(Arc::new(schema), vec![Arc::new(c1), Arc::new(c2)])
-                .unwrap();
-
-        let mut buf = Vec::new();
-        {
-            let mut writer = LineDelimitedWriter::new(&mut buf);
-            writer.write_batches(&[batch]).unwrap();
-        }
-
-        assert_eq!(
-            String::from_utf8(buf).unwrap(),
-            r#"{"c1":[[1,2],[3]],"c2":"foo"}
-{"c1":[],"c2":"bar"}
-{"c1":[[4,5,6]]}
-"#
-        );
-    }
-
-    #[test]
-    fn write_list_of_struct() {
-        let field_c1 = Field::new(
-            "c1",
-            DataType::List(Box::new(Field::new(
-                "s",
-                DataType::Struct(vec![
-                    Field::new("c11", DataType::Int32, false),
-                    Field::new(
-                        "c12",
-                        DataType::Struct(vec![Field::new("c121", DataType::Utf8, false)]),
-                        false,
-                    ),
-                ]),
-                false,
-            ))),
-            true,
-        );
-        let field_c2 = Field::new("c2", DataType::Int32, false);
-        let schema = Schema::new(vec![field_c1.clone(), field_c2]);
-
-        let struct_values = StructArray::from(vec![
-            (
-                Field::new("c11", DataType::Int32, false),
-                Arc::new(Int32Array::from(vec![Some(1), None, Some(5)])) as ArrayRef,
-            ),
-            (
-                Field::new(
-                    "c12",
-                    DataType::Struct(vec![Field::new("c121", DataType::Utf8, false)]),
-                    false,
-                ),
-                Arc::new(StructArray::from(vec![(
-                    Field::new("c121", DataType::Utf8, false),
-                    Arc::new(StringArray::from(vec![Some("e"), Some("f"), Some("g")]))
-                        as ArrayRef,
-                )])) as ArrayRef,
-            ),
-        ]);
-
-        // list column rows (c1):
-        // [{"c11": 1, "c12": {"c121": "e"}}, {"c12": {"c121": "f"}}],
-        // null,
-        // [{"c11": 5, "c12": {"c121": "g"}}]
-        let c1_value_offsets = Buffer::from(&[0, 2, 2, 3].to_byte_slice());
-        let c1_list_data = ArrayData::builder(field_c1.data_type().clone())
-            .len(3)
-            .add_buffer(c1_value_offsets)
-            .add_child_data(struct_values.data().clone())
-            .null_bit_buffer(Buffer::from(vec![0b00000101]))
-            .build();
-        let c1 = ListArray::from(c1_list_data);
-
-        let c2 = Int32Array::from(vec![1, 2, 3]);
-
-        let batch =
-            RecordBatch::try_new(Arc::new(schema), vec![Arc::new(c1), Arc::new(c2)])
-                .unwrap();
-
-        let mut buf = Vec::new();
-        {
-            let mut writer = LineDelimitedWriter::new(&mut buf);
-            writer.write_batches(&[batch]).unwrap();
-        }
-
-        assert_eq!(
-            String::from_utf8(buf).unwrap(),
-            r#"{"c1":[{"c11":1,"c12":{"c121":"e"}},{"c12":{"c121":"f"}}],"c2":1}
-{"c2":2}
-{"c1":[{"c11":5,"c12":{"c121":"g"}}],"c2":3}
-"#
-        );
-    }
-
-    fn test_write_for_file(test_file: &str) {
-        let builder = ReaderBuilder::new()
-            .infer_schema(None)
-            .with_batch_size(1024);
-        let mut reader: Reader<File> = builder
-            .build::<File>(File::open(test_file).unwrap())
-            .unwrap();
-        let batch = reader.next().unwrap().unwrap();
-
-        let mut buf = Vec::new();
-        {
-            let mut writer = LineDelimitedWriter::new(&mut buf);
-            writer.write_batches(&[batch]).unwrap();
-        }
-
-        let result = String::from_utf8(buf).unwrap();
-        let expected = read_to_string(test_file).unwrap();
-        for (r, e) in result.lines().zip(expected.lines()) {
-            let mut expected_json = serde_json::from_str::<Value>(e).unwrap();
-            // remove null value from object to make comparision consistent:
-            if let Value::Object(obj) = expected_json {
-                expected_json = Value::Object(
-                    obj.into_iter().filter(|(_, v)| *v != Value::Null).collect(),
-                );
-            }
-            assert_eq!(serde_json::from_str::<Value>(r).unwrap(), expected_json,);
-        }
-    }
-
-    #[test]
-    fn write_basic_rows() {
-        test_write_for_file("test/data/basic.json");
-    }
-
-    #[test]
-    fn write_arrays() {
-        test_write_for_file("test/data/arrays.json");
-    }
-
-    #[test]
-    fn write_basic_nulls() {
-        test_write_for_file("test/data/basic_nulls.json");
-    }
-
-    #[test]
-    fn json_writer_empty() {
-        let mut writer = ArrayWriter::new(vec![] as Vec<u8>);
-        writer.finish().unwrap();
-        assert_eq!(String::from_utf8(writer.into_inner()).unwrap(), "");
-    }
-    #[test]
-    fn json_writer_one_row() {
-        let mut writer = ArrayWriter::new(vec![] as Vec<u8>);
-        let v = json!({ "an": "object" });
-        writer.write_row(&v).unwrap();
-        writer.finish().unwrap();
-        assert_eq!(
-            String::from_utf8(writer.into_inner()).unwrap(),
-            r#"[{"an":"object"}]"#
-        );
-    }
-
-    #[test]
-    fn json_writer_two_rows() {
-        let mut writer = ArrayWriter::new(vec![] as Vec<u8>);
-        let v = json!({ "an": "object" });
-        writer.write_row(&v).unwrap();
-        let v = json!({ "another": "object" });
-        writer.write_row(&v).unwrap();
-        writer.finish().unwrap();
-        assert_eq!(
-            String::from_utf8(writer.into_inner()).unwrap(),
-            r#"[{"an":"object"},{"another":"object"}]"#
-        );
-    }
-}
diff --git a/rust/arrow/src/lib.rs b/rust/arrow/src/lib.rs
deleted file mode 100644
index 30f968c9979..00000000000
--- a/rust/arrow/src/lib.rs
+++ /dev/null
@@ -1,162 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! A native Rust implementation of [Apache Arrow](https://arrow.apache.org), a cross-language
-//! development platform for in-memory data.
-//!
-//! ### DataType
-//!
-//! Every [`Array`](array::Array) in this crate has an associated [`DataType`](datatypes::DataType),
-//! that specifies how its data is layed in memory and represented.
-//! Thus, a central enum of this crate is [`DataType`](datatypes::DataType), that contains the set of valid
-//! DataTypes in the specification. For example, [`DataType::Utf8`](datatypes::DataType::Utf8).
-//!
-//! ## Array
-//!
-//! The central trait of this package is the dynamically-typed [`Array`](array::Array) that
-//! represents a fixed-sized, immutable, Send + Sync Array of nullable elements. An example of such an array is [`UInt32Array`](array::UInt32Array).
-//! One way to think about an arrow [`Array`](array::Array) is a `Arc<[Option<T>; len]>` where T can be anything ranging from an integer to a string, or even
-//! another [`Array`](array::Array).
-//!
-//! [`Arrays`](array::Array) have [`len()`](array::Array::len), [`data_type()`](array::Array::data_type), and the nullability of each of its elements,
-//! can be obtained via [`is_null(index)`](array::Array::is_null). To downcast an [`Array`](array::Array) to a specific implementation, you can use
-//!
-//! ```rust
-//! use arrow::array::{Array, UInt32Array};
-//! let array = UInt32Array::from(vec![Some(1), None, Some(3)]);
-//! assert_eq!(array.len(), 3);
-//! assert_eq!(array.value(0), 1);
-//! assert_eq!(array.is_null(1), true);
-//! ```
-//!
-//! To make the array dynamically typed, we wrap it in an [`Arc`](std::sync::Arc):
-//!
-//! ```rust
-//! # use std::sync::Arc;
-//! use arrow::datatypes::DataType;
-//! use arrow::array::{UInt32Array, ArrayRef};
-//! # let array = UInt32Array::from(vec![Some(1), None, Some(3)]);
-//! let array: ArrayRef = Arc::new(array);
-//! assert_eq!(array.len(), 3);
-//! // array.value() is not available in the dynamically-typed version
-//! assert_eq!(array.is_null(1), true);
-//! assert_eq!(array.data_type(), &DataType::UInt32);
-//! ```
-//!
-//! to downcast, use `as_any()`:
-//!
-//! ```rust
-//! # use std::sync::Arc;
-//! # use arrow::array::{UInt32Array, ArrayRef};
-//! # let array = UInt32Array::from(vec![Some(1), None, Some(3)]);
-//! # let array: ArrayRef = Arc::new(array);
-//! let array = array.as_any().downcast_ref::<UInt32Array>().unwrap();
-//! assert_eq!(array.value(0), 1);
-//! ```
-//!
-//! ## Memory and Buffers
-//!
-//! Data in [`Array`](array::Array) is stored in [`ArrayData`](array::ArrayData), that in turn
-//! is a collection of other [`ArrayData`](array::ArrayData) and [`Buffers`](buffer::Buffer).
-//! [`Buffers`](buffer::Buffer) is the central struct that array implementations use keep allocated memory and pointers.
-//! The [`MutableBuffer`](buffer::MutableBuffer) is the mutable counter-part of[`Buffer`](buffer::Buffer).
-//! These are the lowest abstractions of this crate, and are used throughout the crate to
-//! efficiently allocate, write, read and deallocate memory.
-//!
-//! ## Field, Schema and RecordBatch
-//!
-//! [`Field`](datatypes::Field) is a struct that contains an array's metadata (datatype and whether its values
-//! can be null), and a name. [`Schema`](datatypes::Schema) is a vector of fields with optional metadata.
-//! Together, they form the basis of a schematic representation of a group of [`Arrays`](array::Array).
-//!
-//! In fact, [`RecordBatch`](record_batch::RecordBatch) is a struct with a [`Schema`](datatypes::Schema) and a vector of
-//! [`Array`](array::Array)s, all with the same `len`. A record batch is the highest order struct that this crate currently offers
-//! and is broadly used to represent a table where each column in an `Array`.
-//!
-//! ## Compute
-//!
-//! This crate offers many operations (called kernels) to operate on `Array`s, that you can find at [compute::kernels].
-//! It has both vertical and horizontal operations, and some of them have an SIMD implementation.
-//!
-//! ## Status
-//!
-//! This crate has most of the implementation of the arrow specification. Specifically, it supports the following types:
-//!
-//! * All arrow primitive types, such as [`Int32Array`](array::UInt8Array), [`BooleanArray`](array::BooleanArray) and [`Float64Array`](array::Float64Array).
-//! * All arrow variable length types, such as [`StringArray`](array::StringArray) and [`BinaryArray`](array::BinaryArray)
-//! * All composite types such as [`StructArray`](array::StructArray) and [`ListArray`](array::ListArray)
-//! * Dictionary types  [`DictionaryArray`](array::DictionaryArray)
-
-//!
-//! This crate also implements many common vertical operations:
-//! * all mathematical binary operators, such as [`subtract`](compute::kernels::arithmetic::subtract)
-//! * all boolean binary operators such as [`equality`](compute::kernels::comparison::eq)
-//! * [`cast`](compute::kernels::cast::cast)
-//! * [`filter`](compute::kernels::filter::filter)
-//! * [`take`](compute::kernels::take::take) and [`limit`](compute::kernels::limit::limit)
-//! * [`sort`](compute::kernels::sort::sort)
-//! * some string operators such as [`substring`](compute::kernels::substring::substring) and [`length`](compute::kernels::length::length)
-//!
-//! as well as some horizontal operations, such as
-//!
-//! * [`min`](compute::kernels::aggregate::min) and [`max`](compute::kernels::aggregate::max)
-//! * [`sum`](compute::kernels::aggregate::sum)
-//!
-//! Finally, this crate implements some readers and writers to different formats:
-//!
-//! * json: [reader](json::reader::Reader)
-//! * csv: [reader](csv::reader::Reader) and [writer](csv::writer::Writer)
-//! * ipc: [reader](ipc::reader::StreamReader) and [writer](ipc::writer::FileWriter)
-//!
-//! The parquet implementation is on a [separate crate](https://crates.io/crates/parquet)
-
-#![cfg_attr(feature = "avx512", feature(stdsimd))]
-#![cfg_attr(feature = "avx512", feature(repr_simd))]
-#![cfg_attr(feature = "avx512", feature(avx512_target_feature))]
-#![allow(dead_code)]
-#![allow(non_camel_case_types)]
-#![deny(clippy::redundant_clone)]
-#![allow(
-    // introduced to ignore lint errors when upgrading from 2020-04-22 to 2020-11-14
-    clippy::float_equality_without_abs,
-    clippy::type_complexity,
-    // upper_case_acronyms lint was introduced in Rust 1.51.
-    // It is triggered in the ffi module, and ipc::gen, which we have no control over
-    clippy::upper_case_acronyms,
-    clippy::vec_init_then_push
-)]
-#![allow(bare_trait_objects)]
-#![warn(missing_debug_implementations)]
-
-pub mod alloc;
-mod arch;
-pub mod array;
-pub mod bitmap;
-pub mod buffer;
-mod bytes;
-pub mod compute;
-pub mod csv;
-pub mod datatypes;
-pub mod error;
-pub mod ffi;
-pub mod ipc;
-pub mod json;
-pub mod record_batch;
-pub mod temporal_conversions;
-pub mod tensor;
-pub mod util;
-mod zz_memory_check;
diff --git a/rust/arrow/src/record_batch.rs b/rust/arrow/src/record_batch.rs
deleted file mode 100644
index 93abb909d02..00000000000
--- a/rust/arrow/src/record_batch.rs
+++ /dev/null
@@ -1,434 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! A two-dimensional batch of column-oriented data with a defined
-//! [schema](crate::datatypes::Schema).
-
-use std::sync::Arc;
-
-use crate::array::*;
-use crate::datatypes::*;
-use crate::error::{ArrowError, Result};
-
-/// A two-dimensional batch of column-oriented data with a defined
-/// [schema](crate::datatypes::Schema).
-///
-/// A `RecordBatch` is a two-dimensional dataset of a number of
-/// contiguous arrays, each the same length.
-/// A record batch has a schema which must match its arrays’
-/// datatypes.
-///
-/// Record batches are a convenient unit of work for various
-/// serialization and computation functions, possibly incremental.
-/// See also [CSV reader](crate::csv::Reader) and
-/// [JSON reader](crate::json::Reader).
-#[derive(Clone, Debug)]
-pub struct RecordBatch {
-    schema: SchemaRef,
-    columns: Vec<Arc<Array>>,
-}
-
-impl RecordBatch {
-    /// Creates a `RecordBatch` from a schema and columns.
-    ///
-    /// Expects the following:
-    ///  * the vec of columns to not be empty
-    ///  * the schema and column data types to have equal lengths
-    ///    and match
-    ///  * each array in columns to have the same length
-    ///
-    /// If the conditions are not met, an error is returned.
-    ///
-    /// # Example
-    ///
-    /// ```
-    /// use std::sync::Arc;
-    /// use arrow::array::Int32Array;
-    /// use arrow::datatypes::{Schema, Field, DataType};
-    /// use arrow::record_batch::RecordBatch;
-    ///
-    /// # fn main() -> arrow::error::Result<()> {
-    /// let id_array = Int32Array::from(vec![1, 2, 3, 4, 5]);
-    /// let schema = Schema::new(vec![
-    ///     Field::new("id", DataType::Int32, false)
-    /// ]);
-    ///
-    /// let batch = RecordBatch::try_new(
-    ///     Arc::new(schema),
-    ///     vec![Arc::new(id_array)]
-    /// )?;
-    /// # Ok(())
-    /// # }
-    /// ```
-    pub fn try_new(schema: SchemaRef, columns: Vec<ArrayRef>) -> Result<Self> {
-        let options = RecordBatchOptions::default();
-        Self::validate_new_batch(&schema, columns.as_slice(), &options)?;
-        Ok(RecordBatch { schema, columns })
-    }
-
-    /// Creates a `RecordBatch` from a schema and columns, with additional options,
-    /// such as whether to strictly validate field names.
-    ///
-    /// See [`RecordBatch::try_new`] for the expected conditions.
-    pub fn try_new_with_options(
-        schema: SchemaRef,
-        columns: Vec<ArrayRef>,
-        options: &RecordBatchOptions,
-    ) -> Result<Self> {
-        Self::validate_new_batch(&schema, columns.as_slice(), options)?;
-        Ok(RecordBatch { schema, columns })
-    }
-
-    /// Creates a new empty [`RecordBatch`].
-    pub fn new_empty(schema: SchemaRef) -> Self {
-        let columns = schema
-            .fields()
-            .iter()
-            .map(|field| new_empty_array(field.data_type()))
-            .collect();
-        RecordBatch { schema, columns }
-    }
-
-    /// Validate the schema and columns using [`RecordBatchOptions`]. Returns an error
-    /// if any validation check fails.
-    fn validate_new_batch(
-        schema: &SchemaRef,
-        columns: &[ArrayRef],
-        options: &RecordBatchOptions,
-    ) -> Result<()> {
-        // check that there are some columns
-        if columns.is_empty() {
-            return Err(ArrowError::InvalidArgumentError(
-                "at least one column must be defined to create a record batch"
-                    .to_string(),
-            ));
-        }
-        // check that number of fields in schema match column length
-        if schema.fields().len() != columns.len() {
-            return Err(ArrowError::InvalidArgumentError(format!(
-                "number of columns({}) must match number of fields({}) in schema",
-                columns.len(),
-                schema.fields().len(),
-            )));
-        }
-        // check that all columns have the same row count, and match the schema
-        let len = columns[0].data().len();
-
-        // This is a bit repetitive, but it is better to check the condition outside the loop
-        if options.match_field_names {
-            for (i, column) in columns.iter().enumerate() {
-                if column.len() != len {
-                    return Err(ArrowError::InvalidArgumentError(
-                        "all columns in a record batch must have the same length"
-                            .to_string(),
-                    ));
-                }
-                if column.data_type() != schema.field(i).data_type() {
-                    return Err(ArrowError::InvalidArgumentError(format!(
-                        "column types must match schema types, expected {:?} but found {:?} at column index {}",
-                        schema.field(i).data_type(),
-                        column.data_type(),
-                        i)));
-                }
-            }
-        } else {
-            for (i, column) in columns.iter().enumerate() {
-                if column.len() != len {
-                    return Err(ArrowError::InvalidArgumentError(
-                        "all columns in a record batch must have the same length"
-                            .to_string(),
-                    ));
-                }
-                if !column
-                    .data_type()
-                    .equals_datatype(schema.field(i).data_type())
-                {
-                    return Err(ArrowError::InvalidArgumentError(format!(
-                        "column types must match schema types, expected {:?} but found {:?} at column index {}",
-                        schema.field(i).data_type(),
-                        column.data_type(),
-                        i)));
-                }
-            }
-        }
-
-        Ok(())
-    }
-
-    /// Returns the [`Schema`](crate::datatypes::Schema) of the record batch.
-    pub fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-
-    /// Returns the number of columns in the record batch.
-    ///
-    /// # Example
-    ///
-    /// ```
-    /// use std::sync::Arc;
-    /// use arrow::array::Int32Array;
-    /// use arrow::datatypes::{Schema, Field, DataType};
-    /// use arrow::record_batch::RecordBatch;
-    ///
-    /// # fn main() -> arrow::error::Result<()> {
-    /// let id_array = Int32Array::from(vec![1, 2, 3, 4, 5]);
-    /// let schema = Schema::new(vec![
-    ///     Field::new("id", DataType::Int32, false)
-    /// ]);
-    ///
-    /// let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(id_array)])?;
-    ///
-    /// assert_eq!(batch.num_columns(), 1);
-    /// # Ok(())
-    /// # }
-    /// ```
-    pub fn num_columns(&self) -> usize {
-        self.columns.len()
-    }
-
-    /// Returns the number of rows in each column.
-    ///
-    /// # Panics
-    ///
-    /// Panics if the `RecordBatch` contains no columns.
-    ///
-    /// # Example
-    ///
-    /// ```
-    /// use std::sync::Arc;
-    /// use arrow::array::Int32Array;
-    /// use arrow::datatypes::{Schema, Field, DataType};
-    /// use arrow::record_batch::RecordBatch;
-    ///
-    /// # fn main() -> arrow::error::Result<()> {
-    /// let id_array = Int32Array::from(vec![1, 2, 3, 4, 5]);
-    /// let schema = Schema::new(vec![
-    ///     Field::new("id", DataType::Int32, false)
-    /// ]);
-    ///
-    /// let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(id_array)])?;
-    ///
-    /// assert_eq!(batch.num_rows(), 5);
-    /// # Ok(())
-    /// # }
-    /// ```
-    pub fn num_rows(&self) -> usize {
-        self.columns[0].data().len()
-    }
-
-    /// Get a reference to a column's array by index.
-    ///
-    /// # Panics
-    ///
-    /// Panics if `index` is outside of `0..num_columns`.
-    pub fn column(&self, index: usize) -> &ArrayRef {
-        &self.columns[index]
-    }
-
-    /// Get a reference to all columns in the record batch.
-    pub fn columns(&self) -> &[ArrayRef] {
-        &self.columns[..]
-    }
-}
-
-/// Options that control the behaviour used when creating a [`RecordBatch`].
-#[derive(Debug)]
-pub struct RecordBatchOptions {
-    /// Match field names of structs and lists. If set to `true`, the names must match.
-    pub match_field_names: bool,
-}
-
-impl Default for RecordBatchOptions {
-    fn default() -> Self {
-        Self {
-            match_field_names: true,
-        }
-    }
-}
-
-impl From<&StructArray> for RecordBatch {
-    /// Create a record batch from struct array.
-    ///
-    /// This currently does not flatten and nested struct types
-    fn from(struct_array: &StructArray) -> Self {
-        if let DataType::Struct(fields) = struct_array.data_type() {
-            let schema = Schema::new(fields.clone());
-            let columns = struct_array.boxed_fields.clone();
-            RecordBatch {
-                schema: Arc::new(schema),
-                columns,
-            }
-        } else {
-            unreachable!("unable to get datatype as struct")
-        }
-    }
-}
-
-impl From<RecordBatch> for StructArray {
-    fn from(batch: RecordBatch) -> Self {
-        batch
-            .schema
-            .fields
-            .iter()
-            .zip(batch.columns.iter())
-            .map(|t| (t.0.clone(), t.1.clone()))
-            .collect::<Vec<(Field, ArrayRef)>>()
-            .into()
-    }
-}
-
-/// Trait for types that can read `RecordBatch`'s.
-pub trait RecordBatchReader: Iterator<Item = Result<RecordBatch>> {
-    /// Returns the schema of this `RecordBatchReader`.
-    ///
-    /// Implementation of this trait should guarantee that all `RecordBatch`'s returned by this
-    /// reader should have the same schema as returned from this method.
-    fn schema(&self) -> SchemaRef;
-
-    /// Reads the next `RecordBatch`.
-    #[deprecated(
-        since = "2.0.0",
-        note = "This method is deprecated in favour of `next` from the trait Iterator."
-    )]
-    fn next_batch(&mut self) -> Result<Option<RecordBatch>> {
-        self.next().transpose()
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    use crate::buffer::Buffer;
-
-    #[test]
-    fn create_record_batch() {
-        let schema = Schema::new(vec![
-            Field::new("a", DataType::Int32, false),
-            Field::new("b", DataType::Utf8, false),
-        ]);
-
-        let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
-        let b = StringArray::from(vec!["a", "b", "c", "d", "e"]);
-
-        let record_batch =
-            RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a), Arc::new(b)])
-                .unwrap();
-
-        assert_eq!(5, record_batch.num_rows());
-        assert_eq!(2, record_batch.num_columns());
-        assert_eq!(&DataType::Int32, record_batch.schema().field(0).data_type());
-        assert_eq!(&DataType::Utf8, record_batch.schema().field(1).data_type());
-        assert_eq!(5, record_batch.column(0).data().len());
-        assert_eq!(5, record_batch.column(1).data().len());
-    }
-
-    #[test]
-    fn create_record_batch_schema_mismatch() {
-        let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]);
-
-        let a = Int64Array::from(vec![1, 2, 3, 4, 5]);
-
-        let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a)]);
-        assert!(!batch.is_ok());
-    }
-
-    #[test]
-    fn create_record_batch_field_name_mismatch() {
-        let struct_fields = vec![
-            Field::new("a1", DataType::Int32, false),
-            Field::new(
-                "a2",
-                DataType::List(Box::new(Field::new("item", DataType::Int8, false))),
-                false,
-            ),
-        ];
-        let struct_type = DataType::Struct(struct_fields);
-        let schema = Arc::new(Schema::new(vec![Field::new("a", struct_type, true)]));
-
-        let a1: ArrayRef = Arc::new(Int32Array::from(vec![1, 2]));
-        let a2_child = Int8Array::from(vec![1, 2, 3, 4]);
-        let a2 = ArrayDataBuilder::new(DataType::List(Box::new(Field::new(
-            "array",
-            DataType::Int8,
-            false,
-        ))))
-        .add_child_data(a2_child.data().clone())
-        .len(2)
-        .add_buffer(Buffer::from(vec![0i32, 3, 4].to_byte_slice()))
-        .build();
-        let a2: ArrayRef = Arc::new(ListArray::from(a2));
-        let a = ArrayDataBuilder::new(DataType::Struct(vec![
-            Field::new("aa1", DataType::Int32, false),
-            Field::new("a2", a2.data_type().clone(), false),
-        ]))
-        .add_child_data(a1.data().clone())
-        .add_child_data(a2.data().clone())
-        .len(2)
-        .build();
-        let a: ArrayRef = Arc::new(StructArray::from(a));
-
-        // creating the batch with field name validation should fail
-        let batch = RecordBatch::try_new(schema.clone(), vec![a.clone()]);
-        assert!(batch.is_err());
-
-        // creating the batch without field name validation should pass
-        let options = RecordBatchOptions {
-            match_field_names: false,
-        };
-        let batch = RecordBatch::try_new_with_options(schema, vec![a], &options);
-        assert!(batch.is_ok());
-    }
-
-    #[test]
-    fn create_record_batch_record_mismatch() {
-        let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]);
-
-        let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
-        let b = Int32Array::from(vec![1, 2, 3, 4, 5]);
-
-        let batch =
-            RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a), Arc::new(b)]);
-        assert!(!batch.is_ok());
-    }
-
-    #[test]
-    fn create_record_batch_from_struct_array() {
-        let boolean = Arc::new(BooleanArray::from(vec![false, false, true, true]));
-        let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));
-        let struct_array = StructArray::from(vec![
-            (
-                Field::new("b", DataType::Boolean, false),
-                boolean.clone() as ArrayRef,
-            ),
-            (
-                Field::new("c", DataType::Int32, false),
-                int.clone() as ArrayRef,
-            ),
-        ]);
-
-        let batch = RecordBatch::from(&struct_array);
-        assert_eq!(2, batch.num_columns());
-        assert_eq!(4, batch.num_rows());
-        assert_eq!(
-            struct_array.data_type(),
-            &DataType::Struct(batch.schema().fields().to_vec())
-        );
-        assert_eq!(batch.column(0).as_ref(), boolean.as_ref());
-        assert_eq!(batch.column(1).as_ref(), int.as_ref());
-    }
-}
diff --git a/rust/arrow/src/temporal_conversions.rs b/rust/arrow/src/temporal_conversions.rs
deleted file mode 100644
index 2d6d6776f59..00000000000
--- a/rust/arrow/src/temporal_conversions.rs
+++ /dev/null
@@ -1,151 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Conversion methods for dates and times.
-
-use chrono::{Duration, NaiveDateTime, NaiveTime};
-
-/// Number of seconds in a day
-const SECONDS_IN_DAY: i64 = 86_400;
-/// Number of milliseconds in a second
-const MILLISECONDS: i64 = 1_000;
-/// Number of microseconds in a second
-const MICROSECONDS: i64 = 1_000_000;
-/// Number of nanoseconds in a second
-const NANOSECONDS: i64 = 1_000_000_000;
-
-/// converts a `i32` representing a `date32` to [`NaiveDateTime`]
-#[inline]
-pub fn date32_to_datetime(v: i32) -> NaiveDateTime {
-    NaiveDateTime::from_timestamp(v as i64 * SECONDS_IN_DAY, 0)
-}
-
-/// converts a `i64` representing a `date64` to [`NaiveDateTime`]
-#[inline]
-pub fn date64_to_datetime(v: i64) -> NaiveDateTime {
-    NaiveDateTime::from_timestamp(
-        // extract seconds from milliseconds
-        v / MILLISECONDS,
-        // discard extracted seconds and convert milliseconds to nanoseconds
-        (v % MILLISECONDS * MICROSECONDS) as u32,
-    )
-}
-
-/// converts a `i32` representing a `time32(s)` to [`NaiveDateTime`]
-#[inline]
-pub fn time32s_to_time(v: i32) -> NaiveTime {
-    NaiveTime::from_num_seconds_from_midnight(v as u32, 0)
-}
-
-/// converts a `i32` representing a `time32(ms)` to [`NaiveDateTime`]
-#[inline]
-pub fn time32ms_to_time(v: i32) -> NaiveTime {
-    let v = v as i64;
-    NaiveTime::from_num_seconds_from_midnight(
-        // extract seconds from milliseconds
-        (v / MILLISECONDS) as u32,
-        // discard extracted seconds and convert milliseconds to
-        // nanoseconds
-        (v % MILLISECONDS * MICROSECONDS) as u32,
-    )
-}
-
-/// converts a `i64` representing a `time64(us)` to [`NaiveDateTime`]
-#[inline]
-pub fn time64us_to_time(v: i64) -> NaiveTime {
-    NaiveTime::from_num_seconds_from_midnight(
-        // extract seconds from microseconds
-        (v / MICROSECONDS) as u32,
-        // discard extracted seconds and convert microseconds to
-        // nanoseconds
-        (v % MICROSECONDS * MILLISECONDS) as u32,
-    )
-}
-
-/// converts a `i64` representing a `time64(ns)` to [`NaiveDateTime`]
-#[inline]
-pub fn time64ns_to_time(v: i64) -> NaiveTime {
-    NaiveTime::from_num_seconds_from_midnight(
-        // extract seconds from nanoseconds
-        (v / NANOSECONDS) as u32,
-        // discard extracted seconds
-        (v % NANOSECONDS) as u32,
-    )
-}
-
-/// converts a `i64` representing a `timestamp(s)` to [`NaiveDateTime`]
-#[inline]
-pub fn timestamp_s_to_datetime(v: i64) -> NaiveDateTime {
-    NaiveDateTime::from_timestamp(v, 0)
-}
-
-/// converts a `i64` representing a `timestamp(ms)` to [`NaiveDateTime`]
-#[inline]
-pub fn timestamp_ms_to_datetime(v: i64) -> NaiveDateTime {
-    NaiveDateTime::from_timestamp(
-        // extract seconds from milliseconds
-        v / MILLISECONDS,
-        // discard extracted seconds and convert milliseconds to nanoseconds
-        (v % MILLISECONDS * MICROSECONDS) as u32,
-    )
-}
-
-/// converts a `i64` representing a `timestamp(us)` to [`NaiveDateTime`]
-#[inline]
-pub fn timestamp_us_to_datetime(v: i64) -> NaiveDateTime {
-    NaiveDateTime::from_timestamp(
-        // extract seconds from microseconds
-        v / MICROSECONDS,
-        // discard extracted seconds and convert microseconds to nanoseconds
-        (v % MICROSECONDS * MILLISECONDS) as u32,
-    )
-}
-
-/// converts a `i64` representing a `timestamp(ns)` to [`NaiveDateTime`]
-#[inline]
-pub fn timestamp_ns_to_datetime(v: i64) -> NaiveDateTime {
-    NaiveDateTime::from_timestamp(
-        // extract seconds from nanoseconds
-        v / NANOSECONDS,
-        // discard extracted seconds
-        (v % NANOSECONDS) as u32,
-    )
-}
-
-/// converts a `i64` representing a `duration(s)` to [`Duration`]
-#[inline]
-pub fn duration_s_to_duration(v: i64) -> Duration {
-    Duration::seconds(v)
-}
-
-/// converts a `i64` representing a `duration(ms)` to [`Duration`]
-#[inline]
-pub fn duration_ms_to_duration(v: i64) -> Duration {
-    Duration::milliseconds(v)
-}
-
-/// converts a `i64` representing a `duration(us)` to [`Duration`]
-#[inline]
-pub fn duration_us_to_duration(v: i64) -> Duration {
-    Duration::microseconds(v)
-}
-
-/// converts a `i64` representing a `duration(ns)` to [`Duration`]
-#[inline]
-pub fn duration_ns_to_duration(v: i64) -> Duration {
-    Duration::nanoseconds(v)
-}
diff --git a/rust/arrow/src/tensor.rs b/rust/arrow/src/tensor.rs
deleted file mode 100644
index 35e45a25c38..00000000000
--- a/rust/arrow/src/tensor.rs
+++ /dev/null
@@ -1,495 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Arrow Tensor Type, defined in
-//! [`format/Tensor.fbs`](https://github.com/apache/arrow/blob/master/format/Tensor.fbs).
-
-use std::marker::PhantomData;
-use std::mem;
-
-use crate::buffer::Buffer;
-use crate::datatypes::*;
-
-use crate::error::{ArrowError, Result};
-
-/// Computes the strides required assuming a row major memory layout
-fn compute_row_major_strides<T: ArrowPrimitiveType>(
-    shape: &[usize],
-) -> Result<Vec<usize>> {
-    let mut remaining_bytes = mem::size_of::<T::Native>();
-
-    for i in shape {
-        if let Some(val) = remaining_bytes.checked_mul(*i) {
-            remaining_bytes = val;
-        } else {
-            return Err(ArrowError::ComputeError(
-                "overflow occurred when computing row major strides.".to_string(),
-            ));
-        }
-    }
-
-    let mut strides = Vec::<usize>::new();
-    for i in shape {
-        remaining_bytes /= *i;
-        strides.push(remaining_bytes);
-    }
-
-    Ok(strides)
-}
-
-/// Computes the strides required assuming a column major memory layout
-fn compute_column_major_strides<T: ArrowPrimitiveType>(
-    shape: &[usize],
-) -> Result<Vec<usize>> {
-    let mut remaining_bytes = mem::size_of::<T::Native>();
-    let mut strides = Vec::<usize>::new();
-
-    for i in shape {
-        strides.push(remaining_bytes);
-
-        if let Some(val) = remaining_bytes.checked_mul(*i) {
-            remaining_bytes = val;
-        } else {
-            return Err(ArrowError::ComputeError(
-                "overflow occurred when computing column major strides.".to_string(),
-            ));
-        }
-    }
-
-    Ok(strides)
-}
-
-/// Tensor of primitive types
-#[derive(Debug)]
-pub struct Tensor<'a, T: ArrowPrimitiveType> {
-    data_type: DataType,
-    buffer: Buffer,
-    shape: Option<Vec<usize>>,
-    strides: Option<Vec<usize>>,
-    names: Option<Vec<&'a str>>,
-    _marker: PhantomData<T>,
-}
-
-pub type BooleanTensor<'a> = Tensor<'a, BooleanType>;
-pub type Int8Tensor<'a> = Tensor<'a, Int8Type>;
-pub type Int16Tensor<'a> = Tensor<'a, Int16Type>;
-pub type Int32Tensor<'a> = Tensor<'a, Int32Type>;
-pub type Int64Tensor<'a> = Tensor<'a, Int64Type>;
-pub type UInt8Tensor<'a> = Tensor<'a, UInt8Type>;
-pub type UInt16Tensor<'a> = Tensor<'a, UInt16Type>;
-pub type UInt32Tensor<'a> = Tensor<'a, UInt32Type>;
-pub type UInt64Tensor<'a> = Tensor<'a, UInt64Type>;
-pub type Float32Tensor<'a> = Tensor<'a, Float32Type>;
-pub type Float64Tensor<'a> = Tensor<'a, Float64Type>;
-
-impl<'a, T: ArrowPrimitiveType> Tensor<'a, T> {
-    /// Creates a new `Tensor`
-    pub fn try_new(
-        buffer: Buffer,
-        shape: Option<Vec<usize>>,
-        strides: Option<Vec<usize>>,
-        names: Option<Vec<&'a str>>,
-    ) -> Result<Self> {
-        match shape {
-            None => {
-                if buffer.len() != mem::size_of::<T::Native>() {
-                    return Err(ArrowError::InvalidArgumentError(
-                        "underlying buffer should only contain a single tensor element"
-                            .to_string(),
-                    ));
-                }
-
-                if strides != None {
-                    return Err(ArrowError::InvalidArgumentError(
-                        "expected None strides for tensor with no shape".to_string(),
-                    ));
-                }
-
-                if names != None {
-                    return Err(ArrowError::InvalidArgumentError(
-                        "expected None names for tensor with no shape".to_string(),
-                    ));
-                }
-            }
-
-            Some(ref s) => {
-                if let Some(ref st) = strides {
-                    if st.len() != s.len() {
-                        return Err(ArrowError::InvalidArgumentError(
-                            "shape and stride dimensions differ".to_string(),
-                        ));
-                    }
-                }
-
-                if let Some(ref n) = names {
-                    if n.len() != s.len() {
-                        return Err(ArrowError::InvalidArgumentError(
-                            "number of dimensions and number of dimension names differ"
-                                .to_string(),
-                        ));
-                    }
-                }
-
-                let total_elements: usize = s.iter().product();
-                if total_elements != (buffer.len() / mem::size_of::<T::Native>()) {
-                    return Err(ArrowError::InvalidArgumentError(
-                        "number of elements in buffer does not match dimensions"
-                            .to_string(),
-                    ));
-                }
-            }
-        };
-
-        // Checking that the tensor strides used for construction are correct
-        // otherwise a row major stride is calculated and used as value for the tensor
-        let tensor_strides = {
-            if let Some(st) = strides {
-                if let Some(ref s) = shape {
-                    if compute_row_major_strides::<T>(s)? == st
-                        || compute_column_major_strides::<T>(s)? == st
-                    {
-                        Some(st)
-                    } else {
-                        return Err(ArrowError::InvalidArgumentError(
-                            "the input stride does not match the selected shape"
-                                .to_string(),
-                        ));
-                    }
-                } else {
-                    Some(st)
-                }
-            } else if let Some(ref s) = shape {
-                Some(compute_row_major_strides::<T>(s)?)
-            } else {
-                None
-            }
-        };
-
-        Ok(Self {
-            data_type: T::DATA_TYPE,
-            buffer,
-            shape,
-            strides: tensor_strides,
-            names,
-            _marker: PhantomData,
-        })
-    }
-
-    /// Creates a new Tensor using row major memory layout
-    pub fn new_row_major(
-        buffer: Buffer,
-        shape: Option<Vec<usize>>,
-        names: Option<Vec<&'a str>>,
-    ) -> Result<Self> {
-        if let Some(ref s) = shape {
-            let strides = Some(compute_row_major_strides::<T>(&s)?);
-
-            Self::try_new(buffer, shape, strides, names)
-        } else {
-            Err(ArrowError::InvalidArgumentError(
-                "shape required to create row major tensor".to_string(),
-            ))
-        }
-    }
-
-    /// Creates a new Tensor using column major memory layout
-    pub fn new_column_major(
-        buffer: Buffer,
-        shape: Option<Vec<usize>>,
-        names: Option<Vec<&'a str>>,
-    ) -> Result<Self> {
-        if let Some(ref s) = shape {
-            let strides = Some(compute_column_major_strides::<T>(&s)?);
-
-            Self::try_new(buffer, shape, strides, names)
-        } else {
-            Err(ArrowError::InvalidArgumentError(
-                "shape required to create column major tensor".to_string(),
-            ))
-        }
-    }
-
-    /// The data type of the `Tensor`
-    pub fn data_type(&self) -> &DataType {
-        &self.data_type
-    }
-
-    /// The sizes of the dimensions
-    pub fn shape(&self) -> Option<&Vec<usize>> {
-        self.shape.as_ref()
-    }
-
-    /// Returns a reference to the underlying `Buffer`
-    pub fn data(&self) -> &Buffer {
-        &self.buffer
-    }
-
-    /// The number of bytes between elements in each dimension
-    pub fn strides(&self) -> Option<&Vec<usize>> {
-        self.strides.as_ref()
-    }
-
-    /// The names of the dimensions
-    pub fn names(&self) -> Option<&Vec<&'a str>> {
-        self.names.as_ref()
-    }
-
-    /// The number of dimensions
-    pub fn ndim(&self) -> usize {
-        match &self.shape {
-            None => 0,
-            Some(v) => v.len(),
-        }
-    }
-
-    /// The name of dimension i
-    pub fn dim_name(&self, i: usize) -> Option<&'a str> {
-        self.names.as_ref().map(|ref names| names[i])
-    }
-
-    /// The total number of elements in the `Tensor`
-    pub fn size(&self) -> usize {
-        match self.shape {
-            None => 0,
-            Some(ref s) => s.iter().product(),
-        }
-    }
-
-    /// Indicates if the data is laid out contiguously in memory
-    pub fn is_contiguous(&self) -> Result<bool> {
-        Ok(self.is_row_major()? || self.is_column_major()?)
-    }
-
-    /// Indicates if the memory layout row major
-    pub fn is_row_major(&self) -> Result<bool> {
-        match self.shape {
-            None => Ok(false),
-            Some(ref s) => Ok(Some(compute_row_major_strides::<T>(s)?) == self.strides),
-        }
-    }
-
-    /// Indicates if the memory layout column major
-    pub fn is_column_major(&self) -> Result<bool> {
-        match self.shape {
-            None => Ok(false),
-            Some(ref s) => {
-                Ok(Some(compute_column_major_strides::<T>(s)?) == self.strides)
-            }
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    use crate::array::*;
-    use crate::buffer::Buffer;
-
-    #[test]
-    fn test_compute_row_major_strides() {
-        assert_eq!(
-            vec![48_usize, 8],
-            compute_row_major_strides::<Int64Type>(&[4_usize, 6]).unwrap()
-        );
-        assert_eq!(
-            vec![24_usize, 4],
-            compute_row_major_strides::<Int32Type>(&[4_usize, 6]).unwrap()
-        );
-        assert_eq!(
-            vec![6_usize, 1],
-            compute_row_major_strides::<Int8Type>(&[4_usize, 6]).unwrap()
-        );
-    }
-
-    #[test]
-    fn test_compute_column_major_strides() {
-        assert_eq!(
-            vec![8_usize, 32],
-            compute_column_major_strides::<Int64Type>(&[4_usize, 6]).unwrap()
-        );
-        assert_eq!(
-            vec![4_usize, 16],
-            compute_column_major_strides::<Int32Type>(&[4_usize, 6]).unwrap()
-        );
-        assert_eq!(
-            vec![1_usize, 4],
-            compute_column_major_strides::<Int8Type>(&[4_usize, 6]).unwrap()
-        );
-    }
-
-    #[test]
-    fn test_zero_dim() {
-        let buf = Buffer::from(&[1]);
-        let tensor = UInt8Tensor::try_new(buf, None, None, None).unwrap();
-        assert_eq!(0, tensor.size());
-        assert_eq!(None, tensor.shape());
-        assert_eq!(None, tensor.names());
-        assert_eq!(0, tensor.ndim());
-        assert_eq!(false, tensor.is_row_major().unwrap());
-        assert_eq!(false, tensor.is_column_major().unwrap());
-        assert_eq!(false, tensor.is_contiguous().unwrap());
-
-        let buf = Buffer::from(&[1, 2, 2, 2]);
-        let tensor = Int32Tensor::try_new(buf, None, None, None).unwrap();
-        assert_eq!(0, tensor.size());
-        assert_eq!(None, tensor.shape());
-        assert_eq!(None, tensor.names());
-        assert_eq!(0, tensor.ndim());
-        assert_eq!(false, tensor.is_row_major().unwrap());
-        assert_eq!(false, tensor.is_column_major().unwrap());
-        assert_eq!(false, tensor.is_contiguous().unwrap());
-    }
-
-    #[test]
-    fn test_tensor() {
-        let mut builder = Int32BufferBuilder::new(16);
-        for i in 0..16 {
-            builder.append(i);
-        }
-        let buf = builder.finish();
-        let tensor = Int32Tensor::try_new(buf, Some(vec![2, 8]), None, None).unwrap();
-        assert_eq!(16, tensor.size());
-        assert_eq!(Some(vec![2_usize, 8]).as_ref(), tensor.shape());
-        assert_eq!(Some(vec![32_usize, 4]).as_ref(), tensor.strides());
-        assert_eq!(2, tensor.ndim());
-        assert_eq!(None, tensor.names());
-    }
-
-    #[test]
-    fn test_new_row_major() {
-        let mut builder = Int32BufferBuilder::new(16);
-        for i in 0..16 {
-            builder.append(i);
-        }
-        let buf = builder.finish();
-        let tensor = Int32Tensor::new_row_major(buf, Some(vec![2, 8]), None).unwrap();
-        assert_eq!(16, tensor.size());
-        assert_eq!(Some(vec![2_usize, 8]).as_ref(), tensor.shape());
-        assert_eq!(Some(vec![32_usize, 4]).as_ref(), tensor.strides());
-        assert_eq!(None, tensor.names());
-        assert_eq!(2, tensor.ndim());
-        assert_eq!(true, tensor.is_row_major().unwrap());
-        assert_eq!(false, tensor.is_column_major().unwrap());
-        assert_eq!(true, tensor.is_contiguous().unwrap());
-    }
-
-    #[test]
-    fn test_new_column_major() {
-        let mut builder = Int32BufferBuilder::new(16);
-        for i in 0..16 {
-            builder.append(i);
-        }
-        let buf = builder.finish();
-        let tensor = Int32Tensor::new_column_major(buf, Some(vec![2, 8]), None).unwrap();
-        assert_eq!(16, tensor.size());
-        assert_eq!(Some(vec![2_usize, 8]).as_ref(), tensor.shape());
-        assert_eq!(Some(vec![4_usize, 8]).as_ref(), tensor.strides());
-        assert_eq!(None, tensor.names());
-        assert_eq!(2, tensor.ndim());
-        assert_eq!(false, tensor.is_row_major().unwrap());
-        assert_eq!(true, tensor.is_column_major().unwrap());
-        assert_eq!(true, tensor.is_contiguous().unwrap());
-    }
-
-    #[test]
-    fn test_with_names() {
-        let mut builder = Int64BufferBuilder::new(8);
-        for i in 0..8 {
-            builder.append(i);
-        }
-        let buf = builder.finish();
-        let names = vec!["Dim 1", "Dim 2"];
-        let tensor =
-            Int64Tensor::new_column_major(buf, Some(vec![2, 4]), Some(names)).unwrap();
-        assert_eq!(8, tensor.size());
-        assert_eq!(Some(vec![2_usize, 4]).as_ref(), tensor.shape());
-        assert_eq!(Some(vec![8_usize, 16]).as_ref(), tensor.strides());
-        assert_eq!("Dim 1", tensor.dim_name(0).unwrap());
-        assert_eq!("Dim 2", tensor.dim_name(1).unwrap());
-        assert_eq!(2, tensor.ndim());
-        assert_eq!(false, tensor.is_row_major().unwrap());
-        assert_eq!(true, tensor.is_column_major().unwrap());
-        assert_eq!(true, tensor.is_contiguous().unwrap());
-    }
-
-    #[test]
-    fn test_inconsistent_strides() {
-        let mut builder = Int32BufferBuilder::new(16);
-        for i in 0..16 {
-            builder.append(i);
-        }
-        let buf = builder.finish();
-
-        let result =
-            Int32Tensor::try_new(buf, Some(vec![2, 8]), Some(vec![2, 8, 1]), None);
-
-        if result.is_ok() {
-            panic!("shape and stride dimensions are different")
-        }
-    }
-
-    #[test]
-    fn test_inconsistent_names() {
-        let mut builder = Int32BufferBuilder::new(16);
-        for i in 0..16 {
-            builder.append(i);
-        }
-        let buf = builder.finish();
-
-        let result = Int32Tensor::try_new(
-            buf,
-            Some(vec![2, 8]),
-            Some(vec![4, 8]),
-            Some(vec!["1", "2", "3"]),
-        );
-
-        if result.is_ok() {
-            panic!("dimensions and names have different shape")
-        }
-    }
-
-    #[test]
-    fn test_incorrect_shape() {
-        let mut builder = Int32BufferBuilder::new(16);
-        for i in 0..16 {
-            builder.append(i);
-        }
-        let buf = builder.finish();
-
-        let result = Int32Tensor::try_new(buf, Some(vec![2, 6]), None, None);
-
-        if result.is_ok() {
-            panic!("number of elements does not match for the shape")
-        }
-    }
-
-    #[test]
-    fn test_incorrect_stride() {
-        let mut builder = Int32BufferBuilder::new(16);
-        for i in 0..16 {
-            builder.append(i);
-        }
-        let buf = builder.finish();
-
-        let result = Int32Tensor::try_new(buf, Some(vec![2, 8]), Some(vec![30, 4]), None);
-
-        if result.is_ok() {
-            panic!("the input stride does not match the selected shape")
-        }
-    }
-}
diff --git a/rust/arrow/src/util/bench_util.rs b/rust/arrow/src/util/bench_util.rs
deleted file mode 100644
index fd0ece830a1..00000000000
--- a/rust/arrow/src/util/bench_util.rs
+++ /dev/null
@@ -1,155 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Utils to make benchmarking easier
-
-use crate::array::*;
-use crate::datatypes::*;
-use crate::util::test_util::seedable_rng;
-use rand::Rng;
-use rand::SeedableRng;
-use rand::{
-    distributions::{Alphanumeric, Distribution, Standard},
-    prelude::StdRng,
-};
-
-/// Creates an random (but fixed-seeded) array of a given size and null density
-pub fn create_primitive_array<T>(size: usize, null_density: f32) -> PrimitiveArray<T>
-where
-    T: ArrowPrimitiveType,
-    Standard: Distribution<T::Native>,
-{
-    let mut rng = seedable_rng();
-
-    (0..size)
-        .map(|_| {
-            if rng.gen::<f32>() < null_density {
-                None
-            } else {
-                Some(rng.gen())
-            }
-        })
-        .collect()
-}
-
-pub fn create_primitive_array_with_seed<T>(
-    size: usize,
-    null_density: f32,
-    seed: u64,
-) -> PrimitiveArray<T>
-where
-    T: ArrowPrimitiveType,
-    Standard: Distribution<T::Native>,
-{
-    let mut rng = StdRng::seed_from_u64(seed);
-
-    (0..size)
-        .map(|_| {
-            if rng.gen::<f32>() < null_density {
-                None
-            } else {
-                Some(rng.gen())
-            }
-        })
-        .collect()
-}
-
-/// Creates an random (but fixed-seeded) array of a given size and null density
-pub fn create_boolean_array(
-    size: usize,
-    null_density: f32,
-    true_density: f32,
-) -> BooleanArray
-where
-    Standard: Distribution<bool>,
-{
-    let mut rng = seedable_rng();
-    (0..size)
-        .map(|_| {
-            if rng.gen::<f32>() < null_density {
-                None
-            } else {
-                let value = rng.gen::<f32>() < true_density;
-                Some(value)
-            }
-        })
-        .collect()
-}
-
-/// Creates an random (but fixed-seeded) array of a given size and null density
-pub fn create_string_array<Offset: StringOffsetSizeTrait>(
-    size: usize,
-    null_density: f32,
-) -> GenericStringArray<Offset> {
-    let rng = &mut seedable_rng();
-
-    (0..size)
-        .map(|_| {
-            if rng.gen::<f32>() < null_density {
-                None
-            } else {
-                let value = rng.sample_iter(&Alphanumeric).take(4).collect::<String>();
-                Some(value)
-            }
-        })
-        .collect()
-}
-
-/// Creates an random (but fixed-seeded) binary array of a given size and null density
-pub fn create_binary_array<Offset: BinaryOffsetSizeTrait>(
-    size: usize,
-    null_density: f32,
-) -> GenericBinaryArray<Offset> {
-    let rng = &mut seedable_rng();
-    let range_rng = &mut seedable_rng();
-
-    (0..size)
-        .map(|_| {
-            if rng.gen::<f32>() < null_density {
-                None
-            } else {
-                let value = rng
-                    .sample_iter::<u8, _>(Standard)
-                    .take(range_rng.gen_range(0, 8))
-                    .collect::<Vec<u8>>();
-                Some(value)
-            }
-        })
-        .collect()
-}
-
-/// Creates an random (but fixed-seeded) array of a given size and null density
-pub fn create_fsb_array(
-    size: usize,
-    null_density: f32,
-    value_len: usize,
-) -> FixedSizeBinaryArray {
-    let rng = &mut seedable_rng();
-
-    FixedSizeBinaryArray::try_from_sparse_iter((0..size).map(|_| {
-        if rng.gen::<f32>() < null_density {
-            None
-        } else {
-            let value = rng
-                .sample_iter::<u8, _>(Standard)
-                .take(value_len)
-                .collect::<Vec<u8>>();
-            Some(value)
-        }
-    }))
-    .unwrap()
-}
diff --git a/rust/arrow/src/util/bit_chunk_iterator.rs b/rust/arrow/src/util/bit_chunk_iterator.rs
deleted file mode 100644
index b9145b7af86..00000000000
--- a/rust/arrow/src/util/bit_chunk_iterator.rs
+++ /dev/null
@@ -1,257 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-use crate::util::bit_util::ceil;
-use std::fmt::Debug;
-
-#[derive(Debug)]
-pub struct BitChunks<'a> {
-    buffer: &'a [u8],
-    /// offset inside a byte, guaranteed to be between 0 and 7 (inclusive)
-    bit_offset: usize,
-    /// number of complete u64 chunks
-    chunk_len: usize,
-    /// number of remaining bits, guaranteed to be between 0 and 63 (inclusive)
-    remainder_len: usize,
-}
-
-impl<'a> BitChunks<'a> {
-    pub fn new(buffer: &'a [u8], offset: usize, len: usize) -> Self {
-        assert!(ceil(offset + len, 8) <= buffer.len() * 8);
-
-        let byte_offset = offset / 8;
-        let bit_offset = offset % 8;
-
-        let chunk_bits = 8 * std::mem::size_of::<u64>();
-
-        let chunk_len = len / chunk_bits;
-        let remainder_len = len & (chunk_bits - 1);
-
-        BitChunks::<'a> {
-            buffer: &buffer[byte_offset..],
-            bit_offset,
-            chunk_len,
-            remainder_len,
-        }
-    }
-}
-
-#[derive(Debug)]
-pub struct BitChunkIterator<'a> {
-    buffer: &'a [u8],
-    bit_offset: usize,
-    chunk_len: usize,
-    index: usize,
-}
-
-impl<'a> BitChunks<'a> {
-    /// Returns the number of remaining bits, guaranteed to be between 0 and 63 (inclusive)
-    #[inline]
-    pub const fn remainder_len(&self) -> usize {
-        self.remainder_len
-    }
-
-    /// Returns the number of chunks
-    #[inline]
-    pub const fn chunk_len(&self) -> usize {
-        self.chunk_len
-    }
-
-    /// Returns the bitmask of remaining bits
-    #[inline]
-    pub fn remainder_bits(&self) -> u64 {
-        let bit_len = self.remainder_len;
-        if bit_len == 0 {
-            0
-        } else {
-            let bit_offset = self.bit_offset;
-            // number of bytes to read
-            // might be one more than sizeof(u64) if the offset is in the middle of a byte
-            let byte_len = ceil(bit_len + bit_offset, 8);
-            // pointer to remainder bytes after all complete chunks
-            let base = unsafe {
-                self.buffer
-                    .as_ptr()
-                    .add(self.chunk_len * std::mem::size_of::<u64>())
-            };
-
-            let mut bits = unsafe { std::ptr::read(base) } as u64 >> bit_offset;
-            for i in 1..byte_len {
-                let byte = unsafe { std::ptr::read(base.add(i)) };
-                bits |= (byte as u64) << (i * 8 - bit_offset);
-            }
-
-            bits & ((1 << bit_len) - 1)
-        }
-    }
-
-    /// Returns an iterator over chunks of 64 bits represented as an u64
-    #[inline]
-    pub const fn iter(&self) -> BitChunkIterator<'a> {
-        BitChunkIterator::<'a> {
-            buffer: self.buffer,
-            bit_offset: self.bit_offset,
-            chunk_len: self.chunk_len,
-            index: 0,
-        }
-    }
-}
-
-impl<'a> IntoIterator for BitChunks<'a> {
-    type Item = u64;
-    type IntoIter = BitChunkIterator<'a>;
-
-    fn into_iter(self) -> Self::IntoIter {
-        self.iter()
-    }
-}
-
-impl Iterator for BitChunkIterator<'_> {
-    type Item = u64;
-
-    #[inline]
-    fn next(&mut self) -> Option<u64> {
-        let index = self.index;
-        if index >= self.chunk_len {
-            return None;
-        }
-
-        // cast to *const u64 should be fine since we are using read_unaligned below
-        #[allow(clippy::cast_ptr_alignment)]
-        let raw_data = self.buffer.as_ptr() as *const u64;
-
-        // bit-packed buffers are stored starting with the least-significant byte first
-        // so when reading as u64 on a big-endian machine, the bytes need to be swapped
-        let current = unsafe { std::ptr::read_unaligned(raw_data.add(index)).to_le() };
-
-        let combined = if self.bit_offset == 0 {
-            current
-        } else {
-            let next =
-                unsafe { std::ptr::read_unaligned(raw_data.add(index + 1)).to_le() };
-
-            current >> self.bit_offset
-                | (next & ((1 << self.bit_offset) - 1)) << (64 - self.bit_offset)
-        };
-
-        self.index = index + 1;
-
-        Some(combined)
-    }
-
-    #[inline]
-    fn size_hint(&self) -> (usize, Option<usize>) {
-        (
-            self.chunk_len - self.index,
-            Some(self.chunk_len - self.index),
-        )
-    }
-}
-
-impl ExactSizeIterator for BitChunkIterator<'_> {
-    #[inline]
-    fn len(&self) -> usize {
-        self.chunk_len - self.index
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use crate::buffer::Buffer;
-
-    #[test]
-    fn test_iter_aligned() {
-        let input: &[u8] = &[0, 1, 2, 3, 4, 5, 6, 7];
-        let buffer: Buffer = Buffer::from(input);
-
-        let bitchunks = buffer.bit_chunks(0, 64);
-        let result = bitchunks.into_iter().collect::<Vec<_>>();
-
-        assert_eq!(vec![0x0706050403020100], result);
-    }
-
-    #[test]
-    fn test_iter_unaligned() {
-        let input: &[u8] = &[
-            0b00000000, 0b00000001, 0b00000010, 0b00000100, 0b00001000, 0b00010000,
-            0b00100000, 0b01000000, 0b11111111,
-        ];
-        let buffer: Buffer = Buffer::from(input);
-
-        let bitchunks = buffer.bit_chunks(4, 64);
-
-        assert_eq!(0, bitchunks.remainder_len());
-        assert_eq!(0, bitchunks.remainder_bits());
-
-        let result = bitchunks.into_iter().collect::<Vec<_>>();
-
-        assert_eq!(
-            vec![0b1111010000000010000000010000000010000000010000000010000000010000],
-            result
-        );
-    }
-
-    #[test]
-    fn test_iter_unaligned_remainder_1_byte() {
-        let input: &[u8] = &[
-            0b00000000, 0b00000001, 0b00000010, 0b00000100, 0b00001000, 0b00010000,
-            0b00100000, 0b01000000, 0b11111111,
-        ];
-        let buffer: Buffer = Buffer::from(input);
-
-        let bitchunks = buffer.bit_chunks(4, 66);
-
-        assert_eq!(2, bitchunks.remainder_len());
-        assert_eq!(0b00000011, bitchunks.remainder_bits());
-
-        let result = bitchunks.into_iter().collect::<Vec<_>>();
-
-        assert_eq!(
-            vec![0b1111010000000010000000010000000010000000010000000010000000010000],
-            result
-        );
-    }
-
-    #[test]
-    fn test_iter_unaligned_remainder_bits_across_bytes() {
-        let input: &[u8] = &[0b00111111, 0b11111100];
-        let buffer: Buffer = Buffer::from(input);
-
-        // remainder contains bits from both bytes
-        // result should be the highest 2 bits from first byte followed by lowest 5 bits of second bytes
-        let bitchunks = buffer.bit_chunks(6, 7);
-
-        assert_eq!(7, bitchunks.remainder_len());
-        assert_eq!(0b1110000, bitchunks.remainder_bits());
-    }
-
-    #[test]
-    fn test_iter_unaligned_remainder_bits_large() {
-        let input: &[u8] = &[
-            0b11111111, 0b00000000, 0b11111111, 0b00000000, 0b11111111, 0b00000000,
-            0b11111111, 0b00000000, 0b11111111,
-        ];
-        let buffer: Buffer = Buffer::from(input);
-
-        let bitchunks = buffer.bit_chunks(2, 63);
-
-        assert_eq!(63, bitchunks.remainder_len());
-        assert_eq!(
-            0b100_0000_0011_1111_1100_0000_0011_1111_1100_0000_0011_1111_1100_0000_0011_1111,
-            bitchunks.remainder_bits()
-        );
-    }
-}
diff --git a/rust/arrow/src/util/bit_util.rs b/rust/arrow/src/util/bit_util.rs
deleted file mode 100644
index 9fa8813e952..00000000000
--- a/rust/arrow/src/util/bit_util.rs
+++ /dev/null
@@ -1,322 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Utils for working with bits
-
-#[cfg(feature = "simd")]
-use packed_simd::u8x64;
-
-const BIT_MASK: [u8; 8] = [1, 2, 4, 8, 16, 32, 64, 128];
-const UNSET_BIT_MASK: [u8; 8] = [
-    255 - 1,
-    255 - 2,
-    255 - 4,
-    255 - 8,
-    255 - 16,
-    255 - 32,
-    255 - 64,
-    255 - 128,
-];
-
-/// Returns the nearest number that is `>=` than `num` and is a multiple of 64
-#[inline]
-pub fn round_upto_multiple_of_64(num: usize) -> usize {
-    round_upto_power_of_2(num, 64)
-}
-
-/// Returns the nearest multiple of `factor` that is `>=` than `num`. Here `factor` must
-/// be a power of 2.
-pub fn round_upto_power_of_2(num: usize, factor: usize) -> usize {
-    debug_assert!(factor > 0 && (factor & (factor - 1)) == 0);
-    (num + (factor - 1)) & !(factor - 1)
-}
-
-/// Returns whether bit at position `i` in `data` is set or not
-#[inline]
-pub fn get_bit(data: &[u8], i: usize) -> bool {
-    (data[i >> 3] & BIT_MASK[i & 7]) != 0
-}
-
-/// Returns whether bit at position `i` in `data` is set or not.
-///
-/// # Safety
-///
-/// Note this doesn't do any bound checking, for performance reason. The caller is
-/// responsible to guarantee that `i` is within bounds.
-#[inline]
-pub unsafe fn get_bit_raw(data: *const u8, i: usize) -> bool {
-    (*data.add(i >> 3) & BIT_MASK[i & 7]) != 0
-}
-
-/// Sets bit at position `i` for `data`
-#[inline]
-pub fn set_bit(data: &mut [u8], i: usize) {
-    data[i >> 3] |= BIT_MASK[i & 7];
-}
-
-/// Sets bit at position `i` for `data`
-///
-/// # Safety
-///
-/// Note this doesn't do any bound checking, for performance reason. The caller is
-/// responsible to guarantee that `i` is within bounds.
-#[inline]
-pub unsafe fn set_bit_raw(data: *mut u8, i: usize) {
-    *data.add(i >> 3) |= BIT_MASK[i & 7];
-}
-
-/// Sets bit at position `i` for `data` to 0
-#[inline]
-pub fn unset_bit(data: &mut [u8], i: usize) {
-    data[i >> 3] &= UNSET_BIT_MASK[i & 7];
-}
-
-/// Sets bit at position `i` for `data` to 0
-///
-/// # Safety
-///
-/// Note this doesn't do any bound checking, for performance reason. The caller is
-/// responsible to guarantee that `i` is within bounds.
-#[inline]
-pub unsafe fn unset_bit_raw(data: *mut u8, i: usize) {
-    *data.add(i >> 3) &= UNSET_BIT_MASK[i & 7];
-}
-
-/// Returns the ceil of `value`/`divisor`
-#[inline]
-pub fn ceil(value: usize, divisor: usize) -> usize {
-    let (quot, rem) = (value / divisor, value % divisor);
-    if rem > 0 && divisor > 0 {
-        quot + 1
-    } else {
-        quot
-    }
-}
-
-/// Performs SIMD bitwise binary operations.
-///
-/// # Safety
-///
-/// Note that each slice should be 64 bytes and it is the callers responsibility to ensure
-/// that this is the case.  If passed slices larger than 64 bytes the operation will only
-/// be performed on the first 64 bytes.  Slices less than 64 bytes will panic.
-#[cfg(simd)]
-pub unsafe fn bitwise_bin_op_simd<F>(left: &[u8], right: &[u8], result: &mut [u8], op: F)
-where
-    F: Fn(u8x64, u8x64) -> u8x64,
-{
-    let left_simd = u8x64::from_slice_unaligned_unchecked(left);
-    let right_simd = u8x64::from_slice_unaligned_unchecked(right);
-    let simd_result = op(left_simd, right_simd);
-    simd_result.write_to_slice_unaligned_unchecked(result);
-}
-
-#[cfg(test)]
-mod tests {
-    use std::collections::HashSet;
-
-    use super::*;
-    use crate::util::test_util::seedable_rng;
-    use rand::Rng;
-
-    #[test]
-    fn test_round_upto_multiple_of_64() {
-        assert_eq!(0, round_upto_multiple_of_64(0));
-        assert_eq!(64, round_upto_multiple_of_64(1));
-        assert_eq!(64, round_upto_multiple_of_64(63));
-        assert_eq!(64, round_upto_multiple_of_64(64));
-        assert_eq!(128, round_upto_multiple_of_64(65));
-        assert_eq!(192, round_upto_multiple_of_64(129));
-    }
-
-    #[test]
-    fn test_get_bit() {
-        // 00001101
-        assert_eq!(true, get_bit(&[0b00001101], 0));
-        assert_eq!(false, get_bit(&[0b00001101], 1));
-        assert_eq!(true, get_bit(&[0b00001101], 2));
-        assert_eq!(true, get_bit(&[0b00001101], 3));
-
-        // 01001001 01010010
-        assert_eq!(true, get_bit(&[0b01001001, 0b01010010], 0));
-        assert_eq!(false, get_bit(&[0b01001001, 0b01010010], 1));
-        assert_eq!(false, get_bit(&[0b01001001, 0b01010010], 2));
-        assert_eq!(true, get_bit(&[0b01001001, 0b01010010], 3));
-        assert_eq!(false, get_bit(&[0b01001001, 0b01010010], 4));
-        assert_eq!(false, get_bit(&[0b01001001, 0b01010010], 5));
-        assert_eq!(true, get_bit(&[0b01001001, 0b01010010], 6));
-        assert_eq!(false, get_bit(&[0b01001001, 0b01010010], 7));
-        assert_eq!(false, get_bit(&[0b01001001, 0b01010010], 8));
-        assert_eq!(true, get_bit(&[0b01001001, 0b01010010], 9));
-        assert_eq!(false, get_bit(&[0b01001001, 0b01010010], 10));
-        assert_eq!(false, get_bit(&[0b01001001, 0b01010010], 11));
-        assert_eq!(true, get_bit(&[0b01001001, 0b01010010], 12));
-        assert_eq!(false, get_bit(&[0b01001001, 0b01010010], 13));
-        assert_eq!(true, get_bit(&[0b01001001, 0b01010010], 14));
-        assert_eq!(false, get_bit(&[0b01001001, 0b01010010], 15));
-    }
-
-    #[test]
-    fn test_get_bit_raw() {
-        const NUM_BYTE: usize = 10;
-        let mut buf = vec![0; NUM_BYTE];
-        let mut expected = vec![];
-        let mut rng = seedable_rng();
-        for i in 0..8 * NUM_BYTE {
-            let b = rng.gen_bool(0.5);
-            expected.push(b);
-            if b {
-                set_bit(&mut buf[..], i)
-            }
-        }
-
-        let raw_ptr = buf.as_ptr();
-        for (i, b) in expected.iter().enumerate() {
-            unsafe {
-                assert_eq!(*b, get_bit_raw(raw_ptr, i));
-            }
-        }
-    }
-
-    #[test]
-    fn test_set_bit() {
-        let mut b = [0b00000010];
-        set_bit(&mut b, 0);
-        assert_eq!([0b00000011], b);
-        set_bit(&mut b, 1);
-        assert_eq!([0b00000011], b);
-        set_bit(&mut b, 7);
-        assert_eq!([0b10000011], b);
-    }
-
-    #[test]
-    fn test_unset_bit() {
-        let mut b = [0b11111101];
-        unset_bit(&mut b, 0);
-        assert_eq!([0b11111100], b);
-        unset_bit(&mut b, 1);
-        assert_eq!([0b11111100], b);
-        unset_bit(&mut b, 7);
-        assert_eq!([0b01111100], b);
-    }
-
-    #[test]
-    fn test_set_bit_raw() {
-        const NUM_BYTE: usize = 10;
-        let mut buf = vec![0; NUM_BYTE];
-        let mut expected = vec![];
-        let mut rng = seedable_rng();
-        for i in 0..8 * NUM_BYTE {
-            let b = rng.gen_bool(0.5);
-            expected.push(b);
-            if b {
-                unsafe {
-                    set_bit_raw(buf.as_mut_ptr(), i);
-                }
-            }
-        }
-
-        let raw_ptr = buf.as_ptr();
-        for (i, b) in expected.iter().enumerate() {
-            unsafe {
-                assert_eq!(*b, get_bit_raw(raw_ptr, i));
-            }
-        }
-    }
-
-    #[test]
-    fn test_unset_bit_raw() {
-        const NUM_BYTE: usize = 10;
-        let mut buf = vec![255; NUM_BYTE];
-        let mut expected = vec![];
-        let mut rng = seedable_rng();
-        for i in 0..8 * NUM_BYTE {
-            let b = rng.gen_bool(0.5);
-            expected.push(b);
-            if !b {
-                unsafe {
-                    unset_bit_raw(buf.as_mut_ptr(), i);
-                }
-            }
-        }
-
-        let raw_ptr = buf.as_ptr();
-        for (i, b) in expected.iter().enumerate() {
-            unsafe {
-                assert_eq!(*b, get_bit_raw(raw_ptr, i));
-            }
-        }
-    }
-
-    #[test]
-    fn test_get_set_bit_roundtrip() {
-        const NUM_BYTES: usize = 10;
-        const NUM_SETS: usize = 10;
-
-        let mut buffer: [u8; NUM_BYTES * 8] = [0; NUM_BYTES * 8];
-        let mut v = HashSet::new();
-        let mut rng = seedable_rng();
-        for _ in 0..NUM_SETS {
-            let offset = rng.gen_range(0, 8 * NUM_BYTES);
-            v.insert(offset);
-            set_bit(&mut buffer[..], offset);
-        }
-        for i in 0..NUM_BYTES * 8 {
-            assert_eq!(v.contains(&i), get_bit(&buffer[..], i));
-        }
-    }
-
-    #[test]
-    #[cfg(all(any(target_arch = "x86", target_arch = "x86_64")))]
-    fn test_ceil() {
-        assert_eq!(ceil(0, 1), 0);
-        assert_eq!(ceil(1, 1), 1);
-        assert_eq!(ceil(1, 2), 1);
-        assert_eq!(ceil(1, 8), 1);
-        assert_eq!(ceil(7, 8), 1);
-        assert_eq!(ceil(8, 8), 1);
-        assert_eq!(ceil(9, 8), 2);
-        assert_eq!(ceil(9, 9), 1);
-        assert_eq!(ceil(10000000000, 10), 1000000000);
-        assert_eq!(ceil(10, 10000000000), 1);
-        assert_eq!(ceil(10000000000, 1000000000), 10);
-    }
-
-    #[test]
-    #[cfg(simd)]
-    fn test_bitwise_and_simd() {
-        let buf1 = [0b00110011u8; 64];
-        let buf2 = [0b11110000u8; 64];
-        let mut buf3 = [0b00000000; 64];
-        unsafe { bitwise_bin_op_simd(&buf1, &buf2, &mut buf3, |a, b| a & b) };
-        for i in buf3.iter() {
-            assert_eq!(&0b00110000u8, i);
-        }
-    }
-
-    #[test]
-    #[cfg(simd)]
-    fn test_bitwise_or_simd() {
-        let buf1 = [0b00110011u8; 64];
-        let buf2 = [0b11110000u8; 64];
-        let mut buf3 = [0b00000000; 64];
-        unsafe { bitwise_bin_op_simd(&buf1, &buf2, &mut buf3, |a, b| a | b) };
-        for i in buf3.iter() {
-            assert_eq!(&0b11110011u8, i);
-        }
-    }
-}
diff --git a/rust/arrow/src/util/data_gen.rs b/rust/arrow/src/util/data_gen.rs
deleted file mode 100644
index cd1f25efea0..00000000000
--- a/rust/arrow/src/util/data_gen.rs
+++ /dev/null
@@ -1,347 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Utilities to generate random arrays and batches
-
-use std::{convert::TryFrom, sync::Arc};
-
-use rand::{distributions::uniform::SampleUniform, Rng};
-
-use crate::error::{ArrowError, Result};
-use crate::record_batch::{RecordBatch, RecordBatchOptions};
-use crate::{array::*, datatypes::SchemaRef};
-use crate::{
-    buffer::{Buffer, MutableBuffer},
-    datatypes::*,
-};
-
-use super::{bench_util::*, bit_util, test_util::seedable_rng};
-
-/// Create a random [RecordBatch] from a schema
-pub fn create_random_batch(
-    schema: SchemaRef,
-    size: usize,
-    null_density: f32,
-    true_density: f32,
-) -> Result<RecordBatch> {
-    let columns = schema
-        .fields()
-        .iter()
-        .map(|field| create_random_array(field, size, null_density, true_density))
-        .collect::<Result<Vec<ArrayRef>>>()?;
-
-    RecordBatch::try_new_with_options(
-        schema,
-        columns,
-        &RecordBatchOptions {
-            match_field_names: false,
-        },
-    )
-}
-
-/// Create a random [ArrayRef] from a [DataType] with a length,
-/// null density and true density (for [BooleanArray]).
-pub fn create_random_array(
-    field: &Field,
-    size: usize,
-    null_density: f32,
-    true_density: f32,
-) -> Result<ArrayRef> {
-    // Override null density with 0.0 if the array is non-nullable
-    let null_density = match field.is_nullable() {
-        true => null_density,
-        false => 0.0,
-    };
-    use DataType::*;
-    Ok(match field.data_type() {
-        Null => Arc::new(NullArray::new(size)) as ArrayRef,
-        Boolean => Arc::new(create_boolean_array(size, null_density, true_density)),
-        Int8 => Arc::new(create_primitive_array::<Int8Type>(size, null_density)),
-        Int16 => Arc::new(create_primitive_array::<Int16Type>(size, null_density)),
-        Int32 => Arc::new(create_primitive_array::<Int32Type>(size, null_density)),
-        Int64 => Arc::new(create_primitive_array::<Int64Type>(size, null_density)),
-        UInt8 => Arc::new(create_primitive_array::<UInt8Type>(size, null_density)),
-        UInt16 => Arc::new(create_primitive_array::<UInt16Type>(size, null_density)),
-        UInt32 => Arc::new(create_primitive_array::<UInt32Type>(size, null_density)),
-        UInt64 => Arc::new(create_primitive_array::<UInt64Type>(size, null_density)),
-        Float16 => {
-            return Err(ArrowError::NotYetImplemented(
-                "Float16 is not implememted".to_string(),
-            ))
-        }
-        Float32 => Arc::new(create_primitive_array::<Float32Type>(size, null_density)),
-        Float64 => Arc::new(create_primitive_array::<Float64Type>(size, null_density)),
-        Timestamp(_, _) => {
-            let int64_array =
-                Arc::new(create_primitive_array::<Int64Type>(size, null_density))
-                    as ArrayRef;
-            return crate::compute::cast(&int64_array, field.data_type());
-        }
-        Date32 => Arc::new(create_primitive_array::<Date32Type>(size, null_density)),
-        Date64 => Arc::new(create_primitive_array::<Date64Type>(size, null_density)),
-        Time32(unit) => match unit {
-            TimeUnit::Second => Arc::new(create_primitive_array::<Time32SecondType>(
-                size,
-                null_density,
-            )) as ArrayRef,
-            TimeUnit::Millisecond => Arc::new(create_primitive_array::<
-                Time32MillisecondType,
-            >(size, null_density)),
-            _ => {
-                return Err(ArrowError::InvalidArgumentError(format!(
-                    "Unsupported unit {:?} for Time32",
-                    unit
-                )))
-            }
-        },
-        Time64(unit) => match unit {
-            TimeUnit::Microsecond => Arc::new(create_primitive_array::<
-                Time64MicrosecondType,
-            >(size, null_density)) as ArrayRef,
-            TimeUnit::Nanosecond => Arc::new(create_primitive_array::<
-                Time64NanosecondType,
-            >(size, null_density)),
-            _ => {
-                return Err(ArrowError::InvalidArgumentError(format!(
-                    "Unsupported unit {:?} for Time64",
-                    unit
-                )))
-            }
-        },
-        Utf8 => Arc::new(create_string_array::<i32>(size, null_density)),
-        LargeUtf8 => Arc::new(create_string_array::<i64>(size, null_density)),
-        Binary => Arc::new(create_binary_array::<i32>(size, null_density)),
-        LargeBinary => Arc::new(create_binary_array::<i64>(size, null_density)),
-        FixedSizeBinary(len) => {
-            Arc::new(create_fsb_array(size, null_density, *len as usize))
-        }
-        List(_) => create_random_list_array(field, size, null_density, true_density)?,
-        LargeList(_) => {
-            create_random_list_array(field, size, null_density, true_density)?
-        }
-        Struct(fields) => Arc::new(StructArray::try_from(
-            fields
-                .iter()
-                .map(|struct_field| {
-                    create_random_array(struct_field, size, null_density, true_density)
-                        .map(|array_ref| (struct_field.name().as_str(), array_ref))
-                })
-                .collect::<Result<Vec<(&str, ArrayRef)>>>()?,
-        )?),
-        other => {
-            return Err(ArrowError::NotYetImplemented(format!(
-                "Generating random arrays not yet implemented for {:?}",
-                other
-            )))
-        }
-    })
-}
-
-#[inline]
-fn create_random_list_array(
-    field: &Field,
-    size: usize,
-    null_density: f32,
-    true_density: f32,
-) -> Result<ArrayRef> {
-    // Override null density with 0.0 if the array is non-nullable
-    let null_density = match field.is_nullable() {
-        true => null_density,
-        false => 0.0,
-    };
-    let list_field;
-    let (offsets, child_len) = match field.data_type() {
-        DataType::List(f) => {
-            let (offsets, child_len) = create_random_offsets::<i32>(size, 0, 5);
-            list_field = f;
-            (Buffer::from(offsets.to_byte_slice()), child_len as usize)
-        }
-        DataType::LargeList(f) => {
-            let (offsets, child_len) = create_random_offsets::<i64>(size, 0, 5);
-            list_field = f;
-            (Buffer::from(offsets.to_byte_slice()), child_len as usize)
-        }
-        _ => {
-            return Err(ArrowError::InvalidArgumentError(format!(
-                "Cannot create list array for field {:?}",
-                field
-            )))
-        }
-    };
-
-    // Create list's child data
-    let child_array =
-        create_random_array(list_field, child_len as usize, null_density, true_density)?;
-    let child_data = child_array.data();
-    // Create list's null buffers, if it is nullable
-    let null_buffer = match field.is_nullable() {
-        true => Some(create_random_null_buffer(size, null_density)),
-        false => None,
-    };
-    let list_data = ArrayData::new(
-        field.data_type().clone(),
-        size,
-        None,
-        null_buffer,
-        0,
-        vec![offsets],
-        vec![child_data.clone()],
-    );
-    Ok(make_array(list_data))
-}
-
-/// Generate random offsets for list arrays
-fn create_random_offsets<T: OffsetSizeTrait + SampleUniform>(
-    size: usize,
-    min: T,
-    max: T,
-) -> (Vec<T>, T) {
-    let rng = &mut seedable_rng();
-
-    let mut current_offset = T::zero();
-
-    let mut offsets = Vec::with_capacity(size + 1);
-    offsets.push(current_offset);
-
-    (0..size).for_each(|_| {
-        current_offset += rng.gen_range(min, max);
-        offsets.push(current_offset);
-    });
-
-    (offsets, current_offset)
-}
-
-fn create_random_null_buffer(size: usize, null_density: f32) -> Buffer {
-    let mut rng = seedable_rng();
-    let mut mut_buf = MutableBuffer::new_null(size);
-    {
-        let mut_slice = mut_buf.as_slice_mut();
-        (0..size).for_each(|i| {
-            if rng.gen::<f32>() >= null_density {
-                bit_util::set_bit(mut_slice, i)
-            }
-        })
-    };
-    mut_buf.into()
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_create_batch() {
-        let size = 32;
-        let fields = vec![Field::new("a", DataType::Int32, true)];
-        let schema = Schema::new(fields);
-        let schema_ref = Arc::new(schema);
-        let batch = create_random_batch(schema_ref.clone(), size, 0.35, 0.7).unwrap();
-
-        assert_eq!(batch.schema(), schema_ref);
-        assert_eq!(batch.num_columns(), schema_ref.fields().len());
-        for array in batch.columns() {
-            assert_eq!(array.len(), size);
-        }
-    }
-
-    #[test]
-    fn test_create_batch_non_null() {
-        let size = 32;
-        let fields = vec![
-            Field::new("a", DataType::Int32, false),
-            Field::new(
-                "b",
-                DataType::List(Box::new(Field::new("item", DataType::LargeUtf8, true))),
-                false,
-            ),
-            Field::new("a", DataType::Int32, false),
-        ];
-        let schema = Schema::new(fields);
-        let schema_ref = Arc::new(schema);
-        let batch = create_random_batch(schema_ref.clone(), size, 0.35, 0.7).unwrap();
-
-        assert_eq!(batch.schema(), schema_ref);
-        assert_eq!(batch.num_columns(), schema_ref.fields().len());
-        for array in batch.columns() {
-            assert_eq!(array.null_count(), 0);
-        }
-        // Test that the list's child values are non-null
-        let b_array = batch.column(1);
-        let list_array = b_array.as_any().downcast_ref::<ListArray>().unwrap();
-        let child_array = make_array(list_array.data().child_data()[0].clone());
-        assert_eq!(child_array.null_count(), 0);
-        // There should be more values than the list, to show that it's a list
-        assert!(child_array.len() > list_array.len());
-    }
-
-    #[test]
-    fn test_create_struct_array() {
-        let size = 32;
-        let struct_fields = vec![
-            Field::new("b", DataType::Boolean, true),
-            Field::new(
-                "c",
-                DataType::LargeList(Box::new(Field::new(
-                    "item",
-                    DataType::List(Box::new(Field::new(
-                        "item",
-                        DataType::FixedSizeBinary(6),
-                        true,
-                    ))),
-                    false,
-                ))),
-                true,
-            ),
-            Field::new(
-                "d",
-                DataType::Struct(vec![
-                    Field::new("d_x", DataType::Int32, true),
-                    Field::new("d_y", DataType::Float32, false),
-                    Field::new("d_z", DataType::Binary, true),
-                ]),
-                true,
-            ),
-        ];
-        let field = Field::new("struct", DataType::Struct(struct_fields), true);
-        let array = create_random_array(&field, size, 0.2, 0.5).unwrap();
-
-        assert_eq!(array.len(), 32);
-        let struct_array = array.as_any().downcast_ref::<StructArray>().unwrap();
-        assert_eq!(struct_array.columns().len(), 3);
-
-        // Test that the nested list makes sense,
-        // i.e. its children's values are more than the parent, to show repetition
-        let col_c = struct_array.column_by_name("c").unwrap();
-        let col_c = col_c.as_any().downcast_ref::<LargeListArray>().unwrap();
-        assert_eq!(col_c.len(), size);
-        let col_c_values = col_c.values();
-        assert!(col_c_values.len() > size);
-        // col_c_values should be a list
-        let col_c_list = col_c_values.as_any().downcast_ref::<ListArray>().unwrap();
-        // Its values should be FixedSizeBinary(6)
-        let fsb = col_c_list.values();
-        assert_eq!(fsb.data_type(), &DataType::FixedSizeBinary(6));
-        assert!(fsb.len() > col_c_list.len());
-
-        // Test nested struct
-        let col_d = struct_array.column_by_name("d").unwrap();
-        let col_d = col_d.as_any().downcast_ref::<StructArray>().unwrap();
-        let col_d_y = col_d.column_by_name("d_y").unwrap();
-        assert_eq!(col_d_y.data_type(), &DataType::Float32);
-        assert_eq!(col_d_y.null_count(), 0);
-    }
-}
diff --git a/rust/arrow/src/util/display.rs b/rust/arrow/src/util/display.rs
deleted file mode 100644
index e40ababd233..00000000000
--- a/rust/arrow/src/util/display.rs
+++ /dev/null
@@ -1,298 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Functions for printing array values, as strings, for debugging
-//! purposes. See the `pretty` crate for additional functions for
-//! record batch pretty printing.
-
-use crate::array::Array;
-use crate::datatypes::{
-    ArrowNativeType, ArrowPrimitiveType, DataType, Int16Type, Int32Type, Int64Type,
-    Int8Type, TimeUnit, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
-};
-use crate::{array, datatypes::IntervalUnit};
-
-use array::DictionaryArray;
-
-use crate::error::{ArrowError, Result};
-
-macro_rules! make_string {
-    ($array_type:ty, $column: ident, $row: ident) => {{
-        let array = $column.as_any().downcast_ref::<$array_type>().unwrap();
-
-        let s = if array.is_null($row) {
-            "".to_string()
-        } else {
-            array.value($row).to_string()
-        };
-
-        Ok(s)
-    }};
-}
-
-macro_rules! make_string_interval_year_month {
-    ($column: ident, $row: ident) => {{
-        let array = $column
-            .as_any()
-            .downcast_ref::<array::IntervalYearMonthArray>()
-            .unwrap();
-
-        let s = if array.is_null($row) {
-            "NULL".to_string()
-        } else {
-            let interval = array.value($row) as f64;
-            let years = (interval / 12_f64).floor();
-            let month = interval - (years * 12_f64);
-
-            format!(
-                "{} years {} mons 0 days 0 hours 0 mins 0.00 secs",
-                years, month,
-            )
-        };
-
-        Ok(s)
-    }};
-}
-
-macro_rules! make_string_interval_day_time {
-    ($column: ident, $row: ident) => {{
-        let array = $column
-            .as_any()
-            .downcast_ref::<array::IntervalDayTimeArray>()
-            .unwrap();
-
-        let s = if array.is_null($row) {
-            "NULL".to_string()
-        } else {
-            let value: u64 = array.value($row) as u64;
-
-            let days_parts: i32 = ((value & 0xFFFFFFFF00000000) >> 32) as i32;
-            let milliseconds_part: i32 = (value & 0xFFFFFFFF) as i32;
-
-            let secs = milliseconds_part / 1000;
-            let mins = secs / 60;
-            let hours = mins / 60;
-
-            let secs = secs - (mins * 60);
-            let mins = mins - (hours * 60);
-
-            format!(
-                "0 years 0 mons {} days {} hours {} mins {}.{:02} secs",
-                days_parts,
-                hours,
-                mins,
-                secs,
-                (milliseconds_part % 1000),
-            )
-        };
-
-        Ok(s)
-    }};
-}
-
-macro_rules! make_string_date {
-    ($array_type:ty, $column: ident, $row: ident) => {{
-        let array = $column.as_any().downcast_ref::<$array_type>().unwrap();
-
-        let s = if array.is_null($row) {
-            "".to_string()
-        } else {
-            array
-                .value_as_date($row)
-                .map(|d| d.to_string())
-                .unwrap_or_else(|| "ERROR CONVERTING DATE".to_string())
-        };
-
-        Ok(s)
-    }};
-}
-
-macro_rules! make_string_time {
-    ($array_type:ty, $column: ident, $row: ident) => {{
-        let array = $column.as_any().downcast_ref::<$array_type>().unwrap();
-
-        let s = if array.is_null($row) {
-            "".to_string()
-        } else {
-            array
-                .value_as_time($row)
-                .map(|d| d.to_string())
-                .unwrap_or_else(|| "ERROR CONVERTING DATE".to_string())
-        };
-
-        Ok(s)
-    }};
-}
-
-macro_rules! make_string_datetime {
-    ($array_type:ty, $column: ident, $row: ident) => {{
-        let array = $column.as_any().downcast_ref::<$array_type>().unwrap();
-
-        let s = if array.is_null($row) {
-            "".to_string()
-        } else {
-            array
-                .value_as_datetime($row)
-                .map(|d| d.to_string())
-                .unwrap_or_else(|| "ERROR CONVERTING DATE".to_string())
-        };
-
-        Ok(s)
-    }};
-}
-
-// It's not possible to do array.value($row).to_string() for &[u8], let's format it as hex
-macro_rules! make_string_hex {
-    ($array_type:ty, $column: ident, $row: ident) => {{
-        let array = $column.as_any().downcast_ref::<$array_type>().unwrap();
-
-        let s = if array.is_null($row) {
-            "".to_string()
-        } else {
-            let mut tmp = "".to_string();
-
-            for character in array.value($row) {
-                tmp += &format!("{:02x}", character);
-            }
-
-            tmp
-        };
-
-        Ok(s)
-    }};
-}
-
-macro_rules! make_string_from_list {
-    ($column: ident, $row: ident) => {{
-        let list = $column
-            .as_any()
-            .downcast_ref::<array::ListArray>()
-            .ok_or(ArrowError::InvalidArgumentError(format!(
-                "Repl error: could not convert list column to list array."
-            )))?
-            .value($row);
-        let string_values = (0..list.len())
-            .map(|i| array_value_to_string(&list.clone(), i))
-            .collect::<Result<Vec<String>>>()?;
-        Ok(format!("[{}]", string_values.join(", ")))
-    }};
-}
-
-/// Get the value at the given row in an array as a String.
-///
-/// Note this function is quite inefficient and is unlikely to be
-/// suitable for converting large arrays or record batches.
-pub fn array_value_to_string(column: &array::ArrayRef, row: usize) -> Result<String> {
-    if column.is_null(row) {
-        return Ok("".to_string());
-    }
-    match column.data_type() {
-        DataType::Utf8 => make_string!(array::StringArray, column, row),
-        DataType::LargeUtf8 => make_string!(array::LargeStringArray, column, row),
-        DataType::Binary => make_string_hex!(array::BinaryArray, column, row),
-        DataType::LargeBinary => make_string_hex!(array::LargeBinaryArray, column, row),
-        DataType::Boolean => make_string!(array::BooleanArray, column, row),
-        DataType::Int8 => make_string!(array::Int8Array, column, row),
-        DataType::Int16 => make_string!(array::Int16Array, column, row),
-        DataType::Int32 => make_string!(array::Int32Array, column, row),
-        DataType::Int64 => make_string!(array::Int64Array, column, row),
-        DataType::UInt8 => make_string!(array::UInt8Array, column, row),
-        DataType::UInt16 => make_string!(array::UInt16Array, column, row),
-        DataType::UInt32 => make_string!(array::UInt32Array, column, row),
-        DataType::UInt64 => make_string!(array::UInt64Array, column, row),
-        DataType::Float16 => make_string!(array::Float32Array, column, row),
-        DataType::Float32 => make_string!(array::Float32Array, column, row),
-        DataType::Float64 => make_string!(array::Float64Array, column, row),
-        DataType::Timestamp(unit, _) if *unit == TimeUnit::Second => {
-            make_string_datetime!(array::TimestampSecondArray, column, row)
-        }
-        DataType::Timestamp(unit, _) if *unit == TimeUnit::Millisecond => {
-            make_string_datetime!(array::TimestampMillisecondArray, column, row)
-        }
-        DataType::Timestamp(unit, _) if *unit == TimeUnit::Microsecond => {
-            make_string_datetime!(array::TimestampMicrosecondArray, column, row)
-        }
-        DataType::Timestamp(unit, _) if *unit == TimeUnit::Nanosecond => {
-            make_string_datetime!(array::TimestampNanosecondArray, column, row)
-        }
-        DataType::Date32 => make_string_date!(array::Date32Array, column, row),
-        DataType::Date64 => make_string_date!(array::Date64Array, column, row),
-        DataType::Time32(unit) if *unit == TimeUnit::Second => {
-            make_string_time!(array::Time32SecondArray, column, row)
-        }
-        DataType::Time32(unit) if *unit == TimeUnit::Millisecond => {
-            make_string_time!(array::Time32MillisecondArray, column, row)
-        }
-        DataType::Time64(unit) if *unit == TimeUnit::Microsecond => {
-            make_string_time!(array::Time64MicrosecondArray, column, row)
-        }
-        DataType::Time64(unit) if *unit == TimeUnit::Nanosecond => {
-            make_string_time!(array::Time64NanosecondArray, column, row)
-        }
-        DataType::Interval(unit) => match unit {
-            IntervalUnit::DayTime => {
-                make_string_interval_day_time!(column, row)
-            }
-            IntervalUnit::YearMonth => {
-                make_string_interval_year_month!(column, row)
-            }
-        },
-        DataType::List(_) => make_string_from_list!(column, row),
-        DataType::Dictionary(index_type, _value_type) => match **index_type {
-            DataType::Int8 => dict_array_value_to_string::<Int8Type>(column, row),
-            DataType::Int16 => dict_array_value_to_string::<Int16Type>(column, row),
-            DataType::Int32 => dict_array_value_to_string::<Int32Type>(column, row),
-            DataType::Int64 => dict_array_value_to_string::<Int64Type>(column, row),
-            DataType::UInt8 => dict_array_value_to_string::<UInt8Type>(column, row),
-            DataType::UInt16 => dict_array_value_to_string::<UInt16Type>(column, row),
-            DataType::UInt32 => dict_array_value_to_string::<UInt32Type>(column, row),
-            DataType::UInt64 => dict_array_value_to_string::<UInt64Type>(column, row),
-            _ => Err(ArrowError::InvalidArgumentError(format!(
-                "Pretty printing not supported for {:?} due to index type",
-                column.data_type()
-            ))),
-        },
-        _ => Err(ArrowError::InvalidArgumentError(format!(
-            "Pretty printing not implemented for {:?} type",
-            column.data_type()
-        ))),
-    }
-}
-
-/// Converts the value of the dictionary array at `row` to a String
-fn dict_array_value_to_string<K: ArrowPrimitiveType>(
-    colum: &array::ArrayRef,
-    row: usize,
-) -> Result<String> {
-    let dict_array = colum.as_any().downcast_ref::<DictionaryArray<K>>().unwrap();
-
-    let keys_array = dict_array.keys_array();
-
-    if keys_array.is_null(row) {
-        return Ok(String::from(""));
-    }
-
-    let dict_index = keys_array.value(row).to_usize().ok_or_else(|| {
-        ArrowError::InvalidArgumentError(format!(
-            "Can not convert value {:?} at index {:?} to usize for string conversion.",
-            keys_array.value(row),
-            row
-        ))
-    })?;
-
-    array_value_to_string(&dict_array.values(), dict_index)
-}
diff --git a/rust/arrow/src/util/integration_util.rs b/rust/arrow/src/util/integration_util.rs
deleted file mode 100644
index ec2c294cb4e..00000000000
--- a/rust/arrow/src/util/integration_util.rs
+++ /dev/null
@@ -1,957 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Utils for JSON integration testing
-//!
-//! These utilities define structs that read the integration JSON format for integration testing purposes.
-
-use serde_derive::{Deserialize, Serialize};
-use serde_json::{Map as SJMap, Number as VNumber, Value};
-
-use crate::array::*;
-use crate::datatypes::*;
-use crate::error::Result;
-use crate::record_batch::{RecordBatch, RecordBatchReader};
-
-/// A struct that represents an Arrow file with a schema and record batches
-#[derive(Deserialize, Serialize, Debug)]
-pub struct ArrowJson {
-    pub schema: ArrowJsonSchema,
-    pub batches: Vec<ArrowJsonBatch>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub dictionaries: Option<Vec<ArrowJsonDictionaryBatch>>,
-}
-
-/// A struct that partially reads the Arrow JSON schema.
-///
-/// Fields are left as JSON `Value` as they vary by `DataType`
-#[derive(Deserialize, Serialize, Debug)]
-pub struct ArrowJsonSchema {
-    pub fields: Vec<ArrowJsonField>,
-}
-
-/// Fields are left as JSON `Value` as they vary by `DataType`
-#[derive(Deserialize, Serialize, Debug)]
-pub struct ArrowJsonField {
-    pub name: String,
-    #[serde(rename = "type")]
-    pub field_type: Value,
-    pub nullable: bool,
-    pub children: Vec<ArrowJsonField>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub dictionary: Option<ArrowJsonFieldDictionary>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub metadata: Option<Value>,
-}
-
-impl From<&Field> for ArrowJsonField {
-    fn from(field: &Field) -> Self {
-        let metadata_value = match field.metadata() {
-            Some(kv_list) => {
-                let mut array = Vec::new();
-                for (k, v) in kv_list {
-                    let mut kv_map = SJMap::new();
-                    kv_map.insert(k.clone(), Value::String(v.clone()));
-                    array.push(Value::Object(kv_map));
-                }
-                if !array.is_empty() {
-                    Some(Value::Array(array))
-                } else {
-                    None
-                }
-            }
-            _ => None,
-        };
-
-        Self {
-            name: field.name().to_string(),
-            field_type: field.data_type().to_json(),
-            nullable: field.is_nullable(),
-            children: vec![],
-            dictionary: None, // TODO: not enough info
-            metadata: metadata_value,
-        }
-    }
-}
-
-#[derive(Deserialize, Serialize, Debug)]
-pub struct ArrowJsonFieldDictionary {
-    pub id: i64,
-    #[serde(rename = "indexType")]
-    pub index_type: DictionaryIndexType,
-    #[serde(rename = "isOrdered")]
-    pub is_ordered: bool,
-}
-
-#[derive(Deserialize, Serialize, Debug)]
-pub struct DictionaryIndexType {
-    pub name: String,
-    #[serde(rename = "isSigned")]
-    pub is_signed: bool,
-    #[serde(rename = "bitWidth")]
-    pub bit_width: i64,
-}
-
-/// A struct that partially reads the Arrow JSON record batch
-#[derive(Deserialize, Serialize, Debug)]
-pub struct ArrowJsonBatch {
-    count: usize,
-    pub columns: Vec<ArrowJsonColumn>,
-}
-
-/// A struct that partially reads the Arrow JSON dictionary batch
-#[derive(Deserialize, Serialize, Debug)]
-#[allow(non_snake_case)]
-pub struct ArrowJsonDictionaryBatch {
-    pub id: i64,
-    pub data: ArrowJsonBatch,
-}
-
-/// A struct that partially reads the Arrow JSON column/array
-#[derive(Deserialize, Serialize, Clone, Debug)]
-pub struct ArrowJsonColumn {
-    name: String,
-    pub count: usize,
-    #[serde(rename = "VALIDITY")]
-    pub validity: Option<Vec<u8>>,
-    #[serde(rename = "DATA")]
-    pub data: Option<Vec<Value>>,
-    #[serde(rename = "OFFSET")]
-    pub offset: Option<Vec<Value>>, // leaving as Value as 64-bit offsets are strings
-    pub children: Option<Vec<ArrowJsonColumn>>,
-}
-
-impl ArrowJson {
-    /// Compare the Arrow JSON with a record batch reader
-    pub fn equals_reader(&self, reader: &mut dyn RecordBatchReader) -> bool {
-        if !self.schema.equals_schema(&reader.schema()) {
-            return false;
-        }
-        self.batches.iter().all(|col| {
-            let batch = reader.next();
-            match batch {
-                Some(Ok(batch)) => col.equals_batch(&batch),
-                _ => false,
-            }
-        })
-    }
-}
-
-impl ArrowJsonSchema {
-    /// Compare the Arrow JSON schema with the Arrow `Schema`
-    fn equals_schema(&self, schema: &Schema) -> bool {
-        let field_len = self.fields.len();
-        if field_len != schema.fields().len() {
-            return false;
-        }
-        for i in 0..field_len {
-            let json_field = &self.fields[i];
-            let field = schema.field(i);
-            if !json_field.equals_field(field) {
-                return false;
-            }
-        }
-        true
-    }
-}
-
-impl ArrowJsonField {
-    /// Compare the Arrow JSON field with the Arrow `Field`
-    fn equals_field(&self, field: &Field) -> bool {
-        // convert to a field
-        match self.to_arrow_field() {
-            Ok(self_field) => {
-                assert_eq!(&self_field, field, "Arrow fields not the same");
-                true
-            }
-            Err(e) => {
-                eprintln!(
-                    "Encountered error while converting JSON field to Arrow field: {:?}",
-                    e
-                );
-                false
-            }
-        }
-    }
-
-    /// Convert to an Arrow Field
-    /// TODO: convert to use an Into
-    fn to_arrow_field(&self) -> Result<Field> {
-        // a bit regressive, but we have to convert the field to JSON in order to convert it
-        let field = serde_json::to_value(self)?;
-        Field::from(&field)
-    }
-}
-
-impl ArrowJsonBatch {
-    /// Compare the Arrow JSON record batch with a `RecordBatch`
-    fn equals_batch(&self, batch: &RecordBatch) -> bool {
-        if self.count != batch.num_rows() {
-            return false;
-        }
-        let num_columns = self.columns.len();
-        if num_columns != batch.num_columns() {
-            return false;
-        }
-        let schema = batch.schema();
-        self.columns
-            .iter()
-            .zip(batch.columns())
-            .zip(schema.fields())
-            .all(|((col, arr), field)| {
-                // compare each column based on its type
-                if &col.name != field.name() {
-                    return false;
-                }
-                let json_array: Vec<Value> = json_from_col(&col, field.data_type());
-                match field.data_type() {
-                    DataType::Null => {
-                        let arr: &NullArray =
-                            arr.as_any().downcast_ref::<NullArray>().unwrap();
-                        // NullArrays should have the same length, json_array is empty
-                        arr.len() == col.count
-                    }
-                    DataType::Boolean => {
-                        let arr = arr.as_any().downcast_ref::<BooleanArray>().unwrap();
-                        arr.equals_json(&json_array.iter().collect::<Vec<&Value>>()[..])
-                    }
-                    DataType::Int8 => {
-                        let arr = arr.as_any().downcast_ref::<Int8Array>().unwrap();
-                        arr.equals_json(&json_array.iter().collect::<Vec<&Value>>()[..])
-                    }
-                    DataType::Int16 => {
-                        let arr = arr.as_any().downcast_ref::<Int16Array>().unwrap();
-                        arr.equals_json(&json_array.iter().collect::<Vec<&Value>>()[..])
-                    }
-                    DataType::Int32 | DataType::Date32 | DataType::Time32(_) => {
-                        let arr = Int32Array::from(arr.data().clone());
-                        arr.equals_json(&json_array.iter().collect::<Vec<&Value>>()[..])
-                    }
-                    DataType::Int64
-                    | DataType::Date64
-                    | DataType::Time64(_)
-                    | DataType::Timestamp(_, _)
-                    | DataType::Duration(_) => {
-                        let arr = Int64Array::from(arr.data().clone());
-                        arr.equals_json(&json_array.iter().collect::<Vec<&Value>>()[..])
-                    }
-                    DataType::Interval(IntervalUnit::YearMonth) => {
-                        let arr = IntervalYearMonthArray::from(arr.data().clone());
-                        arr.equals_json(&json_array.iter().collect::<Vec<&Value>>()[..])
-                    }
-                    DataType::Interval(IntervalUnit::DayTime) => {
-                        let arr = IntervalDayTimeArray::from(arr.data().clone());
-                        let x = json_array
-                            .iter()
-                            .map(|v| {
-                                match v {
-                                    Value::Null => Value::Null,
-                                    Value::Object(v) => {
-                                        // interval has days and milliseconds
-                                        let days: i32 =
-                                            v.get("days").unwrap().as_i64().unwrap()
-                                                as i32;
-                                        let milliseconds: i32 = v
-                                            .get("milliseconds")
-                                            .unwrap()
-                                            .as_i64()
-                                            .unwrap()
-                                            as i32;
-                                        let value: i64 = unsafe {
-                                            std::mem::transmute::<[i32; 2], i64>([
-                                                days,
-                                                milliseconds,
-                                            ])
-                                        };
-                                        Value::Number(VNumber::from(value))
-                                    }
-                                    // return null if Value is not an object
-                                    _ => Value::Null,
-                                }
-                            })
-                            .collect::<Vec<Value>>();
-                        arr.equals_json(&x.iter().collect::<Vec<&Value>>()[..])
-                    }
-                    DataType::UInt8 => {
-                        let arr = arr.as_any().downcast_ref::<UInt8Array>().unwrap();
-                        arr.equals_json(&json_array.iter().collect::<Vec<&Value>>()[..])
-                    }
-                    DataType::UInt16 => {
-                        let arr = arr.as_any().downcast_ref::<UInt16Array>().unwrap();
-                        arr.equals_json(&json_array.iter().collect::<Vec<&Value>>()[..])
-                    }
-                    DataType::UInt32 => {
-                        let arr = arr.as_any().downcast_ref::<UInt32Array>().unwrap();
-                        arr.equals_json(&json_array.iter().collect::<Vec<&Value>>()[..])
-                    }
-                    DataType::UInt64 => {
-                        let arr = arr.as_any().downcast_ref::<UInt64Array>().unwrap();
-                        arr.equals_json(&json_array.iter().collect::<Vec<&Value>>()[..])
-                    }
-                    DataType::Float32 => {
-                        let arr = arr.as_any().downcast_ref::<Float32Array>().unwrap();
-                        arr.equals_json(&json_array.iter().collect::<Vec<&Value>>()[..])
-                    }
-                    DataType::Float64 => {
-                        let arr = arr.as_any().downcast_ref::<Float64Array>().unwrap();
-                        arr.equals_json(&json_array.iter().collect::<Vec<&Value>>()[..])
-                    }
-                    DataType::Binary => {
-                        let arr = arr.as_any().downcast_ref::<BinaryArray>().unwrap();
-                        arr.equals_json(&json_array.iter().collect::<Vec<&Value>>()[..])
-                    }
-                    DataType::LargeBinary => {
-                        let arr =
-                            arr.as_any().downcast_ref::<LargeBinaryArray>().unwrap();
-                        arr.equals_json(&json_array.iter().collect::<Vec<&Value>>()[..])
-                    }
-                    DataType::FixedSizeBinary(_) => {
-                        let arr =
-                            arr.as_any().downcast_ref::<FixedSizeBinaryArray>().unwrap();
-                        arr.equals_json(&json_array.iter().collect::<Vec<&Value>>()[..])
-                    }
-                    DataType::Utf8 => {
-                        let arr = arr.as_any().downcast_ref::<StringArray>().unwrap();
-                        arr.equals_json(&json_array.iter().collect::<Vec<&Value>>()[..])
-                    }
-                    DataType::LargeUtf8 => {
-                        let arr =
-                            arr.as_any().downcast_ref::<LargeStringArray>().unwrap();
-                        arr.equals_json(&json_array.iter().collect::<Vec<&Value>>()[..])
-                    }
-                    DataType::List(_) => {
-                        let arr = arr.as_any().downcast_ref::<ListArray>().unwrap();
-                        arr.equals_json(&json_array.iter().collect::<Vec<&Value>>()[..])
-                    }
-                    DataType::LargeList(_) => {
-                        let arr = arr.as_any().downcast_ref::<LargeListArray>().unwrap();
-                        arr.equals_json(&json_array.iter().collect::<Vec<&Value>>()[..])
-                    }
-                    DataType::FixedSizeList(_, _) => {
-                        let arr =
-                            arr.as_any().downcast_ref::<FixedSizeListArray>().unwrap();
-                        arr.equals_json(&json_array.iter().collect::<Vec<&Value>>()[..])
-                    }
-                    DataType::Struct(_) => {
-                        let arr = arr.as_any().downcast_ref::<StructArray>().unwrap();
-                        arr.equals_json(&json_array.iter().collect::<Vec<&Value>>()[..])
-                    }
-                    DataType::Decimal(_, _) => {
-                        let arr = arr.as_any().downcast_ref::<DecimalArray>().unwrap();
-                        arr.equals_json(&json_array.iter().collect::<Vec<&Value>>()[..])
-                    }
-                    DataType::Dictionary(ref key_type, _) => match key_type.as_ref() {
-                        DataType::Int8 => {
-                            let arr = arr
-                                .as_any()
-                                .downcast_ref::<Int8DictionaryArray>()
-                                .unwrap();
-                            arr.equals_json(
-                                &json_array.iter().collect::<Vec<&Value>>()[..],
-                            )
-                        }
-                        DataType::Int16 => {
-                            let arr = arr
-                                .as_any()
-                                .downcast_ref::<Int16DictionaryArray>()
-                                .unwrap();
-                            arr.equals_json(
-                                &json_array.iter().collect::<Vec<&Value>>()[..],
-                            )
-                        }
-                        DataType::Int32 => {
-                            let arr = arr
-                                .as_any()
-                                .downcast_ref::<Int32DictionaryArray>()
-                                .unwrap();
-                            arr.equals_json(
-                                &json_array.iter().collect::<Vec<&Value>>()[..],
-                            )
-                        }
-                        DataType::Int64 => {
-                            let arr = arr
-                                .as_any()
-                                .downcast_ref::<Int64DictionaryArray>()
-                                .unwrap();
-                            arr.equals_json(
-                                &json_array.iter().collect::<Vec<&Value>>()[..],
-                            )
-                        }
-                        DataType::UInt8 => {
-                            let arr = arr
-                                .as_any()
-                                .downcast_ref::<UInt8DictionaryArray>()
-                                .unwrap();
-                            arr.equals_json(
-                                &json_array.iter().collect::<Vec<&Value>>()[..],
-                            )
-                        }
-                        DataType::UInt16 => {
-                            let arr = arr
-                                .as_any()
-                                .downcast_ref::<UInt16DictionaryArray>()
-                                .unwrap();
-                            arr.equals_json(
-                                &json_array.iter().collect::<Vec<&Value>>()[..],
-                            )
-                        }
-                        DataType::UInt32 => {
-                            let arr = arr
-                                .as_any()
-                                .downcast_ref::<UInt32DictionaryArray>()
-                                .unwrap();
-                            arr.equals_json(
-                                &json_array.iter().collect::<Vec<&Value>>()[..],
-                            )
-                        }
-                        DataType::UInt64 => {
-                            let arr = arr
-                                .as_any()
-                                .downcast_ref::<UInt64DictionaryArray>()
-                                .unwrap();
-                            arr.equals_json(
-                                &json_array.iter().collect::<Vec<&Value>>()[..],
-                            )
-                        }
-                        t => panic!("Unsupported dictionary comparison for {:?}", t),
-                    },
-                    t => panic!("Unsupported comparison for {:?}", t),
-                }
-            })
-    }
-
-    pub fn from_batch(batch: &RecordBatch) -> ArrowJsonBatch {
-        let mut json_batch = ArrowJsonBatch {
-            count: batch.num_rows(),
-            columns: Vec::with_capacity(batch.num_columns()),
-        };
-
-        for (col, field) in batch.columns().iter().zip(batch.schema().fields.iter()) {
-            let json_col = match field.data_type() {
-                DataType::Int8 => {
-                    let col = col.as_any().downcast_ref::<Int8Array>().unwrap();
-
-                    let mut validity: Vec<u8> = Vec::with_capacity(col.len());
-                    let mut data: Vec<Value> = Vec::with_capacity(col.len());
-
-                    for i in 0..col.len() {
-                        if col.is_null(i) {
-                            validity.push(1);
-                            data.push(0i8.into());
-                        } else {
-                            validity.push(0);
-                            data.push(col.value(i).into());
-                        }
-                    }
-
-                    ArrowJsonColumn {
-                        name: field.name().clone(),
-                        count: col.len(),
-                        validity: Some(validity),
-                        data: Some(data),
-                        offset: None,
-                        children: None,
-                    }
-                }
-                _ => ArrowJsonColumn {
-                    name: field.name().clone(),
-                    count: col.len(),
-                    validity: None,
-                    data: None,
-                    offset: None,
-                    children: None,
-                },
-            };
-
-            json_batch.columns.push(json_col);
-        }
-
-        json_batch
-    }
-}
-
-/// Convert an Arrow JSON column/array into a vector of `Value`
-fn json_from_col(col: &ArrowJsonColumn, data_type: &DataType) -> Vec<Value> {
-    match data_type {
-        DataType::List(field) => json_from_list_col(col, field.data_type()),
-        DataType::FixedSizeList(field, list_size) => {
-            json_from_fixed_size_list_col(col, field.data_type(), *list_size as usize)
-        }
-        DataType::Struct(fields) => json_from_struct_col(col, fields),
-        DataType::Int64
-        | DataType::UInt64
-        | DataType::Date64
-        | DataType::Time64(_)
-        | DataType::Timestamp(_, _)
-        | DataType::Duration(_) => {
-            // convert int64 data from strings to numbers
-            let converted_col: Vec<Value> = col
-                .data
-                .clone()
-                .unwrap()
-                .iter()
-                .map(|v| {
-                    Value::Number(match v {
-                        Value::Number(number) => number.clone(),
-                        Value::String(string) => VNumber::from(
-                            string
-                                .parse::<i64>()
-                                .expect("Unable to parse string as i64"),
-                        ),
-                        t => panic!("Cannot convert {} to number", t),
-                    })
-                })
-                .collect();
-            merge_json_array(
-                col.validity.as_ref().unwrap().as_slice(),
-                converted_col.as_slice(),
-            )
-        }
-        DataType::Null => vec![],
-        _ => merge_json_array(
-            col.validity.as_ref().unwrap().as_slice(),
-            &col.data.clone().unwrap(),
-        ),
-    }
-}
-
-/// Merge VALIDITY and DATA vectors from a primitive data type into a `Value` vector with nulls
-fn merge_json_array(validity: &[u8], data: &[Value]) -> Vec<Value> {
-    validity
-        .iter()
-        .zip(data)
-        .map(|(v, d)| match v {
-            0 => Value::Null,
-            1 => d.clone(),
-            _ => panic!("Validity data should be 0 or 1"),
-        })
-        .collect()
-}
-
-/// Convert an Arrow JSON column/array of a `DataType::Struct` into a vector of `Value`
-fn json_from_struct_col(col: &ArrowJsonColumn, fields: &[Field]) -> Vec<Value> {
-    let mut values = Vec::with_capacity(col.count);
-
-    let children: Vec<Vec<Value>> = col
-        .children
-        .clone()
-        .unwrap()
-        .iter()
-        .zip(fields)
-        .map(|(child, field)| json_from_col(child, field.data_type()))
-        .collect();
-
-    // create a struct from children
-    for j in 0..col.count {
-        let mut map = serde_json::map::Map::new();
-        for i in 0..children.len() {
-            map.insert(fields[i].name().to_string(), children[i][j].clone());
-        }
-        values.push(Value::Object(map));
-    }
-
-    values
-}
-
-/// Convert an Arrow JSON column/array of a `DataType::List` into a vector of `Value`
-fn json_from_list_col(col: &ArrowJsonColumn, data_type: &DataType) -> Vec<Value> {
-    let mut values = Vec::with_capacity(col.count);
-
-    // get the inner array
-    let child = &col.children.clone().expect("list type must have children")[0];
-    let offsets: Vec<usize> = col
-        .offset
-        .clone()
-        .unwrap()
-        .iter()
-        .map(|o| match o {
-            Value::String(s) => s.parse::<usize>().unwrap(),
-            Value::Number(n) => n.as_u64().unwrap() as usize,
-            _ => panic!(
-                "Offsets should be numbers or strings that are convertible to numbers"
-            ),
-        })
-        .collect();
-    let inner = match data_type {
-        DataType::List(ref field) => json_from_col(child, field.data_type()),
-        DataType::Struct(fields) => json_from_struct_col(col, fields),
-        _ => merge_json_array(
-            child.validity.as_ref().unwrap().as_slice(),
-            &child.data.clone().unwrap(),
-        ),
-    };
-
-    for i in 0..col.count {
-        match &col.validity {
-            Some(validity) => match &validity[i] {
-                0 => values.push(Value::Null),
-                1 => {
-                    values.push(Value::Array(inner[offsets[i]..offsets[i + 1]].to_vec()))
-                }
-                _ => panic!("Validity data should be 0 or 1"),
-            },
-            None => {
-                // Null type does not have a validity vector
-            }
-        }
-    }
-
-    values
-}
-
-/// Convert an Arrow JSON column/array of a `DataType::List` into a vector of `Value`
-fn json_from_fixed_size_list_col(
-    col: &ArrowJsonColumn,
-    data_type: &DataType,
-    list_size: usize,
-) -> Vec<Value> {
-    let mut values = Vec::with_capacity(col.count);
-
-    // get the inner array
-    let child = &col.children.clone().expect("list type must have children")[0];
-    let inner = match data_type {
-        DataType::List(ref field) => json_from_col(child, field.data_type()),
-        DataType::FixedSizeList(ref field, _) => json_from_col(child, field.data_type()),
-        DataType::Struct(fields) => json_from_struct_col(col, fields),
-        _ => merge_json_array(
-            child.validity.as_ref().unwrap().as_slice(),
-            &child.data.clone().unwrap(),
-        ),
-    };
-
-    for i in 0..col.count {
-        match &col.validity {
-            Some(validity) => match &validity[i] {
-                0 => values.push(Value::Null),
-                1 => values.push(Value::Array(
-                    inner[(list_size * i)..(list_size * (i + 1))].to_vec(),
-                )),
-                _ => panic!("Validity data should be 0 or 1"),
-            },
-            None => {}
-        }
-    }
-
-    values
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    use std::fs::File;
-    use std::io::Read;
-    use std::sync::Arc;
-
-    use crate::buffer::Buffer;
-
-    #[test]
-    fn test_schema_equality() {
-        let json = r#"
-        {
-            "fields": [
-                {
-                    "name": "c1",
-                    "type": {"name": "int", "isSigned": true, "bitWidth": 32},
-                    "nullable": true,
-                    "children": []
-                },
-                {
-                    "name": "c2",
-                    "type": {"name": "floatingpoint", "precision": "DOUBLE"},
-                    "nullable": true,
-                    "children": []
-                },
-                {
-                    "name": "c3",
-                    "type": {"name": "utf8"},
-                    "nullable": true,
-                    "children": []
-                },
-                {
-                    "name": "c4",
-                    "type": {
-                        "name": "list"
-                    },
-                    "nullable": true,
-                    "children": [
-                        {
-                            "name": "custom_item",
-                            "type": {
-                                "name": "int",
-                                "isSigned": true,
-                                "bitWidth": 32
-                            },
-                            "nullable": false,
-                            "children": []
-                        }
-                    ]
-                }
-            ]
-        }"#;
-        let json_schema: ArrowJsonSchema = serde_json::from_str(json).unwrap();
-        let schema = Schema::new(vec![
-            Field::new("c1", DataType::Int32, true),
-            Field::new("c2", DataType::Float64, true),
-            Field::new("c3", DataType::Utf8, true),
-            Field::new(
-                "c4",
-                DataType::List(Box::new(Field::new(
-                    "custom_item",
-                    DataType::Int32,
-                    false,
-                ))),
-                true,
-            ),
-        ]);
-        assert!(json_schema.equals_schema(&schema));
-    }
-
-    #[test]
-    fn test_arrow_data_equality() {
-        let secs_tz = Some("Europe/Budapest".to_string());
-        let millis_tz = Some("America/New_York".to_string());
-        let micros_tz = Some("UTC".to_string());
-        let nanos_tz = Some("Africa/Johannesburg".to_string());
-
-        let schema = Schema::new(vec![
-            {
-                let mut f =
-                    Field::new("bools-with-metadata-map", DataType::Boolean, true);
-                f.set_metadata(Some(
-                    [("k".to_string(), "v".to_string())]
-                        .iter()
-                        .cloned()
-                        .collect(),
-                ));
-                f
-            },
-            {
-                let mut f =
-                    Field::new("bools-with-metadata-vec", DataType::Boolean, true);
-                f.set_metadata(Some(
-                    [("k2".to_string(), "v2".to_string())]
-                        .iter()
-                        .cloned()
-                        .collect(),
-                ));
-                f
-            },
-            Field::new("bools", DataType::Boolean, true),
-            Field::new("int8s", DataType::Int8, true),
-            Field::new("int16s", DataType::Int16, true),
-            Field::new("int32s", DataType::Int32, true),
-            Field::new("int64s", DataType::Int64, true),
-            Field::new("uint8s", DataType::UInt8, true),
-            Field::new("uint16s", DataType::UInt16, true),
-            Field::new("uint32s", DataType::UInt32, true),
-            Field::new("uint64s", DataType::UInt64, true),
-            Field::new("float32s", DataType::Float32, true),
-            Field::new("float64s", DataType::Float64, true),
-            Field::new("date_days", DataType::Date32, true),
-            Field::new("date_millis", DataType::Date64, true),
-            Field::new("time_secs", DataType::Time32(TimeUnit::Second), true),
-            Field::new("time_millis", DataType::Time32(TimeUnit::Millisecond), true),
-            Field::new("time_micros", DataType::Time64(TimeUnit::Microsecond), true),
-            Field::new("time_nanos", DataType::Time64(TimeUnit::Nanosecond), true),
-            Field::new("ts_secs", DataType::Timestamp(TimeUnit::Second, None), true),
-            Field::new(
-                "ts_millis",
-                DataType::Timestamp(TimeUnit::Millisecond, None),
-                true,
-            ),
-            Field::new(
-                "ts_micros",
-                DataType::Timestamp(TimeUnit::Microsecond, None),
-                true,
-            ),
-            Field::new(
-                "ts_nanos",
-                DataType::Timestamp(TimeUnit::Nanosecond, None),
-                true,
-            ),
-            Field::new(
-                "ts_secs_tz",
-                DataType::Timestamp(TimeUnit::Second, secs_tz.clone()),
-                true,
-            ),
-            Field::new(
-                "ts_millis_tz",
-                DataType::Timestamp(TimeUnit::Millisecond, millis_tz.clone()),
-                true,
-            ),
-            Field::new(
-                "ts_micros_tz",
-                DataType::Timestamp(TimeUnit::Microsecond, micros_tz.clone()),
-                true,
-            ),
-            Field::new(
-                "ts_nanos_tz",
-                DataType::Timestamp(TimeUnit::Nanosecond, nanos_tz.clone()),
-                true,
-            ),
-            Field::new("utf8s", DataType::Utf8, true),
-            Field::new(
-                "lists",
-                DataType::List(Box::new(Field::new("item", DataType::Int32, true))),
-                true,
-            ),
-            Field::new(
-                "structs",
-                DataType::Struct(vec![
-                    Field::new("int32s", DataType::Int32, true),
-                    Field::new("utf8s", DataType::Utf8, true),
-                ]),
-                true,
-            ),
-        ]);
-
-        let bools_with_metadata_map =
-            BooleanArray::from(vec![Some(true), None, Some(false)]);
-        let bools_with_metadata_vec =
-            BooleanArray::from(vec![Some(true), None, Some(false)]);
-        let bools = BooleanArray::from(vec![Some(true), None, Some(false)]);
-        let int8s = Int8Array::from(vec![Some(1), None, Some(3)]);
-        let int16s = Int16Array::from(vec![Some(1), None, Some(3)]);
-        let int32s = Int32Array::from(vec![Some(1), None, Some(3)]);
-        let int64s = Int64Array::from(vec![Some(1), None, Some(3)]);
-        let uint8s = UInt8Array::from(vec![Some(1), None, Some(3)]);
-        let uint16s = UInt16Array::from(vec![Some(1), None, Some(3)]);
-        let uint32s = UInt32Array::from(vec![Some(1), None, Some(3)]);
-        let uint64s = UInt64Array::from(vec![Some(1), None, Some(3)]);
-        let float32s = Float32Array::from(vec![Some(1.0), None, Some(3.0)]);
-        let float64s = Float64Array::from(vec![Some(1.0), None, Some(3.0)]);
-        let date_days = Date32Array::from(vec![Some(1196848), None, None]);
-        let date_millis = Date64Array::from(vec![
-            Some(167903550396207),
-            Some(29923997007884),
-            Some(30612271819236),
-        ]);
-        let time_secs =
-            Time32SecondArray::from(vec![Some(27974), Some(78592), Some(43207)]);
-        let time_millis = Time32MillisecondArray::from(vec![
-            Some(6613125),
-            Some(74667230),
-            Some(52260079),
-        ]);
-        let time_micros =
-            Time64MicrosecondArray::from(vec![Some(62522958593), None, None]);
-        let time_nanos = Time64NanosecondArray::from(vec![
-            Some(73380123595985),
-            None,
-            Some(16584393546415),
-        ]);
-        let ts_secs = TimestampSecondArray::from_opt_vec(
-            vec![None, Some(193438817552), None],
-            None,
-        );
-        let ts_millis = TimestampMillisecondArray::from_opt_vec(
-            vec![None, Some(38606916383008), Some(58113709376587)],
-            None,
-        );
-        let ts_micros =
-            TimestampMicrosecondArray::from_opt_vec(vec![None, None, None], None);
-        let ts_nanos = TimestampNanosecondArray::from_opt_vec(
-            vec![None, None, Some(-6473623571954960143)],
-            None,
-        );
-        let ts_secs_tz = TimestampSecondArray::from_opt_vec(
-            vec![None, Some(193438817552), None],
-            secs_tz,
-        );
-        let ts_millis_tz = TimestampMillisecondArray::from_opt_vec(
-            vec![None, Some(38606916383008), Some(58113709376587)],
-            millis_tz,
-        );
-        let ts_micros_tz =
-            TimestampMicrosecondArray::from_opt_vec(vec![None, None, None], micros_tz);
-        let ts_nanos_tz = TimestampNanosecondArray::from_opt_vec(
-            vec![None, None, Some(-6473623571954960143)],
-            nanos_tz,
-        );
-        let utf8s = StringArray::from(vec![Some("aa"), None, Some("bbb")]);
-
-        let value_data = Int32Array::from(vec![None, Some(2), None, None]);
-        let value_offsets = Buffer::from_slice_ref(&[0, 3, 4, 4]);
-        let list_data_type =
-            DataType::List(Box::new(Field::new("item", DataType::Int32, true)));
-        let list_data = ArrayData::builder(list_data_type)
-            .len(3)
-            .add_buffer(value_offsets)
-            .add_child_data(value_data.data().clone())
-            .build();
-        let lists = ListArray::from(list_data);
-
-        let structs_int32s = Int32Array::from(vec![None, Some(-2), None]);
-        let structs_utf8s = StringArray::from(vec![None, None, Some("aaaaaa")]);
-        let structs = StructArray::from(vec![
-            (
-                Field::new("int32s", DataType::Int32, true),
-                Arc::new(structs_int32s) as ArrayRef,
-            ),
-            (
-                Field::new("utf8s", DataType::Utf8, true),
-                Arc::new(structs_utf8s) as ArrayRef,
-            ),
-        ]);
-
-        let record_batch = RecordBatch::try_new(
-            Arc::new(schema.clone()),
-            vec![
-                Arc::new(bools_with_metadata_map),
-                Arc::new(bools_with_metadata_vec),
-                Arc::new(bools),
-                Arc::new(int8s),
-                Arc::new(int16s),
-                Arc::new(int32s),
-                Arc::new(int64s),
-                Arc::new(uint8s),
-                Arc::new(uint16s),
-                Arc::new(uint32s),
-                Arc::new(uint64s),
-                Arc::new(float32s),
-                Arc::new(float64s),
-                Arc::new(date_days),
-                Arc::new(date_millis),
-                Arc::new(time_secs),
-                Arc::new(time_millis),
-                Arc::new(time_micros),
-                Arc::new(time_nanos),
-                Arc::new(ts_secs),
-                Arc::new(ts_millis),
-                Arc::new(ts_micros),
-                Arc::new(ts_nanos),
-                Arc::new(ts_secs_tz),
-                Arc::new(ts_millis_tz),
-                Arc::new(ts_micros_tz),
-                Arc::new(ts_nanos_tz),
-                Arc::new(utf8s),
-                Arc::new(lists),
-                Arc::new(structs),
-            ],
-        )
-        .unwrap();
-        let mut file = File::open("test/data/integration.json").unwrap();
-        let mut json = String::new();
-        file.read_to_string(&mut json).unwrap();
-        let arrow_json: ArrowJson = serde_json::from_str(&json).unwrap();
-        // test schemas
-        assert!(arrow_json.schema.equals_schema(&schema));
-        // test record batch
-        assert!(arrow_json.batches[0].equals_batch(&record_batch));
-    }
-}
diff --git a/rust/arrow/src/util/mod.rs b/rust/arrow/src/util/mod.rs
deleted file mode 100644
index b2fd4f78661..00000000000
--- a/rust/arrow/src/util/mod.rs
+++ /dev/null
@@ -1,31 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-pub mod bench_util;
-pub mod bit_chunk_iterator;
-pub mod bit_util;
-pub mod data_gen;
-pub mod display;
-pub mod integration_util;
-#[cfg(feature = "prettyprint")]
-pub mod pretty;
-pub(crate) mod serialization;
-pub mod string_writer;
-pub mod test_util;
-
-mod trusted_len;
-pub(crate) use trusted_len::trusted_len_unzip;
diff --git a/rust/arrow/src/util/pretty.rs b/rust/arrow/src/util/pretty.rs
deleted file mode 100644
index f354899c1df..00000000000
--- a/rust/arrow/src/util/pretty.rs
+++ /dev/null
@@ -1,421 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Utilities for printing record batches. Note this module is not
-//! available unless `feature = "prettyprint"` is enabled.
-
-use crate::{array::ArrayRef, record_batch::RecordBatch};
-
-use prettytable::format;
-use prettytable::{Cell, Row, Table};
-
-use crate::error::Result;
-
-use super::display::array_value_to_string;
-
-///! Create a visual representation of record batches
-pub fn pretty_format_batches(results: &[RecordBatch]) -> Result<String> {
-    Ok(create_table(results)?.to_string())
-}
-
-///! Create a visual representation of columns
-pub fn pretty_format_columns(col_name: &str, results: &[ArrayRef]) -> Result<String> {
-    Ok(create_column(col_name, results)?.to_string())
-}
-
-///! Prints a visual representation of record batches to stdout
-pub fn print_batches(results: &[RecordBatch]) -> Result<()> {
-    create_table(results)?.printstd();
-    Ok(())
-}
-
-///! Prints a visual representation of a list of column to stdout
-pub fn print_columns(col_name: &str, results: &[ArrayRef]) -> Result<()> {
-    create_column(col_name, results)?.printstd();
-    Ok(())
-}
-
-///! Convert a series of record batches into a table
-fn create_table(results: &[RecordBatch]) -> Result<Table> {
-    let mut table = Table::new();
-    table.set_format(*format::consts::FORMAT_NO_LINESEP_WITH_TITLE);
-
-    if results.is_empty() {
-        return Ok(table);
-    }
-
-    let schema = results[0].schema();
-
-    let mut header = Vec::new();
-    for field in schema.fields() {
-        header.push(Cell::new(&field.name()));
-    }
-    table.set_titles(Row::new(header));
-
-    for batch in results {
-        for row in 0..batch.num_rows() {
-            let mut cells = Vec::new();
-            for col in 0..batch.num_columns() {
-                let column = batch.column(col);
-                cells.push(Cell::new(&array_value_to_string(&column, row)?));
-            }
-            table.add_row(Row::new(cells));
-        }
-    }
-
-    Ok(table)
-}
-
-fn create_column(field: &str, columns: &[ArrayRef]) -> Result<Table> {
-    let mut table = Table::new();
-    table.set_format(*format::consts::FORMAT_NO_LINESEP_WITH_TITLE);
-
-    if columns.is_empty() {
-        return Ok(table);
-    }
-
-    let header = vec![Cell::new(field)];
-    table.set_titles(Row::new(header));
-
-    for col in columns {
-        for row in 0..col.len() {
-            let cells = vec![Cell::new(&array_value_to_string(&col, row)?)];
-            table.add_row(Row::new(cells));
-        }
-    }
-
-    Ok(table)
-}
-
-#[cfg(test)]
-mod tests {
-    use crate::{
-        array::{
-            self, new_null_array, Array, Date32Array, Date64Array, PrimitiveBuilder,
-            StringBuilder, StringDictionaryBuilder, Time32MillisecondArray,
-            Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray,
-            TimestampMicrosecondArray, TimestampMillisecondArray,
-            TimestampNanosecondArray, TimestampSecondArray,
-        },
-        datatypes::{DataType, Field, Int32Type, Schema},
-    };
-
-    use super::*;
-    use std::sync::Arc;
-
-    #[test]
-    fn test_pretty_format_batches() -> Result<()> {
-        // define a schema.
-        let schema = Arc::new(Schema::new(vec![
-            Field::new("a", DataType::Utf8, true),
-            Field::new("b", DataType::Int32, true),
-        ]));
-
-        // define data.
-        let batch = RecordBatch::try_new(
-            schema,
-            vec![
-                Arc::new(array::StringArray::from(vec![
-                    Some("a"),
-                    Some("b"),
-                    None,
-                    Some("d"),
-                ])),
-                Arc::new(array::Int32Array::from(vec![
-                    Some(1),
-                    None,
-                    Some(10),
-                    Some(100),
-                ])),
-            ],
-        )?;
-
-        let table = pretty_format_batches(&[batch])?;
-
-        let expected = vec![
-            "+---+-----+",
-            "| a | b   |",
-            "+---+-----+",
-            "| a | 1   |",
-            "| b |     |",
-            "|   | 10  |",
-            "| d | 100 |",
-            "+---+-----+",
-        ];
-
-        let actual: Vec<&str> = table.lines().collect();
-
-        assert_eq!(expected, actual, "Actual result:\n{}", table);
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_pretty_format_columns() -> Result<()> {
-        let columns = vec![
-            Arc::new(array::StringArray::from(vec![
-                Some("a"),
-                Some("b"),
-                None,
-                Some("d"),
-            ])) as ArrayRef,
-            Arc::new(array::StringArray::from(vec![Some("e"), None, Some("g")])),
-        ];
-
-        let table = pretty_format_columns("a", &columns)?;
-
-        let expected = vec![
-            "+---+", "| a |", "+---+", "| a |", "| b |", "|   |", "| d |", "| e |",
-            "|   |", "| g |", "+---+",
-        ];
-
-        let actual: Vec<&str> = table.lines().collect();
-
-        assert_eq!(expected, actual, "Actual result:\n{}", table);
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_pretty_format_null() {
-        let schema = Arc::new(Schema::new(vec![
-            Field::new("a", DataType::Utf8, true),
-            Field::new("b", DataType::Int32, true),
-            Field::new("c", DataType::Null, true),
-        ]));
-
-        let num_rows = 4;
-        let arrays = schema
-            .fields()
-            .iter()
-            .map(|f| new_null_array(f.data_type(), num_rows))
-            .collect();
-
-        // define data (null)
-        let batch = RecordBatch::try_new(schema, arrays).unwrap();
-
-        let table = pretty_format_batches(&[batch]).unwrap();
-
-        let expected = vec![
-            "+---+---+---+",
-            "| a | b | c |",
-            "+---+---+---+",
-            "|   |   |   |",
-            "|   |   |   |",
-            "|   |   |   |",
-            "|   |   |   |",
-            "+---+---+---+",
-        ];
-
-        let actual: Vec<&str> = table.lines().collect();
-
-        assert_eq!(expected, actual, "Actual result:\n{:#?}", table);
-    }
-
-    #[test]
-    fn test_pretty_format_dictionary() -> Result<()> {
-        // define a schema.
-        let field_type =
-            DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8));
-        let schema = Arc::new(Schema::new(vec![Field::new("d1", field_type, true)]));
-
-        let keys_builder = PrimitiveBuilder::<Int32Type>::new(10);
-        let values_builder = StringBuilder::new(10);
-        let mut builder = StringDictionaryBuilder::new(keys_builder, values_builder);
-
-        builder.append("one")?;
-        builder.append_null()?;
-        builder.append("three")?;
-        let array = Arc::new(builder.finish());
-
-        let batch = RecordBatch::try_new(schema, vec![array])?;
-
-        let table = pretty_format_batches(&[batch])?;
-
-        let expected = vec![
-            "+-------+",
-            "| d1    |",
-            "+-------+",
-            "| one   |",
-            "|       |",
-            "| three |",
-            "+-------+",
-        ];
-
-        let actual: Vec<&str> = table.lines().collect();
-
-        assert_eq!(expected, actual, "Actual result:\n{}", table);
-
-        Ok(())
-    }
-
-    /// Generate an array with type $ARRAYTYPE with a numeric value of
-    /// $VALUE, and compare $EXPECTED_RESULT to the output of
-    /// formatting that array with `pretty_format_batches`
-    macro_rules! check_datetime {
-        ($ARRAYTYPE:ident, $VALUE:expr, $EXPECTED_RESULT:expr) => {
-            let mut builder = $ARRAYTYPE::builder(10);
-            builder.append_value($VALUE).unwrap();
-            builder.append_null().unwrap();
-            let array = builder.finish();
-
-            let schema = Arc::new(Schema::new(vec![Field::new(
-                "f",
-                array.data_type().clone(),
-                true,
-            )]));
-            let batch = RecordBatch::try_new(schema, vec![Arc::new(array)]).unwrap();
-
-            let table = pretty_format_batches(&[batch]).expect("formatting batches");
-
-            let expected = $EXPECTED_RESULT;
-            let actual: Vec<&str> = table.lines().collect();
-
-            assert_eq!(expected, actual, "Actual result:\n\n{:#?}\n\n", actual);
-        };
-    }
-
-    #[test]
-    fn test_pretty_format_timestamp_second() {
-        let expected = vec![
-            "+---------------------+",
-            "| f                   |",
-            "+---------------------+",
-            "| 1970-05-09 14:25:11 |",
-            "|                     |",
-            "+---------------------+",
-        ];
-        check_datetime!(TimestampSecondArray, 11111111, expected);
-    }
-
-    #[test]
-    fn test_pretty_format_timestamp_millisecond() {
-        let expected = vec![
-            "+-------------------------+",
-            "| f                       |",
-            "+-------------------------+",
-            "| 1970-01-01 03:05:11.111 |",
-            "|                         |",
-            "+-------------------------+",
-        ];
-        check_datetime!(TimestampMillisecondArray, 11111111, expected);
-    }
-
-    #[test]
-    fn test_pretty_format_timestamp_microsecond() {
-        let expected = vec![
-            "+----------------------------+",
-            "| f                          |",
-            "+----------------------------+",
-            "| 1970-01-01 00:00:11.111111 |",
-            "|                            |",
-            "+----------------------------+",
-        ];
-        check_datetime!(TimestampMicrosecondArray, 11111111, expected);
-    }
-
-    #[test]
-    fn test_pretty_format_timestamp_nanosecond() {
-        let expected = vec![
-            "+-------------------------------+",
-            "| f                             |",
-            "+-------------------------------+",
-            "| 1970-01-01 00:00:00.011111111 |",
-            "|                               |",
-            "+-------------------------------+",
-        ];
-        check_datetime!(TimestampNanosecondArray, 11111111, expected);
-    }
-
-    #[test]
-    fn test_pretty_format_date_32() {
-        let expected = vec![
-            "+------------+",
-            "| f          |",
-            "+------------+",
-            "| 1973-05-19 |",
-            "|            |",
-            "+------------+",
-        ];
-        check_datetime!(Date32Array, 1234, expected);
-    }
-
-    #[test]
-    fn test_pretty_format_date_64() {
-        let expected = vec![
-            "+------------+",
-            "| f          |",
-            "+------------+",
-            "| 2005-03-18 |",
-            "|            |",
-            "+------------+",
-        ];
-        check_datetime!(Date64Array, 1111111100000, expected);
-    }
-
-    #[test]
-    fn test_pretty_format_time_32_second() {
-        let expected = vec![
-            "+----------+",
-            "| f        |",
-            "+----------+",
-            "| 00:18:31 |",
-            "|          |",
-            "+----------+",
-        ];
-        check_datetime!(Time32SecondArray, 1111, expected);
-    }
-
-    #[test]
-    fn test_pretty_format_time_32_millisecond() {
-        let expected = vec![
-            "+--------------+",
-            "| f            |",
-            "+--------------+",
-            "| 03:05:11.111 |",
-            "|              |",
-            "+--------------+",
-        ];
-        check_datetime!(Time32MillisecondArray, 11111111, expected);
-    }
-
-    #[test]
-    fn test_pretty_format_time_64_microsecond() {
-        let expected = vec![
-            "+-----------------+",
-            "| f               |",
-            "+-----------------+",
-            "| 00:00:11.111111 |",
-            "|                 |",
-            "+-----------------+",
-        ];
-        check_datetime!(Time64MicrosecondArray, 11111111, expected);
-    }
-
-    #[test]
-    fn test_pretty_format_time_64_nanosecond() {
-        let expected = vec![
-            "+--------------------+",
-            "| f                  |",
-            "+--------------------+",
-            "| 00:00:00.011111111 |",
-            "|                    |",
-            "+--------------------+",
-        ];
-        check_datetime!(Time64NanosecondArray, 11111111, expected);
-    }
-}
diff --git a/rust/arrow/src/util/serialization.rs b/rust/arrow/src/util/serialization.rs
deleted file mode 100644
index 14d67ca117c..00000000000
--- a/rust/arrow/src/util/serialization.rs
+++ /dev/null
@@ -1,33 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-/// Converts numeric type to a `String`
-pub fn lexical_to_string<N: lexical_core::ToLexical>(n: N) -> String {
-    let mut buf = Vec::<u8>::with_capacity(N::FORMATTED_SIZE_DECIMAL);
-    unsafe {
-        // JUSTIFICATION
-        //  Benefit
-        //      Allows using the faster serializer lexical core and convert to string
-        //  Soundness
-        //      Length of buf is set as written length afterwards. lexical_core
-        //      creates a valid string, so doesn't need to be checked.
-        let slice = std::slice::from_raw_parts_mut(buf.as_mut_ptr(), buf.capacity());
-        let len = lexical_core::write(n, slice).len();
-        buf.set_len(len);
-        String::from_utf8_unchecked(buf)
-    }
-}
diff --git a/rust/arrow/src/util/string_writer.rs b/rust/arrow/src/util/string_writer.rs
deleted file mode 100644
index 2a8175d1562..00000000000
--- a/rust/arrow/src/util/string_writer.rs
+++ /dev/null
@@ -1,105 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! String Writer
-//! This string writer encapsulates `std::string::String` and
-//! implements `std::io::Write` trait, which makes String as a
-//! writable object like File.
-//!
-//! Example:
-//!
-//! ```
-//! use arrow::array::*;
-//! use arrow::csv;
-//! use arrow::datatypes::*;
-//! use arrow::record_batch::RecordBatch;
-//! use arrow::util::string_writer::StringWriter;
-//! use std::sync::Arc;
-//!
-//! let schema = Schema::new(vec![
-//!     Field::new("c1", DataType::Utf8, false),
-//!     Field::new("c2", DataType::Float64, true),
-//!     Field::new("c3", DataType::UInt32, false),
-//!     Field::new("c3", DataType::Boolean, true),
-//! ]);
-//! let c1 = StringArray::from(vec![
-//!     "Lorem ipsum dolor sit amet",
-//!     "consectetur adipiscing elit",
-//!     "sed do eiusmod tempor",
-//! ]);
-//! let c2 = PrimitiveArray::<Float64Type>::from(vec![
-//!     Some(123.564532),
-//!     None,
-//!     Some(-556132.25),
-//! ]);
-//! let c3 = PrimitiveArray::<UInt32Type>::from(vec![3, 2, 1]);
-//! let c4 = BooleanArray::from(vec![Some(true), Some(false), None]);
-//!
-//! let batch = RecordBatch::try_new(
-//!     Arc::new(schema),
-//!     vec![Arc::new(c1), Arc::new(c2), Arc::new(c3), Arc::new(c4)],
-//! )
-//! .unwrap();
-//!
-//! let sw = StringWriter::new();
-//! let mut writer = csv::Writer::new(sw);
-//! writer.write(&batch).unwrap();
-//! ```
-
-use std::io::{Error, ErrorKind, Result, Write};
-
-#[derive(Debug)]
-pub struct StringWriter {
-    data: String,
-}
-
-impl StringWriter {
-    pub fn new() -> Self {
-        StringWriter {
-            data: String::new(),
-        }
-    }
-}
-
-impl Default for StringWriter {
-    fn default() -> Self {
-        Self::new()
-    }
-}
-
-impl ToString for StringWriter {
-    fn to_string(&self) -> String {
-        self.data.clone()
-    }
-}
-
-impl Write for StringWriter {
-    fn write(&mut self, buf: &[u8]) -> Result<usize> {
-        let string = match String::from_utf8(buf.to_vec()) {
-            Ok(x) => x,
-            Err(e) => {
-                return Err(Error::new(ErrorKind::InvalidData, e));
-            }
-        };
-        self.data.push_str(&string);
-        Ok(string.len())
-    }
-
-    fn flush(&mut self) -> Result<()> {
-        Ok(())
-    }
-}
diff --git a/rust/arrow/src/util/test_util.rs b/rust/arrow/src/util/test_util.rs
deleted file mode 100644
index b32ff429c9b..00000000000
--- a/rust/arrow/src/util/test_util.rs
+++ /dev/null
@@ -1,211 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Utils to make testing easier
-
-use rand::{rngs::StdRng, Rng, SeedableRng};
-use std::{env, error::Error, fs, io::Write, path::PathBuf};
-
-/// Returns a vector of size `n`, filled with randomly generated bytes.
-pub fn random_bytes(n: usize) -> Vec<u8> {
-    let mut result = vec![];
-    let mut rng = seedable_rng();
-    for _ in 0..n {
-        result.push(rng.gen_range(0, 255));
-    }
-    result
-}
-
-/// Returns fixed seedable RNG
-pub fn seedable_rng() -> StdRng {
-    StdRng::seed_from_u64(42)
-}
-
-/// Returns file handle for a temp file in 'target' directory with a provided content
-///
-/// TODO: Originates from `parquet` utils, can be merged in [ARROW-4064]
-pub fn get_temp_file(file_name: &str, content: &[u8]) -> fs::File {
-    // build tmp path to a file in "target/debug/testdata"
-    let mut path_buf = env::current_dir().unwrap();
-    path_buf.push("target");
-    path_buf.push("debug");
-    path_buf.push("testdata");
-    fs::create_dir_all(&path_buf).unwrap();
-    path_buf.push(file_name);
-
-    // write file content
-    let mut tmp_file = fs::File::create(path_buf.as_path()).unwrap();
-    tmp_file.write_all(content).unwrap();
-    tmp_file.sync_all().unwrap();
-
-    // return file handle for both read and write
-    let file = fs::OpenOptions::new()
-        .read(true)
-        .write(true)
-        .open(path_buf.as_path());
-    assert!(file.is_ok());
-    file.unwrap()
-}
-
-/// Returns the arrow test data directory, which is by default stored
-/// in a git submodule rooted at `arrow/testing/data`.
-///
-/// The default can be overridden by the optional environment
-/// variable `ARROW_TEST_DATA`
-///
-/// panics when the directory can not be found.
-///
-/// Example:
-/// ```
-/// let testdata = arrow::util::test_util::arrow_test_data();
-/// let csvdata = format!("{}/csv/aggregate_test_100.csv", testdata);
-/// assert!(std::path::PathBuf::from(csvdata).exists());
-/// ```
-pub fn arrow_test_data() -> String {
-    match get_data_dir("ARROW_TEST_DATA", "../../testing/data") {
-        Ok(pb) => pb.display().to_string(),
-        Err(err) => panic!("failed to get arrow data dir: {}", err),
-    }
-}
-
-/// Returns the parquest test data directory, which is by default
-/// stored in a git submodule rooted at
-/// `arrow/cpp/submodules/parquest-testing/data`.
-///
-/// The default can be overridden by the optional environment variable
-/// `PARQUET_TEST_DATA`
-///
-/// panics when the directory can not be found.
-///
-/// Example:
-/// ```
-/// let testdata = arrow::util::test_util::parquet_test_data();
-/// let filename = format!("{}/binary.parquet", testdata);
-/// assert!(std::path::PathBuf::from(filename).exists());
-/// ```
-pub fn parquet_test_data() -> String {
-    match get_data_dir(
-        "PARQUET_TEST_DATA",
-        "../../cpp/submodules/parquet-testing/data",
-    ) {
-        Ok(pb) => pb.display().to_string(),
-        Err(err) => panic!("failed to get parquet data dir: {}", err),
-    }
-}
-
-/// Returns a directory path for finding test data.
-///
-/// udf_env: name of an environment variable
-///
-/// submodule_dir: fallback path (relative to CARGO_MANIFEST_DIR)
-///
-///  Returns either:
-/// The path referred to in `udf_env` if that variable is set and refers to a directory
-/// The submodule_data directory relative to CARGO_MANIFEST_PATH
-fn get_data_dir(udf_env: &str, submodule_data: &str) -> Result<PathBuf, Box<dyn Error>> {
-    // Try user defined env.
-    if let Ok(dir) = env::var(udf_env) {
-        let trimmed = dir.trim().to_string();
-        if !trimmed.is_empty() {
-            let pb = PathBuf::from(trimmed);
-            if pb.is_dir() {
-                return Ok(pb);
-            } else {
-                return Err(format!(
-                    "the data dir `{}` defined by env {} not found",
-                    pb.display().to_string(),
-                    udf_env
-                )
-                .into());
-            }
-        }
-    }
-
-    // The env is undefined or its value is trimmed to empty, let's try default dir.
-
-    // env "CARGO_MANIFEST_DIR" is "the directory containing the manifest of your package",
-    // set by `cargo run` or `cargo test`, see:
-    // https://doc.rust-lang.org/cargo/reference/environment-variables.html
-    let dir = env!("CARGO_MANIFEST_DIR");
-
-    let pb = PathBuf::from(dir).join(submodule_data);
-    if pb.is_dir() {
-        Ok(pb)
-    } else {
-        Err(format!(
-            "env `{}` is undefined or has empty value, and the pre-defined data dir `{}` not found\n\
-             HINT: try running `git submodule update --init`",
-            udf_env,
-            pb.display().to_string(),
-        ).into())
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use std::env;
-
-    #[test]
-    fn test_data_dir() {
-        let udf_env = "get_data_dir";
-        let cwd = env::current_dir().unwrap();
-
-        let existing_pb = cwd.join("..");
-        let existing = existing_pb.display().to_string();
-        let existing_str = existing.as_str();
-
-        let non_existing = cwd.join("non-existing-dir").display().to_string();
-        let non_existing_str = non_existing.as_str();
-
-        env::set_var(udf_env, non_existing_str);
-        let res = get_data_dir(udf_env, existing_str);
-        assert!(res.is_err());
-
-        env::set_var(udf_env, "");
-        let res = get_data_dir(udf_env, existing_str);
-        assert!(res.is_ok());
-        assert_eq!(res.unwrap(), existing_pb);
-
-        env::set_var(udf_env, " ");
-        let res = get_data_dir(udf_env, existing_str);
-        assert!(res.is_ok());
-        assert_eq!(res.unwrap(), existing_pb);
-
-        env::set_var(udf_env, existing_str);
-        let res = get_data_dir(udf_env, existing_str);
-        assert!(res.is_ok());
-        assert_eq!(res.unwrap(), existing_pb);
-
-        env::remove_var(udf_env);
-        let res = get_data_dir(udf_env, non_existing_str);
-        assert!(res.is_err());
-
-        let res = get_data_dir(udf_env, existing_str);
-        assert!(res.is_ok());
-        assert_eq!(res.unwrap(), existing_pb);
-    }
-
-    #[test]
-    fn test_happy() {
-        let res = arrow_test_data();
-        assert!(PathBuf::from(res).is_dir());
-
-        let res = parquet_test_data();
-        assert!(PathBuf::from(res).is_dir());
-    }
-}
diff --git a/rust/arrow/src/util/trusted_len.rs b/rust/arrow/src/util/trusted_len.rs
deleted file mode 100644
index 84a66238b63..00000000000
--- a/rust/arrow/src/util/trusted_len.rs
+++ /dev/null
@@ -1,82 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use super::bit_util;
-use crate::{
-    buffer::{Buffer, MutableBuffer},
-    datatypes::ArrowNativeType,
-};
-
-/// Creates two [`Buffer`]s from an iterator of `Option`.
-/// The first buffer corresponds to a bitmap buffer, the second one
-/// corresponds to a values buffer.
-/// # Safety
-/// The caller must ensure that `iterator` is `TrustedLen`.
-#[inline]
-pub(crate) unsafe fn trusted_len_unzip<I, P, T>(iterator: I) -> (Buffer, Buffer)
-where
-    T: ArrowNativeType,
-    P: std::borrow::Borrow<Option<T>>,
-    I: Iterator<Item = P>,
-{
-    let (_, upper) = iterator.size_hint();
-    let upper = upper.expect("trusted_len_unzip requires an upper limit");
-    let len = upper * std::mem::size_of::<T>();
-
-    let mut null = MutableBuffer::from_len_zeroed(upper.saturating_add(7) / 8);
-    let mut buffer = MutableBuffer::new(len);
-
-    let dst_null = null.as_mut_ptr();
-    let mut dst = buffer.as_mut_ptr() as *mut T;
-    for (i, item) in iterator.enumerate() {
-        let item = item.borrow();
-        if let Some(item) = item {
-            std::ptr::write(dst, *item);
-            bit_util::set_bit_raw(dst_null, i);
-        } else {
-            std::ptr::write(dst, T::default());
-        }
-        dst = dst.add(1);
-    }
-    assert_eq!(
-        dst.offset_from(buffer.as_ptr() as *mut T) as usize,
-        upper,
-        "Trusted iterator length was not accurately reported"
-    );
-    buffer.set_len(len);
-    (null.into(), buffer.into())
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn trusted_len_unzip_good() {
-        let vec = vec![Some(1u32), None];
-        let (null, buffer) = unsafe { trusted_len_unzip(vec.iter()) };
-        assert_eq!(null.as_slice(), &[0b00000001]);
-        assert_eq!(buffer.as_slice(), &[1u8, 0, 0, 0, 0, 0, 0, 0]);
-    }
-
-    #[test]
-    #[should_panic(expected = "trusted_len_unzip requires an upper limit")]
-    fn trusted_len_unzip_panic() {
-        let iter = std::iter::repeat(Some(4i32));
-        unsafe { trusted_len_unzip(iter) };
-    }
-}
diff --git a/rust/arrow/src/zz_memory_check.rs b/rust/arrow/src/zz_memory_check.rs
deleted file mode 100644
index 70ec8ebdbdd..00000000000
--- a/rust/arrow/src/zz_memory_check.rs
+++ /dev/null
@@ -1,31 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// This file is named like this so that it is the last one to be tested
-// It contains no content, it has a single test that verifies that there is no memory leak
-// on all unit-tests
-
-#[cfg(feature = "memory-check")]
-mod tests {
-    use crate::memory::ALLOCATIONS;
-
-    // verify that there is no data un-allocated
-    #[test]
-    fn test_memory_check() {
-        unsafe { assert_eq!(ALLOCATIONS.load(std::sync::atomic::Ordering::SeqCst), 0) }
-    }
-}
diff --git a/rust/arrow/test/data/arrays.json b/rust/arrow/test/data/arrays.json
deleted file mode 100644
index 5dbdd19ffc0..00000000000
--- a/rust/arrow/test/data/arrays.json
+++ /dev/null
@@ -1,3 +0,0 @@
-{"a":1, "b":[2.0, 1.3, -6.1], "c":[false, true], "d":"4"}
-{"a":-10, "b":[2.0, 1.3, -6.1], "c":[true, true], "d":"4"}
-{"a":2, "b":[2.0, null, -6.1], "c":[false, null], "d":"text"}
diff --git a/rust/arrow/test/data/basic.json b/rust/arrow/test/data/basic.json
deleted file mode 100644
index dafd2dd2e42..00000000000
--- a/rust/arrow/test/data/basic.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{"a":1, "b":2.0, "c":false, "d":"4"}
-{"a":-10, "b":-3.5, "c":true, "d":"4"}
-{"a":2, "b":0.6, "c":false, "d":"text"}
-{"a":1, "b":2.0, "c":false, "d":"4"}
-{"a":7, "b":-3.5, "c":true, "d":"4"}
-{"a":1, "b":0.6, "c":false, "d":"text"}
-{"a":1, "b":2.0, "c":false, "d":"4"}
-{"a":5, "b":-3.5, "c":true, "d":"4"}
-{"a":1, "b":0.6, "c":false, "d":"text"}
-{"a":1, "b":2.0, "c":false, "d":"4"}
-{"a":1, "b":-3.5, "c":true, "d":"4"}
-{"a":100000000000000, "b":0.6, "c":false, "d":"text"}
\ No newline at end of file
diff --git a/rust/arrow/test/data/basic_nulls.json b/rust/arrow/test/data/basic_nulls.json
deleted file mode 100644
index 1451df7f57f..00000000000
--- a/rust/arrow/test/data/basic_nulls.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{"a":1, "b":2.0, "c":false}
-{"a":null, "b":-3.5, "c":true, "d":"4"}
-{"c":false, "d":"text"}
-{"a":1, "b":2.0, "c":false, "d":"4"}
-{"a":7, "b":-3.5, "c":null, "d":null}
-{"a":1, "b":0.6, "c":false}
-{"a":1, "b":2.0, "d":"4"}
-{"a":5, "c":true}
-{"a":1, "b":0.6, "c":false, "d":"text"}
-{"a":1, "b":2.0, "c":false, "d":"4"}
-{"a":1, "b":-3.5, "c":true, "d":"4"}
-{}
\ No newline at end of file
diff --git a/rust/arrow/test/data/integration.json b/rust/arrow/test/data/integration.json
deleted file mode 100644
index 7e4a22cddba..00000000000
--- a/rust/arrow/test/data/integration.json
+++ /dev/null
@@ -1,808 +0,0 @@
-{
-  "schema": {
-    "fields": [
-      {
-        "name": "bools-with-metadata-map",
-        "type": {
-          "name": "bool"
-        },
-        "nullable": true,
-        "metadata": {
-          "k": "v"
-        },
-        "children": []
-      },
-      {
-        "name": "bools-with-metadata-vec",
-        "type": {
-          "name": "bool"
-        },
-        "nullable": true,
-        "metadata": [
-          {
-            "key": "k2",
-            "value": "v2"
-          }
-        ],
-        "children": []
-      },
-      {
-        "name": "bools",
-        "type": {
-          "name": "bool"
-        },
-        "nullable": true,
-        "children": []
-      },
-      {
-        "name": "int8s",
-        "type": {
-          "name": "int",
-          "isSigned": true,
-          "bitWidth": 8
-        },
-        "nullable": true,
-        "children": []
-      },
-      {
-        "name": "int16s",
-        "type": {
-          "name": "int",
-          "isSigned": true,
-          "bitWidth": 16
-        },
-        "nullable": true,
-        "children": []
-      },
-      {
-        "name": "int32s",
-        "type": {
-          "name": "int",
-          "isSigned": true,
-          "bitWidth": 32
-        },
-        "nullable": true,
-        "children": []
-      },
-      {
-        "name": "int64s",
-        "type": {
-          "name": "int",
-          "isSigned": true,
-          "bitWidth": 64
-        },
-        "nullable": true,
-        "children": []
-      },
-      {
-        "name": "uint8s",
-        "type": {
-          "name": "int",
-          "isSigned": false,
-          "bitWidth": 8
-        },
-        "nullable": true,
-        "children": []
-      },
-      {
-        "name": "uint16s",
-        "type": {
-          "name": "int",
-          "isSigned": false,
-          "bitWidth": 16
-        },
-        "nullable": true,
-        "children": []
-      },
-      {
-        "name": "uint32s",
-        "type": {
-          "name": "int",
-          "isSigned": false,
-          "bitWidth": 32
-        },
-        "nullable": true,
-        "children": []
-      },
-      {
-        "name": "uint64s",
-        "type": {
-          "name": "int",
-          "isSigned": false,
-          "bitWidth": 64
-        },
-        "nullable": true,
-        "children": []
-      },
-      {
-        "name": "float32s",
-        "type": {
-          "name": "floatingpoint",
-          "precision": "SINGLE"
-        },
-        "nullable": true,
-        "children": []
-      },
-      {
-        "name": "float64s",
-        "type": {
-          "name": "floatingpoint",
-          "precision": "DOUBLE"
-        },
-        "nullable": true,
-        "children": []
-      },
-      {
-        "name": "date_days",
-        "type": {
-          "name": "date",
-          "unit": "DAY"
-        },
-        "nullable": true,
-        "children": []
-      },
-      {
-        "name": "date_millis",
-        "type": {
-          "name": "date",
-          "unit": "MILLISECOND"
-        },
-        "nullable": true,
-        "children": []
-      },
-      {
-        "name": "time_secs",
-        "type": {
-          "name": "time",
-          "unit": "SECOND",
-          "bitWidth": 32
-        },
-        "nullable": true,
-        "children": []
-      },
-      {
-        "name": "time_millis",
-        "type": {
-          "name": "time",
-          "unit": "MILLISECOND",
-          "bitWidth": 32
-        },
-        "nullable": true,
-        "children": []
-      },
-      {
-        "name": "time_micros",
-        "type": {
-          "name": "time",
-          "unit": "MICROSECOND",
-          "bitWidth": 64
-        },
-        "nullable": true,
-        "children": []
-      },
-      {
-        "name": "time_nanos",
-        "type": {
-          "name": "time",
-          "unit": "NANOSECOND",
-          "bitWidth": 64
-        },
-        "nullable": true,
-        "children": []
-      },
-      {
-        "name": "ts_secs",
-        "type": {
-          "name": "timestamp",
-          "unit": "SECOND"
-        },
-        "nullable": true,
-        "children": []
-      },
-      {
-        "name": "ts_millis",
-        "type": {
-          "name": "timestamp",
-          "unit": "MILLISECOND"
-        },
-        "nullable": true,
-        "children": []
-      },
-      {
-        "name": "ts_micros",
-        "type": {
-          "name": "timestamp",
-          "unit": "MICROSECOND"
-        },
-        "nullable": true,
-        "children": []
-      },
-      {
-        "name": "ts_nanos",
-        "type": {
-          "name": "timestamp",
-          "unit": "NANOSECOND"
-        },
-        "nullable": true,
-        "children": []
-      },
-      {
-        "name": "ts_secs_tz",
-        "type": {
-          "name": "timestamp",
-          "unit": "SECOND",
-          "timezone": "Europe/Budapest"
-        },
-        "nullable": true,
-        "children": []
-      },
-      {
-        "name": "ts_millis_tz",
-        "type": {
-          "name": "timestamp",
-          "unit": "MILLISECOND",
-          "timezone": "America/New_York"
-        },
-        "nullable": true,
-        "children": []
-      },
-      {
-        "name": "ts_micros_tz",
-        "type": {
-          "name": "timestamp",
-          "unit": "MICROSECOND",
-          "timezone": "UTC"
-        },
-        "nullable": true,
-        "children": []
-      },
-      {
-        "name": "ts_nanos_tz",
-        "type": {
-          "name": "timestamp",
-          "unit": "NANOSECOND",
-          "timezone": "Africa/Johannesburg"
-        },
-        "nullable": true,
-        "children": []
-      },
-      {
-        "name": "utf8s",
-        "type": {
-          "name": "utf8"
-        },
-        "nullable": true,
-        "children": []
-      },
-      {
-        "name": "lists",
-        "nullable": true,
-        "type": {
-          "name": "list"
-        },
-        "children": [
-          {
-            "name": "item",
-            "nullable": true,
-            "type": {
-              "name": "int",
-              "bitWidth": 32,
-              "isSigned": true
-            },
-            "children": []
-          }
-        ]
-      },
-      {
-        "name": "structs",
-        "type": {
-          "name": "struct"
-        },
-        "nullable": true,
-        "children": [
-          {
-            "name": "int32s",
-            "type": {
-              "name": "int",
-              "isSigned": true,
-              "bitWidth": 32
-            },
-            "nullable": true,
-            "children": []
-          },
-          {
-            "name": "utf8s",
-            "type": {
-              "name": "utf8"
-            },
-            "nullable": true,
-            "children": []
-          }
-        ]
-      }
-    ]
-  },
-  "batches": [
-    {
-      "count": 3,
-      "columns": [
-        {
-          "name": "bools-with-metadata-map",
-          "count": 3,
-          "VALIDITY": [
-            1,
-            0,
-            1
-          ],
-          "DATA": [
-            true,
-            true,
-            false
-          ]
-        },
-        {
-          "name": "bools-with-metadata-vec",
-          "count": 3,
-          "VALIDITY": [
-            1,
-            0,
-            1
-          ],
-          "DATA": [
-            true,
-            true,
-            false
-          ]
-        },
-        {
-          "name": "bools",
-          "count": 3,
-          "VALIDITY": [
-            1,
-            0,
-            1
-          ],
-          "DATA": [
-            true,
-            true,
-            false
-          ]
-        },
-        {
-          "name": "int8s",
-          "count": 3,
-          "VALIDITY": [
-            1,
-            0,
-            1
-          ],
-          "DATA": [
-            1,
-            2,
-            3
-          ]
-        },
-        {
-          "name": "int16s",
-          "count": 3,
-          "VALIDITY": [
-            1,
-            0,
-            1
-          ],
-          "DATA": [
-            1,
-            2,
-            3
-          ]
-        },
-        {
-          "name": "int32s",
-          "count": 3,
-          "VALIDITY": [
-            1,
-            0,
-            1
-          ],
-          "DATA": [
-            1,
-            2,
-            3
-          ]
-        },
-        {
-          "name": "int64s",
-          "count": 3,
-          "VALIDITY": [
-            1,
-            0,
-            1
-          ],
-          "DATA": [
-            1,
-            2,
-            3
-          ]
-        },
-        {
-          "name": "uint8s",
-          "count": 3,
-          "VALIDITY": [
-            1,
-            0,
-            1
-          ],
-          "DATA": [
-            1,
-            2,
-            3
-          ]
-        },
-        {
-          "name": "uint16s",
-          "count": 5,
-          "VALIDITY": [
-            1,
-            0,
-            1
-          ],
-          "DATA": [
-            1,
-            2,
-            3
-          ]
-        },
-        {
-          "name": "uint32s",
-          "count": 3,
-          "VALIDITY": [
-            1,
-            0,
-            1
-          ],
-          "DATA": [
-            1,
-            2,
-            3
-          ]
-        },
-        {
-          "name": "uint64s",
-          "count": 3,
-          "VALIDITY": [
-            1,
-            0,
-            1
-          ],
-          "DATA": [
-            1,
-            2,
-            3
-          ]
-        },
-        {
-          "name": "float32s",
-          "count": 3,
-          "VALIDITY": [
-            1,
-            0,
-            1
-          ],
-          "DATA": [
-            1.0,
-            2.0,
-            3.0
-          ]
-        },
-        {
-          "name": "float64s",
-          "count": 3,
-          "VALIDITY": [
-            1,
-            0,
-            1
-          ],
-          "DATA": [
-            1.0,
-            2.0,
-            3.0
-          ]
-        },
-        {
-          "name": "date_days",
-          "count": 3,
-          "VALIDITY": [
-            1,
-            0,
-            0
-          ],
-          "DATA": [
-            1196848,
-            2319603,
-            2755982
-          ]
-        },
-        {
-          "name": "date_millis",
-          "count": 3,
-          "VALIDITY": [
-            1,
-            1,
-            1
-          ],
-          "DATA": [
-            167903550396207,
-            29923997007884,
-            30612271819236
-          ]
-        },
-        {
-          "name": "time_secs",
-          "count": 3,
-          "VALIDITY": [
-            1,
-            1,
-            1
-          ],
-          "DATA": [
-            27974,
-            78592,
-            43207
-          ]
-        },
-        {
-          "name": "time_millis",
-          "count": 3,
-          "VALIDITY": [
-            1,
-            1,
-            1
-          ],
-          "DATA": [
-            6613125,
-            74667230,
-            52260079
-          ]
-        },
-        {
-          "name": "time_micros",
-          "count": 3,
-          "VALIDITY": [
-            1,
-            0,
-            0
-          ],
-          "DATA": [
-            62522958593,
-            13470380050,
-            50797036705
-          ]
-        },
-        {
-          "name": "time_nanos",
-          "count": 3,
-          "VALIDITY": [
-            1,
-            0,
-            1
-          ],
-          "DATA": [
-            73380123595985,
-            52520995325145,
-            16584393546415
-          ]
-        },
-        {
-          "name": "ts_secs",
-          "count": 3,
-          "VALIDITY": [
-            0,
-            1,
-            0
-          ],
-          "DATA": [
-            209869064422,
-            193438817552,
-            51757838205
-          ]
-        },
-        {
-          "name": "ts_millis",
-          "count": 3,
-          "VALIDITY": [
-            0,
-            1,
-            1
-          ],
-          "DATA": [
-            228315043570185,
-            38606916383008,
-            58113709376587
-          ]
-        },
-        {
-          "name": "ts_micros",
-          "count": 3,
-          "VALIDITY": [
-            0,
-            0,
-            0
-          ],
-          "DATA": [
-            133457416537791415,
-            129522736067409280,
-            177110451066832967
-          ]
-        },
-        {
-          "name": "ts_nanos",
-          "count": 3,
-          "VALIDITY": [
-            0,
-            0,
-            1
-          ],
-          "DATA": [
-            -804525722984600007,
-            8166038652634779458,
-            -6473623571954960143
-          ]
-        },
-        {
-          "name": "ts_secs_tz",
-          "count": 3,
-          "VALIDITY": [
-            0,
-            1,
-            0
-          ],
-          "DATA": [
-            209869064422,
-            193438817552,
-            51757838205
-          ]
-        },
-        {
-          "name": "ts_millis_tz",
-          "count": 3,
-          "VALIDITY": [
-            0,
-            1,
-            1
-          ],
-          "DATA": [
-            228315043570185,
-            38606916383008,
-            58113709376587
-          ]
-        },
-        {
-          "name": "ts_micros_tz",
-          "count": 3,
-          "VALIDITY": [
-            0,
-            0,
-            0
-          ],
-          "DATA": [
-            133457416537791415,
-            129522736067409280,
-            177110451066832967
-          ]
-        },
-        {
-          "name": "ts_nanos_tz",
-          "count": 3,
-          "VALIDITY": [
-            0,
-            0,
-            1
-          ],
-          "DATA": [
-            -804525722984600007,
-            8166038652634779458,
-            -6473623571954960143
-          ]
-        },
-        {
-          "name": "utf8s",
-          "count": 3,
-          "VALIDITY": [
-            1,
-            0,
-            1
-          ],
-          "OFFSET": [
-            0,
-            2,
-            2,
-            5
-          ],
-          "DATA": [
-            "aa",
-            "",
-            "bbb"
-          ]
-        },
-        {
-          "name": "lists",
-          "count": 3,
-          "VALIDITY": [
-            1,
-            1,
-            0
-          ],
-          "OFFSET": [
-            0,
-            3,
-            4,
-            4
-          ],
-          "children": [
-            {
-              "name": "item",
-              "count": 4,
-              "VALIDITY": [
-                0,
-                1,
-                0,
-                0
-              ],
-              "DATA": [
-                1,
-                2,
-                3,
-                4
-              ]
-            }
-          ]
-        },
-        {
-          "name": "structs",
-          "count": 3,
-          "VALIDITY": [
-            1,
-            1,
-            0
-          ],
-          "children": [
-            {
-              "name": "int32s",
-              "count": 3,
-              "VALIDITY": [
-                0,
-                1,
-                0
-              ],
-              "DATA": [
-                -1,
-                -2,
-                -3
-              ]
-            },
-            {
-              "name": "utf8s",
-              "count": 3,
-              "VALIDITY": [
-                0,
-                0,
-                1
-              ],
-              "OFFSET": [
-                0,
-                0,
-                0,
-                7
-              ],
-              "DATA": [
-                "",
-                "",
-                "aaaaaa"
-              ]
-            }
-          ]
-        }
-      ]
-    }
-  ]
-}
\ No newline at end of file
diff --git a/rust/arrow/test/data/list_string_dict_nested.json b/rust/arrow/test/data/list_string_dict_nested.json
deleted file mode 100644
index d215b318bae..00000000000
--- a/rust/arrow/test/data/list_string_dict_nested.json
+++ /dev/null
@@ -1,3 +0,0 @@
-{"machine": "a", "events": ["Elect Leader", "Do Ballot"]}
-{"machine": "b", "events": ["Do Ballot", "Send Data", "Elect Leader"]}
-{"machine": "c", "events": ["Send Data"]}
diff --git a/rust/arrow/test/data/list_string_dict_nested_nulls.json b/rust/arrow/test/data/list_string_dict_nested_nulls.json
deleted file mode 100644
index 9300b14ce27..00000000000
--- a/rust/arrow/test/data/list_string_dict_nested_nulls.json
+++ /dev/null
@@ -1,3 +0,0 @@
-{"machine": "a", "events": [null, "Elect Leader", "Do Ballot"]}
-{"machine": "b", "events": ["Do Ballot", null, "Send Data", "Elect Leader"]}
-{"machine": "c", "events": ["Send Data"]}
diff --git a/rust/arrow/test/data/mixed_arrays.json b/rust/arrow/test/data/mixed_arrays.json
deleted file mode 100644
index 18987284a5b..00000000000
--- a/rust/arrow/test/data/mixed_arrays.json
+++ /dev/null
@@ -1,4 +0,0 @@
-{"a":1, "b":[2.0, 1.3, -6.1], "c":[false, true], "d":4.1}
-{"a":-10, "b":[2.0, 1.3, -6.1], "c":null, "d":null}
-{"a":2, "b":[2.0, null, -6.1], "c":[false, null], "d":"text"}
-{"a":3, "b":4, "c": true, "d":[1, false, "array", 2.4]}
diff --git a/rust/arrow/test/data/mixed_arrays.json.gz b/rust/arrow/test/data/mixed_arrays.json.gz
deleted file mode 100644
index 0f6040092ff1277ab28be57795e0d1ad17aa74c7..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 141
zcmb2|=3oE==G9?dA)Ab@1O|8oZR#?+WPBme;EM6Jzze3AjeA(T)R<i_3l(<-mx_3G
zc?CN<X?lgJRkdx6xvDUCR~y@pFVChvd*ZOAAtJ!m?Sz$r)`kf7NexlULncK9E@HWw
s%X2Z8DUi2J)Fss9f_T7QuTEFj&Z%BjTy9lr8@I+Vbe~B#d;)X@0M_X?j{pDw

diff --git a/rust/arrow/test/data/nested_structs.json b/rust/arrow/test/data/nested_structs.json
deleted file mode 100644
index 32a3ac85c61..00000000000
--- a/rust/arrow/test/data/nested_structs.json
+++ /dev/null
@@ -1,4 +0,0 @@
-{"a": {"b": true, "c": {"d": "text"}}}
-{"a": {"b": false, "c": null}}
-{"a": {"b": true, "c": {"d": "text"}}}
-{"a": 1}
\ No newline at end of file
diff --git a/rust/arrow/test/data/null_test.csv b/rust/arrow/test/data/null_test.csv
deleted file mode 100644
index 7e0dde53714..00000000000
--- a/rust/arrow/test/data/null_test.csv
+++ /dev/null
@@ -1,6 +0,0 @@
-c_int,c_float,c_string,c_bool
-1,1.1,"1.11",True
-2,2.2,"2.22",TRUE
-3,,"3.33",true
-4,4.4,,False
-5,6.6,"",FALSE
\ No newline at end of file
diff --git a/rust/arrow/test/data/uk_cities.csv b/rust/arrow/test/data/uk_cities.csv
deleted file mode 100644
index db9e6da8c7a..00000000000
--- a/rust/arrow/test/data/uk_cities.csv
+++ /dev/null
@@ -1,37 +0,0 @@
-"Elgin, Scotland, the UK",57.653484,-3.335724
-"Stoke-on-Trent, Staffordshire, the UK",53.002666,-2.179404
-"Solihull, Birmingham, UK",52.412811,-1.778197
-"Cardiff, Cardiff county, UK",51.481583,-3.179090
-"Eastbourne, East Sussex, UK",50.768036,0.290472
-"Oxford, Oxfordshire, UK",51.752022,-1.257677
-"London, UK",51.509865,-0.118092
-"Swindon, Swindon, UK",51.568535,-1.772232
-"Gravesend, Kent, UK",51.441883,0.370759
-"Northampton, Northamptonshire, UK",52.240479,-0.902656
-"Rugby, Warwickshire, UK",52.370876,-1.265032
-"Sutton Coldfield, West Midlands, UK",52.570385,-1.824042
-"Harlow, Essex, UK",51.772938,0.102310
-"Aberdeen, Aberdeen City, UK",57.149651,-2.099075
-"Swansea, Swansea, UK",51.621441,-3.943646
-"Chesterfield, Derbyshire, UK",53.235046,-1.421629
-"Londonderry, Derry, UK",55.006763,-7.318268
-"Salisbury, Wiltshire, UK",51.068787,-1.794472
-"Weymouth, Dorset, UK",50.614429,-2.457621
-"Wolverhampton, West Midlands, UK",52.591370,-2.110748
-"Preston, Lancashire, UK",53.765762,-2.692337
-"Bournemouth, UK",50.720806,-1.904755
-"Doncaster, South Yorkshire, UK",53.522820,-1.128462
-"Ayr, South Ayrshire, UK",55.458565,-4.629179
-"Hastings, East Sussex, UK",50.854259,0.573453
-"Bedford, UK",52.136436,-0.460739
-"Basildon, Essex, UK",51.572376,0.470009
-"Chippenham, Wiltshire, UK",51.458057,-2.116074
-"Belfast, UK",54.607868,-5.926437
-"Uckfield, East Sussex, UK",50.967941,0.085831
-"Worthing, West Sussex, UK",50.825024,-0.383835
-"Leeds, West Yorkshire, UK",53.801277,-1.548567
-"Kendal, Cumbria, UK",54.328506,-2.743870
-"Plymouth, UK",50.376289,-4.143841
-"Haverhill, Suffolk, UK",52.080875,0.444517
-"Frankton, Warwickshire, UK",52.328415,-1.377561
-"Inverness, the UK",57.477772,-4.224721
\ No newline at end of file
diff --git a/rust/arrow/test/data/uk_cities_with_headers.csv b/rust/arrow/test/data/uk_cities_with_headers.csv
deleted file mode 100644
index 92f5a17bdda..00000000000
--- a/rust/arrow/test/data/uk_cities_with_headers.csv
+++ /dev/null
@@ -1,38 +0,0 @@
-city,lat,lng
-"Elgin, Scotland, the UK",57.653484,-3.335724
-"Stoke-on-Trent, Staffordshire, the UK",53.002666,-2.179404
-"Solihull, Birmingham, UK",52.412811,-1.778197
-"Cardiff, Cardiff county, UK",51.481583,-3.179090
-"Eastbourne, East Sussex, UK",50.768036,0.290472
-"Oxford, Oxfordshire, UK",51.752022,-1.257677
-"London, UK",51.509865,-0.118092
-"Swindon, Swindon, UK",51.568535,-1.772232
-"Gravesend, Kent, UK",51.441883,0.370759
-"Northampton, Northamptonshire, UK",52.240479,-0.902656
-"Rugby, Warwickshire, UK",52.370876,-1.265032
-"Sutton Coldfield, West Midlands, UK",52.570385,-1.824042
-"Harlow, Essex, UK",51.772938,0.102310
-"Aberdeen, Aberdeen City, UK",57.149651,-2.099075
-"Swansea, Swansea, UK",51.621441,-3.943646
-"Chesterfield, Derbyshire, UK",53.235046,-1.421629
-"Londonderry, Derry, UK",55.006763,-7.318268
-"Salisbury, Wiltshire, UK",51.068787,-1.794472
-"Weymouth, Dorset, UK",50.614429,-2.457621
-"Wolverhampton, West Midlands, UK",52.591370,-2.110748
-"Preston, Lancashire, UK",53.765762,-2.692337
-"Bournemouth, UK",50.720806,-1.904755
-"Doncaster, South Yorkshire, UK",53.522820,-1.128462
-"Ayr, South Ayrshire, UK",55.458565,-4.629179
-"Hastings, East Sussex, UK",50.854259,0.573453
-"Bedford, UK",52.136436,-0.460739
-"Basildon, Essex, UK",51.572376,0.470009
-"Chippenham, Wiltshire, UK",51.458057,-2.116074
-"Belfast, UK",54.607868,-5.926437
-"Uckfield, East Sussex, UK",50.967941,0.085831
-"Worthing, West Sussex, UK",50.825024,-0.383835
-"Leeds, West Yorkshire, UK",53.801277,-1.548567
-"Kendal, Cumbria, UK",54.328506,-2.743870
-"Plymouth, UK",50.376289,-4.143841
-"Haverhill, Suffolk, UK",52.080875,0.444517
-"Frankton, Warwickshire, UK",52.328415,-1.377561
-"Inverness, the UK",57.477772,-4.224721
\ No newline at end of file
diff --git a/rust/arrow/test/data/various_types.csv b/rust/arrow/test/data/various_types.csv
deleted file mode 100644
index 8f4466fbe6a..00000000000
--- a/rust/arrow/test/data/various_types.csv
+++ /dev/null
@@ -1,6 +0,0 @@
-c_int|c_float|c_string|c_bool|c_date|c_datetime
-1|1.1|"1.11"|true|1970-01-01|1970-01-01T00:00:00
-2|2.2|"2.22"|true|2020-11-08|2020-11-08T01:00:00
-3||"3.33"|true|1969-12-31|1969-11-08T02:00:00
-4|4.4||false||
-5|6.6|""|false|1990-01-01|1990-01-01T03:00:00
\ No newline at end of file
diff --git a/rust/arrow/test/data/various_types_invalid.csv b/rust/arrow/test/data/various_types_invalid.csv
deleted file mode 100644
index 6f059cb73e6..00000000000
--- a/rust/arrow/test/data/various_types_invalid.csv
+++ /dev/null
@@ -1,6 +0,0 @@
-c_int|c_float|c_string|c_bool
-1|1.1|"1.11"|true
-2|2.2|"2.22"|true
-3||"3.33"|true
-4|4.x4||false
-5|6.6|""|false
\ No newline at end of file
diff --git a/rust/ballista/.dockerignore b/rust/ballista/.dockerignore
deleted file mode 100644
index 3cde49e0a0c..00000000000
--- a/rust/ballista/.dockerignore
+++ /dev/null
@@ -1,18 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-rust/**/target
diff --git a/rust/ballista/README.md b/rust/ballista/README.md
deleted file mode 100644
index 288386f0161..00000000000
--- a/rust/ballista/README.md
+++ /dev/null
@@ -1,64 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-# Ballista: Distributed Compute with Apache Arrow
-
-Ballista is a distributed compute platform primarily implemented in Rust, and powered by Apache Arrow. It is built 
-on an architecture that allows other programming languages (such as Python, C++, and Java) to be supported as 
-first-class citizens without paying a penalty for serialization costs.
-
-The foundational technologies in Ballista are:
-
-- [Apache Arrow](https://arrow.apache.org/) memory model and compute kernels for efficient processing of data.
-- [Apache Arrow Flight Protocol](https://arrow.apache.org/blog/2019/10/13/introducing-arrow-flight/) for efficient 
-  data transfer between processes.
-- [Google Protocol Buffers](https://developers.google.com/protocol-buffers) for serializing query plans.
-- [Docker](https://www.docker.com/) for packaging up executors along with user-defined code.
-
-Ballista can be deployed as a standalone cluster and also supports [Kubernetes](https://kubernetes.io/). In either
-case, the scheduler can be configured to use [etcd](https://etcd.io/) as a backing store to (eventually) provide
-redundancy in the case of a scheduler failing.
-
-# How does this compare to Apache Spark?
-
-Although Ballista is largely inspired by Apache Spark, there are some key differences.
-
-- The choice of Rust as the main execution language means that memory usage is deterministic and avoids the overhead of
-  GC pauses.
-- Ballista is designed from the ground up to use columnar data, enabling a number of efficiencies such as vectorized
-  processing (SIMD and GPU) and efficient compression. Although Spark does have some columnar support, it is still
-  largely row-based today.
-- The combination of Rust and Arrow provides excellent memory efficiency and memory usage can be 5x - 10x lower than
-  Apache Spark in some cases, which means that more processing can fit on a single node, reducing the overhead of
-  distributed compute.
-- The use of Apache Arrow as the memory model and network protocol means that data can be exchanged between executors
-  in any programming language with minimal serialization overhead.
-
-# Status
-
-The Ballista project was donated to Apache Arrow in April 2021 and work is underway to integrate more tightly with 
-DataFusion.
-
-One of the goals is to implement a common scheduler that can seamlessly scale queries across cores in DataFusion and 
-across nodes in Ballista.
-
-Ballista issues are tracked in ASF JIRA [here](https://issues.apache.org/jira/issues/?jql=project%20%3D%20ARROW%20AND%20component%20%3D%20%22Rust%20-%20Ballista%22)
-
-
-
diff --git a/rust/ballista/dev/build-rust-base.sh b/rust/ballista/dev/build-rust-base.sh
deleted file mode 100755
index ee4b32c8e69..00000000000
--- a/rust/ballista/dev/build-rust-base.sh
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/bash
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-BALLISTA_VERSION=0.4.2-SNAPSHOT
-set -e
-docker build -t ballistacompute/rust-base:$BALLISTA_VERSION -f docker/rust-base.dockerfile .
diff --git a/rust/ballista/dev/build-rust.sh b/rust/ballista/dev/build-rust.sh
deleted file mode 100755
index 1916f8efbef..00000000000
--- a/rust/ballista/dev/build-rust.sh
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/bin/bash
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-BALLISTA_VERSION=0.4.2-SNAPSHOT
-
-set -e
-
-docker build -t ballistacompute/ballista-rust:$BALLISTA_VERSION -f docker/rust.dockerfile .
diff --git a/rust/ballista/dev/integration-tests.sh b/rust/ballista/dev/integration-tests.sh
deleted file mode 100755
index cc34a5ce91f..00000000000
--- a/rust/ballista/dev/integration-tests.sh
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/bin/bash
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-set -e
-./dev/build-rust.sh
-pushd rust/benchmarks/tpch
-./tpch-gen.sh
-
-docker-compose up -d
-docker-compose run ballista-client ./run.sh
-docker-compose down
-
-popd
diff --git a/rust/ballista/docker/README.md b/rust/ballista/docker/README.md
deleted file mode 100644
index 8417d04c492..00000000000
--- a/rust/ballista/docker/README.md
+++ /dev/null
@@ -1,29 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-# Ballista Docker Images
-
-Pre-built docker images are available from [Docker Hub](https://hub.docker.com/orgs/ballistacompute/repositories) but here are the commands to build the images from source.
-
-Run these commands from the root directory of the project.
-
-```bash
-./dev/build-all.sh
-```
-
diff --git a/rust/ballista/docker/rust-base.dockerfile b/rust/ballista/docker/rust-base.dockerfile
deleted file mode 100644
index 4519225d219..00000000000
--- a/rust/ballista/docker/rust-base.dockerfile
+++ /dev/null
@@ -1,99 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# Turn .dockerignore to .dockerallow by excluding everything and explicitly
-# allowing specific files and directories. This enables us to quickly add
-# dependency files to the docker content without scanning the whole directory.
-# This setup requires to all of our docker containers have arrow's source
-# as a mounted directory.
-
-
-# Base image extends debian:buster-slim
-FROM rust:1.49.0-buster AS builder
-
-RUN apt update && apt -y install musl musl-dev musl-tools libssl-dev openssl
-
-#NOTE: the following was copied from https://github.com/emk/rust-musl-builder/blob/master/Dockerfile under Apache 2.0 license
-
-# The OpenSSL version to use. We parameterize this because many Rust
-# projects will fail to build with 1.1.
-#ARG OPENSSL_VERSION=1.0.2r
-ARG OPENSSL_VERSION=1.1.1b
-
-# Build a static library version of OpenSSL using musl-libc.  This is needed by
-# the popular Rust `hyper` crate.
-#
-# We point /usr/local/musl/include/linux at some Linux kernel headers (not
-# necessarily the right ones) in an effort to compile OpenSSL 1.1's "engine"
-# component. It's possible that this will cause bizarre and terrible things to
-# happen. There may be "sanitized" header
-RUN echo "Building OpenSSL" && \
-    ls /usr/include/linux && \
-    mkdir -p /usr/local/musl/include && \
-    ln -s /usr/include/linux /usr/local/musl/include/linux && \
-    ln -s /usr/include/x86_64-linux-gnu/asm /usr/local/musl/include/asm && \
-    ln -s /usr/include/asm-generic /usr/local/musl/include/asm-generic && \
-    cd /tmp && \
-    curl -LO "https://www.openssl.org/source/openssl-$OPENSSL_VERSION.tar.gz" && \
-    tar xvzf "openssl-$OPENSSL_VERSION.tar.gz" && cd "openssl-$OPENSSL_VERSION" && \
-    env CC=musl-gcc ./Configure no-shared no-zlib -fPIC --prefix=/usr/local/musl -DOPENSSL_NO_SECURE_MEMORY linux-x86_64 && \
-    env C_INCLUDE_PATH=/usr/local/musl/include/ make depend && \
-    env C_INCLUDE_PATH=/usr/local/musl/include/ make && \
-    make install && \
-    rm /usr/local/musl/include/linux /usr/local/musl/include/asm /usr/local/musl/include/asm-generic && \
-    rm -r /tmp/*
-
-RUN echo "Building zlib" && \
-    cd /tmp && \
-    ZLIB_VERSION=1.2.11 && \
-    curl -LO "http://zlib.net/zlib-$ZLIB_VERSION.tar.gz" && \
-    tar xzf "zlib-$ZLIB_VERSION.tar.gz" && cd "zlib-$ZLIB_VERSION" && \
-    CC=musl-gcc ./configure --static --prefix=/usr/local/musl && \
-    make && make install && \
-    rm -r /tmp/*
-
-RUN echo "Building libpq" && \
-    cd /tmp && \
-    POSTGRESQL_VERSION=11.2 && \
-    curl -LO "https://ftp.postgresql.org/pub/source/v$POSTGRESQL_VERSION/postgresql-$POSTGRESQL_VERSION.tar.gz" && \
-    tar xzf "postgresql-$POSTGRESQL_VERSION.tar.gz" && cd "postgresql-$POSTGRESQL_VERSION" && \
-    CC=musl-gcc CPPFLAGS=-I/usr/local/musl/include LDFLAGS=-L/usr/local/musl/lib ./configure --with-openssl --without-readline --prefix=/usr/local/musl && \
-    cd src/interfaces/libpq && make all-static-lib && make install-lib-static && \
-    cd ../../bin/pg_config && make && make install && \
-    rm -r /tmp/*
-
-ENV OPENSSL_DIR=/usr/local/musl/ \
-    OPENSSL_INCLUDE_DIR=/usr/local/musl/include/ \
-    DEP_OPENSSL_INCLUDE=/usr/local/musl/include/ \
-    OPENSSL_LIB_DIR=/usr/local/musl/lib/ \
-    OPENSSL_STATIC=1 \
-    PQ_LIB_STATIC_X86_64_UNKNOWN_LINUX_MUSL=1 \
-    PG_CONFIG_X86_64_UNKNOWN_LINUX_GNU=/usr/bin/pg_config \
-    PKG_CONFIG_ALLOW_CROSS=true \
-    PKG_CONFIG_ALL_STATIC=true \
-    LIBZ_SYS_STATIC=1 \
-    TARGET=musl
-
-# The content copied mentioned in the NOTE above ends here.
-
-## Download the target for static linking.
-RUN rustup target add x86_64-unknown-linux-musl
-RUN cargo install cargo-build-deps
-
-# prepare toolchain
-RUN rustup update && \
-    rustup component add rustfmt
\ No newline at end of file
diff --git a/rust/ballista/docker/rust.dockerfile b/rust/ballista/docker/rust.dockerfile
deleted file mode 100644
index 8b06af3dc78..00000000000
--- a/rust/ballista/docker/rust.dockerfile
+++ /dev/null
@@ -1,71 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# Turn .dockerignore to .dockerallow by excluding everything and explicitly
-# allowing specific files and directories. This enables us to quickly add
-# dependency files to the docker content without scanning the whole directory.
-# This setup requires to all of our docker containers have arrow's source
-# as a mounted directory.
-
-ARG RELEASE_FLAG=--release
-FROM ballistacompute/rust-base:0.4.0-20210213 AS base
-WORKDIR /tmp/ballista
-RUN apt-get -y install cmake
-RUN cargo install cargo-chef 
-
-FROM base as planner
-COPY rust .
-RUN cargo chef prepare --recipe-path recipe.json
-
-FROM base as cacher
-COPY --from=planner /tmp/ballista/recipe.json recipe.json
-RUN cargo chef cook $RELEASE_FLAG --recipe-path recipe.json
-
-FROM base as builder
-COPY rust .
-COPY --from=cacher /tmp/ballista/target target
-ARG RELEASE_FLAG=--release
-
-# force build.rs to run to generate configure_me code.
-ENV FORCE_REBUILD='true'
-RUN cargo build $RELEASE_FLAG
-
-# put the executor on /executor (need to be copied from different places depending on FLAG)
-ENV RELEASE_FLAG=${RELEASE_FLAG}
-RUN if [ -z "$RELEASE_FLAG" ]; then mv /tmp/ballista/target/debug/ballista-executor /executor; else mv /tmp/ballista/target/release/ballista-executor /executor; fi
-
-# put the scheduler on /scheduler (need to be copied from different places depending on FLAG)
-ENV RELEASE_FLAG=${RELEASE_FLAG}
-RUN if [ -z "$RELEASE_FLAG" ]; then mv /tmp/ballista/target/debug/ballista-scheduler /scheduler; else mv /tmp/ballista/target/release/ballista-scheduler /scheduler; fi
-
-# put the tpch on /tpch (need to be copied from different places depending on FLAG)
-ENV RELEASE_FLAG=${RELEASE_FLAG}
-RUN if [ -z "$RELEASE_FLAG" ]; then mv /tmp/ballista/target/debug/tpch /tpch; else mv /tmp/ballista/target/release/tpch /tpch; fi
-
-# Copy the binary into a new container for a smaller docker image
-FROM ballistacompute/rust-base:0.4.0-20210213
-
-COPY --from=builder /executor /
-
-COPY --from=builder /scheduler /
-
-COPY --from=builder /tpch /
-
-ENV RUST_LOG=info
-ENV RUST_BACKTRACE=full
-
-CMD ["/executor", "--local"]
diff --git a/rust/ballista/docs/README.md b/rust/ballista/docs/README.md
deleted file mode 100644
index 44c831d3780..00000000000
--- a/rust/ballista/docs/README.md
+++ /dev/null
@@ -1,37 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-# Ballista Developer Documentation
-
-This directory contains documentation for developers that are contributing to Ballista. If you are looking for 
-end-user documentation for a published release, please start with the 
-[Ballista User Guide](https://ballistacompute.org/docs/) instead.
-
-## Architecture & Design
-
-- Read the [Architecture Overview](architecture.md) to get an understanding of the scheduler and executor 
-  processes and how distributed query execution works.
-
-## Build, Test, Release
-
-- Setting up a [Rust development environment](dev-env-rust.md).
-- Setting up a [Java development environment](dev-env-jvm.md).
-- Notes on building [Rust docker images](rust-docker.md)  
-- [Integration Testing](integration-testing.md)
-- [Release process](release-process.md)
-
diff --git a/rust/ballista/docs/architecture.md b/rust/ballista/docs/architecture.md
deleted file mode 100644
index a73b53a0870..00000000000
--- a/rust/ballista/docs/architecture.md
+++ /dev/null
@@ -1,75 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-# Ballista Architecture
-
-## Overview
-
-Ballista allows queries to be executed in a distributed cluster. A cluster consists of one or 
-more scheduler processes and one or more executor processes. See the following sections in this document for more
-details about these components.
-
-The scheduler accepts logical query plans and translates them into physical query plans using DataFusion and then 
-runs a secondary planning/optimization process to translate the physical query plan into a distributed physical 
-query plan. 
-
-This process breaks a query down into a number of query stages that can be executed independently. There are 
-dependencies between query stages and these dependencies form a directionally-acyclic graph (DAG) because a query 
-stage cannot start until its child query stages have completed.
-
-Each query stage has one or more partitions that can be processed in parallel by the available 
-executors in the cluster. This is the basic unit of scalability in Ballista.
-
-The following diagram shows the flow of requests and responses between the client, scheduler, and executor 
-processes. 
-
-![Query Execution Flow](images/query-execution.png)
-
-## Scheduler Process
-
-The scheduler process implements a gRPC interface (defined in 
-[ballista.proto](../rust/ballista/proto/ballista.proto)). The interface provides the following methods:
-
-| Method               | Description                                                          |
-|----------------------|----------------------------------------------------------------------|
-| ExecuteQuery         | Submit a logical query plan or SQL query for execution               |
-| GetExecutorsMetadata | Retrieves a list of executors that have registered with a scheduler  |
-| GetFileMetadata      | Retrieve metadata about files available in the cluster file system   |
-| GetJobStatus         | Get the status of a submitted query                                  |
-| RegisterExecutor     | Executors call this method to register themselves with the scheduler |
-
-The scheduler can run in standalone mode, or can be run in clustered mode using etcd as backing store for state.
-
-## Executor Process
-
-The executor process implements the Apache Arrow Flight gRPC interface and is responsible for:
-
-- Executing query stages and persisting the results to disk in Apache Arrow IPC Format
-- Making query stage results available as Flights so that they can be retrieved by other executors as well as by 
-  clients
-
-## Rust Client
-
-The Rust client provides a DataFrame API that is a thin wrapper around the DataFusion DataFrame and provides
-the means for a client to build a query plan for execution.
-
-The client executes the query plan by submitting an `ExecuteLogicalPlan` request to the scheduler and then calls
-`GetJobStatus` to check for completion. On completion, the client receives a list of locations for the Flights 
-containing the results for the query and will then connect to the appropriate executor processes to retrieve 
-those results.
-
diff --git a/rust/ballista/docs/dev-env-rust.md b/rust/ballista/docs/dev-env-rust.md
deleted file mode 100644
index bf50c9d9c91..00000000000
--- a/rust/ballista/docs/dev-env-rust.md
+++ /dev/null
@@ -1,38 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-# Setting up a Rust development environment
-
-You will need a standard Rust development environment. The easiest way to achieve this is by using rustup: https://rustup.rs/
-
-## Install OpenSSL
-
-Follow instructions for [setting up OpenSSL](https://docs.rs/openssl/0.10.28/openssl/). For Ubuntu users, the following 
-command works.
-
-```bash
-sudo apt-get install pkg-config libssl-dev
-```
-
-## Install CMake
-
-You'll need cmake in order to compile some of ballista's dependencies. Ubuntu users can use the following command:
-
-```bash
-sudo apt-get install cmake
-```
\ No newline at end of file
diff --git a/rust/ballista/docs/images/query-execution.png b/rust/ballista/docs/images/query-execution.png
deleted file mode 100644
index b35240282bcdf832aea82da0c8acbdea492967a9..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 11378
zcmeIY2T+q=`zDMcqO>R-(NLr#p(`bHklsNAB8U{}VCY>yx<F{sL6FddAkqv~nn@_4
z6eCRp0tg5gLN8&1@9+K3?Ckt^XXpEN_WNcxGnwZ+=ef_h%Q^RbT~A3cGSH-@x<N%o
zMn<cxrEWq-Mh+n(yMVhwPU^X(@B>9UTzaaauR=yvpGtjVM?pI0wAV7xCnF2yBO`nM
zf{g5l)b;!q8JV9L8QI!nGBUXwGBOtLf>vWN>4KM$o|(qk+1bv{&fMJG@$qqAUmqL}
zCtb<N$Y^S6Qdd`BTU#3)9R-0vXVX=Yk&&#dtd^FRo}Qi)6B95Ptfi#|p6s#nwPda<
z;}}~$+L)9PZl^9M66tB)R~fm6&Szx=SnA0&<p+4$Xo2{FtE;Q=D0v-{f(uV{HPy-D
zDQ;<zPOf@uS^AKXvF4xuFKGKkXOcQ8eYN#9D3>lW(O=_DP;@jSBNJNJR#!0#oZ2Y7
z8uDJQv6+n}nz2W=SWVL-gm~t4@=YhF(XUHrisxts`=c}dDxc+y{*^1F*!-{fZDxzD
zAw$`pW%3xixABq5+}Ba4SiUlLy8p~+`{@_z0t(rYA*%As;z^$+N?XfoOxoOa@u-tP
zFnzO{AK`Ji)WfVY-mX_JJ$28DKzfFZ8bJc9z7hR(HD7HKGlekBnN08uhcQNNR$dq2
z%VJI-C+xAJ?6CBb)vwNqpG=M-ZQ5L;Xp;;rwVdTEjCDpCjI?YXA2i+gFnofQCS^kO
z8iUB$=~>ib+K1!E2Dm3_(C}gkmHSOMb*ea>t7s?F`}=P2>Z&E?G)hOttKoSjKhTy6
ziTF%{>h!JwzC7FSzR4#Q*v@RBuTGJQ6PoSPdaA}GinH_;z{0pk1uqgj^W5Llu1-HG
zM7TPA4xR}E0~6W+PC7B8TG>5?0K5zchlriO@I+9-5PR+PEn2!<h7X&=4Fl>QDFZ#B
zUa^7CQ`!K_z7GW`A)|BZVP2os9Msfy>bOZl-{h#~f4uY7`&w)L?xAHdGoB1^ZAbwL
zt#-J=LVIfdVa})j4dnD_j!)`|2o_c<Y(H_yoM{NRxro=A2@|V;X6Z9M&=-887Q`Mi
zlT|O)t-1tWUaWug1eaJLcvN-p0oL2hHOZr4@hNlahgu)`CDQD89%$v$`P*!)&1I!p
z7?b<ecN`y-mftZDI&97J(KA77J7lS!<m$z|#C&3W#1`~jt3h<(!!c<s12~58+ZARb
zJq?U5*PbR9{8#`A8n8=du;RNk%+k^v(z)Z^GHh*$Qhe;DJC5#84WELQ|AuNga2_O@
z0IWw|6Vh&*e^S6--la_slo=S<=G3?*%GeZEA;rUPvSaaJqEQ+A@k&$>y&YjI=wrH*
z;Zsgv07X>L4Ozg2=`)>=akdRB^Vd_4FLr~|oYROaS^6$;C!50;fivt^%B@MVm!e$K
zG=SW|02)#wvr^^{HU7ixYkEmJKHeeWT>rX0e<>hqmmxVGbu03*l|y^5!(X8p!1AKe
z1T|p6E+lpL%6{~r(fB56F}u*Yz2|pN#~)jysm^f>TTAhYPdm+W2W&Hcg0H?9&+6*E
zU0av_tA%!aDXlD_;PuFE4-+ElQ1Y)3FlWgPTq)DC;yPN)j?Ox{vAgoAtqi}q)*6-F
z-Oe&f-VmCMI(YN@%S@-Zp+<v%zP=K}{UF4g=CbsV)&~67-7jk)PqTum^Ka|O&Yg{?
zXA;zBqxP^fktQa8&NlZPZ7vgRv;aff(!e(m6)vSVOLSs;`T$1s7w(>SS<faq!FKml
zvs63LlN@=%nZh13#{d4K=IgV!8Rgsd;D(RkYaFBIavGxp&ZGV}Q|BGUwqi2#g6q!~
z9Yq(#(}dmgWIXd=n$Psv)Q{+D?x;>0Ne?ioAKed7OAIH6+G1eU-)B0r{1(1_)s6#m
zTl7q4%zap6bh#=0d6WCxviNY~hta`68p@Q&8QU@hd`^2uRCDRo!>ab=h8-h>cnq3p
z;A5&P(IPg8B5S)sbLv+dL}!F`z@$2YVq8u>`WML{R?$V3WTbRi`n}r2d24;YymF}K
zg58P|%uVm>w54{BjNw@G*R@~bO913hp$@%Hax<ObNC?}I{HMV{z1Wyhrd6P}Uynf8
zJ(1x^h<&2++~W!GMg7Y#9X2G-p%;`Q5_$MYZpY4Z;a|GMvxE_Ov8j2j-T6ja`;bWi
zi=DyCuCmKiL|xFtE<_sFs`1w*@vOISaP~91w-Z-xuMRi>JI$Y!qJpxRdGpJ2Je4s<
zb(uf5av=B1^#Igd*sN%_&t0hWwm{ZYj5ZEE)Fb@hb{QY7se(F>j_#=*Yc=z>R<4=i
z%N`dhX?Nkp)a~k$aw%w5C=c_%lNQc~RDF6U+8EgSKsOEHiPwt=uLI^(#21BNW}6Wd
zCAD<HN;+IfYD86+K{B_y&OT9FOIJ3*+#3sTrKWNVLuvcGeC3*k6D%ITe*xF(B-Ve&
z=Tc}(AX93QGt^6qz-SQ0W%gaET~gp!S9)74eyKrbO82^c-FvNVlV+pXWmh5X8+$>j
zgqZ+>*q4v()P-Tp?MmIhhoT@UedH$$hEbMI{W5z-&Fr*zvWlN-VuLkP$1bt&XPp5+
zfH(IhCE??pw86D4Hm~Fj<}(Yh`&K=0K3oiCFM@XDK>J;`xkUIV>e%)br~v#a6?)aF
zCIU&0bSFoC{`lrGUAW+MGO2W${t?c>mwIw$%j3z;5+b@G<6R2;!W+mFlN_1!-xWn0
zfZY~e{xYTY?VJ0H&qjutFl~>EtWr}lysl^70vias_!5`<D%>?rS-zdMswBQzACC(_
za-@bV>zEue3^A-f+nep6lhRA5meP4^y8CkI9^YE=zUFmgk14z>JMiENPVNC<LX<XO
ze^jZ;t&Of)Z)|RBa{_*aUE(T(Ymd04Sb6-YcN)=fKOQfY*y{DN>T9aFp;@b#Z850t
zPi0#`4$<xfQuO=n{$5Q^Y<+O0oiQerumji|jTQ1)sqH&~&7)>JVi{Mx)dphATTF7x
zofC+~L1Iu{N73xgY#pO@vs|DEbJj`D-NKpsyb}8qA!YwqC4J|UKEt{#V+?c)VYE5R
zgaK?M76<I#Fm>qtg>cgR<X}(5%##$Q6K)SAyb4AJj%_D(a?=e&sebN|Ik_4vcj9mw
zxiEhlw(hL($$pfgsdf@I+|f3*xoNZdwJoIS2-Of9(@D@Sc5hAAsT-Sp0y4q)mXs2)
zEAQp9tZ(EFD}%P4Lr`nRex~?sKFKaGk18V_rlZU1vzb34A!CfEzBcdqu{k|6tHPPn
z=FBr;yA3fhGJVDCW+D*=8h43`%+R1q=h2l8%J?<S)gTtrnQybl5wO2znscm6wedq7
zD;2Y+X3VOy<q#W#{J>XR*=nZ3Q-K7eA~~}G$_V>iqeTf0Ukn%4p)z}8ZuTv3Y0VVZ
z%Ytc~&vDoLdYESPYOr-PM|Y$sM>{#Pl66yWTon{?ZgR;dN5!A%(L4&$-EG12{sH^d
z0^kpA*qkWrL@)xvVU<jRz77t3b9A=_zlEUWK2%A(P$-2EL#+8i`FOM)HpqUylrKr9
zdgk-*?^Lwb>~^%>sD5KbGD4fVnr&-Sy6oW}XT&3%NFCi?`mSKGlEm6(Ma9mv88NvM
zQ~K(|nUDZL7ilTSQNXJmu<}syQP60N`93{>BNC#y{`Jk%`K3)8JI<E9dvUf4luRXx
z9uqtXx$_xBH?8ZL%lC3>fj^Tmv5Xy%5-{j-X3}v>Xu8lii!nyCz1)iCUcH&j$PyT~
zer2pJWOcp}*=A9&-{$MQ`H87dyFDPFPF&-f&(;KoJS}O8S^r5Ph287>E#&K)e9Zi_
z#}e9~ywaz`SFI!PEhQ_^n`l4Nz?b_YM8|^0E{ga#1x~DoAki;3y%ir#a~%+(j9_Qb
zz(m+QwyvC3+`EB)M~Q3oxdzDhh}fl_qQfp;X25$>qy1Jmfp@2gYwV4;5GY!{g?VP!
z`ezled-LhUJ!g4HD!X1^uLvi$FgozN`|(?^KXS>Pki4_&dinT=B3S+~D0ucZs#gSg
zcSN+V&TC6}_Sf>6o6ksh0b6Qn_J)7Ubc2IFirBdk@h?F^<lQ=T>%$29J}Ztxp99%N
z`Ayjz!UEo4=ji?IBRTCGXg{54+puVst^MEMHVe;!-e89#_N=^M>zDE0uiqn%xxPI+
ztdN4OXEt{oT^9YZm3_)FjIG<1xdZ8NZ}Ks03<;hKkMP{N1q&e6W&Fd{3*-R0Vn;I~
zayqQ8N$R9!Q$LlnsLT17x7g#=^sJSyl`Gr<gNB>B^T0-I=lFpyW_`erbT`f7vHSPV
zB1ZiWRd<HAI&Hh=a9eBJ`LDKM>r?oHQfzoy%vAXGID7rN=@!wBWQ*WOo9fD&)17n9
z`=$u5BMHG*ecFoG0NFThtJBPNzhkZl^RA`YB=v3GJM9PZn{p(d>$#7JN8?<9<)X$f
z?z-K|U?EO7r`OAgiX4vIihI0!s)hgrhoo~Ix8cCXwZ|!OYEYMGx}~XK@%Z9l{gtAc
zILN&O)X_#h<xJg3Fa))wq<)hvqRS2LP_W;<6>!vB#{m_IV;q#czp;N45QjTzS_$6T
za_H8yn>!6g`@Ibe4GqV9@Wk$e*Hwz^Ky+=Jy(FmiS4&)C8)GMp$i@Y4q$2O;s~`-4
zt8Nbrmq)m8t<S*zQ)Rm)5c%M-X9p)-*kjIYZ9E+^_MgSDJ(}!k+3*p?27?FgkNAxR
zg1cBtije(EyS^u=V95>S(Pa5j_q#tHu=Oyl3rM|xRyto~qsXzmMQMZ=1+iumR@|~V
zT9<wad7IRQD~gsC25!xowZG(eAW>33>B$f67)!AbkyQ?ERx@-)<FoXDdnicys$)wm
zMD3sX47aRHy@0J=7GqU`!HecwrpSZmhZN99^1x8DChPDpH`D=oeIrCB0&Z*Ta{NB)
zOHS26ZEomXheubdZ$7C1GL-f_M{GdYv9%;}hw|tEdBWM4D~9Xj_qQDEFqV!#tqzzv
zT%l*>p51z8we@S)hp<y(IG2yzWe<zM%ztx2&W6`I-l{vY!c_3}6Ik3(f<JcUNqK}M
zJ`bV$ALiH>{|~@>Iq@h#9-&p1fgtOnfOE&1dzY-lHWnU%V>Ry;hURZcoFF6b^P<w%
zH1FlcWB_}lDmFM2aspBU@Q+1l_?3>~Q&?Daqj};`FC5@}R95j;{_RFxr^@uaoaA4r
z?;)edFPv}oZtR;q6V=J?ap&9w3g1de6{lW&XJ}=BOGFWcJ2z#nLuX}w^xJ0&d*aga
zd8MCSbD~{cnY&yMu9Ms<9>mmLU#*Zz7Hp-(Z9A81@B+w0c<p|CIa@0$;NM{8CR`a2
zl8o*vD-r5)omL7K+q${6Aij!Zeh-Pi_?w~aBO->W2?-u`hY33+eF(T=vcrxRU{GLa
zu9z4ZuU$JE)0Q>-E`n}=SJ?;z@r4d7e%0X9wQxVy1S}UWk^*6mP`-rm@qZzbLC5>F
z7<b0$GA_V!oMvF^3)Nr(P!H<So7dbNWTNC-g=ykkmnQ$5DDZY=L`!K{++<Tl$fMc<
zX&PeMZ(8V^_F26bNTmN(zWMZk4tK+pRX#{2XVurZPfaaIs}0;msDNaJe-HWWy2swW
zK*LR{qwPtkA1^*d`Hqyz%`uAl5f~_;q$nUnCn%k3E!q7r5d)5$HbsY?jZqK_+U1R_
zZ@h{3fqcLas9-Ji+3iyk#w_s5^;i!j+?uK0Jy;r#C{Yce6LESGI?I>Ca-FZKnsB?9
zkeR!dX|NLJ1E1OfI1)g>^NlQ=5iO{3%44GnJrd4#p>Y|1!i^PW|KhfIwuXFfP{jzH
z<<7~5`9{NnBm4?|^_4@o^3Y6}CUV+UpIpCf>t#1m;P^Z<^_fumrqB=;<7-S{bsEPi
zoD?s<`=P28Ba@zq-n}o-QdnH9#>^l*7@2uJ;kd?#Pl+4tm-N_%4AuDP624ScW<;r7
zKNaIh{CMB0(>|9243zrp{LAtnW@rI_fDpq%9Fp&7NKk$^tG?UuN{3HyZ+SZ*W9cRW
zbeV#GA}PxzzE9~h)`3KPHN-~vH|5JsR@th=+{|uJI;xll2v%M-wzTl8nuAwWm)Q%o
zjR_<*{x(6KXy@<b4okz2q+O?_I#geGB*oo$C?KL<&KxZBh(8z<ZG9)(&aa@d;Pyv-
z^R?I;CONNJnVS~G1B>{meWbH^q=(<K=-g6(H{=FbbmXu=3k@41%8l!pna+m6nYwhJ
zcIQSjZ&V+284;m-w=p2;vk4FOf!3Bq<D;m&S4ue?tCaC^4ax|D_I^r}D%%N4gu_`g
zq_^jV8S4w3;v#o??fNg|J*DfZtv<y%AVL)%YHGRZ)wrX0MCAb)-kTZir>IjLa@AB)
zIA@*fb%Eeyw(e?BZH%Hql`j<=0tZzsPy6~rN`XF!Zb?VOvai>R6MN`jl~F0LP*n*d
zzr_5@2*fsq>3dK;7B?H|-;SnQYpdooss55L3lsJXG|IKL8z5d-n7;#C4}~fSGHDu{
zr9$y`J0+0P%YYm5jth9o`_Co-+>SrQhm(q-82nN9OT~qGc9OVG7`{o2?(7o&wt*x1
zJ2XlU0K7fx(==-`S*U1m6pomfom3ey-%np#M_vbbMM7ANOz+jZfoi^@Oe`_s<KP0f
zgNNF3Ra%B24oBGQ=I-IPnf@Kw1)HPA>jaytyIw4EsRfFILt0QGfD_0CL|jCIIBVkb
z?kqmDW*_`&+)dtAo(a#?Of-MwGI!&FQQ9F=Eu(qX(bK%JBIbmsQt>&#MB~J=bnf!_
zxQl55WSlSQ)v|39{tDik3GH|F*@M;m*LQaE3+5Q#3sj>uk6zT<*e=!6?9qR#%37>=
zhuCp6wad0b3|E;TZIrucW$cVWJI{^7A6kzHERx8-F(($To>8?8^_prwaTEmd5eH5g
zdgEF=avVAbnl1tu0F;rCfRtmrccHAunRj<%Yu%abCaTwOx$(4@Vr;FGNTN_a(<25y
zZj(q9s4F_i<60_m<0=1E8i{9R&vh4QO!388@NfeO&mjSfLzd`a5CEa24qm>1+!V-Z
z&>H3n6vtEJb4dK`&EwD9x5_^xHmZXWq~th1Ei~Y=uuKRXx(qF38lo&6;ND-fn<&hu
zx`^D&;|@>)KA1&TaRLOgCmzne>agQ#(vA-tqJYjCi#<&<7*4#NDoplQ%P*Yk8Xh9C
zAPzwLX_8E;9=75HV#)s{2+WB&S8zTEKUhn5JvB%fAy2j3qyk=UE-q}>yh~z&;gp1_
z|GuB?cjHkrdkq=;=*u2)gIkYn*!%1~k0&u94(H$%AXZ1aAl`EN8>jG|ghJc$IoQki
zoau1?ZQ$hV4evs>AsLqm4$9x6cn~n<0un^$RLOC_f~e>e5o?S0CRuGM*4r+|(kdiu
zcgzk>jLT3)P)Avo_x~Z#I0{1}o{Rq74W44Ur<wHVB{JB0%{j~MielTy=Y>uZrHT6W
zvkPa5apnP@LTK`VES9)74%~Y-z}w9kGZ<y2@Rgpj+3Z259nY5lyf+)#FI;S~%67o&
zmuD*f-{dtGTP!2p>w0ivEzh~KPZaV?cvV;GWbwQOc!nH0e3kDQo?^jI6K!fdHr!_%
z3jIX*ew-)&GR|HZk=qUCGqSW$*U#UoOTK+-82o)nzxn1WUA+KmoH94!%PeE9&Y56C
z=x}}Qj5ZkZ7X^6PN1YO#=Ma7FT*hlTkY{QhMGcP+VMm+<fJyb36chj1#3BY;R$;M_
zvS3Gn_RDw-s{DF-Mc!%Cf;q3m=B|55#@jo`<-adS%QkHrst)JKNhG}Qt=od=V0`b3
z@H*w6zT1v-Gl^t|o-)DK`MPTaC8^;*wisk5AgFGfMDXD~11ccdz%9>1Q=oczJ+4Oo
z(`!Q!0#W~4m$V}c8}m}$Txy_+wYjQAJk+z(#71^nR<dB90U)o8$iLK3x=MT3RH#&=
zJ0#T7Xk6)`L4uZkp4kEyhIUGLwNbUSanQ-6qKunZ`TIu-)I3vNd#o=SQspj~u{ApF
zp2stgFZ3f@_(swnW&AtxbC9rS8&77;qWRKCE`5tR_J~u^ld4LV<4J!OZZzZ8(WDQb
z-;Y=cotn0mXb3On0ZT%f@XOK~JMy?KD(;QU&`y4A*_BiVfUYva`jTX+4iUqcQSYkJ
zC*c3s5Y%u?ko$4gPi-@ba>=FnUG4oxxi78k7>b{MQj%JIw31<#;2WybnbgL=IcY_Z
z#l63TYt<IhcH@S+B*=xM;ea6to!0W9qi%<rOllTQ&PT#3bU2~@?&jZ5so77nFb9v)
z6c2>BvZXn(`calNrhlxsXMHrjIy_`09F6MHG~bEJ)$nqWp)^k{N|)o)ZaUBa8Dz4I
zYUJ4?#%aj)N0?Tx_Da;^d`?wpkVMi=U&iBdvmHNrWbAFR$%Y0#`l}|25~I;5ok6Bm
z!#74u!2&FUjIm`YJ0%k`cierwzGd@(K1{FkNi}|%ecq@UVZfIZ1&N?J_hB-bk9yZP
zQ240qSxs2RBI3oAm>CpiK6s=a04h`U@zgaQK3u3V9DekWo+r6|z&cDK$;TA7ZmD63
z#q`s~1=ATrh4;h(Lw1nCH^WBB(8AD!Pp5sobg=ci-FJ>{&7EepGHa|fpvwKXlBlYQ
z5YxUl&9OqVOd7OI<G3R#+G-h}v5MuTMAvUs<6eiatDQ|&y8^W;sU{7Sn2Hn7?Zf3?
zZ<U1FU`nhJ4LeZY_gQgCaZH@p+Nj*8Iuk(wI)c^a)hvX}<NUJP<6V#1ek04=s&Set
z8CHUhMTJ6Dy^x8k9E$J~4CI>!t3x8gvR_Ftb&)_r(+Bf;T-!ps$dKg2H+dnw%^RQ@
z{jr@sp&4`Vou4@nguV2Gh?I?QdkR3<dP-D~hkHx&7T#M2q?_f_3H)ftnN7J%7Jd;4
zuB80KfL>BjcwVdw0<2v)xdaU|0{rBM4Mqo(DMdnn<Kzcdaoex||32h<Q~}DL`t&WS
zbo*67KVy5Dn<pY6bmYiol^vWEf|BmsZEI!3Ip)><jZyP+$T=X|WDBi?D!5-j{zVax
z`RBvn?-!xN9!l~-kE#m@yYDD)dFKGoY9_m~ZbyxlK1YXtRKVQMp<~@-sh}>7^!(u*
zSFrE_S=;}oim+?O_gCPlrM)r2|6$jr;rey>EtH1Q@sf6?$z%kaV<2K9@jo0o9t$oz
z-8vb?447nP1W-bEW%ovX)ph9qlrx|9NJswL)qNBc-c?($r>lfZ2fTT;HYedvJt?wA
z_8;CCK@zH`DDsVsOQJXpvIfXSEULzz43!KgRV5(f1`@e5KzB@yYv0G3F$*wNhDVMq
z#pv*m*TwHFKW!}b2;<CdkO5ir-4`h3d93*m@&0upWrX-(mUO!!2Q<gCwp2o(%7wc0
zAq}qYJ_yvc#u8OfSzT#YQR$qodROVqsrx!PvY`JN(sdF)SWa)s9i-jYQou4LbES$E
z>Wt)Gcv0(s>6eG6cJEv<8JH98OD@xLxT0-_>RxE+#;tBVwdO!C-SHfN54qd1npqXU
zn*9n*0IRTOofv$A($dPh8rb+KP1MF~u`xxY=|+6Y6%Jxq>&6xAHPixdQ099}Yw>~c
z6*<)zZ^yWQ@k=potW@@WkTC^cl!Eqmi$a&~tMor{v3z>@d$S^J&~hg8EeG@c`=H!b
zF=*2)r4nx7O=W(pXr=bp5L%jNDqC23{mn--74r85<edTYUE$q#@6wE)+&tyRiWzdG
zzhotx!GxuB%tw8ykXg|X67AaKle!@*K(!9O3vJqd5L~nftxt6-6$8@8bU^AYD|s{o
zG_7ERcVUA?kWex6!84~<x3%I&Yg`u2U`Zim+#W*V$-^kQH<?Ornr)pOwr|Hk_97w7
zhTPeHYWl(q3|W~a4uZI{tq61zOwx|mjWsvb9wQL6SZ)o|mCdt)<{(4SjsUb{6A(|_
zjj}r#xw+JpQSdHn>_5+w1wG^|1vm6W6N!KQD|1Lau@z1thQs)6I;1D;pKK@)#;#Aj
zlaobZZm5E=&mm7?&6s(0G!nXT1RplBZh&|O9{TL>R$&p@gDuG?4d1-Jy=9pavFw|u
zAjVnLD$Xf#Vt@Zj!^u_X)i(pZtJO(M!ND1dF50%>?KfMq4<fb=cJ!yE>zD}#yNg*$
z$AL%O!Mxh7xRKR@#irs&YJ%_?YY6js)jPR5yW8F0SEm~nGxI2H%(dzC2e|f^AE9yb
z>+zUmM0*f?LjXHw_3B5^>6oh~M|+~;$m$2WB`QKx?Azi;WTZNhs$};8Qk>W*f9${d
z;<9L+OG}2$ufn3;Hqi$wh1rVOIA~ZJ&5h#2tPn&)r<WnfgINWHJ+Csv7sjB&u%ync
zk4R`(fGOV43K$$j?`IQ0lS&Qu3JzXt%tOO(g&zH6)Gb`3M-J4i=_1@)7B8WBF&_=9
z@3fxe=yKR;r#4DLH*5l3{M12~UGp|>L7TXegsr|iQ4oar=RMEg0vd{HhzbAWjz<Y{
zTeErD;fNlYoo|D?qqH{zz*1magvO(dY^zDnW(7``YkRRV<9zc4ld7-Qg7oW3Q)qUJ
zvW0;Mn9+N+MXUOv@er@lteP`i|L!4lLdpLrdRJ4%RyjKdJyDEr-N|voAV~MqOJh#D
zZ7YREcG+-Id|dP3{#NawcSri}mRDbNFEx&8p-cfb=r3<`b!Gq23iU7gLWsUXk#47N
zGfdH0xFWS>2j6HF&Jfl`K)qdh)oj~NB;<D%W0&<<yAU>kr-P_54XF4r>3z;dAaB83
zK%wYs-Lk2gj*$2wL2U42N5u0rA{!`fYUgP@mfZ#@n!fhU%b?5CRmkGqO7{EN6pNn2
zFzn&QR`uY=4}M||4T8HeH~ib_KX0{tX8FP!ei#Ecglf|=`;jA!l$HMIf$YQ=H)jpz
zPde8_FWf?@AvZ6ttS^(Al+a1{kZImO9nVupZ<(4qHs}v*lu?n8yVF1XuZD`gUQ`C<
zdw8{7>SX!yj5B+<x#iKtIqF|ButBHkX;9=Ah3`$YW5b3=_Q(B;a&)-&lY7m&KSTbH
zo3=nH?9Zvjch3^*e@C#smJSDHU7^16@%rD@<s{Se4xjVud`@<=`|rwK^uF|W<NRNN
z|0E76YM<@fVm$x7qfpIqutB>5(vG~QAUmvp!XCED+WvoQrxJODMhtEgr>%fFvlR;J
z^4%!(MpOO-rkQtT3c}x;rr)<O5++Cfo)4Xc2)|y7SY*+Ez~@?rUK4wE>ud)0IfG9H
z_EhECK#x8NHN87M5lluKmJ?q$nV>@1?I5awh-ss)*HP>j|C!;Z8A7HmF%R&{Of9jk
zI9)R`NqLaT+>epwVwTw-HyaBLP}~C{>&9Hn?K9&y=_cDgXO8c)gs!GncMjOG;@-y*
z{CsMkGa$FijVYwzFNqIRDodi`c}rBy+Z$PNiE#R>=6)JkW>pJ2e1MS3rBM__#Q3Wb
z)Asd;Z(FX}Jp6R}55K@t$)vq~T=zY<S(wNCdY64gya|vx(WwxEEOY70(t{GjAAVmk
zUXDl;PN(f-1908$i1|(8H5C8w8j_R79!}KJFjq_CG(iWR%M^}`ms<y~_OofEo2=UC
zWh6<L>mG4g=ma~X^AjqsBBZ0Q{5t?*)&h&*eksOvRaY;)H0smp*uEHFe>)L16%h+@
zGj|~i{oqfqehDiT(A=~0aC+ZlF24_g52zu{&IS_RZ0>juX*#%Axgp1m>cfz}XWy>j
zS{eSX+s&m>O>29uVF7W_{?d(YUJ1hWlWYjJM}=OvGUE8-VJq^%M`HvmFNdj;L6Aq(
z-2CRR`q?_@s&?{+(J>C!%A8kkCAj;8PgOa5F!Z?XTclcRb%DPjMp_#DIfGbUe)p=x
zX|vp@PmH?XEaS;k7^2dkRlO-U(ZX!07IH*-I7R+<L<N5uON}bL;^2ltv#Gj=uR4OW
z?qDp(@wejB=}enDLEZ70jN=Y=q{mE`Nb>#{dQS=MjwiMWZ+;d-&ZYuM#OJ>$@;>E|
z!6MQ(8>;`5?*hrC8%23dU}@B#JWg)vpYpu|LE2zrCor-G->yNCu>=a=ghqY33bW5y
zZdg%|S5tXQ(B8q~wmQQO{|-o}2d=m&yuqg9izkK~p}4Em?z(%pcLCY@4_MF(rmmQE
z-ir>ZhZuT|@{6n{X@>7~Os^UKNa%EFTb_6V><Vw+e#&8Jt6s;WoqZx(=+nL@*=bi)
zWMg0P>~GNorTMc^8J@KIK19z>+^c;sy)`_RXmign|5k^R0s)n8F_i*UsC8T|#46SL
z?{vTYMJPyFH$sSs{&L!9ICT3rzV?Qyr}<Y5feOF;T$0}KAEoXXYY5-(Uu!3ft{fR5
zMAKCkCA**_e&6;tFz{h+bj-z8v;YgH>GlHle~Q{<kxD(Ux2!qyqwpbk>bo+{DK@0P
zXnUg^cXrL3cU==WVEfDO=L`*3pr>{hx|bLaX^;=5H}?H`*V+TzY>tpri(jlovAXxX
zXYgPCgd&;!nrvPDEFEw4H7ooK{!L3=rQajl5OZgOkY}TDi)~Fo)B^k`1MX#|FqUs~
z4f#V|*zydMv`$#6j`*DpqyB(WM-X3P;V6i%zs5+9Xz^X^DTS>anzlP(2}+HRIwQP>
z<JPlkA|h@BZ`yELT;tz}gfvfIf6n0xmEH{m9ga?_9=)zDQ2a598go6=-#PuvQTNJv
zDja__;&l{mlP_s^(i%L!E6QQWg6aQ#40mt2O{S#PWSO*DExBX`s2O51npNsuGRh~?
z;DdVy7d1>~kA_#V`ua;$dnYD;HI-31QG}o3%Jy|9JvGU~{|tIbho$SfzN=(MQ=EL1
zM84>+EjqaP(Hw6CB0C|2Uhuxn$wGGT;{Vo0&mQ47zAwbP<@*Cl=!O5cKQQ%HASwmA
z;Sk+dc63Q|^ZIH$^mVZFb(FJ*I+6}#5@HhKB4QFE65?j!GIHW~<s|P3i;2mJiJAW_
harmzWcz8LuI0gOZ19&>vzMosKtzn>!P<<TrUjU3;H3$Fz

diff --git a/rust/ballista/docs/integration-testing.md b/rust/ballista/docs/integration-testing.md
deleted file mode 100644
index 2a979b6ec34..00000000000
--- a/rust/ballista/docs/integration-testing.md
+++ /dev/null
@@ -1,32 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-# Integration Testing
-
-Ballista has a [benchmark crate](https://github.com/ballista-compute/ballista/tree/main/rust/benchmarks/tpch) which is
-derived from TPC-H and this is currently the main form of integration testing. 
-
-The following command can be used to run the integration tests.
-
-```bash
-./dev/integration-tests.sh
-```
-
-Please refer to the
-[benchmark documentation](https://github.com/ballista-compute/ballista/blob/main/rust/benchmarks/tpch/README.md)
-for more information.
diff --git a/rust/ballista/docs/release-process.md b/rust/ballista/docs/release-process.md
deleted file mode 100644
index c6c45c3cf17..00000000000
--- a/rust/ballista/docs/release-process.md
+++ /dev/null
@@ -1,68 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-# Release Process
-
-These instructions are for project maintainers wishing to create public releases of Ballista.
-
-- Create a `release-0.4` branch or merge latest from `main` into an existing `release-0.4` branch.
-- Update version numbers using `./dev/bump-version.sh`
-- Run integration tests with `./dev/integration-tests.sh`
-- Push changes
-- Create `v0.4.x` release tag from the `release-0.4` branch
-- Publish Docker images
-- Publish crate if possible (if we're using a published version of Arrow)
-
-## Publishing Java artifacts to Maven Central
-
-The JVM artifacts are published to Maven central by uploading to sonatype. You will need to set the environment 
-variables `SONATYPE_USERNAME` and `SONATYPE_PASSWORD` to the correct values for your account and you will also need 
-verified GPG keys available for signing the artifacts (instructions tbd).
-
-Run the follow commands to publish the artifacts to a sonatype staging repository.
-
-```bash
-./dev/publish-jvm.sh
-```
-
-## Publishing Rust Artifacts
-
-Run the following script to publish the Rust crate to crates.io.
-
-```
-./dev/publish-rust.sh
-```
-
-## Publishing Docker Images
-
-Run the following script to publish the executor Docker images to Docker Hub.
-
-```
-./dev/publish-docker-images.sh
-```
-
-## GPG Notes
-
-Refer to [this article](https://help.github.com/en/github/authenticating-to-github/generating-a-new-gpg-key) for 
-instructions on setting up GPG keys. Some useful commands are:
-
-```bash
-gpg --full-generate-key
-gpg --export-secret-keys > ~/.gnupg/secring.gpg
-gpg --key-server keys.openpgp.org --send-keys KEYID
-```
\ No newline at end of file
diff --git a/rust/ballista/docs/rust-docker.md b/rust/ballista/docs/rust-docker.md
deleted file mode 100644
index 0b94a1499a0..00000000000
--- a/rust/ballista/docs/rust-docker.md
+++ /dev/null
@@ -1,66 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-### How to build rust's docker image
-
-To build the docker image in development, use
-
-```
-docker build -f docker/rust.dockerfile -t ballistacompute/ballista-rust:latest .
-```
-
-This uses a multi-stage build, on which the build stage is called `builder`.
-Our github has this target cached, that we use to speed-up the build time:
-
-```
-export BUILDER_IMAGE=docker.pkg.github.com/ballista-compute/ballista/ballista-rust-builder:main
-
-docker login docker.pkg.github.com -u ... -p ...  # a personal access token to read from the read:packages
-docker pull $BUILDER_IMAGE
-
-docker build --cache-from $BUILDER_IMAGE -f docker/rust.dockerfile -t ballista:latest .
-```
-
-will build the image by re-using a cached image.
-
-### Docker images for development
-
-This project often requires testing on kubernetes. For this reason, we have a github workflow to push images to 
-github's registry, both from this repo and its forks.
-
-The basic principle is that every push to a git reference builds and publishes a docker image.
-Specifically, given a branch or tag `${REF}`,
-
-* `docker.pkg.github.com/ballista-compute/ballista/ballista-rust:${REF}` is the latest image from $REF
-* `docker.pkg.github.com/${USER}/ballista/ballista-rust:${REF}` is the latest image from $REF on your fork
-
-To pull them from a kubernetes cluster or your computer, you need to have a personal access token with scope `read:packages`,
-and login to the registry `docker.pkg.github.com`.
-
-The builder image - the large image with all the cargo caches - is available on the same registry as described above, and is also
-available in all forks and for all references.
-
-Please refer to the [rust workflow](.github/workflows/rust.yaml) and [rust dockerfile](docker/rust.dockerfile) for details on how we build and publish these images.
-
-### Get the binary
-
-If you do not aim to run this in docker but any linux-based machine, you can get the latest binary from a docker image on the registry: the binary is statically linked and thus runs on any linux-based machine. You can get it using
-
-```
-id=$(docker create $BUILDER_IMAGE) && docker cp $id:/executor executor && docker rm -v $id
-```
diff --git a/rust/ballista/docs/user-guide/.gitignore b/rust/ballista/docs/user-guide/.gitignore
deleted file mode 100644
index e662f99e328..00000000000
--- a/rust/ballista/docs/user-guide/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-ballista-book.tgz
-book
\ No newline at end of file
diff --git a/rust/ballista/docs/user-guide/README.md b/rust/ballista/docs/user-guide/README.md
deleted file mode 100644
index 9ee3e90fcf6..00000000000
--- a/rust/ballista/docs/user-guide/README.md
+++ /dev/null
@@ -1,36 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-# Ballista User Guide Source
-
-This directory contains the sources for the user guide that is published at https://ballistacompute.org/docs/.
-
-## Generate HTML
-
-```bash
-cargo install mdbook
-mdbook build
-```
-
-## Deploy User Guide to Web Site
-
-Requires ssh certificate to be available.
-
-```bash
-./deploy.sh
-```
\ No newline at end of file
diff --git a/rust/ballista/docs/user-guide/book.toml b/rust/ballista/docs/user-guide/book.toml
deleted file mode 100644
index cf1653d7455..00000000000
--- a/rust/ballista/docs/user-guide/book.toml
+++ /dev/null
@@ -1,23 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-[book]
-authors = ["Andy Grove"]
-language = "en"
-multilingual = false
-src = "src"
-title = "Ballista User Guide"
diff --git a/rust/ballista/docs/user-guide/src/SUMMARY.md b/rust/ballista/docs/user-guide/src/SUMMARY.md
deleted file mode 100644
index c8fc2c8bd6a..00000000000
--- a/rust/ballista/docs/user-guide/src/SUMMARY.md
+++ /dev/null
@@ -1,30 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-# Summary
-
-- [Introduction](introduction.md)
-- [Create a Ballista Cluster](deployment.md)
-  - [Docker](standalone.md)
-  - [Docker Compose](docker-compose.md)
-  - [Kubernetes](kubernetes.md)
-  - [Ballista Configuration](configuration.md)
-- [Clients](clients.md)
-  - [Rust](client-rust.md)
-  - [Python](client-python.md)
-- [Frequently Asked Questions](faq.md)
\ No newline at end of file
diff --git a/rust/ballista/docs/user-guide/src/client-rust.md b/rust/ballista/docs/user-guide/src/client-rust.md
deleted file mode 100644
index 048c10fc926..00000000000
--- a/rust/ballista/docs/user-guide/src/client-rust.md
+++ /dev/null
@@ -1,22 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-## Ballista Rust Client
-
-The Rust client supports a `DataFrame` API as well as SQL. See the 
-[TPC-H Benchmark Client](https://github.com/ballista-compute/ballista/tree/main/rust/benchmarks/tpch) for an example.
\ No newline at end of file
diff --git a/rust/ballista/docs/user-guide/src/clients.md b/rust/ballista/docs/user-guide/src/clients.md
deleted file mode 100644
index 1e223dd8eb0..00000000000
--- a/rust/ballista/docs/user-guide/src/clients.md
+++ /dev/null
@@ -1,22 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-## Clients
-
-- [Rust](client-rust.md)
-- [Python](client-python.md)
diff --git a/rust/ballista/docs/user-guide/src/configuration.md b/rust/ballista/docs/user-guide/src/configuration.md
deleted file mode 100644
index 52b05b0e916..00000000000
--- a/rust/ballista/docs/user-guide/src/configuration.md
+++ /dev/null
@@ -1,32 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-# Configuration 
-The rust executor and scheduler can be configured using toml files, environment variables and command line arguments. The specification for config options can be found in `rust/ballista/src/bin/[executor|scheduler]_config_spec.toml`. 
-
-Those files fully define Ballista's configuration. If there is a discrepancy between this documentation and the files, assume those files are correct.
-
-To get a list of command line arguments, run the binary with `--help`
-
-There is an example config file at `ballista/rust/ballista/examples/example_executor_config.toml`
-
-The order of precedence for arguments is: default config file < environment variables < specified config file < command line arguments. 
-
-The executor and scheduler will look for the default config file at `/etc/ballista/[executor|scheduler].toml` To specify a config file use the `--config-file` argument. 
-
-Environment variables are prefixed by `BALLISTA_EXECUTOR` or `BALLISTA_SCHEDULER` for the executor and scheduler respectively. Hyphens in command line arguments become underscores. For example, the `--scheduler-host` argument for the executor becomes `BALLISTA_EXECUTOR_SCHEDULER_HOST`
\ No newline at end of file
diff --git a/rust/ballista/docs/user-guide/src/deployment.md b/rust/ballista/docs/user-guide/src/deployment.md
deleted file mode 100644
index 2432f2bebb1..00000000000
--- a/rust/ballista/docs/user-guide/src/deployment.md
+++ /dev/null
@@ -1,26 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-# Deployment
-
-Ballista is packaged as Docker images. Refer to the following guides to create a Ballista cluster:
-
-- [Create a cluster using Docker](standalone.md)
-- [Create a cluster using Docker Compose](docker-compose.md)
-- [Create a cluster using Kubernetes](kubernetes.md)
-
diff --git a/rust/ballista/docs/user-guide/src/docker-compose.md b/rust/ballista/docs/user-guide/src/docker-compose.md
deleted file mode 100644
index 2548e57e5a7..00000000000
--- a/rust/ballista/docs/user-guide/src/docker-compose.md
+++ /dev/null
@@ -1,55 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-# Installing Ballista with Docker Compose
-
-Docker Compose is a convenient way to launch a cluister when testing locally. The following Docker Compose example 
-demonstrates how to start a cluster using a single process that acts as both a scheduler and an executor, with a data 
-volume mounted into the container so that Ballista can access the host file system.
-
-```yaml
-version: '2.0'
-services:
-  etcd:
-    image: quay.io/coreos/etcd:v3.4.9
-    command: "etcd -advertise-client-urls http://etcd:2379 -listen-client-urls http://0.0.0.0:2379"
-    ports:
-      - "2379:2379"
-  ballista-executor:
-    image: ballistacompute/ballista-rust:0.4.2-SNAPSHOT
-    command: "/executor --bind-host 0.0.0.0 --port 50051 --local"
-    environment:
-      - RUST_LOG=info
-    ports:
-      - "50050:50050"
-      - "50051:50051"
-    volumes:
-      - ./data:/data
-
-
-```
-
-With the above content saved to a `docker-compose.yaml` file, the following command can be used to start the single 
-node cluster.
-
-```bash
-docker-compose up
-```
-
-The scheduler listens on port 50050 and this is the port that clients will need to connect to.
diff --git a/rust/ballista/docs/user-guide/src/faq.md b/rust/ballista/docs/user-guide/src/faq.md
deleted file mode 100644
index b73a376988b..00000000000
--- a/rust/ballista/docs/user-guide/src/faq.md
+++ /dev/null
@@ -1,31 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-# Frequently Asked Questions
-
-## What is the relationship between Apache Arrow, DataFusion, and Ballista?
-
-Apache Arrow is a library which provides a standardized memory representation for columnar data. It also provides
-"kernels" for performing common operations on this data.
-
-DataFusion is a library for executing queries in-process using the Apache Arrow memory 
-model and computational kernels. It is designed to run within a single process, using threads 
-for parallel query execution. 
-
-Ballista is a distributed compute platform design to leverage DataFusion and other query
-execution libraries.
\ No newline at end of file
diff --git a/rust/ballista/docs/user-guide/src/img/ballista-architecture.png b/rust/ballista/docs/user-guide/src/img/ballista-architecture.png
deleted file mode 100644
index 2f78f299c20bac3f3126a1875596fc3b63563185..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 21225
zcmd4(bySqm_dkk*%+Mm;odN<wN=tWlGf1~|gACFGNDnRDEg|U$k^@Q$3JTI7N=x4d
z-|x@&-gWQ&<NL>N-Su-VmYiqKv(G-~?Ah_!dlRptrTh?=8W#->?V+lQf*u+g2#SXG
z02><vctT$3lm+|;a+KASMMG;&!MnBr17k*86+KNfv|x5Lw3n~Y&@O>TFSpUq{2*v(
zyHGSVaReF~#f$tdT}gltxsHZ`;@#aHFz2*wq&2neZoqeAW5ZG3Nm}5UkEI{sLn1R(
z^RnoQrsS6V@WSsk_T}N~({HS=`n~zsArTHQ(*rUoiE-0k$nCbcm?}IjhLzSP)W>@!
zMm`sBOVB&%@)*oCRS=?Qq-NBV*1Ed7nx3AHjEtnDr2PK<yN7`QFAM45;Gl<x$H~b_
zTU(ogf<k(F`tI)T-Ph{x{pnW=6}uyOrW)ddRgntfJSS7d>ESk%q>svz+<7_ZJ*-sP
z@&Y0~45!~F&KEZPsOT_!rnp<*-;<bA=oeZ4GU*8;>tV~FvzAeaxxFkzn1YCSG_x$p
z$tP#}=x)E-Ha1^a)gWu~Ai85l(<RuixGTJF82)}GZDi}QsEnMM6Ac?T0SUQVX01t3
zVmZS7WVQJFWcpxR<n)I`Q$6wYIGf!sc?!}zyj=7i_9~R*k0JvMueK}NDg(~fOHEBp
zd3kw<x9i$x3lw!E5iPF_?bI5^vR-C+vT!mal?4k+@eHh2_brzZlM&ka8JG4ac}Lp@
zx2w1mNo)D>ay_GBgpepXaatxm4Qy5NZBX#6lp|-v$0NcN(qb@*6qK@Hwab(+>#Ec$
zNfQrr)2qo5bcGT!(KE=1tL4N$d*#orqe995#H6E4G5IAn(IcbgA|5M)^78W4)zy)a
zk*{CBW@Kda^z>+JYy0^4l$4Zka&o?U^-4-gs;Q~z`Sa&wWMs1W6heSlpg&hvRzQmb
z)2RRhoEIvlK4@r^dG|l)s(w)!z(XuwRZT^#jR%;INca!(xH{3$w7XRmWDNqBe)Yyj
zkbayW{)K$}_%)&Mcp?*yaf9|+Fqne$DfTNK6%~gTM-}X$rv6XqVFj#mGDf;Of7XV^
znd6x6RztK5JzE{va|(BIuKO4SXY?&C)2XJdKUMEG1n+VJ%l?1#Pew#8k!hx1a)g)s
zbU2IPPmundXP?`u-Q^xS9_zyh<EgtZg=Zq16dHvchdt0j^hInC22uv{UYL;*8xw}!
zfDQ}@@n1Xy29eO0uJ;2Fh6M&NV5Iy(eLp~=ExZH<*qEJ||9|+ZT_m*7vM__H#L&9W
z;_jY(74AYJ6fSXLNO*bci@9Axx0nmdbxBb(KjxdCe#mu-$44YS4;HNStJY;Mv0&&{
z1M;w-@Q(>w){MbZx_^o5wR9k%e_HDhChaQHOyhA-Toru-+OP;!=lC|uy4oyi7$;9s
z_((402Cx(sJu%9qK7*prs=mF@>fPmCP@$DYJ*d^YUe|p?&9<n`nc-SsYCL#hH}j0i
z)z}4DN5E~}rng{7Q%Iy?D&5);H?OeVpIrB&0^aEnq{x=}F~ur@1nvf$J*E{2YG%)w
z=Z<4bN~*)+mSO^CdMY{PJ^#LjLhOU-U5Q#=*?x3pF;+k0YRca@D-+lKtgVgO3DWqi
z#v6+M%}wL_z~n;fs|VS=*R-mcL7?NYn27$%XE+g^#&^A|nSVazA)vUkKG9C4GVaDB
zrL6K7(LrS821d}DiWZB)@x%nR45;s~>z0}aqq(?TM`Pm&@04VP5B-l|st3pHmvk}^
zb%I75hdH0OyNba^A6Y&%gvu&I_KK`;_=0bDDvK(&jCy*k+OuYSKJ}N4(PFi#!+B|@
zyNfc7_pG01BVa)uH@L0W(b<K@8=s`kr+2f9%=@{Lp;ke^{B$kXIPz!1Z`xiTb8{jy
zbjqo+Gtv~e70VU(^VgVbgaiupN*xNFhW4XI8!L8Qp8s6ewMRE38I}#TaQ0&M?5+Bg
zw;h0nd?F^vjxhY>o$xkc{&=(BiOp5!6xs8b<2u;ul@s(PxYR9kZmR`mWzu&IHsjND
zLjE~u3mq|=n^n((vc{T?dmg^Mmb`hAUC-_{uFm(76z<i|#)D}3$J3N}EEKo8OGiZQ
z@32N{Le(G>mORAJfXUrpw07D%*?a8mn>1pSoU~`jcXUwe`k&WOV41=@#)ZZVoprYM
zVLa_Tm}&JigMXw+q;+H4I4#U{QuULXKE&0q{bTINj}tX>NUd-7Z#BaPy-JF0Ya;(<
z(D6{7XR6{f%w{F{IH?SzZ>FR3K2lBhoj$}U8_tICuJIp362hz4_!`Z6x%j!*I1u3Q
z6@`8bY=_ZRPUI8o+dAK-Kk;{xjw*cSuqm_EwTPWuzOVL7tDkVIo4!$}8O2Iooy?p;
z+IY8yDV(EsH&btaQv9X;yD;_`h9mZs{4wR;wVDg5>S^g0PGX8|@(c;EzKQ`Q6GGU)
za!logyrM%8G0gPumgb4XOK27?Ju$qr$nVCd_V)B!%?o6mp-pUpX9=#-DPb(0QvO>J
zIY_PH`Gnd{@UO&iLv}JQM2GyRcE!W_uXn1<iUOC$Zpd{*11}cCASTW<+x$^NjYx70
zXHL8dA!lUS(ecnF0V9Gf_9?g(X7m{26>d!L@mn>x(J&$WgfJF&(^-NHmk7mWc-zur
z#A3v!(z8y6%%DhJ^y4m0STU5faKfulyEHZkGJQvfG%)QlX@Pm2%cCXMk)zClwR*3!
zC(|{|CDpl&>Vs3~pV3)DxbMhPnVXZ0BzrHTv5`%+Ms;Srg~G>mYFSw`OD~Xi?225U
zwbx?Rrtm}98p~wFeG{Dv<RKRZ2284YI8+0(dfIGC(cVmd9)QwWIEi~Pre*!{VkSzw
z$qA%Jn9?)HciwHyn&`fQ3h~w9Q=rB_HP_BJrVygk_jcZ$*5LgGoArnG3jg>;iqfar
zUY{#f$rMO1kTG<{tw}VYRm^l9JFZQ{or?}sntT3kCBU*8U(vjvr`*7@xk1G;*b%1!
zPTYOWXe=JYIperO%(Mq;4UQI!37qP*{jovD{a4v@^yk=Ck+pmObjmI?y3IAZH)Jkh
zgL^*LzU$jkEpfRa-?RSfixICfID!7wI7CnEYiW%I^J`q^y0`*;6Fcj-<Lovy^XrQ+
zP7C7+#qww^WQP9p0M3muh_L-7%ADD*04k?bz}av#onT1J*h||gr^Ne~`xD;+4Vz18
zXp&p?!roC5Zt{z=(4?s;?&zW9(6Fo0@N#<fqx3i^`QIRt8{o3@nexD`{vYw)t~m#o
z%8qYbE`K2t@UE4?9L+QbKmNBdU;>P}TS0Gf{QB9KEh`NZNa9^jj);mk%xmjAEo_vk
z#T4SYAYFiI)L=!&_b>`-@%L#g)H}wF9M19bzXQzwADO~*B@ux<;>K$aCuH~UUWU@$
zE2fZ@6jQ$Ce>OIKl0@qmdBy+a_`t!(btaZUb>dM0+0*GP%3mHhi>+)9y>BC|{&6xL
z_#vz(`r_3`BHMoZ7@iYkCC#(26lrftOql8ogL%cn2Y?B$mRw!Y*UsBv3D+4-3lBAB
zlh6BJ`3a8iGNa5;4c)}hTby*r<cWuhgn3Xp&Qv<IoZdQ=XH3vMt<BBTM7o844~`u?
zZXx|Ymvlz<9MikoJ#-7*qC4%pkQ$AVUUhdUt$B~zA^{bNkm1_2L55xug`IFxxL?B)
zrHL<k?)ay&9+`5Qahs#+lZsT5J{iU@a)C$=hqIicfIVyMf?HLYDa|3~+pPiLZ{Y22
zx3dY*PL1_9H%CNaXPk${G#9Hr<YVkRs8!G|%hL7a*2z@XCsUnO*rMIsk1XjgEkR8E
zJfL(hMoNm?2_I7k!-(i~CBtV&Eo7)1aYpEELu<Ny`pIG6d3D^X{^HF+2s0w(VgvA_
z!y1wUiw1JF4G2zUUqM%lh1WmxJa5$q`(Fp_EWOk+%1N25A#EV0p~81C^e73nO(KX~
zd~EgH&n;+6kU(3+BebYA_k4Ep6Eo<Ts+W)JC$;(W)5r1<LO9?bkq3PUtHVr{4%(ut
zgsv<e@vCnQjM$WD{wCQH{hgXBva@{jx6p=*2=2jc0bwu|77(id-Fsdd;@YA+QHv-1
zVYkVgh(~^bEr$asq6_h~vA3_IvOeu&50S$aAl_^%A*=<+R3a3X(L-$ZmH+Fl&y@Ui
zN)Kuq8+J?4oSr!n3)?3|k<Udle=73QVqJbx6xKOPj^c1}O?OeWJEiZxHOS2A<^Nrc
zDdIl1Z^-}8!7H<b>|;&Fu9TmPc%~7pYch<!KG~{EOWK5p{4P5;l_-i}y)908wzd3Q
zj2V%1f=&@u2P=FeqQ-5$vZ%q}wsOL0TF}D)NJ<z^A|oO&!YxPS-peerg5(X-{8kwr
z=U~{?T6rX$wUQMu8`!$0zk|2Qurt4Wl<MZ;i#_!C@>SlY#%^`okkcoXtj?j~GUZxE
z#Fzg?<~7fdWn58h()lwRXSdbujb-g;7Ya!#{oVG7L8{+2&!1h`B?jG$T@{TitLM#W
z-Ha$Q>?Q_3jW}CXvOK%sD5~wHnV>dbugOg^7x2womiUiTOL6bv{_i@q16Qy^@xMLQ
zclX)?^AjHC6DdZ-H*^?!AqH%e4)E5*{=Xiw@W9Y_00yjB=wRq84`Ard7%<ad83?00
z;FmqZ#w>e??XZK6eEAj~3Ghn!TBHQm!FA*d2$XJnKS^P3xSg7VP$T(%N+hZCzo|sp
zkgox=2j4J}>$HG#OVbQ)^~v6itd4|w1Mxvw5(2iU*?l%Ue^6ga_up5E^dS;XwZSW~
zS}Q=%fbPsv0o~m_@!@d?&I#|o@3eY$1h1%fGa<g+vkA=T1HSQZlg^kG$N29#d!EAl
zi>0QPsl@$%hyps|BX9Rvcr!k1^xu><tI=+E^PXNpjjRU%!$jtgujh$YXCA;R|2X>s
zt4_tUX(!@+J1+K*ByL64nd+YwTWh-iZg`MXV?ufvy_JMw$&{mFLG{GP?lQ%r`t?qK
zY>?`30O2@&NWjk44I$j``Fscs_K@vobkB5M<bIi1(1&RMWD>y{%t&Yp9yYrbhbNtG
zk_B#S*c5Ekmk~h<oD><Mh9q)-r8UKA;g;0GNjYc{1B9W62@?Di?f<po<5xUnAU;4+
z2C|bQKmvqTS*(N_!4;@iP{4!10i_pm3Nle*JE%TDmN|}T07~C9?f*U%6^0Zd)U{<M
zaB6DkP@Scgeg;af|2IkS9Q@PMNyPx7`7l?Zekwok8|Z#Db}4;_%|7V*z7g!O?d{{$
z?Iw9CD#Z$D-OIAM-}>lkwKa>=)OvL^KZfVo1zadK+vhvlhAzn%DLv8exj>b(ApBLs
z#dr54s*|kSbl>wP5Taxamt?W_!muRRJ|0yEMpvbSOY|RyyBUmT*Gw(GPttSLN15Qj
z4k`a_&O!jT3aV-=FQA+LxI`H3yQoM6vk7=R+p#^cbaSiZQnU3_Ja17Y+#FK>ix?^7
z735Pn%ZTeMBoA3c2a$M6Q`!c0WicWYz&=fprLW1)WN6xI11siu1;8^aRZ!Kx4<g%T
zUIugosQqEq)qW{oc*(osj1wa~D116DQ%UOg@%BG^86-uu*ZcXINAvA6B3^;@zi*yD
zttG7}a4+VuB~!$X=_UEW7;}n5hrfc{iqu+FL9e@dQfb&SLuDbR03Z4g;j?RS=Vx9U
zq!8BZQ}$;3EtR96j}uf<2PtXOo22ot!?`7KC^XWgzu>zwBI?j!@tlZX68^txbqD+O
z(O_Ob(i=>OiK;xvlGQ{L4#g6g!7L8WsT^d;zc+m*L~z{|Bo-DUVjmqAKZDg8ao2M3
z5=v^xoJ8&d)mL#;%Wgp*mr;t(S6qc1f**wML^;oi=-xo4v3`2GOwl{@gG<NvBrbj^
zVY7$C@gNQk<agf<2l{`OU-qhhvHjvad-sh;!xv4dMJ|LrqJc>k`2sY(G_O)m>!j2A
zn{i8FLSW?^=Tv;_V+#l!SJEzFtnWprWg4%aZu+N%5x?9(h7MJ|#7FKWm_?1UE;0~P
z1vez&sms(W+_l<90^038DcWJ2Fl7Ugu(#THn=d@APlAWngx4M-%iOzS9xP+%TI$e;
zMc9O!XtL{Ce;MxLpbV>4Pv6xqMiUky8jrDkEfZ#<+k0#jiCOSexJA{>kWKfI1lt>N
zH_<>G5P^T%WNDca<NF*^P`cqD2|isiGorNj_;K-XQMz9TS4SYaXVoB*#Ps7=vh;)+
zmldv$-?YGBuomW$4^>y(BMqjeHL9}|WTQ&jc)L-DPlc<S>8yQ}jn%`<qb!IPzI`UP
zGH5*Dg#2MJP;`Ij{ZYf*cv#l`O?w5epM|-6ddk<T>=<f;p!Ay0-_VeD54}J>C!*XR
zNHw$resd-(5csu91rzw4pZIeB+O8ZJ?)v4l-pQ0q;wy)vRB6KT=-hwLpMXv@6xI=}
zrNigRu)Eo=6>I?6$!jQn>5I@+-23q1(T#z|s_+2kr;b@ta&XRYR)&cpThQVA<yl7i
zK_U&$r`#V1HJ%c2NW0<1yLE?DU8###Ow(Q-$((y}O4`s43B$3=KTToztciNn>+5;f
zWV<q?lg9`5ts5k;6sE6`-X961EmfNqvAwTMzhFQ@mbtl=c@$ts%|uBMl97%ps+LM^
zUxL6*hH;J4C2iZ{adW-vfw}6~^M6bU2ehcb8qVf>qF-5s7_qG1cO=3MuBU1xf>Nec
zReh4jwZBKoD}ND6&|i|I)`x`ouCGTHviQbX(>)9Q@tU@7y3+QDpmLkr6PG<~Mi{YV
zqWn<{X`rtduBfYPqsq3Ct)K+4$n8*aLk8*J=BgcPqLVeNC8+Hyoiy7U80hHMn^#Wb
zVzbY1tmrW{K!dld$Wf#8J5|3nc+?@*s*fgYj0SAVv-mU{X-fxEVLKJl8}vbbs46?`
z&B4gdC#gHs3N+Cd7hpw$H9`#>L!Xi6n4ihhBhr*v3PHMlL4HKB+?jfODVk;Bxazgs
zv{`Ak<Az&WHoE1cWW#rsz~05zaASC$mx*$8cWa-}`{7n-Yk0vwdcl%U&d%uhhw}e!
zRI4EWSQ&0{hx?gi7EQxeU+t;{6d8ll`*x^N&BeP3Jw1d^4H|SUxwVj#q*X6tZE4A|
zupk<o8`<WI+EvgazaUQ6m@3KJ-+)1zC9;Naq|7j7uF+PO_HxL92Bt-W(vnpySYSK9
zZpM7aQdN+ziQ%(TmP6t-J6R)JpdxN%$tA4&#ZDflC0OTs{5XlJs$n&NOlqyDts!2W
z+Pp6EjOJK{>27?qixB~A6y70tAV<4#<Olaolo%lwLf`jqlLB|I!|%k36qUWT_2z*r
zXmT1=`76Ab9WiRh_u?}YOXbd8nDt41;*(9w?-NXoRp<Bt?KPSWXs`7xiYy&N10>~!
zp#=?K(WXZvVK!lLM!{^&VhVM{=8)L!FZ^DkICF|$1HQoJf_GkZC7dJpHFyBW_o$+f
zPdf%@bKNFY|IukMpklq|e#&upmQ$DWrj5C35Rn>re<8AjM9(Q+D9%McK`^&gb(z%}
zG!0F~iMLmo{qoa?1k;P3A9f5a;vfUz7a*T)(Pn(Dsl&p9pUOLd--~|^hY;+TgOgKt
zo9ut*Z%fD;(asDg(_S6bmZtqhz;=p%E{9q4e_}$=i!UAD(H9TI{{1nqLW&CiDUNuH
zQ}L>7ygeaF&uWa>q*B~Wjnn!@0k;AkRyI^qTm7b~NoKlSU_~!7VJ=SEXbg{=mC{=s
z;vT70^u05yp|5zc^G(Cz<EDlN_0Zb`lebvUKh2{d1CuU5EDN<KBb@?ML|K}RJqPnJ
zF*lWR<t|-X%kE2~$e$cq;tf$3N0d9<QSQjNH^G;)!a`oW1d~<J_PPC~Mu%66x$MY4
zKGhb%>Y0jQV%Upd12ay=iyoW#x4}Vk*uo2@>Bf4RigSZB)2{lKq)Tz)BK>A#wa3dO
z@Nd#~%MN86)%w2voIg!Eu%4*#@+8fpfZ)T3&>SMA)DhqJx&1d}OG>~jtfg}6^C-Qa
zmq$v_Bjl)l@wp)VuGpNT8|@lHrX=08-wF3=pkCXzfZadE7|o+E671I6p<HUG_Po)C
zExLL}hK0tctj89{My7^LrP6!2xTsuqGQ;wg$})p4<D+pkJYSLvLwi>PAv=-hwBs9U
z&kc`PY?~O7LgSLWHn78mt82@G+0}FktkY0@uaXqdHuLEVLf9zz_R9{OJNdwkwuQUa
zsz=wCu8j(zSp52B)Fd6?pPb~Z2$@R01Sq9-_csug__#rDS8tW6xI6W%)cn!NvwAv#
z?h4yP+pJ{v{)VZg&vTzudp<2t#Vvlt0Hmjm82@W@Y5bk(H!ODC0MxbRyM}XQU_vyu
zlJ+|qO~|Rcz(=`GD0Ou!Kcj5QWWajrQeIX1gNe!dEUkCZT{C>b;?=2-ocGfR+2`}W
z1F_%;J+kL;^KRGIehOxDdp_}$R<U`%Ic(1Zo1GsiG>YPz0_!s{T6aR>R7ebWEu@aE
zd6@jRQDq@WiHGg;Bzj6=HL_5K7Kk@YnGsJ{LdSO>sYXK)9Z!Y_2~qRn4Q*I3RTW5*
zOH1}PY-+iG$UE6R2Ku1hUsE5VXT2HG+<rYKKcx47=XBEm()VS#hY&tIf{DByw#lBw
zQ4-sHR`b}1;&%geiP=3rv%{ns-NbOQ+77Biozk=<eMm)89S@7alse8APyNPT45S@;
zAq6U8<@T9(>RrgyGk#F3p@-MEUQY&4IvjfPS4%w(I{0w@aoVEP*Ke|<^H}mrlHnn>
zrYSB@#ma2ku#t9P;C=H%{}IWaHl{A<2`l6Z2c~KP;Td|+tT`E38Z{*zbV?AfG5bN)
z4B}qHRMcz#gzYmu6J?poqTDO!Wmmt&ODy6aI_IJ35$6i;8gu2kto8Dlsp$BdL8S~G
zau6IGC7d#SsWwd3H2!>VopMDRn6vIq>Ir^8i%zFj>EihF^Ntat5E-l|(&~`DU{@Rx
zY<9djLKKmob;-BOZvl+DC-u|4m1~y#Pk-xyyl0T9aRR5Ye}?iJfrw=x$zx?3w)hCi
z%0y|q-kDg{TbycOQgZ2+_aL}<0fPKn-TSVM?F~86w$rK_GXhPx%~uWI&T~|5?%@z@
zR$M48<YBIDBOT!X!(SjN&qdE`My-jXlM}6zC4N(2w;Ba7!{SAhJ1=72@I|T?Lun}n
z6ev*i*z7pSx4Y{jm0d<E6L<Ajei%uY;_sSBRkQeuwFnIC#3FfxpC}}?ypWOa^{MT9
zSD;eQUkjd5I;tBUWJ0(xYC!x6Q&HYZLlX*>b`N@-^2-K6wy(6OK*t&0sM=~GRBdtg
z(nqyamz>_W#)XWCAW*4MiP2wF@;8yK0(5tx4zmss8jLyj>;3b+<V+*O@s+gTAv2Jf
zj<*JoQ6$vxCuB#E%xg}mLBfWUr>7b(QKmi{qfZi;5ig$QOMX*tfu-eg_QZmHwkZpA
zme%NoQ>ycr`uC+)e11TbWjNN8cj>oY^{v8)M6R#qKH_f9MR;QKB`{H95u!q2&Pf4y
z&*i>6M9mnmeC%VR0pbZ8m{|DrOMOVMZ^;#Q!H@TwRh)#ak8=TY)k?8h_T;s1MlyqF
znbG5&Visj>T9%-wpT2e7#*KM{ewPF@sRtefX^X!ZEFq|vr{sV5>0*^H2uf;fUF!i{
zTpDR2Kihz}3Eiis#2)f_g*(K5nx4zq+F|Vclhw6cdrR9Nz+?&hV*RIg4YjqZs;vzY
z$|@@XC#-1cFf%vXID^%-h*q?WJXn&b5Mdvx)E)E^su5Zc`Ei-k-CBy`N?cra*$4+|
zhqDUX@yAv2QX%I{-M|o)7vFg7ZT91@Zl((DL(VxL=d-H0jIN23t&BD^zoJ}=3gg~6
z-4LaTCRuN6c5~3rhm4ddM1ai(PNF<J=RoX*o_84wG0a4f3AU4IQ`vNzZj}j9{W4=4
za5QGyIMqDmiceD@$$YGTx*hN<1}g9n2uGD@k)fk})!miK(cBzWzh6@h>XK$L(}~lI
zfXb%1$Td96pjy6ou{=d5VSX9#>H|gu90U0>gNRL0+*d9Jp6qp9oUN!|d7FKbsb)>9
zxEcnj{LqjtVP7{wt_azr?I9=%wYO7ONfjWtU#t_#I+3cVGz;b*V_yzl-ubDVLjR0V
z&u#+YoXku&<%)&)80Ndb9+^IDz<}bfgm%p(lk^Tvv&9CdP^J=y2T7sdQ>lWUKn`t6
ztW0;TM3U*s8&!e>PfJ_l9%}#|=k;fF<wITZaJfdeh>YMzUtu;Rqsc#qE@xYe^29Xg
zL?knJ#DbEl1zf%8fGj4t0s?Bs^96(BQ69x$fw<ampX3WUBlmql53QEOWRv0CTJgeN
z!?Pxg5?ejvqq3EifiUlhW%QdbKCtA%?R2~Yg1w(6&XvBOeclVg7Tenzw0JoGDAEBi
z*Gn)FJEAX8J?@p)Zs6?NK`BlLp{(cN4GtA4yte|)X4(@Q^GO^8SvCHhczrv5`=d3_
z7rt1|wJ-R%4p+&RS0}j-BtB#Pqr;0xS59G2Lt$bCml*Gh<@`7#%9jimTKj@(*zaU0
z=pn>6!C!@Wd48w{!O8qSR11Y-d4XDZq_`!5aRIOeUu^}$nXMJJ*w1YrpzlbscnyFd
zKs-N4gd!;EBCX%Y?g4|+*%=X?uKJa3tHM8c7!gAMJ}%6?xPE}N<0RC`eF<ITq3iaP
z0Hx>2K%5`I;ul8E&0B4OapHf*Oo)0WTatfJO&Jb0dko;9hyn-^06A#h0}>7BfdEPZ
z{7Q(@1Hn{*iQo318qZgL1GWGFobKr_@0%#U^`w~M6`5%GMW{glAPxW_s|17Ah`ii(
zo5hxbw+ygRrvG7%;_9USfpFV+*4&&7L<~T%fE@&eLAp<lSHjM&dk8hO(EbrfhWbxy
z1uba_kDuuSFtPl9*~e;4tg3Ken3}hL8$k8H9&;e-L+pk)cVLU%G_X;&e`}$9ikHva
z56|QvrnvX3P+E+3lLEf2LPe!cLAGxwmToIcji1FR9&(-X9v|aBs90%sdv;JUX?PDh
zy=Lb5AEfQvh=ULw^gOsg;hA#9-S+!wbipMy0&*)1M%8KlQp<|F;li57XfSkOIE58e
zZp%d3q!7=GpyP5|G{XaMVYDZ(ZCEwi6%pWCqnR+?BbNWk&7=KG*rNfd_@6LnE)63k
z8VK6~33FyFk+HRn`eeF&d@TR-n#-k$w@1~0p~fB?(~hDUq(44eH4GU1Ea(yq4GjjV
zlNV{bP6;eEDp~<NBj8y|i2sMS8I(R|4)!*Jgx|~(={aFnOyIkP`oEgl#Cs+xpyixc
zp2C~p^aubPIl83VD^DDTzmaQ%y$nUyv%&^Zn~o$gBVuIUsA_zkR+wdFBJiteX>W?k
z6zf7MKU*+r+%=bo7w<$bkUK2(SX^8|%CLYiBndpoRHC6>+i%pK=jKXO7w6c2UJPuq
z)R=lN?u0!;ZfXyJ9&N|%MN46)Se&0ff9CYdT@=g@ZCiv*@ko5d^Yxd7y#FZ!NhH(2
zXj|ww)D8<HY1=)>R&s%Qs^s9hherAnJ=lAb^po=VM+GLhV`{AYSEyT<@*6`JtkBbf
zP};;wBstv0G*+#~UgV)3>yIEtgd(=mrL%+~hbPMwlH&{l8YqFPs#xeXT3wl7E9C?=
zMZ%pJ%Ct^3euc+iP&4G*2Xf3yQP7tikx+6b%Cw$J^)AC)F=^*4$?7S3MHf<}(2i9W
z`2cBBJ2bzb=a(97xW0IJKUv``JoY*%Y|KC!YzHEkmnidR)9*+xMFiCB6kDGJub4j>
zYKFl@f4GM|^!vQcj77f~=Jgf}`O=NC0_q=%4tOI7UKgV&#VY8a1eeU=Fqwt~U52#P
zGcYPmORS+D`A4Oe6A^a?;L`5TaVuh+K1~8&GZ@UdRLeyd^H2gjqcB070}4F^&Ah+J
zyq*nEOPizJh9sxQ5_rrl6oY@JxB(Zv!d?Mw|JBG+VtLj7!V*GOo=_uvTzI3TIphOd
zt&#&%@XRi5zgAU$xg^2)qDfj0vYFX45x#q}Q}xO<pgrANg`=&OJTIOaLv#9Rof(1W
zKay4qr~^I)fh$0G&<$GzxZ-Q@N)N2%i-psPP`P%cm8_tdZ(pcOXu@J`^=)m#vZ*nq
zQzm}>nvmWzG%odPQuWa<jT`&^r7S@rFV`=6@+_+6s<U*t@sX=u2idE3EF_vINq>64
z*GcMgPqN2vLW8*Y$`h)t5;^M}h)KI2BXq>8>!k;P@)sW++IRbl!(!7bbk4;1$~1<d
zu-dUB6DuPFGc$`c<FOnw=U33NDj70TpN~MrffYwI8!c8QnK(6nG7l#=7Z-PG$+TbQ
zVQEn`&d|cNjb$kw5)De!nBL)WN6|g=dDPXV6gHL*tFBms3Dlr!t14g#*ho?%eO0S?
zJHx%Is+VK~ebtXk1pGDMEyqc>zpSc*I~9-uWli0T2+V2Y_p%VQSA@zC+MY6d($UXD
zxoT!V3Gv6Hp3akH`%pe#c*3po2mNBdP))3*CeAkfLkY94UXYb4TkTML2Ccct;6~5}
z@z`ro6c~UR>s8|bO`{$Ip!a>Yjgzn2yZI=iT6#WXSJYYj3cwwbsZ=iCHQmxS%&@L%
z$Cal}wN+G95)|^}AgDlr7^4S&t<Nn(4)BzQV&c?}<i0gk#8q{@ku-qp9g&Sm&T$Qm
zJ}poGAFRQimT{^ltd9^Mjgz$jdLrr<tXKAYXU(sRsj;b~Muk?7I*hcsCoW-3kzr2U
zo$J#Aj{=cKCK2#02J|W4p&skI>4pY>^I(v^36CYj44th48#gh~8Xsw(7r8Ed!Ii8_
zs>)@6uwHXgpsO7-j0OA}ARR#LN$UI^xJX*XvC0y;ctJJRyj1izxS0!a&?9+Q>aQPZ
zPf6ntoi8+<bF^T8i$8cMFmN{~daUpu|B7SuiS+Ylm;3&0AC*enjOKR_kNRlM$>Q@7
z$dZJ_aUyQK<S2|m1N2bUy3&%CT`qE28$Vq`*_@NFMmlYNbnqZH&uHpmBilLGxH2rq
zg55UG`iz%DE(#3hHghl3Ap)fZep@e58zq$?IKEXE@8vhcO-1V;diTpH@8jO9U9U^1
zsA|n(t53%stokv})P;_-bC8vdqWx)Z&vQRN+HpBcdSm+)YUuYw16>O)qcJ|7&7bql
ziL=rE(crncj~hYB0A^-U8eD+SHxZG7gn@o8XyFi{Wc%-+rTz)Q&400#COB4PdHA{S
z#eR?+MSl~q9M-)|4zDzM4Hd_5CxYqnnPEIf#F2r_@@|6LCmfDbBSNtz1Z)$1YNN2i
zy6Wo7-^pr@eI?(9pPB<iLIw}k%X16NI}W~ROEBF>50#M#?37Z&_MK4QF<+g9*_7Ep
zhzx6h_L6x$onF0aW?On47o%|A^H|`T|IqS-_0=zNyDb4x4+71wkvvhL04xbJ=x6#4
z)e`Hj7j}$z-dAzCj-~4A`g+wy#?LEI6bG+51#ly-z>RcK-}I!n%R&yprNFm}iE`2L
z6!{naQdxk@^wk6>Wi{VSpNV<66cTPF`{vzBK6*tGwG;>C7+KcRd73J6Uziy|y|5K^
zNQy_IaU%)iZyFQtHbdoBKtoT1g&(+%H)6r5t0%Z>J02ZnK`(uxGVm%^zS<c;zE(Nx
z9oU-(5YJa=89;(7s^&j4vH8oJfb@Uw?Xuzu#o{Wp=l*@{y7z{D?x1N*137rz8Vw|P
z_K8p}mP2>`mi>!U`cEt%6GLoVmPc*@NB&Bdv0ZJdn{$Rfl5fRWC?(9CU4>Z?FOY-z
zln(&A-=YHPc6B}b*q=oK18}!=;lC$_?R{FqBfhROA95kcw>6x65`ifvXsw8SX~avP
zTK#jRj(Hq3R3()WVL^y$|B|>O4=MKDNwp<V%e~e_$I<jPn*COcnKb|&DAvk@Ghh6T
z<F)p2tkW%HcX2y?I`$@&Tmx%Vyw!?U5e9Hx2G*AcVqhS4%s+O2LQ&D$w$djd<5&8s
zQblu<x4&ym(Hd)@q_{$4E6auN^M)Xp1@Bq#vG{#mArQ;A3H=^(C4@)%;XPN6@*?uc
zJDx||6H!TT-;8nSZ8Xh#d{~=o^<7q&yRi4;&S%+W&$`B|de03<>-?{DKsJm$1k~CV
zv+F{39}~uh;5~nSH=C4duABOm%U0yR;z9Lh&R!29JU=J;f0==UJH+7m6i^DR=f>H<
zWmgGDLO03ae4lsJ*BC^`p818W8wt(2XY$hk*<VE2?XCbPC7hX!iP94dAUS%2w9|u7
z-ftd=28EHc(**x2Fc>KVK&|xzr7Hh2$DOReE3)@>l0S2ykc59}e7Gx&e=+G_8V?iZ
zr3^7YXZ|mhhd^HNow|{A6tG?ft^@SNV?VvuLosj$Z1&M-2so}1jG`v-snJ7ZAl`3m
zr4$1EVbKj`C!u%8(r<4fBmWF3CH-e63P%ad*L^ma!$$%;q_rYOZU6Z}IrX>X)5T+0
z4ker&k<mj0_^inNA9r{A%Uy2Vz#%-*fLuewWNi<%w~1VB8LfoXAzjiB#UJ64z?VO-
zk7Tq_P4(DvfYQ;CvwF)SqiK4q2sh@uXR#fifvxbS@1hrmMnYr#YlBVU$Y(6Jq#0rQ
zrLrZA<F?+p-1r0%BKz}omn=uJ%D<xI^vY6YscERnsmLdUelyU#eoB`^Rod+3E{UtQ
z<nZeiRDc!1cSiJUF_jCj(5dW5e30`JCxR*$K}A(zDi1+6Gi8CWh%J)J-<!*KeW>bP
zC57uoc;~Z=UN;eV<2uAR`AbrkrQWJ)eNuAZ6dm2LwUuv0zPSziV$qshK&TM|YIQqL
z7B30o+>JtCVWxvo(bBFT2aRzebm!OvrERg52y)AdZ_%d+CG6WH%nT_DnxE#w`1ev&
z66_e(N#O~WCJZb+j^C=J)ku1TN9;?l69Vuh{N*4y#Q3f5e6jr9dd)Rxf5@GQ;C2GG
z@><9y7NvTRbfPqTC#4WaJ3G6$w$&=An<&_J-WcXZZ-?jbdc6uKYgB0B&#f2v=f`&^
z5@!NuTG;GSuvCcBypq|qALPAxS^XmixVE!01FG&M6t0yU22nbEGZiW1WZd*LKMazU
zi+M$dB?cy8{gpzrB5<BE9mj-tbK$xK=v+9igK0v;I(UWI61Hk#w?v3~eV~(*LqP5A
z1xOT;QkPBnhfW_L)sBkFhf(Q!N>qUa%lsdSf&^vLfrr46Re;meLQ-FLsN_M1Od;|$
z9d=9m&6%m?I!KLJt3d(VWOs9#DUdC2I;f_!c;6#}w8cnUJSESh6!H6Wp}-dp@=5l^
zad6N`+R}KT#~L|q@>)Ko`iBVus^sKW76FBbt}x(hsh<CO(@UfY!C<Dpdh=OpWwm-#
zSC6hMN<Y5@>_`ivdK1rC&Jn_KZ^kpy8_dBfU_KJke99RP(b3P-Itb{cV?4Wxvx+fg
z^V1TX7>w8POk1+;&<f0W*i5VUkEW$(`*5h}ergN)@T3M_MSWy-<N3ILB7Z@5@D!A<
z{9~i}KGlZb+-s^$XCyt~r_*%uA!Hr=eJw0`B*cV7YkqsuQqj@o6-=FT?9$Mm{#^h2
z<4@;F3LLeIdloZg*57*j{?B$?n9lY@GRq9hjT+=ezcgw_z9+&-$7-inzcuPIKp?=u
z4ANr4VY>Iy-(NN7kok?(Y`~83ydH_e9A<aw(9p#SP-nfvIuoGZJv96J<SRik&&l7n
zctcy~u1KMa8y*1hcH(tC@Z?B~tB9`}_&)#5gGZ&|tX&9P<?OY$4j5cvAS%N^zOg{>
z`4L3(p$}nvkDJ}8{#6e={Z3=0Y6LkIM>>CYMO-b}N9**aiFx`g%I+G(r}Gj1$%E<A
z)T8Ta$qR0ooIZTmhuQnBvdsMSwjvobHDx6S4o$o2i4VtWzG=4Ap18ID3fR?mN|U%s
z2bgaz4w^J{AdNj|UF*#8MKAQOP8Z}R8(G2nze`_M6N;!L)=@5%ySC*VCNLo`jYe~;
z(xd2|i}j-=CZ5nmRFYzv*@OGvOO6vR@a^803lGjJ(-}9Iwnuy?z-AvFu|%F|Ck^)V
zOUNQA9nGg<I1>G7WEx=-ZPc8^Au#IzYVl^kPhenVVEDQ+&Cat;Hg1pplfWfQjc)oy
zX+)uri*C`=so#uQQvTD0^}@o}23SB)F7^cRvAF*qYThR+<&1=Q_FfD<0M85tL%O}p
z*-GM_ki;4+p0B?sm^q~fDt&xbLQa%U`l;LMvgE!{SN2KB?EX?O96S(-0^s_Nm(b^U
z?CWmkZ=iL@+|Ec9{An4@hYueDmQ*e7gY9bo{)Lk-i7C5mhSo%j)8wEj%i+MS{=3_j
zVcx7nYzHj&sQW?~Ao5-zkorpg(cJTn1E6G`Q2bhCG<C@^j@S^Qut^3v3ViK1TYSXL
zh8P+e1S0WIG<y<p_9-_?ApKT6PCcMZCr}Pj@9kWmokpV%negXmB+wQ29U2G`Uy=jQ
zJh^QK4@^{LnV~Ss(igtM7Pb7$_jdjFIld)kxCUB6<V)G7F!M*H3d)}<y1Zs8ArFFp
zem>f!H)Da}RK+@s!ptxPc-ILv1V>wWD2;TZ)(z^t4SE}8{U;-CES2<GeSUoj13UrR
zfjt=2+gl~9(tST;h<15n`T_;EkZ_e(38ssi)AlYaS@^azg4eG&(PD(gN&JhfQBB#I
z+FZpYA@aqaQgJ&ViE{BqT!bI_<oZ7nG2HS?;(T^)_W6hE{CnDzR8%<?JE`09lK>`L
z?fX!_qlrU2Z?Asu?rYxOFNy*z!#fXk87m<(47njI%O8XqVKN-beNhE1U$bBRw@fsP
zhZdV%;;;(}@+p2<y=ZEC8{wml<6!6OWM^0G{)rD!Iqh3)S$m-4d@QI$P-gvh(QnBt
ze_(>i@Fx7Kc}{$&@ptse)!r6ME5#DeSEJfUD2ilekM&ZlMG1n@LVz~<E8#2)+reaF
zG$;4t>*v^;)k4c$uT=$tYu_JOZWi%?Yzq<AA$i|OP$1255Z}xs4>>ABV_iy2RzER3
zuM=i^eV*nV#mYfx99NxUwv=82B)RWwwGBxD49gT)p-C^|&g64*Ihe#^@F6-OoCBm!
zadnwp22S7Epw@3|v8{O6G7AzciA}|He!rr}IV}V}#TOvdn8&SXCjh)g0UpY>mXRkD
zU(D4Y=|4%<!x;cSr2?AuYY7+daHz;o8B`tcFXo$&)7ij`aw;YSP%h}d6TB!dT)i7L
z`c@clSV=YRUb%>k#HaRtgi-0Jv-3Q6;KXJJU`6WbUpV(fh*3|HCh#>!zTD5qDUMIQ
zO=V<8xRIgG%s2qID4hwZk(@%MF1ci$#ynATcbvp`X~~E{VGqgXOMn3+J5?Q)L(R|Y
zozqNC0ck%We$7%Ui04cWJ4}Z5ec|ZsxpJY1Xth1S+dRn;6AwAaJ8qGx6b*=c$~Rds
zbJd^I?KGP&c!3GHL4+FA;MNd^W&o!sy}5S-FJgY4zEf9l{0Q+4(rVwq4T;_KjTZD>
z%6PwF2lhF;?`Byqgr0q--^d}=5Y60!U1aeVt-jp&VA{o1`7Uj2N8Ojd2D+JsuRcj~
zKlcsPPRq(>YpoK6@<QfwOyhjM@cI4)_abZZBLxcFg+l*yF`mt*f~%O_SOwbq_N`pK
za^t1oZj)eT`P1W*g{^lnxWSX31AMZzmG2Y*r}EybEo!GJkp+=hLn?RlMw}bSjvmGe
z9<m@j*%G_tzK7qO+IM+7XodRR;#zjeS=ixhelX0H@L9dK@ABOww#;H)>7l&2uU)(k
zEGP_)KeqoeKPQf<-XcO#Wl}}sY+lobkr+a<C!GRAlC>f@gn4T)C!3A?h_>=sTEGMD
z`TXD4xH`OJwjb!RVzXa?TkDLh9kOAZf<6|I65`3g3=v+ZXp5ZgCsKWY`Dc!`=<`*b
zX6-#^HkPhz=MAOfMTlK$MXNb>KKADTfPSQ1Dl{vsxxd2WuKmju6Qv`lR9t=V(eB03
zx12`0QWax&WWH4WlyipnO#W)V*h&|)or;~3vtPzBiSAxhZeaXEQKZiF$WV?j7XLSE
z*!;YR5ME^l#WlO_d}5R#*=^{6IfX6uW{Hc(y?A>AzI9GPCxx>#(xTJNmB|2NZLM4V
zkb6XY_FXnczL=QYmdS^Xn%j>vTW!h>2-wudcfeKGn8bDnsONWNS4!O^g2VT#@B;d~
zNqG^T+fVXVyIcKpY#ZS^)}5gjk_-_W*9%*XlLecU(H*>>+2+T{;!QH34JvmiGT}kr
z`z9%G1&BnPOB|j4&tL`(#p;qyyOq}ORilAwkkCz%f`V?1-StPRu+IHywvzGrkrY3+
z-}&_S0D<Gq(cJiM3j#O<R`dyb@sQIaQ|CWv7HB@IbDh5Q|Dhb>d@fL9c@}*&Fbm#%
zuV#O8<NCP7;RFBFJvbJ<oOrTKO9MT~AIm^prNE_(70I^h7v_6ad?>bNx6jDlY3`gD
zcKI68g#{-!sC@9f+})_%;u~x8`(FJOSIfGXJB>YzWQ!gHh-yaZk^4)nZl;><;y@+-
zJyKR(mOyzm;KD8|8~iWf^iNnTlQ&wFd1Du|=z(So9Yd?a$?)~&9{5ZJ?5JmrnTscx
zP(!x*i<9L}S5MCezqvB2jZEXrEb?`+d!UT3s$ny$vno*2nGBE=KygrL@XS4_EzT`a
z=$_%EEVvo5Wiuo*&~>&_6ecrJz^-Vjs*-HXPrwz!D_MKaETXAOWOx)2?ay-g#ZRHU
zob`~1j7*p6zobwH)YJNo2db_KcJGSw)ePV06&mlVq>kZ|UQBoEjfDi3su6Hk?)LP!
zgpa0~ZZ+NGVytsV<x(Xza%K0CTzU}liTT;b7JG@a*<ard`k-9u>gwF8R3H82+dynw
zbMgD@nUYlqHC4Sz=7LAM;l(a*B%kqVKrXt@JJZUXG_Svdfja{d2RIH^dR&x0zjZ9X
zuYQziL>hzZVA&^ksgGL=^aq{X7vk7I0}64LdF^S3QO~abQ}=BKY7HB{&CTtV=qQ(k
zT<l)+5%|elU3BS@&1EP7;*Y9S!KpB-)ZAzW>sNqV?*)&MvJrcz^L6(0Yvzru94WSc
z@R(W|BnGNht#Ojh*UOj7tQw7btWKL$WjjHNLv*k10u><=0CLg3Y#uPZBII|}pi8CA
zmRZ}E)gxS<CP1bSRxgoEfX;iZ=_~FjXt34g)I8rgn8+3f-=8{HkPeGh1NP>1Cihk;
zsrBoSicb>O*!&zms)+61n81xLvLP(5e79RT{IS#5Y+U9XyOV1Yxy6Ljytda89^h!>
zs#tzYb3B{JCfiWl#}hW1VCLjz5%W)SBWtdx18}Q7-z1o10Ek6UQE?TXa5rfs<=)Sz
z77AOgvJfrZTDvchb)(Su)$rNz9IVgVtYYKkHzl6An2|xQtDCuBBhm}~&ZVPMeferZ
z<xJnoLLpO4w#U$5=nxKoszzgp<Hz5KP#-DvWeQSNDJLzb_51Sq9qdF&Y{i#BVl4e1
zXl=Btu-W$j8}gmt<|O(nEPmp;<5@-iI@rhP`)HtU<+EV+>cq-=(|um6c^|77)m2!-
z-_xDM&C=`85M=0SeX4rF-6l;dJ%@>7d=Z~TAL%Kenk6|X8`D*hZh)ZQw1xQpN}sb0
zuv3CiF*+kHGQJkGc`<ce7oZ%^+i(TkD)TE^Q(;q_Gfy{r7mieXD#gCN3r4>4Tivyv
z_K_W#<@X6*>TYb@HFc0=MOZLB<oKq?a@N8&$=8Poi)TalaDl~@9ks2=Y!z@f_r^(?
z5XZhbEX1bGgG`iNp_?Zb+vLykH|u=_CI&WqzE}I(XXW^;5aXjY$4Mm^?4c29Xdt&2
zpYV<L3Hs=Q1uMxh{u-q)7d-tyZwfm4u=5&QX)Zgau0An%Y+~H8eBZp(Qp%4VgwX;E
zYFuCL74!*%EHWWp>q}Qa@#|XD9eHzI_`yENKbdXz8q~b{;UonepyM`HOG5k+%uOQH
zd@myXX&;moy(RlmTp_b%lX5D&?`z`pJMW!t9ppBA?``jC68&7COJ)u%eq|XSiS`Mo
zNbe3fU|!m~jP@^lZbO8+fFMP?EqQmrR0FYDpCSSj*+&hKBSEG#({NqhC37XLBa2ti
zl1D5+Y6!qO;UGSP4%<ZmY<6JT$6~c>h<Ae4)@j+5@oSIti;Tngjkjs&(8A=6K8-KF
zu2;`~+rS%xgBb>4RF7k#B<tB<B5{<n!d5CBhWD4h!=qrG7FBkfERwDTAOG5!aZy`o
zWC0!Xj0*KKm)6!5{ho9IY+cxGzE&2hzSj2?R;T5zq+|>yqi+O|ONlrjy+_;gA81Cf
z5!$D3JG9kEy-;P5foI$|69&ok*<t|?R4~4qRpiN0$Tk?1_i_AdlJnr(N>^2GulE!w
z{0W0|GF%?QhzF1|tNF@qN(=%BO<MPr#T0%jx-fc(S8x}Tbo9_sv9etI+}zgU@+7CJ
zn^wE%9pEiaMuaFWI7$^PJmp<9_dcKMsEHQ5WUui2R28jJ^8En&{;(|}*840=tr##>
z8HksMv<5-QzSz*?x2mY})cY2P=cfRm#<CPEjR1jc`5yPGAawvVjd$qe5s=JN-q$U2
z4-tbew%I-f38p}KEs`0$%1j`RwwL%on*_^&A4I~=NWuaO_{^^Pk`#OMWM`zl>h$@L
z`aLGxnHF#eJN$=;cT(WM(BBJ7Ho3)DzJ#KwFaf&y|B?L!x|09Xo&0}Ki~jq-nMgw<
zb)pDpNci7wk6=WUV}UTHF+r^-FQGC^K>s5BeTtV5HGSXK@c+_Vk?M`KyQHtnXrAq(
z0NO}ocEK}6G-Vc$??CqeW+za+0I0|R%`WRjRzzxK6Kph&7%<G}Rv$`XIdnn@zry3F
z)uH&u_lo}qn<N)P_V`k-QdTKej&%0>Gcvt<5cIukpb_MF{smv){jAr_J$#5&*&Qb;
zy8NB&P-mCm7e;GYvbY2U4_}PF|DkjD`O8V10UZ@>;qIf+n)~h-JL2DX4zJkZSd-c8
z@iC4ZP8N7dmjHkh_}mTVq_$x=fLtX7>gfTn(?qh63>D|l(jSqf#4+q7gX0h<DxE`1
zKP%N|1`*=&8@j4=dR`M@hYuL4myUnxp!lq(Id7cLV!g+(!AYsk<`86fuP|%HPEpML
zeyAk5DJ5V#3=qup_0$PL5h+4ugseQIRcOFm{{Frtqa49M16P4i1m&y4rkD!LF<PCx
zw`XXdUV~vd`EP78ZxFkbX?2hFAU|uSpZw1!Uj9ePkJKlHl=N?e$axV&)J7s7Ai^Mw
z9+)Fo&;>#!pdlqs@>8%?h)1|0Rt`L{LY4QBphomx^E(B=b)5|>S<eef!P=IRDSlp>
z!f84|8-UG61O)K=Q}}~VPo_c3uDo_YUpf1&=-!k=5ABMe&+|AaX}61VDb+Py@7X8U
zCcvqPU{r}hl7jD%VhlYQUf==L_l2k-iRgdMQ;ZE^aDRu;!~607v*~_>7&(|%;i0zd
zO_vf0MtN_cBQ3ha9|Gswk_X3gFwj(b`g@1|m|%#>Z|e7pZzAL^moLX*7pucGPd!$y
zIS<4l3L0YRPQIHz^>}AT2gs!C5Li$t-r_663?U0?J{>hf1+-@`q?ZFx6c>Jb$UaqX
zokoaibek@^*y_h48cEtaJeP;Oe<;8M0JRTmTs=9IMX~vLRxNbh-;=^Kags#s1nM@s
z_KGT2t9Vov*E7gb272Rf-*F@I#)$i`mHOFv5!O2$3=xh`Z<G!(imI;V07HP5Lym$x
z)Tqtb2BgdmHncAOj`LlyuKVdHwM#)x00Joc@%FL&@`0!a{j-Y3Zw2`o<P|Jtl*Nnu
z)P^uB>}eI^ev?58pcA^0?V7Epm#GgLUisZ%`)EMBKjLn0!aAb=^s8Uc&DF0Wu_l}N
z>leoh=FhB+VHX?o6Abu@uUGz_#*FMN<oWJ#oL&$7?$|yP-Xb^OCSU$MHI?$z(;K7z
zR^w?I-Bp!qhld3K7=E=gf#?g(KVv-Z99dsRPEEa8SC0~s5TMSe_Q!I5{#eB}{f3m5
zhAx+o<YB8)%s$P7n2~9vx8$8}Zh;CQL9x_tnxJCqe+85!cYKF@kybYW2wOQ8g!xG;
zUFG%l*9n@FuaAfqzV(Gic|_dZ2(Q7c6(RlCZkS@vrXDKG9MRnPUp?h~r_~YO9hM&M
z;}I4S<zd;+84_VL^@9CuukEYv@#XKy(!ldaz+Z95!w2NzIZ@6tH3JCa2OBcDR_z!2
zxH&;&>hRzXnf@g)mgD!^Zp!{nWhntKLe=n;G=DPkj8shtb-cG1ta-$4+Aj(&-Ip81
ziTOZB!iD%A&jP=@Z2-$J_==xgeR_Iv`soA@Z|F(u^@qlAUk7d4q9&>EANAkEDQql#
z+-?}tE^kE`F0HEr^2&WQOPIEFNlWfkqkIe?cj-654eCab;*C*UlvFvX@ue|!x7IoR
zT8pu4EIyG~bbIU*$vbQgr<V_kdEiwTrjXIgkHn~$h`S;&Va+rckyXU6%qrN$?b0ur
zY5S}61n9c(vcTCjDyFldgp3^JUXJ^J^m6UdP;Py=j+l}Atz2h>2o0~1%I!=NgJ|3m
zDyBoDB9}}FF^58G3h6R4CSpoC<a)#q8r2NPl;f@%x17f9Yleo%+`iXY>s#OY{`mg;
z*7w)D)?R!6_Pf`5-@TvT^V`ou`^4-EPYhUjEqXKDT&?^PR=+~4XW6xM#}+uveq0R&
zrqFNI*p_+{@w{~Ryh?_6-78oP-0r)ou-Ru~@Ob<%RpKve5`esbAbw*KlUs@!((@=!
zs~gDO<VQDx0-wlVx<8a@Z=hA7y6Y@sF~ohZf-3PVscQk7zQ#w)-Jn++7<v7eD|h%z
z8P2$7VlkDg&JcO4i$$-t(qjYLc%$ndE-w^W==UEid<edZI@_|Q%__(G2vC}yg}uLv
ztVOg-7$X*I1{~DwV>DB8M@URZZ(tzBxeyvaM#RV4Ml@pOaiJ*}KwkmN(n}$VQXyBb
z&-(Y)ikFMD9i;Hw*znfx4X!|pc8^6zXd2J~l3kxhvchkrc*-+t4|{FV;DREvxdK0i
zcR{hix$Nxcnx6W|s-Ft{#;x0IY-z?XuuNC$vpN**pk5g&nEnD>Rbmxo50#1?_bNE4
zDP0Y{j!j=*qVy#Q8<v}N&HEAennYYx_Sf*(;E4W?qA^ocd?WnmydXf4!NKD>9&PYy
zaapJ$%EBLrdpWl@YObaz*e2A<3@HQasSUVmNcVuMoDbdIew2GhJ>@RD4#!iW$}5=L
zvS#NQ-g5La=SELBV38iv`ebjV3mLI4rhJ2@(0KN!<Egy!W+tRR3(rrPtAG0PReJ4-
z2MW~Kt20g*=UP?|5JG^ap&@(|f^j0MNQ8T1Se2uPB#B=u4d?Lc>F*$J8pQciKfbVz
z_m8D6Y!4bx`g!vEsv0^=-c%&>)C;&D0fGUvyR60@zB=h%^iihV)rdVdh6)_W2R#>M
zFC6m>I~3=GQch6gBjc3vdH3Y2ZTAk<ghKltHOS^*%eTcFu=V2F+k+XqTAj4UA&Et^
z+?C_|-oTJ12wD&7`tjyjbQ%0SLp$wNr**&UYpua{J^VClVwP#`Co8x)ZmBLLH@)!y
zx6qNAEVz~tsVP9W-t8OcivTJIVPsAA;B0f_lLv-6fv(PA38F!Us^fXt)}%-*rk%@4
zr|Q69O>xc?U0s;IKFnqhQe(LDuEQbJd!ngECn=mbwzF;%Be>h88Jqrtj~IV$RTDub
zZU1<(!(|^~UGm-MDR%!KbuFi=Cq%c>C4MvB+?|PiTWH{C_8j?JN%MHk{10x;6dJ_W
z=aFeUYu+B@ZTGsU0QN8t%>T4yy0#}EfmhzXcfw6Hx7pl3lS6OKAn+<mB5&Ua6T%)$
zw5FQXxtAUDURxw?Vz8~XU9#`qQDZOEw5bLCmBw(&Ix_Cdvy?y)FAuA%&9tw3PA;}`
z{raLOX1)pW`xq0fkR_fthY)*x*9u7f{gSE1GD+`j`|fqf?-G=2t{C8mQGQ2LeM`!u
zN42;vjfC5%NRRJP4FZW>-3Ms9H>f=09(^tN!i$m^P%a0_D_PnEdc{QguR6329K6EV
z{Vg^3B2EgP8Igx3Td>8T@N>A^g-Q^4#%Myyu?LgC8O~4ja#BSq&bj*sTvhUL*B$$e
zV68^NEOEl>4(Aq)!+;+5R&Bq)AjJ&`do5%8@w7L9nF`vRcH!B+w!<biU2uZ)<u%sB
zS}3>J#6@o{7q-;nm?uqH9D$$8xv9AK+OXs%jMI^{DrrMWKVlC71c{*cWOAi&b=kOb
zIwNMHRXSNZJOJ1454QMlfH{nE&t4gI@`drF$*beGK?>iZE7SCNC{BI`UljxjWl*M8
zeE_gJmz#d&z%u3xT(M-9kT{pEL=cR#dYu9<O13&vHH=bXhSOhZ@e{tHRrYe8c7$lO
zsAMYyQXeaI=H(sL`p%+3Fta<NKdU(>FVAL}lUU0haU42LB?SvxW0%bPz{ZkA9-+;~
zi2OIm^{<B7y&JXkEAHd>G;@1<9Wlk6WbXS3ycxKdz7q8#PJvmyoWcRgcp-W!Gr7uw
z6D4)ph7v!qtr8w0!~zb<_PH&>ePfNXw91%A9ek%mew^~Jk}?9%a~FOj@II}_ED-Q7
zLI{EzMQnI~XEqv-8Rz9iX18tN+02BKz>S>xw5Y2Nby7t&cA7mwb|)!{*R2zirE?!U
zs^NZ~Mq{e7X;ExH6J=uXRzkJ{xBZWIFhP0ja>0jXnW)|B79_~}$Pl7ofcMPj?D{FW
zb{FfdxHETcVsS(ptH)M<9G)YofpGJXz5Oh&WR)*{9ipVbm7rAipQOW$NRF7<Zd4vW
zIJ3Y(bBDG7TJbz3P%)K5U~00{j;;YG0K$L_H|~3{#?T<@1y*0oEX|}JUiP@y@+=6h
zgXk&}*-Wr5&Nwbpu|&rzmb+O*b&%18kS>1{5F!>1cDpcju9VoqQ}9yBRe(|JFELHI
za7@Ad{a4{C+zrZYCu@yOKm4T<%7`y}LV;>1nmapPUVP9u=-P1BK;RV79>0}KR#N%l
zGdOvNmW8st)JWVeYusQ%l$&O&ez~-~fk%~HIbSqqFj~w*&}{l@-Uo=lcc(X|Q#NrX
zj#hYVC>@LMx(4N1uUJX7x2>^mW7*{}UytuA!7GlodJ@U&9oh|&Y%~t8SnvUqwvA@`
zVO)`IowL5u!C4P!SM81>^3>dtCxT?2I`Oy>Q*6dUvjXDXK>W+l&d#?QA_QWrD}vwj
zL5xdT20wqghlR~rg=^)F<}`#$(wWt1m?4Q}AfPhX%89x)mqf3u6wxRtq8aChlbpJS
z8g&0$<;UT=-1+nEEFr%_Ay4XC-(rpMuv%GLa)z3o01A<<H|L|tB-4v|zqd4{5KT>=
z|4dS&JXDtR^l7b#t>>l{y66@?dK^my(-e|+X2DJIX?+SNWL+T9c%K7vlojN|#@mFx
zNj0JbKBijWD@TPpDoc-t$67W@OFQsiQ!&N0^7@&j5Q5WP?A5P=nQ8Q$$T1o^iC<R*
zlPn#$J<}vzkZ2qSl9GiCF=0^k#j^+*=aehq%9>v|M~(fH=vj_QPh@p;bFiJVG$ZYu
zYP!V~-TM_!Bg4kJ&)NxYnro9cyPK}^ROBG*W~U1t?;u;?4ENV{O>Ep5s`4|jx06oY
zf+WHIyqbSx95Nra>7vcnv6;f-2dQW-wjhDVaMkI`tup9`7`Nvq{aBEXcA-y}5^u*{
zUVGCVFja@I5}iK$d3@qG#Mre!!a+iu?{CSG%2z6QxNW-d=FR4J0swb7yV+dZGviDd
z!RsB;u7X=_E2|M`LE*uD1CDi9O}eTxA8!KxMS4$gl10%zM9#QR+6*t^<V%+kc@$?i
z5G^e|H$i{Or@i2pw?~IXrjA^G>bR=UsxZ(ay8eU9-=ALR$j#{G2wn5GXT8X;NUd($
zrS0a+6$YvM4V;s!AW)fdWJCKcv}1vHJdtLRC6A9%Eq%=BnVUAUz@)`8tUD7Og|~j5
z*4AU_bL$*KQQ2XO?KvpUR{`gXt1f&3zdCyr%u*p>PyMJLk0Kgreu+k5QD$)+It-!%
zg+vJx?cWQ=`t@z0Y-T}js@V`Fx<Ap;?&nEWqWyGrSj>U%N)>hUhKLu&8;Rqq&)sqi
zqSCuAbrW&Ko?Q<+8vM4{>O5wU{X&$_-p_`VGlx<(>S8iT?`=?A{)+lwVp5M$-GJC{
z%K*ww@2|85vAfyQ=-^Ki24J)UTbGKla{A&1V+VtII8vjStoGp!eSr;6HErE7{i>V1
z<hHOuJRh)FEXH<vu+q>Qp+PT-zj=axIkqjn#3E(?Ucv?s@xL*61N}Vco&LhGe(<*X
zvd8NQt^bq7-M^=)y(}RVB<{3HzLt6pY~6`>3&uyD!k>1CxNsVLY(fG^8w7wv0DA*%
w>>O<D9roB;0)PVm<gfjD>OTgYkBdBuOaAWzrk?B{+nCqW?Z~0(gV@Y}0)z*lZ2$lO

diff --git a/rust/ballista/docs/user-guide/src/introduction.md b/rust/ballista/docs/user-guide/src/introduction.md
deleted file mode 100644
index 59d7a1a2a5c..00000000000
--- a/rust/ballista/docs/user-guide/src/introduction.md
+++ /dev/null
@@ -1,52 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-## Overview
-
-Ballista is a distributed compute platform primarily implemented in Rust, and powered by Apache Arrow. It is 
-built on an architecture that allows other programming languages to be supported as first-class citizens without paying
-a penalty for serialization costs.
-
-The foundational technologies in Ballista are:
-
-- [Apache Arrow](https://arrow.apache.org/) memory model and compute kernels for efficient processing of data.
-- [Apache Arrow Flight Protocol](https://arrow.apache.org/blog/2019/10/13/introducing-arrow-flight/) for efficient data transfer between processes.
-- [Google Protocol Buffers](https://developers.google.com/protocol-buffers) for serializing query plans.
-- [Docker](https://www.docker.com/) for packaging up executors along with user-defined code.
-
-## Architecture
-
-The following diagram highlights some of the integrations that will be possible with this unique architecture. Note that not all components shown here are available yet.
-
-![Ballista Architecture Diagram](img/ballista-architecture.png)
-
-## How does this compare to Apache Spark?
-
-Although Ballista is largely inspired by Apache Spark, there are some key differences.
-
-- The choice of Rust as the main execution language means that memory usage is deterministic and avoids the overhead of GC pauses.
-- Ballista is designed from the ground up to use columnar data, enabling a number of efficiencies such as vectorized 
-processing (SIMD and GPU) and efficient compression. Although Spark does have some columnar support, it is still 
-largely row-based today.
-- The combination of Rust and Arrow provides excellent memory efficiency and memory usage can be 5x - 10x lower than Apache Spark in some cases, which means that more processing can fit on a single node, reducing the overhead of distributed compute.
-- The use of Apache Arrow as the memory model and network protocol means that data can be exchanged between executors in any programming language with minimal serialization overhead.
-  
-## Status
-
-Ballista is at the proof-of-concept phase currently but is under active development by a growing community.
\ No newline at end of file
diff --git a/rust/ballista/docs/user-guide/src/kubernetes.md b/rust/ballista/docs/user-guide/src/kubernetes.md
deleted file mode 100644
index 8cd8beeb267..00000000000
--- a/rust/ballista/docs/user-guide/src/kubernetes.md
+++ /dev/null
@@ -1,216 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-# Deploying Ballista with Kubernetes
-
-Ballista can be deployed to any Kubernetes cluster using the following instructions. These instructions assume that
-you are already comfortable with managing Kubernetes deployments.
-
-The k8s deployment consists of:
-
-- k8s stateful set for one or more scheduler processes
-- k8s stateful set for one or more executor processes
-- k8s service to route traffic to the schedulers
-- k8s persistent volume and persistent volume claims to make local data accessible to Ballista
-
-## Limitations
-
-Ballista is at an early stage of development and therefore has some significant limitations:
-
-- There is no support for shared object stores such as S3. All data must exist locally on each node in the 
-  cluster, including where any client process runs  (until 
-  [#473](https://github.com/ballista-compute/ballista/issues/473) is resolved).
-- Only a single scheduler instance is currently supported unless the scheduler is configured to use `etcd` as a 
-  backing store.
-
-## Create Persistent Volume and Persistent Volume Claim 
-
-Copy the following yaml to a `pv.yaml` file and apply to the cluster to create a persistent volume and a persistent 
-volume claim so that the specified host directory is available to the containers. This is where any data should be 
-located so that Ballista can execute queries against it.
-
-```yaml
-apiVersion: v1
-kind: PersistentVolume
-metadata:
-  name: data-pv
-  labels:
-    type: local
-spec:
-  storageClassName: manual
-  capacity:
-    storage: 10Gi
-  accessModes:
-    - ReadWriteOnce
-  hostPath:
-    path: "/mnt"
----
-apiVersion: v1
-kind: PersistentVolumeClaim
-metadata:
-  name: data-pv-claim
-spec:
-  storageClassName: manual
-  accessModes:
-    - ReadWriteOnce
-  resources:
-    requests:
-      storage: 3Gi
-```
-
-To apply this yaml:
-
-```bash
-kubectl apply -f pv.yaml
-```
-
-You should see the following output:
-
-```bash
-persistentvolume/data-pv created
-persistentvolumeclaim/data-pv-claim created
-```
-
-## Deploying Ballista Scheduler and Executors
-
-Copy the following yaml to a `cluster.yaml` file.
-
-```yaml
-apiVersion: v1
-kind: Service
-metadata:
-  name: ballista-scheduler
-  labels:
-    app: ballista-scheduler
-spec:
-  ports:
-    - port: 50050
-      name: scheduler
-  clusterIP: None
-  selector:
-    app: ballista-scheduler
----
-apiVersion: apps/v1
-kind: StatefulSet
-metadata:
-  name: ballista-scheduler
-spec:
-  serviceName: "ballista-scheduler"
-  replicas: 1
-  selector:
-    matchLabels:
-      app: ballista-scheduler
-  template:
-    metadata:
-      labels:
-        app: ballista-scheduler
-        ballista-cluster: ballista
-    spec:
-      containers:
-      - name: ballista-scheduler
-        image: ballistacompute/ballista-rust:0.4.2-SNAPSHOT
-        command: ["/scheduler"]
-        args: ["--port=50050"]
-        ports:
-          - containerPort: 50050
-            name: flight
-        volumeMounts:
-          - mountPath: /mnt
-            name: data
-      volumes:
-      - name: data
-        persistentVolumeClaim:
-          claimName: data-pv-claim
----
-apiVersion: apps/v1
-kind: StatefulSet
-metadata:
-  name: ballista-executor
-spec:
-  serviceName: "ballista-scheduler"
-  replicas: 2
-  selector:
-    matchLabels:
-      app: ballista-executor
-  template:
-    metadata:
-      labels:
-        app: ballista-executor
-        ballista-cluster: ballista
-    spec:
-      containers:
-        - name: ballista-executor
-          image: ballistacompute/ballista-rust:0.4.2-SNAPSHOT
-          command: ["/executor"]
-          args: ["--port=50051", "--scheduler-host=ballista-scheduler", "--scheduler-port=50050", "--external-host=$(MY_POD_IP)"]
-          env:
-            - name: MY_POD_IP
-              valueFrom:
-                fieldRef:
-                  fieldPath: status.podIP            
-          ports:
-            - containerPort: 50051
-              name: flight
-          volumeMounts:
-            - mountPath: /mnt
-              name: data
-      volumes:
-        - name: data
-          persistentVolumeClaim:
-            claimName: data-pv-claim
-```
-
-```bash
-$ kubectl apply -f cluster.yaml
-```
-
-This should show the following output:
-
-```
-service/ballista-scheduler created
-statefulset.apps/ballista-scheduler created
-statefulset.apps/ballista-executor created
-```
-
-You can also check status by running `kubectl get pods`:
-
-```bash
-$ kubectl get pods
-NAME                   READY   STATUS    RESTARTS   AGE
-busybox                1/1     Running   0          16m
-ballista-scheduler-0   1/1     Running   0          42s
-ballista-executor-0    1/1     Running   2          42s
-ballista-executor-1    1/1     Running   0          26s
-```
-
-You can view the scheduler logs with `kubectl logs ballista-scheduler-0`:
-
-```
-$ kubectl logs ballista-scheduler-0
-[2021-02-19T00:24:01Z INFO  scheduler] Ballista v0.4.2-SNAPSHOT Scheduler listening on 0.0.0.0:50050
-[2021-02-19T00:24:16Z INFO  ballista::scheduler] Received register_executor request for ExecutorMetadata { id: "b5e81711-1c5c-46ec-8522-d8b359793188", host: "10.1.23.149", port: 50051 }
-[2021-02-19T00:24:17Z INFO  ballista::scheduler] Received register_executor request for ExecutorMetadata { id: "816e4502-a876-4ed8-b33f-86d243dcf63f", host: "10.1.23.150", port: 50051 }
-```
-
-## Deleting the Ballista cluster
-
-Run the following kubectl command to delete the cluster.
-
-```bash
-kubectl delete -f cluster.yaml
-```
\ No newline at end of file
diff --git a/rust/ballista/docs/user-guide/src/standalone.md b/rust/ballista/docs/user-guide/src/standalone.md
deleted file mode 100644
index e4c24fedd31..00000000000
--- a/rust/ballista/docs/user-guide/src/standalone.md
+++ /dev/null
@@ -1,92 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-## Deploying a standalone Ballista cluster
-
-### Start a Scheduler
-
-Start a scheduler using the following syntax:
-
-```bash
-docker run --network=host \
-  -d ballistacompute/ballista-rust:0.4.2-SNAPSHOT \
-  /scheduler --port 50050
-```
-
-Run `docker ps` to check that the process is running:
-
-```
-$ docker ps
-CONTAINER ID   IMAGE                                         COMMAND                  CREATED         STATUS         PORTS     NAMES
-59452ce72138   ballistacompute/ballista-rust:0.4.2-SNAPSHOT   "/scheduler --port 5…"   6 seconds ago   Up 5 seconds             affectionate_hofstadter
-```
-
-Run `docker logs CONTAINER_ID` to check the output from the process:
-
-```
-$ docker logs 59452ce72138
-[2021-02-14T18:32:20Z INFO  scheduler] Ballista v0.4.2-SNAPSHOT Scheduler listening on 0.0.0.0:50050
-```
-
-### Start executors
-
-Start one or more executor processes. Each executor process will need to listen on a different port.
-
-```bash
-docker run --network=host \
-  -d ballistacompute/ballista-rust:0.4.2-SNAPSHOT \
-  /executor --external-host localhost --port 50051 
-```
-
-Use `docker ps` to check that both the scheduer and executor(s) are now running:
-
-```
-$ docker ps
-CONTAINER ID   IMAGE                                         COMMAND                  CREATED         STATUS         PORTS     NAMES
-0746ce262a19   ballistacompute/ballista-rust:0.4.2-SNAPSHOT   "/executor --externa…"   2 seconds ago   Up 1 second              naughty_mclean
-59452ce72138   ballistacompute/ballista-rust:0.4.2-SNAPSHOT   "/scheduler --port 5…"   4 minutes ago   Up 4 minutes             affectionate_hofstadter
-```
-
-Use `docker logs CONTAINER_ID` to check the output from the executor(s):
-
-```
-$ docker logs 0746ce262a19
-[2021-02-14T18:36:25Z INFO  executor] Running with config: ExecutorConfig { host: "localhost", port: 50051, work_dir: "/tmp/.tmpVRFSvn", concurrent_tasks: 4 }
-[2021-02-14T18:36:25Z INFO  executor] Ballista v0.4.2-SNAPSHOT Rust Executor listening on 0.0.0.0:50051
-[2021-02-14T18:36:25Z INFO  executor] Starting registration with scheduler
-```
-
-The external host and port will be registered with the scheduler. The executors will discover other executors by 
-requesting a list of executors from the scheduler.
-
-### Using etcd as backing store
-
-_NOTE: This functionality is currently experimental_
-
-Ballista can optionally use [etcd](https://etcd.io/) as a backing store for the scheduler. 
-
-```bash
-docker run --network=host \
-  -d ballistacompute/ballista-rust:0.4.2-SNAPSHOT \
-  /scheduler --port 50050 \
-  --config-backend etcd \
-  --etcd-urls etcd:2379
-```
-
-Please refer to the [etcd](https://etcd.io/) web site for installation instructions. Etcd version 3.4.9 or later is 
-recommended.
diff --git a/rust/ballista/rust/.dockerignore b/rust/ballista/rust/.dockerignore
deleted file mode 100644
index 96f99a522ad..00000000000
--- a/rust/ballista/rust/.dockerignore
+++ /dev/null
@@ -1,23 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# Turn .dockerignore to .dockerallow by excluding everything and explicitly
-# allowing specific files and directories. This enables us to quickly add
-# dependency files to the docker content without scanning the whole directory.
-# This setup requires to all of our docker containers have arrow's source
-# as a mounted directory.
-target
\ No newline at end of file
diff --git a/rust/ballista/rust/.gitignore b/rust/ballista/rust/.gitignore
deleted file mode 100644
index 97eec164046..00000000000
--- a/rust/ballista/rust/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-target
-temp
\ No newline at end of file
diff --git a/rust/ballista/rust/Cargo.toml b/rust/ballista/rust/Cargo.toml
deleted file mode 100644
index 5e344e004b8..00000000000
--- a/rust/ballista/rust/Cargo.toml
+++ /dev/null
@@ -1,30 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-[workspace]
-
-members = [
-    "benchmarks/tpch",
-    "client",
-    "core",
-    "executor",
-    "scheduler",
-]
-
-#[profile.release]
-#lto = true
-#codegen-units = 1
diff --git a/rust/ballista/rust/benchmarks/tpch/.dockerignore b/rust/ballista/rust/benchmarks/tpch/.dockerignore
deleted file mode 100644
index 3a7d0fdaa06..00000000000
--- a/rust/ballista/rust/benchmarks/tpch/.dockerignore
+++ /dev/null
@@ -1,25 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# Turn .dockerignore to .dockerallow by excluding everything and explicitly
-# allowing specific files and directories. This enables us to quickly add
-# dependency files to the docker content without scanning the whole directory.
-# This setup requires to all of our docker containers have arrow's source
-# as a mounted directory.
-
-data
-target
\ No newline at end of file
diff --git a/rust/ballista/rust/benchmarks/tpch/.gitignore b/rust/ballista/rust/benchmarks/tpch/.gitignore
deleted file mode 100644
index 6320cd248dd..00000000000
--- a/rust/ballista/rust/benchmarks/tpch/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-data
\ No newline at end of file
diff --git a/rust/ballista/rust/benchmarks/tpch/Cargo.toml b/rust/ballista/rust/benchmarks/tpch/Cargo.toml
deleted file mode 100644
index 822d101d4e9..00000000000
--- a/rust/ballista/rust/benchmarks/tpch/Cargo.toml
+++ /dev/null
@@ -1,36 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-[package]
-name = "tpch"
-version = "0.4.2-SNAPSHOT"
-homepage = "https://github.com/apache/arrow"
-repository = "https://github.com/apache/arrow"
-authors = ["Apache Arrow <dev@arrow.apache.org>"]
-license = "Apache-2.0"
-edition = "2018"
-
-[dependencies]
-ballista = { path="../../client" }
-
-arrow = { path = "../../../../arrow"  }
-datafusion = { path = "../../../../datafusion" }
-parquet = { path = "../../../../parquet"  }
-
-env_logger = "0.8"
-tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread"] }
-structopt = "0.3"
diff --git a/rust/ballista/rust/benchmarks/tpch/README.md b/rust/ballista/rust/benchmarks/tpch/README.md
deleted file mode 100644
index 6d77694b91b..00000000000
--- a/rust/ballista/rust/benchmarks/tpch/README.md
+++ /dev/null
@@ -1,104 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-# TPC-H Benchmarks
-
-TPC-H is an industry standard benchmark for testing databases and query engines. A command-line tool is available that
-can generate the raw test data at any given scale factor (scale factor refers to the amount of data to be generated).
-
-## Generating Test Data
-
-TPC-H data can be generated using the `tpch-gen.sh` script, which creates a Docker image containing the TPC-DS data
-generator.
-
-```bash
-./tpch-gen.sh
-```
-
-Data will be generated into the `data` subdirectory and will not be checked in because this directory has been added 
-to the `.gitignore` file.
-
-## Running the Benchmarks
-
-To run the benchmarks it is necessary to have at least one Ballista scheduler and one Ballista executor running.
-
-To run the scheduler from source:
-
-```bash
-cd $ARROW_HOME/rust/ballista/rust/scheduler
-RUST_LOG=info cargo run --release
-```
-
-By default the scheduler will bind to `0.0.0.0` and listen on port 50050.
-
-To run the executor from source:
-
-```bash
-cd $ARROW_HOME/rust/ballista/rust/executor
-RUST_LOG=info cargo run --release
-```
-
-By default the executor will bind to `0.0.0.0` and listen on port 50051.
-
-You can add SIMD/snmalloc/LTO flags to improve speed (with longer build times):
-
-```
-RUST_LOG=info RUSTFLAGS='-C target-cpu=native -C lto -C codegen-units=1 -C embed-bitcode' cargo run --release --bin executor --features "simd snmalloc" --target x86_64-unknown-linux-gnu
-```
-
-To run the benchmarks:
-
-```bash
-cd $ARROW_HOME/rust/ballista/rust/benchmarks/tpch
-cargo run --release benchmark --host localhost --port 50050 --query 1 --path $(pwd)/data --format tbl
-```
-
-## Running the Benchmarks on docker-compose
-
-To start a Rust scheduler and executor using Docker Compose:
-
-```bash
-cd $BALLISTA_HOME
-./dev/build-rust.sh
-cd $BALLISTA_HOME/rust/benchmarks/tpch
-docker-compose up
-```
-
-Then you can run the benchmark with:
-
-```bash
-docker-compose run ballista-client cargo run benchmark --host ballista-scheduler --port 50050 --query 1 --path /data --format tbl
-```
-
-## Expected output
-
-The result of query 1 should produce the following output when executed against the SF=1 dataset.
-
-```
-+--------------+--------------+----------+--------------------+--------------------+--------------------+--------------------+--------------------+----------------------+-------------+
-| l_returnflag | l_linestatus | sum_qty  | sum_base_price     | sum_disc_price     | sum_charge         | avg_qty            | avg_price          | avg_disc             | count_order |
-+--------------+--------------+----------+--------------------+--------------------+--------------------+--------------------+--------------------+----------------------+-------------+
-| A            | F            | 37734107 | 56586554400.73001  | 53758257134.870026 | 55909065222.82768  | 25.522005853257337 | 38273.12973462168  | 0.049985295838396455 | 1478493     |
-| N            | F            | 991417   | 1487504710.3799996 | 1413082168.0541    | 1469649223.1943746 | 25.516471920522985 | 38284.467760848296 | 0.05009342667421622  | 38854       |
-| N            | O            | 74476023 | 111701708529.50996 | 106118209986.10472 | 110367023144.56622 | 25.502229680934594 | 38249.1238377803   | 0.049996589476752576 | 2920373     |
-| R            | F            | 37719753 | 56568041380.90001  | 53741292684.60399  | 55889619119.83194  | 25.50579361269077  | 38250.854626099666 | 0.05000940583012587  | 1478870     |
-+--------------+--------------+----------+--------------------+--------------------+--------------------+--------------------+--------------------+----------------------+-------------+
-Query 1 iteration 0 took 1956.1 ms
-Query 1 avg time: 1956.11 ms
-```
diff --git a/rust/ballista/rust/benchmarks/tpch/docker-compose.yaml b/rust/ballista/rust/benchmarks/tpch/docker-compose.yaml
deleted file mode 100644
index f872ce16e2d..00000000000
--- a/rust/ballista/rust/benchmarks/tpch/docker-compose.yaml
+++ /dev/null
@@ -1,62 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-version: '2.0'
-services:
-  etcd:
-    image: quay.io/coreos/etcd:v3.4.9
-    command: "etcd -advertise-client-urls http://etcd:2379 -listen-client-urls http://0.0.0.0:2379"
-  ballista-scheduler:
-    image: ballistacompute/ballista-rust:0.4.2-SNAPSHOT
-    command: "/scheduler --config-backend etcd --etcd-urls etcd:2379 --bind-host 0.0.0.0 --port 50050"
-    environment:
-      - RUST_LOG=ballista=debug
-    volumes:
-      - ./data:/data
-    depends_on:
-      - etcd
-  ballista-executor-1:
-    image: ballistacompute/ballista-rust:0.4.2-SNAPSHOT
-    command: "/executor --bind-host 0.0.0.0 --port 50051 --external-host ballista-executor-1 --scheduler-host ballista-scheduler"
-    environment:
-      - RUST_LOG=info
-    volumes:
-      - ./data:/data
-    depends_on:
-      - ballista-scheduler
-  ballista-executor-2:
-    image: ballistacompute/ballista-rust:0.4.2-SNAPSHOT
-    command: "/executor --bind-host 0.0.0.0 --port 50052 --external-host ballista-executor-2 --scheduler-host ballista-scheduler"
-    environment:
-      - RUST_LOG=info
-    volumes:
-      - ./data:/data
-    depends_on:
-      - ballista-scheduler
-  ballista-client:
-    image: ballistacompute/ballista-rust:0.4.2-SNAPSHOT
-    command: "/bin/sh" # do nothing
-    working_dir: /ballista/benchmarks/tpch
-    environment:
-      - RUST_LOG=info
-    volumes:
-      - ./data:/data
-      - ../..:/ballista
-    depends_on:
-      - ballista-scheduler
-      - ballista-executor-1
-      - ballista-executor-2
-
diff --git a/rust/ballista/rust/benchmarks/tpch/entrypoint.sh b/rust/ballista/rust/benchmarks/tpch/entrypoint.sh
deleted file mode 100755
index 71c04324afd..00000000000
--- a/rust/ballista/rust/benchmarks/tpch/entrypoint.sh
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/bin/bash
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-set -e
-cd /tpch-dbgen
-./dbgen -vf -s 1
-mv *.tbl /data
\ No newline at end of file
diff --git a/rust/ballista/rust/benchmarks/tpch/queries/q1.sql b/rust/ballista/rust/benchmarks/tpch/queries/q1.sql
deleted file mode 100644
index a0fcf159e20..00000000000
--- a/rust/ballista/rust/benchmarks/tpch/queries/q1.sql
+++ /dev/null
@@ -1,21 +0,0 @@
-select
-    l_returnflag,
-    l_linestatus,
-    sum(l_quantity) as sum_qty,
-    sum(l_extendedprice) as sum_base_price,
-    sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
-    sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
-    avg(l_quantity) as avg_qty,
-    avg(l_extendedprice) as avg_price,
-    avg(l_discount) as avg_disc,
-    count(*) as count_order
-from
-    lineitem
-where
-        l_shipdate <= date '1998-09-02'
-group by
-    l_returnflag,
-    l_linestatus
-order by
-    l_returnflag,
-    l_linestatus;
\ No newline at end of file
diff --git a/rust/ballista/rust/benchmarks/tpch/queries/q10.sql b/rust/ballista/rust/benchmarks/tpch/queries/q10.sql
deleted file mode 100644
index cf45e43485f..00000000000
--- a/rust/ballista/rust/benchmarks/tpch/queries/q10.sql
+++ /dev/null
@@ -1,31 +0,0 @@
-select
-    c_custkey,
-    c_name,
-    sum(l_extendedprice * (1 - l_discount)) as revenue,
-    c_acctbal,
-    n_name,
-    c_address,
-    c_phone,
-    c_comment
-from
-    customer,
-    orders,
-    lineitem,
-    nation
-where
-        c_custkey = o_custkey
-  and l_orderkey = o_orderkey
-  and o_orderdate >= date '1993-10-01'
-  and o_orderdate < date '1994-01-01'
-  and l_returnflag = 'R'
-  and c_nationkey = n_nationkey
-group by
-    c_custkey,
-    c_name,
-    c_acctbal,
-    c_phone,
-    n_name,
-    c_address,
-    c_comment
-order by
-    revenue desc;
\ No newline at end of file
diff --git a/rust/ballista/rust/benchmarks/tpch/queries/q11.sql b/rust/ballista/rust/benchmarks/tpch/queries/q11.sql
deleted file mode 100644
index c23ed1c71bf..00000000000
--- a/rust/ballista/rust/benchmarks/tpch/queries/q11.sql
+++ /dev/null
@@ -1,27 +0,0 @@
-select
-    ps_partkey,
-    sum(ps_supplycost * ps_availqty) as value
-from
-    partsupp,
-    supplier,
-    nation
-where
-    ps_suppkey = s_suppkey
-  and s_nationkey = n_nationkey
-  and n_name = 'GERMANY'
-group by
-    ps_partkey having
-    sum(ps_supplycost * ps_availqty) > (
-    select
-    sum(ps_supplycost * ps_availqty) * 0.0001
-    from
-    partsupp,
-    supplier,
-    nation
-    where
-    ps_suppkey = s_suppkey
-                  and s_nationkey = n_nationkey
-                  and n_name = 'GERMANY'
-    )
-order by
-    value desc;
\ No newline at end of file
diff --git a/rust/ballista/rust/benchmarks/tpch/queries/q12.sql b/rust/ballista/rust/benchmarks/tpch/queries/q12.sql
deleted file mode 100644
index f8e6d960c84..00000000000
--- a/rust/ballista/rust/benchmarks/tpch/queries/q12.sql
+++ /dev/null
@@ -1,30 +0,0 @@
-select
-    l_shipmode,
-    sum(case
-            when o_orderpriority = '1-URGENT'
-                or o_orderpriority = '2-HIGH'
-                then 1
-            else 0
-        end) as high_line_count,
-    sum(case
-            when o_orderpriority <> '1-URGENT'
-                and o_orderpriority <> '2-HIGH'
-                then 1
-            else 0
-        end) as low_line_count
-from
-    lineitem
-        join
-    orders
-    on
-            l_orderkey = o_orderkey
-where
-        l_shipmode in ('MAIL', 'SHIP')
-  and l_commitdate < l_receiptdate
-  and l_shipdate < l_commitdate
-  and l_receiptdate >= date '1994-01-01'
-  and l_receiptdate < date '1995-01-01'
-group by
-    l_shipmode
-order by
-    l_shipmode;
\ No newline at end of file
diff --git a/rust/ballista/rust/benchmarks/tpch/queries/q13.sql b/rust/ballista/rust/benchmarks/tpch/queries/q13.sql
deleted file mode 100644
index 4bfe8c35553..00000000000
--- a/rust/ballista/rust/benchmarks/tpch/queries/q13.sql
+++ /dev/null
@@ -1,20 +0,0 @@
-select
-    c_count,
-    count(*) as custdist
-from
-    (
-        select
-            c_custkey,
-            count(o_orderkey)
-        from
-            customer left outer join orders on
-                        c_custkey = o_custkey
-                    and o_comment not like '%special%requests%'
-        group by
-            c_custkey
-    ) as c_orders (c_custkey, c_count)
-group by
-    c_count
-order by
-    custdist desc,
-    c_count desc;
\ No newline at end of file
diff --git a/rust/ballista/rust/benchmarks/tpch/queries/q14.sql b/rust/ballista/rust/benchmarks/tpch/queries/q14.sql
deleted file mode 100644
index d8ef6afaca9..00000000000
--- a/rust/ballista/rust/benchmarks/tpch/queries/q14.sql
+++ /dev/null
@@ -1,13 +0,0 @@
-select
-            100.00 * sum(case
-                             when p_type like 'PROMO%'
-                                 then l_extendedprice * (1 - l_discount)
-                             else 0
-            end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue
-from
-    lineitem,
-    part
-where
-        l_partkey = p_partkey
-  and l_shipdate >= date '1995-09-01'
-  and l_shipdate < date '1995-10-01';
\ No newline at end of file
diff --git a/rust/ballista/rust/benchmarks/tpch/queries/q16.sql b/rust/ballista/rust/benchmarks/tpch/queries/q16.sql
deleted file mode 100644
index 36b7c07c164..00000000000
--- a/rust/ballista/rust/benchmarks/tpch/queries/q16.sql
+++ /dev/null
@@ -1,30 +0,0 @@
-select
-    p_brand,
-    p_type,
-    p_size,
-    count(distinct ps_suppkey) as supplier_cnt
-from
-    partsupp,
-    part
-where
-        p_partkey = ps_partkey
-  and p_brand <> 'Brand#45'
-  and p_type not like 'MEDIUM POLISHED%'
-  and p_size in (49, 14, 23, 45, 19, 3, 36, 9)
-  and ps_suppkey not in (
-    select
-        s_suppkey
-    from
-        supplier
-    where
-            s_comment like '%Customer%Complaints%'
-)
-group by
-    p_brand,
-    p_type,
-    p_size
-order by
-    supplier_cnt desc,
-    p_brand,
-    p_type,
-    p_size;
\ No newline at end of file
diff --git a/rust/ballista/rust/benchmarks/tpch/queries/q17.sql b/rust/ballista/rust/benchmarks/tpch/queries/q17.sql
deleted file mode 100644
index 1e65550634f..00000000000
--- a/rust/ballista/rust/benchmarks/tpch/queries/q17.sql
+++ /dev/null
@@ -1,17 +0,0 @@
-select
-        sum(l_extendedprice) / 7.0 as avg_yearly
-from
-    lineitem,
-    part
-where
-        p_partkey = l_partkey
-  and p_brand = 'Brand#23'
-  and p_container = 'MED BOX'
-  and l_quantity < (
-    select
-            0.2 * avg(l_quantity)
-    from
-        lineitem
-    where
-            l_partkey = p_partkey
-);
\ No newline at end of file
diff --git a/rust/ballista/rust/benchmarks/tpch/queries/q18.sql b/rust/ballista/rust/benchmarks/tpch/queries/q18.sql
deleted file mode 100644
index 835de28a57b..00000000000
--- a/rust/ballista/rust/benchmarks/tpch/queries/q18.sql
+++ /dev/null
@@ -1,32 +0,0 @@
-select
-    c_name,
-    c_custkey,
-    o_orderkey,
-    o_orderdate,
-    o_totalprice,
-    sum(l_quantity)
-from
-    customer,
-    orders,
-    lineitem
-where
-        o_orderkey in (
-        select
-            l_orderkey
-        from
-            lineitem
-        group by
-            l_orderkey having
-                sum(l_quantity) > 300
-    )
-  and c_custkey = o_custkey
-  and o_orderkey = l_orderkey
-group by
-    c_name,
-    c_custkey,
-    o_orderkey,
-    o_orderdate,
-    o_totalprice
-order by
-    o_totalprice desc,
-    o_orderdate;
\ No newline at end of file
diff --git a/rust/ballista/rust/benchmarks/tpch/queries/q19.sql b/rust/ballista/rust/benchmarks/tpch/queries/q19.sql
deleted file mode 100644
index 56668e73f86..00000000000
--- a/rust/ballista/rust/benchmarks/tpch/queries/q19.sql
+++ /dev/null
@@ -1,35 +0,0 @@
-select
-    sum(l_extendedprice* (1 - l_discount)) as revenue
-from
-    lineitem,
-    part
-where
-    (
-                p_partkey = l_partkey
-            and p_brand = 'Brand#12'
-            and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG')
-            and l_quantity >= 1 and l_quantity <= 1 + 10
-            and p_size between 1 and 5
-            and l_shipmode in ('AIR', 'AIR REG')
-            and l_shipinstruct = 'DELIVER IN PERSON'
-        )
-   or
-    (
-                p_partkey = l_partkey
-            and p_brand = 'Brand#23'
-            and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK')
-            and l_quantity >= 10 and l_quantity <= 10 + 10
-            and p_size between 1 and 10
-            and l_shipmode in ('AIR', 'AIR REG')
-            and l_shipinstruct = 'DELIVER IN PERSON'
-        )
-   or
-    (
-                p_partkey = l_partkey
-            and p_brand = 'Brand#34'
-            and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG')
-            and l_quantity >= 20 and l_quantity <= 20 + 10
-            and p_size between 1 and 15
-            and l_shipmode in ('AIR', 'AIR REG')
-            and l_shipinstruct = 'DELIVER IN PERSON'
-        );
\ No newline at end of file
diff --git a/rust/ballista/rust/benchmarks/tpch/queries/q2.sql b/rust/ballista/rust/benchmarks/tpch/queries/q2.sql
deleted file mode 100644
index f66af210205..00000000000
--- a/rust/ballista/rust/benchmarks/tpch/queries/q2.sql
+++ /dev/null
@@ -1,43 +0,0 @@
-select
-    s_acctbal,
-    s_name,
-    n_name,
-    p_partkey,
-    p_mfgr,
-    s_address,
-    s_phone,
-    s_comment
-from
-    part,
-    supplier,
-    partsupp,
-    nation,
-    region
-where
-        p_partkey = ps_partkey
-  and s_suppkey = ps_suppkey
-  and p_size = 15
-  and p_type like '%BRASS'
-  and s_nationkey = n_nationkey
-  and n_regionkey = r_regionkey
-  and r_name = 'EUROPE'
-  and ps_supplycost = (
-    select
-        min(ps_supplycost)
-    from
-        partsupp,
-        supplier,
-        nation,
-        region
-    where
-            p_partkey = ps_partkey
-      and s_suppkey = ps_suppkey
-      and s_nationkey = n_nationkey
-      and n_regionkey = r_regionkey
-      and r_name = 'EUROPE'
-)
-order by
-    s_acctbal desc,
-    n_name,
-    s_name,
-    p_partkey;
\ No newline at end of file
diff --git a/rust/ballista/rust/benchmarks/tpch/queries/q20.sql b/rust/ballista/rust/benchmarks/tpch/queries/q20.sql
deleted file mode 100644
index f0339a6013c..00000000000
--- a/rust/ballista/rust/benchmarks/tpch/queries/q20.sql
+++ /dev/null
@@ -1,37 +0,0 @@
-select
-    s_name,
-    s_address
-from
-    supplier,
-    nation
-where
-        s_suppkey in (
-        select
-            ps_suppkey
-        from
-            partsupp
-        where
-                ps_partkey in (
-                select
-                    p_partkey
-                from
-                    part
-                where
-                        p_name like 'forest%'
-            )
-          and ps_availqty > (
-            select
-                    0.5 * sum(l_quantity)
-            from
-                lineitem
-            where
-                    l_partkey = ps_partkey
-              and l_suppkey = ps_suppkey
-              and l_shipdate >= date '1994-01-01'
-              and l_shipdate < 'date 1994-01-01' + interval '1' year
-        )
-    )
-  and s_nationkey = n_nationkey
-  and n_name = 'CANADA'
-order by
-    s_name;
\ No newline at end of file
diff --git a/rust/ballista/rust/benchmarks/tpch/queries/q21.sql b/rust/ballista/rust/benchmarks/tpch/queries/q21.sql
deleted file mode 100644
index 9d2fe32cee2..00000000000
--- a/rust/ballista/rust/benchmarks/tpch/queries/q21.sql
+++ /dev/null
@@ -1,39 +0,0 @@
-select
-    s_name,
-    count(*) as numwait
-from
-    supplier,
-    lineitem l1,
-    orders,
-    nation
-where
-        s_suppkey = l1.l_suppkey
-  and o_orderkey = l1.l_orderkey
-  and o_orderstatus = 'F'
-  and l1.l_receiptdate > l1.l_commitdate
-  and exists (
-        select
-            *
-        from
-            lineitem l2
-        where
-                l2.l_orderkey = l1.l_orderkey
-          and l2.l_suppkey <> l1.l_suppkey
-    )
-  and not exists (
-        select
-            *
-        from
-            lineitem l3
-        where
-                l3.l_orderkey = l1.l_orderkey
-          and l3.l_suppkey <> l1.l_suppkey
-          and l3.l_receiptdate > l3.l_commitdate
-    )
-  and s_nationkey = n_nationkey
-  and n_name = 'SAUDI ARABIA'
-group by
-    s_name
-order by
-    numwait desc,
-    s_name;
\ No newline at end of file
diff --git a/rust/ballista/rust/benchmarks/tpch/queries/q22.sql b/rust/ballista/rust/benchmarks/tpch/queries/q22.sql
deleted file mode 100644
index 90aea6fd74f..00000000000
--- a/rust/ballista/rust/benchmarks/tpch/queries/q22.sql
+++ /dev/null
@@ -1,37 +0,0 @@
-select
-    cntrycode,
-    count(*) as numcust,
-    sum(c_acctbal) as totacctbal
-from
-    (
-        select
-            substring(c_phone from 1 for 2) as cntrycode,
-            c_acctbal
-        from
-            customer
-        where
-                substring(c_phone from 1 for 2) in
-                ('13', '31', '23', '29', '30', '18', '17')
-          and c_acctbal > (
-            select
-                avg(c_acctbal)
-            from
-                customer
-            where
-                    c_acctbal > 0.00
-              and substring(c_phone from 1 for 2) in
-                  ('13', '31', '23', '29', '30', '18', '17')
-        )
-          and not exists (
-                select
-                    *
-                from
-                    orders
-                where
-                        o_custkey = c_custkey
-            )
-    ) as custsale
-group by
-    cntrycode
-order by
-    cntrycode;
\ No newline at end of file
diff --git a/rust/ballista/rust/benchmarks/tpch/queries/q3.sql b/rust/ballista/rust/benchmarks/tpch/queries/q3.sql
deleted file mode 100644
index 7dbc6d9ef67..00000000000
--- a/rust/ballista/rust/benchmarks/tpch/queries/q3.sql
+++ /dev/null
@@ -1,22 +0,0 @@
-select
-    l_orderkey,
-    sum(l_extendedprice * (1 - l_discount)) as revenue,
-    o_orderdate,
-    o_shippriority
-from
-    customer,
-    orders,
-    lineitem
-where
-        c_mktsegment = 'BUILDING'
-  and c_custkey = o_custkey
-  and l_orderkey = o_orderkey
-  and o_orderdate < date '1995-03-15'
-  and l_shipdate > date '1995-03-15'
-group by
-    l_orderkey,
-    o_orderdate,
-    o_shippriority
-order by
-    revenue desc,
-    o_orderdate;
\ No newline at end of file
diff --git a/rust/ballista/rust/benchmarks/tpch/queries/q4.sql b/rust/ballista/rust/benchmarks/tpch/queries/q4.sql
deleted file mode 100644
index 74a620dbc8a..00000000000
--- a/rust/ballista/rust/benchmarks/tpch/queries/q4.sql
+++ /dev/null
@@ -1,21 +0,0 @@
-select
-    o_orderpriority,
-    count(*) as order_count
-from
-    orders
-where
-        o_orderdate >= '1993-07-01'
-  and o_orderdate < date '1993-07-01' + interval '3' month
-  and exists (
-        select
-            *
-        from
-            lineitem
-        where
-                l_orderkey = o_orderkey
-          and l_commitdate < l_receiptdate
-    )
-group by
-    o_orderpriority
-order by
-    o_orderpriority;
\ No newline at end of file
diff --git a/rust/ballista/rust/benchmarks/tpch/queries/q5.sql b/rust/ballista/rust/benchmarks/tpch/queries/q5.sql
deleted file mode 100644
index 5a336b23118..00000000000
--- a/rust/ballista/rust/benchmarks/tpch/queries/q5.sql
+++ /dev/null
@@ -1,24 +0,0 @@
-select
-    n_name,
-    sum(l_extendedprice * (1 - l_discount)) as revenue
-from
-    customer,
-    orders,
-    lineitem,
-    supplier,
-    nation,
-    region
-where
-        c_custkey = o_custkey
-  and l_orderkey = o_orderkey
-  and l_suppkey = s_suppkey
-  and c_nationkey = s_nationkey
-  and s_nationkey = n_nationkey
-  and n_regionkey = r_regionkey
-  and r_name = 'ASIA'
-  and o_orderdate >= date '1994-01-01'
-  and o_orderdate < date '1995-01-01'
-group by
-    n_name
-order by
-    revenue desc;
\ No newline at end of file
diff --git a/rust/ballista/rust/benchmarks/tpch/queries/q6.sql b/rust/ballista/rust/benchmarks/tpch/queries/q6.sql
deleted file mode 100644
index 5806f980f80..00000000000
--- a/rust/ballista/rust/benchmarks/tpch/queries/q6.sql
+++ /dev/null
@@ -1,9 +0,0 @@
-select
-    sum(l_extendedprice * l_discount) as revenue
-from
-    lineitem
-where
-        l_shipdate >= date '1994-01-01'
-  and l_shipdate < date '1995-01-01'
-  and l_discount between 0.06 - 0.01 and 0.06 + 0.01
-  and l_quantity < 24;
\ No newline at end of file
diff --git a/rust/ballista/rust/benchmarks/tpch/queries/q7.sql b/rust/ballista/rust/benchmarks/tpch/queries/q7.sql
deleted file mode 100644
index d53877c8dde..00000000000
--- a/rust/ballista/rust/benchmarks/tpch/queries/q7.sql
+++ /dev/null
@@ -1,39 +0,0 @@
-select
-    supp_nation,
-    cust_nation,
-    l_year,
-    sum(volume) as revenue
-from
-    (
-        select
-            n1.n_name as supp_nation,
-            n2.n_name as cust_nation,
-            extract(year from l_shipdate) as l_year,
-            l_extendedprice * (1 - l_discount) as volume
-        from
-            supplier,
-            lineitem,
-            orders,
-            customer,
-            nation n1,
-            nation n2
-        where
-                s_suppkey = l_suppkey
-          and o_orderkey = l_orderkey
-          and c_custkey = o_custkey
-          and s_nationkey = n1.n_nationkey
-          and c_nationkey = n2.n_nationkey
-          and (
-                (n1.n_name = 'FRANCE' and n2.n_name = 'GERMANY')
-                or (n1.n_name = 'GERMANY' and n2.n_name = 'FRANCE')
-            )
-          and l_shipdate between date '1995-01-01' and date '1996-12-31'
-    ) as shipping
-group by
-    supp_nation,
-    cust_nation,
-    l_year
-order by
-    supp_nation,
-    cust_nation,
-    l_year;
\ No newline at end of file
diff --git a/rust/ballista/rust/benchmarks/tpch/queries/q8.sql b/rust/ballista/rust/benchmarks/tpch/queries/q8.sql
deleted file mode 100644
index 6ddb2a67475..00000000000
--- a/rust/ballista/rust/benchmarks/tpch/queries/q8.sql
+++ /dev/null
@@ -1,37 +0,0 @@
-select
-    o_year,
-    sum(case
-            when nation = 'BRAZIL' then volume
-            else 0
-        end) / sum(volume) as mkt_share
-from
-    (
-        select
-            extract(year from o_orderdate) as o_year,
-            l_extendedprice * (1 - l_discount) as volume,
-            n2.n_name as nation
-        from
-            part,
-            supplier,
-            lineitem,
-            orders,
-            customer,
-            nation n1,
-            nation n2,
-            region
-        where
-                p_partkey = l_partkey
-          and s_suppkey = l_suppkey
-          and l_orderkey = o_orderkey
-          and o_custkey = c_custkey
-          and c_nationkey = n1.n_nationkey
-          and n1.n_regionkey = r_regionkey
-          and r_name = 'AMERICA'
-          and s_nationkey = n2.n_nationkey
-          and o_orderdate between date '1995-01-01' and date '1996-12-31'
-          and p_type = 'ECONOMY ANODIZED STEEL'
-    ) as all_nations
-group by
-    o_year
-order by
-    o_year;
\ No newline at end of file
diff --git a/rust/ballista/rust/benchmarks/tpch/queries/q9.sql b/rust/ballista/rust/benchmarks/tpch/queries/q9.sql
deleted file mode 100644
index 587bbc8a207..00000000000
--- a/rust/ballista/rust/benchmarks/tpch/queries/q9.sql
+++ /dev/null
@@ -1,32 +0,0 @@
-select
-    nation,
-    o_year,
-    sum(amount) as sum_profit
-from
-    (
-        select
-            n_name as nation,
-            extract(year from o_orderdate) as o_year,
-            l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount
-        from
-            part,
-            supplier,
-            lineitem,
-            partsupp,
-            orders,
-            nation
-        where
-                s_suppkey = l_suppkey
-          and ps_suppkey = l_suppkey
-          and ps_partkey = l_partkey
-          and p_partkey = l_partkey
-          and o_orderkey = l_orderkey
-          and s_nationkey = n_nationkey
-          and p_name like '%green%'
-    ) as profit
-group by
-    nation,
-    o_year
-order by
-    nation,
-    o_year desc;
\ No newline at end of file
diff --git a/rust/ballista/rust/benchmarks/tpch/run.sh b/rust/ballista/rust/benchmarks/tpch/run.sh
deleted file mode 100755
index c8a36b6013c..00000000000
--- a/rust/ballista/rust/benchmarks/tpch/run.sh
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/bin/bash
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-set -e
-
-# This bash script is meant to be run inside the docker-compose environment. Check the README for instructions
-
-for query in 1 3 5 6 10 12
-do
-  /tpch benchmark --host ballista-scheduler --port 50050 --query $query --path /data --format tbl --iterations 1 --debug
-done
diff --git a/rust/ballista/rust/benchmarks/tpch/src/main.rs b/rust/ballista/rust/benchmarks/tpch/src/main.rs
deleted file mode 100644
index 1ba46ea1826..00000000000
--- a/rust/ballista/rust/benchmarks/tpch/src/main.rs
+++ /dev/null
@@ -1,360 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Benchmark derived from TPC-H. This is not an official TPC-H benchmark.
-//!
-//! This is a modified version of the DataFusion version of these benchmarks.
-
-use std::collections::HashMap;
-use std::fs;
-use std::path::{Path, PathBuf};
-use std::time::Instant;
-
-use arrow::datatypes::{DataType, Field, Schema};
-use arrow::util::pretty;
-use ballista::prelude::*;
-use datafusion::prelude::*;
-use parquet::basic::Compression;
-use parquet::file::properties::WriterProperties;
-use structopt::StructOpt;
-
-#[derive(Debug, StructOpt)]
-struct BenchmarkOpt {
-    /// Ballista executor host
-    #[structopt(long = "host")]
-    host: String,
-
-    /// Ballista executor port
-    #[structopt(long = "port")]
-    port: u16,
-
-    /// Query number
-    #[structopt(long)]
-    query: usize,
-
-    /// Activate debug mode to see query results
-    #[structopt(long)]
-    debug: bool,
-
-    /// Number of iterations of each test run
-    #[structopt(long = "iterations", default_value = "1")]
-    iterations: usize,
-
-    /// Batch size when reading CSV or Parquet files
-    #[structopt(long = "batch-size", default_value = "32768")]
-    batch_size: usize,
-
-    /// Path to data files
-    #[structopt(parse(from_os_str), required = true, long = "path")]
-    path: PathBuf,
-
-    /// File format: `csv`, `tbl` or `parquet`
-    #[structopt(long = "format")]
-    file_format: String,
-}
-
-#[derive(Debug, StructOpt)]
-struct ConvertOpt {
-    /// Path to csv files
-    #[structopt(parse(from_os_str), required = true, short = "i", long = "input")]
-    input_path: PathBuf,
-
-    /// Output path
-    #[structopt(parse(from_os_str), required = true, short = "o", long = "output")]
-    output_path: PathBuf,
-
-    /// Output file format: `csv` or `parquet`
-    #[structopt(short = "f", long = "format")]
-    file_format: String,
-
-    /// Compression to use when writing Parquet files
-    #[structopt(short = "c", long = "compression", default_value = "snappy")]
-    compression: String,
-
-    /// Number of partitions to produce
-    #[structopt(short = "p", long = "partitions", default_value = "1")]
-    partitions: usize,
-
-    /// Batch size when reading CSV or Parquet files
-    #[structopt(short = "s", long = "batch-size", default_value = "4096")]
-    batch_size: usize,
-}
-
-#[derive(Debug, StructOpt)]
-#[structopt(name = "TPC-H", about = "TPC-H Benchmarks.")]
-enum TpchOpt {
-    Benchmark(BenchmarkOpt),
-    Convert(ConvertOpt),
-}
-
-const TABLES: &[&str] = &[
-    "part", "supplier", "partsupp", "customer", "orders", "lineitem", "nation", "region",
-];
-
-#[tokio::main]
-async fn main() -> Result<()> {
-    env_logger::init();
-    match TpchOpt::from_args() {
-        TpchOpt::Benchmark(opt) => benchmark(opt).await.map(|_| ()),
-        TpchOpt::Convert(opt) => convert_tbl(opt).await,
-    }
-}
-
-async fn benchmark(opt: BenchmarkOpt) -> Result<()> {
-    println!("Running benchmarks with the following options: {:?}", opt);
-
-    let mut settings = HashMap::new();
-    settings.insert("batch.size".to_owned(), format!("{}", opt.batch_size));
-
-    let ctx = BallistaContext::remote(opt.host.as_str(), opt.port, settings);
-
-    // register tables with Ballista context
-    let path = opt.path.to_str().unwrap();
-    let file_format = opt.file_format.as_str();
-    for table in TABLES {
-        match file_format {
-            // dbgen creates .tbl ('|' delimited) files without header
-            "tbl" => {
-                let path = format!("{}/{}.tbl", path, table);
-                let schema = get_schema(table);
-                let options = CsvReadOptions::new()
-                    .schema(&schema)
-                    .delimiter(b'|')
-                    .has_header(false)
-                    .file_extension(".tbl");
-                ctx.register_csv(table, &path, options)?;
-            }
-            "csv" => {
-                let path = format!("{}/{}", path, table);
-                let schema = get_schema(table);
-                let options = CsvReadOptions::new().schema(&schema).has_header(true);
-                ctx.register_csv(table, &path, options)?;
-            }
-            "parquet" => {
-                let path = format!("{}/{}", path, table);
-                ctx.register_parquet(table, &path)?;
-            }
-            other => {
-                unimplemented!("Invalid file format '{}'", other);
-            }
-        }
-    }
-
-    let mut millis = vec![];
-
-    // run benchmark
-    let sql = get_query_sql(opt.query)?;
-    println!("Running benchmark with query {}:\n {}", opt.query, sql);
-    for i in 0..opt.iterations {
-        let start = Instant::now();
-        let df = ctx.sql(&sql)?;
-        let mut batches = vec![];
-        let mut stream = df.collect().await?;
-        while let Some(result) = stream.next().await {
-            let batch = result?;
-            batches.push(batch);
-        }
-        let elapsed = start.elapsed().as_secs_f64() * 1000.0;
-        millis.push(elapsed as f64);
-        println!("Query {} iteration {} took {:.1} ms", opt.query, i, elapsed);
-        if opt.debug {
-            pretty::print_batches(&batches)?;
-        }
-    }
-
-    let avg = millis.iter().sum::<f64>() / millis.len() as f64;
-    println!("Query {} avg time: {:.2} ms", opt.query, avg);
-
-    Ok(())
-}
-
-fn get_query_sql(query: usize) -> Result<String> {
-    if query > 0 && query < 23 {
-        let filename = format!("queries/q{}.sql", query);
-        Ok(fs::read_to_string(&filename).expect("failed to read query"))
-    } else {
-        Err(BallistaError::General(
-            "invalid query. Expected value between 1 and 22".to_owned(),
-        ))
-    }
-}
-
-async fn convert_tbl(opt: ConvertOpt) -> Result<()> {
-    let output_root_path = Path::new(&opt.output_path);
-    for table in TABLES {
-        let start = Instant::now();
-        let schema = get_schema(table);
-
-        let input_path = format!("{}/{}.tbl", opt.input_path.to_str().unwrap(), table);
-        let options = CsvReadOptions::new()
-            .schema(&schema)
-            .delimiter(b'|')
-            .file_extension(".tbl");
-
-        let config = ExecutionConfig::new().with_batch_size(opt.batch_size);
-        let mut ctx = ExecutionContext::with_config(config);
-
-        // build plan to read the TBL file
-        let mut csv = ctx.read_csv(&input_path, options)?;
-
-        // optionally, repartition the file
-        if opt.partitions > 1 {
-            csv = csv.repartition(Partitioning::RoundRobinBatch(opt.partitions))?
-        }
-
-        // create the physical plan
-        let csv = csv.to_logical_plan();
-        let csv = ctx.optimize(&csv)?;
-        let csv = ctx.create_physical_plan(&csv)?;
-
-        let output_path = output_root_path.join(table);
-        let output_path = output_path.to_str().unwrap().to_owned();
-
-        println!(
-            "Converting '{}' to {} files in directory '{}'",
-            &input_path, &opt.file_format, &output_path
-        );
-        match opt.file_format.as_str() {
-            "csv" => ctx.write_csv(csv, output_path).await?,
-            "parquet" => {
-                let compression = match opt.compression.as_str() {
-                    "none" => Compression::UNCOMPRESSED,
-                    "snappy" => Compression::SNAPPY,
-                    "brotli" => Compression::BROTLI,
-                    "gzip" => Compression::GZIP,
-                    "lz4" => Compression::LZ4,
-                    "lz0" => Compression::LZO,
-                    "zstd" => Compression::ZSTD,
-                    other => {
-                        return Err(BallistaError::NotImplemented(format!(
-                            "Invalid compression format: {}",
-                            other
-                        )))
-                    }
-                };
-                let props = WriterProperties::builder()
-                    .set_compression(compression)
-                    .build();
-                ctx.write_parquet(csv, output_path, Some(props)).await?
-            }
-            other => {
-                return Err(BallistaError::NotImplemented(format!(
-                    "Invalid output format: {}",
-                    other
-                )))
-            }
-        }
-        println!("Conversion completed in {} ms", start.elapsed().as_millis());
-    }
-
-    Ok(())
-}
-
-fn get_schema(table: &str) -> Schema {
-    // note that the schema intentionally uses signed integers so that any generated Parquet
-    // files can also be used to benchmark tools that only support signed integers, such as
-    // Apache Spark
-
-    match table {
-        "part" => Schema::new(vec![
-            Field::new("p_partkey", DataType::Int32, false),
-            Field::new("p_name", DataType::Utf8, false),
-            Field::new("p_mfgr", DataType::Utf8, false),
-            Field::new("p_brand", DataType::Utf8, false),
-            Field::new("p_type", DataType::Utf8, false),
-            Field::new("p_size", DataType::Int32, false),
-            Field::new("p_container", DataType::Utf8, false),
-            Field::new("p_retailprice", DataType::Float64, false),
-            Field::new("p_comment", DataType::Utf8, false),
-        ]),
-
-        "supplier" => Schema::new(vec![
-            Field::new("s_suppkey", DataType::Int32, false),
-            Field::new("s_name", DataType::Utf8, false),
-            Field::new("s_address", DataType::Utf8, false),
-            Field::new("s_nationkey", DataType::Int32, false),
-            Field::new("s_phone", DataType::Utf8, false),
-            Field::new("s_acctbal", DataType::Float64, false),
-            Field::new("s_comment", DataType::Utf8, false),
-        ]),
-
-        "partsupp" => Schema::new(vec![
-            Field::new("ps_partkey", DataType::Int32, false),
-            Field::new("ps_suppkey", DataType::Int32, false),
-            Field::new("ps_availqty", DataType::Int32, false),
-            Field::new("ps_supplycost", DataType::Float64, false),
-            Field::new("ps_comment", DataType::Utf8, false),
-        ]),
-
-        "customer" => Schema::new(vec![
-            Field::new("c_custkey", DataType::Int32, false),
-            Field::new("c_name", DataType::Utf8, false),
-            Field::new("c_address", DataType::Utf8, false),
-            Field::new("c_nationkey", DataType::Int32, false),
-            Field::new("c_phone", DataType::Utf8, false),
-            Field::new("c_acctbal", DataType::Float64, false),
-            Field::new("c_mktsegment", DataType::Utf8, false),
-            Field::new("c_comment", DataType::Utf8, false),
-        ]),
-
-        "orders" => Schema::new(vec![
-            Field::new("o_orderkey", DataType::Int32, false),
-            Field::new("o_custkey", DataType::Int32, false),
-            Field::new("o_orderstatus", DataType::Utf8, false),
-            Field::new("o_totalprice", DataType::Float64, false),
-            Field::new("o_orderdate", DataType::Date32, false),
-            Field::new("o_orderpriority", DataType::Utf8, false),
-            Field::new("o_clerk", DataType::Utf8, false),
-            Field::new("o_shippriority", DataType::Int32, false),
-            Field::new("o_comment", DataType::Utf8, false),
-        ]),
-
-        "lineitem" => Schema::new(vec![
-            Field::new("l_orderkey", DataType::Int32, false),
-            Field::new("l_partkey", DataType::Int32, false),
-            Field::new("l_suppkey", DataType::Int32, false),
-            Field::new("l_linenumber", DataType::Int32, false),
-            Field::new("l_quantity", DataType::Float64, false),
-            Field::new("l_extendedprice", DataType::Float64, false),
-            Field::new("l_discount", DataType::Float64, false),
-            Field::new("l_tax", DataType::Float64, false),
-            Field::new("l_returnflag", DataType::Utf8, false),
-            Field::new("l_linestatus", DataType::Utf8, false),
-            Field::new("l_shipdate", DataType::Date32, false),
-            Field::new("l_commitdate", DataType::Date32, false),
-            Field::new("l_receiptdate", DataType::Date32, false),
-            Field::new("l_shipinstruct", DataType::Utf8, false),
-            Field::new("l_shipmode", DataType::Utf8, false),
-            Field::new("l_comment", DataType::Utf8, false),
-        ]),
-
-        "nation" => Schema::new(vec![
-            Field::new("n_nationkey", DataType::Int32, false),
-            Field::new("n_name", DataType::Utf8, false),
-            Field::new("n_regionkey", DataType::Int32, false),
-            Field::new("n_comment", DataType::Utf8, false),
-        ]),
-
-        "region" => Schema::new(vec![
-            Field::new("r_regionkey", DataType::Int32, false),
-            Field::new("r_name", DataType::Utf8, false),
-            Field::new("r_comment", DataType::Utf8, false),
-        ]),
-
-        _ => unimplemented!(),
-    }
-}
diff --git a/rust/ballista/rust/benchmarks/tpch/tpch-gen.sh b/rust/ballista/rust/benchmarks/tpch/tpch-gen.sh
deleted file mode 100755
index f5147f55f2f..00000000000
--- a/rust/ballista/rust/benchmarks/tpch/tpch-gen.sh
+++ /dev/null
@@ -1,33 +0,0 @@
-#!/bin/bash
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-BALLISTA_VERSION=0.4.2-SNAPSHOT
-
-#set -e
-
-docker build -t ballistacompute/ballista-tpchgen:$BALLISTA_VERSION -f tpchgen.dockerfile .
-
-# Generate data into the ./data directory if it does not already exist
-FILE=./data/supplier.tbl
-if test -f "$FILE"; then
-    echo "$FILE exists."
-else
-  mkdir data 2>/dev/null
-  docker run -v `pwd`/data:/data -it --rm ballistacompute/ballista-tpchgen:$BALLISTA_VERSION
-  ls -l data
-fi
\ No newline at end of file
diff --git a/rust/ballista/rust/benchmarks/tpch/tpchgen.dockerfile b/rust/ballista/rust/benchmarks/tpch/tpchgen.dockerfile
deleted file mode 100644
index 7fc2e5005a5..00000000000
--- a/rust/ballista/rust/benchmarks/tpch/tpchgen.dockerfile
+++ /dev/null
@@ -1,32 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-FROM ubuntu
-
-RUN apt-get update && \
-    apt-get install -y git build-essential
-
-RUN git clone https://github.com/databricks/tpch-dbgen.git && \
-    cd tpch-dbgen && \
-    make
-
-WORKDIR /tpch-dbgen
-ADD entrypoint.sh /tpch-dbgen/
-
-VOLUME data
-
-ENTRYPOINT [ "bash", "./entrypoint.sh" ]
diff --git a/rust/ballista/rust/client/Cargo.toml b/rust/ballista/rust/client/Cargo.toml
deleted file mode 100644
index de3effe87ca..00000000000
--- a/rust/ballista/rust/client/Cargo.toml
+++ /dev/null
@@ -1,35 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-[package]
-name = "ballista"
-description = "Ballista Distributed Compute"
-license = "Apache-2.0"
-version = "0.4.2-SNAPSHOT"
-homepage = "https://github.com/apache/arrow"
-repository = "https://github.com/apache/arrow"
-authors = ["Apache Arrow <dev@arrow.apache.org>"]
-edition = "2018"
-
-[dependencies]
-ballista-core = { path = "../core" }
-futures = "0.3"
-log = "0.4"
-tokio = "1.0"
-
-arrow = { path = "../../../arrow"  }
-datafusion = { path = "../../../datafusion" }
\ No newline at end of file
diff --git a/rust/ballista/rust/client/README.md b/rust/ballista/rust/client/README.md
deleted file mode 100644
index 00bf3ea5ec6..00000000000
--- a/rust/ballista/rust/client/README.md
+++ /dev/null
@@ -1,22 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-# Ballista - Rust
-This crate contains the Ballista client library. For an example usage, please refer [here](../benchmarks/tpch/README.md).
-
diff --git a/rust/ballista/rust/client/src/columnar_batch.rs b/rust/ballista/rust/client/src/columnar_batch.rs
deleted file mode 100644
index d3ff8861faa..00000000000
--- a/rust/ballista/rust/client/src/columnar_batch.rs
+++ /dev/null
@@ -1,167 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::{collections::HashMap, sync::Arc};
-
-use ballista_core::error::{ballista_error, Result};
-
-use arrow::{
-    array::ArrayRef,
-    datatypes::{DataType, Schema},
-    record_batch::RecordBatch,
-};
-use datafusion::scalar::ScalarValue;
-
-pub type MaybeColumnarBatch = Result<Option<ColumnarBatch>>;
-
-/// Batch of columnar data.
-#[allow(dead_code)]
-#[derive(Debug, Clone)]
-
-pub struct ColumnarBatch {
-    schema: Arc<Schema>,
-    columns: HashMap<String, ColumnarValue>,
-}
-
-impl ColumnarBatch {
-    pub fn from_arrow(batch: &RecordBatch) -> Self {
-        let columns = batch
-            .columns()
-            .iter()
-            .enumerate()
-            .map(|(i, array)| {
-                (
-                    batch.schema().field(i).name().clone(),
-                    ColumnarValue::Columnar(array.clone()),
-                )
-            })
-            .collect();
-
-        Self {
-            schema: batch.schema(),
-            columns,
-        }
-    }
-
-    pub fn from_values(values: &[ColumnarValue], schema: &Schema) -> Self {
-        let columns = schema
-            .fields()
-            .iter()
-            .enumerate()
-            .map(|(i, f)| (f.name().clone(), values[i].clone()))
-            .collect();
-
-        Self {
-            schema: Arc::new(schema.clone()),
-            columns,
-        }
-    }
-
-    pub fn to_arrow(&self) -> Result<RecordBatch> {
-        let arrays = self
-            .schema
-            .fields()
-            .iter()
-            .map(|c| {
-                match self.column(c.name())? {
-                    ColumnarValue::Columnar(array) => Ok(array.clone()),
-                    ColumnarValue::Scalar(_, _) => {
-                        // note that this can be implemented easily if needed
-                        Err(ballista_error("Cannot convert scalar value to Arrow array"))
-                    }
-                }
-            })
-            .collect::<Result<Vec<_>>>()?;
-
-        Ok(RecordBatch::try_new(self.schema.clone(), arrays)?)
-    }
-
-    pub fn schema(&self) -> Arc<Schema> {
-        self.schema.clone()
-    }
-
-    pub fn num_columns(&self) -> usize {
-        self.columns.len()
-    }
-
-    pub fn num_rows(&self) -> usize {
-        self.columns[self.schema.field(0).name()].len()
-    }
-
-    pub fn column(&self, name: &str) -> Result<&ColumnarValue> {
-        Ok(&self.columns[name])
-    }
-
-    pub fn memory_size(&self) -> usize {
-        self.columns.values().map(|c| c.memory_size()).sum()
-    }
-}
-
-/// A columnar value can either be a scalar value or an Arrow array.
-#[allow(dead_code)]
-#[derive(Debug, Clone)]
-
-pub enum ColumnarValue {
-    Scalar(ScalarValue, usize),
-    Columnar(ArrayRef),
-}
-
-impl ColumnarValue {
-    pub fn len(&self) -> usize {
-        match self {
-            ColumnarValue::Scalar(_, n) => *n,
-            ColumnarValue::Columnar(array) => array.len(),
-        }
-    }
-
-    pub fn is_empty(&self) -> bool {
-        self.len() == 0
-    }
-
-    pub fn data_type(&self) -> &DataType {
-        match self {
-            ColumnarValue::Columnar(array) => array.data_type(),
-            ColumnarValue::Scalar(value, _) => match value {
-                ScalarValue::UInt8(_) => &DataType::UInt8,
-                ScalarValue::UInt16(_) => &DataType::UInt16,
-                ScalarValue::UInt32(_) => &DataType::UInt32,
-                ScalarValue::UInt64(_) => &DataType::UInt64,
-                ScalarValue::Int8(_) => &DataType::Int8,
-                ScalarValue::Int16(_) => &DataType::Int16,
-                ScalarValue::Int32(_) => &DataType::Int32,
-                ScalarValue::Int64(_) => &DataType::Int64,
-                ScalarValue::Float32(_) => &DataType::Float32,
-                ScalarValue::Float64(_) => &DataType::Float64,
-                _ => unimplemented!(),
-            },
-        }
-    }
-
-    pub fn to_arrow(&self) -> ArrayRef {
-        match self {
-            ColumnarValue::Columnar(array) => array.clone(),
-            ColumnarValue::Scalar(value, n) => value.to_array_of_size(*n),
-        }
-    }
-
-    pub fn memory_size(&self) -> usize {
-        match self {
-            ColumnarValue::Columnar(array) => array.get_array_memory_size(),
-            _ => 0,
-        }
-    }
-}
diff --git a/rust/ballista/rust/client/src/context.rs b/rust/ballista/rust/client/src/context.rs
deleted file mode 100644
index 400f6b6183e..00000000000
--- a/rust/ballista/rust/client/src/context.rs
+++ /dev/null
@@ -1,400 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Distributed execution context.
-
-use std::path::PathBuf;
-use std::pin::Pin;
-use std::sync::{Arc, Mutex};
-use std::{collections::HashMap, convert::TryInto};
-use std::{fs, time::Duration};
-
-use ballista_core::serde::protobuf::scheduler_grpc_client::SchedulerGrpcClient;
-use ballista_core::serde::protobuf::{
-    execute_query_params::Query, job_status, ExecuteQueryParams, GetJobStatusParams,
-    GetJobStatusResult,
-};
-use ballista_core::{
-    client::BallistaClient,
-    datasource::DFTableAdapter,
-    error::{BallistaError, Result},
-    memory_stream::MemoryStream,
-    utils::create_datafusion_context,
-};
-
-use arrow::datatypes::Schema;
-use datafusion::catalog::TableReference;
-use datafusion::logical_plan::{DFSchema, Expr, LogicalPlan, Partitioning};
-use datafusion::physical_plan::csv::CsvReadOptions;
-use datafusion::{dataframe::DataFrame, physical_plan::RecordBatchStream};
-use log::{error, info};
-
-#[allow(dead_code)]
-struct BallistaContextState {
-    /// Scheduler host
-    scheduler_host: String,
-    /// Scheduler port
-    scheduler_port: u16,
-    /// Tables that have been registered with this context
-    tables: HashMap<String, LogicalPlan>,
-    /// General purpose settings
-    settings: HashMap<String, String>,
-}
-
-impl BallistaContextState {
-    pub fn new(
-        scheduler_host: String,
-        scheduler_port: u16,
-        settings: HashMap<String, String>,
-    ) -> Self {
-        Self {
-            scheduler_host,
-            scheduler_port,
-            tables: HashMap::new(),
-            settings,
-        }
-    }
-}
-
-#[allow(dead_code)]
-
-pub struct BallistaContext {
-    state: Arc<Mutex<BallistaContextState>>,
-}
-
-impl BallistaContext {
-    /// Create a context for executing queries against a remote Ballista scheduler instance
-    pub fn remote(host: &str, port: u16, settings: HashMap<String, String>) -> Self {
-        let state = BallistaContextState::new(host.to_owned(), port, settings);
-
-        Self {
-            state: Arc::new(Mutex::new(state)),
-        }
-    }
-
-    /// Create a DataFrame representing a Parquet table scan
-
-    pub fn read_parquet(&self, path: &str) -> Result<BallistaDataFrame> {
-        // convert to absolute path because the executor likely has a different working directory
-        let path = PathBuf::from(path);
-        let path = fs::canonicalize(&path)?;
-
-        // use local DataFusion context for now but later this might call the scheduler
-        let mut ctx = create_datafusion_context();
-        let df = ctx.read_parquet(path.to_str().unwrap())?;
-        Ok(BallistaDataFrame::from(self.state.clone(), df))
-    }
-
-    /// Create a DataFrame representing a CSV table scan
-
-    pub fn read_csv(
-        &self,
-        path: &str,
-        options: CsvReadOptions,
-    ) -> Result<BallistaDataFrame> {
-        // convert to absolute path because the executor likely has a different working directory
-        let path = PathBuf::from(path);
-        let path = fs::canonicalize(&path)?;
-
-        // use local DataFusion context for now but later this might call the scheduler
-        let mut ctx = create_datafusion_context();
-        let df = ctx.read_csv(path.to_str().unwrap(), options)?;
-        Ok(BallistaDataFrame::from(self.state.clone(), df))
-    }
-
-    /// Register a DataFrame as a table that can be referenced from a SQL query
-    pub fn register_table(&self, name: &str, table: &BallistaDataFrame) -> Result<()> {
-        let mut state = self.state.lock().unwrap();
-        state
-            .tables
-            .insert(name.to_owned(), table.to_logical_plan());
-        Ok(())
-    }
-
-    pub fn register_csv(
-        &self,
-        name: &str,
-        path: &str,
-        options: CsvReadOptions,
-    ) -> Result<()> {
-        let df = self.read_csv(path, options)?;
-        self.register_table(name, &df)
-    }
-
-    pub fn register_parquet(&self, name: &str, path: &str) -> Result<()> {
-        let df = self.read_parquet(path)?;
-        self.register_table(name, &df)
-    }
-
-    /// Create a DataFrame from a SQL statement
-    pub fn sql(&self, sql: &str) -> Result<BallistaDataFrame> {
-        // use local DataFusion context for now but later this might call the scheduler
-        let mut ctx = create_datafusion_context();
-        // register tables
-        let state = self.state.lock().unwrap();
-        for (name, plan) in &state.tables {
-            let plan = ctx.optimize(plan)?;
-            let execution_plan = ctx.create_physical_plan(&plan)?;
-            ctx.register_table(
-                TableReference::Bare { table: name },
-                Arc::new(DFTableAdapter::new(plan, execution_plan)),
-            )?;
-        }
-        let df = ctx.sql(sql)?;
-        Ok(BallistaDataFrame::from(self.state.clone(), df))
-    }
-}
-
-/// The Ballista DataFrame is a wrapper around the DataFusion DataFrame and overrides the
-/// `collect` method so that the query is executed against Ballista and not DataFusion.
-
-pub struct BallistaDataFrame {
-    /// Ballista context state
-    state: Arc<Mutex<BallistaContextState>>,
-    /// DataFusion DataFrame representing logical query plan
-    df: Arc<dyn DataFrame>,
-}
-
-impl BallistaDataFrame {
-    fn from(state: Arc<Mutex<BallistaContextState>>, df: Arc<dyn DataFrame>) -> Self {
-        Self { state, df }
-    }
-
-    pub async fn collect(&self) -> Result<Pin<Box<dyn RecordBatchStream + Send + Sync>>> {
-        let scheduler_url = {
-            let state = self.state.lock().unwrap();
-
-            format!("http://{}:{}", state.scheduler_host, state.scheduler_port)
-        };
-
-        info!("Connecting to Ballista scheduler at {}", scheduler_url);
-
-        let mut scheduler = SchedulerGrpcClient::connect(scheduler_url).await?;
-
-        let plan = self.df.to_logical_plan();
-        let schema: Schema = plan.schema().as_ref().clone().into();
-
-        let job_id = scheduler
-            .execute_query(ExecuteQueryParams {
-                query: Some(Query::LogicalPlan((&plan).try_into()?)),
-            })
-            .await?
-            .into_inner()
-            .job_id;
-
-        loop {
-            let GetJobStatusResult { status } = scheduler
-                .get_job_status(GetJobStatusParams {
-                    job_id: job_id.clone(),
-                })
-                .await?
-                .into_inner();
-            let status = status.and_then(|s| s.status).ok_or_else(|| {
-                BallistaError::Internal("Received empty status message".to_owned())
-            })?;
-            let wait_future = tokio::time::sleep(Duration::from_millis(100));
-            match status {
-                job_status::Status::Queued(_) => {
-                    info!("Job {} still queued...", job_id);
-                    wait_future.await;
-                }
-                job_status::Status::Running(_) => {
-                    info!("Job {} is running...", job_id);
-                    wait_future.await;
-                }
-                job_status::Status::Failed(err) => {
-                    let msg = format!("Job {} failed: {}", job_id, err.error);
-                    error!("{}", msg);
-                    break Err(BallistaError::General(msg));
-                }
-                job_status::Status::Completed(completed) => {
-                    // TODO: use streaming. Probably need to change the signature of fetch_partition to achieve that
-                    let mut result = vec![];
-                    for location in completed.partition_location {
-                        let metadata = location.executor_meta.ok_or_else(|| {
-                            BallistaError::Internal(
-                                "Received empty executor metadata".to_owned(),
-                            )
-                        })?;
-                        let partition_id = location.partition_id.ok_or_else(|| {
-                            BallistaError::Internal(
-                                "Received empty partition id".to_owned(),
-                            )
-                        })?;
-                        let mut ballista_client = BallistaClient::try_new(
-                            metadata.host.as_str(),
-                            metadata.port as u16,
-                        )
-                        .await?;
-                        let stream = ballista_client
-                            .fetch_partition(
-                                &partition_id.job_id,
-                                partition_id.stage_id as usize,
-                                partition_id.partition_id as usize,
-                            )
-                            .await?;
-                        result.append(
-                            &mut datafusion::physical_plan::common::collect(stream)
-                                .await?,
-                        );
-                    }
-                    break Ok(Box::pin(MemoryStream::try_new(
-                        result,
-                        Arc::new(schema),
-                        None,
-                    )?));
-                }
-            };
-        }
-    }
-
-    pub fn select_columns(&self, columns: &[&str]) -> Result<BallistaDataFrame> {
-        Ok(Self::from(
-            self.state.clone(),
-            self.df
-                .select_columns(columns)
-                .map_err(BallistaError::from)?,
-        ))
-    }
-
-    pub fn select(&self, expr: Vec<Expr>) -> Result<BallistaDataFrame> {
-        Ok(Self::from(
-            self.state.clone(),
-            self.df.select(expr).map_err(BallistaError::from)?,
-        ))
-    }
-
-    pub fn filter(&self, expr: Expr) -> Result<BallistaDataFrame> {
-        Ok(Self::from(
-            self.state.clone(),
-            self.df.filter(expr).map_err(BallistaError::from)?,
-        ))
-    }
-
-    pub fn aggregate(
-        &self,
-        group_expr: Vec<Expr>,
-        aggr_expr: Vec<Expr>,
-    ) -> Result<BallistaDataFrame> {
-        Ok(Self::from(
-            self.state.clone(),
-            self.df
-                .aggregate(group_expr, aggr_expr)
-                .map_err(BallistaError::from)?,
-        ))
-    }
-
-    pub fn limit(&self, n: usize) -> Result<BallistaDataFrame> {
-        Ok(Self::from(
-            self.state.clone(),
-            self.df.limit(n).map_err(BallistaError::from)?,
-        ))
-    }
-
-    pub fn sort(&self, expr: Vec<Expr>) -> Result<BallistaDataFrame> {
-        Ok(Self::from(
-            self.state.clone(),
-            self.df.sort(expr).map_err(BallistaError::from)?,
-        ))
-    }
-
-    // TODO lifetime issue
-    // pub fn join(&self, right: Arc<dyn DataFrame>, join_type: JoinType, left_cols: &[&str], right_cols: &[&str]) ->
-    // Result<BallistaDataFrame> {     Ok(Self::from(self.state.clone(), self.df.join(right, join_type, &left_cols,
-    // &right_cols).map_err(BallistaError::from)?)) }
-
-    pub fn repartition(
-        &self,
-        partitioning_scheme: Partitioning,
-    ) -> Result<BallistaDataFrame> {
-        Ok(Self::from(
-            self.state.clone(),
-            self.df
-                .repartition(partitioning_scheme)
-                .map_err(BallistaError::from)?,
-        ))
-    }
-
-    pub fn schema(&self) -> &DFSchema {
-        self.df.schema()
-    }
-
-    pub fn to_logical_plan(&self) -> LogicalPlan {
-        self.df.to_logical_plan()
-    }
-
-    pub fn explain(&self, verbose: bool) -> Result<BallistaDataFrame> {
-        Ok(Self::from(
-            self.state.clone(),
-            self.df.explain(verbose).map_err(BallistaError::from)?,
-        ))
-    }
-}
-
-// #[async_trait]
-// impl ExecutionContext for BallistaContext {
-//     async fn get_executor_ids(&self) -> Result<Vec<ExecutorMeta>> {
-//         match &self.config.discovery_mode {
-//             DiscoveryMode::Etcd => etcd_get_executors(&self.config.etcd_urls, "default").await,
-//             DiscoveryMode::Kubernetes => k8s_get_executors("default", "ballista").await,
-//             DiscoveryMode::Standalone => Err(ballista_error("Standalone mode not implemented yet")),
-//         }
-//     }
-//
-//     async fn execute_task(
-//         &self,
-//         executor_meta: ExecutorMeta,
-//         task: ExecutionTask,
-//     ) -> Result<ShuffleId> {
-//         // TODO what is the point of returning this info since it is based on input arg?
-//         let shuffle_id = ShuffleId::new(task.job_uuid, task.stage_id, task.partition_id);
-//
-//         let _ = execute_action(
-//             &executor_meta.host,
-//             executor_meta.port,
-//             &Action::Execute(task),
-//         )
-//         .await?;
-//
-//         Ok(shuffle_id)
-//     }
-//
-//     async fn read_shuffle(&self, shuffle_id: &ShuffleId) -> Result<Vec<ColumnarBatch>> {
-//         match self.shuffle_locations.get(shuffle_id) {
-//             Some(executor_meta) => {
-//                 let batches = execute_action(
-//                     &executor_meta.host,
-//                     executor_meta.port,
-//                     &Action::FetchShuffle(*shuffle_id),
-//                 )
-//                 .await?;
-//                 Ok(batches
-//                     .iter()
-//                     .map(|b| ColumnarBatch::from_arrow(b))
-//                     .collect())
-//             }
-//             _ => Err(ballista_error(&format!(
-//                 "Failed to resolve executor UUID for shuffle ID {:?}",
-//                 shuffle_id
-//             ))),
-//         }
-//     }
-//
-//     fn config(&self) -> ExecutorConfig {
-//         self.config.clone()
-//     }
-// }
diff --git a/rust/ballista/rust/client/src/lib.rs b/rust/ballista/rust/client/src/lib.rs
deleted file mode 100644
index c3c62918680..00000000000
--- a/rust/ballista/rust/client/src/lib.rs
+++ /dev/null
@@ -1,20 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-pub mod columnar_batch;
-pub mod context;
-pub mod prelude;
diff --git a/rust/ballista/rust/client/src/prelude.rs b/rust/ballista/rust/client/src/prelude.rs
deleted file mode 100644
index 2f940aef4c9..00000000000
--- a/rust/ballista/rust/client/src/prelude.rs
+++ /dev/null
@@ -1,23 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Ballista Prelude (common imports)
-
-pub use crate::context::BallistaContext;
-pub use ballista_core::error::{BallistaError, Result};
-
-pub use futures::StreamExt;
diff --git a/rust/ballista/rust/core/Cargo.toml b/rust/ballista/rust/core/Cargo.toml
deleted file mode 100644
index e37a1ea7caa..00000000000
--- a/rust/ballista/rust/core/Cargo.toml
+++ /dev/null
@@ -1,50 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-[package]
-name = "ballista-core"
-description = "Ballista Distributed Compute"
-license = "Apache-2.0"
-version = "0.4.2-SNAPSHOT"
-homepage = "https://github.com/apache/arrow"
-repository = "https://github.com/apache/arrow"
-authors = ["Apache Arrow <dev@arrow.apache.org>"]
-edition = "2018"
-build = "build.rs"
-
-[features]
-simd = ["datafusion/simd"]
-
-[dependencies]
-async-trait = "0.1.36"
-futures = "0.3"
-log = "0.4"
-prost = "0.7"
-serde = {version = "1", features = ["derive"]}
-sqlparser = "0.8"
-tokio = "1.0"
-tonic = "0.4"
-uuid = { version = "0.8", features = ["v4"] }
-
-arrow = { path = "../../../arrow"  }
-arrow-flight = { path = "../../../arrow-flight"  }
-datafusion = { path = "../../../datafusion" }
-
-[dev-dependencies]
-
-[build-dependencies]
-tonic-build = { version = "0.4" }
diff --git a/rust/ballista/rust/core/README.md b/rust/ballista/rust/core/README.md
deleted file mode 100644
index f97952b3f70..00000000000
--- a/rust/ballista/rust/core/README.md
+++ /dev/null
@@ -1,21 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-# Ballista - Rust
-This crate contains the core Ballista types.
diff --git a/rust/ballista/rust/core/build.rs b/rust/ballista/rust/core/build.rs
deleted file mode 100644
index 6ad153e87c8..00000000000
--- a/rust/ballista/rust/core/build.rs
+++ /dev/null
@@ -1,26 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-fn main() -> Result<(), String> {
-    // for use in docker build where file changes can be wonky
-    println!("cargo:rerun-if-env-changed=FORCE_REBUILD");
-
-    println!("cargo:rerun-if-changed=proto/ballista.proto");
-    tonic_build::configure()
-        .compile(&["proto/ballista.proto"], &["proto"])
-        .map_err(|e| format!("protobuf compilation failed: {}", e))
-}
diff --git a/rust/ballista/rust/core/proto/ballista.proto b/rust/ballista/rust/core/proto/ballista.proto
deleted file mode 100644
index 5733921bc92..00000000000
--- a/rust/ballista/rust/core/proto/ballista.proto
+++ /dev/null
@@ -1,824 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * <p>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-syntax = "proto3";
-
-package ballista.protobuf;
-
-option java_multiple_files = true;
-option java_package = "org.ballistacompute.protobuf";
-option java_outer_classname = "BallistaProto";
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-// Ballista Logical Plan
-///////////////////////////////////////////////////////////////////////////////////////////////////
-
-// logical expressions
-message LogicalExprNode {
-  oneof ExprType {
-    // column references
-    string column_name = 1;
-    
-    // alias
-    AliasNode alias = 2;
-
-    ScalarValue literal = 3;
-
-
-    // binary expressions
-    BinaryExprNode binary_expr = 4;
-    
-    // aggregate expressions
-    AggregateExprNode aggregate_expr = 5;
-    
-    // null checks
-    IsNull is_null_expr = 6;
-    IsNotNull is_not_null_expr = 7;
-    Not not_expr = 8;
-    
-    BetweenNode between = 9;
-    CaseNode case_ = 10;
-    CastNode cast = 11;
-    SortExprNode sort = 12;
-    NegativeNode negative = 13;
-    InListNode in_list = 14;
-    bool wildcard = 15;
-    ScalarFunctionNode scalar_function = 16;
-    TryCastNode try_cast = 17;
-  }
-}
-
-message IsNull {
-  LogicalExprNode expr = 1;
-}
-
-message IsNotNull {
-  LogicalExprNode expr = 1;
-}
-
-message Not {
-  LogicalExprNode expr = 1;
-}
-
-message AliasNode {
-  LogicalExprNode expr = 1;
-  string alias = 2;
-}
-
-message BinaryExprNode {
-  LogicalExprNode l = 1;
-  LogicalExprNode r = 2;
-  string op = 3;
-}
-
-message NegativeNode {
-  LogicalExprNode expr = 1;
-}
-
-message InListNode {
-  LogicalExprNode expr = 1;
-  repeated LogicalExprNode list = 2;
-  bool negated = 3;
-}
-
-enum ScalarFunction {
-  SQRT = 0;
-  SIN = 1;
-  COS = 2;
-  TAN = 3;
-  ASIN = 4;
-  ACOS = 5;
-  ATAN = 6;
-  EXP = 7;
-  LOG = 8;
-  LOG2 = 9;
-  LOG10 = 10;
-  FLOOR = 11;
-  CEIL = 12;
-  ROUND = 13;
-  TRUNC = 14;
-  ABS = 15;
-  SIGNUM = 16;
-  OCTETLENGTH = 17;
-  CONCAT = 18;
-  LOWER = 19;
-  UPPER = 20;
-  TRIM = 21;
-  LTRIM = 22;
-  RTRIM = 23;
-  TOTIMESTAMP = 24;
-  ARRAY = 25;
-  NULLIF = 26;
-  DATETRUNC = 27;
-  MD5 = 28;
-  SHA224 = 29;
-  SHA256 = 30;
-  SHA384 = 31;
-  SHA512 = 32;
-}
-
-message ScalarFunctionNode {
-  ScalarFunction fun = 1;
-  repeated LogicalExprNode expr = 2;
-}
-
-enum AggregateFunction {
-  MIN = 0;
-  MAX = 1;
-  SUM = 2;
-  AVG = 3;
-  COUNT = 4;
-}
-
-message AggregateExprNode {
-  AggregateFunction aggr_function = 1;
-  LogicalExprNode expr = 2;
-}
-
-message BetweenNode {
-  LogicalExprNode expr = 1;
-  bool negated = 2;
-  LogicalExprNode low = 3;
-  LogicalExprNode high = 4;
-}
-
-message CaseNode {
-  LogicalExprNode expr = 1;
-  repeated WhenThen when_then_expr = 2;
-  LogicalExprNode else_expr = 3;
-}
-
-message WhenThen {
-  LogicalExprNode when_expr = 1;
-  LogicalExprNode then_expr = 2;
-}
-
-message CastNode {
-  LogicalExprNode expr = 1;
-  ArrowType arrow_type = 2;
-}
-
-message TryCastNode {
-  LogicalExprNode expr = 1;
-  ArrowType arrow_type = 2;
-}
-
-message SortExprNode {
-  LogicalExprNode expr = 1;
-  bool asc = 2;
-  bool nulls_first = 3;
-}
-
-// LogicalPlan is a nested type
-message LogicalPlanNode {
-  oneof LogicalPlanType {
-    CsvTableScanNode csv_scan = 1;
-    ParquetTableScanNode parquet_scan = 2;
-    ProjectionNode projection = 3;
-    SelectionNode selection = 4;
-    LimitNode limit = 5;
-    AggregateNode aggregate = 6;
-    JoinNode join = 7;
-    SortNode sort = 8;
-    RepartitionNode repartition = 9;
-    EmptyRelationNode empty_relation = 10;
-    CreateExternalTableNode create_external_table = 11;
-    ExplainNode explain = 12;
-  }
-}
-
-message ProjectionColumns {
-  repeated string columns = 1;
-}
-
-message CsvTableScanNode {
-  string table_name = 1;
-  string path = 2;
-  bool has_header = 3;
-  string delimiter = 4;
-  string file_extension = 5;
-  ProjectionColumns projection = 6;
-  Schema schema = 7;
-  repeated LogicalExprNode filters = 8;
-}
-
-message ParquetTableScanNode {
-  string table_name = 1;
-  string path = 2;
-  ProjectionColumns projection = 3;
-  Schema schema = 4;
-  repeated LogicalExprNode filters = 5;
-}
-
-message ProjectionNode {
-  LogicalPlanNode input = 1;
-  repeated LogicalExprNode expr = 2;
-}
-
-message SelectionNode {
-  LogicalPlanNode input = 1;
-  LogicalExprNode expr = 2;
-}
-
-message SortNode{
-  LogicalPlanNode input = 1;
-  repeated LogicalExprNode expr = 2;
-}
-
-message RepartitionNode{
-  LogicalPlanNode input = 1;
-  oneof partition_method {
-    uint64 round_robin = 2;
-    HashRepartition hash = 3;
-  }
-}
-
-message HashRepartition {
-  repeated LogicalExprNode hash_expr = 1;
-  uint64 partition_count = 2;
-}
-
-message EmptyRelationNode{
-  bool produce_one_row = 1;
-}
-
-message CreateExternalTableNode{
-  string name = 1;
-  string location = 2;
-  FileType file_type = 3;
-  bool has_header = 4;
-  Schema schema = 5;
-}
-
-enum FileType{
-  NdJson = 0;
-  Parquet = 1;
-  CSV = 2;
-}
-
-message ExplainNode{
-  LogicalPlanNode input = 1;
-  bool verbose = 2;
-}
-
-message DfField{
-  string qualifier = 2;
-  Field field = 1;
-}
-
-message AggregateNode {
-  LogicalPlanNode input = 1;
-  repeated LogicalExprNode group_expr = 2;
-  repeated LogicalExprNode aggr_expr = 3;
-}
-
-enum JoinType {
-  INNER = 0;
-  LEFT = 1;
-  RIGHT = 2;
-}
-
-message JoinNode {
-  LogicalPlanNode left = 1;
-  LogicalPlanNode right = 2;
-  JoinType join_type = 3;
-  repeated string left_join_column = 4;
-  repeated string right_join_column = 5;
-}
-
-message LimitNode {
-  LogicalPlanNode input = 1;
-  uint32 limit = 2;
-}
-
-message SelectionExecNode {
-  LogicalExprNode expr = 1;
-}
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-// Ballista Physical Plan
-///////////////////////////////////////////////////////////////////////////////////////////////////
-
-// PhysicalPlanNode is a nested type
-message PhysicalPlanNode {
-  oneof PhysicalPlanType {
-    ParquetScanExecNode parquet_scan = 1;
-    CsvScanExecNode csv_scan = 2;
-    EmptyExecNode empty = 3;
-    ProjectionExecNode projection = 4;
-    GlobalLimitExecNode global_limit = 6;
-    LocalLimitExecNode local_limit = 7;
-    HashAggregateExecNode hash_aggregate = 8;
-    HashJoinExecNode hash_join = 9;
-    ShuffleReaderExecNode shuffle_reader = 10;
-    SortExecNode sort = 11;
-    CoalesceBatchesExecNode coalesce_batches = 12;
-    FilterExecNode filter = 13;
-    MergeExecNode merge = 14;
-    UnresolvedShuffleExecNode unresolved = 15;
-    RepartitionExecNode repartition = 16;
-  }
-}
-
-message UnresolvedShuffleExecNode {
-  repeated uint32 query_stage_ids = 1;
-  Schema schema = 2;
-  uint32 partition_count = 3;
-}
-
-message FilterExecNode {
-  PhysicalPlanNode input = 1;
-  LogicalExprNode expr = 2;
-}
-
-message ParquetScanExecNode {
-  repeated string filename = 1;
-  repeated uint32 projection = 2;
-  uint32 num_partitions = 3;
-  uint32 batch_size = 4;
-}
-
-message CsvScanExecNode {
-  string path = 1;
-  repeated uint32 projection = 2;
-  Schema schema = 3;
-  string file_extension = 4;
-  bool has_header = 5;
-  uint32 batch_size = 6;
-  string delimiter = 7;
-  
-  // partition filenames
-  repeated string filename = 8;
-}
-
-message HashJoinExecNode {
-  PhysicalPlanNode left = 1;
-  PhysicalPlanNode right = 2;
-  repeated JoinOn on = 3;
-  JoinType join_type = 4;
-
-}
-
-message JoinOn {
-   string left = 1;
-   string right = 2;
-}
-
-
-message EmptyExecNode {
-  bool produce_one_row = 1;
-  Schema schema = 2;
-}
-
-message ProjectionExecNode {
-  PhysicalPlanNode input = 1;
-  repeated LogicalExprNode expr = 2;
-  repeated string expr_name = 3;
-}
-
-enum AggregateMode {
-  PARTIAL = 0;
-  FINAL = 1;
-}
-
-message HashAggregateExecNode {
-  repeated LogicalExprNode group_expr = 1;
-  repeated LogicalExprNode aggr_expr = 2;
-  AggregateMode mode = 3;
-  PhysicalPlanNode input = 4;
-  repeated string group_expr_name = 5;
-  repeated string aggr_expr_name = 6;
-  // we need the input schema to the partial aggregate to pass to the final aggregate
-  Schema input_schema = 7;
-}
-
-message ShuffleReaderExecNode {
-  repeated PartitionLocation partition_location = 1;
-  Schema schema = 2;
-}
-
-message GlobalLimitExecNode {
-  PhysicalPlanNode input = 1;
-  uint32 limit = 2;
-}
-
-message LocalLimitExecNode {
-  PhysicalPlanNode input = 1;
-  uint32 limit = 2;
-}
-
-message SortExecNode {
-  PhysicalPlanNode input = 1;
-  repeated LogicalExprNode expr = 2;
-}
-
-message CoalesceBatchesExecNode {
-  PhysicalPlanNode input = 1;
-  uint32 target_batch_size = 2;
-}
-
-message MergeExecNode {
-  PhysicalPlanNode input = 1;
-}
-
-message RepartitionExecNode{
-  PhysicalPlanNode input = 1;
-  oneof partition_method {
-    uint64 round_robin = 2;
-    HashRepartition hash = 3;
-    uint64 unknown = 4;
-  }
-}
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-// Ballista Scheduling
-///////////////////////////////////////////////////////////////////////////////////////////////////
-
-message KeyValuePair {
-  string key = 1;
-  string value = 2;
-}
-
-message Action {
-
-  oneof ActionType {
-    // Execute a logical query plan
-    LogicalPlanNode query = 1;
-
-    // Execute one partition of a physical query plan
-    ExecutePartition execute_partition = 2;
-
-    // Fetch a partition from an executor
-    PartitionId fetch_partition = 3;
-  }
-  
-  // configuration settings
-  repeated KeyValuePair settings = 100;
-}
-
-message ExecutePartition {
-  string job_id = 1;
-  uint32 stage_id = 2;
-  repeated uint32 partition_id = 3;
-  PhysicalPlanNode plan = 4;
-  // The task could need to read partitions from other executors
-  repeated PartitionLocation partition_location = 5;
-}
-
-// Mapping from partition id to executor id
-message PartitionLocation {
-  PartitionId partition_id = 1;
-  ExecutorMetadata executor_meta = 2;
-  PartitionStats partition_stats = 3;
-}
-
-// Unique identifier for a materialized partition of data
-message PartitionId {
-  string job_id = 1;
-  uint32 stage_id = 2;
-  uint32 partition_id = 4;
-}
-
-message PartitionStats {
-  int64 num_rows = 1;
-  int64 num_batches = 2;
-  int64 num_bytes = 3;
-  repeated ColumnStats column_stats = 4;
-}
-
-message ColumnStats {
-  ScalarValue min_value = 1;
-  ScalarValue max_value = 2;
-  uint32 null_count = 3;
-  uint32 distinct_count = 4;
-}
-
-message ExecutorMetadata {
-  string id = 1;
-  string host = 2;
-  uint32 port = 3;
-}
-
-message GetExecutorMetadataParams {}
-
-message GetExecutorMetadataResult {
-  repeated ExecutorMetadata metadata = 1;
-}
-
-message RunningTask {
-  string executor_id = 1;
-}
-
-message FailedTask {
-  string error = 1;
-}
-
-message CompletedTask {
-  string executor_id = 1;
-}
-
-message TaskStatus {
-  PartitionId partition_id = 1;
-  oneof status {
-    RunningTask running = 2;
-    FailedTask failed = 3;
-    CompletedTask completed = 4;
-  }
-}
-
-message PollWorkParams {
-  ExecutorMetadata metadata = 1;
-  bool can_accept_task = 2;
-  // All tasks must be reported until they reach the failed or completed state
-  repeated TaskStatus task_status = 3;
-}
-
-message TaskDefinition {
-  PartitionId task_id = 1;
-  PhysicalPlanNode plan = 2;
-}
-
-message PollWorkResult {
-  TaskDefinition task = 1;
-}
-
-message ExecuteQueryParams {
-  oneof query {
-    LogicalPlanNode logical_plan = 1;
-    string sql = 2;
-  }}
-
-message ExecuteSqlParams {
-  string sql = 1;
-}
-
-message ExecuteQueryResult {
-  string job_id = 1;
-}
-
-message GetJobStatusParams {
-  string job_id = 1;
-}
-
-message CompletedJob {
-  repeated PartitionLocation partition_location = 1;
-}
-
-message QueuedJob {}
-
-// TODO: add progress report
-message RunningJob {}
-
-message FailedJob {
-  string error = 1;
-}
-
-message JobStatus {
-  oneof status {
-    QueuedJob queued = 1;
-    RunningJob running = 2;
-    FailedJob failed = 3;
-    CompletedJob completed = 4;
-  }
-}
-
-message GetJobStatusResult {
-  JobStatus status = 1;
-}
-
-message GetFileMetadataParams {
-  string path = 1;
-  FileType file_type = 2;
-}
-
-message GetFileMetadataResult {
-  Schema schema = 1;
-  repeated FilePartitionMetadata partitions = 2;
-}
-
-message FilePartitionMetadata {
-  repeated string filename = 1;
-}
-
-service SchedulerGrpc {
-  rpc GetExecutorsMetadata (GetExecutorMetadataParams) returns (GetExecutorMetadataResult) {}
-
-  // Executors must poll the scheduler for heartbeat and to receive tasks
-  rpc PollWork (PollWorkParams) returns (PollWorkResult) {}
-
-  rpc GetFileMetadata (GetFileMetadataParams) returns (GetFileMetadataResult) {}
-
-  rpc ExecuteQuery (ExecuteQueryParams) returns (ExecuteQueryResult) {}
-
-  rpc GetJobStatus (GetJobStatusParams) returns (GetJobStatusResult) {}
-}
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-// Arrow Data Types
-///////////////////////////////////////////////////////////////////////////////////////////////////
-
-message Schema {
-  repeated Field columns = 1;
-}
-
-message Field {
-  // name of the field
-  string name = 1;
-  ArrowType arrow_type = 2;
-  bool nullable = 3;
-  // for complex data types like structs, unions
-  repeated Field children = 4;
-}
-
-message FixedSizeBinary{
-    int32 length = 1;
-}
-
-message Timestamp{
-    TimeUnit time_unit = 1;
-    string timezone = 2;
-}
-
-enum DateUnit{
-    Day = 0;
-    DateMillisecond = 1;
-}
-
-enum TimeUnit{
-    Second = 0;
-    TimeMillisecond = 1;
-    Microsecond = 2;
-    Nanosecond = 3;
-}
-
-enum IntervalUnit{
-    YearMonth = 0;
-    DayTime = 1;
-}
-
-message Decimal{
-    uint64 whole = 1;
-    uint64 fractional = 2;
-}
-
-message List{
-    Field field_type = 1;
-}
-
-message FixedSizeList{
-    Field field_type = 1;
-    int32 list_size = 2;
-}
-
-message Dictionary{
-    ArrowType key = 1;
-    ArrowType value = 2;
-}
-
-message Struct{
-    repeated Field sub_field_types = 1;
-}
-
-message Union{
-    repeated Field union_types = 1;
-}
-
-
-message ScalarListValue{
-    ScalarType datatype = 1;
-    repeated ScalarValue values = 2;
-}
-
-
-
-message ScalarValue{
-    oneof value{
-        bool   bool_value = 1;
-        string utf8_value = 2;
-        string large_utf8_value = 3;
-        int32  int8_value = 4;
-        int32  int16_value = 5;
-        int32  int32_value = 6;
-        int64  int64_value = 7;
-        uint32 uint8_value = 8;
-        uint32 uint16_value = 9;
-        uint32 uint32_value = 10;
-        uint64 uint64_value = 11;
-        float  float32_value = 12;
-        double float64_value = 13;
-        //Literal Date32 value always has a unit of day
-        int32  date_32_value = 14;
-        int64  time_microsecond_value = 15;
-        int64  time_nanosecond_value = 16;
-        ScalarListValue list_value = 17;
-        ScalarType null_list_value = 18;
-
-        PrimitiveScalarType null_value = 19;
-    }
-}
-
-// Contains all valid datafusion scalar type except for 
-// List
-enum PrimitiveScalarType{
-    
-    BOOL = 0;     // arrow::Type::BOOL
-    UINT8 = 1;    // arrow::Type::UINT8
-    INT8 = 2;     // arrow::Type::INT8
-    UINT16 = 3;   // represents arrow::Type fields in src/arrow/type.h
-    INT16 = 4;
-    UINT32 = 5;
-    INT32 = 6;
-    UINT64 = 7;
-    INT64 = 8;
-    FLOAT32 = 9;
-    FLOAT64 = 10;
-    UTF8 = 11;
-    LARGE_UTF8 = 12;
-    DATE32 = 13;
-    TIME_MICROSECOND = 14;
-    TIME_NANOSECOND = 15;
-    NULL = 16;
-}
-
-message ScalarType{
-    oneof datatype{
-        PrimitiveScalarType scalar = 1;
-        ScalarListType list = 2;
-    }
-}
-
-message ScalarListType{
-    repeated string field_names = 3;
-    PrimitiveScalarType deepest_type = 2;
-}
-
-// Broke out into multiple message types so that type 
-// metadata did not need to be in separate message
-//All types that are of the empty message types contain no additional metadata
-// about the type
-message ArrowType{
-    oneof arrow_type_enum{
-        EmptyMessage NONE = 1;     // arrow::Type::NA
-        EmptyMessage BOOL =  2;     // arrow::Type::BOOL
-        EmptyMessage UINT8 = 3;    // arrow::Type::UINT8
-        EmptyMessage INT8 =  4;     // arrow::Type::INT8
-        EmptyMessage UINT16 =5;   // represents arrow::Type fields in src/arrow/type.h
-        EmptyMessage INT16 = 6;
-        EmptyMessage UINT32 =7;
-        EmptyMessage INT32 = 8;
-        EmptyMessage UINT64 =9;
-        EmptyMessage INT64 =10 ;
-        EmptyMessage FLOAT16 =11 ;
-        EmptyMessage FLOAT32 =12 ; 
-        EmptyMessage FLOAT64 =13 ;
-        EmptyMessage UTF8 =14 ;
-        EmptyMessage LARGE_UTF8 = 32;
-        EmptyMessage BINARY =15 ;
-        int32 FIXED_SIZE_BINARY =16 ;
-        EmptyMessage LARGE_BINARY = 31;
-        EmptyMessage DATE32 =17 ;
-        EmptyMessage DATE64 =18 ;
-        TimeUnit DURATION = 19;
-        Timestamp TIMESTAMP =20 ;
-        TimeUnit TIME32 =21 ;
-        TimeUnit TIME64 =22 ;
-        IntervalUnit INTERVAL =23 ;
-        Decimal DECIMAL =24 ;
-        List LIST =25;
-        List LARGE_LIST = 26;
-        FixedSizeList FIXED_SIZE_LIST = 27;
-        Struct STRUCT =28;
-        Union UNION =29;
-        Dictionary DICTIONARY =30;
-    }
-}
-
-
-
-
-
-//Useful for representing an empty enum variant in rust
-// E.G. enum example{One, Two(i32)}
-// maps to 
-// message example{
-//    oneof{
-//        EmptyMessage One = 1;
-//        i32 Two = 2;
-//   }
-//}
-message EmptyMessage{}
diff --git a/rust/ballista/rust/core/src/client.rs b/rust/ballista/rust/core/src/client.rs
deleted file mode 100644
index f64f95f7cfe..00000000000
--- a/rust/ballista/rust/core/src/client.rs
+++ /dev/null
@@ -1,224 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Client API for sending requests to executors.
-
-use std::sync::Arc;
-use std::{collections::HashMap, pin::Pin};
-use std::{
-    convert::{TryFrom, TryInto},
-    task::{Context, Poll},
-};
-
-use crate::error::{ballista_error, BallistaError, Result};
-use crate::memory_stream::MemoryStream;
-use crate::serde::protobuf::{self};
-use crate::serde::scheduler::{
-    Action, ExecutePartition, ExecutePartitionResult, PartitionId, PartitionStats,
-};
-
-use arrow::record_batch::RecordBatch;
-use arrow::{
-    array::{StringArray, StructArray},
-    error::{ArrowError, Result as ArrowResult},
-};
-use arrow::{datatypes::Schema, datatypes::SchemaRef};
-use arrow_flight::utils::flight_data_to_arrow_batch;
-use arrow_flight::Ticket;
-use arrow_flight::{flight_service_client::FlightServiceClient, FlightData};
-use datafusion::physical_plan::common::collect;
-use datafusion::physical_plan::{ExecutionPlan, SendableRecordBatchStream};
-use datafusion::{logical_plan::LogicalPlan, physical_plan::RecordBatchStream};
-use futures::{Stream, StreamExt};
-use log::debug;
-use prost::Message;
-use tonic::Streaming;
-use uuid::Uuid;
-
-/// Client for interacting with Ballista executors.
-#[derive(Clone)]
-pub struct BallistaClient {
-    flight_client: FlightServiceClient<tonic::transport::channel::Channel>,
-}
-
-impl BallistaClient {
-    /// Create a new BallistaClient to connect to the executor listening on the specified
-    /// host and port
-
-    pub async fn try_new(host: &str, port: u16) -> Result<Self> {
-        let addr = format!("http://{}:{}", host, port);
-        debug!("BallistaClient connecting to {}", addr);
-        let flight_client =
-            FlightServiceClient::connect(addr.clone())
-                .await
-                .map_err(|e| {
-                    BallistaError::General(format!(
-                        "Error connecting to Ballista scheduler or executor at {}: {:?}",
-                        addr, e
-                    ))
-                })?;
-        debug!("BallistaClient connected OK");
-
-        Ok(Self { flight_client })
-    }
-
-    /// Execute one partition of a physical query plan against the executor
-    pub async fn execute_partition(
-        &mut self,
-        job_id: String,
-        stage_id: usize,
-        partition_id: Vec<usize>,
-        plan: Arc<dyn ExecutionPlan>,
-    ) -> Result<Vec<ExecutePartitionResult>> {
-        let action = Action::ExecutePartition(ExecutePartition {
-            job_id,
-            stage_id,
-            partition_id,
-            plan,
-            shuffle_locations: Default::default(),
-        });
-        let stream = self.execute_action(&action).await?;
-        let batches = collect(stream).await?;
-
-        batches
-            .iter()
-            .map(|batch| {
-                if batch.num_rows() != 1 {
-                    Err(BallistaError::General(
-                        "execute_partition received wrong number of rows".to_owned(),
-                    ))
-                } else {
-                    let path = batch
-                        .column(0)
-                        .as_any()
-                        .downcast_ref::<StringArray>()
-                        .expect(
-                            "execute_partition expected column 0 to be a StringArray",
-                        );
-
-                    let stats = batch
-                        .column(1)
-                        .as_any()
-                        .downcast_ref::<StructArray>()
-                        .expect(
-                            "execute_partition expected column 1 to be a StructArray",
-                        );
-
-                    Ok(ExecutePartitionResult::new(
-                        path.value(0),
-                        PartitionStats::from_arrow_struct_array(stats),
-                    ))
-                }
-            })
-            .collect::<Result<Vec<_>>>()
-    }
-
-    /// Fetch a partition from an executor
-    pub async fn fetch_partition(
-        &mut self,
-        job_id: &str,
-        stage_id: usize,
-        partition_id: usize,
-    ) -> Result<SendableRecordBatchStream> {
-        let action =
-            Action::FetchPartition(PartitionId::new(job_id, stage_id, partition_id));
-        self.execute_action(&action).await
-    }
-
-    /// Execute an action and retrieve the results
-    pub async fn execute_action(
-        &mut self,
-        action: &Action,
-    ) -> Result<SendableRecordBatchStream> {
-        let serialized_action: protobuf::Action = action.to_owned().try_into()?;
-
-        let mut buf: Vec<u8> = Vec::with_capacity(serialized_action.encoded_len());
-
-        serialized_action
-            .encode(&mut buf)
-            .map_err(|e| BallistaError::General(format!("{:?}", e)))?;
-
-        let request = tonic::Request::new(Ticket { ticket: buf });
-
-        let mut stream = self
-            .flight_client
-            .do_get(request)
-            .await
-            .map_err(|e| BallistaError::General(format!("{:?}", e)))?
-            .into_inner();
-
-        // the schema should be the first message returned, else client should error
-        match stream
-            .message()
-            .await
-            .map_err(|e| BallistaError::General(format!("{:?}", e)))?
-        {
-            Some(flight_data) => {
-                // convert FlightData to a stream
-                let schema = Arc::new(Schema::try_from(&flight_data)?);
-
-                // all the remaining stream messages should be dictionary and record batches
-                Ok(Box::pin(FlightDataStream::new(stream, schema)))
-            }
-            None => Err(ballista_error(
-                "Did not receive schema batch from flight server",
-            )),
-        }
-    }
-}
-
-struct FlightDataStream {
-    stream: Streaming<FlightData>,
-    schema: SchemaRef,
-}
-
-impl FlightDataStream {
-    pub fn new(stream: Streaming<FlightData>, schema: SchemaRef) -> Self {
-        Self { stream, schema }
-    }
-}
-
-impl Stream for FlightDataStream {
-    type Item = ArrowResult<RecordBatch>;
-
-    fn poll_next(
-        mut self: std::pin::Pin<&mut Self>,
-        cx: &mut Context<'_>,
-    ) -> Poll<Option<Self::Item>> {
-        self.stream.poll_next_unpin(cx).map(|x| match x {
-            Some(flight_data_chunk_result) => {
-                let converted_chunk = flight_data_chunk_result
-                    .map_err(|e| ArrowError::from_external_error(Box::new(e)))
-                    .and_then(|flight_data_chunk| {
-                        flight_data_to_arrow_batch(
-                            &flight_data_chunk,
-                            self.schema.clone(),
-                            &[],
-                        )
-                    });
-                Some(converted_chunk)
-            }
-            None => None,
-        })
-    }
-}
-
-impl RecordBatchStream for FlightDataStream {
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-}
diff --git a/rust/ballista/rust/core/src/datasource.rs b/rust/ballista/rust/core/src/datasource.rs
deleted file mode 100644
index 8ff0df44e4b..00000000000
--- a/rust/ballista/rust/core/src/datasource.rs
+++ /dev/null
@@ -1,72 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::{any::Any, sync::Arc};
-
-use arrow::datatypes::SchemaRef;
-use datafusion::error::Result as DFResult;
-use datafusion::{
-    datasource::{datasource::Statistics, TableProvider},
-    logical_plan::{Expr, LogicalPlan},
-    physical_plan::ExecutionPlan,
-};
-
-/// This ugly adapter is needed because we use DataFusion's logical plan when building queries
-/// and when we register tables with DataFusion's `ExecutionContext` we need to provide a
-/// TableProvider which is effectively a wrapper around a physical plan. We need to be able to
-/// register tables so that we can create logical plans from SQL statements that reference these
-/// tables.
-pub struct DFTableAdapter {
-    /// DataFusion logical plan
-    pub logical_plan: LogicalPlan,
-    /// DataFusion execution plan
-    plan: Arc<dyn ExecutionPlan>,
-}
-
-impl DFTableAdapter {
-    pub fn new(logical_plan: LogicalPlan, plan: Arc<dyn ExecutionPlan>) -> Self {
-        Self { logical_plan, plan }
-    }
-}
-
-impl TableProvider for DFTableAdapter {
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn schema(&self) -> SchemaRef {
-        self.plan.schema()
-    }
-
-    fn scan(
-        &self,
-        _projection: &Option<Vec<usize>>,
-        _batch_size: usize,
-        _filters: &[Expr],
-        _limit: Option<usize>,
-    ) -> DFResult<Arc<dyn ExecutionPlan>> {
-        Ok(self.plan.clone())
-    }
-
-    fn statistics(&self) -> Statistics {
-        Statistics {
-            num_rows: None,
-            total_byte_size: None,
-            column_statistics: None,
-        }
-    }
-}
diff --git a/rust/ballista/rust/core/src/error.rs b/rust/ballista/rust/core/src/error.rs
deleted file mode 100644
index d0155ce4b78..00000000000
--- a/rust/ballista/rust/core/src/error.rs
+++ /dev/null
@@ -1,172 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Ballista error types
-
-use std::{
-    error::Error,
-    fmt::{Display, Formatter},
-    io, result,
-};
-
-use arrow::error::ArrowError;
-use datafusion::error::DataFusionError;
-use sqlparser::parser;
-
-pub type Result<T> = result::Result<T, BallistaError>;
-
-/// Ballista error
-#[derive(Debug)]
-pub enum BallistaError {
-    NotImplemented(String),
-    General(String),
-    Internal(String),
-    ArrowError(ArrowError),
-    DataFusionError(DataFusionError),
-    SqlError(parser::ParserError),
-    IoError(io::Error),
-    // ReqwestError(reqwest::Error),
-    //HttpError(http::Error),
-    // KubeAPIError(kube::error::Error),
-    // KubeAPIRequestError(k8s_openapi::RequestError),
-    // KubeAPIResponseError(k8s_openapi::ResponseError),
-    TonicError(tonic::transport::Error),
-    GrpcError(tonic::Status),
-    TokioError(tokio::task::JoinError),
-}
-
-impl<T> Into<Result<T>> for BallistaError {
-    fn into(self) -> Result<T> {
-        Err(self)
-    }
-}
-
-pub fn ballista_error(message: &str) -> BallistaError {
-    BallistaError::General(message.to_owned())
-}
-
-impl From<String> for BallistaError {
-    fn from(e: String) -> Self {
-        BallistaError::General(e)
-    }
-}
-
-impl From<ArrowError> for BallistaError {
-    fn from(e: ArrowError) -> Self {
-        BallistaError::ArrowError(e)
-    }
-}
-
-impl From<parser::ParserError> for BallistaError {
-    fn from(e: parser::ParserError) -> Self {
-        BallistaError::SqlError(e)
-    }
-}
-
-impl From<DataFusionError> for BallistaError {
-    fn from(e: DataFusionError) -> Self {
-        BallistaError::DataFusionError(e)
-    }
-}
-
-impl From<io::Error> for BallistaError {
-    fn from(e: io::Error) -> Self {
-        BallistaError::IoError(e)
-    }
-}
-
-// impl From<reqwest::Error> for BallistaError {
-//     fn from(e: reqwest::Error) -> Self {
-//         BallistaError::ReqwestError(e)
-//     }
-// }
-//
-// impl From<http::Error> for BallistaError {
-//     fn from(e: http::Error) -> Self {
-//         BallistaError::HttpError(e)
-//     }
-// }
-
-// impl From<kube::error::Error> for BallistaError {
-//     fn from(e: kube::error::Error) -> Self {
-//         BallistaError::KubeAPIError(e)
-//     }
-// }
-
-// impl From<k8s_openapi::RequestError> for BallistaError {
-//     fn from(e: k8s_openapi::RequestError) -> Self {
-//         BallistaError::KubeAPIRequestError(e)
-//     }
-// }
-
-// impl From<k8s_openapi::ResponseError> for BallistaError {
-//     fn from(e: k8s_openapi::ResponseError) -> Self {
-//         BallistaError::KubeAPIResponseError(e)
-//     }
-// }
-
-impl From<tonic::transport::Error> for BallistaError {
-    fn from(e: tonic::transport::Error) -> Self {
-        BallistaError::TonicError(e)
-    }
-}
-
-impl From<tonic::Status> for BallistaError {
-    fn from(e: tonic::Status) -> Self {
-        BallistaError::GrpcError(e)
-    }
-}
-
-impl From<tokio::task::JoinError> for BallistaError {
-    fn from(e: tokio::task::JoinError) -> Self {
-        BallistaError::TokioError(e)
-    }
-}
-
-impl Display for BallistaError {
-    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        match self {
-            BallistaError::NotImplemented(ref desc) => {
-                write!(f, "Not implemented: {}", desc)
-            }
-            BallistaError::General(ref desc) => write!(f, "General error: {}", desc),
-            BallistaError::ArrowError(ref desc) => write!(f, "Arrow error: {}", desc),
-            BallistaError::DataFusionError(ref desc) => {
-                write!(f, "DataFusion error: {:?}", desc)
-            }
-            BallistaError::SqlError(ref desc) => write!(f, "SQL error: {:?}", desc),
-            BallistaError::IoError(ref desc) => write!(f, "IO error: {}", desc),
-            // BallistaError::ReqwestError(ref desc) => write!(f, "Reqwest error: {}", desc),
-            // BallistaError::HttpError(ref desc) => write!(f, "HTTP error: {}", desc),
-            // BallistaError::KubeAPIError(ref desc) => write!(f, "Kube API error: {}", desc),
-            // BallistaError::KubeAPIRequestError(ref desc) => {
-            //     write!(f, "KubeAPI request error: {}", desc)
-            // }
-            // BallistaError::KubeAPIResponseError(ref desc) => {
-            //     write!(f, "KubeAPI response error: {}", desc)
-            // }
-            BallistaError::TonicError(desc) => write!(f, "Tonic error: {}", desc),
-            BallistaError::GrpcError(desc) => write!(f, "Grpc error: {}", desc),
-            BallistaError::Internal(desc) => {
-                write!(f, "Internal Ballista error: {}", desc)
-            }
-            BallistaError::TokioError(desc) => write!(f, "Tokio join error: {}", desc),
-        }
-    }
-}
-
-impl Error for BallistaError {}
diff --git a/rust/ballista/rust/core/src/execution_plans/mod.rs b/rust/ballista/rust/core/src/execution_plans/mod.rs
deleted file mode 100644
index 1fb2010bd54..00000000000
--- a/rust/ballista/rust/core/src/execution_plans/mod.rs
+++ /dev/null
@@ -1,27 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! This module contains execution plans that are needed to distribute Datafusion's execution plans into
-//! several Ballista executors.
-
-mod query_stage;
-mod shuffle_reader;
-mod unresolved_shuffle;
-
-pub use query_stage::QueryStageExec;
-pub use shuffle_reader::ShuffleReaderExec;
-pub use unresolved_shuffle::UnresolvedShuffleExec;
diff --git a/rust/ballista/rust/core/src/execution_plans/query_stage.rs b/rust/ballista/rust/core/src/execution_plans/query_stage.rs
deleted file mode 100644
index d8822ea3138..00000000000
--- a/rust/ballista/rust/core/src/execution_plans/query_stage.rs
+++ /dev/null
@@ -1,92 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::sync::Arc;
-use std::{any::Any, pin::Pin};
-
-use arrow::datatypes::SchemaRef;
-use async_trait::async_trait;
-use datafusion::physical_plan::{ExecutionPlan, Partitioning};
-use datafusion::{error::Result, physical_plan::RecordBatchStream};
-use uuid::Uuid;
-
-/// QueryStageExec represents a section of a query plan that has consistent partitioning and
-/// can be executed as one unit with each partition being executed in parallel. The output of
-/// a query stage either forms the input of another query stage or can be the final result of
-/// a query.
-#[derive(Debug, Clone)]
-pub struct QueryStageExec {
-    /// Unique ID for the job (query) that this stage is a part of
-    pub job_id: String,
-    /// Unique query stage ID within the job
-    pub stage_id: usize,
-    /// Physical execution plan for this query stage
-    pub child: Arc<dyn ExecutionPlan>,
-}
-
-impl QueryStageExec {
-    /// Create a new query stage
-    pub fn try_new(
-        job_id: String,
-        stage_id: usize,
-        child: Arc<dyn ExecutionPlan>,
-    ) -> Result<Self> {
-        Ok(Self {
-            job_id,
-            stage_id,
-            child,
-        })
-    }
-}
-
-#[async_trait]
-impl ExecutionPlan for QueryStageExec {
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn schema(&self) -> SchemaRef {
-        self.child.schema()
-    }
-
-    fn output_partitioning(&self) -> Partitioning {
-        self.child.output_partitioning()
-    }
-
-    fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
-        vec![self.child.clone()]
-    }
-
-    fn with_new_children(
-        &self,
-        children: Vec<Arc<dyn ExecutionPlan>>,
-    ) -> Result<Arc<dyn ExecutionPlan>> {
-        assert!(children.len() == 1);
-        Ok(Arc::new(QueryStageExec::try_new(
-            self.job_id.clone(),
-            self.stage_id,
-            children[0].clone(),
-        )?))
-    }
-
-    async fn execute(
-        &self,
-        partition: usize,
-    ) -> Result<Pin<Box<dyn RecordBatchStream + Send + Sync>>> {
-        self.child.execute(partition).await
-    }
-}
diff --git a/rust/ballista/rust/core/src/execution_plans/shuffle_reader.rs b/rust/ballista/rust/core/src/execution_plans/shuffle_reader.rs
deleted file mode 100644
index bd8f6fdbbea..00000000000
--- a/rust/ballista/rust/core/src/execution_plans/shuffle_reader.rs
+++ /dev/null
@@ -1,106 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::sync::Arc;
-use std::{any::Any, pin::Pin};
-
-use crate::client::BallistaClient;
-use crate::memory_stream::MemoryStream;
-use crate::serde::scheduler::PartitionLocation;
-
-use arrow::datatypes::SchemaRef;
-use async_trait::async_trait;
-use datafusion::physical_plan::{ExecutionPlan, Partitioning};
-use datafusion::{
-    error::{DataFusionError, Result},
-    physical_plan::RecordBatchStream,
-};
-use log::info;
-
-/// ShuffleReaderExec reads partitions that have already been materialized by an executor.
-#[derive(Debug, Clone)]
-pub struct ShuffleReaderExec {
-    // The query stage that is responsible for producing the shuffle partitions that
-    // this operator will read
-    pub(crate) partition_location: Vec<PartitionLocation>,
-    pub(crate) schema: SchemaRef,
-}
-
-impl ShuffleReaderExec {
-    /// Create a new ShuffleReaderExec
-    pub fn try_new(
-        partition_meta: Vec<PartitionLocation>,
-        schema: SchemaRef,
-    ) -> Result<Self> {
-        Ok(Self {
-            partition_location: partition_meta,
-            schema,
-        })
-    }
-}
-
-#[async_trait]
-impl ExecutionPlan for ShuffleReaderExec {
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-
-    fn output_partitioning(&self) -> Partitioning {
-        Partitioning::UnknownPartitioning(self.partition_location.len())
-    }
-
-    fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
-        vec![]
-    }
-
-    fn with_new_children(
-        &self,
-        _children: Vec<Arc<dyn ExecutionPlan>>,
-    ) -> Result<Arc<dyn ExecutionPlan>> {
-        Err(DataFusionError::Plan(
-            "Ballista ShuffleReaderExec does not support with_new_children()".to_owned(),
-        ))
-    }
-
-    async fn execute(
-        &self,
-        partition: usize,
-    ) -> Result<Pin<Box<dyn RecordBatchStream + Send + Sync>>> {
-        info!("ShuffleReaderExec::execute({})", partition);
-        let partition_location = &self.partition_location[partition];
-
-        let mut client = BallistaClient::try_new(
-            &partition_location.executor_meta.host,
-            partition_location.executor_meta.port,
-        )
-        .await
-        .map_err(|e| DataFusionError::Execution(format!("Ballista Error: {:?}", e)))?;
-
-        client
-            .fetch_partition(
-                &partition_location.partition_id.job_id,
-                partition_location.partition_id.stage_id,
-                partition,
-            )
-            .await
-            .map_err(|e| DataFusionError::Execution(format!("Ballista Error: {:?}", e)))
-    }
-}
diff --git a/rust/ballista/rust/core/src/execution_plans/unresolved_shuffle.rs b/rust/ballista/rust/core/src/execution_plans/unresolved_shuffle.rs
deleted file mode 100644
index a62a2513ff4..00000000000
--- a/rust/ballista/rust/core/src/execution_plans/unresolved_shuffle.rs
+++ /dev/null
@@ -1,101 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::sync::Arc;
-use std::{any::Any, pin::Pin};
-
-use crate::client::BallistaClient;
-use crate::memory_stream::MemoryStream;
-use crate::serde::scheduler::PartitionLocation;
-
-use arrow::datatypes::SchemaRef;
-use async_trait::async_trait;
-use datafusion::physical_plan::{ExecutionPlan, Partitioning};
-use datafusion::{
-    error::{DataFusionError, Result},
-    physical_plan::RecordBatchStream,
-};
-use log::info;
-
-/// UnresolvedShuffleExec represents a dependency on the results of several QueryStageExec nodes which haven't been computed yet.
-///
-/// An ExecutionPlan that contains an UnresolvedShuffleExec isn't ready for execution. The presence of this ExecutionPlan
-/// is used as a signal so the scheduler knows it can't start computation on a specific QueryStageExec.
-#[derive(Debug, Clone)]
-pub struct UnresolvedShuffleExec {
-    // The query stage ids which needs to be computed
-    pub query_stage_ids: Vec<usize>,
-
-    // The schema this node will have once it is replaced with a ShuffleReaderExec
-    pub schema: SchemaRef,
-
-    // The partition count this node will have once it is replaced with a ShuffleReaderExec
-    pub partition_count: usize,
-}
-
-impl UnresolvedShuffleExec {
-    /// Create a new UnresolvedShuffleExec
-    pub fn new(
-        query_stage_ids: Vec<usize>,
-        schema: SchemaRef,
-        partition_count: usize,
-    ) -> Self {
-        Self {
-            query_stage_ids,
-            schema,
-            partition_count,
-        }
-    }
-}
-
-#[async_trait]
-impl ExecutionPlan for UnresolvedShuffleExec {
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-
-    fn output_partitioning(&self) -> Partitioning {
-        Partitioning::UnknownPartitioning(self.partition_count)
-    }
-
-    fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
-        vec![]
-    }
-
-    fn with_new_children(
-        &self,
-        _children: Vec<Arc<dyn ExecutionPlan>>,
-    ) -> Result<Arc<dyn ExecutionPlan>> {
-        Err(DataFusionError::Plan(
-            "Ballista UnresolvedShuffleExec does not support with_new_children()"
-                .to_owned(),
-        ))
-    }
-
-    async fn execute(
-        &self,
-        _partition: usize,
-    ) -> Result<Pin<Box<dyn RecordBatchStream + Send + Sync>>> {
-        Err(DataFusionError::Plan(
-            "Ballista UnresolvedShuffleExec does not support execution".to_owned(),
-        ))
-    }
-}
diff --git a/rust/ballista/rust/core/src/lib.rs b/rust/ballista/rust/core/src/lib.rs
deleted file mode 100644
index 425dbab34c1..00000000000
--- a/rust/ballista/rust/core/src/lib.rs
+++ /dev/null
@@ -1,34 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Ballista Distributed Compute
-#![allow(unused_imports)]
-pub const BALLISTA_VERSION: &str = env!("CARGO_PKG_VERSION");
-
-pub fn print_version() {
-    println!("Ballista version: {}", BALLISTA_VERSION)
-}
-
-pub mod client;
-pub mod datasource;
-pub mod error;
-pub mod execution_plans;
-pub mod memory_stream;
-pub mod utils;
-
-#[macro_use]
-pub mod serde;
diff --git a/rust/ballista/rust/core/src/memory_stream.rs b/rust/ballista/rust/core/src/memory_stream.rs
deleted file mode 100644
index 8bf5e203f6d..00000000000
--- a/rust/ballista/rust/core/src/memory_stream.rs
+++ /dev/null
@@ -1,93 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! This is copied from DataFusion because it is declared as `pub(crate)`. See
-//! https://issues.apache.org/jira/browse/ARROW-11276.
-
-use std::task::{Context, Poll};
-
-use arrow::{datatypes::SchemaRef, error::Result, record_batch::RecordBatch};
-use datafusion::physical_plan::RecordBatchStream;
-use futures::Stream;
-
-/// Iterator over batches
-
-pub struct MemoryStream {
-    /// Vector of record batches
-    data: Vec<RecordBatch>,
-    /// Schema representing the data
-    schema: SchemaRef,
-    /// Optional projection for which columns to load
-    projection: Option<Vec<usize>>,
-    /// Index into the data
-    index: usize,
-}
-
-impl MemoryStream {
-    /// Create an iterator for a vector of record batches
-
-    pub fn try_new(
-        data: Vec<RecordBatch>,
-        schema: SchemaRef,
-        projection: Option<Vec<usize>>,
-    ) -> Result<Self> {
-        Ok(Self {
-            data,
-            schema,
-            projection,
-            index: 0,
-        })
-    }
-}
-
-impl Stream for MemoryStream {
-    type Item = Result<RecordBatch>;
-
-    fn poll_next(
-        mut self: std::pin::Pin<&mut Self>,
-        _: &mut Context<'_>,
-    ) -> Poll<Option<Self::Item>> {
-        Poll::Ready(if self.index < self.data.len() {
-            self.index += 1;
-
-            let batch = &self.data[self.index - 1];
-
-            // apply projection
-            match &self.projection {
-                Some(columns) => Some(RecordBatch::try_new(
-                    self.schema.clone(),
-                    columns.iter().map(|i| batch.column(*i).clone()).collect(),
-                )),
-                None => Some(Ok(batch.clone())),
-            }
-        } else {
-            None
-        })
-    }
-
-    fn size_hint(&self) -> (usize, Option<usize>) {
-        (self.data.len(), Some(self.data.len()))
-    }
-}
-
-impl RecordBatchStream for MemoryStream {
-    /// Get the schema
-
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-}
diff --git a/rust/ballista/rust/core/src/serde/logical_plan/from_proto.rs b/rust/ballista/rust/core/src/serde/logical_plan/from_proto.rs
deleted file mode 100644
index 93084260662..00000000000
--- a/rust/ballista/rust/core/src/serde/logical_plan/from_proto.rs
+++ /dev/null
@@ -1,1200 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Serde code to convert from protocol buffers to Rust data structures.
-
-use std::{
-    convert::{From, TryInto},
-    unimplemented,
-};
-
-use crate::error::BallistaError;
-use crate::serde::{proto_error, protobuf};
-use crate::{convert_box_required, convert_required};
-
-use arrow::datatypes::{DataType, Field, Schema};
-use datafusion::logical_plan::{
-    abs, acos, asin, atan, ceil, cos, exp, floor, log10, log2, round, signum, sin, sqrt,
-    tan, trunc, Expr, JoinType, LogicalPlan, LogicalPlanBuilder, Operator,
-};
-use datafusion::physical_plan::aggregates::AggregateFunction;
-use datafusion::physical_plan::csv::CsvReadOptions;
-use datafusion::scalar::ScalarValue;
-use protobuf::logical_plan_node::LogicalPlanType;
-use protobuf::{logical_expr_node::ExprType, scalar_type};
-
-// use uuid::Uuid;
-
-impl TryInto<LogicalPlan> for &protobuf::LogicalPlanNode {
-    type Error = BallistaError;
-
-    fn try_into(self) -> Result<LogicalPlan, Self::Error> {
-        let plan = self.logical_plan_type.as_ref().ok_or_else(|| {
-            proto_error(format!(
-                "logical_plan::from_proto() Unsupported logical plan '{:?}'",
-                self
-            ))
-        })?;
-        match plan {
-            LogicalPlanType::Projection(projection) => {
-                let input: LogicalPlan = convert_box_required!(projection.input)?;
-                let x: Vec<Expr> = projection
-                    .expr
-                    .iter()
-                    .map(|expr| expr.try_into())
-                    .collect::<Result<Vec<_>, _>>()?;
-                LogicalPlanBuilder::from(&input)
-                    .project(x)?
-                    .build()
-                    .map_err(|e| e.into())
-            }
-            LogicalPlanType::Selection(selection) => {
-                let input: LogicalPlan = convert_box_required!(selection.input)?;
-                LogicalPlanBuilder::from(&input)
-                    .filter(
-                        selection
-                            .expr
-                            .as_ref()
-                            .expect("expression required")
-                            .try_into()?,
-                    )?
-                    .build()
-                    .map_err(|e| e.into())
-            }
-            LogicalPlanType::Aggregate(aggregate) => {
-                let input: LogicalPlan = convert_box_required!(aggregate.input)?;
-                let group_expr = aggregate
-                    .group_expr
-                    .iter()
-                    .map(|expr| expr.try_into())
-                    .collect::<Result<Vec<_>, _>>()?;
-                let aggr_expr = aggregate
-                    .aggr_expr
-                    .iter()
-                    .map(|expr| expr.try_into())
-                    .collect::<Result<Vec<_>, _>>()?;
-                LogicalPlanBuilder::from(&input)
-                    .aggregate(group_expr, aggr_expr)?
-                    .build()
-                    .map_err(|e| e.into())
-            }
-            LogicalPlanType::CsvScan(scan) => {
-                let schema: Schema = convert_required!(scan.schema)?;
-                let options = CsvReadOptions::new()
-                    .schema(&schema)
-                    .delimiter(scan.delimiter.as_bytes()[0])
-                    .file_extension(&scan.file_extension)
-                    .has_header(scan.has_header);
-
-                let mut projection = None;
-                if let Some(column_names) = &scan.projection {
-                    let column_indices = column_names
-                        .columns
-                        .iter()
-                        .map(|name| schema.index_of(name))
-                        .collect::<Result<Vec<usize>, _>>()?;
-                    projection = Some(column_indices);
-                }
-
-                LogicalPlanBuilder::scan_csv(&scan.path, options, projection)?
-                    .build()
-                    .map_err(|e| e.into())
-            }
-            LogicalPlanType::ParquetScan(scan) => {
-                let projection = match scan.projection.as_ref() {
-                    None => None,
-                    Some(columns) => {
-                        let schema: Schema = convert_required!(scan.schema)?;
-                        let r: Result<Vec<usize>, _> = columns
-                            .columns
-                            .iter()
-                            .map(|col_name| {
-                                schema.fields().iter().position(|field| field.name() == col_name).ok_or_else(|| {
-                                    let column_names: Vec<&String> = schema.fields().iter().map(|f| f.name()).collect();
-                                    proto_error(format!(
-                                        "Parquet projection contains column name that is not present in schema. Column name: {}. Schema columns: {:?}",
-                                        col_name, column_names
-                                    ))
-                                })
-                            })
-                            .collect();
-                        Some(r?)
-                    }
-                };
-                LogicalPlanBuilder::scan_parquet(&scan.path, projection, 24)? //TODO concurrency
-                    .build()
-                    .map_err(|e| e.into())
-            }
-            LogicalPlanType::Sort(sort) => {
-                let input: LogicalPlan = convert_box_required!(sort.input)?;
-                let sort_expr: Vec<Expr> = sort
-                    .expr
-                    .iter()
-                    .map(|expr| expr.try_into())
-                    .collect::<Result<Vec<Expr>, _>>()?;
-                LogicalPlanBuilder::from(&input)
-                    .sort(sort_expr)?
-                    .build()
-                    .map_err(|e| e.into())
-            }
-            LogicalPlanType::Repartition(repartition) => {
-                use datafusion::logical_plan::Partitioning;
-                let input: LogicalPlan = convert_box_required!(repartition.input)?;
-                use protobuf::repartition_node::PartitionMethod;
-                let pb_partition_method = repartition.partition_method.clone().ok_or_else(|| {
-                    BallistaError::General(String::from(
-                        "Protobuf deserialization error, RepartitionNode was missing required field 'partition_method'",
-                    ))
-                })?;
-
-                let partitioning_scheme = match pb_partition_method {
-                    PartitionMethod::Hash(protobuf::HashRepartition {
-                        hash_expr: pb_hash_expr,
-                        partition_count,
-                    }) => Partitioning::Hash(
-                        pb_hash_expr
-                            .iter()
-                            .map(|pb_expr| pb_expr.try_into())
-                            .collect::<Result<Vec<_>, _>>()?,
-                        partition_count as usize,
-                    ),
-                    PartitionMethod::RoundRobin(batch_size) => {
-                        Partitioning::RoundRobinBatch(batch_size as usize)
-                    }
-                };
-
-                LogicalPlanBuilder::from(&input)
-                    .repartition(partitioning_scheme)?
-                    .build()
-                    .map_err(|e| e.into())
-            }
-            LogicalPlanType::EmptyRelation(empty_relation) => {
-                LogicalPlanBuilder::empty(empty_relation.produce_one_row)
-                    .build()
-                    .map_err(|e| e.into())
-            }
-            LogicalPlanType::CreateExternalTable(create_extern_table) => {
-                let pb_schema = (create_extern_table.schema.clone()).ok_or_else(|| {
-                    BallistaError::General(String::from(
-                        "Protobuf deserialization error, CreateExternalTableNode was missing required field schema.",
-                    ))
-                })?;
-
-                let pb_file_type: protobuf::FileType =
-                    create_extern_table.file_type.try_into()?;
-
-                Ok(LogicalPlan::CreateExternalTable {
-                    schema: pb_schema.try_into()?,
-                    name: create_extern_table.name.clone(),
-                    location: create_extern_table.location.clone(),
-                    file_type: pb_file_type.into(),
-                    has_header: create_extern_table.has_header,
-                })
-            }
-            LogicalPlanType::Explain(explain) => {
-                let input: LogicalPlan = convert_box_required!(explain.input)?;
-                LogicalPlanBuilder::from(&input)
-                    .explain(explain.verbose)?
-                    .build()
-                    .map_err(|e| e.into())
-            }
-            LogicalPlanType::Limit(limit) => {
-                let input: LogicalPlan = convert_box_required!(limit.input)?;
-                LogicalPlanBuilder::from(&input)
-                    .limit(limit.limit as usize)?
-                    .build()
-                    .map_err(|e| e.into())
-            }
-            LogicalPlanType::Join(join) => {
-                let left_keys: Vec<&str> =
-                    join.left_join_column.iter().map(|i| i.as_str()).collect();
-                let right_keys: Vec<&str> =
-                    join.right_join_column.iter().map(|i| i.as_str()).collect();
-                let join_type =
-                    protobuf::JoinType::from_i32(join.join_type).ok_or_else(|| {
-                        proto_error(format!(
-                            "Received a JoinNode message with unknown JoinType {}",
-                            join.join_type
-                        ))
-                    })?;
-                let join_type = match join_type {
-                    protobuf::JoinType::Inner => JoinType::Inner,
-                    protobuf::JoinType::Left => JoinType::Left,
-                    protobuf::JoinType::Right => JoinType::Right,
-                };
-                LogicalPlanBuilder::from(&convert_box_required!(join.left)?)
-                    .join(
-                        &convert_box_required!(join.right)?,
-                        join_type,
-                        &left_keys,
-                        &right_keys,
-                    )?
-                    .build()
-                    .map_err(|e| e.into())
-            }
-        }
-    }
-}
-
-impl TryInto<datafusion::logical_plan::DFSchema> for protobuf::Schema {
-    type Error = BallistaError;
-    fn try_into(self) -> Result<datafusion::logical_plan::DFSchema, Self::Error> {
-        let schema: Schema = (&self).try_into()?;
-        schema.try_into().map_err(BallistaError::DataFusionError)
-    }
-}
-
-impl TryInto<datafusion::logical_plan::DFSchemaRef> for protobuf::Schema {
-    type Error = BallistaError;
-    fn try_into(self) -> Result<datafusion::logical_plan::DFSchemaRef, Self::Error> {
-        use datafusion::logical_plan::ToDFSchema;
-        let schema: Schema = (&self).try_into()?;
-        schema
-            .to_dfschema_ref()
-            .map_err(BallistaError::DataFusionError)
-    }
-}
-
-impl TryInto<arrow::datatypes::DataType> for &protobuf::scalar_type::Datatype {
-    type Error = BallistaError;
-    fn try_into(self) -> Result<arrow::datatypes::DataType, Self::Error> {
-        use protobuf::scalar_type::Datatype;
-        Ok(match self {
-            Datatype::Scalar(scalar_type) => {
-                let pb_scalar_enum = protobuf::PrimitiveScalarType::from_i32(*scalar_type).ok_or_else(|| {
-                    proto_error(format!(
-                        "Protobuf deserialization error, scalar_type::Datatype missing was provided invalid enum variant: {}",
-                        *scalar_type
-                    ))
-                })?;
-                pb_scalar_enum.into()
-            }
-            Datatype::List(protobuf::ScalarListType {
-                deepest_type,
-                field_names,
-            }) => {
-                if field_names.is_empty() {
-                    return Err(proto_error(
-                        "Protobuf deserialization error: found no field names in ScalarListType message which requires at least one",
-                    ));
-                }
-                let pb_scalar_type = protobuf::PrimitiveScalarType::from_i32(
-                    *deepest_type,
-                )
-                .ok_or_else(|| {
-                    proto_error(format!(
-                        "Protobuf deserialization error: invalid i32 for scalar enum: {}",
-                        *deepest_type
-                    ))
-                })?;
-                //Because length is checked above it is safe to unwrap .last()
-                let mut scalar_type =
-                    arrow::datatypes::DataType::List(Box::new(Field::new(
-                        field_names.last().unwrap().as_str(),
-                        pb_scalar_type.into(),
-                        true,
-                    )));
-                //Iterate over field names in reverse order except for the last item in the vector
-                for name in field_names.iter().rev().skip(1) {
-                    let new_datatype = arrow::datatypes::DataType::List(Box::new(
-                        Field::new(name.as_str(), scalar_type, true),
-                    ));
-                    scalar_type = new_datatype;
-                }
-                scalar_type
-            }
-        })
-    }
-}
-
-impl TryInto<arrow::datatypes::DataType> for &protobuf::arrow_type::ArrowTypeEnum {
-    type Error = BallistaError;
-    fn try_into(self) -> Result<arrow::datatypes::DataType, Self::Error> {
-        use arrow::datatypes::DataType;
-        use protobuf::arrow_type;
-        Ok(match self {
-            arrow_type::ArrowTypeEnum::None(_) => DataType::Null,
-            arrow_type::ArrowTypeEnum::Bool(_) => DataType::Boolean,
-            arrow_type::ArrowTypeEnum::Uint8(_) => DataType::UInt8,
-            arrow_type::ArrowTypeEnum::Int8(_) => DataType::Int8,
-            arrow_type::ArrowTypeEnum::Uint16(_) => DataType::UInt16,
-            arrow_type::ArrowTypeEnum::Int16(_) => DataType::Int16,
-            arrow_type::ArrowTypeEnum::Uint32(_) => DataType::UInt32,
-            arrow_type::ArrowTypeEnum::Int32(_) => DataType::Int32,
-            arrow_type::ArrowTypeEnum::Uint64(_) => DataType::UInt64,
-            arrow_type::ArrowTypeEnum::Int64(_) => DataType::Int64,
-            arrow_type::ArrowTypeEnum::Float16(_) => DataType::Float16,
-            arrow_type::ArrowTypeEnum::Float32(_) => DataType::Float32,
-            arrow_type::ArrowTypeEnum::Float64(_) => DataType::Float64,
-            arrow_type::ArrowTypeEnum::Utf8(_) => DataType::Utf8,
-            arrow_type::ArrowTypeEnum::LargeUtf8(_) => DataType::LargeUtf8,
-            arrow_type::ArrowTypeEnum::Binary(_) => DataType::Binary,
-            arrow_type::ArrowTypeEnum::FixedSizeBinary(size) => {
-                DataType::FixedSizeBinary(*size)
-            }
-            arrow_type::ArrowTypeEnum::LargeBinary(_) => DataType::LargeBinary,
-            arrow_type::ArrowTypeEnum::Date32(_) => DataType::Date32,
-            arrow_type::ArrowTypeEnum::Date64(_) => DataType::Date64,
-            arrow_type::ArrowTypeEnum::Duration(time_unit) => {
-                DataType::Duration(protobuf::TimeUnit::from_i32_to_arrow(*time_unit)?)
-            }
-            arrow_type::ArrowTypeEnum::Timestamp(protobuf::Timestamp {
-                time_unit,
-                timezone,
-            }) => DataType::Timestamp(
-                protobuf::TimeUnit::from_i32_to_arrow(*time_unit)?,
-                match timezone.len() {
-                    0 => None,
-                    _ => Some(timezone.to_owned()),
-                },
-            ),
-            arrow_type::ArrowTypeEnum::Time32(time_unit) => {
-                DataType::Time32(protobuf::TimeUnit::from_i32_to_arrow(*time_unit)?)
-            }
-            arrow_type::ArrowTypeEnum::Time64(time_unit) => {
-                DataType::Time64(protobuf::TimeUnit::from_i32_to_arrow(*time_unit)?)
-            }
-            arrow_type::ArrowTypeEnum::Interval(interval_unit) => DataType::Interval(
-                protobuf::IntervalUnit::from_i32_to_arrow(*interval_unit)?,
-            ),
-            arrow_type::ArrowTypeEnum::Decimal(protobuf::Decimal {
-                whole,
-                fractional,
-            }) => DataType::Decimal(*whole as usize, *fractional as usize),
-            arrow_type::ArrowTypeEnum::List(list) => {
-                let list_type: &protobuf::Field = list
-                    .as_ref()
-                    .field_type
-                    .as_ref()
-                    .ok_or_else(|| proto_error("Protobuf deserialization error: List message missing required field 'field_type'"))?
-                    .as_ref();
-                DataType::List(Box::new(list_type.try_into()?))
-            }
-            arrow_type::ArrowTypeEnum::LargeList(list) => {
-                let list_type: &protobuf::Field = list
-                    .as_ref()
-                    .field_type
-                    .as_ref()
-                    .ok_or_else(|| proto_error("Protobuf deserialization error: List message missing required field 'field_type'"))?
-                    .as_ref();
-                DataType::LargeList(Box::new(list_type.try_into()?))
-            }
-            arrow_type::ArrowTypeEnum::FixedSizeList(list) => {
-                let list_type: &protobuf::Field = list
-                    .as_ref()
-                    .field_type
-                    .as_ref()
-                    .ok_or_else(|| proto_error("Protobuf deserialization error: List message missing required field 'field_type'"))?
-                    .as_ref();
-                let list_size = list.list_size;
-                DataType::FixedSizeList(Box::new(list_type.try_into()?), list_size)
-            }
-            arrow_type::ArrowTypeEnum::Struct(strct) => DataType::Struct(
-                strct
-                    .sub_field_types
-                    .iter()
-                    .map(|field| field.try_into())
-                    .collect::<Result<Vec<_>, _>>()?,
-            ),
-            arrow_type::ArrowTypeEnum::Union(union) => DataType::Union(
-                union
-                    .union_types
-                    .iter()
-                    .map(|field| field.try_into())
-                    .collect::<Result<Vec<_>, _>>()?,
-            ),
-            arrow_type::ArrowTypeEnum::Dictionary(dict) => {
-                let pb_key_datatype = dict
-                    .as_ref()
-                    .key
-                    .as_ref()
-                    .ok_or_else(|| proto_error("Protobuf deserialization error: Dictionary message missing required field 'key'"))?;
-                let pb_value_datatype = dict
-                    .as_ref()
-                    .value
-                    .as_ref()
-                    .ok_or_else(|| proto_error("Protobuf deserialization error: Dictionary message missing required field 'key'"))?;
-                let key_datatype: DataType = pb_key_datatype.as_ref().try_into()?;
-                let value_datatype: DataType = pb_value_datatype.as_ref().try_into()?;
-                DataType::Dictionary(Box::new(key_datatype), Box::new(value_datatype))
-            }
-        })
-    }
-}
-
-impl Into<arrow::datatypes::DataType> for protobuf::PrimitiveScalarType {
-    fn into(self) -> arrow::datatypes::DataType {
-        use arrow::datatypes::DataType;
-        match self {
-            protobuf::PrimitiveScalarType::Bool => DataType::Boolean,
-            protobuf::PrimitiveScalarType::Uint8 => DataType::UInt8,
-            protobuf::PrimitiveScalarType::Int8 => DataType::Int8,
-            protobuf::PrimitiveScalarType::Uint16 => DataType::UInt16,
-            protobuf::PrimitiveScalarType::Int16 => DataType::Int16,
-            protobuf::PrimitiveScalarType::Uint32 => DataType::UInt32,
-            protobuf::PrimitiveScalarType::Int32 => DataType::Int32,
-            protobuf::PrimitiveScalarType::Uint64 => DataType::UInt64,
-            protobuf::PrimitiveScalarType::Int64 => DataType::Int64,
-            protobuf::PrimitiveScalarType::Float32 => DataType::Float32,
-            protobuf::PrimitiveScalarType::Float64 => DataType::Float64,
-            protobuf::PrimitiveScalarType::Utf8 => DataType::Utf8,
-            protobuf::PrimitiveScalarType::LargeUtf8 => DataType::LargeUtf8,
-            protobuf::PrimitiveScalarType::Date32 => DataType::Date32,
-            protobuf::PrimitiveScalarType::TimeMicrosecond => {
-                DataType::Time64(arrow::datatypes::TimeUnit::Microsecond)
-            }
-            protobuf::PrimitiveScalarType::TimeNanosecond => {
-                DataType::Time64(arrow::datatypes::TimeUnit::Nanosecond)
-            }
-            protobuf::PrimitiveScalarType::Null => DataType::Null,
-        }
-    }
-}
-
-//Does not typecheck lists
-fn typechecked_scalar_value_conversion(
-    tested_type: &protobuf::scalar_value::Value,
-    required_type: protobuf::PrimitiveScalarType,
-) -> Result<datafusion::scalar::ScalarValue, BallistaError> {
-    use protobuf::scalar_value::Value;
-    use protobuf::PrimitiveScalarType;
-    Ok(match (tested_type, &required_type) {
-        (Value::BoolValue(v), PrimitiveScalarType::Bool) => {
-            ScalarValue::Boolean(Some(*v))
-        }
-        (Value::Int8Value(v), PrimitiveScalarType::Int8) => {
-            ScalarValue::Int8(Some(*v as i8))
-        }
-        (Value::Int16Value(v), PrimitiveScalarType::Int16) => {
-            ScalarValue::Int16(Some(*v as i16))
-        }
-        (Value::Int32Value(v), PrimitiveScalarType::Int32) => {
-            ScalarValue::Int32(Some(*v))
-        }
-        (Value::Int64Value(v), PrimitiveScalarType::Int64) => {
-            ScalarValue::Int64(Some(*v))
-        }
-        (Value::Uint8Value(v), PrimitiveScalarType::Uint8) => {
-            ScalarValue::UInt8(Some(*v as u8))
-        }
-        (Value::Uint16Value(v), PrimitiveScalarType::Uint16) => {
-            ScalarValue::UInt16(Some(*v as u16))
-        }
-        (Value::Uint32Value(v), PrimitiveScalarType::Uint32) => {
-            ScalarValue::UInt32(Some(*v))
-        }
-        (Value::Uint64Value(v), PrimitiveScalarType::Uint64) => {
-            ScalarValue::UInt64(Some(*v))
-        }
-        (Value::Float32Value(v), PrimitiveScalarType::Float32) => {
-            ScalarValue::Float32(Some(*v))
-        }
-        (Value::Float64Value(v), PrimitiveScalarType::Float64) => {
-            ScalarValue::Float64(Some(*v))
-        }
-        (Value::Date32Value(v), PrimitiveScalarType::Date32) => {
-            ScalarValue::Date32(Some(*v))
-        }
-        (Value::TimeMicrosecondValue(v), PrimitiveScalarType::TimeMicrosecond) => {
-            ScalarValue::TimestampMicrosecond(Some(*v))
-        }
-        (Value::TimeNanosecondValue(v), PrimitiveScalarType::TimeMicrosecond) => {
-            ScalarValue::TimestampNanosecond(Some(*v))
-        }
-        (Value::Utf8Value(v), PrimitiveScalarType::Utf8) => {
-            ScalarValue::Utf8(Some(v.to_owned()))
-        }
-        (Value::LargeUtf8Value(v), PrimitiveScalarType::LargeUtf8) => {
-            ScalarValue::LargeUtf8(Some(v.to_owned()))
-        }
-
-        (Value::NullValue(i32_enum), required_scalar_type) => {
-            if *i32_enum == *required_scalar_type as i32 {
-                let pb_scalar_type = PrimitiveScalarType::from_i32(*i32_enum).ok_or_else(|| {
-                    BallistaError::General(format!(
-                        "Invalid i32_enum={} when converting with PrimitiveScalarType::from_i32()",
-                        *i32_enum
-                    ))
-                })?;
-                let scalar_value: ScalarValue = match pb_scalar_type {
-                    PrimitiveScalarType::Bool => ScalarValue::Boolean(None),
-                    PrimitiveScalarType::Uint8 => ScalarValue::UInt8(None),
-                    PrimitiveScalarType::Int8 => ScalarValue::Int8(None),
-                    PrimitiveScalarType::Uint16 => ScalarValue::UInt16(None),
-                    PrimitiveScalarType::Int16 => ScalarValue::Int16(None),
-                    PrimitiveScalarType::Uint32 => ScalarValue::UInt32(None),
-                    PrimitiveScalarType::Int32 => ScalarValue::Int32(None),
-                    PrimitiveScalarType::Uint64 => ScalarValue::UInt64(None),
-                    PrimitiveScalarType::Int64 => ScalarValue::Int64(None),
-                    PrimitiveScalarType::Float32 => ScalarValue::Float32(None),
-                    PrimitiveScalarType::Float64 => ScalarValue::Float64(None),
-                    PrimitiveScalarType::Utf8 => ScalarValue::Utf8(None),
-                    PrimitiveScalarType::LargeUtf8 => ScalarValue::LargeUtf8(None),
-                    PrimitiveScalarType::Date32 => ScalarValue::Date32(None),
-                    PrimitiveScalarType::TimeMicrosecond => {
-                        ScalarValue::TimestampMicrosecond(None)
-                    }
-                    PrimitiveScalarType::TimeNanosecond => {
-                        ScalarValue::TimestampNanosecond(None)
-                    }
-                    PrimitiveScalarType::Null => {
-                        return Err(proto_error(
-                            "Untyped scalar null is not a valid scalar value",
-                        ))
-                    }
-                };
-                scalar_value
-            } else {
-                return Err(proto_error("Could not convert to the proper type"));
-            }
-        }
-        _ => return Err(proto_error("Could not convert to the proper type")),
-    })
-}
-
-impl TryInto<datafusion::scalar::ScalarValue> for &protobuf::scalar_value::Value {
-    type Error = BallistaError;
-    fn try_into(self) -> Result<datafusion::scalar::ScalarValue, Self::Error> {
-        use datafusion::scalar::ScalarValue;
-        use protobuf::PrimitiveScalarType;
-        let scalar = match self {
-            protobuf::scalar_value::Value::BoolValue(v) => ScalarValue::Boolean(Some(*v)),
-            protobuf::scalar_value::Value::Utf8Value(v) => {
-                ScalarValue::Utf8(Some(v.to_owned()))
-            }
-            protobuf::scalar_value::Value::LargeUtf8Value(v) => {
-                ScalarValue::LargeUtf8(Some(v.to_owned()))
-            }
-            protobuf::scalar_value::Value::Int8Value(v) => {
-                ScalarValue::Int8(Some(*v as i8))
-            }
-            protobuf::scalar_value::Value::Int16Value(v) => {
-                ScalarValue::Int16(Some(*v as i16))
-            }
-            protobuf::scalar_value::Value::Int32Value(v) => ScalarValue::Int32(Some(*v)),
-            protobuf::scalar_value::Value::Int64Value(v) => ScalarValue::Int64(Some(*v)),
-            protobuf::scalar_value::Value::Uint8Value(v) => {
-                ScalarValue::UInt8(Some(*v as u8))
-            }
-            protobuf::scalar_value::Value::Uint16Value(v) => {
-                ScalarValue::UInt16(Some(*v as u16))
-            }
-            protobuf::scalar_value::Value::Uint32Value(v) => {
-                ScalarValue::UInt32(Some(*v))
-            }
-            protobuf::scalar_value::Value::Uint64Value(v) => {
-                ScalarValue::UInt64(Some(*v))
-            }
-            protobuf::scalar_value::Value::Float32Value(v) => {
-                ScalarValue::Float32(Some(*v))
-            }
-            protobuf::scalar_value::Value::Float64Value(v) => {
-                ScalarValue::Float64(Some(*v))
-            }
-            protobuf::scalar_value::Value::Date32Value(v) => {
-                ScalarValue::Date32(Some(*v))
-            }
-            protobuf::scalar_value::Value::TimeMicrosecondValue(v) => {
-                ScalarValue::TimestampMicrosecond(Some(*v))
-            }
-            protobuf::scalar_value::Value::TimeNanosecondValue(v) => {
-                ScalarValue::TimestampNanosecond(Some(*v))
-            }
-            protobuf::scalar_value::Value::ListValue(v) => v.try_into()?,
-            protobuf::scalar_value::Value::NullListValue(v) => {
-                ScalarValue::List(None, v.try_into()?)
-            }
-            protobuf::scalar_value::Value::NullValue(null_enum) => {
-                PrimitiveScalarType::from_i32(*null_enum)
-                    .ok_or_else(|| proto_error("Invalid scalar type"))?
-                    .try_into()?
-            }
-        };
-        Ok(scalar)
-    }
-}
-
-impl TryInto<datafusion::scalar::ScalarValue> for &protobuf::ScalarListValue {
-    type Error = BallistaError;
-    fn try_into(self) -> Result<datafusion::scalar::ScalarValue, Self::Error> {
-        use protobuf::scalar_type::Datatype;
-        use protobuf::PrimitiveScalarType;
-        let protobuf::ScalarListValue { datatype, values } = self;
-        let pb_scalar_type = datatype
-            .as_ref()
-            .ok_or_else(|| proto_error("Protobuf deserialization error: ScalarListValue messsage missing required field 'datatype'"))?;
-        let scalar_type = pb_scalar_type
-            .datatype
-            .as_ref()
-            .ok_or_else(|| proto_error("Protobuf deserialization error: ScalarListValue.Datatype messsage missing required field 'datatype'"))?;
-        let scalar_values = match scalar_type {
-            Datatype::Scalar(scalar_type_i32) => {
-                let leaf_scalar_type =
-                    protobuf::PrimitiveScalarType::from_i32(*scalar_type_i32)
-                        .ok_or_else(|| {
-                            proto_error("Error converting i32 to basic scalar type")
-                        })?;
-                let typechecked_values: Vec<datafusion::scalar::ScalarValue> = values
-                    .iter()
-                    .map(|protobuf::ScalarValue { value: opt_value }| {
-                        let value = opt_value.as_ref().ok_or_else(|| {
-                            proto_error(
-                                "Protobuf deserialization error: missing required field 'value'",
-                            )
-                        })?;
-                        typechecked_scalar_value_conversion(value, leaf_scalar_type)
-                    })
-                    .collect::<Result<Vec<_>, _>>()?;
-                datafusion::scalar::ScalarValue::List(
-                    Some(typechecked_values),
-                    leaf_scalar_type.into(),
-                )
-            }
-            Datatype::List(list_type) => {
-                let protobuf::ScalarListType {
-                    deepest_type,
-                    field_names,
-                } = &list_type;
-                let leaf_type =
-                    PrimitiveScalarType::from_i32(*deepest_type).ok_or_else(|| {
-                        proto_error("Error converting i32 to basic scalar type")
-                    })?;
-                let depth = field_names.len();
-
-                let typechecked_values: Vec<datafusion::scalar::ScalarValue> = if depth
-                    == 0
-                {
-                    return Err(proto_error(
-                        "Protobuf deserialization error, ScalarListType had no field names, requires at least one",
-                    ));
-                } else if depth == 1 {
-                    values
-                        .iter()
-                        .map(|protobuf::ScalarValue { value: opt_value }| {
-                            let value = opt_value
-                                .as_ref()
-                                .ok_or_else(|| proto_error("Protobuf deserialization error: missing required field 'value'"))?;
-                            typechecked_scalar_value_conversion(value, leaf_type)
-                        })
-                        .collect::<Result<Vec<_>, _>>()?
-                } else {
-                    values
-                        .iter()
-                        .map(|protobuf::ScalarValue { value: opt_value }| {
-                            let value = opt_value
-                                .as_ref()
-                                .ok_or_else(|| proto_error("Protobuf deserialization error: missing required field 'value'"))?;
-                            value.try_into()
-                        })
-                        .collect::<Result<Vec<_>, _>>()?
-                };
-                datafusion::scalar::ScalarValue::List(
-                    match typechecked_values.len() {
-                        0 => None,
-                        _ => Some(typechecked_values),
-                    },
-                    list_type.try_into()?,
-                )
-            }
-        };
-        Ok(scalar_values)
-    }
-}
-
-impl TryInto<arrow::datatypes::DataType> for &protobuf::ScalarListType {
-    type Error = BallistaError;
-    fn try_into(self) -> Result<arrow::datatypes::DataType, Self::Error> {
-        use protobuf::PrimitiveScalarType;
-        let protobuf::ScalarListType {
-            deepest_type,
-            field_names,
-        } = self;
-
-        let depth = field_names.len();
-        if depth == 0 {
-            return Err(proto_error(
-                "Protobuf deserialization error: Found a ScalarListType message with no field names, at least one is required",
-            ));
-        }
-
-        let mut curr_type = arrow::datatypes::DataType::List(Box::new(Field::new(
-            //Since checked vector is not empty above this is safe to unwrap
-            field_names.last().unwrap(),
-            PrimitiveScalarType::from_i32(*deepest_type)
-                .ok_or_else(|| {
-                    proto_error("Could not convert to datafusion scalar type")
-                })?
-                .into(),
-            true,
-        )));
-        //Iterates over field names in reverse order except for the last item in the vector
-        for name in field_names.iter().rev().skip(1) {
-            let temp_curr_type = arrow::datatypes::DataType::List(Box::new(Field::new(
-                name, curr_type, true,
-            )));
-            curr_type = temp_curr_type;
-        }
-        Ok(curr_type)
-    }
-}
-
-impl TryInto<datafusion::scalar::ScalarValue> for protobuf::PrimitiveScalarType {
-    type Error = BallistaError;
-    fn try_into(self) -> Result<datafusion::scalar::ScalarValue, Self::Error> {
-        use datafusion::scalar::ScalarValue;
-        Ok(match self {
-            protobuf::PrimitiveScalarType::Null => {
-                return Err(proto_error("Untyped null is an invalid scalar value"))
-            }
-            protobuf::PrimitiveScalarType::Bool => ScalarValue::Boolean(None),
-            protobuf::PrimitiveScalarType::Uint8 => ScalarValue::UInt8(None),
-            protobuf::PrimitiveScalarType::Int8 => ScalarValue::Int8(None),
-            protobuf::PrimitiveScalarType::Uint16 => ScalarValue::UInt16(None),
-            protobuf::PrimitiveScalarType::Int16 => ScalarValue::Int16(None),
-            protobuf::PrimitiveScalarType::Uint32 => ScalarValue::UInt32(None),
-            protobuf::PrimitiveScalarType::Int32 => ScalarValue::Int32(None),
-            protobuf::PrimitiveScalarType::Uint64 => ScalarValue::UInt64(None),
-            protobuf::PrimitiveScalarType::Int64 => ScalarValue::Int64(None),
-            protobuf::PrimitiveScalarType::Float32 => ScalarValue::Float32(None),
-            protobuf::PrimitiveScalarType::Float64 => ScalarValue::Float64(None),
-            protobuf::PrimitiveScalarType::Utf8 => ScalarValue::Utf8(None),
-            protobuf::PrimitiveScalarType::LargeUtf8 => ScalarValue::LargeUtf8(None),
-            protobuf::PrimitiveScalarType::Date32 => ScalarValue::Date32(None),
-            protobuf::PrimitiveScalarType::TimeMicrosecond => {
-                ScalarValue::TimestampMicrosecond(None)
-            }
-            protobuf::PrimitiveScalarType::TimeNanosecond => {
-                ScalarValue::TimestampNanosecond(None)
-            }
-        })
-    }
-}
-
-impl TryInto<datafusion::scalar::ScalarValue> for &protobuf::ScalarValue {
-    type Error = BallistaError;
-    fn try_into(self) -> Result<datafusion::scalar::ScalarValue, Self::Error> {
-        let value = self.value.as_ref().ok_or_else(|| {
-            proto_error("Protobuf deserialization error: missing required field 'value'")
-        })?;
-        Ok(match value {
-            protobuf::scalar_value::Value::BoolValue(v) => ScalarValue::Boolean(Some(*v)),
-            protobuf::scalar_value::Value::Utf8Value(v) => {
-                ScalarValue::Utf8(Some(v.to_owned()))
-            }
-            protobuf::scalar_value::Value::LargeUtf8Value(v) => {
-                ScalarValue::LargeUtf8(Some(v.to_owned()))
-            }
-            protobuf::scalar_value::Value::Int8Value(v) => {
-                ScalarValue::Int8(Some(*v as i8))
-            }
-            protobuf::scalar_value::Value::Int16Value(v) => {
-                ScalarValue::Int16(Some(*v as i16))
-            }
-            protobuf::scalar_value::Value::Int32Value(v) => ScalarValue::Int32(Some(*v)),
-            protobuf::scalar_value::Value::Int64Value(v) => ScalarValue::Int64(Some(*v)),
-            protobuf::scalar_value::Value::Uint8Value(v) => {
-                ScalarValue::UInt8(Some(*v as u8))
-            }
-            protobuf::scalar_value::Value::Uint16Value(v) => {
-                ScalarValue::UInt16(Some(*v as u16))
-            }
-            protobuf::scalar_value::Value::Uint32Value(v) => {
-                ScalarValue::UInt32(Some(*v))
-            }
-            protobuf::scalar_value::Value::Uint64Value(v) => {
-                ScalarValue::UInt64(Some(*v))
-            }
-            protobuf::scalar_value::Value::Float32Value(v) => {
-                ScalarValue::Float32(Some(*v))
-            }
-            protobuf::scalar_value::Value::Float64Value(v) => {
-                ScalarValue::Float64(Some(*v))
-            }
-            protobuf::scalar_value::Value::Date32Value(v) => {
-                ScalarValue::Date32(Some(*v))
-            }
-            protobuf::scalar_value::Value::TimeMicrosecondValue(v) => {
-                ScalarValue::TimestampMicrosecond(Some(*v))
-            }
-            protobuf::scalar_value::Value::TimeNanosecondValue(v) => {
-                ScalarValue::TimestampNanosecond(Some(*v))
-            }
-            protobuf::scalar_value::Value::ListValue(scalar_list) => {
-                let protobuf::ScalarListValue {
-                    values,
-                    datatype: opt_scalar_type,
-                } = &scalar_list;
-                let pb_scalar_type = opt_scalar_type
-                    .as_ref()
-                    .ok_or_else(|| proto_error("Protobuf deserialization err: ScalaListValue missing required field 'datatype'"))?;
-                let typechecked_values: Vec<ScalarValue> = values
-                    .iter()
-                    .map(|val| val.try_into())
-                    .collect::<Result<Vec<_>, _>>()?;
-                let scalar_type: arrow::datatypes::DataType =
-                    pb_scalar_type.try_into()?;
-                ScalarValue::List(Some(typechecked_values), scalar_type)
-            }
-            protobuf::scalar_value::Value::NullListValue(v) => {
-                let pb_datatype = v
-                    .datatype
-                    .as_ref()
-                    .ok_or_else(|| proto_error("Protobuf deserialization error: NullListValue message missing required field 'datatyp'"))?;
-                ScalarValue::List(None, pb_datatype.try_into()?)
-            }
-            protobuf::scalar_value::Value::NullValue(v) => {
-                let null_type_enum = protobuf::PrimitiveScalarType::from_i32(*v)
-                    .ok_or_else(|| proto_error("Protobuf deserialization error found invalid enum variant for DatafusionScalar"))?;
-                null_type_enum.try_into()?
-            }
-        })
-    }
-}
-
-impl TryInto<Expr> for &protobuf::LogicalExprNode {
-    type Error = BallistaError;
-
-    fn try_into(self) -> Result<Expr, Self::Error> {
-        use protobuf::logical_expr_node::ExprType;
-
-        let expr_type = self
-            .expr_type
-            .as_ref()
-            .ok_or_else(|| proto_error("Unexpected empty logical expression"))?;
-        match expr_type {
-            ExprType::BinaryExpr(binary_expr) => Ok(Expr::BinaryExpr {
-                left: Box::new(parse_required_expr(&binary_expr.l)?),
-                op: from_proto_binary_op(&binary_expr.op)?,
-                right: Box::new(parse_required_expr(&binary_expr.r)?),
-            }),
-            ExprType::ColumnName(column_name) => Ok(Expr::Column(column_name.to_owned())),
-            ExprType::Literal(literal) => {
-                use datafusion::scalar::ScalarValue;
-                let scalar_value: datafusion::scalar::ScalarValue = literal.try_into()?;
-                Ok(Expr::Literal(scalar_value))
-            }
-            ExprType::AggregateExpr(expr) => {
-                let aggr_function =
-                    protobuf::AggregateFunction::from_i32(expr.aggr_function)
-                        .ok_or_else(|| {
-                            proto_error(format!(
-                                "Received an unknown aggregate function: {}",
-                                expr.aggr_function
-                            ))
-                        })?;
-                let fun = match aggr_function {
-                    protobuf::AggregateFunction::Min => AggregateFunction::Min,
-                    protobuf::AggregateFunction::Max => AggregateFunction::Max,
-                    protobuf::AggregateFunction::Sum => AggregateFunction::Sum,
-                    protobuf::AggregateFunction::Avg => AggregateFunction::Avg,
-                    protobuf::AggregateFunction::Count => AggregateFunction::Count,
-                };
-
-                Ok(Expr::AggregateFunction {
-                    fun,
-                    args: vec![parse_required_expr(&expr.expr)?],
-                    distinct: false, //TODO
-                })
-            }
-            ExprType::Alias(alias) => Ok(Expr::Alias(
-                Box::new(parse_required_expr(&alias.expr)?),
-                alias.alias.clone(),
-            )),
-            ExprType::IsNullExpr(is_null) => {
-                Ok(Expr::IsNull(Box::new(parse_required_expr(&is_null.expr)?)))
-            }
-            ExprType::IsNotNullExpr(is_not_null) => Ok(Expr::IsNotNull(Box::new(
-                parse_required_expr(&is_not_null.expr)?,
-            ))),
-            ExprType::NotExpr(not) => {
-                Ok(Expr::Not(Box::new(parse_required_expr(&not.expr)?)))
-            }
-            ExprType::Between(between) => Ok(Expr::Between {
-                expr: Box::new(parse_required_expr(&between.expr)?),
-                negated: between.negated,
-                low: Box::new(parse_required_expr(&between.low)?),
-                high: Box::new(parse_required_expr(&between.high)?),
-            }),
-            ExprType::Case(case) => {
-                let when_then_expr = case
-                    .when_then_expr
-                    .iter()
-                    .map(|e| {
-                        Ok((
-                            Box::new(match &e.when_expr {
-                                Some(e) => e.try_into(),
-                                None => Err(proto_error("Missing required expression")),
-                            }?),
-                            Box::new(match &e.then_expr {
-                                Some(e) => e.try_into(),
-                                None => Err(proto_error("Missing required expression")),
-                            }?),
-                        ))
-                    })
-                    .collect::<Result<Vec<(Box<Expr>, Box<Expr>)>, BallistaError>>()?;
-                Ok(Expr::Case {
-                    expr: parse_optional_expr(&case.expr)?.map(Box::new),
-                    when_then_expr,
-                    else_expr: parse_optional_expr(&case.else_expr)?.map(Box::new),
-                })
-            }
-            ExprType::Cast(cast) => {
-                let expr = Box::new(parse_required_expr(&cast.expr)?);
-                let arrow_type: &protobuf::ArrowType = cast
-                    .arrow_type
-                    .as_ref()
-                    .ok_or_else(|| proto_error("Protobuf deserialization error: CastNode message missing required field 'arrow_type'"))?;
-                let data_type = arrow_type.try_into()?;
-                Ok(Expr::Cast { expr, data_type })
-            }
-            ExprType::TryCast(cast) => {
-                let expr = Box::new(parse_required_expr(&cast.expr)?);
-                let arrow_type: &protobuf::ArrowType = cast
-                    .arrow_type
-                    .as_ref()
-                    .ok_or_else(|| proto_error("Protobuf deserialization error: CastNode message missing required field 'arrow_type'"))?;
-                let data_type = arrow_type.try_into()?;
-                Ok(Expr::TryCast { expr, data_type })
-            }
-            ExprType::Sort(sort) => Ok(Expr::Sort {
-                expr: Box::new(parse_required_expr(&sort.expr)?),
-                asc: sort.asc,
-                nulls_first: sort.nulls_first,
-            }),
-            ExprType::Negative(negative) => Ok(Expr::Negative(Box::new(
-                parse_required_expr(&negative.expr)?,
-            ))),
-            ExprType::InList(in_list) => Ok(Expr::InList {
-                expr: Box::new(parse_required_expr(&in_list.expr)?),
-                list: in_list
-                    .list
-                    .iter()
-                    .map(|expr| expr.try_into())
-                    .collect::<Result<Vec<_>, _>>()?,
-                negated: in_list.negated,
-            }),
-            ExprType::Wildcard(_) => Ok(Expr::Wildcard),
-            ExprType::ScalarFunction(expr) => {
-                let scalar_function = protobuf::ScalarFunction::from_i32(expr.fun)
-                    .ok_or_else(|| {
-                        proto_error(format!(
-                            "Received an unknown scalar function: {}",
-                            expr.fun
-                        ))
-                    })?;
-                match scalar_function {
-                    protobuf::ScalarFunction::Sqrt => {
-                        Ok(sqrt((&expr.expr[0]).try_into()?))
-                    }
-                    protobuf::ScalarFunction::Sin => Ok(sin((&expr.expr[0]).try_into()?)),
-                    protobuf::ScalarFunction::Cos => Ok(cos((&expr.expr[0]).try_into()?)),
-                    protobuf::ScalarFunction::Tan => Ok(tan((&expr.expr[0]).try_into()?)),
-                    // protobuf::ScalarFunction::Asin => Ok(asin(&expr.expr[0]).try_into()?)),
-                    // protobuf::ScalarFunction::Acos => Ok(acos(&expr.expr[0]).try_into()?)),
-                    protobuf::ScalarFunction::Atan => {
-                        Ok(atan((&expr.expr[0]).try_into()?))
-                    }
-                    protobuf::ScalarFunction::Exp => Ok(exp((&expr.expr[0]).try_into()?)),
-                    protobuf::ScalarFunction::Log2 => {
-                        Ok(log2((&expr.expr[0]).try_into()?))
-                    }
-                    protobuf::ScalarFunction::Log10 => {
-                        Ok(log10((&expr.expr[0]).try_into()?))
-                    }
-                    protobuf::ScalarFunction::Floor => {
-                        Ok(floor((&expr.expr[0]).try_into()?))
-                    }
-                    protobuf::ScalarFunction::Ceil => {
-                        Ok(ceil((&expr.expr[0]).try_into()?))
-                    }
-                    protobuf::ScalarFunction::Round => {
-                        Ok(round((&expr.expr[0]).try_into()?))
-                    }
-                    protobuf::ScalarFunction::Trunc => {
-                        Ok(trunc((&expr.expr[0]).try_into()?))
-                    }
-                    protobuf::ScalarFunction::Abs => Ok(abs((&expr.expr[0]).try_into()?)),
-                    protobuf::ScalarFunction::Signum => {
-                        Ok(signum((&expr.expr[0]).try_into()?))
-                    }
-                    protobuf::ScalarFunction::Octetlength => {
-                        Ok(length((&expr.expr[0]).try_into()?))
-                    }
-                    // // protobuf::ScalarFunction::Concat => Ok(concat((&expr.expr[0]).try_into()?)),
-                    protobuf::ScalarFunction::Lower => {
-                        Ok(lower((&expr.expr[0]).try_into()?))
-                    }
-                    protobuf::ScalarFunction::Upper => {
-                        Ok(upper((&expr.expr[0]).try_into()?))
-                    }
-                    protobuf::ScalarFunction::Trim => {
-                        Ok(trim((&expr.expr[0]).try_into()?))
-                    }
-                    protobuf::ScalarFunction::Ltrim => {
-                        Ok(ltrim((&expr.expr[0]).try_into()?))
-                    }
-                    protobuf::ScalarFunction::Rtrim => {
-                        Ok(rtrim((&expr.expr[0]).try_into()?))
-                    }
-                    // protobuf::ScalarFunction::Totimestamp => Ok(to_timestamp((&expr.expr[0]).try_into()?)),
-                    // protobuf::ScalarFunction::Array => Ok(array((&expr.expr[0]).try_into()?)),
-                    // // protobuf::ScalarFunction::Nullif => Ok(nulli((&expr.expr[0]).try_into()?)),
-                    // protobuf::ScalarFunction::Datetrunc => Ok(date_trunc((&expr.expr[0]).try_into()?)),
-                    // protobuf::ScalarFunction::Md5 => Ok(md5((&expr.expr[0]).try_into()?)),
-                    protobuf::ScalarFunction::Sha224 => {
-                        Ok(sha224((&expr.expr[0]).try_into()?))
-                    }
-                    protobuf::ScalarFunction::Sha256 => {
-                        Ok(sha256((&expr.expr[0]).try_into()?))
-                    }
-                    protobuf::ScalarFunction::Sha384 => {
-                        Ok(sha384((&expr.expr[0]).try_into()?))
-                    }
-                    protobuf::ScalarFunction::Sha512 => {
-                        Ok(sha512((&expr.expr[0]).try_into()?))
-                    }
-                    _ => Err(proto_error(
-                        "Protobuf deserialization error: Unsupported scalar function",
-                    )),
-                }
-            }
-        }
-    }
-}
-
-fn from_proto_binary_op(op: &str) -> Result<Operator, BallistaError> {
-    match op {
-        "And" => Ok(Operator::And),
-        "Or" => Ok(Operator::Or),
-        "Eq" => Ok(Operator::Eq),
-        "NotEq" => Ok(Operator::NotEq),
-        "LtEq" => Ok(Operator::LtEq),
-        "Lt" => Ok(Operator::Lt),
-        "Gt" => Ok(Operator::Gt),
-        "GtEq" => Ok(Operator::GtEq),
-        "Plus" => Ok(Operator::Plus),
-        "Minus" => Ok(Operator::Minus),
-        "Multiply" => Ok(Operator::Multiply),
-        "Divide" => Ok(Operator::Divide),
-        "Like" => Ok(Operator::Like),
-        other => Err(proto_error(format!(
-            "Unsupported binary operator '{:?}'",
-            other
-        ))),
-    }
-}
-
-impl TryInto<arrow::datatypes::DataType> for &protobuf::ScalarType {
-    type Error = BallistaError;
-    fn try_into(self) -> Result<arrow::datatypes::DataType, Self::Error> {
-        let pb_scalartype = self.datatype.as_ref().ok_or_else(|| {
-            proto_error("ScalarType message missing required field 'datatype'")
-        })?;
-        pb_scalartype.try_into()
-    }
-}
-
-impl TryInto<Schema> for &protobuf::Schema {
-    type Error = BallistaError;
-
-    fn try_into(self) -> Result<Schema, BallistaError> {
-        let fields = self
-            .columns
-            .iter()
-            .map(|c| {
-                let pb_arrow_type_res = c
-                    .arrow_type
-                    .as_ref()
-                    .ok_or_else(|| proto_error("Protobuf deserialization error: Field message was missing required field 'arrow_type'"));
-                let pb_arrow_type: &protobuf::ArrowType = match pb_arrow_type_res {
-                    Ok(res) => res,
-                    Err(e) => return Err(e),
-                };
-                Ok(Field::new(&c.name, pb_arrow_type.try_into()?, c.nullable))
-            })
-            .collect::<Result<Vec<_>, _>>()?;
-        Ok(Schema::new(fields))
-    }
-}
-
-impl TryInto<arrow::datatypes::Field> for &protobuf::Field {
-    type Error = BallistaError;
-    fn try_into(self) -> Result<arrow::datatypes::Field, Self::Error> {
-        let pb_datatype = self.arrow_type.as_ref().ok_or_else(|| {
-            proto_error(
-                "Protobuf deserialization error: Field message missing required field 'arrow_type'",
-            )
-        })?;
-
-        Ok(arrow::datatypes::Field::new(
-            self.name.as_str(),
-            pb_datatype.as_ref().try_into()?,
-            self.nullable,
-        ))
-    }
-}
-
-use datafusion::physical_plan::datetime_expressions::{date_trunc, to_timestamp};
-use datafusion::prelude::{
-    array, length, lower, ltrim, md5, rtrim, sha224, sha256, sha384, sha512, trim, upper,
-};
-use std::convert::TryFrom;
-
-impl TryFrom<i32> for protobuf::FileType {
-    type Error = BallistaError;
-    fn try_from(value: i32) -> Result<Self, Self::Error> {
-        use protobuf::FileType;
-        match value {
-            _x if _x == FileType::NdJson as i32 => Ok(FileType::NdJson),
-            _x if _x == FileType::Parquet as i32 => Ok(FileType::Parquet),
-            _x if _x == FileType::Csv as i32 => Ok(FileType::Csv),
-            invalid => Err(BallistaError::General(format!(
-                "Attempted to convert invalid i32 to protobuf::Filetype: {}",
-                invalid
-            ))),
-        }
-    }
-}
-
-impl Into<datafusion::sql::parser::FileType> for protobuf::FileType {
-    fn into(self) -> datafusion::sql::parser::FileType {
-        use datafusion::sql::parser::FileType;
-        match self {
-            protobuf::FileType::NdJson => FileType::NdJson,
-            protobuf::FileType::Parquet => FileType::Parquet,
-            protobuf::FileType::Csv => FileType::CSV,
-        }
-    }
-}
-
-fn parse_required_expr(
-    p: &Option<Box<protobuf::LogicalExprNode>>,
-) -> Result<Expr, BallistaError> {
-    match p {
-        Some(expr) => expr.as_ref().try_into(),
-        None => Err(proto_error("Missing required expression")),
-    }
-}
-
-fn parse_optional_expr(
-    p: &Option<Box<protobuf::LogicalExprNode>>,
-) -> Result<Option<Expr>, BallistaError> {
-    match p {
-        Some(expr) => expr.as_ref().try_into().map(Some),
-        None => Ok(None),
-    }
-}
diff --git a/rust/ballista/rust/core/src/serde/logical_plan/mod.rs b/rust/ballista/rust/core/src/serde/logical_plan/mod.rs
deleted file mode 100644
index 48dd96c4d3f..00000000000
--- a/rust/ballista/rust/core/src/serde/logical_plan/mod.rs
+++ /dev/null
@@ -1,929 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-pub mod from_proto;
-pub mod to_proto;
-
-#[cfg(test)]
-
-mod roundtrip_tests {
-
-    use super::super::{super::error::Result, protobuf};
-    use crate::error::BallistaError;
-    use arrow::datatypes::{DataType, Field, Schema};
-    use core::panic;
-    use datafusion::physical_plan::functions::BuiltinScalarFunction::Sqrt;
-    use datafusion::{
-        logical_plan::{Expr, LogicalPlan, LogicalPlanBuilder},
-        physical_plan::csv::CsvReadOptions,
-        prelude::*,
-        scalar::ScalarValue,
-    };
-    use protobuf::arrow_type;
-    use std::convert::TryInto;
-
-    //Given a identity of a LogicalPlan converts it to protobuf and back, using debug formatting to test equality.
-    macro_rules! roundtrip_test {
-        ($initial_struct:ident, $proto_type:ty, $struct_type:ty) => {
-            let proto: $proto_type = (&$initial_struct).try_into()?;
-
-            let round_trip: $struct_type = (&proto).try_into()?;
-
-            assert_eq!(
-                format!("{:?}", $initial_struct),
-                format!("{:?}", round_trip)
-            );
-        };
-        ($initial_struct:ident, $struct_type:ty) => {
-            roundtrip_test!($initial_struct, protobuf::LogicalPlanNode, $struct_type);
-        };
-        ($initial_struct:ident) => {
-            roundtrip_test!($initial_struct, protobuf::LogicalPlanNode, LogicalPlan);
-        };
-    }
-
-    #[test]
-
-    fn roundtrip_repartition() -> Result<()> {
-        use datafusion::logical_plan::Partitioning;
-
-        let test_batch_sizes = [usize::MIN, usize::MAX, 43256];
-
-        let test_expr: Vec<Expr> = vec![
-            Expr::Column("c1".to_string()) + Expr::Column("c2".to_string()),
-            Expr::Literal((4.0).into()),
-        ];
-
-        let schema = Schema::new(vec![
-            Field::new("id", DataType::Int32, false),
-            Field::new("first_name", DataType::Utf8, false),
-            Field::new("last_name", DataType::Utf8, false),
-            Field::new("state", DataType::Utf8, false),
-            Field::new("salary", DataType::Int32, false),
-        ]);
-
-        let plan = std::sync::Arc::new(
-            LogicalPlanBuilder::scan_csv(
-                "employee.csv",
-                CsvReadOptions::new().schema(&schema).has_header(true),
-                Some(vec![3, 4]),
-            )
-            .and_then(|plan| plan.sort(vec![col("salary")]))
-            .and_then(|plan| plan.build())
-            .map_err(BallistaError::DataFusionError)?,
-        );
-
-        for batch_size in test_batch_sizes.iter() {
-            let rr_repartition = Partitioning::RoundRobinBatch(*batch_size);
-
-            let roundtrip_plan = LogicalPlan::Repartition {
-                input: plan.clone(),
-                partitioning_scheme: rr_repartition,
-            };
-
-            roundtrip_test!(roundtrip_plan);
-
-            let h_repartition = Partitioning::Hash(test_expr.clone(), *batch_size);
-
-            let roundtrip_plan = LogicalPlan::Repartition {
-                input: plan.clone(),
-                partitioning_scheme: h_repartition,
-            };
-
-            roundtrip_test!(roundtrip_plan);
-
-            let no_expr_hrepartition = Partitioning::Hash(Vec::new(), *batch_size);
-
-            let roundtrip_plan = LogicalPlan::Repartition {
-                input: plan.clone(),
-                partitioning_scheme: no_expr_hrepartition,
-            };
-
-            roundtrip_test!(roundtrip_plan);
-        }
-
-        Ok(())
-    }
-
-    fn new_box_field(
-        name: &str,
-        dt: DataType,
-        nullable: bool,
-    ) -> Box<arrow::datatypes::Field> {
-        Box::new(arrow::datatypes::Field::new(name, dt, nullable))
-    }
-
-    #[test]
-    fn scalar_values_error_serialization() -> Result<()> {
-        use arrow::datatypes::DataType;
-        use datafusion::scalar::ScalarValue;
-        let should_fail_on_seralize: Vec<ScalarValue> = vec![
-            //Should fail due to inconsistent types
-            ScalarValue::List(
-                Some(vec![
-                    ScalarValue::Int16(None),
-                    ScalarValue::Float32(Some(32.0)),
-                ]),
-                DataType::List(new_box_field("item", DataType::Int16, true)),
-            ),
-            ScalarValue::List(
-                Some(vec![
-                    ScalarValue::Float32(None),
-                    ScalarValue::Float32(Some(32.0)),
-                ]),
-                DataType::List(new_box_field("item", DataType::Int16, true)),
-            ),
-            ScalarValue::List(
-                Some(vec![
-                    ScalarValue::List(
-                        None,
-                        DataType::List(new_box_field("level2", DataType::Float32, true)),
-                    ),
-                    ScalarValue::List(
-                        Some(vec![
-                            ScalarValue::Float32(Some(-213.1)),
-                            ScalarValue::Float32(None),
-                            ScalarValue::Float32(Some(5.5)),
-                            ScalarValue::Float32(Some(2.0)),
-                            ScalarValue::Float32(Some(1.0)),
-                        ]),
-                        DataType::List(new_box_field("level2", DataType::Float32, true)),
-                    ),
-                    ScalarValue::List(
-                        None,
-                        DataType::List(new_box_field(
-                            "lists are typed inconsistently",
-                            DataType::Int16,
-                            true,
-                        )),
-                    ),
-                ]),
-                DataType::List(new_box_field(
-                    "level1",
-                    DataType::List(new_box_field("level2", DataType::Float32, true)),
-                    true,
-                )),
-            ),
-        ];
-
-        for test_case in should_fail_on_seralize.into_iter() {
-            let res: Result<protobuf::ScalarValue> = (&test_case).try_into();
-            if let Ok(val) = res {
-                return Err(BallistaError::General(format!(
-                    "The value {:?} should not have been able to serialize. Serialized to :{:?}",
-                    test_case, val
-                )));
-            }
-        }
-        Ok(())
-    }
-
-    #[test]
-    fn round_trip_scalar_values() -> Result<()> {
-        use arrow::datatypes::DataType;
-        use datafusion::scalar::ScalarValue;
-        let should_pass: Vec<ScalarValue> = vec![
-            ScalarValue::Boolean(None),
-            ScalarValue::Float32(None),
-            ScalarValue::Float64(None),
-            ScalarValue::Int8(None),
-            ScalarValue::Int16(None),
-            ScalarValue::Int32(None),
-            ScalarValue::Int64(None),
-            ScalarValue::UInt8(None),
-            ScalarValue::UInt16(None),
-            ScalarValue::UInt32(None),
-            ScalarValue::UInt64(None),
-            ScalarValue::Utf8(None),
-            ScalarValue::LargeUtf8(None),
-            ScalarValue::List(None, DataType::Boolean),
-            ScalarValue::Date32(None),
-            ScalarValue::TimestampMicrosecond(None),
-            ScalarValue::TimestampNanosecond(None),
-            ScalarValue::Boolean(Some(true)),
-            ScalarValue::Boolean(Some(false)),
-            ScalarValue::Float32(Some(1.0)),
-            ScalarValue::Float32(Some(f32::MAX)),
-            ScalarValue::Float32(Some(f32::MIN)),
-            ScalarValue::Float32(Some(-2000.0)),
-            ScalarValue::Float64(Some(1.0)),
-            ScalarValue::Float64(Some(f64::MAX)),
-            ScalarValue::Float64(Some(f64::MIN)),
-            ScalarValue::Float64(Some(-2000.0)),
-            ScalarValue::Int8(Some(i8::MIN)),
-            ScalarValue::Int8(Some(i8::MAX)),
-            ScalarValue::Int8(Some(0)),
-            ScalarValue::Int8(Some(-15)),
-            ScalarValue::Int16(Some(i16::MIN)),
-            ScalarValue::Int16(Some(i16::MAX)),
-            ScalarValue::Int16(Some(0)),
-            ScalarValue::Int16(Some(-15)),
-            ScalarValue::Int32(Some(i32::MIN)),
-            ScalarValue::Int32(Some(i32::MAX)),
-            ScalarValue::Int32(Some(0)),
-            ScalarValue::Int32(Some(-15)),
-            ScalarValue::Int64(Some(i64::MIN)),
-            ScalarValue::Int64(Some(i64::MAX)),
-            ScalarValue::Int64(Some(0)),
-            ScalarValue::Int64(Some(-15)),
-            ScalarValue::UInt8(Some(u8::MAX)),
-            ScalarValue::UInt8(Some(0)),
-            ScalarValue::UInt16(Some(u16::MAX)),
-            ScalarValue::UInt16(Some(0)),
-            ScalarValue::UInt32(Some(u32::MAX)),
-            ScalarValue::UInt32(Some(0)),
-            ScalarValue::UInt64(Some(u64::MAX)),
-            ScalarValue::UInt64(Some(0)),
-            ScalarValue::Utf8(Some(String::from("Test string   "))),
-            ScalarValue::LargeUtf8(Some(String::from("Test Large utf8"))),
-            ScalarValue::Date32(Some(0)),
-            ScalarValue::Date32(Some(i32::MAX)),
-            ScalarValue::TimestampNanosecond(Some(0)),
-            ScalarValue::TimestampNanosecond(Some(i64::MAX)),
-            ScalarValue::TimestampMicrosecond(Some(0)),
-            ScalarValue::TimestampMicrosecond(Some(i64::MAX)),
-            ScalarValue::TimestampMicrosecond(None),
-            ScalarValue::List(
-                Some(vec![
-                    ScalarValue::Float32(Some(-213.1)),
-                    ScalarValue::Float32(None),
-                    ScalarValue::Float32(Some(5.5)),
-                    ScalarValue::Float32(Some(2.0)),
-                    ScalarValue::Float32(Some(1.0)),
-                ]),
-                DataType::List(new_box_field("level1", DataType::Float32, true)),
-            ),
-            ScalarValue::List(
-                Some(vec![
-                    ScalarValue::List(
-                        None,
-                        DataType::List(new_box_field("level2", DataType::Float32, true)),
-                    ),
-                    ScalarValue::List(
-                        Some(vec![
-                            ScalarValue::Float32(Some(-213.1)),
-                            ScalarValue::Float32(None),
-                            ScalarValue::Float32(Some(5.5)),
-                            ScalarValue::Float32(Some(2.0)),
-                            ScalarValue::Float32(Some(1.0)),
-                        ]),
-                        DataType::List(new_box_field("level2", DataType::Float32, true)),
-                    ),
-                ]),
-                DataType::List(new_box_field(
-                    "level1",
-                    DataType::List(new_box_field("level2", DataType::Float32, true)),
-                    true,
-                )),
-            ),
-        ];
-
-        for test_case in should_pass.into_iter() {
-            let proto: protobuf::ScalarValue = (&test_case).try_into()?;
-            let _roundtrip: ScalarValue = (&proto).try_into()?;
-        }
-
-        Ok(())
-    }
-
-    #[test]
-    fn round_trip_scalar_types() -> Result<()> {
-        use arrow::datatypes::DataType;
-        use arrow::datatypes::{IntervalUnit, TimeUnit};
-        let should_pass: Vec<DataType> = vec![
-            DataType::Boolean,
-            DataType::Int8,
-            DataType::Int16,
-            DataType::Int32,
-            DataType::Int64,
-            DataType::UInt8,
-            DataType::UInt16,
-            DataType::UInt32,
-            DataType::UInt64,
-            DataType::Float32,
-            DataType::Float64,
-            DataType::Date32,
-            DataType::Time64(TimeUnit::Microsecond),
-            DataType::Time64(TimeUnit::Nanosecond),
-            DataType::Utf8,
-            DataType::LargeUtf8,
-            //Recursive list tests
-            DataType::List(new_box_field("Level1", DataType::Boolean, true)),
-            DataType::List(new_box_field(
-                "Level1",
-                DataType::List(new_box_field("Level2", DataType::Date32, true)),
-                true,
-            )),
-        ];
-
-        let should_fail: Vec<DataType> = vec![
-            DataType::Null,
-            DataType::Float16,
-            //Add more timestamp tests
-            DataType::Timestamp(TimeUnit::Millisecond, None),
-            DataType::Date64,
-            DataType::Time32(TimeUnit::Second),
-            DataType::Time32(TimeUnit::Millisecond),
-            DataType::Time32(TimeUnit::Microsecond),
-            DataType::Time32(TimeUnit::Nanosecond),
-            DataType::Time64(TimeUnit::Second),
-            DataType::Time64(TimeUnit::Millisecond),
-            DataType::Duration(TimeUnit::Second),
-            DataType::Duration(TimeUnit::Millisecond),
-            DataType::Duration(TimeUnit::Microsecond),
-            DataType::Duration(TimeUnit::Nanosecond),
-            DataType::Interval(IntervalUnit::YearMonth),
-            DataType::Interval(IntervalUnit::DayTime),
-            DataType::Binary,
-            DataType::FixedSizeBinary(0),
-            DataType::FixedSizeBinary(1234),
-            DataType::FixedSizeBinary(-432),
-            DataType::LargeBinary,
-            DataType::Decimal(1345, 5431),
-            //Recursive list tests
-            DataType::List(new_box_field("Level1", DataType::Binary, true)),
-            DataType::List(new_box_field(
-                "Level1",
-                DataType::List(new_box_field(
-                    "Level2",
-                    DataType::FixedSizeBinary(53),
-                    false,
-                )),
-                true,
-            )),
-            //Fixed size lists
-            DataType::FixedSizeList(new_box_field("Level1", DataType::Binary, true), 4),
-            DataType::FixedSizeList(
-                new_box_field(
-                    "Level1",
-                    DataType::List(new_box_field(
-                        "Level2",
-                        DataType::FixedSizeBinary(53),
-                        false,
-                    )),
-                    true,
-                ),
-                41,
-            ),
-            //Struct Testing
-            DataType::Struct(vec![
-                Field::new("nullable", DataType::Boolean, false),
-                Field::new("name", DataType::Utf8, false),
-                Field::new("datatype", DataType::Binary, false),
-            ]),
-            DataType::Struct(vec![
-                Field::new("nullable", DataType::Boolean, false),
-                Field::new("name", DataType::Utf8, false),
-                Field::new("datatype", DataType::Binary, false),
-                Field::new(
-                    "nested_struct",
-                    DataType::Struct(vec![
-                        Field::new("nullable", DataType::Boolean, false),
-                        Field::new("name", DataType::Utf8, false),
-                        Field::new("datatype", DataType::Binary, false),
-                    ]),
-                    true,
-                ),
-            ]),
-            DataType::Union(vec![
-                Field::new("nullable", DataType::Boolean, false),
-                Field::new("name", DataType::Utf8, false),
-                Field::new("datatype", DataType::Binary, false),
-            ]),
-            DataType::Union(vec![
-                Field::new("nullable", DataType::Boolean, false),
-                Field::new("name", DataType::Utf8, false),
-                Field::new("datatype", DataType::Binary, false),
-                Field::new(
-                    "nested_struct",
-                    DataType::Struct(vec![
-                        Field::new("nullable", DataType::Boolean, false),
-                        Field::new("name", DataType::Utf8, false),
-                        Field::new("datatype", DataType::Binary, false),
-                    ]),
-                    true,
-                ),
-            ]),
-            DataType::Dictionary(
-                Box::new(DataType::Utf8),
-                Box::new(DataType::Struct(vec![
-                    Field::new("nullable", DataType::Boolean, false),
-                    Field::new("name", DataType::Utf8, false),
-                    Field::new("datatype", DataType::Binary, false),
-                ])),
-            ),
-            DataType::Dictionary(
-                Box::new(DataType::Decimal(10, 50)),
-                Box::new(DataType::FixedSizeList(
-                    new_box_field("Level1", DataType::Binary, true),
-                    4,
-                )),
-            ),
-        ];
-
-        for test_case in should_pass.into_iter() {
-            let proto: protobuf::ScalarType = (&test_case).try_into()?;
-            let roundtrip: DataType = (&proto).try_into()?;
-            assert_eq!(format!("{:?}", test_case), format!("{:?}", roundtrip));
-        }
-
-        let mut success: Vec<DataType> = Vec::new();
-        for test_case in should_fail.into_iter() {
-            let proto: Result<protobuf::ScalarType> = (&test_case).try_into();
-            if proto.is_ok() {
-                success.push(test_case)
-            }
-        }
-        if !success.is_empty() {
-            return Err(BallistaError::General(format!(
-                "The following items which should have ressulted in an error completed successfully: {:?}",
-                success
-            )));
-        }
-        Ok(())
-    }
-
-    #[test]
-    fn round_trip_datatype() -> Result<()> {
-        use arrow::datatypes::DataType;
-        use arrow::datatypes::{IntervalUnit, TimeUnit};
-        let test_cases: Vec<DataType> = vec![
-            DataType::Null,
-            DataType::Boolean,
-            DataType::Int8,
-            DataType::Int16,
-            DataType::Int32,
-            DataType::Int64,
-            DataType::UInt8,
-            DataType::UInt16,
-            DataType::UInt32,
-            DataType::UInt64,
-            DataType::Float16,
-            DataType::Float32,
-            DataType::Float64,
-            //Add more timestamp tests
-            DataType::Timestamp(TimeUnit::Millisecond, None),
-            DataType::Date32,
-            DataType::Date64,
-            DataType::Time32(TimeUnit::Second),
-            DataType::Time32(TimeUnit::Millisecond),
-            DataType::Time32(TimeUnit::Microsecond),
-            DataType::Time32(TimeUnit::Nanosecond),
-            DataType::Time64(TimeUnit::Second),
-            DataType::Time64(TimeUnit::Millisecond),
-            DataType::Time64(TimeUnit::Microsecond),
-            DataType::Time64(TimeUnit::Nanosecond),
-            DataType::Duration(TimeUnit::Second),
-            DataType::Duration(TimeUnit::Millisecond),
-            DataType::Duration(TimeUnit::Microsecond),
-            DataType::Duration(TimeUnit::Nanosecond),
-            DataType::Interval(IntervalUnit::YearMonth),
-            DataType::Interval(IntervalUnit::DayTime),
-            DataType::Binary,
-            DataType::FixedSizeBinary(0),
-            DataType::FixedSizeBinary(1234),
-            DataType::FixedSizeBinary(-432),
-            DataType::LargeBinary,
-            DataType::Utf8,
-            DataType::LargeUtf8,
-            DataType::Decimal(1345, 5431),
-            //Recursive list tests
-            DataType::List(new_box_field("Level1", DataType::Binary, true)),
-            DataType::List(new_box_field(
-                "Level1",
-                DataType::List(new_box_field(
-                    "Level2",
-                    DataType::FixedSizeBinary(53),
-                    false,
-                )),
-                true,
-            )),
-            //Fixed size lists
-            DataType::FixedSizeList(new_box_field("Level1", DataType::Binary, true), 4),
-            DataType::FixedSizeList(
-                new_box_field(
-                    "Level1",
-                    DataType::List(new_box_field(
-                        "Level2",
-                        DataType::FixedSizeBinary(53),
-                        false,
-                    )),
-                    true,
-                ),
-                41,
-            ),
-            //Struct Testing
-            DataType::Struct(vec![
-                Field::new("nullable", DataType::Boolean, false),
-                Field::new("name", DataType::Utf8, false),
-                Field::new("datatype", DataType::Binary, false),
-            ]),
-            DataType::Struct(vec![
-                Field::new("nullable", DataType::Boolean, false),
-                Field::new("name", DataType::Utf8, false),
-                Field::new("datatype", DataType::Binary, false),
-                Field::new(
-                    "nested_struct",
-                    DataType::Struct(vec![
-                        Field::new("nullable", DataType::Boolean, false),
-                        Field::new("name", DataType::Utf8, false),
-                        Field::new("datatype", DataType::Binary, false),
-                    ]),
-                    true,
-                ),
-            ]),
-            DataType::Union(vec![
-                Field::new("nullable", DataType::Boolean, false),
-                Field::new("name", DataType::Utf8, false),
-                Field::new("datatype", DataType::Binary, false),
-            ]),
-            DataType::Union(vec![
-                Field::new("nullable", DataType::Boolean, false),
-                Field::new("name", DataType::Utf8, false),
-                Field::new("datatype", DataType::Binary, false),
-                Field::new(
-                    "nested_struct",
-                    DataType::Struct(vec![
-                        Field::new("nullable", DataType::Boolean, false),
-                        Field::new("name", DataType::Utf8, false),
-                        Field::new("datatype", DataType::Binary, false),
-                    ]),
-                    true,
-                ),
-            ]),
-            DataType::Dictionary(
-                Box::new(DataType::Utf8),
-                Box::new(DataType::Struct(vec![
-                    Field::new("nullable", DataType::Boolean, false),
-                    Field::new("name", DataType::Utf8, false),
-                    Field::new("datatype", DataType::Binary, false),
-                ])),
-            ),
-            DataType::Dictionary(
-                Box::new(DataType::Decimal(10, 50)),
-                Box::new(DataType::FixedSizeList(
-                    new_box_field("Level1", DataType::Binary, true),
-                    4,
-                )),
-            ),
-        ];
-
-        for test_case in test_cases.into_iter() {
-            let proto: protobuf::ArrowType = (&test_case).into();
-            let roundtrip: DataType = (&proto).try_into()?;
-            assert_eq!(format!("{:?}", test_case), format!("{:?}", roundtrip));
-        }
-        Ok(())
-    }
-
-    #[test]
-    fn roundtrip_null_scalar_values() -> Result<()> {
-        use arrow::datatypes::DataType;
-        use arrow::datatypes::Field;
-        use datafusion::scalar::ScalarValue;
-        let test_types = vec![
-            ScalarValue::Boolean(None),
-            ScalarValue::Float32(None),
-            ScalarValue::Float64(None),
-            ScalarValue::Int8(None),
-            ScalarValue::Int16(None),
-            ScalarValue::Int32(None),
-            ScalarValue::Int64(None),
-            ScalarValue::UInt8(None),
-            ScalarValue::UInt16(None),
-            ScalarValue::UInt32(None),
-            ScalarValue::UInt64(None),
-            ScalarValue::Utf8(None),
-            ScalarValue::LargeUtf8(None),
-            ScalarValue::Date32(None),
-            ScalarValue::TimestampMicrosecond(None),
-            ScalarValue::TimestampNanosecond(None),
-            //ScalarValue::List(None, DataType::Boolean)
-        ];
-
-        for test_case in test_types.into_iter() {
-            let proto_scalar: protobuf::ScalarValue = (&test_case).try_into()?;
-            let returned_scalar: datafusion::scalar::ScalarValue =
-                (&proto_scalar).try_into()?;
-            assert_eq!(
-                format!("{:?}", &test_case),
-                format!("{:?}", returned_scalar)
-            );
-        }
-
-        Ok(())
-    }
-
-    #[test]
-
-    fn roundtrip_create_external_table() -> Result<()> {
-        let schema = Schema::new(vec![
-            Field::new("id", DataType::Int32, false),
-            Field::new("first_name", DataType::Utf8, false),
-            Field::new("last_name", DataType::Utf8, false),
-            Field::new("state", DataType::Utf8, false),
-            Field::new("salary", DataType::Int32, false),
-        ]);
-
-        use datafusion::logical_plan::ToDFSchema;
-
-        let df_schema_ref = schema.to_dfschema_ref()?;
-
-        use datafusion::sql::parser::FileType;
-
-        let filetypes: [FileType; 3] =
-            [FileType::NdJson, FileType::Parquet, FileType::CSV];
-
-        for file in filetypes.iter() {
-            let create_table_node = LogicalPlan::CreateExternalTable {
-                schema: df_schema_ref.clone(),
-                name: String::from("TestName"),
-                location: String::from("employee.csv"),
-                file_type: *file,
-                has_header: true,
-            };
-
-            roundtrip_test!(create_table_node);
-        }
-
-        Ok(())
-    }
-
-    #[test]
-
-    fn roundtrip_explain() -> Result<()> {
-        let schema = Schema::new(vec![
-            Field::new("id", DataType::Int32, false),
-            Field::new("first_name", DataType::Utf8, false),
-            Field::new("last_name", DataType::Utf8, false),
-            Field::new("state", DataType::Utf8, false),
-            Field::new("salary", DataType::Int32, false),
-        ]);
-
-        let verbose_plan = LogicalPlanBuilder::scan_csv(
-            "employee.csv",
-            CsvReadOptions::new().schema(&schema).has_header(true),
-            Some(vec![3, 4]),
-        )
-        .and_then(|plan| plan.sort(vec![col("salary")]))
-        .and_then(|plan| plan.explain(true))
-        .and_then(|plan| plan.build())
-        .map_err(BallistaError::DataFusionError)?;
-
-        let plan = LogicalPlanBuilder::scan_csv(
-            "employee.csv",
-            CsvReadOptions::new().schema(&schema).has_header(true),
-            Some(vec![3, 4]),
-        )
-        .and_then(|plan| plan.sort(vec![col("salary")]))
-        .and_then(|plan| plan.explain(false))
-        .and_then(|plan| plan.build())
-        .map_err(BallistaError::DataFusionError)?;
-
-        roundtrip_test!(plan);
-
-        roundtrip_test!(verbose_plan);
-
-        Ok(())
-    }
-
-    #[test]
-    fn roundtrip_join() -> Result<()> {
-        let schema = Schema::new(vec![
-            Field::new("id", DataType::Int32, false),
-            Field::new("first_name", DataType::Utf8, false),
-            Field::new("last_name", DataType::Utf8, false),
-            Field::new("state", DataType::Utf8, false),
-            Field::new("salary", DataType::Int32, false),
-        ]);
-
-        let scan_plan = LogicalPlanBuilder::empty(false)
-            .build()
-            .map_err(BallistaError::DataFusionError)?;
-        let plan = LogicalPlanBuilder::scan_csv(
-            "employee.csv",
-            CsvReadOptions::new().schema(&schema).has_header(true),
-            Some(vec![3, 4]),
-        )
-        .and_then(|plan| plan.join(&scan_plan, JoinType::Inner, &["id"], &["id"]))
-        .and_then(|plan| plan.build())
-        .map_err(BallistaError::DataFusionError)?;
-
-        roundtrip_test!(plan);
-        Ok(())
-    }
-
-    #[test]
-    fn roundtrip_sort() -> Result<()> {
-        let schema = Schema::new(vec![
-            Field::new("id", DataType::Int32, false),
-            Field::new("first_name", DataType::Utf8, false),
-            Field::new("last_name", DataType::Utf8, false),
-            Field::new("state", DataType::Utf8, false),
-            Field::new("salary", DataType::Int32, false),
-        ]);
-
-        let plan = LogicalPlanBuilder::scan_csv(
-            "employee.csv",
-            CsvReadOptions::new().schema(&schema).has_header(true),
-            Some(vec![3, 4]),
-        )
-        .and_then(|plan| plan.sort(vec![col("salary")]))
-        .and_then(|plan| plan.build())
-        .map_err(BallistaError::DataFusionError)?;
-        roundtrip_test!(plan);
-
-        Ok(())
-    }
-
-    #[test]
-
-    fn roundtrip_empty_relation() -> Result<()> {
-        let plan_false = LogicalPlanBuilder::empty(false)
-            .build()
-            .map_err(BallistaError::DataFusionError)?;
-
-        roundtrip_test!(plan_false);
-
-        let plan_true = LogicalPlanBuilder::empty(true)
-            .build()
-            .map_err(BallistaError::DataFusionError)?;
-
-        roundtrip_test!(plan_true);
-
-        Ok(())
-    }
-
-    #[test]
-
-    fn roundtrip_logical_plan() -> Result<()> {
-        let schema = Schema::new(vec![
-            Field::new("id", DataType::Int32, false),
-            Field::new("first_name", DataType::Utf8, false),
-            Field::new("last_name", DataType::Utf8, false),
-            Field::new("state", DataType::Utf8, false),
-            Field::new("salary", DataType::Int32, false),
-        ]);
-
-        let plan = LogicalPlanBuilder::scan_csv(
-            "employee.csv",
-            CsvReadOptions::new().schema(&schema).has_header(true),
-            Some(vec![3, 4]),
-        )
-        .and_then(|plan| plan.aggregate(vec![col("state")], vec![max(col("salary"))]))
-        .and_then(|plan| plan.build())
-        .map_err(BallistaError::DataFusionError)?;
-
-        roundtrip_test!(plan);
-
-        Ok(())
-    }
-
-    #[test]
-
-    fn roundtrip_not() -> Result<()> {
-        let test_expr = Expr::Not(Box::new(Expr::Literal((1.0).into())));
-
-        roundtrip_test!(test_expr, protobuf::LogicalExprNode, Expr);
-
-        Ok(())
-    }
-
-    #[test]
-
-    fn roundtrip_is_null() -> Result<()> {
-        let test_expr = Expr::IsNull(Box::new(Expr::Column("id".into())));
-
-        roundtrip_test!(test_expr, protobuf::LogicalExprNode, Expr);
-
-        Ok(())
-    }
-
-    #[test]
-
-    fn roundtrip_is_not_null() -> Result<()> {
-        let test_expr = Expr::IsNotNull(Box::new(Expr::Column("id".into())));
-
-        roundtrip_test!(test_expr, protobuf::LogicalExprNode, Expr);
-
-        Ok(())
-    }
-
-    #[test]
-
-    fn roundtrip_between() -> Result<()> {
-        let test_expr = Expr::Between {
-            expr: Box::new(Expr::Literal((1.0).into())),
-            negated: true,
-            low: Box::new(Expr::Literal((2.0).into())),
-            high: Box::new(Expr::Literal((3.0).into())),
-        };
-
-        roundtrip_test!(test_expr, protobuf::LogicalExprNode, Expr);
-
-        Ok(())
-    }
-
-    #[test]
-
-    fn roundtrip_case() -> Result<()> {
-        let test_expr = Expr::Case {
-            expr: Some(Box::new(Expr::Literal((1.0).into()))),
-            when_then_expr: vec![(
-                Box::new(Expr::Literal((2.0).into())),
-                Box::new(Expr::Literal((3.0).into())),
-            )],
-            else_expr: Some(Box::new(Expr::Literal((4.0).into()))),
-        };
-
-        roundtrip_test!(test_expr, protobuf::LogicalExprNode, Expr);
-
-        Ok(())
-    }
-
-    #[test]
-
-    fn roundtrip_cast() -> Result<()> {
-        let test_expr = Expr::Cast {
-            expr: Box::new(Expr::Literal((1.0).into())),
-            data_type: DataType::Boolean,
-        };
-
-        roundtrip_test!(test_expr, protobuf::LogicalExprNode, Expr);
-
-        Ok(())
-    }
-
-    #[test]
-
-    fn roundtrip_sort_expr() -> Result<()> {
-        let test_expr = Expr::Sort {
-            expr: Box::new(Expr::Literal((1.0).into())),
-            asc: true,
-            nulls_first: true,
-        };
-
-        roundtrip_test!(test_expr, protobuf::LogicalExprNode, Expr);
-
-        Ok(())
-    }
-
-    #[test]
-
-    fn roundtrip_negative() -> Result<()> {
-        let test_expr = Expr::Negative(Box::new(Expr::Literal((1.0).into())));
-
-        roundtrip_test!(test_expr, protobuf::LogicalExprNode, Expr);
-
-        Ok(())
-    }
-
-    #[test]
-
-    fn roundtrip_inlist() -> Result<()> {
-        let test_expr = Expr::InList {
-            expr: Box::new(Expr::Literal((1.0).into())),
-            list: vec![Expr::Literal((2.0).into())],
-            negated: true,
-        };
-
-        roundtrip_test!(test_expr, protobuf::LogicalExprNode, Expr);
-
-        Ok(())
-    }
-
-    #[test]
-
-    fn roundtrip_wildcard() -> Result<()> {
-        let test_expr = Expr::Wildcard;
-
-        roundtrip_test!(test_expr, protobuf::LogicalExprNode, Expr);
-
-        Ok(())
-    }
-
-    #[test]
-    fn roundtrip_sqrt() -> Result<()> {
-        let test_expr = Expr::ScalarFunction {
-            fun: Sqrt,
-            args: vec![col("col")],
-        };
-        roundtrip_test!(test_expr, protobuf::LogicalExprNode, Expr);
-
-        Ok(())
-    }
-}
diff --git a/rust/ballista/rust/core/src/serde/logical_plan/to_proto.rs b/rust/ballista/rust/core/src/serde/logical_plan/to_proto.rs
deleted file mode 100644
index a181f98b6eb..00000000000
--- a/rust/ballista/rust/core/src/serde/logical_plan/to_proto.rs
+++ /dev/null
@@ -1,1233 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Serde code to convert Arrow schemas and DataFusion logical plans to Ballista protocol
-//! buffer format, allowing DataFusion logical plans to be serialized and transmitted between
-//! processes.
-
-use std::{
-    boxed,
-    convert::{TryFrom, TryInto},
-};
-
-use crate::datasource::DFTableAdapter;
-use crate::serde::{protobuf, BallistaError};
-
-use arrow::datatypes::{DataType, Schema};
-use datafusion::datasource::CsvFile;
-use datafusion::logical_plan::{Expr, JoinType, LogicalPlan};
-use datafusion::physical_plan::aggregates::AggregateFunction;
-use datafusion::{datasource::parquet::ParquetTable, logical_plan::exprlist_to_fields};
-use protobuf::{
-    arrow_type, logical_expr_node::ExprType, scalar_type, DateUnit, Field,
-    PrimitiveScalarType, ScalarListValue, ScalarType,
-};
-
-use super::super::proto_error;
-use datafusion::physical_plan::functions::BuiltinScalarFunction;
-
-impl protobuf::IntervalUnit {
-    pub fn from_arrow_interval_unit(
-        interval_unit: &arrow::datatypes::IntervalUnit,
-    ) -> Self {
-        match interval_unit {
-            arrow::datatypes::IntervalUnit::YearMonth => {
-                protobuf::IntervalUnit::YearMonth
-            }
-            arrow::datatypes::IntervalUnit::DayTime => protobuf::IntervalUnit::DayTime,
-        }
-    }
-
-    pub fn from_i32_to_arrow(
-        interval_unit_i32: i32,
-    ) -> Result<arrow::datatypes::IntervalUnit, BallistaError> {
-        let pb_interval_unit = protobuf::IntervalUnit::from_i32(interval_unit_i32);
-        use arrow::datatypes::IntervalUnit;
-        match pb_interval_unit {
-            Some(interval_unit) => Ok(match interval_unit {
-                protobuf::IntervalUnit::YearMonth => IntervalUnit::YearMonth,
-                protobuf::IntervalUnit::DayTime => IntervalUnit::DayTime,
-            }),
-            None => Err(proto_error(
-                "Error converting i32 to DateUnit: Passed invalid variant",
-            )),
-        }
-    }
-}
-/* Arrow changed dates to no longer have date unit
-
-impl protobuf::DateUnit {
-    pub fn from_arrow_date_unit(val: &arrow::datatypes::DateUnit) -> Self {
-        match val {
-            arrow::datatypes::DateUnit::Day => protobuf::DateUnit::Day,
-            arrow::datatypes::DateUnit::Millisecond => protobuf::DateUnit::DateMillisecond,
-        }
-    }
-    pub fn from_i32_to_arrow(date_unit_i32: i32) -> Result<arrow::datatypes::DateUnit, BallistaError> {
-        let pb_date_unit = protobuf::DateUnit::from_i32(date_unit_i32);
-        use arrow::datatypes::DateUnit;
-        match pb_date_unit {
-            Some(date_unit) => Ok(match date_unit {
-                protobuf::DateUnit::Day => DateUnit::Day,
-                protobuf::DateUnit::DateMillisecond => DateUnit::Millisecond,
-            }),
-            None => Err(proto_error("Error converting i32 to DateUnit: Passed invalid variant")),
-        }
-    }
-
-}*/
-
-impl protobuf::TimeUnit {
-    pub fn from_arrow_time_unit(val: &arrow::datatypes::TimeUnit) -> Self {
-        match val {
-            arrow::datatypes::TimeUnit::Second => protobuf::TimeUnit::Second,
-            arrow::datatypes::TimeUnit::Millisecond => {
-                protobuf::TimeUnit::TimeMillisecond
-            }
-            arrow::datatypes::TimeUnit::Microsecond => protobuf::TimeUnit::Microsecond,
-            arrow::datatypes::TimeUnit::Nanosecond => protobuf::TimeUnit::Nanosecond,
-        }
-    }
-    pub fn from_i32_to_arrow(
-        time_unit_i32: i32,
-    ) -> Result<arrow::datatypes::TimeUnit, BallistaError> {
-        let pb_time_unit = protobuf::TimeUnit::from_i32(time_unit_i32);
-        use arrow::datatypes::TimeUnit;
-        match pb_time_unit {
-            Some(time_unit) => Ok(match time_unit {
-                protobuf::TimeUnit::Second => TimeUnit::Second,
-                protobuf::TimeUnit::TimeMillisecond => TimeUnit::Millisecond,
-                protobuf::TimeUnit::Microsecond => TimeUnit::Microsecond,
-                protobuf::TimeUnit::Nanosecond => TimeUnit::Nanosecond,
-            }),
-            None => Err(proto_error(
-                "Error converting i32 to TimeUnit: Passed invalid variant",
-            )),
-        }
-    }
-}
-
-impl From<&arrow::datatypes::Field> for protobuf::Field {
-    fn from(field: &arrow::datatypes::Field) -> Self {
-        protobuf::Field {
-            name: field.name().to_owned(),
-            arrow_type: Some(Box::new(field.data_type().into())),
-            nullable: field.is_nullable(),
-            children: Vec::new(),
-        }
-    }
-}
-
-impl From<&arrow::datatypes::DataType> for protobuf::ArrowType {
-    fn from(val: &arrow::datatypes::DataType) -> protobuf::ArrowType {
-        protobuf::ArrowType {
-            arrow_type_enum: Some(val.into()),
-        }
-    }
-}
-
-impl TryInto<arrow::datatypes::DataType> for &protobuf::ArrowType {
-    type Error = BallistaError;
-    fn try_into(self) -> Result<arrow::datatypes::DataType, Self::Error> {
-        let pb_arrow_type = self.arrow_type_enum.as_ref().ok_or_else(|| {
-            proto_error(
-                "Protobuf deserialization error: ArrowType missing required field 'data_type'",
-            )
-        })?;
-        use arrow::datatypes::DataType;
-        Ok(match pb_arrow_type {
-            protobuf::arrow_type::ArrowTypeEnum::None(_) => DataType::Null,
-            protobuf::arrow_type::ArrowTypeEnum::Bool(_) => DataType::Boolean,
-            protobuf::arrow_type::ArrowTypeEnum::Uint8(_) => DataType::UInt8,
-            protobuf::arrow_type::ArrowTypeEnum::Int8(_) => DataType::Int8,
-            protobuf::arrow_type::ArrowTypeEnum::Uint16(_) => DataType::UInt16,
-            protobuf::arrow_type::ArrowTypeEnum::Int16(_) => DataType::Int16,
-            protobuf::arrow_type::ArrowTypeEnum::Uint32(_) => DataType::UInt32,
-            protobuf::arrow_type::ArrowTypeEnum::Int32(_) => DataType::Int32,
-            protobuf::arrow_type::ArrowTypeEnum::Uint64(_) => DataType::UInt64,
-            protobuf::arrow_type::ArrowTypeEnum::Int64(_) => DataType::Int64,
-            protobuf::arrow_type::ArrowTypeEnum::Float16(_) => DataType::Float16,
-            protobuf::arrow_type::ArrowTypeEnum::Float32(_) => DataType::Float32,
-            protobuf::arrow_type::ArrowTypeEnum::Float64(_) => DataType::Float64,
-            protobuf::arrow_type::ArrowTypeEnum::Utf8(_) => DataType::Utf8,
-            protobuf::arrow_type::ArrowTypeEnum::LargeUtf8(_) => DataType::LargeUtf8,
-            protobuf::arrow_type::ArrowTypeEnum::Binary(_) => DataType::Binary,
-            protobuf::arrow_type::ArrowTypeEnum::FixedSizeBinary(size) => {
-                DataType::FixedSizeBinary(*size)
-            }
-            protobuf::arrow_type::ArrowTypeEnum::LargeBinary(_) => DataType::LargeBinary,
-            protobuf::arrow_type::ArrowTypeEnum::Date32(_) => DataType::Date32,
-            protobuf::arrow_type::ArrowTypeEnum::Date64(_) => DataType::Date64,
-            protobuf::arrow_type::ArrowTypeEnum::Duration(time_unit_i32) => {
-                DataType::Duration(protobuf::TimeUnit::from_i32_to_arrow(*time_unit_i32)?)
-            }
-            protobuf::arrow_type::ArrowTypeEnum::Timestamp(timestamp) => {
-                DataType::Timestamp(
-                    protobuf::TimeUnit::from_i32_to_arrow(timestamp.time_unit)?,
-                    match timestamp.timezone.is_empty() {
-                        true => None,
-                        false => Some(timestamp.timezone.to_owned()),
-                    },
-                )
-            }
-            protobuf::arrow_type::ArrowTypeEnum::Time32(time_unit_i32) => {
-                DataType::Time32(protobuf::TimeUnit::from_i32_to_arrow(*time_unit_i32)?)
-            }
-            protobuf::arrow_type::ArrowTypeEnum::Time64(time_unit_i32) => {
-                DataType::Time64(protobuf::TimeUnit::from_i32_to_arrow(*time_unit_i32)?)
-            }
-            protobuf::arrow_type::ArrowTypeEnum::Interval(interval_unit_i32) => {
-                DataType::Interval(protobuf::IntervalUnit::from_i32_to_arrow(
-                    *interval_unit_i32,
-                )?)
-            }
-            protobuf::arrow_type::ArrowTypeEnum::Decimal(protobuf::Decimal {
-                whole,
-                fractional,
-            }) => DataType::Decimal(*whole as usize, *fractional as usize),
-            protobuf::arrow_type::ArrowTypeEnum::List(boxed_list) => {
-                let field_ref = boxed_list
-                    .field_type
-                    .as_ref()
-                    .ok_or_else(|| proto_error("Protobuf deserialization error: List message was missing required field 'field_type'"))?
-                    .as_ref();
-                arrow::datatypes::DataType::List(Box::new(field_ref.try_into()?))
-            }
-            protobuf::arrow_type::ArrowTypeEnum::LargeList(boxed_list) => {
-                let field_ref = boxed_list
-                    .field_type
-                    .as_ref()
-                    .ok_or_else(|| proto_error("Protobuf deserialization error: List message was missing required field 'field_type'"))?
-                    .as_ref();
-                arrow::datatypes::DataType::LargeList(Box::new(field_ref.try_into()?))
-            }
-            protobuf::arrow_type::ArrowTypeEnum::FixedSizeList(boxed_list) => {
-                let fsl_ref = boxed_list.as_ref();
-                let pb_fieldtype = fsl_ref
-                    .field_type
-                    .as_ref()
-                    .ok_or_else(|| proto_error("Protobuf deserialization error: FixedSizeList message was missing required field 'field_type'"))?;
-                arrow::datatypes::DataType::FixedSizeList(
-                    Box::new(pb_fieldtype.as_ref().try_into()?),
-                    fsl_ref.list_size,
-                )
-            }
-            protobuf::arrow_type::ArrowTypeEnum::Struct(struct_type) => {
-                let fields = struct_type
-                    .sub_field_types
-                    .iter()
-                    .map(|field| field.try_into())
-                    .collect::<Result<Vec<_>, _>>()?;
-                arrow::datatypes::DataType::Struct(fields)
-            }
-            protobuf::arrow_type::ArrowTypeEnum::Union(union) => {
-                let union_types = union
-                    .union_types
-                    .iter()
-                    .map(|field| field.try_into())
-                    .collect::<Result<Vec<_>, _>>()?;
-                arrow::datatypes::DataType::Union(union_types)
-            }
-            protobuf::arrow_type::ArrowTypeEnum::Dictionary(boxed_dict) => {
-                let dict_ref = boxed_dict.as_ref();
-                let pb_key = dict_ref
-                    .key
-                    .as_ref()
-                    .ok_or_else(|| proto_error("Protobuf deserialization error: Dictionary message was missing required field 'key'"))?;
-                let pb_value = dict_ref
-                    .value
-                    .as_ref()
-                    .ok_or_else(|| proto_error("Protobuf deserialization error: Dictionary message was missing required field 'value'"))?;
-                arrow::datatypes::DataType::Dictionary(
-                    Box::new(pb_key.as_ref().try_into()?),
-                    Box::new(pb_value.as_ref().try_into()?),
-                )
-            }
-        })
-    }
-}
-
-impl TryInto<arrow::datatypes::DataType> for &Box<protobuf::List> {
-    type Error = BallistaError;
-    fn try_into(self) -> Result<arrow::datatypes::DataType, Self::Error> {
-        let list_ref = self.as_ref();
-        match &list_ref.field_type {
-            Some(pb_field) => {
-                let pb_field_ref = pb_field.as_ref();
-                let arrow_field: arrow::datatypes::Field = pb_field_ref.try_into()?;
-                Ok(arrow::datatypes::DataType::List(Box::new(arrow_field)))
-            }
-            None => Err(proto_error(
-                "List message missing required field 'field_type'",
-            )),
-        }
-    }
-}
-
-impl From<&arrow::datatypes::DataType> for protobuf::arrow_type::ArrowTypeEnum {
-    fn from(val: &arrow::datatypes::DataType) -> protobuf::arrow_type::ArrowTypeEnum {
-        use protobuf::arrow_type::ArrowTypeEnum;
-        use protobuf::ArrowType;
-        use protobuf::EmptyMessage;
-        match val {
-            DataType::Null => ArrowTypeEnum::None(EmptyMessage {}),
-            DataType::Boolean => ArrowTypeEnum::Bool(EmptyMessage {}),
-            DataType::Int8 => ArrowTypeEnum::Int8(EmptyMessage {}),
-            DataType::Int16 => ArrowTypeEnum::Int16(EmptyMessage {}),
-            DataType::Int32 => ArrowTypeEnum::Int32(EmptyMessage {}),
-            DataType::Int64 => ArrowTypeEnum::Int64(EmptyMessage {}),
-            DataType::UInt8 => ArrowTypeEnum::Uint8(EmptyMessage {}),
-            DataType::UInt16 => ArrowTypeEnum::Uint16(EmptyMessage {}),
-            DataType::UInt32 => ArrowTypeEnum::Uint32(EmptyMessage {}),
-            DataType::UInt64 => ArrowTypeEnum::Uint64(EmptyMessage {}),
-            DataType::Float16 => ArrowTypeEnum::Float16(EmptyMessage {}),
-            DataType::Float32 => ArrowTypeEnum::Float32(EmptyMessage {}),
-            DataType::Float64 => ArrowTypeEnum::Float64(EmptyMessage {}),
-            DataType::Timestamp(time_unit, timezone) => {
-                ArrowTypeEnum::Timestamp(protobuf::Timestamp {
-                    time_unit: protobuf::TimeUnit::from_arrow_time_unit(time_unit) as i32,
-                    timezone: timezone.to_owned().unwrap_or_else(String::new),
-                })
-            }
-            DataType::Date32 => ArrowTypeEnum::Date32(EmptyMessage {}),
-            DataType::Date64 => ArrowTypeEnum::Date64(EmptyMessage {}),
-            DataType::Time32(time_unit) => ArrowTypeEnum::Time32(
-                protobuf::TimeUnit::from_arrow_time_unit(time_unit) as i32,
-            ),
-            DataType::Time64(time_unit) => ArrowTypeEnum::Time64(
-                protobuf::TimeUnit::from_arrow_time_unit(time_unit) as i32,
-            ),
-            DataType::Duration(time_unit) => ArrowTypeEnum::Duration(
-                protobuf::TimeUnit::from_arrow_time_unit(time_unit) as i32,
-            ),
-            DataType::Interval(interval_unit) => ArrowTypeEnum::Interval(
-                protobuf::IntervalUnit::from_arrow_interval_unit(interval_unit) as i32,
-            ),
-            DataType::Binary => ArrowTypeEnum::Binary(EmptyMessage {}),
-            DataType::FixedSizeBinary(size) => ArrowTypeEnum::FixedSizeBinary(*size),
-            DataType::LargeBinary => ArrowTypeEnum::LargeBinary(EmptyMessage {}),
-            DataType::Utf8 => ArrowTypeEnum::Utf8(EmptyMessage {}),
-            DataType::LargeUtf8 => ArrowTypeEnum::LargeUtf8(EmptyMessage {}),
-            DataType::List(item_type) => ArrowTypeEnum::List(Box::new(protobuf::List {
-                field_type: Some(Box::new(item_type.as_ref().into())),
-            })),
-            DataType::FixedSizeList(item_type, size) => {
-                ArrowTypeEnum::FixedSizeList(Box::new(protobuf::FixedSizeList {
-                    field_type: Some(Box::new(item_type.as_ref().into())),
-                    list_size: *size,
-                }))
-            }
-            DataType::LargeList(item_type) => {
-                ArrowTypeEnum::LargeList(Box::new(protobuf::List {
-                    field_type: Some(Box::new(item_type.as_ref().into())),
-                }))
-            }
-            DataType::Struct(struct_fields) => ArrowTypeEnum::Struct(protobuf::Struct {
-                sub_field_types: struct_fields
-                    .iter()
-                    .map(|field| field.into())
-                    .collect::<Vec<_>>(),
-            }),
-            DataType::Union(union_types) => ArrowTypeEnum::Union(protobuf::Union {
-                union_types: union_types
-                    .iter()
-                    .map(|field| field.into())
-                    .collect::<Vec<_>>(),
-            }),
-            DataType::Dictionary(key_type, value_type) => {
-                ArrowTypeEnum::Dictionary(Box::new(protobuf::Dictionary {
-                    key: Some(Box::new(key_type.as_ref().into())),
-                    value: Some(Box::new(value_type.as_ref().into())),
-                }))
-            }
-            DataType::Decimal(whole, fractional) => {
-                ArrowTypeEnum::Decimal(protobuf::Decimal {
-                    whole: *whole as u64,
-                    fractional: *fractional as u64,
-                })
-            }
-        }
-    }
-}
-
-//Does not check if list subtypes are valid
-fn is_valid_scalar_type_no_list_check(datatype: &arrow::datatypes::DataType) -> bool {
-    match datatype {
-        DataType::Boolean
-        | DataType::Int8
-        | DataType::Int16
-        | DataType::Int32
-        | DataType::Int64
-        | DataType::UInt8
-        | DataType::UInt16
-        | DataType::UInt32
-        | DataType::UInt64
-        | DataType::Float32
-        | DataType::Float64
-        | DataType::LargeUtf8
-        | DataType::Utf8
-        | DataType::Date32 => true,
-        DataType::Time64(time_unit) => matches!(
-            time_unit,
-            arrow::datatypes::TimeUnit::Microsecond
-                | arrow::datatypes::TimeUnit::Nanosecond
-        ),
-
-        DataType::List(_) => true,
-        _ => false,
-    }
-}
-
-impl TryFrom<&arrow::datatypes::DataType> for protobuf::scalar_type::Datatype {
-    type Error = BallistaError;
-    fn try_from(val: &arrow::datatypes::DataType) -> Result<Self, Self::Error> {
-        use protobuf::scalar_type;
-        use protobuf::Field;
-        use protobuf::{List, PrimitiveScalarType};
-        let scalar_value = match val {
-            DataType::Boolean => scalar_type::Datatype::Scalar(PrimitiveScalarType::Bool as i32),
-            DataType::Int8 => scalar_type::Datatype::Scalar(PrimitiveScalarType::Int8 as i32),
-            DataType::Int16 => scalar_type::Datatype::Scalar(PrimitiveScalarType::Int16 as i32),
-            DataType::Int32 => scalar_type::Datatype::Scalar(PrimitiveScalarType::Int32 as i32),
-            DataType::Int64 => scalar_type::Datatype::Scalar(PrimitiveScalarType::Int64 as i32),
-            DataType::UInt8 => scalar_type::Datatype::Scalar(PrimitiveScalarType::Uint8 as i32),
-            DataType::UInt16 => scalar_type::Datatype::Scalar(PrimitiveScalarType::Uint16 as i32),
-            DataType::UInt32 => scalar_type::Datatype::Scalar(PrimitiveScalarType::Uint32 as i32),
-            DataType::UInt64 => scalar_type::Datatype::Scalar(PrimitiveScalarType::Uint64 as i32),
-            DataType::Float32 => scalar_type::Datatype::Scalar(PrimitiveScalarType::Float32 as i32),
-            DataType::Float64 => scalar_type::Datatype::Scalar(PrimitiveScalarType::Float64 as i32),
-            DataType::Date32 => scalar_type::Datatype::Scalar(PrimitiveScalarType::Date32 as i32),
-            DataType::Time64(time_unit) => match time_unit {
-                arrow::datatypes::TimeUnit::Microsecond => scalar_type::Datatype::Scalar(PrimitiveScalarType::TimeMicrosecond as i32),
-                arrow::datatypes::TimeUnit::Nanosecond => scalar_type::Datatype::Scalar(PrimitiveScalarType::TimeNanosecond as i32),
-                _ => {
-                    return Err(proto_error(format!(
-                        "Found invalid time unit for scalar value, only TimeUnit::Microsecond and TimeUnit::Nanosecond are valid time units: {:?}",
-                        time_unit
-                    )))
-                }
-            },
-            DataType::Utf8 => scalar_type::Datatype::Scalar(PrimitiveScalarType::Utf8 as i32),
-            DataType::LargeUtf8 => scalar_type::Datatype::Scalar(PrimitiveScalarType::LargeUtf8 as i32),
-            DataType::List(field_type) => {
-                let mut field_names: Vec<String> = Vec::new();
-                let mut curr_field: &arrow::datatypes::Field = field_type.as_ref();
-                field_names.push(curr_field.name().to_owned());
-                //For each nested field check nested datatype, since datafusion scalars only support recursive lists with a leaf scalar type
-                // any other compound types are errors.
-
-                while let DataType::List(nested_field_type) = curr_field.data_type() {
-                    curr_field = nested_field_type.as_ref();
-                    field_names.push(curr_field.name().to_owned());
-                    if !is_valid_scalar_type_no_list_check(curr_field.data_type()) {
-                        return Err(proto_error(format!("{:?} is an invalid scalar type", curr_field)));
-                    }
-                }
-                let deepest_datatype = curr_field.data_type();
-                if !is_valid_scalar_type_no_list_check(deepest_datatype) {
-                    return Err(proto_error(format!("The list nested type {:?} is an invalid scalar type", curr_field)));
-                }
-                let pb_deepest_type: PrimitiveScalarType = match deepest_datatype {
-                    DataType::Boolean => PrimitiveScalarType::Bool,
-                    DataType::Int8 => PrimitiveScalarType::Int8,
-                    DataType::Int16 => PrimitiveScalarType::Int16,
-                    DataType::Int32 => PrimitiveScalarType::Int32,
-                    DataType::Int64 => PrimitiveScalarType::Int64,
-                    DataType::UInt8 => PrimitiveScalarType::Uint8,
-                    DataType::UInt16 => PrimitiveScalarType::Uint16,
-                    DataType::UInt32 => PrimitiveScalarType::Uint32,
-                    DataType::UInt64 => PrimitiveScalarType::Uint64,
-                    DataType::Float32 => PrimitiveScalarType::Float32,
-                    DataType::Float64 => PrimitiveScalarType::Float64,
-                    DataType::Date32 => PrimitiveScalarType::Date32,
-                    DataType::Time64(time_unit) => match time_unit {
-                        arrow::datatypes::TimeUnit::Microsecond => PrimitiveScalarType::TimeMicrosecond,
-                        arrow::datatypes::TimeUnit::Nanosecond => PrimitiveScalarType::TimeNanosecond,
-                        _ => {
-                            return Err(proto_error(format!(
-                                "Found invalid time unit for scalar value, only TimeUnit::Microsecond and TimeUnit::Nanosecond are valid time units: {:?}",
-                                time_unit
-                            )))
-                        }
-                    },
-
-                    DataType::Utf8 => PrimitiveScalarType::Utf8,
-                    DataType::LargeUtf8 => PrimitiveScalarType::LargeUtf8,
-                    _ => {
-                        return Err(proto_error(format!(
-                            "Error converting to Datatype to scalar type, {:?} is invalid as a datafusion scalar.",
-                            val
-                        )))
-                    }
-                };
-                protobuf::scalar_type::Datatype::List(protobuf::ScalarListType {
-                    field_names,
-                    deepest_type: pb_deepest_type as i32,
-                })
-            }
-            DataType::Null
-            | DataType::Float16
-            | DataType::Timestamp(_, _)
-            | DataType::Date64
-            | DataType::Time32(_)
-            | DataType::Duration(_)
-            | DataType::Interval(_)
-            | DataType::Binary
-            | DataType::FixedSizeBinary(_)
-            | DataType::LargeBinary
-            | DataType::FixedSizeList(_, _)
-            | DataType::LargeList(_)
-            | DataType::Struct(_)
-            | DataType::Union(_)
-            | DataType::Dictionary(_, _)
-            | DataType::Decimal(_, _) => {
-                return Err(proto_error(format!(
-                    "Error converting to Datatype to scalar type, {:?} is invalid as a datafusion scalar.",
-                    val
-                )))
-            }
-        };
-        Ok(scalar_value)
-    }
-}
-
-impl TryFrom<&datafusion::scalar::ScalarValue> for protobuf::ScalarValue {
-    type Error = BallistaError;
-    fn try_from(
-        val: &datafusion::scalar::ScalarValue,
-    ) -> Result<protobuf::ScalarValue, Self::Error> {
-        use datafusion::scalar;
-        use protobuf::scalar_value::Value;
-        use protobuf::PrimitiveScalarType;
-        let scalar_val = match val {
-            scalar::ScalarValue::Boolean(val) => {
-                create_proto_scalar(val, PrimitiveScalarType::Bool, |s| Value::BoolValue(*s))
-            }
-            scalar::ScalarValue::Float32(val) => {
-                create_proto_scalar(val, PrimitiveScalarType::Float32, |s| {
-                    Value::Float32Value(*s)
-                })
-            }
-            scalar::ScalarValue::Float64(val) => {
-                create_proto_scalar(val, PrimitiveScalarType::Float64, |s| {
-                    Value::Float64Value(*s)
-                })
-            }
-            scalar::ScalarValue::Int8(val) => {
-                create_proto_scalar(val, PrimitiveScalarType::Int8, |s| {
-                    Value::Int8Value(*s as i32)
-                })
-            }
-            scalar::ScalarValue::Int16(val) => {
-                create_proto_scalar(val, PrimitiveScalarType::Int16, |s| {
-                    Value::Int16Value(*s as i32)
-                })
-            }
-            scalar::ScalarValue::Int32(val) => {
-                create_proto_scalar(val, PrimitiveScalarType::Int32, |s| Value::Int32Value(*s))
-            }
-            scalar::ScalarValue::Int64(val) => {
-                create_proto_scalar(val, PrimitiveScalarType::Int64, |s| Value::Int64Value(*s))
-            }
-            scalar::ScalarValue::UInt8(val) => {
-                create_proto_scalar(val, PrimitiveScalarType::Uint8, |s| {
-                    Value::Uint8Value(*s as u32)
-                })
-            }
-            scalar::ScalarValue::UInt16(val) => {
-                create_proto_scalar(val, PrimitiveScalarType::Uint16, |s| {
-                    Value::Uint16Value(*s as u32)
-                })
-            }
-            scalar::ScalarValue::UInt32(val) => {
-                create_proto_scalar(val, PrimitiveScalarType::Uint32, |s| Value::Uint32Value(*s))
-            }
-            scalar::ScalarValue::UInt64(val) => {
-                create_proto_scalar(val, PrimitiveScalarType::Uint64, |s| Value::Uint64Value(*s))
-            }
-            scalar::ScalarValue::Utf8(val) => {
-                create_proto_scalar(val, PrimitiveScalarType::Utf8, |s| {
-                    Value::Utf8Value(s.to_owned())
-                })
-            }
-            scalar::ScalarValue::LargeUtf8(val) => {
-                create_proto_scalar(val, PrimitiveScalarType::LargeUtf8, |s| {
-                    Value::LargeUtf8Value(s.to_owned())
-                })
-            }
-            scalar::ScalarValue::List(value, datatype) => {
-                println!("Current datatype of list: {:?}", datatype);
-                match value {
-                    Some(values) => {
-                        if values.is_empty() {
-                            protobuf::ScalarValue {
-                                value: Some(protobuf::scalar_value::Value::ListValue(
-                                    protobuf::ScalarListValue {
-                                        datatype: Some(datatype.try_into()?),
-                                        values: Vec::new(),
-                                    },
-                                )),
-                            }
-                        } else {
-                            let scalar_type = match datatype {
-                                DataType::List(field) => field.as_ref().data_type(),
-                                _ => todo!("Proper error handling"),
-                            };
-                            println!("Current scalar type for list: {:?}", scalar_type);
-                            let type_checked_values: Vec<protobuf::ScalarValue> = values
-                                .iter()
-                                .map(|scalar| match (scalar, scalar_type) {
-                                    (scalar::ScalarValue::List(_, arrow::datatypes::DataType::List(list_field)), arrow::datatypes::DataType::List(field)) => {
-                                        let scalar_datatype = field.data_type();
-                                        let list_datatype = list_field.data_type();
-                                        if std::mem::discriminant(list_datatype) != std::mem::discriminant(scalar_datatype) {
-                                            return Err(proto_error(format!(
-                                                "Protobuf serialization error: Lists with inconsistent typing {:?} and {:?} found within list",
-                                                list_datatype, scalar_datatype
-                                            )));
-                                        }
-                                        scalar.try_into()
-                                    }
-                                    (scalar::ScalarValue::Boolean(_), arrow::datatypes::DataType::Boolean) => scalar.try_into(),
-                                    (scalar::ScalarValue::Float32(_), arrow::datatypes::DataType::Float32) => scalar.try_into(),
-                                    (scalar::ScalarValue::Float64(_), arrow::datatypes::DataType::Float64) => scalar.try_into(),
-                                    (scalar::ScalarValue::Int8(_), arrow::datatypes::DataType::Int8) => scalar.try_into(),
-                                    (scalar::ScalarValue::Int16(_), arrow::datatypes::DataType::Int16) => scalar.try_into(),
-                                    (scalar::ScalarValue::Int32(_), arrow::datatypes::DataType::Int32) => scalar.try_into(),
-                                    (scalar::ScalarValue::Int64(_), arrow::datatypes::DataType::Int64) => scalar.try_into(),
-                                    (scalar::ScalarValue::UInt8(_), arrow::datatypes::DataType::UInt8) => scalar.try_into(),
-                                    (scalar::ScalarValue::UInt16(_), arrow::datatypes::DataType::UInt16) => scalar.try_into(),
-                                    (scalar::ScalarValue::UInt32(_), arrow::datatypes::DataType::UInt32) => scalar.try_into(),
-                                    (scalar::ScalarValue::UInt64(_), arrow::datatypes::DataType::UInt64) => scalar.try_into(),
-                                    (scalar::ScalarValue::Utf8(_), arrow::datatypes::DataType::Utf8) => scalar.try_into(),
-                                    (scalar::ScalarValue::LargeUtf8(_), arrow::datatypes::DataType::LargeUtf8) => scalar.try_into(),
-                                    _ => Err(proto_error(format!(
-                                        "Protobuf serialization error, {:?} was inconsistent with designated type {:?}",
-                                        scalar, datatype
-                                    ))),
-                                })
-                                .collect::<Result<Vec<_>, _>>()?;
-                            protobuf::ScalarValue {
-                                value: Some(protobuf::scalar_value::Value::ListValue(
-                                    protobuf::ScalarListValue {
-                                        datatype: Some(datatype.try_into()?),
-                                        values: type_checked_values,
-                                    },
-                                )),
-                            }
-                        }
-                    }
-                    None => protobuf::ScalarValue {
-                        value: Some(protobuf::scalar_value::Value::NullListValue(
-                            datatype.try_into()?,
-                        )),
-                    },
-                }
-            }
-            datafusion::scalar::ScalarValue::Date32(val) => {
-                create_proto_scalar(val, PrimitiveScalarType::Date32, |s| Value::Date32Value(*s))
-            }
-            datafusion::scalar::ScalarValue::TimestampMicrosecond(val) => {
-                create_proto_scalar(val, PrimitiveScalarType::TimeMicrosecond, |s| {
-                    Value::TimeMicrosecondValue(*s)
-                })
-            }
-            datafusion::scalar::ScalarValue::TimestampNanosecond(val) => {
-                create_proto_scalar(val, PrimitiveScalarType::TimeNanosecond, |s| {
-                    Value::TimeNanosecondValue(*s)
-                })
-            }
-            _ => {
-                return Err(proto_error(format!(
-                    "Error converting to Datatype to scalar type, {:?} is invalid as a datafusion scalar.",
-                    val
-                )))
-            }
-        };
-        Ok(scalar_val)
-    }
-}
-
-impl TryInto<protobuf::LogicalPlanNode> for &LogicalPlan {
-    type Error = BallistaError;
-
-    fn try_into(self) -> Result<protobuf::LogicalPlanNode, Self::Error> {
-        use protobuf::logical_plan_node::LogicalPlanType;
-        match self {
-            LogicalPlan::TableScan {
-                table_name,
-                source,
-                filters,
-                projection,
-                ..
-            } => {
-                let schema = source.schema();
-
-                // unwrap the DFTableAdapter to get to the real TableProvider
-                let source = if let Some(adapter) =
-                    source.as_any().downcast_ref::<DFTableAdapter>()
-                {
-                    match &adapter.logical_plan {
-                        LogicalPlan::TableScan { source, .. } => Ok(source.as_any()),
-                        _ => Err(BallistaError::General(
-                            "Invalid LogicalPlan::TableScan".to_owned(),
-                        )),
-                    }
-                } else {
-                    Ok(source.as_any())
-                }?;
-
-                let projection = match projection {
-                    None => None,
-                    Some(columns) => {
-                        let column_names = columns
-                            .iter()
-                            .map(|i| schema.field(*i).name().to_owned())
-                            .collect();
-                        Some(protobuf::ProjectionColumns {
-                            columns: column_names,
-                        })
-                    }
-                };
-                let schema: protobuf::Schema = schema.as_ref().into();
-
-                let filters: Vec<protobuf::LogicalExprNode> = filters
-                    .iter()
-                    .map(|filter| filter.try_into())
-                    .collect::<Result<Vec<_>, _>>()?;
-
-                if let Some(parquet) = source.downcast_ref::<ParquetTable>() {
-                    Ok(protobuf::LogicalPlanNode {
-                        logical_plan_type: Some(LogicalPlanType::ParquetScan(
-                            protobuf::ParquetTableScanNode {
-                                table_name: table_name.to_owned(),
-                                path: parquet.path().to_owned(),
-                                projection,
-                                schema: Some(schema),
-                                filters,
-                            },
-                        )),
-                    })
-                } else if let Some(csv) = source.downcast_ref::<CsvFile>() {
-                    let delimiter = [csv.delimiter()];
-                    let delimiter = std::str::from_utf8(&delimiter).map_err(|_| {
-                        BallistaError::General("Invalid CSV delimiter".to_owned())
-                    })?;
-                    Ok(protobuf::LogicalPlanNode {
-                        logical_plan_type: Some(LogicalPlanType::CsvScan(
-                            protobuf::CsvTableScanNode {
-                                table_name: table_name.to_owned(),
-                                path: csv.path().to_owned(),
-                                projection,
-                                schema: Some(schema),
-                                has_header: csv.has_header(),
-                                delimiter: delimiter.to_string(),
-                                file_extension: csv.file_extension().to_string(),
-                                filters,
-                            },
-                        )),
-                    })
-                } else {
-                    Err(BallistaError::General(format!(
-                        "logical plan to_proto unsupported table provider {:?}",
-                        source
-                    )))
-                }
-            }
-            LogicalPlan::Projection { expr, input, .. } => {
-                Ok(protobuf::LogicalPlanNode {
-                    logical_plan_type: Some(LogicalPlanType::Projection(Box::new(
-                        protobuf::ProjectionNode {
-                            input: Some(Box::new(input.as_ref().try_into()?)),
-                            expr: expr
-                                .iter()
-                                .map(|expr| expr.try_into())
-                                .collect::<Result<Vec<_>, BallistaError>>()?,
-                        },
-                    ))),
-                })
-            }
-            LogicalPlan::Filter { predicate, input } => {
-                let input: protobuf::LogicalPlanNode = input.as_ref().try_into()?;
-                Ok(protobuf::LogicalPlanNode {
-                    logical_plan_type: Some(LogicalPlanType::Selection(Box::new(
-                        protobuf::SelectionNode {
-                            input: Some(Box::new(input)),
-                            expr: Some(predicate.try_into()?),
-                        },
-                    ))),
-                })
-            }
-            LogicalPlan::Aggregate {
-                input,
-                group_expr,
-                aggr_expr,
-                ..
-            } => {
-                let input: protobuf::LogicalPlanNode = input.as_ref().try_into()?;
-                Ok(protobuf::LogicalPlanNode {
-                    logical_plan_type: Some(LogicalPlanType::Aggregate(Box::new(
-                        protobuf::AggregateNode {
-                            input: Some(Box::new(input)),
-                            group_expr: group_expr
-                                .iter()
-                                .map(|expr| expr.try_into())
-                                .collect::<Result<Vec<_>, BallistaError>>()?,
-                            aggr_expr: aggr_expr
-                                .iter()
-                                .map(|expr| expr.try_into())
-                                .collect::<Result<Vec<_>, BallistaError>>()?,
-                        },
-                    ))),
-                })
-            }
-            LogicalPlan::Join {
-                left,
-                right,
-                on,
-                join_type,
-                ..
-            } => {
-                let left: protobuf::LogicalPlanNode = left.as_ref().try_into()?;
-                let right: protobuf::LogicalPlanNode = right.as_ref().try_into()?;
-                let join_type = match join_type {
-                    JoinType::Inner => protobuf::JoinType::Inner,
-                    JoinType::Left => protobuf::JoinType::Left,
-                    JoinType::Right => protobuf::JoinType::Right,
-                };
-                let left_join_column = on.iter().map(|on| on.0.to_owned()).collect();
-                let right_join_column = on.iter().map(|on| on.1.to_owned()).collect();
-                Ok(protobuf::LogicalPlanNode {
-                    logical_plan_type: Some(LogicalPlanType::Join(Box::new(
-                        protobuf::JoinNode {
-                            left: Some(Box::new(left)),
-                            right: Some(Box::new(right)),
-                            join_type: join_type.into(),
-                            left_join_column,
-                            right_join_column,
-                        },
-                    ))),
-                })
-            }
-            LogicalPlan::Limit { input, n } => {
-                let input: protobuf::LogicalPlanNode = input.as_ref().try_into()?;
-                Ok(protobuf::LogicalPlanNode {
-                    logical_plan_type: Some(LogicalPlanType::Limit(Box::new(
-                        protobuf::LimitNode {
-                            input: Some(Box::new(input)),
-                            limit: *n as u32,
-                        },
-                    ))),
-                })
-            }
-            LogicalPlan::Sort { input, expr } => {
-                let input: protobuf::LogicalPlanNode = input.as_ref().try_into()?;
-                let selection_expr: Vec<protobuf::LogicalExprNode> = expr
-                    .iter()
-                    .map(|expr| expr.try_into())
-                    .collect::<Result<Vec<_>, BallistaError>>()?;
-                Ok(protobuf::LogicalPlanNode {
-                    logical_plan_type: Some(LogicalPlanType::Sort(Box::new(
-                        protobuf::SortNode {
-                            input: Some(Box::new(input)),
-                            expr: selection_expr,
-                        },
-                    ))),
-                })
-            }
-            LogicalPlan::Repartition {
-                input,
-                partitioning_scheme,
-            } => {
-                use datafusion::logical_plan::Partitioning;
-                let input: protobuf::LogicalPlanNode = input.as_ref().try_into()?;
-
-                //Assumed common usize field was batch size
-                //Used u64 to avoid any nastyness involving large values, most data clusters are probably uniformly 64 bits any ways
-                use protobuf::repartition_node::PartitionMethod;
-
-                let pb_partition_method = match partitioning_scheme {
-                    Partitioning::Hash(exprs, partition_count) => {
-                        PartitionMethod::Hash(protobuf::HashRepartition {
-                            hash_expr: exprs
-                                .iter()
-                                .map(|expr| expr.try_into())
-                                .collect::<Result<Vec<_>, BallistaError>>()?,
-                            partition_count: *partition_count as u64,
-                        })
-                    }
-                    Partitioning::RoundRobinBatch(batch_size) => {
-                        PartitionMethod::RoundRobin(*batch_size as u64)
-                    }
-                };
-
-                Ok(protobuf::LogicalPlanNode {
-                    logical_plan_type: Some(LogicalPlanType::Repartition(Box::new(
-                        protobuf::RepartitionNode {
-                            input: Some(Box::new(input)),
-                            partition_method: Some(pb_partition_method),
-                        },
-                    ))),
-                })
-            }
-            LogicalPlan::EmptyRelation {
-                produce_one_row, ..
-            } => Ok(protobuf::LogicalPlanNode {
-                logical_plan_type: Some(LogicalPlanType::EmptyRelation(
-                    protobuf::EmptyRelationNode {
-                        produce_one_row: *produce_one_row,
-                    },
-                )),
-            }),
-            LogicalPlan::CreateExternalTable {
-                name,
-                location,
-                file_type,
-                has_header,
-                schema: df_schema,
-            } => {
-                use datafusion::sql::parser::FileType;
-                let schema: Schema = df_schema.as_ref().clone().into();
-                let pb_schema: protobuf::Schema = (&schema).try_into().map_err(|e| {
-                    BallistaError::General(format!(
-                        "Could not convert schema into protobuf: {:?}",
-                        e
-                    ))
-                })?;
-
-                let pb_file_type: protobuf::FileType = match file_type {
-                    FileType::NdJson => protobuf::FileType::NdJson,
-                    FileType::Parquet => protobuf::FileType::Parquet,
-                    FileType::CSV => protobuf::FileType::Csv,
-                };
-
-                Ok(protobuf::LogicalPlanNode {
-                    logical_plan_type: Some(LogicalPlanType::CreateExternalTable(
-                        protobuf::CreateExternalTableNode {
-                            name: name.clone(),
-                            location: location.clone(),
-                            file_type: pb_file_type as i32,
-                            has_header: *has_header,
-                            schema: Some(pb_schema),
-                        },
-                    )),
-                })
-            }
-            LogicalPlan::Explain { verbose, plan, .. } => {
-                let input: protobuf::LogicalPlanNode = plan.as_ref().try_into()?;
-                Ok(protobuf::LogicalPlanNode {
-                    logical_plan_type: Some(LogicalPlanType::Explain(Box::new(
-                        protobuf::ExplainNode {
-                            input: Some(Box::new(input)),
-                            verbose: *verbose,
-                        },
-                    ))),
-                })
-            }
-            LogicalPlan::Extension { .. } => unimplemented!(),
-            LogicalPlan::Union { .. } => unimplemented!(),
-        }
-    }
-}
-
-fn create_proto_scalar<I, T: FnOnce(&I) -> protobuf::scalar_value::Value>(
-    v: &Option<I>,
-    null_arrow_type: protobuf::PrimitiveScalarType,
-    constructor: T,
-) -> protobuf::ScalarValue {
-    protobuf::ScalarValue {
-        value: Some(v.as_ref().map(constructor).unwrap_or(
-            protobuf::scalar_value::Value::NullValue(null_arrow_type as i32),
-        )),
-    }
-}
-
-impl TryInto<protobuf::LogicalExprNode> for &Expr {
-    type Error = BallistaError;
-
-    fn try_into(self) -> Result<protobuf::LogicalExprNode, Self::Error> {
-        use datafusion::scalar::ScalarValue;
-        use protobuf::scalar_value::Value;
-        match self {
-            Expr::Column(name) => {
-                let expr = protobuf::LogicalExprNode {
-                    expr_type: Some(ExprType::ColumnName(name.clone())),
-                };
-                Ok(expr)
-            }
-            Expr::Alias(expr, alias) => {
-                let alias = Box::new(protobuf::AliasNode {
-                    expr: Some(Box::new(expr.as_ref().try_into()?)),
-                    alias: alias.to_owned(),
-                });
-                let expr = protobuf::LogicalExprNode {
-                    expr_type: Some(ExprType::Alias(alias)),
-                };
-                Ok(expr)
-            }
-            Expr::Literal(value) => {
-                let pb_value: protobuf::ScalarValue = value.try_into()?;
-                Ok(protobuf::LogicalExprNode {
-                    expr_type: Some(ExprType::Literal(pb_value)),
-                })
-            }
-            Expr::BinaryExpr { left, op, right } => {
-                let binary_expr = Box::new(protobuf::BinaryExprNode {
-                    l: Some(Box::new(left.as_ref().try_into()?)),
-                    r: Some(Box::new(right.as_ref().try_into()?)),
-                    op: format!("{:?}", op),
-                });
-                Ok(protobuf::LogicalExprNode {
-                    expr_type: Some(ExprType::BinaryExpr(binary_expr)),
-                })
-            }
-            Expr::AggregateFunction {
-                ref fun, ref args, ..
-            } => {
-                let aggr_function = match fun {
-                    AggregateFunction::Min => protobuf::AggregateFunction::Min,
-                    AggregateFunction::Max => protobuf::AggregateFunction::Max,
-                    AggregateFunction::Sum => protobuf::AggregateFunction::Sum,
-                    AggregateFunction::Avg => protobuf::AggregateFunction::Avg,
-                    AggregateFunction::Count => protobuf::AggregateFunction::Count,
-                };
-
-                let arg = &args[0];
-                let aggregate_expr = Box::new(protobuf::AggregateExprNode {
-                    aggr_function: aggr_function.into(),
-                    expr: Some(Box::new(arg.try_into()?)),
-                });
-                Ok(protobuf::LogicalExprNode {
-                    expr_type: Some(ExprType::AggregateExpr(aggregate_expr)),
-                })
-            }
-            Expr::ScalarVariable(_) => unimplemented!(),
-            Expr::ScalarFunction { ref fun, ref args } => {
-                let fun: protobuf::ScalarFunction = fun.try_into()?;
-                let expr: Vec<protobuf::LogicalExprNode> = args
-                    .iter()
-                    .map(|e| Ok(e.try_into()?))
-                    .collect::<Result<Vec<protobuf::LogicalExprNode>, BallistaError>>()?;
-                Ok(protobuf::LogicalExprNode {
-                    expr_type: Some(
-                        protobuf::logical_expr_node::ExprType::ScalarFunction(
-                            protobuf::ScalarFunctionNode {
-                                fun: fun.into(),
-                                expr,
-                            },
-                        ),
-                    ),
-                })
-            }
-            Expr::ScalarUDF { .. } => unimplemented!(),
-            Expr::AggregateUDF { .. } => unimplemented!(),
-            Expr::Not(expr) => {
-                let expr = Box::new(protobuf::Not {
-                    expr: Some(Box::new(expr.as_ref().try_into()?)),
-                });
-                Ok(protobuf::LogicalExprNode {
-                    expr_type: Some(ExprType::NotExpr(expr)),
-                })
-            }
-            Expr::IsNull(expr) => {
-                let expr = Box::new(protobuf::IsNull {
-                    expr: Some(Box::new(expr.as_ref().try_into()?)),
-                });
-                Ok(protobuf::LogicalExprNode {
-                    expr_type: Some(ExprType::IsNullExpr(expr)),
-                })
-            }
-            Expr::IsNotNull(expr) => {
-                let expr = Box::new(protobuf::IsNotNull {
-                    expr: Some(Box::new(expr.as_ref().try_into()?)),
-                });
-                Ok(protobuf::LogicalExprNode {
-                    expr_type: Some(ExprType::IsNotNullExpr(expr)),
-                })
-            }
-            Expr::Between {
-                expr,
-                negated,
-                low,
-                high,
-            } => {
-                let expr = Box::new(protobuf::BetweenNode {
-                    expr: Some(Box::new(expr.as_ref().try_into()?)),
-                    negated: *negated,
-                    low: Some(Box::new(low.as_ref().try_into()?)),
-                    high: Some(Box::new(high.as_ref().try_into()?)),
-                });
-                Ok(protobuf::LogicalExprNode {
-                    expr_type: Some(ExprType::Between(expr)),
-                })
-            }
-            Expr::Case {
-                expr,
-                when_then_expr,
-                else_expr,
-            } => {
-                let when_then_expr = when_then_expr
-                    .iter()
-                    .map(|(w, t)| {
-                        Ok(protobuf::WhenThen {
-                            when_expr: Some(w.as_ref().try_into()?),
-                            then_expr: Some(t.as_ref().try_into()?),
-                        })
-                    })
-                    .collect::<Result<Vec<protobuf::WhenThen>, BallistaError>>()?;
-                let expr = Box::new(protobuf::CaseNode {
-                    expr: match expr {
-                        Some(e) => Some(Box::new(e.as_ref().try_into()?)),
-                        None => None,
-                    },
-                    when_then_expr,
-                    else_expr: match else_expr {
-                        Some(e) => Some(Box::new(e.as_ref().try_into()?)),
-                        None => None,
-                    },
-                });
-                Ok(protobuf::LogicalExprNode {
-                    expr_type: Some(ExprType::Case(expr)),
-                })
-            }
-            Expr::Cast { expr, data_type } => {
-                let expr = Box::new(protobuf::CastNode {
-                    expr: Some(Box::new(expr.as_ref().try_into()?)),
-                    arrow_type: Some(data_type.into()),
-                });
-                Ok(protobuf::LogicalExprNode {
-                    expr_type: Some(ExprType::Cast(expr)),
-                })
-            }
-            Expr::Sort {
-                expr,
-                asc,
-                nulls_first,
-            } => {
-                let expr = Box::new(protobuf::SortExprNode {
-                    expr: Some(Box::new(expr.as_ref().try_into()?)),
-                    asc: *asc,
-                    nulls_first: *nulls_first,
-                });
-                Ok(protobuf::LogicalExprNode {
-                    expr_type: Some(ExprType::Sort(expr)),
-                })
-            }
-            Expr::Negative(expr) => {
-                let expr = Box::new(protobuf::NegativeNode {
-                    expr: Some(Box::new(expr.as_ref().try_into()?)),
-                });
-                Ok(protobuf::LogicalExprNode {
-                    expr_type: Some(protobuf::logical_expr_node::ExprType::Negative(
-                        expr,
-                    )),
-                })
-            }
-            Expr::InList {
-                expr,
-                list,
-                negated,
-            } => {
-                let expr = Box::new(protobuf::InListNode {
-                    expr: Some(Box::new(expr.as_ref().try_into()?)),
-                    list: list.iter().map(|expr| expr.try_into()).collect::<Result<
-                        Vec<_>,
-                        BallistaError,
-                    >>(
-                    )?,
-                    negated: *negated,
-                });
-                Ok(protobuf::LogicalExprNode {
-                    expr_type: Some(protobuf::logical_expr_node::ExprType::InList(expr)),
-                })
-            }
-            Expr::Wildcard => Ok(protobuf::LogicalExprNode {
-                expr_type: Some(protobuf::logical_expr_node::ExprType::Wildcard(true)),
-            }),
-            Expr::TryCast { .. } => unimplemented!(),
-        }
-    }
-}
-
-impl Into<protobuf::Schema> for &Schema {
-    fn into(self) -> protobuf::Schema {
-        protobuf::Schema {
-            columns: self
-                .fields()
-                .iter()
-                .map(protobuf::Field::from)
-                .collect::<Vec<_>>(),
-        }
-    }
-}
-
-impl TryFrom<&arrow::datatypes::DataType> for protobuf::ScalarType {
-    type Error = BallistaError;
-    fn try_from(value: &arrow::datatypes::DataType) -> Result<Self, Self::Error> {
-        let datatype = protobuf::scalar_type::Datatype::try_from(value)?;
-        Ok(protobuf::ScalarType {
-            datatype: Some(datatype),
-        })
-    }
-}
-
-impl TryInto<protobuf::ScalarFunction> for &BuiltinScalarFunction {
-    type Error = BallistaError;
-    fn try_into(self) -> Result<protobuf::ScalarFunction, Self::Error> {
-        match self {
-            BuiltinScalarFunction::Sqrt => Ok(protobuf::ScalarFunction::Sqrt),
-            BuiltinScalarFunction::Sin => Ok(protobuf::ScalarFunction::Sin),
-            BuiltinScalarFunction::Cos => Ok(protobuf::ScalarFunction::Cos),
-            BuiltinScalarFunction::Tan => Ok(protobuf::ScalarFunction::Tan),
-            BuiltinScalarFunction::Asin => Ok(protobuf::ScalarFunction::Asin),
-            BuiltinScalarFunction::Acos => Ok(protobuf::ScalarFunction::Acos),
-            BuiltinScalarFunction::Atan => Ok(protobuf::ScalarFunction::Atan),
-            BuiltinScalarFunction::Exp => Ok(protobuf::ScalarFunction::Exp),
-            BuiltinScalarFunction::Log => Ok(protobuf::ScalarFunction::Log),
-            BuiltinScalarFunction::Log10 => Ok(protobuf::ScalarFunction::Log10),
-            BuiltinScalarFunction::Floor => Ok(protobuf::ScalarFunction::Floor),
-            BuiltinScalarFunction::Ceil => Ok(protobuf::ScalarFunction::Ceil),
-            BuiltinScalarFunction::Round => Ok(protobuf::ScalarFunction::Round),
-            BuiltinScalarFunction::Trunc => Ok(protobuf::ScalarFunction::Trunc),
-            BuiltinScalarFunction::Abs => Ok(protobuf::ScalarFunction::Abs),
-            BuiltinScalarFunction::OctetLength => {
-                Ok(protobuf::ScalarFunction::Octetlength)
-            }
-            BuiltinScalarFunction::Concat => Ok(protobuf::ScalarFunction::Concat),
-            BuiltinScalarFunction::Lower => Ok(protobuf::ScalarFunction::Lower),
-            BuiltinScalarFunction::Upper => Ok(protobuf::ScalarFunction::Upper),
-            BuiltinScalarFunction::Trim => Ok(protobuf::ScalarFunction::Trim),
-            BuiltinScalarFunction::Ltrim => Ok(protobuf::ScalarFunction::Ltrim),
-            BuiltinScalarFunction::Rtrim => Ok(protobuf::ScalarFunction::Rtrim),
-            BuiltinScalarFunction::ToTimestamp => {
-                Ok(protobuf::ScalarFunction::Totimestamp)
-            }
-            BuiltinScalarFunction::Array => Ok(protobuf::ScalarFunction::Array),
-            BuiltinScalarFunction::NullIf => Ok(protobuf::ScalarFunction::Nullif),
-            BuiltinScalarFunction::DateTrunc => Ok(protobuf::ScalarFunction::Datetrunc),
-            BuiltinScalarFunction::MD5 => Ok(protobuf::ScalarFunction::Md5),
-            BuiltinScalarFunction::SHA224 => Ok(protobuf::ScalarFunction::Sha224),
-            BuiltinScalarFunction::SHA256 => Ok(protobuf::ScalarFunction::Sha256),
-            BuiltinScalarFunction::SHA384 => Ok(protobuf::ScalarFunction::Sha384),
-            BuiltinScalarFunction::SHA512 => Ok(protobuf::ScalarFunction::Sha512),
-            _ => Err(BallistaError::General(format!(
-                "logical_plan::to_proto() unsupported scalar function {:?}",
-                self
-            ))),
-        }
-    }
-}
diff --git a/rust/ballista/rust/core/src/serde/mod.rs b/rust/ballista/rust/core/src/serde/mod.rs
deleted file mode 100644
index b96163999f3..00000000000
--- a/rust/ballista/rust/core/src/serde/mod.rs
+++ /dev/null
@@ -1,69 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! This crate contains code generated from the Ballista Protocol Buffer Definition as well
-//! as convenience code for interacting with the generated code.
-
-use std::{convert::TryInto, io::Cursor};
-
-use crate::{error::BallistaError, serde::scheduler::Action as BallistaAction};
-
-use prost::Message;
-
-// include the generated protobuf source as a submodule
-#[allow(clippy::all)]
-pub mod protobuf {
-    include!(concat!(env!("OUT_DIR"), "/ballista.protobuf.rs"));
-}
-
-pub mod logical_plan;
-pub mod physical_plan;
-pub mod scheduler;
-
-pub fn decode_protobuf(bytes: &[u8]) -> Result<BallistaAction, BallistaError> {
-    let mut buf = Cursor::new(bytes);
-
-    protobuf::Action::decode(&mut buf)
-        .map_err(|e| BallistaError::Internal(format!("{:?}", e)))
-        .and_then(|node| node.try_into())
-}
-
-pub(crate) fn proto_error<S: Into<String>>(message: S) -> BallistaError {
-    BallistaError::General(message.into())
-}
-
-#[macro_export]
-macro_rules! convert_required {
-    ($PB:expr) => {{
-        if let Some(field) = $PB.as_ref() {
-            field.try_into()
-        } else {
-            Err(proto_error("Missing required field in protobuf"))
-        }
-    }};
-}
-
-#[macro_export]
-macro_rules! convert_box_required {
-    ($PB:expr) => {{
-        if let Some(field) = $PB.as_ref() {
-            field.as_ref().try_into()
-        } else {
-            Err(proto_error("Missing required field in protobuf"))
-        }
-    }};
-}
diff --git a/rust/ballista/rust/core/src/serde/physical_plan/from_proto.rs b/rust/ballista/rust/core/src/serde/physical_plan/from_proto.rs
deleted file mode 100644
index be0777dbb9a..00000000000
--- a/rust/ballista/rust/core/src/serde/physical_plan/from_proto.rs
+++ /dev/null
@@ -1,398 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Serde code to convert from protocol buffers to Rust data structures.
-
-use std::collections::HashMap;
-use std::convert::TryInto;
-use std::sync::Arc;
-
-use crate::error::BallistaError;
-use crate::execution_plans::{ShuffleReaderExec, UnresolvedShuffleExec};
-use crate::serde::protobuf::repartition_exec_node::PartitionMethod;
-use crate::serde::protobuf::LogicalExprNode;
-use crate::serde::scheduler::PartitionLocation;
-use crate::serde::{proto_error, protobuf};
-use crate::{convert_box_required, convert_required};
-
-use arrow::datatypes::{DataType, Schema, SchemaRef};
-use datafusion::catalog::catalog::{
-    CatalogList, CatalogProvider, MemoryCatalogList, MemoryCatalogProvider,
-};
-use datafusion::execution::context::{ExecutionConfig, ExecutionContextState};
-use datafusion::logical_plan::{DFSchema, Expr};
-use datafusion::physical_plan::aggregates::{create_aggregate_expr, AggregateFunction};
-use datafusion::physical_plan::expressions::col;
-use datafusion::physical_plan::hash_aggregate::{AggregateMode, HashAggregateExec};
-use datafusion::physical_plan::hash_join::PartitionMode;
-use datafusion::physical_plan::merge::MergeExec;
-use datafusion::physical_plan::planner::DefaultPhysicalPlanner;
-use datafusion::physical_plan::{
-    coalesce_batches::CoalesceBatchesExec,
-    csv::CsvExec,
-    empty::EmptyExec,
-    expressions::{Avg, Column, PhysicalSortExpr},
-    filter::FilterExec,
-    hash_join::HashJoinExec,
-    hash_utils::JoinType,
-    limit::{GlobalLimitExec, LocalLimitExec},
-    parquet::ParquetExec,
-    projection::ProjectionExec,
-    repartition::RepartitionExec,
-    sort::{SortExec, SortOptions},
-    Partitioning,
-};
-use datafusion::physical_plan::{AggregateExpr, ExecutionPlan, PhysicalExpr};
-use datafusion::prelude::CsvReadOptions;
-use log::debug;
-use protobuf::logical_expr_node::ExprType;
-use protobuf::physical_plan_node::PhysicalPlanType;
-
-impl TryInto<Arc<dyn ExecutionPlan>> for &protobuf::PhysicalPlanNode {
-    type Error = BallistaError;
-
-    fn try_into(self) -> Result<Arc<dyn ExecutionPlan>, Self::Error> {
-        let plan = self.physical_plan_type.as_ref().ok_or_else(|| {
-            proto_error(format!(
-                "physical_plan::from_proto() Unsupported physical plan '{:?}'",
-                self
-            ))
-        })?;
-        match plan {
-            PhysicalPlanType::Projection(projection) => {
-                let input: Arc<dyn ExecutionPlan> =
-                    convert_box_required!(projection.input)?;
-                let exprs = projection
-                    .expr
-                    .iter()
-                    .zip(projection.expr_name.iter())
-                    .map(|(expr, name)| {
-                        compile_expr(expr, &input.schema()).map(|e| (e, name.to_string()))
-                    })
-                    .collect::<Result<Vec<_>, _>>()?;
-                Ok(Arc::new(ProjectionExec::try_new(exprs, input)?))
-            }
-            PhysicalPlanType::Filter(filter) => {
-                let input: Arc<dyn ExecutionPlan> = convert_box_required!(filter.input)?;
-                let predicate = compile_expr(
-                    filter.expr.as_ref().ok_or_else(|| {
-                        BallistaError::General(
-                            "filter (FilterExecNode) in PhysicalPlanNode is missing."
-                                .to_owned(),
-                        )
-                    })?,
-                    &input.schema(),
-                )?;
-                Ok(Arc::new(FilterExec::try_new(predicate, input)?))
-            }
-            PhysicalPlanType::CsvScan(scan) => {
-                let schema = Arc::new(convert_required!(scan.schema)?);
-                let options = CsvReadOptions::new()
-                    .has_header(scan.has_header)
-                    .file_extension(&scan.file_extension)
-                    .delimiter(scan.delimiter.as_bytes()[0])
-                    .schema(&schema);
-                let projection = scan.projection.iter().map(|i| *i as usize).collect();
-                Ok(Arc::new(CsvExec::try_new(
-                    &scan.path,
-                    options,
-                    Some(projection),
-                    scan.batch_size as usize,
-                    None,
-                )?))
-            }
-            PhysicalPlanType::ParquetScan(scan) => {
-                let projection = scan.projection.iter().map(|i| *i as usize).collect();
-                let filenames: Vec<&str> =
-                    scan.filename.iter().map(|s| s.as_str()).collect();
-                Ok(Arc::new(ParquetExec::try_from_files(
-                    &filenames,
-                    Some(projection),
-                    None,
-                    scan.batch_size as usize,
-                    scan.num_partitions as usize,
-                    None,
-                )?))
-            }
-            PhysicalPlanType::CoalesceBatches(coalesce_batches) => {
-                let input: Arc<dyn ExecutionPlan> =
-                    convert_box_required!(coalesce_batches.input)?;
-                Ok(Arc::new(CoalesceBatchesExec::new(
-                    input,
-                    coalesce_batches.target_batch_size as usize,
-                )))
-            }
-            PhysicalPlanType::Merge(merge) => {
-                let input: Arc<dyn ExecutionPlan> = convert_box_required!(merge.input)?;
-                Ok(Arc::new(MergeExec::new(input)))
-            }
-            PhysicalPlanType::Repartition(repart) => {
-                let input: Arc<dyn ExecutionPlan> = convert_box_required!(repart.input)?;
-                match repart.partition_method {
-                    Some(PartitionMethod::Hash(ref hash_part)) => {
-                        let expr = hash_part
-                            .hash_expr
-                            .iter()
-                            .map(|e| compile_expr(e, &input.schema()))
-                            .collect::<Result<Vec<Arc<dyn PhysicalExpr>>, _>>()?;
-
-                        Ok(Arc::new(RepartitionExec::try_new(
-                            input,
-                            Partitioning::Hash(
-                                expr,
-                                hash_part.partition_count.try_into().unwrap(),
-                            ),
-                        )?))
-                    }
-                    Some(PartitionMethod::RoundRobin(partition_count)) => {
-                        Ok(Arc::new(RepartitionExec::try_new(
-                            input,
-                            Partitioning::RoundRobinBatch(
-                                partition_count.try_into().unwrap(),
-                            ),
-                        )?))
-                    }
-                    Some(PartitionMethod::Unknown(partition_count)) => {
-                        Ok(Arc::new(RepartitionExec::try_new(
-                            input,
-                            Partitioning::UnknownPartitioning(
-                                partition_count.try_into().unwrap(),
-                            ),
-                        )?))
-                    }
-                    _ => Err(BallistaError::General(
-                        "Invalid partitioning scheme".to_owned(),
-                    )),
-                }
-            }
-            PhysicalPlanType::GlobalLimit(limit) => {
-                let input: Arc<dyn ExecutionPlan> = convert_box_required!(limit.input)?;
-                Ok(Arc::new(GlobalLimitExec::new(input, limit.limit as usize)))
-            }
-            PhysicalPlanType::LocalLimit(limit) => {
-                let input: Arc<dyn ExecutionPlan> = convert_box_required!(limit.input)?;
-                Ok(Arc::new(LocalLimitExec::new(input, limit.limit as usize)))
-            }
-            PhysicalPlanType::HashAggregate(hash_agg) => {
-                let input: Arc<dyn ExecutionPlan> =
-                    convert_box_required!(hash_agg.input)?;
-                let mode = protobuf::AggregateMode::from_i32(hash_agg.mode).ok_or_else(|| {
-                    proto_error(format!(
-                        "Received a HashAggregateNode message with unknown AggregateMode {}",
-                        hash_agg.mode
-                    ))
-                })?;
-                let agg_mode: AggregateMode = match mode {
-                    protobuf::AggregateMode::Partial => AggregateMode::Partial,
-                    protobuf::AggregateMode::Final => AggregateMode::Final,
-                };
-
-                let group = hash_agg
-                    .group_expr
-                    .iter()
-                    .zip(hash_agg.group_expr_name.iter())
-                    .map(|(expr, name)| {
-                        compile_expr(expr, &input.schema()).map(|e| (e, name.to_string()))
-                    })
-                    .collect::<Result<Vec<_>, _>>()?;
-
-                let logical_agg_expr: Vec<(Expr, String)> = hash_agg
-                    .aggr_expr
-                    .iter()
-                    .zip(hash_agg.aggr_expr_name.iter())
-                    .map(|(expr, name)| expr.try_into().map(|expr| (expr, name.clone())))
-                    .collect::<Result<Vec<_>, _>>()?;
-
-                let df_planner = DefaultPhysicalPlanner::default();
-                let catalog_list =
-                    Arc::new(MemoryCatalogList::new()) as Arc<dyn CatalogList>;
-                let ctx_state = ExecutionContextState {
-                    catalog_list,
-                    scalar_functions: Default::default(),
-                    var_provider: Default::default(),
-                    aggregate_functions: Default::default(),
-                    config: ExecutionConfig::new(),
-                };
-
-                let input_schema = hash_agg
-                    .input_schema
-                    .as_ref()
-                    .ok_or_else(|| {
-                        BallistaError::General(
-                            "input_schema in HashAggregateNode is missing.".to_owned(),
-                        )
-                    })?
-                    .clone();
-                let physical_schema: SchemaRef =
-                    SchemaRef::new((&input_schema).try_into()?);
-
-                let mut physical_aggr_expr = vec![];
-
-                for (expr, name) in &logical_agg_expr {
-                    match expr {
-                        Expr::AggregateFunction { fun, args, .. } => {
-                            let arg = df_planner
-                                .create_physical_expr(
-                                    &args[0],
-                                    &physical_schema,
-                                    &ctx_state,
-                                )
-                                .map_err(|e| {
-                                    BallistaError::General(format!("{:?}", e))
-                                })?;
-                            physical_aggr_expr.push(create_aggregate_expr(
-                                &fun,
-                                false,
-                                &[arg],
-                                &physical_schema,
-                                name.to_string(),
-                            )?);
-                        }
-                        _ => {
-                            return Err(BallistaError::General(
-                                "Invalid expression for HashAggregateExec".to_string(),
-                            ))
-                        }
-                    }
-                }
-                Ok(Arc::new(HashAggregateExec::try_new(
-                    agg_mode,
-                    group,
-                    physical_aggr_expr,
-                    input,
-                    Arc::new((&input_schema).try_into()?),
-                )?))
-            }
-            PhysicalPlanType::HashJoin(hashjoin) => {
-                let left: Arc<dyn ExecutionPlan> = convert_box_required!(hashjoin.left)?;
-                let right: Arc<dyn ExecutionPlan> =
-                    convert_box_required!(hashjoin.right)?;
-                let on: Vec<(String, String)> = hashjoin
-                    .on
-                    .iter()
-                    .map(|col| (col.left.clone(), col.right.clone()))
-                    .collect();
-                let join_type = protobuf::JoinType::from_i32(hashjoin.join_type)
-                    .ok_or_else(|| {
-                        proto_error(format!(
-                            "Received a HashJoinNode message with unknown JoinType {}",
-                            hashjoin.join_type
-                        ))
-                    })?;
-                let join_type = match join_type {
-                    protobuf::JoinType::Inner => JoinType::Inner,
-                    protobuf::JoinType::Left => JoinType::Left,
-                    protobuf::JoinType::Right => JoinType::Right,
-                };
-                Ok(Arc::new(HashJoinExec::try_new(
-                    left,
-                    right,
-                    &on,
-                    &join_type,
-                    PartitionMode::CollectLeft,
-                )?))
-            }
-            PhysicalPlanType::ShuffleReader(shuffle_reader) => {
-                let schema = Arc::new(convert_required!(shuffle_reader.schema)?);
-                let partition_location: Vec<PartitionLocation> = shuffle_reader
-                    .partition_location
-                    .iter()
-                    .map(|p| p.clone().try_into())
-                    .collect::<Result<Vec<_>, BallistaError>>()?;
-                let shuffle_reader =
-                    ShuffleReaderExec::try_new(partition_location, schema)?;
-                Ok(Arc::new(shuffle_reader))
-            }
-            PhysicalPlanType::Empty(empty) => {
-                let schema = Arc::new(convert_required!(empty.schema)?);
-                Ok(Arc::new(EmptyExec::new(empty.produce_one_row, schema)))
-            }
-            PhysicalPlanType::Sort(sort) => {
-                let input: Arc<dyn ExecutionPlan> = convert_box_required!(sort.input)?;
-                let exprs = sort
-                    .expr
-                    .iter()
-                    .map(|expr| {
-                        let expr = expr.expr_type.as_ref().ok_or_else(|| {
-                            proto_error(format!(
-                                "physical_plan::from_proto() Unexpected expr {:?}",
-                                self
-                            ))
-                        })?;
-                        if let protobuf::logical_expr_node::ExprType::Sort(sort_expr) = expr {
-                            let expr = sort_expr
-                                .expr
-                                .as_ref()
-                                .ok_or_else(|| {
-                                    proto_error(format!(
-                                        "physical_plan::from_proto() Unexpected sort expr {:?}",
-                                        self
-                                    ))
-                                })?
-                                .as_ref();
-                            Ok(PhysicalSortExpr {
-                                expr: compile_expr(expr, &input.schema())?,
-                                options: SortOptions {
-                                    descending: !sort_expr.asc,
-                                    nulls_first: sort_expr.nulls_first,
-                                },
-                            })
-                        } else {
-                            Err(BallistaError::General(format!(
-                                "physical_plan::from_proto() {:?}",
-                                self
-                            )))
-                        }
-                    })
-                    .collect::<Result<Vec<_>, _>>()?;
-                // Update concurrency here in the future
-                Ok(Arc::new(SortExec::try_new(exprs, input)?))
-            }
-            PhysicalPlanType::Unresolved(unresolved_shuffle) => {
-                let schema = Arc::new(convert_required!(unresolved_shuffle.schema)?);
-                Ok(Arc::new(UnresolvedShuffleExec {
-                    query_stage_ids: unresolved_shuffle
-                        .query_stage_ids
-                        .iter()
-                        .map(|id| *id as usize)
-                        .collect(),
-                    schema,
-                    partition_count: unresolved_shuffle.partition_count as usize,
-                }))
-            }
-        }
-    }
-}
-
-fn compile_expr(
-    expr: &protobuf::LogicalExprNode,
-    schema: &Schema,
-) -> Result<Arc<dyn PhysicalExpr>, BallistaError> {
-    let df_planner = DefaultPhysicalPlanner::default();
-    let catalog_list = Arc::new(MemoryCatalogList::new()) as Arc<dyn CatalogList>;
-    let state = ExecutionContextState {
-        catalog_list,
-        scalar_functions: HashMap::new(),
-        var_provider: HashMap::new(),
-        aggregate_functions: HashMap::new(),
-        config: ExecutionConfig::new(),
-    };
-    let expr: Expr = expr.try_into()?;
-    df_planner
-        .create_physical_expr(&expr, schema, &state)
-        .map_err(|e| BallistaError::General(format!("{:?}", e)))
-}
diff --git a/rust/ballista/rust/core/src/serde/physical_plan/mod.rs b/rust/ballista/rust/core/src/serde/physical_plan/mod.rs
deleted file mode 100644
index e7985cc84a9..00000000000
--- a/rust/ballista/rust/core/src/serde/physical_plan/mod.rs
+++ /dev/null
@@ -1,178 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-pub mod from_proto;
-pub mod to_proto;
-
-#[cfg(test)]
-mod roundtrip_tests {
-    use datafusion::physical_plan::hash_utils::JoinType;
-    use std::{convert::TryInto, sync::Arc};
-
-    use arrow::datatypes::{DataType, Schema};
-    use datafusion::physical_plan::ColumnarValue;
-    use datafusion::physical_plan::{
-        empty::EmptyExec,
-        expressions::{Avg, Column, PhysicalSortExpr},
-        hash_aggregate::{AggregateMode, HashAggregateExec},
-        hash_join::HashJoinExec,
-        limit::{GlobalLimitExec, LocalLimitExec},
-        sort::SortExec,
-        ExecutionPlan,
-    };
-    use datafusion::physical_plan::{
-        AggregateExpr, Distribution, Partitioning, PhysicalExpr,
-    };
-
-    use super::super::super::error::Result;
-    use super::super::protobuf;
-    use datafusion::physical_plan::hash_join::PartitionMode;
-
-    fn roundtrip_test(exec_plan: Arc<dyn ExecutionPlan>) -> Result<()> {
-        let proto: protobuf::PhysicalPlanNode = exec_plan.clone().try_into()?;
-        let result_exec_plan: Arc<dyn ExecutionPlan> = (&proto).try_into()?;
-        assert_eq!(
-            format!("{:?}", exec_plan),
-            format!("{:?}", result_exec_plan)
-        );
-        Ok(())
-    }
-
-    #[test]
-    fn roundtrip_empty() -> Result<()> {
-        roundtrip_test(Arc::new(EmptyExec::new(false, Arc::new(Schema::empty()))))
-    }
-
-    #[test]
-    fn roundtrip_local_limit() -> Result<()> {
-        roundtrip_test(Arc::new(LocalLimitExec::new(
-            Arc::new(EmptyExec::new(false, Arc::new(Schema::empty()))),
-            25,
-        )))
-    }
-
-    #[test]
-    fn roundtrip_global_limit() -> Result<()> {
-        roundtrip_test(Arc::new(GlobalLimitExec::new(
-            Arc::new(EmptyExec::new(false, Arc::new(Schema::empty()))),
-            25,
-        )))
-    }
-
-    #[test]
-    fn roundtrip_hash_join() -> Result<()> {
-        use arrow::datatypes::{DataType, Field, Schema};
-        let field_a = Field::new("col", DataType::Int64, false);
-        let schema_left = Schema::new(vec![field_a.clone()]);
-        let schema_right = Schema::new(vec![field_a]);
-
-        roundtrip_test(Arc::new(HashJoinExec::try_new(
-            Arc::new(EmptyExec::new(false, Arc::new(schema_left))),
-            Arc::new(EmptyExec::new(false, Arc::new(schema_right))),
-            &[("col".to_string(), "col".to_string())],
-            &JoinType::Inner,
-            PartitionMode::CollectLeft,
-        )?))
-    }
-
-    fn col(name: &str) -> Arc<dyn PhysicalExpr> {
-        Arc::new(Column::new(name))
-    }
-
-    #[test]
-    fn rountrip_hash_aggregate() -> Result<()> {
-        use arrow::datatypes::{DataType, Field, Schema};
-        let groups: Vec<(Arc<dyn PhysicalExpr>, String)> =
-            vec![(col("a"), "unused".to_string())];
-
-        let aggregates: Vec<Arc<dyn AggregateExpr>> = vec![Arc::new(Avg::new(
-            col("b"),
-            "AVG(b)".to_string(),
-            DataType::Float64,
-        ))];
-
-        let field_a = Field::new("a", DataType::Int64, false);
-        let field_b = Field::new("b", DataType::Int64, false);
-        let schema = Arc::new(Schema::new(vec![field_a, field_b]));
-
-        roundtrip_test(Arc::new(HashAggregateExec::try_new(
-            AggregateMode::Final,
-            groups.clone(),
-            aggregates.clone(),
-            Arc::new(EmptyExec::new(false, schema.clone())),
-            schema,
-        )?))
-    }
-
-    #[test]
-    fn roundtrip_filter_with_not_and_in_list() -> Result<()> {
-        use arrow::datatypes::{DataType, Field, Schema};
-        use datafusion::logical_plan::Operator;
-        use datafusion::physical_plan::{
-            expressions::{binary, lit, InListExpr, NotExpr},
-            filter::FilterExec,
-        };
-        use datafusion::scalar::ScalarValue;
-        let field_a = Field::new("a", DataType::Boolean, false);
-        let field_b = Field::new("b", DataType::Int64, false);
-        let field_c = Field::new("c", DataType::Int64, false);
-        let schema = Arc::new(Schema::new(vec![field_a, field_b, field_c]));
-        let not = Arc::new(NotExpr::new(col("a")));
-        let in_list = Arc::new(InListExpr::new(
-            col("b"),
-            vec![
-                lit(ScalarValue::Int64(Some(1))),
-                lit(ScalarValue::Int64(Some(2))),
-            ],
-            false,
-        ));
-        let and = binary(not, Operator::And, in_list, &schema)?;
-        roundtrip_test(Arc::new(FilterExec::try_new(
-            and,
-            Arc::new(EmptyExec::new(false, schema.clone())),
-        )?))
-    }
-
-    #[test]
-    fn roundtrip_sort() -> Result<()> {
-        use arrow::compute::kernels::sort::SortOptions;
-        use arrow::datatypes::{DataType, Field, Schema};
-        let field_a = Field::new("a", DataType::Boolean, false);
-        let field_b = Field::new("b", DataType::Int64, false);
-        let schema = Arc::new(Schema::new(vec![field_a, field_b]));
-        let sort_exprs = vec![
-            PhysicalSortExpr {
-                expr: col("a"),
-                options: SortOptions {
-                    descending: true,
-                    nulls_first: false,
-                },
-            },
-            PhysicalSortExpr {
-                expr: col("b"),
-                options: SortOptions {
-                    descending: false,
-                    nulls_first: true,
-                },
-            },
-        ];
-        roundtrip_test(Arc::new(SortExec::try_new(
-            sort_exprs,
-            Arc::new(EmptyExec::new(false, schema)),
-        )?))
-    }
-}
diff --git a/rust/ballista/rust/core/src/serde/physical_plan/to_proto.rs b/rust/ballista/rust/core/src/serde/physical_plan/to_proto.rs
deleted file mode 100644
index 5352c1f7775..00000000000
--- a/rust/ballista/rust/core/src/serde/physical_plan/to_proto.rs
+++ /dev/null
@@ -1,556 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.language governing permissions and
-// limitations under the License.
-
-//! Serde code to convert Arrow schemas and DataFusion logical plans to Ballista protocol
-//! buffer format, allowing DataFusion physical plans to be serialized and transmitted between
-//! processes.
-
-use std::{
-    convert::{TryFrom, TryInto},
-    str::FromStr,
-    sync::Arc,
-};
-
-use datafusion::physical_plan::coalesce_batches::CoalesceBatchesExec;
-use datafusion::physical_plan::csv::CsvExec;
-use datafusion::physical_plan::expressions::{
-    CaseExpr, InListExpr, IsNotNullExpr, IsNullExpr, NegativeExpr, NotExpr,
-};
-use datafusion::physical_plan::expressions::{CastExpr, TryCastExpr};
-use datafusion::physical_plan::filter::FilterExec;
-use datafusion::physical_plan::hash_aggregate::AggregateMode;
-use datafusion::physical_plan::hash_join::HashJoinExec;
-use datafusion::physical_plan::hash_utils::JoinType;
-use datafusion::physical_plan::limit::{GlobalLimitExec, LocalLimitExec};
-use datafusion::physical_plan::parquet::ParquetExec;
-use datafusion::physical_plan::projection::ProjectionExec;
-use datafusion::physical_plan::sort::SortExec;
-use datafusion::{
-    physical_plan::expressions::{Count, Literal},
-    scalar::ScalarValue,
-};
-
-use datafusion::physical_plan::{
-    empty::EmptyExec,
-    expressions::{Avg, BinaryExpr, Column, Sum},
-    Partitioning,
-};
-use datafusion::physical_plan::{AggregateExpr, ExecutionPlan, PhysicalExpr};
-
-use datafusion::physical_plan::hash_aggregate::HashAggregateExec;
-use protobuf::physical_plan_node::PhysicalPlanType;
-
-use crate::execution_plans::{ShuffleReaderExec, UnresolvedShuffleExec};
-use crate::serde::protobuf::repartition_exec_node::PartitionMethod;
-use crate::serde::{protobuf, BallistaError};
-use datafusion::physical_plan::functions::{BuiltinScalarFunction, ScalarFunctionExpr};
-use datafusion::physical_plan::merge::MergeExec;
-use datafusion::physical_plan::repartition::RepartitionExec;
-
-impl TryInto<protobuf::PhysicalPlanNode> for Arc<dyn ExecutionPlan> {
-    type Error = BallistaError;
-
-    fn try_into(self) -> Result<protobuf::PhysicalPlanNode, Self::Error> {
-        let plan = self.as_any();
-
-        if let Some(exec) = plan.downcast_ref::<ProjectionExec>() {
-            let input: protobuf::PhysicalPlanNode = exec.input().to_owned().try_into()?;
-            let expr = exec
-                .expr()
-                .iter()
-                .map(|expr| expr.0.clone().try_into())
-                .collect::<Result<Vec<_>, Self::Error>>()?;
-            let expr_name = exec.expr().iter().map(|expr| expr.1.clone()).collect();
-            Ok(protobuf::PhysicalPlanNode {
-                physical_plan_type: Some(PhysicalPlanType::Projection(Box::new(
-                    protobuf::ProjectionExecNode {
-                        input: Some(Box::new(input)),
-                        expr,
-                        expr_name,
-                    },
-                ))),
-            })
-        } else if let Some(exec) = plan.downcast_ref::<FilterExec>() {
-            let input: protobuf::PhysicalPlanNode = exec.input().to_owned().try_into()?;
-            Ok(protobuf::PhysicalPlanNode {
-                physical_plan_type: Some(PhysicalPlanType::Filter(Box::new(
-                    protobuf::FilterExecNode {
-                        input: Some(Box::new(input)),
-                        expr: Some(exec.predicate().clone().try_into()?),
-                    },
-                ))),
-            })
-        } else if let Some(limit) = plan.downcast_ref::<GlobalLimitExec>() {
-            let input: protobuf::PhysicalPlanNode =
-                limit.input().to_owned().try_into()?;
-            Ok(protobuf::PhysicalPlanNode {
-                physical_plan_type: Some(PhysicalPlanType::GlobalLimit(Box::new(
-                    protobuf::GlobalLimitExecNode {
-                        input: Some(Box::new(input)),
-                        limit: limit.limit() as u32,
-                    },
-                ))),
-            })
-        } else if let Some(limit) = plan.downcast_ref::<LocalLimitExec>() {
-            let input: protobuf::PhysicalPlanNode =
-                limit.input().to_owned().try_into()?;
-            Ok(protobuf::PhysicalPlanNode {
-                physical_plan_type: Some(PhysicalPlanType::LocalLimit(Box::new(
-                    protobuf::LocalLimitExecNode {
-                        input: Some(Box::new(input)),
-                        limit: limit.limit() as u32,
-                    },
-                ))),
-            })
-        } else if let Some(exec) = plan.downcast_ref::<HashJoinExec>() {
-            let left: protobuf::PhysicalPlanNode = exec.left().to_owned().try_into()?;
-            let right: protobuf::PhysicalPlanNode = exec.right().to_owned().try_into()?;
-            let on: Vec<protobuf::JoinOn> = exec
-                .on()
-                .iter()
-                .map(|tuple| protobuf::JoinOn {
-                    left: tuple.0.to_owned(),
-                    right: tuple.1.to_owned(),
-                })
-                .collect();
-            let join_type = match exec.join_type() {
-                JoinType::Inner => protobuf::JoinType::Inner,
-                JoinType::Left => protobuf::JoinType::Left,
-                JoinType::Right => protobuf::JoinType::Right,
-            };
-            Ok(protobuf::PhysicalPlanNode {
-                physical_plan_type: Some(PhysicalPlanType::HashJoin(Box::new(
-                    protobuf::HashJoinExecNode {
-                        left: Some(Box::new(left)),
-                        right: Some(Box::new(right)),
-                        on,
-                        join_type: join_type.into(),
-                    },
-                ))),
-            })
-        } else if let Some(exec) = plan.downcast_ref::<HashAggregateExec>() {
-            let groups = exec
-                .group_expr()
-                .iter()
-                .map(|expr| expr.0.to_owned().try_into())
-                .collect::<Result<Vec<_>, BallistaError>>()?;
-            let group_names = exec
-                .group_expr()
-                .iter()
-                .map(|expr| expr.1.to_owned())
-                .collect();
-            let agg = exec
-                .aggr_expr()
-                .iter()
-                .map(|expr| expr.to_owned().try_into())
-                .collect::<Result<Vec<_>, BallistaError>>()?;
-            let agg_names = exec
-                .aggr_expr()
-                .iter()
-                .map(|expr| match expr.field() {
-                    Ok(field) => Ok(field.name().clone()),
-                    Err(e) => Err(BallistaError::DataFusionError(e)),
-                })
-                .collect::<Result<_, Self::Error>>()?;
-
-            let agg_mode = match exec.mode() {
-                AggregateMode::Partial => protobuf::AggregateMode::Partial,
-                AggregateMode::Final => protobuf::AggregateMode::Final,
-            };
-            let input_schema = exec.input_schema();
-            let input: protobuf::PhysicalPlanNode = exec.input().to_owned().try_into()?;
-            Ok(protobuf::PhysicalPlanNode {
-                physical_plan_type: Some(PhysicalPlanType::HashAggregate(Box::new(
-                    protobuf::HashAggregateExecNode {
-                        group_expr: groups,
-                        group_expr_name: group_names,
-                        aggr_expr: agg,
-                        aggr_expr_name: agg_names,
-                        mode: agg_mode as i32,
-                        input: Some(Box::new(input)),
-                        input_schema: Some(input_schema.as_ref().into()),
-                    },
-                ))),
-            })
-        } else if let Some(empty) = plan.downcast_ref::<EmptyExec>() {
-            let schema = empty.schema().as_ref().into();
-            Ok(protobuf::PhysicalPlanNode {
-                physical_plan_type: Some(PhysicalPlanType::Empty(
-                    protobuf::EmptyExecNode {
-                        produce_one_row: empty.produce_one_row(),
-                        schema: Some(schema),
-                    },
-                )),
-            })
-        } else if let Some(coalesce_batches) = plan.downcast_ref::<CoalesceBatchesExec>()
-        {
-            let input: protobuf::PhysicalPlanNode =
-                coalesce_batches.input().to_owned().try_into()?;
-            Ok(protobuf::PhysicalPlanNode {
-                physical_plan_type: Some(PhysicalPlanType::CoalesceBatches(Box::new(
-                    protobuf::CoalesceBatchesExecNode {
-                        input: Some(Box::new(input)),
-                        target_batch_size: coalesce_batches.target_batch_size() as u32,
-                    },
-                ))),
-            })
-        } else if let Some(exec) = plan.downcast_ref::<CsvExec>() {
-            let delimiter = [*exec.delimiter().ok_or_else(|| {
-                BallistaError::General("Delimeter is not set for CsvExec".to_owned())
-            })?];
-            let delimiter = std::str::from_utf8(&delimiter).map_err(|_| {
-                BallistaError::General("Invalid CSV delimiter".to_owned())
-            })?;
-
-            Ok(protobuf::PhysicalPlanNode {
-                physical_plan_type: Some(PhysicalPlanType::CsvScan(
-                    protobuf::CsvScanExecNode {
-                        path: exec.path().to_owned(),
-                        filename: exec.filenames().to_vec(),
-                        projection: exec
-                            .projection()
-                            .ok_or_else(|| {
-                                BallistaError::General(
-                                    "projection in CsvExec dosn not exist.".to_owned(),
-                                )
-                            })?
-                            .iter()
-                            .map(|n| *n as u32)
-                            .collect(),
-                        file_extension: exec.file_extension().to_owned(),
-                        schema: Some(exec.file_schema().as_ref().into()),
-                        has_header: exec.has_header(),
-                        delimiter: delimiter.to_string(),
-                        batch_size: exec.batch_size() as u32,
-                    },
-                )),
-            })
-        } else if let Some(exec) = plan.downcast_ref::<ParquetExec>() {
-            let filenames = exec
-                .partitions()
-                .iter()
-                .flat_map(|part| part.filenames().to_owned())
-                .collect();
-            Ok(protobuf::PhysicalPlanNode {
-                physical_plan_type: Some(PhysicalPlanType::ParquetScan(
-                    protobuf::ParquetScanExecNode {
-                        filename: filenames,
-                        projection: exec
-                            .projection()
-                            .as_ref()
-                            .iter()
-                            .map(|n| *n as u32)
-                            .collect(),
-                        num_partitions: exec.partitions().len() as u32,
-                        batch_size: exec.batch_size() as u32,
-                    },
-                )),
-            })
-        } else if let Some(exec) = plan.downcast_ref::<ShuffleReaderExec>() {
-            let partition_location = exec
-                .partition_location
-                .iter()
-                .map(|l| l.clone().try_into())
-                .collect::<Result<_, _>>()?;
-
-            Ok(protobuf::PhysicalPlanNode {
-                physical_plan_type: Some(PhysicalPlanType::ShuffleReader(
-                    protobuf::ShuffleReaderExecNode {
-                        partition_location,
-                        schema: Some(exec.schema().as_ref().into()),
-                    },
-                )),
-            })
-        } else if let Some(exec) = plan.downcast_ref::<MergeExec>() {
-            let input: protobuf::PhysicalPlanNode = exec.input().to_owned().try_into()?;
-            Ok(protobuf::PhysicalPlanNode {
-                physical_plan_type: Some(PhysicalPlanType::Merge(Box::new(
-                    protobuf::MergeExecNode {
-                        input: Some(Box::new(input)),
-                    },
-                ))),
-            })
-        } else if let Some(exec) = plan.downcast_ref::<RepartitionExec>() {
-            let input: protobuf::PhysicalPlanNode = exec.input().to_owned().try_into()?;
-
-            let pb_partition_method = match exec.partitioning() {
-                Partitioning::Hash(exprs, partition_count) => {
-                    PartitionMethod::Hash(protobuf::HashRepartition {
-                        hash_expr: exprs
-                            .iter()
-                            .map(|expr| expr.clone().try_into())
-                            .collect::<Result<Vec<_>, BallistaError>>()?,
-                        partition_count: *partition_count as u64,
-                    })
-                }
-                Partitioning::RoundRobinBatch(partition_count) => {
-                    PartitionMethod::RoundRobin(*partition_count as u64)
-                }
-                Partitioning::UnknownPartitioning(partition_count) => {
-                    PartitionMethod::Unknown(*partition_count as u64)
-                }
-            };
-
-            Ok(protobuf::PhysicalPlanNode {
-                physical_plan_type: Some(PhysicalPlanType::Repartition(Box::new(
-                    protobuf::RepartitionExecNode {
-                        input: Some(Box::new(input)),
-                        partition_method: Some(pb_partition_method),
-                    },
-                ))),
-            })
-        } else if let Some(exec) = plan.downcast_ref::<SortExec>() {
-            let input: protobuf::PhysicalPlanNode = exec.input().to_owned().try_into()?;
-            let expr = exec
-                .expr()
-                .iter()
-                .map(|expr| {
-                    let sort_expr = Box::new(protobuf::SortExprNode {
-                        expr: Some(Box::new(expr.expr.to_owned().try_into()?)),
-                        asc: !expr.options.descending,
-                        nulls_first: expr.options.nulls_first,
-                    });
-                    Ok(protobuf::LogicalExprNode {
-                        expr_type: Some(protobuf::logical_expr_node::ExprType::Sort(
-                            sort_expr,
-                        )),
-                    })
-                })
-                .collect::<Result<Vec<_>, Self::Error>>()?;
-            Ok(protobuf::PhysicalPlanNode {
-                physical_plan_type: Some(PhysicalPlanType::Sort(Box::new(
-                    protobuf::SortExecNode {
-                        input: Some(Box::new(input)),
-                        expr,
-                    },
-                ))),
-            })
-        } else if let Some(exec) = plan.downcast_ref::<UnresolvedShuffleExec>() {
-            Ok(protobuf::PhysicalPlanNode {
-                physical_plan_type: Some(PhysicalPlanType::Unresolved(
-                    protobuf::UnresolvedShuffleExecNode {
-                        query_stage_ids: exec
-                            .query_stage_ids
-                            .iter()
-                            .map(|id| *id as u32)
-                            .collect(),
-                        schema: Some(exec.schema().as_ref().into()),
-                        partition_count: exec.partition_count as u32,
-                    },
-                )),
-            })
-        } else {
-            Err(BallistaError::General(format!(
-                "physical plan to_proto unsupported plan {:?}",
-                self
-            )))
-        }
-    }
-}
-
-impl TryInto<protobuf::LogicalExprNode> for Arc<dyn AggregateExpr> {
-    type Error = BallistaError;
-
-    fn try_into(self) -> Result<protobuf::LogicalExprNode, Self::Error> {
-        let aggr_function = if self.as_any().downcast_ref::<Avg>().is_some() {
-            Ok(protobuf::AggregateFunction::Avg.into())
-        } else if self.as_any().downcast_ref::<Sum>().is_some() {
-            Ok(protobuf::AggregateFunction::Sum.into())
-        } else if self.as_any().downcast_ref::<Count>().is_some() {
-            Ok(protobuf::AggregateFunction::Count.into())
-        } else {
-            Err(BallistaError::NotImplemented(format!(
-                "Aggregate function not supported: {:?}",
-                self
-            )))
-        }?;
-        let expressions: Vec<protobuf::LogicalExprNode> = self
-            .expressions()
-            .iter()
-            .map(|e| e.clone().try_into())
-            .collect::<Result<Vec<_>, BallistaError>>()?;
-        Ok(protobuf::LogicalExprNode {
-            expr_type: Some(protobuf::logical_expr_node::ExprType::AggregateExpr(
-                Box::new(protobuf::AggregateExprNode {
-                    aggr_function,
-                    expr: Some(Box::new(expressions[0].clone())),
-                }),
-            )),
-        })
-    }
-}
-
-impl TryFrom<Arc<dyn PhysicalExpr>> for protobuf::LogicalExprNode {
-    type Error = BallistaError;
-
-    fn try_from(value: Arc<dyn PhysicalExpr>) -> Result<Self, Self::Error> {
-        let expr = value.as_any();
-
-        if let Some(expr) = expr.downcast_ref::<Column>() {
-            Ok(protobuf::LogicalExprNode {
-                expr_type: Some(protobuf::logical_expr_node::ExprType::ColumnName(
-                    expr.name().to_owned(),
-                )),
-            })
-        } else if let Some(expr) = expr.downcast_ref::<BinaryExpr>() {
-            let binary_expr = Box::new(protobuf::BinaryExprNode {
-                l: Some(Box::new(expr.left().to_owned().try_into()?)),
-                r: Some(Box::new(expr.right().to_owned().try_into()?)),
-                op: format!("{:?}", expr.op()),
-            });
-
-            Ok(protobuf::LogicalExprNode {
-                expr_type: Some(protobuf::logical_expr_node::ExprType::BinaryExpr(
-                    binary_expr,
-                )),
-            })
-        } else if let Some(expr) = expr.downcast_ref::<CaseExpr>() {
-            Ok(protobuf::LogicalExprNode {
-                expr_type: Some(protobuf::logical_expr_node::ExprType::Case(Box::new(
-                    protobuf::CaseNode {
-                        expr: expr
-                            .expr()
-                            .as_ref()
-                            .map(|exp| exp.clone().try_into().map(Box::new))
-                            .transpose()?,
-                        when_then_expr: expr
-                            .when_then_expr()
-                            .iter()
-                            .map(|(when_expr, then_expr)| {
-                                try_parse_when_then_expr(when_expr, then_expr)
-                            })
-                            .collect::<Result<Vec<protobuf::WhenThen>, Self::Error>>()?,
-                        else_expr: expr
-                            .else_expr()
-                            .map(|a| a.clone().try_into().map(Box::new))
-                            .transpose()?,
-                    },
-                ))),
-            })
-        } else if let Some(expr) = expr.downcast_ref::<NotExpr>() {
-            Ok(protobuf::LogicalExprNode {
-                expr_type: Some(protobuf::logical_expr_node::ExprType::NotExpr(
-                    Box::new(protobuf::Not {
-                        expr: Some(Box::new(expr.arg().to_owned().try_into()?)),
-                    }),
-                )),
-            })
-        } else if let Some(expr) = expr.downcast_ref::<IsNullExpr>() {
-            Ok(protobuf::LogicalExprNode {
-                expr_type: Some(protobuf::logical_expr_node::ExprType::IsNullExpr(
-                    Box::new(protobuf::IsNull {
-                        expr: Some(Box::new(expr.arg().to_owned().try_into()?)),
-                    }),
-                )),
-            })
-        } else if let Some(expr) = expr.downcast_ref::<IsNotNullExpr>() {
-            Ok(protobuf::LogicalExprNode {
-                expr_type: Some(protobuf::logical_expr_node::ExprType::IsNotNullExpr(
-                    Box::new(protobuf::IsNotNull {
-                        expr: Some(Box::new(expr.arg().to_owned().try_into()?)),
-                    }),
-                )),
-            })
-        } else if let Some(expr) = expr.downcast_ref::<InListExpr>() {
-            Ok(protobuf::LogicalExprNode {
-                expr_type: Some(
-                    protobuf::logical_expr_node::ExprType::InList(
-                        Box::new(
-                            protobuf::InListNode {
-                                expr: Some(Box::new(expr.expr().to_owned().try_into()?)),
-                                list: expr
-                                    .list()
-                                    .iter()
-                                    .map(|a| a.clone().try_into())
-                                    .collect::<Result<
-                                    Vec<protobuf::LogicalExprNode>,
-                                    Self::Error,
-                                >>()?,
-                                negated: expr.negated(),
-                            },
-                        ),
-                    ),
-                ),
-            })
-        } else if let Some(expr) = expr.downcast_ref::<NegativeExpr>() {
-            Ok(protobuf::LogicalExprNode {
-                expr_type: Some(protobuf::logical_expr_node::ExprType::Negative(
-                    Box::new(protobuf::NegativeNode {
-                        expr: Some(Box::new(expr.arg().to_owned().try_into()?)),
-                    }),
-                )),
-            })
-        } else if let Some(lit) = expr.downcast_ref::<Literal>() {
-            Ok(protobuf::LogicalExprNode {
-                expr_type: Some(protobuf::logical_expr_node::ExprType::Literal(
-                    lit.value().try_into()?,
-                )),
-            })
-        } else if let Some(cast) = expr.downcast_ref::<CastExpr>() {
-            Ok(protobuf::LogicalExprNode {
-                expr_type: Some(protobuf::logical_expr_node::ExprType::Cast(Box::new(
-                    protobuf::CastNode {
-                        expr: Some(Box::new(cast.expr().clone().try_into()?)),
-                        arrow_type: Some(cast.cast_type().into()),
-                    },
-                ))),
-            })
-        } else if let Some(cast) = expr.downcast_ref::<TryCastExpr>() {
-            Ok(protobuf::LogicalExprNode {
-                expr_type: Some(protobuf::logical_expr_node::ExprType::TryCast(
-                    Box::new(protobuf::TryCastNode {
-                        expr: Some(Box::new(cast.expr().clone().try_into()?)),
-                        arrow_type: Some(cast.cast_type().into()),
-                    }),
-                )),
-            })
-        } else if let Some(expr) = expr.downcast_ref::<ScalarFunctionExpr>() {
-            let fun: BuiltinScalarFunction =
-                BuiltinScalarFunction::from_str(expr.name())?;
-            let fun: protobuf::ScalarFunction = (&fun).try_into()?;
-            let expr: Vec<protobuf::LogicalExprNode> = expr
-                .args()
-                .iter()
-                .map(|e| e.to_owned().try_into())
-                .collect::<Result<Vec<_>, _>>()?;
-            Ok(protobuf::LogicalExprNode {
-                expr_type: Some(protobuf::logical_expr_node::ExprType::ScalarFunction(
-                    protobuf::ScalarFunctionNode {
-                        fun: fun.into(),
-                        expr,
-                    },
-                )),
-            })
-        } else {
-            Err(BallistaError::General(format!(
-                "physical_plan::to_proto() unsupported expression {:?}",
-                value
-            )))
-        }
-    }
-}
-
-fn try_parse_when_then_expr(
-    when_expr: &Arc<dyn PhysicalExpr>,
-    then_expr: &Arc<dyn PhysicalExpr>,
-) -> Result<protobuf::WhenThen, BallistaError> {
-    Ok(protobuf::WhenThen {
-        when_expr: Some(when_expr.clone().try_into()?),
-        then_expr: Some(then_expr.clone().try_into()?),
-    })
-}
diff --git a/rust/ballista/rust/core/src/serde/scheduler/from_proto.rs b/rust/ballista/rust/core/src/serde/scheduler/from_proto.rs
deleted file mode 100644
index fb1e4f812d0..00000000000
--- a/rust/ballista/rust/core/src/serde/scheduler/from_proto.rs
+++ /dev/null
@@ -1,124 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::{collections::HashMap, convert::TryInto};
-
-use crate::error::BallistaError;
-use crate::serde::protobuf;
-use crate::serde::protobuf::action::ActionType;
-use crate::serde::scheduler::{
-    Action, ExecutePartition, PartitionId, PartitionLocation, PartitionStats,
-};
-
-use datafusion::logical_plan::LogicalPlan;
-use uuid::Uuid;
-
-impl TryInto<Action> for protobuf::Action {
-    type Error = BallistaError;
-
-    fn try_into(self) -> Result<Action, Self::Error> {
-        match self.action_type {
-            Some(ActionType::ExecutePartition(partition)) => {
-                Ok(Action::ExecutePartition(ExecutePartition::new(
-                    partition.job_id,
-                    partition.stage_id as usize,
-                    partition.partition_id.iter().map(|n| *n as usize).collect(),
-                    partition
-                        .plan
-                        .as_ref()
-                        .ok_or_else(|| {
-                            BallistaError::General(
-                                "PhysicalPlanNode in ExecutePartition is missing"
-                                    .to_owned(),
-                            )
-                        })?
-                        .try_into()?,
-                    HashMap::new(),
-                )))
-            }
-            Some(ActionType::FetchPartition(partition)) => {
-                Ok(Action::FetchPartition(partition.try_into()?))
-            }
-            _ => Err(BallistaError::General(
-                "scheduler::from_proto(Action) invalid or missing action".to_owned(),
-            )),
-        }
-    }
-}
-
-impl TryInto<PartitionId> for protobuf::PartitionId {
-    type Error = BallistaError;
-
-    fn try_into(self) -> Result<PartitionId, Self::Error> {
-        Ok(PartitionId::new(
-            &self.job_id,
-            self.stage_id as usize,
-            self.partition_id as usize,
-        ))
-    }
-}
-
-impl Into<PartitionStats> for protobuf::PartitionStats {
-    fn into(self) -> PartitionStats {
-        PartitionStats::new(
-            foo(self.num_rows),
-            foo(self.num_batches),
-            foo(self.num_bytes),
-        )
-    }
-}
-
-fn foo(n: i64) -> Option<u64> {
-    if n < 0 {
-        None
-    } else {
-        Some(n as u64)
-    }
-}
-
-impl TryInto<PartitionLocation> for protobuf::PartitionLocation {
-    type Error = BallistaError;
-
-    fn try_into(self) -> Result<PartitionLocation, Self::Error> {
-        Ok(PartitionLocation {
-            partition_id: self
-                .partition_id
-                .ok_or_else(|| {
-                    BallistaError::General(
-                        "partition_id in PartitionLocation is missing.".to_owned(),
-                    )
-                })?
-                .try_into()?,
-            executor_meta: self
-                .executor_meta
-                .ok_or_else(|| {
-                    BallistaError::General(
-                        "executor_meta in PartitionLocation is missing".to_owned(),
-                    )
-                })?
-                .into(),
-            partition_stats: self
-                .partition_stats
-                .ok_or_else(|| {
-                    BallistaError::General(
-                        "partition_stats in PartitionLocation is missing".to_owned(),
-                    )
-                })?
-                .into(),
-        })
-    }
-}
diff --git a/rust/ballista/rust/core/src/serde/scheduler/mod.rs b/rust/ballista/rust/core/src/serde/scheduler/mod.rs
deleted file mode 100644
index 81d8722d7f4..00000000000
--- a/rust/ballista/rust/core/src/serde/scheduler/mod.rs
+++ /dev/null
@@ -1,262 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::{collections::HashMap, sync::Arc};
-
-use arrow::array::{
-    ArrayBuilder, ArrayRef, StructArray, StructBuilder, UInt64Array, UInt64Builder,
-};
-use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
-use datafusion::logical_plan::LogicalPlan;
-use datafusion::physical_plan::ExecutionPlan;
-use serde::Serialize;
-use uuid::Uuid;
-
-use super::protobuf;
-use crate::error::BallistaError;
-
-pub mod from_proto;
-pub mod to_proto;
-
-/// Action that can be sent to an executor
-#[derive(Debug, Clone)]
-
-pub enum Action {
-    /// Execute a query and store the results in memory
-    ExecutePartition(ExecutePartition),
-    /// Collect a shuffle partition
-    FetchPartition(PartitionId),
-}
-
-/// Unique identifier for the output partition of an operator.
-#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
-pub struct PartitionId {
-    pub job_id: String,
-    pub stage_id: usize,
-    pub partition_id: usize,
-}
-
-impl PartitionId {
-    pub fn new(job_id: &str, stage_id: usize, partition_id: usize) -> Self {
-        Self {
-            job_id: job_id.to_string(),
-            stage_id,
-            partition_id,
-        }
-    }
-}
-
-#[derive(Debug, Clone)]
-pub struct PartitionLocation {
-    pub partition_id: PartitionId,
-    pub executor_meta: ExecutorMeta,
-    pub partition_stats: PartitionStats,
-}
-
-/// Meta-data for an executor, used when fetching shuffle partitions from other executors
-#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
-pub struct ExecutorMeta {
-    pub id: String,
-    pub host: String,
-    pub port: u16,
-}
-
-impl Into<protobuf::ExecutorMetadata> for ExecutorMeta {
-    fn into(self) -> protobuf::ExecutorMetadata {
-        protobuf::ExecutorMetadata {
-            id: self.id,
-            host: self.host,
-            port: self.port as u32,
-        }
-    }
-}
-
-impl From<protobuf::ExecutorMetadata> for ExecutorMeta {
-    fn from(meta: protobuf::ExecutorMetadata) -> Self {
-        Self {
-            id: meta.id,
-            host: meta.host,
-            port: meta.port as u16,
-        }
-    }
-}
-
-/// Summary of executed partition
-#[derive(Debug, Copy, Clone)]
-pub struct PartitionStats {
-    num_rows: Option<u64>,
-    num_batches: Option<u64>,
-    num_bytes: Option<u64>,
-}
-
-impl Default for PartitionStats {
-    fn default() -> Self {
-        Self {
-            num_rows: None,
-            num_batches: None,
-            num_bytes: None,
-        }
-    }
-}
-
-impl PartitionStats {
-    pub fn new(
-        num_rows: Option<u64>,
-        num_batches: Option<u64>,
-        num_bytes: Option<u64>,
-    ) -> Self {
-        Self {
-            num_rows,
-            num_batches,
-            num_bytes,
-        }
-    }
-
-    pub fn arrow_struct_repr(self) -> Field {
-        Field::new(
-            "partition_stats",
-            DataType::Struct(self.arrow_struct_fields()),
-            false,
-        )
-    }
-    fn arrow_struct_fields(self) -> Vec<Field> {
-        vec![
-            Field::new("num_rows", DataType::UInt64, false),
-            Field::new("num_batches", DataType::UInt64, false),
-            Field::new("num_bytes", DataType::UInt64, false),
-        ]
-    }
-
-    pub fn to_arrow_arrayref(&self) -> Result<Arc<StructArray>, BallistaError> {
-        let mut field_builders = Vec::new();
-
-        let mut num_rows_builder = UInt64Builder::new(1);
-        match self.num_rows {
-            Some(n) => num_rows_builder.append_value(n)?,
-            None => num_rows_builder.append_null()?,
-        }
-        field_builders.push(Box::new(num_rows_builder) as Box<dyn ArrayBuilder>);
-
-        let mut num_batches_builder = UInt64Builder::new(1);
-        match self.num_batches {
-            Some(n) => num_batches_builder.append_value(n)?,
-            None => num_batches_builder.append_null()?,
-        }
-        field_builders.push(Box::new(num_batches_builder) as Box<dyn ArrayBuilder>);
-
-        let mut num_bytes_builder = UInt64Builder::new(1);
-        match self.num_bytes {
-            Some(n) => num_bytes_builder.append_value(n)?,
-            None => num_bytes_builder.append_null()?,
-        }
-        field_builders.push(Box::new(num_bytes_builder) as Box<dyn ArrayBuilder>);
-
-        let mut struct_builder =
-            StructBuilder::new(self.arrow_struct_fields(), field_builders);
-        struct_builder.append(true)?;
-        Ok(Arc::new(struct_builder.finish()))
-    }
-
-    pub fn from_arrow_struct_array(struct_array: &StructArray) -> PartitionStats {
-        let num_rows = struct_array
-            .column_by_name("num_rows")
-            .expect("from_arrow_struct_array expected a field num_rows")
-            .as_any()
-            .downcast_ref::<UInt64Array>()
-            .expect("from_arrow_struct_array expected num_rows to be a UInt64Array");
-        let num_batches = struct_array
-            .column_by_name("num_batches")
-            .expect("from_arrow_struct_array expected a field num_batches")
-            .as_any()
-            .downcast_ref::<UInt64Array>()
-            .expect("from_arrow_struct_array expected num_batches to be a UInt64Array");
-        let num_bytes = struct_array
-            .column_by_name("num_bytes")
-            .expect("from_arrow_struct_array expected a field num_bytes")
-            .as_any()
-            .downcast_ref::<UInt64Array>()
-            .expect("from_arrow_struct_array expected num_bytes to be a UInt64Array");
-        PartitionStats {
-            num_rows: Some(num_rows.value(0).to_owned()),
-            num_batches: Some(num_batches.value(0).to_owned()),
-            num_bytes: Some(num_bytes.value(0).to_owned()),
-        }
-    }
-}
-
-/// Task that can be sent to an executor to execute one stage of a query and write
-/// results out to disk
-#[derive(Debug, Clone)]
-pub struct ExecutePartition {
-    /// Unique ID representing this query execution
-    pub job_id: String,
-    /// Unique ID representing this query stage within the overall query
-    pub stage_id: usize,
-    /// The partitions to execute. The same plan could be sent to multiple executors and each
-    /// executor will execute a range of partitions per QueryStageTask
-    pub partition_id: Vec<usize>,
-    /// The physical plan for this query stage
-    pub plan: Arc<dyn ExecutionPlan>,
-    /// Location of shuffle partitions that this query stage may depend on
-    pub shuffle_locations: HashMap<PartitionId, ExecutorMeta>,
-}
-
-impl ExecutePartition {
-    pub fn new(
-        job_id: String,
-        stage_id: usize,
-        partition_id: Vec<usize>,
-        plan: Arc<dyn ExecutionPlan>,
-        shuffle_locations: HashMap<PartitionId, ExecutorMeta>,
-    ) -> Self {
-        Self {
-            job_id,
-            stage_id,
-            partition_id,
-            plan,
-            shuffle_locations,
-        }
-    }
-
-    pub fn key(&self) -> String {
-        format!("{}.{}.{:?}", self.job_id, self.stage_id, self.partition_id)
-    }
-}
-
-#[derive(Debug)]
-pub struct ExecutePartitionResult {
-    /// Path containing results for this partition
-    path: String,
-    stats: PartitionStats,
-}
-
-impl ExecutePartitionResult {
-    pub fn new(path: &str, stats: PartitionStats) -> Self {
-        Self {
-            path: path.to_owned(),
-            stats,
-        }
-    }
-
-    pub fn path(&self) -> &str {
-        &self.path
-    }
-
-    pub fn statistics(&self) -> &PartitionStats {
-        &self.stats
-    }
-}
diff --git a/rust/ballista/rust/core/src/serde/scheduler/to_proto.rs b/rust/ballista/rust/core/src/serde/scheduler/to_proto.rs
deleted file mode 100644
index f581becdea1..00000000000
--- a/rust/ballista/rust/core/src/serde/scheduler/to_proto.rs
+++ /dev/null
@@ -1,90 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::convert::TryInto;
-
-use crate::error::BallistaError;
-use crate::serde::protobuf;
-use crate::serde::protobuf::action::ActionType;
-use crate::serde::scheduler::{
-    Action, ExecutePartition, PartitionId, PartitionLocation, PartitionStats,
-};
-
-impl TryInto<protobuf::Action> for Action {
-    type Error = BallistaError;
-
-    fn try_into(self) -> Result<protobuf::Action, Self::Error> {
-        match self {
-            Action::ExecutePartition(partition) => Ok(protobuf::Action {
-                action_type: Some(ActionType::ExecutePartition(partition.try_into()?)),
-                settings: vec![],
-            }),
-            Action::FetchPartition(partition_id) => Ok(protobuf::Action {
-                action_type: Some(ActionType::FetchPartition(partition_id.into())),
-                settings: vec![],
-            }),
-        }
-    }
-}
-
-impl TryInto<protobuf::ExecutePartition> for ExecutePartition {
-    type Error = BallistaError;
-
-    fn try_into(self) -> Result<protobuf::ExecutePartition, Self::Error> {
-        Ok(protobuf::ExecutePartition {
-            job_id: self.job_id,
-            stage_id: self.stage_id as u32,
-            partition_id: self.partition_id.iter().map(|n| *n as u32).collect(),
-            plan: Some(self.plan.try_into()?),
-            partition_location: vec![],
-        })
-    }
-}
-
-impl Into<protobuf::PartitionId> for PartitionId {
-    fn into(self) -> protobuf::PartitionId {
-        protobuf::PartitionId {
-            job_id: self.job_id,
-            stage_id: self.stage_id as u32,
-            partition_id: self.partition_id as u32,
-        }
-    }
-}
-
-impl TryInto<protobuf::PartitionLocation> for PartitionLocation {
-    type Error = BallistaError;
-
-    fn try_into(self) -> Result<protobuf::PartitionLocation, Self::Error> {
-        Ok(protobuf::PartitionLocation {
-            partition_id: Some(self.partition_id.into()),
-            executor_meta: Some(self.executor_meta.into()),
-            partition_stats: Some(self.partition_stats.into()),
-        })
-    }
-}
-
-impl Into<protobuf::PartitionStats> for PartitionStats {
-    fn into(self) -> protobuf::PartitionStats {
-        let none_value = -1_i64;
-        protobuf::PartitionStats {
-            num_rows: self.num_rows.map(|n| n as i64).unwrap_or(none_value),
-            num_batches: self.num_batches.map(|n| n as i64).unwrap_or(none_value),
-            num_bytes: self.num_bytes.map(|n| n as i64).unwrap_or(none_value),
-            column_stats: vec![],
-        }
-    }
-}
diff --git a/rust/ballista/rust/core/src/utils.rs b/rust/ballista/rust/core/src/utils.rs
deleted file mode 100644
index ee9c9557e78..00000000000
--- a/rust/ballista/rust/core/src/utils.rs
+++ /dev/null
@@ -1,327 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::collections::HashMap;
-use std::io::{BufWriter, Write};
-use std::ops::Deref;
-use std::sync::atomic::{AtomicUsize, Ordering};
-use std::sync::Arc;
-use std::{fs::File, pin::Pin};
-
-use crate::error::{BallistaError, Result};
-use crate::execution_plans::{QueryStageExec, UnresolvedShuffleExec};
-use crate::memory_stream::MemoryStream;
-use crate::serde::scheduler::PartitionStats;
-use arrow::array::{
-    ArrayBuilder, ArrayRef, StructArray, StructBuilder, UInt64Array, UInt64Builder,
-};
-use arrow::datatypes::{DataType, Field};
-use arrow::ipc::reader::FileReader;
-use arrow::ipc::writer::FileWriter;
-use arrow::record_batch::RecordBatch;
-use datafusion::execution::context::{ExecutionConfig, ExecutionContext};
-use datafusion::logical_plan::Operator;
-use datafusion::physical_optimizer::coalesce_batches::CoalesceBatches;
-use datafusion::physical_optimizer::merge_exec::AddMergeExec;
-use datafusion::physical_optimizer::optimizer::PhysicalOptimizerRule;
-use datafusion::physical_plan::coalesce_batches::CoalesceBatchesExec;
-use datafusion::physical_plan::csv::CsvExec;
-use datafusion::physical_plan::expressions::{BinaryExpr, Column, Literal};
-use datafusion::physical_plan::filter::FilterExec;
-use datafusion::physical_plan::hash_aggregate::HashAggregateExec;
-use datafusion::physical_plan::hash_join::HashJoinExec;
-use datafusion::physical_plan::merge::MergeExec;
-use datafusion::physical_plan::parquet::ParquetExec;
-use datafusion::physical_plan::projection::ProjectionExec;
-use datafusion::physical_plan::sort::SortExec;
-use datafusion::physical_plan::{
-    AggregateExpr, ExecutionPlan, PhysicalExpr, RecordBatchStream,
-};
-use futures::StreamExt;
-
-/// Stream data to disk in Arrow IPC format
-
-pub async fn write_stream_to_disk(
-    stream: &mut Pin<Box<dyn RecordBatchStream + Send + Sync>>,
-    path: &str,
-) -> Result<PartitionStats> {
-    let file = File::create(&path).map_err(|e| {
-        BallistaError::General(format!(
-            "Failed to create partition file at {}: {:?}",
-            path, e
-        ))
-    })?;
-
-    let mut num_rows = 0;
-    let mut num_batches = 0;
-    let mut num_bytes = 0;
-    let mut writer = FileWriter::try_new(file, stream.schema().as_ref())?;
-
-    while let Some(result) = stream.next().await {
-        let batch = result?;
-
-        let batch_size_bytes: usize = batch
-            .columns()
-            .iter()
-            .map(|array| array.get_array_memory_size())
-            .sum();
-        num_batches += 1;
-        num_rows += batch.num_rows();
-        num_bytes += batch_size_bytes;
-        writer.write(&batch)?;
-    }
-    writer.finish()?;
-    Ok(PartitionStats::new(
-        Some(num_rows as u64),
-        Some(num_batches),
-        Some(num_bytes as u64),
-    ))
-}
-
-pub async fn collect_stream(
-    stream: &mut Pin<Box<dyn RecordBatchStream + Send + Sync>>,
-) -> Result<Vec<RecordBatch>> {
-    let mut batches = vec![];
-    while let Some(batch) = stream.next().await {
-        batches.push(batch?);
-    }
-    Ok(batches)
-}
-
-pub fn format_plan(plan: &dyn ExecutionPlan, indent: usize) -> Result<String> {
-    let operator_str =
-        if let Some(exec) = plan.as_any().downcast_ref::<HashAggregateExec>() {
-            format!(
-                "HashAggregateExec: groupBy={:?}, aggrExpr={:?}",
-                exec.group_expr()
-                    .iter()
-                    .map(|e| format_expr(e.0.as_ref()))
-                    .collect::<Vec<String>>(),
-                exec.aggr_expr()
-                    .iter()
-                    .map(|e| format_agg_expr(e.as_ref()))
-                    .collect::<Result<Vec<String>>>()?
-            )
-        } else if let Some(exec) = plan.as_any().downcast_ref::<HashJoinExec>() {
-            format!(
-                "HashJoinExec: joinType={:?}, on={:?}",
-                exec.join_type(),
-                exec.on()
-            )
-        } else if let Some(exec) = plan.as_any().downcast_ref::<ParquetExec>() {
-            let mut num_files = 0;
-            for part in exec.partitions() {
-                num_files += part.filenames().len();
-            }
-            format!(
-                "ParquetExec: partitions={}, files={}",
-                exec.partitions().len(),
-                num_files
-            )
-        } else if let Some(exec) = plan.as_any().downcast_ref::<CsvExec>() {
-            format!(
-                "CsvExec: {}; partitions={}",
-                &exec.path(),
-                exec.output_partitioning().partition_count()
-            )
-        } else if let Some(exec) = plan.as_any().downcast_ref::<FilterExec>() {
-            format!("FilterExec: {}", format_expr(exec.predicate().as_ref()))
-        } else if let Some(exec) = plan.as_any().downcast_ref::<QueryStageExec>() {
-            format!(
-                "QueryStageExec: job={}, stage={}",
-                exec.job_id, exec.stage_id
-            )
-        } else if let Some(exec) = plan.as_any().downcast_ref::<UnresolvedShuffleExec>() {
-            format!("UnresolvedShuffleExec: stages={:?}", exec.query_stage_ids)
-        } else if let Some(exec) = plan.as_any().downcast_ref::<CoalesceBatchesExec>() {
-            format!(
-                "CoalesceBatchesExec: batchSize={}",
-                exec.target_batch_size()
-            )
-        } else if plan.as_any().downcast_ref::<MergeExec>().is_some() {
-            "MergeExec".to_string()
-        } else {
-            let str = format!("{:?}", plan);
-            String::from(&str[0..120])
-        };
-
-    let children_str = plan
-        .children()
-        .iter()
-        .map(|c| format_plan(c.as_ref(), indent + 1))
-        .collect::<Result<Vec<String>>>()?
-        .join("\n");
-
-    let indent_str = "  ".repeat(indent);
-    if plan.children().is_empty() {
-        Ok(format!("{}{}{}", indent_str, &operator_str, children_str))
-    } else {
-        Ok(format!("{}{}\n{}", indent_str, &operator_str, children_str))
-    }
-}
-
-pub fn format_agg_expr(expr: &dyn AggregateExpr) -> Result<String> {
-    Ok(format!(
-        "{} {:?}",
-        expr.field()?.name(),
-        expr.expressions()
-            .iter()
-            .map(|e| format_expr(e.as_ref()))
-            .collect::<Vec<String>>()
-    ))
-}
-
-pub fn format_expr(expr: &dyn PhysicalExpr) -> String {
-    if let Some(e) = expr.as_any().downcast_ref::<Column>() {
-        e.name().to_string()
-    } else if let Some(e) = expr.as_any().downcast_ref::<Literal>() {
-        e.to_string()
-    } else if let Some(e) = expr.as_any().downcast_ref::<BinaryExpr>() {
-        format!("{} {} {}", e.left(), e.op(), e.right())
-    } else {
-        format!("{}", expr)
-    }
-}
-
-pub fn produce_diagram(filename: &str, stages: &[Arc<QueryStageExec>]) -> Result<()> {
-    let write_file = File::create(filename)?;
-    let mut w = BufWriter::new(&write_file);
-    writeln!(w, "digraph G {{")?;
-
-    // draw stages and entities
-    for stage in stages {
-        writeln!(w, "\tsubgraph cluster{} {{", stage.stage_id)?;
-        writeln!(w, "\t\tlabel = \"Stage {}\";", stage.stage_id)?;
-        let mut id = AtomicUsize::new(0);
-        build_exec_plan_diagram(
-            &mut w,
-            stage.child.as_ref(),
-            stage.stage_id,
-            &mut id,
-            true,
-        )?;
-        writeln!(w, "\t}}")?;
-    }
-
-    // draw relationships
-    for stage in stages {
-        let mut id = AtomicUsize::new(0);
-        build_exec_plan_diagram(
-            &mut w,
-            stage.child.as_ref(),
-            stage.stage_id,
-            &mut id,
-            false,
-        )?;
-    }
-
-    write!(w, "}}")?;
-    Ok(())
-}
-
-fn build_exec_plan_diagram(
-    w: &mut BufWriter<&File>,
-    plan: &dyn ExecutionPlan,
-    stage_id: usize,
-    id: &mut AtomicUsize,
-    draw_entity: bool,
-) -> Result<usize> {
-    let operator_str = if plan.as_any().downcast_ref::<HashAggregateExec>().is_some() {
-        "HashAggregateExec"
-    } else if plan.as_any().downcast_ref::<SortExec>().is_some() {
-        "SortExec"
-    } else if plan.as_any().downcast_ref::<ProjectionExec>().is_some() {
-        "ProjectionExec"
-    } else if plan.as_any().downcast_ref::<HashJoinExec>().is_some() {
-        "HashJoinExec"
-    } else if plan.as_any().downcast_ref::<ParquetExec>().is_some() {
-        "ParquetExec"
-    } else if plan.as_any().downcast_ref::<CsvExec>().is_some() {
-        "CsvExec"
-    } else if plan.as_any().downcast_ref::<FilterExec>().is_some() {
-        "FilterExec"
-    } else if plan.as_any().downcast_ref::<QueryStageExec>().is_some() {
-        "QueryStageExec"
-    } else if plan
-        .as_any()
-        .downcast_ref::<UnresolvedShuffleExec>()
-        .is_some()
-    {
-        "UnresolvedShuffleExec"
-    } else if plan
-        .as_any()
-        .downcast_ref::<CoalesceBatchesExec>()
-        .is_some()
-    {
-        "CoalesceBatchesExec"
-    } else if plan.as_any().downcast_ref::<MergeExec>().is_some() {
-        "MergeExec"
-    } else {
-        println!("Unknown: {:?}", plan);
-        "Unknown"
-    };
-
-    let node_id = id.load(Ordering::SeqCst);
-    id.store(node_id + 1, Ordering::SeqCst);
-
-    if draw_entity {
-        writeln!(
-            w,
-            "\t\tstage_{}_exec_{} [shape=box, label=\"{}\"];",
-            stage_id, node_id, operator_str
-        )?;
-    }
-    for child in plan.children() {
-        if let Some(shuffle) = child.as_any().downcast_ref::<UnresolvedShuffleExec>() {
-            if !draw_entity {
-                for y in &shuffle.query_stage_ids {
-                    writeln!(
-                        w,
-                        "\tstage_{}_exec_1 -> stage_{}_exec_{};",
-                        y, stage_id, node_id
-                    )?;
-                }
-            }
-        } else {
-            // relationships within same entity
-            let child_id =
-                build_exec_plan_diagram(w, child.as_ref(), stage_id, id, draw_entity)?;
-            if draw_entity {
-                writeln!(
-                    w,
-                    "\t\tstage_{}_exec_{} -> stage_{}_exec_{};",
-                    stage_id, child_id, stage_id, node_id
-                )?;
-            }
-        }
-    }
-    Ok(node_id)
-}
-
-/// Create a DataFusion context that is compatible with Ballista
-pub fn create_datafusion_context() -> ExecutionContext {
-    // remove Repartition rule because that isn't supported yet
-    let rules: Vec<Arc<dyn PhysicalOptimizerRule + Send + Sync>> = vec![
-        Arc::new(CoalesceBatches::new()),
-        Arc::new(AddMergeExec::new()),
-    ];
-    let config = ExecutionConfig::new()
-        .with_concurrency(1)
-        .with_repartition_joins(false)
-        .with_physical_optimizer_rules(rules);
-    ExecutionContext::with_config(config)
-}
diff --git a/rust/ballista/rust/executor/Cargo.toml b/rust/ballista/rust/executor/Cargo.toml
deleted file mode 100644
index 6b05b7c7fa9..00000000000
--- a/rust/ballista/rust/executor/Cargo.toml
+++ /dev/null
@@ -1,59 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-[package]
-name = "ballista-executor"
-description = "Ballista Distributed Compute - Executor"
-license = "Apache-2.0"
-version = "0.4.2-SNAPSHOT"
-homepage = "https://github.com/apache/arrow"
-repository = "https://github.com/apache/arrow"
-authors = ["Apache Arrow <dev@arrow.apache.org>"]
-edition = "2018"
-
-[features]
-default = ["snmalloc"]
-snmalloc = ["snmalloc-rs"]
-
-[dependencies]
-anyhow = "1"
-async-trait = "0.1.36"
-ballista-core = { path = "../core" }
-ballista-scheduler = { path = "../scheduler" }
-configure_me = "0.4.0"
-env_logger = "0.8"
-futures = "0.3"
-log = "0.4"
-snmalloc-rs = {version = "0.2", features= ["cache-friendly"], optional = true}
-tempfile = "3"
-tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread"] }
-tokio-stream = "0.1"
-tonic = "0.4"
-uuid = { version = "0.8", features = ["v4"] }
-
-arrow = { path = "../../../arrow"  }
-arrow-flight = { path = "../../../arrow-flight"  }
-datafusion = { path = "../../../datafusion" }
-
-[dev-dependencies]
-
-[build-dependencies]
-configure_me_codegen = "0.4.0"
-
-[package.metadata.configure_me.bin]
-executor = "executor_config_spec.toml"
-
diff --git a/rust/ballista/rust/executor/README.md b/rust/ballista/rust/executor/README.md
deleted file mode 100644
index c0824e639fd..00000000000
--- a/rust/ballista/rust/executor/README.md
+++ /dev/null
@@ -1,31 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-# Ballista Executor - Rust
-This crate contains the Ballista Executor. It can be used both as a library or as a binary.
-
-## Run
-
-```bash
-RUST_LOG=info cargo run --release
-...
-[2021-02-11T05:30:13Z INFO  executor] Running with config: ExecutorConfig { host: "localhost", port: 50051, work_dir: "/var/folders/y8/fc61kyjd4n53tn444n72rjrm0000gn/T/.tmpv1LjN0", concurrent_tasks: 4 }
-```
-
-By default, the executor will bind to `localhost` and listen on port `50051`.
\ No newline at end of file
diff --git a/rust/ballista/rust/executor/build.rs b/rust/ballista/rust/executor/build.rs
deleted file mode 100644
index 1c9e32b0b89..00000000000
--- a/rust/ballista/rust/executor/build.rs
+++ /dev/null
@@ -1,24 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-extern crate configure_me_codegen;
-
-fn main() -> Result<(), String> {
-    println!("cargo:rerun-if-changed=executor_config_spec.toml");
-    configure_me_codegen::build_script_auto()
-        .map_err(|e| format!("configure_me code generation failed: {}", e))
-}
diff --git a/rust/ballista/rust/executor/examples/example_executor_config.toml b/rust/ballista/rust/executor/examples/example_executor_config.toml
deleted file mode 100644
index 0705016ff30..00000000000
--- a/rust/ballista/rust/executor/examples/example_executor_config.toml
+++ /dev/null
@@ -1,22 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# the default configuration location is "/etc/ballista/scheduler.toml"
-# if you include a specifc conf file using "--config-file = my_config_file.toml"
-# then that file will override environment variables, but not command line arguments
-namespace = "my_name_space"
-bind_host = "1.2.3.4"
\ No newline at end of file
diff --git a/rust/ballista/rust/executor/executor_config_spec.toml b/rust/ballista/rust/executor/executor_config_spec.toml
deleted file mode 100644
index cb47ca06423..00000000000
--- a/rust/ballista/rust/executor/executor_config_spec.toml
+++ /dev/null
@@ -1,79 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-[general]
-name = "Ballista Executor"
-env_prefix = "BALLISTA_EXECUTOR"
-conf_file_param = "config_file"
-
-[[switch]]
-name = "version"
-doc = "Print version of this executable"
-
-[[param]]
-abbr = "n"
-name = "namespace"
-type = "String"
-doc = "Namespace for the ballista cluster that this executor will join. yippee"
-default = "std::string::String::from(\"ballista\")"
-
-[[param]]
-name = "scheduler_host"
-type = "String"
-default = "std::string::String::from(\"localhost\")"
-doc = "Scheduler host"
-
-[[param]]
-name = "scheduler_port"
-type = "u16"
-default = "50050"
-doc = "scheduler port"
-
-[[switch]]
-name = "local"
-doc = "Running in local mode will launch a standalone scheduler inside the executor process. This will create a single-executor cluster, and is useful for development scenarios."
-
-[[param]]
-name = "bind_host"
-type = "String"
-default = "std::string::String::from(\"0.0.0.0\")"
-doc = "Local IP address to bind to."
-
-[[param]]
-name = "external_host"
-type = "String"
-default = "std::string::String::from(\"localhost\")"
-doc = "Host name or IP address to register with scheduler so that other executors can connect to this executor."
-
-[[param]]
-abbr = "p"
-name = "port"
-type = "u16"
-default = "50051"
-doc = "bind port"
-
-[[param]]
-name = "work_dir"
-type = "String"
-doc = "Directory for temporary IPC files"
-
-[[param]]
-abbr = "c"
-name = "concurrent_tasks"
-type = "usize"
-default = "4"
-doc = "Max concurrent tasks."
\ No newline at end of file
diff --git a/rust/ballista/rust/executor/src/collect.rs b/rust/ballista/rust/executor/src/collect.rs
deleted file mode 100644
index a2f9d4c6360..00000000000
--- a/rust/ballista/rust/executor/src/collect.rs
+++ /dev/null
@@ -1,127 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! The CollectExec operator retrieves results from the cluster and returns them as a single
-//! vector of [RecordBatch].
-
-use std::sync::Arc;
-use std::task::{Context, Poll};
-use std::{any::Any, pin::Pin};
-
-use arrow::datatypes::SchemaRef;
-use arrow::error::Result as ArrowResult;
-use arrow::record_batch::RecordBatch;
-use async_trait::async_trait;
-use datafusion::error::DataFusionError;
-use datafusion::physical_plan::{ExecutionPlan, Partitioning, SendableRecordBatchStream};
-use datafusion::{error::Result, physical_plan::RecordBatchStream};
-use futures::stream::SelectAll;
-use futures::Stream;
-
-/// The CollectExec operator retrieves results from the cluster and returns them as a single
-/// vector of [RecordBatch].
-#[derive(Debug, Clone)]
-pub struct CollectExec {
-    plan: Arc<dyn ExecutionPlan>,
-}
-
-impl CollectExec {
-    pub fn new(plan: Arc<dyn ExecutionPlan>) -> Self {
-        Self { plan }
-    }
-}
-
-#[async_trait]
-impl ExecutionPlan for CollectExec {
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn schema(&self) -> SchemaRef {
-        self.plan.schema()
-    }
-
-    fn output_partitioning(&self) -> Partitioning {
-        Partitioning::UnknownPartitioning(1)
-    }
-
-    fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
-        vec![self.plan.clone()]
-    }
-
-    fn with_new_children(
-        &self,
-        _children: Vec<Arc<dyn ExecutionPlan>>,
-    ) -> Result<Arc<dyn ExecutionPlan>> {
-        unimplemented!()
-    }
-
-    async fn execute(
-        &self,
-        partition: usize,
-    ) -> Result<Pin<Box<dyn RecordBatchStream + Send + Sync>>> {
-        assert_eq!(0, partition);
-        let num_partitions = self.plan.output_partitioning().partition_count();
-
-        let mut futures = Vec::with_capacity(num_partitions);
-        for i in 0..num_partitions {
-            futures.push(self.plan.execute(i));
-        }
-
-        let mut streams = Vec::with_capacity(num_partitions);
-        for result in futures::future::join_all(futures).await {
-            match result {
-                Ok(stream) => {
-                    streams.push(stream);
-                }
-                Err(e) => {
-                    return Err(DataFusionError::Execution(format!(
-                        "BallistaError: {:?}",
-                        e
-                    )));
-                }
-            }
-        }
-
-        Ok(Box::pin(MergedRecordBatchStream {
-            schema: self.schema(),
-            select_all: Box::pin(futures::stream::select_all(streams)),
-        }))
-    }
-}
-
-struct MergedRecordBatchStream {
-    schema: SchemaRef,
-    select_all: Pin<Box<SelectAll<SendableRecordBatchStream>>>,
-}
-
-impl Stream for MergedRecordBatchStream {
-    type Item = ArrowResult<RecordBatch>;
-
-    fn poll_next(
-        mut self: Pin<&mut Self>,
-        cx: &mut Context<'_>,
-    ) -> Poll<Option<Self::Item>> {
-        self.select_all.as_mut().poll_next(cx)
-    }
-}
-
-impl RecordBatchStream for MergedRecordBatchStream {
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-}
diff --git a/rust/ballista/rust/executor/src/execution_loop.rs b/rust/ballista/rust/executor/src/execution_loop.rs
deleted file mode 100644
index cf641ddcc5c..00000000000
--- a/rust/ballista/rust/executor/src/execution_loop.rs
+++ /dev/null
@@ -1,172 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::convert::TryInto;
-use std::sync::atomic::{AtomicUsize, Ordering};
-use std::sync::mpsc::{Receiver, Sender, TryRecvError};
-use std::{sync::Arc, time::Duration};
-
-use datafusion::physical_plan::ExecutionPlan;
-use log::{debug, error, info, warn};
-use tonic::transport::Channel;
-
-use ballista_core::serde::scheduler::ExecutorMeta;
-use ballista_core::{
-    client::BallistaClient,
-    serde::protobuf::{
-        self, scheduler_grpc_client::SchedulerGrpcClient, task_status, FailedTask,
-        PartitionId, PollWorkParams, PollWorkResult, TaskDefinition, TaskStatus,
-    },
-};
-use protobuf::CompletedTask;
-
-pub async fn poll_loop(
-    mut scheduler: SchedulerGrpcClient<Channel>,
-    executor_client: BallistaClient,
-    executor_meta: ExecutorMeta,
-    concurrent_tasks: usize,
-) {
-    let executor_meta: protobuf::ExecutorMetadata = executor_meta.into();
-    let available_tasks_slots = Arc::new(AtomicUsize::new(concurrent_tasks));
-    let (task_status_sender, mut task_status_receiver) =
-        std::sync::mpsc::channel::<TaskStatus>();
-
-    loop {
-        debug!("Starting registration loop with scheduler");
-
-        let task_status: Vec<TaskStatus> =
-            sample_tasks_status(&mut task_status_receiver).await;
-
-        let poll_work_result: anyhow::Result<
-            tonic::Response<PollWorkResult>,
-            tonic::Status,
-        > = scheduler
-            .poll_work(PollWorkParams {
-                metadata: Some(executor_meta.clone()),
-                can_accept_task: available_tasks_slots.load(Ordering::SeqCst) > 0,
-                task_status,
-            })
-            .await;
-
-        let task_status_sender = task_status_sender.clone();
-
-        match poll_work_result {
-            Ok(result) => {
-                if let Some(task) = result.into_inner().task {
-                    run_received_tasks(
-                        executor_client.clone(),
-                        executor_meta.id.clone(),
-                        available_tasks_slots.clone(),
-                        task_status_sender,
-                        task,
-                    )
-                    .await;
-                }
-            }
-            Err(error) => {
-                warn!("Executor registration failed. If this continues to happen the executor might be marked as dead by the scheduler. Error: {}", error);
-            }
-        }
-
-        tokio::time::sleep(Duration::from_millis(250)).await;
-    }
-}
-
-async fn run_received_tasks(
-    mut executor_client: BallistaClient,
-    executor_id: String,
-    available_tasks_slots: Arc<AtomicUsize>,
-    task_status_sender: Sender<TaskStatus>,
-    task: TaskDefinition,
-) {
-    info!("Received task {:?}", task.task_id.as_ref().unwrap());
-    available_tasks_slots.fetch_sub(1, Ordering::SeqCst);
-    let plan: Arc<dyn ExecutionPlan> = (&task.plan.unwrap()).try_into().unwrap();
-    let task_id = task.task_id.unwrap();
-    // TODO: This is a convoluted way of executing the task. We should move the task
-    // execution code outside of the FlightService (data plane) into the control plane.
-
-    tokio::spawn(async move {
-        let execution_result = executor_client
-            .execute_partition(
-                task_id.job_id.clone(),
-                task_id.stage_id as usize,
-                vec![task_id.partition_id as usize],
-                plan,
-            )
-            .await;
-        info!("DONE WITH TASK: {:?}", execution_result);
-        available_tasks_slots.fetch_add(1, Ordering::SeqCst);
-        let _ = task_status_sender.send(as_task_status(
-            execution_result.map(|_| ()),
-            executor_id,
-            task_id,
-        ));
-    });
-}
-
-fn as_task_status(
-    execution_result: ballista_core::error::Result<()>,
-    executor_id: String,
-    task_id: PartitionId,
-) -> TaskStatus {
-    match execution_result {
-        Ok(_) => {
-            info!("Task {:?} finished", task_id);
-
-            TaskStatus {
-                partition_id: Some(task_id),
-                status: Some(task_status::Status::Completed(CompletedTask {
-                    executor_id,
-                })),
-            }
-        }
-        Err(e) => {
-            let error_msg = e.to_string();
-            info!("Task {:?} failed: {}", task_id, error_msg);
-
-            TaskStatus {
-                partition_id: Some(task_id),
-                status: Some(task_status::Status::Failed(FailedTask {
-                    error: format!("Task failed due to Tokio error: {}", error_msg),
-                })),
-            }
-        }
-    }
-}
-
-async fn sample_tasks_status(
-    task_status_receiver: &mut Receiver<TaskStatus>,
-) -> Vec<TaskStatus> {
-    let mut task_status: Vec<TaskStatus> = vec![];
-
-    loop {
-        match task_status_receiver.try_recv() {
-            anyhow::Result::Ok(status) => {
-                task_status.push(status);
-            }
-            Err(TryRecvError::Empty) => {
-                break;
-            }
-            Err(TryRecvError::Disconnected) => {
-                error!("Task statuses channel disconnected");
-            }
-        }
-    }
-
-    task_status
-}
diff --git a/rust/ballista/rust/executor/src/flight_service.rs b/rust/ballista/rust/executor/src/flight_service.rs
deleted file mode 100644
index 8fff3dbcade..00000000000
--- a/rust/ballista/rust/executor/src/flight_service.rs
+++ /dev/null
@@ -1,374 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Implementation of the Apache Arrow Flight protocol that wraps an executor.
-
-use std::fs::File;
-use std::path::PathBuf;
-use std::pin::Pin;
-use std::sync::Arc;
-use std::time::Instant;
-
-use crate::BallistaExecutor;
-use ballista_core::error::BallistaError;
-use ballista_core::serde::decode_protobuf;
-use ballista_core::serde::scheduler::{Action as BallistaAction, PartitionStats};
-use ballista_core::utils::{self, format_plan};
-
-use arrow::array::{ArrayRef, StringBuilder};
-use arrow::datatypes::{DataType, Field, Schema};
-use arrow::error::ArrowError;
-use arrow::ipc::reader::FileReader;
-use arrow::ipc::writer::IpcWriteOptions;
-use arrow::record_batch::RecordBatch;
-use arrow_flight::{
-    flight_service_server::FlightService, Action, ActionType, Criteria, Empty,
-    FlightData, FlightDescriptor, FlightInfo, HandshakeRequest, HandshakeResponse,
-    PutResult, SchemaResult, Ticket,
-};
-use datafusion::error::DataFusionError;
-use futures::{Stream, StreamExt};
-use log::{info, warn};
-use std::io::{Read, Seek};
-use tokio::sync::mpsc::channel;
-use tokio::task::JoinHandle;
-use tokio::{
-    sync::mpsc::{Receiver, Sender},
-    task,
-};
-use tokio_stream::wrappers::ReceiverStream;
-use tonic::{Request, Response, Status, Streaming};
-
-type FlightDataSender = Sender<Result<FlightData, Status>>;
-type FlightDataReceiver = Receiver<Result<FlightData, Status>>;
-
-/// Service implementing the Apache Arrow Flight Protocol
-#[derive(Clone)]
-pub struct BallistaFlightService {
-    executor: Arc<BallistaExecutor>,
-}
-
-impl BallistaFlightService {
-    pub fn new(executor: Arc<BallistaExecutor>) -> Self {
-        Self { executor }
-    }
-}
-
-type BoxedFlightStream<T> =
-    Pin<Box<dyn Stream<Item = Result<T, Status>> + Send + Sync + 'static>>;
-
-#[tonic::async_trait]
-impl FlightService for BallistaFlightService {
-    type DoActionStream = BoxedFlightStream<arrow_flight::Result>;
-    type DoExchangeStream = BoxedFlightStream<FlightData>;
-    type DoGetStream = BoxedFlightStream<FlightData>;
-    type DoPutStream = BoxedFlightStream<PutResult>;
-    type HandshakeStream = BoxedFlightStream<HandshakeResponse>;
-    type ListActionsStream = BoxedFlightStream<ActionType>;
-    type ListFlightsStream = BoxedFlightStream<FlightInfo>;
-
-    async fn do_get(
-        &self,
-        request: Request<Ticket>,
-    ) -> Result<Response<Self::DoGetStream>, Status> {
-        let ticket = request.into_inner();
-        info!("Received do_get request");
-
-        let action =
-            decode_protobuf(&ticket.ticket).map_err(|e| from_ballista_err(&e))?;
-
-        match &action {
-            BallistaAction::ExecutePartition(partition) => {
-                info!(
-                    "ExecutePartition: job={}, stage={}, partition={:?}\n{}",
-                    partition.job_id,
-                    partition.stage_id,
-                    partition.partition_id,
-                    format_plan(partition.plan.as_ref(), 0)
-                        .map_err(|e| from_ballista_err(&e))?
-                );
-
-                let mut tasks: Vec<JoinHandle<Result<_, BallistaError>>> = vec![];
-                for part in partition.partition_id.clone() {
-                    let work_dir = self.executor.config.work_dir.clone();
-                    let partition = partition.clone();
-                    tasks.push(tokio::spawn(async move {
-                        let mut path = PathBuf::from(&work_dir);
-                        path.push(partition.job_id);
-                        path.push(&format!("{}", partition.stage_id));
-                        path.push(&format!("{}", part));
-                        std::fs::create_dir_all(&path)?;
-
-                        path.push("data.arrow");
-                        let path = path.to_str().unwrap();
-                        info!("Writing results to {}", path);
-
-                        let now = Instant::now();
-
-                        // execute the query partition
-                        let mut stream = partition
-                            .plan
-                            .execute(part)
-                            .await
-                            .map_err(|e| from_datafusion_err(&e))?;
-
-                        // stream results to disk
-                        let stats = utils::write_stream_to_disk(&mut stream, &path)
-                            .await
-                            .map_err(|e| from_ballista_err(&e))?;
-
-                        info!(
-                            "Executed partition {} in {} seconds. Statistics: {:?}",
-                            part,
-                            now.elapsed().as_secs(),
-                            stats
-                        );
-
-                        let mut flights: Vec<Result<FlightData, Status>> = vec![];
-                        let options = arrow::ipc::writer::IpcWriteOptions::default();
-
-                        let schema = Arc::new(Schema::new(vec![
-                            Field::new("path", DataType::Utf8, false),
-                            stats.arrow_struct_repr(),
-                        ]));
-
-                        // build result set with summary of the partition execution status
-                        let mut c0 = StringBuilder::new(1);
-                        c0.append_value(&path).unwrap();
-                        let path: ArrayRef = Arc::new(c0.finish());
-
-                        let stats: ArrayRef = stats.to_arrow_arrayref()?;
-                        let results = vec![RecordBatch::try_new(
-                            schema,
-                            vec![path, stats],
-                        )
-                        .unwrap()];
-
-                        let mut batches: Vec<Result<FlightData, Status>> = results
-                            .iter()
-                            .flat_map(|batch| create_flight_iter(batch, &options))
-                            .collect();
-
-                        // append batch vector to schema vector, so that the first message sent is the schema
-                        flights.append(&mut batches);
-
-                        Ok(flights)
-                    }));
-                }
-
-                // wait for all partitions to complete
-                let results = futures::future::join_all(tasks).await;
-
-                // get results
-                let mut flights: Vec<Result<FlightData, Status>> = vec![];
-
-                // add an initial FlightData message that sends schema
-                let options = arrow::ipc::writer::IpcWriteOptions::default();
-                let stats = PartitionStats::default();
-                let schema = Arc::new(Schema::new(vec![
-                    Field::new("path", DataType::Utf8, false),
-                    stats.arrow_struct_repr(),
-                ]));
-                let schema_flight_data =
-                    arrow_flight::utils::flight_data_from_arrow_schema(
-                        schema.as_ref(),
-                        &options,
-                    );
-                flights.push(Ok(schema_flight_data));
-
-                // collect statistics from each executed partition
-                for result in results {
-                    let result = result.map_err(|e| {
-                        Status::internal(format!("Ballista Error: {:?}", e))
-                    })?;
-                    let batches = result.map_err(|e| {
-                        Status::internal(format!("Ballista Error: {:?}", e))
-                    })?;
-                    flights.extend_from_slice(&batches);
-                }
-
-                let output = futures::stream::iter(flights);
-                Ok(Response::new(Box::pin(output) as Self::DoGetStream))
-            }
-            BallistaAction::FetchPartition(partition_id) => {
-                // fetch a partition that was previously executed by this executor
-                info!("FetchPartition {:?}", partition_id);
-
-                let mut path = PathBuf::from(&self.executor.config.work_dir);
-                path.push(&partition_id.job_id);
-                path.push(&format!("{}", partition_id.stage_id));
-                path.push(&format!("{}", partition_id.partition_id));
-                path.push("data.arrow");
-                let path = path.to_str().unwrap();
-
-                info!("FetchPartition {:?} reading {}", partition_id, path);
-                let file = File::open(&path)
-                    .map_err(|e| {
-                        BallistaError::General(format!(
-                            "Failed to open partition file at {}: {:?}",
-                            path, e
-                        ))
-                    })
-                    .map_err(|e| from_ballista_err(&e))?;
-                let reader = FileReader::try_new(file).map_err(|e| from_arrow_err(&e))?;
-
-                let (tx, rx): (FlightDataSender, FlightDataReceiver) = channel(2);
-
-                // Arrow IPC reader does not implement Sync + Send so we need to use a channel
-                // to communicate
-                task::spawn(async move {
-                    if let Err(e) = stream_flight_data(reader, tx).await {
-                        warn!("Error streaming results: {:?}", e);
-                    }
-                });
-
-                Ok(Response::new(
-                    Box::pin(ReceiverStream::new(rx)) as Self::DoGetStream
-                ))
-            }
-        }
-    }
-
-    async fn get_schema(
-        &self,
-        _request: Request<FlightDescriptor>,
-    ) -> Result<Response<SchemaResult>, Status> {
-        Err(Status::unimplemented("get_schema"))
-    }
-
-    async fn get_flight_info(
-        &self,
-        _request: Request<FlightDescriptor>,
-    ) -> Result<Response<FlightInfo>, Status> {
-        Err(Status::unimplemented("get_flight_info"))
-    }
-
-    async fn handshake(
-        &self,
-        _request: Request<Streaming<HandshakeRequest>>,
-    ) -> Result<Response<Self::HandshakeStream>, Status> {
-        Err(Status::unimplemented("handshake"))
-    }
-
-    async fn list_flights(
-        &self,
-        _request: Request<Criteria>,
-    ) -> Result<Response<Self::ListFlightsStream>, Status> {
-        Err(Status::unimplemented("list_flights"))
-    }
-
-    async fn do_put(
-        &self,
-        request: Request<Streaming<FlightData>>,
-    ) -> Result<Response<Self::DoPutStream>, Status> {
-        let mut request = request.into_inner();
-
-        while let Some(data) = request.next().await {
-            let _data = data?;
-        }
-
-        Err(Status::unimplemented("do_put"))
-    }
-
-    async fn do_action(
-        &self,
-        request: Request<Action>,
-    ) -> Result<Response<Self::DoActionStream>, Status> {
-        let action = request.into_inner();
-
-        let _action =
-            decode_protobuf(&action.body.to_vec()).map_err(|e| from_ballista_err(&e))?;
-
-        Err(Status::unimplemented("do_action"))
-    }
-
-    async fn list_actions(
-        &self,
-        _request: Request<Empty>,
-    ) -> Result<Response<Self::ListActionsStream>, Status> {
-        Err(Status::unimplemented("list_actions"))
-    }
-
-    async fn do_exchange(
-        &self,
-        _request: Request<Streaming<FlightData>>,
-    ) -> Result<Response<Self::DoExchangeStream>, Status> {
-        Err(Status::unimplemented("do_exchange"))
-    }
-}
-
-/// Convert a single RecordBatch into an iterator of FlightData (containing
-/// dictionaries and batches)
-fn create_flight_iter(
-    batch: &RecordBatch,
-    options: &IpcWriteOptions,
-) -> Box<dyn Iterator<Item = Result<FlightData, Status>>> {
-    let (flight_dictionaries, flight_batch) =
-        arrow_flight::utils::flight_data_from_arrow_batch(batch, &options);
-    Box::new(
-        flight_dictionaries
-            .into_iter()
-            .chain(std::iter::once(flight_batch))
-            .map(Ok),
-    )
-}
-
-async fn stream_flight_data<T>(
-    reader: FileReader<T>,
-    tx: FlightDataSender,
-) -> Result<(), Status>
-where
-    T: Read + Seek,
-{
-    let options = arrow::ipc::writer::IpcWriteOptions::default();
-    let schema_flight_data = arrow_flight::utils::flight_data_from_arrow_schema(
-        reader.schema().as_ref(),
-        &options,
-    );
-    send_response(&tx, Ok(schema_flight_data)).await?;
-
-    for batch in reader {
-        let batch_flight_data: Vec<_> = batch
-            .map(|b| create_flight_iter(&b, &options).collect())
-            .map_err(|e| from_arrow_err(&e))?;
-        for batch in &batch_flight_data {
-            send_response(&tx, batch.clone()).await?;
-        }
-    }
-    Ok(())
-}
-
-async fn send_response(
-    tx: &FlightDataSender,
-    data: Result<FlightData, Status>,
-) -> Result<(), Status> {
-    tx.send(data)
-        .await
-        .map_err(|e| Status::internal(format!("{:?}", e)))
-}
-
-fn from_arrow_err(e: &ArrowError) -> Status {
-    Status::internal(format!("ArrowError: {:?}", e))
-}
-
-fn from_ballista_err(e: &ballista_core::error::BallistaError) -> Status {
-    Status::internal(format!("Ballista Error: {:?}", e))
-}
-
-fn from_datafusion_err(e: &DataFusionError) -> Status {
-    Status::internal(format!("DataFusion Error: {:?}", e))
-}
diff --git a/rust/ballista/rust/executor/src/lib.rs b/rust/ballista/rust/executor/src/lib.rs
deleted file mode 100644
index 3d7bbaca3f1..00000000000
--- a/rust/ballista/rust/executor/src/lib.rs
+++ /dev/null
@@ -1,52 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Core executor logic for executing queries and storing results in memory.
-
-pub mod collect;
-pub mod flight_service;
-
-#[derive(Debug, Clone)]
-pub struct ExecutorConfig {
-    pub(crate) host: String,
-    pub(crate) port: u16,
-    /// Directory for temporary files, such as IPC files
-    pub(crate) work_dir: String,
-    pub(crate) concurrent_tasks: usize,
-}
-
-impl ExecutorConfig {
-    pub fn new(host: &str, port: u16, work_dir: &str, concurrent_tasks: usize) -> Self {
-        Self {
-            host: host.to_owned(),
-            port,
-            work_dir: work_dir.to_owned(),
-            concurrent_tasks,
-        }
-    }
-}
-
-#[allow(dead_code)]
-pub struct BallistaExecutor {
-    pub(crate) config: ExecutorConfig,
-}
-
-impl BallistaExecutor {
-    pub fn new(config: ExecutorConfig) -> Self {
-        Self { config }
-    }
-}
diff --git a/rust/ballista/rust/executor/src/main.rs b/rust/ballista/rust/executor/src/main.rs
deleted file mode 100644
index 2718ea3542f..00000000000
--- a/rust/ballista/rust/executor/src/main.rs
+++ /dev/null
@@ -1,176 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Ballista Rust executor binary.
-
-use std::sync::Arc;
-
-use anyhow::{Context, Result};
-use arrow_flight::flight_service_server::FlightServiceServer;
-use futures::future::MaybeDone;
-use log::info;
-use tempfile::TempDir;
-use tonic::transport::Server;
-use uuid::Uuid;
-
-use ballista_core::{
-    client::BallistaClient, serde::protobuf::scheduler_grpc_client::SchedulerGrpcClient,
-};
-use ballista_core::{
-    print_version, serde::protobuf::scheduler_grpc_server::SchedulerGrpcServer,
-    serde::scheduler::ExecutorMeta, BALLISTA_VERSION,
-};
-use ballista_executor::{
-    flight_service::BallistaFlightService, BallistaExecutor, ExecutorConfig,
-};
-use ballista_scheduler::{state::StandaloneClient, SchedulerServer};
-use config::prelude::*;
-
-mod execution_loop;
-
-#[macro_use]
-extern crate configure_me;
-
-#[allow(clippy::all, warnings)]
-mod config {
-    // Ideally we would use the include_config macro from configure_me, but then we cannot use
-    // #[allow(clippy::all)] to silence clippy warnings from the generated code
-    include!(concat!(env!("OUT_DIR"), "/executor_configure_me_config.rs"));
-}
-
-#[cfg(feature = "snmalloc")]
-#[global_allocator]
-static ALLOC: snmalloc_rs::SnMalloc = snmalloc_rs::SnMalloc;
-
-#[tokio::main]
-async fn main() -> Result<()> {
-    env_logger::init();
-
-    // parse command-line arguments
-    let (opt, _remaining_args) =
-        Config::including_optional_config_files(&["/etc/ballista/executor.toml"])
-            .unwrap_or_exit();
-
-    if opt.version {
-        print_version();
-        std::process::exit(0);
-    }
-
-    let namespace = opt.namespace;
-    let external_host = opt.external_host;
-    let bind_host = opt.bind_host;
-    let port = opt.port;
-
-    let addr = format!("{}:{}", bind_host, port);
-    let addr = addr
-        .parse()
-        .with_context(|| format!("Could not parse address: {}", addr))?;
-
-    let scheduler_host = if opt.local {
-        external_host.to_owned()
-    } else {
-        opt.scheduler_host
-    };
-    let scheduler_port = opt.scheduler_port;
-    let scheduler_url = format!("http://{}:{}", scheduler_host, scheduler_port);
-
-    let work_dir = opt.work_dir.unwrap_or(
-        TempDir::new()?
-            .into_path()
-            .into_os_string()
-            .into_string()
-            .unwrap(),
-    );
-    let config =
-        ExecutorConfig::new(&external_host, port, &work_dir, opt.concurrent_tasks);
-    info!("Running with config: {:?}", config);
-
-    let executor_meta = ExecutorMeta {
-        id: Uuid::new_v4().to_string(), // assign this executor a unique ID
-        host: external_host.clone(),
-        port,
-    };
-
-    if opt.local {
-        info!("Running in local mode. Scheduler will be run in-proc");
-        let client = StandaloneClient::try_new_temporary()
-            .context("Could not create standalone config backend")?;
-        let server =
-            SchedulerGrpcServer::new(SchedulerServer::new(Arc::new(client), namespace));
-        let addr = format!("{}:{}", bind_host, scheduler_port);
-        let addr = addr
-            .parse()
-            .with_context(|| format!("Could not parse {}", addr))?;
-        info!(
-            "Ballista v{} Rust Scheduler listening on {:?}",
-            BALLISTA_VERSION, addr
-        );
-        let scheduler_future =
-            tokio::spawn(Server::builder().add_service(server).serve(addr));
-        let mut scheduler_result = futures::future::maybe_done(scheduler_future);
-
-        // Ensure scheduler is ready to receive connections
-        while SchedulerGrpcClient::connect(scheduler_url.clone())
-            .await
-            .is_err()
-        {
-            let scheduler_future = match scheduler_result {
-                MaybeDone::Future(f) => f,
-                MaybeDone::Done(Err(e)) => return Err(e).context("Tokio error"),
-                MaybeDone::Done(Ok(Err(e))) => {
-                    return Err(e).context("Scheduler failed to initialize correctly")
-                }
-                MaybeDone::Done(Ok(Ok(()))) => {
-                    return Err(anyhow::format_err!(
-                        "Scheduler unexpectedly finished successfully"
-                    ))
-                }
-                MaybeDone::Gone => {
-                    panic!("Received Gone from recently created MaybeDone")
-                }
-            };
-            tokio::time::sleep(std::time::Duration::from_millis(100)).await;
-            scheduler_result = futures::future::maybe_done(scheduler_future);
-        }
-    }
-
-    let scheduler = SchedulerGrpcClient::connect(scheduler_url)
-        .await
-        .context("Could not connect to scheduler")?;
-    let executor = Arc::new(BallistaExecutor::new(config));
-    let service = BallistaFlightService::new(executor);
-
-    let server = FlightServiceServer::new(service);
-    info!(
-        "Ballista v{} Rust Executor listening on {:?}",
-        BALLISTA_VERSION, addr
-    );
-    let server_future = tokio::spawn(Server::builder().add_service(server).serve(addr));
-    let client = BallistaClient::try_new(&external_host, port).await?;
-    tokio::spawn(execution_loop::poll_loop(
-        scheduler,
-        client,
-        executor_meta,
-        opt.concurrent_tasks,
-    ));
-
-    server_future
-        .await
-        .context("Tokio error")?
-        .context("Could not start executor server")?;
-    Ok(())
-}
diff --git a/rust/ballista/rust/scheduler/Cargo.toml b/rust/ballista/rust/scheduler/Cargo.toml
deleted file mode 100644
index 71925ee5259..00000000000
--- a/rust/ballista/rust/scheduler/Cargo.toml
+++ /dev/null
@@ -1,66 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-[package]
-name = "ballista-scheduler"
-description = "Ballista Distributed Compute - Scheduler"
-license = "Apache-2.0"
-version = "0.4.2-SNAPSHOT"
-homepage = "https://github.com/apache/arrow"
-repository = "https://github.com/apache/arrow"
-authors = ["Apache Arrow <dev@arrow.apache.org>"]
-edition = "2018"
-
-[features]
-default = ["etcd", "sled"]
-etcd = ["etcd-client"]
-sled = ["sled_package"]
-
-[dependencies]
-anyhow = "1"
-ballista-core = { path = "../core" }
-clap = "2"
-configure_me = "0.4.0"
-env_logger = "0.8"
-etcd-client = { version = "0.6", optional = true }
-futures = "0.3"
-http = "0.2"
-http-body = "0.4"
-hyper = "0.14.4"
-log = "0.4"
-parse_arg = "0.1.3"
-prost = "0.7"
-rand = "0.8"
-serde = {version = "1", features = ["derive"]}
-sled_package = { package = "sled", version = "0.34", optional = true }
-tokio = { version = "1.0", features = ["full"] }
-tonic = "0.4"
-tower = { version = "0.4" }
-warp = "0.3"
-
-arrow = { path = "../../../arrow"  }
-datafusion = { path = "../../../datafusion" }
-
-[dev-dependencies]
-ballista-core = { path = "../core" }
-uuid = { version = "0.8", features = ["v4"] }
-
-[build-dependencies]
-configure_me_codegen = "0.4.0"
-
-[package.metadata.configure_me.bin]
-scheduler = "scheduler_config_spec.toml"
diff --git a/rust/ballista/rust/scheduler/README.md b/rust/ballista/rust/scheduler/README.md
deleted file mode 100644
index d87eec30e23..00000000000
--- a/rust/ballista/rust/scheduler/README.md
+++ /dev/null
@@ -1,51 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-# Ballista Scheduler
-This crate contains the Ballista Scheduler. It can be used both as a library or as a binary.
-
-## Run
-
-```bash
-$ RUST_LOG=info cargo run --release
-...
-[2021-02-11T05:29:30Z INFO  scheduler] Ballista v0.4.2-SNAPSHOT Scheduler listening on 0.0.0.0:50050
-[2021-02-11T05:30:13Z INFO  ballista::scheduler] Received register_executor request for ExecutorMetadata { id: "6d10f5d2-c8c3-4e0f-afdb-1f6ec9171321", host: "localhost", port: 50051 }
-```
-
-By default, the scheduler will bind to `localhost` and listen on port `50051`.
-
-## Connecting to Scheduler
-Scheduler supports REST model also using content negotiation. 
-For e.x if you want to get list of executors connected to the scheduler, 
-you can do (assuming you use default config)
-
-```bash
-curl --request GET \
-  --url http://localhost:50050/executors \
-  --header 'Accept: application/json'
-```
-
-## Scheduler UI
-A basic ui for the scheduler is in `ui/scheduler` of the ballista repo. 
-It can be started using the following [yarn](https://yarnpkg.com/) command
-
-```bash
-yarn && yarn start
-```
diff --git a/rust/ballista/rust/scheduler/build.rs b/rust/ballista/rust/scheduler/build.rs
deleted file mode 100644
index bae6a3bfe2e..00000000000
--- a/rust/ballista/rust/scheduler/build.rs
+++ /dev/null
@@ -1,24 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-extern crate configure_me_codegen;
-
-fn main() -> Result<(), String> {
-    println!("cargo:rerun-if-changed=scheduler_config_spec.toml");
-    configure_me_codegen::build_script_auto()
-        .map_err(|e| format!("configure_me code generation failed: {}", e))
-}
diff --git a/rust/ballista/rust/scheduler/scheduler_config_spec.toml b/rust/ballista/rust/scheduler/scheduler_config_spec.toml
deleted file mode 100644
index 560e9a2599b..00000000000
--- a/rust/ballista/rust/scheduler/scheduler_config_spec.toml
+++ /dev/null
@@ -1,60 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-[general]
-name = "Ballista Scheduler"
-env_prefix = "BALLISTA_SCHEDULER"
-conf_file_param = "config_file"
-
-[[switch]]
-name = "version"
-doc = "Print version of this executable"
-
-[[param]]
-abbr = "b"
-name = "config_backend"
-type = "ballista_scheduler::ConfigBackend"
-doc = "The configuration backend for the scheduler, see ConfigBackend::variants() for options. Default: Standalone"
-default = "ballista_scheduler::ConfigBackend::Standalone"
-
-[[param]]
-abbr = "n"
-name = "namespace"
-type = "String"
-doc = "Namespace for the ballista cluster that this executor will join. Default: ballista"
-default = "std::string::String::from(\"ballista\")"
-
-[[param]]
-abbr = "e"
-name = "etcd_urls"
-type = "String"
-doc = "etcd urls for use when discovery mode is `etcd`. Default: localhost:2379"
-default = "std::string::String::from(\"localhost:2379\")"
-
-[[param]]
-abbr = "h"
-name = "bind_host"
-type = "String"
-default = "std::string::String::from(\"0.0.0.0\")"
-doc = "Local host name or IP address to bind to. Default: 0.0.0.0"
-
-[[param]]
-abbr = "p"
-name = "port"
-type = "u16"
-default = "50050"
-doc = "bind port. Default: 50050"
\ No newline at end of file
diff --git a/rust/ballista/rust/scheduler/src/api/handlers.rs b/rust/ballista/rust/scheduler/src/api/handlers.rs
deleted file mode 100644
index 7293558d0cc..00000000000
--- a/rust/ballista/rust/scheduler/src/api/handlers.rs
+++ /dev/null
@@ -1,55 +0,0 @@
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use crate::SchedulerServer;
-use ballista_core::serde::protobuf::{
-    scheduler_grpc_server::SchedulerGrpc, ExecutorMetadata, GetExecutorMetadataParams,
-    GetExecutorMetadataResult,
-};
-use ballista_core::serde::scheduler::ExecutorMeta;
-use tonic::{Request, Response};
-use warp::Rejection;
-
-#[derive(Debug, serde::Serialize)]
-struct StateResponse {
-    executors: Vec<ExecutorMeta>,
-    started: u128,
-    version: String,
-}
-
-pub(crate) async fn scheduler_state(
-    data_server: SchedulerServer,
-) -> Result<impl warp::Reply, Rejection> {
-    let data: Result<Response<GetExecutorMetadataResult>, tonic::Status> = data_server
-        .get_executors_metadata(Request::new(GetExecutorMetadataParams {}))
-        .await;
-    let metadata: Vec<ExecutorMeta> = match data {
-        Ok(result) => {
-            let res: &GetExecutorMetadataResult = result.get_ref();
-            let vec: &Vec<ExecutorMetadata> = &res.metadata;
-            vec.iter()
-                .map(|v: &ExecutorMetadata| ExecutorMeta {
-                    host: v.host.clone(),
-                    port: v.port as u16,
-                    id: v.id.clone(),
-                })
-                .collect()
-        }
-        Err(_) => vec![],
-    };
-    let response = StateResponse {
-        executors: metadata,
-        started: data_server.start_time,
-        version: data_server.version.clone(),
-    };
-    Ok(warp::reply::json(&response))
-}
diff --git a/rust/ballista/rust/scheduler/src/api/mod.rs b/rust/ballista/rust/scheduler/src/api/mod.rs
deleted file mode 100644
index 45f281a67a3..00000000000
--- a/rust/ballista/rust/scheduler/src/api/mod.rs
+++ /dev/null
@@ -1,87 +0,0 @@
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-mod handlers;
-
-use crate::SchedulerServer;
-use anyhow::Result;
-use std::{
-    pin::Pin,
-    task::{Context as TaskContext, Poll},
-};
-use warp::filters::BoxedFilter;
-use warp::{Buf, Filter, Reply};
-
-pub enum EitherBody<A, B> {
-    Left(A),
-    Right(B),
-}
-
-pub type Error = Box<dyn std::error::Error + Send + Sync + 'static>;
-pub type HttpBody = dyn http_body::Body<Data = dyn Buf, Error = Error> + 'static;
-
-impl<A, B> http_body::Body for EitherBody<A, B>
-where
-    A: http_body::Body + Send + Unpin,
-    B: http_body::Body<Data = A::Data> + Send + Unpin,
-    A::Error: Into<Error>,
-    B::Error: Into<Error>,
-{
-    type Data = A::Data;
-    type Error = Error;
-
-    fn poll_data(
-        self: Pin<&mut Self>,
-        cx: &mut TaskContext<'_>,
-    ) -> Poll<Option<Result<Self::Data, Self::Error>>> {
-        match self.get_mut() {
-            EitherBody::Left(b) => Pin::new(b).poll_data(cx).map(map_option_err),
-            EitherBody::Right(b) => Pin::new(b).poll_data(cx).map(map_option_err),
-        }
-    }
-
-    fn poll_trailers(
-        self: Pin<&mut Self>,
-        cx: &mut TaskContext<'_>,
-    ) -> Poll<Result<Option<http::HeaderMap>, Self::Error>> {
-        match self.get_mut() {
-            EitherBody::Left(b) => Pin::new(b).poll_trailers(cx).map_err(Into::into),
-            EitherBody::Right(b) => Pin::new(b).poll_trailers(cx).map_err(Into::into),
-        }
-    }
-
-    fn is_end_stream(&self) -> bool {
-        match self {
-            EitherBody::Left(b) => b.is_end_stream(),
-            EitherBody::Right(b) => b.is_end_stream(),
-        }
-    }
-}
-
-fn map_option_err<T, U: Into<Error>>(
-    err: Option<Result<T, U>>,
-) -> Option<Result<T, Error>> {
-    err.map(|e| e.map_err(Into::into))
-}
-
-fn with_data_server(
-    db: SchedulerServer,
-) -> impl Filter<Extract = (SchedulerServer,), Error = std::convert::Infallible> + Clone {
-    warp::any().map(move || db.clone())
-}
-
-pub fn get_routes(scheduler_server: SchedulerServer) -> BoxedFilter<(impl Reply,)> {
-    let routes = warp::path("state")
-        .and(with_data_server(scheduler_server))
-        .and_then(handlers::scheduler_state);
-    routes.boxed()
-}
diff --git a/rust/ballista/rust/scheduler/src/lib.rs b/rust/ballista/rust/scheduler/src/lib.rs
deleted file mode 100644
index a675153897b..00000000000
--- a/rust/ballista/rust/scheduler/src/lib.rs
+++ /dev/null
@@ -1,490 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Support for distributed schedulers, such as Kubernetes
-
-pub mod api;
-pub mod planner;
-pub mod state;
-
-#[cfg(test)]
-pub mod test_utils;
-
-use std::fmt;
-use std::{convert::TryInto, sync::Arc};
-
-use ballista_core::serde::protobuf::{
-    execute_query_params::Query, job_status, scheduler_grpc_server::SchedulerGrpc,
-    ExecuteQueryParams, ExecuteQueryResult, FailedJob, FilePartitionMetadata, FileType,
-    GetExecutorMetadataParams, GetExecutorMetadataResult, GetFileMetadataParams,
-    GetFileMetadataResult, GetJobStatusParams, GetJobStatusResult, JobStatus,
-    PartitionId, PollWorkParams, PollWorkResult, QueuedJob, RunningJob, TaskDefinition,
-    TaskStatus,
-};
-use ballista_core::serde::scheduler::ExecutorMeta;
-
-use clap::arg_enum;
-use datafusion::physical_plan::ExecutionPlan;
-#[cfg(feature = "sled")]
-extern crate sled_package as sled;
-
-// an enum used to configure the backend
-// needs to be visible to code generated by configure_me
-arg_enum! {
-    #[derive(Debug, serde::Deserialize)]
-    pub enum ConfigBackend {
-        Etcd,
-        Standalone
-    }
-}
-
-impl parse_arg::ParseArgFromStr for ConfigBackend {
-    fn describe_type<W: fmt::Write>(mut writer: W) -> fmt::Result {
-        write!(writer, "The configuration backend for the scheduler")
-    }
-}
-
-use crate::planner::DistributedPlanner;
-
-use log::{debug, error, info, warn};
-use rand::{distributions::Alphanumeric, thread_rng, Rng};
-use tonic::{Request, Response};
-
-use self::state::{ConfigBackendClient, SchedulerState};
-use ballista_core::utils::create_datafusion_context;
-use datafusion::physical_plan::parquet::ParquetExec;
-use std::time::{Instant, SystemTime, UNIX_EPOCH};
-
-#[derive(Clone)]
-pub struct SchedulerServer {
-    state: Arc<SchedulerState>,
-    start_time: u128,
-    version: String,
-}
-
-impl SchedulerServer {
-    pub fn new(config: Arc<dyn ConfigBackendClient>, namespace: String) -> Self {
-        const VERSION: Option<&'static str> = option_env!("CARGO_PKG_VERSION");
-        let state = Arc::new(SchedulerState::new(config, namespace));
-        let state_clone = state.clone();
-
-        // TODO: we should elect a leader in the scheduler cluster and run this only in the leader
-        tokio::spawn(async move { state_clone.synchronize_job_status_loop().await });
-
-        Self {
-            state,
-            start_time: SystemTime::now()
-                .duration_since(UNIX_EPOCH)
-                .unwrap()
-                .as_millis(),
-            version: VERSION.unwrap_or("Unknown").to_string(),
-        }
-    }
-}
-
-#[tonic::async_trait]
-impl SchedulerGrpc for SchedulerServer {
-    async fn get_executors_metadata(
-        &self,
-        _request: Request<GetExecutorMetadataParams>,
-    ) -> std::result::Result<Response<GetExecutorMetadataResult>, tonic::Status> {
-        info!("Received get_executors_metadata request");
-        let result = self
-            .state
-            .get_executors_metadata()
-            .await
-            .map_err(|e| {
-                let msg = format!("Error reading executors metadata: {}", e);
-                error!("{}", msg);
-                tonic::Status::internal(msg)
-            })?
-            .into_iter()
-            .map(|meta| meta.into())
-            .collect();
-        Ok(Response::new(GetExecutorMetadataResult {
-            metadata: result,
-        }))
-    }
-
-    async fn poll_work(
-        &self,
-        request: Request<PollWorkParams>,
-    ) -> std::result::Result<Response<PollWorkResult>, tonic::Status> {
-        if let PollWorkParams {
-            metadata: Some(metadata),
-            can_accept_task,
-            task_status,
-        } = request.into_inner()
-        {
-            debug!("Received poll_work request for {:?}", metadata);
-            let metadata: ExecutorMeta = metadata.into();
-            let mut lock = self.state.lock().await.map_err(|e| {
-                let msg = format!("Could not lock the state: {}", e);
-                error!("{}", msg);
-                tonic::Status::internal(msg)
-            })?;
-            self.state
-                .save_executor_metadata(metadata.clone())
-                .await
-                .map_err(|e| {
-                    let msg = format!("Could not save executor metadata: {}", e);
-                    error!("{}", msg);
-                    tonic::Status::internal(msg)
-                })?;
-            for task_status in task_status {
-                self.state
-                    .save_task_status(&task_status)
-                    .await
-                    .map_err(|e| {
-                        let msg = format!("Could not save task status: {}", e);
-                        error!("{}", msg);
-                        tonic::Status::internal(msg)
-                    })?;
-            }
-            let task = if can_accept_task {
-                let plan = self
-                    .state
-                    .assign_next_schedulable_task(&metadata.id)
-                    .await
-                    .map_err(|e| {
-                        let msg = format!("Error finding next assignable task: {}", e);
-                        error!("{}", msg);
-                        tonic::Status::internal(msg)
-                    })?;
-                if let Some((task, _plan)) = &plan {
-                    let partition_id = task.partition_id.as_ref().unwrap();
-                    info!(
-                        "Sending new task to {}: {}/{}/{}",
-                        metadata.id,
-                        partition_id.job_id,
-                        partition_id.stage_id,
-                        partition_id.partition_id
-                    );
-                }
-                plan.map(|(status, plan)| TaskDefinition {
-                    plan: Some(plan.try_into().unwrap()),
-                    task_id: status.partition_id,
-                })
-            } else {
-                None
-            };
-            lock.unlock().await;
-            Ok(Response::new(PollWorkResult { task }))
-        } else {
-            warn!("Received invalid executor poll_work request");
-            Err(tonic::Status::invalid_argument(
-                "Missing metadata in request",
-            ))
-        }
-    }
-
-    async fn get_file_metadata(
-        &self,
-        request: Request<GetFileMetadataParams>,
-    ) -> std::result::Result<Response<GetFileMetadataResult>, tonic::Status> {
-        let GetFileMetadataParams { path, file_type } = request.into_inner();
-
-        let file_type: FileType = file_type.try_into().map_err(|e| {
-            let msg = format!("Error reading request: {}", e);
-            error!("{}", msg);
-            tonic::Status::internal(msg)
-        })?;
-
-        match file_type {
-            FileType::Parquet => {
-                let parquet_exec =
-                    ParquetExec::try_from_path(&path, None, None, 1024, 1, None)
-                        .map_err(|e| {
-                            let msg = format!("Error opening parquet files: {}", e);
-                            error!("{}", msg);
-                            tonic::Status::internal(msg)
-                        })?;
-
-                //TODO include statistics and any other info needed to reconstruct ParquetExec
-                Ok(Response::new(GetFileMetadataResult {
-                    schema: Some(parquet_exec.schema().as_ref().into()),
-                    partitions: parquet_exec
-                        .partitions()
-                        .iter()
-                        .map(|part| FilePartitionMetadata {
-                            filename: part.filenames().to_vec(),
-                        })
-                        .collect(),
-                }))
-            }
-            //TODO implement for CSV
-            _ => Err(tonic::Status::unimplemented(
-                "get_file_metadata unsupported file type",
-            )),
-        }
-    }
-
-    async fn execute_query(
-        &self,
-        request: Request<ExecuteQueryParams>,
-    ) -> std::result::Result<Response<ExecuteQueryResult>, tonic::Status> {
-        if let ExecuteQueryParams { query: Some(query) } = request.into_inner() {
-            let plan = match query {
-                Query::LogicalPlan(logical_plan) => {
-                    // parse protobuf
-                    (&logical_plan).try_into().map_err(|e| {
-                        let msg = format!("Could not parse logical plan protobuf: {}", e);
-                        error!("{}", msg);
-                        tonic::Status::internal(msg)
-                    })?
-                }
-                Query::Sql(sql) => {
-                    //TODO we can't just create a new context because we need a context that has
-                    // tables registered from previous SQL statements that have been executed
-                    let mut ctx = create_datafusion_context();
-                    let df = ctx.sql(&sql).map_err(|e| {
-                        let msg = format!("Error parsing SQL: {}", e);
-                        error!("{}", msg);
-                        tonic::Status::internal(msg)
-                    })?;
-                    df.to_logical_plan()
-                }
-            };
-            debug!("Received plan for execution: {:?}", plan);
-            let executors = self.state.get_executors_metadata().await.map_err(|e| {
-                let msg = format!("Error reading executors metadata: {}", e);
-                error!("{}", msg);
-                tonic::Status::internal(msg)
-            })?;
-            debug!("Found executors: {:?}", executors);
-
-            let job_id: String = {
-                let mut rng = thread_rng();
-                std::iter::repeat(())
-                    .map(|()| rng.sample(Alphanumeric))
-                    .map(char::from)
-                    .take(7)
-                    .collect()
-            };
-
-            // Save placeholder job metadata
-            self.state
-                .save_job_metadata(
-                    &job_id,
-                    &JobStatus {
-                        status: Some(job_status::Status::Queued(QueuedJob {})),
-                    },
-                )
-                .await
-                .map_err(|e| {
-                    tonic::Status::internal(format!("Could not save job metadata: {}", e))
-                })?;
-
-            let state = self.state.clone();
-            let job_id_spawn = job_id.clone();
-            tokio::spawn(async move {
-                // create physical plan using DataFusion
-                let datafusion_ctx = create_datafusion_context();
-                macro_rules! fail_job {
-                    ($code :expr) => {{
-                        match $code {
-                            Err(error) => {
-                                warn!("Job {} failed with {}", job_id_spawn, error);
-                                state
-                                    .save_job_metadata(
-                                        &job_id_spawn,
-                                        &JobStatus {
-                                            status: Some(job_status::Status::Failed(
-                                                FailedJob {
-                                                    error: format!("{}", error),
-                                                },
-                                            )),
-                                        },
-                                    )
-                                    .await
-                                    .unwrap();
-                                return;
-                            }
-                            Ok(value) => value,
-                        }
-                    }};
-                }
-
-                let start = Instant::now();
-
-                let optimized_plan =
-                    fail_job!(datafusion_ctx.optimize(&plan).map_err(|e| {
-                        let msg =
-                            format!("Could not create optimized logical plan: {}", e);
-                        error!("{}", msg);
-                        tonic::Status::internal(msg)
-                    }));
-
-                debug!("Calculated optimized plan: {:?}", optimized_plan);
-
-                let plan = fail_job!(datafusion_ctx
-                    .create_physical_plan(&optimized_plan)
-                    .map_err(|e| {
-                        let msg = format!("Could not create physical plan: {}", e);
-                        error!("{}", msg);
-                        tonic::Status::internal(msg)
-                    }));
-
-                info!(
-                    "DataFusion created physical plan in {} milliseconds",
-                    start.elapsed().as_millis(),
-                );
-
-                // create distributed physical plan using Ballista
-                if let Err(e) = state
-                    .save_job_metadata(
-                        &job_id_spawn,
-                        &JobStatus {
-                            status: Some(job_status::Status::Running(RunningJob {})),
-                        },
-                    )
-                    .await
-                {
-                    warn!(
-                        "Could not update job {} status to running: {}",
-                        job_id_spawn, e
-                    );
-                }
-                let mut planner = fail_job!(DistributedPlanner::try_new(executors)
-                    .map_err(|e| {
-                        let msg = format!("Could not create distributed planner: {}", e);
-                        error!("{}", msg);
-                        tonic::Status::internal(msg)
-                    }));
-                let stages = fail_job!(planner
-                    .plan_query_stages(&job_id_spawn, plan)
-                    .map_err(|e| {
-                        let msg = format!("Could not plan query stages: {}", e);
-                        error!("{}", msg);
-                        tonic::Status::internal(msg)
-                    }));
-
-                // save stages into state
-                for stage in stages {
-                    fail_job!(state
-                        .save_stage_plan(
-                            &job_id_spawn,
-                            stage.stage_id,
-                            stage.child.clone()
-                        )
-                        .await
-                        .map_err(|e| {
-                            let msg = format!("Could not save stage plan: {}", e);
-                            error!("{}", msg);
-                            tonic::Status::internal(msg)
-                        }));
-                    let num_partitions = stage.output_partitioning().partition_count();
-                    for partition_id in 0..num_partitions {
-                        let pending_status = TaskStatus {
-                            partition_id: Some(PartitionId {
-                                job_id: job_id_spawn.clone(),
-                                stage_id: stage.stage_id as u32,
-                                partition_id: partition_id as u32,
-                            }),
-                            status: None,
-                        };
-                        fail_job!(state.save_task_status(&pending_status).await.map_err(
-                            |e| {
-                                let msg = format!("Could not save task status: {}", e);
-                                error!("{}", msg);
-                                tonic::Status::internal(msg)
-                            }
-                        ));
-                    }
-                }
-            });
-
-            Ok(Response::new(ExecuteQueryResult { job_id }))
-        } else {
-            Err(tonic::Status::internal("Error parsing request"))
-        }
-    }
-
-    async fn get_job_status(
-        &self,
-        request: Request<GetJobStatusParams>,
-    ) -> std::result::Result<Response<GetJobStatusResult>, tonic::Status> {
-        let job_id = request.into_inner().job_id;
-        debug!("Received get_job_status request for job {}", job_id);
-        let job_meta = self.state.get_job_metadata(&job_id).await.map_err(|e| {
-            let msg = format!("Error reading job metadata: {}", e);
-            error!("{}", msg);
-            tonic::Status::internal(msg)
-        })?;
-        Ok(Response::new(GetJobStatusResult {
-            status: Some(job_meta),
-        }))
-    }
-}
-
-#[cfg(all(test, feature = "sled"))]
-mod test {
-    use std::sync::Arc;
-
-    use tonic::Request;
-
-    use ballista_core::error::BallistaError;
-    use ballista_core::serde::protobuf::{ExecutorMetadata, PollWorkParams};
-
-    use super::{
-        state::{SchedulerState, StandaloneClient},
-        SchedulerGrpc, SchedulerServer,
-    };
-
-    #[tokio::test]
-    async fn test_poll_work() -> Result<(), BallistaError> {
-        let state = Arc::new(StandaloneClient::try_new_temporary()?);
-        let namespace = "default";
-        let scheduler = SchedulerServer::new(state.clone(), namespace.to_owned());
-        let state = SchedulerState::new(state, namespace.to_string());
-        let exec_meta = ExecutorMetadata {
-            id: "abc".to_owned(),
-            host: "".to_owned(),
-            port: 0,
-        };
-        let request: Request<PollWorkParams> = Request::new(PollWorkParams {
-            metadata: Some(exec_meta.clone()),
-            can_accept_task: false,
-            task_status: vec![],
-        });
-        let response = scheduler
-            .poll_work(request)
-            .await
-            .expect("Received error response")
-            .into_inner();
-        // no response task since we told the scheduler we didn't want to accept one
-        assert!(response.task.is_none());
-        // executor should be registered
-        assert_eq!(state.get_executors_metadata().await.unwrap().len(), 1);
-
-        let request: Request<PollWorkParams> = Request::new(PollWorkParams {
-            metadata: Some(exec_meta.clone()),
-            can_accept_task: true,
-            task_status: vec![],
-        });
-        let response = scheduler
-            .poll_work(request)
-            .await
-            .expect("Received error response")
-            .into_inner();
-        // still no response task since there are no tasks in the scheduelr
-        assert!(response.task.is_none());
-        // executor should be registered
-        assert_eq!(state.get_executors_metadata().await.unwrap().len(), 1);
-        Ok(())
-    }
-}
diff --git a/rust/ballista/rust/scheduler/src/main.rs b/rust/ballista/rust/scheduler/src/main.rs
deleted file mode 100644
index 205023a4c34..00000000000
--- a/rust/ballista/rust/scheduler/src/main.rs
+++ /dev/null
@@ -1,156 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Ballista Rust scheduler binary.
-
-use anyhow::{Context, Result};
-use futures::future::{self, Either, TryFutureExt};
-use hyper::{service::make_service_fn, Server};
-use std::convert::Infallible;
-use std::{net::SocketAddr, sync::Arc};
-use tonic::transport::Server as TonicServer;
-use tower::Service;
-
-use ballista_core::BALLISTA_VERSION;
-use ballista_core::{
-    print_version, serde::protobuf::scheduler_grpc_server::SchedulerGrpcServer,
-};
-use ballista_scheduler::api::{get_routes, EitherBody, Error};
-#[cfg(feature = "etcd")]
-use ballista_scheduler::state::EtcdClient;
-#[cfg(feature = "sled")]
-use ballista_scheduler::state::StandaloneClient;
-use ballista_scheduler::{state::ConfigBackendClient, ConfigBackend, SchedulerServer};
-
-use log::info;
-
-#[macro_use]
-extern crate configure_me;
-
-#[allow(clippy::all, warnings)]
-mod config {
-    // Ideally we would use the include_config macro from configure_me, but then we cannot use
-    // #[allow(clippy::all)] to silence clippy warnings from the generated code
-    include!(concat!(
-        env!("OUT_DIR"),
-        "/scheduler_configure_me_config.rs"
-    ));
-}
-use config::prelude::*;
-
-async fn start_server(
-    config_backend: Arc<dyn ConfigBackendClient>,
-    namespace: String,
-    addr: SocketAddr,
-) -> Result<()> {
-    info!(
-        "Ballista v{} Scheduler listening on {:?}",
-        BALLISTA_VERSION, addr
-    );
-
-    let scheduler_server =
-        SchedulerServer::new(config_backend.clone(), namespace.clone());
-    Ok(Server::bind(&addr)
-        .serve(make_service_fn(move |_| {
-            let scheduler_grpc_server =
-                SchedulerGrpcServer::new(scheduler_server.clone());
-
-            let mut tonic = TonicServer::builder()
-                .add_service(scheduler_grpc_server)
-                .into_service();
-            let mut warp = warp::service(get_routes(scheduler_server.clone()));
-
-            future::ok::<_, Infallible>(tower::service_fn(
-                move |req: hyper::Request<hyper::Body>| {
-                    let header = req.headers().get(hyper::header::ACCEPT);
-                    if header.is_some() && header.unwrap().eq("application/json") {
-                        return Either::Left(
-                            warp.call(req)
-                                .map_ok(|res| res.map(EitherBody::Left))
-                                .map_err(Error::from),
-                        );
-                    }
-                    Either::Right(
-                        tonic
-                            .call(req)
-                            .map_ok(|res| res.map(EitherBody::Right))
-                            .map_err(Error::from),
-                    )
-                },
-            ))
-        }))
-        .await
-        .context("Could not start grpc server")?)
-}
-
-#[tokio::main]
-async fn main() -> Result<()> {
-    env_logger::init();
-
-    // parse options
-    let (opt, _remaining_args) =
-        Config::including_optional_config_files(&["/etc/ballista/scheduler.toml"])
-            .unwrap_or_exit();
-
-    if opt.version {
-        print_version();
-        std::process::exit(0);
-    }
-
-    let namespace = opt.namespace;
-    let bind_host = opt.bind_host;
-    let port = opt.port;
-
-    let addr = format!("{}:{}", bind_host, port);
-    let addr = addr.parse()?;
-
-    let client: Arc<dyn ConfigBackendClient> = match opt.config_backend {
-        #[cfg(not(any(feature = "sled", feature = "etcd")))]
-        _ => std::compile_error!(
-            "To build the scheduler enable at least one config backend feature (`etcd` or `sled`)"
-        ),
-        #[cfg(feature = "etcd")]
-        ConfigBackend::Etcd => {
-            let etcd = etcd_client::Client::connect(&[opt.etcd_urls], None)
-                .await
-                .context("Could not connect to etcd")?;
-            Arc::new(EtcdClient::new(etcd))
-        }
-        #[cfg(not(feature = "etcd"))]
-        ConfigBackend::Etcd => {
-            unimplemented!(
-                "build the scheduler with the `etcd` feature to use the etcd config backend"
-            )
-        }
-        #[cfg(feature = "sled")]
-        ConfigBackend::Standalone => {
-            // TODO: Use a real file and make path is configurable
-            Arc::new(
-                StandaloneClient::try_new_temporary()
-                    .context("Could not create standalone config backend")?,
-            )
-        }
-        #[cfg(not(feature = "sled"))]
-        ConfigBackend::Standalone => {
-            unimplemented!(
-                "build the scheduler with the `sled` feature to use the standalone config backend"
-            )
-        }
-    };
-    start_server(client, namespace, addr).await?;
-    Ok(())
-}
diff --git a/rust/ballista/rust/scheduler/src/planner.rs b/rust/ballista/rust/scheduler/src/planner.rs
deleted file mode 100644
index e9f668a7d5f..00000000000
--- a/rust/ballista/rust/scheduler/src/planner.rs
+++ /dev/null
@@ -1,494 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Distributed query execution
-//!
-//! This code is EXPERIMENTAL and still under development
-
-use std::pin::Pin;
-use std::sync::Arc;
-use std::time::Instant;
-use std::{collections::HashMap, future::Future};
-
-use ballista_core::client::BallistaClient;
-use ballista_core::datasource::DFTableAdapter;
-use ballista_core::error::{BallistaError, Result};
-use ballista_core::serde::scheduler::ExecutorMeta;
-use ballista_core::serde::scheduler::PartitionId;
-use ballista_core::utils::format_plan;
-use ballista_core::{
-    execution_plans::{QueryStageExec, ShuffleReaderExec, UnresolvedShuffleExec},
-    serde::scheduler::PartitionLocation,
-};
-use datafusion::execution::context::{ExecutionConfig, ExecutionContext};
-use datafusion::physical_optimizer::coalesce_batches::CoalesceBatches;
-use datafusion::physical_optimizer::merge_exec::AddMergeExec;
-use datafusion::physical_optimizer::optimizer::PhysicalOptimizerRule;
-use datafusion::physical_plan::hash_aggregate::{AggregateMode, HashAggregateExec};
-use datafusion::physical_plan::hash_join::HashJoinExec;
-use datafusion::physical_plan::merge::MergeExec;
-use datafusion::physical_plan::ExecutionPlan;
-use log::{debug, info};
-use tokio::task::JoinHandle;
-
-type SendableExecutionPlan =
-    Pin<Box<dyn Future<Output = Result<Arc<dyn ExecutionPlan>>> + Send>>;
-type PartialQueryStageResult = (Arc<dyn ExecutionPlan>, Vec<Arc<QueryStageExec>>);
-
-pub struct DistributedPlanner {
-    executors: Vec<ExecutorMeta>,
-    next_stage_id: usize,
-}
-
-impl DistributedPlanner {
-    pub fn try_new(executors: Vec<ExecutorMeta>) -> Result<Self> {
-        if executors.is_empty() {
-            Err(BallistaError::General(
-                "DistributedPlanner requires at least one executor".to_owned(),
-            ))
-        } else {
-            Ok(Self {
-                executors,
-                next_stage_id: 0,
-            })
-        }
-    }
-}
-
-impl DistributedPlanner {
-    /// Execute a distributed query against a cluster, leaving the final results on the
-    /// executors. The [ExecutionPlan] returned by this method is guaranteed to be a
-    /// [ShuffleReaderExec] that can be used to fetch the final results from the executors
-    /// in parallel.
-    pub async fn execute_distributed_query(
-        &mut self,
-        job_id: String,
-        execution_plan: Arc<dyn ExecutionPlan>,
-    ) -> Result<Arc<dyn ExecutionPlan>> {
-        let now = Instant::now();
-        let execution_plans = self.plan_query_stages(&job_id, execution_plan)?;
-
-        info!(
-            "DistributedPlanner created {} execution plans in {} seconds:",
-            execution_plans.len(),
-            now.elapsed().as_secs()
-        );
-
-        for plan in &execution_plans {
-            info!("{}", format_plan(plan.as_ref(), 0)?);
-        }
-
-        execute(execution_plans, self.executors.clone()).await
-    }
-
-    /// Returns a vector of ExecutionPlans, where the root node is a [QueryStageExec].
-    /// Plans that depend on the input of other plans will have leaf nodes of type [UnresolvedShuffleExec].
-    /// A [QueryStageExec] is created whenever the partitioning changes.
-    ///
-    /// Returns an empty vector if the execution_plan doesn't need to be sliced into several stages.
-    pub fn plan_query_stages(
-        &mut self,
-        job_id: &str,
-        execution_plan: Arc<dyn ExecutionPlan>,
-    ) -> Result<Vec<Arc<QueryStageExec>>> {
-        info!("planning query stages");
-        let (new_plan, mut stages) =
-            self.plan_query_stages_internal(job_id, execution_plan)?;
-        stages.push(create_query_stage(
-            job_id.to_string(),
-            self.next_stage_id(),
-            new_plan,
-        )?);
-        Ok(stages)
-    }
-
-    /// Returns a potentially modified version of the input execution_plan along with the resulting query stages.
-    /// This function is needed because the input execution_plan might need to be modified, but it might not hold a
-    /// compelte query stage (its parent might also belong to the same stage)
-    fn plan_query_stages_internal(
-        &mut self,
-        job_id: &str,
-        execution_plan: Arc<dyn ExecutionPlan>,
-    ) -> Result<PartialQueryStageResult> {
-        // recurse down and replace children
-        if execution_plan.children().is_empty() {
-            return Ok((execution_plan, vec![]));
-        }
-
-        let mut stages = vec![];
-        let mut children = vec![];
-        for child in execution_plan.children() {
-            let (new_child, mut child_stages) =
-                self.plan_query_stages_internal(job_id, child.clone())?;
-            children.push(new_child);
-            stages.append(&mut child_stages);
-        }
-
-        if let Some(adapter) = execution_plan.as_any().downcast_ref::<DFTableAdapter>() {
-            // remove Repartition rule because that isn't supported yet
-            let rules: Vec<Arc<dyn PhysicalOptimizerRule + Send + Sync>> = vec![
-                Arc::new(CoalesceBatches::new()),
-                Arc::new(AddMergeExec::new()),
-            ];
-            let config = ExecutionConfig::new().with_physical_optimizer_rules(rules);
-            let ctx = ExecutionContext::with_config(config);
-            Ok((ctx.create_physical_plan(&adapter.logical_plan)?, stages))
-        } else if let Some(merge) = execution_plan.as_any().downcast_ref::<MergeExec>() {
-            let query_stage = create_query_stage(
-                job_id.to_string(),
-                self.next_stage_id(),
-                merge.children()[0].clone(),
-            )?;
-            let unresolved_shuffle = Arc::new(UnresolvedShuffleExec::new(
-                vec![query_stage.stage_id],
-                query_stage.schema(),
-                query_stage.output_partitioning().partition_count(),
-            ));
-            stages.push(query_stage);
-            Ok((merge.with_new_children(vec![unresolved_shuffle])?, stages))
-        } else if let Some(agg) =
-            execution_plan.as_any().downcast_ref::<HashAggregateExec>()
-        {
-            //TODO should insert query stages in more generic way based on partitioning metadata
-            // and not specifically for this operator
-            match agg.mode() {
-                AggregateMode::Final => {
-                    let mut new_children: Vec<Arc<dyn ExecutionPlan>> = vec![];
-                    for child in &children {
-                        let new_stage = create_query_stage(
-                            job_id.to_string(),
-                            self.next_stage_id(),
-                            child.clone(),
-                        )?;
-                        new_children.push(Arc::new(UnresolvedShuffleExec::new(
-                            vec![new_stage.stage_id],
-                            new_stage.schema().clone(),
-                            new_stage.output_partitioning().partition_count(),
-                        )));
-                        stages.push(new_stage);
-                    }
-                    Ok((agg.with_new_children(new_children)?, stages))
-                }
-                AggregateMode::Partial => Ok((agg.with_new_children(children)?, stages)),
-            }
-        } else if let Some(join) = execution_plan.as_any().downcast_ref::<HashJoinExec>()
-        {
-            Ok((join.with_new_children(children)?, stages))
-        } else {
-            // TODO check for compatible partitioning schema, not just count
-            if execution_plan.output_partitioning().partition_count()
-                != children[0].output_partitioning().partition_count()
-            {
-                let mut new_children: Vec<Arc<dyn ExecutionPlan>> = vec![];
-                for child in &children {
-                    let new_stage = create_query_stage(
-                        job_id.to_string(),
-                        self.next_stage_id(),
-                        child.clone(),
-                    )?;
-                    new_children.push(Arc::new(UnresolvedShuffleExec::new(
-                        vec![new_stage.stage_id],
-                        new_stage.schema().clone(),
-                        new_stage.output_partitioning().partition_count(),
-                    )));
-                    stages.push(new_stage);
-                }
-                Ok((execution_plan.with_new_children(new_children)?, stages))
-            } else {
-                Ok((execution_plan.with_new_children(children)?, stages))
-            }
-        }
-    }
-
-    /// Generate a new stage ID
-    fn next_stage_id(&mut self) -> usize {
-        self.next_stage_id += 1;
-        self.next_stage_id
-    }
-}
-
-fn execute(
-    stages: Vec<Arc<QueryStageExec>>,
-    executors: Vec<ExecutorMeta>,
-) -> SendableExecutionPlan {
-    Box::pin(async move {
-        let mut partition_locations: HashMap<usize, Vec<PartitionLocation>> =
-            HashMap::new();
-        let mut result_partition_locations = vec![];
-        for stage in &stages {
-            debug!("execute() {}", &format!("{:?}", stage)[0..60]);
-            let stage = remove_unresolved_shuffles(stage.as_ref(), &partition_locations)?;
-            let stage = stage.as_any().downcast_ref::<QueryStageExec>().unwrap();
-            result_partition_locations = execute_query_stage(
-                &stage.job_id.clone(),
-                stage.stage_id,
-                stage.children()[0].clone(),
-                executors.clone(),
-            )
-            .await?;
-            partition_locations
-                .insert(stage.stage_id, result_partition_locations.clone());
-        }
-
-        let shuffle_reader: Arc<dyn ExecutionPlan> =
-            Arc::new(ShuffleReaderExec::try_new(
-                result_partition_locations,
-                stages.last().unwrap().schema(),
-            )?);
-        Ok(shuffle_reader)
-    })
-}
-
-pub fn remove_unresolved_shuffles(
-    stage: &dyn ExecutionPlan,
-    partition_locations: &HashMap<usize, Vec<PartitionLocation>>,
-) -> Result<Arc<dyn ExecutionPlan>> {
-    let mut new_children: Vec<Arc<dyn ExecutionPlan>> = vec![];
-    for child in stage.children() {
-        if let Some(unresolved_shuffle) =
-            child.as_any().downcast_ref::<UnresolvedShuffleExec>()
-        {
-            let mut relevant_locations = vec![];
-            for id in &unresolved_shuffle.query_stage_ids {
-                relevant_locations.append(
-                    &mut partition_locations
-                        .get(id)
-                        .ok_or_else(|| {
-                            BallistaError::General(
-                                "Missing partition location. Could not remove unresolved shuffles"
-                                    .to_owned(),
-                            )
-                        })?
-                        .clone(),
-                );
-            }
-            new_children.push(Arc::new(ShuffleReaderExec::try_new(
-                relevant_locations,
-                unresolved_shuffle.schema().clone(),
-            )?))
-        } else {
-            new_children.push(remove_unresolved_shuffles(
-                child.as_ref(),
-                partition_locations,
-            )?);
-        }
-    }
-    Ok(stage.with_new_children(new_children)?)
-}
-
-fn create_query_stage(
-    job_id: String,
-    stage_id: usize,
-    plan: Arc<dyn ExecutionPlan>,
-) -> Result<Arc<QueryStageExec>> {
-    Ok(Arc::new(QueryStageExec::try_new(job_id, stage_id, plan)?))
-}
-
-/// Execute a query stage by sending each partition to an executor
-async fn execute_query_stage(
-    job_id: &str,
-    stage_id: usize,
-    plan: Arc<dyn ExecutionPlan>,
-    executors: Vec<ExecutorMeta>,
-) -> Result<Vec<PartitionLocation>> {
-    info!(
-        "execute_query_stage() stage_id={}\n{}",
-        stage_id,
-        format_plan(plan.as_ref(), 0)?
-    );
-
-    let partition_count = plan.output_partitioning().partition_count();
-
-    let num_chunks = partition_count / executors.len();
-    let num_chunks = num_chunks.max(1);
-    let partition_chunks: Vec<Vec<usize>> = (0..partition_count)
-        .collect::<Vec<usize>>()
-        .chunks(num_chunks)
-        .map(|r| r.to_vec())
-        .collect();
-
-    info!(
-        "Executing query stage with {} chunks of partition ranges",
-        partition_chunks.len()
-    );
-
-    let mut executions: Vec<JoinHandle<Result<Vec<PartitionLocation>>>> =
-        Vec::with_capacity(partition_count);
-    for i in 0..partition_chunks.len() {
-        let plan = plan.clone();
-        let executor_meta = executors[i % executors.len()].clone();
-        let partition_ids = partition_chunks[i].to_vec();
-        let job_id = job_id.to_owned();
-        executions.push(tokio::spawn(async move {
-            let mut client =
-                BallistaClient::try_new(&executor_meta.host, executor_meta.port).await?;
-            let stats = client
-                .execute_partition(job_id.clone(), stage_id, partition_ids.clone(), plan)
-                .await?;
-
-            Ok(partition_ids
-                .iter()
-                .map(|part| PartitionLocation {
-                    partition_id: PartitionId::new(&job_id, stage_id, *part),
-                    executor_meta: executor_meta.clone(),
-                    partition_stats: *stats[*part].statistics(),
-                })
-                .collect())
-        }));
-    }
-
-    // wait for all partitions to complete
-    let results = futures::future::join_all(executions).await;
-
-    // check for errors
-    let mut meta = Vec::with_capacity(partition_count);
-    for result in results {
-        match result {
-            Ok(partition_result) => {
-                let final_result = partition_result?;
-                debug!("Query stage partition result: {:?}", final_result);
-                meta.extend(final_result);
-            }
-            Err(e) => {
-                return Err(BallistaError::General(format!(
-                    "Query stage {} failed: {:?}",
-                    stage_id, e
-                )))
-            }
-        }
-    }
-
-    debug!(
-        "execute_query_stage() stage_id={} produced {:?}",
-        stage_id, meta
-    );
-
-    Ok(meta)
-}
-
-#[cfg(test)]
-mod test {
-    use crate::planner::DistributedPlanner;
-    use crate::test_utils::datafusion_test_context;
-    use ballista_core::error::BallistaError;
-    use ballista_core::execution_plans::UnresolvedShuffleExec;
-    use ballista_core::serde::protobuf;
-    use ballista_core::serde::scheduler::ExecutorMeta;
-    use ballista_core::utils::format_plan;
-    use datafusion::physical_plan::hash_aggregate::HashAggregateExec;
-    use datafusion::physical_plan::merge::MergeExec;
-    use datafusion::physical_plan::projection::ProjectionExec;
-    use datafusion::physical_plan::sort::SortExec;
-    use datafusion::physical_plan::ExecutionPlan;
-    use std::convert::TryInto;
-    use std::sync::Arc;
-    use uuid::Uuid;
-
-    macro_rules! downcast_exec {
-        ($exec: expr, $ty: ty) => {
-            $exec.as_any().downcast_ref::<$ty>().unwrap()
-        };
-    }
-
-    #[test]
-    fn test() -> Result<(), BallistaError> {
-        let mut ctx = datafusion_test_context("testdata")?;
-
-        // simplified form of TPC-H query 1
-        let df = ctx.sql(
-            "select l_returnflag, sum(l_extendedprice * 1) as sum_disc_price
-            from lineitem
-            group by l_returnflag
-            order by l_returnflag",
-        )?;
-
-        let plan = df.to_logical_plan();
-        let plan = ctx.optimize(&plan)?;
-        let plan = ctx.create_physical_plan(&plan)?;
-
-        let mut planner = DistributedPlanner::try_new(vec![ExecutorMeta {
-            id: "".to_string(),
-            host: "".to_string(),
-            port: 0,
-        }])?;
-        let job_uuid = Uuid::new_v4();
-        let stages = planner.plan_query_stages(&job_uuid.to_string(), plan)?;
-        for stage in &stages {
-            println!("{}", format_plan(stage.as_ref(), 0)?);
-        }
-
-        /* Expected result:
-        QueryStageExec: job=f011432e-e424-4016-915d-e3d8b84f6dbd, stage=1
-         HashAggregateExec: groupBy=["l_returnflag"], aggrExpr=["SUM(l_extendedprice Multiply Int64(1)) [\"l_extendedprice * CAST(1 AS Float64)\"]"]
-          CsvExec: testdata/lineitem; partitions=2
-
-        QueryStageExec: job=f011432e-e424-4016-915d-e3d8b84f6dbd, stage=2
-         MergeExec
-          UnresolvedShuffleExec: stages=[1]
-
-        QueryStageExec: job=f011432e-e424-4016-915d-e3d8b84f6dbd, stage=3
-         SortExec { input: ProjectionExec { expr: [(Column { name: "l_returnflag" }, "l_returnflag"), (Column { name: "SUM(l_ext
-          ProjectionExec { expr: [(Column { name: "l_returnflag" }, "l_returnflag"), (Column { name: "SUM(l_extendedprice Multip
-           HashAggregateExec: groupBy=["l_returnflag"], aggrExpr=["SUM(l_extendedprice Multiply Int64(1)) [\"l_extendedprice * CAST(1 AS Float64)\"]"]
-            UnresolvedShuffleExec: stages=[2]
-        */
-
-        let sort = stages[2].children()[0].clone();
-        let sort = downcast_exec!(sort, SortExec);
-
-        let projection = sort.children()[0].clone();
-        println!("{:?}", projection);
-        let projection = downcast_exec!(projection, ProjectionExec);
-
-        let final_hash = projection.children()[0].clone();
-        let final_hash = downcast_exec!(final_hash, HashAggregateExec);
-
-        let unresolved_shuffle = final_hash.children()[0].clone();
-        let unresolved_shuffle =
-            downcast_exec!(unresolved_shuffle, UnresolvedShuffleExec);
-        assert_eq!(unresolved_shuffle.query_stage_ids, vec![2]);
-
-        let merge_exec = stages[1].children()[0].clone();
-        let merge_exec = downcast_exec!(merge_exec, MergeExec);
-
-        let unresolved_shuffle = merge_exec.children()[0].clone();
-        let unresolved_shuffle =
-            downcast_exec!(unresolved_shuffle, UnresolvedShuffleExec);
-        assert_eq!(unresolved_shuffle.query_stage_ids, vec![1]);
-
-        let partial_hash = stages[0].children()[0].clone();
-        let partial_hash_serde = roundtrip_operator(partial_hash.clone())?;
-
-        let partial_hash = downcast_exec!(partial_hash, HashAggregateExec);
-        let partial_hash_serde = downcast_exec!(partial_hash_serde, HashAggregateExec);
-
-        assert_eq!(
-            format!("{:?}", partial_hash),
-            format!("{:?}", partial_hash_serde)
-        );
-
-        Ok(())
-    }
-
-    fn roundtrip_operator(
-        plan: Arc<dyn ExecutionPlan>,
-    ) -> Result<Arc<dyn ExecutionPlan>, BallistaError> {
-        let proto: protobuf::PhysicalPlanNode = plan.clone().try_into()?;
-        let result_exec_plan: Arc<dyn ExecutionPlan> = (&proto).try_into()?;
-        Ok(result_exec_plan)
-    }
-}
diff --git a/rust/ballista/rust/scheduler/src/state/etcd.rs b/rust/ballista/rust/scheduler/src/state/etcd.rs
deleted file mode 100644
index 807477d8699..00000000000
--- a/rust/ballista/rust/scheduler/src/state/etcd.rs
+++ /dev/null
@@ -1,205 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Etcd config backend.
-
-use std::{task::Poll, time::Duration};
-
-use crate::state::ConfigBackendClient;
-use ballista_core::error::{ballista_error, Result};
-
-use etcd_client::{
-    GetOptions, LockResponse, PutOptions, WatchOptions, WatchStream, Watcher,
-};
-use futures::{Stream, StreamExt};
-use log::warn;
-
-use super::{Lock, Watch, WatchEvent};
-
-/// A [`ConfigBackendClient`] implementation that uses etcd to save cluster configuration.
-#[derive(Clone)]
-pub struct EtcdClient {
-    etcd: etcd_client::Client,
-}
-
-impl EtcdClient {
-    pub fn new(etcd: etcd_client::Client) -> Self {
-        Self { etcd }
-    }
-}
-
-#[tonic::async_trait]
-impl ConfigBackendClient for EtcdClient {
-    async fn get(&self, key: &str) -> Result<Vec<u8>> {
-        Ok(self
-            .etcd
-            .clone()
-            .get(key, None)
-            .await
-            .map_err(|e| ballista_error(&format!("etcd error {:?}", e)))?
-            .kvs()
-            .get(0)
-            .map(|kv| kv.value().to_owned())
-            .unwrap_or_default())
-    }
-
-    async fn get_from_prefix(&self, prefix: &str) -> Result<Vec<(String, Vec<u8>)>> {
-        Ok(self
-            .etcd
-            .clone()
-            .get(prefix, Some(GetOptions::new().with_prefix()))
-            .await
-            .map_err(|e| ballista_error(&format!("etcd error {:?}", e)))?
-            .kvs()
-            .iter()
-            .map(|kv| (kv.key_str().unwrap().to_owned(), kv.value().to_owned()))
-            .collect())
-    }
-
-    async fn put(
-        &self,
-        key: String,
-        value: Vec<u8>,
-        lease_time: Option<Duration>,
-    ) -> Result<()> {
-        let mut etcd = self.etcd.clone();
-        let put_options = if let Some(lease_time) = lease_time {
-            etcd.lease_grant(lease_time.as_secs() as i64, None)
-                .await
-                .map(|lease| Some(PutOptions::new().with_lease(lease.id())))
-                .map_err(|e| {
-                    warn!("etcd lease grant failed: {:?}", e.to_string());
-                    ballista_error("etcd lease grant failed")
-                })?
-        } else {
-            None
-        };
-        etcd.put(key.clone(), value.clone(), put_options)
-            .await
-            .map_err(|e| {
-                warn!("etcd put failed: {}", e);
-                ballista_error("etcd put failed")
-            })
-            .map(|_| ())
-    }
-
-    async fn lock(&self) -> Result<Box<dyn Lock>> {
-        let mut etcd = self.etcd.clone();
-        let lock = etcd
-            .lock("/ballista_global_lock", None)
-            .await
-            .map_err(|e| {
-                warn!("etcd lock failed: {}", e);
-                ballista_error("etcd lock failed")
-            })?;
-        Ok(Box::new(EtcdLockGuard { etcd, lock }))
-    }
-
-    async fn watch(&self, prefix: String) -> Result<Box<dyn Watch>> {
-        let mut etcd = self.etcd.clone();
-        let options = WatchOptions::new().with_prefix();
-        let (watcher, stream) = etcd.watch(prefix, Some(options)).await.map_err(|e| {
-            warn!("etcd watch failed: {}", e);
-            ballista_error("etcd watch failed")
-        })?;
-        Ok(Box::new(EtcdWatch {
-            watcher,
-            stream,
-            buffered_events: Vec::new(),
-        }))
-    }
-}
-
-struct EtcdWatch {
-    watcher: Watcher,
-    stream: WatchStream,
-    buffered_events: Vec<WatchEvent>,
-}
-
-#[tonic::async_trait]
-impl Watch for EtcdWatch {
-    async fn cancel(&mut self) -> Result<()> {
-        self.watcher.cancel().await.map_err(|e| {
-            warn!("etcd watch cancel failed: {}", e);
-            ballista_error("etcd watch cancel failed")
-        })
-    }
-}
-
-impl Stream for EtcdWatch {
-    type Item = WatchEvent;
-
-    fn poll_next(
-        self: std::pin::Pin<&mut Self>,
-        cx: &mut std::task::Context<'_>,
-    ) -> Poll<Option<Self::Item>> {
-        let self_mut = self.get_mut();
-        if let Some(event) = self_mut.buffered_events.pop() {
-            Poll::Ready(Some(event))
-        } else {
-            loop {
-                match self_mut.stream.poll_next_unpin(cx) {
-                    Poll::Ready(Some(Err(e))) => {
-                        warn!("Error when watching etcd prefix: {}", e);
-                        continue;
-                    }
-                    Poll::Ready(Some(Ok(v))) => {
-                        self_mut.buffered_events.extend(v.events().iter().map(|ev| {
-                            match ev.event_type() {
-                                etcd_client::EventType::Put => {
-                                    let kv = ev.kv().unwrap();
-                                    WatchEvent::Put(
-                                        kv.key_str().unwrap().to_string(),
-                                        kv.value().to_owned(),
-                                    )
-                                }
-                                etcd_client::EventType::Delete => {
-                                    let kv = ev.kv().unwrap();
-                                    WatchEvent::Delete(kv.key_str().unwrap().to_string())
-                                }
-                            }
-                        }));
-                        if let Some(event) = self_mut.buffered_events.pop() {
-                            return Poll::Ready(Some(event));
-                        } else {
-                            continue;
-                        }
-                    }
-                    Poll::Ready(None) => return Poll::Ready(None),
-                    Poll::Pending => return Poll::Pending,
-                }
-            }
-        }
-    }
-
-    fn size_hint(&self) -> (usize, Option<usize>) {
-        self.stream.size_hint()
-    }
-}
-
-struct EtcdLockGuard {
-    etcd: etcd_client::Client,
-    lock: LockResponse,
-}
-
-// Cannot use Drop because we need this to be async
-#[tonic::async_trait]
-impl Lock for EtcdLockGuard {
-    async fn unlock(&mut self) {
-        self.etcd.unlock(self.lock.key()).await.unwrap();
-    }
-}
diff --git a/rust/ballista/rust/scheduler/src/state/mod.rs b/rust/ballista/rust/scheduler/src/state/mod.rs
deleted file mode 100644
index a15efd618ff..00000000000
--- a/rust/ballista/rust/scheduler/src/state/mod.rs
+++ /dev/null
@@ -1,880 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::{
-    any::type_name, collections::HashMap, convert::TryInto, sync::Arc, time::Duration,
-};
-
-use datafusion::physical_plan::ExecutionPlan;
-use futures::{Stream, StreamExt};
-use log::{debug, error, info};
-use prost::Message;
-use tokio::sync::OwnedMutexGuard;
-
-use ballista_core::serde::protobuf::{
-    job_status, task_status, CompletedJob, CompletedTask, ExecutorMetadata, FailedJob,
-    FailedTask, JobStatus, PhysicalPlanNode, RunningJob, RunningTask, TaskStatus,
-};
-use ballista_core::serde::scheduler::PartitionStats;
-use ballista_core::{error::BallistaError, serde::scheduler::ExecutorMeta};
-use ballista_core::{
-    error::Result, execution_plans::UnresolvedShuffleExec,
-    serde::protobuf::PartitionLocation,
-};
-
-use super::planner::remove_unresolved_shuffles;
-
-#[cfg(feature = "etcd")]
-mod etcd;
-#[cfg(feature = "sled")]
-mod standalone;
-
-#[cfg(feature = "etcd")]
-pub use etcd::EtcdClient;
-#[cfg(feature = "sled")]
-pub use standalone::StandaloneClient;
-
-const LEASE_TIME: Duration = Duration::from_secs(60);
-
-/// A trait that contains the necessary methods to save and retrieve the state and configuration of a cluster.
-#[tonic::async_trait]
-pub trait ConfigBackendClient: Send + Sync {
-    /// Retrieve the data associated with a specific key.
-    ///
-    /// An empty vec is returned if the key does not exist.
-    async fn get(&self, key: &str) -> Result<Vec<u8>>;
-
-    /// Retrieve all data associated with a specific key.
-    async fn get_from_prefix(&self, prefix: &str) -> Result<Vec<(String, Vec<u8>)>>;
-
-    /// Saves the value into the provided key, overriding any previous data that might have been associated to that key.
-    async fn put(
-        &self,
-        key: String,
-        value: Vec<u8>,
-        lease_time: Option<Duration>,
-    ) -> Result<()>;
-
-    async fn lock(&self) -> Result<Box<dyn Lock>>;
-
-    /// Watch all events that happen on a specific prefix.
-    async fn watch(&self, prefix: String) -> Result<Box<dyn Watch>>;
-}
-
-/// A Watch is a cancelable stream of put or delete events in the [ConfigBackendClient]
-#[tonic::async_trait]
-pub trait Watch: Stream<Item = WatchEvent> + Send + Unpin {
-    async fn cancel(&mut self) -> Result<()>;
-}
-
-#[derive(Debug, PartialEq)]
-pub enum WatchEvent {
-    /// Contains the inserted or updated key and the new value
-    Put(String, Vec<u8>),
-
-    /// Contains the deleted key
-    Delete(String),
-}
-
-#[derive(Clone)]
-pub(super) struct SchedulerState {
-    config_client: Arc<dyn ConfigBackendClient>,
-    namespace: String,
-}
-
-impl SchedulerState {
-    pub fn new(config_client: Arc<dyn ConfigBackendClient>, namespace: String) -> Self {
-        Self {
-            config_client,
-            namespace,
-        }
-    }
-
-    pub async fn get_executors_metadata(&self) -> Result<Vec<ExecutorMeta>> {
-        let mut result = vec![];
-
-        let entries = self
-            .config_client
-            .get_from_prefix(&get_executors_prefix(&self.namespace))
-            .await?;
-        for (_key, entry) in entries {
-            let meta: ExecutorMetadata = decode_protobuf(&entry)?;
-            result.push(meta.into());
-        }
-        Ok(result)
-    }
-
-    pub async fn save_executor_metadata(&self, meta: ExecutorMeta) -> Result<()> {
-        let key = get_executor_key(&self.namespace, &meta.id);
-        let meta: ExecutorMetadata = meta.into();
-        let value: Vec<u8> = encode_protobuf(&meta)?;
-        self.config_client.put(key, value, Some(LEASE_TIME)).await
-    }
-
-    pub async fn save_job_metadata(
-        &self,
-        job_id: &str,
-        status: &JobStatus,
-    ) -> Result<()> {
-        debug!("Saving job metadata: {:?}", status);
-        let key = get_job_key(&self.namespace, job_id);
-        let value = encode_protobuf(status)?;
-        self.config_client.put(key, value, None).await
-    }
-
-    pub async fn get_job_metadata(&self, job_id: &str) -> Result<JobStatus> {
-        let key = get_job_key(&self.namespace, job_id);
-        let value = &self.config_client.get(&key).await?;
-        if value.is_empty() {
-            return Err(BallistaError::General(format!(
-                "No job metadata found for {}",
-                key
-            )));
-        }
-        let value: JobStatus = decode_protobuf(value)?;
-        Ok(value)
-    }
-
-    pub async fn save_task_status(&self, status: &TaskStatus) -> Result<()> {
-        let partition_id = status.partition_id.as_ref().unwrap();
-        let key = get_task_status_key(
-            &self.namespace,
-            &partition_id.job_id,
-            partition_id.stage_id as usize,
-            partition_id.partition_id as usize,
-        );
-        let value = encode_protobuf(status)?;
-        self.config_client.put(key, value, None).await
-    }
-
-    pub async fn _get_task_status(
-        &self,
-        job_id: &str,
-        stage_id: usize,
-        partition_id: usize,
-    ) -> Result<TaskStatus> {
-        let key = get_task_status_key(&self.namespace, job_id, stage_id, partition_id);
-        let value = &self.config_client.clone().get(&key).await?;
-        if value.is_empty() {
-            return Err(BallistaError::General(format!(
-                "No task status found for {}",
-                key
-            )));
-        }
-        let value: TaskStatus = decode_protobuf(value)?;
-        Ok(value)
-    }
-
-    // "Unnecessary" lifetime syntax due to https://github.com/rust-lang/rust/issues/63033
-    pub async fn save_stage_plan<'a>(
-        &'a self,
-        job_id: &'a str,
-        stage_id: usize,
-        plan: Arc<dyn ExecutionPlan>,
-    ) -> Result<()> {
-        let key = get_stage_plan_key(&self.namespace, job_id, stage_id);
-        let value = {
-            let proto: PhysicalPlanNode = plan.try_into()?;
-            encode_protobuf(&proto)?
-        };
-        self.config_client.clone().put(key, value, None).await
-    }
-
-    pub async fn get_stage_plan(
-        &self,
-        job_id: &str,
-        stage_id: usize,
-    ) -> Result<Arc<dyn ExecutionPlan>> {
-        let key = get_stage_plan_key(&self.namespace, job_id, stage_id);
-        let value = &self.config_client.get(&key).await?;
-        if value.is_empty() {
-            return Err(BallistaError::General(format!(
-                "No stage plan found for {}",
-                key
-            )));
-        }
-        let value: PhysicalPlanNode = decode_protobuf(value)?;
-        Ok((&value).try_into()?)
-    }
-
-    pub async fn assign_next_schedulable_task(
-        &self,
-        executor_id: &str,
-    ) -> Result<Option<(TaskStatus, Arc<dyn ExecutionPlan>)>> {
-        let kvs: HashMap<String, Vec<u8>> = self
-            .config_client
-            .get_from_prefix(&get_task_prefix(&self.namespace))
-            .await?
-            .into_iter()
-            .collect();
-        let executors = self.get_executors_metadata().await?;
-        'tasks: for (_key, value) in kvs.iter() {
-            let mut status: TaskStatus = decode_protobuf(&value)?;
-            if status.status.is_none() {
-                let partition = status.partition_id.as_ref().unwrap();
-                let plan = self
-                    .get_stage_plan(&partition.job_id, partition.stage_id as usize)
-                    .await?;
-
-                // Let's try to resolve any unresolved shuffles we find
-                let unresolved_shuffles = find_unresolved_shuffles(&plan)?;
-                let mut partition_locations: HashMap<
-                    usize,
-                    Vec<ballista_core::serde::scheduler::PartitionLocation>,
-                > = HashMap::new();
-                for unresolved_shuffle in unresolved_shuffles {
-                    for stage_id in unresolved_shuffle.query_stage_ids {
-                        for partition_id in 0..unresolved_shuffle.partition_count {
-                            let referenced_task = kvs
-                                .get(&get_task_status_key(
-                                    &self.namespace,
-                                    &partition.job_id,
-                                    stage_id,
-                                    partition_id,
-                                ))
-                                .unwrap();
-                            let referenced_task: TaskStatus =
-                                decode_protobuf(referenced_task)?;
-                            if let Some(task_status::Status::Completed(CompletedTask {
-                                executor_id,
-                            })) = referenced_task.status
-                            {
-                                let empty = vec![];
-                                let locations =
-                                    partition_locations.entry(stage_id).or_insert(empty);
-                                locations.push(
-                                    ballista_core::serde::scheduler::PartitionLocation {
-                                        partition_id:
-                                            ballista_core::serde::scheduler::PartitionId {
-                                                job_id: partition.job_id.clone(),
-                                                stage_id,
-                                                partition_id,
-                                            },
-                                        executor_meta: executors
-                                            .iter()
-                                            .find(|exec| exec.id == executor_id)
-                                            .unwrap()
-                                            .clone(),
-                                        partition_stats: PartitionStats::default(),
-                                    },
-                                );
-                            } else {
-                                continue 'tasks;
-                            }
-                        }
-                    }
-                }
-                let plan =
-                    remove_unresolved_shuffles(plan.as_ref(), &partition_locations)?;
-
-                // If we get here, there are no more unresolved shuffled and the task can be run
-                status.status = Some(task_status::Status::Running(RunningTask {
-                    executor_id: executor_id.to_owned(),
-                }));
-                self.save_task_status(&status).await?;
-                return Ok(Some((status, plan)));
-            }
-        }
-        Ok(None)
-    }
-
-    // Global lock for the state. We should get rid of this to be able to scale.
-    pub async fn lock(&self) -> Result<Box<dyn Lock>> {
-        self.config_client.lock().await
-    }
-
-    /// This function starts a watch over the task keys. Whenever a task changes, it re-evaluates
-    /// the status for the parent job and updates it accordingly.
-    ///
-    /// The future returned by this function never returns (unless an error happens), so it is wise
-    /// to [tokio::spawn] calls to this method.
-    pub async fn synchronize_job_status_loop(&self) -> Result<()> {
-        let watch = self
-            .config_client
-            .watch(get_task_prefix(&self.namespace))
-            .await?;
-        watch.for_each(|event: WatchEvent| async {
-            let key = match event {
-                WatchEvent::Put(key, _value) => key,
-                WatchEvent::Delete(key) => key
-            };
-            let job_id = extract_job_id_from_task_key(&key).unwrap();
-            match self.lock().await {
-                Ok(mut lock) => {
-                    if let Err(e) = self.synchronize_job_status(job_id).await {
-                        error!("Could not update job status for {}. This job might be stuck forever. Error: {}", job_id, e);
-                    }
-                    lock.unlock().await;
-                },
-                Err(e) => error!("Could not lock config backend. Job {} will have an unsynchronized status and might be stuck forever. Error: {}", job_id, e)
-            }
-        }).await;
-
-        Ok(())
-    }
-
-    async fn synchronize_job_status(&self, job_id: &str) -> Result<()> {
-        let value = self
-            .config_client
-            .get(&get_job_key(&self.namespace, job_id))
-            .await?;
-        let executors: HashMap<String, ExecutorMeta> = self
-            .get_executors_metadata()
-            .await?
-            .into_iter()
-            .map(|meta| (meta.id.to_string(), meta))
-            .collect();
-        let status: JobStatus = decode_protobuf(&value)?;
-        let new_status = self.get_job_status_from_tasks(job_id, &executors).await?;
-        if let Some(new_status) = new_status {
-            if status != new_status {
-                info!(
-                    "Changing status for job {} to {:?}",
-                    job_id, new_status.status
-                );
-                debug!("Old status: {:?}", status);
-                debug!("New status: {:?}", new_status);
-                self.save_job_metadata(job_id, &new_status).await?;
-            }
-        }
-        Ok(())
-    }
-
-    async fn get_job_status_from_tasks(
-        &self,
-        job_id: &str,
-        executors: &HashMap<String, ExecutorMeta>,
-    ) -> Result<Option<JobStatus>> {
-        let statuses = self
-            .config_client
-            .get_from_prefix(&get_task_prefix_for_job(&self.namespace, job_id))
-            .await?
-            .into_iter()
-            .map(|(_k, v)| decode_protobuf::<TaskStatus>(&v))
-            .collect::<Result<Vec<_>>>()?;
-        if statuses.is_empty() {
-            return Ok(None);
-        }
-
-        // Check for job completion
-        let last_stage = statuses
-            .iter()
-            .map(|task| task.partition_id.as_ref().unwrap().stage_id)
-            .max()
-            .unwrap();
-        let statuses: Vec<_> = statuses
-            .into_iter()
-            .filter(|task| task.partition_id.as_ref().unwrap().stage_id == last_stage)
-            .collect();
-        let mut job_status = statuses
-            .iter()
-            .map(|status| match &status.status {
-                Some(task_status::Status::Completed(CompletedTask { executor_id })) => {
-                    Ok((status, executor_id))
-                }
-                _ => Err(BallistaError::General("Task not completed".to_string())),
-            })
-            .collect::<Result<Vec<_>>>()
-            .ok()
-            .map(|info| {
-                let partition_location = info
-                    .into_iter()
-                    .map(|(status, execution_id)| PartitionLocation {
-                        partition_id: status.partition_id.to_owned(),
-                        executor_meta: executors
-                            .get(execution_id)
-                            .map(|e| e.clone().into()),
-                        partition_stats: None,
-                    })
-                    .collect();
-                job_status::Status::Completed(CompletedJob { partition_location })
-            });
-
-        if job_status.is_none() {
-            // Update other statuses
-            for status in statuses {
-                match status.status {
-                    Some(task_status::Status::Failed(FailedTask { error })) => {
-                        job_status =
-                            Some(job_status::Status::Failed(FailedJob { error }));
-                        break;
-                    }
-                    Some(task_status::Status::Running(_)) if job_status == None => {
-                        job_status = Some(job_status::Status::Running(RunningJob {}));
-                    }
-                    _ => (),
-                }
-            }
-        }
-        Ok(job_status.map(|status| JobStatus {
-            status: Some(status),
-        }))
-    }
-}
-
-#[tonic::async_trait]
-pub trait Lock: Send + Sync {
-    async fn unlock(&mut self);
-}
-
-#[tonic::async_trait]
-impl<T: Send + Sync> Lock for OwnedMutexGuard<T> {
-    async fn unlock(&mut self) {}
-}
-
-/// Returns the the unresolved shuffles in the execution plan
-fn find_unresolved_shuffles(
-    plan: &Arc<dyn ExecutionPlan>,
-) -> Result<Vec<UnresolvedShuffleExec>> {
-    if let Some(unresolved_shuffle) =
-        plan.as_any().downcast_ref::<UnresolvedShuffleExec>()
-    {
-        Ok(vec![unresolved_shuffle.clone()])
-    } else {
-        Ok(plan
-            .children()
-            .iter()
-            .map(|child| find_unresolved_shuffles(child))
-            .collect::<Result<Vec<_>>>()?
-            .into_iter()
-            .flatten()
-            .collect())
-    }
-}
-
-fn get_executors_prefix(namespace: &str) -> String {
-    format!("/ballista/{}/executors", namespace)
-}
-
-fn get_executor_key(namespace: &str, id: &str) -> String {
-    format!("{}/{}", get_executors_prefix(namespace), id)
-}
-
-fn get_job_prefix(namespace: &str) -> String {
-    format!("/ballista/{}/jobs", namespace)
-}
-
-fn get_job_key(namespace: &str, id: &str) -> String {
-    format!("{}/{}", get_job_prefix(namespace), id)
-}
-
-fn get_task_prefix(namespace: &str) -> String {
-    format!("/ballista/{}/tasks", namespace)
-}
-
-fn get_task_prefix_for_job(namespace: &str, job_id: &str) -> String {
-    format!("{}/{}", get_task_prefix(namespace), job_id)
-}
-
-fn get_task_status_key(
-    namespace: &str,
-    job_id: &str,
-    stage_id: usize,
-    partition_id: usize,
-) -> String {
-    format!(
-        "{}/{}/{}",
-        get_task_prefix_for_job(namespace, job_id),
-        stage_id,
-        partition_id,
-    )
-}
-
-fn extract_job_id_from_task_key(job_key: &str) -> Result<&str> {
-    job_key.split('/').nth(4).ok_or_else(|| {
-        BallistaError::Internal(format!("Unexpected task key: {}", job_key))
-    })
-}
-
-fn get_stage_plan_key(namespace: &str, job_id: &str, stage_id: usize) -> String {
-    format!("/ballista/{}/stages/{}/{}", namespace, job_id, stage_id,)
-}
-
-fn decode_protobuf<T: Message + Default>(bytes: &[u8]) -> Result<T> {
-    T::decode(bytes).map_err(|e| {
-        BallistaError::Internal(format!(
-            "Could not deserialize {}: {}",
-            type_name::<T>(),
-            e
-        ))
-    })
-}
-
-fn encode_protobuf<T: Message + Default>(msg: &T) -> Result<Vec<u8>> {
-    let mut value: Vec<u8> = Vec::with_capacity(msg.encoded_len());
-    msg.encode(&mut value).map_err(|e| {
-        BallistaError::Internal(format!(
-            "Could not serialize {}: {}",
-            type_name::<T>(),
-            e
-        ))
-    })?;
-    Ok(value)
-}
-
-#[cfg(all(test, feature = "sled"))]
-mod test {
-    use std::sync::Arc;
-
-    use ballista_core::serde::protobuf::{
-        job_status, task_status, CompletedTask, FailedTask, JobStatus, PartitionId,
-        QueuedJob, RunningJob, RunningTask, TaskStatus,
-    };
-    use ballista_core::{error::BallistaError, serde::scheduler::ExecutorMeta};
-
-    use super::{
-        extract_job_id_from_task_key, get_task_status_key, SchedulerState,
-        StandaloneClient,
-    };
-
-    #[tokio::test]
-    async fn executor_metadata() -> Result<(), BallistaError> {
-        let state = SchedulerState::new(
-            Arc::new(StandaloneClient::try_new_temporary()?),
-            "test".to_string(),
-        );
-        let meta = ExecutorMeta {
-            id: "123".to_owned(),
-            host: "localhost".to_owned(),
-            port: 123,
-        };
-        state.save_executor_metadata(meta.clone()).await?;
-        let result = state.get_executors_metadata().await?;
-        assert_eq!(vec![meta], result);
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn job_metadata() -> Result<(), BallistaError> {
-        let state = SchedulerState::new(
-            Arc::new(StandaloneClient::try_new_temporary()?),
-            "test".to_string(),
-        );
-        let meta = JobStatus {
-            status: Some(job_status::Status::Queued(QueuedJob {})),
-        };
-        state.save_job_metadata("job", &meta).await?;
-        let result = state.get_job_metadata("job").await?;
-        assert!(result.status.is_some());
-        match result.status.unwrap() {
-            job_status::Status::Queued(_) => (),
-            _ => panic!("Unexpected status"),
-        }
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn job_metadata_non_existant() -> Result<(), BallistaError> {
-        let state = SchedulerState::new(
-            Arc::new(StandaloneClient::try_new_temporary()?),
-            "test".to_string(),
-        );
-        let meta = JobStatus {
-            status: Some(job_status::Status::Queued(QueuedJob {})),
-        };
-        state.save_job_metadata("job", &meta).await?;
-        let result = state.get_job_metadata("job2").await;
-        assert!(result.is_err());
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn task_status() -> Result<(), BallistaError> {
-        let state = SchedulerState::new(
-            Arc::new(StandaloneClient::try_new_temporary()?),
-            "test".to_string(),
-        );
-        let meta = TaskStatus {
-            status: Some(task_status::Status::Failed(FailedTask {
-                error: "error".to_owned(),
-            })),
-            partition_id: Some(PartitionId {
-                job_id: "job".to_owned(),
-                stage_id: 1,
-                partition_id: 2,
-            }),
-        };
-        state.save_task_status(&meta).await?;
-        let result = state._get_task_status("job", 1, 2).await?;
-        assert!(result.status.is_some());
-        match result.status.unwrap() {
-            task_status::Status::Failed(_) => (),
-            _ => panic!("Unexpected status"),
-        }
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn task_status_non_existant() -> Result<(), BallistaError> {
-        let state = SchedulerState::new(
-            Arc::new(StandaloneClient::try_new_temporary()?),
-            "test".to_string(),
-        );
-        let meta = TaskStatus {
-            status: Some(task_status::Status::Failed(FailedTask {
-                error: "error".to_owned(),
-            })),
-            partition_id: Some(PartitionId {
-                job_id: "job".to_owned(),
-                stage_id: 1,
-                partition_id: 2,
-            }),
-        };
-        state.save_task_status(&meta).await?;
-        let result = state._get_task_status("job", 25, 2).await;
-        assert!(result.is_err());
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn task_synchronize_job_status_queued() -> Result<(), BallistaError> {
-        let state = SchedulerState::new(
-            Arc::new(StandaloneClient::try_new_temporary()?),
-            "test".to_string(),
-        );
-        let job_id = "job";
-        let job_status = JobStatus {
-            status: Some(job_status::Status::Queued(QueuedJob {})),
-        };
-        state.save_job_metadata(job_id, &job_status).await?;
-        state.synchronize_job_status(job_id).await?;
-        let result = state.get_job_metadata(job_id).await?;
-        assert_eq!(result, job_status);
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn task_synchronize_job_status_running() -> Result<(), BallistaError> {
-        let state = SchedulerState::new(
-            Arc::new(StandaloneClient::try_new_temporary()?),
-            "test".to_string(),
-        );
-        let job_id = "job";
-        let job_status = JobStatus {
-            status: Some(job_status::Status::Running(RunningJob {})),
-        };
-        state.save_job_metadata(job_id, &job_status).await?;
-        let meta = TaskStatus {
-            status: Some(task_status::Status::Completed(CompletedTask {
-                executor_id: "".to_owned(),
-            })),
-            partition_id: Some(PartitionId {
-                job_id: job_id.to_owned(),
-                stage_id: 0,
-                partition_id: 0,
-            }),
-        };
-        state.save_task_status(&meta).await?;
-        let meta = TaskStatus {
-            status: Some(task_status::Status::Running(RunningTask {
-                executor_id: "".to_owned(),
-            })),
-            partition_id: Some(PartitionId {
-                job_id: job_id.to_owned(),
-                stage_id: 0,
-                partition_id: 1,
-            }),
-        };
-        state.save_task_status(&meta).await?;
-        state.synchronize_job_status(job_id).await?;
-        let result = state.get_job_metadata(job_id).await?;
-        assert_eq!(result, job_status);
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn task_synchronize_job_status_running2() -> Result<(), BallistaError> {
-        let state = SchedulerState::new(
-            Arc::new(StandaloneClient::try_new_temporary()?),
-            "test".to_string(),
-        );
-        let job_id = "job";
-        let job_status = JobStatus {
-            status: Some(job_status::Status::Running(RunningJob {})),
-        };
-        state.save_job_metadata(job_id, &job_status).await?;
-        let meta = TaskStatus {
-            status: Some(task_status::Status::Completed(CompletedTask {
-                executor_id: "".to_owned(),
-            })),
-            partition_id: Some(PartitionId {
-                job_id: job_id.to_owned(),
-                stage_id: 0,
-                partition_id: 0,
-            }),
-        };
-        state.save_task_status(&meta).await?;
-        let meta = TaskStatus {
-            status: None,
-            partition_id: Some(PartitionId {
-                job_id: job_id.to_owned(),
-                stage_id: 0,
-                partition_id: 1,
-            }),
-        };
-        state.save_task_status(&meta).await?;
-        state.synchronize_job_status(job_id).await?;
-        let result = state.get_job_metadata(job_id).await?;
-        assert_eq!(result, job_status);
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn task_synchronize_job_status_completed() -> Result<(), BallistaError> {
-        let state = SchedulerState::new(
-            Arc::new(StandaloneClient::try_new_temporary()?),
-            "test".to_string(),
-        );
-        let job_id = "job";
-        let job_status = JobStatus {
-            status: Some(job_status::Status::Running(RunningJob {})),
-        };
-        state.save_job_metadata(job_id, &job_status).await?;
-        let meta = TaskStatus {
-            status: Some(task_status::Status::Completed(CompletedTask {
-                executor_id: "".to_owned(),
-            })),
-            partition_id: Some(PartitionId {
-                job_id: job_id.to_owned(),
-                stage_id: 0,
-                partition_id: 0,
-            }),
-        };
-        state.save_task_status(&meta).await?;
-        let meta = TaskStatus {
-            status: Some(task_status::Status::Completed(CompletedTask {
-                executor_id: "".to_owned(),
-            })),
-            partition_id: Some(PartitionId {
-                job_id: job_id.to_owned(),
-                stage_id: 0,
-                partition_id: 1,
-            }),
-        };
-        state.save_task_status(&meta).await?;
-        state.synchronize_job_status(job_id).await?;
-        let result = state.get_job_metadata(job_id).await?;
-        match result.status.unwrap() {
-            job_status::Status::Completed(_) => (),
-            status => panic!("Received status: {:?}", status),
-        }
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn task_synchronize_job_status_completed2() -> Result<(), BallistaError> {
-        let state = SchedulerState::new(
-            Arc::new(StandaloneClient::try_new_temporary()?),
-            "test".to_string(),
-        );
-        let job_id = "job";
-        let job_status = JobStatus {
-            status: Some(job_status::Status::Queued(QueuedJob {})),
-        };
-        state.save_job_metadata(job_id, &job_status).await?;
-        let meta = TaskStatus {
-            status: Some(task_status::Status::Completed(CompletedTask {
-                executor_id: "".to_owned(),
-            })),
-            partition_id: Some(PartitionId {
-                job_id: job_id.to_owned(),
-                stage_id: 0,
-                partition_id: 0,
-            }),
-        };
-        state.save_task_status(&meta).await?;
-        let meta = TaskStatus {
-            status: Some(task_status::Status::Completed(CompletedTask {
-                executor_id: "".to_owned(),
-            })),
-            partition_id: Some(PartitionId {
-                job_id: job_id.to_owned(),
-                stage_id: 0,
-                partition_id: 1,
-            }),
-        };
-        state.save_task_status(&meta).await?;
-        state.synchronize_job_status(job_id).await?;
-        let result = state.get_job_metadata(job_id).await?;
-        match result.status.unwrap() {
-            job_status::Status::Completed(_) => (),
-            status => panic!("Received status: {:?}", status),
-        }
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn task_synchronize_job_status_failed() -> Result<(), BallistaError> {
-        let state = SchedulerState::new(
-            Arc::new(StandaloneClient::try_new_temporary()?),
-            "test".to_string(),
-        );
-        let job_id = "job";
-        let job_status = JobStatus {
-            status: Some(job_status::Status::Running(RunningJob {})),
-        };
-        state.save_job_metadata(job_id, &job_status).await?;
-        let meta = TaskStatus {
-            status: Some(task_status::Status::Completed(CompletedTask {
-                executor_id: "".to_owned(),
-            })),
-            partition_id: Some(PartitionId {
-                job_id: job_id.to_owned(),
-                stage_id: 0,
-                partition_id: 0,
-            }),
-        };
-        state.save_task_status(&meta).await?;
-        let meta = TaskStatus {
-            status: Some(task_status::Status::Failed(FailedTask {
-                error: "".to_owned(),
-            })),
-            partition_id: Some(PartitionId {
-                job_id: job_id.to_owned(),
-                stage_id: 0,
-                partition_id: 1,
-            }),
-        };
-        state.save_task_status(&meta).await?;
-        let meta = TaskStatus {
-            status: None,
-            partition_id: Some(PartitionId {
-                job_id: job_id.to_owned(),
-                stage_id: 0,
-                partition_id: 2,
-            }),
-        };
-        state.save_task_status(&meta).await?;
-        state.synchronize_job_status(job_id).await?;
-        let result = state.get_job_metadata(job_id).await?;
-        match result.status.unwrap() {
-            job_status::Status::Failed(_) => (),
-            status => panic!("Received status: {:?}", status),
-        }
-        Ok(())
-    }
-
-    #[test]
-    fn task_extract_job_id_from_task_key() {
-        let job_id = "foo";
-        assert_eq!(
-            extract_job_id_from_task_key(&get_task_status_key("namespace", job_id, 0, 1))
-                .unwrap(),
-            job_id
-        );
-    }
-}
diff --git a/rust/ballista/rust/scheduler/src/state/standalone.rs b/rust/ballista/rust/scheduler/src/state/standalone.rs
deleted file mode 100644
index 69805c016a1..00000000000
--- a/rust/ballista/rust/scheduler/src/state/standalone.rs
+++ /dev/null
@@ -1,228 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::{sync::Arc, task::Poll, time::Duration};
-
-use crate::state::ConfigBackendClient;
-use ballista_core::error::{ballista_error, BallistaError, Result};
-
-use futures::{FutureExt, Stream};
-use log::warn;
-use sled::{Event, Subscriber};
-use tokio::sync::Mutex;
-
-use super::{Lock, Watch, WatchEvent};
-
-/// A [`ConfigBackendClient`] implementation that uses file-based storage to save cluster configuration.
-#[derive(Clone)]
-pub struct StandaloneClient {
-    db: sled::Db,
-    lock: Arc<Mutex<()>>,
-}
-
-impl StandaloneClient {
-    /// Creates a StandaloneClient that saves data to the specified file.
-    pub fn try_new<P: AsRef<std::path::Path>>(path: P) -> Result<Self> {
-        Ok(Self {
-            db: sled::open(path).map_err(sled_to_ballista_error)?,
-            lock: Arc::new(Mutex::new(())),
-        })
-    }
-
-    /// Creates a StandaloneClient that saves data to a temp file.
-    pub fn try_new_temporary() -> Result<Self> {
-        Ok(Self {
-            db: sled::Config::new()
-                .temporary(true)
-                .open()
-                .map_err(sled_to_ballista_error)?,
-            lock: Arc::new(Mutex::new(())),
-        })
-    }
-}
-
-fn sled_to_ballista_error(e: sled::Error) -> BallistaError {
-    match e {
-        sled::Error::Io(io) => BallistaError::IoError(io),
-        _ => BallistaError::General(format!("{}", e)),
-    }
-}
-
-#[tonic::async_trait]
-impl ConfigBackendClient for StandaloneClient {
-    async fn get(&self, key: &str) -> Result<Vec<u8>> {
-        Ok(self
-            .db
-            .get(key)
-            .map_err(|e| ballista_error(&format!("sled error {:?}", e)))?
-            .map(|v| v.to_vec())
-            .unwrap_or_default())
-    }
-
-    async fn get_from_prefix(&self, prefix: &str) -> Result<Vec<(String, Vec<u8>)>> {
-        Ok(self
-            .db
-            .scan_prefix(prefix)
-            .map(|v| {
-                v.map(|(key, value)| {
-                    (
-                        std::str::from_utf8(&key).unwrap().to_owned(),
-                        value.to_vec(),
-                    )
-                })
-            })
-            .collect::<std::result::Result<Vec<_>, _>>()
-            .map_err(|e| ballista_error(&format!("sled error {:?}", e)))?)
-    }
-
-    // TODO: support lease_time. See https://github.com/spacejam/sled/issues/1119 for how to approach this
-    async fn put(
-        &self,
-        key: String,
-        value: Vec<u8>,
-        _lease_time: Option<Duration>,
-    ) -> Result<()> {
-        self.db
-            .insert(key, value)
-            .map_err(|e| {
-                warn!("sled insert failed: {}", e);
-                ballista_error("sled insert failed")
-            })
-            .map(|_| ())
-    }
-
-    async fn lock(&self) -> Result<Box<dyn Lock>> {
-        Ok(Box::new(self.lock.clone().lock_owned().await))
-    }
-
-    async fn watch(&self, prefix: String) -> Result<Box<dyn Watch>> {
-        Ok(Box::new(SledWatch {
-            subscriber: self.db.watch_prefix(prefix),
-        }))
-    }
-}
-
-struct SledWatch {
-    subscriber: Subscriber,
-}
-
-#[tonic::async_trait]
-impl Watch for SledWatch {
-    async fn cancel(&mut self) -> Result<()> {
-        Ok(())
-    }
-}
-
-impl Stream for SledWatch {
-    type Item = WatchEvent;
-
-    fn poll_next(
-        self: std::pin::Pin<&mut Self>,
-        cx: &mut std::task::Context<'_>,
-    ) -> std::task::Poll<Option<Self::Item>> {
-        match self.get_mut().subscriber.poll_unpin(cx) {
-            Poll::Pending => Poll::Pending,
-            Poll::Ready(None) => Poll::Ready(None),
-            Poll::Ready(Some(Event::Insert { key, value })) => {
-                let key = std::str::from_utf8(&key).unwrap().to_owned();
-                Poll::Ready(Some(WatchEvent::Put(key, value.to_vec())))
-            }
-            Poll::Ready(Some(Event::Remove { key })) => {
-                let key = std::str::from_utf8(&key).unwrap().to_owned();
-                Poll::Ready(Some(WatchEvent::Delete(key)))
-            }
-        }
-    }
-
-    fn size_hint(&self) -> (usize, Option<usize>) {
-        self.subscriber.size_hint()
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use crate::state::{ConfigBackendClient, Watch, WatchEvent};
-
-    use super::StandaloneClient;
-    use futures::StreamExt;
-    use std::result::Result;
-
-    fn create_instance() -> Result<StandaloneClient, Box<dyn std::error::Error>> {
-        Ok(StandaloneClient::try_new_temporary()?)
-    }
-
-    #[tokio::test]
-    async fn put_read() -> Result<(), Box<dyn std::error::Error>> {
-        let client = create_instance()?;
-        let key = "key";
-        let value = "value".as_bytes();
-        client.put(key.to_owned(), value.to_vec(), None).await?;
-        assert_eq!(client.get(key).await?, value);
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn read_empty() -> Result<(), Box<dyn std::error::Error>> {
-        let client = create_instance()?;
-        let key = "key";
-        let empty: &[u8] = &[];
-        assert_eq!(client.get(key).await?, empty);
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn read_prefix() -> Result<(), Box<dyn std::error::Error>> {
-        let client = create_instance()?;
-        let key = "key";
-        let value = "value".as_bytes();
-        client
-            .put(format!("{}/1", key), value.to_vec(), None)
-            .await?;
-        client
-            .put(format!("{}/2", key), value.to_vec(), None)
-            .await?;
-        assert_eq!(
-            client.get_from_prefix(key).await?,
-            vec![
-                ("key/1".to_owned(), value.to_vec()),
-                ("key/2".to_owned(), value.to_vec())
-            ]
-        );
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn read_watch() -> Result<(), Box<dyn std::error::Error>> {
-        let client = create_instance()?;
-        let key = "key";
-        let value = "value".as_bytes();
-        let mut watch: Box<dyn Watch> = client.watch(key.to_owned()).await?;
-        client.put(key.to_owned(), value.to_vec(), None).await?;
-        assert_eq!(
-            watch.next().await,
-            Some(WatchEvent::Put(key.to_owned(), value.to_owned()))
-        );
-        let value2 = "value2".as_bytes();
-        client.put(key.to_owned(), value2.to_vec(), None).await?;
-        assert_eq!(
-            watch.next().await,
-            Some(WatchEvent::Put(key.to_owned(), value2.to_owned()))
-        );
-        watch.cancel().await?;
-        Ok(())
-    }
-}
diff --git a/rust/ballista/rust/scheduler/src/test_utils.rs b/rust/ballista/rust/scheduler/src/test_utils.rs
deleted file mode 100644
index 330cc9a9332..00000000000
--- a/rust/ballista/rust/scheduler/src/test_utils.rs
+++ /dev/null
@@ -1,148 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::sync::Arc;
-
-use ballista_core::error::Result;
-
-use arrow::datatypes::{DataType, Field, Schema};
-use datafusion::execution::context::{ExecutionConfig, ExecutionContext};
-use datafusion::physical_optimizer::coalesce_batches::CoalesceBatches;
-use datafusion::physical_optimizer::merge_exec::AddMergeExec;
-use datafusion::physical_optimizer::optimizer::PhysicalOptimizerRule;
-use datafusion::physical_plan::csv::CsvReadOptions;
-
-pub const TPCH_TABLES: &[&str] = &[
-    "part", "supplier", "partsupp", "customer", "orders", "lineitem", "nation", "region",
-];
-
-pub fn datafusion_test_context(path: &str) -> Result<ExecutionContext> {
-    // remove Repartition rule because that isn't supported yet
-    let rules: Vec<Arc<dyn PhysicalOptimizerRule + Send + Sync>> = vec![
-        Arc::new(CoalesceBatches::new()),
-        Arc::new(AddMergeExec::new()),
-    ];
-    let config = ExecutionConfig::new().with_physical_optimizer_rules(rules);
-    let mut ctx = ExecutionContext::with_config(config);
-
-    for table in TPCH_TABLES {
-        let schema = get_tpch_schema(table);
-        let options = CsvReadOptions::new()
-            .schema(&schema)
-            .delimiter(b'|')
-            .has_header(false)
-            .file_extension(".tbl");
-        let dir = format!("{}/{}", path, table);
-        ctx.register_csv(table, &dir, options)?;
-    }
-    Ok(ctx)
-}
-
-pub fn get_tpch_schema(table: &str) -> Schema {
-    // note that the schema intentionally uses signed integers so that any generated Parquet
-    // files can also be used to benchmark tools that only support signed integers, such as
-    // Apache Spark
-
-    match table {
-        "part" => Schema::new(vec![
-            Field::new("p_partkey", DataType::Int32, false),
-            Field::new("p_name", DataType::Utf8, false),
-            Field::new("p_mfgr", DataType::Utf8, false),
-            Field::new("p_brand", DataType::Utf8, false),
-            Field::new("p_type", DataType::Utf8, false),
-            Field::new("p_size", DataType::Int32, false),
-            Field::new("p_container", DataType::Utf8, false),
-            Field::new("p_retailprice", DataType::Float64, false),
-            Field::new("p_comment", DataType::Utf8, false),
-        ]),
-
-        "supplier" => Schema::new(vec![
-            Field::new("s_suppkey", DataType::Int32, false),
-            Field::new("s_name", DataType::Utf8, false),
-            Field::new("s_address", DataType::Utf8, false),
-            Field::new("s_nationkey", DataType::Int32, false),
-            Field::new("s_phone", DataType::Utf8, false),
-            Field::new("s_acctbal", DataType::Float64, false),
-            Field::new("s_comment", DataType::Utf8, false),
-        ]),
-
-        "partsupp" => Schema::new(vec![
-            Field::new("ps_partkey", DataType::Int32, false),
-            Field::new("ps_suppkey", DataType::Int32, false),
-            Field::new("ps_availqty", DataType::Int32, false),
-            Field::new("ps_supplycost", DataType::Float64, false),
-            Field::new("ps_comment", DataType::Utf8, false),
-        ]),
-
-        "customer" => Schema::new(vec![
-            Field::new("c_custkey", DataType::Int32, false),
-            Field::new("c_name", DataType::Utf8, false),
-            Field::new("c_address", DataType::Utf8, false),
-            Field::new("c_nationkey", DataType::Int32, false),
-            Field::new("c_phone", DataType::Utf8, false),
-            Field::new("c_acctbal", DataType::Float64, false),
-            Field::new("c_mktsegment", DataType::Utf8, false),
-            Field::new("c_comment", DataType::Utf8, false),
-        ]),
-
-        "orders" => Schema::new(vec![
-            Field::new("o_orderkey", DataType::Int32, false),
-            Field::new("o_custkey", DataType::Int32, false),
-            Field::new("o_orderstatus", DataType::Utf8, false),
-            Field::new("o_totalprice", DataType::Float64, false),
-            Field::new("o_orderdate", DataType::Date32, false),
-            Field::new("o_orderpriority", DataType::Utf8, false),
-            Field::new("o_clerk", DataType::Utf8, false),
-            Field::new("o_shippriority", DataType::Int32, false),
-            Field::new("o_comment", DataType::Utf8, false),
-        ]),
-
-        "lineitem" => Schema::new(vec![
-            Field::new("l_orderkey", DataType::Int32, false),
-            Field::new("l_partkey", DataType::Int32, false),
-            Field::new("l_suppkey", DataType::Int32, false),
-            Field::new("l_linenumber", DataType::Int32, false),
-            Field::new("l_quantity", DataType::Float64, false),
-            Field::new("l_extendedprice", DataType::Float64, false),
-            Field::new("l_discount", DataType::Float64, false),
-            Field::new("l_tax", DataType::Float64, false),
-            Field::new("l_returnflag", DataType::Utf8, false),
-            Field::new("l_linestatus", DataType::Utf8, false),
-            Field::new("l_shipdate", DataType::Date32, false),
-            Field::new("l_commitdate", DataType::Date32, false),
-            Field::new("l_receiptdate", DataType::Date32, false),
-            Field::new("l_shipinstruct", DataType::Utf8, false),
-            Field::new("l_shipmode", DataType::Utf8, false),
-            Field::new("l_comment", DataType::Utf8, false),
-        ]),
-
-        "nation" => Schema::new(vec![
-            Field::new("n_nationkey", DataType::Int32, false),
-            Field::new("n_name", DataType::Utf8, false),
-            Field::new("n_regionkey", DataType::Int32, false),
-            Field::new("n_comment", DataType::Utf8, false),
-        ]),
-
-        "region" => Schema::new(vec![
-            Field::new("r_regionkey", DataType::Int32, false),
-            Field::new("r_name", DataType::Utf8, false),
-            Field::new("r_comment", DataType::Utf8, false),
-        ]),
-
-        _ => unimplemented!(),
-    }
-}
diff --git a/rust/ballista/rust/scheduler/testdata/customer/customer.tbl b/rust/ballista/rust/scheduler/testdata/customer/customer.tbl
deleted file mode 100644
index afa5a739ab3..00000000000
--- a/rust/ballista/rust/scheduler/testdata/customer/customer.tbl
+++ /dev/null
@@ -1,10 +0,0 @@
-1|Customer#000000001|IVhzIApeRb ot,c,E|15|25-989-741-2988|711.56|BUILDING|to the even, regular platelets. regular, ironic epitaphs nag e|
-2|Customer#000000002|XSTf4,NCwDVaWNe6tEgvwfmRchLXak|13|23-768-687-3665|121.65|AUTOMOBILE|l accounts. blithely ironic theodolites integrate boldly: caref|
-3|Customer#000000003|MG9kdTD2WBHm|1|11-719-748-3364|7498.12|AUTOMOBILE| deposits eat slyly ironic, even instructions. express foxes detect slyly. blithely even accounts abov|
-4|Customer#000000004|XxVSJsLAGtn|4|14-128-190-5944|2866.83|MACHINERY| requests. final, regular ideas sleep final accou|
-5|Customer#000000005|KvpyuHCplrB84WgAiGV6sYpZq7Tj|3|13-750-942-6364|794.47|HOUSEHOLD|n accounts will have to unwind. foxes cajole accor|
-6|Customer#000000006|sKZz0CsnMD7mp4Xd0YrBvx,LREYKUWAh yVn|20|30-114-968-4951|7638.57|AUTOMOBILE|tions. even deposits boost according to the slyly bold packages. final accounts cajole requests. furious|
-7|Customer#000000007|TcGe5gaZNgVePxU5kRrvXBfkasDTea|18|28-190-982-9759|9561.95|AUTOMOBILE|ainst the ironic, express theodolites. express, even pinto beans among the exp|
-8|Customer#000000008|I0B10bB0AymmC, 0PrRYBCP1yGJ8xcBPmWhl5|17|27-147-574-9335|6819.74|BUILDING|among the slyly regular theodolites kindle blithely courts. carefully even theodolites haggle slyly along the ide|
-9|Customer#000000009|xKiAFTjUsCuxfeleNqefumTrjS|8|18-338-906-3675|8324.07|FURNITURE|r theodolites according to the requests wake thinly excuses: pending requests haggle furiousl|
-10|Customer#000000010|6LrEaV6KR6PLVcgl2ArL Q3rqzLzcT1 v2|5|15-741-346-9870|2753.54|HOUSEHOLD|es regular deposits haggle. fur|
diff --git a/rust/ballista/rust/scheduler/testdata/lineitem/partition0.tbl b/rust/ballista/rust/scheduler/testdata/lineitem/partition0.tbl
deleted file mode 100644
index b7424c2138b..00000000000
--- a/rust/ballista/rust/scheduler/testdata/lineitem/partition0.tbl
+++ /dev/null
@@ -1,10 +0,0 @@
-1|155190|7706|1|17|21168.23|0.04|0.02|N|O|1996-03-13|1996-02-12|1996-03-22|DELIVER IN PERSON|TRUCK|egular courts above the|
-1|67310|7311|2|36|45983.16|0.09|0.06|N|O|1996-04-12|1996-02-28|1996-04-20|TAKE BACK RETURN|MAIL|ly final dependencies: slyly bold |
-1|63700|3701|3|8|13309.60|0.10|0.02|N|O|1996-01-29|1996-03-05|1996-01-31|TAKE BACK RETURN|REG AIR|riously. regular, express dep|
-1|2132|4633|4|28|28955.64|0.09|0.06|N|O|1996-04-21|1996-03-30|1996-05-16|NONE|AIR|lites. fluffily even de|
-1|24027|1534|5|24|22824.48|0.10|0.04|N|O|1996-03-30|1996-03-14|1996-04-01|NONE|FOB| pending foxes. slyly re|
-1|15635|638|6|32|49620.16|0.07|0.02|N|O|1996-01-30|1996-02-07|1996-02-03|DELIVER IN PERSON|MAIL|arefully slyly ex|
-2|106170|1191|1|38|44694.46|0.00|0.05|N|O|1997-01-28|1997-01-14|1997-02-02|TAKE BACK RETURN|RAIL|ven requests. deposits breach a|
-3|4297|1798|1|45|54058.05|0.06|0.00|R|F|1994-02-02|1994-01-04|1994-02-23|NONE|AIR|ongside of the furiously brave acco|
-3|19036|6540|2|49|46796.47|0.10|0.00|R|F|1993-11-09|1993-12-20|1993-11-24|TAKE BACK RETURN|RAIL| unusual accounts. eve|
-3|128449|3474|3|27|39890.88|0.06|0.07|A|F|1994-01-16|1993-11-22|1994-01-23|DELIVER IN PERSON|SHIP|nal foxes wake. |
diff --git a/rust/ballista/rust/scheduler/testdata/lineitem/partition1.tbl b/rust/ballista/rust/scheduler/testdata/lineitem/partition1.tbl
deleted file mode 100644
index b7424c2138b..00000000000
--- a/rust/ballista/rust/scheduler/testdata/lineitem/partition1.tbl
+++ /dev/null
@@ -1,10 +0,0 @@
-1|155190|7706|1|17|21168.23|0.04|0.02|N|O|1996-03-13|1996-02-12|1996-03-22|DELIVER IN PERSON|TRUCK|egular courts above the|
-1|67310|7311|2|36|45983.16|0.09|0.06|N|O|1996-04-12|1996-02-28|1996-04-20|TAKE BACK RETURN|MAIL|ly final dependencies: slyly bold |
-1|63700|3701|3|8|13309.60|0.10|0.02|N|O|1996-01-29|1996-03-05|1996-01-31|TAKE BACK RETURN|REG AIR|riously. regular, express dep|
-1|2132|4633|4|28|28955.64|0.09|0.06|N|O|1996-04-21|1996-03-30|1996-05-16|NONE|AIR|lites. fluffily even de|
-1|24027|1534|5|24|22824.48|0.10|0.04|N|O|1996-03-30|1996-03-14|1996-04-01|NONE|FOB| pending foxes. slyly re|
-1|15635|638|6|32|49620.16|0.07|0.02|N|O|1996-01-30|1996-02-07|1996-02-03|DELIVER IN PERSON|MAIL|arefully slyly ex|
-2|106170|1191|1|38|44694.46|0.00|0.05|N|O|1997-01-28|1997-01-14|1997-02-02|TAKE BACK RETURN|RAIL|ven requests. deposits breach a|
-3|4297|1798|1|45|54058.05|0.06|0.00|R|F|1994-02-02|1994-01-04|1994-02-23|NONE|AIR|ongside of the furiously brave acco|
-3|19036|6540|2|49|46796.47|0.10|0.00|R|F|1993-11-09|1993-12-20|1993-11-24|TAKE BACK RETURN|RAIL| unusual accounts. eve|
-3|128449|3474|3|27|39890.88|0.06|0.07|A|F|1994-01-16|1993-11-22|1994-01-23|DELIVER IN PERSON|SHIP|nal foxes wake. |
diff --git a/rust/ballista/rust/scheduler/testdata/nation/nation.tbl b/rust/ballista/rust/scheduler/testdata/nation/nation.tbl
deleted file mode 100644
index c31ad6be0fa..00000000000
--- a/rust/ballista/rust/scheduler/testdata/nation/nation.tbl
+++ /dev/null
@@ -1,10 +0,0 @@
-0|ALGERIA|0| haggle. carefully final deposits detect slyly agai|
-1|ARGENTINA|1|al foxes promise slyly according to the regular accounts. bold requests alon|
-2|BRAZIL|1|y alongside of the pending deposits. carefully special packages are about the ironic forges. slyly special |
-3|CANADA|1|eas hang ironic, silent packages. slyly regular packages are furiously over the tithes. fluffily bold|
-4|EGYPT|4|y above the carefully unusual theodolites. final dugouts are quickly across the furiously regular d|
-5|ETHIOPIA|0|ven packages wake quickly. regu|
-6|FRANCE|3|refully final requests. regular, ironi|
-7|GERMANY|3|l platelets. regular accounts x-ray: unusual, regular acco|
-8|INDIA|2|ss excuses cajole slyly across the packages. deposits print aroun|
-9|INDONESIA|2| slyly express asymptotes. regular deposits haggle slyly. carefully ironic hockey players sleep blithely. carefull|
diff --git a/rust/ballista/rust/scheduler/testdata/orders/orders.tbl b/rust/ballista/rust/scheduler/testdata/orders/orders.tbl
deleted file mode 100644
index f5fa65b09a7..00000000000
--- a/rust/ballista/rust/scheduler/testdata/orders/orders.tbl
+++ /dev/null
@@ -1,10 +0,0 @@
-1|36901|O|173665.47|1996-01-02|5-LOW|Clerk#000000951|0|nstructions sleep furiously among |
-2|78002|O|46929.18|1996-12-01|1-URGENT|Clerk#000000880|0| foxes. pending accounts at the pending, silent asymptot|
-3|123314|F|193846.25|1993-10-14|5-LOW|Clerk#000000955|0|sly final accounts boost. carefully regular ideas cajole carefully. depos|
-4|136777|O|32151.78|1995-10-11|5-LOW|Clerk#000000124|0|sits. slyly regular warthogs cajole. regular, regular theodolites acro|
-5|44485|F|144659.20|1994-07-30|5-LOW|Clerk#000000925|0|quickly. bold deposits sleep slyly. packages use slyly|
-6|55624|F|58749.59|1992-02-21|4-NOT SPECIFIED|Clerk#000000058|0|ggle. special, final requests are against the furiously specia|
-7|39136|O|252004.18|1996-01-10|2-HIGH|Clerk#000000470|0|ly special requests |
-32|130057|O|208660.75|1995-07-16|2-HIGH|Clerk#000000616|0|ise blithely bold, regular requests. quickly unusual dep|
-33|66958|F|163243.98|1993-10-27|3-MEDIUM|Clerk#000000409|0|uriously. furiously final request|
-34|61001|O|58949.67|1998-07-21|3-MEDIUM|Clerk#000000223|0|ly final packages. fluffily final deposits wake blithely ideas. spe|
diff --git a/rust/ballista/rust/scheduler/testdata/part/part.tbl b/rust/ballista/rust/scheduler/testdata/part/part.tbl
deleted file mode 100644
index 0c6f0e2f3e1..00000000000
--- a/rust/ballista/rust/scheduler/testdata/part/part.tbl
+++ /dev/null
@@ -1,10 +0,0 @@
-1|goldenrod lavender spring chocolate lace|Manufacturer#1|Brand#13|PROMO BURNISHED COPPER|7|JUMBO PKG|901.00|ly. slyly ironi|
-2|blush thistle blue yellow saddle|Manufacturer#1|Brand#13|LARGE BRUSHED BRASS|1|LG CASE|902.00|lar accounts amo|
-3|spring green yellow purple cornsilk|Manufacturer#4|Brand#42|STANDARD POLISHED BRASS|21|WRAP CASE|903.00|egular deposits hag|
-4|cornflower chocolate smoke green pink|Manufacturer#3|Brand#34|SMALL PLATED BRASS|14|MED DRUM|904.00|p furiously r|
-5|forest brown coral puff cream|Manufacturer#3|Brand#32|STANDARD POLISHED TIN|15|SM PKG|905.00| wake carefully |
-6|bisque cornflower lawn forest magenta|Manufacturer#2|Brand#24|PROMO PLATED STEEL|4|MED BAG|906.00|sual a|
-7|moccasin green thistle khaki floral|Manufacturer#1|Brand#11|SMALL PLATED COPPER|45|SM BAG|907.00|lyly. ex|
-8|misty lace thistle snow royal|Manufacturer#4|Brand#44|PROMO BURNISHED TIN|41|LG DRUM|908.00|eposi|
-9|thistle dim navajo dark gainsboro|Manufacturer#4|Brand#43|SMALL BURNISHED STEEL|12|WRAP CASE|909.00|ironic foxe|
-10|linen pink saddle puff powder|Manufacturer#5|Brand#54|LARGE BURNISHED STEEL|44|LG CAN|910.01|ithely final deposit|
diff --git a/rust/ballista/rust/scheduler/testdata/partsupp/partsupp.tbl b/rust/ballista/rust/scheduler/testdata/partsupp/partsupp.tbl
deleted file mode 100644
index 45145385a16..00000000000
--- a/rust/ballista/rust/scheduler/testdata/partsupp/partsupp.tbl
+++ /dev/null
@@ -1,10 +0,0 @@
-1|2|3325|771.64|, even theodolites. regular, final theodolites eat after the carefully pending foxes. furiously regular deposits sleep slyly. carefully bold realms above the ironic dependencies haggle careful|
-1|2502|8076|993.49|ven ideas. quickly even packages print. pending multipliers must have to are fluff|
-1|5002|3956|337.09|after the fluffily ironic deposits? blithely special dependencies integrate furiously even excuses. blithely silent theodolites could have to haggle pending, express requests; fu|
-1|7502|4069|357.84|al, regular dependencies serve carefully after the quickly final pinto beans. furiously even deposits sleep quickly final, silent pinto beans. fluffily reg|
-2|3|8895|378.49|nic accounts. final accounts sleep furiously about the ironic, bold packages. regular, regular accounts|
-2|2503|4969|915.27|ptotes. quickly pending dependencies integrate furiously. fluffily ironic ideas impress blithely above the express accounts. furiously even epitaphs need to wak|
-2|5003|8539|438.37|blithely bold ideas. furiously stealthy packages sleep fluffily. slyly special deposits snooze furiously carefully regular accounts. regular deposits according to the accounts nag carefully slyl|
-2|7503|3025|306.39|olites. deposits wake carefully. even, express requests cajole. carefully regular ex|
-3|4|4651|920.92|ilent foxes affix furiously quickly unusual requests. even packages across the carefully even theodolites nag above the sp|
-3|2504|4093|498.13|ending dependencies haggle fluffily. regular deposits boost quickly carefully regular requests. deposits affix furiously around the pinto beans. ironic, unusual platelets across the p|
diff --git a/rust/ballista/rust/scheduler/testdata/region/region.tbl b/rust/ballista/rust/scheduler/testdata/region/region.tbl
deleted file mode 100644
index c5ebb63b621..00000000000
--- a/rust/ballista/rust/scheduler/testdata/region/region.tbl
+++ /dev/null
@@ -1,5 +0,0 @@
-0|AFRICA|lar deposits. blithely final packages cajole. regular waters are final requests. regular accounts are according to |
-1|AMERICA|hs use ironic, even requests. s|
-2|ASIA|ges. thinly even pinto beans ca|
-3|EUROPE|ly final courts cajole furiously final excuse|
-4|MIDDLE EAST|uickly special accounts cajole carefully blithely close requests. carefully final asymptotes haggle furiousl|
diff --git a/rust/ballista/rust/scheduler/testdata/supplier/supplier.tbl b/rust/ballista/rust/scheduler/testdata/supplier/supplier.tbl
deleted file mode 100644
index d9c0e9f7e20..00000000000
--- a/rust/ballista/rust/scheduler/testdata/supplier/supplier.tbl
+++ /dev/null
@@ -1,10 +0,0 @@
-1|Supplier#000000001| N kD4on9OM Ipw3,gf0JBoQDd7tgrzrddZ|17|27-918-335-1736|5755.94|each slyly above the careful|
-2|Supplier#000000002|89eJ5ksX3ImxJQBvxObC,|5|15-679-861-2259|4032.68| slyly bold instructions. idle dependen|
-3|Supplier#000000003|q1,G3Pj6OjIuUYfUoH18BFTKP5aU9bEV3|1|11-383-516-1199|4192.40|blithely silent requests after the express dependencies are sl|
-4|Supplier#000000004|Bk7ah4CK8SYQTepEmvMkkgMwg|15|25-843-787-7479|4641.08|riously even requests above the exp|
-5|Supplier#000000005|Gcdm2rJRzl5qlTVzc|11|21-151-690-3663|-283.84|. slyly regular pinto bea|
-6|Supplier#000000006|tQxuVm7s7CnK|14|24-696-997-4969|1365.79|final accounts. regular dolphins use against the furiously ironic decoys. |
-7|Supplier#000000007|s,4TicNGB4uO6PaSqNBUq|23|33-990-965-2201|6820.35|s unwind silently furiously regular courts. final requests are deposits. requests wake quietly blit|
-8|Supplier#000000008|9Sq4bBH2FQEmaFOocY45sRTxo6yuoG|17|27-498-742-3860|7627.85|al pinto beans. asymptotes haggl|
-9|Supplier#000000009|1KhUgZegwM3ua7dsYmekYBsK|10|20-403-398-8662|5302.37|s. unusual, even requests along the furiously regular pac|
-10|Supplier#000000010|Saygah3gYWMp72i PY|24|34-852-489-8585|3891.91|ing waters. regular requests ar|
diff --git a/rust/ballista/ui/scheduler/.gitignore b/rust/ballista/ui/scheduler/.gitignore
deleted file mode 100644
index 4d29575de80..00000000000
--- a/rust/ballista/ui/scheduler/.gitignore
+++ /dev/null
@@ -1,23 +0,0 @@
-# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
-
-# dependencies
-/node_modules
-/.pnp
-.pnp.js
-
-# testing
-/coverage
-
-# production
-/build
-
-# misc
-.DS_Store
-.env.local
-.env.development.local
-.env.test.local
-.env.production.local
-
-npm-debug.log*
-yarn-debug.log*
-yarn-error.log*
diff --git a/rust/ballista/ui/scheduler/README.md b/rust/ballista/ui/scheduler/README.md
deleted file mode 100644
index 1a196dab2fa..00000000000
--- a/rust/ballista/ui/scheduler/README.md
+++ /dev/null
@@ -1,45 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-# Ballista UI
-
-
-## Available Scripts
-
-In the project directory, you can run:
-
-### `yarn start`
-
-Runs the app in the development mode.\
-Open [http://localhost:3000](http://localhost:3000) to view it in the browser.
-
-The page will reload if you make edits.\
-You will also see any lint errors in the console.
-
-### `yarn test`
-
-Launches the test runner in the interactive watch mode.\
-See the section about [running tests](https://facebook.github.io/create-react-app/docs/running-tests) for more information.
-
-### `yarn build`
-
-Builds the app for production to the `build` folder.\
-It correctly bundles React in production mode and optimizes the build for the best performance.
-
-The build is minified and the filenames include the hashes.
\ No newline at end of file
diff --git a/rust/ballista/ui/scheduler/index.d.ts b/rust/ballista/ui/scheduler/index.d.ts
deleted file mode 100644
index 9f715810217..00000000000
--- a/rust/ballista/ui/scheduler/index.d.ts
+++ /dev/null
@@ -1,18 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-declare module "@chakra-ui/icons";
\ No newline at end of file
diff --git a/rust/ballista/ui/scheduler/package.json b/rust/ballista/ui/scheduler/package.json
deleted file mode 100644
index fe1e72d68f7..00000000000
--- a/rust/ballista/ui/scheduler/package.json
+++ /dev/null
@@ -1,58 +0,0 @@
-{
-  "name": "scheduler-ui",
-  "version": "0.1.0",
-  "private": true,
-  "dependencies": {
-    "@chakra-ui/icons": "^1.0.5",
-    "@chakra-ui/react": "^1.3.3",
-    "@emotion/react": "^11.1.5",
-    "@emotion/styled": "^11.1.5",
-    "@testing-library/jest-dom": "^5.11.4",
-    "@testing-library/react": "^11.1.0",
-    "@testing-library/user-event": "^12.1.10",
-    "@types/jest": "^26.0.15",
-    "@types/node": "^12.0.0",
-    "@types/react": "^17.0.0",
-    "@types/react-dom": "^17.0.0",
-    "framer-motion": "^3.7.0",
-    "react": "^17.0.1",
-    "react-dom": "^17.0.1",
-    "react-icons": "^4.2.0",
-    "react-router-dom": "^5.2.0",
-    "react-scripts": "4.0.3",
-    "react-table": "^7.6.3",
-    "react-timeago": "^5.2.0",
-    "typescript": "^4.1.2",
-    "web-vitals": "^1.0.1"
-  },
-  "scripts": {
-    "start": "react-scripts start",
-    "build": "react-scripts build",
-    "test": "react-scripts test",
-    "eject": "react-scripts eject"
-  },
-  "eslintConfig": {
-    "extends": [
-      "react-app",
-      "react-app/jest"
-    ]
-  },
-  "browserslist": {
-    "production": [
-      ">0.2%",
-      "not dead",
-      "not op_mini all"
-    ],
-    "development": [
-      "last 1 chrome version",
-      "last 1 firefox version",
-      "last 1 safari version"
-    ]
-  },
-  "devDependencies": {
-    "@types/react-table": "^7.0.28",
-    "@types/react-timeago": "^4.1.2",
-    "prettier": "^2.2.1"
-  },
-  "proxy": "http://localhost:50050"
-}
diff --git a/rust/ballista/ui/scheduler/public/favicon.ico b/rust/ballista/ui/scheduler/public/favicon.ico
deleted file mode 100644
index a11777cc471a4344702741ab1c8a588998b1311a..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 3870
zcma);c{J4h9>;%nil|2-o+rCuEF-(I%-F}ijC~o(k~HKAkr0)!FCj~d>`RtpD?8b;
zXOC1OD!V*IsqUwzbMF1)-gEDD=A573Z-&G7^LoAC9|WO7Xc0Cx1g^Zu0u_SjAPB<A
z`RksU20=ur5rmib*S!+l%h4eS4)^Q+0X>3vGa^W|sj)80f#V0@M_CAZTIO(t--xg=
z!sii`1giyH7EKL_+Wi0ab<)&E_0KD!3Rp2^HNB*K2@PHCs4PWSA32*-^7d{9nH2_E
zmC{C*N*)(vEF1_aMamw2A{ZH5aIDqiabnFdJ|y0%aS|64E$`s2ccV~3lR!u<){eS`
z#^Mx6o(iP1Ix%<jZ{9b!^*}EvPeMb_W#+3mPDk@<s^Oh#VM&a2^K;|820}`)peR}+
zJXt@j)V#7+Js?u;Lb#g$HH)e~Ro^hvl6KSLHq)Y3adj<OOD7?;gwee^gNzCxwD?IA
z8?*}E@b*IiVPUPv3?XqzLRv|{4)GKGzjS`)#ukL7W&K6BHn&1}P(skc69cJ?5^C+V
z@yyqLJg;V2Ul%gZ*?2WiB%bNfz1}F^UeTpW^N?dSY@NL3zDD+Tzk$Cg_=cj!M^ot0
zu%qYEoTU9K@kMP2H52_@<2On}lNX!oZ(oWk^?eSfXAa3M8S?8tzISV2V&9A+_-47Y
z>4dv`t@!&Za-K@mTm#vadc{0aWDV*_%EiGK7qMC_(`exc>-$Gb9~W!w_^{*pYRm~G
zBN{nA<l~YIv(*f3@JAyAZDXwp4d;meFk*lN;rx5VQze6aK!n?W9`Uc4pES2K&V3BC
zkTJK{PcIXdQ?hM;i7~K{wRSeU-w9_32aC}+7nN6r5o<=I@CyjQAS~;jsb7p#@eUT2
zkh1M~1>;cm^w$VWg1O^^<6vY`1XCD|s_zv*g*5&V#wv&s#h$xlUilPe4U@I&UXZbL
z0)%9Uj&@yd03n;!7do+bfixH^FeZ-Ema}s;DQX2gY+7g0s(9;`8GyvPY1*vxiF&|w
z>!vA~GA<~JUqH}d;DfBSi^IT*#lrzXl$fNpq0_T1tA+`A$1?(gLb?e#0>UELvljtQ
zK+*74m0jn&)5yk8mLBv;=@}c{t0ztT<<S2g5CX`xuBQVwYJOMIsv7paOX6ypYJL$a
zJ|Vy}#?V4i+kjXzBq)LcuJEA=z^Z2W4WQ1U@0}*!;_q<!3_ls8PhMM3ii*Ci+cF6=
zF!@E<x#%Yvb!P0>v;Avck$S6D`Z)^c0(jiwKhQsn|LDRY&w(Fmi91I7H6S;b0XM{e
zXp0~(T@k_r-!jkLwd1_Vre^v$G4|kh4}=Gi?$AaJ)3I+^m|Zyj#*?Kp@w(lQdJZf4
z#|IJW5z+S^e9@(6hW6N~{pj8|NO*>1)E=%?nNUAkmv~OY&ZV<PHdt%yO<W_%O|c-T
zC%nAvgv?#h>;m-%?pQ_11)hAr0oAwILrlsGawpxx4D43J&K=n+p3WLnlDsQ$b(9+4
z?mO^hmV^F8MV{4<aA#E-8o{y-by8hR1>Lx>(Q=aHhQ1){0d*(e&s%G=i5rq3;t{JC
zmgbn5Nkl)t@fPH$v;af26lyhH!k+#}_&aBK4baYPbZy$5aFx4}ka<ge$nBI}>&qxl
z$=Rh$W;U)>-=S-0=?7FH9dUAd2(q#4TCAHky!$^~;Dz^j|8_wuKc*YzfdA<NJp8x7
z`_}_7!m44CG`<6nLk0r3A}8e>ht@Q&ror?91Dm!N03=4=O!a)I*0q~p0g$Fm$pmr$
zb;wD;STDIi$@M%y1>p&_>%?UP($15gou_ue1u0!4(%81;qcIW8NyxFEvXpiJ|H4wz
z*mFT(qVx1FKufG11hByuX%lPk4t#WZ{>8ka2efjY`~;AL6vWyQKpJun2nRiZYDij$
zP>4jQXPaP$UC$yIVgGa)jDV;F0l^n(V=HMRB5)20V7&r$<L^Phf(W29K>jmk{UUIe
zVjKroK}JAbD>B`2cwNQ&GDLx8{pg`7hbA~grk|W6LgiZ`8y`{Iq0i>t!3p2}MS6S+
zO_ruKyAElt)rdS>CtF7j{&6rP-#c=7evGMt7B6`7HG|-(WL`bDUAjyn+k$mx$C<FS
ztTQ#rrhaxTX7@2TN#`pson<p6thk-4?N)^;_(Up!_V=f}<~kR)zD%o0iiqseIMZqh
zGU`kZGbN)qs{;AuZP?~%PajDo&b&7)!V!+|VO<ediN}{)OvR~sQ<ZYe%O|)8-DTKw
zTXmYP$VLa(Y>H;q2Dz4x;cPP$hW=`pFfLO)!jaCL@V2+F)So3}vg|%O*^T1j>C2lx
zsURO-zIJC$^$g2byVbRIo^w>UxK}74^TqUiRR#7s_X$e)$6iYG1(PcW7un-va-S&u
zHk9-6Zn&>T==A)lM^D~bk{&rFzCi35>UR!ZjQkdSiNX*-;l4z9j*7|q`TBl~Au`5&
z+c)*8?#-tgUR$Zd%Q3bs96w6k7q@#tUn`5rj+r@_sAVVLqco|6O{ILX&U-&-cbVa3
zY?ngHR@%l{;`ri%H*0EhBWrGjv!LE4db?HEWb5mu*t@{kv|XwK8?npOshmzf=vZA@
zVSN9sL~!sn?r(AK)Q7Jk2(|M67Uy3I{eRy<vjA)m;~)jV3DFGzL)eNbs@Sy80roD>
z_l&Y@A>;vjkWN5I2xvFFTLX0i+`{qz7C_@bo`ZUzDugfq4+>a3?1v%)O+YTd6@Ul7
zAfLfm=nhZ`)P~&v90$&UcF+yXm9sq!qCx3^9gzIcO|Y(js^Fj)Rvq>nQAHI92ap=P
z10A4@prk+<s7nQxb0&o?puD0BStB$NLIA{pVg<pW;2=HJ11ZpVkRkF89w0s#3ef?(
zka>AGWCb`2)dQYFuR$|H6iDE8p}9a?#nV2}LBCoCf(Xi2@szia7#gY>b|l!-U`c}@
zLdhvQjc!BdLJvYvzzzngnw51yRYCqh4}$oRCy-z|v3Hc*d|?^Wj=l~18*E~*cR_kU
z{XsxM1i{V*4GujHQ3DBpl2w4FgFR48Nma@HPgnyKoIEY-MqmMeY=I<%oG~l!f<+FN
z1ZY^;10j4M4<Vo=b&OyEfF!Y);yDCJas8bbVhK~blk}<IGME~h)6n~gdmqP>#HYXP
zw5eJpA_y(>uLQ~OucgxDLuf}fVs272FaMxhn4xnDGIyLXnw>Xsd^J8XhcWIwIoQ9}
z%FoSJTAGW(SRGwJwb=@pY7r$uQRK3Zd~XbxU)ts!4XsJrCycrWSI?e!IqwqIR8+Jh
zlRjZ`UO1I!BtJR_2~7AbkbSm%XQqxEPkz6BTGWx8e}nQ=w7bZ|eVP4?*Tb!$(R)iC
z9)&%bS*u(lXqzitAN)Oo=&Ytn>%Hzjc<5liuPi>zC_nw;Z0AE3Y$Jao_Q90R-gl~5
z_xAb2J%eArrC1CN4G$}-zVvCqF1;H;abAu6G*+PDHSYFx@Tdbfox*uEd3}BUyYY-l
zTfEsOqsi#f9^FoLO;ChK<554qkri&Av~SIM*{fEYRE?vH7pTAOmu2pz3X?Wn*!ROX
ztd54huAk&mFBemMooL33RV-*1f0Q3_(7hl$<#*|WF9P!;r;4_+X~k~uKEqdzZ$5Al
zV63X<s4EnR@itBNL^suG_KHV!zgrw6&Bq&`dNv>N<k2!6lBSoSAvQBw$a}{Sg*d5f
zJqeF6lxH}v-(s5jl(8V8Bv*((#aw(*iLTd8#?8FnMLG#}AorDTkK*%$ni#S{e-*jA
zjy$_xALPmR?$A)F?XdsKy|!Ue+lIR5=csS!ZPu7h{Nc+Sd%?*WHR`S5ByDdhQAsNO
zeyx0!D+fx-a_t<57fQ^<7*WTVDog0}WA0F2_h++_I?f`i|C>@)j$FN#cCD;ek1R#l
zv%pGrhB~KWgoCj%GT?%{@@o(AJGt*PG#l3i>lhmb_twKH^EYvacVY-6bsCl5*^~L0
zonm@lk2UvvTKr2RS%}T>^~EYqdL1q4nD%0n&Xqr^cK^`J5W;lRRB^R-O<zOhVxo?8
zb#fjP=~|*nH<rZsU&F20QcP*BR|)$r#sFFtYi6hV=2&f<YJ%JC0IAdIRdHjO(;S%3
zC;L{EqcHO368@u|<ql>8b&HENO||mo0xaD+S=I8RTlIfVgqN@SXDr2&-)we--K7w=
zJVU8?Z+7k9dy;s;^gDkQa`0nz6N{T?(A&Iz)2!DEecLyRa&FI!id#5Z7B*O2=PsR0
zEvc|8{NS^)!d)MDX(97Xw}m&kEO@5jqRaDZ!+%`wYOI<23q|&js`&o4xvjP7D_xv@
z5hEwpsp{HezI9!~6O{~)lLR@oF7?J7i>1|5a~UuoN=q&6N}EJPV_GD`&M*v8Y`^2j
zKII*d_@Fi$+i*YEW+Hbz<W=zs^XxM$!;??OHDS{MUEdOi9{rF;;#a0RO>n{iQk~yP
z>7N{S4)r*!NwQ`(qcN#8SRQsNK6>{)X12nbF`*7#ecO7I)Q$uZsV+xS4E7aUn+U(K
baj7?x%VD!5Cxk2YbYLNVeiXvvpMCWYo=by@

diff --git a/rust/ballista/ui/scheduler/public/index.html b/rust/ballista/ui/scheduler/public/index.html
deleted file mode 100644
index d902333f034..00000000000
--- a/rust/ballista/ui/scheduler/public/index.html
+++ /dev/null
@@ -1,62 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-<!DOCTYPE html>
-<html lang="en">
-  <head>
-    <meta charset="utf-8" />
-    <link rel="icon" href="%PUBLIC_URL%/favicon.ico" />
-    <meta name="viewport" content="width=device-width, initial-scale=1" />
-    <meta name="theme-color" content="#000000" />
-    <meta
-      name="description"
-      content="Ballista Scheduler UI - manage nodes & tasks"
-    />
-    <link rel="apple-touch-icon" href="%PUBLIC_URL%/logo192.png" />
-    <!--
-      manifest.json provides metadata used when your web app is installed on a
-      user's mobile device or desktop. See https://developers.google.com/web/fundamentals/web-app-manifest/
-    -->
-    <link rel="manifest" href="%PUBLIC_URL%/manifest.json" />
-    <!--
-      Notice the use of %PUBLIC_URL% in the tags above.
-      It will be replaced with the URL of the `public` folder during the build.
-      Only files inside the `public` folder can be referenced from the HTML.
-
-      Unlike "/favicon.ico" or "favicon.ico", "%PUBLIC_URL%/favicon.ico" will
-      work correctly both with client-side routing and a non-root public URL.
-      Learn how to configure a non-root public URL by running `npm run build`.
-    -->
-    <title>Ballista UI</title>
-  </head>
-  <body>
-    <noscript>You need to enable JavaScript to run this app.</noscript>
-    <div id="root"></div>
-    <!--
-      This HTML file is a template.
-      If you open it directly in the browser, you will see an empty page.
-
-      You can add webfonts, meta tags, or analytics to this file.
-      The build step will place the bundled scripts into the <body> tag.
-
-      To begin the development, run `npm start` or `yarn start`.
-      To create a production bundle, use `npm run build` or `yarn build`.
-    -->
-  </body>
-</html>
diff --git a/rust/ballista/ui/scheduler/public/logo192.png b/rust/ballista/ui/scheduler/public/logo192.png
deleted file mode 100644
index fc44b0a3796c0e0a64c3d858ca038bd4570465d9..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 5347
zcmZWtbyO6NvR-oO24RV%BvuJ&=?+<7=`LvyB&A_#M7mSDYw1v6DJkiYl9X<guIKOG
zci*|^ymP*p?>jT!%$dLEBTQ8R9|wd3008in6lFF3GV-6mLi?MoP_y~}QUnaDCHI#t
z7w^m$@6DI)|C8_jrT?q=f8D?0AM?L)Z}xAo^e^W>t$*Y0KlT5=@bBjT9k<?nGGBhQ
zSbehEe6l@wQk?yk{Pz@AcMVld0M;GTCE?4p`2*7=c-2|99C89m^UO&?Z>xb%-KNdk
zeOS1tKO#ChhG7%{ApNBzE2ZVNcxbrin#E1TiAw#BlUhXllzhN$qWez5l;h<YdrI9P
zS<6GhD3leYXm+LY=TY4I>+t^q#Eav8PhR2|T}y5kkflaK`ba-eoE+Z2q@o6P$)=&`
z+(8}+-McnNO>e#$Rr{32ngsZIAX>GH??tqgwUuUz6kjns|LjsB37zUEWd|(&O!)DY
zQLrq%Y>)Y8G`yYbYCx&aVHi@-vZ3|ebG!f$sTQqMgi0hWRJ^Wc+Ibv!udh_r%2|U)
zPi|E^PK?UE!>_4`f`1k4hqqj_$+d!EB_#IYt;f9)fBOumGNyglU(ofY`yHq4Y?B%-
zp&G!MRY<~ajTgIHErMe(Z8JG*;D-PJhd@RX@QatggM7+G(Lz8eZ;73)72Hfx5KDOE
zkT(m}i2;@X2AT5fW?qVp?@WgN$aT+f_6eo?IsLh;jscNRp|8H}Z9p_UBO^SJXpZew
zEK8fz|0Th%(Wr|KZBGTM4yxkA5CFdAj8=QSrT$fKW#tweUFqr0TZ9D<AY0)k`aBx_
z>~a5lF{)%-tTGMK^2tz(y2v$i%V8XAxIywrZCp=)83p(zIk6@S5AWl|Oa2hF`~~^W
zI;KeOSkw1O#TiQ8;U7OPXjZM|KrnN}9arP)m0v$c|L)lF`j_rpG(zW1Qjv$=^|p*f
z>)Na{D&>n`jOWMwB^TM}slgTEcjxTlUby89j1)|6ydRfWERn3|7Zd2&e7?!K&5G$x
z`5U3uFtn4~SZq|LjFVrz$3iln-+ucY4q$BC{CSm7Xe5c1J<=%Oagztj{ifpaZk_bQ
z9Sb-LaQMKp-qJA*bP6DzgE3`}*i1o3GKmo2pn@dj0;He}F=BgINo};6gQF8!n0ULZ
zL>kC0nPSFzlcB7p4<H52f8=qMn2=dQ!;xXD`6jdiBJ2^oNyt+16A(f<i;0;6ddGE;
zQ_@XTca6wSK(vK5KIKHUgO;P>1doao2F7%6IUTi_+!L`MM4o*#Y#0v~WiO8<L#fHx
zI?x?k(&T-}!n%}LcF+uCp*>uSeAUNp=vA2KaR&=jNR2iVwG>7t%sG2x_~yXzY)7K&
zk3p+O0AFZ1eu^T3s};B<g5t4vVJN7*?kWOGhv$ru8HW)vzo*&RaaqNEl3s?|)YGKH
zo63kVeX8eiiI8)8TVI<9KtqUE{ofuaw7$nnPUt#2l$=IC;iDij;8{QXU+uLWA9c~M
z?KiTNfE|~IwacG?sFBRbqY&vgc~Yaopzd0{Lg`-WSBW2a@&8=tG<r`Ob?)2siT;lG
zPzbHtt{(VS9*a_>%6TpJ6h-Y%B^*zT&SN7C=N;g|#dGIVMSOru3iv^SvO>h4<o1)Q
ztk-z{yw|{Hc59vTba3&#6I)4@Z!Z{_&vNhxwseBQJk-micCb@PRsZ-yUF*D=BME?9
zv0H77d40W7BL-#9+(qd9=V7!I>M=t-N1GSLLDqVTcgurco6)3&XpU!FP6Hlrmj}f$
zp95;b)>M~`kxuZF3r~a!rMf4|&1=uMG$;h^g=Kl;H&Np-(pFT9FF@++MMEx3R<rS-
zuB^adWYC5}jnG`RBeLHUV`KdbUu)vW8p$<wk-gJklNpkTMH8;qgxUtn=hQw+aXu!!
z7L<V8=#FBERK(Iy;KSCGArNoBxI|R+%WaYJr`}%uyfu_sJ6N4<E%!ST6&8KTNUgT0
zc=|z>BsK?AU0fPk-#mdR)Wdkj)`>ZMl#^<80kM87VvsI3r_c@_vX=fdQ`_9-d(xiI
z4K;1y1TiPj_RPh*SpDI7U~^QQ?%0&!$Sh#?x_@;ag)P}ZkAik{_WPB4rHyW#%>|Gs
zdbhyt=qQPA7`?h2_8T;-E6HI#im9K>au*(j4;kzwMSLgo6u*}-K`$_Gzgu&XE)udQ
zmQ72^eZd|vzI)~!20JV-v-T|<4@7ruqrj|o4=JJPlybwMg;M$Ud7>h6g()CT@wXm`
zbq=A(t;RJ^{Xxi*Ff~!|3!-l_PS{AyNAU~t{h;(N(PXMEf^R<?TfDfq&c>(B+ZVX3
z8y0;0A8hJYp@g+c*`>eTA|3Tgv9U8#BDTO9@a@gVMDxr(fVaEqL1tl?md{v^j8aUv
zm&%PX4^|<cvLF*HzSDMGV0iHPD$KT$lv#8;LIw%pD|^3Sh^Dv=f=y*RKZlzMkH(pA
zj!TBU#${|io0kf9sBt#c(IUh^Nw?i5pPmkQDL8Jo`ihi{POC*hzPF#9gJ%+*%r~)G
z*hzHaRQu;^GSmtSWXj1<&y{<D%B-d(ca1<IOKZoU>rX|?E4^CkplWWNv*OKM>DxPa
z!RJ)U^0-WJMi)Ksc!^ixOtw^egoAZZ2Cg;X7(5xZG7yL_;UJ#yp*ZD-;I^Z9qkP`}
zwCTs0*%rIVF1sgLervtnUo&brwz?6?PXRuOCS*JI-WL6GKy7-~yi0giTEMmDs_-UX
zo=+nFrW_EfTg>oY72_4Z0*uG>MnXP=c0VpT&*|rvv1i<G)%__T#O;}Vf68{=uDg!&
z$^|uGJ##zrX6I7v^ea{ysV}DJ_zrf_yt8+T?W6jw=&>StW;*^={rP<Gps5k_;Ey{*
zO|;e5vGXQ@h1vJKGQ+`NMmYBKV~Sx1US+h>1y?Hv+6R6bxFMkxpWkJ>m7Ba{>zc_q
zEefC3jsXdyS5??Mz7IET$Kft|EMNJIv7Ny8ZOcKnzf`K5Cd)&`-fTY#W&jnV0l2vt
z?Gqhic}l}mCv1yUEy$%DP}4AN;36$=7aNI^*AzV(eYGeJ(Px-j<^gSDp5dBAv2#?;
zcM<nu%TB#lev5kX<apfcKZZ%hDDU3kXtK*%;R839$alV38VWT{NJnhjF0GL`9rM2k
zVexf3KgbIO)>Xv#aj>%;MiG^q^$0MSg-(uTl!xm49dH!{X0){Ew7ThWV~Gtj7h%ZD
zVN-R-^7Cf0VH!8O)uUHPL2mO2tmE*cecwQv_5CzWeh)ykX8r5Hi`ehYo)d{Jnh&3p
z9ndXT$OW51#H5cFKa76c<%nNkP~<gM?)^OX$gL^Ky|we;1(h|2M#l;#h2Tj`PPB<E
z!n=Eb`hcI+66~)eT{SBi;R$mV2KtH}>FU93b5h-|Cb}ScHs@4Q#|}byWg;KDMJ#|l
zE=MKD<?0c>*F@HDBcX@~QJH%56eh~jfPO-uKm}~t7Vk<jf*+P>HxHT;)4sd+?Wc4*
z>CyR*{w@4(gnYRdFq=^(#-ytb^5ESD?x<0Skhb%Pt?npNW1m+Nv`tr9+qN<3H1f<%
zZvNEqyK5F<KUONUP{U|Z&`@-OcU{=Mb%iZGj^d}>gPsQ`QIu9P0x_}wJR~^CotL|n
zk?dn;tLRw9jJTur4uWoX6iMm914f0AJfB@C74a;_qRrAP4E7l890P&{v<}>_&GLrW
z)klculcg`?zJO~4;BBAa=POU%aN|pmZJn2{hA!d!*lwO%YSIzv8bTJ}=nhC^n<w3-
z-v~(ZP6zhLQOa--Vj)F~k0Ob}euB(Y8{v*v$;WjNYg|Cj9;VkDLv+N+V{aW7CW=3<
z$l$KzIhY7gI#*j8`VKQqt@ea1=E#0c5IVICnVAH{bp_LL1iIVw*Itgfi#Sq7_Q<98
zA1cq2BqF{g9$p1@&gq>}g(ld^rn#kq9Z3)z`k9lvV>y#!F4e{5c$tnr9M{V)0m(Z<
z#88vX6-AW7T2UUwW`g<;8I$Jb!R%z@rCcGT)-2k7&x9kZZT66}Ztid~6t0jKb&9mm
zpa}LCb`bz`{MzpZR#E*QuBiZXI#<`5qxx=&LMr-UUf~@dRk}YI2hbMsAMWOmDzYtm
zjof16D=mc`^B$+_bCG$$@R0t;e?~UkF?7<(vkb70*EQB1rfUWXh$j)R2)+dNAH5%R
zEBs^?N;UMdy}V};59Gu#0$q53$}|+q7CIGg_w_WlvE}AdqoS<7DY1LWS9?TrfmcvT
zaypmplwn=P4;a8-%l^e?f`OpGb}%(_mFsL&GywhyN(-VROj`4~V~9bGv%UhcA|YW%
zs{;nh@aDX11y^HOF<O&mcM-|{L00A>XB$a7#Sr3cEtNd4eLm@Y#fc&j)TGvbbMwze
zXtekX_wJqxe4NhuW$r}cNy|L{V=t#$%SuWEW)YZTH|!iT79k#?632OFse{+BT_gau
zJwQcbH{b}dzKO?^dV&3nTILYlGw{27UJ72ZN){BILd_HV_s$WfI2DC<9LIHFmtyw?
zQ;?MuK7g%Ym+4e^W#5}WDLpko%jPOC=aN)3!=8)s#Rnercak&b3ESRX3z{xfKBF8L
z5%CGkFmGO@x?_mPGlpEej!3!AMddChabyf~nJNZxx!D&{@xEb!TDyvqSj%Y5@A{}9
zRzoBn0?x}=krh{ok3Nn%e)#~uh;6jpezhA)ySb^b#E>73e*frBFu6IZ^D7Ii&rsiU
z%jzygxT-n*joJpY4o&8UXr2s%j^Q{?e-<G_^{J76Mq?|eHl2Q}TIfLz1H}I9fvS=c
zm*oIlbD9$tAnOWfM^xYqm2?aavV7kSFN~t(hX*&jXwdT)(-yUc1(^4$bB@D*Rg4fF
zGv*BCBqRz8`^LRBWj98zY@aQ`B||0ovS-9b;m0T<TXj-Hh5;G|U%0o&CSKp)@EmW@
zChzrZU(8@!L%c_f>voloX`4DQyEK+DmrZh8A$)<mmOk^JRtKa)h*12TXYBu6*SOO3
ze#NvXs$UpPLNJLqoTpKTRV%K2qK9}L;hCtucS=cqUWJH}3K=Em3K@4&JHx{iSFa8E
zqVHD4$k0g3oTIYd{?wVF<(2=uTWaH@w6)NT<>iWL#NO9+Y@!sO2f@rI!@jN@>HOA<
z?q2l{^%mY*PNx2FoX+A7X3N}(RV$B`g&N=e0uvAvEN1W^{*W?zT1i#fxuw10%~))J
zjx#gxoVlXREWZf4hRkgdHx5V_S*;p-y%JtGgQ4}lnA~MBz-AFdxUxU1RIT$`sal|X
zPB6sEVRjGbXIP0U+?rT|y5+ev&OMX*5C$n2SBPZr`jqzrmpVrNciR0e*Wm?fK6DY&
zl(XQZ60yWXV-|Ps!A<n+?vbcQJG{k7=<p3~`+h4Kd_>{EF;=_z(YAF=T(-MkJXUoX
zI{UMQDAV2}Ya?EisdEW;@pE6dt;j0fg5oT2dxCi{wqWJ<)|SR6fxX~5CzblPGr8cb
zUBVJ2CQd~3L?7yfTpLNbt)He1D>*KXI^GK%<`bq^cUq$Q@uJifG>p3LU(!H=C)aEL
zenk7pVg}0{dKU}&l)Y2Y2eFMdS(<j~2+yHkUVn{?C5dsJXag$OUKP&Vl2lSAJL_uI
ztevY_DRGdi^2bgn=Ll@Km6Uk>JS0}oZUuVaf2+K*YFNGHB`^YGcIpnBlMhO7d4@vV
zv(@N}(k#REdul8~fP+^F@ky*wt@~&|(&&meNO>rKDEnB{ykAZ}k>e@lad7to>Ao$B
zz<1(L=#J*u4_LB=8w+*{KFK^u00NAmeNN7pr+Pf+N*Zl^dO{LM-hMHyP6N!~`24jd
zXYP|Ze;dRXKdF2iJG$U{k=S86l@pytLx}$JFFs8e)*Vi?aVBtGJ3JZUj!~c{<R$n(
ziv;4$OAR*24{KJ-u{Mz2C%|m?Lu8%akP2m-8t9?^hJ};KWux0$T6Zc6vmNj_(P^97
znxN8^Fl+G8f)9)fW?Qt`NcWoFLaagnygy3@TZ@Gu-ER?^vZ;^CT6NUUf@sIN!o*#I
zTQDxUq9IS<Y5j7ng8Y<xvPo+D=~nKpr2LflB|zg+Vlqg|&Z#IWz8CdW!h`-uDggJR
z+f9qRnZ^{3x$+Kifl~IZh)$X4>(rw5>vuRF$`^p!P8w1B=O!skwkO5yd4_XuG^QVF
z`-r5K7(IPSiKQ2|U9+`@Js!<HL1C{aO{H=}S{3p}_Edej>g6sfJwAHVd|s?|mnC*q
zp|B|z)(8+mxXyxQ{8Pg3F4|tdpgZZSoU4P&9I8)nHo1@)9_9u&NcT^FI)6|hsAZFk
zZ+arl&@*>RXBf-OZxhZerOr&dN5LW9@gV=oGFbK*J+m#R-|e6(Loz(;g@T^*oO)0R
zN`N=X46b{7yk5FZGr#5&n1!-@j@g02g|X>MOpF3#IjZ_4wg{dX+G9eqS+Es9@6nC7
zD9$NuVJI}6ZlwtUm5cCAiYv0(Yi{%eH+}t)!E^>^KxB5^L~a`4%1~5q6h>d;paC9c
zTj0wTCKrhWf+F#5>EgX<cLYfrtsHC5;@&1Tu=KIwHE|R;*1f&W24i_&2yx+Xe5N7V
z`hmH?m*G_>`sl%POl?oyCq0(w0xoL?L%)|Q7d|Hl92rUYAU#lc**I&^6p=4lNQPa0
znQ|A~i0ip@`B=FW-Q;zh?-wF;Wl5!+q3GXDu-x&}$gUO)NoO7^$BeEIrd~1Dh{Tr`
z8s<(Bn@gZ(mkIGnmYh_ehXnq78QL$pNDi)|QcT*|GtS%nz1uKE+E{7jdEBp%h0}%r
zD2|KmYGiPa4;md-t_m5YDz#c*oV_FqXd85d@eub?9N61QuYcb3CnVWpM(D-^|CmkL
z(F}L&N7qhL2PCq)fRh}XO@U`Yn<<Z#)X^Ij=#WjXr&snbL8Hbkya6{c!+Ay;w1Jlr
z9}X^@zhtUU>?TNGR4L(mF7#4u29{i~@k;pLsgl({YW5`Mo+p=zZn3L*4{JU;++dG9
X@eDJUQo;Ye2mwlRs<JiGX2Jghdw)}T

diff --git a/rust/ballista/ui/scheduler/public/logo512.png b/rust/ballista/ui/scheduler/public/logo512.png
deleted file mode 100644
index a4e47a6545bc15971f8f63fba70e4013df88a664..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 9664
zcmYj%RZtvEu=T>?y0|+_a0zY+Zo%Dkae}+MySoIppb75o?vUW_?)>@g{U2`ERQIXV
zeY$JrWnMZ$QC<=ii4X|@0H8`si75jB(ElJb00H<f^p#K#{|oMlvZ~_$qS5Nh{~rCn
zA4Y5cVZ*go<F$|f$hFu1n6>AB%>SlLR{!zO|C9P3zxw_U8?1d8uRZ=({Ga4shyN}3
zAK}WA(ds|``G4jA)9}Bt2Hy0+f3rV1E6b|@?hpGA=PI&r8)ah|)I2s(P5Ic*Ndhn^
z*T&j@gbCTv7+8rpYbR^Ty}1AY)YH;p!m948r#%7x^Z@_-w{pDl|1S4`EM3n_PaXvK
z1JF)E3qy$qTj5Xs{jU9k=y%SQ0>8E$;x?p9ayU0bZZeo{5Z@&FKX>}s!0+^>C^D#z
z>xsCPvxD3Z=dP}TTOSJhNTPyVt14VCQ9MQFN`rn!c&_p?&4<5_PGm4a;WS&1(!qKE
z_H$;dDdiPQ!F_gsN`2>`X}$I=B;={R8%L~`>RyKcS$72ai$!2>d(YkciA^J0@X%G4
z4cu!%Ps~2JuJ8ex`&;Fa0NQOq_nDZ&X;^A=oc1&f#3P1(!5il>6?uK4QpEG8z0Rhu
zvBJ+A9RV?z%v?!$=(vcH?*;vRs*+PPbOQ3cdPr5=tOc<a-ro?Zc5la+tVgj!hwG^F
z4*)z+Dj6T#D>Lqmfx@#hOqX0iN)wTTO21jH<>jpmwRIAGw7`a|sl?9y9zRBh>(_%|
zF?h|P7}~RKj?HR+q|4U`CjRmV-$mLW>MScKnNXiv{vD3&2@*u)-6P@h0A`eeZ7}71
zK(w%@R<4lLt`O7fs1E)$5iGb~fPfJ?WxhY7c3Q>T-w#wT&zW522pH-B%r5v#5y^CF
zcC30Se|`D2mY$hAlIULL%-PNXgbbpRHgn<&X3N9W!@BUk@9g*P5mz-YnZBb*-$zMM
z7Qq}ic0mR8n{^L|=+diODdV}Q!gwr?y+2m=3HWwMq4z)DqYVg0J~^}-%7rMR@S1;9
z7GFj6K}i32X;3*$SmzB&HW{PJ55kT+EI#SsZf}<HMwvFaF@TTvjK|r2I5vs2LpffL
z{Bv!nm|BcMhd{9tj}v>bD7nW^Haf}_gXciYKX{QBxIPSx2<c3y_W_ueW=lkplo6_C
z4pVF;!S-6Ziu|Mq`r%r``(lz68Cu3J#n^oDot`%+UFGP6#%tPM4xaP$n-~x$9>Ma?
zHQqgzZq!_{&zg{yxqv3xq8YV+`S}F6A>Gtl39_m;K4dA{pP$BW0oIXJ>jEQ!2V3A2
zdpoTxG&V=(?^q?ZTj2ZUpDUdMb)T?E$}CI>r@}PFPWD9@*%V6;4Ag>D#h>!s)=$0R
zRXvdkZ%|c}ubej`jl?cS$onl9Tw52rBKT)kgyw~Xy%z62Lr%V6Y=f?2)J|bZJ5(Wx
zmji`O;_B+*X@qe-#~`HFP<{8$w@z4@&`q^Q-Zk8JG3>WalhnW1cvnoVw>*R@c&|o8
zZ%w!{Z+M<tG%{r@|BA#vF#4bf!f++tPT5ym8X91BldH}+AI}Y|vX0!&r;lt@eS^lN
zvg`OBp>HeZ*OE4v<xX`%2$O4;S;&Cbv04cU5}9n7>*otkZqz11*s!#s^Gq>+o`8Z5
z^i-qzJLJh9!W-<EsXOxneQlPdVDePK)>;SmFkR<yAIkG=KFv={m{2U06G>8HEZ<d@
zt-Mk%C6JOyyG;Tv=hp@FaMRsh9p2N;-8nqS(z2KtL@(7nZSC(RXHEa2p`gB`jgK!f
zO!Zy))*;8CLtHznXwkD}e&!X(!hBWIP31$_mJ0Qb0%nbgBTMCL4HMpFsK&}NkusiS
z)A#t)!I!l!vB<6_T!LTOk!S`bCf_JCqRZ0G)JH4uX@iT41bzV2n&>JWiXk$40i6)7
zZpr=k2lp}SasbM*Nbn3j$sn0;rUI;%EDbi7T1ZI4qL6PNNM2Y%6{LMIKW+FY_yF3)
zSKQ2<Ya(Kkoy=zdC9*YK)(E7vJkX5gaF83}z?|lmq+>QSujzNMSL2r&bYs`|i2Dnn
z=>}c0>a}>|uT!IiMOA~pVT~R@bGlm}Edf}Kq0?*Af6#mW9f9!}RjW7om0c9Qlp;yK
z)=XQs(|<cGut0+-L3r!cqm1tE6>6GCadQbWIhYF=rf{Y)sj%^Id-ARO0=O^Ad;Ph+
z0?$eE1xhH?{T$QI>0JP75`r)U_$#%K1^BQ8z#uciKf(C701&RyLQWBUp*Q7eyn76}
z6JHpC9}R$J#(R0cDCkXoFSp;j6{x{b&0yE@P7{;pCEpKjS(+1RQy38`=&Yxo%F=3y
zCPeefABp34U-s?WmU#JJw2<Hy#VJPjU_z!blTTddQRvmJ;M1^SwGhk9F3L!VYgE2}
z!hN4|O@-;WQ~A8Ac|siS)QeHnw6sA2IkoVrt&@Qs%P6~@n5!6r8e%GfaPU^w9TIM(
z+qX(?1}UGxDSvKVX1LW8iFMjeq>3dcC{sPPFc2#J$ZgEN%zod}J~8dLm*fx9f6SpO
zn^Ww3bt9-r0XaT2a@Wpw;C23XM}7_14#%QpubrIw5aZtP+CqIFmsG4`Cm6rfxl9n5
z7=r2C-+lM2AB9X0T_`?EW&Byv<FnI6caTN5D)MUOu9(rjGJ}|99fVRv!X=m8I|ntE
zJ6XpQP1)X(+6SBV*7)9sgp(5zk-^p1E@|<-2^-l-ZW#Kj|IJ&(K=R75?+0Sn{(BV|
z)<!{Xjk+B_tZ!}_{^w<QMOVpX(FpR#8=7_$7TdAfPyiOWZvo8WTqZv}@;S*lPA$Rs
zn+2BOVa?j7wIw`|@yC+YqijL$-?j$YqnBw9uWnNX<bc*#<Sqv}z=}R0au2Xj__+Xc
z|5Zi<%3X($k`eB4OfoyCoJfrfsnP_(kI)~k#Slp5==?)J^f|>&K?HS4QLoylJ|OAF
z`8atBNTzJ&AQ<Z&$gy`^x^JOg-uapGljHB_jawUn+lOR$Lal;{U)TVO@l6XlAhXvf
z&}RhuqQ7a6<jLsJ0)_9Tl`lObK+u8*wmYdM+gnW=+v~Cg={2^r6A-TFvKP$LTFKFk
zC%VN!ZkZ6V>!>sOo$?^0xj~D(;kS$`9zbEGd>f6r`NC3X`tX)sWgWUUOQ7w=$TO<q
zW~{Euy_99}%58ATz~`-F(jnUkM{m~L{o=;3Hl9hX$s(cq;5cRA92lsb@Jg~cz*VaL
zt36Y*Oe?E>&*j;=u%25ay-%>3@81tGe^_z*C7pb9y*Ed^H3t$BIKH2o+olp#$q;)_
zfpjCb_^VFg5fU~K)nf*d*r@BCC>UZ!0&b?AGk_jTPXaSnCuW110wjHPPe^9R^;jo3
zwvzTl)C`Zl5}O2}3lec=hZ*$JnkW#7enKKc)(pM${_$9Hc=Sr_A9Biwe*Y=T?~1CK
z6eZ9uPICjy-sMGbZl$yQmpB&`ouS8v{58__t0$JP%i3R&%QR<t`@HqaIe3AGzxCPH
z06(XDO&~Ok$=UP%vG;P&hu?hEJ29wAaM6E!HZ0R;x8r*qHy+!hZxDYg-KGZI`{P_}
zY{dHlfnW6S)?CPAP)zp_!xelMRGuAo@t@!gSdowYtvHr8K9WNNw}a|TzE-87F!WRs
z-#;HoNH5O`b&7Kri+=ag7)^^;3^1?o2Q2qw@}+ZE%fAQU-nq{%`+R|B7FhGK+M!Fl
z2ZyeAFYON2o9at)@lQt2WoWTyBs<V9RDa+*;620gC9bv{?izYvGuFv(YU1!YDK{kN
zfuajP^aW|>3ianbZqDs<2#5FdN@n5bCn^ZtH992~5k(eA|8|@G9u`wdn7bnpg|@{m
z^d6Y`*$Zf2Xr&|g%sai#5}Syvv(>Jnx&EM7-|Jr7!M~zdAyjt*xl;OLhvW-a%H1m0
z*x5*nb=R5u><7lyVpN<INnH%~Yw@M#U6Pu*P(p=#E`62!G$HpM^Fj^SgYNx!W^2fr
zkI!m)izx6Dlg78SlE~FIDdEd}c|raeMkO<=|63PClZI~^epYjlJD}Z`<%|7DCiNUv
zG)@)s+cUFWM~QdlNaB)J5z`+Rh!K6;Qjn|xbp*GZE8Oc@gJVh~Yk^QNmM<N`7=nyt
z^&xA|=4HLov%ZKEejPsm{k;ktCe=zCR9B1@0wmg_efnHnX;*=is!NwZ>AR?q@1U59
zO+)QW<j~4qKP_fJbKV#dkbk5|s_=T+xd;<8uKpNiftfsnY^b*vkT2H1%VS`S<#uK|
zjNMI3R($QKsX+O9r(;Z277$LfqVgbuD{2wsZBsx#6p~V;+BiVs555-sk`S_(uZ4+h
z)<$QI#xEv`Eka6DmEWW&rUOf*Vo9$F6`G&Jq7J`r0+jS%Qxqc#v^D*NyEI1gB}|q!
z)+rEYS;WOK<Wz?e_Z2Q0;QX0^^7`!HvIf7)1y?Hoj9S$VrgX{Ye9I!Bx85oCC)?4z
zjdu{7tR8-C2~=B$IqnW+8OcPpDJW2wE_8+TYdyClF#Az`1L!6t9*pZdLVY;p<yBtF
zOm~+y=m;=-2Tc+I$K4se0R$L&IWm@H&UYad(l8Y*q?01q-iww`%aiBbF149`>wL8t
zyip?u_nI+K$uh{<eXaA|n3IG+8OrGZ)9HGA&^RJ{Jd9>y)~}qj?(w0&=SE^8`_WMM
zTybjG=999h38Yes7}-4*LJ7H)UE8{mE(6;8voE+TYY%33A>S6`G_95^5QHNTo_;Ao
ztIQIZ_}49%{8|=O;isBZ?=7kfdF8_@azfoTd+hEJKWE!)$)N%HIe2cplaK`ry#=pV
z0q{9w-`i0h@!R8K3GC{ivt{70IWG`EP<iX3`qZ%H^f(R!@OED}+3u4g7{Xr9UwpnK
zTOD@;FUScIf-f4;fF&{6twOyC0W6O!P4PKEm%fJY7_abkr=vB+O94OwvhK{ZP6_!?
z<iuvlT@!faRAoB1`yY6GRfnc*q1!>|(1g7i_Q<>aEAT{5(<ns<#%dS?L`x`En%)Ut
z{nCo<KWFUh<S<CDmdO|;fv7JLuUS7^E}0ijJVb)Q<0jWOI=_FiCK24AD%G{4e$NQd
zWv*R@_2{PvzvNMu@Y3QBNJJKAzFJ33r_h+}NP7l{uwC<5(0xcl0^=Em4$LS-ZF-5D
zMD(oR`sZ*UYIe*BY*c~7#G1SLTv3VfBTd_C@@TBwsuESuxm7Y0Uf&u{$l-}_?d>yD
z=!O?kq61VegV+st@XCw475j6vS)_z@efuqQgHQR1T4;|-#OLZNQJPV4k$AX1Uk8Lm
z{N*b*ia=I+MB}kWpupJ~>!C@xEN#Wa7V+7{m4j8c?)ChV=D?o~sjT?0C_AQ<J}v#S
zq&&10i;k!wZ0^l<H$PM2AS4v2B7le67PsGi3{5cEJvQTXYQd9$TA$ATXW$sERJFH|
zUFQmh;BXn<X&*(eK7*8b7K+8>7B-vxqX30s0I_`2$in86#`mAsT-w?j{&AL@B3$;P
z31G4(lV|b}uSD<Q-$cmmD#5!{N;ON{%=s}<yxrxZp;&F{OtN|&Osm7~f0ORXV+M%%
zhys!Gh~U9xxTSrb2pKtcmi71qF!D2BtUcc1(uP<LQ-4B<(+;>CIrjk+M1R!X7s<hT
z2KXhB-@~*Z#DnL&I)I4&$X=6)^|><DE!Cgw9m@wB3B0oPTj6$<u_@p0qZd2rpQY_#
zEFr4$jqoGqJSybV){Dvrnb_tOoKmSO#70t@P~q_L%<9+Qb(JW|nv0-SWLrjEuZTVs
z44b8p8-&PiM|E?GM`){f%M?C9*dLm28~DlBW?*4ua4H+nWN_%3iNC_(B+k``Oazc8
z83kgJUNcy2CKRR@Pn1$!R|+BC1lz16vh1Y$6BfKm&WMiaUzg^B!!Zp$xNrq{)ln-H
zcg5u<qf>4Aabn<)zpgT}#gE|mIvV38^ODy@<&yflpCwS#fRf9ZX3lPV_?8@C5)A;T
zqmouFLFk;qIs4rA=hh=GL~sCFsXHsqO6_y~*AFt93<ymU#4-U}YQ)Pa*UpuA%os{2
z&>9UYVBSx1s(=Kb&5;j7cSowdE;7()CC2|-i9Zz+_BIw8#ll~-tyH?F3{%`QCsY<I
zU5z8T?uMPvp*VYrm~~t-K+6Pgjku>a*b#s*9iCc`1P1oC26?`g<9))EJ3%xz+O!B3
zZ7$j~To)C@PquR>a1+Dh>-a%IvH_Y7^ys|4o?E%3`I&ADXfC8++hAdZfzIT#%C+Jz
z1lU~K_vAm0m8Qk}K$F>|<CsjNZ*?_o$*ZsW3W*ZecdNs4Im>>RPK%<1SI0(G+8q~H
zAsjezyP+u!Se4q3GW)`h`NPSRlMoBjCzNPesWJwVTY!o@G8=(6I%4XHGaSiS3MEBK
zhgGFv6Jc>L$4jVE!I?TQuwvz_%CyO!bLh94nqK11C2W$*aa2ueGopG8DnBICVUORP
zgytv#)49fVXDaR$SukloYC3u7#5H)}1K21=?DKj^U)8G;MS)&Op)g^zR2($<>C*zW
z;X7`hLxiIO#J`ANdyAOJle4V%ppa*(+0i3w;8i*BA_;u8gOO6)MY`ueq7stBMJTB;
z-a0R>hT<!E*EnpUxAxCvwvo$2Z}nSc&KEBz0q7{Fm>*}>z|Gg}@^zDL1MrH+2hsR8
zHc}*9IvuQC^Ju)^#Y{fOr(96rQNPNhxc;mH@W*m206>Lo<*SaaH?~8zg&f&%YiOEG
zGiz?*CP>Bci}!WiS=zj#K5I}>DtpregpP_tfZtPa(N<%vo^#WCQ5BTv0vr%Z{)0q+
z)RbfHktUm|lg&U3YM%lMUM(f<ok0JPn&g&>u}i#kjX9h>GYctkx9Mt_8{@s%!K_EI
zScgwy6%_fR?CG<BS|7E|e1Uiu+4N|3CP*{mA6E>JQtmgNAj^h9B#zma<L`GR52{?r
zw=yYEhBrx2I7mEv4WBN$tAM7|KP9m=OTPk^73y)|tA#lJ(mG>MDWgH55pGuY1Gv7D
z;8Psm(vEPiwn#MgJYu4Ty9D|h!?Rj0ddE|&L3S{IP%H4^N!m`60ZwZw^;eg4sk6K{
ziA^`Sbl_4~f&Oo%n;8Ye(tiAdlZKI!Z=|j$5hS|D$bDJ}p{gh$KN&JZYLUjv4h{NY
zBJ>X9z<S-$t-=L{3#MCguo5ug^BN(csELHS6D1V)g#mO1+{f#R(F2A;Jtz>!xfDGY
z+oh_Z&_e#Q(-}>ssZfm=j$D&4W4FNy&-kAO1~#3Im;F)Nwe{(*75(p=P^VI?X<FsK
z+mujv723Y8RTh-aX#a)Qm;PXW^W`h>0GFakfh+X-px4a%Uw@fSbmp9hM1_~R>?Z8+
ziy|e9>8V*`OP}4x5JjdWp}7eX;lVxp5qS}<UzbgS%F%qxg|}u`F%N~wbUq7r3Tq2N
z`L+(4<Yw>0YZek;SNmm7tEeSF*-dI)6U-A%m6YvCgM(}_=k#a6o^%-K4{`B1+}O4x
zztDT%hVb;v#?j`lTvlFQ3aV#zkX=7<v0Xt+SO4-V7;S>;YFLS$uIzb0E3lozs5`Xy
zi~vF+%{z9uLjKvKPhP%x5f<NLNK1Zu_hJxLjLK{w;{*>~7-Gj+%5N`%^=yk*Qn{`>
z;xj&ROY6g`iy2a@{O)V(jk&8#hHACVDXey5a+KDod_Z&}kHM}xt7}Md@pil{2x7E~
zL$k^d2@Ec2XskjrN+IILw;#7((abu;OJii&v3?60x>d_Ma(onIPtcVnX@ELF0aL?T
zSmWiL3(dOFkt!x=1O!_0n(cAzZW+3nHJ{2S>tgSK?~cF<W~g{Uk=X^%saR^iO2-=d
zF*rKVVAPU1W>ha^y(l@-Mr2W$%MN{#af8J;V*>hdq!gx=d0h$T7l}>91Wh07)9CTX
zh2_ZdQCyFOQ)l(}gft0UZ<Qo&@`u@GIyo^7BB;_Jrh>G`Sh2`x-w`5vC2UD}lZs*5
zG76$akzn}Xi))L3oGJ75#pcN=cX3!=57$Ha=hQ2^lwdyU#a}4JJOz6ddR%zae%#4&
za)bFj)z=YQela(F#Y|Q#dp}PJghITwXouVaMq$BM?K%cXn9^Y@g43$=O)F&ZlOUom
zJiad#dea;-eywBA@e&D6Pdso1?2^(pXiN91?jvcaUyYoKUmvl5G9e$W!okWe*@a<^
z8cQQ6cNSf+UPDx%?_G4a<m)UKh(R<crXCvksf8T4MGW_VPMHrJGOqh#<rdAK%kV`|
zqLv2C)0Oba2mQ50>IiybZHHagF{<S-4D+!Tsu-gt1o$)JW!(&V?v-lI1Lv(lQE6R!
zWjXrkjWX-&v!bw*7_u$ws?*dOF^}ann%C)lp)v!U?&S&S%`~VL={@<rBH$gl7F=4D
zs%B$Bo06T#CB)!Sf;LI9_<<tT&#Jv^`mC8{I3pWeU7jyQ0gh;9%B>;IcD(dPO!#=u
zWfqLcPc^+7Uu#l(B<Qg-R1c!j-uotKRCgB)MF*8IZpiA>pxft{*4lv#*u7X9AOzDO
z1D9?^jIo}?%iz(_dwLa{ex#T}76ZfN_Z-hwpus9y+4xaUu9cX}&P{XrZVWE{1^0yw
zO;YhLEW!pJcbCt3L8~a7>jsaN{V3>tz6_7`&pi%GxZ=V3?3K^<rn`e8a7?eZI-TG+
z{hR_I;2c?$BM1)pjP2l@7#6U3^o=*9Hsp__;N;$8F&5@Ghp#>U+*ryLSb)8^IblJ0
zSRLNDvIxt)S}g30?s_3NX>F?NKIGrG_zB9@Z>uSW3k2es_H2kU;Rnn%j5qP)!XHKE
zPB2mHP~tLCg4K_vH$xv`HbRsJwbZMUV(t=ez;Ec(vyHH)FbfLg`c61I$W_uBB>i^r
z&{_P;369-&>23R%qNIULe=1~T$(DA`ev*EWZ6j(B$(te}x1WvmIll21zvygkS%vwG
zzkR6Z#RKA2!z!C%M!O>!=Gr0(J0FP=-MN=5t-Ir)of50y10W}j`GtRCsXBakrKtG&
zazmITDJMA0C51&BnLY)SY9r)NVTMs);1<=oosS9g31l{4ztjD3#+2H7u_|66b|_*O
z;Qk6nalpqdHOjx|K&vUS_6ITgGll;TdaN*ta=M_YtyC)I9Tmr~VaPrH2q<HCA^;;b
zni;6_t9t~p5;T0mX`UW-c?4TAiadb)6}vsp``(hz(}(&x4ab<TyrI|$niD$NiTl-b
zJt9ixO#S|?KYH3Eadm4D8|NzLhAY993hoQanUS>b6sd~=AcIxV+%z{E&0@y=DPArw
zdV7z(G1hBx7hd{>(cr43^WF%4Y@PXZ?wPpj{OQ#tvc$pABJbvPGvdR`cAtHn)cSEV
zrpu}1tJwQ3y!mSmH*uz*x0o|CS<^w%&KJzsj~DU0cLQUxk5B!hWE>aBkjJle8z~;s
z-!A=($+}Jq_BTK5^B!`R>!MulZN)F=iXXeUd0w5lUsE5VP*H*oCy(<w;IZ?{Pso`R
z;9tSfBWDPpv(ru@ok6#>;?S$p*TVvTxwAeWFB$jHyb0593)$zqalVlDX=GcCN1gU0
zlgU)I$LcXZ8Oyc2TZYTPu@-;7<4YYB-``Qa;IDcvydIA$%kHhJKV^m*-<Eu89DD6r
z$hXxW3}1&`pz`)lE8f*kAC}P(6)qA>zxcvU4viy<a-^x1uJC*fAd9KCgjrYHBR=y`
zw#X)*QjS-7i>&Kr5GVM{IT>WRywKQ9;>SEiQD*NqplK-KK4YR`p0@JW)n_{TU3bt0
zim%;(m1=#v2}zTps=?fU5w^(*y)xT%1vtQH&}50ZF!9YxW=&7*W($2kgKyz1mUgfs
zfV<*XVVIFnohW=|j+@Kfo!#liQR^x>2yQdrG;2o8WZR+XzU_nG=Ed2rK?ntA;K5B{
z>M8+*A4<Ta>!Jm^Bg}aW?R?6;@QG@uQ8&oJ{hFixcfEnJ4QH?A4>P=q29oDGW;L;=
z9-a0;g%c`C+Ai!UmK$NC*4#;Jp<1=TioL=t^YM)<<%u#hnnfSS`nq63QKGO1L8RzX
z@MFDq<H`&N7x6|cHF$jHtc;8QSd3*XDI;%h;Be47aqDn+ovE51)i6?}0L%GiJ>s1z
ztYmxDl@LU)5acvHk)~Z`RW7=aJ_nGD!mOSYD>5Odjn@TK#LY{jf?+piB5AM-CAoT_
z?S-*q7}wyLJzK>N%eMPuFgN)Q_otKP;aqy=D5f!<Uxm0kJ!&((NN1Cc$Lf2D8xbv(
z*WfnV!Kme-C7`<}Hk^(!-La76WI@dSiD?t@Imfnp1{N8W$}|)~%wx6MKY2OYwhJDH
z)z%|ULU9X+--|?(ocK})YRZKw<7x0>7<=n(lNkYRXVpkB{TAYLYg{|(jtRqYmg$xH
zjmq<Cf4$wzOeRC1g`5bkE7g|z=wldi@dYy#eUIYfkuubZe|$MvzfnD`b2{>?B(RE4
zQx^~Pt}gxC2~l=K$$-sYy_r$CO(d=+b3H1MB*y_5g6WLaWTXn+TKQ|hNY^>Mp6k*$
zwkovomhu776vQATqT4blf~g;TY(MWCrf^^yfWJvSAB$p5l;jm@o#=!lqw+Lqfq>X=
z$6~kxfm7`3q4zUEB;u4qa#BdJxO!;xGm)wwuisj{0y2x{R(IGMrsIzDY9LW>m!Y`=
z04sx3IjnYvL<4JqxQ8f7qYd0s2Ig%`ytYPEMKI)s(LD}D@EY>x`VFtqvnADNBdeao
zC96X+MxnwKmjpg{U&gP3HE}1=s!lv&D{6(g_lzyF3A`7Jn*&d_kL<;dAFx!UZ>hB8
z5A*%LsAn;VLp>3${0>M?PSQ)9s3}|h2e?TG4_F{}{Cs>#3Q*t$(CUc}M)I}8cPF6%
z=+h(Kh^8)}gj(0}#e7O^FQ6`~fd1#8#!}LMuo3A0bN`o}PYsm!Y}sdOz$+Tegc=qT
z8x`PH$7lvnhJp{kHWb22l;@7B7|4yL4UOOVM0MP_>P%S1Lnid)+k9{+3D+JFa#Pyf
zhVc#&df87APl4W9X)F3pGS>@etfl=_E5tBcVoOfrD4hmVeTY-cj((pkn%n@EgN{0f
zwb_^Rk0I#i<UGQdc-Nmd=Rb)xhox&LXCiL2JOtMf1nJ{Y*CC^NXhbH@kK=kc_`LQd
zpKZRrfMT*+Mhk36qPN<LRtNnRgTK6F!~*AtcX%l1)YCyR^Cg*|aI@K7&6brfZD+JV
zGcqOky{~wE&Wx}Ojr2$00rvimv@fJs@iLuizXDa>ZuHK!l*lN`ceJn(sI{$Fq6nN&
zE<-=0_2WN}m+*ivmIOxB@#~Q-cZ>l136w{#TIJe478`KE7@=a{>SzPHsKLzYAyBQO
zAtuuF$-JSDy_S@6GW0MOE~R)b;+0f%_NMrW(+V#c_d&U8Z9+ec4=HmOHw?gdjF(Lu
zzra<iFcvmxzT>83M_BoO-1b3;9`%&DHfuUY)6YDV21P$C!Rc?mv&{lx#f8oc6?0?x
zK08{WP65?#>(vPfA-c=MCY|<S!ZyNl<um89EGH-nZopot<9vhnMSrJUdliV1$R@h(
zReDzy8)E@8VrU(MTz_4ai}TcxM)B2^Im7X9WBhxiIczSob@_Q~*btJ>%*1_<3D4NX
zeVTi-JGl2uP_2@0F{G({pxQOXt_d{g_CV6b?jNpfUG9;8yle-^4KHRvZs-_2siata
zt+d_T@U$&t*xaD22(fH(W1r$Mo?3dc%Tncm=C6{V9y{v&VT#^1L04vDrLM9qBoZ4@
z6DBN#m57hX7$C(=#$Y5$bJmwA$T8jKD8+6A!-IJwA{WOfs%s}yxUw^?MRZjF$n_KN
z6`_bGXcmE#5e4Ym)aQJ)xg3Pg0@k`iGuHe?f(5LtuzSq=nS^5z>vqU0EuZ&75V%Z{
zYyhRLN^)$c6Ds{f7*FBpE;n5iglx5PkHfWrj3<K%`xq+5RKqKFc8rLQ*ZRbbx$E1#
z3f|;4cOJ3Ebo^39!B`+!g&)irRekwjXNvz=dRTz5`G+KYEbcaaK8WXc9Bd>`x^j^t
z7ntuV`g!9Xg#^3!x)l*}IW=(Tz3>Y5l4uGaB&lz{GDjm2D5S$CExLT`I1#n^lBH7Y
zDgpMag@`iETKAI=p<5E#LTkw<F5K4Wbo)QRuzF*eH_@ivMrE0Wp~Gnj6dqxd?q0<i
zCg50hY}if?yn)!*`4%$BA^3^>zVR@=yY|uBVI1HG|8h+d;G-qfuj}-ZR6fN>EfCCW
z9~wRQoAPEa#aO?3h?x{YvV*d+NtPkf&4V0k4|L=uj!U{L+oLa(z#&iuhJr3-PjO3R
z5s?=nn_5^*^Rawr>>Nr@K(jwkB#JK-=+HqwfdO<+P5byeim)wvqGlP-P|~Nse8=XF
zz`?RYB|D6SwS}C<!9XcXRWqW$6w&z(j$m~}aKHcZK~n4i+541c<|vO(dRs@`mO_la
zV#-mf$jU#l&0!zW|IK42VgGl#Cw`Pp0u0|_KdVe9>+YQv+;}k6$-%D(@+t14BL@vM
z2q%q?f6D-A5s$_WY3{^G0F131bbh|g!}#BKw=HQ7mx;Dzg4Z*bTLQSfo{ed{4}NZW
zfrRm^Ca$rlE{Ue~uYv>R9{3s<lJFO-AA<uH1E0Ejy3!9=Y^Pj|>mwATcdM_6+yWIO
z*ZRH~uXE@#p$XTbCt5j7j2=86e{9>HIB6xDzV+vAo&B?KUiMP|ttOElepnl%|DPqL
b{|{}U^kRn2wo}j7|0ATu<;8xA7zX}7|B6mN

diff --git a/rust/ballista/ui/scheduler/public/manifest.json b/rust/ballista/ui/scheduler/public/manifest.json
deleted file mode 100644
index 080d6c77ac2..00000000000
--- a/rust/ballista/ui/scheduler/public/manifest.json
+++ /dev/null
@@ -1,25 +0,0 @@
-{
-  "short_name": "React App",
-  "name": "Create React App Sample",
-  "icons": [
-    {
-      "src": "favicon.ico",
-      "sizes": "64x64 32x32 24x24 16x16",
-      "type": "image/x-icon"
-    },
-    {
-      "src": "logo192.png",
-      "type": "image/png",
-      "sizes": "192x192"
-    },
-    {
-      "src": "logo512.png",
-      "type": "image/png",
-      "sizes": "512x512"
-    }
-  ],
-  "start_url": ".",
-  "display": "standalone",
-  "theme_color": "#000000",
-  "background_color": "#ffffff"
-}
diff --git a/rust/ballista/ui/scheduler/public/robots.txt b/rust/ballista/ui/scheduler/public/robots.txt
deleted file mode 100644
index dc045698d09..00000000000
--- a/rust/ballista/ui/scheduler/public/robots.txt
+++ /dev/null
@@ -1,20 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# https://www.robotstxt.org/robotstxt.html
-User-agent: *
-Disallow:
diff --git a/rust/ballista/ui/scheduler/react-table-config.d.ts b/rust/ballista/ui/scheduler/react-table-config.d.ts
deleted file mode 100644
index 4bdce7667ec..00000000000
--- a/rust/ballista/ui/scheduler/react-table-config.d.ts
+++ /dev/null
@@ -1,137 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-import {
-    UseColumnOrderInstanceProps,
-    UseColumnOrderState,
-    UseExpandedHooks,
-    UseExpandedInstanceProps,
-    UseExpandedOptions,
-    UseExpandedRowProps,
-    UseExpandedState,
-    UseFiltersColumnOptions,
-    UseFiltersColumnProps,
-    UseFiltersInstanceProps,
-    UseFiltersOptions,
-    UseFiltersState,
-    UseGlobalFiltersColumnOptions,
-    UseGlobalFiltersInstanceProps,
-    UseGlobalFiltersOptions,
-    UseGlobalFiltersState,
-    UseGroupByCellProps,
-    UseGroupByColumnOptions,
-    UseGroupByColumnProps,
-    UseGroupByHooks,
-    UseGroupByInstanceProps,
-    UseGroupByOptions,
-    UseGroupByRowProps,
-    UseGroupByState,
-    UsePaginationInstanceProps,
-    UsePaginationOptions,
-    UsePaginationState,
-    UseResizeColumnsColumnOptions,
-    UseResizeColumnsColumnProps,
-    UseResizeColumnsOptions,
-    UseResizeColumnsState,
-    UseRowSelectHooks,
-    UseRowSelectInstanceProps,
-    UseRowSelectOptions,
-    UseRowSelectRowProps,
-    UseRowSelectState,
-    UseRowStateCellProps,
-    UseRowStateInstanceProps,
-    UseRowStateOptions,
-    UseRowStateRowProps,
-    UseRowStateState,
-    UseSortByColumnOptions,
-    UseSortByColumnProps,
-    UseSortByHooks,
-    UseSortByInstanceProps,
-    UseSortByOptions,
-    UseSortByState
-} from 'react-table'
-
-declare module 'react-table' {
-    // take this file as-is, or comment out the sections that don't apply to your plugin configuration
-
-    export interface TableOptions<D extends Record<string, unknown>>
-        extends UseExpandedOptions<D>,
-            UseFiltersOptions<D>,
-            UseGlobalFiltersOptions<D>,
-            UseGroupByOptions<D>,
-            UsePaginationOptions<D>,
-            UseResizeColumnsOptions<D>,
-            UseRowSelectOptions<D>,
-            UseRowStateOptions<D>,
-            UseSortByOptions<D>,
-            // note that having Record here allows you to add anything to the options, this matches the spirit of the
-            // underlying js library, but might be cleaner if it's replaced by a more specific type that matches your
-            // feature set, this is a safe default.
-            Record<string, any> {}
-
-    export interface Hooks<D extends Record<string, unknown> = Record<string, unknown>>
-        extends UseExpandedHooks<D>,
-            UseGroupByHooks<D>,
-            UseRowSelectHooks<D>,
-            UseSortByHooks<D> {}
-
-    export interface TableInstance<D extends Record<string, unknown> = Record<string, unknown>>
-        extends UseColumnOrderInstanceProps<D>,
-            UseExpandedInstanceProps<D>,
-            UseFiltersInstanceProps<D>,
-            UseGlobalFiltersInstanceProps<D>,
-            UseGroupByInstanceProps<D>,
-            UsePaginationInstanceProps<D>,
-            UseRowSelectInstanceProps<D>,
-            UseRowStateInstanceProps<D>,
-            UseSortByInstanceProps<D> {}
-
-    export interface TableState<D extends Record<string, unknown> = Record<string, unknown>>
-        extends UseColumnOrderState<D>,
-            UseExpandedState<D>,
-            UseFiltersState<D>,
-            UseGlobalFiltersState<D>,
-            UseGroupByState<D>,
-            UsePaginationState<D>,
-            UseResizeColumnsState<D>,
-            UseRowSelectState<D>,
-            UseRowStateState<D>,
-            UseSortByState<D> {}
-
-    export interface ColumnInterface<D extends Record<string, unknown> = Record<string, unknown>>
-        extends UseFiltersColumnOptions<D>,
-            UseGlobalFiltersColumnOptions<D>,
-            UseGroupByColumnOptions<D>,
-            UseResizeColumnsColumnOptions<D>,
-            UseSortByColumnOptions<D> {}
-
-    export interface ColumnInstance<D extends Record<string, unknown> = Record<string, unknown>>
-        extends UseFiltersColumnProps<D>,
-            UseGroupByColumnProps<D>,
-            UseResizeColumnsColumnProps<D>,
-            UseSortByColumnProps<D> {}
-
-    export interface Cell<D extends Record<string, unknown> = Record<string, unknown>, V = any>
-        extends UseGroupByCellProps<D>,
-            UseRowStateCellProps<D> {}
-
-    export interface Row<D extends Record<string, unknown> = Record<string, unknown>>
-        extends UseExpandedRowProps<D>,
-            UseGroupByRowProps<D>,
-            UseRowSelectRowProps<D>,
-            UseRowStateRowProps<D> {}
-}
\ No newline at end of file
diff --git a/rust/ballista/ui/scheduler/src/App.css b/rust/ballista/ui/scheduler/src/App.css
deleted file mode 100644
index bea95535e9e..00000000000
--- a/rust/ballista/ui/scheduler/src/App.css
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
-*/
diff --git a/rust/ballista/ui/scheduler/src/App.test.tsx b/rust/ballista/ui/scheduler/src/App.test.tsx
deleted file mode 100644
index 20dca216eb2..00000000000
--- a/rust/ballista/ui/scheduler/src/App.test.tsx
+++ /dev/null
@@ -1,26 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-import React from "react";
-import { render, screen } from "@testing-library/react";
-import App from "./App";
-
-test("renders learn react link", () => {
-  render(<App />);
-  const linkElement = screen.getByText(/learn react/i);
-  expect(linkElement).toBeInTheDocument();
-});
diff --git a/rust/ballista/ui/scheduler/src/App.tsx b/rust/ballista/ui/scheduler/src/App.tsx
deleted file mode 100644
index 5864a27cdf5..00000000000
--- a/rust/ballista/ui/scheduler/src/App.tsx
+++ /dev/null
@@ -1,97 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-import React, {useState, useEffect} from "react";
-import {Box, Grid, VStack} from "@chakra-ui/react";
-import {Header} from "./components/Header";
-import { Summary} from "./components/Summary";
-import {QueriesList, Query, QueryStatus} from "./components/QueriesList";
-import {Footer} from "./components/Footer";
-
-import "./App.css";
-
-function uuidv4() {
-  return "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx".replace(/[xy]/g, function (c) {
-    var r = (Math.random() * 16) | 0,
-      v = c === "x" ? r : (r & 0x3) | 0x8;
-    return v.toString(16);
-  });
-}
-
-const getRandomQueries = (num: number): Query[] => {
-  const nodes: Query[] = [];
-
-  for (let i = 0; i < num; i++) {
-    nodes.push({
-      started: new Date().toISOString(),
-      query: "SELECT \n" +
-          "    employee.id,\n" +
-          "    employee.first_name,\n" +
-          "    employee.last_name,\n" +
-          "    SUM(DATEDIFF(\"SECOND\", call.start_time, call.end_time)) AS call_duration_sum\n" +
-          "FROM call\n" +
-          "INNER JOIN employee ON call.employee_id = employee.id\n" +
-          "GROUP BY\n" +
-          "    employee.id,\n" +
-          "    employee.first_name,\n" +
-          "    employee.last_name\n" +
-          "ORDER BY\n" +
-          "    employee.id ASC;",
-      status: QueryStatus.RUNNING,
-      progress: Math.round(Math.random() * 100),
-      uuid: uuidv4()
-    });
-  }
-  return nodes;
-};
-
-const queries = getRandomQueries(17);
-
-const App : React.FunctionComponent<any> = () => {
-
-  const [schedulerState, setSchedulerState] = useState(undefined)
-
-  function getSchedulerState() {
-    return fetch(`/state`, {
-      method: 'POST',
-      headers: {
-        'Accept': 'application/json'
-      }
-    })
-      .then(res => res.json())
-      .then(res => setSchedulerState(res));
-  }
-
-  useEffect(() => {
-    getSchedulerState();
-  }, []);
-
-  return (
-    <Box>
-      <Grid minH="100vh">
-        <VStack alignItems={"flex-start"} spacing={0} width={"100%"}>
-          <Header schedulerState={schedulerState} />
-          <Summary schedulerState={schedulerState} />
-          <QueriesList queries={queries} />
-          <Footer />
-        </VStack>
-      </Grid>
-    </Box>
-  );
-}
-
-export default App;
diff --git a/rust/ballista/ui/scheduler/src/components/DataTable.tsx b/rust/ballista/ui/scheduler/src/components/DataTable.tsx
deleted file mode 100644
index 38176d3e34f..00000000000
--- a/rust/ballista/ui/scheduler/src/components/DataTable.tsx
+++ /dev/null
@@ -1,131 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-import React from "react";
-import {Link, Table, Thead, Flex, Tbody, Text, Tr, Th, Td, VStack, chakra} from "@chakra-ui/react";
-import {TriangleDownIcon, TriangleUpIcon} from "@chakra-ui/icons";
-import {useTable, useSortBy, usePagination, Column as RTColumn} from "react-table";
-import {HiChevronLeft, HiChevronRight} from "react-icons/all";
-import TimeAgo from "react-timeago";
-
-type RenderFn = (props: any) => React.ReactNode;
-
-interface Row {
-    [name: string]: any;
-}
-
-// eslint-disable-next-line
-export type Column<Row> = RTColumn | {
-    isNumeric?: boolean;
-    render?: RenderFn;
-};
-
-interface DataTableProps {
-    columns: Column<Row>[];
-    data: Row[];
-    pageSize?: number;
-    maxW?: number;
-    pb?: number;
-}
-export const DateCell : (props: any) => React.ReactNode = (props: any) => {
-    return <TimeAgo minPeriod={60} date={props.value}
-                    formatter={(value: number, unit: TimeAgo.Unit, suffix: TimeAgo.Suffix) => {
-                        if (unit === 'second') return 'just now';
-                        const plural: string = value !== 1 ? 's' : '';
-                        return `${value} ${unit}${plural} ${suffix}`;
-                    }}
-    />
-}
-
-export const LinkCell : (props: any) => React.ReactNode = (props: any) => {
-    return (
-        <Link href={props.href} isExternal>
-            {props.value}
-        </Link>
-    )
-}
-
-export const DataTable: React.FunctionComponent<DataTableProps> = ({data, columns, pageSize = 10, maxW, pb}) => {
-        const {
-            getTableProps,
-            getTableBodyProps,
-            headerGroups,
-            rows,
-            prepareRow,
-            pageOptions,
-            canNextPage,
-            nextPage,
-            canPreviousPage,
-            previousPage,
-            state: {pageIndex},
-        } = useTable({columns: columns as any, data, initialState: {pageIndex: 0, pageSize},}, useSortBy, usePagination);
-
-        const last = data.length;
-        const start = (pageIndex * pageSize) + 1;
-        const end = Math.min((pageIndex + 1) * pageSize, last);
-
-        return (
-            <VStack maxW={maxW} pb={pb}>
-                <Table {...getTableProps()} size={"sm"}>
-                    <Thead>
-                        {headerGroups.map((headerGroup) => (
-                            <Tr {...headerGroup.getHeaderGroupProps()}>
-                                {headerGroup.headers.map((column: any) => (
-                                    <Th
-                                        {...column.getHeaderProps(column.getSortByToggleProps())}
-                                        isNumeric={column.isNumeric}
-                                    >
-                                        {column.render("Header")}
-                                        <chakra.span pl="4">
-                                            {column.isSorted ? (
-                                                column.isSortedDesc ? (
-                                                    <TriangleDownIcon aria-label="sorted descending"/>
-                                                ) : (
-                                                    <TriangleUpIcon aria-label="sorted ascending"/>
-                                                )
-                                            ) : null}
-                                        </chakra.span>
-                                    </Th>
-                                ))}
-                            </Tr>
-                        ))}
-                    </Thead>
-                    <Tbody {...getTableBodyProps()}>
-                        {rows.slice(start - 1, end).map((row: any) => {
-                            prepareRow(row);
-                            return (
-                                <Tr {...row.getRowProps()}>
-                                    {row.cells.map((cell: any) => (
-                                        <Td {...cell.getCellProps()} isNumeric={cell.column.isNumeric}>
-                                            {cell.render("Cell")}
-                                        </Td>
-                                    ))}
-                                </Tr>
-                            );
-                        })}
-                    </Tbody>
-                </Table>
-                {pageOptions.length > 1 ?
-                    (<Flex width={"100%"} pr={10} justifyContent={"flex-end"} pt={4}>
-                        <Text fontSize={"sm"} pr={2}>Showing {start} to {end} of {last}. </Text>
-                        <HiChevronLeft color={canPreviousPage ? 'black': 'dimgray'} onClick={previousPage}/>
-                        <HiChevronRight color={canNextPage ? 'black': 'dimgray'} onClick={nextPage}/>
-                    </Flex>) : null}
-            </VStack>
-        );
-    }
-;
diff --git a/rust/ballista/ui/scheduler/src/components/Empty.tsx b/rust/ballista/ui/scheduler/src/components/Empty.tsx
deleted file mode 100644
index b772e70e4e8..00000000000
--- a/rust/ballista/ui/scheduler/src/components/Empty.tsx
+++ /dev/null
@@ -1,36 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-import React from "react";
-import { Flex, Text } from "@chakra-ui/react";
-interface EmptyProps {
-  text: string;
-}
-
-export const Empty: React.FunctionComponent<EmptyProps> = ({ text }) => {
-  return (
-    <Flex
-      minH={100}
-      minW={200}
-      flex={1}
-      alignItems={"center"}
-      justifyContent={"center"}
-    >
-      <Text>{text}</Text>
-    </Flex>
-  );
-};
diff --git a/rust/ballista/ui/scheduler/src/components/Footer.tsx b/rust/ballista/ui/scheduler/src/components/Footer.tsx
deleted file mode 100644
index ab03898f44b..00000000000
--- a/rust/ballista/ui/scheduler/src/components/Footer.tsx
+++ /dev/null
@@ -1,28 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-import React from "react";
-import {Flex, Text} from "@chakra-ui/react";
-
-
-export const Footer: React.FunctionComponent = () => {
-    return (
-        <Flex borderTop={"1px solid #f1f1f1"} w={"100%"} p={4} justifyContent={"flex-end"}>
-            <Text fontSize="md" fontStyle={"italic"}>Licensed under the Apache License, Version 2.0.</Text>
-        </Flex>
-    )
-}
\ No newline at end of file
diff --git a/rust/ballista/ui/scheduler/src/components/Header.tsx b/rust/ballista/ui/scheduler/src/components/Header.tsx
deleted file mode 100644
index c0ddd35c726..00000000000
--- a/rust/ballista/ui/scheduler/src/components/Header.tsx
+++ /dev/null
@@ -1,82 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-import React from "react";
-import {Box, Flex, Text, Button} from "@chakra-ui/react";
-import Logo from "./logo.svg";
-import {AiFillGithub, HiDocumentText} from "react-icons/all";
-import {SchedulerState} from "./Summary";
-
-export const NavBarContainer: React.FunctionComponent<React.PropsWithChildren<any>> = ({children, ...props}) => {
-    return (
-        <Flex
-            as="nav"
-            align="center"
-            justify="space-between"
-            wrap="wrap"
-            w="100%"
-            padding={1}
-            bg={["white"]}
-            {...props}
-        >
-            {children}
-        </Flex>
-    );
-};
-
-interface HeaderProps {
-    schedulerState?: SchedulerState
-}
-
-export const Header: React.FunctionComponent<HeaderProps> = ({schedulerState}) => {
-    return (
-        <NavBarContainer borderBottom={"1px"} borderBottomColor={"#f1f1f1"}>
-            <Box w="100%" alignItems={"flex-start"}>
-                <NavBarContainer>
-                    <Flex flexDirection={"row"} alignItems={"center"}>
-                        <img alt={"Ballista Logo"} src={Logo}/>
-                        <Text
-                            background={"aliceblue"}
-                            ml={4}
-                            fontSize="md"
-                            padding={1}
-                            borderRadius={4}
-                        >
-                            Version - {schedulerState?.version}
-                        </Text>
-                    </Flex>
-                    <Flex>
-                        <a rel={"noreferrer"} target={"_blank"} href={"https://ballistacompute.org/docs/"}>
-                            <Button mr={4} colorScheme="blue" size="sm" rightIcon={<HiDocumentText/>}>
-                                Docs
-                            </Button>
-                        </a>
-                        <a
-                            rel="noreferrer"
-                            href={"https://github.com/apache/arrow/tree/master/rust/ballista"}
-                            target={"_blank"}
-                        >
-                            <Button colorScheme="blue" size="sm" rightIcon={<AiFillGithub/>}>
-                                Github
-                            </Button>
-                        </a>
-                    </Flex>
-                </NavBarContainer>
-            </Box>
-        </NavBarContainer>
-    );
-};
diff --git a/rust/ballista/ui/scheduler/src/components/NodesList.tsx b/rust/ballista/ui/scheduler/src/components/NodesList.tsx
deleted file mode 100644
index 2690e86b534..00000000000
--- a/rust/ballista/ui/scheduler/src/components/NodesList.tsx
+++ /dev/null
@@ -1,71 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-import React from "react";
-import {Box } from "@chakra-ui/react";
-import {Column, DateCell, DataTable} from "./DataTable";
-
-export enum NodeStatus {
-  RUNNING = "RUNNING",
-  TERMINATED = "TERMINATED"
-}
-
-export interface NodeInfo {
-  id: string;
-  host: string;
-  port: number;
-  status: NodeStatus;
-  started: string;
-}
-
-const columns : Column<any>[] = [
-  {
-    Header: "Node",
-    accessor: "id",
-  },
-  {
-    Header: "Host",
-    accessor: "host",
-  },
-  {
-    Header: "Port",
-    accessor: "port",
-  },
-  {
-    Header: "Status",
-    accessor: "status",
-  },
-  {
-    Header: "Started",
-    accessor: "started",
-    Cell: DateCell,
-  },
-];
-
-interface NodesListProps {
-  nodes:  NodeInfo[]
-}
-
-export const NodesList: React.FunctionComponent<NodesListProps> = ({
-  nodes = [],
-}) => {
-  return (
-    <Box flex={1}>
-      <DataTable maxW={960} columns={columns} data={nodes} pageSize={4} />
-    </Box>
-  );
-};
diff --git a/rust/ballista/ui/scheduler/src/components/QueriesList.tsx b/rust/ballista/ui/scheduler/src/components/QueriesList.tsx
deleted file mode 100644
index 2d7166a28eb..00000000000
--- a/rust/ballista/ui/scheduler/src/components/QueriesList.tsx
+++ /dev/null
@@ -1,115 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-import React from "react";
-import {CircularProgress, CircularProgressLabel, VStack, Skeleton, Stack, Text, Flex, Box} from "@chakra-ui/react";
-import {Column, DateCell, DataTable, LinkCell} from "./DataTable";
-import {FaStop} from "react-icons/fa";
-import {GrPowerReset} from "react-icons/gr";
-
-export enum QueryStatus {
-    QUEUED = "QUEUED",
-    RUNNING = "RUNNING",
-    FAILED = "FAILED",
-    COMPLETED = "COMPLETED",
-}
-
-export interface Query {
-    uuid: string;
-    query: string;
-    status: QueryStatus;
-    progress: number;
-    started: string;
-}
-
-export interface QueriesListProps {
-    queries?: Query[];
-}
-
-export const ActionsCell: (props: any) => React.ReactNode = (props: any) => {
-    return (
-        <Flex>
-            <FaStop color={"red"} title={"stop"}/>
-            <Box mx={2}></Box>
-            <GrPowerReset title={"Retry"}/>
-        </Flex>
-    )
-}
-
-export const ProgressCell: (props: any) => React.ReactNode = (props: any) => {
-    return (
-        <CircularProgress value={props.value} color="orange.400">
-            <CircularProgressLabel>{props.value}%</CircularProgressLabel>
-        </CircularProgress>
-    )
-}
-
-const columns: Column<any>[] = [
-    {
-        Header: "UUID",
-        accessor: "uuid",
-        Cell: LinkCell
-    },
-    {
-        Header: "Query",
-        accessor: "query",
-    },
-    {
-        Header: "Status",
-        accessor: "status",
-    },
-    {
-        Header: "Progress",
-        accessor: "progress",
-        Cell: ProgressCell,
-    },
-    {
-        Header: "Started",
-        accessor: "started",
-        Cell: DateCell,
-    },
-    {
-        Header: "Actions",
-        accessor: "",
-        Cell: ActionsCell,
-    }
-];
-
-const getSkeletion = () => (
-    <>
-        <Skeleton height={5}/>
-        <Skeleton height={5}/>
-        <Skeleton height={5}/>
-        <Skeleton height={5}/>
-        <Skeleton height={5}/>
-        <Skeleton height={5}/>
-    </>
-)
-
-export const QueriesList: React.FunctionComponent<QueriesListProps> = ({queries}) => {
-    const isLoaded = typeof queries !== "undefined";
-
-    //TODO: Remove blur once queries api is ready
-    return (
-        <VStack flex={1} p={4} w={"100%"} alignItems={"flex-start"} filter="blur(3px)">
-            <Text mb={4}>Queries</Text>
-            <Stack w={"100%"} flex={1}>
-                {isLoaded ? <DataTable columns={columns} data={queries || []} pageSize={10} pb={10}/> : getSkeletion()}
-            </Stack>
-        </VStack>
-    )
-};
\ No newline at end of file
diff --git a/rust/ballista/ui/scheduler/src/components/Summary.tsx b/rust/ballista/ui/scheduler/src/components/Summary.tsx
deleted file mode 100644
index 2e52498296f..00000000000
--- a/rust/ballista/ui/scheduler/src/components/Summary.tsx
+++ /dev/null
@@ -1,89 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-import React from "react";
-import { Box, Text, Flex, VStack } from "@chakra-ui/react";
-import { HiCheckCircle } from "react-icons/hi";
-import TimeAgo from "react-timeago";
-import { NodesList, NodeInfo } from "./NodesList";
-
-const Label: React.FunctionComponent<React.PropsWithChildren<any>> = ({
-  children,
-}) => {
-  return (
-    <Text fontSize="md" fontWeight={"light"} width={90}>
-      {children}
-    </Text>
-  );
-};
-
-export interface SchedulerState {
-  status: string;
-  started: string;
-  version: string;
-  executors: NodeInfo[];
-}
-
-export interface SummaryProps {
-  schedulerState?: SchedulerState
-}
-
-export const Summary: React.FunctionComponent<SummaryProps> = ({schedulerState}) => {
-
-  if (!schedulerState) {
-    return <Text>Scheduler isn't running</Text>
-  }
-
-  return (
-    <Flex bg={"gray.100"} padding={10} width={"100%"}>
-      <Box width={"100%"}>
-        <Flex paddingX={4}>
-          <VStack
-            minWidth={250}
-            fontSize="md"
-            alignItems={"flex-start"}
-            fontWeight={"normal"}
-          >
-            <Text fontWeight={"light"} mb={2}>General Cluster Info</Text>
-            <Flex>
-              <Label>Status</Label>
-              <Flex alignItems={"center"}>
-                <HiCheckCircle color={"green"} />
-                <Text pl={1}>Active</Text>
-              </Flex>
-            </Flex>
-            <Flex>
-              <Label>Nodes</Label>
-              <Text>{schedulerState.executors?.length}</Text>
-            </Flex>
-            <Flex>
-              <Label>Started</Label>
-              <Text>
-                <TimeAgo date={schedulerState.started} />
-              </Text>
-            </Flex>
-            <Flex>
-              <Label>Version</Label>
-              <Text>{schedulerState.version}</Text>
-            </Flex>
-          </VStack>
-          <NodesList nodes={schedulerState.executors} />
-        </Flex>
-      </Box>
-    </Flex>
-  );
-};
diff --git a/rust/ballista/ui/scheduler/src/components/logo.svg b/rust/ballista/ui/scheduler/src/components/logo.svg
deleted file mode 100644
index 3cb5257955a..00000000000
--- a/rust/ballista/ui/scheduler/src/components/logo.svg
+++ /dev/null
@@ -1,25 +0,0 @@
-
-<svg width="276" height="77" viewBox="0 0 276 77" fill="none" xmlns="http://www.w3.org/2000/svg">
-    <path d="M270.046 11.5891L270.046 11.5891C270.566 10.3565 269.332 9.089 268.086 9.62327L249.603 17.5458L249.8 18.0054L249.603 17.5458C248.312 18.0994 248.425 19.9782 249.778 20.3676L249.778 20.3676L256.976 22.4391L256.976 22.4391C257.138 22.4859 257.267 22.6145 257.315 22.7818L259.371 30.0321L259.852 29.8957L259.371 30.0321C259.757 31.3911 261.63 31.5107 262.181 30.2071L261.72 30.0125L262.181 30.2071L270.046 11.5891Z" fill="#0097E6" stroke="#0097E6"/>
-    <g filter="url(#filter0_di)">
-        <path d="M30.9609 42.4766C30.9609 41.8672 30.7734 41.4609 30.3984 41.2578C30.0391 41.0547 29.5078 40.9531 28.8047 40.9531H17.9297V39.2656H28.8047C31.3672 39.2656 32.6484 40.3359 32.6484 42.4766C32.6484 43.6797 32.2969 44.5469 31.5938 45.0781C30.8906 45.6094 29.9609 45.875 28.8047 45.875H17.9297V44.1875H28.8047C30.2422 44.1875 30.9609 43.6172 30.9609 42.4766ZM16.9922 56H15.3047V27.3125H28.8047C29.8672 27.3125 30.7734 27.6406 31.5234 28.2969C32.2734 28.9375 32.6484 29.7422 32.6484 30.7109C32.6484 31.9922 32.2969 32.9219 31.5938 33.5C30.8906 34.0625 29.9609 34.3438 28.8047 34.3438H17.9297V32.6562H28.8047C30.2422 32.6562 30.9609 32.0078 30.9609 30.7109C30.9609 30.1797 30.7578 29.7656 30.3516 29.4688C29.9453 29.1562 29.4297 29 28.8047 29H16.9922V56ZM17.9297 36.0312H28.8047C30.3516 36.0312 31.6562 35.5703 32.7188 34.6484C33.7969 33.7109 34.3359 32.3984 34.3359 30.7109C34.3359 29.3516 33.7734 28.1641 32.6484 27.1484C31.5391 26.1328 30.2578 25.625 28.8047 25.625H13.6172V56H11.9297V23.9375H28.8047C30.7266 23.9375 32.4062 24.6172 33.8438 25.9766C35.2969 27.3203 36.0234 28.8984 36.0234 30.7109C36.0234 32.1328 35.6875 33.375 35.0156 34.4375C34.3594 35.5 33.4688 36.3203 32.3438 36.8984C34.7969 37.7734 36.0234 39.5547 36.0234 42.2422C36.0234 44.3672 35.3203 46.0703 33.9141 47.3516C32.5078 48.6172 30.8047 49.25 28.8047 49.25H17.9297V47.5625H28.8047C30.3516 47.5625 31.6562 47.1016 32.7188 46.1797C33.7969 45.2422 34.3359 43.9297 34.3359 42.2422C34.3359 41.1016 33.9297 40.0938 33.1172 39.2188C32.3047 38.3438 31.1094 37.8359 29.5312 37.6953C29.375 37.7109 29.1328 37.7188 28.8047 37.7188H17.9297V36.0312ZM39.3984 30.7109C39.3984 32.8984 38.625 34.875 37.0781 36.6406C38.625 38.3594 39.3984 40.2266 39.3984 42.2422C39.3984 44.1953 38.9062 45.9766 37.9219 47.5859C36.9531 49.1797 35.6562 50.4219 34.0312 51.3125C32.4219 52.1875 30.6797 52.625 28.8047 52.625H17.9297V50.9375H28.8047C31.2422 50.9375 33.3359 50.125 35.0859 48.5C36.8359 46.8594 37.7109 44.7734 37.7109 42.2422C37.7109 41.3359 37.4766 40.3359 37.0078 39.2422C36.5547 38.1328 35.9219 37.2891 35.1094 36.7109C36.8438 35.0859 37.7109 33.0859 37.7109 30.7109C37.7109 28.4297 36.8203 26.4531 35.0391 24.7812C33.2578 23.0938 31.1797 22.25 28.8047 22.25H10.2422V56H8.55469V20.5625H28.8047C30.6641 20.5625 32.4062 21.0234 34.0312 21.9453C35.6719 22.8516 36.9766 24.0938 37.9453 25.6719C38.9141 27.2344 39.3984 28.9141 39.3984 30.7109ZM39 36.7344C40.3906 34.875 41.0859 32.8672 41.0859 30.7109C41.0859 28.6016 40.5234 26.6406 39.3984 24.8281C38.2891 23 36.7891 21.5547 34.8984 20.4922C33.0078 19.4141 30.9766 18.875 28.8047 18.875H6.86719V56H5.17969V17.1875H28.8047C31.2734 17.1875 33.5781 17.7969 35.7188 19.0156C37.875 20.2344 39.5859 21.8906 40.8516 23.9844C42.1328 26.0625 42.7734 28.3047 42.7734 30.7109C42.7734 33.0078 42.125 35.0234 40.8281 36.7578C42.125 38.5547 42.7734 40.3828 42.7734 42.2422C42.7734 44.3359 42.3984 46.2578 41.6484 48.0078C40.8984 49.7422 39.8828 51.1875 38.6016 52.3438C37.3359 53.5 35.8594 54.3984 34.1719 55.0391C32.4844 55.6797 30.6953 56 28.8047 56H17.9297V54.3125H28.8047C30.4453 54.3125 32.0156 54.0078 33.5156 53.3984C35.0156 52.7734 36.3203 51.9297 37.4297 50.8672C38.5391 49.8047 39.4219 48.5234 40.0781 47.0234C40.75 45.5234 41.0859 43.9297 41.0859 42.2422C41.0859 41.4922 40.9062 40.5938 40.5469 39.5469C40.2031 38.4844 39.6875 37.5469 39 36.7344ZM70.5234 39.3594C70.5234 37.3594 69.9062 35.8125 68.6719 34.7188C67.4375 33.625 65.8047 33.0781 63.7734 33.0781C62.0547 33.0781 60.5781 33.6719 59.3438 34.8594C58.1094 36.0469 57.4922 37.4688 57.4922 39.125C57.4922 40.0625 57.6172 40.9062 57.8672 41.6562C58.1172 42.3906 58.4531 43.0078 58.875 43.5078C59.3125 43.9922 59.8281 44.3984 60.4219 44.7266C61.0156 45.0391 61.6328 45.2656 62.2734 45.4062C62.9297 45.5469 63.625 45.6172 64.3594 45.6172C66.1406 45.6172 67.7656 45.1797 69.2344 44.3047V45.9922C67.7188 46.7891 66.0859 47.1875 64.3359 47.1875C63.2109 47.1875 62.1484 47.0391 61.1484 46.7422C60.1641 46.4297 59.2656 45.9688 58.4531 45.3594C57.6406 44.7344 56.9922 43.8906 56.5078 42.8281C56.0391 41.75 55.8047 40.5156 55.8047 39.125C55.8047 36.9688 56.5625 35.1406 58.0781 33.6406C59.5938 32.1406 61.4922 31.3906 63.7734 31.3906C66.2891 31.3906 68.3203 32.1016 69.8672 33.5234C71.4297 34.9297 72.2109 36.875 72.2109 39.3594V56H70.5234V39.3594ZM75.5859 39.3594V56H73.8984V39.3594C73.8984 36.4062 72.9531 34.0625 71.0625 32.3281C69.1719 30.5781 66.7422 29.7031 63.7734 29.7031C61.0234 29.7031 58.7266 30.6172 56.8828 32.4453C55.0391 34.2734 54.1172 36.5 54.1172 39.125C54.1172 40.7188 54.3906 42.1484 54.9375 43.4141C55.5 44.6797 56.2578 45.6953 57.2109 46.4609C58.1641 47.2266 59.2266 47.8125 60.3984 48.2188C61.5859 48.6094 62.8359 48.8047 64.1484 48.8047C65.9297 48.8047 67.625 48.3906 69.2344 47.5625V49.3438C67.5469 50.0469 65.8438 50.3984 64.125 50.3984C62.5781 50.3984 61.1094 50.1641 59.7188 49.6953C58.3281 49.2109 57.0859 48.5156 55.9922 47.6094C54.8984 46.6875 54.0312 45.5 53.3906 44.0469C52.75 42.5781 52.4297 40.9375 52.4297 39.125C52.4297 36.0469 53.5156 33.4297 55.6875 31.2734C57.875 29.1016 60.5703 28.0156 63.7734 28.0156C66.0234 28.0156 68.0391 28.4766 69.8203 29.3984C71.6016 30.3047 73.0078 31.625 74.0391 33.3594C75.0703 35.0938 75.5859 37.0938 75.5859 39.3594ZM78.9609 39.3594V56H77.2734V39.3594C77.2734 36.7812 76.6797 34.5 75.4922 32.5156C74.3203 30.5156 72.7109 28.9844 70.6641 27.9219C68.6172 26.8594 66.3203 26.3281 63.7734 26.3281C61.3516 26.3281 59.1406 26.9062 57.1406 28.0625C55.1406 29.2031 53.5703 30.75 52.4297 32.7031C51.3047 34.6562 50.7422 36.7969 50.7422 39.125C50.7422 41.125 51.1094 42.9531 51.8438 44.6094C52.5781 46.25 53.5625 47.6016 54.7969 48.6641C56.0469 49.7266 57.4609 50.5547 59.0391 51.1484C60.6328 51.7266 62.2969 52.0156 64.0312 52.0156C65.7969 52.0156 67.5312 51.6484 69.2344 50.9141V52.625C67.4531 53.2969 65.7031 53.6328 63.9844 53.6328C61.2812 53.6328 58.7969 53.0234 56.5312 51.8047C54.2656 50.5859 52.4531 48.8594 51.0938 46.625C49.7344 44.3906 49.0547 41.8906 49.0547 39.125C49.0547 37.1562 49.4297 35.2812 50.1797 33.5C50.9453 31.7188 51.9766 30.1797 53.2734 28.8828C54.5703 27.5859 56.1328 26.5547 57.9609 25.7891C59.7891 25.0234 61.7266 24.6406 63.7734 24.6406C66.6328 24.6406 69.2109 25.25 71.5078 26.4688C73.8203 27.6875 75.6406 29.4219 76.9688 31.6719C78.2969 33.9219 78.9609 36.4844 78.9609 39.3594ZM82.3359 39.3594V56H80.6484V39.3594C80.6484 36.1719 79.9062 33.3281 78.4219 30.8281C76.9375 28.3125 74.9141 26.375 72.3516 25.0156C69.7891 23.6406 66.9297 22.9531 63.7734 22.9531C60.7422 22.9531 57.9688 23.6797 55.4531 25.1328C52.9375 26.5859 50.9609 28.5547 49.5234 31.0391C48.0859 33.5078 47.3672 36.2031 47.3672 39.125C47.3672 41.0469 47.6719 42.875 48.2812 44.6094C48.9062 46.3281 49.75 47.8359 50.8125 49.1328C51.8906 50.4141 53.125 51.5234 54.5156 52.4609C55.9219 53.3984 57.4297 54.1094 59.0391 54.5938C60.6641 55.0781 62.3125 55.3203 63.9844 55.3203C65.8281 55.3203 67.5781 54.9688 69.2344 54.2656V56C67.5 56.6094 65.7188 56.9141 63.8906 56.9141C62 56.9141 60.1562 56.6484 58.3594 56.1172C56.5781 55.5859 54.9219 54.8047 53.3906 53.7734C51.875 52.7266 50.5391 51.5 49.3828 50.0938C48.2266 48.6719 47.3203 47.0156 46.6641 45.125C46.0078 43.2344 45.6797 41.2344 45.6797 39.125C45.6797 36.7031 46.1328 34.3984 47.0391 32.2109C47.9609 30.0078 49.2109 28.1094 50.7891 26.5156C52.3828 24.9062 54.3047 23.6328 56.5547 22.6953C58.8203 21.7422 61.2266 21.2656 63.7734 21.2656C66.3672 21.2656 68.8047 21.7031 71.0859 22.5781C73.3672 23.4531 75.3359 24.6719 76.9922 26.2344C78.6484 27.7812 79.9531 29.6875 80.9062 31.9531C81.8594 34.2188 82.3359 36.6875 82.3359 39.3594ZM86.5078 22.25H88.1953V56H86.5078V22.25ZM96.6328 22.25H98.3203V47.5625H110.766V49.25H98.3203V50.9375H110.766V52.625H98.3203V54.3125H110.766V56H96.6328V22.25ZM93.2578 22.25H94.9453V56H93.2578V22.25ZM89.8828 22.25H91.5703V56H89.8828V22.25ZM114.492 22.25H116.18V56H114.492V22.25ZM124.617 22.25H126.305V47.5625H138.75V49.25H126.305V50.9375H138.75V52.625H126.305V54.3125H138.75V56H124.617V22.25ZM121.242 22.25H122.93V56H121.242V22.25ZM117.867 22.25H119.555V56H117.867V22.25ZM152.602 22.25H154.289V56H152.602V22.25ZM149.227 22.25H150.914V56H149.227V22.25ZM145.852 22.25H147.539V56H145.852V22.25ZM142.477 22.25H144.164V56H142.477V22.25ZM174.211 39.9453H171.539C168.258 39.9453 165.734 39.2812 163.969 37.9531C162.203 36.6094 161.32 34.7656 161.32 32.4219C161.32 30.2344 162.359 28.4141 164.438 26.9609C166.516 25.4922 169.43 24.7578 173.18 24.7578C175.68 24.7578 178.008 25.1016 180.164 25.7891C182.336 26.4766 184.141 27.4453 185.578 28.6953L184.266 29.75C182.984 28.7031 181.375 27.8906 179.438 27.3125C177.5 26.7344 175.414 26.4453 173.18 26.4453C169.883 26.4453 167.359 27.0078 165.609 28.1328C163.875 29.2578 163.008 30.6875 163.008 32.4219C163.008 34.2969 163.719 35.7422 165.141 36.7578C166.578 37.7578 168.711 38.2578 171.539 38.2578H174.117C177.445 38.2578 179.992 38.9297 181.758 40.2734C183.539 41.6016 184.43 43.4375 184.43 45.7812C184.43 47.9688 183.391 49.7969 181.312 51.2656C179.234 52.7188 176.32 53.4453 172.57 53.4453C170.07 53.4453 167.734 53.1016 165.562 52.4141C163.406 51.7266 161.609 50.7578 160.172 49.5078L161.484 48.4531C162.766 49.5 164.375 50.3125 166.312 50.8906C168.25 51.4688 170.336 51.7578 172.57 51.7578C175.867 51.7578 178.383 51.1953 180.117 50.0703C181.867 48.9453 182.742 47.5156 182.742 45.7812C182.742 43.9062 182.023 42.4688 180.586 41.4688C179.164 40.4531 177.039 39.9453 174.211 39.9453ZM174.211 43.3203H171.539C167.195 43.3203 163.844 42.3359 161.484 40.3672C159.125 38.3828 157.945 35.7344 157.945 32.4219C157.945 30.9062 158.281 29.4844 158.953 28.1562C159.625 26.8281 160.586 25.6641 161.836 24.6641C163.102 23.6484 164.711 22.8516 166.664 22.2734C168.617 21.6797 170.789 21.3828 173.18 21.3828C176.195 21.3828 179.016 21.8438 181.641 22.7656C184.266 23.6719 186.453 24.9453 188.203 26.5859L186.844 27.5938C185.25 26.1562 183.258 25.0469 180.867 24.2656C178.477 23.4688 175.914 23.0703 173.18 23.0703C168.961 23.0703 165.648 23.9609 163.242 25.7422C160.836 27.5234 159.633 29.75 159.633 32.4219C159.633 35.25 160.664 37.4922 162.727 39.1484C164.789 40.8047 167.727 41.6328 171.539 41.6328H174.117C178.742 41.6328 181.055 43.0156 181.055 45.7812C181.055 47.0312 180.344 48.0625 178.922 48.875C177.516 49.6719 175.398 50.0703 172.57 50.0703C170.586 50.0703 168.742 49.8438 167.039 49.3906C165.336 48.9219 163.93 48.2578 162.82 47.3984L164.156 46.3672C166.109 47.7109 168.914 48.3828 172.57 48.3828C174.914 48.3828 176.633 48.1406 177.727 47.6562C178.82 47.1719 179.367 46.5469 179.367 45.7812C179.367 44.8906 178.984 44.2578 178.219 43.8828C177.453 43.5078 176.117 43.3203 174.211 43.3203ZM174.211 36.5703H171.539C166.977 36.5703 164.695 35.1875 164.695 32.4219C164.695 31.1719 165.398 30.1484 166.805 29.3516C168.227 28.5391 170.352 28.1328 173.18 28.1328C175.164 28.1328 177.008 28.3672 178.711 28.8359C180.414 29.2891 181.82 29.9453 182.93 30.8047L181.594 31.8359C179.641 30.4922 176.836 29.8203 173.18 29.8203C170.836 29.8203 169.117 30.0625 168.023 30.5469C166.93 31.0312 166.383 31.6562 166.383 32.4219C166.383 33.3125 166.766 33.9453 167.531 34.3203C168.297 34.6953 169.633 34.8828 171.539 34.8828H174.211C178.555 34.8828 181.906 35.875 184.266 37.8594C186.625 39.8281 187.805 42.4688 187.805 45.7812C187.805 47.2969 187.469 48.7188 186.797 50.0469C186.125 51.375 185.156 52.5469 183.891 53.5625C182.641 54.5625 181.039 55.3594 179.086 55.9531C177.133 56.5312 174.961 56.8203 172.57 56.8203C169.555 56.8203 166.734 56.3672 164.109 55.4609C161.484 54.5391 159.297 53.2578 157.547 51.6172L158.906 50.6094C160.5 52.0469 162.492 53.1641 164.883 53.9609C167.273 54.7422 169.836 55.1328 172.57 55.1328C176.789 55.1328 180.102 54.2422 182.508 52.4609C184.914 50.6797 186.117 48.4531 186.117 45.7812C186.117 42.9531 185.086 40.7109 183.023 39.0547C180.961 37.3984 178.023 36.5703 174.211 36.5703ZM197.484 31.6016H199.172V56H197.484V31.6016ZM190.734 23.9375V22.25H216.047V23.9375H190.734ZM190.734 27.3125V25.625H216.047V27.3125H190.734ZM190.734 30.6875V29H216.047V30.6875H190.734ZM207.609 31.6016H209.297V56H207.609V31.6016ZM204.234 31.6016H205.922V56H204.234V31.6016ZM200.859 31.6016H202.547V56H200.859V31.6016ZM243.727 39.3594C243.727 37.3594 243.109 35.8125 241.875 34.7188C240.641 33.625 239.008 33.0781 236.977 33.0781C235.258 33.0781 233.781 33.6719 232.547 34.8594C231.312 36.0469 230.695 37.4688 230.695 39.125C230.695 40.0625 230.82 40.9062 231.07 41.6562C231.32 42.3906 231.656 43.0078 232.078 43.5078C232.516 43.9922 233.031 44.3984 233.625 44.7266C234.219 45.0391 234.836 45.2656 235.477 45.4062C236.133 45.5469 236.828 45.6172 237.562 45.6172C239.344 45.6172 240.969 45.1797 242.438 44.3047V45.9922C240.922 46.7891 239.289 47.1875 237.539 47.1875C236.414 47.1875 235.352 47.0391 234.352 46.7422C233.367 46.4297 232.469 45.9688 231.656 45.3594C230.844 44.7344 230.195 43.8906 229.711 42.8281C229.242 41.75 229.008 40.5156 229.008 39.125C229.008 36.9688 229.766 35.1406 231.281 33.6406C232.797 32.1406 234.695 31.3906 236.977 31.3906C239.492 31.3906 241.523 32.1016 243.07 33.5234C244.633 34.9297 245.414 36.875 245.414 39.3594V56H243.727V39.3594ZM248.789 39.3594V56H247.102V39.3594C247.102 36.4062 246.156 34.0625 244.266 32.3281C242.375 30.5781 239.945 29.7031 236.977 29.7031C234.227 29.7031 231.93 30.6172 230.086 32.4453C228.242 34.2734 227.32 36.5 227.32 39.125C227.32 40.7188 227.594 42.1484 228.141 43.4141C228.703 44.6797 229.461 45.6953 230.414 46.4609C231.367 47.2266 232.43 47.8125 233.602 48.2188C234.789 48.6094 236.039 48.8047 237.352 48.8047C239.133 48.8047 240.828 48.3906 242.438 47.5625V49.3438C240.75 50.0469 239.047 50.3984 237.328 50.3984C235.781 50.3984 234.312 50.1641 232.922 49.6953C231.531 49.2109 230.289 48.5156 229.195 47.6094C228.102 46.6875 227.234 45.5 226.594 44.0469C225.953 42.5781 225.633 40.9375 225.633 39.125C225.633 36.0469 226.719 33.4297 228.891 31.2734C231.078 29.1016 233.773 28.0156 236.977 28.0156C239.227 28.0156 241.242 28.4766 243.023 29.3984C244.805 30.3047 246.211 31.625 247.242 33.3594C248.273 35.0938 248.789 37.0938 248.789 39.3594ZM252.164 39.3594V56H250.477V39.3594C250.477 36.7812 249.883 34.5 248.695 32.5156C247.523 30.5156 245.914 28.9844 243.867 27.9219C241.82 26.8594 239.523 26.3281 236.977 26.3281C234.555 26.3281 232.344 26.9062 230.344 28.0625C228.344 29.2031 226.773 30.75 225.633 32.7031C224.508 34.6562 223.945 36.7969 223.945 39.125C223.945 41.125 224.312 42.9531 225.047 44.6094C225.781 46.25 226.766 47.6016 228 48.6641C229.25 49.7266 230.664 50.5547 232.242 51.1484C233.836 51.7266 235.5 52.0156 237.234 52.0156C239 52.0156 240.734 51.6484 242.438 50.9141V52.625C240.656 53.2969 238.906 53.6328 237.188 53.6328C234.484 53.6328 232 53.0234 229.734 51.8047C227.469 50.5859 225.656 48.8594 224.297 46.625C222.938 44.3906 222.258 41.8906 222.258 39.125C222.258 37.1562 222.633 35.2812 223.383 33.5C224.148 31.7188 225.18 30.1797 226.477 28.8828C227.773 27.5859 229.336 26.5547 231.164 25.7891C232.992 25.0234 234.93 24.6406 236.977 24.6406C239.836 24.6406 242.414 25.25 244.711 26.4688C247.023 27.6875 248.844 29.4219 250.172 31.6719C251.5 33.9219 252.164 36.4844 252.164 39.3594ZM255.539 39.3594V56H253.852V39.3594C253.852 36.1719 253.109 33.3281 251.625 30.8281C250.141 28.3125 248.117 26.375 245.555 25.0156C242.992 23.6406 240.133 22.9531 236.977 22.9531C233.945 22.9531 231.172 23.6797 228.656 25.1328C226.141 26.5859 224.164 28.5547 222.727 31.0391C221.289 33.5078 220.57 36.2031 220.57 39.125C220.57 41.0469 220.875 42.875 221.484 44.6094C222.109 46.3281 222.953 47.8359 224.016 49.1328C225.094 50.4141 226.328 51.5234 227.719 52.4609C229.125 53.3984 230.633 54.1094 232.242 54.5938C233.867 55.0781 235.516 55.3203 237.188 55.3203C239.031 55.3203 240.781 54.9688 242.438 54.2656V56C240.703 56.6094 238.922 56.9141 237.094 56.9141C235.203 56.9141 233.359 56.6484 231.562 56.1172C229.781 55.5859 228.125 54.8047 226.594 53.7734C225.078 52.7266 223.742 51.5 222.586 50.0938C221.43 48.6719 220.523 47.0156 219.867 45.125C219.211 43.2344 218.883 41.2344 218.883 39.125C218.883 36.7031 219.336 34.3984 220.242 32.2109C221.164 30.0078 222.414 28.1094 223.992 26.5156C225.586 24.9062 227.508 23.6328 229.758 22.6953C232.023 21.7422 234.43 21.2656 236.977 21.2656C239.57 21.2656 242.008 21.7031 244.289 22.5781C246.57 23.4531 248.539 24.6719 250.195 26.2344C251.852 27.7812 253.156 29.6875 254.109 31.9531C255.062 34.2188 255.539 36.6875 255.539 39.3594Z" fill="#0097E6"/>
-        <path d="M30.3984 41.2578L30.1524 41.6932L30.1603 41.6975L30.3984 41.2578ZM17.9297 40.9531H17.4297V41.4531H17.9297V40.9531ZM17.9297 39.2656V38.7656H17.4297V39.2656H17.9297ZM17.9297 45.875H17.4297V46.375H17.9297V45.875ZM17.9297 44.1875V43.6875H17.4297V44.1875H17.9297ZM16.9922 56V56.5H17.4922V56H16.9922ZM15.3047 56H14.8047V56.5H15.3047V56ZM15.3047 27.3125V26.8125H14.8047V27.3125H15.3047ZM31.5234 28.2969L31.1942 28.6732L31.1987 28.6771L31.5234 28.2969ZM31.5938 33.5L31.9061 33.8905L31.9113 33.8862L31.5938 33.5ZM17.9297 34.3438H17.4297V34.8438H17.9297V34.3438ZM17.9297 32.6562V32.1562H17.4297V32.6562H17.9297ZM30.3516 29.4688L30.0466 29.8652L30.0566 29.8724L30.3516 29.4688ZM16.9922 29V28.5H16.4922V29H16.9922ZM17.9297 36.0312V35.5312H17.4297V36.0312H17.9297ZM32.7188 34.6484L33.0464 35.0261L33.0468 35.0257L32.7188 34.6484ZM32.6484 27.1484L32.3108 27.5172L32.3134 27.5196L32.6484 27.1484ZM13.6172 25.625V25.125H13.1172V25.625H13.6172ZM13.6172 56V56.5H14.1172V56H13.6172ZM11.9297 56H11.4297V56.5H11.9297V56ZM11.9297 23.9375V23.4375H11.4297V23.9375H11.9297ZM33.8438 25.9766L33.5002 26.3399L33.5043 26.3437L33.8438 25.9766ZM35.0156 34.4375L34.593 34.1703L34.5902 34.1748L35.0156 34.4375ZM32.3438 36.8984L32.1152 36.4537L31.0882 36.9815L32.1758 37.3694L32.3438 36.8984ZM33.9141 47.3516L34.2486 47.7232L34.2508 47.7212L33.9141 47.3516ZM17.9297 49.25H17.4297V49.75H17.9297V49.25ZM17.9297 47.5625V47.0625H17.4297V47.5625H17.9297ZM32.7188 46.1797L33.0464 46.5573L33.0468 46.557L32.7188 46.1797ZM29.5312 37.6953L29.5756 37.1973L29.5285 37.1931L29.4815 37.1978L29.5312 37.6953ZM17.9297 37.7188H17.4297V38.2188H17.9297V37.7188ZM37.0781 36.6406L36.702 36.3111L36.4095 36.6451L36.7065 36.9751L37.0781 36.6406ZM37.9219 47.5859L37.4953 47.325L37.4946 47.3262L37.9219 47.5859ZM34.0312 51.3125L34.2701 51.7518L34.2716 51.751L34.0312 51.3125ZM17.9297 52.625H17.4297V53.125H17.9297V52.625ZM17.9297 50.9375V50.4375H17.4297V50.9375H17.9297ZM35.0859 48.5L35.4262 48.8664L35.4279 48.8648L35.0859 48.5ZM37.0078 39.2422L36.5449 39.4313L36.5482 39.4391L37.0078 39.2422ZM35.1094 36.7109L34.7675 36.3461L34.3215 36.764L34.8195 37.1183L35.1094 36.7109ZM35.0391 24.7812L34.6952 25.1442L34.6969 25.1458L35.0391 24.7812ZM10.2422 22.25V21.75H9.74219V22.25H10.2422ZM10.2422 56V56.5H10.7422V56H10.2422ZM8.55469 56H8.05469V56.5H8.55469V56ZM8.55469 20.5625V20.0625H8.05469V20.5625H8.55469ZM34.0312 21.9453L33.7845 22.3802L33.7895 22.383L34.0312 21.9453ZM37.9453 25.6719L37.5192 25.9335L37.5204 25.9353L37.9453 25.6719ZM39 36.7344L38.5996 36.4349L38.3613 36.7536L38.6183 37.0573L39 36.7344ZM39.3984 24.8281L38.971 25.0875L38.9736 25.0918L39.3984 24.8281ZM34.8984 20.4922L34.6507 20.9265L34.6535 20.9281L34.8984 20.4922ZM6.86719 18.875V18.375H6.36719V18.875H6.86719ZM6.86719 56V56.5H7.36719V56H6.86719ZM5.17969 56H4.67969V56.5H5.17969V56ZM5.17969 17.1875V16.6875H4.67969V17.1875H5.17969ZM35.7188 19.0156L35.4714 19.4501L35.4727 19.4509L35.7188 19.0156ZM40.8516 23.9844L40.4236 24.243L40.426 24.2468L40.8516 23.9844ZM40.8281 36.7578L40.4277 36.4584L40.2077 36.7526L40.4227 37.0504L40.8281 36.7578ZM41.6484 48.0078L42.1074 48.2063L42.108 48.2048L41.6484 48.0078ZM38.6016 52.3438L38.2666 51.9725L38.2643 51.9746L38.6016 52.3438ZM17.9297 56H17.4297V56.5H17.9297V56ZM17.9297 54.3125V53.8125H17.4297V54.3125H17.9297ZM33.5156 53.3984L33.7038 53.8617L33.7079 53.86L33.5156 53.3984ZM40.0781 47.0234L39.6218 46.819L39.62 46.823L40.0781 47.0234ZM40.5469 39.5469L40.0711 39.7008L40.074 39.7092L40.5469 39.5469ZM31.4609 42.4766C31.4609 41.772 31.238 41.1439 30.6366 40.8182L30.1603 41.6975C30.3089 41.7779 30.4609 41.9624 30.4609 42.4766H31.4609ZM30.6445 40.8225C30.1744 40.5568 29.5408 40.4531 28.8047 40.4531V41.4531C29.4748 41.4531 29.9037 41.5525 30.1524 41.6931L30.6445 40.8225ZM28.8047 40.4531H17.9297V41.4531H28.8047V40.4531ZM18.4297 40.9531V39.2656H17.4297V40.9531H18.4297ZM17.9297 39.7656H28.8047V38.7656H17.9297V39.7656ZM28.8047 39.7656C30.0286 39.7656 30.8534 40.0231 31.3669 40.4521C31.8633 40.8667 32.1484 41.5093 32.1484 42.4766H33.1484C33.1484 41.3032 32.793 40.3403 32.0081 39.6846C31.2403 39.0433 30.1433 38.7656 28.8047 38.7656V39.7656ZM32.1484 42.4766C32.1484 43.5844 31.8284 44.2742 31.2923 44.6792L31.8952 45.4771C32.7654 44.8196 33.1484 43.775 33.1484 42.4766H32.1484ZM31.2923 44.6792C30.6997 45.127 29.8861 45.375 28.8047 45.375V46.375C30.0358 46.375 31.0816 46.0918 31.8952 45.4771L31.2923 44.6792ZM28.8047 45.375H17.9297V46.375H28.8047V45.375ZM18.4297 45.875V44.1875H17.4297V45.875H18.4297ZM17.9297 44.6875H28.8047V43.6875H17.9297V44.6875ZM28.8047 44.6875C29.5761 44.6875 30.2471 44.5367 30.7327 44.1514C31.2403 43.7487 31.4609 43.1587 31.4609 42.4766H30.4609C30.4609 42.9351 30.3222 43.2005 30.1111 43.3681C29.8779 43.5531 29.4707 43.6875 28.8047 43.6875V44.6875ZM16.9922 55.5H15.3047V56.5H16.9922V55.5ZM15.8047 56V27.3125H14.8047V56H15.8047ZM15.3047 27.8125H28.8047V26.8125H15.3047V27.8125ZM28.8047 27.8125C29.7548 27.8125 30.5417 28.1022 31.1942 28.6732L31.8527 27.9206C31.0052 27.179 29.9796 26.8125 28.8047 26.8125V27.8125ZM31.1987 28.6771C31.8398 29.2247 32.1484 29.8915 32.1484 30.7109H33.1484C33.1484 29.5929 32.7071 28.6503 31.8482 27.9167L31.1987 28.6771ZM32.1484 30.7109C32.1484 31.9064 31.8223 32.6648 31.2762 33.1138L31.9113 33.8862C32.7715 33.1789 33.1484 32.078 33.1484 30.7109H32.1484ZM31.2814 33.1096C30.6895 33.5831 29.8801 33.8438 28.8047 33.8438V34.8438C30.0418 34.8438 31.0918 34.5419 31.9061 33.8904L31.2814 33.1096ZM28.8047 33.8438H17.9297V34.8438H28.8047V33.8438ZM18.4297 34.3438V32.6562H17.4297V34.3438H18.4297ZM17.9297 33.1562H28.8047V32.1562H17.9297V33.1562ZM28.8047 33.1562C29.5885 33.1562 30.2698 32.9806 30.7568 32.5412C31.2536 32.0929 31.4609 31.4505 31.4609 30.7109H30.4609C30.4609 31.2683 30.3089 31.5985 30.0869 31.7987C29.8552 32.0077 29.4584 32.1562 28.8047 32.1562V33.1562ZM31.4609 30.7109C31.4609 30.0306 31.1899 29.4621 30.6466 29.0651L30.0566 29.8724C30.3258 30.0692 30.4609 30.3288 30.4609 30.7109H31.4609ZM30.6564 29.0724C30.1449 28.679 29.5151 28.5 28.8047 28.5V29.5C29.3442 29.5 29.7457 29.6335 30.0467 29.8651L30.6564 29.0724ZM28.8047 28.5H16.9922V29.5H28.8047V28.5ZM16.4922 29V56H17.4922V29H16.4922ZM17.9297 36.5312H28.8047V35.5312H17.9297V36.5312ZM28.8047 36.5312C30.459 36.5312 31.8837 36.0349 33.0464 35.0261L32.3911 34.2708C31.4288 35.1057 30.2441 35.5312 28.8047 35.5312V36.5312ZM33.0468 35.0257C34.2542 33.9759 34.8359 32.5143 34.8359 30.7109H33.8359C33.8359 32.2826 33.3395 33.446 32.3907 34.2711L33.0468 35.0257ZM34.8359 30.7109C34.8359 29.1929 34.2001 27.8756 32.9835 26.7773L32.3134 27.5196C33.3468 28.4525 33.8359 29.5102 33.8359 30.7109H34.8359ZM32.9861 26.7796C31.7881 25.6829 30.3875 25.125 28.8047 25.125V26.125C30.1281 26.125 31.2901 26.5827 32.3108 27.5172L32.9861 26.7796ZM28.8047 25.125H13.6172V26.125H28.8047V25.125ZM13.1172 25.625V56H14.1172V25.625H13.1172ZM13.6172 55.5H11.9297V56.5H13.6172V55.5ZM12.4297 56V23.9375H11.4297V56H12.4297ZM11.9297 24.4375H28.8047V23.4375H11.9297V24.4375ZM28.8047 24.4375C30.5947 24.4375 32.1525 25.0654 33.5002 26.3398L34.1873 25.6133C32.66 24.169 30.8585 23.4375 28.8047 23.4375V24.4375ZM33.5043 26.3437C34.8656 27.6025 35.5234 29.0515 35.5234 30.7109H36.5234C36.5234 28.7454 35.7282 27.0381 34.1832 25.6095L33.5043 26.3437ZM35.5234 30.7109C35.5234 32.0509 35.2079 33.1979 34.593 34.1703L35.4382 34.7047C36.1671 33.5521 36.5234 32.2147 36.5234 30.7109H35.5234ZM34.5902 34.1748C33.9825 35.1588 33.1603 35.9166 32.1152 36.4537L32.5723 37.3432C33.7772 36.724 34.7363 35.8412 35.441 34.7002L34.5902 34.1748ZM32.1758 37.3694C33.3241 37.779 34.1493 38.3866 34.6917 39.1742C35.235 39.9631 35.5234 40.975 35.5234 42.2422H36.5234C36.5234 40.8219 36.1986 39.5994 35.5153 38.6071C34.8311 37.6134 33.8166 36.8929 32.5117 36.4275L32.1758 37.3694ZM35.5234 42.2422C35.5234 44.2448 34.8668 45.8071 33.5773 46.982L34.2508 47.7212C35.7738 46.3335 36.5234 44.4896 36.5234 42.2422H35.5234ZM33.5796 46.9799C32.2707 48.1579 30.6889 48.75 28.8047 48.75V49.75C30.9205 49.75 32.7449 49.0765 34.2485 47.7232L33.5796 46.9799ZM28.8047 48.75H17.9297V49.75H28.8047V48.75ZM18.4297 49.25V47.5625H17.4297V49.25H18.4297ZM17.9297 48.0625H28.8047V47.0625H17.9297V48.0625ZM28.8047 48.0625C30.459 48.0625 31.8837 47.5662 33.0464 46.5573L32.3911 45.802C31.4288 46.6369 30.2441 47.0625 28.8047 47.0625V48.0625ZM33.0468 46.557C34.2542 45.5071 34.8359 44.0456 34.8359 42.2422H33.8359C33.8359 43.8138 33.3395 44.9773 32.3907 45.8024L33.0468 46.557ZM34.8359 42.2422C34.8359 40.969 34.3775 39.8412 33.4836 38.8785L32.7508 39.559C33.4818 40.3463 33.8359 41.2341 33.8359 42.2422H34.8359ZM33.4836 38.8785C32.5626 37.8867 31.2359 37.3452 29.5756 37.1973L29.4869 38.1933C30.9829 38.3266 32.0468 38.8008 32.7508 39.559L33.4836 38.8785ZM29.4815 37.1978C29.3514 37.2108 29.13 37.2188 28.8047 37.2188V38.2188C29.1356 38.2188 29.3986 38.2111 29.581 38.1928L29.4815 37.1978ZM28.8047 37.2188H17.9297V38.2188H28.8047V37.2188ZM18.4297 37.7188V36.0312H17.4297V37.7188H18.4297ZM38.8984 30.7109C38.8984 32.7679 38.1754 34.6295 36.702 36.3111L37.4542 36.9701C39.0746 35.1205 39.8984 33.0289 39.8984 30.7109H38.8984ZM36.7065 36.9751C38.1837 38.6165 38.8984 40.3682 38.8984 42.2422H39.8984C39.8984 40.0849 39.0663 38.1023 37.4498 36.3061L36.7065 36.9751ZM38.8984 42.2422C38.8984 44.1067 38.4299 45.7971 37.4953 47.325L38.3484 47.8468C39.3826 46.1561 39.8984 44.284 39.8984 42.2422H38.8984ZM37.4946 47.3262C36.5711 48.8455 35.3385 50.0259 33.7909 50.874L34.2716 51.751C35.974 50.8179 37.3351 49.5138 38.3491 47.8456L37.4946 47.3262ZM33.7924 50.8732C32.2581 51.7074 30.5983 52.125 28.8047 52.125V53.125C30.7611 53.125 32.5856 52.6676 34.2701 51.7518L33.7924 50.8732ZM28.8047 52.125H17.9297V53.125H28.8047V52.125ZM18.4297 52.625V50.9375H17.4297V52.625H18.4297ZM17.9297 51.4375H28.8047V50.4375H17.9297V51.4375ZM28.8047 51.4375C31.3655 51.4375 33.5816 50.5792 35.4262 48.8664L34.7457 48.1336C33.0903 49.6708 31.1189 50.4375 28.8047 50.4375V51.4375ZM35.4279 48.8648C37.2848 47.1239 38.2109 44.9031 38.2109 42.2422H37.2109C37.2109 44.6438 36.3871 46.5948 34.744 48.1352L35.4279 48.8648ZM38.2109 42.2422C38.2109 41.2506 37.9549 40.1827 37.4674 39.0452L36.5482 39.4391C36.9983 40.4892 37.2109 41.4213 37.2109 42.2422H38.2109ZM37.4707 39.0531C36.989 37.8739 36.3033 36.9468 35.3993 36.3035L34.8195 37.1183C35.5405 37.6313 36.1203 38.3917 36.5449 39.4313L37.4707 39.0531ZM35.4512 37.0758C37.2864 35.3564 38.2109 33.2243 38.2109 30.7109H37.2109C37.2109 32.9476 36.4011 34.8155 34.7675 36.3461L35.4512 37.0758ZM38.2109 30.7109C38.2109 28.281 37.2552 26.1756 35.3812 24.4167L34.6969 25.1458C36.3854 26.7307 37.2109 28.5784 37.2109 30.7109H38.2109ZM35.3829 24.4183C33.5122 22.646 31.3122 21.75 28.8047 21.75V22.75C31.0471 22.75 33.0034 23.5415 34.6952 25.1442L35.3829 24.4183ZM28.8047 21.75H10.2422V22.75H28.8047V21.75ZM9.74219 22.25V56H10.7422V22.25H9.74219ZM10.2422 55.5H8.55469V56.5H10.2422V55.5ZM9.05469 56V20.5625H8.05469V56H9.05469ZM8.55469 21.0625H28.8047V20.0625H8.55469V21.0625ZM28.8047 21.0625C30.5771 21.0625 32.2344 21.5008 33.7845 22.3802L34.278 21.5104C32.5781 20.5461 30.751 20.0625 28.8047 20.0625V21.0625ZM33.7895 22.383C35.3554 23.248 36.5962 24.4299 37.5192 25.9335L38.3714 25.4103C37.3569 23.7576 35.9883 22.4551 34.273 21.5076L33.7895 22.383ZM37.5204 25.9353C38.4413 27.4207 38.8984 29.0094 38.8984 30.7109H39.8984C39.8984 28.8187 39.3869 27.0481 38.3703 25.4084L37.5204 25.9353ZM39.4004 37.0338C40.8504 35.0951 41.5859 32.9836 41.5859 30.7109H40.5859C40.5859 32.7508 39.9308 34.6549 38.5996 36.4349L39.4004 37.0338ZM41.5859 30.7109C41.5859 28.5073 40.9968 26.4552 39.8233 24.5644L38.9736 25.0918C40.0501 26.8261 40.5859 28.6959 40.5859 30.7109H41.5859ZM39.8259 24.5687C38.6712 22.666 37.1079 21.1603 35.1434 20.0563L34.6535 20.9281C36.4702 21.949 37.9069 23.334 38.971 25.0875L39.8259 24.5687ZM35.1461 20.0578C33.1802 18.9368 31.0636 18.375 28.8047 18.375V19.375C30.8895 19.375 32.8354 19.8913 34.6508 20.9265L35.1461 20.0578ZM28.8047 18.375H6.86719V19.375H28.8047V18.375ZM6.36719 18.875V56H7.36719V18.875H6.36719ZM6.86719 55.5H5.17969V56.5H6.86719V55.5ZM5.67969 56V17.1875H4.67969V56H5.67969ZM5.17969 17.6875H28.8047V16.6875H5.17969V17.6875ZM28.8047 17.6875C31.1869 17.6875 33.4063 18.2744 35.4714 19.4501L35.9661 18.5811C33.7499 17.3193 31.36 16.6875 28.8047 16.6875V17.6875ZM35.4727 19.4509C37.5552 20.6279 39.2031 22.2238 40.4237 24.243L41.2795 23.7257C39.9688 21.5575 38.1948 19.8408 35.9648 18.5803L35.4727 19.4509ZM40.426 24.2468C41.6593 26.2472 42.2734 28.3988 42.2734 30.7109H43.2734C43.2734 28.2106 42.6063 25.8778 41.2772 23.722L40.426 24.2468ZM42.2734 30.7109C42.2734 32.9068 41.6559 34.8159 40.4277 36.4584L41.2286 37.0572C42.5941 35.231 43.2734 33.1088 43.2734 30.7109H42.2734ZM40.4227 37.0504C41.6702 38.7789 42.2734 40.5068 42.2734 42.2422H43.2734C43.2734 40.2588 42.5798 38.3305 41.2336 36.4652L40.4227 37.0504ZM42.2734 42.2422C42.2734 44.275 41.9096 46.129 41.1889 47.8109L42.108 48.2048C42.8872 46.3866 43.2734 44.3969 43.2734 42.2422H42.2734ZM41.1895 47.8094C40.4652 49.4842 39.4897 50.8687 38.2666 51.9726L38.9365 52.7149C40.2759 51.5063 41.3316 50.0001 42.1074 48.2063L41.1895 47.8094ZM38.2643 51.9746C37.0477 53.0861 35.6261 53.9522 33.9944 54.5716L34.3493 55.5065C36.0927 54.8447 37.6242 53.9139 38.9388 52.7129L38.2643 51.9746ZM33.9944 54.5716C32.3663 55.1897 30.6376 55.5 28.8047 55.5V56.5C30.753 56.5 32.6025 56.1697 34.3493 55.5065L33.9944 54.5716ZM28.8047 55.5H17.9297V56.5H28.8047V55.5ZM18.4297 56V54.3125H17.4297V56H18.4297ZM17.9297 54.8125H28.8047V53.8125H17.9297V54.8125ZM28.8047 54.8125C30.5089 54.8125 32.1432 54.4957 33.7038 53.8617L33.3274 52.9352C31.888 53.52 30.3817 53.8125 28.8047 53.8125V54.8125ZM33.7079 53.86C35.2615 53.2126 36.6191 52.3359 37.7755 51.2283L37.0838 50.5061C36.0215 51.5235 34.7697 52.3342 33.3233 52.9369L33.7079 53.86ZM37.7755 51.2283C38.9356 50.1172 39.8553 48.7803 40.5362 47.2238L39.62 46.823C38.9885 48.2666 38.1425 49.4922 37.0838 50.5061L37.7755 51.2283ZM40.5344 47.2278C41.2359 45.6618 41.5859 43.9982 41.5859 42.2422H40.5859C40.5859 43.8612 40.2641 45.3851 39.6218 46.819L40.5344 47.2278ZM41.5859 42.2422C41.5859 41.4174 41.3894 40.4612 41.0198 39.3845L40.074 39.7092C40.4231 40.7263 40.5859 41.567 40.5859 42.2422H41.5859ZM41.0226 39.393C40.6598 38.2716 40.1133 37.276 39.3817 36.4114L38.6183 37.0573C39.2617 37.8177 39.7464 38.6971 40.0712 39.7008L41.0226 39.393ZM57.8672 41.6562L57.3928 41.8144L57.3939 41.8174L57.8672 41.6562ZM58.875 43.5078L58.4929 43.8302L58.4983 43.8367L58.5039 43.843L58.875 43.5078ZM60.4219 44.7266L60.18 45.1643L60.189 45.169L60.4219 44.7266ZM62.2734 45.4062L62.1662 45.8946L62.1687 45.8952L62.2734 45.4062ZM69.2344 44.3047H69.7344V43.4248L68.9785 43.8751L69.2344 44.3047ZM69.2344 45.9922L69.4671 46.4347L69.7344 46.2942V45.9922H69.2344ZM61.1484 46.7422L60.9971 47.2188L61.0061 47.2215L61.1484 46.7422ZM58.4531 45.3594L58.1482 45.7557L58.1531 45.7594L58.4531 45.3594ZM56.5078 42.8281L56.0492 43.0275L56.0529 43.0355L56.5078 42.8281ZM69.8672 33.5234L69.5288 33.8916L69.5327 33.8951L69.8672 33.5234ZM72.2109 56V56.5H72.7109V56H72.2109ZM70.5234 56H70.0234V56.5H70.5234V56ZM75.5859 56V56.5H76.0859V56H75.5859ZM73.8984 56H73.3984V56.5H73.8984V56ZM71.0625 32.3281L70.7229 32.6951L70.7245 32.6966L71.0625 32.3281ZM54.9375 43.4141L54.4785 43.6124L54.4806 43.6171L54.9375 43.4141ZM60.3984 48.2188L60.2346 48.6912L60.2422 48.6937L60.3984 48.2188ZM69.2344 47.5625H69.7344V46.7429L69.0056 47.1179L69.2344 47.5625ZM69.2344 49.3438L69.4267 49.8053L69.7344 49.6771V49.3438H69.2344ZM59.7188 49.6953L59.5543 50.1675L59.559 50.1691L59.7188 49.6953ZM55.9922 47.6094L55.6699 47.9917L55.6732 47.9944L55.9922 47.6094ZM53.3906 44.0469L52.9323 44.2468L52.9331 44.2486L53.3906 44.0469ZM55.6875 31.2734L56.0398 31.6283L56.0398 31.6283L55.6875 31.2734ZM69.8203 29.3984L69.5905 29.8425L69.5936 29.8441L69.8203 29.3984ZM78.9609 56V56.5H79.4609V56H78.9609ZM77.2734 56H76.7734V56.5H77.2734V56ZM75.4922 32.5156L75.0608 32.7684L75.0631 32.7724L75.4922 32.5156ZM57.1406 28.0625L57.3883 28.4968L57.3909 28.4954L57.1406 28.0625ZM52.4297 32.7031L51.9979 32.451L51.9964 32.4536L52.4297 32.7031ZM51.8438 44.6094L51.3867 44.812L51.3874 44.8137L51.8438 44.6094ZM54.7969 48.6641L54.4707 49.043L54.4731 49.045L54.7969 48.6641ZM59.0391 51.1484L58.863 51.6164L58.8686 51.6185L59.0391 51.1484ZM69.2344 50.9141H69.7344V50.154L69.0364 50.4549L69.2344 50.9141ZM69.2344 52.625L69.4108 53.0928L69.7344 52.9708V52.625H69.2344ZM50.1797 33.5L49.7203 33.3025L49.7189 33.306L50.1797 33.5ZM71.5078 26.4688L71.2735 26.9104L71.2747 26.9111L71.5078 26.4688ZM82.3359 56V56.5H82.8359V56H82.3359ZM80.6484 56H80.1484V56.5H80.6484V56ZM78.4219 30.8281L77.9913 31.0822L77.9919 31.0834L78.4219 30.8281ZM72.3516 25.0156L72.1152 25.4562L72.1172 25.4573L72.3516 25.0156ZM49.5234 31.0391L49.9555 31.2907L49.9562 31.2895L49.5234 31.0391ZM48.2812 44.6094L47.8095 44.7751L47.8114 44.7802L48.2812 44.6094ZM50.8125 49.1328L50.4257 49.4497L50.4299 49.4547L50.8125 49.1328ZM54.5156 52.4609L54.2361 52.8755L54.2383 52.877L54.5156 52.4609ZM59.0391 54.5938L58.895 55.0725L58.8962 55.0729L59.0391 54.5938ZM69.2344 54.2656H69.7344V53.5102L69.039 53.8054L69.2344 54.2656ZM69.2344 56L69.4001 56.4717L69.7344 56.3543V56H69.2344ZM58.3594 56.1172L58.2165 56.5963L58.2176 56.5967L58.3594 56.1172ZM53.3906 53.7734L53.1064 54.1849L53.1113 54.1882L53.3906 53.7734ZM49.3828 50.0938L48.9949 50.4092L48.9966 50.4113L49.3828 50.0938ZM47.0391 32.2109L46.5778 32.0179L46.5771 32.0196L47.0391 32.2109ZM50.7891 26.5156L50.4338 26.1638L50.4338 26.1638L50.7891 26.5156ZM56.5547 22.6953L56.747 23.1569L56.7486 23.1562L56.5547 22.6953ZM76.9922 26.2344L76.6491 26.5981L76.6509 26.5998L76.9922 26.2344ZM71.0234 39.3594C71.0234 37.2494 70.3675 35.5531 69.0035 34.3445L68.3403 35.093C69.445 36.0719 70.0234 37.4694 70.0234 39.3594H71.0234ZM69.0035 34.3445C67.6577 33.1521 65.8974 32.5781 63.7734 32.5781V33.5781C65.712 33.5781 67.2173 34.0979 68.3403 35.093L69.0035 34.3445ZM63.7734 32.5781C61.9259 32.5781 60.325 33.2216 58.9971 34.499L59.6904 35.2197C60.8313 34.1222 62.1834 33.5781 63.7734 33.5781V32.5781ZM58.9971 34.499C57.6685 35.7772 56.9922 37.3276 56.9922 39.125H57.9922C57.9922 37.6099 58.5502 36.3166 59.6904 35.2197L58.9971 34.499ZM56.9922 39.125C56.9922 40.1065 57.123 41.0049 57.3928 41.8144L58.3415 41.4981C58.1113 40.8076 57.9922 40.0185 57.9922 39.125H56.9922ZM57.3939 41.8174C57.6603 42.6 58.0241 43.2747 58.4929 43.8302L59.2571 43.1854C58.8822 42.741 58.5741 42.1812 58.3405 41.4951L57.3939 41.8174ZM58.5039 43.843C58.98 44.37 59.5398 44.8104 60.18 45.1642L60.6637 44.2889C60.1165 43.9865 59.645 43.6144 59.2461 43.1727L58.5039 43.843ZM60.189 45.169C60.8215 45.5019 61.4808 45.7442 62.1662 45.8946L62.3806 44.9179C61.7848 44.7871 61.2098 44.5762 60.6547 44.2841L60.189 45.169ZM62.1687 45.8952C62.8624 46.0438 63.5931 46.1172 64.3594 46.1172V45.1172C63.6569 45.1172 62.997 45.0499 62.3782 44.9173L62.1687 45.8952ZM64.3594 46.1172C66.2268 46.1172 67.9408 45.6573 69.4903 44.7342L68.9785 43.8751C67.5904 44.7021 66.0545 45.1172 64.3594 45.1172V46.1172ZM68.7344 44.3047V45.9922H69.7344V44.3047H68.7344ZM69.0017 45.5496C67.5594 46.3079 66.0067 46.6875 64.3359 46.6875V47.6875C66.1652 47.6875 67.8781 47.2702 69.4671 46.4347L69.0017 45.5496ZM64.3359 46.6875C63.2554 46.6875 62.2411 46.545 61.2907 46.2629L61.0061 47.2215C62.0558 47.5331 63.1665 47.6875 64.3359 47.6875V46.6875ZM61.2997 46.2656C60.3679 45.9698 59.5199 45.5345 58.7531 44.9594L58.1531 45.7594C59.0113 46.403 59.9602 46.8896 60.9971 47.2187L61.2997 46.2656ZM58.758 44.9631C58.0187 44.3944 57.418 43.6194 56.9628 42.6207L56.0529 43.0355C56.5663 44.1619 57.2625 45.0744 58.1483 45.7557L58.758 44.9631ZM56.9663 42.6288C56.5295 41.624 56.3047 40.4594 56.3047 39.125H55.3047C55.3047 40.5719 55.5486 41.876 56.0493 43.0275L56.9663 42.6288ZM56.3047 39.125C56.3047 37.1006 57.0103 35.4009 58.4298 33.996L57.7264 33.2852C56.1147 34.8803 55.3047 36.8369 55.3047 39.125H56.3047ZM58.4298 33.996C59.8449 32.5956 61.6137 31.8906 63.7734 31.8906V30.8906C61.3706 30.8906 59.3426 31.6857 57.7264 33.2852L58.4298 33.996ZM63.7734 31.8906C66.1899 31.8906 68.0918 32.5707 69.5288 33.8916L70.2056 33.1553C68.5488 31.6325 66.3882 30.8906 63.7734 30.8906V31.8906ZM69.5327 33.8951C70.9706 35.1892 71.7109 36.9885 71.7109 39.3594H72.7109C72.7109 36.7615 71.8888 34.6702 70.2017 33.1518L69.5327 33.8951ZM71.7109 39.3594V56H72.7109V39.3594H71.7109ZM72.2109 55.5H70.5234V56.5H72.2109V55.5ZM71.0234 56V39.3594H70.0234V56H71.0234ZM75.0859 39.3594V56H76.0859V39.3594H75.0859ZM75.5859 55.5H73.8984V56.5H75.5859V55.5ZM74.3984 56V39.3594H73.3984V56H74.3984ZM74.3984 39.3594C74.3984 36.2894 73.4103 33.8034 71.4005 31.9597L70.7245 32.6966C72.4959 34.3216 73.3984 36.5231 73.3984 39.3594H74.3984ZM71.4021 31.9612C69.405 30.1126 66.8476 29.2031 63.7734 29.2031V30.2031C66.6368 30.2031 68.9387 31.0436 70.7229 32.6951L71.4021 31.9612ZM63.7734 29.2031C60.9002 29.2031 58.4741 30.1634 56.5308 32.0903L57.2349 32.8004C58.979 31.071 61.1467 30.2031 63.7734 30.2031V29.2031ZM56.5308 32.0903C54.5912 34.0134 53.6172 36.3683 53.6172 39.125H54.6172C54.6172 36.6317 55.487 34.5334 57.2349 32.8004L56.5308 32.0903ZM53.6172 39.125C53.6172 40.7764 53.9007 42.2751 54.4785 43.6124L55.3965 43.2157C54.8806 42.0217 54.6172 40.6611 54.6172 39.125H53.6172ZM54.4806 43.6171C55.0711 44.9458 55.8747 46.0289 56.8978 46.8507L57.5241 46.0711C56.6409 45.3617 55.9289 44.4136 55.3944 43.211L54.4806 43.6171ZM56.8978 46.8507C57.8963 47.6528 59.0094 48.2664 60.2347 48.6912L60.5622 47.7463C59.4437 47.3586 58.4319 46.8003 57.5241 46.0711L56.8978 46.8507ZM60.2422 48.6937C61.4826 49.1017 62.7855 49.3047 64.1484 49.3047V48.3047C62.8863 48.3047 61.6893 48.117 60.5547 47.7438L60.2422 48.6937ZM64.1484 49.3047C66.0106 49.3047 67.7842 48.871 69.4631 48.0071L69.0056 47.1179C67.4658 47.9102 65.8488 48.3047 64.1484 48.3047V49.3047ZM68.7344 47.5625V49.3438H69.7344V47.5625H68.7344ZM69.0421 48.8822C67.4116 49.5616 65.7736 49.8984 64.125 49.8984V50.8984C65.9139 50.8984 67.6822 50.5322 69.4267 49.8053L69.0421 48.8822ZM64.125 49.8984C62.6295 49.8984 61.2149 49.672 59.8785 49.2215L59.559 50.1691C61.0039 50.6561 62.5268 50.8984 64.125 50.8984V49.8984ZM59.8832 49.2231C58.5468 48.7576 57.3574 48.0912 56.3112 47.2244L55.6732 47.9944C56.8145 48.9401 58.1095 49.6642 59.5543 50.1675L59.8832 49.2231ZM56.3144 47.2271C55.2851 46.3595 54.4614 45.2363 53.8481 43.8452L52.9331 44.2486C53.6011 45.7637 54.5118 47.0155 55.67 47.9917L56.3144 47.2271ZM53.8489 43.847C53.2392 42.449 52.9297 40.8777 52.9297 39.125H51.9297C51.9297 40.9973 52.2608 42.7072 52.9323 44.2468L53.8489 43.847ZM52.9297 39.125C52.9297 36.1792 53.9631 33.69 56.0398 31.6283L55.3352 30.9186C53.0682 33.1693 51.9297 35.9145 51.9297 39.125H52.9297ZM56.0398 31.6283C58.1296 29.5534 60.6965 28.5156 63.7734 28.5156V27.5156C60.4441 27.5156 57.6204 28.6498 55.3352 30.9186L56.0398 31.6283ZM63.7734 28.5156C65.9533 28.5156 67.8885 28.9617 69.5905 29.8425L70.0501 28.9544C68.1896 27.9915 66.0936 27.5156 63.7734 27.5156V28.5156ZM69.5936 29.8441C71.2897 30.707 72.6259 31.961 73.6093 33.6149L74.4688 33.1038C73.3897 31.289 71.9134 29.9024 70.047 28.9528L69.5936 29.8441ZM73.6093 33.6149C74.5891 35.2628 75.0859 37.1724 75.0859 39.3594H76.0859C76.0859 37.0151 75.5515 34.9247 74.4688 33.1038L73.6093 33.6149ZM78.4609 39.3594V56H79.4609V39.3594H78.4609ZM78.9609 55.5H77.2734V56.5H78.9609V55.5ZM77.7734 56V39.3594H76.7734V56H77.7734ZM77.7734 39.3594C77.7734 36.7015 77.1603 34.3295 75.9212 32.2589L75.0631 32.7724C76.199 34.6705 76.7734 36.861 76.7734 39.3594H77.7734ZM75.9236 32.2629C74.7043 30.1819 73.0254 28.5843 70.8944 27.4781L70.4337 28.3656C72.3964 29.3845 73.9363 30.8493 75.0608 32.7684L75.9236 32.2629ZM70.8944 27.4781C68.7692 26.3749 66.392 25.8281 63.7734 25.8281V26.8281C66.2486 26.8281 68.4652 27.3438 70.4337 28.3656L70.8944 27.4781ZM63.7734 25.8281C61.2681 25.8281 58.9702 26.4272 56.8904 27.6296L57.3909 28.4954C59.3111 27.3853 61.435 26.8281 63.7734 26.8281V25.8281ZM56.8929 27.6282C54.8181 28.8115 53.1837 30.4206 51.9979 32.451L52.8615 32.9553C53.957 31.0794 55.4632 29.5948 57.3883 28.4968L56.8929 27.6282ZM51.9964 32.4536C50.8262 34.4852 50.2422 36.7123 50.2422 39.125H51.2422C51.2422 36.8815 51.7832 34.8273 52.863 32.9527L51.9964 32.4536ZM50.2422 39.125C50.2422 41.1875 50.6212 43.0856 51.3867 44.812L52.3008 44.4067C51.5976 42.8206 51.2422 41.0625 51.2422 39.125H50.2422ZM51.3874 44.8137C52.1486 46.5143 53.1751 47.9278 54.4707 49.043L55.1231 48.2851C53.9499 47.2753 53.0076 45.9857 52.3001 44.4051L51.3874 44.8137ZM54.4731 49.045C55.7681 50.1459 57.2325 51.003 58.863 51.6164L59.2151 50.6805C57.6894 50.1064 56.3256 49.3073 55.1207 48.2831L54.4731 49.045ZM58.8686 51.6185C60.5182 52.2169 62.2401 52.5156 64.0312 52.5156V51.5156C62.3537 51.5156 60.7474 51.2362 59.2096 50.6784L58.8686 51.6185ZM64.0312 52.5156C65.8684 52.5156 67.6699 52.1331 69.4324 51.3732L69.0364 50.4549C67.3926 51.1637 65.7254 51.5156 64.0312 51.5156V52.5156ZM68.7344 50.9141V52.625H69.7344V50.9141H68.7344ZM69.0579 52.1572C67.327 52.8101 65.6367 53.1328 63.9844 53.1328V54.1328C65.7696 54.1328 67.5792 53.7837 69.4108 53.0928L69.0579 52.1572ZM63.9844 53.1328C61.3596 53.1328 58.9572 52.5419 56.7681 51.3644L56.2944 52.245C58.6365 53.5049 61.2029 54.1328 63.9844 54.1328V53.1328ZM56.7681 51.3644C54.5822 50.1885 52.8348 48.5248 51.5209 46.3651L50.6666 46.8849C52.0714 49.1939 53.949 50.9834 56.2944 52.245L56.7681 51.3644ZM51.5209 46.3651C50.212 44.2136 49.5547 41.8045 49.5547 39.125H48.5547C48.5547 41.9768 49.2568 44.5676 50.6666 46.8849L51.5209 46.3651ZM49.5547 39.125C49.5547 37.2216 49.9169 35.4127 50.6405 33.694L49.7189 33.306C48.9425 35.1498 48.5547 37.0909 48.5547 39.125H49.5547ZM50.6391 33.6974C51.3811 31.9711 52.3775 30.4859 53.627 29.2364L52.9199 28.5293C51.5756 29.8735 50.5095 31.4664 49.7203 33.3026L50.6391 33.6974ZM53.627 29.2364C54.8743 27.9891 56.381 26.9928 58.1541 26.2503L57.7678 25.3279C55.8846 26.1166 54.2663 27.1828 52.9199 28.5293L53.627 29.2364ZM58.1541 26.2503C59.919 25.5111 61.7907 25.1406 63.7734 25.1406V24.1406C61.6625 24.1406 59.6591 24.5358 57.7678 25.3279L58.1541 26.2503ZM63.7734 25.1406C66.5594 25.1406 69.0558 25.7337 71.2735 26.9104L71.7422 26.0271C69.3661 24.7663 66.7062 24.1406 63.7734 24.1406V25.1406ZM71.2747 26.9111C73.5052 28.0866 75.2572 29.7559 76.5382 31.926L77.3993 31.4177C76.0241 29.0879 74.1354 27.2884 71.7409 26.0264L71.2747 26.9111ZM76.5382 31.926C77.8164 34.0915 78.4609 36.5645 78.4609 39.3594H79.4609C79.4609 36.4042 78.7774 33.7523 77.3993 31.4177L76.5382 31.926ZM81.8359 39.3594V56H82.8359V39.3594H81.8359ZM82.3359 55.5H80.6484V56.5H82.3359V55.5ZM81.1484 56V39.3594H80.1484V56H81.1484ZM81.1484 39.3594C81.1484 36.091 80.3864 33.1574 78.8518 30.5729L77.9919 31.0834C79.4261 33.4988 80.1484 36.2528 80.1484 39.3594H81.1484ZM78.8525 30.574C77.3216 27.9795 75.2305 25.9768 72.5859 24.5739L72.1172 25.4573C74.5977 26.7732 76.5534 28.6455 77.9913 31.0822L78.8525 30.574ZM72.588 24.575C69.9467 23.1578 67.005 22.4531 63.7734 22.4531V23.4531C66.8544 23.4531 69.6314 24.1235 72.1152 25.4562L72.588 24.575ZM63.7734 22.4531C60.6584 22.4531 57.7981 23.2009 55.203 24.6999L55.7032 25.5658C58.1394 24.1585 60.826 23.4531 63.7734 23.4531V22.4531ZM55.203 24.6999C52.6125 26.1962 50.5726 28.2275 49.0907 30.7887L49.9562 31.2895C51.3493 28.8819 53.2625 26.9757 55.7032 25.5658L55.203 24.6999ZM49.0913 30.7875C47.6083 33.3344 46.8672 36.1168 46.8672 39.125H47.8672C47.8672 36.2894 48.5636 33.6812 49.9555 31.2907L49.0913 30.7875ZM46.8672 39.125C46.8672 41.1007 47.1806 42.9851 47.8095 44.7751L48.753 44.4436C48.1632 42.7649 47.8672 40.993 47.8672 39.125H46.8672ZM47.8114 44.7802C48.4539 46.5472 49.3244 48.1054 50.4257 49.4497L51.1993 48.8159C50.1756 47.5665 49.3586 46.1091 48.7511 44.4385L47.8114 44.7802ZM50.4299 49.4547C51.538 50.7716 52.8071 51.9121 54.2361 52.8755L54.7951 52.0464C53.4429 51.1348 52.2433 50.0566 51.1951 48.8109L50.4299 49.4547ZM54.2383 52.877C55.6854 53.8417 57.238 54.5739 58.895 55.0725L59.1832 54.115C57.6213 53.6449 56.1584 52.9552 54.793 52.0449L54.2383 52.877ZM58.8962 55.0729C60.5669 55.5709 62.2634 55.8203 63.9844 55.8203V54.8203C62.3616 54.8203 60.7612 54.5853 59.1819 54.1146L58.8962 55.0729ZM63.9844 55.8203C65.8936 55.8203 67.7102 55.4559 69.4298 54.7259L69.039 53.8054C67.4461 54.4816 65.7627 54.8203 63.9844 54.8203V55.8203ZM68.7344 54.2656V56H69.7344V54.2656H68.7344ZM69.0686 55.5283C67.3872 56.119 65.662 56.4141 63.8906 56.4141V57.4141C65.7755 57.4141 67.6128 57.0997 69.4001 56.4717L69.0686 55.5283ZM63.8906 56.4141C62.0476 56.4141 60.2516 56.1552 58.5011 55.6377L58.2176 56.5967C60.0609 57.1417 61.9524 57.4141 63.8906 57.4141V56.4141ZM58.5023 55.638C56.7692 55.1211 55.1589 54.3615 53.6699 53.3587L53.1113 54.1882C54.6848 55.2479 56.3871 56.0507 58.2165 56.5963L58.5023 55.638ZM53.6748 53.362C52.1969 52.3412 50.8954 51.1462 49.769 49.7762L48.9966 50.4113C50.1827 51.8538 51.5531 53.1119 53.1065 54.1848L53.6748 53.362ZM49.7707 49.7783C48.6545 48.4056 47.7753 46.8017 47.1364 44.961L46.1917 45.289C46.8653 47.2296 47.7986 48.9382 48.9949 50.4092L49.7707 49.7783ZM47.1364 44.961C46.4994 43.1259 46.1797 41.1816 46.1797 39.125H45.1797C45.1797 41.2872 45.5162 43.3428 46.1917 45.289L47.1364 44.961ZM46.1797 39.125C46.1797 36.7673 46.6205 34.5277 47.501 32.4023L46.5771 32.0196C45.6452 34.2691 45.1797 36.639 45.1797 39.125H46.1797ZM47.5003 32.4039C48.399 30.2562 49.6143 28.4126 51.1444 26.8674L50.4338 26.1638C48.8076 27.8061 47.5229 29.7594 46.5778 32.0179L47.5003 32.4039ZM51.1443 26.8674C52.6881 25.3085 54.5533 24.0709 56.747 23.1569L56.3624 22.2338C54.056 23.1948 52.0775 24.504 50.4338 26.1638L51.1443 26.8674ZM56.7486 23.1562C58.9506 22.2298 61.2907 21.7656 63.7734 21.7656V20.7656C61.1624 20.7656 58.69 21.2546 56.3608 22.2344L56.7486 23.1562ZM63.7734 21.7656C66.3098 21.7656 68.6862 22.1932 70.9069 23.045L71.265 22.1113C68.9231 21.213 66.4246 20.7656 63.7734 20.7656V21.7656ZM70.9069 23.045C73.1317 23.8983 75.0436 25.0835 76.6491 26.5981L77.3353 25.8707C75.6282 24.2602 73.6026 23.0079 71.265 22.1113L70.9069 23.045ZM76.6509 26.5998C78.2514 28.0946 79.5172 29.9408 80.4454 32.147L81.3671 31.7592C80.389 29.4342 79.0455 27.4679 77.3335 25.869L76.6509 26.5998ZM80.4454 32.147C81.3705 34.3461 81.8359 36.7482 81.8359 39.3594H82.8359C82.8359 36.6268 82.3482 34.0914 81.3671 31.7592L80.4454 32.147ZM86.5078 22.25V21.75H86.0078V22.25H86.5078ZM88.1953 22.25H88.6953V21.75H88.1953V22.25ZM88.1953 56V56.5H88.6953V56H88.1953ZM86.5078 56H86.0078V56.5H86.5078V56ZM96.6328 22.25V21.75H96.1328V22.25H96.6328ZM98.3203 22.25H98.8203V21.75H98.3203V22.25ZM98.3203 47.5625H97.8203V48.0625H98.3203V47.5625ZM110.766 47.5625H111.266V47.0625H110.766V47.5625ZM110.766 49.25V49.75H111.266V49.25H110.766ZM98.3203 49.25V48.75H97.8203V49.25H98.3203ZM98.3203 50.9375H97.8203V51.4375H98.3203V50.9375ZM110.766 50.9375H111.266V50.4375H110.766V50.9375ZM110.766 52.625V53.125H111.266V52.625H110.766ZM98.3203 52.625V52.125H97.8203V52.625H98.3203ZM98.3203 54.3125H97.8203V54.8125H98.3203V54.3125ZM110.766 54.3125H111.266V53.8125H110.766V54.3125ZM110.766 56V56.5H111.266V56H110.766ZM96.6328 56H96.1328V56.5H96.6328V56ZM93.2578 22.25V21.75H92.7578V22.25H93.2578ZM94.9453 22.25H95.4453V21.75H94.9453V22.25ZM94.9453 56V56.5H95.4453V56H94.9453ZM93.2578 56H92.7578V56.5H93.2578V56ZM89.8828 22.25V21.75H89.3828V22.25H89.8828ZM91.5703 22.25H92.0703V21.75H91.5703V22.25ZM91.5703 56V56.5H92.0703V56H91.5703ZM89.8828 56H89.3828V56.5H89.8828V56ZM86.5078 22.75H88.1953V21.75H86.5078V22.75ZM87.6953 22.25V56H88.6953V22.25H87.6953ZM88.1953 55.5H86.5078V56.5H88.1953V55.5ZM87.0078 56V22.25H86.0078V56H87.0078ZM96.6328 22.75H98.3203V21.75H96.6328V22.75ZM97.8203 22.25V47.5625H98.8203V22.25H97.8203ZM98.3203 48.0625H110.766V47.0625H98.3203V48.0625ZM110.266 47.5625V49.25H111.266V47.5625H110.266ZM110.766 48.75H98.3203V49.75H110.766V48.75ZM97.8203 49.25V50.9375H98.8203V49.25H97.8203ZM98.3203 51.4375H110.766V50.4375H98.3203V51.4375ZM110.266 50.9375V52.625H111.266V50.9375H110.266ZM110.766 52.125H98.3203V53.125H110.766V52.125ZM97.8203 52.625V54.3125H98.8203V52.625H97.8203ZM98.3203 54.8125H110.766V53.8125H98.3203V54.8125ZM110.266 54.3125V56H111.266V54.3125H110.266ZM110.766 55.5H96.6328V56.5H110.766V55.5ZM97.1328 56V22.25H96.1328V56H97.1328ZM93.2578 22.75H94.9453V21.75H93.2578V22.75ZM94.4453 22.25V56H95.4453V22.25H94.4453ZM94.9453 55.5H93.2578V56.5H94.9453V55.5ZM93.7578 56V22.25H92.7578V56H93.7578ZM89.8828 22.75H91.5703V21.75H89.8828V22.75ZM91.0703 22.25V56H92.0703V22.25H91.0703ZM91.5703 55.5H89.8828V56.5H91.5703V55.5ZM90.3828 56V22.25H89.3828V56H90.3828ZM114.492 22.25V21.75H113.992V22.25H114.492ZM116.18 22.25H116.68V21.75H116.18V22.25ZM116.18 56V56.5H116.68V56H116.18ZM114.492 56H113.992V56.5H114.492V56ZM124.617 22.25V21.75H124.117V22.25H124.617ZM126.305 22.25H126.805V21.75H126.305V22.25ZM126.305 47.5625H125.805V48.0625H126.305V47.5625ZM138.75 47.5625H139.25V47.0625H138.75V47.5625ZM138.75 49.25V49.75H139.25V49.25H138.75ZM126.305 49.25V48.75H125.805V49.25H126.305ZM126.305 50.9375H125.805V51.4375H126.305V50.9375ZM138.75 50.9375H139.25V50.4375H138.75V50.9375ZM138.75 52.625V53.125H139.25V52.625H138.75ZM126.305 52.625V52.125H125.805V52.625H126.305ZM126.305 54.3125H125.805V54.8125H126.305V54.3125ZM138.75 54.3125H139.25V53.8125H138.75V54.3125ZM138.75 56V56.5H139.25V56H138.75ZM124.617 56H124.117V56.5H124.617V56ZM121.242 22.25V21.75H120.742V22.25H121.242ZM122.93 22.25H123.43V21.75H122.93V22.25ZM122.93 56V56.5H123.43V56H122.93ZM121.242 56H120.742V56.5H121.242V56ZM117.867 22.25V21.75H117.367V22.25H117.867ZM119.555 22.25H120.055V21.75H119.555V22.25ZM119.555 56V56.5H120.055V56H119.555ZM117.867 56H117.367V56.5H117.867V56ZM114.492 22.75H116.18V21.75H114.492V22.75ZM115.68 22.25V56H116.68V22.25H115.68ZM116.18 55.5H114.492V56.5H116.18V55.5ZM114.992 56V22.25H113.992V56H114.992ZM124.617 22.75H126.305V21.75H124.617V22.75ZM125.805 22.25V47.5625H126.805V22.25H125.805ZM126.305 48.0625H138.75V47.0625H126.305V48.0625ZM138.25 47.5625V49.25H139.25V47.5625H138.25ZM138.75 48.75H126.305V49.75H138.75V48.75ZM125.805 49.25V50.9375H126.805V49.25H125.805ZM126.305 51.4375H138.75V50.4375H126.305V51.4375ZM138.25 50.9375V52.625H139.25V50.9375H138.25ZM138.75 52.125H126.305V53.125H138.75V52.125ZM125.805 52.625V54.3125H126.805V52.625H125.805ZM126.305 54.8125H138.75V53.8125H126.305V54.8125ZM138.25 54.3125V56H139.25V54.3125H138.25ZM138.75 55.5H124.617V56.5H138.75V55.5ZM125.117 56V22.25H124.117V56H125.117ZM121.242 22.75H122.93V21.75H121.242V22.75ZM122.43 22.25V56H123.43V22.25H122.43ZM122.93 55.5H121.242V56.5H122.93V55.5ZM121.742 56V22.25H120.742V56H121.742ZM117.867 22.75H119.555V21.75H117.867V22.75ZM119.055 22.25V56H120.055V22.25H119.055ZM119.555 55.5H117.867V56.5H119.555V55.5ZM118.367 56V22.25H117.367V56H118.367ZM152.602 22.25V21.75H152.102V22.25H152.602ZM154.289 22.25H154.789V21.75H154.289V22.25ZM154.289 56V56.5H154.789V56H154.289ZM152.602 56H152.102V56.5H152.602V56ZM149.227 22.25V21.75H148.727V22.25H149.227ZM150.914 22.25H151.414V21.75H150.914V22.25ZM150.914 56V56.5H151.414V56H150.914ZM149.227 56H148.727V56.5H149.227V56ZM145.852 22.25V21.75H145.352V22.25H145.852ZM147.539 22.25H148.039V21.75H147.539V22.25ZM147.539 56V56.5H148.039V56H147.539ZM145.852 56H145.352V56.5H145.852V56ZM142.477 22.25V21.75H141.977V22.25H142.477ZM144.164 22.25H144.664V21.75H144.164V22.25ZM144.164 56V56.5H144.664V56H144.164ZM142.477 56H141.977V56.5H142.477V56ZM152.602 22.75H154.289V21.75H152.602V22.75ZM153.789 22.25V56H154.789V22.25H153.789ZM154.289 55.5H152.602V56.5H154.289V55.5ZM153.102 56V22.25H152.102V56H153.102ZM149.227 22.75H150.914V21.75H149.227V22.75ZM150.414 22.25V56H151.414V22.25H150.414ZM150.914 55.5H149.227V56.5H150.914V55.5ZM149.727 56V22.25H148.727V56H149.727ZM145.852 22.75H147.539V21.75H145.852V22.75ZM147.039 22.25V56H148.039V22.25H147.039ZM147.539 55.5H145.852V56.5H147.539V55.5ZM146.352 56V22.25H145.352V56H146.352ZM142.477 22.75H144.164V21.75H142.477V22.75ZM143.664 22.25V56H144.664V22.25H143.664ZM144.164 55.5H142.477V56.5H144.164V55.5ZM142.977 56V22.25H141.977V56H142.977ZM163.969 37.9531L163.666 38.351L163.668 38.3527L163.969 37.9531ZM164.438 26.9609L164.724 27.3707L164.726 27.3693L164.438 26.9609ZM180.164 25.7891L180.012 26.2654L180.013 26.2658L180.164 25.7891ZM185.578 28.6953L185.891 29.0851L186.358 28.7104L185.906 28.318L185.578 28.6953ZM184.266 29.75L183.949 30.1372L184.263 30.3935L184.579 30.1398L184.266 29.75ZM165.609 28.1328L165.339 27.7122L165.337 27.7133L165.609 28.1328ZM165.141 36.7578L164.85 37.1647L164.855 37.1683L165.141 36.7578ZM181.758 40.2734L181.455 40.6713L181.459 40.6743L181.758 40.2734ZM181.312 51.2656L181.599 51.6754L181.601 51.6739L181.312 51.2656ZM165.562 52.4141L165.411 52.8904L165.412 52.8908L165.562 52.4141ZM160.172 49.5078L159.859 49.1181L159.392 49.4927L159.844 49.8851L160.172 49.5078ZM161.484 48.4531L161.801 48.0659L161.487 47.8096L161.171 48.0634L161.484 48.4531ZM180.117 50.0703L179.847 49.6497L179.845 49.6508L180.117 50.0703ZM180.586 41.4688L180.295 41.8756L180.3 41.8792L180.586 41.4688ZM161.484 40.3672L161.163 40.7498L161.164 40.7511L161.484 40.3672ZM161.836 24.6641L162.148 25.0545L162.149 25.054L161.836 24.6641ZM166.664 22.2734L166.806 22.7529L166.809 22.7518L166.664 22.2734ZM181.641 22.7656L181.475 23.2374L181.477 23.2383L181.641 22.7656ZM188.203 26.5859L188.501 26.9876L188.982 26.6309L188.545 26.2212L188.203 26.5859ZM186.844 27.5938L186.509 27.965L186.813 28.2391L187.142 27.9954L186.844 27.5938ZM180.867 24.2656L180.709 24.74L180.712 24.7409L180.867 24.2656ZM178.922 48.875L179.168 49.31L179.17 49.3091L178.922 48.875ZM167.039 49.3906L166.906 49.8727L166.911 49.8738L167.039 49.3906ZM162.82 47.3984L162.515 47.0026L162.003 47.3977L162.514 47.7937L162.82 47.3984ZM164.156 46.3672L164.44 45.9553L164.139 45.7486L163.851 45.9714L164.156 46.3672ZM166.805 29.3516L167.051 29.7866L167.053 29.7857L166.805 29.3516ZM178.711 28.8359L178.578 29.318L178.582 29.3191L178.711 28.8359ZM182.93 30.8047L183.235 31.2005L183.747 30.8054L183.236 30.4094L182.93 30.8047ZM181.594 31.8359L181.31 32.2479L181.611 32.4545L181.899 32.2317L181.594 31.8359ZM184.266 37.8594L183.944 38.242L183.945 38.2433L184.266 37.8594ZM183.891 53.5625L184.203 53.9529L184.204 53.9525L183.891 53.5625ZM179.086 55.9531L179.228 56.4326L179.231 56.4315L179.086 55.9531ZM164.109 55.4609L163.944 55.9327L163.946 55.9336L164.109 55.4609ZM157.547 51.6172L157.249 51.2155L156.768 51.5723L157.205 51.982L157.547 51.6172ZM158.906 50.6094L159.241 50.2381L158.937 49.964L158.608 50.2077L158.906 50.6094ZM164.883 53.9609L164.725 54.4353L164.727 54.4362L164.883 53.9609ZM174.211 39.4453H171.539V40.4453H174.211V39.4453ZM171.539 39.4453C168.316 39.4453 165.915 38.7917 164.269 37.5535L163.668 38.3527C165.553 39.7708 168.2 40.4453 171.539 40.4453V39.4453ZM164.272 37.5552C162.638 36.3121 161.82 34.6197 161.82 32.4219H160.82C160.82 34.9115 161.768 36.9067 163.666 38.351L164.272 37.5552ZM161.82 32.4219C161.82 30.4254 162.755 28.7474 164.724 27.3707L164.151 26.5512C161.964 28.0807 160.82 30.0433 160.82 32.4219H161.82ZM164.726 27.3693C166.688 25.9823 169.487 25.2578 173.18 25.2578V24.2578C169.373 24.2578 166.343 25.002 164.149 26.5526L164.726 27.3693ZM173.18 25.2578C175.633 25.2578 177.91 25.5951 180.012 26.2654L180.316 25.3127C178.106 24.608 175.726 24.2578 173.18 24.2578V25.2578ZM180.013 26.2658C182.132 26.9366 183.873 27.8754 185.25 29.0726L185.906 28.318C184.408 27.0152 182.54 26.0166 180.315 25.3124L180.013 26.2658ZM185.265 28.3056L183.952 29.3602L184.579 30.1398L185.891 29.0851L185.265 28.3056ZM184.582 29.3628C183.24 28.266 181.568 27.4263 179.58 26.8334L179.295 27.7916C181.182 28.3549 182.729 29.1403 183.949 30.1372L184.582 29.3628ZM179.58 26.8334C177.592 26.24 175.457 25.9453 173.18 25.9453V26.9453C175.371 26.9453 177.408 27.2287 179.295 27.7916L179.58 26.8334ZM173.18 25.9453C169.838 25.9453 167.204 26.5133 165.339 27.7122L165.88 28.5534C167.515 27.5023 169.927 26.9453 173.18 26.9453V25.9453ZM165.337 27.7133C163.492 28.91 162.508 30.4836 162.508 32.4219H163.508C163.508 30.8914 164.258 29.6056 165.881 28.5523L165.337 27.7133ZM162.508 32.4219C162.508 34.4445 163.286 36.0474 164.85 37.1647L165.431 36.3509C164.152 35.4369 163.508 34.1492 163.508 32.4219H162.508ZM164.855 37.1683C166.414 38.2525 168.666 38.7578 171.539 38.7578V37.7578C168.756 37.7578 166.743 37.2632 165.426 36.3474L164.855 37.1683ZM171.539 38.7578H174.117V37.7578H171.539V38.7578ZM174.117 38.7578C177.388 38.7578 179.81 39.4196 181.455 40.6713L182.061 39.8756C180.174 38.4398 177.503 37.7578 174.117 37.7578V38.7578ZM181.459 40.6743C183.106 41.9021 183.93 43.5853 183.93 45.7812H184.93C184.93 43.2897 183.972 41.301 182.057 39.8726L181.459 40.6743ZM183.93 45.7812C183.93 47.7782 182.994 49.4647 181.024 50.8573L181.601 51.6739C183.787 50.129 184.93 48.1593 184.93 45.7812H183.93ZM181.026 50.8559C179.063 52.2283 176.264 52.9453 172.57 52.9453V53.9453C176.376 53.9453 179.405 53.2092 181.599 51.6754L181.026 50.8559ZM172.57 52.9453C170.117 52.9453 167.832 52.608 165.713 51.9374L165.412 52.8908C167.637 53.5951 170.024 53.9453 172.57 53.9453V52.9453ZM165.714 51.9377C163.611 51.267 161.877 50.3283 160.5 49.1305L159.844 49.8851C161.341 51.1874 163.201 52.1861 165.411 52.8904L165.714 51.9377ZM160.485 49.8976L161.798 48.8429L161.171 48.0634L159.859 49.1181L160.485 49.8976ZM161.168 48.8403C162.51 49.9372 164.182 50.7768 166.17 51.3698L166.455 50.4115C164.568 49.8482 163.021 49.0628 161.801 48.0659L161.168 48.8403ZM166.17 51.3698C168.158 51.9631 170.293 52.2578 172.57 52.2578V51.2578C170.379 51.2578 168.342 50.9744 166.455 50.4115L166.17 51.3698ZM172.57 52.2578C175.911 52.2578 178.539 51.69 180.389 50.4898L179.845 49.6508C178.227 50.7006 175.823 51.2578 172.57 51.2578V52.2578ZM180.388 50.4909C182.248 49.2951 183.242 47.7216 183.242 45.7812H182.242C182.242 47.3096 181.487 48.5955 179.847 49.6497L180.388 50.4909ZM183.242 45.7812C183.242 43.7562 182.454 42.1593 180.871 41.0583L180.3 41.8792C181.593 42.7782 182.242 44.0563 182.242 45.7812H183.242ZM180.877 41.0619C179.332 39.9586 177.084 39.4453 174.211 39.4453V40.4453C176.994 40.4453 178.996 40.9477 180.295 41.8756L180.877 41.0619ZM174.211 42.8203H171.539V43.8203H174.211V42.8203ZM171.539 42.8203C167.265 42.8203 164.043 41.8513 161.805 39.9833L161.164 40.7511C163.644 42.8206 167.126 43.8203 171.539 43.8203V42.8203ZM161.806 39.9845C159.569 38.1029 158.445 35.5993 158.445 32.4219H157.445C157.445 35.8695 158.681 38.6627 161.163 40.7498L161.806 39.9845ZM158.445 32.4219C158.445 30.9832 158.763 29.6388 159.399 28.382L158.507 27.9305C157.799 29.33 157.445 30.8293 157.445 32.4219H158.445ZM159.399 28.382C160.036 27.1224 160.95 26.0132 162.148 25.0545L161.524 24.2736C160.222 25.3149 159.214 26.5338 158.507 27.9305L159.399 28.382ZM162.149 25.054C163.353 24.088 164.9 23.3171 166.806 22.7529L166.522 21.794C164.522 22.386 162.85 23.2089 161.523 24.2741L162.149 25.054ZM166.809 22.7518C168.708 22.1746 170.83 21.8828 173.18 21.8828V20.8828C170.748 20.8828 168.526 21.1848 166.519 21.7951L166.809 22.7518ZM173.18 21.8828C176.144 21.8828 178.907 22.3357 181.475 23.2374L181.806 22.2939C179.124 21.3518 176.247 20.8828 173.18 20.8828V21.8828ZM181.477 23.2383C184.046 24.1251 186.17 25.3652 187.861 26.9507L188.545 26.2212C186.736 24.5254 184.485 23.2186 181.804 22.293L181.477 23.2383ZM187.905 26.1843L186.546 27.1921L187.142 27.9954L188.501 26.9876L187.905 26.1843ZM187.179 27.2225C185.525 25.7308 183.468 24.5896 181.023 23.7904L180.712 24.7409C183.047 25.5041 184.975 26.5817 186.509 27.965L187.179 27.2225ZM181.025 23.7913C178.58 22.976 175.963 22.5703 173.18 22.5703V23.5703C175.865 23.5703 178.374 23.9615 180.709 24.74L181.025 23.7913ZM173.18 22.5703C168.896 22.5703 165.466 23.474 162.945 25.3403L163.54 26.1441C165.831 24.4479 169.025 23.5703 173.18 23.5703V22.5703ZM162.945 25.3403C160.429 27.2028 159.133 29.5708 159.133 32.4219H160.133C160.133 29.9292 161.243 27.8441 163.54 26.1441L162.945 25.3403ZM159.133 32.4219C159.133 35.3895 160.224 37.7798 162.413 39.5383L163.04 38.7586C161.104 37.2045 160.133 35.1105 160.133 32.4219H159.133ZM162.413 39.5383C164.597 41.2914 167.662 42.1328 171.539 42.1328V41.1328C167.792 41.1328 164.982 40.318 163.04 38.7586L162.413 39.5383ZM171.539 42.1328H174.117V41.1328H171.539V42.1328ZM174.117 42.1328C176.396 42.1328 178.022 42.4764 179.064 43.0991C180.064 43.6973 180.555 44.5686 180.555 45.7812H181.555C181.555 44.2282 180.889 43.0254 179.577 42.2408C178.306 41.4806 176.463 41.1328 174.117 41.1328V42.1328ZM180.555 45.7812C180.555 46.796 179.995 47.6857 178.674 48.4409L179.17 49.3091C180.692 48.4393 181.555 47.2665 181.555 45.7812H180.555ZM178.675 48.44C177.381 49.1732 175.368 49.5703 172.57 49.5703V50.5703C175.429 50.5703 177.65 50.1705 179.168 49.31L178.675 48.44ZM172.57 49.5703C170.624 49.5703 168.824 49.3481 167.168 48.9074L166.911 49.8738C168.66 50.3394 170.548 50.5703 172.57 50.5703V49.5703ZM167.172 48.9086C165.516 48.4528 164.173 47.8137 163.127 47.0032L162.514 47.7937C163.687 48.702 165.156 49.3909 166.906 49.8727L167.172 48.9086ZM163.126 47.7942L164.462 46.763L163.851 45.9714L162.515 47.0026L163.126 47.7942ZM163.873 46.7791C165.943 48.2036 168.864 48.8828 172.57 48.8828V47.8828C168.964 47.8828 166.275 47.2183 164.44 45.9553L163.873 46.7791ZM172.57 48.8828C174.93 48.8828 176.736 48.6419 177.929 48.1134L177.524 47.1991C176.53 47.6394 174.898 47.8828 172.57 47.8828V48.8828ZM177.929 48.1134C179.09 47.5995 179.867 46.8449 179.867 45.7812H178.867C178.867 46.2488 178.551 46.7443 177.524 47.1991L177.929 48.1134ZM179.867 45.7812C179.867 45.2634 179.755 44.7889 179.508 44.3799C179.26 43.9695 178.894 43.6569 178.439 43.4338L177.999 44.3318C178.309 44.4838 178.518 44.675 178.652 44.8975C178.788 45.1212 178.867 45.4085 178.867 45.7812H179.867ZM178.439 43.4338C177.561 43.0038 176.122 42.8203 174.211 42.8203V43.8203C176.113 43.8203 177.346 44.0119 177.999 44.3318L178.439 43.4338ZM174.211 36.0703H171.539V37.0703H174.211V36.0703ZM171.539 36.0703C169.292 36.0703 167.69 35.7269 166.665 35.1056C165.681 34.5088 165.195 33.6377 165.195 32.4219H164.195C164.195 33.9717 164.85 35.1748 166.147 35.9608C167.403 36.7223 169.224 37.0703 171.539 37.0703V36.0703ZM165.195 32.4219C165.195 31.4056 165.749 30.5247 167.051 29.7866L166.558 28.9166C165.048 29.7721 164.195 30.9382 164.195 32.4219H165.195ZM167.053 29.7857C168.362 29.0373 170.383 28.6328 173.18 28.6328V27.6328C170.32 27.6328 168.091 28.0408 166.557 28.9174L167.053 29.7857ZM173.18 28.6328C175.125 28.6328 176.923 28.8625 178.578 29.318L178.844 28.3539C177.092 27.8719 175.203 27.6328 173.18 27.6328V28.6328ZM178.582 29.3191C180.237 29.7593 181.578 30.3905 182.623 31.2L183.236 30.4094C182.062 29.5002 180.591 28.8188 178.839 28.3527L178.582 29.3191ZM182.624 30.4089L181.288 31.4401L181.899 32.2317L183.235 31.2005L182.624 30.4089ZM181.877 31.424C179.807 29.9995 176.886 29.3203 173.18 29.3203V30.3203C176.786 30.3203 179.475 30.9849 181.31 32.2479L181.877 31.424ZM173.18 29.3203C170.82 29.3203 169.014 29.5613 167.821 30.0897L168.226 31.004C169.22 30.5637 170.852 30.3203 173.18 30.3203V29.3203ZM167.821 30.0897C166.66 30.6037 165.883 31.3582 165.883 32.4219H166.883C166.883 31.9543 167.199 31.4588 168.226 31.004L167.821 30.0897ZM165.883 32.4219C165.883 32.9397 165.995 33.4142 166.242 33.8233C166.49 34.2336 166.856 34.5463 167.311 34.7693L167.751 33.8713C167.441 33.7194 167.232 33.5281 167.098 33.3057C166.962 33.0819 166.883 32.7947 166.883 32.4219H165.883ZM167.311 34.7693C168.189 35.1994 169.628 35.3828 171.539 35.3828V34.3828C169.637 34.3828 168.404 34.1913 167.751 33.8713L167.311 34.7693ZM171.539 35.3828H174.211V34.3828H171.539V35.3828ZM174.211 35.3828C178.484 35.3828 181.705 36.3593 183.944 38.242L184.587 37.4767C182.107 35.3907 178.625 34.3828 174.211 34.3828V35.3828ZM183.945 38.2433C186.181 40.1089 187.305 42.6039 187.305 45.7812H188.305C188.305 42.3336 187.069 39.5473 184.586 37.4755L183.945 38.2433ZM187.305 45.7812C187.305 47.2199 186.987 48.5643 186.351 49.8212L187.243 50.2726C187.951 48.8732 188.305 47.3738 188.305 45.7812H187.305ZM186.351 49.8212C185.714 51.0801 184.793 52.1975 183.578 53.1725L184.204 53.9525C185.52 52.8963 186.536 51.6699 187.243 50.2726L186.351 49.8212ZM183.578 53.1721C182.389 54.1232 180.849 54.8946 178.941 55.4747L179.231 56.4315C181.229 55.8242 182.892 55.0018 184.203 53.9529L183.578 53.1721ZM178.944 55.4737C177.044 56.0361 174.921 56.3203 172.57 56.3203V57.3203C175.001 57.3203 177.221 57.0264 179.228 56.4326L178.944 55.4737ZM172.57 56.3203C169.605 56.3203 166.841 55.8749 164.273 54.9883L163.946 55.9336C166.628 56.8594 169.504 57.3203 172.57 57.3203V56.3203ZM164.275 54.9892C161.705 54.0867 159.581 52.8385 157.889 51.2524L157.205 51.982C159.013 53.6771 161.264 54.9914 163.944 55.9327L164.275 54.9892ZM157.845 52.0188L159.204 51.011L158.608 50.2077L157.249 51.2155L157.845 52.0188ZM158.571 50.9807C160.224 52.4717 162.28 53.6204 164.725 54.4353L165.041 53.4866C162.704 52.7078 160.776 51.6221 159.241 50.2381L158.571 50.9807ZM164.727 54.4362C167.172 55.2352 169.788 55.6328 172.57 55.6328V54.6328C169.884 54.6328 167.374 54.2492 165.038 53.4857L164.727 54.4362ZM172.57 55.6328C176.854 55.6328 180.284 54.7292 182.805 52.8628L182.21 52.0591C179.919 53.7552 176.725 54.6328 172.57 54.6328V55.6328ZM182.805 52.8628C185.321 51.0003 186.617 48.6324 186.617 45.7812H185.617C185.617 48.2739 184.507 50.359 182.21 52.0591L182.805 52.8628ZM186.617 45.7812C186.617 42.8136 185.526 40.4233 183.337 38.6648L182.71 39.4445C184.646 40.9986 185.617 43.0926 185.617 45.7812H186.617ZM183.337 38.6648C181.153 36.9117 178.088 36.0703 174.211 36.0703V37.0703C177.958 37.0703 180.768 37.8851 182.71 39.4445L183.337 38.6648ZM197.484 31.6016V31.1016H196.984V31.6016H197.484ZM199.172 31.6016H199.672V31.1016H199.172V31.6016ZM199.172 56V56.5H199.672V56H199.172ZM197.484 56H196.984V56.5H197.484V56ZM190.734 23.9375H190.234V24.4375H190.734V23.9375ZM190.734 22.25V21.75H190.234V22.25H190.734ZM216.047 22.25H216.547V21.75H216.047V22.25ZM216.047 23.9375V24.4375H216.547V23.9375H216.047ZM190.734 27.3125H190.234V27.8125H190.734V27.3125ZM190.734 25.625V25.125H190.234V25.625H190.734ZM216.047 25.625H216.547V25.125H216.047V25.625ZM216.047 27.3125V27.8125H216.547V27.3125H216.047ZM190.734 30.6875H190.234V31.1875H190.734V30.6875ZM190.734 29V28.5H190.234V29H190.734ZM216.047 29H216.547V28.5H216.047V29ZM216.047 30.6875V31.1875H216.547V30.6875H216.047ZM207.609 31.6016V31.1016H207.109V31.6016H207.609ZM209.297 31.6016H209.797V31.1016H209.297V31.6016ZM209.297 56V56.5H209.797V56H209.297ZM207.609 56H207.109V56.5H207.609V56ZM204.234 31.6016V31.1016H203.734V31.6016H204.234ZM205.922 31.6016H206.422V31.1016H205.922V31.6016ZM205.922 56V56.5H206.422V56H205.922ZM204.234 56H203.734V56.5H204.234V56ZM200.859 31.6016V31.1016H200.359V31.6016H200.859ZM202.547 31.6016H203.047V31.1016H202.547V31.6016ZM202.547 56V56.5H203.047V56H202.547ZM200.859 56H200.359V56.5H200.859V56ZM197.484 32.1016H199.172V31.1016H197.484V32.1016ZM198.672 31.6016V56H199.672V31.6016H198.672ZM199.172 55.5H197.484V56.5H199.172V55.5ZM197.984 56V31.6016H196.984V56H197.984ZM191.234 23.9375V22.25H190.234V23.9375H191.234ZM190.734 22.75H216.047V21.75H190.734V22.75ZM215.547 22.25V23.9375H216.547V22.25H215.547ZM216.047 23.4375H190.734V24.4375H216.047V23.4375ZM191.234 27.3125V25.625H190.234V27.3125H191.234ZM190.734 26.125H216.047V25.125H190.734V26.125ZM215.547 25.625V27.3125H216.547V25.625H215.547ZM216.047 26.8125H190.734V27.8125H216.047V26.8125ZM191.234 30.6875V29H190.234V30.6875H191.234ZM190.734 29.5H216.047V28.5H190.734V29.5ZM215.547 29V30.6875H216.547V29H215.547ZM216.047 30.1875H190.734V31.1875H216.047V30.1875ZM207.609 32.1016H209.297V31.1016H207.609V32.1016ZM208.797 31.6016V56H209.797V31.6016H208.797ZM209.297 55.5H207.609V56.5H209.297V55.5ZM208.109 56V31.6016H207.109V56H208.109ZM204.234 32.1016H205.922V31.1016H204.234V32.1016ZM205.422 31.6016V56H206.422V31.6016H205.422ZM205.922 55.5H204.234V56.5H205.922V55.5ZM204.734 56V31.6016H203.734V56H204.734ZM200.859 32.1016H202.547V31.1016H200.859V32.1016ZM202.047 31.6016V56H203.047V31.6016H202.047ZM202.547 55.5H200.859V56.5H202.547V55.5ZM201.359 56V31.6016H200.359V56H201.359ZM231.07 41.6562L230.596 41.8144L230.597 41.8174L231.07 41.6562ZM232.078 43.5078L231.696 43.8302L231.701 43.8367L231.707 43.843L232.078 43.5078ZM233.625 44.7266L233.383 45.1643L233.392 45.169L233.625 44.7266ZM235.477 45.4062L235.369 45.8946L235.372 45.8952L235.477 45.4062ZM242.438 44.3047H242.938V43.4248L242.182 43.8751L242.438 44.3047ZM242.438 45.9922L242.67 46.4347L242.938 46.2942V45.9922H242.438ZM234.352 46.7422L234.2 47.2188L234.209 47.2215L234.352 46.7422ZM231.656 45.3594L231.351 45.7557L231.356 45.7594L231.656 45.3594ZM229.711 42.8281L229.252 43.0275L229.256 43.0355L229.711 42.8281ZM243.07 33.5234L242.732 33.8916L242.736 33.8951L243.07 33.5234ZM245.414 56V56.5H245.914V56H245.414ZM243.727 56H243.227V56.5H243.727V56ZM248.789 56V56.5H249.289V56H248.789ZM247.102 56H246.602V56.5H247.102V56ZM244.266 32.3281L243.926 32.6951L243.928 32.6966L244.266 32.3281ZM228.141 43.4141L227.682 43.6124L227.684 43.6171L228.141 43.4141ZM233.602 48.2188L233.438 48.6912L233.445 48.6937L233.602 48.2188ZM242.438 47.5625H242.938V46.7429L242.209 47.1179L242.438 47.5625ZM242.438 49.3438L242.63 49.8053L242.938 49.6771V49.3438H242.438ZM232.922 49.6953L232.757 50.1675L232.762 50.1691L232.922 49.6953ZM229.195 47.6094L228.873 47.9917L228.876 47.9944L229.195 47.6094ZM226.594 44.0469L226.135 44.2468L226.136 44.2486L226.594 44.0469ZM228.891 31.2734L229.243 31.6283L229.243 31.6283L228.891 31.2734ZM243.023 29.3984L242.794 29.8425L242.797 29.8441L243.023 29.3984ZM252.164 56V56.5H252.664V56H252.164ZM250.477 56H249.977V56.5H250.477V56ZM248.695 32.5156L248.264 32.7684L248.266 32.7724L248.695 32.5156ZM230.344 28.0625L230.591 28.4968L230.594 28.4954L230.344 28.0625ZM225.633 32.7031L225.201 32.451L225.2 32.4536L225.633 32.7031ZM225.047 44.6094L224.59 44.812L224.591 44.8137L225.047 44.6094ZM228 48.6641L227.674 49.043L227.676 49.045L228 48.6641ZM232.242 51.1484L232.066 51.6164L232.072 51.6185L232.242 51.1484ZM242.438 50.9141H242.938V50.154L242.24 50.4549L242.438 50.9141ZM242.438 52.625L242.614 53.0928L242.938 52.9708V52.625H242.438ZM223.383 33.5L222.923 33.3025L222.922 33.306L223.383 33.5ZM244.711 26.4688L244.477 26.9104L244.478 26.9111L244.711 26.4688ZM255.539 56V56.5H256.039V56H255.539ZM253.852 56H253.352V56.5H253.852V56ZM251.625 30.8281L251.194 31.0822L251.195 31.0834L251.625 30.8281ZM245.555 25.0156L245.318 25.4562L245.32 25.4573L245.555 25.0156ZM222.727 31.0391L223.159 31.2907L223.159 31.2895L222.727 31.0391ZM221.484 44.6094L221.013 44.7751L221.014 44.7802L221.484 44.6094ZM224.016 49.1328L223.629 49.4497L223.633 49.4547L224.016 49.1328ZM227.719 52.4609L227.439 52.8755L227.441 52.877L227.719 52.4609ZM232.242 54.5938L232.098 55.0725L232.099 55.0729L232.242 54.5938ZM242.438 54.2656H242.938V53.5102L242.242 53.8054L242.438 54.2656ZM242.438 56L242.603 56.4717L242.938 56.3543V56H242.438ZM231.562 56.1172L231.42 56.5963L231.421 56.5967L231.562 56.1172ZM226.594 53.7734L226.31 54.1849L226.314 54.1882L226.594 53.7734ZM222.586 50.0938L222.198 50.4092L222.2 50.4113L222.586 50.0938ZM220.242 32.2109L219.781 32.0179L219.78 32.0196L220.242 32.2109ZM223.992 26.5156L223.637 26.1638L223.637 26.1638L223.992 26.5156ZM229.758 22.6953L229.95 23.1569L229.952 23.1562L229.758 22.6953ZM250.195 26.2344L249.852 26.5981L249.854 26.5998L250.195 26.2344ZM244.227 39.3594C244.227 37.2494 243.571 35.5531 242.207 34.3445L241.543 35.093C242.648 36.0719 243.227 37.4694 243.227 39.3594H244.227ZM242.207 34.3445C240.861 33.1521 239.1 32.5781 236.977 32.5781V33.5781C238.915 33.5781 240.42 34.0979 241.543 35.093L242.207 34.3445ZM236.977 32.5781C235.129 32.5781 233.528 33.2216 232.2 34.499L232.894 35.2197C234.034 34.1222 235.387 33.5781 236.977 33.5781V32.5781ZM232.2 34.499C230.872 35.7772 230.195 37.3276 230.195 39.125H231.195C231.195 37.6099 231.753 36.3166 232.894 35.2197L232.2 34.499ZM230.195 39.125C230.195 40.1065 230.326 41.0049 230.596 41.8144L231.545 41.4981C231.314 40.8076 231.195 40.0185 231.195 39.125H230.195ZM230.597 41.8174C230.863 42.6 231.227 43.2747 231.696 43.8302L232.46 43.1854C232.085 42.741 231.777 42.1812 231.544 41.4951L230.597 41.8174ZM231.707 43.843C232.183 44.37 232.743 44.8104 233.383 45.1642L233.867 44.2889C233.32 43.9865 232.848 43.6144 232.449 43.1727L231.707 43.843ZM233.392 45.169C234.025 45.5019 234.684 45.7442 235.369 45.8946L235.584 44.9179C234.988 44.7871 234.413 44.5762 233.858 44.2841L233.392 45.169ZM235.372 45.8952C236.066 46.0438 236.796 46.1172 237.562 46.1172V45.1172C236.86 45.1172 236.2 45.0499 235.581 44.9173L235.372 45.8952ZM237.562 46.1172C239.43 46.1172 241.144 45.6573 242.693 44.7342L242.182 43.8751C240.794 44.7021 239.258 45.1172 237.562 45.1172V46.1172ZM241.938 44.3047V45.9922H242.938V44.3047H241.938ZM242.205 45.5496C240.763 46.3079 239.21 46.6875 237.539 46.6875V47.6875C239.368 47.6875 241.081 47.2702 242.67 46.4347L242.205 45.5496ZM237.539 46.6875C236.458 46.6875 235.444 46.545 234.494 46.2629L234.209 47.2215C235.259 47.5331 236.37 47.6875 237.539 47.6875V46.6875ZM234.503 46.2656C233.571 45.9698 232.723 45.5345 231.956 44.9594L231.356 45.7594C232.214 46.403 233.163 46.8896 234.2 47.2187L234.503 46.2656ZM231.961 44.9631C231.222 44.3944 230.621 43.6194 230.166 42.6207L229.256 43.0355C229.769 44.1619 230.466 45.0744 231.351 45.7557L231.961 44.9631ZM230.169 42.6288C229.733 41.624 229.508 40.4594 229.508 39.125H228.508C228.508 40.5719 228.752 41.876 229.252 43.0275L230.169 42.6288ZM229.508 39.125C229.508 37.1006 230.213 35.4009 231.633 33.996L230.93 33.2852C229.318 34.8803 228.508 36.8369 228.508 39.125H229.508ZM231.633 33.996C233.048 32.5956 234.817 31.8906 236.977 31.8906V30.8906C234.574 30.8906 232.546 31.6857 230.93 33.2852L231.633 33.996ZM236.977 31.8906C239.393 31.8906 241.295 32.5707 242.732 33.8916L243.409 33.1553C241.752 31.6325 239.591 30.8906 236.977 30.8906V31.8906ZM242.736 33.8951C244.174 35.1892 244.914 36.9885 244.914 39.3594H245.914C245.914 36.7615 245.092 34.6702 243.405 33.1518L242.736 33.8951ZM244.914 39.3594V56H245.914V39.3594H244.914ZM245.414 55.5H243.727V56.5H245.414V55.5ZM244.227 56V39.3594H243.227V56H244.227ZM248.289 39.3594V56H249.289V39.3594H248.289ZM248.789 55.5H247.102V56.5H248.789V55.5ZM247.602 56V39.3594H246.602V56H247.602ZM247.602 39.3594C247.602 36.2894 246.613 33.8034 244.604 31.9597L243.928 32.6966C245.699 34.3216 246.602 36.5231 246.602 39.3594H247.602ZM244.605 31.9612C242.608 30.1126 240.051 29.2031 236.977 29.2031V30.2031C239.84 30.2031 242.142 31.0436 243.926 32.6951L244.605 31.9612ZM236.977 29.2031C234.103 29.2031 231.677 30.1634 229.734 32.0903L230.438 32.8004C232.182 31.071 234.35 30.2031 236.977 30.2031V29.2031ZM229.734 32.0903C227.794 34.0134 226.82 36.3683 226.82 39.125H227.82C227.82 36.6317 228.69 34.5334 230.438 32.8004L229.734 32.0903ZM226.82 39.125C226.82 40.7764 227.104 42.2751 227.682 43.6124L228.6 43.2157C228.084 42.0217 227.82 40.6611 227.82 39.125H226.82ZM227.684 43.6171C228.274 44.9458 229.078 46.0289 230.101 46.8507L230.727 46.0711C229.844 45.3617 229.132 44.4136 228.598 43.211L227.684 43.6171ZM230.101 46.8507C231.099 47.6528 232.213 48.2664 233.438 48.6912L233.765 47.7463C232.647 47.3586 231.635 46.8003 230.727 46.0711L230.101 46.8507ZM233.445 48.6937C234.686 49.1017 235.989 49.3047 237.352 49.3047V48.3047C236.089 48.3047 234.892 48.117 233.758 47.7438L233.445 48.6937ZM237.352 49.3047C239.214 49.3047 240.987 48.871 242.666 48.0071L242.209 47.1179C240.669 47.9102 239.052 48.3047 237.352 48.3047V49.3047ZM241.938 47.5625V49.3438H242.938V47.5625H241.938ZM242.245 48.8822C240.615 49.5616 238.977 49.8984 237.328 49.8984V50.8984C239.117 50.8984 240.885 50.5322 242.63 49.8053L242.245 48.8822ZM237.328 49.8984C235.833 49.8984 234.418 49.672 233.082 49.2215L232.762 50.1691C234.207 50.6561 235.73 50.8984 237.328 50.8984V49.8984ZM233.086 49.2231C231.75 48.7576 230.56 48.0912 229.514 47.2244L228.876 47.9944C230.018 48.9401 231.313 49.6642 232.757 50.1675L233.086 49.2231ZM229.518 47.2271C228.488 46.3595 227.665 45.2363 227.051 43.8452L226.136 44.2486C226.804 45.7637 227.715 47.0155 228.873 47.9917L229.518 47.2271ZM227.052 43.847C226.442 42.449 226.133 40.8777 226.133 39.125H225.133C225.133 40.9973 225.464 42.7072 226.135 44.2468L227.052 43.847ZM226.133 39.125C226.133 36.1792 227.166 33.69 229.243 31.6283L228.538 30.9186C226.271 33.1693 225.133 35.9145 225.133 39.125H226.133ZM229.243 31.6283C231.333 29.5534 233.9 28.5156 236.977 28.5156V27.5156C233.647 27.5156 230.824 28.6498 228.538 30.9186L229.243 31.6283ZM236.977 28.5156C239.156 28.5156 241.092 28.9617 242.794 29.8425L243.253 28.9544C241.393 27.9915 239.297 27.5156 236.977 27.5156V28.5156ZM242.797 29.8441C244.493 30.707 245.829 31.961 246.812 33.6149L247.672 33.1038C246.593 31.289 245.117 29.9024 243.25 28.9528L242.797 29.8441ZM246.812 33.6149C247.792 35.2628 248.289 37.1724 248.289 39.3594H249.289C249.289 37.0151 248.755 34.9247 247.672 33.1038L246.812 33.6149ZM251.664 39.3594V56H252.664V39.3594H251.664ZM252.164 55.5H250.477V56.5H252.164V55.5ZM250.977 56V39.3594H249.977V56H250.977ZM250.977 39.3594C250.977 36.7015 250.363 34.3295 249.124 32.2589L248.266 32.7724C249.402 34.6705 249.977 36.861 249.977 39.3594H250.977ZM249.127 32.2629C247.907 30.1819 246.229 28.5843 244.098 27.4781L243.637 28.3656C245.6 29.3845 247.139 30.8493 248.264 32.7684L249.127 32.2629ZM244.098 27.4781C241.972 26.3749 239.595 25.8281 236.977 25.8281V26.8281C239.452 26.8281 241.668 27.3438 243.637 28.3656L244.098 27.4781ZM236.977 25.8281C234.471 25.8281 232.173 26.4272 230.093 27.6296L230.594 28.4954C232.514 27.3853 234.638 26.8281 236.977 26.8281V25.8281ZM230.096 27.6282C228.021 28.8115 226.387 30.4206 225.201 32.451L226.065 32.9553C227.16 31.0794 228.666 29.5948 230.591 28.4968L230.096 27.6282ZM225.2 32.4536C224.029 34.4852 223.445 36.7123 223.445 39.125H224.445C224.445 36.8815 224.986 34.8273 226.066 32.9527L225.2 32.4536ZM223.445 39.125C223.445 41.1875 223.824 43.0856 224.59 44.812L225.504 44.4067C224.801 42.8206 224.445 41.0625 224.445 39.125H223.445ZM224.591 44.8137C225.352 46.5143 226.378 47.9278 227.674 49.043L228.326 48.2851C227.153 47.2753 226.211 45.9857 225.503 44.4051L224.591 44.8137ZM227.676 49.045C228.971 50.1459 230.436 51.003 232.066 51.6164L232.418 50.6805C230.893 50.1064 229.529 49.3073 228.324 48.2831L227.676 49.045ZM232.072 51.6185C233.721 52.2169 235.443 52.5156 237.234 52.5156V51.5156C235.557 51.5156 233.951 51.2362 232.413 50.6784L232.072 51.6185ZM237.234 52.5156C239.072 52.5156 240.873 52.1331 242.635 51.3732L242.24 50.4549C240.596 51.1637 238.928 51.5156 237.234 51.5156V52.5156ZM241.938 50.9141V52.625H242.938V50.9141H241.938ZM242.261 52.1572C240.53 52.8101 238.84 53.1328 237.188 53.1328V54.1328C238.973 54.1328 240.782 53.7837 242.614 53.0928L242.261 52.1572ZM237.188 53.1328C234.563 53.1328 232.16 52.5419 229.971 51.3644L229.498 52.245C231.84 53.5049 234.406 54.1328 237.188 54.1328V53.1328ZM229.971 51.3644C227.785 50.1885 226.038 48.5248 224.724 46.3651L223.87 46.8849C225.275 49.1939 227.152 50.9834 229.498 52.245L229.971 51.3644ZM224.724 46.3651C223.415 44.2136 222.758 41.8045 222.758 39.125H221.758C221.758 41.9768 222.46 44.5676 223.87 46.8849L224.724 46.3651ZM222.758 39.125C222.758 37.2216 223.12 35.4127 223.844 33.694L222.922 33.306C222.146 35.1498 221.758 37.0909 221.758 39.125H222.758ZM223.842 33.6974C224.584 31.9711 225.581 30.4859 226.83 29.2364L226.123 28.5293C224.779 29.8735 223.713 31.4664 222.923 33.3026L223.842 33.6974ZM226.83 29.2364C228.077 27.9891 229.584 26.9928 231.357 26.2503L230.971 25.3279C229.088 26.1166 227.469 27.1828 226.123 28.5293L226.83 29.2364ZM231.357 26.2503C233.122 25.5111 234.994 25.1406 236.977 25.1406V24.1406C234.866 24.1406 232.862 24.5358 230.971 25.3279L231.357 26.2503ZM236.977 25.1406C239.763 25.1406 242.259 25.7337 244.477 26.9104L244.945 26.0271C242.569 24.7663 239.909 24.1406 236.977 24.1406V25.1406ZM244.478 26.9111C246.708 28.0866 248.46 29.7559 249.741 31.926L250.602 31.4177C249.227 29.0879 247.339 27.2884 244.944 26.0264L244.478 26.9111ZM249.741 31.926C251.019 34.0915 251.664 36.5645 251.664 39.3594H252.664C252.664 36.4042 251.981 33.7523 250.602 31.4177L249.741 31.926ZM255.039 39.3594V56H256.039V39.3594H255.039ZM255.539 55.5H253.852V56.5H255.539V55.5ZM254.352 56V39.3594H253.352V56H254.352ZM254.352 39.3594C254.352 36.091 253.59 33.1574 252.055 30.5729L251.195 31.0834C252.629 33.4988 253.352 36.2528 253.352 39.3594H254.352ZM252.056 30.574C250.525 27.9795 248.434 25.9768 245.789 24.5739L245.32 25.4573C247.801 26.7732 249.757 28.6455 251.194 31.0822L252.056 30.574ZM245.791 24.575C243.15 23.1578 240.208 22.4531 236.977 22.4531V23.4531C240.058 23.4531 242.835 24.1235 245.318 25.4562L245.791 24.575ZM236.977 22.4531C233.861 22.4531 231.001 23.2009 228.406 24.6999L228.906 25.5658C231.343 24.1585 234.029 23.4531 236.977 23.4531V22.4531ZM228.406 24.6999C225.816 26.1962 223.776 28.2275 222.294 30.7887L223.159 31.2895C224.552 28.8819 226.466 26.9757 228.906 25.5658L228.406 24.6999ZM222.294 30.7875C220.811 33.3344 220.07 36.1168 220.07 39.125H221.07C221.07 36.2894 221.767 33.6812 223.159 31.2907L222.294 30.7875ZM220.07 39.125C220.07 41.1007 220.384 42.9851 221.013 44.7751L221.956 44.4436C221.366 42.7649 221.07 40.993 221.07 39.125H220.07ZM221.014 44.7802C221.657 46.5472 222.528 48.1054 223.629 49.4497L224.402 48.8159C223.379 47.5665 222.562 46.1091 221.954 44.4385L221.014 44.7802ZM223.633 49.4547C224.741 50.7716 226.01 51.9121 227.439 52.8755L227.998 52.0464C226.646 51.1348 225.446 50.0566 224.398 48.8109L223.633 49.4547ZM227.441 52.877C228.888 53.8417 230.441 54.5739 232.098 55.0725L232.386 54.115C230.824 53.6449 229.362 52.9552 227.996 52.0449L227.441 52.877ZM232.099 55.0729C233.77 55.5709 235.467 55.8203 237.188 55.8203V54.8203C235.565 54.8203 233.964 54.5853 232.385 54.1146L232.099 55.0729ZM237.188 55.8203C239.097 55.8203 240.913 55.4559 242.633 54.7259L242.242 53.8054C240.649 54.4816 238.966 54.8203 237.188 54.8203V55.8203ZM241.938 54.2656V56H242.938V54.2656H241.938ZM242.272 55.5283C240.59 56.119 238.865 56.4141 237.094 56.4141V57.4141C238.979 57.4141 240.816 57.0997 242.603 56.4717L242.272 55.5283ZM237.094 56.4141C235.251 56.4141 233.455 56.1552 231.704 55.6377L231.421 56.5967C233.264 57.1417 235.156 57.4141 237.094 57.4141V56.4141ZM231.705 55.638C229.972 55.1211 228.362 54.3615 226.873 53.3587L226.314 54.1882C227.888 55.2479 229.59 56.0507 231.42 56.5963L231.705 55.638ZM226.878 53.362C225.4 52.3412 224.099 51.1462 222.972 49.7762L222.2 50.4113C223.386 51.8538 224.756 53.1119 226.31 54.1848L226.878 53.362ZM222.974 49.7783C221.858 48.4056 220.978 46.8017 220.34 44.961L219.395 45.289C220.068 47.2296 221.002 48.9382 222.198 50.4092L222.974 49.7783ZM220.34 44.961C219.703 43.1259 219.383 41.1816 219.383 39.125H218.383C218.383 41.2872 218.719 43.3428 219.395 45.289L220.34 44.961ZM219.383 39.125C219.383 36.7673 219.824 34.5277 220.704 32.4023L219.78 32.0196C218.848 34.2691 218.383 36.639 218.383 39.125H219.383ZM220.703 32.4039C221.602 30.2562 222.817 28.4126 224.347 26.8674L223.637 26.1638C222.011 27.8061 220.726 29.7594 219.781 32.0179L220.703 32.4039ZM224.347 26.8674C225.891 25.3085 227.756 24.0709 229.95 23.1569L229.566 22.2338C227.259 23.1948 225.281 24.504 223.637 26.1638L224.347 26.8674ZM229.952 23.1562C232.154 22.2298 234.494 21.7656 236.977 21.7656V20.7656C234.366 20.7656 231.893 21.2546 229.564 22.2344L229.952 23.1562ZM236.977 21.7656C239.513 21.7656 241.889 22.1932 244.11 23.045L244.468 22.1113C242.126 21.213 239.628 20.7656 236.977 20.7656V21.7656ZM244.11 23.045C246.335 23.8983 248.247 25.0835 249.852 26.5981L250.538 25.8707C248.831 24.2602 246.806 23.0079 244.468 22.1113L244.11 23.045ZM249.854 26.5998C251.455 28.0946 252.72 29.9408 253.648 32.147L254.57 31.7592C253.592 29.4342 252.249 27.4679 250.537 25.869L249.854 26.5998ZM253.648 32.147C254.574 34.3461 255.039 36.7482 255.039 39.3594H256.039C256.039 36.6268 255.551 34.0914 254.57 31.7592L253.648 32.147Z" fill="#0097E6"/>
-    </g>
-    <defs>
-        <filter id="filter0_di" x="0.679688" y="16.6875" width="259.359" height="48.7266" filterUnits="userSpaceOnUse" color-interpolation-filters="sRGB">
-            <feFlood flood-opacity="0" result="BackgroundImageFix"/>
-            <feColorMatrix in="SourceAlpha" type="matrix" values="0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 127 0"/>
-            <feOffset dy="4"/>
-            <feGaussianBlur stdDeviation="2"/>
-            <feColorMatrix type="matrix" values="0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.25 0"/>
-            <feBlend mode="normal" in2="BackgroundImageFix" result="effect1_dropShadow"/>
-            <feBlend mode="normal" in="SourceGraphic" in2="effect1_dropShadow" result="shape"/>
-            <feColorMatrix in="SourceAlpha" type="matrix" values="0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 127 0" result="hardAlpha"/>
-            <feOffset dy="2"/>
-            <feGaussianBlur stdDeviation="2"/>
-            <feComposite in2="hardAlpha" operator="arithmetic" k2="-1" k3="1"/>
-            <feColorMatrix type="matrix" values="0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.25 0"/>
-            <feBlend mode="normal" in2="shape" result="effect2_innerShadow"/>
-        </filter>
-    </defs>
-</svg>
diff --git a/rust/ballista/ui/scheduler/src/index.css b/rust/ballista/ui/scheduler/src/index.css
deleted file mode 100644
index ef9298e7e96..00000000000
--- a/rust/ballista/ui/scheduler/src/index.css
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
-*/
-
-@import url("https://fonts.googleapis.com/css2?family=Poppins:wght@100;400;600&display=swap");
-
-body {
-  margin: 0;
-  font-family: "Poppins", sans-serif;
-  -webkit-font-smoothing: antialiased;
-  -moz-osx-font-smoothing: grayscale;
-}
-
-code {
-  font-family: source-code-pro, Menlo, Monaco, Consolas, "Courier New",
-    monospace;
-}
diff --git a/rust/ballista/ui/scheduler/src/index.tsx b/rust/ballista/ui/scheduler/src/index.tsx
deleted file mode 100644
index 2d03fe6f004..00000000000
--- a/rust/ballista/ui/scheduler/src/index.tsx
+++ /dev/null
@@ -1,38 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-import React from "react";
-import ReactDOM from "react-dom";
-import { ChakraProvider } from "@chakra-ui/react";
-
-import "./index.css";
-import App from "./App";
-import reportWebVitals from "./reportWebVitals";
-
-ReactDOM.render(
-  <React.StrictMode>
-    <ChakraProvider>
-      <App />
-    </ChakraProvider>
-  </React.StrictMode>,
-  document.getElementById("root")
-);
-
-// If you want to start measuring performance in your app, pass a function
-// to log results (for example: reportWebVitals(console.log))
-// or send to an analytics endpoint. Learn more: https://bit.ly/CRA-vitals
-reportWebVitals();
diff --git a/rust/ballista/ui/scheduler/src/react-app-env.d.ts b/rust/ballista/ui/scheduler/src/react-app-env.d.ts
deleted file mode 100644
index 52130497608..00000000000
--- a/rust/ballista/ui/scheduler/src/react-app-env.d.ts
+++ /dev/null
@@ -1,18 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-/// <reference types="react-scripts" />
diff --git a/rust/ballista/ui/scheduler/src/reportWebVitals.ts b/rust/ballista/ui/scheduler/src/reportWebVitals.ts
deleted file mode 100644
index 7bb3e76d1aa..00000000000
--- a/rust/ballista/ui/scheduler/src/reportWebVitals.ts
+++ /dev/null
@@ -1,32 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-import { ReportHandler } from "web-vitals";
-
-const reportWebVitals = (onPerfEntry?: ReportHandler) => {
-  if (onPerfEntry && onPerfEntry instanceof Function) {
-    import("web-vitals").then(({ getCLS, getFID, getFCP, getLCP, getTTFB }) => {
-      getCLS(onPerfEntry);
-      getFID(onPerfEntry);
-      getFCP(onPerfEntry);
-      getLCP(onPerfEntry);
-      getTTFB(onPerfEntry);
-    });
-  }
-};
-
-export default reportWebVitals;
diff --git a/rust/ballista/ui/scheduler/src/setupTests.ts b/rust/ballista/ui/scheduler/src/setupTests.ts
deleted file mode 100644
index 48482da51ce..00000000000
--- a/rust/ballista/ui/scheduler/src/setupTests.ts
+++ /dev/null
@@ -1,22 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// jest-dom adds custom jest matchers for asserting on DOM nodes.
-// allows you to do things like:
-// expect(element).toHaveTextContent(/react/i)
-// learn more: https://github.com/testing-library/jest-dom
-import "@testing-library/jest-dom";
diff --git a/rust/ballista/ui/scheduler/tsconfig.json b/rust/ballista/ui/scheduler/tsconfig.json
deleted file mode 100644
index 6116bcd2125..00000000000
--- a/rust/ballista/ui/scheduler/tsconfig.json
+++ /dev/null
@@ -1,28 +0,0 @@
-{
-  "compilerOptions": {
-    "target": "es5",
-    "lib": [
-      "dom",
-      "dom.iterable",
-      "esnext"
-    ],
-    "allowJs": true,
-    "skipLibCheck": true,
-    "esModuleInterop": true,
-    "allowSyntheticDefaultImports": true,
-    "strict": true,
-    "forceConsistentCasingInFileNames": true,
-    "noFallthroughCasesInSwitch": true,
-    "module": "esnext",
-    "moduleResolution": "node",
-    "resolveJsonModule": true,
-    "isolatedModules": true,
-    "noEmit": true,
-    "jsx": "react-jsx"
-  },
-  "include": [
-    "src",
-    "index.d.ts",
-    "react-table-config.d.ts"
-  ]
-}
diff --git a/rust/ballista/ui/scheduler/yarn.lock b/rust/ballista/ui/scheduler/yarn.lock
deleted file mode 100644
index f2ea84b87bc..00000000000
--- a/rust/ballista/ui/scheduler/yarn.lock
+++ /dev/null
@@ -1,12431 +0,0 @@
-# THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.
-# yarn lockfile v1
-
-
-"@babel/code-frame@7.10.4":
-  version "7.10.4"
-  resolved "https://registry.yarnpkg.com/@babel/code-frame/-/code-frame-7.10.4.tgz#168da1a36e90da68ae8d49c0f1b48c7c6249213a"
-  integrity sha512-vG6SvB6oYEhvgisZNFRmRCUkLz11c7rp+tbNTynGqc6mS1d5ATd/sGyV6W0KZZnXRKMTzZDRgQT3Ou9jhpAfUg==
-  dependencies:
-    "@babel/highlight" "^7.10.4"
-
-"@babel/code-frame@7.12.11":
-  version "7.12.11"
-  resolved "https://registry.yarnpkg.com/@babel/code-frame/-/code-frame-7.12.11.tgz#f4ad435aa263db935b8f10f2c552d23fb716a63f"
-  integrity sha512-Zt1yodBx1UcyiePMSkWnU4hPqhwq7hGi2nFL1LeA3EUl+q2LQx16MISgJ0+z7dnmgvP9QtIleuETGOiOH1RcIw==
-  dependencies:
-    "@babel/highlight" "^7.10.4"
-
-"@babel/code-frame@^7.0.0", "@babel/code-frame@^7.10.4", "@babel/code-frame@^7.12.13", "@babel/code-frame@^7.5.5":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/code-frame/-/code-frame-7.12.13.tgz#dcfc826beef65e75c50e21d3837d7d95798dd658"
-  integrity sha512-HV1Cm0Q3ZrpCR93tkWOYiuYIgLxZXZFVG2VgK+MBWjUqZTundupbfx2aXarXuw5Ko5aMcjtJgbSs4vUGBS5v6g==
-  dependencies:
-    "@babel/highlight" "^7.12.13"
-
-"@babel/compat-data@^7.12.1", "@babel/compat-data@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/compat-data/-/compat-data-7.12.13.tgz#27e19e0ed3726ccf54067ced4109501765e7e2e8"
-  integrity sha512-U/hshG5R+SIoW7HVWIdmy1cB7s3ki+r3FpyEZiCgpi4tFgPnX/vynY80ZGSASOIrUM6O7VxOgCZgdt7h97bUGg==
-
-"@babel/core@7.12.3":
-  version "7.12.3"
-  resolved "https://registry.yarnpkg.com/@babel/core/-/core-7.12.3.tgz#1b436884e1e3bff6fb1328dc02b208759de92ad8"
-  integrity sha512-0qXcZYKZp3/6N2jKYVxZv0aNCsxTSVCiK72DTiTYZAu7sjg73W0/aynWjMbiGd87EQL4WyA8reiJVh92AVla9g==
-  dependencies:
-    "@babel/code-frame" "^7.10.4"
-    "@babel/generator" "^7.12.1"
-    "@babel/helper-module-transforms" "^7.12.1"
-    "@babel/helpers" "^7.12.1"
-    "@babel/parser" "^7.12.3"
-    "@babel/template" "^7.10.4"
-    "@babel/traverse" "^7.12.1"
-    "@babel/types" "^7.12.1"
-    convert-source-map "^1.7.0"
-    debug "^4.1.0"
-    gensync "^1.0.0-beta.1"
-    json5 "^2.1.2"
-    lodash "^4.17.19"
-    resolve "^1.3.2"
-    semver "^5.4.1"
-    source-map "^0.5.0"
-
-"@babel/core@^7.1.0", "@babel/core@^7.12.3", "@babel/core@^7.7.5", "@babel/core@^7.8.4":
-  version "7.12.17"
-  resolved "https://registry.yarnpkg.com/@babel/core/-/core-7.12.17.tgz#993c5e893333107a2815d8e0d73a2c3755e280b2"
-  integrity sha512-V3CuX1aBywbJvV2yzJScRxeiiw0v2KZZYYE3giywxzFJL13RiyPjaaDwhDnxmgFTTS7FgvM2ijr4QmKNIu0AtQ==
-  dependencies:
-    "@babel/code-frame" "^7.12.13"
-    "@babel/generator" "^7.12.17"
-    "@babel/helper-module-transforms" "^7.12.17"
-    "@babel/helpers" "^7.12.17"
-    "@babel/parser" "^7.12.17"
-    "@babel/template" "^7.12.13"
-    "@babel/traverse" "^7.12.17"
-    "@babel/types" "^7.12.17"
-    convert-source-map "^1.7.0"
-    debug "^4.1.0"
-    gensync "^1.0.0-beta.1"
-    json5 "^2.1.2"
-    lodash "^4.17.19"
-    semver "^5.4.1"
-    source-map "^0.5.0"
-
-"@babel/generator@^7.12.1", "@babel/generator@^7.12.17":
-  version "7.12.17"
-  resolved "https://registry.yarnpkg.com/@babel/generator/-/generator-7.12.17.tgz#9ef1dd792d778b32284411df63f4f668a9957287"
-  integrity sha512-DSA7ruZrY4WI8VxuS1jWSRezFnghEoYEFrZcw9BizQRmOZiUsiHl59+qEARGPqPikwA/GPTyRCi7isuCK/oyqg==
-  dependencies:
-    "@babel/types" "^7.12.17"
-    jsesc "^2.5.1"
-    source-map "^0.5.0"
-
-"@babel/helper-annotate-as-pure@^7.10.4", "@babel/helper-annotate-as-pure@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/helper-annotate-as-pure/-/helper-annotate-as-pure-7.12.13.tgz#0f58e86dfc4bb3b1fcd7db806570e177d439b6ab"
-  integrity sha512-7YXfX5wQ5aYM/BOlbSccHDbuXXFPxeoUmfWtz8le2yTkTZc+BxsiEnENFoi2SlmA8ewDkG2LgIMIVzzn2h8kfw==
-  dependencies:
-    "@babel/types" "^7.12.13"
-
-"@babel/helper-builder-binary-assignment-operator-visitor@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/helper-builder-binary-assignment-operator-visitor/-/helper-builder-binary-assignment-operator-visitor-7.12.13.tgz#6bc20361c88b0a74d05137a65cac8d3cbf6f61fc"
-  integrity sha512-CZOv9tGphhDRlVjVkAgm8Nhklm9RzSmWpX2my+t7Ua/KT616pEzXsQCjinzvkRvHWJ9itO4f296efroX23XCMA==
-  dependencies:
-    "@babel/helper-explode-assignable-expression" "^7.12.13"
-    "@babel/types" "^7.12.13"
-
-"@babel/helper-compilation-targets@^7.12.1", "@babel/helper-compilation-targets@^7.12.17":
-  version "7.12.17"
-  resolved "https://registry.yarnpkg.com/@babel/helper-compilation-targets/-/helper-compilation-targets-7.12.17.tgz#91d83fae61ef390d39c3f0507cb83979bab837c7"
-  integrity sha512-5EkibqLVYOuZ89BSg2lv+GG8feywLuvMXNYgf0Im4MssE0mFWPztSpJbildNnUgw0bLI2EsIN4MpSHC2iUJkQA==
-  dependencies:
-    "@babel/compat-data" "^7.12.13"
-    "@babel/helper-validator-option" "^7.12.17"
-    browserslist "^4.14.5"
-    semver "^5.5.0"
-
-"@babel/helper-create-class-features-plugin@^7.12.1", "@babel/helper-create-class-features-plugin@^7.12.13", "@babel/helper-create-class-features-plugin@^7.12.17":
-  version "7.12.17"
-  resolved "https://registry.yarnpkg.com/@babel/helper-create-class-features-plugin/-/helper-create-class-features-plugin-7.12.17.tgz#704b69c8a78d03fb1c5fcc2e7b593f8a65628944"
-  integrity sha512-I/nurmTxIxHV0M+rIpfQBF1oN342+yvl2kwZUrQuOClMamHF1w5tknfZubgNOLRoA73SzBFAdFcpb4M9HwOeWQ==
-  dependencies:
-    "@babel/helper-function-name" "^7.12.13"
-    "@babel/helper-member-expression-to-functions" "^7.12.17"
-    "@babel/helper-optimise-call-expression" "^7.12.13"
-    "@babel/helper-replace-supers" "^7.12.13"
-    "@babel/helper-split-export-declaration" "^7.12.13"
-
-"@babel/helper-create-regexp-features-plugin@^7.12.13":
-  version "7.12.17"
-  resolved "https://registry.yarnpkg.com/@babel/helper-create-regexp-features-plugin/-/helper-create-regexp-features-plugin-7.12.17.tgz#a2ac87e9e319269ac655b8d4415e94d38d663cb7"
-  integrity sha512-p2VGmBu9oefLZ2nQpgnEnG0ZlRPvL8gAGvPUMQwUdaE8k49rOMuZpOwdQoy5qJf6K8jL3bcAMhVUlHAjIgJHUg==
-  dependencies:
-    "@babel/helper-annotate-as-pure" "^7.12.13"
-    regexpu-core "^4.7.1"
-
-"@babel/helper-explode-assignable-expression@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/helper-explode-assignable-expression/-/helper-explode-assignable-expression-7.12.13.tgz#0e46990da9e271502f77507efa4c9918d3d8634a"
-  integrity sha512-5loeRNvMo9mx1dA/d6yNi+YiKziJZFylZnCo1nmFF4qPU4yJ14abhWESuSMQSlQxWdxdOFzxXjk/PpfudTtYyw==
-  dependencies:
-    "@babel/types" "^7.12.13"
-
-"@babel/helper-function-name@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/helper-function-name/-/helper-function-name-7.12.13.tgz#93ad656db3c3c2232559fd7b2c3dbdcbe0eb377a"
-  integrity sha512-TZvmPn0UOqmvi5G4vvw0qZTpVptGkB1GL61R6lKvrSdIxGm5Pky7Q3fpKiIkQCAtRCBUwB0PaThlx9vebCDSwA==
-  dependencies:
-    "@babel/helper-get-function-arity" "^7.12.13"
-    "@babel/template" "^7.12.13"
-    "@babel/types" "^7.12.13"
-
-"@babel/helper-get-function-arity@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/helper-get-function-arity/-/helper-get-function-arity-7.12.13.tgz#bc63451d403a3b3082b97e1d8b3fe5bd4091e583"
-  integrity sha512-DjEVzQNz5LICkzN0REdpD5prGoidvbdYk1BVgRUOINaWJP2t6avB27X1guXK1kXNrX0WMfsrm1A/ZBthYuIMQg==
-  dependencies:
-    "@babel/types" "^7.12.13"
-
-"@babel/helper-hoist-variables@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/helper-hoist-variables/-/helper-hoist-variables-7.12.13.tgz#13aba58b7480b502362316ea02f52cca0e9796cd"
-  integrity sha512-KSC5XSj5HreRhYQtZ3cnSnQwDzgnbdUDEFsxkN0m6Q3WrCRt72xrnZ8+h+pX7YxM7hr87zIO3a/v5p/H3TrnVw==
-  dependencies:
-    "@babel/types" "^7.12.13"
-
-"@babel/helper-member-expression-to-functions@^7.12.13", "@babel/helper-member-expression-to-functions@^7.12.17":
-  version "7.12.17"
-  resolved "https://registry.yarnpkg.com/@babel/helper-member-expression-to-functions/-/helper-member-expression-to-functions-7.12.17.tgz#f82838eb06e1235307b6d71457b6670ff71ee5ac"
-  integrity sha512-Bzv4p3ODgS/qpBE0DiJ9qf5WxSmrQ8gVTe8ClMfwwsY2x/rhykxxy3bXzG7AGTnPB2ij37zGJ/Q/6FruxHxsxg==
-  dependencies:
-    "@babel/types" "^7.12.17"
-
-"@babel/helper-module-imports@^7.0.0", "@babel/helper-module-imports@^7.12.1", "@babel/helper-module-imports@^7.12.13", "@babel/helper-module-imports@^7.7.0":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/helper-module-imports/-/helper-module-imports-7.12.13.tgz#ec67e4404f41750463e455cc3203f6a32e93fcb0"
-  integrity sha512-NGmfvRp9Rqxy0uHSSVP+SRIW1q31a7Ji10cLBcqSDUngGentY4FRiHOFZFE1CLU5eiL0oE8reH7Tg1y99TDM/g==
-  dependencies:
-    "@babel/types" "^7.12.13"
-
-"@babel/helper-module-transforms@^7.12.1", "@babel/helper-module-transforms@^7.12.13", "@babel/helper-module-transforms@^7.12.17":
-  version "7.12.17"
-  resolved "https://registry.yarnpkg.com/@babel/helper-module-transforms/-/helper-module-transforms-7.12.17.tgz#7c75b987d6dfd5b48e575648f81eaac891539509"
-  integrity sha512-sFL+p6zOCQMm9vilo06M4VHuTxUAwa6IxgL56Tq1DVtA0ziAGTH1ThmJq7xwPqdQlgAbKX3fb0oZNbtRIyA5KQ==
-  dependencies:
-    "@babel/helper-module-imports" "^7.12.13"
-    "@babel/helper-replace-supers" "^7.12.13"
-    "@babel/helper-simple-access" "^7.12.13"
-    "@babel/helper-split-export-declaration" "^7.12.13"
-    "@babel/helper-validator-identifier" "^7.12.11"
-    "@babel/template" "^7.12.13"
-    "@babel/traverse" "^7.12.17"
-    "@babel/types" "^7.12.17"
-    lodash "^4.17.19"
-
-"@babel/helper-optimise-call-expression@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/helper-optimise-call-expression/-/helper-optimise-call-expression-7.12.13.tgz#5c02d171b4c8615b1e7163f888c1c81c30a2aaea"
-  integrity sha512-BdWQhoVJkp6nVjB7nkFWcn43dkprYauqtk++Py2eaf/GRDFm5BxRqEIZCiHlZUGAVmtwKcsVL1dC68WmzeFmiA==
-  dependencies:
-    "@babel/types" "^7.12.13"
-
-"@babel/helper-plugin-utils@^7.0.0", "@babel/helper-plugin-utils@^7.10.4", "@babel/helper-plugin-utils@^7.12.13", "@babel/helper-plugin-utils@^7.8.0", "@babel/helper-plugin-utils@^7.8.3":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/helper-plugin-utils/-/helper-plugin-utils-7.12.13.tgz#174254d0f2424d8aefb4dd48057511247b0a9eeb"
-  integrity sha512-C+10MXCXJLiR6IeG9+Wiejt9jmtFpxUc3MQqCmPY8hfCjyUGl9kT+B2okzEZrtykiwrc4dbCPdDoz0A/HQbDaA==
-
-"@babel/helper-remap-async-to-generator@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/helper-remap-async-to-generator/-/helper-remap-async-to-generator-7.12.13.tgz#170365f4140e2d20e5c88f8ba23c24468c296878"
-  integrity sha512-Qa6PU9vNcj1NZacZZI1Mvwt+gXDH6CTfgAkSjeRMLE8HxtDK76+YDId6NQR+z7Rgd5arhD2cIbS74r0SxD6PDA==
-  dependencies:
-    "@babel/helper-annotate-as-pure" "^7.12.13"
-    "@babel/helper-wrap-function" "^7.12.13"
-    "@babel/types" "^7.12.13"
-
-"@babel/helper-replace-supers@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/helper-replace-supers/-/helper-replace-supers-7.12.13.tgz#00ec4fb6862546bd3d0aff9aac56074277173121"
-  integrity sha512-pctAOIAMVStI2TMLhozPKbf5yTEXc0OJa0eENheb4w09SrgOWEs+P4nTOZYJQCqs8JlErGLDPDJTiGIp3ygbLg==
-  dependencies:
-    "@babel/helper-member-expression-to-functions" "^7.12.13"
-    "@babel/helper-optimise-call-expression" "^7.12.13"
-    "@babel/traverse" "^7.12.13"
-    "@babel/types" "^7.12.13"
-
-"@babel/helper-simple-access@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/helper-simple-access/-/helper-simple-access-7.12.13.tgz#8478bcc5cacf6aa1672b251c1d2dde5ccd61a6c4"
-  integrity sha512-0ski5dyYIHEfwpWGx5GPWhH35j342JaflmCeQmsPWcrOQDtCN6C1zKAVRFVbK53lPW2c9TsuLLSUDf0tIGJ5hA==
-  dependencies:
-    "@babel/types" "^7.12.13"
-
-"@babel/helper-skip-transparent-expression-wrappers@^7.12.1":
-  version "7.12.1"
-  resolved "https://registry.yarnpkg.com/@babel/helper-skip-transparent-expression-wrappers/-/helper-skip-transparent-expression-wrappers-7.12.1.tgz#462dc63a7e435ade8468385c63d2b84cce4b3cbf"
-  integrity sha512-Mf5AUuhG1/OCChOJ/HcADmvcHM42WJockombn8ATJG3OnyiSxBK/Mm5x78BQWvmtXZKHgbjdGL2kin/HOLlZGA==
-  dependencies:
-    "@babel/types" "^7.12.1"
-
-"@babel/helper-split-export-declaration@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/helper-split-export-declaration/-/helper-split-export-declaration-7.12.13.tgz#e9430be00baf3e88b0e13e6f9d4eaf2136372b05"
-  integrity sha512-tCJDltF83htUtXx5NLcaDqRmknv652ZWCHyoTETf1CXYJdPC7nohZohjUgieXhv0hTJdRf2FjDueFehdNucpzg==
-  dependencies:
-    "@babel/types" "^7.12.13"
-
-"@babel/helper-validator-identifier@^7.12.11":
-  version "7.12.11"
-  resolved "https://registry.yarnpkg.com/@babel/helper-validator-identifier/-/helper-validator-identifier-7.12.11.tgz#c9a1f021917dcb5ccf0d4e453e399022981fc9ed"
-  integrity sha512-np/lG3uARFybkoHokJUmf1QfEvRVCPbmQeUQpKow5cQ3xWrV9i3rUHodKDJPQfTVX61qKi+UdYk8kik84n7XOw==
-
-"@babel/helper-validator-option@^7.12.1", "@babel/helper-validator-option@^7.12.17":
-  version "7.12.17"
-  resolved "https://registry.yarnpkg.com/@babel/helper-validator-option/-/helper-validator-option-7.12.17.tgz#d1fbf012e1a79b7eebbfdc6d270baaf8d9eb9831"
-  integrity sha512-TopkMDmLzq8ngChwRlyjR6raKD6gMSae4JdYDB8bByKreQgG0RBTuKe9LRxW3wFtUnjxOPRKBDwEH6Mg5KeDfw==
-
-"@babel/helper-wrap-function@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/helper-wrap-function/-/helper-wrap-function-7.12.13.tgz#e3ea8cb3ee0a16911f9c1b50d9e99fe8fe30f9ff"
-  integrity sha512-t0aZFEmBJ1LojdtJnhOaQEVejnzYhyjWHSsNSNo8vOYRbAJNh6r6GQF7pd36SqG7OKGbn+AewVQ/0IfYfIuGdw==
-  dependencies:
-    "@babel/helper-function-name" "^7.12.13"
-    "@babel/template" "^7.12.13"
-    "@babel/traverse" "^7.12.13"
-    "@babel/types" "^7.12.13"
-
-"@babel/helpers@^7.12.1", "@babel/helpers@^7.12.17":
-  version "7.12.17"
-  resolved "https://registry.yarnpkg.com/@babel/helpers/-/helpers-7.12.17.tgz#71e03d2981a6b5ee16899964f4101dc8471d60bc"
-  integrity sha512-tEpjqSBGt/SFEsFikKds1sLNChKKGGR17flIgQKXH4fG6m9gTgl3gnOC1giHNyaBCSKuTfxaSzHi7UnvqiVKxg==
-  dependencies:
-    "@babel/template" "^7.12.13"
-    "@babel/traverse" "^7.12.17"
-    "@babel/types" "^7.12.17"
-
-"@babel/highlight@^7.10.4", "@babel/highlight@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/highlight/-/highlight-7.12.13.tgz#8ab538393e00370b26271b01fa08f7f27f2e795c"
-  integrity sha512-kocDQvIbgMKlWxXe9fof3TQ+gkIPOUSEYhJjqUjvKMez3krV7vbzYCDq39Oj11UAVK7JqPVGQPlgE85dPNlQww==
-  dependencies:
-    "@babel/helper-validator-identifier" "^7.12.11"
-    chalk "^2.0.0"
-    js-tokens "^4.0.0"
-
-"@babel/parser@^7.1.0", "@babel/parser@^7.12.13", "@babel/parser@^7.12.17", "@babel/parser@^7.12.3", "@babel/parser@^7.7.0":
-  version "7.12.17"
-  resolved "https://registry.yarnpkg.com/@babel/parser/-/parser-7.12.17.tgz#bc85d2d47db38094e5bb268fc761716e7d693848"
-  integrity sha512-r1yKkiUTYMQ8LiEI0UcQx5ETw5dpTLn9wijn9hk6KkTtOK95FndDN10M+8/s6k/Ymlbivw0Av9q4SlgF80PtHg==
-
-"@babel/plugin-proposal-async-generator-functions@^7.12.1", "@babel/plugin-proposal-async-generator-functions@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-proposal-async-generator-functions/-/plugin-proposal-async-generator-functions-7.12.13.tgz#d1c6d841802ffb88c64a2413e311f7345b9e66b5"
-  integrity sha512-1KH46Hx4WqP77f978+5Ye/VUbuwQld2hph70yaw2hXS2v7ER2f3nlpNMu909HO2rbvP0NKLlMVDPh9KXklVMhA==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.12.13"
-    "@babel/helper-remap-async-to-generator" "^7.12.13"
-    "@babel/plugin-syntax-async-generators" "^7.8.0"
-
-"@babel/plugin-proposal-class-properties@7.12.1":
-  version "7.12.1"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-proposal-class-properties/-/plugin-proposal-class-properties-7.12.1.tgz#a082ff541f2a29a4821065b8add9346c0c16e5de"
-  integrity sha512-cKp3dlQsFsEs5CWKnN7BnSHOd0EOW8EKpEjkoz1pO2E5KzIDNV9Ros1b0CnmbVgAGXJubOYVBOGCT1OmJwOI7w==
-  dependencies:
-    "@babel/helper-create-class-features-plugin" "^7.12.1"
-    "@babel/helper-plugin-utils" "^7.10.4"
-
-"@babel/plugin-proposal-class-properties@^7.12.1", "@babel/plugin-proposal-class-properties@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-proposal-class-properties/-/plugin-proposal-class-properties-7.12.13.tgz#3d2ce350367058033c93c098e348161d6dc0d8c8"
-  integrity sha512-8SCJ0Ddrpwv4T7Gwb33EmW1V9PY5lggTO+A8WjyIwxrSHDUyBw4MtF96ifn1n8H806YlxbVCoKXbbmzD6RD+cA==
-  dependencies:
-    "@babel/helper-create-class-features-plugin" "^7.12.13"
-    "@babel/helper-plugin-utils" "^7.12.13"
-
-"@babel/plugin-proposal-decorators@7.12.1":
-  version "7.12.1"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-proposal-decorators/-/plugin-proposal-decorators-7.12.1.tgz#59271439fed4145456c41067450543aee332d15f"
-  integrity sha512-knNIuusychgYN8fGJHONL0RbFxLGawhXOJNLBk75TniTsZZeA+wdkDuv6wp4lGwzQEKjZi6/WYtnb3udNPmQmQ==
-  dependencies:
-    "@babel/helper-create-class-features-plugin" "^7.12.1"
-    "@babel/helper-plugin-utils" "^7.10.4"
-    "@babel/plugin-syntax-decorators" "^7.12.1"
-
-"@babel/plugin-proposal-dynamic-import@^7.12.1", "@babel/plugin-proposal-dynamic-import@^7.12.17":
-  version "7.12.17"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-proposal-dynamic-import/-/plugin-proposal-dynamic-import-7.12.17.tgz#e0ebd8db65acc37eac518fa17bead2174e224512"
-  integrity sha512-ZNGoFZqrnuy9H2izB2jLlnNDAfVPlGl5NhFEiFe4D84ix9GQGygF+CWMGHKuE+bpyS/AOuDQCnkiRNqW2IzS1Q==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.12.13"
-    "@babel/plugin-syntax-dynamic-import" "^7.8.0"
-
-"@babel/plugin-proposal-export-namespace-from@^7.12.1", "@babel/plugin-proposal-export-namespace-from@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-proposal-export-namespace-from/-/plugin-proposal-export-namespace-from-7.12.13.tgz#393be47a4acd03fa2af6e3cde9b06e33de1b446d"
-  integrity sha512-INAgtFo4OnLN3Y/j0VwAgw3HDXcDtX+C/erMvWzuV9v71r7urb6iyMXu7eM9IgLr1ElLlOkaHjJ0SbCmdOQ3Iw==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.12.13"
-    "@babel/plugin-syntax-export-namespace-from" "^7.8.3"
-
-"@babel/plugin-proposal-json-strings@^7.12.1", "@babel/plugin-proposal-json-strings@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-proposal-json-strings/-/plugin-proposal-json-strings-7.12.13.tgz#ced7888a2db92a3d520a2e35eb421fdb7fcc9b5d"
-  integrity sha512-v9eEi4GiORDg8x+Dmi5r8ibOe0VXoKDeNPYcTTxdGN4eOWikrJfDJCJrr1l5gKGvsNyGJbrfMftC2dTL6oz7pg==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.12.13"
-    "@babel/plugin-syntax-json-strings" "^7.8.0"
-
-"@babel/plugin-proposal-logical-assignment-operators@^7.12.1", "@babel/plugin-proposal-logical-assignment-operators@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-proposal-logical-assignment-operators/-/plugin-proposal-logical-assignment-operators-7.12.13.tgz#575b5d9a08d8299eeb4db6430da6e16e5cf14350"
-  integrity sha512-fqmiD3Lz7jVdK6kabeSr1PZlWSUVqSitmHEe3Z00dtGTKieWnX9beafvavc32kjORa5Bai4QNHgFDwWJP+WtSQ==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.12.13"
-    "@babel/plugin-syntax-logical-assignment-operators" "^7.10.4"
-
-"@babel/plugin-proposal-nullish-coalescing-operator@7.12.1":
-  version "7.12.1"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-proposal-nullish-coalescing-operator/-/plugin-proposal-nullish-coalescing-operator-7.12.1.tgz#3ed4fff31c015e7f3f1467f190dbe545cd7b046c"
-  integrity sha512-nZY0ESiaQDI1y96+jk6VxMOaL4LPo/QDHBqL+SF3/vl6dHkTwHlOI8L4ZwuRBHgakRBw5zsVylel7QPbbGuYgg==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.10.4"
-    "@babel/plugin-syntax-nullish-coalescing-operator" "^7.8.0"
-
-"@babel/plugin-proposal-nullish-coalescing-operator@^7.12.1", "@babel/plugin-proposal-nullish-coalescing-operator@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-proposal-nullish-coalescing-operator/-/plugin-proposal-nullish-coalescing-operator-7.12.13.tgz#24867307285cee4e1031170efd8a7ac807deefde"
-  integrity sha512-Qoxpy+OxhDBI5kRqliJFAl4uWXk3Bn24WeFstPH0iLymFehSAUR8MHpqU7njyXv/qbo7oN6yTy5bfCmXdKpo1Q==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.12.13"
-    "@babel/plugin-syntax-nullish-coalescing-operator" "^7.8.0"
-
-"@babel/plugin-proposal-numeric-separator@7.12.1":
-  version "7.12.1"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-proposal-numeric-separator/-/plugin-proposal-numeric-separator-7.12.1.tgz#0e2c6774c4ce48be412119b4d693ac777f7685a6"
-  integrity sha512-MR7Ok+Af3OhNTCxYVjJZHS0t97ydnJZt/DbR4WISO39iDnhiD8XHrY12xuSJ90FFEGjir0Fzyyn7g/zY6hxbxA==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.10.4"
-    "@babel/plugin-syntax-numeric-separator" "^7.10.4"
-
-"@babel/plugin-proposal-numeric-separator@^7.12.1", "@babel/plugin-proposal-numeric-separator@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-proposal-numeric-separator/-/plugin-proposal-numeric-separator-7.12.13.tgz#bd9da3188e787b5120b4f9d465a8261ce67ed1db"
-  integrity sha512-O1jFia9R8BUCl3ZGB7eitaAPu62TXJRHn7rh+ojNERCFyqRwJMTmhz+tJ+k0CwI6CLjX/ee4qW74FSqlq9I35w==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.12.13"
-    "@babel/plugin-syntax-numeric-separator" "^7.10.4"
-
-"@babel/plugin-proposal-object-rest-spread@^7.12.1", "@babel/plugin-proposal-object-rest-spread@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-proposal-object-rest-spread/-/plugin-proposal-object-rest-spread-7.12.13.tgz#f93f3116381ff94bc676fdcb29d71045cd1ec011"
-  integrity sha512-WvA1okB/0OS/N3Ldb3sziSrXg6sRphsBgqiccfcQq7woEn5wQLNX82Oc4PlaFcdwcWHuQXAtb8ftbS8Fbsg/sg==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.12.13"
-    "@babel/plugin-syntax-object-rest-spread" "^7.8.0"
-    "@babel/plugin-transform-parameters" "^7.12.13"
-
-"@babel/plugin-proposal-optional-catch-binding@^7.12.1", "@babel/plugin-proposal-optional-catch-binding@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-proposal-optional-catch-binding/-/plugin-proposal-optional-catch-binding-7.12.13.tgz#4640520afe57728af14b4d1574ba844f263bcae5"
-  integrity sha512-9+MIm6msl9sHWg58NvqpNpLtuFbmpFYk37x8kgnGzAHvX35E1FyAwSUt5hIkSoWJFSAH+iwU8bJ4fcD1zKXOzg==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.12.13"
-    "@babel/plugin-syntax-optional-catch-binding" "^7.8.0"
-
-"@babel/plugin-proposal-optional-chaining@7.12.1":
-  version "7.12.1"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-proposal-optional-chaining/-/plugin-proposal-optional-chaining-7.12.1.tgz#cce122203fc8a32794296fc377c6dedaf4363797"
-  integrity sha512-c2uRpY6WzaVDzynVY9liyykS+kVU+WRZPMPYpkelXH8KBt1oXoI89kPbZKKG/jDT5UK92FTW2fZkZaJhdiBabw==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.10.4"
-    "@babel/helper-skip-transparent-expression-wrappers" "^7.12.1"
-    "@babel/plugin-syntax-optional-chaining" "^7.8.0"
-
-"@babel/plugin-proposal-optional-chaining@^7.12.1", "@babel/plugin-proposal-optional-chaining@^7.12.17":
-  version "7.12.17"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-proposal-optional-chaining/-/plugin-proposal-optional-chaining-7.12.17.tgz#e382becadc2cb16b7913b6c672d92e4b33385b5c"
-  integrity sha512-TvxwI80pWftrGPKHNfkvX/HnoeSTR7gC4ezWnAL39PuktYUe6r8kEpOLTYnkBTsaoeazXm2jHJ22EQ81sdgfcA==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.12.13"
-    "@babel/helper-skip-transparent-expression-wrappers" "^7.12.1"
-    "@babel/plugin-syntax-optional-chaining" "^7.8.0"
-
-"@babel/plugin-proposal-private-methods@^7.12.1", "@babel/plugin-proposal-private-methods@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-proposal-private-methods/-/plugin-proposal-private-methods-7.12.13.tgz#ea78a12554d784ecf7fc55950b752d469d9c4a71"
-  integrity sha512-sV0V57uUwpauixvR7s2o75LmwJI6JECwm5oPUY5beZB1nBl2i37hc7CJGqB5G+58fur5Y6ugvl3LRONk5x34rg==
-  dependencies:
-    "@babel/helper-create-class-features-plugin" "^7.12.13"
-    "@babel/helper-plugin-utils" "^7.12.13"
-
-"@babel/plugin-proposal-unicode-property-regex@^7.12.1", "@babel/plugin-proposal-unicode-property-regex@^7.12.13", "@babel/plugin-proposal-unicode-property-regex@^7.4.4":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-proposal-unicode-property-regex/-/plugin-proposal-unicode-property-regex-7.12.13.tgz#bebde51339be829c17aaaaced18641deb62b39ba"
-  integrity sha512-XyJmZidNfofEkqFV5VC/bLabGmO5QzenPO/YOfGuEbgU+2sSwMmio3YLb4WtBgcmmdwZHyVyv8on77IUjQ5Gvg==
-  dependencies:
-    "@babel/helper-create-regexp-features-plugin" "^7.12.13"
-    "@babel/helper-plugin-utils" "^7.12.13"
-
-"@babel/plugin-syntax-async-generators@^7.8.0", "@babel/plugin-syntax-async-generators@^7.8.4":
-  version "7.8.4"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-async-generators/-/plugin-syntax-async-generators-7.8.4.tgz#a983fb1aeb2ec3f6ed042a210f640e90e786fe0d"
-  integrity sha512-tycmZxkGfZaxhMRbXlPXuVFpdWlXpir2W4AMhSJgRKzk/eDlIXOhb2LHWoLpDF7TEHylV5zNhykX6KAgHJmTNw==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.8.0"
-
-"@babel/plugin-syntax-bigint@^7.8.3":
-  version "7.8.3"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-bigint/-/plugin-syntax-bigint-7.8.3.tgz#4c9a6f669f5d0cdf1b90a1671e9a146be5300cea"
-  integrity sha512-wnTnFlG+YxQm3vDxpGE57Pj0srRU4sHE/mDkt1qv2YJJSeUAec2ma4WLUnUPeKjyrfntVwe/N6dCXpU+zL3Npg==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.8.0"
-
-"@babel/plugin-syntax-class-properties@^7.12.1", "@babel/plugin-syntax-class-properties@^7.12.13", "@babel/plugin-syntax-class-properties@^7.8.3":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-class-properties/-/plugin-syntax-class-properties-7.12.13.tgz#b5c987274c4a3a82b89714796931a6b53544ae10"
-  integrity sha512-fm4idjKla0YahUNgFNLCB0qySdsoPiZP3iQE3rky0mBUtMZ23yDJ9SJdg6dXTSDnulOVqiF3Hgr9nbXvXTQZYA==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.12.13"
-
-"@babel/plugin-syntax-decorators@^7.12.1":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-decorators/-/plugin-syntax-decorators-7.12.13.tgz#fac829bf3c7ef4a1bc916257b403e58c6bdaf648"
-  integrity sha512-Rw6aIXGuqDLr6/LoBBYE57nKOzQpz/aDkKlMqEwH+Vp0MXbG6H/TfRjaY343LKxzAKAMXIHsQ8JzaZKuDZ9MwA==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.12.13"
-
-"@babel/plugin-syntax-dynamic-import@^7.8.0":
-  version "7.8.3"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-dynamic-import/-/plugin-syntax-dynamic-import-7.8.3.tgz#62bf98b2da3cd21d626154fc96ee5b3cb68eacb3"
-  integrity sha512-5gdGbFon+PszYzqs83S3E5mpi7/y/8M9eC90MRTZfduQOYW76ig6SOSPNe41IG5LoP3FGBn2N0RjVDSQiS94kQ==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.8.0"
-
-"@babel/plugin-syntax-export-namespace-from@^7.8.3":
-  version "7.8.3"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-export-namespace-from/-/plugin-syntax-export-namespace-from-7.8.3.tgz#028964a9ba80dbc094c915c487ad7c4e7a66465a"
-  integrity sha512-MXf5laXo6c1IbEbegDmzGPwGNTsHZmEy6QGznu5Sh2UCWvueywb2ee+CCE4zQiZstxU9BMoQO9i6zUFSY0Kj0Q==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.8.3"
-
-"@babel/plugin-syntax-flow@^7.12.1":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-flow/-/plugin-syntax-flow-7.12.13.tgz#5df9962503c0a9c918381c929d51d4d6949e7e86"
-  integrity sha512-J/RYxnlSLXZLVR7wTRsozxKT8qbsx1mNKJzXEEjQ0Kjx1ZACcyHgbanNWNCFtc36IzuWhYWPpvJFFoexoOWFmA==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.12.13"
-
-"@babel/plugin-syntax-import-meta@^7.8.3":
-  version "7.10.4"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-import-meta/-/plugin-syntax-import-meta-7.10.4.tgz#ee601348c370fa334d2207be158777496521fd51"
-  integrity sha512-Yqfm+XDx0+Prh3VSeEQCPU81yC+JWZ2pDPFSS4ZdpfZhp4MkFMaDC1UqseovEKwSUpnIL7+vK+Clp7bfh0iD7g==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.10.4"
-
-"@babel/plugin-syntax-json-strings@^7.8.0", "@babel/plugin-syntax-json-strings@^7.8.3":
-  version "7.8.3"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-json-strings/-/plugin-syntax-json-strings-7.8.3.tgz#01ca21b668cd8218c9e640cb6dd88c5412b2c96a"
-  integrity sha512-lY6kdGpWHvjoe2vk4WrAapEuBR69EMxZl+RoGRhrFGNYVK8mOPAW8VfbT/ZgrFbXlDNiiaxQnAtgVCZ6jv30EA==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.8.0"
-
-"@babel/plugin-syntax-jsx@^7.12.1", "@babel/plugin-syntax-jsx@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-jsx/-/plugin-syntax-jsx-7.12.13.tgz#044fb81ebad6698fe62c478875575bcbb9b70f15"
-  integrity sha512-d4HM23Q1K7oq/SLNmG6mRt85l2csmQ0cHRaxRXjKW0YFdEXqlZ5kzFQKH5Uc3rDJECgu+yCRgPkG04Mm98R/1g==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.12.13"
-
-"@babel/plugin-syntax-logical-assignment-operators@^7.10.4", "@babel/plugin-syntax-logical-assignment-operators@^7.8.3":
-  version "7.10.4"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-logical-assignment-operators/-/plugin-syntax-logical-assignment-operators-7.10.4.tgz#ca91ef46303530448b906652bac2e9fe9941f699"
-  integrity sha512-d8waShlpFDinQ5MtvGU9xDAOzKH47+FFoney2baFIoMr952hKOLp1HR7VszoZvOsV/4+RRszNY7D17ba0te0ig==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.10.4"
-
-"@babel/plugin-syntax-nullish-coalescing-operator@^7.8.0", "@babel/plugin-syntax-nullish-coalescing-operator@^7.8.3":
-  version "7.8.3"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-nullish-coalescing-operator/-/plugin-syntax-nullish-coalescing-operator-7.8.3.tgz#167ed70368886081f74b5c36c65a88c03b66d1a9"
-  integrity sha512-aSff4zPII1u2QD7y+F8oDsz19ew4IGEJg9SVW+bqwpwtfFleiQDMdzA/R+UlWDzfnHFCxxleFT0PMIrR36XLNQ==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.8.0"
-
-"@babel/plugin-syntax-numeric-separator@^7.10.4", "@babel/plugin-syntax-numeric-separator@^7.8.3":
-  version "7.10.4"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-numeric-separator/-/plugin-syntax-numeric-separator-7.10.4.tgz#b9b070b3e33570cd9fd07ba7fa91c0dd37b9af97"
-  integrity sha512-9H6YdfkcK/uOnY/K7/aA2xpzaAgkQn37yzWUMRK7OaPOqOpGS1+n0H5hxT9AUw9EsSjPW8SVyMJwYRtWs3X3ug==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.10.4"
-
-"@babel/plugin-syntax-object-rest-spread@^7.8.0", "@babel/plugin-syntax-object-rest-spread@^7.8.3":
-  version "7.8.3"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-object-rest-spread/-/plugin-syntax-object-rest-spread-7.8.3.tgz#60e225edcbd98a640332a2e72dd3e66f1af55871"
-  integrity sha512-XoqMijGZb9y3y2XskN+P1wUGiVwWZ5JmoDRwx5+3GmEplNyVM2s2Dg8ILFQm8rWM48orGy5YpI5Bl8U1y7ydlA==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.8.0"
-
-"@babel/plugin-syntax-optional-catch-binding@^7.8.0", "@babel/plugin-syntax-optional-catch-binding@^7.8.3":
-  version "7.8.3"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-optional-catch-binding/-/plugin-syntax-optional-catch-binding-7.8.3.tgz#6111a265bcfb020eb9efd0fdfd7d26402b9ed6c1"
-  integrity sha512-6VPD0Pc1lpTqw0aKoeRTMiB+kWhAoT24PA+ksWSBrFtl5SIRVpZlwN3NNPQjehA2E/91FV3RjLWoVTglWcSV3Q==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.8.0"
-
-"@babel/plugin-syntax-optional-chaining@^7.8.0", "@babel/plugin-syntax-optional-chaining@^7.8.3":
-  version "7.8.3"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-optional-chaining/-/plugin-syntax-optional-chaining-7.8.3.tgz#4f69c2ab95167e0180cd5336613f8c5788f7d48a"
-  integrity sha512-KoK9ErH1MBlCPxV0VANkXW2/dw4vlbGDrFgz8bmUsBGYkFRcbRwMh6cIJubdPrkxRwuGdtCk0v/wPTKbQgBjkg==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.8.0"
-
-"@babel/plugin-syntax-top-level-await@^7.12.1", "@babel/plugin-syntax-top-level-await@^7.12.13", "@babel/plugin-syntax-top-level-await@^7.8.3":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-top-level-await/-/plugin-syntax-top-level-await-7.12.13.tgz#c5f0fa6e249f5b739727f923540cf7a806130178"
-  integrity sha512-A81F9pDwyS7yM//KwbCSDqy3Uj4NMIurtplxphWxoYtNPov7cJsDkAFNNyVlIZ3jwGycVsurZ+LtOA8gZ376iQ==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.12.13"
-
-"@babel/plugin-syntax-typescript@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-typescript/-/plugin-syntax-typescript-7.12.13.tgz#9dff111ca64154cef0f4dc52cf843d9f12ce4474"
-  integrity sha512-cHP3u1JiUiG2LFDKbXnwVad81GvfyIOmCD6HIEId6ojrY0Drfy2q1jw7BwN7dE84+kTnBjLkXoL3IEy/3JPu2w==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.12.13"
-
-"@babel/plugin-transform-arrow-functions@^7.12.1", "@babel/plugin-transform-arrow-functions@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-arrow-functions/-/plugin-transform-arrow-functions-7.12.13.tgz#eda5670b282952100c229f8a3bd49e0f6a72e9fe"
-  integrity sha512-tBtuN6qtCTd+iHzVZVOMNp+L04iIJBpqkdY42tWbmjIT5wvR2kx7gxMBsyhQtFzHwBbyGi9h8J8r9HgnOpQHxg==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.12.13"
-
-"@babel/plugin-transform-async-to-generator@^7.12.1", "@babel/plugin-transform-async-to-generator@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-async-to-generator/-/plugin-transform-async-to-generator-7.12.13.tgz#fed8c69eebf187a535bfa4ee97a614009b24f7ae"
-  integrity sha512-psM9QHcHaDr+HZpRuJcE1PXESuGWSCcbiGFFhhwfzdbTxaGDVzuVtdNYliAwcRo3GFg0Bc8MmI+AvIGYIJG04A==
-  dependencies:
-    "@babel/helper-module-imports" "^7.12.13"
-    "@babel/helper-plugin-utils" "^7.12.13"
-    "@babel/helper-remap-async-to-generator" "^7.12.13"
-
-"@babel/plugin-transform-block-scoped-functions@^7.12.1", "@babel/plugin-transform-block-scoped-functions@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-block-scoped-functions/-/plugin-transform-block-scoped-functions-7.12.13.tgz#a9bf1836f2a39b4eb6cf09967739de29ea4bf4c4"
-  integrity sha512-zNyFqbc3kI/fVpqwfqkg6RvBgFpC4J18aKKMmv7KdQ/1GgREapSJAykLMVNwfRGO3BtHj3YQZl8kxCXPcVMVeg==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.12.13"
-
-"@babel/plugin-transform-block-scoping@^7.12.1", "@babel/plugin-transform-block-scoping@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-block-scoping/-/plugin-transform-block-scoping-7.12.13.tgz#f36e55076d06f41dfd78557ea039c1b581642e61"
-  integrity sha512-Pxwe0iqWJX4fOOM2kEZeUuAxHMWb9nK+9oh5d11bsLoB0xMg+mkDpt0eYuDZB7ETrY9bbcVlKUGTOGWy7BHsMQ==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.12.13"
-
-"@babel/plugin-transform-classes@^7.12.1", "@babel/plugin-transform-classes@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-classes/-/plugin-transform-classes-7.12.13.tgz#9728edc1838b5d62fc93ad830bd523b1fcb0e1f6"
-  integrity sha512-cqZlMlhCC1rVnxE5ZGMtIb896ijL90xppMiuWXcwcOAuFczynpd3KYemb91XFFPi3wJSe/OcrX9lXoowatkkxA==
-  dependencies:
-    "@babel/helper-annotate-as-pure" "^7.12.13"
-    "@babel/helper-function-name" "^7.12.13"
-    "@babel/helper-optimise-call-expression" "^7.12.13"
-    "@babel/helper-plugin-utils" "^7.12.13"
-    "@babel/helper-replace-supers" "^7.12.13"
-    "@babel/helper-split-export-declaration" "^7.12.13"
-    globals "^11.1.0"
-
-"@babel/plugin-transform-computed-properties@^7.12.1", "@babel/plugin-transform-computed-properties@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-computed-properties/-/plugin-transform-computed-properties-7.12.13.tgz#6a210647a3d67f21f699cfd2a01333803b27339d"
-  integrity sha512-dDfuROUPGK1mTtLKyDPUavmj2b6kFu82SmgpztBFEO974KMjJT+Ytj3/oWsTUMBmgPcp9J5Pc1SlcAYRpJ2hRA==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.12.13"
-
-"@babel/plugin-transform-destructuring@^7.12.1", "@babel/plugin-transform-destructuring@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-destructuring/-/plugin-transform-destructuring-7.12.13.tgz#fc56c5176940c5b41735c677124d1d20cecc9aeb"
-  integrity sha512-Dn83KykIFzjhA3FDPA1z4N+yfF3btDGhjnJwxIj0T43tP0flCujnU8fKgEkf0C1biIpSv9NZegPBQ1J6jYkwvQ==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.12.13"
-
-"@babel/plugin-transform-dotall-regex@^7.12.1", "@babel/plugin-transform-dotall-regex@^7.12.13", "@babel/plugin-transform-dotall-regex@^7.4.4":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-dotall-regex/-/plugin-transform-dotall-regex-7.12.13.tgz#3f1601cc29905bfcb67f53910f197aeafebb25ad"
-  integrity sha512-foDrozE65ZFdUC2OfgeOCrEPTxdB3yjqxpXh8CH+ipd9CHd4s/iq81kcUpyH8ACGNEPdFqbtzfgzbT/ZGlbDeQ==
-  dependencies:
-    "@babel/helper-create-regexp-features-plugin" "^7.12.13"
-    "@babel/helper-plugin-utils" "^7.12.13"
-
-"@babel/plugin-transform-duplicate-keys@^7.12.1", "@babel/plugin-transform-duplicate-keys@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-duplicate-keys/-/plugin-transform-duplicate-keys-7.12.13.tgz#6f06b87a8b803fd928e54b81c258f0a0033904de"
-  integrity sha512-NfADJiiHdhLBW3pulJlJI2NB0t4cci4WTZ8FtdIuNc2+8pslXdPtRRAEWqUY+m9kNOk2eRYbTAOipAxlrOcwwQ==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.12.13"
-
-"@babel/plugin-transform-exponentiation-operator@^7.12.1", "@babel/plugin-transform-exponentiation-operator@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-exponentiation-operator/-/plugin-transform-exponentiation-operator-7.12.13.tgz#4d52390b9a273e651e4aba6aee49ef40e80cd0a1"
-  integrity sha512-fbUelkM1apvqez/yYx1/oICVnGo2KM5s63mhGylrmXUxK/IAXSIf87QIxVfZldWf4QsOafY6vV3bX8aMHSvNrA==
-  dependencies:
-    "@babel/helper-builder-binary-assignment-operator-visitor" "^7.12.13"
-    "@babel/helper-plugin-utils" "^7.12.13"
-
-"@babel/plugin-transform-flow-strip-types@7.12.1":
-  version "7.12.1"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-flow-strip-types/-/plugin-transform-flow-strip-types-7.12.1.tgz#8430decfa7eb2aea5414ed4a3fa6e1652b7d77c4"
-  integrity sha512-8hAtkmsQb36yMmEtk2JZ9JnVyDSnDOdlB+0nEGzIDLuK4yR3JcEjfuFPYkdEPSh8Id+rAMeBEn+X0iVEyho6Hg==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.10.4"
-    "@babel/plugin-syntax-flow" "^7.12.1"
-
-"@babel/plugin-transform-for-of@^7.12.1", "@babel/plugin-transform-for-of@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-for-of/-/plugin-transform-for-of-7.12.13.tgz#561ff6d74d9e1c8879cb12dbaf4a14cd29d15cf6"
-  integrity sha512-xCbdgSzXYmHGyVX3+BsQjcd4hv4vA/FDy7Kc8eOpzKmBBPEOTurt0w5fCRQaGl+GSBORKgJdstQ1rHl4jbNseQ==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.12.13"
-
-"@babel/plugin-transform-function-name@^7.12.1", "@babel/plugin-transform-function-name@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-function-name/-/plugin-transform-function-name-7.12.13.tgz#bb024452f9aaed861d374c8e7a24252ce3a50051"
-  integrity sha512-6K7gZycG0cmIwwF7uMK/ZqeCikCGVBdyP2J5SKNCXO5EOHcqi+z7Jwf8AmyDNcBgxET8DrEtCt/mPKPyAzXyqQ==
-  dependencies:
-    "@babel/helper-function-name" "^7.12.13"
-    "@babel/helper-plugin-utils" "^7.12.13"
-
-"@babel/plugin-transform-literals@^7.12.1", "@babel/plugin-transform-literals@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-literals/-/plugin-transform-literals-7.12.13.tgz#2ca45bafe4a820197cf315794a4d26560fe4bdb9"
-  integrity sha512-FW+WPjSR7hiUxMcKqyNjP05tQ2kmBCdpEpZHY1ARm96tGQCCBvXKnpjILtDplUnJ/eHZ0lALLM+d2lMFSpYJrQ==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.12.13"
-
-"@babel/plugin-transform-member-expression-literals@^7.12.1", "@babel/plugin-transform-member-expression-literals@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-member-expression-literals/-/plugin-transform-member-expression-literals-7.12.13.tgz#5ffa66cd59b9e191314c9f1f803b938e8c081e40"
-  integrity sha512-kxLkOsg8yir4YeEPHLuO2tXP9R/gTjpuTOjshqSpELUN3ZAg2jfDnKUvzzJxObun38sw3wm4Uu69sX/zA7iRvg==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.12.13"
-
-"@babel/plugin-transform-modules-amd@^7.12.1", "@babel/plugin-transform-modules-amd@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-modules-amd/-/plugin-transform-modules-amd-7.12.13.tgz#43db16249b274ee2e551e2422090aa1c47692d56"
-  integrity sha512-JHLOU0o81m5UqG0Ulz/fPC68/v+UTuGTWaZBUwpEk1fYQ1D9LfKV6MPn4ttJKqRo5Lm460fkzjLTL4EHvCprvA==
-  dependencies:
-    "@babel/helper-module-transforms" "^7.12.13"
-    "@babel/helper-plugin-utils" "^7.12.13"
-    babel-plugin-dynamic-import-node "^2.3.3"
-
-"@babel/plugin-transform-modules-commonjs@^7.12.1", "@babel/plugin-transform-modules-commonjs@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-modules-commonjs/-/plugin-transform-modules-commonjs-7.12.13.tgz#5043b870a784a8421fa1fd9136a24f294da13e50"
-  integrity sha512-OGQoeVXVi1259HjuoDnsQMlMkT9UkZT9TpXAsqWplS/M0N1g3TJAn/ByOCeQu7mfjc5WpSsRU+jV1Hd89ts0kQ==
-  dependencies:
-    "@babel/helper-module-transforms" "^7.12.13"
-    "@babel/helper-plugin-utils" "^7.12.13"
-    "@babel/helper-simple-access" "^7.12.13"
-    babel-plugin-dynamic-import-node "^2.3.3"
-
-"@babel/plugin-transform-modules-systemjs@^7.12.1", "@babel/plugin-transform-modules-systemjs@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-modules-systemjs/-/plugin-transform-modules-systemjs-7.12.13.tgz#351937f392c7f07493fc79b2118201d50404a3c5"
-  integrity sha512-aHfVjhZ8QekaNF/5aNdStCGzwTbU7SI5hUybBKlMzqIMC7w7Ho8hx5a4R/DkTHfRfLwHGGxSpFt9BfxKCoXKoA==
-  dependencies:
-    "@babel/helper-hoist-variables" "^7.12.13"
-    "@babel/helper-module-transforms" "^7.12.13"
-    "@babel/helper-plugin-utils" "^7.12.13"
-    "@babel/helper-validator-identifier" "^7.12.11"
-    babel-plugin-dynamic-import-node "^2.3.3"
-
-"@babel/plugin-transform-modules-umd@^7.12.1", "@babel/plugin-transform-modules-umd@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-modules-umd/-/plugin-transform-modules-umd-7.12.13.tgz#26c66f161d3456674e344b4b1255de4d530cfb37"
-  integrity sha512-BgZndyABRML4z6ibpi7Z98m4EVLFI9tVsZDADC14AElFaNHHBcJIovflJ6wtCqFxwy2YJ1tJhGRsr0yLPKoN+w==
-  dependencies:
-    "@babel/helper-module-transforms" "^7.12.13"
-    "@babel/helper-plugin-utils" "^7.12.13"
-
-"@babel/plugin-transform-named-capturing-groups-regex@^7.12.1", "@babel/plugin-transform-named-capturing-groups-regex@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-named-capturing-groups-regex/-/plugin-transform-named-capturing-groups-regex-7.12.13.tgz#2213725a5f5bbbe364b50c3ba5998c9599c5c9d9"
-  integrity sha512-Xsm8P2hr5hAxyYblrfACXpQKdQbx4m2df9/ZZSQ8MAhsadw06+jW7s9zsSw6he+mJZXRlVMyEnVktJo4zjk1WA==
-  dependencies:
-    "@babel/helper-create-regexp-features-plugin" "^7.12.13"
-
-"@babel/plugin-transform-new-target@^7.12.1", "@babel/plugin-transform-new-target@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-new-target/-/plugin-transform-new-target-7.12.13.tgz#e22d8c3af24b150dd528cbd6e685e799bf1c351c"
-  integrity sha512-/KY2hbLxrG5GTQ9zzZSc3xWiOy379pIETEhbtzwZcw9rvuaVV4Fqy7BYGYOWZnaoXIQYbbJ0ziXLa/sKcGCYEQ==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.12.13"
-
-"@babel/plugin-transform-object-super@^7.12.1", "@babel/plugin-transform-object-super@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-object-super/-/plugin-transform-object-super-7.12.13.tgz#b4416a2d63b8f7be314f3d349bd55a9c1b5171f7"
-  integrity sha512-JzYIcj3XtYspZDV8j9ulnoMPZZnF/Cj0LUxPOjR89BdBVx+zYJI9MdMIlUZjbXDX+6YVeS6I3e8op+qQ3BYBoQ==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.12.13"
-    "@babel/helper-replace-supers" "^7.12.13"
-
-"@babel/plugin-transform-parameters@^7.12.1", "@babel/plugin-transform-parameters@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-parameters/-/plugin-transform-parameters-7.12.13.tgz#461e76dfb63c2dfd327b8a008a9e802818ce9853"
-  integrity sha512-e7QqwZalNiBRHCpJg/P8s/VJeSRYgmtWySs1JwvfwPqhBbiWfOcHDKdeAi6oAyIimoKWBlwc8oTgbZHdhCoVZA==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.12.13"
-
-"@babel/plugin-transform-property-literals@^7.12.1", "@babel/plugin-transform-property-literals@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-property-literals/-/plugin-transform-property-literals-7.12.13.tgz#4e6a9e37864d8f1b3bc0e2dce7bf8857db8b1a81"
-  integrity sha512-nqVigwVan+lR+g8Fj8Exl0UQX2kymtjcWfMOYM1vTYEKujeyv2SkMgazf2qNcK7l4SDiKyTA/nHCPqL4e2zo1A==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.12.13"
-
-"@babel/plugin-transform-react-constant-elements@^7.12.1":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-react-constant-elements/-/plugin-transform-react-constant-elements-7.12.13.tgz#f8ee56888545d53d80f766b3cc1563ab2c241f92"
-  integrity sha512-qmzKVTn46Upvtxv8LQoQ8mTCdUC83AOVQIQm57e9oekLT5cmK9GOMOfcWhe8jMNx4UJXn/UDhVZ/7lGofVNeDQ==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.12.13"
-
-"@babel/plugin-transform-react-display-name@7.12.1":
-  version "7.12.1"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-react-display-name/-/plugin-transform-react-display-name-7.12.1.tgz#1cbcd0c3b1d6648c55374a22fc9b6b7e5341c00d"
-  integrity sha512-cAzB+UzBIrekfYxyLlFqf/OagTvHLcVBb5vpouzkYkBclRPraiygVnafvAoipErZLI8ANv8Ecn6E/m5qPXD26w==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.10.4"
-
-"@babel/plugin-transform-react-display-name@^7.12.1", "@babel/plugin-transform-react-display-name@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-react-display-name/-/plugin-transform-react-display-name-7.12.13.tgz#c28effd771b276f4647411c9733dbb2d2da954bd"
-  integrity sha512-MprESJzI9O5VnJZrL7gg1MpdqmiFcUv41Jc7SahxYsNP2kDkFqClxxTZq+1Qv4AFCamm+GXMRDQINNn+qrxmiA==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.12.13"
-
-"@babel/plugin-transform-react-jsx-development@^7.12.1", "@babel/plugin-transform-react-jsx-development@^7.12.12":
-  version "7.12.17"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-react-jsx-development/-/plugin-transform-react-jsx-development-7.12.17.tgz#f510c0fa7cd7234153539f9a362ced41a5ca1447"
-  integrity sha512-BPjYV86SVuOaudFhsJR1zjgxxOhJDt6JHNoD48DxWEIxUCAMjV1ys6DYw4SDYZh0b1QsS2vfIA9t/ZsQGsDOUQ==
-  dependencies:
-    "@babel/plugin-transform-react-jsx" "^7.12.17"
-
-"@babel/plugin-transform-react-jsx-self@^7.12.1":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-react-jsx-self/-/plugin-transform-react-jsx-self-7.12.13.tgz#422d99d122d592acab9c35ea22a6cfd9bf189f60"
-  integrity sha512-FXYw98TTJ125GVCCkFLZXlZ1qGcsYqNQhVBQcZjyrwf8FEUtVfKIoidnO8S0q+KBQpDYNTmiGo1gn67Vti04lQ==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.12.13"
-
-"@babel/plugin-transform-react-jsx-source@^7.12.1":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-react-jsx-source/-/plugin-transform-react-jsx-source-7.12.13.tgz#051d76126bee5c9a6aa3ba37be2f6c1698856bcb"
-  integrity sha512-O5JJi6fyfih0WfDgIJXksSPhGP/G0fQpfxYy87sDc+1sFmsCS6wr3aAn+whbzkhbjtq4VMqLRaSzR6IsshIC0Q==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.12.13"
-
-"@babel/plugin-transform-react-jsx@^7.12.1", "@babel/plugin-transform-react-jsx@^7.12.13", "@babel/plugin-transform-react-jsx@^7.12.17":
-  version "7.12.17"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-react-jsx/-/plugin-transform-react-jsx-7.12.17.tgz#dd2c1299f5e26de584939892de3cfc1807a38f24"
-  integrity sha512-mwaVNcXV+l6qJOuRhpdTEj8sT/Z0owAVWf9QujTZ0d2ye9X/K+MTOTSizcgKOj18PGnTc/7g1I4+cIUjsKhBcw==
-  dependencies:
-    "@babel/helper-annotate-as-pure" "^7.12.13"
-    "@babel/helper-module-imports" "^7.12.13"
-    "@babel/helper-plugin-utils" "^7.12.13"
-    "@babel/plugin-syntax-jsx" "^7.12.13"
-    "@babel/types" "^7.12.17"
-
-"@babel/plugin-transform-react-pure-annotations@^7.12.1":
-  version "7.12.1"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-react-pure-annotations/-/plugin-transform-react-pure-annotations-7.12.1.tgz#05d46f0ab4d1339ac59adf20a1462c91b37a1a42"
-  integrity sha512-RqeaHiwZtphSIUZ5I85PEH19LOSzxfuEazoY7/pWASCAIBuATQzpSVD+eT6MebeeZT2F4eSL0u4vw6n4Nm0Mjg==
-  dependencies:
-    "@babel/helper-annotate-as-pure" "^7.10.4"
-    "@babel/helper-plugin-utils" "^7.10.4"
-
-"@babel/plugin-transform-regenerator@^7.12.1", "@babel/plugin-transform-regenerator@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-regenerator/-/plugin-transform-regenerator-7.12.13.tgz#b628bcc9c85260ac1aeb05b45bde25210194a2f5"
-  integrity sha512-lxb2ZAvSLyJ2PEe47hoGWPmW22v7CtSl9jW8mingV4H2sEX/JOcrAj2nPuGWi56ERUm2bUpjKzONAuT6HCn2EA==
-  dependencies:
-    regenerator-transform "^0.14.2"
-
-"@babel/plugin-transform-reserved-words@^7.12.1", "@babel/plugin-transform-reserved-words@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-reserved-words/-/plugin-transform-reserved-words-7.12.13.tgz#7d9988d4f06e0fe697ea1d9803188aa18b472695"
-  integrity sha512-xhUPzDXxZN1QfiOy/I5tyye+TRz6lA7z6xaT4CLOjPRMVg1ldRf0LHw0TDBpYL4vG78556WuHdyO9oi5UmzZBg==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.12.13"
-
-"@babel/plugin-transform-runtime@7.12.1":
-  version "7.12.1"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-runtime/-/plugin-transform-runtime-7.12.1.tgz#04b792057eb460389ff6a4198e377614ea1e7ba5"
-  integrity sha512-Ac/H6G9FEIkS2tXsZjL4RAdS3L3WHxci0usAnz7laPWUmFiGtj7tIASChqKZMHTSQTQY6xDbOq+V1/vIq3QrWg==
-  dependencies:
-    "@babel/helper-module-imports" "^7.12.1"
-    "@babel/helper-plugin-utils" "^7.10.4"
-    resolve "^1.8.1"
-    semver "^5.5.1"
-
-"@babel/plugin-transform-shorthand-properties@^7.12.1", "@babel/plugin-transform-shorthand-properties@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-shorthand-properties/-/plugin-transform-shorthand-properties-7.12.13.tgz#db755732b70c539d504c6390d9ce90fe64aff7ad"
-  integrity sha512-xpL49pqPnLtf0tVluuqvzWIgLEhuPpZzvs2yabUHSKRNlN7ScYU7aMlmavOeyXJZKgZKQRBlh8rHbKiJDraTSw==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.12.13"
-
-"@babel/plugin-transform-spread@^7.12.1", "@babel/plugin-transform-spread@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-spread/-/plugin-transform-spread-7.12.13.tgz#ca0d5645abbd560719c354451b849f14df4a7949"
-  integrity sha512-dUCrqPIowjqk5pXsx1zPftSq4sT0aCeZVAxhdgs3AMgyaDmoUT0G+5h3Dzja27t76aUEIJWlFgPJqJ/d4dbTtg==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.12.13"
-    "@babel/helper-skip-transparent-expression-wrappers" "^7.12.1"
-
-"@babel/plugin-transform-sticky-regex@^7.12.1", "@babel/plugin-transform-sticky-regex@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-sticky-regex/-/plugin-transform-sticky-regex-7.12.13.tgz#760ffd936face73f860ae646fb86ee82f3d06d1f"
-  integrity sha512-Jc3JSaaWT8+fr7GRvQP02fKDsYk4K/lYwWq38r/UGfaxo89ajud321NH28KRQ7xy1Ybc0VUE5Pz8psjNNDUglg==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.12.13"
-
-"@babel/plugin-transform-template-literals@^7.12.1", "@babel/plugin-transform-template-literals@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-template-literals/-/plugin-transform-template-literals-7.12.13.tgz#655037b07ebbddaf3b7752f55d15c2fd6f5aa865"
-  integrity sha512-arIKlWYUgmNsF28EyfmiQHJLJFlAJNYkuQO10jL46ggjBpeb2re1P9K9YGxNJB45BqTbaslVysXDYm/g3sN/Qg==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.12.13"
-
-"@babel/plugin-transform-typeof-symbol@^7.12.1", "@babel/plugin-transform-typeof-symbol@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-typeof-symbol/-/plugin-transform-typeof-symbol-7.12.13.tgz#785dd67a1f2ea579d9c2be722de8c84cb85f5a7f"
-  integrity sha512-eKv/LmUJpMnu4npgfvs3LiHhJua5fo/CysENxa45YCQXZwKnGCQKAg87bvoqSW1fFT+HA32l03Qxsm8ouTY3ZQ==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.12.13"
-
-"@babel/plugin-transform-typescript@^7.12.1":
-  version "7.12.17"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-typescript/-/plugin-transform-typescript-7.12.17.tgz#4aa6a5041888dd2e5d316ec39212b0cf855211bb"
-  integrity sha512-1bIYwnhRoetxkFonuZRtDZPFEjl1l5r+3ITkxLC3mlMaFja+GQFo94b/WHEPjqWLU9Bc+W4oFZbvCGe9eYMu1g==
-  dependencies:
-    "@babel/helper-create-class-features-plugin" "^7.12.17"
-    "@babel/helper-plugin-utils" "^7.12.13"
-    "@babel/plugin-syntax-typescript" "^7.12.13"
-
-"@babel/plugin-transform-unicode-escapes@^7.12.1", "@babel/plugin-transform-unicode-escapes@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-unicode-escapes/-/plugin-transform-unicode-escapes-7.12.13.tgz#840ced3b816d3b5127dd1d12dcedc5dead1a5e74"
-  integrity sha512-0bHEkdwJ/sN/ikBHfSmOXPypN/beiGqjo+o4/5K+vxEFNPRPdImhviPakMKG4x96l85emoa0Z6cDflsdBusZbw==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.12.13"
-
-"@babel/plugin-transform-unicode-regex@^7.12.1", "@babel/plugin-transform-unicode-regex@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-transform-unicode-regex/-/plugin-transform-unicode-regex-7.12.13.tgz#b52521685804e155b1202e83fc188d34bb70f5ac"
-  integrity sha512-mDRzSNY7/zopwisPZ5kM9XKCfhchqIYwAKRERtEnhYscZB79VRekuRSoYbN0+KVe3y8+q1h6A4svXtP7N+UoCA==
-  dependencies:
-    "@babel/helper-create-regexp-features-plugin" "^7.12.13"
-    "@babel/helper-plugin-utils" "^7.12.13"
-
-"@babel/preset-env@7.12.1":
-  version "7.12.1"
-  resolved "https://registry.yarnpkg.com/@babel/preset-env/-/preset-env-7.12.1.tgz#9c7e5ca82a19efc865384bb4989148d2ee5d7ac2"
-  integrity sha512-H8kxXmtPaAGT7TyBvSSkoSTUK6RHh61So05SyEbpmr0MCZrsNYn7mGMzzeYoOUCdHzww61k8XBft2TaES+xPLg==
-  dependencies:
-    "@babel/compat-data" "^7.12.1"
-    "@babel/helper-compilation-targets" "^7.12.1"
-    "@babel/helper-module-imports" "^7.12.1"
-    "@babel/helper-plugin-utils" "^7.10.4"
-    "@babel/helper-validator-option" "^7.12.1"
-    "@babel/plugin-proposal-async-generator-functions" "^7.12.1"
-    "@babel/plugin-proposal-class-properties" "^7.12.1"
-    "@babel/plugin-proposal-dynamic-import" "^7.12.1"
-    "@babel/plugin-proposal-export-namespace-from" "^7.12.1"
-    "@babel/plugin-proposal-json-strings" "^7.12.1"
-    "@babel/plugin-proposal-logical-assignment-operators" "^7.12.1"
-    "@babel/plugin-proposal-nullish-coalescing-operator" "^7.12.1"
-    "@babel/plugin-proposal-numeric-separator" "^7.12.1"
-    "@babel/plugin-proposal-object-rest-spread" "^7.12.1"
-    "@babel/plugin-proposal-optional-catch-binding" "^7.12.1"
-    "@babel/plugin-proposal-optional-chaining" "^7.12.1"
-    "@babel/plugin-proposal-private-methods" "^7.12.1"
-    "@babel/plugin-proposal-unicode-property-regex" "^7.12.1"
-    "@babel/plugin-syntax-async-generators" "^7.8.0"
-    "@babel/plugin-syntax-class-properties" "^7.12.1"
-    "@babel/plugin-syntax-dynamic-import" "^7.8.0"
-    "@babel/plugin-syntax-export-namespace-from" "^7.8.3"
-    "@babel/plugin-syntax-json-strings" "^7.8.0"
-    "@babel/plugin-syntax-logical-assignment-operators" "^7.10.4"
-    "@babel/plugin-syntax-nullish-coalescing-operator" "^7.8.0"
-    "@babel/plugin-syntax-numeric-separator" "^7.10.4"
-    "@babel/plugin-syntax-object-rest-spread" "^7.8.0"
-    "@babel/plugin-syntax-optional-catch-binding" "^7.8.0"
-    "@babel/plugin-syntax-optional-chaining" "^7.8.0"
-    "@babel/plugin-syntax-top-level-await" "^7.12.1"
-    "@babel/plugin-transform-arrow-functions" "^7.12.1"
-    "@babel/plugin-transform-async-to-generator" "^7.12.1"
-    "@babel/plugin-transform-block-scoped-functions" "^7.12.1"
-    "@babel/plugin-transform-block-scoping" "^7.12.1"
-    "@babel/plugin-transform-classes" "^7.12.1"
-    "@babel/plugin-transform-computed-properties" "^7.12.1"
-    "@babel/plugin-transform-destructuring" "^7.12.1"
-    "@babel/plugin-transform-dotall-regex" "^7.12.1"
-    "@babel/plugin-transform-duplicate-keys" "^7.12.1"
-    "@babel/plugin-transform-exponentiation-operator" "^7.12.1"
-    "@babel/plugin-transform-for-of" "^7.12.1"
-    "@babel/plugin-transform-function-name" "^7.12.1"
-    "@babel/plugin-transform-literals" "^7.12.1"
-    "@babel/plugin-transform-member-expression-literals" "^7.12.1"
-    "@babel/plugin-transform-modules-amd" "^7.12.1"
-    "@babel/plugin-transform-modules-commonjs" "^7.12.1"
-    "@babel/plugin-transform-modules-systemjs" "^7.12.1"
-    "@babel/plugin-transform-modules-umd" "^7.12.1"
-    "@babel/plugin-transform-named-capturing-groups-regex" "^7.12.1"
-    "@babel/plugin-transform-new-target" "^7.12.1"
-    "@babel/plugin-transform-object-super" "^7.12.1"
-    "@babel/plugin-transform-parameters" "^7.12.1"
-    "@babel/plugin-transform-property-literals" "^7.12.1"
-    "@babel/plugin-transform-regenerator" "^7.12.1"
-    "@babel/plugin-transform-reserved-words" "^7.12.1"
-    "@babel/plugin-transform-shorthand-properties" "^7.12.1"
-    "@babel/plugin-transform-spread" "^7.12.1"
-    "@babel/plugin-transform-sticky-regex" "^7.12.1"
-    "@babel/plugin-transform-template-literals" "^7.12.1"
-    "@babel/plugin-transform-typeof-symbol" "^7.12.1"
-    "@babel/plugin-transform-unicode-escapes" "^7.12.1"
-    "@babel/plugin-transform-unicode-regex" "^7.12.1"
-    "@babel/preset-modules" "^0.1.3"
-    "@babel/types" "^7.12.1"
-    core-js-compat "^3.6.2"
-    semver "^5.5.0"
-
-"@babel/preset-env@^7.12.1", "@babel/preset-env@^7.8.4":
-  version "7.12.17"
-  resolved "https://registry.yarnpkg.com/@babel/preset-env/-/preset-env-7.12.17.tgz#94a3793ff089c32ee74d76a3c03a7597693ebaaa"
-  integrity sha512-9PMijx8zFbCwTHrd2P4PJR5nWGH3zWebx2OcpTjqQrHhCiL2ssSR2Sc9ko2BsI2VmVBfoaQmPrlMTCui4LmXQg==
-  dependencies:
-    "@babel/compat-data" "^7.12.13"
-    "@babel/helper-compilation-targets" "^7.12.17"
-    "@babel/helper-module-imports" "^7.12.13"
-    "@babel/helper-plugin-utils" "^7.12.13"
-    "@babel/helper-validator-option" "^7.12.17"
-    "@babel/plugin-proposal-async-generator-functions" "^7.12.13"
-    "@babel/plugin-proposal-class-properties" "^7.12.13"
-    "@babel/plugin-proposal-dynamic-import" "^7.12.17"
-    "@babel/plugin-proposal-export-namespace-from" "^7.12.13"
-    "@babel/plugin-proposal-json-strings" "^7.12.13"
-    "@babel/plugin-proposal-logical-assignment-operators" "^7.12.13"
-    "@babel/plugin-proposal-nullish-coalescing-operator" "^7.12.13"
-    "@babel/plugin-proposal-numeric-separator" "^7.12.13"
-    "@babel/plugin-proposal-object-rest-spread" "^7.12.13"
-    "@babel/plugin-proposal-optional-catch-binding" "^7.12.13"
-    "@babel/plugin-proposal-optional-chaining" "^7.12.17"
-    "@babel/plugin-proposal-private-methods" "^7.12.13"
-    "@babel/plugin-proposal-unicode-property-regex" "^7.12.13"
-    "@babel/plugin-syntax-async-generators" "^7.8.0"
-    "@babel/plugin-syntax-class-properties" "^7.12.13"
-    "@babel/plugin-syntax-dynamic-import" "^7.8.0"
-    "@babel/plugin-syntax-export-namespace-from" "^7.8.3"
-    "@babel/plugin-syntax-json-strings" "^7.8.0"
-    "@babel/plugin-syntax-logical-assignment-operators" "^7.10.4"
-    "@babel/plugin-syntax-nullish-coalescing-operator" "^7.8.0"
-    "@babel/plugin-syntax-numeric-separator" "^7.10.4"
-    "@babel/plugin-syntax-object-rest-spread" "^7.8.0"
-    "@babel/plugin-syntax-optional-catch-binding" "^7.8.0"
-    "@babel/plugin-syntax-optional-chaining" "^7.8.0"
-    "@babel/plugin-syntax-top-level-await" "^7.12.13"
-    "@babel/plugin-transform-arrow-functions" "^7.12.13"
-    "@babel/plugin-transform-async-to-generator" "^7.12.13"
-    "@babel/plugin-transform-block-scoped-functions" "^7.12.13"
-    "@babel/plugin-transform-block-scoping" "^7.12.13"
-    "@babel/plugin-transform-classes" "^7.12.13"
-    "@babel/plugin-transform-computed-properties" "^7.12.13"
-    "@babel/plugin-transform-destructuring" "^7.12.13"
-    "@babel/plugin-transform-dotall-regex" "^7.12.13"
-    "@babel/plugin-transform-duplicate-keys" "^7.12.13"
-    "@babel/plugin-transform-exponentiation-operator" "^7.12.13"
-    "@babel/plugin-transform-for-of" "^7.12.13"
-    "@babel/plugin-transform-function-name" "^7.12.13"
-    "@babel/plugin-transform-literals" "^7.12.13"
-    "@babel/plugin-transform-member-expression-literals" "^7.12.13"
-    "@babel/plugin-transform-modules-amd" "^7.12.13"
-    "@babel/plugin-transform-modules-commonjs" "^7.12.13"
-    "@babel/plugin-transform-modules-systemjs" "^7.12.13"
-    "@babel/plugin-transform-modules-umd" "^7.12.13"
-    "@babel/plugin-transform-named-capturing-groups-regex" "^7.12.13"
-    "@babel/plugin-transform-new-target" "^7.12.13"
-    "@babel/plugin-transform-object-super" "^7.12.13"
-    "@babel/plugin-transform-parameters" "^7.12.13"
-    "@babel/plugin-transform-property-literals" "^7.12.13"
-    "@babel/plugin-transform-regenerator" "^7.12.13"
-    "@babel/plugin-transform-reserved-words" "^7.12.13"
-    "@babel/plugin-transform-shorthand-properties" "^7.12.13"
-    "@babel/plugin-transform-spread" "^7.12.13"
-    "@babel/plugin-transform-sticky-regex" "^7.12.13"
-    "@babel/plugin-transform-template-literals" "^7.12.13"
-    "@babel/plugin-transform-typeof-symbol" "^7.12.13"
-    "@babel/plugin-transform-unicode-escapes" "^7.12.13"
-    "@babel/plugin-transform-unicode-regex" "^7.12.13"
-    "@babel/preset-modules" "^0.1.3"
-    "@babel/types" "^7.12.17"
-    core-js-compat "^3.8.0"
-    semver "^5.5.0"
-
-"@babel/preset-modules@^0.1.3":
-  version "0.1.4"
-  resolved "https://registry.yarnpkg.com/@babel/preset-modules/-/preset-modules-0.1.4.tgz#362f2b68c662842970fdb5e254ffc8fc1c2e415e"
-  integrity sha512-J36NhwnfdzpmH41M1DrnkkgAqhZaqr/NBdPfQ677mLzlaXo+oDiv1deyCDtgAhz8p328otdob0Du7+xgHGZbKg==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.0.0"
-    "@babel/plugin-proposal-unicode-property-regex" "^7.4.4"
-    "@babel/plugin-transform-dotall-regex" "^7.4.4"
-    "@babel/types" "^7.4.4"
-    esutils "^2.0.2"
-
-"@babel/preset-react@7.12.1":
-  version "7.12.1"
-  resolved "https://registry.yarnpkg.com/@babel/preset-react/-/preset-react-7.12.1.tgz#7f022b13f55b6dd82f00f16d1c599ae62985358c"
-  integrity sha512-euCExymHCi0qB9u5fKw7rvlw7AZSjw/NaB9h7EkdTt5+yHRrXdiRTh7fkG3uBPpJg82CqLfp1LHLqWGSCrab+g==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.10.4"
-    "@babel/plugin-transform-react-display-name" "^7.12.1"
-    "@babel/plugin-transform-react-jsx" "^7.12.1"
-    "@babel/plugin-transform-react-jsx-development" "^7.12.1"
-    "@babel/plugin-transform-react-jsx-self" "^7.12.1"
-    "@babel/plugin-transform-react-jsx-source" "^7.12.1"
-    "@babel/plugin-transform-react-pure-annotations" "^7.12.1"
-
-"@babel/preset-react@^7.12.5":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/preset-react/-/preset-react-7.12.13.tgz#5f911b2eb24277fa686820d5bd81cad9a0602a0a"
-  integrity sha512-TYM0V9z6Abb6dj1K7i5NrEhA13oS5ujUYQYDfqIBXYHOc2c2VkFgc+q9kyssIyUfy4/hEwqrgSlJ/Qgv8zJLsA==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.12.13"
-    "@babel/plugin-transform-react-display-name" "^7.12.13"
-    "@babel/plugin-transform-react-jsx" "^7.12.13"
-    "@babel/plugin-transform-react-jsx-development" "^7.12.12"
-    "@babel/plugin-transform-react-pure-annotations" "^7.12.1"
-
-"@babel/preset-typescript@7.12.1":
-  version "7.12.1"
-  resolved "https://registry.yarnpkg.com/@babel/preset-typescript/-/preset-typescript-7.12.1.tgz#86480b483bb97f75036e8864fe404cc782cc311b"
-  integrity sha512-hNK/DhmoJPsksdHuI/RVrcEws7GN5eamhi28JkO52MqIxU8Z0QpmiSOQxZHWOHV7I3P4UjHV97ay4TcamMA6Kw==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.10.4"
-    "@babel/plugin-transform-typescript" "^7.12.1"
-
-"@babel/runtime-corejs3@^7.10.2":
-  version "7.12.18"
-  resolved "https://registry.yarnpkg.com/@babel/runtime-corejs3/-/runtime-corejs3-7.12.18.tgz#e5663237e5658e4c09586995d2dd6d2c8cfd6fc0"
-  integrity sha512-ngR7yhNTjDxxe1VYmhqQqqXZWujGb6g0IoA4qeG6MxNGRnIw2Zo8ImY8HfaQ7l3T6GklWhdNfyhWk0C0iocdVA==
-  dependencies:
-    core-js-pure "^3.0.0"
-    regenerator-runtime "^0.13.4"
-
-"@babel/runtime@7.12.1":
-  version "7.12.1"
-  resolved "https://registry.yarnpkg.com/@babel/runtime/-/runtime-7.12.1.tgz#b4116a6b6711d010b2dad3b7b6e43bf1b9954740"
-  integrity sha512-J5AIf3vPj3UwXaAzb5j1xM4WAQDX3EMgemF8rjCP3SoW09LfRKAXQKt6CoVYl230P6iWdRcBbnLDDdnqWxZSCA==
-  dependencies:
-    regenerator-runtime "^0.13.4"
-
-"@babel/runtime@^7.0.0", "@babel/runtime@^7.1.2", "@babel/runtime@^7.12.1", "@babel/runtime@^7.12.13", "@babel/runtime@^7.12.5", "@babel/runtime@^7.9.2":
-  version "7.13.7"
-  resolved "https://registry.yarnpkg.com/@babel/runtime/-/runtime-7.13.7.tgz#d494e39d198ee9ca04f4dcb76d25d9d7a1dc961a"
-  integrity sha512-h+ilqoX998mRVM5FtB5ijRuHUDVt5l3yfoOi2uh18Z/O3hvyaHQ39NpxVkCIG5yFs+mLq/ewFp8Bss6zmWv6ZA==
-  dependencies:
-    regenerator-runtime "^0.13.4"
-
-"@babel/runtime@^7.10.2", "@babel/runtime@^7.11.2", "@babel/runtime@^7.5.5", "@babel/runtime@^7.7.2", "@babel/runtime@^7.8.4":
-  version "7.12.18"
-  resolved "https://registry.yarnpkg.com/@babel/runtime/-/runtime-7.12.18.tgz#af137bd7e7d9705a412b3caaf991fe6aaa97831b"
-  integrity sha512-BogPQ7ciE6SYAUPtlm9tWbgI9+2AgqSam6QivMgXgAT+fKbgppaj4ZX15MHeLC1PVF5sNk70huBu20XxWOs8Cg==
-  dependencies:
-    regenerator-runtime "^0.13.4"
-
-"@babel/template@^7.10.4", "@babel/template@^7.12.13", "@babel/template@^7.3.3":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/template/-/template-7.12.13.tgz#530265be8a2589dbb37523844c5bcb55947fb327"
-  integrity sha512-/7xxiGA57xMo/P2GVvdEumr8ONhFOhfgq2ihK3h1e6THqzTAkHbkXgB0xI9yeTfIUoH3+oAeHhqm/I43OTbbjA==
-  dependencies:
-    "@babel/code-frame" "^7.12.13"
-    "@babel/parser" "^7.12.13"
-    "@babel/types" "^7.12.13"
-
-"@babel/traverse@^7.1.0", "@babel/traverse@^7.12.1", "@babel/traverse@^7.12.13", "@babel/traverse@^7.12.17", "@babel/traverse@^7.7.0":
-  version "7.12.17"
-  resolved "https://registry.yarnpkg.com/@babel/traverse/-/traverse-7.12.17.tgz#40ec8c7ffb502c4e54c7f95492dc11b88d718619"
-  integrity sha512-LGkTqDqdiwC6Q7fWSwQoas/oyiEYw6Hqjve5KOSykXkmFJFqzvGMb9niaUEag3Rlve492Mkye3gLw9FTv94fdQ==
-  dependencies:
-    "@babel/code-frame" "^7.12.13"
-    "@babel/generator" "^7.12.17"
-    "@babel/helper-function-name" "^7.12.13"
-    "@babel/helper-split-export-declaration" "^7.12.13"
-    "@babel/parser" "^7.12.17"
-    "@babel/types" "^7.12.17"
-    debug "^4.1.0"
-    globals "^11.1.0"
-    lodash "^4.17.19"
-
-"@babel/types@^7.0.0", "@babel/types@^7.12.1", "@babel/types@^7.12.13", "@babel/types@^7.12.17", "@babel/types@^7.12.6", "@babel/types@^7.3.0", "@babel/types@^7.3.3", "@babel/types@^7.4.4", "@babel/types@^7.7.0":
-  version "7.12.17"
-  resolved "https://registry.yarnpkg.com/@babel/types/-/types-7.12.17.tgz#9d711eb807e0934c90b8b1ca0eb1f7230d150963"
-  integrity sha512-tNMDjcv/4DIcHxErTgwB9q2ZcYyN0sUfgGKUK/mm1FJK7Wz+KstoEekxrl/tBiNDgLK1HGi+sppj1An/1DR4fQ==
-  dependencies:
-    "@babel/helper-validator-identifier" "^7.12.11"
-    lodash "^4.17.19"
-    to-fast-properties "^2.0.0"
-
-"@bcoe/v8-coverage@^0.2.3":
-  version "0.2.3"
-  resolved "https://registry.yarnpkg.com/@bcoe/v8-coverage/-/v8-coverage-0.2.3.tgz#75a2e8b51cb758a7553d6804a5932d7aace75c39"
-  integrity sha512-0hYQ8SB4Db5zvZB4axdMHGwEaQjkZzFjQiN9LVYvIFB2nSUHW9tYpxWriPrWDASIxiaXax83REcLxuSdnGPZtw==
-
-"@chakra-ui/accordion@1.1.2":
-  version "1.1.2"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/accordion/-/accordion-1.1.2.tgz#b45a44746276100601a39f88e3a5e150a2232294"
-  integrity sha512-ni4lwO7I1f9uHgV/FHZVfyr+FRDabXfX2cqpCtY2+QvBzaWM+55VAHJfbel2N6/eogXy5WSLJyYD5fQmyu7Fpg==
-  dependencies:
-    "@chakra-ui/descendant" "1.0.7"
-    "@chakra-ui/hooks" "1.1.4"
-    "@chakra-ui/icon" "1.1.1"
-    "@chakra-ui/transition" "1.0.8"
-    "@chakra-ui/utils" "1.2.0"
-
-"@chakra-ui/alert@1.1.1":
-  version "1.1.1"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/alert/-/alert-1.1.1.tgz#96286feab8b74f624325da9b51b6960043a7ba71"
-  integrity sha512-Hqbf4VuAL/gL6oLQapoF8BV5zAX41Rm+xN2q8c/jWZx5i3l7kWiQ5jn0dJ0prWnVdNbEPmIAqiU0UkSo/lUOjw==
-  dependencies:
-    "@chakra-ui/icon" "1.1.1"
-    "@chakra-ui/utils" "1.2.0"
-
-"@chakra-ui/avatar@1.1.2":
-  version "1.1.2"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/avatar/-/avatar-1.1.2.tgz#6b61253ecce850d0ab4c6d0b4045e5cf4969daed"
-  integrity sha512-CqXedZed9bEWzzs+8mkB/4NLmD+JbMetNvVbHtLlENta7jnOJDCMJpaXD9QMmiGKKNuqFHfZlGmLmxIMruZBpg==
-  dependencies:
-    "@chakra-ui/image" "1.0.7"
-    "@chakra-ui/utils" "1.2.0"
-
-"@chakra-ui/breadcrumb@1.1.1":
-  version "1.1.1"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/breadcrumb/-/breadcrumb-1.1.1.tgz#3f995a20ec0db39b5922dc4d7b12f1871a20e957"
-  integrity sha512-EnbMYwqPI8if0WJ2m/054fKXc+K7GN8jafaLCm4qfWedogF6t3huB0qvLA00Z6HUwTNfdCtESxU44VfGHsbNDQ==
-  dependencies:
-    "@chakra-ui/utils" "1.2.0"
-
-"@chakra-ui/button@1.1.2":
-  version "1.1.2"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/button/-/button-1.1.2.tgz#3c7a4e6bf38f3451c2b280ea366fc2da0054e992"
-  integrity sha512-6QEn6cL3v1VcdqCT92uqduLI7ip+VQk4Adxekt5WWHDvkw9WqQz1aOqKzfPTEau27WHBBymB09vJR66CoUZCiw==
-  dependencies:
-    "@chakra-ui/spinner" "1.1.1"
-    "@chakra-ui/utils" "1.2.0"
-
-"@chakra-ui/checkbox@1.2.3":
-  version "1.2.3"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/checkbox/-/checkbox-1.2.3.tgz#b0574f973515612ce99984c38dde17bc5d603821"
-  integrity sha512-op7o/tt4P9oj/N6X5LZUrdaK+VMWoeZavlPh1WWZJ34e26R8y51eCjTQdQURu95hHuwdm7EMK1wSJsINUgWP7A==
-  dependencies:
-    "@chakra-ui/hooks" "1.1.4"
-    "@chakra-ui/utils" "1.2.0"
-    "@chakra-ui/visually-hidden" "1.0.4"
-
-"@chakra-ui/clickable@1.0.4":
-  version "1.0.4"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/clickable/-/clickable-1.0.4.tgz#aa97871ccfe5ec66168a67b6df6e3309bfd34eb4"
-  integrity sha512-KAfOjz2zoF7OGay/rg9x2hPCgwd5WqnsxR3dgo6R6ULQ4dsee602kjy6OYxKyI9e6DUgodI/BDZq+57e7+wd5A==
-  dependencies:
-    "@chakra-ui/utils" "1.2.0"
-
-"@chakra-ui/close-button@1.1.1":
-  version "1.1.1"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/close-button/-/close-button-1.1.1.tgz#a9805fe1b8fb2b13e7f27e263d23bed9859bb99d"
-  integrity sha512-vTapJ3kZZ04xxR1c+EO1t7w5BYZmm/7NTCotAPN6SuehlcqzG0YP2t+fHk7YPlXiweTQKL6v5DLCBlSuqSjZtw==
-  dependencies:
-    "@chakra-ui/icon" "1.1.1"
-    "@chakra-ui/utils" "1.2.0"
-
-"@chakra-ui/color-mode@1.1.0":
-  version "1.1.0"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/color-mode/-/color-mode-1.1.0.tgz#fdb6f44790897754d2c2c444855be5af228ecb43"
-  integrity sha512-c9BdU/B3/WbYOJlAI6z02IzGBzWX1+icqWf9M+5psgTqCv5jqxOsqUKs39Zw6H+rpYqQqR20/i0hyDmN1eptpw==
-  dependencies:
-    "@chakra-ui/hooks" "1.1.4"
-    "@chakra-ui/utils" "1.2.0"
-
-"@chakra-ui/control-box@1.0.4":
-  version "1.0.4"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/control-box/-/control-box-1.0.4.tgz#82bdce22e771accfc56dd5896582ec475b7d490c"
-  integrity sha512-qiZAawX8EaWxk+QnCtpmGADV3D7mNy3DcPhIPmsY4XYfnh8wl9cJfm2B6u3we8nHhi8eQSReSHGbV5s884bO1g==
-  dependencies:
-    "@chakra-ui/utils" "1.2.0"
-
-"@chakra-ui/counter@1.0.7":
-  version "1.0.7"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/counter/-/counter-1.0.7.tgz#78a335bdca4768905235722924262ea98f828954"
-  integrity sha512-wcSqSZIvdumn8yIfpF7r/cuYQ6MQlSoY/WQu6aJuwpky/k4xgfVSQq/VCM/Jcb8VcnYeYmPasnKHo+5RY35gSw==
-  dependencies:
-    "@chakra-ui/hooks" "1.1.4"
-    "@chakra-ui/utils" "1.2.0"
-
-"@chakra-ui/css-reset@1.0.0":
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/css-reset/-/css-reset-1.0.0.tgz#8395921b35ef27bee0579a4d730c5ab7f7b39734"
-  integrity sha512-UaPsImGHvCgFO3ayp6Ugafu2/3/EG8wlW/8Y9Ihfk1UFv8cpV+3BfWKmuZ7IcmxcBL9dkP6E8p3/M1T0FB92hg==
-
-"@chakra-ui/descendant@1.0.7":
-  version "1.0.7"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/descendant/-/descendant-1.0.7.tgz#22e13fd732c742a9a74c0f414b0fbd03310299f2"
-  integrity sha512-PnyLyV8hD+STVr9KYzPN13hCj7pwSLvGtQc3J1d+XXvazBtmwUIaX9WZ632kXQhxlvdz83tOzDbJPQs3e1VU3A==
-  dependencies:
-    "@chakra-ui/hooks" "1.1.4"
-
-"@chakra-ui/editable@1.1.1":
-  version "1.1.1"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/editable/-/editable-1.1.1.tgz#43725979c32bd791d160016fa3a5d0c52e8785b1"
-  integrity sha512-p33kIcqBoM9c+hh10QRoV15Lb/sKT3KJoPwThjyDcBaNyvSyFhrOX0equVxjxD7Y4htp9/G7b8owx767lnobwg==
-  dependencies:
-    "@chakra-ui/hooks" "1.1.4"
-    "@chakra-ui/utils" "1.2.0"
-
-"@chakra-ui/focus-lock@1.1.0":
-  version "1.1.0"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/focus-lock/-/focus-lock-1.1.0.tgz#5b6e1623216d3ba135828fa508df38890a643059"
-  integrity sha512-yzW/By8DO+9kH4eT5y73POuO3HMDMLdy/1udEy95fcP0RbofIU03ytx439FIFB0JwyF8pUP4PEHB6zI6YxXCbQ==
-  dependencies:
-    "@chakra-ui/utils" "1.2.0"
-    react-focus-lock "2.5.0"
-
-"@chakra-ui/form-control@1.2.1":
-  version "1.2.1"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/form-control/-/form-control-1.2.1.tgz#70693011fdf396c292aac81575caba976276d385"
-  integrity sha512-pgp34e5DRHc8B+wUMxZNYBo8W7f2TdIOm04dO24WIIC+mPIu++QsV2O0SPOeN+WJzbirtq/8vibPmaZOfBdQVw==
-  dependencies:
-    "@chakra-ui/hooks" "1.1.4"
-    "@chakra-ui/icon" "1.1.1"
-    "@chakra-ui/utils" "1.2.0"
-
-"@chakra-ui/hooks@1.1.4":
-  version "1.1.4"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/hooks/-/hooks-1.1.4.tgz#369280f49c3204ccdf6a31b4436fe4938c15073c"
-  integrity sha512-5E4JT4Bl/JYe75N3/eU6eWkDhLCx1azcKpkvzNAgRFP1QqbXxAjxVnwHiMoNhWNFHBWRhkvTI/z4yBoOc7Rf4w==
-  dependencies:
-    "@chakra-ui/utils" "1.2.0"
-    compute-scroll-into-view "1.0.14"
-    copy-to-clipboard "3.3.1"
-
-"@chakra-ui/icon@1.1.1":
-  version "1.1.1"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/icon/-/icon-1.1.1.tgz#13317762a58f3e039b14a52f5cf978d037e64b4b"
-  integrity sha512-dL1D1q11MM+cL849jmADSjY8KqOWplAEb/XgLXX/ZZau7GHqYAXIwdYnfXwOYBaypiosUvGsm9g2zU4iHYZtdg==
-  dependencies:
-    "@chakra-ui/utils" "1.2.0"
-
-"@chakra-ui/icons@^1.0.5":
-  version "1.0.5"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/icons/-/icons-1.0.5.tgz#21eee12b9ab12da0430c62ea99e6081a6cec7b14"
-  integrity sha512-l8CgisPAS44ehKLw/hoHI6dfSX7pOAfylv8QkINVPEzhHwnNIg0wHzcm1cGkRXuq3DN8G0z88KmFegFFp02yiA==
-  dependencies:
-    "@chakra-ui/icon" "1.1.1"
-    "@types/react" "^17.0.0"
-
-"@chakra-ui/image@1.0.7":
-  version "1.0.7"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/image/-/image-1.0.7.tgz#ec49b2abb05549d4358675a8df860608294d19fe"
-  integrity sha512-GqPHBzWgvkmxuovD8sLvwO45Zh+vRa0qIKFg6mBeMwpdQh4aWHZJLw8Ln5Hh3WFRtJcIbZqKQV3dUJln8ZpQkQ==
-  dependencies:
-    "@chakra-ui/hooks" "1.1.4"
-    "@chakra-ui/utils" "1.2.0"
-
-"@chakra-ui/input@1.1.2":
-  version "1.1.2"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/input/-/input-1.1.2.tgz#8d1b1734870168802a4d4c679f9bf0fb7b4bc716"
-  integrity sha512-ipT5RpkwVTTzadvOEXt62m9a7Q3vH0cZf1Dis3xdh2FBJjR1Xk0Nr+jjXxtTj99Rn6UMxsVyq3EsSnH09O8o5A==
-  dependencies:
-    "@chakra-ui/form-control" "1.2.1"
-    "@chakra-ui/utils" "1.2.0"
-
-"@chakra-ui/layout@1.3.1":
-  version "1.3.1"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/layout/-/layout-1.3.1.tgz#5a9e0bc67c3f4ce24e00da75850576d9c0d89d8c"
-  integrity sha512-xkIemd9Sloq0kOnbyxnXO22W5YFlMxxHVJLkX9cN5+13i+Qi/Fk/da+yyP8wQ4g8zxQORFZb5K47GdJneGOUlQ==
-  dependencies:
-    "@chakra-ui/icon" "1.1.1"
-    "@chakra-ui/utils" "1.2.0"
-
-"@chakra-ui/live-region@1.0.4":
-  version "1.0.4"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/live-region/-/live-region-1.0.4.tgz#3dc528e89fde5fa950cb42816cf0a81c010c66ba"
-  integrity sha512-sJkCqT1chDU04MMgFCy2amq/h/95IoOLbRhuJpWm4V7WIHT/YsQURk0DCHr8JhPXgTJx88jgNve/WFdVtZEmDw==
-  dependencies:
-    "@chakra-ui/utils" "1.2.0"
-
-"@chakra-ui/media-query@1.0.5":
-  version "1.0.5"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/media-query/-/media-query-1.0.5.tgz#bad0c45919d70c3543a1114f988865ebe7720631"
-  integrity sha512-QoZt7YkPKEZhdHQ1M6F6QzRC0gUlLCDDrDEPHn3D4AZzhYcmygH4TlSTi2WwhZjdiwgTRJT4zqpkkdej0sLXuA==
-  dependencies:
-    "@chakra-ui/utils" "1.2.0"
-
-"@chakra-ui/menu@1.1.2":
-  version "1.1.2"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/menu/-/menu-1.1.2.tgz#6357a56c713f180d6fe1e570ecb6f8892e3dd2cc"
-  integrity sha512-nUfzsXb/HyNrDyJrzJM7+ZajZMzKLHNXhqerHowOoMekTYJcX6MF/K6Sv78KemMrrAHHrfUh2p/2aK0VHPx6pw==
-  dependencies:
-    "@chakra-ui/clickable" "1.0.4"
-    "@chakra-ui/descendant" "1.0.7"
-    "@chakra-ui/hooks" "1.1.4"
-    "@chakra-ui/popper" "1.1.4"
-    "@chakra-ui/transition" "1.0.8"
-    "@chakra-ui/utils" "1.2.0"
-
-"@chakra-ui/modal@1.6.0":
-  version "1.6.0"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/modal/-/modal-1.6.0.tgz#58ba52bd6181217a886e10a730514679b9aa8ca0"
-  integrity sha512-mrCQTt+Dyo9enbN3YvBjoxxxjC63vT60EBdM+EKoymcHtC6tJ8gH4uvDVSxvAwtd4TWHrv2x+MhI/W/nb6f9Hg==
-  dependencies:
-    "@chakra-ui/close-button" "1.1.1"
-    "@chakra-ui/focus-lock" "1.1.0"
-    "@chakra-ui/hooks" "1.1.4"
-    "@chakra-ui/portal" "1.1.1"
-    "@chakra-ui/transition" "1.0.8"
-    "@chakra-ui/utils" "1.2.0"
-    aria-hidden "^1.1.1"
-    react-remove-scroll "2.4.1"
-
-"@chakra-ui/number-input@1.1.1":
-  version "1.1.1"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/number-input/-/number-input-1.1.1.tgz#c62999faf05ea50a4cb7f24a0f22eaaf304ca7ee"
-  integrity sha512-BAVfv79andVOEYx3YWLb0RrdLLqE484CwGxjoJ3aE8OprW/WE8Ghl2BO/wxWBXwX/TxnvB6JpC4iM4u6eHufnQ==
-  dependencies:
-    "@chakra-ui/counter" "1.0.7"
-    "@chakra-ui/form-control" "1.2.1"
-    "@chakra-ui/hooks" "1.1.4"
-    "@chakra-ui/icon" "1.1.1"
-    "@chakra-ui/utils" "1.2.0"
-
-"@chakra-ui/pin-input@1.4.0":
-  version "1.4.0"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/pin-input/-/pin-input-1.4.0.tgz#ab13ee640e30a02a6c172a12c8453cf01470a92b"
-  integrity sha512-85XXAMNNgX7RG0ca9tU4kJeYYrj9+jtUWINatYXVzcvkx/T1VRM3ohRQDtDdZ7wovzEE90mtzkQGPISZhLKHug==
-  dependencies:
-    "@chakra-ui/descendant" "1.0.7"
-    "@chakra-ui/hooks" "1.1.4"
-    "@chakra-ui/utils" "1.2.0"
-
-"@chakra-ui/popover@1.2.2":
-  version "1.2.2"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/popover/-/popover-1.2.2.tgz#cf8f0449dc0ad1c27460c80c2a037aa6f304a077"
-  integrity sha512-J2let+7e1RbLP/SG+waHI7I/7DWq9KMQnh9baiUxn2PatxNHtnCI+raCAalXFuLQ93fwaBgf++a/BmFYfq3LLw==
-  dependencies:
-    "@chakra-ui/close-button" "1.1.1"
-    "@chakra-ui/hooks" "1.1.4"
-    "@chakra-ui/popper" "1.1.4"
-    "@chakra-ui/portal" "1.1.1"
-    "@chakra-ui/transition" "1.0.8"
-    "@chakra-ui/utils" "1.2.0"
-
-"@chakra-ui/popper@1.1.4":
-  version "1.1.4"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/popper/-/popper-1.1.4.tgz#6b44115dea192e9e23b270c32a77b19fcc10941a"
-  integrity sha512-eAZ6i/+7jd/fjgqWwE4SuhkKFkkWYDw0A/rfV093FtInc0lHDOjzXBgp5GEEbl4pnoPSP2AYJq+5JJFCjW9zIA==
-  dependencies:
-    "@chakra-ui/hooks" "1.1.4"
-    "@chakra-ui/utils" "1.2.0"
-    "@popperjs/core" "2.4.4"
-    dequal "2.0.2"
-
-"@chakra-ui/portal@1.1.1":
-  version "1.1.1"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/portal/-/portal-1.1.1.tgz#8ed8481f8e4b555c3cbcba1745767dc068b0d8e5"
-  integrity sha512-YzD/807srlkC1+F1jRaJYAlgtb2CN24RhYRVliV45xR59RkrgnAVVzt7+KmsUF4N9OK+OJhrMfhyM2sNvthw1g==
-  dependencies:
-    "@chakra-ui/hooks" "1.1.4"
-    "@chakra-ui/utils" "1.2.0"
-
-"@chakra-ui/progress@1.1.1":
-  version "1.1.1"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/progress/-/progress-1.1.1.tgz#416536d804953455bbe7d7196c713510f757e5b6"
-  integrity sha512-lGZaUPvi0ySwvIp1FG2RyrioBBPGyr3TzQxT5nf6eUVTfwps3LlAGcCTfNy59tK8vcwXz0uDDKtobfRFwxj19g==
-  dependencies:
-    "@chakra-ui/theme-tools" "1.0.4"
-    "@chakra-ui/utils" "1.2.0"
-
-"@chakra-ui/radio@1.2.3":
-  version "1.2.3"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/radio/-/radio-1.2.3.tgz#87e859c99f35af9d33cff20adbfed2c0f40811f5"
-  integrity sha512-HZDZnoMKJfJ1xlkWy4713Q5mHvF92EJFTY/ZaE6VHSBb1h+JjE1DmEO2Latt5OITx02Liv0dN0je3Hk1ncsgcg==
-  dependencies:
-    "@chakra-ui/form-control" "1.2.1"
-    "@chakra-ui/hooks" "1.1.4"
-    "@chakra-ui/utils" "1.2.0"
-    "@chakra-ui/visually-hidden" "1.0.4"
-
-"@chakra-ui/react@^1.3.3":
-  version "1.3.3"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/react/-/react-1.3.3.tgz#b45559fba32d018a66a7ec8f01ef9d0f33c0b8a5"
-  integrity sha512-61mVM98U5gTzQdCLqvcQRs4q7vsEHp8T5wNhbkPc1hw1tQ1iW9c7H4RyOQgqNqlolTCRbtoRGuh8TEQI9mQdQg==
-  dependencies:
-    "@chakra-ui/accordion" "1.1.2"
-    "@chakra-ui/alert" "1.1.1"
-    "@chakra-ui/avatar" "1.1.2"
-    "@chakra-ui/breadcrumb" "1.1.1"
-    "@chakra-ui/button" "1.1.2"
-    "@chakra-ui/checkbox" "1.2.3"
-    "@chakra-ui/close-button" "1.1.1"
-    "@chakra-ui/control-box" "1.0.4"
-    "@chakra-ui/counter" "1.0.7"
-    "@chakra-ui/css-reset" "1.0.0"
-    "@chakra-ui/editable" "1.1.1"
-    "@chakra-ui/form-control" "1.2.1"
-    "@chakra-ui/hooks" "1.1.4"
-    "@chakra-ui/icon" "1.1.1"
-    "@chakra-ui/image" "1.0.7"
-    "@chakra-ui/input" "1.1.2"
-    "@chakra-ui/layout" "1.3.1"
-    "@chakra-ui/live-region" "1.0.4"
-    "@chakra-ui/media-query" "1.0.5"
-    "@chakra-ui/menu" "1.1.2"
-    "@chakra-ui/modal" "1.6.0"
-    "@chakra-ui/number-input" "1.1.1"
-    "@chakra-ui/pin-input" "1.4.0"
-    "@chakra-ui/popover" "1.2.2"
-    "@chakra-ui/popper" "1.1.4"
-    "@chakra-ui/portal" "1.1.1"
-    "@chakra-ui/progress" "1.1.1"
-    "@chakra-ui/radio" "1.2.3"
-    "@chakra-ui/select" "1.1.1"
-    "@chakra-ui/skeleton" "1.1.3"
-    "@chakra-ui/slider" "1.1.1"
-    "@chakra-ui/spinner" "1.1.1"
-    "@chakra-ui/stat" "1.1.1"
-    "@chakra-ui/switch" "1.1.3"
-    "@chakra-ui/system" "1.3.1"
-    "@chakra-ui/table" "1.1.1"
-    "@chakra-ui/tabs" "1.1.1"
-    "@chakra-ui/tag" "1.1.1"
-    "@chakra-ui/textarea" "1.1.1"
-    "@chakra-ui/theme" "1.6.2"
-    "@chakra-ui/toast" "1.1.11"
-    "@chakra-ui/tooltip" "1.1.2"
-    "@chakra-ui/transition" "1.0.8"
-    "@chakra-ui/utils" "1.2.0"
-    "@chakra-ui/visually-hidden" "1.0.4"
-
-"@chakra-ui/select@1.1.1":
-  version "1.1.1"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/select/-/select-1.1.1.tgz#54becf1c70183968aab45c1fc01b8dc5c0ae1c60"
-  integrity sha512-7sFPXjBlV/6Ms60hHyLgiCMe93BF6z59HFrGQyXavvN3NNSpR4B4+AQqD5/guRi3GV8TamyPiHVIQIPw6wt2Eg==
-  dependencies:
-    "@chakra-ui/form-control" "1.2.1"
-    "@chakra-ui/utils" "1.2.0"
-
-"@chakra-ui/skeleton@1.1.3":
-  version "1.1.3"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/skeleton/-/skeleton-1.1.3.tgz#86905e88fedf3ed844da87fd0ad01c064988d72a"
-  integrity sha512-F2HK0/QAQ/BmkY0FC8Lhhquxl5bSL0OemF6gyPcmK21tQ5czvUBCElVUybFnYkyVLfihB5pvazKZky8xCEgfIQ==
-  dependencies:
-    "@chakra-ui/hooks" "1.1.4"
-    "@chakra-ui/media-query" "1.0.5"
-    "@chakra-ui/system" "1.3.1"
-    "@chakra-ui/utils" "1.2.0"
-
-"@chakra-ui/slider@1.1.1":
-  version "1.1.1"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/slider/-/slider-1.1.1.tgz#f329984b328588bd5f87d94d73a0be312cb5967b"
-  integrity sha512-D9xZqefmxx2clbd3iNK2bM1zFmygXXNZuwvPFCVWPa82zSOVPnfDjH1n+Z+VjPixEW6fl64sIl99oJdvDuH7wg==
-  dependencies:
-    "@chakra-ui/hooks" "1.1.4"
-    "@chakra-ui/utils" "1.2.0"
-
-"@chakra-ui/spinner@1.1.1":
-  version "1.1.1"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/spinner/-/spinner-1.1.1.tgz#803c3f75dc6af08901a156079c3e068e28ac8b85"
-  integrity sha512-dGT5DVvQwnj4B7FjIl6C/1ZmXgUFSA0ZC7JgQNQdJYc3DgtkGf0a6L+DhiyBneEPb9/RRHQJhHoRPndnHU15QA==
-  dependencies:
-    "@chakra-ui/utils" "1.2.0"
-    "@chakra-ui/visually-hidden" "1.0.4"
-
-"@chakra-ui/stat@1.1.1":
-  version "1.1.1"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/stat/-/stat-1.1.1.tgz#6ccab1734ea88a0c5d8cd2e2ed9a2c6e1af669e8"
-  integrity sha512-dG7SGe2ZEFugbA4kyCtSLRtwgMQ2pqq3QAWBjA/ZvdEdhL10EGp+bv3Ab0l1qju3DEpfyx3+M6mHxG5zWJ92Fw==
-  dependencies:
-    "@chakra-ui/icon" "1.1.1"
-    "@chakra-ui/utils" "1.2.0"
-    "@chakra-ui/visually-hidden" "1.0.4"
-
-"@chakra-ui/styled-system@1.7.1":
-  version "1.7.1"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/styled-system/-/styled-system-1.7.1.tgz#1483006a8cb123ce1c6f67f2ef4346c8b8fa6a4b"
-  integrity sha512-mhSakTWdh7ZEkqwRdoVW1seumIFq6Yu/Glal4VVcyhxS34V/VPLX0GtRfjC8cSpsYiwGZHX/7WJYWN2cldx+Gg==
-  dependencies:
-    "@chakra-ui/utils" "1.2.0"
-    css-get-unit "1.0.1"
-    csstype "^3.0.6"
-
-"@chakra-ui/switch@1.1.3":
-  version "1.1.3"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/switch/-/switch-1.1.3.tgz#aa72379daf1bce12173e26c6d8541d21088f7712"
-  integrity sha512-64o7GL3yFiLlh4qtdK//Bey4wLn8yqib9Duci9T+FRiFWLLDa9ksmlQGeX7Qe4AIolimXGa96Ys8yYAfzDqaJQ==
-  dependencies:
-    "@chakra-ui/checkbox" "1.2.3"
-    "@chakra-ui/utils" "1.2.0"
-
-"@chakra-ui/system@1.3.1":
-  version "1.3.1"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/system/-/system-1.3.1.tgz#65d5d6af288d4b04df088cf3e7c94c4c15b40dad"
-  integrity sha512-NMC0ajaCUIYnVxYBS3jRrgRmqK1p39MX1yB9J3BRgfVlKbDizkUOAJaqe+FX//3NVJ++QPJOjUu0azmlR6HYZw==
-  dependencies:
-    "@chakra-ui/color-mode" "1.1.0"
-    "@chakra-ui/styled-system" "1.7.1"
-    "@chakra-ui/utils" "1.2.0"
-    react-fast-compare "3.2.0"
-
-"@chakra-ui/table@1.1.1":
-  version "1.1.1"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/table/-/table-1.1.1.tgz#a2fb89c21e1405ab56b90225640e8e2fadfc8fd9"
-  integrity sha512-GdYbqN1q/QPQqca3jfWbyWJ7waUg6RbpQbsyyhFNWHDKhOb6H2y+cDZCHrOiFwBahWZxykR4ZcMJnQjPqtB7ag==
-  dependencies:
-    "@chakra-ui/utils" "1.2.0"
-
-"@chakra-ui/tabs@1.1.1":
-  version "1.1.1"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/tabs/-/tabs-1.1.1.tgz#a0772d8adb9d1452f103213443b7852837d94cf4"
-  integrity sha512-yfJct0Yfxt2fQ9KgbtVhC89OaB5iD8nLAFi3zhiuNQBp84OXESVfvanr1lHBg3YZLgVLW1O0QQLv3aHVGLtwYg==
-  dependencies:
-    "@chakra-ui/clickable" "1.0.4"
-    "@chakra-ui/descendant" "1.0.7"
-    "@chakra-ui/hooks" "1.1.4"
-    "@chakra-ui/utils" "1.2.0"
-
-"@chakra-ui/tag@1.1.1":
-  version "1.1.1"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/tag/-/tag-1.1.1.tgz#b95aee5f579cc45658ef6b75ead413fef772711c"
-  integrity sha512-I1ScSeaUEgNPY7lv2IZ0blTAb13wvu/UqGPuatG71PqI4LIKAtZJVxF/AnxAQY4WpbDKTp/t3z3DfyKr8Ccouw==
-  dependencies:
-    "@chakra-ui/icon" "1.1.1"
-    "@chakra-ui/utils" "1.2.0"
-
-"@chakra-ui/textarea@1.1.1":
-  version "1.1.1"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/textarea/-/textarea-1.1.1.tgz#d4c5274a79fbbae55eba0a3d3173429e72bd0e83"
-  integrity sha512-Mmw/mVfZSNf/0QpLe8Nnvpp1jAkRtjhKD4eDBG6AW6M0l6tST3LFbDC3qZePmEJpqxkOE3IyQgYEjHjb9PgcMA==
-  dependencies:
-    "@chakra-ui/form-control" "1.2.1"
-    "@chakra-ui/utils" "1.2.0"
-
-"@chakra-ui/theme-tools@1.0.4":
-  version "1.0.4"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/theme-tools/-/theme-tools-1.0.4.tgz#ead4886e61e3f054e48ca0a95a70d796fe7e3838"
-  integrity sha512-kx34izftAHvtRjxpkgWbnMx5DyGtUi8JoQO8E5bhwjJ5drNQl2yvNeoqLpHjf3YTqBYQqkz3VkzHIeHs3wZEwg==
-  dependencies:
-    "@chakra-ui/utils" "1.2.0"
-    "@types/tinycolor2" "1.4.2"
-    tinycolor2 "1.4.2"
-
-"@chakra-ui/theme@1.6.2":
-  version "1.6.2"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/theme/-/theme-1.6.2.tgz#6d1ff9ca0fb8461615cae9eb785e51c5faad6703"
-  integrity sha512-yM1pacXJfvpwjBUFvFQm/E3sG51/4IReKB6OB1ddr5i7Z/30cPzUQTeUrbfizGWMkrgMRy1ImEmZbo1ACN6gqw==
-  dependencies:
-    "@chakra-ui/theme-tools" "1.0.4"
-    "@chakra-ui/utils" "1.2.0"
-
-"@chakra-ui/toast@1.1.11":
-  version "1.1.11"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/toast/-/toast-1.1.11.tgz#1576385315fd894a6ae3fe2d56dacdb8072e9e2c"
-  integrity sha512-joeNDETyPIKdwc0YAq/Qjr68SriJU1nTH9KIkuGwmntHE859DtIhwXtAg7k4ZRoSTFEm3/xtFN7yk2aGhueO1w==
-  dependencies:
-    "@chakra-ui/alert" "1.1.1"
-    "@chakra-ui/close-button" "1.1.1"
-    "@chakra-ui/hooks" "1.1.4"
-    "@chakra-ui/theme" "1.6.2"
-    "@chakra-ui/transition" "1.0.8"
-    "@chakra-ui/utils" "1.2.0"
-    "@reach/alert" "0.13.0"
-
-"@chakra-ui/tooltip@1.1.2":
-  version "1.1.2"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/tooltip/-/tooltip-1.1.2.tgz#18372edbd92d8613d6c34c54f6adc8aefe6a501b"
-  integrity sha512-sT7PcgYqa5uvsTpXiCNOZxLhIPFWUtblWnMMyn3QIALsgrSkWCceNyIKs1fTCYVBowb30nYOX3owuoP4CsgWHw==
-  dependencies:
-    "@chakra-ui/hooks" "1.1.4"
-    "@chakra-ui/popper" "1.1.4"
-    "@chakra-ui/portal" "1.1.1"
-    "@chakra-ui/utils" "1.2.0"
-    "@chakra-ui/visually-hidden" "1.0.4"
-
-"@chakra-ui/transition@1.0.8":
-  version "1.0.8"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/transition/-/transition-1.0.8.tgz#b9fc6ab7ec901af7d417f0b84ef2785ec5cbaee2"
-  integrity sha512-c4BArP5Q9nl2R6QDAmigCklkMpGKP1ZYOfF1RD7qboPROZVt+SUNGKW+GHGN7mN0kaWHuCb+sbLXMCqQG/jQmQ==
-  dependencies:
-    "@chakra-ui/hooks" "1.1.4"
-    "@chakra-ui/utils" "1.2.0"
-
-"@chakra-ui/utils@1.2.0":
-  version "1.2.0"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/utils/-/utils-1.2.0.tgz#9385950e91455ecf480eb52bca268d3a5f8c9d6d"
-  integrity sha512-oMTX8BX1+MUf+iYUafFm9tNfwa3m1cqzMcE/5iQFmNcqKlZmuADnVL28Yw7jhoeouIjANaGY+f51qj9zHgDn9Q==
-  dependencies:
-    "@types/lodash.mergewith" "4.6.6"
-    "@types/object-assign" "4.0.30"
-    css-box-model "1.2.1"
-    lodash.mergewith "4.6.2"
-
-"@chakra-ui/visually-hidden@1.0.4":
-  version "1.0.4"
-  resolved "https://registry.yarnpkg.com/@chakra-ui/visually-hidden/-/visually-hidden-1.0.4.tgz#e83428acff21b4471f57c0a8c8a9467b050d75fc"
-  integrity sha512-RxXmEjwOoMh28lSen4tmkQBRQ21Hi15UGLQTnKfY2LhJyxsojyPT9TSHzehWgFIb8D+N3Er09WLgkd6f/bJqyg==
-  dependencies:
-    "@chakra-ui/utils" "1.2.0"
-
-"@cnakazawa/watch@^1.0.3":
-  version "1.0.4"
-  resolved "https://registry.yarnpkg.com/@cnakazawa/watch/-/watch-1.0.4.tgz#f864ae85004d0fcab6f50be9141c4da368d1656a"
-  integrity sha512-v9kIhKwjeZThiWrLmj0y17CWoyddASLj9O2yvbZkbvw/N3rWOYy9zkV66ursAoVr0mV15bL8g0c4QZUE6cdDoQ==
-  dependencies:
-    exec-sh "^0.3.2"
-    minimist "^1.2.0"
-
-"@csstools/convert-colors@^1.4.0":
-  version "1.4.0"
-  resolved "https://registry.yarnpkg.com/@csstools/convert-colors/-/convert-colors-1.4.0.tgz#ad495dc41b12e75d588c6db8b9834f08fa131eb7"
-  integrity sha512-5a6wqoJV/xEdbRNKVo6I4hO3VjyDq//8q2f9I6PBAvMesJHFauXDorcNCsr9RzvsZnaWi5NYCcfyqP1QeFHFbw==
-
-"@csstools/normalize.css@^10.1.0":
-  version "10.1.0"
-  resolved "https://registry.yarnpkg.com/@csstools/normalize.css/-/normalize.css-10.1.0.tgz#f0950bba18819512d42f7197e56c518aa491cf18"
-  integrity sha512-ij4wRiunFfaJxjB0BdrYHIH8FxBJpOwNPhhAcunlmPdXudL1WQV1qoP9un6JsEBAgQH+7UXyyjh0g7jTxXK6tg==
-
-"@emotion/babel-plugin@^11.1.2":
-  version "11.2.0"
-  resolved "https://registry.yarnpkg.com/@emotion/babel-plugin/-/babel-plugin-11.2.0.tgz#f25c6df8ec045dad5ae6ca63df0791673b98c920"
-  integrity sha512-lsnQBnl3l4wu/FJoyHnYRpHJeIPNkOBMbtDUIXcO8luulwRKZXPvA10zd2eXVN6dABIWNX4E34en/jkejIg/yA==
-  dependencies:
-    "@babel/helper-module-imports" "^7.7.0"
-    "@babel/plugin-syntax-jsx" "^7.12.1"
-    "@babel/runtime" "^7.7.2"
-    "@emotion/hash" "^0.8.0"
-    "@emotion/memoize" "^0.7.5"
-    "@emotion/serialize" "^1.0.0"
-    babel-plugin-macros "^2.6.1"
-    convert-source-map "^1.5.0"
-    escape-string-regexp "^4.0.0"
-    find-root "^1.1.0"
-    source-map "^0.5.7"
-    stylis "^4.0.3"
-
-"@emotion/cache@^11.1.3":
-  version "11.1.3"
-  resolved "https://registry.yarnpkg.com/@emotion/cache/-/cache-11.1.3.tgz#c7683a9484bcd38d5562f2b9947873cf66829afd"
-  integrity sha512-n4OWinUPJVaP6fXxWZD9OUeQ0lY7DvtmtSuqtRWT0Ofo/sBLCVSgb4/Oa0Q5eFxcwablRKjUXqXtNZVyEwCAuA==
-  dependencies:
-    "@emotion/memoize" "^0.7.4"
-    "@emotion/sheet" "^1.0.0"
-    "@emotion/utils" "^1.0.0"
-    "@emotion/weak-memoize" "^0.2.5"
-    stylis "^4.0.3"
-
-"@emotion/hash@^0.8.0":
-  version "0.8.0"
-  resolved "https://registry.yarnpkg.com/@emotion/hash/-/hash-0.8.0.tgz#bbbff68978fefdbe68ccb533bc8cbe1d1afb5413"
-  integrity sha512-kBJtf7PH6aWwZ6fka3zQ0p6SBYzx4fl1LoZXE2RrnYST9Xljm7WfKJrU4g/Xr3Beg72MLrp1AWNUmuYJTL7Cow==
-
-"@emotion/is-prop-valid@^0.8.2":
-  version "0.8.8"
-  resolved "https://registry.yarnpkg.com/@emotion/is-prop-valid/-/is-prop-valid-0.8.8.tgz#db28b1c4368a259b60a97311d6a952d4fd01ac1a"
-  integrity sha512-u5WtneEAr5IDG2Wv65yhunPSMLIpuKsbuOktRojfrEiEvRyC85LgPMZI63cr7NUqT8ZIGdSVg8ZKGxIug4lXcA==
-  dependencies:
-    "@emotion/memoize" "0.7.4"
-
-"@emotion/is-prop-valid@^1.1.0":
-  version "1.1.0"
-  resolved "https://registry.yarnpkg.com/@emotion/is-prop-valid/-/is-prop-valid-1.1.0.tgz#29ef6be1e946fb4739f9707def860f316f668cde"
-  integrity sha512-9RkilvXAufQHsSsjQ3PIzSns+pxuX4EW8EbGeSPjZMHuMx6z/MOzb9LpqNieQX4F3mre3NWS2+X3JNRHTQztUQ==
-  dependencies:
-    "@emotion/memoize" "^0.7.4"
-
-"@emotion/memoize@0.7.4":
-  version "0.7.4"
-  resolved "https://registry.yarnpkg.com/@emotion/memoize/-/memoize-0.7.4.tgz#19bf0f5af19149111c40d98bb0cf82119f5d9eeb"
-  integrity sha512-Ja/Vfqe3HpuzRsG1oBtWTHk2PGZ7GR+2Vz5iYGelAw8dx32K0y7PjVuxK6z1nMpZOqAFsRUPCkK1YjJ56qJlgw==
-
-"@emotion/memoize@^0.7.4", "@emotion/memoize@^0.7.5":
-  version "0.7.5"
-  resolved "https://registry.yarnpkg.com/@emotion/memoize/-/memoize-0.7.5.tgz#2c40f81449a4e554e9fc6396910ed4843ec2be50"
-  integrity sha512-igX9a37DR2ZPGYtV6suZ6whr8pTFtyHL3K/oLUotxpSVO2ASaprmAe2Dkq7tBo7CRY7MMDrAa9nuQP9/YG8FxQ==
-
-"@emotion/react@^11.1.5":
-  version "11.1.5"
-  resolved "https://registry.yarnpkg.com/@emotion/react/-/react-11.1.5.tgz#15e78f9822894cdc296e6f4e0688bac8120dfe66"
-  integrity sha512-xfnZ9NJEv9SU9K2sxXM06lzjK245xSeHRpUh67eARBm3PBHjjKIZlfWZ7UQvD0Obvw6ZKjlC79uHrlzFYpOB/Q==
-  dependencies:
-    "@babel/runtime" "^7.7.2"
-    "@emotion/cache" "^11.1.3"
-    "@emotion/serialize" "^1.0.0"
-    "@emotion/sheet" "^1.0.1"
-    "@emotion/utils" "^1.0.0"
-    "@emotion/weak-memoize" "^0.2.5"
-    hoist-non-react-statics "^3.3.1"
-
-"@emotion/serialize@^1.0.0":
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/@emotion/serialize/-/serialize-1.0.0.tgz#1a61f4f037cf39995c97fc80ebe99abc7b191ca9"
-  integrity sha512-zt1gm4rhdo5Sry8QpCOpopIUIKU+mUSpV9WNmFILUraatm5dttNEaYzUWWSboSMUE6PtN2j1cAsuvcugfdI3mw==
-  dependencies:
-    "@emotion/hash" "^0.8.0"
-    "@emotion/memoize" "^0.7.4"
-    "@emotion/unitless" "^0.7.5"
-    "@emotion/utils" "^1.0.0"
-    csstype "^3.0.2"
-
-"@emotion/sheet@^1.0.0", "@emotion/sheet@^1.0.1":
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/@emotion/sheet/-/sheet-1.0.1.tgz#245f54abb02dfd82326e28689f34c27aa9b2a698"
-  integrity sha512-GbIvVMe4U+Zc+929N1V7nW6YYJtidj31lidSmdYcWozwoBIObXBnaJkKNDjZrLm9Nc0BR+ZyHNaRZxqNZbof5g==
-
-"@emotion/styled@^11.1.5":
-  version "11.1.5"
-  resolved "https://registry.yarnpkg.com/@emotion/styled/-/styled-11.1.5.tgz#3d7bfa58b346e48315f65ee956aeef81f0bea8e0"
-  integrity sha512-nIq7pOBEDqT5xSFbclQ3XFy0q8C9EUU8ECqKN2kJKGxKh+vLz/x26kEih4aOpoAsyzc+R60rQxh7VJiLTUEdmg==
-  dependencies:
-    "@babel/runtime" "^7.7.2"
-    "@emotion/babel-plugin" "^11.1.2"
-    "@emotion/is-prop-valid" "^1.1.0"
-    "@emotion/serialize" "^1.0.0"
-    "@emotion/utils" "^1.0.0"
-
-"@emotion/unitless@^0.7.5":
-  version "0.7.5"
-  resolved "https://registry.yarnpkg.com/@emotion/unitless/-/unitless-0.7.5.tgz#77211291c1900a700b8a78cfafda3160d76949ed"
-  integrity sha512-OWORNpfjMsSSUBVrRBVGECkhWcULOAJz9ZW8uK9qgxD+87M7jHRcvh/A96XXNhXTLmKcoYSQtBEX7lHMO7YRwg==
-
-"@emotion/utils@^1.0.0":
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/@emotion/utils/-/utils-1.0.0.tgz#abe06a83160b10570816c913990245813a2fd6af"
-  integrity sha512-mQC2b3XLDs6QCW+pDQDiyO/EdGZYOygE8s5N5rrzjSI4M3IejPE/JPndCBwRT9z982aqQNi6beWs1UeayrQxxA==
-
-"@emotion/weak-memoize@^0.2.5":
-  version "0.2.5"
-  resolved "https://registry.yarnpkg.com/@emotion/weak-memoize/-/weak-memoize-0.2.5.tgz#8eed982e2ee6f7f4e44c253e12962980791efd46"
-  integrity sha512-6U71C2Wp7r5XtFtQzYrW5iKFT67OixrSxjI4MptCHzdSVlgabczzqLe0ZSgnub/5Kp4hSbpDB1tMytZY9pwxxA==
-
-"@eslint/eslintrc@^0.3.0":
-  version "0.3.0"
-  resolved "https://registry.yarnpkg.com/@eslint/eslintrc/-/eslintrc-0.3.0.tgz#d736d6963d7003b6514e6324bec9c602ac340318"
-  integrity sha512-1JTKgrOKAHVivSvOYw+sJOunkBjUOvjqWk1DPja7ZFhIS2mX/4EgTT8M7eTK9jrKhL/FvXXEbQwIs3pg1xp3dg==
-  dependencies:
-    ajv "^6.12.4"
-    debug "^4.1.1"
-    espree "^7.3.0"
-    globals "^12.1.0"
-    ignore "^4.0.6"
-    import-fresh "^3.2.1"
-    js-yaml "^3.13.1"
-    lodash "^4.17.20"
-    minimatch "^3.0.4"
-    strip-json-comments "^3.1.1"
-
-"@hapi/address@2.x.x":
-  version "2.1.4"
-  resolved "https://registry.yarnpkg.com/@hapi/address/-/address-2.1.4.tgz#5d67ed43f3fd41a69d4b9ff7b56e7c0d1d0a81e5"
-  integrity sha512-QD1PhQk+s31P1ixsX0H0Suoupp3VMXzIVMSwobR3F3MSUO2YCV0B7xqLcUw/Bh8yuvd3LhpyqLQWTNcRmp6IdQ==
-
-"@hapi/bourne@1.x.x":
-  version "1.3.2"
-  resolved "https://registry.yarnpkg.com/@hapi/bourne/-/bourne-1.3.2.tgz#0a7095adea067243ce3283e1b56b8a8f453b242a"
-  integrity sha512-1dVNHT76Uu5N3eJNTYcvxee+jzX4Z9lfciqRRHCU27ihbUcYi+iSc2iml5Ke1LXe1SyJCLA0+14Jh4tXJgOppA==
-
-"@hapi/hoek@8.x.x", "@hapi/hoek@^8.3.0":
-  version "8.5.1"
-  resolved "https://registry.yarnpkg.com/@hapi/hoek/-/hoek-8.5.1.tgz#fde96064ca446dec8c55a8c2f130957b070c6e06"
-  integrity sha512-yN7kbciD87WzLGc5539Tn0sApjyiGHAJgKvG9W8C7O+6c7qmoQMfVs0W4bX17eqz6C78QJqqFrtgdK5EWf6Qow==
-
-"@hapi/joi@^15.1.0":
-  version "15.1.1"
-  resolved "https://registry.yarnpkg.com/@hapi/joi/-/joi-15.1.1.tgz#c675b8a71296f02833f8d6d243b34c57b8ce19d7"
-  integrity sha512-entf8ZMOK8sc+8YfeOlM8pCfg3b5+WZIKBfUaaJT8UsjAAPjartzxIYm3TIbjvA4u+u++KbcXD38k682nVHDAQ==
-  dependencies:
-    "@hapi/address" "2.x.x"
-    "@hapi/bourne" "1.x.x"
-    "@hapi/hoek" "8.x.x"
-    "@hapi/topo" "3.x.x"
-
-"@hapi/topo@3.x.x":
-  version "3.1.6"
-  resolved "https://registry.yarnpkg.com/@hapi/topo/-/topo-3.1.6.tgz#68d935fa3eae7fdd5ab0d7f953f3205d8b2bfc29"
-  integrity sha512-tAag0jEcjwH+P2quUfipd7liWCNX2F8NvYjQp2wtInsZxnMlypdw0FtAOLxtvvkO+GSRRbmNi8m/5y42PQJYCQ==
-  dependencies:
-    "@hapi/hoek" "^8.3.0"
-
-"@istanbuljs/load-nyc-config@^1.0.0":
-  version "1.1.0"
-  resolved "https://registry.yarnpkg.com/@istanbuljs/load-nyc-config/-/load-nyc-config-1.1.0.tgz#fd3db1d59ecf7cf121e80650bb86712f9b55eced"
-  integrity sha512-VjeHSlIzpv/NyD3N0YuHfXOPDIixcA1q2ZV98wsMqcYlPmv2n3Yb2lYP9XMElnaFVXg5A7YLTeLu6V84uQDjmQ==
-  dependencies:
-    camelcase "^5.3.1"
-    find-up "^4.1.0"
-    get-package-type "^0.1.0"
-    js-yaml "^3.13.1"
-    resolve-from "^5.0.0"
-
-"@istanbuljs/schema@^0.1.2":
-  version "0.1.3"
-  resolved "https://registry.yarnpkg.com/@istanbuljs/schema/-/schema-0.1.3.tgz#e45e384e4b8ec16bce2fd903af78450f6bf7ec98"
-  integrity sha512-ZXRY4jNvVgSVQ8DL3LTcakaAtXwTVUxE81hslsyD2AtoXW/wVob10HkOJ1X/pAlcI7D+2YoZKg5do8G/w6RYgA==
-
-"@jest/console@^26.6.2":
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/@jest/console/-/console-26.6.2.tgz#4e04bc464014358b03ab4937805ee36a0aeb98f2"
-  integrity sha512-IY1R2i2aLsLr7Id3S6p2BA82GNWryt4oSvEXLAKc+L2zdi89dSkE8xC1C+0kpATG4JhBJREnQOH7/zmccM2B0g==
-  dependencies:
-    "@jest/types" "^26.6.2"
-    "@types/node" "*"
-    chalk "^4.0.0"
-    jest-message-util "^26.6.2"
-    jest-util "^26.6.2"
-    slash "^3.0.0"
-
-"@jest/core@^26.6.0", "@jest/core@^26.6.3":
-  version "26.6.3"
-  resolved "https://registry.yarnpkg.com/@jest/core/-/core-26.6.3.tgz#7639fcb3833d748a4656ada54bde193051e45fad"
-  integrity sha512-xvV1kKbhfUqFVuZ8Cyo+JPpipAHHAV3kcDBftiduK8EICXmTFddryy3P7NfZt8Pv37rA9nEJBKCCkglCPt/Xjw==
-  dependencies:
-    "@jest/console" "^26.6.2"
-    "@jest/reporters" "^26.6.2"
-    "@jest/test-result" "^26.6.2"
-    "@jest/transform" "^26.6.2"
-    "@jest/types" "^26.6.2"
-    "@types/node" "*"
-    ansi-escapes "^4.2.1"
-    chalk "^4.0.0"
-    exit "^0.1.2"
-    graceful-fs "^4.2.4"
-    jest-changed-files "^26.6.2"
-    jest-config "^26.6.3"
-    jest-haste-map "^26.6.2"
-    jest-message-util "^26.6.2"
-    jest-regex-util "^26.0.0"
-    jest-resolve "^26.6.2"
-    jest-resolve-dependencies "^26.6.3"
-    jest-runner "^26.6.3"
-    jest-runtime "^26.6.3"
-    jest-snapshot "^26.6.2"
-    jest-util "^26.6.2"
-    jest-validate "^26.6.2"
-    jest-watcher "^26.6.2"
-    micromatch "^4.0.2"
-    p-each-series "^2.1.0"
-    rimraf "^3.0.0"
-    slash "^3.0.0"
-    strip-ansi "^6.0.0"
-
-"@jest/environment@^26.6.0", "@jest/environment@^26.6.2":
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/@jest/environment/-/environment-26.6.2.tgz#ba364cc72e221e79cc8f0a99555bf5d7577cf92c"
-  integrity sha512-nFy+fHl28zUrRsCeMB61VDThV1pVTtlEokBRgqPrcT1JNq4yRNIyTHfyht6PqtUvY9IsuLGTrbG8kPXjSZIZwA==
-  dependencies:
-    "@jest/fake-timers" "^26.6.2"
-    "@jest/types" "^26.6.2"
-    "@types/node" "*"
-    jest-mock "^26.6.2"
-
-"@jest/fake-timers@^26.6.2":
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/@jest/fake-timers/-/fake-timers-26.6.2.tgz#459c329bcf70cee4af4d7e3f3e67848123535aad"
-  integrity sha512-14Uleatt7jdzefLPYM3KLcnUl1ZNikaKq34enpb5XG9i81JpppDb5muZvonvKyrl7ftEHkKS5L5/eB/kxJ+bvA==
-  dependencies:
-    "@jest/types" "^26.6.2"
-    "@sinonjs/fake-timers" "^6.0.1"
-    "@types/node" "*"
-    jest-message-util "^26.6.2"
-    jest-mock "^26.6.2"
-    jest-util "^26.6.2"
-
-"@jest/globals@^26.6.2":
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/@jest/globals/-/globals-26.6.2.tgz#5b613b78a1aa2655ae908eba638cc96a20df720a"
-  integrity sha512-85Ltnm7HlB/KesBUuALwQ68YTU72w9H2xW9FjZ1eL1U3lhtefjjl5c2MiUbpXt/i6LaPRvoOFJ22yCBSfQ0JIA==
-  dependencies:
-    "@jest/environment" "^26.6.2"
-    "@jest/types" "^26.6.2"
-    expect "^26.6.2"
-
-"@jest/reporters@^26.6.2":
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/@jest/reporters/-/reporters-26.6.2.tgz#1f518b99637a5f18307bd3ecf9275f6882a667f6"
-  integrity sha512-h2bW53APG4HvkOnVMo8q3QXa6pcaNt1HkwVsOPMBV6LD/q9oSpxNSYZQYkAnjdMjrJ86UuYeLo+aEZClV6opnw==
-  dependencies:
-    "@bcoe/v8-coverage" "^0.2.3"
-    "@jest/console" "^26.6.2"
-    "@jest/test-result" "^26.6.2"
-    "@jest/transform" "^26.6.2"
-    "@jest/types" "^26.6.2"
-    chalk "^4.0.0"
-    collect-v8-coverage "^1.0.0"
-    exit "^0.1.2"
-    glob "^7.1.2"
-    graceful-fs "^4.2.4"
-    istanbul-lib-coverage "^3.0.0"
-    istanbul-lib-instrument "^4.0.3"
-    istanbul-lib-report "^3.0.0"
-    istanbul-lib-source-maps "^4.0.0"
-    istanbul-reports "^3.0.2"
-    jest-haste-map "^26.6.2"
-    jest-resolve "^26.6.2"
-    jest-util "^26.6.2"
-    jest-worker "^26.6.2"
-    slash "^3.0.0"
-    source-map "^0.6.0"
-    string-length "^4.0.1"
-    terminal-link "^2.0.0"
-    v8-to-istanbul "^7.0.0"
-  optionalDependencies:
-    node-notifier "^8.0.0"
-
-"@jest/source-map@^26.6.2":
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/@jest/source-map/-/source-map-26.6.2.tgz#29af5e1e2e324cafccc936f218309f54ab69d535"
-  integrity sha512-YwYcCwAnNmOVsZ8mr3GfnzdXDAl4LaenZP5z+G0c8bzC9/dugL8zRmxZzdoTl4IaS3CryS1uWnROLPFmb6lVvA==
-  dependencies:
-    callsites "^3.0.0"
-    graceful-fs "^4.2.4"
-    source-map "^0.6.0"
-
-"@jest/test-result@^26.6.0", "@jest/test-result@^26.6.2":
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/@jest/test-result/-/test-result-26.6.2.tgz#55da58b62df134576cc95476efa5f7949e3f5f18"
-  integrity sha512-5O7H5c/7YlojphYNrK02LlDIV2GNPYisKwHm2QTKjNZeEzezCbwYs9swJySv2UfPMyZ0VdsmMv7jIlD/IKYQpQ==
-  dependencies:
-    "@jest/console" "^26.6.2"
-    "@jest/types" "^26.6.2"
-    "@types/istanbul-lib-coverage" "^2.0.0"
-    collect-v8-coverage "^1.0.0"
-
-"@jest/test-sequencer@^26.6.3":
-  version "26.6.3"
-  resolved "https://registry.yarnpkg.com/@jest/test-sequencer/-/test-sequencer-26.6.3.tgz#98e8a45100863886d074205e8ffdc5a7eb582b17"
-  integrity sha512-YHlVIjP5nfEyjlrSr8t/YdNfU/1XEt7c5b4OxcXCjyRhjzLYu/rO69/WHPuYcbCWkz8kAeZVZp2N2+IOLLEPGw==
-  dependencies:
-    "@jest/test-result" "^26.6.2"
-    graceful-fs "^4.2.4"
-    jest-haste-map "^26.6.2"
-    jest-runner "^26.6.3"
-    jest-runtime "^26.6.3"
-
-"@jest/transform@^26.6.2":
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/@jest/transform/-/transform-26.6.2.tgz#5ac57c5fa1ad17b2aae83e73e45813894dcf2e4b"
-  integrity sha512-E9JjhUgNzvuQ+vVAL21vlyfy12gP0GhazGgJC4h6qUt1jSdUXGWJ1wfu/X7Sd8etSgxV4ovT1pb9v5D6QW4XgA==
-  dependencies:
-    "@babel/core" "^7.1.0"
-    "@jest/types" "^26.6.2"
-    babel-plugin-istanbul "^6.0.0"
-    chalk "^4.0.0"
-    convert-source-map "^1.4.0"
-    fast-json-stable-stringify "^2.0.0"
-    graceful-fs "^4.2.4"
-    jest-haste-map "^26.6.2"
-    jest-regex-util "^26.0.0"
-    jest-util "^26.6.2"
-    micromatch "^4.0.2"
-    pirates "^4.0.1"
-    slash "^3.0.0"
-    source-map "^0.6.1"
-    write-file-atomic "^3.0.0"
-
-"@jest/types@^26.6.0", "@jest/types@^26.6.2":
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/@jest/types/-/types-26.6.2.tgz#bef5a532030e1d88a2f5a6d933f84e97226ed48e"
-  integrity sha512-fC6QCp7Sc5sX6g8Tvbmj4XUTbyrik0akgRy03yjXbQaBWWNWGE7SGtJk98m0N8nzegD/7SggrUlivxo5ax4KWQ==
-  dependencies:
-    "@types/istanbul-lib-coverage" "^2.0.0"
-    "@types/istanbul-reports" "^3.0.0"
-    "@types/node" "*"
-    "@types/yargs" "^15.0.0"
-    chalk "^4.0.0"
-
-"@nodelib/fs.scandir@2.1.4":
-  version "2.1.4"
-  resolved "https://registry.yarnpkg.com/@nodelib/fs.scandir/-/fs.scandir-2.1.4.tgz#d4b3549a5db5de2683e0c1071ab4f140904bbf69"
-  integrity sha512-33g3pMJk3bg5nXbL/+CY6I2eJDzZAni49PfJnL5fghPTggPvBd/pFNSgJsdAgWptuFu7qq/ERvOYFlhvsLTCKA==
-  dependencies:
-    "@nodelib/fs.stat" "2.0.4"
-    run-parallel "^1.1.9"
-
-"@nodelib/fs.stat@2.0.4", "@nodelib/fs.stat@^2.0.2":
-  version "2.0.4"
-  resolved "https://registry.yarnpkg.com/@nodelib/fs.stat/-/fs.stat-2.0.4.tgz#a3f2dd61bab43b8db8fa108a121cfffe4c676655"
-  integrity sha512-IYlHJA0clt2+Vg7bccq+TzRdJvv19c2INqBSsoOLp1je7xjtr7J26+WXR72MCdvU9q1qTzIWDfhMf+DRvQJK4Q==
-
-"@nodelib/fs.walk@^1.2.3":
-  version "1.2.6"
-  resolved "https://registry.yarnpkg.com/@nodelib/fs.walk/-/fs.walk-1.2.6.tgz#cce9396b30aa5afe9e3756608f5831adcb53d063"
-  integrity sha512-8Broas6vTtW4GIXTAHDoE32hnN2M5ykgCpWGbuXHQ15vEMqr23pB76e/GZcYsZCHALv50ktd24qhEyKr6wBtow==
-  dependencies:
-    "@nodelib/fs.scandir" "2.1.4"
-    fastq "^1.6.0"
-
-"@npmcli/move-file@^1.0.1":
-  version "1.1.2"
-  resolved "https://registry.yarnpkg.com/@npmcli/move-file/-/move-file-1.1.2.tgz#1a82c3e372f7cae9253eb66d72543d6b8685c674"
-  integrity sha512-1SUf/Cg2GzGDyaf15aR9St9TWlb+XvbZXWpDx8YKs7MLzMH/BCeopv+y9vzrzgkfykCGuWOlSu3mZhj2+FQcrg==
-  dependencies:
-    mkdirp "^1.0.4"
-    rimraf "^3.0.2"
-
-"@pmmmwh/react-refresh-webpack-plugin@0.4.3":
-  version "0.4.3"
-  resolved "https://registry.yarnpkg.com/@pmmmwh/react-refresh-webpack-plugin/-/react-refresh-webpack-plugin-0.4.3.tgz#1eec460596d200c0236bf195b078a5d1df89b766"
-  integrity sha512-br5Qwvh8D2OQqSXpd1g/xqXKnK0r+Jz6qVKBbWmpUcrbGOxUrf39V5oZ1876084CGn18uMdR5uvPqBv9UqtBjQ==
-  dependencies:
-    ansi-html "^0.0.7"
-    error-stack-parser "^2.0.6"
-    html-entities "^1.2.1"
-    native-url "^0.2.6"
-    schema-utils "^2.6.5"
-    source-map "^0.7.3"
-
-"@popperjs/core@2.4.4":
-  version "2.4.4"
-  resolved "https://registry.yarnpkg.com/@popperjs/core/-/core-2.4.4.tgz#11d5db19bd178936ec89cd84519c4de439574398"
-  integrity sha512-1oO6+dN5kdIA3sKPZhRGJTfGVP4SWV6KqlMOwry4J3HfyD68sl/3KmG7DeYUzvN+RbhXDnv/D8vNNB8168tAMg==
-
-"@reach/alert@0.13.0":
-  version "0.13.0"
-  resolved "https://registry.yarnpkg.com/@reach/alert/-/alert-0.13.0.tgz#1f67b389f49af61286ef03a84f5a57bd3503dadf"
-  integrity sha512-5lpgRnlQ0JHBsRTPfKjD9aFPDZuLcaxTgD5PXdSLb+1CU8WgNbcy+7qSjqnu1uzWS2pQenIEBViV5wGpt63ADw==
-  dependencies:
-    "@reach/utils" "0.13.0"
-    "@reach/visually-hidden" "0.13.0"
-    prop-types "^15.7.2"
-    tslib "^2.0.0"
-
-"@reach/utils@0.13.0":
-  version "0.13.0"
-  resolved "https://registry.yarnpkg.com/@reach/utils/-/utils-0.13.0.tgz#2da775a910d8894bb34e1e94fe95842674f71844"
-  integrity sha512-dypxuyA1Qy3LHxzzyS7jFGPgCCR04b8UEn+Tv/aj6y9V578dULQqkcCyobrdEa+OI8lxH7dFFHa+jH8M/noBrQ==
-  dependencies:
-    "@types/warning" "^3.0.0"
-    tslib "^2.0.0"
-    warning "^4.0.3"
-
-"@reach/visually-hidden@0.13.0":
-  version "0.13.0"
-  resolved "https://registry.yarnpkg.com/@reach/visually-hidden/-/visually-hidden-0.13.0.tgz#cace36d9bb80ffb797374fcaea989391b881038f"
-  integrity sha512-LF11WL9/495Q3d86xNy0VO6ylPI6SqF2xZGg9jpZSXLbFKpQ5Bf0qC7DOJfSf+/yb9WgPgB4m+a48Fz8AO6oZA==
-  dependencies:
-    tslib "^2.0.0"
-
-"@rollup/plugin-node-resolve@^7.1.1":
-  version "7.1.3"
-  resolved "https://registry.yarnpkg.com/@rollup/plugin-node-resolve/-/plugin-node-resolve-7.1.3.tgz#80de384edfbd7bfc9101164910f86078151a3eca"
-  integrity sha512-RxtSL3XmdTAE2byxekYLnx+98kEUOrPHF/KRVjLH+DEIHy6kjIw7YINQzn+NXiH/NTrQLAwYs0GWB+csWygA9Q==
-  dependencies:
-    "@rollup/pluginutils" "^3.0.8"
-    "@types/resolve" "0.0.8"
-    builtin-modules "^3.1.0"
-    is-module "^1.0.0"
-    resolve "^1.14.2"
-
-"@rollup/plugin-replace@^2.3.1":
-  version "2.4.1"
-  resolved "https://registry.yarnpkg.com/@rollup/plugin-replace/-/plugin-replace-2.4.1.tgz#c411b5ab72809fb1bfc8b487d8d02eef661460d3"
-  integrity sha512-XwC1oK5rrtRJ0tn1ioLHS6OV5JTluJF7QE1J/q1hN3bquwjnVxjtMyY9iCnoyH9DQbf92CxajB3o98wZbP3oAQ==
-  dependencies:
-    "@rollup/pluginutils" "^3.1.0"
-    magic-string "^0.25.7"
-
-"@rollup/pluginutils@^3.0.8", "@rollup/pluginutils@^3.1.0":
-  version "3.1.0"
-  resolved "https://registry.yarnpkg.com/@rollup/pluginutils/-/pluginutils-3.1.0.tgz#706b4524ee6dc8b103b3c995533e5ad680c02b9b"
-  integrity sha512-GksZ6pr6TpIjHm8h9lSQ8pi8BE9VeubNT0OMJ3B5uZJ8pz73NPiqOtCog/x2/QzM1ENChPKxMDhiQuRHsqc+lg==
-  dependencies:
-    "@types/estree" "0.0.39"
-    estree-walker "^1.0.1"
-    picomatch "^2.2.2"
-
-"@sinonjs/commons@^1.7.0":
-  version "1.8.2"
-  resolved "https://registry.yarnpkg.com/@sinonjs/commons/-/commons-1.8.2.tgz#858f5c4b48d80778fde4b9d541f27edc0d56488b"
-  integrity sha512-sruwd86RJHdsVf/AtBoijDmUqJp3B6hF/DGC23C+JaegnDHaZyewCjoVGTdg3J0uz3Zs7NnIT05OBOmML72lQw==
-  dependencies:
-    type-detect "4.0.8"
-
-"@sinonjs/fake-timers@^6.0.1":
-  version "6.0.1"
-  resolved "https://registry.yarnpkg.com/@sinonjs/fake-timers/-/fake-timers-6.0.1.tgz#293674fccb3262ac782c7aadfdeca86b10c75c40"
-  integrity sha512-MZPUxrmFubI36XS1DI3qmI0YdN1gks62JtFZvxR67ljjSNCeK6U08Zx4msEWOXuofgqUt6zPHSi1H9fbjR/NRA==
-  dependencies:
-    "@sinonjs/commons" "^1.7.0"
-
-"@surma/rollup-plugin-off-main-thread@^1.1.1":
-  version "1.4.2"
-  resolved "https://registry.yarnpkg.com/@surma/rollup-plugin-off-main-thread/-/rollup-plugin-off-main-thread-1.4.2.tgz#e6786b6af5799f82f7ab3a82e53f6182d2b91a58"
-  integrity sha512-yBMPqmd1yEJo/280PAMkychuaALyQ9Lkb5q1ck3mjJrFuEobIfhnQ4J3mbvBoISmR3SWMWV+cGB/I0lCQee79A==
-  dependencies:
-    ejs "^2.6.1"
-    magic-string "^0.25.0"
-
-"@svgr/babel-plugin-add-jsx-attribute@^5.4.0":
-  version "5.4.0"
-  resolved "https://registry.yarnpkg.com/@svgr/babel-plugin-add-jsx-attribute/-/babel-plugin-add-jsx-attribute-5.4.0.tgz#81ef61947bb268eb9d50523446f9c638fb355906"
-  integrity sha512-ZFf2gs/8/6B8PnSofI0inYXr2SDNTDScPXhN7k5EqD4aZ3gi6u+rbmZHVB8IM3wDyx8ntKACZbtXSm7oZGRqVg==
-
-"@svgr/babel-plugin-remove-jsx-attribute@^5.4.0":
-  version "5.4.0"
-  resolved "https://registry.yarnpkg.com/@svgr/babel-plugin-remove-jsx-attribute/-/babel-plugin-remove-jsx-attribute-5.4.0.tgz#6b2c770c95c874654fd5e1d5ef475b78a0a962ef"
-  integrity sha512-yaS4o2PgUtwLFGTKbsiAy6D0o3ugcUhWK0Z45umJ66EPWunAz9fuFw2gJuje6wqQvQWOTJvIahUwndOXb7QCPg==
-
-"@svgr/babel-plugin-remove-jsx-empty-expression@^5.0.1":
-  version "5.0.1"
-  resolved "https://registry.yarnpkg.com/@svgr/babel-plugin-remove-jsx-empty-expression/-/babel-plugin-remove-jsx-empty-expression-5.0.1.tgz#25621a8915ed7ad70da6cea3d0a6dbc2ea933efd"
-  integrity sha512-LA72+88A11ND/yFIMzyuLRSMJ+tRKeYKeQ+mR3DcAZ5I4h5CPWN9AHyUzJbWSYp/u2u0xhmgOe0+E41+GjEueA==
-
-"@svgr/babel-plugin-replace-jsx-attribute-value@^5.0.1":
-  version "5.0.1"
-  resolved "https://registry.yarnpkg.com/@svgr/babel-plugin-replace-jsx-attribute-value/-/babel-plugin-replace-jsx-attribute-value-5.0.1.tgz#0b221fc57f9fcd10e91fe219e2cd0dd03145a897"
-  integrity sha512-PoiE6ZD2Eiy5mK+fjHqwGOS+IXX0wq/YDtNyIgOrc6ejFnxN4b13pRpiIPbtPwHEc+NT2KCjteAcq33/F1Y9KQ==
-
-"@svgr/babel-plugin-svg-dynamic-title@^5.4.0":
-  version "5.4.0"
-  resolved "https://registry.yarnpkg.com/@svgr/babel-plugin-svg-dynamic-title/-/babel-plugin-svg-dynamic-title-5.4.0.tgz#139b546dd0c3186b6e5db4fefc26cb0baea729d7"
-  integrity sha512-zSOZH8PdZOpuG1ZVx/cLVePB2ibo3WPpqo7gFIjLV9a0QsuQAzJiwwqmuEdTaW2pegyBE17Uu15mOgOcgabQZg==
-
-"@svgr/babel-plugin-svg-em-dimensions@^5.4.0":
-  version "5.4.0"
-  resolved "https://registry.yarnpkg.com/@svgr/babel-plugin-svg-em-dimensions/-/babel-plugin-svg-em-dimensions-5.4.0.tgz#6543f69526632a133ce5cabab965deeaea2234a0"
-  integrity sha512-cPzDbDA5oT/sPXDCUYoVXEmm3VIoAWAPT6mSPTJNbQaBNUuEKVKyGH93oDY4e42PYHRW67N5alJx/eEol20abw==
-
-"@svgr/babel-plugin-transform-react-native-svg@^5.4.0":
-  version "5.4.0"
-  resolved "https://registry.yarnpkg.com/@svgr/babel-plugin-transform-react-native-svg/-/babel-plugin-transform-react-native-svg-5.4.0.tgz#00bf9a7a73f1cad3948cdab1f8dfb774750f8c80"
-  integrity sha512-3eYP/SaopZ41GHwXma7Rmxcv9uRslRDTY1estspeB1w1ueZWd/tPlMfEOoccYpEMZU3jD4OU7YitnXcF5hLW2Q==
-
-"@svgr/babel-plugin-transform-svg-component@^5.5.0":
-  version "5.5.0"
-  resolved "https://registry.yarnpkg.com/@svgr/babel-plugin-transform-svg-component/-/babel-plugin-transform-svg-component-5.5.0.tgz#583a5e2a193e214da2f3afeb0b9e8d3250126b4a"
-  integrity sha512-q4jSH1UUvbrsOtlo/tKcgSeiCHRSBdXoIoqX1pgcKK/aU3JD27wmMKwGtpB8qRYUYoyXvfGxUVKchLuR5pB3rQ==
-
-"@svgr/babel-preset@^5.5.0":
-  version "5.5.0"
-  resolved "https://registry.yarnpkg.com/@svgr/babel-preset/-/babel-preset-5.5.0.tgz#8af54f3e0a8add7b1e2b0fcd5a882c55393df327"
-  integrity sha512-4FiXBjvQ+z2j7yASeGPEi8VD/5rrGQk4Xrq3EdJmoZgz/tpqChpo5hgXDvmEauwtvOc52q8ghhZK4Oy7qph4ig==
-  dependencies:
-    "@svgr/babel-plugin-add-jsx-attribute" "^5.4.0"
-    "@svgr/babel-plugin-remove-jsx-attribute" "^5.4.0"
-    "@svgr/babel-plugin-remove-jsx-empty-expression" "^5.0.1"
-    "@svgr/babel-plugin-replace-jsx-attribute-value" "^5.0.1"
-    "@svgr/babel-plugin-svg-dynamic-title" "^5.4.0"
-    "@svgr/babel-plugin-svg-em-dimensions" "^5.4.0"
-    "@svgr/babel-plugin-transform-react-native-svg" "^5.4.0"
-    "@svgr/babel-plugin-transform-svg-component" "^5.5.0"
-
-"@svgr/core@^5.5.0":
-  version "5.5.0"
-  resolved "https://registry.yarnpkg.com/@svgr/core/-/core-5.5.0.tgz#82e826b8715d71083120fe8f2492ec7d7874a579"
-  integrity sha512-q52VOcsJPvV3jO1wkPtzTuKlvX7Y3xIcWRpCMtBF3MrteZJtBfQw/+u0B1BHy5ColpQc1/YVTrPEtSYIMNZlrQ==
-  dependencies:
-    "@svgr/plugin-jsx" "^5.5.0"
-    camelcase "^6.2.0"
-    cosmiconfig "^7.0.0"
-
-"@svgr/hast-util-to-babel-ast@^5.5.0":
-  version "5.5.0"
-  resolved "https://registry.yarnpkg.com/@svgr/hast-util-to-babel-ast/-/hast-util-to-babel-ast-5.5.0.tgz#5ee52a9c2533f73e63f8f22b779f93cd432a5461"
-  integrity sha512-cAaR/CAiZRB8GP32N+1jocovUtvlj0+e65TB50/6Lcime+EA49m/8l+P2ko+XPJ4dw3xaPS3jOL4F2X4KWxoeQ==
-  dependencies:
-    "@babel/types" "^7.12.6"
-
-"@svgr/plugin-jsx@^5.5.0":
-  version "5.5.0"
-  resolved "https://registry.yarnpkg.com/@svgr/plugin-jsx/-/plugin-jsx-5.5.0.tgz#1aa8cd798a1db7173ac043466d7b52236b369000"
-  integrity sha512-V/wVh33j12hGh05IDg8GpIUXbjAPnTdPTKuP4VNLggnwaHMPNQNae2pRnyTAILWCQdz5GyMqtO488g7CKM8CBA==
-  dependencies:
-    "@babel/core" "^7.12.3"
-    "@svgr/babel-preset" "^5.5.0"
-    "@svgr/hast-util-to-babel-ast" "^5.5.0"
-    svg-parser "^2.0.2"
-
-"@svgr/plugin-svgo@^5.5.0":
-  version "5.5.0"
-  resolved "https://registry.yarnpkg.com/@svgr/plugin-svgo/-/plugin-svgo-5.5.0.tgz#02da55d85320549324e201c7b2e53bf431fcc246"
-  integrity sha512-r5swKk46GuQl4RrVejVwpeeJaydoxkdwkM1mBKOgJLBUJPGaLci6ylg/IjhrRsREKDkr4kbMWdgOtbXEh0fyLQ==
-  dependencies:
-    cosmiconfig "^7.0.0"
-    deepmerge "^4.2.2"
-    svgo "^1.2.2"
-
-"@svgr/webpack@5.5.0":
-  version "5.5.0"
-  resolved "https://registry.yarnpkg.com/@svgr/webpack/-/webpack-5.5.0.tgz#aae858ee579f5fa8ce6c3166ef56c6a1b381b640"
-  integrity sha512-DOBOK255wfQxguUta2INKkzPj6AIS6iafZYiYmHn6W3pHlycSRRlvWKCfLDG10fXfLWqE3DJHgRUOyJYmARa7g==
-  dependencies:
-    "@babel/core" "^7.12.3"
-    "@babel/plugin-transform-react-constant-elements" "^7.12.1"
-    "@babel/preset-env" "^7.12.1"
-    "@babel/preset-react" "^7.12.5"
-    "@svgr/core" "^5.5.0"
-    "@svgr/plugin-jsx" "^5.5.0"
-    "@svgr/plugin-svgo" "^5.5.0"
-    loader-utils "^2.0.0"
-
-"@testing-library/dom@^7.28.1":
-  version "7.29.6"
-  resolved "https://registry.yarnpkg.com/@testing-library/dom/-/dom-7.29.6.tgz#eb37844fb431186db7960a7ff6749ea65a19617c"
-  integrity sha512-vzTsAXa439ptdvav/4lsKRcGpAQX7b6wBIqia7+iNzqGJ5zjswApxA6jDAsexrc6ue9krWcbh8o+LYkBXW+GCQ==
-  dependencies:
-    "@babel/code-frame" "^7.10.4"
-    "@babel/runtime" "^7.12.5"
-    "@types/aria-query" "^4.2.0"
-    aria-query "^4.2.2"
-    chalk "^4.1.0"
-    dom-accessibility-api "^0.5.4"
-    lz-string "^1.4.4"
-    pretty-format "^26.6.2"
-
-"@testing-library/jest-dom@^5.11.4":
-  version "5.11.9"
-  resolved "https://registry.yarnpkg.com/@testing-library/jest-dom/-/jest-dom-5.11.9.tgz#e6b3cd687021f89f261bd53cbe367041fbd3e975"
-  integrity sha512-Mn2gnA9d1wStlAIT2NU8J15LNob0YFBVjs2aEQ3j8rsfRQo+lAs7/ui1i2TGaJjapLmuNPLTsrm+nPjmZDwpcQ==
-  dependencies:
-    "@babel/runtime" "^7.9.2"
-    "@types/testing-library__jest-dom" "^5.9.1"
-    aria-query "^4.2.2"
-    chalk "^3.0.0"
-    css "^3.0.0"
-    css.escape "^1.5.1"
-    lodash "^4.17.15"
-    redent "^3.0.0"
-
-"@testing-library/react@^11.1.0":
-  version "11.2.5"
-  resolved "https://registry.yarnpkg.com/@testing-library/react/-/react-11.2.5.tgz#ae1c36a66c7790ddb6662c416c27863d87818eb9"
-  integrity sha512-yEx7oIa/UWLe2F2dqK0FtMF9sJWNXD+2PPtp39BvE0Kh9MJ9Kl0HrZAgEuhUJR+Lx8Di6Xz+rKwSdEPY2UV8ZQ==
-  dependencies:
-    "@babel/runtime" "^7.12.5"
-    "@testing-library/dom" "^7.28.1"
-
-"@testing-library/user-event@^12.1.10":
-  version "12.7.3"
-  resolved "https://registry.yarnpkg.com/@testing-library/user-event/-/user-event-12.7.3.tgz#ef674ccb91794e52123b3532c336485d16f453b3"
-  integrity sha512-IdSHkWfbeSSJRFlldvHDWfVX0U18TbXIvLSGII+JbqkJrsflFr4OWlQIua0TvcVVJNna3BNrNvRSvpQ0yvSXlA==
-  dependencies:
-    "@babel/runtime" "^7.12.5"
-
-"@types/anymatch@*":
-  version "1.3.1"
-  resolved "https://registry.yarnpkg.com/@types/anymatch/-/anymatch-1.3.1.tgz#336badc1beecb9dacc38bea2cf32adf627a8421a"
-  integrity sha512-/+CRPXpBDpo2RK9C68N3b2cOvO0Cf5B9aPijHsoDQTHivnGSObdOF2BRQOYjojWTDy6nQvMjmqRXIxH55VjxxA==
-
-"@types/aria-query@^4.2.0":
-  version "4.2.1"
-  resolved "https://registry.yarnpkg.com/@types/aria-query/-/aria-query-4.2.1.tgz#78b5433344e2f92e8b306c06a5622c50c245bf6b"
-  integrity sha512-S6oPal772qJZHoRZLFc/XoZW2gFvwXusYUmXPXkgxJLuEk2vOt7jc4Yo6z/vtI0EBkbPBVrJJ0B+prLIKiWqHg==
-
-"@types/babel__core@^7.0.0", "@types/babel__core@^7.1.7":
-  version "7.1.12"
-  resolved "https://registry.yarnpkg.com/@types/babel__core/-/babel__core-7.1.12.tgz#4d8e9e51eb265552a7e4f1ff2219ab6133bdfb2d"
-  integrity sha512-wMTHiiTiBAAPebqaPiPDLFA4LYPKr6Ph0Xq/6rq1Ur3v66HXyG+clfR9CNETkD7MQS8ZHvpQOtA53DLws5WAEQ==
-  dependencies:
-    "@babel/parser" "^7.1.0"
-    "@babel/types" "^7.0.0"
-    "@types/babel__generator" "*"
-    "@types/babel__template" "*"
-    "@types/babel__traverse" "*"
-
-"@types/babel__generator@*":
-  version "7.6.2"
-  resolved "https://registry.yarnpkg.com/@types/babel__generator/-/babel__generator-7.6.2.tgz#f3d71178e187858f7c45e30380f8f1b7415a12d8"
-  integrity sha512-MdSJnBjl+bdwkLskZ3NGFp9YcXGx5ggLpQQPqtgakVhsWK0hTtNYhjpZLlWQTviGTvF8at+Bvli3jV7faPdgeQ==
-  dependencies:
-    "@babel/types" "^7.0.0"
-
-"@types/babel__template@*":
-  version "7.4.0"
-  resolved "https://registry.yarnpkg.com/@types/babel__template/-/babel__template-7.4.0.tgz#0c888dd70b3ee9eebb6e4f200e809da0076262be"
-  integrity sha512-NTPErx4/FiPCGScH7foPyr+/1Dkzkni+rHiYHHoTjvwou7AQzJkNeD60A9CXRy+ZEN2B1bggmkTMCDb+Mv5k+A==
-  dependencies:
-    "@babel/parser" "^7.1.0"
-    "@babel/types" "^7.0.0"
-
-"@types/babel__traverse@*", "@types/babel__traverse@^7.0.4", "@types/babel__traverse@^7.0.6":
-  version "7.11.0"
-  resolved "https://registry.yarnpkg.com/@types/babel__traverse/-/babel__traverse-7.11.0.tgz#b9a1efa635201ba9bc850323a8793ee2d36c04a0"
-  integrity sha512-kSjgDMZONiIfSH1Nxcr5JIRMwUetDki63FSQfpTCz8ogF3Ulqm8+mr5f78dUYs6vMiB6gBusQqfQmBvHZj/lwg==
-  dependencies:
-    "@babel/types" "^7.3.0"
-
-"@types/eslint@^7.2.6":
-  version "7.2.6"
-  resolved "https://registry.yarnpkg.com/@types/eslint/-/eslint-7.2.6.tgz#5e9aff555a975596c03a98b59ecd103decc70c3c"
-  integrity sha512-I+1sYH+NPQ3/tVqCeUSBwTE/0heyvtXqpIopUUArlBm0Kpocb8FbMa3AZ/ASKIFpN3rnEx932TTXDbt9OXsNDw==
-  dependencies:
-    "@types/estree" "*"
-    "@types/json-schema" "*"
-
-"@types/estree@*":
-  version "0.0.46"
-  resolved "https://registry.yarnpkg.com/@types/estree/-/estree-0.0.46.tgz#0fb6bfbbeabd7a30880504993369c4bf1deab1fe"
-  integrity sha512-laIjwTQaD+5DukBZaygQ79K1Z0jb1bPEMRrkXSLjtCcZm+abyp5YbrqpSLzD42FwWW6gK/aS4NYpJ804nG2brg==
-
-"@types/estree@0.0.39":
-  version "0.0.39"
-  resolved "https://registry.yarnpkg.com/@types/estree/-/estree-0.0.39.tgz#e177e699ee1b8c22d23174caaa7422644389509f"
-  integrity sha512-EYNwp3bU+98cpU4lAWYYL7Zz+2gryWH1qbdDTidVd6hkiR6weksdbMadyXKXNPEkQFhXM+hVO9ZygomHXp+AIw==
-
-"@types/glob@^7.1.1":
-  version "7.1.3"
-  resolved "https://registry.yarnpkg.com/@types/glob/-/glob-7.1.3.tgz#e6ba80f36b7daad2c685acd9266382e68985c183"
-  integrity sha512-SEYeGAIQIQX8NN6LDKprLjbrd5dARM5EXsd8GI/A5l0apYI1fGMWgPHSe4ZKL4eozlAyI+doUE9XbYS4xCkQ1w==
-  dependencies:
-    "@types/minimatch" "*"
-    "@types/node" "*"
-
-"@types/graceful-fs@^4.1.2":
-  version "4.1.5"
-  resolved "https://registry.yarnpkg.com/@types/graceful-fs/-/graceful-fs-4.1.5.tgz#21ffba0d98da4350db64891f92a9e5db3cdb4e15"
-  integrity sha512-anKkLmZZ+xm4p8JWBf4hElkM4XR+EZeA2M9BAkkTldmcyDY4mbdIJnRghDJH3Ov5ooY7/UAoENtmdMSkaAd7Cw==
-  dependencies:
-    "@types/node" "*"
-
-"@types/html-minifier-terser@^5.0.0":
-  version "5.1.1"
-  resolved "https://registry.yarnpkg.com/@types/html-minifier-terser/-/html-minifier-terser-5.1.1.tgz#3c9ee980f1a10d6021ae6632ca3e79ca2ec4fb50"
-  integrity sha512-giAlZwstKbmvMk1OO7WXSj4OZ0keXAcl2TQq4LWHiiPH2ByaH7WeUzng+Qej8UPxxv+8lRTuouo0iaNDBuzIBA==
-
-"@types/istanbul-lib-coverage@*", "@types/istanbul-lib-coverage@^2.0.0", "@types/istanbul-lib-coverage@^2.0.1":
-  version "2.0.3"
-  resolved "https://registry.yarnpkg.com/@types/istanbul-lib-coverage/-/istanbul-lib-coverage-2.0.3.tgz#4ba8ddb720221f432e443bd5f9117fd22cfd4762"
-  integrity sha512-sz7iLqvVUg1gIedBOvlkxPlc8/uVzyS5OwGz1cKjXzkl3FpL3al0crU8YGU1WoHkxn0Wxbw5tyi6hvzJKNzFsw==
-
-"@types/istanbul-lib-report@*":
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/@types/istanbul-lib-report/-/istanbul-lib-report-3.0.0.tgz#c14c24f18ea8190c118ee7562b7ff99a36552686"
-  integrity sha512-plGgXAPfVKFoYfa9NpYDAkseG+g6Jr294RqeqcqDixSbU34MZVJRi/P+7Y8GDpzkEwLaGZZOpKIEmeVZNtKsrg==
-  dependencies:
-    "@types/istanbul-lib-coverage" "*"
-
-"@types/istanbul-reports@^3.0.0":
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/@types/istanbul-reports/-/istanbul-reports-3.0.0.tgz#508b13aa344fa4976234e75dddcc34925737d821"
-  integrity sha512-nwKNbvnwJ2/mndE9ItP/zc2TCzw6uuodnF4EHYWD+gCQDVBuRQL5UzbZD0/ezy1iKsFU2ZQiDqg4M9dN4+wZgA==
-  dependencies:
-    "@types/istanbul-lib-report" "*"
-
-"@types/jest@*", "@types/jest@^26.0.15":
-  version "26.0.20"
-  resolved "https://registry.yarnpkg.com/@types/jest/-/jest-26.0.20.tgz#cd2f2702ecf69e86b586e1f5223a60e454056307"
-  integrity sha512-9zi2Y+5USJRxd0FsahERhBwlcvFh6D2GLQnY2FH2BzK8J9s9omvNHIbvABwIluXa0fD8XVKMLTO0aOEuUfACAA==
-  dependencies:
-    jest-diff "^26.0.0"
-    pretty-format "^26.0.0"
-
-"@types/json-schema@*", "@types/json-schema@^7.0.3", "@types/json-schema@^7.0.5", "@types/json-schema@^7.0.6":
-  version "7.0.7"
-  resolved "https://registry.yarnpkg.com/@types/json-schema/-/json-schema-7.0.7.tgz#98a993516c859eb0d5c4c8f098317a9ea68db9ad"
-  integrity sha512-cxWFQVseBm6O9Gbw1IWb8r6OS4OhSt3hPZLkFApLjM8TEXROBuQGLAH2i2gZpcXdLBIrpXuTDhH7Vbm1iXmNGA==
-
-"@types/json5@^0.0.29":
-  version "0.0.29"
-  resolved "https://registry.yarnpkg.com/@types/json5/-/json5-0.0.29.tgz#ee28707ae94e11d2b827bcbe5270bcea7f3e71ee"
-  integrity sha1-7ihweulOEdK4J7y+UnC86n8+ce4=
-
-"@types/lodash.mergewith@4.6.6":
-  version "4.6.6"
-  resolved "https://registry.yarnpkg.com/@types/lodash.mergewith/-/lodash.mergewith-4.6.6.tgz#c4698f5b214a433ff35cb2c75ee6ec7f99d79f10"
-  integrity sha512-RY/8IaVENjG19rxTZu9Nukqh0W2UrYgmBj5sdns4hWRZaV8PqR7wIKHFKzvOTjo4zVRV7sVI+yFhAJql12Kfqg==
-  dependencies:
-    "@types/lodash" "*"
-
-"@types/lodash@*":
-  version "4.14.168"
-  resolved "https://registry.yarnpkg.com/@types/lodash/-/lodash-4.14.168.tgz#fe24632e79b7ade3f132891afff86caa5e5ce008"
-  integrity sha512-oVfRvqHV/V6D1yifJbVRU3TMp8OT6o6BG+U9MkwuJ3U8/CsDHvalRpsxBqivn71ztOFZBTfJMvETbqHiaNSj7Q==
-
-"@types/minimatch@*":
-  version "3.0.3"
-  resolved "https://registry.yarnpkg.com/@types/minimatch/-/minimatch-3.0.3.tgz#3dca0e3f33b200fc7d1139c0cd96c1268cadfd9d"
-  integrity sha512-tHq6qdbT9U1IRSGf14CL0pUlULksvY9OZ+5eEgl1N7t+OA3tGvNpxJCzuKQlsNgCVwbAs670L1vcVQi8j9HjnA==
-
-"@types/node@*":
-  version "14.14.31"
-  resolved "https://registry.yarnpkg.com/@types/node/-/node-14.14.31.tgz#72286bd33d137aa0d152d47ec7c1762563d34055"
-  integrity sha512-vFHy/ezP5qI0rFgJ7aQnjDXwAMrG0KqqIH7tQG5PPv3BWBayOPIQNBjVc/P6hhdZfMx51REc6tfDNXHUio893g==
-
-"@types/node@^12.0.0":
-  version "12.20.4"
-  resolved "https://registry.yarnpkg.com/@types/node/-/node-12.20.4.tgz#73687043dd00fcb6962c60fbf499553a24d6bdf2"
-  integrity sha512-xRCgeE0Q4pT5UZ189TJ3SpYuX/QGl6QIAOAIeDSbAVAd2gX1NxSZup4jNVK7cxIeP8KDSbJgcckun495isP1jQ==
-
-"@types/normalize-package-data@^2.4.0":
-  version "2.4.0"
-  resolved "https://registry.yarnpkg.com/@types/normalize-package-data/-/normalize-package-data-2.4.0.tgz#e486d0d97396d79beedd0a6e33f4534ff6b4973e"
-  integrity sha512-f5j5b/Gf71L+dbqxIpQ4Z2WlmI/mPJ0fOkGGmFgtb6sAu97EPczzbS3/tJKxmcYDj55OX6ssqwDAWOHIYDRDGA==
-
-"@types/object-assign@4.0.30":
-  version "4.0.30"
-  resolved "https://registry.yarnpkg.com/@types/object-assign/-/object-assign-4.0.30.tgz#8949371d5a99f4381ee0f1df0a9b7a187e07e652"
-  integrity sha1-iUk3HVqZ9Dge4PHfCpt6GH4H5lI=
-
-"@types/parse-json@^4.0.0":
-  version "4.0.0"
-  resolved "https://registry.yarnpkg.com/@types/parse-json/-/parse-json-4.0.0.tgz#2f8bb441434d163b35fb8ffdccd7138927ffb8c0"
-  integrity sha512-//oorEZjL6sbPcKUaCdIGlIUeH26mgzimjBB77G6XRgnDl/L5wOnpyBGRe/Mmf5CVW3PwEBE1NjiMZ/ssFh4wA==
-
-"@types/prettier@^2.0.0":
-  version "2.2.1"
-  resolved "https://registry.yarnpkg.com/@types/prettier/-/prettier-2.2.1.tgz#374e31645d58cb18a07b3ecd8e9dede4deb2cccd"
-  integrity sha512-DxZZbyMAM9GWEzXL+BMZROWz9oo6A9EilwwOMET2UVu2uZTqMWS5S69KVtuVKaRjCUpcrOXRalet86/OpG4kqw==
-
-"@types/prop-types@*":
-  version "15.7.3"
-  resolved "https://registry.yarnpkg.com/@types/prop-types/-/prop-types-15.7.3.tgz#2ab0d5da2e5815f94b0b9d4b95d1e5f243ab2ca7"
-  integrity sha512-KfRL3PuHmqQLOG+2tGpRO26Ctg+Cq1E01D2DMriKEATHgWLfeNDmq9e29Q9WIky0dQ3NPkd1mzYH8Lm936Z9qw==
-
-"@types/q@^1.5.1":
-  version "1.5.4"
-  resolved "https://registry.yarnpkg.com/@types/q/-/q-1.5.4.tgz#15925414e0ad2cd765bfef58842f7e26a7accb24"
-  integrity sha512-1HcDas8SEj4z1Wc696tH56G8OlRaH/sqZOynNNB+HF0WOeXPaxTtbYzJY2oEfiUxjSKjhCKr+MvR7dCHcEelug==
-
-"@types/react-dom@^17.0.0":
-  version "17.0.1"
-  resolved "https://registry.yarnpkg.com/@types/react-dom/-/react-dom-17.0.1.tgz#d92d77d020bfb083e07cc8e0ac9f933599a4d56a"
-  integrity sha512-yIVyopxQb8IDZ7SOHeTovurFq+fXiPICa+GV3gp0Xedsl+MwQlMLKmvrnEjFbQxjliH5YVAEWFh975eVNmKj7Q==
-  dependencies:
-    "@types/react" "*"
-
-"@types/react-table@^7.0.28":
-  version "7.0.28"
-  resolved "https://registry.yarnpkg.com/@types/react-table/-/react-table-7.0.28.tgz#763383c3e7a285892ee64f311ee97a9c254b2bb0"
-  integrity sha512-crPm70S2KYGj3HJ2zCoeT0t8tdIvKDKCClMd1up3Gi/EDiTZraj3JFUsEL3+oXGSyv+n0EGGAf9a+0XsmdGpXA==
-  dependencies:
-    "@types/react" "*"
-
-"@types/react-timeago@^4.1.2":
-  version "4.1.2"
-  resolved "https://registry.yarnpkg.com/@types/react-timeago/-/react-timeago-4.1.2.tgz#fc365ac4483888e9b47267259416be2fd5cf765f"
-  integrity sha512-gkhU3rH7aZgeRybbm9ie9wHOM9i1I5YhUoto/uqY/DAbeRZuLU8ugl6E97jp65XCl9QTij32Vs7BAX2E/MqOAw==
-  dependencies:
-    "@types/react" "*"
-
-"@types/react@*", "@types/react@^17.0.0":
-  version "17.0.2"
-  resolved "https://registry.yarnpkg.com/@types/react/-/react-17.0.2.tgz#3de24c4efef902dd9795a49c75f760cbe4f7a5a8"
-  integrity sha512-Xt40xQsrkdvjn1EyWe1Bc0dJLcil/9x2vAuW7ya+PuQip4UYUaXyhzWmAbwRsdMgwOFHpfp7/FFZebDU6Y8VHA==
-  dependencies:
-    "@types/prop-types" "*"
-    csstype "^3.0.2"
-
-"@types/resolve@0.0.8":
-  version "0.0.8"
-  resolved "https://registry.yarnpkg.com/@types/resolve/-/resolve-0.0.8.tgz#f26074d238e02659e323ce1a13d041eee280e194"
-  integrity sha512-auApPaJf3NPfe18hSoJkp8EbZzer2ISk7o8mCC3M9he/a04+gbMF97NkpD2S8riMGvm4BMRI59/SZQSaLTKpsQ==
-  dependencies:
-    "@types/node" "*"
-
-"@types/source-list-map@*":
-  version "0.1.2"
-  resolved "https://registry.yarnpkg.com/@types/source-list-map/-/source-list-map-0.1.2.tgz#0078836063ffaf17412349bba364087e0ac02ec9"
-  integrity sha512-K5K+yml8LTo9bWJI/rECfIPrGgxdpeNbj+d53lwN4QjW1MCwlkhUms+gtdzigTeUyBr09+u8BwOIY3MXvHdcsA==
-
-"@types/stack-utils@^2.0.0":
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/@types/stack-utils/-/stack-utils-2.0.0.tgz#7036640b4e21cc2f259ae826ce843d277dad8cff"
-  integrity sha512-RJJrrySY7A8havqpGObOB4W92QXKJo63/jFLLgpvOtsGUqbQZ9Sbgl35KMm1DjC6j7AvmmU2bIno+3IyEaemaw==
-
-"@types/tapable@*", "@types/tapable@^1.0.5":
-  version "1.0.6"
-  resolved "https://registry.yarnpkg.com/@types/tapable/-/tapable-1.0.6.tgz#a9ca4b70a18b270ccb2bc0aaafefd1d486b7ea74"
-  integrity sha512-W+bw9ds02rAQaMvaLYxAbJ6cvguW/iJXNT6lTssS1ps6QdrMKttqEAMEG/b5CR8TZl3/L7/lH0ZV5nNR1LXikA==
-
-"@types/testing-library__jest-dom@^5.9.1":
-  version "5.9.5"
-  resolved "https://registry.yarnpkg.com/@types/testing-library__jest-dom/-/testing-library__jest-dom-5.9.5.tgz#5bf25c91ad2d7b38f264b12275e5c92a66d849b0"
-  integrity sha512-ggn3ws+yRbOHog9GxnXiEZ/35Mow6YtPZpd7Z5mKDeZS/o7zx3yAle0ov/wjhVB5QT4N2Dt+GNoGCdqkBGCajQ==
-  dependencies:
-    "@types/jest" "*"
-
-"@types/tinycolor2@1.4.2":
-  version "1.4.2"
-  resolved "https://registry.yarnpkg.com/@types/tinycolor2/-/tinycolor2-1.4.2.tgz#721ca5c5d1a2988b4a886e35c2ffc5735b6afbdf"
-  integrity sha512-PeHg/AtdW6aaIO2a+98Xj7rWY4KC1E6yOy7AFknJQ7VXUGNrMlyxDFxJo7HqLtjQms/ZhhQX52mLVW/EX3JGOw==
-
-"@types/uglify-js@*":
-  version "3.12.0"
-  resolved "https://registry.yarnpkg.com/@types/uglify-js/-/uglify-js-3.12.0.tgz#2bb061c269441620d46b946350c8f16d52ef37c5"
-  integrity sha512-sYAF+CF9XZ5cvEBkI7RtrG9g2GtMBkviTnBxYYyq+8BWvO4QtXfwwR6a2LFwCi4evMKZfpv6U43ViYvv17Wz3Q==
-  dependencies:
-    source-map "^0.6.1"
-
-"@types/warning@^3.0.0":
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/@types/warning/-/warning-3.0.0.tgz#0d2501268ad8f9962b740d387c4654f5f8e23e52"
-  integrity sha1-DSUBJorY+ZYrdA04fEZU9fjiPlI=
-
-"@types/webpack-sources@*":
-  version "2.1.0"
-  resolved "https://registry.yarnpkg.com/@types/webpack-sources/-/webpack-sources-2.1.0.tgz#8882b0bd62d1e0ce62f183d0d01b72e6e82e8c10"
-  integrity sha512-LXn/oYIpBeucgP1EIJbKQ2/4ZmpvRl+dlrFdX7+94SKRUV3Evy3FsfMZY318vGhkWUS5MPhtOM3w1/hCOAOXcg==
-  dependencies:
-    "@types/node" "*"
-    "@types/source-list-map" "*"
-    source-map "^0.7.3"
-
-"@types/webpack@^4.41.8":
-  version "4.41.26"
-  resolved "https://registry.yarnpkg.com/@types/webpack/-/webpack-4.41.26.tgz#27a30d7d531e16489f9c7607c747be6bc1a459ef"
-  integrity sha512-7ZyTfxjCRwexh+EJFwRUM+CDB2XvgHl4vfuqf1ZKrgGvcS5BrNvPQqJh3tsZ0P6h6Aa1qClVHaJZszLPzpqHeA==
-  dependencies:
-    "@types/anymatch" "*"
-    "@types/node" "*"
-    "@types/tapable" "*"
-    "@types/uglify-js" "*"
-    "@types/webpack-sources" "*"
-    source-map "^0.6.0"
-
-"@types/yargs-parser@*":
-  version "20.2.0"
-  resolved "https://registry.yarnpkg.com/@types/yargs-parser/-/yargs-parser-20.2.0.tgz#dd3e6699ba3237f0348cd085e4698780204842f9"
-  integrity sha512-37RSHht+gzzgYeobbG+KWryeAW8J33Nhr69cjTqSYymXVZEN9NbRYWoYlRtDhHKPVT1FyNKwaTPC1NynKZpzRA==
-
-"@types/yargs@^15.0.0":
-  version "15.0.13"
-  resolved "https://registry.yarnpkg.com/@types/yargs/-/yargs-15.0.13.tgz#34f7fec8b389d7f3c1fd08026a5763e072d3c6dc"
-  integrity sha512-kQ5JNTrbDv3Rp5X2n/iUu37IJBDU2gsZ5R/g1/KHOOEc5IKfUFjXT6DENPGduh08I/pamwtEq4oul7gUqKTQDQ==
-  dependencies:
-    "@types/yargs-parser" "*"
-
-"@typescript-eslint/eslint-plugin@^4.5.0":
-  version "4.15.2"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/eslint-plugin/-/eslint-plugin-4.15.2.tgz#981b26b4076c62a5a55873fbef3fe98f83360c61"
-  integrity sha512-uiQQeu9tWl3f1+oK0yoAv9lt/KXO24iafxgQTkIYO/kitruILGx3uH+QtIAHqxFV+yIsdnJH+alel9KuE3J15Q==
-  dependencies:
-    "@typescript-eslint/experimental-utils" "4.15.2"
-    "@typescript-eslint/scope-manager" "4.15.2"
-    debug "^4.1.1"
-    functional-red-black-tree "^1.0.1"
-    lodash "^4.17.15"
-    regexpp "^3.0.0"
-    semver "^7.3.2"
-    tsutils "^3.17.1"
-
-"@typescript-eslint/experimental-utils@4.15.2", "@typescript-eslint/experimental-utils@^4.0.1":
-  version "4.15.2"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/experimental-utils/-/experimental-utils-4.15.2.tgz#5efd12355bd5b535e1831282e6cf465b9a71cf36"
-  integrity sha512-Fxoshw8+R5X3/Vmqwsjc8nRO/7iTysRtDqx6rlfLZ7HbT8TZhPeQqbPjTyk2RheH3L8afumecTQnUc9EeXxohQ==
-  dependencies:
-    "@types/json-schema" "^7.0.3"
-    "@typescript-eslint/scope-manager" "4.15.2"
-    "@typescript-eslint/types" "4.15.2"
-    "@typescript-eslint/typescript-estree" "4.15.2"
-    eslint-scope "^5.0.0"
-    eslint-utils "^2.0.0"
-
-"@typescript-eslint/experimental-utils@^3.10.1":
-  version "3.10.1"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/experimental-utils/-/experimental-utils-3.10.1.tgz#e179ffc81a80ebcae2ea04e0332f8b251345a686"
-  integrity sha512-DewqIgscDzmAfd5nOGe4zm6Bl7PKtMG2Ad0KG8CUZAHlXfAKTF9Ol5PXhiMh39yRL2ChRH1cuuUGOcVyyrhQIw==
-  dependencies:
-    "@types/json-schema" "^7.0.3"
-    "@typescript-eslint/types" "3.10.1"
-    "@typescript-eslint/typescript-estree" "3.10.1"
-    eslint-scope "^5.0.0"
-    eslint-utils "^2.0.0"
-
-"@typescript-eslint/parser@^4.5.0":
-  version "4.15.2"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/parser/-/parser-4.15.2.tgz#c804474321ef76a3955aec03664808f0d6e7872e"
-  integrity sha512-SHeF8xbsC6z2FKXsaTb1tBCf0QZsjJ94H6Bo51Y1aVEZ4XAefaw5ZAilMoDPlGghe+qtq7XdTiDlGfVTOmvA+Q==
-  dependencies:
-    "@typescript-eslint/scope-manager" "4.15.2"
-    "@typescript-eslint/types" "4.15.2"
-    "@typescript-eslint/typescript-estree" "4.15.2"
-    debug "^4.1.1"
-
-"@typescript-eslint/scope-manager@4.15.2":
-  version "4.15.2"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/scope-manager/-/scope-manager-4.15.2.tgz#5725bda656995960ae1d004bfd1cd70320f37f4f"
-  integrity sha512-Zm0tf/MSKuX6aeJmuXexgdVyxT9/oJJhaCkijv0DvJVT3ui4zY6XYd6iwIo/8GEZGy43cd7w1rFMiCLHbRzAPQ==
-  dependencies:
-    "@typescript-eslint/types" "4.15.2"
-    "@typescript-eslint/visitor-keys" "4.15.2"
-
-"@typescript-eslint/types@3.10.1":
-  version "3.10.1"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/types/-/types-3.10.1.tgz#1d7463fa7c32d8a23ab508a803ca2fe26e758727"
-  integrity sha512-+3+FCUJIahE9q0lDi1WleYzjCwJs5hIsbugIgnbB+dSCYUxl8L6PwmsyOPFZde2hc1DlTo/xnkOgiTLSyAbHiQ==
-
-"@typescript-eslint/types@4.15.2":
-  version "4.15.2"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/types/-/types-4.15.2.tgz#04acf3a2dc8001a88985291744241e732ef22c60"
-  integrity sha512-r7lW7HFkAarfUylJ2tKndyO9njwSyoy6cpfDKWPX6/ctZA+QyaYscAHXVAfJqtnY6aaTwDYrOhp+ginlbc7HfQ==
-
-"@typescript-eslint/typescript-estree@3.10.1":
-  version "3.10.1"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/typescript-estree/-/typescript-estree-3.10.1.tgz#fd0061cc38add4fad45136d654408569f365b853"
-  integrity sha512-QbcXOuq6WYvnB3XPsZpIwztBoquEYLXh2MtwVU+kO8jgYCiv4G5xrSP/1wg4tkvrEE+esZVquIPX/dxPlePk1w==
-  dependencies:
-    "@typescript-eslint/types" "3.10.1"
-    "@typescript-eslint/visitor-keys" "3.10.1"
-    debug "^4.1.1"
-    glob "^7.1.6"
-    is-glob "^4.0.1"
-    lodash "^4.17.15"
-    semver "^7.3.2"
-    tsutils "^3.17.1"
-
-"@typescript-eslint/typescript-estree@4.15.2":
-  version "4.15.2"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/typescript-estree/-/typescript-estree-4.15.2.tgz#c2f7a1e94f3428d229d5ecff3ead6581ee9b62fa"
-  integrity sha512-cGR8C2g5SPtHTQvAymEODeqx90pJHadWsgTtx6GbnTWKqsg7yp6Eaya9nFzUd4KrKhxdYTTFBiYeTPQaz/l8bw==
-  dependencies:
-    "@typescript-eslint/types" "4.15.2"
-    "@typescript-eslint/visitor-keys" "4.15.2"
-    debug "^4.1.1"
-    globby "^11.0.1"
-    is-glob "^4.0.1"
-    semver "^7.3.2"
-    tsutils "^3.17.1"
-
-"@typescript-eslint/visitor-keys@3.10.1":
-  version "3.10.1"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/visitor-keys/-/visitor-keys-3.10.1.tgz#cd4274773e3eb63b2e870ac602274487ecd1e931"
-  integrity sha512-9JgC82AaQeglebjZMgYR5wgmfUdUc+EitGUUMW8u2nDckaeimzW+VsoLV6FoimPv2id3VQzfjwBxEMVz08ameQ==
-  dependencies:
-    eslint-visitor-keys "^1.1.0"
-
-"@typescript-eslint/visitor-keys@4.15.2":
-  version "4.15.2"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/visitor-keys/-/visitor-keys-4.15.2.tgz#3d1c7979ce75bf6acf9691109bd0d6b5706192b9"
-  integrity sha512-TME1VgSb7wTwgENN5KVj4Nqg25hP8DisXxNBojM4Nn31rYaNDIocNm5cmjOFfh42n7NVERxWrDFoETO/76ePyg==
-  dependencies:
-    "@typescript-eslint/types" "4.15.2"
-    eslint-visitor-keys "^2.0.0"
-
-"@webassemblyjs/ast@1.9.0":
-  version "1.9.0"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/ast/-/ast-1.9.0.tgz#bd850604b4042459a5a41cd7d338cbed695ed964"
-  integrity sha512-C6wW5L+b7ogSDVqymbkkvuW9kruN//YisMED04xzeBBqjHa2FYnmvOlS6Xj68xWQRgWvI9cIglsjFowH/RJyEA==
-  dependencies:
-    "@webassemblyjs/helper-module-context" "1.9.0"
-    "@webassemblyjs/helper-wasm-bytecode" "1.9.0"
-    "@webassemblyjs/wast-parser" "1.9.0"
-
-"@webassemblyjs/floating-point-hex-parser@1.9.0":
-  version "1.9.0"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/floating-point-hex-parser/-/floating-point-hex-parser-1.9.0.tgz#3c3d3b271bddfc84deb00f71344438311d52ffb4"
-  integrity sha512-TG5qcFsS8QB4g4MhrxK5TqfdNe7Ey/7YL/xN+36rRjl/BlGE/NcBvJcqsRgCP6Z92mRE+7N50pRIi8SmKUbcQA==
-
-"@webassemblyjs/helper-api-error@1.9.0":
-  version "1.9.0"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/helper-api-error/-/helper-api-error-1.9.0.tgz#203f676e333b96c9da2eeab3ccef33c45928b6a2"
-  integrity sha512-NcMLjoFMXpsASZFxJ5h2HZRcEhDkvnNFOAKneP5RbKRzaWJN36NC4jqQHKwStIhGXu5mUWlUUk7ygdtrO8lbmw==
-
-"@webassemblyjs/helper-buffer@1.9.0":
-  version "1.9.0"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/helper-buffer/-/helper-buffer-1.9.0.tgz#a1442d269c5feb23fcbc9ef759dac3547f29de00"
-  integrity sha512-qZol43oqhq6yBPx7YM3m9Bv7WMV9Eevj6kMi6InKOuZxhw+q9hOkvq5e/PpKSiLfyetpaBnogSbNCfBwyB00CA==
-
-"@webassemblyjs/helper-code-frame@1.9.0":
-  version "1.9.0"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/helper-code-frame/-/helper-code-frame-1.9.0.tgz#647f8892cd2043a82ac0c8c5e75c36f1d9159f27"
-  integrity sha512-ERCYdJBkD9Vu4vtjUYe8LZruWuNIToYq/ME22igL+2vj2dQ2OOujIZr3MEFvfEaqKoVqpsFKAGsRdBSBjrIvZA==
-  dependencies:
-    "@webassemblyjs/wast-printer" "1.9.0"
-
-"@webassemblyjs/helper-fsm@1.9.0":
-  version "1.9.0"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/helper-fsm/-/helper-fsm-1.9.0.tgz#c05256b71244214671f4b08ec108ad63b70eddb8"
-  integrity sha512-OPRowhGbshCb5PxJ8LocpdX9Kl0uB4XsAjl6jH/dWKlk/mzsANvhwbiULsaiqT5GZGT9qinTICdj6PLuM5gslw==
-
-"@webassemblyjs/helper-module-context@1.9.0":
-  version "1.9.0"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/helper-module-context/-/helper-module-context-1.9.0.tgz#25d8884b76839871a08a6c6f806c3979ef712f07"
-  integrity sha512-MJCW8iGC08tMk2enck1aPW+BE5Cw8/7ph/VGZxwyvGbJwjktKkDK7vy7gAmMDx88D7mhDTCNKAW5tED+gZ0W8g==
-  dependencies:
-    "@webassemblyjs/ast" "1.9.0"
-
-"@webassemblyjs/helper-wasm-bytecode@1.9.0":
-  version "1.9.0"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/helper-wasm-bytecode/-/helper-wasm-bytecode-1.9.0.tgz#4fed8beac9b8c14f8c58b70d124d549dd1fe5790"
-  integrity sha512-R7FStIzyNcd7xKxCZH5lE0Bqy+hGTwS3LJjuv1ZVxd9O7eHCedSdrId/hMOd20I+v8wDXEn+bjfKDLzTepoaUw==
-
-"@webassemblyjs/helper-wasm-section@1.9.0":
-  version "1.9.0"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/helper-wasm-section/-/helper-wasm-section-1.9.0.tgz#5a4138d5a6292ba18b04c5ae49717e4167965346"
-  integrity sha512-XnMB8l3ek4tvrKUUku+IVaXNHz2YsJyOOmz+MMkZvh8h1uSJpSen6vYnw3IoQ7WwEuAhL8Efjms1ZWjqh2agvw==
-  dependencies:
-    "@webassemblyjs/ast" "1.9.0"
-    "@webassemblyjs/helper-buffer" "1.9.0"
-    "@webassemblyjs/helper-wasm-bytecode" "1.9.0"
-    "@webassemblyjs/wasm-gen" "1.9.0"
-
-"@webassemblyjs/ieee754@1.9.0":
-  version "1.9.0"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/ieee754/-/ieee754-1.9.0.tgz#15c7a0fbaae83fb26143bbacf6d6df1702ad39e4"
-  integrity sha512-dcX8JuYU/gvymzIHc9DgxTzUUTLexWwt8uCTWP3otys596io0L5aW02Gb1RjYpx2+0Jus1h4ZFqjla7umFniTg==
-  dependencies:
-    "@xtuc/ieee754" "^1.2.0"
-
-"@webassemblyjs/leb128@1.9.0":
-  version "1.9.0"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/leb128/-/leb128-1.9.0.tgz#f19ca0b76a6dc55623a09cffa769e838fa1e1c95"
-  integrity sha512-ENVzM5VwV1ojs9jam6vPys97B/S65YQtv/aanqnU7D8aSoHFX8GyhGg0CMfyKNIHBuAVjy3tlzd5QMMINa7wpw==
-  dependencies:
-    "@xtuc/long" "4.2.2"
-
-"@webassemblyjs/utf8@1.9.0":
-  version "1.9.0"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/utf8/-/utf8-1.9.0.tgz#04d33b636f78e6a6813227e82402f7637b6229ab"
-  integrity sha512-GZbQlWtopBTP0u7cHrEx+73yZKrQoBMpwkGEIqlacljhXCkVM1kMQge/Mf+csMJAjEdSwhOyLAS0AoR3AG5P8w==
-
-"@webassemblyjs/wasm-edit@1.9.0":
-  version "1.9.0"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/wasm-edit/-/wasm-edit-1.9.0.tgz#3fe6d79d3f0f922183aa86002c42dd256cfee9cf"
-  integrity sha512-FgHzBm80uwz5M8WKnMTn6j/sVbqilPdQXTWraSjBwFXSYGirpkSWE2R9Qvz9tNiTKQvoKILpCuTjBKzOIm0nxw==
-  dependencies:
-    "@webassemblyjs/ast" "1.9.0"
-    "@webassemblyjs/helper-buffer" "1.9.0"
-    "@webassemblyjs/helper-wasm-bytecode" "1.9.0"
-    "@webassemblyjs/helper-wasm-section" "1.9.0"
-    "@webassemblyjs/wasm-gen" "1.9.0"
-    "@webassemblyjs/wasm-opt" "1.9.0"
-    "@webassemblyjs/wasm-parser" "1.9.0"
-    "@webassemblyjs/wast-printer" "1.9.0"
-
-"@webassemblyjs/wasm-gen@1.9.0":
-  version "1.9.0"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/wasm-gen/-/wasm-gen-1.9.0.tgz#50bc70ec68ded8e2763b01a1418bf43491a7a49c"
-  integrity sha512-cPE3o44YzOOHvlsb4+E9qSqjc9Qf9Na1OO/BHFy4OI91XDE14MjFN4lTMezzaIWdPqHnsTodGGNP+iRSYfGkjA==
-  dependencies:
-    "@webassemblyjs/ast" "1.9.0"
-    "@webassemblyjs/helper-wasm-bytecode" "1.9.0"
-    "@webassemblyjs/ieee754" "1.9.0"
-    "@webassemblyjs/leb128" "1.9.0"
-    "@webassemblyjs/utf8" "1.9.0"
-
-"@webassemblyjs/wasm-opt@1.9.0":
-  version "1.9.0"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/wasm-opt/-/wasm-opt-1.9.0.tgz#2211181e5b31326443cc8112eb9f0b9028721a61"
-  integrity sha512-Qkjgm6Anhm+OMbIL0iokO7meajkzQD71ioelnfPEj6r4eOFuqm4YC3VBPqXjFyyNwowzbMD+hizmprP/Fwkl2A==
-  dependencies:
-    "@webassemblyjs/ast" "1.9.0"
-    "@webassemblyjs/helper-buffer" "1.9.0"
-    "@webassemblyjs/wasm-gen" "1.9.0"
-    "@webassemblyjs/wasm-parser" "1.9.0"
-
-"@webassemblyjs/wasm-parser@1.9.0":
-  version "1.9.0"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/wasm-parser/-/wasm-parser-1.9.0.tgz#9d48e44826df4a6598294aa6c87469d642fff65e"
-  integrity sha512-9+wkMowR2AmdSWQzsPEjFU7njh8HTO5MqO8vjwEHuM+AMHioNqSBONRdr0NQQ3dVQrzp0s8lTcYqzUdb7YgELA==
-  dependencies:
-    "@webassemblyjs/ast" "1.9.0"
-    "@webassemblyjs/helper-api-error" "1.9.0"
-    "@webassemblyjs/helper-wasm-bytecode" "1.9.0"
-    "@webassemblyjs/ieee754" "1.9.0"
-    "@webassemblyjs/leb128" "1.9.0"
-    "@webassemblyjs/utf8" "1.9.0"
-
-"@webassemblyjs/wast-parser@1.9.0":
-  version "1.9.0"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/wast-parser/-/wast-parser-1.9.0.tgz#3031115d79ac5bd261556cecc3fa90a3ef451914"
-  integrity sha512-qsqSAP3QQ3LyZjNC/0jBJ/ToSxfYJ8kYyuiGvtn/8MK89VrNEfwj7BPQzJVHi0jGTRK2dGdJ5PRqhtjzoww+bw==
-  dependencies:
-    "@webassemblyjs/ast" "1.9.0"
-    "@webassemblyjs/floating-point-hex-parser" "1.9.0"
-    "@webassemblyjs/helper-api-error" "1.9.0"
-    "@webassemblyjs/helper-code-frame" "1.9.0"
-    "@webassemblyjs/helper-fsm" "1.9.0"
-    "@xtuc/long" "4.2.2"
-
-"@webassemblyjs/wast-printer@1.9.0":
-  version "1.9.0"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/wast-printer/-/wast-printer-1.9.0.tgz#4935d54c85fef637b00ce9f52377451d00d47899"
-  integrity sha512-2J0nE95rHXHyQ24cWjMKJ1tqB/ds8z/cyeOZxJhcb+rW+SQASVjuznUSmdz5GpVJTzU8JkhYut0D3siFDD6wsA==
-  dependencies:
-    "@webassemblyjs/ast" "1.9.0"
-    "@webassemblyjs/wast-parser" "1.9.0"
-    "@xtuc/long" "4.2.2"
-
-"@xtuc/ieee754@^1.2.0":
-  version "1.2.0"
-  resolved "https://registry.yarnpkg.com/@xtuc/ieee754/-/ieee754-1.2.0.tgz#eef014a3145ae477a1cbc00cd1e552336dceb790"
-  integrity sha512-DX8nKgqcGwsc0eJSqYt5lwP4DH5FlHnmuWWBRy7X0NcaGR0ZtuyeESgMwTYVEtxmsNGY+qit4QYT/MIYTOTPeA==
-
-"@xtuc/long@4.2.2":
-  version "4.2.2"
-  resolved "https://registry.yarnpkg.com/@xtuc/long/-/long-4.2.2.tgz#d291c6a4e97989b5c61d9acf396ae4fe133a718d"
-  integrity sha512-NuHqBY1PB/D8xU6s/thBgOAiAP7HOYDQ32+BFZILJ8ivkUkAHQnWfn6WhL79Owj1qmUnoN/YPhktdIoucipkAQ==
-
-abab@^2.0.3:
-  version "2.0.5"
-  resolved "https://registry.yarnpkg.com/abab/-/abab-2.0.5.tgz#c0b678fb32d60fc1219c784d6a826fe385aeb79a"
-  integrity sha512-9IK9EadsbHo6jLWIpxpR6pL0sazTXV6+SQv25ZB+F7Bj9mJNaOc4nCRabwd5M/JwmUa8idz6Eci6eKfJryPs6Q==
-
-accepts@~1.3.4, accepts@~1.3.5, accepts@~1.3.7:
-  version "1.3.7"
-  resolved "https://registry.yarnpkg.com/accepts/-/accepts-1.3.7.tgz#531bc726517a3b2b41f850021c6cc15eaab507cd"
-  integrity sha512-Il80Qs2WjYlJIBNzNkK6KYqlVMTbZLXgHx2oT0pU/fjRHyEp+PEfEPY0R3WCwAGVOtauxh1hOxNgIf5bv7dQpA==
-  dependencies:
-    mime-types "~2.1.24"
-    negotiator "0.6.2"
-
-acorn-globals@^6.0.0:
-  version "6.0.0"
-  resolved "https://registry.yarnpkg.com/acorn-globals/-/acorn-globals-6.0.0.tgz#46cdd39f0f8ff08a876619b55f5ac8a6dc770b45"
-  integrity sha512-ZQl7LOWaF5ePqqcX4hLuv/bLXYQNfNWw2c0/yX/TsPRKamzHcTGQnlCjHT3TsmkOUVEPS3crCxiPfdzE/Trlhg==
-  dependencies:
-    acorn "^7.1.1"
-    acorn-walk "^7.1.1"
-
-acorn-jsx@^5.3.1:
-  version "5.3.1"
-  resolved "https://registry.yarnpkg.com/acorn-jsx/-/acorn-jsx-5.3.1.tgz#fc8661e11b7ac1539c47dbfea2e72b3af34d267b"
-  integrity sha512-K0Ptm/47OKfQRpNQ2J/oIN/3QYiK6FwW+eJbILhsdxh2WTLdl+30o8aGdTbm5JbffpFFAg/g+zi1E+jvJha5ng==
-
-acorn-walk@^7.1.1:
-  version "7.2.0"
-  resolved "https://registry.yarnpkg.com/acorn-walk/-/acorn-walk-7.2.0.tgz#0de889a601203909b0fbe07b8938dc21d2e967bc"
-  integrity sha512-OPdCF6GsMIP+Az+aWfAAOEt2/+iVDKE7oy6lJ098aoe59oAmK76qV6Gw60SbZ8jHuG2wH058GF4pLFbYamYrVA==
-
-acorn@^6.4.1:
-  version "6.4.2"
-  resolved "https://registry.yarnpkg.com/acorn/-/acorn-6.4.2.tgz#35866fd710528e92de10cf06016498e47e39e1e6"
-  integrity sha512-XtGIhXwF8YM8bJhGxG5kXgjkEuNGLTkoYqVE+KMR+aspr4KGYmKYg7yUe3KghyQ9yheNwLnjmzh/7+gfDBmHCQ==
-
-acorn@^7.1.0, acorn@^7.1.1, acorn@^7.4.0:
-  version "7.4.1"
-  resolved "https://registry.yarnpkg.com/acorn/-/acorn-7.4.1.tgz#feaed255973d2e77555b83dbc08851a6c63520fa"
-  integrity sha512-nQyp0o1/mNdbTO1PO6kHkwSrmgZ0MT/jCCpNiwbUjGoRN4dlBhqJtoQuCnEOKzgTVwg0ZWiCoQy6SxMebQVh8A==
-
-address@1.1.2, address@^1.0.1:
-  version "1.1.2"
-  resolved "https://registry.yarnpkg.com/address/-/address-1.1.2.tgz#bf1116c9c758c51b7a933d296b72c221ed9428b6"
-  integrity sha512-aT6camzM4xEA54YVJYSqxz1kv4IHnQZRtThJJHhUMRExaU5spC7jX5ugSwTaTgJliIgs4VhZOk7htClvQ/LmRA==
-
-adjust-sourcemap-loader@3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/adjust-sourcemap-loader/-/adjust-sourcemap-loader-3.0.0.tgz#5ae12fb5b7b1c585e80bbb5a63ec163a1a45e61e"
-  integrity sha512-YBrGyT2/uVQ/c6Rr+t6ZJXniY03YtHGMJQYal368burRGYKqhx9qGTWqcBU5s1CwYY9E/ri63RYyG1IacMZtqw==
-  dependencies:
-    loader-utils "^2.0.0"
-    regex-parser "^2.2.11"
-
-aggregate-error@^3.0.0:
-  version "3.1.0"
-  resolved "https://registry.yarnpkg.com/aggregate-error/-/aggregate-error-3.1.0.tgz#92670ff50f5359bdb7a3e0d40d0ec30c5737687a"
-  integrity sha512-4I7Td01quW/RpocfNayFdFVk1qSuoh0E7JrbRJ16nH01HhKFQ88INq9Sd+nd72zqRySlr9BmDA8xlEJ6vJMrYA==
-  dependencies:
-    clean-stack "^2.0.0"
-    indent-string "^4.0.0"
-
-ajv-errors@^1.0.0:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/ajv-errors/-/ajv-errors-1.0.1.tgz#f35986aceb91afadec4102fbd85014950cefa64d"
-  integrity sha512-DCRfO/4nQ+89p/RK43i8Ezd41EqdGIU4ld7nGF8OQ14oc/we5rEntLCUa7+jrn3nn83BosfwZA0wb4pon2o8iQ==
-
-ajv-keywords@^3.1.0, ajv-keywords@^3.4.1, ajv-keywords@^3.5.2:
-  version "3.5.2"
-  resolved "https://registry.yarnpkg.com/ajv-keywords/-/ajv-keywords-3.5.2.tgz#31f29da5ab6e00d1c2d329acf7b5929614d5014d"
-  integrity sha512-5p6WTN0DdTGVQk6VjcEju19IgaHudalcfabD7yhDGeA6bcQnmL+CpveLJq/3hvfwd1aof6L386Ougkx6RfyMIQ==
-
-ajv@^6.1.0, ajv@^6.10.0, ajv@^6.10.2, ajv@^6.12.3, ajv@^6.12.4, ajv@^6.12.5:
-  version "6.12.6"
-  resolved "https://registry.yarnpkg.com/ajv/-/ajv-6.12.6.tgz#baf5a62e802b07d977034586f8c3baf5adf26df4"
-  integrity sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==
-  dependencies:
-    fast-deep-equal "^3.1.1"
-    fast-json-stable-stringify "^2.0.0"
-    json-schema-traverse "^0.4.1"
-    uri-js "^4.2.2"
-
-ajv@^7.0.2:
-  version "7.1.1"
-  resolved "https://registry.yarnpkg.com/ajv/-/ajv-7.1.1.tgz#1e6b37a454021fa9941713f38b952fc1c8d32a84"
-  integrity sha512-ga/aqDYnUy/o7vbsRTFhhTsNeXiYb5JWDIcRIeZfwRNCefwjNTVYCGdGSUrEmiu3yDK3vFvNbgJxvrQW4JXrYQ==
-  dependencies:
-    fast-deep-equal "^3.1.1"
-    json-schema-traverse "^1.0.0"
-    require-from-string "^2.0.2"
-    uri-js "^4.2.2"
-
-alphanum-sort@^1.0.0:
-  version "1.0.2"
-  resolved "https://registry.yarnpkg.com/alphanum-sort/-/alphanum-sort-1.0.2.tgz#97a1119649b211ad33691d9f9f486a8ec9fbe0a3"
-  integrity sha1-l6ERlkmyEa0zaR2fn0hqjsn74KM=
-
-ansi-colors@^3.0.0:
-  version "3.2.4"
-  resolved "https://registry.yarnpkg.com/ansi-colors/-/ansi-colors-3.2.4.tgz#e3a3da4bfbae6c86a9c285625de124a234026fbf"
-  integrity sha512-hHUXGagefjN2iRrID63xckIvotOXOojhQKWIPUZ4mNUZ9nLZW+7FMNoE1lOkEhNWYsx/7ysGIuJYCiMAA9FnrA==
-
-ansi-colors@^4.1.1:
-  version "4.1.1"
-  resolved "https://registry.yarnpkg.com/ansi-colors/-/ansi-colors-4.1.1.tgz#cbb9ae256bf750af1eab344f229aa27fe94ba348"
-  integrity sha512-JoX0apGbHaUJBNl6yF+p6JAFYZ666/hhCGKN5t9QFjbJQKUU/g8MNbFDbvfrgKXvI1QpZplPOnwIo99lX/AAmA==
-
-ansi-escapes@^4.2.1, ansi-escapes@^4.3.1:
-  version "4.3.1"
-  resolved "https://registry.yarnpkg.com/ansi-escapes/-/ansi-escapes-4.3.1.tgz#a5c47cc43181f1f38ffd7076837700d395522a61"
-  integrity sha512-JWF7ocqNrp8u9oqpgV+wH5ftbt+cfvv+PTjOvKLT3AdYly/LmORARfEVT1iyjwN+4MqE5UmVKoAdIBqeoCHgLA==
-  dependencies:
-    type-fest "^0.11.0"
-
-ansi-html@0.0.7, ansi-html@^0.0.7:
-  version "0.0.7"
-  resolved "https://registry.yarnpkg.com/ansi-html/-/ansi-html-0.0.7.tgz#813584021962a9e9e6fd039f940d12f56ca7859e"
-  integrity sha1-gTWEAhliqenm/QOflA0S9WynhZ4=
-
-ansi-regex@^2.0.0:
-  version "2.1.1"
-  resolved "https://registry.yarnpkg.com/ansi-regex/-/ansi-regex-2.1.1.tgz#c3b33ab5ee360d86e0e628f0468ae7ef27d654df"
-  integrity sha1-w7M6te42DYbg5ijwRorn7yfWVN8=
-
-ansi-regex@^4.1.0:
-  version "4.1.0"
-  resolved "https://registry.yarnpkg.com/ansi-regex/-/ansi-regex-4.1.0.tgz#8b9f8f08cf1acb843756a839ca8c7e3168c51997"
-  integrity sha512-1apePfXM1UOSqw0o9IiFAovVz9M5S1Dg+4TrDwfMewQ6p/rmMueb7tWZjQ1rx4Loy1ArBggoqGpfqqdI4rondg==
-
-ansi-regex@^5.0.0:
-  version "5.0.0"
-  resolved "https://registry.yarnpkg.com/ansi-regex/-/ansi-regex-5.0.0.tgz#388539f55179bf39339c81af30a654d69f87cb75"
-  integrity sha512-bY6fj56OUQ0hU1KjFNDQuJFezqKdrAyFdIevADiqrWHwSlbmBNMHp5ak2f40Pm8JTFyM2mqxkG6ngkHO11f/lg==
-
-ansi-styles@^3.2.0, ansi-styles@^3.2.1:
-  version "3.2.1"
-  resolved "https://registry.yarnpkg.com/ansi-styles/-/ansi-styles-3.2.1.tgz#41fbb20243e50b12be0f04b8dedbf07520ce841d"
-  integrity sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA==
-  dependencies:
-    color-convert "^1.9.0"
-
-ansi-styles@^4.0.0, ansi-styles@^4.1.0:
-  version "4.3.0"
-  resolved "https://registry.yarnpkg.com/ansi-styles/-/ansi-styles-4.3.0.tgz#edd803628ae71c04c85ae7a0906edad34b648937"
-  integrity sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==
-  dependencies:
-    color-convert "^2.0.1"
-
-anymatch@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/anymatch/-/anymatch-2.0.0.tgz#bcb24b4f37934d9aa7ac17b4adaf89e7c76ef2eb"
-  integrity sha512-5teOsQWABXHHBFP9y3skS5P3d/WfWXpv3FUpy+LorMrNYaT9pI4oLMQX7jzQ2KklNpGpWHzdCXTDT2Y3XGlZBw==
-  dependencies:
-    micromatch "^3.1.4"
-    normalize-path "^2.1.1"
-
-anymatch@^3.0.3, anymatch@~3.1.1:
-  version "3.1.1"
-  resolved "https://registry.yarnpkg.com/anymatch/-/anymatch-3.1.1.tgz#c55ecf02185e2469259399310c173ce31233b142"
-  integrity sha512-mM8522psRCqzV+6LhomX5wgp25YVibjh8Wj23I5RPkPppSVSjyKD2A2mBJmWGa+KN7f2D6LNh9jkBCeyLktzjg==
-  dependencies:
-    normalize-path "^3.0.0"
-    picomatch "^2.0.4"
-
-aproba@^1.1.1:
-  version "1.2.0"
-  resolved "https://registry.yarnpkg.com/aproba/-/aproba-1.2.0.tgz#6802e6264efd18c790a1b0d517f0f2627bf2c94a"
-  integrity sha512-Y9J6ZjXtoYh8RnXVCMOU/ttDmk1aBjunq9vO0ta5x85WDQiQfUF9sIPBITdbiiIVcBo03Hi3jMxigBtsddlXRw==
-
-argparse@^1.0.7:
-  version "1.0.10"
-  resolved "https://registry.yarnpkg.com/argparse/-/argparse-1.0.10.tgz#bcd6791ea5ae09725e17e5ad988134cd40b3d911"
-  integrity sha512-o5Roy6tNG4SL/FOkCAN6RzjiakZS25RLYFrcMttJqbdd8BWrnA+fGz57iN5Pb06pvBGvl5gQ0B48dJlslXvoTg==
-  dependencies:
-    sprintf-js "~1.0.2"
-
-aria-hidden@^1.1.1:
-  version "1.1.2"
-  resolved "https://registry.yarnpkg.com/aria-hidden/-/aria-hidden-1.1.2.tgz#5354315a29bffdaced3993fccd826817dc8c5272"
-  integrity sha512-WAMH9q3vRimVqP+B0q2eDvx7IPDoY17A2fWwj5atTA/zTYJCNcS6HJ5YErZ5FO3PUHhrV0y0yR1NA0dRNm913A==
-  dependencies:
-    tslib "^1.0.0"
-
-aria-query@^4.2.2:
-  version "4.2.2"
-  resolved "https://registry.yarnpkg.com/aria-query/-/aria-query-4.2.2.tgz#0d2ca6c9aceb56b8977e9fed6aed7e15bbd2f83b"
-  integrity sha512-o/HelwhuKpTj/frsOsbNLNgnNGVIFsVP/SW2BSF14gVl7kAfMOJ6/8wUAUvG1R1NHKrfG+2sHZTu0yauT1qBrA==
-  dependencies:
-    "@babel/runtime" "^7.10.2"
-    "@babel/runtime-corejs3" "^7.10.2"
-
-arity-n@^1.0.4:
-  version "1.0.4"
-  resolved "https://registry.yarnpkg.com/arity-n/-/arity-n-1.0.4.tgz#d9e76b11733e08569c0847ae7b39b2860b30b745"
-  integrity sha1-2edrEXM+CFacCEeuezmyhgswt0U=
-
-arr-diff@^4.0.0:
-  version "4.0.0"
-  resolved "https://registry.yarnpkg.com/arr-diff/-/arr-diff-4.0.0.tgz#d6461074febfec71e7e15235761a329a5dc7c520"
-  integrity sha1-1kYQdP6/7HHn4VI1dhoyml3HxSA=
-
-arr-flatten@^1.1.0:
-  version "1.1.0"
-  resolved "https://registry.yarnpkg.com/arr-flatten/-/arr-flatten-1.1.0.tgz#36048bbff4e7b47e136644316c99669ea5ae91f1"
-  integrity sha512-L3hKV5R/p5o81R7O02IGnwpDmkp6E982XhtbuwSe3O4qOtMMMtodicASA1Cny2U+aCXcNpml+m4dPsvsJ3jatg==
-
-arr-union@^3.1.0:
-  version "3.1.0"
-  resolved "https://registry.yarnpkg.com/arr-union/-/arr-union-3.1.0.tgz#e39b09aea9def866a8f206e288af63919bae39c4"
-  integrity sha1-45sJrqne+Gao8gbiiK9jkZuuOcQ=
-
-array-flatten@1.1.1:
-  version "1.1.1"
-  resolved "https://registry.yarnpkg.com/array-flatten/-/array-flatten-1.1.1.tgz#9a5f699051b1e7073328f2a008968b64ea2955d2"
-  integrity sha1-ml9pkFGx5wczKPKgCJaLZOopVdI=
-
-array-flatten@^2.1.0:
-  version "2.1.2"
-  resolved "https://registry.yarnpkg.com/array-flatten/-/array-flatten-2.1.2.tgz#24ef80a28c1a893617e2149b0c6d0d788293b099"
-  integrity sha512-hNfzcOV8W4NdualtqBFPyVO+54DSJuZGY9qT4pRroB6S9e3iiido2ISIC5h9R2sPJ8H3FHCIiEnsv1lPXO3KtQ==
-
-array-includes@^3.1.1, array-includes@^3.1.2:
-  version "3.1.3"
-  resolved "https://registry.yarnpkg.com/array-includes/-/array-includes-3.1.3.tgz#c7f619b382ad2afaf5326cddfdc0afc61af7690a"
-  integrity sha512-gcem1KlBU7c9rB+Rq8/3PPKsK2kjqeEBa3bD5kkQo4nYlOHQCJqIJFqBXDEfwaRuYTT4E+FxA9xez7Gf/e3Q7A==
-  dependencies:
-    call-bind "^1.0.2"
-    define-properties "^1.1.3"
-    es-abstract "^1.18.0-next.2"
-    get-intrinsic "^1.1.1"
-    is-string "^1.0.5"
-
-array-union@^1.0.1:
-  version "1.0.2"
-  resolved "https://registry.yarnpkg.com/array-union/-/array-union-1.0.2.tgz#9a34410e4f4e3da23dea375be5be70f24778ec39"
-  integrity sha1-mjRBDk9OPaI96jdb5b5w8kd47Dk=
-  dependencies:
-    array-uniq "^1.0.1"
-
-array-union@^2.1.0:
-  version "2.1.0"
-  resolved "https://registry.yarnpkg.com/array-union/-/array-union-2.1.0.tgz#b798420adbeb1de828d84acd8a2e23d3efe85e8d"
-  integrity sha512-HGyxoOTYUyCM6stUe6EJgnd4EoewAI7zMdfqO+kGjnlZmBDz/cR5pf8r/cR4Wq60sL/p0IkcjUEEPwS3GFrIyw==
-
-array-uniq@^1.0.1:
-  version "1.0.3"
-  resolved "https://registry.yarnpkg.com/array-uniq/-/array-uniq-1.0.3.tgz#af6ac877a25cc7f74e058894753858dfdb24fdb6"
-  integrity sha1-r2rId6Jcx/dOBYiUdThY39sk/bY=
-
-array-unique@^0.3.2:
-  version "0.3.2"
-  resolved "https://registry.yarnpkg.com/array-unique/-/array-unique-0.3.2.tgz#a894b75d4bc4f6cd679ef3244a9fd8f46ae2d428"
-  integrity sha1-qJS3XUvE9s1nnvMkSp/Y9Gri1Cg=
-
-array.prototype.flat@^1.2.3:
-  version "1.2.4"
-  resolved "https://registry.yarnpkg.com/array.prototype.flat/-/array.prototype.flat-1.2.4.tgz#6ef638b43312bd401b4c6199fdec7e2dc9e9a123"
-  integrity sha512-4470Xi3GAPAjZqFcljX2xzckv1qeKPizoNkiS0+O4IoPR2ZNpcjE0pkhdihlDouK+x6QOast26B4Q/O9DJnwSg==
-  dependencies:
-    call-bind "^1.0.0"
-    define-properties "^1.1.3"
-    es-abstract "^1.18.0-next.1"
-
-array.prototype.flatmap@^1.2.3:
-  version "1.2.4"
-  resolved "https://registry.yarnpkg.com/array.prototype.flatmap/-/array.prototype.flatmap-1.2.4.tgz#94cfd47cc1556ec0747d97f7c7738c58122004c9"
-  integrity sha512-r9Z0zYoxqHz60vvQbWEdXIEtCwHF0yxaWfno9qzXeNHvfyl3BZqygmGzb84dsubyaXLH4husF+NFgMSdpZhk2Q==
-  dependencies:
-    call-bind "^1.0.0"
-    define-properties "^1.1.3"
-    es-abstract "^1.18.0-next.1"
-    function-bind "^1.1.1"
-
-arrify@^2.0.1:
-  version "2.0.1"
-  resolved "https://registry.yarnpkg.com/arrify/-/arrify-2.0.1.tgz#c9655e9331e0abcd588d2a7cad7e9956f66701fa"
-  integrity sha512-3duEwti880xqi4eAMN8AyR4a0ByT90zoYdLlevfrvU43vb0YZwZVfxOgxWrLXXXpyugL0hNZc9G6BiB5B3nUug==
-
-asap@~2.0.6:
-  version "2.0.6"
-  resolved "https://registry.yarnpkg.com/asap/-/asap-2.0.6.tgz#e50347611d7e690943208bbdafebcbc2fb866d46"
-  integrity sha1-5QNHYR1+aQlDIIu9r+vLwvuGbUY=
-
-asn1.js@^5.2.0:
-  version "5.4.1"
-  resolved "https://registry.yarnpkg.com/asn1.js/-/asn1.js-5.4.1.tgz#11a980b84ebb91781ce35b0fdc2ee294e3783f07"
-  integrity sha512-+I//4cYPccV8LdmBLiX8CYvf9Sp3vQsrqu2QNXRcrbiWvcx/UdlFiqUJJzxRQxgsZmvhXhn4cSKeSmoFjVdupA==
-  dependencies:
-    bn.js "^4.0.0"
-    inherits "^2.0.1"
-    minimalistic-assert "^1.0.0"
-    safer-buffer "^2.1.0"
-
-asn1@~0.2.3:
-  version "0.2.4"
-  resolved "https://registry.yarnpkg.com/asn1/-/asn1-0.2.4.tgz#8d2475dfab553bb33e77b54e59e880bb8ce23136"
-  integrity sha512-jxwzQpLQjSmWXgwaCZE9Nz+glAG01yF1QnWgbhGwHI5A6FRIEY6IVqtHhIepHqI7/kyEyQEagBC5mBEFlIYvdg==
-  dependencies:
-    safer-buffer "~2.1.0"
-
-assert-plus@1.0.0, assert-plus@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/assert-plus/-/assert-plus-1.0.0.tgz#f12e0f3c5d77b0b1cdd9146942e4e96c1e4dd525"
-  integrity sha1-8S4PPF13sLHN2RRpQuTpbB5N1SU=
-
-assert@^1.1.1:
-  version "1.5.0"
-  resolved "https://registry.yarnpkg.com/assert/-/assert-1.5.0.tgz#55c109aaf6e0aefdb3dc4b71240c70bf574b18eb"
-  integrity sha512-EDsgawzwoun2CZkCgtxJbv392v4nbk9XDD06zI+kQYoBM/3RBWLlEyJARDOmhAAosBjWACEkKL6S+lIZtcAubA==
-  dependencies:
-    object-assign "^4.1.1"
-    util "0.10.3"
-
-assign-symbols@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/assign-symbols/-/assign-symbols-1.0.0.tgz#59667f41fadd4f20ccbc2bb96b8d4f7f78ec0367"
-  integrity sha1-WWZ/QfrdTyDMvCu5a41Pf3jsA2c=
-
-ast-types-flow@^0.0.7:
-  version "0.0.7"
-  resolved "https://registry.yarnpkg.com/ast-types-flow/-/ast-types-flow-0.0.7.tgz#f70b735c6bca1a5c9c22d982c3e39e7feba3bdad"
-  integrity sha1-9wtzXGvKGlycItmCw+Oef+ujva0=
-
-astral-regex@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/astral-regex/-/astral-regex-2.0.0.tgz#483143c567aeed4785759c0865786dc77d7d2e31"
-  integrity sha512-Z7tMw1ytTXt5jqMcOP+OQteU1VuNK9Y02uuJtKQ1Sv69jXQKKg5cibLwGJow8yzZP+eAc18EmLGPal0bp36rvQ==
-
-async-each@^1.0.1:
-  version "1.0.3"
-  resolved "https://registry.yarnpkg.com/async-each/-/async-each-1.0.3.tgz#b727dbf87d7651602f06f4d4ac387f47d91b0cbf"
-  integrity sha512-z/WhQ5FPySLdvREByI2vZiTWwCnF0moMJ1hK9YQwDTHKh6I7/uSckMetoRGb5UBZPC1z0jlw+n/XCgjeH7y1AQ==
-
-async-limiter@~1.0.0:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/async-limiter/-/async-limiter-1.0.1.tgz#dd379e94f0db8310b08291f9d64c3209766617fd"
-  integrity sha512-csOlWGAcRFJaI6m+F2WKdnMKr4HhdhFVBk0H/QbJFMCr+uO2kwohwXQPxw/9OCxp05r5ghVBFSyioixx3gfkNQ==
-
-async@^2.6.2:
-  version "2.6.3"
-  resolved "https://registry.yarnpkg.com/async/-/async-2.6.3.tgz#d72625e2344a3656e3a3ad4fa749fa83299d82ff"
-  integrity sha512-zflvls11DCy+dQWzTW2dzuilv8Z5X/pjfmZOWba6TNIVDm+2UDaJmXSOXlasHKfNBs8oo3M0aT50fDEWfKZjXg==
-  dependencies:
-    lodash "^4.17.14"
-
-asynckit@^0.4.0:
-  version "0.4.0"
-  resolved "https://registry.yarnpkg.com/asynckit/-/asynckit-0.4.0.tgz#c79ed97f7f34cb8f2ba1bc9790bcc366474b4b79"
-  integrity sha1-x57Zf380y48robyXkLzDZkdLS3k=
-
-at-least-node@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/at-least-node/-/at-least-node-1.0.0.tgz#602cd4b46e844ad4effc92a8011a3c46e0238dc2"
-  integrity sha512-+q/t7Ekv1EDY2l6Gda6LLiX14rU9TV20Wa3ofeQmwPFZbOMo9DXrLbOjFaaclkXKWidIaopwAObQDqwWtGUjqg==
-
-atob@^2.1.2:
-  version "2.1.2"
-  resolved "https://registry.yarnpkg.com/atob/-/atob-2.1.2.tgz#6d9517eb9e030d2436666651e86bd9f6f13533c9"
-  integrity sha512-Wm6ukoaOGJi/73p/cl2GvLjTI5JM1k/O14isD73YML8StrH/7/lRFgmg8nICZgD3bZZvjwCGxtMOD3wWNAu8cg==
-
-autoprefixer@^9.6.1:
-  version "9.8.6"
-  resolved "https://registry.yarnpkg.com/autoprefixer/-/autoprefixer-9.8.6.tgz#3b73594ca1bf9266320c5acf1588d74dea74210f"
-  integrity sha512-XrvP4VVHdRBCdX1S3WXVD8+RyG9qeb1D5Sn1DeLiG2xfSpzellk5k54xbUERJ3M5DggQxes39UGOTP8CFrEGbg==
-  dependencies:
-    browserslist "^4.12.0"
-    caniuse-lite "^1.0.30001109"
-    colorette "^1.2.1"
-    normalize-range "^0.1.2"
-    num2fraction "^1.2.2"
-    postcss "^7.0.32"
-    postcss-value-parser "^4.1.0"
-
-aws-sign2@~0.7.0:
-  version "0.7.0"
-  resolved "https://registry.yarnpkg.com/aws-sign2/-/aws-sign2-0.7.0.tgz#b46e890934a9591f2d2f6f86d7e6a9f1b3fe76a8"
-  integrity sha1-tG6JCTSpWR8tL2+G1+ap8bP+dqg=
-
-aws4@^1.8.0:
-  version "1.11.0"
-  resolved "https://registry.yarnpkg.com/aws4/-/aws4-1.11.0.tgz#d61f46d83b2519250e2784daf5b09479a8b41c59"
-  integrity sha512-xh1Rl34h6Fi1DC2WWKfxUTVqRsNnr6LsKz2+hfwDxQJWmrx8+c7ylaqBMcHfl1U1r2dsifOvKX3LQuLNZ+XSvA==
-
-axe-core@^4.0.2:
-  version "4.1.2"
-  resolved "https://registry.yarnpkg.com/axe-core/-/axe-core-4.1.2.tgz#7cf783331320098bfbef620df3b3c770147bc224"
-  integrity sha512-V+Nq70NxKhYt89ArVcaNL9FDryB3vQOd+BFXZIfO3RP6rwtj+2yqqqdHEkacutglPaZLkJeuXKCjCJDMGPtPqg==
-
-axobject-query@^2.2.0:
-  version "2.2.0"
-  resolved "https://registry.yarnpkg.com/axobject-query/-/axobject-query-2.2.0.tgz#943d47e10c0b704aa42275e20edf3722648989be"
-  integrity sha512-Td525n+iPOOyUQIeBfcASuG6uJsDOITl7Mds5gFyerkWiX7qhUTdYUBlSgNMyVqtSJqwpt1kXGLdUt6SykLMRA==
-
-babel-eslint@^10.1.0:
-  version "10.1.0"
-  resolved "https://registry.yarnpkg.com/babel-eslint/-/babel-eslint-10.1.0.tgz#6968e568a910b78fb3779cdd8b6ac2f479943232"
-  integrity sha512-ifWaTHQ0ce+448CYop8AdrQiBsGrnC+bMgfyKFdi6EsPLTAWG+QfyDeM6OH+FmWnKvEq5NnBMLvlBUPKQZoDSg==
-  dependencies:
-    "@babel/code-frame" "^7.0.0"
-    "@babel/parser" "^7.7.0"
-    "@babel/traverse" "^7.7.0"
-    "@babel/types" "^7.7.0"
-    eslint-visitor-keys "^1.0.0"
-    resolve "^1.12.0"
-
-babel-extract-comments@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/babel-extract-comments/-/babel-extract-comments-1.0.0.tgz#0a2aedf81417ed391b85e18b4614e693a0351a21"
-  integrity sha512-qWWzi4TlddohA91bFwgt6zO/J0X+io7Qp184Fw0m2JYRSTZnJbFR8+07KmzudHCZgOiKRCrjhylwv9Xd8gfhVQ==
-  dependencies:
-    babylon "^6.18.0"
-
-babel-jest@^26.6.0, babel-jest@^26.6.3:
-  version "26.6.3"
-  resolved "https://registry.yarnpkg.com/babel-jest/-/babel-jest-26.6.3.tgz#d87d25cb0037577a0c89f82e5755c5d293c01056"
-  integrity sha512-pl4Q+GAVOHwvjrck6jKjvmGhnO3jHX/xuB9d27f+EJZ/6k+6nMuPjorrYp7s++bKKdANwzElBWnLWaObvTnaZA==
-  dependencies:
-    "@jest/transform" "^26.6.2"
-    "@jest/types" "^26.6.2"
-    "@types/babel__core" "^7.1.7"
-    babel-plugin-istanbul "^6.0.0"
-    babel-preset-jest "^26.6.2"
-    chalk "^4.0.0"
-    graceful-fs "^4.2.4"
-    slash "^3.0.0"
-
-babel-loader@8.1.0:
-  version "8.1.0"
-  resolved "https://registry.yarnpkg.com/babel-loader/-/babel-loader-8.1.0.tgz#c611d5112bd5209abe8b9fa84c3e4da25275f1c3"
-  integrity sha512-7q7nC1tYOrqvUrN3LQK4GwSk/TQorZSOlO9C+RZDZpODgyN4ZlCqE5q9cDsyWOliN+aU9B4JX01xK9eJXowJLw==
-  dependencies:
-    find-cache-dir "^2.1.0"
-    loader-utils "^1.4.0"
-    mkdirp "^0.5.3"
-    pify "^4.0.1"
-    schema-utils "^2.6.5"
-
-babel-plugin-dynamic-import-node@^2.3.3:
-  version "2.3.3"
-  resolved "https://registry.yarnpkg.com/babel-plugin-dynamic-import-node/-/babel-plugin-dynamic-import-node-2.3.3.tgz#84fda19c976ec5c6defef57f9427b3def66e17a3"
-  integrity sha512-jZVI+s9Zg3IqA/kdi0i6UDCybUI3aSBLnglhYbSSjKlV7yF1F/5LWv8MakQmvYpnbJDS6fcBL2KzHSxNCMtWSQ==
-  dependencies:
-    object.assign "^4.1.0"
-
-babel-plugin-istanbul@^6.0.0:
-  version "6.0.0"
-  resolved "https://registry.yarnpkg.com/babel-plugin-istanbul/-/babel-plugin-istanbul-6.0.0.tgz#e159ccdc9af95e0b570c75b4573b7c34d671d765"
-  integrity sha512-AF55rZXpe7trmEylbaE1Gv54wn6rwU03aptvRoVIGP8YykoSxqdVLV1TfwflBCE/QtHmqtP8SWlTENqbK8GCSQ==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.0.0"
-    "@istanbuljs/load-nyc-config" "^1.0.0"
-    "@istanbuljs/schema" "^0.1.2"
-    istanbul-lib-instrument "^4.0.0"
-    test-exclude "^6.0.0"
-
-babel-plugin-jest-hoist@^26.6.2:
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/babel-plugin-jest-hoist/-/babel-plugin-jest-hoist-26.6.2.tgz#8185bd030348d254c6d7dd974355e6a28b21e62d"
-  integrity sha512-PO9t0697lNTmcEHH69mdtYiOIkkOlj9fySqfO3K1eCcdISevLAE0xY59VLLUj0SoiPiTX/JU2CYFpILydUa5Lw==
-  dependencies:
-    "@babel/template" "^7.3.3"
-    "@babel/types" "^7.3.3"
-    "@types/babel__core" "^7.0.0"
-    "@types/babel__traverse" "^7.0.6"
-
-babel-plugin-macros@2.8.0, babel-plugin-macros@^2.6.1:
-  version "2.8.0"
-  resolved "https://registry.yarnpkg.com/babel-plugin-macros/-/babel-plugin-macros-2.8.0.tgz#0f958a7cc6556b1e65344465d99111a1e5e10138"
-  integrity sha512-SEP5kJpfGYqYKpBrj5XU3ahw5p5GOHJ0U5ssOSQ/WBVdwkD2Dzlce95exQTs3jOVWPPKLBN2rlEWkCK7dSmLvg==
-  dependencies:
-    "@babel/runtime" "^7.7.2"
-    cosmiconfig "^6.0.0"
-    resolve "^1.12.0"
-
-babel-plugin-named-asset-import@^0.3.7:
-  version "0.3.7"
-  resolved "https://registry.yarnpkg.com/babel-plugin-named-asset-import/-/babel-plugin-named-asset-import-0.3.7.tgz#156cd55d3f1228a5765774340937afc8398067dd"
-  integrity sha512-squySRkf+6JGnvjoUtDEjSREJEBirnXi9NqP6rjSYsylxQxqBTz+pkmf395i9E2zsvmYUaI40BHo6SqZUdydlw==
-
-babel-plugin-syntax-object-rest-spread@^6.8.0:
-  version "6.13.0"
-  resolved "https://registry.yarnpkg.com/babel-plugin-syntax-object-rest-spread/-/babel-plugin-syntax-object-rest-spread-6.13.0.tgz#fd6536f2bce13836ffa3a5458c4903a597bb3bf5"
-  integrity sha1-/WU28rzhODb/o6VFjEkDpZe7O/U=
-
-babel-plugin-transform-object-rest-spread@^6.26.0:
-  version "6.26.0"
-  resolved "https://registry.yarnpkg.com/babel-plugin-transform-object-rest-spread/-/babel-plugin-transform-object-rest-spread-6.26.0.tgz#0f36692d50fef6b7e2d4b3ac1478137a963b7b06"
-  integrity sha1-DzZpLVD+9rfi1LOsFHgTepY7ewY=
-  dependencies:
-    babel-plugin-syntax-object-rest-spread "^6.8.0"
-    babel-runtime "^6.26.0"
-
-babel-plugin-transform-react-remove-prop-types@0.4.24:
-  version "0.4.24"
-  resolved "https://registry.yarnpkg.com/babel-plugin-transform-react-remove-prop-types/-/babel-plugin-transform-react-remove-prop-types-0.4.24.tgz#f2edaf9b4c6a5fbe5c1d678bfb531078c1555f3a"
-  integrity sha512-eqj0hVcJUR57/Ug2zE1Yswsw4LhuqqHhD+8v120T1cl3kjg76QwtyBrdIk4WVwK+lAhBJVYCd/v+4nc4y+8JsA==
-
-babel-preset-current-node-syntax@^1.0.0:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/babel-preset-current-node-syntax/-/babel-preset-current-node-syntax-1.0.1.tgz#b4399239b89b2a011f9ddbe3e4f401fc40cff73b"
-  integrity sha512-M7LQ0bxarkxQoN+vz5aJPsLBn77n8QgTFmo8WK0/44auK2xlCXrYcUxHFxgU7qW5Yzw/CjmLRK2uJzaCd7LvqQ==
-  dependencies:
-    "@babel/plugin-syntax-async-generators" "^7.8.4"
-    "@babel/plugin-syntax-bigint" "^7.8.3"
-    "@babel/plugin-syntax-class-properties" "^7.8.3"
-    "@babel/plugin-syntax-import-meta" "^7.8.3"
-    "@babel/plugin-syntax-json-strings" "^7.8.3"
-    "@babel/plugin-syntax-logical-assignment-operators" "^7.8.3"
-    "@babel/plugin-syntax-nullish-coalescing-operator" "^7.8.3"
-    "@babel/plugin-syntax-numeric-separator" "^7.8.3"
-    "@babel/plugin-syntax-object-rest-spread" "^7.8.3"
-    "@babel/plugin-syntax-optional-catch-binding" "^7.8.3"
-    "@babel/plugin-syntax-optional-chaining" "^7.8.3"
-    "@babel/plugin-syntax-top-level-await" "^7.8.3"
-
-babel-preset-jest@^26.6.2:
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/babel-preset-jest/-/babel-preset-jest-26.6.2.tgz#747872b1171df032252426586881d62d31798fee"
-  integrity sha512-YvdtlVm9t3k777c5NPQIv6cxFFFapys25HiUmuSgHwIZhfifweR5c5Sf5nwE3MAbfu327CYSvps8Yx6ANLyleQ==
-  dependencies:
-    babel-plugin-jest-hoist "^26.6.2"
-    babel-preset-current-node-syntax "^1.0.0"
-
-babel-preset-react-app@^10.0.0:
-  version "10.0.0"
-  resolved "https://registry.yarnpkg.com/babel-preset-react-app/-/babel-preset-react-app-10.0.0.tgz#689b60edc705f8a70ce87f47ab0e560a317d7045"
-  integrity sha512-itL2z8v16khpuKutx5IH8UdCdSTuzrOhRFTEdIhveZ2i1iBKDrVE0ATa4sFVy+02GLucZNVBWtoarXBy0Msdpg==
-  dependencies:
-    "@babel/core" "7.12.3"
-    "@babel/plugin-proposal-class-properties" "7.12.1"
-    "@babel/plugin-proposal-decorators" "7.12.1"
-    "@babel/plugin-proposal-nullish-coalescing-operator" "7.12.1"
-    "@babel/plugin-proposal-numeric-separator" "7.12.1"
-    "@babel/plugin-proposal-optional-chaining" "7.12.1"
-    "@babel/plugin-transform-flow-strip-types" "7.12.1"
-    "@babel/plugin-transform-react-display-name" "7.12.1"
-    "@babel/plugin-transform-runtime" "7.12.1"
-    "@babel/preset-env" "7.12.1"
-    "@babel/preset-react" "7.12.1"
-    "@babel/preset-typescript" "7.12.1"
-    "@babel/runtime" "7.12.1"
-    babel-plugin-macros "2.8.0"
-    babel-plugin-transform-react-remove-prop-types "0.4.24"
-
-babel-runtime@^6.26.0:
-  version "6.26.0"
-  resolved "https://registry.yarnpkg.com/babel-runtime/-/babel-runtime-6.26.0.tgz#965c7058668e82b55d7bfe04ff2337bc8b5647fe"
-  integrity sha1-llxwWGaOgrVde/4E/yM3vItWR/4=
-  dependencies:
-    core-js "^2.4.0"
-    regenerator-runtime "^0.11.0"
-
-babylon@^6.18.0:
-  version "6.18.0"
-  resolved "https://registry.yarnpkg.com/babylon/-/babylon-6.18.0.tgz#af2f3b88fa6f5c1e4c634d1a0f8eac4f55b395e3"
-  integrity sha512-q/UEjfGJ2Cm3oKV71DJz9d25TPnq5rhBVL2Q4fA5wcC3jcrdn7+SssEybFIxwAvvP+YCsCYNKughoF33GxgycQ==
-
-balanced-match@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/balanced-match/-/balanced-match-1.0.0.tgz#89b4d199ab2bee49de164ea02b89ce462d71b767"
-  integrity sha1-ibTRmasr7kneFk6gK4nORi1xt2c=
-
-base64-js@^1.0.2:
-  version "1.5.1"
-  resolved "https://registry.yarnpkg.com/base64-js/-/base64-js-1.5.1.tgz#1b1b440160a5bf7ad40b650f095963481903930a"
-  integrity sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==
-
-base@^0.11.1:
-  version "0.11.2"
-  resolved "https://registry.yarnpkg.com/base/-/base-0.11.2.tgz#7bde5ced145b6d551a90db87f83c558b4eb48a8f"
-  integrity sha512-5T6P4xPgpp0YDFvSWwEZ4NoE3aM4QBQXDzmVbraCkFj8zHM+mba8SyqB5DbZWyR7mYHo6Y7BdQo3MoA4m0TeQg==
-  dependencies:
-    cache-base "^1.0.1"
-    class-utils "^0.3.5"
-    component-emitter "^1.2.1"
-    define-property "^1.0.0"
-    isobject "^3.0.1"
-    mixin-deep "^1.2.0"
-    pascalcase "^0.1.1"
-
-batch@0.6.1:
-  version "0.6.1"
-  resolved "https://registry.yarnpkg.com/batch/-/batch-0.6.1.tgz#dc34314f4e679318093fc760272525f94bf25c16"
-  integrity sha1-3DQxT05nkxgJP8dgJyUl+UvyXBY=
-
-bcrypt-pbkdf@^1.0.0:
-  version "1.0.2"
-  resolved "https://registry.yarnpkg.com/bcrypt-pbkdf/-/bcrypt-pbkdf-1.0.2.tgz#a4301d389b6a43f9b67ff3ca11a3f6637e360e9e"
-  integrity sha1-pDAdOJtqQ/m2f/PKEaP2Y342Dp4=
-  dependencies:
-    tweetnacl "^0.14.3"
-
-bfj@^7.0.2:
-  version "7.0.2"
-  resolved "https://registry.yarnpkg.com/bfj/-/bfj-7.0.2.tgz#1988ce76f3add9ac2913fd8ba47aad9e651bfbb2"
-  integrity sha512-+e/UqUzwmzJamNF50tBV6tZPTORow7gQ96iFow+8b562OdMpEK0BcJEq2OSPEDmAbSMBQ7PKZ87ubFkgxpYWgw==
-  dependencies:
-    bluebird "^3.5.5"
-    check-types "^11.1.1"
-    hoopy "^0.1.4"
-    tryer "^1.0.1"
-
-big.js@^5.2.2:
-  version "5.2.2"
-  resolved "https://registry.yarnpkg.com/big.js/-/big.js-5.2.2.tgz#65f0af382f578bcdc742bd9c281e9cb2d7768328"
-  integrity sha512-vyL2OymJxmarO8gxMr0mhChsO9QGwhynfuu4+MHTAW6czfq9humCB7rKpUjDd9YUiDPU4mzpyupFSvOClAwbmQ==
-
-binary-extensions@^1.0.0:
-  version "1.13.1"
-  resolved "https://registry.yarnpkg.com/binary-extensions/-/binary-extensions-1.13.1.tgz#598afe54755b2868a5330d2aff9d4ebb53209b65"
-  integrity sha512-Un7MIEDdUC5gNpcGDV97op1Ywk748MpHcFTHoYs6qnj1Z3j7I53VG3nwZhKzoBZmbdRNnb6WRdFlwl7tSDuZGw==
-
-binary-extensions@^2.0.0:
-  version "2.2.0"
-  resolved "https://registry.yarnpkg.com/binary-extensions/-/binary-extensions-2.2.0.tgz#75f502eeaf9ffde42fc98829645be4ea76bd9e2d"
-  integrity sha512-jDctJ/IVQbZoJykoeHbhXpOlNBqGNcwXJKJog42E5HDPUwQTSdjCHdihjj0DlnheQ7blbT6dHOafNAiS8ooQKA==
-
-bindings@^1.5.0:
-  version "1.5.0"
-  resolved "https://registry.yarnpkg.com/bindings/-/bindings-1.5.0.tgz#10353c9e945334bc0511a6d90b38fbc7c9c504df"
-  integrity sha512-p2q/t/mhvuOj/UeLlV6566GD/guowlr0hHxClI0W9m7MWYkL1F0hLo+0Aexs9HSPCtR1SXQ0TD3MMKrXZajbiQ==
-  dependencies:
-    file-uri-to-path "1.0.0"
-
-bluebird@^3.5.5:
-  version "3.7.2"
-  resolved "https://registry.yarnpkg.com/bluebird/-/bluebird-3.7.2.tgz#9f229c15be272454ffa973ace0dbee79a1b0c36f"
-  integrity sha512-XpNj6GDQzdfW+r2Wnn7xiSAd7TM3jzkxGXBGTtWKuSXv1xUV+azxAm8jdWZN06QTQk+2N2XB9jRDkvbmQmcRtg==
-
-bn.js@^4.0.0, bn.js@^4.1.0, bn.js@^4.11.9:
-  version "4.11.9"
-  resolved "https://registry.yarnpkg.com/bn.js/-/bn.js-4.11.9.tgz#26d556829458f9d1e81fc48952493d0ba3507828"
-  integrity sha512-E6QoYqCKZfgatHTdHzs1RRKP7ip4vvm+EyRUeE2RF0NblwVvb0p6jSVeNTOFxPn26QXN2o6SMfNxKp6kU8zQaw==
-
-bn.js@^5.0.0, bn.js@^5.1.1:
-  version "5.1.3"
-  resolved "https://registry.yarnpkg.com/bn.js/-/bn.js-5.1.3.tgz#beca005408f642ebebea80b042b4d18d2ac0ee6b"
-  integrity sha512-GkTiFpjFtUzU9CbMeJ5iazkCzGL3jrhzerzZIuqLABjbwRaFt33I9tUdSNryIptM+RxDet6OKm2WnLXzW51KsQ==
-
-body-parser@1.19.0:
-  version "1.19.0"
-  resolved "https://registry.yarnpkg.com/body-parser/-/body-parser-1.19.0.tgz#96b2709e57c9c4e09a6fd66a8fd979844f69f08a"
-  integrity sha512-dhEPs72UPbDnAQJ9ZKMNTP6ptJaionhP5cBb541nXPlW60Jepo9RV/a4fX4XWW9CuFNK22krhrj1+rgzifNCsw==
-  dependencies:
-    bytes "3.1.0"
-    content-type "~1.0.4"
-    debug "2.6.9"
-    depd "~1.1.2"
-    http-errors "1.7.2"
-    iconv-lite "0.4.24"
-    on-finished "~2.3.0"
-    qs "6.7.0"
-    raw-body "2.4.0"
-    type-is "~1.6.17"
-
-bonjour@^3.5.0:
-  version "3.5.0"
-  resolved "https://registry.yarnpkg.com/bonjour/-/bonjour-3.5.0.tgz#8e890a183d8ee9a2393b3844c691a42bcf7bc9f5"
-  integrity sha1-jokKGD2O6aI5OzhExpGkK897yfU=
-  dependencies:
-    array-flatten "^2.1.0"
-    deep-equal "^1.0.1"
-    dns-equal "^1.0.0"
-    dns-txt "^2.0.2"
-    multicast-dns "^6.0.1"
-    multicast-dns-service-types "^1.1.0"
-
-boolbase@^1.0.0, boolbase@~1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/boolbase/-/boolbase-1.0.0.tgz#68dff5fbe60c51eb37725ea9e3ed310dcc1e776e"
-  integrity sha1-aN/1++YMUes3cl6p4+0xDcwed24=
-
-brace-expansion@^1.1.7:
-  version "1.1.11"
-  resolved "https://registry.yarnpkg.com/brace-expansion/-/brace-expansion-1.1.11.tgz#3c7fcbf529d87226f3d2f52b966ff5271eb441dd"
-  integrity sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==
-  dependencies:
-    balanced-match "^1.0.0"
-    concat-map "0.0.1"
-
-braces@^2.3.1, braces@^2.3.2:
-  version "2.3.2"
-  resolved "https://registry.yarnpkg.com/braces/-/braces-2.3.2.tgz#5979fd3f14cd531565e5fa2df1abfff1dfaee729"
-  integrity sha512-aNdbnj9P8PjdXU4ybaWLK2IF3jc/EoDYbC7AazW6to3TRsfXxscC9UXOB5iDiEQrkyIbWp2SLQda4+QAa7nc3w==
-  dependencies:
-    arr-flatten "^1.1.0"
-    array-unique "^0.3.2"
-    extend-shallow "^2.0.1"
-    fill-range "^4.0.0"
-    isobject "^3.0.1"
-    repeat-element "^1.1.2"
-    snapdragon "^0.8.1"
-    snapdragon-node "^2.0.1"
-    split-string "^3.0.2"
-    to-regex "^3.0.1"
-
-braces@^3.0.1, braces@~3.0.2:
-  version "3.0.2"
-  resolved "https://registry.yarnpkg.com/braces/-/braces-3.0.2.tgz#3454e1a462ee8d599e236df336cd9ea4f8afe107"
-  integrity sha512-b8um+L1RzM3WDSzvhm6gIz1yfTbBt6YTlcEKAvsmqCZZFw46z626lVj9j1yEPW33H5H+lBQpZMP1k8l+78Ha0A==
-  dependencies:
-    fill-range "^7.0.1"
-
-brorand@^1.0.1, brorand@^1.1.0:
-  version "1.1.0"
-  resolved "https://registry.yarnpkg.com/brorand/-/brorand-1.1.0.tgz#12c25efe40a45e3c323eb8675a0a0ce57b22371f"
-  integrity sha1-EsJe/kCkXjwyPrhnWgoM5XsiNx8=
-
-browser-process-hrtime@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/browser-process-hrtime/-/browser-process-hrtime-1.0.0.tgz#3c9b4b7d782c8121e56f10106d84c0d0ffc94626"
-  integrity sha512-9o5UecI3GhkpM6DrXr69PblIuWxPKk9Y0jHBRhdocZ2y7YECBFCsHm79Pr3OyR2AvjhDkabFJaDJMYRazHgsow==
-
-browserify-aes@^1.0.0, browserify-aes@^1.0.4:
-  version "1.2.0"
-  resolved "https://registry.yarnpkg.com/browserify-aes/-/browserify-aes-1.2.0.tgz#326734642f403dabc3003209853bb70ad428ef48"
-  integrity sha512-+7CHXqGuspUn/Sl5aO7Ea0xWGAtETPXNSAjHo48JfLdPWcMng33Xe4znFvQweqc/uzk5zSOI3H52CYnjCfb5hA==
-  dependencies:
-    buffer-xor "^1.0.3"
-    cipher-base "^1.0.0"
-    create-hash "^1.1.0"
-    evp_bytestokey "^1.0.3"
-    inherits "^2.0.1"
-    safe-buffer "^5.0.1"
-
-browserify-cipher@^1.0.0:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/browserify-cipher/-/browserify-cipher-1.0.1.tgz#8d6474c1b870bfdabcd3bcfcc1934a10e94f15f0"
-  integrity sha512-sPhkz0ARKbf4rRQt2hTpAHqn47X3llLkUGn+xEJzLjwY8LRs2p0v7ljvI5EyoRO/mexrNunNECisZs+gw2zz1w==
-  dependencies:
-    browserify-aes "^1.0.4"
-    browserify-des "^1.0.0"
-    evp_bytestokey "^1.0.0"
-
-browserify-des@^1.0.0:
-  version "1.0.2"
-  resolved "https://registry.yarnpkg.com/browserify-des/-/browserify-des-1.0.2.tgz#3af4f1f59839403572f1c66204375f7a7f703e9c"
-  integrity sha512-BioO1xf3hFwz4kc6iBhI3ieDFompMhrMlnDFC4/0/vd5MokpuAc3R+LYbwTA9A5Yc9pq9UYPqffKpW2ObuwX5A==
-  dependencies:
-    cipher-base "^1.0.1"
-    des.js "^1.0.0"
-    inherits "^2.0.1"
-    safe-buffer "^5.1.2"
-
-browserify-rsa@^4.0.0, browserify-rsa@^4.0.1:
-  version "4.1.0"
-  resolved "https://registry.yarnpkg.com/browserify-rsa/-/browserify-rsa-4.1.0.tgz#b2fd06b5b75ae297f7ce2dc651f918f5be158c8d"
-  integrity sha512-AdEER0Hkspgno2aR97SAf6vi0y0k8NuOpGnVH3O99rcA5Q6sh8QxcngtHuJ6uXwnfAXNM4Gn1Gb7/MV1+Ymbog==
-  dependencies:
-    bn.js "^5.0.0"
-    randombytes "^2.0.1"
-
-browserify-sign@^4.0.0:
-  version "4.2.1"
-  resolved "https://registry.yarnpkg.com/browserify-sign/-/browserify-sign-4.2.1.tgz#eaf4add46dd54be3bb3b36c0cf15abbeba7956c3"
-  integrity sha512-/vrA5fguVAKKAVTNJjgSm1tRQDHUU6DbwO9IROu/0WAzC8PKhucDSh18J0RMvVeHAn5puMd+QHC2erPRNf8lmg==
-  dependencies:
-    bn.js "^5.1.1"
-    browserify-rsa "^4.0.1"
-    create-hash "^1.2.0"
-    create-hmac "^1.1.7"
-    elliptic "^6.5.3"
-    inherits "^2.0.4"
-    parse-asn1 "^5.1.5"
-    readable-stream "^3.6.0"
-    safe-buffer "^5.2.0"
-
-browserify-zlib@^0.2.0:
-  version "0.2.0"
-  resolved "https://registry.yarnpkg.com/browserify-zlib/-/browserify-zlib-0.2.0.tgz#2869459d9aa3be245fe8fe2ca1f46e2e7f54d73f"
-  integrity sha512-Z942RysHXmJrhqk88FmKBVq/v5tqmSkDz7p54G/MGyjMnCFFnC79XWNbg+Vta8W6Wb2qtSZTSxIGkJrRpCFEiA==
-  dependencies:
-    pako "~1.0.5"
-
-browserslist@4.14.2:
-  version "4.14.2"
-  resolved "https://registry.yarnpkg.com/browserslist/-/browserslist-4.14.2.tgz#1b3cec458a1ba87588cc5e9be62f19b6d48813ce"
-  integrity sha512-HI4lPveGKUR0x2StIz+2FXfDk9SfVMrxn6PLh1JeGUwcuoDkdKZebWiyLRJ68iIPDpMI4JLVDf7S7XzslgWOhw==
-  dependencies:
-    caniuse-lite "^1.0.30001125"
-    electron-to-chromium "^1.3.564"
-    escalade "^3.0.2"
-    node-releases "^1.1.61"
-
-browserslist@^4.0.0, browserslist@^4.12.0, browserslist@^4.14.5, browserslist@^4.16.3, browserslist@^4.6.2, browserslist@^4.6.4:
-  version "4.16.3"
-  resolved "https://registry.yarnpkg.com/browserslist/-/browserslist-4.16.3.tgz#340aa46940d7db878748567c5dea24a48ddf3717"
-  integrity sha512-vIyhWmIkULaq04Gt93txdh+j02yX/JzlyhLYbV3YQCn/zvES3JnY7TifHHvvr1w5hTDluNKMkV05cs4vy8Q7sw==
-  dependencies:
-    caniuse-lite "^1.0.30001181"
-    colorette "^1.2.1"
-    electron-to-chromium "^1.3.649"
-    escalade "^3.1.1"
-    node-releases "^1.1.70"
-
-bser@2.1.1:
-  version "2.1.1"
-  resolved "https://registry.yarnpkg.com/bser/-/bser-2.1.1.tgz#e6787da20ece9d07998533cfd9de6f5c38f4bc05"
-  integrity sha512-gQxTNE/GAfIIrmHLUE3oJyp5FO6HRBfhjnw4/wMmA63ZGDJnWBmgY/lyQBpnDUkGmAhbSe39tx2d/iTOAfglwQ==
-  dependencies:
-    node-int64 "^0.4.0"
-
-buffer-from@^1.0.0:
-  version "1.1.1"
-  resolved "https://registry.yarnpkg.com/buffer-from/-/buffer-from-1.1.1.tgz#32713bc028f75c02fdb710d7c7bcec1f2c6070ef"
-  integrity sha512-MQcXEUbCKtEo7bhqEs6560Hyd4XaovZlO/k9V3hjVUF/zwW7KBVdSK4gIt/bzwS9MbR5qob+F5jusZsb0YQK2A==
-
-buffer-indexof@^1.0.0:
-  version "1.1.1"
-  resolved "https://registry.yarnpkg.com/buffer-indexof/-/buffer-indexof-1.1.1.tgz#52fabcc6a606d1a00302802648ef68f639da268c"
-  integrity sha512-4/rOEg86jivtPTeOUUT61jJO1Ya1TrR/OkqCSZDyq84WJh3LuuiphBYJN+fm5xufIk4XAFcEwte/8WzC8If/1g==
-
-buffer-xor@^1.0.3:
-  version "1.0.3"
-  resolved "https://registry.yarnpkg.com/buffer-xor/-/buffer-xor-1.0.3.tgz#26e61ed1422fb70dd42e6e36729ed51d855fe8d9"
-  integrity sha1-JuYe0UIvtw3ULm42cp7VHYVf6Nk=
-
-buffer@^4.3.0:
-  version "4.9.2"
-  resolved "https://registry.yarnpkg.com/buffer/-/buffer-4.9.2.tgz#230ead344002988644841ab0244af8c44bbe3ef8"
-  integrity sha512-xq+q3SRMOxGivLhBNaUdC64hDTQwejJ+H0T/NB1XMtTVEwNTrfFF3gAxiyW0Bu/xWEGhjVKgUcMhCrUy2+uCWg==
-  dependencies:
-    base64-js "^1.0.2"
-    ieee754 "^1.1.4"
-    isarray "^1.0.0"
-
-builtin-modules@^3.1.0:
-  version "3.2.0"
-  resolved "https://registry.yarnpkg.com/builtin-modules/-/builtin-modules-3.2.0.tgz#45d5db99e7ee5e6bc4f362e008bf917ab5049887"
-  integrity sha512-lGzLKcioL90C7wMczpkY0n/oART3MbBa8R9OFGE1rJxoVI86u4WAGfEk8Wjv10eKSyTHVGkSo3bvBylCEtk7LA==
-
-builtin-status-codes@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/builtin-status-codes/-/builtin-status-codes-3.0.0.tgz#85982878e21b98e1c66425e03d0174788f569ee8"
-  integrity sha1-hZgoeOIbmOHGZCXgPQF0eI9Wnug=
-
-bytes@3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/bytes/-/bytes-3.0.0.tgz#d32815404d689699f85a4ea4fa8755dd13a96048"
-  integrity sha1-0ygVQE1olpn4Wk6k+odV3ROpYEg=
-
-bytes@3.1.0:
-  version "3.1.0"
-  resolved "https://registry.yarnpkg.com/bytes/-/bytes-3.1.0.tgz#f6cf7933a360e0588fa9fde85651cdc7f805d1f6"
-  integrity sha512-zauLjrfCG+xvoyaqLoV8bLVXXNGC4JqlxFCutSDWA6fJrTo2ZuvLYTqZ7aHBLZSMOopbzwv8f+wZcVzfVTI2Dg==
-
-cacache@^12.0.2:
-  version "12.0.4"
-  resolved "https://registry.yarnpkg.com/cacache/-/cacache-12.0.4.tgz#668bcbd105aeb5f1d92fe25570ec9525c8faa40c"
-  integrity sha512-a0tMB40oefvuInr4Cwb3GerbL9xTj1D5yg0T5xrjGCGyfvbxseIXX7BAO/u/hIXdafzOI5JC3wDwHyf24buOAQ==
-  dependencies:
-    bluebird "^3.5.5"
-    chownr "^1.1.1"
-    figgy-pudding "^3.5.1"
-    glob "^7.1.4"
-    graceful-fs "^4.1.15"
-    infer-owner "^1.0.3"
-    lru-cache "^5.1.1"
-    mississippi "^3.0.0"
-    mkdirp "^0.5.1"
-    move-concurrently "^1.0.1"
-    promise-inflight "^1.0.1"
-    rimraf "^2.6.3"
-    ssri "^6.0.1"
-    unique-filename "^1.1.1"
-    y18n "^4.0.0"
-
-cacache@^15.0.5:
-  version "15.0.5"
-  resolved "https://registry.yarnpkg.com/cacache/-/cacache-15.0.5.tgz#69162833da29170d6732334643c60e005f5f17d0"
-  integrity sha512-lloiL22n7sOjEEXdL8NAjTgv9a1u43xICE9/203qonkZUCj5X1UEWIdf2/Y0d6QcCtMzbKQyhrcDbdvlZTs/+A==
-  dependencies:
-    "@npmcli/move-file" "^1.0.1"
-    chownr "^2.0.0"
-    fs-minipass "^2.0.0"
-    glob "^7.1.4"
-    infer-owner "^1.0.4"
-    lru-cache "^6.0.0"
-    minipass "^3.1.1"
-    minipass-collect "^1.0.2"
-    minipass-flush "^1.0.5"
-    minipass-pipeline "^1.2.2"
-    mkdirp "^1.0.3"
-    p-map "^4.0.0"
-    promise-inflight "^1.0.1"
-    rimraf "^3.0.2"
-    ssri "^8.0.0"
-    tar "^6.0.2"
-    unique-filename "^1.1.1"
-
-cache-base@^1.0.1:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/cache-base/-/cache-base-1.0.1.tgz#0a7f46416831c8b662ee36fe4e7c59d76f666ab2"
-  integrity sha512-AKcdTnFSWATd5/GCPRxr2ChwIJ85CeyrEyjRHlKxQ56d4XJMGym0uAiKn0xbLOGOl3+yRpOTi484dVCEc5AUzQ==
-  dependencies:
-    collection-visit "^1.0.0"
-    component-emitter "^1.2.1"
-    get-value "^2.0.6"
-    has-value "^1.0.0"
-    isobject "^3.0.1"
-    set-value "^2.0.0"
-    to-object-path "^0.3.0"
-    union-value "^1.0.0"
-    unset-value "^1.0.0"
-
-call-bind@^1.0.0, call-bind@^1.0.2:
-  version "1.0.2"
-  resolved "https://registry.yarnpkg.com/call-bind/-/call-bind-1.0.2.tgz#b1d4e89e688119c3c9a903ad30abb2f6a919be3c"
-  integrity sha512-7O+FbCihrB5WGbFYesctwmTKae6rOiIzmz1icreWJ+0aA7LJfuqhEso2T9ncpcFtzMQtzXf2QGGueWJGTYsqrA==
-  dependencies:
-    function-bind "^1.1.1"
-    get-intrinsic "^1.0.2"
-
-caller-callsite@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/caller-callsite/-/caller-callsite-2.0.0.tgz#847e0fce0a223750a9a027c54b33731ad3154134"
-  integrity sha1-hH4PzgoiN1CpoCfFSzNzGtMVQTQ=
-  dependencies:
-    callsites "^2.0.0"
-
-caller-path@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/caller-path/-/caller-path-2.0.0.tgz#468f83044e369ab2010fac5f06ceee15bb2cb1f4"
-  integrity sha1-Ro+DBE42mrIBD6xfBs7uFbsssfQ=
-  dependencies:
-    caller-callsite "^2.0.0"
-
-callsites@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/callsites/-/callsites-2.0.0.tgz#06eb84f00eea413da86affefacbffb36093b3c50"
-  integrity sha1-BuuE8A7qQT2oav/vrL/7Ngk7PFA=
-
-callsites@^3.0.0:
-  version "3.1.0"
-  resolved "https://registry.yarnpkg.com/callsites/-/callsites-3.1.0.tgz#b3630abd8943432f54b3f0519238e33cd7df2f73"
-  integrity sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ==
-
-camel-case@^4.1.1:
-  version "4.1.2"
-  resolved "https://registry.yarnpkg.com/camel-case/-/camel-case-4.1.2.tgz#9728072a954f805228225a6deea6b38461e1bd5a"
-  integrity sha512-gxGWBrTT1JuMx6R+o5PTXMmUnhnVzLQ9SNutD4YqKtI6ap897t3tKECYla6gCWEkplXnlNybEkZg9GEGxKFCgw==
-  dependencies:
-    pascal-case "^3.1.2"
-    tslib "^2.0.3"
-
-camelcase@5.3.1, camelcase@^5.0.0, camelcase@^5.3.1:
-  version "5.3.1"
-  resolved "https://registry.yarnpkg.com/camelcase/-/camelcase-5.3.1.tgz#e3c9b31569e106811df242f715725a1f4c494320"
-  integrity sha512-L28STB170nwWS63UjtlEOE3dldQApaJXZkOI1uMFfzf3rRuPegHaHesyee+YxQ+W6SvRDQV6UrdOdRiR153wJg==
-
-camelcase@^6.0.0, camelcase@^6.1.0, camelcase@^6.2.0:
-  version "6.2.0"
-  resolved "https://registry.yarnpkg.com/camelcase/-/camelcase-6.2.0.tgz#924af881c9d525ac9d87f40d964e5cea982a1809"
-  integrity sha512-c7wVvbw3f37nuobQNtgsgG9POC9qMbNuMQmTCqZv23b6MIz0fcYpBiOlv9gEN/hdLdnZTDQhg6e9Dq5M1vKvfg==
-
-caniuse-api@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/caniuse-api/-/caniuse-api-3.0.0.tgz#5e4d90e2274961d46291997df599e3ed008ee4c0"
-  integrity sha512-bsTwuIg/BZZK/vreVTYYbSWoe2F+71P7K5QGEX+pT250DZbfU1MQ5prOKpPR+LL6uWKK3KMwMCAS74QB3Um1uw==
-  dependencies:
-    browserslist "^4.0.0"
-    caniuse-lite "^1.0.0"
-    lodash.memoize "^4.1.2"
-    lodash.uniq "^4.5.0"
-
-caniuse-lite@^1.0.0, caniuse-lite@^1.0.30000981, caniuse-lite@^1.0.30001109, caniuse-lite@^1.0.30001125, caniuse-lite@^1.0.30001181:
-  version "1.0.30001191"
-  resolved "https://registry.yarnpkg.com/caniuse-lite/-/caniuse-lite-1.0.30001191.tgz#bacb432b6701f690c8c5f7c680166b9a9f0843d9"
-  integrity sha512-xJJqzyd+7GCJXkcoBiQ1GuxEiOBCLQ0aVW9HMekifZsAVGdj5eJ4mFB9fEhSHipq9IOk/QXFJUiIr9lZT+EsGw==
-
-capture-exit@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/capture-exit/-/capture-exit-2.0.0.tgz#fb953bfaebeb781f62898239dabb426d08a509a4"
-  integrity sha512-PiT/hQmTonHhl/HFGN+Lx3JJUznrVYJ3+AQsnthneZbvW7x+f08Tk7yLJTLEOUvBTbduLeeBkxEaYXUOUrRq6g==
-  dependencies:
-    rsvp "^4.8.4"
-
-case-sensitive-paths-webpack-plugin@2.3.0:
-  version "2.3.0"
-  resolved "https://registry.yarnpkg.com/case-sensitive-paths-webpack-plugin/-/case-sensitive-paths-webpack-plugin-2.3.0.tgz#23ac613cc9a856e4f88ff8bb73bbb5e989825cf7"
-  integrity sha512-/4YgnZS8y1UXXmC02xD5rRrBEu6T5ub+mQHLNRj0fzTRbgdBYhsNo2V5EqwgqrExjxsjtF/OpAKAMkKsxbD5XQ==
-
-caseless@~0.12.0:
-  version "0.12.0"
-  resolved "https://registry.yarnpkg.com/caseless/-/caseless-0.12.0.tgz#1b681c21ff84033c826543090689420d187151dc"
-  integrity sha1-G2gcIf+EAzyCZUMJBolCDRhxUdw=
-
-chalk@2.4.2, chalk@^2.0.0, chalk@^2.4.1, chalk@^2.4.2:
-  version "2.4.2"
-  resolved "https://registry.yarnpkg.com/chalk/-/chalk-2.4.2.tgz#cd42541677a54333cf541a49108c1432b44c9424"
-  integrity sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==
-  dependencies:
-    ansi-styles "^3.2.1"
-    escape-string-regexp "^1.0.5"
-    supports-color "^5.3.0"
-
-chalk@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/chalk/-/chalk-3.0.0.tgz#3f73c2bf526591f574cc492c51e2456349f844e4"
-  integrity sha512-4D3B6Wf41KOYRFdszmDqMCGq5VV/uMAB273JILmO+3jAlh8X4qDtdtgCR3fxtbLEMzSx22QdhnDcJvu2u1fVwg==
-  dependencies:
-    ansi-styles "^4.1.0"
-    supports-color "^7.1.0"
-
-chalk@^4.0.0, chalk@^4.1.0:
-  version "4.1.0"
-  resolved "https://registry.yarnpkg.com/chalk/-/chalk-4.1.0.tgz#4e14870a618d9e2edd97dd8345fd9d9dc315646a"
-  integrity sha512-qwx12AxXe2Q5xQ43Ac//I6v5aXTipYrSESdOgzrN+9XjgEpyjpKuvSGaN4qE93f7TQTlerQQ8S+EQ0EyDoVL1A==
-  dependencies:
-    ansi-styles "^4.1.0"
-    supports-color "^7.1.0"
-
-char-regex@^1.0.2:
-  version "1.0.2"
-  resolved "https://registry.yarnpkg.com/char-regex/-/char-regex-1.0.2.tgz#d744358226217f981ed58f479b1d6bcc29545dcf"
-  integrity sha512-kWWXztvZ5SBQV+eRgKFeh8q5sLuZY2+8WUIzlxWVTg+oGwY14qylx1KbKzHd8P6ZYkAg0xyIDU9JMHhyJMZ1jw==
-
-check-types@^11.1.1:
-  version "11.1.2"
-  resolved "https://registry.yarnpkg.com/check-types/-/check-types-11.1.2.tgz#86a7c12bf5539f6324eb0e70ca8896c0e38f3e2f"
-  integrity sha512-tzWzvgePgLORb9/3a0YenggReLKAIb2owL03H2Xdoe5pKcUyWRSEQ8xfCar8t2SIAuEDwtmx2da1YB52YuHQMQ==
-
-chokidar@^2.1.8:
-  version "2.1.8"
-  resolved "https://registry.yarnpkg.com/chokidar/-/chokidar-2.1.8.tgz#804b3a7b6a99358c3c5c61e71d8728f041cff917"
-  integrity sha512-ZmZUazfOzf0Nve7duiCKD23PFSCs4JPoYyccjUFF3aQkQadqBhfzhjkwBH2mNOG9cTBwhamM37EIsIkZw3nRgg==
-  dependencies:
-    anymatch "^2.0.0"
-    async-each "^1.0.1"
-    braces "^2.3.2"
-    glob-parent "^3.1.0"
-    inherits "^2.0.3"
-    is-binary-path "^1.0.0"
-    is-glob "^4.0.0"
-    normalize-path "^3.0.0"
-    path-is-absolute "^1.0.0"
-    readdirp "^2.2.1"
-    upath "^1.1.1"
-  optionalDependencies:
-    fsevents "^1.2.7"
-
-chokidar@^3.4.1:
-  version "3.5.1"
-  resolved "https://registry.yarnpkg.com/chokidar/-/chokidar-3.5.1.tgz#ee9ce7bbebd2b79f49f304799d5468e31e14e68a"
-  integrity sha512-9+s+Od+W0VJJzawDma/gvBNQqkTiqYTWLuZoyAsivsI4AaWTCzHG06/TMjsf1cYe9Cb97UCEhjz7HvnPk2p/tw==
-  dependencies:
-    anymatch "~3.1.1"
-    braces "~3.0.2"
-    glob-parent "~5.1.0"
-    is-binary-path "~2.1.0"
-    is-glob "~4.0.1"
-    normalize-path "~3.0.0"
-    readdirp "~3.5.0"
-  optionalDependencies:
-    fsevents "~2.3.1"
-
-chownr@^1.1.1:
-  version "1.1.4"
-  resolved "https://registry.yarnpkg.com/chownr/-/chownr-1.1.4.tgz#6fc9d7b42d32a583596337666e7d08084da2cc6b"
-  integrity sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==
-
-chownr@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/chownr/-/chownr-2.0.0.tgz#15bfbe53d2eab4cf70f18a8cd68ebe5b3cb1dece"
-  integrity sha512-bIomtDF5KGpdogkLd9VspvFzk9KfpyyGlS8YFVZl7TGPBHL5snIOnxeshwVgPteQ9b4Eydl+pVbIyE1DcvCWgQ==
-
-chrome-trace-event@^1.0.2:
-  version "1.0.2"
-  resolved "https://registry.yarnpkg.com/chrome-trace-event/-/chrome-trace-event-1.0.2.tgz#234090ee97c7d4ad1a2c4beae27505deffc608a4"
-  integrity sha512-9e/zx1jw7B4CO+c/RXoCsfg/x1AfUBioy4owYH0bJprEYAx5hRFLRhWBqHAG57D0ZM4H7vxbP7bPe0VwhQRYDQ==
-  dependencies:
-    tslib "^1.9.0"
-
-ci-info@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/ci-info/-/ci-info-2.0.0.tgz#67a9e964be31a51e15e5010d58e6f12834002f46"
-  integrity sha512-5tK7EtrZ0N+OLFMthtqOj4fI2Jeb88C4CAZPu25LDVUgXJ0A3Js4PMGqrn0JU1W0Mh1/Z8wZzYPxqUrXeBboCQ==
-
-cipher-base@^1.0.0, cipher-base@^1.0.1, cipher-base@^1.0.3:
-  version "1.0.4"
-  resolved "https://registry.yarnpkg.com/cipher-base/-/cipher-base-1.0.4.tgz#8760e4ecc272f4c363532f926d874aae2c1397de"
-  integrity sha512-Kkht5ye6ZGmwv40uUDZztayT2ThLQGfnj/T71N/XzeZeo3nf8foyW7zGTsPYkEya3m5f3cAypH+qe7YOrM1U2Q==
-  dependencies:
-    inherits "^2.0.1"
-    safe-buffer "^5.0.1"
-
-cjs-module-lexer@^0.6.0:
-  version "0.6.0"
-  resolved "https://registry.yarnpkg.com/cjs-module-lexer/-/cjs-module-lexer-0.6.0.tgz#4186fcca0eae175970aee870b9fe2d6cf8d5655f"
-  integrity sha512-uc2Vix1frTfnuzxxu1Hp4ktSvM3QaI4oXl4ZUqL1wjTu/BGki9TrCWoqLTg/drR1KwAEarXuRFCG2Svr1GxPFw==
-
-class-utils@^0.3.5:
-  version "0.3.6"
-  resolved "https://registry.yarnpkg.com/class-utils/-/class-utils-0.3.6.tgz#f93369ae8b9a7ce02fd41faad0ca83033190c463"
-  integrity sha512-qOhPa/Fj7s6TY8H8esGu5QNpMMQxz79h+urzrNYN6mn+9BnxlDGf5QZ+XeCDsxSjPqsSR56XOZOJmpeurnLMeg==
-  dependencies:
-    arr-union "^3.1.0"
-    define-property "^0.2.5"
-    isobject "^3.0.0"
-    static-extend "^0.1.1"
-
-clean-css@^4.2.3:
-  version "4.2.3"
-  resolved "https://registry.yarnpkg.com/clean-css/-/clean-css-4.2.3.tgz#507b5de7d97b48ee53d84adb0160ff6216380f78"
-  integrity sha512-VcMWDN54ZN/DS+g58HYL5/n4Zrqe8vHJpGA8KdgUXFU4fuP/aHNw8eld9SyEIyabIMJX/0RaY/fplOo5hYLSFA==
-  dependencies:
-    source-map "~0.6.0"
-
-clean-stack@^2.0.0:
-  version "2.2.0"
-  resolved "https://registry.yarnpkg.com/clean-stack/-/clean-stack-2.2.0.tgz#ee8472dbb129e727b31e8a10a427dee9dfe4008b"
-  integrity sha512-4diC9HaTE+KRAMWhDhrGOECgWZxoevMc5TlkObMqNSsVU62PYzXZ/SMTjzyGAFF1YusgxGcSWTEXBhp0CPwQ1A==
-
-cliui@^5.0.0:
-  version "5.0.0"
-  resolved "https://registry.yarnpkg.com/cliui/-/cliui-5.0.0.tgz#deefcfdb2e800784aa34f46fa08e06851c7bbbc5"
-  integrity sha512-PYeGSEmmHM6zvoef2w8TPzlrnNpXIjTipYK780YswmIP9vjxmd6Y2a3CB2Ks6/AU8NHjZugXvo8w3oWM2qnwXA==
-  dependencies:
-    string-width "^3.1.0"
-    strip-ansi "^5.2.0"
-    wrap-ansi "^5.1.0"
-
-cliui@^6.0.0:
-  version "6.0.0"
-  resolved "https://registry.yarnpkg.com/cliui/-/cliui-6.0.0.tgz#511d702c0c4e41ca156d7d0e96021f23e13225b1"
-  integrity sha512-t6wbgtoCXvAzst7QgXxJYqPt0usEfbgQdftEPbLL/cvv6HPE5VgvqCuAIDR0NgU52ds6rFwqrgakNLrHEjCbrQ==
-  dependencies:
-    string-width "^4.2.0"
-    strip-ansi "^6.0.0"
-    wrap-ansi "^6.2.0"
-
-co@^4.6.0:
-  version "4.6.0"
-  resolved "https://registry.yarnpkg.com/co/-/co-4.6.0.tgz#6ea6bdf3d853ae54ccb8e47bfa0bf3f9031fb184"
-  integrity sha1-bqa989hTrlTMuOR7+gvz+QMfsYQ=
-
-coa@^2.0.2:
-  version "2.0.2"
-  resolved "https://registry.yarnpkg.com/coa/-/coa-2.0.2.tgz#43f6c21151b4ef2bf57187db0d73de229e3e7ec3"
-  integrity sha512-q5/jG+YQnSy4nRTV4F7lPepBJZ8qBNJJDBuJdoejDyLXgmL7IEo+Le2JDZudFTFt7mrCqIRaSjws4ygRCTCAXA==
-  dependencies:
-    "@types/q" "^1.5.1"
-    chalk "^2.4.1"
-    q "^1.1.2"
-
-collect-v8-coverage@^1.0.0:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/collect-v8-coverage/-/collect-v8-coverage-1.0.1.tgz#cc2c8e94fc18bbdffe64d6534570c8a673b27f59"
-  integrity sha512-iBPtljfCNcTKNAto0KEtDfZ3qzjJvqE3aTGZsbhjSBlorqpXJlaWWtPO35D+ZImoC3KWejX64o+yPGxhWSTzfg==
-
-collection-visit@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/collection-visit/-/collection-visit-1.0.0.tgz#4bc0373c164bc3291b4d368c829cf1a80a59dca0"
-  integrity sha1-S8A3PBZLwykbTTaMgpzxqApZ3KA=
-  dependencies:
-    map-visit "^1.0.0"
-    object-visit "^1.0.0"
-
-color-convert@^1.9.0, color-convert@^1.9.1:
-  version "1.9.3"
-  resolved "https://registry.yarnpkg.com/color-convert/-/color-convert-1.9.3.tgz#bb71850690e1f136567de629d2d5471deda4c1e8"
-  integrity sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==
-  dependencies:
-    color-name "1.1.3"
-
-color-convert@^2.0.1:
-  version "2.0.1"
-  resolved "https://registry.yarnpkg.com/color-convert/-/color-convert-2.0.1.tgz#72d3a68d598c9bdb3af2ad1e84f21d896abd4de3"
-  integrity sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==
-  dependencies:
-    color-name "~1.1.4"
-
-color-name@1.1.3:
-  version "1.1.3"
-  resolved "https://registry.yarnpkg.com/color-name/-/color-name-1.1.3.tgz#a7d0558bd89c42f795dd42328f740831ca53bc25"
-  integrity sha1-p9BVi9icQveV3UIyj3QIMcpTvCU=
-
-color-name@^1.0.0, color-name@~1.1.4:
-  version "1.1.4"
-  resolved "https://registry.yarnpkg.com/color-name/-/color-name-1.1.4.tgz#c2a09a87acbde69543de6f63fa3995c826c536a2"
-  integrity sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==
-
-color-string@^1.5.4:
-  version "1.5.4"
-  resolved "https://registry.yarnpkg.com/color-string/-/color-string-1.5.4.tgz#dd51cd25cfee953d138fe4002372cc3d0e504cb6"
-  integrity sha512-57yF5yt8Xa3czSEW1jfQDE79Idk0+AkN/4KWad6tbdxUmAs3MvjxlWSWD4deYytcRfoZ9nhKyFl1kj5tBvidbw==
-  dependencies:
-    color-name "^1.0.0"
-    simple-swizzle "^0.2.2"
-
-color@^3.0.0:
-  version "3.1.3"
-  resolved "https://registry.yarnpkg.com/color/-/color-3.1.3.tgz#ca67fb4e7b97d611dcde39eceed422067d91596e"
-  integrity sha512-xgXAcTHa2HeFCGLE9Xs/R82hujGtu9Jd9x4NW3T34+OMs7VoPsjwzRczKHvTAHeJwWFwX5j15+MgAppE8ztObQ==
-  dependencies:
-    color-convert "^1.9.1"
-    color-string "^1.5.4"
-
-colorette@^1.2.1:
-  version "1.2.1"
-  resolved "https://registry.yarnpkg.com/colorette/-/colorette-1.2.1.tgz#4d0b921325c14faf92633086a536db6e89564b1b"
-  integrity sha512-puCDz0CzydiSYOrnXpz/PKd69zRrribezjtE9yd4zvytoRc8+RY/KJPvtPFKZS3E3wP6neGyMe0vOTlHO5L3Pw==
-
-combined-stream@^1.0.6, combined-stream@~1.0.6:
-  version "1.0.8"
-  resolved "https://registry.yarnpkg.com/combined-stream/-/combined-stream-1.0.8.tgz#c3d45a8b34fd730631a110a8a2520682b31d5a7f"
-  integrity sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==
-  dependencies:
-    delayed-stream "~1.0.0"
-
-commander@^2.20.0:
-  version "2.20.3"
-  resolved "https://registry.yarnpkg.com/commander/-/commander-2.20.3.tgz#fd485e84c03eb4881c20722ba48035e8531aeb33"
-  integrity sha512-GpVkmM8vF2vQUkj2LvZmD35JxeJOLCwJ9cUkugyk2nuhbv3+mJvpLYYt+0+USMxE+oj+ey/lJEnhZw75x/OMcQ==
-
-commander@^4.1.1:
-  version "4.1.1"
-  resolved "https://registry.yarnpkg.com/commander/-/commander-4.1.1.tgz#9fd602bd936294e9e9ef46a3f4d6964044b18068"
-  integrity sha512-NOKm8xhkzAjzFx8B2v5OAHT+u5pRQc2UCa2Vq9jYL/31o2wi9mxBA7LIFs3sV5VSC49z6pEhfbMULvShKj26WA==
-
-common-tags@^1.8.0:
-  version "1.8.0"
-  resolved "https://registry.yarnpkg.com/common-tags/-/common-tags-1.8.0.tgz#8e3153e542d4a39e9b10554434afaaf98956a937"
-  integrity sha512-6P6g0uetGpW/sdyUy/iQQCbFF0kWVMSIVSyYz7Zgjcgh8mgw8PQzDNZeyZ5DQ2gM7LBoZPHmnjz8rUthkBG5tw==
-
-commondir@^1.0.1:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/commondir/-/commondir-1.0.1.tgz#ddd800da0c66127393cca5950ea968a3aaf1253b"
-  integrity sha1-3dgA2gxmEnOTzKWVDqloo6rxJTs=
-
-component-emitter@^1.2.1:
-  version "1.3.0"
-  resolved "https://registry.yarnpkg.com/component-emitter/-/component-emitter-1.3.0.tgz#16e4070fba8ae29b679f2215853ee181ab2eabc0"
-  integrity sha512-Rd3se6QB+sO1TwqZjscQrurpEPIfO0/yYnSin6Q/rD3mOutHvUrCAhJub3r90uNb+SESBuE0QYoB90YdfatsRg==
-
-compose-function@3.0.3:
-  version "3.0.3"
-  resolved "https://registry.yarnpkg.com/compose-function/-/compose-function-3.0.3.tgz#9ed675f13cc54501d30950a486ff6a7ba3ab185f"
-  integrity sha1-ntZ18TzFRQHTCVCkhv9qe6OrGF8=
-  dependencies:
-    arity-n "^1.0.4"
-
-compressible@~2.0.16:
-  version "2.0.18"
-  resolved "https://registry.yarnpkg.com/compressible/-/compressible-2.0.18.tgz#af53cca6b070d4c3c0750fbd77286a6d7cc46fba"
-  integrity sha512-AF3r7P5dWxL8MxyITRMlORQNaOA2IkAFaTr4k7BUumjPtRpGDTZpl0Pb1XCO6JeDCBdp126Cgs9sMxqSjgYyRg==
-  dependencies:
-    mime-db ">= 1.43.0 < 2"
-
-compression@^1.7.4:
-  version "1.7.4"
-  resolved "https://registry.yarnpkg.com/compression/-/compression-1.7.4.tgz#95523eff170ca57c29a0ca41e6fe131f41e5bb8f"
-  integrity sha512-jaSIDzP9pZVS4ZfQ+TzvtiWhdpFhE2RDHz8QJkpX9SIpLq88VueF5jJw6t+6CUQcAoA6t+x89MLrWAqpfDE8iQ==
-  dependencies:
-    accepts "~1.3.5"
-    bytes "3.0.0"
-    compressible "~2.0.16"
-    debug "2.6.9"
-    on-headers "~1.0.2"
-    safe-buffer "5.1.2"
-    vary "~1.1.2"
-
-compute-scroll-into-view@1.0.14:
-  version "1.0.14"
-  resolved "https://registry.yarnpkg.com/compute-scroll-into-view/-/compute-scroll-into-view-1.0.14.tgz#80e3ebb25d6aa89f42e533956cb4b16a04cfe759"
-  integrity sha512-mKDjINe3tc6hGelUMNDzuhorIUZ7kS7BwyY0r2wQd2HOH2tRuJykiC06iSEX8y1TuhNzvz4GcJnK16mM2J1NMQ==
-
-concat-map@0.0.1:
-  version "0.0.1"
-  resolved "https://registry.yarnpkg.com/concat-map/-/concat-map-0.0.1.tgz#d8a96bd77fd68df7793a73036a3ba0d5405d477b"
-  integrity sha1-2Klr13/Wjfd5OnMDajug1UBdR3s=
-
-concat-stream@^1.5.0:
-  version "1.6.2"
-  resolved "https://registry.yarnpkg.com/concat-stream/-/concat-stream-1.6.2.tgz#904bdf194cd3122fc675c77fc4ac3d4ff0fd1a34"
-  integrity sha512-27HBghJxjiZtIk3Ycvn/4kbJk/1uZuJFfuPEns6LaEvpvG1f0hTea8lilrouyo9mVc2GWdcEZ8OLoGmSADlrCw==
-  dependencies:
-    buffer-from "^1.0.0"
-    inherits "^2.0.3"
-    readable-stream "^2.2.2"
-    typedarray "^0.0.6"
-
-confusing-browser-globals@^1.0.10:
-  version "1.0.10"
-  resolved "https://registry.yarnpkg.com/confusing-browser-globals/-/confusing-browser-globals-1.0.10.tgz#30d1e7f3d1b882b25ec4933d1d1adac353d20a59"
-  integrity sha512-gNld/3lySHwuhaVluJUKLePYirM3QNCKzVxqAdhJII9/WXKVX5PURzMVJspS1jTslSqjeuG4KMVTSouit5YPHA==
-
-connect-history-api-fallback@^1.6.0:
-  version "1.6.0"
-  resolved "https://registry.yarnpkg.com/connect-history-api-fallback/-/connect-history-api-fallback-1.6.0.tgz#8b32089359308d111115d81cad3fceab888f97bc"
-  integrity sha512-e54B99q/OUoH64zYYRf3HBP5z24G38h5D3qXu23JGRoigpX5Ss4r9ZnDk3g0Z8uQC2x2lPaJ+UlWBc1ZWBWdLg==
-
-console-browserify@^1.1.0:
-  version "1.2.0"
-  resolved "https://registry.yarnpkg.com/console-browserify/-/console-browserify-1.2.0.tgz#67063cef57ceb6cf4993a2ab3a55840ae8c49336"
-  integrity sha512-ZMkYO/LkF17QvCPqM0gxw8yUzigAOZOSWSHg91FH6orS7vcEj5dVZTidN2fQ14yBSdg97RqhSNwLUXInd52OTA==
-
-constants-browserify@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/constants-browserify/-/constants-browserify-1.0.0.tgz#c20b96d8c617748aaf1c16021760cd27fcb8cb75"
-  integrity sha1-wguW2MYXdIqvHBYCF2DNJ/y4y3U=
-
-contains-path@^0.1.0:
-  version "0.1.0"
-  resolved "https://registry.yarnpkg.com/contains-path/-/contains-path-0.1.0.tgz#fe8cf184ff6670b6baef01a9d4861a5cbec4120a"
-  integrity sha1-/ozxhP9mcLa67wGp1IYaXL7EEgo=
-
-content-disposition@0.5.3:
-  version "0.5.3"
-  resolved "https://registry.yarnpkg.com/content-disposition/-/content-disposition-0.5.3.tgz#e130caf7e7279087c5616c2007d0485698984fbd"
-  integrity sha512-ExO0774ikEObIAEV9kDo50o+79VCUdEB6n6lzKgGwupcVeRlhrj3qGAfwq8G6uBJjkqLrhT0qEYFcWng8z1z0g==
-  dependencies:
-    safe-buffer "5.1.2"
-
-content-type@~1.0.4:
-  version "1.0.4"
-  resolved "https://registry.yarnpkg.com/content-type/-/content-type-1.0.4.tgz#e138cc75e040c727b1966fe5e5f8c9aee256fe3b"
-  integrity sha512-hIP3EEPs8tB9AT1L+NUqtwOAps4mk2Zob89MWXMHjHWg9milF/j4osnnQLXBCBFBk/tvIG/tUc9mOUJiPBhPXA==
-
-convert-source-map@1.7.0, convert-source-map@^1.4.0, convert-source-map@^1.5.0, convert-source-map@^1.6.0, convert-source-map@^1.7.0:
-  version "1.7.0"
-  resolved "https://registry.yarnpkg.com/convert-source-map/-/convert-source-map-1.7.0.tgz#17a2cb882d7f77d3490585e2ce6c524424a3a442"
-  integrity sha512-4FJkXzKXEDB1snCFZlLP4gpC3JILicCpGbzG9f9G7tGqGCzETQ2hWPrcinA9oU4wtf2biUaEH5065UnMeR33oA==
-  dependencies:
-    safe-buffer "~5.1.1"
-
-convert-source-map@^0.3.3:
-  version "0.3.5"
-  resolved "https://registry.yarnpkg.com/convert-source-map/-/convert-source-map-0.3.5.tgz#f1d802950af7dd2631a1febe0596550c86ab3190"
-  integrity sha1-8dgClQr33SYxof6+BZZVDIarMZA=
-
-cookie-signature@1.0.6:
-  version "1.0.6"
-  resolved "https://registry.yarnpkg.com/cookie-signature/-/cookie-signature-1.0.6.tgz#e303a882b342cc3ee8ca513a79999734dab3ae2c"
-  integrity sha1-4wOogrNCzD7oylE6eZmXNNqzriw=
-
-cookie@0.4.0:
-  version "0.4.0"
-  resolved "https://registry.yarnpkg.com/cookie/-/cookie-0.4.0.tgz#beb437e7022b3b6d49019d088665303ebe9c14ba"
-  integrity sha512-+Hp8fLp57wnUSt0tY0tHEXh4voZRDnoIrZPqlo3DPiI4y9lwg/jqx+1Om94/W6ZaPDOUbnjOt/99w66zk+l1Xg==
-
-copy-concurrently@^1.0.0:
-  version "1.0.5"
-  resolved "https://registry.yarnpkg.com/copy-concurrently/-/copy-concurrently-1.0.5.tgz#92297398cae34937fcafd6ec8139c18051f0b5e0"
-  integrity sha512-f2domd9fsVDFtaFcbaRZuYXwtdmnzqbADSwhSWYxYB/Q8zsdUUFMXVRwXGDMWmbEzAn1kdRrtI1T/KTFOL4X2A==
-  dependencies:
-    aproba "^1.1.1"
-    fs-write-stream-atomic "^1.0.8"
-    iferr "^0.1.5"
-    mkdirp "^0.5.1"
-    rimraf "^2.5.4"
-    run-queue "^1.0.0"
-
-copy-descriptor@^0.1.0:
-  version "0.1.1"
-  resolved "https://registry.yarnpkg.com/copy-descriptor/-/copy-descriptor-0.1.1.tgz#676f6eb3c39997c2ee1ac3a924fd6124748f578d"
-  integrity sha1-Z29us8OZl8LuGsOpJP1hJHSPV40=
-
-copy-to-clipboard@3.3.1:
-  version "3.3.1"
-  resolved "https://registry.yarnpkg.com/copy-to-clipboard/-/copy-to-clipboard-3.3.1.tgz#115aa1a9998ffab6196f93076ad6da3b913662ae"
-  integrity sha512-i13qo6kIHTTpCm8/Wup+0b1mVWETvu2kIMzKoK8FpkLkFxlt0znUAHcMzox+T8sPlqtZXq3CulEjQHsYiGFJUw==
-  dependencies:
-    toggle-selection "^1.0.6"
-
-core-js-compat@^3.6.2, core-js-compat@^3.8.0:
-  version "3.9.0"
-  resolved "https://registry.yarnpkg.com/core-js-compat/-/core-js-compat-3.9.0.tgz#29da39385f16b71e1915565aa0385c4e0963ad56"
-  integrity sha512-YK6fwFjCOKWwGnjFUR3c544YsnA/7DoLL0ysncuOJ4pwbriAtOpvM2bygdlcXbvQCQZ7bBU9CL4t7tGl7ETRpQ==
-  dependencies:
-    browserslist "^4.16.3"
-    semver "7.0.0"
-
-core-js-pure@^3.0.0:
-  version "3.9.0"
-  resolved "https://registry.yarnpkg.com/core-js-pure/-/core-js-pure-3.9.0.tgz#326cc74e1fef8b7443a6a793ddb0adfcd81f9efb"
-  integrity sha512-3pEcmMZC9Cq0D4ZBh3pe2HLtqxpGNJBLXF/kZ2YzK17RbKp94w0HFbdbSx8H8kAlZG5k76hvLrkPm57Uyef+kg==
-
-core-js@^2.4.0:
-  version "2.6.12"
-  resolved "https://registry.yarnpkg.com/core-js/-/core-js-2.6.12.tgz#d9333dfa7b065e347cc5682219d6f690859cc2ec"
-  integrity sha512-Kb2wC0fvsWfQrgk8HU5lW6U/Lcs8+9aaYcy4ZFc6DDlo4nZ7n70dEgE5rtR0oG6ufKDUnrwfWL1mXR5ljDatrQ==
-
-core-js@^3.6.5:
-  version "3.9.0"
-  resolved "https://registry.yarnpkg.com/core-js/-/core-js-3.9.0.tgz#790b1bb11553a2272b36e2625c7179db345492f8"
-  integrity sha512-PyFBJaLq93FlyYdsndE5VaueA9K5cNB7CGzeCj191YYLhkQM0gdZR2SKihM70oF0wdqKSKClv/tEBOpoRmdOVQ==
-
-core-util-is@1.0.2, core-util-is@~1.0.0:
-  version "1.0.2"
-  resolved "https://registry.yarnpkg.com/core-util-is/-/core-util-is-1.0.2.tgz#b5fd54220aa2bc5ab57aab7140c940754503c1a7"
-  integrity sha1-tf1UIgqivFq1eqtxQMlAdUUDwac=
-
-cosmiconfig@^5.0.0:
-  version "5.2.1"
-  resolved "https://registry.yarnpkg.com/cosmiconfig/-/cosmiconfig-5.2.1.tgz#040f726809c591e77a17c0a3626ca45b4f168b1a"
-  integrity sha512-H65gsXo1SKjf8zmrJ67eJk8aIRKV5ff2D4uKZIBZShbhGSpEmsQOPW/SKMKYhSTrqR7ufy6RP69rPogdaPh/kA==
-  dependencies:
-    import-fresh "^2.0.0"
-    is-directory "^0.3.1"
-    js-yaml "^3.13.1"
-    parse-json "^4.0.0"
-
-cosmiconfig@^6.0.0:
-  version "6.0.0"
-  resolved "https://registry.yarnpkg.com/cosmiconfig/-/cosmiconfig-6.0.0.tgz#da4fee853c52f6b1e6935f41c1a2fc50bd4a9982"
-  integrity sha512-xb3ZL6+L8b9JLLCx3ZdoZy4+2ECphCMo2PwqgP1tlfVq6M6YReyzBJtvWWtbDSpNr9hn96pkCiZqUcFEc+54Qg==
-  dependencies:
-    "@types/parse-json" "^4.0.0"
-    import-fresh "^3.1.0"
-    parse-json "^5.0.0"
-    path-type "^4.0.0"
-    yaml "^1.7.2"
-
-cosmiconfig@^7.0.0:
-  version "7.0.0"
-  resolved "https://registry.yarnpkg.com/cosmiconfig/-/cosmiconfig-7.0.0.tgz#ef9b44d773959cae63ddecd122de23853b60f8d3"
-  integrity sha512-pondGvTuVYDk++upghXJabWzL6Kxu6f26ljFw64Swq9v6sQPUL3EUlVDV56diOjpCayKihL6hVe8exIACU4XcA==
-  dependencies:
-    "@types/parse-json" "^4.0.0"
-    import-fresh "^3.2.1"
-    parse-json "^5.0.0"
-    path-type "^4.0.0"
-    yaml "^1.10.0"
-
-create-ecdh@^4.0.0:
-  version "4.0.4"
-  resolved "https://registry.yarnpkg.com/create-ecdh/-/create-ecdh-4.0.4.tgz#d6e7f4bffa66736085a0762fd3a632684dabcc4e"
-  integrity sha512-mf+TCx8wWc9VpuxfP2ht0iSISLZnt0JgWlrOKZiNqyUZWnjIaCIVNQArMHnCZKfEYRg6IM7A+NeJoN8gf/Ws0A==
-  dependencies:
-    bn.js "^4.1.0"
-    elliptic "^6.5.3"
-
-create-hash@^1.1.0, create-hash@^1.1.2, create-hash@^1.2.0:
-  version "1.2.0"
-  resolved "https://registry.yarnpkg.com/create-hash/-/create-hash-1.2.0.tgz#889078af11a63756bcfb59bd221996be3a9ef196"
-  integrity sha512-z00bCGNHDG8mHAkP7CtT1qVu+bFQUPjYq/4Iv3C3kWjTFV10zIjfSoeqXo9Asws8gwSHDGj/hl2u4OGIjapeCg==
-  dependencies:
-    cipher-base "^1.0.1"
-    inherits "^2.0.1"
-    md5.js "^1.3.4"
-    ripemd160 "^2.0.1"
-    sha.js "^2.4.0"
-
-create-hmac@^1.1.0, create-hmac@^1.1.4, create-hmac@^1.1.7:
-  version "1.1.7"
-  resolved "https://registry.yarnpkg.com/create-hmac/-/create-hmac-1.1.7.tgz#69170c78b3ab957147b2b8b04572e47ead2243ff"
-  integrity sha512-MJG9liiZ+ogc4TzUwuvbER1JRdgvUFSB5+VR/g5h82fGaIRWMWddtKBHi7/sVhfjQZ6SehlyhvQYrcYkaUIpLg==
-  dependencies:
-    cipher-base "^1.0.3"
-    create-hash "^1.1.0"
-    inherits "^2.0.1"
-    ripemd160 "^2.0.0"
-    safe-buffer "^5.0.1"
-    sha.js "^2.4.8"
-
-cross-spawn@7.0.3, cross-spawn@^7.0.0, cross-spawn@^7.0.2:
-  version "7.0.3"
-  resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-7.0.3.tgz#f73a85b9d5d41d045551c177e2882d4ac85728a6"
-  integrity sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==
-  dependencies:
-    path-key "^3.1.0"
-    shebang-command "^2.0.0"
-    which "^2.0.1"
-
-cross-spawn@^6.0.0:
-  version "6.0.5"
-  resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-6.0.5.tgz#4a5ec7c64dfae22c3a14124dbacdee846d80cbc4"
-  integrity sha512-eTVLrBSt7fjbDygz805pMnstIs2VTBNkRm0qxZd+M7A5XDdxVRWO5MxGBXZhjY4cqLYLdtrGqRf8mBPmzwSpWQ==
-  dependencies:
-    nice-try "^1.0.4"
-    path-key "^2.0.1"
-    semver "^5.5.0"
-    shebang-command "^1.2.0"
-    which "^1.2.9"
-
-crypto-browserify@^3.11.0:
-  version "3.12.0"
-  resolved "https://registry.yarnpkg.com/crypto-browserify/-/crypto-browserify-3.12.0.tgz#396cf9f3137f03e4b8e532c58f698254e00f80ec"
-  integrity sha512-fz4spIh+znjO2VjL+IdhEpRJ3YN6sMzITSBijk6FK2UvTqruSQW+/cCZTSNsMiZNvUeq0CqurF+dAbyiGOY6Wg==
-  dependencies:
-    browserify-cipher "^1.0.0"
-    browserify-sign "^4.0.0"
-    create-ecdh "^4.0.0"
-    create-hash "^1.1.0"
-    create-hmac "^1.1.0"
-    diffie-hellman "^5.0.0"
-    inherits "^2.0.1"
-    pbkdf2 "^3.0.3"
-    public-encrypt "^4.0.0"
-    randombytes "^2.0.0"
-    randomfill "^1.0.3"
-
-crypto-random-string@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/crypto-random-string/-/crypto-random-string-1.0.0.tgz#a230f64f568310e1498009940790ec99545bca7e"
-  integrity sha1-ojD2T1aDEOFJgAmUB5DsmVRbyn4=
-
-css-blank-pseudo@^0.1.4:
-  version "0.1.4"
-  resolved "https://registry.yarnpkg.com/css-blank-pseudo/-/css-blank-pseudo-0.1.4.tgz#dfdefd3254bf8a82027993674ccf35483bfcb3c5"
-  integrity sha512-LHz35Hr83dnFeipc7oqFDmsjHdljj3TQtxGGiNWSOsTLIAubSm4TEz8qCaKFpk7idaQ1GfWscF4E6mgpBysA1w==
-  dependencies:
-    postcss "^7.0.5"
-
-css-box-model@1.2.1:
-  version "1.2.1"
-  resolved "https://registry.yarnpkg.com/css-box-model/-/css-box-model-1.2.1.tgz#59951d3b81fd6b2074a62d49444415b0d2b4d7c1"
-  integrity sha512-a7Vr4Q/kd/aw96bnJG332W9V9LkJO69JRcaCYDUqjp6/z0w6VcZjgAcTbgFxEPfBgdnAwlh3iwu+hLopa+flJw==
-  dependencies:
-    tiny-invariant "^1.0.6"
-
-css-color-names@0.0.4, css-color-names@^0.0.4:
-  version "0.0.4"
-  resolved "https://registry.yarnpkg.com/css-color-names/-/css-color-names-0.0.4.tgz#808adc2e79cf84738069b646cb20ec27beb629e0"
-  integrity sha1-gIrcLnnPhHOAabZGyyDsJ762KeA=
-
-css-declaration-sorter@^4.0.1:
-  version "4.0.1"
-  resolved "https://registry.yarnpkg.com/css-declaration-sorter/-/css-declaration-sorter-4.0.1.tgz#c198940f63a76d7e36c1e71018b001721054cb22"
-  integrity sha512-BcxQSKTSEEQUftYpBVnsH4SF05NTuBokb19/sBt6asXGKZ/6VP7PLG1CBCkFDYOnhXhPh0jMhO6xZ71oYHXHBA==
-  dependencies:
-    postcss "^7.0.1"
-    timsort "^0.3.0"
-
-css-get-unit@1.0.1:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/css-get-unit/-/css-get-unit-1.0.1.tgz#e490b9e56b2cd20f903a22ccafb448382edf7976"
-  integrity sha1-5JC55Wss0g+QOiLMr7RIOC7feXY=
-
-css-has-pseudo@^0.10.0:
-  version "0.10.0"
-  resolved "https://registry.yarnpkg.com/css-has-pseudo/-/css-has-pseudo-0.10.0.tgz#3c642ab34ca242c59c41a125df9105841f6966ee"
-  integrity sha512-Z8hnfsZu4o/kt+AuFzeGpLVhFOGO9mluyHBaA2bA8aCGTwah5sT3WV/fTHH8UNZUytOIImuGPrl/prlb4oX4qQ==
-  dependencies:
-    postcss "^7.0.6"
-    postcss-selector-parser "^5.0.0-rc.4"
-
-css-loader@4.3.0:
-  version "4.3.0"
-  resolved "https://registry.yarnpkg.com/css-loader/-/css-loader-4.3.0.tgz#c888af64b2a5b2e85462c72c0f4a85c7e2e0821e"
-  integrity sha512-rdezjCjScIrsL8BSYszgT4s476IcNKt6yX69t0pHjJVnPUTDpn4WfIpDQTN3wCJvUvfsz/mFjuGOekf3PY3NUg==
-  dependencies:
-    camelcase "^6.0.0"
-    cssesc "^3.0.0"
-    icss-utils "^4.1.1"
-    loader-utils "^2.0.0"
-    postcss "^7.0.32"
-    postcss-modules-extract-imports "^2.0.0"
-    postcss-modules-local-by-default "^3.0.3"
-    postcss-modules-scope "^2.2.0"
-    postcss-modules-values "^3.0.0"
-    postcss-value-parser "^4.1.0"
-    schema-utils "^2.7.1"
-    semver "^7.3.2"
-
-css-prefers-color-scheme@^3.1.1:
-  version "3.1.1"
-  resolved "https://registry.yarnpkg.com/css-prefers-color-scheme/-/css-prefers-color-scheme-3.1.1.tgz#6f830a2714199d4f0d0d0bb8a27916ed65cff1f4"
-  integrity sha512-MTu6+tMs9S3EUqzmqLXEcgNRbNkkD/TGFvowpeoWJn5Vfq7FMgsmRQs9X5NXAURiOBmOxm/lLjsDNXDE6k9bhg==
-  dependencies:
-    postcss "^7.0.5"
-
-css-select-base-adapter@^0.1.1:
-  version "0.1.1"
-  resolved "https://registry.yarnpkg.com/css-select-base-adapter/-/css-select-base-adapter-0.1.1.tgz#3b2ff4972cc362ab88561507a95408a1432135d7"
-  integrity sha512-jQVeeRG70QI08vSTwf1jHxp74JoZsr2XSgETae8/xC8ovSnL2WF87GTLO86Sbwdt2lK4Umg4HnnwMO4YF3Ce7w==
-
-css-select@^2.0.0, css-select@^2.0.2:
-  version "2.1.0"
-  resolved "https://registry.yarnpkg.com/css-select/-/css-select-2.1.0.tgz#6a34653356635934a81baca68d0255432105dbef"
-  integrity sha512-Dqk7LQKpwLoH3VovzZnkzegqNSuAziQyNZUcrdDM401iY+R5NkGBXGmtO05/yaXQziALuPogeG0b7UAgjnTJTQ==
-  dependencies:
-    boolbase "^1.0.0"
-    css-what "^3.2.1"
-    domutils "^1.7.0"
-    nth-check "^1.0.2"
-
-css-tree@1.0.0-alpha.37:
-  version "1.0.0-alpha.37"
-  resolved "https://registry.yarnpkg.com/css-tree/-/css-tree-1.0.0-alpha.37.tgz#98bebd62c4c1d9f960ec340cf9f7522e30709a22"
-  integrity sha512-DMxWJg0rnz7UgxKT0Q1HU/L9BeJI0M6ksor0OgqOnF+aRCDWg/N2641HmVyU9KVIu0OVVWOb2IpC9A+BJRnejg==
-  dependencies:
-    mdn-data "2.0.4"
-    source-map "^0.6.1"
-
-css-tree@^1.1.2:
-  version "1.1.2"
-  resolved "https://registry.yarnpkg.com/css-tree/-/css-tree-1.1.2.tgz#9ae393b5dafd7dae8a622475caec78d3d8fbd7b5"
-  integrity sha512-wCoWush5Aeo48GLhfHPbmvZs59Z+M7k5+B1xDnXbdWNcEF423DoFdqSWE0PM5aNk5nI5cp1q7ms36zGApY/sKQ==
-  dependencies:
-    mdn-data "2.0.14"
-    source-map "^0.6.1"
-
-css-what@^3.2.1:
-  version "3.4.2"
-  resolved "https://registry.yarnpkg.com/css-what/-/css-what-3.4.2.tgz#ea7026fcb01777edbde52124e21f327e7ae950e4"
-  integrity sha512-ACUm3L0/jiZTqfzRM3Hi9Q8eZqd6IK37mMWPLz9PJxkLWllYeRf+EHUSHYEtFop2Eqytaq1FizFVh7XfBnXCDQ==
-
-css.escape@^1.5.1:
-  version "1.5.1"
-  resolved "https://registry.yarnpkg.com/css.escape/-/css.escape-1.5.1.tgz#42e27d4fa04ae32f931a4b4d4191fa9cddee97cb"
-  integrity sha1-QuJ9T6BK4y+TGktNQZH6nN3ul8s=
-
-css@^2.0.0:
-  version "2.2.4"
-  resolved "https://registry.yarnpkg.com/css/-/css-2.2.4.tgz#c646755c73971f2bba6a601e2cf2fd71b1298929"
-  integrity sha512-oUnjmWpy0niI3x/mPL8dVEI1l7MnG3+HHyRPHf+YFSbK+svOhXpmSOcDURUh2aOCgl2grzrOPt1nHLuCVFULLw==
-  dependencies:
-    inherits "^2.0.3"
-    source-map "^0.6.1"
-    source-map-resolve "^0.5.2"
-    urix "^0.1.0"
-
-css@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/css/-/css-3.0.0.tgz#4447a4d58fdd03367c516ca9f64ae365cee4aa5d"
-  integrity sha512-DG9pFfwOrzc+hawpmqX/dHYHJG+Bsdb0klhyi1sDneOgGOXy9wQIC8hzyVp1e4NRYDBdxcylvywPkkXCHAzTyQ==
-  dependencies:
-    inherits "^2.0.4"
-    source-map "^0.6.1"
-    source-map-resolve "^0.6.0"
-
-cssdb@^4.4.0:
-  version "4.4.0"
-  resolved "https://registry.yarnpkg.com/cssdb/-/cssdb-4.4.0.tgz#3bf2f2a68c10f5c6a08abd92378331ee803cddb0"
-  integrity sha512-LsTAR1JPEM9TpGhl/0p3nQecC2LJ0kD8X5YARu1hk/9I1gril5vDtMZyNxcEpxxDj34YNck/ucjuoUd66K03oQ==
-
-cssesc@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/cssesc/-/cssesc-2.0.0.tgz#3b13bd1bb1cb36e1bcb5a4dcd27f54c5dcb35703"
-  integrity sha512-MsCAG1z9lPdoO/IUMLSBWBSVxVtJ1395VGIQ+Fc2gNdkQ1hNDnQdw3YhA71WJCBW1vdwA0cAnk/DnW6bqoEUYg==
-
-cssesc@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/cssesc/-/cssesc-3.0.0.tgz#37741919903b868565e1c09ea747445cd18983ee"
-  integrity sha512-/Tb/JcjK111nNScGob5MNtsntNM1aCNUDipB/TkwZFhyDrrE47SOx/18wF2bbjgc3ZzCSKW1T5nt5EbFoAz/Vg==
-
-cssnano-preset-default@^4.0.7:
-  version "4.0.7"
-  resolved "https://registry.yarnpkg.com/cssnano-preset-default/-/cssnano-preset-default-4.0.7.tgz#51ec662ccfca0f88b396dcd9679cdb931be17f76"
-  integrity sha512-x0YHHx2h6p0fCl1zY9L9roD7rnlltugGu7zXSKQx6k2rYw0Hi3IqxcoAGF7u9Q5w1nt7vK0ulxV8Lo+EvllGsA==
-  dependencies:
-    css-declaration-sorter "^4.0.1"
-    cssnano-util-raw-cache "^4.0.1"
-    postcss "^7.0.0"
-    postcss-calc "^7.0.1"
-    postcss-colormin "^4.0.3"
-    postcss-convert-values "^4.0.1"
-    postcss-discard-comments "^4.0.2"
-    postcss-discard-duplicates "^4.0.2"
-    postcss-discard-empty "^4.0.1"
-    postcss-discard-overridden "^4.0.1"
-    postcss-merge-longhand "^4.0.11"
-    postcss-merge-rules "^4.0.3"
-    postcss-minify-font-values "^4.0.2"
-    postcss-minify-gradients "^4.0.2"
-    postcss-minify-params "^4.0.2"
-    postcss-minify-selectors "^4.0.2"
-    postcss-normalize-charset "^4.0.1"
-    postcss-normalize-display-values "^4.0.2"
-    postcss-normalize-positions "^4.0.2"
-    postcss-normalize-repeat-style "^4.0.2"
-    postcss-normalize-string "^4.0.2"
-    postcss-normalize-timing-functions "^4.0.2"
-    postcss-normalize-unicode "^4.0.1"
-    postcss-normalize-url "^4.0.1"
-    postcss-normalize-whitespace "^4.0.2"
-    postcss-ordered-values "^4.1.2"
-    postcss-reduce-initial "^4.0.3"
-    postcss-reduce-transforms "^4.0.2"
-    postcss-svgo "^4.0.2"
-    postcss-unique-selectors "^4.0.1"
-
-cssnano-util-get-arguments@^4.0.0:
-  version "4.0.0"
-  resolved "https://registry.yarnpkg.com/cssnano-util-get-arguments/-/cssnano-util-get-arguments-4.0.0.tgz#ed3a08299f21d75741b20f3b81f194ed49cc150f"
-  integrity sha1-7ToIKZ8h11dBsg87gfGU7UnMFQ8=
-
-cssnano-util-get-match@^4.0.0:
-  version "4.0.0"
-  resolved "https://registry.yarnpkg.com/cssnano-util-get-match/-/cssnano-util-get-match-4.0.0.tgz#c0e4ca07f5386bb17ec5e52250b4f5961365156d"
-  integrity sha1-wOTKB/U4a7F+xeUiULT1lhNlFW0=
-
-cssnano-util-raw-cache@^4.0.1:
-  version "4.0.1"
-  resolved "https://registry.yarnpkg.com/cssnano-util-raw-cache/-/cssnano-util-raw-cache-4.0.1.tgz#b26d5fd5f72a11dfe7a7846fb4c67260f96bf282"
-  integrity sha512-qLuYtWK2b2Dy55I8ZX3ky1Z16WYsx544Q0UWViebptpwn/xDBmog2TLg4f+DBMg1rJ6JDWtn96WHbOKDWt1WQA==
-  dependencies:
-    postcss "^7.0.0"
-
-cssnano-util-same-parent@^4.0.0:
-  version "4.0.1"
-  resolved "https://registry.yarnpkg.com/cssnano-util-same-parent/-/cssnano-util-same-parent-4.0.1.tgz#574082fb2859d2db433855835d9a8456ea18bbf3"
-  integrity sha512-WcKx5OY+KoSIAxBW6UBBRay1U6vkYheCdjyVNDm85zt5K9mHoGOfsOsqIszfAqrQQFIIKgjh2+FDgIj/zsl21Q==
-
-cssnano@^4.1.10:
-  version "4.1.10"
-  resolved "https://registry.yarnpkg.com/cssnano/-/cssnano-4.1.10.tgz#0ac41f0b13d13d465487e111b778d42da631b8b2"
-  integrity sha512-5wny+F6H4/8RgNlaqab4ktc3e0/blKutmq8yNlBFXA//nSFFAqAngjNVRzUvCgYROULmZZUoosL/KSoZo5aUaQ==
-  dependencies:
-    cosmiconfig "^5.0.0"
-    cssnano-preset-default "^4.0.7"
-    is-resolvable "^1.0.0"
-    postcss "^7.0.0"
-
-csso@^4.0.2:
-  version "4.2.0"
-  resolved "https://registry.yarnpkg.com/csso/-/csso-4.2.0.tgz#ea3a561346e8dc9f546d6febedd50187cf389529"
-  integrity sha512-wvlcdIbf6pwKEk7vHj8/Bkc0B4ylXZruLvOgs9doS5eOsOpuodOV2zJChSpkp+pRpYQLQMeF04nr3Z68Sta9jA==
-  dependencies:
-    css-tree "^1.1.2"
-
-cssom@^0.4.4:
-  version "0.4.4"
-  resolved "https://registry.yarnpkg.com/cssom/-/cssom-0.4.4.tgz#5a66cf93d2d0b661d80bf6a44fb65f5c2e4e0a10"
-  integrity sha512-p3pvU7r1MyyqbTk+WbNJIgJjG2VmTIaB10rI93LzVPrmDJKkzKYMtxxyAvQXR/NS6otuzveI7+7BBq3SjBS2mw==
-
-cssom@~0.3.6:
-  version "0.3.8"
-  resolved "https://registry.yarnpkg.com/cssom/-/cssom-0.3.8.tgz#9f1276f5b2b463f2114d3f2c75250af8c1a36f4a"
-  integrity sha512-b0tGHbfegbhPJpxpiBPU2sCkigAqtM9O121le6bbOlgyV+NyGyCmVfJ6QW9eRjz8CpNfWEOYBIMIGRYkLwsIYg==
-
-cssstyle@^2.2.0:
-  version "2.3.0"
-  resolved "https://registry.yarnpkg.com/cssstyle/-/cssstyle-2.3.0.tgz#ff665a0ddbdc31864b09647f34163443d90b0852"
-  integrity sha512-AZL67abkUzIuvcHqk7c09cezpGNcxUxU4Ioi/05xHk4DQeTkWmGYftIE6ctU6AEt+Gn4n1lDStOtj7FKycP71A==
-  dependencies:
-    cssom "~0.3.6"
-
-csstype@^3.0.2, csstype@^3.0.6:
-  version "3.0.7"
-  resolved "https://registry.yarnpkg.com/csstype/-/csstype-3.0.7.tgz#2a5fb75e1015e84dd15692f71e89a1450290950b"
-  integrity sha512-KxnUB0ZMlnUWCsx2Z8MUsr6qV6ja1w9ArPErJaJaF8a5SOWoHLIszeCTKGRGRgtLgYrs1E8CHkNSP1VZTTPc9g==
-
-cyclist@^1.0.1:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/cyclist/-/cyclist-1.0.1.tgz#596e9698fd0c80e12038c2b82d6eb1b35b6224d9"
-  integrity sha1-WW6WmP0MgOEgOMK4LW6xs1tiJNk=
-
-d@1, d@^1.0.1:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/d/-/d-1.0.1.tgz#8698095372d58dbee346ffd0c7093f99f8f9eb5a"
-  integrity sha512-m62ShEObQ39CfralilEQRjH6oAMtNCV1xJyEx5LpRYUVN+EviphDgUc/F3hnYbADmkiNs67Y+3ylmlG7Lnu+FA==
-  dependencies:
-    es5-ext "^0.10.50"
-    type "^1.0.1"
-
-damerau-levenshtein@^1.0.6:
-  version "1.0.6"
-  resolved "https://registry.yarnpkg.com/damerau-levenshtein/-/damerau-levenshtein-1.0.6.tgz#143c1641cb3d85c60c32329e26899adea8701791"
-  integrity sha512-JVrozIeElnj3QzfUIt8tB8YMluBJom4Vw9qTPpjGYQ9fYlB3D/rb6OordUxf3xeFB35LKWs0xqcO5U6ySvBtug==
-
-dashdash@^1.12.0:
-  version "1.14.1"
-  resolved "https://registry.yarnpkg.com/dashdash/-/dashdash-1.14.1.tgz#853cfa0f7cbe2fed5de20326b8dd581035f6e2f0"
-  integrity sha1-hTz6D3y+L+1d4gMmuN1YEDX24vA=
-  dependencies:
-    assert-plus "^1.0.0"
-
-data-urls@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/data-urls/-/data-urls-2.0.0.tgz#156485a72963a970f5d5821aaf642bef2bf2db9b"
-  integrity sha512-X5eWTSXO/BJmpdIKCRuKUgSCgAN0OwliVK3yPKbwIWU1Tdw5BRajxlzMidvh+gwko9AfQ9zIj52pzF91Q3YAvQ==
-  dependencies:
-    abab "^2.0.3"
-    whatwg-mimetype "^2.3.0"
-    whatwg-url "^8.0.0"
-
-debug@2.6.9, debug@^2.2.0, debug@^2.3.3, debug@^2.6.0, debug@^2.6.9:
-  version "2.6.9"
-  resolved "https://registry.yarnpkg.com/debug/-/debug-2.6.9.tgz#5d128515df134ff327e90a4c93f4e077a536341f"
-  integrity sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==
-  dependencies:
-    ms "2.0.0"
-
-debug@^3.1.1, debug@^3.2.6:
-  version "3.2.7"
-  resolved "https://registry.yarnpkg.com/debug/-/debug-3.2.7.tgz#72580b7e9145fb39b6676f9c5e5fb100b934179a"
-  integrity sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ==
-  dependencies:
-    ms "^2.1.1"
-
-debug@^4.0.1, debug@^4.1.0, debug@^4.1.1:
-  version "4.3.1"
-  resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.1.tgz#f0d229c505e0c6d8c49ac553d1b13dc183f6b2ee"
-  integrity sha512-doEwdvm4PCeK4K3RQN2ZC2BYUBaxwLARCqZmMjtF8a51J2Rb0xpVloFRnCODwqjpwnAoao4pelN8l3RJdv3gRQ==
-  dependencies:
-    ms "2.1.2"
-
-decamelize@^1.2.0:
-  version "1.2.0"
-  resolved "https://registry.yarnpkg.com/decamelize/-/decamelize-1.2.0.tgz#f6534d15148269b20352e7bee26f501f9a191290"
-  integrity sha1-9lNNFRSCabIDUue+4m9QH5oZEpA=
-
-decimal.js@^10.2.0:
-  version "10.2.1"
-  resolved "https://registry.yarnpkg.com/decimal.js/-/decimal.js-10.2.1.tgz#238ae7b0f0c793d3e3cea410108b35a2c01426a3"
-  integrity sha512-KaL7+6Fw6i5A2XSnsbhm/6B+NuEA7TZ4vqxnd5tXz9sbKtrN9Srj8ab4vKVdK8YAqZO9P1kg45Y6YLoduPf+kw==
-
-decode-uri-component@^0.2.0:
-  version "0.2.0"
-  resolved "https://registry.yarnpkg.com/decode-uri-component/-/decode-uri-component-0.2.0.tgz#eb3913333458775cb84cd1a1fae062106bb87545"
-  integrity sha1-6zkTMzRYd1y4TNGh+uBiEGu4dUU=
-
-dedent@^0.7.0:
-  version "0.7.0"
-  resolved "https://registry.yarnpkg.com/dedent/-/dedent-0.7.0.tgz#2495ddbaf6eb874abb0e1be9df22d2e5a544326c"
-  integrity sha1-JJXduvbrh0q7Dhvp3yLS5aVEMmw=
-
-deep-equal@^1.0.1:
-  version "1.1.1"
-  resolved "https://registry.yarnpkg.com/deep-equal/-/deep-equal-1.1.1.tgz#b5c98c942ceffaf7cb051e24e1434a25a2e6076a"
-  integrity sha512-yd9c5AdiqVcR+JjcwUQb9DkhJc8ngNr0MahEBGvDiJw8puWab2yZlh+nkasOnZP+EGTAP6rRp2JzJhJZzvNF8g==
-  dependencies:
-    is-arguments "^1.0.4"
-    is-date-object "^1.0.1"
-    is-regex "^1.0.4"
-    object-is "^1.0.1"
-    object-keys "^1.1.1"
-    regexp.prototype.flags "^1.2.0"
-
-deep-is@^0.1.3, deep-is@~0.1.3:
-  version "0.1.3"
-  resolved "https://registry.yarnpkg.com/deep-is/-/deep-is-0.1.3.tgz#b369d6fb5dbc13eecf524f91b070feedc357cf34"
-  integrity sha1-s2nW+128E+7PUk+RsHD+7cNXzzQ=
-
-deepmerge@^4.2.2:
-  version "4.2.2"
-  resolved "https://registry.yarnpkg.com/deepmerge/-/deepmerge-4.2.2.tgz#44d2ea3679b8f4d4ffba33f03d865fc1e7bf4955"
-  integrity sha512-FJ3UgI4gIl+PHZm53knsuSFpE+nESMr7M4v9QcgB7S63Kj/6WqMiFQJpBBYz1Pt+66bZpP3Q7Lye0Oo9MPKEdg==
-
-default-gateway@^4.2.0:
-  version "4.2.0"
-  resolved "https://registry.yarnpkg.com/default-gateway/-/default-gateway-4.2.0.tgz#167104c7500c2115f6dd69b0a536bb8ed720552b"
-  integrity sha512-h6sMrVB1VMWVrW13mSc6ia/DwYYw5MN6+exNu1OaJeFac5aSAvwM7lZ0NVfTABuSkQelr4h5oebg3KB1XPdjgA==
-  dependencies:
-    execa "^1.0.0"
-    ip-regex "^2.1.0"
-
-define-properties@^1.1.2, define-properties@^1.1.3:
-  version "1.1.3"
-  resolved "https://registry.yarnpkg.com/define-properties/-/define-properties-1.1.3.tgz#cf88da6cbee26fe6db7094f61d870cbd84cee9f1"
-  integrity sha512-3MqfYKj2lLzdMSf8ZIZE/V+Zuy+BgD6f164e8K2w7dgnpKArBDerGYpM46IYYcjnkdPNMjPk9A6VFB8+3SKlXQ==
-  dependencies:
-    object-keys "^1.0.12"
-
-define-property@^0.2.5:
-  version "0.2.5"
-  resolved "https://registry.yarnpkg.com/define-property/-/define-property-0.2.5.tgz#c35b1ef918ec3c990f9a5bc57be04aacec5c8116"
-  integrity sha1-w1se+RjsPJkPmlvFe+BKrOxcgRY=
-  dependencies:
-    is-descriptor "^0.1.0"
-
-define-property@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/define-property/-/define-property-1.0.0.tgz#769ebaaf3f4a63aad3af9e8d304c9bbe79bfb0e6"
-  integrity sha1-dp66rz9KY6rTr56NMEybvnm/sOY=
-  dependencies:
-    is-descriptor "^1.0.0"
-
-define-property@^2.0.2:
-  version "2.0.2"
-  resolved "https://registry.yarnpkg.com/define-property/-/define-property-2.0.2.tgz#d459689e8d654ba77e02a817f8710d702cb16e9d"
-  integrity sha512-jwK2UV4cnPpbcG7+VRARKTZPUWowwXA8bzH5NP6ud0oeAxyYPuGZUAC7hMugpCdz4BeSZl2Dl9k66CHJ/46ZYQ==
-  dependencies:
-    is-descriptor "^1.0.2"
-    isobject "^3.0.1"
-
-del@^4.1.1:
-  version "4.1.1"
-  resolved "https://registry.yarnpkg.com/del/-/del-4.1.1.tgz#9e8f117222ea44a31ff3a156c049b99052a9f0b4"
-  integrity sha512-QwGuEUouP2kVwQenAsOof5Fv8K9t3D8Ca8NxcXKrIpEHjTXK5J2nXLdP+ALI1cgv8wj7KuwBhTwBkOZSJKM5XQ==
-  dependencies:
-    "@types/glob" "^7.1.1"
-    globby "^6.1.0"
-    is-path-cwd "^2.0.0"
-    is-path-in-cwd "^2.0.0"
-    p-map "^2.0.0"
-    pify "^4.0.1"
-    rimraf "^2.6.3"
-
-delayed-stream@~1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/delayed-stream/-/delayed-stream-1.0.0.tgz#df3ae199acadfb7d440aaae0b29e2272b24ec619"
-  integrity sha1-3zrhmayt+31ECqrgsp4icrJOxhk=
-
-depd@~1.1.2:
-  version "1.1.2"
-  resolved "https://registry.yarnpkg.com/depd/-/depd-1.1.2.tgz#9bcd52e14c097763e749b274c4346ed2e560b5a9"
-  integrity sha1-m81S4UwJd2PnSbJ0xDRu0uVgtak=
-
-dequal@2.0.2:
-  version "2.0.2"
-  resolved "https://registry.yarnpkg.com/dequal/-/dequal-2.0.2.tgz#85ca22025e3a87e65ef75a7a437b35284a7e319d"
-  integrity sha512-q9K8BlJVxK7hQYqa6XISGmBZbtQQWVXSrRrWreHC94rMt1QL/Impruc+7p2CYSYuVIUr+YCt6hjrs1kkdJRTug==
-
-des.js@^1.0.0:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/des.js/-/des.js-1.0.1.tgz#5382142e1bdc53f85d86d53e5f4aa7deb91e0843"
-  integrity sha512-Q0I4pfFrv2VPd34/vfLrFOoRmlYj3OV50i7fskps1jZWK1kApMWWT9G6RRUeYedLcBDIhnSDaUvJMb3AhUlaEA==
-  dependencies:
-    inherits "^2.0.1"
-    minimalistic-assert "^1.0.0"
-
-destroy@~1.0.4:
-  version "1.0.4"
-  resolved "https://registry.yarnpkg.com/destroy/-/destroy-1.0.4.tgz#978857442c44749e4206613e37946205826abd80"
-  integrity sha1-l4hXRCxEdJ5CBmE+N5RiBYJqvYA=
-
-detect-newline@^3.0.0:
-  version "3.1.0"
-  resolved "https://registry.yarnpkg.com/detect-newline/-/detect-newline-3.1.0.tgz#576f5dfc63ae1a192ff192d8ad3af6308991b651"
-  integrity sha512-TLz+x/vEXm/Y7P7wn1EJFNLxYpUD4TgMosxY6fAVJUnJMbupHBOncxyWUG9OpTaH9EBD7uFI5LfEgmMOc54DsA==
-
-detect-node-es@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/detect-node-es/-/detect-node-es-1.0.0.tgz#c0318b9e539a5256ca780dd9575c9345af05b8ed"
-  integrity sha512-S4AHriUkTX9FoFvL4G8hXDcx6t3gp2HpfCza3Q0v6S78gul2hKWifLQbeW+ZF89+hSm2ZIc/uF3J97ZgytgTRg==
-
-detect-node@^2.0.4:
-  version "2.0.4"
-  resolved "https://registry.yarnpkg.com/detect-node/-/detect-node-2.0.4.tgz#014ee8f8f669c5c58023da64b8179c083a28c46c"
-  integrity sha512-ZIzRpLJrOj7jjP2miAtgqIfmzbxa4ZOr5jJc601zklsfEx9oTzmmj2nVpIPRpNlRTIh8lc1kyViIY7BWSGNmKw==
-
-detect-port-alt@1.1.6:
-  version "1.1.6"
-  resolved "https://registry.yarnpkg.com/detect-port-alt/-/detect-port-alt-1.1.6.tgz#24707deabe932d4a3cf621302027c2b266568275"
-  integrity sha512-5tQykt+LqfJFBEYaDITx7S7cR7mJ/zQmLXZ2qt5w04ainYZw6tBf9dBunMjVeVOdYVRUzUOE4HkY5J7+uttb5Q==
-  dependencies:
-    address "^1.0.1"
-    debug "^2.6.0"
-
-diff-sequences@^26.6.2:
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/diff-sequences/-/diff-sequences-26.6.2.tgz#48ba99157de1923412eed41db6b6d4aa9ca7c0b1"
-  integrity sha512-Mv/TDa3nZ9sbc5soK+OoA74BsS3mL37yixCvUAQkiuA4Wz6YtwP/K47n2rv2ovzHZvoiQeA5FTQOschKkEwB0Q==
-
-diffie-hellman@^5.0.0:
-  version "5.0.3"
-  resolved "https://registry.yarnpkg.com/diffie-hellman/-/diffie-hellman-5.0.3.tgz#40e8ee98f55a2149607146921c63e1ae5f3d2875"
-  integrity sha512-kqag/Nl+f3GwyK25fhUMYj81BUOrZ9IuJsjIcDE5icNM9FJHAVm3VcUDxdLPoQtTuUylWm6ZIknYJwwaPxsUzg==
-  dependencies:
-    bn.js "^4.1.0"
-    miller-rabin "^4.0.0"
-    randombytes "^2.0.0"
-
-dir-glob@^3.0.1:
-  version "3.0.1"
-  resolved "https://registry.yarnpkg.com/dir-glob/-/dir-glob-3.0.1.tgz#56dbf73d992a4a93ba1584f4534063fd2e41717f"
-  integrity sha512-WkrWp9GR4KXfKGYzOLmTuGVi1UWFfws377n9cc55/tb6DuqyF6pcQ5AbiHEshaDpY9v6oaSr2XCDidGmMwdzIA==
-  dependencies:
-    path-type "^4.0.0"
-
-dns-equal@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/dns-equal/-/dns-equal-1.0.0.tgz#b39e7f1da6eb0a75ba9c17324b34753c47e0654d"
-  integrity sha1-s55/HabrCnW6nBcySzR1PEfgZU0=
-
-dns-packet@^1.3.1:
-  version "1.3.1"
-  resolved "https://registry.yarnpkg.com/dns-packet/-/dns-packet-1.3.1.tgz#12aa426981075be500b910eedcd0b47dd7deda5a"
-  integrity sha512-0UxfQkMhYAUaZI+xrNZOz/as5KgDU0M/fQ9b6SpkyLbk3GEswDi6PADJVaYJradtRVsRIlF1zLyOodbcTCDzUg==
-  dependencies:
-    ip "^1.1.0"
-    safe-buffer "^5.0.1"
-
-dns-txt@^2.0.2:
-  version "2.0.2"
-  resolved "https://registry.yarnpkg.com/dns-txt/-/dns-txt-2.0.2.tgz#b91d806f5d27188e4ab3e7d107d881a1cc4642b6"
-  integrity sha1-uR2Ab10nGI5Ks+fRB9iBocxGQrY=
-  dependencies:
-    buffer-indexof "^1.0.0"
-
-doctrine@1.5.0:
-  version "1.5.0"
-  resolved "https://registry.yarnpkg.com/doctrine/-/doctrine-1.5.0.tgz#379dce730f6166f76cefa4e6707a159b02c5a6fa"
-  integrity sha1-N53Ocw9hZvds76TmcHoVmwLFpvo=
-  dependencies:
-    esutils "^2.0.2"
-    isarray "^1.0.0"
-
-doctrine@^2.1.0:
-  version "2.1.0"
-  resolved "https://registry.yarnpkg.com/doctrine/-/doctrine-2.1.0.tgz#5cd01fc101621b42c4cd7f5d1a66243716d3f39d"
-  integrity sha512-35mSku4ZXK0vfCuHEDAwt55dg2jNajHZ1odvF+8SSr82EsZY4QmXfuWso8oEd8zRhVObSN18aM0CjSdoBX7zIw==
-  dependencies:
-    esutils "^2.0.2"
-
-doctrine@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/doctrine/-/doctrine-3.0.0.tgz#addebead72a6574db783639dc87a121773973961"
-  integrity sha512-yS+Q5i3hBf7GBkd4KG8a7eBNNWNGLTaEwwYWUijIYM7zrlYDM0BFXHjjPWlWZ1Rg7UaddZeIDmi9jF3HmqiQ2w==
-  dependencies:
-    esutils "^2.0.2"
-
-dom-accessibility-api@^0.5.4:
-  version "0.5.4"
-  resolved "https://registry.yarnpkg.com/dom-accessibility-api/-/dom-accessibility-api-0.5.4.tgz#b06d059cdd4a4ad9a79275f9d414a5c126241166"
-  integrity sha512-TvrjBckDy2c6v6RLxPv5QXOnU+SmF9nBII5621Ve5fu6Z/BDrENurBEvlC1f44lKEUVqOpK4w9E5Idc5/EgkLQ==
-
-dom-converter@^0.2:
-  version "0.2.0"
-  resolved "https://registry.yarnpkg.com/dom-converter/-/dom-converter-0.2.0.tgz#6721a9daee2e293682955b6afe416771627bb768"
-  integrity sha512-gd3ypIPfOMr9h5jIKq8E3sHOTCjeirnl0WK5ZdS1AW0Odt0b1PaWaHdJ4Qk4klv+YB9aJBS7mESXjFoDQPu6DA==
-  dependencies:
-    utila "~0.4"
-
-dom-serializer@0:
-  version "0.2.2"
-  resolved "https://registry.yarnpkg.com/dom-serializer/-/dom-serializer-0.2.2.tgz#1afb81f533717175d478655debc5e332d9f9bb51"
-  integrity sha512-2/xPb3ORsQ42nHYiSunXkDjPLBaEj/xTwUO4B7XCZQTRk7EBtTOPaygh10YAAh2OI1Qrp6NWfpAhzswj0ydt9g==
-  dependencies:
-    domelementtype "^2.0.1"
-    entities "^2.0.0"
-
-domain-browser@^1.1.1:
-  version "1.2.0"
-  resolved "https://registry.yarnpkg.com/domain-browser/-/domain-browser-1.2.0.tgz#3d31f50191a6749dd1375a7f522e823d42e54eda"
-  integrity sha512-jnjyiM6eRyZl2H+W8Q/zLMA481hzi0eszAaBUzIVnmYVDBbnLxVNnfu1HgEBvCbL+71FrxMl3E6lpKH7Ge3OXA==
-
-domelementtype@1, domelementtype@^1.3.1:
-  version "1.3.1"
-  resolved "https://registry.yarnpkg.com/domelementtype/-/domelementtype-1.3.1.tgz#d048c44b37b0d10a7f2a3d5fee3f4333d790481f"
-  integrity sha512-BSKB+TSpMpFI/HOxCNr1O8aMOTZ8hT3pM3GQ0w/mWRmkhEDSFJkkyzz4XQsBV44BChwGkrDfMyjVD0eA2aFV3w==
-
-domelementtype@^2.0.1:
-  version "2.1.0"
-  resolved "https://registry.yarnpkg.com/domelementtype/-/domelementtype-2.1.0.tgz#a851c080a6d1c3d94344aed151d99f669edf585e"
-  integrity sha512-LsTgx/L5VpD+Q8lmsXSHW2WpA+eBlZ9HPf3erD1IoPF00/3JKHZ3BknUVA2QGDNu69ZNmyFmCWBSO45XjYKC5w==
-
-domexception@^2.0.1:
-  version "2.0.1"
-  resolved "https://registry.yarnpkg.com/domexception/-/domexception-2.0.1.tgz#fb44aefba793e1574b0af6aed2801d057529f304"
-  integrity sha512-yxJ2mFy/sibVQlu5qHjOkf9J3K6zgmCxgJ94u2EdvDOV09H+32LtRswEcUsmUWN72pVLOEnTSRaIVVzVQgS0dg==
-  dependencies:
-    webidl-conversions "^5.0.0"
-
-domhandler@^2.3.0:
-  version "2.4.2"
-  resolved "https://registry.yarnpkg.com/domhandler/-/domhandler-2.4.2.tgz#8805097e933d65e85546f726d60f5eb88b44f803"
-  integrity sha512-JiK04h0Ht5u/80fdLMCEmV4zkNh2BcoMFBmZ/91WtYZ8qVXSKjiw7fXMgFPnHcSZgOo3XdinHvmnDUeMf5R4wA==
-  dependencies:
-    domelementtype "1"
-
-domutils@^1.5.1, domutils@^1.7.0:
-  version "1.7.0"
-  resolved "https://registry.yarnpkg.com/domutils/-/domutils-1.7.0.tgz#56ea341e834e06e6748af7a1cb25da67ea9f8c2a"
-  integrity sha512-Lgd2XcJ/NjEw+7tFvfKxOzCYKZsdct5lczQ2ZaQY8Djz7pfAD3Gbp8ySJWtreII/vDlMVmxwa6pHmdxIYgttDg==
-  dependencies:
-    dom-serializer "0"
-    domelementtype "1"
-
-dot-case@^3.0.4:
-  version "3.0.4"
-  resolved "https://registry.yarnpkg.com/dot-case/-/dot-case-3.0.4.tgz#9b2b670d00a431667a8a75ba29cd1b98809ce751"
-  integrity sha512-Kv5nKlh6yRrdrGvxeJ2e5y2eRUpkUosIW4A2AS38zwSz27zu7ufDwQPi5Jhs3XAlGNetl3bmnGhQsMtkKJnj3w==
-  dependencies:
-    no-case "^3.0.4"
-    tslib "^2.0.3"
-
-dot-prop@^5.2.0:
-  version "5.3.0"
-  resolved "https://registry.yarnpkg.com/dot-prop/-/dot-prop-5.3.0.tgz#90ccce708cd9cd82cc4dc8c3ddd9abdd55b20e88"
-  integrity sha512-QM8q3zDe58hqUqjraQOmzZ1LIH9SWQJTlEKCH4kJ2oQvLZk7RbQXvtDM2XEq3fwkV9CCvvH4LA0AV+ogFsBM2Q==
-  dependencies:
-    is-obj "^2.0.0"
-
-dotenv-expand@5.1.0:
-  version "5.1.0"
-  resolved "https://registry.yarnpkg.com/dotenv-expand/-/dotenv-expand-5.1.0.tgz#3fbaf020bfd794884072ea26b1e9791d45a629f0"
-  integrity sha512-YXQl1DSa4/PQyRfgrv6aoNjhasp/p4qs9FjJ4q4cQk+8m4r6k4ZSiEyytKG8f8W9gi8WsQtIObNmKd+tMzNTmA==
-
-dotenv@8.2.0:
-  version "8.2.0"
-  resolved "https://registry.yarnpkg.com/dotenv/-/dotenv-8.2.0.tgz#97e619259ada750eea3e4ea3e26bceea5424b16a"
-  integrity sha512-8sJ78ElpbDJBHNeBzUbUVLsqKdccaa/BXF1uPTw3GrvQTBgrQrtObr2mUrE38vzYd8cEv+m/JBfDLioYcfXoaw==
-
-duplexer@^0.1.1:
-  version "0.1.2"
-  resolved "https://registry.yarnpkg.com/duplexer/-/duplexer-0.1.2.tgz#3abe43aef3835f8ae077d136ddce0f276b0400e6"
-  integrity sha512-jtD6YG370ZCIi/9GTaJKQxWTZD045+4R4hTk/x1UyoqadyJ9x9CgSi1RlVDQF8U2sxLLSnFkCaMihqljHIWgMg==
-
-duplexify@^3.4.2, duplexify@^3.6.0:
-  version "3.7.1"
-  resolved "https://registry.yarnpkg.com/duplexify/-/duplexify-3.7.1.tgz#2a4df5317f6ccfd91f86d6fd25d8d8a103b88309"
-  integrity sha512-07z8uv2wMyS51kKhD1KsdXJg5WQ6t93RneqRxUHnskXVtlYYkLqM0gqStQZ3pj073g687jPCHrqNfCzawLYh5g==
-  dependencies:
-    end-of-stream "^1.0.0"
-    inherits "^2.0.1"
-    readable-stream "^2.0.0"
-    stream-shift "^1.0.0"
-
-ecc-jsbn@~0.1.1:
-  version "0.1.2"
-  resolved "https://registry.yarnpkg.com/ecc-jsbn/-/ecc-jsbn-0.1.2.tgz#3a83a904e54353287874c564b7549386849a98c9"
-  integrity sha1-OoOpBOVDUyh4dMVkt1SThoSamMk=
-  dependencies:
-    jsbn "~0.1.0"
-    safer-buffer "^2.1.0"
-
-ee-first@1.1.1:
-  version "1.1.1"
-  resolved "https://registry.yarnpkg.com/ee-first/-/ee-first-1.1.1.tgz#590c61156b0ae2f4f0255732a158b266bc56b21d"
-  integrity sha1-WQxhFWsK4vTwJVcyoViyZrxWsh0=
-
-ejs@^2.6.1:
-  version "2.7.4"
-  resolved "https://registry.yarnpkg.com/ejs/-/ejs-2.7.4.tgz#48661287573dcc53e366c7a1ae52c3a120eec9ba"
-  integrity sha512-7vmuyh5+kuUyJKePhQfRQBhXV5Ce+RnaeeQArKu1EAMpL3WbgMt5WG6uQZpEVvYSSsxMXRKOewtDk9RaTKXRlA==
-
-electron-to-chromium@^1.3.564, electron-to-chromium@^1.3.649:
-  version "1.3.671"
-  resolved "https://registry.yarnpkg.com/electron-to-chromium/-/electron-to-chromium-1.3.671.tgz#8feaed6eae42d279fa4611f58c42a5a1eb81b2a0"
-  integrity sha512-RTD97QkdrJKaKwRv9h/wGAaoR2lGxNXEcBXS31vjitgTPwTWAbLdS7cEsBK68eEQy7p6YyT8D5BxBEYHu2SuwQ==
-
-elliptic@^6.5.3:
-  version "6.5.4"
-  resolved "https://registry.yarnpkg.com/elliptic/-/elliptic-6.5.4.tgz#da37cebd31e79a1367e941b592ed1fbebd58abbb"
-  integrity sha512-iLhC6ULemrljPZb+QutR5TQGB+pdW6KGD5RSegS+8sorOZT+rdQFbsQFJgvN3eRqNALqJer4oQ16YvJHlU8hzQ==
-  dependencies:
-    bn.js "^4.11.9"
-    brorand "^1.1.0"
-    hash.js "^1.0.0"
-    hmac-drbg "^1.0.1"
-    inherits "^2.0.4"
-    minimalistic-assert "^1.0.1"
-    minimalistic-crypto-utils "^1.0.1"
-
-emittery@^0.7.1:
-  version "0.7.2"
-  resolved "https://registry.yarnpkg.com/emittery/-/emittery-0.7.2.tgz#25595908e13af0f5674ab419396e2fb394cdfa82"
-  integrity sha512-A8OG5SR/ij3SsJdWDJdkkSYUjQdCUx6APQXem0SaEePBSRg4eymGYwBkKo1Y6DU+af/Jn2dBQqDBvjnr9Vi8nQ==
-
-emoji-regex@^7.0.1:
-  version "7.0.3"
-  resolved "https://registry.yarnpkg.com/emoji-regex/-/emoji-regex-7.0.3.tgz#933a04052860c85e83c122479c4748a8e4c72156"
-  integrity sha512-CwBLREIQ7LvYFB0WyRvwhq5N5qPhc6PMjD6bYggFlI5YyDgl+0vxq5VHbMOFqLg7hfWzmu8T5Z1QofhmTIhItA==
-
-emoji-regex@^8.0.0:
-  version "8.0.0"
-  resolved "https://registry.yarnpkg.com/emoji-regex/-/emoji-regex-8.0.0.tgz#e818fd69ce5ccfcb404594f842963bf53164cc37"
-  integrity sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==
-
-emoji-regex@^9.0.0:
-  version "9.2.1"
-  resolved "https://registry.yarnpkg.com/emoji-regex/-/emoji-regex-9.2.1.tgz#c9b25604256bb3428964bead3ab63069d736f7ee"
-  integrity sha512-117l1H6U4X3Krn+MrzYrL57d5H7siRHWraBs7s+LjRuFK7Fe7hJqnJ0skWlinqsycVLU5YAo6L8CsEYQ0V5prg==
-
-emojis-list@^2.0.0:
-  version "2.1.0"
-  resolved "https://registry.yarnpkg.com/emojis-list/-/emojis-list-2.1.0.tgz#4daa4d9db00f9819880c79fa457ae5b09a1fd389"
-  integrity sha1-TapNnbAPmBmIDHn6RXrlsJof04k=
-
-emojis-list@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/emojis-list/-/emojis-list-3.0.0.tgz#5570662046ad29e2e916e71aae260abdff4f6a78"
-  integrity sha512-/kyM18EfinwXZbno9FyUGeFh87KC8HRQBQGildHZbEuRyWFOmv1U10o9BBp8XVZDVNNuQKyIGIu5ZYAAXJ0V2Q==
-
-encodeurl@~1.0.2:
-  version "1.0.2"
-  resolved "https://registry.yarnpkg.com/encodeurl/-/encodeurl-1.0.2.tgz#ad3ff4c86ec2d029322f5a02c3a9a606c95b3f59"
-  integrity sha1-rT/0yG7C0CkyL1oCw6mmBslbP1k=
-
-end-of-stream@^1.0.0, end-of-stream@^1.1.0:
-  version "1.4.4"
-  resolved "https://registry.yarnpkg.com/end-of-stream/-/end-of-stream-1.4.4.tgz#5ae64a5f45057baf3626ec14da0ca5e4b2431eb0"
-  integrity sha512-+uw1inIHVPQoaVuHzRyXd21icM+cnt4CzD5rW+NC1wjOUSTOs+Te7FOv7AhN7vS9x/oIyhLP5PR1H+phQAHu5Q==
-  dependencies:
-    once "^1.4.0"
-
-enhanced-resolve@^4.3.0:
-  version "4.5.0"
-  resolved "https://registry.yarnpkg.com/enhanced-resolve/-/enhanced-resolve-4.5.0.tgz#2f3cfd84dbe3b487f18f2db2ef1e064a571ca5ec"
-  integrity sha512-Nv9m36S/vxpsI+Hc4/ZGRs0n9mXqSWGGq49zxb/cJfPAQMbUtttJAlNPS4AQzaBdw/pKskw5bMbekT/Y7W/Wlg==
-  dependencies:
-    graceful-fs "^4.1.2"
-    memory-fs "^0.5.0"
-    tapable "^1.0.0"
-
-enquirer@^2.3.5:
-  version "2.3.6"
-  resolved "https://registry.yarnpkg.com/enquirer/-/enquirer-2.3.6.tgz#2a7fe5dd634a1e4125a975ec994ff5456dc3734d"
-  integrity sha512-yjNnPr315/FjS4zIsUxYguYUPP2e1NK4d7E7ZOLiyYCcbFBiTMyID+2wvm2w6+pZ/odMA7cRkjhsPbltwBOrLg==
-  dependencies:
-    ansi-colors "^4.1.1"
-
-entities@^1.1.1:
-  version "1.1.2"
-  resolved "https://registry.yarnpkg.com/entities/-/entities-1.1.2.tgz#bdfa735299664dfafd34529ed4f8522a275fea56"
-  integrity sha512-f2LZMYl1Fzu7YSBKg+RoROelpOaNrcGmE9AZubeDfrCEia483oW4MI4VyFd5VNHIgQ/7qm1I0wUHK1eJnn2y2w==
-
-entities@^2.0.0:
-  version "2.2.0"
-  resolved "https://registry.yarnpkg.com/entities/-/entities-2.2.0.tgz#098dc90ebb83d8dffa089d55256b351d34c4da55"
-  integrity sha512-p92if5Nz619I0w+akJrLZH0MX0Pb5DX39XOwQTtXSdQQOaYH03S1uIQp4mhOZtAXrxq4ViO67YTiLBo2638o9A==
-
-errno@^0.1.3, errno@~0.1.7:
-  version "0.1.8"
-  resolved "https://registry.yarnpkg.com/errno/-/errno-0.1.8.tgz#8bb3e9c7d463be4976ff888f76b4809ebc2e811f"
-  integrity sha512-dJ6oBr5SQ1VSd9qkk7ByRgb/1SH4JZjCHSW/mr63/QcXO9zLVxvJ6Oy13nio03rxpSnVDDjFor75SjVeZWPW/A==
-  dependencies:
-    prr "~1.0.1"
-
-error-ex@^1.2.0, error-ex@^1.3.1:
-  version "1.3.2"
-  resolved "https://registry.yarnpkg.com/error-ex/-/error-ex-1.3.2.tgz#b4ac40648107fdcdcfae242f428bea8a14d4f1bf"
-  integrity sha512-7dFHNmqeFSEt2ZBsCriorKnn3Z2pj+fd9kmI6QoWw4//DL+icEBfc0U7qJCisqrTsKTjw4fNFy2pW9OqStD84g==
-  dependencies:
-    is-arrayish "^0.2.1"
-
-error-stack-parser@^2.0.6:
-  version "2.0.6"
-  resolved "https://registry.yarnpkg.com/error-stack-parser/-/error-stack-parser-2.0.6.tgz#5a99a707bd7a4c58a797902d48d82803ede6aad8"
-  integrity sha512-d51brTeqC+BHlwF0BhPtcYgF5nlzf9ZZ0ZIUQNZpc9ZB9qw5IJ2diTrBY9jlCJkTLITYPjmiX6OWCwH+fuyNgQ==
-  dependencies:
-    stackframe "^1.1.1"
-
-es-abstract@^1.17.2:
-  version "1.17.7"
-  resolved "https://registry.yarnpkg.com/es-abstract/-/es-abstract-1.17.7.tgz#a4de61b2f66989fc7421676c1cb9787573ace54c"
-  integrity sha512-VBl/gnfcJ7OercKA9MVaegWsBHFjV492syMudcnQZvt/Dw8ezpcOHYZXa/J96O8vx+g4x65YKhxOwDUh63aS5g==
-  dependencies:
-    es-to-primitive "^1.2.1"
-    function-bind "^1.1.1"
-    has "^1.0.3"
-    has-symbols "^1.0.1"
-    is-callable "^1.2.2"
-    is-regex "^1.1.1"
-    object-inspect "^1.8.0"
-    object-keys "^1.1.1"
-    object.assign "^4.1.1"
-    string.prototype.trimend "^1.0.1"
-    string.prototype.trimstart "^1.0.1"
-
-es-abstract@^1.18.0-next.1, es-abstract@^1.18.0-next.2:
-  version "1.18.0-next.2"
-  resolved "https://registry.yarnpkg.com/es-abstract/-/es-abstract-1.18.0-next.2.tgz#088101a55f0541f595e7e057199e27ddc8f3a5c2"
-  integrity sha512-Ih4ZMFHEtZupnUh6497zEL4y2+w8+1ljnCyaTa+adcoafI1GOvMwFlDjBLfWR7y9VLfrjRJe9ocuHY1PSR9jjw==
-  dependencies:
-    call-bind "^1.0.2"
-    es-to-primitive "^1.2.1"
-    function-bind "^1.1.1"
-    get-intrinsic "^1.0.2"
-    has "^1.0.3"
-    has-symbols "^1.0.1"
-    is-callable "^1.2.2"
-    is-negative-zero "^2.0.1"
-    is-regex "^1.1.1"
-    object-inspect "^1.9.0"
-    object-keys "^1.1.1"
-    object.assign "^4.1.2"
-    string.prototype.trimend "^1.0.3"
-    string.prototype.trimstart "^1.0.3"
-
-es-to-primitive@^1.2.1:
-  version "1.2.1"
-  resolved "https://registry.yarnpkg.com/es-to-primitive/-/es-to-primitive-1.2.1.tgz#e55cd4c9cdc188bcefb03b366c736323fc5c898a"
-  integrity sha512-QCOllgZJtaUo9miYBcLChTUaHNjJF3PYs1VidD7AwiEj1kYxKeQTctLAezAOH5ZKRH0g2IgPn6KwB4IT8iRpvA==
-  dependencies:
-    is-callable "^1.1.4"
-    is-date-object "^1.0.1"
-    is-symbol "^1.0.2"
-
-es5-ext@^0.10.35, es5-ext@^0.10.50:
-  version "0.10.53"
-  resolved "https://registry.yarnpkg.com/es5-ext/-/es5-ext-0.10.53.tgz#93c5a3acfdbef275220ad72644ad02ee18368de1"
-  integrity sha512-Xs2Stw6NiNHWypzRTY1MtaG/uJlwCk8kH81920ma8mvN8Xq1gsfhZvpkImLQArw8AHnv8MT2I45J3c0R8slE+Q==
-  dependencies:
-    es6-iterator "~2.0.3"
-    es6-symbol "~3.1.3"
-    next-tick "~1.0.0"
-
-es6-iterator@2.0.3, es6-iterator@~2.0.3:
-  version "2.0.3"
-  resolved "https://registry.yarnpkg.com/es6-iterator/-/es6-iterator-2.0.3.tgz#a7de889141a05a94b0854403b2d0a0fbfa98f3b7"
-  integrity sha1-p96IkUGgWpSwhUQDstCg+/qY87c=
-  dependencies:
-    d "1"
-    es5-ext "^0.10.35"
-    es6-symbol "^3.1.1"
-
-es6-symbol@^3.1.1, es6-symbol@~3.1.3:
-  version "3.1.3"
-  resolved "https://registry.yarnpkg.com/es6-symbol/-/es6-symbol-3.1.3.tgz#bad5d3c1bcdac28269f4cb331e431c78ac705d18"
-  integrity sha512-NJ6Yn3FuDinBaBRWl/q5X/s4koRHBrgKAu+yGI6JCBeiu3qrcbJhwT2GeR/EXVfylRk8dpQVJoLEFhK+Mu31NA==
-  dependencies:
-    d "^1.0.1"
-    ext "^1.1.2"
-
-escalade@^3.0.2, escalade@^3.1.1:
-  version "3.1.1"
-  resolved "https://registry.yarnpkg.com/escalade/-/escalade-3.1.1.tgz#d8cfdc7000965c5a0174b4a82eaa5c0552742e40"
-  integrity sha512-k0er2gUkLf8O0zKJiAhmkTnJlTvINGv7ygDNPbeIsX/TJjGJZHuh9B2UxbsaEkmlEo9MfhrSzmhIlhRlI2GXnw==
-
-escape-html@~1.0.3:
-  version "1.0.3"
-  resolved "https://registry.yarnpkg.com/escape-html/-/escape-html-1.0.3.tgz#0258eae4d3d0c0974de1c169188ef0051d1d1988"
-  integrity sha1-Aljq5NPQwJdN4cFpGI7wBR0dGYg=
-
-escape-string-regexp@2.0.0, escape-string-regexp@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/escape-string-regexp/-/escape-string-regexp-2.0.0.tgz#a30304e99daa32e23b2fd20f51babd07cffca344"
-  integrity sha512-UpzcLCXolUWcNu5HtVMHYdXJjArjsF9C0aNnquZYY4uW/Vu0miy5YoWvbV345HauVvcAUnpRuhMMcqTcGOY2+w==
-
-escape-string-regexp@^1.0.5:
-  version "1.0.5"
-  resolved "https://registry.yarnpkg.com/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz#1b61c0562190a8dff6ae3bb2cf0200ca130b86d4"
-  integrity sha1-G2HAViGQqN/2rjuyzwIAyhMLhtQ=
-
-escape-string-regexp@^4.0.0:
-  version "4.0.0"
-  resolved "https://registry.yarnpkg.com/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz#14ba83a5d373e3d311e5afca29cf5bfad965bf34"
-  integrity sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA==
-
-escodegen@^1.14.1:
-  version "1.14.3"
-  resolved "https://registry.yarnpkg.com/escodegen/-/escodegen-1.14.3.tgz#4e7b81fba61581dc97582ed78cab7f0e8d63f503"
-  integrity sha512-qFcX0XJkdg+PB3xjZZG/wKSuT1PnQWx57+TVSjIMmILd2yC/6ByYElPwJnslDsuWuSAp4AwJGumarAAmJch5Kw==
-  dependencies:
-    esprima "^4.0.1"
-    estraverse "^4.2.0"
-    esutils "^2.0.2"
-    optionator "^0.8.1"
-  optionalDependencies:
-    source-map "~0.6.1"
-
-eslint-config-react-app@^6.0.0:
-  version "6.0.0"
-  resolved "https://registry.yarnpkg.com/eslint-config-react-app/-/eslint-config-react-app-6.0.0.tgz#ccff9fc8e36b322902844cbd79197982be355a0e"
-  integrity sha512-bpoAAC+YRfzq0dsTk+6v9aHm/uqnDwayNAXleMypGl6CpxI9oXXscVHo4fk3eJPIn+rsbtNetB4r/ZIidFIE8A==
-  dependencies:
-    confusing-browser-globals "^1.0.10"
-
-eslint-import-resolver-node@^0.3.4:
-  version "0.3.4"
-  resolved "https://registry.yarnpkg.com/eslint-import-resolver-node/-/eslint-import-resolver-node-0.3.4.tgz#85ffa81942c25012d8231096ddf679c03042c717"
-  integrity sha512-ogtf+5AB/O+nM6DIeBUNr2fuT7ot9Qg/1harBfBtaP13ekEWFQEEMP94BCB7zaNW3gyY+8SHYF00rnqYwXKWOA==
-  dependencies:
-    debug "^2.6.9"
-    resolve "^1.13.1"
-
-eslint-module-utils@^2.6.0:
-  version "2.6.0"
-  resolved "https://registry.yarnpkg.com/eslint-module-utils/-/eslint-module-utils-2.6.0.tgz#579ebd094f56af7797d19c9866c9c9486629bfa6"
-  integrity sha512-6j9xxegbqe8/kZY8cYpcp0xhbK0EgJlg3g9mib3/miLaExuuwc3n5UEfSnU6hWMbT0FAYVvDbL9RrRgpUeQIvA==
-  dependencies:
-    debug "^2.6.9"
-    pkg-dir "^2.0.0"
-
-eslint-plugin-flowtype@^5.2.0:
-  version "5.2.2"
-  resolved "https://registry.yarnpkg.com/eslint-plugin-flowtype/-/eslint-plugin-flowtype-5.2.2.tgz#c6e5dd2fad4e757a1c63e652da6cff597659554f"
-  integrity sha512-C4PlPYpszr9h1cBfUbTNRI1IdxUCF0qrXAHkXS2+bESp7WUUCnvb3UBBnYlaQLvJYJ2lRz+2SPQQ/WyV7p/Tow==
-  dependencies:
-    lodash "^4.17.15"
-    string-natural-compare "^3.0.1"
-
-eslint-plugin-import@^2.22.1:
-  version "2.22.1"
-  resolved "https://registry.yarnpkg.com/eslint-plugin-import/-/eslint-plugin-import-2.22.1.tgz#0896c7e6a0cf44109a2d97b95903c2bb689d7702"
-  integrity sha512-8K7JjINHOpH64ozkAhpT3sd+FswIZTfMZTjdx052pnWrgRCVfp8op9tbjpAk3DdUeI/Ba4C8OjdC0r90erHEOw==
-  dependencies:
-    array-includes "^3.1.1"
-    array.prototype.flat "^1.2.3"
-    contains-path "^0.1.0"
-    debug "^2.6.9"
-    doctrine "1.5.0"
-    eslint-import-resolver-node "^0.3.4"
-    eslint-module-utils "^2.6.0"
-    has "^1.0.3"
-    minimatch "^3.0.4"
-    object.values "^1.1.1"
-    read-pkg-up "^2.0.0"
-    resolve "^1.17.0"
-    tsconfig-paths "^3.9.0"
-
-eslint-plugin-jest@^24.1.0:
-  version "24.1.5"
-  resolved "https://registry.yarnpkg.com/eslint-plugin-jest/-/eslint-plugin-jest-24.1.5.tgz#1e866a9f0deac587d0a3d5d7cefe99815a580de2"
-  integrity sha512-FIP3lwC8EzEG+rOs1y96cOJmMVpdFNreoDJv29B5vIupVssRi8zrSY3QadogT0K3h1Y8TMxJ6ZSAzYUmFCp2hg==
-  dependencies:
-    "@typescript-eslint/experimental-utils" "^4.0.1"
-
-eslint-plugin-jsx-a11y@^6.3.1:
-  version "6.4.1"
-  resolved "https://registry.yarnpkg.com/eslint-plugin-jsx-a11y/-/eslint-plugin-jsx-a11y-6.4.1.tgz#a2d84caa49756942f42f1ffab9002436391718fd"
-  integrity sha512-0rGPJBbwHoGNPU73/QCLP/vveMlM1b1Z9PponxO87jfr6tuH5ligXbDT6nHSSzBC8ovX2Z+BQu7Bk5D/Xgq9zg==
-  dependencies:
-    "@babel/runtime" "^7.11.2"
-    aria-query "^4.2.2"
-    array-includes "^3.1.1"
-    ast-types-flow "^0.0.7"
-    axe-core "^4.0.2"
-    axobject-query "^2.2.0"
-    damerau-levenshtein "^1.0.6"
-    emoji-regex "^9.0.0"
-    has "^1.0.3"
-    jsx-ast-utils "^3.1.0"
-    language-tags "^1.0.5"
-
-eslint-plugin-react-hooks@^4.2.0:
-  version "4.2.0"
-  resolved "https://registry.yarnpkg.com/eslint-plugin-react-hooks/-/eslint-plugin-react-hooks-4.2.0.tgz#8c229c268d468956334c943bb45fc860280f5556"
-  integrity sha512-623WEiZJqxR7VdxFCKLI6d6LLpwJkGPYKODnkH3D7WpOG5KM8yWueBd8TLsNAetEJNF5iJmolaAKO3F8yzyVBQ==
-
-eslint-plugin-react@^7.21.5:
-  version "7.22.0"
-  resolved "https://registry.yarnpkg.com/eslint-plugin-react/-/eslint-plugin-react-7.22.0.tgz#3d1c542d1d3169c45421c1215d9470e341707269"
-  integrity sha512-p30tuX3VS+NWv9nQot9xIGAHBXR0+xJVaZriEsHoJrASGCJZDJ8JLNM0YqKqI0AKm6Uxaa1VUHoNEibxRCMQHA==
-  dependencies:
-    array-includes "^3.1.1"
-    array.prototype.flatmap "^1.2.3"
-    doctrine "^2.1.0"
-    has "^1.0.3"
-    jsx-ast-utils "^2.4.1 || ^3.0.0"
-    object.entries "^1.1.2"
-    object.fromentries "^2.0.2"
-    object.values "^1.1.1"
-    prop-types "^15.7.2"
-    resolve "^1.18.1"
-    string.prototype.matchall "^4.0.2"
-
-eslint-plugin-testing-library@^3.9.2:
-  version "3.10.1"
-  resolved "https://registry.yarnpkg.com/eslint-plugin-testing-library/-/eslint-plugin-testing-library-3.10.1.tgz#4dd02306d601c3238fdabf1d1dbc5f2a8e85d531"
-  integrity sha512-nQIFe2muIFv2oR2zIuXE4vTbcFNx8hZKRzgHZqJg8rfopIWwoTwtlbCCNELT/jXzVe1uZF68ALGYoDXjLczKiQ==
-  dependencies:
-    "@typescript-eslint/experimental-utils" "^3.10.1"
-
-eslint-scope@^4.0.3:
-  version "4.0.3"
-  resolved "https://registry.yarnpkg.com/eslint-scope/-/eslint-scope-4.0.3.tgz#ca03833310f6889a3264781aa82e63eb9cfe7848"
-  integrity sha512-p7VutNr1O/QrxysMo3E45FjYDTeXBy0iTltPFNSqKAIfjDSXC+4dj+qfyuD8bfAXrW/y6lW3O76VaYNPKfpKrg==
-  dependencies:
-    esrecurse "^4.1.0"
-    estraverse "^4.1.1"
-
-eslint-scope@^5.0.0, eslint-scope@^5.1.1:
-  version "5.1.1"
-  resolved "https://registry.yarnpkg.com/eslint-scope/-/eslint-scope-5.1.1.tgz#e786e59a66cb92b3f6c1fb0d508aab174848f48c"
-  integrity sha512-2NxwbF/hZ0KpepYN0cNbo+FN6XoK7GaHlQhgx/hIZl6Va0bF45RQOOwhLIy8lQDbuCiadSLCBnH2CFYquit5bw==
-  dependencies:
-    esrecurse "^4.3.0"
-    estraverse "^4.1.1"
-
-eslint-utils@^2.0.0, eslint-utils@^2.1.0:
-  version "2.1.0"
-  resolved "https://registry.yarnpkg.com/eslint-utils/-/eslint-utils-2.1.0.tgz#d2de5e03424e707dc10c74068ddedae708741b27"
-  integrity sha512-w94dQYoauyvlDc43XnGB8lU3Zt713vNChgt4EWwhXAP2XkBvndfxF0AgIqKOOasjPIPzj9JqgwkwbCYD0/V3Zg==
-  dependencies:
-    eslint-visitor-keys "^1.1.0"
-
-eslint-visitor-keys@^1.0.0, eslint-visitor-keys@^1.1.0, eslint-visitor-keys@^1.3.0:
-  version "1.3.0"
-  resolved "https://registry.yarnpkg.com/eslint-visitor-keys/-/eslint-visitor-keys-1.3.0.tgz#30ebd1ef7c2fdff01c3a4f151044af25fab0523e"
-  integrity sha512-6J72N8UNa462wa/KFODt/PJ3IU60SDpC3QXC1Hjc1BXXpfL2C9R5+AU7jhe0F6GREqVMh4Juu+NY7xn+6dipUQ==
-
-eslint-visitor-keys@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/eslint-visitor-keys/-/eslint-visitor-keys-2.0.0.tgz#21fdc8fbcd9c795cc0321f0563702095751511a8"
-  integrity sha512-QudtT6av5WXels9WjIM7qz1XD1cWGvX4gGXvp/zBn9nXG02D0utdU3Em2m/QjTnrsk6bBjmCygl3rmj118msQQ==
-
-eslint-webpack-plugin@^2.5.2:
-  version "2.5.2"
-  resolved "https://registry.yarnpkg.com/eslint-webpack-plugin/-/eslint-webpack-plugin-2.5.2.tgz#4ee17577d6392bf72048080a1678d6237183db81"
-  integrity sha512-ndD9chZ/kaGnjjx7taRg7c6FK/YKb29SSYzaLtPBIYLYJQmZtuKqtQbAvTS2ymiMQT6X0VW9vZIHK0KLstv93Q==
-  dependencies:
-    "@types/eslint" "^7.2.6"
-    arrify "^2.0.1"
-    jest-worker "^26.6.2"
-    micromatch "^4.0.2"
-    schema-utils "^3.0.0"
-
-eslint@^7.11.0:
-  version "7.20.0"
-  resolved "https://registry.yarnpkg.com/eslint/-/eslint-7.20.0.tgz#db07c4ca4eda2e2316e7aa57ac7fc91ec550bdc7"
-  integrity sha512-qGi0CTcOGP2OtCQBgWZlQjcTuP0XkIpYFj25XtRTQSHC+umNnp7UMshr2G8SLsRFYDdAPFeHOsiteadmMH02Yw==
-  dependencies:
-    "@babel/code-frame" "7.12.11"
-    "@eslint/eslintrc" "^0.3.0"
-    ajv "^6.10.0"
-    chalk "^4.0.0"
-    cross-spawn "^7.0.2"
-    debug "^4.0.1"
-    doctrine "^3.0.0"
-    enquirer "^2.3.5"
-    eslint-scope "^5.1.1"
-    eslint-utils "^2.1.0"
-    eslint-visitor-keys "^2.0.0"
-    espree "^7.3.1"
-    esquery "^1.4.0"
-    esutils "^2.0.2"
-    file-entry-cache "^6.0.0"
-    functional-red-black-tree "^1.0.1"
-    glob-parent "^5.0.0"
-    globals "^12.1.0"
-    ignore "^4.0.6"
-    import-fresh "^3.0.0"
-    imurmurhash "^0.1.4"
-    is-glob "^4.0.0"
-    js-yaml "^3.13.1"
-    json-stable-stringify-without-jsonify "^1.0.1"
-    levn "^0.4.1"
-    lodash "^4.17.20"
-    minimatch "^3.0.4"
-    natural-compare "^1.4.0"
-    optionator "^0.9.1"
-    progress "^2.0.0"
-    regexpp "^3.1.0"
-    semver "^7.2.1"
-    strip-ansi "^6.0.0"
-    strip-json-comments "^3.1.0"
-    table "^6.0.4"
-    text-table "^0.2.0"
-    v8-compile-cache "^2.0.3"
-
-espree@^7.3.0, espree@^7.3.1:
-  version "7.3.1"
-  resolved "https://registry.yarnpkg.com/espree/-/espree-7.3.1.tgz#f2df330b752c6f55019f8bd89b7660039c1bbbb6"
-  integrity sha512-v3JCNCE64umkFpmkFGqzVKsOT0tN1Zr+ueqLZfpV1Ob8e+CEgPWa+OxCoGH3tnhimMKIaBm4m/vaRpJ/krRz2g==
-  dependencies:
-    acorn "^7.4.0"
-    acorn-jsx "^5.3.1"
-    eslint-visitor-keys "^1.3.0"
-
-esprima@^4.0.0, esprima@^4.0.1:
-  version "4.0.1"
-  resolved "https://registry.yarnpkg.com/esprima/-/esprima-4.0.1.tgz#13b04cdb3e6c5d19df91ab6987a8695619b0aa71"
-  integrity sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==
-
-esquery@^1.4.0:
-  version "1.4.0"
-  resolved "https://registry.yarnpkg.com/esquery/-/esquery-1.4.0.tgz#2148ffc38b82e8c7057dfed48425b3e61f0f24a5"
-  integrity sha512-cCDispWt5vHHtwMY2YrAQ4ibFkAL8RbH5YGBnZBc90MolvvfkkQcJro/aZiAQUlQ3qgrYS6D6v8Gc5G5CQsc9w==
-  dependencies:
-    estraverse "^5.1.0"
-
-esrecurse@^4.1.0, esrecurse@^4.3.0:
-  version "4.3.0"
-  resolved "https://registry.yarnpkg.com/esrecurse/-/esrecurse-4.3.0.tgz#7ad7964d679abb28bee72cec63758b1c5d2c9921"
-  integrity sha512-KmfKL3b6G+RXvP8N1vr3Tq1kL/oCFgn2NYXEtqP8/L3pKapUA4G8cFVaoF3SU323CD4XypR/ffioHmkti6/Tag==
-  dependencies:
-    estraverse "^5.2.0"
-
-estraverse@^4.1.1, estraverse@^4.2.0:
-  version "4.3.0"
-  resolved "https://registry.yarnpkg.com/estraverse/-/estraverse-4.3.0.tgz#398ad3f3c5a24948be7725e83d11a7de28cdbd1d"
-  integrity sha512-39nnKffWz8xN1BU/2c79n9nB9HDzo0niYUqx6xyqUnyoAnQyyWpOTdZEeiCch8BBu515t4wp9ZmgVfVhn9EBpw==
-
-estraverse@^5.1.0, estraverse@^5.2.0:
-  version "5.2.0"
-  resolved "https://registry.yarnpkg.com/estraverse/-/estraverse-5.2.0.tgz#307df42547e6cc7324d3cf03c155d5cdb8c53880"
-  integrity sha512-BxbNGGNm0RyRYvUdHpIwv9IWzeM9XClbOxwoATuFdOE7ZE6wHL+HQ5T8hoPM+zHvmKzzsEqhgy0GrQ5X13afiQ==
-
-estree-walker@^0.6.1:
-  version "0.6.1"
-  resolved "https://registry.yarnpkg.com/estree-walker/-/estree-walker-0.6.1.tgz#53049143f40c6eb918b23671d1fe3219f3a1b362"
-  integrity sha512-SqmZANLWS0mnatqbSfRP5g8OXZC12Fgg1IwNtLsyHDzJizORW4khDfjPqJZsemPWBB2uqykUah5YpQ6epsqC/w==
-
-estree-walker@^1.0.1:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/estree-walker/-/estree-walker-1.0.1.tgz#31bc5d612c96b704106b477e6dd5d8aa138cb700"
-  integrity sha512-1fMXF3YP4pZZVozF8j/ZLfvnR8NSIljt56UhbZ5PeeDmmGHpgpdwQt7ITlGvYaQukCvuBRMLEiKiYC+oeIg4cg==
-
-esutils@^2.0.2:
-  version "2.0.3"
-  resolved "https://registry.yarnpkg.com/esutils/-/esutils-2.0.3.tgz#74d2eb4de0b8da1293711910d50775b9b710ef64"
-  integrity sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==
-
-etag@~1.8.1:
-  version "1.8.1"
-  resolved "https://registry.yarnpkg.com/etag/-/etag-1.8.1.tgz#41ae2eeb65efa62268aebfea83ac7d79299b0887"
-  integrity sha1-Qa4u62XvpiJorr/qg6x9eSmbCIc=
-
-eventemitter3@^4.0.0:
-  version "4.0.7"
-  resolved "https://registry.yarnpkg.com/eventemitter3/-/eventemitter3-4.0.7.tgz#2de9b68f6528d5644ef5c59526a1b4a07306169f"
-  integrity sha512-8guHBZCwKnFhYdHr2ysuRWErTwhoN2X8XELRlrRwpmfeY2jjuUN4taQMsULKUVo1K4DvZl+0pgfyoysHxvmvEw==
-
-events@^3.0.0:
-  version "3.2.0"
-  resolved "https://registry.yarnpkg.com/events/-/events-3.2.0.tgz#93b87c18f8efcd4202a461aec4dfc0556b639379"
-  integrity sha512-/46HWwbfCX2xTawVfkKLGxMifJYQBWMwY1mjywRtb4c9x8l5NP3KoJtnIOiL1hfdRkIuYhETxQlo62IF8tcnlg==
-
-eventsource@^1.0.7:
-  version "1.0.7"
-  resolved "https://registry.yarnpkg.com/eventsource/-/eventsource-1.0.7.tgz#8fbc72c93fcd34088090bc0a4e64f4b5cee6d8d0"
-  integrity sha512-4Ln17+vVT0k8aWq+t/bF5arcS3EpT9gYtW66EPacdj/mAFevznsnyoHLPy2BA8gbIQeIHoPsvwmfBftfcG//BQ==
-  dependencies:
-    original "^1.0.0"
-
-evp_bytestokey@^1.0.0, evp_bytestokey@^1.0.3:
-  version "1.0.3"
-  resolved "https://registry.yarnpkg.com/evp_bytestokey/-/evp_bytestokey-1.0.3.tgz#7fcbdb198dc71959432efe13842684e0525acb02"
-  integrity sha512-/f2Go4TognH/KvCISP7OUsHn85hT9nUkxxA9BEWxFn+Oj9o8ZNLm/40hdlgSLyuOimsrTKLUMEorQexp/aPQeA==
-  dependencies:
-    md5.js "^1.3.4"
-    safe-buffer "^5.1.1"
-
-exec-sh@^0.3.2:
-  version "0.3.4"
-  resolved "https://registry.yarnpkg.com/exec-sh/-/exec-sh-0.3.4.tgz#3a018ceb526cc6f6df2bb504b2bfe8e3a4934ec5"
-  integrity sha512-sEFIkc61v75sWeOe72qyrqg2Qg0OuLESziUDk/O/z2qgS15y2gWVFrI6f2Qn/qw/0/NCfCEsmNA4zOjkwEZT1A==
-
-execa@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/execa/-/execa-1.0.0.tgz#c6236a5bb4df6d6f15e88e7f017798216749ddd8"
-  integrity sha512-adbxcyWV46qiHyvSp50TKt05tB4tK3HcmF7/nxfAdhnox83seTDbwnaqKO4sXRy7roHAIFqJP/Rw/AuEbX61LA==
-  dependencies:
-    cross-spawn "^6.0.0"
-    get-stream "^4.0.0"
-    is-stream "^1.1.0"
-    npm-run-path "^2.0.0"
-    p-finally "^1.0.0"
-    signal-exit "^3.0.0"
-    strip-eof "^1.0.0"
-
-execa@^4.0.0:
-  version "4.1.0"
-  resolved "https://registry.yarnpkg.com/execa/-/execa-4.1.0.tgz#4e5491ad1572f2f17a77d388c6c857135b22847a"
-  integrity sha512-j5W0//W7f8UxAn8hXVnwG8tLwdiUy4FJLcSupCg6maBYZDpyBvTApK7KyuI4bKj8KOh1r2YH+6ucuYtJv1bTZA==
-  dependencies:
-    cross-spawn "^7.0.0"
-    get-stream "^5.0.0"
-    human-signals "^1.1.1"
-    is-stream "^2.0.0"
-    merge-stream "^2.0.0"
-    npm-run-path "^4.0.0"
-    onetime "^5.1.0"
-    signal-exit "^3.0.2"
-    strip-final-newline "^2.0.0"
-
-exit@^0.1.2:
-  version "0.1.2"
-  resolved "https://registry.yarnpkg.com/exit/-/exit-0.1.2.tgz#0632638f8d877cc82107d30a0fff1a17cba1cd0c"
-  integrity sha1-BjJjj42HfMghB9MKD/8aF8uhzQw=
-
-expand-brackets@^2.1.4:
-  version "2.1.4"
-  resolved "https://registry.yarnpkg.com/expand-brackets/-/expand-brackets-2.1.4.tgz#b77735e315ce30f6b6eff0f83b04151a22449622"
-  integrity sha1-t3c14xXOMPa27/D4OwQVGiJEliI=
-  dependencies:
-    debug "^2.3.3"
-    define-property "^0.2.5"
-    extend-shallow "^2.0.1"
-    posix-character-classes "^0.1.0"
-    regex-not "^1.0.0"
-    snapdragon "^0.8.1"
-    to-regex "^3.0.1"
-
-expect@^26.6.0, expect@^26.6.2:
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/expect/-/expect-26.6.2.tgz#c6b996bf26bf3fe18b67b2d0f51fc981ba934417"
-  integrity sha512-9/hlOBkQl2l/PLHJx6JjoDF6xPKcJEsUlWKb23rKE7KzeDqUZKXKNMW27KIue5JMdBV9HgmoJPcc8HtO85t9IA==
-  dependencies:
-    "@jest/types" "^26.6.2"
-    ansi-styles "^4.0.0"
-    jest-get-type "^26.3.0"
-    jest-matcher-utils "^26.6.2"
-    jest-message-util "^26.6.2"
-    jest-regex-util "^26.0.0"
-
-express@^4.17.1:
-  version "4.17.1"
-  resolved "https://registry.yarnpkg.com/express/-/express-4.17.1.tgz#4491fc38605cf51f8629d39c2b5d026f98a4c134"
-  integrity sha512-mHJ9O79RqluphRrcw2X/GTh3k9tVv8YcoyY4Kkh4WDMUYKRZUq0h1o0w2rrrxBqM7VoeUVqgb27xlEMXTnYt4g==
-  dependencies:
-    accepts "~1.3.7"
-    array-flatten "1.1.1"
-    body-parser "1.19.0"
-    content-disposition "0.5.3"
-    content-type "~1.0.4"
-    cookie "0.4.0"
-    cookie-signature "1.0.6"
-    debug "2.6.9"
-    depd "~1.1.2"
-    encodeurl "~1.0.2"
-    escape-html "~1.0.3"
-    etag "~1.8.1"
-    finalhandler "~1.1.2"
-    fresh "0.5.2"
-    merge-descriptors "1.0.1"
-    methods "~1.1.2"
-    on-finished "~2.3.0"
-    parseurl "~1.3.3"
-    path-to-regexp "0.1.7"
-    proxy-addr "~2.0.5"
-    qs "6.7.0"
-    range-parser "~1.2.1"
-    safe-buffer "5.1.2"
-    send "0.17.1"
-    serve-static "1.14.1"
-    setprototypeof "1.1.1"
-    statuses "~1.5.0"
-    type-is "~1.6.18"
-    utils-merge "1.0.1"
-    vary "~1.1.2"
-
-ext@^1.1.2:
-  version "1.4.0"
-  resolved "https://registry.yarnpkg.com/ext/-/ext-1.4.0.tgz#89ae7a07158f79d35517882904324077e4379244"
-  integrity sha512-Key5NIsUxdqKg3vIsdw9dSuXpPCQ297y6wBjL30edxwPgt2E44WcWBZey/ZvUc6sERLTxKdyCu4gZFmUbk1Q7A==
-  dependencies:
-    type "^2.0.0"
-
-extend-shallow@^2.0.1:
-  version "2.0.1"
-  resolved "https://registry.yarnpkg.com/extend-shallow/-/extend-shallow-2.0.1.tgz#51af7d614ad9a9f610ea1bafbb989d6b1c56890f"
-  integrity sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=
-  dependencies:
-    is-extendable "^0.1.0"
-
-extend-shallow@^3.0.0, extend-shallow@^3.0.2:
-  version "3.0.2"
-  resolved "https://registry.yarnpkg.com/extend-shallow/-/extend-shallow-3.0.2.tgz#26a71aaf073b39fb2127172746131c2704028db8"
-  integrity sha1-Jqcarwc7OfshJxcnRhMcJwQCjbg=
-  dependencies:
-    assign-symbols "^1.0.0"
-    is-extendable "^1.0.1"
-
-extend@~3.0.2:
-  version "3.0.2"
-  resolved "https://registry.yarnpkg.com/extend/-/extend-3.0.2.tgz#f8b1136b4071fbd8eb140aff858b1019ec2915fa"
-  integrity sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==
-
-extglob@^2.0.4:
-  version "2.0.4"
-  resolved "https://registry.yarnpkg.com/extglob/-/extglob-2.0.4.tgz#ad00fe4dc612a9232e8718711dc5cb5ab0285543"
-  integrity sha512-Nmb6QXkELsuBr24CJSkilo6UHHgbekK5UiZgfE6UHD3Eb27YC6oD+bhcT+tJ6cl8dmsgdQxnWlcry8ksBIBLpw==
-  dependencies:
-    array-unique "^0.3.2"
-    define-property "^1.0.0"
-    expand-brackets "^2.1.4"
-    extend-shallow "^2.0.1"
-    fragment-cache "^0.2.1"
-    regex-not "^1.0.0"
-    snapdragon "^0.8.1"
-    to-regex "^3.0.1"
-
-extsprintf@1.3.0:
-  version "1.3.0"
-  resolved "https://registry.yarnpkg.com/extsprintf/-/extsprintf-1.3.0.tgz#96918440e3041a7a414f8c52e3c574eb3c3e1e05"
-  integrity sha1-lpGEQOMEGnpBT4xS48V06zw+HgU=
-
-extsprintf@^1.2.0:
-  version "1.4.0"
-  resolved "https://registry.yarnpkg.com/extsprintf/-/extsprintf-1.4.0.tgz#e2689f8f356fad62cca65a3a91c5df5f9551692f"
-  integrity sha1-4mifjzVvrWLMplo6kcXfX5VRaS8=
-
-fast-deep-equal@^3.1.1:
-  version "3.1.3"
-  resolved "https://registry.yarnpkg.com/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz#3a7d56b559d6cbc3eb512325244e619a65c6c525"
-  integrity sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==
-
-fast-glob@^3.1.1:
-  version "3.2.5"
-  resolved "https://registry.yarnpkg.com/fast-glob/-/fast-glob-3.2.5.tgz#7939af2a656de79a4f1901903ee8adcaa7cb9661"
-  integrity sha512-2DtFcgT68wiTTiwZ2hNdJfcHNke9XOfnwmBRWXhmeKM8rF0TGwmC/Qto3S7RoZKp5cilZbxzO5iTNTQsJ+EeDg==
-  dependencies:
-    "@nodelib/fs.stat" "^2.0.2"
-    "@nodelib/fs.walk" "^1.2.3"
-    glob-parent "^5.1.0"
-    merge2 "^1.3.0"
-    micromatch "^4.0.2"
-    picomatch "^2.2.1"
-
-fast-json-stable-stringify@^2.0.0, fast-json-stable-stringify@^2.1.0:
-  version "2.1.0"
-  resolved "https://registry.yarnpkg.com/fast-json-stable-stringify/-/fast-json-stable-stringify-2.1.0.tgz#874bf69c6f404c2b5d99c481341399fd55892633"
-  integrity sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw==
-
-fast-levenshtein@^2.0.6, fast-levenshtein@~2.0.6:
-  version "2.0.6"
-  resolved "https://registry.yarnpkg.com/fast-levenshtein/-/fast-levenshtein-2.0.6.tgz#3d8a5c66883a16a30ca8643e851f19baa7797917"
-  integrity sha1-PYpcZog6FqMMqGQ+hR8Zuqd5eRc=
-
-fastq@^1.6.0:
-  version "1.10.1"
-  resolved "https://registry.yarnpkg.com/fastq/-/fastq-1.10.1.tgz#8b8f2ac8bf3632d67afcd65dac248d5fdc45385e"
-  integrity sha512-AWuv6Ery3pM+dY7LYS8YIaCiQvUaos9OB1RyNgaOWnaX+Tik7Onvcsf8x8c+YtDeT0maYLniBip2hox5KtEXXA==
-  dependencies:
-    reusify "^1.0.4"
-
-faye-websocket@^0.11.3:
-  version "0.11.3"
-  resolved "https://registry.yarnpkg.com/faye-websocket/-/faye-websocket-0.11.3.tgz#5c0e9a8968e8912c286639fde977a8b209f2508e"
-  integrity sha512-D2y4bovYpzziGgbHYtGCMjlJM36vAl/y+xUyn1C+FVx8szd1E+86KwVw6XvYSzOP8iMpm1X0I4xJD+QtUb36OA==
-  dependencies:
-    websocket-driver ">=0.5.1"
-
-fb-watchman@^2.0.0:
-  version "2.0.1"
-  resolved "https://registry.yarnpkg.com/fb-watchman/-/fb-watchman-2.0.1.tgz#fc84fb39d2709cf3ff6d743706157bb5708a8a85"
-  integrity sha512-DkPJKQeY6kKwmuMretBhr7G6Vodr7bFwDYTXIkfG1gjvNpaxBTQV3PbXg6bR1c1UP4jPOX0jHUbbHANL9vRjVg==
-  dependencies:
-    bser "2.1.1"
-
-figgy-pudding@^3.5.1:
-  version "3.5.2"
-  resolved "https://registry.yarnpkg.com/figgy-pudding/-/figgy-pudding-3.5.2.tgz#b4eee8148abb01dcf1d1ac34367d59e12fa61d6e"
-  integrity sha512-0btnI/H8f2pavGMN8w40mlSKOfTK2SVJmBfBeVIj3kNw0swwgzyRq0d5TJVOwodFmtvpPeWPN/MCcfuWF0Ezbw==
-
-file-entry-cache@^6.0.0:
-  version "6.0.1"
-  resolved "https://registry.yarnpkg.com/file-entry-cache/-/file-entry-cache-6.0.1.tgz#211b2dd9659cb0394b073e7323ac3c933d522027"
-  integrity sha512-7Gps/XWymbLk2QLYK4NzpMOrYjMhdIxXuIvy2QBsLE6ljuodKvdkWs/cpyJJ3CVIVpH0Oi1Hvg1ovbMzLdFBBg==
-  dependencies:
-    flat-cache "^3.0.4"
-
-file-loader@6.1.1:
-  version "6.1.1"
-  resolved "https://registry.yarnpkg.com/file-loader/-/file-loader-6.1.1.tgz#a6f29dfb3f5933a1c350b2dbaa20ac5be0539baa"
-  integrity sha512-Klt8C4BjWSXYQAfhpYYkG4qHNTna4toMHEbWrI5IuVoxbU6uiDKeKAP99R8mmbJi3lvewn/jQBOgU4+NS3tDQw==
-  dependencies:
-    loader-utils "^2.0.0"
-    schema-utils "^3.0.0"
-
-file-uri-to-path@1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/file-uri-to-path/-/file-uri-to-path-1.0.0.tgz#553a7b8446ff6f684359c445f1e37a05dacc33dd"
-  integrity sha512-0Zt+s3L7Vf1biwWZ29aARiVYLx7iMGnEUl9x33fbB/j3jR81u/O2LbqK+Bm1CDSNDKVtJ/YjwY7TUd5SkeLQLw==
-
-filesize@6.1.0:
-  version "6.1.0"
-  resolved "https://registry.yarnpkg.com/filesize/-/filesize-6.1.0.tgz#e81bdaa780e2451d714d71c0d7a4f3238d37ad00"
-  integrity sha512-LpCHtPQ3sFx67z+uh2HnSyWSLLu5Jxo21795uRDuar/EOuYWXib5EmPaGIBuSnRqH2IODiKA2k5re/K9OnN/Yg==
-
-fill-range@^4.0.0:
-  version "4.0.0"
-  resolved "https://registry.yarnpkg.com/fill-range/-/fill-range-4.0.0.tgz#d544811d428f98eb06a63dc402d2403c328c38f7"
-  integrity sha1-1USBHUKPmOsGpj3EAtJAPDKMOPc=
-  dependencies:
-    extend-shallow "^2.0.1"
-    is-number "^3.0.0"
-    repeat-string "^1.6.1"
-    to-regex-range "^2.1.0"
-
-fill-range@^7.0.1:
-  version "7.0.1"
-  resolved "https://registry.yarnpkg.com/fill-range/-/fill-range-7.0.1.tgz#1919a6a7c75fe38b2c7c77e5198535da9acdda40"
-  integrity sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==
-  dependencies:
-    to-regex-range "^5.0.1"
-
-finalhandler@~1.1.2:
-  version "1.1.2"
-  resolved "https://registry.yarnpkg.com/finalhandler/-/finalhandler-1.1.2.tgz#b7e7d000ffd11938d0fdb053506f6ebabe9f587d"
-  integrity sha512-aAWcW57uxVNrQZqFXjITpW3sIUQmHGG3qSb9mUah9MgMC4NeWhNOlNjXEYq3HjRAvL6arUviZGGJsBg6z0zsWA==
-  dependencies:
-    debug "2.6.9"
-    encodeurl "~1.0.2"
-    escape-html "~1.0.3"
-    on-finished "~2.3.0"
-    parseurl "~1.3.3"
-    statuses "~1.5.0"
-    unpipe "~1.0.0"
-
-find-cache-dir@^2.1.0:
-  version "2.1.0"
-  resolved "https://registry.yarnpkg.com/find-cache-dir/-/find-cache-dir-2.1.0.tgz#8d0f94cd13fe43c6c7c261a0d86115ca918c05f7"
-  integrity sha512-Tq6PixE0w/VMFfCgbONnkiQIVol/JJL7nRMi20fqzA4NRs9AfeqMGeRdPi3wIhYkxjeBaWh2rxwapn5Tu3IqOQ==
-  dependencies:
-    commondir "^1.0.1"
-    make-dir "^2.0.0"
-    pkg-dir "^3.0.0"
-
-find-cache-dir@^3.3.1:
-  version "3.3.1"
-  resolved "https://registry.yarnpkg.com/find-cache-dir/-/find-cache-dir-3.3.1.tgz#89b33fad4a4670daa94f855f7fbe31d6d84fe880"
-  integrity sha512-t2GDMt3oGC/v+BMwzmllWDuJF/xcDtE5j/fCGbqDD7OLuJkj0cfh1YSA5VKPvwMeLFLNDBkwOKZ2X85jGLVftQ==
-  dependencies:
-    commondir "^1.0.1"
-    make-dir "^3.0.2"
-    pkg-dir "^4.1.0"
-
-find-root@^1.1.0:
-  version "1.1.0"
-  resolved "https://registry.yarnpkg.com/find-root/-/find-root-1.1.0.tgz#abcfc8ba76f708c42a97b3d685b7e9450bfb9ce4"
-  integrity sha512-NKfW6bec6GfKc0SGx1e07QZY9PE99u0Bft/0rzSD5k3sO/vwkVUpDUKVm5Gpp5Ue3YfShPFTX2070tDs5kB9Ng==
-
-find-up@4.1.0, find-up@^4.0.0, find-up@^4.1.0:
-  version "4.1.0"
-  resolved "https://registry.yarnpkg.com/find-up/-/find-up-4.1.0.tgz#97afe7d6cdc0bc5928584b7c8d7b16e8a9aa5d19"
-  integrity sha512-PpOwAdQ/YlXQ2vj8a3h8IipDuYRi3wceVQQGYWxNINccq40Anw7BlsEXCMbt1Zt+OLA6Fq9suIpIWD0OsnISlw==
-  dependencies:
-    locate-path "^5.0.0"
-    path-exists "^4.0.0"
-
-find-up@^2.0.0, find-up@^2.1.0:
-  version "2.1.0"
-  resolved "https://registry.yarnpkg.com/find-up/-/find-up-2.1.0.tgz#45d1b7e506c717ddd482775a2b77920a3c0c57a7"
-  integrity sha1-RdG35QbHF93UgndaK3eSCjwMV6c=
-  dependencies:
-    locate-path "^2.0.0"
-
-find-up@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/find-up/-/find-up-3.0.0.tgz#49169f1d7993430646da61ecc5ae355c21c97b73"
-  integrity sha512-1yD6RmLI1XBfxugvORwlck6f75tYL+iR0jqwsOrOxMZyGYqUuDhJ0l4AXdO1iX/FTs9cBAMEk1gWSEx1kSbylg==
-  dependencies:
-    locate-path "^3.0.0"
-
-flat-cache@^3.0.4:
-  version "3.0.4"
-  resolved "https://registry.yarnpkg.com/flat-cache/-/flat-cache-3.0.4.tgz#61b0338302b2fe9f957dcc32fc2a87f1c3048b11"
-  integrity sha512-dm9s5Pw7Jc0GvMYbshN6zchCA9RgQlzzEZX3vylR9IqFfS8XciblUXOKfW6SiuJ0e13eDYZoZV5wdrev7P3Nwg==
-  dependencies:
-    flatted "^3.1.0"
-    rimraf "^3.0.2"
-
-flatted@^3.1.0:
-  version "3.1.1"
-  resolved "https://registry.yarnpkg.com/flatted/-/flatted-3.1.1.tgz#c4b489e80096d9df1dfc97c79871aea7c617c469"
-  integrity sha512-zAoAQiudy+r5SvnSw3KJy5os/oRJYHzrzja/tBDqrZtNhUw8bt6y8OBzMWcjWr+8liV8Eb6yOhw8WZ7VFZ5ZzA==
-
-flatten@^1.0.2:
-  version "1.0.3"
-  resolved "https://registry.yarnpkg.com/flatten/-/flatten-1.0.3.tgz#c1283ac9f27b368abc1e36d1ff7b04501a30356b"
-  integrity sha512-dVsPA/UwQ8+2uoFe5GHtiBMu48dWLTdsuEd7CKGlZlD78r1TTWBvDuFaFGKCo/ZfEr95Uk56vZoX86OsHkUeIg==
-
-flush-write-stream@^1.0.0:
-  version "1.1.1"
-  resolved "https://registry.yarnpkg.com/flush-write-stream/-/flush-write-stream-1.1.1.tgz#8dd7d873a1babc207d94ead0c2e0e44276ebf2e8"
-  integrity sha512-3Z4XhFZ3992uIq0XOqb9AreonueSYphE6oYbpt5+3u06JWklbsPkNv3ZKkP9Bz/r+1MWCaMoSQ28P85+1Yc77w==
-  dependencies:
-    inherits "^2.0.3"
-    readable-stream "^2.3.6"
-
-focus-lock@^0.8.1:
-  version "0.8.1"
-  resolved "https://registry.yarnpkg.com/focus-lock/-/focus-lock-0.8.1.tgz#bb36968abf77a2063fa173cb6c47b12ac8599d33"
-  integrity sha512-/LFZOIo82WDsyyv7h7oc0MJF9ACOvDRdx9rWPZ2pgMfNWu/z8hQDBtOchuB/0BVLmuFOZjV02YwUVzNsWx/EzA==
-  dependencies:
-    tslib "^1.9.3"
-
-follow-redirects@^1.0.0:
-  version "1.13.2"
-  resolved "https://registry.yarnpkg.com/follow-redirects/-/follow-redirects-1.13.2.tgz#dd73c8effc12728ba5cf4259d760ea5fb83e3147"
-  integrity sha512-6mPTgLxYm3r6Bkkg0vNM0HTjfGrOEtsfbhagQvbxDEsEkpNhw582upBaoRZylzen6krEmxXJgt9Ju6HiI4O7BA==
-
-for-in@^1.0.2:
-  version "1.0.2"
-  resolved "https://registry.yarnpkg.com/for-in/-/for-in-1.0.2.tgz#81068d295a8142ec0ac726c6e2200c30fb6d5e80"
-  integrity sha1-gQaNKVqBQuwKxybG4iAMMPttXoA=
-
-forever-agent@~0.6.1:
-  version "0.6.1"
-  resolved "https://registry.yarnpkg.com/forever-agent/-/forever-agent-0.6.1.tgz#fbc71f0c41adeb37f96c577ad1ed42d8fdacca91"
-  integrity sha1-+8cfDEGt6zf5bFd60e1C2P2sypE=
-
-fork-ts-checker-webpack-plugin@4.1.6:
-  version "4.1.6"
-  resolved "https://registry.yarnpkg.com/fork-ts-checker-webpack-plugin/-/fork-ts-checker-webpack-plugin-4.1.6.tgz#5055c703febcf37fa06405d400c122b905167fc5"
-  integrity sha512-DUxuQaKoqfNne8iikd14SAkh5uw4+8vNifp6gmA73yYNS6ywLIWSLD/n/mBzHQRpW3J7rbATEakmiA8JvkTyZw==
-  dependencies:
-    "@babel/code-frame" "^7.5.5"
-    chalk "^2.4.1"
-    micromatch "^3.1.10"
-    minimatch "^3.0.4"
-    semver "^5.6.0"
-    tapable "^1.0.0"
-    worker-rpc "^0.1.0"
-
-form-data@~2.3.2:
-  version "2.3.3"
-  resolved "https://registry.yarnpkg.com/form-data/-/form-data-2.3.3.tgz#dcce52c05f644f298c6a7ab936bd724ceffbf3a6"
-  integrity sha512-1lLKB2Mu3aGP1Q/2eCOx0fNbRMe7XdwktwOruhfqqd0rIJWwN4Dh+E3hrPSlDCXnSR7UtZ1N38rVXm+6+MEhJQ==
-  dependencies:
-    asynckit "^0.4.0"
-    combined-stream "^1.0.6"
-    mime-types "^2.1.12"
-
-forwarded@~0.1.2:
-  version "0.1.2"
-  resolved "https://registry.yarnpkg.com/forwarded/-/forwarded-0.1.2.tgz#98c23dab1175657b8c0573e8ceccd91b0ff18c84"
-  integrity sha1-mMI9qxF1ZXuMBXPozszZGw/xjIQ=
-
-fragment-cache@^0.2.1:
-  version "0.2.1"
-  resolved "https://registry.yarnpkg.com/fragment-cache/-/fragment-cache-0.2.1.tgz#4290fad27f13e89be7f33799c6bc5a0abfff0d19"
-  integrity sha1-QpD60n8T6Jvn8zeZxrxaCr//DRk=
-  dependencies:
-    map-cache "^0.2.2"
-
-framer-motion@^3.7.0:
-  version "3.7.0"
-  resolved "https://registry.yarnpkg.com/framer-motion/-/framer-motion-3.7.0.tgz#57f4c4899223e1a9d76092a865b1245b48806d6c"
-  integrity sha512-sEmI/1a0vG91aFV7zW9vGHJ0O7IO+V/KAUWpuGFmXYbE7WojAomRMOgz7EkeOMgSm408jewf8/KNEzWK5b2N5g==
-  dependencies:
-    framesync "^5.1.0"
-    hey-listen "^1.0.8"
-    popmotion "9.2.1"
-    style-value-types "4.0.3"
-    tslib "^1.10.0"
-  optionalDependencies:
-    "@emotion/is-prop-valid" "^0.8.2"
-
-framesync@5.1.0, framesync@^5.1.0:
-  version "5.1.0"
-  resolved "https://registry.yarnpkg.com/framesync/-/framesync-5.1.0.tgz#b22639be6e83cf170e5cb3d0497e3e50100a01ef"
-  integrity sha512-31sDH8LxSFoLKDYENzKdI+YJD4vV8sMBpwcAW0/6Es2jZBQBdlqbFnqrYczpsnzpqG+y6EqYPvgFMI2ZDdlnyQ==
-
-fresh@0.5.2:
-  version "0.5.2"
-  resolved "https://registry.yarnpkg.com/fresh/-/fresh-0.5.2.tgz#3d8cadd90d976569fa835ab1f8e4b23a105605a7"
-  integrity sha1-PYyt2Q2XZWn6g1qx+OSyOhBWBac=
-
-from2@^2.1.0:
-  version "2.3.0"
-  resolved "https://registry.yarnpkg.com/from2/-/from2-2.3.0.tgz#8bfb5502bde4a4d36cfdeea007fcca21d7e382af"
-  integrity sha1-i/tVAr3kpNNs/e6gB/zKIdfjgq8=
-  dependencies:
-    inherits "^2.0.1"
-    readable-stream "^2.0.0"
-
-fs-extra@^7.0.0:
-  version "7.0.1"
-  resolved "https://registry.yarnpkg.com/fs-extra/-/fs-extra-7.0.1.tgz#4f189c44aa123b895f722804f55ea23eadc348e9"
-  integrity sha512-YJDaCJZEnBmcbw13fvdAM9AwNOJwOzrE4pqMqBq5nFiEqXUqHwlK4B+3pUw6JNvfSPtX05xFHtYy/1ni01eGCw==
-  dependencies:
-    graceful-fs "^4.1.2"
-    jsonfile "^4.0.0"
-    universalify "^0.1.0"
-
-fs-extra@^8.1.0:
-  version "8.1.0"
-  resolved "https://registry.yarnpkg.com/fs-extra/-/fs-extra-8.1.0.tgz#49d43c45a88cd9677668cb7be1b46efdb8d2e1c0"
-  integrity sha512-yhlQgA6mnOJUKOsRUFsgJdQCvkKhcz8tlZG5HBQfReYZy46OwLcY+Zia0mtdHsOo9y/hP+CxMN0TU9QxoOtG4g==
-  dependencies:
-    graceful-fs "^4.2.0"
-    jsonfile "^4.0.0"
-    universalify "^0.1.0"
-
-fs-extra@^9.0.1:
-  version "9.1.0"
-  resolved "https://registry.yarnpkg.com/fs-extra/-/fs-extra-9.1.0.tgz#5954460c764a8da2094ba3554bf839e6b9a7c86d"
-  integrity sha512-hcg3ZmepS30/7BSFqRvoo3DOMQu7IjqxO5nCDt+zM9XWjb33Wg7ziNT+Qvqbuc3+gWpzO02JubVyk2G4Zvo1OQ==
-  dependencies:
-    at-least-node "^1.0.0"
-    graceful-fs "^4.2.0"
-    jsonfile "^6.0.1"
-    universalify "^2.0.0"
-
-fs-minipass@^2.0.0:
-  version "2.1.0"
-  resolved "https://registry.yarnpkg.com/fs-minipass/-/fs-minipass-2.1.0.tgz#7f5036fdbf12c63c169190cbe4199c852271f9fb"
-  integrity sha512-V/JgOLFCS+R6Vcq0slCuaeWEdNC3ouDlJMNIsacH2VtALiu9mV4LPrHc5cDl8k5aw6J8jwgWWpiTo5RYhmIzvg==
-  dependencies:
-    minipass "^3.0.0"
-
-fs-write-stream-atomic@^1.0.8:
-  version "1.0.10"
-  resolved "https://registry.yarnpkg.com/fs-write-stream-atomic/-/fs-write-stream-atomic-1.0.10.tgz#b47df53493ef911df75731e70a9ded0189db40c9"
-  integrity sha1-tH31NJPvkR33VzHnCp3tAYnbQMk=
-  dependencies:
-    graceful-fs "^4.1.2"
-    iferr "^0.1.5"
-    imurmurhash "^0.1.4"
-    readable-stream "1 || 2"
-
-fs.realpath@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/fs.realpath/-/fs.realpath-1.0.0.tgz#1504ad2523158caa40db4a2787cb01411994ea4f"
-  integrity sha1-FQStJSMVjKpA20onh8sBQRmU6k8=
-
-fsevents@^1.2.7:
-  version "1.2.13"
-  resolved "https://registry.yarnpkg.com/fsevents/-/fsevents-1.2.13.tgz#f325cb0455592428bcf11b383370ef70e3bfcc38"
-  integrity sha512-oWb1Z6mkHIskLzEJ/XWX0srkpkTQ7vaopMQkyaEIoq0fmtFVxOthb8cCxeT+p3ynTdkk/RZwbgG4brR5BeWECw==
-  dependencies:
-    bindings "^1.5.0"
-    nan "^2.12.1"
-
-fsevents@^2.1.2, fsevents@^2.1.3, fsevents@~2.3.1:
-  version "2.3.2"
-  resolved "https://registry.yarnpkg.com/fsevents/-/fsevents-2.3.2.tgz#8a526f78b8fdf4623b709e0b975c52c24c02fd1a"
-  integrity sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==
-
-function-bind@^1.1.1:
-  version "1.1.1"
-  resolved "https://registry.yarnpkg.com/function-bind/-/function-bind-1.1.1.tgz#a56899d3ea3c9bab874bb9773b7c5ede92f4895d"
-  integrity sha512-yIovAzMX49sF8Yl58fSCWJ5svSLuaibPxXQJFLmBObTuCr0Mf1KiPopGM9NiFjiYBCbfaa2Fh6breQ6ANVTI0A==
-
-functional-red-black-tree@^1.0.1:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/functional-red-black-tree/-/functional-red-black-tree-1.0.1.tgz#1b0ab3bd553b2a0d6399d29c0e3ea0b252078327"
-  integrity sha1-GwqzvVU7Kg1jmdKcDj6gslIHgyc=
-
-gensync@^1.0.0-beta.1:
-  version "1.0.0-beta.2"
-  resolved "https://registry.yarnpkg.com/gensync/-/gensync-1.0.0-beta.2.tgz#32a6ee76c3d7f52d46b2b1ae5d93fea8580a25e0"
-  integrity sha512-3hN7NaskYvMDLQY55gnW3NQ+mesEAepTqlg+VEbj7zzqEMBVNhzcGYYeqFo/TlYz6eQiFcp1HcsCZO+nGgS8zg==
-
-get-caller-file@^2.0.1:
-  version "2.0.5"
-  resolved "https://registry.yarnpkg.com/get-caller-file/-/get-caller-file-2.0.5.tgz#4f94412a82db32f36e3b0b9741f8a97feb031f7e"
-  integrity sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==
-
-get-intrinsic@^1.0.2, get-intrinsic@^1.1.0, get-intrinsic@^1.1.1:
-  version "1.1.1"
-  resolved "https://registry.yarnpkg.com/get-intrinsic/-/get-intrinsic-1.1.1.tgz#15f59f376f855c446963948f0d24cd3637b4abc6"
-  integrity sha512-kWZrnVM42QCiEA2Ig1bG8zjoIMOgxWwYCEeNdwY6Tv/cOSeGpcoX4pXHfKUxNKVoArnrEr2e9srnAxxGIraS9Q==
-  dependencies:
-    function-bind "^1.1.1"
-    has "^1.0.3"
-    has-symbols "^1.0.1"
-
-get-nonce@^1.0.0:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/get-nonce/-/get-nonce-1.0.1.tgz#fdf3f0278073820d2ce9426c18f07481b1e0cdf3"
-  integrity sha512-FJhYRoDaiatfEkUK8HKlicmu/3SGFD51q3itKDGoSTysQJBnfOcxU5GxnhE1E6soB76MbT0MBtnKJuXyAx+96Q==
-
-get-own-enumerable-property-symbols@^3.0.0:
-  version "3.0.2"
-  resolved "https://registry.yarnpkg.com/get-own-enumerable-property-symbols/-/get-own-enumerable-property-symbols-3.0.2.tgz#b5fde77f22cbe35f390b4e089922c50bce6ef664"
-  integrity sha512-I0UBV/XOz1XkIJHEUDMZAbzCThU/H8DxmSfmdGcKPnVhu2VfFqr34jr9777IyaTYvxjedWhqVIilEDsCdP5G6g==
-
-get-package-type@^0.1.0:
-  version "0.1.0"
-  resolved "https://registry.yarnpkg.com/get-package-type/-/get-package-type-0.1.0.tgz#8de2d803cff44df3bc6c456e6668b36c3926e11a"
-  integrity sha512-pjzuKtY64GYfWizNAJ0fr9VqttZkNiK2iS430LtIHzjBEr6bX8Am2zm4sW4Ro5wjWW5cAlRL1qAMTcXbjNAO2Q==
-
-get-stream@^4.0.0:
-  version "4.1.0"
-  resolved "https://registry.yarnpkg.com/get-stream/-/get-stream-4.1.0.tgz#c1b255575f3dc21d59bfc79cd3d2b46b1c3a54b5"
-  integrity sha512-GMat4EJ5161kIy2HevLlr4luNjBgvmj413KaQA7jt4V8B4RDsfpHk7WQ9GVqfYyyx8OS/L66Kox+rJRNklLK7w==
-  dependencies:
-    pump "^3.0.0"
-
-get-stream@^5.0.0:
-  version "5.2.0"
-  resolved "https://registry.yarnpkg.com/get-stream/-/get-stream-5.2.0.tgz#4966a1795ee5ace65e706c4b7beb71257d6e22d3"
-  integrity sha512-nBF+F1rAZVCu/p7rjzgA+Yb4lfYXrpl7a6VmJrU8wF9I1CKvP/QwPNZHnOlwbTkY6dvtFIzFMSyQXbLoTQPRpA==
-  dependencies:
-    pump "^3.0.0"
-
-get-value@^2.0.3, get-value@^2.0.6:
-  version "2.0.6"
-  resolved "https://registry.yarnpkg.com/get-value/-/get-value-2.0.6.tgz#dc15ca1c672387ca76bd37ac0a395ba2042a2c28"
-  integrity sha1-3BXKHGcjh8p2vTesCjlbogQqLCg=
-
-getpass@^0.1.1:
-  version "0.1.7"
-  resolved "https://registry.yarnpkg.com/getpass/-/getpass-0.1.7.tgz#5eff8e3e684d569ae4cb2b1282604e8ba62149fa"
-  integrity sha1-Xv+OPmhNVprkyysSgmBOi6YhSfo=
-  dependencies:
-    assert-plus "^1.0.0"
-
-glob-parent@^3.1.0:
-  version "3.1.0"
-  resolved "https://registry.yarnpkg.com/glob-parent/-/glob-parent-3.1.0.tgz#9e6af6299d8d3bd2bd40430832bd113df906c5ae"
-  integrity sha1-nmr2KZ2NO9K9QEMIMr0RPfkGxa4=
-  dependencies:
-    is-glob "^3.1.0"
-    path-dirname "^1.0.0"
-
-glob-parent@^5.0.0, glob-parent@^5.1.0, glob-parent@~5.1.0:
-  version "5.1.1"
-  resolved "https://registry.yarnpkg.com/glob-parent/-/glob-parent-5.1.1.tgz#b6c1ef417c4e5663ea498f1c45afac6916bbc229"
-  integrity sha512-FnI+VGOpnlGHWZxthPGR+QhR78fuiK0sNLkHQv+bL9fQi57lNNdquIbna/WrfROrolq8GK5Ek6BiMwqL/voRYQ==
-  dependencies:
-    is-glob "^4.0.1"
-
-glob@^7.0.3, glob@^7.1.1, glob@^7.1.2, glob@^7.1.3, glob@^7.1.4, glob@^7.1.6:
-  version "7.1.6"
-  resolved "https://registry.yarnpkg.com/glob/-/glob-7.1.6.tgz#141f33b81a7c2492e125594307480c46679278a6"
-  integrity sha512-LwaxwyZ72Lk7vZINtNNrywX0ZuLyStrdDtabefZKAY5ZGJhVtgdznluResxNmPitE0SAO+O26sWTHeKSI2wMBA==
-  dependencies:
-    fs.realpath "^1.0.0"
-    inflight "^1.0.4"
-    inherits "2"
-    minimatch "^3.0.4"
-    once "^1.3.0"
-    path-is-absolute "^1.0.0"
-
-global-modules@2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/global-modules/-/global-modules-2.0.0.tgz#997605ad2345f27f51539bea26574421215c7780"
-  integrity sha512-NGbfmJBp9x8IxyJSd1P+otYK8vonoJactOogrVfFRIAEY1ukil8RSKDz2Yo7wh1oihl51l/r6W4epkeKJHqL8A==
-  dependencies:
-    global-prefix "^3.0.0"
-
-global-prefix@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/global-prefix/-/global-prefix-3.0.0.tgz#fc85f73064df69f50421f47f883fe5b913ba9b97"
-  integrity sha512-awConJSVCHVGND6x3tmMaKcQvwXLhjdkmomy2W+Goaui8YPgYgXJZewhg3fWC+DlfqqQuWg8AwqjGTD2nAPVWg==
-  dependencies:
-    ini "^1.3.5"
-    kind-of "^6.0.2"
-    which "^1.3.1"
-
-globals@^11.1.0:
-  version "11.12.0"
-  resolved "https://registry.yarnpkg.com/globals/-/globals-11.12.0.tgz#ab8795338868a0babd8525758018c2a7eb95c42e"
-  integrity sha512-WOBp/EEGUiIsJSp7wcv/y6MO+lV9UoncWqxuFfm8eBwzWNgyfBd6Gz+IeKQ9jCmyhoH99g15M3T+QaVHFjizVA==
-
-globals@^12.1.0:
-  version "12.4.0"
-  resolved "https://registry.yarnpkg.com/globals/-/globals-12.4.0.tgz#a18813576a41b00a24a97e7f815918c2e19925f8"
-  integrity sha512-BWICuzzDvDoH54NHKCseDanAhE3CeDorgDL5MT6LMXXj2WCnd9UC2szdk4AWLfjdgNBCXLUanXYcpBBKOSWGwg==
-  dependencies:
-    type-fest "^0.8.1"
-
-globby@11.0.1:
-  version "11.0.1"
-  resolved "https://registry.yarnpkg.com/globby/-/globby-11.0.1.tgz#9a2bf107a068f3ffeabc49ad702c79ede8cfd357"
-  integrity sha512-iH9RmgwCmUJHi2z5o2l3eTtGBtXek1OYlHrbcxOYugyHLmAsZrPj43OtHThd62Buh/Vv6VyCBD2bdyWcGNQqoQ==
-  dependencies:
-    array-union "^2.1.0"
-    dir-glob "^3.0.1"
-    fast-glob "^3.1.1"
-    ignore "^5.1.4"
-    merge2 "^1.3.0"
-    slash "^3.0.0"
-
-globby@^11.0.1:
-  version "11.0.2"
-  resolved "https://registry.yarnpkg.com/globby/-/globby-11.0.2.tgz#1af538b766a3b540ebfb58a32b2e2d5897321d83"
-  integrity sha512-2ZThXDvvV8fYFRVIxnrMQBipZQDr7MxKAmQK1vujaj9/7eF0efG7BPUKJ7jP7G5SLF37xKDXvO4S/KKLj/Z0og==
-  dependencies:
-    array-union "^2.1.0"
-    dir-glob "^3.0.1"
-    fast-glob "^3.1.1"
-    ignore "^5.1.4"
-    merge2 "^1.3.0"
-    slash "^3.0.0"
-
-globby@^6.1.0:
-  version "6.1.0"
-  resolved "https://registry.yarnpkg.com/globby/-/globby-6.1.0.tgz#f5a6d70e8395e21c858fb0489d64df02424d506c"
-  integrity sha1-9abXDoOV4hyFj7BInWTfAkJNUGw=
-  dependencies:
-    array-union "^1.0.1"
-    glob "^7.0.3"
-    object-assign "^4.0.1"
-    pify "^2.0.0"
-    pinkie-promise "^2.0.0"
-
-graceful-fs@^4.1.11, graceful-fs@^4.1.15, graceful-fs@^4.1.2, graceful-fs@^4.1.6, graceful-fs@^4.2.0, graceful-fs@^4.2.4:
-  version "4.2.6"
-  resolved "https://registry.yarnpkg.com/graceful-fs/-/graceful-fs-4.2.6.tgz#ff040b2b0853b23c3d31027523706f1885d76bee"
-  integrity sha512-nTnJ528pbqxYanhpDYsi4Rd8MAeaBA67+RZ10CM1m3bTAVFEDcd5AuA4a6W5YkGZ1iNXHzZz8T6TBKLeBuNriQ==
-
-growly@^1.3.0:
-  version "1.3.0"
-  resolved "https://registry.yarnpkg.com/growly/-/growly-1.3.0.tgz#f10748cbe76af964b7c96c93c6bcc28af120c081"
-  integrity sha1-8QdIy+dq+WS3yWyTxrzCivEgwIE=
-
-gzip-size@5.1.1:
-  version "5.1.1"
-  resolved "https://registry.yarnpkg.com/gzip-size/-/gzip-size-5.1.1.tgz#cb9bee692f87c0612b232840a873904e4c135274"
-  integrity sha512-FNHi6mmoHvs1mxZAds4PpdCS6QG8B4C1krxJsMutgxl5t3+GlRTzzI3NEkifXx2pVsOvJdOGSmIgDhQ55FwdPA==
-  dependencies:
-    duplexer "^0.1.1"
-    pify "^4.0.1"
-
-handle-thing@^2.0.0:
-  version "2.0.1"
-  resolved "https://registry.yarnpkg.com/handle-thing/-/handle-thing-2.0.1.tgz#857f79ce359580c340d43081cc648970d0bb234e"
-  integrity sha512-9Qn4yBxelxoh2Ow62nP+Ka/kMnOXRi8BXnRaUwezLNhqelnN49xKz4F/dPP8OYLxLxq6JDtZb2i9XznUQbNPTg==
-
-har-schema@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/har-schema/-/har-schema-2.0.0.tgz#a94c2224ebcac04782a0d9035521f24735b7ec92"
-  integrity sha1-qUwiJOvKwEeCoNkDVSHyRzW37JI=
-
-har-validator@~5.1.3:
-  version "5.1.5"
-  resolved "https://registry.yarnpkg.com/har-validator/-/har-validator-5.1.5.tgz#1f0803b9f8cb20c0fa13822df1ecddb36bde1efd"
-  integrity sha512-nmT2T0lljbxdQZfspsno9hgrG3Uir6Ks5afism62poxqBM6sDnMEuPmzTq8XN0OEwqKLLdh1jQI3qyE66Nzb3w==
-  dependencies:
-    ajv "^6.12.3"
-    har-schema "^2.0.0"
-
-harmony-reflect@^1.4.6:
-  version "1.6.1"
-  resolved "https://registry.yarnpkg.com/harmony-reflect/-/harmony-reflect-1.6.1.tgz#c108d4f2bb451efef7a37861fdbdae72c9bdefa9"
-  integrity sha512-WJTeyp0JzGtHcuMsi7rw2VwtkvLa+JyfEKJCFyfcS0+CDkjQ5lHPu7zEhFZP+PDSRrEgXa5Ah0l1MbgbE41XjA==
-
-has-flag@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/has-flag/-/has-flag-3.0.0.tgz#b5d454dc2199ae225699f3467e5a07f3b955bafd"
-  integrity sha1-tdRU3CGZriJWmfNGfloH87lVuv0=
-
-has-flag@^4.0.0:
-  version "4.0.0"
-  resolved "https://registry.yarnpkg.com/has-flag/-/has-flag-4.0.0.tgz#944771fd9c81c81265c4d6941860da06bb59479b"
-  integrity sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==
-
-has-symbols@^1.0.1:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/has-symbols/-/has-symbols-1.0.1.tgz#9f5214758a44196c406d9bd76cebf81ec2dd31e8"
-  integrity sha512-PLcsoqu++dmEIZB+6totNFKq/7Do+Z0u4oT0zKOJNl3lYK6vGwwu2hjHs+68OEZbTjiUE9bgOABXbP/GvrS0Kg==
-
-has-value@^0.3.1:
-  version "0.3.1"
-  resolved "https://registry.yarnpkg.com/has-value/-/has-value-0.3.1.tgz#7b1f58bada62ca827ec0a2078025654845995e1f"
-  integrity sha1-ex9YutpiyoJ+wKIHgCVlSEWZXh8=
-  dependencies:
-    get-value "^2.0.3"
-    has-values "^0.1.4"
-    isobject "^2.0.0"
-
-has-value@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/has-value/-/has-value-1.0.0.tgz#18b281da585b1c5c51def24c930ed29a0be6b177"
-  integrity sha1-GLKB2lhbHFxR3vJMkw7SmgvmsXc=
-  dependencies:
-    get-value "^2.0.6"
-    has-values "^1.0.0"
-    isobject "^3.0.0"
-
-has-values@^0.1.4:
-  version "0.1.4"
-  resolved "https://registry.yarnpkg.com/has-values/-/has-values-0.1.4.tgz#6d61de95d91dfca9b9a02089ad384bff8f62b771"
-  integrity sha1-bWHeldkd/Km5oCCJrThL/49it3E=
-
-has-values@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/has-values/-/has-values-1.0.0.tgz#95b0b63fec2146619a6fe57fe75628d5a39efe4f"
-  integrity sha1-lbC2P+whRmGab+V/51Yo1aOe/k8=
-  dependencies:
-    is-number "^3.0.0"
-    kind-of "^4.0.0"
-
-has@^1.0.0, has@^1.0.3:
-  version "1.0.3"
-  resolved "https://registry.yarnpkg.com/has/-/has-1.0.3.tgz#722d7cbfc1f6aa8241f16dd814e011e1f41e8796"
-  integrity sha512-f2dvO0VU6Oej7RkWJGrehjbzMAjFp5/VKPp5tTpWIV4JHHZK1/BxbFRtf/siA2SWTe09caDmVtYYzWEIbBS4zw==
-  dependencies:
-    function-bind "^1.1.1"
-
-hash-base@^3.0.0:
-  version "3.1.0"
-  resolved "https://registry.yarnpkg.com/hash-base/-/hash-base-3.1.0.tgz#55c381d9e06e1d2997a883b4a3fddfe7f0d3af33"
-  integrity sha512-1nmYp/rhMDiE7AYkDw+lLwlAzz0AntGIe51F3RfFfEqyQ3feY2eI/NcwC6umIQVOASPMsWJLJScWKSSvzL9IVA==
-  dependencies:
-    inherits "^2.0.4"
-    readable-stream "^3.6.0"
-    safe-buffer "^5.2.0"
-
-hash.js@^1.0.0, hash.js@^1.0.3:
-  version "1.1.7"
-  resolved "https://registry.yarnpkg.com/hash.js/-/hash.js-1.1.7.tgz#0babca538e8d4ee4a0f8988d68866537a003cf42"
-  integrity sha512-taOaskGt4z4SOANNseOviYDvjEJinIkRgmp7LbKP2YTTmVxWBl87s/uzK9r+44BclBSp2X7K1hqeNfz9JbBeXA==
-  dependencies:
-    inherits "^2.0.3"
-    minimalistic-assert "^1.0.1"
-
-he@^1.2.0:
-  version "1.2.0"
-  resolved "https://registry.yarnpkg.com/he/-/he-1.2.0.tgz#84ae65fa7eafb165fddb61566ae14baf05664f0f"
-  integrity sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==
-
-hex-color-regex@^1.1.0:
-  version "1.1.0"
-  resolved "https://registry.yarnpkg.com/hex-color-regex/-/hex-color-regex-1.1.0.tgz#4c06fccb4602fe2602b3c93df82d7e7dbf1a8a8e"
-  integrity sha512-l9sfDFsuqtOqKDsQdqrMRk0U85RZc0RtOR9yPI7mRVOa4FsR/BVnZ0shmQRM96Ji99kYZP/7hn1cedc1+ApsTQ==
-
-hey-listen@^1.0.8:
-  version "1.0.8"
-  resolved "https://registry.yarnpkg.com/hey-listen/-/hey-listen-1.0.8.tgz#8e59561ff724908de1aa924ed6ecc84a56a9aa68"
-  integrity sha512-COpmrF2NOg4TBWUJ5UVyaCU2A88wEMkUPK4hNqyCkqHbxT92BbvfjoSozkAIIm6XhicGlJHhFdullInrdhwU8Q==
-
-history@^4.9.0:
-  version "4.10.1"
-  resolved "https://registry.yarnpkg.com/history/-/history-4.10.1.tgz#33371a65e3a83b267434e2b3f3b1b4c58aad4cf3"
-  integrity sha512-36nwAD620w12kuzPAsyINPWJqlNbij+hpK1k9XRloDtym8mxzGYl2c17LnV6IAGB2Dmg4tEa7G7DlawS0+qjew==
-  dependencies:
-    "@babel/runtime" "^7.1.2"
-    loose-envify "^1.2.0"
-    resolve-pathname "^3.0.0"
-    tiny-invariant "^1.0.2"
-    tiny-warning "^1.0.0"
-    value-equal "^1.0.1"
-
-hmac-drbg@^1.0.1:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/hmac-drbg/-/hmac-drbg-1.0.1.tgz#d2745701025a6c775a6c545793ed502fc0c649a1"
-  integrity sha1-0nRXAQJabHdabFRXk+1QL8DGSaE=
-  dependencies:
-    hash.js "^1.0.3"
-    minimalistic-assert "^1.0.0"
-    minimalistic-crypto-utils "^1.0.1"
-
-hoist-non-react-statics@^3.1.0, hoist-non-react-statics@^3.3.1:
-  version "3.3.2"
-  resolved "https://registry.yarnpkg.com/hoist-non-react-statics/-/hoist-non-react-statics-3.3.2.tgz#ece0acaf71d62c2969c2ec59feff42a4b1a85b45"
-  integrity sha512-/gGivxi8JPKWNm/W0jSmzcMPpfpPLc3dY/6GxhX2hQ9iGj3aDfklV4ET7NjKpSinLpJ5vafa9iiGIEZg10SfBw==
-  dependencies:
-    react-is "^16.7.0"
-
-hoopy@^0.1.4:
-  version "0.1.4"
-  resolved "https://registry.yarnpkg.com/hoopy/-/hoopy-0.1.4.tgz#609207d661100033a9a9402ad3dea677381c1b1d"
-  integrity sha512-HRcs+2mr52W0K+x8RzcLzuPPmVIKMSv97RGHy0Ea9y/mpcaK+xTrjICA04KAHi4GRzxliNqNJEFYWHghy3rSfQ==
-
-hosted-git-info@^2.1.4:
-  version "2.8.8"
-  resolved "https://registry.yarnpkg.com/hosted-git-info/-/hosted-git-info-2.8.8.tgz#7539bd4bc1e0e0a895815a2e0262420b12858488"
-  integrity sha512-f/wzC2QaWBs7t9IYqB4T3sR1xviIViXJRJTWBlx2Gf3g0Xi5vI7Yy4koXQ1c9OYDGHN9sBy1DQ2AB8fqZBWhUg==
-
-hpack.js@^2.1.6:
-  version "2.1.6"
-  resolved "https://registry.yarnpkg.com/hpack.js/-/hpack.js-2.1.6.tgz#87774c0949e513f42e84575b3c45681fade2a0b2"
-  integrity sha1-h3dMCUnlE/QuhFdbPEVoH63ioLI=
-  dependencies:
-    inherits "^2.0.1"
-    obuf "^1.0.0"
-    readable-stream "^2.0.1"
-    wbuf "^1.1.0"
-
-hsl-regex@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/hsl-regex/-/hsl-regex-1.0.0.tgz#d49330c789ed819e276a4c0d272dffa30b18fe6e"
-  integrity sha1-1JMwx4ntgZ4nakwNJy3/owsY/m4=
-
-hsla-regex@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/hsla-regex/-/hsla-regex-1.0.0.tgz#c1ce7a3168c8c6614033a4b5f7877f3b225f9c38"
-  integrity sha1-wc56MWjIxmFAM6S194d/OyJfnDg=
-
-html-comment-regex@^1.1.0:
-  version "1.1.2"
-  resolved "https://registry.yarnpkg.com/html-comment-regex/-/html-comment-regex-1.1.2.tgz#97d4688aeb5c81886a364faa0cad1dda14d433a7"
-  integrity sha512-P+M65QY2JQ5Y0G9KKdlDpo0zK+/OHptU5AaBwUfAIDJZk1MYf32Frm84EcOytfJE0t5JvkAnKlmjsXDnWzCJmQ==
-
-html-encoding-sniffer@^2.0.1:
-  version "2.0.1"
-  resolved "https://registry.yarnpkg.com/html-encoding-sniffer/-/html-encoding-sniffer-2.0.1.tgz#42a6dc4fd33f00281176e8b23759ca4e4fa185f3"
-  integrity sha512-D5JbOMBIR/TVZkubHT+OyT2705QvogUW4IBn6nHd756OwieSF9aDYFj4dv6HHEVGYbHaLETa3WggZYWWMyy3ZQ==
-  dependencies:
-    whatwg-encoding "^1.0.5"
-
-html-entities@^1.2.1, html-entities@^1.3.1:
-  version "1.4.0"
-  resolved "https://registry.yarnpkg.com/html-entities/-/html-entities-1.4.0.tgz#cfbd1b01d2afaf9adca1b10ae7dffab98c71d2dc"
-  integrity sha512-8nxjcBcd8wovbeKx7h3wTji4e6+rhaVuPNpMqwWgnHh+N9ToqsCs6XztWRBPQ+UtzsoMAdKZtUENoVzU/EMtZA==
-
-html-escaper@^2.0.0:
-  version "2.0.2"
-  resolved "https://registry.yarnpkg.com/html-escaper/-/html-escaper-2.0.2.tgz#dfd60027da36a36dfcbe236262c00a5822681453"
-  integrity sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==
-
-html-minifier-terser@^5.0.1:
-  version "5.1.1"
-  resolved "https://registry.yarnpkg.com/html-minifier-terser/-/html-minifier-terser-5.1.1.tgz#922e96f1f3bb60832c2634b79884096389b1f054"
-  integrity sha512-ZPr5MNObqnV/T9akshPKbVgyOqLmy+Bxo7juKCfTfnjNniTAMdy4hz21YQqoofMBJD2kdREaqPPdThoR78Tgxg==
-  dependencies:
-    camel-case "^4.1.1"
-    clean-css "^4.2.3"
-    commander "^4.1.1"
-    he "^1.2.0"
-    param-case "^3.0.3"
-    relateurl "^0.2.7"
-    terser "^4.6.3"
-
-html-webpack-plugin@4.5.0:
-  version "4.5.0"
-  resolved "https://registry.yarnpkg.com/html-webpack-plugin/-/html-webpack-plugin-4.5.0.tgz#625097650886b97ea5dae331c320e3238f6c121c"
-  integrity sha512-MouoXEYSjTzCrjIxWwg8gxL5fE2X2WZJLmBYXlaJhQUH5K/b5OrqmV7T4dB7iu0xkmJ6JlUuV6fFVtnqbPopZw==
-  dependencies:
-    "@types/html-minifier-terser" "^5.0.0"
-    "@types/tapable" "^1.0.5"
-    "@types/webpack" "^4.41.8"
-    html-minifier-terser "^5.0.1"
-    loader-utils "^1.2.3"
-    lodash "^4.17.15"
-    pretty-error "^2.1.1"
-    tapable "^1.1.3"
-    util.promisify "1.0.0"
-
-htmlparser2@^3.10.1:
-  version "3.10.1"
-  resolved "https://registry.yarnpkg.com/htmlparser2/-/htmlparser2-3.10.1.tgz#bd679dc3f59897b6a34bb10749c855bb53a9392f"
-  integrity sha512-IgieNijUMbkDovyoKObU1DUhm1iwNYE/fuifEoEHfd1oZKZDaONBSkal7Y01shxsM49R4XaMdGez3WnF9UfiCQ==
-  dependencies:
-    domelementtype "^1.3.1"
-    domhandler "^2.3.0"
-    domutils "^1.5.1"
-    entities "^1.1.1"
-    inherits "^2.0.1"
-    readable-stream "^3.1.1"
-
-http-deceiver@^1.2.7:
-  version "1.2.7"
-  resolved "https://registry.yarnpkg.com/http-deceiver/-/http-deceiver-1.2.7.tgz#fa7168944ab9a519d337cb0bec7284dc3e723d87"
-  integrity sha1-+nFolEq5pRnTN8sL7HKE3D5yPYc=
-
-http-errors@1.7.2:
-  version "1.7.2"
-  resolved "https://registry.yarnpkg.com/http-errors/-/http-errors-1.7.2.tgz#4f5029cf13239f31036e5b2e55292bcfbcc85c8f"
-  integrity sha512-uUQBt3H/cSIVfch6i1EuPNy/YsRSOUBXTVfZ+yR7Zjez3qjBz6i9+i4zjNaoqcoFVI4lQJ5plg63TvGfRSDCRg==
-  dependencies:
-    depd "~1.1.2"
-    inherits "2.0.3"
-    setprototypeof "1.1.1"
-    statuses ">= 1.5.0 < 2"
-    toidentifier "1.0.0"
-
-http-errors@~1.6.2:
-  version "1.6.3"
-  resolved "https://registry.yarnpkg.com/http-errors/-/http-errors-1.6.3.tgz#8b55680bb4be283a0b5bf4ea2e38580be1d9320d"
-  integrity sha1-i1VoC7S+KDoLW/TqLjhYC+HZMg0=
-  dependencies:
-    depd "~1.1.2"
-    inherits "2.0.3"
-    setprototypeof "1.1.0"
-    statuses ">= 1.4.0 < 2"
-
-http-errors@~1.7.2:
-  version "1.7.3"
-  resolved "https://registry.yarnpkg.com/http-errors/-/http-errors-1.7.3.tgz#6c619e4f9c60308c38519498c14fbb10aacebb06"
-  integrity sha512-ZTTX0MWrsQ2ZAhA1cejAwDLycFsd7I7nVtnkT3Ol0aqodaKW+0CTZDQ1uBv5whptCnc8e8HeRRJxRs0kmm/Qfw==
-  dependencies:
-    depd "~1.1.2"
-    inherits "2.0.4"
-    setprototypeof "1.1.1"
-    statuses ">= 1.5.0 < 2"
-    toidentifier "1.0.0"
-
-http-parser-js@>=0.5.1:
-  version "0.5.3"
-  resolved "https://registry.yarnpkg.com/http-parser-js/-/http-parser-js-0.5.3.tgz#01d2709c79d41698bb01d4decc5e9da4e4a033d9"
-  integrity sha512-t7hjvef/5HEK7RWTdUzVUhl8zkEu+LlaE0IYzdMuvbSDipxBRpOn4Uhw8ZyECEa808iVT8XCjzo6xmYt4CiLZg==
-
-http-proxy-middleware@0.19.1:
-  version "0.19.1"
-  resolved "https://registry.yarnpkg.com/http-proxy-middleware/-/http-proxy-middleware-0.19.1.tgz#183c7dc4aa1479150306498c210cdaf96080a43a"
-  integrity sha512-yHYTgWMQO8VvwNS22eLLloAkvungsKdKTLO8AJlftYIKNfJr3GK3zK0ZCfzDDGUBttdGc8xFy1mCitvNKQtC3Q==
-  dependencies:
-    http-proxy "^1.17.0"
-    is-glob "^4.0.0"
-    lodash "^4.17.11"
-    micromatch "^3.1.10"
-
-http-proxy@^1.17.0:
-  version "1.18.1"
-  resolved "https://registry.yarnpkg.com/http-proxy/-/http-proxy-1.18.1.tgz#401541f0534884bbf95260334e72f88ee3976549"
-  integrity sha512-7mz/721AbnJwIVbnaSv1Cz3Am0ZLT/UBwkC92VlxhXv/k/BBQfM2fXElQNC27BVGr0uwUpplYPQM9LnaBMR5NQ==
-  dependencies:
-    eventemitter3 "^4.0.0"
-    follow-redirects "^1.0.0"
-    requires-port "^1.0.0"
-
-http-signature@~1.2.0:
-  version "1.2.0"
-  resolved "https://registry.yarnpkg.com/http-signature/-/http-signature-1.2.0.tgz#9aecd925114772f3d95b65a60abb8f7c18fbace1"
-  integrity sha1-muzZJRFHcvPZW2WmCruPfBj7rOE=
-  dependencies:
-    assert-plus "^1.0.0"
-    jsprim "^1.2.2"
-    sshpk "^1.7.0"
-
-https-browserify@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/https-browserify/-/https-browserify-1.0.0.tgz#ec06c10e0a34c0f2faf199f7fd7fc78fffd03c73"
-  integrity sha1-7AbBDgo0wPL68Zn3/X/Hj//QPHM=
-
-human-signals@^1.1.1:
-  version "1.1.1"
-  resolved "https://registry.yarnpkg.com/human-signals/-/human-signals-1.1.1.tgz#c5b1cd14f50aeae09ab6c59fe63ba3395fe4dfa3"
-  integrity sha512-SEQu7vl8KjNL2eoGBLF3+wAjpsNfA9XMlXAYj/3EdaNfAlxKthD1xjEQfGOUhllCGGJVNY34bRr6lPINhNjyZw==
-
-iconv-lite@0.4.24:
-  version "0.4.24"
-  resolved "https://registry.yarnpkg.com/iconv-lite/-/iconv-lite-0.4.24.tgz#2022b4b25fbddc21d2f524974a474aafe733908b"
-  integrity sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA==
-  dependencies:
-    safer-buffer ">= 2.1.2 < 3"
-
-icss-utils@^4.0.0, icss-utils@^4.1.1:
-  version "4.1.1"
-  resolved "https://registry.yarnpkg.com/icss-utils/-/icss-utils-4.1.1.tgz#21170b53789ee27447c2f47dd683081403f9a467"
-  integrity sha512-4aFq7wvWyMHKgxsH8QQtGpvbASCf+eM3wPRLI6R+MgAnTCZ6STYsRvttLvRWK0Nfif5piF394St3HeJDaljGPA==
-  dependencies:
-    postcss "^7.0.14"
-
-identity-obj-proxy@3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/identity-obj-proxy/-/identity-obj-proxy-3.0.0.tgz#94d2bda96084453ef36fbc5aaec37e0f79f1fc14"
-  integrity sha1-lNK9qWCERT7zb7xarsN+D3nx/BQ=
-  dependencies:
-    harmony-reflect "^1.4.6"
-
-ieee754@^1.1.4:
-  version "1.2.1"
-  resolved "https://registry.yarnpkg.com/ieee754/-/ieee754-1.2.1.tgz#8eb7a10a63fff25d15a57b001586d177d1b0d352"
-  integrity sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==
-
-iferr@^0.1.5:
-  version "0.1.5"
-  resolved "https://registry.yarnpkg.com/iferr/-/iferr-0.1.5.tgz#c60eed69e6d8fdb6b3104a1fcbca1c192dc5b501"
-  integrity sha1-xg7taebY/bazEEofy8ocGS3FtQE=
-
-ignore@^4.0.6:
-  version "4.0.6"
-  resolved "https://registry.yarnpkg.com/ignore/-/ignore-4.0.6.tgz#750e3db5862087b4737ebac8207ffd1ef27b25fc"
-  integrity sha512-cyFDKrqc/YdcWFniJhzI42+AzS+gNwmUzOSFcRCQYwySuBBBy/KjuxWLZ/FHEH6Moq1NizMOBWyTcv8O4OZIMg==
-
-ignore@^5.1.4:
-  version "5.1.8"
-  resolved "https://registry.yarnpkg.com/ignore/-/ignore-5.1.8.tgz#f150a8b50a34289b33e22f5889abd4d8016f0e57"
-  integrity sha512-BMpfD7PpiETpBl/A6S498BaIJ6Y/ABT93ETbby2fP00v4EbvPBXWEoaR1UBPKs3iR53pJY7EtZk5KACI57i1Uw==
-
-immer@8.0.1:
-  version "8.0.1"
-  resolved "https://registry.yarnpkg.com/immer/-/immer-8.0.1.tgz#9c73db683e2b3975c424fb0572af5889877ae656"
-  integrity sha512-aqXhGP7//Gui2+UrEtvxZxSquQVXTpZ7KDxfCcKAF3Vysvw0CViVaW9RZ1j1xlIYqaaaipBoqdqeibkc18PNvA==
-
-import-cwd@^2.0.0:
-  version "2.1.0"
-  resolved "https://registry.yarnpkg.com/import-cwd/-/import-cwd-2.1.0.tgz#aa6cf36e722761285cb371ec6519f53e2435b0a9"
-  integrity sha1-qmzzbnInYShcs3HsZRn1PiQ1sKk=
-  dependencies:
-    import-from "^2.1.0"
-
-import-fresh@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/import-fresh/-/import-fresh-2.0.0.tgz#d81355c15612d386c61f9ddd3922d4304822a546"
-  integrity sha1-2BNVwVYS04bGH53dOSLUMEgipUY=
-  dependencies:
-    caller-path "^2.0.0"
-    resolve-from "^3.0.0"
-
-import-fresh@^3.0.0, import-fresh@^3.1.0, import-fresh@^3.2.1:
-  version "3.3.0"
-  resolved "https://registry.yarnpkg.com/import-fresh/-/import-fresh-3.3.0.tgz#37162c25fcb9ebaa2e6e53d5b4d88ce17d9e0c2b"
-  integrity sha512-veYYhQa+D1QBKznvhUHxb8faxlrwUnxseDAbAp457E0wLNio2bOSKnjYDhMj+YiAq61xrMGhQk9iXVk5FzgQMw==
-  dependencies:
-    parent-module "^1.0.0"
-    resolve-from "^4.0.0"
-
-import-from@^2.1.0:
-  version "2.1.0"
-  resolved "https://registry.yarnpkg.com/import-from/-/import-from-2.1.0.tgz#335db7f2a7affd53aaa471d4b8021dee36b7f3b1"
-  integrity sha1-M1238qev/VOqpHHUuAId7ja387E=
-  dependencies:
-    resolve-from "^3.0.0"
-
-import-local@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/import-local/-/import-local-2.0.0.tgz#55070be38a5993cf18ef6db7e961f5bee5c5a09d"
-  integrity sha512-b6s04m3O+s3CGSbqDIyP4R6aAwAeYlVq9+WUWep6iHa8ETRf9yei1U48C5MmfJmV9AiLYYBKPMq/W+/WRpQmCQ==
-  dependencies:
-    pkg-dir "^3.0.0"
-    resolve-cwd "^2.0.0"
-
-import-local@^3.0.2:
-  version "3.0.2"
-  resolved "https://registry.yarnpkg.com/import-local/-/import-local-3.0.2.tgz#a8cfd0431d1de4a2199703d003e3e62364fa6db6"
-  integrity sha512-vjL3+w0oulAVZ0hBHnxa/Nm5TAurf9YLQJDhqRZyqb+VKGOB6LU8t9H1Nr5CIo16vh9XfJTOoHwU0B71S557gA==
-  dependencies:
-    pkg-dir "^4.2.0"
-    resolve-cwd "^3.0.0"
-
-imurmurhash@^0.1.4:
-  version "0.1.4"
-  resolved "https://registry.yarnpkg.com/imurmurhash/-/imurmurhash-0.1.4.tgz#9218b9b2b928a238b13dc4fb6b6d576f231453ea"
-  integrity sha1-khi5srkoojixPcT7a21XbyMUU+o=
-
-indent-string@^4.0.0:
-  version "4.0.0"
-  resolved "https://registry.yarnpkg.com/indent-string/-/indent-string-4.0.0.tgz#624f8f4497d619b2d9768531d58f4122854d7251"
-  integrity sha512-EdDDZu4A2OyIK7Lr/2zG+w5jmbuk1DVBnEwREQvBzspBJkCEbRa8GxU1lghYcaGJCnRWibjDXlq779X1/y5xwg==
-
-indexes-of@^1.0.1:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/indexes-of/-/indexes-of-1.0.1.tgz#f30f716c8e2bd346c7b67d3df3915566a7c05607"
-  integrity sha1-8w9xbI4r00bHtn0985FVZqfAVgc=
-
-infer-owner@^1.0.3, infer-owner@^1.0.4:
-  version "1.0.4"
-  resolved "https://registry.yarnpkg.com/infer-owner/-/infer-owner-1.0.4.tgz#c4cefcaa8e51051c2a40ba2ce8a3d27295af9467"
-  integrity sha512-IClj+Xz94+d7irH5qRyfJonOdfTzuDaifE6ZPWfx0N0+/ATZCbuTPq2prFl526urkQd90WyUKIh1DfBQ2hMz9A==
-
-inflight@^1.0.4:
-  version "1.0.6"
-  resolved "https://registry.yarnpkg.com/inflight/-/inflight-1.0.6.tgz#49bd6331d7d02d0c09bc910a1075ba8165b56df9"
-  integrity sha1-Sb1jMdfQLQwJvJEKEHW6gWW1bfk=
-  dependencies:
-    once "^1.3.0"
-    wrappy "1"
-
-inherits@2, inherits@2.0.4, inherits@^2.0.1, inherits@^2.0.3, inherits@^2.0.4, inherits@~2.0.1, inherits@~2.0.3:
-  version "2.0.4"
-  resolved "https://registry.yarnpkg.com/inherits/-/inherits-2.0.4.tgz#0fa2c64f932917c3433a0ded55363aae37416b7c"
-  integrity sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==
-
-inherits@2.0.1:
-  version "2.0.1"
-  resolved "https://registry.yarnpkg.com/inherits/-/inherits-2.0.1.tgz#b17d08d326b4423e568eff719f91b0b1cbdf69f1"
-  integrity sha1-sX0I0ya0Qj5Wjv9xn5GwscvfafE=
-
-inherits@2.0.3:
-  version "2.0.3"
-  resolved "https://registry.yarnpkg.com/inherits/-/inherits-2.0.3.tgz#633c2c83e3da42a502f52466022480f4208261de"
-  integrity sha1-Yzwsg+PaQqUC9SRmAiSA9CCCYd4=
-
-ini@^1.3.5:
-  version "1.3.8"
-  resolved "https://registry.yarnpkg.com/ini/-/ini-1.3.8.tgz#a29da425b48806f34767a4efce397269af28432c"
-  integrity sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew==
-
-internal-ip@^4.3.0:
-  version "4.3.0"
-  resolved "https://registry.yarnpkg.com/internal-ip/-/internal-ip-4.3.0.tgz#845452baad9d2ca3b69c635a137acb9a0dad0907"
-  integrity sha512-S1zBo1D6zcsyuC6PMmY5+55YMILQ9av8lotMx447Bq6SAgo/sDK6y6uUKmuYhW7eacnIhFfsPmCNYdDzsnnDCg==
-  dependencies:
-    default-gateway "^4.2.0"
-    ipaddr.js "^1.9.0"
-
-internal-slot@^1.0.3:
-  version "1.0.3"
-  resolved "https://registry.yarnpkg.com/internal-slot/-/internal-slot-1.0.3.tgz#7347e307deeea2faac2ac6205d4bc7d34967f59c"
-  integrity sha512-O0DB1JC/sPyZl7cIo78n5dR7eUSwwpYPiXRhTzNxZVAMUuB8vlnRFyLxdrVToks6XPLVnFfbzaVd5WLjhgg+vA==
-  dependencies:
-    get-intrinsic "^1.1.0"
-    has "^1.0.3"
-    side-channel "^1.0.4"
-
-invariant@^2.2.4:
-  version "2.2.4"
-  resolved "https://registry.yarnpkg.com/invariant/-/invariant-2.2.4.tgz#610f3c92c9359ce1db616e538008d23ff35158e6"
-  integrity sha512-phJfQVBuaJM5raOpJjSfkiD6BpbCE4Ns//LaXl6wGYtUBY83nWS6Rf9tXm2e8VaK60JEjYldbPif/A2B1C2gNA==
-  dependencies:
-    loose-envify "^1.0.0"
-
-ip-regex@^2.1.0:
-  version "2.1.0"
-  resolved "https://registry.yarnpkg.com/ip-regex/-/ip-regex-2.1.0.tgz#fa78bf5d2e6913c911ce9f819ee5146bb6d844e9"
-  integrity sha1-+ni/XS5pE8kRzp+BnuUUa7bYROk=
-
-ip@^1.1.0, ip@^1.1.5:
-  version "1.1.5"
-  resolved "https://registry.yarnpkg.com/ip/-/ip-1.1.5.tgz#bdded70114290828c0a039e72ef25f5aaec4354a"
-  integrity sha1-vd7XARQpCCjAoDnnLvJfWq7ENUo=
-
-ipaddr.js@1.9.1, ipaddr.js@^1.9.0:
-  version "1.9.1"
-  resolved "https://registry.yarnpkg.com/ipaddr.js/-/ipaddr.js-1.9.1.tgz#bff38543eeb8984825079ff3a2a8e6cbd46781b3"
-  integrity sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g==
-
-is-absolute-url@^2.0.0:
-  version "2.1.0"
-  resolved "https://registry.yarnpkg.com/is-absolute-url/-/is-absolute-url-2.1.0.tgz#50530dfb84fcc9aa7dbe7852e83a37b93b9f2aa6"
-  integrity sha1-UFMN+4T8yap9vnhS6Do3uTufKqY=
-
-is-absolute-url@^3.0.3:
-  version "3.0.3"
-  resolved "https://registry.yarnpkg.com/is-absolute-url/-/is-absolute-url-3.0.3.tgz#96c6a22b6a23929b11ea0afb1836c36ad4a5d698"
-  integrity sha512-opmNIX7uFnS96NtPmhWQgQx6/NYFgsUXYMllcfzwWKUMwfo8kku1TvE6hkNcH+Q1ts5cMVrsY7j0bxXQDciu9Q==
-
-is-accessor-descriptor@^0.1.6:
-  version "0.1.6"
-  resolved "https://registry.yarnpkg.com/is-accessor-descriptor/-/is-accessor-descriptor-0.1.6.tgz#a9e12cb3ae8d876727eeef3843f8a0897b5c98d6"
-  integrity sha1-qeEss66Nh2cn7u84Q/igiXtcmNY=
-  dependencies:
-    kind-of "^3.0.2"
-
-is-accessor-descriptor@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/is-accessor-descriptor/-/is-accessor-descriptor-1.0.0.tgz#169c2f6d3df1f992618072365c9b0ea1f6878656"
-  integrity sha512-m5hnHTkcVsPfqx3AKlyttIPb7J+XykHvJP2B9bZDjlhLIoEq4XoK64Vg7boZlVWYK6LUY94dYPEE7Lh0ZkZKcQ==
-  dependencies:
-    kind-of "^6.0.0"
-
-is-arguments@^1.0.4:
-  version "1.1.0"
-  resolved "https://registry.yarnpkg.com/is-arguments/-/is-arguments-1.1.0.tgz#62353031dfbee07ceb34656a6bde59efecae8dd9"
-  integrity sha512-1Ij4lOMPl/xB5kBDn7I+b2ttPMKa8szhEIrXDuXQD/oe3HJLTLhqhgGspwgyGd6MOywBUqVvYicF72lkgDnIHg==
-  dependencies:
-    call-bind "^1.0.0"
-
-is-arrayish@^0.2.1:
-  version "0.2.1"
-  resolved "https://registry.yarnpkg.com/is-arrayish/-/is-arrayish-0.2.1.tgz#77c99840527aa8ecb1a8ba697b80645a7a926a9d"
-  integrity sha1-d8mYQFJ6qOyxqLppe4BkWnqSap0=
-
-is-arrayish@^0.3.1:
-  version "0.3.2"
-  resolved "https://registry.yarnpkg.com/is-arrayish/-/is-arrayish-0.3.2.tgz#4574a2ae56f7ab206896fb431eaeed066fdf8f03"
-  integrity sha512-eVRqCvVlZbuw3GrM63ovNSNAeA1K16kaR/LRY/92w0zxQ5/1YzwblUX652i4Xs9RwAGjW9d9y6X88t8OaAJfWQ==
-
-is-binary-path@^1.0.0:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/is-binary-path/-/is-binary-path-1.0.1.tgz#75f16642b480f187a711c814161fd3a4a7655898"
-  integrity sha1-dfFmQrSA8YenEcgUFh/TpKdlWJg=
-  dependencies:
-    binary-extensions "^1.0.0"
-
-is-binary-path@~2.1.0:
-  version "2.1.0"
-  resolved "https://registry.yarnpkg.com/is-binary-path/-/is-binary-path-2.1.0.tgz#ea1f7f3b80f064236e83470f86c09c254fb45b09"
-  integrity sha512-ZMERYes6pDydyuGidse7OsHxtbI7WVeUEozgR/g7rd0xUimYNlvZRE/K2MgZTjWy725IfelLeVcEM97mmtRGXw==
-  dependencies:
-    binary-extensions "^2.0.0"
-
-is-buffer@^1.1.5:
-  version "1.1.6"
-  resolved "https://registry.yarnpkg.com/is-buffer/-/is-buffer-1.1.6.tgz#efaa2ea9daa0d7ab2ea13a97b2b8ad51fefbe8be"
-  integrity sha512-NcdALwpXkTm5Zvvbk7owOUSvVvBKDgKP5/ewfXEznmQFfs4ZRmanOeKBTjRVjka3QFoN6XJ+9F3USqfHqTaU5w==
-
-is-callable@^1.1.4, is-callable@^1.2.2:
-  version "1.2.3"
-  resolved "https://registry.yarnpkg.com/is-callable/-/is-callable-1.2.3.tgz#8b1e0500b73a1d76c70487636f368e519de8db8e"
-  integrity sha512-J1DcMe8UYTBSrKezuIUTUwjXsho29693unXM2YhJUTR2txK/eG47bvNa/wipPFmZFgr/N6f1GA66dv0mEyTIyQ==
-
-is-ci@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/is-ci/-/is-ci-2.0.0.tgz#6bc6334181810e04b5c22b3d589fdca55026404c"
-  integrity sha512-YfJT7rkpQB0updsdHLGWrvhBJfcfzNNawYDNIyQXJz0IViGf75O8EBPKSdvw2rF+LGCsX4FZ8tcr3b19LcZq4w==
-  dependencies:
-    ci-info "^2.0.0"
-
-is-color-stop@^1.0.0:
-  version "1.1.0"
-  resolved "https://registry.yarnpkg.com/is-color-stop/-/is-color-stop-1.1.0.tgz#cfff471aee4dd5c9e158598fbe12967b5cdad345"
-  integrity sha1-z/9HGu5N1cnhWFmPvhKWe1za00U=
-  dependencies:
-    css-color-names "^0.0.4"
-    hex-color-regex "^1.1.0"
-    hsl-regex "^1.0.0"
-    hsla-regex "^1.0.0"
-    rgb-regex "^1.0.1"
-    rgba-regex "^1.0.0"
-
-is-core-module@^2.0.0, is-core-module@^2.2.0:
-  version "2.2.0"
-  resolved "https://registry.yarnpkg.com/is-core-module/-/is-core-module-2.2.0.tgz#97037ef3d52224d85163f5597b2b63d9afed981a"
-  integrity sha512-XRAfAdyyY5F5cOXn7hYQDqh2Xmii+DEfIcQGxK/uNwMHhIkPWO0g8msXcbzLe+MpGoR951MlqM/2iIlU4vKDdQ==
-  dependencies:
-    has "^1.0.3"
-
-is-data-descriptor@^0.1.4:
-  version "0.1.4"
-  resolved "https://registry.yarnpkg.com/is-data-descriptor/-/is-data-descriptor-0.1.4.tgz#0b5ee648388e2c860282e793f1856fec3f301b56"
-  integrity sha1-C17mSDiOLIYCgueT8YVv7D8wG1Y=
-  dependencies:
-    kind-of "^3.0.2"
-
-is-data-descriptor@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/is-data-descriptor/-/is-data-descriptor-1.0.0.tgz#d84876321d0e7add03990406abbbbd36ba9268c7"
-  integrity sha512-jbRXy1FmtAoCjQkVmIVYwuuqDFUbaOeDjmed1tOGPrsMhtJA4rD9tkgA0F1qJ3gRFRXcHYVkdeaP50Q5rE/jLQ==
-  dependencies:
-    kind-of "^6.0.0"
-
-is-date-object@^1.0.1:
-  version "1.0.2"
-  resolved "https://registry.yarnpkg.com/is-date-object/-/is-date-object-1.0.2.tgz#bda736f2cd8fd06d32844e7743bfa7494c3bfd7e"
-  integrity sha512-USlDT524woQ08aoZFzh3/Z6ch9Y/EWXEHQ/AaRN0SkKq4t2Jw2R2339tSXmwuVoY7LLlBCbOIlx2myP/L5zk0g==
-
-is-descriptor@^0.1.0:
-  version "0.1.6"
-  resolved "https://registry.yarnpkg.com/is-descriptor/-/is-descriptor-0.1.6.tgz#366d8240dde487ca51823b1ab9f07a10a78251ca"
-  integrity sha512-avDYr0SB3DwO9zsMov0gKCESFYqCnE4hq/4z3TdUlukEy5t9C0YRq7HLrsN52NAcqXKaepeCD0n+B0arnVG3Hg==
-  dependencies:
-    is-accessor-descriptor "^0.1.6"
-    is-data-descriptor "^0.1.4"
-    kind-of "^5.0.0"
-
-is-descriptor@^1.0.0, is-descriptor@^1.0.2:
-  version "1.0.2"
-  resolved "https://registry.yarnpkg.com/is-descriptor/-/is-descriptor-1.0.2.tgz#3b159746a66604b04f8c81524ba365c5f14d86ec"
-  integrity sha512-2eis5WqQGV7peooDyLmNEPUrps9+SXX5c9pL3xEB+4e9HnGuDa7mB7kHxHw4CbqS9k1T2hOH3miL8n8WtiYVtg==
-  dependencies:
-    is-accessor-descriptor "^1.0.0"
-    is-data-descriptor "^1.0.0"
-    kind-of "^6.0.2"
-
-is-directory@^0.3.1:
-  version "0.3.1"
-  resolved "https://registry.yarnpkg.com/is-directory/-/is-directory-0.3.1.tgz#61339b6f2475fc772fd9c9d83f5c8575dc154ae1"
-  integrity sha1-YTObbyR1/Hcv2cnYP1yFddwVSuE=
-
-is-docker@^2.0.0:
-  version "2.1.1"
-  resolved "https://registry.yarnpkg.com/is-docker/-/is-docker-2.1.1.tgz#4125a88e44e450d384e09047ede71adc2d144156"
-  integrity sha512-ZOoqiXfEwtGknTiuDEy8pN2CfE3TxMHprvNer1mXiqwkOT77Rw3YVrUQ52EqAOU3QAWDQ+bQdx7HJzrv7LS2Hw==
-
-is-extendable@^0.1.0, is-extendable@^0.1.1:
-  version "0.1.1"
-  resolved "https://registry.yarnpkg.com/is-extendable/-/is-extendable-0.1.1.tgz#62b110e289a471418e3ec36a617d472e301dfc89"
-  integrity sha1-YrEQ4omkcUGOPsNqYX1HLjAd/Ik=
-
-is-extendable@^1.0.1:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/is-extendable/-/is-extendable-1.0.1.tgz#a7470f9e426733d81bd81e1155264e3a3507cab4"
-  integrity sha512-arnXMxT1hhoKo9k1LZdmlNyJdDDfy2v0fXjFlmok4+i8ul/6WlbVge9bhM74OpNPQPMGUToDtz+KXa1PneJxOA==
-  dependencies:
-    is-plain-object "^2.0.4"
-
-is-extglob@^2.1.0, is-extglob@^2.1.1:
-  version "2.1.1"
-  resolved "https://registry.yarnpkg.com/is-extglob/-/is-extglob-2.1.1.tgz#a88c02535791f02ed37c76a1b9ea9773c833f8c2"
-  integrity sha1-qIwCU1eR8C7TfHahueqXc8gz+MI=
-
-is-fullwidth-code-point@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/is-fullwidth-code-point/-/is-fullwidth-code-point-2.0.0.tgz#a3b30a5c4f199183167aaab93beefae3ddfb654f"
-  integrity sha1-o7MKXE8ZkYMWeqq5O+764937ZU8=
-
-is-fullwidth-code-point@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz#f116f8064fe90b3f7844a38997c0b75051269f1d"
-  integrity sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==
-
-is-generator-fn@^2.0.0:
-  version "2.1.0"
-  resolved "https://registry.yarnpkg.com/is-generator-fn/-/is-generator-fn-2.1.0.tgz#7d140adc389aaf3011a8f2a2a4cfa6faadffb118"
-  integrity sha512-cTIB4yPYL/Grw0EaSzASzg6bBy9gqCofvWN8okThAYIxKJZC+udlRAmGbM0XLeniEJSs8uEgHPGuHSe1XsOLSQ==
-
-is-glob@^3.1.0:
-  version "3.1.0"
-  resolved "https://registry.yarnpkg.com/is-glob/-/is-glob-3.1.0.tgz#7ba5ae24217804ac70707b96922567486cc3e84a"
-  integrity sha1-e6WuJCF4BKxwcHuWkiVnSGzD6Eo=
-  dependencies:
-    is-extglob "^2.1.0"
-
-is-glob@^4.0.0, is-glob@^4.0.1, is-glob@~4.0.1:
-  version "4.0.1"
-  resolved "https://registry.yarnpkg.com/is-glob/-/is-glob-4.0.1.tgz#7567dbe9f2f5e2467bc77ab83c4a29482407a5dc"
-  integrity sha512-5G0tKtBTFImOqDnLB2hG6Bp2qcKEFduo4tZu9MT/H6NQv/ghhy30o55ufafxJ/LdH79LLs2Kfrn85TLKyA7BUg==
-  dependencies:
-    is-extglob "^2.1.1"
-
-is-module@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/is-module/-/is-module-1.0.0.tgz#3258fb69f78c14d5b815d664336b4cffb6441591"
-  integrity sha1-Mlj7afeMFNW4FdZkM2tM/7ZEFZE=
-
-is-negative-zero@^2.0.1:
-  version "2.0.1"
-  resolved "https://registry.yarnpkg.com/is-negative-zero/-/is-negative-zero-2.0.1.tgz#3de746c18dda2319241a53675908d8f766f11c24"
-  integrity sha512-2z6JzQvZRa9A2Y7xC6dQQm4FSTSTNWjKIYYTt4246eMTJmIo0Q+ZyOsU66X8lxK1AbB92dFeglPLrhwpeRKO6w==
-
-is-number@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/is-number/-/is-number-3.0.0.tgz#24fd6201a4782cf50561c810276afc7d12d71195"
-  integrity sha1-JP1iAaR4LPUFYcgQJ2r8fRLXEZU=
-  dependencies:
-    kind-of "^3.0.2"
-
-is-number@^7.0.0:
-  version "7.0.0"
-  resolved "https://registry.yarnpkg.com/is-number/-/is-number-7.0.0.tgz#7535345b896734d5f80c4d06c50955527a14f12b"
-  integrity sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==
-
-is-obj@^1.0.1:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/is-obj/-/is-obj-1.0.1.tgz#3e4729ac1f5fde025cd7d83a896dab9f4f67db0f"
-  integrity sha1-PkcprB9f3gJc19g6iW2rn09n2w8=
-
-is-obj@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/is-obj/-/is-obj-2.0.0.tgz#473fb05d973705e3fd9620545018ca8e22ef4982"
-  integrity sha512-drqDG3cbczxxEJRoOXcOjtdp1J/lyp1mNn0xaznRs8+muBhgQcrnbspox5X5fOw0HnMnbfDzvnEMEtqDEJEo8w==
-
-is-path-cwd@^2.0.0:
-  version "2.2.0"
-  resolved "https://registry.yarnpkg.com/is-path-cwd/-/is-path-cwd-2.2.0.tgz#67d43b82664a7b5191fd9119127eb300048a9fdb"
-  integrity sha512-w942bTcih8fdJPJmQHFzkS76NEP8Kzzvmw92cXsazb8intwLqPibPPdXf4ANdKV3rYMuuQYGIWtvz9JilB3NFQ==
-
-is-path-in-cwd@^2.0.0:
-  version "2.1.0"
-  resolved "https://registry.yarnpkg.com/is-path-in-cwd/-/is-path-in-cwd-2.1.0.tgz#bfe2dca26c69f397265a4009963602935a053acb"
-  integrity sha512-rNocXHgipO+rvnP6dk3zI20RpOtrAM/kzbB258Uw5BWr3TpXi861yzjo16Dn4hUox07iw5AyeMLHWsujkjzvRQ==
-  dependencies:
-    is-path-inside "^2.1.0"
-
-is-path-inside@^2.1.0:
-  version "2.1.0"
-  resolved "https://registry.yarnpkg.com/is-path-inside/-/is-path-inside-2.1.0.tgz#7c9810587d659a40d27bcdb4d5616eab059494b2"
-  integrity sha512-wiyhTzfDWsvwAW53OBWF5zuvaOGlZ6PwYxAbPVDhpm+gM09xKQGjBq/8uYN12aDvMxnAnq3dxTyoSoRNmg5YFg==
-  dependencies:
-    path-is-inside "^1.0.2"
-
-is-plain-obj@^1.0.0:
-  version "1.1.0"
-  resolved "https://registry.yarnpkg.com/is-plain-obj/-/is-plain-obj-1.1.0.tgz#71a50c8429dfca773c92a390a4a03b39fcd51d3e"
-  integrity sha1-caUMhCnfync8kqOQpKA7OfzVHT4=
-
-is-plain-object@^2.0.3, is-plain-object@^2.0.4:
-  version "2.0.4"
-  resolved "https://registry.yarnpkg.com/is-plain-object/-/is-plain-object-2.0.4.tgz#2c163b3fafb1b606d9d17928f05c2a1c38e07677"
-  integrity sha512-h5PpgXkWitc38BBMYawTYMWJHFZJVnBquFE57xFpjB8pJFiF6gZ+bU+WyI/yqXiFR5mdLsgYNaPe8uao6Uv9Og==
-  dependencies:
-    isobject "^3.0.1"
-
-is-potential-custom-element-name@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/is-potential-custom-element-name/-/is-potential-custom-element-name-1.0.0.tgz#0c52e54bcca391bb2c494b21e8626d7336c6e397"
-  integrity sha1-DFLlS8yjkbssSUsh6GJtczbG45c=
-
-is-regex@^1.0.4, is-regex@^1.1.1:
-  version "1.1.2"
-  resolved "https://registry.yarnpkg.com/is-regex/-/is-regex-1.1.2.tgz#81c8ebde4db142f2cf1c53fc86d6a45788266251"
-  integrity sha512-axvdhb5pdhEVThqJzYXwMlVuZwC+FF2DpcOhTS+y/8jVq4trxyPgfcwIxIKiyeuLlSQYKkmUaPQJ8ZE4yNKXDg==
-  dependencies:
-    call-bind "^1.0.2"
-    has-symbols "^1.0.1"
-
-is-regexp@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/is-regexp/-/is-regexp-1.0.0.tgz#fd2d883545c46bac5a633e7b9a09e87fa2cb5069"
-  integrity sha1-/S2INUXEa6xaYz57mgnof6LLUGk=
-
-is-resolvable@^1.0.0:
-  version "1.1.0"
-  resolved "https://registry.yarnpkg.com/is-resolvable/-/is-resolvable-1.1.0.tgz#fb18f87ce1feb925169c9a407c19318a3206ed88"
-  integrity sha512-qgDYXFSR5WvEfuS5dMj6oTMEbrrSaM0CrFk2Yiq/gXnBvD9pMa2jGXxyhGLfvhZpuMZe18CJpFxAt3CRs42NMg==
-
-is-root@2.1.0:
-  version "2.1.0"
-  resolved "https://registry.yarnpkg.com/is-root/-/is-root-2.1.0.tgz#809e18129cf1129644302a4f8544035d51984a9c"
-  integrity sha512-AGOriNp96vNBd3HtU+RzFEc75FfR5ymiYv8E553I71SCeXBiMsVDUtdio1OEFvrPyLIQ9tVR5RxXIFe5PUFjMg==
-
-is-stream@^1.1.0:
-  version "1.1.0"
-  resolved "https://registry.yarnpkg.com/is-stream/-/is-stream-1.1.0.tgz#12d4a3dd4e68e0b79ceb8dbc84173ae80d91ca44"
-  integrity sha1-EtSj3U5o4Lec6428hBc66A2RykQ=
-
-is-stream@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/is-stream/-/is-stream-2.0.0.tgz#bde9c32680d6fae04129d6ac9d921ce7815f78e3"
-  integrity sha512-XCoy+WlUr7d1+Z8GgSuXmpuUFC9fOhRXglJMx+dwLKTkL44Cjd4W1Z5P+BQZpr+cR93aGP4S/s7Ftw6Nd/kiEw==
-
-is-string@^1.0.5:
-  version "1.0.5"
-  resolved "https://registry.yarnpkg.com/is-string/-/is-string-1.0.5.tgz#40493ed198ef3ff477b8c7f92f644ec82a5cd3a6"
-  integrity sha512-buY6VNRjhQMiF1qWDouloZlQbRhDPCebwxSjxMjxgemYT46YMd2NR0/H+fBhEfWX4A/w9TBJ+ol+okqJKFE6vQ==
-
-is-svg@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/is-svg/-/is-svg-3.0.0.tgz#9321dbd29c212e5ca99c4fa9794c714bcafa2f75"
-  integrity sha512-gi4iHK53LR2ujhLVVj+37Ykh9GLqYHX6JOVXbLAucaG/Cqw9xwdFOjDM2qeifLs1sF1npXXFvDu0r5HNgCMrzQ==
-  dependencies:
-    html-comment-regex "^1.1.0"
-
-is-symbol@^1.0.2:
-  version "1.0.3"
-  resolved "https://registry.yarnpkg.com/is-symbol/-/is-symbol-1.0.3.tgz#38e1014b9e6329be0de9d24a414fd7441ec61937"
-  integrity sha512-OwijhaRSgqvhm/0ZdAcXNZt9lYdKFpcRDT5ULUuYXPoT794UNOdU+gpT6Rzo7b4V2HUl/op6GqY894AZwv9faQ==
-  dependencies:
-    has-symbols "^1.0.1"
-
-is-typedarray@^1.0.0, is-typedarray@~1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/is-typedarray/-/is-typedarray-1.0.0.tgz#e479c80858df0c1b11ddda6940f96011fcda4a9a"
-  integrity sha1-5HnICFjfDBsR3dppQPlgEfzaSpo=
-
-is-windows@^1.0.2:
-  version "1.0.2"
-  resolved "https://registry.yarnpkg.com/is-windows/-/is-windows-1.0.2.tgz#d1850eb9791ecd18e6182ce12a30f396634bb19d"
-  integrity sha512-eXK1UInq2bPmjyX6e3VHIzMLobc4J94i4AWn+Hpq3OU5KkrRC96OAcR3PRJ/pGu6m8TRnBHP9dkXQVsT/COVIA==
-
-is-wsl@^1.1.0:
-  version "1.1.0"
-  resolved "https://registry.yarnpkg.com/is-wsl/-/is-wsl-1.1.0.tgz#1f16e4aa22b04d1336b66188a66af3c600c3a66d"
-  integrity sha1-HxbkqiKwTRM2tmGIpmrzxgDDpm0=
-
-is-wsl@^2.1.1, is-wsl@^2.2.0:
-  version "2.2.0"
-  resolved "https://registry.yarnpkg.com/is-wsl/-/is-wsl-2.2.0.tgz#74a4c76e77ca9fd3f932f290c17ea326cd157271"
-  integrity sha512-fKzAra0rGJUUBwGBgNkHZuToZcn+TtXHpeCgmkMJMMYx1sQDYaCSyjJBSCa2nH1DGm7s3n1oBnohoVTBaN7Lww==
-  dependencies:
-    is-docker "^2.0.0"
-
-isarray@0.0.1:
-  version "0.0.1"
-  resolved "https://registry.yarnpkg.com/isarray/-/isarray-0.0.1.tgz#8a18acfca9a8f4177e09abfc6038939b05d1eedf"
-  integrity sha1-ihis/Kmo9Bd+Cav8YDiTmwXR7t8=
-
-isarray@1.0.0, isarray@^1.0.0, isarray@~1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/isarray/-/isarray-1.0.0.tgz#bb935d48582cba168c06834957a54a3e07124f11"
-  integrity sha1-u5NdSFgsuhaMBoNJV6VKPgcSTxE=
-
-isexe@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/isexe/-/isexe-2.0.0.tgz#e8fbf374dc556ff8947a10dcb0572d633f2cfa10"
-  integrity sha1-6PvzdNxVb/iUehDcsFctYz8s+hA=
-
-isobject@^2.0.0:
-  version "2.1.0"
-  resolved "https://registry.yarnpkg.com/isobject/-/isobject-2.1.0.tgz#f065561096a3f1da2ef46272f815c840d87e0c89"
-  integrity sha1-8GVWEJaj8dou9GJy+BXIQNh+DIk=
-  dependencies:
-    isarray "1.0.0"
-
-isobject@^3.0.0, isobject@^3.0.1:
-  version "3.0.1"
-  resolved "https://registry.yarnpkg.com/isobject/-/isobject-3.0.1.tgz#4e431e92b11a9731636aa1f9c8d1ccbcfdab78df"
-  integrity sha1-TkMekrEalzFjaqH5yNHMvP2reN8=
-
-isstream@~0.1.2:
-  version "0.1.2"
-  resolved "https://registry.yarnpkg.com/isstream/-/isstream-0.1.2.tgz#47e63f7af55afa6f92e1500e690eb8b8529c099a"
-  integrity sha1-R+Y/evVa+m+S4VAOaQ64uFKcCZo=
-
-istanbul-lib-coverage@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/istanbul-lib-coverage/-/istanbul-lib-coverage-3.0.0.tgz#f5944a37c70b550b02a78a5c3b2055b280cec8ec"
-  integrity sha512-UiUIqxMgRDET6eR+o5HbfRYP1l0hqkWOs7vNxC/mggutCMUIhWMm8gAHb8tHlyfD3/l6rlgNA5cKdDzEAf6hEg==
-
-istanbul-lib-instrument@^4.0.0, istanbul-lib-instrument@^4.0.3:
-  version "4.0.3"
-  resolved "https://registry.yarnpkg.com/istanbul-lib-instrument/-/istanbul-lib-instrument-4.0.3.tgz#873c6fff897450118222774696a3f28902d77c1d"
-  integrity sha512-BXgQl9kf4WTCPCCpmFGoJkz/+uhvm7h7PFKUYxh7qarQd3ER33vHG//qaE8eN25l07YqZPpHXU9I09l/RD5aGQ==
-  dependencies:
-    "@babel/core" "^7.7.5"
-    "@istanbuljs/schema" "^0.1.2"
-    istanbul-lib-coverage "^3.0.0"
-    semver "^6.3.0"
-
-istanbul-lib-report@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/istanbul-lib-report/-/istanbul-lib-report-3.0.0.tgz#7518fe52ea44de372f460a76b5ecda9ffb73d8a6"
-  integrity sha512-wcdi+uAKzfiGT2abPpKZ0hSU1rGQjUQnLvtY5MpQ7QCTahD3VODhcu4wcfY1YtkGaDD5yuydOLINXsfbus9ROw==
-  dependencies:
-    istanbul-lib-coverage "^3.0.0"
-    make-dir "^3.0.0"
-    supports-color "^7.1.0"
-
-istanbul-lib-source-maps@^4.0.0:
-  version "4.0.0"
-  resolved "https://registry.yarnpkg.com/istanbul-lib-source-maps/-/istanbul-lib-source-maps-4.0.0.tgz#75743ce6d96bb86dc7ee4352cf6366a23f0b1ad9"
-  integrity sha512-c16LpFRkR8vQXyHZ5nLpY35JZtzj1PQY1iZmesUbf1FZHbIupcWfjgOXBY9YHkLEQ6puz1u4Dgj6qmU/DisrZg==
-  dependencies:
-    debug "^4.1.1"
-    istanbul-lib-coverage "^3.0.0"
-    source-map "^0.6.1"
-
-istanbul-reports@^3.0.2:
-  version "3.0.2"
-  resolved "https://registry.yarnpkg.com/istanbul-reports/-/istanbul-reports-3.0.2.tgz#d593210e5000683750cb09fc0644e4b6e27fd53b"
-  integrity sha512-9tZvz7AiR3PEDNGiV9vIouQ/EAcqMXFmkcA1CDFTwOB98OZVDL0PH9glHotf5Ugp6GCOTypfzGWI/OqjWNCRUw==
-  dependencies:
-    html-escaper "^2.0.0"
-    istanbul-lib-report "^3.0.0"
-
-jest-changed-files@^26.6.2:
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/jest-changed-files/-/jest-changed-files-26.6.2.tgz#f6198479e1cc66f22f9ae1e22acaa0b429c042d0"
-  integrity sha512-fDS7szLcY9sCtIip8Fjry9oGf3I2ht/QT21bAHm5Dmf0mD4X3ReNUf17y+bO6fR8WgbIZTlbyG1ak/53cbRzKQ==
-  dependencies:
-    "@jest/types" "^26.6.2"
-    execa "^4.0.0"
-    throat "^5.0.0"
-
-jest-circus@26.6.0:
-  version "26.6.0"
-  resolved "https://registry.yarnpkg.com/jest-circus/-/jest-circus-26.6.0.tgz#7d9647b2e7f921181869faae1f90a2629fd70705"
-  integrity sha512-L2/Y9szN6FJPWFK8kzWXwfp+FOR7xq0cUL4lIsdbIdwz3Vh6P1nrpcqOleSzr28zOtSHQNV9Z7Tl+KkuK7t5Ng==
-  dependencies:
-    "@babel/traverse" "^7.1.0"
-    "@jest/environment" "^26.6.0"
-    "@jest/test-result" "^26.6.0"
-    "@jest/types" "^26.6.0"
-    "@types/babel__traverse" "^7.0.4"
-    "@types/node" "*"
-    chalk "^4.0.0"
-    co "^4.6.0"
-    dedent "^0.7.0"
-    expect "^26.6.0"
-    is-generator-fn "^2.0.0"
-    jest-each "^26.6.0"
-    jest-matcher-utils "^26.6.0"
-    jest-message-util "^26.6.0"
-    jest-runner "^26.6.0"
-    jest-runtime "^26.6.0"
-    jest-snapshot "^26.6.0"
-    jest-util "^26.6.0"
-    pretty-format "^26.6.0"
-    stack-utils "^2.0.2"
-    throat "^5.0.0"
-
-jest-cli@^26.6.0:
-  version "26.6.3"
-  resolved "https://registry.yarnpkg.com/jest-cli/-/jest-cli-26.6.3.tgz#43117cfef24bc4cd691a174a8796a532e135e92a"
-  integrity sha512-GF9noBSa9t08pSyl3CY4frMrqp+aQXFGFkf5hEPbh/pIUFYWMK6ZLTfbmadxJVcJrdRoChlWQsA2VkJcDFK8hg==
-  dependencies:
-    "@jest/core" "^26.6.3"
-    "@jest/test-result" "^26.6.2"
-    "@jest/types" "^26.6.2"
-    chalk "^4.0.0"
-    exit "^0.1.2"
-    graceful-fs "^4.2.4"
-    import-local "^3.0.2"
-    is-ci "^2.0.0"
-    jest-config "^26.6.3"
-    jest-util "^26.6.2"
-    jest-validate "^26.6.2"
-    prompts "^2.0.1"
-    yargs "^15.4.1"
-
-jest-config@^26.6.3:
-  version "26.6.3"
-  resolved "https://registry.yarnpkg.com/jest-config/-/jest-config-26.6.3.tgz#64f41444eef9eb03dc51d5c53b75c8c71f645349"
-  integrity sha512-t5qdIj/bCj2j7NFVHb2nFB4aUdfucDn3JRKgrZnplb8nieAirAzRSHP8uDEd+qV6ygzg9Pz4YG7UTJf94LPSyg==
-  dependencies:
-    "@babel/core" "^7.1.0"
-    "@jest/test-sequencer" "^26.6.3"
-    "@jest/types" "^26.6.2"
-    babel-jest "^26.6.3"
-    chalk "^4.0.0"
-    deepmerge "^4.2.2"
-    glob "^7.1.1"
-    graceful-fs "^4.2.4"
-    jest-environment-jsdom "^26.6.2"
-    jest-environment-node "^26.6.2"
-    jest-get-type "^26.3.0"
-    jest-jasmine2 "^26.6.3"
-    jest-regex-util "^26.0.0"
-    jest-resolve "^26.6.2"
-    jest-util "^26.6.2"
-    jest-validate "^26.6.2"
-    micromatch "^4.0.2"
-    pretty-format "^26.6.2"
-
-jest-diff@^26.0.0, jest-diff@^26.6.2:
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/jest-diff/-/jest-diff-26.6.2.tgz#1aa7468b52c3a68d7d5c5fdcdfcd5e49bd164394"
-  integrity sha512-6m+9Z3Gv9wN0WFVasqjCL/06+EFCMTqDEUl/b87HYK2rAPTyfz4ZIuSlPhY51PIQRWx5TaxeF1qmXKe9gfN3sA==
-  dependencies:
-    chalk "^4.0.0"
-    diff-sequences "^26.6.2"
-    jest-get-type "^26.3.0"
-    pretty-format "^26.6.2"
-
-jest-docblock@^26.0.0:
-  version "26.0.0"
-  resolved "https://registry.yarnpkg.com/jest-docblock/-/jest-docblock-26.0.0.tgz#3e2fa20899fc928cb13bd0ff68bd3711a36889b5"
-  integrity sha512-RDZ4Iz3QbtRWycd8bUEPxQsTlYazfYn/h5R65Fc6gOfwozFhoImx+affzky/FFBuqISPTqjXomoIGJVKBWoo0w==
-  dependencies:
-    detect-newline "^3.0.0"
-
-jest-each@^26.6.0, jest-each@^26.6.2:
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/jest-each/-/jest-each-26.6.2.tgz#02526438a77a67401c8a6382dfe5999952c167cb"
-  integrity sha512-Mer/f0KaATbjl8MCJ+0GEpNdqmnVmDYqCTJYTvoo7rqmRiDllmp2AYN+06F93nXcY3ur9ShIjS+CO/uD+BbH4A==
-  dependencies:
-    "@jest/types" "^26.6.2"
-    chalk "^4.0.0"
-    jest-get-type "^26.3.0"
-    jest-util "^26.6.2"
-    pretty-format "^26.6.2"
-
-jest-environment-jsdom@^26.6.2:
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/jest-environment-jsdom/-/jest-environment-jsdom-26.6.2.tgz#78d09fe9cf019a357009b9b7e1f101d23bd1da3e"
-  integrity sha512-jgPqCruTlt3Kwqg5/WVFyHIOJHsiAvhcp2qiR2QQstuG9yWox5+iHpU3ZrcBxW14T4fe5Z68jAfLRh7joCSP2Q==
-  dependencies:
-    "@jest/environment" "^26.6.2"
-    "@jest/fake-timers" "^26.6.2"
-    "@jest/types" "^26.6.2"
-    "@types/node" "*"
-    jest-mock "^26.6.2"
-    jest-util "^26.6.2"
-    jsdom "^16.4.0"
-
-jest-environment-node@^26.6.2:
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/jest-environment-node/-/jest-environment-node-26.6.2.tgz#824e4c7fb4944646356f11ac75b229b0035f2b0c"
-  integrity sha512-zhtMio3Exty18dy8ee8eJ9kjnRyZC1N4C1Nt/VShN1apyXc8rWGtJ9lI7vqiWcyyXS4BVSEn9lxAM2D+07/Tag==
-  dependencies:
-    "@jest/environment" "^26.6.2"
-    "@jest/fake-timers" "^26.6.2"
-    "@jest/types" "^26.6.2"
-    "@types/node" "*"
-    jest-mock "^26.6.2"
-    jest-util "^26.6.2"
-
-jest-get-type@^26.3.0:
-  version "26.3.0"
-  resolved "https://registry.yarnpkg.com/jest-get-type/-/jest-get-type-26.3.0.tgz#e97dc3c3f53c2b406ca7afaed4493b1d099199e0"
-  integrity sha512-TpfaviN1R2pQWkIihlfEanwOXK0zcxrKEE4MlU6Tn7keoXdN6/3gK/xl0yEh8DOunn5pOVGKf8hB4R9gVh04ig==
-
-jest-haste-map@^26.6.2:
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/jest-haste-map/-/jest-haste-map-26.6.2.tgz#dd7e60fe7dc0e9f911a23d79c5ff7fb5c2cafeaa"
-  integrity sha512-easWIJXIw71B2RdR8kgqpjQrbMRWQBgiBwXYEhtGUTaX+doCjBheluShdDMeR8IMfJiTqH4+zfhtg29apJf/8w==
-  dependencies:
-    "@jest/types" "^26.6.2"
-    "@types/graceful-fs" "^4.1.2"
-    "@types/node" "*"
-    anymatch "^3.0.3"
-    fb-watchman "^2.0.0"
-    graceful-fs "^4.2.4"
-    jest-regex-util "^26.0.0"
-    jest-serializer "^26.6.2"
-    jest-util "^26.6.2"
-    jest-worker "^26.6.2"
-    micromatch "^4.0.2"
-    sane "^4.0.3"
-    walker "^1.0.7"
-  optionalDependencies:
-    fsevents "^2.1.2"
-
-jest-jasmine2@^26.6.3:
-  version "26.6.3"
-  resolved "https://registry.yarnpkg.com/jest-jasmine2/-/jest-jasmine2-26.6.3.tgz#adc3cf915deacb5212c93b9f3547cd12958f2edd"
-  integrity sha512-kPKUrQtc8aYwBV7CqBg5pu+tmYXlvFlSFYn18ev4gPFtrRzB15N2gW/Roew3187q2w2eHuu0MU9TJz6w0/nPEg==
-  dependencies:
-    "@babel/traverse" "^7.1.0"
-    "@jest/environment" "^26.6.2"
-    "@jest/source-map" "^26.6.2"
-    "@jest/test-result" "^26.6.2"
-    "@jest/types" "^26.6.2"
-    "@types/node" "*"
-    chalk "^4.0.0"
-    co "^4.6.0"
-    expect "^26.6.2"
-    is-generator-fn "^2.0.0"
-    jest-each "^26.6.2"
-    jest-matcher-utils "^26.6.2"
-    jest-message-util "^26.6.2"
-    jest-runtime "^26.6.3"
-    jest-snapshot "^26.6.2"
-    jest-util "^26.6.2"
-    pretty-format "^26.6.2"
-    throat "^5.0.0"
-
-jest-leak-detector@^26.6.2:
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/jest-leak-detector/-/jest-leak-detector-26.6.2.tgz#7717cf118b92238f2eba65054c8a0c9c653a91af"
-  integrity sha512-i4xlXpsVSMeKvg2cEKdfhh0H39qlJlP5Ex1yQxwF9ubahboQYMgTtz5oML35AVA3B4Eu+YsmwaiKVev9KCvLxg==
-  dependencies:
-    jest-get-type "^26.3.0"
-    pretty-format "^26.6.2"
-
-jest-matcher-utils@^26.6.0, jest-matcher-utils@^26.6.2:
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/jest-matcher-utils/-/jest-matcher-utils-26.6.2.tgz#8e6fd6e863c8b2d31ac6472eeb237bc595e53e7a"
-  integrity sha512-llnc8vQgYcNqDrqRDXWwMr9i7rS5XFiCwvh6DTP7Jqa2mqpcCBBlpCbn+trkG0KNhPu/h8rzyBkriOtBstvWhw==
-  dependencies:
-    chalk "^4.0.0"
-    jest-diff "^26.6.2"
-    jest-get-type "^26.3.0"
-    pretty-format "^26.6.2"
-
-jest-message-util@^26.6.0, jest-message-util@^26.6.2:
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/jest-message-util/-/jest-message-util-26.6.2.tgz#58173744ad6fc0506b5d21150b9be56ef001ca07"
-  integrity sha512-rGiLePzQ3AzwUshu2+Rn+UMFk0pHN58sOG+IaJbk5Jxuqo3NYO1U2/MIR4S1sKgsoYSXSzdtSa0TgrmtUwEbmA==
-  dependencies:
-    "@babel/code-frame" "^7.0.0"
-    "@jest/types" "^26.6.2"
-    "@types/stack-utils" "^2.0.0"
-    chalk "^4.0.0"
-    graceful-fs "^4.2.4"
-    micromatch "^4.0.2"
-    pretty-format "^26.6.2"
-    slash "^3.0.0"
-    stack-utils "^2.0.2"
-
-jest-mock@^26.6.2:
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/jest-mock/-/jest-mock-26.6.2.tgz#d6cb712b041ed47fe0d9b6fc3474bc6543feb302"
-  integrity sha512-YyFjePHHp1LzpzYcmgqkJ0nm0gg/lJx2aZFzFy1S6eUqNjXsOqTK10zNRff2dNfssgokjkG65OlWNcIlgd3zew==
-  dependencies:
-    "@jest/types" "^26.6.2"
-    "@types/node" "*"
-
-jest-pnp-resolver@^1.2.2:
-  version "1.2.2"
-  resolved "https://registry.yarnpkg.com/jest-pnp-resolver/-/jest-pnp-resolver-1.2.2.tgz#b704ac0ae028a89108a4d040b3f919dfddc8e33c"
-  integrity sha512-olV41bKSMm8BdnuMsewT4jqlZ8+3TCARAXjZGT9jcoSnrfUnRCqnMoF9XEeoWjbzObpqF9dRhHQj0Xb9QdF6/w==
-
-jest-regex-util@^26.0.0:
-  version "26.0.0"
-  resolved "https://registry.yarnpkg.com/jest-regex-util/-/jest-regex-util-26.0.0.tgz#d25e7184b36e39fd466c3bc41be0971e821fee28"
-  integrity sha512-Gv3ZIs/nA48/Zvjrl34bf+oD76JHiGDUxNOVgUjh3j890sblXryjY4rss71fPtD/njchl6PSE2hIhvyWa1eT0A==
-
-jest-resolve-dependencies@^26.6.3:
-  version "26.6.3"
-  resolved "https://registry.yarnpkg.com/jest-resolve-dependencies/-/jest-resolve-dependencies-26.6.3.tgz#6680859ee5d22ee5dcd961fe4871f59f4c784fb6"
-  integrity sha512-pVwUjJkxbhe4RY8QEWzN3vns2kqyuldKpxlxJlzEYfKSvY6/bMvxoFrYYzUO1Gx28yKWN37qyV7rIoIp2h8fTg==
-  dependencies:
-    "@jest/types" "^26.6.2"
-    jest-regex-util "^26.0.0"
-    jest-snapshot "^26.6.2"
-
-jest-resolve@26.6.0:
-  version "26.6.0"
-  resolved "https://registry.yarnpkg.com/jest-resolve/-/jest-resolve-26.6.0.tgz#070fe7159af87b03e50f52ea5e17ee95bbee40e1"
-  integrity sha512-tRAz2bwraHufNp+CCmAD8ciyCpXCs1NQxB5EJAmtCFy6BN81loFEGWKzYu26Y62lAJJe4X4jg36Kf+NsQyiStQ==
-  dependencies:
-    "@jest/types" "^26.6.0"
-    chalk "^4.0.0"
-    graceful-fs "^4.2.4"
-    jest-pnp-resolver "^1.2.2"
-    jest-util "^26.6.0"
-    read-pkg-up "^7.0.1"
-    resolve "^1.17.0"
-    slash "^3.0.0"
-
-jest-resolve@^26.6.2:
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/jest-resolve/-/jest-resolve-26.6.2.tgz#a3ab1517217f469b504f1b56603c5bb541fbb507"
-  integrity sha512-sOxsZOq25mT1wRsfHcbtkInS+Ek7Q8jCHUB0ZUTP0tc/c41QHriU/NunqMfCUWsL4H3MHpvQD4QR9kSYhS7UvQ==
-  dependencies:
-    "@jest/types" "^26.6.2"
-    chalk "^4.0.0"
-    graceful-fs "^4.2.4"
-    jest-pnp-resolver "^1.2.2"
-    jest-util "^26.6.2"
-    read-pkg-up "^7.0.1"
-    resolve "^1.18.1"
-    slash "^3.0.0"
-
-jest-runner@^26.6.0, jest-runner@^26.6.3:
-  version "26.6.3"
-  resolved "https://registry.yarnpkg.com/jest-runner/-/jest-runner-26.6.3.tgz#2d1fed3d46e10f233fd1dbd3bfaa3fe8924be159"
-  integrity sha512-atgKpRHnaA2OvByG/HpGA4g6CSPS/1LK0jK3gATJAoptC1ojltpmVlYC3TYgdmGp+GLuhzpH30Gvs36szSL2JQ==
-  dependencies:
-    "@jest/console" "^26.6.2"
-    "@jest/environment" "^26.6.2"
-    "@jest/test-result" "^26.6.2"
-    "@jest/types" "^26.6.2"
-    "@types/node" "*"
-    chalk "^4.0.0"
-    emittery "^0.7.1"
-    exit "^0.1.2"
-    graceful-fs "^4.2.4"
-    jest-config "^26.6.3"
-    jest-docblock "^26.0.0"
-    jest-haste-map "^26.6.2"
-    jest-leak-detector "^26.6.2"
-    jest-message-util "^26.6.2"
-    jest-resolve "^26.6.2"
-    jest-runtime "^26.6.3"
-    jest-util "^26.6.2"
-    jest-worker "^26.6.2"
-    source-map-support "^0.5.6"
-    throat "^5.0.0"
-
-jest-runtime@^26.6.0, jest-runtime@^26.6.3:
-  version "26.6.3"
-  resolved "https://registry.yarnpkg.com/jest-runtime/-/jest-runtime-26.6.3.tgz#4f64efbcfac398331b74b4b3c82d27d401b8fa2b"
-  integrity sha512-lrzyR3N8sacTAMeonbqpnSka1dHNux2uk0qqDXVkMv2c/A3wYnvQ4EXuI013Y6+gSKSCxdaczvf4HF0mVXHRdw==
-  dependencies:
-    "@jest/console" "^26.6.2"
-    "@jest/environment" "^26.6.2"
-    "@jest/fake-timers" "^26.6.2"
-    "@jest/globals" "^26.6.2"
-    "@jest/source-map" "^26.6.2"
-    "@jest/test-result" "^26.6.2"
-    "@jest/transform" "^26.6.2"
-    "@jest/types" "^26.6.2"
-    "@types/yargs" "^15.0.0"
-    chalk "^4.0.0"
-    cjs-module-lexer "^0.6.0"
-    collect-v8-coverage "^1.0.0"
-    exit "^0.1.2"
-    glob "^7.1.3"
-    graceful-fs "^4.2.4"
-    jest-config "^26.6.3"
-    jest-haste-map "^26.6.2"
-    jest-message-util "^26.6.2"
-    jest-mock "^26.6.2"
-    jest-regex-util "^26.0.0"
-    jest-resolve "^26.6.2"
-    jest-snapshot "^26.6.2"
-    jest-util "^26.6.2"
-    jest-validate "^26.6.2"
-    slash "^3.0.0"
-    strip-bom "^4.0.0"
-    yargs "^15.4.1"
-
-jest-serializer@^26.6.2:
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/jest-serializer/-/jest-serializer-26.6.2.tgz#d139aafd46957d3a448f3a6cdabe2919ba0742d1"
-  integrity sha512-S5wqyz0DXnNJPd/xfIzZ5Xnp1HrJWBczg8mMfMpN78OJ5eDxXyf+Ygld9wX1DnUWbIbhM1YDY95NjR4CBXkb2g==
-  dependencies:
-    "@types/node" "*"
-    graceful-fs "^4.2.4"
-
-jest-snapshot@^26.6.0, jest-snapshot@^26.6.2:
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/jest-snapshot/-/jest-snapshot-26.6.2.tgz#f3b0af1acb223316850bd14e1beea9837fb39c84"
-  integrity sha512-OLhxz05EzUtsAmOMzuupt1lHYXCNib0ECyuZ/PZOx9TrZcC8vL0x+DUG3TL+GLX3yHG45e6YGjIm0XwDc3q3og==
-  dependencies:
-    "@babel/types" "^7.0.0"
-    "@jest/types" "^26.6.2"
-    "@types/babel__traverse" "^7.0.4"
-    "@types/prettier" "^2.0.0"
-    chalk "^4.0.0"
-    expect "^26.6.2"
-    graceful-fs "^4.2.4"
-    jest-diff "^26.6.2"
-    jest-get-type "^26.3.0"
-    jest-haste-map "^26.6.2"
-    jest-matcher-utils "^26.6.2"
-    jest-message-util "^26.6.2"
-    jest-resolve "^26.6.2"
-    natural-compare "^1.4.0"
-    pretty-format "^26.6.2"
-    semver "^7.3.2"
-
-jest-util@^26.6.0, jest-util@^26.6.2:
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/jest-util/-/jest-util-26.6.2.tgz#907535dbe4d5a6cb4c47ac9b926f6af29576cbc1"
-  integrity sha512-MDW0fKfsn0OI7MS7Euz6h8HNDXVQ0gaM9uW6RjfDmd1DAFcaxX9OqIakHIqhbnmF08Cf2DLDG+ulq8YQQ0Lp0Q==
-  dependencies:
-    "@jest/types" "^26.6.2"
-    "@types/node" "*"
-    chalk "^4.0.0"
-    graceful-fs "^4.2.4"
-    is-ci "^2.0.0"
-    micromatch "^4.0.2"
-
-jest-validate@^26.6.2:
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/jest-validate/-/jest-validate-26.6.2.tgz#23d380971587150467342911c3d7b4ac57ab20ec"
-  integrity sha512-NEYZ9Aeyj0i5rQqbq+tpIOom0YS1u2MVu6+euBsvpgIme+FOfRmoC4R5p0JiAUpaFvFy24xgrpMknarR/93XjQ==
-  dependencies:
-    "@jest/types" "^26.6.2"
-    camelcase "^6.0.0"
-    chalk "^4.0.0"
-    jest-get-type "^26.3.0"
-    leven "^3.1.0"
-    pretty-format "^26.6.2"
-
-jest-watch-typeahead@0.6.1:
-  version "0.6.1"
-  resolved "https://registry.yarnpkg.com/jest-watch-typeahead/-/jest-watch-typeahead-0.6.1.tgz#45221b86bb6710b7e97baaa1640ae24a07785e63"
-  integrity sha512-ITVnHhj3Jd/QkqQcTqZfRgjfyRhDFM/auzgVo2RKvSwi18YMvh0WvXDJFoFED6c7jd/5jxtu4kSOb9PTu2cPVg==
-  dependencies:
-    ansi-escapes "^4.3.1"
-    chalk "^4.0.0"
-    jest-regex-util "^26.0.0"
-    jest-watcher "^26.3.0"
-    slash "^3.0.0"
-    string-length "^4.0.1"
-    strip-ansi "^6.0.0"
-
-jest-watcher@^26.3.0, jest-watcher@^26.6.2:
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/jest-watcher/-/jest-watcher-26.6.2.tgz#a5b683b8f9d68dbcb1d7dae32172d2cca0592975"
-  integrity sha512-WKJob0P/Em2csiVthsI68p6aGKTIcsfjH9Gsx1f0A3Italz43e3ho0geSAVsmj09RWOELP1AZ/DXyJgOgDKxXQ==
-  dependencies:
-    "@jest/test-result" "^26.6.2"
-    "@jest/types" "^26.6.2"
-    "@types/node" "*"
-    ansi-escapes "^4.2.1"
-    chalk "^4.0.0"
-    jest-util "^26.6.2"
-    string-length "^4.0.1"
-
-jest-worker@^24.9.0:
-  version "24.9.0"
-  resolved "https://registry.yarnpkg.com/jest-worker/-/jest-worker-24.9.0.tgz#5dbfdb5b2d322e98567898238a9697bcce67b3e5"
-  integrity sha512-51PE4haMSXcHohnSMdM42anbvZANYTqMrr52tVKPqqsPJMzoP6FYYDVqahX/HrAoKEKz3uUPzSvKs9A3qR4iVw==
-  dependencies:
-    merge-stream "^2.0.0"
-    supports-color "^6.1.0"
-
-jest-worker@^26.5.0, jest-worker@^26.6.2:
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/jest-worker/-/jest-worker-26.6.2.tgz#7f72cbc4d643c365e27b9fd775f9d0eaa9c7a8ed"
-  integrity sha512-KWYVV1c4i+jbMpaBC+U++4Va0cp8OisU185o73T1vo99hqi7w8tSJfUXYswwqqrjzwxa6KpRK54WhPvwf5w6PQ==
-  dependencies:
-    "@types/node" "*"
-    merge-stream "^2.0.0"
-    supports-color "^7.0.0"
-
-jest@26.6.0:
-  version "26.6.0"
-  resolved "https://registry.yarnpkg.com/jest/-/jest-26.6.0.tgz#546b25a1d8c888569dbbe93cae131748086a4a25"
-  integrity sha512-jxTmrvuecVISvKFFhOkjsWRZV7sFqdSUAd1ajOKY+/QE/aLBVstsJ/dX8GczLzwiT6ZEwwmZqtCUHLHHQVzcfA==
-  dependencies:
-    "@jest/core" "^26.6.0"
-    import-local "^3.0.2"
-    jest-cli "^26.6.0"
-
-"js-tokens@^3.0.0 || ^4.0.0", js-tokens@^4.0.0:
-  version "4.0.0"
-  resolved "https://registry.yarnpkg.com/js-tokens/-/js-tokens-4.0.0.tgz#19203fb59991df98e3a287050d4647cdeaf32499"
-  integrity sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==
-
-js-yaml@^3.13.1:
-  version "3.14.1"
-  resolved "https://registry.yarnpkg.com/js-yaml/-/js-yaml-3.14.1.tgz#dae812fdb3825fa306609a8717383c50c36a0537"
-  integrity sha512-okMH7OXXJ7YrN9Ok3/SXrnu4iX9yOk+25nqX4imS2npuvTYDmo/QEZoqwZkYaIDk3jVvBOTOIEgEhaLOynBS9g==
-  dependencies:
-    argparse "^1.0.7"
-    esprima "^4.0.0"
-
-jsbn@~0.1.0:
-  version "0.1.1"
-  resolved "https://registry.yarnpkg.com/jsbn/-/jsbn-0.1.1.tgz#a5e654c2e5a2deb5f201d96cefbca80c0ef2f513"
-  integrity sha1-peZUwuWi3rXyAdls77yoDA7y9RM=
-
-jsdom@^16.4.0:
-  version "16.4.0"
-  resolved "https://registry.yarnpkg.com/jsdom/-/jsdom-16.4.0.tgz#36005bde2d136f73eee1a830c6d45e55408edddb"
-  integrity sha512-lYMm3wYdgPhrl7pDcRmvzPhhrGVBeVhPIqeHjzeiHN3DFmD1RBpbExbi8vU7BJdH8VAZYovR8DMt0PNNDM7k8w==
-  dependencies:
-    abab "^2.0.3"
-    acorn "^7.1.1"
-    acorn-globals "^6.0.0"
-    cssom "^0.4.4"
-    cssstyle "^2.2.0"
-    data-urls "^2.0.0"
-    decimal.js "^10.2.0"
-    domexception "^2.0.1"
-    escodegen "^1.14.1"
-    html-encoding-sniffer "^2.0.1"
-    is-potential-custom-element-name "^1.0.0"
-    nwsapi "^2.2.0"
-    parse5 "5.1.1"
-    request "^2.88.2"
-    request-promise-native "^1.0.8"
-    saxes "^5.0.0"
-    symbol-tree "^3.2.4"
-    tough-cookie "^3.0.1"
-    w3c-hr-time "^1.0.2"
-    w3c-xmlserializer "^2.0.0"
-    webidl-conversions "^6.1.0"
-    whatwg-encoding "^1.0.5"
-    whatwg-mimetype "^2.3.0"
-    whatwg-url "^8.0.0"
-    ws "^7.2.3"
-    xml-name-validator "^3.0.0"
-
-jsesc@^2.5.1:
-  version "2.5.2"
-  resolved "https://registry.yarnpkg.com/jsesc/-/jsesc-2.5.2.tgz#80564d2e483dacf6e8ef209650a67df3f0c283a4"
-  integrity sha512-OYu7XEzjkCQ3C5Ps3QIZsQfNpqoJyZZA99wd9aWd05NCtC5pWOkShK2mkL6HXQR6/Cy2lbNdPlZBpuQHXE63gA==
-
-jsesc@~0.5.0:
-  version "0.5.0"
-  resolved "https://registry.yarnpkg.com/jsesc/-/jsesc-0.5.0.tgz#e7dee66e35d6fc16f710fe91d5cf69f70f08911d"
-  integrity sha1-597mbjXW/Bb3EP6R1c9p9w8IkR0=
-
-json-parse-better-errors@^1.0.1, json-parse-better-errors@^1.0.2:
-  version "1.0.2"
-  resolved "https://registry.yarnpkg.com/json-parse-better-errors/-/json-parse-better-errors-1.0.2.tgz#bb867cfb3450e69107c131d1c514bab3dc8bcaa9"
-  integrity sha512-mrqyZKfX5EhL7hvqcV6WG1yYjnjeuYDzDhhcAAUrq8Po85NBQBJP+ZDUT75qZQ98IkUoBqdkExkukOU7Ts2wrw==
-
-json-parse-even-better-errors@^2.3.0:
-  version "2.3.1"
-  resolved "https://registry.yarnpkg.com/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz#7c47805a94319928e05777405dc12e1f7a4ee02d"
-  integrity sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w==
-
-json-schema-traverse@^0.4.1:
-  version "0.4.1"
-  resolved "https://registry.yarnpkg.com/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz#69f6a87d9513ab8bb8fe63bdb0979c448e684660"
-  integrity sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==
-
-json-schema-traverse@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz#ae7bcb3656ab77a73ba5c49bf654f38e6b6860e2"
-  integrity sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==
-
-json-schema@0.2.3:
-  version "0.2.3"
-  resolved "https://registry.yarnpkg.com/json-schema/-/json-schema-0.2.3.tgz#b480c892e59a2f05954ce727bd3f2a4e882f9e13"
-  integrity sha1-tIDIkuWaLwWVTOcnvT8qTogvnhM=
-
-json-stable-stringify-without-jsonify@^1.0.1:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/json-stable-stringify-without-jsonify/-/json-stable-stringify-without-jsonify-1.0.1.tgz#9db7b59496ad3f3cfef30a75142d2d930ad72651"
-  integrity sha1-nbe1lJatPzz+8wp1FC0tkwrXJlE=
-
-json-stringify-safe@~5.0.1:
-  version "5.0.1"
-  resolved "https://registry.yarnpkg.com/json-stringify-safe/-/json-stringify-safe-5.0.1.tgz#1296a2d58fd45f19a0f6ce01d65701e2c735b6eb"
-  integrity sha1-Epai1Y/UXxmg9s4B1lcB4sc1tus=
-
-json3@^3.3.3:
-  version "3.3.3"
-  resolved "https://registry.yarnpkg.com/json3/-/json3-3.3.3.tgz#7fc10e375fc5ae42c4705a5cc0aa6f62be305b81"
-  integrity sha512-c7/8mbUsKigAbLkD5B010BK4D9LZm7A1pNItkEwiUZRpIN66exu/e7YQWysGun+TRKaJp8MhemM+VkfWv42aCA==
-
-json5@^1.0.1:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/json5/-/json5-1.0.1.tgz#779fb0018604fa854eacbf6252180d83543e3dbe"
-  integrity sha512-aKS4WQjPenRxiQsC93MNfjx+nbF4PAdYzmd/1JIj8HYzqfbu86beTuNgXDzPknWk0n0uARlyewZo4s++ES36Ow==
-  dependencies:
-    minimist "^1.2.0"
-
-json5@^2.1.2:
-  version "2.2.0"
-  resolved "https://registry.yarnpkg.com/json5/-/json5-2.2.0.tgz#2dfefe720c6ba525d9ebd909950f0515316c89a3"
-  integrity sha512-f+8cldu7X/y7RAJurMEJmdoKXGB/X550w2Nr3tTbezL6RwEE/iMcm+tZnXeoZtKuOq6ft8+CqzEkrIgx1fPoQA==
-  dependencies:
-    minimist "^1.2.5"
-
-jsonfile@^4.0.0:
-  version "4.0.0"
-  resolved "https://registry.yarnpkg.com/jsonfile/-/jsonfile-4.0.0.tgz#8771aae0799b64076b76640fca058f9c10e33ecb"
-  integrity sha1-h3Gq4HmbZAdrdmQPygWPnBDjPss=
-  optionalDependencies:
-    graceful-fs "^4.1.6"
-
-jsonfile@^6.0.1:
-  version "6.1.0"
-  resolved "https://registry.yarnpkg.com/jsonfile/-/jsonfile-6.1.0.tgz#bc55b2634793c679ec6403094eb13698a6ec0aae"
-  integrity sha512-5dgndWOriYSm5cnYaJNhalLNDKOqFwyDB/rr1E9ZsGciGvKPs8R2xYGCacuf3z6K1YKDz182fd+fY3cn3pMqXQ==
-  dependencies:
-    universalify "^2.0.0"
-  optionalDependencies:
-    graceful-fs "^4.1.6"
-
-jsprim@^1.2.2:
-  version "1.4.1"
-  resolved "https://registry.yarnpkg.com/jsprim/-/jsprim-1.4.1.tgz#313e66bc1e5cc06e438bc1b7499c2e5c56acb6a2"
-  integrity sha1-MT5mvB5cwG5Di8G3SZwuXFastqI=
-  dependencies:
-    assert-plus "1.0.0"
-    extsprintf "1.3.0"
-    json-schema "0.2.3"
-    verror "1.10.0"
-
-"jsx-ast-utils@^2.4.1 || ^3.0.0", jsx-ast-utils@^3.1.0:
-  version "3.2.0"
-  resolved "https://registry.yarnpkg.com/jsx-ast-utils/-/jsx-ast-utils-3.2.0.tgz#41108d2cec408c3453c1bbe8a4aae9e1e2bd8f82"
-  integrity sha512-EIsmt3O3ljsU6sot/J4E1zDRxfBNrhjyf/OKjlydwgEimQuznlM4Wv7U+ueONJMyEn1WRE0K8dhi3dVAXYT24Q==
-  dependencies:
-    array-includes "^3.1.2"
-    object.assign "^4.1.2"
-
-killable@^1.0.1:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/killable/-/killable-1.0.1.tgz#4c8ce441187a061c7474fb87ca08e2a638194892"
-  integrity sha512-LzqtLKlUwirEUyl/nicirVmNiPvYs7l5n8wOPP7fyJVpUPkvCnW/vuiXGpylGUlnPDnB7311rARzAt3Mhswpjg==
-
-kind-of@^3.0.2, kind-of@^3.0.3, kind-of@^3.2.0:
-  version "3.2.2"
-  resolved "https://registry.yarnpkg.com/kind-of/-/kind-of-3.2.2.tgz#31ea21a734bab9bbb0f32466d893aea51e4a3c64"
-  integrity sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=
-  dependencies:
-    is-buffer "^1.1.5"
-
-kind-of@^4.0.0:
-  version "4.0.0"
-  resolved "https://registry.yarnpkg.com/kind-of/-/kind-of-4.0.0.tgz#20813df3d712928b207378691a45066fae72dd57"
-  integrity sha1-IIE989cSkosgc3hpGkUGb65y3Vc=
-  dependencies:
-    is-buffer "^1.1.5"
-
-kind-of@^5.0.0:
-  version "5.1.0"
-  resolved "https://registry.yarnpkg.com/kind-of/-/kind-of-5.1.0.tgz#729c91e2d857b7a419a1f9aa65685c4c33f5845d"
-  integrity sha512-NGEErnH6F2vUuXDh+OlbcKW7/wOcfdRHaZ7VWtqCztfHri/++YKmP51OdWeGPuqCOba6kk2OTe5d02VmTB80Pw==
-
-kind-of@^6.0.0, kind-of@^6.0.2:
-  version "6.0.3"
-  resolved "https://registry.yarnpkg.com/kind-of/-/kind-of-6.0.3.tgz#07c05034a6c349fa06e24fa35aa76db4580ce4dd"
-  integrity sha512-dcS1ul+9tmeD95T+x28/ehLgd9mENa3LsvDTtzm3vyBEO7RPptvAD+t44WVXaUjTBRcrpFeFlC8WCruUR456hw==
-
-kleur@^3.0.3:
-  version "3.0.3"
-  resolved "https://registry.yarnpkg.com/kleur/-/kleur-3.0.3.tgz#a79c9ecc86ee1ce3fa6206d1216c501f147fc07e"
-  integrity sha512-eTIzlVOSUR+JxdDFepEYcBMtZ9Qqdef+rnzWdRZuMbOywu5tO2w2N7rqjoANZ5k9vywhL6Br1VRjUIgTQx4E8w==
-
-klona@^2.0.4:
-  version "2.0.4"
-  resolved "https://registry.yarnpkg.com/klona/-/klona-2.0.4.tgz#7bb1e3affb0cb8624547ef7e8f6708ea2e39dfc0"
-  integrity sha512-ZRbnvdg/NxqzC7L9Uyqzf4psi1OM4Cuc+sJAkQPjO6XkQIJTNbfK2Rsmbw8fx1p2mkZdp2FZYo2+LwXYY/uwIA==
-
-language-subtag-registry@~0.3.2:
-  version "0.3.21"
-  resolved "https://registry.yarnpkg.com/language-subtag-registry/-/language-subtag-registry-0.3.21.tgz#04ac218bea46f04cb039084602c6da9e788dd45a"
-  integrity sha512-L0IqwlIXjilBVVYKFT37X9Ih11Um5NEl9cbJIuU/SwP/zEEAbBPOnEeeuxVMf45ydWQRDQN3Nqc96OgbH1K+Pg==
-
-language-tags@^1.0.5:
-  version "1.0.5"
-  resolved "https://registry.yarnpkg.com/language-tags/-/language-tags-1.0.5.tgz#d321dbc4da30ba8bf3024e040fa5c14661f9193a"
-  integrity sha1-0yHbxNowuovzAk4ED6XBRmH5GTo=
-  dependencies:
-    language-subtag-registry "~0.3.2"
-
-last-call-webpack-plugin@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/last-call-webpack-plugin/-/last-call-webpack-plugin-3.0.0.tgz#9742df0e10e3cf46e5c0381c2de90d3a7a2d7555"
-  integrity sha512-7KI2l2GIZa9p2spzPIVZBYyNKkN+e/SQPpnjlTiPhdbDW3F86tdKKELxKpzJ5sgU19wQWsACULZmpTPYHeWO5w==
-  dependencies:
-    lodash "^4.17.5"
-    webpack-sources "^1.1.0"
-
-leven@^3.1.0:
-  version "3.1.0"
-  resolved "https://registry.yarnpkg.com/leven/-/leven-3.1.0.tgz#77891de834064cccba82ae7842bb6b14a13ed7f2"
-  integrity sha512-qsda+H8jTaUaN/x5vzW2rzc+8Rw4TAQ/4KjB46IwK5VH+IlVeeeje/EoZRpiXvIqjFgK84QffqPztGI3VBLG1A==
-
-levn@^0.4.1:
-  version "0.4.1"
-  resolved "https://registry.yarnpkg.com/levn/-/levn-0.4.1.tgz#ae4562c007473b932a6200d403268dd2fffc6ade"
-  integrity sha512-+bT2uH4E5LGE7h/n3evcS/sQlJXCpIp6ym8OWJ5eV6+67Dsql/LaaT7qJBAt2rzfoa/5QBGBhxDix1dMt2kQKQ==
-  dependencies:
-    prelude-ls "^1.2.1"
-    type-check "~0.4.0"
-
-levn@~0.3.0:
-  version "0.3.0"
-  resolved "https://registry.yarnpkg.com/levn/-/levn-0.3.0.tgz#3b09924edf9f083c0490fdd4c0bc4421e04764ee"
-  integrity sha1-OwmSTt+fCDwEkP3UwLxEIeBHZO4=
-  dependencies:
-    prelude-ls "~1.1.2"
-    type-check "~0.3.2"
-
-lines-and-columns@^1.1.6:
-  version "1.1.6"
-  resolved "https://registry.yarnpkg.com/lines-and-columns/-/lines-and-columns-1.1.6.tgz#1c00c743b433cd0a4e80758f7b64a57440d9ff00"
-  integrity sha1-HADHQ7QzzQpOgHWPe2SldEDZ/wA=
-
-load-json-file@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/load-json-file/-/load-json-file-2.0.0.tgz#7947e42149af80d696cbf797bcaabcfe1fe29ca8"
-  integrity sha1-eUfkIUmvgNaWy/eXvKq8/h/inKg=
-  dependencies:
-    graceful-fs "^4.1.2"
-    parse-json "^2.2.0"
-    pify "^2.0.0"
-    strip-bom "^3.0.0"
-
-loader-runner@^2.4.0:
-  version "2.4.0"
-  resolved "https://registry.yarnpkg.com/loader-runner/-/loader-runner-2.4.0.tgz#ed47066bfe534d7e84c4c7b9998c2a75607d9357"
-  integrity sha512-Jsmr89RcXGIwivFY21FcRrisYZfvLMTWx5kOLc+JTxtpBOG6xML0vzbc6SEQG2FO9/4Fc3wW4LVcB5DmGflaRw==
-
-loader-utils@1.2.3:
-  version "1.2.3"
-  resolved "https://registry.yarnpkg.com/loader-utils/-/loader-utils-1.2.3.tgz#1ff5dc6911c9f0a062531a4c04b609406108c2c7"
-  integrity sha512-fkpz8ejdnEMG3s37wGL07iSBDg99O9D5yflE9RGNH3hRdx9SOwYfnGYdZOUIZitN8E+E2vkq3MUMYMvPYl5ZZA==
-  dependencies:
-    big.js "^5.2.2"
-    emojis-list "^2.0.0"
-    json5 "^1.0.1"
-
-loader-utils@2.0.0, loader-utils@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/loader-utils/-/loader-utils-2.0.0.tgz#e4cace5b816d425a166b5f097e10cd12b36064b0"
-  integrity sha512-rP4F0h2RaWSvPEkD7BLDFQnvSf+nK+wr3ESUjNTyAGobqrijmW92zc+SO6d4p4B1wh7+B/Jg1mkQe5NYUEHtHQ==
-  dependencies:
-    big.js "^5.2.2"
-    emojis-list "^3.0.0"
-    json5 "^2.1.2"
-
-loader-utils@^1.1.0, loader-utils@^1.2.3, loader-utils@^1.4.0:
-  version "1.4.0"
-  resolved "https://registry.yarnpkg.com/loader-utils/-/loader-utils-1.4.0.tgz#c579b5e34cb34b1a74edc6c1fb36bfa371d5a613"
-  integrity sha512-qH0WSMBtn/oHuwjy/NucEgbx5dbxxnxup9s4PVXJUDHZBQY+s0NWA9rJf53RBnQZxfch7euUui7hpoAPvALZdA==
-  dependencies:
-    big.js "^5.2.2"
-    emojis-list "^3.0.0"
-    json5 "^1.0.1"
-
-locate-path@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/locate-path/-/locate-path-2.0.0.tgz#2b568b265eec944c6d9c0de9c3dbbbca0354cd8e"
-  integrity sha1-K1aLJl7slExtnA3pw9u7ygNUzY4=
-  dependencies:
-    p-locate "^2.0.0"
-    path-exists "^3.0.0"
-
-locate-path@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/locate-path/-/locate-path-3.0.0.tgz#dbec3b3ab759758071b58fe59fc41871af21400e"
-  integrity sha512-7AO748wWnIhNqAuaty2ZWHkQHRSNfPVIsPIfwEOWO22AmaoVrWavlOcMR5nzTLNYvp36X220/maaRsrec1G65A==
-  dependencies:
-    p-locate "^3.0.0"
-    path-exists "^3.0.0"
-
-locate-path@^5.0.0:
-  version "5.0.0"
-  resolved "https://registry.yarnpkg.com/locate-path/-/locate-path-5.0.0.tgz#1afba396afd676a6d42504d0a67a3a7eb9f62aa0"
-  integrity sha512-t7hw9pI+WvuwNJXwk5zVHpyhIqzg2qTlklJOf0mVxGSbe3Fp2VieZcduNYjaLDoy6p9uGpQEGWG87WpMKlNq8g==
-  dependencies:
-    p-locate "^4.1.0"
-
-lodash._reinterpolate@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/lodash._reinterpolate/-/lodash._reinterpolate-3.0.0.tgz#0ccf2d89166af03b3663c796538b75ac6e114d9d"
-  integrity sha1-DM8tiRZq8Ds2Y8eWU4t1rG4RTZ0=
-
-lodash.memoize@^4.1.2:
-  version "4.1.2"
-  resolved "https://registry.yarnpkg.com/lodash.memoize/-/lodash.memoize-4.1.2.tgz#bcc6c49a42a2840ed997f323eada5ecd182e0bfe"
-  integrity sha1-vMbEmkKihA7Zl/Mj6tpezRguC/4=
-
-lodash.mergewith@4.6.2:
-  version "4.6.2"
-  resolved "https://registry.yarnpkg.com/lodash.mergewith/-/lodash.mergewith-4.6.2.tgz#617121f89ac55f59047c7aec1ccd6654c6590f55"
-  integrity sha512-GK3g5RPZWTRSeLSpgP8Xhra+pnjBC56q9FZYe1d5RN3TJ35dbkGy3YqBSMbyCrlbi+CM9Z3Jk5yTL7RCsqboyQ==
-
-lodash.sortby@^4.7.0:
-  version "4.7.0"
-  resolved "https://registry.yarnpkg.com/lodash.sortby/-/lodash.sortby-4.7.0.tgz#edd14c824e2cc9c1e0b0a1b42bb5210516a42438"
-  integrity sha1-7dFMgk4sycHgsKG0K7UhBRakJDg=
-
-lodash.template@^4.5.0:
-  version "4.5.0"
-  resolved "https://registry.yarnpkg.com/lodash.template/-/lodash.template-4.5.0.tgz#f976195cf3f347d0d5f52483569fe8031ccce8ab"
-  integrity sha512-84vYFxIkmidUiFxidA/KjjH9pAycqW+h980j7Fuz5qxRtO9pgB7MDFTdys1N7A5mcucRiDyEq4fusljItR1T/A==
-  dependencies:
-    lodash._reinterpolate "^3.0.0"
-    lodash.templatesettings "^4.0.0"
-
-lodash.templatesettings@^4.0.0:
-  version "4.2.0"
-  resolved "https://registry.yarnpkg.com/lodash.templatesettings/-/lodash.templatesettings-4.2.0.tgz#e481310f049d3cf6d47e912ad09313b154f0fb33"
-  integrity sha512-stgLz+i3Aa9mZgnjr/O+v9ruKZsPsndy7qPZOchbqk2cnTU1ZaldKK+v7m54WoKIyxiuMZTKT2H81F8BeAc3ZQ==
-  dependencies:
-    lodash._reinterpolate "^3.0.0"
-
-lodash.uniq@^4.5.0:
-  version "4.5.0"
-  resolved "https://registry.yarnpkg.com/lodash.uniq/-/lodash.uniq-4.5.0.tgz#d0225373aeb652adc1bc82e4945339a842754773"
-  integrity sha1-0CJTc662Uq3BvILklFM5qEJ1R3M=
-
-"lodash@>=3.5 <5", lodash@^4.17.11, lodash@^4.17.14, lodash@^4.17.15, lodash@^4.17.19, lodash@^4.17.20, lodash@^4.17.5:
-  version "4.17.21"
-  resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.21.tgz#679591c564c3bffaae8454cf0b3df370c3d6911c"
-  integrity sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==
-
-loglevel@^1.6.8:
-  version "1.7.1"
-  resolved "https://registry.yarnpkg.com/loglevel/-/loglevel-1.7.1.tgz#005fde2f5e6e47068f935ff28573e125ef72f197"
-  integrity sha512-Hesni4s5UkWkwCGJMQGAh71PaLUmKFM60dHvq0zi/vDhhrzuk+4GgNbTXJ12YYQJn6ZKBDNIjYcuQGKudvqrIw==
-
-loose-envify@^1.0.0, loose-envify@^1.1.0, loose-envify@^1.2.0, loose-envify@^1.3.1, loose-envify@^1.4.0:
-  version "1.4.0"
-  resolved "https://registry.yarnpkg.com/loose-envify/-/loose-envify-1.4.0.tgz#71ee51fa7be4caec1a63839f7e682d8132d30caf"
-  integrity sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==
-  dependencies:
-    js-tokens "^3.0.0 || ^4.0.0"
-
-lower-case@^2.0.2:
-  version "2.0.2"
-  resolved "https://registry.yarnpkg.com/lower-case/-/lower-case-2.0.2.tgz#6fa237c63dbdc4a82ca0fd882e4722dc5e634e28"
-  integrity sha512-7fm3l3NAF9WfN6W3JOmf5drwpVqX78JtoGJ3A6W0a6ZnldM41w2fV5D490psKFTpMds8TJse/eHLFFsNHHjHgg==
-  dependencies:
-    tslib "^2.0.3"
-
-lru-cache@^5.1.1:
-  version "5.1.1"
-  resolved "https://registry.yarnpkg.com/lru-cache/-/lru-cache-5.1.1.tgz#1da27e6710271947695daf6848e847f01d84b920"
-  integrity sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w==
-  dependencies:
-    yallist "^3.0.2"
-
-lru-cache@^6.0.0:
-  version "6.0.0"
-  resolved "https://registry.yarnpkg.com/lru-cache/-/lru-cache-6.0.0.tgz#6d6fe6570ebd96aaf90fcad1dafa3b2566db3a94"
-  integrity sha512-Jo6dJ04CmSjuznwJSS3pUeWmd/H0ffTlkXXgwZi+eq1UCmqQwCh+eLsYOYCwY991i2Fah4h1BEMCx4qThGbsiA==
-  dependencies:
-    yallist "^4.0.0"
-
-lz-string@^1.4.4:
-  version "1.4.4"
-  resolved "https://registry.yarnpkg.com/lz-string/-/lz-string-1.4.4.tgz#c0d8eaf36059f705796e1e344811cf4c498d3a26"
-  integrity sha1-wNjq82BZ9wV5bh40SBHPTEmNOiY=
-
-magic-string@^0.25.0, magic-string@^0.25.7:
-  version "0.25.7"
-  resolved "https://registry.yarnpkg.com/magic-string/-/magic-string-0.25.7.tgz#3f497d6fd34c669c6798dcb821f2ef31f5445051"
-  integrity sha512-4CrMT5DOHTDk4HYDlzmwu4FVCcIYI8gauveasrdCu2IKIFOJ3f0v/8MDGJCDL9oD2ppz/Av1b0Nj345H9M+XIA==
-  dependencies:
-    sourcemap-codec "^1.4.4"
-
-make-dir@^2.0.0:
-  version "2.1.0"
-  resolved "https://registry.yarnpkg.com/make-dir/-/make-dir-2.1.0.tgz#5f0310e18b8be898cc07009295a30ae41e91e6f5"
-  integrity sha512-LS9X+dc8KLxXCb8dni79fLIIUA5VyZoyjSMCwTluaXA0o27cCK0bhXkpgw+sTXVpPy/lSO57ilRixqk0vDmtRA==
-  dependencies:
-    pify "^4.0.1"
-    semver "^5.6.0"
-
-make-dir@^3.0.0, make-dir@^3.0.2:
-  version "3.1.0"
-  resolved "https://registry.yarnpkg.com/make-dir/-/make-dir-3.1.0.tgz#415e967046b3a7f1d185277d84aa58203726a13f"
-  integrity sha512-g3FeP20LNwhALb/6Cz6Dd4F2ngze0jz7tbzrD2wAV+o9FeNHe4rL+yK2md0J/fiSf1sa1ADhXqi5+oVwOM/eGw==
-  dependencies:
-    semver "^6.0.0"
-
-makeerror@1.0.x:
-  version "1.0.11"
-  resolved "https://registry.yarnpkg.com/makeerror/-/makeerror-1.0.11.tgz#e01a5c9109f2af79660e4e8b9587790184f5a96c"
-  integrity sha1-4BpckQnyr3lmDk6LlYd5AYT1qWw=
-  dependencies:
-    tmpl "1.0.x"
-
-map-cache@^0.2.2:
-  version "0.2.2"
-  resolved "https://registry.yarnpkg.com/map-cache/-/map-cache-0.2.2.tgz#c32abd0bd6525d9b051645bb4f26ac5dc98a0dbf"
-  integrity sha1-wyq9C9ZSXZsFFkW7TyasXcmKDb8=
-
-map-visit@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/map-visit/-/map-visit-1.0.0.tgz#ecdca8f13144e660f1b5bd41f12f3479d98dfb8f"
-  integrity sha1-7Nyo8TFE5mDxtb1B8S80edmN+48=
-  dependencies:
-    object-visit "^1.0.0"
-
-md5.js@^1.3.4:
-  version "1.3.5"
-  resolved "https://registry.yarnpkg.com/md5.js/-/md5.js-1.3.5.tgz#b5d07b8e3216e3e27cd728d72f70d1e6a342005f"
-  integrity sha512-xitP+WxNPcTTOgnTJcrhM0xvdPepipPSf3I8EIpGKeFLjt3PlJLIDG3u8EX53ZIubkb+5U2+3rELYpEhHhzdkg==
-  dependencies:
-    hash-base "^3.0.0"
-    inherits "^2.0.1"
-    safe-buffer "^5.1.2"
-
-mdn-data@2.0.14:
-  version "2.0.14"
-  resolved "https://registry.yarnpkg.com/mdn-data/-/mdn-data-2.0.14.tgz#7113fc4281917d63ce29b43446f701e68c25ba50"
-  integrity sha512-dn6wd0uw5GsdswPFfsgMp5NSB0/aDe6fK94YJV/AJDYXL6HVLWBsxeq7js7Ad+mU2K9LAlwpk6kN2D5mwCPVow==
-
-mdn-data@2.0.4:
-  version "2.0.4"
-  resolved "https://registry.yarnpkg.com/mdn-data/-/mdn-data-2.0.4.tgz#699b3c38ac6f1d728091a64650b65d388502fd5b"
-  integrity sha512-iV3XNKw06j5Q7mi6h+9vbx23Tv7JkjEVgKHW4pimwyDGWm0OIQntJJ+u1C6mg6mK1EaTv42XQ7w76yuzH7M2cA==
-
-media-typer@0.3.0:
-  version "0.3.0"
-  resolved "https://registry.yarnpkg.com/media-typer/-/media-typer-0.3.0.tgz#8710d7af0aa626f8fffa1ce00168545263255748"
-  integrity sha1-hxDXrwqmJvj/+hzgAWhUUmMlV0g=
-
-memory-fs@^0.4.1:
-  version "0.4.1"
-  resolved "https://registry.yarnpkg.com/memory-fs/-/memory-fs-0.4.1.tgz#3a9a20b8462523e447cfbc7e8bb80ed667bfc552"
-  integrity sha1-OpoguEYlI+RHz7x+i7gO1me/xVI=
-  dependencies:
-    errno "^0.1.3"
-    readable-stream "^2.0.1"
-
-memory-fs@^0.5.0:
-  version "0.5.0"
-  resolved "https://registry.yarnpkg.com/memory-fs/-/memory-fs-0.5.0.tgz#324c01288b88652966d161db77838720845a8e3c"
-  integrity sha512-jA0rdU5KoQMC0e6ppoNRtpp6vjFq6+NY7r8hywnC7V+1Xj/MtHwGIbB1QaK/dunyjWteJzmkpd7ooeWg10T7GA==
-  dependencies:
-    errno "^0.1.3"
-    readable-stream "^2.0.1"
-
-merge-descriptors@1.0.1:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/merge-descriptors/-/merge-descriptors-1.0.1.tgz#b00aaa556dd8b44568150ec9d1b953f3f90cbb61"
-  integrity sha1-sAqqVW3YtEVoFQ7J0blT8/kMu2E=
-
-merge-stream@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/merge-stream/-/merge-stream-2.0.0.tgz#52823629a14dd00c9770fb6ad47dc6310f2c1f60"
-  integrity sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w==
-
-merge2@^1.3.0:
-  version "1.4.1"
-  resolved "https://registry.yarnpkg.com/merge2/-/merge2-1.4.1.tgz#4368892f885e907455a6fd7dc55c0c9d404990ae"
-  integrity sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg==
-
-methods@~1.1.2:
-  version "1.1.2"
-  resolved "https://registry.yarnpkg.com/methods/-/methods-1.1.2.tgz#5529a4d67654134edcc5266656835b0f851afcee"
-  integrity sha1-VSmk1nZUE07cxSZmVoNbD4Ua/O4=
-
-microevent.ts@~0.1.1:
-  version "0.1.1"
-  resolved "https://registry.yarnpkg.com/microevent.ts/-/microevent.ts-0.1.1.tgz#70b09b83f43df5172d0205a63025bce0f7357fa0"
-  integrity sha512-jo1OfR4TaEwd5HOrt5+tAZ9mqT4jmpNAusXtyfNzqVm9uiSYFZlKM1wYL4oU7azZW/PxQW53wM0S6OR1JHNa2g==
-
-micromatch@^3.1.10, micromatch@^3.1.4:
-  version "3.1.10"
-  resolved "https://registry.yarnpkg.com/micromatch/-/micromatch-3.1.10.tgz#70859bc95c9840952f359a068a3fc49f9ecfac23"
-  integrity sha512-MWikgl9n9M3w+bpsY3He8L+w9eF9338xRl8IAO5viDizwSzziFEyUzo2xrrloB64ADbTf8uA8vRqqttDTOmccg==
-  dependencies:
-    arr-diff "^4.0.0"
-    array-unique "^0.3.2"
-    braces "^2.3.1"
-    define-property "^2.0.2"
-    extend-shallow "^3.0.2"
-    extglob "^2.0.4"
-    fragment-cache "^0.2.1"
-    kind-of "^6.0.2"
-    nanomatch "^1.2.9"
-    object.pick "^1.3.0"
-    regex-not "^1.0.0"
-    snapdragon "^0.8.1"
-    to-regex "^3.0.2"
-
-micromatch@^4.0.2:
-  version "4.0.2"
-  resolved "https://registry.yarnpkg.com/micromatch/-/micromatch-4.0.2.tgz#4fcb0999bf9fbc2fcbdd212f6d629b9a56c39259"
-  integrity sha512-y7FpHSbMUMoyPbYUSzO6PaZ6FyRnQOpHuKwbo1G+Knck95XVU4QAiKdGEnj5wwoS7PlOgthX/09u5iFJ+aYf5Q==
-  dependencies:
-    braces "^3.0.1"
-    picomatch "^2.0.5"
-
-miller-rabin@^4.0.0:
-  version "4.0.1"
-  resolved "https://registry.yarnpkg.com/miller-rabin/-/miller-rabin-4.0.1.tgz#f080351c865b0dc562a8462966daa53543c78a4d"
-  integrity sha512-115fLhvZVqWwHPbClyntxEVfVDfl9DLLTuJvq3g2O/Oxi8AiNouAHvDSzHS0viUJc+V5vm3eq91Xwqn9dp4jRA==
-  dependencies:
-    bn.js "^4.0.0"
-    brorand "^1.0.1"
-
-mime-db@1.46.0, "mime-db@>= 1.43.0 < 2":
-  version "1.46.0"
-  resolved "https://registry.yarnpkg.com/mime-db/-/mime-db-1.46.0.tgz#6267748a7f799594de3cbc8cde91def349661cee"
-  integrity sha512-svXaP8UQRZ5K7or+ZmfNhg2xX3yKDMUzqadsSqi4NCH/KomcH75MAMYAGVlvXn4+b/xOPhS3I2uHKRUzvjY7BQ==
-
-mime-types@^2.1.12, mime-types@^2.1.27, mime-types@~2.1.17, mime-types@~2.1.19, mime-types@~2.1.24:
-  version "2.1.29"
-  resolved "https://registry.yarnpkg.com/mime-types/-/mime-types-2.1.29.tgz#1d4ab77da64b91f5f72489df29236563754bb1b2"
-  integrity sha512-Y/jMt/S5sR9OaqteJtslsFZKWOIIqMACsJSiHghlCAyhf7jfVYjKBmLiX8OgpWeW+fjJ2b+Az69aPFPkUOY6xQ==
-  dependencies:
-    mime-db "1.46.0"
-
-mime@1.6.0:
-  version "1.6.0"
-  resolved "https://registry.yarnpkg.com/mime/-/mime-1.6.0.tgz#32cd9e5c64553bd58d19a568af452acff04981b1"
-  integrity sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg==
-
-mime@^2.4.4:
-  version "2.5.2"
-  resolved "https://registry.yarnpkg.com/mime/-/mime-2.5.2.tgz#6e3dc6cc2b9510643830e5f19d5cb753da5eeabe"
-  integrity sha512-tqkh47FzKeCPD2PUiPB6pkbMzsCasjxAfC62/Wap5qrUWcb+sFasXUC5I3gYM5iBM8v/Qpn4UK0x+j0iHyFPDg==
-
-mimic-fn@^2.1.0:
-  version "2.1.0"
-  resolved "https://registry.yarnpkg.com/mimic-fn/-/mimic-fn-2.1.0.tgz#7ed2c2ccccaf84d3ffcb7a69b57711fc2083401b"
-  integrity sha512-OqbOk5oEQeAZ8WXWydlu9HJjz9WVdEIvamMCcXmuqUYjTknH/sqsWvhQ3vgwKFRR1HpjvNBKQ37nbJgYzGqGcg==
-
-min-indent@^1.0.0:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/min-indent/-/min-indent-1.0.1.tgz#a63f681673b30571fbe8bc25686ae746eefa9869"
-  integrity sha512-I9jwMn07Sy/IwOj3zVkVik2JTvgpaykDZEigL6Rx6N9LbMywwUSMtxET+7lVoDLLd3O3IXwJwvuuns8UB/HeAg==
-
-mini-create-react-context@^0.4.0:
-  version "0.4.1"
-  resolved "https://registry.yarnpkg.com/mini-create-react-context/-/mini-create-react-context-0.4.1.tgz#072171561bfdc922da08a60c2197a497cc2d1d5e"
-  integrity sha512-YWCYEmd5CQeHGSAKrYvXgmzzkrvssZcuuQDDeqkT+PziKGMgE+0MCCtcKbROzocGBG1meBLl2FotlRwf4gAzbQ==
-  dependencies:
-    "@babel/runtime" "^7.12.1"
-    tiny-warning "^1.0.3"
-
-mini-css-extract-plugin@0.11.3:
-  version "0.11.3"
-  resolved "https://registry.yarnpkg.com/mini-css-extract-plugin/-/mini-css-extract-plugin-0.11.3.tgz#15b0910a7f32e62ffde4a7430cfefbd700724ea6"
-  integrity sha512-n9BA8LonkOkW1/zn+IbLPQmovsL0wMb9yx75fMJQZf2X1Zoec9yTZtyMePcyu19wPkmFbzZZA6fLTotpFhQsOA==
-  dependencies:
-    loader-utils "^1.1.0"
-    normalize-url "1.9.1"
-    schema-utils "^1.0.0"
-    webpack-sources "^1.1.0"
-
-minimalistic-assert@^1.0.0, minimalistic-assert@^1.0.1:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/minimalistic-assert/-/minimalistic-assert-1.0.1.tgz#2e194de044626d4a10e7f7fbc00ce73e83e4d5c7"
-  integrity sha512-UtJcAD4yEaGtjPezWuO9wC4nwUnVH/8/Im3yEHQP4b67cXlD/Qr9hdITCU1xDbSEXg2XKNaP8jsReV7vQd00/A==
-
-minimalistic-crypto-utils@^1.0.1:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/minimalistic-crypto-utils/-/minimalistic-crypto-utils-1.0.1.tgz#f6c00c1c0b082246e5c4d99dfb8c7c083b2b582a"
-  integrity sha1-9sAMHAsIIkblxNmd+4x8CDsrWCo=
-
-minimatch@3.0.4, minimatch@^3.0.4:
-  version "3.0.4"
-  resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-3.0.4.tgz#5166e286457f03306064be5497e8dbb0c3d32083"
-  integrity sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==
-  dependencies:
-    brace-expansion "^1.1.7"
-
-minimist@^1.1.1, minimist@^1.2.0, minimist@^1.2.5:
-  version "1.2.5"
-  resolved "https://registry.yarnpkg.com/minimist/-/minimist-1.2.5.tgz#67d66014b66a6a8aaa0c083c5fd58df4e4e97602"
-  integrity sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==
-
-minipass-collect@^1.0.2:
-  version "1.0.2"
-  resolved "https://registry.yarnpkg.com/minipass-collect/-/minipass-collect-1.0.2.tgz#22b813bf745dc6edba2576b940022ad6edc8c617"
-  integrity sha512-6T6lH0H8OG9kITm/Jm6tdooIbogG9e0tLgpY6mphXSm/A9u8Nq1ryBG+Qspiub9LjWlBPsPS3tWQ/Botq4FdxA==
-  dependencies:
-    minipass "^3.0.0"
-
-minipass-flush@^1.0.5:
-  version "1.0.5"
-  resolved "https://registry.yarnpkg.com/minipass-flush/-/minipass-flush-1.0.5.tgz#82e7135d7e89a50ffe64610a787953c4c4cbb373"
-  integrity sha512-JmQSYYpPUqX5Jyn1mXaRwOda1uQ8HP5KAT/oDSLCzt1BYRhQU0/hDtsB1ufZfEEzMZ9aAVmsBw8+FWsIXlClWw==
-  dependencies:
-    minipass "^3.0.0"
-
-minipass-pipeline@^1.2.2:
-  version "1.2.4"
-  resolved "https://registry.yarnpkg.com/minipass-pipeline/-/minipass-pipeline-1.2.4.tgz#68472f79711c084657c067c5c6ad93cddea8214c"
-  integrity sha512-xuIq7cIOt09RPRJ19gdi4b+RiNvDFYe5JH+ggNvBqGqpQXcru3PcRmOZuHBKWK1Txf9+cQ+HMVN4d6z46LZP7A==
-  dependencies:
-    minipass "^3.0.0"
-
-minipass@^3.0.0, minipass@^3.1.1:
-  version "3.1.3"
-  resolved "https://registry.yarnpkg.com/minipass/-/minipass-3.1.3.tgz#7d42ff1f39635482e15f9cdb53184deebd5815fd"
-  integrity sha512-Mgd2GdMVzY+x3IJ+oHnVM+KG3lA5c8tnabyJKmHSaG2kAGpudxuOf8ToDkhumF7UzME7DecbQE9uOZhNm7PuJg==
-  dependencies:
-    yallist "^4.0.0"
-
-minizlib@^2.1.1:
-  version "2.1.2"
-  resolved "https://registry.yarnpkg.com/minizlib/-/minizlib-2.1.2.tgz#e90d3466ba209b932451508a11ce3d3632145931"
-  integrity sha512-bAxsR8BVfj60DWXHE3u30oHzfl4G7khkSuPW+qvpd7jFRHm7dLxOjUk1EHACJ/hxLY8phGJ0YhYHZo7jil7Qdg==
-  dependencies:
-    minipass "^3.0.0"
-    yallist "^4.0.0"
-
-mississippi@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/mississippi/-/mississippi-3.0.0.tgz#ea0a3291f97e0b5e8776b363d5f0a12d94c67022"
-  integrity sha512-x471SsVjUtBRtcvd4BzKE9kFC+/2TeWgKCgw0bZcw1b9l2X3QX5vCWgF+KaZaYm87Ss//rHnWryupDrgLvmSkA==
-  dependencies:
-    concat-stream "^1.5.0"
-    duplexify "^3.4.2"
-    end-of-stream "^1.1.0"
-    flush-write-stream "^1.0.0"
-    from2 "^2.1.0"
-    parallel-transform "^1.1.0"
-    pump "^3.0.0"
-    pumpify "^1.3.3"
-    stream-each "^1.1.0"
-    through2 "^2.0.0"
-
-mixin-deep@^1.2.0:
-  version "1.3.2"
-  resolved "https://registry.yarnpkg.com/mixin-deep/-/mixin-deep-1.3.2.tgz#1120b43dc359a785dce65b55b82e257ccf479566"
-  integrity sha512-WRoDn//mXBiJ1H40rqa3vH0toePwSsGb45iInWlTySa+Uu4k3tYUSxa2v1KqAiLtvlrSzaExqS1gtk96A9zvEA==
-  dependencies:
-    for-in "^1.0.2"
-    is-extendable "^1.0.1"
-
-mkdirp@^0.5.1, mkdirp@^0.5.3, mkdirp@^0.5.5, mkdirp@~0.5.1:
-  version "0.5.5"
-  resolved "https://registry.yarnpkg.com/mkdirp/-/mkdirp-0.5.5.tgz#d91cefd62d1436ca0f41620e251288d420099def"
-  integrity sha512-NKmAlESf6jMGym1++R0Ra7wvhV+wFW63FaSOFPwRahvea0gMUcGUhVeAg/0BC0wiv9ih5NYPB1Wn1UEI1/L+xQ==
-  dependencies:
-    minimist "^1.2.5"
-
-mkdirp@^1.0.3, mkdirp@^1.0.4:
-  version "1.0.4"
-  resolved "https://registry.yarnpkg.com/mkdirp/-/mkdirp-1.0.4.tgz#3eb5ed62622756d79a5f0e2a221dfebad75c2f7e"
-  integrity sha512-vVqVZQyf3WLx2Shd0qJ9xuvqgAyKPLAiqITEtqW0oIUjzo3PePDd6fW9iFz30ef7Ysp/oiWqbhszeGWW2T6Gzw==
-
-move-concurrently@^1.0.1:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/move-concurrently/-/move-concurrently-1.0.1.tgz#be2c005fda32e0b29af1f05d7c4b33214c701f92"
-  integrity sha1-viwAX9oy4LKa8fBdfEszIUxwH5I=
-  dependencies:
-    aproba "^1.1.1"
-    copy-concurrently "^1.0.0"
-    fs-write-stream-atomic "^1.0.8"
-    mkdirp "^0.5.1"
-    rimraf "^2.5.4"
-    run-queue "^1.0.3"
-
-ms@2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/ms/-/ms-2.0.0.tgz#5608aeadfc00be6c2901df5f9861788de0d597c8"
-  integrity sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=
-
-ms@2.1.1:
-  version "2.1.1"
-  resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.1.tgz#30a5864eb3ebb0a66f2ebe6d727af06a09d86e0a"
-  integrity sha512-tgp+dl5cGk28utYktBsrFqA7HKgrhgPsg6Z/EfhWI4gl1Hwq8B/GmY/0oXZ6nF8hDVesS/FpnYaD/kOWhYQvyg==
-
-ms@2.1.2:
-  version "2.1.2"
-  resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.2.tgz#d09d1f357b443f493382a8eb3ccd183872ae6009"
-  integrity sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==
-
-ms@^2.1.1:
-  version "2.1.3"
-  resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.3.tgz#574c8138ce1d2b5861f0b44579dbadd60c6615b2"
-  integrity sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==
-
-multicast-dns-service-types@^1.1.0:
-  version "1.1.0"
-  resolved "https://registry.yarnpkg.com/multicast-dns-service-types/-/multicast-dns-service-types-1.1.0.tgz#899f11d9686e5e05cb91b35d5f0e63b773cfc901"
-  integrity sha1-iZ8R2WhuXgXLkbNdXw5jt3PPyQE=
-
-multicast-dns@^6.0.1:
-  version "6.2.3"
-  resolved "https://registry.yarnpkg.com/multicast-dns/-/multicast-dns-6.2.3.tgz#a0ec7bd9055c4282f790c3c82f4e28db3b31b229"
-  integrity sha512-ji6J5enbMyGRHIAkAOu3WdV8nggqviKCEKtXcOqfphZZtQrmHKycfynJ2V7eVPUA4NhJ6V7Wf4TmGbTwKE9B6g==
-  dependencies:
-    dns-packet "^1.3.1"
-    thunky "^1.0.2"
-
-nan@^2.12.1:
-  version "2.14.2"
-  resolved "https://registry.yarnpkg.com/nan/-/nan-2.14.2.tgz#f5376400695168f4cc694ac9393d0c9585eeea19"
-  integrity sha512-M2ufzIiINKCuDfBSAUr1vWQ+vuVcA9kqx8JJUsbQi6yf1uGRyb7HfpdfUr5qLXf3B/t8dPvcjhKMmlfnP47EzQ==
-
-nanoid@^3.1.20:
-  version "3.1.20"
-  resolved "https://registry.yarnpkg.com/nanoid/-/nanoid-3.1.20.tgz#badc263c6b1dcf14b71efaa85f6ab4c1d6cfc788"
-  integrity sha512-a1cQNyczgKbLX9jwbS/+d7W8fX/RfgYR7lVWwWOGIPNgK2m0MWvrGF6/m4kk6U3QcFMnZf3RIhL0v2Jgh/0Uxw==
-
-nanomatch@^1.2.9:
-  version "1.2.13"
-  resolved "https://registry.yarnpkg.com/nanomatch/-/nanomatch-1.2.13.tgz#b87a8aa4fc0de8fe6be88895b38983ff265bd119"
-  integrity sha512-fpoe2T0RbHwBTBUOftAfBPaDEi06ufaUai0mE6Yn1kacc3SnTErfb/h+X94VXzI64rKFHYImXSvdwGGCmwOqCA==
-  dependencies:
-    arr-diff "^4.0.0"
-    array-unique "^0.3.2"
-    define-property "^2.0.2"
-    extend-shallow "^3.0.2"
-    fragment-cache "^0.2.1"
-    is-windows "^1.0.2"
-    kind-of "^6.0.2"
-    object.pick "^1.3.0"
-    regex-not "^1.0.0"
-    snapdragon "^0.8.1"
-    to-regex "^3.0.1"
-
-native-url@^0.2.6:
-  version "0.2.6"
-  resolved "https://registry.yarnpkg.com/native-url/-/native-url-0.2.6.tgz#ca1258f5ace169c716ff44eccbddb674e10399ae"
-  integrity sha512-k4bDC87WtgrdD362gZz6zoiXQrl40kYlBmpfmSjwRO1VU0V5ccwJTlxuE72F6m3V0vc1xOf6n3UCP9QyerRqmA==
-  dependencies:
-    querystring "^0.2.0"
-
-natural-compare@^1.4.0:
-  version "1.4.0"
-  resolved "https://registry.yarnpkg.com/natural-compare/-/natural-compare-1.4.0.tgz#4abebfeed7541f2c27acfb29bdbbd15c8d5ba4f7"
-  integrity sha1-Sr6/7tdUHywnrPspvbvRXI1bpPc=
-
-negotiator@0.6.2:
-  version "0.6.2"
-  resolved "https://registry.yarnpkg.com/negotiator/-/negotiator-0.6.2.tgz#feacf7ccf525a77ae9634436a64883ffeca346fb"
-  integrity sha512-hZXc7K2e+PgeI1eDBe/10Ard4ekbfrrqG8Ep+8Jmf4JID2bNg7NvCPOZN+kfF574pFQI7mum2AUqDidoKqcTOw==
-
-neo-async@^2.5.0, neo-async@^2.6.1, neo-async@^2.6.2:
-  version "2.6.2"
-  resolved "https://registry.yarnpkg.com/neo-async/-/neo-async-2.6.2.tgz#b4aafb93e3aeb2d8174ca53cf163ab7d7308305f"
-  integrity sha512-Yd3UES5mWCSqR+qNT93S3UoYUkqAZ9lLg8a7g9rimsWmYGK8cVToA4/sF3RrshdyV3sAGMXVUmpMYOw+dLpOuw==
-
-next-tick@~1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/next-tick/-/next-tick-1.0.0.tgz#ca86d1fe8828169b0120208e3dc8424b9db8342c"
-  integrity sha1-yobR/ogoFpsBICCOPchCS524NCw=
-
-nice-try@^1.0.4:
-  version "1.0.5"
-  resolved "https://registry.yarnpkg.com/nice-try/-/nice-try-1.0.5.tgz#a3378a7696ce7d223e88fc9b764bd7ef1089e366"
-  integrity sha512-1nh45deeb5olNY7eX82BkPO7SSxR5SSYJiPTrTdFUVYwAl8CKMA5N9PjTYkHiRjisVcxcQ1HXdLhx2qxxJzLNQ==
-
-no-case@^3.0.4:
-  version "3.0.4"
-  resolved "https://registry.yarnpkg.com/no-case/-/no-case-3.0.4.tgz#d361fd5c9800f558551a8369fc0dcd4662b6124d"
-  integrity sha512-fgAN3jGAh+RoxUGZHTSOLJIqUc2wmoBwGR4tbpNAKmmovFoWq0OdRkb0VkldReO2a2iBT/OEulG9XSUc10r3zg==
-  dependencies:
-    lower-case "^2.0.2"
-    tslib "^2.0.3"
-
-node-forge@^0.10.0:
-  version "0.10.0"
-  resolved "https://registry.yarnpkg.com/node-forge/-/node-forge-0.10.0.tgz#32dea2afb3e9926f02ee5ce8794902691a676bf3"
-  integrity sha512-PPmu8eEeG9saEUvI97fm4OYxXVB6bFvyNTyiUOBichBpFG8A1Ljw3bY62+5oOjDEMHRnd0Y7HQ+x7uzxOzC6JA==
-
-node-int64@^0.4.0:
-  version "0.4.0"
-  resolved "https://registry.yarnpkg.com/node-int64/-/node-int64-0.4.0.tgz#87a9065cdb355d3182d8f94ce11188b825c68a3b"
-  integrity sha1-h6kGXNs1XTGC2PlM4RGIuCXGijs=
-
-node-libs-browser@^2.2.1:
-  version "2.2.1"
-  resolved "https://registry.yarnpkg.com/node-libs-browser/-/node-libs-browser-2.2.1.tgz#b64f513d18338625f90346d27b0d235e631f6425"
-  integrity sha512-h/zcD8H9kaDZ9ALUWwlBUDo6TKF8a7qBSCSEGfjTVIYeqsioSKaAX+BN7NgiMGp6iSIXZ3PxgCu8KS3b71YK5Q==
-  dependencies:
-    assert "^1.1.1"
-    browserify-zlib "^0.2.0"
-    buffer "^4.3.0"
-    console-browserify "^1.1.0"
-    constants-browserify "^1.0.0"
-    crypto-browserify "^3.11.0"
-    domain-browser "^1.1.1"
-    events "^3.0.0"
-    https-browserify "^1.0.0"
-    os-browserify "^0.3.0"
-    path-browserify "0.0.1"
-    process "^0.11.10"
-    punycode "^1.2.4"
-    querystring-es3 "^0.2.0"
-    readable-stream "^2.3.3"
-    stream-browserify "^2.0.1"
-    stream-http "^2.7.2"
-    string_decoder "^1.0.0"
-    timers-browserify "^2.0.4"
-    tty-browserify "0.0.0"
-    url "^0.11.0"
-    util "^0.11.0"
-    vm-browserify "^1.0.1"
-
-node-modules-regexp@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/node-modules-regexp/-/node-modules-regexp-1.0.0.tgz#8d9dbe28964a4ac5712e9131642107c71e90ec40"
-  integrity sha1-jZ2+KJZKSsVxLpExZCEHxx6Q7EA=
-
-node-notifier@^8.0.0:
-  version "8.0.1"
-  resolved "https://registry.yarnpkg.com/node-notifier/-/node-notifier-8.0.1.tgz#f86e89bbc925f2b068784b31f382afdc6ca56be1"
-  integrity sha512-BvEXF+UmsnAfYfoapKM9nGxnP+Wn7P91YfXmrKnfcYCx6VBeoN5Ez5Ogck6I8Bi5k4RlpqRYaw75pAwzX9OphA==
-  dependencies:
-    growly "^1.3.0"
-    is-wsl "^2.2.0"
-    semver "^7.3.2"
-    shellwords "^0.1.1"
-    uuid "^8.3.0"
-    which "^2.0.2"
-
-node-releases@^1.1.61, node-releases@^1.1.70:
-  version "1.1.70"
-  resolved "https://registry.yarnpkg.com/node-releases/-/node-releases-1.1.70.tgz#66e0ed0273aa65666d7fe78febe7634875426a08"
-  integrity sha512-Slf2s69+2/uAD79pVVQo8uSiC34+g8GWY8UH2Qtqv34ZfhYrxpYpfzs9Js9d6O0mbDmALuxaTlplnBTnSELcrw==
-
-normalize-package-data@^2.3.2, normalize-package-data@^2.5.0:
-  version "2.5.0"
-  resolved "https://registry.yarnpkg.com/normalize-package-data/-/normalize-package-data-2.5.0.tgz#e66db1838b200c1dfc233225d12cb36520e234a8"
-  integrity sha512-/5CMN3T0R4XTj4DcGaexo+roZSdSFW/0AOOTROrjxzCG1wrWXEsGbRKevjlIL+ZDE4sZlJr5ED4YW0yqmkK+eA==
-  dependencies:
-    hosted-git-info "^2.1.4"
-    resolve "^1.10.0"
-    semver "2 || 3 || 4 || 5"
-    validate-npm-package-license "^3.0.1"
-
-normalize-path@^2.1.1:
-  version "2.1.1"
-  resolved "https://registry.yarnpkg.com/normalize-path/-/normalize-path-2.1.1.tgz#1ab28b556e198363a8c1a6f7e6fa20137fe6aed9"
-  integrity sha1-GrKLVW4Zg2Oowab35vogE3/mrtk=
-  dependencies:
-    remove-trailing-separator "^1.0.1"
-
-normalize-path@^3.0.0, normalize-path@~3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/normalize-path/-/normalize-path-3.0.0.tgz#0dcd69ff23a1c9b11fd0978316644a0388216a65"
-  integrity sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==
-
-normalize-range@^0.1.2:
-  version "0.1.2"
-  resolved "https://registry.yarnpkg.com/normalize-range/-/normalize-range-0.1.2.tgz#2d10c06bdfd312ea9777695a4d28439456b75942"
-  integrity sha1-LRDAa9/TEuqXd2laTShDlFa3WUI=
-
-normalize-url@1.9.1:
-  version "1.9.1"
-  resolved "https://registry.yarnpkg.com/normalize-url/-/normalize-url-1.9.1.tgz#2cc0d66b31ea23036458436e3620d85954c66c3c"
-  integrity sha1-LMDWazHqIwNkWENuNiDYWVTGbDw=
-  dependencies:
-    object-assign "^4.0.1"
-    prepend-http "^1.0.0"
-    query-string "^4.1.0"
-    sort-keys "^1.0.0"
-
-normalize-url@^3.0.0:
-  version "3.3.0"
-  resolved "https://registry.yarnpkg.com/normalize-url/-/normalize-url-3.3.0.tgz#b2e1c4dc4f7c6d57743df733a4f5978d18650559"
-  integrity sha512-U+JJi7duF1o+u2pynbp2zXDW2/PADgC30f0GsHZtRh+HOcXHnw137TrNlyxxRvWW5fjKd3bcLHPxofWuCjaeZg==
-
-npm-run-path@^2.0.0:
-  version "2.0.2"
-  resolved "https://registry.yarnpkg.com/npm-run-path/-/npm-run-path-2.0.2.tgz#35a9232dfa35d7067b4cb2ddf2357b1871536c5f"
-  integrity sha1-NakjLfo11wZ7TLLd8jV7GHFTbF8=
-  dependencies:
-    path-key "^2.0.0"
-
-npm-run-path@^4.0.0:
-  version "4.0.1"
-  resolved "https://registry.yarnpkg.com/npm-run-path/-/npm-run-path-4.0.1.tgz#b7ecd1e5ed53da8e37a55e1c2269e0b97ed748ea"
-  integrity sha512-S48WzZW777zhNIrn7gxOlISNAqi9ZC/uQFnRdbeIHhZhCA6UqpkOT8T1G7BvfdgP4Er8gF4sUbaS0i7QvIfCWw==
-  dependencies:
-    path-key "^3.0.0"
-
-nth-check@^1.0.2:
-  version "1.0.2"
-  resolved "https://registry.yarnpkg.com/nth-check/-/nth-check-1.0.2.tgz#b2bd295c37e3dd58a3bf0700376663ba4d9cf05c"
-  integrity sha512-WeBOdju8SnzPN5vTUJYxYUxLeXpCaVP5i5e0LF8fg7WORF2Wd7wFX/pk0tYZk7s8T+J7VLy0Da6J1+wCT0AtHg==
-  dependencies:
-    boolbase "~1.0.0"
-
-num2fraction@^1.2.2:
-  version "1.2.2"
-  resolved "https://registry.yarnpkg.com/num2fraction/-/num2fraction-1.2.2.tgz#6f682b6a027a4e9ddfa4564cd2589d1d4e669ede"
-  integrity sha1-b2gragJ6Tp3fpFZM0lidHU5mnt4=
-
-nwsapi@^2.2.0:
-  version "2.2.0"
-  resolved "https://registry.yarnpkg.com/nwsapi/-/nwsapi-2.2.0.tgz#204879a9e3d068ff2a55139c2c772780681a38b7"
-  integrity sha512-h2AatdwYH+JHiZpv7pt/gSX1XoRGb7L/qSIeuqA6GwYoF9w1vP1cw42TO0aI2pNyshRK5893hNSl+1//vHK7hQ==
-
-oauth-sign@~0.9.0:
-  version "0.9.0"
-  resolved "https://registry.yarnpkg.com/oauth-sign/-/oauth-sign-0.9.0.tgz#47a7b016baa68b5fa0ecf3dee08a85c679ac6455"
-  integrity sha512-fexhUFFPTGV8ybAtSIGbV6gOkSv8UtRbDBnAyLQw4QPKkgNlsH2ByPGtMUqdWkos6YCRmAqViwgZrJc/mRDzZQ==
-
-object-assign@^4.0.1, object-assign@^4.1.0, object-assign@^4.1.1:
-  version "4.1.1"
-  resolved "https://registry.yarnpkg.com/object-assign/-/object-assign-4.1.1.tgz#2109adc7965887cfc05cbbd442cac8bfbb360863"
-  integrity sha1-IQmtx5ZYh8/AXLvUQsrIv7s2CGM=
-
-object-copy@^0.1.0:
-  version "0.1.0"
-  resolved "https://registry.yarnpkg.com/object-copy/-/object-copy-0.1.0.tgz#7e7d858b781bd7c991a41ba975ed3812754e998c"
-  integrity sha1-fn2Fi3gb18mRpBupde04EnVOmYw=
-  dependencies:
-    copy-descriptor "^0.1.0"
-    define-property "^0.2.5"
-    kind-of "^3.0.3"
-
-object-inspect@^1.8.0, object-inspect@^1.9.0:
-  version "1.9.0"
-  resolved "https://registry.yarnpkg.com/object-inspect/-/object-inspect-1.9.0.tgz#c90521d74e1127b67266ded3394ad6116986533a"
-  integrity sha512-i3Bp9iTqwhaLZBxGkRfo5ZbE07BQRT7MGu8+nNgwW9ItGp1TzCTw2DLEoWwjClxBjOFI/hWljTAmYGCEwmtnOw==
-
-object-is@^1.0.1:
-  version "1.1.5"
-  resolved "https://registry.yarnpkg.com/object-is/-/object-is-1.1.5.tgz#b9deeaa5fc7f1846a0faecdceec138e5778f53ac"
-  integrity sha512-3cyDsyHgtmi7I7DfSSI2LDp6SK2lwvtbg0p0R1e0RvTqF5ceGx+K2dfSjm1bKDMVCFEDAQvy+o8c6a7VujOddw==
-  dependencies:
-    call-bind "^1.0.2"
-    define-properties "^1.1.3"
-
-object-keys@^1.0.12, object-keys@^1.1.1:
-  version "1.1.1"
-  resolved "https://registry.yarnpkg.com/object-keys/-/object-keys-1.1.1.tgz#1c47f272df277f3b1daf061677d9c82e2322c60e"
-  integrity sha512-NuAESUOUMrlIXOfHKzD6bpPu3tYt3xvjNdRIQ+FeT0lNb4K8WR70CaDxhuNguS2XG+GjkyMwOzsN5ZktImfhLA==
-
-object-visit@^1.0.0:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/object-visit/-/object-visit-1.0.1.tgz#f79c4493af0c5377b59fe39d395e41042dd045bb"
-  integrity sha1-95xEk68MU3e1n+OdOV5BBC3QRbs=
-  dependencies:
-    isobject "^3.0.0"
-
-object.assign@^4.1.0, object.assign@^4.1.1, object.assign@^4.1.2:
-  version "4.1.2"
-  resolved "https://registry.yarnpkg.com/object.assign/-/object.assign-4.1.2.tgz#0ed54a342eceb37b38ff76eb831a0e788cb63940"
-  integrity sha512-ixT2L5THXsApyiUPYKmW+2EHpXXe5Ii3M+f4e+aJFAHao5amFRW6J0OO6c/LU8Be47utCx2GL89hxGB6XSmKuQ==
-  dependencies:
-    call-bind "^1.0.0"
-    define-properties "^1.1.3"
-    has-symbols "^1.0.1"
-    object-keys "^1.1.1"
-
-object.entries@^1.1.0, object.entries@^1.1.2:
-  version "1.1.3"
-  resolved "https://registry.yarnpkg.com/object.entries/-/object.entries-1.1.3.tgz#c601c7f168b62374541a07ddbd3e2d5e4f7711a6"
-  integrity sha512-ym7h7OZebNS96hn5IJeyUmaWhaSM4SVtAPPfNLQEI2MYWCO2egsITb9nab2+i/Pwibx+R0mtn+ltKJXRSeTMGg==
-  dependencies:
-    call-bind "^1.0.0"
-    define-properties "^1.1.3"
-    es-abstract "^1.18.0-next.1"
-    has "^1.0.3"
-
-object.fromentries@^2.0.2:
-  version "2.0.4"
-  resolved "https://registry.yarnpkg.com/object.fromentries/-/object.fromentries-2.0.4.tgz#26e1ba5c4571c5c6f0890cef4473066456a120b8"
-  integrity sha512-EsFBshs5RUUpQEY1D4q/m59kMfz4YJvxuNCJcv/jWwOJr34EaVnG11ZrZa0UHB3wnzV1wx8m58T4hQL8IuNXlQ==
-  dependencies:
-    call-bind "^1.0.2"
-    define-properties "^1.1.3"
-    es-abstract "^1.18.0-next.2"
-    has "^1.0.3"
-
-object.getownpropertydescriptors@^2.0.3, object.getownpropertydescriptors@^2.1.0:
-  version "2.1.2"
-  resolved "https://registry.yarnpkg.com/object.getownpropertydescriptors/-/object.getownpropertydescriptors-2.1.2.tgz#1bd63aeacf0d5d2d2f31b5e393b03a7c601a23f7"
-  integrity sha512-WtxeKSzfBjlzL+F9b7M7hewDzMwy+C8NRssHd1YrNlzHzIDrXcXiNOMrezdAEM4UXixgV+vvnyBeN7Rygl2ttQ==
-  dependencies:
-    call-bind "^1.0.2"
-    define-properties "^1.1.3"
-    es-abstract "^1.18.0-next.2"
-
-object.pick@^1.3.0:
-  version "1.3.0"
-  resolved "https://registry.yarnpkg.com/object.pick/-/object.pick-1.3.0.tgz#87a10ac4c1694bd2e1cbf53591a66141fb5dd747"
-  integrity sha1-h6EKxMFpS9Lhy/U1kaZhQftd10c=
-  dependencies:
-    isobject "^3.0.1"
-
-object.values@^1.1.0, object.values@^1.1.1:
-  version "1.1.2"
-  resolved "https://registry.yarnpkg.com/object.values/-/object.values-1.1.2.tgz#7a2015e06fcb0f546bd652486ce8583a4731c731"
-  integrity sha512-MYC0jvJopr8EK6dPBiO8Nb9mvjdypOachO5REGk6MXzujbBrAisKo3HmdEI6kZDL6fC31Mwee/5YbtMebixeag==
-  dependencies:
-    call-bind "^1.0.0"
-    define-properties "^1.1.3"
-    es-abstract "^1.18.0-next.1"
-    has "^1.0.3"
-
-obuf@^1.0.0, obuf@^1.1.2:
-  version "1.1.2"
-  resolved "https://registry.yarnpkg.com/obuf/-/obuf-1.1.2.tgz#09bea3343d41859ebd446292d11c9d4db619084e"
-  integrity sha512-PX1wu0AmAdPqOL1mWhqmlOd8kOIZQwGZw6rh7uby9fTc5lhaOWFLX3I6R1hrF9k3zUY40e6igsLGkDXK92LJNg==
-
-on-finished@~2.3.0:
-  version "2.3.0"
-  resolved "https://registry.yarnpkg.com/on-finished/-/on-finished-2.3.0.tgz#20f1336481b083cd75337992a16971aa2d906947"
-  integrity sha1-IPEzZIGwg811M3mSoWlxqi2QaUc=
-  dependencies:
-    ee-first "1.1.1"
-
-on-headers@~1.0.2:
-  version "1.0.2"
-  resolved "https://registry.yarnpkg.com/on-headers/-/on-headers-1.0.2.tgz#772b0ae6aaa525c399e489adfad90c403eb3c28f"
-  integrity sha512-pZAE+FJLoyITytdqK0U5s+FIpjN0JP3OzFi/u8Rx+EV5/W+JTWGXG8xFzevE7AjBfDqHv/8vL8qQsIhHnqRkrA==
-
-once@^1.3.0, once@^1.3.1, once@^1.4.0:
-  version "1.4.0"
-  resolved "https://registry.yarnpkg.com/once/-/once-1.4.0.tgz#583b1aa775961d4b113ac17d9c50baef9dd76bd1"
-  integrity sha1-WDsap3WWHUsROsF9nFC6753Xa9E=
-  dependencies:
-    wrappy "1"
-
-onetime@^5.1.0:
-  version "5.1.2"
-  resolved "https://registry.yarnpkg.com/onetime/-/onetime-5.1.2.tgz#d0e96ebb56b07476df1dd9c4806e5237985ca45e"
-  integrity sha512-kbpaSSGJTWdAY5KPVeMOKXSrPtr8C8C7wodJbcsd51jRnmD+GZu8Y0VoU6Dm5Z4vWr0Ig/1NKuWRKf7j5aaYSg==
-  dependencies:
-    mimic-fn "^2.1.0"
-
-open@^7.0.2:
-  version "7.4.2"
-  resolved "https://registry.yarnpkg.com/open/-/open-7.4.2.tgz#b8147e26dcf3e426316c730089fd71edd29c2321"
-  integrity sha512-MVHddDVweXZF3awtlAS+6pgKLlm/JgxZ90+/NBurBoQctVOOB/zDdVjcyPzQ+0laDGbsWgrRkflI65sQeOgT9Q==
-  dependencies:
-    is-docker "^2.0.0"
-    is-wsl "^2.1.1"
-
-opn@^5.5.0:
-  version "5.5.0"
-  resolved "https://registry.yarnpkg.com/opn/-/opn-5.5.0.tgz#fc7164fab56d235904c51c3b27da6758ca3b9bfc"
-  integrity sha512-PqHpggC9bLV0VeWcdKhkpxY+3JTzetLSqTCWL/z/tFIbI6G8JCjondXklT1JinczLz2Xib62sSp0T/gKT4KksA==
-  dependencies:
-    is-wsl "^1.1.0"
-
-optimize-css-assets-webpack-plugin@5.0.4:
-  version "5.0.4"
-  resolved "https://registry.yarnpkg.com/optimize-css-assets-webpack-plugin/-/optimize-css-assets-webpack-plugin-5.0.4.tgz#85883c6528aaa02e30bbad9908c92926bb52dc90"
-  integrity sha512-wqd6FdI2a5/FdoiCNNkEvLeA//lHHfG24Ln2Xm2qqdIk4aOlsR18jwpyOihqQ8849W3qu2DX8fOYxpvTMj+93A==
-  dependencies:
-    cssnano "^4.1.10"
-    last-call-webpack-plugin "^3.0.0"
-
-optionator@^0.8.1:
-  version "0.8.3"
-  resolved "https://registry.yarnpkg.com/optionator/-/optionator-0.8.3.tgz#84fa1d036fe9d3c7e21d99884b601167ec8fb495"
-  integrity sha512-+IW9pACdk3XWmmTXG8m3upGUJst5XRGzxMRjXzAuJ1XnIFNvfhjjIuYkDvysnPQ7qzqVzLt78BCruntqRhWQbA==
-  dependencies:
-    deep-is "~0.1.3"
-    fast-levenshtein "~2.0.6"
-    levn "~0.3.0"
-    prelude-ls "~1.1.2"
-    type-check "~0.3.2"
-    word-wrap "~1.2.3"
-
-optionator@^0.9.1:
-  version "0.9.1"
-  resolved "https://registry.yarnpkg.com/optionator/-/optionator-0.9.1.tgz#4f236a6373dae0566a6d43e1326674f50c291499"
-  integrity sha512-74RlY5FCnhq4jRxVUPKDaRwrVNXMqsGsiW6AJw4XK8hmtm10wC0ypZBLw5IIp85NZMr91+qd1RvvENwg7jjRFw==
-  dependencies:
-    deep-is "^0.1.3"
-    fast-levenshtein "^2.0.6"
-    levn "^0.4.1"
-    prelude-ls "^1.2.1"
-    type-check "^0.4.0"
-    word-wrap "^1.2.3"
-
-original@^1.0.0:
-  version "1.0.2"
-  resolved "https://registry.yarnpkg.com/original/-/original-1.0.2.tgz#e442a61cffe1c5fd20a65f3261c26663b303f25f"
-  integrity sha512-hyBVl6iqqUOJ8FqRe+l/gS8H+kKYjrEndd5Pm1MfBtsEKA038HkkdbAl/72EAXGyonD/PFsvmVG+EvcIpliMBg==
-  dependencies:
-    url-parse "^1.4.3"
-
-os-browserify@^0.3.0:
-  version "0.3.0"
-  resolved "https://registry.yarnpkg.com/os-browserify/-/os-browserify-0.3.0.tgz#854373c7f5c2315914fc9bfc6bd8238fdda1ec27"
-  integrity sha1-hUNzx/XCMVkU/Jv8a9gjj92h7Cc=
-
-p-each-series@^2.1.0:
-  version "2.2.0"
-  resolved "https://registry.yarnpkg.com/p-each-series/-/p-each-series-2.2.0.tgz#105ab0357ce72b202a8a8b94933672657b5e2a9a"
-  integrity sha512-ycIL2+1V32th+8scbpTvyHNaHe02z0sjgh91XXjAk+ZeXoPN4Z46DVUnzdso0aX4KckKw0FNNFHdjZ2UsZvxiA==
-
-p-finally@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/p-finally/-/p-finally-1.0.0.tgz#3fbcfb15b899a44123b34b6dcc18b724336a2cae"
-  integrity sha1-P7z7FbiZpEEjs0ttzBi3JDNqLK4=
-
-p-limit@^1.1.0:
-  version "1.3.0"
-  resolved "https://registry.yarnpkg.com/p-limit/-/p-limit-1.3.0.tgz#b86bd5f0c25690911c7590fcbfc2010d54b3ccb8"
-  integrity sha512-vvcXsLAJ9Dr5rQOPk7toZQZJApBl2K4J6dANSsEuh6QI41JYcsS/qhTGa9ErIUUgK3WNQoJYvylxvjqmiqEA9Q==
-  dependencies:
-    p-try "^1.0.0"
-
-p-limit@^2.0.0, p-limit@^2.2.0:
-  version "2.3.0"
-  resolved "https://registry.yarnpkg.com/p-limit/-/p-limit-2.3.0.tgz#3dd33c647a214fdfffd835933eb086da0dc21db1"
-  integrity sha512-//88mFWSJx8lxCzwdAABTJL2MyWB12+eIY7MDL2SqLmAkeKU9qxRvWuSyTjm3FUmpBEMuFfckAIqEaVGUDxb6w==
-  dependencies:
-    p-try "^2.0.0"
-
-p-limit@^3.0.2:
-  version "3.1.0"
-  resolved "https://registry.yarnpkg.com/p-limit/-/p-limit-3.1.0.tgz#e1daccbe78d0d1388ca18c64fea38e3e57e3706b"
-  integrity sha512-TYOanM3wGwNGsZN2cVTYPArw454xnXj5qmWF1bEoAc4+cU/ol7GVh7odevjp1FNHduHc3KZMcFduxU5Xc6uJRQ==
-  dependencies:
-    yocto-queue "^0.1.0"
-
-p-locate@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/p-locate/-/p-locate-2.0.0.tgz#20a0103b222a70c8fd39cc2e580680f3dde5ec43"
-  integrity sha1-IKAQOyIqcMj9OcwuWAaA893l7EM=
-  dependencies:
-    p-limit "^1.1.0"
-
-p-locate@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/p-locate/-/p-locate-3.0.0.tgz#322d69a05c0264b25997d9f40cd8a891ab0064a4"
-  integrity sha512-x+12w/To+4GFfgJhBEpiDcLozRJGegY+Ei7/z0tSLkMmxGZNybVMSfWj9aJn8Z5Fc7dBUNJOOVgPv2H7IwulSQ==
-  dependencies:
-    p-limit "^2.0.0"
-
-p-locate@^4.1.0:
-  version "4.1.0"
-  resolved "https://registry.yarnpkg.com/p-locate/-/p-locate-4.1.0.tgz#a3428bb7088b3a60292f66919278b7c297ad4f07"
-  integrity sha512-R79ZZ/0wAxKGu3oYMlz8jy/kbhsNrS7SKZ7PxEHBgJ5+F2mtFW2fK2cOtBh1cHYkQsbzFV7I+EoRKe6Yt0oK7A==
-  dependencies:
-    p-limit "^2.2.0"
-
-p-map@^2.0.0:
-  version "2.1.0"
-  resolved "https://registry.yarnpkg.com/p-map/-/p-map-2.1.0.tgz#310928feef9c9ecc65b68b17693018a665cea175"
-  integrity sha512-y3b8Kpd8OAN444hxfBbFfj1FY/RjtTd8tzYwhUqNYXx0fXx2iX4maP4Qr6qhIKbQXI02wTLAda4fYUbDagTUFw==
-
-p-map@^4.0.0:
-  version "4.0.0"
-  resolved "https://registry.yarnpkg.com/p-map/-/p-map-4.0.0.tgz#bb2f95a5eda2ec168ec9274e06a747c3e2904d2b"
-  integrity sha512-/bjOqmgETBYB5BoEeGVea8dmvHb2m9GLy1E9W43yeyfP6QQCZGFNa+XRceJEuDB6zqr+gKpIAmlLebMpykw/MQ==
-  dependencies:
-    aggregate-error "^3.0.0"
-
-p-retry@^3.0.1:
-  version "3.0.1"
-  resolved "https://registry.yarnpkg.com/p-retry/-/p-retry-3.0.1.tgz#316b4c8893e2c8dc1cfa891f406c4b422bebf328"
-  integrity sha512-XE6G4+YTTkT2a0UWb2kjZe8xNwf8bIbnqpc/IS/idOBVhyves0mK5OJgeocjx7q5pvX/6m23xuzVPYT1uGM73w==
-  dependencies:
-    retry "^0.12.0"
-
-p-try@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/p-try/-/p-try-1.0.0.tgz#cbc79cdbaf8fd4228e13f621f2b1a237c1b207b3"
-  integrity sha1-y8ec26+P1CKOE/Yh8rGiN8GyB7M=
-
-p-try@^2.0.0:
-  version "2.2.0"
-  resolved "https://registry.yarnpkg.com/p-try/-/p-try-2.2.0.tgz#cb2868540e313d61de58fafbe35ce9004d5540e6"
-  integrity sha512-R4nPAVTAU0B9D35/Gk3uJf/7XYbQcyohSKdvAxIRSNghFl4e71hVoGnBNQz9cWaXxO2I10KTC+3jMdvvoKw6dQ==
-
-pako@~1.0.5:
-  version "1.0.11"
-  resolved "https://registry.yarnpkg.com/pako/-/pako-1.0.11.tgz#6c9599d340d54dfd3946380252a35705a6b992bf"
-  integrity sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw==
-
-parallel-transform@^1.1.0:
-  version "1.2.0"
-  resolved "https://registry.yarnpkg.com/parallel-transform/-/parallel-transform-1.2.0.tgz#9049ca37d6cb2182c3b1d2c720be94d14a5814fc"
-  integrity sha512-P2vSmIu38uIlvdcU7fDkyrxj33gTUy/ABO5ZUbGowxNCopBq/OoD42bP4UmMrJoPyk4Uqf0mu3mtWBhHCZD8yg==
-  dependencies:
-    cyclist "^1.0.1"
-    inherits "^2.0.3"
-    readable-stream "^2.1.5"
-
-param-case@^3.0.3:
-  version "3.0.4"
-  resolved "https://registry.yarnpkg.com/param-case/-/param-case-3.0.4.tgz#7d17fe4aa12bde34d4a77d91acfb6219caad01c5"
-  integrity sha512-RXlj7zCYokReqWpOPH9oYivUzLYZ5vAPIfEmCTNViosC78F8F0H9y7T7gG2M39ymgutxF5gcFEsyZQSph9Bp3A==
-  dependencies:
-    dot-case "^3.0.4"
-    tslib "^2.0.3"
-
-parent-module@^1.0.0:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/parent-module/-/parent-module-1.0.1.tgz#691d2709e78c79fae3a156622452d00762caaaa2"
-  integrity sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g==
-  dependencies:
-    callsites "^3.0.0"
-
-parse-asn1@^5.0.0, parse-asn1@^5.1.5:
-  version "5.1.6"
-  resolved "https://registry.yarnpkg.com/parse-asn1/-/parse-asn1-5.1.6.tgz#385080a3ec13cb62a62d39409cb3e88844cdaed4"
-  integrity sha512-RnZRo1EPU6JBnra2vGHj0yhp6ebyjBZpmUCLHWiFhxlzvBCCpAuZ7elsBp1PVAbQN0/04VD/19rfzlBSwLstMw==
-  dependencies:
-    asn1.js "^5.2.0"
-    browserify-aes "^1.0.0"
-    evp_bytestokey "^1.0.0"
-    pbkdf2 "^3.0.3"
-    safe-buffer "^5.1.1"
-
-parse-json@^2.2.0:
-  version "2.2.0"
-  resolved "https://registry.yarnpkg.com/parse-json/-/parse-json-2.2.0.tgz#f480f40434ef80741f8469099f8dea18f55a4dc9"
-  integrity sha1-9ID0BDTvgHQfhGkJn43qGPVaTck=
-  dependencies:
-    error-ex "^1.2.0"
-
-parse-json@^4.0.0:
-  version "4.0.0"
-  resolved "https://registry.yarnpkg.com/parse-json/-/parse-json-4.0.0.tgz#be35f5425be1f7f6c747184f98a788cb99477ee0"
-  integrity sha1-vjX1Qlvh9/bHRxhPmKeIy5lHfuA=
-  dependencies:
-    error-ex "^1.3.1"
-    json-parse-better-errors "^1.0.1"
-
-parse-json@^5.0.0:
-  version "5.2.0"
-  resolved "https://registry.yarnpkg.com/parse-json/-/parse-json-5.2.0.tgz#c76fc66dee54231c962b22bcc8a72cf2f99753cd"
-  integrity sha512-ayCKvm/phCGxOkYRSCM82iDwct8/EonSEgCSxWxD7ve6jHggsFl4fZVQBPRNgQoKiuV/odhFrGzQXZwbifC8Rg==
-  dependencies:
-    "@babel/code-frame" "^7.0.0"
-    error-ex "^1.3.1"
-    json-parse-even-better-errors "^2.3.0"
-    lines-and-columns "^1.1.6"
-
-parse5@5.1.1:
-  version "5.1.1"
-  resolved "https://registry.yarnpkg.com/parse5/-/parse5-5.1.1.tgz#f68e4e5ba1852ac2cadc00f4555fff6c2abb6178"
-  integrity sha512-ugq4DFI0Ptb+WWjAdOK16+u/nHfiIrcE+sh8kZMaM0WllQKLI9rOUq6c2b7cwPkXdzfQESqvoqK6ug7U/Yyzug==
-
-parseurl@~1.3.2, parseurl@~1.3.3:
-  version "1.3.3"
-  resolved "https://registry.yarnpkg.com/parseurl/-/parseurl-1.3.3.tgz#9da19e7bee8d12dff0513ed5b76957793bc2e8d4"
-  integrity sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==
-
-pascal-case@^3.1.2:
-  version "3.1.2"
-  resolved "https://registry.yarnpkg.com/pascal-case/-/pascal-case-3.1.2.tgz#b48e0ef2b98e205e7c1dae747d0b1508237660eb"
-  integrity sha512-uWlGT3YSnK9x3BQJaOdcZwrnV6hPpd8jFH1/ucpiLRPh/2zCVJKS19E4GvYHvaCcACn3foXZ0cLB9Wrx1KGe5g==
-  dependencies:
-    no-case "^3.0.4"
-    tslib "^2.0.3"
-
-pascalcase@^0.1.1:
-  version "0.1.1"
-  resolved "https://registry.yarnpkg.com/pascalcase/-/pascalcase-0.1.1.tgz#b363e55e8006ca6fe21784d2db22bd15d7917f14"
-  integrity sha1-s2PlXoAGym/iF4TS2yK9FdeRfxQ=
-
-path-browserify@0.0.1:
-  version "0.0.1"
-  resolved "https://registry.yarnpkg.com/path-browserify/-/path-browserify-0.0.1.tgz#e6c4ddd7ed3aa27c68a20cc4e50e1a4ee83bbc4a"
-  integrity sha512-BapA40NHICOS+USX9SN4tyhq+A2RrN/Ws5F0Z5aMHDp98Fl86lX8Oti8B7uN93L4Ifv4fHOEA+pQw87gmMO/lQ==
-
-path-dirname@^1.0.0:
-  version "1.0.2"
-  resolved "https://registry.yarnpkg.com/path-dirname/-/path-dirname-1.0.2.tgz#cc33d24d525e099a5388c0336c6e32b9160609e0"
-  integrity sha1-zDPSTVJeCZpTiMAzbG4yuRYGCeA=
-
-path-exists@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/path-exists/-/path-exists-3.0.0.tgz#ce0ebeaa5f78cb18925ea7d810d7b59b010fd515"
-  integrity sha1-zg6+ql94yxiSXqfYENe1mwEP1RU=
-
-path-exists@^4.0.0:
-  version "4.0.0"
-  resolved "https://registry.yarnpkg.com/path-exists/-/path-exists-4.0.0.tgz#513bdbe2d3b95d7762e8c1137efa195c6c61b5b3"
-  integrity sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w==
-
-path-is-absolute@^1.0.0:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/path-is-absolute/-/path-is-absolute-1.0.1.tgz#174b9268735534ffbc7ace6bf53a5a9e1b5c5f5f"
-  integrity sha1-F0uSaHNVNP+8es5r9TpanhtcX18=
-
-path-is-inside@^1.0.2:
-  version "1.0.2"
-  resolved "https://registry.yarnpkg.com/path-is-inside/-/path-is-inside-1.0.2.tgz#365417dede44430d1c11af61027facf074bdfc53"
-  integrity sha1-NlQX3t5EQw0cEa9hAn+s8HS9/FM=
-
-path-key@^2.0.0, path-key@^2.0.1:
-  version "2.0.1"
-  resolved "https://registry.yarnpkg.com/path-key/-/path-key-2.0.1.tgz#411cadb574c5a140d3a4b1910d40d80cc9f40b40"
-  integrity sha1-QRyttXTFoUDTpLGRDUDYDMn0C0A=
-
-path-key@^3.0.0, path-key@^3.1.0:
-  version "3.1.1"
-  resolved "https://registry.yarnpkg.com/path-key/-/path-key-3.1.1.tgz#581f6ade658cbba65a0d3380de7753295054f375"
-  integrity sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==
-
-path-parse@^1.0.6:
-  version "1.0.6"
-  resolved "https://registry.yarnpkg.com/path-parse/-/path-parse-1.0.6.tgz#d62dbb5679405d72c4737ec58600e9ddcf06d24c"
-  integrity sha512-GSmOT2EbHrINBf9SR7CDELwlJ8AENk3Qn7OikK4nFYAu3Ote2+JYNVvkpAEQm3/TLNEJFD/xZJjzyxg3KBWOzw==
-
-path-to-regexp@0.1.7:
-  version "0.1.7"
-  resolved "https://registry.yarnpkg.com/path-to-regexp/-/path-to-regexp-0.1.7.tgz#df604178005f522f15eb4490e7247a1bfaa67f8c"
-  integrity sha1-32BBeABfUi8V60SQ5yR6G/qmf4w=
-
-path-to-regexp@^1.7.0:
-  version "1.8.0"
-  resolved "https://registry.yarnpkg.com/path-to-regexp/-/path-to-regexp-1.8.0.tgz#887b3ba9d84393e87a0a0b9f4cb756198b53548a"
-  integrity sha512-n43JRhlUKUAlibEJhPeir1ncUID16QnEjNpwzNdO3Lm4ywrBpBZ5oLD0I6br9evr1Y9JTqwRtAh7JLoOzAQdVA==
-  dependencies:
-    isarray "0.0.1"
-
-path-type@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/path-type/-/path-type-2.0.0.tgz#f012ccb8415b7096fc2daa1054c3d72389594c73"
-  integrity sha1-8BLMuEFbcJb8LaoQVMPXI4lZTHM=
-  dependencies:
-    pify "^2.0.0"
-
-path-type@^4.0.0:
-  version "4.0.0"
-  resolved "https://registry.yarnpkg.com/path-type/-/path-type-4.0.0.tgz#84ed01c0a7ba380afe09d90a8c180dcd9d03043b"
-  integrity sha512-gDKb8aZMDeD/tZWs9P6+q0J9Mwkdl6xMV8TjnGP3qJVJ06bdMgkbBlLU8IdfOsIsFz2BW1rNVT3XuNEl8zPAvw==
-
-pbkdf2@^3.0.3:
-  version "3.1.1"
-  resolved "https://registry.yarnpkg.com/pbkdf2/-/pbkdf2-3.1.1.tgz#cb8724b0fada984596856d1a6ebafd3584654b94"
-  integrity sha512-4Ejy1OPxi9f2tt1rRV7Go7zmfDQ+ZectEQz3VGUQhgq62HtIRPDyG/JtnwIxs6x3uNMwo2V7q1fMvKjb+Tnpqg==
-  dependencies:
-    create-hash "^1.1.2"
-    create-hmac "^1.1.4"
-    ripemd160 "^2.0.1"
-    safe-buffer "^5.0.1"
-    sha.js "^2.4.8"
-
-performance-now@^2.1.0:
-  version "2.1.0"
-  resolved "https://registry.yarnpkg.com/performance-now/-/performance-now-2.1.0.tgz#6309f4e0e5fa913ec1c69307ae364b4b377c9e7b"
-  integrity sha1-Ywn04OX6kT7BxpMHrjZLSzd8nns=
-
-picomatch@^2.0.4, picomatch@^2.0.5, picomatch@^2.2.1, picomatch@^2.2.2:
-  version "2.2.2"
-  resolved "https://registry.yarnpkg.com/picomatch/-/picomatch-2.2.2.tgz#21f333e9b6b8eaff02468f5146ea406d345f4dad"
-  integrity sha512-q0M/9eZHzmr0AulXyPwNfZjtwZ/RBZlbN3K3CErVrk50T2ASYI7Bye0EvekFY3IP1Nt2DHu0re+V2ZHIpMkuWg==
-
-pify@^2.0.0:
-  version "2.3.0"
-  resolved "https://registry.yarnpkg.com/pify/-/pify-2.3.0.tgz#ed141a6ac043a849ea588498e7dca8b15330e90c"
-  integrity sha1-7RQaasBDqEnqWISY59yosVMw6Qw=
-
-pify@^4.0.1:
-  version "4.0.1"
-  resolved "https://registry.yarnpkg.com/pify/-/pify-4.0.1.tgz#4b2cd25c50d598735c50292224fd8c6df41e3231"
-  integrity sha512-uB80kBFb/tfd68bVleG9T5GGsGPjJrLAUpR5PZIrhBnIaRTQRjqdJSsIKkOP6OAIFbj7GOrcudc5pNjZ+geV2g==
-
-pinkie-promise@^2.0.0:
-  version "2.0.1"
-  resolved "https://registry.yarnpkg.com/pinkie-promise/-/pinkie-promise-2.0.1.tgz#2135d6dfa7a358c069ac9b178776288228450ffa"
-  integrity sha1-ITXW36ejWMBprJsXh3YogihFD/o=
-  dependencies:
-    pinkie "^2.0.0"
-
-pinkie@^2.0.0:
-  version "2.0.4"
-  resolved "https://registry.yarnpkg.com/pinkie/-/pinkie-2.0.4.tgz#72556b80cfa0d48a974e80e77248e80ed4f7f870"
-  integrity sha1-clVrgM+g1IqXToDnckjoDtT3+HA=
-
-pirates@^4.0.1:
-  version "4.0.1"
-  resolved "https://registry.yarnpkg.com/pirates/-/pirates-4.0.1.tgz#643a92caf894566f91b2b986d2c66950a8e2fb87"
-  integrity sha512-WuNqLTbMI3tmfef2TKxlQmAiLHKtFhlsCZnPIpuv2Ow0RDVO8lfy1Opf4NUzlMXLjPl+Men7AuVdX6TA+s+uGA==
-  dependencies:
-    node-modules-regexp "^1.0.0"
-
-pkg-dir@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/pkg-dir/-/pkg-dir-2.0.0.tgz#f6d5d1109e19d63edf428e0bd57e12777615334b"
-  integrity sha1-9tXREJ4Z1j7fQo4L1X4Sd3YVM0s=
-  dependencies:
-    find-up "^2.1.0"
-
-pkg-dir@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/pkg-dir/-/pkg-dir-3.0.0.tgz#2749020f239ed990881b1f71210d51eb6523bea3"
-  integrity sha512-/E57AYkoeQ25qkxMj5PBOVgF8Kiu/h7cYS30Z5+R7WaiCCBfLq58ZI/dSeaEKb9WVJV5n/03QwrN3IeWIFllvw==
-  dependencies:
-    find-up "^3.0.0"
-
-pkg-dir@^4.1.0, pkg-dir@^4.2.0:
-  version "4.2.0"
-  resolved "https://registry.yarnpkg.com/pkg-dir/-/pkg-dir-4.2.0.tgz#f099133df7ede422e81d1d8448270eeb3e4261f3"
-  integrity sha512-HRDzbaKjC+AOWVXxAU/x54COGeIv9eb+6CkDSQoNTt4XyWoIJvuPsXizxu/Fr23EiekbtZwmh1IcIG/l/a10GQ==
-  dependencies:
-    find-up "^4.0.0"
-
-pkg-up@3.1.0:
-  version "3.1.0"
-  resolved "https://registry.yarnpkg.com/pkg-up/-/pkg-up-3.1.0.tgz#100ec235cc150e4fd42519412596a28512a0def5"
-  integrity sha512-nDywThFk1i4BQK4twPQ6TA4RT8bDY96yeuCVBWL3ePARCiEKDRSrNGbFIgUJpLp+XeIR65v8ra7WuJOFUBtkMA==
-  dependencies:
-    find-up "^3.0.0"
-
-pnp-webpack-plugin@1.6.4:
-  version "1.6.4"
-  resolved "https://registry.yarnpkg.com/pnp-webpack-plugin/-/pnp-webpack-plugin-1.6.4.tgz#c9711ac4dc48a685dabafc86f8b6dd9f8df84149"
-  integrity sha512-7Wjy+9E3WwLOEL30D+m8TSTF7qJJUJLONBnwQp0518siuMxUQUbgZwssaFX+QKlZkjHZcw/IpZCt/H0srrntSg==
-  dependencies:
-    ts-pnp "^1.1.6"
-
-popmotion@9.2.1:
-  version "9.2.1"
-  resolved "https://registry.yarnpkg.com/popmotion/-/popmotion-9.2.1.tgz#8bc19214a4f0ba7925a901455d0996131cbec6dc"
-  integrity sha512-kplHK5z2LwYkUXNMCC4+tSYuuAXcG3oatKdsEzJzc1r0I2wM5UnYKITO1ZUnmmFy84VJqIZuoBXwJrWuZuAKkg==
-  dependencies:
-    framesync "5.1.0"
-    hey-listen "^1.0.8"
-    style-value-types "4.0.3"
-    tslib "^1.10.0"
-
-portfinder@^1.0.26:
-  version "1.0.28"
-  resolved "https://registry.yarnpkg.com/portfinder/-/portfinder-1.0.28.tgz#67c4622852bd5374dd1dd900f779f53462fac778"
-  integrity sha512-Se+2isanIcEqf2XMHjyUKskczxbPH7dQnlMjXX6+dybayyHvAf/TCgyMRlzf/B6QDhAEFOGes0pzRo3by4AbMA==
-  dependencies:
-    async "^2.6.2"
-    debug "^3.1.1"
-    mkdirp "^0.5.5"
-
-posix-character-classes@^0.1.0:
-  version "0.1.1"
-  resolved "https://registry.yarnpkg.com/posix-character-classes/-/posix-character-classes-0.1.1.tgz#01eac0fe3b5af71a2a6c02feabb8c1fef7e00eab"
-  integrity sha1-AerA/jta9xoqbAL+q7jB/vfgDqs=
-
-postcss-attribute-case-insensitive@^4.0.1:
-  version "4.0.2"
-  resolved "https://registry.yarnpkg.com/postcss-attribute-case-insensitive/-/postcss-attribute-case-insensitive-4.0.2.tgz#d93e46b504589e94ac7277b0463226c68041a880"
-  integrity sha512-clkFxk/9pcdb4Vkn0hAHq3YnxBQ2p0CGD1dy24jN+reBck+EWxMbxSUqN4Yj7t0w8csl87K6p0gxBe1utkJsYA==
-  dependencies:
-    postcss "^7.0.2"
-    postcss-selector-parser "^6.0.2"
-
-postcss-browser-comments@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/postcss-browser-comments/-/postcss-browser-comments-3.0.0.tgz#1248d2d935fb72053c8e1f61a84a57292d9f65e9"
-  integrity sha512-qfVjLfq7HFd2e0HW4s1dvU8X080OZdG46fFbIBFjW7US7YPDcWfRvdElvwMJr2LI6hMmD+7LnH2HcmXTs+uOig==
-  dependencies:
-    postcss "^7"
-
-postcss-calc@^7.0.1:
-  version "7.0.5"
-  resolved "https://registry.yarnpkg.com/postcss-calc/-/postcss-calc-7.0.5.tgz#f8a6e99f12e619c2ebc23cf6c486fdc15860933e"
-  integrity sha512-1tKHutbGtLtEZF6PT4JSihCHfIVldU72mZ8SdZHIYriIZ9fh9k9aWSppaT8rHsyI3dX+KSR+W+Ix9BMY3AODrg==
-  dependencies:
-    postcss "^7.0.27"
-    postcss-selector-parser "^6.0.2"
-    postcss-value-parser "^4.0.2"
-
-postcss-color-functional-notation@^2.0.1:
-  version "2.0.1"
-  resolved "https://registry.yarnpkg.com/postcss-color-functional-notation/-/postcss-color-functional-notation-2.0.1.tgz#5efd37a88fbabeb00a2966d1e53d98ced93f74e0"
-  integrity sha512-ZBARCypjEDofW4P6IdPVTLhDNXPRn8T2s1zHbZidW6rPaaZvcnCS2soYFIQJrMZSxiePJ2XIYTlcb2ztr/eT2g==
-  dependencies:
-    postcss "^7.0.2"
-    postcss-values-parser "^2.0.0"
-
-postcss-color-gray@^5.0.0:
-  version "5.0.0"
-  resolved "https://registry.yarnpkg.com/postcss-color-gray/-/postcss-color-gray-5.0.0.tgz#532a31eb909f8da898ceffe296fdc1f864be8547"
-  integrity sha512-q6BuRnAGKM/ZRpfDascZlIZPjvwsRye7UDNalqVz3s7GDxMtqPY6+Q871liNxsonUw8oC61OG+PSaysYpl1bnw==
-  dependencies:
-    "@csstools/convert-colors" "^1.4.0"
-    postcss "^7.0.5"
-    postcss-values-parser "^2.0.0"
-
-postcss-color-hex-alpha@^5.0.3:
-  version "5.0.3"
-  resolved "https://registry.yarnpkg.com/postcss-color-hex-alpha/-/postcss-color-hex-alpha-5.0.3.tgz#a8d9ca4c39d497c9661e374b9c51899ef0f87388"
-  integrity sha512-PF4GDel8q3kkreVXKLAGNpHKilXsZ6xuu+mOQMHWHLPNyjiUBOr75sp5ZKJfmv1MCus5/DWUGcK9hm6qHEnXYw==
-  dependencies:
-    postcss "^7.0.14"
-    postcss-values-parser "^2.0.1"
-
-postcss-color-mod-function@^3.0.3:
-  version "3.0.3"
-  resolved "https://registry.yarnpkg.com/postcss-color-mod-function/-/postcss-color-mod-function-3.0.3.tgz#816ba145ac11cc3cb6baa905a75a49f903e4d31d"
-  integrity sha512-YP4VG+xufxaVtzV6ZmhEtc+/aTXH3d0JLpnYfxqTvwZPbJhWqp8bSY3nfNzNRFLgB4XSaBA82OE4VjOOKpCdVQ==
-  dependencies:
-    "@csstools/convert-colors" "^1.4.0"
-    postcss "^7.0.2"
-    postcss-values-parser "^2.0.0"
-
-postcss-color-rebeccapurple@^4.0.1:
-  version "4.0.1"
-  resolved "https://registry.yarnpkg.com/postcss-color-rebeccapurple/-/postcss-color-rebeccapurple-4.0.1.tgz#c7a89be872bb74e45b1e3022bfe5748823e6de77"
-  integrity sha512-aAe3OhkS6qJXBbqzvZth2Au4V3KieR5sRQ4ptb2b2O8wgvB3SJBsdG+jsn2BZbbwekDG8nTfcCNKcSfe/lEy8g==
-  dependencies:
-    postcss "^7.0.2"
-    postcss-values-parser "^2.0.0"
-
-postcss-colormin@^4.0.3:
-  version "4.0.3"
-  resolved "https://registry.yarnpkg.com/postcss-colormin/-/postcss-colormin-4.0.3.tgz#ae060bce93ed794ac71264f08132d550956bd381"
-  integrity sha512-WyQFAdDZpExQh32j0U0feWisZ0dmOtPl44qYmJKkq9xFWY3p+4qnRzCHeNrkeRhwPHz9bQ3mo0/yVkaply0MNw==
-  dependencies:
-    browserslist "^4.0.0"
-    color "^3.0.0"
-    has "^1.0.0"
-    postcss "^7.0.0"
-    postcss-value-parser "^3.0.0"
-
-postcss-convert-values@^4.0.1:
-  version "4.0.1"
-  resolved "https://registry.yarnpkg.com/postcss-convert-values/-/postcss-convert-values-4.0.1.tgz#ca3813ed4da0f812f9d43703584e449ebe189a7f"
-  integrity sha512-Kisdo1y77KUC0Jmn0OXU/COOJbzM8cImvw1ZFsBgBgMgb1iL23Zs/LXRe3r+EZqM3vGYKdQ2YJVQ5VkJI+zEJQ==
-  dependencies:
-    postcss "^7.0.0"
-    postcss-value-parser "^3.0.0"
-
-postcss-custom-media@^7.0.8:
-  version "7.0.8"
-  resolved "https://registry.yarnpkg.com/postcss-custom-media/-/postcss-custom-media-7.0.8.tgz#fffd13ffeffad73621be5f387076a28b00294e0c"
-  integrity sha512-c9s5iX0Ge15o00HKbuRuTqNndsJUbaXdiNsksnVH8H4gdc+zbLzr/UasOwNG6CTDpLFekVY4672eWdiiWu2GUg==
-  dependencies:
-    postcss "^7.0.14"
-
-postcss-custom-properties@^8.0.11:
-  version "8.0.11"
-  resolved "https://registry.yarnpkg.com/postcss-custom-properties/-/postcss-custom-properties-8.0.11.tgz#2d61772d6e92f22f5e0d52602df8fae46fa30d97"
-  integrity sha512-nm+o0eLdYqdnJ5abAJeXp4CEU1c1k+eB2yMCvhgzsds/e0umabFrN6HoTy/8Q4K5ilxERdl/JD1LO5ANoYBeMA==
-  dependencies:
-    postcss "^7.0.17"
-    postcss-values-parser "^2.0.1"
-
-postcss-custom-selectors@^5.1.2:
-  version "5.1.2"
-  resolved "https://registry.yarnpkg.com/postcss-custom-selectors/-/postcss-custom-selectors-5.1.2.tgz#64858c6eb2ecff2fb41d0b28c9dd7b3db4de7fba"
-  integrity sha512-DSGDhqinCqXqlS4R7KGxL1OSycd1lydugJ1ky4iRXPHdBRiozyMHrdu0H3o7qNOCiZwySZTUI5MV0T8QhCLu+w==
-  dependencies:
-    postcss "^7.0.2"
-    postcss-selector-parser "^5.0.0-rc.3"
-
-postcss-dir-pseudo-class@^5.0.0:
-  version "5.0.0"
-  resolved "https://registry.yarnpkg.com/postcss-dir-pseudo-class/-/postcss-dir-pseudo-class-5.0.0.tgz#6e3a4177d0edb3abcc85fdb6fbb1c26dabaeaba2"
-  integrity sha512-3pm4oq8HYWMZePJY+5ANriPs3P07q+LW6FAdTlkFH2XqDdP4HeeJYMOzn0HYLhRSjBO3fhiqSwwU9xEULSrPgw==
-  dependencies:
-    postcss "^7.0.2"
-    postcss-selector-parser "^5.0.0-rc.3"
-
-postcss-discard-comments@^4.0.2:
-  version "4.0.2"
-  resolved "https://registry.yarnpkg.com/postcss-discard-comments/-/postcss-discard-comments-4.0.2.tgz#1fbabd2c246bff6aaad7997b2b0918f4d7af4033"
-  integrity sha512-RJutN259iuRf3IW7GZyLM5Sw4GLTOH8FmsXBnv8Ab/Tc2k4SR4qbV4DNbyyY4+Sjo362SyDmW2DQ7lBSChrpkg==
-  dependencies:
-    postcss "^7.0.0"
-
-postcss-discard-duplicates@^4.0.2:
-  version "4.0.2"
-  resolved "https://registry.yarnpkg.com/postcss-discard-duplicates/-/postcss-discard-duplicates-4.0.2.tgz#3fe133cd3c82282e550fc9b239176a9207b784eb"
-  integrity sha512-ZNQfR1gPNAiXZhgENFfEglF93pciw0WxMkJeVmw8eF+JZBbMD7jp6C67GqJAXVZP2BWbOztKfbsdmMp/k8c6oQ==
-  dependencies:
-    postcss "^7.0.0"
-
-postcss-discard-empty@^4.0.1:
-  version "4.0.1"
-  resolved "https://registry.yarnpkg.com/postcss-discard-empty/-/postcss-discard-empty-4.0.1.tgz#c8c951e9f73ed9428019458444a02ad90bb9f765"
-  integrity sha512-B9miTzbznhDjTfjvipfHoqbWKwd0Mj+/fL5s1QOz06wufguil+Xheo4XpOnc4NqKYBCNqqEzgPv2aPBIJLox0w==
-  dependencies:
-    postcss "^7.0.0"
-
-postcss-discard-overridden@^4.0.1:
-  version "4.0.1"
-  resolved "https://registry.yarnpkg.com/postcss-discard-overridden/-/postcss-discard-overridden-4.0.1.tgz#652aef8a96726f029f5e3e00146ee7a4e755ff57"
-  integrity sha512-IYY2bEDD7g1XM1IDEsUT4//iEYCxAmP5oDSFMVU/JVvT7gh+l4fmjciLqGgwjdWpQIdb0Che2VX00QObS5+cTg==
-  dependencies:
-    postcss "^7.0.0"
-
-postcss-double-position-gradients@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/postcss-double-position-gradients/-/postcss-double-position-gradients-1.0.0.tgz#fc927d52fddc896cb3a2812ebc5df147e110522e"
-  integrity sha512-G+nV8EnQq25fOI8CH/B6krEohGWnF5+3A6H/+JEpOncu5dCnkS1QQ6+ct3Jkaepw1NGVqqOZH6lqrm244mCftA==
-  dependencies:
-    postcss "^7.0.5"
-    postcss-values-parser "^2.0.0"
-
-postcss-env-function@^2.0.2:
-  version "2.0.2"
-  resolved "https://registry.yarnpkg.com/postcss-env-function/-/postcss-env-function-2.0.2.tgz#0f3e3d3c57f094a92c2baf4b6241f0b0da5365d7"
-  integrity sha512-rwac4BuZlITeUbiBq60h/xbLzXY43qOsIErngWa4l7Mt+RaSkT7QBjXVGTcBHupykkblHMDrBFh30zchYPaOUw==
-  dependencies:
-    postcss "^7.0.2"
-    postcss-values-parser "^2.0.0"
-
-postcss-flexbugs-fixes@4.2.1:
-  version "4.2.1"
-  resolved "https://registry.yarnpkg.com/postcss-flexbugs-fixes/-/postcss-flexbugs-fixes-4.2.1.tgz#9218a65249f30897deab1033aced8578562a6690"
-  integrity sha512-9SiofaZ9CWpQWxOwRh1b/r85KD5y7GgvsNt1056k6OYLvWUun0czCvogfJgylC22uJTwW1KzY3Gz65NZRlvoiQ==
-  dependencies:
-    postcss "^7.0.26"
-
-postcss-focus-visible@^4.0.0:
-  version "4.0.0"
-  resolved "https://registry.yarnpkg.com/postcss-focus-visible/-/postcss-focus-visible-4.0.0.tgz#477d107113ade6024b14128317ade2bd1e17046e"
-  integrity sha512-Z5CkWBw0+idJHSV6+Bgf2peDOFf/x4o+vX/pwcNYrWpXFrSfTkQ3JQ1ojrq9yS+upnAlNRHeg8uEwFTgorjI8g==
-  dependencies:
-    postcss "^7.0.2"
-
-postcss-focus-within@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/postcss-focus-within/-/postcss-focus-within-3.0.0.tgz#763b8788596cee9b874c999201cdde80659ef680"
-  integrity sha512-W0APui8jQeBKbCGZudW37EeMCjDeVxKgiYfIIEo8Bdh5SpB9sxds/Iq8SEuzS0Q4YFOlG7EPFulbbxujpkrV2w==
-  dependencies:
-    postcss "^7.0.2"
-
-postcss-font-variant@^4.0.0:
-  version "4.0.1"
-  resolved "https://registry.yarnpkg.com/postcss-font-variant/-/postcss-font-variant-4.0.1.tgz#42d4c0ab30894f60f98b17561eb5c0321f502641"
-  integrity sha512-I3ADQSTNtLTTd8uxZhtSOrTCQ9G4qUVKPjHiDk0bV75QSxXjVWiJVJ2VLdspGUi9fbW9BcjKJoRvxAH1pckqmA==
-  dependencies:
-    postcss "^7.0.2"
-
-postcss-gap-properties@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/postcss-gap-properties/-/postcss-gap-properties-2.0.0.tgz#431c192ab3ed96a3c3d09f2ff615960f902c1715"
-  integrity sha512-QZSqDaMgXCHuHTEzMsS2KfVDOq7ZFiknSpkrPJY6jmxbugUPTuSzs/vuE5I3zv0WAS+3vhrlqhijiprnuQfzmg==
-  dependencies:
-    postcss "^7.0.2"
-
-postcss-image-set-function@^3.0.1:
-  version "3.0.1"
-  resolved "https://registry.yarnpkg.com/postcss-image-set-function/-/postcss-image-set-function-3.0.1.tgz#28920a2f29945bed4c3198d7df6496d410d3f288"
-  integrity sha512-oPTcFFip5LZy8Y/whto91L9xdRHCWEMs3e1MdJxhgt4jy2WYXfhkng59fH5qLXSCPN8k4n94p1Czrfe5IOkKUw==
-  dependencies:
-    postcss "^7.0.2"
-    postcss-values-parser "^2.0.0"
-
-postcss-initial@^3.0.0:
-  version "3.0.2"
-  resolved "https://registry.yarnpkg.com/postcss-initial/-/postcss-initial-3.0.2.tgz#f018563694b3c16ae8eaabe3c585ac6319637b2d"
-  integrity sha512-ugA2wKonC0xeNHgirR4D3VWHs2JcU08WAi1KFLVcnb7IN89phID6Qtg2RIctWbnvp1TM2BOmDtX8GGLCKdR8YA==
-  dependencies:
-    lodash.template "^4.5.0"
-    postcss "^7.0.2"
-
-postcss-lab-function@^2.0.1:
-  version "2.0.1"
-  resolved "https://registry.yarnpkg.com/postcss-lab-function/-/postcss-lab-function-2.0.1.tgz#bb51a6856cd12289ab4ae20db1e3821ef13d7d2e"
-  integrity sha512-whLy1IeZKY+3fYdqQFuDBf8Auw+qFuVnChWjmxm/UhHWqNHZx+B99EwxTvGYmUBqe3Fjxs4L1BoZTJmPu6usVg==
-  dependencies:
-    "@csstools/convert-colors" "^1.4.0"
-    postcss "^7.0.2"
-    postcss-values-parser "^2.0.0"
-
-postcss-load-config@^2.0.0:
-  version "2.1.2"
-  resolved "https://registry.yarnpkg.com/postcss-load-config/-/postcss-load-config-2.1.2.tgz#c5ea504f2c4aef33c7359a34de3573772ad7502a"
-  integrity sha512-/rDeGV6vMUo3mwJZmeHfEDvwnTKKqQ0S7OHUi/kJvvtx3aWtyWG2/0ZWnzCt2keEclwN6Tf0DST2v9kITdOKYw==
-  dependencies:
-    cosmiconfig "^5.0.0"
-    import-cwd "^2.0.0"
-
-postcss-loader@3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/postcss-loader/-/postcss-loader-3.0.0.tgz#6b97943e47c72d845fa9e03f273773d4e8dd6c2d"
-  integrity sha512-cLWoDEY5OwHcAjDnkyRQzAXfs2jrKjXpO/HQFcc5b5u/r7aa471wdmChmwfnv7x2u840iat/wi0lQ5nbRgSkUA==
-  dependencies:
-    loader-utils "^1.1.0"
-    postcss "^7.0.0"
-    postcss-load-config "^2.0.0"
-    schema-utils "^1.0.0"
-
-postcss-logical@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/postcss-logical/-/postcss-logical-3.0.0.tgz#2495d0f8b82e9f262725f75f9401b34e7b45d5b5"
-  integrity sha512-1SUKdJc2vuMOmeItqGuNaC+N8MzBWFWEkAnRnLpFYj1tGGa7NqyVBujfRtgNa2gXR+6RkGUiB2O5Vmh7E2RmiA==
-  dependencies:
-    postcss "^7.0.2"
-
-postcss-media-minmax@^4.0.0:
-  version "4.0.0"
-  resolved "https://registry.yarnpkg.com/postcss-media-minmax/-/postcss-media-minmax-4.0.0.tgz#b75bb6cbc217c8ac49433e12f22048814a4f5ed5"
-  integrity sha512-fo9moya6qyxsjbFAYl97qKO9gyre3qvbMnkOZeZwlsW6XYFsvs2DMGDlchVLfAd8LHPZDxivu/+qW2SMQeTHBw==
-  dependencies:
-    postcss "^7.0.2"
-
-postcss-merge-longhand@^4.0.11:
-  version "4.0.11"
-  resolved "https://registry.yarnpkg.com/postcss-merge-longhand/-/postcss-merge-longhand-4.0.11.tgz#62f49a13e4a0ee04e7b98f42bb16062ca2549e24"
-  integrity sha512-alx/zmoeXvJjp7L4mxEMjh8lxVlDFX1gqWHzaaQewwMZiVhLo42TEClKaeHbRf6J7j82ZOdTJ808RtN0ZOZwvw==
-  dependencies:
-    css-color-names "0.0.4"
-    postcss "^7.0.0"
-    postcss-value-parser "^3.0.0"
-    stylehacks "^4.0.0"
-
-postcss-merge-rules@^4.0.3:
-  version "4.0.3"
-  resolved "https://registry.yarnpkg.com/postcss-merge-rules/-/postcss-merge-rules-4.0.3.tgz#362bea4ff5a1f98e4075a713c6cb25aefef9a650"
-  integrity sha512-U7e3r1SbvYzO0Jr3UT/zKBVgYYyhAz0aitvGIYOYK5CPmkNih+WDSsS5tvPrJ8YMQYlEMvsZIiqmn7HdFUaeEQ==
-  dependencies:
-    browserslist "^4.0.0"
-    caniuse-api "^3.0.0"
-    cssnano-util-same-parent "^4.0.0"
-    postcss "^7.0.0"
-    postcss-selector-parser "^3.0.0"
-    vendors "^1.0.0"
-
-postcss-minify-font-values@^4.0.2:
-  version "4.0.2"
-  resolved "https://registry.yarnpkg.com/postcss-minify-font-values/-/postcss-minify-font-values-4.0.2.tgz#cd4c344cce474343fac5d82206ab2cbcb8afd5a6"
-  integrity sha512-j85oO6OnRU9zPf04+PZv1LYIYOprWm6IA6zkXkrJXyRveDEuQggG6tvoy8ir8ZwjLxLuGfNkCZEQG7zan+Hbtg==
-  dependencies:
-    postcss "^7.0.0"
-    postcss-value-parser "^3.0.0"
-
-postcss-minify-gradients@^4.0.2:
-  version "4.0.2"
-  resolved "https://registry.yarnpkg.com/postcss-minify-gradients/-/postcss-minify-gradients-4.0.2.tgz#93b29c2ff5099c535eecda56c4aa6e665a663471"
-  integrity sha512-qKPfwlONdcf/AndP1U8SJ/uzIJtowHlMaSioKzebAXSG4iJthlWC9iSWznQcX4f66gIWX44RSA841HTHj3wK+Q==
-  dependencies:
-    cssnano-util-get-arguments "^4.0.0"
-    is-color-stop "^1.0.0"
-    postcss "^7.0.0"
-    postcss-value-parser "^3.0.0"
-
-postcss-minify-params@^4.0.2:
-  version "4.0.2"
-  resolved "https://registry.yarnpkg.com/postcss-minify-params/-/postcss-minify-params-4.0.2.tgz#6b9cef030c11e35261f95f618c90036d680db874"
-  integrity sha512-G7eWyzEx0xL4/wiBBJxJOz48zAKV2WG3iZOqVhPet/9geefm/Px5uo1fzlHu+DOjT+m0Mmiz3jkQzVHe6wxAWg==
-  dependencies:
-    alphanum-sort "^1.0.0"
-    browserslist "^4.0.0"
-    cssnano-util-get-arguments "^4.0.0"
-    postcss "^7.0.0"
-    postcss-value-parser "^3.0.0"
-    uniqs "^2.0.0"
-
-postcss-minify-selectors@^4.0.2:
-  version "4.0.2"
-  resolved "https://registry.yarnpkg.com/postcss-minify-selectors/-/postcss-minify-selectors-4.0.2.tgz#e2e5eb40bfee500d0cd9243500f5f8ea4262fbd8"
-  integrity sha512-D5S1iViljXBj9kflQo4YutWnJmwm8VvIsU1GeXJGiG9j8CIg9zs4voPMdQDUmIxetUOh60VilsNzCiAFTOqu3g==
-  dependencies:
-    alphanum-sort "^1.0.0"
-    has "^1.0.0"
-    postcss "^7.0.0"
-    postcss-selector-parser "^3.0.0"
-
-postcss-modules-extract-imports@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/postcss-modules-extract-imports/-/postcss-modules-extract-imports-2.0.0.tgz#818719a1ae1da325f9832446b01136eeb493cd7e"
-  integrity sha512-LaYLDNS4SG8Q5WAWqIJgdHPJrDDr/Lv775rMBFUbgjTz6j34lUznACHcdRWroPvXANP2Vj7yNK57vp9eFqzLWQ==
-  dependencies:
-    postcss "^7.0.5"
-
-postcss-modules-local-by-default@^3.0.3:
-  version "3.0.3"
-  resolved "https://registry.yarnpkg.com/postcss-modules-local-by-default/-/postcss-modules-local-by-default-3.0.3.tgz#bb14e0cc78279d504dbdcbfd7e0ca28993ffbbb0"
-  integrity sha512-e3xDq+LotiGesympRlKNgaJ0PCzoUIdpH0dj47iWAui/kyTgh3CiAr1qP54uodmJhl6p9rN6BoNcdEDVJx9RDw==
-  dependencies:
-    icss-utils "^4.1.1"
-    postcss "^7.0.32"
-    postcss-selector-parser "^6.0.2"
-    postcss-value-parser "^4.1.0"
-
-postcss-modules-scope@^2.2.0:
-  version "2.2.0"
-  resolved "https://registry.yarnpkg.com/postcss-modules-scope/-/postcss-modules-scope-2.2.0.tgz#385cae013cc7743f5a7d7602d1073a89eaae62ee"
-  integrity sha512-YyEgsTMRpNd+HmyC7H/mh3y+MeFWevy7V1evVhJWewmMbjDHIbZbOXICC2y+m1xI1UVfIT1HMW/O04Hxyu9oXQ==
-  dependencies:
-    postcss "^7.0.6"
-    postcss-selector-parser "^6.0.0"
-
-postcss-modules-values@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/postcss-modules-values/-/postcss-modules-values-3.0.0.tgz#5b5000d6ebae29b4255301b4a3a54574423e7f10"
-  integrity sha512-1//E5jCBrZ9DmRX+zCtmQtRSV6PV42Ix7Bzj9GbwJceduuf7IqP8MgeTXuRDHOWj2m0VzZD5+roFWDuU8RQjcg==
-  dependencies:
-    icss-utils "^4.0.0"
-    postcss "^7.0.6"
-
-postcss-nesting@^7.0.0:
-  version "7.0.1"
-  resolved "https://registry.yarnpkg.com/postcss-nesting/-/postcss-nesting-7.0.1.tgz#b50ad7b7f0173e5b5e3880c3501344703e04c052"
-  integrity sha512-FrorPb0H3nuVq0Sff7W2rnc3SmIcruVC6YwpcS+k687VxyxO33iE1amna7wHuRVzM8vfiYofXSBHNAZ3QhLvYg==
-  dependencies:
-    postcss "^7.0.2"
-
-postcss-normalize-charset@^4.0.1:
-  version "4.0.1"
-  resolved "https://registry.yarnpkg.com/postcss-normalize-charset/-/postcss-normalize-charset-4.0.1.tgz#8b35add3aee83a136b0471e0d59be58a50285dd4"
-  integrity sha512-gMXCrrlWh6G27U0hF3vNvR3w8I1s2wOBILvA87iNXaPvSNo5uZAMYsZG7XjCUf1eVxuPfyL4TJ7++SGZLc9A3g==
-  dependencies:
-    postcss "^7.0.0"
-
-postcss-normalize-display-values@^4.0.2:
-  version "4.0.2"
-  resolved "https://registry.yarnpkg.com/postcss-normalize-display-values/-/postcss-normalize-display-values-4.0.2.tgz#0dbe04a4ce9063d4667ed2be476bb830c825935a"
-  integrity sha512-3F2jcsaMW7+VtRMAqf/3m4cPFhPD3EFRgNs18u+k3lTJJlVe7d0YPO+bnwqo2xg8YiRpDXJI2u8A0wqJxMsQuQ==
-  dependencies:
-    cssnano-util-get-match "^4.0.0"
-    postcss "^7.0.0"
-    postcss-value-parser "^3.0.0"
-
-postcss-normalize-positions@^4.0.2:
-  version "4.0.2"
-  resolved "https://registry.yarnpkg.com/postcss-normalize-positions/-/postcss-normalize-positions-4.0.2.tgz#05f757f84f260437378368a91f8932d4b102917f"
-  integrity sha512-Dlf3/9AxpxE+NF1fJxYDeggi5WwV35MXGFnnoccP/9qDtFrTArZ0D0R+iKcg5WsUd8nUYMIl8yXDCtcrT8JrdA==
-  dependencies:
-    cssnano-util-get-arguments "^4.0.0"
-    has "^1.0.0"
-    postcss "^7.0.0"
-    postcss-value-parser "^3.0.0"
-
-postcss-normalize-repeat-style@^4.0.2:
-  version "4.0.2"
-  resolved "https://registry.yarnpkg.com/postcss-normalize-repeat-style/-/postcss-normalize-repeat-style-4.0.2.tgz#c4ebbc289f3991a028d44751cbdd11918b17910c"
-  integrity sha512-qvigdYYMpSuoFs3Is/f5nHdRLJN/ITA7huIoCyqqENJe9PvPmLhNLMu7QTjPdtnVf6OcYYO5SHonx4+fbJE1+Q==
-  dependencies:
-    cssnano-util-get-arguments "^4.0.0"
-    cssnano-util-get-match "^4.0.0"
-    postcss "^7.0.0"
-    postcss-value-parser "^3.0.0"
-
-postcss-normalize-string@^4.0.2:
-  version "4.0.2"
-  resolved "https://registry.yarnpkg.com/postcss-normalize-string/-/postcss-normalize-string-4.0.2.tgz#cd44c40ab07a0c7a36dc5e99aace1eca4ec2690c"
-  integrity sha512-RrERod97Dnwqq49WNz8qo66ps0swYZDSb6rM57kN2J+aoyEAJfZ6bMx0sx/F9TIEX0xthPGCmeyiam/jXif0eA==
-  dependencies:
-    has "^1.0.0"
-    postcss "^7.0.0"
-    postcss-value-parser "^3.0.0"
-
-postcss-normalize-timing-functions@^4.0.2:
-  version "4.0.2"
-  resolved "https://registry.yarnpkg.com/postcss-normalize-timing-functions/-/postcss-normalize-timing-functions-4.0.2.tgz#8e009ca2a3949cdaf8ad23e6b6ab99cb5e7d28d9"
-  integrity sha512-acwJY95edP762e++00Ehq9L4sZCEcOPyaHwoaFOhIwWCDfik6YvqsYNxckee65JHLKzuNSSmAdxwD2Cud1Z54A==
-  dependencies:
-    cssnano-util-get-match "^4.0.0"
-    postcss "^7.0.0"
-    postcss-value-parser "^3.0.0"
-
-postcss-normalize-unicode@^4.0.1:
-  version "4.0.1"
-  resolved "https://registry.yarnpkg.com/postcss-normalize-unicode/-/postcss-normalize-unicode-4.0.1.tgz#841bd48fdcf3019ad4baa7493a3d363b52ae1cfb"
-  integrity sha512-od18Uq2wCYn+vZ/qCOeutvHjB5jm57ToxRaMeNuf0nWVHaP9Hua56QyMF6fs/4FSUnVIw0CBPsU0K4LnBPwYwg==
-  dependencies:
-    browserslist "^4.0.0"
-    postcss "^7.0.0"
-    postcss-value-parser "^3.0.0"
-
-postcss-normalize-url@^4.0.1:
-  version "4.0.1"
-  resolved "https://registry.yarnpkg.com/postcss-normalize-url/-/postcss-normalize-url-4.0.1.tgz#10e437f86bc7c7e58f7b9652ed878daaa95faae1"
-  integrity sha512-p5oVaF4+IHwu7VpMan/SSpmpYxcJMtkGppYf0VbdH5B6hN8YNmVyJLuY9FmLQTzY3fag5ESUUHDqM+heid0UVA==
-  dependencies:
-    is-absolute-url "^2.0.0"
-    normalize-url "^3.0.0"
-    postcss "^7.0.0"
-    postcss-value-parser "^3.0.0"
-
-postcss-normalize-whitespace@^4.0.2:
-  version "4.0.2"
-  resolved "https://registry.yarnpkg.com/postcss-normalize-whitespace/-/postcss-normalize-whitespace-4.0.2.tgz#bf1d4070fe4fcea87d1348e825d8cc0c5faa7d82"
-  integrity sha512-tO8QIgrsI3p95r8fyqKV+ufKlSHh9hMJqACqbv2XknufqEDhDvbguXGBBqxw9nsQoXWf0qOqppziKJKHMD4GtA==
-  dependencies:
-    postcss "^7.0.0"
-    postcss-value-parser "^3.0.0"
-
-postcss-normalize@8.0.1:
-  version "8.0.1"
-  resolved "https://registry.yarnpkg.com/postcss-normalize/-/postcss-normalize-8.0.1.tgz#90e80a7763d7fdf2da6f2f0f82be832ce4f66776"
-  integrity sha512-rt9JMS/m9FHIRroDDBGSMsyW1c0fkvOJPy62ggxSHUldJO7B195TqFMqIf+lY5ezpDcYOV4j86aUp3/XbxzCCQ==
-  dependencies:
-    "@csstools/normalize.css" "^10.1.0"
-    browserslist "^4.6.2"
-    postcss "^7.0.17"
-    postcss-browser-comments "^3.0.0"
-    sanitize.css "^10.0.0"
-
-postcss-ordered-values@^4.1.2:
-  version "4.1.2"
-  resolved "https://registry.yarnpkg.com/postcss-ordered-values/-/postcss-ordered-values-4.1.2.tgz#0cf75c820ec7d5c4d280189559e0b571ebac0eee"
-  integrity sha512-2fCObh5UanxvSxeXrtLtlwVThBvHn6MQcu4ksNT2tsaV2Fg76R2CV98W7wNSlX+5/pFwEyaDwKLLoEV7uRybAw==
-  dependencies:
-    cssnano-util-get-arguments "^4.0.0"
-    postcss "^7.0.0"
-    postcss-value-parser "^3.0.0"
-
-postcss-overflow-shorthand@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/postcss-overflow-shorthand/-/postcss-overflow-shorthand-2.0.0.tgz#31ecf350e9c6f6ddc250a78f0c3e111f32dd4c30"
-  integrity sha512-aK0fHc9CBNx8jbzMYhshZcEv8LtYnBIRYQD5i7w/K/wS9c2+0NSR6B3OVMu5y0hBHYLcMGjfU+dmWYNKH0I85g==
-  dependencies:
-    postcss "^7.0.2"
-
-postcss-page-break@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/postcss-page-break/-/postcss-page-break-2.0.0.tgz#add52d0e0a528cabe6afee8b46e2abb277df46bf"
-  integrity sha512-tkpTSrLpfLfD9HvgOlJuigLuk39wVTbbd8RKcy8/ugV2bNBUW3xU+AIqyxhDrQr1VUj1RmyJrBn1YWrqUm9zAQ==
-  dependencies:
-    postcss "^7.0.2"
-
-postcss-place@^4.0.1:
-  version "4.0.1"
-  resolved "https://registry.yarnpkg.com/postcss-place/-/postcss-place-4.0.1.tgz#e9f39d33d2dc584e46ee1db45adb77ca9d1dcc62"
-  integrity sha512-Zb6byCSLkgRKLODj/5mQugyuj9bvAAw9LqJJjgwz5cYryGeXfFZfSXoP1UfveccFmeq0b/2xxwcTEVScnqGxBg==
-  dependencies:
-    postcss "^7.0.2"
-    postcss-values-parser "^2.0.0"
-
-postcss-preset-env@6.7.0:
-  version "6.7.0"
-  resolved "https://registry.yarnpkg.com/postcss-preset-env/-/postcss-preset-env-6.7.0.tgz#c34ddacf8f902383b35ad1e030f178f4cdf118a5"
-  integrity sha512-eU4/K5xzSFwUFJ8hTdTQzo2RBLbDVt83QZrAvI07TULOkmyQlnYlpwep+2yIK+K+0KlZO4BvFcleOCCcUtwchg==
-  dependencies:
-    autoprefixer "^9.6.1"
-    browserslist "^4.6.4"
-    caniuse-lite "^1.0.30000981"
-    css-blank-pseudo "^0.1.4"
-    css-has-pseudo "^0.10.0"
-    css-prefers-color-scheme "^3.1.1"
-    cssdb "^4.4.0"
-    postcss "^7.0.17"
-    postcss-attribute-case-insensitive "^4.0.1"
-    postcss-color-functional-notation "^2.0.1"
-    postcss-color-gray "^5.0.0"
-    postcss-color-hex-alpha "^5.0.3"
-    postcss-color-mod-function "^3.0.3"
-    postcss-color-rebeccapurple "^4.0.1"
-    postcss-custom-media "^7.0.8"
-    postcss-custom-properties "^8.0.11"
-    postcss-custom-selectors "^5.1.2"
-    postcss-dir-pseudo-class "^5.0.0"
-    postcss-double-position-gradients "^1.0.0"
-    postcss-env-function "^2.0.2"
-    postcss-focus-visible "^4.0.0"
-    postcss-focus-within "^3.0.0"
-    postcss-font-variant "^4.0.0"
-    postcss-gap-properties "^2.0.0"
-    postcss-image-set-function "^3.0.1"
-    postcss-initial "^3.0.0"
-    postcss-lab-function "^2.0.1"
-    postcss-logical "^3.0.0"
-    postcss-media-minmax "^4.0.0"
-    postcss-nesting "^7.0.0"
-    postcss-overflow-shorthand "^2.0.0"
-    postcss-page-break "^2.0.0"
-    postcss-place "^4.0.1"
-    postcss-pseudo-class-any-link "^6.0.0"
-    postcss-replace-overflow-wrap "^3.0.0"
-    postcss-selector-matches "^4.0.0"
-    postcss-selector-not "^4.0.0"
-
-postcss-pseudo-class-any-link@^6.0.0:
-  version "6.0.0"
-  resolved "https://registry.yarnpkg.com/postcss-pseudo-class-any-link/-/postcss-pseudo-class-any-link-6.0.0.tgz#2ed3eed393b3702879dec4a87032b210daeb04d1"
-  integrity sha512-lgXW9sYJdLqtmw23otOzrtbDXofUdfYzNm4PIpNE322/swES3VU9XlXHeJS46zT2onFO7V1QFdD4Q9LiZj8mew==
-  dependencies:
-    postcss "^7.0.2"
-    postcss-selector-parser "^5.0.0-rc.3"
-
-postcss-reduce-initial@^4.0.3:
-  version "4.0.3"
-  resolved "https://registry.yarnpkg.com/postcss-reduce-initial/-/postcss-reduce-initial-4.0.3.tgz#7fd42ebea5e9c814609639e2c2e84ae270ba48df"
-  integrity sha512-gKWmR5aUulSjbzOfD9AlJiHCGH6AEVLaM0AV+aSioxUDd16qXP1PCh8d1/BGVvpdWn8k/HiK7n6TjeoXN1F7DA==
-  dependencies:
-    browserslist "^4.0.0"
-    caniuse-api "^3.0.0"
-    has "^1.0.0"
-    postcss "^7.0.0"
-
-postcss-reduce-transforms@^4.0.2:
-  version "4.0.2"
-  resolved "https://registry.yarnpkg.com/postcss-reduce-transforms/-/postcss-reduce-transforms-4.0.2.tgz#17efa405eacc6e07be3414a5ca2d1074681d4e29"
-  integrity sha512-EEVig1Q2QJ4ELpJXMZR8Vt5DQx8/mo+dGWSR7vWXqcob2gQLyQGsionYcGKATXvQzMPn6DSN1vTN7yFximdIAg==
-  dependencies:
-    cssnano-util-get-match "^4.0.0"
-    has "^1.0.0"
-    postcss "^7.0.0"
-    postcss-value-parser "^3.0.0"
-
-postcss-replace-overflow-wrap@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/postcss-replace-overflow-wrap/-/postcss-replace-overflow-wrap-3.0.0.tgz#61b360ffdaedca84c7c918d2b0f0d0ea559ab01c"
-  integrity sha512-2T5hcEHArDT6X9+9dVSPQdo7QHzG4XKclFT8rU5TzJPDN7RIRTbO9c4drUISOVemLj03aezStHCR2AIcr8XLpw==
-  dependencies:
-    postcss "^7.0.2"
-
-postcss-safe-parser@5.0.2:
-  version "5.0.2"
-  resolved "https://registry.yarnpkg.com/postcss-safe-parser/-/postcss-safe-parser-5.0.2.tgz#459dd27df6bc2ba64608824ba39e45dacf5e852d"
-  integrity sha512-jDUfCPJbKOABhwpUKcqCVbbXiloe/QXMcbJ6Iipf3sDIihEzTqRCeMBfRaOHxhBuTYqtASrI1KJWxzztZU4qUQ==
-  dependencies:
-    postcss "^8.1.0"
-
-postcss-selector-matches@^4.0.0:
-  version "4.0.0"
-  resolved "https://registry.yarnpkg.com/postcss-selector-matches/-/postcss-selector-matches-4.0.0.tgz#71c8248f917ba2cc93037c9637ee09c64436fcff"
-  integrity sha512-LgsHwQR/EsRYSqlwdGzeaPKVT0Ml7LAT6E75T8W8xLJY62CE4S/l03BWIt3jT8Taq22kXP08s2SfTSzaraoPww==
-  dependencies:
-    balanced-match "^1.0.0"
-    postcss "^7.0.2"
-
-postcss-selector-not@^4.0.0:
-  version "4.0.1"
-  resolved "https://registry.yarnpkg.com/postcss-selector-not/-/postcss-selector-not-4.0.1.tgz#263016eef1cf219e0ade9a913780fc1f48204cbf"
-  integrity sha512-YolvBgInEK5/79C+bdFMyzqTg6pkYqDbzZIST/PDMqa/o3qtXenD05apBG2jLgT0/BQ77d4U2UK12jWpilqMAQ==
-  dependencies:
-    balanced-match "^1.0.0"
-    postcss "^7.0.2"
-
-postcss-selector-parser@^3.0.0:
-  version "3.1.2"
-  resolved "https://registry.yarnpkg.com/postcss-selector-parser/-/postcss-selector-parser-3.1.2.tgz#b310f5c4c0fdaf76f94902bbaa30db6aa84f5270"
-  integrity sha512-h7fJ/5uWuRVyOtkO45pnt1Ih40CEleeyCHzipqAZO2e5H20g25Y48uYnFUiShvY4rZWNJ/Bib/KVPmanaCtOhA==
-  dependencies:
-    dot-prop "^5.2.0"
-    indexes-of "^1.0.1"
-    uniq "^1.0.1"
-
-postcss-selector-parser@^5.0.0-rc.3, postcss-selector-parser@^5.0.0-rc.4:
-  version "5.0.0"
-  resolved "https://registry.yarnpkg.com/postcss-selector-parser/-/postcss-selector-parser-5.0.0.tgz#249044356697b33b64f1a8f7c80922dddee7195c"
-  integrity sha512-w+zLE5Jhg6Liz8+rQOWEAwtwkyqpfnmsinXjXg6cY7YIONZZtgvE0v2O0uhQBs0peNomOJwWRKt6JBfTdTd3OQ==
-  dependencies:
-    cssesc "^2.0.0"
-    indexes-of "^1.0.1"
-    uniq "^1.0.1"
-
-postcss-selector-parser@^6.0.0, postcss-selector-parser@^6.0.2:
-  version "6.0.4"
-  resolved "https://registry.yarnpkg.com/postcss-selector-parser/-/postcss-selector-parser-6.0.4.tgz#56075a1380a04604c38b063ea7767a129af5c2b3"
-  integrity sha512-gjMeXBempyInaBqpp8gODmwZ52WaYsVOsfr4L4lDQ7n3ncD6mEyySiDtgzCT+NYC0mmeOLvtsF8iaEf0YT6dBw==
-  dependencies:
-    cssesc "^3.0.0"
-    indexes-of "^1.0.1"
-    uniq "^1.0.1"
-    util-deprecate "^1.0.2"
-
-postcss-svgo@^4.0.2:
-  version "4.0.2"
-  resolved "https://registry.yarnpkg.com/postcss-svgo/-/postcss-svgo-4.0.2.tgz#17b997bc711b333bab143aaed3b8d3d6e3d38258"
-  integrity sha512-C6wyjo3VwFm0QgBy+Fu7gCYOkCmgmClghO+pjcxvrcBKtiKt0uCF+hvbMO1fyv5BMImRK90SMb+dwUnfbGd+jw==
-  dependencies:
-    is-svg "^3.0.0"
-    postcss "^7.0.0"
-    postcss-value-parser "^3.0.0"
-    svgo "^1.0.0"
-
-postcss-unique-selectors@^4.0.1:
-  version "4.0.1"
-  resolved "https://registry.yarnpkg.com/postcss-unique-selectors/-/postcss-unique-selectors-4.0.1.tgz#9446911f3289bfd64c6d680f073c03b1f9ee4bac"
-  integrity sha512-+JanVaryLo9QwZjKrmJgkI4Fn8SBgRO6WXQBJi7KiAVPlmxikB5Jzc4EvXMT2H0/m0RjrVVm9rGNhZddm/8Spg==
-  dependencies:
-    alphanum-sort "^1.0.0"
-    postcss "^7.0.0"
-    uniqs "^2.0.0"
-
-postcss-value-parser@^3.0.0:
-  version "3.3.1"
-  resolved "https://registry.yarnpkg.com/postcss-value-parser/-/postcss-value-parser-3.3.1.tgz#9ff822547e2893213cf1c30efa51ac5fd1ba8281"
-  integrity sha512-pISE66AbVkp4fDQ7VHBwRNXzAAKJjw4Vw7nWI/+Q3vuly7SNfgYXvm6i5IgFylHGK5sP/xHAbB7N49OS4gWNyQ==
-
-postcss-value-parser@^4.0.2, postcss-value-parser@^4.1.0:
-  version "4.1.0"
-  resolved "https://registry.yarnpkg.com/postcss-value-parser/-/postcss-value-parser-4.1.0.tgz#443f6a20ced6481a2bda4fa8532a6e55d789a2cb"
-  integrity sha512-97DXOFbQJhk71ne5/Mt6cOu6yxsSfM0QGQyl0L25Gca4yGWEGJaig7l7gbCX623VqTBNGLRLaVUCnNkcedlRSQ==
-
-postcss-values-parser@^2.0.0, postcss-values-parser@^2.0.1:
-  version "2.0.1"
-  resolved "https://registry.yarnpkg.com/postcss-values-parser/-/postcss-values-parser-2.0.1.tgz#da8b472d901da1e205b47bdc98637b9e9e550e5f"
-  integrity sha512-2tLuBsA6P4rYTNKCXYG/71C7j1pU6pK503suYOmn4xYrQIzW+opD+7FAFNuGSdZC/3Qfy334QbeMu7MEb8gOxg==
-  dependencies:
-    flatten "^1.0.2"
-    indexes-of "^1.0.1"
-    uniq "^1.0.1"
-
-postcss@7.0.21:
-  version "7.0.21"
-  resolved "https://registry.yarnpkg.com/postcss/-/postcss-7.0.21.tgz#06bb07824c19c2021c5d056d5b10c35b989f7e17"
-  integrity sha512-uIFtJElxJo29QC753JzhidoAhvp/e/Exezkdhfmt8AymWT6/5B7W1WmponYWkHk2eg6sONyTch0A3nkMPun3SQ==
-  dependencies:
-    chalk "^2.4.2"
-    source-map "^0.6.1"
-    supports-color "^6.1.0"
-
-postcss@^7, postcss@^7.0.0, postcss@^7.0.1, postcss@^7.0.14, postcss@^7.0.17, postcss@^7.0.2, postcss@^7.0.26, postcss@^7.0.27, postcss@^7.0.32, postcss@^7.0.5, postcss@^7.0.6:
-  version "7.0.35"
-  resolved "https://registry.yarnpkg.com/postcss/-/postcss-7.0.35.tgz#d2be00b998f7f211d8a276974079f2e92b970e24"
-  integrity sha512-3QT8bBJeX/S5zKTTjTCIjRF3If4avAT6kqxcASlTWEtAFCb9NH0OUxNDfgZSWdP5fJnBYCMEWkIFfWeugjzYMg==
-  dependencies:
-    chalk "^2.4.2"
-    source-map "^0.6.1"
-    supports-color "^6.1.0"
-
-postcss@^8.1.0:
-  version "8.2.6"
-  resolved "https://registry.yarnpkg.com/postcss/-/postcss-8.2.6.tgz#5d69a974543b45f87e464bc4c3e392a97d6be9fe"
-  integrity sha512-xpB8qYxgPuly166AGlpRjUdEYtmOWx2iCwGmrv4vqZL9YPVviDVPZPRXxnXr6xPZOdxQ9lp3ZBFCRgWJ7LE3Sg==
-  dependencies:
-    colorette "^1.2.1"
-    nanoid "^3.1.20"
-    source-map "^0.6.1"
-
-prelude-ls@^1.2.1:
-  version "1.2.1"
-  resolved "https://registry.yarnpkg.com/prelude-ls/-/prelude-ls-1.2.1.tgz#debc6489d7a6e6b0e7611888cec880337d316396"
-  integrity sha512-vkcDPrRZo1QZLbn5RLGPpg/WmIQ65qoWWhcGKf/b5eplkkarX0m9z8ppCat4mlOqUsWpyNuYgO3VRyrYHSzX5g==
-
-prelude-ls@~1.1.2:
-  version "1.1.2"
-  resolved "https://registry.yarnpkg.com/prelude-ls/-/prelude-ls-1.1.2.tgz#21932a549f5e52ffd9a827f570e04be62a97da54"
-  integrity sha1-IZMqVJ9eUv/ZqCf1cOBL5iqX2lQ=
-
-prepend-http@^1.0.0:
-  version "1.0.4"
-  resolved "https://registry.yarnpkg.com/prepend-http/-/prepend-http-1.0.4.tgz#d4f4562b0ce3696e41ac52d0e002e57a635dc6dc"
-  integrity sha1-1PRWKwzjaW5BrFLQ4ALlemNdxtw=
-
-prettier@^2.2.1:
-  version "2.2.1"
-  resolved "https://registry.yarnpkg.com/prettier/-/prettier-2.2.1.tgz#795a1a78dd52f073da0cd42b21f9c91381923ff5"
-  integrity sha512-PqyhM2yCjg/oKkFPtTGUojv7gnZAoG80ttl45O6x2Ug/rMJw4wcc9k6aaf2hibP7BGVCCM33gZoGjyvt9mm16Q==
-
-pretty-bytes@^5.3.0:
-  version "5.6.0"
-  resolved "https://registry.yarnpkg.com/pretty-bytes/-/pretty-bytes-5.6.0.tgz#356256f643804773c82f64723fe78c92c62beaeb"
-  integrity sha512-FFw039TmrBqFK8ma/7OL3sDz/VytdtJr044/QUJtH0wK9lb9jLq9tJyIxUwtQJHwar2BqtiA4iCWSwo9JLkzFg==
-
-pretty-error@^2.1.1:
-  version "2.1.2"
-  resolved "https://registry.yarnpkg.com/pretty-error/-/pretty-error-2.1.2.tgz#be89f82d81b1c86ec8fdfbc385045882727f93b6"
-  integrity sha512-EY5oDzmsX5wvuynAByrmY0P0hcp+QpnAKbJng2A2MPjVKXCxrDSUkzghVJ4ZGPIv+JC4gX8fPUWscC0RtjsWGw==
-  dependencies:
-    lodash "^4.17.20"
-    renderkid "^2.0.4"
-
-pretty-format@^26.0.0, pretty-format@^26.6.0, pretty-format@^26.6.2:
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/pretty-format/-/pretty-format-26.6.2.tgz#e35c2705f14cb7fe2fe94fa078345b444120fc93"
-  integrity sha512-7AeGuCYNGmycyQbCqd/3PWH4eOoX/OiCa0uphp57NVTeAGdJGaAliecxwBDHYQCIvrW7aDBZCYeNTP/WX69mkg==
-  dependencies:
-    "@jest/types" "^26.6.2"
-    ansi-regex "^5.0.0"
-    ansi-styles "^4.0.0"
-    react-is "^17.0.1"
-
-process-nextick-args@~2.0.0:
-  version "2.0.1"
-  resolved "https://registry.yarnpkg.com/process-nextick-args/-/process-nextick-args-2.0.1.tgz#7820d9b16120cc55ca9ae7792680ae7dba6d7fe2"
-  integrity sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==
-
-process@^0.11.10:
-  version "0.11.10"
-  resolved "https://registry.yarnpkg.com/process/-/process-0.11.10.tgz#7332300e840161bda3e69a1d1d91a7d4bc16f182"
-  integrity sha1-czIwDoQBYb2j5podHZGn1LwW8YI=
-
-progress@^2.0.0:
-  version "2.0.3"
-  resolved "https://registry.yarnpkg.com/progress/-/progress-2.0.3.tgz#7e8cf8d8f5b8f239c1bc68beb4eb78567d572ef8"
-  integrity sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==
-
-promise-inflight@^1.0.1:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/promise-inflight/-/promise-inflight-1.0.1.tgz#98472870bf228132fcbdd868129bad12c3c029e3"
-  integrity sha1-mEcocL8igTL8vdhoEputEsPAKeM=
-
-promise@^8.1.0:
-  version "8.1.0"
-  resolved "https://registry.yarnpkg.com/promise/-/promise-8.1.0.tgz#697c25c3dfe7435dd79fcd58c38a135888eaf05e"
-  integrity sha512-W04AqnILOL/sPRXziNicCjSNRruLAuIHEOVBazepu0545DDNGYHz7ar9ZgZ1fMU8/MA4mVxp5rkBWRi6OXIy3Q==
-  dependencies:
-    asap "~2.0.6"
-
-prompts@2.4.0, prompts@^2.0.1:
-  version "2.4.0"
-  resolved "https://registry.yarnpkg.com/prompts/-/prompts-2.4.0.tgz#4aa5de0723a231d1ee9121c40fdf663df73f61d7"
-  integrity sha512-awZAKrk3vN6CroQukBL+R9051a4R3zCZBlJm/HBfrSZ8iTpYix3VX1vU4mveiLpiwmOJT4wokTF9m6HUk4KqWQ==
-  dependencies:
-    kleur "^3.0.3"
-    sisteransi "^1.0.5"
-
-prop-types@^15.6.2, prop-types@^15.7.2:
-  version "15.7.2"
-  resolved "https://registry.yarnpkg.com/prop-types/-/prop-types-15.7.2.tgz#52c41e75b8c87e72b9d9360e0206b99dcbffa6c5"
-  integrity sha512-8QQikdH7//R2vurIJSutZ1smHYTcLpRWEOlHnzcWHmBYrOGUysKwSsrC89BCiFj3CbrfJ/nXFdJepOVrY1GCHQ==
-  dependencies:
-    loose-envify "^1.4.0"
-    object-assign "^4.1.1"
-    react-is "^16.8.1"
-
-proxy-addr@~2.0.5:
-  version "2.0.6"
-  resolved "https://registry.yarnpkg.com/proxy-addr/-/proxy-addr-2.0.6.tgz#fdc2336505447d3f2f2c638ed272caf614bbb2bf"
-  integrity sha512-dh/frvCBVmSsDYzw6n926jv974gddhkFPfiN8hPOi30Wax25QZyZEGveluCgliBnqmuM+UJmBErbAUFIoDbjOw==
-  dependencies:
-    forwarded "~0.1.2"
-    ipaddr.js "1.9.1"
-
-prr@~1.0.1:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/prr/-/prr-1.0.1.tgz#d3fc114ba06995a45ec6893f484ceb1d78f5f476"
-  integrity sha1-0/wRS6BplaRexok/SEzrHXj19HY=
-
-psl@^1.1.28:
-  version "1.8.0"
-  resolved "https://registry.yarnpkg.com/psl/-/psl-1.8.0.tgz#9326f8bcfb013adcc005fdff056acce020e51c24"
-  integrity sha512-RIdOzyoavK+hA18OGGWDqUTsCLhtA7IcZ/6NCs4fFJaHBDab+pDDmDIByWFRQJq2Cd7r1OoQxBGKOaztq+hjIQ==
-
-public-encrypt@^4.0.0:
-  version "4.0.3"
-  resolved "https://registry.yarnpkg.com/public-encrypt/-/public-encrypt-4.0.3.tgz#4fcc9d77a07e48ba7527e7cbe0de33d0701331e0"
-  integrity sha512-zVpa8oKZSz5bTMTFClc1fQOnyyEzpl5ozpi1B5YcvBrdohMjH2rfsBtyXcuNuwjsDIXmBYlF2N5FlJYhR29t8Q==
-  dependencies:
-    bn.js "^4.1.0"
-    browserify-rsa "^4.0.0"
-    create-hash "^1.1.0"
-    parse-asn1 "^5.0.0"
-    randombytes "^2.0.1"
-    safe-buffer "^5.1.2"
-
-pump@^2.0.0:
-  version "2.0.1"
-  resolved "https://registry.yarnpkg.com/pump/-/pump-2.0.1.tgz#12399add6e4cf7526d973cbc8b5ce2e2908b3909"
-  integrity sha512-ruPMNRkN3MHP1cWJc9OWr+T/xDP0jhXYCLfJcBuX54hhfIBnaQmAUMfDcG4DM5UMWByBbJY69QSphm3jtDKIkA==
-  dependencies:
-    end-of-stream "^1.1.0"
-    once "^1.3.1"
-
-pump@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/pump/-/pump-3.0.0.tgz#b4a2116815bde2f4e1ea602354e8c75565107a64"
-  integrity sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==
-  dependencies:
-    end-of-stream "^1.1.0"
-    once "^1.3.1"
-
-pumpify@^1.3.3:
-  version "1.5.1"
-  resolved "https://registry.yarnpkg.com/pumpify/-/pumpify-1.5.1.tgz#36513be246ab27570b1a374a5ce278bfd74370ce"
-  integrity sha512-oClZI37HvuUJJxSKKrC17bZ9Cu0ZYhEAGPsPUy9KlMUmv9dKX2o77RUmq7f3XjIxbwyGwYzbzQ1L2Ks8sIradQ==
-  dependencies:
-    duplexify "^3.6.0"
-    inherits "^2.0.3"
-    pump "^2.0.0"
-
-punycode@1.3.2:
-  version "1.3.2"
-  resolved "https://registry.yarnpkg.com/punycode/-/punycode-1.3.2.tgz#9653a036fb7c1ee42342f2325cceefea3926c48d"
-  integrity sha1-llOgNvt8HuQjQvIyXM7v6jkmxI0=
-
-punycode@^1.2.4:
-  version "1.4.1"
-  resolved "https://registry.yarnpkg.com/punycode/-/punycode-1.4.1.tgz#c0d5a63b2718800ad8e1eb0fa5269c84dd41845e"
-  integrity sha1-wNWmOycYgArY4esPpSachN1BhF4=
-
-punycode@^2.1.0, punycode@^2.1.1:
-  version "2.1.1"
-  resolved "https://registry.yarnpkg.com/punycode/-/punycode-2.1.1.tgz#b58b010ac40c22c5657616c8d2c2c02c7bf479ec"
-  integrity sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A==
-
-q@^1.1.2:
-  version "1.5.1"
-  resolved "https://registry.yarnpkg.com/q/-/q-1.5.1.tgz#7e32f75b41381291d04611f1bf14109ac00651d7"
-  integrity sha1-fjL3W0E4EpHQRhHxvxQQmsAGUdc=
-
-qs@6.7.0:
-  version "6.7.0"
-  resolved "https://registry.yarnpkg.com/qs/-/qs-6.7.0.tgz#41dc1a015e3d581f1621776be31afb2876a9b1bc"
-  integrity sha512-VCdBRNFTX1fyE7Nb6FYoURo/SPe62QCaAyzJvUjwRaIsc+NePBEniHlvxFmmX56+HZphIGtV0XeCirBtpDrTyQ==
-
-qs@~6.5.2:
-  version "6.5.2"
-  resolved "https://registry.yarnpkg.com/qs/-/qs-6.5.2.tgz#cb3ae806e8740444584ef154ce8ee98d403f3e36"
-  integrity sha512-N5ZAX4/LxJmF+7wN74pUD6qAh9/wnvdQcjq9TZjevvXzSUo7bfmw91saqMjzGS2xq91/odN2dW/WOl7qQHNDGA==
-
-query-string@^4.1.0:
-  version "4.3.4"
-  resolved "https://registry.yarnpkg.com/query-string/-/query-string-4.3.4.tgz#bbb693b9ca915c232515b228b1a02b609043dbeb"
-  integrity sha1-u7aTucqRXCMlFbIosaArYJBD2+s=
-  dependencies:
-    object-assign "^4.1.0"
-    strict-uri-encode "^1.0.0"
-
-querystring-es3@^0.2.0:
-  version "0.2.1"
-  resolved "https://registry.yarnpkg.com/querystring-es3/-/querystring-es3-0.2.1.tgz#9ec61f79049875707d69414596fd907a4d711e73"
-  integrity sha1-nsYfeQSYdXB9aUFFlv2Qek1xHnM=
-
-querystring@0.2.0:
-  version "0.2.0"
-  resolved "https://registry.yarnpkg.com/querystring/-/querystring-0.2.0.tgz#b209849203bb25df820da756e747005878521620"
-  integrity sha1-sgmEkgO7Jd+CDadW50cAWHhSFiA=
-
-querystring@^0.2.0:
-  version "0.2.1"
-  resolved "https://registry.yarnpkg.com/querystring/-/querystring-0.2.1.tgz#40d77615bb09d16902a85c3e38aa8b5ed761c2dd"
-  integrity sha512-wkvS7mL/JMugcup3/rMitHmd9ecIGd2lhFhK9N3UUQ450h66d1r3Y9nvXzQAW1Lq+wyx61k/1pfKS5KuKiyEbg==
-
-querystringify@^2.1.1:
-  version "2.2.0"
-  resolved "https://registry.yarnpkg.com/querystringify/-/querystringify-2.2.0.tgz#3345941b4153cb9d082d8eee4cda2016a9aef7f6"
-  integrity sha512-FIqgj2EUvTa7R50u0rGsyTftzjYmv/a3hO345bZNrqabNqjtgiDMgmo4mkUjd+nzU5oF3dClKqFIPUKybUyqoQ==
-
-queue-microtask@^1.2.2:
-  version "1.2.2"
-  resolved "https://registry.yarnpkg.com/queue-microtask/-/queue-microtask-1.2.2.tgz#abf64491e6ecf0f38a6502403d4cda04f372dfd3"
-  integrity sha512-dB15eXv3p2jDlbOiNLyMabYg1/sXvppd8DP2J3EOCQ0AkuSXCW2tP7mnVouVLJKgUMY6yP0kcQDVpLCN13h4Xg==
-
-raf@^3.4.1:
-  version "3.4.1"
-  resolved "https://registry.yarnpkg.com/raf/-/raf-3.4.1.tgz#0742e99a4a6552f445d73e3ee0328af0ff1ede39"
-  integrity sha512-Sq4CW4QhwOHE8ucn6J34MqtZCeWFP2aQSmrlroYgqAV1PjStIhJXxYuTgUIfkEk7zTLjmIjLmU5q+fbD1NnOJA==
-  dependencies:
-    performance-now "^2.1.0"
-
-randombytes@^2.0.0, randombytes@^2.0.1, randombytes@^2.0.5, randombytes@^2.1.0:
-  version "2.1.0"
-  resolved "https://registry.yarnpkg.com/randombytes/-/randombytes-2.1.0.tgz#df6f84372f0270dc65cdf6291349ab7a473d4f2a"
-  integrity sha512-vYl3iOX+4CKUWuxGi9Ukhie6fsqXqS9FE2Zaic4tNFD2N2QQaXOMFbuKK4QmDHC0JO6B1Zp41J0LpT0oR68amQ==
-  dependencies:
-    safe-buffer "^5.1.0"
-
-randomfill@^1.0.3:
-  version "1.0.4"
-  resolved "https://registry.yarnpkg.com/randomfill/-/randomfill-1.0.4.tgz#c92196fc86ab42be983f1bf31778224931d61458"
-  integrity sha512-87lcbR8+MhcWcUiQ+9e+Rwx8MyR2P7qnt15ynUlbm3TU/fjbgz4GsvfSUDTemtCCtVCqb4ZcEFlyPNTh9bBTLw==
-  dependencies:
-    randombytes "^2.0.5"
-    safe-buffer "^5.1.0"
-
-range-parser@^1.2.1, range-parser@~1.2.1:
-  version "1.2.1"
-  resolved "https://registry.yarnpkg.com/range-parser/-/range-parser-1.2.1.tgz#3cf37023d199e1c24d1a55b84800c2f3e6468031"
-  integrity sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg==
-
-raw-body@2.4.0:
-  version "2.4.0"
-  resolved "https://registry.yarnpkg.com/raw-body/-/raw-body-2.4.0.tgz#a1ce6fb9c9bc356ca52e89256ab59059e13d0332"
-  integrity sha512-4Oz8DUIwdvoa5qMJelxipzi/iJIi40O5cGV1wNYp5hvZP8ZN0T+jiNkL0QepXs+EsQ9XJ8ipEDoiH70ySUJP3Q==
-  dependencies:
-    bytes "3.1.0"
-    http-errors "1.7.2"
-    iconv-lite "0.4.24"
-    unpipe "1.0.0"
-
-react-app-polyfill@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/react-app-polyfill/-/react-app-polyfill-2.0.0.tgz#a0bea50f078b8a082970a9d853dc34b6dcc6a3cf"
-  integrity sha512-0sF4ny9v/B7s6aoehwze9vJNWcmCemAUYBVasscVr92+UYiEqDXOxfKjXN685mDaMRNF3WdhHQs76oTODMocFA==
-  dependencies:
-    core-js "^3.6.5"
-    object-assign "^4.1.1"
-    promise "^8.1.0"
-    raf "^3.4.1"
-    regenerator-runtime "^0.13.7"
-    whatwg-fetch "^3.4.1"
-
-react-clientside-effect@^1.2.2:
-  version "1.2.5"
-  resolved "https://registry.yarnpkg.com/react-clientside-effect/-/react-clientside-effect-1.2.5.tgz#e2c4dc3c9ee109f642fac4f5b6e9bf5bcd2219a3"
-  integrity sha512-2bL8qFW1TGBHozGGbVeyvnggRpMjibeZM2536AKNENLECutp2yfs44IL8Hmpn8qjFQ2K7A9PnYf3vc7aQq/cPA==
-  dependencies:
-    "@babel/runtime" "^7.12.13"
-
-react-dev-utils@^11.0.3:
-  version "11.0.3"
-  resolved "https://registry.yarnpkg.com/react-dev-utils/-/react-dev-utils-11.0.3.tgz#b61ed499c7d74f447d4faddcc547e5e671e97c08"
-  integrity sha512-4lEA5gF4OHrcJLMUV1t+4XbNDiJbsAWCH5Z2uqlTqW6dD7Cf5nEASkeXrCI/Mz83sI2o527oBIFKVMXtRf1Vtg==
-  dependencies:
-    "@babel/code-frame" "7.10.4"
-    address "1.1.2"
-    browserslist "4.14.2"
-    chalk "2.4.2"
-    cross-spawn "7.0.3"
-    detect-port-alt "1.1.6"
-    escape-string-regexp "2.0.0"
-    filesize "6.1.0"
-    find-up "4.1.0"
-    fork-ts-checker-webpack-plugin "4.1.6"
-    global-modules "2.0.0"
-    globby "11.0.1"
-    gzip-size "5.1.1"
-    immer "8.0.1"
-    is-root "2.1.0"
-    loader-utils "2.0.0"
-    open "^7.0.2"
-    pkg-up "3.1.0"
-    prompts "2.4.0"
-    react-error-overlay "^6.0.9"
-    recursive-readdir "2.2.2"
-    shell-quote "1.7.2"
-    strip-ansi "6.0.0"
-    text-table "0.2.0"
-
-react-dom@^17.0.1:
-  version "17.0.1"
-  resolved "https://registry.yarnpkg.com/react-dom/-/react-dom-17.0.1.tgz#1de2560474ec9f0e334285662ede52dbc5426fc6"
-  integrity sha512-6eV150oJZ9U2t9svnsspTMrWNyHc6chX0KzDeAOXftRa8bNeOKTTfCJ7KorIwenkHd2xqVTBTCZd79yk/lx/Ug==
-  dependencies:
-    loose-envify "^1.1.0"
-    object-assign "^4.1.1"
-    scheduler "^0.20.1"
-
-react-error-overlay@^6.0.9:
-  version "6.0.9"
-  resolved "https://registry.yarnpkg.com/react-error-overlay/-/react-error-overlay-6.0.9.tgz#3c743010c9359608c375ecd6bc76f35d93995b0a"
-  integrity sha512-nQTTcUu+ATDbrSD1BZHr5kgSD4oF8OFjxun8uAaL8RwPBacGBNPf/yAuVVdx17N8XNzRDMrZ9XcKZHCjPW+9ew==
-
-react-fast-compare@3.2.0:
-  version "3.2.0"
-  resolved "https://registry.yarnpkg.com/react-fast-compare/-/react-fast-compare-3.2.0.tgz#641a9da81b6a6320f270e89724fb45a0b39e43bb"
-  integrity sha512-rtGImPZ0YyLrscKI9xTpV8psd6I8VAtjKCzQDlzyDvqJA8XOW78TXYQwNRNd8g8JZnDu8q9Fu/1v4HPAVwVdHA==
-
-react-focus-lock@2.5.0:
-  version "2.5.0"
-  resolved "https://registry.yarnpkg.com/react-focus-lock/-/react-focus-lock-2.5.0.tgz#12e3a3940e897c26e2c2a0408cd25ea3c99b3709"
-  integrity sha512-XLxj6uTXgz0US8TmqNU2jMfnXwZG0mH2r/afQqvPEaX6nyEll5LHVcEXk2XDUQ34RVeLPkO/xK5x6c/qiuSq/A==
-  dependencies:
-    "@babel/runtime" "^7.0.0"
-    focus-lock "^0.8.1"
-    prop-types "^15.6.2"
-    react-clientside-effect "^1.2.2"
-    use-callback-ref "^1.2.1"
-    use-sidecar "^1.0.1"
-
-react-icons@^4.2.0:
-  version "4.2.0"
-  resolved "https://registry.yarnpkg.com/react-icons/-/react-icons-4.2.0.tgz#6dda80c8a8f338ff96a1851424d63083282630d0"
-  integrity sha512-rmzEDFt+AVXRzD7zDE21gcxyBizD/3NqjbX6cmViAgdqfJ2UiLer8927/QhhrXQV7dEj/1EGuOTPp7JnLYVJKQ==
-
-react-is@^16.6.0, react-is@^16.7.0, react-is@^16.8.1:
-  version "16.13.1"
-  resolved "https://registry.yarnpkg.com/react-is/-/react-is-16.13.1.tgz#789729a4dc36de2999dc156dd6c1d9c18cea56a4"
-  integrity sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ==
-
-react-is@^17.0.1:
-  version "17.0.1"
-  resolved "https://registry.yarnpkg.com/react-is/-/react-is-17.0.1.tgz#5b3531bd76a645a4c9fb6e693ed36419e3301339"
-  integrity sha512-NAnt2iGDXohE5LI7uBnLnqvLQMtzhkiAOLXTmv+qnF9Ky7xAPcX8Up/xWIhxvLVGJvuLiNc4xQLtuqDRzb4fSA==
-
-react-refresh@^0.8.3:
-  version "0.8.3"
-  resolved "https://registry.yarnpkg.com/react-refresh/-/react-refresh-0.8.3.tgz#721d4657672d400c5e3c75d063c4a85fb2d5d68f"
-  integrity sha512-X8jZHc7nCMjaCqoU+V2I0cOhNW+QMBwSUkeXnTi8IPe6zaRWfn60ZzvFDZqWPfmSJfjub7dDW1SP0jaHWLu/hg==
-
-react-remove-scroll-bar@^2.1.0:
-  version "2.2.0"
-  resolved "https://registry.yarnpkg.com/react-remove-scroll-bar/-/react-remove-scroll-bar-2.2.0.tgz#d4d545a7df024f75d67e151499a6ab5ac97c8cdd"
-  integrity sha512-UU9ZBP1wdMR8qoUs7owiVcpaPwsQxUDC2lypP6mmixaGlARZa7ZIBx1jcuObLdhMOvCsnZcvetOho0wzPa9PYg==
-  dependencies:
-    react-style-singleton "^2.1.0"
-    tslib "^1.0.0"
-
-react-remove-scroll@2.4.1:
-  version "2.4.1"
-  resolved "https://registry.yarnpkg.com/react-remove-scroll/-/react-remove-scroll-2.4.1.tgz#e0af6126621083a5064591d367291a81b2d107f5"
-  integrity sha512-K7XZySEzOHMTq7dDwcHsZA6Y7/1uX5RsWhRXVYv8rdh+y9Qz2nMwl9RX/Mwnj/j7JstCGmxyfyC0zbVGXYh3mA==
-  dependencies:
-    react-remove-scroll-bar "^2.1.0"
-    react-style-singleton "^2.1.0"
-    tslib "^1.0.0"
-    use-callback-ref "^1.2.3"
-    use-sidecar "^1.0.1"
-
-react-router-dom@^5.2.0:
-  version "5.2.0"
-  resolved "https://registry.yarnpkg.com/react-router-dom/-/react-router-dom-5.2.0.tgz#9e65a4d0c45e13289e66c7b17c7e175d0ea15662"
-  integrity sha512-gxAmfylo2QUjcwxI63RhQ5G85Qqt4voZpUXSEqCwykV0baaOTQDR1f0PmY8AELqIyVc0NEZUj0Gov5lNGcXgsA==
-  dependencies:
-    "@babel/runtime" "^7.1.2"
-    history "^4.9.0"
-    loose-envify "^1.3.1"
-    prop-types "^15.6.2"
-    react-router "5.2.0"
-    tiny-invariant "^1.0.2"
-    tiny-warning "^1.0.0"
-
-react-router@5.2.0:
-  version "5.2.0"
-  resolved "https://registry.yarnpkg.com/react-router/-/react-router-5.2.0.tgz#424e75641ca8747fbf76e5ecca69781aa37ea293"
-  integrity sha512-smz1DUuFHRKdcJC0jobGo8cVbhO3x50tCL4icacOlcwDOEQPq4TMqwx3sY1TP+DvtTgz4nm3thuo7A+BK2U0Dw==
-  dependencies:
-    "@babel/runtime" "^7.1.2"
-    history "^4.9.0"
-    hoist-non-react-statics "^3.1.0"
-    loose-envify "^1.3.1"
-    mini-create-react-context "^0.4.0"
-    path-to-regexp "^1.7.0"
-    prop-types "^15.6.2"
-    react-is "^16.6.0"
-    tiny-invariant "^1.0.2"
-    tiny-warning "^1.0.0"
-
-react-scripts@4.0.3:
-  version "4.0.3"
-  resolved "https://registry.yarnpkg.com/react-scripts/-/react-scripts-4.0.3.tgz#b1cafed7c3fa603e7628ba0f187787964cb5d345"
-  integrity sha512-S5eO4vjUzUisvkIPB7jVsKtuH2HhWcASREYWHAQ1FP5HyCv3xgn+wpILAEWkmy+A+tTNbSZClhxjT3qz6g4L1A==
-  dependencies:
-    "@babel/core" "7.12.3"
-    "@pmmmwh/react-refresh-webpack-plugin" "0.4.3"
-    "@svgr/webpack" "5.5.0"
-    "@typescript-eslint/eslint-plugin" "^4.5.0"
-    "@typescript-eslint/parser" "^4.5.0"
-    babel-eslint "^10.1.0"
-    babel-jest "^26.6.0"
-    babel-loader "8.1.0"
-    babel-plugin-named-asset-import "^0.3.7"
-    babel-preset-react-app "^10.0.0"
-    bfj "^7.0.2"
-    camelcase "^6.1.0"
-    case-sensitive-paths-webpack-plugin "2.3.0"
-    css-loader "4.3.0"
-    dotenv "8.2.0"
-    dotenv-expand "5.1.0"
-    eslint "^7.11.0"
-    eslint-config-react-app "^6.0.0"
-    eslint-plugin-flowtype "^5.2.0"
-    eslint-plugin-import "^2.22.1"
-    eslint-plugin-jest "^24.1.0"
-    eslint-plugin-jsx-a11y "^6.3.1"
-    eslint-plugin-react "^7.21.5"
-    eslint-plugin-react-hooks "^4.2.0"
-    eslint-plugin-testing-library "^3.9.2"
-    eslint-webpack-plugin "^2.5.2"
-    file-loader "6.1.1"
-    fs-extra "^9.0.1"
-    html-webpack-plugin "4.5.0"
-    identity-obj-proxy "3.0.0"
-    jest "26.6.0"
-    jest-circus "26.6.0"
-    jest-resolve "26.6.0"
-    jest-watch-typeahead "0.6.1"
-    mini-css-extract-plugin "0.11.3"
-    optimize-css-assets-webpack-plugin "5.0.4"
-    pnp-webpack-plugin "1.6.4"
-    postcss-flexbugs-fixes "4.2.1"
-    postcss-loader "3.0.0"
-    postcss-normalize "8.0.1"
-    postcss-preset-env "6.7.0"
-    postcss-safe-parser "5.0.2"
-    prompts "2.4.0"
-    react-app-polyfill "^2.0.0"
-    react-dev-utils "^11.0.3"
-    react-refresh "^0.8.3"
-    resolve "1.18.1"
-    resolve-url-loader "^3.1.2"
-    sass-loader "^10.0.5"
-    semver "7.3.2"
-    style-loader "1.3.0"
-    terser-webpack-plugin "4.2.3"
-    ts-pnp "1.2.0"
-    url-loader "4.1.1"
-    webpack "4.44.2"
-    webpack-dev-server "3.11.1"
-    webpack-manifest-plugin "2.2.0"
-    workbox-webpack-plugin "5.1.4"
-  optionalDependencies:
-    fsevents "^2.1.3"
-
-react-style-singleton@^2.1.0:
-  version "2.1.1"
-  resolved "https://registry.yarnpkg.com/react-style-singleton/-/react-style-singleton-2.1.1.tgz#ce7f90b67618be2b6b94902a30aaea152ce52e66"
-  integrity sha512-jNRp07Jza6CBqdRKNgGhT3u9umWvils1xsuMOjZlghBDH2MU0PL2WZor4PGYjXpnRCa9DQSlHMs/xnABWOwYbA==
-  dependencies:
-    get-nonce "^1.0.0"
-    invariant "^2.2.4"
-    tslib "^1.0.0"
-
-react-table@^7.6.3:
-  version "7.6.3"
-  resolved "https://registry.yarnpkg.com/react-table/-/react-table-7.6.3.tgz#76434392b3f62344bdb704f5b227c2f29c1ffb14"
-  integrity sha512-hfPF13zDLxPMpLKzIKCE8RZud9T/XrRTsaCIf8zXpWZIZ2juCl7qrGpo3AQw9eAetXV5DP7s2GDm+hht7qq5Dw==
-
-react-timeago@^5.2.0:
-  version "5.2.0"
-  resolved "https://registry.yarnpkg.com/react-timeago/-/react-timeago-5.2.0.tgz#d655d40aa55e4fe08a92234481a6aea7f656ab5d"
-  integrity sha512-wCEEDGQHMdFh/PLp+Hj5vk9ZoC4KjQ5u0u6+KrrY9rny5LqJ2gZvNNEAS4mhSZDV1i7JLgQI5VQTAux7f+vj2w==
-
-react@^17.0.1:
-  version "17.0.1"
-  resolved "https://registry.yarnpkg.com/react/-/react-17.0.1.tgz#6e0600416bd57574e3f86d92edba3d9008726127"
-  integrity sha512-lG9c9UuMHdcAexXtigOZLX8exLWkW0Ku29qPRU8uhF2R9BN96dLCt0psvzPLlHc5OWkgymP3qwTRgbnw5BKx3w==
-  dependencies:
-    loose-envify "^1.1.0"
-    object-assign "^4.1.1"
-
-read-pkg-up@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/read-pkg-up/-/read-pkg-up-2.0.0.tgz#6b72a8048984e0c41e79510fd5e9fa99b3b549be"
-  integrity sha1-a3KoBImE4MQeeVEP1en6mbO1Sb4=
-  dependencies:
-    find-up "^2.0.0"
-    read-pkg "^2.0.0"
-
-read-pkg-up@^7.0.1:
-  version "7.0.1"
-  resolved "https://registry.yarnpkg.com/read-pkg-up/-/read-pkg-up-7.0.1.tgz#f3a6135758459733ae2b95638056e1854e7ef507"
-  integrity sha512-zK0TB7Xd6JpCLmlLmufqykGE+/TlOePD6qKClNW7hHDKFh/J7/7gCWGR7joEQEW1bKq3a3yUZSObOoWLFQ4ohg==
-  dependencies:
-    find-up "^4.1.0"
-    read-pkg "^5.2.0"
-    type-fest "^0.8.1"
-
-read-pkg@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/read-pkg/-/read-pkg-2.0.0.tgz#8ef1c0623c6a6db0dc6713c4bfac46332b2368f8"
-  integrity sha1-jvHAYjxqbbDcZxPEv6xGMysjaPg=
-  dependencies:
-    load-json-file "^2.0.0"
-    normalize-package-data "^2.3.2"
-    path-type "^2.0.0"
-
-read-pkg@^5.2.0:
-  version "5.2.0"
-  resolved "https://registry.yarnpkg.com/read-pkg/-/read-pkg-5.2.0.tgz#7bf295438ca5a33e56cd30e053b34ee7250c93cc"
-  integrity sha512-Ug69mNOpfvKDAc2Q8DRpMjjzdtrnv9HcSMX+4VsZxD1aZ6ZzrIE7rlzXBtWTyhULSMKg076AW6WR5iZpD0JiOg==
-  dependencies:
-    "@types/normalize-package-data" "^2.4.0"
-    normalize-package-data "^2.5.0"
-    parse-json "^5.0.0"
-    type-fest "^0.6.0"
-
-"readable-stream@1 || 2", readable-stream@^2.0.0, readable-stream@^2.0.1, readable-stream@^2.0.2, readable-stream@^2.1.5, readable-stream@^2.2.2, readable-stream@^2.3.3, readable-stream@^2.3.6, readable-stream@~2.3.6:
-  version "2.3.7"
-  resolved "https://registry.yarnpkg.com/readable-stream/-/readable-stream-2.3.7.tgz#1eca1cf711aef814c04f62252a36a62f6cb23b57"
-  integrity sha512-Ebho8K4jIbHAxnuxi7o42OrZgF/ZTNcsZj6nRKyUmkhLFq8CHItp/fy6hQZuZmP/n3yZ9VBUbp4zz/mX8hmYPw==
-  dependencies:
-    core-util-is "~1.0.0"
-    inherits "~2.0.3"
-    isarray "~1.0.0"
-    process-nextick-args "~2.0.0"
-    safe-buffer "~5.1.1"
-    string_decoder "~1.1.1"
-    util-deprecate "~1.0.1"
-
-readable-stream@^3.0.6, readable-stream@^3.1.1, readable-stream@^3.6.0:
-  version "3.6.0"
-  resolved "https://registry.yarnpkg.com/readable-stream/-/readable-stream-3.6.0.tgz#337bbda3adc0706bd3e024426a286d4b4b2c9198"
-  integrity sha512-BViHy7LKeTz4oNnkcLJ+lVSL6vpiFeX6/d3oSH8zCW7UxP2onchk+vTGB143xuFjHS3deTgkKoXXymXqymiIdA==
-  dependencies:
-    inherits "^2.0.3"
-    string_decoder "^1.1.1"
-    util-deprecate "^1.0.1"
-
-readdirp@^2.2.1:
-  version "2.2.1"
-  resolved "https://registry.yarnpkg.com/readdirp/-/readdirp-2.2.1.tgz#0e87622a3325aa33e892285caf8b4e846529a525"
-  integrity sha512-1JU/8q+VgFZyxwrJ+SVIOsh+KywWGpds3NTqikiKpDMZWScmAYyKIgqkO+ARvNWJfXeXR1zxz7aHF4u4CyH6vQ==
-  dependencies:
-    graceful-fs "^4.1.11"
-    micromatch "^3.1.10"
-    readable-stream "^2.0.2"
-
-readdirp@~3.5.0:
-  version "3.5.0"
-  resolved "https://registry.yarnpkg.com/readdirp/-/readdirp-3.5.0.tgz#9ba74c019b15d365278d2e91bb8c48d7b4d42c9e"
-  integrity sha512-cMhu7c/8rdhkHXWsY+osBhfSy0JikwpHK/5+imo+LpeasTF8ouErHrlYkwT0++njiyuDvc7OFY5T3ukvZ8qmFQ==
-  dependencies:
-    picomatch "^2.2.1"
-
-recursive-readdir@2.2.2:
-  version "2.2.2"
-  resolved "https://registry.yarnpkg.com/recursive-readdir/-/recursive-readdir-2.2.2.tgz#9946fb3274e1628de6e36b2f6714953b4845094f"
-  integrity sha512-nRCcW9Sj7NuZwa2XvH9co8NPeXUBhZP7CRKJtU+cS6PW9FpCIFoI5ib0NT1ZrbNuPoRy0ylyCaUL8Gih4LSyFg==
-  dependencies:
-    minimatch "3.0.4"
-
-redent@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/redent/-/redent-3.0.0.tgz#e557b7998316bb53c9f1f56fa626352c6963059f"
-  integrity sha512-6tDA8g98We0zd0GvVeMT9arEOnTw9qM03L9cJXaCjrip1OO764RDBLBfrB4cwzNGDj5OA5ioymC9GkizgWJDUg==
-  dependencies:
-    indent-string "^4.0.0"
-    strip-indent "^3.0.0"
-
-regenerate-unicode-properties@^8.2.0:
-  version "8.2.0"
-  resolved "https://registry.yarnpkg.com/regenerate-unicode-properties/-/regenerate-unicode-properties-8.2.0.tgz#e5de7111d655e7ba60c057dbe9ff37c87e65cdec"
-  integrity sha512-F9DjY1vKLo/tPePDycuH3dn9H1OTPIkVD9Kz4LODu+F2C75mgjAJ7x/gwy6ZcSNRAAkhNlJSOHRe8k3p+K9WhA==
-  dependencies:
-    regenerate "^1.4.0"
-
-regenerate@^1.4.0:
-  version "1.4.2"
-  resolved "https://registry.yarnpkg.com/regenerate/-/regenerate-1.4.2.tgz#b9346d8827e8f5a32f7ba29637d398b69014848a"
-  integrity sha512-zrceR/XhGYU/d/opr2EKO7aRHUeiBI8qjtfHqADTwZd6Szfy16la6kqD0MIUs5z5hx6AaKa+PixpPrR289+I0A==
-
-regenerator-runtime@^0.11.0:
-  version "0.11.1"
-  resolved "https://registry.yarnpkg.com/regenerator-runtime/-/regenerator-runtime-0.11.1.tgz#be05ad7f9bf7d22e056f9726cee5017fbf19e2e9"
-  integrity sha512-MguG95oij0fC3QV3URf4V2SDYGJhJnJGqvIIgdECeODCT98wSWDAJ94SSuVpYQUoTcGUIL6L4yNB7j1DFFHSBg==
-
-regenerator-runtime@^0.13.4, regenerator-runtime@^0.13.7:
-  version "0.13.7"
-  resolved "https://registry.yarnpkg.com/regenerator-runtime/-/regenerator-runtime-0.13.7.tgz#cac2dacc8a1ea675feaabaeb8ae833898ae46f55"
-  integrity sha512-a54FxoJDIr27pgf7IgeQGxmqUNYrcV338lf/6gH456HZ/PhX+5BcwHXG9ajESmwe6WRO0tAzRUrRmNONWgkrew==
-
-regenerator-transform@^0.14.2:
-  version "0.14.5"
-  resolved "https://registry.yarnpkg.com/regenerator-transform/-/regenerator-transform-0.14.5.tgz#c98da154683671c9c4dcb16ece736517e1b7feb4"
-  integrity sha512-eOf6vka5IO151Jfsw2NO9WpGX58W6wWmefK3I1zEGr0lOD0u8rwPaNqQL1aRxUaxLeKO3ArNh3VYg1KbaD+FFw==
-  dependencies:
-    "@babel/runtime" "^7.8.4"
-
-regex-not@^1.0.0, regex-not@^1.0.2:
-  version "1.0.2"
-  resolved "https://registry.yarnpkg.com/regex-not/-/regex-not-1.0.2.tgz#1f4ece27e00b0b65e0247a6810e6a85d83a5752c"
-  integrity sha512-J6SDjUgDxQj5NusnOtdFxDwN/+HWykR8GELwctJ7mdqhcyy1xEc4SRFHUXvxTp661YaVKAjfRLZ9cCqS6tn32A==
-  dependencies:
-    extend-shallow "^3.0.2"
-    safe-regex "^1.1.0"
-
-regex-parser@^2.2.11:
-  version "2.2.11"
-  resolved "https://registry.yarnpkg.com/regex-parser/-/regex-parser-2.2.11.tgz#3b37ec9049e19479806e878cabe7c1ca83ccfe58"
-  integrity sha512-jbD/FT0+9MBU2XAZluI7w2OBs1RBi6p9M83nkoZayQXXU9e8Robt69FcZc7wU4eJD/YFTjn1JdCk3rbMJajz8Q==
-
-regexp.prototype.flags@^1.2.0, regexp.prototype.flags@^1.3.1:
-  version "1.3.1"
-  resolved "https://registry.yarnpkg.com/regexp.prototype.flags/-/regexp.prototype.flags-1.3.1.tgz#7ef352ae8d159e758c0eadca6f8fcb4eef07be26"
-  integrity sha512-JiBdRBq91WlY7uRJ0ds7R+dU02i6LKi8r3BuQhNXn+kmeLN+EfHhfjqMRis1zJxnlu88hq/4dx0P2OP3APRTOA==
-  dependencies:
-    call-bind "^1.0.2"
-    define-properties "^1.1.3"
-
-regexpp@^3.0.0, regexpp@^3.1.0:
-  version "3.1.0"
-  resolved "https://registry.yarnpkg.com/regexpp/-/regexpp-3.1.0.tgz#206d0ad0a5648cffbdb8ae46438f3dc51c9f78e2"
-  integrity sha512-ZOIzd8yVsQQA7j8GCSlPGXwg5PfmA1mrq0JP4nGhh54LaKN3xdai/vHUDu74pKwV8OxseMS65u2NImosQcSD0Q==
-
-regexpu-core@^4.7.1:
-  version "4.7.1"
-  resolved "https://registry.yarnpkg.com/regexpu-core/-/regexpu-core-4.7.1.tgz#2dea5a9a07233298fbf0db91fa9abc4c6e0f8ad6"
-  integrity sha512-ywH2VUraA44DZQuRKzARmw6S66mr48pQVva4LBeRhcOltJ6hExvWly5ZjFLYo67xbIxb6W1q4bAGtgfEl20zfQ==
-  dependencies:
-    regenerate "^1.4.0"
-    regenerate-unicode-properties "^8.2.0"
-    regjsgen "^0.5.1"
-    regjsparser "^0.6.4"
-    unicode-match-property-ecmascript "^1.0.4"
-    unicode-match-property-value-ecmascript "^1.2.0"
-
-regjsgen@^0.5.1:
-  version "0.5.2"
-  resolved "https://registry.yarnpkg.com/regjsgen/-/regjsgen-0.5.2.tgz#92ff295fb1deecbf6ecdab2543d207e91aa33733"
-  integrity sha512-OFFT3MfrH90xIW8OOSyUrk6QHD5E9JOTeGodiJeBS3J6IwlgzJMNE/1bZklWz5oTg+9dCMyEetclvCVXOPoN3A==
-
-regjsparser@^0.6.4:
-  version "0.6.7"
-  resolved "https://registry.yarnpkg.com/regjsparser/-/regjsparser-0.6.7.tgz#c00164e1e6713c2e3ee641f1701c4b7aa0a7f86c"
-  integrity sha512-ib77G0uxsA2ovgiYbCVGx4Pv3PSttAx2vIwidqQzbL2U5S4Q+j00HdSAneSBuyVcMvEnTXMjiGgB+DlXozVhpQ==
-  dependencies:
-    jsesc "~0.5.0"
-
-relateurl@^0.2.7:
-  version "0.2.7"
-  resolved "https://registry.yarnpkg.com/relateurl/-/relateurl-0.2.7.tgz#54dbf377e51440aca90a4cd274600d3ff2d888a9"
-  integrity sha1-VNvzd+UUQKypCkzSdGANP/LYiKk=
-
-remove-trailing-separator@^1.0.1:
-  version "1.1.0"
-  resolved "https://registry.yarnpkg.com/remove-trailing-separator/-/remove-trailing-separator-1.1.0.tgz#c24bce2a283adad5bc3f58e0d48249b92379d8ef"
-  integrity sha1-wkvOKig62tW8P1jg1IJJuSN52O8=
-
-renderkid@^2.0.4:
-  version "2.0.5"
-  resolved "https://registry.yarnpkg.com/renderkid/-/renderkid-2.0.5.tgz#483b1ac59c6601ab30a7a596a5965cabccfdd0a5"
-  integrity sha512-ccqoLg+HLOHq1vdfYNm4TBeaCDIi1FLt3wGojTDSvdewUv65oTmI3cnT2E4hRjl1gzKZIPK+KZrXzlUYKnR+vQ==
-  dependencies:
-    css-select "^2.0.2"
-    dom-converter "^0.2"
-    htmlparser2 "^3.10.1"
-    lodash "^4.17.20"
-    strip-ansi "^3.0.0"
-
-repeat-element@^1.1.2:
-  version "1.1.3"
-  resolved "https://registry.yarnpkg.com/repeat-element/-/repeat-element-1.1.3.tgz#782e0d825c0c5a3bb39731f84efee6b742e6b1ce"
-  integrity sha512-ahGq0ZnV5m5XtZLMb+vP76kcAM5nkLqk0lpqAuojSKGgQtn4eRi4ZZGm2olo2zKFH+sMsWaqOCW1dqAnOru72g==
-
-repeat-string@^1.6.1:
-  version "1.6.1"
-  resolved "https://registry.yarnpkg.com/repeat-string/-/repeat-string-1.6.1.tgz#8dcae470e1c88abc2d600fff4a776286da75e637"
-  integrity sha1-jcrkcOHIirwtYA//Sndihtp15jc=
-
-request-promise-core@1.1.4:
-  version "1.1.4"
-  resolved "https://registry.yarnpkg.com/request-promise-core/-/request-promise-core-1.1.4.tgz#3eedd4223208d419867b78ce815167d10593a22f"
-  integrity sha512-TTbAfBBRdWD7aNNOoVOBH4pN/KigV6LyapYNNlAPA8JwbovRti1E88m3sYAwsLi5ryhPKsE9APwnjFTgdUjTpw==
-  dependencies:
-    lodash "^4.17.19"
-
-request-promise-native@^1.0.8:
-  version "1.0.9"
-  resolved "https://registry.yarnpkg.com/request-promise-native/-/request-promise-native-1.0.9.tgz#e407120526a5efdc9a39b28a5679bf47b9d9dc28"
-  integrity sha512-wcW+sIUiWnKgNY0dqCpOZkUbF/I+YPi+f09JZIDa39Ec+q82CpSYniDp+ISgTTbKmnpJWASeJBPZmoxH84wt3g==
-  dependencies:
-    request-promise-core "1.1.4"
-    stealthy-require "^1.1.1"
-    tough-cookie "^2.3.3"
-
-request@^2.88.2:
-  version "2.88.2"
-  resolved "https://registry.yarnpkg.com/request/-/request-2.88.2.tgz#d73c918731cb5a87da047e207234146f664d12b3"
-  integrity sha512-MsvtOrfG9ZcrOwAW+Qi+F6HbD0CWXEh9ou77uOb7FM2WPhwT7smM833PzanhJLsgXjN89Ir6V2PczXNnMpwKhw==
-  dependencies:
-    aws-sign2 "~0.7.0"
-    aws4 "^1.8.0"
-    caseless "~0.12.0"
-    combined-stream "~1.0.6"
-    extend "~3.0.2"
-    forever-agent "~0.6.1"
-    form-data "~2.3.2"
-    har-validator "~5.1.3"
-    http-signature "~1.2.0"
-    is-typedarray "~1.0.0"
-    isstream "~0.1.2"
-    json-stringify-safe "~5.0.1"
-    mime-types "~2.1.19"
-    oauth-sign "~0.9.0"
-    performance-now "^2.1.0"
-    qs "~6.5.2"
-    safe-buffer "^5.1.2"
-    tough-cookie "~2.5.0"
-    tunnel-agent "^0.6.0"
-    uuid "^3.3.2"
-
-require-directory@^2.1.1:
-  version "2.1.1"
-  resolved "https://registry.yarnpkg.com/require-directory/-/require-directory-2.1.1.tgz#8c64ad5fd30dab1c976e2344ffe7f792a6a6df42"
-  integrity sha1-jGStX9MNqxyXbiNE/+f3kqam30I=
-
-require-from-string@^2.0.2:
-  version "2.0.2"
-  resolved "https://registry.yarnpkg.com/require-from-string/-/require-from-string-2.0.2.tgz#89a7fdd938261267318eafe14f9c32e598c36909"
-  integrity sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==
-
-require-main-filename@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/require-main-filename/-/require-main-filename-2.0.0.tgz#d0b329ecc7cc0f61649f62215be69af54aa8989b"
-  integrity sha512-NKN5kMDylKuldxYLSUfrbo5Tuzh4hd+2E8NPPX02mZtn1VuREQToYe/ZdlJy+J3uCpfaiGF05e7B8W0iXbQHmg==
-
-requires-port@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/requires-port/-/requires-port-1.0.0.tgz#925d2601d39ac485e091cf0da5c6e694dc3dcaff"
-  integrity sha1-kl0mAdOaxIXgkc8NpcbmlNw9yv8=
-
-resolve-cwd@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/resolve-cwd/-/resolve-cwd-2.0.0.tgz#00a9f7387556e27038eae232caa372a6a59b665a"
-  integrity sha1-AKn3OHVW4nA46uIyyqNypqWbZlo=
-  dependencies:
-    resolve-from "^3.0.0"
-
-resolve-cwd@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/resolve-cwd/-/resolve-cwd-3.0.0.tgz#0f0075f1bb2544766cf73ba6a6e2adfebcb13f2d"
-  integrity sha512-OrZaX2Mb+rJCpH/6CpSqt9xFVpN++x01XnN2ie9g6P5/3xelLAkXWVADpdz1IHD/KFfEXyE6V0U01OQ3UO2rEg==
-  dependencies:
-    resolve-from "^5.0.0"
-
-resolve-from@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/resolve-from/-/resolve-from-3.0.0.tgz#b22c7af7d9d6881bc8b6e653335eebcb0a188748"
-  integrity sha1-six699nWiBvItuZTM17rywoYh0g=
-
-resolve-from@^4.0.0:
-  version "4.0.0"
-  resolved "https://registry.yarnpkg.com/resolve-from/-/resolve-from-4.0.0.tgz#4abcd852ad32dd7baabfe9b40e00a36db5f392e6"
-  integrity sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g==
-
-resolve-from@^5.0.0:
-  version "5.0.0"
-  resolved "https://registry.yarnpkg.com/resolve-from/-/resolve-from-5.0.0.tgz#c35225843df8f776df21c57557bc087e9dfdfc69"
-  integrity sha512-qYg9KP24dD5qka9J47d0aVky0N+b4fTU89LN9iDnjB5waksiC49rvMB0PrUJQGoTmH50XPiqOvAjDfaijGxYZw==
-
-resolve-pathname@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/resolve-pathname/-/resolve-pathname-3.0.0.tgz#99d02224d3cf263689becbb393bc560313025dcd"
-  integrity sha512-C7rARubxI8bXFNB/hqcp/4iUeIXJhJZvFPFPiSPRnhU5UPxzMFIl+2E6yY6c4k9giDJAhtV+enfA+G89N6Csng==
-
-resolve-url-loader@^3.1.2:
-  version "3.1.2"
-  resolved "https://registry.yarnpkg.com/resolve-url-loader/-/resolve-url-loader-3.1.2.tgz#235e2c28e22e3e432ba7a5d4e305c59a58edfc08"
-  integrity sha512-QEb4A76c8Mi7I3xNKXlRKQSlLBwjUV/ULFMP+G7n3/7tJZ8MG5wsZ3ucxP1Jz8Vevn6fnJsxDx9cIls+utGzPQ==
-  dependencies:
-    adjust-sourcemap-loader "3.0.0"
-    camelcase "5.3.1"
-    compose-function "3.0.3"
-    convert-source-map "1.7.0"
-    es6-iterator "2.0.3"
-    loader-utils "1.2.3"
-    postcss "7.0.21"
-    rework "1.0.1"
-    rework-visit "1.0.0"
-    source-map "0.6.1"
-
-resolve-url@^0.2.1:
-  version "0.2.1"
-  resolved "https://registry.yarnpkg.com/resolve-url/-/resolve-url-0.2.1.tgz#2c637fe77c893afd2a663fe21aa9080068e2052a"
-  integrity sha1-LGN/53yJOv0qZj/iGqkIAGjiBSo=
-
-resolve@1.18.1:
-  version "1.18.1"
-  resolved "https://registry.yarnpkg.com/resolve/-/resolve-1.18.1.tgz#018fcb2c5b207d2a6424aee361c5a266da8f4130"
-  integrity sha512-lDfCPaMKfOJXjy0dPayzPdF1phampNWr3qFCjAu+rw/qbQmr5jWH5xN2hwh9QKfw9E5v4hwV7A+jrCmL8yjjqA==
-  dependencies:
-    is-core-module "^2.0.0"
-    path-parse "^1.0.6"
-
-resolve@^1.10.0, resolve@^1.12.0, resolve@^1.13.1, resolve@^1.14.2, resolve@^1.17.0, resolve@^1.18.1, resolve@^1.3.2, resolve@^1.8.1:
-  version "1.20.0"
-  resolved "https://registry.yarnpkg.com/resolve/-/resolve-1.20.0.tgz#629a013fb3f70755d6f0b7935cc1c2c5378b1975"
-  integrity sha512-wENBPt4ySzg4ybFQW2TT1zMQucPK95HSh/nq2CFTZVOGut2+pQvSsgtda4d26YrYcr067wjbmzOG8byDPBX63A==
-  dependencies:
-    is-core-module "^2.2.0"
-    path-parse "^1.0.6"
-
-ret@~0.1.10:
-  version "0.1.15"
-  resolved "https://registry.yarnpkg.com/ret/-/ret-0.1.15.tgz#b8a4825d5bdb1fc3f6f53c2bc33f81388681c7bc"
-  integrity sha512-TTlYpa+OL+vMMNG24xSlQGEJ3B/RzEfUlLct7b5G/ytav+wPrplCpVMFuwzXbkecJrb6IYo1iFb0S9v37754mg==
-
-retry@^0.12.0:
-  version "0.12.0"
-  resolved "https://registry.yarnpkg.com/retry/-/retry-0.12.0.tgz#1b42a6266a21f07421d1b0b54b7dc167b01c013b"
-  integrity sha1-G0KmJmoh8HQh0bC1S33BZ7AcATs=
-
-reusify@^1.0.4:
-  version "1.0.4"
-  resolved "https://registry.yarnpkg.com/reusify/-/reusify-1.0.4.tgz#90da382b1e126efc02146e90845a88db12925d76"
-  integrity sha512-U9nH88a3fc/ekCF1l0/UP1IosiuIjyTh7hBvXVMHYgVcfGvt897Xguj2UOLDeI5BG2m7/uwyaLVT6fbtCwTyzw==
-
-rework-visit@1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/rework-visit/-/rework-visit-1.0.0.tgz#9945b2803f219e2f7aca00adb8bc9f640f842c9a"
-  integrity sha1-mUWygD8hni96ygCtuLyfZA+ELJo=
-
-rework@1.0.1:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/rework/-/rework-1.0.1.tgz#30806a841342b54510aa4110850cd48534144aa7"
-  integrity sha1-MIBqhBNCtUUQqkEQhQzUhTQUSqc=
-  dependencies:
-    convert-source-map "^0.3.3"
-    css "^2.0.0"
-
-rgb-regex@^1.0.1:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/rgb-regex/-/rgb-regex-1.0.1.tgz#c0e0d6882df0e23be254a475e8edd41915feaeb1"
-  integrity sha1-wODWiC3w4jviVKR16O3UGRX+rrE=
-
-rgba-regex@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/rgba-regex/-/rgba-regex-1.0.0.tgz#43374e2e2ca0968b0ef1523460b7d730ff22eeb3"
-  integrity sha1-QzdOLiyglosO8VI0YLfXMP8i7rM=
-
-rimraf@^2.5.4, rimraf@^2.6.3:
-  version "2.7.1"
-  resolved "https://registry.yarnpkg.com/rimraf/-/rimraf-2.7.1.tgz#35797f13a7fdadc566142c29d4f07ccad483e3ec"
-  integrity sha512-uWjbaKIK3T1OSVptzX7Nl6PvQ3qAGtKEtVRjRuazjfL3Bx5eI409VZSqgND+4UNnmzLVdPj9FqFJNPqBZFve4w==
-  dependencies:
-    glob "^7.1.3"
-
-rimraf@^3.0.0, rimraf@^3.0.2:
-  version "3.0.2"
-  resolved "https://registry.yarnpkg.com/rimraf/-/rimraf-3.0.2.tgz#f1a5402ba6220ad52cc1282bac1ae3aa49fd061a"
-  integrity sha512-JZkJMZkAGFFPP2YqXZXPbMlMBgsxzE8ILs4lMIX/2o0L9UBw9O/Y3o6wFw/i9YLapcUJWwqbi3kdxIPdC62TIA==
-  dependencies:
-    glob "^7.1.3"
-
-ripemd160@^2.0.0, ripemd160@^2.0.1:
-  version "2.0.2"
-  resolved "https://registry.yarnpkg.com/ripemd160/-/ripemd160-2.0.2.tgz#a1c1a6f624751577ba5d07914cbc92850585890c"
-  integrity sha512-ii4iagi25WusVoiC4B4lq7pbXfAp3D9v5CwfkY33vffw2+pkDjY1D8GaN7spsxvCSx8dkPqOZCEZyfxcmJG2IA==
-  dependencies:
-    hash-base "^3.0.0"
-    inherits "^2.0.1"
-
-rollup-plugin-babel@^4.3.3:
-  version "4.4.0"
-  resolved "https://registry.yarnpkg.com/rollup-plugin-babel/-/rollup-plugin-babel-4.4.0.tgz#d15bd259466a9d1accbdb2fe2fff17c52d030acb"
-  integrity sha512-Lek/TYp1+7g7I+uMfJnnSJ7YWoD58ajo6Oarhlex7lvUce+RCKRuGRSgztDO3/MF/PuGKmUL5iTHKf208UNszw==
-  dependencies:
-    "@babel/helper-module-imports" "^7.0.0"
-    rollup-pluginutils "^2.8.1"
-
-rollup-plugin-terser@^5.3.1:
-  version "5.3.1"
-  resolved "https://registry.yarnpkg.com/rollup-plugin-terser/-/rollup-plugin-terser-5.3.1.tgz#8c650062c22a8426c64268548957463bf981b413"
-  integrity sha512-1pkwkervMJQGFYvM9nscrUoncPwiKR/K+bHdjv6PFgRo3cgPHoRT83y2Aa3GvINj4539S15t/tpFPb775TDs6w==
-  dependencies:
-    "@babel/code-frame" "^7.5.5"
-    jest-worker "^24.9.0"
-    rollup-pluginutils "^2.8.2"
-    serialize-javascript "^4.0.0"
-    terser "^4.6.2"
-
-rollup-pluginutils@^2.8.1, rollup-pluginutils@^2.8.2:
-  version "2.8.2"
-  resolved "https://registry.yarnpkg.com/rollup-pluginutils/-/rollup-pluginutils-2.8.2.tgz#72f2af0748b592364dbd3389e600e5a9444a351e"
-  integrity sha512-EEp9NhnUkwY8aif6bxgovPHMoMoNr2FulJziTndpt5H9RdwC47GSGuII9XxpSdzVGM0GWrNPHV6ie1LTNJPaLQ==
-  dependencies:
-    estree-walker "^0.6.1"
-
-rollup@^1.31.1:
-  version "1.32.1"
-  resolved "https://registry.yarnpkg.com/rollup/-/rollup-1.32.1.tgz#4480e52d9d9e2ae4b46ba0d9ddeaf3163940f9c4"
-  integrity sha512-/2HA0Ec70TvQnXdzynFffkjA6XN+1e2pEv/uKS5Ulca40g2L7KuOE3riasHoNVHOsFD5KKZgDsMk1CP3Tw9s+A==
-  dependencies:
-    "@types/estree" "*"
-    "@types/node" "*"
-    acorn "^7.1.0"
-
-rsvp@^4.8.4:
-  version "4.8.5"
-  resolved "https://registry.yarnpkg.com/rsvp/-/rsvp-4.8.5.tgz#c8f155311d167f68f21e168df71ec5b083113734"
-  integrity sha512-nfMOlASu9OnRJo1mbEk2cz0D56a1MBNrJ7orjRZQG10XDyuvwksKbuXNp6qa+kbn839HwjwhBzhFmdsaEAfauA==
-
-run-parallel@^1.1.9:
-  version "1.2.0"
-  resolved "https://registry.yarnpkg.com/run-parallel/-/run-parallel-1.2.0.tgz#66d1368da7bdf921eb9d95bd1a9229e7f21a43ee"
-  integrity sha512-5l4VyZR86LZ/lDxZTR6jqL8AFE2S0IFLMP26AbjsLVADxHdhB/c0GUsH+y39UfCi3dzz8OlQuPmnaJOMoDHQBA==
-  dependencies:
-    queue-microtask "^1.2.2"
-
-run-queue@^1.0.0, run-queue@^1.0.3:
-  version "1.0.3"
-  resolved "https://registry.yarnpkg.com/run-queue/-/run-queue-1.0.3.tgz#e848396f057d223f24386924618e25694161ec47"
-  integrity sha1-6Eg5bwV9Ij8kOGkkYY4laUFh7Ec=
-  dependencies:
-    aproba "^1.1.1"
-
-safe-buffer@5.1.2, safe-buffer@~5.1.0, safe-buffer@~5.1.1:
-  version "5.1.2"
-  resolved "https://registry.yarnpkg.com/safe-buffer/-/safe-buffer-5.1.2.tgz#991ec69d296e0313747d59bdfd2b745c35f8828d"
-  integrity sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==
-
-safe-buffer@>=5.1.0, safe-buffer@^5.0.1, safe-buffer@^5.1.0, safe-buffer@^5.1.1, safe-buffer@^5.1.2, safe-buffer@^5.2.0, safe-buffer@~5.2.0:
-  version "5.2.1"
-  resolved "https://registry.yarnpkg.com/safe-buffer/-/safe-buffer-5.2.1.tgz#1eaf9fa9bdb1fdd4ec75f58f9cdb4e6b7827eec6"
-  integrity sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==
-
-safe-regex@^1.1.0:
-  version "1.1.0"
-  resolved "https://registry.yarnpkg.com/safe-regex/-/safe-regex-1.1.0.tgz#40a3669f3b077d1e943d44629e157dd48023bf2e"
-  integrity sha1-QKNmnzsHfR6UPURinhV91IAjvy4=
-  dependencies:
-    ret "~0.1.10"
-
-"safer-buffer@>= 2.1.2 < 3", safer-buffer@^2.0.2, safer-buffer@^2.1.0, safer-buffer@~2.1.0:
-  version "2.1.2"
-  resolved "https://registry.yarnpkg.com/safer-buffer/-/safer-buffer-2.1.2.tgz#44fa161b0187b9549dd84bb91802f9bd8385cd6a"
-  integrity sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==
-
-sane@^4.0.3:
-  version "4.1.0"
-  resolved "https://registry.yarnpkg.com/sane/-/sane-4.1.0.tgz#ed881fd922733a6c461bc189dc2b6c006f3ffded"
-  integrity sha512-hhbzAgTIX8O7SHfp2c8/kREfEn4qO/9q8C9beyY6+tvZ87EpoZ3i1RIEvp27YBswnNbY9mWd6paKVmKbAgLfZA==
-  dependencies:
-    "@cnakazawa/watch" "^1.0.3"
-    anymatch "^2.0.0"
-    capture-exit "^2.0.0"
-    exec-sh "^0.3.2"
-    execa "^1.0.0"
-    fb-watchman "^2.0.0"
-    micromatch "^3.1.4"
-    minimist "^1.1.1"
-    walker "~1.0.5"
-
-sanitize.css@^10.0.0:
-  version "10.0.0"
-  resolved "https://registry.yarnpkg.com/sanitize.css/-/sanitize.css-10.0.0.tgz#b5cb2547e96d8629a60947544665243b1dc3657a"
-  integrity sha512-vTxrZz4dX5W86M6oVWVdOVe72ZiPs41Oi7Z6Km4W5Turyz28mrXSJhhEBZoRtzJWIv3833WKVwLSDWWkEfupMg==
-
-sass-loader@^10.0.5:
-  version "10.1.1"
-  resolved "https://registry.yarnpkg.com/sass-loader/-/sass-loader-10.1.1.tgz#4ddd5a3d7638e7949065dd6e9c7c04037f7e663d"
-  integrity sha512-W6gVDXAd5hR/WHsPicvZdjAWHBcEJ44UahgxcIE196fW2ong0ZHMPO1kZuI5q0VlvMQZh32gpv69PLWQm70qrw==
-  dependencies:
-    klona "^2.0.4"
-    loader-utils "^2.0.0"
-    neo-async "^2.6.2"
-    schema-utils "^3.0.0"
-    semver "^7.3.2"
-
-sax@~1.2.4:
-  version "1.2.4"
-  resolved "https://registry.yarnpkg.com/sax/-/sax-1.2.4.tgz#2816234e2378bddc4e5354fab5caa895df7100d9"
-  integrity sha512-NqVDv9TpANUjFm0N8uM5GxL36UgKi9/atZw+x7YFnQ8ckwFGKrl4xX4yWtrey3UJm5nP1kUbnYgLopqWNSRhWw==
-
-saxes@^5.0.0:
-  version "5.0.1"
-  resolved "https://registry.yarnpkg.com/saxes/-/saxes-5.0.1.tgz#eebab953fa3b7608dbe94e5dadb15c888fa6696d"
-  integrity sha512-5LBh1Tls8c9xgGjw3QrMwETmTMVk0oFgvrFSvWx62llR2hcEInrKNZ2GZCCuuy2lvWrdl5jhbpeqc5hRYKFOcw==
-  dependencies:
-    xmlchars "^2.2.0"
-
-scheduler@^0.20.1:
-  version "0.20.1"
-  resolved "https://registry.yarnpkg.com/scheduler/-/scheduler-0.20.1.tgz#da0b907e24026b01181ecbc75efdc7f27b5a000c"
-  integrity sha512-LKTe+2xNJBNxu/QhHvDR14wUXHRQbVY5ZOYpOGWRzhydZUqrLb2JBvLPY7cAqFmqrWuDED0Mjk7013SZiOz6Bw==
-  dependencies:
-    loose-envify "^1.1.0"
-    object-assign "^4.1.1"
-
-schema-utils@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/schema-utils/-/schema-utils-1.0.0.tgz#0b79a93204d7b600d4b2850d1f66c2a34951c770"
-  integrity sha512-i27Mic4KovM/lnGsy8whRCHhc7VicJajAjTrYg11K9zfZXnYIt4k5F+kZkwjnrhKzLic/HLU4j11mjsz2G/75g==
-  dependencies:
-    ajv "^6.1.0"
-    ajv-errors "^1.0.0"
-    ajv-keywords "^3.1.0"
-
-schema-utils@^2.6.5, schema-utils@^2.7.0, schema-utils@^2.7.1:
-  version "2.7.1"
-  resolved "https://registry.yarnpkg.com/schema-utils/-/schema-utils-2.7.1.tgz#1ca4f32d1b24c590c203b8e7a50bf0ea4cd394d7"
-  integrity sha512-SHiNtMOUGWBQJwzISiVYKu82GiV4QYGePp3odlY1tuKO7gPtphAT5R/py0fA6xtbgLL/RvtJZnU9b8s0F1q0Xg==
-  dependencies:
-    "@types/json-schema" "^7.0.5"
-    ajv "^6.12.4"
-    ajv-keywords "^3.5.2"
-
-schema-utils@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/schema-utils/-/schema-utils-3.0.0.tgz#67502f6aa2b66a2d4032b4279a2944978a0913ef"
-  integrity sha512-6D82/xSzO094ajanoOSbe4YvXWMfn2A//8Y1+MUqFAJul5Bs+yn36xbK9OtNDcRVSBJ9jjeoXftM6CfztsjOAA==
-  dependencies:
-    "@types/json-schema" "^7.0.6"
-    ajv "^6.12.5"
-    ajv-keywords "^3.5.2"
-
-select-hose@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/select-hose/-/select-hose-2.0.0.tgz#625d8658f865af43ec962bfc376a37359a4994ca"
-  integrity sha1-Yl2GWPhlr0Psliv8N2o3NZpJlMo=
-
-selfsigned@^1.10.8:
-  version "1.10.8"
-  resolved "https://registry.yarnpkg.com/selfsigned/-/selfsigned-1.10.8.tgz#0d17208b7d12c33f8eac85c41835f27fc3d81a30"
-  integrity sha512-2P4PtieJeEwVgTU9QEcwIRDQ/mXJLX8/+I3ur+Pg16nS8oNbrGxEso9NyYWy8NAmXiNl4dlAp5MwoNeCWzON4w==
-  dependencies:
-    node-forge "^0.10.0"
-
-"semver@2 || 3 || 4 || 5", semver@^5.4.1, semver@^5.5.0, semver@^5.5.1, semver@^5.6.0:
-  version "5.7.1"
-  resolved "https://registry.yarnpkg.com/semver/-/semver-5.7.1.tgz#a954f931aeba508d307bbf069eff0c01c96116f7"
-  integrity sha512-sauaDf/PZdVgrLTNYHRtpXa1iRiKcaebiKQ1BJdpQlWH2lCvexQdX55snPFyK7QzpudqbCI0qXFfOasHdyNDGQ==
-
-semver@7.0.0:
-  version "7.0.0"
-  resolved "https://registry.yarnpkg.com/semver/-/semver-7.0.0.tgz#5f3ca35761e47e05b206c6daff2cf814f0316b8e"
-  integrity sha512-+GB6zVA9LWh6zovYQLALHwv5rb2PHGlJi3lfiqIHxR0uuwCgefcOJc59v9fv1w8GbStwxuuqqAjI9NMAOOgq1A==
-
-semver@7.3.2:
-  version "7.3.2"
-  resolved "https://registry.yarnpkg.com/semver/-/semver-7.3.2.tgz#604962b052b81ed0786aae84389ffba70ffd3938"
-  integrity sha512-OrOb32TeeambH6UrhtShmF7CRDqhL6/5XpPNp2DuRH6+9QLw/orhp72j87v8Qa1ScDkvrrBNpZcDejAirJmfXQ==
-
-semver@^6.0.0, semver@^6.3.0:
-  version "6.3.0"
-  resolved "https://registry.yarnpkg.com/semver/-/semver-6.3.0.tgz#ee0a64c8af5e8ceea67687b133761e1becbd1d3d"
-  integrity sha512-b39TBaTSfV6yBrapU89p5fKekE2m/NwnDocOVruQFS1/veMgdzuPcnOM34M6CwxW8jH/lxEa5rBoDeUwu5HHTw==
-
-semver@^7.2.1, semver@^7.3.2:
-  version "7.3.4"
-  resolved "https://registry.yarnpkg.com/semver/-/semver-7.3.4.tgz#27aaa7d2e4ca76452f98d3add093a72c943edc97"
-  integrity sha512-tCfb2WLjqFAtXn4KEdxIhalnRtoKFN7nAwj0B3ZXCbQloV2tq5eDbcTmT68JJD3nRJq24/XgxtQKFIpQdtvmVw==
-  dependencies:
-    lru-cache "^6.0.0"
-
-send@0.17.1:
-  version "0.17.1"
-  resolved "https://registry.yarnpkg.com/send/-/send-0.17.1.tgz#c1d8b059f7900f7466dd4938bdc44e11ddb376c8"
-  integrity sha512-BsVKsiGcQMFwT8UxypobUKyv7irCNRHk1T0G680vk88yf6LBByGcZJOTJCrTP2xVN6yI+XjPJcNuE3V4fT9sAg==
-  dependencies:
-    debug "2.6.9"
-    depd "~1.1.2"
-    destroy "~1.0.4"
-    encodeurl "~1.0.2"
-    escape-html "~1.0.3"
-    etag "~1.8.1"
-    fresh "0.5.2"
-    http-errors "~1.7.2"
-    mime "1.6.0"
-    ms "2.1.1"
-    on-finished "~2.3.0"
-    range-parser "~1.2.1"
-    statuses "~1.5.0"
-
-serialize-javascript@^4.0.0:
-  version "4.0.0"
-  resolved "https://registry.yarnpkg.com/serialize-javascript/-/serialize-javascript-4.0.0.tgz#b525e1238489a5ecfc42afacc3fe99e666f4b1aa"
-  integrity sha512-GaNA54380uFefWghODBWEGisLZFj00nS5ACs6yHa9nLqlLpVLO8ChDGeKRjZnV4Nh4n0Qi7nhYZD/9fCPzEqkw==
-  dependencies:
-    randombytes "^2.1.0"
-
-serialize-javascript@^5.0.1:
-  version "5.0.1"
-  resolved "https://registry.yarnpkg.com/serialize-javascript/-/serialize-javascript-5.0.1.tgz#7886ec848049a462467a97d3d918ebb2aaf934f4"
-  integrity sha512-SaaNal9imEO737H2c05Og0/8LUXG7EnsZyMa8MzkmuHoELfT6txuj0cMqRj6zfPKnmQ1yasR4PCJc8x+M4JSPA==
-  dependencies:
-    randombytes "^2.1.0"
-
-serve-index@^1.9.1:
-  version "1.9.1"
-  resolved "https://registry.yarnpkg.com/serve-index/-/serve-index-1.9.1.tgz#d3768d69b1e7d82e5ce050fff5b453bea12a9239"
-  integrity sha1-03aNabHn2C5c4FD/9bRTvqEqkjk=
-  dependencies:
-    accepts "~1.3.4"
-    batch "0.6.1"
-    debug "2.6.9"
-    escape-html "~1.0.3"
-    http-errors "~1.6.2"
-    mime-types "~2.1.17"
-    parseurl "~1.3.2"
-
-serve-static@1.14.1:
-  version "1.14.1"
-  resolved "https://registry.yarnpkg.com/serve-static/-/serve-static-1.14.1.tgz#666e636dc4f010f7ef29970a88a674320898b2f9"
-  integrity sha512-JMrvUwE54emCYWlTI+hGrGv5I8dEwmco/00EvkzIIsR7MqrHonbD9pO2MOfFnpFntl7ecpZs+3mW+XbQZu9QCg==
-  dependencies:
-    encodeurl "~1.0.2"
-    escape-html "~1.0.3"
-    parseurl "~1.3.3"
-    send "0.17.1"
-
-set-blocking@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/set-blocking/-/set-blocking-2.0.0.tgz#045f9782d011ae9a6803ddd382b24392b3d890f7"
-  integrity sha1-BF+XgtARrppoA93TgrJDkrPYkPc=
-
-set-value@^2.0.0, set-value@^2.0.1:
-  version "2.0.1"
-  resolved "https://registry.yarnpkg.com/set-value/-/set-value-2.0.1.tgz#a18d40530e6f07de4228c7defe4227af8cad005b"
-  integrity sha512-JxHc1weCN68wRY0fhCoXpyK55m/XPHafOmK4UWD7m2CI14GMcFypt4w/0+NV5f/ZMby2F6S2wwA7fgynh9gWSw==
-  dependencies:
-    extend-shallow "^2.0.1"
-    is-extendable "^0.1.1"
-    is-plain-object "^2.0.3"
-    split-string "^3.0.1"
-
-setimmediate@^1.0.4:
-  version "1.0.5"
-  resolved "https://registry.yarnpkg.com/setimmediate/-/setimmediate-1.0.5.tgz#290cbb232e306942d7d7ea9b83732ab7856f8285"
-  integrity sha1-KQy7Iy4waULX1+qbg3Mqt4VvgoU=
-
-setprototypeof@1.1.0:
-  version "1.1.0"
-  resolved "https://registry.yarnpkg.com/setprototypeof/-/setprototypeof-1.1.0.tgz#d0bd85536887b6fe7c0d818cb962d9d91c54e656"
-  integrity sha512-BvE/TwpZX4FXExxOxZyRGQQv651MSwmWKZGqvmPcRIjDqWub67kTKuIMx43cZZrS/cBBzwBcNDWoFxt2XEFIpQ==
-
-setprototypeof@1.1.1:
-  version "1.1.1"
-  resolved "https://registry.yarnpkg.com/setprototypeof/-/setprototypeof-1.1.1.tgz#7e95acb24aa92f5885e0abef5ba131330d4ae683"
-  integrity sha512-JvdAWfbXeIGaZ9cILp38HntZSFSo3mWg6xGcJJsd+d4aRMOqauag1C63dJfDw7OaMYwEbHMOxEZ1lqVRYP2OAw==
-
-sha.js@^2.4.0, sha.js@^2.4.8:
-  version "2.4.11"
-  resolved "https://registry.yarnpkg.com/sha.js/-/sha.js-2.4.11.tgz#37a5cf0b81ecbc6943de109ba2960d1b26584ae7"
-  integrity sha512-QMEp5B7cftE7APOjk5Y6xgrbWu+WkLVQwk8JNjZ8nKRciZaByEW6MubieAiToS7+dwvrjGhH8jRXz3MVd0AYqQ==
-  dependencies:
-    inherits "^2.0.1"
-    safe-buffer "^5.0.1"
-
-shebang-command@^1.2.0:
-  version "1.2.0"
-  resolved "https://registry.yarnpkg.com/shebang-command/-/shebang-command-1.2.0.tgz#44aac65b695b03398968c39f363fee5deafdf1ea"
-  integrity sha1-RKrGW2lbAzmJaMOfNj/uXer98eo=
-  dependencies:
-    shebang-regex "^1.0.0"
-
-shebang-command@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/shebang-command/-/shebang-command-2.0.0.tgz#ccd0af4f8835fbdc265b82461aaf0c36663f34ea"
-  integrity sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==
-  dependencies:
-    shebang-regex "^3.0.0"
-
-shebang-regex@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/shebang-regex/-/shebang-regex-1.0.0.tgz#da42f49740c0b42db2ca9728571cb190c98efea3"
-  integrity sha1-2kL0l0DAtC2yypcoVxyxkMmO/qM=
-
-shebang-regex@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/shebang-regex/-/shebang-regex-3.0.0.tgz#ae16f1644d873ecad843b0307b143362d4c42172"
-  integrity sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==
-
-shell-quote@1.7.2:
-  version "1.7.2"
-  resolved "https://registry.yarnpkg.com/shell-quote/-/shell-quote-1.7.2.tgz#67a7d02c76c9da24f99d20808fcaded0e0e04be2"
-  integrity sha512-mRz/m/JVscCrkMyPqHc/bczi3OQHkLTqXHEFu0zDhK/qfv3UcOA4SVmRCLmos4bhjr9ekVQubj/R7waKapmiQg==
-
-shellwords@^0.1.1:
-  version "0.1.1"
-  resolved "https://registry.yarnpkg.com/shellwords/-/shellwords-0.1.1.tgz#d6b9181c1a48d397324c84871efbcfc73fc0654b"
-  integrity sha512-vFwSUfQvqybiICwZY5+DAWIPLKsWO31Q91JSKl3UYv+K5c2QRPzn0qzec6QPu1Qc9eHYItiP3NdJqNVqetYAww==
-
-side-channel@^1.0.4:
-  version "1.0.4"
-  resolved "https://registry.yarnpkg.com/side-channel/-/side-channel-1.0.4.tgz#efce5c8fdc104ee751b25c58d4290011fa5ea2cf"
-  integrity sha512-q5XPytqFEIKHkGdiMIrY10mvLRvnQh42/+GoBlFW3b2LXLE2xxJpZFdm94we0BaoV3RwJyGqg5wS7epxTv0Zvw==
-  dependencies:
-    call-bind "^1.0.0"
-    get-intrinsic "^1.0.2"
-    object-inspect "^1.9.0"
-
-signal-exit@^3.0.0, signal-exit@^3.0.2:
-  version "3.0.3"
-  resolved "https://registry.yarnpkg.com/signal-exit/-/signal-exit-3.0.3.tgz#a1410c2edd8f077b08b4e253c8eacfcaf057461c"
-  integrity sha512-VUJ49FC8U1OxwZLxIbTTrDvLnf/6TDgxZcK8wxR8zs13xpx7xbG60ndBlhNrFi2EMuFRoeDoJO7wthSLq42EjA==
-
-simple-swizzle@^0.2.2:
-  version "0.2.2"
-  resolved "https://registry.yarnpkg.com/simple-swizzle/-/simple-swizzle-0.2.2.tgz#a4da6b635ffcccca33f70d17cb92592de95e557a"
-  integrity sha1-pNprY1/8zMoz9w0Xy5JZLeleVXo=
-  dependencies:
-    is-arrayish "^0.3.1"
-
-sisteransi@^1.0.5:
-  version "1.0.5"
-  resolved "https://registry.yarnpkg.com/sisteransi/-/sisteransi-1.0.5.tgz#134d681297756437cc05ca01370d3a7a571075ed"
-  integrity sha512-bLGGlR1QxBcynn2d5YmDX4MGjlZvy2MRBDRNHLJ8VI6l6+9FUiyTFNJ0IveOSP0bcXgVDPRcfGqA0pjaqUpfVg==
-
-slash@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/slash/-/slash-3.0.0.tgz#6539be870c165adbd5240220dbe361f1bc4d4634"
-  integrity sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q==
-
-slice-ansi@^4.0.0:
-  version "4.0.0"
-  resolved "https://registry.yarnpkg.com/slice-ansi/-/slice-ansi-4.0.0.tgz#500e8dd0fd55b05815086255b3195adf2a45fe6b"
-  integrity sha512-qMCMfhY040cVHT43K9BFygqYbUPFZKHOg7K73mtTWJRb8pyP3fzf4Ixd5SzdEJQ6MRUg/WBnOLxghZtKKurENQ==
-  dependencies:
-    ansi-styles "^4.0.0"
-    astral-regex "^2.0.0"
-    is-fullwidth-code-point "^3.0.0"
-
-snapdragon-node@^2.0.1:
-  version "2.1.1"
-  resolved "https://registry.yarnpkg.com/snapdragon-node/-/snapdragon-node-2.1.1.tgz#6c175f86ff14bdb0724563e8f3c1b021a286853b"
-  integrity sha512-O27l4xaMYt/RSQ5TR3vpWCAB5Kb/czIcqUFOM/C4fYcLnbZUc1PkjTAMjof2pBWaSTwOUd6qUHcFGVGj7aIwnw==
-  dependencies:
-    define-property "^1.0.0"
-    isobject "^3.0.0"
-    snapdragon-util "^3.0.1"
-
-snapdragon-util@^3.0.1:
-  version "3.0.1"
-  resolved "https://registry.yarnpkg.com/snapdragon-util/-/snapdragon-util-3.0.1.tgz#f956479486f2acd79700693f6f7b805e45ab56e2"
-  integrity sha512-mbKkMdQKsjX4BAL4bRYTj21edOf8cN7XHdYUJEe+Zn99hVEYcMvKPct1IqNe7+AZPirn8BCDOQBHQZknqmKlZQ==
-  dependencies:
-    kind-of "^3.2.0"
-
-snapdragon@^0.8.1:
-  version "0.8.2"
-  resolved "https://registry.yarnpkg.com/snapdragon/-/snapdragon-0.8.2.tgz#64922e7c565b0e14204ba1aa7d6964278d25182d"
-  integrity sha512-FtyOnWN/wCHTVXOMwvSv26d+ko5vWlIDD6zoUJ7LW8vh+ZBC8QdljveRP+crNrtBwioEUWy/4dMtbBjA4ioNlg==
-  dependencies:
-    base "^0.11.1"
-    debug "^2.2.0"
-    define-property "^0.2.5"
-    extend-shallow "^2.0.1"
-    map-cache "^0.2.2"
-    source-map "^0.5.6"
-    source-map-resolve "^0.5.0"
-    use "^3.1.0"
-
-sockjs-client@^1.5.0:
-  version "1.5.0"
-  resolved "https://registry.yarnpkg.com/sockjs-client/-/sockjs-client-1.5.0.tgz#2f8ff5d4b659e0d092f7aba0b7c386bd2aa20add"
-  integrity sha512-8Dt3BDi4FYNrCFGTL/HtwVzkARrENdwOUf1ZoW/9p3M8lZdFT35jVdrHza+qgxuG9H3/shR4cuX/X9umUrjP8Q==
-  dependencies:
-    debug "^3.2.6"
-    eventsource "^1.0.7"
-    faye-websocket "^0.11.3"
-    inherits "^2.0.4"
-    json3 "^3.3.3"
-    url-parse "^1.4.7"
-
-sockjs@^0.3.21:
-  version "0.3.21"
-  resolved "https://registry.yarnpkg.com/sockjs/-/sockjs-0.3.21.tgz#b34ffb98e796930b60a0cfa11904d6a339a7d417"
-  integrity sha512-DhbPFGpxjc6Z3I+uX07Id5ZO2XwYsWOrYjaSeieES78cq+JaJvVe5q/m1uvjIQhXinhIeCFRH6JgXe+mvVMyXw==
-  dependencies:
-    faye-websocket "^0.11.3"
-    uuid "^3.4.0"
-    websocket-driver "^0.7.4"
-
-sort-keys@^1.0.0:
-  version "1.1.2"
-  resolved "https://registry.yarnpkg.com/sort-keys/-/sort-keys-1.1.2.tgz#441b6d4d346798f1b4e49e8920adfba0e543f9ad"
-  integrity sha1-RBttTTRnmPG05J6JIK37oOVD+a0=
-  dependencies:
-    is-plain-obj "^1.0.0"
-
-source-list-map@^2.0.0:
-  version "2.0.1"
-  resolved "https://registry.yarnpkg.com/source-list-map/-/source-list-map-2.0.1.tgz#3993bd873bfc48479cca9ea3a547835c7c154b34"
-  integrity sha512-qnQ7gVMxGNxsiL4lEuJwe/To8UnK7fAnmbGEEH8RpLouuKbeEm0lhbQVFIrNSuB+G7tVrAlVsZgETT5nljf+Iw==
-
-source-map-resolve@^0.5.0, source-map-resolve@^0.5.2:
-  version "0.5.3"
-  resolved "https://registry.yarnpkg.com/source-map-resolve/-/source-map-resolve-0.5.3.tgz#190866bece7553e1f8f267a2ee82c606b5509a1a"
-  integrity sha512-Htz+RnsXWk5+P2slx5Jh3Q66vhQj1Cllm0zvnaY98+NFx+Dv2CF/f5O/t8x+KaNdrdIAsruNzoh/KpialbqAnw==
-  dependencies:
-    atob "^2.1.2"
-    decode-uri-component "^0.2.0"
-    resolve-url "^0.2.1"
-    source-map-url "^0.4.0"
-    urix "^0.1.0"
-
-source-map-resolve@^0.6.0:
-  version "0.6.0"
-  resolved "https://registry.yarnpkg.com/source-map-resolve/-/source-map-resolve-0.6.0.tgz#3d9df87e236b53f16d01e58150fc7711138e5ed2"
-  integrity sha512-KXBr9d/fO/bWo97NXsPIAW1bFSBOuCnjbNTBMO7N59hsv5i9yzRDfcYwwt0l04+VqnKC+EwzvJZIP/qkuMgR/w==
-  dependencies:
-    atob "^2.1.2"
-    decode-uri-component "^0.2.0"
-
-source-map-support@^0.5.6, source-map-support@~0.5.12, source-map-support@~0.5.19:
-  version "0.5.19"
-  resolved "https://registry.yarnpkg.com/source-map-support/-/source-map-support-0.5.19.tgz#a98b62f86dcaf4f67399648c085291ab9e8fed61"
-  integrity sha512-Wonm7zOCIJzBGQdB+thsPar0kYuCIzYvxZwlBa87yi/Mdjv7Tip2cyVbLj5o0cFPN4EVkuTwb3GDDyUx2DGnGw==
-  dependencies:
-    buffer-from "^1.0.0"
-    source-map "^0.6.0"
-
-source-map-url@^0.4.0:
-  version "0.4.1"
-  resolved "https://registry.yarnpkg.com/source-map-url/-/source-map-url-0.4.1.tgz#0af66605a745a5a2f91cf1bbf8a7afbc283dec56"
-  integrity sha512-cPiFOTLUKvJFIg4SKVScy4ilPPW6rFgMgfuZJPNoDuMs3nC1HbMUycBoJw77xFIp6z1UJQJOfx6C9GMH80DiTw==
-
-source-map@0.6.1, source-map@^0.6.0, source-map@^0.6.1, source-map@~0.6.0, source-map@~0.6.1:
-  version "0.6.1"
-  resolved "https://registry.yarnpkg.com/source-map/-/source-map-0.6.1.tgz#74722af32e9614e9c287a8d0bbde48b5e2f1a263"
-  integrity sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==
-
-source-map@^0.5.0, source-map@^0.5.6, source-map@^0.5.7:
-  version "0.5.7"
-  resolved "https://registry.yarnpkg.com/source-map/-/source-map-0.5.7.tgz#8a039d2d1021d22d1ea14c80d8ea468ba2ef3fcc"
-  integrity sha1-igOdLRAh0i0eoUyA2OpGi6LvP8w=
-
-source-map@^0.7.3, source-map@~0.7.2:
-  version "0.7.3"
-  resolved "https://registry.yarnpkg.com/source-map/-/source-map-0.7.3.tgz#5302f8169031735226544092e64981f751750383"
-  integrity sha512-CkCj6giN3S+n9qrYiBTX5gystlENnRW5jZeNLHpe6aue+SrHcG5VYwujhW9s4dY31mEGsxBDrHR6oI69fTXsaQ==
-
-sourcemap-codec@^1.4.4:
-  version "1.4.8"
-  resolved "https://registry.yarnpkg.com/sourcemap-codec/-/sourcemap-codec-1.4.8.tgz#ea804bd94857402e6992d05a38ef1ae35a9ab4c4"
-  integrity sha512-9NykojV5Uih4lgo5So5dtw+f0JgJX30KCNI8gwhz2J9A15wD0Ml6tjHKwf6fTSa6fAdVBdZeNOs9eJ71qCk8vA==
-
-spdx-correct@^3.0.0:
-  version "3.1.1"
-  resolved "https://registry.yarnpkg.com/spdx-correct/-/spdx-correct-3.1.1.tgz#dece81ac9c1e6713e5f7d1b6f17d468fa53d89a9"
-  integrity sha512-cOYcUWwhCuHCXi49RhFRCyJEK3iPj1Ziz9DpViV3tbZOwXD49QzIN3MpOLJNxh2qwq2lJJZaKMVw9qNi4jTC0w==
-  dependencies:
-    spdx-expression-parse "^3.0.0"
-    spdx-license-ids "^3.0.0"
-
-spdx-exceptions@^2.1.0:
-  version "2.3.0"
-  resolved "https://registry.yarnpkg.com/spdx-exceptions/-/spdx-exceptions-2.3.0.tgz#3f28ce1a77a00372683eade4a433183527a2163d"
-  integrity sha512-/tTrYOC7PPI1nUAgx34hUpqXuyJG+DTHJTnIULG4rDygi4xu/tfgmq1e1cIRwRzwZgo4NLySi+ricLkZkw4i5A==
-
-spdx-expression-parse@^3.0.0:
-  version "3.0.1"
-  resolved "https://registry.yarnpkg.com/spdx-expression-parse/-/spdx-expression-parse-3.0.1.tgz#cf70f50482eefdc98e3ce0a6833e4a53ceeba679"
-  integrity sha512-cbqHunsQWnJNE6KhVSMsMeH5H/L9EpymbzqTQ3uLwNCLZ1Q481oWaofqH7nO6V07xlXwY6PhQdQ2IedWx/ZK4Q==
-  dependencies:
-    spdx-exceptions "^2.1.0"
-    spdx-license-ids "^3.0.0"
-
-spdx-license-ids@^3.0.0:
-  version "3.0.7"
-  resolved "https://registry.yarnpkg.com/spdx-license-ids/-/spdx-license-ids-3.0.7.tgz#e9c18a410e5ed7e12442a549fbd8afa767038d65"
-  integrity sha512-U+MTEOO0AiDzxwFvoa4JVnMV6mZlJKk2sBLt90s7G0Gd0Mlknc7kxEn3nuDPNZRta7O2uy8oLcZLVT+4sqNZHQ==
-
-spdy-transport@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/spdy-transport/-/spdy-transport-3.0.0.tgz#00d4863a6400ad75df93361a1608605e5dcdcf31"
-  integrity sha512-hsLVFE5SjA6TCisWeJXFKniGGOpBgMLmerfO2aCyCU5s7nJ/rpAepqmFifv/GCbSbueEeAJJnmSQ2rKC/g8Fcw==
-  dependencies:
-    debug "^4.1.0"
-    detect-node "^2.0.4"
-    hpack.js "^2.1.6"
-    obuf "^1.1.2"
-    readable-stream "^3.0.6"
-    wbuf "^1.7.3"
-
-spdy@^4.0.2:
-  version "4.0.2"
-  resolved "https://registry.yarnpkg.com/spdy/-/spdy-4.0.2.tgz#b74f466203a3eda452c02492b91fb9e84a27677b"
-  integrity sha512-r46gZQZQV+Kl9oItvl1JZZqJKGr+oEkB08A6BzkiR7593/7IbtuncXHd2YoYeTsG4157ZssMu9KYvUHLcjcDoA==
-  dependencies:
-    debug "^4.1.0"
-    handle-thing "^2.0.0"
-    http-deceiver "^1.2.7"
-    select-hose "^2.0.0"
-    spdy-transport "^3.0.0"
-
-split-string@^3.0.1, split-string@^3.0.2:
-  version "3.1.0"
-  resolved "https://registry.yarnpkg.com/split-string/-/split-string-3.1.0.tgz#7cb09dda3a86585705c64b39a6466038682e8fe2"
-  integrity sha512-NzNVhJDYpwceVVii8/Hu6DKfD2G+NrQHlS/V/qgv763EYudVwEcMQNxd2lh+0VrUByXN/oJkl5grOhYWvQUYiw==
-  dependencies:
-    extend-shallow "^3.0.0"
-
-sprintf-js@~1.0.2:
-  version "1.0.3"
-  resolved "https://registry.yarnpkg.com/sprintf-js/-/sprintf-js-1.0.3.tgz#04e6926f662895354f3dd015203633b857297e2c"
-  integrity sha1-BOaSb2YolTVPPdAVIDYzuFcpfiw=
-
-sshpk@^1.7.0:
-  version "1.16.1"
-  resolved "https://registry.yarnpkg.com/sshpk/-/sshpk-1.16.1.tgz#fb661c0bef29b39db40769ee39fa70093d6f6877"
-  integrity sha512-HXXqVUq7+pcKeLqqZj6mHFUMvXtOJt1uoUx09pFW6011inTMxqI8BA8PM95myrIyyKwdnzjdFjLiE6KBPVtJIg==
-  dependencies:
-    asn1 "~0.2.3"
-    assert-plus "^1.0.0"
-    bcrypt-pbkdf "^1.0.0"
-    dashdash "^1.12.0"
-    ecc-jsbn "~0.1.1"
-    getpass "^0.1.1"
-    jsbn "~0.1.0"
-    safer-buffer "^2.0.2"
-    tweetnacl "~0.14.0"
-
-ssri@^6.0.1:
-  version "6.0.1"
-  resolved "https://registry.yarnpkg.com/ssri/-/ssri-6.0.1.tgz#2a3c41b28dd45b62b63676ecb74001265ae9edd8"
-  integrity sha512-3Wge10hNcT1Kur4PDFwEieXSCMCJs/7WvSACcrMYrNp+b8kDL1/0wJch5Ni2WrtwEa2IO8OsVfeKIciKCDx/QA==
-  dependencies:
-    figgy-pudding "^3.5.1"
-
-ssri@^8.0.0:
-  version "8.0.1"
-  resolved "https://registry.yarnpkg.com/ssri/-/ssri-8.0.1.tgz#638e4e439e2ffbd2cd289776d5ca457c4f51a2af"
-  integrity sha512-97qShzy1AiyxvPNIkLWoGua7xoQzzPjQ0HAH4B0rWKo7SZ6USuPcrUiAFrws0UH8RrbWmgq3LMTObhPIHbbBeQ==
-  dependencies:
-    minipass "^3.1.1"
-
-stable@^0.1.8:
-  version "0.1.8"
-  resolved "https://registry.yarnpkg.com/stable/-/stable-0.1.8.tgz#836eb3c8382fe2936feaf544631017ce7d47a3cf"
-  integrity sha512-ji9qxRnOVfcuLDySj9qzhGSEFVobyt1kIOSkj1qZzYLzq7Tos/oUUWvotUPQLlrsidqsK6tBH89Bc9kL5zHA6w==
-
-stack-utils@^2.0.2:
-  version "2.0.3"
-  resolved "https://registry.yarnpkg.com/stack-utils/-/stack-utils-2.0.3.tgz#cd5f030126ff116b78ccb3c027fe302713b61277"
-  integrity sha512-gL//fkxfWUsIlFL2Tl42Cl6+HFALEaB1FU76I/Fy+oZjRreP7OPMXFlGbxM7NQsI0ZpUfw76sHnv0WNYuTb7Iw==
-  dependencies:
-    escape-string-regexp "^2.0.0"
-
-stackframe@^1.1.1:
-  version "1.2.0"
-  resolved "https://registry.yarnpkg.com/stackframe/-/stackframe-1.2.0.tgz#52429492d63c62eb989804c11552e3d22e779303"
-  integrity sha512-GrdeshiRmS1YLMYgzF16olf2jJ/IzxXY9lhKOskuVziubpTYcYqyOwYeJKzQkwy7uN0fYSsbsC4RQaXf9LCrYA==
-
-static-extend@^0.1.1:
-  version "0.1.2"
-  resolved "https://registry.yarnpkg.com/static-extend/-/static-extend-0.1.2.tgz#60809c39cbff55337226fd5e0b520f341f1fb5c6"
-  integrity sha1-YICcOcv/VTNyJv1eC1IPNB8ftcY=
-  dependencies:
-    define-property "^0.2.5"
-    object-copy "^0.1.0"
-
-"statuses@>= 1.4.0 < 2", "statuses@>= 1.5.0 < 2", statuses@~1.5.0:
-  version "1.5.0"
-  resolved "https://registry.yarnpkg.com/statuses/-/statuses-1.5.0.tgz#161c7dac177659fd9811f43771fa99381478628c"
-  integrity sha1-Fhx9rBd2Wf2YEfQ3cfqZOBR4Yow=
-
-stealthy-require@^1.1.1:
-  version "1.1.1"
-  resolved "https://registry.yarnpkg.com/stealthy-require/-/stealthy-require-1.1.1.tgz#35b09875b4ff49f26a777e509b3090a3226bf24b"
-  integrity sha1-NbCYdbT/SfJqd35QmzCQoyJr8ks=
-
-stream-browserify@^2.0.1:
-  version "2.0.2"
-  resolved "https://registry.yarnpkg.com/stream-browserify/-/stream-browserify-2.0.2.tgz#87521d38a44aa7ee91ce1cd2a47df0cb49dd660b"
-  integrity sha512-nX6hmklHs/gr2FuxYDltq8fJA1GDlxKQCz8O/IM4atRqBH8OORmBNgfvW5gG10GT/qQ9u0CzIvr2X5Pkt6ntqg==
-  dependencies:
-    inherits "~2.0.1"
-    readable-stream "^2.0.2"
-
-stream-each@^1.1.0:
-  version "1.2.3"
-  resolved "https://registry.yarnpkg.com/stream-each/-/stream-each-1.2.3.tgz#ebe27a0c389b04fbcc233642952e10731afa9bae"
-  integrity sha512-vlMC2f8I2u/bZGqkdfLQW/13Zihpej/7PmSiMQsbYddxuTsJp8vRe2x2FvVExZg7FaOds43ROAuFJwPR4MTZLw==
-  dependencies:
-    end-of-stream "^1.1.0"
-    stream-shift "^1.0.0"
-
-stream-http@^2.7.2:
-  version "2.8.3"
-  resolved "https://registry.yarnpkg.com/stream-http/-/stream-http-2.8.3.tgz#b2d242469288a5a27ec4fe8933acf623de6514fc"
-  integrity sha512-+TSkfINHDo4J+ZobQLWiMouQYB+UVYFttRA94FpEzzJ7ZdqcL4uUUQ7WkdkI4DSozGmgBUE/a47L+38PenXhUw==
-  dependencies:
-    builtin-status-codes "^3.0.0"
-    inherits "^2.0.1"
-    readable-stream "^2.3.6"
-    to-arraybuffer "^1.0.0"
-    xtend "^4.0.0"
-
-stream-shift@^1.0.0:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/stream-shift/-/stream-shift-1.0.1.tgz#d7088281559ab2778424279b0877da3c392d5a3d"
-  integrity sha512-AiisoFqQ0vbGcZgQPY1cdP2I76glaVA/RauYR4G4thNFgkTqr90yXTo4LYX60Jl+sIlPNHHdGSwo01AvbKUSVQ==
-
-strict-uri-encode@^1.0.0:
-  version "1.1.0"
-  resolved "https://registry.yarnpkg.com/strict-uri-encode/-/strict-uri-encode-1.1.0.tgz#279b225df1d582b1f54e65addd4352e18faa0713"
-  integrity sha1-J5siXfHVgrH1TmWt3UNS4Y+qBxM=
-
-string-length@^4.0.1:
-  version "4.0.1"
-  resolved "https://registry.yarnpkg.com/string-length/-/string-length-4.0.1.tgz#4a973bf31ef77c4edbceadd6af2611996985f8a1"
-  integrity sha512-PKyXUd0LK0ePjSOnWn34V2uD6acUWev9uy0Ft05k0E8xRW+SKcA0F7eMr7h5xlzfn+4O3N+55rduYyet3Jk+jw==
-  dependencies:
-    char-regex "^1.0.2"
-    strip-ansi "^6.0.0"
-
-string-natural-compare@^3.0.1:
-  version "3.0.1"
-  resolved "https://registry.yarnpkg.com/string-natural-compare/-/string-natural-compare-3.0.1.tgz#7a42d58474454963759e8e8b7ae63d71c1e7fdf4"
-  integrity sha512-n3sPwynL1nwKi3WJ6AIsClwBMa0zTi54fn2oLU6ndfTSIO05xaznjSf15PcBZU6FNWbmN5Q6cxT4V5hGvB4taw==
-
-string-width@^3.0.0, string-width@^3.1.0:
-  version "3.1.0"
-  resolved "https://registry.yarnpkg.com/string-width/-/string-width-3.1.0.tgz#22767be21b62af1081574306f69ac51b62203961"
-  integrity sha512-vafcv6KjVZKSgz06oM/H6GDBrAtz8vdhQakGjFIvNrHA6y3HCF1CInLy+QLq8dTJPQ1b+KDUqDFctkdRW44e1w==
-  dependencies:
-    emoji-regex "^7.0.1"
-    is-fullwidth-code-point "^2.0.0"
-    strip-ansi "^5.1.0"
-
-string-width@^4.1.0, string-width@^4.2.0:
-  version "4.2.0"
-  resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.0.tgz#952182c46cc7b2c313d1596e623992bd163b72b5"
-  integrity sha512-zUz5JD+tgqtuDjMhwIg5uFVV3dtqZ9yQJlZVfq4I01/K5Paj5UHj7VyrQOJvzawSVlKpObApbfD0Ed6yJc+1eg==
-  dependencies:
-    emoji-regex "^8.0.0"
-    is-fullwidth-code-point "^3.0.0"
-    strip-ansi "^6.0.0"
-
-string.prototype.matchall@^4.0.2:
-  version "4.0.4"
-  resolved "https://registry.yarnpkg.com/string.prototype.matchall/-/string.prototype.matchall-4.0.4.tgz#608f255e93e072107f5de066f81a2dfb78cf6b29"
-  integrity sha512-pknFIWVachNcyqRfaQSeu/FUfpvJTe4uskUSZ9Wc1RijsPuzbZ8TyYT8WCNnntCjUEqQ3vUHMAfVj2+wLAisPQ==
-  dependencies:
-    call-bind "^1.0.2"
-    define-properties "^1.1.3"
-    es-abstract "^1.18.0-next.2"
-    has-symbols "^1.0.1"
-    internal-slot "^1.0.3"
-    regexp.prototype.flags "^1.3.1"
-    side-channel "^1.0.4"
-
-string.prototype.trimend@^1.0.1, string.prototype.trimend@^1.0.3:
-  version "1.0.3"
-  resolved "https://registry.yarnpkg.com/string.prototype.trimend/-/string.prototype.trimend-1.0.3.tgz#a22bd53cca5c7cf44d7c9d5c732118873d6cd18b"
-  integrity sha512-ayH0pB+uf0U28CtjlLvL7NaohvR1amUvVZk+y3DYb0Ey2PUV5zPkkKy9+U1ndVEIXO8hNg18eIv9Jntbii+dKw==
-  dependencies:
-    call-bind "^1.0.0"
-    define-properties "^1.1.3"
-
-string.prototype.trimstart@^1.0.1, string.prototype.trimstart@^1.0.3:
-  version "1.0.3"
-  resolved "https://registry.yarnpkg.com/string.prototype.trimstart/-/string.prototype.trimstart-1.0.3.tgz#9b4cb590e123bb36564401d59824298de50fd5aa"
-  integrity sha512-oBIBUy5lea5tt0ovtOFiEQaBkoBBkyJhZXzJYrSmDo5IUUqbOPvVezuRs/agBIdZ2p2Eo1FD6bD9USyBLfl3xg==
-  dependencies:
-    call-bind "^1.0.0"
-    define-properties "^1.1.3"
-
-string_decoder@^1.0.0, string_decoder@^1.1.1:
-  version "1.3.0"
-  resolved "https://registry.yarnpkg.com/string_decoder/-/string_decoder-1.3.0.tgz#42f114594a46cf1a8e30b0a84f56c78c3edac21e"
-  integrity sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==
-  dependencies:
-    safe-buffer "~5.2.0"
-
-string_decoder@~1.1.1:
-  version "1.1.1"
-  resolved "https://registry.yarnpkg.com/string_decoder/-/string_decoder-1.1.1.tgz#9cf1611ba62685d7030ae9e4ba34149c3af03fc8"
-  integrity sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==
-  dependencies:
-    safe-buffer "~5.1.0"
-
-stringify-object@^3.3.0:
-  version "3.3.0"
-  resolved "https://registry.yarnpkg.com/stringify-object/-/stringify-object-3.3.0.tgz#703065aefca19300d3ce88af4f5b3956d7556629"
-  integrity sha512-rHqiFh1elqCQ9WPLIC8I0Q/g/wj5J1eMkyoiD6eoQApWHP0FtlK7rqnhmabL5VUY9JQCcqwwvlOaSuutekgyrw==
-  dependencies:
-    get-own-enumerable-property-symbols "^3.0.0"
-    is-obj "^1.0.1"
-    is-regexp "^1.0.0"
-
-strip-ansi@6.0.0, strip-ansi@^6.0.0:
-  version "6.0.0"
-  resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.0.tgz#0b1571dd7669ccd4f3e06e14ef1eed26225ae532"
-  integrity sha512-AuvKTrTfQNYNIctbR1K/YGTR1756GycPsg7b9bdV9Duqur4gv6aKqHXah67Z8ImS7WEz5QVcOtlfW2rZEugt6w==
-  dependencies:
-    ansi-regex "^5.0.0"
-
-strip-ansi@^3.0.0, strip-ansi@^3.0.1:
-  version "3.0.1"
-  resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-3.0.1.tgz#6a385fb8853d952d5ff05d0e8aaf94278dc63dcf"
-  integrity sha1-ajhfuIU9lS1f8F0Oiq+UJ43GPc8=
-  dependencies:
-    ansi-regex "^2.0.0"
-
-strip-ansi@^5.0.0, strip-ansi@^5.1.0, strip-ansi@^5.2.0:
-  version "5.2.0"
-  resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-5.2.0.tgz#8c9a536feb6afc962bdfa5b104a5091c1ad9c0ae"
-  integrity sha512-DuRs1gKbBqsMKIZlrffwlug8MHkcnpjs5VPmL1PAh+mA30U0DTotfDZ0d2UUsXpPmPmMMJ6W773MaA3J+lbiWA==
-  dependencies:
-    ansi-regex "^4.1.0"
-
-strip-bom@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/strip-bom/-/strip-bom-3.0.0.tgz#2334c18e9c759f7bdd56fdef7e9ae3d588e68ed3"
-  integrity sha1-IzTBjpx1n3vdVv3vfprj1YjmjtM=
-
-strip-bom@^4.0.0:
-  version "4.0.0"
-  resolved "https://registry.yarnpkg.com/strip-bom/-/strip-bom-4.0.0.tgz#9c3505c1db45bcedca3d9cf7a16f5c5aa3901878"
-  integrity sha512-3xurFv5tEgii33Zi8Jtp55wEIILR9eh34FAW00PZf+JnSsTmV/ioewSgQl97JHvgjoRGwPShsWm+IdrxB35d0w==
-
-strip-comments@^1.0.2:
-  version "1.0.2"
-  resolved "https://registry.yarnpkg.com/strip-comments/-/strip-comments-1.0.2.tgz#82b9c45e7f05873bee53f37168af930aa368679d"
-  integrity sha512-kL97alc47hoyIQSV165tTt9rG5dn4w1dNnBhOQ3bOU1Nc1hel09jnXANaHJ7vzHLd4Ju8kseDGzlev96pghLFw==
-  dependencies:
-    babel-extract-comments "^1.0.0"
-    babel-plugin-transform-object-rest-spread "^6.26.0"
-
-strip-eof@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/strip-eof/-/strip-eof-1.0.0.tgz#bb43ff5598a6eb05d89b59fcd129c983313606bf"
-  integrity sha1-u0P/VZim6wXYm1n80SnJgzE2Br8=
-
-strip-final-newline@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/strip-final-newline/-/strip-final-newline-2.0.0.tgz#89b852fb2fcbe936f6f4b3187afb0a12c1ab58ad"
-  integrity sha512-BrpvfNAE3dcvq7ll3xVumzjKjZQ5tI1sEUIKr3Uoks0XUl45St3FlatVqef9prk4jRDzhW6WZg+3bk93y6pLjA==
-
-strip-indent@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/strip-indent/-/strip-indent-3.0.0.tgz#c32e1cee940b6b3432c771bc2c54bcce73cd3001"
-  integrity sha512-laJTa3Jb+VQpaC6DseHhF7dXVqHTfJPCRDaEbid/drOhgitgYku/letMUqOXFoWV0zIIUbjpdH2t+tYj4bQMRQ==
-  dependencies:
-    min-indent "^1.0.0"
-
-strip-json-comments@^3.1.0, strip-json-comments@^3.1.1:
-  version "3.1.1"
-  resolved "https://registry.yarnpkg.com/strip-json-comments/-/strip-json-comments-3.1.1.tgz#31f1281b3832630434831c310c01cccda8cbe006"
-  integrity sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==
-
-style-loader@1.3.0:
-  version "1.3.0"
-  resolved "https://registry.yarnpkg.com/style-loader/-/style-loader-1.3.0.tgz#828b4a3b3b7e7aa5847ce7bae9e874512114249e"
-  integrity sha512-V7TCORko8rs9rIqkSrlMfkqA63DfoGBBJmK1kKGCcSi+BWb4cqz0SRsnp4l6rU5iwOEd0/2ePv68SV22VXon4Q==
-  dependencies:
-    loader-utils "^2.0.0"
-    schema-utils "^2.7.0"
-
-style-value-types@4.0.3:
-  version "4.0.3"
-  resolved "https://registry.yarnpkg.com/style-value-types/-/style-value-types-4.0.3.tgz#3e2e46c50e876757cba02f442c8a0b0dd970c118"
-  integrity sha512-Yk2kpwC88W2HRlJXegWlT0pfLzjKWMjj8DI4s6m2VsZsL1Ht2oUyHl1EgTYIRlFiAnC4rBSQO+EEn0YiYAxQDw==
-  dependencies:
-    hey-listen "^1.0.8"
-    tslib "^1.10.0"
-
-stylehacks@^4.0.0:
-  version "4.0.3"
-  resolved "https://registry.yarnpkg.com/stylehacks/-/stylehacks-4.0.3.tgz#6718fcaf4d1e07d8a1318690881e8d96726a71d5"
-  integrity sha512-7GlLk9JwlElY4Y6a/rmbH2MhVlTyVmiJd1PfTCqFaIBEGMYNsrO/v3SeGTdhBThLg4Z+NbOk/qFMwCa+J+3p/g==
-  dependencies:
-    browserslist "^4.0.0"
-    postcss "^7.0.0"
-    postcss-selector-parser "^3.0.0"
-
-stylis@^4.0.3:
-  version "4.0.7"
-  resolved "https://registry.yarnpkg.com/stylis/-/stylis-4.0.7.tgz#412a90c28079417f3d27c028035095e4232d2904"
-  integrity sha512-OFFeUXFgwnGOKvEXaSv0D0KQ5ADP0n6g3SVONx6I/85JzNZ3u50FRwB3lVIk1QO2HNdI75tbVzc4Z66Gdp9voA==
-
-supports-color@^5.3.0:
-  version "5.5.0"
-  resolved "https://registry.yarnpkg.com/supports-color/-/supports-color-5.5.0.tgz#e2e69a44ac8772f78a1ec0b35b689df6530efc8f"
-  integrity sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==
-  dependencies:
-    has-flag "^3.0.0"
-
-supports-color@^6.1.0:
-  version "6.1.0"
-  resolved "https://registry.yarnpkg.com/supports-color/-/supports-color-6.1.0.tgz#0764abc69c63d5ac842dd4867e8d025e880df8f3"
-  integrity sha512-qe1jfm1Mg7Nq/NSh6XE24gPXROEVsWHxC1LIx//XNlD9iw7YZQGjZNjYN7xGaEG6iKdA8EtNFW6R0gjnVXp+wQ==
-  dependencies:
-    has-flag "^3.0.0"
-
-supports-color@^7.0.0, supports-color@^7.1.0:
-  version "7.2.0"
-  resolved "https://registry.yarnpkg.com/supports-color/-/supports-color-7.2.0.tgz#1b7dcdcb32b8138801b3e478ba6a51caa89648da"
-  integrity sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==
-  dependencies:
-    has-flag "^4.0.0"
-
-supports-hyperlinks@^2.0.0:
-  version "2.1.0"
-  resolved "https://registry.yarnpkg.com/supports-hyperlinks/-/supports-hyperlinks-2.1.0.tgz#f663df252af5f37c5d49bbd7eeefa9e0b9e59e47"
-  integrity sha512-zoE5/e+dnEijk6ASB6/qrK+oYdm2do1hjoLWrqUC/8WEIW1gbxFcKuBof7sW8ArN6e+AYvsE8HBGiVRWL/F5CA==
-  dependencies:
-    has-flag "^4.0.0"
-    supports-color "^7.0.0"
-
-svg-parser@^2.0.2:
-  version "2.0.4"
-  resolved "https://registry.yarnpkg.com/svg-parser/-/svg-parser-2.0.4.tgz#fdc2e29e13951736140b76cb122c8ee6630eb6b5"
-  integrity sha512-e4hG1hRwoOdRb37cIMSgzNsxyzKfayW6VOflrwvR+/bzrkyxY/31WkbgnQpgtrNp1SdpJvpUAGTa/ZoiPNDuRQ==
-
-svgo@^1.0.0, svgo@^1.2.2:
-  version "1.3.2"
-  resolved "https://registry.yarnpkg.com/svgo/-/svgo-1.3.2.tgz#b6dc511c063346c9e415b81e43401145b96d4167"
-  integrity sha512-yhy/sQYxR5BkC98CY7o31VGsg014AKLEPxdfhora76l36hD9Rdy5NZA/Ocn6yayNPgSamYdtX2rFJdcv07AYVw==
-  dependencies:
-    chalk "^2.4.1"
-    coa "^2.0.2"
-    css-select "^2.0.0"
-    css-select-base-adapter "^0.1.1"
-    css-tree "1.0.0-alpha.37"
-    csso "^4.0.2"
-    js-yaml "^3.13.1"
-    mkdirp "~0.5.1"
-    object.values "^1.1.0"
-    sax "~1.2.4"
-    stable "^0.1.8"
-    unquote "~1.1.1"
-    util.promisify "~1.0.0"
-
-symbol-tree@^3.2.4:
-  version "3.2.4"
-  resolved "https://registry.yarnpkg.com/symbol-tree/-/symbol-tree-3.2.4.tgz#430637d248ba77e078883951fb9aa0eed7c63fa2"
-  integrity sha512-9QNk5KwDF+Bvz+PyObkmSYjI5ksVUYtjW7AU22r2NKcfLJcXp96hkDWU3+XndOsUb+AQ9QhfzfCT2O+CNWT5Tw==
-
-table@^6.0.4:
-  version "6.0.7"
-  resolved "https://registry.yarnpkg.com/table/-/table-6.0.7.tgz#e45897ffbcc1bcf9e8a87bf420f2c9e5a7a52a34"
-  integrity sha512-rxZevLGTUzWna/qBLObOe16kB2RTnnbhciwgPbMMlazz1yZGVEgnZK762xyVdVznhqxrfCeBMmMkgOOaPwjH7g==
-  dependencies:
-    ajv "^7.0.2"
-    lodash "^4.17.20"
-    slice-ansi "^4.0.0"
-    string-width "^4.2.0"
-
-tapable@^1.0.0, tapable@^1.1.3:
-  version "1.1.3"
-  resolved "https://registry.yarnpkg.com/tapable/-/tapable-1.1.3.tgz#a1fccc06b58db61fd7a45da2da44f5f3a3e67ba2"
-  integrity sha512-4WK/bYZmj8xLr+HUCODHGF1ZFzsYffasLUgEiMBY4fgtltdO6B4WJtlSbPaDTLpYTcGVwM2qLnFTICEcNxs3kA==
-
-tar@^6.0.2:
-  version "6.1.0"
-  resolved "https://registry.yarnpkg.com/tar/-/tar-6.1.0.tgz#d1724e9bcc04b977b18d5c573b333a2207229a83"
-  integrity sha512-DUCttfhsnLCjwoDoFcI+B2iJgYa93vBnDUATYEeRx6sntCTdN01VnqsIuTlALXla/LWooNg0yEGeB+Y8WdFxGA==
-  dependencies:
-    chownr "^2.0.0"
-    fs-minipass "^2.0.0"
-    minipass "^3.0.0"
-    minizlib "^2.1.1"
-    mkdirp "^1.0.3"
-    yallist "^4.0.0"
-
-temp-dir@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/temp-dir/-/temp-dir-1.0.0.tgz#0a7c0ea26d3a39afa7e0ebea9c1fc0bc4daa011d"
-  integrity sha1-CnwOom06Oa+n4OvqnB/AvE2qAR0=
-
-tempy@^0.3.0:
-  version "0.3.0"
-  resolved "https://registry.yarnpkg.com/tempy/-/tempy-0.3.0.tgz#6f6c5b295695a16130996ad5ab01a8bd726e8bf8"
-  integrity sha512-WrH/pui8YCwmeiAoxV+lpRH9HpRtgBhSR2ViBPgpGb/wnYDzp21R4MN45fsCGvLROvY67o3byhJRYRONJyImVQ==
-  dependencies:
-    temp-dir "^1.0.0"
-    type-fest "^0.3.1"
-    unique-string "^1.0.0"
-
-terminal-link@^2.0.0:
-  version "2.1.1"
-  resolved "https://registry.yarnpkg.com/terminal-link/-/terminal-link-2.1.1.tgz#14a64a27ab3c0df933ea546fba55f2d078edc994"
-  integrity sha512-un0FmiRUQNr5PJqy9kP7c40F5BOfpGlYTrxonDChEZB7pzZxRNp/bt+ymiy9/npwXya9KH99nJ/GXFIiUkYGFQ==
-  dependencies:
-    ansi-escapes "^4.2.1"
-    supports-hyperlinks "^2.0.0"
-
-terser-webpack-plugin@4.2.3:
-  version "4.2.3"
-  resolved "https://registry.yarnpkg.com/terser-webpack-plugin/-/terser-webpack-plugin-4.2.3.tgz#28daef4a83bd17c1db0297070adc07fc8cfc6a9a"
-  integrity sha512-jTgXh40RnvOrLQNgIkwEKnQ8rmHjHK4u+6UBEi+W+FPmvb+uo+chJXntKe7/3lW5mNysgSWD60KyesnhW8D6MQ==
-  dependencies:
-    cacache "^15.0.5"
-    find-cache-dir "^3.3.1"
-    jest-worker "^26.5.0"
-    p-limit "^3.0.2"
-    schema-utils "^3.0.0"
-    serialize-javascript "^5.0.1"
-    source-map "^0.6.1"
-    terser "^5.3.4"
-    webpack-sources "^1.4.3"
-
-terser-webpack-plugin@^1.4.3:
-  version "1.4.5"
-  resolved "https://registry.yarnpkg.com/terser-webpack-plugin/-/terser-webpack-plugin-1.4.5.tgz#a217aefaea330e734ffacb6120ec1fa312d6040b"
-  integrity sha512-04Rfe496lN8EYruwi6oPQkG0vo8C+HT49X687FZnpPF0qMAIHONI6HEXYPKDOE8e5HjXTyKfqRd/agHtH0kOtw==
-  dependencies:
-    cacache "^12.0.2"
-    find-cache-dir "^2.1.0"
-    is-wsl "^1.1.0"
-    schema-utils "^1.0.0"
-    serialize-javascript "^4.0.0"
-    source-map "^0.6.1"
-    terser "^4.1.2"
-    webpack-sources "^1.4.0"
-    worker-farm "^1.7.0"
-
-terser@^4.1.2, terser@^4.6.2, terser@^4.6.3:
-  version "4.8.0"
-  resolved "https://registry.yarnpkg.com/terser/-/terser-4.8.0.tgz#63056343d7c70bb29f3af665865a46fe03a0df17"
-  integrity sha512-EAPipTNeWsb/3wLPeup1tVPaXfIaU68xMnVdPafIL1TV05OhASArYyIfFvnvJCNrR2NIOvDVNNTFRa+Re2MWyw==
-  dependencies:
-    commander "^2.20.0"
-    source-map "~0.6.1"
-    source-map-support "~0.5.12"
-
-terser@^5.3.4:
-  version "5.6.0"
-  resolved "https://registry.yarnpkg.com/terser/-/terser-5.6.0.tgz#138cdf21c5e3100b1b3ddfddf720962f88badcd2"
-  integrity sha512-vyqLMoqadC1uR0vywqOZzriDYzgEkNJFK4q9GeyOBHIbiECHiWLKcWfbQWAUaPfxkjDhapSlZB9f7fkMrvkVjA==
-  dependencies:
-    commander "^2.20.0"
-    source-map "~0.7.2"
-    source-map-support "~0.5.19"
-
-test-exclude@^6.0.0:
-  version "6.0.0"
-  resolved "https://registry.yarnpkg.com/test-exclude/-/test-exclude-6.0.0.tgz#04a8698661d805ea6fa293b6cb9e63ac044ef15e"
-  integrity sha512-cAGWPIyOHU6zlmg88jwm7VRyXnMN7iV68OGAbYDk/Mh/xC/pzVPlQtY6ngoIH/5/tciuhGfvESU8GrHrcxD56w==
-  dependencies:
-    "@istanbuljs/schema" "^0.1.2"
-    glob "^7.1.4"
-    minimatch "^3.0.4"
-
-text-table@0.2.0, text-table@^0.2.0:
-  version "0.2.0"
-  resolved "https://registry.yarnpkg.com/text-table/-/text-table-0.2.0.tgz#7f5ee823ae805207c00af2df4a84ec3fcfa570b4"
-  integrity sha1-f17oI66AUgfACvLfSoTsP8+lcLQ=
-
-throat@^5.0.0:
-  version "5.0.0"
-  resolved "https://registry.yarnpkg.com/throat/-/throat-5.0.0.tgz#c5199235803aad18754a667d659b5e72ce16764b"
-  integrity sha512-fcwX4mndzpLQKBS1DVYhGAcYaYt7vsHNIvQV+WXMvnow5cgjPphq5CaayLaGsjRdSCKZFNGt7/GYAuXaNOiYCA==
-
-through2@^2.0.0:
-  version "2.0.5"
-  resolved "https://registry.yarnpkg.com/through2/-/through2-2.0.5.tgz#01c1e39eb31d07cb7d03a96a70823260b23132cd"
-  integrity sha512-/mrRod8xqpA+IHSLyGCQ2s8SPHiCDEeQJSep1jqLYeEUClOFG2Qsh+4FU6G9VeqpZnGW/Su8LQGc4YKni5rYSQ==
-  dependencies:
-    readable-stream "~2.3.6"
-    xtend "~4.0.1"
-
-thunky@^1.0.2:
-  version "1.1.0"
-  resolved "https://registry.yarnpkg.com/thunky/-/thunky-1.1.0.tgz#5abaf714a9405db0504732bbccd2cedd9ef9537d"
-  integrity sha512-eHY7nBftgThBqOyHGVN+l8gF0BucP09fMo0oO/Lb0w1OF80dJv+lDVpXG60WMQvkcxAkNybKsrEIE3ZtKGmPrA==
-
-timers-browserify@^2.0.4:
-  version "2.0.12"
-  resolved "https://registry.yarnpkg.com/timers-browserify/-/timers-browserify-2.0.12.tgz#44a45c11fbf407f34f97bccd1577c652361b00ee"
-  integrity sha512-9phl76Cqm6FhSX9Xe1ZUAMLtm1BLkKj2Qd5ApyWkXzsMRaA7dgr81kf4wJmQf/hAvg8EEyJxDo3du/0KlhPiKQ==
-  dependencies:
-    setimmediate "^1.0.4"
-
-timsort@^0.3.0:
-  version "0.3.0"
-  resolved "https://registry.yarnpkg.com/timsort/-/timsort-0.3.0.tgz#405411a8e7e6339fe64db9a234de11dc31e02bd4"
-  integrity sha1-QFQRqOfmM5/mTbmiNN4R3DHgK9Q=
-
-tiny-invariant@^1.0.2, tiny-invariant@^1.0.6:
-  version "1.1.0"
-  resolved "https://registry.yarnpkg.com/tiny-invariant/-/tiny-invariant-1.1.0.tgz#634c5f8efdc27714b7f386c35e6760991d230875"
-  integrity sha512-ytxQvrb1cPc9WBEI/HSeYYoGD0kWnGEOR8RY6KomWLBVhqz0RgTwVO9dLrGz7dC+nN9llyI7OKAgRq8Vq4ZBSw==
-
-tiny-warning@^1.0.0, tiny-warning@^1.0.3:
-  version "1.0.3"
-  resolved "https://registry.yarnpkg.com/tiny-warning/-/tiny-warning-1.0.3.tgz#94a30db453df4c643d0fd566060d60a875d84754"
-  integrity sha512-lBN9zLN/oAf68o3zNXYrdCt1kP8WsiGW8Oo2ka41b2IM5JL/S1CTyX1rW0mb/zSuJun0ZUrDxx4sqvYS2FWzPA==
-
-tinycolor2@1.4.2:
-  version "1.4.2"
-  resolved "https://registry.yarnpkg.com/tinycolor2/-/tinycolor2-1.4.2.tgz#3f6a4d1071ad07676d7fa472e1fac40a719d8803"
-  integrity sha512-vJhccZPs965sV/L2sU4oRQVAos0pQXwsvTLkWYdqJ+a8Q5kPFzJTuOFwy7UniPli44NKQGAglksjvOcpo95aZA==
-
-tmpl@1.0.x:
-  version "1.0.4"
-  resolved "https://registry.yarnpkg.com/tmpl/-/tmpl-1.0.4.tgz#23640dd7b42d00433911140820e5cf440e521dd1"
-  integrity sha1-I2QN17QtAEM5ERQIIOXPRA5SHdE=
-
-to-arraybuffer@^1.0.0:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/to-arraybuffer/-/to-arraybuffer-1.0.1.tgz#7d229b1fcc637e466ca081180836a7aabff83f43"
-  integrity sha1-fSKbH8xjfkZsoIEYCDanqr/4P0M=
-
-to-fast-properties@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/to-fast-properties/-/to-fast-properties-2.0.0.tgz#dc5e698cbd079265bc73e0377681a4e4e83f616e"
-  integrity sha1-3F5pjL0HkmW8c+A3doGk5Og/YW4=
-
-to-object-path@^0.3.0:
-  version "0.3.0"
-  resolved "https://registry.yarnpkg.com/to-object-path/-/to-object-path-0.3.0.tgz#297588b7b0e7e0ac08e04e672f85c1f4999e17af"
-  integrity sha1-KXWIt7Dn4KwI4E5nL4XB9JmeF68=
-  dependencies:
-    kind-of "^3.0.2"
-
-to-regex-range@^2.1.0:
-  version "2.1.1"
-  resolved "https://registry.yarnpkg.com/to-regex-range/-/to-regex-range-2.1.1.tgz#7c80c17b9dfebe599e27367e0d4dd5590141db38"
-  integrity sha1-fIDBe53+vlmeJzZ+DU3VWQFB2zg=
-  dependencies:
-    is-number "^3.0.0"
-    repeat-string "^1.6.1"
-
-to-regex-range@^5.0.1:
-  version "5.0.1"
-  resolved "https://registry.yarnpkg.com/to-regex-range/-/to-regex-range-5.0.1.tgz#1648c44aae7c8d988a326018ed72f5b4dd0392e4"
-  integrity sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==
-  dependencies:
-    is-number "^7.0.0"
-
-to-regex@^3.0.1, to-regex@^3.0.2:
-  version "3.0.2"
-  resolved "https://registry.yarnpkg.com/to-regex/-/to-regex-3.0.2.tgz#13cfdd9b336552f30b51f33a8ae1b42a7a7599ce"
-  integrity sha512-FWtleNAtZ/Ki2qtqej2CXTOayOH9bHDQF+Q48VpWyDXjbYxA4Yz8iDB31zXOBUlOHHKidDbqGVrTUvQMPmBGBw==
-  dependencies:
-    define-property "^2.0.2"
-    extend-shallow "^3.0.2"
-    regex-not "^1.0.2"
-    safe-regex "^1.1.0"
-
-toggle-selection@^1.0.6:
-  version "1.0.6"
-  resolved "https://registry.yarnpkg.com/toggle-selection/-/toggle-selection-1.0.6.tgz#6e45b1263f2017fa0acc7d89d78b15b8bf77da32"
-  integrity sha1-bkWxJj8gF/oKzH2J14sVuL932jI=
-
-toidentifier@1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/toidentifier/-/toidentifier-1.0.0.tgz#7e1be3470f1e77948bc43d94a3c8f4d7752ba553"
-  integrity sha512-yaOH/Pk/VEhBWWTlhI+qXxDFXlejDGcQipMlyxda9nthulaxLZUNcUqFxokp0vcYnvteJln5FNQDRrxj3YcbVw==
-
-tough-cookie@^2.3.3, tough-cookie@~2.5.0:
-  version "2.5.0"
-  resolved "https://registry.yarnpkg.com/tough-cookie/-/tough-cookie-2.5.0.tgz#cd9fb2a0aa1d5a12b473bd9fb96fa3dcff65ade2"
-  integrity sha512-nlLsUzgm1kfLXSXfRZMc1KLAugd4hqJHDTvc2hDIwS3mZAfMEuMbc03SujMF+GEcpaX/qboeycw6iO8JwVv2+g==
-  dependencies:
-    psl "^1.1.28"
-    punycode "^2.1.1"
-
-tough-cookie@^3.0.1:
-  version "3.0.1"
-  resolved "https://registry.yarnpkg.com/tough-cookie/-/tough-cookie-3.0.1.tgz#9df4f57e739c26930a018184887f4adb7dca73b2"
-  integrity sha512-yQyJ0u4pZsv9D4clxO69OEjLWYw+jbgspjTue4lTQZLfV0c5l1VmK2y1JK8E9ahdpltPOaAThPcp5nKPUgSnsg==
-  dependencies:
-    ip-regex "^2.1.0"
-    psl "^1.1.28"
-    punycode "^2.1.1"
-
-tr46@^2.0.2:
-  version "2.0.2"
-  resolved "https://registry.yarnpkg.com/tr46/-/tr46-2.0.2.tgz#03273586def1595ae08fedb38d7733cee91d2479"
-  integrity sha512-3n1qG+/5kg+jrbTzwAykB5yRYtQCTqOGKq5U5PE3b0a1/mzo6snDhjGS0zJVJunO0NrT3Dg1MLy5TjWP/UJppg==
-  dependencies:
-    punycode "^2.1.1"
-
-tryer@^1.0.1:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/tryer/-/tryer-1.0.1.tgz#f2c85406800b9b0f74c9f7465b81eaad241252f8"
-  integrity sha512-c3zayb8/kWWpycWYg87P71E1S1ZL6b6IJxfb5fvsUgsf0S2MVGaDhDXXjDMpdCpfWXqptc+4mXwmiy1ypXqRAA==
-
-ts-pnp@1.2.0, ts-pnp@^1.1.6:
-  version "1.2.0"
-  resolved "https://registry.yarnpkg.com/ts-pnp/-/ts-pnp-1.2.0.tgz#a500ad084b0798f1c3071af391e65912c86bca92"
-  integrity sha512-csd+vJOb/gkzvcCHgTGSChYpy5f1/XKNsmvBGO4JXS+z1v2HobugDz4s1IeFXM3wZB44uczs+eazB5Q/ccdhQw==
-
-tsconfig-paths@^3.9.0:
-  version "3.9.0"
-  resolved "https://registry.yarnpkg.com/tsconfig-paths/-/tsconfig-paths-3.9.0.tgz#098547a6c4448807e8fcb8eae081064ee9a3c90b"
-  integrity sha512-dRcuzokWhajtZWkQsDVKbWyY+jgcLC5sqJhg2PSgf4ZkH2aHPvaOY8YWGhmjb68b5qqTfasSsDO9k7RUiEmZAw==
-  dependencies:
-    "@types/json5" "^0.0.29"
-    json5 "^1.0.1"
-    minimist "^1.2.0"
-    strip-bom "^3.0.0"
-
-tslib@^1.0.0, tslib@^1.10.0, tslib@^1.8.1, tslib@^1.9.0, tslib@^1.9.3:
-  version "1.14.1"
-  resolved "https://registry.yarnpkg.com/tslib/-/tslib-1.14.1.tgz#cf2d38bdc34a134bcaf1091c41f6619e2f672d00"
-  integrity sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg==
-
-tslib@^2.0.0, tslib@^2.0.3:
-  version "2.1.0"
-  resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.1.0.tgz#da60860f1c2ecaa5703ab7d39bc05b6bf988b97a"
-  integrity sha512-hcVC3wYEziELGGmEEXue7D75zbwIIVUMWAVbHItGPx0ziyXxrOMQx4rQEVEV45Ut/1IotuEvwqPopzIOkDMf0A==
-
-tsutils@^3.17.1:
-  version "3.20.0"
-  resolved "https://registry.yarnpkg.com/tsutils/-/tsutils-3.20.0.tgz#ea03ea45462e146b53d70ce0893de453ff24f698"
-  integrity sha512-RYbuQuvkhuqVeXweWT3tJLKOEJ/UUw9GjNEZGWdrLLlM+611o1gwLHBpxoFJKKl25fLprp2eVthtKs5JOrNeXg==
-  dependencies:
-    tslib "^1.8.1"
-
-tty-browserify@0.0.0:
-  version "0.0.0"
-  resolved "https://registry.yarnpkg.com/tty-browserify/-/tty-browserify-0.0.0.tgz#a157ba402da24e9bf957f9aa69d524eed42901a6"
-  integrity sha1-oVe6QC2iTpv5V/mqadUk7tQpAaY=
-
-tunnel-agent@^0.6.0:
-  version "0.6.0"
-  resolved "https://registry.yarnpkg.com/tunnel-agent/-/tunnel-agent-0.6.0.tgz#27a5dea06b36b04a0a9966774b290868f0fc40fd"
-  integrity sha1-J6XeoGs2sEoKmWZ3SykIaPD8QP0=
-  dependencies:
-    safe-buffer "^5.0.1"
-
-tweetnacl@^0.14.3, tweetnacl@~0.14.0:
-  version "0.14.5"
-  resolved "https://registry.yarnpkg.com/tweetnacl/-/tweetnacl-0.14.5.tgz#5ae68177f192d4456269d108afa93ff8743f4f64"
-  integrity sha1-WuaBd/GS1EViadEIr6k/+HQ/T2Q=
-
-type-check@^0.4.0, type-check@~0.4.0:
-  version "0.4.0"
-  resolved "https://registry.yarnpkg.com/type-check/-/type-check-0.4.0.tgz#07b8203bfa7056c0657050e3ccd2c37730bab8f1"
-  integrity sha512-XleUoc9uwGXqjWwXaUTZAmzMcFZ5858QA2vvx1Ur5xIcixXIP+8LnFDgRplU30us6teqdlskFfu+ae4K79Ooew==
-  dependencies:
-    prelude-ls "^1.2.1"
-
-type-check@~0.3.2:
-  version "0.3.2"
-  resolved "https://registry.yarnpkg.com/type-check/-/type-check-0.3.2.tgz#5884cab512cf1d355e3fb784f30804b2b520db72"
-  integrity sha1-WITKtRLPHTVeP7eE8wgEsrUg23I=
-  dependencies:
-    prelude-ls "~1.1.2"
-
-type-detect@4.0.8:
-  version "4.0.8"
-  resolved "https://registry.yarnpkg.com/type-detect/-/type-detect-4.0.8.tgz#7646fb5f18871cfbb7749e69bd39a6388eb7450c"
-  integrity sha512-0fr/mIH1dlO+x7TlcMy+bIDqKPsw/70tVyeHW787goQjhmqaZe10uwLujubK9q9Lg6Fiho1KUKDYz0Z7k7g5/g==
-
-type-fest@^0.11.0:
-  version "0.11.0"
-  resolved "https://registry.yarnpkg.com/type-fest/-/type-fest-0.11.0.tgz#97abf0872310fed88a5c466b25681576145e33f1"
-  integrity sha512-OdjXJxnCN1AvyLSzeKIgXTXxV+99ZuXl3Hpo9XpJAv9MBcHrrJOQ5kV7ypXOuQie+AmWG25hLbiKdwYTifzcfQ==
-
-type-fest@^0.3.1:
-  version "0.3.1"
-  resolved "https://registry.yarnpkg.com/type-fest/-/type-fest-0.3.1.tgz#63d00d204e059474fe5e1b7c011112bbd1dc29e1"
-  integrity sha512-cUGJnCdr4STbePCgqNFbpVNCepa+kAVohJs1sLhxzdH+gnEoOd8VhbYa7pD3zZYGiURWM2xzEII3fQcRizDkYQ==
-
-type-fest@^0.6.0:
-  version "0.6.0"
-  resolved "https://registry.yarnpkg.com/type-fest/-/type-fest-0.6.0.tgz#8d2a2370d3df886eb5c90ada1c5bf6188acf838b"
-  integrity sha512-q+MB8nYR1KDLrgr4G5yemftpMC7/QLqVndBmEEdqzmNj5dcFOO4Oo8qlwZE3ULT3+Zim1F8Kq4cBnikNhlCMlg==
-
-type-fest@^0.8.1:
-  version "0.8.1"
-  resolved "https://registry.yarnpkg.com/type-fest/-/type-fest-0.8.1.tgz#09e249ebde851d3b1e48d27c105444667f17b83d"
-  integrity sha512-4dbzIzqvjtgiM5rw1k5rEHtBANKmdudhGyBEajN01fEyhaAIhsoKNy6y7+IN93IfpFtwY9iqi7kD+xwKhQsNJA==
-
-type-is@~1.6.17, type-is@~1.6.18:
-  version "1.6.18"
-  resolved "https://registry.yarnpkg.com/type-is/-/type-is-1.6.18.tgz#4e552cd05df09467dcbc4ef739de89f2cf37c131"
-  integrity sha512-TkRKr9sUTxEH8MdfuCSP7VizJyzRNMjj2J2do2Jr3Kym598JVdEksuzPQCnlFPW4ky9Q+iA+ma9BGm06XQBy8g==
-  dependencies:
-    media-typer "0.3.0"
-    mime-types "~2.1.24"
-
-type@^1.0.1:
-  version "1.2.0"
-  resolved "https://registry.yarnpkg.com/type/-/type-1.2.0.tgz#848dd7698dafa3e54a6c479e759c4bc3f18847a0"
-  integrity sha512-+5nt5AAniqsCnu2cEQQdpzCAh33kVx8n0VoFidKpB1dVVLAN/F+bgVOqOJqOnEnrhp222clB5p3vUlD+1QAnfg==
-
-type@^2.0.0:
-  version "2.3.0"
-  resolved "https://registry.yarnpkg.com/type/-/type-2.3.0.tgz#ada7c045f07ead08abf9e2edd29be1a0c0661132"
-  integrity sha512-rgPIqOdfK/4J9FhiVrZ3cveAjRRo5rsQBAIhnylX874y1DX/kEKSVdLsnuHB6l1KTjHyU01VjiMBHgU2adejyg==
-
-typedarray-to-buffer@^3.1.5:
-  version "3.1.5"
-  resolved "https://registry.yarnpkg.com/typedarray-to-buffer/-/typedarray-to-buffer-3.1.5.tgz#a97ee7a9ff42691b9f783ff1bc5112fe3fca9080"
-  integrity sha512-zdu8XMNEDepKKR+XYOXAVPtWui0ly0NtohUscw+UmaHiAWT8hrV1rr//H6V+0DvJ3OQ19S979M0laLfX8rm82Q==
-  dependencies:
-    is-typedarray "^1.0.0"
-
-typedarray@^0.0.6:
-  version "0.0.6"
-  resolved "https://registry.yarnpkg.com/typedarray/-/typedarray-0.0.6.tgz#867ac74e3864187b1d3d47d996a78ec5c8830777"
-  integrity sha1-hnrHTjhkGHsdPUfZlqeOxciDB3c=
-
-typescript@^4.1.2:
-  version "4.2.2"
-  resolved "https://registry.yarnpkg.com/typescript/-/typescript-4.2.2.tgz#1450f020618f872db0ea17317d16d8da8ddb8c4c"
-  integrity sha512-tbb+NVrLfnsJy3M59lsDgrzWIflR4d4TIUjz+heUnHZwdF7YsrMTKoRERiIvI2lvBG95dfpLxB21WZhys1bgaQ==
-
-unicode-canonical-property-names-ecmascript@^1.0.4:
-  version "1.0.4"
-  resolved "https://registry.yarnpkg.com/unicode-canonical-property-names-ecmascript/-/unicode-canonical-property-names-ecmascript-1.0.4.tgz#2619800c4c825800efdd8343af7dd9933cbe2818"
-  integrity sha512-jDrNnXWHd4oHiTZnx/ZG7gtUTVp+gCcTTKr8L0HjlwphROEW3+Him+IpvC+xcJEFegapiMZyZe02CyuOnRmbnQ==
-
-unicode-match-property-ecmascript@^1.0.4:
-  version "1.0.4"
-  resolved "https://registry.yarnpkg.com/unicode-match-property-ecmascript/-/unicode-match-property-ecmascript-1.0.4.tgz#8ed2a32569961bce9227d09cd3ffbb8fed5f020c"
-  integrity sha512-L4Qoh15vTfntsn4P1zqnHulG0LdXgjSO035fEpdtp6YxXhMT51Q6vgM5lYdG/5X3MjS+k/Y9Xw4SFCY9IkR0rg==
-  dependencies:
-    unicode-canonical-property-names-ecmascript "^1.0.4"
-    unicode-property-aliases-ecmascript "^1.0.4"
-
-unicode-match-property-value-ecmascript@^1.2.0:
-  version "1.2.0"
-  resolved "https://registry.yarnpkg.com/unicode-match-property-value-ecmascript/-/unicode-match-property-value-ecmascript-1.2.0.tgz#0d91f600eeeb3096aa962b1d6fc88876e64ea531"
-  integrity sha512-wjuQHGQVofmSJv1uVISKLE5zO2rNGzM/KCYZch/QQvez7C1hUhBIuZ701fYXExuufJFMPhv2SyL8CyoIfMLbIQ==
-
-unicode-property-aliases-ecmascript@^1.0.4:
-  version "1.1.0"
-  resolved "https://registry.yarnpkg.com/unicode-property-aliases-ecmascript/-/unicode-property-aliases-ecmascript-1.1.0.tgz#dd57a99f6207bedff4628abefb94c50db941c8f4"
-  integrity sha512-PqSoPh/pWetQ2phoj5RLiaqIk4kCNwoV3CI+LfGmWLKI3rE3kl1h59XpX2BjgDrmbxD9ARtQobPGU1SguCYuQg==
-
-union-value@^1.0.0:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/union-value/-/union-value-1.0.1.tgz#0b6fe7b835aecda61c6ea4d4f02c14221e109847"
-  integrity sha512-tJfXmxMeWYnczCVs7XAEvIV7ieppALdyepWMkHkwciRpZraG/xwT+s2JN8+pr1+8jCRf80FFzvr+MpQeeoF4Xg==
-  dependencies:
-    arr-union "^3.1.0"
-    get-value "^2.0.6"
-    is-extendable "^0.1.1"
-    set-value "^2.0.1"
-
-uniq@^1.0.1:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/uniq/-/uniq-1.0.1.tgz#b31c5ae8254844a3a8281541ce2b04b865a734ff"
-  integrity sha1-sxxa6CVIRKOoKBVBzisEuGWnNP8=
-
-uniqs@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/uniqs/-/uniqs-2.0.0.tgz#ffede4b36b25290696e6e165d4a59edb998e6b02"
-  integrity sha1-/+3ks2slKQaW5uFl1KWe25mOawI=
-
-unique-filename@^1.1.1:
-  version "1.1.1"
-  resolved "https://registry.yarnpkg.com/unique-filename/-/unique-filename-1.1.1.tgz#1d69769369ada0583103a1e6ae87681b56573230"
-  integrity sha512-Vmp0jIp2ln35UTXuryvjzkjGdRyf9b2lTXuSYUiPmzRcl3FDtYqAwOnTJkAngD9SWhnoJzDbTKwaOrZ+STtxNQ==
-  dependencies:
-    unique-slug "^2.0.0"
-
-unique-slug@^2.0.0:
-  version "2.0.2"
-  resolved "https://registry.yarnpkg.com/unique-slug/-/unique-slug-2.0.2.tgz#baabce91083fc64e945b0f3ad613e264f7cd4e6c"
-  integrity sha512-zoWr9ObaxALD3DOPfjPSqxt4fnZiWblxHIgeWqW8x7UqDzEtHEQLzji2cuJYQFCU6KmoJikOYAZlrTHHebjx2w==
-  dependencies:
-    imurmurhash "^0.1.4"
-
-unique-string@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/unique-string/-/unique-string-1.0.0.tgz#9e1057cca851abb93398f8b33ae187b99caec11a"
-  integrity sha1-nhBXzKhRq7kzmPizOuGHuZyuwRo=
-  dependencies:
-    crypto-random-string "^1.0.0"
-
-universalify@^0.1.0:
-  version "0.1.2"
-  resolved "https://registry.yarnpkg.com/universalify/-/universalify-0.1.2.tgz#b646f69be3942dabcecc9d6639c80dc105efaa66"
-  integrity sha512-rBJeI5CXAlmy1pV+617WB9J63U6XcazHHF2f2dbJix4XzpUF0RS3Zbj0FGIOCAva5P/d/GBOYaACQ1w+0azUkg==
-
-universalify@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/universalify/-/universalify-2.0.0.tgz#75a4984efedc4b08975c5aeb73f530d02df25717"
-  integrity sha512-hAZsKq7Yy11Zu1DE0OzWjw7nnLZmJZYTDZZyEFHZdUhV8FkH5MCfoU1XMaxXovpyW5nq5scPqq0ZDP9Zyl04oQ==
-
-unpipe@1.0.0, unpipe@~1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/unpipe/-/unpipe-1.0.0.tgz#b2bf4ee8514aae6165b4817829d21b2ef49904ec"
-  integrity sha1-sr9O6FFKrmFltIF4KdIbLvSZBOw=
-
-unquote@~1.1.1:
-  version "1.1.1"
-  resolved "https://registry.yarnpkg.com/unquote/-/unquote-1.1.1.tgz#8fded7324ec6e88a0ff8b905e7c098cdc086d544"
-  integrity sha1-j97XMk7G6IoP+LkF58CYzcCG1UQ=
-
-unset-value@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/unset-value/-/unset-value-1.0.0.tgz#8376873f7d2335179ffb1e6fc3a8ed0dfc8ab559"
-  integrity sha1-g3aHP30jNRef+x5vw6jtDfyKtVk=
-  dependencies:
-    has-value "^0.3.1"
-    isobject "^3.0.0"
-
-upath@^1.1.1, upath@^1.1.2, upath@^1.2.0:
-  version "1.2.0"
-  resolved "https://registry.yarnpkg.com/upath/-/upath-1.2.0.tgz#8f66dbcd55a883acdae4408af8b035a5044c1894"
-  integrity sha512-aZwGpamFO61g3OlfT7OQCHqhGnW43ieH9WZeP7QxN/G/jS4jfqUkZxoryvJgVPEcrl5NL/ggHsSmLMHuH64Lhg==
-
-uri-js@^4.2.2:
-  version "4.4.1"
-  resolved "https://registry.yarnpkg.com/uri-js/-/uri-js-4.4.1.tgz#9b1a52595225859e55f669d928f88c6c57f2a77e"
-  integrity sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==
-  dependencies:
-    punycode "^2.1.0"
-
-urix@^0.1.0:
-  version "0.1.0"
-  resolved "https://registry.yarnpkg.com/urix/-/urix-0.1.0.tgz#da937f7a62e21fec1fd18d49b35c2935067a6c72"
-  integrity sha1-2pN/emLiH+wf0Y1Js1wpNQZ6bHI=
-
-url-loader@4.1.1:
-  version "4.1.1"
-  resolved "https://registry.yarnpkg.com/url-loader/-/url-loader-4.1.1.tgz#28505e905cae158cf07c92ca622d7f237e70a4e2"
-  integrity sha512-3BTV812+AVHHOJQO8O5MkWgZ5aosP7GnROJwvzLS9hWDj00lZ6Z0wNak423Lp9PBZN05N+Jk/N5Si8jRAlGyWA==
-  dependencies:
-    loader-utils "^2.0.0"
-    mime-types "^2.1.27"
-    schema-utils "^3.0.0"
-
-url-parse@^1.4.3, url-parse@^1.4.7:
-  version "1.5.1"
-  resolved "https://registry.yarnpkg.com/url-parse/-/url-parse-1.5.1.tgz#d5fa9890af8a5e1f274a2c98376510f6425f6e3b"
-  integrity sha512-HOfCOUJt7iSYzEx/UqgtwKRMC6EU91NFhsCHMv9oM03VJcVo2Qrp8T8kI9D7amFf1cu+/3CEhgb3rF9zL7k85Q==
-  dependencies:
-    querystringify "^2.1.1"
-    requires-port "^1.0.0"
-
-url@^0.11.0:
-  version "0.11.0"
-  resolved "https://registry.yarnpkg.com/url/-/url-0.11.0.tgz#3838e97cfc60521eb73c525a8e55bfdd9e2e28f1"
-  integrity sha1-ODjpfPxgUh63PFJajlW/3Z4uKPE=
-  dependencies:
-    punycode "1.3.2"
-    querystring "0.2.0"
-
-use-callback-ref@^1.2.1, use-callback-ref@^1.2.3:
-  version "1.2.5"
-  resolved "https://registry.yarnpkg.com/use-callback-ref/-/use-callback-ref-1.2.5.tgz#6115ed242cfbaed5915499c0a9842ca2912f38a5"
-  integrity sha512-gN3vgMISAgacF7sqsLPByqoePooY3n2emTH59Ur5d/M8eg4WTWu1xp8i8DHjohftIyEx0S08RiYxbffr4j8Peg==
-
-use-sidecar@^1.0.1:
-  version "1.0.4"
-  resolved "https://registry.yarnpkg.com/use-sidecar/-/use-sidecar-1.0.4.tgz#38398c3723727f9f924bed2343dfa3db6aaaee46"
-  integrity sha512-A5ggIS3/qTdxCAlcy05anO2/oqXOfpmxnpRE1Jm+fHHtCvUvNSZDGqgOSAXPriBVAcw2fMFFkh5v5KqrFFhCMA==
-  dependencies:
-    detect-node-es "^1.0.0"
-    tslib "^1.9.3"
-
-use@^3.1.0:
-  version "3.1.1"
-  resolved "https://registry.yarnpkg.com/use/-/use-3.1.1.tgz#d50c8cac79a19fbc20f2911f56eb973f4e10070f"
-  integrity sha512-cwESVXlO3url9YWlFW/TA9cshCEhtu7IKJ/p5soJ/gGpj7vbvFrAY/eIioQ6Dw23KjZhYgiIo8HOs1nQ2vr/oQ==
-
-util-deprecate@^1.0.1, util-deprecate@^1.0.2, util-deprecate@~1.0.1:
-  version "1.0.2"
-  resolved "https://registry.yarnpkg.com/util-deprecate/-/util-deprecate-1.0.2.tgz#450d4dc9fa70de732762fbd2d4a28981419a0ccf"
-  integrity sha1-RQ1Nyfpw3nMnYvvS1KKJgUGaDM8=
-
-util.promisify@1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/util.promisify/-/util.promisify-1.0.0.tgz#440f7165a459c9a16dc145eb8e72f35687097030"
-  integrity sha512-i+6qA2MPhvoKLuxnJNpXAGhg7HphQOSUq2LKMZD0m15EiskXUkMvKdF4Uui0WYeCUGea+o2cw/ZuwehtfsrNkA==
-  dependencies:
-    define-properties "^1.1.2"
-    object.getownpropertydescriptors "^2.0.3"
-
-util.promisify@~1.0.0:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/util.promisify/-/util.promisify-1.0.1.tgz#6baf7774b80eeb0f7520d8b81d07982a59abbaee"
-  integrity sha512-g9JpC/3He3bm38zsLupWryXHoEcS22YHthuPQSJdMy6KNrzIRzWqcsHzD/WUnqe45whVou4VIsPew37DoXWNrA==
-  dependencies:
-    define-properties "^1.1.3"
-    es-abstract "^1.17.2"
-    has-symbols "^1.0.1"
-    object.getownpropertydescriptors "^2.1.0"
-
-util@0.10.3:
-  version "0.10.3"
-  resolved "https://registry.yarnpkg.com/util/-/util-0.10.3.tgz#7afb1afe50805246489e3db7fe0ed379336ac0f9"
-  integrity sha1-evsa/lCAUkZInj23/g7TeTNqwPk=
-  dependencies:
-    inherits "2.0.1"
-
-util@^0.11.0:
-  version "0.11.1"
-  resolved "https://registry.yarnpkg.com/util/-/util-0.11.1.tgz#3236733720ec64bb27f6e26f421aaa2e1b588d61"
-  integrity sha512-HShAsny+zS2TZfaXxD9tYj4HQGlBezXZMZuM/S5PKLLoZkShZiGk9o5CzukI1LVHZvjdvZ2Sj1aW/Ndn2NB/HQ==
-  dependencies:
-    inherits "2.0.3"
-
-utila@~0.4:
-  version "0.4.0"
-  resolved "https://registry.yarnpkg.com/utila/-/utila-0.4.0.tgz#8a16a05d445657a3aea5eecc5b12a4fa5379772c"
-  integrity sha1-ihagXURWV6Oupe7MWxKk+lN5dyw=
-
-utils-merge@1.0.1:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/utils-merge/-/utils-merge-1.0.1.tgz#9f95710f50a267947b2ccc124741c1028427e713"
-  integrity sha1-n5VxD1CiZ5R7LMwSR0HBAoQn5xM=
-
-uuid@^3.3.2, uuid@^3.4.0:
-  version "3.4.0"
-  resolved "https://registry.yarnpkg.com/uuid/-/uuid-3.4.0.tgz#b23e4358afa8a202fe7a100af1f5f883f02007ee"
-  integrity sha512-HjSDRw6gZE5JMggctHBcjVak08+KEVhSIiDzFnT9S9aegmp85S/bReBVTb4QTFaRNptJ9kuYaNhnbNEOkbKb/A==
-
-uuid@^8.3.0:
-  version "8.3.2"
-  resolved "https://registry.yarnpkg.com/uuid/-/uuid-8.3.2.tgz#80d5b5ced271bb9af6c445f21a1a04c606cefbe2"
-  integrity sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg==
-
-v8-compile-cache@^2.0.3:
-  version "2.2.0"
-  resolved "https://registry.yarnpkg.com/v8-compile-cache/-/v8-compile-cache-2.2.0.tgz#9471efa3ef9128d2f7c6a7ca39c4dd6b5055b132"
-  integrity sha512-gTpR5XQNKFwOd4clxfnhaqvfqMpqEwr4tOtCyz4MtYZX2JYhfr1JvBFKdS+7K/9rfpZR3VLX+YWBbKoxCgS43Q==
-
-v8-to-istanbul@^7.0.0:
-  version "7.1.0"
-  resolved "https://registry.yarnpkg.com/v8-to-istanbul/-/v8-to-istanbul-7.1.0.tgz#5b95cef45c0f83217ec79f8fc7ee1c8b486aee07"
-  integrity sha512-uXUVqNUCLa0AH1vuVxzi+MI4RfxEOKt9pBgKwHbgH7st8Kv2P1m+jvWNnektzBh5QShF3ODgKmUFCf38LnVz1g==
-  dependencies:
-    "@types/istanbul-lib-coverage" "^2.0.1"
-    convert-source-map "^1.6.0"
-    source-map "^0.7.3"
-
-validate-npm-package-license@^3.0.1:
-  version "3.0.4"
-  resolved "https://registry.yarnpkg.com/validate-npm-package-license/-/validate-npm-package-license-3.0.4.tgz#fc91f6b9c7ba15c857f4cb2c5defeec39d4f410a"
-  integrity sha512-DpKm2Ui/xN7/HQKCtpZxoRWBhZ9Z0kqtygG8XCgNQ8ZlDnxuQmWhj566j8fN4Cu3/JmbhsDo7fcAJq4s9h27Ew==
-  dependencies:
-    spdx-correct "^3.0.0"
-    spdx-expression-parse "^3.0.0"
-
-value-equal@^1.0.1:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/value-equal/-/value-equal-1.0.1.tgz#1e0b794c734c5c0cade179c437d356d931a34d6c"
-  integrity sha512-NOJ6JZCAWr0zlxZt+xqCHNTEKOsrks2HQd4MqhP1qy4z1SkbEP467eNx6TgDKXMvUOb+OENfJCZwM+16n7fRfw==
-
-vary@~1.1.2:
-  version "1.1.2"
-  resolved "https://registry.yarnpkg.com/vary/-/vary-1.1.2.tgz#2299f02c6ded30d4a5961b0b9f74524a18f634fc"
-  integrity sha1-IpnwLG3tMNSllhsLn3RSShj2NPw=
-
-vendors@^1.0.0:
-  version "1.0.4"
-  resolved "https://registry.yarnpkg.com/vendors/-/vendors-1.0.4.tgz#e2b800a53e7a29b93506c3cf41100d16c4c4ad8e"
-  integrity sha512-/juG65kTL4Cy2su4P8HjtkTxk6VmJDiOPBufWniqQ6wknac6jNiXS9vU+hO3wgusiyqWlzTbVHi0dyJqRONg3w==
-
-verror@1.10.0:
-  version "1.10.0"
-  resolved "https://registry.yarnpkg.com/verror/-/verror-1.10.0.tgz#3a105ca17053af55d6e270c1f8288682e18da400"
-  integrity sha1-OhBcoXBTr1XW4nDB+CiGguGNpAA=
-  dependencies:
-    assert-plus "^1.0.0"
-    core-util-is "1.0.2"
-    extsprintf "^1.2.0"
-
-vm-browserify@^1.0.1:
-  version "1.1.2"
-  resolved "https://registry.yarnpkg.com/vm-browserify/-/vm-browserify-1.1.2.tgz#78641c488b8e6ca91a75f511e7a3b32a86e5dda0"
-  integrity sha512-2ham8XPWTONajOR0ohOKOHXkm3+gaBmGut3SRuu75xLd/RRaY6vqgh8NBYYk7+RW3u5AtzPQZG8F10LHkl0lAQ==
-
-w3c-hr-time@^1.0.2:
-  version "1.0.2"
-  resolved "https://registry.yarnpkg.com/w3c-hr-time/-/w3c-hr-time-1.0.2.tgz#0a89cdf5cc15822df9c360543676963e0cc308cd"
-  integrity sha512-z8P5DvDNjKDoFIHK7q8r8lackT6l+jo/Ye3HOle7l9nICP9lf1Ci25fy9vHd0JOWewkIFzXIEig3TdKT7JQ5fQ==
-  dependencies:
-    browser-process-hrtime "^1.0.0"
-
-w3c-xmlserializer@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/w3c-xmlserializer/-/w3c-xmlserializer-2.0.0.tgz#3e7104a05b75146cc60f564380b7f683acf1020a"
-  integrity sha512-4tzD0mF8iSiMiNs30BiLO3EpfGLZUT2MSX/G+o7ZywDzliWQ3OPtTZ0PTC3B3ca1UAf4cJMHB+2Bf56EriJuRA==
-  dependencies:
-    xml-name-validator "^3.0.0"
-
-walker@^1.0.7, walker@~1.0.5:
-  version "1.0.7"
-  resolved "https://registry.yarnpkg.com/walker/-/walker-1.0.7.tgz#2f7f9b8fd10d677262b18a884e28d19618e028fb"
-  integrity sha1-L3+bj9ENZ3JisYqITijRlhjgKPs=
-  dependencies:
-    makeerror "1.0.x"
-
-warning@^4.0.3:
-  version "4.0.3"
-  resolved "https://registry.yarnpkg.com/warning/-/warning-4.0.3.tgz#16e9e077eb8a86d6af7d64aa1e05fd85b4678ca3"
-  integrity sha512-rpJyN222KWIvHJ/F53XSZv0Zl/accqHR8et1kpaMTD/fLCRxtV8iX8czMzY7sVZupTI3zcUTg8eycS2kNF9l6w==
-  dependencies:
-    loose-envify "^1.0.0"
-
-watchpack-chokidar2@^2.0.1:
-  version "2.0.1"
-  resolved "https://registry.yarnpkg.com/watchpack-chokidar2/-/watchpack-chokidar2-2.0.1.tgz#38500072ee6ece66f3769936950ea1771be1c957"
-  integrity sha512-nCFfBIPKr5Sh61s4LPpy1Wtfi0HE8isJ3d2Yb5/Ppw2P2B/3eVSEBjKfN0fmHJSK14+31KwMKmcrzs2GM4P0Ww==
-  dependencies:
-    chokidar "^2.1.8"
-
-watchpack@^1.7.4:
-  version "1.7.5"
-  resolved "https://registry.yarnpkg.com/watchpack/-/watchpack-1.7.5.tgz#1267e6c55e0b9b5be44c2023aed5437a2c26c453"
-  integrity sha512-9P3MWk6SrKjHsGkLT2KHXdQ/9SNkyoJbabxnKOoJepsvJjJG8uYTR3yTPxPQvNDI3w4Nz1xnE0TLHK4RIVe/MQ==
-  dependencies:
-    graceful-fs "^4.1.2"
-    neo-async "^2.5.0"
-  optionalDependencies:
-    chokidar "^3.4.1"
-    watchpack-chokidar2 "^2.0.1"
-
-wbuf@^1.1.0, wbuf@^1.7.3:
-  version "1.7.3"
-  resolved "https://registry.yarnpkg.com/wbuf/-/wbuf-1.7.3.tgz#c1d8d149316d3ea852848895cb6a0bfe887b87df"
-  integrity sha512-O84QOnr0icsbFGLS0O3bI5FswxzRr8/gHwWkDlQFskhSPryQXvrTMxjxGP4+iWYoauLoBvfDpkrOauZ+0iZpDA==
-  dependencies:
-    minimalistic-assert "^1.0.0"
-
-web-vitals@^1.0.1:
-  version "1.1.0"
-  resolved "https://registry.yarnpkg.com/web-vitals/-/web-vitals-1.1.0.tgz#7f410d9a1f7a1cd5d952806b45776204b47dc274"
-  integrity sha512-1cx54eRxY/+M0KNKdNpNnuXAXG+vJEvwScV4DiV9rOYDguHoeDIzm09ghBohOPtkqPO5OtPC14FWkNva3SDisg==
-
-webidl-conversions@^5.0.0:
-  version "5.0.0"
-  resolved "https://registry.yarnpkg.com/webidl-conversions/-/webidl-conversions-5.0.0.tgz#ae59c8a00b121543a2acc65c0434f57b0fc11aff"
-  integrity sha512-VlZwKPCkYKxQgeSbH5EyngOmRp7Ww7I9rQLERETtf5ofd9pGeswWiOtogpEO850jziPRarreGxn5QIiTqpb2wA==
-
-webidl-conversions@^6.1.0:
-  version "6.1.0"
-  resolved "https://registry.yarnpkg.com/webidl-conversions/-/webidl-conversions-6.1.0.tgz#9111b4d7ea80acd40f5270d666621afa78b69514"
-  integrity sha512-qBIvFLGiBpLjfwmYAaHPXsn+ho5xZnGvyGvsarywGNc8VyQJUMHJ8OBKGGrPER0okBeMDaan4mNBlgBROxuI8w==
-
-webpack-dev-middleware@^3.7.2:
-  version "3.7.3"
-  resolved "https://registry.yarnpkg.com/webpack-dev-middleware/-/webpack-dev-middleware-3.7.3.tgz#0639372b143262e2b84ab95d3b91a7597061c2c5"
-  integrity sha512-djelc/zGiz9nZj/U7PTBi2ViorGJXEWo/3ltkPbDyxCXhhEXkW0ce99falaok4TPj+AsxLiXJR0EBOb0zh9fKQ==
-  dependencies:
-    memory-fs "^0.4.1"
-    mime "^2.4.4"
-    mkdirp "^0.5.1"
-    range-parser "^1.2.1"
-    webpack-log "^2.0.0"
-
-webpack-dev-server@3.11.1:
-  version "3.11.1"
-  resolved "https://registry.yarnpkg.com/webpack-dev-server/-/webpack-dev-server-3.11.1.tgz#c74028bf5ba8885aaf230e48a20e8936ab8511f0"
-  integrity sha512-u4R3mRzZkbxQVa+MBWi2uVpB5W59H3ekZAJsQlKUTdl7Elcah2EhygTPLmeFXybQkf9i2+L0kn7ik9SnXa6ihQ==
-  dependencies:
-    ansi-html "0.0.7"
-    bonjour "^3.5.0"
-    chokidar "^2.1.8"
-    compression "^1.7.4"
-    connect-history-api-fallback "^1.6.0"
-    debug "^4.1.1"
-    del "^4.1.1"
-    express "^4.17.1"
-    html-entities "^1.3.1"
-    http-proxy-middleware "0.19.1"
-    import-local "^2.0.0"
-    internal-ip "^4.3.0"
-    ip "^1.1.5"
-    is-absolute-url "^3.0.3"
-    killable "^1.0.1"
-    loglevel "^1.6.8"
-    opn "^5.5.0"
-    p-retry "^3.0.1"
-    portfinder "^1.0.26"
-    schema-utils "^1.0.0"
-    selfsigned "^1.10.8"
-    semver "^6.3.0"
-    serve-index "^1.9.1"
-    sockjs "^0.3.21"
-    sockjs-client "^1.5.0"
-    spdy "^4.0.2"
-    strip-ansi "^3.0.1"
-    supports-color "^6.1.0"
-    url "^0.11.0"
-    webpack-dev-middleware "^3.7.2"
-    webpack-log "^2.0.0"
-    ws "^6.2.1"
-    yargs "^13.3.2"
-
-webpack-log@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/webpack-log/-/webpack-log-2.0.0.tgz#5b7928e0637593f119d32f6227c1e0ac31e1b47f"
-  integrity sha512-cX8G2vR/85UYG59FgkoMamwHUIkSSlV3bBMRsbxVXVUk2j6NleCKjQ/WE9eYg9WY4w25O9w8wKP4rzNZFmUcUg==
-  dependencies:
-    ansi-colors "^3.0.0"
-    uuid "^3.3.2"
-
-webpack-manifest-plugin@2.2.0:
-  version "2.2.0"
-  resolved "https://registry.yarnpkg.com/webpack-manifest-plugin/-/webpack-manifest-plugin-2.2.0.tgz#19ca69b435b0baec7e29fbe90fb4015de2de4f16"
-  integrity sha512-9S6YyKKKh/Oz/eryM1RyLVDVmy3NSPV0JXMRhZ18fJsq+AwGxUY34X54VNwkzYcEmEkDwNxuEOboCZEebJXBAQ==
-  dependencies:
-    fs-extra "^7.0.0"
-    lodash ">=3.5 <5"
-    object.entries "^1.1.0"
-    tapable "^1.0.0"
-
-webpack-sources@^1.1.0, webpack-sources@^1.3.0, webpack-sources@^1.4.0, webpack-sources@^1.4.1, webpack-sources@^1.4.3:
-  version "1.4.3"
-  resolved "https://registry.yarnpkg.com/webpack-sources/-/webpack-sources-1.4.3.tgz#eedd8ec0b928fbf1cbfe994e22d2d890f330a933"
-  integrity sha512-lgTS3Xhv1lCOKo7SA5TjKXMjpSM4sBjNV5+q2bqesbSPs5FjGmU6jjtBSkX9b4qW87vDIsCIlUPOEhbZrMdjeQ==
-  dependencies:
-    source-list-map "^2.0.0"
-    source-map "~0.6.1"
-
-webpack@4.44.2:
-  version "4.44.2"
-  resolved "https://registry.yarnpkg.com/webpack/-/webpack-4.44.2.tgz#6bfe2b0af055c8b2d1e90ed2cd9363f841266b72"
-  integrity sha512-6KJVGlCxYdISyurpQ0IPTklv+DULv05rs2hseIXer6D7KrUicRDLFb4IUM1S6LUAKypPM/nSiVSuv8jHu1m3/Q==
-  dependencies:
-    "@webassemblyjs/ast" "1.9.0"
-    "@webassemblyjs/helper-module-context" "1.9.0"
-    "@webassemblyjs/wasm-edit" "1.9.0"
-    "@webassemblyjs/wasm-parser" "1.9.0"
-    acorn "^6.4.1"
-    ajv "^6.10.2"
-    ajv-keywords "^3.4.1"
-    chrome-trace-event "^1.0.2"
-    enhanced-resolve "^4.3.0"
-    eslint-scope "^4.0.3"
-    json-parse-better-errors "^1.0.2"
-    loader-runner "^2.4.0"
-    loader-utils "^1.2.3"
-    memory-fs "^0.4.1"
-    micromatch "^3.1.10"
-    mkdirp "^0.5.3"
-    neo-async "^2.6.1"
-    node-libs-browser "^2.2.1"
-    schema-utils "^1.0.0"
-    tapable "^1.1.3"
-    terser-webpack-plugin "^1.4.3"
-    watchpack "^1.7.4"
-    webpack-sources "^1.4.1"
-
-websocket-driver@>=0.5.1, websocket-driver@^0.7.4:
-  version "0.7.4"
-  resolved "https://registry.yarnpkg.com/websocket-driver/-/websocket-driver-0.7.4.tgz#89ad5295bbf64b480abcba31e4953aca706f5760"
-  integrity sha512-b17KeDIQVjvb0ssuSDF2cYXSg2iztliJ4B9WdsuB6J952qCPKmnVq4DyW5motImXHDC1cBT/1UezrJVsKw5zjg==
-  dependencies:
-    http-parser-js ">=0.5.1"
-    safe-buffer ">=5.1.0"
-    websocket-extensions ">=0.1.1"
-
-websocket-extensions@>=0.1.1:
-  version "0.1.4"
-  resolved "https://registry.yarnpkg.com/websocket-extensions/-/websocket-extensions-0.1.4.tgz#7f8473bc839dfd87608adb95d7eb075211578a42"
-  integrity sha512-OqedPIGOfsDlo31UNwYbCFMSaO9m9G/0faIHj5/dZFDMFqPTcx6UwqyOy3COEaEOg/9VsGIpdqn62W5KhoKSpg==
-
-whatwg-encoding@^1.0.5:
-  version "1.0.5"
-  resolved "https://registry.yarnpkg.com/whatwg-encoding/-/whatwg-encoding-1.0.5.tgz#5abacf777c32166a51d085d6b4f3e7d27113ddb0"
-  integrity sha512-b5lim54JOPN9HtzvK9HFXvBma/rnfFeqsic0hSpjtDbVxR3dJKLc+KB4V6GgiGOvl7CY/KNh8rxSo9DKQrnUEw==
-  dependencies:
-    iconv-lite "0.4.24"
-
-whatwg-fetch@^3.4.1:
-  version "3.6.1"
-  resolved "https://registry.yarnpkg.com/whatwg-fetch/-/whatwg-fetch-3.6.1.tgz#93bc4005af6c2cc30ba3e42ec3125947c8f54ed3"
-  integrity sha512-IEmN/ZfmMw6G1hgZpVd0LuZXOQDisrMOZrzYd5x3RAK4bMPlJohKUZWZ9t/QsTvH0dV9TbPDcc2OSuIDcihnHA==
-
-whatwg-mimetype@^2.3.0:
-  version "2.3.0"
-  resolved "https://registry.yarnpkg.com/whatwg-mimetype/-/whatwg-mimetype-2.3.0.tgz#3d4b1e0312d2079879f826aff18dbeeca5960fbf"
-  integrity sha512-M4yMwr6mAnQz76TbJm914+gPpB/nCwvZbJU28cUD6dR004SAxDLOOSUaB1JDRqLtaOV/vi0IC5lEAGFgrjGv/g==
-
-whatwg-url@^8.0.0:
-  version "8.4.0"
-  resolved "https://registry.yarnpkg.com/whatwg-url/-/whatwg-url-8.4.0.tgz#50fb9615b05469591d2b2bd6dfaed2942ed72837"
-  integrity sha512-vwTUFf6V4zhcPkWp/4CQPr1TW9Ml6SF4lVyaIMBdJw5i6qUUJ1QWM4Z6YYVkfka0OUIzVo/0aNtGVGk256IKWw==
-  dependencies:
-    lodash.sortby "^4.7.0"
-    tr46 "^2.0.2"
-    webidl-conversions "^6.1.0"
-
-which-module@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/which-module/-/which-module-2.0.0.tgz#d9ef07dce77b9902b8a3a8fa4b31c3e3f7e6e87a"
-  integrity sha1-2e8H3Od7mQK4o6j6SzHD4/fm6Ho=
-
-which@^1.2.9, which@^1.3.1:
-  version "1.3.1"
-  resolved "https://registry.yarnpkg.com/which/-/which-1.3.1.tgz#a45043d54f5805316da8d62f9f50918d3da70b0a"
-  integrity sha512-HxJdYWq1MTIQbJ3nw0cqssHoTNU267KlrDuGZ1WYlxDStUtKUhOaJmh112/TZmHxxUfuJqPXSOm7tDyas0OSIQ==
-  dependencies:
-    isexe "^2.0.0"
-
-which@^2.0.1, which@^2.0.2:
-  version "2.0.2"
-  resolved "https://registry.yarnpkg.com/which/-/which-2.0.2.tgz#7c6a8dd0a636a0327e10b59c9286eee93f3f51b1"
-  integrity sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==
-  dependencies:
-    isexe "^2.0.0"
-
-word-wrap@^1.2.3, word-wrap@~1.2.3:
-  version "1.2.3"
-  resolved "https://registry.yarnpkg.com/word-wrap/-/word-wrap-1.2.3.tgz#610636f6b1f703891bd34771ccb17fb93b47079c"
-  integrity sha512-Hz/mrNwitNRh/HUAtM/VT/5VH+ygD6DV7mYKZAtHOrbs8U7lvPS6xf7EJKMF0uW1KJCl0H701g3ZGus+muE5vQ==
-
-workbox-background-sync@^5.1.4:
-  version "5.1.4"
-  resolved "https://registry.yarnpkg.com/workbox-background-sync/-/workbox-background-sync-5.1.4.tgz#5ae0bbd455f4e9c319e8d827c055bb86c894fd12"
-  integrity sha512-AH6x5pYq4vwQvfRDWH+vfOePfPIYQ00nCEB7dJRU1e0n9+9HMRyvI63FlDvtFT2AvXVRsXvUt7DNMEToyJLpSA==
-  dependencies:
-    workbox-core "^5.1.4"
-
-workbox-broadcast-update@^5.1.4:
-  version "5.1.4"
-  resolved "https://registry.yarnpkg.com/workbox-broadcast-update/-/workbox-broadcast-update-5.1.4.tgz#0eeb89170ddca7f6914fa3523fb14462891f2cfc"
-  integrity sha512-HTyTWkqXvHRuqY73XrwvXPud/FN6x3ROzkfFPsRjtw/kGZuZkPzfeH531qdUGfhtwjmtO/ZzXcWErqVzJNdXaA==
-  dependencies:
-    workbox-core "^5.1.4"
-
-workbox-build@^5.1.4:
-  version "5.1.4"
-  resolved "https://registry.yarnpkg.com/workbox-build/-/workbox-build-5.1.4.tgz#23d17ed5c32060c363030c8823b39d0eabf4c8c7"
-  integrity sha512-xUcZn6SYU8usjOlfLb9Y2/f86Gdo+fy1fXgH8tJHjxgpo53VVsqRX0lUDw8/JuyzNmXuo8vXX14pXX2oIm9Bow==
-  dependencies:
-    "@babel/core" "^7.8.4"
-    "@babel/preset-env" "^7.8.4"
-    "@babel/runtime" "^7.8.4"
-    "@hapi/joi" "^15.1.0"
-    "@rollup/plugin-node-resolve" "^7.1.1"
-    "@rollup/plugin-replace" "^2.3.1"
-    "@surma/rollup-plugin-off-main-thread" "^1.1.1"
-    common-tags "^1.8.0"
-    fast-json-stable-stringify "^2.1.0"
-    fs-extra "^8.1.0"
-    glob "^7.1.6"
-    lodash.template "^4.5.0"
-    pretty-bytes "^5.3.0"
-    rollup "^1.31.1"
-    rollup-plugin-babel "^4.3.3"
-    rollup-plugin-terser "^5.3.1"
-    source-map "^0.7.3"
-    source-map-url "^0.4.0"
-    stringify-object "^3.3.0"
-    strip-comments "^1.0.2"
-    tempy "^0.3.0"
-    upath "^1.2.0"
-    workbox-background-sync "^5.1.4"
-    workbox-broadcast-update "^5.1.4"
-    workbox-cacheable-response "^5.1.4"
-    workbox-core "^5.1.4"
-    workbox-expiration "^5.1.4"
-    workbox-google-analytics "^5.1.4"
-    workbox-navigation-preload "^5.1.4"
-    workbox-precaching "^5.1.4"
-    workbox-range-requests "^5.1.4"
-    workbox-routing "^5.1.4"
-    workbox-strategies "^5.1.4"
-    workbox-streams "^5.1.4"
-    workbox-sw "^5.1.4"
-    workbox-window "^5.1.4"
-
-workbox-cacheable-response@^5.1.4:
-  version "5.1.4"
-  resolved "https://registry.yarnpkg.com/workbox-cacheable-response/-/workbox-cacheable-response-5.1.4.tgz#9ff26e1366214bdd05cf5a43da9305b274078a54"
-  integrity sha512-0bfvMZs0Of1S5cdswfQK0BXt6ulU5kVD4lwer2CeI+03czHprXR3V4Y8lPTooamn7eHP8Iywi5QjyAMjw0qauA==
-  dependencies:
-    workbox-core "^5.1.4"
-
-workbox-core@^5.1.4:
-  version "5.1.4"
-  resolved "https://registry.yarnpkg.com/workbox-core/-/workbox-core-5.1.4.tgz#8bbfb2362ecdff30e25d123c82c79ac65d9264f4"
-  integrity sha512-+4iRQan/1D8I81nR2L5vcbaaFskZC2CL17TLbvWVzQ4qiF/ytOGF6XeV54pVxAvKUtkLANhk8TyIUMtiMw2oDg==
-
-workbox-expiration@^5.1.4:
-  version "5.1.4"
-  resolved "https://registry.yarnpkg.com/workbox-expiration/-/workbox-expiration-5.1.4.tgz#92b5df461e8126114943a3b15c55e4ecb920b163"
-  integrity sha512-oDO/5iC65h2Eq7jctAv858W2+CeRW5e0jZBMNRXpzp0ZPvuT6GblUiHnAsC5W5lANs1QS9atVOm4ifrBiYY7AQ==
-  dependencies:
-    workbox-core "^5.1.4"
-
-workbox-google-analytics@^5.1.4:
-  version "5.1.4"
-  resolved "https://registry.yarnpkg.com/workbox-google-analytics/-/workbox-google-analytics-5.1.4.tgz#b3376806b1ac7d7df8418304d379707195fa8517"
-  integrity sha512-0IFhKoEVrreHpKgcOoddV+oIaVXBFKXUzJVBI+nb0bxmcwYuZMdteBTp8AEDJacENtc9xbR0wa9RDCnYsCDLjA==
-  dependencies:
-    workbox-background-sync "^5.1.4"
-    workbox-core "^5.1.4"
-    workbox-routing "^5.1.4"
-    workbox-strategies "^5.1.4"
-
-workbox-navigation-preload@^5.1.4:
-  version "5.1.4"
-  resolved "https://registry.yarnpkg.com/workbox-navigation-preload/-/workbox-navigation-preload-5.1.4.tgz#30d1b720d26a05efc5fa11503e5cc1ed5a78902a"
-  integrity sha512-Wf03osvK0wTflAfKXba//QmWC5BIaIZARU03JIhAEO2wSB2BDROWI8Q/zmianf54kdV7e1eLaIEZhth4K4MyfQ==
-  dependencies:
-    workbox-core "^5.1.4"
-
-workbox-precaching@^5.1.4:
-  version "5.1.4"
-  resolved "https://registry.yarnpkg.com/workbox-precaching/-/workbox-precaching-5.1.4.tgz#874f7ebdd750dd3e04249efae9a1b3f48285fe6b"
-  integrity sha512-gCIFrBXmVQLFwvAzuGLCmkUYGVhBb7D1k/IL7pUJUO5xacjLcFUaLnnsoVepBGAiKw34HU1y/YuqvTKim9qAZA==
-  dependencies:
-    workbox-core "^5.1.4"
-
-workbox-range-requests@^5.1.4:
-  version "5.1.4"
-  resolved "https://registry.yarnpkg.com/workbox-range-requests/-/workbox-range-requests-5.1.4.tgz#7066a12c121df65bf76fdf2b0868016aa2bab859"
-  integrity sha512-1HSujLjgTeoxHrMR2muDW2dKdxqCGMc1KbeyGcmjZZAizJTFwu7CWLDmLv6O1ceWYrhfuLFJO+umYMddk2XMhw==
-  dependencies:
-    workbox-core "^5.1.4"
-
-workbox-routing@^5.1.4:
-  version "5.1.4"
-  resolved "https://registry.yarnpkg.com/workbox-routing/-/workbox-routing-5.1.4.tgz#3e8cd86bd3b6573488d1a2ce7385e547b547e970"
-  integrity sha512-8ljknRfqE1vEQtnMtzfksL+UXO822jJlHTIR7+BtJuxQ17+WPZfsHqvk1ynR/v0EHik4x2+826Hkwpgh4GKDCw==
-  dependencies:
-    workbox-core "^5.1.4"
-
-workbox-strategies@^5.1.4:
-  version "5.1.4"
-  resolved "https://registry.yarnpkg.com/workbox-strategies/-/workbox-strategies-5.1.4.tgz#96b1418ccdfde5354612914964074d466c52d08c"
-  integrity sha512-VVS57LpaJTdjW3RgZvPwX0NlhNmscR7OQ9bP+N/34cYMDzXLyA6kqWffP6QKXSkca1OFo/v6v7hW7zrrguo6EA==
-  dependencies:
-    workbox-core "^5.1.4"
-    workbox-routing "^5.1.4"
-
-workbox-streams@^5.1.4:
-  version "5.1.4"
-  resolved "https://registry.yarnpkg.com/workbox-streams/-/workbox-streams-5.1.4.tgz#05754e5e3667bdc078df2c9315b3f41210d8cac0"
-  integrity sha512-xU8yuF1hI/XcVhJUAfbQLa1guQUhdLMPQJkdT0kn6HP5CwiPOGiXnSFq80rAG4b1kJUChQQIGPrq439FQUNVrw==
-  dependencies:
-    workbox-core "^5.1.4"
-    workbox-routing "^5.1.4"
-
-workbox-sw@^5.1.4:
-  version "5.1.4"
-  resolved "https://registry.yarnpkg.com/workbox-sw/-/workbox-sw-5.1.4.tgz#2bb34c9f7381f90d84cef644816d45150011d3db"
-  integrity sha512-9xKnKw95aXwSNc8kk8gki4HU0g0W6KXu+xks7wFuC7h0sembFnTrKtckqZxbSod41TDaGh+gWUA5IRXrL0ECRA==
-
-workbox-webpack-plugin@5.1.4:
-  version "5.1.4"
-  resolved "https://registry.yarnpkg.com/workbox-webpack-plugin/-/workbox-webpack-plugin-5.1.4.tgz#7bfe8c16e40fe9ed8937080ac7ae9c8bde01e79c"
-  integrity sha512-PZafF4HpugZndqISi3rZ4ZK4A4DxO8rAqt2FwRptgsDx7NF8TVKP86/huHquUsRjMGQllsNdn4FNl8CD/UvKmQ==
-  dependencies:
-    "@babel/runtime" "^7.5.5"
-    fast-json-stable-stringify "^2.0.0"
-    source-map-url "^0.4.0"
-    upath "^1.1.2"
-    webpack-sources "^1.3.0"
-    workbox-build "^5.1.4"
-
-workbox-window@^5.1.4:
-  version "5.1.4"
-  resolved "https://registry.yarnpkg.com/workbox-window/-/workbox-window-5.1.4.tgz#2740f7dea7f93b99326179a62f1cc0ca2c93c863"
-  integrity sha512-vXQtgTeMCUq/4pBWMfQX8Ee7N2wVC4Q7XYFqLnfbXJ2hqew/cU1uMTD2KqGEgEpE4/30luxIxgE+LkIa8glBYw==
-  dependencies:
-    workbox-core "^5.1.4"
-
-worker-farm@^1.7.0:
-  version "1.7.0"
-  resolved "https://registry.yarnpkg.com/worker-farm/-/worker-farm-1.7.0.tgz#26a94c5391bbca926152002f69b84a4bf772e5a8"
-  integrity sha512-rvw3QTZc8lAxyVrqcSGVm5yP/IJ2UcB3U0graE3LCFoZ0Yn2x4EoVSqJKdB/T5M+FLcRPjz4TDacRf3OCfNUzw==
-  dependencies:
-    errno "~0.1.7"
-
-worker-rpc@^0.1.0:
-  version "0.1.1"
-  resolved "https://registry.yarnpkg.com/worker-rpc/-/worker-rpc-0.1.1.tgz#cb565bd6d7071a8f16660686051e969ad32f54d5"
-  integrity sha512-P1WjMrUB3qgJNI9jfmpZ/htmBEjFh//6l/5y8SD9hg1Ef5zTTVVoRjTrTEzPrNBQvmhMxkoTsjOXN10GWU7aCg==
-  dependencies:
-    microevent.ts "~0.1.1"
-
-wrap-ansi@^5.1.0:
-  version "5.1.0"
-  resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-5.1.0.tgz#1fd1f67235d5b6d0fee781056001bfb694c03b09"
-  integrity sha512-QC1/iN/2/RPVJ5jYK8BGttj5z83LmSKmvbvrXPNCLZSEb32KKVDJDl/MOt2N01qU2H/FkzEa9PKto1BqDjtd7Q==
-  dependencies:
-    ansi-styles "^3.2.0"
-    string-width "^3.0.0"
-    strip-ansi "^5.0.0"
-
-wrap-ansi@^6.2.0:
-  version "6.2.0"
-  resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-6.2.0.tgz#e9393ba07102e6c91a3b221478f0257cd2856e53"
-  integrity sha512-r6lPcBGxZXlIcymEu7InxDMhdW0KDxpLgoFLcguasxCaJ/SOIZwINatK9KY/tf+ZrlywOKU0UDj3ATXUBfxJXA==
-  dependencies:
-    ansi-styles "^4.0.0"
-    string-width "^4.1.0"
-    strip-ansi "^6.0.0"
-
-wrappy@1:
-  version "1.0.2"
-  resolved "https://registry.yarnpkg.com/wrappy/-/wrappy-1.0.2.tgz#b5243d8f3ec1aa35f1364605bc0d1036e30ab69f"
-  integrity sha1-tSQ9jz7BqjXxNkYFvA0QNuMKtp8=
-
-write-file-atomic@^3.0.0:
-  version "3.0.3"
-  resolved "https://registry.yarnpkg.com/write-file-atomic/-/write-file-atomic-3.0.3.tgz#56bd5c5a5c70481cd19c571bd39ab965a5de56e8"
-  integrity sha512-AvHcyZ5JnSfq3ioSyjrBkH9yW4m7Ayk8/9My/DD9onKeu/94fwrMocemO2QAJFAlnnDN+ZDS+ZjAR5ua1/PV/Q==
-  dependencies:
-    imurmurhash "^0.1.4"
-    is-typedarray "^1.0.0"
-    signal-exit "^3.0.2"
-    typedarray-to-buffer "^3.1.5"
-
-ws@^6.2.1:
-  version "6.2.1"
-  resolved "https://registry.yarnpkg.com/ws/-/ws-6.2.1.tgz#442fdf0a47ed64f59b6a5d8ff130f4748ed524fb"
-  integrity sha512-GIyAXC2cB7LjvpgMt9EKS2ldqr0MTrORaleiOno6TweZ6r3TKtoFQWay/2PceJ3RuBasOHzXNn5Lrw1X0bEjqA==
-  dependencies:
-    async-limiter "~1.0.0"
-
-ws@^7.2.3:
-  version "7.4.3"
-  resolved "https://registry.yarnpkg.com/ws/-/ws-7.4.3.tgz#1f9643de34a543b8edb124bdcbc457ae55a6e5cd"
-  integrity sha512-hr6vCR76GsossIRsr8OLR9acVVm1jyfEWvhbNjtgPOrfvAlKzvyeg/P6r8RuDjRyrcQoPQT7K0DGEPc7Ae6jzA==
-
-xml-name-validator@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/xml-name-validator/-/xml-name-validator-3.0.0.tgz#6ae73e06de4d8c6e47f9fb181f78d648ad457c6a"
-  integrity sha512-A5CUptxDsvxKJEU3yO6DuWBSJz/qizqzJKOMIfUJHETbBw/sFaDxgd6fxm1ewUaM0jZ444Fc5vC5ROYurg/4Pw==
-
-xmlchars@^2.2.0:
-  version "2.2.0"
-  resolved "https://registry.yarnpkg.com/xmlchars/-/xmlchars-2.2.0.tgz#060fe1bcb7f9c76fe2a17db86a9bc3ab894210cb"
-  integrity sha512-JZnDKK8B0RCDw84FNdDAIpZK+JuJw+s7Lz8nksI7SIuU3UXJJslUthsi+uWBUYOwPFwW7W7PRLRfUKpxjtjFCw==
-
-xtend@^4.0.0, xtend@~4.0.1:
-  version "4.0.2"
-  resolved "https://registry.yarnpkg.com/xtend/-/xtend-4.0.2.tgz#bb72779f5fa465186b1f438f674fa347fdb5db54"
-  integrity sha512-LKYU1iAXJXUgAXn9URjiu+MWhyUXHsvfp7mcuYm9dSUKK0/CjtrUwFAxD82/mCWbtLsGjFIad0wIsod4zrTAEQ==
-
-y18n@^4.0.0:
-  version "4.0.1"
-  resolved "https://registry.yarnpkg.com/y18n/-/y18n-4.0.1.tgz#8db2b83c31c5d75099bb890b23f3094891e247d4"
-  integrity sha512-wNcy4NvjMYL8gogWWYAO7ZFWFfHcbdbE57tZO8e4cbpj8tfUcwrwqSl3ad8HxpYWCdXcJUCeKKZS62Av1affwQ==
-
-yallist@^3.0.2:
-  version "3.1.1"
-  resolved "https://registry.yarnpkg.com/yallist/-/yallist-3.1.1.tgz#dbb7daf9bfd8bac9ab45ebf602b8cbad0d5d08fd"
-  integrity sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g==
-
-yallist@^4.0.0:
-  version "4.0.0"
-  resolved "https://registry.yarnpkg.com/yallist/-/yallist-4.0.0.tgz#9bb92790d9c0effec63be73519e11a35019a3a72"
-  integrity sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==
-
-yaml@^1.10.0, yaml@^1.7.2:
-  version "1.10.0"
-  resolved "https://registry.yarnpkg.com/yaml/-/yaml-1.10.0.tgz#3b593add944876077d4d683fee01081bd9fff31e"
-  integrity sha512-yr2icI4glYaNG+KWONODapy2/jDdMSDnrONSjblABjD9B4Z5LgiircSt8m8sRZFNi08kG9Sm0uSHtEmP3zaEGg==
-
-yargs-parser@^13.1.2:
-  version "13.1.2"
-  resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-13.1.2.tgz#130f09702ebaeef2650d54ce6e3e5706f7a4fb38"
-  integrity sha512-3lbsNRf/j+A4QuSZfDRA7HRSfWrzO0YjqTJd5kjAq37Zep1CEgaYmrH9Q3GwPiB9cHyd1Y1UwggGhJGoxipbzg==
-  dependencies:
-    camelcase "^5.0.0"
-    decamelize "^1.2.0"
-
-yargs-parser@^18.1.2:
-  version "18.1.3"
-  resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-18.1.3.tgz#be68c4975c6b2abf469236b0c870362fab09a7b0"
-  integrity sha512-o50j0JeToy/4K6OZcaQmW6lyXXKhq7csREXcDwk2omFPJEwUNOVtJKvmDr9EI1fAJZUyZcRF7kxGBWmRXudrCQ==
-  dependencies:
-    camelcase "^5.0.0"
-    decamelize "^1.2.0"
-
-yargs@^13.3.2:
-  version "13.3.2"
-  resolved "https://registry.yarnpkg.com/yargs/-/yargs-13.3.2.tgz#ad7ffefec1aa59565ac915f82dccb38a9c31a2dd"
-  integrity sha512-AX3Zw5iPruN5ie6xGRIDgqkT+ZhnRlZMLMHAs8tg7nRruy2Nb+i5o9bwghAogtM08q1dpr2LVoS8KSTMYpWXUw==
-  dependencies:
-    cliui "^5.0.0"
-    find-up "^3.0.0"
-    get-caller-file "^2.0.1"
-    require-directory "^2.1.1"
-    require-main-filename "^2.0.0"
-    set-blocking "^2.0.0"
-    string-width "^3.0.0"
-    which-module "^2.0.0"
-    y18n "^4.0.0"
-    yargs-parser "^13.1.2"
-
-yargs@^15.4.1:
-  version "15.4.1"
-  resolved "https://registry.yarnpkg.com/yargs/-/yargs-15.4.1.tgz#0d87a16de01aee9d8bec2bfbf74f67851730f4f8"
-  integrity sha512-aePbxDmcYW++PaqBsJ+HYUFwCdv4LVvdnhBy78E57PIor8/OVvhMrADFFEDh8DHDFRv/O9i3lPhsENjO7QX0+A==
-  dependencies:
-    cliui "^6.0.0"
-    decamelize "^1.2.0"
-    find-up "^4.1.0"
-    get-caller-file "^2.0.1"
-    require-directory "^2.1.1"
-    require-main-filename "^2.0.0"
-    set-blocking "^2.0.0"
-    string-width "^4.2.0"
-    which-module "^2.0.0"
-    y18n "^4.0.0"
-    yargs-parser "^18.1.2"
-
-yocto-queue@^0.1.0:
-  version "0.1.0"
-  resolved "https://registry.yarnpkg.com/yocto-queue/-/yocto-queue-0.1.0.tgz#0294eb3dee05028d31ee1a5fa2c556a6aaf10a1b"
-  integrity sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==
diff --git a/rust/benchmarks/Cargo.toml b/rust/benchmarks/Cargo.toml
deleted file mode 100644
index 2d64482a09d..00000000000
--- a/rust/benchmarks/Cargo.toml
+++ /dev/null
@@ -1,42 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-[package]
-name = "arrow-benchmarks"
-description = "Apache Arrow Benchmarks"
-version = "5.0.0-SNAPSHOT"
-edition = "2018"
-authors = ["Apache Arrow <dev@arrow.apache.org>"]
-homepage = "https://github.com/apache/arrow"
-repository = "https://github.com/apache/arrow"
-license = "Apache-2.0"
-publish = false
-
-[features]
-simd = ["datafusion/simd"]
-snmalloc = ["snmalloc-rs"]
-
-[dependencies]
-arrow = { path = "../arrow" }
-parquet = { path = "../parquet" }
-datafusion = { path = "../datafusion" }
-structopt = { version = "0.3", default-features = false }
-tokio = { version = "^1.0", features = ["macros", "rt", "rt-multi-thread"] }
-futures = "0.3"
-env_logger = "^0.8"
-mimalloc = { version = "0.1", optional = true, default-features = false }
-snmalloc-rs = {version = "0.2", optional = true, features= ["cache-friendly"] }
diff --git a/rust/benchmarks/README.md b/rust/benchmarks/README.md
deleted file mode 100644
index 7460477db4e..00000000000
--- a/rust/benchmarks/README.md
+++ /dev/null
@@ -1,120 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-# Apache Arrow Rust Benchmarks
-
-This crate contains benchmarks based on popular public data sets and open source benchmark suites, making it easy to
-run real-world benchmarks to help with performance and scalability testing and for comparing performance with other Arrow
-implementations as well as other query engines.
-
-Currently, only DataFusion benchmarks exist, but the plan is to add benchmarks for the arrow, flight, and parquet
-crates as well.
-
-## Benchmark derived from TPC-H
-
-These benchmarks are derived from the [TPC-H][1] benchmark.
-
-Data for this benchmark can be generated using the [tpch-dbgen][2] command-line tool. Run the following commands to
-clone the repository and build the source code.
-
-```bash
-git clone git@github.com:databricks/tpch-dbgen.git
-cd tpch-dbgen
-make
-export TPCH_DATA=$(pwd)
-```
-
-Data can now be generated with the following command. Note that `-s 1` means use Scale Factor 1 or ~1 GB of
-data. This value can be increased to generate larger data sets.
-
-```bash
-./dbgen -vf -s 1
-```
-
-The benchmark can then be run (assuming the data created from `dbgen` is in `/mnt/tpch-dbgen`) with a command such as:
-
-```bash
-cargo run --release --bin tpch -- benchmark --iterations 3 --path /mnt/tpch-dbgen --format tbl --query 1 --batch-size 4096
-```
-
-You can enable the features `simd` (to use SIMD instructions) and/or `mimalloc` or `snmalloc` (to use either the mimalloc or snmalloc allocator) as features by passing them in as `--features`:
-
-```
-cargo run --release --features "simd mimalloc" --bin tpch -- benchmark --iterations 3 --path /mnt/tpch-dbgen --format tbl --query 1 --batch-size 4096
-```
-
-The benchmark program also supports CSV and Parquet input file formats and a utility is provided to convert from `tbl`
-(generated by the `dbgen` utility) to CSV and Parquet.
-
-```bash
-cargo run --release --bin tpch -- convert --input /mnt/tpch-dbgen --output /mnt/tpch-parquet --format parquet
-```
-
-This utility does not yet provide support for changing the number of partitions when performing the conversion. Another
-option is to use the following Docker image to perform the conversion from `tbl` files to CSV or Parquet.
-
-```bash
-docker run -it ballistacompute/spark-benchmarks:0.4.0-SNAPSHOT
-  -h, --help   Show help message
-
-Subcommand: convert-tpch
-  -i, --input  <arg>
-      --input-format  <arg>
-  -o, --output  <arg>
-      --output-format  <arg>
-  -p, --partitions  <arg>
-  -h, --help                   Show help message
-```
-
-Note that it is necessary to mount volumes into the Docker container as appropriate so that the file conversion process
-can access files on the host system.
-
-Here is a full example that assumes that data is stored in the `/mnt` path on the host system.
-
-```bash
-docker run -v /mnt:/mnt -it ballistacompute/spark-benchmarks:0.4.0-SNAPSHOT \
-  convert-tpch \
-  --input /mnt/tpch/csv \
-  --input-format tbl \
-  --output /mnt/tpch/parquet \
-  --output-format parquet \
-  --partitions 64
-```
-
-## NYC Taxi Benchmark
-
-These benchmarks are based on the [New York Taxi and Limousine Commission][3] data set.
-
-```bash
-cargo run --release --bin nyctaxi -- --iterations 3 --path /mnt/nyctaxi/csv --format csv --batch-size 4096
-```
-
-Example output:
-
-```bash
-Running benchmarks with the following options: Opt { debug: false, iterations: 3, batch_size: 4096, path: "/mnt/nyctaxi/csv", file_format: "csv" }
-Executing 'fare_amt_by_passenger'
-Query 'fare_amt_by_passenger' iteration 0 took 7138 ms
-Query 'fare_amt_by_passenger' iteration 1 took 7599 ms
-Query 'fare_amt_by_passenger' iteration 2 took 7969 ms
-```
-
-[1]: http://www.tpc.org/tpch/
-[2]: https://github.com/databricks/tpch-dbgen
-[3]: https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page
diff --git a/rust/benchmarks/src/bin/nyctaxi.rs b/rust/benchmarks/src/bin/nyctaxi.rs
deleted file mode 100644
index 005efca9488..00000000000
--- a/rust/benchmarks/src/bin/nyctaxi.rs
+++ /dev/null
@@ -1,151 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Apache Arrow Rust Benchmarks
-
-use std::collections::HashMap;
-use std::path::PathBuf;
-use std::process;
-use std::time::Instant;
-
-use arrow::datatypes::{DataType, Field, Schema};
-use arrow::util::pretty;
-use datafusion::error::Result;
-use datafusion::execution::context::{ExecutionConfig, ExecutionContext};
-
-use datafusion::physical_plan::collect;
-use datafusion::physical_plan::csv::CsvReadOptions;
-use structopt::StructOpt;
-
-#[cfg(feature = "snmalloc")]
-#[global_allocator]
-static ALLOC: snmalloc_rs::SnMalloc = snmalloc_rs::SnMalloc;
-
-#[derive(Debug, StructOpt)]
-#[structopt(name = "Benchmarks", about = "Apache Arrow Rust Benchmarks.")]
-struct Opt {
-    /// Activate debug mode to see query results
-    #[structopt(short, long)]
-    debug: bool,
-
-    /// Number of iterations of each test run
-    #[structopt(short = "i", long = "iterations", default_value = "3")]
-    iterations: usize,
-
-    /// Number of threads for query execution
-    #[structopt(short = "c", long = "concurrency", default_value = "2")]
-    concurrency: usize,
-
-    /// Batch size when reading CSV or Parquet files
-    #[structopt(short = "s", long = "batch-size", default_value = "8192")]
-    batch_size: usize,
-
-    /// Path to data files
-    #[structopt(parse(from_os_str), required = true, short = "p", long = "path")]
-    path: PathBuf,
-
-    /// File format: `csv` or `parquet`
-    #[structopt(short = "f", long = "format", default_value = "csv")]
-    file_format: String,
-}
-
-#[tokio::main]
-async fn main() -> Result<()> {
-    let opt = Opt::from_args();
-    println!("Running benchmarks with the following options: {:?}", opt);
-
-    let config = ExecutionConfig::new()
-        .with_concurrency(opt.concurrency)
-        .with_batch_size(opt.batch_size);
-    let mut ctx = ExecutionContext::with_config(config);
-
-    let path = opt.path.to_str().unwrap();
-
-    match opt.file_format.as_str() {
-        "csv" => {
-            let schema = nyctaxi_schema();
-            let options = CsvReadOptions::new().schema(&schema).has_header(true);
-            ctx.register_csv("tripdata", path, options)?
-        }
-        "parquet" => ctx.register_parquet("tripdata", path)?,
-        other => {
-            println!("Invalid file format '{}'", other);
-            process::exit(-1);
-        }
-    }
-
-    datafusion_sql_benchmarks(&mut ctx, opt.iterations, opt.debug).await
-}
-
-async fn datafusion_sql_benchmarks(
-    ctx: &mut ExecutionContext,
-    iterations: usize,
-    debug: bool,
-) -> Result<()> {
-    let mut queries = HashMap::new();
-    queries.insert("fare_amt_by_passenger", "SELECT passenger_count, MIN(fare_amount), MAX(fare_amount), SUM(fare_amount) FROM tripdata GROUP BY passenger_count");
-    for (name, sql) in &queries {
-        println!("Executing '{}'", name);
-        for i in 0..iterations {
-            let start = Instant::now();
-            execute_sql(ctx, sql, debug).await?;
-            println!(
-                "Query '{}' iteration {} took {} ms",
-                name,
-                i,
-                start.elapsed().as_millis()
-            );
-        }
-    }
-    Ok(())
-}
-
-async fn execute_sql(ctx: &mut ExecutionContext, sql: &str, debug: bool) -> Result<()> {
-    let plan = ctx.create_logical_plan(sql)?;
-    let plan = ctx.optimize(&plan)?;
-    if debug {
-        println!("Optimized logical plan:\n{:?}", plan);
-    }
-    let physical_plan = ctx.create_physical_plan(&plan)?;
-    let result = collect(physical_plan).await?;
-    if debug {
-        pretty::print_batches(&result)?;
-    }
-    Ok(())
-}
-
-fn nyctaxi_schema() -> Schema {
-    Schema::new(vec![
-        Field::new("VendorID", DataType::Utf8, true),
-        Field::new("tpep_pickup_datetime", DataType::Utf8, true),
-        Field::new("tpep_dropoff_datetime", DataType::Utf8, true),
-        Field::new("passenger_count", DataType::Int32, true),
-        Field::new("trip_distance", DataType::Utf8, true),
-        Field::new("RatecodeID", DataType::Utf8, true),
-        Field::new("store_and_fwd_flag", DataType::Utf8, true),
-        Field::new("PULocationID", DataType::Utf8, true),
-        Field::new("DOLocationID", DataType::Utf8, true),
-        Field::new("payment_type", DataType::Utf8, true),
-        Field::new("fare_amount", DataType::Float64, true),
-        Field::new("extra", DataType::Float64, true),
-        Field::new("mta_tax", DataType::Float64, true),
-        Field::new("tip_amount", DataType::Float64, true),
-        Field::new("tolls_amount", DataType::Float64, true),
-        Field::new("improvement_surcharge", DataType::Float64, true),
-        Field::new("total_amount", DataType::Float64, true),
-    ])
-}
diff --git a/rust/benchmarks/src/bin/tpch.rs b/rust/benchmarks/src/bin/tpch.rs
deleted file mode 100644
index 328a68dd6a6..00000000000
--- a/rust/benchmarks/src/bin/tpch.rs
+++ /dev/null
@@ -1,1692 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Benchmark derived from TPC-H. This is not an official TPC-H benchmark.
-
-use std::time::Instant;
-use std::{
-    path::{Path, PathBuf},
-    sync::Arc,
-};
-
-use arrow::datatypes::{DataType, Field, Schema};
-use arrow::util::pretty;
-use datafusion::datasource::parquet::ParquetTable;
-use datafusion::datasource::{CsvFile, MemTable, TableProvider};
-use datafusion::error::{DataFusionError, Result};
-use datafusion::logical_plan::LogicalPlan;
-use datafusion::physical_plan::collect;
-use datafusion::prelude::*;
-
-use parquet::basic::Compression;
-use parquet::file::properties::WriterProperties;
-use structopt::StructOpt;
-
-#[cfg(feature = "snmalloc")]
-#[global_allocator]
-static ALLOC: snmalloc_rs::SnMalloc = snmalloc_rs::SnMalloc;
-
-#[cfg(feature = "mimalloc")]
-#[global_allocator]
-static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
-
-#[derive(Debug, StructOpt)]
-struct BenchmarkOpt {
-    /// Query number
-    #[structopt(short, long)]
-    query: usize,
-
-    /// Activate debug mode to see query results
-    #[structopt(short, long)]
-    debug: bool,
-
-    /// Number of iterations of each test run
-    #[structopt(short = "i", long = "iterations", default_value = "3")]
-    iterations: usize,
-
-    /// Number of threads to use for parallel execution
-    #[structopt(short = "c", long = "concurrency", default_value = "2")]
-    concurrency: usize,
-
-    /// Batch size when reading CSV or Parquet files
-    #[structopt(short = "s", long = "batch-size", default_value = "8192")]
-    batch_size: usize,
-
-    /// Path to data files
-    #[structopt(parse(from_os_str), required = true, short = "p", long = "path")]
-    path: PathBuf,
-
-    /// File format: `csv` or `parquet`
-    #[structopt(short = "f", long = "format", default_value = "csv")]
-    file_format: String,
-
-    /// Load the data into a MemTable before executing the query
-    #[structopt(short = "m", long = "mem-table")]
-    mem_table: bool,
-
-    /// Number of partitions to create when using MemTable as input
-    #[structopt(short = "n", long = "partitions", default_value = "8")]
-    partitions: usize,
-}
-
-#[derive(Debug, StructOpt)]
-struct ConvertOpt {
-    /// Path to csv files
-    #[structopt(parse(from_os_str), required = true, short = "i", long = "input")]
-    input_path: PathBuf,
-
-    /// Output path
-    #[structopt(parse(from_os_str), required = true, short = "o", long = "output")]
-    output_path: PathBuf,
-
-    /// Output file format: `csv` or `parquet`
-    #[structopt(short = "f", long = "format")]
-    file_format: String,
-
-    /// Compression to use when writing Parquet files
-    #[structopt(short = "c", long = "compression", default_value = "snappy")]
-    compression: String,
-
-    /// Number of partitions to produce
-    #[structopt(short = "p", long = "partitions", default_value = "1")]
-    partitions: usize,
-
-    /// Batch size when reading CSV or Parquet files
-    #[structopt(short = "s", long = "batch-size", default_value = "8192")]
-    batch_size: usize,
-}
-
-#[derive(Debug, StructOpt)]
-#[structopt(name = "TPC-H", about = "TPC-H Benchmarks.")]
-enum TpchOpt {
-    Benchmark(BenchmarkOpt),
-    Convert(ConvertOpt),
-}
-
-const TABLES: &[&str] = &[
-    "part", "supplier", "partsupp", "customer", "orders", "lineitem", "nation", "region",
-];
-
-#[tokio::main]
-async fn main() -> Result<()> {
-    env_logger::init();
-    match TpchOpt::from_args() {
-        TpchOpt::Benchmark(opt) => benchmark(opt).await.map(|_| ()),
-        TpchOpt::Convert(opt) => convert_tbl(opt).await,
-    }
-}
-
-async fn benchmark(opt: BenchmarkOpt) -> Result<Vec<arrow::record_batch::RecordBatch>> {
-    println!("Running benchmarks with the following options: {:?}", opt);
-    let config = ExecutionConfig::new()
-        .with_concurrency(opt.concurrency)
-        .with_batch_size(opt.batch_size);
-    let mut ctx = ExecutionContext::with_config(config);
-
-    // register tables
-    for table in TABLES {
-        let table_provider = get_table(
-            opt.path.to_str().unwrap(),
-            table,
-            opt.file_format.as_str(),
-            opt.concurrency,
-        )?;
-        if opt.mem_table {
-            println!("Loading table '{}' into memory", table);
-            let start = Instant::now();
-
-            let memtable =
-                MemTable::load(table_provider, opt.batch_size, Some(opt.partitions))
-                    .await?;
-            println!(
-                "Loaded table '{}' into memory in {} ms",
-                table,
-                start.elapsed().as_millis()
-            );
-            ctx.register_table(*table, Arc::new(memtable))?;
-        } else {
-            ctx.register_table(*table, table_provider)?;
-        }
-    }
-
-    let mut millis = vec![];
-    // run benchmark
-    let mut result: Vec<arrow::record_batch::RecordBatch> = Vec::with_capacity(1);
-    for i in 0..opt.iterations {
-        let start = Instant::now();
-        let plan = create_logical_plan(&mut ctx, opt.query)?;
-        result = execute_query(&mut ctx, &plan, opt.debug).await?;
-        let elapsed = start.elapsed().as_secs_f64() * 1000.0;
-        millis.push(elapsed as f64);
-        println!("Query {} iteration {} took {:.1} ms", opt.query, i, elapsed);
-    }
-
-    let avg = millis.iter().sum::<f64>() / millis.len() as f64;
-    println!("Query {} avg time: {:.2} ms", opt.query, avg);
-
-    Ok(result)
-}
-
-fn create_logical_plan(ctx: &mut ExecutionContext, query: usize) -> Result<LogicalPlan> {
-    match query {
-        // original
-        // 1 => ctx.create_logical_plan(
-        //     "select
-        //         l_returnflag,
-        //         l_linestatus,
-        //         sum(l_quantity) as sum_qty,
-        //         sum(l_extendedprice) as sum_base_price,
-        //         sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
-        //         sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
-        //         avg(l_quantity) as avg_qty,
-        //         avg(l_extendedprice) as avg_price,
-        //         avg(l_discount) as avg_disc,
-        //         count(*) as count_order
-        //     from
-        //         lineitem
-        //     where
-        //         l_shipdate <= date '1998-12-01' - interval '90' day (3)
-        //     group by
-        //         l_returnflag,
-        //         l_linestatus
-        //     order by
-        //         l_returnflag,
-        //         l_linestatus;"
-        // ),
-        1 => ctx.create_logical_plan(
-            "select
-                l_returnflag,
-                l_linestatus,
-                sum(l_quantity) as sum_qty,
-                sum(l_extendedprice) as sum_base_price,
-                sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
-                sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
-                avg(l_quantity) as avg_qty,
-                avg(l_extendedprice) as avg_price,
-                avg(l_discount) as avg_disc,
-                count(*) as count_order
-            from
-                lineitem
-            where
-                l_shipdate <= date '1998-09-02'
-            group by
-                l_returnflag,
-                l_linestatus
-            order by
-                l_returnflag,
-                l_linestatus;",
-        ),
-
-        2 => ctx.create_logical_plan(
-            "select
-                s_acctbal,
-                s_name,
-                n_name,
-                p_partkey,
-                p_mfgr,
-                s_address,
-                s_phone,
-                s_comment
-            from
-                part,
-                supplier,
-                partsupp,
-                nation,
-                region
-            where
-                p_partkey = ps_partkey
-                and s_suppkey = ps_suppkey
-                and p_size = 15
-                and p_type like '%BRASS'
-                and s_nationkey = n_nationkey
-                and n_regionkey = r_regionkey
-                and r_name = 'EUROPE'
-                and ps_supplycost = (
-                    select
-                        min(ps_supplycost)
-                    from
-                        partsupp,
-                        supplier,
-                        nation,
-                        region
-                    where
-                        p_partkey = ps_partkey
-                        and s_suppkey = ps_suppkey
-                        and s_nationkey = n_nationkey
-                        and n_regionkey = r_regionkey
-                        and r_name = 'EUROPE'
-                )
-            order by
-                s_acctbal desc,
-                n_name,
-                s_name,
-                p_partkey;",
-        ),
-
-        3 => ctx.create_logical_plan(
-            "select
-                l_orderkey,
-                sum(l_extendedprice * (1 - l_discount)) as revenue,
-                o_orderdate,
-                o_shippriority
-            from
-                customer,
-                orders,
-                lineitem
-            where
-                c_mktsegment = 'BUILDING'
-                and c_custkey = o_custkey
-                and l_orderkey = o_orderkey
-                and o_orderdate < date '1995-03-15'
-                and l_shipdate > date '1995-03-15'
-            group by
-                l_orderkey,
-                o_orderdate,
-                o_shippriority
-            order by
-                revenue desc,
-                o_orderdate;",
-        ),
-
-        4 => ctx.create_logical_plan(
-            "select
-                o_orderpriority,
-                count(*) as order_count
-            from
-                orders
-            where
-                o_orderdate >= '1993-07-01'
-                and o_orderdate < date '1993-07-01' + interval '3' month
-                and exists (
-                    select
-                        *
-                    from
-                        lineitem
-                    where
-                        l_orderkey = o_orderkey
-                        and l_commitdate < l_receiptdate
-                )
-            group by
-                o_orderpriority
-            order by
-                o_orderpriority;",
-        ),
-
-        // original
-        // 5 => ctx.create_logical_plan(
-        //     "select
-        //         n_name,
-        //         sum(l_extendedprice * (1 - l_discount)) as revenue
-        //     from
-        //         customer,
-        //         orders,
-        //         lineitem,
-        //         supplier,
-        //         nation,
-        //         region
-        //     where
-        //         c_custkey = o_custkey
-        //         and l_orderkey = o_orderkey
-        //         and l_suppkey = s_suppkey
-        //         and c_nationkey = s_nationkey
-        //         and s_nationkey = n_nationkey
-        //         and n_regionkey = r_regionkey
-        //         and r_name = 'ASIA'
-        //         and o_orderdate >= date '1994-01-01'
-        //         and o_orderdate < date '1994-01-01' + interval '1' year
-        //     group by
-        //         n_name
-        //     order by
-        //         revenue desc;"
-        // ),
-        5 => ctx.create_logical_plan(
-            "select
-                n_name,
-                sum(l_extendedprice * (1 - l_discount)) as revenue
-            from
-                customer,
-                orders,
-                lineitem,
-                supplier,
-                nation,
-                region
-            where
-                c_custkey = o_custkey
-                and l_orderkey = o_orderkey
-                and l_suppkey = s_suppkey
-                and c_nationkey = s_nationkey
-                and s_nationkey = n_nationkey
-                and n_regionkey = r_regionkey
-                and r_name = 'ASIA'
-                and o_orderdate >= date '1994-01-01'
-                and o_orderdate < date '1995-01-01'
-            group by
-                n_name
-            order by
-                revenue desc;",
-        ),
-
-        // original
-        // 6 => ctx.create_logical_plan(
-        //     "select
-        //         sum(l_extendedprice * l_discount) as revenue
-        //     from
-        //         lineitem
-        //     where
-        //         l_shipdate >= date '1994-01-01'
-        //         and l_shipdate < date '1994-01-01' + interval '1' year
-        //         and l_discount between .06 - 0.01 and .06 + 0.01
-        //         and l_quantity < 24;"
-        // ),
-        6 => ctx.create_logical_plan(
-            "select
-                sum(l_extendedprice * l_discount) as revenue
-            from
-                lineitem
-            where
-                l_shipdate >= date '1994-01-01'
-                and l_shipdate < date '1995-01-01'
-                and l_discount between .06 - 0.01 and .06 + 0.01
-                and l_quantity < 24;",
-        ),
-
-        7 => ctx.create_logical_plan(
-            "select
-                supp_nation,
-                cust_nation,
-                l_year,
-                sum(volume) as revenue
-            from
-                (
-                    select
-                        n1.n_name as supp_nation,
-                        n2.n_name as cust_nation,
-                        extract(year from l_shipdate) as l_year,
-                        l_extendedprice * (1 - l_discount) as volume
-                    from
-                        supplier,
-                        lineitem,
-                        orders,
-                        customer,
-                        nation n1,
-                        nation n2
-                    where
-                        s_suppkey = l_suppkey
-                        and o_orderkey = l_orderkey
-                        and c_custkey = o_custkey
-                        and s_nationkey = n1.n_nationkey
-                        and c_nationkey = n2.n_nationkey
-                        and (
-                            (n1.n_name = 'FRANCE' and n2.n_name = 'GERMANY')
-                            or (n1.n_name = 'GERMANY' and n2.n_name = 'FRANCE')
-                        )
-                        and l_shipdate between date '1995-01-01' and date '1996-12-31'
-                ) as shipping
-            group by
-                supp_nation,
-                cust_nation,
-                l_year
-            order by
-                supp_nation,
-                cust_nation,
-                l_year;",
-        ),
-
-        8 => ctx.create_logical_plan(
-            "select
-                o_year,
-                sum(case
-                    when nation = 'BRAZIL' then volume
-                    else 0
-                end) / sum(volume) as mkt_share
-            from
-                (
-                    select
-                        extract(year from o_orderdate) as o_year,
-                        l_extendedprice * (1 - l_discount) as volume,
-                        n2.n_name as nation
-                    from
-                        part,
-                        supplier,
-                        lineitem,
-                        orders,
-                        customer,
-                        nation n1,
-                        nation n2,
-                        region
-                    where
-                        p_partkey = l_partkey
-                        and s_suppkey = l_suppkey
-                        and l_orderkey = o_orderkey
-                        and o_custkey = c_custkey
-                        and c_nationkey = n1.n_nationkey
-                        and n1.n_regionkey = r_regionkey
-                        and r_name = 'AMERICA'
-                        and s_nationkey = n2.n_nationkey
-                        and o_orderdate between date '1995-01-01' and date '1996-12-31'
-                        and p_type = 'ECONOMY ANODIZED STEEL'
-                ) as all_nations
-            group by
-                o_year
-            order by
-                o_year;",
-        ),
-
-        9 => ctx.create_logical_plan(
-            "select
-                nation,
-                o_year,
-                sum(amount) as sum_profit
-            from
-                (
-                    select
-                        n_name as nation,
-                        extract(year from o_orderdate) as o_year,
-                        l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount
-                    from
-                        part,
-                        supplier,
-                        lineitem,
-                        partsupp,
-                        orders,
-                        nation
-                    where
-                        s_suppkey = l_suppkey
-                        and ps_suppkey = l_suppkey
-                        and ps_partkey = l_partkey
-                        and p_partkey = l_partkey
-                        and o_orderkey = l_orderkey
-                        and s_nationkey = n_nationkey
-                        and p_name like '%green%'
-                ) as profit
-            group by
-                nation,
-                o_year
-            order by
-                nation,
-                o_year desc;",
-        ),
-
-        // 10 => ctx.create_logical_plan(
-        //     "select
-        //         c_custkey,
-        //         c_name,
-        //         sum(l_extendedprice * (1 - l_discount)) as revenue,
-        //         c_acctbal,
-        //         n_name,
-        //         c_address,
-        //         c_phone,
-        //         c_comment
-        //     from
-        //         customer,
-        //         orders,
-        //         lineitem,
-        //         nation
-        //     where
-        //         c_custkey = o_custkey
-        //         and l_orderkey = o_orderkey
-        //         and o_orderdate >= date '1993-10-01'
-        //         and o_orderdate < date '1993-10-01' + interval '3' month
-        //         and l_returnflag = 'R'
-        //         and c_nationkey = n_nationkey
-        //     group by
-        //         c_custkey,
-        //         c_name,
-        //         c_acctbal,
-        //         c_phone,
-        //         n_name,
-        //         c_address,
-        //         c_comment
-        //     order by
-        //         revenue desc;"
-        // ),
-        10 => ctx.create_logical_plan(
-            "select
-                c_custkey,
-                c_name,
-                sum(l_extendedprice * (1 - l_discount)) as revenue,
-                c_acctbal,
-                n_name,
-                c_address,
-                c_phone,
-                c_comment
-            from
-                customer,
-                orders,
-                lineitem,
-                nation
-            where
-                c_custkey = o_custkey
-                and l_orderkey = o_orderkey
-                and o_orderdate >= date '1993-10-01'
-                and o_orderdate < date '1994-01-01'
-                and l_returnflag = 'R'
-                and c_nationkey = n_nationkey
-            group by
-                c_custkey,
-                c_name,
-                c_acctbal,
-                c_phone,
-                n_name,
-                c_address,
-                c_comment
-            order by
-                revenue desc;",
-        ),
-
-        11 => ctx.create_logical_plan(
-            "select
-                ps_partkey,
-                sum(ps_supplycost * ps_availqty) as value
-            from
-                partsupp,
-                supplier,
-                nation
-            where
-                ps_suppkey = s_suppkey
-                and s_nationkey = n_nationkey
-                and n_name = 'GERMANY'
-            group by
-                ps_partkey having
-                    sum(ps_supplycost * ps_availqty) > (
-                        select
-                            sum(ps_supplycost * ps_availqty) * 0.0001
-                        from
-                            partsupp,
-                            supplier,
-                            nation
-                        where
-                            ps_suppkey = s_suppkey
-                            and s_nationkey = n_nationkey
-                            and n_name = 'GERMANY'
-                    )
-            order by
-                value desc;",
-        ),
-
-        // original
-        // 12 => ctx.create_logical_plan(
-        //     "select
-        //         l_shipmode,
-        //         sum(case
-        //             when o_orderpriority = '1-URGENT'
-        //                 or o_orderpriority = '2-HIGH'
-        //                 then 1
-        //             else 0
-        //         end) as high_line_count,
-        //         sum(case
-        //             when o_orderpriority <> '1-URGENT'
-        //                 and o_orderpriority <> '2-HIGH'
-        //                 then 1
-        //             else 0
-        //         end) as low_line_count
-        //     from
-        //         orders,
-        //         lineitem
-        //     where
-        //         o_orderkey = l_orderkey
-        //         and l_shipmode in ('MAIL', 'SHIP')
-        //         and l_commitdate < l_receiptdate
-        //         and l_shipdate < l_commitdate
-        //         and l_receiptdate >= date '1994-01-01'
-        //         and l_receiptdate < date '1994-01-01' + interval '1' year
-        //     group by
-        //         l_shipmode
-        //     order by
-        //         l_shipmode;"
-        // ),
-        12 => ctx.create_logical_plan(
-            "select
-                l_shipmode,
-                sum(case
-                    when o_orderpriority = '1-URGENT'
-                        or o_orderpriority = '2-HIGH'
-                        then 1
-                    else 0
-                end) as high_line_count,
-                sum(case
-                    when o_orderpriority <> '1-URGENT'
-                        and o_orderpriority <> '2-HIGH'
-                        then 1
-                    else 0
-                end) as low_line_count
-            from
-                lineitem
-            join
-                orders
-            on
-                l_orderkey = o_orderkey
-            where
-                l_shipmode in ('MAIL', 'SHIP')
-                and l_commitdate < l_receiptdate
-                and l_shipdate < l_commitdate
-                and l_receiptdate >= date '1994-01-01'
-                and l_receiptdate < date '1995-01-01'
-            group by
-                l_shipmode
-            order by
-                l_shipmode;",
-        ),
-
-        13 => ctx.create_logical_plan(
-            "select
-                c_count,
-                count(*) as custdist
-            from
-                (
-                    select
-                        c_custkey,
-                        count(o_orderkey)
-                    from
-                        customer left outer join orders on
-                            c_custkey = o_custkey
-                            and o_comment not like '%special%requests%'
-                    group by
-                        c_custkey
-                ) as c_orders (c_custkey, c_count)
-            group by
-                c_count
-            order by
-                custdist desc,
-                c_count desc;",
-        ),
-
-        14 => ctx.create_logical_plan(
-            "select
-                100.00 * sum(case
-                    when p_type like 'PROMO%'
-                        then l_extendedprice * (1 - l_discount)
-                    else 0
-                end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue
-            from
-                lineitem,
-                part
-            where
-                l_partkey = p_partkey
-                and l_shipdate >= date '1995-09-01'
-                and l_shipdate < date '1995-10-01';",
-        ),
-
-        15 => ctx.create_logical_plan(
-            "create view revenue0 (supplier_no, total_revenue) as
-                select
-                    l_suppkey,
-                    sum(l_extendedprice * (1 - l_discount))
-                from
-                    lineitem
-                where
-                    l_shipdate >= date '1996-01-01'
-                    and l_shipdate < date '1996-01-01' + interval '3' month
-                group by
-                    l_suppkey;
-
-            select
-                s_suppkey,
-                s_name,
-                s_address,
-                s_phone,
-                total_revenue
-            from
-                supplier,
-                revenue0
-            where
-                s_suppkey = supplier_no
-                and total_revenue = (
-                    select
-                        max(total_revenue)
-                    from
-                        revenue0
-                )
-            order by
-                s_suppkey;
-
-            drop view revenue0;",
-        ),
-
-        16 => ctx.create_logical_plan(
-            "select
-                p_brand,
-                p_type,
-                p_size,
-                count(distinct ps_suppkey) as supplier_cnt
-            from
-                partsupp,
-                part
-            where
-                p_partkey = ps_partkey
-                and p_brand <> 'Brand#45'
-                and p_type not like 'MEDIUM POLISHED%'
-                and p_size in (49, 14, 23, 45, 19, 3, 36, 9)
-                and ps_suppkey not in (
-                    select
-                        s_suppkey
-                    from
-                        supplier
-                    where
-                        s_comment like '%Customer%Complaints%'
-                )
-            group by
-                p_brand,
-                p_type,
-                p_size
-            order by
-                supplier_cnt desc,
-                p_brand,
-                p_type,
-                p_size;",
-        ),
-
-        17 => ctx.create_logical_plan(
-            "select
-                sum(l_extendedprice) / 7.0 as avg_yearly
-            from
-                lineitem,
-                part
-            where
-                p_partkey = l_partkey
-                and p_brand = 'Brand#23'
-                and p_container = 'MED BOX'
-                and l_quantity < (
-                    select
-                        0.2 * avg(l_quantity)
-                    from
-                        lineitem
-                    where
-                        l_partkey = p_partkey
-                );",
-        ),
-
-        18 => ctx.create_logical_plan(
-            "select
-                c_name,
-                c_custkey,
-                o_orderkey,
-                o_orderdate,
-                o_totalprice,
-                sum(l_quantity)
-            from
-                customer,
-                orders,
-                lineitem
-            where
-                o_orderkey in (
-                    select
-                        l_orderkey
-                    from
-                        lineitem
-                    group by
-                        l_orderkey having
-                            sum(l_quantity) > 300
-                )
-                and c_custkey = o_custkey
-                and o_orderkey = l_orderkey
-            group by
-                c_name,
-                c_custkey,
-                o_orderkey,
-                o_orderdate,
-                o_totalprice
-            order by
-                o_totalprice desc,
-                o_orderdate;",
-        ),
-
-        19 => ctx.create_logical_plan(
-            "select
-                sum(l_extendedprice* (1 - l_discount)) as revenue
-            from
-                lineitem,
-                part
-            where
-                (
-                    p_partkey = l_partkey
-                    and p_brand = 'Brand#12'
-                    and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG')
-                    and l_quantity >= 1 and l_quantity <= 1 + 10
-                    and p_size between 1 and 5
-                    and l_shipmode in ('AIR', 'AIR REG')
-                    and l_shipinstruct = 'DELIVER IN PERSON'
-                )
-                or
-                (
-                    p_partkey = l_partkey
-                    and p_brand = 'Brand#23'
-                    and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK')
-                    and l_quantity >= 10 and l_quantity <= 10 + 10
-                    and p_size between 1 and 10
-                    and l_shipmode in ('AIR', 'AIR REG')
-                    and l_shipinstruct = 'DELIVER IN PERSON'
-                )
-                or
-                (
-                    p_partkey = l_partkey
-                    and p_brand = 'Brand#34'
-                    and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG')
-                    and l_quantity >= 20 and l_quantity <= 20 + 10
-                    and p_size between 1 and 15
-                    and l_shipmode in ('AIR', 'AIR REG')
-                    and l_shipinstruct = 'DELIVER IN PERSON'
-                );",
-        ),
-
-        20 => ctx.create_logical_plan(
-            "select
-                s_name,
-                s_address
-            from
-                supplier,
-                nation
-            where
-                s_suppkey in (
-                    select
-                        ps_suppkey
-                    from
-                        partsupp
-                    where
-                        ps_partkey in (
-                            select
-                                p_partkey
-                            from
-                                part
-                            where
-                                p_name like 'forest%'
-                        )
-                        and ps_availqty > (
-                            select
-                                0.5 * sum(l_quantity)
-                            from
-                                lineitem
-                            where
-                                l_partkey = ps_partkey
-                                and l_suppkey = ps_suppkey
-                                and l_shipdate >= date '1994-01-01'
-                                and l_shipdate < 'date 1994-01-01' + interval '1' year
-                        )
-                )
-                and s_nationkey = n_nationkey
-                and n_name = 'CANADA'
-            order by
-                s_name;",
-        ),
-
-        21 => ctx.create_logical_plan(
-            "select
-                s_name,
-                count(*) as numwait
-            from
-                supplier,
-                lineitem l1,
-                orders,
-                nation
-            where
-                s_suppkey = l1.l_suppkey
-                and o_orderkey = l1.l_orderkey
-                and o_orderstatus = 'F'
-                and l1.l_receiptdate > l1.l_commitdate
-                and exists (
-                    select
-                        *
-                    from
-                        lineitem l2
-                    where
-                        l2.l_orderkey = l1.l_orderkey
-                        and l2.l_suppkey <> l1.l_suppkey
-                )
-                and not exists (
-                    select
-                        *
-                    from
-                        lineitem l3
-                    where
-                        l3.l_orderkey = l1.l_orderkey
-                        and l3.l_suppkey <> l1.l_suppkey
-                        and l3.l_receiptdate > l3.l_commitdate
-                )
-                and s_nationkey = n_nationkey
-                and n_name = 'SAUDI ARABIA'
-            group by
-                s_name
-            order by
-                numwait desc,
-                s_name;",
-        ),
-
-        22 => ctx.create_logical_plan(
-            "select
-                cntrycode,
-                count(*) as numcust,
-                sum(c_acctbal) as totacctbal
-            from
-                (
-                    select
-                        substring(c_phone from 1 for 2) as cntrycode,
-                        c_acctbal
-                    from
-                        customer
-                    where
-                        substring(c_phone from 1 for 2) in
-                            ('13', '31', '23', '29', '30', '18', '17')
-                        and c_acctbal > (
-                            select
-                                avg(c_acctbal)
-                            from
-                                customer
-                            where
-                                c_acctbal > 0.00
-                                and substring(c_phone from 1 for 2) in
-                                    ('13', '31', '23', '29', '30', '18', '17')
-                        )
-                        and not exists (
-                            select
-                                *
-                            from
-                                orders
-                            where
-                                o_custkey = c_custkey
-                        )
-                ) as custsale
-            group by
-                cntrycode
-            order by
-                cntrycode;",
-        ),
-
-        _ => unimplemented!("invalid query. Expected value between 1 and 22"),
-    }
-}
-
-async fn execute_query(
-    ctx: &mut ExecutionContext,
-    plan: &LogicalPlan,
-    debug: bool,
-) -> Result<Vec<arrow::record_batch::RecordBatch>> {
-    if debug {
-        println!("Logical plan:\n{:?}", plan);
-    }
-    let plan = ctx.optimize(&plan)?;
-    if debug {
-        println!("Optimized logical plan:\n{:?}", plan);
-    }
-    let physical_plan = ctx.create_physical_plan(&plan)?;
-    let result = collect(physical_plan).await?;
-    if debug {
-        pretty::print_batches(&result)?;
-    }
-    Ok(result)
-}
-
-async fn convert_tbl(opt: ConvertOpt) -> Result<()> {
-    let output_root_path = Path::new(&opt.output_path);
-    for table in TABLES {
-        let start = Instant::now();
-        let schema = get_schema(table);
-
-        let input_path = format!("{}/{}.tbl", opt.input_path.to_str().unwrap(), table);
-        let options = CsvReadOptions::new()
-            .schema(&schema)
-            .delimiter(b'|')
-            .file_extension(".tbl");
-
-        let config = ExecutionConfig::new().with_batch_size(opt.batch_size);
-        let mut ctx = ExecutionContext::with_config(config);
-
-        // build plan to read the TBL file
-        let mut csv = ctx.read_csv(&input_path, options)?;
-
-        // optionally, repartition the file
-        if opt.partitions > 1 {
-            csv = csv.repartition(Partitioning::RoundRobinBatch(opt.partitions))?
-        }
-
-        // create the physical plan
-        let csv = csv.to_logical_plan();
-        let csv = ctx.optimize(&csv)?;
-        let csv = ctx.create_physical_plan(&csv)?;
-
-        let output_path = output_root_path.join(table);
-        let output_path = output_path.to_str().unwrap().to_owned();
-
-        println!(
-            "Converting '{}' to {} files in directory '{}'",
-            &input_path, &opt.file_format, &output_path
-        );
-        match opt.file_format.as_str() {
-            "csv" => ctx.write_csv(csv, output_path).await?,
-            "parquet" => {
-                let compression = match opt.compression.as_str() {
-                    "none" => Compression::UNCOMPRESSED,
-                    "snappy" => Compression::SNAPPY,
-                    "brotli" => Compression::BROTLI,
-                    "gzip" => Compression::GZIP,
-                    "lz4" => Compression::LZ4,
-                    "lz0" => Compression::LZO,
-                    "zstd" => Compression::ZSTD,
-                    other => {
-                        return Err(DataFusionError::NotImplemented(format!(
-                            "Invalid compression format: {}",
-                            other
-                        )))
-                    }
-                };
-                let props = WriterProperties::builder()
-                    .set_compression(compression)
-                    .build();
-                ctx.write_parquet(csv, output_path, Some(props)).await?
-            }
-            other => {
-                return Err(DataFusionError::NotImplemented(format!(
-                    "Invalid output format: {}",
-                    other
-                )))
-            }
-        }
-        println!("Conversion completed in {} ms", start.elapsed().as_millis());
-    }
-
-    Ok(())
-}
-
-fn get_table(
-    path: &str,
-    table: &str,
-    table_format: &str,
-    max_concurrency: usize,
-) -> Result<Arc<dyn TableProvider>> {
-    match table_format {
-        // dbgen creates .tbl ('|' delimited) files without header
-        "tbl" => {
-            let path = format!("{}/{}.tbl", path, table);
-            let schema = get_schema(table);
-            let options = CsvReadOptions::new()
-                .schema(&schema)
-                .delimiter(b'|')
-                .has_header(false)
-                .file_extension(".tbl");
-
-            Ok(Arc::new(CsvFile::try_new(&path, options)?))
-        }
-        "csv" => {
-            let path = format!("{}/{}", path, table);
-            let schema = get_schema(table);
-            let options = CsvReadOptions::new().schema(&schema).has_header(true);
-
-            Ok(Arc::new(CsvFile::try_new(&path, options)?))
-        }
-        "parquet" => {
-            let path = format!("{}/{}", path, table);
-            Ok(Arc::new(ParquetTable::try_new(&path, max_concurrency)?))
-        }
-        other => {
-            unimplemented!("Invalid file format '{}'", other);
-        }
-    }
-}
-
-fn get_schema(table: &str) -> Schema {
-    // note that the schema intentionally uses signed integers so that any generated Parquet
-    // files can also be used to benchmark tools that only support signed integers, such as
-    // Apache Spark
-
-    match table {
-        "part" => Schema::new(vec![
-            Field::new("p_partkey", DataType::Int32, false),
-            Field::new("p_name", DataType::Utf8, false),
-            Field::new("p_mfgr", DataType::Utf8, false),
-            Field::new("p_brand", DataType::Utf8, false),
-            Field::new("p_type", DataType::Utf8, false),
-            Field::new("p_size", DataType::Int32, false),
-            Field::new("p_container", DataType::Utf8, false),
-            Field::new("p_retailprice", DataType::Float64, false),
-            Field::new("p_comment", DataType::Utf8, false),
-        ]),
-
-        "supplier" => Schema::new(vec![
-            Field::new("s_suppkey", DataType::Int32, false),
-            Field::new("s_name", DataType::Utf8, false),
-            Field::new("s_address", DataType::Utf8, false),
-            Field::new("s_nationkey", DataType::Int32, false),
-            Field::new("s_phone", DataType::Utf8, false),
-            Field::new("s_acctbal", DataType::Float64, false),
-            Field::new("s_comment", DataType::Utf8, false),
-        ]),
-
-        "partsupp" => Schema::new(vec![
-            Field::new("ps_partkey", DataType::Int32, false),
-            Field::new("ps_suppkey", DataType::Int32, false),
-            Field::new("ps_availqty", DataType::Int32, false),
-            Field::new("ps_supplycost", DataType::Float64, false),
-            Field::new("ps_comment", DataType::Utf8, false),
-        ]),
-
-        "customer" => Schema::new(vec![
-            Field::new("c_custkey", DataType::Int32, false),
-            Field::new("c_name", DataType::Utf8, false),
-            Field::new("c_address", DataType::Utf8, false),
-            Field::new("c_nationkey", DataType::Int32, false),
-            Field::new("c_phone", DataType::Utf8, false),
-            Field::new("c_acctbal", DataType::Float64, false),
-            Field::new("c_mktsegment", DataType::Utf8, false),
-            Field::new("c_comment", DataType::Utf8, false),
-        ]),
-
-        "orders" => Schema::new(vec![
-            Field::new("o_orderkey", DataType::Int32, false),
-            Field::new("o_custkey", DataType::Int32, false),
-            Field::new("o_orderstatus", DataType::Utf8, false),
-            Field::new("o_totalprice", DataType::Float64, false),
-            Field::new("o_orderdate", DataType::Date32, false),
-            Field::new("o_orderpriority", DataType::Utf8, false),
-            Field::new("o_clerk", DataType::Utf8, false),
-            Field::new("o_shippriority", DataType::Int32, false),
-            Field::new("o_comment", DataType::Utf8, false),
-        ]),
-
-        "lineitem" => Schema::new(vec![
-            Field::new("l_orderkey", DataType::Int32, false),
-            Field::new("l_partkey", DataType::Int32, false),
-            Field::new("l_suppkey", DataType::Int32, false),
-            Field::new("l_linenumber", DataType::Int32, false),
-            Field::new("l_quantity", DataType::Float64, false),
-            Field::new("l_extendedprice", DataType::Float64, false),
-            Field::new("l_discount", DataType::Float64, false),
-            Field::new("l_tax", DataType::Float64, false),
-            Field::new("l_returnflag", DataType::Utf8, false),
-            Field::new("l_linestatus", DataType::Utf8, false),
-            Field::new("l_shipdate", DataType::Date32, false),
-            Field::new("l_commitdate", DataType::Date32, false),
-            Field::new("l_receiptdate", DataType::Date32, false),
-            Field::new("l_shipinstruct", DataType::Utf8, false),
-            Field::new("l_shipmode", DataType::Utf8, false),
-            Field::new("l_comment", DataType::Utf8, false),
-        ]),
-
-        "nation" => Schema::new(vec![
-            Field::new("n_nationkey", DataType::Int32, false),
-            Field::new("n_name", DataType::Utf8, false),
-            Field::new("n_regionkey", DataType::Int32, false),
-            Field::new("n_comment", DataType::Utf8, false),
-        ]),
-
-        "region" => Schema::new(vec![
-            Field::new("r_regionkey", DataType::Int32, false),
-            Field::new("r_name", DataType::Utf8, false),
-            Field::new("r_comment", DataType::Utf8, false),
-        ]),
-
-        _ => unimplemented!(),
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use std::env;
-    use std::sync::Arc;
-
-    use arrow::array::*;
-    use arrow::record_batch::RecordBatch;
-    use arrow::util::display::array_value_to_string;
-
-    use datafusion::logical_plan::Expr;
-    use datafusion::logical_plan::Expr::Cast;
-
-    #[tokio::test]
-    async fn q1() -> Result<()> {
-        verify_query(1).await
-    }
-
-    #[tokio::test]
-    async fn q2() -> Result<()> {
-        verify_query(2).await
-    }
-
-    #[tokio::test]
-    async fn q3() -> Result<()> {
-        verify_query(3).await
-    }
-
-    #[tokio::test]
-    async fn q4() -> Result<()> {
-        verify_query(4).await
-    }
-
-    #[tokio::test]
-    async fn q5() -> Result<()> {
-        verify_query(5).await
-    }
-
-    #[tokio::test]
-    async fn q6() -> Result<()> {
-        verify_query(6).await
-    }
-
-    #[tokio::test]
-    async fn q7() -> Result<()> {
-        verify_query(7).await
-    }
-
-    #[tokio::test]
-    async fn q8() -> Result<()> {
-        verify_query(8).await
-    }
-
-    #[tokio::test]
-    async fn q9() -> Result<()> {
-        verify_query(9).await
-    }
-
-    #[tokio::test]
-    async fn q10() -> Result<()> {
-        verify_query(10).await
-    }
-
-    #[tokio::test]
-    async fn q11() -> Result<()> {
-        verify_query(11).await
-    }
-
-    #[tokio::test]
-    async fn q12() -> Result<()> {
-        verify_query(12).await
-    }
-
-    #[tokio::test]
-    async fn q13() -> Result<()> {
-        verify_query(13).await
-    }
-
-    #[tokio::test]
-    async fn q14() -> Result<()> {
-        verify_query(14).await
-    }
-
-    #[tokio::test]
-    async fn q15() -> Result<()> {
-        verify_query(15).await
-    }
-
-    #[tokio::test]
-    async fn q16() -> Result<()> {
-        verify_query(16).await
-    }
-
-    #[tokio::test]
-    async fn q17() -> Result<()> {
-        verify_query(17).await
-    }
-
-    #[tokio::test]
-    async fn q18() -> Result<()> {
-        verify_query(18).await
-    }
-
-    #[tokio::test]
-    async fn q19() -> Result<()> {
-        verify_query(19).await
-    }
-
-    #[tokio::test]
-    async fn q20() -> Result<()> {
-        verify_query(20).await
-    }
-
-    #[tokio::test]
-    async fn q21() -> Result<()> {
-        verify_query(21).await
-    }
-
-    #[tokio::test]
-    async fn q22() -> Result<()> {
-        verify_query(22).await
-    }
-
-    #[tokio::test]
-    async fn run_q1() -> Result<()> {
-        run_query(1).await
-    }
-
-    #[tokio::test]
-    async fn run_q3() -> Result<()> {
-        run_query(3).await
-    }
-
-    #[tokio::test]
-    async fn run_q5() -> Result<()> {
-        run_query(5).await
-    }
-
-    #[tokio::test]
-    async fn run_q6() -> Result<()> {
-        run_query(6).await
-    }
-
-    #[tokio::test]
-    async fn run_q10() -> Result<()> {
-        run_query(10).await
-    }
-
-    #[tokio::test]
-    async fn run_q12() -> Result<()> {
-        run_query(12).await
-    }
-
-    #[tokio::test]
-    async fn run_q14() -> Result<()> {
-        run_query(14).await
-    }
-
-    /// Specialised String representation
-    fn col_str(column: &ArrayRef, row_index: usize) -> String {
-        if column.is_null(row_index) {
-            return "NULL".to_string();
-        }
-
-        // Special case ListArray as there is no pretty print support for it yet
-        if let DataType::FixedSizeList(_, n) = column.data_type() {
-            let array = column
-                .as_any()
-                .downcast_ref::<FixedSizeListArray>()
-                .unwrap()
-                .value(row_index);
-
-            let mut r = Vec::with_capacity(*n as usize);
-            for i in 0..*n {
-                r.push(col_str(&array, i as usize));
-            }
-            return format!("[{}]", r.join(","));
-        }
-
-        array_value_to_string(column, row_index).unwrap()
-    }
-
-    /// Converts the results into a 2d array of strings, `result[row][column]`
-    /// Special cases nulls to NULL for testing
-    fn result_vec(results: &[RecordBatch]) -> Vec<Vec<String>> {
-        let mut result = vec![];
-        for batch in results {
-            for row_index in 0..batch.num_rows() {
-                let row_vec = batch
-                    .columns()
-                    .iter()
-                    .map(|column| col_str(column, row_index))
-                    .collect();
-                result.push(row_vec);
-            }
-        }
-        result
-    }
-
-    fn get_answer_schema(n: usize) -> Schema {
-        match n {
-            1 => Schema::new(vec![
-                Field::new("l_returnflag", DataType::Utf8, true),
-                Field::new("l_linestatus", DataType::Utf8, true),
-                Field::new("sum_qty", DataType::Float64, true),
-                Field::new("sum_base_price", DataType::Float64, true),
-                Field::new("sum_disc_price", DataType::Float64, true),
-                Field::new("sum_charge", DataType::Float64, true),
-                Field::new("avg_qty", DataType::Float64, true),
-                Field::new("avg_price", DataType::Float64, true),
-                Field::new("avg_disc", DataType::Float64, true),
-                Field::new("count_order", DataType::UInt64, true),
-            ]),
-
-            2 => Schema::new(vec![
-                Field::new("s_acctbal", DataType::Float64, true),
-                Field::new("s_name", DataType::Utf8, true),
-                Field::new("n_name", DataType::Utf8, true),
-                Field::new("p_partkey", DataType::Int32, true),
-                Field::new("p_mfgr", DataType::Utf8, true),
-                Field::new("s_address", DataType::Utf8, true),
-                Field::new("s_phone", DataType::Utf8, true),
-                Field::new("s_comment", DataType::Utf8, true),
-            ]),
-
-            3 => Schema::new(vec![
-                Field::new("l_orderkey", DataType::Int32, true),
-                Field::new("revenue", DataType::Float64, true),
-                Field::new("o_orderdate", DataType::Date32, true),
-                Field::new("o_shippriority", DataType::Int32, true),
-            ]),
-
-            4 => Schema::new(vec![
-                Field::new("o_orderpriority", DataType::Utf8, true),
-                Field::new("order_count", DataType::Int32, true),
-            ]),
-
-            5 => Schema::new(vec![
-                Field::new("n_name", DataType::Utf8, true),
-                Field::new("revenue", DataType::Float64, true),
-            ]),
-
-            6 => Schema::new(vec![Field::new("revenue", DataType::Float64, true)]),
-
-            7 => Schema::new(vec![
-                Field::new("supp_nation", DataType::Utf8, true),
-                Field::new("cust_nation", DataType::Utf8, true),
-                Field::new("l_year", DataType::Int32, true),
-                Field::new("revenue", DataType::Float64, true),
-            ]),
-
-            8 => Schema::new(vec![
-                Field::new("o_year", DataType::Int32, true),
-                Field::new("mkt_share", DataType::Float64, true),
-            ]),
-
-            9 => Schema::new(vec![
-                Field::new("nation", DataType::Utf8, true),
-                Field::new("o_year", DataType::Int32, true),
-                Field::new("sum_profit", DataType::Float64, true),
-            ]),
-
-            10 => Schema::new(vec![
-                Field::new("c_custkey", DataType::Int32, true),
-                Field::new("c_name", DataType::Utf8, true),
-                Field::new("revenue", DataType::Float64, true),
-                Field::new("c_acctbal", DataType::Float64, true),
-                Field::new("n_name", DataType::Utf8, true),
-                Field::new("c_address", DataType::Utf8, true),
-                Field::new("c_phone", DataType::Utf8, true),
-                Field::new("c_comment", DataType::Utf8, true),
-            ]),
-
-            11 => Schema::new(vec![
-                Field::new("ps_partkey", DataType::Int32, true),
-                Field::new("value", DataType::Float64, true),
-            ]),
-
-            12 => Schema::new(vec![
-                Field::new("l_shipmode", DataType::Utf8, true),
-                Field::new("high_line_count", DataType::Int64, true),
-                Field::new("low_line_count", DataType::Int64, true),
-            ]),
-
-            13 => Schema::new(vec![
-                Field::new("c_count", DataType::Int64, true),
-                Field::new("custdist", DataType::Int64, true),
-            ]),
-
-            14 => Schema::new(vec![Field::new("promo_revenue", DataType::Float64, true)]),
-
-            15 => Schema::new(vec![Field::new("promo_revenue", DataType::Float64, true)]),
-
-            16 => Schema::new(vec![
-                Field::new("p_brand", DataType::Utf8, true),
-                Field::new("p_type", DataType::Utf8, true),
-                Field::new("c_phone", DataType::Int32, true),
-                Field::new("c_comment", DataType::Int32, true),
-            ]),
-
-            17 => Schema::new(vec![Field::new("avg_yearly", DataType::Float64, true)]),
-
-            18 => Schema::new(vec![
-                Field::new("c_name", DataType::Utf8, true),
-                Field::new("c_custkey", DataType::Int32, true),
-                Field::new("o_orderkey", DataType::Int32, true),
-                Field::new("o_orderdate", DataType::Date32, true),
-                Field::new("o_totalprice", DataType::Float64, true),
-                Field::new("sum_l_quantity", DataType::Float64, true),
-            ]),
-
-            19 => Schema::new(vec![Field::new("revenue", DataType::Float64, true)]),
-
-            20 => Schema::new(vec![
-                Field::new("s_name", DataType::Utf8, true),
-                Field::new("s_address", DataType::Utf8, true),
-            ]),
-
-            21 => Schema::new(vec![
-                Field::new("s_name", DataType::Utf8, true),
-                Field::new("numwait", DataType::Int32, true),
-            ]),
-
-            22 => Schema::new(vec![
-                Field::new("cntrycode", DataType::Int32, true),
-                Field::new("numcust", DataType::Int32, true),
-                Field::new("totacctbal", DataType::Float64, true),
-            ]),
-
-            _ => unimplemented!(),
-        }
-    }
-
-    // convert expected schema to all utf8 so columns can be read as strings to be parsed separately
-    // this is due to the fact that the csv parser cannot handle leading/trailing spaces
-    fn string_schema(schema: Schema) -> Schema {
-        Schema::new(
-            schema
-                .fields()
-                .iter()
-                .map(|field| {
-                    Field::new(
-                        Field::name(&field),
-                        DataType::Utf8,
-                        Field::is_nullable(&field),
-                    )
-                })
-                .collect::<Vec<Field>>(),
-        )
-    }
-
-    // convert the schema to the same but with all columns set to nullable=true.
-    // this allows direct schema comparison ignoring nullable.
-    fn nullable_schema(schema: Arc<Schema>) -> Schema {
-        Schema::new(
-            schema
-                .fields()
-                .iter()
-                .map(|field| {
-                    Field::new(
-                        Field::name(&field),
-                        Field::data_type(&field).to_owned(),
-                        true,
-                    )
-                })
-                .collect::<Vec<Field>>(),
-        )
-    }
-
-    async fn run_query(n: usize) -> Result<()> {
-        // Tests running query with empty tables, to see whether they run succesfully.
-
-        let config = ExecutionConfig::new()
-            .with_concurrency(1)
-            .with_batch_size(10);
-        let mut ctx = ExecutionContext::with_config(config);
-
-        for &table in TABLES {
-            let schema = get_schema(table);
-            let batch = RecordBatch::new_empty(Arc::new(schema.to_owned()));
-
-            let provider = MemTable::try_new(Arc::new(schema), vec![vec![batch]])?;
-
-            ctx.register_table(table, Arc::new(provider))?;
-        }
-
-        let plan = create_logical_plan(&mut ctx, n)?;
-        execute_query(&mut ctx, &plan, false).await?;
-
-        Ok(())
-    }
-
-    async fn verify_query(n: usize) -> Result<()> {
-        if let Ok(path) = env::var("TPCH_DATA") {
-            // load expected answers from tpch-dbgen
-            // read csv as all strings, trim and cast to expected type as the csv string
-            // to value parser does not handle data with leading/trailing spaces
-            let mut ctx = ExecutionContext::new();
-            let schema = string_schema(get_answer_schema(n));
-            let options = CsvReadOptions::new()
-                .schema(&schema)
-                .delimiter(b'|')
-                .file_extension(".out");
-            let df = ctx.read_csv(&format!("{}/answers/q{}.out", path, n), options)?;
-            let df = df.select(
-                get_answer_schema(n)
-                    .fields()
-                    .iter()
-                    .map(|field| {
-                        Expr::Alias(
-                            Box::new(Cast {
-                                expr: Box::new(trim(col(Field::name(&field)))),
-                                data_type: Field::data_type(&field).to_owned(),
-                            }),
-                            Field::name(&field).to_string(),
-                        )
-                    })
-                    .collect::<Vec<Expr>>(),
-            )?;
-            let expected = df.collect().await?;
-
-            // run the query to compute actual results of the query
-            let opt = BenchmarkOpt {
-                query: n,
-                debug: false,
-                iterations: 1,
-                concurrency: 2,
-                batch_size: 8192,
-                path: PathBuf::from(path.to_string()),
-                file_format: "tbl".to_string(),
-                mem_table: false,
-                partitions: 16,
-            };
-            let actual = benchmark(opt).await?;
-
-            // assert schema equality without comparing nullable values
-            assert_eq!(
-                nullable_schema(expected[0].schema()),
-                nullable_schema(actual[0].schema())
-            );
-
-            // convert both datasets to Vec<Vec<String>> for simple comparison
-            let expected_vec = result_vec(&expected);
-            let actual_vec = result_vec(&actual);
-
-            // basic result comparison
-            assert_eq!(expected_vec.len(), actual_vec.len());
-
-            // compare each row. this works as all TPC-H queries have determinisically ordered results
-            for i in 0..actual_vec.len() {
-                assert_eq!(expected_vec[i], actual_vec[i]);
-            }
-        } else {
-            println!("TPCH_DATA environment variable not set, skipping test");
-        }
-
-        Ok(())
-    }
-}
diff --git a/rust/datafusion-examples/Cargo.toml b/rust/datafusion-examples/Cargo.toml
deleted file mode 100644
index 673bfe2b87b..00000000000
--- a/rust/datafusion-examples/Cargo.toml
+++ /dev/null
@@ -1,39 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-[package]
-name = "datafusion-examples"
-description = "DataFusion usage examples"
-version = "5.0.0-SNAPSHOT"
-homepage = "https://github.com/apache/arrow"
-repository = "https://github.com/apache/arrow"
-authors = ["Apache Arrow <dev@arrow.apache.org>"]
-license = "Apache-2.0"
-keywords = [ "arrow", "query", "sql" ]
-edition = "2018"
-publish = false
-
-
-[dev-dependencies]
-datafusion = { path = "../datafusion" }
-arrow = { path = "../arrow" }
-prost = "0.7"
-arrow-flight = { path = "../arrow-flight" }
-tonic = "0.4"
-tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync"] }
-futures = "0.3"
-num_cpus = "1.13.0"
diff --git a/rust/datafusion-examples/examples/README.md b/rust/datafusion-examples/examples/README.md
deleted file mode 100644
index 163ef3d952b..00000000000
--- a/rust/datafusion-examples/examples/README.md
+++ /dev/null
@@ -1,28 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-# DataFusion Examples
-
-## Single Process
-
-The examples `csv_sql.rs` and `parquet_sql.rs` demonstrate building a query plan from a SQL statement and then executing the query plan against local CSV and Parquet files, respectively.
-
-## Distributed
-
-The `flight-client.rs` and `flight-server.rs` examples demonstrate how to run DataFusion as a standalone process and execute SQL queries from a client using the Flight protocol.
\ No newline at end of file
diff --git a/rust/datafusion-examples/examples/csv_sql.rs b/rust/datafusion-examples/examples/csv_sql.rs
deleted file mode 100644
index 63fd36d44ce..00000000000
--- a/rust/datafusion-examples/examples/csv_sql.rs
+++ /dev/null
@@ -1,52 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use arrow::util::pretty;
-
-use datafusion::error::Result;
-use datafusion::prelude::*;
-
-/// This example demonstrates executing a simple query against an Arrow data source (CSV) and
-/// fetching results
-#[tokio::main]
-async fn main() -> Result<()> {
-    // create local execution context
-    let mut ctx = ExecutionContext::new();
-
-    let testdata = arrow::util::test_util::arrow_test_data();
-
-    // register csv file with the execution context
-    ctx.register_csv(
-        "aggregate_test_100",
-        &format!("{}/csv/aggregate_test_100.csv", testdata),
-        CsvReadOptions::new(),
-    )?;
-
-    // execute the query
-    let df = ctx.sql(
-        "SELECT c1, MIN(c12), MAX(c12) \
-        FROM aggregate_test_100 \
-        WHERE c11 > 0.1 AND c11 < 0.9 \
-        GROUP BY c1",
-    )?;
-    let results = df.collect().await?;
-
-    // print the results
-    pretty::print_batches(&results)?;
-
-    Ok(())
-}
diff --git a/rust/datafusion-examples/examples/dataframe.rs b/rust/datafusion-examples/examples/dataframe.rs
deleted file mode 100644
index cba4d87f1e0..00000000000
--- a/rust/datafusion-examples/examples/dataframe.rs
+++ /dev/null
@@ -1,47 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use arrow::util::pretty;
-
-use datafusion::error::Result;
-use datafusion::prelude::*;
-
-/// This example demonstrates executing a simple query against an Arrow data source (Parquet) and
-/// fetching results, using the DataFrame trait
-#[tokio::main]
-async fn main() -> Result<()> {
-    // create local execution context
-    let mut ctx = ExecutionContext::new();
-
-    let testdata = arrow::util::test_util::parquet_test_data();
-
-    let filename = &format!("{}/alltypes_plain.parquet", testdata);
-
-    // define the query using the DataFrame trait
-    let df = ctx
-        .read_parquet(filename)?
-        .select_columns(&["id", "bool_col", "timestamp_col"])?
-        .filter(col("id").gt(lit(1)))?;
-
-    // execute the query
-    let results = df.collect().await?;
-
-    // print the results
-    pretty::print_batches(&results)?;
-
-    Ok(())
-}
diff --git a/rust/datafusion-examples/examples/dataframe_in_memory.rs b/rust/datafusion-examples/examples/dataframe_in_memory.rs
deleted file mode 100644
index de8552a3bba..00000000000
--- a/rust/datafusion-examples/examples/dataframe_in_memory.rs
+++ /dev/null
@@ -1,67 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::sync::Arc;
-
-use arrow::array::{Int32Array, StringArray};
-use arrow::datatypes::{DataType, Field, Schema};
-use arrow::record_batch::RecordBatch;
-use arrow::util::pretty;
-
-use datafusion::datasource::MemTable;
-use datafusion::error::Result;
-use datafusion::prelude::*;
-
-/// This example demonstrates how to use the DataFrame API against in-memory data.
-#[tokio::main]
-async fn main() -> Result<()> {
-    // define a schema.
-    let schema = Arc::new(Schema::new(vec![
-        Field::new("a", DataType::Utf8, false),
-        Field::new("b", DataType::Int32, false),
-    ]));
-
-    // define data.
-    let batch = RecordBatch::try_new(
-        schema.clone(),
-        vec![
-            Arc::new(StringArray::from(vec!["a", "b", "c", "d"])),
-            Arc::new(Int32Array::from(vec![1, 10, 10, 100])),
-        ],
-    )?;
-
-    // declare a new context. In spark API, this corresponds to a new spark SQLsession
-    let mut ctx = ExecutionContext::new();
-
-    // declare a table in memory. In spark API, this corresponds to createDataFrame(...).
-    let provider = MemTable::try_new(schema, vec![vec![batch]])?;
-    ctx.register_table("t", Arc::new(provider))?;
-    let df = ctx.table("t")?;
-
-    // construct an expression corresponding to "SELECT a, b FROM t WHERE b = 10" in SQL
-    let filter = col("b").eq(lit(10));
-
-    let df = df.select_columns(&["a", "b"])?.filter(filter)?;
-
-    // execute
-    let results = df.collect().await?;
-
-    // print the results
-    pretty::print_batches(&results)?;
-
-    Ok(())
-}
diff --git a/rust/datafusion-examples/examples/flight_client.rs b/rust/datafusion-examples/examples/flight_client.rs
deleted file mode 100644
index 2c2954d5a02..00000000000
--- a/rust/datafusion-examples/examples/flight_client.rs
+++ /dev/null
@@ -1,79 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::convert::TryFrom;
-use std::sync::Arc;
-
-use arrow::datatypes::Schema;
-use arrow::util::pretty;
-
-use arrow_flight::flight_descriptor;
-use arrow_flight::flight_service_client::FlightServiceClient;
-use arrow_flight::utils::flight_data_to_arrow_batch;
-use arrow_flight::{FlightDescriptor, Ticket};
-
-/// This example shows how to wrap DataFusion with `FlightService` to support looking up schema information for
-/// Parquet files and executing SQL queries against them on a remote server.
-/// This example is run along-side the example `flight_server`.
-#[tokio::main]
-async fn main() -> Result<(), Box<dyn std::error::Error>> {
-    let testdata = arrow::util::test_util::parquet_test_data();
-
-    // Create Flight client
-    let mut client = FlightServiceClient::connect("http://localhost:50051").await?;
-
-    // Call get_schema to get the schema of a Parquet file
-    let request = tonic::Request::new(FlightDescriptor {
-        r#type: flight_descriptor::DescriptorType::Path as i32,
-        cmd: vec![],
-        path: vec![format!("{}/alltypes_plain.parquet", testdata)],
-    });
-
-    let schema_result = client.get_schema(request).await?.into_inner();
-    let schema = Schema::try_from(&schema_result)?;
-    println!("Schema: {:?}", schema);
-
-    // Call do_get to execute a SQL query and receive results
-    let request = tonic::Request::new(Ticket {
-        ticket: "SELECT id FROM alltypes_plain".into(),
-    });
-
-    let mut stream = client.do_get(request).await?.into_inner();
-
-    // the schema should be the first message returned, else client should error
-    let flight_data = stream.message().await?.unwrap();
-    // convert FlightData to a stream
-    let schema = Arc::new(Schema::try_from(&flight_data)?);
-    println!("Schema: {:?}", schema);
-
-    // all the remaining stream messages should be dictionary and record batches
-    let mut results = vec![];
-    let dictionaries_by_field = vec![None; schema.fields().len()];
-    while let Some(flight_data) = stream.message().await? {
-        let record_batch = flight_data_to_arrow_batch(
-            &flight_data,
-            schema.clone(),
-            &dictionaries_by_field,
-        )?;
-        results.push(record_batch);
-    }
-
-    // print the results
-    pretty::print_batches(&results)?;
-
-    Ok(())
-}
diff --git a/rust/datafusion-examples/examples/flight_server.rs b/rust/datafusion-examples/examples/flight_server.rs
deleted file mode 100644
index 79660dd1871..00000000000
--- a/rust/datafusion-examples/examples/flight_server.rs
+++ /dev/null
@@ -1,213 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::pin::Pin;
-
-use futures::Stream;
-use tonic::transport::Server;
-use tonic::{Request, Response, Status, Streaming};
-
-use datafusion::datasource::parquet::ParquetTable;
-use datafusion::datasource::TableProvider;
-use datafusion::prelude::*;
-
-use arrow_flight::{
-    flight_service_server::FlightService, flight_service_server::FlightServiceServer,
-    Action, ActionType, Criteria, Empty, FlightData, FlightDescriptor, FlightInfo,
-    HandshakeRequest, HandshakeResponse, PutResult, SchemaResult, Ticket,
-};
-
-#[derive(Clone)]
-pub struct FlightServiceImpl {}
-
-#[tonic::async_trait]
-impl FlightService for FlightServiceImpl {
-    type HandshakeStream = Pin<
-        Box<dyn Stream<Item = Result<HandshakeResponse, Status>> + Send + Sync + 'static>,
-    >;
-    type ListFlightsStream =
-        Pin<Box<dyn Stream<Item = Result<FlightInfo, Status>> + Send + Sync + 'static>>;
-    type DoGetStream =
-        Pin<Box<dyn Stream<Item = Result<FlightData, Status>> + Send + Sync + 'static>>;
-    type DoPutStream =
-        Pin<Box<dyn Stream<Item = Result<PutResult, Status>> + Send + Sync + 'static>>;
-    type DoActionStream = Pin<
-        Box<
-            dyn Stream<Item = Result<arrow_flight::Result, Status>>
-                + Send
-                + Sync
-                + 'static,
-        >,
-    >;
-    type ListActionsStream =
-        Pin<Box<dyn Stream<Item = Result<ActionType, Status>> + Send + Sync + 'static>>;
-    type DoExchangeStream =
-        Pin<Box<dyn Stream<Item = Result<FlightData, Status>> + Send + Sync + 'static>>;
-
-    async fn get_schema(
-        &self,
-        request: Request<FlightDescriptor>,
-    ) -> Result<Response<SchemaResult>, Status> {
-        let request = request.into_inner();
-
-        let table = ParquetTable::try_new(&request.path[0], num_cpus::get()).unwrap();
-
-        let options = arrow::ipc::writer::IpcWriteOptions::default();
-        let schema_result = arrow_flight::utils::flight_schema_from_arrow_schema(
-            table.schema().as_ref(),
-            &options,
-        );
-
-        Ok(Response::new(schema_result))
-    }
-
-    async fn do_get(
-        &self,
-        request: Request<Ticket>,
-    ) -> Result<Response<Self::DoGetStream>, Status> {
-        let ticket = request.into_inner();
-        match std::str::from_utf8(&ticket.ticket) {
-            Ok(sql) => {
-                println!("do_get: {}", sql);
-
-                // create local execution context
-                let mut ctx = ExecutionContext::new();
-
-                let testdata = arrow::util::test_util::parquet_test_data();
-
-                // register parquet file with the execution context
-                ctx.register_parquet(
-                    "alltypes_plain",
-                    &format!("{}/alltypes_plain.parquet", testdata),
-                )
-                .map_err(to_tonic_err)?;
-
-                // create the DataFrame
-                let df = ctx.sql(sql).map_err(to_tonic_err)?;
-
-                // execute the query
-                let results = df.collect().await.map_err(to_tonic_err)?;
-                if results.is_empty() {
-                    return Err(Status::internal("There were no results from ticket"));
-                }
-
-                // add an initial FlightData message that sends schema
-                let options = arrow::ipc::writer::IpcWriteOptions::default();
-                let schema_flight_data =
-                    arrow_flight::utils::flight_data_from_arrow_schema(
-                        &df.schema().clone().into(),
-                        &options,
-                    );
-
-                let mut flights: Vec<Result<FlightData, Status>> =
-                    vec![Ok(schema_flight_data)];
-
-                let mut batches: Vec<Result<FlightData, Status>> = results
-                    .iter()
-                    .flat_map(|batch| {
-                        let (flight_dictionaries, flight_batch) =
-                            arrow_flight::utils::flight_data_from_arrow_batch(
-                                batch, &options,
-                            );
-                        flight_dictionaries
-                            .into_iter()
-                            .chain(std::iter::once(flight_batch))
-                            .map(Ok)
-                    })
-                    .collect();
-
-                // append batch vector to schema vector, so that the first message sent is the schema
-                flights.append(&mut batches);
-
-                let output = futures::stream::iter(flights);
-
-                Ok(Response::new(Box::pin(output) as Self::DoGetStream))
-            }
-            Err(e) => Err(Status::invalid_argument(format!("Invalid ticket: {:?}", e))),
-        }
-    }
-
-    async fn handshake(
-        &self,
-        _request: Request<Streaming<HandshakeRequest>>,
-    ) -> Result<Response<Self::HandshakeStream>, Status> {
-        Err(Status::unimplemented("Not yet implemented"))
-    }
-
-    async fn list_flights(
-        &self,
-        _request: Request<Criteria>,
-    ) -> Result<Response<Self::ListFlightsStream>, Status> {
-        Err(Status::unimplemented("Not yet implemented"))
-    }
-
-    async fn get_flight_info(
-        &self,
-        _request: Request<FlightDescriptor>,
-    ) -> Result<Response<FlightInfo>, Status> {
-        Err(Status::unimplemented("Not yet implemented"))
-    }
-
-    async fn do_put(
-        &self,
-        _request: Request<Streaming<FlightData>>,
-    ) -> Result<Response<Self::DoPutStream>, Status> {
-        Err(Status::unimplemented("Not yet implemented"))
-    }
-
-    async fn do_action(
-        &self,
-        _request: Request<Action>,
-    ) -> Result<Response<Self::DoActionStream>, Status> {
-        Err(Status::unimplemented("Not yet implemented"))
-    }
-
-    async fn list_actions(
-        &self,
-        _request: Request<Empty>,
-    ) -> Result<Response<Self::ListActionsStream>, Status> {
-        Err(Status::unimplemented("Not yet implemented"))
-    }
-
-    async fn do_exchange(
-        &self,
-        _request: Request<Streaming<FlightData>>,
-    ) -> Result<Response<Self::DoExchangeStream>, Status> {
-        Err(Status::unimplemented("Not yet implemented"))
-    }
-}
-
-fn to_tonic_err(e: datafusion::error::DataFusionError) -> Status {
-    Status::internal(format!("{:?}", e))
-}
-
-/// This example shows how to wrap DataFusion with `FlightService` to support looking up schema information for
-/// Parquet files and executing SQL queries against them on a remote server.
-/// This example is run along-side the example `flight_client`.
-#[tokio::main]
-async fn main() -> Result<(), Box<dyn std::error::Error>> {
-    let addr = "0.0.0.0:50051".parse()?;
-    let service = FlightServiceImpl {};
-
-    let svc = FlightServiceServer::new(service);
-
-    println!("Listening on {:?}", addr);
-
-    Server::builder().add_service(svc).serve(addr).await?;
-
-    Ok(())
-}
diff --git a/rust/datafusion-examples/examples/parquet_sql.rs b/rust/datafusion-examples/examples/parquet_sql.rs
deleted file mode 100644
index 8043d3296c8..00000000000
--- a/rust/datafusion-examples/examples/parquet_sql.rs
+++ /dev/null
@@ -1,50 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use arrow::util::pretty;
-
-use datafusion::error::Result;
-use datafusion::prelude::*;
-
-/// This example demonstrates executing a simple query against an Arrow data source (Parquet) and
-/// fetching results
-#[tokio::main]
-async fn main() -> Result<()> {
-    // create local execution context
-    let mut ctx = ExecutionContext::new();
-
-    let testdata = arrow::util::test_util::parquet_test_data();
-
-    // register parquet file with the execution context
-    ctx.register_parquet(
-        "alltypes_plain",
-        &format!("{}/alltypes_plain.parquet", testdata),
-    )?;
-
-    // execute the query
-    let df = ctx.sql(
-        "SELECT int_col, double_col, CAST(date_string_col as VARCHAR) \
-        FROM alltypes_plain \
-        WHERE id > 1 AND tinyint_col < double_col",
-    )?;
-    let results = df.collect().await?;
-
-    // print the results
-    pretty::print_batches(&results)?;
-
-    Ok(())
-}
diff --git a/rust/datafusion-examples/examples/simple_udaf.rs b/rust/datafusion-examples/examples/simple_udaf.rs
deleted file mode 100644
index 8086dfc47de..00000000000
--- a/rust/datafusion-examples/examples/simple_udaf.rs
+++ /dev/null
@@ -1,170 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-/// In this example we will declare a single-type, single return type UDAF that computes the geometric mean.
-/// The geometric mean is described here: https://en.wikipedia.org/wiki/Geometric_mean
-use arrow::{
-    array::Float32Array, array::Float64Array, datatypes::DataType,
-    record_batch::RecordBatch,
-};
-
-use datafusion::{error::Result, logical_plan::create_udaf, physical_plan::Accumulator};
-use datafusion::{prelude::*, scalar::ScalarValue};
-use std::sync::Arc;
-
-// create local execution context with an in-memory table
-fn create_context() -> Result<ExecutionContext> {
-    use arrow::datatypes::{Field, Schema};
-    use datafusion::datasource::MemTable;
-    // define a schema.
-    let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Float32, false)]));
-
-    // define data in two partitions
-    let batch1 = RecordBatch::try_new(
-        schema.clone(),
-        vec![Arc::new(Float32Array::from(vec![2.0, 4.0, 8.0]))],
-    )?;
-    let batch2 = RecordBatch::try_new(
-        schema.clone(),
-        vec![Arc::new(Float32Array::from(vec![64.0]))],
-    )?;
-
-    // declare a new context. In spark API, this corresponds to a new spark SQLsession
-    let mut ctx = ExecutionContext::new();
-
-    // declare a table in memory. In spark API, this corresponds to createDataFrame(...).
-    let provider = MemTable::try_new(schema, vec![vec![batch1], vec![batch2]])?;
-    ctx.register_table("t", Arc::new(provider))?;
-    Ok(ctx)
-}
-
-/// A UDAF has state across multiple rows, and thus we require a `struct` with that state.
-#[derive(Debug)]
-struct GeometricMean {
-    n: u32,
-    prod: f64,
-}
-
-impl GeometricMean {
-    // how the struct is initialized
-    pub fn new() -> Self {
-        GeometricMean { n: 0, prod: 1.0 }
-    }
-}
-
-// UDAFs are built using the trait `Accumulator`, that offers DataFusion the necessary functions
-// to use them.
-impl Accumulator for GeometricMean {
-    // this function serializes our state to `ScalarValue`, which DataFusion uses
-    // to pass this state between execution stages.
-    // Note that this can be arbitrary data.
-    fn state(&self) -> Result<Vec<ScalarValue>> {
-        Ok(vec![
-            ScalarValue::from(self.prod),
-            ScalarValue::from(self.n),
-        ])
-    }
-
-    // this function receives one entry per argument of this accumulator.
-    // DataFusion calls this function on every row, and expects this function to update the accumulator's state.
-    fn update(&mut self, values: &[ScalarValue]) -> Result<()> {
-        // this is a one-argument UDAF, and thus we use `0`.
-        let value = &values[0];
-        match value {
-            // here we map `ScalarValue` to our internal state. `Float64` indicates that this function
-            // only accepts Float64 as its argument (DataFusion does try to coerce arguments to this type)
-            //
-            // Note that `.map` here ensures that we ignore Nulls.
-            ScalarValue::Float64(e) => e.map(|value| {
-                self.prod *= value;
-                self.n += 1;
-            }),
-            _ => unreachable!(""),
-        };
-        Ok(())
-    }
-
-    // this function receives states from other accumulators (Vec<ScalarValue>)
-    // and updates the accumulator.
-    fn merge(&mut self, states: &[ScalarValue]) -> Result<()> {
-        let prod = &states[0];
-        let n = &states[1];
-        match (prod, n) {
-            (ScalarValue::Float64(Some(prod)), ScalarValue::UInt32(Some(n))) => {
-                self.prod *= prod;
-                self.n += n;
-            }
-            _ => unreachable!(""),
-        };
-        Ok(())
-    }
-
-    // DataFusion expects this function to return the final value of this aggregator.
-    // in this case, this is the formula of the geometric mean
-    fn evaluate(&self) -> Result<ScalarValue> {
-        let value = self.prod.powf(1.0 / self.n as f64);
-        Ok(ScalarValue::from(value))
-    }
-
-    // Optimization hint: this trait also supports `update_batch` and `merge_batch`,
-    // that can be used to perform these operations on arrays instead of single values.
-    // By default, these methods call `update` and `merge` row by row
-}
-
-#[tokio::main]
-async fn main() -> Result<()> {
-    let ctx = create_context()?;
-
-    // here is where we define the UDAF. We also declare its signature:
-    let geometric_mean = create_udaf(
-        // the name; used to represent it in plan descriptions and in the registry, to use in SQL.
-        "geo_mean",
-        // the input type; DataFusion guarantees that the first entry of `values` in `update` has this type.
-        DataType::Float64,
-        // the return type; DataFusion expects this to match the type returned by `evaluate`.
-        Arc::new(DataType::Float64),
-        // This is the accumulator factory; DataFusion uses it to create new accumulators.
-        Arc::new(|| Ok(Box::new(GeometricMean::new()))),
-        // This is the description of the state. `state()` must match the types here.
-        Arc::new(vec![DataType::Float64, DataType::UInt32]),
-    );
-
-    // get a DataFrame from the context
-    // this table has 1 column `a` f32 with values {2,4,8,64}, whose geometric mean is 8.0.
-    let df = ctx.table("t")?;
-
-    // perform the aggregation
-    let df = df.aggregate(vec![], vec![geometric_mean.call(vec![col("a")])])?;
-
-    // note that "a" is f32, not f64. DataFusion coerces it to match the UDAF's signature.
-
-    // execute the query
-    let results = df.collect().await?;
-
-    // downcast the array to the expected type
-    let result = results[0]
-        .column(0)
-        .as_any()
-        .downcast_ref::<Float64Array>()
-        .unwrap();
-
-    // verify that the calculation is correct
-    assert!((result.value(0) - 8.0).abs() < f64::EPSILON);
-    println!("The geometric mean of [2,4,8,64] is {}", result.value(0));
-
-    Ok(())
-}
diff --git a/rust/datafusion-examples/examples/simple_udf.rs b/rust/datafusion-examples/examples/simple_udf.rs
deleted file mode 100644
index bfef1089a63..00000000000
--- a/rust/datafusion-examples/examples/simple_udf.rs
+++ /dev/null
@@ -1,151 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use arrow::{
-    array::{ArrayRef, Float32Array, Float64Array},
-    datatypes::DataType,
-    record_batch::RecordBatch,
-    util::pretty,
-};
-
-use datafusion::prelude::*;
-use datafusion::{error::Result, physical_plan::functions::make_scalar_function};
-use std::sync::Arc;
-
-// create local execution context with an in-memory table
-fn create_context() -> Result<ExecutionContext> {
-    use arrow::datatypes::{Field, Schema};
-    use datafusion::datasource::MemTable;
-    // define a schema.
-    let schema = Arc::new(Schema::new(vec![
-        Field::new("a", DataType::Float32, false),
-        Field::new("b", DataType::Float64, false),
-    ]));
-
-    // define data.
-    let batch = RecordBatch::try_new(
-        schema.clone(),
-        vec![
-            Arc::new(Float32Array::from(vec![2.1, 3.1, 4.1, 5.1])),
-            Arc::new(Float64Array::from(vec![1.0, 2.0, 3.0, 4.0])),
-        ],
-    )?;
-
-    // declare a new context. In spark API, this corresponds to a new spark SQLsession
-    let mut ctx = ExecutionContext::new();
-
-    // declare a table in memory. In spark API, this corresponds to createDataFrame(...).
-    let provider = MemTable::try_new(schema, vec![vec![batch]])?;
-    ctx.register_table("t", Arc::new(provider))?;
-    Ok(ctx)
-}
-
-/// In this example we will declare a single-type, single return type UDF that exponentiates f64, a^b
-#[tokio::main]
-async fn main() -> Result<()> {
-    let mut ctx = create_context()?;
-
-    // First, declare the actual implementation of the calculation
-    let pow = |args: &[ArrayRef]| {
-        // in DataFusion, all `args` and output are dynamically-typed arrays, which means that we need to:
-        // 1. cast the values to the type we want
-        // 2. perform the computation for every element in the array (using a loop or SIMD) and construct the result
-
-        // this is guaranteed by DataFusion based on the function's signature.
-        assert_eq!(args.len(), 2);
-
-        // 1. cast both arguments to f64. These casts MUST be aligned with the signature or this function panics!
-        let base = &args[0]
-            .as_any()
-            .downcast_ref::<Float64Array>()
-            .expect("cast failed");
-        let exponent = &args[1]
-            .as_any()
-            .downcast_ref::<Float64Array>()
-            .expect("cast failed");
-
-        // this is guaranteed by DataFusion. We place it just to make it obvious.
-        assert_eq!(exponent.len(), base.len());
-
-        // 2. perform the computation
-        let array = base
-            .iter()
-            .zip(exponent.iter())
-            .map(|(base, exponent)| {
-                match (base, exponent) {
-                    // in arrow, any value can be null.
-                    // Here we decide to make our UDF to return null when either base or exponent is null.
-                    (Some(base), Some(exponent)) => Some(base.powf(exponent)),
-                    _ => None,
-                }
-            })
-            .collect::<Float64Array>();
-
-        // `Ok` because no error occurred during the calculation (we should add one if exponent was [0, 1[ and the base < 0 because that panics!)
-        // `Arc` because arrays are immutable, thread-safe, trait objects.
-        Ok(Arc::new(array) as ArrayRef)
-    };
-    // the function above expects an `ArrayRef`, but DataFusion may pass a scalar to a UDF.
-    // thus, we use `make_scalar_function` to decorare the closure so that it can handle both Arrays and Scalar values.
-    let pow = make_scalar_function(pow);
-
-    // Next:
-    // * give it a name so that it shows nicely when the plan is printed
-    // * declare what input it expects
-    // * declare its return type
-    let pow = create_udf(
-        "pow",
-        // expects two f64
-        vec![DataType::Float64, DataType::Float64],
-        // returns f64
-        Arc::new(DataType::Float64),
-        pow,
-    );
-
-    // at this point, we can use it or register it, depending on the use-case:
-    // * if the UDF is expected to be used throughout the program in different contexts,
-    //   we can register it, and call it later:
-    ctx.register_udf(pow.clone()); // clone is only required in this example because we show both usages
-
-    // * if the UDF is expected to be used directly in the scope, `.call` it directly:
-    let expr = pow.call(vec![col("a"), col("b")]);
-
-    // get a DataFrame from the context
-    let df = ctx.table("t")?;
-
-    // if we do not have `pow` in the scope and we registered it, we can get it from the registry
-    let pow = df.registry().udf("pow")?;
-    // equivalent to expr
-    let expr1 = pow.call(vec![col("a"), col("b")]);
-
-    // equivalent to `'SELECT pow(a, b), pow(a, b) AS pow1 FROM t'`
-    let df = df.select(vec![
-        expr,
-        // alias so that they have different column names
-        expr1.alias("pow1"),
-    ])?;
-
-    // note that "b" is f32, not f64. DataFusion coerces the types to match the UDF's signature.
-
-    // execute the query
-    let results = df.collect().await?;
-
-    // print the results
-    pretty::print_batches(&results)?;
-
-    Ok(())
-}
diff --git a/rust/datafusion/Cargo.toml b/rust/datafusion/Cargo.toml
deleted file mode 100644
index 6f46f5da1e5..00000000000
--- a/rust/datafusion/Cargo.toml
+++ /dev/null
@@ -1,99 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-[package]
-name = "datafusion"
-description = "DataFusion is an in-memory query engine that uses Apache Arrow as the memory model"
-version = "5.0.0-SNAPSHOT"
-homepage = "https://github.com/apache/arrow"
-repository = "https://github.com/apache/arrow"
-authors = ["Apache Arrow <dev@arrow.apache.org>"]
-license = "Apache-2.0"
-keywords = [ "arrow", "query", "sql" ]
-include = [
-    "benches/*.rs",
-    "src/**/*.rs",
-    "Cargo.toml",
-]
-edition = "2018"
-
-[lib]
-name = "datafusion"
-path = "src/lib.rs"
-
-[[bin]]
-name = "datafusion-cli"
-path = "src/bin/main.rs"
-
-[features]
-default = ["cli", "crypto_expressions", "regex_expressions", "unicode_expressions"]
-cli = ["rustyline"]
-simd = ["arrow/simd"]
-crypto_expressions = ["md-5", "sha2"]
-regex_expressions = ["regex", "lazy_static"]
-unicode_expressions = ["unicode-segmentation"]
-
-[dependencies]
-ahash = "0.7"
-hashbrown = "0.11"
-arrow = { path = "../arrow", version = "5.0.0-SNAPSHOT", features = ["prettyprint"] }
-parquet = { path = "../parquet", version = "5.0.0-SNAPSHOT", features = ["arrow"] }
-sqlparser = "0.9.0"
-clap = "2.33"
-rustyline = {version = "7.0", optional = true}
-paste = "^1.0"
-num_cpus = "1.13.0"
-chrono = "0.4"
-async-trait = "0.1.41"
-futures = "0.3"
-pin-project-lite= "^0.2.0"
-tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync"] }
-tokio-stream = "0.1"
-log = "^0.4"
-md-5 = { version = "^0.9.1", optional = true }
-sha2 = { version = "^0.9.1", optional = true }
-ordered-float = "2.0"
-unicode-segmentation = { version = "^1.7.1", optional = true }
-regex = { version = "^1.4.3", optional = true }
-lazy_static = { version = "^1.4.0", optional = true }
-smallvec = { version = "1.6", features = ["union"] }
-
-[dev-dependencies]
-rand = "0.8"
-criterion = "0.3"
-tempfile = "3"
-doc-comment = "0.3"
-
-[[bench]]
-name = "aggregate_query_sql"
-harness = false
-
-[[bench]]
-name = "sort_limit_query_sql"
-harness = false
-
-[[bench]]
-name = "math_query_sql"
-harness = false
-
-[[bench]]
-name = "filter_query_sql"
-harness = false
-
-[[bench]]
-name = "scalar"
-harness = false
diff --git a/rust/datafusion/DEVELOPERS.md b/rust/datafusion/DEVELOPERS.md
deleted file mode 100644
index aa80cb71d3b..00000000000
--- a/rust/datafusion/DEVELOPERS.md
+++ /dev/null
@@ -1,92 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-# Developer's guide
-
-This section describes how you can get started at developing DataFusion.
-
-### Bootstrap environment
-
-DataFusion is written in Rust and it uses a standard rust toolkit:
-
-* `cargo build`
-* `cargo fmt` to format the code
-* `cargo test` to test
-* etc.
-
-## How to add a new scalar function
-
-Below is a checklist of what you need to do to add a new scalar function to DataFusion:
-
-* Add the actual implementation of the function:
-  * [here](src/physical_plan/string_expressions.rs) for string functions
-  * [here](src/physical_plan/math_expressions.rs) for math functions
-  * [here](src/physical_plan/datetime_expressions.rs) for datetime functions
-  * create a new module [here](src/physical_plan) for other functions
-* In [src/physical_plan/functions](src/physical_plan/functions.rs), add:
-  * a new variant to `BuiltinScalarFunction`
-  * a new entry to `FromStr` with the name of the function as called by SQL
-  * a new line in `return_type` with the expected return type of the function, given an incoming type
-  * a new line in `signature` with the signature of the function (number and types of its arguments)
-  * a new line in `create_physical_expr` mapping the built-in to the implementation
-  * tests to the function.
-* In [tests/sql.rs](tests/sql.rs), add a new test where the function is called through SQL against well known data and returns the expected result.
-* In [src/logical_plan/expr](src/logical_plan/expr.rs), add:
-  * a new entry of the `unary_scalar_expr!` macro for the new function.
-* In [src/logical_plan/mod](src/logical_plan/mod.rs), add:
-  * a new entry in the `pub use expr::{}` set.
-
-## How to add a new aggregate function
-
-Below is a checklist of what you need to do to add a new aggregate function to DataFusion:
-
-* Add the actual implementation of an `Accumulator` and `AggregateExpr`:
-  * [here](src/physical_plan/string_expressions.rs) for string functions
-  * [here](src/physical_plan/math_expressions.rs) for math functions
-  * [here](src/physical_plan/datetime_expressions.rs) for datetime functions
-  * create a new module [here](src/physical_plan) for other functions
-* In [src/physical_plan/aggregates](src/physical_plan/aggregates.rs), add:
-  * a new variant to `BuiltinAggregateFunction`
-  * a new entry to `FromStr` with the name of the function as called by SQL
-  * a new line in `return_type` with the expected return type of the function, given an incoming type
-  * a new line in `signature` with the signature of the function (number and types of its arguments)
-  * a new line in `create_aggregate_expr` mapping the built-in to the implementation
-  * tests to the function.
-* In [tests/sql.rs](tests/sql.rs), add a new test where the function is called through SQL against well known data and returns the expected result.
-
-## How to display plans graphically
-
-The query plans represented by `LogicalPlan` nodes can be graphically
-rendered using [Graphviz](http://www.graphviz.org/).
-
-To do so, save the output of the `display_graphviz` function to a file.:
-
-```rust
-// Create plan somehow...
-let mut output = File::create("/tmp/plan.dot")?;
-write!(output, "{}", plan.display_graphviz());
-```
-
-Then, use the `dot` command line tool to render it into a file that
-can be displayed. For example, the following command creates a
-`/tmp/plan.pdf` file:
-
-```bash
-dot -Tpdf < /tmp/plan.dot > /tmp/plan.pdf
-```
diff --git a/rust/datafusion/Dockerfile b/rust/datafusion/Dockerfile
deleted file mode 100644
index 97e82b4bbca..00000000000
--- a/rust/datafusion/Dockerfile
+++ /dev/null
@@ -1,25 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-FROM rustlang/rust:nightly
-
-COPY format /arrow/format/
-COPY rust /arrow/rust/
-WORKDIR /arrow/rust/datafusion
-RUN cargo install --bin datafusion-cli --path .
-
-CMD ["datafusion-cli", "--data-path", "/data"]
diff --git a/rust/datafusion/README.md b/rust/datafusion/README.md
deleted file mode 100644
index c9a2562998a..00000000000
--- a/rust/datafusion/README.md
+++ /dev/null
@@ -1,356 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-# DataFusion
-
-<img src="docs/images/DataFusion-Logo-Dark.svg" width="256"/>
-
-DataFusion is an extensible query execution framework, written in
-Rust, that uses [Apache Arrow](https://arrow.apache.org) as its
-in-memory format.
-
-DataFusion supports both an SQL and a DataFrame API for building
-logical query plans as well as a query optimizer and execution engine
-capable of parallel execution against partitioned data sources (CSV
-and Parquet) using threads.
-
-## Use Cases
-
-DataFusion is used to create modern, fast and efficient data
-pipelines, ETL processes, and database systems, which need the
-performance of Rust and Apache Arrow and want to provide their users
-the convenience of an SQL interface or a DataFrame API.
-
-## Why DataFusion?
-
-* *High Performance*: Leveraging Rust and Arrow's memory model, DataFusion achieves very high performance
-* *Easy to Connect*: Being part of the Apache Arrow ecosystem (Arrow, Parquet and Flight), DataFusion works well with the rest of the big data ecosystem
-* *Easy to Embed*: Allowing extension at almost any point in its design, DataFusion can be tailored for your specific usecase
-* *High Quality*:  Extensively tested, both by itself and with the rest of the Arrow ecosystem, DataFusion can be used as the foundation for production systems.
-
-## Known Uses
-
-Here are some of the projects known to use DataFusion:
-
-* [Ballista](https://github.com/ballista-compute/ballista) Distributed Compute Platform
-* [Cloudfuse Buzz](https://github.com/cloudfuse-io/buzz-rust)
-* [Cube.js](https://github.com/cube-js/cube.js)
-* [datafusion-python](https://pypi.org/project/datafusion)
-* [delta-rs](https://github.com/delta-io/delta-rs)
-* [InfluxDB IOx](https://github.com/influxdata/influxdb_iox) Time Series Database
-* [ROAPI](https://github.com/roapi/roapi)
-
-(if you know of another project, please submit a PR to add a link!)
-
-## Example Usage
-
-Run a SQL query against data stored in a CSV:
-
-```rust
-use datafusion::prelude::*;
-use arrow::util::pretty::print_batches;
-use arrow::record_batch::RecordBatch;
-
-#[tokio::main]
-async fn main() -> datafusion::error::Result<()> {
-  // register the table
-  let mut ctx = ExecutionContext::new();
-  ctx.register_csv("example", "tests/example.csv", CsvReadOptions::new())?;
-
-  // create a plan to run a SQL query
-  let df = ctx.sql("SELECT a, MIN(b) FROM example GROUP BY a LIMIT 100")?;
-
-  // execute and print results
-  let results: Vec<RecordBatch> = df.collect().await?;
-  print_batches(&results)?;
-  Ok(())
-}
-```
-
-Use the DataFrame API to process data stored in a CSV:
-
-```rust
-use datafusion::prelude::*;
-use arrow::util::pretty::print_batches;
-use arrow::record_batch::RecordBatch;
-
-#[tokio::main]
-async fn main() -> datafusion::error::Result<()> {
-  // create the dataframe
-  let mut ctx = ExecutionContext::new();
-  let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
-
-  let df = df.filter(col("a").lt_eq(col("b")))?
-           .aggregate(vec![col("a")], vec![min(col("b"))])?
-           .limit(100)?;
-
-  // execute and print results
-  let results: Vec<RecordBatch> = df.collect().await?;
-  print_batches(&results)?;
-  Ok(())
-}
-```
-
-Both of these examples will produce
-
-```text
-+---+--------+
-| a | MIN(b) |
-+---+--------+
-| 1 | 2      |
-+---+--------+
-```
-
-
-
-## Using DataFusion as a library
-
-DataFusion is [published on crates.io](https://crates.io/crates/datafusion), and is [well documented on docs.rs](https://docs.rs/datafusion/).
-
-To get started, add the following to your `Cargo.toml` file:
-
-```toml
-[dependencies]
-datafusion = "5.0.0-SNAPSHOT"
-```
-
-## Using DataFusion as a binary
-
-DataFusion also includes a simple command-line interactive SQL utility. See the [CLI reference](docs/cli.md) for more information.
-
-# Status
-
-## General
-
-- [x] SQL Parser
-- [x] SQL Query Planner
-- [x] Query Optimizer
- - [x] Constant folding
- - [x] Join Reordering
- - [x] Limit Pushdown
- - [x] Projection push down
- - [x] Predicate push down
-- [x] Type coercion
-- [x] Parallel query execution
-
-## SQL Support
-
-- [x] Projection
-- [x] Filter (WHERE)
-- [x] Filter post-aggregate (HAVING)
-- [x] Limit
-- [x] Aggregate
-- [x] Common math functions
-- [x] cast
-- [x] try_cast
-- Postgres compatible String functions
-  - [x] ascii
-  - [x] bit_length
-  - [x] btrim
-  - [x] char_length
-  - [x] character_length
-  - [x] chr
-  - [x] concat
-  - [x] concat_ws
-  - [x] initcap
-  - [x] left
-  - [x] length
-  - [x] lpad
-  - [x] ltrim
-  - [x] octet_length
-  - [x] regexp_replace
-  - [x] repeat
-  - [x] replace
-  - [x] reverse
-  - [x] right
-  - [x] rpad
-  - [x] rtrim
-  - [x] split_part
-  - [x] starts_with
-  - [x] strpos
-  - [x] substr
-  - [x] to_hex
-  - [x] translate
-  - [x] trim
-- Miscellaneous/Boolean functions
-  - [x] nullif
-- Common date/time functions
-  - [ ] Basic date functions
-  - [ ] Basic time functions
-  - [x] Basic timestamp functions
-- nested functions
-  - [x] Array of columns
-- [x] Schema Queries
-  - [x] SHOW TABLES
-  - [x] SHOW COLUMNS
-  - [x] information_schema.{tables, columns}
-  - [ ] information_schema other views
-- [x] Sorting
-- [ ] Nested types
-- [ ] Lists
-- [x] Subqueries
-- [x] Common table expressions
-- [ ] Set Operations
-  - [x] UNION ALL
-  - [ ] UNION
-  - [ ] INTERSECT
-  - [ ] MINUS
-- [x] Joins
-  - [x] INNER JOIN
-  - [ ] CROSS JOIN
-  - [ ] OUTER JOIN
-- [ ] Window
-
-## Data Sources
-
-- [x] CSV
-- [x] Parquet primitive types
-- [ ] Parquet nested types
-
-
-## Extensibility
-
-DataFusion is designed to be extensible at all points. To that end, you can provide your own custom:
-
-- [x] User Defined Functions (UDFs)
-- [x] User Defined Aggregate Functions (UDAFs)
-- [x] User Defined Table Source (`TableProvider`) for tables
-- [x] User Defined `Optimizer` passes (plan rewrites)
-- [x] User Defined `LogicalPlan` nodes
-- [x] User Defined `ExecutionPlan` nodes
-
-
-# Supported SQL
-
-This library currently supports many SQL constructs, including
-
-* `CREATE EXTERNAL TABLE X STORED AS PARQUET LOCATION '...';` to register a table's locations
-* `SELECT ... FROM ...` together with any expression
-* `ALIAS` to name an expression
-* `CAST` to change types, including e.g. `Timestamp(Nanosecond, None)`
-* most mathematical unary and binary expressions such as `+`, `/`, `sqrt`, `tan`, `>=`.
-* `WHERE` to filter
-* `GROUP BY` together with one of the following aggregations: `MIN`, `MAX`, `COUNT`, `SUM`, `AVG`
-* `ORDER BY` together with an expression and optional `ASC` or `DESC` and also optional `NULLS FIRST` or `NULLS LAST`
-
-
-## Supported Functions
-
-DataFusion strives to implement a subset of the [PostgreSQL SQL dialect](https://www.postgresql.org/docs/current/functions.html) where possible. We explicitly choose a single dialect to maximize interoperability with other tools and allow reuse of the PostgreSQL documents and tutorials as much as possible.
-
-Currently, only a subset of the PosgreSQL dialect is implemented, and we will document any deviations.
-
-## Schema Metadata / Information Schema Support
-
-DataFusion supports the showing metadata about the tables available. This information can be accessed using the views of the ISO SQL `information_schema` schema or the DataFusion specific `SHOW TABLES` and `SHOW COLUMNS` commands.
-
-More information can be found in the [Postgres docs](https://www.postgresql.org/docs/13/infoschema-schema.html)).
-
-
-To show tables available for use in DataFusion, use the `SHOW TABLES`  command or the `information_schema.tables` view:
-
-```sql
-> show tables;
-+---------------+--------------------+------------+------------+
-| table_catalog | table_schema       | table_name | table_type |
-+---------------+--------------------+------------+------------+
-| datafusion    | public             | t          | BASE TABLE |
-| datafusion    | information_schema | tables     | VIEW       |
-+---------------+--------------------+------------+------------+
-
-> select * from information_schema.tables;
-
-+---------------+--------------------+------------+--------------+
-| table_catalog | table_schema       | table_name | table_type   |
-+---------------+--------------------+------------+--------------+
-| datafusion    | public             | t          | BASE TABLE   |
-| datafusion    | information_schema | TABLES     | SYSTEM TABLE |
-+---------------+--------------------+------------+--------------+
-```
-
-To show the schema of a table in DataFusion, use the `SHOW COLUMNS`  command or the or `information_schema.columns` view:
-
-```sql
-> show columns from t;
-+---------------+--------------+------------+-------------+-----------+-------------+
-| table_catalog | table_schema | table_name | column_name | data_type | is_nullable |
-+---------------+--------------+------------+-------------+-----------+-------------+
-| datafusion    | public       | t          | a           | Int32     | NO          |
-| datafusion    | public       | t          | b           | Utf8      | NO          |
-| datafusion    | public       | t          | c           | Float32   | NO          |
-+---------------+--------------+------------+-------------+-----------+-------------+
-
->   select table_name, column_name, ordinal_position, is_nullable, data_type from information_schema.columns;
-+------------+-------------+------------------+-------------+-----------+
-| table_name | column_name | ordinal_position | is_nullable | data_type |
-+------------+-------------+------------------+-------------+-----------+
-| t          | a           | 0                | NO          | Int32     |
-| t          | b           | 1                | NO          | Utf8      |
-| t          | c           | 2                | NO          | Float32   |
-+------------+-------------+------------------+-------------+-----------+
-```
-
-
-
-## Supported Data Types
-
-DataFusion uses Arrow, and thus the Arrow type system, for query
-execution. The SQL types from
-[sqlparser-rs](https://github.com/ballista-compute/sqlparser-rs/blob/main/src/ast/data_type.rs#L57)
-are mapped to Arrow types according to the following table
-
-
-| SQL Data Type   | Arrow DataType                   |
-| --------------- | -------------------------------- |
-| `CHAR`          | `Utf8`                           |
-| `VARCHAR`       | `Utf8`                           |
-| `UUID`          | *Not yet supported*              |
-| `CLOB`          | *Not yet supported*              |
-| `BINARY`        | *Not yet supported*              |
-| `VARBINARY`     | *Not yet supported*              |
-| `DECIMAL`       | `Float64`                        |
-| `FLOAT`         | `Float32`                        |
-| `SMALLINT`      | `Int16`                          |
-| `INT`           | `Int32`                          |
-| `BIGINT`        | `Int64`                          |
-| `REAL`          | `Float64`                        |
-| `DOUBLE`        | `Float64`                        |
-| `BOOLEAN`       | `Boolean`                        |
-| `DATE`          | `Date32`                         |
-| `TIME`          | `Time64(TimeUnit::Millisecond)`  |
-| `TIMESTAMP`     | `Date64`                         |
-| `INTERVAL`      | *Not yet supported*              |
-| `REGCLASS`      | *Not yet supported*              |
-| `TEXT`          | *Not yet supported*              |
-| `BYTEA`         | *Not yet supported*              |
-| `CUSTOM`        | *Not yet supported*              |
-| `ARRAY`         | *Not yet supported*              |
-
-
-# Architecture Overview
-
-There is no formal document describing DataFusion's architecture yet, but the following presentations offer a good overview of its different components and how they interact together.
-
-* (March 2021): The DataFusion architecture is described in *Query Engine Design and the Rust-Based DataFusion in Apache Arrow*: [recording](https://www.youtube.com/watch?v=K6eCAVEk4kU) (DataFusion content starts ~ 15 minutes in) and [slides](https://www.slideshare.net/influxdata/influxdb-iox-tech-talks-query-engine-design-and-the-rustbased-datafusion-in-apache-arrow-244161934)
-* (Feburary 2021): How DataFusion is used within the Ballista Project is described in *Ballista: Distributed Compute with Rust and Apache Arrow: [recording](https://www.youtube.com/watch?v=ZZHQaOap9pQ)
-
-
-# Developer's guide
-
-Please see [Developers Guide](DEVELOPERS.md) for information about developing DataFusion.
diff --git a/rust/datafusion/benches/aggregate_query_sql.rs b/rust/datafusion/benches/aggregate_query_sql.rs
deleted file mode 100644
index 8f1a97e198d..00000000000
--- a/rust/datafusion/benches/aggregate_query_sql.rs
+++ /dev/null
@@ -1,248 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#[macro_use]
-extern crate criterion;
-use criterion::Criterion;
-
-use rand::{rngs::StdRng, seq::SliceRandom, Rng, SeedableRng};
-use std::sync::{Arc, Mutex};
-use tokio::runtime::Runtime;
-
-extern crate arrow;
-extern crate datafusion;
-
-use arrow::{
-    array::Float32Array,
-    array::Float64Array,
-    array::StringArray,
-    array::UInt64Array,
-    datatypes::{DataType, Field, Schema},
-    record_batch::RecordBatch,
-};
-
-use datafusion::datasource::MemTable;
-use datafusion::error::Result;
-use datafusion::execution::context::ExecutionContext;
-
-pub fn seedable_rng() -> StdRng {
-    StdRng::seed_from_u64(42)
-}
-
-fn query(ctx: Arc<Mutex<ExecutionContext>>, sql: &str) {
-    let rt = Runtime::new().unwrap();
-
-    // execute the query
-    let df = ctx.lock().unwrap().sql(&sql).unwrap();
-    rt.block_on(df.collect()).unwrap();
-}
-
-fn create_data(size: usize, null_density: f64) -> Vec<Option<f64>> {
-    // use random numbers to avoid spurious compiler optimizations wrt to branching
-    let mut rng = seedable_rng();
-
-    (0..size)
-        .map(|_| {
-            if rng.gen::<f64>() > null_density {
-                None
-            } else {
-                Some(rng.gen::<f64>())
-            }
-        })
-        .collect()
-}
-
-fn create_integer_data(size: usize, value_density: f64) -> Vec<Option<u64>> {
-    // use random numbers to avoid spurious compiler optimizations wrt to branching
-    let mut rng = seedable_rng();
-
-    (0..size)
-        .map(|_| {
-            if rng.gen::<f64>() > value_density {
-                None
-            } else {
-                Some(rng.gen::<u64>())
-            }
-        })
-        .collect()
-}
-
-fn create_context(
-    partitions_len: usize,
-    array_len: usize,
-    batch_size: usize,
-) -> Result<Arc<Mutex<ExecutionContext>>> {
-    // define a schema.
-    let schema = Arc::new(Schema::new(vec![
-        Field::new("utf8", DataType::Utf8, false),
-        Field::new("f32", DataType::Float32, false),
-        Field::new("f64", DataType::Float64, false),
-        // This field will contain integers randomly selected from a large
-        // range of values, i.e. [0, u64::MAX], such that there are none (or
-        // very few) repeated values.
-        Field::new("u64_wide", DataType::UInt64, false),
-        // This field will contain integers randomly selected from a narrow
-        // range of values such that there are a few distinct values, but they
-        // are repeated often.
-        Field::new("u64_narrow", DataType::UInt64, false),
-    ]));
-
-    let mut rng = seedable_rng();
-
-    // define data.
-    let partitions = (0..partitions_len)
-        .map(|_| {
-            (0..array_len / batch_size / partitions_len)
-                .map(|i| {
-                    // the 4 here is the number of different keys.
-                    // a higher number increase sparseness
-                    let vs = vec![0, 1, 2, 3];
-                    let keys: Vec<String> = (0..batch_size)
-                        .map(
-                            // use random numbers to avoid spurious compiler optimizations wrt to branching
-                            |_| format!("hi{:?}", vs.choose(&mut rng)),
-                        )
-                        .collect();
-                    let keys: Vec<&str> = keys.iter().map(|e| &**e).collect();
-
-                    let values = create_data(batch_size, 0.5);
-
-                    // Integer values between [0, u64::MAX].
-                    let integer_values_wide = create_integer_data(batch_size, 9.0);
-
-                    // Integer values between [0, 9].
-                    let integer_values_narrow_choices = (0..10).collect::<Vec<u64>>();
-                    let integer_values_narrow = (0..batch_size)
-                        .map(|_| *integer_values_narrow_choices.choose(&mut rng).unwrap())
-                        .collect::<Vec<u64>>();
-
-                    RecordBatch::try_new(
-                        schema.clone(),
-                        vec![
-                            Arc::new(StringArray::from(keys)),
-                            Arc::new(Float32Array::from(vec![i as f32; batch_size])),
-                            Arc::new(Float64Array::from(values)),
-                            Arc::new(UInt64Array::from(integer_values_wide)),
-                            Arc::new(UInt64Array::from(integer_values_narrow)),
-                        ],
-                    )
-                    .unwrap()
-                })
-                .collect::<Vec<_>>()
-        })
-        .collect::<Vec<_>>();
-
-    let mut ctx = ExecutionContext::new();
-
-    // declare a table in memory. In spark API, this corresponds to createDataFrame(...).
-    let provider = MemTable::try_new(schema, partitions)?;
-    ctx.register_table("t", Arc::new(provider))?;
-
-    Ok(Arc::new(Mutex::new(ctx)))
-}
-
-fn criterion_benchmark(c: &mut Criterion) {
-    let partitions_len = 8;
-    let array_len = 32768 * 2; // 2^16
-    let batch_size = 2048; // 2^11
-    let ctx = create_context(partitions_len, array_len, batch_size).unwrap();
-
-    c.bench_function("aggregate_query_no_group_by 15 12", |b| {
-        b.iter(|| {
-            query(
-                ctx.clone(),
-                "SELECT MIN(f64), AVG(f64), COUNT(f64) \
-                 FROM t",
-            )
-        })
-    });
-
-    c.bench_function("aggregate_query_no_group_by_min_max_f64", |b| {
-        b.iter(|| {
-            query(
-                ctx.clone(),
-                "SELECT MIN(f64), MAX(f64) \
-                 FROM t",
-            )
-        })
-    });
-
-    c.bench_function("aggregate_query_no_group_by_count_distinct_wide", |b| {
-        b.iter(|| {
-            query(
-                ctx.clone(),
-                "SELECT COUNT(DISTINCT u64_wide) \
-                 FROM t",
-            )
-        })
-    });
-
-    c.bench_function("aggregate_query_no_group_by_count_distinct_narrow", |b| {
-        b.iter(|| {
-            query(
-                ctx.clone(),
-                "SELECT COUNT(DISTINCT u64_narrow) \
-                 FROM t",
-            )
-        })
-    });
-
-    c.bench_function("aggregate_query_group_by", |b| {
-        b.iter(|| {
-            query(
-                ctx.clone(),
-                "SELECT utf8, MIN(f64), AVG(f64), COUNT(f64) \
-                 FROM t GROUP BY utf8",
-            )
-        })
-    });
-
-    c.bench_function("aggregate_query_group_by_with_filter", |b| {
-        b.iter(|| {
-            query(
-                ctx.clone(),
-                "SELECT utf8, MIN(f64), AVG(f64), COUNT(f64) \
-                 FROM t \
-                 WHERE f32 > 10 AND f32 < 20 GROUP BY utf8",
-            )
-        })
-    });
-
-    c.bench_function("aggregate_query_group_by_u64 15 12", |b| {
-        b.iter(|| {
-            query(
-                ctx.clone(),
-                "SELECT u64_narrow, MIN(f64), AVG(f64), COUNT(f64) \
-                 FROM t GROUP BY u64_narrow",
-            )
-        })
-    });
-
-    c.bench_function("aggregate_query_group_by_with_filter_u64 15 12", |b| {
-        b.iter(|| {
-            query(
-                ctx.clone(),
-                "SELECT u64_narrow, MIN(f64), AVG(f64), COUNT(f64) \
-                 FROM t \
-                 WHERE f32 > 10 AND f32 < 20 GROUP BY u64_narrow",
-            )
-        })
-    });
-}
-
-criterion_group!(benches, criterion_benchmark);
-criterion_main!(benches);
diff --git a/rust/datafusion/benches/filter_query_sql.rs b/rust/datafusion/benches/filter_query_sql.rs
deleted file mode 100644
index 8600bdc88c6..00000000000
--- a/rust/datafusion/benches/filter_query_sql.rs
+++ /dev/null
@@ -1,91 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use arrow::{
-    array::{Float32Array, Float64Array},
-    datatypes::{DataType, Field, Schema},
-    record_batch::RecordBatch,
-};
-use criterion::{criterion_group, criterion_main, Criterion};
-use datafusion::prelude::ExecutionContext;
-use datafusion::{datasource::MemTable, error::Result};
-use futures::executor::block_on;
-use std::sync::Arc;
-
-async fn query(ctx: &mut ExecutionContext, sql: &str) {
-    // execute the query
-    let df = ctx.sql(&sql).unwrap();
-    let results = df.collect().await.unwrap();
-
-    // display the relation
-    for _batch in results {
-        // println!("num_rows: {}", _batch.num_rows());
-    }
-}
-
-fn create_context(array_len: usize, batch_size: usize) -> Result<ExecutionContext> {
-    // define a schema.
-    let schema = Arc::new(Schema::new(vec![
-        Field::new("f32", DataType::Float32, false),
-        Field::new("f64", DataType::Float64, false),
-    ]));
-
-    // define data.
-    let batches = (0..array_len / batch_size)
-        .map(|i| {
-            RecordBatch::try_new(
-                schema.clone(),
-                vec![
-                    Arc::new(Float32Array::from(vec![i as f32; batch_size])),
-                    Arc::new(Float64Array::from(vec![i as f64; batch_size])),
-                ],
-            )
-            .unwrap()
-        })
-        .collect::<Vec<_>>();
-
-    let mut ctx = ExecutionContext::new();
-
-    // declare a table in memory. In spark API, this corresponds to createDataFrame(...).
-    let provider = MemTable::try_new(schema, vec![batches])?;
-    ctx.register_table("t", Arc::new(provider))?;
-
-    Ok(ctx)
-}
-
-fn criterion_benchmark(c: &mut Criterion) {
-    let array_len = 524_288; // 2^19
-    let batch_size = 4096; // 2^12
-
-    c.bench_function("filter_array", |b| {
-        let mut ctx = create_context(array_len, batch_size).unwrap();
-        b.iter(|| block_on(query(&mut ctx, "select f32, f64 from t where f32 >= f64")))
-    });
-
-    c.bench_function("filter_scalar", |b| {
-        let mut ctx = create_context(array_len, batch_size).unwrap();
-        b.iter(|| {
-            block_on(query(
-                &mut ctx,
-                "select f32, f64 from t where f32 >= 250 and f64 > 250",
-            ))
-        })
-    });
-}
-
-criterion_group!(benches, criterion_benchmark);
-criterion_main!(benches);
diff --git a/rust/datafusion/benches/math_query_sql.rs b/rust/datafusion/benches/math_query_sql.rs
deleted file mode 100644
index 1aaa2d3403c..00000000000
--- a/rust/datafusion/benches/math_query_sql.rs
+++ /dev/null
@@ -1,111 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#[macro_use]
-extern crate criterion;
-use criterion::Criterion;
-
-use std::sync::{Arc, Mutex};
-
-use tokio::runtime::Runtime;
-
-extern crate arrow;
-extern crate datafusion;
-
-use arrow::{
-    array::{Float32Array, Float64Array},
-    datatypes::{DataType, Field, Schema},
-    record_batch::RecordBatch,
-};
-use datafusion::error::Result;
-
-use datafusion::datasource::MemTable;
-use datafusion::execution::context::ExecutionContext;
-
-fn query(ctx: Arc<Mutex<ExecutionContext>>, sql: &str) {
-    let rt = Runtime::new().unwrap();
-
-    // execute the query
-    let df = ctx.lock().unwrap().sql(&sql).unwrap();
-    rt.block_on(df.collect()).unwrap();
-}
-
-fn create_context(
-    array_len: usize,
-    batch_size: usize,
-) -> Result<Arc<Mutex<ExecutionContext>>> {
-    // define a schema.
-    let schema = Arc::new(Schema::new(vec![
-        Field::new("f32", DataType::Float32, false),
-        Field::new("f64", DataType::Float64, false),
-    ]));
-
-    // define data.
-    let batches = (0..array_len / batch_size)
-        .map(|i| {
-            RecordBatch::try_new(
-                schema.clone(),
-                vec![
-                    Arc::new(Float32Array::from(vec![i as f32; batch_size])),
-                    Arc::new(Float64Array::from(vec![i as f64; batch_size])),
-                ],
-            )
-            .unwrap()
-        })
-        .collect::<Vec<_>>();
-
-    let mut ctx = ExecutionContext::new();
-
-    // declare a table in memory. In spark API, this corresponds to createDataFrame(...).
-    let provider = MemTable::try_new(schema, vec![batches])?;
-    ctx.register_table("t", Arc::new(provider))?;
-
-    Ok(Arc::new(Mutex::new(ctx)))
-}
-
-fn criterion_benchmark(c: &mut Criterion) {
-    let array_len = 1048576; // 2^20
-    let batch_size = 512; // 2^9
-    let ctx = create_context(array_len, batch_size).unwrap();
-    c.bench_function("sqrt_20_9", |b| {
-        b.iter(|| query(ctx.clone(), "SELECT sqrt(f32) FROM t"))
-    });
-
-    let array_len = 1048576; // 2^20
-    let batch_size = 4096; // 2^12
-    let ctx = create_context(array_len, batch_size).unwrap();
-    c.bench_function("sqrt_20_12", |b| {
-        b.iter(|| query(ctx.clone(), "SELECT sqrt(f32) FROM t"))
-    });
-
-    let array_len = 4194304; // 2^22
-    let batch_size = 4096; // 2^12
-    let ctx = create_context(array_len, batch_size).unwrap();
-    c.bench_function("sqrt_22_12", |b| {
-        b.iter(|| query(ctx.clone(), "SELECT sqrt(f32) FROM t"))
-    });
-
-    let array_len = 4194304; // 2^22
-    let batch_size = 16384; // 2^14
-    let ctx = create_context(array_len, batch_size).unwrap();
-    c.bench_function("sqrt_22_14", |b| {
-        b.iter(|| query(ctx.clone(), "SELECT sqrt(f32) FROM t"))
-    });
-}
-
-criterion_group!(benches, criterion_benchmark);
-criterion_main!(benches);
diff --git a/rust/datafusion/benches/scalar.rs b/rust/datafusion/benches/scalar.rs
deleted file mode 100644
index 30f21a964d5..00000000000
--- a/rust/datafusion/benches/scalar.rs
+++ /dev/null
@@ -1,30 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use criterion::{criterion_group, criterion_main, Criterion};
-use datafusion::scalar::ScalarValue;
-
-fn criterion_benchmark(c: &mut Criterion) {
-    c.bench_function("to_array_of_size 100000", |b| {
-        let scalar = ScalarValue::Int32(Some(100));
-
-        b.iter(|| assert_eq!(scalar.to_array_of_size(100000).null_count(), 0))
-    });
-}
-
-criterion_group!(benches, criterion_benchmark);
-criterion_main!(benches);
diff --git a/rust/datafusion/benches/sort_limit_query_sql.rs b/rust/datafusion/benches/sort_limit_query_sql.rs
deleted file mode 100644
index be065f32e00..00000000000
--- a/rust/datafusion/benches/sort_limit_query_sql.rs
+++ /dev/null
@@ -1,148 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#[macro_use]
-extern crate criterion;
-use criterion::Criterion;
-
-use std::sync::{Arc, Mutex};
-
-extern crate arrow;
-extern crate datafusion;
-
-use arrow::datatypes::{DataType, Field, Schema};
-
-use datafusion::datasource::{CsvFile, CsvReadOptions, MemTable};
-use datafusion::execution::context::ExecutionContext;
-
-use tokio::runtime::Runtime;
-
-fn query(ctx: Arc<Mutex<ExecutionContext>>, sql: &str) {
-    let rt = Runtime::new().unwrap();
-
-    // execute the query
-    let df = ctx.lock().unwrap().sql(&sql).unwrap();
-    rt.block_on(df.collect()).unwrap();
-}
-
-fn create_context() -> Arc<Mutex<ExecutionContext>> {
-    // define schema for data source (csv file)
-    let schema = Arc::new(Schema::new(vec![
-        Field::new("c1", DataType::Utf8, false),
-        Field::new("c2", DataType::UInt32, false),
-        Field::new("c3", DataType::Int8, false),
-        Field::new("c4", DataType::Int16, false),
-        Field::new("c5", DataType::Int32, false),
-        Field::new("c6", DataType::Int64, false),
-        Field::new("c7", DataType::UInt8, false),
-        Field::new("c8", DataType::UInt16, false),
-        Field::new("c9", DataType::UInt32, false),
-        Field::new("c10", DataType::UInt64, false),
-        Field::new("c11", DataType::Float32, false),
-        Field::new("c12", DataType::Float64, false),
-        Field::new("c13", DataType::Utf8, false),
-    ]));
-
-    let testdata = arrow::util::test_util::arrow_test_data();
-
-    // create CSV data source
-    let csv = CsvFile::try_new(
-        &format!("{}/csv/aggregate_test_100.csv", testdata),
-        CsvReadOptions::new().schema(&schema),
-    )
-    .unwrap();
-
-    let rt = Runtime::new().unwrap();
-
-    let ctx_holder: Arc<Mutex<Vec<Arc<Mutex<ExecutionContext>>>>> =
-        Arc::new(Mutex::new(vec![]));
-
-    let partitions = 16;
-
-    rt.block_on(async {
-        let mem_table = MemTable::load(Arc::new(csv), 16 * 1024, Some(partitions))
-            .await
-            .unwrap();
-
-        // create local execution context
-        let mut ctx = ExecutionContext::new();
-        ctx.state.lock().unwrap().config.concurrency = 1;
-        ctx.register_table("aggregate_test_100", Arc::new(mem_table))
-            .unwrap();
-        ctx_holder.lock().unwrap().push(Arc::new(Mutex::new(ctx)))
-    });
-
-    let ctx = ctx_holder.lock().unwrap().get(0).unwrap().clone();
-    ctx
-}
-
-fn criterion_benchmark(c: &mut Criterion) {
-    c.bench_function("sort_and_limit_by_int", |b| {
-        let ctx = create_context();
-        b.iter(|| {
-            query(
-                ctx.clone(),
-                "SELECT c1, c13, c6, c10 \
-                 FROM aggregate_test_100 \
-                 ORDER BY c6
-                 LIMIT 10",
-            )
-        })
-    });
-
-    c.bench_function("sort_and_limit_by_float", |b| {
-        let ctx = create_context();
-        b.iter(|| {
-            query(
-                ctx.clone(),
-                "SELECT c1, c13, c12 \
-                 FROM aggregate_test_100 \
-                 ORDER BY c13
-                 LIMIT 10",
-            )
-        })
-    });
-
-    c.bench_function("sort_and_limit_lex_by_int", |b| {
-        let ctx = create_context();
-        b.iter(|| {
-            query(
-                ctx.clone(),
-                "SELECT c1, c13, c6, c10 \
-                 FROM aggregate_test_100 \
-                 ORDER BY c6 DESC, c10 DESC
-                 LIMIT 10",
-            )
-        })
-    });
-
-    c.bench_function("sort_and_limit_lex_by_string", |b| {
-        let ctx = create_context();
-        b.iter(|| {
-            query(
-                ctx.clone(),
-                "SELECT c1, c13, c6, c10 \
-                 FROM aggregate_test_100 \
-                 ORDER BY c1, c13
-                 LIMIT 10",
-            )
-        })
-    });
-}
-
-criterion_group!(benches, criterion_benchmark);
-criterion_main!(benches);
diff --git a/rust/datafusion/docs/cli.md b/rust/datafusion/docs/cli.md
deleted file mode 100644
index aeacdeee04a..00000000000
--- a/rust/datafusion/docs/cli.md
+++ /dev/null
@@ -1,95 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-# DataFusion CLI
-
-The DataFusion CLI is a command-line interactive SQL utility that allows queries to be executed against CSV and Parquet files. It is a convenient way to try DataFusion out with your own data sources.
-
-## Run using Cargo
-
-Use the following commands to clone this repository and run the CLI. This will require the Rust toolchain to be installed. Rust can be installed from [https://rustup.rs/](https://rustup.rs/).
-
-```sh
-git clone https://github.com/apache/arrow
-cd arrow/rust/datafusion
-cargo run --bin datafusion-cli --release
-```
-
-## Run using Docker
-
-Use the following commands to clone this repository and build a Docker image containing the CLI tool. Note that there is `.dockerignore` file in the root of the repository that may need to be deleted in order for this to work.
-
-```sh
-git clone https://github.com/apache/arrow
-cd arrow
-docker build -f rust/datafusion/Dockerfile . --tag datafusion-cli
-docker run -it -v $(your_data_location):/data datafusion-cli
-```
-
-## Usage
-
-```
-USAGE:
-    datafusion-cli [OPTIONS]
-
-FLAGS:
-    -h, --help       Prints help information
-    -V, --version    Prints version information
-
-OPTIONS:
-    -c, --batch-size <batch-size>    The batch size of each query, default value is 1048576
-    -p, --data-path <data-path>      Path to your data, default to current directory
-```
-
-Type `exit` or `quit` to exit the CLI.
-
-## Registering Parquet Data Sources
-
-Parquet data sources can be registered by executing a `CREATE EXTERNAL TABLE` SQL statement. It is not necessary to provide schema information for Parquet files.
-
-```sql
-CREATE EXTERNAL TABLE taxi 
-STORED AS PARQUET
-LOCATION '/mnt/nyctaxi/tripdata.parquet';
-```
-
-## Registering CSV Data Sources
-
-CSV data sources can be registered by executing a `CREATE EXTERNAL TABLE` SQL statement. It is necessary to provide schema information for CSV files since DataFusion does not automatically infer the schema when using SQL to query CSV files.
-
-```sql
-CREATE EXTERNAL TABLE test (
-    c1  VARCHAR NOT NULL,
-    c2  INT NOT NULL,
-    c3  SMALLINT NOT NULL,
-    c4  SMALLINT NOT NULL,
-    c5  INT NOT NULL,
-    c6  BIGINT NOT NULL,
-    c7  SMALLINT NOT NULL,
-    c8  INT NOT NULL,
-    c9  BIGINT NOT NULL,
-    c10 VARCHAR NOT NULL,
-    c11 FLOAT NOT NULL,
-    c12 DOUBLE NOT NULL,
-    c13 VARCHAR NOT NULL
-)
-STORED AS CSV
-WITH HEADER ROW
-LOCATION '/path/to/aggregate_test_100.csv';
-```
diff --git a/rust/datafusion/docs/images/DataFusion-Logo-Dark.png b/rust/datafusion/docs/images/DataFusion-Logo-Dark.png
deleted file mode 100644
index cc60f12a0e4f5a3bba66a01f377adda7a03c8113..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 20134
zcmXtgby$?&^Y+p$DP0RoNq5(hOQ%XIE!`<40xn2{)Y2(}AV{}#NF&{iNO!*vpYQwo
zgUbuAefFF)bDx=e?wN-OEe$0+Y)Whp2!sbyme&D+kP|^5Bvnjw;C~_$F&Kb<uw0Z4
z+(95xrl%hym=5F!1fmDQ<Yo1IGWJ`XqaeEh%a<l6f3K9~?2_FuBXxETYS{SRlKeJK
zqUY{-`<JK<^KF_q895IPM*5t{R&-{&c#8Bm4#tuxDtZX!(GYgHgkGovo>`|3$A&ev
zsB8%2)q2JX8I~*m2e1rp;eN6#JI0H(i(@ek6U1Ee*Ed6Y(|VrMx<T2+e{YdUB~Lx{
zFP)q0zqOv<zTfWtOh$j-scnbcKIAiC6uw%mZ=qb_nk0t&Bl~#Lk|JDoaEkznX`3d_
z9q`YB7F1DqiL>GQ;*l%G?5er&k`^~c#~aC?WqY@GkSC-1ora@`itoF_*N#*{6ybr>
zib=n;?Xce_9Z&A^;kI~v2Zs`j3vnZnqs-Xv-aEC#=1=0bGcn})DpnY9`cP#ds?8Pd
zqsEph=Y(XZ(Vw2Bn}rgAo>3RZ_;e~`g73E&O{ju*pdNJX2vC-&iaJeU3{9bzc<rEd
zv*1XyJcvLC!?b%F3mp172Fsjp2(w)dbQt;L4C0`QP0!$O(`jvrQxuoQ7?g4;?66hy
z78@KI_l#FtjhO<;2m9?`R>9n_99`NU91l)^LIhWy&scxbMu%eV3Uhz=J;0VRY0zi_
z)w2I<!&@N?W>hM%Fv3wb%zKA9kK(eBL!-`v<}$Xj7aO1X_-cC@N;?t$rVT4{p}uS0
z7jI4$6I>v7(0o=It4@GbBTzB5I3?F?)&%F8Yl0h2J7D))^U6)GF@GJbm7@PMtcl|!
z1$d%f)c&8`99zvv-COZBNB_E&ty3%VabgWjuw>yMo=ZO5Z0=5;*WtNsxSjb{<Oj%C
zkl<-h268Hye+gl5U7^+|vJ$H1s2YkY>1o2w*aBo<(|&`Vx!Q#tz@NL|%x$&Eb_t{K
zT$ZKJ7_77d{+RSGNtD9YIZS;vaesui)gG+01=L`Df^H}&Uo>1J?<TP-ct2u-Lp@Zs
z&H18zI88v81ST0ml_)C-rnSaqK2di#cT$o~1*o@u!ylwjR<{5B>_tccOR@As^ru>p
zYgqa9NIG2TIWeASJ!=RupL)E9O7*JBD?n^(-7gp|X$*w9_#=2EFkH$j=>VV%^1f8-
zfs?x-HAnvQFH3a<Ipb+-m8U`1w(l}R$$WIkkGZU$4_%8&vrsH7GlN5Mk=DB{qx2>Y
zkbSDmbc2Gy&fFG`F@KC<(l*O9;dBw`zQS~Ycb$B=BXI%iy}POa6y?eY6Wcv2hY9ok
z9ySjgifl+?$~IVI_Zzq|SKVc#d{jAvutla+dR5|f+S(0G&twxo;oYy9m@T?aR~e-L
zwM6&HiH5Ebaob3i?(|Ii<3Y;_E6iW6M!#{vgBNi}+w(;L4<Nl}V@8sRRPl;8d5tXZ
zYf`K3!zO3S+hygat%Ej$5tr0G_!`08IjYGWS<F}A5|ir$*o<&061F1dvB$71=R3Mh
z&55;X^L(j{cr0+(&smy9hvt~sx=6Qdc4)-F#t;o0dWKor4RPUQy^PP6o85M~>@mlT
z-X;%(?DaLx_O-YY##$|%G1Z|X0QZpxpPLiaKl9zjDs?>LS!e-{>sxii934t`dIR$N
z!^tSgw0F4of<2QVEc@qyepNVd1l}{K8uGMhoJ73GsGKHTgI9jcu{ohOj+QY<9C4Gn
z6<>!}%uNVY*4&9sC6U<p)Wou%Hiq4xt1Pr%pNpSk^oM6$yg9=&Xd)(~RUWv;#jnhD
zI+_wQiZ5>%lBU$av#bythrQi7xtq^E`1pKTVkRjX@eJHUp<yC`mr(qen33L6g!lGU
z99$R*6v9rpI>Z`sedlR;?v#l(eDOV<jjI^Y#pNitWH7-2Mc(E*sT*Z<X9ec7gRckE
zp$UWGQo#lln&Gfrk4W<4xBveNrI@>^s}i-JNN^6ZK@ffOz3j(^1vc<$9`?)*?qfIK
zESHKddqOBCFWfW|_G`r8(y%%g3%F0BXB{!1sYs`AIxMYRz;GmME;mGg3*w|c_e*H@
z#eB~_@YGcnhy*+BBYqCAVXj$~*%s;+<aj3#K>d`Ry9<nuPOx1UE;ve_@3TSB`sPyt
zKp0UzID0C^Gu4M#--Ufbfe&&phe=c*=8Fgg6JFs~+G4}F{-G$l_1zq7Cs1l*A7^E6
zN$h_KFo9x%7stJMDK^TCQt<1ov0*g%3>1No=`z*gudAB|f7F3sVe!?RuN3;n1Momw
z`1Ej7JO7=CT64zvfG`(T5%<_rKp`Es=Vddi`AsS-q|aWF26Z#dJ=#jgTU8=m!^P*e
z*#D*?rhqMNlW7i|fumsBT&J%IG^b~r;2nQ%?;qPdy@nPj1NVNiT*a~DiRtHw6ijdz
zG@PEfSai?nvyfil2}y!FM<)*mh(lpiWV_WaS*O*nrf;7z%O~(>82fAfumT})T5y;M
zinY2}w=K;s3ERwa5@3i9g06ynAJIKeJ=fy?ruJnU@G$+6+mwBqs{wz2t);FMKM=&t
zKEsUMege*Up{wjROAu)Jwdv7^!oH?Rx8TTqYBv)SO7NLHBo)_fD*7@|Kp@PL*tcQ`
z$3NB}QDas3sun<H)GzG;_oPOTB)|BpZEr_1un-R_ukEP<n>#jEdVdP)k4pTU`N`Xa
z;r=FrBZQyaaj5$Z(JqNniHTAPTW~R`2b2Z!!Molc#{{zh=RTE)^F04;9||CWk~O^k
zoK|$twDVB4kx{i-Faf~!h&W7B#FjeZ0Tn)HCW+2t8ooLPM5bi39nQ|ty-^8hnHi=z
zUu+SYfj;0VW^CukR*cSA$%1^3$W(qyvbBFdc0LP4$Qx!ch+WWebh?apM2mr2Ec?vI
zJpd~hL<43Zbe>e`(Z!X%AO_+f*YoLU1~TVbEQaq4q8-PsmpRcEnQ>D5sJx;Fb08Do
z4%&9IQ><^7ogyzYcHP#c9P3&5Hc<R%W3$DJD8GU`#blE_Ptb{57qp=OU+`J-QdGbD
z$mJNNX*`%N6)gS_WriKt=aiFi*L;@#S<TR{Y5@{6T#U?^vr}t(>q_r}DYO7^K()$M
z8Gx=OSt)!R*PB=%wEj^9kmbh$TUXB7kh|7rA7JPfk!>L*9LM+LWM&FY(SJ=ak|#vS
zZL{b+7EW%u^8R<a2X>p4?o<m)F?YOJTHm^Dnmwqr39EmrupIAQszin5OJ{Li03qJu
z*xVzoMsd#KD!yulb+p_z0smM9ZO39#DXO}^V}z5z6d=p4_+z4u<0ZVIXHl)6VZt_x
zeYA=8MRB0^d?81+;7?8xuM5y2nGB_|Y%av;0%-WnSBZbsHjeAV9!f%SBTYXw#J}ky
zs<sV97t&Wb<h^?*XgZDlt{pf%(b|`&mL*58ZhA|CvzKa-DctOq=&9_?j-|%ujY7dm
zJw;Sx!M&6vJ@T}gd#siK?zGPZF?Weuo+tLTT^Xm5uK`!0%d)u~kN(D?evI!6ipCoT
zD}oB4mV{6NWo~!o&AjzZiTyg82vXCYIqI#9JBbBSdVrPOTank!uB7X+jIdwV>$9KC
zDtR6Jz;J({g-fb!R1PP<Di&;c25!koF((@9W*r%{$v83)4g-i@qSyES9__!P!NUAm
zbDutBv-K-FRacF(ap8(haUnH-IZncegyqTpT`-)l>?D;9EgPTaxrEr<FLV(IpGYb&
zn~2<Jc!tyMGaM=oI?u)!cTGH@;Mtn{9(D6BQvt6Z3o}61;<E9r6GT)KHrdiWdBPxM
zFhl7P-#Pm+5*<7MrcQ51w4`koYdgT>oLLfCo;<Qqalb~{dD&>IV8at+%o;5QfK}vI
z_nv_2jI=k-W++sM>D$}pNQ1AeR1it2F42Rvi`PPw2=CG`E#D}R<gW&xa8K&U8X8Jx
zL!lW>9$1Y`HO>YCCB(SJA5Yf8QK3SF%RavmFNi#nP7ppOP=|bBa~%nPUt-B&ymOT;
zb+D+g2n4tp6XLc~ragw4&&xf5-`;c1$#6hCogBy6eMeX9iOum^2ol$xpSovdbsFcc
zZ<gJ1&gFd0{@FSLrFM@;aOUGU^YyWJt)4MeMOccV$}NNnM4f;CAm5&0rtUL8itR*i
zv@xu9Fkg?`S&~1A=Cr-!$SC&V_B)kA<jViPtQ#PF;wqWo`CE5XcV{9b(C3Qi^}sr=
z;44LY+_%^(qQO7EPZtN?CgX+l$Sr_=;C70uw+MlS%xY|OLM?;u{_X?xkun7zy(|-I
zUKXdevRy_M;7a#mt$R?V_^Va5ap!9T_|L1kE6n(J{={Q8Z7h$9Cq8uT6W!B2Pd~qf
z%j5On;ya@xQ1p*A=6qntPi8BSb}oE6S(52T5jjJ2b$tIGiip!RrB1NKs?}Ze=M8sL
zD+$4b(H2*ZR-p^Rh(3mOZ6(l>iNE7NA=`=ew{S#Ua;1)1<SXY$f3PUV59e>wR-GgX
zwu4(q-6ICY-s2KL9#dZZ&)QmWU=O{luaaDu&0KXdHyQ+Y1~8%Fp`PaBK=HC-Qr5P2
ziCeCP#*qru0Rb4YA49J~Qeq>U-6aWZO4jQbF=%tv8B6gYqf1^0pdyuL;s8H$9pPvc
zW$Cg5Xkd&^Q;ZH$uo9I}z}pYKOoO_Xf9fj)S9=|GJ7ZD02MsQbs&k2m%tVHsz?!7)
z^ZasqSzSSAJL~0eag2`cg%p!quMh7otLL=#U-EciK6^i<1Pt_;Fe6zv?4KQ{LNT`k
zrv*Nal(JR3FfoOw?4M3Lh6;bOKF<HnEQ>;1iN4#FWa05<Rpch1Ss7?^oxX^hCue8g
zTuGxJh+ZHOOGoh;@&L2FHu6c7HKl96z56H}B2}OZ0$J|JJW)Fndna8B1tIpP(lNi*
z4L)2T3dGF&RDi)9W8O*W$XCyt<Zsh_K3><E6ZAyDuxxJO@*9i}<?sw77fwvaULaTU
zce+dP6KdKmZ6DOU!|rG1&+JG*lylr=cvdJ-oqT2Yo=BzEnvbKJ5s|(c=ujS6A!Hek
z$Ly=Kx_(Cy?hPcafi@F)VZ1{xYu1J^LUSF|jrjaYraY7lQ1_f&2Fgs00{nX#)I9$c
zU(bJB>O(<X)5UL($Xdq|LV#HSjG_I7lRZp<fd&3BCEb84$33WRIVc#UTrGv9q;b;}
z5yW+Zh}q^<(yMfw4;%ZRM_wkIk&CDdx!j6Vi>fp*56jbGnhrPC2y^iT>{8cJ{Xp;p
z*cwjk9WfZt3Lo=JE2$C?`iAZtUhxL6GniUw(D8i+;8O}Fv*ma+7kWNQvWk^Q81XWU
z2grw_b^DJAzj*VQhioNA<wl<aQC1)SU?;%xk%lH$j<@0b#-s6SPGql0g>1epo6D70
z8iQt52Ad(i(Lz3me;eL0JE0NHP-wuyT>6rEYKZN3*3XuR=32TygrY!btOKv_3ajTN
zN~Oc|@uC8<Fk#zdGoLM1!QTDH<(PcEX*O;f)@2(a5Lc~fwuXI`m6o!hlE0eA_?vO<
zfj8l~(-*gM;UX{n<%bI?f?rYms-k0bW1i+w+K6aoX&|16*)Iv%f1&`DJ^;^}&SQh*
zjR<oEH~2R8_8mBBCBf9aa0!Kv-Cqfl6p6l#l7r5rz(Y5P<puF&Q-tFEc(m1q34OtD
zfEsksirnz<#uNy;e&DqB{~Tv_R0b;+*^mD;)Avxcrq3~ddD1_%H<9x@#vvWAJ&M5F
z)Z+dZqe5*8T&!9`X(QTl@|#jB12Z?lVqq8Zc+Xn8C&1>WNaYIR57|rcSDxK|sEJJZ
zxrAtAX=rHf^Ra8P*RDCgnZ!7faj%;IBAR}B1qLYV<xCrXeMGkSu^L}jrn))m$uqk%
zMKP6@uQ!whQgcL!?(!^;IP$Bby>N~C3g#OnnsYT7=6>v;84-Z&1r9gDbX9I*QYxoD
zdqk1|MbtJUmV#Dmh{G?0CHhJYu!`xI%S6QCua~uv!VhAflFfAam3<l4e=(dXUL=Wg
zn`$i(LzRhvC`QXjRNpU^I)a31RH13mV1lw19V7Yk)%aw(2Rwh0Fpql0QC?WTc3L71
zuX@P_pv&PQ8`>AlncZ3mYrH_ja$8iI>s4R6SO*GXF@AV;IGbo9!1wLCwEuR?oD3{<
zjb!@0w<0;BkB3gMF@PyP)v(Z{%NvG1F>gQkMm|0`g-=w-W9|!K2mL$>EPiMn-ZA$I
z9_etCT&zCT&La#|&g?KvTZ?1$Cgs<+xWW_r8ieXqX7NMqIZxYxe}|khRfzqBlEYcZ
z+f`nRlP2I7{FJeP>gWjH6`5uyeR^66U4A4zgQXi%x^4vhR<r)&f?69P-KLwbT+H(u
zC@DG_WF}R552vCGl_UFYUn$~-McM7MgKiyb?kN(sr-f$zTj@)|w?A*JhArLuApCT~
zXqwyb8znyJgi(GNP#j2&cMHEwZQ`XaydcEt_Zc5ev?6z_qG$;J&m0aAinT1n8hFph
zS=09Rl+-D70|gtF5dPoZVhZ)EI9hE9luDZ1)FcuLHLx8yB)BlJx*<R%fB`Zd<XTwo
z8exv;pD4%s8c8yTT5sC{=n-B?iDG?<N439@sez&n)Q_})u>zFc5LZh`iOUJqAkPNw
z8D$(40TMRbXP1XoB?=%bm5`OK3DNm6V}ZMHjY0TGS|pA#sG3BPhm>&-`AER7*u+Q@
zK=Vkm$WM!>%F@Ta*O62`p(qZ__IfK<0eoArGG>1f%Us-p3@>nObo%ReWG8!-8{a)6
ze}##%XZ6>nTVH<B%f%xo#PeBvE$8r`qksQO>?H&d&sAh}{6>Zsq&D|0eq+RazET$#
zxvyFl*o&VUq~ejA3RE?61o%eAcz1o;b_<z;!J(q+zYVBZ0)=>S?ejoyz}oMKHLH2e
zdusEOY(A5-2y3YkLE#2!1ST^d0)LM<g=7L|B3&k5q-9l7_WE<fGE`uX0+%8I0BKIJ
zmKoI<^<KwJH8e`)z?RIbtaHP^Gb(bw{ydg2&z~aPB2ij80q?(K_wqV5Cn}PxLw4(V
zY%*>F=04!PWPtJ+R5~WbK`x+A2M-7Dzg|Pma7>+kUp3f4eUZxXA%Bt=!1shLzuP#s
z2+}74D6YZ_$MxpgS9txldnCtjQyc?1oX{wS`!sMPg-Bi;#8Y<U$;hfzl5x(4^-YXD
z5!n@ud>!Zo=bFJuahCjUI>P^LopmiDiv6R~+d_8GKc+yTmjs2@pS+JBya}JrL8L2m
zCnAlg3;mT!Y^S~A?EbUJEcI#;`F3u8(tIvwL$!8N;%b^zt0nq)wm`cbPGN5m7Hk#N
z46CAzgaN=I$5PTDQZ0JE-u3iJu2$YD{#kvl;%W_*tvTc|Jp_#G!oeKKh>cE38sFsp
zJ3Hh0{@PtTjZz8h)0XA<cy)&BUvEEVVn}+tGfOg^R#|+}f4&q~CS8{pc(){p+EcO|
zeXdp;oj)nX3V|v8w@I{uNY`Mq7(>h0e%u0zYonYv2#xI1*I&eO^zia6WEO38WPCxU
zuB|gjvm@rmIEpNR>U^u4U1_xW?|G&I=5>&E{rrAkM&6`BYT}vNu$|wKBEi^(@ExsE
zJtst+F4iYlA76YIDI-`86Pyf={<{9~fgjW)cQjroVMsQ}<Iz-QMSMJ&Ruq}QS2Q<6
z4}od=me`{JTZlgj?~b)t;EL&6ClU@LMsmr-SB_i<-%!`NPW--mH*_(TxccD_(a7h-
zF|ItGYjWXUyOV8oiC*;>>$}s_@0|ukK7bOr&01(-ZpI3UuziXV2A_?)Oyt2mCh}{A
zSX*$e(GL=)T%9g+qwfu3Lq(R=IL`gN>Zm^fCZx4F*B}|I|EvsYJeir~rQ%27QCOQ<
z6w6RY_%LF_%ozY!#67q4>ygUCIlkA3j^G+c3g!p>B7ioCiKs9kbVrb)9Iv7v#@_|!
zG$W&FnUZO_J6#yoc%@9eD-FjMnB3)ioj~+`r*BCWiXN|C-y;g*Ac{{HI)Yp87T!_(
zey{Ez$sP0nRPc>LU8n`q8%+^~0NXzBw$jud*dQ+I-gxuqF8#3g_|SIqpxI7+TpuBt
zhsfiXU#t4DqxpMQL!+h=s@(Hu%S0WOVc&(5dGjreIq`zJg!5^vKG(C|>z~tSqES{a
z5Us@WtFPBvWDVeCCz$c@>dib{c`_Zk_R5mnBjoK#Tbf!MpMQSSZ(LmY8k<h1TQA#&
zTJIm&kLRY_Zq~0($d(Vs%f4lx9MwIuv3HJgAWZ<RunH=W=pWhnaC#3-Ol5siS1Ko7
z&nPYi<oqSy<28jPOlG%_FGY$tVsva~s9!c4hJzJ?R4q4HtN<#eF7CLHlOC7Yi8?f*
z<MdG%M4TuU6zy)u*}=`Y;P~e_^;77NSkS*mqac*|<*Tjt(<ArmZ5Iaqc6*(}4yExk
z7QPwJ!sfXkd(h!muD=Xu-Q|T-5``QnvH1*kzvCe!W4bN<SS1>U*wr772j~WWh9EA*
z#+D!g>3i0sD5>Gb8O$JBF9#K334`!l!skP1&QQ?XTHH=oyu*}d-bMiv4I&i20MkA9
z+|7PWTkgc53w-oLZ1-k6E&teRklFZt#s+cjJ8G?1$4j%)w3Cq-q{5k5E7~MUKp2p1
zm3?&Wtk6V{+2B$i(*$FI@HZddA+Lwb|IxAHSX*;7?~mLSv3-cT4O_lt3A%T0`}CFY
za=?>~4I=$;qGVKCCU^moxZlsdy*pgEN7{Zw28wLvwG8mN)(&+ymPqIsHITr@H*VN&
zvIqDM=7P%!My~YD`@?rdKn$82YnB4{&aFfb@pKQrHtAr}kNe$&>}^4-u@COwXQzY?
zC2mmzPui}gY_fx#uV;PspB(|A;&W9rt7C9ACL)=5+yb)Pt@Nm1`FoU`f%awRVrza<
z<%BCK3nSUsal_Q@F}}@^z@vt1{7)&x(|{L9^3W6Zpwn4S?tjy5e$L0vfd@@Z^|t9w
zfZMrozb-7fCZ*5B`Aw3sGGpzsrTZ#2LJ$ISfmu#7Eaj20r5U|nFBt;FJVu82aJgv;
zY1g;j(}9WX6>D$|GopIn|6cqF|EqKmh}%xL^Uv)A!a^XcU4V1qwS+~2#@+kEWJ6!*
zNy_7zr2p=cw=~MiE^v6H6-S!kkQax71hk?mnRxdZqolwsJ|l=mj>|KaN#}mW`Y^Dd
z39SS+DQ5EWt%l0MG<&j{2w|*8hO_sw_5gpONgO7_cSOeHechwW<|SRr-rMNZ=8hpq
zdd>k%U@twErRcD$*FD!S6c5VRu#4F~4+JMP25r$l5BLHjw$td4qhHCNz5};hIW`V^
zlz(!<=g!!4udJV^1U><3=XF_}ZCBV(XBYdh?Mi33Itl;uXG*>|MkmF@&sP53_1Cr)
zey8umAdv}MbW>plfePvM?tWHU^p%12?;gZ>;51A8_MhGTJI7P24m$HeIp}lm@{51P
z<WE+znb6+gc3Zj@nNZ0nBmE^v$h>e&x;)=}lW|G$I5=>hAs5OT5$AG=D*UslZY(#Y
zC`y>G|91KIVc<~uI4u@5=IU2lvg@4!|8c%aN6M1_#iF4gPVGc_j+OT;b3_iy8^eTZ
z+~)OAOBzL9PY1w}z?L*bbL9*WO{4`N2-}Pwx8ip4AA3b1Nwn`nBN%mY)Eede_L-zh
zfeiLi;d0H=!`qb)TZ`BG5^)&dym`6)cs(Cvx975z#sT>`BbJdFvFM%Md>Ww?bmM$C
zC0$Anu~-|uEU<RJm0ze!6p*JF*<kZIVeVq>!qFE}I>8=W_RvDe;?ZwY(~X6hMJXHC
ziXBWsCCW6O`R=ejujZXM?wCWe+kqUlNMG5E#P6Kn{%hH$(+e2lif2B3<RzM3Nr3Y8
zGamc<?VX72^=J<Wb5v(`V`^&%b}r)O?xkf!Oou=d2;}0BldH0_b>;8(LgsMoJ^%zf
z#f)4_xm-!&x2au&D|d9L>9T?+*cWN#&+-+^<Dp6^#eB>(L^NM$`|+mjDoy&j@cr_4
z!{#a!`nZucR|w@=?Xjugk(Q3NGRh9XgNYRhpabB%cJ78-lqKp5x?^Y-OERV7-)0et
zYdGb`t5xjwQln4&uiTK{kFf}9DkHlD?b8LG1_37X1s&uXDx3-B2|*nT1gP7~xv+7)
zg;Cv^u__T*12yBmZ&gak$FXVi?haF18hbfa0yikW)f51#zz<K6`F$hVelc*`*$-!$
z;Ub0?Z@T}=Gn&wHX()csOA_wj!##ZMMH_)LraS}?0dqDt*BAUY2&mgz_~ooB=d_TM
zHmYjy+V_kq5j379v5f0}1<~O|J;K+ST6)UNvtk)&MxTC~fSmhkt!Cs%F03oMsSfxT
zOr(L^IHQ`zLmIXr6ED%(TeA7{n^{|E9LK!0TR3aM{^l86ZQe%TP%$PBHTa%Ev<>(c
z3wh4WlO9PYhT;5zoMtfB8lxb<VBleWXexDhqikUtKjIC#VI!u6cm0&18=}EWVbvB>
znEe=H3i3oZUaq&+Ijr+JIcgm)-v6D1rIC}Xoa5cH?3@{0so8|ib)q2fVPKtmWMcbq
z<adTA_l8y<<IV<~i`1HY(Kb7lzMvK6Cmpg1ayGS@{S#HG)YnpfG6@Ay@+7C*3M>&+
z??jd)MO%Hs!j9c98vJU!A>Lm3Q$sP+(og+#f&|`iIkzachJ1arLdQSzh*X{h6Z|JL
zd~!OuDj5p6R=$Vl!0NKulB5l^l}*7fs&*ZNw61YSMuX#`07FWqtYx6(yPp2vEC3&8
zs0xk>PHCO+*gHfS_{kIj0FGb)j`l=5?3!{M<r!Rwr3P?dbuzETqXh$y6qM2H7t2fI
zJA`e)dd#s2qDjL4CKs9Q;ZLU^bgLY<ZF935&u}z<HqONwU{U22SSJACo_N^S^HWML
z>C(W-%IUHf(10ozg?Z)EkU(NjWFt&0WJSyW(2M&fjfF(M1Hv8Lq&|KA7H>wzFQmH2
zm$4>8)JsG0?2D~(;0xO7r7A_`+HqOg6U{;FfuGD&2=8`WZ7QZk2Bx+^-Eb739Pr9@
zzm%{jhs<{I;l{c)eag)~o8-_?<Qj-pZibG0ZWF?URd9;Gz#O2l#AH)tud4w4<LVq$
z&?_ilWS#cg?DqpQOeM}Mi}@TdmGYx<9nM_%Z(h<0{WONKD$xYnabc|<0Zw5B<sLnP
zq)RfEe3;!(&%i!EY`IT$eWt6ku?UKX2N3mt&|d!j7YaJ?<~x|YZgSO&L}6YGt%6UM
zgfEwx`X*CJML&X}e2S=6VxGB}v0W&5Idos<iPC)Y8W3+hUrZ7JjJ%7>gX=G|iATih
zMwF4n)wQ=&uDC50Ae2;BX``>RZmwUDeA5WCjEZ{8F(K4<Xz&0b@O@>25K`fRHfYy;
zd@5E|M{(PJ8+5CC#JhfGzfKG~f7KS@ofQDxwp!ZR^XVcdOxQJt<e$hD?!dgWE-^2{
zYNr`Yy*1tIK9(^EB;W()ybm>n=_GqRKw59km%Ho8xXETOq0QzsZ_OX(D8aiu*M8`g
z_>vXWAPKebrdv?w;?RFJ$40spxzyvxo7)AI)_v*wVdOWyH>*V^+oRk*qlLdQf6e_+
zQ-->|bY7$;X|GsLcQOcHB|MACc04<Be~0~KzF&W4lCxksk=_08?y?U8E?2nkBBWcZ
z9-^IFKTSU#;Frz+DJ@yCrpd$-N=q=0j*E&P`$9als7Uc=F`#{7;Q^o<2T~t_YBny=
z{9ku8c`QCi`cju}g+=;u_X|tl=5(>heGk6XzI&^u$e!aRh>#>!XV4v#)eksD2Y*=5
zTm0|52lXy$j(Fh73{c$$i?)P3RsWd5ds`dR&j;G>R;>K*He<^jPG1FX)=i*^y6?RA
zkK}XO@x)<-@-6-di0Nar)kSUpXj=cCN|L<?sr>VA@`G}Ury>r_8+ekU=@<ZibM!DX
zfKKOs6>!JWc5~YwlEK_LzaDN{M<7X*;QCNd+%PY@+?*JKA!*jK&1$7q*c$P4)$qm0
z95T1ILc4y}Deazf2OS3#oU#O+8nwkft{mQh3G|)oD3+eh`7xG2Mkv9ynSgZ;Rq9Me
z3y^fh6k6LNwww|-;M!H#fPXJ?+>tC)aq<QshY{JgCHOxsiKT0+ypCgA!?-2<mQ}QC
zun6%C7_L8jhw^<WZV2FRNfhPZo&M!=N%=&@SeidFhxMPTFkcr+-xA*Ku++VrT0#UJ
znk}b`e~!a-c|SLaGT)1X9IVDXzTY5HvP{HyltXr~6#p$S;mMxwQ|%m@hQgcVKlT^l
zN!6T#1CNWP-Am~)wB5H|i4}7<5qW~plV~Z-epKR}h_9`FEXAW_1F_@>txu(YvX;3-
z-h41cfemoOZ)aA%L|u=!9In~4`g6B5<E@nCB2uQD;9Y@qbzvJNq4MuPlC1AMWH+O$
z?NVPmdCoJtifQ}@HYiJP@vBv(adJcbZ$a0p%b9I=gm;;fC(q8hBY5FFm<0FI2^m0?
zuwTsa5(&@pt{VCCn^vH6amRV5jpQi;hiRNhcS8(5hjLvCIi{@n96yFVayec86@Ql-
zprUn&Q%;z$ElHYS)toq=U^FjYaNG=dR*u-F*Bk9AWD&jtjG+D262+?P4Tp^J!o^mz
z?JC+-uOrwuEGyhomuHfbqw@$C*E{9uQa51=)MX!8Dow4}It5V*vywS$BwybCPCj;z
z(UA(iEwNhPcAO6LPbsEs`gdMQ!14j3#pM!{wAg#{NunYJlZj+DZ4;)>%kjl^AUrDj
znVI|F06I3vhp|RI6HPQ)e<(6eLYcF7!qJZ#LOd$YR@NYE*ekm{XYu6&u>9?4rbx#E
zZ9>=q+7Z-`q@4cG58K0+CTOCK=5+)K53*paw|g#LSS<#BOy!EmKP=GiUaV64*CU=D
z75Qk&iVl}h_!q61uoDRj8gf_a<LXO$*Q&IBX1nASeMLDR15n1+Y*l8EHRU{hpxCn~
zH-oc?O)4}KTR;(3Ntmu9K=By3lo)aJ_J(;o9tM`FWU^nn7z+-A<k0QAAb2`RR9<>7
z)4Ba(e6nrcJmWL?2@49g3UUDN4{Zl!-FuGJo#~zD+%zGiRCiIue!R?;*a@xeTHYZE
zs|OZFkt2Vftc=69n~9odT$^KR4-eb%!@=<_Pv`1m+mkfrXFfh24YNdkV=<rE-T(Dc
zxNwl?x61PC-nX5LPhc42Jv;F;_bN#Ef=gh#uk+47k7rpd<N33XTO=D%Idm%D!{l{O
z%6KCZr||g5!W(}c3)C&!=lZs}{zs42GCAIli{<1{bChpgSjoRlR<fzsw{VJ%`*-ev
z;8Xr;w7oUy*hBP_!1>BzXmN?!@V{e{GU`%!q(5WH&7o!no|^+^OJs*-#~8xC?UzkO
z!Kep-XHhBV8}Vk}Qm_4*!tzEz!SU7wLWy*vuOti@aO3)vE+D1lA;~P8CLA9^ExcY*
z%<1bG6VD03m{!&KC}S}%yP?i}l=JGW-oo6ztSAS_Pi{nx93G`j7G5CZXS^deIR$7_
zVq9y-5ms>os~@9^Lztc4PN~%$vqGkc-Z@F9YL!zD0wJ0fk7+?2sA}-zh%-N*gt$>@
zXKA+Eir!nf^0^6mV|{Ba)>AFk<)z$v|3X;7cfv7zLDh{hLAb>InJc2QORb>(AJn~0
z_Uy7S6;i-5NzaP(7QcNS5C?DrE)!lt7i<e=m@*?PKc=4!K<G|~qKd<pl8iAjM{F08
z2+`MN52?jNT9qcmo`M&lAeIWK9)Be*KS@)2mc1u^ewwY_c%huM^^jG+7x%FKEsK5Q
z&<Y~Kp6H!}xKkgxMZEC}5Ipg3X^(;YTwpWF%KCxT`0BUzUmwiebfld1wB~OK+-cV8
zVV9lXEOsE=oTY#7uz<-DF=2xD(FSHbL<dsS3qyyeqDeqvqD>%3vMOz!kU-_}OiDky
z4iy3QP|j=Y1-grJwL}%Gyz=?HEg|Ej>qh!zH{TP5_ebObg!Zg+F_1Bt90WjHk_pV}
zN!)WfgMNfb@<k}0|GFriT5dNw{ST^PQ-@K^Fa8D6gjWtKrZIrPmD4at|Lw~Jr19O3
z%d)nLcT&l70B++J$!1r8n96m=B2vak`XxobGqFHY#V0O=`5&kpAv3MLN<&|qVreiB
z7y8&zl0?NA-URT(KKi)Nm!)4>V)?8?StUdZzg8og;Ok7>lmATLKg?O_qS<9iBTe|n
zWPmR=79|TvjXtB|6;An|R-J(ChYnN(DDtB2jo#x3<w%vft!ZYON>35{>Mt>vQe<`9
zT~szcs%RqbVF|-9lH^^NHfK2oz*`gqL>Eg+8-;3hwj~3Y+OaA^Qo@O@)93{TjI8*e
zyLp3KIN6<9k1nt(oo=+_d$OuO3Qm*R?+%1^b>{_5cpHS1#$-az(yYkQ2YIMbd{EBA
zTR$Lrwn@HN^G?mwmgjwshJ${ejNdu<eQ1FgS$wPH6qM~k^%LG{!S-&`Gtc&>DbyH~
znTMl`uG6rC>j1`58A`&^cX6QyCjC+jVjlmO5@!U=7&*l}zUwS9xK1i2Nhq)f>}%Y-
z<!t}2cAM1`62ShwC@K}zY5QC1Ffc3es{6S*CX7C}hp}JoR6B@q4F!Zj^2L&7kSD4$
zaIc(umsDok0ZCrXj3OM6tBH3OQ`>e~pU)f|QR)mA%-0}=rvB!`Ego0=8sV=JJmKMx
zmuR<>L9yg(EZa6h2aaZlKJomdaD|sT{z8rjkRXu-GBmMV(qr&Ec9~o&udSKO7p!q|
ztT7vf+TA3E_PwbKnek@ED1jIjrv{0&iSnb$>Mc{7uw6NiD1;b(=Wr9s1$0M7u>Ioc
z79#>DgAM3QG$^y@Vg<kd+9u6JpO+9l0wZc5e{9Jp)^r%*X1rG#Ml`NEEn;c)JgJql
z@D1Ibdp1waI0ZOWHipo0W};slsZ5a2_HMS-Ae%WB?U_!PB3#V=H3k6h0-glT2Amb7
z^#0-nxyisN1u+x_CuX5&J&AR7rTnNSp&WcTMnE2rN2#2X0f2z#l;5=R0-r{8iuCa4
zO^Wy1k^kC{7aBXy89eTQg6Y#aM!sH-w}X@LzFZu&y>5SoHZnCMAEWRxG*w>pI>{Nf
z^M0XyFVVlTF)kBnhOM3xjj0k1|7O*xjZl>`Pi^Alb?U4R?2>v6HN$o~oM2pO!`c1~
zl)smi3q7(5V?~Eq;YzSgh!-`jtGdMOfx6UmkVbx*(eFn@C4h<p!{eeJr%Rf`ixjhZ
zw#`mVk8aC_gb!alo|NH^$#Oj_It%x&I-eZn(Zp^+618EwWNyC;Se7|J4uw<3@{$CJ
zy;A{7Y}tt}b_%`o;*JAwDR90$Fas;-bxKp%Bb0II(YcU9k`Nl_33%C;PMfQ6zTN(k
zCe!`0o{e$5&-+?nz2uRvg?>7lgL?x&llD*%9z|xxtgWu%83DfE>VcyJmm+^i?gr4K
zF@${4^ff#kbVl((tjSW*9QJ(WWhW{tSqMaF7Y8mH#+_`Bu5;BZHeQNj9>4Ua{aKgt
ze4rg95PnvCMsQv{`TgBO7a~SyN?&rWW6g4MXS=b;?N;x_TX(|4%fzVSm|4EBq&sO2
znEf9nIbUa_nuaO^@UW%c)!bz`VRcMzD(gggJ!S;|FK($$<Z+a`-7>`AikQ@$0!5w6
z{2t9KA6>1NaxBZfb4i4fJs$(>=bgI}(t)HDS{?3_<VCotJN4(xA49uF7)`!KfO*vY
zMO{T)-zSdo4|1V4ehd>r9RDV4us<ipmo^MmBk}vGn?E!^d#^+21bIULOW20)L0+r>
z0O3yjix7=(vW(eQ8k-l?o26$a3#3WnIlYY>hS{@#bT$`9)1v4W^ixN0>bP&xo%#v4
z#5&hWp1*Ic;qLrm=HD+yv|ABn1N-+?nNT9rtm@+T7}}p-<b4{xH0@p9k4ux0-tVng
zj`|BkW2hJt@8K<;9)5tXR(bKy3mF|50!xW=nP!2)__+Gd^SvV<LMLIa#?g+^he7Z7
zm`a<x)o`D(I5bM;g^#!P;(R6EN)&il&7;fGF)?bRNkq<Tv$o<|EbRw|mE+C=98H~q
zm*hog)8fr{wx{B*iyU%BL$;%5hN_x=U;A6nW}T>;+}ARCR_&z|nbr4EGyDNNfLUAY
z=)1YYVR*_m$cu<~`H{b$Zd(r+O{^l%aArQzy3Mi5Q)Hs$s!4xY7wWbfg7`A9FGi@j
zO+}$8X?4@7Xgsvf#=O@#u@D%_x;>8h^t2+;$c<vnkGkWbK=GWKNT;;I2iBT0-3F6y
zEF}eMmY&#erS|=RgKn2?l;guDMx*;A1pcE9n7qmIW(u>U(0QJngGB=5Xvf7WXNJ6?
z5g(OexA%q@_e`N#e<m}otFP-R=h5+Id0s;p^5362bRDY!5aXrXRo%^P&XLIRb_)YI
z%|4%RqP-&1%kqUO2s;_)0c`FXZ3Y4`P?Mb@OB;9-g~V$Snx~GmIofly%>pUae??ud
zyZIWTrJTqX-sw9{e+V5nu@n}x`7reYpj~%&lrqg10LM8{Re=F*uk_AppgqN#i5-%#
z)UWSx!fcXBmK=h(E!iw3`Z8Y=N04OeJVN_kt9-@QL7ZFrw*odGREY)U%k=&Mm}Iz?
z<ugt&s*Kl86=vpA166p;D_bSQ9tTMAigh^Uw~jC!vO&R+lXh*pBVD(R+dsSPlmZr5
zuoYS9g~hbGRP%9-*U>Y`MSA`5`YIaHGW`KJkq1ERsFv0EHAJobOMMo`W_$Q**F8HF
z-(;axM%ofzWuJ;Oyq-#tS`9dZnEfY3URxG5YL<^hmQ>L+;f;(1g`YL%X`B}m?XjI=
z28I_%?P#W1YJC7V)70mj3Dgy*Iv`<rXWJi4!NdiCRR8zGwcNft?tRe~uIKWKy@U9i
z)}Q@#uhcWxJesh=>+n^Is^Sd^8gl{Yw|Y<L!Zx5z4sST&<CE$Es7_ZHli;a?G4=_T
z_tZ)>)JBDBc%CjUpsQ6rf8+k1+n%?0Wy@Hxiy{KPz#;1^)LyjJxJ=xVz^TLc-A40)
z%jl!|I7cKlQFsGZ#OU)xIb!SvfIJt+?b4gT6LnqO(Qr<56||&aoX!?90L5a-ZyG(F
z8j<Cl6<brE#`Bi8I~6HP75JQIuEPS69atw_{IRL{TRyJIEAyo+qfn`-?5n8v$M}FD
zB4boIpke_9Q->PO#Rj@=Vvaw@8I<ZXw;N;l!|TO6KL#4czG<VF1>m5t+T4`8AnN_Q
ze#NUXXr;i^Lz3xkzF`&DulI25N!M2Y75%Dmg<0LOe)3^)^Nefp7f;cK&;R5wY<l5$
zjiSrqjy0`$z{W~?YMbeW;KuTiF{Hq-MICDTd@R+EUn*UL1&lp)guXvxT*aX$uRC0)
zz#)A0DjLpcn`@REvd;c%`VqUZ0!0XNvc?b~SP5YdvN8{u19Yd`^5>3!>qJo$JtAJ*
zD|4aERjz1eK;C9u>2abrh_qeS?i!$qh7}g{TvcdRZxHF40(FOb+i$6ecWTa&4VC9}
zQUx%Fn#JqD{K}ykA|JB)55}&a`R&@i_)M}!Lb-S`{8MwK|MAM`Ma_ccFLnECU;8Z|
zx--Xr=2#UQ4uF0yY=JkHYw3B3c_WN(2$)Mr8y|;?NBDE;)7~!C1ZxXlJqjhNQ~AF?
z=4(9>&Mr*Z<El3RGd{X4blpOhoEK*l+WIe@a7tFP-W_aBJ7ck_hPilK1bDGA56LN|
z)&tScQoew!Jk^G&j!<j#m5kf!lpjmm$(fx0(dy;IpeNaa1GE?MG7>Lj-2Ud?Xa&<R
zdz~8BmFklR!zk<!iSjAfrYq%Cau)jvd@QYW%sryFrLJW!83x3YCsAsJYT4h}YX3>9
ziUGQO2b<fi)-iQ|EXWEqzd9wJSk$8Mk!rf%_7tr@oi3+xKOnr3N_N|lO27&g?3l2f
zDFJ9wtFHnB8mlf6vu_0g4~nHgpf?>w>Ul;ES~!x=lcdgWREBGx*(6%4f%W4p&MLQ@
zE)kf(wkUf5COGPDG9hlj*a&uVp+wy!OWfdYJ6<Av7^t|r7(kc4>F2yV`Ixf)hD!6F
z(x*kD4fA~~iSx-?(>9><Bo!<<w8)H?3bm7OWKF{Ij@_#UE|3}!U!yaF?AgS!x63yD
zd7+dF&56@^&2;|v*D6^Xy>A{^p9GtafR>fIu)hibtk%k6o<YOU#(L<u*7{#wXkEb2
zBG>C*j(D_Z;rNrZ2f&>o1`6l+ZZKJXT$iP$1(_ccj4ln7`q<ER&U?327Z|h|k$rvg
zUoY<Gg{VExLgw(aeM?Q^?_&)d6~va$?I?~jiih6ok0OH10QC@6`Ex-7oF7h2F6Ijb
zBnMo)MbVl-IWK~M9uuljGQe09k4J5{zg7+(v)g`FJ+M4}k&bM;D^vqx38c>8%e^>v
ze4YeY0QF{dp9P3Oj8fh@QC8%%a&7b;I<ElTV%*q2+VL{|ADtj-7kfWJT}zZ;Egof6
zE}4kR{=<II(QozgfYshW@*toI_EEiQiEMS;NM5T5hqiXCBQ%~*`Q{hn(4zI*@`CR^
zqxjy_L}vM^0%Sl4yFe-pFMs7W9!FC^-QyST07Z?D?ON`5GHTX?hgs&CGPY#8<-*Zk
zg#tFMhVEd`(6MPol$W*!tB(VkC5ukG_}GGDFHp;68)>OgMpBb(Q#M8Y{T)P3w6-<f
z@IL$FbwW`tBlE8I@HTmg6MEaqJ+J@q1WxAkkwV|3LflP&@w$BqFe*ZeOPZ9>_E%0?
z^Q4l*j8HY+r7p$`6s)ALD+oje#p(IiMh~6sM}l?IPIms(V@ff;nGv)T)*O{oY#ZXr
zoItmkY<YU9a}J$A8O_z1S6?eK4mW3d4Hr~Cqgy5HGm7Jl^4vX(jTuj|=WI8l51|%v
z$h0=}2d1(8j`!5XQ^>k}&#{X=UaFiMQqY8dk24{Z{{2WOL@=akuulOL02WpHW>c9z
zS<QXg_bFUNTw-X^i@H$d%{;I<o4kgA@0$++id;EB#@hKJfS*Y%8PoQ=r`JyT%+Of*
zA;V}*(3`-Ql^si^p+v)6FR%=Az~i-R1rE3t=!%V!C4hdkd5R#4rkLyZOPG`C@3lQU
z7x8Hd7p}-MM}@l!R+El2klPW=b>DbCltDitkFwhW=54L0R2_|Cx?os>SA$#=h1JKI
znM++Sne7OZ*woIv{OTpcHgY&+f$smJNk$ZM5^<2T<t7Xe5Z{EaW`3)U*Er6h(-(11
zN>|p_(DGo3go(4R+X^rcF1)Sc%0Z+8i%@56U2tTm&~tXv79}tDsi4K#vNpO0fAB*s
zJV2||-)LURLy=AEi=W(Q!U7cos9mDJBoXEEY+`p{e4!kGif4n<U4YgXaSzB9uKXt0
z10-t^V=W)s5?+X!WB$W4j1Kjd>c4JWH^vqi7Y$K>qgl<uBaWAi$U<}?Gmw*T9J7I5
zUbEOfcndtHZgQW4*Igi~mJ{T?<ZG2{tK4ljN34rlVXCT(r<RjaBvY~R!shk;oi(dV
z#%ItjRI4ZHxV;972+;xFuS+6Mc(o$xsH~PFtglHG2~%Y?$V8K8q4b+{;e=!-&z$p0
z8d`%ze>DoD-H8)G=w5IRTzoEa1-dKqz@&CYl*G_|+4b!_J*z!iOw!1q$<AdwDnru(
zUvxiBp=pwj^f>SHGdVL>W=`n0p92}~_)k*Szucrm9}2K9)4nzw+~-=hXC;v$3;L)O
z>eC1dvd-ENdx5C}@@W*I*oD6@b^9`nIUu}QR7;}sZasgY0rx9CBw|s31_)Kvv5X5+
zvd_(afSs|pTMW#njasJRgjb{XI?qPgvFKmHvy+D~%2`?IfL><`8Uj%Pr=x1rFC$FS
z9K=+?vd3UuE-$W%v-GT6<mmiS7C3|1U0?-S_+#~r${S=n=fY5q0*qh)(iLma$@$I@
zA5H`Bs4W_*ylH<<pncf}c`5V`d$Uo&SXW!?090?B5hXMmcad)Bs%xB*1N_DeOkP@@
zFe3Ru`teMOxm9>=!9+19DhE=~ja-hVw&(D-&vLjW%$MS-k<Bp~AfC>+TVGj|Z$he(
zR00EmNf)&*XyKRxd;x%lb$B=gD48jY<fWJcK*t5>uQzt>SEJIfLiz>s=mx5<3;{S5
z!beNoP6Ge3ZeH^K<Gha7KUh=!bU)XlZ6_>RnG*~DbWOA=@w@koYr7Un?Ay;DYByS~
z;U|*dO9Edu7K)L={puHu2|Jf54-R~2g&T^r`E$OwaONB><%H)2&vK?*fa})H(K)Jg
zeMAJj*vZ{(VI~=)W1&<6+B{*j(6Ew79}`ib5m%8^@~HGR+S>7>LFkLSIcR7fg!uhh
zE3qGU3?AOR(`E-6u>6+qS0k(NRsc>Vp#IX|#z*se6IL>Ul8g$qF!ZjNb-IJ2e;26U
z+F;;a*7K0lrtjTy(VZXwL`tEW&t*lCkCO8z)1r!gK07v62T91)(0bHj_WRhubf}{U
zOp=OXbh7qox8+;+j98~1Lc4WCeEuc|k%isBk`)xW8%MKoGAu*CuHCu;il)`~F)3e1
z`mDEP6d@<e*TPbdsz9zG85?p*vykflm`s_F{j|P(OgGG=LWy@4#f~6JCdueZ4Z6HO
zpjaaVmDkYbEXMqyq--6=o)|??gO?V0>9!DV;u(LVF}T%)TrxW@r{MN!BI~B(9x@0N
zV()>D);z`$`Zr%`FHm!f2!*<I%RP#j`Qmr;Hz^xf1CI}nSDDg}r-%N~I#t<ppv?@~
z%y9(<=qo$sO(B_kc`?&@ssFGTLjUf7J%g#VZ7of8!r^6pWX@r6B>p6~kH8DGrc&-n
z*lL$A2fY7+2ck=FGNs-oUfbVI9?soEO6lFRP4q@aDDQEy?^n6I+Wh}9dh2yh?<E10
zsE7OhdgSpVorhH8IO<)+!(ZWJU?X||2D4DeKij0n<@tOC1M2WBW5FlA*}Mz^4A9aS
z>up6$LZy;`{*mive+lDd1QvTrS3N0}I_9mKJ>SEdzbwalpfk2=u856zhV1mLu$|p1
z75R?JbLB+$ZKwYhG!gf1VgqPE%F`f$UK#!UUF^b6@3D>4bp`Cau1<u4!j;3>+?9`#
zTSHMvC-eRw`ypj3=x^J->gB=zT70jdTEj@qV;+H!U6zDF@sb-cZ_{R<Tz-aKAbmRh
z5oQ`<_|vi*dMhTPH0iWm7L9E{C~*sypt}8f`N*%NH}rS<Dm{7)7<BWj{IY>Oc1ck^
z`$-#m$dxC)*0{w%bsnGsex3VhL~I5X#c0laosL58O>>jdGcTn=qIYl8cHw$A{W$1s
z^7wFfHPK(pO^|ttoG9U*aJb7L7G(91z3h8|<$u1{_BrS#vwPs?|3>YW&q&EY&EGp`
ztG|6`W)xOb&`G2Hxl>M^kZ&qh12?ZqW=+Qffu6}c{at|KKD1lhKXWQ&7U8TA-w5S6
zwNgwq!kDI#Aobb6u&-@dykwaS%I98-!k92Fm(xs*J)%@Mjsu3quZ>?H-)Axo#0zZQ
z{?kz#SRQA3=`3L?SmN5G$Dx_yD)HE|d<DKc<9=A*Th=w_>!%(QWs?#%sa>rk#<x8Y
zSyuS<xD2$F*NX$qJ}lo&9logu0NSc+|2K~H(<==nRhjYwZ`YSaPo?*)%m}4nD&3^!
z9xb&CV1Ug+&zU#36L$;=OP=MlIk6sb)xdSAN2o#Q`=bm8!}G`lS$KKdz3k#<@W!e4
zg!|fk6~_+7H42rD;G+55f97hwf%dX%niu;eN^$o_h2))a*64tF41%g~J3`5UW*8X{
zVRFq&8q<$hQR!_6&yhfLu{2i}r{x%J=aoe2wp>?!@yE#OqA-;jM09eZ?F^q3TC=5l
zsaX@x@C7UU-lT{8{pya2nssPhMY7pDOR!0lTFbHa%4y^Kv!(M9w9fXIm9ko<^hEJJ
zJ}dJf=2emd;Y|j>Xegw)=Cj|o7RL>&Jb&KOFbIL*H-I{jYv+DB54couLtA-nD)6%Z
z9hH?r#Sq|N51@EL_IXa}sM31c(xdh_>c8Idt*X&gN9_I4`5Z8JNOzg-@9uiJ(;|xv
z8yBNhd8)9Vg#u0b6uZM$TDou8aO!1}*sZNzzo$WY{>5GXyQ7!-K&${9>!swBl2CNQ
z0x*b!rJXi=bjKp?oyc+>8+eI7&s%0?E-QL7i61~v5kNPCOmG(9@nSctYe$pQoYz1v
z?BZ{Tggo$K30n!ap($uE6-*Rg4pYETk0ba}k>|JWml)A9sQ8?^%*@cG$#82uvQK#n
zCd9{8i6^gjfKvZyC$-#h1A+D%DI*XV*nI!fx$V)QjFxU5u2}4s`Pv!-_!|vpNDWSZ
z<@vTNQ_pY?-VR1&H~=qd%i=3+q6JR_<9G*6a5cu|VvLnUcu~wGgAVQrNxLLY%b?QC
z9nEj;&exq8M_!xLjyM!?E5*}Hcl+QM8-Adbzi()R?ryFhZy&KHnr41K&wE_=0W1s*
z1Qxk9H`NDFOV+@&rUcq<$q|oP8lSwDoKvy#mVO|dwzfbq2hBc&f0p{H6FrCCu-62F
ztr_=?VhBt^)5}`FBSiB^mH%H4R~`>l`^86;T{H38$ymmWE!o$>SY~4EM0$}WQCY)l
zsW)3SlQ0I!Si(%Cddbp|M#N-DvQ+jYOSV#ms8oLUj?d@!_ndj|x#ynq-20sK{eD?O
zmueN?FQY5JG}Rb5IBHufXJ>1Ql1fRgZQ&j=<t9IVEn%I?+O~hiDF`I)*6q5EihIrq
z3`PG-uXUzPY4EiCHau^e{-<hUKdiAuJTHTZOHTQIyQY&Zn@cdG&WzzP-fPr%$MLqr
zNP$;BU660ZrQS|DD;u^JD(|)*;cDMWX5Ic%cWR*DZE9C1q86>$8q~Q{Qrqwd<3j!e
zfHy`IJBMv``>Wz^SvhqQ5Of#In>Jc_t=3(?x^{N~+$9}Ko<IET^z>xAFtNN%W>>!1
zlU!Dy6+yXD9MvUuFrLrlb#PfrPSDw+G;%C3zD3~!?X%Q#g7lPZX6?qg=7Xme1!?>Q
zmTJUPGrQjsb1h-93OR-N;u<@<gq;F7Es6G1@v_dBDFv#`+}wBp3-PnDd+~lBGbqUY
zA0(0p03gUOaC~tZ?XAS>!1bb#>u=AFe&1fSZjqe8%zorTZnafa&+>G{omQyHz44X4
z-cMPjWRoebb)|>Z6gRXY4zo_MzKPvmyf?3WH_S`79)%=^fz4OKX(M~zS%liDU->dK
z6VwEsM^Ghw-z9x_cV*c-zYv*DG2Qp2k02L}w5Odac>#zkhG5$h`Zzd=IGUxB1kC51
z9B0*}+O$vDNKlOZ#8FL2Bp-T3_#mp1u1mu*-T>noDCRmGrP3?)Jcsv|w2tF1Ms2C@
zs2<t|$l0e<F^HL?hDBU{T1}Y+oo_{T6ZFaaJ3unP59FQaQ#Z>CCn8{3C*<whv$j=v
zz+N|0o%Ho|!;9(FZgPK*M{(65b~W*~;})I{eAjtBH2H5nC5_w!A`CJlvpq&sS>qJ>
zX|$zvz_f=0N`_L<oF;ov3yD0&%enm@_$b}17vI0eXFWGa{F`Y(7(_gDccGk@L4h;)
z49Odt`FMKDdTTusN&bz`>MQ=_ra&y0RiY5YF3ceRl2l{(3fS*(m;{YQ%KsXeGv0~!
z4xE}QfSn6%(i}tttC2E2tse7s*jO1`v2IJ$>@915Zz&|iC*jElOmW;TErh8m962D`
zXR3{N0nSTxCVvrQ!A^CCB+hOqYcv<N8v0-`g!?sfU`}NEvSt+lP#!@8TO;~-JJs7o
zx=`No(-ojyi+MTEXJ~8aj4z3v##A^??+okKq+Y~2A94`OryVycE(X{h)`)jzdH#!#
z?n88COpIMKs?73rZWD37sl*sknAk#7<2-=G2N}&MW!<7~i(C2Jp*qAc<LK4$aL5;m
z-oT{m%KrOLKBt4%6T<j#y({!T0&I386~?2n-skgJdh5DfkM_f?mIEJ2EV!rhoR%<d
zp@CcgXrOo=bHER45Ip2r?<j9IyBLi%nCjGiR(h=RabiQ<pgXo^?vwdMkknN_3BVI@
zA_ooRt2u6&<4)x^y>maXSpOkk^*-Y?Hr)CpO^EZeh-hL-fPN&IMlu^V@>q=zwU^Fg
zG@r{Iv3lbDXna1*nCSvG-kA`lx-r>5$2-=YFlK7JHcU{yniwgVJ7m)H=KnTA2=-Xm
zP*&;{gH^uy=%xsz24HL|55KpWJ<g^5xk2KNZ05gSU!1RVpk8u$TlVpx0;l{c`b}y#
z5O)O1JS^4P{GXj&EeBfD?YdN>=1BqY(zIj*J7FS@JB(W}eB_y82_<(fQo`_xF>uaX
zsf;7ZUZYQq2}y(asAW<<&=)CH*@C}atCt9$Dpi49;_rR2xU$d4S;OM<?CzPNtp0K^
z)kqOVJyh5KQZy8|oLYUQ;hP9$^@8agZ_Ss+QTZl80V2ote9Zl7|7s{kU7=4Pd<52`
zR6V0-S*fkM1$mFn#~QD_L<Y)&K^&HguJ4cL{V(ObTGI)vLe8YGV99C8>1FosV#T9r
z(Fg}qh}M$_n}`q&@kLJIEz5MNL!IB-E3{EW{6iD4<Ra!Kor^IU5&$5n1=B+ac|4qT
zg?*D$lG887M=#V*R-oFjY@{zFN3<_|V6&|0yB|$~W2AE^yDdkYK?Y72K)0iJTRUaU
zwD;jTK|o$%-;Dm$yes;A7|$FkVpy7B1y(F>CxM+XlIF!m{k+U2GM^jRpb~8F-f(z9
zUiuaFZ-Atk_vXfQG$62q7TbUUTM3~$if2ZClFJio);2Id>P#;+P)NM!yyKJ7$j^Vl
z0<U3Cs+7}#E)@Y`6^9M<^9>2Dg|R+vrg?0{G;@n0<QUaQaXaO~V>>lXWq}qiQo)%n
z{PS!t^?il#C1nz8#QGgDT0i(!T`CW@g|>W?L$2ZFP65kf;`HGT#+vqwB`5sU$qM20
z5UH!L0fx$K)kHFHZn>FH<4IJ~a8x^oBZhOpMZD+hIKh3yA_LeKc-SqYcVP3a6FZ(Z
zj<1@z7`;bt{01>Pn2D|e*2MGR915urLpeK;y2|hvhWBX;L9evUjZbfL;emPUxui@q
zK5|8v6Jz<HL?M%=iUA(>QhX>kjMn=DcDyY=(%4?k>zkX%kM3R@PNtgw4e|2y8O0T7
zY&>runm>&=8Hs#<?fuBkIPPj88@}?*jhxL%zxbL7?sbUJ`U0$nIysvx4?N8gGTY7X
zL$^aB;xUsM$XU);%P^8WFnB>}6#g4=20}mive;~4+ZD*I18(cT4^+yI?hp`*c<=I{
zBYR%DN6q`cZjxT9B5AOIi6?9UdRvk0hQ^5f)S9v*Ebsrg7y1v}WAElui5rTE^%}Mc
zIT6<kNc(V0fF9IA!%DUzwq~)|tbpD8fgYebE_CJsz#z>uvfmGR#niQy?f{e)AR1ky
z0QXGNR4;#J_n*mzi`h#DKE!}ngZ#|U>u<P>xThpJ{@XGqV<d)bHy|$`gCKleo5u-t
z_9{o~`F_ZZ7hc}hH7Mf#8l0hY6v6>FwspF`u-1bu5j~)W-_Ynt9txI``wwhEhL^y|
z_k5yAlIFZ5hSiyk-{fq}&1KXRV)By!jUzxW01c;ifc!{!8-{Uw5fVSvMIwFNGsn(e
zZ0)XkFsObE$I~?hYSds)f?7r9K;Z(^)Qnc=XD){DEKa@C>h$DS5E&e@D2-)g@5p6#
z=58_RBZq>DyS?su;qIjCbw5eWA+V76mUW++8H_MWNEKZmftBP3pli9u79T`lD5+&2
zUh@)*_n~HxI6qw98sq?yrst4<L0bASvY3@?90Ti9X?LNl&toE(Bwmg~2T~JM>xHbF
zPde}|OMq_@J}CiFNHTx`%yyGMxerDb-ZM+5zL81+SkBq{+r|4NHG!`P(Rf&(?%qER
zVIgZK_Kteng3$h-hzuIX{ho4?xRo;(qd};vFS{Sk1Odx{`jkjm@S37PqQ#dHY0M{1
zarDeY<+o-cV}&7F`&Hlz0k1&MpTTE`3jBe-Q}jP0&_E7h5p&=e#^nG+OYRP0ww?!i
zrE^?_Kmrl&fX^Cz+7|&jhLTNX%nh8!nre*N;n)ya_|1+IFf18RH_Gl|jrMzW7HE-l
QKzI&v%+|@K7IP}?e|Xv(bN~PV

diff --git a/rust/datafusion/docs/images/DataFusion-Logo-Dark.svg b/rust/datafusion/docs/images/DataFusion-Logo-Dark.svg
deleted file mode 100644
index e16f244430e..00000000000
--- a/rust/datafusion/docs/images/DataFusion-Logo-Dark.svg
+++ /dev/null
@@ -1 +0,0 @@
-<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 329.21 164.93"><defs><style>.cls-1{fill:none;stroke:#000;stroke-linecap:round;stroke-miterlimit:10;stroke-width:0.75px;stroke-dasharray:0.75 3;}.cls-2{fill:#f3971f;}.cls-3{fill:#f29720;}</style></defs><title>DataFUSION-Logo-Dark</title><g id="Layer_2" data-name="Layer 2"><g id="logo"><path class="cls-1" d="M257.26,112.82c16,20.72,25.14,36.57,22,39.34"/><path class="cls-1" d="M184.24,37.13c6.55,4.41,15.83,12.47,26.43,23"/><path class="cls-1" d="M279.27,33.78c2.93,2.93-6.55,19.11-22,39.28"/><path class="cls-1" d="M208.83,127.75c-16.9,16.49-31,27-34,24.41"/><path class="cls-1" d="M227,19.05c3.86,0,7.25,18.31,9.11,39.88"/><path class="cls-1" d="M235.79,127.32c-1.91,19-5.12,37.24-8.75,37.24"/><path d="M0,.43H11.23a22,22,0,0,1,9.69,2.13,17.62,17.62,0,0,1,7.24,6.49A18.43,18.43,0,0,1,31,19.27a18.39,18.39,0,0,1-2.88,10.31,17.8,17.8,0,0,1-7.32,6.49,22.44,22.44,0,0,1-9.72,2.1H0ZM2.77,3.09V35.51H11a20,20,0,0,0,6.23-1,18.25,18.25,0,0,0,5.45-2.91,13.58,13.58,0,0,0,4-5.13,17.18,17.18,0,0,0,1.49-7.27,16.82,16.82,0,0,0-1.52-7.29,13.19,13.19,0,0,0-4-5.06A18.94,18.94,0,0,0,11.13,3.09Z"/><path d="M68.88,26H51.65L47,38.17H44.24L59,0H61.7L76.5,38.17H73.63ZM68,23.58,60.21,3.71,52.57,23.58Z"/><path d="M112.87,3.09H100.35V38.17H97.59V3.09H85.5V.43h27.37Z"/><path d="M146.45,26H129.21l-4.68,12.19h-2.72L136.56,0h2.71l14.8,38.17H151.2Zm-.93-2.4L137.78,3.71l-7.64,19.87Z"/><path d="M35.64,62.68H6.7a1.2,1.2,0,0,0-1.2,1.2V89.33a1.2,1.2,0,0,0,1.2,1.2H27.78a1.2,1.2,0,0,1,1.2,1.2v2.48a1.2,1.2,0,0,1-1.2,1.2H6.7a1.2,1.2,0,0,0-1.2,1.2v30.75a1,1,0,0,1-1,1H1.3a1,1,0,0,1-1-1V58.9a1.2,1.2,0,0,1,1.2-1.2h34.1a1.21,1.21,0,0,1,1.21,1.2v2.57A1.21,1.21,0,0,1,35.64,62.68Z"/><path d="M104,57.7a.94.94,0,0,1,1,.95v45q-.1,11.83-8,19.35t-19.85,7.51q-12.14,0-19.95-7.46t-7.91-19.5V58.65a1,1,0,0,1,1-.95h3.18a1,1,0,0,1,.95.95v45a21.46,21.46,0,0,0,6.61,15.76q6.42,6.12,16.17,6.12a22.55,22.55,0,0,0,16.12-6.12q6.47-6.11,6.57-15.76v-45a.94.94,0,0,1,.94-.95Z"/><path d="M139.56,55.41a29,29,0,0,1,13.79,3.05,1.09,1.09,0,0,1,.42,1.48l-1.3,2.37a1.1,1.1,0,0,1-1.38.48,31.23,31.23,0,0,0-11.93-2.5q-7.66,0-11.94,3.77A12.45,12.45,0,0,0,123,73.87a12.09,12.09,0,0,0,.5,3.62,8.63,8.63,0,0,0,1.85,3.18c.9,1,1.69,1.88,2.36,2.58a13.84,13.84,0,0,0,3.41,2.31l3.6,1.9,4.18,2.19,4,2.08q7.9,4.08,11.78,8.2t4,11.6q.1,9-6.61,14a26.07,26.07,0,0,1-16,5,36.05,36.05,0,0,1-10.7-1.69,37.09,37.09,0,0,1-7.41-3.07,1.1,1.1,0,0,1-.36-1.56L119,122a1.08,1.08,0,0,1,1.41-.35,33.62,33.62,0,0,0,16.54,4A20.46,20.46,0,0,0,148.32,122q5-3.5,4.87-10.47a13,13,0,0,0-1.19-5.19,12,12,0,0,0-3.37-4.13,31.66,31.66,0,0,0-4.27-3L138.73,96l-5.63-3.21-5.63-3.2a25.57,25.57,0,0,1-4.82-3.67,16,16,0,0,1-3.78-5.33,16.21,16.21,0,0,1-1.3-6.56q0-8.86,6.32-13.73T139.56,55.41Z"/><path d="M175,128.24h-3a1.09,1.09,0,0,1-1.09-1.09V58.79A1.09,1.09,0,0,1,172,57.7h3a1.09,1.09,0,0,1,1.09,1.09v68.36A1.09,1.09,0,0,1,175,128.24Z"/><path class="cls-2" d="M227,55.41a39.08,39.08,0,0,1,20.14,5.67A37.52,37.52,0,0,1,256,68.35,33.31,33.31,0,0,1,262.21,79a39.44,39.44,0,0,1,2.38,13.93q-.09,15.42-9.65,25.67Q244.31,130.23,227,130.53a39.35,39.35,0,0,1-10.05-1.34,39.75,39.75,0,0,1-18.85-11.7,33.13,33.13,0,0,1-6.22-10.64,39.18,39.18,0,0,1-2.39-13.93q0-16.11,10.79-26.81T227,55.41Zm0,4.68a32.37,32.37,0,0,0-14.18,3A27.74,27.74,0,0,0,202.71,71a37.51,37.51,0,0,0-5.77,10.5,33.82,33.82,0,0,0-2,11.39q0,14.13,9.05,23.53t23,9.4A31.48,31.48,0,0,0,244.35,121a30.34,30.34,0,0,0,11.14-12.24,35.59,35.59,0,0,0,3.63-15.87q0-14.33-9.05-23.53T227,60.09Z"/><path d="M328.12,128.24h-3.51a1.11,1.11,0,0,1-.9-.47L283.84,69.5a1.08,1.08,0,0,0-2,.61v57a1.09,1.09,0,0,1-1.09,1.09h-2.7a1.09,1.09,0,0,1-1.09-1.09V58.79a1.09,1.09,0,0,1,1.09-1.09h3.14a1.1,1.1,0,0,1,.89.47l40.25,58.49a1.09,1.09,0,0,0,2-.62V58.79a1.09,1.09,0,0,1,1.09-1.09h2.69a1.09,1.09,0,0,1,1.09,1.09v68.36A1.09,1.09,0,0,1,328.12,128.24Z"/><path d="M173.49,30.7a3.21,3.21,0,0,1,2.63.9,3.61,3.61,0,0,1,.95,2.59,3.34,3.34,0,0,1-1,2.48,3.46,3.46,0,0,1-2.58,1,3.52,3.52,0,0,1-2.49-6A3.38,3.38,0,0,1,173.49,30.7Z"/><path d="M279.46,152.37a2.92,2.92,0,0,1-2.49.42,11.78,11.78,0,0,1-2.29-.74,31.34,31.34,0,0,1-4.15-2.26,91.5,91.5,0,0,1-7.65-5.46c-4.92-3.86-9.58-8-14.16-12.28-9.15-8.5-17.79-17.52-26.19-26.75s-16.44-18.79-24.14-28.62q-5.73-7.41-11.08-15.11a156.2,156.2,0,0,1-9.82-16,.46.46,0,0,1,.81-.44h0c1.56,2.69,3.23,5.3,4.94,7.89s3.52,5.12,5.34,7.64c3.62,5,7.43,10,11.29,14.84,7.7,9.76,15.82,19.19,24.08,28.5S240.84,122.3,249.76,131c4.47,4.32,9.06,8.52,13.83,12.49a91.19,91.19,0,0,0,7.41,5.61,34.64,34.64,0,0,0,4,2.32,11.28,11.28,0,0,0,2.12.78,2.38,2.38,0,0,0,2-.21h0a.29.29,0,0,1,.4,0A.28.28,0,0,1,279.46,152.37Z"/><path d="M279.18,33.88a2.64,2.64,0,0,0-2.29-.18,11.63,11.63,0,0,0-2.3.92,36.9,36.9,0,0,0-4.29,2.61c-2.76,1.91-5.38,4-7.94,6.21-5.13,4.36-10.06,9-14.86,13.69-9.56,9.5-18.83,19.3-27.69,29.46s-17.56,20.47-25.69,31.22c-4.1,5.34-8.07,10.79-11.74,16.43-1.83,2.82-3.62,5.67-5.17,8.64a37.25,37.25,0,0,0-2.06,4.56,12.64,12.64,0,0,0-.62,2.38A2.52,2.52,0,0,0,175,152h0a.24.24,0,0,1,0,.33.22.22,0,0,1-.31,0,2.08,2.08,0,0,1-.66-1.23,4.34,4.34,0,0,1-.05-1.34,11.94,11.94,0,0,1,.53-2.54,37.13,37.13,0,0,1,1.92-4.75,98.74,98.74,0,0,1,4.91-8.91c3.57-5.77,7.44-11.33,11.43-16.8s8.21-10.76,12.48-16,8.63-10.42,13.14-15.46,9.07-10,13.74-14.92,9.44-9.69,14.37-14.33,10-9.18,15.24-13.41a100.81,100.81,0,0,1,8.22-6,36.53,36.53,0,0,1,4.45-2.5,12.69,12.69,0,0,1,2.43-.84,2.92,2.92,0,0,1,2.56.36.14.14,0,1,1-.17.21Z"/><path d="M227,19.35a3.59,3.59,0,0,0-2,.84,9.09,9.09,0,0,0-1.47,1.73,20.85,20.85,0,0,0-2.11,4.16,62.3,62.3,0,0,0-2.7,9,163.56,163.56,0,0,0-3.13,18.7,318.24,318.24,0,0,0-2,37.93q0,9.51.52,19c.32,6.33.81,12.65,1.54,18.94a171.5,171.5,0,0,0,3,18.75,66.25,66.25,0,0,0,2.7,9.08,22.89,22.89,0,0,0,2.08,4.21c.21.34.47.63.69.94a10.71,10.71,0,0,0,.81.84,4.11,4.11,0,0,0,1,.6,2.22,2.22,0,0,0,.52.2.72.72,0,0,1,.56.23h0a0,0,0,0,1,0,0h0a.74.74,0,0,1-.63.06,1.94,1.94,0,0,1-.59-.19,4.82,4.82,0,0,1-1.07-.64,10.1,10.1,0,0,1-.87-.86c-.25-.32-.53-.62-.75-.95a23.42,23.42,0,0,1-2.26-4.25,65.38,65.38,0,0,1-3-9.1,171.4,171.4,0,0,1-3.62-18.78c-.88-6.31-1.51-12.65-2-19s-.64-12.73-.68-19.1.18-12.75.55-19.11.9-12.72,1.78-19a164.9,164.9,0,0,1,3.56-18.81,64.19,64.19,0,0,1,3.07-9.11,21.75,21.75,0,0,1,2.35-4.24,9.39,9.39,0,0,1,1.7-1.81,4.19,4.19,0,0,1,2.4-.88.28.28,0,0,1,.29.29.29.29,0,0,1-.27.3Z"/><path class="cls-3" d="M173.48,29.89a4,4,0,0,1,3.25,1.1,4.46,4.46,0,0,1,1.17,3.2,4.16,4.16,0,0,1-1.23,3.07,4.33,4.33,0,0,1-3.19,1.23,4.3,4.3,0,1,1,0-8.6Z"/></g></g></svg>
\ No newline at end of file
diff --git a/rust/datafusion/docs/images/DataFusion-Logo-Light.png b/rust/datafusion/docs/images/DataFusion-Logo-Light.png
deleted file mode 100644
index 8992213b0e6072414aaddb574b2336cc6beca66b..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 19102
zcmXtgbyQT}_cjtD($Xy;14x5(DIp*o(v6gK=Kv~_(lvBQN_Tg6cS%bQ3@}6UyL`Uy
z`v=Rl;NEl2es(>3pM4EgQIf%aN&XTE2?<+HR!R*C2{|4K>6!eC=fH2QlqvOqe=r<n
zbzG2;h#8)Kp2?}bIYmNxjU*@aLBlicc-7zbjkVu8f*;Y>?2%6o+kVgYeY=U|m8~Q)
zw|>XBFDOGZbV)DxP_f4GUdy}?;q|9_#!QBTB`C(wck%k$u@qh+3JV6boXrqfhYOX1
zk?XZl$(L|vB<2_2>A%U!r8V2um!P6|3dYnynhl_A=l$xu2CD|MtIGGj8GptxU(3#2
zL|G14%ok+r-HMElt*%l{$(z6`k5C01Y8j?Ho^cKeQZvxd(M`Dsdw6)DL!u{x<o^m?
zH;njlT&5!bPTS)6*6p6Emda;TY!`mY@0*T->k>I0;MA;$j~~Onf&%}B&L(QKcD{nd
z1=7!8%*;zcfpx!u5Im9+2!N+|ZEEeD=CPTq75pHm=j2$Yh({0|ot}5;;d3M(2*DK3
zUmFPZ@C$#RNqxtbjrDFma@rpi`_Ol`zwv=1v5_F}REhq@Kc&tmvHKo0YJ2oMuBFCu
zx_6j<#70d@%6p{ueKK#?g}z_Fel4ZPBB?T^sl>Gq{R|50EE-{w=9m3i5Fi+0?r!<l
z&B0Nht;eR$o+`{KXrE~EONQ@8V!d!r`{LSshxCuAcHEV5LBqXuiI{wvc8sS#^{r#b
z(9u=>OmA*(M)DNjThEAVCzSpCY_|s-?Q38W+l!rV2TniJn<Jf0Z7v<pG0q^GI9dNb
z4(FovRCn+^V0*WrB$Zp=L<)=K5;Y?j!SF@*r11Q<kFnH8eX0c0{xhD!^2f6ItpGLO
z49<Rg&Qw^={8GNIJU;#zgoQVrYd4UmFL1b;4T?aTmUo_z`YjT#RqX!3Nx4i3rlz@O
zW2xy#sUgEmH)T_XI#fbB?E(i`6rNdhA0s%2v41M^4WR^Y&li1!P;azkZoT;FRLKu&
zpL^GJcfT9)?kKxm6+dRpnM)-<e<TUZyR!Fq-F$(ugmZ5Jd-sjM;u$LCrCL~N&b&gr
zFZ%&Lu8l)zNQgk^&imx1zO;@t({rr#?4Tj8>*j<nl3ym}9`I(f!_tA5<#%~ze}BIf
zeJ}4H8gdbf`;FTzG`Z*8wfi`T^lkXTbQLWu_6~!<$L55iiuH{@w-S7Fhn#>j{Dgy!
zPIOB>OCjCZn<d;L?mI?;<V!g($!M(-_UTtdE8->9TvIk){#w@7(sM9GQAJ-}NJt31
zFIGG!v{awuEqxHz#+xip?lHdp_mVMH>0H-&M5<$zE-=PDt0WM93^j~b<vW?CaF%Ci
zwrEhLu#C2&$dI7B;@P--oXeNEioc#;yjuP_NlEpni=hu`$T}(SfNxq;$HcB%MJ)-T
z0R%*pxHeW%7kUsE*76!UA|guv5YTr(#7{KH3?(!VezA8xG(^XmRrXOQkWDvaUP5MR
zWSBh5oI!;mvtcrA$s8p<zLT60@(%AV>gSpl-bOym;qELtBupP#<4rkO1q|V}`L8g6
zgYL^`q4FH@F<;f+UO-X#@;&Qg6~pY$@f62mKHqi#{&@I_MGe2V`5jt<E(0Ci*gy3w
z)M)~pcRhUBMs9kkz?zm}a~Sc*L<tg7dTZ_f+_UW@0kIUpnDHtr@{9GYf!%3lBqJUy
zi*Kqt^~vf0v-2Fjl|k#Tm5&RNRleX->AUc^PK&-96o~0HOZ%NIyLc1;%Ohewo_YiL
z5*X{hWs;;5>Q_s@#N{jpCq~iV6A@EJvC<S+nd%#{!=$91mzIBX{^Xy;LKfGqH<-Uz
z=M|;|i<(f$B{dnVOtLl7jd^>3-yZb@pl5mcw^w1^vC#0h!D#)%tP$_>1UwPhj9Py%
zDvkDP1};->i;~o&dR)Lb@zSDne?20MAuhJ7_6(^~X#H-nNt}Dy$x~q(llQO;87V1|
zslE3Yy`pp<QX>jg-#^$hsQ&EN=O^@1*>LK0F#G#q?5wcMtd9^nMX2eikB`3(LC!Pk
z`o0A`HKUTdje!{{Lqt<w_)e06n2wJ6&?oB`X5=2*kIkmH%?Y+SaV@NvE|H&^OJ$}1
zhgL}c4K-8wAdM-?qc?MSmq9YdtKqp)0@AtDtIZ@yK>I@qce?Ho?$)pH_tV?ZKQW!C
zo|TGKSk=c0Au|vL|E$RGC-Hyhb%vahMo%{-EQu2@#zv*l56DLN^C!9$le26JYpHB{
zoVfEuQUp2xtL2MtRsY`nQ^z%*+=dkP?KR{->S+#Hm~@^M?(Q*ri=chN$H&E{Re@&*
zb3@<we7k>wiFlmjo^Y2kRaK<3w50HF!_#rjCh(B+pb{_-mQZrfGE!`5936^0q9W%1
z4npxV{^Uu}#nLYXieqGDe4kjV!VcWKgEEw*q{wsL@7F!~{S5CipYo*HZ5w)1NxiZk
z$A$a>2Y?LVZB8-)UXRUsH+V5@Oe#%mfNl4C2T11w@`T_Fa+F%ZHqmO5T3j7y)KAXE
zKpGj(qQ12&o=HjL>pZOuD3l67`xO!8yj)v*SDKk(qGs;g@l-Cgll9WGUAmiRyAs~N
zuJu9!G9^eR>GagIT(u?vMPrV)+NXD9{oG4(j{tTF(e*rG2E4?Y4)SgvX_}`oct2Dm
zx2;xF`nk-vDFbQ7>B79d`M}T(D<AGLZbj6%x??lLb!!M#v+XZg8Z25BVj@JdFT+wF
z03mR40T{dC64QUm5R_?tC#Oy~vR#f@hZ`5uYbX7mUYPD`+^SgVSq0WEP(w?YcSqfm
zgXnJa3Ttu(Lyd;ZnEmXE?X?cvLv(64pWg-u6g6|vfw=<TlMzySG(Krd==bV-jFdDU
zR6v_`MmFT|M7~_fRpYLWnawuZODHMB6ourA_^t0-WpEYhiPbM@EqgOU-|lOp_KS4p
zEUcvuV(&tY5Ac(O6gkU_ScWdUg5CH&<|?1Ktp)@|4B}e;wa;QbxTlyNbsXlsJ|qY#
zEST~DwgNYKl`0NC`e0kP_iNl(Vu{GL&G{tBjslkb<NFrR3MGtvsT38)xbMm=<uKI7
za&s^GrhTu91((ajc*%~Z$0ct29F4q*xX55VxCMB_Dcr6?wMH*A96oUxHI5wS12xDs
zaVRn!xy$jLGT@QADTY4T@b<x}&Vm~X>;4;PdBx##Z8e7m4K6vc@xlVh4V=e^ASv?M
zz4v5lE6XJH^S??X2G>h_%=cAtugndse-_us$HnvJC$9|((a87sfXlM6FeBT0MsEgl
zs39Z`lWPHY9TYkIn(KrKnrRy!(>I@8^QO_-8)AhmcWs|zUq&dU&t3@7wNB2zCsP4-
zsGY}Rk#0_C;GQ8tg6>h`s%Bj%uG#z<p&M5ui=$zpF<Cn4LQA}&2e)RbuiG$|>^n($
z|1OtYi~GK1z>3ork+b?KCvmw_N#Q~08~q)H7*!RQ1=XavYV)YJO&FQn!0-8#g%`<)
zzWT=7xPiYwfPH>7FH8nROkLY<zPV#4n;+A`S>%#!4{;6A?-^PJv2jO_W0M%ee`Z;%
zFGcxn9!llSRsd&=%Za<A4?`%B)CUUompMYeP-GjV<fd?)kD-yQ-%5MuIeddb?lwd?
z9hl0{-ksW5|7Px$j4|;UjgAciZ8dmTMZAnGPExCj0Xx_o+`s$!&q=yvy5PN6$Ia4U
zj{l+V+Ti4@uJVN3nx#pnla`Uq+sf#-Q;1xa6us7tTB;E#q3NXejZqjv(uYbxrL{du
z0k?5ZiRtPhf-;gZC*)ho*3w_YFi?g8?bteZ+_lcmVuE4}_lK)K>=EL+wRf!69zKQI
za66_gO3zrAJJR%j22XiI*H)5@v7=eAUvkAkxZa357=qdF<ZSf|V`v_N$g@I(t81aO
zj(3gBD?T4ee@>&<!}3H_pIskfiuJFWY?g#<ngo;Wewyv&KLU@0nm1})A|pId$xvvv
zDcj{9Mtd_6e8_hXdfJvlUmdQn5FSErZ-k$D67Un78adtWoY8`?9F{olp9`$rZ@&B5
zqT0>fdv^r(j0nH@J}$Yo&*`BGY3ZENZ6vo31IbFU)&Ks*47y@pmb|oMe|ZW}0XIb_
zT=fN0;kv|&Pi_6XoEW<fyQ|w`YEegy9xEooQqOzm-wXD&sFtvVi1oGRsZ@ozvxMr}
z%jl?fo>lbn4Y>agl)@Y<XQc%T`ns44g<b4P2iVDRQvZk1QL;a;&ub4DrFA#WvP61N
zdvj9c0ty<yRg`h-ewk}`bg+30-+&;9ewFgW&v|)DD)uoaxrc4a0(a}Cdzb(fFSCuB
zvix(75nn2gnddQ`XUr-mX2|nCgyXx<d2D89-7eLn<&PTHPRvz1o!Dr%aWdxw4HyAR
zTDK6SC^6U0ZwLDeA2utER~Z#M55HttV)jZw_Fxl<3YqrBi0_ApFSVE}SB*IyqN=|c
z5!~JUUtaDeUVeReZKxX)_A*kuC*2_B;2)b?e*dicVy4l`l#7zCcSbg5EUtm8;LzoJ
zGSlzd!nSQN|1g>lpq}+Xf#I+qI$ptkZ1n9U%_qQd<{_AUA|~O-+IyY5skZORHn@UV
z2YjjOFVgUJ#R@UdA*#*t3b>~jpq7H>H8cZW1y~~~Q$K`clu&)8ZM2HQs3UV@Gbf9v
zb}@w9i^s$?;Qv9cuHo3C%s{+VM)9|i-J2=TkQOB67vWWSXA5)dXi0*B@4y^~3|h!L
zq~~-({Ur`pzm5}E8oF8<e2dU2w$=ez)8fPh_h^L#-Pr))&8(&0=6fdPXU!&tgyTS>
z9%?>|GhbpPd%Q6)olC~%%yzo=5VR}(Tvc0!Y~JBp5H7C};+r(e8>=(U<?vc8owED;
zRvYT3$XY3Z03IzsVB0RXNkU&n2D8;=%dY)QC~XMpCk4#GZAF!$$8g|lv_e#A^$!Yv
zM67oGrRn=i(=9MDrd738w7EgWYWS!$z_rNy0M=n>EGC6^XaKMq#6~Tgb?=IH_db8v
zu!yudd{bolV3d><+vApjY78i%RB<Muqc(@gJ17Ut+~vb)ptPP<zTY)S{!etV4|Qwj
z(MX7ptbRGIlOpc1{?O&g<#i-o1RL@QI)NA%=cZKq-s{KXw|4o(d0!HGE&Kzn<vy`p
z{Rh<Z6#uYCddBa<yzz%=&m1=VIiJ`TAemfB+EygMzh_J-QV5ACp{~wKRWu%Jf6$kO
z7VkG_5FP6YN3sjQ?7h9`k5@o_w7VYXsMB)saJ5gk4dhHl$WdH8%@TKi;zrbK^aj!H
z$u^&Ct}ERQIhbxlkgpqxSyhq~YK++Tqtljc)Ez2b<xpZ|$UKjH&}WS&`qFlC;Hz3w
zlE7-OfSj+zoh7~}-kJX1>*uxE9WqENUs4O)s0&?I``}f^MS|>A12Xr)*>|v()&wl)
z&~Qk8By<X-VW?u4ctdtTg~@HyPgP5M^v}y63I0hecg;}lSZN#bU-?*_p+3zH{2hYi
zEa&g7I5)WLa~Je*xkAO=jy&4b6}5_u40|Nb*c#u?3AuAT1Kl5BlbUG;Ywz}~&jNwK
zY{bUGV4}Gb%SBn&N^RYMC-d~0uUnwJfqivZtdynf7JPxj#srN|Zx;UPiys4e)5&e@
z#w7L>((D@r!Mqk{n-a9YeV$o__~5GByM#RE*;IW)&I<{-V(lq0dGYWo=wcDWVhS6=
zT^RikUp`1)Hlq?2BHMaG`Y~OH@4fpfCb8SyodLO---9*8H*7yy148)%V4E?1OD3e<
z?H_gz%TRVy7-fEr?7dtuPQ00ReveXZZ{mWFk3VnDDB@1V6wG8wom<ZN%&0XMC6^lD
zsb`-nF2E}vD5`=c=+nr}kbTKP<D5I_ia~=RCh<<5a^KXXzG&an;Ca{iRSmIAn=+=?
zro(Z>^V8h~XtL3^)AMK@@MBEmNF;ED?PaK{zBw;_rhjgMEF$05)_L)&{_|JPTxw%z
zVer#35;rcU?$$vb>&IrFUQTf)vrpz`HY2@^r=#2CP#mz9A*ZByv)(S{<23y0i9(;L
zR$R2QeB;ic*+DImR2bi8x_-ba<zN;hWRjLbRd7&WaH6nfg@*7P=im!ct3oqo4N{Px
zT<eUjrGxxB`1i?Db2cgQ^mKH_-q~xtnQ;h#^Gj*1h=Ivqoq=1(&%tSiZ}yxYU@brD
zM7oa9*(23)0ZmZAxlGE6TMH%cQcAWCu%LMD-rh-haSl}`+*zrd$tkW^D^(1afL0o4
zJJNb^xmq3msOmuh<gK+L{w1R*Koo)-$~nvXt!0j%wX-`vR}=-rvk&2F{oqq5V-a^Y
zg(y=PIMW&U3c*s*PGVzzT4{q^z5e-W;OxW<nwG~b83$PWI13x>{HrKavHJ_fGSjDV
zrl-fYP^aZF>`@#0OW4P^z8mlR%zHEaE~7A}MP3{9^$%L-&6>i0!p%e9%saee=0(sd
zFB+$b*@_bd>=%zwvjED~<Zh~~y%!AIwVHLJ3U8W+Jwr8%5Z_6@N90YqvC>_YRob=p
zk}AcV6L>%Coq`1;pJ-o1#w^Ck<K3J7vDI{TR4FM+0wgG-QM>V)4f98MPhMtJ`cCo+
z|A<F7KJ$N~o`fcI%-y3R4oI8RN_Y%!3<+fFl1M-(grL0VPnxzLf;XiTuKl5~bk$-M
zFuV~v$-)+U`K_D_H;zXE;%X-m=lKb+xgNzN$cBEvZ9^@exvU4Kl1FV^^ti-G)%zlr
zk#SWMwXeU)pCmmt1EuG-esb8{eG|`@Zajpfwj^iX8q&MA?b_*_@KUyPMv|Fsv&@Fr
z2lU%MWh34ecSmufv}sSmK{4>wgj<3{t~KjD@I)Pf0v=NJNcb>M%5UNfv$Shn$+~O!
zHy7gW+~VeSk*-o_9Ofri=m9U{33iV5_Nl9OM~M&l9_tDW9eiaHGcbdh>_Uc{*N?I?
z5QZvGpc;+%R_=+eh&~H`N1>m{xLp|RtV#}oT1GZfg7RdZO!&@mq{U+5G8>w2HJ@dQ
z4$+nRDYqZ^6b@(-8UHCb`uuCg=$t}0q1!o5!oKeyDXT)_iJuJSYAf&>M~EF4Z0Xim
zJchUZbFjamp49JcR|MIq8qbpnw0{Y10+Zg9xoD*u@834$_uGA%Io%504j#<UtPhhJ
zIsBF<Q&Z6x65<c)sNOzw72l;*P-*;9`ElY>dK79^7C{@{ydH3CqUNmtNIfLbp%5)A
zUSQ;4HsnjYC9hy-e4|sxZ{8ND)RbVjZmz7Y`M9?{JxQwWk^(MWYOyz2qfcqGSQJg^
zmFDjYY@XOWt0&6gWR3)^Ek+TCQ`8uJu>ZcczHArSC1=b}whd_Oo!`6Mw7$O(JH1VY
za%fW_EC1N~uxqAJU9Q8~+JjlXy!u;5y?hn#r<_wXfDr34It~}(p(OEP^^*IKeIg-c
zv6=8jD=(wN`cwMAb(8j7#SYT|F_f<5X;tq|0%PQDtLsQAIY|qw9tv;ndZJ3*s6U*{
zN`RtbW@vX&A04}on_RyMdcD;rzlBW11{IRo+n3j~_V_LSmi*g#kqUh%f83e%c<6t0
zr13!B24RxOG|TWTud4=TnsE1A6gC+`-x9H*4`uy!Z#eo~svtj=QbYHL*Jd4Z$=Jqx
zp7MVE$jkyG_{zmpDc!{TpZ-Y<CeRFx_cr8z7B1?+;po3xU)$_|hwJaPXLLQ8snGs-
z#qXbi%i&)+4ElU|uxS<UNVZ}5)Vhr-DoA{{a4IrQo*}7OTFmR05Hkf3+q*8XaCXhk
z%X0~Pp#eq6_zJ6Yx33JUHNa0=)iOAYy{Ua;o)4NeQB9R~qwP8{p45?H!s<GP)wQmK
zEh{Y->*vkNa;agY0=`>+Ek7dswk?hBmxK5GW1`xxDGYc^?JeJHaI#WwI6z9KVOWnM
zmXP^yA%las`mz10qz)5uCi(+C#**@ksXK|vd0P)>sLxreyy=Lpf9kdpp5siQAL^hv
z3B*K|L<!X2A*{Csuhwul7u%)dxldm+7J8|FrK8;}N`(f=+$fH|3x2%QTDV@hQ?9i9
z#?9NEW|@;a&n;o{s;<2P-Hu<UKI9m-vAo~4yVaL6MPqkFyIfDu(!kIW7-@NNUJFO^
zV6U-V<2cYh*-Pn-`$tU;S6Lg~Y=+*!+AkGH@E(?Q*P8v|QOnjE%^y32h0CGiCl7x;
z+C#g(e6@92mVsc(7xjBCQ>@uk;L~jeffXjat2W%hoPo8Z;b~?;8B?zlPja>nh<(f7
zh{nckFN-I7q^6742`pW&F0b3wC>kt!fuqLnt`-qTcQO9gK8X+3g(g+MUPSr)1LwVG
zo^n8k@XzPZZ@5U$xS<FUR|y>6QYkD3#StXMGWaVk<d<wm^r45A4dA}Mt+_y$b!`Js
zwDlh5L_G$#!;WzuC;dgA!{tYC9Q0%n)lJjj6M)Xsp?!*ZcnLTJD5%~NvT^P@`;9r3
zF)Tu7r6~7=;1UaM$n~GF@S$){y4+7a0B4=c6lhD^)vLDKbj;;~7SjjsbP&zFIWf)n
zq7&o&L%>Uao1Z4I`3G(qzk4mQAIzTx+eE1p-J>kn5kkt2X5}ZrkoKR2%NAI{$a%8j
ziwZ9n`-ldSM+arnH{NulMWg&V?wM}RbN&>@Roy>b;huRdRWi1k5ec~Lb!qo}K^p?y
zF2l6%&Q13=lDAo=n)y7u;HS{+2)pP9m{%$WIJN5we1iIP?0Ekl=?fcO)>eD26NNCr
z&KgUg)e-dJn_8|v-GNT+M%Xpo<}^8VxonzmWbC-P`$1zzw;U68im{*j{se+xH~gyy
z-G0KaLX(ZV=BS5#Sa#Bz$M!d;Zgd2E;~TS&g2oTJhB_(ZbP5&`0SE_W84j+_spK`w
z6^uAenj4%ZyFN}P6?=yzHx#KlJXKMJ42CEIE#I`NsU5YP?OT))xwp9j`bsHwu^wST
zZNfU;bqeKQ<_aGEW$Ir&#QDS8f2?->O_yDA%kZUS<o!)6*T6~!s-{Ms24Yf!&%0@~
z&teXsg6osoQGT)(ijh;*87E;WK8zw_5C^Bvhv0@<*(-+Kz=|lH3ZF)?L#+Cx!!Xin
z3%j?+rPi|mGN{$&rq{Y_;ukW+ZTB;{_uJt>h^+8=7p{*T=LM)L2>^C$_4ne{n&sn6
z`}uzRsfWYcz`+^n(>362k$wDx&rr@IqSv~rAQi)B4yG24*c<e#s!~1X&z3pvnvb3n
z%pg;yW2ZSqVzgwUttp9h;Vf@8VeE<u&o1ev*BAUg1D1&uN-S<d^eMuZ)BQJGriY>y
zOYiypV8!CMOAolLfk&@QxLJ-%vHtLlsm;L76NrE=SQ`rg6Uw_gmd#l3(0rePF{tN?
zPv3pick^cni77j|+PGCiJ}U<AvTZ{BAeN^uRQb_om_(?|=&B^HnQd?N`0;%D$tp9d
zubQiBgNsVjG(52=@E>Lv^W&y-Y0Z)I_`5ObP@I$+H+py|^^`u6I}IL$UZe%NKQ@U{
z{e&!yz8(I&iWKD$WdWe0qao8wsan&2yOqA`zc0Qnnxe=_;yGTlyLi0MWN-b#cdE-}
zW`~UY#{Sn=Ch0rks-3Af@)<eVo3w(R7at4A7gM;3l34Y;W4v)#(Fb>ox3{EKOdxM~
zD4TuWH7kAJNrfyPnu{WBexXxQm$;+&WPfD(^&gP??`oABlf%d5*Oc$v2R0qQmR`mK
zh=zQ<j3wzv1r#SYCY8E_x4RsT&@|z;L0Ivvil$JR9;8|_WFmlG?Q;g`x(xl@!RN~$
zAQ`qtR?=vSNIK+8Ca^Qaf0h?De+3#Y<8(w3QM9mAF0SkSFbAkX2j?GB^^z=^wSl=d
z>;Zg%>i<=|HTymDW&w9Gu70-3Nw%;hFGPbJxr7u{9)Qwc|EeU1NkOdNv`~QTxp?1&
zW@38Hd6~q{_ZQgUd4QtzC@-J9j%$)Tq31SnV$Pnrj3?@Tvcbhp;?`Jzw#-p@dE_!F
zBvLaBTxc5FHQ9aplg*tYUflBQwESV(8sLI{De(>WHE_+4C|FQi=^-;q;GpYskIZ?5
z0ST!4EpFP3CUr$q$79`}z#*B7;xD_jO%r7ifV}=V1B(Qn5o9TW^x^DWJonXgWd*>Y
ze`K(s)s$3nCfEAB!{l;<n8#x$2w#+IaY#t6*8-XTOxeooRu07u?=iiiiFLHkd+5vL
z5w{8KB>~9J(u#w_hllNK#V*WBKp{WY_yOYVmhh+OozP-zP1KXZ2((w+gVtE{v<FN{
zb@Y^zQKKyxG74-VfwKX^Y-gl)p`6`?IuU_`8n51>3<VEmjln$+XqQYIxdA%Xeq>gk
zo*vX>o^GRQGe8}E-Xupnz+P?TMGgY%|62Lj^L6}KURPU(7USUAj>L{vcX(#~@pB%h
zVfW)P42vYXC+gY5@oncAy!=~liKMz|cAbFx1J7a?2m9HA_2`!b&V5O+Ue8yEty^;0
z%WP-dke7AVEyYV^iL1tb&!6RMbXiCvVS}}D0d13{W(2aVP*W3nC}@jbe;o0#H9aDk
zET>oKI<Q%*HE^bbb$@w3tS6=UA_1{40}--(xRKT3skqh=c`v!12e8j8hKdHe6%~ry
zObO=$=Jxvpng@;wmTFn5#FFHdp1kby7cyO6-d-ms>Ac+AINrAsR5+fQ$mLUXY0K0W
z>l0ouXpon95{Iwb-$^|p+AyU`NXq9iLi~Qau*Xaku=C!}6x$7El$dcv012^>%l7cr
zRK~b2CbNCTI}y324k$8QQF*ea=%cJdsqN_167B<`!(E5+f=J(RO1&E8{FSyF2A8Og
zj`e*8foS325tRGZpFe(@>Hp6H$XwR-V9>ftt|KO$ZRP;o-VE33@|<;Z$?M&&-rjLI
zLVf(LQg6o!{pp5abPv-hH0zoJ2euvsfLMzW19cuJepPc;ncK%WwSzqGF6^WMX=wNy
z?9CH#@t^(8eDvt|kHaOYo;(zXXI7ry^mz%IU=E`wQuy7uNrVCQ1c@UpT>c;BdPZd5
zC4Zj!-a_zU1m^v48erMtwrdOeqK%>GVmIw;x2ZJu2!Hd9g|W3%-`#V6o7L3}M2kIl
zPdlgFt5ZA1{R=mYbiC&9V|?<PwG`m(kd|01kI08>sBZ$)@7Q%?-CYb3p!|561UqpD
zd?NMT1f`aDFwVgGo4<I$-Cy1&U$?f*{R828H-sxCJKm$-Uizay6**E!vO$7IY*gpz
zkNXae2%m7TeRT(_9mOdOim$Plyte4u<=xk$QJQM9$d9x6%WR<s4sv^@M~p1i|9mi?
zYP{H#3c|Fud&I-5;mdf0-$l7O<9>^kRqLQg-M7L&ZYJDc8`<1musMi-f;&J{G2Gqq
zz--S76P~z4M6-Rs6~lhIzM)o3bCmtnsa8^Aj;~qf{EzmrvBTTPcz2tTr8Mc)dQKhp
ze?C+1)tfXe<`9xYY^36(z}62C-4%DQBBB^Yko4o}8NdIJDF5?4lE`D<^=Fx4cf#vd
zCYl~3aIS18L4}MBYsiQ|XPHM?d22iuTyh=2k^RX<qKbBo`>|_2co!CWa36LL8*Rsx
zKh|UR8{V#oUlWhQjUy<0kLYe86_qWa9b}`kVRfKJB#8FfC>-9Vt>fbMLsUUDZ~H0Y
zgpbCOuq#;Mi7V7KPI~y=i~l{@{jt+MZe`EpxH7Rtyzd54U}*lQuf1aK>LYY;=;#21
zDD*F&vcV^7FtP=Sqbl-B+?b`V4bH5}@cUlOW{%w!g!t)qq*arH%KxcCMu2_W%4z@D
z8tgZ~6FrrlJnLn=TK&XsAC3QEeZy^E`2(YT@U%{a;(SsswwQb1K6t1pr@&ukknV!7
zH6BpDkJ8ug_Y)p7vhpJN{{@KmUYULu<Z{)vJ{j!pZI9#kpGP<y=fHQOgC9%aQ~4%U
zQE`!KaX}8yIK1ZA9Q2FZJrl*JeJgj4Z-o>Xcx5f1_A6OFgKw@WtCot-@g9|`2sFF5
zvGdt$#!a}7^J<dMR_BF2agrPo4Bv`Kk{p)<?Tst*u+@nX`imgOUR#tdTkH0dfLX(I
zA;$f8_e<DZ$D^MRdl`IpD`gNM4wo0mzsOUnU_n9h_t~*201lnG0E2t^;0<>GEECQp
z8pa2bUne<^pj1^RwXFj2pspDRB?iguA@<591>GfE!6$jBx7pC!t9h37r~cXF!gHzF
z8`+Er7@U95nP=Ga@BUO}9cVwEsDd2S86SehtdM`Gwp3Dj#o0)@U(CD{)b^u^Vw2+O
zRfS5LM6ZwHNn$ZDTc4C1-beTE;&HAx1Gp+{_C?w#%i%L^2(=6(4mK#k#tE7R%TqET
zOlTO#_)|b=i0#3_5mPN!v$1~ePF3U+J65xK9}eI18*}Xb($rrM+GBlLNpVBXbsF;;
z*h^e`AswkBP0Eb$ZLb)fK0~%ydQAqmL#&e3wXbwO(MfaEe)7n2rjOc&({0z+h2~1l
zhAr~j`D23TA{V6qO63(j;Hg$CbkTS8bBwK4!^?_;xWxvD$O65$k%^J*QhBe726p>I
z*TT3CDFJvo+uGj(iec$%OMEpV$7_-0;ElY1{wZ~>7;+~kWUf@>*80&bs5;M-Sq`^m
z6whHoMO~l36yZX4#4hxd-Z3kEs^wQ@vYn%2kIxf*4(1w{0Di@m2?9>+O>1T*h?<DM
zBQl2P-pSm(+eK^j2S1Wom)s^jnBing^eJsBG8;YSsR$|nnQtq0oS;fbuHZO%9Uo;C
ztsr+{v(7CLY}C_%IeaUHb=3M9ut0yW)(^`~Q92$Rg{8ckO&u^eU#q6qc<suMhY<HV
zR8f$)!gDN35=y<w(FlO<f8ZH)oI-65jkZqiHwH}bLUJvZZJGQT)a?6yR#_qX&{yY?
zDb4V@vO(MmC?RFJJVZoh?d$`azCAs&oeGtC8+UaZ&BR!59Kll(p%ryn2I$u%s!J6%
zFrv~hY*AP>l3Q+`1d;AeN}nDXfd<?eI8yUeQ2&lK4PVBw!>A$!K*r!KYo=va%Oc#5
z%5g)o`^Cw)1u?l4!;%Q49csugh}0Iv=dTnsn`9UGVc4(|tB8BjQ1nq?#QmKWr{p?-
z@_p=_WFpmrl4u9kkA5a?_#og3P7&kak*`sE-~-{s$ia0q8O)^RP&Rwv+)zQC?44sM
zPr!T@5Nsc3h~An|vyPKlI7qGkVskL^Xx*SKE+0nMfQqs%q3t0jI4-0ttz5xf-_@8n
zaQc&svq8^)WhA|L!b)9NT}K9+65=*1){%DA&nuc=Grk+hw=9}6+ghU}R7vZw>!A(f
znl^BR&81MT=mvaF&~|YxB)A4G7}dyJ@^@1`9X=uG(=r{*jhYKLQBpMKYGl`g&#n+V
zX&b%O12fXWx$7%LAqjm-H*RMEH7~2^qvz|MGK+h$-ODBUr?)+RC2#1OSB+K4jF5cR
zs<;SN#5G^hpdVPzz#sV;rbs~_HlLyaVO17yKGruiqPPys2yz4}Ws{J?@-l9qcEC)@
zG2Rc~A@6p}SLCIOsej!*nz2q>Z1PHhqhKzjq;Gd`{;;WoEzQ_SmvFKhArHK}(a_U!
zy-XbE5R08?G8)CUnp}5ENn1=JcnH5!`k^FyrW+@XNH|`+P=T|PCg7R!BfrZDu5Ke@
zk`A&&Zjl2MxU2a<;_e91<oDvWe^iy4j0eo3U5CT(+WPbfcV@F&nHr)NrlafApJI#;
zuXTw+7Uc+^1f3{fZ`#0m{?dt%*HrIh!Nn1_i!4y&*<n+VunlDLr-|l!mdd549{i?t
z@L}wb2Ylk9Fc@JNdYkK0aij73)o&BL7!j`BH{f55U3Ep3%U@7ig7-1fv)Vr}C)!_M
zuLUjdl=6E0^zLca*z*a3N{sxxr~|@SeW9VgXA(xd@m#KtwxuLF97|n~z-<KwALuJS
zh=Cy45TU&A-z?fgLVFOze#_E;#+q*22X*XMmGNcOP5WI;hqNFS?{?9x=&wxOWfZSB
z8*CJm%?U0OvJ5<$`05($2-+X*fabs*yYmk(BdylVhV3+G$@Q%zy{Rf%AOtLq#qS=l
zd}uy#HD7T<sUxg1$ihA?zW5YG-(x@LA#`!f3p~X9%1mNF{lI`wG~uMlMI!3ubRMyn
zkDsOJM-#P`1f_S?XGr6Wi2!vS05L+0b@qrOb(ERvkAg1dc~4!?9^vBCKh`MwnM<@t
z%pNlYJ2PVfi2&=WipW1?sSf{*Fzx%T!TsE?=<oUiuLyMTEFcm2Pd{pNRkmvZ6US3K
zHiCI{PAgv?yVuh<k3Hciu3bcBPS`3ZuYn`|(r!yEBXSOK7+Eqd;3=_=87(Dp&!g<H
zX8VprI?W{Xp>y^FIRZrQ0xnp*yL%oUjcmTAX+Wo@sB5Hyc=%gnIf8hd1b4kW7wfc-
z*dlG^>jC$dWPV9x{F+4-@wU0*8uL!~mZb}8HdOY$j7u~WAO6jl0`aZfMB*@aw?EO>
z;K=V)m6z(h1JZitwU~iDLr1wwES7SJ7tQh<c;vQ-|FX**d57e*!Ae7loTpcz5QEbp
za2ZQ)$um*JFT^4nKKks?V?d4y<&i7Ayz~~5F@*GUsOcA*{qii`Ra;|^KB0JZ!#IJ$
zT3D);%IXl77mGsJfDjkxwMhQM&ColMKwj|^Jnf&QK+{9d=11h9xXEsU>fN*59MfyJ
z1o*D3XrBRpRskrU;s$P-zD7F>Ak|ElQdGXuePJQ(3Hx~Xef4*;UAa9)ZKyO)(>N2m
z1H{yuK1p(i9!P><T6dBqdJ*Q!r^YfU?=r!`w7vZp6g2^pXbab7i)b))SzR#iw9Cj^
zI}}1Cu8*MjDrsb3N`sgAxAUe2Q1Vm|^Dg$8-BGabTw|2Qx8;iKMf9go!2ivr4_+IM
zpI>6S*zD;Db4&AnNOk)TKDG|rY9|3~&RSjJ{xZR5aO<s<?e0)+Li8-zDnDd><8rFf
zjc(-BbIr-%OS~mdwi3bi5JsgBu7cJ($3%eBxzgsW-3*D2;0T*2M$K#SF+OHx^HD|{
z9g<cpwb?CERFS`(4*ym8c{mX0r!T<R%k*=vEW++55R#OBQ9$hzQ}#~P=$aj;eM6{Y
zcU#>4?=^z!gM-(rG4(&CuVd>SU7I+nrn>`i$o19?MdNqUjUU&N>_Hopz>)Wt2zh3%
z(GJL-YZbB*>#ewJoV$?x9OU^kKGxQEQOF)V<eJR9XlU%ToC-kh%~XuBKR8hjoVj6>
z(q+d$#$gBJPxdfRZ6nb5VJbXpJST2$&uKdahE&NDR29d$Q2G4%57J9c$Zy-fp$jzp
zh<tkW4K~_m#$&4np8k_b+pyK~+A<ZtUpl+YEa{U51<buR8XtMZwT#e+>py3e^b5jC
zrU)pTtu%rA6B6ldT$3fzY`mC)<50n8r(;u<Eo7^1wR^F5WRFoQ%ssk;ft0D>6(XiX
zg3asyo%7%o!E0FJ&&4^(6eHzH{>omvA7SHe?;pOTMq6~tDgfs+`7wUj4DbRc6g$#C
zuC9H3V9F(F8KCCTPn;tWEj=YwWn<gNO?_SO;Un}`xTG%XdQmHR(tyGB<955fhLPR~
zXkz6J1EE?bsi%oorlkPP`{}GSY0>LjH4*p+2}5CQ4m9z;86mTB0(Cr1#bd9<vOOa{
zF)DRMVzAm%@XrKgry3uG+erziaFhWTPi-MgG;bL%dAl73xjFc1i_G0$r&KhRZu0rK
z^3%Q%*Z4Z6`H}ngdcGgGeLlkvEsME&g8#DqW?YDK_d0zIubC#$vqrBkpilkl7D5&@
z++=bq+LVe{MV=!A$^0RaXL5_c=bwD>YzP}Hp1tMmg!ZX^rnbIvMyA{Jgt@vI)oMX_
z*j+2duDcJI&MJPk>M%(X-G1toK;?bY15z0SQcTLmYDvp#CST~kQ)IbsQ*6r*-wwi>
zMENA&Pi5fU3jGfL=YSpnbkcsUq8;&>{I+^EW_FM|DO%>Zp0nyo&3PKCH|SVDIwt#a
z<M^rM%eg622igcXa;+98`%MUQt1@P-r~(tHV5-U~r_OK3^e5;z3W?`7^Pv2-CUG@l
zikO)kF9BHjC!XuLVTjbXjMEEwX9B*!Y%!ghw~+}?M<Gc>V31n{mmg5VGb)5I_K?x*
zgU#C7tPKmXmhBbhV{f#VcfQX^E@oh4V1Zdwn#`9C*)LV<B*iTeimVPa?<SseFl2XF
zEHpDw?kaXAZ@4=+lD|#DcG@_XJo@BA2wEoD?0n+1_KS5%@^0YcipH)~zS$Q#EPW>j
z;J^c>YP#^{NPA_OVOlPSS4xX^M9c1SXZAwu!5dABa5@jo-~7f&3q~U1xjW(XXrKJi
z4ET+aY`vKZfZ~oXjMbq$W3O=l246f?eTTu=lldw-<W7Z*D_~xZo6NHD#Eok7<YWv3
z&LGC9Q7-&c>FWXfF_Plzj}ZXJPWKn=Ryp`$pSr#&;zEDC2@DX_VGEAgn-P9P&H-MR
z8VJ`mSaxM<-Y@i+;-JOBeG<RdBBT)N;L5I&b<s`OtddI$m9C@R5V-cNalK<0h0aZC
zG>$j1k^5YG%UK+ADCg?#1(}QodUO)rwpXIpyc1Lc%z#f4<AZtx9#D<c86=ImKhK!q
zsB0)?@8bqc2A{Z<8u4(I>Gh(e+V$$?BV%<S7K?@wtAl@r?K1P~+v>{hK;kt7A=7HL
zT|^J~<{GpY*0EXlhxuTw=J2_veX-$C`aHsOYdoEurmZuuQY<hcdRE0NGvntF5EY*l
zi;ko!kXwbt6Kwu{a(thB;(BcIuRoYnZu)jTvZ#kXGuk?RzZr@0M_bb#{7gIgfL$Ys
zjJGRdxn5^g!{#CYkW|G~tC09u_6dhAF>E73U_O?yuHiU3vCb-Wbezxr4}Mi`QLkO?
z(Om2oCKtPbs|#HJ&~&KY;3kKam=znsx6bJU-o!K;d7Rv5TvjMwQ9Fa$q#)2j7`Hlw
z0&}gRK<FW2N_FahU&iql22Y_)mr+ch{A`W)|K}p!SFk3eE8vb0CRxddY$DyYH~ZIU
zwGI`tog~#+Gi;4Jc`Esy6G%~9Q>bORjuTc7_wd103T-8<oR%B1`9f;`6ehve5D$3e
zj}(vlU6swLxL5G<`(~tmy`!_^;&&SC?KI+D(>pGJK@!xpYbz-pIE2n@liy--=47uI
zA?Kw6<`aZnYce1az6>ua`}8F{$$9Tm)DZXK#&mT%c>bx)uzT{G!m40F-2UopUQT&o
zxb?mA=}{|xa!7TH^e%{FMtHx4RNdQj3j=$kQzgqaPEpn?gjld#J;WfwU`2sPYANYL
zO=$`$eoyc3mS+#!^1q@vJFE9&@%i^*>U=h_&Qh=7){i6j(>nY<60nOO-i%-_liQe4
z4XIIofZN*(^_Q{J*%nes66&tNU(TM$jUQ2uDg_kt`~79~JVy8by0I>HeQdPZUH<rK
zZ8d{I)Y$HyX=2-aHVp0>Utc^4>euy{qA<4rM-_UxegGzofnG=dg;hii4%)(~-$hmA
zp919Y<t5@u+%byB0}}tUwWx>MpJpEI9)tD%_wt7)?tVT(UpK)6333jH66O=`W_1Z8
z2Y#kr_2FMSea>WlJ*DKYX~Eg=_t{h5H_FKCv0Se7z~Fx}FXC99^?trZJRFVRk2nK(
z8r|<JB8pr4={M9k78$OgwR-{$ZPwf-LvZk0gF?eQmtXfyrTDyVvdk(?hm_4JA!Zu#
zI?@3K&}}mN^nXF@|C*kOR&k5mY?M=<0auKxyKT5}8?$2J0sZp*Q*7kv7{C$F1mTBe
zO7HOdZGigDCRMutnBX4Rtn;yYQ5ROYu`FNR_doJO6Ub^eh&Xe25TWru=eMQa8U%8A
zNu}bj=+K9VC<D#F+idQxHhUOdQ8tmk0rFG97ft?AkceruZr0t34A*IEbdI0kXwIVf
zeRdD;w_dvf?AlHb^b*5(LtoG9@H#ggBq*rX$Ia30&FdTijYvKF;qL(R3lyYTX!vkQ
zj_7@AyI>F*)dMPCZlKbQe4#i|scK<YCx>zz4eMJlz$*-Lo)z0|r56?vLcswpY0M9*
zIuxdy^6JZ&@vpP1DpzIs<hr(hE7lJ?HM}eK3Hxa3OKdt%``PQC-nS`mU2D7|Q6ECp
z`&DiBnjB&Q{)b7WY)DUYH`)to9B_YhCHlIIS;GRQ)P`gPKR;jSP;3{RGh3(mB5oYm
z>ujo0Zm|CqzrN}E^;tk~HWbYINdSmaTKP#Z+LXNd2yUBdMIg^;n5o~MS5<0Wv3%hb
z5Mdx_iOVPsIFT@~+Z%y9)M3+71rNP6f!PArHo!++&xDCm-mD6Dx_)2qWNchVJ-=D?
zR!c(8qp|g2?ktIzLu}-~jjZ%P7nN*{E5oRuIEhTP4yx}fF|24C5$rpeJc@ND`QrxH
zXkyL%t)fG#V3^IYcf;NzCN9z$|6RwR1;_y#DbD^ETrU&6Bt||%u<ayw^Fw!zYu*3l
z?3kOEX%mnGqV1K;VcFATCwvIK{Vq;|Ov@JZ6iOd}E4(zA3HOA(Kr4S<o(pg+ak``C
zGGR6xkC|urh`Bk`Q5^SCT6ddZ!>o0axxilP5Z4cfwa@4mKjhgK`nKmY{M2t#!5PF&
zP)%2kW!0rN6HldxU9auhHcp$6Xp92B>yQA}lW|p{JSjvOx^&gg;uSMcw~$CUKE7o&
zc60k-R+m<xL96jyM{NKOwHCZ;c<>_zNHC4sv<>I}ervA2=stG7W!1=b4fBmK?6Yk5
z^4MFI8}m<%WrwZ<=K4nJEiNJE-<%|tqZ^sKPCNu98M`!^eteoEOBBxx%Z5GKciqD6
za)0tQZE-dXpU5Z}xM;&T6@x>~RoE21Tln1`r38(ezl?uiUUmc1oaZ0GR8-X^{)!H@
zJ`1*j%<vK%1z&@JeuW->v0Gdo$U*}v0XM*UuxZF?aL-mxRxz8qDCt+F5kLQD-v5?6
zNqZg))cf~@YWCS1t|Y;<$3cz~jVW@R%Uag2g7qYhlnW2i(}XR!emQ*RZ&d14NuK*`
zPmXMa^8x5_&Hrrqxlt;y1Ly_M#l%LPC+)KgB;N0Zwbhe(D_hUIC6n}0=hxe-nfd6~
zL}838e_DI|_3-%1*F~W*9EC01l(OO`Wv$8HIsJ`Bo8mRWY#gBGos_jtnT%1nHlDh{
zPihxP?lo`^L?Z;5Px4~By#*}jF!t-MySO~HnZAq}C4tKE*R_J$#XiVm`C%O4A6dq~
z3+$Ju7M?P#BOiNAnJUXt^%)nf8M0RXVL+<Vo^H-nKiKWCo?kX>ySVlWX@F{Dlj2i*
z)0>cSfIZ}}S_*GJ0EoEDU1x(J^Pg{CW$?-UkXiihCOc=*eNP>ON01`kapzwESu1Em
z8+NtuOAbX!QQ_q`ar)>;PogO2h*4p*$$E&NU*}xq>YH-yt-o>BT=luIMU;waEh91?
z68=yHH8b`@?*xhr-3>0jM#-HwZ7CI=F`CK9d5?5|eh!H$j4RJlkC#!IDG7YGczyE<
z)DKyjIKtrqzJ8pD_d^{9k34SK#IJwlXePOgMq`~z*94@rL|D1E0=J2ryaVRP&&Gj-
zI-N3C>MCtgc(;8+;V+)-dQR=$EWRTMFpAnrt8229%j4oDOMqj+C8cUrFa{k7^7)wx
zue9x6a?u`8{V^E5eFZXc^N1^Xr~M$d>$b5~@_^^^P2HW31=1ee5ZUL+?$A1>djR5>
zg9n$XX<>l0TYY?lQv6{4cdJKdPCxsPbnOd$?H+Iak0O3#GVA5XH9ez-Qjd0pMOByL
zjo!d@6lyWs(O$;i<Ex5ld@mAq71ciB!HtEg-2*D+6g0V|ZkB;iw=sjHeo<Jff+Y^F
z)7##6I~MI^93|CURm&Z`?bk5+^W|a?>Q7Hm_vc~M;{fhj^LSi-x7hwTD1KcURn-r@
z9~g)A;oeCt1}oO9{rXd<{M=mPx@qre+gHyVRL5hHtSizsl}H6U?q~{T2;OvJEuvoG
zgK(MVZTPz`{sIHetFAi;jNQ%GY+8j|9^N1QzkV+k`r;(u{1KfuZxHW__7v@H3qKf<
z`Z(N$7<9aHszS>?k9B*~{bdf0o_Xj0h-iOIf81U7yFWn?3x`9&4<ti@LnKz9`goGL
zv)4}_CK!AFt~>s!g{Zro`un*KUNS%gicXte8zTyaba=?#x4s%bv$4){`7?T@@BZ_8
zSy_DP<?QW<e}UVaQRBsJS_SyZKlLVf55Yw%sz-iCOa&E)8(E8FIumL`^gm)bP(Hwl
zq2hPK{uxp2mwNm4`!?4i`$TSCfa3;S+lV44o6m2kGq2CeE6%~;v&=M)OGdi@R~gDL
zfSACyBU$+8><H>^eVq42JNOCyB#^a&%kzUm^rkTQ@GqZ7-wyWkC@6O`>?+>9a)JHv
zu6oHSYKcB5GwYp0x1Wo;o)}l;lcRWZt1y3{pgg3$+rBJxWagLo0XEqKz2qX_9ShUO
zic@wnzU}%FJ77|^_9d`adefu*aycB`@G;}aqug*9C>c3<(#%F4ud6na$I9P4mLJ)G
z%?uDv*G#w2`@94dFX(wYQU$w)7`l=!#MA5$$4%tg>U-lU^D2Jc`$ePu%{*YX|5E}6
zix-@;(^!x)_XB|fXEO21V}RFtp{xfuQDevYF`=>*`<L#!+_Z#ya9{G4x~6}SwIiyF
zNwnWP9VlOS%a|~(JdWU(GYJ^-`I6H|UA<&hY}b**P8J(WE){k?PX}OVBd>gGe+nJa
z(fFB3-kTDU+&(uwZuYii%;@LP_#>OU82=*y$BP7#j>@3=yl4A9)<x%)K`p`AMYN8Y
zE5=HH;v$z-KL=ckzawJ>pT#~;YA>>q;Fnx8BUXOFKgdPm|FNUk<dmR}$^WX`e`BZE
zu;uB>@cI-+LSb)kD|TM~h;4Y=nW-y~h~<xK?kPNuitSau19483Z7G!#>~;*rNKm%6
z!d31!h;Bbli!xOxyhG+49XX%C-hC{018GUK!r21_9(X>1jsrVVA#$I4+om7u%N-tX
zlCHMv{jE0^TE`!eftioN_uaWQhk>ya1WYP8!x;JXruV7t{v({7^c~~0CvIb0wAXfj
zjz4Xtl0fDw94^c41S~4|ZqTUj(}0p|TCo!r_lWTKxxBP_+|2x1U_u2H1^%JR50lsR
zi8$@y?tZcEf${%*tFYs1?WgI}WwrX(S1A6LlUc;}kwz&MGW0?^?m(dT7qyss47-n>
zzqUl}LxCD-i#)?!|AC<8uV`F+QdOA=B(ChS4;P_0$JtRlHzoz$^hejnN`K6K!7
zp(43ccbOyB!hdBPb`P_ex6WFrGZwD$MRuBxe?S|yjs7KMCwH3<;ST_hjAtE4OqF?t
z?*A0+DL32=Irfe`T=?}GELI-W#6#!&U>KKuItavb1z5_%w%+63-E*Hzn>;7BWRp*^
zYb(Gwj73nL<Q;B;+YtghwtGG!jXh59g1-y3PmSy$=`a^!M&IS(4%h#k91l)p=cL+P
zXD8f0p#ei9&JKh<kdR&qJpI1_HMLO~ro+{K1|C1I4U|q<q}<ILL)z*I2u|`J|8kuC
zdK_&}_se9(-!3NGb)&Z9G7~m+o0?LmOl>uK-Bn{i;O)as!6dDQ4vEtUXx@N@d5s5g
zh;kVW1q}zl6RoSxVz0ZdZ_@28M>b1NFjrQ&YjJKN;DcvZWB!-F#SyFGSLrl;nV9*Q
zBZtE5l}E}wUkgRL%hx{bbD@1&+z#-1vF}`zLUvANKq&AB)5<;j%q~|nhU+FhF=177
z|M@vW=(vRUcp#(3kgYME^=*b<O=F#1%+W5clOj)z<VdtfqC}nmFfM0{HsVo5HmyK9
z*oLpnlbI)oE=c2+8O<T<&OYcX>lNJLlMDruA(y_jcxY2^2(ql|_qfr;;cQJ+Ea`1-
zvGT@Jpjb&~l*(441g^*322P@1vEDDk5EZ&n!sgVZTT8kOZZBfT7t<rE`m6I<pFav*
z`-Xbc9M+9*8m_isxyfkYiY*%I9C}Z_v5Jgy_>t#t_#ozR@;>cBFSvO&kVzxV47fm(
zx+CB!FL3eWPm_xG#bFT}3%o94W><b6X2tjS=M~voC}(x>@5p6ZUZC@Xe<qLAl6n1r
ziDmQiN3>V8bE73Za@Ud%9kLFt6Y0HFAv0MLJ*gp56Fvs!|2Lz+#6Ip*$!C(>$4$@l
zzM6s8NeRZPD9A;+j@R1pehwO{Jx%_LgGPdVz+?yZ62M&hCAMOO%g9&dJPu)bIozku
z7kY5fN4noMv{ESbnEor)GpZ+LJA5^*?v)@2e?h>FaIxo569&4h_~KuZP@TbrxA>zt
z*A|vq=-6UW$}ou?FyY%g2B70icY>K*-s<^dIgY&pOEMT+tZj~S)FZ+_oBN>YnHuiX
z3iZd;3&Mh(tJx=Y3Oq*WRJA7Qh248flIC^60@w*#@YHJ6010yr;8G!^!LDl)c~^1)
z+cM4;H!ob&Vc5sM{+N7PvW4<3N3w&&Mi|EG|CMn5@l5A&9H$*|=QK@rh|c=4{Fuz|
zA6e04ex##Q5wTpmEeXx$N5fUuwo(|H&Cg`$I#x#^LynyM*1}GHtjo_e!pO2*?)%;S
z{rNoJpU3<CdA~oOKi<#Rd$W<Jo;QsGFnGw@$W9Ue!;sP5;x@fvvKB3qw(u@EC#urz
zSrzQ9*UV?bo_?S6kN|U~zCrqvL<*=F_HPGvvy0=qXyx+y>DdXoIevSO&@ID8yoe5a
z9>BAIUjoqXuT7TD%0BM+@NFO!MZoxXmZ{7>%p9sRc&c`P@TgJ((Y5AB54CH>kC>df
zvCxZni_#M<(cn$BynV{3a8*jDr0@q5sD9g(r)e;9NDe>n*-o?2yg3RZ=~xqxc!+oG
zWZ!W^V59|4P;S8_Qsstcg2x&Qe0;}A)T%CIJ?vfIKxMSDB~yRCmBzgXjUO(!m;`L&
zTN+IT%vomf0~1-mb=7<h5FP4<4!ub7%D#-nj-mfrn2rgtY0puwU#+=J>bw_)wOG?Q
zi%((mLtZkGN)17}*NpUWsr{!t3uSSnfOMax`caLm72L(wp_C8>_tsW0gU7506DL-;
z$cGE!{0;WY=TaWn@1TzVvfa-L;1#kdK*K_b=@{OhB8y<Lh>Zu3uV6_5UmqI6TJ)(L
zhacYNLh20}TwdvBgVq}0R$;u)oJ&$sNItEm4+zVa^AyQhu|#XhwWr#><A$6#0W?9B
z0tR7|mn`w99NBNwuz2^)ux?#l`06WN^3@e<^1M;Ql~BdySepbe&g0JE6E6DHzqP~K
zJAZ~N^&>xINA?UCAF%E)#-|KNi5}AD7F(4V&$u@OcHnd+FvNfdBO6yX1-d9|zn7!a
zvMI*lXG<s3-|W@~Yf;k2Nz~vz*%&h+B-mV7Pn7EY+-w{_NXMj|E~q3ICsvicG*=$Y
z-BT=>E2P042+Jq$j-xB2nUk-t&DgW+BF7-j^vy>`zp;Rp0nT9!O}Q}8Ywd<P)*Liz
zV8hjH*p`z_y|cbfkeZ3xEAb<27sYbk!d{?OSeJg~6d4csorEKcuWOH5>2_1@9w$au
z_vw%GUztUMB@(y0@U{)<pJ#}TpvuPEj_Z}iT~sV*sYv_kp?if5UC>aPlMm1U;3YF!
zG|2AhhADgvvMhCMX(i57pf*|GM%C)vn<foRCsypMlXT=}fmib;ci6AiKUMFX`^mvp
zGG#hz9eVLpQ%>>+RUpSst=q0+#)B$)8@yvdH3v>0>~wTkb(wmK^20>_El5VToasS&
zy5cc5A?g4MhvkK;mLrkxYxSF8fEVdS&mZ}yN10cZ_||#*ihe(6(a2c6^}ik1#le;c
zPc8D?j=>|6YmvS;J)FOxZA-`?&|4S^=XkKegcEG(8F)KQA04?hw>gzh5juCd>Y54k
z@F`p2opK4ZKIMQbfcYSVMQDQkd?F;nNVwVQkpH4{_FkJk6WzXHEYx(7qXp8P@Ue;E
zk(qp4zS86DwE<h5{T^)-mfpyDUgXK<>+7y}%rTcvFMm-=PFQO^w~DTqc&6lER$#2m
ziMXCv$}aAAki^JO_y)N*Koz_;d#_%lEy{In;}63EVqGR8m!lZeSwIEBR5OB;EX9RZ
zkE+HXMd+~aLk|L64?@U)m>o+Uh!H!3CYPN8`|Fxa0qLQd@2BV68Z$I!fGpf&2kLD|
z{dNx*BZCWQ+QCW9P_+%~KD4ULsa*6$XGxXt9WAxHgK0p_&3QtOuP7_PVuPzQBqdHi
ztvseEW)7~Z)kG~oo@Yq@HHaV89T8#(YjQMU|Asou5veQs#*>pqk8O*}R*JQrL|&7W
z4QeBLZ=zv`ChmXvh$(X-J=Q`~9)}Yco*Z#RL*b-J`4<`beOVj`=i&rwXk>dtkCj;8
zz`AM_O-RhEu&wb)vt6`W`6G8_vHz7tYz3I7IPq=t$Akl<TBM^p91`W&62esLxnfpf
zRO@+JxxOp(908ijzR+!3MP^(G@lffN=>f{WTUq^Xj3}tS7Wj2YO5?6{?_)F$xtL}R
zWY#g6tAWnu38R#6J$QS-M;#aD!O?FGzVF4ugC!|E<u3~YaC*D}XbR)9)mS)xF`D(K
zQVF*jltbg!<jS-c1`ySUK`u}+zq96%9GD}dBT=f1Bm!6uVs<jmIC|?s=I-jlcA&}l
z&zvenpFdQW^lK7OD!fHL+Af1ZJm$?h3cB$US^&TVKcdQT5Zfsf0&zqKpJduW)HGoQ
z{|5ZBSo*sJNe${_3sV=oRaQy;Us}n#PKa+u>$n}6Kp?CS=5m6>wP}T(M0P#S67*R=
z5BS$wBA!g%!J+;)NxrF>jpW3bB6fi7`Ik@+&W%2Rnes3LpDR|$*Ia<l?WQw70kj-&
zpv4QV<l=}VR0lUTZ6Lr6aR$`Hu}bo-cC9u0HXkp1I{ux`E)p9ECX@N2Y!fnn?uJ7I
T`<NCma7x9?!w<`M55M*w#h)n2

diff --git a/rust/datafusion/docs/images/DataFusion-Logo-Light.svg b/rust/datafusion/docs/images/DataFusion-Logo-Light.svg
deleted file mode 100644
index b3bef2193dd..00000000000
--- a/rust/datafusion/docs/images/DataFusion-Logo-Light.svg
+++ /dev/null
@@ -1 +0,0 @@
-<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 329.21 164.93"><defs><style>.cls-1{fill:none;stroke:#fff;stroke-linecap:round;stroke-miterlimit:10;stroke-width:0.75px;stroke-dasharray:0.75 3;}.cls-2{fill:#fff;}.cls-3{fill:#f3971f;}.cls-4{fill:#f29720;}</style></defs><title>DataFUSION-Logo-Light</title><g id="Layer_2" data-name="Layer 2"><g id="logo"><path class="cls-1" d="M257.26,112.82c16,20.72,25.14,36.57,22,39.34"/><path class="cls-1" d="M184.24,37.13c6.55,4.41,15.83,12.47,26.43,23"/><path class="cls-1" d="M279.27,33.78c2.93,2.93-6.55,19.11-22,39.28"/><path class="cls-1" d="M208.83,127.75c-16.9,16.49-31,27-34,24.41"/><path class="cls-1" d="M227,19.05c3.86,0,7.25,18.31,9.11,39.88"/><path class="cls-1" d="M235.79,127.32c-1.91,19-5.12,37.24-8.75,37.24"/><path class="cls-2" d="M0,.43H11.23a22,22,0,0,1,9.69,2.13,17.62,17.62,0,0,1,7.24,6.49A18.43,18.43,0,0,1,31,19.27a18.39,18.39,0,0,1-2.88,10.31,17.8,17.8,0,0,1-7.32,6.49,22.44,22.44,0,0,1-9.72,2.1H0ZM2.77,3.09V35.51H11a20,20,0,0,0,6.23-1,18.25,18.25,0,0,0,5.45-2.91,13.58,13.58,0,0,0,4-5.13,17.18,17.18,0,0,0,1.49-7.27,16.82,16.82,0,0,0-1.52-7.29,13.19,13.19,0,0,0-4-5.06A18.94,18.94,0,0,0,11.13,3.09Z"/><path class="cls-2" d="M68.88,26H51.65L47,38.17H44.24L59,0H61.7L76.5,38.17H73.63ZM68,23.58,60.21,3.71,52.57,23.58Z"/><path class="cls-2" d="M112.87,3.09H100.35V38.17H97.59V3.09H85.5V.43h27.37Z"/><path class="cls-2" d="M146.45,26H129.21l-4.68,12.19h-2.72L136.56,0h2.71l14.8,38.17H151.2Zm-.93-2.4L137.78,3.71l-7.64,19.87Z"/><path class="cls-2" d="M35.64,62.68H6.7a1.2,1.2,0,0,0-1.2,1.2V89.33a1.2,1.2,0,0,0,1.2,1.2H27.78a1.2,1.2,0,0,1,1.2,1.2v2.48a1.2,1.2,0,0,1-1.2,1.2H6.7a1.2,1.2,0,0,0-1.2,1.2v30.75a1,1,0,0,1-1,1H1.3a1,1,0,0,1-1-1V58.9a1.2,1.2,0,0,1,1.2-1.2h34.1a1.21,1.21,0,0,1,1.21,1.2v2.57A1.21,1.21,0,0,1,35.64,62.68Z"/><path class="cls-2" d="M104,57.7a.94.94,0,0,1,1,.95v45q-.1,11.83-8,19.35t-19.85,7.51q-12.14,0-19.95-7.46t-7.91-19.5V58.65a1,1,0,0,1,1-.95h3.18a1,1,0,0,1,.95.95v45a21.46,21.46,0,0,0,6.61,15.76q6.42,6.12,16.17,6.12a22.55,22.55,0,0,0,16.12-6.12q6.47-6.11,6.57-15.76v-45a.94.94,0,0,1,.94-.95Z"/><path class="cls-2" d="M139.56,55.41a29,29,0,0,1,13.79,3.05,1.09,1.09,0,0,1,.42,1.48l-1.3,2.37a1.1,1.1,0,0,1-1.38.48,31.23,31.23,0,0,0-11.93-2.5q-7.66,0-11.94,3.77A12.45,12.45,0,0,0,123,73.87a12.09,12.09,0,0,0,.5,3.62,8.63,8.63,0,0,0,1.85,3.18c.9,1,1.69,1.88,2.36,2.58a13.84,13.84,0,0,0,3.41,2.31l3.6,1.9,4.18,2.19,4,2.08q7.9,4.08,11.78,8.2t4,11.6q.1,9-6.61,14a26.07,26.07,0,0,1-16,5,36.05,36.05,0,0,1-10.7-1.69,37.09,37.09,0,0,1-7.41-3.07,1.1,1.1,0,0,1-.36-1.56L119,122a1.08,1.08,0,0,1,1.41-.35,33.62,33.62,0,0,0,16.54,4A20.46,20.46,0,0,0,148.32,122q5-3.5,4.87-10.47a13,13,0,0,0-1.19-5.19,12,12,0,0,0-3.37-4.13,31.66,31.66,0,0,0-4.27-3L138.73,96l-5.63-3.21-5.63-3.2a25.57,25.57,0,0,1-4.82-3.67,16,16,0,0,1-3.78-5.33,16.21,16.21,0,0,1-1.3-6.56q0-8.86,6.32-13.73T139.56,55.41Z"/><path class="cls-2" d="M175,128.24h-3a1.09,1.09,0,0,1-1.09-1.09V58.79A1.09,1.09,0,0,1,172,57.7h3a1.09,1.09,0,0,1,1.09,1.09v68.36A1.09,1.09,0,0,1,175,128.24Z"/><path class="cls-3" d="M227,55.41a39.08,39.08,0,0,1,20.14,5.67A37.52,37.52,0,0,1,256,68.35,33.31,33.31,0,0,1,262.21,79a39.44,39.44,0,0,1,2.38,13.93q-.09,15.42-9.65,25.67Q244.31,130.23,227,130.53a39.35,39.35,0,0,1-10.05-1.34,39.75,39.75,0,0,1-18.85-11.7,33.13,33.13,0,0,1-6.22-10.64,39.18,39.18,0,0,1-2.39-13.93q0-16.11,10.79-26.81T227,55.41Zm0,4.68a32.37,32.37,0,0,0-14.18,3A27.74,27.74,0,0,0,202.71,71a37.51,37.51,0,0,0-5.77,10.5,33.82,33.82,0,0,0-2,11.39q0,14.13,9.05,23.53t23,9.4A31.48,31.48,0,0,0,244.35,121a30.34,30.34,0,0,0,11.14-12.24,35.59,35.59,0,0,0,3.63-15.87q0-14.33-9.05-23.53T227,60.09Z"/><path class="cls-2" d="M328.12,128.24h-3.51a1.11,1.11,0,0,1-.9-.47L283.84,69.5a1.08,1.08,0,0,0-2,.61v57a1.09,1.09,0,0,1-1.09,1.09h-2.7a1.09,1.09,0,0,1-1.09-1.09V58.79a1.09,1.09,0,0,1,1.09-1.09h3.14a1.1,1.1,0,0,1,.89.47l40.25,58.49a1.09,1.09,0,0,0,2-.62V58.79a1.09,1.09,0,0,1,1.09-1.09h2.69a1.09,1.09,0,0,1,1.09,1.09v68.36A1.09,1.09,0,0,1,328.12,128.24Z"/><path d="M173.49,30.7a3.21,3.21,0,0,1,2.63.9,3.61,3.61,0,0,1,.95,2.59,3.34,3.34,0,0,1-1,2.48,3.46,3.46,0,0,1-2.58,1,3.52,3.52,0,0,1-2.49-6A3.38,3.38,0,0,1,173.49,30.7Z"/><path class="cls-2" d="M279.46,152.37a2.92,2.92,0,0,1-2.49.42,11.78,11.78,0,0,1-2.29-.74,31.34,31.34,0,0,1-4.15-2.26,91.5,91.5,0,0,1-7.65-5.46c-4.92-3.86-9.58-8-14.16-12.28-9.15-8.5-17.79-17.52-26.19-26.75s-16.44-18.79-24.14-28.62q-5.73-7.41-11.08-15.11a156.2,156.2,0,0,1-9.82-16,.46.46,0,0,1,.81-.44h0c1.56,2.69,3.23,5.3,4.94,7.89s3.52,5.12,5.34,7.64c3.62,5,7.43,10,11.29,14.84,7.7,9.76,15.82,19.19,24.08,28.5S240.84,122.3,249.76,131c4.47,4.32,9.06,8.52,13.83,12.49a91.19,91.19,0,0,0,7.41,5.61,34.64,34.64,0,0,0,4,2.32,11.28,11.28,0,0,0,2.12.78,2.38,2.38,0,0,0,2-.21h0a.29.29,0,0,1,.4,0A.28.28,0,0,1,279.46,152.37Z"/><path class="cls-2" d="M279.18,33.88a2.64,2.64,0,0,0-2.29-.18,11.63,11.63,0,0,0-2.3.92,36.9,36.9,0,0,0-4.29,2.61c-2.76,1.91-5.38,4-7.94,6.21-5.13,4.36-10.06,9-14.86,13.69-9.56,9.5-18.83,19.3-27.69,29.46s-17.56,20.47-25.69,31.22c-4.1,5.34-8.07,10.79-11.74,16.43-1.83,2.82-3.62,5.67-5.17,8.64a37.25,37.25,0,0,0-2.06,4.56,12.64,12.64,0,0,0-.62,2.38A2.52,2.52,0,0,0,175,152h0a.24.24,0,0,1,0,.33.22.22,0,0,1-.31,0,2.08,2.08,0,0,1-.66-1.23,4.34,4.34,0,0,1-.05-1.34,11.94,11.94,0,0,1,.53-2.54,37.13,37.13,0,0,1,1.92-4.75,98.74,98.74,0,0,1,4.91-8.91c3.57-5.77,7.44-11.33,11.43-16.8s8.21-10.76,12.48-16,8.63-10.42,13.14-15.46,9.07-10,13.74-14.92,9.44-9.69,14.37-14.33,10-9.18,15.24-13.41a100.81,100.81,0,0,1,8.22-6,36.53,36.53,0,0,1,4.45-2.5,12.69,12.69,0,0,1,2.43-.84,2.92,2.92,0,0,1,2.56.36.14.14,0,1,1-.17.21Z"/><path class="cls-2" d="M227,19.35a3.59,3.59,0,0,0-2,.84,9.09,9.09,0,0,0-1.47,1.73,20.85,20.85,0,0,0-2.11,4.16,62.3,62.3,0,0,0-2.7,9,163.56,163.56,0,0,0-3.13,18.7,318.24,318.24,0,0,0-2,37.93q0,9.51.52,19c.32,6.33.81,12.65,1.54,18.94a171.5,171.5,0,0,0,3,18.75,66.25,66.25,0,0,0,2.7,9.08,22.89,22.89,0,0,0,2.08,4.21c.21.34.47.63.69.94a10.71,10.71,0,0,0,.81.84,4.11,4.11,0,0,0,1,.6,2.22,2.22,0,0,0,.52.2.72.72,0,0,1,.56.23h0a0,0,0,0,1,0,0h0a.74.74,0,0,1-.63.06,1.94,1.94,0,0,1-.59-.19,4.82,4.82,0,0,1-1.07-.64,10.1,10.1,0,0,1-.87-.86c-.25-.32-.53-.62-.75-.95a23.42,23.42,0,0,1-2.26-4.25,65.38,65.38,0,0,1-3-9.1,171.4,171.4,0,0,1-3.62-18.78c-.88-6.31-1.51-12.65-2-19s-.64-12.73-.68-19.1.18-12.75.55-19.11.9-12.72,1.78-19a164.9,164.9,0,0,1,3.56-18.81,64.19,64.19,0,0,1,3.07-9.11,21.75,21.75,0,0,1,2.35-4.24,9.39,9.39,0,0,1,1.7-1.81,4.19,4.19,0,0,1,2.4-.88.28.28,0,0,1,.29.29.29.29,0,0,1-.27.3Z"/><path class="cls-4" d="M173.48,29.89a4,4,0,0,1,3.25,1.1,4.46,4.46,0,0,1,1.17,3.2,4.16,4.16,0,0,1-1.23,3.07,4.33,4.33,0,0,1-3.19,1.23,4.3,4.3,0,1,1,0-8.6Z"/></g></g></svg>
\ No newline at end of file
diff --git a/rust/datafusion/src/bin/main.rs b/rust/datafusion/src/bin/main.rs
deleted file mode 100644
index deb5b796b2d..00000000000
--- a/rust/datafusion/src/bin/main.rs
+++ /dev/null
@@ -1,25 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// Only bring in dependencies for the repl when the cli feature is enabled.
-#[cfg(feature = "cli")]
-mod repl;
-
-pub fn main() {
-    #[cfg(feature = "cli")]
-    repl::main()
-}
diff --git a/rust/datafusion/src/bin/repl.rs b/rust/datafusion/src/bin/repl.rs
deleted file mode 100644
index a6aec204c0d..00000000000
--- a/rust/datafusion/src/bin/repl.rs
+++ /dev/null
@@ -1,140 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#![allow(bare_trait_objects)]
-
-use arrow::util::pretty;
-use clap::{crate_version, App, Arg};
-use datafusion::error::Result;
-use datafusion::execution::context::{ExecutionConfig, ExecutionContext};
-use rustyline::Editor;
-use std::env;
-use std::path::Path;
-use std::time::Instant;
-
-#[tokio::main]
-pub async fn main() {
-    let matches = App::new("DataFusion")
-        .version(crate_version!())
-        .about(
-            "DataFusion is an in-memory query engine that uses Apache Arrow \
-             as the memory model. It supports executing SQL queries against CSV and \
-             Parquet files as well as querying directly against in-memory data.",
-        )
-        .arg(
-            Arg::with_name("data-path")
-                .help("Path to your data, default to current directory")
-                .short("p")
-                .long("data-path")
-                .takes_value(true),
-        )
-        .arg(
-            Arg::with_name("batch-size")
-                .help("The batch size of each query, default value is 1048576")
-                .short("c")
-                .long("batch-size")
-                .takes_value(true),
-        )
-        .get_matches();
-
-    if let Some(path) = matches.value_of("data-path") {
-        let p = Path::new(path);
-        env::set_current_dir(&p).unwrap();
-    };
-
-    let batch_size = matches
-        .value_of("batch-size")
-        .map(|size| size.parse::<usize>().unwrap())
-        .unwrap_or(1_048_576);
-
-    let mut ctx = ExecutionContext::with_config(
-        ExecutionConfig::new()
-            .with_batch_size(batch_size)
-            .with_information_schema(true),
-    );
-
-    let mut rl = Editor::<()>::new();
-    rl.load_history(".history").ok();
-
-    let mut query = "".to_owned();
-    loop {
-        let readline = rl.readline("> ");
-        match readline {
-            Ok(ref line) if is_exit_command(line) && query.is_empty() => {
-                break;
-            }
-            Ok(ref line) if line.trim_end().ends_with(';') => {
-                query.push_str(line.trim_end());
-                rl.add_history_entry(query.clone());
-                match exec_and_print(&mut ctx, query).await {
-                    Ok(_) => {}
-                    Err(err) => println!("{:?}", err),
-                }
-                query = "".to_owned();
-            }
-            Ok(ref line) => {
-                query.push_str(line);
-                query.push(' ');
-            }
-            Err(_) => {
-                break;
-            }
-        }
-    }
-
-    rl.save_history(".history").ok();
-}
-
-fn is_exit_command(line: &str) -> bool {
-    let line = line.trim_end().to_lowercase();
-    line == "quit" || line == "exit"
-}
-
-async fn exec_and_print(ctx: &mut ExecutionContext, sql: String) -> Result<()> {
-    let now = Instant::now();
-
-    let df = ctx.sql(&sql)?;
-    let results = df.collect().await?;
-
-    if results.is_empty() {
-        println!(
-            "0 rows in set. Query took {} seconds.",
-            now.elapsed().as_secs()
-        );
-        return Ok(());
-    }
-
-    pretty::print_batches(&results)?;
-
-    let row_count: usize = results.iter().map(|b| b.num_rows()).sum();
-
-    if row_count > 1 {
-        println!(
-            "{} row in set. Query took {} seconds.",
-            row_count,
-            now.elapsed().as_secs()
-        );
-    } else {
-        println!(
-            "{} rows in set. Query took {} seconds.",
-            row_count,
-            now.elapsed().as_secs()
-        );
-    }
-
-    Ok(())
-}
diff --git a/rust/datafusion/src/catalog/catalog.rs b/rust/datafusion/src/catalog/catalog.rs
deleted file mode 100644
index 30fea1f45f2..00000000000
--- a/rust/datafusion/src/catalog/catalog.rs
+++ /dev/null
@@ -1,139 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Describes the interface and built-in implementations of catalogs,
-//! representing collections of named schemas.
-
-use crate::catalog::schema::SchemaProvider;
-use std::any::Any;
-use std::collections::HashMap;
-use std::sync::{Arc, RwLock};
-
-/// Represent a list of named catalogs
-pub trait CatalogList: Sync + Send {
-    /// Returns the catalog list as [`Any`](std::any::Any)
-    /// so that it can be downcast to a specific implementation.
-    fn as_any(&self) -> &dyn Any;
-
-    /// Adds a new catalog to this catalog list
-    /// If a catalog of the same name existed before, it is replaced in the list and returned.
-    fn register_catalog(
-        &self,
-        name: String,
-        catalog: Arc<dyn CatalogProvider>,
-    ) -> Option<Arc<dyn CatalogProvider>>;
-
-    /// Retrieves the list of available catalog names
-    fn catalog_names(&self) -> Vec<String>;
-
-    /// Retrieves a specific catalog by name, provided it exists.
-    fn catalog(&self, name: &str) -> Option<Arc<dyn CatalogProvider>>;
-}
-
-/// Simple in-memory list of catalogs
-pub struct MemoryCatalogList {
-    /// Collection of catalogs containing schemas and ultimately TableProviders
-    pub catalogs: RwLock<HashMap<String, Arc<dyn CatalogProvider>>>,
-}
-
-impl MemoryCatalogList {
-    /// Instantiates a new `MemoryCatalogList` with an empty collection of catalogs
-    pub fn new() -> Self {
-        Self {
-            catalogs: RwLock::new(HashMap::new()),
-        }
-    }
-}
-
-impl CatalogList for MemoryCatalogList {
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn register_catalog(
-        &self,
-        name: String,
-        catalog: Arc<dyn CatalogProvider>,
-    ) -> Option<Arc<dyn CatalogProvider>> {
-        let mut catalogs = self.catalogs.write().unwrap();
-        catalogs.insert(name, catalog)
-    }
-
-    fn catalog_names(&self) -> Vec<String> {
-        let catalogs = self.catalogs.read().unwrap();
-        catalogs.keys().map(|s| s.to_string()).collect()
-    }
-
-    fn catalog(&self, name: &str) -> Option<Arc<dyn CatalogProvider>> {
-        let catalogs = self.catalogs.read().unwrap();
-        catalogs.get(name).cloned()
-    }
-}
-
-/// Represents a catalog, comprising a number of named schemas.
-pub trait CatalogProvider: Sync + Send {
-    /// Returns the catalog provider as [`Any`](std::any::Any)
-    /// so that it can be downcast to a specific implementation.
-    fn as_any(&self) -> &dyn Any;
-
-    /// Retrieves the list of available schema names in this catalog.
-    fn schema_names(&self) -> Vec<String>;
-
-    /// Retrieves a specific schema from the catalog by name, provided it exists.
-    fn schema(&self, name: &str) -> Option<Arc<dyn SchemaProvider>>;
-}
-
-/// Simple in-memory implementation of a catalog.
-pub struct MemoryCatalogProvider {
-    schemas: RwLock<HashMap<String, Arc<dyn SchemaProvider>>>,
-}
-
-impl MemoryCatalogProvider {
-    /// Instantiates a new MemoryCatalogProvider with an empty collection of schemas.
-    pub fn new() -> Self {
-        Self {
-            schemas: RwLock::new(HashMap::new()),
-        }
-    }
-
-    /// Adds a new schema to this catalog.
-    /// If a schema of the same name existed before, it is replaced in the catalog and returned.
-    pub fn register_schema(
-        &self,
-        name: impl Into<String>,
-        schema: Arc<dyn SchemaProvider>,
-    ) -> Option<Arc<dyn SchemaProvider>> {
-        let mut schemas = self.schemas.write().unwrap();
-        schemas.insert(name.into(), schema)
-    }
-}
-
-impl CatalogProvider for MemoryCatalogProvider {
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn schema_names(&self) -> Vec<String> {
-        let schemas = self.schemas.read().unwrap();
-        schemas.keys().cloned().collect()
-    }
-
-    fn schema(&self, name: &str) -> Option<Arc<dyn SchemaProvider>> {
-        let schemas = self.schemas.read().unwrap();
-        schemas.get(name).cloned()
-    }
-}
diff --git a/rust/datafusion/src/catalog/information_schema.rs b/rust/datafusion/src/catalog/information_schema.rs
deleted file mode 100644
index 5a7b9d5b644..00000000000
--- a/rust/datafusion/src/catalog/information_schema.rs
+++ /dev/null
@@ -1,492 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Implements the SQL [Information Schema] for DataFusion.
-//!
-//! Information Schema](https://en.wikipedia.org/wiki/Information_schema)
-
-use std::{any, sync::Arc};
-
-use arrow::{
-    array::{StringBuilder, UInt64Builder},
-    datatypes::{DataType, Field, Schema},
-    record_batch::RecordBatch,
-};
-
-use crate::datasource::{MemTable, TableProvider};
-
-use super::{
-    catalog::{CatalogList, CatalogProvider},
-    schema::SchemaProvider,
-};
-
-const INFORMATION_SCHEMA: &str = "information_schema";
-const TABLES: &str = "tables";
-const COLUMNS: &str = "columns";
-
-/// Wraps another [`CatalogProvider`] and adds a "information_schema"
-/// schema that can introspect on tables in the catalog_list
-pub(crate) struct CatalogWithInformationSchema {
-    catalog_list: Arc<dyn CatalogList>,
-    /// wrapped provider
-    inner: Arc<dyn CatalogProvider>,
-}
-
-impl CatalogWithInformationSchema {
-    pub(crate) fn new(
-        catalog_list: Arc<dyn CatalogList>,
-        inner: Arc<dyn CatalogProvider>,
-    ) -> Self {
-        Self {
-            catalog_list,
-            inner,
-        }
-    }
-}
-
-impl CatalogProvider for CatalogWithInformationSchema {
-    fn as_any(&self) -> &dyn any::Any {
-        self
-    }
-
-    fn schema_names(&self) -> Vec<String> {
-        self.inner
-            .schema_names()
-            .into_iter()
-            .chain(std::iter::once(INFORMATION_SCHEMA.to_string()))
-            .collect::<Vec<String>>()
-    }
-
-    fn schema(&self, name: &str) -> Option<Arc<dyn SchemaProvider>> {
-        if name.eq_ignore_ascii_case(INFORMATION_SCHEMA) {
-            Some(Arc::new(InformationSchemaProvider {
-                catalog_list: self.catalog_list.clone(),
-            }))
-        } else {
-            self.inner.schema(name)
-        }
-    }
-}
-
-/// Implements the `information_schema` virtual schema and tables
-///
-/// The underlying tables in the `information_schema` are created on
-/// demand. This means that if more tables are added to the underlying
-/// providers, they will appear the next time the `information_schema`
-/// table is queried.
-struct InformationSchemaProvider {
-    catalog_list: Arc<dyn CatalogList>,
-}
-
-impl InformationSchemaProvider {
-    /// Construct the `information_schema.tables` virtual table
-    fn make_tables(&self) -> Arc<dyn TableProvider> {
-        // create a mem table with the names of tables
-        let mut builder = InformationSchemaTablesBuilder::new();
-
-        for catalog_name in self.catalog_list.catalog_names() {
-            let catalog = self.catalog_list.catalog(&catalog_name).unwrap();
-
-            for schema_name in catalog.schema_names() {
-                if schema_name != INFORMATION_SCHEMA {
-                    let schema = catalog.schema(&schema_name).unwrap();
-                    for table_name in schema.table_names() {
-                        builder.add_base_table(&catalog_name, &schema_name, table_name)
-                    }
-                }
-            }
-
-            // Add a final list for the information schema tables themselves
-            builder.add_system_table(&catalog_name, INFORMATION_SCHEMA, TABLES);
-            builder.add_system_table(&catalog_name, INFORMATION_SCHEMA, COLUMNS);
-        }
-
-        let mem_table: MemTable = builder.into();
-
-        Arc::new(mem_table)
-    }
-
-    /// Construct the `information_schema.columns` virtual table
-    fn make_columns(&self) -> Arc<dyn TableProvider> {
-        let mut builder = InformationSchemaColumnsBuilder::new();
-
-        for catalog_name in self.catalog_list.catalog_names() {
-            let catalog = self.catalog_list.catalog(&catalog_name).unwrap();
-
-            for schema_name in catalog.schema_names() {
-                if schema_name != INFORMATION_SCHEMA {
-                    let schema = catalog.schema(&schema_name).unwrap();
-                    for table_name in schema.table_names() {
-                        let table = schema.table(&table_name).unwrap();
-                        for (i, field) in table.schema().fields().iter().enumerate() {
-                            builder.add_column(
-                                &catalog_name,
-                                &schema_name,
-                                &table_name,
-                                field.name(),
-                                i,
-                                field.is_nullable(),
-                                field.data_type(),
-                            )
-                        }
-                    }
-                }
-            }
-        }
-
-        let mem_table: MemTable = builder.into();
-
-        Arc::new(mem_table)
-    }
-}
-
-impl SchemaProvider for InformationSchemaProvider {
-    fn as_any(&self) -> &(dyn any::Any + 'static) {
-        self
-    }
-
-    fn table_names(&self) -> Vec<String> {
-        vec![TABLES.to_string(), COLUMNS.to_string()]
-    }
-
-    fn table(&self, name: &str) -> Option<Arc<dyn TableProvider>> {
-        if name.eq_ignore_ascii_case("tables") {
-            Some(self.make_tables())
-        } else if name.eq_ignore_ascii_case("columns") {
-            Some(self.make_columns())
-        } else {
-            None
-        }
-    }
-}
-
-/// Builds the `information_schema.TABLE` table row by row
-///
-/// Columns are based on https://www.postgresql.org/docs/current/infoschema-columns.html
-struct InformationSchemaTablesBuilder {
-    catalog_names: StringBuilder,
-    schema_names: StringBuilder,
-    table_names: StringBuilder,
-    table_types: StringBuilder,
-}
-
-impl InformationSchemaTablesBuilder {
-    fn new() -> Self {
-        // StringBuilder requires providing an initial capacity, so
-        // pick 10 here arbitrarily as this is not performance
-        // critical code and the number of tables is unavailable here.
-        let default_capacity = 10;
-        Self {
-            catalog_names: StringBuilder::new(default_capacity),
-            schema_names: StringBuilder::new(default_capacity),
-            table_names: StringBuilder::new(default_capacity),
-            table_types: StringBuilder::new(default_capacity),
-        }
-    }
-
-    fn add_base_table(
-        &mut self,
-        catalog_name: impl AsRef<str>,
-        schema_name: impl AsRef<str>,
-        table_name: impl AsRef<str>,
-    ) {
-        // Note: append_value is actually infallable.
-        self.catalog_names
-            .append_value(catalog_name.as_ref())
-            .unwrap();
-        self.schema_names
-            .append_value(schema_name.as_ref())
-            .unwrap();
-        self.table_names.append_value(table_name.as_ref()).unwrap();
-        self.table_types.append_value("BASE TABLE").unwrap();
-    }
-
-    fn add_system_table(
-        &mut self,
-        catalog_name: impl AsRef<str>,
-        schema_name: impl AsRef<str>,
-        table_name: impl AsRef<str>,
-    ) {
-        // Note: append_value is actually infallable.
-        self.catalog_names
-            .append_value(catalog_name.as_ref())
-            .unwrap();
-        self.schema_names
-            .append_value(schema_name.as_ref())
-            .unwrap();
-        self.table_names.append_value(table_name.as_ref()).unwrap();
-        self.table_types.append_value("VIEW").unwrap();
-    }
-}
-
-impl From<InformationSchemaTablesBuilder> for MemTable {
-    fn from(value: InformationSchemaTablesBuilder) -> MemTable {
-        let schema = Schema::new(vec![
-            Field::new("table_catalog", DataType::Utf8, false),
-            Field::new("table_schema", DataType::Utf8, false),
-            Field::new("table_name", DataType::Utf8, false),
-            Field::new("table_type", DataType::Utf8, false),
-        ]);
-
-        let InformationSchemaTablesBuilder {
-            mut catalog_names,
-            mut schema_names,
-            mut table_names,
-            mut table_types,
-        } = value;
-
-        let schema = Arc::new(schema);
-        let batch = RecordBatch::try_new(
-            schema.clone(),
-            vec![
-                Arc::new(catalog_names.finish()),
-                Arc::new(schema_names.finish()),
-                Arc::new(table_names.finish()),
-                Arc::new(table_types.finish()),
-            ],
-        )
-        .unwrap();
-
-        MemTable::try_new(schema, vec![vec![batch]]).unwrap()
-    }
-}
-
-/// Builds the `information_schema.COLUMNS` table row by row
-///
-/// Columns are based on https://www.postgresql.org/docs/current/infoschema-columns.html
-struct InformationSchemaColumnsBuilder {
-    catalog_names: StringBuilder,
-    schema_names: StringBuilder,
-    table_names: StringBuilder,
-    column_names: StringBuilder,
-    ordinal_positions: UInt64Builder,
-    column_defaults: StringBuilder,
-    is_nullables: StringBuilder,
-    data_types: StringBuilder,
-    character_maximum_lengths: UInt64Builder,
-    character_octet_lengths: UInt64Builder,
-    numeric_precisions: UInt64Builder,
-    numeric_precision_radixes: UInt64Builder,
-    numeric_scales: UInt64Builder,
-    datetime_precisions: UInt64Builder,
-    interval_types: StringBuilder,
-}
-
-impl InformationSchemaColumnsBuilder {
-    fn new() -> Self {
-        // StringBuilder requires providing an initial capacity, so
-        // pick 10 here arbitrarily as this is not performance
-        // critical code and the number of tables is unavailable here.
-        let default_capacity = 10;
-        Self {
-            catalog_names: StringBuilder::new(default_capacity),
-            schema_names: StringBuilder::new(default_capacity),
-            table_names: StringBuilder::new(default_capacity),
-            column_names: StringBuilder::new(default_capacity),
-            ordinal_positions: UInt64Builder::new(default_capacity),
-            column_defaults: StringBuilder::new(default_capacity),
-            is_nullables: StringBuilder::new(default_capacity),
-            data_types: StringBuilder::new(default_capacity),
-            character_maximum_lengths: UInt64Builder::new(default_capacity),
-            character_octet_lengths: UInt64Builder::new(default_capacity),
-            numeric_precisions: UInt64Builder::new(default_capacity),
-            numeric_precision_radixes: UInt64Builder::new(default_capacity),
-            numeric_scales: UInt64Builder::new(default_capacity),
-            datetime_precisions: UInt64Builder::new(default_capacity),
-            interval_types: StringBuilder::new(default_capacity),
-        }
-    }
-
-    #[allow(clippy::too_many_arguments)]
-    fn add_column(
-        &mut self,
-        catalog_name: impl AsRef<str>,
-        schema_name: impl AsRef<str>,
-        table_name: impl AsRef<str>,
-        column_name: impl AsRef<str>,
-        column_position: usize,
-        is_nullable: bool,
-        data_type: &DataType,
-    ) {
-        use DataType::*;
-
-        // Note: append_value is actually infallable.
-        self.catalog_names
-            .append_value(catalog_name.as_ref())
-            .unwrap();
-        self.schema_names
-            .append_value(schema_name.as_ref())
-            .unwrap();
-        self.table_names.append_value(table_name.as_ref()).unwrap();
-
-        self.column_names
-            .append_value(column_name.as_ref())
-            .unwrap();
-
-        self.ordinal_positions
-            .append_value(column_position as u64)
-            .unwrap();
-
-        // DataFusion does not support column default values, so null
-        self.column_defaults.append_null().unwrap();
-
-        // "YES if the column is possibly nullable, NO if it is known not nullable. "
-        let nullable_str = if is_nullable { "YES" } else { "NO" };
-        self.is_nullables.append_value(nullable_str).unwrap();
-
-        // "System supplied type" --> Use debug format of the datatype
-        self.data_types
-            .append_value(format!("{:?}", data_type))
-            .unwrap();
-
-        // "If data_type identifies a character or bit string type, the
-        // declared maximum length; null for all other data types or
-        // if no maximum length was declared."
-        //
-        // Arrow has no equivalent of VARCHAR(20), so we leave this as Null
-        let max_chars = None;
-        self.character_maximum_lengths
-            .append_option(max_chars)
-            .unwrap();
-
-        // "Maximum length, in bytes, for binary data, character data,
-        // or text and image data."
-        let char_len: Option<u64> = match data_type {
-            Utf8 | Binary => Some(i32::MAX as u64),
-            LargeBinary | LargeUtf8 => Some(i64::MAX as u64),
-            _ => None,
-        };
-        self.character_octet_lengths
-            .append_option(char_len)
-            .unwrap();
-
-        // numeric_precision: "If data_type identifies a numeric type, this column
-        // contains the (declared or implicit) precision of the type
-        // for this column. The precision indicates the number of
-        // significant digits. It can be expressed in decimal (base
-        // 10) or binary (base 2) terms, as specified in the column
-        // numeric_precision_radix. For all other data types, this
-        // column is null."
-        //
-        // numeric_radix: If data_type identifies a numeric type, this
-        // column indicates in which base the values in the columns
-        // numeric_precision and numeric_scale are expressed. The
-        // value is either 2 or 10. For all other data types, this
-        // column is null.
-        //
-        // numeric_scale: If data_type identifies an exact numeric
-        // type, this column contains the (declared or implicit) scale
-        // of the type for this column. The scale indicates the number
-        // of significant digits to the right of the decimal point. It
-        // can be expressed in decimal (base 10) or binary (base 2)
-        // terms, as specified in the column
-        // numeric_precision_radix. For all other data types, this
-        // column is null.
-        let (numeric_precision, numeric_radix, numeric_scale) = match data_type {
-            Int8 | UInt8 => (Some(8), Some(2), None),
-            Int16 | UInt16 => (Some(16), Some(2), None),
-            Int32 | UInt32 => (Some(32), Some(2), None),
-            // From max value of 65504 as explained on
-            // https://en.wikipedia.org/wiki/Half-precision_floating-point_format#Exponent_encoding
-            Float16 => (Some(15), Some(2), None),
-            // Numbers from postgres `real` type
-            Float32 => (Some(24), Some(2), None),
-            // Numbers from postgres `double` type
-            Float64 => (Some(24), Some(2), None),
-            Decimal(precision, scale) => {
-                (Some(*precision as u64), Some(10), Some(*scale as u64))
-            }
-            _ => (None, None, None),
-        };
-
-        self.numeric_precisions
-            .append_option(numeric_precision)
-            .unwrap();
-        self.numeric_precision_radixes
-            .append_option(numeric_radix)
-            .unwrap();
-        self.numeric_scales.append_option(numeric_scale).unwrap();
-
-        self.datetime_precisions.append_option(None).unwrap();
-        self.interval_types.append_null().unwrap();
-    }
-}
-
-impl From<InformationSchemaColumnsBuilder> for MemTable {
-    fn from(value: InformationSchemaColumnsBuilder) -> MemTable {
-        let schema = Schema::new(vec![
-            Field::new("table_catalog", DataType::Utf8, false),
-            Field::new("table_schema", DataType::Utf8, false),
-            Field::new("table_name", DataType::Utf8, false),
-            Field::new("column_name", DataType::Utf8, false),
-            Field::new("ordinal_position", DataType::UInt64, false),
-            Field::new("column_default", DataType::Utf8, false),
-            Field::new("is_nullable", DataType::Utf8, false),
-            Field::new("data_type", DataType::Utf8, false),
-            Field::new("character_maximum_length", DataType::UInt64, false),
-            Field::new("character_octet_length", DataType::UInt64, false),
-            Field::new("numeric_precision", DataType::UInt64, false),
-            Field::new("numeric_precision_radix", DataType::UInt64, false),
-            Field::new("numeric_scale", DataType::UInt64, false),
-            Field::new("datetime_precision", DataType::UInt64, false),
-            Field::new("interval_type", DataType::Utf8, false),
-        ]);
-
-        let InformationSchemaColumnsBuilder {
-            mut catalog_names,
-            mut schema_names,
-            mut table_names,
-            mut column_names,
-            mut ordinal_positions,
-            mut column_defaults,
-            mut is_nullables,
-            mut data_types,
-            mut character_maximum_lengths,
-            mut character_octet_lengths,
-            mut numeric_precisions,
-            mut numeric_precision_radixes,
-            mut numeric_scales,
-            mut datetime_precisions,
-            mut interval_types,
-        } = value;
-
-        let schema = Arc::new(schema);
-        let batch = RecordBatch::try_new(
-            schema.clone(),
-            vec![
-                Arc::new(catalog_names.finish()),
-                Arc::new(schema_names.finish()),
-                Arc::new(table_names.finish()),
-                Arc::new(column_names.finish()),
-                Arc::new(ordinal_positions.finish()),
-                Arc::new(column_defaults.finish()),
-                Arc::new(is_nullables.finish()),
-                Arc::new(data_types.finish()),
-                Arc::new(character_maximum_lengths.finish()),
-                Arc::new(character_octet_lengths.finish()),
-                Arc::new(numeric_precisions.finish()),
-                Arc::new(numeric_precision_radixes.finish()),
-                Arc::new(numeric_scales.finish()),
-                Arc::new(datetime_precisions.finish()),
-                Arc::new(interval_types.finish()),
-            ],
-        )
-        .unwrap();
-
-        MemTable::try_new(schema, vec![vec![batch]]).unwrap()
-    }
-}
diff --git a/rust/datafusion/src/catalog/mod.rs b/rust/datafusion/src/catalog/mod.rs
deleted file mode 100644
index 10591f07e37..00000000000
--- a/rust/datafusion/src/catalog/mod.rs
+++ /dev/null
@@ -1,146 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! This module contains interfaces and default implementations
-//! of table namespacing concepts, including catalogs and schemas.
-
-pub mod catalog;
-pub mod information_schema;
-pub mod schema;
-
-use crate::error::DataFusionError;
-use std::convert::TryFrom;
-
-/// Represents a resolved path to a table of the form "catalog.schema.table"
-#[derive(Clone, Copy)]
-pub struct ResolvedTableReference<'a> {
-    /// The catalog (aka database) containing the table
-    pub catalog: &'a str,
-    /// The schema containing the table
-    pub schema: &'a str,
-    /// The table name
-    pub table: &'a str,
-}
-
-/// Represents a path to a table that may require further resolution
-#[derive(Clone, Copy)]
-pub enum TableReference<'a> {
-    /// An unqualified table reference, e.g. "table"
-    Bare {
-        /// The table name
-        table: &'a str,
-    },
-    /// A partially resolved table reference, e.g. "schema.table"
-    Partial {
-        /// The schema containing the table
-        schema: &'a str,
-        /// The table name
-        table: &'a str,
-    },
-    /// A fully resolved table reference, e.g. "catalog.schema.table"
-    Full {
-        /// The catalog (aka database) containing the table
-        catalog: &'a str,
-        /// The schema containing the table
-        schema: &'a str,
-        /// The table name
-        table: &'a str,
-    },
-}
-
-impl<'a> TableReference<'a> {
-    /// Retrieve the actual table name, regardless of qualification
-    pub fn table(&self) -> &str {
-        match self {
-            Self::Full { table, .. }
-            | Self::Partial { table, .. }
-            | Self::Bare { table } => table,
-        }
-    }
-
-    /// Given a default catalog and schema, ensure this table reference is fully resolved
-    pub fn resolve(
-        self,
-        default_catalog: &'a str,
-        default_schema: &'a str,
-    ) -> ResolvedTableReference<'a> {
-        match self {
-            Self::Full {
-                catalog,
-                schema,
-                table,
-            } => ResolvedTableReference {
-                catalog,
-                schema,
-                table,
-            },
-            Self::Partial { schema, table } => ResolvedTableReference {
-                catalog: default_catalog,
-                schema,
-                table,
-            },
-            Self::Bare { table } => ResolvedTableReference {
-                catalog: default_catalog,
-                schema: default_schema,
-                table,
-            },
-        }
-    }
-}
-
-impl<'a> From<&'a str> for TableReference<'a> {
-    fn from(s: &'a str) -> Self {
-        Self::Bare { table: s }
-    }
-}
-
-impl<'a> From<ResolvedTableReference<'a>> for TableReference<'a> {
-    fn from(resolved: ResolvedTableReference<'a>) -> Self {
-        Self::Full {
-            catalog: resolved.catalog,
-            schema: resolved.schema,
-            table: resolved.table,
-        }
-    }
-}
-
-impl<'a> TryFrom<&'a sqlparser::ast::ObjectName> for TableReference<'a> {
-    type Error = DataFusionError;
-
-    fn try_from(value: &'a sqlparser::ast::ObjectName) -> Result<Self, Self::Error> {
-        let idents = &value.0;
-
-        match idents.len() {
-            1 => Ok(Self::Bare {
-                table: &idents[0].value,
-            }),
-            2 => Ok(Self::Partial {
-                schema: &idents[0].value,
-                table: &idents[1].value,
-            }),
-            3 => Ok(Self::Full {
-                catalog: &idents[0].value,
-                schema: &idents[1].value,
-                table: &idents[2].value,
-            }),
-            _ => Err(DataFusionError::Plan(format!(
-                "invalid table reference: {}",
-                value
-            ))),
-        }
-    }
-}
diff --git a/rust/datafusion/src/catalog/schema.rs b/rust/datafusion/src/catalog/schema.rs
deleted file mode 100644
index 0e39546a5f8..00000000000
--- a/rust/datafusion/src/catalog/schema.rs
+++ /dev/null
@@ -1,104 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Describes the interface and built-in implementations of schemas,
-//! representing collections of named tables.
-
-use crate::datasource::TableProvider;
-use crate::error::{DataFusionError, Result};
-use std::any::Any;
-use std::collections::HashMap;
-use std::sync::{Arc, RwLock};
-
-/// Represents a schema, comprising a number of named tables.
-pub trait SchemaProvider: Sync + Send {
-    /// Returns the schema provider as [`Any`](std::any::Any)
-    /// so that it can be downcast to a specific implementation.
-    fn as_any(&self) -> &dyn Any;
-
-    /// Retrieves the list of available table names in this schema.
-    fn table_names(&self) -> Vec<String>;
-
-    /// Retrieves a specific table from the schema by name, provided it exists.
-    fn table(&self, name: &str) -> Option<Arc<dyn TableProvider>>;
-
-    /// If supported by the implementation, adds a new table to this schema.
-    /// If a table of the same name existed before, it is replaced in the schema and returned.
-    #[allow(unused_variables)]
-    fn register_table(
-        &self,
-        name: String,
-        table: Arc<dyn TableProvider>,
-    ) -> Result<Option<Arc<dyn TableProvider>>> {
-        Err(DataFusionError::Execution(
-            "schema provider does not support registering tables".to_owned(),
-        ))
-    }
-
-    /// If supported by the implementation, removes an existing table from this schema and returns it.
-    /// If no table of that name exists, returns Ok(None).
-    #[allow(unused_variables)]
-    fn deregister_table(&self, name: &str) -> Result<Option<Arc<dyn TableProvider>>> {
-        Err(DataFusionError::Execution(
-            "schema provider does not support deregistering tables".to_owned(),
-        ))
-    }
-}
-
-/// Simple in-memory implementation of a schema.
-pub struct MemorySchemaProvider {
-    tables: RwLock<HashMap<String, Arc<dyn TableProvider>>>,
-}
-
-impl MemorySchemaProvider {
-    /// Instantiates a new MemorySchemaProvider with an empty collection of tables.
-    pub fn new() -> Self {
-        Self {
-            tables: RwLock::new(HashMap::new()),
-        }
-    }
-}
-
-impl SchemaProvider for MemorySchemaProvider {
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn table_names(&self) -> Vec<String> {
-        let tables = self.tables.read().unwrap();
-        tables.keys().cloned().collect()
-    }
-
-    fn table(&self, name: &str) -> Option<Arc<dyn TableProvider>> {
-        let tables = self.tables.read().unwrap();
-        tables.get(name).cloned()
-    }
-
-    fn register_table(
-        &self,
-        name: String,
-        table: Arc<dyn TableProvider>,
-    ) -> Result<Option<Arc<dyn TableProvider>>> {
-        let mut tables = self.tables.write().unwrap();
-        Ok(tables.insert(name, table))
-    }
-
-    fn deregister_table(&self, name: &str) -> Result<Option<Arc<dyn TableProvider>>> {
-        let mut tables = self.tables.write().unwrap();
-        Ok(tables.remove(name))
-    }
-}
diff --git a/rust/datafusion/src/dataframe.rs b/rust/datafusion/src/dataframe.rs
deleted file mode 100644
index 9c7c2ef96d6..00000000000
--- a/rust/datafusion/src/dataframe.rs
+++ /dev/null
@@ -1,286 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! DataFrame API for building and executing query plans.
-
-use crate::arrow::record_batch::RecordBatch;
-use crate::error::Result;
-use crate::logical_plan::{
-    DFSchema, Expr, FunctionRegistry, JoinType, LogicalPlan, Partitioning,
-};
-use std::sync::Arc;
-
-use async_trait::async_trait;
-
-/// DataFrame represents a logical set of rows with the same named columns.
-/// Similar to a [Pandas DataFrame](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html) or
-/// [Spark DataFrame](https://spark.apache.org/docs/latest/sql-programming-guide.html)
-///
-/// DataFrames are typically created by the `read_csv` and `read_parquet` methods on the
-/// [ExecutionContext](../execution/context/struct.ExecutionContext.html) and can then be modified
-/// by calling the transformation methods, such as `filter`, `select`, `aggregate`, and `limit`
-/// to build up a query definition.
-///
-/// The query can be executed by calling the `collect` method.
-///
-/// ```
-/// # use datafusion::prelude::*;
-/// # use datafusion::error::Result;
-/// # fn main() -> Result<()> {
-/// let mut ctx = ExecutionContext::new();
-/// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
-/// let df = df.filter(col("a").lt_eq(col("b")))?
-///            .aggregate(vec![col("a")], vec![min(col("b"))])?
-///            .limit(100)?;
-/// let results = df.collect();
-/// # Ok(())
-/// # }
-/// ```
-#[async_trait]
-pub trait DataFrame: Send + Sync {
-    /// Filter the DataFrame by column. Returns a new DataFrame only containing the
-    /// specified columns.
-    ///
-    /// ```
-    /// # use datafusion::prelude::*;
-    /// # use datafusion::error::Result;
-    /// # fn main() -> Result<()> {
-    /// let mut ctx = ExecutionContext::new();
-    /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
-    /// let df = df.select_columns(&["a", "b"])?;
-    /// # Ok(())
-    /// # }
-    /// ```
-    fn select_columns(&self, columns: &[&str]) -> Result<Arc<dyn DataFrame>>;
-
-    /// Create a projection based on arbitrary expressions.
-    ///
-    /// ```
-    /// # use datafusion::prelude::*;
-    /// # use datafusion::error::Result;
-    /// # fn main() -> Result<()> {
-    /// let mut ctx = ExecutionContext::new();
-    /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
-    /// let df = df.select(vec![col("a") * col("b"), col("c")])?;
-    /// # Ok(())
-    /// # }
-    /// ```
-    fn select(&self, expr: Vec<Expr>) -> Result<Arc<dyn DataFrame>>;
-
-    /// Filter a DataFrame to only include rows that match the specified filter expression.
-    ///
-    /// ```
-    /// # use datafusion::prelude::*;
-    /// # use datafusion::error::Result;
-    /// # fn main() -> Result<()> {
-    /// let mut ctx = ExecutionContext::new();
-    /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
-    /// let df = df.filter(col("a").lt_eq(col("b")))?;
-    /// # Ok(())
-    /// # }
-    /// ```
-    fn filter(&self, expr: Expr) -> Result<Arc<dyn DataFrame>>;
-
-    /// Perform an aggregate query with optional grouping expressions.
-    ///
-    /// ```
-    /// # use datafusion::prelude::*;
-    /// # use datafusion::error::Result;
-    /// # fn main() -> Result<()> {
-    /// let mut ctx = ExecutionContext::new();
-    /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
-    ///
-    /// // The following use is the equivalent of "SELECT MIN(b) GROUP BY a"
-    /// let _ = df.aggregate(vec![col("a")], vec![min(col("b"))])?;
-    ///
-    /// // The following use is the equivalent of "SELECT MIN(b)"
-    /// let _ = df.aggregate(vec![], vec![min(col("b"))])?;
-    /// # Ok(())
-    /// # }
-    /// ```
-    fn aggregate(
-        &self,
-        group_expr: Vec<Expr>,
-        aggr_expr: Vec<Expr>,
-    ) -> Result<Arc<dyn DataFrame>>;
-
-    /// Limit the number of rows returned from this DataFrame.
-    ///
-    /// ```
-    /// # use datafusion::prelude::*;
-    /// # use datafusion::error::Result;
-    /// # fn main() -> Result<()> {
-    /// let mut ctx = ExecutionContext::new();
-    /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
-    /// let df = df.limit(100)?;
-    /// # Ok(())
-    /// # }
-    /// ```
-    fn limit(&self, n: usize) -> Result<Arc<dyn DataFrame>>;
-
-    /// Calculate the union two [`DataFrame`]s.  The two [`DataFrame`]s must have exactly the same schema
-    ///
-    /// ```
-    /// # use datafusion::prelude::*;
-    /// # use datafusion::error::Result;
-    /// # fn main() -> Result<()> {
-    /// let mut ctx = ExecutionContext::new();
-    /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
-    /// let df = df.union(df.clone())?;
-    /// # Ok(())
-    /// # }
-    /// ```
-    fn union(&self, dataframe: Arc<dyn DataFrame>) -> Result<Arc<dyn DataFrame>>;
-
-    /// Sort the DataFrame by the specified sorting expressions. Any expression can be turned into
-    /// a sort expression by calling its [sort](../logical_plan/enum.Expr.html#method.sort) method.
-    ///
-    /// ```
-    /// # use datafusion::prelude::*;
-    /// # use datafusion::error::Result;
-    /// # fn main() -> Result<()> {
-    /// let mut ctx = ExecutionContext::new();
-    /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
-    /// let df = df.sort(vec![col("a").sort(true, true), col("b").sort(false, false)])?;
-    /// # Ok(())
-    /// # }
-    /// ```
-    fn sort(&self, expr: Vec<Expr>) -> Result<Arc<dyn DataFrame>>;
-
-    /// Join this DataFrame with another DataFrame using the specified columns as join keys
-    ///
-    /// ```
-    /// # use datafusion::prelude::*;
-    /// # use datafusion::error::Result;
-    /// # #[tokio::main]
-    /// # async fn main() -> Result<()> {
-    /// let mut ctx = ExecutionContext::new();
-    /// let left = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
-    /// let right = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?
-    ///   .select(vec![
-    ///     col("a").alias("a2"),
-    ///     col("b").alias("b2"),
-    ///     col("c").alias("c2")])?;
-    /// let join = left.join(right, JoinType::Inner, &["a", "b"], &["a2", "b2"])?;
-    /// let batches = join.collect().await?;
-    /// # Ok(())
-    /// # }
-    /// ```
-    fn join(
-        &self,
-        right: Arc<dyn DataFrame>,
-        join_type: JoinType,
-        left_cols: &[&str],
-        right_cols: &[&str],
-    ) -> Result<Arc<dyn DataFrame>>;
-
-    /// Repartition a DataFrame based on a logical partitioning scheme.
-    ///
-    /// ```
-    /// # use datafusion::prelude::*;
-    /// # use datafusion::error::Result;
-    /// # fn main() -> Result<()> {
-    /// let mut ctx = ExecutionContext::new();
-    /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
-    /// let df1 = df.repartition(Partitioning::RoundRobinBatch(4))?;
-    /// # Ok(())
-    /// # }
-    /// ```
-    fn repartition(
-        &self,
-        partitioning_scheme: Partitioning,
-    ) -> Result<Arc<dyn DataFrame>>;
-
-    /// Executes this DataFrame and collects all results into a vector of RecordBatch.
-    ///
-    /// ```
-    /// # use datafusion::prelude::*;
-    /// # use datafusion::error::Result;
-    /// # #[tokio::main]
-    /// # async fn main() -> Result<()> {
-    /// let mut ctx = ExecutionContext::new();
-    /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
-    /// let batches = df.collect().await?;
-    /// # Ok(())
-    /// # }
-    /// ```
-    async fn collect(&self) -> Result<Vec<RecordBatch>>;
-
-    /// Executes this DataFrame and collects all results into a vector of vector of RecordBatch
-    /// maintaining the input partitioning.
-    ///
-    /// ```
-    /// # use datafusion::prelude::*;
-    /// # use datafusion::error::Result;
-    /// # #[tokio::main]
-    /// # async fn main() -> Result<()> {
-    /// let mut ctx = ExecutionContext::new();
-    /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
-    /// let batches = df.collect_partitioned().await?;
-    /// # Ok(())
-    /// # }
-    /// ```
-    async fn collect_partitioned(&self) -> Result<Vec<Vec<RecordBatch>>>;
-
-    /// Returns the schema describing the output of this DataFrame in terms of columns returned,
-    /// where each column has a name, data type, and nullability attribute.
-
-    /// ```
-    /// # use datafusion::prelude::*;
-    /// # use datafusion::error::Result;
-    /// # fn main() -> Result<()> {
-    /// let mut ctx = ExecutionContext::new();
-    /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
-    /// let schema = df.schema();
-    /// # Ok(())
-    /// # }
-    /// ```
-    fn schema(&self) -> &DFSchema;
-
-    /// Return the logical plan represented by this DataFrame.
-    fn to_logical_plan(&self) -> LogicalPlan;
-
-    /// Return a DataFrame with the explanation of its plan so far.
-    ///
-    /// ```
-    /// # use datafusion::prelude::*;
-    /// # use datafusion::error::Result;
-    /// # #[tokio::main]
-    /// # async fn main() -> Result<()> {
-    /// let mut ctx = ExecutionContext::new();
-    /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
-    /// let batches = df.limit(100)?.explain(false)?.collect().await?;
-    /// # Ok(())
-    /// # }
-    /// ```
-    fn explain(&self, verbose: bool) -> Result<Arc<dyn DataFrame>>;
-
-    /// Return a `FunctionRegistry` used to plan udf's calls
-    ///
-    /// ```
-    /// # use datafusion::prelude::*;
-    /// # use datafusion::error::Result;
-    /// # fn main() -> Result<()> {
-    /// let mut ctx = ExecutionContext::new();
-    /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
-    /// let f = df.registry();
-    /// // use f.udf("name", vec![...]) to use the udf
-    /// # Ok(())
-    /// # }
-    /// ```
-    fn registry(&self) -> Arc<dyn FunctionRegistry>;
-}
diff --git a/rust/datafusion/src/datasource/csv.rs b/rust/datafusion/src/datasource/csv.rs
deleted file mode 100644
index 6f6c9abe077..00000000000
--- a/rust/datafusion/src/datasource/csv.rs
+++ /dev/null
@@ -1,144 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! CSV data source
-//!
-//! This CSV data source allows CSV files to be used as input for queries.
-//!
-//! Example:
-//!
-//! ```
-//! use datafusion::datasource::TableProvider;
-//! use datafusion::datasource::csv::{CsvFile, CsvReadOptions};
-//!
-//! let testdata = arrow::util::test_util::arrow_test_data();
-//! let csvdata = CsvFile::try_new(
-//!     &format!("{}/csv/aggregate_test_100.csv", testdata),
-//!     CsvReadOptions::new().delimiter(b'|'),
-//! ).unwrap();
-//! let schema = csvdata.schema();
-//! ```
-
-use arrow::datatypes::SchemaRef;
-use std::any::Any;
-use std::string::String;
-use std::sync::Arc;
-
-use crate::datasource::datasource::Statistics;
-use crate::datasource::TableProvider;
-use crate::error::{DataFusionError, Result};
-use crate::logical_plan::Expr;
-use crate::physical_plan::csv::CsvExec;
-pub use crate::physical_plan::csv::CsvReadOptions;
-use crate::physical_plan::{common, ExecutionPlan};
-
-/// Represents a CSV file with a provided schema
-pub struct CsvFile {
-    /// Path to a single CSV file or a directory containing one of more CSV files
-    path: String,
-    schema: SchemaRef,
-    has_header: bool,
-    delimiter: u8,
-    file_extension: String,
-    statistics: Statistics,
-}
-
-impl CsvFile {
-    /// Attempt to initialize a new `CsvFile` from a file path
-    pub fn try_new(path: &str, options: CsvReadOptions) -> Result<Self> {
-        let schema = Arc::new(match options.schema {
-            Some(s) => s.clone(),
-            None => {
-                let mut filenames: Vec<String> = vec![];
-                common::build_file_list(path, &mut filenames, options.file_extension)?;
-                if filenames.is_empty() {
-                    return Err(DataFusionError::Plan(format!(
-                        "No files found at {path} with file extension {file_extension}",
-                        path = path,
-                        file_extension = options.file_extension
-                    )));
-                }
-                CsvExec::try_infer_schema(&filenames, &options)?
-            }
-        });
-
-        Ok(Self {
-            path: String::from(path),
-            schema,
-            has_header: options.has_header,
-            delimiter: options.delimiter,
-            file_extension: String::from(options.file_extension),
-            statistics: Statistics::default(),
-        })
-    }
-
-    /// Get the path for the CSV file(s) represented by this CsvFile instance
-    pub fn path(&self) -> &str {
-        &self.path
-    }
-
-    /// Determine whether the CSV file(s) represented by this CsvFile instance have a header row
-    pub fn has_header(&self) -> bool {
-        self.has_header
-    }
-
-    /// Get the delimiter for the CSV file(s) represented by this CsvFile instance
-    pub fn delimiter(&self) -> u8 {
-        self.delimiter
-    }
-
-    /// Get the file extension for the CSV file(s) represented by this CsvFile instance
-    pub fn file_extension(&self) -> &str {
-        &self.file_extension
-    }
-}
-
-impl TableProvider for CsvFile {
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-
-    fn scan(
-        &self,
-        projection: &Option<Vec<usize>>,
-        batch_size: usize,
-        _filters: &[Expr],
-        limit: Option<usize>,
-    ) -> Result<Arc<dyn ExecutionPlan>> {
-        Ok(Arc::new(CsvExec::try_new(
-            &self.path,
-            CsvReadOptions::new()
-                .schema(&self.schema)
-                .has_header(self.has_header)
-                .delimiter(self.delimiter)
-                .file_extension(self.file_extension.as_str()),
-            projection.clone(),
-            limit
-                .map(|l| std::cmp::min(l, batch_size))
-                .unwrap_or(batch_size),
-            limit,
-        )?))
-    }
-
-    fn statistics(&self) -> Statistics {
-        self.statistics.clone()
-    }
-}
diff --git a/rust/datafusion/src/datasource/datasource.rs b/rust/datafusion/src/datasource/datasource.rs
deleted file mode 100644
index e2b07336486..00000000000
--- a/rust/datafusion/src/datasource/datasource.rs
+++ /dev/null
@@ -1,103 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Data source traits
-
-use std::any::Any;
-use std::sync::Arc;
-
-use crate::error::Result;
-use crate::logical_plan::Expr;
-use crate::physical_plan::ExecutionPlan;
-use crate::{arrow::datatypes::SchemaRef, scalar::ScalarValue};
-
-/// This table statistics are estimates.
-/// It can not be used directly in the precise compute
-#[derive(Debug, Clone, Default)]
-pub struct Statistics {
-    /// The number of table rows
-    pub num_rows: Option<usize>,
-    /// total byte of the table rows
-    pub total_byte_size: Option<usize>,
-    /// Statistics on a column level
-    pub column_statistics: Option<Vec<ColumnStatistics>>,
-}
-/// This table statistics are estimates about column
-#[derive(Clone, Debug, PartialEq)]
-pub struct ColumnStatistics {
-    /// Number of null values on column
-    pub null_count: Option<usize>,
-    /// Maximum value of column
-    pub max_value: Option<ScalarValue>,
-    /// Minimum value of column
-    pub min_value: Option<ScalarValue>,
-    /// Number of distinct values
-    pub distinct_count: Option<usize>,
-}
-
-/// Indicates whether and how a filter expression can be handled by a
-/// TableProvider for table scans.
-#[derive(Debug, Clone)]
-pub enum TableProviderFilterPushDown {
-    /// The expression cannot be used by the provider.
-    Unsupported,
-    /// The expression can be used to help minimise the data retrieved,
-    /// but the provider cannot guarantee that all returned tuples
-    /// satisfy the filter. The Filter plan node containing this expression
-    /// will be preserved.
-    Inexact,
-    /// The provider guarantees that all returned data satisfies this
-    /// filter expression. The Filter plan node containing this expression
-    /// will be removed.
-    Exact,
-}
-
-/// Source table
-pub trait TableProvider: Sync + Send {
-    /// Returns the table provider as [`Any`](std::any::Any) so that it can be
-    /// downcast to a specific implementation.
-    fn as_any(&self) -> &dyn Any;
-
-    /// Get a reference to the schema for this table
-    fn schema(&self) -> SchemaRef;
-
-    /// Create an ExecutionPlan that will scan the table.
-    fn scan(
-        &self,
-        projection: &Option<Vec<usize>>,
-        batch_size: usize,
-        filters: &[Expr],
-        // limit can be used to reduce the amount scanned
-        // from the datasource as a performance optimization.
-        // If set, it contains the amount of rows needed by the `LogicalPlan`,
-        // The datasource should return *at least* this number of rows if available.
-        limit: Option<usize>,
-    ) -> Result<Arc<dyn ExecutionPlan>>;
-
-    /// Returns the table Statistics
-    /// Statistics should be optional because not all data sources can provide statistics.
-    fn statistics(&self) -> Statistics;
-
-    /// Tests whether the table provider can make use of a filter expression
-    /// to optimise data retrieval.
-    fn supports_filter_pushdown(
-        &self,
-        _filter: &Expr,
-    ) -> Result<TableProviderFilterPushDown> {
-        Ok(TableProviderFilterPushDown::Unsupported)
-    }
-}
diff --git a/rust/datafusion/src/datasource/empty.rs b/rust/datafusion/src/datasource/empty.rs
deleted file mode 100644
index e6140cdb8de..00000000000
--- a/rust/datafusion/src/datasource/empty.rs
+++ /dev/null
@@ -1,80 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! An empty plan that is usefull for testing and generating plans without mapping them to actual data.
-
-use std::any::Any;
-use std::sync::Arc;
-
-use arrow::datatypes::*;
-
-use crate::datasource::datasource::Statistics;
-use crate::datasource::TableProvider;
-use crate::error::Result;
-use crate::logical_plan::Expr;
-use crate::physical_plan::{empty::EmptyExec, ExecutionPlan};
-
-/// A table with a schema but no data.
-pub struct EmptyTable {
-    schema: SchemaRef,
-}
-
-impl EmptyTable {
-    /// Initialize a new `EmptyTable` from a schema.
-    pub fn new(schema: SchemaRef) -> Self {
-        Self { schema }
-    }
-}
-
-impl TableProvider for EmptyTable {
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-
-    fn scan(
-        &self,
-        projection: &Option<Vec<usize>>,
-        _batch_size: usize,
-        _filters: &[Expr],
-        _limit: Option<usize>,
-    ) -> Result<Arc<dyn ExecutionPlan>> {
-        // even though there is no data, projections apply
-        let projection = match projection.clone() {
-            Some(p) => p,
-            None => (0..self.schema.fields().len()).collect(),
-        };
-        let projected_schema = Schema::new(
-            projection
-                .iter()
-                .map(|i| self.schema.field(*i).clone())
-                .collect(),
-        );
-        Ok(Arc::new(EmptyExec::new(false, Arc::new(projected_schema))))
-    }
-
-    fn statistics(&self) -> Statistics {
-        Statistics {
-            num_rows: Some(0),
-            total_byte_size: Some(0),
-            column_statistics: None,
-        }
-    }
-}
diff --git a/rust/datafusion/src/datasource/memory.rs b/rust/datafusion/src/datasource/memory.rs
deleted file mode 100644
index af404808702..00000000000
--- a/rust/datafusion/src/datasource/memory.rs
+++ /dev/null
@@ -1,472 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! In-memory data source for presenting a Vec<RecordBatch> as a data source that can be
-//! queried by DataFusion. This allows data to be pre-loaded into memory and then
-//! repeatedly queried without incurring additional file I/O overhead.
-
-use futures::StreamExt;
-use log::debug;
-use std::any::Any;
-use std::sync::Arc;
-
-use arrow::datatypes::{Field, Schema, SchemaRef};
-use arrow::record_batch::RecordBatch;
-
-use crate::datasource::TableProvider;
-use crate::error::{DataFusionError, Result};
-use crate::logical_plan::Expr;
-use crate::physical_plan::common;
-use crate::physical_plan::memory::MemoryExec;
-use crate::physical_plan::ExecutionPlan;
-use crate::{
-    datasource::datasource::Statistics,
-    physical_plan::{repartition::RepartitionExec, Partitioning},
-};
-
-use super::datasource::ColumnStatistics;
-
-/// In-memory table
-pub struct MemTable {
-    schema: SchemaRef,
-    batches: Vec<Vec<RecordBatch>>,
-    statistics: Statistics,
-}
-
-// Calculates statistics based on partitions
-fn calculate_statistics(
-    schema: &SchemaRef,
-    partitions: &[Vec<RecordBatch>],
-) -> Statistics {
-    let num_rows: usize = partitions
-        .iter()
-        .flat_map(|batches| batches.iter().map(RecordBatch::num_rows))
-        .sum();
-
-    let mut null_count: Vec<usize> = vec![0; schema.fields().len()];
-    for partition in partitions.iter() {
-        for batch in partition {
-            for (i, array) in batch.columns().iter().enumerate() {
-                null_count[i] += array.null_count();
-            }
-        }
-    }
-
-    let column_statistics = Some(
-        null_count
-            .iter()
-            .map(|null_count| ColumnStatistics {
-                null_count: Some(*null_count),
-                distinct_count: None,
-                max_value: None,
-                min_value: None,
-            })
-            .collect(),
-    );
-
-    Statistics {
-        num_rows: Some(num_rows),
-        total_byte_size: None,
-        column_statistics,
-    }
-}
-
-impl MemTable {
-    /// Create a new in-memory table from the provided schema and record batches
-    pub fn try_new(schema: SchemaRef, partitions: Vec<Vec<RecordBatch>>) -> Result<Self> {
-        if partitions
-            .iter()
-            .flatten()
-            .all(|batches| schema.contains(&batches.schema()))
-        {
-            let statistics = calculate_statistics(&schema, &partitions);
-            debug!("MemTable statistics: {:?}", statistics);
-
-            Ok(Self {
-                schema,
-                batches: partitions,
-                statistics,
-            })
-        } else {
-            Err(DataFusionError::Plan(
-                "Mismatch between schema and batches".to_string(),
-            ))
-        }
-    }
-
-    /// Create a mem table by reading from another data source
-    pub async fn load(
-        t: Arc<dyn TableProvider>,
-        batch_size: usize,
-        output_partitions: Option<usize>,
-    ) -> Result<Self> {
-        let schema = t.schema();
-        let exec = t.scan(&None, batch_size, &[], None)?;
-        let partition_count = exec.output_partitioning().partition_count();
-
-        let tasks = (0..partition_count)
-            .map(|part_i| {
-                let exec = exec.clone();
-                tokio::spawn(async move {
-                    let stream = exec.execute(part_i).await?;
-                    common::collect(stream).await
-                })
-            })
-            // this collect *is needed* so that the join below can
-            // switch between tasks
-            .collect::<Vec<_>>();
-
-        let mut data: Vec<Vec<RecordBatch>> =
-            Vec::with_capacity(exec.output_partitioning().partition_count());
-        for task in tasks {
-            let result = task.await.expect("MemTable::load could not join task")?;
-            data.push(result);
-        }
-
-        let exec = MemoryExec::try_new(&data, schema.clone(), None)?;
-
-        if let Some(num_partitions) = output_partitions {
-            let exec = RepartitionExec::try_new(
-                Arc::new(exec),
-                Partitioning::RoundRobinBatch(num_partitions),
-            )?;
-
-            // execute and collect results
-            let mut output_partitions = vec![];
-            for i in 0..exec.output_partitioning().partition_count() {
-                // execute this *output* partition and collect all batches
-                let mut stream = exec.execute(i).await?;
-                let mut batches = vec![];
-                while let Some(result) = stream.next().await {
-                    batches.push(result?);
-                }
-                output_partitions.push(batches);
-            }
-
-            return MemTable::try_new(schema.clone(), output_partitions);
-        }
-        MemTable::try_new(schema.clone(), data)
-    }
-}
-
-impl TableProvider for MemTable {
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-
-    fn scan(
-        &self,
-        projection: &Option<Vec<usize>>,
-        _batch_size: usize,
-        _filters: &[Expr],
-        _limit: Option<usize>,
-    ) -> Result<Arc<dyn ExecutionPlan>> {
-        let columns: Vec<usize> = match projection {
-            Some(p) => p.clone(),
-            None => {
-                let l = self.schema.fields().len();
-                let mut v = Vec::with_capacity(l);
-                for i in 0..l {
-                    v.push(i);
-                }
-                v
-            }
-        };
-
-        let projected_columns: Result<Vec<Field>> = columns
-            .iter()
-            .map(|i| {
-                if *i < self.schema.fields().len() {
-                    Ok(self.schema.field(*i).clone())
-                } else {
-                    Err(DataFusionError::Internal(
-                        "Projection index out of range".to_string(),
-                    ))
-                }
-            })
-            .collect();
-
-        let projected_schema = Arc::new(Schema::new(projected_columns?));
-
-        Ok(Arc::new(MemoryExec::try_new(
-            &self.batches.clone(),
-            projected_schema,
-            projection.clone(),
-        )?))
-    }
-
-    fn statistics(&self) -> Statistics {
-        self.statistics.clone()
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use arrow::array::Int32Array;
-    use arrow::datatypes::{DataType, Field, Schema};
-    use futures::StreamExt;
-    use std::collections::HashMap;
-
-    #[tokio::test]
-    async fn test_with_projection() -> Result<()> {
-        let schema = Arc::new(Schema::new(vec![
-            Field::new("a", DataType::Int32, false),
-            Field::new("b", DataType::Int32, false),
-            Field::new("c", DataType::Int32, false),
-            Field::new("d", DataType::Int32, true),
-        ]));
-
-        let batch = RecordBatch::try_new(
-            schema.clone(),
-            vec![
-                Arc::new(Int32Array::from(vec![1, 2, 3])),
-                Arc::new(Int32Array::from(vec![4, 5, 6])),
-                Arc::new(Int32Array::from(vec![7, 8, 9])),
-                Arc::new(Int32Array::from(vec![None, None, Some(9)])),
-            ],
-        )?;
-
-        let provider = MemTable::try_new(schema, vec![vec![batch]])?;
-
-        assert_eq!(provider.statistics().num_rows, Some(3));
-        assert_eq!(
-            provider.statistics().column_statistics,
-            Some(vec![
-                ColumnStatistics {
-                    null_count: Some(0),
-                    max_value: None,
-                    min_value: None,
-                    distinct_count: None,
-                },
-                ColumnStatistics {
-                    null_count: Some(0),
-                    max_value: None,
-                    min_value: None,
-                    distinct_count: None,
-                },
-                ColumnStatistics {
-                    null_count: Some(0),
-                    max_value: None,
-                    min_value: None,
-                    distinct_count: None,
-                },
-                ColumnStatistics {
-                    null_count: Some(2),
-                    max_value: None,
-                    min_value: None,
-                    distinct_count: None,
-                },
-            ])
-        );
-
-        // scan with projection
-        let exec = provider.scan(&Some(vec![2, 1]), 1024, &[], None)?;
-        let mut it = exec.execute(0).await?;
-        let batch2 = it.next().await.unwrap()?;
-        assert_eq!(2, batch2.schema().fields().len());
-        assert_eq!("c", batch2.schema().field(0).name());
-        assert_eq!("b", batch2.schema().field(1).name());
-        assert_eq!(2, batch2.num_columns());
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn test_without_projection() -> Result<()> {
-        let schema = Arc::new(Schema::new(vec![
-            Field::new("a", DataType::Int32, false),
-            Field::new("b", DataType::Int32, false),
-            Field::new("c", DataType::Int32, false),
-        ]));
-
-        let batch = RecordBatch::try_new(
-            schema.clone(),
-            vec![
-                Arc::new(Int32Array::from(vec![1, 2, 3])),
-                Arc::new(Int32Array::from(vec![4, 5, 6])),
-                Arc::new(Int32Array::from(vec![7, 8, 9])),
-            ],
-        )?;
-
-        let provider = MemTable::try_new(schema, vec![vec![batch]])?;
-
-        let exec = provider.scan(&None, 1024, &[], None)?;
-        let mut it = exec.execute(0).await?;
-        let batch1 = it.next().await.unwrap()?;
-        assert_eq!(3, batch1.schema().fields().len());
-        assert_eq!(3, batch1.num_columns());
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_invalid_projection() -> Result<()> {
-        let schema = Arc::new(Schema::new(vec![
-            Field::new("a", DataType::Int32, false),
-            Field::new("b", DataType::Int32, false),
-            Field::new("c", DataType::Int32, false),
-        ]));
-
-        let batch = RecordBatch::try_new(
-            schema.clone(),
-            vec![
-                Arc::new(Int32Array::from(vec![1, 2, 3])),
-                Arc::new(Int32Array::from(vec![4, 5, 6])),
-                Arc::new(Int32Array::from(vec![7, 8, 9])),
-            ],
-        )?;
-
-        let provider = MemTable::try_new(schema, vec![vec![batch]])?;
-
-        let projection: Vec<usize> = vec![0, 4];
-
-        match provider.scan(&Some(projection), 1024, &[], None) {
-            Err(DataFusionError::Internal(e)) => {
-                assert_eq!("\"Projection index out of range\"", format!("{:?}", e))
-            }
-            _ => panic!("Scan should failed on invalid projection"),
-        };
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_schema_validation_incompatible_column() -> Result<()> {
-        let schema1 = Arc::new(Schema::new(vec![
-            Field::new("a", DataType::Int32, false),
-            Field::new("b", DataType::Int32, false),
-            Field::new("c", DataType::Int32, false),
-        ]));
-
-        let schema2 = Arc::new(Schema::new(vec![
-            Field::new("a", DataType::Int32, false),
-            Field::new("b", DataType::Float64, false),
-            Field::new("c", DataType::Int32, false),
-        ]));
-
-        let batch = RecordBatch::try_new(
-            schema1,
-            vec![
-                Arc::new(Int32Array::from(vec![1, 2, 3])),
-                Arc::new(Int32Array::from(vec![4, 5, 6])),
-                Arc::new(Int32Array::from(vec![7, 8, 9])),
-            ],
-        )?;
-
-        match MemTable::try_new(schema2, vec![vec![batch]]) {
-            Err(DataFusionError::Plan(e)) => assert_eq!(
-                "\"Mismatch between schema and batches\"",
-                format!("{:?}", e)
-            ),
-            _ => panic!("MemTable::new should have failed due to schema mismatch"),
-        }
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_schema_validation_different_column_count() -> Result<()> {
-        let schema1 = Arc::new(Schema::new(vec![
-            Field::new("a", DataType::Int32, false),
-            Field::new("c", DataType::Int32, false),
-        ]));
-
-        let schema2 = Arc::new(Schema::new(vec![
-            Field::new("a", DataType::Int32, false),
-            Field::new("b", DataType::Int32, false),
-            Field::new("c", DataType::Int32, false),
-        ]));
-
-        let batch = RecordBatch::try_new(
-            schema1,
-            vec![
-                Arc::new(Int32Array::from(vec![1, 2, 3])),
-                Arc::new(Int32Array::from(vec![7, 5, 9])),
-            ],
-        )?;
-
-        match MemTable::try_new(schema2, vec![vec![batch]]) {
-            Err(DataFusionError::Plan(e)) => assert_eq!(
-                "\"Mismatch between schema and batches\"",
-                format!("{:?}", e)
-            ),
-            _ => panic!("MemTable::new should have failed due to schema mismatch"),
-        }
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn test_merged_schema() -> Result<()> {
-        let mut metadata = HashMap::new();
-        metadata.insert("foo".to_string(), "bar".to_string());
-
-        let schema1 = Schema::new_with_metadata(
-            vec![
-                Field::new("a", DataType::Int32, false),
-                Field::new("b", DataType::Int32, false),
-                Field::new("c", DataType::Int32, false),
-            ],
-            // test for comparing metadata
-            metadata,
-        );
-
-        let schema2 = Schema::new(vec![
-            // test for comparing nullability
-            Field::new("a", DataType::Int32, true),
-            Field::new("b", DataType::Int32, false),
-            Field::new("c", DataType::Int32, false),
-        ]);
-
-        let merged_schema = Schema::try_merge(vec![schema1.clone(), schema2.clone()])?;
-
-        let batch1 = RecordBatch::try_new(
-            Arc::new(schema1),
-            vec![
-                Arc::new(Int32Array::from(vec![1, 2, 3])),
-                Arc::new(Int32Array::from(vec![4, 5, 6])),
-                Arc::new(Int32Array::from(vec![7, 8, 9])),
-            ],
-        )?;
-
-        let batch2 = RecordBatch::try_new(
-            Arc::new(schema2),
-            vec![
-                Arc::new(Int32Array::from(vec![1, 2, 3])),
-                Arc::new(Int32Array::from(vec![4, 5, 6])),
-                Arc::new(Int32Array::from(vec![7, 8, 9])),
-            ],
-        )?;
-
-        let provider =
-            MemTable::try_new(Arc::new(merged_schema), vec![vec![batch1, batch2]])?;
-
-        let exec = provider.scan(&None, 1024, &[], None)?;
-        let mut it = exec.execute(0).await?;
-        let batch1 = it.next().await.unwrap()?;
-        assert_eq!(3, batch1.schema().fields().len());
-        assert_eq!(3, batch1.num_columns());
-        assert_eq!(provider.statistics().num_rows, Some(6));
-
-        Ok(())
-    }
-}
diff --git a/rust/datafusion/src/datasource/mod.rs b/rust/datafusion/src/datasource/mod.rs
deleted file mode 100644
index 099098dd6f6..00000000000
--- a/rust/datafusion/src/datasource/mod.rs
+++ /dev/null
@@ -1,28 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! DataFusion data sources
-
-pub mod csv;
-pub mod datasource;
-pub mod empty;
-pub mod memory;
-pub mod parquet;
-
-pub use self::csv::{CsvFile, CsvReadOptions};
-pub use self::datasource::TableProvider;
-pub use self::memory::MemTable;
diff --git a/rust/datafusion/src/datasource/parquet.rs b/rust/datafusion/src/datasource/parquet.rs
deleted file mode 100644
index 30e47df5f64..00000000000
--- a/rust/datafusion/src/datasource/parquet.rs
+++ /dev/null
@@ -1,373 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Parquet data source
-
-use std::any::Any;
-use std::string::String;
-use std::sync::Arc;
-
-use arrow::datatypes::*;
-
-use crate::datasource::datasource::Statistics;
-use crate::datasource::TableProvider;
-use crate::error::Result;
-use crate::logical_plan::{combine_filters, Expr};
-use crate::physical_plan::parquet::ParquetExec;
-use crate::physical_plan::ExecutionPlan;
-
-use super::datasource::TableProviderFilterPushDown;
-
-/// Table-based representation of a `ParquetFile`.
-pub struct ParquetTable {
-    path: String,
-    schema: SchemaRef,
-    statistics: Statistics,
-    max_concurrency: usize,
-}
-
-impl ParquetTable {
-    /// Attempt to initialize a new `ParquetTable` from a file path.
-    pub fn try_new(path: &str, max_concurrency: usize) -> Result<Self> {
-        let parquet_exec = ParquetExec::try_from_path(path, None, None, 0, 1, None)?;
-        let schema = parquet_exec.schema();
-        Ok(Self {
-            path: path.to_string(),
-            schema,
-            statistics: parquet_exec.statistics().to_owned(),
-            max_concurrency,
-        })
-    }
-
-    /// Get the path for the Parquet file(s) represented by this ParquetTable instance
-    pub fn path(&self) -> &str {
-        &self.path
-    }
-}
-
-impl TableProvider for ParquetTable {
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    /// Get the schema for this parquet file.
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-
-    fn supports_filter_pushdown(
-        &self,
-        _filter: &Expr,
-    ) -> Result<TableProviderFilterPushDown> {
-        Ok(TableProviderFilterPushDown::Inexact)
-    }
-
-    /// Scan the file(s), using the provided projection, and return one BatchIterator per
-    /// partition.
-    fn scan(
-        &self,
-        projection: &Option<Vec<usize>>,
-        batch_size: usize,
-        filters: &[Expr],
-        limit: Option<usize>,
-    ) -> Result<Arc<dyn ExecutionPlan>> {
-        let predicate = combine_filters(filters);
-        Ok(Arc::new(ParquetExec::try_from_path(
-            &self.path,
-            projection.clone(),
-            predicate,
-            limit
-                .map(|l| std::cmp::min(l, batch_size))
-                .unwrap_or(batch_size),
-            self.max_concurrency,
-            limit,
-        )?))
-    }
-
-    fn statistics(&self) -> Statistics {
-        self.statistics.clone()
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use arrow::array::{
-        BinaryArray, BooleanArray, Float32Array, Float64Array, Int32Array,
-        TimestampNanosecondArray,
-    };
-    use arrow::record_batch::RecordBatch;
-    use futures::StreamExt;
-
-    #[tokio::test]
-    async fn read_small_batches() -> Result<()> {
-        let table = load_table("alltypes_plain.parquet")?;
-        let projection = None;
-        let exec = table.scan(&projection, 2, &[], None)?;
-        let stream = exec.execute(0).await?;
-
-        let _ = stream
-            .map(|batch| {
-                let batch = batch.unwrap();
-                assert_eq!(11, batch.num_columns());
-                assert_eq!(2, batch.num_rows());
-            })
-            .fold(0, |acc, _| async move { acc + 1i32 })
-            .await;
-
-        // test metadata
-        assert_eq!(table.statistics().num_rows, Some(8));
-        assert_eq!(table.statistics().total_byte_size, Some(671));
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn read_alltypes_plain_parquet() -> Result<()> {
-        let table = load_table("alltypes_plain.parquet")?;
-
-        let x: Vec<String> = table
-            .schema()
-            .fields()
-            .iter()
-            .map(|f| format!("{}: {:?}", f.name(), f.data_type()))
-            .collect();
-        let y = x.join("\n");
-        assert_eq!(
-            "id: Int32\n\
-             bool_col: Boolean\n\
-             tinyint_col: Int32\n\
-             smallint_col: Int32\n\
-             int_col: Int32\n\
-             bigint_col: Int64\n\
-             float_col: Float32\n\
-             double_col: Float64\n\
-             date_string_col: Binary\n\
-             string_col: Binary\n\
-             timestamp_col: Timestamp(Nanosecond, None)",
-            y
-        );
-
-        let projection = None;
-        let batch = get_first_batch(table, &projection).await?;
-
-        assert_eq!(11, batch.num_columns());
-        assert_eq!(8, batch.num_rows());
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn read_bool_alltypes_plain_parquet() -> Result<()> {
-        let table = load_table("alltypes_plain.parquet")?;
-        let projection = Some(vec![1]);
-        let batch = get_first_batch(table, &projection).await?;
-
-        assert_eq!(1, batch.num_columns());
-        assert_eq!(8, batch.num_rows());
-
-        let array = batch
-            .column(0)
-            .as_any()
-            .downcast_ref::<BooleanArray>()
-            .unwrap();
-        let mut values: Vec<bool> = vec![];
-        for i in 0..batch.num_rows() {
-            values.push(array.value(i));
-        }
-
-        assert_eq!(
-            "[true, false, true, false, true, false, true, false]",
-            format!("{:?}", values)
-        );
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn read_i32_alltypes_plain_parquet() -> Result<()> {
-        let table = load_table("alltypes_plain.parquet")?;
-        let projection = Some(vec![0]);
-        let batch = get_first_batch(table, &projection).await?;
-
-        assert_eq!(1, batch.num_columns());
-        assert_eq!(8, batch.num_rows());
-
-        let array = batch
-            .column(0)
-            .as_any()
-            .downcast_ref::<Int32Array>()
-            .unwrap();
-        let mut values: Vec<i32> = vec![];
-        for i in 0..batch.num_rows() {
-            values.push(array.value(i));
-        }
-
-        assert_eq!("[4, 5, 6, 7, 2, 3, 0, 1]", format!("{:?}", values));
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn read_i96_alltypes_plain_parquet() -> Result<()> {
-        let table = load_table("alltypes_plain.parquet")?;
-        let projection = Some(vec![10]);
-        let batch = get_first_batch(table, &projection).await?;
-
-        assert_eq!(1, batch.num_columns());
-        assert_eq!(8, batch.num_rows());
-
-        let array = batch
-            .column(0)
-            .as_any()
-            .downcast_ref::<TimestampNanosecondArray>()
-            .unwrap();
-        let mut values: Vec<i64> = vec![];
-        for i in 0..batch.num_rows() {
-            values.push(array.value(i));
-        }
-
-        assert_eq!("[1235865600000000000, 1235865660000000000, 1238544000000000000, 1238544060000000000, 1233446400000000000, 1233446460000000000, 1230768000000000000, 1230768060000000000]", format!("{:?}", values));
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn read_f32_alltypes_plain_parquet() -> Result<()> {
-        let table = load_table("alltypes_plain.parquet")?;
-        let projection = Some(vec![6]);
-        let batch = get_first_batch(table, &projection).await?;
-
-        assert_eq!(1, batch.num_columns());
-        assert_eq!(8, batch.num_rows());
-
-        let array = batch
-            .column(0)
-            .as_any()
-            .downcast_ref::<Float32Array>()
-            .unwrap();
-        let mut values: Vec<f32> = vec![];
-        for i in 0..batch.num_rows() {
-            values.push(array.value(i));
-        }
-
-        assert_eq!(
-            "[0.0, 1.1, 0.0, 1.1, 0.0, 1.1, 0.0, 1.1]",
-            format!("{:?}", values)
-        );
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn read_f64_alltypes_plain_parquet() -> Result<()> {
-        let table = load_table("alltypes_plain.parquet")?;
-        let projection = Some(vec![7]);
-        let batch = get_first_batch(table, &projection).await?;
-
-        assert_eq!(1, batch.num_columns());
-        assert_eq!(8, batch.num_rows());
-
-        let array = batch
-            .column(0)
-            .as_any()
-            .downcast_ref::<Float64Array>()
-            .unwrap();
-        let mut values: Vec<f64> = vec![];
-        for i in 0..batch.num_rows() {
-            values.push(array.value(i));
-        }
-
-        assert_eq!(
-            "[0.0, 10.1, 0.0, 10.1, 0.0, 10.1, 0.0, 10.1]",
-            format!("{:?}", values)
-        );
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn read_binary_alltypes_plain_parquet() -> Result<()> {
-        let table = load_table("alltypes_plain.parquet")?;
-        let projection = Some(vec![9]);
-        let batch = get_first_batch(table, &projection).await?;
-
-        assert_eq!(1, batch.num_columns());
-        assert_eq!(8, batch.num_rows());
-
-        let array = batch
-            .column(0)
-            .as_any()
-            .downcast_ref::<BinaryArray>()
-            .unwrap();
-        let mut values: Vec<&str> = vec![];
-        for i in 0..batch.num_rows() {
-            values.push(std::str::from_utf8(array.value(i)).unwrap());
-        }
-
-        assert_eq!(
-            "[\"0\", \"1\", \"0\", \"1\", \"0\", \"1\", \"0\", \"1\"]",
-            format!("{:?}", values)
-        );
-
-        Ok(())
-    }
-
-    fn load_table(name: &str) -> Result<Arc<dyn TableProvider>> {
-        let testdata = arrow::util::test_util::parquet_test_data();
-        let filename = format!("{}/{}", testdata, name);
-        let table = ParquetTable::try_new(&filename, 2)?;
-        Ok(Arc::new(table))
-    }
-
-    async fn get_first_batch(
-        table: Arc<dyn TableProvider>,
-        projection: &Option<Vec<usize>>,
-    ) -> Result<RecordBatch> {
-        let exec = table.scan(projection, 1024, &[], None)?;
-        let mut it = exec.execute(0).await?;
-        it.next()
-            .await
-            .expect("should have received at least one batch")
-            .map_err(|e| e.into())
-    }
-
-    #[test]
-    fn combine_zero_filters() {
-        let result = combine_filters(&[]);
-        assert_eq!(result, None);
-    }
-
-    #[test]
-    fn combine_one_filter() {
-        use crate::logical_plan::{binary_expr, col, lit, Operator};
-        let filter = binary_expr(col("c1"), Operator::Lt, lit(1));
-        let result = combine_filters(&[filter.clone()]);
-        assert_eq!(result, Some(filter));
-    }
-
-    #[test]
-    fn combine_multiple_filters() {
-        use crate::logical_plan::{and, binary_expr, col, lit, Operator};
-        let filter1 = binary_expr(col("c1"), Operator::Lt, lit(1));
-        let filter2 = binary_expr(col("c2"), Operator::Lt, lit(2));
-        let filter3 = binary_expr(col("c3"), Operator::Lt, lit(3));
-        let result =
-            combine_filters(&[filter1.clone(), filter2.clone(), filter3.clone()]);
-        assert_eq!(result, Some(and(and(filter1, filter2), filter3)));
-    }
-}
diff --git a/rust/datafusion/src/error.rs b/rust/datafusion/src/error.rs
deleted file mode 100644
index 903faeabf69..00000000000
--- a/rust/datafusion/src/error.rs
+++ /dev/null
@@ -1,120 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! DataFusion error types
-
-use std::error;
-use std::fmt::{Display, Formatter};
-use std::io;
-use std::result;
-
-use arrow::error::ArrowError;
-use parquet::errors::ParquetError;
-use sqlparser::parser::ParserError;
-
-/// Result type for operations that could result in an [DataFusionError]
-pub type Result<T> = result::Result<T, DataFusionError>;
-
-/// DataFusion error
-#[derive(Debug)]
-#[allow(missing_docs)]
-pub enum DataFusionError {
-    /// Error returned by arrow.
-    ArrowError(ArrowError),
-    /// Wraps an error from the Parquet crate
-    ParquetError(ParquetError),
-    /// Error associated to I/O operations and associated traits.
-    IoError(io::Error),
-    /// Error returned when SQL is syntactically incorrect.
-    SQL(ParserError),
-    /// Error returned on a branch that we know it is possible
-    /// but to which we still have no implementation for.
-    /// Often, these errors are tracked in our issue tracker.
-    NotImplemented(String),
-    /// Error returned as a consequence of an error in DataFusion.
-    /// This error should not happen in normal usage of DataFusion.
-    // DataFusions has internal invariants that we are unable to ask the compiler to check for us.
-    // This error is raised when one of those invariants is not verified during execution.
-    Internal(String),
-    /// This error happens whenever a plan is not valid. Examples include
-    /// impossible casts, schema inference not possible and non-unique column names.
-    Plan(String),
-    /// Error returned during execution of the query.
-    /// Examples include files not found, errors in parsing certain types.
-    Execution(String),
-}
-
-impl DataFusionError {
-    /// Wraps this [DataFusionError] as an [arrow::error::ArrowError].
-    pub fn into_arrow_external_error(self) -> ArrowError {
-        ArrowError::from_external_error(Box::new(self))
-    }
-}
-
-impl From<io::Error> for DataFusionError {
-    fn from(e: io::Error) -> Self {
-        DataFusionError::IoError(e)
-    }
-}
-
-impl From<ArrowError> for DataFusionError {
-    fn from(e: ArrowError) -> Self {
-        DataFusionError::ArrowError(e)
-    }
-}
-
-impl From<ParquetError> for DataFusionError {
-    fn from(e: ParquetError) -> Self {
-        DataFusionError::ParquetError(e)
-    }
-}
-
-impl From<ParserError> for DataFusionError {
-    fn from(e: ParserError) -> Self {
-        DataFusionError::SQL(e)
-    }
-}
-
-impl Display for DataFusionError {
-    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        match *self {
-            DataFusionError::ArrowError(ref desc) => write!(f, "Arrow error: {}", desc),
-            DataFusionError::ParquetError(ref desc) => {
-                write!(f, "Parquet error: {}", desc)
-            }
-            DataFusionError::IoError(ref desc) => write!(f, "IO error: {}", desc),
-            DataFusionError::SQL(ref desc) => {
-                write!(f, "SQL error: {:?}", desc)
-            }
-            DataFusionError::NotImplemented(ref desc) => {
-                write!(f, "This feature is not implemented: {}", desc)
-            }
-            DataFusionError::Internal(ref desc) => {
-                write!(f, "Internal error: {}. This was likely caused by a bug in DataFusion's \
-                    code and we would welcome that you file an bug report in our issue tracker", desc)
-            }
-            DataFusionError::Plan(ref desc) => {
-                write!(f, "Error during planning: {}", desc)
-            }
-            DataFusionError::Execution(ref desc) => {
-                write!(f, "Execution error: {}", desc)
-            }
-        }
-    }
-}
-
-impl error::Error for DataFusionError {}
diff --git a/rust/datafusion/src/execution/context.rs b/rust/datafusion/src/execution/context.rs
deleted file mode 100644
index c83ca4d8de5..00000000000
--- a/rust/datafusion/src/execution/context.rs
+++ /dev/null
@@ -1,3123 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! ExecutionContext contains methods for registering data sources and executing queries
-use crate::{
-    catalog::{
-        catalog::{CatalogList, MemoryCatalogList},
-        information_schema::CatalogWithInformationSchema,
-    },
-    optimizer::hash_build_probe_order::HashBuildProbeOrder,
-    physical_optimizer::optimizer::PhysicalOptimizerRule,
-};
-use log::debug;
-use std::fs;
-use std::path::Path;
-use std::string::String;
-use std::sync::Arc;
-use std::{
-    collections::{HashMap, HashSet},
-    sync::Mutex,
-};
-
-use futures::{StreamExt, TryStreamExt};
-use tokio::task::{self, JoinHandle};
-
-use arrow::csv;
-
-use crate::catalog::{
-    catalog::{CatalogProvider, MemoryCatalogProvider},
-    schema::{MemorySchemaProvider, SchemaProvider},
-    ResolvedTableReference, TableReference,
-};
-use crate::datasource::csv::CsvFile;
-use crate::datasource::parquet::ParquetTable;
-use crate::datasource::TableProvider;
-use crate::error::{DataFusionError, Result};
-use crate::execution::dataframe_impl::DataFrameImpl;
-use crate::logical_plan::{
-    FunctionRegistry, LogicalPlan, LogicalPlanBuilder, ToDFSchema,
-};
-use crate::optimizer::constant_folding::ConstantFolding;
-use crate::optimizer::filter_push_down::FilterPushDown;
-use crate::optimizer::limit_push_down::LimitPushDown;
-use crate::optimizer::optimizer::OptimizerRule;
-use crate::optimizer::projection_push_down::ProjectionPushDown;
-use crate::physical_optimizer::coalesce_batches::CoalesceBatches;
-use crate::physical_optimizer::merge_exec::AddMergeExec;
-use crate::physical_optimizer::repartition::Repartition;
-
-use crate::physical_plan::csv::CsvReadOptions;
-use crate::physical_plan::planner::DefaultPhysicalPlanner;
-use crate::physical_plan::udf::ScalarUDF;
-use crate::physical_plan::ExecutionPlan;
-use crate::physical_plan::PhysicalPlanner;
-use crate::sql::{
-    parser::{DFParser, FileType},
-    planner::{ContextProvider, SqlToRel},
-};
-use crate::variable::{VarProvider, VarType};
-use crate::{dataframe::DataFrame, physical_plan::udaf::AggregateUDF};
-use parquet::arrow::ArrowWriter;
-use parquet::file::properties::WriterProperties;
-
-/// ExecutionContext is the main interface for executing queries with DataFusion. The context
-/// provides the following functionality:
-///
-/// * Create DataFrame from a CSV or Parquet data source.
-/// * Register a CSV or Parquet data source as a table that can be referenced from a SQL query.
-/// * Register a custom data source that can be referenced from a SQL query.
-/// * Execution a SQL query
-///
-/// The following example demonstrates how to use the context to execute a query against a CSV
-/// data source using the DataFrame API:
-///
-/// ```
-/// use datafusion::prelude::*;
-/// # use datafusion::error::Result;
-/// # fn main() -> Result<()> {
-/// let mut ctx = ExecutionContext::new();
-/// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
-/// let df = df.filter(col("a").lt_eq(col("b")))?
-///            .aggregate(vec![col("a")], vec![min(col("b"))])?
-///            .limit(100)?;
-/// let results = df.collect();
-/// # Ok(())
-/// # }
-/// ```
-///
-/// The following example demonstrates how to execute the same query using SQL:
-///
-/// ```
-/// use datafusion::prelude::*;
-///
-/// # use datafusion::error::Result;
-/// # fn main() -> Result<()> {
-/// let mut ctx = ExecutionContext::new();
-/// ctx.register_csv("example", "tests/example.csv", CsvReadOptions::new())?;
-/// let results = ctx.sql("SELECT a, MIN(b) FROM example GROUP BY a LIMIT 100")?;
-/// # Ok(())
-/// # }
-/// ```
-#[derive(Clone)]
-pub struct ExecutionContext {
-    /// Internal state for the context
-    pub state: Arc<Mutex<ExecutionContextState>>,
-}
-
-impl ExecutionContext {
-    /// Creates a new execution context using a default configuration.
-    pub fn new() -> Self {
-        Self::with_config(ExecutionConfig::new())
-    }
-
-    /// Creates a new execution context using the provided configuration.
-    pub fn with_config(config: ExecutionConfig) -> Self {
-        let catalog_list = Arc::new(MemoryCatalogList::new()) as Arc<dyn CatalogList>;
-
-        if config.create_default_catalog_and_schema {
-            let default_catalog = MemoryCatalogProvider::new();
-
-            default_catalog.register_schema(
-                config.default_schema.clone(),
-                Arc::new(MemorySchemaProvider::new()),
-            );
-
-            let default_catalog: Arc<dyn CatalogProvider> = if config.information_schema {
-                Arc::new(CatalogWithInformationSchema::new(
-                    catalog_list.clone(),
-                    Arc::new(default_catalog),
-                ))
-            } else {
-                Arc::new(default_catalog)
-            };
-
-            catalog_list
-                .register_catalog(config.default_catalog.clone(), default_catalog);
-        }
-
-        Self {
-            state: Arc::new(Mutex::new(ExecutionContextState {
-                catalog_list,
-                scalar_functions: HashMap::new(),
-                var_provider: HashMap::new(),
-                aggregate_functions: HashMap::new(),
-                config,
-            })),
-        }
-    }
-
-    /// Creates a dataframe that will execute a SQL query.
-    pub fn sql(&mut self, sql: &str) -> Result<Arc<dyn DataFrame>> {
-        let plan = self.create_logical_plan(sql)?;
-        match plan {
-            LogicalPlan::CreateExternalTable {
-                ref schema,
-                ref name,
-                ref location,
-                ref file_type,
-                ref has_header,
-            } => match file_type {
-                FileType::CSV => {
-                    self.register_csv(
-                        name,
-                        location,
-                        CsvReadOptions::new()
-                            .schema(&schema.as_ref().to_owned().into())
-                            .has_header(*has_header),
-                    )?;
-                    let plan = LogicalPlanBuilder::empty(false).build()?;
-                    Ok(Arc::new(DataFrameImpl::new(self.state.clone(), &plan)))
-                }
-                FileType::Parquet => {
-                    self.register_parquet(name, location)?;
-                    let plan = LogicalPlanBuilder::empty(false).build()?;
-                    Ok(Arc::new(DataFrameImpl::new(self.state.clone(), &plan)))
-                }
-                _ => Err(DataFusionError::NotImplemented(format!(
-                    "Unsupported file type {:?}.",
-                    file_type
-                ))),
-            },
-
-            plan => Ok(Arc::new(DataFrameImpl::new(
-                self.state.clone(),
-                &self.optimize(&plan)?,
-            ))),
-        }
-    }
-
-    /// Creates a logical plan.
-    ///
-    /// This function is intended for internal use and should not be called directly.
-    pub fn create_logical_plan(&self, sql: &str) -> Result<LogicalPlan> {
-        let statements = DFParser::parse_sql(sql)?;
-
-        if statements.len() != 1 {
-            return Err(DataFusionError::NotImplemented(
-                "The context currently only supports a single SQL statement".to_string(),
-            ));
-        }
-
-        // create a query planner
-        let state = self.state.lock().unwrap().clone();
-        let query_planner = SqlToRel::new(&state);
-        query_planner.statement_to_plan(&statements[0])
-    }
-
-    /// Registers a variable provider within this context.
-    pub fn register_variable(
-        &mut self,
-        variable_type: VarType,
-        provider: Arc<dyn VarProvider + Send + Sync>,
-    ) {
-        self.state
-            .lock()
-            .unwrap()
-            .var_provider
-            .insert(variable_type, provider);
-    }
-
-    /// Registers a scalar UDF within this context.
-    ///
-    /// Note in SQL queries, function names are looked up using
-    /// lowercase unless the query uses quotes. For example,
-    ///
-    /// `SELECT MY_FUNC(x)...` will look for a function named `"my_func"`
-    /// `SELECT "my_FUNC"(x)` will look for a function named `"my_FUNC"`
-    pub fn register_udf(&mut self, f: ScalarUDF) {
-        self.state
-            .lock()
-            .unwrap()
-            .scalar_functions
-            .insert(f.name.clone(), Arc::new(f));
-    }
-
-    /// Registers an aggregate UDF within this context.
-    ///
-    /// Note in SQL queries, aggregate names are looked up using
-    /// lowercase unless the query uses quotes. For example,
-    ///
-    /// `SELECT MY_UDAF(x)...` will look for an aggregate named `"my_udaf"`
-    /// `SELECT "my_UDAF"(x)` will look for an aggregate named `"my_UDAF"`
-    pub fn register_udaf(&mut self, f: AggregateUDF) {
-        self.state
-            .lock()
-            .unwrap()
-            .aggregate_functions
-            .insert(f.name.clone(), Arc::new(f));
-    }
-
-    /// Creates a DataFrame for reading a CSV data source.
-    pub fn read_csv(
-        &mut self,
-        filename: &str,
-        options: CsvReadOptions,
-    ) -> Result<Arc<dyn DataFrame>> {
-        Ok(Arc::new(DataFrameImpl::new(
-            self.state.clone(),
-            &LogicalPlanBuilder::scan_csv(&filename, options, None)?.build()?,
-        )))
-    }
-
-    /// Creates a DataFrame for reading a Parquet data source.
-    pub fn read_parquet(&mut self, filename: &str) -> Result<Arc<dyn DataFrame>> {
-        Ok(Arc::new(DataFrameImpl::new(
-            self.state.clone(),
-            &LogicalPlanBuilder::scan_parquet(
-                &filename,
-                None,
-                self.state.lock().unwrap().config.concurrency,
-            )?
-            .build()?,
-        )))
-    }
-
-    /// Creates a DataFrame for reading a custom TableProvider.
-    pub fn read_table(
-        &mut self,
-        provider: Arc<dyn TableProvider>,
-    ) -> Result<Arc<dyn DataFrame>> {
-        let schema = provider.schema();
-        let table_scan = LogicalPlan::TableScan {
-            table_name: "".to_string(),
-            source: provider,
-            projected_schema: schema.to_dfschema_ref()?,
-            projection: None,
-            filters: vec![],
-            limit: None,
-        };
-        Ok(Arc::new(DataFrameImpl::new(
-            self.state.clone(),
-            &LogicalPlanBuilder::from(&table_scan).build()?,
-        )))
-    }
-
-    /// Registers a CSV data source so that it can be referenced from SQL statements
-    /// executed against this context.
-    pub fn register_csv(
-        &mut self,
-        name: &str,
-        filename: &str,
-        options: CsvReadOptions,
-    ) -> Result<()> {
-        self.register_table(name, Arc::new(CsvFile::try_new(filename, options)?))?;
-        Ok(())
-    }
-
-    /// Registers a Parquet data source so that it can be referenced from SQL statements
-    /// executed against this context.
-    pub fn register_parquet(&mut self, name: &str, filename: &str) -> Result<()> {
-        let table = ParquetTable::try_new(
-            &filename,
-            self.state.lock().unwrap().config.concurrency,
-        )?;
-        self.register_table(name, Arc::new(table))?;
-        Ok(())
-    }
-
-    /// Registers a named catalog using a custom `CatalogProvider` so that
-    /// it can be referenced from SQL statements executed against this
-    /// context.
-    ///
-    /// Returns the `CatalogProvider` previously registered for this
-    /// name, if any
-    pub fn register_catalog(
-        &self,
-        name: impl Into<String>,
-        catalog: Arc<dyn CatalogProvider>,
-    ) -> Option<Arc<dyn CatalogProvider>> {
-        let name = name.into();
-
-        let state = self.state.lock().unwrap();
-        let catalog = if state.config.information_schema {
-            Arc::new(CatalogWithInformationSchema::new(
-                state.catalog_list.clone(),
-                catalog,
-            ))
-        } else {
-            catalog
-        };
-
-        state.catalog_list.register_catalog(name, catalog)
-    }
-
-    /// Retrieves a `CatalogProvider` instance by name
-    pub fn catalog(&self, name: &str) -> Option<Arc<dyn CatalogProvider>> {
-        self.state.lock().unwrap().catalog_list.catalog(name)
-    }
-
-    /// Registers a table using a custom `TableProvider` so that
-    /// it can be referenced from SQL statements executed against this
-    /// context.
-    ///
-    /// Returns the `TableProvider` previously registered for this
-    /// reference, if any
-    pub fn register_table<'a>(
-        &'a mut self,
-        table_ref: impl Into<TableReference<'a>>,
-        provider: Arc<dyn TableProvider>,
-    ) -> Result<Option<Arc<dyn TableProvider>>> {
-        let table_ref = table_ref.into();
-        self.state
-            .lock()
-            .unwrap()
-            .schema_for_ref(table_ref)?
-            .register_table(table_ref.table().to_owned(), provider)
-    }
-
-    /// Deregisters the given table.
-    ///
-    /// Returns the registered provider, if any
-    pub fn deregister_table<'a>(
-        &'a mut self,
-        table_ref: impl Into<TableReference<'a>>,
-    ) -> Result<Option<Arc<dyn TableProvider>>> {
-        let table_ref = table_ref.into();
-        self.state
-            .lock()
-            .unwrap()
-            .schema_for_ref(table_ref)?
-            .deregister_table(table_ref.table())
-    }
-
-    /// Retrieves a DataFrame representing a table previously registered by calling the
-    /// register_table function.
-    ///
-    /// Returns an error if no table has been registered with the provided reference.
-    pub fn table<'a>(
-        &self,
-        table_ref: impl Into<TableReference<'a>>,
-    ) -> Result<Arc<dyn DataFrame>> {
-        let table_ref = table_ref.into();
-        let schema = self.state.lock().unwrap().schema_for_ref(table_ref)?;
-
-        match schema.table(table_ref.table()) {
-            Some(ref provider) => {
-                let schema = provider.schema();
-                let table_scan = LogicalPlan::TableScan {
-                    table_name: table_ref.table().to_owned(),
-                    source: Arc::clone(provider),
-                    projected_schema: schema.to_dfschema_ref()?,
-                    projection: None,
-                    filters: vec![],
-                    limit: None,
-                };
-                Ok(Arc::new(DataFrameImpl::new(
-                    self.state.clone(),
-                    &LogicalPlanBuilder::from(&table_scan).build()?,
-                )))
-            }
-            _ => Err(DataFusionError::Plan(format!(
-                "No table named '{}'",
-                table_ref.table()
-            ))),
-        }
-    }
-
-    /// Returns the set of available tables in the default catalog and schema.
-    ///
-    /// Use [`table`] to get a specific table.
-    ///
-    /// [`table`]: ExecutionContext::table
-    #[deprecated(
-        note = "Please use the catalog provider interface (`ExecutionContext::catalog`) to examine available catalogs, schemas, and tables"
-    )]
-    pub fn tables(&self) -> Result<HashSet<String>> {
-        Ok(self
-            .state
-            .lock()
-            .unwrap()
-            // a bare reference will always resolve to the default catalog and schema
-            .schema_for_ref(TableReference::Bare { table: "" })?
-            .table_names()
-            .iter()
-            .cloned()
-            .collect())
-    }
-
-    /// Optimizes the logical plan by applying optimizer rules.
-    pub fn optimize(&self, plan: &LogicalPlan) -> Result<LogicalPlan> {
-        let optimizers = &self.state.lock().unwrap().config.optimizers;
-
-        let mut new_plan = plan.clone();
-        debug!("Logical plan:\n {:?}", plan);
-        for optimizer in optimizers {
-            new_plan = optimizer.optimize(&new_plan)?;
-        }
-        debug!("Optimized logical plan:\n {:?}", new_plan);
-        Ok(new_plan)
-    }
-
-    /// Creates a physical plan from a logical plan.
-    pub fn create_physical_plan(
-        &self,
-        logical_plan: &LogicalPlan,
-    ) -> Result<Arc<dyn ExecutionPlan>> {
-        let state = self.state.lock().unwrap();
-        state
-            .config
-            .query_planner
-            .create_physical_plan(logical_plan, &state)
-    }
-
-    /// Executes a query and writes the results to a partitioned CSV file.
-    pub async fn write_csv(
-        &self,
-        plan: Arc<dyn ExecutionPlan>,
-        path: String,
-    ) -> Result<()> {
-        // create directory to contain the CSV files (one per partition)
-        let fs_path = Path::new(&path);
-        match fs::create_dir(fs_path) {
-            Ok(()) => {
-                let mut tasks = vec![];
-                for i in 0..plan.output_partitioning().partition_count() {
-                    let plan = plan.clone();
-                    let filename = format!("part-{}.csv", i);
-                    let path = fs_path.join(&filename);
-                    let file = fs::File::create(path)?;
-                    let mut writer = csv::Writer::new(file);
-                    let stream = plan.execute(i).await?;
-                    let handle: JoinHandle<Result<()>> = task::spawn(async move {
-                        stream
-                            .map(|batch| writer.write(&batch?))
-                            .try_collect()
-                            .await
-                            .map_err(DataFusionError::from)
-                    });
-                    tasks.push(handle);
-                }
-                futures::future::join_all(tasks).await;
-                Ok(())
-            }
-            Err(e) => Err(DataFusionError::Execution(format!(
-                "Could not create directory {}: {:?}",
-                path, e
-            ))),
-        }
-    }
-
-    /// Executes a query and writes the results to a partitioned Parquet file.
-    pub async fn write_parquet(
-        &self,
-        plan: Arc<dyn ExecutionPlan>,
-        path: String,
-        writer_properties: Option<WriterProperties>,
-    ) -> Result<()> {
-        // create directory to contain the Parquet files (one per partition)
-        let fs_path = Path::new(&path);
-        match fs::create_dir(fs_path) {
-            Ok(()) => {
-                let mut tasks = vec![];
-                for i in 0..plan.output_partitioning().partition_count() {
-                    let plan = plan.clone();
-                    let filename = format!("part-{}.parquet", i);
-                    let path = fs_path.join(&filename);
-                    let file = fs::File::create(path)?;
-                    let mut writer = ArrowWriter::try_new(
-                        file.try_clone().unwrap(),
-                        plan.schema(),
-                        writer_properties.clone(),
-                    )?;
-                    let stream = plan.execute(i).await?;
-                    let handle: JoinHandle<Result<()>> = task::spawn(async move {
-                        stream
-                            .map(|batch| writer.write(&batch?))
-                            .try_collect()
-                            .await
-                            .map_err(DataFusionError::from)?;
-                        writer.close().map_err(DataFusionError::from).map(|_| ())
-                    });
-                    tasks.push(handle);
-                }
-                futures::future::join_all(tasks).await;
-                Ok(())
-            }
-            Err(e) => Err(DataFusionError::Execution(format!(
-                "Could not create directory {}: {:?}",
-                path, e
-            ))),
-        }
-    }
-}
-
-impl From<Arc<Mutex<ExecutionContextState>>> for ExecutionContext {
-    fn from(state: Arc<Mutex<ExecutionContextState>>) -> Self {
-        ExecutionContext { state }
-    }
-}
-
-impl FunctionRegistry for ExecutionContext {
-    fn udfs(&self) -> HashSet<String> {
-        self.state.lock().unwrap().udfs()
-    }
-
-    fn udf(&self, name: &str) -> Result<Arc<ScalarUDF>> {
-        self.state.lock().unwrap().udf(name)
-    }
-
-    fn udaf(&self, name: &str) -> Result<Arc<AggregateUDF>> {
-        self.state.lock().unwrap().udaf(name)
-    }
-}
-
-/// A planner used to add extensions to DataFusion logical and physical plans.
-pub trait QueryPlanner {
-    /// Given a `LogicalPlan`, create an `ExecutionPlan` suitable for execution
-    fn create_physical_plan(
-        &self,
-        logical_plan: &LogicalPlan,
-        ctx_state: &ExecutionContextState,
-    ) -> Result<Arc<dyn ExecutionPlan>>;
-}
-
-/// The query planner used if no user defined planner is provided
-struct DefaultQueryPlanner {}
-
-impl QueryPlanner for DefaultQueryPlanner {
-    /// Given a `LogicalPlan`, create an `ExecutionPlan` suitable for execution
-    fn create_physical_plan(
-        &self,
-        logical_plan: &LogicalPlan,
-        ctx_state: &ExecutionContextState,
-    ) -> Result<Arc<dyn ExecutionPlan>> {
-        let planner = DefaultPhysicalPlanner::default();
-        planner.create_physical_plan(logical_plan, ctx_state)
-    }
-}
-
-/// Configuration options for execution context
-#[derive(Clone)]
-pub struct ExecutionConfig {
-    /// Number of concurrent threads for query execution.
-    pub concurrency: usize,
-    /// Default batch size when reading data sources
-    pub batch_size: usize,
-    /// Responsible for optimizing a logical plan
-    optimizers: Vec<Arc<dyn OptimizerRule + Send + Sync>>,
-    /// Responsible for optimizing a physical execution plan
-    pub physical_optimizers: Vec<Arc<dyn PhysicalOptimizerRule + Send + Sync>>,
-    /// Responsible for planning `LogicalPlan`s, and `ExecutionPlan`
-    query_planner: Arc<dyn QueryPlanner + Send + Sync>,
-    /// Default catalog name for table resolution
-    default_catalog: String,
-    /// Default schema name for table resolution
-    default_schema: String,
-    /// Whether the default catalog and schema should be created automatically
-    create_default_catalog_and_schema: bool,
-    /// Should DataFusion provide access to `information_schema`
-    /// virtual tables for displaying schema information
-    information_schema: bool,
-    /// Should DataFusion repartition data using the join keys to execute joins in parallel
-    /// using the provided `concurrency` level
-    pub repartition_joins: bool,
-}
-
-impl ExecutionConfig {
-    /// Create an execution config with default setting
-    pub fn new() -> Self {
-        Self {
-            concurrency: num_cpus::get(),
-            batch_size: 8192,
-            optimizers: vec![
-                Arc::new(ConstantFolding::new()),
-                Arc::new(ProjectionPushDown::new()),
-                Arc::new(FilterPushDown::new()),
-                Arc::new(HashBuildProbeOrder::new()),
-                Arc::new(LimitPushDown::new()),
-            ],
-            physical_optimizers: vec![
-                Arc::new(CoalesceBatches::new()),
-                Arc::new(Repartition::new()),
-                Arc::new(AddMergeExec::new()),
-            ],
-            query_planner: Arc::new(DefaultQueryPlanner {}),
-            default_catalog: "datafusion".to_owned(),
-            default_schema: "public".to_owned(),
-            create_default_catalog_and_schema: true,
-            information_schema: false,
-            repartition_joins: true,
-        }
-    }
-
-    /// Customize max_concurrency
-    pub fn with_concurrency(mut self, n: usize) -> Self {
-        // concurrency must be greater than zero
-        assert!(n > 0);
-        self.concurrency = n;
-        self
-    }
-
-    /// Customize batch size
-    pub fn with_batch_size(mut self, n: usize) -> Self {
-        // batch size must be greater than zero
-        assert!(n > 0);
-        self.batch_size = n;
-        self
-    }
-
-    /// Replace the default query planner
-    pub fn with_query_planner(
-        mut self,
-        query_planner: Arc<dyn QueryPlanner + Send + Sync>,
-    ) -> Self {
-        self.query_planner = query_planner;
-        self
-    }
-
-    /// Replace the physical optimizer rules
-    pub fn with_physical_optimizer_rules(
-        mut self,
-        physical_optimizers: Vec<Arc<dyn PhysicalOptimizerRule + Send + Sync>>,
-    ) -> Self {
-        self.physical_optimizers = physical_optimizers;
-        self
-    }
-
-    /// Adds a new [`OptimizerRule`]
-    pub fn add_optimizer_rule(
-        mut self,
-        optimizer_rule: Arc<dyn OptimizerRule + Send + Sync>,
-    ) -> Self {
-        self.optimizers.push(optimizer_rule);
-        self
-    }
-
-    /// Adds a new [`PhysicalOptimizerRule`]
-    pub fn add_physical_optimizer_rule(
-        mut self,
-        optimizer_rule: Arc<dyn PhysicalOptimizerRule + Send + Sync>,
-    ) -> Self {
-        self.physical_optimizers.push(optimizer_rule);
-        self
-    }
-
-    /// Selects a name for the default catalog and schema
-    pub fn with_default_catalog_and_schema(
-        mut self,
-        catalog: impl Into<String>,
-        schema: impl Into<String>,
-    ) -> Self {
-        self.default_catalog = catalog.into();
-        self.default_schema = schema.into();
-        self
-    }
-
-    /// Controls whether the default catalog and schema will be automatically created
-    pub fn create_default_catalog_and_schema(mut self, create: bool) -> Self {
-        self.create_default_catalog_and_schema = create;
-        self
-    }
-
-    /// Enables or disables the inclusion of `information_schema` virtual tables
-    pub fn with_information_schema(mut self, enabled: bool) -> Self {
-        self.information_schema = enabled;
-        self
-    }
-
-    /// Enables or disables the use of repartitioning for joins to improve parallelism
-    pub fn with_repartition_joins(mut self, enabled: bool) -> Self {
-        self.repartition_joins = enabled;
-        self
-    }
-}
-
-/// Execution context for registering data sources and executing queries
-#[derive(Clone)]
-pub struct ExecutionContextState {
-    /// Collection of catalogs containing schemas and ultimately TableProviders
-    pub catalog_list: Arc<dyn CatalogList>,
-    /// Scalar functions that are registered with the context
-    pub scalar_functions: HashMap<String, Arc<ScalarUDF>>,
-    /// Variable provider that are registered with the context
-    pub var_provider: HashMap<VarType, Arc<dyn VarProvider + Send + Sync>>,
-    /// Aggregate functions registered in the context
-    pub aggregate_functions: HashMap<String, Arc<AggregateUDF>>,
-    /// Context configuration
-    pub config: ExecutionConfig,
-}
-
-impl ExecutionContextState {
-    fn resolve_table_ref<'a>(
-        &'a self,
-        table_ref: impl Into<TableReference<'a>>,
-    ) -> ResolvedTableReference<'a> {
-        table_ref
-            .into()
-            .resolve(&self.config.default_catalog, &self.config.default_schema)
-    }
-
-    fn schema_for_ref<'a>(
-        &'a self,
-        table_ref: impl Into<TableReference<'a>>,
-    ) -> Result<Arc<dyn SchemaProvider>> {
-        let resolved_ref = self.resolve_table_ref(table_ref.into());
-
-        self.catalog_list
-            .catalog(resolved_ref.catalog)
-            .ok_or_else(|| {
-                DataFusionError::Plan(format!(
-                    "failed to resolve catalog: {}",
-                    resolved_ref.catalog
-                ))
-            })?
-            .schema(resolved_ref.schema)
-            .ok_or_else(|| {
-                DataFusionError::Plan(format!(
-                    "failed to resolve schema: {}",
-                    resolved_ref.schema
-                ))
-            })
-    }
-}
-
-impl ContextProvider for ExecutionContextState {
-    fn get_table_provider(&self, name: TableReference) -> Option<Arc<dyn TableProvider>> {
-        let resolved_ref = self.resolve_table_ref(name);
-        let schema = self.schema_for_ref(resolved_ref).ok()?;
-        schema.table(resolved_ref.table)
-    }
-
-    fn get_function_meta(&self, name: &str) -> Option<Arc<ScalarUDF>> {
-        self.scalar_functions.get(name).cloned()
-    }
-
-    fn get_aggregate_meta(&self, name: &str) -> Option<Arc<AggregateUDF>> {
-        self.aggregate_functions.get(name).cloned()
-    }
-}
-
-impl FunctionRegistry for ExecutionContextState {
-    fn udfs(&self) -> HashSet<String> {
-        self.scalar_functions.keys().cloned().collect()
-    }
-
-    fn udf(&self, name: &str) -> Result<Arc<ScalarUDF>> {
-        let result = self.scalar_functions.get(name);
-
-        result.cloned().ok_or_else(|| {
-            DataFusionError::Plan(format!(
-                "There is no UDF named \"{}\" in the registry",
-                name
-            ))
-        })
-    }
-
-    fn udaf(&self, name: &str) -> Result<Arc<AggregateUDF>> {
-        let result = self.aggregate_functions.get(name);
-
-        result.cloned().ok_or_else(|| {
-            DataFusionError::Plan(format!(
-                "There is no UDAF named \"{}\" in the registry",
-                name
-            ))
-        })
-    }
-}
-
-#[cfg(test)]
-mod tests {
-
-    use super::*;
-    use crate::physical_plan::functions::make_scalar_function;
-    use crate::physical_plan::{collect, collect_partitioned};
-    use crate::test;
-    use crate::variable::VarType;
-    use crate::{
-        assert_batches_eq, assert_batches_sorted_eq,
-        logical_plan::{col, create_udf, sum},
-    };
-    use crate::{
-        datasource::MemTable, logical_plan::create_udaf,
-        physical_plan::expressions::AvgAccumulator,
-    };
-    use arrow::array::{
-        Array, ArrayRef, BinaryArray, DictionaryArray, Float64Array, Int32Array,
-        Int64Array, LargeBinaryArray, LargeStringArray, StringArray,
-        TimestampNanosecondArray,
-    };
-    use arrow::compute::add;
-    use arrow::datatypes::*;
-    use arrow::record_batch::RecordBatch;
-    use std::fs::File;
-    use std::thread::{self, JoinHandle};
-    use std::{io::prelude::*, sync::Mutex};
-    use tempfile::TempDir;
-    use test::*;
-
-    #[tokio::test]
-    async fn parallel_projection() -> Result<()> {
-        let partition_count = 4;
-        let results = execute("SELECT c1, c2 FROM test", partition_count).await?;
-
-        let expected = vec![
-            "+----+----+",
-            "| c1 | c2 |",
-            "+----+----+",
-            "| 3  | 1  |",
-            "| 3  | 2  |",
-            "| 3  | 3  |",
-            "| 3  | 4  |",
-            "| 3  | 5  |",
-            "| 3  | 6  |",
-            "| 3  | 7  |",
-            "| 3  | 8  |",
-            "| 3  | 9  |",
-            "| 3  | 10 |",
-            "| 2  | 1  |",
-            "| 2  | 2  |",
-            "| 2  | 3  |",
-            "| 2  | 4  |",
-            "| 2  | 5  |",
-            "| 2  | 6  |",
-            "| 2  | 7  |",
-            "| 2  | 8  |",
-            "| 2  | 9  |",
-            "| 2  | 10 |",
-            "| 1  | 1  |",
-            "| 1  | 2  |",
-            "| 1  | 3  |",
-            "| 1  | 4  |",
-            "| 1  | 5  |",
-            "| 1  | 6  |",
-            "| 1  | 7  |",
-            "| 1  | 8  |",
-            "| 1  | 9  |",
-            "| 1  | 10 |",
-            "| 0  | 1  |",
-            "| 0  | 2  |",
-            "| 0  | 3  |",
-            "| 0  | 4  |",
-            "| 0  | 5  |",
-            "| 0  | 6  |",
-            "| 0  | 7  |",
-            "| 0  | 8  |",
-            "| 0  | 9  |",
-            "| 0  | 10 |",
-            "+----+----+",
-        ];
-        assert_batches_sorted_eq!(expected, &results);
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn create_variable_expr() -> Result<()> {
-        let tmp_dir = TempDir::new()?;
-        let partition_count = 4;
-        let mut ctx = create_ctx(&tmp_dir, partition_count)?;
-
-        let variable_provider = test::variable::SystemVar::new();
-        ctx.register_variable(VarType::System, Arc::new(variable_provider));
-        let variable_provider = test::variable::UserDefinedVar::new();
-        ctx.register_variable(VarType::UserDefined, Arc::new(variable_provider));
-
-        let provider = test::create_table_dual();
-        ctx.register_table("dual", provider)?;
-
-        let results =
-            plan_and_collect(&mut ctx, "SELECT @@version, @name FROM dual").await?;
-
-        let expected = vec![
-            "+----------------------+------------------------+",
-            "| @@version            | @name                  |",
-            "+----------------------+------------------------+",
-            "| system-var-@@version | user-defined-var-@name |",
-            "+----------------------+------------------------+",
-        ];
-        assert_batches_eq!(expected, &results);
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn register_deregister() -> Result<()> {
-        let tmp_dir = TempDir::new()?;
-        let partition_count = 4;
-        let mut ctx = create_ctx(&tmp_dir, partition_count)?;
-
-        let provider = test::create_table_dual();
-        ctx.register_table("dual", provider)?;
-
-        assert!(ctx.deregister_table("dual")?.is_some());
-        assert!(ctx.deregister_table("dual")?.is_none());
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn parallel_query_with_filter() -> Result<()> {
-        let tmp_dir = TempDir::new()?;
-        let partition_count = 4;
-        let ctx = create_ctx(&tmp_dir, partition_count)?;
-
-        let logical_plan =
-            ctx.create_logical_plan("SELECT c1, c2 FROM test WHERE c1 > 0 AND c1 < 3")?;
-        let logical_plan = ctx.optimize(&logical_plan)?;
-
-        let physical_plan = ctx.create_physical_plan(&logical_plan)?;
-        println!("{:?}", physical_plan);
-
-        let results = collect_partitioned(physical_plan).await?;
-
-        // note that the order of partitions is not deterministic
-        let mut num_rows = 0;
-        for partition in &results {
-            for batch in partition {
-                num_rows += batch.num_rows();
-            }
-        }
-        assert_eq!(20, num_rows);
-
-        let results: Vec<RecordBatch> = results.into_iter().flatten().collect();
-        let expected = vec![
-            "+----+----+",
-            "| c1 | c2 |",
-            "+----+----+",
-            "| 1  | 1  |",
-            "| 1  | 10 |",
-            "| 1  | 2  |",
-            "| 1  | 3  |",
-            "| 1  | 4  |",
-            "| 1  | 5  |",
-            "| 1  | 6  |",
-            "| 1  | 7  |",
-            "| 1  | 8  |",
-            "| 1  | 9  |",
-            "| 2  | 1  |",
-            "| 2  | 10 |",
-            "| 2  | 2  |",
-            "| 2  | 3  |",
-            "| 2  | 4  |",
-            "| 2  | 5  |",
-            "| 2  | 6  |",
-            "| 2  | 7  |",
-            "| 2  | 8  |",
-            "| 2  | 9  |",
-            "+----+----+",
-        ];
-        assert_batches_sorted_eq!(expected, &results);
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn projection_on_table_scan() -> Result<()> {
-        let tmp_dir = TempDir::new()?;
-        let partition_count = 4;
-        let ctx = create_ctx(&tmp_dir, partition_count)?;
-
-        let table = ctx.table("test")?;
-        let logical_plan = LogicalPlanBuilder::from(&table.to_logical_plan())
-            .project(vec![col("c2")])?
-            .build()?;
-
-        let optimized_plan = ctx.optimize(&logical_plan)?;
-        match &optimized_plan {
-            LogicalPlan::Projection { input, .. } => match &**input {
-                LogicalPlan::TableScan {
-                    source,
-                    projected_schema,
-                    ..
-                } => {
-                    assert_eq!(source.schema().fields().len(), 3);
-                    assert_eq!(projected_schema.fields().len(), 1);
-                }
-                _ => panic!("input to projection should be TableScan"),
-            },
-            _ => panic!("expect optimized_plan to be projection"),
-        }
-
-        let expected = "Projection: #c2\
-        \n  TableScan: test projection=Some([1])";
-        assert_eq!(format!("{:?}", optimized_plan), expected);
-
-        let physical_plan = ctx.create_physical_plan(&optimized_plan)?;
-
-        assert_eq!(1, physical_plan.schema().fields().len());
-        assert_eq!("c2", physical_plan.schema().field(0).name().as_str());
-
-        let batches = collect(physical_plan).await?;
-        assert_eq!(40, batches.iter().map(|x| x.num_rows()).sum::<usize>());
-
-        Ok(())
-    }
-
-    #[test]
-    fn preserve_nullability_on_projection() -> Result<()> {
-        let tmp_dir = TempDir::new()?;
-        let ctx = create_ctx(&tmp_dir, 1)?;
-
-        let schema: Schema = ctx.table("test").unwrap().schema().clone().into();
-        assert_eq!(schema.field_with_name("c1")?.is_nullable(), false);
-
-        let plan = LogicalPlanBuilder::scan_empty("", &schema, None)?
-            .project(vec![col("c1")])?
-            .build()?;
-
-        let plan = ctx.optimize(&plan)?;
-        let physical_plan = ctx.create_physical_plan(&Arc::new(plan))?;
-        assert_eq!(
-            physical_plan.schema().field_with_name("c1")?.is_nullable(),
-            false
-        );
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn projection_on_memory_scan() -> Result<()> {
-        let schema = Schema::new(vec![
-            Field::new("a", DataType::Int32, false),
-            Field::new("b", DataType::Int32, false),
-            Field::new("c", DataType::Int32, false),
-        ]);
-        let schema = SchemaRef::new(schema);
-
-        let partitions = vec![vec![RecordBatch::try_new(
-            schema.clone(),
-            vec![
-                Arc::new(Int32Array::from(vec![1, 10, 10, 100])),
-                Arc::new(Int32Array::from(vec![2, 12, 12, 120])),
-                Arc::new(Int32Array::from(vec![3, 12, 12, 120])),
-            ],
-        )?]];
-
-        let plan = LogicalPlanBuilder::scan_memory(partitions, schema, None)?
-            .project(vec![col("b")])?
-            .build()?;
-        assert_fields_eq(&plan, vec!["b"]);
-
-        let ctx = ExecutionContext::new();
-        let optimized_plan = ctx.optimize(&plan)?;
-        match &optimized_plan {
-            LogicalPlan::Projection { input, .. } => match &**input {
-                LogicalPlan::TableScan {
-                    source,
-                    projected_schema,
-                    ..
-                } => {
-                    assert_eq!(source.schema().fields().len(), 3);
-                    assert_eq!(projected_schema.fields().len(), 1);
-                }
-                _ => panic!("input to projection should be InMemoryScan"),
-            },
-            _ => panic!("expect optimized_plan to be projection"),
-        }
-
-        let expected = "Projection: #b\
-        \n  TableScan: projection=Some([1])";
-        assert_eq!(format!("{:?}", optimized_plan), expected);
-
-        let physical_plan = ctx.create_physical_plan(&optimized_plan)?;
-
-        assert_eq!(1, physical_plan.schema().fields().len());
-        assert_eq!("b", physical_plan.schema().field(0).name().as_str());
-
-        let batches = collect(physical_plan).await?;
-        assert_eq!(1, batches.len());
-        assert_eq!(1, batches[0].num_columns());
-        assert_eq!(4, batches[0].num_rows());
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn sort() -> Result<()> {
-        let results =
-            execute("SELECT c1, c2 FROM test ORDER BY c1 DESC, c2 ASC", 4).await?;
-        assert_eq!(results.len(), 1);
-
-        let expected: Vec<&str> = vec![
-            "+----+----+",
-            "| c1 | c2 |",
-            "+----+----+",
-            "| 3  | 1  |",
-            "| 3  | 2  |",
-            "| 3  | 3  |",
-            "| 3  | 4  |",
-            "| 3  | 5  |",
-            "| 3  | 6  |",
-            "| 3  | 7  |",
-            "| 3  | 8  |",
-            "| 3  | 9  |",
-            "| 3  | 10 |",
-            "| 2  | 1  |",
-            "| 2  | 2  |",
-            "| 2  | 3  |",
-            "| 2  | 4  |",
-            "| 2  | 5  |",
-            "| 2  | 6  |",
-            "| 2  | 7  |",
-            "| 2  | 8  |",
-            "| 2  | 9  |",
-            "| 2  | 10 |",
-            "| 1  | 1  |",
-            "| 1  | 2  |",
-            "| 1  | 3  |",
-            "| 1  | 4  |",
-            "| 1  | 5  |",
-            "| 1  | 6  |",
-            "| 1  | 7  |",
-            "| 1  | 8  |",
-            "| 1  | 9  |",
-            "| 1  | 10 |",
-            "| 0  | 1  |",
-            "| 0  | 2  |",
-            "| 0  | 3  |",
-            "| 0  | 4  |",
-            "| 0  | 5  |",
-            "| 0  | 6  |",
-            "| 0  | 7  |",
-            "| 0  | 8  |",
-            "| 0  | 9  |",
-            "| 0  | 10 |",
-            "+----+----+",
-        ];
-
-        // Note it is important to NOT use assert_batches_sorted_eq
-        // here as we are testing the sortedness of the output
-        assert_batches_eq!(expected, &results);
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn sort_empty() -> Result<()> {
-        // The predicate on this query purposely generates no results
-        let results = execute(
-            "SELECT c1, c2 FROM test WHERE c1 > 100000 ORDER BY c1 DESC, c2 ASC",
-            4,
-        )
-        .await
-        .unwrap();
-        assert_eq!(results.len(), 0);
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn aggregate() -> Result<()> {
-        let results = execute("SELECT SUM(c1), SUM(c2) FROM test", 4).await?;
-        assert_eq!(results.len(), 1);
-
-        let expected = vec![
-            "+---------+---------+",
-            "| SUM(c1) | SUM(c2) |",
-            "+---------+---------+",
-            "| 60      | 220     |",
-            "+---------+---------+",
-        ];
-        assert_batches_sorted_eq!(expected, &results);
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn aggregate_empty() -> Result<()> {
-        // The predicate on this query purposely generates no results
-        let results = execute("SELECT SUM(c1), SUM(c2) FROM test where c1 > 100000", 4)
-            .await
-            .unwrap();
-
-        assert_eq!(results.len(), 1);
-
-        let expected = vec![
-            "+---------+---------+",
-            "| SUM(c1) | SUM(c2) |",
-            "+---------+---------+",
-            "|         |         |",
-            "+---------+---------+",
-        ];
-        assert_batches_sorted_eq!(expected, &results);
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn aggregate_avg() -> Result<()> {
-        let results = execute("SELECT AVG(c1), AVG(c2) FROM test", 4).await?;
-        assert_eq!(results.len(), 1);
-
-        let expected = vec![
-            "+---------+---------+",
-            "| AVG(c1) | AVG(c2) |",
-            "+---------+---------+",
-            "| 1.5     | 5.5     |",
-            "+---------+---------+",
-        ];
-        assert_batches_sorted_eq!(expected, &results);
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn aggregate_max() -> Result<()> {
-        let results = execute("SELECT MAX(c1), MAX(c2) FROM test", 4).await?;
-        assert_eq!(results.len(), 1);
-
-        let expected = vec![
-            "+---------+---------+",
-            "| MAX(c1) | MAX(c2) |",
-            "+---------+---------+",
-            "| 3       | 10      |",
-            "+---------+---------+",
-        ];
-        assert_batches_sorted_eq!(expected, &results);
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn aggregate_min() -> Result<()> {
-        let results = execute("SELECT MIN(c1), MIN(c2) FROM test", 4).await?;
-        assert_eq!(results.len(), 1);
-
-        let expected = vec![
-            "+---------+---------+",
-            "| MIN(c1) | MIN(c2) |",
-            "+---------+---------+",
-            "| 0       | 1       |",
-            "+---------+---------+",
-        ];
-        assert_batches_sorted_eq!(expected, &results);
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn aggregate_grouped() -> Result<()> {
-        let results = execute("SELECT c1, SUM(c2) FROM test GROUP BY c1", 4).await?;
-        assert_eq!(results.len(), 1);
-
-        let expected = vec![
-            "+----+---------+",
-            "| c1 | SUM(c2) |",
-            "+----+---------+",
-            "| 0  | 55      |",
-            "| 1  | 55      |",
-            "| 2  | 55      |",
-            "| 3  | 55      |",
-            "+----+---------+",
-        ];
-        assert_batches_sorted_eq!(expected, &results);
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn aggregate_grouped_avg() -> Result<()> {
-        let results = execute("SELECT c1, AVG(c2) FROM test GROUP BY c1", 4).await?;
-        assert_eq!(results.len(), 1);
-
-        let expected = vec![
-            "+----+---------+",
-            "| c1 | AVG(c2) |",
-            "+----+---------+",
-            "| 0  | 5.5     |",
-            "| 1  | 5.5     |",
-            "| 2  | 5.5     |",
-            "| 3  | 5.5     |",
-            "+----+---------+",
-        ];
-        assert_batches_sorted_eq!(expected, &results);
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn boolean_literal() -> Result<()> {
-        let results =
-            execute("SELECT c1, c3 FROM test WHERE c1 > 2 AND c3 = true", 4).await?;
-        assert_eq!(results.len(), 1);
-
-        let expected = vec![
-            "+----+------+",
-            "| c1 | c3   |",
-            "+----+------+",
-            "| 3  | true |",
-            "| 3  | true |",
-            "| 3  | true |",
-            "| 3  | true |",
-            "| 3  | true |",
-            "+----+------+",
-        ];
-        assert_batches_sorted_eq!(expected, &results);
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn aggregate_grouped_empty() -> Result<()> {
-        let results =
-            execute("SELECT c1, AVG(c2) FROM test WHERE c1 = 123 GROUP BY c1", 4).await?;
-        assert_eq!(results.len(), 1);
-
-        let expected = vec!["++", "||", "++", "++"];
-        assert_batches_sorted_eq!(expected, &results);
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn aggregate_grouped_max() -> Result<()> {
-        let results = execute("SELECT c1, MAX(c2) FROM test GROUP BY c1", 4).await?;
-        assert_eq!(results.len(), 1);
-
-        let expected = vec![
-            "+----+---------+",
-            "| c1 | MAX(c2) |",
-            "+----+---------+",
-            "| 0  | 10      |",
-            "| 1  | 10      |",
-            "| 2  | 10      |",
-            "| 3  | 10      |",
-            "+----+---------+",
-        ];
-        assert_batches_sorted_eq!(expected, &results);
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn aggregate_grouped_min() -> Result<()> {
-        let results = execute("SELECT c1, MIN(c2) FROM test GROUP BY c1", 4).await?;
-        assert_eq!(results.len(), 1);
-
-        let expected = vec![
-            "+----+---------+",
-            "| c1 | MIN(c2) |",
-            "+----+---------+",
-            "| 0  | 1       |",
-            "| 1  | 1       |",
-            "| 2  | 1       |",
-            "| 3  | 1       |",
-            "+----+---------+",
-        ];
-        assert_batches_sorted_eq!(expected, &results);
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn aggregate_timestamps_sum() -> Result<()> {
-        let tmp_dir = TempDir::new()?;
-        let mut ctx = create_ctx(&tmp_dir, 1)?;
-        ctx.register_table("t", test::table_with_timestamps())
-            .unwrap();
-
-        let results = plan_and_collect(
-            &mut ctx,
-            "SELECT sum(nanos), sum(micros), sum(millis), sum(secs) FROM t",
-        )
-        .await
-        .unwrap_err();
-
-        assert_eq!(results.to_string(), "Error during planning: Coercion from [Timestamp(Nanosecond, None)] to the signature Uniform(1, [Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, Float32, Float64]) failed.");
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn aggregate_timestamps_count() -> Result<()> {
-        let tmp_dir = TempDir::new()?;
-        let mut ctx = create_ctx(&tmp_dir, 1)?;
-        ctx.register_table("t", test::table_with_timestamps())
-            .unwrap();
-
-        let results = plan_and_collect(
-            &mut ctx,
-            "SELECT count(nanos), count(micros), count(millis), count(secs) FROM t",
-        )
-        .await
-        .unwrap();
-
-        let expected = vec![
-            "+--------------+---------------+---------------+-------------+",
-            "| COUNT(nanos) | COUNT(micros) | COUNT(millis) | COUNT(secs) |",
-            "+--------------+---------------+---------------+-------------+",
-            "| 3            | 3             | 3             | 3           |",
-            "+--------------+---------------+---------------+-------------+",
-        ];
-        assert_batches_sorted_eq!(expected, &results);
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn aggregate_timestamps_min() -> Result<()> {
-        let tmp_dir = TempDir::new()?;
-        let mut ctx = create_ctx(&tmp_dir, 1)?;
-        ctx.register_table("t", test::table_with_timestamps())
-            .unwrap();
-
-        let results = plan_and_collect(
-            &mut ctx,
-            "SELECT min(nanos), min(micros), min(millis), min(secs) FROM t",
-        )
-        .await
-        .unwrap();
-
-        let expected = vec![
-            "+----------------------------+----------------------------+-------------------------+---------------------+",
-            "| MIN(nanos)                 | MIN(micros)                | MIN(millis)             | MIN(secs)           |",
-            "+----------------------------+----------------------------+-------------------------+---------------------+",
-            "| 2011-12-13 11:13:10.123450 | 2011-12-13 11:13:10.123450 | 2011-12-13 11:13:10.123 | 2011-12-13 11:13:10 |",
-            "+----------------------------+----------------------------+-------------------------+---------------------+",
-        ];
-        assert_batches_sorted_eq!(expected, &results);
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn aggregate_timestamps_max() -> Result<()> {
-        let tmp_dir = TempDir::new()?;
-        let mut ctx = create_ctx(&tmp_dir, 1)?;
-        ctx.register_table("t", test::table_with_timestamps())
-            .unwrap();
-
-        let results = plan_and_collect(
-            &mut ctx,
-            "SELECT max(nanos), max(micros), max(millis), max(secs) FROM t",
-        )
-        .await
-        .unwrap();
-
-        let expected = vec![
-            "+-------------------------+-------------------------+-------------------------+---------------------+",
-            "| MAX(nanos)              | MAX(micros)             | MAX(millis)             | MAX(secs)           |",
-            "+-------------------------+-------------------------+-------------------------+---------------------+",
-            "| 2021-01-01 05:11:10.432 | 2021-01-01 05:11:10.432 | 2021-01-01 05:11:10.432 | 2021-01-01 05:11:10 |",
-            "+-------------------------+-------------------------+-------------------------+---------------------+",
-];
-        assert_batches_sorted_eq!(expected, &results);
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn aggregate_timestamps_avg() -> Result<()> {
-        let tmp_dir = TempDir::new()?;
-        let mut ctx = create_ctx(&tmp_dir, 1)?;
-        ctx.register_table("t", test::table_with_timestamps())
-            .unwrap();
-
-        let results = plan_and_collect(
-            &mut ctx,
-            "SELECT avg(nanos), avg(micros), avg(millis), avg(secs) FROM t",
-        )
-        .await
-        .unwrap_err();
-
-        assert_eq!(results.to_string(), "Error during planning: Coercion from [Timestamp(Nanosecond, None)] to the signature Uniform(1, [Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, Float32, Float64]) failed.");
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn join_partitioned() -> Result<()> {
-        // self join on partition id (workaround for duplicate column name)
-        let results = execute(
-            "SELECT 1 FROM test JOIN (SELECT c1 AS id1 FROM test) ON c1=id1",
-            4,
-        )
-        .await?;
-
-        assert_eq!(
-            results.iter().map(|b| b.num_rows()).sum::<usize>(),
-            4 * 10 * 10
-        );
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn count_basic() -> Result<()> {
-        let results = execute("SELECT COUNT(c1), COUNT(c2) FROM test", 1).await?;
-        assert_eq!(results.len(), 1);
-
-        let expected = vec![
-            "+-----------+-----------+",
-            "| COUNT(c1) | COUNT(c2) |",
-            "+-----------+-----------+",
-            "| 10        | 10        |",
-            "+-----------+-----------+",
-        ];
-        assert_batches_sorted_eq!(expected, &results);
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn count_partitioned() -> Result<()> {
-        let results = execute("SELECT COUNT(c1), COUNT(c2) FROM test", 4).await?;
-        assert_eq!(results.len(), 1);
-
-        let expected = vec![
-            "+-----------+-----------+",
-            "| COUNT(c1) | COUNT(c2) |",
-            "+-----------+-----------+",
-            "| 40        | 40        |",
-            "+-----------+-----------+",
-        ];
-        assert_batches_sorted_eq!(expected, &results);
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn count_aggregated() -> Result<()> {
-        let results = execute("SELECT c1, COUNT(c2) FROM test GROUP BY c1", 4).await?;
-        assert_eq!(results.len(), 1);
-
-        let expected = vec![
-            "+----+-----------+",
-            "| c1 | COUNT(c2) |",
-            "+----+-----------+",
-            "| 0  | 10        |",
-            "| 1  | 10        |",
-            "| 2  | 10        |",
-            "| 3  | 10        |",
-            "+----+-----------+",
-        ];
-        assert_batches_sorted_eq!(expected, &results);
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn group_by_date_trunc() -> Result<()> {
-        let tmp_dir = TempDir::new()?;
-        let mut ctx = ExecutionContext::new();
-        let schema = Arc::new(Schema::new(vec![
-            Field::new("c2", DataType::UInt64, false),
-            Field::new(
-                "t1",
-                DataType::Timestamp(TimeUnit::Microsecond, None),
-                false,
-            ),
-        ]));
-
-        // generate a partitioned file
-        for partition in 0..4 {
-            let filename = format!("partition-{}.{}", partition, "csv");
-            let file_path = tmp_dir.path().join(&filename);
-            let mut file = File::create(file_path)?;
-
-            // generate some data
-            for i in 0..10 {
-                let data = format!("{},2020-12-{}T00:00:00.000\n", i, i + 10);
-                file.write_all(data.as_bytes())?;
-            }
-        }
-
-        ctx.register_csv(
-            "test",
-            tmp_dir.path().to_str().unwrap(),
-            CsvReadOptions::new().schema(&schema).has_header(false),
-        )?;
-
-        let results = plan_and_collect(
-            &mut ctx,
-            "SELECT date_trunc('week', t1) as week, SUM(c2) FROM test GROUP BY date_trunc('week', t1)"
-        ).await?;
-        assert_eq!(results.len(), 1);
-
-        let expected = vec![
-            "+---------------------+---------+",
-            "| week                | SUM(c2) |",
-            "+---------------------+---------+",
-            "| 2020-12-07 00:00:00 | 24      |",
-            "| 2020-12-14 00:00:00 | 156     |",
-            "+---------------------+---------+",
-        ];
-        assert_batches_sorted_eq!(expected, &results);
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn group_by_dictionary() {
-        async fn run_test_case<K: ArrowDictionaryKeyType>() {
-            let mut ctx = ExecutionContext::new();
-
-            // input data looks like:
-            // A, 1
-            // B, 2
-            // A, 2
-            // A, 4
-            // C, 1
-            // A, 1
-
-            let dict_array: DictionaryArray<K> =
-                vec!["A", "B", "A", "A", "C", "A"].into_iter().collect();
-            let dict_array = Arc::new(dict_array);
-
-            let val_array: Int64Array = vec![1, 2, 2, 4, 1, 1].into();
-            let val_array = Arc::new(val_array);
-
-            let schema = Arc::new(Schema::new(vec![
-                Field::new("dict", dict_array.data_type().clone(), false),
-                Field::new("val", val_array.data_type().clone(), false),
-            ]));
-
-            let batch = RecordBatch::try_new(schema.clone(), vec![dict_array, val_array])
-                .unwrap();
-
-            let provider = MemTable::try_new(schema.clone(), vec![vec![batch]]).unwrap();
-            ctx.register_table("t", Arc::new(provider)).unwrap();
-
-            let results = plan_and_collect(
-                &mut ctx,
-                "SELECT dict, count(val) FROM t GROUP BY dict",
-            )
-            .await
-            .expect("ran plan correctly");
-
-            let expected = vec![
-                "+------+------------+",
-                "| dict | COUNT(val) |",
-                "+------+------------+",
-                "| A    | 4          |",
-                "| B    | 1          |",
-                "| C    | 1          |",
-                "+------+------------+",
-            ];
-            assert_batches_sorted_eq!(expected, &results);
-
-            // Now, use dict as an aggregate
-            let results =
-                plan_and_collect(&mut ctx, "SELECT val, count(dict) FROM t GROUP BY val")
-                    .await
-                    .expect("ran plan correctly");
-
-            let expected = vec![
-                "+-----+-------------+",
-                "| val | COUNT(dict) |",
-                "+-----+-------------+",
-                "| 1   | 3           |",
-                "| 2   | 2           |",
-                "| 4   | 1           |",
-                "+-----+-------------+",
-            ];
-            assert_batches_sorted_eq!(expected, &results);
-        }
-
-        run_test_case::<Int8Type>().await;
-        run_test_case::<Int16Type>().await;
-        run_test_case::<Int32Type>().await;
-        run_test_case::<Int64Type>().await;
-        run_test_case::<UInt8Type>().await;
-        run_test_case::<UInt16Type>().await;
-        run_test_case::<UInt32Type>().await;
-        run_test_case::<UInt64Type>().await;
-    }
-
-    async fn run_count_distinct_integers_aggregated_scenario(
-        partitions: Vec<Vec<(&str, u64)>>,
-    ) -> Result<Vec<RecordBatch>> {
-        let tmp_dir = TempDir::new()?;
-        let mut ctx = ExecutionContext::new();
-        let schema = Arc::new(Schema::new(vec![
-            Field::new("c_group", DataType::Utf8, false),
-            Field::new("c_int8", DataType::Int8, false),
-            Field::new("c_int16", DataType::Int16, false),
-            Field::new("c_int32", DataType::Int32, false),
-            Field::new("c_int64", DataType::Int64, false),
-            Field::new("c_uint8", DataType::UInt8, false),
-            Field::new("c_uint16", DataType::UInt16, false),
-            Field::new("c_uint32", DataType::UInt32, false),
-            Field::new("c_uint64", DataType::UInt64, false),
-        ]));
-
-        for (i, partition) in partitions.iter().enumerate() {
-            let filename = format!("partition-{}.csv", i);
-            let file_path = tmp_dir.path().join(&filename);
-            let mut file = File::create(file_path)?;
-            for row in partition {
-                let row_str = format!(
-                    "{},{}\n",
-                    row.0,
-                    // Populate values for each of the integer fields in the
-                    // schema.
-                    (0..8)
-                        .map(|_| { row.1.to_string() })
-                        .collect::<Vec<_>>()
-                        .join(","),
-                );
-                file.write_all(row_str.as_bytes())?;
-            }
-        }
-        ctx.register_csv(
-            "test",
-            tmp_dir.path().to_str().unwrap(),
-            CsvReadOptions::new().schema(&schema).has_header(false),
-        )?;
-
-        let results = plan_and_collect(
-            &mut ctx,
-            "
-              SELECT
-                c_group,
-                COUNT(c_uint64),
-                COUNT(DISTINCT c_int8),
-                COUNT(DISTINCT c_int16),
-                COUNT(DISTINCT c_int32),
-                COUNT(DISTINCT c_int64),
-                COUNT(DISTINCT c_uint8),
-                COUNT(DISTINCT c_uint16),
-                COUNT(DISTINCT c_uint32),
-                COUNT(DISTINCT c_uint64)
-              FROM test
-              GROUP BY c_group
-            ",
-        )
-        .await?;
-
-        Ok(results)
-    }
-
-    #[tokio::test]
-    async fn count_distinct_integers_aggregated_single_partition() -> Result<()> {
-        let partitions = vec![
-            // The first member of each tuple will be the value for the
-            // `c_group` column, and the second member will be the value for
-            // each of the int/uint fields.
-            vec![
-                ("a", 1),
-                ("a", 1),
-                ("a", 2),
-                ("b", 9),
-                ("c", 9),
-                ("c", 10),
-                ("c", 9),
-            ],
-        ];
-
-        let results = run_count_distinct_integers_aggregated_scenario(partitions).await?;
-        assert_eq!(results.len(), 1);
-
-        let expected = vec!
-[
-    "+---------+-----------------+------------------------+-------------------------+-------------------------+-------------------------+-------------------------+--------------------------+--------------------------+--------------------------+",
-    "| c_group | COUNT(c_uint64) | COUNT(DISTINCT c_int8) | COUNT(DISTINCT c_int16) | COUNT(DISTINCT c_int32) | COUNT(DISTINCT c_int64) | COUNT(DISTINCT c_uint8) | COUNT(DISTINCT c_uint16) | COUNT(DISTINCT c_uint32) | COUNT(DISTINCT c_uint64) |",
-    "+---------+-----------------+------------------------+-------------------------+-------------------------+-------------------------+-------------------------+--------------------------+--------------------------+--------------------------+",
-    "| a       | 3               | 2                      | 2                       | 2                       | 2                       | 2                       | 2                        | 2                        | 2                        |",
-    "| b       | 1               | 1                      | 1                       | 1                       | 1                       | 1                       | 1                        | 1                        | 1                        |",
-    "| c       | 3               | 2                      | 2                       | 2                       | 2                       | 2                       | 2                        | 2                        | 2                        |",
-    "+---------+-----------------+------------------------+-------------------------+-------------------------+-------------------------+-------------------------+--------------------------+--------------------------+--------------------------+",
-];
-        assert_batches_sorted_eq!(expected, &results);
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn count_distinct_integers_aggregated_multiple_partitions() -> Result<()> {
-        let partitions = vec![
-            // The first member of each tuple will be the value for the
-            // `c_group` column, and the second member will be the value for
-            // each of the int/uint fields.
-            vec![("a", 1), ("a", 1), ("a", 2), ("b", 9), ("c", 9)],
-            vec![("a", 1), ("a", 3), ("b", 8), ("b", 9), ("b", 10), ("b", 11)],
-        ];
-
-        let results = run_count_distinct_integers_aggregated_scenario(partitions).await?;
-        assert_eq!(results.len(), 1);
-
-        let expected = vec![
-    "+---------+-----------------+------------------------+-------------------------+-------------------------+-------------------------+-------------------------+--------------------------+--------------------------+--------------------------+",
-    "| c_group | COUNT(c_uint64) | COUNT(DISTINCT c_int8) | COUNT(DISTINCT c_int16) | COUNT(DISTINCT c_int32) | COUNT(DISTINCT c_int64) | COUNT(DISTINCT c_uint8) | COUNT(DISTINCT c_uint16) | COUNT(DISTINCT c_uint32) | COUNT(DISTINCT c_uint64) |",
-    "+---------+-----------------+------------------------+-------------------------+-------------------------+-------------------------+-------------------------+--------------------------+--------------------------+--------------------------+",
-    "| a       | 5               | 3                      | 3                       | 3                       | 3                       | 3                       | 3                        | 3                        | 3                        |",
-    "| b       | 5               | 4                      | 4                       | 4                       | 4                       | 4                       | 4                        | 4                        | 4                        |",
-    "| c       | 1               | 1                      | 1                       | 1                       | 1                       | 1                       | 1                        | 1                        | 1                        |",
-    "+---------+-----------------+------------------------+-------------------------+-------------------------+-------------------------+-------------------------+--------------------------+--------------------------+--------------------------+",
-];
-        assert_batches_sorted_eq!(expected, &results);
-
-        Ok(())
-    }
-
-    #[test]
-    fn aggregate_with_alias() -> Result<()> {
-        let tmp_dir = TempDir::new()?;
-        let ctx = create_ctx(&tmp_dir, 1)?;
-
-        let schema = Arc::new(Schema::new(vec![
-            Field::new("c1", DataType::Utf8, false),
-            Field::new("c2", DataType::UInt32, false),
-        ]));
-
-        let plan = LogicalPlanBuilder::scan_empty("", schema.as_ref(), None)?
-            .aggregate(vec![col("c1")], vec![sum(col("c2"))])?
-            .project(vec![col("c1"), col("SUM(c2)").alias("total_salary")])?
-            .build()?;
-
-        let plan = ctx.optimize(&plan)?;
-
-        let physical_plan = ctx.create_physical_plan(&Arc::new(plan))?;
-        assert_eq!("c1", physical_plan.schema().field(0).name().as_str());
-        assert_eq!(
-            "total_salary",
-            physical_plan.schema().field(1).name().as_str()
-        );
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn limit() -> Result<()> {
-        let tmp_dir = TempDir::new()?;
-        let mut ctx = create_ctx(&tmp_dir, 1)?;
-        ctx.register_table("t", test::table_with_sequence(1, 1000).unwrap())
-            .unwrap();
-
-        let results =
-            plan_and_collect(&mut ctx, "SELECT i FROM t ORDER BY i DESC limit 3")
-                .await
-                .unwrap();
-
-        let expected = vec![
-            "+------+", "| i    |", "+------+", "| 1000 |", "| 999  |", "| 998  |",
-            "+------+",
-        ];
-
-        assert_batches_eq!(expected, &results);
-
-        let results = plan_and_collect(&mut ctx, "SELECT i FROM t ORDER BY i limit 3")
-            .await
-            .unwrap();
-
-        let expected = vec![
-            "+---+", "| i |", "+---+", "| 1 |", "| 2 |", "| 3 |", "+---+",
-        ];
-
-        assert_batches_eq!(expected, &results);
-
-        let results = plan_and_collect(&mut ctx, "SELECT i FROM t limit 3")
-            .await
-            .unwrap();
-
-        // the actual rows are not guaranteed, so only check the count (should be 3)
-        let num_rows: usize = results.into_iter().map(|b| b.num_rows()).sum();
-        assert_eq!(num_rows, 3);
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn limit_multi_partitions() -> Result<()> {
-        let tmp_dir = TempDir::new()?;
-        let mut ctx = create_ctx(&tmp_dir, 1)?;
-
-        let partitions = vec![
-            vec![test::make_partition(0)],
-            vec![test::make_partition(1)],
-            vec![test::make_partition(2)],
-            vec![test::make_partition(3)],
-            vec![test::make_partition(4)],
-            vec![test::make_partition(5)],
-        ];
-        let schema = partitions[0][0].schema();
-        let provider = Arc::new(MemTable::try_new(schema, partitions).unwrap());
-
-        ctx.register_table("t", provider).unwrap();
-
-        // select all rows
-        let results = plan_and_collect(&mut ctx, "SELECT i FROM t").await.unwrap();
-
-        let num_rows: usize = results.into_iter().map(|b| b.num_rows()).sum();
-        assert_eq!(num_rows, 15);
-
-        for limit in 1..10 {
-            let query = format!("SELECT i FROM t limit {}", limit);
-            let results = plan_and_collect(&mut ctx, &query).await.unwrap();
-
-            let num_rows: usize = results.into_iter().map(|b| b.num_rows()).sum();
-            assert_eq!(num_rows, limit, "mismatch with query {}", query);
-        }
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn case_sensitive_identifiers_functions() {
-        let mut ctx = ExecutionContext::new();
-        ctx.register_table("t", test::table_with_sequence(1, 1).unwrap())
-            .unwrap();
-
-        let expected = vec![
-            "+---------+",
-            "| sqrt(i) |",
-            "+---------+",
-            "| 1       |",
-            "+---------+",
-        ];
-
-        let results = plan_and_collect(&mut ctx, "SELECT sqrt(i) FROM t")
-            .await
-            .unwrap();
-
-        assert_batches_sorted_eq!(expected, &results);
-
-        let results = plan_and_collect(&mut ctx, "SELECT SQRT(i) FROM t")
-            .await
-            .unwrap();
-        assert_batches_sorted_eq!(expected, &results);
-
-        // Using double quotes allows specifying the function name with capitalization
-        let err = plan_and_collect(&mut ctx, "SELECT \"SQRT\"(i) FROM t")
-            .await
-            .unwrap_err();
-        assert_eq!(
-            err.to_string(),
-            "Error during planning: Invalid function 'SQRT'"
-        );
-
-        let results = plan_and_collect(&mut ctx, "SELECT \"sqrt\"(i) FROM t")
-            .await
-            .unwrap();
-        assert_batches_sorted_eq!(expected, &results);
-    }
-
-    #[tokio::test]
-    async fn case_sensitive_identifiers_user_defined_functions() -> Result<()> {
-        let mut ctx = ExecutionContext::new();
-        ctx.register_table("t", test::table_with_sequence(1, 1).unwrap())
-            .unwrap();
-
-        let myfunc = |args: &[ArrayRef]| Ok(Arc::clone(&args[0]));
-        let myfunc = make_scalar_function(myfunc);
-
-        ctx.register_udf(create_udf(
-            "MY_FUNC",
-            vec![DataType::Int32],
-            Arc::new(DataType::Int32),
-            myfunc,
-        ));
-
-        // doesn't work as it was registered with non lowercase
-        let err = plan_and_collect(&mut ctx, "SELECT MY_FUNC(i) FROM t")
-            .await
-            .unwrap_err();
-        assert_eq!(
-            err.to_string(),
-            "Error during planning: Invalid function \'my_func\'"
-        );
-
-        // Can call it if you put quotes
-        let result = plan_and_collect(&mut ctx, "SELECT \"MY_FUNC\"(i) FROM t").await?;
-
-        let expected = vec![
-            "+------------+",
-            "| MY_FUNC(i) |",
-            "+------------+",
-            "| 1          |",
-            "+------------+",
-        ];
-        assert_batches_eq!(expected, &result);
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn case_sensitive_identifiers_aggregates() {
-        let mut ctx = ExecutionContext::new();
-        ctx.register_table("t", test::table_with_sequence(1, 1).unwrap())
-            .unwrap();
-
-        let expected = vec![
-            "+--------+",
-            "| MAX(i) |",
-            "+--------+",
-            "| 1      |",
-            "+--------+",
-        ];
-
-        let results = plan_and_collect(&mut ctx, "SELECT max(i) FROM t")
-            .await
-            .unwrap();
-
-        assert_batches_sorted_eq!(expected, &results);
-
-        let results = plan_and_collect(&mut ctx, "SELECT MAX(i) FROM t")
-            .await
-            .unwrap();
-        assert_batches_sorted_eq!(expected, &results);
-
-        // Using double quotes allows specifying the function name with capitalization
-        let err = plan_and_collect(&mut ctx, "SELECT \"MAX\"(i) FROM t")
-            .await
-            .unwrap_err();
-        assert_eq!(
-            err.to_string(),
-            "Error during planning: Invalid function 'MAX'"
-        );
-
-        let results = plan_and_collect(&mut ctx, "SELECT \"max\"(i) FROM t")
-            .await
-            .unwrap();
-        assert_batches_sorted_eq!(expected, &results);
-    }
-
-    #[tokio::test]
-    async fn case_sensitive_identifiers_user_defined_aggregates() -> Result<()> {
-        let mut ctx = ExecutionContext::new();
-        ctx.register_table("t", test::table_with_sequence(1, 1).unwrap())
-            .unwrap();
-
-        // Note capitalizaton
-        let my_avg = create_udaf(
-            "MY_AVG",
-            DataType::Float64,
-            Arc::new(DataType::Float64),
-            Arc::new(|| Ok(Box::new(AvgAccumulator::try_new(&DataType::Float64)?))),
-            Arc::new(vec![DataType::UInt64, DataType::Float64]),
-        );
-
-        ctx.register_udaf(my_avg);
-
-        // doesn't work as it was registered as non lowercase
-        let err = plan_and_collect(&mut ctx, "SELECT MY_AVG(i) FROM t")
-            .await
-            .unwrap_err();
-        assert_eq!(
-            err.to_string(),
-            "Error during planning: Invalid function \'my_avg\'"
-        );
-
-        // Can call it if you put quotes
-        let result = plan_and_collect(&mut ctx, "SELECT \"MY_AVG\"(i) FROM t").await?;
-
-        let expected = vec![
-            "+-----------+",
-            "| MY_AVG(i) |",
-            "+-----------+",
-            "| 1         |",
-            "+-----------+",
-        ];
-        assert_batches_eq!(expected, &result);
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn write_csv_results() -> Result<()> {
-        // create partitioned input file and context
-        let tmp_dir = TempDir::new()?;
-        let mut ctx = create_ctx(&tmp_dir, 4)?;
-
-        // execute a simple query and write the results to CSV
-        let out_dir = tmp_dir.as_ref().to_str().unwrap().to_string() + "/out";
-        write_csv(&mut ctx, "SELECT c1, c2 FROM test", &out_dir).await?;
-
-        // create a new context and verify that the results were saved to a partitioned csv file
-        let mut ctx = ExecutionContext::new();
-
-        let schema = Arc::new(Schema::new(vec![
-            Field::new("c1", DataType::UInt32, false),
-            Field::new("c2", DataType::UInt64, false),
-        ]));
-
-        // register each partition as well as the top level dir
-        let csv_read_option = CsvReadOptions::new().schema(&schema);
-        ctx.register_csv("part0", &format!("{}/part-0.csv", out_dir), csv_read_option)?;
-        ctx.register_csv("allparts", &out_dir, csv_read_option)?;
-
-        let part0 = plan_and_collect(&mut ctx, "SELECT c1, c2 FROM part0").await?;
-        let allparts = plan_and_collect(&mut ctx, "SELECT c1, c2 FROM allparts").await?;
-
-        let allparts_count: usize = allparts.iter().map(|batch| batch.num_rows()).sum();
-
-        assert_eq!(part0[0].schema(), allparts[0].schema());
-
-        assert_eq!(allparts_count, 40);
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn write_parquet_results() -> Result<()> {
-        // create partitioned input file and context
-        let tmp_dir = TempDir::new()?;
-        let mut ctx = create_ctx(&tmp_dir, 4)?;
-
-        // execute a simple query and write the results to CSV
-        let out_dir = tmp_dir.as_ref().to_str().unwrap().to_string() + "/out";
-        write_parquet(&mut ctx, "SELECT c1, c2 FROM test", &out_dir, None).await?;
-
-        // create a new context and verify that the results were saved to a partitioned csv file
-        let mut ctx = ExecutionContext::new();
-
-        // register each partition as well as the top level dir
-        ctx.register_parquet("part0", &format!("{}/part-0.parquet", out_dir))?;
-        ctx.register_parquet("part1", &format!("{}/part-1.parquet", out_dir))?;
-        ctx.register_parquet("part2", &format!("{}/part-2.parquet", out_dir))?;
-        ctx.register_parquet("part3", &format!("{}/part-3.parquet", out_dir))?;
-        ctx.register_parquet("allparts", &out_dir)?;
-
-        let part0 = plan_and_collect(&mut ctx, "SELECT c1, c2 FROM part0").await?;
-        let allparts = plan_and_collect(&mut ctx, "SELECT c1, c2 FROM allparts").await?;
-
-        let allparts_count: usize = allparts.iter().map(|batch| batch.num_rows()).sum();
-
-        assert_eq!(part0[0].schema(), allparts[0].schema());
-
-        assert_eq!(allparts_count, 40);
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn query_csv_with_custom_partition_extension() -> Result<()> {
-        let tmp_dir = TempDir::new()?;
-
-        // The main stipulation of this test: use a file extension that isn't .csv.
-        let file_extension = ".tst";
-
-        let mut ctx = ExecutionContext::new();
-        let schema = populate_csv_partitions(&tmp_dir, 2, file_extension)?;
-        ctx.register_csv(
-            "test",
-            tmp_dir.path().to_str().unwrap(),
-            CsvReadOptions::new()
-                .schema(&schema)
-                .file_extension(file_extension),
-        )?;
-        let results =
-            plan_and_collect(&mut ctx, "SELECT SUM(c1), SUM(c2), COUNT(*) FROM test")
-                .await?;
-
-        assert_eq!(results.len(), 1);
-        let expected = vec![
-            "+---------+---------+-----------------+",
-            "| SUM(c1) | SUM(c2) | COUNT(UInt8(1)) |",
-            "+---------+---------+-----------------+",
-            "| 10      | 110     | 20              |",
-            "+---------+---------+-----------------+",
-        ];
-        assert_batches_eq!(expected, &results);
-
-        Ok(())
-    }
-
-    #[test]
-    fn send_context_to_threads() -> Result<()> {
-        // ensure ExecutionContexts can be used in a multi-threaded
-        // environment. Usecase is for concurrent planing.
-        let tmp_dir = TempDir::new()?;
-        let partition_count = 4;
-        let ctx = Arc::new(Mutex::new(create_ctx(&tmp_dir, partition_count)?));
-
-        let threads: Vec<JoinHandle<Result<_>>> = (0..2)
-            .map(|_| ctx.clone())
-            .map(|ctx_clone| {
-                thread::spawn(move || {
-                    let ctx = ctx_clone.lock().expect("Locked context");
-                    // Ensure we can create logical plan code on a separate thread.
-                    ctx.create_logical_plan(
-                        "SELECT c1, c2 FROM test WHERE c1 > 0 AND c1 < 3",
-                    )
-                })
-            })
-            .collect();
-
-        for thread in threads {
-            thread.join().expect("Failed to join thread")?;
-        }
-        Ok(())
-    }
-    #[test]
-    fn ctx_sql_should_optimize_plan() -> Result<()> {
-        let mut ctx = ExecutionContext::new();
-        let plan1 =
-            ctx.create_logical_plan("SELECT * FROM (SELECT 1) WHERE TRUE AND TRUE")?;
-
-        let opt_plan1 = ctx.optimize(&plan1)?;
-
-        let plan2 = ctx.sql("SELECT * FROM (SELECT 1) WHERE TRUE AND TRUE")?;
-
-        assert_eq!(
-            format!("{:?}", opt_plan1),
-            format!("{:?}", plan2.to_logical_plan())
-        );
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn scalar_udf() -> Result<()> {
-        let schema = Schema::new(vec![
-            Field::new("a", DataType::Int32, false),
-            Field::new("b", DataType::Int32, false),
-        ]);
-
-        let batch = RecordBatch::try_new(
-            Arc::new(schema.clone()),
-            vec![
-                Arc::new(Int32Array::from(vec![1, 10, 10, 100])),
-                Arc::new(Int32Array::from(vec![2, 12, 12, 120])),
-            ],
-        )?;
-
-        let mut ctx = ExecutionContext::new();
-
-        let provider = MemTable::try_new(Arc::new(schema), vec![vec![batch]])?;
-        ctx.register_table("t", Arc::new(provider))?;
-
-        let myfunc = |args: &[ArrayRef]| {
-            let l = &args[0]
-                .as_any()
-                .downcast_ref::<Int32Array>()
-                .expect("cast failed");
-            let r = &args[1]
-                .as_any()
-                .downcast_ref::<Int32Array>()
-                .expect("cast failed");
-            Ok(Arc::new(add(l, r)?) as ArrayRef)
-        };
-        let myfunc = make_scalar_function(myfunc);
-
-        ctx.register_udf(create_udf(
-            "my_add",
-            vec![DataType::Int32, DataType::Int32],
-            Arc::new(DataType::Int32),
-            myfunc,
-        ));
-
-        // from here on, we may be in a different scope. We would still like to be able
-        // to call UDFs.
-
-        let t = ctx.table("t")?;
-
-        let plan = LogicalPlanBuilder::from(&t.to_logical_plan())
-            .project(vec![
-                col("a"),
-                col("b"),
-                ctx.udf("my_add")?.call(vec![col("a"), col("b")]),
-            ])?
-            .build()?;
-
-        assert_eq!(
-            format!("{:?}", plan),
-            "Projection: #a, #b, my_add(#a, #b)\n  TableScan: t projection=None"
-        );
-
-        let plan = ctx.optimize(&plan)?;
-        let plan = ctx.create_physical_plan(&plan)?;
-        let result = collect(plan).await?;
-
-        let expected = vec![
-            "+-----+-----+-------------+",
-            "| a   | b   | my_add(a,b) |",
-            "+-----+-----+-------------+",
-            "| 1   | 2   | 3           |",
-            "| 10  | 12  | 22          |",
-            "| 10  | 12  | 22          |",
-            "| 100 | 120 | 220         |",
-            "+-----+-----+-------------+",
-        ];
-        assert_batches_eq!(expected, &result);
-
-        let batch = &result[0];
-        let a = batch
-            .column(0)
-            .as_any()
-            .downcast_ref::<Int32Array>()
-            .expect("failed to cast a");
-        let b = batch
-            .column(1)
-            .as_any()
-            .downcast_ref::<Int32Array>()
-            .expect("failed to cast b");
-        let sum = batch
-            .column(2)
-            .as_any()
-            .downcast_ref::<Int32Array>()
-            .expect("failed to cast sum");
-
-        assert_eq!(4, a.len());
-        assert_eq!(4, b.len());
-        assert_eq!(4, sum.len());
-        for i in 0..sum.len() {
-            assert_eq!(a.value(i) + b.value(i), sum.value(i));
-        }
-
-        ctx.deregister_table("t")?;
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn simple_avg() -> Result<()> {
-        let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]);
-
-        let batch1 = RecordBatch::try_new(
-            Arc::new(schema.clone()),
-            vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
-        )?;
-        let batch2 = RecordBatch::try_new(
-            Arc::new(schema.clone()),
-            vec![Arc::new(Int32Array::from(vec![4, 5]))],
-        )?;
-
-        let mut ctx = ExecutionContext::new();
-
-        let provider =
-            MemTable::try_new(Arc::new(schema), vec![vec![batch1], vec![batch2]])?;
-        ctx.register_table("t", Arc::new(provider))?;
-
-        let result = plan_and_collect(&mut ctx, "SELECT AVG(a) FROM t").await?;
-
-        let batch = &result[0];
-        assert_eq!(1, batch.num_columns());
-        assert_eq!(1, batch.num_rows());
-
-        let values = batch
-            .column(0)
-            .as_any()
-            .downcast_ref::<Float64Array>()
-            .expect("failed to cast version");
-        assert_eq!(values.len(), 1);
-        // avg(1,2,3,4,5) = 3.0
-        assert_eq!(values.value(0), 3.0_f64);
-        Ok(())
-    }
-
-    /// tests the creation, registration and usage of a UDAF
-    #[tokio::test]
-    async fn simple_udaf() -> Result<()> {
-        let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]);
-
-        let batch1 = RecordBatch::try_new(
-            Arc::new(schema.clone()),
-            vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
-        )?;
-        let batch2 = RecordBatch::try_new(
-            Arc::new(schema.clone()),
-            vec![Arc::new(Int32Array::from(vec![4, 5]))],
-        )?;
-
-        let mut ctx = ExecutionContext::new();
-
-        let provider =
-            MemTable::try_new(Arc::new(schema), vec![vec![batch1], vec![batch2]])?;
-        ctx.register_table("t", Arc::new(provider))?;
-
-        // define a udaf, using a DataFusion's accumulator
-        let my_avg = create_udaf(
-            "my_avg",
-            DataType::Float64,
-            Arc::new(DataType::Float64),
-            Arc::new(|| Ok(Box::new(AvgAccumulator::try_new(&DataType::Float64)?))),
-            Arc::new(vec![DataType::UInt64, DataType::Float64]),
-        );
-
-        ctx.register_udaf(my_avg);
-
-        let result = plan_and_collect(&mut ctx, "SELECT MY_AVG(a) FROM t").await?;
-
-        let expected = vec![
-            "+-----------+",
-            "| my_avg(a) |",
-            "+-----------+",
-            "| 3         |",
-            "+-----------+",
-        ];
-        assert_batches_eq!(expected, &result);
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn custom_query_planner() -> Result<()> {
-        let mut ctx = ExecutionContext::with_config(
-            ExecutionConfig::new().with_query_planner(Arc::new(MyQueryPlanner {})),
-        );
-
-        let df = ctx.sql("SELECT 1")?;
-        df.collect().await.expect_err("query not supported");
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn information_schema_tables_not_exist_by_default() {
-        let mut ctx = ExecutionContext::new();
-
-        let err = plan_and_collect(&mut ctx, "SELECT * from information_schema.tables")
-            .await
-            .unwrap_err();
-        assert_eq!(
-            err.to_string(),
-            "Error during planning: Table or CTE with name 'information_schema.tables' not found"
-        );
-    }
-
-    #[tokio::test]
-    async fn information_schema_tables_no_tables() {
-        let mut ctx = ExecutionContext::with_config(
-            ExecutionConfig::new().with_information_schema(true),
-        );
-
-        let result =
-            plan_and_collect(&mut ctx, "SELECT * from information_schema.tables")
-                .await
-                .unwrap();
-
-        let expected = vec![
-            "+---------------+--------------------+------------+------------+",
-            "| table_catalog | table_schema       | table_name | table_type |",
-            "+---------------+--------------------+------------+------------+",
-            "| datafusion    | information_schema | columns    | VIEW       |",
-            "| datafusion    | information_schema | tables     | VIEW       |",
-            "+---------------+--------------------+------------+------------+",
-        ];
-        assert_batches_sorted_eq!(expected, &result);
-    }
-
-    #[tokio::test]
-    async fn information_schema_tables_tables_default_catalog() {
-        let mut ctx = ExecutionContext::with_config(
-            ExecutionConfig::new().with_information_schema(true),
-        );
-
-        // Now, register an empty table
-        ctx.register_table("t", test::table_with_sequence(1, 1).unwrap())
-            .unwrap();
-
-        let result =
-            plan_and_collect(&mut ctx, "SELECT * from information_schema.tables")
-                .await
-                .unwrap();
-
-        let expected = vec![
-            "+---------------+--------------------+------------+------------+",
-            "| table_catalog | table_schema       | table_name | table_type |",
-            "+---------------+--------------------+------------+------------+",
-            "| datafusion    | information_schema | tables     | VIEW       |",
-            "| datafusion    | information_schema | columns    | VIEW       |",
-            "| datafusion    | public             | t          | BASE TABLE |",
-            "+---------------+--------------------+------------+------------+",
-        ];
-        assert_batches_sorted_eq!(expected, &result);
-
-        // Newly added tables should appear
-        ctx.register_table("t2", test::table_with_sequence(1, 1).unwrap())
-            .unwrap();
-
-        let result =
-            plan_and_collect(&mut ctx, "SELECT * from information_schema.tables")
-                .await
-                .unwrap();
-
-        let expected = vec![
-            "+---------------+--------------------+------------+------------+",
-            "| table_catalog | table_schema       | table_name | table_type |",
-            "+---------------+--------------------+------------+------------+",
-            "| datafusion    | information_schema | columns    | VIEW       |",
-            "| datafusion    | information_schema | tables     | VIEW       |",
-            "| datafusion    | public             | t          | BASE TABLE |",
-            "| datafusion    | public             | t2         | BASE TABLE |",
-            "+---------------+--------------------+------------+------------+",
-        ];
-        assert_batches_sorted_eq!(expected, &result);
-    }
-
-    #[tokio::test]
-    async fn information_schema_tables_tables_with_multiple_catalogs() {
-        let mut ctx = ExecutionContext::with_config(
-            ExecutionConfig::new().with_information_schema(true),
-        );
-        let catalog = MemoryCatalogProvider::new();
-        let schema = MemorySchemaProvider::new();
-        schema
-            .register_table("t1".to_owned(), test::table_with_sequence(1, 1).unwrap())
-            .unwrap();
-        schema
-            .register_table("t2".to_owned(), test::table_with_sequence(1, 1).unwrap())
-            .unwrap();
-        catalog.register_schema("my_schema", Arc::new(schema));
-        ctx.register_catalog("my_catalog", Arc::new(catalog));
-
-        let catalog = MemoryCatalogProvider::new();
-        let schema = MemorySchemaProvider::new();
-        schema
-            .register_table("t3".to_owned(), test::table_with_sequence(1, 1).unwrap())
-            .unwrap();
-        catalog.register_schema("my_other_schema", Arc::new(schema));
-        ctx.register_catalog("my_other_catalog", Arc::new(catalog));
-
-        let result =
-            plan_and_collect(&mut ctx, "SELECT * from information_schema.tables")
-                .await
-                .unwrap();
-
-        let expected = vec![
-            "+------------------+--------------------+------------+------------+",
-            "| table_catalog    | table_schema       | table_name | table_type |",
-            "+------------------+--------------------+------------+------------+",
-            "| datafusion       | information_schema | columns    | VIEW       |",
-            "| datafusion       | information_schema | tables     | VIEW       |",
-            "| my_catalog       | information_schema | columns    | VIEW       |",
-            "| my_catalog       | information_schema | tables     | VIEW       |",
-            "| my_catalog       | my_schema          | t1         | BASE TABLE |",
-            "| my_catalog       | my_schema          | t2         | BASE TABLE |",
-            "| my_other_catalog | information_schema | columns    | VIEW       |",
-            "| my_other_catalog | information_schema | tables     | VIEW       |",
-            "| my_other_catalog | my_other_schema    | t3         | BASE TABLE |",
-            "+------------------+--------------------+------------+------------+",
-        ];
-        assert_batches_sorted_eq!(expected, &result);
-    }
-
-    #[tokio::test]
-    async fn information_schema_show_tables_no_information_schema() {
-        let mut ctx = ExecutionContext::with_config(ExecutionConfig::new());
-
-        ctx.register_table("t", test::table_with_sequence(1, 1).unwrap())
-            .unwrap();
-
-        // use show tables alias
-        let err = plan_and_collect(&mut ctx, "SHOW TABLES").await.unwrap_err();
-
-        assert_eq!(err.to_string(), "Error during planning: SHOW TABLES is not supported unless information_schema is enabled");
-    }
-
-    #[tokio::test]
-    async fn information_schema_show_tables() {
-        let mut ctx = ExecutionContext::with_config(
-            ExecutionConfig::new().with_information_schema(true),
-        );
-
-        ctx.register_table("t", test::table_with_sequence(1, 1).unwrap())
-            .unwrap();
-
-        // use show tables alias
-        let result = plan_and_collect(&mut ctx, "SHOW TABLES").await.unwrap();
-
-        let expected = vec![
-            "+---------------+--------------------+------------+------------+",
-            "| table_catalog | table_schema       | table_name | table_type |",
-            "+---------------+--------------------+------------+------------+",
-            "| datafusion    | information_schema | columns    | VIEW       |",
-            "| datafusion    | information_schema | tables     | VIEW       |",
-            "| datafusion    | public             | t          | BASE TABLE |",
-            "+---------------+--------------------+------------+------------+",
-        ];
-        assert_batches_sorted_eq!(expected, &result);
-
-        let result = plan_and_collect(&mut ctx, "SHOW tables").await.unwrap();
-
-        assert_batches_sorted_eq!(expected, &result);
-    }
-
-    #[tokio::test]
-    async fn information_schema_show_columns_no_information_schema() {
-        let mut ctx = ExecutionContext::with_config(ExecutionConfig::new());
-
-        ctx.register_table("t", test::table_with_sequence(1, 1).unwrap())
-            .unwrap();
-
-        let err = plan_and_collect(&mut ctx, "SHOW COLUMNS FROM t")
-            .await
-            .unwrap_err();
-
-        assert_eq!(err.to_string(), "Error during planning: SHOW COLUMNS is not supported unless information_schema is enabled");
-    }
-
-    #[tokio::test]
-    async fn information_schema_show_columns_like_where() {
-        let mut ctx = ExecutionContext::with_config(ExecutionConfig::new());
-
-        ctx.register_table("t", test::table_with_sequence(1, 1).unwrap())
-            .unwrap();
-
-        let expected =
-            "Error during planning: SHOW COLUMNS with WHERE or LIKE is not supported";
-
-        let err = plan_and_collect(&mut ctx, "SHOW COLUMNS FROM t LIKE 'f'")
-            .await
-            .unwrap_err();
-        assert_eq!(err.to_string(), expected);
-
-        let err =
-            plan_and_collect(&mut ctx, "SHOW COLUMNS FROM t WHERE column_name = 'bar'")
-                .await
-                .unwrap_err();
-        assert_eq!(err.to_string(), expected);
-    }
-
-    #[tokio::test]
-    async fn information_schema_show_columns() {
-        let mut ctx = ExecutionContext::with_config(
-            ExecutionConfig::new().with_information_schema(true),
-        );
-
-        ctx.register_table("t", test::table_with_sequence(1, 1).unwrap())
-            .unwrap();
-
-        let result = plan_and_collect(&mut ctx, "SHOW COLUMNS FROM t")
-            .await
-            .unwrap();
-
-        let expected = vec![
-            "+---------------+--------------+------------+-------------+-----------+-------------+",
-            "| table_catalog | table_schema | table_name | column_name | data_type | is_nullable |",
-            "+---------------+--------------+------------+-------------+-----------+-------------+",
-            "| datafusion    | public       | t          | i           | Int32     | YES         |",
-            "+---------------+--------------+------------+-------------+-----------+-------------+",
-        ];
-        assert_batches_sorted_eq!(expected, &result);
-
-        let result = plan_and_collect(&mut ctx, "SHOW columns from t")
-            .await
-            .unwrap();
-        assert_batches_sorted_eq!(expected, &result);
-
-        // This isn't ideal but it is consistent behavior for `SELECT * from T`
-        let err = plan_and_collect(&mut ctx, "SHOW columns from T")
-            .await
-            .unwrap_err();
-        assert_eq!(
-            err.to_string(),
-            "Error during planning: Unknown relation for SHOW COLUMNS: T"
-        );
-    }
-
-    // test errors with WHERE and LIKE
-    #[tokio::test]
-    async fn information_schema_show_columns_full_extended() {
-        let mut ctx = ExecutionContext::with_config(
-            ExecutionConfig::new().with_information_schema(true),
-        );
-
-        ctx.register_table("t", test::table_with_sequence(1, 1).unwrap())
-            .unwrap();
-
-        let result = plan_and_collect(&mut ctx, "SHOW FULL COLUMNS FROM t")
-            .await
-            .unwrap();
-        let expected = vec![
-
-    "+---------------+--------------+------------+-------------+------------------+----------------+-------------+-----------+--------------------------+------------------------+-------------------+-------------------------+---------------+--------------------+---------------+",
-    "| table_catalog | table_schema | table_name | column_name | ordinal_position | column_default | is_nullable | data_type | character_maximum_length | character_octet_length | numeric_precision | numeric_precision_radix | numeric_scale | datetime_precision | interval_type |",
-    "+---------------+--------------+------------+-------------+------------------+----------------+-------------+-----------+--------------------------+------------------------+-------------------+-------------------------+---------------+--------------------+---------------+",
-    "| datafusion    | public       | t          | i           | 0                |                | YES         | Int32     |                          |                        | 32                | 2                       |               |                    |               |",
-    "+---------------+--------------+------------+-------------+------------------+----------------+-------------+-----------+--------------------------+------------------------+-------------------+-------------------------+---------------+--------------------+---------------+",
-
-        ];
-        assert_batches_sorted_eq!(expected, &result);
-
-        let result = plan_and_collect(&mut ctx, "SHOW EXTENDED COLUMNS FROM t")
-            .await
-            .unwrap();
-        assert_batches_sorted_eq!(expected, &result);
-    }
-
-    #[tokio::test]
-    async fn information_schema_show_table_table_names() {
-        let mut ctx = ExecutionContext::with_config(
-            ExecutionConfig::new().with_information_schema(true),
-        );
-
-        ctx.register_table("t", test::table_with_sequence(1, 1).unwrap())
-            .unwrap();
-
-        let result = plan_and_collect(&mut ctx, "SHOW COLUMNS FROM public.t")
-            .await
-            .unwrap();
-
-        let expected = vec![
-            "+---------------+--------------+------------+-------------+-----------+-------------+",
-            "| table_catalog | table_schema | table_name | column_name | data_type | is_nullable |",
-            "+---------------+--------------+------------+-------------+-----------+-------------+",
-            "| datafusion    | public       | t          | i           | Int32     | YES         |",
-            "+---------------+--------------+------------+-------------+-----------+-------------+",
-        ];
-        assert_batches_sorted_eq!(expected, &result);
-
-        let result = plan_and_collect(&mut ctx, "SHOW columns from datafusion.public.t")
-            .await
-            .unwrap();
-        assert_batches_sorted_eq!(expected, &result);
-
-        let err = plan_and_collect(&mut ctx, "SHOW columns from t2")
-            .await
-            .unwrap_err();
-        assert_eq!(
-            err.to_string(),
-            "Error during planning: Unknown relation for SHOW COLUMNS: t2"
-        );
-
-        let err = plan_and_collect(&mut ctx, "SHOW columns from datafusion.public.t2")
-            .await
-            .unwrap_err();
-        assert_eq!(err.to_string(), "Error during planning: Unknown relation for SHOW COLUMNS: datafusion.public.t2");
-    }
-
-    #[tokio::test]
-    async fn show_unsupported() {
-        let mut ctx = ExecutionContext::with_config(ExecutionConfig::new());
-
-        let err = plan_and_collect(&mut ctx, "SHOW SOMETHING_UNKNOWN")
-            .await
-            .unwrap_err();
-
-        assert_eq!(err.to_string(), "This feature is not implemented: SHOW SOMETHING_UNKNOWN not implemented. Supported syntax: SHOW <TABLES>");
-    }
-
-    #[tokio::test]
-    async fn information_schema_columns_not_exist_by_default() {
-        let mut ctx = ExecutionContext::new();
-
-        let err = plan_and_collect(&mut ctx, "SELECT * from information_schema.columns")
-            .await
-            .unwrap_err();
-        assert_eq!(
-            err.to_string(),
-            "Error during planning: Table or CTE with name 'information_schema.columns' not found"
-        );
-    }
-
-    fn table_with_many_types() -> Arc<dyn TableProvider> {
-        let schema = Schema::new(vec![
-            Field::new("int32_col", DataType::Int32, false),
-            Field::new("float64_col", DataType::Float64, true),
-            Field::new("utf8_col", DataType::Utf8, true),
-            Field::new("large_utf8_col", DataType::LargeUtf8, false),
-            Field::new("binary_col", DataType::Binary, false),
-            Field::new("large_binary_col", DataType::LargeBinary, false),
-            Field::new(
-                "timestamp_nanos",
-                DataType::Timestamp(TimeUnit::Nanosecond, None),
-                false,
-            ),
-        ]);
-
-        let batch = RecordBatch::try_new(
-            Arc::new(schema.clone()),
-            vec![
-                Arc::new(Int32Array::from(vec![1])),
-                Arc::new(Float64Array::from(vec![1.0])),
-                Arc::new(StringArray::from(vec![Some("foo")])),
-                Arc::new(LargeStringArray::from(vec![Some("bar")])),
-                Arc::new(BinaryArray::from(vec![b"foo" as &[u8]])),
-                Arc::new(LargeBinaryArray::from(vec![b"foo" as &[u8]])),
-                Arc::new(TimestampNanosecondArray::from_opt_vec(
-                    vec![Some(123)],
-                    None,
-                )),
-            ],
-        )
-        .unwrap();
-        let provider = MemTable::try_new(Arc::new(schema), vec![vec![batch]]).unwrap();
-        Arc::new(provider)
-    }
-
-    #[tokio::test]
-    async fn information_schema_columns() {
-        let mut ctx = ExecutionContext::with_config(
-            ExecutionConfig::new().with_information_schema(true),
-        );
-        let catalog = MemoryCatalogProvider::new();
-        let schema = MemorySchemaProvider::new();
-
-        schema
-            .register_table("t1".to_owned(), test::table_with_sequence(1, 1).unwrap())
-            .unwrap();
-
-        schema
-            .register_table("t2".to_owned(), table_with_many_types())
-            .unwrap();
-        catalog.register_schema("my_schema", Arc::new(schema));
-        ctx.register_catalog("my_catalog", Arc::new(catalog));
-
-        let result =
-            plan_and_collect(&mut ctx, "SELECT * from information_schema.columns")
-                .await
-                .unwrap();
-
-        let expected = vec![
-    "+---------------+--------------+------------+------------------+------------------+----------------+-------------+-----------------------------+--------------------------+------------------------+-------------------+-------------------------+---------------+--------------------+---------------+",
-    "| table_catalog | table_schema | table_name | column_name      | ordinal_position | column_default | is_nullable | data_type                   | character_maximum_length | character_octet_length | numeric_precision | numeric_precision_radix | numeric_scale | datetime_precision | interval_type |",
-    "+---------------+--------------+------------+------------------+------------------+----------------+-------------+-----------------------------+--------------------------+------------------------+-------------------+-------------------------+---------------+--------------------+---------------+",
-    "| my_catalog    | my_schema    | t1         | i                | 0                |                | YES         | Int32                       |                          |                        | 32                | 2                       |               |                    |               |",
-    "| my_catalog    | my_schema    | t2         | binary_col       | 4                |                | NO          | Binary                      |                          | 2147483647             |                   |                         |               |                    |               |",
-    "| my_catalog    | my_schema    | t2         | float64_col      | 1                |                | YES         | Float64                     |                          |                        | 24                | 2                       |               |                    |               |",
-    "| my_catalog    | my_schema    | t2         | int32_col        | 0                |                | NO          | Int32                       |                          |                        | 32                | 2                       |               |                    |               |",
-    "| my_catalog    | my_schema    | t2         | large_binary_col | 5                |                | NO          | LargeBinary                 |                          | 9223372036854775807    |                   |                         |               |                    |               |",
-    "| my_catalog    | my_schema    | t2         | large_utf8_col   | 3                |                | NO          | LargeUtf8                   |                          | 9223372036854775807    |                   |                         |               |                    |               |",
-    "| my_catalog    | my_schema    | t2         | timestamp_nanos  | 6                |                | NO          | Timestamp(Nanosecond, None) |                          |                        |                   |                         |               |                    |               |",
-    "| my_catalog    | my_schema    | t2         | utf8_col         | 2                |                | YES         | Utf8                        |                          | 2147483647             |                   |                         |               |                    |               |",
-    "+---------------+--------------+------------+------------------+------------------+----------------+-------------+-----------------------------+--------------------------+------------------------+-------------------+-------------------------+---------------+--------------------+---------------+",
-        ];
-        assert_batches_sorted_eq!(expected, &result);
-    }
-
-    #[tokio::test]
-    async fn disabled_default_catalog_and_schema() -> Result<()> {
-        let mut ctx = ExecutionContext::with_config(
-            ExecutionConfig::new().create_default_catalog_and_schema(false),
-        );
-
-        assert!(matches!(
-            ctx.register_table("test", test::table_with_sequence(1, 1)?),
-            Err(DataFusionError::Plan(_))
-        ));
-
-        assert!(matches!(
-            ctx.sql("select * from datafusion.public.test"),
-            Err(DataFusionError::Plan(_))
-        ));
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn custom_catalog_and_schema() -> Result<()> {
-        let mut ctx = ExecutionContext::with_config(
-            ExecutionConfig::new()
-                .create_default_catalog_and_schema(false)
-                .with_default_catalog_and_schema("my_catalog", "my_schema"),
-        );
-
-        let catalog = MemoryCatalogProvider::new();
-        let schema = MemorySchemaProvider::new();
-        schema.register_table("test".to_owned(), test::table_with_sequence(1, 1)?)?;
-        catalog.register_schema("my_schema", Arc::new(schema));
-        ctx.register_catalog("my_catalog", Arc::new(catalog));
-
-        for table_ref in &["my_catalog.my_schema.test", "my_schema.test", "test"] {
-            let result = plan_and_collect(
-                &mut ctx,
-                &format!("SELECT COUNT(*) AS count FROM {}", table_ref),
-            )
-            .await?;
-
-            let expected = vec![
-                "+-------+",
-                "| count |",
-                "+-------+",
-                "| 1     |",
-                "+-------+",
-            ];
-            assert_batches_eq!(expected, &result);
-        }
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn cross_catalog_access() -> Result<()> {
-        let mut ctx = ExecutionContext::new();
-
-        let catalog_a = MemoryCatalogProvider::new();
-        let schema_a = MemorySchemaProvider::new();
-        schema_a
-            .register_table("table_a".to_owned(), test::table_with_sequence(1, 1)?)?;
-        catalog_a.register_schema("schema_a", Arc::new(schema_a));
-        ctx.register_catalog("catalog_a", Arc::new(catalog_a));
-
-        let catalog_b = MemoryCatalogProvider::new();
-        let schema_b = MemorySchemaProvider::new();
-        schema_b
-            .register_table("table_b".to_owned(), test::table_with_sequence(1, 2)?)?;
-        catalog_b.register_schema("schema_b", Arc::new(schema_b));
-        ctx.register_catalog("catalog_b", Arc::new(catalog_b));
-
-        let result = plan_and_collect(
-            &mut ctx,
-            "SELECT cat, SUM(i) AS total FROM (
-                    SELECT i, 'a' AS cat FROM catalog_a.schema_a.table_a
-                    UNION ALL
-                    SELECT i, 'b' AS cat FROM catalog_b.schema_b.table_b
-                )
-                GROUP BY cat
-                ORDER BY cat
-                ",
-        )
-        .await?;
-
-        let expected = vec![
-            "+-----+-------+",
-            "| cat | total |",
-            "+-----+-------+",
-            "| a   | 1     |",
-            "| b   | 3     |",
-            "+-----+-------+",
-        ];
-        assert_batches_eq!(expected, &result);
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn create_external_table_with_timestamps() {
-        let mut ctx = ExecutionContext::new();
-
-        let data = "Jorge,2018-12-13T12:12:10.011\n\
-                    Andrew,2018-11-13T17:11:10.011";
-
-        let tmp_dir = TempDir::new().unwrap();
-        let file_path = tmp_dir.path().join("timestamps.csv");
-
-        // scope to ensure the file is closed and written
-        {
-            File::create(&file_path)
-                .expect("creating temp file")
-                .write_all(data.as_bytes())
-                .expect("writing data");
-        }
-
-        let sql = format!(
-            "CREATE EXTERNAL TABLE csv_with_timestamps (
-                  name VARCHAR,
-                  ts TIMESTAMP
-              )
-              STORED AS CSV
-              LOCATION '{}'
-              ",
-            file_path.to_str().expect("path is utf8")
-        );
-
-        plan_and_collect(&mut ctx, &sql)
-            .await
-            .expect("Executing CREATE EXTERNAL TABLE");
-
-        let sql = "SELECT * from csv_with_timestamps";
-        let result = plan_and_collect(&mut ctx, &sql).await.unwrap();
-        let expected = vec![
-            "+--------+-------------------------+",
-            "| name   | ts                      |",
-            "+--------+-------------------------+",
-            "| Andrew | 2018-11-13 17:11:10.011 |",
-            "| Jorge  | 2018-12-13 12:12:10.011 |",
-            "+--------+-------------------------+",
-        ];
-        assert_batches_sorted_eq!(expected, &result);
-    }
-
-    struct MyPhysicalPlanner {}
-
-    impl PhysicalPlanner for MyPhysicalPlanner {
-        fn create_physical_plan(
-            &self,
-            _logical_plan: &LogicalPlan,
-            _ctx_state: &ExecutionContextState,
-        ) -> Result<Arc<dyn ExecutionPlan>> {
-            Err(DataFusionError::NotImplemented(
-                "query not supported".to_string(),
-            ))
-        }
-    }
-
-    struct MyQueryPlanner {}
-
-    impl QueryPlanner for MyQueryPlanner {
-        fn create_physical_plan(
-            &self,
-            logical_plan: &LogicalPlan,
-            ctx_state: &ExecutionContextState,
-        ) -> Result<Arc<dyn ExecutionPlan>> {
-            let physical_planner = MyPhysicalPlanner {};
-            physical_planner.create_physical_plan(logical_plan, ctx_state)
-        }
-    }
-
-    /// Execute SQL and return results
-    async fn plan_and_collect(
-        ctx: &mut ExecutionContext,
-        sql: &str,
-    ) -> Result<Vec<RecordBatch>> {
-        ctx.sql(sql)?.collect().await
-    }
-
-    /// Execute SQL and return results
-    async fn execute(sql: &str, partition_count: usize) -> Result<Vec<RecordBatch>> {
-        let tmp_dir = TempDir::new()?;
-        let mut ctx = create_ctx(&tmp_dir, partition_count)?;
-        plan_and_collect(&mut ctx, sql).await
-    }
-
-    /// Execute SQL and write results to partitioned csv files
-    async fn write_csv(
-        ctx: &mut ExecutionContext,
-        sql: &str,
-        out_dir: &str,
-    ) -> Result<()> {
-        let logical_plan = ctx.create_logical_plan(sql)?;
-        let logical_plan = ctx.optimize(&logical_plan)?;
-        let physical_plan = ctx.create_physical_plan(&logical_plan)?;
-        ctx.write_csv(physical_plan, out_dir.to_string()).await
-    }
-
-    /// Execute SQL and write results to partitioned parquet files
-    async fn write_parquet(
-        ctx: &mut ExecutionContext,
-        sql: &str,
-        out_dir: &str,
-        writer_properties: Option<WriterProperties>,
-    ) -> Result<()> {
-        let logical_plan = ctx.create_logical_plan(sql)?;
-        let logical_plan = ctx.optimize(&logical_plan)?;
-        let physical_plan = ctx.create_physical_plan(&logical_plan)?;
-        ctx.write_parquet(physical_plan, out_dir.to_string(), writer_properties)
-            .await
-    }
-
-    /// Generate CSV partitions within the supplied directory
-    fn populate_csv_partitions(
-        tmp_dir: &TempDir,
-        partition_count: usize,
-        file_extension: &str,
-    ) -> Result<SchemaRef> {
-        // define schema for data source (csv file)
-        let schema = Arc::new(Schema::new(vec![
-            Field::new("c1", DataType::UInt32, false),
-            Field::new("c2", DataType::UInt64, false),
-            Field::new("c3", DataType::Boolean, false),
-        ]));
-
-        // generate a partitioned file
-        for partition in 0..partition_count {
-            let filename = format!("partition-{}.{}", partition, file_extension);
-            let file_path = tmp_dir.path().join(&filename);
-            let mut file = File::create(file_path)?;
-
-            // generate some data
-            for i in 0..=10 {
-                let data = format!("{},{},{}\n", partition, i, i % 2 == 0);
-                file.write_all(data.as_bytes())?;
-            }
-        }
-
-        Ok(schema)
-    }
-
-    /// Generate a partitioned CSV file and register it with an execution context
-    fn create_ctx(tmp_dir: &TempDir, partition_count: usize) -> Result<ExecutionContext> {
-        let mut ctx =
-            ExecutionContext::with_config(ExecutionConfig::new().with_concurrency(8));
-
-        let schema = populate_csv_partitions(tmp_dir, partition_count, ".csv")?;
-
-        // register csv file with the execution context
-        ctx.register_csv(
-            "test",
-            tmp_dir.path().to_str().unwrap(),
-            CsvReadOptions::new().schema(&schema),
-        )?;
-
-        Ok(ctx)
-    }
-}
diff --git a/rust/datafusion/src/execution/dataframe_impl.rs b/rust/datafusion/src/execution/dataframe_impl.rs
deleted file mode 100644
index 2a0c39aa48e..00000000000
--- a/rust/datafusion/src/execution/dataframe_impl.rs
+++ /dev/null
@@ -1,374 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Implementation of DataFrame API.
-
-use std::sync::{Arc, Mutex};
-
-use crate::arrow::record_batch::RecordBatch;
-use crate::error::Result;
-use crate::execution::context::{ExecutionContext, ExecutionContextState};
-use crate::logical_plan::{
-    col, DFSchema, Expr, FunctionRegistry, JoinType, LogicalPlan, LogicalPlanBuilder,
-    Partitioning,
-};
-use crate::{
-    dataframe::*,
-    physical_plan::{collect, collect_partitioned},
-};
-
-use async_trait::async_trait;
-
-/// Implementation of DataFrame API
-pub struct DataFrameImpl {
-    ctx_state: Arc<Mutex<ExecutionContextState>>,
-    plan: LogicalPlan,
-}
-
-impl DataFrameImpl {
-    /// Create a new Table based on an existing logical plan
-    pub fn new(ctx_state: Arc<Mutex<ExecutionContextState>>, plan: &LogicalPlan) -> Self {
-        Self {
-            ctx_state,
-            plan: plan.clone(),
-        }
-    }
-}
-
-#[async_trait]
-impl DataFrame for DataFrameImpl {
-    /// Apply a projection based on a list of column names
-    fn select_columns(&self, columns: &[&str]) -> Result<Arc<dyn DataFrame>> {
-        let fields = columns
-            .iter()
-            .map(|name| self.plan.schema().field_with_unqualified_name(name))
-            .collect::<Result<Vec<_>>>()?;
-        let expr: Vec<Expr> = fields.iter().map(|f| col(f.name())).collect();
-        self.select(expr)
-    }
-
-    /// Create a projection based on arbitrary expressions
-    fn select(&self, expr_list: Vec<Expr>) -> Result<Arc<dyn DataFrame>> {
-        let plan = LogicalPlanBuilder::from(&self.plan)
-            .project(expr_list)?
-            .build()?;
-        Ok(Arc::new(DataFrameImpl::new(self.ctx_state.clone(), &plan)))
-    }
-
-    /// Create a filter based on a predicate expression
-    fn filter(&self, predicate: Expr) -> Result<Arc<dyn DataFrame>> {
-        let plan = LogicalPlanBuilder::from(&self.plan)
-            .filter(predicate)?
-            .build()?;
-        Ok(Arc::new(DataFrameImpl::new(self.ctx_state.clone(), &plan)))
-    }
-
-    /// Perform an aggregate query
-    fn aggregate(
-        &self,
-        group_expr: Vec<Expr>,
-        aggr_expr: Vec<Expr>,
-    ) -> Result<Arc<dyn DataFrame>> {
-        let plan = LogicalPlanBuilder::from(&self.plan)
-            .aggregate(group_expr, aggr_expr)?
-            .build()?;
-        Ok(Arc::new(DataFrameImpl::new(self.ctx_state.clone(), &plan)))
-    }
-
-    /// Limit the number of rows
-    fn limit(&self, n: usize) -> Result<Arc<dyn DataFrame>> {
-        let plan = LogicalPlanBuilder::from(&self.plan).limit(n)?.build()?;
-        Ok(Arc::new(DataFrameImpl::new(self.ctx_state.clone(), &plan)))
-    }
-
-    /// Sort by specified sorting expressions
-    fn sort(&self, expr: Vec<Expr>) -> Result<Arc<dyn DataFrame>> {
-        let plan = LogicalPlanBuilder::from(&self.plan).sort(expr)?.build()?;
-        Ok(Arc::new(DataFrameImpl::new(self.ctx_state.clone(), &plan)))
-    }
-
-    /// Join with another DataFrame
-    fn join(
-        &self,
-        right: Arc<dyn DataFrame>,
-        join_type: JoinType,
-        left_cols: &[&str],
-        right_cols: &[&str],
-    ) -> Result<Arc<dyn DataFrame>> {
-        let plan = LogicalPlanBuilder::from(&self.plan)
-            .join(&right.to_logical_plan(), join_type, left_cols, right_cols)?
-            .build()?;
-        Ok(Arc::new(DataFrameImpl::new(self.ctx_state.clone(), &plan)))
-    }
-
-    fn repartition(
-        &self,
-        partitioning_scheme: Partitioning,
-    ) -> Result<Arc<dyn DataFrame>> {
-        let plan = LogicalPlanBuilder::from(&self.plan)
-            .repartition(partitioning_scheme)?
-            .build()?;
-        Ok(Arc::new(DataFrameImpl::new(self.ctx_state.clone(), &plan)))
-    }
-
-    /// Convert to logical plan
-    fn to_logical_plan(&self) -> LogicalPlan {
-        self.plan.clone()
-    }
-
-    // Convert the logical plan represented by this DataFrame into a physical plan and
-    // execute it
-    async fn collect(&self) -> Result<Vec<RecordBatch>> {
-        let state = self.ctx_state.lock().unwrap().clone();
-        let ctx = ExecutionContext::from(Arc::new(Mutex::new(state)));
-        let plan = ctx.optimize(&self.plan)?;
-        let plan = ctx.create_physical_plan(&plan)?;
-        Ok(collect(plan).await?)
-    }
-
-    // Convert the logical plan represented by this DataFrame into a physical plan and
-    // execute it
-    async fn collect_partitioned(&self) -> Result<Vec<Vec<RecordBatch>>> {
-        let state = self.ctx_state.lock().unwrap().clone();
-        let ctx = ExecutionContext::from(Arc::new(Mutex::new(state)));
-        let plan = ctx.optimize(&self.plan)?;
-        let plan = ctx.create_physical_plan(&plan)?;
-        Ok(collect_partitioned(plan).await?)
-    }
-
-    /// Returns the schema from the logical plan
-    fn schema(&self) -> &DFSchema {
-        self.plan.schema()
-    }
-
-    fn explain(&self, verbose: bool) -> Result<Arc<dyn DataFrame>> {
-        let plan = LogicalPlanBuilder::from(&self.plan)
-            .explain(verbose)?
-            .build()?;
-        Ok(Arc::new(DataFrameImpl::new(self.ctx_state.clone(), &plan)))
-    }
-
-    fn registry(&self) -> Arc<dyn FunctionRegistry> {
-        let registry = self.ctx_state.lock().unwrap().clone();
-        Arc::new(registry)
-    }
-
-    fn union(&self, dataframe: Arc<dyn DataFrame>) -> Result<Arc<dyn DataFrame>> {
-        let plan = LogicalPlanBuilder::from(&self.plan)
-            .union(dataframe.to_logical_plan())?
-            .build()?;
-        Ok(Arc::new(DataFrameImpl::new(self.ctx_state.clone(), &plan)))
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::execution::context::ExecutionContext;
-    use crate::logical_plan::*;
-    use crate::{datasource::csv::CsvReadOptions, physical_plan::ColumnarValue};
-    use crate::{physical_plan::functions::ScalarFunctionImplementation, test};
-    use arrow::datatypes::DataType;
-
-    #[test]
-    fn select_columns() -> Result<()> {
-        // build plan using Table API
-        let t = test_table()?;
-        let t2 = t.select_columns(&["c1", "c2", "c11"])?;
-        let plan = t2.to_logical_plan();
-
-        // build query using SQL
-        let sql_plan = create_plan("SELECT c1, c2, c11 FROM aggregate_test_100")?;
-
-        // the two plans should be identical
-        assert_same_plan(&plan, &sql_plan);
-
-        Ok(())
-    }
-
-    #[test]
-    fn select_expr() -> Result<()> {
-        // build plan using Table API
-        let t = test_table()?;
-        let t2 = t.select(vec![col("c1"), col("c2"), col("c11")])?;
-        let plan = t2.to_logical_plan();
-
-        // build query using SQL
-        let sql_plan = create_plan("SELECT c1, c2, c11 FROM aggregate_test_100")?;
-
-        // the two plans should be identical
-        assert_same_plan(&plan, &sql_plan);
-
-        Ok(())
-    }
-
-    #[test]
-    fn aggregate() -> Result<()> {
-        // build plan using DataFrame API
-        let df = test_table()?;
-        let group_expr = vec![col("c1")];
-        let aggr_expr = vec![
-            min(col("c12")),
-            max(col("c12")),
-            avg(col("c12")),
-            sum(col("c12")),
-            count(col("c12")),
-            count_distinct(col("c12")),
-        ];
-
-        let df = df.aggregate(group_expr, aggr_expr)?;
-
-        let plan = df.to_logical_plan();
-
-        // build same plan using SQL API
-        let sql = "SELECT c1, MIN(c12), MAX(c12), AVG(c12), SUM(c12), COUNT(c12), COUNT(DISTINCT c12) \
-                   FROM aggregate_test_100 \
-                   GROUP BY c1";
-        let sql_plan = create_plan(sql)?;
-
-        // the two plans should be identical
-        assert_same_plan(&plan, &sql_plan);
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn join() -> Result<()> {
-        let left = test_table()?.select_columns(&["c1", "c2"])?;
-        let right = test_table()?.select_columns(&["c1", "c3"])?;
-        let left_rows = left.collect().await?;
-        let right_rows = right.collect().await?;
-        let join = left.join(right, JoinType::Inner, &["c1"], &["c1"])?;
-        let join_rows = join.collect().await?;
-        assert_eq!(100, left_rows.iter().map(|x| x.num_rows()).sum::<usize>());
-        assert_eq!(100, right_rows.iter().map(|x| x.num_rows()).sum::<usize>());
-        assert_eq!(2008, join_rows.iter().map(|x| x.num_rows()).sum::<usize>());
-        Ok(())
-    }
-
-    #[test]
-    fn limit() -> Result<()> {
-        // build query using Table API
-        let t = test_table()?;
-        let t2 = t.select_columns(&["c1", "c2", "c11"])?.limit(10)?;
-        let plan = t2.to_logical_plan();
-
-        // build query using SQL
-        let sql_plan =
-            create_plan("SELECT c1, c2, c11 FROM aggregate_test_100 LIMIT 10")?;
-
-        // the two plans should be identical
-        assert_same_plan(&plan, &sql_plan);
-
-        Ok(())
-    }
-
-    #[test]
-    fn explain() -> Result<()> {
-        // build query using Table API
-        let df = test_table()?;
-        let df = df
-            .select_columns(&["c1", "c2", "c11"])?
-            .limit(10)?
-            .explain(false)?;
-        let plan = df.to_logical_plan();
-
-        // build query using SQL
-        let sql_plan =
-            create_plan("EXPLAIN SELECT c1, c2, c11 FROM aggregate_test_100 LIMIT 10")?;
-
-        // the two plans should be identical
-        assert_same_plan(&plan, &sql_plan);
-
-        Ok(())
-    }
-
-    #[test]
-    fn registry() -> Result<()> {
-        let mut ctx = ExecutionContext::new();
-        register_aggregate_csv(&mut ctx)?;
-
-        // declare the udf
-        let my_fn: ScalarFunctionImplementation =
-            Arc::new(|_: &[ColumnarValue]| unimplemented!("my_fn is not implemented"));
-
-        // create and register the udf
-        ctx.register_udf(create_udf(
-            "my_fn",
-            vec![DataType::Float64],
-            Arc::new(DataType::Float64),
-            my_fn,
-        ));
-
-        // build query with a UDF using DataFrame API
-        let df = ctx.table("aggregate_test_100")?;
-
-        let f = df.registry();
-
-        let df = df.select(vec![f.udf("my_fn")?.call(vec![col("c12")])])?;
-        let plan = df.to_logical_plan();
-
-        // build query using SQL
-        let sql_plan =
-            ctx.create_logical_plan("SELECT my_fn(c12) FROM aggregate_test_100")?;
-
-        // the two plans should be identical
-        assert_same_plan(&plan, &sql_plan);
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn sendable() {
-        let df = test_table().unwrap();
-        // dataframes should be sendable between threads/tasks
-        let task = tokio::task::spawn(async move {
-            df.select_columns(&["c1"])
-                .expect("should be usable in a task")
-        });
-        task.await.expect("task completed successfully");
-    }
-
-    /// Compare the formatted string representation of two plans for equality
-    fn assert_same_plan(plan1: &LogicalPlan, plan2: &LogicalPlan) {
-        assert_eq!(format!("{:?}", plan1), format!("{:?}", plan2));
-    }
-
-    /// Create a logical plan from a SQL query
-    fn create_plan(sql: &str) -> Result<LogicalPlan> {
-        let mut ctx = ExecutionContext::new();
-        register_aggregate_csv(&mut ctx)?;
-        ctx.create_logical_plan(sql)
-    }
-
-    fn test_table() -> Result<Arc<dyn DataFrame + 'static>> {
-        let mut ctx = ExecutionContext::new();
-        register_aggregate_csv(&mut ctx)?;
-        ctx.table("aggregate_test_100")
-    }
-
-    fn register_aggregate_csv(ctx: &mut ExecutionContext) -> Result<()> {
-        let schema = test::aggr_test_schema();
-        let testdata = arrow::util::test_util::arrow_test_data();
-        ctx.register_csv(
-            "aggregate_test_100",
-            &format!("{}/csv/aggregate_test_100.csv", testdata),
-            CsvReadOptions::new().schema(&schema.as_ref()),
-        )?;
-        Ok(())
-    }
-}
diff --git a/rust/datafusion/src/execution/mod.rs b/rust/datafusion/src/execution/mod.rs
deleted file mode 100644
index ff44dd43f83..00000000000
--- a/rust/datafusion/src/execution/mod.rs
+++ /dev/null
@@ -1,21 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! DataFusion query execution
-
-pub mod context;
-pub mod dataframe_impl;
diff --git a/rust/datafusion/src/lib.rs b/rust/datafusion/src/lib.rs
deleted file mode 100644
index 44a8a686a49..00000000000
--- a/rust/datafusion/src/lib.rs
+++ /dev/null
@@ -1,211 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-#![warn(missing_docs)]
-// Clippy lints, some should be disabled incrementally
-#![allow(
-    clippy::float_cmp,
-    clippy::from_over_into,
-    clippy::module_inception,
-    clippy::new_without_default,
-    clippy::type_complexity,
-    clippy::upper_case_acronyms
-)]
-
-//! [DataFusion](https://github.com/apache/arrow/tree/master/rust/datafusion)
-//! is an extensible query execution framework that uses
-//! [Apache Arrow](https://arrow.apache.org) as its in-memory format.
-//!
-//! DataFusion supports both an SQL and a DataFrame API for building logical query plans
-//! as well as a query optimizer and execution engine capable of parallel execution
-//! against partitioned data sources (CSV and Parquet) using threads.
-//!
-//! Below is an example of how to execute a query against data stored
-//! in a CSV file using a [`DataFrame`](dataframe::DataFrame):
-//!
-//! ```rust
-//! # use datafusion::prelude::*;
-//! # use datafusion::error::Result;
-//! # use arrow::record_batch::RecordBatch;
-//!
-//! # #[tokio::main]
-//! # async fn main() -> Result<()> {
-//! let mut ctx = ExecutionContext::new();
-//!
-//! // create the dataframe
-//! let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
-//!
-//! // create a plan
-//! let df = df.filter(col("a").lt_eq(col("b")))?
-//!            .aggregate(vec![col("a")], vec![min(col("b"))])?
-//!            .limit(100)?;
-//!
-//! // execute the plan
-//! let results: Vec<RecordBatch> = df.collect().await?;
-//!
-//! // format the results
-//! let pretty_results = arrow::util::pretty::pretty_format_batches(&results)?;
-//!
-//! let expected = vec![
-//!     "+---+--------+",
-//!     "| a | MIN(b) |",
-//!     "+---+--------+",
-//!     "| 1 | 2      |",
-//!     "+---+--------+"
-//! ];
-//!
-//! assert_eq!(pretty_results.trim().lines().collect::<Vec<_>>(), expected);
-//! # Ok(())
-//! # }
-//! ```
-//!
-//! and how to execute a query against a CSV using SQL:
-//!
-//! ```
-//! # use datafusion::prelude::*;
-//! # use datafusion::error::Result;
-//! # use arrow::record_batch::RecordBatch;
-//!
-//! # #[tokio::main]
-//! # async fn main() -> Result<()> {
-//! let mut ctx = ExecutionContext::new();
-//!
-//! ctx.register_csv("example", "tests/example.csv", CsvReadOptions::new())?;
-//!
-//! // create a plan
-//! let df = ctx.sql("SELECT a, MIN(b) FROM example GROUP BY a LIMIT 100")?;
-//!
-//! // execute the plan
-//! let results: Vec<RecordBatch> = df.collect().await?;
-//!
-//! // format the results
-//! let pretty_results = arrow::util::pretty::pretty_format_batches(&results)?;
-//!
-//! let expected = vec![
-//!     "+---+--------+",
-//!     "| a | MIN(b) |",
-//!     "+---+--------+",
-//!     "| 1 | 2      |",
-//!     "+---+--------+"
-//! ];
-//!
-//! assert_eq!(pretty_results.trim().lines().collect::<Vec<_>>(), expected);
-//! # Ok(())
-//! # }
-//! ```
-//!
-//! ## Parse, Plan, Optimize, Execute
-//!
-//! DataFusion is a fully fledged query engine capable of performing complex operations.
-//! Specifically, when DataFusion receives an SQL query, there are different steps
-//! that it passes through until a result is obtained. Broadly, they are:
-//!
-//! 1. The string is parsed to an Abstract syntax tree (AST) using [sqlparser](https://docs.rs/sqlparser/0.6.1/sqlparser/).
-//! 2. The planner [`SqlToRel`](sql::planner::SqlToRel) converts logical expressions on the AST to logical expressions [`Expr`s](logical_plan::Expr).
-//! 3. The planner [`SqlToRel`](sql::planner::SqlToRel) converts logical nodes on the AST to a [`LogicalPlan`](logical_plan::LogicalPlan).
-//! 4. [`OptimizerRules`](optimizer::optimizer::OptimizerRule) are applied to the [`LogicalPlan`](logical_plan::LogicalPlan) to optimize it.
-//! 5. The [`LogicalPlan`](logical_plan::LogicalPlan) is converted to an [`ExecutionPlan`](physical_plan::ExecutionPlan) by a [`PhysicalPlanner`](physical_plan::PhysicalPlanner)
-//! 6. The [`ExecutionPlan`](physical_plan::ExecutionPlan) is executed against data through the [`ExecutionContext`](execution::context::ExecutionContext)
-//!
-//! With a [`DataFrame`](dataframe::DataFrame) API, steps 1-3 are not used as the DataFrame builds the [`LogicalPlan`](logical_plan::LogicalPlan) directly.
-//!
-//! Phases 1-5 are typically cheap when compared to phase 6, and thus DataFusion puts a
-//! lot of effort to ensure that phase 6 runs efficiently and without errors.
-//!
-//! DataFusion's planning is divided in two main parts: logical planning and physical planning.
-//!
-//! ### Logical plan
-//!
-//! Logical planning yields [`logical plans`](logical_plan::LogicalPlan) and [`logical expressions`](logical_plan::Expr).
-//! These are [`Schema`](arrow::datatypes::Schema)-aware traits that represent statements whose result is independent of how it should physically be executed.
-//!
-//! A [`LogicalPlan`](logical_plan::LogicalPlan) is a Direct Asyclic graph of other [`LogicalPlan`s](logical_plan::LogicalPlan) and each node contains logical expressions ([`Expr`s](logical_plan::Expr)).
-//! All of these are located in [`logical_plan`](logical_plan).
-//!
-//! ### Physical plan
-//!
-//! A Physical plan ([`ExecutionPlan`](physical_plan::ExecutionPlan)) is a plan that can be executed against data.
-//! Contrarily to a logical plan, the physical plan has concrete information about how the calculation
-//! should be performed (e.g. what Rust functions are used) and how data should be loaded into memory.
-//!
-//! [`ExecutionPlan`](physical_plan::ExecutionPlan) uses the Arrow format as its in-memory representation of data, through the [arrow] crate.
-//! We recommend going through [its documentation](arrow) for details on how the data is physically represented.
-//!
-//! A [`ExecutionPlan`](physical_plan::ExecutionPlan) is composed by nodes (implement the trait [`ExecutionPlan`](physical_plan::ExecutionPlan)),
-//! and each node is composed by physical expressions ([`PhysicalExpr`](physical_plan::PhysicalExpr))
-//! or aggreagate expressions ([`AggregateExpr`](physical_plan::AggregateExpr)).
-//! All of these are located in the module [`physical_plan`](physical_plan).
-//!
-//! Broadly speaking,
-//!
-//! * an [`ExecutionPlan`](physical_plan::ExecutionPlan) receives a partition number and asyncronosly returns
-//!   an iterator over [`RecordBatch`](arrow::record_batch::RecordBatch)
-//!   (a node-specific struct that implements [`RecordBatchReader`](arrow::record_batch::RecordBatchReader))
-//! * a [`PhysicalExpr`](physical_plan::PhysicalExpr) receives a [`RecordBatch`](arrow::record_batch::RecordBatch)
-//!   and returns an [`Array`](arrow::array::Array)
-//! * an [`AggregateExpr`](physical_plan::AggregateExpr) receives [`RecordBatch`es](arrow::record_batch::RecordBatch)
-//!   and returns a [`RecordBatch`](arrow::record_batch::RecordBatch) of a single row(*)
-//!
-//! (*) Technically, it aggregates the results on each partition and then merges the results into a single partition.
-//!
-//! The following physical nodes are currently implemented:
-//!
-//! * Projection: [`ProjectionExec`](physical_plan::projection::ProjectionExec)
-//! * Filter: [`FilterExec`](physical_plan::filter::FilterExec)
-//! * Hash and Grouped aggregations: [`HashAggregateExec`](physical_plan::hash_aggregate::HashAggregateExec)
-//! * Sort: [`SortExec`](physical_plan::sort::SortExec)
-//! * Merge (partitions): [`MergeExec`](physical_plan::merge::MergeExec)
-//! * Limit: [`LocalLimitExec`](physical_plan::limit::LocalLimitExec) and [`GlobalLimitExec`](physical_plan::limit::GlobalLimitExec)
-//! * Scan a CSV: [`CsvExec`](physical_plan::csv::CsvExec)
-//! * Scan a Parquet: [`ParquetExec`](physical_plan::parquet::ParquetExec)
-//! * Scan from memory: [`MemoryExec`](physical_plan::memory::MemoryExec)
-//! * Explain the plan: [`ExplainExec`](physical_plan::explain::ExplainExec)
-//!
-//! ## Customize
-//!
-//! DataFusion allows users to
-//! * extend the planner to use user-defined logical and physical nodes ([`QueryPlanner`](execution::context::QueryPlanner))
-//! * declare and use user-defined scalar functions ([`ScalarUDF`](physical_plan::udf::ScalarUDF))
-//! * declare and use user-defined aggregate functions ([`AggregateUDF`](physical_plan::udaf::AggregateUDF))
-//!
-//! you can find examples of each of them in examples section.
-
-extern crate arrow;
-extern crate sqlparser;
-
-pub mod catalog;
-pub mod dataframe;
-pub mod datasource;
-pub mod error;
-pub mod execution;
-pub mod logical_plan;
-pub mod optimizer;
-pub mod physical_optimizer;
-pub mod physical_plan;
-pub mod prelude;
-pub mod scalar;
-pub mod sql;
-pub mod variable;
-
-#[cfg(test)]
-pub mod test;
-
-#[macro_use]
-#[cfg(feature = "regex_expressions")]
-extern crate lazy_static;
-
-#[cfg(doctest)]
-doc_comment::doctest!("../README.md", readme_example_test);
diff --git a/rust/datafusion/src/logical_plan/builder.rs b/rust/datafusion/src/logical_plan/builder.rs
deleted file mode 100644
index fed82fd23b8..00000000000
--- a/rust/datafusion/src/logical_plan/builder.rs
+++ /dev/null
@@ -1,595 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! This module provides a builder for creating LogicalPlans
-
-use std::{collections::HashMap, sync::Arc};
-
-use arrow::{
-    datatypes::{Schema, SchemaRef},
-    record_batch::RecordBatch,
-};
-
-use crate::datasource::TableProvider;
-use crate::error::{DataFusionError, Result};
-use crate::{
-    datasource::{empty::EmptyTable, parquet::ParquetTable, CsvFile, MemTable},
-    prelude::CsvReadOptions,
-};
-
-use super::dfschema::ToDFSchema;
-use super::{
-    col, exprlist_to_fields, Expr, JoinType, LogicalPlan, PlanType, StringifiedPlan,
-};
-use crate::logical_plan::{DFField, DFSchema, DFSchemaRef, Partitioning};
-use std::collections::HashSet;
-
-/// Builder for logical plans
-///
-/// ```
-/// # use datafusion::prelude::*;
-/// # use datafusion::logical_plan::LogicalPlanBuilder;
-/// # use datafusion::error::Result;
-/// # use arrow::datatypes::{Schema, DataType, Field};
-/// #
-/// # fn main() -> Result<()> {
-/// #
-/// # fn employee_schema() -> Schema {
-/// #    Schema::new(vec![
-/// #           Field::new("id", DataType::Int32, false),
-/// #           Field::new("first_name", DataType::Utf8, false),
-/// #           Field::new("last_name", DataType::Utf8, false),
-/// #           Field::new("state", DataType::Utf8, false),
-/// #           Field::new("salary", DataType::Int32, false),
-/// #       ])
-/// #   }
-/// #
-/// // Create a plan similar to
-/// // SELECT last_name
-/// // FROM employees
-/// // WHERE salary < 1000
-/// let plan = LogicalPlanBuilder::scan_empty(
-///              "employee.csv",
-///              &employee_schema(),
-///              None,
-///            )?
-///            // Keep only rows where salary < 1000
-///            .filter(col("salary").lt_eq(lit(1000)))?
-///            // only show "last_name" in the final results
-///            .project(vec![col("last_name")])?
-///            .build()?;
-///
-/// # Ok(())
-/// # }
-/// ```
-pub struct LogicalPlanBuilder {
-    plan: LogicalPlan,
-}
-
-impl LogicalPlanBuilder {
-    /// Create a builder from an existing plan
-    pub fn from(plan: &LogicalPlan) -> Self {
-        Self { plan: plan.clone() }
-    }
-
-    /// Create an empty relation.
-    ///
-    /// `produce_one_row` set to true means this empty node needs to produce a placeholder row.
-    pub fn empty(produce_one_row: bool) -> Self {
-        Self::from(&LogicalPlan::EmptyRelation {
-            produce_one_row,
-            schema: DFSchemaRef::new(DFSchema::empty()),
-        })
-    }
-
-    /// Scan a memory data source
-    pub fn scan_memory(
-        partitions: Vec<Vec<RecordBatch>>,
-        schema: SchemaRef,
-        projection: Option<Vec<usize>>,
-    ) -> Result<Self> {
-        let provider = Arc::new(MemTable::try_new(schema, partitions)?);
-        Self::scan("", provider, projection)
-    }
-
-    /// Scan a CSV data source
-    pub fn scan_csv(
-        path: &str,
-        options: CsvReadOptions,
-        projection: Option<Vec<usize>>,
-    ) -> Result<Self> {
-        let provider = Arc::new(CsvFile::try_new(path, options)?);
-        Self::scan("", provider, projection)
-    }
-
-    /// Scan a Parquet data source
-    pub fn scan_parquet(
-        path: &str,
-        projection: Option<Vec<usize>>,
-        max_concurrency: usize,
-    ) -> Result<Self> {
-        let provider = Arc::new(ParquetTable::try_new(path, max_concurrency)?);
-        Self::scan("", provider, projection)
-    }
-
-    /// Scan an empty data source, mainly used in tests
-    pub fn scan_empty(
-        name: &str,
-        table_schema: &Schema,
-        projection: Option<Vec<usize>>,
-    ) -> Result<Self> {
-        let table_schema = Arc::new(table_schema.clone());
-        let provider = Arc::new(EmptyTable::new(table_schema));
-        Self::scan(name, provider, projection)
-    }
-
-    /// Convert a table provider into a builder with a TableScan
-    pub fn scan(
-        name: &str,
-        provider: Arc<dyn TableProvider>,
-        projection: Option<Vec<usize>>,
-    ) -> Result<Self> {
-        let schema = provider.schema();
-
-        let projected_schema = projection
-            .as_ref()
-            .map(|p| Schema::new(p.iter().map(|i| schema.field(*i).clone()).collect()))
-            .map_or(schema, SchemaRef::new)
-            .to_dfschema_ref()?;
-
-        let table_scan = LogicalPlan::TableScan {
-            table_name: name.to_string(),
-            source: provider,
-            projected_schema,
-            projection,
-            filters: vec![],
-            limit: None,
-        };
-
-        Ok(Self::from(&table_scan))
-    }
-
-    /// Apply a projection.
-    ///
-    /// # Errors
-    /// This function errors under any of the following conditions:
-    /// * Two or more expressions have the same name
-    /// * An invalid expression is used (e.g. a `sort` expression)
-    pub fn project(&self, expr: impl IntoIterator<Item = Expr>) -> Result<Self> {
-        let input_schema = self.plan.schema();
-        let mut projected_expr = vec![];
-        for e in expr {
-            match e {
-                Expr::Wildcard => {
-                    (0..input_schema.fields().len()).for_each(|i| {
-                        projected_expr.push(col(input_schema.field(i).name()))
-                    });
-                }
-                _ => projected_expr.push(e),
-            };
-        }
-
-        validate_unique_names("Projections", projected_expr.iter(), input_schema)?;
-
-        let schema = DFSchema::new(exprlist_to_fields(&projected_expr, input_schema)?)?;
-
-        Ok(Self::from(&LogicalPlan::Projection {
-            expr: projected_expr,
-            input: Arc::new(self.plan.clone()),
-            schema: DFSchemaRef::new(schema),
-        }))
-    }
-
-    /// Apply a filter
-    pub fn filter(&self, expr: Expr) -> Result<Self> {
-        Ok(Self::from(&LogicalPlan::Filter {
-            predicate: expr,
-            input: Arc::new(self.plan.clone()),
-        }))
-    }
-
-    /// Apply a limit
-    pub fn limit(&self, n: usize) -> Result<Self> {
-        Ok(Self::from(&LogicalPlan::Limit {
-            n,
-            input: Arc::new(self.plan.clone()),
-        }))
-    }
-
-    /// Apply a sort
-    pub fn sort(&self, expr: impl IntoIterator<Item = Expr>) -> Result<Self> {
-        Ok(Self::from(&LogicalPlan::Sort {
-            expr: expr.into_iter().collect(),
-            input: Arc::new(self.plan.clone()),
-        }))
-    }
-
-    /// Apply a union
-    pub fn union(&self, plan: LogicalPlan) -> Result<Self> {
-        let schema = self.plan.schema();
-
-        if plan.schema() != schema {
-            return Err(DataFusionError::Plan(
-                "Schema's for union should be the same ".to_string(),
-            ));
-        }
-        // Add plan to existing union if possible
-        let mut inputs = match &self.plan {
-            LogicalPlan::Union { inputs, .. } => inputs.clone(),
-            _ => vec![self.plan.clone()],
-        };
-        inputs.push(plan);
-
-        Ok(Self::from(&LogicalPlan::Union {
-            inputs,
-            schema: schema.clone(),
-            alias: None,
-        }))
-    }
-
-    /// Apply a join
-    pub fn join(
-        &self,
-        right: &LogicalPlan,
-        join_type: JoinType,
-        left_keys: &[&str],
-        right_keys: &[&str],
-    ) -> Result<Self> {
-        if left_keys.len() != right_keys.len() {
-            Err(DataFusionError::Plan(
-                "left_keys and right_keys were not the same length".to_string(),
-            ))
-        } else {
-            let on: Vec<_> = left_keys
-                .iter()
-                .zip(right_keys.iter())
-                .map(|(x, y)| (x.to_string(), y.to_string()))
-                .collect::<Vec<_>>();
-            let join_schema =
-                build_join_schema(self.plan.schema(), right.schema(), &on, &join_type)?;
-            Ok(Self::from(&LogicalPlan::Join {
-                left: Arc::new(self.plan.clone()),
-                right: Arc::new(right.clone()),
-                on,
-                join_type,
-                schema: DFSchemaRef::new(join_schema),
-            }))
-        }
-    }
-
-    /// Repartition
-    pub fn repartition(&self, partitioning_scheme: Partitioning) -> Result<Self> {
-        Ok(Self::from(&LogicalPlan::Repartition {
-            input: Arc::new(self.plan.clone()),
-            partitioning_scheme,
-        }))
-    }
-
-    /// Apply an aggregate: grouping on the `group_expr` expressions
-    /// and calculating `aggr_expr` aggregates for each distinct
-    /// value of the `group_expr`;
-    pub fn aggregate(
-        &self,
-        group_expr: impl IntoIterator<Item = Expr>,
-        aggr_expr: impl IntoIterator<Item = Expr>,
-    ) -> Result<Self> {
-        let group_expr = group_expr.into_iter().collect::<Vec<Expr>>();
-        let aggr_expr = aggr_expr.into_iter().collect::<Vec<Expr>>();
-
-        let all_expr = group_expr.iter().chain(aggr_expr.iter());
-
-        validate_unique_names("Aggregations", all_expr.clone(), self.plan.schema())?;
-
-        let aggr_schema =
-            DFSchema::new(exprlist_to_fields(all_expr, self.plan.schema())?)?;
-
-        Ok(Self::from(&LogicalPlan::Aggregate {
-            input: Arc::new(self.plan.clone()),
-            group_expr,
-            aggr_expr,
-            schema: DFSchemaRef::new(aggr_schema),
-        }))
-    }
-
-    /// Create an expression to represent the explanation of the plan
-    pub fn explain(&self, verbose: bool) -> Result<Self> {
-        let stringified_plans = vec![StringifiedPlan::new(
-            PlanType::LogicalPlan,
-            format!("{:#?}", self.plan.clone()),
-        )];
-
-        let schema = LogicalPlan::explain_schema();
-
-        Ok(Self::from(&LogicalPlan::Explain {
-            verbose,
-            plan: Arc::new(self.plan.clone()),
-            stringified_plans,
-            schema: schema.to_dfschema_ref()?,
-        }))
-    }
-
-    /// Build the plan
-    pub fn build(&self) -> Result<LogicalPlan> {
-        Ok(self.plan.clone())
-    }
-}
-
-/// Creates a schema for a join operation.
-/// The fields from the left side are first
-fn build_join_schema(
-    left: &DFSchema,
-    right: &DFSchema,
-    on: &[(String, String)],
-    join_type: &JoinType,
-) -> Result<DFSchema> {
-    let fields: Vec<DFField> = match join_type {
-        JoinType::Inner | JoinType::Left => {
-            // remove right-side join keys if they have the same names as the left-side
-            let duplicate_keys = &on
-                .iter()
-                .filter(|(l, r)| l == r)
-                .map(|on| on.1.to_string())
-                .collect::<HashSet<_>>();
-
-            let left_fields = left.fields().iter();
-
-            let right_fields = right
-                .fields()
-                .iter()
-                .filter(|f| !duplicate_keys.contains(f.name()));
-
-            // left then right
-            left_fields.chain(right_fields).cloned().collect()
-        }
-        JoinType::Right => {
-            // remove left-side join keys if they have the same names as the right-side
-            let duplicate_keys = &on
-                .iter()
-                .filter(|(l, r)| l == r)
-                .map(|on| on.1.to_string())
-                .collect::<HashSet<_>>();
-
-            let left_fields = left
-                .fields()
-                .iter()
-                .filter(|f| !duplicate_keys.contains(f.name()));
-
-            let right_fields = right.fields().iter();
-
-            // left then right
-            left_fields.chain(right_fields).cloned().collect()
-        }
-    };
-    DFSchema::new(fields)
-}
-
-/// Errors if one or more expressions have equal names.
-fn validate_unique_names<'a>(
-    node_name: &str,
-    expressions: impl IntoIterator<Item = &'a Expr>,
-    input_schema: &DFSchema,
-) -> Result<()> {
-    let mut unique_names = HashMap::new();
-    expressions.into_iter().enumerate().try_for_each(|(position, expr)| {
-        let name = expr.name(input_schema)?;
-        match unique_names.get(&name) {
-            None => {
-                unique_names.insert(name, (position, expr));
-                Ok(())
-            },
-            Some((existing_position, existing_expr)) => {
-                Err(DataFusionError::Plan(
-                    format!("{} require unique expression names \
-                             but the expression \"{:?}\" at position {} and \"{:?}\" \
-                             at position {} have the same name. Consider aliasing (\"AS\") one of them.",
-                             node_name, existing_expr, existing_position, expr, position,
-                            )
-                ))
-            }
-        }
-    })
-}
-
-#[cfg(test)]
-mod tests {
-    use arrow::datatypes::{DataType, Field};
-
-    use super::super::{lit, sum};
-    use super::*;
-
-    #[test]
-    fn plan_builder_simple() -> Result<()> {
-        let plan = LogicalPlanBuilder::scan_empty(
-            "employee.csv",
-            &employee_schema(),
-            Some(vec![0, 3]),
-        )?
-        .filter(col("state").eq(lit("CO")))?
-        .project(vec![col("id")])?
-        .build()?;
-
-        let expected = "Projection: #id\
-        \n  Filter: #state Eq Utf8(\"CO\")\
-        \n    TableScan: employee.csv projection=Some([0, 3])";
-
-        assert_eq!(expected, format!("{:?}", plan));
-
-        Ok(())
-    }
-
-    #[test]
-    fn plan_builder_aggregate() -> Result<()> {
-        let plan = LogicalPlanBuilder::scan_empty(
-            "employee.csv",
-            &employee_schema(),
-            Some(vec![3, 4]),
-        )?
-        .aggregate(
-            vec![col("state")],
-            vec![sum(col("salary")).alias("total_salary")],
-        )?
-        .project(vec![col("state"), col("total_salary")])?
-        .build()?;
-
-        let expected = "Projection: #state, #total_salary\
-        \n  Aggregate: groupBy=[[#state]], aggr=[[SUM(#salary) AS total_salary]]\
-        \n    TableScan: employee.csv projection=Some([3, 4])";
-
-        assert_eq!(expected, format!("{:?}", plan));
-
-        Ok(())
-    }
-
-    #[test]
-    fn plan_builder_sort() -> Result<()> {
-        let plan = LogicalPlanBuilder::scan_empty(
-            "employee.csv",
-            &employee_schema(),
-            Some(vec![3, 4]),
-        )?
-        .sort(vec![
-            Expr::Sort {
-                expr: Box::new(col("state")),
-                asc: true,
-                nulls_first: true,
-            },
-            Expr::Sort {
-                expr: Box::new(col("total_salary")),
-                asc: false,
-                nulls_first: false,
-            },
-        ])?
-        .build()?;
-
-        let expected = "Sort: #state ASC NULLS FIRST, #total_salary DESC NULLS LAST\
-        \n  TableScan: employee.csv projection=Some([3, 4])";
-
-        assert_eq!(expected, format!("{:?}", plan));
-
-        Ok(())
-    }
-
-    #[test]
-    fn plan_builder_union_combined_single_union() -> Result<()> {
-        let plan = LogicalPlanBuilder::scan_empty(
-            "employee.csv",
-            &employee_schema(),
-            Some(vec![3, 4]),
-        )?;
-
-        let plan = plan
-            .union(plan.build()?)?
-            .union(plan.build()?)?
-            .union(plan.build()?)?
-            .build()?;
-
-        // output has only one union
-        let expected = "Union\
-        \n  TableScan: employee.csv projection=Some([3, 4])\
-        \n  TableScan: employee.csv projection=Some([3, 4])\
-        \n  TableScan: employee.csv projection=Some([3, 4])\
-        \n  TableScan: employee.csv projection=Some([3, 4])";
-
-        assert_eq!(expected, format!("{:?}", plan));
-
-        Ok(())
-    }
-
-    #[test]
-    fn projection_non_unique_names() -> Result<()> {
-        let plan = LogicalPlanBuilder::scan_empty(
-            "employee.csv",
-            &employee_schema(),
-            Some(vec![0, 3]),
-        )?
-        // two columns with the same name => error
-        .project(vec![col("id"), col("first_name").alias("id")]);
-
-        match plan {
-            Err(DataFusionError::Plan(e)) => {
-                assert_eq!(
-                    e,
-                    "Projections require unique expression names \
-                    but the expression \"#id\" at position 0 and \"#first_name AS id\" at \
-                    position 1 have the same name. Consider aliasing (\"AS\") one of them."
-                );
-                Ok(())
-            }
-            _ => Err(DataFusionError::Plan(
-                "Plan should have returned an DataFusionError::Plan".to_string(),
-            )),
-        }
-    }
-
-    #[test]
-    fn aggregate_non_unique_names() -> Result<()> {
-        let plan = LogicalPlanBuilder::scan_empty(
-            "employee.csv",
-            &employee_schema(),
-            Some(vec![0, 3]),
-        )?
-        // two columns with the same name => error
-        .aggregate(vec![col("state")], vec![sum(col("salary")).alias("state")]);
-
-        match plan {
-            Err(DataFusionError::Plan(e)) => {
-                assert_eq!(
-                    e,
-                    "Aggregations require unique expression names \
-                    but the expression \"#state\" at position 0 and \"SUM(#salary) AS state\" at \
-                    position 1 have the same name. Consider aliasing (\"AS\") one of them."
-                );
-                Ok(())
-            }
-            _ => Err(DataFusionError::Plan(
-                "Plan should have returned an DataFusionError::Plan".to_string(),
-            )),
-        }
-    }
-
-    fn employee_schema() -> Schema {
-        Schema::new(vec![
-            Field::new("id", DataType::Int32, false),
-            Field::new("first_name", DataType::Utf8, false),
-            Field::new("last_name", DataType::Utf8, false),
-            Field::new("state", DataType::Utf8, false),
-            Field::new("salary", DataType::Int32, false),
-        ])
-    }
-
-    #[test]
-    fn stringified_plan() {
-        let stringified_plan =
-            StringifiedPlan::new(PlanType::LogicalPlan, "...the plan...");
-        assert!(stringified_plan.should_display(true));
-        assert!(stringified_plan.should_display(false)); // display in non verbose mode too
-
-        let stringified_plan =
-            StringifiedPlan::new(PlanType::PhysicalPlan, "...the plan...");
-        assert!(stringified_plan.should_display(true));
-        assert!(!stringified_plan.should_display(false));
-
-        let stringified_plan = StringifiedPlan::new(
-            PlanType::OptimizedLogicalPlan {
-                optimizer_name: "random opt pass".into(),
-            },
-            "...the plan...",
-        );
-        assert!(stringified_plan.should_display(true));
-        assert!(!stringified_plan.should_display(false));
-    }
-}
diff --git a/rust/datafusion/src/logical_plan/dfschema.rs b/rust/datafusion/src/logical_plan/dfschema.rs
deleted file mode 100644
index 9adb22b43d0..00000000000
--- a/rust/datafusion/src/logical_plan/dfschema.rs
+++ /dev/null
@@ -1,521 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! DFSchema is an extended schema struct that DataFusion uses to provide support for
-//! fields with optional relation names.
-
-use std::collections::HashSet;
-use std::convert::TryFrom;
-use std::sync::Arc;
-
-use crate::error::{DataFusionError, Result};
-
-use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
-use std::fmt::{Display, Formatter};
-
-/// A reference-counted reference to a `DFSchema`.
-pub type DFSchemaRef = Arc<DFSchema>;
-
-/// DFSchema wraps an Arrow schema and adds relation names
-#[derive(Debug, Clone, PartialEq, Eq)]
-pub struct DFSchema {
-    /// Fields
-    fields: Vec<DFField>,
-}
-
-impl DFSchema {
-    /// Creates an empty `DFSchema`
-    pub fn empty() -> Self {
-        Self { fields: vec![] }
-    }
-
-    /// Create a new `DFSchema`
-    pub fn new(fields: Vec<DFField>) -> Result<Self> {
-        let mut qualified_names = HashSet::new();
-        let mut unqualified_names = HashSet::new();
-        for field in &fields {
-            if let Some(qualifier) = field.qualifier() {
-                if !qualified_names.insert((qualifier, field.name())) {
-                    return Err(DataFusionError::Plan(format!(
-                        "Schema contains duplicate qualified field name '{}'",
-                        field.qualified_name()
-                    )));
-                }
-            } else if !unqualified_names.insert(field.name()) {
-                return Err(DataFusionError::Plan(format!(
-                    "Schema contains duplicate unqualified field name '{}'",
-                    field.name()
-                )));
-            }
-        }
-
-        // check for mix of qualified and unqualified field with same unqualified name
-        // note that we need to sort the contents of the HashSet first so that errors are
-        // deterministic
-        let mut qualified_names = qualified_names
-            .iter()
-            .map(|(l, r)| (l.to_owned(), r.to_owned()))
-            .collect::<Vec<(&String, &String)>>();
-        qualified_names.sort_by(|a, b| {
-            let a = format!("{}.{}", a.0, a.1);
-            let b = format!("{}.{}", b.0, b.1);
-            a.cmp(&b)
-        });
-        for (qualifier, name) in &qualified_names {
-            if unqualified_names.contains(name) {
-                return Err(DataFusionError::Plan(format!(
-                    "Schema contains qualified field name '{}.{}' \
-                    and unqualified field name '{}' which would be ambiguous",
-                    qualifier, name, name
-                )));
-            }
-        }
-        Ok(Self { fields })
-    }
-
-    /// Create a `DFSchema` from an Arrow schema
-    pub fn try_from_qualified(qualifier: &str, schema: &Schema) -> Result<Self> {
-        Self::new(
-            schema
-                .fields()
-                .iter()
-                .map(|f| DFField {
-                    field: f.clone(),
-                    qualifier: Some(qualifier.to_owned()),
-                })
-                .collect(),
-        )
-    }
-
-    /// Combine two schemas
-    pub fn join(&self, schema: &DFSchema) -> Result<Self> {
-        let mut fields = self.fields.clone();
-        fields.extend_from_slice(schema.fields().as_slice());
-        Self::new(fields)
-    }
-
-    /// Get a list of fields
-    pub fn fields(&self) -> &Vec<DFField> {
-        &self.fields
-    }
-
-    /// Returns an immutable reference of a specific `Field` instance selected using an
-    /// offset within the internal `fields` vector
-    pub fn field(&self, i: usize) -> &DFField {
-        &self.fields[i]
-    }
-
-    /// Find the index of the column with the given name
-    pub fn index_of(&self, name: &str) -> Result<usize> {
-        for i in 0..self.fields.len() {
-            if self.fields[i].name() == name {
-                return Ok(i);
-            }
-        }
-        Err(DataFusionError::Plan(format!("No field named '{}'", name)))
-    }
-
-    /// Find the field with the given name
-    pub fn field_with_name(
-        &self,
-        relation_name: Option<&str>,
-        name: &str,
-    ) -> Result<DFField> {
-        if let Some(relation_name) = relation_name {
-            self.field_with_qualified_name(relation_name, name)
-        } else {
-            self.field_with_unqualified_name(name)
-        }
-    }
-
-    /// Find the field with the given name
-    pub fn field_with_unqualified_name(&self, name: &str) -> Result<DFField> {
-        let matches: Vec<&DFField> = self
-            .fields
-            .iter()
-            .filter(|field| field.name() == name)
-            .collect();
-        match matches.len() {
-            0 => Err(DataFusionError::Plan(format!("No field named '{}'", name))),
-            1 => Ok(matches[0].to_owned()),
-            _ => Err(DataFusionError::Plan(format!(
-                "Ambiguous reference to field named '{}'",
-                name
-            ))),
-        }
-    }
-
-    /// Find the field with the given qualified name
-    pub fn field_with_qualified_name(
-        &self,
-        relation_name: &str,
-        name: &str,
-    ) -> Result<DFField> {
-        let matches: Vec<&DFField> = self
-            .fields
-            .iter()
-            .filter(|field| {
-                field.qualifier == Some(relation_name.to_string()) && field.name() == name
-            })
-            .collect();
-        match matches.len() {
-            0 => Err(DataFusionError::Plan(format!(
-                "No field named '{}.{}'",
-                relation_name, name
-            ))),
-            1 => Ok(matches[0].to_owned()),
-            _ => Err(DataFusionError::Internal(format!(
-                "Ambiguous reference to qualified field named '{}.{}'",
-                relation_name, name
-            ))),
-        }
-    }
-}
-
-impl Into<Schema> for DFSchema {
-    /// Convert a schema into a DFSchema
-    fn into(self) -> Schema {
-        Schema::new(
-            self.fields
-                .into_iter()
-                .map(|f| {
-                    if f.qualifier().is_some() {
-                        Field::new(
-                            f.qualified_name().as_str(),
-                            f.data_type().to_owned(),
-                            f.is_nullable(),
-                        )
-                    } else {
-                        f.field
-                    }
-                })
-                .collect(),
-        )
-    }
-}
-
-/// Create a `DFSchema` from an Arrow schema
-impl TryFrom<Schema> for DFSchema {
-    type Error = DataFusionError;
-    fn try_from(schema: Schema) -> std::result::Result<Self, Self::Error> {
-        Self::new(
-            schema
-                .fields()
-                .iter()
-                .map(|f| DFField {
-                    field: f.clone(),
-                    qualifier: None,
-                })
-                .collect(),
-        )
-    }
-}
-
-impl Into<SchemaRef> for DFSchema {
-    fn into(self) -> SchemaRef {
-        SchemaRef::new(self.into())
-    }
-}
-
-/// Convenience trait to convert Schema like things to DFSchema and DFSchemaRef with fewer keystrokes
-pub trait ToDFSchema
-where
-    Self: Sized,
-{
-    /// Attempt to create a DSSchema
-    #[allow(clippy::wrong_self_convention)]
-    fn to_dfschema(self) -> Result<DFSchema>;
-
-    /// Attempt to create a DSSchemaRef
-    #[allow(clippy::wrong_self_convention)]
-    fn to_dfschema_ref(self) -> Result<DFSchemaRef> {
-        Ok(Arc::new(self.to_dfschema()?))
-    }
-}
-
-impl ToDFSchema for Schema {
-    fn to_dfschema(self) -> Result<DFSchema> {
-        DFSchema::try_from(self)
-    }
-}
-
-impl ToDFSchema for SchemaRef {
-    fn to_dfschema(self) -> Result<DFSchema> {
-        // Attempt to use the Schema directly if there are no other
-        // references, otherwise clone
-        match Self::try_unwrap(self) {
-            Ok(schema) => DFSchema::try_from(schema),
-            Err(schemaref) => DFSchema::try_from(schemaref.as_ref().clone()),
-        }
-    }
-}
-
-impl ToDFSchema for Vec<DFField> {
-    fn to_dfschema(self) -> Result<DFSchema> {
-        DFSchema::new(self)
-    }
-}
-
-impl Display for DFSchema {
-    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        write!(
-            f,
-            "{}",
-            self.fields
-                .iter()
-                .map(|field| field.qualified_name())
-                .collect::<Vec<String>>()
-                .join(", ")
-        )
-    }
-}
-
-/// DFField wraps an Arrow field and adds an optional qualifier
-#[derive(Debug, Clone, PartialEq, Eq)]
-pub struct DFField {
-    /// Optional qualifier (usually a table or relation name)
-    qualifier: Option<String>,
-    /// Arrow field definition
-    field: Field,
-}
-
-impl DFField {
-    /// Creates a new `DFField`
-    pub fn new(
-        qualifier: Option<&str>,
-        name: &str,
-        data_type: DataType,
-        nullable: bool,
-    ) -> Self {
-        DFField {
-            qualifier: qualifier.map(|s| s.to_owned()),
-            field: Field::new(name, data_type, nullable),
-        }
-    }
-
-    /// Create an unqualified field from an existing Arrow field
-    pub fn from(field: Field) -> Self {
-        Self {
-            qualifier: None,
-            field,
-        }
-    }
-
-    /// Create a qualified field from an existing Arrow field
-    pub fn from_qualified(qualifier: &str, field: Field) -> Self {
-        Self {
-            qualifier: Some(qualifier.to_owned()),
-            field,
-        }
-    }
-
-    /// Returns an immutable reference to the `DFField`'s unqualified name
-    pub fn name(&self) -> &String {
-        &self.field.name()
-    }
-
-    /// Returns an immutable reference to the `DFField`'s data-type
-    pub fn data_type(&self) -> &DataType {
-        &self.field.data_type()
-    }
-
-    /// Indicates whether this `DFField` supports null values
-    pub fn is_nullable(&self) -> bool {
-        self.field.is_nullable()
-    }
-
-    /// Returns a reference to the `DFField`'s qualified name
-    pub fn qualified_name(&self) -> String {
-        if let Some(relation_name) = &self.qualifier {
-            format!("{}.{}", relation_name, self.field.name())
-        } else {
-            self.field.name().to_owned()
-        }
-    }
-
-    /// Get the optional qualifier
-    pub fn qualifier(&self) -> Option<&String> {
-        self.qualifier.as_ref()
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use arrow::datatypes::DataType;
-
-    #[test]
-    fn from_unqualified_field() {
-        let field = Field::new("c0", DataType::Boolean, true);
-        let field = DFField::from(field);
-        assert_eq!("c0", field.name());
-        assert_eq!("c0", field.qualified_name());
-    }
-
-    #[test]
-    fn from_qualified_field() {
-        let field = Field::new("c0", DataType::Boolean, true);
-        let field = DFField::from_qualified("t1", field);
-        assert_eq!("c0", field.name());
-        assert_eq!("t1.c0", field.qualified_name());
-    }
-
-    #[test]
-    fn from_unqualified_schema() -> Result<()> {
-        let schema = DFSchema::try_from(test_schema_1())?;
-        assert_eq!("c0, c1", schema.to_string());
-        Ok(())
-    }
-
-    #[test]
-    fn from_qualified_schema() -> Result<()> {
-        let schema = DFSchema::try_from_qualified("t1", &test_schema_1())?;
-        assert_eq!("t1.c0, t1.c1", schema.to_string());
-        Ok(())
-    }
-
-    #[test]
-    fn from_qualified_schema_into_arrow_schema() -> Result<()> {
-        let schema = DFSchema::try_from_qualified("t1", &test_schema_1())?;
-        let arrow_schema: Schema = schema.into();
-        let expected = "Field { name: \"t1.c0\", data_type: Boolean, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: None }, \
-        Field { name: \"t1.c1\", data_type: Boolean, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: None }";
-        assert_eq!(expected, arrow_schema.to_string());
-        Ok(())
-    }
-
-    #[test]
-    fn join_qualified() -> Result<()> {
-        let left = DFSchema::try_from_qualified("t1", &test_schema_1())?;
-        let right = DFSchema::try_from_qualified("t2", &test_schema_1())?;
-        let join = left.join(&right)?;
-        assert_eq!("t1.c0, t1.c1, t2.c0, t2.c1", join.to_string());
-        // test valid access
-        assert!(join.field_with_qualified_name("t1", "c0").is_ok());
-        assert!(join.field_with_qualified_name("t2", "c0").is_ok());
-        // test invalid access
-        assert!(join.field_with_unqualified_name("c0").is_err());
-        assert!(join.field_with_unqualified_name("t1.c0").is_err());
-        assert!(join.field_with_unqualified_name("t2.c0").is_err());
-        Ok(())
-    }
-
-    #[test]
-    fn join_qualified_duplicate() -> Result<()> {
-        let left = DFSchema::try_from_qualified("t1", &test_schema_1())?;
-        let right = DFSchema::try_from_qualified("t1", &test_schema_1())?;
-        let join = left.join(&right);
-        assert!(join.is_err());
-        assert_eq!(
-            "Error during planning: Schema contains duplicate \
-        qualified field name \'t1.c0\'",
-            &format!("{}", join.err().unwrap())
-        );
-        Ok(())
-    }
-
-    #[test]
-    fn join_unqualified_duplicate() -> Result<()> {
-        let left = DFSchema::try_from(test_schema_1())?;
-        let right = DFSchema::try_from(test_schema_1())?;
-        let join = left.join(&right);
-        assert!(join.is_err());
-        assert_eq!(
-            "Error during planning: Schema contains duplicate \
-        unqualified field name \'c0\'",
-            &format!("{}", join.err().unwrap())
-        );
-        Ok(())
-    }
-
-    #[test]
-    fn join_mixed() -> Result<()> {
-        let left = DFSchema::try_from_qualified("t1", &test_schema_1())?;
-        let right = DFSchema::try_from(test_schema_2())?;
-        let join = left.join(&right)?;
-        assert_eq!("t1.c0, t1.c1, c100, c101", join.to_string());
-        // test valid access
-        assert!(join.field_with_qualified_name("t1", "c0").is_ok());
-        assert!(join.field_with_unqualified_name("c0").is_ok());
-        assert!(join.field_with_unqualified_name("c100").is_ok());
-        assert!(join.field_with_name(None, "c100").is_ok());
-        // test invalid access
-        assert!(join.field_with_unqualified_name("t1.c0").is_err());
-        assert!(join.field_with_unqualified_name("t1.c100").is_err());
-        assert!(join.field_with_qualified_name("", "c100").is_err());
-        Ok(())
-    }
-
-    #[test]
-    fn join_mixed_duplicate() -> Result<()> {
-        let left = DFSchema::try_from_qualified("t1", &test_schema_1())?;
-        let right = DFSchema::try_from(test_schema_1())?;
-        let join = left.join(&right);
-        assert!(join.is_err());
-        assert_eq!(
-            "Error during planning: Schema contains qualified \
-        field name \'t1.c0\' and unqualified field name \'c0\' which would be ambiguous",
-            &format!("{}", join.err().unwrap())
-        );
-        Ok(())
-    }
-
-    #[test]
-    fn into() {
-        // Demonstrate how to convert back and forth between Schema, SchemaRef, DFSchema, and DFSchemaRef
-        let arrow_schema = Schema::new(vec![Field::new("c0", DataType::Int64, true)]);
-        let arrow_schema_ref = Arc::new(arrow_schema.clone());
-
-        let df_schema =
-            DFSchema::new(vec![DFField::new(None, "c0", DataType::Int64, true)]).unwrap();
-        let df_schema_ref = Arc::new(df_schema.clone());
-
-        {
-            let arrow_schema = arrow_schema.clone();
-            let arrow_schema_ref = arrow_schema_ref.clone();
-
-            assert_eq!(df_schema, arrow_schema.to_dfschema().unwrap());
-            assert_eq!(df_schema, arrow_schema_ref.to_dfschema().unwrap());
-        }
-
-        {
-            let arrow_schema = arrow_schema.clone();
-            let arrow_schema_ref = arrow_schema_ref.clone();
-
-            assert_eq!(df_schema_ref, arrow_schema.to_dfschema_ref().unwrap());
-            assert_eq!(df_schema_ref, arrow_schema_ref.to_dfschema_ref().unwrap());
-        }
-
-        // Now, consume the refs
-        assert_eq!(df_schema_ref, arrow_schema.to_dfschema_ref().unwrap());
-        assert_eq!(df_schema_ref, arrow_schema_ref.to_dfschema_ref().unwrap());
-    }
-
-    fn test_schema_1() -> Schema {
-        Schema::new(vec![
-            Field::new("c0", DataType::Boolean, true),
-            Field::new("c1", DataType::Boolean, true),
-        ])
-    }
-
-    fn test_schema_2() -> Schema {
-        Schema::new(vec![
-            Field::new("c100", DataType::Boolean, true),
-            Field::new("c101", DataType::Boolean, true),
-        ])
-    }
-}
diff --git a/rust/datafusion/src/logical_plan/display.rs b/rust/datafusion/src/logical_plan/display.rs
deleted file mode 100644
index 76749b547a8..00000000000
--- a/rust/datafusion/src/logical_plan/display.rs
+++ /dev/null
@@ -1,270 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-//! This module provides logic for displaying LogicalPlans in various styles
-
-use super::{LogicalPlan, PlanVisitor};
-use arrow::datatypes::Schema;
-use std::fmt;
-
-/// Formats plans with a single line per node. For example:
-///
-/// Projection: #id
-///    Filter: #state Eq Utf8(\"CO\")\
-///       CsvScan: employee.csv projection=Some([0, 3])";
-pub struct IndentVisitor<'a, 'b> {
-    f: &'a mut fmt::Formatter<'b>,
-    /// If true, includes summarized schema information
-    with_schema: bool,
-    indent: u32,
-}
-
-impl<'a, 'b> IndentVisitor<'a, 'b> {
-    /// Create a visitor that will write a formatted LogicalPlan to f. If `with_schema` is
-    /// true, includes schema information on each line.
-    pub fn new(f: &'a mut fmt::Formatter<'b>, with_schema: bool) -> Self {
-        Self {
-            f,
-            with_schema,
-            indent: 0,
-        }
-    }
-
-    fn write_indent(&mut self) -> fmt::Result {
-        for _ in 0..self.indent {
-            write!(self.f, "  ")?;
-        }
-        Ok(())
-    }
-}
-
-impl<'a, 'b> PlanVisitor for IndentVisitor<'a, 'b> {
-    type Error = fmt::Error;
-
-    fn pre_visit(&mut self, plan: &LogicalPlan) -> std::result::Result<bool, fmt::Error> {
-        if self.indent > 0 {
-            writeln!(self.f)?;
-        }
-        self.write_indent()?;
-
-        write!(self.f, "{}", plan.display())?;
-        if self.with_schema {
-            write!(
-                self.f,
-                " {}",
-                display_schema(&plan.schema().as_ref().to_owned().into())
-            )?;
-        }
-
-        self.indent += 1;
-        Ok(true)
-    }
-
-    fn post_visit(
-        &mut self,
-        _plan: &LogicalPlan,
-    ) -> std::result::Result<bool, fmt::Error> {
-        self.indent -= 1;
-        Ok(true)
-    }
-}
-
-/// Print the schema in a compact representation to `buf`
-///
-/// For example: `foo:Utf8` if `foo` can not be null, and
-/// `foo:Utf8;N` if `foo` is nullable.
-///
-/// ```
-/// use arrow::datatypes::{Field, Schema, DataType};
-/// # use datafusion::logical_plan::display_schema;
-/// let schema = Schema::new(vec![
-///     Field::new("id", DataType::Int32, false),
-///     Field::new("first_name", DataType::Utf8, true),
-///  ]);
-///
-///  assert_eq!(
-///      "[id:Int32, first_name:Utf8;N]",
-///      format!("{}", display_schema(&schema))
-///  );
-/// ```
-pub fn display_schema(schema: &Schema) -> impl fmt::Display + '_ {
-    struct Wrapper<'a>(&'a Schema);
-
-    impl<'a> fmt::Display for Wrapper<'a> {
-        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-            write!(f, "[")?;
-            for (idx, field) in self.0.fields().iter().enumerate() {
-                if idx > 0 {
-                    write!(f, ", ")?;
-                }
-                let nullable_str = if field.is_nullable() { ";N" } else { "" };
-                write!(
-                    f,
-                    "{}:{:?}{}",
-                    field.name(),
-                    field.data_type(),
-                    nullable_str
-                )?;
-            }
-            write!(f, "]")
-        }
-    }
-    Wrapper(schema)
-}
-
-/// Logic related to creating DOT language graphs.
-#[derive(Default)]
-struct GraphvizBuilder {
-    id_gen: usize,
-}
-
-impl GraphvizBuilder {
-    fn next_id(&mut self) -> usize {
-        self.id_gen += 1;
-        self.id_gen
-    }
-
-    // write out the start of the subgraph cluster
-    fn start_cluster(&mut self, f: &mut fmt::Formatter, title: &str) -> fmt::Result {
-        writeln!(f, "  subgraph cluster_{}", self.next_id())?;
-        writeln!(f, "  {{")?;
-        writeln!(f, "    graph[label={}]", Self::quoted(title))
-    }
-
-    // write out the end of the subgraph cluster
-    fn end_cluster(&mut self, f: &mut fmt::Formatter) -> fmt::Result {
-        writeln!(f, "  }}")
-    }
-
-    /// makes a quoted string suitable for inclusion in a graphviz chart
-    fn quoted(label: &str) -> String {
-        let label = label.replace('"', "_");
-        format!("\"{}\"", label)
-    }
-}
-
-/// Formats plans for graphical display using the `DOT` language. This
-/// format can be visualized using software from
-/// [`graphviz`](https://graphviz.org/)
-pub struct GraphvizVisitor<'a, 'b> {
-    f: &'a mut fmt::Formatter<'b>,
-    graphviz_builder: GraphvizBuilder,
-    /// If true, includes summarized schema information
-    with_schema: bool,
-
-    /// Holds the ids (as generated from `graphviz_builder` of all
-    /// parent nodes
-    parent_ids: Vec<usize>,
-}
-
-impl<'a, 'b> GraphvizVisitor<'a, 'b> {
-    pub fn new(f: &'a mut fmt::Formatter<'b>) -> Self {
-        Self {
-            f,
-            graphviz_builder: GraphvizBuilder::default(),
-            with_schema: false,
-            parent_ids: Vec::new(),
-        }
-    }
-
-    /// Sets a flag which controls if the output schema is displayed
-    pub fn set_with_schema(&mut self, with_schema: bool) {
-        self.with_schema = with_schema;
-    }
-
-    pub fn pre_visit_plan(&mut self, label: &str) -> fmt::Result {
-        self.graphviz_builder.start_cluster(self.f, label)
-    }
-
-    pub fn post_visit_plan(&mut self) -> fmt::Result {
-        self.graphviz_builder.end_cluster(self.f)
-    }
-}
-
-impl<'a, 'b> PlanVisitor for GraphvizVisitor<'a, 'b> {
-    type Error = fmt::Error;
-
-    fn pre_visit(&mut self, plan: &LogicalPlan) -> std::result::Result<bool, fmt::Error> {
-        let id = self.graphviz_builder.next_id();
-
-        // Create a new graph node for `plan` such as
-        // id [label="foo"]
-        let label = if self.with_schema {
-            format!(
-                "{}\\nSchema: {}",
-                plan.display(),
-                display_schema(&plan.schema().as_ref().to_owned().into())
-            )
-        } else {
-            format!("{}", plan.display())
-        };
-
-        writeln!(
-            self.f,
-            "    {}[shape=box label={}]",
-            id,
-            GraphvizBuilder::quoted(&label)
-        )?;
-
-        // Create an edge to our parent node, if any
-        //  parent_id -> id
-        if let Some(parent_id) = self.parent_ids.last() {
-            writeln!(
-                self.f,
-                "    {} -> {} [arrowhead=none, arrowtail=normal, dir=back]",
-                parent_id, id
-            )?;
-        }
-
-        self.parent_ids.push(id);
-        Ok(true)
-    }
-
-    fn post_visit(
-        &mut self,
-        _plan: &LogicalPlan,
-    ) -> std::result::Result<bool, fmt::Error> {
-        // always be non-empty as pre_visit always pushes
-        self.parent_ids.pop().unwrap();
-        Ok(true)
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use arrow::datatypes::{DataType, Field};
-
-    use super::*;
-
-    #[test]
-    fn test_display_empty_schema() {
-        let schema = Schema::new(vec![]);
-        assert_eq!("[]", format!("{}", display_schema(&schema)));
-    }
-
-    #[test]
-    fn test_display_schema() {
-        let schema = Schema::new(vec![
-            Field::new("id", DataType::Int32, false),
-            Field::new("first_name", DataType::Utf8, true),
-        ]);
-
-        assert_eq!(
-            "[id:Int32, first_name:Utf8;N]",
-            format!("{}", display_schema(&schema))
-        );
-    }
-}
diff --git a/rust/datafusion/src/logical_plan/expr.rs b/rust/datafusion/src/logical_plan/expr.rs
deleted file mode 100644
index fa9b9e0a249..00000000000
--- a/rust/datafusion/src/logical_plan/expr.rs
+++ /dev/null
@@ -1,1505 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! This module provides an `Expr` enum for representing expressions
-//! such as `col = 5` or `SUM(col)`. See examples on the [`Expr`] struct.
-
-pub use super::Operator;
-
-use std::fmt;
-use std::sync::Arc;
-
-use aggregates::{AccumulatorFunctionImplementation, StateTypeFunction};
-use arrow::{compute::can_cast_types, datatypes::DataType};
-
-use crate::error::{DataFusionError, Result};
-use crate::logical_plan::{DFField, DFSchema};
-use crate::physical_plan::{
-    aggregates, expressions::binary_operator_data_type, functions, udf::ScalarUDF,
-};
-use crate::{physical_plan::udaf::AggregateUDF, scalar::ScalarValue};
-use functions::{ReturnTypeFunction, ScalarFunctionImplementation, Signature};
-use std::collections::HashSet;
-
-/// `Expr` is a central struct of DataFusion's query API, and
-/// represent logical expressions such as `A + 1`, or `CAST(c1 AS
-/// int)`.
-///
-/// An `Expr` can compute its [DataType](arrow::datatypes::DataType)
-/// and nullability, and has functions for building up complex
-/// expressions.
-///
-/// # Examples
-///
-/// ## Create an expression `c1` referring to column named "c1"
-/// ```
-/// # use datafusion::logical_plan::*;
-/// let expr = col("c1");
-/// assert_eq!(expr, Expr::Column("c1".to_string()));
-/// ```
-///
-/// ## Create the expression `c1 + c2` to add columns "c1" and "c2" together
-/// ```
-/// # use datafusion::logical_plan::*;
-/// let expr = col("c1") + col("c2");
-///
-/// assert!(matches!(expr, Expr::BinaryExpr { ..} ));
-/// if let Expr::BinaryExpr { left, right, op } = expr {
-///   assert_eq!(*left, col("c1"));
-///   assert_eq!(*right, col("c2"));
-///   assert_eq!(op, Operator::Plus);
-/// }
-/// ```
-///
-/// ## Create expression `c1 = 42` to compare the value in coumn "c1" to the literal value `42`
-/// ```
-/// # use datafusion::logical_plan::*;
-/// # use datafusion::scalar::*;
-/// let expr = col("c1").eq(lit(42));
-///
-/// assert!(matches!(expr, Expr::BinaryExpr { ..} ));
-/// if let Expr::BinaryExpr { left, right, op } = expr {
-///   assert_eq!(*left, col("c1"));
-///   let scalar = ScalarValue::Int32(Some(42));
-///   assert_eq!(*right, Expr::Literal(scalar));
-///   assert_eq!(op, Operator::Eq);
-/// }
-/// ```
-#[derive(Clone, PartialEq)]
-pub enum Expr {
-    /// An expression with a specific name.
-    Alias(Box<Expr>, String),
-    /// A named reference to a field in a schema.
-    Column(String),
-    /// A named reference to a variable in a registry.
-    ScalarVariable(Vec<String>),
-    /// A constant value.
-    Literal(ScalarValue),
-    /// A binary expression such as "age > 21"
-    BinaryExpr {
-        /// Left-hand side of the expression
-        left: Box<Expr>,
-        /// The comparison operator
-        op: Operator,
-        /// Right-hand side of the expression
-        right: Box<Expr>,
-    },
-    /// Negation of an expression. The expression's type must be a boolean to make sense.
-    Not(Box<Expr>),
-    /// Whether an expression is not Null. This expression is never null.
-    IsNotNull(Box<Expr>),
-    /// Whether an expression is Null. This expression is never null.
-    IsNull(Box<Expr>),
-    /// arithmetic negation of an expression, the operand must be of a signed numeric data type
-    Negative(Box<Expr>),
-    /// Whether an expression is between a given range.
-    Between {
-        /// The value to compare
-        expr: Box<Expr>,
-        /// Whether the expression is negated
-        negated: bool,
-        /// The low end of the range
-        low: Box<Expr>,
-        /// The high end of the range
-        high: Box<Expr>,
-    },
-    /// The CASE expression is similar to a series of nested if/else and there are two forms that
-    /// can be used. The first form consists of a series of boolean "when" expressions with
-    /// corresponding "then" expressions, and an optional "else" expression.
-    ///
-    /// CASE WHEN condition THEN result
-    ///      [WHEN ...]
-    ///      [ELSE result]
-    /// END
-    ///
-    /// The second form uses a base expression and then a series of "when" clauses that match on a
-    /// literal value.
-    ///
-    /// CASE expression
-    ///     WHEN value THEN result
-    ///     [WHEN ...]
-    ///     [ELSE result]
-    /// END
-    Case {
-        /// Optional base expression that can be compared to literal values in the "when" expressions
-        expr: Option<Box<Expr>>,
-        /// One or more when/then expressions
-        when_then_expr: Vec<(Box<Expr>, Box<Expr>)>,
-        /// Optional "else" expression
-        else_expr: Option<Box<Expr>>,
-    },
-    /// Casts the expression to a given type and will return a runtime error if the expression cannot be cast.
-    /// This expression is guaranteed to have a fixed type.
-    Cast {
-        /// The expression being cast
-        expr: Box<Expr>,
-        /// The `DataType` the expression will yield
-        data_type: DataType,
-    },
-    /// Casts the expression to a given type and will return a null value if the expression cannot be cast.
-    /// This expression is guaranteed to have a fixed type.
-    TryCast {
-        /// The expression being cast
-        expr: Box<Expr>,
-        /// The `DataType` the expression will yield
-        data_type: DataType,
-    },
-    /// A sort expression, that can be used to sort values.
-    Sort {
-        /// The expression to sort on
-        expr: Box<Expr>,
-        /// The direction of the sort
-        asc: bool,
-        /// Whether to put Nulls before all other data values
-        nulls_first: bool,
-    },
-    /// Represents the call of a built-in scalar function with a set of arguments.
-    ScalarFunction {
-        /// The function
-        fun: functions::BuiltinScalarFunction,
-        /// List of expressions to feed to the functions as arguments
-        args: Vec<Expr>,
-    },
-    /// Represents the call of a user-defined scalar function with arguments.
-    ScalarUDF {
-        /// The function
-        fun: Arc<ScalarUDF>,
-        /// List of expressions to feed to the functions as arguments
-        args: Vec<Expr>,
-    },
-    /// Represents the call of an aggregate built-in function with arguments.
-    AggregateFunction {
-        /// Name of the function
-        fun: aggregates::AggregateFunction,
-        /// List of expressions to feed to the functions as arguments
-        args: Vec<Expr>,
-        /// Whether this is a DISTINCT aggregation or not
-        distinct: bool,
-    },
-    /// aggregate function
-    AggregateUDF {
-        /// The function
-        fun: Arc<AggregateUDF>,
-        /// List of expressions to feed to the functions as arguments
-        args: Vec<Expr>,
-    },
-    /// Returns whether the list contains the expr value.
-    InList {
-        /// The expression to compare
-        expr: Box<Expr>,
-        /// A list of values to compare against
-        list: Vec<Expr>,
-        /// Whether the expression is negated
-        negated: bool,
-    },
-    /// Represents a reference to all fields in a schema.
-    Wildcard,
-}
-
-impl Expr {
-    /// Returns the [arrow::datatypes::DataType] of the expression based on [arrow::datatypes::Schema].
-    ///
-    /// # Errors
-    ///
-    /// This function errors when it is not possible to compute its [arrow::datatypes::DataType].
-    /// This happens when e.g. the expression refers to a column that does not exist in the schema, or when
-    /// the expression is incorrectly typed (e.g. `[utf8] + [bool]`).
-    pub fn get_type(&self, schema: &DFSchema) -> Result<DataType> {
-        match self {
-            Expr::Alias(expr, _) => expr.get_type(schema),
-            Expr::Column(name) => Ok(schema
-                .field_with_unqualified_name(name)?
-                .data_type()
-                .clone()),
-            Expr::ScalarVariable(_) => Ok(DataType::Utf8),
-            Expr::Literal(l) => Ok(l.get_datatype()),
-            Expr::Case { when_then_expr, .. } => when_then_expr[0].1.get_type(schema),
-            Expr::Cast { data_type, .. } => Ok(data_type.clone()),
-            Expr::TryCast { data_type, .. } => Ok(data_type.clone()),
-            Expr::ScalarUDF { fun, args } => {
-                let data_types = args
-                    .iter()
-                    .map(|e| e.get_type(schema))
-                    .collect::<Result<Vec<_>>>()?;
-                Ok((fun.return_type)(&data_types)?.as_ref().clone())
-            }
-            Expr::ScalarFunction { fun, args } => {
-                let data_types = args
-                    .iter()
-                    .map(|e| e.get_type(schema))
-                    .collect::<Result<Vec<_>>>()?;
-                functions::return_type(fun, &data_types)
-            }
-            Expr::AggregateFunction { fun, args, .. } => {
-                let data_types = args
-                    .iter()
-                    .map(|e| e.get_type(schema))
-                    .collect::<Result<Vec<_>>>()?;
-                aggregates::return_type(fun, &data_types)
-            }
-            Expr::AggregateUDF { fun, args, .. } => {
-                let data_types = args
-                    .iter()
-                    .map(|e| e.get_type(schema))
-                    .collect::<Result<Vec<_>>>()?;
-                Ok((fun.return_type)(&data_types)?.as_ref().clone())
-            }
-            Expr::Not(_) => Ok(DataType::Boolean),
-            Expr::Negative(expr) => expr.get_type(schema),
-            Expr::IsNull(_) => Ok(DataType::Boolean),
-            Expr::IsNotNull(_) => Ok(DataType::Boolean),
-            Expr::BinaryExpr {
-                ref left,
-                ref right,
-                ref op,
-            } => binary_operator_data_type(
-                &left.get_type(schema)?,
-                op,
-                &right.get_type(schema)?,
-            ),
-            Expr::Sort { ref expr, .. } => expr.get_type(schema),
-            Expr::Between { .. } => Ok(DataType::Boolean),
-            Expr::InList { .. } => Ok(DataType::Boolean),
-            Expr::Wildcard => Err(DataFusionError::Internal(
-                "Wildcard expressions are not valid in a logical query plan".to_owned(),
-            )),
-        }
-    }
-
-    /// Returns the nullability of the expression based on [arrow::datatypes::Schema].
-    ///
-    /// # Errors
-    ///
-    /// This function errors when it is not possible to compute its nullability.
-    /// This happens when the expression refers to a column that does not exist in the schema.
-    pub fn nullable(&self, input_schema: &DFSchema) -> Result<bool> {
-        match self {
-            Expr::Alias(expr, _) => expr.nullable(input_schema),
-            Expr::Column(name) => Ok(input_schema
-                .field_with_unqualified_name(name)?
-                .is_nullable()),
-            Expr::Literal(value) => Ok(value.is_null()),
-            Expr::ScalarVariable(_) => Ok(true),
-            Expr::Case {
-                when_then_expr,
-                else_expr,
-                ..
-            } => {
-                // this expression is nullable if any of the input expressions are nullable
-                let then_nullable = when_then_expr
-                    .iter()
-                    .map(|(_, t)| t.nullable(input_schema))
-                    .collect::<Result<Vec<_>>>()?;
-                if then_nullable.contains(&true) {
-                    Ok(true)
-                } else if let Some(e) = else_expr {
-                    e.nullable(input_schema)
-                } else {
-                    Ok(false)
-                }
-            }
-            Expr::Cast { expr, .. } => expr.nullable(input_schema),
-            Expr::TryCast { .. } => Ok(true),
-            Expr::ScalarFunction { .. } => Ok(true),
-            Expr::ScalarUDF { .. } => Ok(true),
-            Expr::AggregateFunction { .. } => Ok(true),
-            Expr::AggregateUDF { .. } => Ok(true),
-            Expr::Not(expr) => expr.nullable(input_schema),
-            Expr::Negative(expr) => expr.nullable(input_schema),
-            Expr::IsNull(_) => Ok(false),
-            Expr::IsNotNull(_) => Ok(false),
-            Expr::BinaryExpr {
-                ref left,
-                ref right,
-                ..
-            } => Ok(left.nullable(input_schema)? || right.nullable(input_schema)?),
-            Expr::Sort { ref expr, .. } => expr.nullable(input_schema),
-            Expr::Between { ref expr, .. } => expr.nullable(input_schema),
-            Expr::InList { ref expr, .. } => expr.nullable(input_schema),
-            Expr::Wildcard => Err(DataFusionError::Internal(
-                "Wildcard expressions are not valid in a logical query plan".to_owned(),
-            )),
-        }
-    }
-
-    /// Returns the name of this expression based on [arrow::datatypes::Schema].
-    ///
-    /// This represents how a column with this expression is named when no alias is chosen
-    pub fn name(&self, input_schema: &DFSchema) -> Result<String> {
-        create_name(self, input_schema)
-    }
-
-    /// Returns a [arrow::datatypes::Field] compatible with this expression.
-    pub fn to_field(&self, input_schema: &DFSchema) -> Result<DFField> {
-        Ok(DFField::new(
-            None, //TODO  qualifier
-            &self.name(input_schema)?,
-            self.get_type(input_schema)?,
-            self.nullable(input_schema)?,
-        ))
-    }
-
-    /// Wraps this expression in a cast to a target [arrow::datatypes::DataType].
-    ///
-    /// # Errors
-    ///
-    /// This function errors when it is impossible to cast the
-    /// expression to the target [arrow::datatypes::DataType].
-    pub fn cast_to(self, cast_to_type: &DataType, schema: &DFSchema) -> Result<Expr> {
-        let this_type = self.get_type(schema)?;
-        if this_type == *cast_to_type {
-            Ok(self)
-        } else if can_cast_types(&this_type, cast_to_type) {
-            Ok(Expr::Cast {
-                expr: Box::new(self),
-                data_type: cast_to_type.clone(),
-            })
-        } else {
-            Err(DataFusionError::Plan(format!(
-                "Cannot automatically convert {:?} to {:?}",
-                this_type, cast_to_type
-            )))
-        }
-    }
-
-    /// Return `self == other`
-    pub fn eq(self, other: Expr) -> Expr {
-        binary_expr(self, Operator::Eq, other)
-    }
-
-    /// Return `self != other`
-    pub fn not_eq(self, other: Expr) -> Expr {
-        binary_expr(self, Operator::NotEq, other)
-    }
-
-    /// Return `self > other`
-    pub fn gt(self, other: Expr) -> Expr {
-        binary_expr(self, Operator::Gt, other)
-    }
-
-    /// Return `self >= other`
-    pub fn gt_eq(self, other: Expr) -> Expr {
-        binary_expr(self, Operator::GtEq, other)
-    }
-
-    /// Return `self < other`
-    pub fn lt(self, other: Expr) -> Expr {
-        binary_expr(self, Operator::Lt, other)
-    }
-
-    /// Return `self <= other`
-    pub fn lt_eq(self, other: Expr) -> Expr {
-        binary_expr(self, Operator::LtEq, other)
-    }
-
-    /// Return `self && other`
-    pub fn and(self, other: Expr) -> Expr {
-        binary_expr(self, Operator::And, other)
-    }
-
-    /// Return `self || other`
-    pub fn or(self, other: Expr) -> Expr {
-        binary_expr(self, Operator::Or, other)
-    }
-
-    /// Return `!self`
-    #[allow(clippy::should_implement_trait)]
-    pub fn not(self) -> Expr {
-        Expr::Not(Box::new(self))
-    }
-
-    /// Calculate the modulus of two expressions.
-    /// Return `self % other`
-    pub fn modulus(self, other: Expr) -> Expr {
-        binary_expr(self, Operator::Modulus, other)
-    }
-
-    /// Return `self LIKE other`
-    pub fn like(self, other: Expr) -> Expr {
-        binary_expr(self, Operator::Like, other)
-    }
-
-    /// Return `self NOT LIKE other`
-    pub fn not_like(self, other: Expr) -> Expr {
-        binary_expr(self, Operator::NotLike, other)
-    }
-
-    /// Return `self AS name` alias expression
-    pub fn alias(self, name: &str) -> Expr {
-        Expr::Alias(Box::new(self), name.to_owned())
-    }
-
-    /// Return `self IN <list>` if `negated` is false, otherwise
-    /// return `self NOT IN <list>`.a
-    pub fn in_list(self, list: Vec<Expr>, negated: bool) -> Expr {
-        Expr::InList {
-            expr: Box::new(self),
-            list,
-            negated,
-        }
-    }
-
-    /// Return `IsNull(Box(self))
-    #[allow(clippy::wrong_self_convention)]
-    pub fn is_null(self) -> Expr {
-        Expr::IsNull(Box::new(self))
-    }
-
-    /// Return `IsNotNull(Box(self))
-    #[allow(clippy::wrong_self_convention)]
-    pub fn is_not_null(self) -> Expr {
-        Expr::IsNotNull(Box::new(self))
-    }
-
-    /// Create a sort expression from an existing expression.
-    ///
-    /// ```
-    /// # use datafusion::logical_plan::col;
-    /// let sort_expr = col("foo").sort(true, true); // SORT ASC NULLS_FIRST
-    /// ```
-    pub fn sort(self, asc: bool, nulls_first: bool) -> Expr {
-        Expr::Sort {
-            expr: Box::new(self),
-            asc,
-            nulls_first,
-        }
-    }
-
-    /// Performs a depth first walk of an expression and
-    /// its children, calling [`ExpressionVisitor::pre_visit`] and
-    /// `visitor.post_visit`.
-    ///
-    /// Implements the [visitor pattern](https://en.wikipedia.org/wiki/Visitor_pattern) to
-    /// separate expression algorithms from the structure of the
-    /// `Expr` tree and make it easier to add new types of expressions
-    /// and algorithms that walk the tree.
-    ///
-    /// For an expression tree such as
-    /// ```text
-    /// BinaryExpr (GT)
-    ///    left: Column("foo")
-    ///    right: Column("bar")
-    /// ```
-    ///
-    /// The nodes are visited using the following order
-    /// ```text
-    /// pre_visit(BinaryExpr(GT))
-    /// pre_visit(Column("foo"))
-    /// pre_visit(Column("bar"))
-    /// post_visit(Column("bar"))
-    /// post_visit(Column("bar"))
-    /// post_visit(BinaryExpr(GT))
-    /// ```
-    ///
-    /// If an Err result is returned, recursion is stopped immediately
-    ///
-    /// If `Recursion::Stop` is returned on a call to pre_visit, no
-    /// children of that expression are visited, nor is post_visit
-    /// called on that expression
-    ///
-    pub fn accept<V: ExpressionVisitor>(&self, visitor: V) -> Result<V> {
-        let visitor = match visitor.pre_visit(self)? {
-            Recursion::Continue(visitor) => visitor,
-            // If the recursion should stop, do not visit children
-            Recursion::Stop(visitor) => return Ok(visitor),
-        };
-
-        // recurse (and cover all expression types)
-        let visitor = match self {
-            Expr::Alias(expr, _) => expr.accept(visitor),
-            Expr::Column(..) => Ok(visitor),
-            Expr::ScalarVariable(..) => Ok(visitor),
-            Expr::Literal(..) => Ok(visitor),
-            Expr::BinaryExpr { left, right, .. } => {
-                let visitor = left.accept(visitor)?;
-                right.accept(visitor)
-            }
-            Expr::Not(expr) => expr.accept(visitor),
-            Expr::IsNotNull(expr) => expr.accept(visitor),
-            Expr::IsNull(expr) => expr.accept(visitor),
-            Expr::Negative(expr) => expr.accept(visitor),
-            Expr::Between {
-                expr, low, high, ..
-            } => {
-                let visitor = expr.accept(visitor)?;
-                let visitor = low.accept(visitor)?;
-                high.accept(visitor)
-            }
-            Expr::Case {
-                expr,
-                when_then_expr,
-                else_expr,
-            } => {
-                let visitor = if let Some(expr) = expr.as_ref() {
-                    expr.accept(visitor)
-                } else {
-                    Ok(visitor)
-                }?;
-                let visitor = when_then_expr.iter().try_fold(
-                    visitor,
-                    |visitor, (when, then)| {
-                        let visitor = when.accept(visitor)?;
-                        then.accept(visitor)
-                    },
-                )?;
-                if let Some(else_expr) = else_expr.as_ref() {
-                    else_expr.accept(visitor)
-                } else {
-                    Ok(visitor)
-                }
-            }
-            Expr::Cast { expr, .. } => expr.accept(visitor),
-            Expr::TryCast { expr, .. } => expr.accept(visitor),
-            Expr::Sort { expr, .. } => expr.accept(visitor),
-            Expr::ScalarFunction { args, .. } => args
-                .iter()
-                .try_fold(visitor, |visitor, arg| arg.accept(visitor)),
-            Expr::ScalarUDF { args, .. } => args
-                .iter()
-                .try_fold(visitor, |visitor, arg| arg.accept(visitor)),
-            Expr::AggregateFunction { args, .. } => args
-                .iter()
-                .try_fold(visitor, |visitor, arg| arg.accept(visitor)),
-            Expr::AggregateUDF { args, .. } => args
-                .iter()
-                .try_fold(visitor, |visitor, arg| arg.accept(visitor)),
-            Expr::InList { expr, list, .. } => {
-                let visitor = expr.accept(visitor)?;
-                list.iter()
-                    .try_fold(visitor, |visitor, arg| arg.accept(visitor))
-            }
-            Expr::Wildcard => Ok(visitor),
-        }?;
-
-        visitor.post_visit(self)
-    }
-
-    /// Performs a depth first walk of an expression and its children
-    /// to rewrite an expression, consuming `self` producing a new
-    /// [`Expr`].
-    ///
-    /// Implements a modified version of the [visitor
-    /// pattern](https://en.wikipedia.org/wiki/Visitor_pattern) to
-    /// separate algorithms from the structure of the `Expr` tree and
-    /// make it easier to write new, efficient expression
-    /// transformation algorithms.
-    ///
-    /// For an expression tree such as
-    /// ```text
-    /// BinaryExpr (GT)
-    ///    left: Column("foo")
-    ///    right: Column("bar")
-    /// ```
-    ///
-    /// The nodes are visited using the following order
-    /// ```text
-    /// pre_visit(BinaryExpr(GT))
-    /// pre_visit(Column("foo"))
-    /// mutatate(Column("foo"))
-    /// pre_visit(Column("bar"))
-    /// mutate(Column("bar"))
-    /// mutate(BinaryExpr(GT))
-    /// ```
-    ///
-    /// If an Err result is returned, recursion is stopped immediately
-    ///
-    /// If [`false`] is returned on a call to pre_visit, no
-    /// children of that expression are visited, nor is mutate
-    /// called on that expression
-    ///
-    pub fn rewrite<R>(self, rewriter: &mut R) -> Result<Self>
-    where
-        R: ExprRewriter,
-    {
-        if !rewriter.pre_visit(&self)? {
-            return Ok(self);
-        };
-
-        // recurse into all sub expressions(and cover all expression types)
-        let expr = match self {
-            Expr::Alias(expr, name) => Expr::Alias(rewrite_boxed(expr, rewriter)?, name),
-            Expr::Column(name) => Expr::Column(name),
-            Expr::ScalarVariable(names) => Expr::ScalarVariable(names),
-            Expr::Literal(value) => Expr::Literal(value),
-            Expr::BinaryExpr { left, op, right } => Expr::BinaryExpr {
-                left: rewrite_boxed(left, rewriter)?,
-                op,
-                right: rewrite_boxed(right, rewriter)?,
-            },
-            Expr::Not(expr) => Expr::Not(rewrite_boxed(expr, rewriter)?),
-            Expr::IsNotNull(expr) => Expr::IsNotNull(rewrite_boxed(expr, rewriter)?),
-            Expr::IsNull(expr) => Expr::IsNull(rewrite_boxed(expr, rewriter)?),
-            Expr::Negative(expr) => Expr::Negative(rewrite_boxed(expr, rewriter)?),
-            Expr::Between {
-                expr,
-                low,
-                high,
-                negated,
-            } => Expr::Between {
-                expr: rewrite_boxed(expr, rewriter)?,
-                low: rewrite_boxed(low, rewriter)?,
-                high: rewrite_boxed(high, rewriter)?,
-                negated,
-            },
-            Expr::Case {
-                expr,
-                when_then_expr,
-                else_expr,
-            } => {
-                let expr = rewrite_option_box(expr, rewriter)?;
-                let when_then_expr = when_then_expr
-                    .into_iter()
-                    .map(|(when, then)| {
-                        Ok((
-                            rewrite_boxed(when, rewriter)?,
-                            rewrite_boxed(then, rewriter)?,
-                        ))
-                    })
-                    .collect::<Result<Vec<_>>>()?;
-
-                let else_expr = rewrite_option_box(else_expr, rewriter)?;
-
-                Expr::Case {
-                    expr,
-                    when_then_expr,
-                    else_expr,
-                }
-            }
-            Expr::Cast { expr, data_type } => Expr::Cast {
-                expr: rewrite_boxed(expr, rewriter)?,
-                data_type,
-            },
-            Expr::TryCast { expr, data_type } => Expr::TryCast {
-                expr: rewrite_boxed(expr, rewriter)?,
-                data_type,
-            },
-            Expr::Sort {
-                expr,
-                asc,
-                nulls_first,
-            } => Expr::Sort {
-                expr: rewrite_boxed(expr, rewriter)?,
-                asc,
-                nulls_first,
-            },
-            Expr::ScalarFunction { args, fun } => Expr::ScalarFunction {
-                args: rewrite_vec(args, rewriter)?,
-                fun,
-            },
-            Expr::ScalarUDF { args, fun } => Expr::ScalarUDF {
-                args: rewrite_vec(args, rewriter)?,
-                fun,
-            },
-            Expr::AggregateFunction {
-                args,
-                fun,
-                distinct,
-            } => Expr::AggregateFunction {
-                args: rewrite_vec(args, rewriter)?,
-                fun,
-                distinct,
-            },
-            Expr::AggregateUDF { args, fun } => Expr::AggregateUDF {
-                args: rewrite_vec(args, rewriter)?,
-                fun,
-            },
-            Expr::InList {
-                expr,
-                list,
-                negated,
-            } => Expr::InList {
-                expr: rewrite_boxed(expr, rewriter)?,
-                list,
-                negated,
-            },
-            Expr::Wildcard => Expr::Wildcard,
-        };
-
-        // now rewrite this expression itself
-        rewriter.mutate(expr)
-    }
-}
-
-#[allow(clippy::boxed_local)]
-fn rewrite_boxed<R>(boxed_expr: Box<Expr>, rewriter: &mut R) -> Result<Box<Expr>>
-where
-    R: ExprRewriter,
-{
-    // TODO: It might be possible to avoid an allocation (the
-    // Box::new) below by reusing the box.
-    let expr: Expr = *boxed_expr;
-    let rewritten_expr = expr.rewrite(rewriter)?;
-    Ok(Box::new(rewritten_expr))
-}
-
-fn rewrite_option_box<R>(
-    option_box: Option<Box<Expr>>,
-    rewriter: &mut R,
-) -> Result<Option<Box<Expr>>>
-where
-    R: ExprRewriter,
-{
-    option_box
-        .map(|expr| rewrite_boxed(expr, rewriter))
-        .transpose()
-}
-
-/// rewrite a `Vec` of `Expr`s with the rewriter
-fn rewrite_vec<R>(v: Vec<Expr>, rewriter: &mut R) -> Result<Vec<Expr>>
-where
-    R: ExprRewriter,
-{
-    v.into_iter().map(|expr| expr.rewrite(rewriter)).collect()
-}
-
-/// Controls how the visitor recursion should proceed.
-pub enum Recursion<V: ExpressionVisitor> {
-    /// Attempt to visit all the children, recursively, of this expression.
-    Continue(V),
-    /// Do not visit the children of this expression, though the walk
-    /// of parents of this expression will not be affected
-    Stop(V),
-}
-
-/// Encode the traversal of an expression tree. When passed to
-/// `Expr::accept`, `ExpressionVisitor::visit` is invoked
-/// recursively on all nodes of an expression tree. See the comments
-/// on `Expr::accept` for details on its use
-pub trait ExpressionVisitor: Sized {
-    /// Invoked before any children of `expr` are visisted.
-    fn pre_visit(self, expr: &Expr) -> Result<Recursion<Self>>;
-
-    /// Invoked after all children of `expr` are visited. Default
-    /// implementation does nothing.
-    fn post_visit(self, _expr: &Expr) -> Result<Self> {
-        Ok(self)
-    }
-}
-
-/// Trait for potentially recursively rewriting an [`Expr`] expression
-/// tree. When passed to `Expr::rewrite`, `ExpressionVisitor::mutate` is
-/// invoked recursively on all nodes of an expression tree. See the
-/// comments on `Expr::rewrite` for details on its use
-pub trait ExprRewriter: Sized {
-    /// Invoked before any children of `expr` are rewritten /
-    /// visited. Default implementation returns `Ok(true)`
-    fn pre_visit(&mut self, _expr: &Expr) -> Result<bool> {
-        Ok(true)
-    }
-
-    /// Invoked after all children of `expr` have been mutated and
-    /// returns a potentially modified expr.
-    fn mutate(&mut self, expr: Expr) -> Result<Expr>;
-}
-
-pub struct CaseBuilder {
-    expr: Option<Box<Expr>>,
-    when_expr: Vec<Expr>,
-    then_expr: Vec<Expr>,
-    else_expr: Option<Box<Expr>>,
-}
-
-impl CaseBuilder {
-    pub fn when(&mut self, when: Expr, then: Expr) -> CaseBuilder {
-        self.when_expr.push(when);
-        self.then_expr.push(then);
-        CaseBuilder {
-            expr: self.expr.clone(),
-            when_expr: self.when_expr.clone(),
-            then_expr: self.then_expr.clone(),
-            else_expr: self.else_expr.clone(),
-        }
-    }
-    pub fn otherwise(&mut self, else_expr: Expr) -> Result<Expr> {
-        self.else_expr = Some(Box::new(else_expr));
-        self.build()
-    }
-
-    pub fn end(&self) -> Result<Expr> {
-        self.build()
-    }
-}
-
-impl CaseBuilder {
-    fn build(&self) -> Result<Expr> {
-        // collect all "then" expressions
-        let mut then_expr = self.then_expr.clone();
-        if let Some(e) = &self.else_expr {
-            then_expr.push(e.as_ref().to_owned());
-        }
-
-        let then_types: Vec<DataType> = then_expr
-            .iter()
-            .map(|e| match e {
-                Expr::Literal(_) => e.get_type(&DFSchema::empty()),
-                _ => Ok(DataType::Null),
-            })
-            .collect::<Result<Vec<_>>>()?;
-
-        if then_types.contains(&DataType::Null) {
-            // cannot verify types until execution type
-        } else {
-            let unique_types: HashSet<&DataType> = then_types.iter().collect();
-            if unique_types.len() != 1 {
-                return Err(DataFusionError::Plan(format!(
-                    "CASE expression 'then' values had multiple data types: {:?}",
-                    unique_types
-                )));
-            }
-        }
-
-        Ok(Expr::Case {
-            expr: self.expr.clone(),
-            when_then_expr: self
-                .when_expr
-                .iter()
-                .zip(self.then_expr.iter())
-                .map(|(w, t)| (Box::new(w.clone()), Box::new(t.clone())))
-                .collect(),
-            else_expr: self.else_expr.clone(),
-        })
-    }
-}
-
-/// Create a CASE WHEN statement with literal WHEN expressions for comparison to the base expression.
-pub fn case(expr: Expr) -> CaseBuilder {
-    CaseBuilder {
-        expr: Some(Box::new(expr)),
-        when_expr: vec![],
-        then_expr: vec![],
-        else_expr: None,
-    }
-}
-
-/// Create a CASE WHEN statement with boolean WHEN expressions and no base expression.
-pub fn when(when: Expr, then: Expr) -> CaseBuilder {
-    CaseBuilder {
-        expr: None,
-        when_expr: vec![when],
-        then_expr: vec![then],
-        else_expr: None,
-    }
-}
-
-/// return a new expression l <op> r
-pub fn binary_expr(l: Expr, op: Operator, r: Expr) -> Expr {
-    Expr::BinaryExpr {
-        left: Box::new(l),
-        op,
-        right: Box::new(r),
-    }
-}
-
-/// return a new expression with a logical AND
-pub fn and(left: Expr, right: Expr) -> Expr {
-    Expr::BinaryExpr {
-        left: Box::new(left),
-        op: Operator::And,
-        right: Box::new(right),
-    }
-}
-
-/// Combines an array of filter expressions into a single filter expression
-/// consisting of the input filter expressions joined with logical AND.
-/// Returns None if the filters array is empty.
-pub fn combine_filters(filters: &[Expr]) -> Option<Expr> {
-    if filters.is_empty() {
-        return None;
-    }
-    let combined_filter = filters
-        .iter()
-        .skip(1)
-        .fold(filters[0].clone(), |acc, filter| and(acc, filter.clone()));
-    Some(combined_filter)
-}
-
-/// return a new expression with a logical OR
-pub fn or(left: Expr, right: Expr) -> Expr {
-    Expr::BinaryExpr {
-        left: Box::new(left),
-        op: Operator::Or,
-        right: Box::new(right),
-    }
-}
-
-/// Create a column expression based on a column name
-pub fn col(name: &str) -> Expr {
-    Expr::Column(name.to_owned())
-}
-
-/// Create an expression to represent the min() aggregate function
-pub fn min(expr: Expr) -> Expr {
-    Expr::AggregateFunction {
-        fun: aggregates::AggregateFunction::Min,
-        distinct: false,
-        args: vec![expr],
-    }
-}
-
-/// Create an expression to represent the max() aggregate function
-pub fn max(expr: Expr) -> Expr {
-    Expr::AggregateFunction {
-        fun: aggregates::AggregateFunction::Max,
-        distinct: false,
-        args: vec![expr],
-    }
-}
-
-/// Create an expression to represent the sum() aggregate function
-pub fn sum(expr: Expr) -> Expr {
-    Expr::AggregateFunction {
-        fun: aggregates::AggregateFunction::Sum,
-        distinct: false,
-        args: vec![expr],
-    }
-}
-
-/// Create an expression to represent the avg() aggregate function
-pub fn avg(expr: Expr) -> Expr {
-    Expr::AggregateFunction {
-        fun: aggregates::AggregateFunction::Avg,
-        distinct: false,
-        args: vec![expr],
-    }
-}
-
-/// Create an expression to represent the count() aggregate function
-pub fn count(expr: Expr) -> Expr {
-    Expr::AggregateFunction {
-        fun: aggregates::AggregateFunction::Count,
-        distinct: false,
-        args: vec![expr],
-    }
-}
-
-/// Create an expression to represent the count(distinct) aggregate function
-pub fn count_distinct(expr: Expr) -> Expr {
-    Expr::AggregateFunction {
-        fun: aggregates::AggregateFunction::Count,
-        distinct: true,
-        args: vec![expr],
-    }
-}
-
-/// Create an in_list expression
-pub fn in_list(expr: Expr, list: Vec<Expr>, negated: bool) -> Expr {
-    Expr::InList {
-        expr: Box::new(expr),
-        list,
-        negated,
-    }
-}
-
-/// Trait for converting a type to a [`Literal`] literal expression.
-pub trait Literal {
-    /// convert the value to a Literal expression
-    fn lit(&self) -> Expr;
-}
-
-impl Literal for &str {
-    fn lit(&self) -> Expr {
-        Expr::Literal(ScalarValue::Utf8(Some((*self).to_owned())))
-    }
-}
-
-impl Literal for String {
-    fn lit(&self) -> Expr {
-        Expr::Literal(ScalarValue::Utf8(Some((*self).to_owned())))
-    }
-}
-
-impl Literal for ScalarValue {
-    fn lit(&self) -> Expr {
-        Expr::Literal(self.clone())
-    }
-}
-
-macro_rules! make_literal {
-    ($TYPE:ty, $SCALAR:ident) => {
-        #[allow(missing_docs)]
-        impl Literal for $TYPE {
-            fn lit(&self) -> Expr {
-                Expr::Literal(ScalarValue::$SCALAR(Some(self.clone())))
-            }
-        }
-    };
-}
-
-make_literal!(bool, Boolean);
-make_literal!(f32, Float32);
-make_literal!(f64, Float64);
-make_literal!(i8, Int8);
-make_literal!(i16, Int16);
-make_literal!(i32, Int32);
-make_literal!(i64, Int64);
-make_literal!(u8, UInt8);
-make_literal!(u16, UInt16);
-make_literal!(u32, UInt32);
-make_literal!(u64, UInt64);
-
-/// Create a literal expression
-pub fn lit<T: Literal>(n: T) -> Expr {
-    n.lit()
-}
-
-/// Create an convenience function representing a unary scalar function
-macro_rules! unary_scalar_expr {
-    ($ENUM:ident, $FUNC:ident) => {
-        #[allow(missing_docs)]
-        pub fn $FUNC(e: Expr) -> Expr {
-            Expr::ScalarFunction {
-                fun: functions::BuiltinScalarFunction::$ENUM,
-                args: vec![e],
-            }
-        }
-    };
-}
-
-// generate methods for creating the supported unary expressions
-
-// math functions
-unary_scalar_expr!(Sqrt, sqrt);
-unary_scalar_expr!(Sin, sin);
-unary_scalar_expr!(Cos, cos);
-unary_scalar_expr!(Tan, tan);
-unary_scalar_expr!(Asin, asin);
-unary_scalar_expr!(Acos, acos);
-unary_scalar_expr!(Atan, atan);
-unary_scalar_expr!(Floor, floor);
-unary_scalar_expr!(Ceil, ceil);
-unary_scalar_expr!(Round, round);
-unary_scalar_expr!(Trunc, trunc);
-unary_scalar_expr!(Abs, abs);
-unary_scalar_expr!(Signum, signum);
-unary_scalar_expr!(Exp, exp);
-unary_scalar_expr!(Log, ln);
-unary_scalar_expr!(Log2, log2);
-unary_scalar_expr!(Log10, log10);
-
-// string functions
-unary_scalar_expr!(Ascii, ascii);
-unary_scalar_expr!(BitLength, bit_length);
-unary_scalar_expr!(Btrim, btrim);
-unary_scalar_expr!(CharacterLength, character_length);
-unary_scalar_expr!(CharacterLength, length);
-unary_scalar_expr!(Chr, chr);
-unary_scalar_expr!(Concat, concat);
-unary_scalar_expr!(ConcatWithSeparator, concat_ws);
-unary_scalar_expr!(InitCap, initcap);
-unary_scalar_expr!(Left, left);
-unary_scalar_expr!(Lower, lower);
-unary_scalar_expr!(Lpad, lpad);
-unary_scalar_expr!(Ltrim, ltrim);
-unary_scalar_expr!(MD5, md5);
-unary_scalar_expr!(OctetLength, octet_length);
-unary_scalar_expr!(RegexpMatch, regexp_match);
-unary_scalar_expr!(RegexpReplace, regexp_replace);
-unary_scalar_expr!(Replace, replace);
-unary_scalar_expr!(Repeat, repeat);
-unary_scalar_expr!(Reverse, reverse);
-unary_scalar_expr!(Right, right);
-unary_scalar_expr!(Rpad, rpad);
-unary_scalar_expr!(Rtrim, rtrim);
-unary_scalar_expr!(SHA224, sha224);
-unary_scalar_expr!(SHA256, sha256);
-unary_scalar_expr!(SHA384, sha384);
-unary_scalar_expr!(SHA512, sha512);
-unary_scalar_expr!(SplitPart, split_part);
-unary_scalar_expr!(StartsWith, starts_with);
-unary_scalar_expr!(Strpos, strpos);
-unary_scalar_expr!(Substr, substr);
-unary_scalar_expr!(ToHex, to_hex);
-unary_scalar_expr!(Translate, translate);
-unary_scalar_expr!(Trim, trim);
-unary_scalar_expr!(Upper, upper);
-
-/// returns an array of fixed size with each argument on it.
-pub fn array(args: Vec<Expr>) -> Expr {
-    Expr::ScalarFunction {
-        fun: functions::BuiltinScalarFunction::Array,
-        args,
-    }
-}
-
-/// Creates a new UDF with a specific signature and specific return type.
-/// This is a helper function to create a new UDF.
-/// The function `create_udf` returns a subset of all possible `ScalarFunction`:
-/// * the UDF has a fixed return type
-/// * the UDF has a fixed signature (e.g. [f64, f64])
-pub fn create_udf(
-    name: &str,
-    input_types: Vec<DataType>,
-    return_type: Arc<DataType>,
-    fun: ScalarFunctionImplementation,
-) -> ScalarUDF {
-    let return_type: ReturnTypeFunction = Arc::new(move |_| Ok(return_type.clone()));
-    ScalarUDF::new(name, &Signature::Exact(input_types), &return_type, &fun)
-}
-
-/// Creates a new UDAF with a specific signature, state type and return type.
-/// The signature and state type must match the `Acumulator's implementation`.
-#[allow(clippy::rc_buffer)]
-pub fn create_udaf(
-    name: &str,
-    input_type: DataType,
-    return_type: Arc<DataType>,
-    accumulator: AccumulatorFunctionImplementation,
-    state_type: Arc<Vec<DataType>>,
-) -> AggregateUDF {
-    let return_type: ReturnTypeFunction = Arc::new(move |_| Ok(return_type.clone()));
-    let state_type: StateTypeFunction = Arc::new(move |_| Ok(state_type.clone()));
-    AggregateUDF::new(
-        name,
-        &Signature::Exact(vec![input_type]),
-        &return_type,
-        &accumulator,
-        &state_type,
-    )
-}
-
-fn fmt_function(
-    f: &mut fmt::Formatter,
-    fun: &str,
-    distinct: bool,
-    args: &[Expr],
-) -> fmt::Result {
-    let args: Vec<String> = args.iter().map(|arg| format!("{:?}", arg)).collect();
-    let distinct_str = match distinct {
-        true => "DISTINCT ",
-        false => "",
-    };
-    write!(f, "{}({}{})", fun, distinct_str, args.join(", "))
-}
-
-impl fmt::Debug for Expr {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        match self {
-            Expr::Alias(expr, alias) => write!(f, "{:?} AS {}", expr, alias),
-            Expr::Column(name) => write!(f, "#{}", name),
-            Expr::ScalarVariable(var_names) => write!(f, "{}", var_names.join(".")),
-            Expr::Literal(v) => write!(f, "{:?}", v),
-            Expr::Case {
-                expr,
-                when_then_expr,
-                else_expr,
-                ..
-            } => {
-                write!(f, "CASE ")?;
-                if let Some(e) = expr {
-                    write!(f, "{:?} ", e)?;
-                }
-                for (w, t) in when_then_expr {
-                    write!(f, "WHEN {:?} THEN {:?} ", w, t)?;
-                }
-                if let Some(e) = else_expr {
-                    write!(f, "ELSE {:?} ", e)?;
-                }
-                write!(f, "END")
-            }
-            Expr::Cast { expr, data_type } => {
-                write!(f, "CAST({:?} AS {:?})", expr, data_type)
-            }
-            Expr::TryCast { expr, data_type } => {
-                write!(f, "TRY_CAST({:?} AS {:?})", expr, data_type)
-            }
-            Expr::Not(expr) => write!(f, "NOT {:?}", expr),
-            Expr::Negative(expr) => write!(f, "(- {:?})", expr),
-            Expr::IsNull(expr) => write!(f, "{:?} IS NULL", expr),
-            Expr::IsNotNull(expr) => write!(f, "{:?} IS NOT NULL", expr),
-            Expr::BinaryExpr { left, op, right } => {
-                write!(f, "{:?} {:?} {:?}", left, op, right)
-            }
-            Expr::Sort {
-                expr,
-                asc,
-                nulls_first,
-            } => {
-                if *asc {
-                    write!(f, "{:?} ASC", expr)?;
-                } else {
-                    write!(f, "{:?} DESC", expr)?;
-                }
-                if *nulls_first {
-                    write!(f, " NULLS FIRST")
-                } else {
-                    write!(f, " NULLS LAST")
-                }
-            }
-            Expr::ScalarFunction { fun, args, .. } => {
-                fmt_function(f, &fun.to_string(), false, args)
-            }
-            Expr::ScalarUDF { fun, ref args, .. } => {
-                fmt_function(f, &fun.name, false, args)
-            }
-            Expr::AggregateFunction {
-                fun,
-                distinct,
-                ref args,
-                ..
-            } => fmt_function(f, &fun.to_string(), *distinct, args),
-            Expr::AggregateUDF { fun, ref args, .. } => {
-                fmt_function(f, &fun.name, false, args)
-            }
-            Expr::Between {
-                expr,
-                negated,
-                low,
-                high,
-            } => {
-                if *negated {
-                    write!(f, "{:?} NOT BETWEEN {:?} AND {:?}", expr, low, high)
-                } else {
-                    write!(f, "{:?} BETWEEN {:?} AND {:?}", expr, low, high)
-                }
-            }
-            Expr::InList {
-                expr,
-                list,
-                negated,
-            } => {
-                if *negated {
-                    write!(f, "{:?} NOT IN ({:?})", expr, list)
-                } else {
-                    write!(f, "{:?} IN ({:?})", expr, list)
-                }
-            }
-            Expr::Wildcard => write!(f, "*"),
-        }
-    }
-}
-
-fn create_function_name(
-    fun: &str,
-    distinct: bool,
-    args: &[Expr],
-    input_schema: &DFSchema,
-) -> Result<String> {
-    let names: Vec<String> = args
-        .iter()
-        .map(|e| create_name(e, input_schema))
-        .collect::<Result<_>>()?;
-    let distinct_str = match distinct {
-        true => "DISTINCT ",
-        false => "",
-    };
-    Ok(format!("{}({}{})", fun, distinct_str, names.join(",")))
-}
-
-/// Returns a readable name of an expression based on the input schema.
-/// This function recursively transverses the expression for names such as "CAST(a > 2)".
-fn create_name(e: &Expr, input_schema: &DFSchema) -> Result<String> {
-    match e {
-        Expr::Alias(_, name) => Ok(name.clone()),
-        Expr::Column(name) => Ok(name.clone()),
-        Expr::ScalarVariable(variable_names) => Ok(variable_names.join(".")),
-        Expr::Literal(value) => Ok(format!("{:?}", value)),
-        Expr::BinaryExpr { left, op, right } => {
-            let left = create_name(left, input_schema)?;
-            let right = create_name(right, input_schema)?;
-            Ok(format!("{} {:?} {}", left, op, right))
-        }
-        Expr::Case {
-            expr,
-            when_then_expr,
-            else_expr,
-        } => {
-            let mut name = "CASE ".to_string();
-            if let Some(e) = expr {
-                name += &format!("{:?} ", e);
-            }
-            for (w, t) in when_then_expr {
-                name += &format!("WHEN {:?} THEN {:?} ", w, t);
-            }
-            if let Some(e) = else_expr {
-                name += &format!("ELSE {:?} ", e);
-            }
-            name += "END";
-            Ok(name)
-        }
-        Expr::Cast { expr, data_type } => {
-            let expr = create_name(expr, input_schema)?;
-            Ok(format!("CAST({} AS {:?})", expr, data_type))
-        }
-        Expr::TryCast { expr, data_type } => {
-            let expr = create_name(expr, input_schema)?;
-            Ok(format!("TRY_CAST({} AS {:?})", expr, data_type))
-        }
-        Expr::Not(expr) => {
-            let expr = create_name(expr, input_schema)?;
-            Ok(format!("NOT {}", expr))
-        }
-        Expr::Negative(expr) => {
-            let expr = create_name(expr, input_schema)?;
-            Ok(format!("(- {})", expr))
-        }
-        Expr::IsNull(expr) => {
-            let expr = create_name(expr, input_schema)?;
-            Ok(format!("{} IS NULL", expr))
-        }
-        Expr::IsNotNull(expr) => {
-            let expr = create_name(expr, input_schema)?;
-            Ok(format!("{} IS NOT NULL", expr))
-        }
-        Expr::ScalarFunction { fun, args, .. } => {
-            create_function_name(&fun.to_string(), false, args, input_schema)
-        }
-        Expr::ScalarUDF { fun, args, .. } => {
-            create_function_name(&fun.name, false, args, input_schema)
-        }
-        Expr::AggregateFunction {
-            fun,
-            distinct,
-            args,
-            ..
-        } => create_function_name(&fun.to_string(), *distinct, args, input_schema),
-        Expr::AggregateUDF { fun, args } => {
-            let mut names = Vec::with_capacity(args.len());
-            for e in args {
-                names.push(create_name(e, input_schema)?);
-            }
-            Ok(format!("{}({})", fun.name, names.join(",")))
-        }
-        Expr::InList {
-            expr,
-            list,
-            negated,
-        } => {
-            let expr = create_name(expr, input_schema)?;
-            let list = list.iter().map(|expr| create_name(expr, input_schema));
-            if *negated {
-                Ok(format!("{} NOT IN ({:?})", expr, list))
-            } else {
-                Ok(format!("{} IN ({:?})", expr, list))
-            }
-        }
-        other => Err(DataFusionError::NotImplemented(format!(
-            "Physical plan does not support logical expression {:?}",
-            other
-        ))),
-    }
-}
-
-/// Create field meta-data from an expression, for use in a result set schema
-pub fn exprlist_to_fields<'a>(
-    expr: impl IntoIterator<Item = &'a Expr>,
-    input_schema: &DFSchema,
-) -> Result<Vec<DFField>> {
-    expr.into_iter().map(|e| e.to_field(input_schema)).collect()
-}
-
-#[cfg(test)]
-mod tests {
-    use super::super::{col, lit, when};
-    use super::*;
-
-    #[test]
-    fn case_when_same_literal_then_types() -> Result<()> {
-        let _ = when(col("state").eq(lit("CO")), lit(303))
-            .when(col("state").eq(lit("NY")), lit(212))
-            .end()?;
-        Ok(())
-    }
-
-    #[test]
-    fn case_when_different_literal_then_types() {
-        let maybe_expr = when(col("state").eq(lit("CO")), lit(303))
-            .when(col("state").eq(lit("NY")), lit("212"))
-            .end();
-        assert!(maybe_expr.is_err());
-    }
-
-    #[test]
-    fn rewriter_visit() {
-        let mut rewriter = RecordingRewriter::default();
-        col("state").eq(lit("CO")).rewrite(&mut rewriter).unwrap();
-
-        assert_eq!(
-            rewriter.v,
-            vec![
-                "Previsited #state Eq Utf8(\"CO\")",
-                "Previsited #state",
-                "Mutated #state",
-                "Previsited Utf8(\"CO\")",
-                "Mutated Utf8(\"CO\")",
-                "Mutated #state Eq Utf8(\"CO\")"
-            ]
-        )
-    }
-
-    #[test]
-    fn filter_is_null_and_is_not_null() {
-        let col_null = Expr::Column("col1".to_string());
-        let col_not_null = Expr::Column("col2".to_string());
-        assert_eq!(format!("{:?}", col_null.is_null()), "#col1 IS NULL");
-        assert_eq!(
-            format!("{:?}", col_not_null.is_not_null()),
-            "#col2 IS NOT NULL"
-        );
-    }
-
-    #[derive(Default)]
-    struct RecordingRewriter {
-        v: Vec<String>,
-    }
-    impl ExprRewriter for RecordingRewriter {
-        fn mutate(&mut self, expr: Expr) -> Result<Expr> {
-            self.v.push(format!("Mutated {:?}", expr));
-            Ok(expr)
-        }
-
-        fn pre_visit(&mut self, expr: &Expr) -> Result<bool> {
-            self.v.push(format!("Previsited {:?}", expr));
-            Ok(true)
-        }
-    }
-
-    #[test]
-    fn rewriter_rewrite() {
-        let mut rewriter = FooBarRewriter {};
-
-        // rewrites "foo" --> "bar"
-        let rewritten = col("state").eq(lit("foo")).rewrite(&mut rewriter).unwrap();
-        assert_eq!(rewritten, col("state").eq(lit("bar")));
-
-        // doesn't wrewrite
-        let rewritten = col("state").eq(lit("baz")).rewrite(&mut rewriter).unwrap();
-        assert_eq!(rewritten, col("state").eq(lit("baz")));
-    }
-
-    /// rewrites all "foo" string literals to "bar"
-    struct FooBarRewriter {}
-    impl ExprRewriter for FooBarRewriter {
-        fn mutate(&mut self, expr: Expr) -> Result<Expr> {
-            match expr {
-                Expr::Literal(scalar) => {
-                    if let ScalarValue::Utf8(Some(utf8_val)) = scalar {
-                        let utf8_val = if utf8_val == "foo" {
-                            "bar".to_string()
-                        } else {
-                            utf8_val
-                        };
-                        Ok(lit(utf8_val))
-                    } else {
-                        Ok(Expr::Literal(scalar))
-                    }
-                }
-                // otherwise, return the expression unchanged
-                expr => Ok(expr),
-            }
-        }
-    }
-}
diff --git a/rust/datafusion/src/logical_plan/extension.rs b/rust/datafusion/src/logical_plan/extension.rs
deleted file mode 100644
index 43bf96ffb07..00000000000
--- a/rust/datafusion/src/logical_plan/extension.rs
+++ /dev/null
@@ -1,79 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! This module defines the interface for logical nodes
-use super::{Expr, LogicalPlan};
-use crate::logical_plan::DFSchemaRef;
-use std::{any::Any, collections::HashSet, fmt, sync::Arc};
-
-/// This defines the interface for `LogicalPlan` nodes that can be
-/// used to extend DataFusion with custom relational operators.
-///
-/// See the example in
-/// [user_defined_plan.rs](../../tests/user_defined_plan.rs) for an
-/// example of how to use this extension API
-pub trait UserDefinedLogicalNode: fmt::Debug {
-    /// Return a reference to self as Any, to support dynamic downcasting
-    fn as_any(&self) -> &dyn Any;
-
-    /// Return the logical plan's inputs
-    fn inputs(&self) -> Vec<&LogicalPlan>;
-
-    /// Return the output schema of this logical plan node
-    fn schema(&self) -> &DFSchemaRef;
-
-    /// returns all expressions in the current logical plan node. This
-    /// should not include expressions of any inputs (aka
-    /// non-recursively) These expressions are used for optimizer
-    /// passes and rewrites.
-    fn expressions(&self) -> Vec<Expr>;
-
-    /// A list of output columns (e.g. the names of columns in
-    /// self.schema()) for which predicates can not be pushed below
-    /// this node without changing the output.
-    ///
-    /// By default, this returns all columns and thus prevents any
-    /// predicates from being pushed below this node.
-    fn prevent_predicate_push_down_columns(&self) -> HashSet<String> {
-        // default (safe) is all columns in the schema.
-        self.schema()
-            .fields()
-            .iter()
-            .map(|f| f.name().clone())
-            .collect()
-    }
-
-    /// Write a single line, human readable string to `f` for use in explain plan
-    ///
-    /// For example: `TopK: k=10`
-    fn fmt_for_explain(&self, f: &mut fmt::Formatter) -> fmt::Result;
-
-    /// Create a new `ExtensionPlanNode` with the specified children
-    /// and expressions. This function is used during optimization
-    /// when the plan is being rewritten and a new instance of the
-    /// `ExtensionPlanNode` must be created.
-    ///
-    /// Note that exprs and inputs are in the same order as the result
-    /// of self.inputs and self.exprs.
-    ///
-    /// So, `self.from_template(exprs, ..).expressions() == exprs
-    fn from_template(
-        &self,
-        exprs: &[Expr],
-        inputs: &[LogicalPlan],
-    ) -> Arc<dyn UserDefinedLogicalNode + Send + Sync>;
-}
diff --git a/rust/datafusion/src/logical_plan/mod.rs b/rust/datafusion/src/logical_plan/mod.rs
deleted file mode 100644
index f9be1ff9830..00000000000
--- a/rust/datafusion/src/logical_plan/mod.rs
+++ /dev/null
@@ -1,50 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! This module provides a logical query plan enum that can describe queries. Logical query
-//! plans can be created from a SQL statement or built programmatically via the Table API.
-//!
-//! Logical query plans can then be optimized and executed directly, or translated into
-//! physical query plans and executed.
-
-mod builder;
-mod dfschema;
-mod display;
-mod expr;
-mod extension;
-mod operators;
-mod plan;
-mod registry;
-pub use builder::LogicalPlanBuilder;
-pub use dfschema::{DFField, DFSchema, DFSchemaRef, ToDFSchema};
-pub use display::display_schema;
-pub use expr::{
-    abs, acos, and, array, ascii, asin, atan, avg, binary_expr, bit_length, btrim, case,
-    ceil, character_length, chr, col, combine_filters, concat, concat_ws, cos, count,
-    count_distinct, create_udaf, create_udf, exp, exprlist_to_fields, floor, in_list,
-    initcap, left, length, lit, ln, log10, log2, lower, lpad, ltrim, max, md5, min,
-    octet_length, or, regexp_match, regexp_replace, repeat, replace, reverse, right,
-    round, rpad, rtrim, sha224, sha256, sha384, sha512, signum, sin, split_part, sqrt,
-    starts_with, strpos, substr, sum, tan, to_hex, translate, trim, trunc, upper, when,
-    Expr, ExprRewriter, ExpressionVisitor, Literal, Recursion,
-};
-pub use extension::UserDefinedLogicalNode;
-pub use operators::Operator;
-pub use plan::{
-    JoinType, LogicalPlan, Partitioning, PlanType, PlanVisitor, StringifiedPlan,
-};
-pub use registry::FunctionRegistry;
diff --git a/rust/datafusion/src/logical_plan/operators.rs b/rust/datafusion/src/logical_plan/operators.rs
deleted file mode 100644
index 624635e6d9a..00000000000
--- a/rust/datafusion/src/logical_plan/operators.rs
+++ /dev/null
@@ -1,135 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::{fmt, ops};
-
-use super::{binary_expr, Expr};
-
-/// Operators applied to expressions
-#[derive(Debug, Copy, Clone, PartialEq, Eq)]
-pub enum Operator {
-    /// Expressions are equal
-    Eq,
-    /// Expressions are not equal
-    NotEq,
-    /// Left side is smaller than right side
-    Lt,
-    /// Left side is smaller or equal to right side
-    LtEq,
-    /// Left side is greater than right side
-    Gt,
-    /// Left side is greater or equal to right side
-    GtEq,
-    /// Addition
-    Plus,
-    /// Subtraction
-    Minus,
-    /// Multiplication operator, like `*`
-    Multiply,
-    /// Division operator, like `/`
-    Divide,
-    /// Remainder operator, like `%`
-    Modulus,
-    /// Logical AND, like `&&`
-    And,
-    /// Logical OR, like `||`
-    Or,
-    /// Matches a wildcard pattern
-    Like,
-    /// Does not match a wildcard pattern
-    NotLike,
-}
-
-impl fmt::Display for Operator {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        let display = match &self {
-            Operator::Eq => "=",
-            Operator::NotEq => "!=",
-            Operator::Lt => "<",
-            Operator::LtEq => "<=",
-            Operator::Gt => ">",
-            Operator::GtEq => ">=",
-            Operator::Plus => "+",
-            Operator::Minus => "-",
-            Operator::Multiply => "*",
-            Operator::Divide => "/",
-            Operator::Modulus => "%",
-            Operator::And => "AND",
-            Operator::Or => "OR",
-            Operator::Like => "LIKE",
-            Operator::NotLike => "NOT LIKE",
-        };
-        write!(f, "{}", display)
-    }
-}
-
-impl ops::Add for Expr {
-    type Output = Self;
-
-    fn add(self, rhs: Self) -> Self {
-        binary_expr(self, Operator::Plus, rhs)
-    }
-}
-
-impl ops::Sub for Expr {
-    type Output = Self;
-
-    fn sub(self, rhs: Self) -> Self {
-        binary_expr(self, Operator::Minus, rhs)
-    }
-}
-
-impl ops::Mul for Expr {
-    type Output = Self;
-
-    fn mul(self, rhs: Self) -> Self {
-        binary_expr(self, Operator::Multiply, rhs)
-    }
-}
-
-impl ops::Div for Expr {
-    type Output = Self;
-
-    fn div(self, rhs: Self) -> Self {
-        binary_expr(self, Operator::Divide, rhs)
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use crate::prelude::lit;
-
-    #[test]
-    fn test_operators() {
-        assert_eq!(
-            format!("{:?}", lit(1u32) + lit(2u32)),
-            "UInt32(1) Plus UInt32(2)"
-        );
-        assert_eq!(
-            format!("{:?}", lit(1u32) - lit(2u32)),
-            "UInt32(1) Minus UInt32(2)"
-        );
-        assert_eq!(
-            format!("{:?}", lit(1u32) * lit(2u32)),
-            "UInt32(1) Multiply UInt32(2)"
-        );
-        assert_eq!(
-            format!("{:?}", lit(1u32) / lit(2u32)),
-            "UInt32(1) Divide UInt32(2)"
-        );
-    }
-}
diff --git a/rust/datafusion/src/logical_plan/plan.rs b/rust/datafusion/src/logical_plan/plan.rs
deleted file mode 100644
index d1b9b827a5a..00000000000
--- a/rust/datafusion/src/logical_plan/plan.rs
+++ /dev/null
@@ -1,1095 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-//! This module contains the  `LogicalPlan` enum that describes queries
-//! via a logical query plan.
-
-use std::{
-    cmp::min,
-    fmt::{self, Display},
-    sync::Arc,
-};
-
-use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
-
-use crate::datasource::TableProvider;
-use crate::sql::parser::FileType;
-
-use super::expr::Expr;
-use super::extension::UserDefinedLogicalNode;
-use super::{
-    col,
-    display::{GraphvizVisitor, IndentVisitor},
-};
-use crate::logical_plan::dfschema::DFSchemaRef;
-
-/// Join type
-#[derive(Debug, Clone, Copy)]
-pub enum JoinType {
-    /// Inner join
-    Inner,
-    /// Left join
-    Left,
-    /// Right join
-    Right,
-}
-
-/// A LogicalPlan represents the different types of relational
-/// operators (such as Projection, Filter, etc) and can be created by
-/// the SQL query planner and the DataFrame API.
-///
-/// A LogicalPlan represents transforming an input relation (table) to
-/// an output relation (table) with a (potentially) different
-/// schema. A plan represents a dataflow tree where data flows
-/// from leaves up to the root to produce the query result.
-#[derive(Clone)]
-pub enum LogicalPlan {
-    /// Evaluates an arbitrary list of expressions (essentially a
-    /// SELECT with an expression list) on its input.
-    Projection {
-        /// The list of expressions
-        expr: Vec<Expr>,
-        /// The incoming logical plan
-        input: Arc<LogicalPlan>,
-        /// The schema description of the output
-        schema: DFSchemaRef,
-    },
-    /// Filters rows from its input that do not match an
-    /// expression (essentially a WHERE clause with a predicate
-    /// expression).
-    ///
-    /// Semantically, `<predicate>` is evaluated for each row of the input;
-    /// If the value of `<predicate>` is true, the input row is passed to
-    /// the output. If the value of `<predicate>` is false, the row is
-    /// discarded.
-    Filter {
-        /// The predicate expression, which must have Boolean type.
-        predicate: Expr,
-        /// The incoming logical plan
-        input: Arc<LogicalPlan>,
-    },
-    /// Aggregates its input based on a set of grouping and aggregate
-    /// expressions (e.g. SUM).
-    Aggregate {
-        /// The incoming logical plan
-        input: Arc<LogicalPlan>,
-        /// Grouping expressions
-        group_expr: Vec<Expr>,
-        /// Aggregate expressions
-        aggr_expr: Vec<Expr>,
-        /// The schema description of the aggregate output
-        schema: DFSchemaRef,
-    },
-    /// Sorts its input according to a list of sort expressions.
-    Sort {
-        /// The sort expressions
-        expr: Vec<Expr>,
-        /// The incoming logical plan
-        input: Arc<LogicalPlan>,
-    },
-    /// Join two logical plans on one or more join columns
-    Join {
-        /// Left input
-        left: Arc<LogicalPlan>,
-        /// Right input
-        right: Arc<LogicalPlan>,
-        /// Equijoin clause expressed as pairs of (left, right) join columns
-        on: Vec<(String, String)>,
-        /// Join type
-        join_type: JoinType,
-        /// The output schema, containing fields from the left and right inputs
-        schema: DFSchemaRef,
-    },
-    /// Repartition the plan based on a partitioning scheme.
-    Repartition {
-        /// The incoming logical plan
-        input: Arc<LogicalPlan>,
-        /// The partitioning scheme
-        partitioning_scheme: Partitioning,
-    },
-    /// Union multiple inputs
-    Union {
-        /// Inputs to merge
-        inputs: Vec<LogicalPlan>,
-        /// Union schema. Should be the same for all inputs.
-        schema: DFSchemaRef,
-        /// Union output relation alias
-        alias: Option<String>,
-    },
-    /// Produces rows from a table provider by reference or from the context
-    TableScan {
-        /// The name of the table
-        table_name: String,
-        /// The source of the table
-        source: Arc<dyn TableProvider>,
-        /// Optional column indices to use as a projection
-        projection: Option<Vec<usize>>,
-        /// The schema description of the output
-        projected_schema: DFSchemaRef,
-        /// Optional expressions to be used as filters by the table provider
-        filters: Vec<Expr>,
-        /// Optional limit to skip reading
-        limit: Option<usize>,
-    },
-    /// Produces no rows: An empty relation with an empty schema
-    EmptyRelation {
-        /// Whether to produce a placeholder row
-        produce_one_row: bool,
-        /// The schema description of the output
-        schema: DFSchemaRef,
-    },
-    /// Produces the first `n` tuples from its input and discards the rest.
-    Limit {
-        /// The limit
-        n: usize,
-        /// The logical plan
-        input: Arc<LogicalPlan>,
-    },
-    /// Creates an external table.
-    CreateExternalTable {
-        /// The table schema
-        schema: DFSchemaRef,
-        /// The table name
-        name: String,
-        /// The physical location
-        location: String,
-        /// The file type of physical file
-        file_type: FileType,
-        /// Whether the CSV file contains a header
-        has_header: bool,
-    },
-    /// Produces a relation with string representations of
-    /// various parts of the plan
-    Explain {
-        /// Should extra (detailed, intermediate plans) be included?
-        verbose: bool,
-        /// The logical plan that is being EXPLAIN'd
-        plan: Arc<LogicalPlan>,
-        /// Represent the various stages plans have gone through
-        stringified_plans: Vec<StringifiedPlan>,
-        /// The output schema of the explain (2 columns of text)
-        schema: DFSchemaRef,
-    },
-    /// Extension operator defined outside of DataFusion
-    Extension {
-        /// The runtime extension operator
-        node: Arc<dyn UserDefinedLogicalNode + Send + Sync>,
-    },
-}
-
-impl LogicalPlan {
-    /// Get a reference to the logical plan's schema
-    pub fn schema(&self) -> &DFSchemaRef {
-        match self {
-            LogicalPlan::EmptyRelation { schema, .. } => &schema,
-            LogicalPlan::TableScan {
-                projected_schema, ..
-            } => &projected_schema,
-            LogicalPlan::Projection { schema, .. } => &schema,
-            LogicalPlan::Filter { input, .. } => input.schema(),
-            LogicalPlan::Aggregate { schema, .. } => &schema,
-            LogicalPlan::Sort { input, .. } => input.schema(),
-            LogicalPlan::Join { schema, .. } => &schema,
-            LogicalPlan::Repartition { input, .. } => input.schema(),
-            LogicalPlan::Limit { input, .. } => input.schema(),
-            LogicalPlan::CreateExternalTable { schema, .. } => &schema,
-            LogicalPlan::Explain { schema, .. } => &schema,
-            LogicalPlan::Extension { node } => &node.schema(),
-            LogicalPlan::Union { schema, .. } => &schema,
-        }
-    }
-
-    /// Get a vector of references to all schemas in every node of the logical plan
-    pub fn all_schemas(&self) -> Vec<&DFSchemaRef> {
-        match self {
-            LogicalPlan::TableScan {
-                projected_schema, ..
-            } => vec![&projected_schema],
-            LogicalPlan::Aggregate { input, schema, .. }
-            | LogicalPlan::Projection { input, schema, .. } => {
-                let mut schemas = input.all_schemas();
-                schemas.insert(0, &schema);
-                schemas
-            }
-            LogicalPlan::Join {
-                left,
-                right,
-                schema,
-                ..
-            } => {
-                let mut schemas = left.all_schemas();
-                schemas.extend(right.all_schemas());
-                schemas.insert(0, &schema);
-                schemas
-            }
-            LogicalPlan::Union { schema, .. } => {
-                vec![schema]
-            }
-            LogicalPlan::Extension { node } => vec![&node.schema()],
-            LogicalPlan::Explain { schema, .. }
-            | LogicalPlan::EmptyRelation { schema, .. }
-            | LogicalPlan::CreateExternalTable { schema, .. } => vec![&schema],
-            LogicalPlan::Limit { input, .. }
-            | LogicalPlan::Repartition { input, .. }
-            | LogicalPlan::Sort { input, .. }
-            | LogicalPlan::Filter { input, .. } => input.all_schemas(),
-        }
-    }
-
-    /// Returns the (fixed) output schema for explain plans
-    pub fn explain_schema() -> SchemaRef {
-        SchemaRef::new(Schema::new(vec![
-            Field::new("plan_type", DataType::Utf8, false),
-            Field::new("plan", DataType::Utf8, false),
-        ]))
-    }
-
-    /// returns all expressions (non-recursively) in the current
-    /// logical plan node. This does not include expressions in any
-    /// children
-    pub fn expressions(self: &LogicalPlan) -> Vec<Expr> {
-        match self {
-            LogicalPlan::Projection { expr, .. } => expr.clone(),
-            LogicalPlan::Filter { predicate, .. } => vec![predicate.clone()],
-            LogicalPlan::Repartition {
-                partitioning_scheme,
-                ..
-            } => match partitioning_scheme {
-                Partitioning::Hash(expr, _) => expr.clone(),
-                _ => vec![],
-            },
-            LogicalPlan::Aggregate {
-                group_expr,
-                aggr_expr,
-                ..
-            } => {
-                let mut result = group_expr.clone();
-                result.extend(aggr_expr.clone());
-                result
-            }
-            LogicalPlan::Join { on, .. } => {
-                on.iter().flat_map(|(l, r)| vec![col(l), col(r)]).collect()
-            }
-            LogicalPlan::Sort { expr, .. } => expr.clone(),
-            LogicalPlan::Extension { node } => node.expressions(),
-            // plans without expressions
-            LogicalPlan::TableScan { .. }
-            | LogicalPlan::EmptyRelation { .. }
-            | LogicalPlan::Limit { .. }
-            | LogicalPlan::CreateExternalTable { .. }
-            | LogicalPlan::Explain { .. } => vec![],
-            LogicalPlan::Union { .. } => {
-                vec![]
-            }
-        }
-    }
-
-    /// returns all inputs of this `LogicalPlan` node. Does not
-    /// include inputs to inputs.
-    pub fn inputs(self: &LogicalPlan) -> Vec<&LogicalPlan> {
-        match self {
-            LogicalPlan::Projection { input, .. } => vec![input],
-            LogicalPlan::Filter { input, .. } => vec![input],
-            LogicalPlan::Repartition { input, .. } => vec![input],
-            LogicalPlan::Aggregate { input, .. } => vec![input],
-            LogicalPlan::Sort { input, .. } => vec![input],
-            LogicalPlan::Join { left, right, .. } => vec![left, right],
-            LogicalPlan::Limit { input, .. } => vec![input],
-            LogicalPlan::Extension { node } => node.inputs(),
-            LogicalPlan::Union { inputs, .. } => inputs.iter().collect(),
-            // plans without inputs
-            LogicalPlan::TableScan { .. }
-            | LogicalPlan::EmptyRelation { .. }
-            | LogicalPlan::CreateExternalTable { .. }
-            | LogicalPlan::Explain { .. } => vec![],
-        }
-    }
-}
-
-/// Logical partitioning schemes supported by the repartition operator.
-#[derive(Debug, Clone)]
-pub enum Partitioning {
-    /// Allocate batches using a round-robin algorithm and the specified number of partitions
-    RoundRobinBatch(usize),
-    /// Allocate rows based on a hash of one of more expressions and the specified number
-    /// of partitions.
-    /// This partitioning scheme is not yet fully supported. See <https://issues.apache.org/jira/browse/ARROW-11011>
-    Hash(Vec<Expr>, usize),
-}
-
-/// Trait that implements the [Visitor
-/// pattern](https://en.wikipedia.org/wiki/Visitor_pattern) for a
-/// depth first walk of `LogicalPlan` nodes. `pre_visit` is called
-/// before any children are visited, and then `post_visit` is called
-/// after all children have been visited.
-////
-/// To use, define a struct that implements this trait and then invoke
-/// "LogicalPlan::accept".
-///
-/// For example, for a logical plan like:
-///
-/// Projection: #id
-///    Filter: #state Eq Utf8(\"CO\")\
-///       CsvScan: employee.csv projection=Some([0, 3])";
-///
-/// The sequence of visit operations would be:
-/// ```text
-/// visitor.pre_visit(Projection)
-/// visitor.pre_visit(Filter)
-/// visitor.pre_visit(CsvScan)
-/// visitor.post_visit(CsvScan)
-/// visitor.post_visit(Filter)
-/// visitor.post_visit(Projection)
-/// ```
-pub trait PlanVisitor {
-    /// The type of error returned by this visitor
-    type Error;
-
-    /// Invoked on a logical plan before any of its child inputs have been
-    /// visited. If Ok(true) is returned, the recursion continues. If
-    /// Err(..) or Ok(false) are returned, the recursion stops
-    /// immediately and the error, if any, is returned to `accept`
-    fn pre_visit(&mut self, plan: &LogicalPlan)
-        -> std::result::Result<bool, Self::Error>;
-
-    /// Invoked on a logical plan after all of its child inputs have
-    /// been visited. The return value is handled the same as the
-    /// return value of `pre_visit`. The provided default implementation
-    /// returns `Ok(true)`.
-    fn post_visit(
-        &mut self,
-        _plan: &LogicalPlan,
-    ) -> std::result::Result<bool, Self::Error> {
-        Ok(true)
-    }
-}
-
-impl LogicalPlan {
-    /// returns all inputs in the logical plan. Returns Ok(true) if
-    /// all nodes were visited, and Ok(false) if any call to
-    /// `pre_visit` or `post_visit` returned Ok(false) and may have
-    /// cut short the recursion
-    pub fn accept<V>(&self, visitor: &mut V) -> std::result::Result<bool, V::Error>
-    where
-        V: PlanVisitor,
-    {
-        if !visitor.pre_visit(self)? {
-            return Ok(false);
-        }
-
-        let recurse = match self {
-            LogicalPlan::Projection { input, .. } => input.accept(visitor)?,
-            LogicalPlan::Filter { input, .. } => input.accept(visitor)?,
-            LogicalPlan::Repartition { input, .. } => input.accept(visitor)?,
-            LogicalPlan::Aggregate { input, .. } => input.accept(visitor)?,
-            LogicalPlan::Sort { input, .. } => input.accept(visitor)?,
-            LogicalPlan::Join { left, right, .. } => {
-                left.accept(visitor)? && right.accept(visitor)?
-            }
-            LogicalPlan::Union { inputs, .. } => {
-                for input in inputs {
-                    if !input.accept(visitor)? {
-                        return Ok(false);
-                    }
-                }
-                true
-            }
-            LogicalPlan::Limit { input, .. } => input.accept(visitor)?,
-            LogicalPlan::Extension { node } => {
-                for input in node.inputs() {
-                    if !input.accept(visitor)? {
-                        return Ok(false);
-                    }
-                }
-                true
-            }
-            // plans without inputs
-            LogicalPlan::TableScan { .. }
-            | LogicalPlan::EmptyRelation { .. }
-            | LogicalPlan::CreateExternalTable { .. }
-            | LogicalPlan::Explain { .. } => true,
-        };
-        if !recurse {
-            return Ok(false);
-        }
-
-        if !visitor.post_visit(self)? {
-            return Ok(false);
-        }
-
-        Ok(true)
-    }
-}
-
-// Various implementations for printing out LogicalPlans
-impl LogicalPlan {
-    /// Return a `format`able structure that produces a single line
-    /// per node. For example:
-    ///
-    /// ```text
-    /// Projection: #id
-    ///    Filter: #state Eq Utf8(\"CO\")\
-    ///       CsvScan: employee.csv projection=Some([0, 3])
-    /// ```
-    ///
-    /// ```
-    /// use arrow::datatypes::{Field, Schema, DataType};
-    /// use datafusion::logical_plan::{lit, col, LogicalPlanBuilder};
-    /// let schema = Schema::new(vec![
-    ///     Field::new("id", DataType::Int32, false),
-    /// ]);
-    /// let plan = LogicalPlanBuilder::scan_empty("foo.csv", &schema, None).unwrap()
-    ///     .filter(col("id").eq(lit(5))).unwrap()
-    ///     .build().unwrap();
-    ///
-    /// // Format using display_indent
-    /// let display_string = format!("{}", plan.display_indent());
-    ///
-    /// assert_eq!("Filter: #id Eq Int32(5)\
-    ///              \n  TableScan: foo.csv projection=None",
-    ///             display_string);
-    /// ```
-    pub fn display_indent(&self) -> impl fmt::Display + '_ {
-        // Boilerplate structure to wrap LogicalPlan with something
-        // that that can be formatted
-        struct Wrapper<'a>(&'a LogicalPlan);
-        impl<'a> fmt::Display for Wrapper<'a> {
-            fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-                let with_schema = false;
-                let mut visitor = IndentVisitor::new(f, with_schema);
-                self.0.accept(&mut visitor).unwrap();
-                Ok(())
-            }
-        }
-        Wrapper(self)
-    }
-
-    /// Return a `format`able structure that produces a single line
-    /// per node that includes the output schema. For example:
-    ///
-    /// ```text
-    /// Projection: #id [id:Int32]\
-    ///    Filter: #state Eq Utf8(\"CO\") [id:Int32, state:Utf8]\
-    ///      TableScan: employee.csv projection=Some([0, 3]) [id:Int32, state:Utf8]";
-    /// ```
-    ///
-    /// ```
-    /// use arrow::datatypes::{Field, Schema, DataType};
-    /// use datafusion::logical_plan::{lit, col, LogicalPlanBuilder};
-    /// let schema = Schema::new(vec![
-    ///     Field::new("id", DataType::Int32, false),
-    /// ]);
-    /// let plan = LogicalPlanBuilder::scan_empty("foo.csv", &schema, None).unwrap()
-    ///     .filter(col("id").eq(lit(5))).unwrap()
-    ///     .build().unwrap();
-    ///
-    /// // Format using display_indent_schema
-    /// let display_string = format!("{}", plan.display_indent_schema());
-    ///
-    /// assert_eq!("Filter: #id Eq Int32(5) [id:Int32]\
-    ///             \n  TableScan: foo.csv projection=None [id:Int32]",
-    ///             display_string);
-    /// ```
-    pub fn display_indent_schema(&self) -> impl fmt::Display + '_ {
-        // Boilerplate structure to wrap LogicalPlan with something
-        // that that can be formatted
-        struct Wrapper<'a>(&'a LogicalPlan);
-        impl<'a> fmt::Display for Wrapper<'a> {
-            fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-                let with_schema = true;
-                let mut visitor = IndentVisitor::new(f, with_schema);
-                self.0.accept(&mut visitor).unwrap();
-                Ok(())
-            }
-        }
-        Wrapper(self)
-    }
-
-    /// Return a `format`able structure that produces lines meant for
-    /// graphical display using the `DOT` language. This format can be
-    /// visualized using software from
-    /// [`graphviz`](https://graphviz.org/)
-    ///
-    /// This currently produces two graphs -- one with the basic
-    /// structure, and one with additional details such as schema.
-    ///
-    /// ```
-    /// use arrow::datatypes::{Field, Schema, DataType};
-    /// use datafusion::logical_plan::{lit, col, LogicalPlanBuilder};
-    /// let schema = Schema::new(vec![
-    ///     Field::new("id", DataType::Int32, false),
-    /// ]);
-    /// let plan = LogicalPlanBuilder::scan_empty("foo.csv", &schema, None).unwrap()
-    ///     .filter(col("id").eq(lit(5))).unwrap()
-    ///     .build().unwrap();
-    ///
-    /// // Format using display_graphviz
-    /// let graphviz_string = format!("{}", plan.display_graphviz());
-    /// ```
-    ///
-    /// If graphviz string is saved to a file such as `/tmp/example.dot`, the following
-    /// commands can be used to render it as a pdf:
-    ///
-    /// ```bash
-    ///   dot -Tpdf < /tmp/example.dot  > /tmp/example.pdf
-    /// ```
-    ///
-    pub fn display_graphviz(&self) -> impl fmt::Display + '_ {
-        // Boilerplate structure to wrap LogicalPlan with something
-        // that that can be formatted
-        struct Wrapper<'a>(&'a LogicalPlan);
-        impl<'a> fmt::Display for Wrapper<'a> {
-            fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-                writeln!(
-                    f,
-                    "// Begin DataFusion GraphViz Plan (see https://graphviz.org)"
-                )?;
-                writeln!(f, "digraph {{")?;
-
-                let mut visitor = GraphvizVisitor::new(f);
-
-                visitor.pre_visit_plan("LogicalPlan")?;
-                self.0.accept(&mut visitor).unwrap();
-                visitor.post_visit_plan()?;
-
-                visitor.set_with_schema(true);
-                visitor.pre_visit_plan("Detailed LogicalPlan")?;
-                self.0.accept(&mut visitor).unwrap();
-                visitor.post_visit_plan()?;
-
-                writeln!(f, "}}")?;
-                writeln!(f, "// End DataFusion GraphViz Plan")?;
-                Ok(())
-            }
-        }
-        Wrapper(self)
-    }
-
-    /// Return a `format`able structure with the a human readable
-    /// description of this LogicalPlan node per node, not including
-    /// children. For example:
-    ///
-    /// ```text
-    /// Projection: #id
-    /// ```
-    /// ```
-    /// use arrow::datatypes::{Field, Schema, DataType};
-    /// use datafusion::logical_plan::{lit, col, LogicalPlanBuilder};
-    /// let schema = Schema::new(vec![
-    ///     Field::new("id", DataType::Int32, false),
-    /// ]);
-    /// let plan = LogicalPlanBuilder::scan_empty("foo.csv", &schema, None).unwrap()
-    ///     .build().unwrap();
-    ///
-    /// // Format using display
-    /// let display_string = format!("{}", plan.display());
-    ///
-    /// assert_eq!("TableScan: foo.csv projection=None", display_string);
-    /// ```
-    pub fn display(&self) -> impl fmt::Display + '_ {
-        // Boilerplate structure to wrap LogicalPlan with something
-        // that that can be formatted
-        struct Wrapper<'a>(&'a LogicalPlan);
-        impl<'a> fmt::Display for Wrapper<'a> {
-            fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-                match &*self.0 {
-                    LogicalPlan::EmptyRelation { .. } => write!(f, "EmptyRelation"),
-                    LogicalPlan::TableScan {
-                        ref table_name,
-                        ref projection,
-                        ref filters,
-                        ref limit,
-                        ..
-                    } => {
-                        let sep = " ".repeat(min(1, table_name.len()));
-                        write!(
-                            f,
-                            "TableScan: {}{}projection={:?}",
-                            table_name, sep, projection
-                        )?;
-
-                        if !filters.is_empty() {
-                            write!(f, ", filters={:?}", filters)?;
-                        }
-
-                        if let Some(n) = limit {
-                            write!(f, ", limit={}", n)?;
-                        }
-
-                        Ok(())
-                    }
-                    LogicalPlan::Projection { ref expr, .. } => {
-                        write!(f, "Projection: ")?;
-                        for (i, expr_item) in expr.iter().enumerate() {
-                            if i > 0 {
-                                write!(f, ", ")?;
-                            }
-                            write!(f, "{:?}", expr_item)?;
-                        }
-                        Ok(())
-                    }
-                    LogicalPlan::Filter {
-                        predicate: ref expr,
-                        ..
-                    } => write!(f, "Filter: {:?}", expr),
-                    LogicalPlan::Aggregate {
-                        ref group_expr,
-                        ref aggr_expr,
-                        ..
-                    } => write!(
-                        f,
-                        "Aggregate: groupBy=[{:?}], aggr=[{:?}]",
-                        group_expr, aggr_expr
-                    ),
-                    LogicalPlan::Sort { ref expr, .. } => {
-                        write!(f, "Sort: ")?;
-                        for (i, expr_item) in expr.iter().enumerate() {
-                            if i > 0 {
-                                write!(f, ", ")?;
-                            }
-                            write!(f, "{:?}", expr_item)?;
-                        }
-                        Ok(())
-                    }
-                    LogicalPlan::Join { on: ref keys, .. } => {
-                        let join_expr: Vec<String> =
-                            keys.iter().map(|(l, r)| format!("{} = {}", l, r)).collect();
-                        write!(f, "Join: {}", join_expr.join(", "))
-                    }
-                    LogicalPlan::Repartition {
-                        partitioning_scheme,
-                        ..
-                    } => match partitioning_scheme {
-                        Partitioning::RoundRobinBatch(n) => write!(
-                            f,
-                            "Repartition: RoundRobinBatch partition_count={}",
-                            n
-                        ),
-                        Partitioning::Hash(expr, n) => {
-                            let hash_expr: Vec<String> =
-                                expr.iter().map(|e| format!("{:?}", e)).collect();
-                            write!(
-                                f,
-                                "Repartition: Hash({}) partition_count={}",
-                                hash_expr.join(", "),
-                                n
-                            )
-                        }
-                    },
-                    LogicalPlan::Limit { ref n, .. } => write!(f, "Limit: {}", n),
-                    LogicalPlan::CreateExternalTable { ref name, .. } => {
-                        write!(f, "CreateExternalTable: {:?}", name)
-                    }
-                    LogicalPlan::Explain { .. } => write!(f, "Explain"),
-                    LogicalPlan::Union { .. } => write!(f, "Union"),
-                    LogicalPlan::Extension { ref node } => node.fmt_for_explain(f),
-                }
-            }
-        }
-        Wrapper(self)
-    }
-}
-
-impl fmt::Debug for LogicalPlan {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        self.display_indent().fmt(f)
-    }
-}
-
-/// Represents which type of plan
-#[derive(Debug, Clone, PartialEq)]
-pub enum PlanType {
-    /// The initial LogicalPlan provided to DataFusion
-    LogicalPlan,
-    /// The LogicalPlan which results from applying an optimizer pass
-    OptimizedLogicalPlan {
-        /// The name of the optimizer which produced this plan
-        optimizer_name: String,
-    },
-    /// The physical plan, prepared for execution
-    PhysicalPlan,
-}
-
-impl From<&PlanType> for String {
-    fn from(t: &PlanType) -> Self {
-        match t {
-            PlanType::LogicalPlan => "logical_plan".into(),
-            PlanType::OptimizedLogicalPlan { optimizer_name } => {
-                format!("logical_plan after {}", optimizer_name)
-            }
-            PlanType::PhysicalPlan => "physical_plan".into(),
-        }
-    }
-}
-
-/// Represents some sort of execution plan, in String form
-#[derive(Debug, Clone, PartialEq)]
-#[allow(clippy::rc_buffer)]
-pub struct StringifiedPlan {
-    /// An identifier of what type of plan this string represents
-    pub plan_type: PlanType,
-    /// The string representation of the plan
-    pub plan: Arc<String>,
-}
-
-impl StringifiedPlan {
-    /// Create a new Stringified plan of `plan_type` with string
-    /// representation `plan`
-    pub fn new(plan_type: PlanType, plan: impl Into<String>) -> Self {
-        StringifiedPlan {
-            plan_type,
-            plan: Arc::new(plan.into()),
-        }
-    }
-
-    /// returns true if this plan should be displayed. Generally
-    /// `verbose_mode = true` will display all available plans
-    pub fn should_display(&self, verbose_mode: bool) -> bool {
-        self.plan_type == PlanType::LogicalPlan || verbose_mode
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::super::{col, lit, LogicalPlanBuilder};
-    use super::*;
-
-    fn employee_schema() -> Schema {
-        Schema::new(vec![
-            Field::new("id", DataType::Int32, false),
-            Field::new("first_name", DataType::Utf8, false),
-            Field::new("last_name", DataType::Utf8, false),
-            Field::new("state", DataType::Utf8, false),
-            Field::new("salary", DataType::Int32, false),
-        ])
-    }
-
-    fn display_plan() -> LogicalPlan {
-        LogicalPlanBuilder::scan_empty(
-            "employee.csv",
-            &employee_schema(),
-            Some(vec![0, 3]),
-        )
-        .unwrap()
-        .filter(col("state").eq(lit("CO")))
-        .unwrap()
-        .project(vec![col("id")])
-        .unwrap()
-        .build()
-        .unwrap()
-    }
-
-    #[test]
-    fn test_display_indent() {
-        let plan = display_plan();
-
-        let expected = "Projection: #id\
-        \n  Filter: #state Eq Utf8(\"CO\")\
-        \n    TableScan: employee.csv projection=Some([0, 3])";
-
-        assert_eq!(expected, format!("{}", plan.display_indent()));
-    }
-
-    #[test]
-    fn test_display_indent_schema() {
-        let plan = display_plan();
-
-        let expected = "Projection: #id [id:Int32]\
-                        \n  Filter: #state Eq Utf8(\"CO\") [id:Int32, state:Utf8]\
-                        \n    TableScan: employee.csv projection=Some([0, 3]) [id:Int32, state:Utf8]";
-
-        assert_eq!(expected, format!("{}", plan.display_indent_schema()));
-    }
-
-    #[test]
-    fn test_display_graphviz() {
-        let plan = display_plan();
-
-        // just test for a few key lines in the output rather than the
-        // whole thing to make test mainteance easier.
-        let graphviz = format!("{}", plan.display_graphviz());
-
-        assert!(
-            graphviz.contains(
-                r#"// Begin DataFusion GraphViz Plan (see https://graphviz.org)"#
-            ),
-            "\n{}",
-            plan.display_graphviz()
-        );
-        assert!(
-            graphviz.contains(
-                r#"[shape=box label="TableScan: employee.csv projection=Some([0, 3])"]"#
-            ),
-            "\n{}",
-            plan.display_graphviz()
-        );
-        assert!(graphviz.contains(r#"[shape=box label="TableScan: employee.csv projection=Some([0, 3])\nSchema: [id:Int32, state:Utf8]"]"#),
-                "\n{}", plan.display_graphviz());
-        assert!(
-            graphviz.contains(r#"// End DataFusion GraphViz Plan"#),
-            "\n{}",
-            plan.display_graphviz()
-        );
-    }
-
-    /// Tests for the Visitor trait and walking logical plan nodes
-
-    #[derive(Debug, Default)]
-    struct OkVisitor {
-        strings: Vec<String>,
-    }
-    impl PlanVisitor for OkVisitor {
-        type Error = String;
-
-        fn pre_visit(
-            &mut self,
-            plan: &LogicalPlan,
-        ) -> std::result::Result<bool, Self::Error> {
-            let s = match plan {
-                LogicalPlan::Projection { .. } => "pre_visit Projection",
-                LogicalPlan::Filter { .. } => "pre_visit Filter",
-                LogicalPlan::TableScan { .. } => "pre_visit TableScan",
-                _ => unimplemented!("unknown plan type"),
-            };
-
-            self.strings.push(s.into());
-            Ok(true)
-        }
-
-        fn post_visit(
-            &mut self,
-            plan: &LogicalPlan,
-        ) -> std::result::Result<bool, Self::Error> {
-            let s = match plan {
-                LogicalPlan::Projection { .. } => "post_visit Projection",
-                LogicalPlan::Filter { .. } => "post_visit Filter",
-                LogicalPlan::TableScan { .. } => "post_visit TableScan",
-                _ => unimplemented!("unknown plan type"),
-            };
-
-            self.strings.push(s.into());
-            Ok(true)
-        }
-    }
-
-    #[test]
-    fn visit_order() {
-        let mut visitor = OkVisitor::default();
-        let plan = test_plan();
-        let res = plan.accept(&mut visitor);
-        assert!(res.is_ok());
-
-        assert_eq!(
-            visitor.strings,
-            vec![
-                "pre_visit Projection",
-                "pre_visit Filter",
-                "pre_visit TableScan",
-                "post_visit TableScan",
-                "post_visit Filter",
-                "post_visit Projection"
-            ]
-        );
-    }
-
-    #[derive(Debug, Default)]
-    /// Counter than counts to zero and returns true when it gets there
-    struct OptionalCounter {
-        val: Option<usize>,
-    }
-    impl OptionalCounter {
-        fn new(val: usize) -> Self {
-            Self { val: Some(val) }
-        }
-        // Decrements the counter by 1, if any, returning true if it hits zero
-        fn dec(&mut self) -> bool {
-            if Some(0) == self.val {
-                true
-            } else {
-                self.val = self.val.take().map(|i| i - 1);
-                false
-            }
-        }
-    }
-
-    #[derive(Debug, Default)]
-    /// Visitor that returns false after some number of visits
-    struct StoppingVisitor {
-        inner: OkVisitor,
-        /// When Some(0) returns false from pre_visit
-        return_false_from_pre_in: OptionalCounter,
-        /// When Some(0) returns false from post_visit
-        return_false_from_post_in: OptionalCounter,
-    }
-
-    impl PlanVisitor for StoppingVisitor {
-        type Error = String;
-
-        fn pre_visit(
-            &mut self,
-            plan: &LogicalPlan,
-        ) -> std::result::Result<bool, Self::Error> {
-            if self.return_false_from_pre_in.dec() {
-                return Ok(false);
-            }
-            self.inner.pre_visit(plan)
-        }
-
-        fn post_visit(
-            &mut self,
-            plan: &LogicalPlan,
-        ) -> std::result::Result<bool, Self::Error> {
-            if self.return_false_from_post_in.dec() {
-                return Ok(false);
-            }
-
-            self.inner.post_visit(plan)
-        }
-    }
-
-    /// test early stopping in pre-visit
-    #[test]
-    fn early_stopping_pre_visit() {
-        let mut visitor = StoppingVisitor {
-            return_false_from_pre_in: OptionalCounter::new(2),
-            ..Default::default()
-        };
-        let plan = test_plan();
-        let res = plan.accept(&mut visitor);
-        assert!(res.is_ok());
-
-        assert_eq!(
-            visitor.inner.strings,
-            vec!["pre_visit Projection", "pre_visit Filter",]
-        );
-    }
-
-    #[test]
-    fn early_stopping_post_visit() {
-        let mut visitor = StoppingVisitor {
-            return_false_from_post_in: OptionalCounter::new(1),
-            ..Default::default()
-        };
-        let plan = test_plan();
-        let res = plan.accept(&mut visitor);
-        assert!(res.is_ok());
-
-        assert_eq!(
-            visitor.inner.strings,
-            vec![
-                "pre_visit Projection",
-                "pre_visit Filter",
-                "pre_visit TableScan",
-                "post_visit TableScan",
-            ]
-        );
-    }
-
-    #[derive(Debug, Default)]
-    /// Visitor that returns an error after some number of visits
-    struct ErrorVisitor {
-        inner: OkVisitor,
-        /// When Some(0) returns false from pre_visit
-        return_error_from_pre_in: OptionalCounter,
-        /// When Some(0) returns false from post_visit
-        return_error_from_post_in: OptionalCounter,
-    }
-
-    impl PlanVisitor for ErrorVisitor {
-        type Error = String;
-
-        fn pre_visit(
-            &mut self,
-            plan: &LogicalPlan,
-        ) -> std::result::Result<bool, Self::Error> {
-            if self.return_error_from_pre_in.dec() {
-                return Err("Error in pre_visit".into());
-            }
-
-            self.inner.pre_visit(plan)
-        }
-
-        fn post_visit(
-            &mut self,
-            plan: &LogicalPlan,
-        ) -> std::result::Result<bool, Self::Error> {
-            if self.return_error_from_post_in.dec() {
-                return Err("Error in post_visit".into());
-            }
-
-            self.inner.post_visit(plan)
-        }
-    }
-
-    #[test]
-    fn error_pre_visit() {
-        let mut visitor = ErrorVisitor {
-            return_error_from_pre_in: OptionalCounter::new(2),
-            ..Default::default()
-        };
-        let plan = test_plan();
-        let res = plan.accept(&mut visitor);
-
-        if let Err(e) = res {
-            assert_eq!("Error in pre_visit", e);
-        } else {
-            panic!("Expected an error");
-        }
-
-        assert_eq!(
-            visitor.inner.strings,
-            vec!["pre_visit Projection", "pre_visit Filter",]
-        );
-    }
-
-    #[test]
-    fn error_post_visit() {
-        let mut visitor = ErrorVisitor {
-            return_error_from_post_in: OptionalCounter::new(1),
-            ..Default::default()
-        };
-        let plan = test_plan();
-        let res = plan.accept(&mut visitor);
-        if let Err(e) = res {
-            assert_eq!("Error in post_visit", e);
-        } else {
-            panic!("Expected an error");
-        }
-
-        assert_eq!(
-            visitor.inner.strings,
-            vec![
-                "pre_visit Projection",
-                "pre_visit Filter",
-                "pre_visit TableScan",
-                "post_visit TableScan",
-            ]
-        );
-    }
-
-    fn test_plan() -> LogicalPlan {
-        let schema = Schema::new(vec![Field::new("id", DataType::Int32, false)]);
-
-        LogicalPlanBuilder::scan_empty("", &schema, Some(vec![0]))
-            .unwrap()
-            .filter(col("state").eq(lit("CO")))
-            .unwrap()
-            .project(vec![col("id")])
-            .unwrap()
-            .build()
-            .unwrap()
-    }
-}
diff --git a/rust/datafusion/src/logical_plan/registry.rs b/rust/datafusion/src/logical_plan/registry.rs
deleted file mode 100644
index d9b1839881d..00000000000
--- a/rust/datafusion/src/logical_plan/registry.rs
+++ /dev/null
@@ -1,34 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::{collections::HashSet, sync::Arc};
-
-use crate::error::Result;
-use crate::physical_plan::udaf::AggregateUDF;
-use crate::physical_plan::udf::ScalarUDF;
-
-/// A registry knows how to build logical expressions out of user-defined function' names
-pub trait FunctionRegistry {
-    /// Set of all available udfs.
-    fn udfs(&self) -> HashSet<String>;
-
-    /// Returns a reference to the udf named `name`.
-    fn udf(&self, name: &str) -> Result<Arc<ScalarUDF>>;
-
-    /// Returns a reference to the udaf named `name`.
-    fn udaf(&self, name: &str) -> Result<Arc<AggregateUDF>>;
-}
diff --git a/rust/datafusion/src/optimizer/constant_folding.rs b/rust/datafusion/src/optimizer/constant_folding.rs
deleted file mode 100644
index 2fa03eb5c70..00000000000
--- a/rust/datafusion/src/optimizer/constant_folding.rs
+++ /dev/null
@@ -1,591 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Boolean comparision rule rewrites redudant comparison expression involing boolean literal into
-//! unary expression.
-
-use std::sync::Arc;
-
-use arrow::datatypes::DataType;
-
-use crate::error::Result;
-use crate::logical_plan::{DFSchemaRef, Expr, ExprRewriter, LogicalPlan, Operator};
-use crate::optimizer::optimizer::OptimizerRule;
-use crate::optimizer::utils;
-use crate::scalar::ScalarValue;
-
-/// Optimizer that simplifies comparison expressions involving boolean literals.
-///
-/// Recursively go through all expressionss and simplify the following cases:
-/// * `expr = true` and `expr != false` to `expr` when `expr` is of boolean type
-/// * `expr = false` and `expr != true` to `!expr` when `expr` is of boolean type
-/// * `true = true` and `false = false` to `true`
-/// * `false = true` and `true = false` to `false`
-/// * `!!expr` to `expr`
-/// * `expr = null` and `expr != null` to `null`
-pub struct ConstantFolding {}
-
-impl ConstantFolding {
-    #[allow(missing_docs)]
-    pub fn new() -> Self {
-        Self {}
-    }
-}
-
-impl OptimizerRule for ConstantFolding {
-    fn optimize(&self, plan: &LogicalPlan) -> Result<LogicalPlan> {
-        // We need to pass down the all schemas within the plan tree to `optimize_expr` in order to
-        // to evaluate expression types. For example, a projection plan's schema will only include
-        // projected columns. With just the projected schema, it's not possible to infer types for
-        // expressions that references non-projected columns within the same project plan or its
-        // children plans.
-        let mut rewriter = ConstantRewriter {
-            schemas: plan.all_schemas(),
-        };
-
-        match plan {
-            LogicalPlan::Filter { predicate, input } => Ok(LogicalPlan::Filter {
-                predicate: predicate.clone().rewrite(&mut rewriter)?,
-                input: Arc::new(self.optimize(input)?),
-            }),
-            // Rest: recurse into plan, apply optimization where possible
-            LogicalPlan::Projection { .. }
-            | LogicalPlan::Aggregate { .. }
-            | LogicalPlan::Repartition { .. }
-            | LogicalPlan::CreateExternalTable { .. }
-            | LogicalPlan::Extension { .. }
-            | LogicalPlan::Sort { .. }
-            | LogicalPlan::Explain { .. }
-            | LogicalPlan::Limit { .. }
-            | LogicalPlan::Union { .. }
-            | LogicalPlan::Join { .. } => {
-                // apply the optimization to all inputs of the plan
-                let inputs = plan.inputs();
-                let new_inputs = inputs
-                    .iter()
-                    .map(|plan| self.optimize(plan))
-                    .collect::<Result<Vec<_>>>()?;
-
-                let expr = plan
-                    .expressions()
-                    .into_iter()
-                    .map(|e| e.rewrite(&mut rewriter))
-                    .collect::<Result<Vec<_>>>()?;
-
-                utils::from_plan(plan, &expr, &new_inputs)
-            }
-            LogicalPlan::TableScan { .. } | LogicalPlan::EmptyRelation { .. } => {
-                Ok(plan.clone())
-            }
-        }
-    }
-
-    fn name(&self) -> &str {
-        "constant_folding"
-    }
-}
-
-struct ConstantRewriter<'a> {
-    /// input schemas
-    schemas: Vec<&'a DFSchemaRef>,
-}
-
-impl<'a> ConstantRewriter<'a> {
-    fn is_boolean_type(&self, expr: &Expr) -> bool {
-        for schema in &self.schemas {
-            if let Ok(DataType::Boolean) = expr.get_type(schema) {
-                return true;
-            }
-        }
-
-        false
-    }
-}
-
-impl<'a> ExprRewriter for ConstantRewriter<'a> {
-    /// rewrite the expression simplifying any constant expressions
-    fn mutate(&mut self, expr: Expr) -> Result<Expr> {
-        let new_expr = match expr {
-            Expr::BinaryExpr { left, op, right } => match op {
-                Operator::Eq => match (left.as_ref(), right.as_ref()) {
-                    (
-                        Expr::Literal(ScalarValue::Boolean(l)),
-                        Expr::Literal(ScalarValue::Boolean(r)),
-                    ) => match (l, r) {
-                        (Some(l), Some(r)) => {
-                            Expr::Literal(ScalarValue::Boolean(Some(l == r)))
-                        }
-                        _ => Expr::Literal(ScalarValue::Boolean(None)),
-                    },
-                    (Expr::Literal(ScalarValue::Boolean(b)), _)
-                        if self.is_boolean_type(&right) =>
-                    {
-                        match b {
-                            Some(true) => *right,
-                            Some(false) => Expr::Not(right),
-                            None => Expr::Literal(ScalarValue::Boolean(None)),
-                        }
-                    }
-                    (_, Expr::Literal(ScalarValue::Boolean(b)))
-                        if self.is_boolean_type(&left) =>
-                    {
-                        match b {
-                            Some(true) => *left,
-                            Some(false) => Expr::Not(left),
-                            None => Expr::Literal(ScalarValue::Boolean(None)),
-                        }
-                    }
-                    _ => Expr::BinaryExpr {
-                        left,
-                        op: Operator::Eq,
-                        right,
-                    },
-                },
-                Operator::NotEq => match (left.as_ref(), right.as_ref()) {
-                    (
-                        Expr::Literal(ScalarValue::Boolean(l)),
-                        Expr::Literal(ScalarValue::Boolean(r)),
-                    ) => match (l, r) {
-                        (Some(l), Some(r)) => {
-                            Expr::Literal(ScalarValue::Boolean(Some(l != r)))
-                        }
-                        _ => Expr::Literal(ScalarValue::Boolean(None)),
-                    },
-                    (Expr::Literal(ScalarValue::Boolean(b)), _)
-                        if self.is_boolean_type(&right) =>
-                    {
-                        match b {
-                            Some(true) => Expr::Not(right),
-                            Some(false) => *right,
-                            None => Expr::Literal(ScalarValue::Boolean(None)),
-                        }
-                    }
-                    (_, Expr::Literal(ScalarValue::Boolean(b)))
-                        if self.is_boolean_type(&left) =>
-                    {
-                        match b {
-                            Some(true) => Expr::Not(left),
-                            Some(false) => *left,
-                            None => Expr::Literal(ScalarValue::Boolean(None)),
-                        }
-                    }
-                    _ => Expr::BinaryExpr {
-                        left,
-                        op: Operator::NotEq,
-                        right,
-                    },
-                },
-                _ => Expr::BinaryExpr { left, op, right },
-            },
-            Expr::Not(inner) => {
-                // Not(Not(expr)) --> expr
-                if let Expr::Not(negated_inner) = *inner {
-                    *negated_inner
-                } else {
-                    Expr::Not(inner)
-                }
-            }
-            expr => {
-                // no rewrite possible
-                expr
-            }
-        };
-        Ok(new_expr)
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::logical_plan::{
-        col, lit, max, min, DFField, DFSchema, LogicalPlanBuilder,
-    };
-
-    use arrow::datatypes::*;
-
-    fn test_table_scan() -> Result<LogicalPlan> {
-        let schema = Schema::new(vec![
-            Field::new("a", DataType::Boolean, false),
-            Field::new("b", DataType::Boolean, false),
-            Field::new("c", DataType::Boolean, false),
-            Field::new("d", DataType::UInt32, false),
-        ]);
-        LogicalPlanBuilder::scan_empty("test", &schema, None)?.build()
-    }
-
-    fn expr_test_schema() -> DFSchemaRef {
-        Arc::new(
-            DFSchema::new(vec![
-                DFField::new(None, "c1", DataType::Utf8, true),
-                DFField::new(None, "c2", DataType::Boolean, true),
-            ])
-            .unwrap(),
-        )
-    }
-
-    #[test]
-    fn optimize_expr_not_not() -> Result<()> {
-        let schema = expr_test_schema();
-        let mut rewriter = ConstantRewriter {
-            schemas: vec![&schema],
-        };
-
-        assert_eq!(
-            (col("c2").not().not().not()).rewrite(&mut rewriter)?,
-            col("c2").not(),
-        );
-
-        Ok(())
-    }
-
-    #[test]
-    fn optimize_expr_null_comparision() -> Result<()> {
-        let schema = expr_test_schema();
-        let mut rewriter = ConstantRewriter {
-            schemas: vec![&schema],
-        };
-
-        // x = null is always null
-        assert_eq!(
-            (lit(true).eq(lit(ScalarValue::Boolean(None)))).rewrite(&mut rewriter)?,
-            lit(ScalarValue::Boolean(None)),
-        );
-
-        // null != null is always null
-        assert_eq!(
-            (lit(ScalarValue::Boolean(None)).not_eq(lit(ScalarValue::Boolean(None))))
-                .rewrite(&mut rewriter)?,
-            lit(ScalarValue::Boolean(None)),
-        );
-
-        // x != null is always null
-        assert_eq!(
-            (col("c2").not_eq(lit(ScalarValue::Boolean(None)))).rewrite(&mut rewriter)?,
-            lit(ScalarValue::Boolean(None)),
-        );
-
-        // null = x is always null
-        assert_eq!(
-            (lit(ScalarValue::Boolean(None)).eq(col("c2"))).rewrite(&mut rewriter)?,
-            lit(ScalarValue::Boolean(None)),
-        );
-
-        Ok(())
-    }
-
-    #[test]
-    fn optimize_expr_eq() -> Result<()> {
-        let schema = expr_test_schema();
-        let mut rewriter = ConstantRewriter {
-            schemas: vec![&schema],
-        };
-
-        assert_eq!(col("c2").get_type(&schema)?, DataType::Boolean);
-
-        // true = ture -> true
-        assert_eq!((lit(true).eq(lit(true))).rewrite(&mut rewriter)?, lit(true),);
-
-        // true = false -> false
-        assert_eq!(
-            (lit(true).eq(lit(false))).rewrite(&mut rewriter)?,
-            lit(false),
-        );
-
-        // c2 = true -> c2
-        assert_eq!((col("c2").eq(lit(true))).rewrite(&mut rewriter)?, col("c2"),);
-
-        // c2 = false => !c2
-        assert_eq!(
-            (col("c2").eq(lit(false))).rewrite(&mut rewriter)?,
-            col("c2").not(),
-        );
-
-        Ok(())
-    }
-
-    #[test]
-    fn optimize_expr_eq_skip_nonboolean_type() -> Result<()> {
-        let schema = expr_test_schema();
-        let mut rewriter = ConstantRewriter {
-            schemas: vec![&schema],
-        };
-
-        // When one of the operand is not of boolean type, folding the other boolean constant will
-        // change return type of expression to non-boolean.
-        //
-        // Make sure c1 column to be used in tests is not boolean type
-        assert_eq!(col("c1").get_type(&schema)?, DataType::Utf8);
-
-        // don't fold c1 = true
-        assert_eq!(
-            (col("c1").eq(lit(true))).rewrite(&mut rewriter)?,
-            col("c1").eq(lit(true)),
-        );
-
-        // don't fold c1 = false
-        assert_eq!(
-            (col("c1").eq(lit(false))).rewrite(&mut rewriter)?,
-            col("c1").eq(lit(false)),
-        );
-
-        // test constant operands
-        assert_eq!(
-            (lit(1).eq(lit(true))).rewrite(&mut rewriter)?,
-            lit(1).eq(lit(true)),
-        );
-
-        assert_eq!(
-            (lit("a").eq(lit(false))).rewrite(&mut rewriter)?,
-            lit("a").eq(lit(false)),
-        );
-
-        Ok(())
-    }
-
-    #[test]
-    fn optimize_expr_not_eq() -> Result<()> {
-        let schema = expr_test_schema();
-        let mut rewriter = ConstantRewriter {
-            schemas: vec![&schema],
-        };
-
-        assert_eq!(col("c2").get_type(&schema)?, DataType::Boolean);
-
-        // c2 != true -> !c2
-        assert_eq!(
-            (col("c2").not_eq(lit(true))).rewrite(&mut rewriter)?,
-            col("c2").not(),
-        );
-
-        // c2 != false -> c2
-        assert_eq!(
-            (col("c2").not_eq(lit(false))).rewrite(&mut rewriter)?,
-            col("c2"),
-        );
-
-        // test constant
-        assert_eq!(
-            (lit(true).not_eq(lit(true))).rewrite(&mut rewriter)?,
-            lit(false),
-        );
-
-        assert_eq!(
-            (lit(true).not_eq(lit(false))).rewrite(&mut rewriter)?,
-            lit(true),
-        );
-
-        Ok(())
-    }
-
-    #[test]
-    fn optimize_expr_not_eq_skip_nonboolean_type() -> Result<()> {
-        let schema = expr_test_schema();
-        let mut rewriter = ConstantRewriter {
-            schemas: vec![&schema],
-        };
-
-        // when one of the operand is not of boolean type, folding the other boolean constant will
-        // change return type of expression to non-boolean.
-        assert_eq!(col("c1").get_type(&schema)?, DataType::Utf8);
-
-        assert_eq!(
-            (col("c1").not_eq(lit(true))).rewrite(&mut rewriter)?,
-            col("c1").not_eq(lit(true)),
-        );
-
-        assert_eq!(
-            (col("c1").not_eq(lit(false))).rewrite(&mut rewriter)?,
-            col("c1").not_eq(lit(false)),
-        );
-
-        // test constants
-        assert_eq!(
-            (lit(1).not_eq(lit(true))).rewrite(&mut rewriter)?,
-            lit(1).not_eq(lit(true)),
-        );
-
-        assert_eq!(
-            (lit("a").not_eq(lit(false))).rewrite(&mut rewriter)?,
-            lit("a").not_eq(lit(false)),
-        );
-
-        Ok(())
-    }
-
-    #[test]
-    fn optimize_expr_case_when_then_else() -> Result<()> {
-        let schema = expr_test_schema();
-        let mut rewriter = ConstantRewriter {
-            schemas: vec![&schema],
-        };
-
-        assert_eq!(
-            (Box::new(Expr::Case {
-                expr: None,
-                when_then_expr: vec![(
-                    Box::new(col("c2").not_eq(lit(false))),
-                    Box::new(lit("ok").eq(lit(true))),
-                )],
-                else_expr: Some(Box::new(col("c2").eq(lit(true)))),
-            }))
-            .rewrite(&mut rewriter)?,
-            Expr::Case {
-                expr: None,
-                when_then_expr: vec![(
-                    Box::new(col("c2")),
-                    Box::new(lit("ok").eq(lit(true)))
-                )],
-                else_expr: Some(Box::new(col("c2"))),
-            }
-        );
-
-        Ok(())
-    }
-
-    fn assert_optimized_plan_eq(plan: &LogicalPlan, expected: &str) {
-        let rule = ConstantFolding::new();
-        let optimized_plan = rule.optimize(plan).expect("failed to optimize plan");
-        let formatted_plan = format!("{:?}", optimized_plan);
-        assert_eq!(formatted_plan, expected);
-    }
-
-    #[test]
-    fn optimize_plan_eq_expr() -> Result<()> {
-        let table_scan = test_table_scan()?;
-        let plan = LogicalPlanBuilder::from(&table_scan)
-            .filter(col("b").eq(lit(true)))?
-            .filter(col("c").eq(lit(false)))?
-            .project(vec![col("a")])?
-            .build()?;
-
-        let expected = "\
-        Projection: #a\
-        \n  Filter: NOT #c\
-        \n    Filter: #b\
-        \n      TableScan: test projection=None";
-
-        assert_optimized_plan_eq(&plan, expected);
-        Ok(())
-    }
-
-    #[test]
-    fn optimize_plan_not_eq_expr() -> Result<()> {
-        let table_scan = test_table_scan()?;
-        let plan = LogicalPlanBuilder::from(&table_scan)
-            .filter(col("b").not_eq(lit(true)))?
-            .filter(col("c").not_eq(lit(false)))?
-            .limit(1)?
-            .project(vec![col("a")])?
-            .build()?;
-
-        let expected = "\
-        Projection: #a\
-        \n  Limit: 1\
-        \n    Filter: #c\
-        \n      Filter: NOT #b\
-        \n        TableScan: test projection=None";
-
-        assert_optimized_plan_eq(&plan, expected);
-        Ok(())
-    }
-
-    #[test]
-    fn optimize_plan_and_expr() -> Result<()> {
-        let table_scan = test_table_scan()?;
-        let plan = LogicalPlanBuilder::from(&table_scan)
-            .filter(col("b").not_eq(lit(true)).and(col("c").eq(lit(true))))?
-            .project(vec![col("a")])?
-            .build()?;
-
-        let expected = "\
-        Projection: #a\
-        \n  Filter: NOT #b And #c\
-        \n    TableScan: test projection=None";
-
-        assert_optimized_plan_eq(&plan, expected);
-        Ok(())
-    }
-
-    #[test]
-    fn optimize_plan_or_expr() -> Result<()> {
-        let table_scan = test_table_scan()?;
-        let plan = LogicalPlanBuilder::from(&table_scan)
-            .filter(col("b").not_eq(lit(true)).or(col("c").eq(lit(false))))?
-            .project(vec![col("a")])?
-            .build()?;
-
-        let expected = "\
-        Projection: #a\
-        \n  Filter: NOT #b Or NOT #c\
-        \n    TableScan: test projection=None";
-
-        assert_optimized_plan_eq(&plan, expected);
-        Ok(())
-    }
-
-    #[test]
-    fn optimize_plan_not_expr() -> Result<()> {
-        let table_scan = test_table_scan()?;
-        let plan = LogicalPlanBuilder::from(&table_scan)
-            .filter(col("b").eq(lit(false)).not())?
-            .project(vec![col("a")])?
-            .build()?;
-
-        let expected = "\
-        Projection: #a\
-        \n  Filter: #b\
-        \n    TableScan: test projection=None";
-
-        assert_optimized_plan_eq(&plan, expected);
-        Ok(())
-    }
-
-    #[test]
-    fn optimize_plan_support_projection() -> Result<()> {
-        let table_scan = test_table_scan()?;
-        let plan = LogicalPlanBuilder::from(&table_scan)
-            .project(vec![col("a"), col("d"), col("b").eq(lit(false))])?
-            .build()?;
-
-        let expected = "\
-        Projection: #a, #d, NOT #b\
-        \n  TableScan: test projection=None";
-
-        assert_optimized_plan_eq(&plan, expected);
-        Ok(())
-    }
-
-    #[test]
-    fn optimize_plan_support_aggregate() -> Result<()> {
-        let table_scan = test_table_scan()?;
-        let plan = LogicalPlanBuilder::from(&table_scan)
-            .project(vec![col("a"), col("c"), col("b")])?
-            .aggregate(
-                vec![col("a"), col("c")],
-                vec![max(col("b").eq(lit(true))), min(col("b"))],
-            )?
-            .build()?;
-
-        let expected = "\
-        Aggregate: groupBy=[[#a, #c]], aggr=[[MAX(#b), MIN(#b)]]\
-        \n  Projection: #a, #c, #b\
-        \n    TableScan: test projection=None";
-
-        assert_optimized_plan_eq(&plan, expected);
-        Ok(())
-    }
-}
diff --git a/rust/datafusion/src/optimizer/filter_push_down.rs b/rust/datafusion/src/optimizer/filter_push_down.rs
deleted file mode 100644
index ec260a41dc5..00000000000
--- a/rust/datafusion/src/optimizer/filter_push_down.rs
+++ /dev/null
@@ -1,1021 +0,0 @@
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Filter Push Down optimizer rule ensures that filters are applied as early as possible in the plan
-
-use crate::datasource::datasource::TableProviderFilterPushDown;
-use crate::logical_plan::{and, LogicalPlan};
-use crate::logical_plan::{DFSchema, Expr};
-use crate::optimizer::optimizer::OptimizerRule;
-use crate::optimizer::utils;
-use crate::{error::Result, logical_plan::Operator};
-use std::{
-    collections::{HashMap, HashSet},
-    sync::Arc,
-};
-
-/// Filter Push Down optimizer rule pushes filter clauses down the plan
-/// # Introduction
-/// A filter-commutative operation is an operation whose result of filter(op(data)) = op(filter(data)).
-/// An example of a filter-commutative operation is a projection; a counter-example is `limit`.
-///
-/// The filter-commutative property is column-specific. An aggregate grouped by A on SUM(B)
-/// can commute with a filter that depends on A only, but does not commute with a filter that depends
-/// on SUM(B).
-///
-/// This optimizer commutes filters with filter-commutative operations to push the filters
-/// the closest possible to the scans, re-writing the filter expressions by every
-/// projection that changes the filter's expression.
-///
-/// Filter: #b Gt Int64(10)
-///     Projection: #a AS b
-///
-/// is optimized to
-///
-/// Projection: #a AS b
-///     Filter: #a Gt Int64(10)  <--- changed from #b to #a
-///
-/// This performs a single pass trought the plan. When it passes trought a filter, it stores that filter,
-/// and when it reaches a node that does not commute with it, it adds the filter to that place.
-/// When it passes through a projection, it re-writes the filter's expression taking into accoun that projection.
-/// When multiple filters would have been written, it `AND` their expressions into a single expression.
-pub struct FilterPushDown {}
-
-#[derive(Debug, Clone, Default)]
-struct State {
-    // (predicate, columns on the predicate)
-    filters: Vec<(Expr, HashSet<String>)>,
-}
-
-type Predicates<'a> = (Vec<&'a Expr>, Vec<&'a HashSet<String>>);
-
-/// returns all predicates in `state` that depend on any of `used_columns`
-fn get_predicates<'a>(
-    state: &'a State,
-    used_columns: &HashSet<String>,
-) -> Predicates<'a> {
-    state
-        .filters
-        .iter()
-        .filter(|(_, columns)| {
-            !columns
-                .intersection(used_columns)
-                .collect::<HashSet<_>>()
-                .is_empty()
-        })
-        .map(|&(ref a, ref b)| (a, b))
-        .unzip()
-}
-
-// returns 3 (potentially overlaping) sets of predicates:
-// * pushable to left: its columns are all on the left
-// * pushable to right: its columns is all on the right
-// * keep: the set of columns is not in only either left or right
-// Note that a predicate can be both pushed to the left and to the right.
-fn get_join_predicates<'a>(
-    state: &'a State,
-    left: &DFSchema,
-    right: &DFSchema,
-) -> (
-    Vec<&'a HashSet<String>>,
-    Vec<&'a HashSet<String>>,
-    Predicates<'a>,
-) {
-    let left_columns = &left
-        .fields()
-        .iter()
-        .map(|f| f.name().clone())
-        .collect::<HashSet<_>>();
-    let right_columns = &right
-        .fields()
-        .iter()
-        .map(|f| f.name().clone())
-        .collect::<HashSet<_>>();
-
-    let filters = state
-        .filters
-        .iter()
-        .map(|(predicate, columns)| {
-            (
-                (predicate, columns),
-                (
-                    columns,
-                    left_columns.intersection(columns).collect::<HashSet<_>>(),
-                    right_columns.intersection(columns).collect::<HashSet<_>>(),
-                ),
-            )
-        })
-        .collect::<Vec<_>>();
-
-    let pushable_to_left = filters
-        .iter()
-        .filter(|(_, (columns, left, _))| left.len() == columns.len())
-        .map(|((_, b), _)| *b)
-        .collect();
-    let pushable_to_right = filters
-        .iter()
-        .filter(|(_, (columns, _, right))| right.len() == columns.len())
-        .map(|((_, b), _)| *b)
-        .collect();
-    let keep = filters
-        .iter()
-        .filter(|(_, (columns, left, right))| {
-            // predicates whose columns are not in only one side of the join need to remain
-            let all_in_left = left.len() == columns.len();
-            let all_in_right = right.len() == columns.len();
-            !all_in_left && !all_in_right
-        })
-        .map(|((ref a, ref b), _)| (a, b))
-        .unzip();
-    (pushable_to_left, pushable_to_right, keep)
-}
-
-/// Optimizes the plan
-fn push_down(state: &State, plan: &LogicalPlan) -> Result<LogicalPlan> {
-    let new_inputs = plan
-        .inputs()
-        .iter()
-        .map(|input| optimize(input, state.clone()))
-        .collect::<Result<Vec<_>>>()?;
-
-    let expr = plan.expressions();
-    utils::from_plan(&plan, &expr, &new_inputs)
-}
-
-/// returns a new [LogicalPlan] that wraps `plan` in a [LogicalPlan::Filter] with
-/// its predicate be all `predicates` ANDed.
-fn add_filter(plan: LogicalPlan, predicates: &[&Expr]) -> LogicalPlan {
-    // reduce filters to a single filter with an AND
-    let predicate = predicates
-        .iter()
-        .skip(1)
-        .fold(predicates[0].clone(), |acc, predicate| {
-            and(acc, (*predicate).to_owned())
-        });
-
-    LogicalPlan::Filter {
-        predicate,
-        input: Arc::new(plan),
-    }
-}
-
-// remove all filters from `filters` that are in `predicate_columns`
-fn remove_filters(
-    filters: &[(Expr, HashSet<String>)],
-    predicate_columns: &[&HashSet<String>],
-) -> Vec<(Expr, HashSet<String>)> {
-    filters
-        .iter()
-        .filter(|(_, columns)| !predicate_columns.contains(&columns))
-        .cloned()
-        .collect::<Vec<_>>()
-}
-
-// keeps all filters from `filters` that are in `predicate_columns`
-fn keep_filters(
-    filters: &[(Expr, HashSet<String>)],
-    predicate_columns: &[&HashSet<String>],
-) -> Vec<(Expr, HashSet<String>)> {
-    filters
-        .iter()
-        .filter(|(_, columns)| predicate_columns.contains(&columns))
-        .cloned()
-        .collect::<Vec<_>>()
-}
-
-/// builds a new [LogicalPlan] from `plan` by issuing new [LogicalPlan::Filter] if any of the filters
-/// in `state` depend on the columns `used_columns`.
-fn issue_filters(
-    mut state: State,
-    used_columns: HashSet<String>,
-    plan: &LogicalPlan,
-) -> Result<LogicalPlan> {
-    let (predicates, predicate_columns) = get_predicates(&state, &used_columns);
-
-    if predicates.is_empty() {
-        // all filters can be pushed down => optimize inputs and return new plan
-        return push_down(&state, plan);
-    }
-
-    let plan = add_filter(plan.clone(), &predicates);
-
-    state.filters = remove_filters(&state.filters, &predicate_columns);
-
-    // continue optimization over all input nodes by cloning the current state (i.e. each node is independent)
-    push_down(&state, &plan)
-}
-
-/// converts "A AND B AND C" => [A, B, C]
-fn split_members<'a>(predicate: &'a Expr, predicates: &mut Vec<&'a Expr>) {
-    match predicate {
-        Expr::BinaryExpr {
-            right,
-            op: Operator::And,
-            left,
-        } => {
-            split_members(&left, predicates);
-            split_members(&right, predicates);
-        }
-        other => predicates.push(other),
-    }
-}
-
-fn optimize(plan: &LogicalPlan, mut state: State) -> Result<LogicalPlan> {
-    match plan {
-        LogicalPlan::Filter { input, predicate } => {
-            let mut predicates = vec![];
-            split_members(predicate, &mut predicates);
-
-            predicates
-                .into_iter()
-                .try_for_each::<_, Result<()>>(|predicate| {
-                    let mut columns: HashSet<String> = HashSet::new();
-                    utils::expr_to_column_names(predicate, &mut columns)?;
-                    // collect the predicate
-                    state.filters.push((predicate.clone(), columns));
-                    Ok(())
-                })?;
-
-            optimize(input, state)
-        }
-        LogicalPlan::Projection {
-            input,
-            expr,
-            schema,
-        } => {
-            // A projection is filter-commutable, but re-writes all predicate expressions
-            // collect projection.
-            let mut projection = HashMap::new();
-            schema.fields().iter().enumerate().for_each(|(i, field)| {
-                // strip alias, as they should not be part of filters
-                let expr = match &expr[i] {
-                    Expr::Alias(expr, _) => expr.as_ref().clone(),
-                    expr => expr.clone(),
-                };
-
-                projection.insert(field.name().clone(), expr);
-            });
-
-            // re-write all filters based on this projection
-            // E.g. in `Filter: #b\n  Projection: #a > 1 as b`, we can swap them, but the filter must be "#a > 1"
-            for (predicate, columns) in state.filters.iter_mut() {
-                *predicate = rewrite(predicate, &projection)?;
-
-                columns.clear();
-                utils::expr_to_column_names(predicate, columns)?;
-            }
-
-            // optimize inner
-            let new_input = optimize(input, state)?;
-
-            utils::from_plan(&plan, &expr, &[new_input])
-        }
-        LogicalPlan::Aggregate {
-            input, aggr_expr, ..
-        } => {
-            // An aggregate's aggreagate columns are _not_ filter-commutable => collect these:
-            // * columns whose aggregation expression depends on
-            // * the aggregation columns themselves
-
-            // construct set of columns that `aggr_expr` depends on
-            let mut used_columns = HashSet::new();
-            utils::exprlist_to_column_names(aggr_expr, &mut used_columns)?;
-
-            let agg_columns = aggr_expr
-                .iter()
-                .map(|x| x.name(input.schema()))
-                .collect::<Result<HashSet<_>>>()?;
-            used_columns.extend(agg_columns);
-
-            issue_filters(state, used_columns, plan)
-        }
-        LogicalPlan::Sort { .. } => {
-            // sort is filter-commutable
-            push_down(&state, plan)
-        }
-        LogicalPlan::Limit { input, .. } => {
-            // limit is _not_ filter-commutable => collect all columns from its input
-            let used_columns = input
-                .schema()
-                .fields()
-                .iter()
-                .map(|f| f.name().clone())
-                .collect::<HashSet<_>>();
-            issue_filters(state, used_columns, plan)
-        }
-        LogicalPlan::Join { left, right, .. } => {
-            let (pushable_to_left, pushable_to_right, keep) =
-                get_join_predicates(&state, &left.schema(), &right.schema());
-
-            let mut left_state = state.clone();
-            left_state.filters = keep_filters(&left_state.filters, &pushable_to_left);
-            let left = optimize(left, left_state)?;
-
-            let mut right_state = state.clone();
-            right_state.filters = keep_filters(&right_state.filters, &pushable_to_right);
-            let right = optimize(right, right_state)?;
-
-            // create a new Join with the new `left` and `right`
-            let expr = plan.expressions();
-            let plan = utils::from_plan(&plan, &expr, &[left, right])?;
-
-            if keep.0.is_empty() {
-                Ok(plan)
-            } else {
-                // wrap the join on the filter whose predicates must be kept
-                let plan = add_filter(plan, &keep.0);
-                state.filters = remove_filters(&state.filters, &keep.1);
-
-                Ok(plan)
-            }
-        }
-        LogicalPlan::TableScan {
-            source,
-            projected_schema,
-            filters,
-            projection,
-            table_name,
-            limit,
-        } => {
-            let mut used_columns = HashSet::new();
-            let mut new_filters = filters.clone();
-
-            for (filter_expr, cols) in &state.filters {
-                let (preserve_filter_node, add_to_provider) =
-                    match source.supports_filter_pushdown(filter_expr)? {
-                        TableProviderFilterPushDown::Unsupported => (true, false),
-                        TableProviderFilterPushDown::Inexact => (true, true),
-                        TableProviderFilterPushDown::Exact => (false, true),
-                    };
-
-                if preserve_filter_node {
-                    used_columns.extend(cols.clone());
-                }
-
-                if add_to_provider {
-                    new_filters.push(filter_expr.clone());
-                }
-            }
-
-            issue_filters(
-                state,
-                used_columns,
-                &LogicalPlan::TableScan {
-                    source: source.clone(),
-                    projection: projection.clone(),
-                    projected_schema: projected_schema.clone(),
-                    table_name: table_name.clone(),
-                    filters: new_filters,
-                    limit: *limit,
-                },
-            )
-        }
-        _ => {
-            // all other plans are _not_ filter-commutable
-            let used_columns = plan
-                .schema()
-                .fields()
-                .iter()
-                .map(|f| f.name().clone())
-                .collect::<HashSet<_>>();
-            issue_filters(state, used_columns, plan)
-        }
-    }
-}
-
-impl OptimizerRule for FilterPushDown {
-    fn name(&self) -> &str {
-        "filter_push_down"
-    }
-
-    fn optimize(&self, plan: &LogicalPlan) -> Result<LogicalPlan> {
-        optimize(plan, State::default())
-    }
-}
-
-impl FilterPushDown {
-    #[allow(missing_docs)]
-    pub fn new() -> Self {
-        Self {}
-    }
-}
-
-/// replaces columns by its name on the projection.
-fn rewrite(expr: &Expr, projection: &HashMap<String, Expr>) -> Result<Expr> {
-    let expressions = utils::expr_sub_expressions(&expr)?;
-
-    let expressions = expressions
-        .iter()
-        .map(|e| rewrite(e, &projection))
-        .collect::<Result<Vec<_>>>()?;
-
-    if let Expr::Column(name) = expr {
-        if let Some(expr) = projection.get(name) {
-            return Ok(expr.clone());
-        }
-    }
-
-    utils::rewrite_expression(&expr, &expressions)
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::datasource::datasource::Statistics;
-    use crate::datasource::TableProvider;
-    use crate::logical_plan::{lit, sum, DFSchema, Expr, LogicalPlanBuilder, Operator};
-    use crate::physical_plan::ExecutionPlan;
-    use crate::test::*;
-    use crate::{logical_plan::col, prelude::JoinType};
-    use arrow::datatypes::SchemaRef;
-
-    fn assert_optimized_plan_eq(plan: &LogicalPlan, expected: &str) {
-        let rule = FilterPushDown::new();
-        let optimized_plan = rule.optimize(plan).expect("failed to optimize plan");
-        let formatted_plan = format!("{:?}", optimized_plan);
-        assert_eq!(formatted_plan, expected);
-    }
-
-    #[test]
-    fn filter_before_projection() -> Result<()> {
-        let table_scan = test_table_scan()?;
-        let plan = LogicalPlanBuilder::from(&table_scan)
-            .project(vec![col("a"), col("b")])?
-            .filter(col("a").eq(lit(1i64)))?
-            .build()?;
-        // filter is before projection
-        let expected = "\
-            Projection: #a, #b\
-            \n  Filter: #a Eq Int64(1)\
-            \n    TableScan: test projection=None";
-        assert_optimized_plan_eq(&plan, expected);
-        Ok(())
-    }
-
-    #[test]
-    fn filter_after_limit() -> Result<()> {
-        let table_scan = test_table_scan()?;
-        let plan = LogicalPlanBuilder::from(&table_scan)
-            .project(vec![col("a"), col("b")])?
-            .limit(10)?
-            .filter(col("a").eq(lit(1i64)))?
-            .build()?;
-        // filter is before single projection
-        let expected = "\
-            Filter: #a Eq Int64(1)\
-            \n  Limit: 10\
-            \n    Projection: #a, #b\
-            \n      TableScan: test projection=None";
-        assert_optimized_plan_eq(&plan, expected);
-        Ok(())
-    }
-
-    #[test]
-    fn filter_jump_2_plans() -> Result<()> {
-        let table_scan = test_table_scan()?;
-        let plan = LogicalPlanBuilder::from(&table_scan)
-            .project(vec![col("a"), col("b"), col("c")])?
-            .project(vec![col("c"), col("b")])?
-            .filter(col("a").eq(lit(1i64)))?
-            .build()?;
-        // filter is before double projection
-        let expected = "\
-            Projection: #c, #b\
-            \n  Projection: #a, #b, #c\
-            \n    Filter: #a Eq Int64(1)\
-            \n      TableScan: test projection=None";
-        assert_optimized_plan_eq(&plan, expected);
-        Ok(())
-    }
-
-    #[test]
-    fn filter_move_agg() -> Result<()> {
-        let table_scan = test_table_scan()?;
-        let plan = LogicalPlanBuilder::from(&table_scan)
-            .aggregate(vec![col("a")], vec![sum(col("b")).alias("total_salary")])?
-            .filter(col("a").gt(lit(10i64)))?
-            .build()?;
-        // filter of key aggregation is commutative
-        let expected = "\
-            Aggregate: groupBy=[[#a]], aggr=[[SUM(#b) AS total_salary]]\
-            \n  Filter: #a Gt Int64(10)\
-            \n    TableScan: test projection=None";
-        assert_optimized_plan_eq(&plan, expected);
-        Ok(())
-    }
-
-    #[test]
-    fn filter_keep_agg() -> Result<()> {
-        let table_scan = test_table_scan()?;
-        let plan = LogicalPlanBuilder::from(&table_scan)
-            .aggregate(vec![col("a")], vec![sum(col("b")).alias("b")])?
-            .filter(col("b").gt(lit(10i64)))?
-            .build()?;
-        // filter of aggregate is after aggregation since they are non-commutative
-        let expected = "\
-            Filter: #b Gt Int64(10)\
-            \n  Aggregate: groupBy=[[#a]], aggr=[[SUM(#b) AS b]]\
-            \n    TableScan: test projection=None";
-        assert_optimized_plan_eq(&plan, expected);
-        Ok(())
-    }
-
-    /// verifies that a filter is pushed to before a projection, the filter expression is correctly re-written
-    #[test]
-    fn alias() -> Result<()> {
-        let table_scan = test_table_scan()?;
-        let plan = LogicalPlanBuilder::from(&table_scan)
-            .project(vec![col("a").alias("b"), col("c")])?
-            .filter(col("b").eq(lit(1i64)))?
-            .build()?;
-        // filter is before projection
-        let expected = "\
-            Projection: #a AS b, #c\
-            \n  Filter: #a Eq Int64(1)\
-            \n    TableScan: test projection=None";
-        assert_optimized_plan_eq(&plan, expected);
-        Ok(())
-    }
-
-    fn add(left: Expr, right: Expr) -> Expr {
-        Expr::BinaryExpr {
-            left: Box::new(left),
-            op: Operator::Plus,
-            right: Box::new(right),
-        }
-    }
-
-    fn multiply(left: Expr, right: Expr) -> Expr {
-        Expr::BinaryExpr {
-            left: Box::new(left),
-            op: Operator::Multiply,
-            right: Box::new(right),
-        }
-    }
-
-    /// verifies that a filter is pushed to before a projection with a complex expression, the filter expression is correctly re-written
-    #[test]
-    fn complex_expression() -> Result<()> {
-        let table_scan = test_table_scan()?;
-        let plan = LogicalPlanBuilder::from(&table_scan)
-            .project(vec![
-                add(multiply(col("a"), lit(2)), col("c")).alias("b"),
-                col("c"),
-            ])?
-            .filter(col("b").eq(lit(1i64)))?
-            .build()?;
-
-        // not part of the test, just good to know:
-        assert_eq!(
-            format!("{:?}", plan),
-            "\
-            Filter: #b Eq Int64(1)\
-            \n  Projection: #a Multiply Int32(2) Plus #c AS b, #c\
-            \n    TableScan: test projection=None"
-        );
-
-        // filter is before projection
-        let expected = "\
-            Projection: #a Multiply Int32(2) Plus #c AS b, #c\
-            \n  Filter: #a Multiply Int32(2) Plus #c Eq Int64(1)\
-            \n    TableScan: test projection=None";
-        assert_optimized_plan_eq(&plan, expected);
-        Ok(())
-    }
-
-    /// verifies that when a filter is pushed to after 2 projections, the filter expression is correctly re-written
-    #[test]
-    fn complex_plan() -> Result<()> {
-        let table_scan = test_table_scan()?;
-        let plan = LogicalPlanBuilder::from(&table_scan)
-            .project(vec![
-                add(multiply(col("a"), lit(2)), col("c")).alias("b"),
-                col("c"),
-            ])?
-            // second projection where we rename columns, just to make it difficult
-            .project(vec![multiply(col("b"), lit(3)).alias("a"), col("c")])?
-            .filter(col("a").eq(lit(1i64)))?
-            .build()?;
-
-        // not part of the test, just good to know:
-        assert_eq!(
-            format!("{:?}", plan),
-            "\
-            Filter: #a Eq Int64(1)\
-            \n  Projection: #b Multiply Int32(3) AS a, #c\
-            \n    Projection: #a Multiply Int32(2) Plus #c AS b, #c\
-            \n      TableScan: test projection=None"
-        );
-
-        // filter is before the projections
-        let expected = "\
-        Projection: #b Multiply Int32(3) AS a, #c\
-        \n  Projection: #a Multiply Int32(2) Plus #c AS b, #c\
-        \n    Filter: #a Multiply Int32(2) Plus #c Multiply Int32(3) Eq Int64(1)\
-        \n      TableScan: test projection=None";
-        assert_optimized_plan_eq(&plan, expected);
-        Ok(())
-    }
-
-    /// verifies that when two filters apply after an aggregation that only allows one to be pushed, one is pushed
-    /// and the other not.
-    #[test]
-    fn multi_filter() -> Result<()> {
-        // the aggregation allows one filter to pass (b), and the other one to not pass (SUM(c))
-        let table_scan = test_table_scan()?;
-        let plan = LogicalPlanBuilder::from(&table_scan)
-            .project(vec![col("a").alias("b"), col("c")])?
-            .aggregate(vec![col("b")], vec![sum(col("c"))])?
-            .filter(col("b").gt(lit(10i64)))?
-            .filter(col("SUM(c)").gt(lit(10i64)))?
-            .build()?;
-
-        // not part of the test, just good to know:
-        assert_eq!(
-            format!("{:?}", plan),
-            "\
-            Filter: #SUM(c) Gt Int64(10)\
-            \n  Filter: #b Gt Int64(10)\
-            \n    Aggregate: groupBy=[[#b]], aggr=[[SUM(#c)]]\
-            \n      Projection: #a AS b, #c\
-            \n        TableScan: test projection=None"
-        );
-
-        // filter is before the projections
-        let expected = "\
-        Filter: #SUM(c) Gt Int64(10)\
-        \n  Aggregate: groupBy=[[#b]], aggr=[[SUM(#c)]]\
-        \n    Projection: #a AS b, #c\
-        \n      Filter: #a Gt Int64(10)\
-        \n        TableScan: test projection=None";
-        assert_optimized_plan_eq(&plan, expected);
-
-        Ok(())
-    }
-
-    /// verifies that when a filter with two predicates is applied after an aggregation that only allows one to be pushed, one is pushed
-    /// and the other not.
-    #[test]
-    fn split_filter() -> Result<()> {
-        // the aggregation allows one filter to pass (b), and the other one to not pass (SUM(c))
-        let table_scan = test_table_scan()?;
-        let plan = LogicalPlanBuilder::from(&table_scan)
-            .project(vec![col("a").alias("b"), col("c")])?
-            .aggregate(vec![col("b")], vec![sum(col("c"))])?
-            .filter(and(
-                col("SUM(c)").gt(lit(10i64)),
-                and(col("b").gt(lit(10i64)), col("SUM(c)").lt(lit(20i64))),
-            ))?
-            .build()?;
-
-        // not part of the test, just good to know:
-        assert_eq!(
-            format!("{:?}", plan),
-            "\
-            Filter: #SUM(c) Gt Int64(10) And #b Gt Int64(10) And #SUM(c) Lt Int64(20)\
-            \n  Aggregate: groupBy=[[#b]], aggr=[[SUM(#c)]]\
-            \n    Projection: #a AS b, #c\
-            \n      TableScan: test projection=None"
-        );
-
-        // filter is before the projections
-        let expected = "\
-        Filter: #SUM(c) Gt Int64(10) And #SUM(c) Lt Int64(20)\
-        \n  Aggregate: groupBy=[[#b]], aggr=[[SUM(#c)]]\
-        \n    Projection: #a AS b, #c\
-        \n      Filter: #a Gt Int64(10)\
-        \n        TableScan: test projection=None";
-        assert_optimized_plan_eq(&plan, expected);
-
-        Ok(())
-    }
-
-    /// verifies that when two limits are in place, we jump neither
-    #[test]
-    fn double_limit() -> Result<()> {
-        let table_scan = test_table_scan()?;
-        let plan = LogicalPlanBuilder::from(&table_scan)
-            .project(vec![col("a"), col("b")])?
-            .limit(20)?
-            .limit(10)?
-            .project(vec![col("a"), col("b")])?
-            .filter(col("a").eq(lit(1i64)))?
-            .build()?;
-        // filter does not just any of the limits
-        let expected = "\
-            Projection: #a, #b\
-            \n  Filter: #a Eq Int64(1)\
-            \n    Limit: 10\
-            \n      Limit: 20\
-            \n        Projection: #a, #b\
-            \n          TableScan: test projection=None";
-        assert_optimized_plan_eq(&plan, expected);
-        Ok(())
-    }
-
-    /// verifies that filters with the same columns are correctly placed
-    #[test]
-    fn filter_2_breaks_limits() -> Result<()> {
-        let table_scan = test_table_scan()?;
-        let plan = LogicalPlanBuilder::from(&table_scan)
-            .project(vec![col("a")])?
-            .filter(col("a").lt_eq(lit(1i64)))?
-            .limit(1)?
-            .project(vec![col("a")])?
-            .filter(col("a").gt_eq(lit(1i64)))?
-            .build()?;
-        // Should be able to move both filters below the projections
-
-        // not part of the test
-        assert_eq!(
-            format!("{:?}", plan),
-            "Filter: #a GtEq Int64(1)\
-             \n  Projection: #a\
-             \n    Limit: 1\
-             \n      Filter: #a LtEq Int64(1)\
-             \n        Projection: #a\
-             \n          TableScan: test projection=None"
-        );
-
-        let expected = "\
-        Projection: #a\
-        \n  Filter: #a GtEq Int64(1)\
-        \n    Limit: 1\
-        \n      Projection: #a\
-        \n        Filter: #a LtEq Int64(1)\
-        \n          TableScan: test projection=None";
-
-        assert_optimized_plan_eq(&plan, expected);
-        Ok(())
-    }
-
-    /// verifies that filters to be placed on the same depth are ANDed
-    #[test]
-    fn two_filters_on_same_depth() -> Result<()> {
-        let table_scan = test_table_scan()?;
-        let plan = LogicalPlanBuilder::from(&table_scan)
-            .limit(1)?
-            .filter(col("a").lt_eq(lit(1i64)))?
-            .filter(col("a").gt_eq(lit(1i64)))?
-            .project(vec![col("a")])?
-            .build()?;
-
-        // not part of the test
-        assert_eq!(
-            format!("{:?}", plan),
-            "Projection: #a\
-            \n  Filter: #a GtEq Int64(1)\
-            \n    Filter: #a LtEq Int64(1)\
-            \n      Limit: 1\
-            \n        TableScan: test projection=None"
-        );
-
-        let expected = "\
-        Projection: #a\
-        \n  Filter: #a GtEq Int64(1) And #a LtEq Int64(1)\
-        \n    Limit: 1\
-        \n      TableScan: test projection=None";
-
-        assert_optimized_plan_eq(&plan, expected);
-        Ok(())
-    }
-
-    /// verifies that filters on a plan with user nodes are not lost
-    /// (ARROW-10547)
-    #[test]
-    fn filters_user_defined_node() -> Result<()> {
-        let table_scan = test_table_scan()?;
-        let plan = LogicalPlanBuilder::from(&table_scan)
-            .filter(col("a").lt_eq(lit(1i64)))?
-            .build()?;
-
-        let plan = crate::test::user_defined::new(plan);
-
-        let expected = "\
-            TestUserDefined\
-             \n  Filter: #a LtEq Int64(1)\
-             \n    TableScan: test projection=None";
-
-        // not part of the test
-        assert_eq!(format!("{:?}", plan), expected);
-
-        assert_optimized_plan_eq(&plan, expected);
-        Ok(())
-    }
-
-    /// post-join predicates on a column common to both sides is pushed to both sides
-    #[test]
-    fn filter_join_on_common_independent() -> Result<()> {
-        let table_scan = test_table_scan()?;
-        let left = LogicalPlanBuilder::from(&table_scan).build()?;
-        let right = LogicalPlanBuilder::from(&table_scan)
-            .project(vec![col("a")])?
-            .build()?;
-        let plan = LogicalPlanBuilder::from(&left)
-            .join(&right, JoinType::Inner, &["a"], &["a"])?
-            .filter(col("a").lt_eq(lit(1i64)))?
-            .build()?;
-
-        // not part of the test, just good to know:
-        assert_eq!(
-            format!("{:?}", plan),
-            "\
-            Filter: #a LtEq Int64(1)\
-            \n  Join: a = a\
-            \n    TableScan: test projection=None\
-            \n    Projection: #a\
-            \n      TableScan: test projection=None"
-        );
-
-        // filter sent to side before the join
-        let expected = "\
-        Join: a = a\
-        \n  Filter: #a LtEq Int64(1)\
-        \n    TableScan: test projection=None\
-        \n  Projection: #a\
-        \n    Filter: #a LtEq Int64(1)\
-        \n      TableScan: test projection=None";
-        assert_optimized_plan_eq(&plan, expected);
-        Ok(())
-    }
-
-    /// post-join predicates with columns from both sides are not pushed
-    #[test]
-    fn filter_join_on_common_dependent() -> Result<()> {
-        let table_scan = test_table_scan()?;
-        let left = LogicalPlanBuilder::from(&table_scan)
-            .project(vec![col("a"), col("c")])?
-            .build()?;
-        let right = LogicalPlanBuilder::from(&table_scan)
-            .project(vec![col("a"), col("b")])?
-            .build()?;
-        let plan = LogicalPlanBuilder::from(&left)
-            .join(&right, JoinType::Inner, &["a"], &["a"])?
-            // "b" and "c" are not shared by either side: they are only available together after the join
-            .filter(col("c").lt_eq(col("b")))?
-            .build()?;
-
-        // not part of the test, just good to know:
-        assert_eq!(
-            format!("{:?}", plan),
-            "\
-            Filter: #c LtEq #b\
-            \n  Join: a = a\
-            \n    Projection: #a, #c\
-            \n      TableScan: test projection=None\
-            \n    Projection: #a, #b\
-            \n      TableScan: test projection=None"
-        );
-
-        // expected is equal: no push-down
-        let expected = &format!("{:?}", plan);
-        assert_optimized_plan_eq(&plan, expected);
-        Ok(())
-    }
-
-    /// post-join predicates with columns from one side of a join are pushed only to that side
-    #[test]
-    fn filter_join_on_one_side() -> Result<()> {
-        let table_scan = test_table_scan()?;
-        let left = LogicalPlanBuilder::from(&table_scan)
-            .project(vec![col("a"), col("b")])?
-            .build()?;
-        let right = LogicalPlanBuilder::from(&table_scan)
-            .project(vec![col("a"), col("c")])?
-            .build()?;
-        let plan = LogicalPlanBuilder::from(&left)
-            .join(&right, JoinType::Inner, &["a"], &["a"])?
-            .filter(col("b").lt_eq(lit(1i64)))?
-            .build()?;
-
-        // not part of the test, just good to know:
-        assert_eq!(
-            format!("{:?}", plan),
-            "\
-            Filter: #b LtEq Int64(1)\
-            \n  Join: a = a\
-            \n    Projection: #a, #b\
-            \n      TableScan: test projection=None\
-            \n    Projection: #a, #c\
-            \n      TableScan: test projection=None"
-        );
-
-        let expected = "\
-        Join: a = a\
-        \n  Projection: #a, #b\
-        \n    Filter: #b LtEq Int64(1)\
-        \n      TableScan: test projection=None\
-        \n  Projection: #a, #c\
-        \n    TableScan: test projection=None";
-        assert_optimized_plan_eq(&plan, expected);
-        Ok(())
-    }
-
-    struct PushDownProvider {
-        pub filter_support: TableProviderFilterPushDown,
-    }
-
-    impl TableProvider for PushDownProvider {
-        fn schema(&self) -> SchemaRef {
-            Arc::new(arrow::datatypes::Schema::new(vec![
-                arrow::datatypes::Field::new(
-                    "a",
-                    arrow::datatypes::DataType::Int32,
-                    true,
-                ),
-            ]))
-        }
-
-        fn scan(
-            &self,
-            _: &Option<Vec<usize>>,
-            _: usize,
-            _: &[Expr],
-            _: Option<usize>,
-        ) -> Result<Arc<dyn ExecutionPlan>> {
-            unimplemented!()
-        }
-
-        fn supports_filter_pushdown(
-            &self,
-            _: &Expr,
-        ) -> Result<TableProviderFilterPushDown> {
-            Ok(self.filter_support.clone())
-        }
-
-        fn as_any(&self) -> &dyn std::any::Any {
-            self
-        }
-
-        fn statistics(&self) -> Statistics {
-            Statistics::default()
-        }
-    }
-
-    fn table_scan_with_pushdown_provider(
-        filter_support: TableProviderFilterPushDown,
-    ) -> Result<LogicalPlan> {
-        let test_provider = PushDownProvider { filter_support };
-
-        let table_scan = LogicalPlan::TableScan {
-            table_name: "".into(),
-            filters: vec![],
-            projected_schema: Arc::new(DFSchema::try_from_qualified(
-                "",
-                &*test_provider.schema(),
-            )?),
-            projection: None,
-            source: Arc::new(test_provider),
-            limit: None,
-        };
-
-        LogicalPlanBuilder::from(&table_scan)
-            .filter(col("a").eq(lit(1i64)))?
-            .build()
-    }
-
-    #[test]
-    fn filter_with_table_provider_exact() -> Result<()> {
-        let plan = table_scan_with_pushdown_provider(TableProviderFilterPushDown::Exact)?;
-
-        let expected = "\
-        TableScan: projection=None, filters=[#a Eq Int64(1)]";
-        assert_optimized_plan_eq(&plan, expected);
-        Ok(())
-    }
-
-    #[test]
-    fn filter_with_table_provider_inexact() -> Result<()> {
-        let plan =
-            table_scan_with_pushdown_provider(TableProviderFilterPushDown::Inexact)?;
-
-        let expected = "\
-        Filter: #a Eq Int64(1)\
-        \n  TableScan: projection=None, filters=[#a Eq Int64(1)]";
-        assert_optimized_plan_eq(&plan, expected);
-        Ok(())
-    }
-
-    #[test]
-    fn filter_with_table_provider_unsupported() -> Result<()> {
-        let plan =
-            table_scan_with_pushdown_provider(TableProviderFilterPushDown::Unsupported)?;
-
-        let expected = "\
-        Filter: #a Eq Int64(1)\
-        \n  TableScan: projection=None";
-        assert_optimized_plan_eq(&plan, expected);
-        Ok(())
-    }
-}
diff --git a/rust/datafusion/src/optimizer/hash_build_probe_order.rs b/rust/datafusion/src/optimizer/hash_build_probe_order.rs
deleted file mode 100644
index f44050f0b72..00000000000
--- a/rust/datafusion/src/optimizer/hash_build_probe_order.rs
+++ /dev/null
@@ -1,257 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License
-
-//! Optimizer rule to switch build and probe order of hash join
-//! based on statistics of a `TableProvider`. If the number of
-//! rows of both sources is known, the order can be switched
-//! for a faster hash join.
-
-use std::sync::Arc;
-
-use crate::logical_plan::LogicalPlan;
-use crate::optimizer::optimizer::OptimizerRule;
-use crate::{error::Result, prelude::JoinType};
-
-use super::utils;
-
-/// BuildProbeOrder reorders the build and probe phase of
-/// hash joins. This uses the amount of rows that a datasource has.
-/// The rule optimizes the order such that the left (build) side of the join
-/// is the smallest.
-/// If the information is not available, the order stays the same,
-/// so that it could be optimized manually in a query.
-pub struct HashBuildProbeOrder {}
-
-// Gets exact number of rows, if known by the statistics of the underlying
-fn get_num_rows(logical_plan: &LogicalPlan) -> Option<usize> {
-    match logical_plan {
-        LogicalPlan::TableScan { source, .. } => source.statistics().num_rows,
-        LogicalPlan::EmptyRelation {
-            produce_one_row, ..
-        } => {
-            if *produce_one_row {
-                Some(1)
-            } else {
-                Some(0)
-            }
-        }
-        LogicalPlan::Limit { n: limit, input } => {
-            let num_rows_input = get_num_rows(input);
-            num_rows_input.map(|rows| std::cmp::min(*limit, rows))
-        }
-        LogicalPlan::Aggregate { .. } => {
-            // we cannot yet predict how many rows will be produced by an aggregate because
-            // we do not know the cardinality of the grouping keys
-            None
-        }
-        LogicalPlan::Filter { .. } => {
-            // we cannot yet predict how many rows will be produced by a filter because
-            // we don't know how selective it is (how many rows it will filter out)
-            None
-        }
-        LogicalPlan::Join { .. } => {
-            // we cannot predict the cardinality of the join output
-            None
-        }
-        LogicalPlan::Repartition { .. } => {
-            // we cannot predict how rows will be repartitioned
-            None
-        }
-        // the following operators are special cases and not querying data
-        LogicalPlan::CreateExternalTable { .. } => None,
-        LogicalPlan::Explain { .. } => None,
-        // we do not support estimating rows with extensions yet
-        LogicalPlan::Extension { .. } => None,
-        // the following operators do not modify row count in any way
-        LogicalPlan::Projection { input, .. } => get_num_rows(input),
-        LogicalPlan::Sort { input, .. } => get_num_rows(input),
-        // Add number of rows of below plans
-        LogicalPlan::Union { inputs, .. } => {
-            inputs.iter().map(|plan| get_num_rows(plan)).sum()
-        }
-    }
-}
-
-// Finds out whether to swap left vs right order based on statistics
-fn should_swap_join_order(left: &LogicalPlan, right: &LogicalPlan) -> bool {
-    let left_rows = get_num_rows(left);
-    let right_rows = get_num_rows(right);
-
-    match (left_rows, right_rows) {
-        (Some(l), Some(r)) => l > r,
-        _ => false,
-    }
-}
-
-impl OptimizerRule for HashBuildProbeOrder {
-    fn name(&self) -> &str {
-        "hash_build_probe_order"
-    }
-
-    fn optimize(&self, plan: &LogicalPlan) -> Result<LogicalPlan> {
-        match plan {
-            // Main optimization rule, swaps order of left and right
-            // based on number of rows in each table
-            LogicalPlan::Join {
-                left,
-                right,
-                on,
-                join_type,
-                schema,
-            } => {
-                let left = self.optimize(left)?;
-                let right = self.optimize(right)?;
-                if should_swap_join_order(&left, &right) {
-                    // Swap left and right, change join type and (equi-)join key order
-                    Ok(LogicalPlan::Join {
-                        left: Arc::new(right),
-                        right: Arc::new(left),
-                        on: on
-                            .iter()
-                            .map(|(l, r)| (r.to_string(), l.to_string()))
-                            .collect(),
-                        join_type: swap_join_type(*join_type),
-                        schema: schema.clone(),
-                    })
-                } else {
-                    // Keep join as is
-                    Ok(LogicalPlan::Join {
-                        left: Arc::new(left),
-                        right: Arc::new(right),
-                        on: on.clone(),
-                        join_type: *join_type,
-                        schema: schema.clone(),
-                    })
-                }
-            }
-            // Rest: recurse into plan, apply optimization where possible
-            LogicalPlan::Projection { .. }
-            | LogicalPlan::Aggregate { .. }
-            | LogicalPlan::TableScan { .. }
-            | LogicalPlan::Limit { .. }
-            | LogicalPlan::Filter { .. }
-            | LogicalPlan::Repartition { .. }
-            | LogicalPlan::EmptyRelation { .. }
-            | LogicalPlan::Sort { .. }
-            | LogicalPlan::CreateExternalTable { .. }
-            | LogicalPlan::Explain { .. }
-            | LogicalPlan::Union { .. }
-            | LogicalPlan::Extension { .. } => {
-                let expr = plan.expressions();
-
-                // apply the optimization to all inputs of the plan
-                let inputs = plan.inputs();
-                let new_inputs = inputs
-                    .iter()
-                    .map(|plan| self.optimize(plan))
-                    .collect::<Result<Vec<_>>>()?;
-
-                utils::from_plan(plan, &expr, &new_inputs)
-            }
-        }
-    }
-}
-
-impl HashBuildProbeOrder {
-    #[allow(missing_docs)]
-    pub fn new() -> Self {
-        Self {}
-    }
-}
-
-fn swap_join_type(join_type: JoinType) -> JoinType {
-    match join_type {
-        JoinType::Inner => JoinType::Inner,
-        JoinType::Left => JoinType::Right,
-        JoinType::Right => JoinType::Left,
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use std::sync::Arc;
-
-    use crate::{
-        datasource::{datasource::Statistics, TableProvider},
-        logical_plan::{DFSchema, Expr},
-        test::*,
-    };
-
-    struct TestTableProvider {
-        num_rows: usize,
-    }
-
-    impl TableProvider for TestTableProvider {
-        fn as_any(&self) -> &dyn std::any::Any {
-            unimplemented!()
-        }
-        fn schema(&self) -> arrow::datatypes::SchemaRef {
-            unimplemented!()
-        }
-
-        fn scan(
-            &self,
-            _projection: &Option<Vec<usize>>,
-            _batch_size: usize,
-            _filters: &[Expr],
-            _limit: Option<usize>,
-        ) -> Result<std::sync::Arc<dyn crate::physical_plan::ExecutionPlan>> {
-            unimplemented!()
-        }
-        fn statistics(&self) -> crate::datasource::datasource::Statistics {
-            Statistics {
-                num_rows: Some(self.num_rows),
-                total_byte_size: None,
-                column_statistics: None,
-            }
-        }
-    }
-
-    #[test]
-    fn test_num_rows() -> Result<()> {
-        let table_scan = test_table_scan()?;
-
-        assert_eq!(get_num_rows(&table_scan), Some(0));
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_swap_order() {
-        let lp_left = LogicalPlan::TableScan {
-            table_name: "left".to_string(),
-            projection: None,
-            source: Arc::new(TestTableProvider { num_rows: 1000 }),
-            projected_schema: Arc::new(DFSchema::empty()),
-            filters: vec![],
-            limit: None,
-        };
-
-        let lp_right = LogicalPlan::TableScan {
-            table_name: "right".to_string(),
-            projection: None,
-            source: Arc::new(TestTableProvider { num_rows: 100 }),
-            projected_schema: Arc::new(DFSchema::empty()),
-            filters: vec![],
-            limit: None,
-        };
-
-        assert!(should_swap_join_order(&lp_left, &lp_right));
-        assert!(!should_swap_join_order(&lp_right, &lp_left));
-    }
-}
diff --git a/rust/datafusion/src/optimizer/limit_push_down.rs b/rust/datafusion/src/optimizer/limit_push_down.rs
deleted file mode 100644
index 73a231f2248..00000000000
--- a/rust/datafusion/src/optimizer/limit_push_down.rs
+++ /dev/null
@@ -1,252 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Optimizer rule to push down LIMIT in the query plan
-//! It will push down through projection, limits (taking the smaller limit)
-use std::sync::Arc;
-
-use super::utils;
-use crate::error::Result;
-use crate::logical_plan::LogicalPlan;
-use crate::optimizer::optimizer::OptimizerRule;
-
-/// Optimization rule that tries pushes down LIMIT n
-/// where applicable to reduce the amount of scanned / processed data
-pub struct LimitPushDown {}
-
-impl LimitPushDown {
-    #[allow(missing_docs)]
-    pub fn new() -> Self {
-        Self {}
-    }
-}
-
-fn limit_push_down(
-    upper_limit: Option<usize>,
-    plan: &LogicalPlan,
-) -> Result<LogicalPlan> {
-    match (plan, upper_limit) {
-        (LogicalPlan::Limit { n, input }, upper_limit) => {
-            let smallest = upper_limit.map(|x| std::cmp::min(x, *n)).unwrap_or(*n);
-            Ok(LogicalPlan::Limit {
-                n: smallest,
-                // push down limit to plan (minimum of upper limit and current limit)
-                input: Arc::new(limit_push_down(Some(smallest), input.as_ref())?),
-            })
-        }
-        (
-            LogicalPlan::TableScan {
-                table_name,
-                source,
-                projection,
-                filters,
-                limit,
-                projected_schema,
-            },
-            Some(upper_limit),
-        ) => Ok(LogicalPlan::TableScan {
-            table_name: table_name.clone(),
-            source: source.clone(),
-            projection: projection.clone(),
-            filters: filters.clone(),
-            limit: limit
-                .map(|x| std::cmp::min(x, upper_limit))
-                .or(Some(upper_limit)),
-            projected_schema: projected_schema.clone(),
-        }),
-        (
-            LogicalPlan::Projection {
-                expr,
-                input,
-                schema,
-            },
-            upper_limit,
-        ) => {
-            // Push down limit directly (projection doesn't change number of rows)
-            Ok(LogicalPlan::Projection {
-                expr: expr.clone(),
-                input: Arc::new(limit_push_down(upper_limit, input.as_ref())?),
-                schema: schema.clone(),
-            })
-        }
-        (
-            LogicalPlan::Union {
-                inputs,
-                alias,
-                schema,
-            },
-            Some(upper_limit),
-        ) => {
-            // Push down limit through UNION
-            let new_inputs = inputs
-                .iter()
-                .map(|x| {
-                    Ok(LogicalPlan::Limit {
-                        n: upper_limit,
-                        input: Arc::new(limit_push_down(Some(upper_limit), x)?),
-                    })
-                })
-                .collect::<Result<_>>()?;
-            Ok(LogicalPlan::Union {
-                inputs: new_inputs,
-                alias: alias.clone(),
-                schema: schema.clone(),
-            })
-        }
-        // For other nodes we can't push down the limit
-        // But try to recurse and find other limit nodes to push down
-        _ => {
-            let expr = plan.expressions();
-
-            // apply the optimization to all inputs of the plan
-            let inputs = plan.inputs();
-            let new_inputs = inputs
-                .iter()
-                .map(|plan| limit_push_down(None, plan))
-                .collect::<Result<Vec<_>>>()?;
-
-            utils::from_plan(plan, &expr, &new_inputs)
-        }
-    }
-}
-
-impl OptimizerRule for LimitPushDown {
-    fn optimize(&self, plan: &LogicalPlan) -> Result<LogicalPlan> {
-        limit_push_down(None, plan)
-    }
-
-    fn name(&self) -> &str {
-        "limit_push_down"
-    }
-}
-#[cfg(test)]
-mod test {
-    use super::*;
-    use crate::{
-        logical_plan::{col, max, LogicalPlan, LogicalPlanBuilder},
-        test::*,
-    };
-
-    fn assert_optimized_plan_eq(plan: &LogicalPlan, expected: &str) {
-        let rule = LimitPushDown::new();
-        let optimized_plan = rule.optimize(plan).expect("failed to optimize plan");
-        let formatted_plan = format!("{:?}", optimized_plan);
-        assert_eq!(formatted_plan, expected);
-    }
-
-    #[test]
-    fn limit_pushdown_projection_table_provider() -> Result<()> {
-        let table_scan = test_table_scan()?;
-
-        let plan = LogicalPlanBuilder::from(&table_scan)
-            .project(vec![col("a")])?
-            .limit(1000)?
-            .build()?;
-
-        // Should push the limit down to table provider
-        // When it has a select
-        let expected = "Limit: 1000\
-        \n  Projection: #a\
-        \n    TableScan: test projection=None, limit=1000";
-
-        assert_optimized_plan_eq(&plan, expected);
-
-        Ok(())
-    }
-    #[test]
-    fn limit_push_down_take_smaller_limit() -> Result<()> {
-        let table_scan = test_table_scan()?;
-
-        let plan = LogicalPlanBuilder::from(&table_scan)
-            .limit(1000)?
-            .limit(10)?
-            .build()?;
-
-        // Should push down the smallest limit
-        // Towards table scan
-        // This rule doesn't replace multiple limits
-        let expected = "Limit: 10\
-        \n  Limit: 10\
-        \n    TableScan: test projection=None, limit=10";
-
-        assert_optimized_plan_eq(&plan, expected);
-
-        Ok(())
-    }
-
-    #[test]
-    fn limit_doesnt_push_down_aggregation() -> Result<()> {
-        let table_scan = test_table_scan()?;
-
-        let plan = LogicalPlanBuilder::from(&table_scan)
-            .aggregate(vec![col("a")], vec![max(col("b"))])?
-            .limit(1000)?
-            .build()?;
-
-        // Limit should *not* push down aggregate node
-        let expected = "Limit: 1000\
-        \n  Aggregate: groupBy=[[#a]], aggr=[[MAX(#b)]]\
-        \n    TableScan: test projection=None";
-
-        assert_optimized_plan_eq(&plan, expected);
-
-        Ok(())
-    }
-
-    #[test]
-    fn limit_should_push_down_union() -> Result<()> {
-        let table_scan = test_table_scan()?;
-
-        let plan = LogicalPlanBuilder::from(&table_scan)
-            .union(LogicalPlanBuilder::from(&table_scan).build()?)?
-            .limit(1000)?
-            .build()?;
-
-        // Limit should push down through union
-        let expected = "Limit: 1000\
-        \n  Union\
-        \n    Limit: 1000\
-        \n      TableScan: test projection=None, limit=1000\
-        \n    Limit: 1000\
-        \n      TableScan: test projection=None, limit=1000";
-
-        assert_optimized_plan_eq(&plan, expected);
-
-        Ok(())
-    }
-
-    #[test]
-    fn multi_stage_limit_recurses_to_deeper_limit() -> Result<()> {
-        let table_scan = test_table_scan()?;
-
-        let plan = LogicalPlanBuilder::from(&table_scan)
-            .limit(1000)?
-            .aggregate(vec![col("a")], vec![max(col("b"))])?
-            .limit(10)?
-            .build()?;
-
-        // Limit should use deeper LIMIT 1000, but Limit 10 shouldn't push down aggregation
-        let expected = "Limit: 10\
-        \n  Aggregate: groupBy=[[#a]], aggr=[[MAX(#b)]]\
-        \n    Limit: 1000\
-        \n      TableScan: test projection=None, limit=1000";
-
-        assert_optimized_plan_eq(&plan, expected);
-
-        Ok(())
-    }
-}
diff --git a/rust/datafusion/src/optimizer/mod.rs b/rust/datafusion/src/optimizer/mod.rs
deleted file mode 100644
index dc59b64ff46..00000000000
--- a/rust/datafusion/src/optimizer/mod.rs
+++ /dev/null
@@ -1,27 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! This module contains a query optimizer that operates against a logical plan and applies
-//! some simple rules to a logical plan, such as "Projection Push Down" and "Type Coercion".
-
-pub mod constant_folding;
-pub mod filter_push_down;
-pub mod hash_build_probe_order;
-pub mod limit_push_down;
-pub mod optimizer;
-pub mod projection_push_down;
-pub mod utils;
diff --git a/rust/datafusion/src/optimizer/optimizer.rs b/rust/datafusion/src/optimizer/optimizer.rs
deleted file mode 100644
index dee8e06a5e3..00000000000
--- a/rust/datafusion/src/optimizer/optimizer.rs
+++ /dev/null
@@ -1,32 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Query optimizer traits
-
-use crate::error::Result;
-use crate::logical_plan::LogicalPlan;
-
-/// `OptimizerRule` transforms one ['LogicalPlan'] into another which
-/// computes the same results, but in a potentially more efficient
-/// way.
-pub trait OptimizerRule {
-    /// Rewrite `plan` to an optimized form
-    fn optimize(&self, plan: &LogicalPlan) -> Result<LogicalPlan>;
-
-    /// A human readable name for this optimizer rule
-    fn name(&self) -> &str;
-}
diff --git a/rust/datafusion/src/optimizer/projection_push_down.rs b/rust/datafusion/src/optimizer/projection_push_down.rs
deleted file mode 100644
index 6b1cdfe18ca..00000000000
--- a/rust/datafusion/src/optimizer/projection_push_down.rs
+++ /dev/null
@@ -1,542 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Projection Push Down optimizer rule ensures that only referenced columns are
-//! loaded into memory
-
-use crate::error::Result;
-use crate::logical_plan::{DFField, DFSchema, DFSchemaRef, LogicalPlan, ToDFSchema};
-use crate::optimizer::optimizer::OptimizerRule;
-use crate::optimizer::utils;
-use arrow::datatypes::Schema;
-use arrow::error::Result as ArrowResult;
-use std::{collections::HashSet, sync::Arc};
-use utils::optimize_explain;
-
-/// Optimizer that removes unused projections and aggregations from plans
-/// This reduces both scans and
-pub struct ProjectionPushDown {}
-
-impl OptimizerRule for ProjectionPushDown {
-    fn optimize(&self, plan: &LogicalPlan) -> Result<LogicalPlan> {
-        // set of all columns refered by the plan (and thus considered required by the root)
-        let required_columns = plan
-            .schema()
-            .fields()
-            .iter()
-            .map(|f| f.name().clone())
-            .collect::<HashSet<String>>();
-        optimize_plan(self, plan, &required_columns, false)
-    }
-
-    fn name(&self) -> &str {
-        "projection_push_down"
-    }
-}
-
-impl ProjectionPushDown {
-    #[allow(missing_docs)]
-    pub fn new() -> Self {
-        Self {}
-    }
-}
-
-fn get_projected_schema(
-    schema: &Schema,
-    required_columns: &HashSet<String>,
-    has_projection: bool,
-) -> Result<(Vec<usize>, DFSchemaRef)> {
-    // once we reach the table scan, we can use the accumulated set of column
-    // names to construct the set of column indexes in the scan
-    //
-    // we discard non-existing columns because some column names are not part of the schema,
-    // e.g. when the column derives from an aggregation
-    let mut projection: Vec<usize> = required_columns
-        .iter()
-        .map(|name| schema.index_of(name))
-        .filter_map(ArrowResult::ok)
-        .collect();
-
-    if projection.is_empty() {
-        if has_projection {
-            // Ensure that we are reading at least one column from the table in case the query
-            // does not reference any columns directly such as "SELECT COUNT(1) FROM table"
-            projection.push(0);
-        } else {
-            // for table scan without projection, we default to return all columns
-            projection = schema
-                .fields()
-                .iter()
-                .enumerate()
-                .map(|(i, _)| i)
-                .collect::<Vec<usize>>();
-        }
-    }
-
-    // sort the projection otherwise we get non-deterministic behavior
-    projection.sort_unstable();
-
-    // create the projected schema
-    let mut projected_fields: Vec<DFField> = Vec::with_capacity(projection.len());
-    for i in &projection {
-        projected_fields.push(DFField::from(schema.fields()[*i].clone()));
-    }
-
-    Ok((projection, projected_fields.to_dfschema_ref()?))
-}
-
-/// Recursively transverses the logical plan removing expressions and that are not needed.
-fn optimize_plan(
-    optimizer: &ProjectionPushDown,
-    plan: &LogicalPlan,
-    required_columns: &HashSet<String>, // set of columns required up to this step
-    has_projection: bool,
-) -> Result<LogicalPlan> {
-    let mut new_required_columns = required_columns.clone();
-    match plan {
-        LogicalPlan::Projection {
-            input,
-            expr,
-            schema,
-        } => {
-            // projection:
-            // * remove any expression that is not required
-            // * construct the new set of required columns
-
-            let mut new_expr = Vec::new();
-            let mut new_fields = Vec::new();
-
-            // Gather all columns needed for expressions in this Projection
-            schema
-                .fields()
-                .iter()
-                .enumerate()
-                .try_for_each(|(i, field)| {
-                    if required_columns.contains(field.name()) {
-                        new_expr.push(expr[i].clone());
-                        new_fields.push(field.clone());
-
-                        // gather the new set of required columns
-                        utils::expr_to_column_names(&expr[i], &mut new_required_columns)
-                    } else {
-                        Ok(())
-                    }
-                })?;
-
-            let new_input =
-                optimize_plan(optimizer, &input, &new_required_columns, true)?;
-            if new_fields.is_empty() {
-                // no need for an expression at all
-                Ok(new_input)
-            } else {
-                Ok(LogicalPlan::Projection {
-                    expr: new_expr,
-                    input: Arc::new(new_input),
-                    schema: DFSchemaRef::new(DFSchema::new(new_fields)?),
-                })
-            }
-        }
-        LogicalPlan::Join {
-            left,
-            right,
-            on,
-            join_type,
-            schema,
-        } => {
-            for (l, r) in on {
-                new_required_columns.insert(l.to_owned());
-                new_required_columns.insert(r.to_owned());
-            }
-            Ok(LogicalPlan::Join {
-                left: Arc::new(optimize_plan(
-                    optimizer,
-                    &left,
-                    &new_required_columns,
-                    true,
-                )?),
-                right: Arc::new(optimize_plan(
-                    optimizer,
-                    &right,
-                    &new_required_columns,
-                    true,
-                )?),
-
-                join_type: *join_type,
-                on: on.clone(),
-                schema: schema.clone(),
-            })
-        }
-        LogicalPlan::Aggregate {
-            schema,
-            input,
-            group_expr,
-            aggr_expr,
-            ..
-        } => {
-            // aggregate:
-            // * remove any aggregate expression that is not required
-            // * construct the new set of required columns
-
-            utils::exprlist_to_column_names(group_expr, &mut new_required_columns)?;
-
-            // Gather all columns needed for expressions in this Aggregate
-            let mut new_aggr_expr = Vec::new();
-            aggr_expr.iter().try_for_each(|expr| {
-                let name = &expr.name(&schema)?;
-
-                if required_columns.contains(name) {
-                    new_aggr_expr.push(expr.clone());
-                    new_required_columns.insert(name.clone());
-
-                    // add to the new set of required columns
-                    utils::expr_to_column_names(expr, &mut new_required_columns)
-                } else {
-                    Ok(())
-                }
-            })?;
-
-            let new_schema = DFSchema::new(
-                schema
-                    .fields()
-                    .iter()
-                    .filter(|x| new_required_columns.contains(x.name()))
-                    .cloned()
-                    .collect(),
-            )?;
-
-            Ok(LogicalPlan::Aggregate {
-                group_expr: group_expr.clone(),
-                aggr_expr: new_aggr_expr,
-                input: Arc::new(optimize_plan(
-                    optimizer,
-                    &input,
-                    &new_required_columns,
-                    true,
-                )?),
-                schema: DFSchemaRef::new(new_schema),
-            })
-        }
-        // scans:
-        // * remove un-used columns from the scan projection
-        LogicalPlan::TableScan {
-            table_name,
-            source,
-            filters,
-            limit,
-            ..
-        } => {
-            let (projection, projected_schema) =
-                get_projected_schema(&source.schema(), required_columns, has_projection)?;
-
-            // return the table scan with projection
-            Ok(LogicalPlan::TableScan {
-                table_name: table_name.to_string(),
-                source: source.clone(),
-                projection: Some(projection),
-                projected_schema,
-                filters: filters.clone(),
-                limit: *limit,
-            })
-        }
-        LogicalPlan::Explain {
-            verbose,
-            plan,
-            stringified_plans,
-            schema,
-        } => {
-            let schema = schema.as_ref().to_owned().into();
-            optimize_explain(optimizer, *verbose, &*plan, stringified_plans, &schema)
-        }
-        // all other nodes: Add any additional columns used by
-        // expressions in this node to the list of required columns
-        LogicalPlan::Limit { .. }
-        | LogicalPlan::Filter { .. }
-        | LogicalPlan::Repartition { .. }
-        | LogicalPlan::EmptyRelation { .. }
-        | LogicalPlan::Sort { .. }
-        | LogicalPlan::CreateExternalTable { .. }
-        | LogicalPlan::Union { .. }
-        | LogicalPlan::Extension { .. } => {
-            let expr = plan.expressions();
-            // collect all required columns by this plan
-            utils::exprlist_to_column_names(&expr, &mut new_required_columns)?;
-
-            // apply the optimization to all inputs of the plan
-            let inputs = plan.inputs();
-            let new_inputs = inputs
-                .iter()
-                .map(|plan| {
-                    optimize_plan(optimizer, plan, &new_required_columns, has_projection)
-                })
-                .collect::<Result<Vec<_>>>()?;
-
-            utils::from_plan(plan, &expr, &new_inputs)
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-
-    use super::*;
-    use crate::logical_plan::{col, lit};
-    use crate::logical_plan::{max, min, Expr, LogicalPlanBuilder};
-    use crate::test::*;
-    use arrow::datatypes::DataType;
-
-    #[test]
-    fn aggregate_no_group_by() -> Result<()> {
-        let table_scan = test_table_scan()?;
-
-        let plan = LogicalPlanBuilder::from(&table_scan)
-            .aggregate(vec![], vec![max(col("b"))])?
-            .build()?;
-
-        let expected = "Aggregate: groupBy=[[]], aggr=[[MAX(#b)]]\
-        \n  TableScan: test projection=Some([1])";
-
-        assert_optimized_plan_eq(&plan, expected);
-
-        Ok(())
-    }
-
-    #[test]
-    fn aggregate_group_by() -> Result<()> {
-        let table_scan = test_table_scan()?;
-
-        let plan = LogicalPlanBuilder::from(&table_scan)
-            .aggregate(vec![col("c")], vec![max(col("b"))])?
-            .build()?;
-
-        let expected = "Aggregate: groupBy=[[#c]], aggr=[[MAX(#b)]]\
-        \n  TableScan: test projection=Some([1, 2])";
-
-        assert_optimized_plan_eq(&plan, expected);
-
-        Ok(())
-    }
-
-    #[test]
-    fn aggregate_no_group_by_with_filter() -> Result<()> {
-        let table_scan = test_table_scan()?;
-
-        let plan = LogicalPlanBuilder::from(&table_scan)
-            .filter(col("c"))?
-            .aggregate(vec![], vec![max(col("b"))])?
-            .build()?;
-
-        let expected = "Aggregate: groupBy=[[]], aggr=[[MAX(#b)]]\
-        \n  Filter: #c\
-        \n    TableScan: test projection=Some([1, 2])";
-
-        assert_optimized_plan_eq(&plan, expected);
-
-        Ok(())
-    }
-
-    #[test]
-    fn cast() -> Result<()> {
-        let table_scan = test_table_scan()?;
-
-        let projection = LogicalPlanBuilder::from(&table_scan)
-            .project(vec![Expr::Cast {
-                expr: Box::new(col("c")),
-                data_type: DataType::Float64,
-            }])?
-            .build()?;
-
-        let expected = "Projection: CAST(#c AS Float64)\
-        \n  TableScan: test projection=Some([2])";
-
-        assert_optimized_plan_eq(&projection, expected);
-
-        Ok(())
-    }
-
-    #[test]
-    fn table_scan_projected_schema() -> Result<()> {
-        let table_scan = test_table_scan()?;
-        assert_eq!(3, table_scan.schema().fields().len());
-        assert_fields_eq(&table_scan, vec!["a", "b", "c"]);
-
-        let plan = LogicalPlanBuilder::from(&table_scan)
-            .project(vec![col("a"), col("b")])?
-            .build()?;
-
-        assert_fields_eq(&plan, vec!["a", "b"]);
-
-        let expected = "Projection: #a, #b\
-        \n  TableScan: test projection=Some([0, 1])";
-
-        assert_optimized_plan_eq(&plan, expected);
-
-        Ok(())
-    }
-
-    #[test]
-    fn table_limit() -> Result<()> {
-        let table_scan = test_table_scan()?;
-        assert_eq!(3, table_scan.schema().fields().len());
-        assert_fields_eq(&table_scan, vec!["a", "b", "c"]);
-
-        let plan = LogicalPlanBuilder::from(&table_scan)
-            .project(vec![col("c"), col("a")])?
-            .limit(5)?
-            .build()?;
-
-        assert_fields_eq(&plan, vec!["c", "a"]);
-
-        let expected = "Limit: 5\
-        \n  Projection: #c, #a\
-        \n    TableScan: test projection=Some([0, 2])";
-
-        assert_optimized_plan_eq(&plan, expected);
-
-        Ok(())
-    }
-
-    #[test]
-    fn table_scan_without_projection() -> Result<()> {
-        let table_scan = test_table_scan()?;
-        let plan = LogicalPlanBuilder::from(&table_scan).build()?;
-        // should expand projection to all columns without projection
-        let expected = "TableScan: test projection=Some([0, 1, 2])";
-        assert_optimized_plan_eq(&plan, expected);
-        Ok(())
-    }
-
-    #[test]
-    fn table_scan_with_literal_projection() -> Result<()> {
-        let table_scan = test_table_scan()?;
-        let plan = LogicalPlanBuilder::from(&table_scan)
-            .project(vec![lit(1_i64), lit(2_i64)])?
-            .build()?;
-        let expected = "Projection: Int64(1), Int64(2)\
-                      \n  TableScan: test projection=Some([0])";
-        assert_optimized_plan_eq(&plan, expected);
-        Ok(())
-    }
-
-    /// tests that it removes unused columns in projections
-    #[test]
-    fn table_unused_column() -> Result<()> {
-        let table_scan = test_table_scan()?;
-        assert_eq!(3, table_scan.schema().fields().len());
-        assert_fields_eq(&table_scan, vec!["a", "b", "c"]);
-
-        // we never use "b" in the first projection => remove it
-        let plan = LogicalPlanBuilder::from(&table_scan)
-            .project(vec![col("c"), col("a"), col("b")])?
-            .filter(col("c").gt(lit(1)))?
-            .aggregate(vec![col("c")], vec![max(col("a"))])?
-            .build()?;
-
-        assert_fields_eq(&plan, vec!["c", "MAX(a)"]);
-
-        let expected = "\
-        Aggregate: groupBy=[[#c]], aggr=[[MAX(#a)]]\
-        \n  Filter: #c Gt Int32(1)\
-        \n    Projection: #c, #a\
-        \n      TableScan: test projection=Some([0, 2])";
-
-        assert_optimized_plan_eq(&plan, expected);
-
-        Ok(())
-    }
-
-    /// tests that it removes un-needed projections
-    #[test]
-    fn table_unused_projection() -> Result<()> {
-        let table_scan = test_table_scan()?;
-        assert_eq!(3, table_scan.schema().fields().len());
-        assert_fields_eq(&table_scan, vec!["a", "b", "c"]);
-
-        // there is no need for the first projection
-        let plan = LogicalPlanBuilder::from(&table_scan)
-            .project(vec![col("b")])?
-            .project(vec![lit(1).alias("a")])?
-            .build()?;
-
-        assert_fields_eq(&plan, vec!["a"]);
-
-        let expected = "\
-        Projection: Int32(1) AS a\
-        \n  TableScan: test projection=Some([0])";
-
-        assert_optimized_plan_eq(&plan, expected);
-
-        Ok(())
-    }
-
-    /// tests that optimizing twice yields same plan
-    #[test]
-    fn test_double_optimization() -> Result<()> {
-        let table_scan = test_table_scan()?;
-
-        let plan = LogicalPlanBuilder::from(&table_scan)
-            .project(vec![col("b")])?
-            .project(vec![lit(1).alias("a")])?
-            .build()?;
-
-        let optimized_plan1 = optimize(&plan).expect("failed to optimize plan");
-        let optimized_plan2 =
-            optimize(&optimized_plan1).expect("failed to optimize plan");
-
-        let formatted_plan1 = format!("{:?}", optimized_plan1);
-        let formatted_plan2 = format!("{:?}", optimized_plan2);
-        assert_eq!(formatted_plan1, formatted_plan2);
-        Ok(())
-    }
-
-    /// tests that it removes an aggregate is never used downstream
-    #[test]
-    fn table_unused_aggregate() -> Result<()> {
-        let table_scan = test_table_scan()?;
-        assert_eq!(3, table_scan.schema().fields().len());
-        assert_fields_eq(&table_scan, vec!["a", "b", "c"]);
-
-        // we never use "min(b)" => remove it
-        let plan = LogicalPlanBuilder::from(&table_scan)
-            .aggregate(vec![col("a"), col("c")], vec![max(col("b")), min(col("b"))])?
-            .filter(col("c").gt(lit(1)))?
-            .project(vec![col("c"), col("a"), col("MAX(b)")])?
-            .build()?;
-
-        assert_fields_eq(&plan, vec!["c", "a", "MAX(b)"]);
-
-        let expected = "\
-        Projection: #c, #a, #MAX(b)\
-        \n  Filter: #c Gt Int32(1)\
-        \n    Aggregate: groupBy=[[#a, #c]], aggr=[[MAX(#b)]]\
-        \n      TableScan: test projection=Some([0, 1, 2])";
-
-        assert_optimized_plan_eq(&plan, expected);
-
-        Ok(())
-    }
-
-    fn assert_optimized_plan_eq(plan: &LogicalPlan, expected: &str) {
-        let optimized_plan = optimize(plan).expect("failed to optimize plan");
-        let formatted_plan = format!("{:?}", optimized_plan);
-        assert_eq!(formatted_plan, expected);
-    }
-
-    fn optimize(plan: &LogicalPlan) -> Result<LogicalPlan> {
-        let rule = ProjectionPushDown::new();
-        rule.optimize(plan)
-    }
-}
diff --git a/rust/datafusion/src/optimizer/utils.rs b/rust/datafusion/src/optimizer/utils.rs
deleted file mode 100644
index fe1d0238191..00000000000
--- a/rust/datafusion/src/optimizer/utils.rs
+++ /dev/null
@@ -1,489 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Collection of utility functions that are leveraged by the query optimizer rules
-
-use std::{collections::HashSet, sync::Arc};
-
-use arrow::datatypes::Schema;
-
-use super::optimizer::OptimizerRule;
-use crate::logical_plan::{
-    Expr, LogicalPlan, Operator, Partitioning, PlanType, Recursion, StringifiedPlan,
-    ToDFSchema,
-};
-use crate::prelude::lit;
-use crate::scalar::ScalarValue;
-use crate::{
-    error::{DataFusionError, Result},
-    logical_plan::ExpressionVisitor,
-};
-
-const CASE_EXPR_MARKER: &str = "__DATAFUSION_CASE_EXPR__";
-const CASE_ELSE_MARKER: &str = "__DATAFUSION_CASE_ELSE__";
-
-/// Recursively walk a list of expression trees, collecting the unique set of column
-/// names referenced in the expression
-pub fn exprlist_to_column_names(
-    expr: &[Expr],
-    accum: &mut HashSet<String>,
-) -> Result<()> {
-    for e in expr {
-        expr_to_column_names(e, accum)?;
-    }
-    Ok(())
-}
-
-/// Recursively walk an expression tree, collecting the unique set of column names
-/// referenced in the expression
-struct ColumnNameVisitor<'a> {
-    accum: &'a mut HashSet<String>,
-}
-
-impl ExpressionVisitor for ColumnNameVisitor<'_> {
-    fn pre_visit(self, expr: &Expr) -> Result<Recursion<Self>> {
-        match expr {
-            Expr::Column(name) => {
-                self.accum.insert(name.clone());
-            }
-            Expr::ScalarVariable(var_names) => {
-                self.accum.insert(var_names.join("."));
-            }
-            Expr::Alias(_, _) => {}
-            Expr::Literal(_) => {}
-            Expr::BinaryExpr { .. } => {}
-            Expr::Not(_) => {}
-            Expr::IsNotNull(_) => {}
-            Expr::IsNull(_) => {}
-            Expr::Negative(_) => {}
-            Expr::Between { .. } => {}
-            Expr::Case { .. } => {}
-            Expr::Cast { .. } => {}
-            Expr::TryCast { .. } => {}
-            Expr::Sort { .. } => {}
-            Expr::ScalarFunction { .. } => {}
-            Expr::ScalarUDF { .. } => {}
-            Expr::AggregateFunction { .. } => {}
-            Expr::AggregateUDF { .. } => {}
-            Expr::InList { .. } => {}
-            Expr::Wildcard => {}
-        }
-        Ok(Recursion::Continue(self))
-    }
-}
-
-/// Recursively walk an expression tree, collecting the unique set of column names
-/// referenced in the expression
-pub fn expr_to_column_names(expr: &Expr, accum: &mut HashSet<String>) -> Result<()> {
-    expr.accept(ColumnNameVisitor { accum })?;
-    Ok(())
-}
-
-/// Create a `LogicalPlan::Explain` node by running `optimizer` on the
-/// input plan and capturing the resulting plan string
-pub fn optimize_explain(
-    optimizer: &impl OptimizerRule,
-    verbose: bool,
-    plan: &LogicalPlan,
-    stringified_plans: &[StringifiedPlan],
-    schema: &Schema,
-) -> Result<LogicalPlan> {
-    // These are the fields of LogicalPlan::Explain It might be nice
-    // to transform that enum Variant into its own struct and avoid
-    // passing the fields individually
-    let plan = Arc::new(optimizer.optimize(plan)?);
-    let mut stringified_plans = stringified_plans.to_vec();
-    let optimizer_name = optimizer.name().into();
-    stringified_plans.push(StringifiedPlan::new(
-        PlanType::OptimizedLogicalPlan { optimizer_name },
-        format!("{:#?}", plan),
-    ));
-    Ok(LogicalPlan::Explain {
-        verbose,
-        plan,
-        stringified_plans,
-        schema: schema.clone().to_dfschema_ref()?,
-    })
-}
-
-/// Convenience rule for writing optimizers: recursively invoke
-/// optimize on plan's children and then return a node of the same
-/// type. Useful for optimizer rules which want to leave the type
-/// of plan unchanged but still apply to the children.
-/// This also handles the case when the `plan` is a [`LogicalPlan::Explain`].
-pub fn optimize_children(
-    optimizer: &impl OptimizerRule,
-    plan: &LogicalPlan,
-) -> Result<LogicalPlan> {
-    if let LogicalPlan::Explain {
-        verbose,
-        plan,
-        stringified_plans,
-        schema,
-    } = plan
-    {
-        return optimize_explain(
-            optimizer,
-            *verbose,
-            &*plan,
-            stringified_plans,
-            &schema.as_ref().to_owned().into(),
-        );
-    }
-
-    let new_exprs = plan.expressions();
-    let new_inputs = plan
-        .inputs()
-        .into_iter()
-        .map(|plan| optimizer.optimize(plan))
-        .collect::<Result<Vec<_>>>()?;
-
-    from_plan(plan, &new_exprs, &new_inputs)
-}
-
-/// Returns a new logical plan based on the original one with inputs and expressions replaced
-pub fn from_plan(
-    plan: &LogicalPlan,
-    expr: &[Expr],
-    inputs: &[LogicalPlan],
-) -> Result<LogicalPlan> {
-    match plan {
-        LogicalPlan::Projection { schema, .. } => Ok(LogicalPlan::Projection {
-            expr: expr.to_vec(),
-            input: Arc::new(inputs[0].clone()),
-            schema: schema.clone(),
-        }),
-        LogicalPlan::Filter { .. } => Ok(LogicalPlan::Filter {
-            predicate: expr[0].clone(),
-            input: Arc::new(inputs[0].clone()),
-        }),
-        LogicalPlan::Repartition {
-            partitioning_scheme,
-            ..
-        } => match partitioning_scheme {
-            Partitioning::RoundRobinBatch(n) => Ok(LogicalPlan::Repartition {
-                partitioning_scheme: Partitioning::RoundRobinBatch(*n),
-                input: Arc::new(inputs[0].clone()),
-            }),
-            Partitioning::Hash(_, n) => Ok(LogicalPlan::Repartition {
-                partitioning_scheme: Partitioning::Hash(expr.to_owned(), *n),
-                input: Arc::new(inputs[0].clone()),
-            }),
-        },
-        LogicalPlan::Aggregate {
-            group_expr, schema, ..
-        } => Ok(LogicalPlan::Aggregate {
-            group_expr: expr[0..group_expr.len()].to_vec(),
-            aggr_expr: expr[group_expr.len()..].to_vec(),
-            input: Arc::new(inputs[0].clone()),
-            schema: schema.clone(),
-        }),
-        LogicalPlan::Sort { .. } => Ok(LogicalPlan::Sort {
-            expr: expr.to_vec(),
-            input: Arc::new(inputs[0].clone()),
-        }),
-        LogicalPlan::Join {
-            join_type,
-            on,
-            schema,
-            ..
-        } => Ok(LogicalPlan::Join {
-            left: Arc::new(inputs[0].clone()),
-            right: Arc::new(inputs[1].clone()),
-            join_type: *join_type,
-            on: on.clone(),
-            schema: schema.clone(),
-        }),
-        LogicalPlan::Limit { n, .. } => Ok(LogicalPlan::Limit {
-            n: *n,
-            input: Arc::new(inputs[0].clone()),
-        }),
-        LogicalPlan::Extension { node } => Ok(LogicalPlan::Extension {
-            node: node.from_template(expr, inputs),
-        }),
-        LogicalPlan::Union { schema, alias, .. } => Ok(LogicalPlan::Union {
-            inputs: inputs.to_vec(),
-            schema: schema.clone(),
-            alias: alias.clone(),
-        }),
-        LogicalPlan::EmptyRelation { .. }
-        | LogicalPlan::TableScan { .. }
-        | LogicalPlan::CreateExternalTable { .. }
-        | LogicalPlan::Explain { .. } => Ok(plan.clone()),
-    }
-}
-
-/// Returns all direct children `Expression`s of `expr`.
-/// E.g. if the expression is "(a + 1) + 1", it returns ["a + 1", "1"] (as Expr objects)
-pub fn expr_sub_expressions(expr: &Expr) -> Result<Vec<Expr>> {
-    match expr {
-        Expr::BinaryExpr { left, right, .. } => {
-            Ok(vec![left.as_ref().to_owned(), right.as_ref().to_owned()])
-        }
-        Expr::IsNull(e) => Ok(vec![e.as_ref().to_owned()]),
-        Expr::IsNotNull(e) => Ok(vec![e.as_ref().to_owned()]),
-        Expr::ScalarFunction { args, .. } => Ok(args.clone()),
-        Expr::ScalarUDF { args, .. } => Ok(args.clone()),
-        Expr::AggregateFunction { args, .. } => Ok(args.clone()),
-        Expr::AggregateUDF { args, .. } => Ok(args.clone()),
-        Expr::Case {
-            expr,
-            when_then_expr,
-            else_expr,
-            ..
-        } => {
-            let mut expr_list: Vec<Expr> = vec![];
-            if let Some(e) = expr {
-                expr_list.push(lit(CASE_EXPR_MARKER));
-                expr_list.push(e.as_ref().to_owned());
-            }
-            for (w, t) in when_then_expr {
-                expr_list.push(w.as_ref().to_owned());
-                expr_list.push(t.as_ref().to_owned());
-            }
-            if let Some(e) = else_expr {
-                expr_list.push(lit(CASE_ELSE_MARKER));
-                expr_list.push(e.as_ref().to_owned());
-            }
-            Ok(expr_list)
-        }
-        Expr::Cast { expr, .. } => Ok(vec![expr.as_ref().to_owned()]),
-        Expr::TryCast { expr, .. } => Ok(vec![expr.as_ref().to_owned()]),
-        Expr::Column(_) => Ok(vec![]),
-        Expr::Alias(expr, ..) => Ok(vec![expr.as_ref().to_owned()]),
-        Expr::Literal(_) => Ok(vec![]),
-        Expr::ScalarVariable(_) => Ok(vec![]),
-        Expr::Not(expr) => Ok(vec![expr.as_ref().to_owned()]),
-        Expr::Negative(expr) => Ok(vec![expr.as_ref().to_owned()]),
-        Expr::Sort { expr, .. } => Ok(vec![expr.as_ref().to_owned()]),
-        Expr::Between {
-            expr, low, high, ..
-        } => Ok(vec![
-            expr.as_ref().to_owned(),
-            low.as_ref().to_owned(),
-            high.as_ref().to_owned(),
-        ]),
-        Expr::InList { expr, list, .. } => {
-            let mut expr_list: Vec<Expr> = vec![expr.as_ref().to_owned()];
-            for list_expr in list {
-                expr_list.push(list_expr.to_owned());
-            }
-            Ok(expr_list)
-        }
-        Expr::Wildcard { .. } => Err(DataFusionError::Internal(
-            "Wildcard expressions are not valid in a logical query plan".to_owned(),
-        )),
-    }
-}
-
-/// returns a new expression where the expressions in `expr` are replaced by the ones in
-/// `expressions`.
-/// This is used in conjunction with ``expr_expressions`` to re-write expressions.
-pub fn rewrite_expression(expr: &Expr, expressions: &[Expr]) -> Result<Expr> {
-    match expr {
-        Expr::BinaryExpr { op, .. } => Ok(Expr::BinaryExpr {
-            left: Box::new(expressions[0].clone()),
-            op: *op,
-            right: Box::new(expressions[1].clone()),
-        }),
-        Expr::IsNull(_) => Ok(Expr::IsNull(Box::new(expressions[0].clone()))),
-        Expr::IsNotNull(_) => Ok(Expr::IsNotNull(Box::new(expressions[0].clone()))),
-        Expr::ScalarFunction { fun, .. } => Ok(Expr::ScalarFunction {
-            fun: fun.clone(),
-            args: expressions.to_vec(),
-        }),
-        Expr::ScalarUDF { fun, .. } => Ok(Expr::ScalarUDF {
-            fun: fun.clone(),
-            args: expressions.to_vec(),
-        }),
-        Expr::AggregateFunction { fun, distinct, .. } => Ok(Expr::AggregateFunction {
-            fun: fun.clone(),
-            args: expressions.to_vec(),
-            distinct: *distinct,
-        }),
-        Expr::AggregateUDF { fun, .. } => Ok(Expr::AggregateUDF {
-            fun: fun.clone(),
-            args: expressions.to_vec(),
-        }),
-        Expr::Case { .. } => {
-            let mut base_expr: Option<Box<Expr>> = None;
-            let mut when_then: Vec<(Box<Expr>, Box<Expr>)> = vec![];
-            let mut else_expr: Option<Box<Expr>> = None;
-            let mut i = 0;
-
-            while i < expressions.len() {
-                match &expressions[i] {
-                    Expr::Literal(ScalarValue::Utf8(Some(str)))
-                        if str == CASE_EXPR_MARKER =>
-                    {
-                        base_expr = Some(Box::new(expressions[i + 1].clone()));
-                        i += 2;
-                    }
-                    Expr::Literal(ScalarValue::Utf8(Some(str)))
-                        if str == CASE_ELSE_MARKER =>
-                    {
-                        else_expr = Some(Box::new(expressions[i + 1].clone()));
-                        i += 2;
-                    }
-                    _ => {
-                        when_then.push((
-                            Box::new(expressions[i].clone()),
-                            Box::new(expressions[i + 1].clone()),
-                        ));
-                        i += 2;
-                    }
-                }
-            }
-
-            Ok(Expr::Case {
-                expr: base_expr,
-                when_then_expr: when_then,
-                else_expr,
-            })
-        }
-        Expr::Cast { data_type, .. } => Ok(Expr::Cast {
-            expr: Box::new(expressions[0].clone()),
-            data_type: data_type.clone(),
-        }),
-        Expr::TryCast { data_type, .. } => Ok(Expr::TryCast {
-            expr: Box::new(expressions[0].clone()),
-            data_type: data_type.clone(),
-        }),
-        Expr::Alias(_, alias) => {
-            Ok(Expr::Alias(Box::new(expressions[0].clone()), alias.clone()))
-        }
-        Expr::Not(_) => Ok(Expr::Not(Box::new(expressions[0].clone()))),
-        Expr::Negative(_) => Ok(Expr::Negative(Box::new(expressions[0].clone()))),
-        Expr::Column(_) => Ok(expr.clone()),
-        Expr::Literal(_) => Ok(expr.clone()),
-        Expr::ScalarVariable(_) => Ok(expr.clone()),
-        Expr::Sort {
-            asc, nulls_first, ..
-        } => Ok(Expr::Sort {
-            expr: Box::new(expressions[0].clone()),
-            asc: *asc,
-            nulls_first: *nulls_first,
-        }),
-        Expr::Between { negated, .. } => {
-            let expr = Expr::BinaryExpr {
-                left: Box::new(Expr::BinaryExpr {
-                    left: Box::new(expressions[0].clone()),
-                    op: Operator::GtEq,
-                    right: Box::new(expressions[1].clone()),
-                }),
-                op: Operator::And,
-                right: Box::new(Expr::BinaryExpr {
-                    left: Box::new(expressions[0].clone()),
-                    op: Operator::LtEq,
-                    right: Box::new(expressions[2].clone()),
-                }),
-            };
-
-            if *negated {
-                Ok(Expr::Not(Box::new(expr)))
-            } else {
-                Ok(expr)
-            }
-        }
-        Expr::InList { .. } => Ok(expr.clone()),
-        Expr::Wildcard { .. } => Err(DataFusionError::Internal(
-            "Wildcard expressions are not valid in a logical query plan".to_owned(),
-        )),
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::logical_plan::{col, LogicalPlanBuilder};
-    use arrow::datatypes::DataType;
-    use std::collections::HashSet;
-
-    #[test]
-    fn test_collect_expr() -> Result<()> {
-        let mut accum: HashSet<String> = HashSet::new();
-        expr_to_column_names(
-            &Expr::Cast {
-                expr: Box::new(col("a")),
-                data_type: DataType::Float64,
-            },
-            &mut accum,
-        )?;
-        expr_to_column_names(
-            &Expr::Cast {
-                expr: Box::new(col("a")),
-                data_type: DataType::Float64,
-            },
-            &mut accum,
-        )?;
-        assert_eq!(1, accum.len());
-        assert!(accum.contains("a"));
-        Ok(())
-    }
-
-    struct TestOptimizer {}
-
-    impl OptimizerRule for TestOptimizer {
-        fn optimize(&self, plan: &LogicalPlan) -> Result<LogicalPlan> {
-            Ok(plan.clone())
-        }
-
-        fn name(&self) -> &str {
-            "test_optimizer"
-        }
-    }
-
-    #[test]
-    fn test_optimize_explain() -> Result<()> {
-        let optimizer = TestOptimizer {};
-
-        let empty_plan = LogicalPlanBuilder::empty(false).build()?;
-        let schema = LogicalPlan::explain_schema();
-
-        let optimized_explain = optimize_explain(
-            &optimizer,
-            true,
-            &empty_plan,
-            &[StringifiedPlan::new(PlanType::LogicalPlan, "...")],
-            schema.as_ref(),
-        )?;
-
-        match &optimized_explain {
-            LogicalPlan::Explain {
-                verbose,
-                stringified_plans,
-                ..
-            } => {
-                assert_eq!(*verbose, true);
-
-                let expected_stringified_plans = vec![
-                    StringifiedPlan::new(PlanType::LogicalPlan, "..."),
-                    StringifiedPlan::new(
-                        PlanType::OptimizedLogicalPlan {
-                            optimizer_name: "test_optimizer".into(),
-                        },
-                        "EmptyRelation",
-                    ),
-                ];
-                assert_eq!(*stringified_plans, expected_stringified_plans);
-            }
-            _ => panic!("Expected explain plan but got {:?}", optimized_explain),
-        }
-
-        Ok(())
-    }
-}
diff --git a/rust/datafusion/src/physical_optimizer/coalesce_batches.rs b/rust/datafusion/src/physical_optimizer/coalesce_batches.rs
deleted file mode 100644
index 9af8911062d..00000000000
--- a/rust/datafusion/src/physical_optimizer/coalesce_batches.rs
+++ /dev/null
@@ -1,88 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! CoalesceBatches optimizer that groups batches together rows
-//! in bigger batches to avoid overhead with small batches
-
-use super::optimizer::PhysicalOptimizerRule;
-use crate::{
-    error::Result,
-    physical_plan::{
-        coalesce_batches::CoalesceBatchesExec, filter::FilterExec,
-        hash_join::HashJoinExec, repartition::RepartitionExec,
-    },
-};
-use std::sync::Arc;
-
-/// Optimizer that introduces CoalesceBatchesExec to avoid overhead with small batches
-pub struct CoalesceBatches {}
-
-impl CoalesceBatches {
-    #[allow(missing_docs)]
-    pub fn new() -> Self {
-        Self {}
-    }
-}
-impl PhysicalOptimizerRule for CoalesceBatches {
-    fn optimize(
-        &self,
-        plan: Arc<dyn crate::physical_plan::ExecutionPlan>,
-        config: &crate::execution::context::ExecutionConfig,
-    ) -> Result<Arc<dyn crate::physical_plan::ExecutionPlan>> {
-        // wrap operators in CoalesceBatches to avoid lots of tiny batches when we have
-        // highly selective filters
-        let children = plan
-            .children()
-            .iter()
-            .map(|child| self.optimize(child.clone(), config))
-            .collect::<Result<Vec<_>>>()?;
-
-        let plan_any = plan.as_any();
-        //TODO we should do this in a more generic way either by wrapping all operators
-        // or having an API so that operators can declare when their inputs or outputs
-        // need to be wrapped in a coalesce batches operator.
-        // See https://issues.apache.org/jira/browse/ARROW-11068
-        let wrap_in_coalesce = plan_any.downcast_ref::<FilterExec>().is_some()
-            || plan_any.downcast_ref::<HashJoinExec>().is_some()
-            || plan_any.downcast_ref::<RepartitionExec>().is_some();
-
-        //TODO we should also do this for HashAggregateExec but we need to update tests
-        // as part of this work - see https://issues.apache.org/jira/browse/ARROW-11068
-        // || plan_any.downcast_ref::<HashAggregateExec>().is_some();
-
-        if plan.children().is_empty() {
-            // leaf node, children cannot be replaced
-            Ok(plan.clone())
-        } else {
-            let plan = plan.with_new_children(children)?;
-            Ok(if wrap_in_coalesce {
-                //TODO we should add specific configuration settings for coalescing batches and
-                // we should do that once https://issues.apache.org/jira/browse/ARROW-11059 is
-                // implemented. For now, we choose half the configured batch size to avoid copies
-                // when a small number of rows are removed from a batch
-                let target_batch_size = config.batch_size / 2;
-                Arc::new(CoalesceBatchesExec::new(plan.clone(), target_batch_size))
-            } else {
-                plan.clone()
-            })
-        }
-    }
-
-    fn name(&self) -> &str {
-        "coalesce_batches"
-    }
-}
diff --git a/rust/datafusion/src/physical_optimizer/merge_exec.rs b/rust/datafusion/src/physical_optimizer/merge_exec.rs
deleted file mode 100644
index 255d1bc2458..00000000000
--- a/rust/datafusion/src/physical_optimizer/merge_exec.rs
+++ /dev/null
@@ -1,74 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! AddMergeExec adds MergeExec to merge plans
-//! with more partitions into one partition when the node
-//! needs a single partition
-use super::optimizer::PhysicalOptimizerRule;
-use crate::{
-    error::Result,
-    physical_plan::{merge::MergeExec, Distribution},
-};
-use std::sync::Arc;
-
-/// Introduces MergeExec
-pub struct AddMergeExec {}
-
-impl AddMergeExec {
-    #[allow(missing_docs)]
-    pub fn new() -> Self {
-        Self {}
-    }
-}
-
-impl PhysicalOptimizerRule for AddMergeExec {
-    fn optimize(
-        &self,
-        plan: Arc<dyn crate::physical_plan::ExecutionPlan>,
-        config: &crate::execution::context::ExecutionConfig,
-    ) -> Result<Arc<dyn crate::physical_plan::ExecutionPlan>> {
-        if plan.children().is_empty() {
-            // leaf node, children cannot be replaced
-            Ok(plan.clone())
-        } else {
-            let children = plan
-                .children()
-                .iter()
-                .map(|child| self.optimize(child.clone(), config))
-                .collect::<Result<Vec<_>>>()?;
-            match plan.required_child_distribution() {
-                Distribution::UnspecifiedDistribution => plan.with_new_children(children),
-                Distribution::SinglePartition => plan.with_new_children(
-                    children
-                        .iter()
-                        .map(|child| {
-                            if child.output_partitioning().partition_count() == 1 {
-                                child.clone()
-                            } else {
-                                Arc::new(MergeExec::new(child.clone()))
-                            }
-                        })
-                        .collect(),
-                ),
-            }
-        }
-    }
-
-    fn name(&self) -> &str {
-        "add_merge_exec"
-    }
-}
diff --git a/rust/datafusion/src/physical_optimizer/mod.rs b/rust/datafusion/src/physical_optimizer/mod.rs
deleted file mode 100644
index eca63db9f3d..00000000000
--- a/rust/datafusion/src/physical_optimizer/mod.rs
+++ /dev/null
@@ -1,24 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! This module contains a query optimizer that operates against a physical plan and applies
-//! rules to a physical plan, such as "Repartition".
-
-pub mod coalesce_batches;
-pub mod merge_exec;
-pub mod optimizer;
-pub mod repartition;
diff --git a/rust/datafusion/src/physical_optimizer/optimizer.rs b/rust/datafusion/src/physical_optimizer/optimizer.rs
deleted file mode 100644
index e2f40ae9540..00000000000
--- a/rust/datafusion/src/physical_optimizer/optimizer.rs
+++ /dev/null
@@ -1,39 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Physical optimizer traits
-
-use std::sync::Arc;
-
-use crate::{
-    error::Result, execution::context::ExecutionConfig, physical_plan::ExecutionPlan,
-};
-
-/// `PhysicalOptimizerRule` transforms one ['ExecutionPlan'] into another which
-/// computes the same results, but in a potentially more efficient
-/// way.
-pub trait PhysicalOptimizerRule {
-    /// Rewrite `plan` to an optimized form
-    fn optimize(
-        &self,
-        plan: Arc<dyn ExecutionPlan>,
-        config: &ExecutionConfig,
-    ) -> Result<Arc<dyn ExecutionPlan>>;
-
-    /// A human readable name for this optimizer rule
-    fn name(&self) -> &str;
-}
diff --git a/rust/datafusion/src/physical_optimizer/repartition.rs b/rust/datafusion/src/physical_optimizer/repartition.rs
deleted file mode 100644
index 82f46f9cbbb..00000000000
--- a/rust/datafusion/src/physical_optimizer/repartition.rs
+++ /dev/null
@@ -1,186 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Repartition optimizer that introduces repartition nodes to increase the level of parallism available
-use std::sync::Arc;
-
-use super::optimizer::PhysicalOptimizerRule;
-use crate::physical_plan::{
-    empty::EmptyExec, repartition::RepartitionExec, ExecutionPlan,
-};
-use crate::physical_plan::{Distribution, Partitioning::*};
-use crate::{error::Result, execution::context::ExecutionConfig};
-
-/// Optimizer that introduces repartition to introduce more parallelism in the plan
-pub struct Repartition {}
-
-impl Repartition {
-    #[allow(missing_docs)]
-    pub fn new() -> Self {
-        Self {}
-    }
-}
-
-fn optimize_concurrency(
-    concurrency: usize,
-    requires_single_partition: bool,
-    plan: Arc<dyn ExecutionPlan>,
-) -> Result<Arc<dyn ExecutionPlan>> {
-    // Recurse into children bottom-up (added nodes should be as deep as possible)
-
-    let new_plan = if plan.children().is_empty() {
-        // leaf node - don't replace children
-        plan.clone()
-    } else {
-        let children = plan
-            .children()
-            .iter()
-            .map(|child| {
-                optimize_concurrency(
-                    concurrency,
-                    plan.required_child_distribution() == Distribution::SinglePartition,
-                    child.clone(),
-                )
-            })
-            .collect::<Result<_>>()?;
-        plan.with_new_children(children)?
-    };
-
-    let perform_repartition = match new_plan.output_partitioning() {
-        // Apply when underlying node has less than `self.concurrency` amount of concurrency
-        RoundRobinBatch(x) => x < concurrency,
-        UnknownPartitioning(x) => x < concurrency,
-        // we don't want to introduce partitioning after hash partitioning
-        // as the plan will likely depend on this
-        Hash(_, _) => false,
-    };
-
-    // TODO: EmptyExec causes failures with RepartitionExec
-    // But also not very useful to inlude
-    let is_empty_exec = plan.as_any().downcast_ref::<EmptyExec>().is_some();
-
-    if perform_repartition && !requires_single_partition && !is_empty_exec {
-        Ok(Arc::new(RepartitionExec::try_new(
-            new_plan,
-            RoundRobinBatch(concurrency),
-        )?))
-    } else {
-        Ok(new_plan)
-    }
-}
-
-impl PhysicalOptimizerRule for Repartition {
-    fn optimize(
-        &self,
-        plan: Arc<dyn ExecutionPlan>,
-        config: &ExecutionConfig,
-    ) -> Result<Arc<dyn ExecutionPlan>> {
-        // Don't run optimizer if concurrency == 1
-        if config.concurrency == 1 {
-            Ok(plan)
-        } else {
-            optimize_concurrency(config.concurrency, true, plan)
-        }
-    }
-
-    fn name(&self) -> &str {
-        "repartition"
-    }
-}
-#[cfg(test)]
-mod tests {
-    use arrow::datatypes::Schema;
-
-    use super::*;
-    use crate::datasource::datasource::Statistics;
-    use crate::physical_plan::parquet::{ParquetExec, ParquetPartition};
-    use crate::physical_plan::projection::ProjectionExec;
-
-    #[test]
-    fn added_repartition_to_single_partition() -> Result<()> {
-        let parquet_project = ProjectionExec::try_new(
-            vec![],
-            Arc::new(ParquetExec::new(
-                vec![ParquetPartition {
-                    filenames: vec!["x".to_string()],
-                    statistics: Statistics::default(),
-                }],
-                Schema::empty(),
-                None,
-                None,
-                2048,
-                None,
-            )),
-        )?;
-
-        let optimizer = Repartition {};
-
-        let optimized = optimizer.optimize(
-            Arc::new(parquet_project),
-            &ExecutionConfig::new().with_concurrency(10),
-        )?;
-
-        assert_eq!(
-            optimized.children()[0]
-                .output_partitioning()
-                .partition_count(),
-            10
-        );
-
-        Ok(())
-    }
-
-    #[test]
-    fn repartition_deepest_node() -> Result<()> {
-        let parquet_project = ProjectionExec::try_new(
-            vec![],
-            Arc::new(ProjectionExec::try_new(
-                vec![],
-                Arc::new(ParquetExec::new(
-                    vec![ParquetPartition {
-                        filenames: vec!["x".to_string()],
-                        statistics: Statistics::default(),
-                    }],
-                    Schema::empty(),
-                    None,
-                    None,
-                    2048,
-                    None,
-                )),
-            )?),
-        )?;
-
-        let optimizer = Repartition {};
-
-        let optimized = optimizer.optimize(
-            Arc::new(parquet_project),
-            &ExecutionConfig::new().with_concurrency(10),
-        )?;
-
-        // RepartitionExec is added to deepest node
-        assert!(optimized.children()[0]
-            .as_any()
-            .downcast_ref::<RepartitionExec>()
-            .is_none());
-        assert!(optimized.children()[0].children()[0]
-            .as_any()
-            .downcast_ref::<RepartitionExec>()
-            .is_some());
-
-        Ok(())
-    }
-}
diff --git a/rust/datafusion/src/physical_plan/aggregates.rs b/rust/datafusion/src/physical_plan/aggregates.rs
deleted file mode 100644
index 9417c7c8f05..00000000000
--- a/rust/datafusion/src/physical_plan/aggregates.rs
+++ /dev/null
@@ -1,258 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Declaration of built-in (aggregate) functions.
-//! This module contains built-in aggregates' enumeration and metadata.
-//!
-//! Generally, an aggregate has:
-//! * a signature
-//! * a return type, that is a function of the incoming argument's types
-//! * the computation, that must accept each valid signature
-//!
-//! * Signature: see `Signature`
-//! * Return type: a function `(arg_types) -> return_type`. E.g. for min, ([f32]) -> f32, ([f64]) -> f64.
-
-use super::{
-    functions::Signature,
-    type_coercion::{coerce, data_types},
-    Accumulator, AggregateExpr, PhysicalExpr,
-};
-use crate::error::{DataFusionError, Result};
-use crate::physical_plan::distinct_expressions;
-use crate::physical_plan::expressions;
-use arrow::datatypes::{DataType, Schema, TimeUnit};
-use expressions::{avg_return_type, sum_return_type};
-use std::{fmt, str::FromStr, sync::Arc};
-
-/// the implementation of an aggregate function
-pub type AccumulatorFunctionImplementation =
-    Arc<dyn Fn() -> Result<Box<dyn Accumulator>> + Send + Sync>;
-
-/// This signature corresponds to which types an aggregator serializes
-/// its state, given its return datatype.
-pub type StateTypeFunction =
-    Arc<dyn Fn(&DataType) -> Result<Arc<Vec<DataType>>> + Send + Sync>;
-
-/// Enum of all built-in scalar functions
-#[derive(Debug, Clone, PartialEq, Eq)]
-pub enum AggregateFunction {
-    /// count
-    Count,
-    /// sum
-    Sum,
-    /// min
-    Min,
-    /// max
-    Max,
-    /// avg
-    Avg,
-}
-
-impl fmt::Display for AggregateFunction {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        // uppercase of the debug.
-        write!(f, "{}", format!("{:?}", self).to_uppercase())
-    }
-}
-
-impl FromStr for AggregateFunction {
-    type Err = DataFusionError;
-    fn from_str(name: &str) -> Result<AggregateFunction> {
-        Ok(match name {
-            "min" => AggregateFunction::Min,
-            "max" => AggregateFunction::Max,
-            "count" => AggregateFunction::Count,
-            "avg" => AggregateFunction::Avg,
-            "sum" => AggregateFunction::Sum,
-            _ => {
-                return Err(DataFusionError::Plan(format!(
-                    "There is no built-in function named {}",
-                    name
-                )))
-            }
-        })
-    }
-}
-
-/// Returns the datatype of the scalar function
-pub fn return_type(fun: &AggregateFunction, arg_types: &[DataType]) -> Result<DataType> {
-    // Note that this function *must* return the same type that the respective physical expression returns
-    // or the execution panics.
-
-    // verify that this is a valid set of data types for this function
-    data_types(arg_types, &signature(fun))?;
-
-    match fun {
-        AggregateFunction::Count => Ok(DataType::UInt64),
-        AggregateFunction::Max | AggregateFunction::Min => Ok(arg_types[0].clone()),
-        AggregateFunction::Sum => sum_return_type(&arg_types[0]),
-        AggregateFunction::Avg => avg_return_type(&arg_types[0]),
-    }
-}
-
-/// Create a physical (function) expression.
-/// This function errors when `args`' can't be coerced to a valid argument type of the function.
-pub fn create_aggregate_expr(
-    fun: &AggregateFunction,
-    distinct: bool,
-    args: &[Arc<dyn PhysicalExpr>],
-    input_schema: &Schema,
-    name: String,
-) -> Result<Arc<dyn AggregateExpr>> {
-    // coerce
-    let arg = coerce(args, input_schema, &signature(fun))?[0].clone();
-
-    let arg_types = args
-        .iter()
-        .map(|e| e.data_type(input_schema))
-        .collect::<Result<Vec<_>>>()?;
-
-    let return_type = return_type(&fun, &arg_types)?;
-
-    Ok(match (fun, distinct) {
-        (AggregateFunction::Count, false) => {
-            Arc::new(expressions::Count::new(arg, name, return_type))
-        }
-        (AggregateFunction::Count, true) => {
-            Arc::new(distinct_expressions::DistinctCount::new(
-                arg_types,
-                args.to_vec(),
-                name,
-                return_type,
-            ))
-        }
-        (AggregateFunction::Sum, false) => {
-            Arc::new(expressions::Sum::new(arg, name, return_type))
-        }
-        (AggregateFunction::Sum, true) => {
-            return Err(DataFusionError::NotImplemented(
-                "SUM(DISTINCT) aggregations are not available".to_string(),
-            ));
-        }
-        (AggregateFunction::Min, _) => {
-            Arc::new(expressions::Min::new(arg, name, return_type))
-        }
-        (AggregateFunction::Max, _) => {
-            Arc::new(expressions::Max::new(arg, name, return_type))
-        }
-        (AggregateFunction::Avg, false) => {
-            Arc::new(expressions::Avg::new(arg, name, return_type))
-        }
-        (AggregateFunction::Avg, true) => {
-            return Err(DataFusionError::NotImplemented(
-                "AVG(DISTINCT) aggregations are not available".to_string(),
-            ));
-        }
-    })
-}
-
-static STRINGS: &[DataType] = &[DataType::Utf8, DataType::LargeUtf8];
-
-static NUMERICS: &[DataType] = &[
-    DataType::Int8,
-    DataType::Int16,
-    DataType::Int32,
-    DataType::Int64,
-    DataType::UInt8,
-    DataType::UInt16,
-    DataType::UInt32,
-    DataType::UInt64,
-    DataType::Float32,
-    DataType::Float64,
-];
-
-static TIMESTAMPS: &[DataType] = &[
-    DataType::Timestamp(TimeUnit::Second, None),
-    DataType::Timestamp(TimeUnit::Millisecond, None),
-    DataType::Timestamp(TimeUnit::Microsecond, None),
-    DataType::Timestamp(TimeUnit::Nanosecond, None),
-];
-
-/// the signatures supported by the function `fun`.
-fn signature(fun: &AggregateFunction) -> Signature {
-    // note: the physical expression must accept the type returned by this function or the execution panics.
-    match fun {
-        AggregateFunction::Count => Signature::Any(1),
-        AggregateFunction::Min | AggregateFunction::Max => {
-            let valid = STRINGS
-                .iter()
-                .chain(NUMERICS.iter())
-                .chain(TIMESTAMPS.iter())
-                .cloned()
-                .collect::<Vec<_>>();
-            Signature::Uniform(1, valid)
-        }
-        AggregateFunction::Avg | AggregateFunction::Sum => {
-            Signature::Uniform(1, NUMERICS.to_vec())
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::error::Result;
-
-    #[test]
-    fn test_min_max() -> Result<()> {
-        let observed = return_type(&AggregateFunction::Min, &[DataType::Utf8])?;
-        assert_eq!(DataType::Utf8, observed);
-
-        let observed = return_type(&AggregateFunction::Max, &[DataType::Int32])?;
-        assert_eq!(DataType::Int32, observed);
-        Ok(())
-    }
-
-    #[test]
-    fn test_sum_no_utf8() {
-        let observed = return_type(&AggregateFunction::Sum, &[DataType::Utf8]);
-        assert!(observed.is_err());
-    }
-
-    #[test]
-    fn test_sum_upcasts() -> Result<()> {
-        let observed = return_type(&AggregateFunction::Sum, &[DataType::UInt32])?;
-        assert_eq!(DataType::UInt64, observed);
-        Ok(())
-    }
-
-    #[test]
-    fn test_count_return_type() -> Result<()> {
-        let observed = return_type(&AggregateFunction::Count, &[DataType::Utf8])?;
-        assert_eq!(DataType::UInt64, observed);
-
-        let observed = return_type(&AggregateFunction::Count, &[DataType::Int8])?;
-        assert_eq!(DataType::UInt64, observed);
-        Ok(())
-    }
-
-    #[test]
-    fn test_avg_return_type() -> Result<()> {
-        let observed = return_type(&AggregateFunction::Avg, &[DataType::Float32])?;
-        assert_eq!(DataType::Float64, observed);
-
-        let observed = return_type(&AggregateFunction::Avg, &[DataType::Float64])?;
-        assert_eq!(DataType::Float64, observed);
-        Ok(())
-    }
-
-    #[test]
-    fn test_avg_no_utf8() {
-        let observed = return_type(&AggregateFunction::Avg, &[DataType::Utf8]);
-        assert!(observed.is_err());
-    }
-}
diff --git a/rust/datafusion/src/physical_plan/array_expressions.rs b/rust/datafusion/src/physical_plan/array_expressions.rs
deleted file mode 100644
index a7e03b70e5d..00000000000
--- a/rust/datafusion/src/physical_plan/array_expressions.rs
+++ /dev/null
@@ -1,127 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Array expressions
-
-use crate::error::{DataFusionError, Result};
-use arrow::array::*;
-use arrow::datatypes::DataType;
-use std::sync::Arc;
-
-use super::ColumnarValue;
-
-macro_rules! downcast_vec {
-    ($ARGS:expr, $ARRAY_TYPE:ident) => {{
-        $ARGS
-            .iter()
-            .map(|e| match e.as_any().downcast_ref::<$ARRAY_TYPE>() {
-                Some(array) => Ok(array),
-                _ => Err(DataFusionError::Internal("failed to downcast".to_string())),
-            })
-    }};
-}
-
-macro_rules! array {
-    ($ARGS:expr, $ARRAY_TYPE:ident, $BUILDER_TYPE:ident) => {{
-        // downcast all arguments to their common format
-        let args =
-            downcast_vec!($ARGS, $ARRAY_TYPE).collect::<Result<Vec<&$ARRAY_TYPE>>>()?;
-
-        let mut builder = FixedSizeListBuilder::<$BUILDER_TYPE>::new(
-            <$BUILDER_TYPE>::new(args[0].len()),
-            args.len() as i32,
-        );
-        // for each entry in the array
-        for index in 0..args[0].len() {
-            for arg in &args {
-                if arg.is_null(index) {
-                    builder.values().append_null()?;
-                } else {
-                    builder.values().append_value(arg.value(index))?;
-                }
-            }
-            builder.append(true)?;
-        }
-        Ok(Arc::new(builder.finish()))
-    }};
-}
-
-fn array_array(args: &[&dyn Array]) -> Result<ArrayRef> {
-    // do not accept 0 arguments.
-    if args.is_empty() {
-        return Err(DataFusionError::Internal(
-            "array requires at least one argument".to_string(),
-        ));
-    }
-
-    match args[0].data_type() {
-        DataType::Utf8 => array!(args, StringArray, StringBuilder),
-        DataType::LargeUtf8 => array!(args, LargeStringArray, LargeStringBuilder),
-        DataType::Boolean => array!(args, BooleanArray, BooleanBuilder),
-        DataType::Float32 => array!(args, Float32Array, Float32Builder),
-        DataType::Float64 => array!(args, Float64Array, Float64Builder),
-        DataType::Int8 => array!(args, Int8Array, Int8Builder),
-        DataType::Int16 => array!(args, Int16Array, Int16Builder),
-        DataType::Int32 => array!(args, Int32Array, Int32Builder),
-        DataType::Int64 => array!(args, Int64Array, Int64Builder),
-        DataType::UInt8 => array!(args, UInt8Array, UInt8Builder),
-        DataType::UInt16 => array!(args, UInt16Array, UInt16Builder),
-        DataType::UInt32 => array!(args, UInt32Array, UInt32Builder),
-        DataType::UInt64 => array!(args, UInt64Array, UInt64Builder),
-        data_type => Err(DataFusionError::NotImplemented(format!(
-            "Array is not implemented for type '{:?}'.",
-            data_type
-        ))),
-    }
-}
-
-/// put values in an array.
-pub fn array(values: &[ColumnarValue]) -> Result<ColumnarValue> {
-    let arrays: Vec<&dyn Array> = values
-        .iter()
-        .map(|value| {
-            if let ColumnarValue::Array(value) = value {
-                Ok(value.as_ref())
-            } else {
-                Err(DataFusionError::NotImplemented(
-                    "Array is not implemented for scalar values.".to_string(),
-                ))
-            }
-        })
-        .collect::<Result<_>>()?;
-
-    Ok(ColumnarValue::Array(array_array(&arrays)?))
-}
-
-/// Currently supported types by the array function.
-/// The order of these types correspond to the order on which coercion applies
-/// This should thus be from least informative to most informative
-pub static SUPPORTED_ARRAY_TYPES: &[DataType] = &[
-    DataType::Boolean,
-    DataType::UInt8,
-    DataType::UInt16,
-    DataType::UInt32,
-    DataType::UInt64,
-    DataType::Int8,
-    DataType::Int16,
-    DataType::Int32,
-    DataType::Int64,
-    DataType::Float32,
-    DataType::Float64,
-    DataType::Utf8,
-    DataType::LargeUtf8,
-];
diff --git a/rust/datafusion/src/physical_plan/coalesce_batches.rs b/rust/datafusion/src/physical_plan/coalesce_batches.rs
deleted file mode 100644
index b91e0b672eb..00000000000
--- a/rust/datafusion/src/physical_plan/coalesce_batches.rs
+++ /dev/null
@@ -1,316 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! CoalesceBatchesExec combines small batches into larger batches for more efficient use of
-//! vectorized processing by upstream operators.
-
-use std::any::Any;
-use std::pin::Pin;
-use std::sync::Arc;
-use std::task::{Context, Poll};
-
-use crate::error::{DataFusionError, Result};
-use crate::physical_plan::{
-    ExecutionPlan, Partitioning, RecordBatchStream, SendableRecordBatchStream,
-};
-
-use arrow::compute::kernels::concat::concat;
-use arrow::datatypes::SchemaRef;
-use arrow::error::Result as ArrowResult;
-use arrow::record_batch::RecordBatch;
-use async_trait::async_trait;
-use futures::stream::{Stream, StreamExt};
-use log::debug;
-
-/// CoalesceBatchesExec combines small batches into larger batches for more efficient use of
-/// vectorized processing by upstream operators.
-#[derive(Debug)]
-pub struct CoalesceBatchesExec {
-    /// The input plan
-    input: Arc<dyn ExecutionPlan>,
-    /// Minimum number of rows for coalesces batches
-    target_batch_size: usize,
-}
-
-impl CoalesceBatchesExec {
-    /// Create a new CoalesceBatchesExec
-    pub fn new(input: Arc<dyn ExecutionPlan>, target_batch_size: usize) -> Self {
-        Self {
-            input,
-            target_batch_size,
-        }
-    }
-
-    /// The input plan
-    pub fn input(&self) -> &Arc<dyn ExecutionPlan> {
-        &self.input
-    }
-
-    /// Minimum number of rows for coalesces batches
-    pub fn target_batch_size(&self) -> usize {
-        self.target_batch_size
-    }
-}
-
-#[async_trait]
-impl ExecutionPlan for CoalesceBatchesExec {
-    /// Return a reference to Any that can be used for downcasting
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    /// Get the schema for this execution plan
-    fn schema(&self) -> SchemaRef {
-        // The coalesce batches operator does not make any changes to the schema of its input
-        self.input.schema()
-    }
-
-    fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
-        vec![self.input.clone()]
-    }
-
-    /// Get the output partitioning of this plan
-    fn output_partitioning(&self) -> Partitioning {
-        // The coalesce batches operator does not make any changes to the partitioning of its input
-        self.input.output_partitioning()
-    }
-
-    fn with_new_children(
-        &self,
-        children: Vec<Arc<dyn ExecutionPlan>>,
-    ) -> Result<Arc<dyn ExecutionPlan>> {
-        match children.len() {
-            1 => Ok(Arc::new(CoalesceBatchesExec::new(
-                children[0].clone(),
-                self.target_batch_size,
-            ))),
-            _ => Err(DataFusionError::Internal(
-                "CoalesceBatchesExec wrong number of children".to_string(),
-            )),
-        }
-    }
-
-    async fn execute(&self, partition: usize) -> Result<SendableRecordBatchStream> {
-        Ok(Box::pin(CoalesceBatchesStream {
-            input: self.input.execute(partition).await?,
-            schema: self.input.schema(),
-            target_batch_size: self.target_batch_size,
-            buffer: Vec::new(),
-            buffered_rows: 0,
-            is_closed: false,
-        }))
-    }
-}
-
-struct CoalesceBatchesStream {
-    /// The input plan
-    input: SendableRecordBatchStream,
-    /// The input schema
-    schema: SchemaRef,
-    /// Minimum number of rows for coalesces batches
-    target_batch_size: usize,
-    /// Buffered batches
-    buffer: Vec<RecordBatch>,
-    /// Buffered row count
-    buffered_rows: usize,
-    /// Whether the stream has finished returning all of its data or not
-    is_closed: bool,
-}
-
-impl Stream for CoalesceBatchesStream {
-    type Item = ArrowResult<RecordBatch>;
-
-    fn poll_next(
-        mut self: Pin<&mut Self>,
-        cx: &mut Context<'_>,
-    ) -> Poll<Option<Self::Item>> {
-        if self.is_closed {
-            return Poll::Ready(None);
-        }
-        loop {
-            let input_batch = self.input.poll_next_unpin(cx);
-            match input_batch {
-                Poll::Ready(x) => match x {
-                    Some(Ok(ref batch)) => {
-                        if batch.num_rows() >= self.target_batch_size
-                            && self.buffer.is_empty()
-                        {
-                            return Poll::Ready(Some(Ok(batch.clone())));
-                        } else if batch.num_rows() == 0 {
-                            // discard empty batches
-                        } else {
-                            // add to the buffered batches
-                            self.buffer.push(batch.clone());
-                            self.buffered_rows += batch.num_rows();
-                            // check to see if we have enough batches yet
-                            if self.buffered_rows >= self.target_batch_size {
-                                // combine the batches and return
-                                let batch = concat_batches(
-                                    &self.schema,
-                                    &self.buffer,
-                                    self.buffered_rows,
-                                )?;
-                                // reset buffer state
-                                self.buffer.clear();
-                                self.buffered_rows = 0;
-                                // return batch
-                                return Poll::Ready(Some(Ok(batch)));
-                            }
-                        }
-                    }
-                    None => {
-                        self.is_closed = true;
-                        // we have reached the end of the input stream but there could still
-                        // be buffered batches
-                        if self.buffer.is_empty() {
-                            return Poll::Ready(None);
-                        } else {
-                            // combine the batches and return
-                            let batch = concat_batches(
-                                &self.schema,
-                                &self.buffer,
-                                self.buffered_rows,
-                            )?;
-                            // reset buffer state
-                            self.buffer.clear();
-                            self.buffered_rows = 0;
-                            // return batch
-                            return Poll::Ready(Some(Ok(batch)));
-                        }
-                    }
-                    other => return Poll::Ready(other),
-                },
-                Poll::Pending => return Poll::Pending,
-            }
-        }
-    }
-
-    fn size_hint(&self) -> (usize, Option<usize>) {
-        // we can't predict the size of incoming batches so re-use the size hint from the input
-        self.input.size_hint()
-    }
-}
-
-impl RecordBatchStream for CoalesceBatchesStream {
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-}
-
-/// Concatenates an array of `RecordBatch` into one batch
-pub fn concat_batches(
-    schema: &SchemaRef,
-    batches: &[RecordBatch],
-    row_count: usize,
-) -> ArrowResult<RecordBatch> {
-    if batches.is_empty() {
-        return Ok(RecordBatch::new_empty(schema.clone()));
-    }
-    let mut arrays = Vec::with_capacity(schema.fields().len());
-    for i in 0..schema.fields().len() {
-        let array = concat(
-            &batches
-                .iter()
-                .map(|batch| batch.column(i).as_ref())
-                .collect::<Vec<_>>(),
-        )?;
-        arrays.push(array);
-    }
-    debug!(
-        "Combined {} batches containing {} rows",
-        batches.len(),
-        row_count
-    );
-    RecordBatch::try_new(schema.clone(), arrays)
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::physical_plan::{memory::MemoryExec, repartition::RepartitionExec};
-    use arrow::array::UInt32Array;
-    use arrow::datatypes::{DataType, Field, Schema};
-
-    #[tokio::test(flavor = "multi_thread")]
-    async fn test_concat_batches() -> Result<()> {
-        let schema = test_schema();
-        let partition = create_vec_batches(&schema, 10);
-        let partitions = vec![partition];
-
-        let output_partitions = coalesce_batches(&schema, partitions, 21).await?;
-        assert_eq!(1, output_partitions.len());
-
-        // input is 10 batches x 8 rows (80 rows)
-        // expected output is batches of at least 20 rows (except for the final batch)
-        let batches = &output_partitions[0];
-        assert_eq!(4, batches.len());
-        assert_eq!(24, batches[0].num_rows());
-        assert_eq!(24, batches[1].num_rows());
-        assert_eq!(24, batches[2].num_rows());
-        assert_eq!(8, batches[3].num_rows());
-
-        Ok(())
-    }
-
-    fn test_schema() -> Arc<Schema> {
-        Arc::new(Schema::new(vec![Field::new("c0", DataType::UInt32, false)]))
-    }
-
-    fn create_vec_batches(schema: &Arc<Schema>, num_batches: usize) -> Vec<RecordBatch> {
-        let batch = create_batch(schema);
-        let mut vec = Vec::with_capacity(num_batches);
-        for _ in 0..num_batches {
-            vec.push(batch.clone());
-        }
-        vec
-    }
-
-    fn create_batch(schema: &Arc<Schema>) -> RecordBatch {
-        RecordBatch::try_new(
-            schema.clone(),
-            vec![Arc::new(UInt32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8]))],
-        )
-        .unwrap()
-    }
-
-    async fn coalesce_batches(
-        schema: &SchemaRef,
-        input_partitions: Vec<Vec<RecordBatch>>,
-        target_batch_size: usize,
-    ) -> Result<Vec<Vec<RecordBatch>>> {
-        // create physical plan
-        let exec = MemoryExec::try_new(&input_partitions, schema.clone(), None)?;
-        let exec =
-            RepartitionExec::try_new(Arc::new(exec), Partitioning::RoundRobinBatch(1))?;
-        let exec: Arc<dyn ExecutionPlan> =
-            Arc::new(CoalesceBatchesExec::new(Arc::new(exec), target_batch_size));
-
-        // execute and collect results
-        let output_partition_count = exec.output_partitioning().partition_count();
-        let mut output_partitions = Vec::with_capacity(output_partition_count);
-        for i in 0..output_partition_count {
-            // execute this *output* partition and collect all batches
-            let mut stream = exec.execute(i).await?;
-            let mut batches = vec![];
-            while let Some(result) = stream.next().await {
-                batches.push(result?);
-            }
-            output_partitions.push(batches);
-        }
-        Ok(output_partitions)
-    }
-}
diff --git a/rust/datafusion/src/physical_plan/common.rs b/rust/datafusion/src/physical_plan/common.rs
deleted file mode 100644
index 9de7ee2a32d..00000000000
--- a/rust/datafusion/src/physical_plan/common.rs
+++ /dev/null
@@ -1,104 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Defines common code used in execution plans
-
-use std::fs;
-use std::fs::metadata;
-use std::sync::Arc;
-use std::task::{Context, Poll};
-
-use super::{RecordBatchStream, SendableRecordBatchStream};
-use crate::error::{DataFusionError, Result};
-
-use arrow::datatypes::SchemaRef;
-use arrow::error::Result as ArrowResult;
-use arrow::record_batch::RecordBatch;
-use futures::{Stream, TryStreamExt};
-
-/// Stream of record batches
-pub struct SizedRecordBatchStream {
-    schema: SchemaRef,
-    batches: Vec<Arc<RecordBatch>>,
-    index: usize,
-}
-
-impl SizedRecordBatchStream {
-    /// Create a new RecordBatchIterator
-    pub fn new(schema: SchemaRef, batches: Vec<Arc<RecordBatch>>) -> Self {
-        SizedRecordBatchStream {
-            schema,
-            index: 0,
-            batches,
-        }
-    }
-}
-
-impl Stream for SizedRecordBatchStream {
-    type Item = ArrowResult<RecordBatch>;
-
-    fn poll_next(
-        mut self: std::pin::Pin<&mut Self>,
-        _: &mut Context<'_>,
-    ) -> Poll<Option<Self::Item>> {
-        Poll::Ready(if self.index < self.batches.len() {
-            self.index += 1;
-            Some(Ok(self.batches[self.index - 1].as_ref().clone()))
-        } else {
-            None
-        })
-    }
-}
-
-impl RecordBatchStream for SizedRecordBatchStream {
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-}
-
-/// Create a vector of record batches from a stream
-pub async fn collect(stream: SendableRecordBatchStream) -> Result<Vec<RecordBatch>> {
-    stream
-        .try_collect::<Vec<_>>()
-        .await
-        .map_err(DataFusionError::from)
-}
-
-/// Recursively build a list of files in a directory with a given extension
-pub fn build_file_list(dir: &str, filenames: &mut Vec<String>, ext: &str) -> Result<()> {
-    let metadata = metadata(dir)?;
-    if metadata.is_file() {
-        if dir.ends_with(ext) {
-            filenames.push(dir.to_string());
-        }
-    } else {
-        for entry in fs::read_dir(dir)? {
-            let entry = entry?;
-            let path = entry.path();
-            if let Some(path_name) = path.to_str() {
-                if path.is_dir() {
-                    build_file_list(path_name, filenames, ext)?;
-                } else if path_name.ends_with(ext) {
-                    filenames.push(path_name.to_string());
-                }
-            } else {
-                return Err(DataFusionError::Plan("Invalid path".to_string()));
-            }
-        }
-    }
-    Ok(())
-}
diff --git a/rust/datafusion/src/physical_plan/crypto_expressions.rs b/rust/datafusion/src/physical_plan/crypto_expressions.rs
deleted file mode 100644
index 8ad876b24d0..00000000000
--- a/rust/datafusion/src/physical_plan/crypto_expressions.rs
+++ /dev/null
@@ -1,198 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Crypto expressions
-use std::sync::Arc;
-
-use md5::Md5;
-use sha2::{
-    digest::Output as SHA2DigestOutput, Digest as SHA2Digest, Sha224, Sha256, Sha384,
-    Sha512,
-};
-
-use crate::{
-    error::{DataFusionError, Result},
-    scalar::ScalarValue,
-};
-use arrow::{
-    array::{Array, BinaryArray, GenericStringArray, StringOffsetSizeTrait},
-    datatypes::DataType,
-};
-
-use super::{string_expressions::unary_string_function, ColumnarValue};
-
-/// Computes the md5 of a string.
-fn md5_process(input: &str) -> String {
-    let mut digest = Md5::default();
-    digest.update(&input);
-
-    let mut result = String::new();
-
-    for byte in &digest.finalize() {
-        result.push_str(&format!("{:02x}", byte));
-    }
-
-    result
-}
-
-// It's not possible to return &[u8], because trait in trait without short lifetime
-fn sha_process<D: SHA2Digest + Default>(input: &str) -> SHA2DigestOutput<D> {
-    let mut digest = D::default();
-    digest.update(&input);
-
-    digest.finalize()
-}
-
-/// # Errors
-/// This function errors when:
-/// * the number of arguments is not 1
-/// * the first argument is not castable to a `GenericStringArray`
-fn unary_binary_function<T, R, F>(
-    args: &[&dyn Array],
-    op: F,
-    name: &str,
-) -> Result<BinaryArray>
-where
-    R: AsRef<[u8]>,
-    T: StringOffsetSizeTrait,
-    F: Fn(&str) -> R,
-{
-    if args.len() != 1 {
-        return Err(DataFusionError::Internal(format!(
-            "{:?} args were supplied but {} takes exactly one argument",
-            args.len(),
-            name,
-        )));
-    }
-
-    let array = args[0]
-        .as_any()
-        .downcast_ref::<GenericStringArray<T>>()
-        .ok_or_else(|| {
-            DataFusionError::Internal("failed to downcast to string".to_string())
-        })?;
-
-    // first map is the iterator, second is for the `Option<_>`
-    Ok(array.iter().map(|x| x.map(|x| op(x))).collect())
-}
-
-fn handle<F, R>(args: &[ColumnarValue], op: F, name: &str) -> Result<ColumnarValue>
-where
-    R: AsRef<[u8]>,
-    F: Fn(&str) -> R,
-{
-    match &args[0] {
-        ColumnarValue::Array(a) => match a.data_type() {
-            DataType::Utf8 => {
-                Ok(ColumnarValue::Array(Arc::new(unary_binary_function::<
-                    i32,
-                    _,
-                    _,
-                >(
-                    &[a.as_ref()], op, name
-                )?)))
-            }
-            DataType::LargeUtf8 => {
-                Ok(ColumnarValue::Array(Arc::new(unary_binary_function::<
-                    i64,
-                    _,
-                    _,
-                >(
-                    &[a.as_ref()], op, name
-                )?)))
-            }
-            other => Err(DataFusionError::Internal(format!(
-                "Unsupported data type {:?} for function {}",
-                other, name,
-            ))),
-        },
-        ColumnarValue::Scalar(scalar) => match scalar {
-            ScalarValue::Utf8(a) => {
-                let result = a.as_ref().map(|x| (op)(x).as_ref().to_vec());
-                Ok(ColumnarValue::Scalar(ScalarValue::Binary(result)))
-            }
-            ScalarValue::LargeUtf8(a) => {
-                let result = a.as_ref().map(|x| (op)(x).as_ref().to_vec());
-                Ok(ColumnarValue::Scalar(ScalarValue::Binary(result)))
-            }
-            other => Err(DataFusionError::Internal(format!(
-                "Unsupported data type {:?} for function {}",
-                other, name,
-            ))),
-        },
-    }
-}
-
-fn md5_array<T: StringOffsetSizeTrait>(
-    args: &[&dyn Array],
-) -> Result<GenericStringArray<i32>> {
-    unary_string_function::<T, i32, _, _>(args, md5_process, "md5")
-}
-
-/// crypto function that accepts Utf8 or LargeUtf8 and returns a [`ColumnarValue`]
-pub fn md5(args: &[ColumnarValue]) -> Result<ColumnarValue> {
-    match &args[0] {
-        ColumnarValue::Array(a) => match a.data_type() {
-            DataType::Utf8 => Ok(ColumnarValue::Array(Arc::new(md5_array::<i32>(&[
-                a.as_ref()
-            ])?))),
-            DataType::LargeUtf8 => {
-                Ok(ColumnarValue::Array(Arc::new(md5_array::<i64>(&[
-                    a.as_ref()
-                ])?)))
-            }
-            other => Err(DataFusionError::Internal(format!(
-                "Unsupported data type {:?} for function md5",
-                other,
-            ))),
-        },
-        ColumnarValue::Scalar(scalar) => match scalar {
-            ScalarValue::Utf8(a) => {
-                let result = a.as_ref().map(|x| md5_process(x));
-                Ok(ColumnarValue::Scalar(ScalarValue::Utf8(result)))
-            }
-            ScalarValue::LargeUtf8(a) => {
-                let result = a.as_ref().map(|x| md5_process(x));
-                Ok(ColumnarValue::Scalar(ScalarValue::LargeUtf8(result)))
-            }
-            other => Err(DataFusionError::Internal(format!(
-                "Unsupported data type {:?} for function md5",
-                other,
-            ))),
-        },
-    }
-}
-
-/// crypto function that accepts Utf8 or LargeUtf8 and returns a [`ColumnarValue`]
-pub fn sha224(args: &[ColumnarValue]) -> Result<ColumnarValue> {
-    handle(args, sha_process::<Sha224>, "ssh224")
-}
-
-/// crypto function that accepts Utf8 or LargeUtf8 and returns a [`ColumnarValue`]
-pub fn sha256(args: &[ColumnarValue]) -> Result<ColumnarValue> {
-    handle(args, sha_process::<Sha256>, "sha256")
-}
-
-/// crypto function that accepts Utf8 or LargeUtf8 and returns a [`ColumnarValue`]
-pub fn sha384(args: &[ColumnarValue]) -> Result<ColumnarValue> {
-    handle(args, sha_process::<Sha384>, "sha384")
-}
-
-/// crypto function that accepts Utf8 or LargeUtf8 and returns a [`ColumnarValue`]
-pub fn sha512(args: &[ColumnarValue]) -> Result<ColumnarValue> {
-    handle(args, sha_process::<Sha512>, "sha512")
-}
diff --git a/rust/datafusion/src/physical_plan/csv.rs b/rust/datafusion/src/physical_plan/csv.rs
deleted file mode 100644
index 7ee5ae3fd90..00000000000
--- a/rust/datafusion/src/physical_plan/csv.rs
+++ /dev/null
@@ -1,401 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Execution plan for reading CSV files
-
-use std::any::Any;
-use std::fs::File;
-use std::pin::Pin;
-use std::sync::Arc;
-use std::task::{Context, Poll};
-
-use crate::error::{DataFusionError, Result};
-use crate::physical_plan::ExecutionPlan;
-use crate::physical_plan::{common, Partitioning};
-use arrow::csv;
-use arrow::datatypes::{Schema, SchemaRef};
-use arrow::error::Result as ArrowResult;
-use arrow::record_batch::RecordBatch;
-use futures::Stream;
-
-use super::{RecordBatchStream, SendableRecordBatchStream};
-use async_trait::async_trait;
-
-/// CSV file read option
-#[derive(Copy, Clone)]
-pub struct CsvReadOptions<'a> {
-    /// Does the CSV file have a header?
-    ///
-    /// If schema inference is run on a file with no headers, default column names
-    /// are created.
-    pub has_header: bool,
-    /// An optional column delimiter. Defaults to `b','`.
-    pub delimiter: u8,
-    /// An optional schema representing the CSV files. If None, CSV reader will try to infer it
-    /// based on data in file.
-    pub schema: Option<&'a Schema>,
-    /// Max number of rows to read from CSV files for schema inference if needed. Defaults to 1000.
-    pub schema_infer_max_records: usize,
-    /// File extension; only files with this extension are selected for data input.
-    /// Defaults to ".csv".
-    pub file_extension: &'a str,
-}
-
-impl<'a> CsvReadOptions<'a> {
-    /// Create a CSV read option with default presets
-    pub fn new() -> Self {
-        Self {
-            has_header: true,
-            schema: None,
-            schema_infer_max_records: 1000,
-            delimiter: b',',
-            file_extension: ".csv",
-        }
-    }
-
-    /// Configure has_header setting
-    pub fn has_header(mut self, has_header: bool) -> Self {
-        self.has_header = has_header;
-        self
-    }
-
-    /// Specify delimiter to use for CSV read
-    pub fn delimiter(mut self, delimiter: u8) -> Self {
-        self.delimiter = delimiter;
-        self
-    }
-
-    /// Specify the file extension for CSV file selection
-    pub fn file_extension(mut self, file_extension: &'a str) -> Self {
-        self.file_extension = file_extension;
-        self
-    }
-
-    /// Configure delimiter setting with Option, None value will be ignored
-    pub fn delimiter_option(mut self, delimiter: Option<u8>) -> Self {
-        if let Some(d) = delimiter {
-            self.delimiter = d;
-        }
-        self
-    }
-
-    /// Specify schema to use for CSV read
-    pub fn schema(mut self, schema: &'a Schema) -> Self {
-        self.schema = Some(schema);
-        self
-    }
-
-    /// Configure number of max records to read for schema inference
-    pub fn schema_infer_max_records(mut self, max_records: usize) -> Self {
-        self.schema_infer_max_records = max_records;
-        self
-    }
-}
-
-/// Execution plan for scanning a CSV file
-#[derive(Debug, Clone)]
-pub struct CsvExec {
-    /// Path to directory containing partitioned CSV files with the same schema
-    path: String,
-    /// The individual files under path
-    filenames: Vec<String>,
-    /// Schema representing the CSV file
-    schema: SchemaRef,
-    /// Does the CSV file have a header?
-    has_header: bool,
-    /// An optional column delimiter. Defaults to `b','`
-    delimiter: Option<u8>,
-    /// File extension
-    file_extension: String,
-    /// Optional projection for which columns to load
-    projection: Option<Vec<usize>>,
-    /// Schema after the projection has been applied
-    projected_schema: SchemaRef,
-    /// Batch size
-    batch_size: usize,
-    /// Limit in nr. of rows
-    limit: Option<usize>,
-}
-
-impl CsvExec {
-    /// Create a new execution plan for reading a set of CSV files
-    pub fn try_new(
-        path: &str,
-        options: CsvReadOptions,
-        projection: Option<Vec<usize>>,
-        batch_size: usize,
-        limit: Option<usize>,
-    ) -> Result<Self> {
-        let file_extension = String::from(options.file_extension);
-
-        let mut filenames: Vec<String> = vec![];
-        common::build_file_list(path, &mut filenames, file_extension.as_str())?;
-        if filenames.is_empty() {
-            return Err(DataFusionError::Execution(format!(
-                "No files found at {path} with file extension {file_extension}",
-                path = path,
-                file_extension = file_extension.as_str()
-            )));
-        }
-
-        let schema = match options.schema {
-            Some(s) => s.clone(),
-            None => CsvExec::try_infer_schema(&filenames, &options)?,
-        };
-
-        let projected_schema = match &projection {
-            None => schema.clone(),
-            Some(p) => Schema::new(p.iter().map(|i| schema.field(*i).clone()).collect()),
-        };
-
-        Ok(Self {
-            path: path.to_string(),
-            filenames,
-            schema: Arc::new(schema),
-            has_header: options.has_header,
-            delimiter: Some(options.delimiter),
-            file_extension,
-            projection,
-            projected_schema: Arc::new(projected_schema),
-            batch_size,
-            limit,
-        })
-    }
-
-    /// Path to directory containing partitioned CSV files with the same schema
-    pub fn path(&self) -> &str {
-        &self.path
-    }
-
-    /// The individual files under path
-    pub fn filenames(&self) -> &[String] {
-        &self.filenames
-    }
-
-    /// Does the CSV file have a header?
-    pub fn has_header(&self) -> bool {
-        self.has_header
-    }
-
-    /// An optional column delimiter. Defaults to `b','`
-    pub fn delimiter(&self) -> Option<&u8> {
-        self.delimiter.as_ref()
-    }
-
-    /// File extension
-    pub fn file_extension(&self) -> &str {
-        &self.file_extension
-    }
-
-    /// Get the schema of the CSV file
-    pub fn file_schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-
-    /// Optional projection for which columns to load
-    pub fn projection(&self) -> Option<&Vec<usize>> {
-        self.projection.as_ref()
-    }
-
-    /// Batch size
-    pub fn batch_size(&self) -> usize {
-        self.batch_size
-    }
-
-    /// Limit
-    pub fn limit(&self) -> Option<usize> {
-        self.limit
-    }
-
-    /// Infer schema for given CSV dataset
-    pub fn try_infer_schema(
-        filenames: &[String],
-        options: &CsvReadOptions,
-    ) -> Result<Schema> {
-        Ok(csv::infer_schema_from_files(
-            filenames,
-            options.delimiter,
-            Some(options.schema_infer_max_records),
-            options.has_header,
-        )?)
-    }
-}
-
-#[async_trait]
-impl ExecutionPlan for CsvExec {
-    /// Return a reference to Any that can be used for downcasting
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    /// Get the schema for this execution plan
-    fn schema(&self) -> SchemaRef {
-        self.projected_schema.clone()
-    }
-
-    /// Get the output partitioning of this plan
-    fn output_partitioning(&self) -> Partitioning {
-        Partitioning::UnknownPartitioning(self.filenames.len())
-    }
-
-    fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
-        // this is a leaf node and has no children
-        vec![]
-    }
-
-    fn with_new_children(
-        &self,
-        children: Vec<Arc<dyn ExecutionPlan>>,
-    ) -> Result<Arc<dyn ExecutionPlan>> {
-        if children.is_empty() {
-            Ok(Arc::new(self.clone()))
-        } else {
-            Err(DataFusionError::Internal(format!(
-                "Children cannot be replaced in {:?}",
-                self
-            )))
-        }
-    }
-
-    async fn execute(&self, partition: usize) -> Result<SendableRecordBatchStream> {
-        Ok(Box::pin(CsvStream::try_new(
-            &self.filenames[partition],
-            self.schema.clone(),
-            self.has_header,
-            self.delimiter,
-            &self.projection,
-            self.batch_size,
-            self.limit,
-        )?))
-    }
-}
-
-/// Iterator over batches
-struct CsvStream {
-    /// Arrow CSV reader
-    reader: csv::Reader<File>,
-}
-
-impl CsvStream {
-    /// Create an iterator for a CSV file
-    pub fn try_new(
-        filename: &str,
-        schema: SchemaRef,
-        has_header: bool,
-        delimiter: Option<u8>,
-        projection: &Option<Vec<usize>>,
-        batch_size: usize,
-        limit: Option<usize>,
-    ) -> Result<Self> {
-        let file = File::open(filename)?;
-        let start_line = if has_header { 1 } else { 0 };
-        let bounds = limit.map(|x| (0, x + start_line));
-
-        let reader = csv::Reader::new(
-            file,
-            schema,
-            has_header,
-            delimiter,
-            batch_size,
-            bounds,
-            projection.clone(),
-        );
-
-        Ok(Self { reader })
-    }
-}
-
-impl Stream for CsvStream {
-    type Item = ArrowResult<RecordBatch>;
-
-    fn poll_next(
-        mut self: Pin<&mut Self>,
-        _: &mut Context<'_>,
-    ) -> Poll<Option<Self::Item>> {
-        Poll::Ready(self.reader.next())
-    }
-}
-
-impl RecordBatchStream for CsvStream {
-    /// Get the schema
-    fn schema(&self) -> SchemaRef {
-        self.reader.schema()
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::test::aggr_test_schema;
-    use futures::StreamExt;
-
-    #[tokio::test]
-    async fn csv_exec_with_projection() -> Result<()> {
-        let schema = aggr_test_schema();
-        let testdata = arrow::util::test_util::arrow_test_data();
-        let filename = "aggregate_test_100.csv";
-        let path = format!("{}/csv/{}", testdata, filename);
-        let csv = CsvExec::try_new(
-            &path,
-            CsvReadOptions::new().schema(&schema),
-            Some(vec![0, 2, 4]),
-            1024,
-            None,
-        )?;
-        assert_eq!(13, csv.schema.fields().len());
-        assert_eq!(3, csv.projected_schema.fields().len());
-        assert_eq!(13, csv.file_schema().fields().len());
-        assert_eq!(3, csv.schema().fields().len());
-        let mut stream = csv.execute(0).await?;
-        let batch = stream.next().await.unwrap()?;
-        assert_eq!(3, batch.num_columns());
-        let batch_schema = batch.schema();
-        assert_eq!(3, batch_schema.fields().len());
-        assert_eq!("c1", batch_schema.field(0).name());
-        assert_eq!("c3", batch_schema.field(1).name());
-        assert_eq!("c5", batch_schema.field(2).name());
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn csv_exec_without_projection() -> Result<()> {
-        let schema = aggr_test_schema();
-        let testdata = arrow::util::test_util::arrow_test_data();
-        let filename = "aggregate_test_100.csv";
-        let path = format!("{}/csv/{}", testdata, filename);
-        let csv = CsvExec::try_new(
-            &path,
-            CsvReadOptions::new().schema(&schema),
-            None,
-            1024,
-            None,
-        )?;
-        assert_eq!(13, csv.schema.fields().len());
-        assert_eq!(13, csv.projected_schema.fields().len());
-        assert_eq!(13, csv.file_schema().fields().len());
-        assert_eq!(13, csv.schema().fields().len());
-        let mut it = csv.execute(0).await?;
-        let batch = it.next().await.unwrap()?;
-        assert_eq!(13, batch.num_columns());
-        let batch_schema = batch.schema();
-        assert_eq!(13, batch_schema.fields().len());
-        assert_eq!("c1", batch_schema.field(0).name());
-        assert_eq!("c2", batch_schema.field(1).name());
-        assert_eq!("c3", batch_schema.field(2).name());
-        Ok(())
-    }
-}
diff --git a/rust/datafusion/src/physical_plan/datetime_expressions.rs b/rust/datafusion/src/physical_plan/datetime_expressions.rs
deleted file mode 100644
index 7b5816186f2..00000000000
--- a/rust/datafusion/src/physical_plan/datetime_expressions.rs
+++ /dev/null
@@ -1,559 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! DateTime expressions
-use std::sync::Arc;
-
-use super::ColumnarValue;
-use crate::{
-    error::{DataFusionError, Result},
-    scalar::{ScalarType, ScalarValue},
-};
-use arrow::{
-    array::{Array, ArrayRef, GenericStringArray, PrimitiveArray, StringOffsetSizeTrait},
-    datatypes::{ArrowPrimitiveType, DataType, TimestampNanosecondType},
-};
-use arrow::{
-    array::{
-        Date32Array, Date64Array, TimestampMicrosecondArray, TimestampMillisecondArray,
-        TimestampNanosecondArray, TimestampSecondArray,
-    },
-    compute::kernels::temporal,
-    datatypes::TimeUnit,
-    temporal_conversions::timestamp_ns_to_datetime,
-};
-use chrono::prelude::*;
-use chrono::Duration;
-use chrono::LocalResult;
-
-#[inline]
-/// Accepts a string in RFC3339 / ISO8601 standard format and some
-/// variants and converts it to a nanosecond precision timestamp.
-///
-/// Implements the `to_timestamp` function to convert a string to a
-/// timestamp, following the model of spark SQL’s to_`timestamp`.
-///
-/// In addition to RFC3339 / ISO8601 standard timestamps, it also
-/// accepts strings that use a space ` ` to separate the date and time
-/// as well as strings that have no explicit timezone offset.
-///
-/// Examples of accepted inputs:
-/// * `1997-01-31T09:26:56.123Z`        # RCF3339
-/// * `1997-01-31T09:26:56.123-05:00`   # RCF3339
-/// * `1997-01-31 09:26:56.123-05:00`   # close to RCF3339 but with a space rather than T
-/// * `1997-01-31T09:26:56.123`         # close to RCF3339 but no timezone offset specified
-/// * `1997-01-31 09:26:56.123`         # close to RCF3339 but uses a space and no timezone offset
-/// * `1997-01-31 09:26:56`             # close to RCF3339, no fractional seconds
-//
-/// Internally, this function uses the `chrono` library for the
-/// datetime parsing
-///
-/// We hope to extend this function in the future with a second
-/// parameter to specifying the format string.
-///
-/// ## Timestamp Precision
-///
-/// DataFusion uses the maximum precision timestamps supported by
-/// Arrow (nanoseconds stored as a 64-bit integer) timestamps. This
-/// means the range of dates that timestamps can represent is ~1677 AD
-/// to 2262 AM
-///
-///
-/// ## Timezone / Offset Handling
-///
-/// By using the Arrow format, DataFusion inherits Arrow’s handling of
-/// timestamp values. Specifically, the stored numerical values of
-/// timestamps are stored compared to offset UTC.
-///
-/// This function intertprets strings without an explicit time zone as
-/// timestamps with offsets of the local time on the machine that ran
-/// the datafusion query
-///
-/// For example, `1997-01-31 09:26:56.123Z` is interpreted as UTC, as
-/// it has an explicit timezone specifier (“Z” for Zulu/UTC)
-///
-/// `1997-01-31T09:26:56.123` is interpreted as a local timestamp in
-/// the timezone of the machine that ran DataFusion. For example, if
-/// the system timezone is set to Americas/New_York (UTC-5) the
-/// timestamp will be interpreted as though it were
-/// `1997-01-31T09:26:56.123-05:00`
-fn string_to_timestamp_nanos(s: &str) -> Result<i64> {
-    // Fast path:  RFC3339 timestamp (with a T)
-    // Example: 2020-09-08T13:42:29.190855Z
-    if let Ok(ts) = DateTime::parse_from_rfc3339(s) {
-        return Ok(ts.timestamp_nanos());
-    }
-
-    // Implement quasi-RFC3339 support by trying to parse the
-    // timestamp with various other format specifiers to to support
-    // separating the date and time with a space ' ' rather than 'T' to be
-    // (more) compatible with Apache Spark SQL
-
-    // timezone offset, using ' ' as a separator
-    // Example: 2020-09-08 13:42:29.190855-05:00
-    if let Ok(ts) = DateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S%.f%:z") {
-        return Ok(ts.timestamp_nanos());
-    }
-
-    // with an explicit Z, using ' ' as a separator
-    // Example: 2020-09-08 13:42:29Z
-    if let Ok(ts) = Utc.datetime_from_str(s, "%Y-%m-%d %H:%M:%S%.fZ") {
-        return Ok(ts.timestamp_nanos());
-    }
-
-    // Support timestamps without an explicit timezone offset, again
-    // to be compatible with what Apache Spark SQL does.
-
-    // without a timezone specifier as a local time, using T as a separator
-    // Example: 2020-09-08T13:42:29.190855
-    if let Ok(ts) = NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S.%f") {
-        return naive_datetime_to_timestamp(s, ts);
-    }
-
-    // without a timezone specifier as a local time, using T as a
-    // separator, no fractional seconds
-    // Example: 2020-09-08T13:42:29
-    if let Ok(ts) = NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S") {
-        return naive_datetime_to_timestamp(s, ts);
-    }
-
-    // without a timezone specifier as a local time, using ' ' as a separator
-    // Example: 2020-09-08 13:42:29.190855
-    if let Ok(ts) = NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S.%f") {
-        return naive_datetime_to_timestamp(s, ts);
-    }
-
-    // without a timezone specifier as a local time, using ' ' as a
-    // separator, no fractional seconds
-    // Example: 2020-09-08 13:42:29
-    if let Ok(ts) = NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S") {
-        return naive_datetime_to_timestamp(s, ts);
-    }
-
-    // Note we don't pass along the error message from the underlying
-    // chrono parsing because we tried several different format
-    // strings and we don't know which the user was trying to
-    // match. Ths any of the specific error messages is likely to be
-    // be more confusing than helpful
-    Err(DataFusionError::Execution(format!(
-        "Error parsing '{}' as timestamp",
-        s
-    )))
-}
-
-/// Converts the naive datetime (which has no specific timezone) to a
-/// nanosecond epoch timestamp relative to UTC.
-fn naive_datetime_to_timestamp(s: &str, datetime: NaiveDateTime) -> Result<i64> {
-    let l = Local {};
-
-    match l.from_local_datetime(&datetime) {
-        LocalResult::None => Err(DataFusionError::Execution(format!(
-            "Error parsing '{}' as timestamp: local time representation is invalid",
-            s
-        ))),
-        LocalResult::Single(local_datetime) => {
-            Ok(local_datetime.with_timezone(&Utc).timestamp_nanos())
-        }
-        // Ambiguous times can happen if the timestamp is exactly when
-        // a daylight savings time transition occurs, for example, and
-        // so the datetime could validly be said to be in two
-        // potential offsets. However, since we are about to convert
-        // to UTC anyways, we can pick one arbitrarily
-        LocalResult::Ambiguous(local_datetime, _) => {
-            Ok(local_datetime.with_timezone(&Utc).timestamp_nanos())
-        }
-    }
-}
-
-// given a function `op` that maps a `&str` to a Result of an arrow native type,
-// returns a `PrimitiveArray` after the application
-// of the function to `args[0]`.
-/// # Errors
-/// This function errors iff:
-/// * the number of arguments is not 1 or
-/// * the first argument is not castable to a `GenericStringArray` or
-/// * the function `op` errors
-pub(crate) fn unary_string_to_primitive_function<'a, T, O, F>(
-    args: &[&'a dyn Array],
-    op: F,
-    name: &str,
-) -> Result<PrimitiveArray<O>>
-where
-    O: ArrowPrimitiveType,
-    T: StringOffsetSizeTrait,
-    F: Fn(&'a str) -> Result<O::Native>,
-{
-    if args.len() != 1 {
-        return Err(DataFusionError::Internal(format!(
-            "{:?} args were supplied but {} takes exactly one argument",
-            args.len(),
-            name,
-        )));
-    }
-
-    let array = args[0]
-        .as_any()
-        .downcast_ref::<GenericStringArray<T>>()
-        .ok_or_else(|| {
-            DataFusionError::Internal("failed to downcast to string".to_string())
-        })?;
-
-    // first map is the iterator, second is for the `Option<_>`
-    array.iter().map(|x| x.map(|x| op(x)).transpose()).collect()
-}
-
-// given an function that maps a `&str` to a arrow native type,
-// returns a `ColumnarValue` where the function is applied to either a `ArrayRef` or `ScalarValue`
-// depending on the `args`'s variant.
-fn handle<'a, O, F, S>(
-    args: &'a [ColumnarValue],
-    op: F,
-    name: &str,
-) -> Result<ColumnarValue>
-where
-    O: ArrowPrimitiveType,
-    S: ScalarType<O::Native>,
-    F: Fn(&'a str) -> Result<O::Native>,
-{
-    match &args[0] {
-        ColumnarValue::Array(a) => match a.data_type() {
-            DataType::Utf8 => Ok(ColumnarValue::Array(Arc::new(
-                unary_string_to_primitive_function::<i32, O, _>(&[a.as_ref()], op, name)?,
-            ))),
-            DataType::LargeUtf8 => Ok(ColumnarValue::Array(Arc::new(
-                unary_string_to_primitive_function::<i64, O, _>(&[a.as_ref()], op, name)?,
-            ))),
-            other => Err(DataFusionError::Internal(format!(
-                "Unsupported data type {:?} for function {}",
-                other, name,
-            ))),
-        },
-        ColumnarValue::Scalar(scalar) => match scalar {
-            ScalarValue::Utf8(a) => {
-                let result = a.as_ref().map(|x| (op)(x)).transpose()?;
-                Ok(ColumnarValue::Scalar(S::scalar(result)))
-            }
-            ScalarValue::LargeUtf8(a) => {
-                let result = a.as_ref().map(|x| (op)(x)).transpose()?;
-                Ok(ColumnarValue::Scalar(S::scalar(result)))
-            }
-            other => Err(DataFusionError::Internal(format!(
-                "Unsupported data type {:?} for function {}",
-                other, name
-            ))),
-        },
-    }
-}
-
-/// to_timestamp SQL function
-pub fn to_timestamp(args: &[ColumnarValue]) -> Result<ColumnarValue> {
-    handle::<TimestampNanosecondType, _, TimestampNanosecondType>(
-        args,
-        string_to_timestamp_nanos,
-        "to_timestamp",
-    )
-}
-
-fn date_trunc_single(granularity: &str, value: i64) -> Result<i64> {
-    let value = timestamp_ns_to_datetime(value).with_nanosecond(0);
-    let value = match granularity {
-        "second" => value,
-        "minute" => value.and_then(|d| d.with_second(0)),
-        "hour" => value
-            .and_then(|d| d.with_second(0))
-            .and_then(|d| d.with_minute(0)),
-        "day" => value
-            .and_then(|d| d.with_second(0))
-            .and_then(|d| d.with_minute(0))
-            .and_then(|d| d.with_hour(0)),
-        "week" => value
-            .and_then(|d| d.with_second(0))
-            .and_then(|d| d.with_minute(0))
-            .and_then(|d| d.with_hour(0))
-            .map(|d| d - Duration::seconds(60 * 60 * 24 * d.weekday() as i64)),
-        "month" => value
-            .and_then(|d| d.with_second(0))
-            .and_then(|d| d.with_minute(0))
-            .and_then(|d| d.with_hour(0))
-            .and_then(|d| d.with_day0(0)),
-        "year" => value
-            .and_then(|d| d.with_second(0))
-            .and_then(|d| d.with_minute(0))
-            .and_then(|d| d.with_hour(0))
-            .and_then(|d| d.with_day0(0))
-            .and_then(|d| d.with_month0(0)),
-        unsupported => {
-            return Err(DataFusionError::Execution(format!(
-                "Unsupported date_trunc granularity: {}",
-                unsupported
-            )))
-        }
-    };
-    // `with_x(0)` are infalible because `0` are always a valid
-    Ok(value.unwrap().timestamp_nanos())
-}
-
-/// date_trunc SQL function
-pub fn date_trunc(args: &[ColumnarValue]) -> Result<ColumnarValue> {
-    let (granularity, array) = (&args[0], &args[1]);
-
-    let granularity =
-        if let ColumnarValue::Scalar(ScalarValue::Utf8(Some(v))) = granularity {
-            v
-        } else {
-            return Err(DataFusionError::Execution(
-                "Granularity of `date_trunc` must be non-null scalar Utf8".to_string(),
-            ));
-        };
-
-    let f = |x: Option<i64>| x.map(|x| date_trunc_single(granularity, x)).transpose();
-
-    Ok(match array {
-        ColumnarValue::Scalar(scalar) => {
-            if let ScalarValue::TimestampNanosecond(v) = scalar {
-                ColumnarValue::Scalar(ScalarValue::TimestampNanosecond((f)(*v)?))
-            } else {
-                return Err(DataFusionError::Execution(
-                    "array of `date_trunc` must be non-null scalar Utf8".to_string(),
-                ));
-            }
-        }
-        ColumnarValue::Array(array) => {
-            let array = array
-                .as_any()
-                .downcast_ref::<TimestampNanosecondArray>()
-                .unwrap();
-            let array = array
-                .iter()
-                .map(f)
-                .collect::<Result<TimestampNanosecondArray>>()?;
-
-            ColumnarValue::Array(Arc::new(array))
-        }
-    })
-}
-
-macro_rules! extract_date_part {
-    ($ARRAY: expr, $FN:expr) => {
-        match $ARRAY.data_type() {
-            DataType::Date32 => {
-                let array = $ARRAY.as_any().downcast_ref::<Date32Array>().unwrap();
-                Ok($FN(array)?)
-            }
-            DataType::Date64 => {
-                let array = $ARRAY.as_any().downcast_ref::<Date64Array>().unwrap();
-                Ok($FN(array)?)
-            }
-            DataType::Timestamp(time_unit, None) => match time_unit {
-                TimeUnit::Second => {
-                    let array = $ARRAY
-                        .as_any()
-                        .downcast_ref::<TimestampSecondArray>()
-                        .unwrap();
-                    Ok($FN(array)?)
-                }
-                TimeUnit::Millisecond => {
-                    let array = $ARRAY
-                        .as_any()
-                        .downcast_ref::<TimestampMillisecondArray>()
-                        .unwrap();
-                    Ok($FN(array)?)
-                }
-                TimeUnit::Microsecond => {
-                    let array = $ARRAY
-                        .as_any()
-                        .downcast_ref::<TimestampMicrosecondArray>()
-                        .unwrap();
-                    Ok($FN(array)?)
-                }
-                TimeUnit::Nanosecond => {
-                    let array = $ARRAY
-                        .as_any()
-                        .downcast_ref::<TimestampNanosecondArray>()
-                        .unwrap();
-                    Ok($FN(array)?)
-                }
-            },
-            datatype => Err(DataFusionError::Internal(format!(
-                "Extract does not support datatype {:?}",
-                datatype
-            ))),
-        }
-    };
-}
-
-/// DATE_PART SQL function
-pub fn date_part(args: &[ColumnarValue]) -> Result<ColumnarValue> {
-    if args.len() != 2 {
-        return Err(DataFusionError::Execution(
-            "Expected two arguments in DATE_PART".to_string(),
-        ));
-    }
-    let (date_part, array) = (&args[0], &args[1]);
-
-    let date_part = if let ColumnarValue::Scalar(ScalarValue::Utf8(Some(v))) = date_part {
-        v
-    } else {
-        return Err(DataFusionError::Execution(
-            "First argument of `DATE_PART` must be non-null scalar Utf8".to_string(),
-        ));
-    };
-
-    let is_scalar = matches!(array, ColumnarValue::Scalar(_));
-
-    let array = match array {
-        ColumnarValue::Array(array) => array.clone(),
-        ColumnarValue::Scalar(scalar) => scalar.to_array(),
-    };
-
-    let arr = match date_part.to_lowercase().as_str() {
-        "hour" => extract_date_part!(array, temporal::hour),
-        "year" => extract_date_part!(array, temporal::year),
-        _ => Err(DataFusionError::Execution(format!(
-            "Date part '{}' not supported",
-            date_part
-        ))),
-    }?;
-
-    Ok(if is_scalar {
-        ColumnarValue::Scalar(ScalarValue::try_from_array(
-            &(Arc::new(arr) as ArrayRef),
-            0,
-        )?)
-    } else {
-        ColumnarValue::Array(Arc::new(arr))
-    })
-}
-
-#[cfg(test)]
-mod tests {
-    use std::sync::Arc;
-
-    use arrow::array::{ArrayRef, Int64Array, StringBuilder};
-
-    use super::*;
-
-    #[test]
-    fn to_timestamp_arrays_and_nulls() -> Result<()> {
-        // ensure that arrow array implementation is wired up and handles nulls correctly
-
-        let mut string_builder = StringBuilder::new(2);
-        let mut ts_builder = TimestampNanosecondArray::builder(2);
-
-        string_builder.append_value("2020-09-08T13:42:29.190855Z")?;
-        ts_builder.append_value(1599572549190855000)?;
-
-        string_builder.append_null()?;
-        ts_builder.append_null()?;
-        let expected_timestamps = &ts_builder.finish() as &dyn Array;
-
-        let string_array =
-            ColumnarValue::Array(Arc::new(string_builder.finish()) as ArrayRef);
-        let parsed_timestamps = to_timestamp(&[string_array])
-            .expect("that to_timestamp parsed values without error");
-        if let ColumnarValue::Array(parsed_array) = parsed_timestamps {
-            assert_eq!(parsed_array.len(), 2);
-            assert_eq!(expected_timestamps, parsed_array.as_ref());
-        } else {
-            panic!("Expected a columnar array")
-        }
-        Ok(())
-    }
-
-    #[test]
-    fn date_trunc_test() {
-        let cases = vec![
-            (
-                "2020-09-08T13:42:29.190855Z",
-                "second",
-                "2020-09-08T13:42:29.000000Z",
-            ),
-            (
-                "2020-09-08T13:42:29.190855Z",
-                "minute",
-                "2020-09-08T13:42:00.000000Z",
-            ),
-            (
-                "2020-09-08T13:42:29.190855Z",
-                "hour",
-                "2020-09-08T13:00:00.000000Z",
-            ),
-            (
-                "2020-09-08T13:42:29.190855Z",
-                "day",
-                "2020-09-08T00:00:00.000000Z",
-            ),
-            (
-                "2020-09-08T13:42:29.190855Z",
-                "week",
-                "2020-09-07T00:00:00.000000Z",
-            ),
-            (
-                "2020-09-08T13:42:29.190855Z",
-                "month",
-                "2020-09-01T00:00:00.000000Z",
-            ),
-            (
-                "2020-09-08T13:42:29.190855Z",
-                "year",
-                "2020-01-01T00:00:00.000000Z",
-            ),
-            (
-                "2021-01-01T13:42:29.190855Z",
-                "week",
-                "2020-12-28T00:00:00.000000Z",
-            ),
-            (
-                "2020-01-01T13:42:29.190855Z",
-                "week",
-                "2019-12-30T00:00:00.000000Z",
-            ),
-        ];
-
-        cases.iter().for_each(|(original, granularity, expected)| {
-            let original = string_to_timestamp_nanos(original).unwrap();
-            let expected = string_to_timestamp_nanos(expected).unwrap();
-            let result = date_trunc_single(granularity, original).unwrap();
-            assert_eq!(result, expected);
-        });
-    }
-
-    #[test]
-    fn to_timestamp_invalid_input_type() -> Result<()> {
-        // pass the wrong type of input array to to_timestamp and test
-        // that we get an error.
-
-        let mut builder = Int64Array::builder(1);
-        builder.append_value(1)?;
-        let int64array = ColumnarValue::Array(Arc::new(builder.finish()));
-
-        let expected_err =
-            "Internal error: Unsupported data type Int64 for function to_timestamp";
-        match to_timestamp(&[int64array]) {
-            Ok(_) => panic!("Expected error but got success"),
-            Err(e) => {
-                assert!(
-                    e.to_string().contains(expected_err),
-                    "Can not find expected error '{}'. Actual error '{}'",
-                    expected_err,
-                    e
-                );
-            }
-        }
-        Ok(())
-    }
-}
diff --git a/rust/datafusion/src/physical_plan/distinct_expressions.rs b/rust/datafusion/src/physical_plan/distinct_expressions.rs
deleted file mode 100644
index 8534e9c8805..00000000000
--- a/rust/datafusion/src/physical_plan/distinct_expressions.rs
+++ /dev/null
@@ -1,557 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Implementations for DISTINCT expressions, e.g. `COUNT(DISTINCT c)`
-
-use std::any::Any;
-use std::convert::TryFrom;
-use std::fmt::Debug;
-use std::hash::Hash;
-use std::sync::Arc;
-
-use arrow::datatypes::{DataType, Field};
-
-use ahash::RandomState;
-use std::collections::HashSet;
-
-use crate::error::{DataFusionError, Result};
-use crate::physical_plan::group_scalar::GroupByScalar;
-use crate::physical_plan::{Accumulator, AggregateExpr, PhysicalExpr};
-use crate::scalar::ScalarValue;
-
-#[derive(Debug, PartialEq, Eq, Hash, Clone)]
-struct DistinctScalarValues(Vec<GroupByScalar>);
-
-fn format_state_name(name: &str, state_name: &str) -> String {
-    format!("{}[{}]", name, state_name)
-}
-
-/// Expression for a COUNT(DISTINCT) aggregation.
-#[derive(Debug)]
-pub struct DistinctCount {
-    /// Column name
-    name: String,
-    /// The DataType for the final count
-    data_type: DataType,
-    /// The DataType for each input argument
-    input_data_types: Vec<DataType>,
-    /// The input arguments
-    exprs: Vec<Arc<dyn PhysicalExpr>>,
-}
-
-impl DistinctCount {
-    /// Create a new COUNT(DISTINCT) aggregate function.
-    pub fn new(
-        input_data_types: Vec<DataType>,
-        exprs: Vec<Arc<dyn PhysicalExpr>>,
-        name: String,
-        data_type: DataType,
-    ) -> Self {
-        Self {
-            input_data_types,
-            exprs,
-            name,
-            data_type,
-        }
-    }
-}
-
-impl AggregateExpr for DistinctCount {
-    /// Return a reference to Any that can be used for downcasting
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn field(&self) -> Result<Field> {
-        Ok(Field::new(&self.name, self.data_type.clone(), true))
-    }
-
-    fn state_fields(&self) -> Result<Vec<Field>> {
-        Ok(self
-            .input_data_types
-            .iter()
-            .map(|data_type| {
-                Field::new(
-                    &format_state_name(&self.name, "count distinct"),
-                    DataType::List(Box::new(Field::new("item", data_type.clone(), true))),
-                    false,
-                )
-            })
-            .collect::<Vec<_>>())
-    }
-
-    fn expressions(&self) -> Vec<Arc<dyn PhysicalExpr>> {
-        self.exprs.clone()
-    }
-
-    fn create_accumulator(&self) -> Result<Box<dyn Accumulator>> {
-        Ok(Box::new(DistinctCountAccumulator {
-            values: HashSet::default(),
-            data_types: self.input_data_types.clone(),
-            count_data_type: self.data_type.clone(),
-        }))
-    }
-}
-
-#[derive(Debug)]
-struct DistinctCountAccumulator {
-    values: HashSet<DistinctScalarValues, RandomState>,
-    data_types: Vec<DataType>,
-    count_data_type: DataType,
-}
-
-impl Accumulator for DistinctCountAccumulator {
-    fn update(&mut self, values: &[ScalarValue]) -> Result<()> {
-        // If a row has a NULL, it is not included in the final count.
-        if !values.iter().any(|v| v.is_null()) {
-            self.values.insert(DistinctScalarValues(
-                values
-                    .iter()
-                    .map(GroupByScalar::try_from)
-                    .collect::<Result<Vec<_>>>()?,
-            ));
-        }
-
-        Ok(())
-    }
-
-    fn merge(&mut self, states: &[ScalarValue]) -> Result<()> {
-        if states.is_empty() {
-            return Ok(());
-        }
-
-        let col_values = states
-            .iter()
-            .map(|state| match state {
-                ScalarValue::List(Some(values), _) => Ok(values),
-                _ => Err(DataFusionError::Internal(format!(
-                    "Unexpected accumulator state {:?}",
-                    state
-                ))),
-            })
-            .collect::<Result<Vec<_>>>()?;
-
-        (0..col_values[0].len()).try_for_each(|row_index| {
-            let row_values = col_values
-                .iter()
-                .map(|col| col[row_index].clone())
-                .collect::<Vec<_>>();
-            self.update(&row_values)
-        })
-    }
-
-    fn state(&self) -> Result<Vec<ScalarValue>> {
-        let mut cols_out = self
-            .data_types
-            .iter()
-            .map(|data_type| ScalarValue::List(Some(Vec::new()), data_type.clone()))
-            .collect::<Vec<_>>();
-
-        let mut cols_vec = cols_out
-            .iter_mut()
-            .map(|c| match c {
-                ScalarValue::List(Some(ref mut v), _) => v,
-                _ => unreachable!(),
-            })
-            .collect::<Vec<_>>();
-
-        self.values.iter().for_each(|distinct_values| {
-            distinct_values.0.iter().enumerate().for_each(
-                |(col_index, distinct_value)| {
-                    cols_vec[col_index].push(ScalarValue::from(distinct_value));
-                },
-            )
-        });
-
-        Ok(cols_out)
-    }
-
-    fn evaluate(&self) -> Result<ScalarValue> {
-        match &self.count_data_type {
-            DataType::UInt64 => Ok(ScalarValue::UInt64(Some(self.values.len() as u64))),
-            t => Err(DataFusionError::Internal(format!(
-                "Invalid data type {:?} for count distinct aggregation",
-                t
-            ))),
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    use arrow::array::ArrayRef;
-    use arrow::array::{
-        Int16Array, Int32Array, Int64Array, Int8Array, ListArray, UInt16Array,
-        UInt32Array, UInt64Array, UInt8Array,
-    };
-    use arrow::array::{Int32Builder, ListBuilder, UInt64Builder};
-    use arrow::datatypes::DataType;
-
-    macro_rules! build_list {
-        ($LISTS:expr, $BUILDER_TYPE:ident) => {{
-            let mut builder = ListBuilder::new($BUILDER_TYPE::new(0));
-            for list in $LISTS.iter() {
-                match list {
-                    Some(values) => {
-                        for value in values.iter() {
-                            match value {
-                                Some(v) => builder.values().append_value((*v).into())?,
-                                None => builder.values().append_null()?,
-                            }
-                        }
-
-                        builder.append(true)?;
-                    }
-                    None => {
-                        builder.append(false)?;
-                    }
-                }
-            }
-
-            let array = Arc::new(builder.finish()) as ArrayRef;
-
-            Ok(array) as Result<ArrayRef>
-        }};
-    }
-
-    macro_rules! state_to_vec {
-        ($LIST:expr, $DATA_TYPE:ident, $PRIM_TY:ty) => {{
-            match $LIST {
-                ScalarValue::List(_, data_type) => match data_type {
-                    DataType::$DATA_TYPE => (),
-                    _ => panic!("Unexpected DataType for list"),
-                },
-                _ => panic!("Expected a ScalarValue::List"),
-            }
-
-            match $LIST {
-                ScalarValue::List(None, _) => None,
-                ScalarValue::List(Some(scalar_values), _) => {
-                    let vec = scalar_values
-                        .iter()
-                        .map(|scalar_value| match scalar_value {
-                            ScalarValue::$DATA_TYPE(value) => *value,
-                            _ => panic!("Unexpected ScalarValue variant"),
-                        })
-                        .collect::<Vec<Option<$PRIM_TY>>>();
-
-                    Some(vec)
-                }
-                _ => unreachable!(),
-            }
-        }};
-    }
-
-    fn collect_states<T: Ord + Clone, S: Ord + Clone>(
-        state1: &[Option<T>],
-        state2: &[Option<S>],
-    ) -> Vec<(Option<T>, Option<S>)> {
-        let mut states = state1
-            .iter()
-            .zip(state2.iter())
-            .map(|(l, r)| (l.clone(), r.clone()))
-            .collect::<Vec<(Option<T>, Option<S>)>>();
-        states.sort();
-        states
-    }
-
-    fn run_update_batch(arrays: &[ArrayRef]) -> Result<(Vec<ScalarValue>, ScalarValue)> {
-        let agg = DistinctCount::new(
-            arrays
-                .iter()
-                .map(|a| a.data_type().clone())
-                .collect::<Vec<_>>(),
-            vec![],
-            String::from("__col_name__"),
-            DataType::UInt64,
-        );
-
-        let mut accum = agg.create_accumulator()?;
-        accum.update_batch(arrays)?;
-
-        Ok((accum.state()?, accum.evaluate()?))
-    }
-
-    fn run_update(
-        data_types: &[DataType],
-        rows: &[Vec<ScalarValue>],
-    ) -> Result<(Vec<ScalarValue>, ScalarValue)> {
-        let agg = DistinctCount::new(
-            data_types.to_vec(),
-            vec![],
-            String::from("__col_name__"),
-            DataType::UInt64,
-        );
-
-        let mut accum = agg.create_accumulator()?;
-
-        for row in rows.iter() {
-            accum.update(row)?
-        }
-
-        Ok((accum.state()?, accum.evaluate()?))
-    }
-
-    fn run_merge_batch(arrays: &[ArrayRef]) -> Result<(Vec<ScalarValue>, ScalarValue)> {
-        let agg = DistinctCount::new(
-            arrays
-                .iter()
-                .map(|a| a.as_any().downcast_ref::<ListArray>().unwrap())
-                .map(|a| a.values().data_type().clone())
-                .collect::<Vec<_>>(),
-            vec![],
-            String::from("__col_name__"),
-            DataType::UInt64,
-        );
-
-        let mut accum = agg.create_accumulator()?;
-        accum.merge_batch(arrays)?;
-
-        Ok((accum.state()?, accum.evaluate()?))
-    }
-
-    macro_rules! test_count_distinct_update_batch_numeric {
-        ($ARRAY_TYPE:ident, $DATA_TYPE:ident, $PRIM_TYPE:ty) => {{
-            let values: Vec<Option<$PRIM_TYPE>> = vec![
-                Some(1),
-                Some(1),
-                None,
-                Some(3),
-                Some(2),
-                None,
-                Some(2),
-                Some(3),
-                Some(1),
-            ];
-
-            let arrays = vec![Arc::new($ARRAY_TYPE::from(values)) as ArrayRef];
-
-            let (states, result) = run_update_batch(&arrays)?;
-
-            let mut state_vec =
-                state_to_vec!(&states[0], $DATA_TYPE, $PRIM_TYPE).unwrap();
-            state_vec.sort();
-
-            assert_eq!(states.len(), 1);
-            assert_eq!(state_vec, vec![Some(1), Some(2), Some(3)]);
-            assert_eq!(result, ScalarValue::UInt64(Some(3)));
-
-            Ok(())
-        }};
-    }
-
-    #[test]
-    fn count_distinct_update_batch_i8() -> Result<()> {
-        test_count_distinct_update_batch_numeric!(Int8Array, Int8, i8)
-    }
-
-    #[test]
-    fn count_distinct_update_batch_i16() -> Result<()> {
-        test_count_distinct_update_batch_numeric!(Int16Array, Int16, i16)
-    }
-
-    #[test]
-    fn count_distinct_update_batch_i32() -> Result<()> {
-        test_count_distinct_update_batch_numeric!(Int32Array, Int32, i32)
-    }
-
-    #[test]
-    fn count_distinct_update_batch_i64() -> Result<()> {
-        test_count_distinct_update_batch_numeric!(Int64Array, Int64, i64)
-    }
-
-    #[test]
-    fn count_distinct_update_batch_u8() -> Result<()> {
-        test_count_distinct_update_batch_numeric!(UInt8Array, UInt8, u8)
-    }
-
-    #[test]
-    fn count_distinct_update_batch_u16() -> Result<()> {
-        test_count_distinct_update_batch_numeric!(UInt16Array, UInt16, u16)
-    }
-
-    #[test]
-    fn count_distinct_update_batch_u32() -> Result<()> {
-        test_count_distinct_update_batch_numeric!(UInt32Array, UInt32, u32)
-    }
-
-    #[test]
-    fn count_distinct_update_batch_u64() -> Result<()> {
-        test_count_distinct_update_batch_numeric!(UInt64Array, UInt64, u64)
-    }
-
-    #[test]
-    fn count_distinct_update_batch_all_nulls() -> Result<()> {
-        let arrays = vec![Arc::new(Int32Array::from(
-            vec![None, None, None, None] as Vec<Option<i32>>
-        )) as ArrayRef];
-
-        let (states, result) = run_update_batch(&arrays)?;
-
-        assert_eq!(states.len(), 1);
-        assert_eq!(state_to_vec!(&states[0], Int32, i32), Some(vec![]));
-        assert_eq!(result, ScalarValue::UInt64(Some(0)));
-
-        Ok(())
-    }
-
-    #[test]
-    fn count_distinct_update_batch_empty() -> Result<()> {
-        let arrays =
-            vec![Arc::new(Int32Array::from(vec![] as Vec<Option<i32>>)) as ArrayRef];
-
-        let (states, result) = run_update_batch(&arrays)?;
-
-        assert_eq!(states.len(), 1);
-        assert_eq!(state_to_vec!(&states[0], Int32, i32), Some(vec![]));
-        assert_eq!(result, ScalarValue::UInt64(Some(0)));
-
-        Ok(())
-    }
-
-    #[test]
-    fn count_distinct_update_batch_multiple_columns() -> Result<()> {
-        let array_int8: ArrayRef = Arc::new(Int8Array::from(vec![1, 1, 2]));
-        let array_int16: ArrayRef = Arc::new(Int16Array::from(vec![3, 3, 4]));
-        let arrays = vec![array_int8, array_int16];
-
-        let (states, result) = run_update_batch(&arrays)?;
-
-        let state_vec1 = state_to_vec!(&states[0], Int8, i8).unwrap();
-        let state_vec2 = state_to_vec!(&states[1], Int16, i16).unwrap();
-        let state_pairs = collect_states::<i8, i16>(&state_vec1, &state_vec2);
-
-        assert_eq!(states.len(), 2);
-        assert_eq!(
-            state_pairs,
-            vec![(Some(1_i8), Some(3_i16)), (Some(2_i8), Some(4_i16))]
-        );
-
-        assert_eq!(result, ScalarValue::UInt64(Some(2)));
-
-        Ok(())
-    }
-
-    #[test]
-    fn count_distinct_update() -> Result<()> {
-        let (states, result) = run_update(
-            &[DataType::Int32, DataType::UInt64],
-            &[
-                vec![ScalarValue::Int32(Some(-1)), ScalarValue::UInt64(Some(5))],
-                vec![ScalarValue::Int32(Some(5)), ScalarValue::UInt64(Some(1))],
-                vec![ScalarValue::Int32(Some(-1)), ScalarValue::UInt64(Some(5))],
-                vec![ScalarValue::Int32(Some(5)), ScalarValue::UInt64(Some(1))],
-                vec![ScalarValue::Int32(Some(-1)), ScalarValue::UInt64(Some(6))],
-                vec![ScalarValue::Int32(Some(-1)), ScalarValue::UInt64(Some(7))],
-                vec![ScalarValue::Int32(Some(2)), ScalarValue::UInt64(Some(7))],
-            ],
-        )?;
-
-        let state_vec1 = state_to_vec!(&states[0], Int32, i32).unwrap();
-        let state_vec2 = state_to_vec!(&states[1], UInt64, u64).unwrap();
-        let state_pairs = collect_states::<i32, u64>(&state_vec1, &state_vec2);
-
-        assert_eq!(states.len(), 2);
-        assert_eq!(
-            state_pairs,
-            vec![
-                (Some(-1_i32), Some(5_u64)),
-                (Some(-1_i32), Some(6_u64)),
-                (Some(-1_i32), Some(7_u64)),
-                (Some(2_i32), Some(7_u64)),
-                (Some(5_i32), Some(1_u64)),
-            ]
-        );
-        assert_eq!(result, ScalarValue::UInt64(Some(5)));
-
-        Ok(())
-    }
-
-    #[test]
-    fn count_distinct_update_with_nulls() -> Result<()> {
-        let (states, result) = run_update(
-            &[DataType::Int32, DataType::UInt64],
-            &[
-                // None of these updates contains a None, so these are accumulated.
-                vec![ScalarValue::Int32(Some(-1)), ScalarValue::UInt64(Some(5))],
-                vec![ScalarValue::Int32(Some(-1)), ScalarValue::UInt64(Some(5))],
-                vec![ScalarValue::Int32(Some(-2)), ScalarValue::UInt64(Some(5))],
-                // Each of these updates contains at least one None, so these
-                // won't be accumulated.
-                vec![ScalarValue::Int32(Some(-1)), ScalarValue::UInt64(None)],
-                vec![ScalarValue::Int32(None), ScalarValue::UInt64(Some(5))],
-                vec![ScalarValue::Int32(None), ScalarValue::UInt64(None)],
-            ],
-        )?;
-
-        let state_vec1 = state_to_vec!(&states[0], Int32, i32).unwrap();
-        let state_vec2 = state_to_vec!(&states[1], UInt64, u64).unwrap();
-        let state_pairs = collect_states::<i32, u64>(&state_vec1, &state_vec2);
-
-        assert_eq!(states.len(), 2);
-        assert_eq!(
-            state_pairs,
-            vec![(Some(-2_i32), Some(5_u64)), (Some(-1_i32), Some(5_u64))]
-        );
-
-        assert_eq!(result, ScalarValue::UInt64(Some(2)));
-
-        Ok(())
-    }
-
-    #[test]
-    fn count_distinct_merge_batch() -> Result<()> {
-        let state_in1 = build_list!(
-            vec![
-                Some(vec![Some(-1_i32), Some(-1_i32), Some(-2_i32), Some(-2_i32)]),
-                Some(vec![Some(-2_i32), Some(-3_i32)]),
-            ],
-            Int32Builder
-        )?;
-
-        let state_in2 = build_list!(
-            vec![
-                Some(vec![Some(5_u64), Some(6_u64), Some(5_u64), Some(7_u64)]),
-                Some(vec![Some(5_u64), Some(7_u64)]),
-            ],
-            UInt64Builder
-        )?;
-
-        let (states, result) = run_merge_batch(&[state_in1, state_in2])?;
-
-        let state_out_vec1 = state_to_vec!(&states[0], Int32, i32).unwrap();
-        let state_out_vec2 = state_to_vec!(&states[1], UInt64, u64).unwrap();
-        let state_pairs = collect_states::<i32, u64>(&state_out_vec1, &state_out_vec2);
-
-        assert_eq!(
-            state_pairs,
-            vec![
-                (Some(-3_i32), Some(7_u64)),
-                (Some(-2_i32), Some(5_u64)),
-                (Some(-2_i32), Some(7_u64)),
-                (Some(-1_i32), Some(5_u64)),
-                (Some(-1_i32), Some(6_u64)),
-            ]
-        );
-
-        assert_eq!(result, ScalarValue::UInt64(Some(5)));
-
-        Ok(())
-    }
-}
diff --git a/rust/datafusion/src/physical_plan/empty.rs b/rust/datafusion/src/physical_plan/empty.rs
deleted file mode 100644
index 3011b289507..00000000000
--- a/rust/datafusion/src/physical_plan/empty.rs
+++ /dev/null
@@ -1,186 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! EmptyRelation execution plan
-
-use std::any::Any;
-use std::sync::Arc;
-
-use crate::error::{DataFusionError, Result};
-use crate::physical_plan::memory::MemoryStream;
-use crate::physical_plan::{Distribution, ExecutionPlan, Partitioning};
-use arrow::array::NullArray;
-use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
-use arrow::record_batch::RecordBatch;
-
-use super::SendableRecordBatchStream;
-
-use async_trait::async_trait;
-
-/// Execution plan for empty relation (produces no rows)
-#[derive(Debug)]
-pub struct EmptyExec {
-    /// Specifies whether this exec produces a row or not
-    produce_one_row: bool,
-    /// The schema for the produced row
-    schema: SchemaRef,
-}
-
-impl EmptyExec {
-    /// Create a new EmptyExec
-    pub fn new(produce_one_row: bool, schema: SchemaRef) -> Self {
-        EmptyExec {
-            produce_one_row,
-            schema,
-        }
-    }
-
-    /// Specifies whether this exec produces a row or not
-    pub fn produce_one_row(&self) -> bool {
-        self.produce_one_row
-    }
-}
-
-#[async_trait]
-impl ExecutionPlan for EmptyExec {
-    /// Return a reference to Any that can be used for downcasting
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-
-    fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
-        vec![]
-    }
-
-    fn required_child_distribution(&self) -> Distribution {
-        Distribution::UnspecifiedDistribution
-    }
-
-    /// Get the output partitioning of this plan
-    fn output_partitioning(&self) -> Partitioning {
-        Partitioning::UnknownPartitioning(1)
-    }
-
-    fn with_new_children(
-        &self,
-        children: Vec<Arc<dyn ExecutionPlan>>,
-    ) -> Result<Arc<dyn ExecutionPlan>> {
-        match children.len() {
-            0 => Ok(Arc::new(EmptyExec::new(false, self.schema.clone()))),
-            _ => Err(DataFusionError::Internal(
-                "EmptyExec wrong number of children".to_string(),
-            )),
-        }
-    }
-
-    async fn execute(&self, partition: usize) -> Result<SendableRecordBatchStream> {
-        // GlobalLimitExec has a single output partition
-        if 0 != partition {
-            return Err(DataFusionError::Internal(format!(
-                "EmptyExec invalid partition {} (expected 0)",
-                partition
-            )));
-        }
-
-        // Makes a stream only contains one null element if needed
-        let data = if self.produce_one_row {
-            vec![RecordBatch::try_new(
-                Arc::new(Schema::new(vec![Field::new(
-                    "placeholder",
-                    DataType::Null,
-                    true,
-                )])),
-                vec![Arc::new(NullArray::new(1))],
-            )?]
-        } else {
-            vec![]
-        };
-
-        Ok(Box::pin(MemoryStream::try_new(
-            data,
-            self.schema.clone(),
-            None,
-        )?))
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::physical_plan::common;
-    use crate::test;
-
-    #[tokio::test]
-    async fn empty() -> Result<()> {
-        let schema = test::aggr_test_schema();
-
-        let empty = EmptyExec::new(false, schema.clone());
-        assert_eq!(empty.schema(), schema);
-
-        // we should have no results
-        let iter = empty.execute(0).await?;
-        let batches = common::collect(iter).await?;
-        assert!(batches.is_empty());
-
-        Ok(())
-    }
-
-    #[test]
-    fn with_new_children() -> Result<()> {
-        let schema = test::aggr_test_schema();
-        let empty = EmptyExec::new(false, schema);
-
-        let empty2 = empty.with_new_children(vec![])?;
-        assert_eq!(empty.schema(), empty2.schema());
-
-        let too_many_kids = vec![empty2];
-        assert!(
-            empty.with_new_children(too_many_kids).is_err(),
-            "expected error when providing list of kids"
-        );
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn invalid_execute() -> Result<()> {
-        let schema = test::aggr_test_schema();
-        let empty = EmptyExec::new(false, schema);
-
-        // ask for the wrong partition
-        assert!(empty.execute(1).await.is_err());
-        assert!(empty.execute(20).await.is_err());
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn produce_one_row() -> Result<()> {
-        let schema = test::aggr_test_schema();
-        let empty = EmptyExec::new(true, schema);
-
-        let iter = empty.execute(0).await?;
-        let batches = common::collect(iter).await?;
-
-        // should have one item
-        assert_eq!(batches.len(), 1);
-
-        Ok(())
-    }
-}
diff --git a/rust/datafusion/src/physical_plan/explain.rs b/rust/datafusion/src/physical_plan/explain.rs
deleted file mode 100644
index 26d2c94dc80..00000000000
--- a/rust/datafusion/src/physical_plan/explain.rs
+++ /dev/null
@@ -1,125 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Defines the EXPLAIN operator
-
-use std::any::Any;
-use std::sync::Arc;
-
-use crate::error::{DataFusionError, Result};
-use crate::{
-    logical_plan::StringifiedPlan,
-    physical_plan::{common::SizedRecordBatchStream, ExecutionPlan},
-};
-use arrow::{array::StringBuilder, datatypes::SchemaRef, record_batch::RecordBatch};
-
-use crate::physical_plan::Partitioning;
-
-use super::SendableRecordBatchStream;
-use async_trait::async_trait;
-
-/// Explain execution plan operator. This operator contains the string
-/// values of the various plans it has when it is created, and passes
-/// them to its output.
-#[derive(Debug, Clone)]
-pub struct ExplainExec {
-    /// The schema that this exec plan node outputs
-    schema: SchemaRef,
-    /// The strings to be printed
-    stringified_plans: Vec<StringifiedPlan>,
-}
-
-impl ExplainExec {
-    /// Create a new ExplainExec
-    pub fn new(schema: SchemaRef, stringified_plans: Vec<StringifiedPlan>) -> Self {
-        ExplainExec {
-            schema,
-            stringified_plans,
-        }
-    }
-
-    /// The strings to be printed
-    pub fn stringified_plans(&self) -> &[StringifiedPlan] {
-        &self.stringified_plans
-    }
-}
-
-#[async_trait]
-impl ExecutionPlan for ExplainExec {
-    /// Return a reference to Any that can be used for downcasting
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-
-    fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
-        // this is a leaf node and has no children
-        vec![]
-    }
-
-    /// Get the output partitioning of this plan
-    fn output_partitioning(&self) -> Partitioning {
-        Partitioning::UnknownPartitioning(1)
-    }
-
-    fn with_new_children(
-        &self,
-        children: Vec<Arc<dyn ExecutionPlan>>,
-    ) -> Result<Arc<dyn ExecutionPlan>> {
-        if children.is_empty() {
-            Ok(Arc::new(self.clone()))
-        } else {
-            Err(DataFusionError::Internal(format!(
-                "Children cannot be replaced in {:?}",
-                self
-            )))
-        }
-    }
-
-    async fn execute(&self, partition: usize) -> Result<SendableRecordBatchStream> {
-        if 0 != partition {
-            return Err(DataFusionError::Internal(format!(
-                "ExplainExec invalid partition {}",
-                partition
-            )));
-        }
-
-        let mut type_builder = StringBuilder::new(self.stringified_plans.len());
-        let mut plan_builder = StringBuilder::new(self.stringified_plans.len());
-
-        for p in &self.stringified_plans {
-            type_builder.append_value(&String::from(&p.plan_type))?;
-            plan_builder.append_value(&*p.plan)?;
-        }
-
-        let record_batch = RecordBatch::try_new(
-            self.schema.clone(),
-            vec![
-                Arc::new(type_builder.finish()),
-                Arc::new(plan_builder.finish()),
-            ],
-        )?;
-
-        Ok(Box::pin(SizedRecordBatchStream::new(
-            self.schema.clone(),
-            vec![Arc::new(record_batch)],
-        )))
-    }
-}
diff --git a/rust/datafusion/src/physical_plan/expressions/average.rs b/rust/datafusion/src/physical_plan/expressions/average.rs
deleted file mode 100644
index 38644129dcd..00000000000
--- a/rust/datafusion/src/physical_plan/expressions/average.rs
+++ /dev/null
@@ -1,293 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Defines physical expressions that can evaluated at runtime during query execution
-
-use std::any::Any;
-use std::convert::TryFrom;
-use std::sync::Arc;
-
-use crate::error::{DataFusionError, Result};
-use crate::physical_plan::{Accumulator, AggregateExpr, PhysicalExpr};
-use crate::scalar::ScalarValue;
-use arrow::compute;
-use arrow::datatypes::DataType;
-use arrow::{
-    array::{ArrayRef, UInt64Array},
-    datatypes::Field,
-};
-
-use super::{format_state_name, sum};
-
-/// AVG aggregate expression
-#[derive(Debug)]
-pub struct Avg {
-    name: String,
-    data_type: DataType,
-    nullable: bool,
-    expr: Arc<dyn PhysicalExpr>,
-}
-
-/// function return type of an average
-pub fn avg_return_type(arg_type: &DataType) -> Result<DataType> {
-    match arg_type {
-        DataType::Int8
-        | DataType::Int16
-        | DataType::Int32
-        | DataType::Int64
-        | DataType::UInt8
-        | DataType::UInt16
-        | DataType::UInt32
-        | DataType::UInt64
-        | DataType::Float32
-        | DataType::Float64 => Ok(DataType::Float64),
-        other => Err(DataFusionError::Plan(format!(
-            "AVG does not support {:?}",
-            other
-        ))),
-    }
-}
-
-impl Avg {
-    /// Create a new AVG aggregate function
-    pub fn new(expr: Arc<dyn PhysicalExpr>, name: String, data_type: DataType) -> Self {
-        Self {
-            name,
-            expr,
-            data_type,
-            nullable: true,
-        }
-    }
-}
-
-impl AggregateExpr for Avg {
-    /// Return a reference to Any that can be used for downcasting
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn field(&self) -> Result<Field> {
-        Ok(Field::new(&self.name, DataType::Float64, true))
-    }
-
-    fn state_fields(&self) -> Result<Vec<Field>> {
-        Ok(vec![
-            Field::new(
-                &format_state_name(&self.name, "count"),
-                DataType::UInt64,
-                true,
-            ),
-            Field::new(
-                &format_state_name(&self.name, "sum"),
-                DataType::Float64,
-                true,
-            ),
-        ])
-    }
-
-    fn create_accumulator(&self) -> Result<Box<dyn Accumulator>> {
-        Ok(Box::new(AvgAccumulator::try_new(
-            // avg is f64
-            &DataType::Float64,
-        )?))
-    }
-
-    fn expressions(&self) -> Vec<Arc<dyn PhysicalExpr>> {
-        vec![self.expr.clone()]
-    }
-}
-
-/// An accumulator to compute the average
-#[derive(Debug)]
-pub struct AvgAccumulator {
-    // sum is used for null
-    sum: ScalarValue,
-    count: u64,
-}
-
-impl AvgAccumulator {
-    /// Creates a new `AvgAccumulator`
-    pub fn try_new(datatype: &DataType) -> Result<Self> {
-        Ok(Self {
-            sum: ScalarValue::try_from(datatype)?,
-            count: 0,
-        })
-    }
-}
-
-impl Accumulator for AvgAccumulator {
-    fn state(&self) -> Result<Vec<ScalarValue>> {
-        Ok(vec![ScalarValue::from(self.count), self.sum.clone()])
-    }
-
-    fn update(&mut self, values: &[ScalarValue]) -> Result<()> {
-        let values = &values[0];
-
-        self.count += (!values.is_null()) as u64;
-        self.sum = sum::sum(&self.sum, values)?;
-
-        Ok(())
-    }
-
-    fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
-        let values = &values[0];
-
-        self.count += (values.len() - values.data().null_count()) as u64;
-        self.sum = sum::sum(&self.sum, &sum::sum_batch(values)?)?;
-        Ok(())
-    }
-
-    fn merge(&mut self, states: &[ScalarValue]) -> Result<()> {
-        let count = &states[0];
-        // counts are summed
-        if let ScalarValue::UInt64(Some(c)) = count {
-            self.count += c
-        } else {
-            unreachable!()
-        };
-
-        // sums are summed
-        self.sum = sum::sum(&self.sum, &states[1])?;
-        Ok(())
-    }
-
-    fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> {
-        let counts = states[0].as_any().downcast_ref::<UInt64Array>().unwrap();
-        // counts are summed
-        self.count += compute::sum(counts).unwrap_or(0);
-
-        // sums are summed
-        self.sum = sum::sum(&self.sum, &sum::sum_batch(&states[1])?)?;
-        Ok(())
-    }
-
-    fn evaluate(&self) -> Result<ScalarValue> {
-        match self.sum {
-            ScalarValue::Float64(e) => {
-                Ok(ScalarValue::Float64(e.map(|f| f / self.count as f64)))
-            }
-            _ => Err(DataFusionError::Internal(
-                "Sum should be f64 on average".to_string(),
-            )),
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::physical_plan::expressions::col;
-    use crate::{error::Result, generic_test_op};
-    use arrow::record_batch::RecordBatch;
-    use arrow::{array::*, datatypes::*};
-
-    #[test]
-    fn avg_i32() -> Result<()> {
-        let a: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5]));
-        generic_test_op!(
-            a,
-            DataType::Int32,
-            Avg,
-            ScalarValue::from(3_f64),
-            DataType::Float64
-        )
-    }
-
-    #[test]
-    fn avg_i32_with_nulls() -> Result<()> {
-        let a: ArrayRef = Arc::new(Int32Array::from(vec![
-            Some(1),
-            None,
-            Some(3),
-            Some(4),
-            Some(5),
-        ]));
-        generic_test_op!(
-            a,
-            DataType::Int32,
-            Avg,
-            ScalarValue::from(3.25f64),
-            DataType::Float64
-        )
-    }
-
-    #[test]
-    fn avg_i32_all_nulls() -> Result<()> {
-        let a: ArrayRef = Arc::new(Int32Array::from(vec![None, None]));
-        generic_test_op!(
-            a,
-            DataType::Int32,
-            Avg,
-            ScalarValue::Float64(None),
-            DataType::Float64
-        )
-    }
-
-    #[test]
-    fn avg_u32() -> Result<()> {
-        let a: ArrayRef =
-            Arc::new(UInt32Array::from(vec![1_u32, 2_u32, 3_u32, 4_u32, 5_u32]));
-        generic_test_op!(
-            a,
-            DataType::UInt32,
-            Avg,
-            ScalarValue::from(3.0f64),
-            DataType::Float64
-        )
-    }
-
-    #[test]
-    fn avg_f32() -> Result<()> {
-        let a: ArrayRef =
-            Arc::new(Float32Array::from(vec![1_f32, 2_f32, 3_f32, 4_f32, 5_f32]));
-        generic_test_op!(
-            a,
-            DataType::Float32,
-            Avg,
-            ScalarValue::from(3_f64),
-            DataType::Float64
-        )
-    }
-
-    #[test]
-    fn avg_f64() -> Result<()> {
-        let a: ArrayRef =
-            Arc::new(Float64Array::from(vec![1_f64, 2_f64, 3_f64, 4_f64, 5_f64]));
-        generic_test_op!(
-            a,
-            DataType::Float64,
-            Avg,
-            ScalarValue::from(3_f64),
-            DataType::Float64
-        )
-    }
-
-    fn aggregate(
-        batch: &RecordBatch,
-        agg: Arc<dyn AggregateExpr>,
-    ) -> Result<ScalarValue> {
-        let mut accum = agg.create_accumulator()?;
-        let expr = agg.expressions();
-        let values = expr
-            .iter()
-            .map(|e| e.evaluate(batch))
-            .map(|r| r.map(|v| v.into_array(batch.num_rows())))
-            .collect::<Result<Vec<_>>>()?;
-        accum.update_batch(&values)?;
-        accum.evaluate()
-    }
-}
diff --git a/rust/datafusion/src/physical_plan/expressions/binary.rs b/rust/datafusion/src/physical_plan/expressions/binary.rs
deleted file mode 100644
index 5c2d9ce02f5..00000000000
--- a/rust/datafusion/src/physical_plan/expressions/binary.rs
+++ /dev/null
@@ -1,1101 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::{any::Any, sync::Arc};
-
-use arrow::array::*;
-use arrow::compute::kernels::arithmetic::{
-    add, divide, divide_scalar, multiply, subtract,
-};
-use arrow::compute::kernels::boolean::{and_kleene, or_kleene};
-use arrow::compute::kernels::comparison::{eq, gt, gt_eq, lt, lt_eq, neq};
-use arrow::compute::kernels::comparison::{
-    eq_scalar, gt_eq_scalar, gt_scalar, lt_eq_scalar, lt_scalar, neq_scalar,
-};
-use arrow::compute::kernels::comparison::{
-    eq_utf8, gt_eq_utf8, gt_utf8, like_utf8, like_utf8_scalar, lt_eq_utf8, lt_utf8,
-    neq_utf8, nlike_utf8, nlike_utf8_scalar,
-};
-use arrow::compute::kernels::comparison::{
-    eq_utf8_scalar, gt_eq_utf8_scalar, gt_utf8_scalar, lt_eq_utf8_scalar, lt_utf8_scalar,
-    neq_utf8_scalar,
-};
-use arrow::datatypes::{DataType, Schema, TimeUnit};
-use arrow::record_batch::RecordBatch;
-
-use crate::error::{DataFusionError, Result};
-use crate::logical_plan::Operator;
-use crate::physical_plan::expressions::try_cast;
-use crate::physical_plan::{ColumnarValue, PhysicalExpr};
-use crate::scalar::ScalarValue;
-
-use super::coercion::{eq_coercion, numerical_coercion, order_coercion, string_coercion};
-
-/// Binary expression
-#[derive(Debug)]
-pub struct BinaryExpr {
-    left: Arc<dyn PhysicalExpr>,
-    op: Operator,
-    right: Arc<dyn PhysicalExpr>,
-}
-
-impl BinaryExpr {
-    /// Create new binary expression
-    pub fn new(
-        left: Arc<dyn PhysicalExpr>,
-        op: Operator,
-        right: Arc<dyn PhysicalExpr>,
-    ) -> Self {
-        Self { left, op, right }
-    }
-
-    /// Get the left side of the binary expression
-    pub fn left(&self) -> &Arc<dyn PhysicalExpr> {
-        &self.left
-    }
-
-    /// Get the right side of the binary expression
-    pub fn right(&self) -> &Arc<dyn PhysicalExpr> {
-        &self.right
-    }
-
-    /// Get the operator for this binary expression
-    pub fn op(&self) -> &Operator {
-        &self.op
-    }
-}
-
-impl std::fmt::Display for BinaryExpr {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        write!(f, "{} {} {}", self.left, self.op, self.right)
-    }
-}
-
-/// Invoke a compute kernel on a pair of binary data arrays
-macro_rules! compute_utf8_op {
-    ($LEFT:expr, $RIGHT:expr, $OP:ident, $DT:ident) => {{
-        let ll = $LEFT
-            .as_any()
-            .downcast_ref::<$DT>()
-            .expect("compute_op failed to downcast array");
-        let rr = $RIGHT
-            .as_any()
-            .downcast_ref::<$DT>()
-            .expect("compute_op failed to downcast array");
-        Ok(Arc::new(paste::expr! {[<$OP _utf8>]}(&ll, &rr)?))
-    }};
-}
-
-/// Invoke a compute kernel on a data array and a scalar value
-macro_rules! compute_utf8_op_scalar {
-    ($LEFT:expr, $RIGHT:expr, $OP:ident, $DT:ident) => {{
-        let ll = $LEFT
-            .as_any()
-            .downcast_ref::<$DT>()
-            .expect("compute_op failed to downcast array");
-        if let ScalarValue::Utf8(Some(string_value)) = $RIGHT {
-            Ok(Arc::new(paste::expr! {[<$OP _utf8_scalar>]}(
-                &ll,
-                &string_value,
-            )?))
-        } else {
-            Err(DataFusionError::Internal(format!(
-                "compute_utf8_op_scalar failed to cast literal value {}",
-                $RIGHT
-            )))
-        }
-    }};
-}
-
-/// Invoke a compute kernel on a data array and a scalar value
-macro_rules! compute_op_scalar {
-    ($LEFT:expr, $RIGHT:expr, $OP:ident, $DT:ident) => {{
-        use std::convert::TryInto;
-        let ll = $LEFT
-            .as_any()
-            .downcast_ref::<$DT>()
-            .expect("compute_op failed to downcast array");
-        // generate the scalar function name, such as lt_scalar, from the $OP parameter
-        // (which could have a value of lt) and the suffix _scalar
-        Ok(Arc::new(paste::expr! {[<$OP _scalar>]}(
-            &ll,
-            $RIGHT.try_into()?,
-        )?))
-    }};
-}
-
-/// Invoke a compute kernel on array(s)
-macro_rules! compute_op {
-    // invoke binary operator
-    ($LEFT:expr, $RIGHT:expr, $OP:ident, $DT:ident) => {{
-        let ll = $LEFT
-            .as_any()
-            .downcast_ref::<$DT>()
-            .expect("compute_op failed to downcast array");
-        let rr = $RIGHT
-            .as_any()
-            .downcast_ref::<$DT>()
-            .expect("compute_op failed to downcast array");
-        Ok(Arc::new($OP(&ll, &rr)?))
-    }};
-    // invoke unary operator
-    ($OPERAND:expr, $OP:ident, $DT:ident) => {{
-        let operand = $OPERAND
-            .as_any()
-            .downcast_ref::<$DT>()
-            .expect("compute_op failed to downcast array");
-        Ok(Arc::new($OP(&operand)?))
-    }};
-}
-
-macro_rules! binary_string_array_op_scalar {
-    ($LEFT:expr, $RIGHT:expr, $OP:ident) => {{
-        let result: Result<Arc<dyn Array>> = match $LEFT.data_type() {
-            DataType::Utf8 => compute_utf8_op_scalar!($LEFT, $RIGHT, $OP, StringArray),
-            other => Err(DataFusionError::Internal(format!(
-                "Data type {:?} not supported for scalar operation on string array",
-                other
-            ))),
-        };
-        Some(result)
-    }};
-}
-
-macro_rules! binary_string_array_op {
-    ($LEFT:expr, $RIGHT:expr, $OP:ident) => {{
-        match $LEFT.data_type() {
-            DataType::Utf8 => compute_utf8_op!($LEFT, $RIGHT, $OP, StringArray),
-            other => Err(DataFusionError::Internal(format!(
-                "Data type {:?} not supported for binary operation on string arrays",
-                other
-            ))),
-        }
-    }};
-}
-
-/// Invoke a compute kernel on a pair of arrays
-/// The binary_primitive_array_op macro only evaluates for primitive types
-/// like integers and floats.
-macro_rules! binary_primitive_array_op {
-    ($LEFT:expr, $RIGHT:expr, $OP:ident) => {{
-        match $LEFT.data_type() {
-            DataType::Int8 => compute_op!($LEFT, $RIGHT, $OP, Int8Array),
-            DataType::Int16 => compute_op!($LEFT, $RIGHT, $OP, Int16Array),
-            DataType::Int32 => compute_op!($LEFT, $RIGHT, $OP, Int32Array),
-            DataType::Int64 => compute_op!($LEFT, $RIGHT, $OP, Int64Array),
-            DataType::UInt8 => compute_op!($LEFT, $RIGHT, $OP, UInt8Array),
-            DataType::UInt16 => compute_op!($LEFT, $RIGHT, $OP, UInt16Array),
-            DataType::UInt32 => compute_op!($LEFT, $RIGHT, $OP, UInt32Array),
-            DataType::UInt64 => compute_op!($LEFT, $RIGHT, $OP, UInt64Array),
-            DataType::Float32 => compute_op!($LEFT, $RIGHT, $OP, Float32Array),
-            DataType::Float64 => compute_op!($LEFT, $RIGHT, $OP, Float64Array),
-            other => Err(DataFusionError::Internal(format!(
-                "Data type {:?} not supported for binary operation on primitive arrays",
-                other
-            ))),
-        }
-    }};
-}
-
-/// Invoke a compute kernel on an array and a scalar
-/// The binary_primitive_array_op_scalar macro only evaluates for primitive
-/// types like integers and floats.
-macro_rules! binary_primitive_array_op_scalar {
-    ($LEFT:expr, $RIGHT:expr, $OP:ident) => {{
-        let result: Result<Arc<dyn Array>> = match $LEFT.data_type() {
-            DataType::Int8 => compute_op_scalar!($LEFT, $RIGHT, $OP, Int8Array),
-            DataType::Int16 => compute_op_scalar!($LEFT, $RIGHT, $OP, Int16Array),
-            DataType::Int32 => compute_op_scalar!($LEFT, $RIGHT, $OP, Int32Array),
-            DataType::Int64 => compute_op_scalar!($LEFT, $RIGHT, $OP, Int64Array),
-            DataType::UInt8 => compute_op_scalar!($LEFT, $RIGHT, $OP, UInt8Array),
-            DataType::UInt16 => compute_op_scalar!($LEFT, $RIGHT, $OP, UInt16Array),
-            DataType::UInt32 => compute_op_scalar!($LEFT, $RIGHT, $OP, UInt32Array),
-            DataType::UInt64 => compute_op_scalar!($LEFT, $RIGHT, $OP, UInt64Array),
-            DataType::Float32 => compute_op_scalar!($LEFT, $RIGHT, $OP, Float32Array),
-            DataType::Float64 => compute_op_scalar!($LEFT, $RIGHT, $OP, Float64Array),
-            other => Err(DataFusionError::Internal(format!(
-                "Data type {:?} not supported for scalar operation on primitive array",
-                other
-            ))),
-        };
-        Some(result)
-    }};
-}
-
-/// The binary_array_op_scalar macro includes types that extend beyond the primitive,
-/// such as Utf8 strings.
-#[macro_export]
-macro_rules! binary_array_op_scalar {
-    ($LEFT:expr, $RIGHT:expr, $OP:ident) => {{
-        let result: Result<Arc<dyn Array>> = match $LEFT.data_type() {
-            DataType::Int8 => compute_op_scalar!($LEFT, $RIGHT, $OP, Int8Array),
-            DataType::Int16 => compute_op_scalar!($LEFT, $RIGHT, $OP, Int16Array),
-            DataType::Int32 => compute_op_scalar!($LEFT, $RIGHT, $OP, Int32Array),
-            DataType::Int64 => compute_op_scalar!($LEFT, $RIGHT, $OP, Int64Array),
-            DataType::UInt8 => compute_op_scalar!($LEFT, $RIGHT, $OP, UInt8Array),
-            DataType::UInt16 => compute_op_scalar!($LEFT, $RIGHT, $OP, UInt16Array),
-            DataType::UInt32 => compute_op_scalar!($LEFT, $RIGHT, $OP, UInt32Array),
-            DataType::UInt64 => compute_op_scalar!($LEFT, $RIGHT, $OP, UInt64Array),
-            DataType::Float32 => compute_op_scalar!($LEFT, $RIGHT, $OP, Float32Array),
-            DataType::Float64 => compute_op_scalar!($LEFT, $RIGHT, $OP, Float64Array),
-            DataType::Utf8 => compute_utf8_op_scalar!($LEFT, $RIGHT, $OP, StringArray),
-            DataType::Timestamp(TimeUnit::Nanosecond, None) => {
-                compute_op_scalar!($LEFT, $RIGHT, $OP, TimestampNanosecondArray)
-            }
-            DataType::Date32 => {
-                compute_op_scalar!($LEFT, $RIGHT, $OP, Date32Array)
-            }
-            other => Err(DataFusionError::Internal(format!(
-                "Data type {:?} not supported for scalar operation on dyn array",
-                other
-            ))),
-        };
-        Some(result)
-    }};
-}
-
-/// The binary_array_op macro includes types that extend beyond the primitive,
-/// such as Utf8 strings.
-#[macro_export]
-macro_rules! binary_array_op {
-    ($LEFT:expr, $RIGHT:expr, $OP:ident) => {{
-        match $LEFT.data_type() {
-            DataType::Int8 => compute_op!($LEFT, $RIGHT, $OP, Int8Array),
-            DataType::Int16 => compute_op!($LEFT, $RIGHT, $OP, Int16Array),
-            DataType::Int32 => compute_op!($LEFT, $RIGHT, $OP, Int32Array),
-            DataType::Int64 => compute_op!($LEFT, $RIGHT, $OP, Int64Array),
-            DataType::UInt8 => compute_op!($LEFT, $RIGHT, $OP, UInt8Array),
-            DataType::UInt16 => compute_op!($LEFT, $RIGHT, $OP, UInt16Array),
-            DataType::UInt32 => compute_op!($LEFT, $RIGHT, $OP, UInt32Array),
-            DataType::UInt64 => compute_op!($LEFT, $RIGHT, $OP, UInt64Array),
-            DataType::Float32 => compute_op!($LEFT, $RIGHT, $OP, Float32Array),
-            DataType::Float64 => compute_op!($LEFT, $RIGHT, $OP, Float64Array),
-            DataType::Utf8 => compute_utf8_op!($LEFT, $RIGHT, $OP, StringArray),
-            DataType::Timestamp(TimeUnit::Nanosecond, None) => {
-                compute_op!($LEFT, $RIGHT, $OP, TimestampNanosecondArray)
-            }
-            DataType::Date32 => {
-                compute_op!($LEFT, $RIGHT, $OP, Date32Array)
-            }
-            DataType::Date64 => {
-                compute_op!($LEFT, $RIGHT, $OP, Date64Array)
-            }
-            other => Err(DataFusionError::Internal(format!(
-                "Data type {:?} not supported for binary operation on dyn arrays",
-                other
-            ))),
-        }
-    }};
-}
-
-/// Invoke a boolean kernel on a pair of arrays
-macro_rules! boolean_op {
-    ($LEFT:expr, $RIGHT:expr, $OP:ident) => {{
-        let ll = $LEFT
-            .as_any()
-            .downcast_ref::<BooleanArray>()
-            .expect("boolean_op failed to downcast array");
-        let rr = $RIGHT
-            .as_any()
-            .downcast_ref::<BooleanArray>()
-            .expect("boolean_op failed to downcast array");
-        Ok(Arc::new($OP(&ll, &rr)?))
-    }};
-}
-
-/// Coercion rules for all binary operators. Returns the output type
-/// of applying `op` to an argument of `lhs_type` and `rhs_type`.
-fn common_binary_type(
-    lhs_type: &DataType,
-    op: &Operator,
-    rhs_type: &DataType,
-) -> Result<DataType> {
-    // This result MUST be compatible with `binary_coerce`
-    let result = match op {
-        Operator::And | Operator::Or => match (lhs_type, rhs_type) {
-            // logical binary boolean operators can only be evaluated in bools
-            (DataType::Boolean, DataType::Boolean) => Some(DataType::Boolean),
-            _ => None,
-        },
-        // logical equality operators have their own rules, and always return a boolean
-        Operator::Eq | Operator::NotEq => eq_coercion(lhs_type, rhs_type),
-        // "like" operators operate on strings and always return a boolean
-        Operator::Like | Operator::NotLike => string_coercion(lhs_type, rhs_type),
-        // order-comparison operators have their own rules
-        Operator::Lt | Operator::Gt | Operator::GtEq | Operator::LtEq => {
-            order_coercion(lhs_type, rhs_type)
-        }
-        // for math expressions, the final value of the coercion is also the return type
-        // because coercion favours higher information types
-        Operator::Plus | Operator::Minus | Operator::Divide | Operator::Multiply => {
-            numerical_coercion(lhs_type, rhs_type)
-        }
-        Operator::Modulus => {
-            return Err(DataFusionError::NotImplemented(
-                "Modulus operator is still not supported".to_string(),
-            ))
-        }
-    };
-
-    // re-write the error message of failed coercions to include the operator's information
-    match result {
-        None => Err(DataFusionError::Plan(
-            format!(
-                "'{:?} {} {:?}' can't be evaluated because there isn't a common type to coerce the types to",
-                lhs_type, op, rhs_type
-            ),
-        )),
-        Some(t) => Ok(t)
-    }
-}
-
-/// Returns the return type of a binary operator or an error when the binary operator cannot
-/// perform the computation between the argument's types, even after type coercion.
-///
-/// This function makes some assumptions about the underlying available computations.
-pub fn binary_operator_data_type(
-    lhs_type: &DataType,
-    op: &Operator,
-    rhs_type: &DataType,
-) -> Result<DataType> {
-    // validate that it is possible to perform the operation on incoming types.
-    // (or the return datatype cannot be infered)
-    let common_type = common_binary_type(lhs_type, op, rhs_type)?;
-
-    match op {
-        // operators that return a boolean
-        Operator::Eq
-        | Operator::NotEq
-        | Operator::And
-        | Operator::Or
-        | Operator::Like
-        | Operator::NotLike
-        | Operator::Lt
-        | Operator::Gt
-        | Operator::GtEq
-        | Operator::LtEq => Ok(DataType::Boolean),
-        // math operations return the same value as the common coerced type
-        Operator::Plus | Operator::Minus | Operator::Divide | Operator::Multiply => {
-            Ok(common_type)
-        }
-        Operator::Modulus => Err(DataFusionError::NotImplemented(
-            "Modulus operator is still not supported".to_string(),
-        )),
-    }
-}
-
-impl PhysicalExpr for BinaryExpr {
-    /// Return a reference to Any that can be used for downcasting
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn data_type(&self, input_schema: &Schema) -> Result<DataType> {
-        binary_operator_data_type(
-            &self.left.data_type(input_schema)?,
-            &self.op,
-            &self.right.data_type(input_schema)?,
-        )
-    }
-
-    fn nullable(&self, input_schema: &Schema) -> Result<bool> {
-        Ok(self.left.nullable(input_schema)? || self.right.nullable(input_schema)?)
-    }
-
-    fn evaluate(&self, batch: &RecordBatch) -> Result<ColumnarValue> {
-        let left_value = self.left.evaluate(batch)?;
-        let right_value = self.right.evaluate(batch)?;
-        let left_data_type = left_value.data_type();
-        let right_data_type = right_value.data_type();
-
-        if left_data_type != right_data_type {
-            return Err(DataFusionError::Internal(format!(
-                "Cannot evaluate binary expression {:?} with types {:?} and {:?}",
-                self.op, left_data_type, right_data_type
-            )));
-        }
-
-        let scalar_result = match (&left_value, &right_value) {
-            (ColumnarValue::Array(array), ColumnarValue::Scalar(scalar)) => {
-                // if left is array and right is literal - use scalar operations
-                match &self.op {
-                    Operator::Lt => binary_array_op_scalar!(array, scalar.clone(), lt),
-                    Operator::LtEq => {
-                        binary_array_op_scalar!(array, scalar.clone(), lt_eq)
-                    }
-                    Operator::Gt => binary_array_op_scalar!(array, scalar.clone(), gt),
-                    Operator::GtEq => {
-                        binary_array_op_scalar!(array, scalar.clone(), gt_eq)
-                    }
-                    Operator::Eq => binary_array_op_scalar!(array, scalar.clone(), eq),
-                    Operator::NotEq => {
-                        binary_array_op_scalar!(array, scalar.clone(), neq)
-                    }
-                    Operator::Like => {
-                        binary_string_array_op_scalar!(array, scalar.clone(), like)
-                    }
-                    Operator::NotLike => {
-                        binary_string_array_op_scalar!(array, scalar.clone(), nlike)
-                    }
-                    Operator::Divide => {
-                        binary_primitive_array_op_scalar!(array, scalar.clone(), divide)
-                    }
-                    // if scalar operation is not supported - fallback to array implementation
-                    _ => None,
-                }
-            }
-            (ColumnarValue::Scalar(scalar), ColumnarValue::Array(array)) => {
-                // if right is literal and left is array - reverse operator and parameters
-                match &self.op {
-                    Operator::Lt => binary_array_op_scalar!(array, scalar.clone(), gt),
-                    Operator::LtEq => {
-                        binary_array_op_scalar!(array, scalar.clone(), gt_eq)
-                    }
-                    Operator::Gt => binary_array_op_scalar!(array, scalar.clone(), lt),
-                    Operator::GtEq => {
-                        binary_array_op_scalar!(array, scalar.clone(), lt_eq)
-                    }
-                    Operator::Eq => binary_array_op_scalar!(array, scalar.clone(), eq),
-                    Operator::NotEq => {
-                        binary_array_op_scalar!(array, scalar.clone(), neq)
-                    }
-                    // if scalar operation is not supported - fallback to array implementation
-                    _ => None,
-                }
-            }
-            (_, _) => None,
-        };
-
-        if let Some(result) = scalar_result {
-            return result.map(|a| ColumnarValue::Array(a));
-        }
-
-        // if both arrays or both literals - extract arrays and continue execution
-        let (left, right) = (
-            left_value.into_array(batch.num_rows()),
-            right_value.into_array(batch.num_rows()),
-        );
-
-        let result: Result<ArrayRef> = match &self.op {
-            Operator::Like => binary_string_array_op!(left, right, like),
-            Operator::NotLike => binary_string_array_op!(left, right, nlike),
-            Operator::Lt => binary_array_op!(left, right, lt),
-            Operator::LtEq => binary_array_op!(left, right, lt_eq),
-            Operator::Gt => binary_array_op!(left, right, gt),
-            Operator::GtEq => binary_array_op!(left, right, gt_eq),
-            Operator::Eq => binary_array_op!(left, right, eq),
-            Operator::NotEq => binary_array_op!(left, right, neq),
-            Operator::Plus => binary_primitive_array_op!(left, right, add),
-            Operator::Minus => binary_primitive_array_op!(left, right, subtract),
-            Operator::Multiply => binary_primitive_array_op!(left, right, multiply),
-            Operator::Divide => binary_primitive_array_op!(left, right, divide),
-            Operator::And => {
-                if left_data_type == DataType::Boolean {
-                    boolean_op!(left, right, and_kleene)
-                } else {
-                    return Err(DataFusionError::Internal(format!(
-                        "Cannot evaluate binary expression {:?} with types {:?} and {:?}",
-                        self.op,
-                        left.data_type(),
-                        right.data_type()
-                    )));
-                }
-            }
-            Operator::Or => {
-                if left_data_type == DataType::Boolean {
-                    boolean_op!(left, right, or_kleene)
-                } else {
-                    return Err(DataFusionError::Internal(format!(
-                        "Cannot evaluate binary expression {:?} with types {:?} and {:?}",
-                        self.op, left_data_type, right_data_type
-                    )));
-                }
-            }
-            Operator::Modulus => Err(DataFusionError::NotImplemented(
-                "Modulus operator is still not supported".to_string(),
-            )),
-        };
-        result.map(|a| ColumnarValue::Array(a))
-    }
-}
-
-/// return two physical expressions that are optionally coerced to a
-/// common type that the binary operator supports.
-fn binary_cast(
-    lhs: Arc<dyn PhysicalExpr>,
-    op: &Operator,
-    rhs: Arc<dyn PhysicalExpr>,
-    input_schema: &Schema,
-) -> Result<(Arc<dyn PhysicalExpr>, Arc<dyn PhysicalExpr>)> {
-    let lhs_type = &lhs.data_type(input_schema)?;
-    let rhs_type = &rhs.data_type(input_schema)?;
-
-    let cast_type = common_binary_type(lhs_type, op, rhs_type)?;
-
-    Ok((
-        try_cast(lhs, input_schema, cast_type.clone())?,
-        try_cast(rhs, input_schema, cast_type)?,
-    ))
-}
-
-/// Create a binary expression whose arguments are correctly coerced.
-/// This function errors if it is not possible to coerce the arguments
-/// to computational types supported by the operator.
-pub fn binary(
-    lhs: Arc<dyn PhysicalExpr>,
-    op: Operator,
-    rhs: Arc<dyn PhysicalExpr>,
-    input_schema: &Schema,
-) -> Result<Arc<dyn PhysicalExpr>> {
-    let (l, r) = binary_cast(lhs, &op, rhs, input_schema)?;
-    Ok(Arc::new(BinaryExpr::new(l, op, r)))
-}
-
-#[cfg(test)]
-mod tests {
-    use arrow::datatypes::{ArrowNumericType, Field, Int32Type, SchemaRef};
-    use arrow::util::display::array_value_to_string;
-
-    use super::*;
-    use crate::error::Result;
-    use crate::physical_plan::expressions::col;
-
-    // Create a binary expression without coercion. Used here when we do not want to coerce the expressions
-    // to valid types. Usage can result in an execution (after plan) error.
-    fn binary_simple(
-        l: Arc<dyn PhysicalExpr>,
-        op: Operator,
-        r: Arc<dyn PhysicalExpr>,
-    ) -> Arc<dyn PhysicalExpr> {
-        Arc::new(BinaryExpr::new(l, op, r))
-    }
-
-    #[test]
-    fn binary_comparison() -> Result<()> {
-        let schema = Schema::new(vec![
-            Field::new("a", DataType::Int32, false),
-            Field::new("b", DataType::Int32, false),
-        ]);
-        let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
-        let b = Int32Array::from(vec![1, 2, 4, 8, 16]);
-        let batch =
-            RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a), Arc::new(b)])?;
-
-        // expression: "a < b"
-        let lt = binary_simple(col("a"), Operator::Lt, col("b"));
-        let result = lt.evaluate(&batch)?.into_array(batch.num_rows());
-        assert_eq!(result.len(), 5);
-
-        let expected = vec![false, false, true, true, true];
-        let result = result
-            .as_any()
-            .downcast_ref::<BooleanArray>()
-            .expect("failed to downcast to BooleanArray");
-        for (i, &expected_item) in expected.iter().enumerate().take(5) {
-            assert_eq!(result.value(i), expected_item);
-        }
-
-        Ok(())
-    }
-
-    #[test]
-    fn binary_nested() -> Result<()> {
-        let schema = Schema::new(vec![
-            Field::new("a", DataType::Int32, false),
-            Field::new("b", DataType::Int32, false),
-        ]);
-        let a = Int32Array::from(vec![2, 4, 6, 8, 10]);
-        let b = Int32Array::from(vec![2, 5, 4, 8, 8]);
-        let batch =
-            RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a), Arc::new(b)])?;
-
-        // expression: "a < b OR a == b"
-        let expr = binary_simple(
-            binary_simple(col("a"), Operator::Lt, col("b")),
-            Operator::Or,
-            binary_simple(col("a"), Operator::Eq, col("b")),
-        );
-        assert_eq!("a < b OR a = b", format!("{}", expr));
-
-        let result = expr.evaluate(&batch)?.into_array(batch.num_rows());
-        assert_eq!(result.len(), 5);
-
-        let expected = vec![true, true, false, true, false];
-        let result = result
-            .as_any()
-            .downcast_ref::<BooleanArray>()
-            .expect("failed to downcast to BooleanArray");
-        for (i, &expected_item) in expected.iter().enumerate().take(5) {
-            assert_eq!(result.value(i), expected_item);
-        }
-
-        Ok(())
-    }
-
-    // runs an end-to-end test of physical type coercion:
-    // 1. construct a record batch with two columns of type A and B
-    //  (*_ARRAY is the Rust Arrow array type, and *_TYPE is the DataType of the elements)
-    // 2. construct a physical expression of A OP B
-    // 3. evaluate the expression
-    // 4. verify that the resulting expression is of type C
-    // 5. verify that the results of evaluation are $VEC
-    macro_rules! test_coercion {
-        ($A_ARRAY:ident, $A_TYPE:expr, $A_VEC:expr, $B_ARRAY:ident, $B_TYPE:expr, $B_VEC:expr, $OP:expr, $C_ARRAY:ident, $C_TYPE:expr, $VEC:expr) => {{
-            let schema = Schema::new(vec![
-                Field::new("a", $A_TYPE, false),
-                Field::new("b", $B_TYPE, false),
-            ]);
-            let a = $A_ARRAY::from($A_VEC);
-            let b = $B_ARRAY::from($B_VEC);
-            let batch = RecordBatch::try_new(
-                Arc::new(schema.clone()),
-                vec![Arc::new(a), Arc::new(b)],
-            )?;
-
-            // verify that we can construct the expression
-            let expression = binary(col("a"), $OP, col("b"), &schema)?;
-
-            // verify that the expression's type is correct
-            assert_eq!(expression.data_type(&schema)?, $C_TYPE);
-
-            // compute
-            let result = expression.evaluate(&batch)?.into_array(batch.num_rows());
-
-            // verify that the array's data_type is correct
-            assert_eq!(*result.data_type(), $C_TYPE);
-
-            // verify that the data itself is downcastable
-            let result = result
-                .as_any()
-                .downcast_ref::<$C_ARRAY>()
-                .expect("failed to downcast");
-            // verify that the result itself is correct
-            for (i, x) in $VEC.iter().enumerate() {
-                assert_eq!(result.value(i), *x);
-            }
-        }};
-    }
-
-    #[test]
-    fn test_type_coersion() -> Result<()> {
-        test_coercion!(
-            Int32Array,
-            DataType::Int32,
-            vec![1i32, 2i32],
-            UInt32Array,
-            DataType::UInt32,
-            vec![1u32, 2u32],
-            Operator::Plus,
-            Int32Array,
-            DataType::Int32,
-            vec![2i32, 4i32]
-        );
-        test_coercion!(
-            Int32Array,
-            DataType::Int32,
-            vec![1i32],
-            UInt16Array,
-            DataType::UInt16,
-            vec![1u16],
-            Operator::Plus,
-            Int32Array,
-            DataType::Int32,
-            vec![2i32]
-        );
-        test_coercion!(
-            Float32Array,
-            DataType::Float32,
-            vec![1f32],
-            UInt16Array,
-            DataType::UInt16,
-            vec![1u16],
-            Operator::Plus,
-            Float32Array,
-            DataType::Float32,
-            vec![2f32]
-        );
-        test_coercion!(
-            Float32Array,
-            DataType::Float32,
-            vec![2f32],
-            UInt16Array,
-            DataType::UInt16,
-            vec![1u16],
-            Operator::Multiply,
-            Float32Array,
-            DataType::Float32,
-            vec![2f32]
-        );
-        test_coercion!(
-            StringArray,
-            DataType::Utf8,
-            vec!["hello world", "world"],
-            StringArray,
-            DataType::Utf8,
-            vec!["%hello%", "%hello%"],
-            Operator::Like,
-            BooleanArray,
-            DataType::Boolean,
-            vec![true, false]
-        );
-        test_coercion!(
-            StringArray,
-            DataType::Utf8,
-            vec!["1994-12-13", "1995-01-26"],
-            Date32Array,
-            DataType::Date32,
-            vec![9112, 9156],
-            Operator::Eq,
-            BooleanArray,
-            DataType::Boolean,
-            vec![true, true]
-        );
-        test_coercion!(
-            StringArray,
-            DataType::Utf8,
-            vec!["1994-12-13", "1995-01-26"],
-            Date32Array,
-            DataType::Date32,
-            vec![9113, 9154],
-            Operator::Lt,
-            BooleanArray,
-            DataType::Boolean,
-            vec![true, false]
-        );
-        test_coercion!(
-            StringArray,
-            DataType::Utf8,
-            vec!["1994-12-13T12:34:56", "1995-01-26T01:23:45"],
-            Date64Array,
-            DataType::Date64,
-            vec![787322096000, 791083425000],
-            Operator::Eq,
-            BooleanArray,
-            DataType::Boolean,
-            vec![true, true]
-        );
-        test_coercion!(
-            StringArray,
-            DataType::Utf8,
-            vec!["1994-12-13T12:34:56", "1995-01-26T01:23:45"],
-            Date64Array,
-            DataType::Date64,
-            vec![787322096001, 791083424999],
-            Operator::Lt,
-            BooleanArray,
-            DataType::Boolean,
-            vec![true, false]
-        );
-        Ok(())
-    }
-
-    // Note it would be nice to use the same test_coercion macro as
-    // above, but sadly the type of the values of the dictionary are
-    // not encoded in the rust type of the DictionaryArray. Thus there
-    // is no way at the time of this writing to create a dictionary
-    // array using the `From` trait
-    #[test]
-    fn test_dictionary_type_to_array_coersion() -> Result<()> {
-        // Test string  a string dictionary
-        let dict_type =
-            DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8));
-        let string_type = DataType::Utf8;
-
-        // build dictionary
-        let keys_builder = PrimitiveBuilder::<Int32Type>::new(10);
-        let values_builder = arrow::array::StringBuilder::new(10);
-        let mut dict_builder = StringDictionaryBuilder::new(keys_builder, values_builder);
-
-        dict_builder.append("one")?;
-        dict_builder.append_null()?;
-        dict_builder.append("three")?;
-        dict_builder.append("four")?;
-        let dict_array = dict_builder.finish();
-
-        let str_array =
-            StringArray::from(vec![Some("not one"), Some("two"), None, Some("four")]);
-
-        let schema = Arc::new(Schema::new(vec![
-            Field::new("dict", dict_type, true),
-            Field::new("str", string_type, true),
-        ]));
-
-        let batch = RecordBatch::try_new(
-            schema.clone(),
-            vec![Arc::new(dict_array), Arc::new(str_array)],
-        )?;
-
-        let expected = "false\n\n\ntrue";
-
-        // Test 1: dict = str
-
-        // verify that we can construct the expression
-        let expression = binary(col("dict"), Operator::Eq, col("str"), &schema)?;
-        assert_eq!(expression.data_type(&schema)?, DataType::Boolean);
-
-        // evaluate and verify the result type matched
-        let result = expression.evaluate(&batch)?.into_array(batch.num_rows());
-        assert_eq!(result.data_type(), &DataType::Boolean);
-
-        // verify that the result itself is correct
-        assert_eq!(expected, array_to_string(&result)?);
-
-        // Test 2: now test the other direction
-        // str = dict
-
-        // verify that we can construct the expression
-        let expression = binary(col("str"), Operator::Eq, col("dict"), &schema)?;
-        assert_eq!(expression.data_type(&schema)?, DataType::Boolean);
-
-        // evaluate and verify the result type matched
-        let result = expression.evaluate(&batch)?.into_array(batch.num_rows());
-        assert_eq!(result.data_type(), &DataType::Boolean);
-
-        // verify that the result itself is correct
-        assert_eq!(expected, array_to_string(&result)?);
-
-        Ok(())
-    }
-
-    // Convert the array to a newline delimited string of pretty printed values
-    fn array_to_string(array: &ArrayRef) -> Result<String> {
-        let s = (0..array.len())
-            .map(|i| array_value_to_string(array, i))
-            .collect::<std::result::Result<Vec<_>, arrow::error::ArrowError>>()?
-            .join("\n");
-        Ok(s)
-    }
-
-    #[test]
-    fn plus_op() -> Result<()> {
-        let schema = Schema::new(vec![
-            Field::new("a", DataType::Int32, false),
-            Field::new("b", DataType::Int32, false),
-        ]);
-        let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
-        let b = Int32Array::from(vec![1, 2, 4, 8, 16]);
-
-        apply_arithmetic::<Int32Type>(
-            Arc::new(schema),
-            vec![Arc::new(a), Arc::new(b)],
-            Operator::Plus,
-            Int32Array::from(vec![2, 4, 7, 12, 21]),
-        )?;
-
-        Ok(())
-    }
-
-    #[test]
-    fn minus_op() -> Result<()> {
-        let schema = Arc::new(Schema::new(vec![
-            Field::new("a", DataType::Int32, false),
-            Field::new("b", DataType::Int32, false),
-        ]));
-        let a = Arc::new(Int32Array::from(vec![1, 2, 4, 8, 16]));
-        let b = Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5]));
-
-        apply_arithmetic::<Int32Type>(
-            schema.clone(),
-            vec![a.clone(), b.clone()],
-            Operator::Minus,
-            Int32Array::from(vec![0, 0, 1, 4, 11]),
-        )?;
-
-        // should handle have negative values in result (for signed)
-        apply_arithmetic::<Int32Type>(
-            schema,
-            vec![b, a],
-            Operator::Minus,
-            Int32Array::from(vec![0, 0, -1, -4, -11]),
-        )?;
-
-        Ok(())
-    }
-
-    #[test]
-    fn multiply_op() -> Result<()> {
-        let schema = Arc::new(Schema::new(vec![
-            Field::new("a", DataType::Int32, false),
-            Field::new("b", DataType::Int32, false),
-        ]));
-        let a = Arc::new(Int32Array::from(vec![4, 8, 16, 32, 64]));
-        let b = Arc::new(Int32Array::from(vec![2, 4, 8, 16, 32]));
-
-        apply_arithmetic::<Int32Type>(
-            schema,
-            vec![a, b],
-            Operator::Multiply,
-            Int32Array::from(vec![8, 32, 128, 512, 2048]),
-        )?;
-
-        Ok(())
-    }
-
-    #[test]
-    fn divide_op() -> Result<()> {
-        let schema = Arc::new(Schema::new(vec![
-            Field::new("a", DataType::Int32, false),
-            Field::new("b", DataType::Int32, false),
-        ]));
-        let a = Arc::new(Int32Array::from(vec![8, 32, 128, 512, 2048]));
-        let b = Arc::new(Int32Array::from(vec![2, 4, 8, 16, 32]));
-
-        apply_arithmetic::<Int32Type>(
-            schema,
-            vec![a, b],
-            Operator::Divide,
-            Int32Array::from(vec![4, 8, 16, 32, 64]),
-        )?;
-
-        Ok(())
-    }
-
-    fn apply_arithmetic<T: ArrowNumericType>(
-        schema: SchemaRef,
-        data: Vec<ArrayRef>,
-        op: Operator,
-        expected: PrimitiveArray<T>,
-    ) -> Result<()> {
-        let arithmetic_op = binary_simple(col("a"), op, col("b"));
-        let batch = RecordBatch::try_new(schema, data)?;
-        let result = arithmetic_op.evaluate(&batch)?.into_array(batch.num_rows());
-
-        assert_eq!(result.as_ref(), &expected);
-        Ok(())
-    }
-
-    fn apply_logic_op(
-        schema: SchemaRef,
-        left: BooleanArray,
-        right: BooleanArray,
-        op: Operator,
-        expected: BooleanArray,
-    ) -> Result<()> {
-        let arithmetic_op = binary_simple(col("a"), op, col("b"));
-        let data: Vec<ArrayRef> = vec![Arc::new(left), Arc::new(right)];
-        let batch = RecordBatch::try_new(schema, data)?;
-        let result = arithmetic_op.evaluate(&batch)?.into_array(batch.num_rows());
-
-        assert_eq!(result.as_ref(), &expected);
-        Ok(())
-    }
-
-    #[test]
-    fn and_with_nulls_op() -> Result<()> {
-        let schema = Schema::new(vec![
-            Field::new("a", DataType::Boolean, true),
-            Field::new("b", DataType::Boolean, true),
-        ]);
-        let a = BooleanArray::from(vec![
-            Some(true),
-            Some(false),
-            None,
-            Some(true),
-            Some(false),
-            None,
-            Some(true),
-            Some(false),
-            None,
-        ]);
-        let b = BooleanArray::from(vec![
-            Some(true),
-            Some(true),
-            Some(true),
-            Some(false),
-            Some(false),
-            Some(false),
-            None,
-            None,
-            None,
-        ]);
-
-        let expected = BooleanArray::from(vec![
-            Some(true),
-            Some(false),
-            None,
-            Some(false),
-            Some(false),
-            Some(false),
-            None,
-            Some(false),
-            None,
-        ]);
-        apply_logic_op(Arc::new(schema), a, b, Operator::And, expected)?;
-
-        Ok(())
-    }
-
-    #[test]
-    fn or_with_nulls_op() -> Result<()> {
-        let schema = Schema::new(vec![
-            Field::new("a", DataType::Boolean, true),
-            Field::new("b", DataType::Boolean, true),
-        ]);
-        let a = BooleanArray::from(vec![
-            Some(true),
-            Some(false),
-            None,
-            Some(true),
-            Some(false),
-            None,
-            Some(true),
-            Some(false),
-            None,
-        ]);
-        let b = BooleanArray::from(vec![
-            Some(true),
-            Some(true),
-            Some(true),
-            Some(false),
-            Some(false),
-            Some(false),
-            None,
-            None,
-            None,
-        ]);
-
-        let expected = BooleanArray::from(vec![
-            Some(true),
-            Some(true),
-            Some(true),
-            Some(true),
-            Some(false),
-            None,
-            Some(true),
-            None,
-            None,
-        ]);
-        apply_logic_op(Arc::new(schema), a, b, Operator::Or, expected)?;
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_coersion_error() -> Result<()> {
-        let expr =
-            common_binary_type(&DataType::Float32, &Operator::Plus, &DataType::Utf8);
-
-        if let Err(DataFusionError::Plan(e)) = expr {
-            assert_eq!(e, "'Float32 + Utf8' can't be evaluated because there isn't a common type to coerce the types to");
-            Ok(())
-        } else {
-            Err(DataFusionError::Internal(
-                "Coercion should have returned an DataFusionError::Internal".to_string(),
-            ))
-        }
-    }
-}
diff --git a/rust/datafusion/src/physical_plan/expressions/case.rs b/rust/datafusion/src/physical_plan/expressions/case.rs
deleted file mode 100644
index e8c500e5ed6..00000000000
--- a/rust/datafusion/src/physical_plan/expressions/case.rs
+++ /dev/null
@@ -1,597 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::{any::Any, sync::Arc};
-
-use arrow::array::{self, *};
-use arrow::datatypes::{DataType, Schema};
-use arrow::record_batch::RecordBatch;
-
-use crate::error::{DataFusionError, Result};
-use crate::physical_plan::{ColumnarValue, PhysicalExpr};
-
-/// The CASE expression is similar to a series of nested if/else and there are two forms that
-/// can be used. The first form consists of a series of boolean "when" expressions with
-/// corresponding "then" expressions, and an optional "else" expression.
-///
-/// CASE WHEN condition THEN result
-///      [WHEN ...]
-///      [ELSE result]
-/// END
-///
-/// The second form uses a base expression and then a series of "when" clauses that match on a
-/// literal value.
-///
-/// CASE expression
-///     WHEN value THEN result
-///     [WHEN ...]
-///     [ELSE result]
-/// END
-#[derive(Debug)]
-pub struct CaseExpr {
-    /// Optional base expression that can be compared to literal values in the "when" expressions
-    expr: Option<Arc<dyn PhysicalExpr>>,
-    /// One or more when/then expressions
-    when_then_expr: Vec<(Arc<dyn PhysicalExpr>, Arc<dyn PhysicalExpr>)>,
-    /// Optional "else" expression
-    else_expr: Option<Arc<dyn PhysicalExpr>>,
-}
-
-impl std::fmt::Display for CaseExpr {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        write!(f, "CASE ")?;
-        if let Some(e) = &self.expr {
-            write!(f, "{} ", e)?;
-        }
-        for (w, t) in &self.when_then_expr {
-            write!(f, "WHEN {} THEN {} ", w, t)?;
-        }
-        if let Some(e) = &self.else_expr {
-            write!(f, "ELSE {} ", e)?;
-        }
-        write!(f, "END")
-    }
-}
-
-impl CaseExpr {
-    /// Create a new CASE WHEN expression
-    pub fn try_new(
-        expr: Option<Arc<dyn PhysicalExpr>>,
-        when_then_expr: &[(Arc<dyn PhysicalExpr>, Arc<dyn PhysicalExpr>)],
-        else_expr: Option<Arc<dyn PhysicalExpr>>,
-    ) -> Result<Self> {
-        if when_then_expr.is_empty() {
-            Err(DataFusionError::Execution(
-                "There must be at least one WHEN clause".to_string(),
-            ))
-        } else {
-            Ok(Self {
-                expr,
-                when_then_expr: when_then_expr.to_vec(),
-                else_expr,
-            })
-        }
-    }
-
-    /// Optional base expression that can be compared to literal values in the "when" expressions
-    pub fn expr(&self) -> &Option<Arc<dyn PhysicalExpr>> {
-        &self.expr
-    }
-
-    /// One or more when/then expressions
-    pub fn when_then_expr(&self) -> &[(Arc<dyn PhysicalExpr>, Arc<dyn PhysicalExpr>)] {
-        &self.when_then_expr
-    }
-
-    /// Optional "else" expression
-    pub fn else_expr(&self) -> Option<&Arc<dyn PhysicalExpr>> {
-        self.else_expr.as_ref()
-    }
-}
-
-macro_rules! if_then_else {
-    ($BUILDER_TYPE:ty, $ARRAY_TYPE:ty, $BOOLS:expr, $TRUE:expr, $FALSE:expr) => {{
-        let true_values = $TRUE
-            .as_ref()
-            .as_any()
-            .downcast_ref::<$ARRAY_TYPE>()
-            .expect("true_values downcast failed");
-
-        let false_values = $FALSE
-            .as_ref()
-            .as_any()
-            .downcast_ref::<$ARRAY_TYPE>()
-            .expect("false_values downcast failed");
-
-        let mut builder = <$BUILDER_TYPE>::new($BOOLS.len());
-        for i in 0..$BOOLS.len() {
-            if $BOOLS.is_null(i) {
-                if false_values.is_null(i) {
-                    builder.append_null()?;
-                } else {
-                    builder.append_value(false_values.value(i))?;
-                }
-            } else if $BOOLS.value(i) {
-                if true_values.is_null(i) {
-                    builder.append_null()?;
-                } else {
-                    builder.append_value(true_values.value(i))?;
-                }
-            } else {
-                if false_values.is_null(i) {
-                    builder.append_null()?;
-                } else {
-                    builder.append_value(false_values.value(i))?;
-                }
-            }
-        }
-        Ok(Arc::new(builder.finish()))
-    }};
-}
-
-fn if_then_else(
-    bools: &BooleanArray,
-    true_values: ArrayRef,
-    false_values: ArrayRef,
-    data_type: &DataType,
-) -> Result<ArrayRef> {
-    match data_type {
-        DataType::UInt8 => if_then_else!(
-            array::UInt8Builder,
-            array::UInt8Array,
-            bools,
-            true_values,
-            false_values
-        ),
-        DataType::UInt16 => if_then_else!(
-            array::UInt16Builder,
-            array::UInt16Array,
-            bools,
-            true_values,
-            false_values
-        ),
-        DataType::UInt32 => if_then_else!(
-            array::UInt32Builder,
-            array::UInt32Array,
-            bools,
-            true_values,
-            false_values
-        ),
-        DataType::UInt64 => if_then_else!(
-            array::UInt64Builder,
-            array::UInt64Array,
-            bools,
-            true_values,
-            false_values
-        ),
-        DataType::Int8 => if_then_else!(
-            array::Int8Builder,
-            array::Int8Array,
-            bools,
-            true_values,
-            false_values
-        ),
-        DataType::Int16 => if_then_else!(
-            array::Int16Builder,
-            array::Int16Array,
-            bools,
-            true_values,
-            false_values
-        ),
-        DataType::Int32 => if_then_else!(
-            array::Int32Builder,
-            array::Int32Array,
-            bools,
-            true_values,
-            false_values
-        ),
-        DataType::Int64 => if_then_else!(
-            array::Int64Builder,
-            array::Int64Array,
-            bools,
-            true_values,
-            false_values
-        ),
-        DataType::Float32 => if_then_else!(
-            array::Float32Builder,
-            array::Float32Array,
-            bools,
-            true_values,
-            false_values
-        ),
-        DataType::Float64 => if_then_else!(
-            array::Float64Builder,
-            array::Float64Array,
-            bools,
-            true_values,
-            false_values
-        ),
-        DataType::Utf8 => if_then_else!(
-            array::StringBuilder,
-            array::StringArray,
-            bools,
-            true_values,
-            false_values
-        ),
-        other => Err(DataFusionError::Execution(format!(
-            "CASE does not support '{:?}'",
-            other
-        ))),
-    }
-}
-
-macro_rules! make_null_array {
-    ($TY:ty, $N:expr) => {{
-        let mut builder = <$TY>::new($N);
-        for _ in 0..$N {
-            builder.append_null()?;
-        }
-        Ok(Arc::new(builder.finish()))
-    }};
-}
-
-fn build_null_array(data_type: &DataType, num_rows: usize) -> Result<ArrayRef> {
-    match data_type {
-        DataType::UInt8 => make_null_array!(array::UInt8Builder, num_rows),
-        DataType::UInt16 => make_null_array!(array::UInt16Builder, num_rows),
-        DataType::UInt32 => make_null_array!(array::UInt32Builder, num_rows),
-        DataType::UInt64 => make_null_array!(array::UInt64Builder, num_rows),
-        DataType::Int8 => make_null_array!(array::Int8Builder, num_rows),
-        DataType::Int16 => make_null_array!(array::Int16Builder, num_rows),
-        DataType::Int32 => make_null_array!(array::Int32Builder, num_rows),
-        DataType::Int64 => make_null_array!(array::Int64Builder, num_rows),
-        DataType::Float32 => make_null_array!(array::Float32Builder, num_rows),
-        DataType::Float64 => make_null_array!(array::Float64Builder, num_rows),
-        DataType::Utf8 => make_null_array!(array::StringBuilder, num_rows),
-        other => Err(DataFusionError::Execution(format!(
-            "CASE does not support '{:?}'",
-            other
-        ))),
-    }
-}
-
-macro_rules! array_equals {
-    ($TY:ty, $L:expr, $R:expr) => {{
-        let when_value = $L
-            .as_ref()
-            .as_any()
-            .downcast_ref::<$TY>()
-            .expect("array_equals downcast failed");
-
-        let base_value = $R
-            .as_ref()
-            .as_any()
-            .downcast_ref::<$TY>()
-            .expect("array_equals downcast failed");
-
-        let mut builder = BooleanBuilder::new(when_value.len());
-        for row in 0..when_value.len() {
-            if when_value.is_valid(row) && base_value.is_valid(row) {
-                builder.append_value(when_value.value(row) == base_value.value(row))?;
-            } else {
-                builder.append_null()?;
-            }
-        }
-        Ok(builder.finish())
-    }};
-}
-
-fn array_equals(
-    data_type: &DataType,
-    when_value: ArrayRef,
-    base_value: ArrayRef,
-) -> Result<BooleanArray> {
-    match data_type {
-        DataType::UInt8 => array_equals!(array::UInt8Array, when_value, base_value),
-        DataType::UInt16 => array_equals!(array::UInt16Array, when_value, base_value),
-        DataType::UInt32 => array_equals!(array::UInt32Array, when_value, base_value),
-        DataType::UInt64 => array_equals!(array::UInt64Array, when_value, base_value),
-        DataType::Int8 => array_equals!(array::Int8Array, when_value, base_value),
-        DataType::Int16 => array_equals!(array::Int16Array, when_value, base_value),
-        DataType::Int32 => array_equals!(array::Int32Array, when_value, base_value),
-        DataType::Int64 => array_equals!(array::Int64Array, when_value, base_value),
-        DataType::Float32 => array_equals!(array::Float32Array, when_value, base_value),
-        DataType::Float64 => array_equals!(array::Float64Array, when_value, base_value),
-        DataType::Utf8 => array_equals!(array::StringArray, when_value, base_value),
-        other => Err(DataFusionError::Execution(format!(
-            "CASE does not support '{:?}'",
-            other
-        ))),
-    }
-}
-
-impl CaseExpr {
-    /// This function evaluates the form of CASE that matches an expression to fixed values.
-    ///
-    /// CASE expression
-    ///     WHEN value THEN result
-    ///     [WHEN ...]
-    ///     [ELSE result]
-    /// END
-    fn case_when_with_expr(&self, batch: &RecordBatch) -> Result<ColumnarValue> {
-        let return_type = self.when_then_expr[0].1.data_type(&batch.schema())?;
-        let expr = self.expr.as_ref().unwrap();
-        let base_value = expr.evaluate(batch)?;
-        let base_type = expr.data_type(&batch.schema())?;
-        let base_value = base_value.into_array(batch.num_rows());
-
-        // start with the else condition, or nulls
-        let mut current_value: Option<ArrayRef> = if let Some(e) = &self.else_expr {
-            Some(e.evaluate(batch)?.into_array(batch.num_rows()))
-        } else {
-            Some(build_null_array(&return_type, batch.num_rows())?)
-        };
-
-        // walk backwards through the when/then expressions
-        for i in (0..self.when_then_expr.len()).rev() {
-            let i = i as usize;
-
-            let when_value = self.when_then_expr[i].0.evaluate(batch)?;
-            let when_value = when_value.into_array(batch.num_rows());
-
-            let then_value = self.when_then_expr[i].1.evaluate(batch)?;
-            let then_value = then_value.into_array(batch.num_rows());
-
-            // build boolean array representing which rows match the "when" value
-            let when_match = array_equals(&base_type, when_value, base_value.clone())?;
-
-            current_value = Some(if_then_else(
-                &when_match,
-                then_value,
-                current_value.unwrap(),
-                &return_type,
-            )?);
-        }
-
-        Ok(ColumnarValue::Array(current_value.unwrap()))
-    }
-
-    /// This function evaluates the form of CASE where each WHEN expression is a boolean
-    /// expression.
-    ///
-    /// CASE WHEN condition THEN result
-    ///      [WHEN ...]
-    ///      [ELSE result]
-    /// END
-    fn case_when_no_expr(&self, batch: &RecordBatch) -> Result<ColumnarValue> {
-        let return_type = self.when_then_expr[0].1.data_type(&batch.schema())?;
-
-        // start with the else condition, or nulls
-        let mut current_value: Option<ArrayRef> = if let Some(e) = &self.else_expr {
-            Some(e.evaluate(batch)?.into_array(batch.num_rows()))
-        } else {
-            Some(build_null_array(&return_type, batch.num_rows())?)
-        };
-
-        // walk backwards through the when/then expressions
-        for i in (0..self.when_then_expr.len()).rev() {
-            let i = i as usize;
-
-            let when_value = self.when_then_expr[i].0.evaluate(batch)?;
-            let when_value = when_value.into_array(batch.num_rows());
-            let when_value = when_value
-                .as_ref()
-                .as_any()
-                .downcast_ref::<BooleanArray>()
-                .expect("WHEN expression did not return a BooleanArray");
-
-            let then_value = self.when_then_expr[i].1.evaluate(batch)?;
-            let then_value = then_value.into_array(batch.num_rows());
-
-            current_value = Some(if_then_else(
-                &when_value,
-                then_value,
-                current_value.unwrap(),
-                &return_type,
-            )?);
-        }
-
-        Ok(ColumnarValue::Array(current_value.unwrap()))
-    }
-}
-
-impl PhysicalExpr for CaseExpr {
-    /// Return a reference to Any that can be used for downcasting
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn data_type(&self, input_schema: &Schema) -> Result<DataType> {
-        self.when_then_expr[0].1.data_type(input_schema)
-    }
-
-    fn nullable(&self, input_schema: &Schema) -> Result<bool> {
-        // this expression is nullable if any of the input expressions are nullable
-        let then_nullable = self
-            .when_then_expr
-            .iter()
-            .map(|(_, t)| t.nullable(input_schema))
-            .collect::<Result<Vec<_>>>()?;
-        if then_nullable.contains(&true) {
-            Ok(true)
-        } else if let Some(e) = &self.else_expr {
-            e.nullable(input_schema)
-        } else {
-            Ok(false)
-        }
-    }
-
-    fn evaluate(&self, batch: &RecordBatch) -> Result<ColumnarValue> {
-        if self.expr.is_some() {
-            // this use case evaluates "expr" and then compares the values with the "when"
-            // values
-            self.case_when_with_expr(batch)
-        } else {
-            // The "when" conditions all evaluate to boolean in this use case and can be
-            // arbitrary expressions
-            self.case_when_no_expr(batch)
-        }
-    }
-}
-
-/// Create a CASE expression
-pub fn case(
-    expr: Option<Arc<dyn PhysicalExpr>>,
-    when_thens: &[(Arc<dyn PhysicalExpr>, Arc<dyn PhysicalExpr>)],
-    else_expr: Option<Arc<dyn PhysicalExpr>>,
-) -> Result<Arc<dyn PhysicalExpr>> {
-    Ok(Arc::new(CaseExpr::try_new(expr, when_thens, else_expr)?))
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::{
-        error::Result,
-        logical_plan::Operator,
-        physical_plan::expressions::{binary, col, lit},
-        scalar::ScalarValue,
-    };
-    use arrow::array::StringArray;
-    use arrow::datatypes::*;
-
-    #[test]
-    fn case_with_expr() -> Result<()> {
-        let batch = case_test_batch()?;
-
-        // CASE a WHEN 'foo' THEN 123 WHEN 'bar' THEN 456 END
-        let when1 = lit(ScalarValue::Utf8(Some("foo".to_string())));
-        let then1 = lit(ScalarValue::Int32(Some(123)));
-        let when2 = lit(ScalarValue::Utf8(Some("bar".to_string())));
-        let then2 = lit(ScalarValue::Int32(Some(456)));
-
-        let expr = case(Some(col("a")), &[(when1, then1), (when2, then2)], None)?;
-        let result = expr.evaluate(&batch)?.into_array(batch.num_rows());
-        let result = result
-            .as_any()
-            .downcast_ref::<Int32Array>()
-            .expect("failed to downcast to Int32Array");
-
-        let expected = &Int32Array::from(vec![Some(123), None, None, Some(456)]);
-
-        assert_eq!(expected, result);
-
-        Ok(())
-    }
-
-    #[test]
-    fn case_with_expr_else() -> Result<()> {
-        let batch = case_test_batch()?;
-
-        // CASE a WHEN 'foo' THEN 123 WHEN 'bar' THEN 456 ELSE 999 END
-        let when1 = lit(ScalarValue::Utf8(Some("foo".to_string())));
-        let then1 = lit(ScalarValue::Int32(Some(123)));
-        let when2 = lit(ScalarValue::Utf8(Some("bar".to_string())));
-        let then2 = lit(ScalarValue::Int32(Some(456)));
-        let else_value = lit(ScalarValue::Int32(Some(999)));
-
-        let expr = case(
-            Some(col("a")),
-            &[(when1, then1), (when2, then2)],
-            Some(else_value),
-        )?;
-        let result = expr.evaluate(&batch)?.into_array(batch.num_rows());
-        let result = result
-            .as_any()
-            .downcast_ref::<Int32Array>()
-            .expect("failed to downcast to Int32Array");
-
-        let expected =
-            &Int32Array::from(vec![Some(123), Some(999), Some(999), Some(456)]);
-
-        assert_eq!(expected, result);
-
-        Ok(())
-    }
-
-    #[test]
-    fn case_without_expr() -> Result<()> {
-        let batch = case_test_batch()?;
-
-        // CASE WHEN a = 'foo' THEN 123 WHEN a = 'bar' THEN 456 END
-        let when1 = binary(
-            col("a"),
-            Operator::Eq,
-            lit(ScalarValue::Utf8(Some("foo".to_string()))),
-            &batch.schema(),
-        )?;
-        let then1 = lit(ScalarValue::Int32(Some(123)));
-        let when2 = binary(
-            col("a"),
-            Operator::Eq,
-            lit(ScalarValue::Utf8(Some("bar".to_string()))),
-            &batch.schema(),
-        )?;
-        let then2 = lit(ScalarValue::Int32(Some(456)));
-
-        let expr = case(None, &[(when1, then1), (when2, then2)], None)?;
-        let result = expr.evaluate(&batch)?.into_array(batch.num_rows());
-        let result = result
-            .as_any()
-            .downcast_ref::<Int32Array>()
-            .expect("failed to downcast to Int32Array");
-
-        let expected = &Int32Array::from(vec![Some(123), None, None, Some(456)]);
-
-        assert_eq!(expected, result);
-
-        Ok(())
-    }
-
-    #[test]
-    fn case_without_expr_else() -> Result<()> {
-        let batch = case_test_batch()?;
-
-        // CASE WHEN a = 'foo' THEN 123 WHEN a = 'bar' THEN 456 ELSE 999 END
-        let when1 = binary(
-            col("a"),
-            Operator::Eq,
-            lit(ScalarValue::Utf8(Some("foo".to_string()))),
-            &batch.schema(),
-        )?;
-        let then1 = lit(ScalarValue::Int32(Some(123)));
-        let when2 = binary(
-            col("a"),
-            Operator::Eq,
-            lit(ScalarValue::Utf8(Some("bar".to_string()))),
-            &batch.schema(),
-        )?;
-        let then2 = lit(ScalarValue::Int32(Some(456)));
-        let else_value = lit(ScalarValue::Int32(Some(999)));
-
-        let expr = case(None, &[(when1, then1), (when2, then2)], Some(else_value))?;
-        let result = expr.evaluate(&batch)?.into_array(batch.num_rows());
-        let result = result
-            .as_any()
-            .downcast_ref::<Int32Array>()
-            .expect("failed to downcast to Int32Array");
-
-        let expected =
-            &Int32Array::from(vec![Some(123), Some(999), Some(999), Some(456)]);
-
-        assert_eq!(expected, result);
-
-        Ok(())
-    }
-
-    fn case_test_batch() -> Result<RecordBatch> {
-        let schema = Schema::new(vec![Field::new("a", DataType::Utf8, true)]);
-        let a = StringArray::from(vec![Some("foo"), Some("baz"), None, Some("bar")]);
-        let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a)])?;
-        Ok(batch)
-    }
-}
diff --git a/rust/datafusion/src/physical_plan/expressions/cast.rs b/rust/datafusion/src/physical_plan/expressions/cast.rs
deleted file mode 100644
index ba395f54d91..00000000000
--- a/rust/datafusion/src/physical_plan/expressions/cast.rs
+++ /dev/null
@@ -1,301 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::any::Any;
-use std::fmt;
-use std::sync::Arc;
-
-use super::ColumnarValue;
-use crate::error::{DataFusionError, Result};
-use crate::physical_plan::PhysicalExpr;
-use crate::scalar::ScalarValue;
-use arrow::compute;
-use arrow::compute::kernels;
-use arrow::compute::CastOptions;
-use arrow::datatypes::{DataType, Schema};
-use arrow::record_batch::RecordBatch;
-use compute::can_cast_types;
-
-/// provide Datafusion default cast options
-pub const DEFAULT_DATAFUSION_CAST_OPTIONS: CastOptions = CastOptions { safe: false };
-
-/// CAST expression casts an expression to a specific data type and returns a runtime error on invalid cast
-#[derive(Debug)]
-pub struct CastExpr {
-    /// The expression to cast
-    expr: Arc<dyn PhysicalExpr>,
-    /// The data type to cast to
-    cast_type: DataType,
-    /// Cast options
-    cast_options: CastOptions,
-}
-
-impl CastExpr {
-    /// Create a new CastExpr
-    pub fn new(
-        expr: Arc<dyn PhysicalExpr>,
-        cast_type: DataType,
-        cast_options: CastOptions,
-    ) -> Self {
-        Self {
-            expr,
-            cast_type,
-            cast_options,
-        }
-    }
-
-    /// The expression to cast
-    pub fn expr(&self) -> &Arc<dyn PhysicalExpr> {
-        &self.expr
-    }
-
-    /// The data type to cast to
-    pub fn cast_type(&self) -> &DataType {
-        &self.cast_type
-    }
-}
-
-impl fmt::Display for CastExpr {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "CAST({} AS {:?})", self.expr, self.cast_type)
-    }
-}
-
-impl PhysicalExpr for CastExpr {
-    /// Return a reference to Any that can be used for downcasting
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn data_type(&self, _input_schema: &Schema) -> Result<DataType> {
-        Ok(self.cast_type.clone())
-    }
-
-    fn nullable(&self, input_schema: &Schema) -> Result<bool> {
-        self.expr.nullable(input_schema)
-    }
-
-    fn evaluate(&self, batch: &RecordBatch) -> Result<ColumnarValue> {
-        let value = self.expr.evaluate(batch)?;
-        match value {
-            ColumnarValue::Array(array) => {
-                Ok(ColumnarValue::Array(kernels::cast::cast_with_options(
-                    &array,
-                    &self.cast_type,
-                    &self.cast_options,
-                )?))
-            }
-            ColumnarValue::Scalar(scalar) => {
-                let scalar_array = scalar.to_array();
-                let cast_array = kernels::cast::cast_with_options(
-                    &scalar_array,
-                    &self.cast_type,
-                    &self.cast_options,
-                )?;
-                let cast_scalar = ScalarValue::try_from_array(&cast_array, 0)?;
-                Ok(ColumnarValue::Scalar(cast_scalar))
-            }
-        }
-    }
-}
-
-/// Return a PhysicalExpression representing `expr` casted to
-/// `cast_type`, if any casting is needed.
-///
-/// Note that such casts may lose type information
-pub fn cast_with_options(
-    expr: Arc<dyn PhysicalExpr>,
-    input_schema: &Schema,
-    cast_type: DataType,
-    cast_options: CastOptions,
-) -> Result<Arc<dyn PhysicalExpr>> {
-    let expr_type = expr.data_type(input_schema)?;
-    if expr_type == cast_type {
-        Ok(expr.clone())
-    } else if can_cast_types(&expr_type, &cast_type) {
-        Ok(Arc::new(CastExpr::new(expr, cast_type, cast_options)))
-    } else {
-        Err(DataFusionError::Internal(format!(
-            "Unsupported CAST from {:?} to {:?}",
-            expr_type, cast_type
-        )))
-    }
-}
-
-/// Return a PhysicalExpression representing `expr` casted to
-/// `cast_type`, if any casting is needed.
-///
-/// Note that such casts may lose type information
-pub fn cast(
-    expr: Arc<dyn PhysicalExpr>,
-    input_schema: &Schema,
-    cast_type: DataType,
-) -> Result<Arc<dyn PhysicalExpr>> {
-    cast_with_options(
-        expr,
-        input_schema,
-        cast_type,
-        DEFAULT_DATAFUSION_CAST_OPTIONS,
-    )
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::error::Result;
-    use crate::physical_plan::expressions::col;
-    use arrow::array::{StringArray, Time64NanosecondArray};
-    use arrow::{
-        array::{Array, Int32Array, Int64Array, TimestampNanosecondArray, UInt32Array},
-        datatypes::*,
-    };
-
-    // runs an end-to-end test of physical type cast
-    // 1. construct a record batch with a column "a" of type A
-    // 2. construct a physical expression of CAST(a AS B)
-    // 3. evaluate the expression
-    // 4. verify that the resulting expression is of type B
-    // 5. verify that the resulting values are downcastable and correct
-    macro_rules! generic_test_cast {
-        ($A_ARRAY:ident, $A_TYPE:expr, $A_VEC:expr, $TYPEARRAY:ident, $TYPE:expr, $VEC:expr, $CAST_OPTIONS:expr) => {{
-            let schema = Schema::new(vec![Field::new("a", $A_TYPE, false)]);
-            let a = $A_ARRAY::from($A_VEC);
-            let batch =
-                RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(a)])?;
-
-            // verify that we can construct the expression
-            let expression = cast_with_options(col("a"), &schema, $TYPE, $CAST_OPTIONS)?;
-
-            // verify that its display is correct
-            assert_eq!(format!("CAST(a AS {:?})", $TYPE), format!("{}", expression));
-
-            // verify that the expression's type is correct
-            assert_eq!(expression.data_type(&schema)?, $TYPE);
-
-            // compute
-            let result = expression.evaluate(&batch)?.into_array(batch.num_rows());
-
-            // verify that the array's data_type is correct
-            assert_eq!(*result.data_type(), $TYPE);
-
-            // verify that the len is correct
-            assert_eq!(result.len(), $A_VEC.len());
-
-            // verify that the data itself is downcastable
-            let result = result
-                .as_any()
-                .downcast_ref::<$TYPEARRAY>()
-                .expect("failed to downcast");
-
-            // verify that the result itself is correct
-            for (i, x) in $VEC.iter().enumerate() {
-                match x {
-                    Some(x) => assert_eq!(result.value(i), *x),
-                    None => assert!(!result.is_valid(i)),
-                }
-            }
-        }};
-    }
-
-    #[test]
-    fn test_cast_i32_u32() -> Result<()> {
-        generic_test_cast!(
-            Int32Array,
-            DataType::Int32,
-            vec![1, 2, 3, 4, 5],
-            UInt32Array,
-            DataType::UInt32,
-            vec![
-                Some(1_u32),
-                Some(2_u32),
-                Some(3_u32),
-                Some(4_u32),
-                Some(5_u32)
-            ],
-            DEFAULT_DATAFUSION_CAST_OPTIONS
-        );
-        Ok(())
-    }
-
-    #[test]
-    fn test_cast_i32_utf8() -> Result<()> {
-        generic_test_cast!(
-            Int32Array,
-            DataType::Int32,
-            vec![1, 2, 3, 4, 5],
-            StringArray,
-            DataType::Utf8,
-            vec![Some("1"), Some("2"), Some("3"), Some("4"), Some("5")],
-            DEFAULT_DATAFUSION_CAST_OPTIONS
-        );
-        Ok(())
-    }
-
-    #[allow(clippy::redundant_clone)]
-    #[test]
-    fn test_cast_i64_t64() -> Result<()> {
-        let original = vec![1, 2, 3, 4, 5];
-        let expected: Vec<Option<i64>> = original
-            .iter()
-            .map(|i| Some(Time64NanosecondArray::from(vec![*i]).value(0)))
-            .collect();
-        generic_test_cast!(
-            Int64Array,
-            DataType::Int64,
-            original.clone(),
-            TimestampNanosecondArray,
-            DataType::Timestamp(TimeUnit::Nanosecond, None),
-            expected,
-            DEFAULT_DATAFUSION_CAST_OPTIONS
-        );
-        Ok(())
-    }
-
-    #[test]
-    fn invalid_cast() {
-        // Ensure a useful error happens at plan time if invalid casts are used
-        let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]);
-
-        let result = cast(col("a"), &schema, DataType::LargeBinary);
-        result.expect_err("expected Invalid CAST");
-    }
-
-    #[test]
-    fn invalid_cast_with_options_error() -> Result<()> {
-        // Ensure a useful error happens at plan time if invalid casts are used
-        let schema = Schema::new(vec![Field::new("a", DataType::Utf8, false)]);
-        let a = StringArray::from(vec!["9.1"]);
-        let batch = RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(a)])?;
-        let expression = cast_with_options(
-            col("a"),
-            &schema,
-            DataType::Int32,
-            DEFAULT_DATAFUSION_CAST_OPTIONS,
-        )?;
-        let result = expression.evaluate(&batch);
-
-        match result {
-            Ok(_) => panic!("expected error"),
-            Err(e) => {
-                assert!(e.to_string().contains(
-                    "Cast error: Cannot cast string '9.1' to value of arrow::datatypes::types::Int32Type type"
-                ))
-            }
-        }
-        Ok(())
-    }
-}
diff --git a/rust/datafusion/src/physical_plan/expressions/coercion.rs b/rust/datafusion/src/physical_plan/expressions/coercion.rs
deleted file mode 100644
index e9949f5199e..00000000000
--- a/rust/datafusion/src/physical_plan/expressions/coercion.rs
+++ /dev/null
@@ -1,208 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Coercion rules used to coerce types to match existing expressions' implementations
-
-use arrow::datatypes::DataType;
-
-/// Determine if a DataType is signed numeric or not
-pub fn is_signed_numeric(dt: &DataType) -> bool {
-    matches!(
-        dt,
-        DataType::Int8
-            | DataType::Int16
-            | DataType::Int32
-            | DataType::Int64
-            | DataType::Float16
-            | DataType::Float32
-            | DataType::Float64
-    )
-}
-
-/// Determine if a DataType is numeric or not
-pub fn is_numeric(dt: &DataType) -> bool {
-    is_signed_numeric(dt)
-        || match dt {
-            DataType::UInt8 | DataType::UInt16 | DataType::UInt32 | DataType::UInt64 => {
-                true
-            }
-            _ => false,
-        }
-}
-
-/// Coercion rules for dictionary values (aka the type of the  dictionary itself)
-fn dictionary_value_coercion(
-    lhs_type: &DataType,
-    rhs_type: &DataType,
-) -> Option<DataType> {
-    numerical_coercion(lhs_type, rhs_type).or_else(|| string_coercion(lhs_type, rhs_type))
-}
-
-/// Coercion rules for Dictionaries: the type that both lhs and rhs
-/// can be casted to for the purpose of a computation.
-///
-/// It would likely be preferable to cast primitive values to
-/// dictionaries, and thus avoid unpacking dictionary as well as doing
-/// faster comparisons. However, the arrow compute kernels (e.g. eq)
-/// don't have DictionaryArray support yet, so fall back to unpacking
-/// the dictionaries
-pub fn dictionary_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
-    match (lhs_type, rhs_type) {
-        (
-            DataType::Dictionary(_lhs_index_type, lhs_value_type),
-            DataType::Dictionary(_rhs_index_type, rhs_value_type),
-        ) => dictionary_value_coercion(lhs_value_type, rhs_value_type),
-        (DataType::Dictionary(_index_type, value_type), _) => {
-            dictionary_value_coercion(value_type, rhs_type)
-        }
-        (_, DataType::Dictionary(_index_type, value_type)) => {
-            dictionary_value_coercion(lhs_type, value_type)
-        }
-        _ => None,
-    }
-}
-
-/// Coercion rules for Strings: the type that both lhs and rhs can be
-/// casted to for the purpose of a string computation
-pub fn string_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
-    use arrow::datatypes::DataType::*;
-    match (lhs_type, rhs_type) {
-        (Utf8, Utf8) => Some(Utf8),
-        (LargeUtf8, Utf8) => Some(LargeUtf8),
-        (Utf8, LargeUtf8) => Some(LargeUtf8),
-        (LargeUtf8, LargeUtf8) => Some(LargeUtf8),
-        _ => None,
-    }
-}
-
-/// Coercion rules for Temporal columns: the type that both lhs and rhs can be
-/// casted to for the purpose of a date computation
-pub fn temporal_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
-    use arrow::datatypes::DataType::*;
-    match (lhs_type, rhs_type) {
-        (Utf8, Date32) => Some(Date32),
-        (Date32, Utf8) => Some(Date32),
-        (Utf8, Date64) => Some(Date64),
-        (Date64, Utf8) => Some(Date64),
-        _ => None,
-    }
-}
-
-/// Coercion rule for numerical types: The type that both lhs and rhs
-/// can be casted to for numerical calculation, while maintaining
-/// maximum precision
-pub fn numerical_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
-    use arrow::datatypes::DataType::*;
-
-    // error on any non-numeric type
-    if !is_numeric(lhs_type) || !is_numeric(rhs_type) {
-        return None;
-    };
-
-    // same type => all good
-    if lhs_type == rhs_type {
-        return Some(lhs_type.clone());
-    }
-
-    // these are ordered from most informative to least informative so
-    // that the coercion removes the least amount of information
-    match (lhs_type, rhs_type) {
-        (Float64, _) => Some(Float64),
-        (_, Float64) => Some(Float64),
-
-        (_, Float32) => Some(Float32),
-        (Float32, _) => Some(Float32),
-
-        (Int64, _) => Some(Int64),
-        (_, Int64) => Some(Int64),
-
-        (Int32, _) => Some(Int32),
-        (_, Int32) => Some(Int32),
-
-        (Int16, _) => Some(Int16),
-        (_, Int16) => Some(Int16),
-
-        (Int8, _) => Some(Int8),
-        (_, Int8) => Some(Int8),
-
-        (UInt64, _) => Some(UInt64),
-        (_, UInt64) => Some(UInt64),
-
-        (UInt32, _) => Some(UInt32),
-        (_, UInt32) => Some(UInt32),
-
-        (UInt16, _) => Some(UInt16),
-        (_, UInt16) => Some(UInt16),
-
-        (UInt8, _) => Some(UInt8),
-        (_, UInt8) => Some(UInt8),
-
-        _ => None,
-    }
-}
-
-// coercion rules for equality operations. This is a superset of all numerical coercion rules.
-pub fn eq_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
-    if lhs_type == rhs_type {
-        // same type => equality is possible
-        return Some(lhs_type.clone());
-    }
-    numerical_coercion(lhs_type, rhs_type)
-        .or_else(|| dictionary_coercion(lhs_type, rhs_type))
-        .or_else(|| temporal_coercion(lhs_type, rhs_type))
-}
-
-// coercion rules that assume an ordered set, such as "less than".
-// These are the union of all numerical coercion rules and all string coercion rules
-pub fn order_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
-    if lhs_type == rhs_type {
-        // same type => all good
-        return Some(lhs_type.clone());
-    }
-
-    numerical_coercion(lhs_type, rhs_type)
-        .or_else(|| string_coercion(lhs_type, rhs_type))
-        .or_else(|| dictionary_coercion(lhs_type, rhs_type))
-        .or_else(|| temporal_coercion(lhs_type, rhs_type))
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_dictionary_type_coersion() {
-        use DataType::*;
-
-        // TODO: In the future, this would ideally return Dictionary types and avoid unpacking
-        let lhs_type = Dictionary(Box::new(Int8), Box::new(Int32));
-        let rhs_type = Dictionary(Box::new(Int8), Box::new(Int16));
-        assert_eq!(dictionary_coercion(&lhs_type, &rhs_type), Some(Int32));
-
-        let lhs_type = Dictionary(Box::new(Int8), Box::new(Utf8));
-        let rhs_type = Dictionary(Box::new(Int8), Box::new(Int16));
-        assert_eq!(dictionary_coercion(&lhs_type, &rhs_type), None);
-
-        let lhs_type = Dictionary(Box::new(Int8), Box::new(Utf8));
-        let rhs_type = Utf8;
-        assert_eq!(dictionary_coercion(&lhs_type, &rhs_type), Some(Utf8));
-
-        let lhs_type = Utf8;
-        let rhs_type = Dictionary(Box::new(Int8), Box::new(Utf8));
-        assert_eq!(dictionary_coercion(&lhs_type, &rhs_type), Some(Utf8));
-    }
-}
diff --git a/rust/datafusion/src/physical_plan/expressions/column.rs b/rust/datafusion/src/physical_plan/expressions/column.rs
deleted file mode 100644
index 7e0304e51fe..00000000000
--- a/rust/datafusion/src/physical_plan/expressions/column.rs
+++ /dev/null
@@ -1,86 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Column expression
-
-use std::sync::Arc;
-
-use arrow::{
-    datatypes::{DataType, Schema},
-    record_batch::RecordBatch,
-};
-
-use crate::error::Result;
-use crate::physical_plan::{ColumnarValue, PhysicalExpr};
-
-/// Represents the column at a given index in a RecordBatch
-#[derive(Debug)]
-pub struct Column {
-    name: String,
-}
-
-impl Column {
-    /// Create a new column expression
-    pub fn new(name: &str) -> Self {
-        Self {
-            name: name.to_owned(),
-        }
-    }
-
-    /// Get the column name
-    pub fn name(&self) -> &str {
-        &self.name
-    }
-}
-
-impl std::fmt::Display for Column {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        write!(f, "{}", self.name)
-    }
-}
-
-impl PhysicalExpr for Column {
-    /// Return a reference to Any that can be used for downcasting
-    fn as_any(&self) -> &dyn std::any::Any {
-        self
-    }
-
-    /// Get the data type of this expression, given the schema of the input
-    fn data_type(&self, input_schema: &Schema) -> Result<DataType> {
-        Ok(input_schema
-            .field_with_name(&self.name)?
-            .data_type()
-            .clone())
-    }
-
-    /// Decide whehter this expression is nullable, given the schema of the input
-    fn nullable(&self, input_schema: &Schema) -> Result<bool> {
-        Ok(input_schema.field_with_name(&self.name)?.is_nullable())
-    }
-
-    /// Evaluate the expression
-    fn evaluate(&self, batch: &RecordBatch) -> Result<ColumnarValue> {
-        Ok(ColumnarValue::Array(
-            batch.column(batch.schema().index_of(&self.name)?).clone(),
-        ))
-    }
-}
-
-/// Create a column expression
-pub fn col(name: &str) -> Arc<dyn PhysicalExpr> {
-    Arc::new(Column::new(name))
-}
diff --git a/rust/datafusion/src/physical_plan/expressions/count.rs b/rust/datafusion/src/physical_plan/expressions/count.rs
deleted file mode 100644
index 22459813b7e..00000000000
--- a/rust/datafusion/src/physical_plan/expressions/count.rs
+++ /dev/null
@@ -1,235 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Defines physical expressions that can evaluated at runtime during query execution
-
-use std::any::Any;
-use std::sync::Arc;
-
-use crate::error::Result;
-use crate::physical_plan::{Accumulator, AggregateExpr, PhysicalExpr};
-use crate::scalar::ScalarValue;
-use arrow::compute;
-use arrow::datatypes::DataType;
-use arrow::{
-    array::{ArrayRef, UInt64Array},
-    datatypes::Field,
-};
-
-use super::format_state_name;
-
-/// COUNT aggregate expression
-/// Returns the amount of non-null values of the given expression.
-#[derive(Debug)]
-pub struct Count {
-    name: String,
-    data_type: DataType,
-    nullable: bool,
-    expr: Arc<dyn PhysicalExpr>,
-}
-
-impl Count {
-    /// Create a new COUNT aggregate function.
-    pub fn new(expr: Arc<dyn PhysicalExpr>, name: String, data_type: DataType) -> Self {
-        Self {
-            name,
-            expr,
-            data_type,
-            nullable: true,
-        }
-    }
-}
-
-impl AggregateExpr for Count {
-    /// Return a reference to Any that can be used for downcasting
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn field(&self) -> Result<Field> {
-        Ok(Field::new(
-            &self.name,
-            self.data_type.clone(),
-            self.nullable,
-        ))
-    }
-
-    fn state_fields(&self) -> Result<Vec<Field>> {
-        Ok(vec![Field::new(
-            &format_state_name(&self.name, "count"),
-            self.data_type.clone(),
-            true,
-        )])
-    }
-
-    fn expressions(&self) -> Vec<Arc<dyn PhysicalExpr>> {
-        vec![self.expr.clone()]
-    }
-
-    fn create_accumulator(&self) -> Result<Box<dyn Accumulator>> {
-        Ok(Box::new(CountAccumulator::new()))
-    }
-}
-
-#[derive(Debug)]
-struct CountAccumulator {
-    count: u64,
-}
-
-impl CountAccumulator {
-    /// new count accumulator
-    pub fn new() -> Self {
-        Self { count: 0 }
-    }
-}
-
-impl Accumulator for CountAccumulator {
-    fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
-        let array = &values[0];
-        self.count += (array.len() - array.data().null_count()) as u64;
-        Ok(())
-    }
-
-    fn update(&mut self, values: &[ScalarValue]) -> Result<()> {
-        let value = &values[0];
-        if !value.is_null() {
-            self.count += 1;
-        }
-        Ok(())
-    }
-
-    fn merge(&mut self, states: &[ScalarValue]) -> Result<()> {
-        let count = &states[0];
-        if let ScalarValue::UInt64(Some(delta)) = count {
-            self.count += *delta;
-        } else {
-            unreachable!()
-        }
-        Ok(())
-    }
-
-    fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> {
-        let counts = states[0].as_any().downcast_ref::<UInt64Array>().unwrap();
-        let delta = &compute::sum(counts);
-        if let Some(d) = delta {
-            self.count += *d;
-        }
-        Ok(())
-    }
-
-    fn state(&self) -> Result<Vec<ScalarValue>> {
-        Ok(vec![ScalarValue::UInt64(Some(self.count))])
-    }
-
-    fn evaluate(&self) -> Result<ScalarValue> {
-        Ok(ScalarValue::UInt64(Some(self.count)))
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::physical_plan::expressions::col;
-    use crate::physical_plan::expressions::tests::aggregate;
-    use crate::{error::Result, generic_test_op};
-    use arrow::record_batch::RecordBatch;
-    use arrow::{array::*, datatypes::*};
-
-    #[test]
-    fn count_elements() -> Result<()> {
-        let a: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5]));
-        generic_test_op!(
-            a,
-            DataType::Int32,
-            Count,
-            ScalarValue::from(5u64),
-            DataType::UInt64
-        )
-    }
-
-    #[test]
-    fn count_with_nulls() -> Result<()> {
-        let a: ArrayRef = Arc::new(Int32Array::from(vec![
-            Some(1),
-            Some(2),
-            None,
-            None,
-            Some(3),
-            None,
-        ]));
-        generic_test_op!(
-            a,
-            DataType::Int32,
-            Count,
-            ScalarValue::from(3u64),
-            DataType::UInt64
-        )
-    }
-
-    #[test]
-    fn count_all_nulls() -> Result<()> {
-        let a: ArrayRef = Arc::new(BooleanArray::from(vec![
-            None, None, None, None, None, None, None, None,
-        ]));
-        generic_test_op!(
-            a,
-            DataType::Boolean,
-            Count,
-            ScalarValue::from(0u64),
-            DataType::UInt64
-        )
-    }
-
-    #[test]
-    fn count_empty() -> Result<()> {
-        let a: Vec<bool> = vec![];
-        let a: ArrayRef = Arc::new(BooleanArray::from(a));
-        generic_test_op!(
-            a,
-            DataType::Boolean,
-            Count,
-            ScalarValue::from(0u64),
-            DataType::UInt64
-        )
-    }
-
-    #[test]
-    fn count_utf8() -> Result<()> {
-        let a: ArrayRef =
-            Arc::new(StringArray::from(vec!["a", "bb", "ccc", "dddd", "ad"]));
-        generic_test_op!(
-            a,
-            DataType::Utf8,
-            Count,
-            ScalarValue::from(5u64),
-            DataType::UInt64
-        )
-    }
-
-    #[test]
-    fn count_large_utf8() -> Result<()> {
-        let a: ArrayRef =
-            Arc::new(LargeStringArray::from(vec!["a", "bb", "ccc", "dddd", "ad"]));
-        generic_test_op!(
-            a,
-            DataType::LargeUtf8,
-            Count,
-            ScalarValue::from(5u64),
-            DataType::UInt64
-        )
-    }
-}
diff --git a/rust/datafusion/src/physical_plan/expressions/in_list.rs b/rust/datafusion/src/physical_plan/expressions/in_list.rs
deleted file mode 100644
index 41f111006ea..00000000000
--- a/rust/datafusion/src/physical_plan/expressions/in_list.rs
+++ /dev/null
@@ -1,458 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! InList expression
-
-use std::any::Any;
-use std::sync::Arc;
-
-use arrow::array::GenericStringArray;
-use arrow::array::{
-    ArrayRef, BooleanArray, Float32Array, Float64Array, Int16Array, Int32Array,
-    Int64Array, Int8Array, StringOffsetSizeTrait, UInt16Array, UInt32Array, UInt64Array,
-    UInt8Array,
-};
-use arrow::{
-    datatypes::{DataType, Schema},
-    record_batch::RecordBatch,
-};
-
-use crate::error::Result;
-use crate::physical_plan::{ColumnarValue, PhysicalExpr};
-use crate::scalar::ScalarValue;
-
-/// InList
-#[derive(Debug)]
-pub struct InListExpr {
-    expr: Arc<dyn PhysicalExpr>,
-    list: Vec<Arc<dyn PhysicalExpr>>,
-    negated: bool,
-}
-
-macro_rules! make_contains {
-    ($ARRAY:expr, $LIST_VALUES:expr, $NEGATED:expr, $SCALAR_VALUE:ident, $ARRAY_TYPE:ident) => {{
-        let array = $ARRAY.as_any().downcast_ref::<$ARRAY_TYPE>().unwrap();
-
-        let mut contains_null = false;
-        let values = $LIST_VALUES
-            .iter()
-            .flat_map(|expr| match expr {
-                ColumnarValue::Scalar(s) => match s {
-                    ScalarValue::$SCALAR_VALUE(Some(v)) => Some(*v),
-                    ScalarValue::$SCALAR_VALUE(None) => {
-                        contains_null = true;
-                        None
-                    }
-                    ScalarValue::Utf8(None) => {
-                        contains_null = true;
-                        None
-                    }
-                    datatype => unimplemented!("Unexpected type {} for InList", datatype),
-                },
-                ColumnarValue::Array(_) => {
-                    unimplemented!("InList does not yet support nested columns.")
-                }
-            })
-            .collect::<Vec<_>>();
-
-        Ok(ColumnarValue::Array(Arc::new(
-            array
-                .iter()
-                .map(|x| {
-                    let contains = x.map(|x| values.contains(&x));
-                    match contains {
-                        Some(true) => {
-                            if $NEGATED {
-                                Some(false)
-                            } else {
-                                Some(true)
-                            }
-                        }
-                        Some(false) => {
-                            if contains_null {
-                                None
-                            } else if $NEGATED {
-                                Some(true)
-                            } else {
-                                Some(false)
-                            }
-                        }
-                        None => None,
-                    }
-                })
-                .collect::<BooleanArray>(),
-        )))
-    }};
-}
-
-impl InListExpr {
-    /// Create a new InList expression
-    pub fn new(
-        expr: Arc<dyn PhysicalExpr>,
-        list: Vec<Arc<dyn PhysicalExpr>>,
-        negated: bool,
-    ) -> Self {
-        Self {
-            expr,
-            list,
-            negated,
-        }
-    }
-
-    /// Input expression
-    pub fn expr(&self) -> &Arc<dyn PhysicalExpr> {
-        &self.expr
-    }
-
-    /// List to search in
-    pub fn list(&self) -> &[Arc<dyn PhysicalExpr>] {
-        &self.list
-    }
-
-    /// Is this negated e.g. NOT IN LIST
-    pub fn negated(&self) -> bool {
-        self.negated
-    }
-
-    /// Compare for specific utf8 types
-    #[allow(clippy::unnecessary_wraps)]
-    fn compare_utf8<T: StringOffsetSizeTrait>(
-        &self,
-        array: ArrayRef,
-        list_values: Vec<ColumnarValue>,
-        negated: bool,
-    ) -> Result<ColumnarValue> {
-        let array = array
-            .as_any()
-            .downcast_ref::<GenericStringArray<T>>()
-            .unwrap();
-
-        let mut contains_null = false;
-        let values = list_values
-            .iter()
-            .flat_map(|expr| match expr {
-                ColumnarValue::Scalar(s) => match s {
-                    ScalarValue::Utf8(Some(v)) => Some(v.as_str()),
-                    ScalarValue::Utf8(None) => {
-                        contains_null = true;
-                        None
-                    }
-                    ScalarValue::LargeUtf8(Some(v)) => Some(v.as_str()),
-                    ScalarValue::LargeUtf8(None) => {
-                        contains_null = true;
-                        None
-                    }
-                    datatype => unimplemented!("Unexpected type {} for InList", datatype),
-                },
-                ColumnarValue::Array(_) => {
-                    unimplemented!("InList does not yet support nested columns.")
-                }
-            })
-            .collect::<Vec<&str>>();
-
-        Ok(ColumnarValue::Array(Arc::new(
-            array
-                .iter()
-                .map(|x| {
-                    let contains = x.map(|x| values.contains(&x));
-                    match contains {
-                        Some(true) => {
-                            if negated {
-                                Some(false)
-                            } else {
-                                Some(true)
-                            }
-                        }
-                        Some(false) => {
-                            if contains_null {
-                                None
-                            } else if negated {
-                                Some(true)
-                            } else {
-                                Some(false)
-                            }
-                        }
-                        None => None,
-                    }
-                })
-                .collect::<BooleanArray>(),
-        )))
-    }
-}
-
-impl std::fmt::Display for InListExpr {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        if self.negated {
-            write!(f, "{} NOT IN ({:?})", self.expr, self.list)
-        } else {
-            write!(f, "{} IN ({:?})", self.expr, self.list)
-        }
-    }
-}
-
-impl PhysicalExpr for InListExpr {
-    /// Return a reference to Any that can be used for downcasting
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn data_type(&self, _input_schema: &Schema) -> Result<DataType> {
-        Ok(DataType::Boolean)
-    }
-
-    fn nullable(&self, input_schema: &Schema) -> Result<bool> {
-        self.expr.nullable(input_schema)
-    }
-
-    fn evaluate(&self, batch: &RecordBatch) -> Result<ColumnarValue> {
-        let value = self.expr.evaluate(batch)?;
-        let value_data_type = value.data_type();
-        let list_values = self
-            .list
-            .iter()
-            .map(|expr| expr.evaluate(batch))
-            .collect::<Result<Vec<_>>>()?;
-
-        let array = match value {
-            ColumnarValue::Array(array) => array,
-            ColumnarValue::Scalar(scalar) => scalar.to_array(),
-        };
-
-        match value_data_type {
-            DataType::Float32 => {
-                make_contains!(array, list_values, self.negated, Float32, Float32Array)
-            }
-            DataType::Float64 => {
-                make_contains!(array, list_values, self.negated, Float64, Float64Array)
-            }
-            DataType::Int16 => {
-                make_contains!(array, list_values, self.negated, Int16, Int16Array)
-            }
-            DataType::Int32 => {
-                make_contains!(array, list_values, self.negated, Int32, Int32Array)
-            }
-            DataType::Int64 => {
-                make_contains!(array, list_values, self.negated, Int64, Int64Array)
-            }
-            DataType::Int8 => {
-                make_contains!(array, list_values, self.negated, Int8, Int8Array)
-            }
-            DataType::UInt16 => {
-                make_contains!(array, list_values, self.negated, UInt16, UInt16Array)
-            }
-            DataType::UInt32 => {
-                make_contains!(array, list_values, self.negated, UInt32, UInt32Array)
-            }
-            DataType::UInt64 => {
-                make_contains!(array, list_values, self.negated, UInt64, UInt64Array)
-            }
-            DataType::UInt8 => {
-                make_contains!(array, list_values, self.negated, UInt8, UInt8Array)
-            }
-            DataType::Boolean => {
-                make_contains!(array, list_values, self.negated, Boolean, BooleanArray)
-            }
-            DataType::Utf8 => self.compare_utf8::<i32>(array, list_values, self.negated),
-            DataType::LargeUtf8 => {
-                self.compare_utf8::<i64>(array, list_values, self.negated)
-            }
-            datatype => {
-                unimplemented!("InList does not support datatype {:?}.", datatype)
-            }
-        }
-    }
-}
-
-/// Creates a unary expression InList
-pub fn in_list(
-    expr: Arc<dyn PhysicalExpr>,
-    list: Vec<Arc<dyn PhysicalExpr>>,
-    negated: &bool,
-) -> Result<Arc<dyn PhysicalExpr>> {
-    Ok(Arc::new(InListExpr::new(expr, list, *negated)))
-}
-
-#[cfg(test)]
-mod tests {
-    use arrow::{array::StringArray, datatypes::Field};
-
-    use super::*;
-    use crate::error::Result;
-    use crate::physical_plan::expressions::{col, lit};
-
-    // applies the in_list expr to an input batch and list
-    macro_rules! in_list {
-        ($BATCH:expr, $LIST:expr, $NEGATED:expr, $EXPECTED:expr) => {{
-            let expr = in_list(col("a"), $LIST, $NEGATED).unwrap();
-            let result = expr.evaluate(&$BATCH)?.into_array($BATCH.num_rows());
-            let result = result
-                .as_any()
-                .downcast_ref::<BooleanArray>()
-                .expect("failed to downcast to BooleanArray");
-            let expected = &BooleanArray::from($EXPECTED);
-            assert_eq!(expected, result);
-        }};
-    }
-
-    #[test]
-    fn in_list_utf8() -> Result<()> {
-        let schema = Schema::new(vec![Field::new("a", DataType::Utf8, true)]);
-        let a = StringArray::from(vec![Some("a"), Some("d"), None]);
-        let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a)])?;
-
-        // expression: "a in ("a", "b")"
-        let list = vec![
-            lit(ScalarValue::Utf8(Some("a".to_string()))),
-            lit(ScalarValue::Utf8(Some("b".to_string()))),
-        ];
-        in_list!(batch, list, &false, vec![Some(true), Some(false), None]);
-
-        // expression: "a not in ("a", "b")"
-        let list = vec![
-            lit(ScalarValue::Utf8(Some("a".to_string()))),
-            lit(ScalarValue::Utf8(Some("b".to_string()))),
-        ];
-        in_list!(batch, list, &true, vec![Some(false), Some(true), None]);
-
-        // expression: "a not in ("a", "b")"
-        let list = vec![
-            lit(ScalarValue::Utf8(Some("a".to_string()))),
-            lit(ScalarValue::Utf8(Some("b".to_string()))),
-            lit(ScalarValue::Utf8(None)),
-        ];
-        in_list!(batch, list, &false, vec![Some(true), None, None]);
-
-        // expression: "a not in ("a", "b")"
-        let list = vec![
-            lit(ScalarValue::Utf8(Some("a".to_string()))),
-            lit(ScalarValue::Utf8(Some("b".to_string()))),
-            lit(ScalarValue::Utf8(None)),
-        ];
-        in_list!(batch, list, &true, vec![Some(false), None, None]);
-
-        Ok(())
-    }
-
-    #[test]
-    fn in_list_int64() -> Result<()> {
-        let schema = Schema::new(vec![Field::new("a", DataType::Int64, true)]);
-        let a = Int64Array::from(vec![Some(0), Some(2), None]);
-        let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a)])?;
-
-        // expression: "a in (0, 1)"
-        let list = vec![
-            lit(ScalarValue::Int64(Some(0))),
-            lit(ScalarValue::Int64(Some(1))),
-        ];
-        in_list!(batch, list, &false, vec![Some(true), Some(false), None]);
-
-        // expression: "a not in (0, 1)"
-        let list = vec![
-            lit(ScalarValue::Int64(Some(0))),
-            lit(ScalarValue::Int64(Some(1))),
-        ];
-        in_list!(batch, list, &true, vec![Some(false), Some(true), None]);
-
-        // expression: "a in (0, 1, NULL)"
-        let list = vec![
-            lit(ScalarValue::Int64(Some(0))),
-            lit(ScalarValue::Int64(Some(1))),
-            lit(ScalarValue::Utf8(None)),
-        ];
-        in_list!(batch, list, &false, vec![Some(true), None, None]);
-
-        // expression: "a not in (0, 1, NULL)"
-        let list = vec![
-            lit(ScalarValue::Int64(Some(0))),
-            lit(ScalarValue::Int64(Some(1))),
-            lit(ScalarValue::Utf8(None)),
-        ];
-        in_list!(batch, list, &true, vec![Some(false), None, None]);
-
-        Ok(())
-    }
-
-    #[test]
-    fn in_list_float64() -> Result<()> {
-        let schema = Schema::new(vec![Field::new("a", DataType::Float64, true)]);
-        let a = Float64Array::from(vec![Some(0.0), Some(0.2), None]);
-        let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a)])?;
-
-        // expression: "a in (0.0, 0.2)"
-        let list = vec![
-            lit(ScalarValue::Float64(Some(0.0))),
-            lit(ScalarValue::Float64(Some(0.1))),
-        ];
-        in_list!(batch, list, &false, vec![Some(true), Some(false), None]);
-
-        // expression: "a not in (0.0, 0.2)"
-        let list = vec![
-            lit(ScalarValue::Float64(Some(0.0))),
-            lit(ScalarValue::Float64(Some(0.1))),
-        ];
-        in_list!(batch, list, &true, vec![Some(false), Some(true), None]);
-
-        // expression: "a in (0.0, 0.2, NULL)"
-        let list = vec![
-            lit(ScalarValue::Float64(Some(0.0))),
-            lit(ScalarValue::Float64(Some(0.1))),
-            lit(ScalarValue::Utf8(None)),
-        ];
-        in_list!(batch, list, &false, vec![Some(true), None, None]);
-
-        // expression: "a not in (0.0, 0.2, NULL)"
-        let list = vec![
-            lit(ScalarValue::Float64(Some(0.0))),
-            lit(ScalarValue::Float64(Some(0.1))),
-            lit(ScalarValue::Utf8(None)),
-        ];
-        in_list!(batch, list, &true, vec![Some(false), None, None]);
-
-        Ok(())
-    }
-
-    #[test]
-    fn in_list_bool() -> Result<()> {
-        let schema = Schema::new(vec![Field::new("a", DataType::Boolean, true)]);
-        let a = BooleanArray::from(vec![Some(true), None]);
-        let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a)])?;
-
-        // expression: "a in (true)"
-        let list = vec![lit(ScalarValue::Boolean(Some(true)))];
-        in_list!(batch, list, &false, vec![Some(true), None]);
-
-        // expression: "a not in (true)"
-        let list = vec![lit(ScalarValue::Boolean(Some(true)))];
-        in_list!(batch, list, &true, vec![Some(false), None]);
-
-        // expression: "a in (true, NULL)"
-        let list = vec![
-            lit(ScalarValue::Boolean(Some(true))),
-            lit(ScalarValue::Utf8(None)),
-        ];
-        in_list!(batch, list, &false, vec![Some(true), None]);
-
-        // expression: "a not in (true, NULL)"
-        let list = vec![
-            lit(ScalarValue::Boolean(Some(true))),
-            lit(ScalarValue::Utf8(None)),
-        ];
-        in_list!(batch, list, &true, vec![Some(false), None]);
-
-        Ok(())
-    }
-}
diff --git a/rust/datafusion/src/physical_plan/expressions/is_not_null.rs b/rust/datafusion/src/physical_plan/expressions/is_not_null.rs
deleted file mode 100644
index 7ac2110b502..00000000000
--- a/rust/datafusion/src/physical_plan/expressions/is_not_null.rs
+++ /dev/null
@@ -1,119 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! IS NOT NULL expression
-
-use std::{any::Any, sync::Arc};
-
-use arrow::compute;
-use arrow::{
-    datatypes::{DataType, Schema},
-    record_batch::RecordBatch,
-};
-
-use crate::physical_plan::{ColumnarValue, PhysicalExpr};
-use crate::{error::Result, scalar::ScalarValue};
-
-/// IS NOT NULL expression
-#[derive(Debug)]
-pub struct IsNotNullExpr {
-    /// The input expression
-    arg: Arc<dyn PhysicalExpr>,
-}
-
-impl IsNotNullExpr {
-    /// Create new not expression
-    pub fn new(arg: Arc<dyn PhysicalExpr>) -> Self {
-        Self { arg }
-    }
-
-    /// Get the input expression
-    pub fn arg(&self) -> &Arc<dyn PhysicalExpr> {
-        &self.arg
-    }
-}
-
-impl std::fmt::Display for IsNotNullExpr {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        write!(f, "{} IS NOT NULL", self.arg)
-    }
-}
-
-impl PhysicalExpr for IsNotNullExpr {
-    /// Return a reference to Any that can be used for downcasting
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn data_type(&self, _input_schema: &Schema) -> Result<DataType> {
-        Ok(DataType::Boolean)
-    }
-
-    fn nullable(&self, _input_schema: &Schema) -> Result<bool> {
-        Ok(false)
-    }
-
-    fn evaluate(&self, batch: &RecordBatch) -> Result<ColumnarValue> {
-        let arg = self.arg.evaluate(batch)?;
-        match arg {
-            ColumnarValue::Array(array) => Ok(ColumnarValue::Array(Arc::new(
-                compute::is_not_null(array.as_ref())?,
-            ))),
-            ColumnarValue::Scalar(scalar) => Ok(ColumnarValue::Scalar(
-                ScalarValue::Boolean(Some(!scalar.is_null())),
-            )),
-        }
-    }
-}
-
-/// Create an IS NOT NULL expression
-pub fn is_not_null(arg: Arc<dyn PhysicalExpr>) -> Result<Arc<dyn PhysicalExpr>> {
-    Ok(Arc::new(IsNotNullExpr::new(arg)))
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::physical_plan::expressions::col;
-    use arrow::{
-        array::{BooleanArray, StringArray},
-        datatypes::*,
-        record_batch::RecordBatch,
-    };
-    use std::sync::Arc;
-
-    #[test]
-    fn is_not_null_op() -> Result<()> {
-        let schema = Schema::new(vec![Field::new("a", DataType::Utf8, true)]);
-        let a = StringArray::from(vec![Some("foo"), None]);
-        let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a)])?;
-
-        // expression: "a is not null"
-        let expr = is_not_null(col("a")).unwrap();
-        let result = expr.evaluate(&batch)?.into_array(batch.num_rows());
-        let result = result
-            .as_any()
-            .downcast_ref::<BooleanArray>()
-            .expect("failed to downcast to BooleanArray");
-
-        let expected = &BooleanArray::from(vec![true, false]);
-
-        assert_eq!(expected, result);
-
-        Ok(())
-    }
-}
diff --git a/rust/datafusion/src/physical_plan/expressions/is_null.rs b/rust/datafusion/src/physical_plan/expressions/is_null.rs
deleted file mode 100644
index dfa53f3f7d2..00000000000
--- a/rust/datafusion/src/physical_plan/expressions/is_null.rs
+++ /dev/null
@@ -1,119 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! IS NULL expression
-
-use std::{any::Any, sync::Arc};
-
-use arrow::compute;
-use arrow::{
-    datatypes::{DataType, Schema},
-    record_batch::RecordBatch,
-};
-
-use crate::physical_plan::{ColumnarValue, PhysicalExpr};
-use crate::{error::Result, scalar::ScalarValue};
-
-/// IS NULL expression
-#[derive(Debug)]
-pub struct IsNullExpr {
-    /// Input expression
-    arg: Arc<dyn PhysicalExpr>,
-}
-
-impl IsNullExpr {
-    /// Create new not expression
-    pub fn new(arg: Arc<dyn PhysicalExpr>) -> Self {
-        Self { arg }
-    }
-
-    /// Get the input expression
-    pub fn arg(&self) -> &Arc<dyn PhysicalExpr> {
-        &self.arg
-    }
-}
-
-impl std::fmt::Display for IsNullExpr {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        write!(f, "{} IS NULL", self.arg)
-    }
-}
-
-impl PhysicalExpr for IsNullExpr {
-    /// Return a reference to Any that can be used for downcasting
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn data_type(&self, _input_schema: &Schema) -> Result<DataType> {
-        Ok(DataType::Boolean)
-    }
-
-    fn nullable(&self, _input_schema: &Schema) -> Result<bool> {
-        Ok(false)
-    }
-
-    fn evaluate(&self, batch: &RecordBatch) -> Result<ColumnarValue> {
-        let arg = self.arg.evaluate(batch)?;
-        match arg {
-            ColumnarValue::Array(array) => Ok(ColumnarValue::Array(Arc::new(
-                compute::is_null(array.as_ref())?,
-            ))),
-            ColumnarValue::Scalar(scalar) => Ok(ColumnarValue::Scalar(
-                ScalarValue::Boolean(Some(scalar.is_null())),
-            )),
-        }
-    }
-}
-
-/// Create an IS NULL expression
-pub fn is_null(arg: Arc<dyn PhysicalExpr>) -> Result<Arc<dyn PhysicalExpr>> {
-    Ok(Arc::new(IsNullExpr::new(arg)))
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::physical_plan::expressions::col;
-    use arrow::{
-        array::{BooleanArray, StringArray},
-        datatypes::*,
-        record_batch::RecordBatch,
-    };
-    use std::sync::Arc;
-
-    #[test]
-    fn is_null_op() -> Result<()> {
-        let schema = Schema::new(vec![Field::new("a", DataType::Utf8, true)]);
-        let a = StringArray::from(vec![Some("foo"), None]);
-        let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a)])?;
-
-        // expression: "a is null"
-        let expr = is_null(col("a")).unwrap();
-        let result = expr.evaluate(&batch)?.into_array(batch.num_rows());
-        let result = result
-            .as_any()
-            .downcast_ref::<BooleanArray>()
-            .expect("failed to downcast to BooleanArray");
-
-        let expected = &BooleanArray::from(vec![false, true]);
-
-        assert_eq!(expected, result);
-
-        Ok(())
-    }
-}
diff --git a/rust/datafusion/src/physical_plan/expressions/literal.rs b/rust/datafusion/src/physical_plan/expressions/literal.rs
deleted file mode 100644
index 3110d39c87e..00000000000
--- a/rust/datafusion/src/physical_plan/expressions/literal.rs
+++ /dev/null
@@ -1,108 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Literal expression
-
-use std::any::Any;
-use std::sync::Arc;
-
-use arrow::{
-    datatypes::{DataType, Schema},
-    record_batch::RecordBatch,
-};
-
-use crate::physical_plan::{ColumnarValue, PhysicalExpr};
-use crate::{error::Result, scalar::ScalarValue};
-
-/// Represents a literal value
-#[derive(Debug)]
-pub struct Literal {
-    value: ScalarValue,
-}
-
-impl Literal {
-    /// Create a literal value expression
-    pub fn new(value: ScalarValue) -> Self {
-        Self { value }
-    }
-
-    /// Get the scalar value
-    pub fn value(&self) -> &ScalarValue {
-        &self.value
-    }
-}
-
-impl std::fmt::Display for Literal {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        write!(f, "{}", self.value)
-    }
-}
-
-impl PhysicalExpr for Literal {
-    /// Return a reference to Any that can be used for downcasting
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn data_type(&self, _input_schema: &Schema) -> Result<DataType> {
-        Ok(self.value.get_datatype())
-    }
-
-    fn nullable(&self, _input_schema: &Schema) -> Result<bool> {
-        Ok(self.value.is_null())
-    }
-
-    fn evaluate(&self, _batch: &RecordBatch) -> Result<ColumnarValue> {
-        Ok(ColumnarValue::Scalar(self.value.clone()))
-    }
-}
-
-/// Create a literal expression
-pub fn lit(value: ScalarValue) -> Arc<dyn PhysicalExpr> {
-    Arc::new(Literal::new(value))
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::error::Result;
-    use arrow::array::Int32Array;
-    use arrow::datatypes::*;
-
-    #[test]
-    fn literal_i32() -> Result<()> {
-        // create an arbitrary record bacth
-        let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]);
-        let a = Int32Array::from(vec![Some(1), None, Some(3), Some(4), Some(5)]);
-        let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a)])?;
-
-        // create and evaluate a literal expression
-        let literal_expr = lit(ScalarValue::from(42i32));
-        assert_eq!("42", format!("{}", literal_expr));
-
-        let literal_array = literal_expr.evaluate(&batch)?.into_array(batch.num_rows());
-        let literal_array = literal_array.as_any().downcast_ref::<Int32Array>().unwrap();
-
-        // note that the contents of the literal array are unrelated to the batch contents except for the length of the array
-        assert_eq!(literal_array.len(), 5); // 5 rows in the batch
-        for i in 0..literal_array.len() {
-            assert_eq!(literal_array.value(i), 42);
-        }
-
-        Ok(())
-    }
-}
diff --git a/rust/datafusion/src/physical_plan/expressions/min_max.rs b/rust/datafusion/src/physical_plan/expressions/min_max.rs
deleted file mode 100644
index 5ed14610ada..00000000000
--- a/rust/datafusion/src/physical_plan/expressions/min_max.rs
+++ /dev/null
@@ -1,655 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Defines physical expressions that can evaluated at runtime during query execution
-
-use std::any::Any;
-use std::convert::TryFrom;
-use std::sync::Arc;
-
-use crate::error::{DataFusionError, Result};
-use crate::physical_plan::{Accumulator, AggregateExpr, PhysicalExpr};
-use crate::scalar::ScalarValue;
-use arrow::compute;
-use arrow::datatypes::{DataType, TimeUnit};
-use arrow::{
-    array::{
-        ArrayRef, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array,
-        Int8Array, LargeStringArray, StringArray, TimestampMicrosecondArray,
-        TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray,
-        UInt16Array, UInt32Array, UInt64Array, UInt8Array,
-    },
-    datatypes::Field,
-};
-
-use super::format_state_name;
-
-/// MAX aggregate expression
-#[derive(Debug)]
-pub struct Max {
-    name: String,
-    data_type: DataType,
-    nullable: bool,
-    expr: Arc<dyn PhysicalExpr>,
-}
-
-impl Max {
-    /// Create a new MAX aggregate function
-    pub fn new(expr: Arc<dyn PhysicalExpr>, name: String, data_type: DataType) -> Self {
-        Self {
-            name,
-            expr,
-            data_type,
-            nullable: true,
-        }
-    }
-}
-
-impl AggregateExpr for Max {
-    /// Return a reference to Any that can be used for downcasting
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn field(&self) -> Result<Field> {
-        Ok(Field::new(
-            &self.name,
-            self.data_type.clone(),
-            self.nullable,
-        ))
-    }
-
-    fn state_fields(&self) -> Result<Vec<Field>> {
-        Ok(vec![Field::new(
-            &format_state_name(&self.name, "max"),
-            self.data_type.clone(),
-            true,
-        )])
-    }
-
-    fn expressions(&self) -> Vec<Arc<dyn PhysicalExpr>> {
-        vec![self.expr.clone()]
-    }
-
-    fn create_accumulator(&self) -> Result<Box<dyn Accumulator>> {
-        Ok(Box::new(MaxAccumulator::try_new(&self.data_type)?))
-    }
-}
-
-// Statically-typed version of min/max(array) -> ScalarValue for string types.
-macro_rules! typed_min_max_batch_string {
-    ($VALUES:expr, $ARRAYTYPE:ident, $SCALAR:ident, $OP:ident) => {{
-        let array = $VALUES.as_any().downcast_ref::<$ARRAYTYPE>().unwrap();
-        let value = compute::$OP(array);
-        let value = value.and_then(|e| Some(e.to_string()));
-        ScalarValue::$SCALAR(value)
-    }};
-}
-
-// Statically-typed version of min/max(array) -> ScalarValue for non-string types.
-macro_rules! typed_min_max_batch {
-    ($VALUES:expr, $ARRAYTYPE:ident, $SCALAR:ident, $OP:ident) => {{
-        let array = $VALUES.as_any().downcast_ref::<$ARRAYTYPE>().unwrap();
-        let value = compute::$OP(array);
-        ScalarValue::$SCALAR(value)
-    }};
-}
-
-// Statically-typed version of min/max(array) -> ScalarValue  for non-string types.
-// this is a macro to support both operations (min and max).
-macro_rules! min_max_batch {
-    ($VALUES:expr, $OP:ident) => {{
-        match $VALUES.data_type() {
-            // all types that have a natural order
-            DataType::Float64 => {
-                typed_min_max_batch!($VALUES, Float64Array, Float64, $OP)
-            }
-            DataType::Float32 => {
-                typed_min_max_batch!($VALUES, Float32Array, Float32, $OP)
-            }
-            DataType::Int64 => typed_min_max_batch!($VALUES, Int64Array, Int64, $OP),
-            DataType::Int32 => typed_min_max_batch!($VALUES, Int32Array, Int32, $OP),
-            DataType::Int16 => typed_min_max_batch!($VALUES, Int16Array, Int16, $OP),
-            DataType::Int8 => typed_min_max_batch!($VALUES, Int8Array, Int8, $OP),
-            DataType::UInt64 => typed_min_max_batch!($VALUES, UInt64Array, UInt64, $OP),
-            DataType::UInt32 => typed_min_max_batch!($VALUES, UInt32Array, UInt32, $OP),
-            DataType::UInt16 => typed_min_max_batch!($VALUES, UInt16Array, UInt16, $OP),
-            DataType::UInt8 => typed_min_max_batch!($VALUES, UInt8Array, UInt8, $OP),
-            DataType::Timestamp(TimeUnit::Second, _) => {
-                typed_min_max_batch!($VALUES, TimestampSecondArray, TimestampSecond, $OP)
-            }
-            DataType::Timestamp(TimeUnit::Millisecond, _) => typed_min_max_batch!(
-                $VALUES,
-                TimestampMillisecondArray,
-                TimestampMillisecond,
-                $OP
-            ),
-            DataType::Timestamp(TimeUnit::Microsecond, _) => typed_min_max_batch!(
-                $VALUES,
-                TimestampMicrosecondArray,
-                TimestampMicrosecond,
-                $OP
-            ),
-            DataType::Timestamp(TimeUnit::Nanosecond, _) => typed_min_max_batch!(
-                $VALUES,
-                TimestampNanosecondArray,
-                TimestampNanosecond,
-                $OP
-            ),
-            other => {
-                // This should have been handled before
-                return Err(DataFusionError::Internal(format!(
-                    "Min/Max accumulator not implemented for type {:?}",
-                    other
-                )));
-            }
-        }
-    }};
-}
-
-/// dynamically-typed min(array) -> ScalarValue
-fn min_batch(values: &ArrayRef) -> Result<ScalarValue> {
-    Ok(match values.data_type() {
-        DataType::Utf8 => {
-            typed_min_max_batch_string!(values, StringArray, Utf8, min_string)
-        }
-        DataType::LargeUtf8 => {
-            typed_min_max_batch_string!(values, LargeStringArray, LargeUtf8, min_string)
-        }
-        _ => min_max_batch!(values, min),
-    })
-}
-
-/// dynamically-typed max(array) -> ScalarValue
-fn max_batch(values: &ArrayRef) -> Result<ScalarValue> {
-    Ok(match values.data_type() {
-        DataType::Utf8 => {
-            typed_min_max_batch_string!(values, StringArray, Utf8, max_string)
-        }
-        DataType::LargeUtf8 => {
-            typed_min_max_batch_string!(values, LargeStringArray, LargeUtf8, max_string)
-        }
-        _ => min_max_batch!(values, max),
-    })
-}
-
-// min/max of two non-string scalar values.
-macro_rules! typed_min_max {
-    ($VALUE:expr, $DELTA:expr, $SCALAR:ident, $OP:ident) => {{
-        ScalarValue::$SCALAR(match ($VALUE, $DELTA) {
-            (None, None) => None,
-            (Some(a), None) => Some(a.clone()),
-            (None, Some(b)) => Some(b.clone()),
-            (Some(a), Some(b)) => Some((*a).$OP(*b)),
-        })
-    }};
-}
-
-// min/max of two scalar string values.
-macro_rules! typed_min_max_string {
-    ($VALUE:expr, $DELTA:expr, $SCALAR:ident, $OP:ident) => {{
-        ScalarValue::$SCALAR(match ($VALUE, $DELTA) {
-            (None, None) => None,
-            (Some(a), None) => Some(a.clone()),
-            (None, Some(b)) => Some(b.clone()),
-            (Some(a), Some(b)) => Some((a).$OP(b).clone()),
-        })
-    }};
-}
-
-// min/max of two scalar values of the same type
-macro_rules! min_max {
-    ($VALUE:expr, $DELTA:expr, $OP:ident) => {{
-        Ok(match ($VALUE, $DELTA) {
-            (ScalarValue::Float64(lhs), ScalarValue::Float64(rhs)) => {
-                typed_min_max!(lhs, rhs, Float64, $OP)
-            }
-            (ScalarValue::Float32(lhs), ScalarValue::Float32(rhs)) => {
-                typed_min_max!(lhs, rhs, Float32, $OP)
-            }
-            (ScalarValue::UInt64(lhs), ScalarValue::UInt64(rhs)) => {
-                typed_min_max!(lhs, rhs, UInt64, $OP)
-            }
-            (ScalarValue::UInt32(lhs), ScalarValue::UInt32(rhs)) => {
-                typed_min_max!(lhs, rhs, UInt32, $OP)
-            }
-            (ScalarValue::UInt16(lhs), ScalarValue::UInt16(rhs)) => {
-                typed_min_max!(lhs, rhs, UInt16, $OP)
-            }
-            (ScalarValue::UInt8(lhs), ScalarValue::UInt8(rhs)) => {
-                typed_min_max!(lhs, rhs, UInt8, $OP)
-            }
-            (ScalarValue::Int64(lhs), ScalarValue::Int64(rhs)) => {
-                typed_min_max!(lhs, rhs, Int64, $OP)
-            }
-            (ScalarValue::Int32(lhs), ScalarValue::Int32(rhs)) => {
-                typed_min_max!(lhs, rhs, Int32, $OP)
-            }
-            (ScalarValue::Int16(lhs), ScalarValue::Int16(rhs)) => {
-                typed_min_max!(lhs, rhs, Int16, $OP)
-            }
-            (ScalarValue::Int8(lhs), ScalarValue::Int8(rhs)) => {
-                typed_min_max!(lhs, rhs, Int8, $OP)
-            }
-            (ScalarValue::Utf8(lhs), ScalarValue::Utf8(rhs)) => {
-                typed_min_max_string!(lhs, rhs, Utf8, $OP)
-            }
-            (ScalarValue::LargeUtf8(lhs), ScalarValue::LargeUtf8(rhs)) => {
-                typed_min_max_string!(lhs, rhs, LargeUtf8, $OP)
-            }
-            (ScalarValue::TimestampSecond(lhs), ScalarValue::TimestampSecond(rhs)) => {
-                typed_min_max!(lhs, rhs, TimestampSecond, $OP)
-            }
-            (
-                ScalarValue::TimestampMillisecond(lhs),
-                ScalarValue::TimestampMillisecond(rhs),
-            ) => {
-                typed_min_max!(lhs, rhs, TimestampMillisecond, $OP)
-            }
-            (
-                ScalarValue::TimestampMicrosecond(lhs),
-                ScalarValue::TimestampMicrosecond(rhs),
-            ) => {
-                typed_min_max!(lhs, rhs, TimestampMicrosecond, $OP)
-            }
-            (
-                ScalarValue::TimestampNanosecond(lhs),
-                ScalarValue::TimestampNanosecond(rhs),
-            ) => {
-                typed_min_max!(lhs, rhs, TimestampNanosecond, $OP)
-            }
-            e => {
-                return Err(DataFusionError::Internal(format!(
-                    "MIN/MAX is not expected to receive a scalar {:?}",
-                    e
-                )))
-            }
-        })
-    }};
-}
-
-/// the minimum of two scalar values
-fn min(lhs: &ScalarValue, rhs: &ScalarValue) -> Result<ScalarValue> {
-    min_max!(lhs, rhs, min)
-}
-
-/// the maximum of two scalar values
-fn max(lhs: &ScalarValue, rhs: &ScalarValue) -> Result<ScalarValue> {
-    min_max!(lhs, rhs, max)
-}
-
-#[derive(Debug)]
-struct MaxAccumulator {
-    max: ScalarValue,
-}
-
-impl MaxAccumulator {
-    /// new max accumulator
-    pub fn try_new(datatype: &DataType) -> Result<Self> {
-        Ok(Self {
-            max: ScalarValue::try_from(datatype)?,
-        })
-    }
-}
-
-impl Accumulator for MaxAccumulator {
-    fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
-        let values = &values[0];
-        let delta = &max_batch(values)?;
-        self.max = max(&self.max, delta)?;
-        Ok(())
-    }
-
-    fn update(&mut self, values: &[ScalarValue]) -> Result<()> {
-        let value = &values[0];
-        self.max = max(&self.max, value)?;
-        Ok(())
-    }
-
-    fn merge(&mut self, states: &[ScalarValue]) -> Result<()> {
-        self.update(states)
-    }
-
-    fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> {
-        self.update_batch(states)
-    }
-
-    fn state(&self) -> Result<Vec<ScalarValue>> {
-        Ok(vec![self.max.clone()])
-    }
-
-    fn evaluate(&self) -> Result<ScalarValue> {
-        Ok(self.max.clone())
-    }
-}
-
-/// MIN aggregate expression
-#[derive(Debug)]
-pub struct Min {
-    name: String,
-    data_type: DataType,
-    nullable: bool,
-    expr: Arc<dyn PhysicalExpr>,
-}
-
-impl Min {
-    /// Create a new MIN aggregate function
-    pub fn new(expr: Arc<dyn PhysicalExpr>, name: String, data_type: DataType) -> Self {
-        Self {
-            name,
-            expr,
-            data_type,
-            nullable: true,
-        }
-    }
-}
-
-impl AggregateExpr for Min {
-    /// Return a reference to Any that can be used for downcasting
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn field(&self) -> Result<Field> {
-        Ok(Field::new(
-            &self.name,
-            self.data_type.clone(),
-            self.nullable,
-        ))
-    }
-
-    fn state_fields(&self) -> Result<Vec<Field>> {
-        Ok(vec![Field::new(
-            &format_state_name(&self.name, "min"),
-            self.data_type.clone(),
-            true,
-        )])
-    }
-
-    fn expressions(&self) -> Vec<Arc<dyn PhysicalExpr>> {
-        vec![self.expr.clone()]
-    }
-
-    fn create_accumulator(&self) -> Result<Box<dyn Accumulator>> {
-        Ok(Box::new(MinAccumulator::try_new(&self.data_type)?))
-    }
-}
-
-#[derive(Debug)]
-struct MinAccumulator {
-    min: ScalarValue,
-}
-
-impl MinAccumulator {
-    /// new min accumulator
-    pub fn try_new(datatype: &DataType) -> Result<Self> {
-        Ok(Self {
-            min: ScalarValue::try_from(datatype)?,
-        })
-    }
-}
-
-impl Accumulator for MinAccumulator {
-    fn state(&self) -> Result<Vec<ScalarValue>> {
-        Ok(vec![self.min.clone()])
-    }
-
-    fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
-        let values = &values[0];
-        let delta = &min_batch(values)?;
-        self.min = min(&self.min, delta)?;
-        Ok(())
-    }
-
-    fn update(&mut self, values: &[ScalarValue]) -> Result<()> {
-        let value = &values[0];
-        self.min = min(&self.min, value)?;
-        Ok(())
-    }
-
-    fn merge(&mut self, states: &[ScalarValue]) -> Result<()> {
-        self.update(states)
-    }
-
-    fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> {
-        self.update_batch(states)
-    }
-
-    fn evaluate(&self) -> Result<ScalarValue> {
-        Ok(self.min.clone())
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::physical_plan::expressions::col;
-    use crate::physical_plan::expressions::tests::aggregate;
-    use crate::{error::Result, generic_test_op};
-    use arrow::datatypes::*;
-    use arrow::record_batch::RecordBatch;
-
-    #[test]
-    fn max_i32() -> Result<()> {
-        let a: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5]));
-        generic_test_op!(
-            a,
-            DataType::Int32,
-            Max,
-            ScalarValue::from(5i32),
-            DataType::Int32
-        )
-    }
-
-    #[test]
-    fn min_i32() -> Result<()> {
-        let a: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5]));
-        generic_test_op!(
-            a,
-            DataType::Int32,
-            Min,
-            ScalarValue::from(1i32),
-            DataType::Int32
-        )
-    }
-
-    #[test]
-    fn max_utf8() -> Result<()> {
-        let a: ArrayRef = Arc::new(StringArray::from(vec!["d", "a", "c", "b"]));
-        generic_test_op!(
-            a,
-            DataType::Utf8,
-            Max,
-            ScalarValue::Utf8(Some("d".to_string())),
-            DataType::Utf8
-        )
-    }
-
-    #[test]
-    fn max_large_utf8() -> Result<()> {
-        let a: ArrayRef = Arc::new(LargeStringArray::from(vec!["d", "a", "c", "b"]));
-        generic_test_op!(
-            a,
-            DataType::LargeUtf8,
-            Max,
-            ScalarValue::LargeUtf8(Some("d".to_string())),
-            DataType::LargeUtf8
-        )
-    }
-
-    #[test]
-    fn min_utf8() -> Result<()> {
-        let a: ArrayRef = Arc::new(StringArray::from(vec!["d", "a", "c", "b"]));
-        generic_test_op!(
-            a,
-            DataType::Utf8,
-            Min,
-            ScalarValue::Utf8(Some("a".to_string())),
-            DataType::Utf8
-        )
-    }
-
-    #[test]
-    fn min_large_utf8() -> Result<()> {
-        let a: ArrayRef = Arc::new(LargeStringArray::from(vec!["d", "a", "c", "b"]));
-        generic_test_op!(
-            a,
-            DataType::LargeUtf8,
-            Min,
-            ScalarValue::LargeUtf8(Some("a".to_string())),
-            DataType::LargeUtf8
-        )
-    }
-
-    #[test]
-    fn max_i32_with_nulls() -> Result<()> {
-        let a: ArrayRef = Arc::new(Int32Array::from(vec![
-            Some(1),
-            None,
-            Some(3),
-            Some(4),
-            Some(5),
-        ]));
-        generic_test_op!(
-            a,
-            DataType::Int32,
-            Max,
-            ScalarValue::from(5i32),
-            DataType::Int32
-        )
-    }
-
-    #[test]
-    fn min_i32_with_nulls() -> Result<()> {
-        let a: ArrayRef = Arc::new(Int32Array::from(vec![
-            Some(1),
-            None,
-            Some(3),
-            Some(4),
-            Some(5),
-        ]));
-        generic_test_op!(
-            a,
-            DataType::Int32,
-            Min,
-            ScalarValue::from(1i32),
-            DataType::Int32
-        )
-    }
-
-    #[test]
-    fn max_i32_all_nulls() -> Result<()> {
-        let a: ArrayRef = Arc::new(Int32Array::from(vec![None, None]));
-        generic_test_op!(
-            a,
-            DataType::Int32,
-            Max,
-            ScalarValue::Int32(None),
-            DataType::Int32
-        )
-    }
-
-    #[test]
-    fn min_i32_all_nulls() -> Result<()> {
-        let a: ArrayRef = Arc::new(Int32Array::from(vec![None, None]));
-        generic_test_op!(
-            a,
-            DataType::Int32,
-            Min,
-            ScalarValue::Int32(None),
-            DataType::Int32
-        )
-    }
-
-    #[test]
-    fn max_u32() -> Result<()> {
-        let a: ArrayRef =
-            Arc::new(UInt32Array::from(vec![1_u32, 2_u32, 3_u32, 4_u32, 5_u32]));
-        generic_test_op!(
-            a,
-            DataType::UInt32,
-            Max,
-            ScalarValue::from(5_u32),
-            DataType::UInt32
-        )
-    }
-
-    #[test]
-    fn min_u32() -> Result<()> {
-        let a: ArrayRef =
-            Arc::new(UInt32Array::from(vec![1_u32, 2_u32, 3_u32, 4_u32, 5_u32]));
-        generic_test_op!(
-            a,
-            DataType::UInt32,
-            Min,
-            ScalarValue::from(1u32),
-            DataType::UInt32
-        )
-    }
-
-    #[test]
-    fn max_f32() -> Result<()> {
-        let a: ArrayRef =
-            Arc::new(Float32Array::from(vec![1_f32, 2_f32, 3_f32, 4_f32, 5_f32]));
-        generic_test_op!(
-            a,
-            DataType::Float32,
-            Max,
-            ScalarValue::from(5_f32),
-            DataType::Float32
-        )
-    }
-
-    #[test]
-    fn min_f32() -> Result<()> {
-        let a: ArrayRef =
-            Arc::new(Float32Array::from(vec![1_f32, 2_f32, 3_f32, 4_f32, 5_f32]));
-        generic_test_op!(
-            a,
-            DataType::Float32,
-            Min,
-            ScalarValue::from(1_f32),
-            DataType::Float32
-        )
-    }
-
-    #[test]
-    fn max_f64() -> Result<()> {
-        let a: ArrayRef =
-            Arc::new(Float64Array::from(vec![1_f64, 2_f64, 3_f64, 4_f64, 5_f64]));
-        generic_test_op!(
-            a,
-            DataType::Float64,
-            Max,
-            ScalarValue::from(5_f64),
-            DataType::Float64
-        )
-    }
-
-    #[test]
-    fn min_f64() -> Result<()> {
-        let a: ArrayRef =
-            Arc::new(Float64Array::from(vec![1_f64, 2_f64, 3_f64, 4_f64, 5_f64]));
-        generic_test_op!(
-            a,
-            DataType::Float64,
-            Min,
-            ScalarValue::from(1_f64),
-            DataType::Float64
-        )
-    }
-}
diff --git a/rust/datafusion/src/physical_plan/expressions/mod.rs b/rust/datafusion/src/physical_plan/expressions/mod.rs
deleted file mode 100644
index 6e252205955..00000000000
--- a/rust/datafusion/src/physical_plan/expressions/mod.rs
+++ /dev/null
@@ -1,135 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Defines physical expressions that can evaluated at runtime during query execution
-
-use std::sync::Arc;
-
-use super::ColumnarValue;
-use crate::error::{DataFusionError, Result};
-use crate::physical_plan::PhysicalExpr;
-use arrow::compute::kernels::sort::{SortColumn, SortOptions};
-use arrow::record_batch::RecordBatch;
-
-mod average;
-#[macro_use]
-mod binary;
-mod case;
-mod cast;
-mod coercion;
-mod column;
-mod count;
-mod in_list;
-mod is_not_null;
-mod is_null;
-mod literal;
-mod min_max;
-mod negative;
-mod not;
-mod nullif;
-mod sum;
-mod try_cast;
-
-pub use average::{avg_return_type, Avg, AvgAccumulator};
-pub use binary::{binary, binary_operator_data_type, BinaryExpr};
-pub use case::{case, CaseExpr};
-pub use cast::{cast, cast_with_options, CastExpr};
-pub use column::{col, Column};
-pub use count::Count;
-pub use in_list::{in_list, InListExpr};
-pub use is_not_null::{is_not_null, IsNotNullExpr};
-pub use is_null::{is_null, IsNullExpr};
-pub use literal::{lit, Literal};
-pub use min_max::{Max, Min};
-pub use negative::{negative, NegativeExpr};
-pub use not::{not, NotExpr};
-pub use nullif::{nullif_func, SUPPORTED_NULLIF_TYPES};
-pub use sum::{sum_return_type, Sum};
-pub use try_cast::{try_cast, TryCastExpr};
-/// returns the name of the state
-pub fn format_state_name(name: &str, state_name: &str) -> String {
-    format!("{}[{}]", name, state_name)
-}
-
-/// Represents Sort operation for a column in a RecordBatch
-#[derive(Clone, Debug)]
-pub struct PhysicalSortExpr {
-    /// Physical expression representing the column to sort
-    pub expr: Arc<dyn PhysicalExpr>,
-    /// Option to specify how the given column should be sorted
-    pub options: SortOptions,
-}
-
-impl PhysicalSortExpr {
-    /// evaluate the sort expression into SortColumn that can be passed into arrow sort kernel
-    pub fn evaluate_to_sort_column(&self, batch: &RecordBatch) -> Result<SortColumn> {
-        let value_to_sort = self.expr.evaluate(batch)?;
-        let array_to_sort = match value_to_sort {
-            ColumnarValue::Array(array) => array,
-            ColumnarValue::Scalar(scalar) => {
-                return Err(DataFusionError::Internal(format!(
-                    "Sort operation is not applicable to scalar value {}",
-                    scalar
-                )));
-            }
-        };
-        Ok(SortColumn {
-            values: array_to_sort,
-            options: Some(self.options),
-        })
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::{error::Result, physical_plan::AggregateExpr, scalar::ScalarValue};
-
-    /// macro to perform an aggregation and verify the result.
-    #[macro_export]
-    macro_rules! generic_test_op {
-        ($ARRAY:expr, $DATATYPE:expr, $OP:ident, $EXPECTED:expr, $EXPECTED_DATATYPE:expr) => {{
-            let schema = Schema::new(vec![Field::new("a", $DATATYPE, false)]);
-
-            let batch = RecordBatch::try_new(Arc::new(schema.clone()), vec![$ARRAY])?;
-
-            let agg =
-                Arc::new(<$OP>::new(col("a"), "bla".to_string(), $EXPECTED_DATATYPE));
-            let actual = aggregate(&batch, agg)?;
-            let expected = ScalarValue::from($EXPECTED);
-
-            assert_eq!(expected, actual);
-
-            Ok(())
-        }};
-    }
-
-    pub fn aggregate(
-        batch: &RecordBatch,
-        agg: Arc<dyn AggregateExpr>,
-    ) -> Result<ScalarValue> {
-        let mut accum = agg.create_accumulator()?;
-        let expr = agg.expressions();
-        let values = expr
-            .iter()
-            .map(|e| e.evaluate(batch))
-            .map(|r| r.map(|v| v.into_array(batch.num_rows())))
-            .collect::<Result<Vec<_>>>()?;
-        accum.update_batch(&values)?;
-        accum.evaluate()
-    }
-}
diff --git a/rust/datafusion/src/physical_plan/expressions/negative.rs b/rust/datafusion/src/physical_plan/expressions/negative.rs
deleted file mode 100644
index 65010c6acd1..00000000000
--- a/rust/datafusion/src/physical_plan/expressions/negative.rs
+++ /dev/null
@@ -1,133 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Negation (-) expression
-
-use std::any::Any;
-use std::sync::Arc;
-
-use arrow::array::ArrayRef;
-use arrow::compute::kernels::arithmetic::negate;
-use arrow::{
-    array::{Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array},
-    datatypes::{DataType, Schema},
-    record_batch::RecordBatch,
-};
-
-use crate::error::{DataFusionError, Result};
-use crate::physical_plan::{ColumnarValue, PhysicalExpr};
-
-use super::coercion;
-
-/// Invoke a compute kernel on array(s)
-macro_rules! compute_op {
-    // invoke unary operator
-    ($OPERAND:expr, $OP:ident, $DT:ident) => {{
-        let operand = $OPERAND
-            .as_any()
-            .downcast_ref::<$DT>()
-            .expect("compute_op failed to downcast array");
-        Ok(Arc::new($OP(&operand)?))
-    }};
-}
-
-/// Negative expression
-#[derive(Debug)]
-pub struct NegativeExpr {
-    /// Input expression
-    arg: Arc<dyn PhysicalExpr>,
-}
-
-impl NegativeExpr {
-    /// Create new not expression
-    pub fn new(arg: Arc<dyn PhysicalExpr>) -> Self {
-        Self { arg }
-    }
-
-    /// Get the input expression
-    pub fn arg(&self) -> &Arc<dyn PhysicalExpr> {
-        &self.arg
-    }
-}
-
-impl std::fmt::Display for NegativeExpr {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        write!(f, "(- {})", self.arg)
-    }
-}
-
-impl PhysicalExpr for NegativeExpr {
-    /// Return a reference to Any that can be used for downcasting
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn data_type(&self, input_schema: &Schema) -> Result<DataType> {
-        self.arg.data_type(input_schema)
-    }
-
-    fn nullable(&self, input_schema: &Schema) -> Result<bool> {
-        self.arg.nullable(input_schema)
-    }
-
-    fn evaluate(&self, batch: &RecordBatch) -> Result<ColumnarValue> {
-        let arg = self.arg.evaluate(batch)?;
-        match arg {
-            ColumnarValue::Array(array) => {
-                let result: Result<ArrayRef> = match array.data_type() {
-                    DataType::Int8 => compute_op!(array, negate, Int8Array),
-                    DataType::Int16 => compute_op!(array, negate, Int16Array),
-                    DataType::Int32 => compute_op!(array, negate, Int32Array),
-                    DataType::Int64 => compute_op!(array, negate, Int64Array),
-                    DataType::Float32 => compute_op!(array, negate, Float32Array),
-                    DataType::Float64 => compute_op!(array, negate, Float64Array),
-                    _ => Err(DataFusionError::Internal(format!(
-                        "(- '{:?}') can't be evaluated because the expression's type is {:?}, not signed numeric",
-                        self,
-                        array.data_type(),
-                    ))),
-                };
-                result.map(|a| ColumnarValue::Array(a))
-            }
-            ColumnarValue::Scalar(scalar) => {
-                Ok(ColumnarValue::Scalar(scalar.arithmetic_negate()))
-            }
-        }
-    }
-}
-
-/// Creates a unary expression NEGATIVE
-///
-/// # Errors
-///
-/// This function errors when the argument's type is not signed numeric
-pub fn negative(
-    arg: Arc<dyn PhysicalExpr>,
-    input_schema: &Schema,
-) -> Result<Arc<dyn PhysicalExpr>> {
-    let data_type = arg.data_type(input_schema)?;
-    if !coercion::is_signed_numeric(&data_type) {
-        Err(DataFusionError::Internal(
-            format!(
-                "(- '{:?}') can't be evaluated because the expression's type is {:?}, not signed numeric",
-                arg, data_type,
-            ),
-        ))
-    } else {
-        Ok(Arc::new(NegativeExpr::new(arg)))
-    }
-}
diff --git a/rust/datafusion/src/physical_plan/expressions/not.rs b/rust/datafusion/src/physical_plan/expressions/not.rs
deleted file mode 100644
index 23a1a46651d..00000000000
--- a/rust/datafusion/src/physical_plan/expressions/not.rs
+++ /dev/null
@@ -1,158 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Not expression
-
-use std::any::Any;
-use std::fmt;
-use std::sync::Arc;
-
-use super::ColumnarValue;
-use crate::error::{DataFusionError, Result};
-use crate::physical_plan::PhysicalExpr;
-use crate::scalar::ScalarValue;
-use arrow::array::BooleanArray;
-use arrow::datatypes::{DataType, Schema};
-use arrow::record_batch::RecordBatch;
-
-/// Not expression
-#[derive(Debug)]
-pub struct NotExpr {
-    /// Input expression
-    arg: Arc<dyn PhysicalExpr>,
-}
-
-impl NotExpr {
-    /// Create new not expression
-    pub fn new(arg: Arc<dyn PhysicalExpr>) -> Self {
-        Self { arg }
-    }
-
-    /// Get the input expression
-    pub fn arg(&self) -> &Arc<dyn PhysicalExpr> {
-        &self.arg
-    }
-}
-
-impl fmt::Display for NotExpr {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "NOT {}", self.arg)
-    }
-}
-
-impl PhysicalExpr for NotExpr {
-    /// Return a reference to Any that can be used for downcasting
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn data_type(&self, _input_schema: &Schema) -> Result<DataType> {
-        Ok(DataType::Boolean)
-    }
-
-    fn nullable(&self, input_schema: &Schema) -> Result<bool> {
-        self.arg.nullable(input_schema)
-    }
-
-    fn evaluate(&self, batch: &RecordBatch) -> Result<ColumnarValue> {
-        let arg = self.arg.evaluate(batch)?;
-        match arg {
-            ColumnarValue::Array(array) => {
-                let array =
-                    array
-                        .as_any()
-                        .downcast_ref::<BooleanArray>()
-                        .ok_or_else(|| {
-                            DataFusionError::Internal(
-                                "boolean_op failed to downcast array".to_owned(),
-                            )
-                        })?;
-                Ok(ColumnarValue::Array(Arc::new(
-                    arrow::compute::kernels::boolean::not(array)?,
-                )))
-            }
-            ColumnarValue::Scalar(scalar) => {
-                use std::convert::TryInto;
-                let bool_value: bool = scalar.try_into()?;
-                Ok(ColumnarValue::Scalar(ScalarValue::Boolean(Some(
-                    !bool_value,
-                ))))
-            }
-        }
-    }
-}
-
-/// Creates a unary expression NOT
-///
-/// # Errors
-///
-/// This function errors when the argument's type is not boolean
-pub fn not(
-    arg: Arc<dyn PhysicalExpr>,
-    input_schema: &Schema,
-) -> Result<Arc<dyn PhysicalExpr>> {
-    let data_type = arg.data_type(input_schema)?;
-    if data_type != DataType::Boolean {
-        Err(DataFusionError::Internal(format!(
-            "NOT '{:?}' can't be evaluated because the expression's type is {:?}, not boolean",
-            arg, data_type,
-        )))
-    } else {
-        Ok(Arc::new(NotExpr::new(arg)))
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::error::Result;
-    use crate::physical_plan::expressions::col;
-    use arrow::datatypes::*;
-
-    #[test]
-    fn neg_op() -> Result<()> {
-        let schema = Schema::new(vec![Field::new("a", DataType::Boolean, true)]);
-
-        let expr = not(col("a"), &schema)?;
-        assert_eq!(expr.data_type(&schema)?, DataType::Boolean);
-        assert_eq!(expr.nullable(&schema)?, true);
-
-        let input = BooleanArray::from(vec![Some(true), None, Some(false)]);
-        let expected = &BooleanArray::from(vec![Some(false), None, Some(true)]);
-
-        let batch =
-            RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(input)])?;
-
-        let result = expr.evaluate(&batch)?.into_array(batch.num_rows());
-        let result = result
-            .as_any()
-            .downcast_ref::<BooleanArray>()
-            .expect("failed to downcast to BooleanArray");
-        assert_eq!(result, expected);
-
-        Ok(())
-    }
-
-    /// verify that expression errors when the input expression is not a boolean.
-    #[test]
-    fn neg_op_not_null() {
-        let schema = Schema::new(vec![Field::new("a", DataType::Utf8, true)]);
-
-        let expr = not(col("a"), &schema);
-        assert!(expr.is_err());
-    }
-}
diff --git a/rust/datafusion/src/physical_plan/expressions/nullif.rs b/rust/datafusion/src/physical_plan/expressions/nullif.rs
deleted file mode 100644
index 7cc58ed2318..00000000000
--- a/rust/datafusion/src/physical_plan/expressions/nullif.rs
+++ /dev/null
@@ -1,188 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::sync::Arc;
-
-use super::ColumnarValue;
-use crate::error::{DataFusionError, Result};
-use crate::scalar::ScalarValue;
-use arrow::array::Array;
-use arrow::array::{
-    ArrayRef, BooleanArray, Date32Array, Date64Array, Float32Array, Float64Array,
-    Int16Array, Int32Array, Int64Array, Int8Array, StringArray, TimestampNanosecondArray,
-    UInt16Array, UInt32Array, UInt64Array, UInt8Array,
-};
-use arrow::compute::kernels::boolean::nullif;
-use arrow::compute::kernels::comparison::{eq, eq_scalar, eq_utf8, eq_utf8_scalar};
-use arrow::datatypes::{DataType, TimeUnit};
-
-/// Invoke a compute kernel on a primitive array and a Boolean Array
-macro_rules! compute_bool_array_op {
-    ($LEFT:expr, $RIGHT:expr, $OP:ident, $DT:ident) => {{
-        let ll = $LEFT
-            .as_any()
-            .downcast_ref::<$DT>()
-            .expect("compute_op failed to downcast array");
-        let rr = $RIGHT
-            .as_any()
-            .downcast_ref::<BooleanArray>()
-            .expect("compute_op failed to downcast array");
-        Ok(Arc::new($OP(&ll, &rr)?) as ArrayRef)
-    }};
-}
-
-/// Binary op between primitive and boolean arrays
-macro_rules! primitive_bool_array_op {
-    ($LEFT:expr, $RIGHT:expr, $OP:ident) => {{
-        match $LEFT.data_type() {
-            DataType::Int8 => compute_bool_array_op!($LEFT, $RIGHT, $OP, Int8Array),
-            DataType::Int16 => compute_bool_array_op!($LEFT, $RIGHT, $OP, Int16Array),
-            DataType::Int32 => compute_bool_array_op!($LEFT, $RIGHT, $OP, Int32Array),
-            DataType::Int64 => compute_bool_array_op!($LEFT, $RIGHT, $OP, Int64Array),
-            DataType::UInt8 => compute_bool_array_op!($LEFT, $RIGHT, $OP, UInt8Array),
-            DataType::UInt16 => compute_bool_array_op!($LEFT, $RIGHT, $OP, UInt16Array),
-            DataType::UInt32 => compute_bool_array_op!($LEFT, $RIGHT, $OP, UInt32Array),
-            DataType::UInt64 => compute_bool_array_op!($LEFT, $RIGHT, $OP, UInt64Array),
-            DataType::Float32 => compute_bool_array_op!($LEFT, $RIGHT, $OP, Float32Array),
-            DataType::Float64 => compute_bool_array_op!($LEFT, $RIGHT, $OP, Float64Array),
-            other => Err(DataFusionError::Internal(format!(
-                "Unsupported data type {:?} for NULLIF/primitive/boolean operator",
-                other
-            ))),
-        }
-    }};
-}
-
-/// Implements NULLIF(expr1, expr2)
-/// Args: 0 - left expr is any array
-///       1 - if the left is equal to this expr2, then the result is NULL, otherwise left value is passed.
-///
-pub fn nullif_func(args: &[ColumnarValue]) -> Result<ColumnarValue> {
-    if args.len() != 2 {
-        return Err(DataFusionError::Internal(format!(
-            "{:?} args were supplied but NULLIF takes exactly two args",
-            args.len(),
-        )));
-    }
-
-    let (lhs, rhs) = (&args[0], &args[1]);
-
-    match (lhs, rhs) {
-        (ColumnarValue::Array(lhs), ColumnarValue::Scalar(rhs)) => {
-            let cond_array = binary_array_op_scalar!(lhs, rhs.clone(), eq).unwrap()?;
-
-            let array = primitive_bool_array_op!(lhs, *cond_array, nullif)?;
-
-            Ok(ColumnarValue::Array(array))
-        }
-        (ColumnarValue::Array(lhs), ColumnarValue::Array(rhs)) => {
-            // Get args0 == args1 evaluated and produce a boolean array
-            let cond_array = binary_array_op!(lhs, rhs, eq)?;
-
-            // Now, invoke nullif on the result
-            let array = primitive_bool_array_op!(lhs, *cond_array, nullif)?;
-            Ok(ColumnarValue::Array(array))
-        }
-        _ => Err(DataFusionError::NotImplemented(
-            "nullif does not support a literal as first argument".to_string(),
-        )),
-    }
-}
-
-/// Currently supported types by the nullif function.
-/// The order of these types correspond to the order on which coercion applies
-/// This should thus be from least informative to most informative
-pub static SUPPORTED_NULLIF_TYPES: &[DataType] = &[
-    DataType::Boolean,
-    DataType::UInt8,
-    DataType::UInt16,
-    DataType::UInt32,
-    DataType::UInt64,
-    DataType::Int8,
-    DataType::Int16,
-    DataType::Int32,
-    DataType::Int64,
-    DataType::Float32,
-    DataType::Float64,
-];
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::error::Result;
-
-    #[test]
-    fn nullif_int32() -> Result<()> {
-        let a = Int32Array::from(vec![
-            Some(1),
-            Some(2),
-            None,
-            None,
-            Some(3),
-            None,
-            None,
-            Some(4),
-            Some(5),
-        ]);
-        let a = ColumnarValue::Array(Arc::new(a));
-
-        let lit_array = ColumnarValue::Scalar(ScalarValue::Int32(Some(2i32)));
-
-        let result = nullif_func(&[a, lit_array])?;
-        let result = result.into_array(0);
-
-        let expected = Arc::new(Int32Array::from(vec![
-            Some(1),
-            None,
-            None,
-            None,
-            Some(3),
-            None,
-            None,
-            Some(4),
-            Some(5),
-        ])) as ArrayRef;
-        assert_eq!(expected.as_ref(), result.as_ref());
-        Ok(())
-    }
-
-    #[test]
-    // Ensure that arrays with no nulls can also invoke NULLIF() correctly
-    fn nullif_int32_nonulls() -> Result<()> {
-        let a = Int32Array::from(vec![1, 3, 10, 7, 8, 1, 2, 4, 5]);
-        let a = ColumnarValue::Array(Arc::new(a));
-
-        let lit_array = ColumnarValue::Scalar(ScalarValue::Int32(Some(1i32)));
-
-        let result = nullif_func(&[a, lit_array])?;
-        let result = result.into_array(0);
-
-        let expected = Arc::new(Int32Array::from(vec![
-            None,
-            Some(3),
-            Some(10),
-            Some(7),
-            Some(8),
-            None,
-            Some(2),
-            Some(4),
-            Some(5),
-        ])) as ArrayRef;
-        assert_eq!(expected.as_ref(), result.as_ref());
-        Ok(())
-    }
-}
diff --git a/rust/datafusion/src/physical_plan/expressions/sum.rs b/rust/datafusion/src/physical_plan/expressions/sum.rs
deleted file mode 100644
index 6f50894003d..00000000000
--- a/rust/datafusion/src/physical_plan/expressions/sum.rs
+++ /dev/null
@@ -1,373 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Defines physical expressions that can evaluated at runtime during query execution
-
-use std::any::Any;
-use std::convert::TryFrom;
-use std::sync::Arc;
-
-use crate::error::{DataFusionError, Result};
-use crate::physical_plan::{Accumulator, AggregateExpr, PhysicalExpr};
-use crate::scalar::ScalarValue;
-use arrow::compute;
-use arrow::datatypes::DataType;
-use arrow::{
-    array::{
-        ArrayRef, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array,
-        Int8Array, UInt16Array, UInt32Array, UInt64Array, UInt8Array,
-    },
-    datatypes::Field,
-};
-
-use super::format_state_name;
-
-/// SUM aggregate expression
-#[derive(Debug)]
-pub struct Sum {
-    name: String,
-    data_type: DataType,
-    expr: Arc<dyn PhysicalExpr>,
-    nullable: bool,
-}
-
-/// function return type of a sum
-pub fn sum_return_type(arg_type: &DataType) -> Result<DataType> {
-    match arg_type {
-        DataType::Int8 | DataType::Int16 | DataType::Int32 | DataType::Int64 => {
-            Ok(DataType::Int64)
-        }
-        DataType::UInt8 | DataType::UInt16 | DataType::UInt32 | DataType::UInt64 => {
-            Ok(DataType::UInt64)
-        }
-        DataType::Float32 => Ok(DataType::Float32),
-        DataType::Float64 => Ok(DataType::Float64),
-        other => Err(DataFusionError::Plan(format!(
-            "SUM does not support type \"{:?}\"",
-            other
-        ))),
-    }
-}
-
-impl Sum {
-    /// Create a new SUM aggregate function
-    pub fn new(expr: Arc<dyn PhysicalExpr>, name: String, data_type: DataType) -> Self {
-        Self {
-            name,
-            expr,
-            data_type,
-            nullable: true,
-        }
-    }
-}
-
-impl AggregateExpr for Sum {
-    /// Return a reference to Any that can be used for downcasting
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn field(&self) -> Result<Field> {
-        Ok(Field::new(
-            &self.name,
-            self.data_type.clone(),
-            self.nullable,
-        ))
-    }
-
-    fn state_fields(&self) -> Result<Vec<Field>> {
-        Ok(vec![Field::new(
-            &format_state_name(&self.name, "sum"),
-            self.data_type.clone(),
-            self.nullable,
-        )])
-    }
-
-    fn expressions(&self) -> Vec<Arc<dyn PhysicalExpr>> {
-        vec![self.expr.clone()]
-    }
-
-    fn create_accumulator(&self) -> Result<Box<dyn Accumulator>> {
-        Ok(Box::new(SumAccumulator::try_new(&self.data_type)?))
-    }
-}
-
-#[derive(Debug)]
-struct SumAccumulator {
-    sum: ScalarValue,
-}
-
-impl SumAccumulator {
-    /// new sum accumulator
-    pub fn try_new(data_type: &DataType) -> Result<Self> {
-        Ok(Self {
-            sum: ScalarValue::try_from(data_type)?,
-        })
-    }
-}
-
-// returns the new value after sum with the new values, taking nullability into account
-macro_rules! typed_sum_delta_batch {
-    ($VALUES:expr, $ARRAYTYPE:ident, $SCALAR:ident) => {{
-        let array = $VALUES.as_any().downcast_ref::<$ARRAYTYPE>().unwrap();
-        let delta = compute::sum(array);
-        ScalarValue::$SCALAR(delta)
-    }};
-}
-
-// sums the array and returns a ScalarValue of its corresponding type.
-pub(super) fn sum_batch(values: &ArrayRef) -> Result<ScalarValue> {
-    Ok(match values.data_type() {
-        DataType::Float64 => typed_sum_delta_batch!(values, Float64Array, Float64),
-        DataType::Float32 => typed_sum_delta_batch!(values, Float32Array, Float32),
-        DataType::Int64 => typed_sum_delta_batch!(values, Int64Array, Int64),
-        DataType::Int32 => typed_sum_delta_batch!(values, Int32Array, Int32),
-        DataType::Int16 => typed_sum_delta_batch!(values, Int16Array, Int16),
-        DataType::Int8 => typed_sum_delta_batch!(values, Int8Array, Int8),
-        DataType::UInt64 => typed_sum_delta_batch!(values, UInt64Array, UInt64),
-        DataType::UInt32 => typed_sum_delta_batch!(values, UInt32Array, UInt32),
-        DataType::UInt16 => typed_sum_delta_batch!(values, UInt16Array, UInt16),
-        DataType::UInt8 => typed_sum_delta_batch!(values, UInt8Array, UInt8),
-        e => {
-            return Err(DataFusionError::Internal(format!(
-                "Sum is not expected to receive the type {:?}",
-                e
-            )))
-        }
-    })
-}
-
-// returns the sum of two scalar values, including coercion into $TYPE.
-macro_rules! typed_sum {
-    ($OLD_VALUE:expr, $DELTA:expr, $SCALAR:ident, $TYPE:ident) => {{
-        ScalarValue::$SCALAR(match ($OLD_VALUE, $DELTA) {
-            (None, None) => None,
-            (Some(a), None) => Some(a.clone()),
-            (None, Some(b)) => Some(b.clone() as $TYPE),
-            (Some(a), Some(b)) => Some(a + (*b as $TYPE)),
-        })
-    }};
-}
-
-pub(super) fn sum(lhs: &ScalarValue, rhs: &ScalarValue) -> Result<ScalarValue> {
-    Ok(match (lhs, rhs) {
-        // float64 coerces everything to f64
-        (ScalarValue::Float64(lhs), ScalarValue::Float64(rhs)) => {
-            typed_sum!(lhs, rhs, Float64, f64)
-        }
-        (ScalarValue::Float64(lhs), ScalarValue::Float32(rhs)) => {
-            typed_sum!(lhs, rhs, Float64, f64)
-        }
-        (ScalarValue::Float64(lhs), ScalarValue::Int64(rhs)) => {
-            typed_sum!(lhs, rhs, Float64, f64)
-        }
-        (ScalarValue::Float64(lhs), ScalarValue::Int32(rhs)) => {
-            typed_sum!(lhs, rhs, Float64, f64)
-        }
-        (ScalarValue::Float64(lhs), ScalarValue::Int16(rhs)) => {
-            typed_sum!(lhs, rhs, Float64, f64)
-        }
-        (ScalarValue::Float64(lhs), ScalarValue::Int8(rhs)) => {
-            typed_sum!(lhs, rhs, Float64, f64)
-        }
-        (ScalarValue::Float64(lhs), ScalarValue::UInt64(rhs)) => {
-            typed_sum!(lhs, rhs, Float64, f64)
-        }
-        (ScalarValue::Float64(lhs), ScalarValue::UInt32(rhs)) => {
-            typed_sum!(lhs, rhs, Float64, f64)
-        }
-        (ScalarValue::Float64(lhs), ScalarValue::UInt16(rhs)) => {
-            typed_sum!(lhs, rhs, Float64, f64)
-        }
-        (ScalarValue::Float64(lhs), ScalarValue::UInt8(rhs)) => {
-            typed_sum!(lhs, rhs, Float64, f64)
-        }
-        // float32 has no cast
-        (ScalarValue::Float32(lhs), ScalarValue::Float32(rhs)) => {
-            typed_sum!(lhs, rhs, Float32, f32)
-        }
-        // u64 coerces u* to u64
-        (ScalarValue::UInt64(lhs), ScalarValue::UInt64(rhs)) => {
-            typed_sum!(lhs, rhs, UInt64, u64)
-        }
-        (ScalarValue::UInt64(lhs), ScalarValue::UInt32(rhs)) => {
-            typed_sum!(lhs, rhs, UInt64, u64)
-        }
-        (ScalarValue::UInt64(lhs), ScalarValue::UInt16(rhs)) => {
-            typed_sum!(lhs, rhs, UInt64, u64)
-        }
-        (ScalarValue::UInt64(lhs), ScalarValue::UInt8(rhs)) => {
-            typed_sum!(lhs, rhs, UInt64, u64)
-        }
-        // i64 coerces i* to u64
-        (ScalarValue::Int64(lhs), ScalarValue::Int64(rhs)) => {
-            typed_sum!(lhs, rhs, Int64, i64)
-        }
-        (ScalarValue::Int64(lhs), ScalarValue::Int32(rhs)) => {
-            typed_sum!(lhs, rhs, Int64, i64)
-        }
-        (ScalarValue::Int64(lhs), ScalarValue::Int16(rhs)) => {
-            typed_sum!(lhs, rhs, Int64, i64)
-        }
-        (ScalarValue::Int64(lhs), ScalarValue::Int8(rhs)) => {
-            typed_sum!(lhs, rhs, Int64, i64)
-        }
-        e => {
-            return Err(DataFusionError::Internal(format!(
-                "Sum is not expected to receive a scalar {:?}",
-                e
-            )))
-        }
-    })
-}
-
-impl Accumulator for SumAccumulator {
-    fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
-        let values = &values[0];
-        self.sum = sum(&self.sum, &sum_batch(values)?)?;
-        Ok(())
-    }
-
-    fn update(&mut self, values: &[ScalarValue]) -> Result<()> {
-        // sum(v1, v2, v3) = v1 + v2 + v3
-        self.sum = sum(&self.sum, &values[0])?;
-        Ok(())
-    }
-
-    fn merge(&mut self, states: &[ScalarValue]) -> Result<()> {
-        // sum(sum1, sum2) = sum1 + sum2
-        self.update(states)
-    }
-
-    fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> {
-        // sum(sum1, sum2, sum3, ...) = sum1 + sum2 + sum3 + ...
-        self.update_batch(states)
-    }
-
-    fn state(&self) -> Result<Vec<ScalarValue>> {
-        Ok(vec![self.sum.clone()])
-    }
-
-    fn evaluate(&self) -> Result<ScalarValue> {
-        Ok(self.sum.clone())
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::physical_plan::expressions::col;
-    use crate::{error::Result, generic_test_op};
-    use arrow::datatypes::*;
-    use arrow::record_batch::RecordBatch;
-
-    #[test]
-    fn sum_i32() -> Result<()> {
-        let a: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5]));
-        generic_test_op!(
-            a,
-            DataType::Int32,
-            Sum,
-            ScalarValue::from(15i64),
-            DataType::Int64
-        )
-    }
-
-    #[test]
-    fn sum_i32_with_nulls() -> Result<()> {
-        let a: ArrayRef = Arc::new(Int32Array::from(vec![
-            Some(1),
-            None,
-            Some(3),
-            Some(4),
-            Some(5),
-        ]));
-        generic_test_op!(
-            a,
-            DataType::Int32,
-            Sum,
-            ScalarValue::from(13i64),
-            DataType::Int64
-        )
-    }
-
-    #[test]
-    fn sum_i32_all_nulls() -> Result<()> {
-        let a: ArrayRef = Arc::new(Int32Array::from(vec![None, None]));
-        generic_test_op!(
-            a,
-            DataType::Int32,
-            Sum,
-            ScalarValue::Int64(None),
-            DataType::Int64
-        )
-    }
-
-    #[test]
-    fn sum_u32() -> Result<()> {
-        let a: ArrayRef =
-            Arc::new(UInt32Array::from(vec![1_u32, 2_u32, 3_u32, 4_u32, 5_u32]));
-        generic_test_op!(
-            a,
-            DataType::UInt32,
-            Sum,
-            ScalarValue::from(15u64),
-            DataType::UInt64
-        )
-    }
-
-    #[test]
-    fn sum_f32() -> Result<()> {
-        let a: ArrayRef =
-            Arc::new(Float32Array::from(vec![1_f32, 2_f32, 3_f32, 4_f32, 5_f32]));
-        generic_test_op!(
-            a,
-            DataType::Float32,
-            Sum,
-            ScalarValue::from(15_f32),
-            DataType::Float32
-        )
-    }
-
-    #[test]
-    fn sum_f64() -> Result<()> {
-        let a: ArrayRef =
-            Arc::new(Float64Array::from(vec![1_f64, 2_f64, 3_f64, 4_f64, 5_f64]));
-        generic_test_op!(
-            a,
-            DataType::Float64,
-            Sum,
-            ScalarValue::from(15_f64),
-            DataType::Float64
-        )
-    }
-
-    fn aggregate(
-        batch: &RecordBatch,
-        agg: Arc<dyn AggregateExpr>,
-    ) -> Result<ScalarValue> {
-        let mut accum = agg.create_accumulator()?;
-        let expr = agg.expressions();
-        let values = expr
-            .iter()
-            .map(|e| e.evaluate(batch))
-            .map(|r| r.map(|v| v.into_array(batch.num_rows())))
-            .collect::<Result<Vec<_>>>()?;
-        accum.update_batch(&values)?;
-        accum.evaluate()
-    }
-}
diff --git a/rust/datafusion/src/physical_plan/expressions/try_cast.rs b/rust/datafusion/src/physical_plan/expressions/try_cast.rs
deleted file mode 100644
index 5e402fdea28..00000000000
--- a/rust/datafusion/src/physical_plan/expressions/try_cast.rs
+++ /dev/null
@@ -1,247 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::any::Any;
-use std::fmt;
-use std::sync::Arc;
-
-use super::ColumnarValue;
-use crate::error::{DataFusionError, Result};
-use crate::physical_plan::PhysicalExpr;
-use crate::scalar::ScalarValue;
-use arrow::compute;
-use arrow::compute::kernels;
-use arrow::datatypes::{DataType, Schema};
-use arrow::record_batch::RecordBatch;
-use compute::can_cast_types;
-
-/// TRY_CAST expression casts an expression to a specific data type and retuns NULL on invalid cast
-#[derive(Debug)]
-pub struct TryCastExpr {
-    /// The expression to cast
-    expr: Arc<dyn PhysicalExpr>,
-    /// The data type to cast to
-    cast_type: DataType,
-}
-
-impl TryCastExpr {
-    /// Create a new CastExpr
-    pub fn new(expr: Arc<dyn PhysicalExpr>, cast_type: DataType) -> Self {
-        Self { expr, cast_type }
-    }
-
-    /// The expression to cast
-    pub fn expr(&self) -> &Arc<dyn PhysicalExpr> {
-        &self.expr
-    }
-
-    /// The data type to cast to
-    pub fn cast_type(&self) -> &DataType {
-        &self.cast_type
-    }
-}
-
-impl fmt::Display for TryCastExpr {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "CAST({} AS {:?})", self.expr, self.cast_type)
-    }
-}
-
-impl PhysicalExpr for TryCastExpr {
-    /// Return a reference to Any that can be used for downcasting
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn data_type(&self, _input_schema: &Schema) -> Result<DataType> {
-        Ok(self.cast_type.clone())
-    }
-
-    fn nullable(&self, _input_schema: &Schema) -> Result<bool> {
-        Ok(true)
-    }
-
-    fn evaluate(&self, batch: &RecordBatch) -> Result<ColumnarValue> {
-        let value = self.expr.evaluate(batch)?;
-        match value {
-            ColumnarValue::Array(array) => Ok(ColumnarValue::Array(kernels::cast::cast(
-                &array,
-                &self.cast_type,
-            )?)),
-            ColumnarValue::Scalar(scalar) => {
-                let scalar_array = scalar.to_array();
-                let cast_array = kernels::cast::cast(&scalar_array, &self.cast_type)?;
-                let cast_scalar = ScalarValue::try_from_array(&cast_array, 0)?;
-                Ok(ColumnarValue::Scalar(cast_scalar))
-            }
-        }
-    }
-}
-
-/// Return a PhysicalExpression representing `expr` casted to
-/// `cast_type`, if any casting is needed.
-///
-/// Note that such casts may lose type information
-pub fn try_cast(
-    expr: Arc<dyn PhysicalExpr>,
-    input_schema: &Schema,
-    cast_type: DataType,
-) -> Result<Arc<dyn PhysicalExpr>> {
-    let expr_type = expr.data_type(input_schema)?;
-    if expr_type == cast_type {
-        Ok(expr.clone())
-    } else if can_cast_types(&expr_type, &cast_type) {
-        Ok(Arc::new(TryCastExpr::new(expr, cast_type)))
-    } else {
-        Err(DataFusionError::Internal(format!(
-            "Unsupported CAST from {:?} to {:?}",
-            expr_type, cast_type
-        )))
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::error::Result;
-    use crate::physical_plan::expressions::col;
-    use arrow::array::{StringArray, Time64NanosecondArray};
-    use arrow::{
-        array::{Array, Int32Array, Int64Array, TimestampNanosecondArray, UInt32Array},
-        datatypes::*,
-    };
-
-    // runs an end-to-end test of physical type cast
-    // 1. construct a record batch with a column "a" of type A
-    // 2. construct a physical expression of CAST(a AS B)
-    // 3. evaluate the expression
-    // 4. verify that the resulting expression is of type B
-    // 5. verify that the resulting values are downcastable and correct
-    macro_rules! generic_test_cast {
-        ($A_ARRAY:ident, $A_TYPE:expr, $A_VEC:expr, $TYPEARRAY:ident, $TYPE:expr, $VEC:expr) => {{
-            let schema = Schema::new(vec![Field::new("a", $A_TYPE, false)]);
-            let a = $A_ARRAY::from($A_VEC);
-            let batch =
-                RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(a)])?;
-
-            // verify that we can construct the expression
-            let expression = try_cast(col("a"), &schema, $TYPE)?;
-
-            // verify that its display is correct
-            assert_eq!(format!("CAST(a AS {:?})", $TYPE), format!("{}", expression));
-
-            // verify that the expression's type is correct
-            assert_eq!(expression.data_type(&schema)?, $TYPE);
-
-            // compute
-            let result = expression.evaluate(&batch)?.into_array(batch.num_rows());
-
-            // verify that the array's data_type is correct
-            assert_eq!(*result.data_type(), $TYPE);
-
-            // verify that the len is correct
-            assert_eq!(result.len(), $A_VEC.len());
-
-            // verify that the data itself is downcastable
-            let result = result
-                .as_any()
-                .downcast_ref::<$TYPEARRAY>()
-                .expect("failed to downcast");
-
-            // verify that the result itself is correct
-            for (i, x) in $VEC.iter().enumerate() {
-                match x {
-                    Some(x) => assert_eq!(result.value(i), *x),
-                    None => assert!(!result.is_valid(i)),
-                }
-            }
-        }};
-    }
-
-    #[test]
-    fn test_cast_i32_u32() -> Result<()> {
-        generic_test_cast!(
-            Int32Array,
-            DataType::Int32,
-            vec![1, 2, 3, 4, 5],
-            UInt32Array,
-            DataType::UInt32,
-            vec![
-                Some(1_u32),
-                Some(2_u32),
-                Some(3_u32),
-                Some(4_u32),
-                Some(5_u32)
-            ]
-        );
-        Ok(())
-    }
-
-    #[test]
-    fn test_cast_i32_utf8() -> Result<()> {
-        generic_test_cast!(
-            Int32Array,
-            DataType::Int32,
-            vec![1, 2, 3, 4, 5],
-            StringArray,
-            DataType::Utf8,
-            vec![Some("1"), Some("2"), Some("3"), Some("4"), Some("5")]
-        );
-        Ok(())
-    }
-
-    #[test]
-    fn test_try_cast_utf8_i32() -> Result<()> {
-        generic_test_cast!(
-            StringArray,
-            DataType::Utf8,
-            vec!["a", "2", "3", "b", "5"],
-            Int32Array,
-            DataType::Int32,
-            vec![None, Some(2), Some(3), None, Some(5)]
-        );
-        Ok(())
-    }
-
-    #[allow(clippy::redundant_clone)]
-    #[test]
-    fn test_cast_i64_t64() -> Result<()> {
-        let original = vec![1, 2, 3, 4, 5];
-        let expected: Vec<Option<i64>> = original
-            .iter()
-            .map(|i| Some(Time64NanosecondArray::from(vec![*i]).value(0)))
-            .collect();
-        generic_test_cast!(
-            Int64Array,
-            DataType::Int64,
-            original.clone(),
-            TimestampNanosecondArray,
-            DataType::Timestamp(TimeUnit::Nanosecond, None),
-            expected
-        );
-        Ok(())
-    }
-
-    #[test]
-    fn invalid_cast() {
-        // Ensure a useful error happens at plan time if invalid casts are used
-        let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]);
-
-        let result = try_cast(col("a"), &schema, DataType::LargeBinary);
-        result.expect_err("expected Invalid CAST");
-    }
-}
diff --git a/rust/datafusion/src/physical_plan/filter.rs b/rust/datafusion/src/physical_plan/filter.rs
deleted file mode 100644
index 61af78db8ed..00000000000
--- a/rust/datafusion/src/physical_plan/filter.rs
+++ /dev/null
@@ -1,240 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! FilterExec evaluates a boolean predicate against all input batches to determine which rows to
-//! include in its output batches.
-
-use std::any::Any;
-use std::pin::Pin;
-use std::sync::Arc;
-use std::task::{Context, Poll};
-
-use super::{RecordBatchStream, SendableRecordBatchStream};
-use crate::error::{DataFusionError, Result};
-use crate::physical_plan::{ExecutionPlan, Partitioning, PhysicalExpr};
-use arrow::array::BooleanArray;
-use arrow::compute::filter_record_batch;
-use arrow::datatypes::{DataType, SchemaRef};
-use arrow::error::Result as ArrowResult;
-use arrow::record_batch::RecordBatch;
-
-use async_trait::async_trait;
-
-use futures::stream::{Stream, StreamExt};
-
-/// FilterExec evaluates a boolean predicate against all input batches to determine which rows to
-/// include in its output batches.
-#[derive(Debug)]
-pub struct FilterExec {
-    /// The expression to filter on. This expression must evaluate to a boolean value.
-    predicate: Arc<dyn PhysicalExpr>,
-    /// The input plan
-    input: Arc<dyn ExecutionPlan>,
-}
-
-impl FilterExec {
-    /// Create a FilterExec on an input
-    pub fn try_new(
-        predicate: Arc<dyn PhysicalExpr>,
-        input: Arc<dyn ExecutionPlan>,
-    ) -> Result<Self> {
-        match predicate.data_type(input.schema().as_ref())? {
-            DataType::Boolean => Ok(Self {
-                predicate,
-                input: input.clone(),
-            }),
-            other => Err(DataFusionError::Plan(format!(
-                "Filter predicate must return boolean values, not {:?}",
-                other
-            ))),
-        }
-    }
-
-    /// The expression to filter on. This expression must evaluate to a boolean value.
-    pub fn predicate(&self) -> &Arc<dyn PhysicalExpr> {
-        &self.predicate
-    }
-
-    /// The input plan
-    pub fn input(&self) -> &Arc<dyn ExecutionPlan> {
-        &self.input
-    }
-}
-
-#[async_trait]
-impl ExecutionPlan for FilterExec {
-    /// Return a reference to Any that can be used for downcasting
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    /// Get the schema for this execution plan
-    fn schema(&self) -> SchemaRef {
-        // The filter operator does not make any changes to the schema of its input
-        self.input.schema()
-    }
-
-    fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
-        vec![self.input.clone()]
-    }
-
-    /// Get the output partitioning of this plan
-    fn output_partitioning(&self) -> Partitioning {
-        self.input.output_partitioning()
-    }
-
-    fn with_new_children(
-        &self,
-        children: Vec<Arc<dyn ExecutionPlan>>,
-    ) -> Result<Arc<dyn ExecutionPlan>> {
-        match children.len() {
-            1 => Ok(Arc::new(FilterExec::try_new(
-                self.predicate.clone(),
-                children[0].clone(),
-            )?)),
-            _ => Err(DataFusionError::Internal(
-                "FilterExec wrong number of children".to_string(),
-            )),
-        }
-    }
-
-    async fn execute(&self, partition: usize) -> Result<SendableRecordBatchStream> {
-        Ok(Box::pin(FilterExecStream {
-            schema: self.input.schema().clone(),
-            predicate: self.predicate.clone(),
-            input: self.input.execute(partition).await?,
-        }))
-    }
-}
-
-/// The FilterExec streams wraps the input iterator and applies the predicate expression to
-/// determine which rows to include in its output batches
-struct FilterExecStream {
-    /// Output schema, which is the same as the input schema for this operator
-    schema: SchemaRef,
-    /// The expression to filter on. This expression must evaluate to a boolean value.
-    predicate: Arc<dyn PhysicalExpr>,
-    /// The input partition to filter.
-    input: SendableRecordBatchStream,
-}
-
-fn batch_filter(
-    batch: &RecordBatch,
-    predicate: &Arc<dyn PhysicalExpr>,
-) -> ArrowResult<RecordBatch> {
-    predicate
-        .evaluate(&batch)
-        .map(|v| v.into_array(batch.num_rows()))
-        .map_err(DataFusionError::into_arrow_external_error)
-        .and_then(|array| {
-            array
-                .as_any()
-                .downcast_ref::<BooleanArray>()
-                .ok_or_else(|| {
-                    DataFusionError::Internal(
-                        "Filter predicate evaluated to non-boolean value".to_string(),
-                    )
-                    .into_arrow_external_error()
-                })
-                // apply filter array to record batch
-                .and_then(|filter_array| filter_record_batch(batch, filter_array))
-        })
-}
-
-impl Stream for FilterExecStream {
-    type Item = ArrowResult<RecordBatch>;
-
-    fn poll_next(
-        mut self: Pin<&mut Self>,
-        cx: &mut Context<'_>,
-    ) -> Poll<Option<Self::Item>> {
-        self.input.poll_next_unpin(cx).map(|x| match x {
-            Some(Ok(batch)) => Some(batch_filter(&batch, &self.predicate)),
-            other => other,
-        })
-    }
-
-    fn size_hint(&self) -> (usize, Option<usize>) {
-        // same number of record batches
-        self.input.size_hint()
-    }
-}
-
-impl RecordBatchStream for FilterExecStream {
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-}
-
-#[cfg(test)]
-mod tests {
-
-    use super::*;
-    use crate::physical_plan::csv::{CsvExec, CsvReadOptions};
-    use crate::physical_plan::expressions::*;
-    use crate::physical_plan::ExecutionPlan;
-    use crate::scalar::ScalarValue;
-    use crate::test;
-    use crate::{logical_plan::Operator, physical_plan::collect};
-    use std::iter::Iterator;
-
-    #[tokio::test]
-    async fn simple_predicate() -> Result<()> {
-        let schema = test::aggr_test_schema();
-
-        let partitions = 4;
-        let path = test::create_partitioned_csv("aggregate_test_100.csv", partitions)?;
-
-        let csv = CsvExec::try_new(
-            &path,
-            CsvReadOptions::new().schema(&schema),
-            None,
-            1024,
-            None,
-        )?;
-
-        let predicate: Arc<dyn PhysicalExpr> = binary(
-            binary(
-                col("c2"),
-                Operator::Gt,
-                lit(ScalarValue::from(1u32)),
-                &schema,
-            )?,
-            Operator::And,
-            binary(
-                col("c2"),
-                Operator::Lt,
-                lit(ScalarValue::from(4u32)),
-                &schema,
-            )?,
-            &schema,
-        )?;
-
-        let filter: Arc<dyn ExecutionPlan> =
-            Arc::new(FilterExec::try_new(predicate, Arc::new(csv))?);
-
-        let results = collect(filter).await?;
-
-        results
-            .iter()
-            .for_each(|batch| assert_eq!(13, batch.num_columns()));
-        let row_count: usize = results.iter().map(|batch| batch.num_rows()).sum();
-        assert_eq!(41, row_count);
-
-        Ok(())
-    }
-}
diff --git a/rust/datafusion/src/physical_plan/functions.rs b/rust/datafusion/src/physical_plan/functions.rs
deleted file mode 100644
index 56365fec1dc..00000000000
--- a/rust/datafusion/src/physical_plan/functions.rs
+++ /dev/null
@@ -1,3767 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Declaration of built-in (scalar) functions.
-//! This module contains built-in functions' enumeration and metadata.
-//!
-//! Generally, a function has:
-//! * a signature
-//! * a return type, that is a function of the incoming argument's types
-//! * the computation, that must accept each valid signature
-//!
-//! * Signature: see `Signature`
-//! * Return type: a function `(arg_types) -> return_type`. E.g. for sqrt, ([f32]) -> f32, ([f64]) -> f64.
-//!
-//! This module also has a set of coercion rules to improve user experience: if an argument i32 is passed
-//! to a function that supports f64, it is coerced to f64.
-
-use super::{
-    type_coercion::{coerce, data_types},
-    ColumnarValue, PhysicalExpr,
-};
-use crate::physical_plan::array_expressions;
-use crate::physical_plan::datetime_expressions;
-use crate::physical_plan::expressions::{nullif_func, SUPPORTED_NULLIF_TYPES};
-use crate::physical_plan::math_expressions;
-use crate::physical_plan::string_expressions;
-use crate::{
-    error::{DataFusionError, Result},
-    scalar::ScalarValue,
-};
-use arrow::{
-    array::ArrayRef,
-    compute::kernels::length::{bit_length, length},
-    datatypes::TimeUnit,
-    datatypes::{DataType, Field, Int32Type, Int64Type, Schema},
-    record_batch::RecordBatch,
-};
-use fmt::{Debug, Formatter};
-use std::{any::Any, fmt, str::FromStr, sync::Arc};
-
-/// A function's signature, which defines the function's supported argument types.
-#[derive(Debug, Clone, PartialEq)]
-pub enum Signature {
-    /// arbitrary number of arguments of an common type out of a list of valid types
-    // A function such as `concat` is `Variadic(vec![DataType::Utf8, DataType::LargeUtf8])`
-    Variadic(Vec<DataType>),
-    /// arbitrary number of arguments of an arbitrary but equal type
-    // A function such as `array` is `VariadicEqual`
-    // The first argument decides the type used for coercion
-    VariadicEqual,
-    /// fixed number of arguments of an arbitrary but equal type out of a list of valid types
-    // A function of one argument of f64 is `Uniform(1, vec![DataType::Float64])`
-    // A function of one argument of f64 or f32 is `Uniform(1, vec![DataType::Float32, DataType::Float64])`
-    Uniform(usize, Vec<DataType>),
-    /// exact number of arguments of an exact type
-    Exact(Vec<DataType>),
-    /// fixed number of arguments of arbitrary types
-    Any(usize),
-    /// One of a list of signatures
-    OneOf(Vec<Signature>),
-}
-
-/// Scalar function
-pub type ScalarFunctionImplementation =
-    Arc<dyn Fn(&[ColumnarValue]) -> Result<ColumnarValue> + Send + Sync>;
-
-/// A function's return type
-pub type ReturnTypeFunction =
-    Arc<dyn Fn(&[DataType]) -> Result<Arc<DataType>> + Send + Sync>;
-
-/// Enum of all built-in scalar functions
-#[derive(Debug, Clone, PartialEq, Eq)]
-pub enum BuiltinScalarFunction {
-    // math functions
-    /// abs
-    Abs,
-    /// acos
-    Acos,
-    /// asin
-    Asin,
-    /// atan
-    Atan,
-    /// ceil
-    Ceil,
-    /// cos
-    Cos,
-    /// exp
-    Exp,
-    /// floor
-    Floor,
-    /// log, also known as ln
-    Log,
-    /// log10
-    Log10,
-    /// log2
-    Log2,
-    /// round
-    Round,
-    /// signum
-    Signum,
-    /// sin
-    Sin,
-    /// sqrt
-    Sqrt,
-    /// tan
-    Tan,
-    /// trunc
-    Trunc,
-
-    // string functions
-    /// construct an array from columns
-    Array,
-    /// ascii
-    Ascii,
-    /// bit_length
-    BitLength,
-    /// btrim
-    Btrim,
-    /// character_length
-    CharacterLength,
-    /// chr
-    Chr,
-    /// concat
-    Concat,
-    /// concat_ws
-    ConcatWithSeparator,
-    /// date_part
-    DatePart,
-    /// date_trunc
-    DateTrunc,
-    /// initcap
-    InitCap,
-    /// left
-    Left,
-    /// lpad
-    Lpad,
-    /// lower
-    Lower,
-    /// ltrim
-    Ltrim,
-    /// md5
-    MD5,
-    /// nullif
-    NullIf,
-    /// octet_length
-    OctetLength,
-    /// regexp_replace
-    RegexpReplace,
-    /// repeat
-    Repeat,
-    /// replace
-    Replace,
-    /// reverse
-    Reverse,
-    /// right
-    Right,
-    /// rpad
-    Rpad,
-    /// rtrim
-    Rtrim,
-    /// sha224
-    SHA224,
-    /// sha256
-    SHA256,
-    /// sha384
-    SHA384,
-    /// Sha512
-    SHA512,
-    /// split_part
-    SplitPart,
-    /// starts_with
-    StartsWith,
-    /// strpos
-    Strpos,
-    /// substr
-    Substr,
-    /// to_hex
-    ToHex,
-    /// to_timestamp
-    ToTimestamp,
-    /// translate
-    Translate,
-    /// trim
-    Trim,
-    /// upper
-    Upper,
-    /// regexp_match
-    RegexpMatch,
-}
-
-impl fmt::Display for BuiltinScalarFunction {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        // lowercase of the debug.
-        write!(f, "{}", format!("{:?}", self).to_lowercase())
-    }
-}
-
-impl FromStr for BuiltinScalarFunction {
-    type Err = DataFusionError;
-    fn from_str(name: &str) -> Result<BuiltinScalarFunction> {
-        Ok(match name {
-            // math functions
-            "abs" => BuiltinScalarFunction::Abs,
-            "acos" => BuiltinScalarFunction::Acos,
-            "asin" => BuiltinScalarFunction::Asin,
-            "atan" => BuiltinScalarFunction::Atan,
-            "ceil" => BuiltinScalarFunction::Ceil,
-            "cos" => BuiltinScalarFunction::Cos,
-            "exp" => BuiltinScalarFunction::Exp,
-            "floor" => BuiltinScalarFunction::Floor,
-            "log" => BuiltinScalarFunction::Log,
-            "log10" => BuiltinScalarFunction::Log10,
-            "log2" => BuiltinScalarFunction::Log2,
-            "round" => BuiltinScalarFunction::Round,
-            "signum" => BuiltinScalarFunction::Signum,
-            "sin" => BuiltinScalarFunction::Sin,
-            "sqrt" => BuiltinScalarFunction::Sqrt,
-            "tan" => BuiltinScalarFunction::Tan,
-            "trunc" => BuiltinScalarFunction::Trunc,
-
-            // string functions
-            "array" => BuiltinScalarFunction::Array,
-            "ascii" => BuiltinScalarFunction::Ascii,
-            "bit_length" => BuiltinScalarFunction::BitLength,
-            "btrim" => BuiltinScalarFunction::Btrim,
-            "char_length" => BuiltinScalarFunction::CharacterLength,
-            "character_length" => BuiltinScalarFunction::CharacterLength,
-            "concat" => BuiltinScalarFunction::Concat,
-            "concat_ws" => BuiltinScalarFunction::ConcatWithSeparator,
-            "chr" => BuiltinScalarFunction::Chr,
-            "date_part" => BuiltinScalarFunction::DatePart,
-            "date_trunc" => BuiltinScalarFunction::DateTrunc,
-            "initcap" => BuiltinScalarFunction::InitCap,
-            "left" => BuiltinScalarFunction::Left,
-            "length" => BuiltinScalarFunction::CharacterLength,
-            "lower" => BuiltinScalarFunction::Lower,
-            "lpad" => BuiltinScalarFunction::Lpad,
-            "ltrim" => BuiltinScalarFunction::Ltrim,
-            "md5" => BuiltinScalarFunction::MD5,
-            "nullif" => BuiltinScalarFunction::NullIf,
-            "octet_length" => BuiltinScalarFunction::OctetLength,
-            "regexp_replace" => BuiltinScalarFunction::RegexpReplace,
-            "repeat" => BuiltinScalarFunction::Repeat,
-            "replace" => BuiltinScalarFunction::Replace,
-            "reverse" => BuiltinScalarFunction::Reverse,
-            "right" => BuiltinScalarFunction::Right,
-            "rpad" => BuiltinScalarFunction::Rpad,
-            "rtrim" => BuiltinScalarFunction::Rtrim,
-            "sha224" => BuiltinScalarFunction::SHA224,
-            "sha256" => BuiltinScalarFunction::SHA256,
-            "sha384" => BuiltinScalarFunction::SHA384,
-            "sha512" => BuiltinScalarFunction::SHA512,
-            "split_part" => BuiltinScalarFunction::SplitPart,
-            "starts_with" => BuiltinScalarFunction::StartsWith,
-            "strpos" => BuiltinScalarFunction::Strpos,
-            "substr" => BuiltinScalarFunction::Substr,
-            "to_hex" => BuiltinScalarFunction::ToHex,
-            "to_timestamp" => BuiltinScalarFunction::ToTimestamp,
-            "translate" => BuiltinScalarFunction::Translate,
-            "trim" => BuiltinScalarFunction::Trim,
-            "upper" => BuiltinScalarFunction::Upper,
-            "regexp_match" => BuiltinScalarFunction::RegexpMatch,
-            _ => {
-                return Err(DataFusionError::Plan(format!(
-                    "There is no built-in function named {}",
-                    name
-                )))
-            }
-        })
-    }
-}
-
-/// Returns the datatype of the scalar function
-pub fn return_type(
-    fun: &BuiltinScalarFunction,
-    arg_types: &[DataType],
-) -> Result<DataType> {
-    // Note that this function *must* return the same type that the respective physical expression returns
-    // or the execution panics.
-
-    // verify that this is a valid set of data types for this function
-    data_types(&arg_types, &signature(fun))?;
-
-    if arg_types.is_empty() {
-        // functions currently cannot be evaluated without arguments, as they can't
-        // know the number of rows to return.
-        return Err(DataFusionError::Plan(format!(
-            "Function '{}' requires at least one argument",
-            fun
-        )));
-    }
-
-    // the return type of the built in function.
-    // Some built-in functions' return type depends on the incoming type.
-    match fun {
-        BuiltinScalarFunction::Array => Ok(DataType::FixedSizeList(
-            Box::new(Field::new("item", arg_types[0].clone(), true)),
-            arg_types.len() as i32,
-        )),
-        BuiltinScalarFunction::Ascii => Ok(DataType::Int32),
-        BuiltinScalarFunction::BitLength => Ok(match arg_types[0] {
-            DataType::LargeUtf8 => DataType::Int64,
-            DataType::Utf8 => DataType::Int32,
-            _ => {
-                // this error is internal as `data_types` should have captured this.
-                return Err(DataFusionError::Internal(
-                    "The bit_length function can only accept strings.".to_string(),
-                ));
-            }
-        }),
-        BuiltinScalarFunction::Btrim => Ok(match arg_types[0] {
-            DataType::LargeUtf8 => DataType::LargeUtf8,
-            DataType::Utf8 => DataType::Utf8,
-            _ => {
-                // this error is internal as `data_types` should have captured this.
-                return Err(DataFusionError::Internal(
-                    "The btrim function can only accept strings.".to_string(),
-                ));
-            }
-        }),
-        BuiltinScalarFunction::CharacterLength => Ok(match arg_types[0] {
-            DataType::LargeUtf8 => DataType::Int64,
-            DataType::Utf8 => DataType::Int32,
-            _ => {
-                // this error is internal as `data_types` should have captured this.
-                return Err(DataFusionError::Internal(
-                    "The character_length function can only accept strings.".to_string(),
-                ));
-            }
-        }),
-        BuiltinScalarFunction::Chr => Ok(DataType::Utf8),
-        BuiltinScalarFunction::Concat => Ok(DataType::Utf8),
-        BuiltinScalarFunction::ConcatWithSeparator => Ok(DataType::Utf8),
-        BuiltinScalarFunction::DatePart => Ok(DataType::Int32),
-        BuiltinScalarFunction::DateTrunc => {
-            Ok(DataType::Timestamp(TimeUnit::Nanosecond, None))
-        }
-        BuiltinScalarFunction::InitCap => Ok(match arg_types[0] {
-            DataType::LargeUtf8 => DataType::LargeUtf8,
-            DataType::Utf8 => DataType::Utf8,
-            _ => {
-                // this error is internal as `data_types` should have captured this.
-                return Err(DataFusionError::Internal(
-                    "The initcap function can only accept strings.".to_string(),
-                ));
-            }
-        }),
-        BuiltinScalarFunction::Left => Ok(match arg_types[0] {
-            DataType::LargeUtf8 => DataType::LargeUtf8,
-            DataType::Utf8 => DataType::Utf8,
-            _ => {
-                // this error is internal as `data_types` should have captured this.
-                return Err(DataFusionError::Internal(
-                    "The left function can only accept strings.".to_string(),
-                ));
-            }
-        }),
-        BuiltinScalarFunction::Lower => Ok(match arg_types[0] {
-            DataType::LargeUtf8 => DataType::LargeUtf8,
-            DataType::Utf8 => DataType::Utf8,
-            _ => {
-                // this error is internal as `data_types` should have captured this.
-                return Err(DataFusionError::Internal(
-                    "The upper function can only accept strings.".to_string(),
-                ));
-            }
-        }),
-        BuiltinScalarFunction::Lpad => Ok(match arg_types[0] {
-            DataType::LargeUtf8 => DataType::LargeUtf8,
-            DataType::Utf8 => DataType::Utf8,
-            _ => {
-                // this error is internal as `data_types` should have captured this.
-                return Err(DataFusionError::Internal(
-                    "The lpad function can only accept strings.".to_string(),
-                ));
-            }
-        }),
-        BuiltinScalarFunction::Ltrim => Ok(match arg_types[0] {
-            DataType::LargeUtf8 => DataType::LargeUtf8,
-            DataType::Utf8 => DataType::Utf8,
-            _ => {
-                // this error is internal as `data_types` should have captured this.
-                return Err(DataFusionError::Internal(
-                    "The ltrim function can only accept strings.".to_string(),
-                ));
-            }
-        }),
-        BuiltinScalarFunction::MD5 => Ok(match arg_types[0] {
-            DataType::LargeUtf8 => DataType::LargeUtf8,
-            DataType::Utf8 => DataType::Utf8,
-            _ => {
-                // this error is internal as `data_types` should have captured this.
-                return Err(DataFusionError::Internal(
-                    "The md5 function can only accept strings.".to_string(),
-                ));
-            }
-        }),
-        BuiltinScalarFunction::NullIf => {
-            // NULLIF has two args and they might get coerced, get a preview of this
-            let coerced_types = data_types(arg_types, &signature(fun));
-            coerced_types.map(|typs| typs[0].clone())
-        }
-        BuiltinScalarFunction::OctetLength => Ok(match arg_types[0] {
-            DataType::LargeUtf8 => DataType::Int64,
-            DataType::Utf8 => DataType::Int32,
-            _ => {
-                // this error is internal as `data_types` should have captured this.
-                return Err(DataFusionError::Internal(
-                    "The octet_length function can only accept strings.".to_string(),
-                ));
-            }
-        }),
-        BuiltinScalarFunction::RegexpReplace => Ok(match arg_types[0] {
-            DataType::LargeUtf8 => DataType::LargeUtf8,
-            DataType::Utf8 => DataType::Utf8,
-            _ => {
-                // this error is internal as `data_types` should have captured this.
-                return Err(DataFusionError::Internal(
-                    "The regexp_replace function can only accept strings.".to_string(),
-                ));
-            }
-        }),
-        BuiltinScalarFunction::Repeat => Ok(match arg_types[0] {
-            DataType::LargeUtf8 => DataType::LargeUtf8,
-            DataType::Utf8 => DataType::Utf8,
-            _ => {
-                // this error is internal as `data_types` should have captured this.
-                return Err(DataFusionError::Internal(
-                    "The repeat function can only accept strings.".to_string(),
-                ));
-            }
-        }),
-        BuiltinScalarFunction::Replace => Ok(match arg_types[0] {
-            DataType::LargeUtf8 => DataType::LargeUtf8,
-            DataType::Utf8 => DataType::Utf8,
-            _ => {
-                // this error is internal as `data_types` should have captured this.
-                return Err(DataFusionError::Internal(
-                    "The replace function can only accept strings.".to_string(),
-                ));
-            }
-        }),
-        BuiltinScalarFunction::Reverse => Ok(match arg_types[0] {
-            DataType::LargeUtf8 => DataType::LargeUtf8,
-            DataType::Utf8 => DataType::Utf8,
-            _ => {
-                // this error is internal as `data_types` should have captured this.
-                return Err(DataFusionError::Internal(
-                    "The reverse function can only accept strings.".to_string(),
-                ));
-            }
-        }),
-        BuiltinScalarFunction::Right => Ok(match arg_types[0] {
-            DataType::LargeUtf8 => DataType::LargeUtf8,
-            DataType::Utf8 => DataType::Utf8,
-            _ => {
-                // this error is internal as `data_types` should have captured this.
-                return Err(DataFusionError::Internal(
-                    "The right function can only accept strings.".to_string(),
-                ));
-            }
-        }),
-        BuiltinScalarFunction::Rpad => Ok(match arg_types[0] {
-            DataType::LargeUtf8 => DataType::LargeUtf8,
-            DataType::Utf8 => DataType::Utf8,
-            _ => {
-                // this error is internal as `data_types` should have captured this.
-                return Err(DataFusionError::Internal(
-                    "The rpad function can only accept strings.".to_string(),
-                ));
-            }
-        }),
-        BuiltinScalarFunction::Rtrim => Ok(match arg_types[0] {
-            DataType::LargeUtf8 => DataType::LargeUtf8,
-            DataType::Utf8 => DataType::Utf8,
-            _ => {
-                // this error is internal as `data_types` should have captured this.
-                return Err(DataFusionError::Internal(
-                    "The rtrim function can only accept strings.".to_string(),
-                ));
-            }
-        }),
-        BuiltinScalarFunction::SHA224 => Ok(match arg_types[0] {
-            DataType::LargeUtf8 => DataType::Binary,
-            DataType::Utf8 => DataType::Binary,
-            _ => {
-                // this error is internal as `data_types` should have captured this.
-                return Err(DataFusionError::Internal(
-                    "The sha224 function can only accept strings.".to_string(),
-                ));
-            }
-        }),
-        BuiltinScalarFunction::SHA256 => Ok(match arg_types[0] {
-            DataType::LargeUtf8 => DataType::Binary,
-            DataType::Utf8 => DataType::Binary,
-            _ => {
-                // this error is internal as `data_types` should have captured this.
-                return Err(DataFusionError::Internal(
-                    "The sha256 function can only accept strings.".to_string(),
-                ));
-            }
-        }),
-        BuiltinScalarFunction::SHA384 => Ok(match arg_types[0] {
-            DataType::LargeUtf8 => DataType::Binary,
-            DataType::Utf8 => DataType::Binary,
-            _ => {
-                // this error is internal as `data_types` should have captured this.
-                return Err(DataFusionError::Internal(
-                    "The sha384 function can only accept strings.".to_string(),
-                ));
-            }
-        }),
-        BuiltinScalarFunction::SHA512 => Ok(match arg_types[0] {
-            DataType::LargeUtf8 => DataType::Binary,
-            DataType::Utf8 => DataType::Binary,
-            _ => {
-                // this error is internal as `data_types` should have captured this.
-                return Err(DataFusionError::Internal(
-                    "The sha512 function can only accept strings.".to_string(),
-                ));
-            }
-        }),
-        BuiltinScalarFunction::SplitPart => Ok(match arg_types[0] {
-            DataType::LargeUtf8 => DataType::LargeUtf8,
-            DataType::Utf8 => DataType::Utf8,
-            _ => {
-                // this error is internal as `data_types` should have captured this.
-                return Err(DataFusionError::Internal(
-                    "The split_part function can only accept strings.".to_string(),
-                ));
-            }
-        }),
-        BuiltinScalarFunction::StartsWith => Ok(DataType::Boolean),
-        BuiltinScalarFunction::Strpos => Ok(match arg_types[0] {
-            DataType::LargeUtf8 => DataType::Int64,
-            DataType::Utf8 => DataType::Int32,
-            _ => {
-                // this error is internal as `data_types` should have captured this.
-                return Err(DataFusionError::Internal(
-                    "The strpos function can only accept strings.".to_string(),
-                ));
-            }
-        }),
-        BuiltinScalarFunction::Substr => Ok(match arg_types[0] {
-            DataType::LargeUtf8 => DataType::LargeUtf8,
-            DataType::Utf8 => DataType::Utf8,
-            _ => {
-                // this error is internal as `data_types` should have captured this.
-                return Err(DataFusionError::Internal(
-                    "The substr function can only accept strings.".to_string(),
-                ));
-            }
-        }),
-        BuiltinScalarFunction::ToHex => Ok(match arg_types[0] {
-            DataType::Int8 | DataType::Int16 | DataType::Int32 | DataType::Int64 => {
-                DataType::Utf8
-            }
-            _ => {
-                // this error is internal as `data_types` should have captured this.
-                return Err(DataFusionError::Internal(
-                    "The to_hex function can only accept integers.".to_string(),
-                ));
-            }
-        }),
-        BuiltinScalarFunction::ToTimestamp => {
-            Ok(DataType::Timestamp(TimeUnit::Nanosecond, None))
-        }
-        BuiltinScalarFunction::Translate => Ok(match arg_types[0] {
-            DataType::LargeUtf8 => DataType::LargeUtf8,
-            DataType::Utf8 => DataType::Utf8,
-            _ => {
-                // this error is internal as `data_types` should have captured this.
-                return Err(DataFusionError::Internal(
-                    "The translate function can only accept strings.".to_string(),
-                ));
-            }
-        }),
-        BuiltinScalarFunction::Trim => Ok(match arg_types[0] {
-            DataType::LargeUtf8 => DataType::LargeUtf8,
-            DataType::Utf8 => DataType::Utf8,
-            _ => {
-                // this error is internal as `data_types` should have captured this.
-                return Err(DataFusionError::Internal(
-                    "The trim function can only accept strings.".to_string(),
-                ));
-            }
-        }),
-        BuiltinScalarFunction::Upper => Ok(match arg_types[0] {
-            DataType::LargeUtf8 => DataType::LargeUtf8,
-            DataType::Utf8 => DataType::Utf8,
-            _ => {
-                // this error is internal as `data_types` should have captured this.
-                return Err(DataFusionError::Internal(
-                    "The upper function can only accept strings.".to_string(),
-                ));
-            }
-        }),
-        BuiltinScalarFunction::RegexpMatch => Ok(match arg_types[0] {
-            DataType::LargeUtf8 => {
-                DataType::List(Box::new(Field::new("item", DataType::LargeUtf8, true)))
-            }
-            DataType::Utf8 => {
-                DataType::List(Box::new(Field::new("item", DataType::Utf8, true)))
-            }
-            _ => {
-                // this error is internal as `data_types` should have captured this.
-                return Err(DataFusionError::Internal(
-                    "The regexp_extract function can only accept strings.".to_string(),
-                ));
-            }
-        }),
-
-        BuiltinScalarFunction::Abs
-        | BuiltinScalarFunction::Acos
-        | BuiltinScalarFunction::Asin
-        | BuiltinScalarFunction::Atan
-        | BuiltinScalarFunction::Ceil
-        | BuiltinScalarFunction::Cos
-        | BuiltinScalarFunction::Exp
-        | BuiltinScalarFunction::Floor
-        | BuiltinScalarFunction::Log
-        | BuiltinScalarFunction::Log10
-        | BuiltinScalarFunction::Log2
-        | BuiltinScalarFunction::Round
-        | BuiltinScalarFunction::Signum
-        | BuiltinScalarFunction::Sin
-        | BuiltinScalarFunction::Sqrt
-        | BuiltinScalarFunction::Tan
-        | BuiltinScalarFunction::Trunc => Ok(DataType::Float64),
-    }
-}
-
-#[cfg(feature = "crypto_expressions")]
-macro_rules! invoke_if_crypto_expressions_feature_flag {
-    ($FUNC:ident, $NAME:expr) => {{
-        use crate::physical_plan::crypto_expressions;
-        crypto_expressions::$FUNC
-    }};
-}
-
-#[cfg(not(feature = "crypto_expressions"))]
-macro_rules! invoke_if_crypto_expressions_feature_flag {
-    ($FUNC:ident, $NAME:expr) => {
-        |_: &[ColumnarValue]| -> Result<ColumnarValue> {
-            Err(DataFusionError::Internal(format!(
-                "function {} requires compilation with feature flag: crypto_expressions.",
-                $NAME
-            )))
-        }
-    };
-}
-
-#[cfg(feature = "regex_expressions")]
-macro_rules! invoke_if_regex_expressions_feature_flag {
-    ($FUNC:ident, $T:tt, $NAME:expr) => {{
-        use crate::physical_plan::regex_expressions;
-        regex_expressions::$FUNC::<$T>
-    }};
-}
-
-#[cfg(not(feature = "regex_expressions"))]
-macro_rules! invoke_if_regex_expressions_feature_flag {
-    ($FUNC:ident, $T:tt, $NAME:expr) => {
-        |_: &[ArrayRef]| -> Result<ArrayRef> {
-            Err(DataFusionError::Internal(format!(
-                "function {} requires compilation with feature flag: regex_expressions.",
-                $NAME
-            )))
-        }
-    };
-}
-
-#[cfg(feature = "unicode_expressions")]
-macro_rules! invoke_if_unicode_expressions_feature_flag {
-    ($FUNC:ident, $T:tt, $NAME:expr) => {{
-        use crate::physical_plan::unicode_expressions;
-        unicode_expressions::$FUNC::<$T>
-    }};
-}
-
-#[cfg(not(feature = "unicode_expressions"))]
-macro_rules! invoke_if_unicode_expressions_feature_flag {
-    ($FUNC:ident, $T:tt, $NAME:expr) => {
-        |_: &[ArrayRef]| -> Result<ArrayRef> {
-            Err(DataFusionError::Internal(format!(
-                "function {} requires compilation with feature flag: unicode_expressions.",
-                $NAME
-            )))
-        }
-    };
-}
-
-/// Create a physical (function) expression.
-/// This function errors when `args`' can't be coerced to a valid argument type of the function.
-pub fn create_physical_expr(
-    fun: &BuiltinScalarFunction,
-    args: &[Arc<dyn PhysicalExpr>],
-    input_schema: &Schema,
-) -> Result<Arc<dyn PhysicalExpr>> {
-    let fun_expr: ScalarFunctionImplementation = Arc::new(match fun {
-        // math functions
-        BuiltinScalarFunction::Abs => math_expressions::abs,
-        BuiltinScalarFunction::Acos => math_expressions::acos,
-        BuiltinScalarFunction::Asin => math_expressions::asin,
-        BuiltinScalarFunction::Atan => math_expressions::atan,
-        BuiltinScalarFunction::Ceil => math_expressions::ceil,
-        BuiltinScalarFunction::Cos => math_expressions::cos,
-        BuiltinScalarFunction::Exp => math_expressions::exp,
-        BuiltinScalarFunction::Floor => math_expressions::floor,
-        BuiltinScalarFunction::Log => math_expressions::ln,
-        BuiltinScalarFunction::Log10 => math_expressions::log10,
-        BuiltinScalarFunction::Log2 => math_expressions::log2,
-        BuiltinScalarFunction::Round => math_expressions::round,
-        BuiltinScalarFunction::Signum => math_expressions::signum,
-        BuiltinScalarFunction::Sin => math_expressions::sin,
-        BuiltinScalarFunction::Sqrt => math_expressions::sqrt,
-        BuiltinScalarFunction::Tan => math_expressions::tan,
-        BuiltinScalarFunction::Trunc => math_expressions::trunc,
-
-        // string functions
-        BuiltinScalarFunction::Array => array_expressions::array,
-        BuiltinScalarFunction::Ascii => |args| match args[0].data_type() {
-            DataType::Utf8 => {
-                make_scalar_function(string_expressions::ascii::<i32>)(args)
-            }
-            DataType::LargeUtf8 => {
-                make_scalar_function(string_expressions::ascii::<i64>)(args)
-            }
-            other => Err(DataFusionError::Internal(format!(
-                "Unsupported data type {:?} for function ascii",
-                other,
-            ))),
-        },
-        BuiltinScalarFunction::BitLength => |args| match &args[0] {
-            ColumnarValue::Array(v) => Ok(ColumnarValue::Array(bit_length(v.as_ref())?)),
-            ColumnarValue::Scalar(v) => match v {
-                ScalarValue::Utf8(v) => Ok(ColumnarValue::Scalar(ScalarValue::Int32(
-                    v.as_ref().map(|x| (x.len() * 8) as i32),
-                ))),
-                ScalarValue::LargeUtf8(v) => Ok(ColumnarValue::Scalar(
-                    ScalarValue::Int64(v.as_ref().map(|x| (x.len() * 8) as i64)),
-                )),
-                _ => unreachable!(),
-            },
-        },
-        BuiltinScalarFunction::Btrim => |args| match args[0].data_type() {
-            DataType::Utf8 => {
-                make_scalar_function(string_expressions::btrim::<i32>)(args)
-            }
-            DataType::LargeUtf8 => {
-                make_scalar_function(string_expressions::btrim::<i64>)(args)
-            }
-            other => Err(DataFusionError::Internal(format!(
-                "Unsupported data type {:?} for function btrim",
-                other,
-            ))),
-        },
-        BuiltinScalarFunction::CharacterLength => |args| match args[0].data_type() {
-            DataType::Utf8 => {
-                let func = invoke_if_unicode_expressions_feature_flag!(
-                    character_length,
-                    Int32Type,
-                    "character_length"
-                );
-                make_scalar_function(func)(args)
-            }
-            DataType::LargeUtf8 => {
-                let func = invoke_if_unicode_expressions_feature_flag!(
-                    character_length,
-                    Int64Type,
-                    "character_length"
-                );
-                make_scalar_function(func)(args)
-            }
-            other => Err(DataFusionError::Internal(format!(
-                "Unsupported data type {:?} for function character_length",
-                other,
-            ))),
-        },
-        BuiltinScalarFunction::Chr => {
-            |args| make_scalar_function(string_expressions::chr)(args)
-        }
-        BuiltinScalarFunction::Concat => string_expressions::concat,
-        BuiltinScalarFunction::ConcatWithSeparator => {
-            |args| make_scalar_function(string_expressions::concat_ws)(args)
-        }
-        BuiltinScalarFunction::DatePart => datetime_expressions::date_part,
-        BuiltinScalarFunction::DateTrunc => datetime_expressions::date_trunc,
-        BuiltinScalarFunction::InitCap => |args| match args[0].data_type() {
-            DataType::Utf8 => {
-                make_scalar_function(string_expressions::initcap::<i32>)(args)
-            }
-            DataType::LargeUtf8 => {
-                make_scalar_function(string_expressions::initcap::<i64>)(args)
-            }
-            other => Err(DataFusionError::Internal(format!(
-                "Unsupported data type {:?} for function initcap",
-                other,
-            ))),
-        },
-        BuiltinScalarFunction::Left => |args| match args[0].data_type() {
-            DataType::Utf8 => {
-                let func = invoke_if_unicode_expressions_feature_flag!(left, i32, "left");
-                make_scalar_function(func)(args)
-            }
-            DataType::LargeUtf8 => {
-                let func = invoke_if_unicode_expressions_feature_flag!(left, i64, "left");
-                make_scalar_function(func)(args)
-            }
-            other => Err(DataFusionError::Internal(format!(
-                "Unsupported data type {:?} for function left",
-                other,
-            ))),
-        },
-        BuiltinScalarFunction::Lower => string_expressions::lower,
-        BuiltinScalarFunction::Lpad => |args| match args[0].data_type() {
-            DataType::Utf8 => {
-                let func = invoke_if_unicode_expressions_feature_flag!(lpad, i32, "lpad");
-                make_scalar_function(func)(args)
-            }
-            DataType::LargeUtf8 => {
-                let func = invoke_if_unicode_expressions_feature_flag!(lpad, i64, "lpad");
-                make_scalar_function(func)(args)
-            }
-            other => Err(DataFusionError::Internal(format!(
-                "Unsupported data type {:?} for function lpad",
-                other,
-            ))),
-        },
-        BuiltinScalarFunction::Ltrim => |args| match args[0].data_type() {
-            DataType::Utf8 => {
-                make_scalar_function(string_expressions::ltrim::<i32>)(args)
-            }
-            DataType::LargeUtf8 => {
-                make_scalar_function(string_expressions::ltrim::<i64>)(args)
-            }
-            other => Err(DataFusionError::Internal(format!(
-                "Unsupported data type {:?} for function ltrim",
-                other,
-            ))),
-        },
-        BuiltinScalarFunction::MD5 => {
-            invoke_if_crypto_expressions_feature_flag!(md5, "md5")
-        }
-        BuiltinScalarFunction::NullIf => nullif_func,
-        BuiltinScalarFunction::OctetLength => |args| match &args[0] {
-            ColumnarValue::Array(v) => Ok(ColumnarValue::Array(length(v.as_ref())?)),
-            ColumnarValue::Scalar(v) => match v {
-                ScalarValue::Utf8(v) => Ok(ColumnarValue::Scalar(ScalarValue::Int32(
-                    v.as_ref().map(|x| x.len() as i32),
-                ))),
-                ScalarValue::LargeUtf8(v) => Ok(ColumnarValue::Scalar(
-                    ScalarValue::Int64(v.as_ref().map(|x| x.len() as i64)),
-                )),
-                _ => unreachable!(),
-            },
-        },
-        BuiltinScalarFunction::RegexpMatch => |args| match args[0].data_type() {
-            DataType::Utf8 => {
-                let func = invoke_if_regex_expressions_feature_flag!(
-                    regexp_match,
-                    i32,
-                    "regexp_match"
-                );
-                make_scalar_function(func)(args)
-            }
-            DataType::LargeUtf8 => {
-                let func = invoke_if_regex_expressions_feature_flag!(
-                    regexp_match,
-                    i64,
-                    "regexp_match"
-                );
-                make_scalar_function(func)(args)
-            }
-            other => Err(DataFusionError::Internal(format!(
-                "Unsupported data type {:?} for function regexp_match",
-                other
-            ))),
-        },
-        BuiltinScalarFunction::RegexpReplace => |args| match args[0].data_type() {
-            DataType::Utf8 => {
-                let func = invoke_if_regex_expressions_feature_flag!(
-                    regexp_replace,
-                    i32,
-                    "regexp_replace"
-                );
-                make_scalar_function(func)(args)
-            }
-            DataType::LargeUtf8 => {
-                let func = invoke_if_regex_expressions_feature_flag!(
-                    regexp_replace,
-                    i64,
-                    "regexp_replace"
-                );
-                make_scalar_function(func)(args)
-            }
-            other => Err(DataFusionError::Internal(format!(
-                "Unsupported data type {:?} for function regexp_replace",
-                other,
-            ))),
-        },
-        BuiltinScalarFunction::Repeat => |args| match args[0].data_type() {
-            DataType::Utf8 => {
-                make_scalar_function(string_expressions::repeat::<i32>)(args)
-            }
-            DataType::LargeUtf8 => {
-                make_scalar_function(string_expressions::repeat::<i64>)(args)
-            }
-            other => Err(DataFusionError::Internal(format!(
-                "Unsupported data type {:?} for function repeat",
-                other,
-            ))),
-        },
-        BuiltinScalarFunction::Replace => |args| match args[0].data_type() {
-            DataType::Utf8 => {
-                make_scalar_function(string_expressions::replace::<i32>)(args)
-            }
-            DataType::LargeUtf8 => {
-                make_scalar_function(string_expressions::replace::<i64>)(args)
-            }
-            other => Err(DataFusionError::Internal(format!(
-                "Unsupported data type {:?} for function replace",
-                other,
-            ))),
-        },
-        BuiltinScalarFunction::Reverse => |args| match args[0].data_type() {
-            DataType::Utf8 => {
-                let func =
-                    invoke_if_unicode_expressions_feature_flag!(reverse, i32, "reverse");
-                make_scalar_function(func)(args)
-            }
-            DataType::LargeUtf8 => {
-                let func =
-                    invoke_if_unicode_expressions_feature_flag!(reverse, i64, "reverse");
-                make_scalar_function(func)(args)
-            }
-            other => Err(DataFusionError::Internal(format!(
-                "Unsupported data type {:?} for function reverse",
-                other,
-            ))),
-        },
-        BuiltinScalarFunction::Right => |args| match args[0].data_type() {
-            DataType::Utf8 => {
-                let func =
-                    invoke_if_unicode_expressions_feature_flag!(right, i32, "right");
-                make_scalar_function(func)(args)
-            }
-            DataType::LargeUtf8 => {
-                let func =
-                    invoke_if_unicode_expressions_feature_flag!(right, i64, "right");
-                make_scalar_function(func)(args)
-            }
-            other => Err(DataFusionError::Internal(format!(
-                "Unsupported data type {:?} for function right",
-                other,
-            ))),
-        },
-        BuiltinScalarFunction::Rpad => |args| match args[0].data_type() {
-            DataType::Utf8 => {
-                let func = invoke_if_unicode_expressions_feature_flag!(rpad, i32, "rpad");
-                make_scalar_function(func)(args)
-            }
-            DataType::LargeUtf8 => {
-                let func = invoke_if_unicode_expressions_feature_flag!(rpad, i64, "rpad");
-                make_scalar_function(func)(args)
-            }
-            other => Err(DataFusionError::Internal(format!(
-                "Unsupported data type {:?} for function rpad",
-                other,
-            ))),
-        },
-        BuiltinScalarFunction::Rtrim => |args| match args[0].data_type() {
-            DataType::Utf8 => {
-                make_scalar_function(string_expressions::rtrim::<i32>)(args)
-            }
-            DataType::LargeUtf8 => {
-                make_scalar_function(string_expressions::rtrim::<i64>)(args)
-            }
-            other => Err(DataFusionError::Internal(format!(
-                "Unsupported data type {:?} for function rtrim",
-                other,
-            ))),
-        },
-        BuiltinScalarFunction::SHA224 => {
-            invoke_if_crypto_expressions_feature_flag!(sha224, "sha224")
-        }
-        BuiltinScalarFunction::SHA256 => {
-            invoke_if_crypto_expressions_feature_flag!(sha256, "sha256")
-        }
-        BuiltinScalarFunction::SHA384 => {
-            invoke_if_crypto_expressions_feature_flag!(sha384, "sha384")
-        }
-        BuiltinScalarFunction::SHA512 => {
-            invoke_if_crypto_expressions_feature_flag!(sha512, "sha512")
-        }
-        BuiltinScalarFunction::SplitPart => |args| match args[0].data_type() {
-            DataType::Utf8 => {
-                make_scalar_function(string_expressions::split_part::<i32>)(args)
-            }
-            DataType::LargeUtf8 => {
-                make_scalar_function(string_expressions::split_part::<i64>)(args)
-            }
-            other => Err(DataFusionError::Internal(format!(
-                "Unsupported data type {:?} for function split_part",
-                other,
-            ))),
-        },
-        BuiltinScalarFunction::StartsWith => |args| match args[0].data_type() {
-            DataType::Utf8 => {
-                make_scalar_function(string_expressions::starts_with::<i32>)(args)
-            }
-            DataType::LargeUtf8 => {
-                make_scalar_function(string_expressions::starts_with::<i64>)(args)
-            }
-            other => Err(DataFusionError::Internal(format!(
-                "Unsupported data type {:?} for function starts_with",
-                other,
-            ))),
-        },
-        BuiltinScalarFunction::Strpos => |args| match args[0].data_type() {
-            DataType::Utf8 => {
-                let func = invoke_if_unicode_expressions_feature_flag!(
-                    strpos, Int32Type, "strpos"
-                );
-                make_scalar_function(func)(args)
-            }
-            DataType::LargeUtf8 => {
-                let func = invoke_if_unicode_expressions_feature_flag!(
-                    strpos, Int64Type, "strpos"
-                );
-                make_scalar_function(func)(args)
-            }
-            other => Err(DataFusionError::Internal(format!(
-                "Unsupported data type {:?} for function strpos",
-                other,
-            ))),
-        },
-        BuiltinScalarFunction::Substr => |args| match args[0].data_type() {
-            DataType::Utf8 => {
-                let func =
-                    invoke_if_unicode_expressions_feature_flag!(substr, i32, "substr");
-                make_scalar_function(func)(args)
-            }
-            DataType::LargeUtf8 => {
-                let func =
-                    invoke_if_unicode_expressions_feature_flag!(substr, i64, "substr");
-                make_scalar_function(func)(args)
-            }
-            other => Err(DataFusionError::Internal(format!(
-                "Unsupported data type {:?} for function substr",
-                other,
-            ))),
-        },
-        BuiltinScalarFunction::ToHex => |args| match args[0].data_type() {
-            DataType::Int32 => {
-                make_scalar_function(string_expressions::to_hex::<Int32Type>)(args)
-            }
-            DataType::Int64 => {
-                make_scalar_function(string_expressions::to_hex::<Int64Type>)(args)
-            }
-            other => Err(DataFusionError::Internal(format!(
-                "Unsupported data type {:?} for function to_hex",
-                other,
-            ))),
-        },
-        BuiltinScalarFunction::ToTimestamp => datetime_expressions::to_timestamp,
-        BuiltinScalarFunction::Translate => |args| match args[0].data_type() {
-            DataType::Utf8 => {
-                let func = invoke_if_unicode_expressions_feature_flag!(
-                    translate,
-                    i32,
-                    "translate"
-                );
-                make_scalar_function(func)(args)
-            }
-            DataType::LargeUtf8 => {
-                let func = invoke_if_unicode_expressions_feature_flag!(
-                    translate,
-                    i64,
-                    "translate"
-                );
-                make_scalar_function(func)(args)
-            }
-            other => Err(DataFusionError::Internal(format!(
-                "Unsupported data type {:?} for function translate",
-                other,
-            ))),
-        },
-        BuiltinScalarFunction::Trim => |args| match args[0].data_type() {
-            DataType::Utf8 => {
-                make_scalar_function(string_expressions::btrim::<i32>)(args)
-            }
-            DataType::LargeUtf8 => {
-                make_scalar_function(string_expressions::btrim::<i64>)(args)
-            }
-            other => Err(DataFusionError::Internal(format!(
-                "Unsupported data type {:?} for function trim",
-                other,
-            ))),
-        },
-        BuiltinScalarFunction::Upper => string_expressions::upper,
-    });
-    // coerce
-    let args = coerce(args, input_schema, &signature(fun))?;
-
-    let arg_types = args
-        .iter()
-        .map(|e| e.data_type(input_schema))
-        .collect::<Result<Vec<_>>>()?;
-
-    Ok(Arc::new(ScalarFunctionExpr::new(
-        &format!("{}", fun),
-        fun_expr,
-        args,
-        &return_type(&fun, &arg_types)?,
-    )))
-}
-
-/// the signatures supported by the function `fun`.
-fn signature(fun: &BuiltinScalarFunction) -> Signature {
-    // note: the physical expression must accept the type returned by this function or the execution panics.
-
-    // for now, the list is small, as we do not have many built-in functions.
-    match fun {
-        BuiltinScalarFunction::Array => {
-            Signature::Variadic(array_expressions::SUPPORTED_ARRAY_TYPES.to_vec())
-        }
-        BuiltinScalarFunction::Concat | BuiltinScalarFunction::ConcatWithSeparator => {
-            Signature::Variadic(vec![DataType::Utf8])
-        }
-        BuiltinScalarFunction::Ascii
-        | BuiltinScalarFunction::BitLength
-        | BuiltinScalarFunction::CharacterLength
-        | BuiltinScalarFunction::InitCap
-        | BuiltinScalarFunction::Lower
-        | BuiltinScalarFunction::MD5
-        | BuiltinScalarFunction::OctetLength
-        | BuiltinScalarFunction::Reverse
-        | BuiltinScalarFunction::SHA224
-        | BuiltinScalarFunction::SHA256
-        | BuiltinScalarFunction::SHA384
-        | BuiltinScalarFunction::SHA512
-        | BuiltinScalarFunction::Trim
-        | BuiltinScalarFunction::Upper => {
-            Signature::Uniform(1, vec![DataType::Utf8, DataType::LargeUtf8])
-        }
-        BuiltinScalarFunction::Btrim
-        | BuiltinScalarFunction::Ltrim
-        | BuiltinScalarFunction::Rtrim => Signature::OneOf(vec![
-            Signature::Exact(vec![DataType::Utf8]),
-            Signature::Exact(vec![DataType::Utf8, DataType::Utf8]),
-        ]),
-        BuiltinScalarFunction::Chr | BuiltinScalarFunction::ToHex => {
-            Signature::Uniform(1, vec![DataType::Int64])
-        }
-        BuiltinScalarFunction::Lpad | BuiltinScalarFunction::Rpad => {
-            Signature::OneOf(vec![
-                Signature::Exact(vec![DataType::Utf8, DataType::Int64]),
-                Signature::Exact(vec![DataType::LargeUtf8, DataType::Int64]),
-                Signature::Exact(vec![DataType::Utf8, DataType::Int64, DataType::Utf8]),
-                Signature::Exact(vec![
-                    DataType::LargeUtf8,
-                    DataType::Int64,
-                    DataType::Utf8,
-                ]),
-                Signature::Exact(vec![
-                    DataType::Utf8,
-                    DataType::Int64,
-                    DataType::LargeUtf8,
-                ]),
-                Signature::Exact(vec![
-                    DataType::LargeUtf8,
-                    DataType::Int64,
-                    DataType::LargeUtf8,
-                ]),
-            ])
-        }
-        BuiltinScalarFunction::Left
-        | BuiltinScalarFunction::Repeat
-        | BuiltinScalarFunction::Right => Signature::OneOf(vec![
-            Signature::Exact(vec![DataType::Utf8, DataType::Int64]),
-            Signature::Exact(vec![DataType::LargeUtf8, DataType::Int64]),
-        ]),
-        BuiltinScalarFunction::ToTimestamp => Signature::Uniform(1, vec![DataType::Utf8]),
-        BuiltinScalarFunction::DateTrunc => Signature::Exact(vec![
-            DataType::Utf8,
-            DataType::Timestamp(TimeUnit::Nanosecond, None),
-        ]),
-        BuiltinScalarFunction::DatePart => Signature::OneOf(vec![
-            Signature::Exact(vec![DataType::Utf8, DataType::Date32]),
-            Signature::Exact(vec![DataType::Utf8, DataType::Date64]),
-            Signature::Exact(vec![
-                DataType::Utf8,
-                DataType::Timestamp(TimeUnit::Second, None),
-            ]),
-            Signature::Exact(vec![
-                DataType::Utf8,
-                DataType::Timestamp(TimeUnit::Microsecond, None),
-            ]),
-            Signature::Exact(vec![
-                DataType::Utf8,
-                DataType::Timestamp(TimeUnit::Millisecond, None),
-            ]),
-            Signature::Exact(vec![
-                DataType::Utf8,
-                DataType::Timestamp(TimeUnit::Nanosecond, None),
-            ]),
-        ]),
-        BuiltinScalarFunction::SplitPart => Signature::OneOf(vec![
-            Signature::Exact(vec![DataType::Utf8, DataType::Utf8, DataType::Int64]),
-            Signature::Exact(vec![DataType::LargeUtf8, DataType::Utf8, DataType::Int64]),
-            Signature::Exact(vec![DataType::Utf8, DataType::LargeUtf8, DataType::Int64]),
-            Signature::Exact(vec![
-                DataType::LargeUtf8,
-                DataType::LargeUtf8,
-                DataType::Int64,
-            ]),
-        ]),
-
-        BuiltinScalarFunction::Strpos | BuiltinScalarFunction::StartsWith => {
-            Signature::OneOf(vec![
-                Signature::Exact(vec![DataType::Utf8, DataType::Utf8]),
-                Signature::Exact(vec![DataType::Utf8, DataType::LargeUtf8]),
-                Signature::Exact(vec![DataType::LargeUtf8, DataType::Utf8]),
-                Signature::Exact(vec![DataType::LargeUtf8, DataType::LargeUtf8]),
-            ])
-        }
-
-        BuiltinScalarFunction::Substr => Signature::OneOf(vec![
-            Signature::Exact(vec![DataType::Utf8, DataType::Int64]),
-            Signature::Exact(vec![DataType::LargeUtf8, DataType::Int64]),
-            Signature::Exact(vec![DataType::Utf8, DataType::Int64, DataType::Int64]),
-            Signature::Exact(vec![DataType::LargeUtf8, DataType::Int64, DataType::Int64]),
-        ]),
-
-        BuiltinScalarFunction::Replace | BuiltinScalarFunction::Translate => {
-            Signature::OneOf(vec![Signature::Exact(vec![
-                DataType::Utf8,
-                DataType::Utf8,
-                DataType::Utf8,
-            ])])
-        }
-        BuiltinScalarFunction::RegexpReplace => Signature::OneOf(vec![
-            Signature::Exact(vec![DataType::Utf8, DataType::Utf8, DataType::Utf8]),
-            Signature::Exact(vec![
-                DataType::Utf8,
-                DataType::Utf8,
-                DataType::Utf8,
-                DataType::Utf8,
-            ]),
-        ]),
-
-        BuiltinScalarFunction::NullIf => {
-            Signature::Uniform(2, SUPPORTED_NULLIF_TYPES.to_vec())
-        }
-        BuiltinScalarFunction::RegexpMatch => Signature::OneOf(vec![
-            Signature::Exact(vec![DataType::Utf8, DataType::Utf8]),
-            Signature::Exact(vec![DataType::LargeUtf8, DataType::Utf8]),
-            Signature::Exact(vec![DataType::Utf8, DataType::Utf8, DataType::Utf8]),
-            Signature::Exact(vec![DataType::LargeUtf8, DataType::Utf8, DataType::Utf8]),
-        ]),
-        // math expressions expect 1 argument of type f64 or f32
-        // priority is given to f64 because e.g. `sqrt(1i32)` is in IR (real numbers) and thus we
-        // return the best approximation for it (in f64).
-        // We accept f32 because in this case it is clear that the best approximation
-        // will be as good as the number of digits in the number
-        _ => Signature::Uniform(1, vec![DataType::Float64, DataType::Float32]),
-    }
-}
-
-/// Physical expression of a scalar function
-pub struct ScalarFunctionExpr {
-    fun: ScalarFunctionImplementation,
-    name: String,
-    args: Vec<Arc<dyn PhysicalExpr>>,
-    return_type: DataType,
-}
-
-impl Debug for ScalarFunctionExpr {
-    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
-        f.debug_struct("ScalarFunctionExpr")
-            .field("fun", &"<FUNC>")
-            .field("name", &self.name)
-            .field("args", &self.args)
-            .field("return_type", &self.return_type)
-            .finish()
-    }
-}
-
-impl ScalarFunctionExpr {
-    /// Create a new Scalar function
-    pub fn new(
-        name: &str,
-        fun: ScalarFunctionImplementation,
-        args: Vec<Arc<dyn PhysicalExpr>>,
-        return_type: &DataType,
-    ) -> Self {
-        Self {
-            fun,
-            name: name.to_owned(),
-            args,
-            return_type: return_type.clone(),
-        }
-    }
-
-    /// Get the scalar function implementation
-    pub fn fun(&self) -> &ScalarFunctionImplementation {
-        &self.fun
-    }
-
-    /// The name for this expression
-    pub fn name(&self) -> &str {
-        &self.name
-    }
-
-    /// Input arguments
-    pub fn args(&self) -> &[Arc<dyn PhysicalExpr>] {
-        &self.args
-    }
-
-    /// Data type produced by this expression
-    pub fn return_type(&self) -> &DataType {
-        &self.return_type
-    }
-}
-
-impl fmt::Display for ScalarFunctionExpr {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(
-            f,
-            "{}({})",
-            self.name,
-            self.args
-                .iter()
-                .map(|e| format!("{}", e))
-                .collect::<Vec<String>>()
-                .join(", ")
-        )
-    }
-}
-
-impl PhysicalExpr for ScalarFunctionExpr {
-    /// Return a reference to Any that can be used for downcasting
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn data_type(&self, _input_schema: &Schema) -> Result<DataType> {
-        Ok(self.return_type.clone())
-    }
-
-    fn nullable(&self, _input_schema: &Schema) -> Result<bool> {
-        Ok(true)
-    }
-
-    fn evaluate(&self, batch: &RecordBatch) -> Result<ColumnarValue> {
-        // evaluate the arguments
-        let inputs = self
-            .args
-            .iter()
-            .map(|e| e.evaluate(batch))
-            .collect::<Result<Vec<_>>>()?;
-
-        // evaluate the function
-        let fun = self.fun.as_ref();
-        (fun)(&inputs)
-    }
-}
-
-/// decorates a function to handle [`ScalarValue`]s by coverting them to arrays before calling the function
-/// and vice-versa after evaluation.
-pub fn make_scalar_function<F>(inner: F) -> ScalarFunctionImplementation
-where
-    F: Fn(&[ArrayRef]) -> Result<ArrayRef> + Sync + Send + 'static,
-{
-    Arc::new(move |args: &[ColumnarValue]| {
-        // first, identify if any of the arguments is an Array. If yes, store its `len`,
-        // as any scalar will need to be converted to an array of len `len`.
-        let len = args
-            .iter()
-            .fold(Option::<usize>::None, |acc, arg| match arg {
-                ColumnarValue::Scalar(_) => acc,
-                ColumnarValue::Array(a) => Some(a.len()),
-            });
-
-        // to array
-        let args = if let Some(len) = len {
-            args.iter()
-                .map(|arg| arg.clone().into_array(len))
-                .collect::<Vec<ArrayRef>>()
-        } else {
-            args.iter()
-                .map(|arg| arg.clone().into_array(1))
-                .collect::<Vec<ArrayRef>>()
-        };
-
-        let result = (inner)(&args);
-
-        // maybe back to scalar
-        if len.is_some() {
-            result.map(ColumnarValue::Array)
-        } else {
-            ScalarValue::try_from_array(&result?, 0).map(ColumnarValue::Scalar)
-        }
-    })
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::{
-        error::Result,
-        physical_plan::expressions::{col, lit},
-        scalar::ScalarValue,
-    };
-    use arrow::{
-        array::{
-            Array, ArrayRef, BinaryArray, BooleanArray, FixedSizeListArray, Float64Array,
-            Int32Array, ListArray, StringArray, UInt32Array, UInt64Array,
-        },
-        datatypes::Field,
-        record_batch::RecordBatch,
-    };
-
-    /// $FUNC function to test
-    /// $ARGS arguments (vec) to pass to function
-    /// $EXPECTED a Result<Option<$EXPECTED_TYPE>> where Result allows testing errors and Option allows testing Null
-    /// $EXPECTED_TYPE is the expected value type
-    /// $DATA_TYPE is the function to test result type
-    /// $ARRAY_TYPE is the column type after function applied
-    macro_rules! test_function {
-        ($FUNC:ident, $ARGS:expr, $EXPECTED:expr, $EXPECTED_TYPE:ty, $DATA_TYPE: ident, $ARRAY_TYPE:ident) => {
-            // used to provide type annotation
-            let expected: Result<Option<$EXPECTED_TYPE>> = $EXPECTED;
-
-            // any type works here: we evaluate against a literal of `value`
-            let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]);
-            let columns: Vec<ArrayRef> = vec![Arc::new(Int32Array::from(vec![1]))];
-
-            let expr =
-                create_physical_expr(&BuiltinScalarFunction::$FUNC, $ARGS, &schema)?;
-
-            // type is correct
-            assert_eq!(expr.data_type(&schema)?, DataType::$DATA_TYPE);
-
-            let batch = RecordBatch::try_new(Arc::new(schema.clone()), columns)?;
-
-            match expected {
-                Ok(expected) => {
-                    let result = expr.evaluate(&batch)?;
-                    let result = result.into_array(batch.num_rows());
-                    let result = result.as_any().downcast_ref::<$ARRAY_TYPE>().unwrap();
-
-                    // value is correct
-                    match expected {
-                        Some(v) => assert_eq!(result.value(0), v),
-                        None => assert!(result.is_null(0)),
-                    };
-                }
-                Err(expected_error) => {
-                    // evaluate is expected error - cannot use .expect_err() due to Debug not being implemented
-                    match expr.evaluate(&batch) {
-                        Ok(_) => assert!(false, "expected error"),
-                        Err(error) => {
-                            assert_eq!(error.to_string(), expected_error.to_string());
-                        }
-                    }
-                }
-            };
-        };
-    }
-
-    #[test]
-    fn test_functions() -> Result<()> {
-        test_function!(
-            Ascii,
-            &[lit(ScalarValue::Utf8(Some("x".to_string())))],
-            Ok(Some(120)),
-            i32,
-            Int32,
-            Int32Array
-        );
-        test_function!(
-            Ascii,
-            &[lit(ScalarValue::Utf8(Some("ésoj".to_string())))],
-            Ok(Some(233)),
-            i32,
-            Int32,
-            Int32Array
-        );
-        test_function!(
-            Ascii,
-            &[lit(ScalarValue::Utf8(Some("💯".to_string())))],
-            Ok(Some(128175)),
-            i32,
-            Int32,
-            Int32Array
-        );
-        test_function!(
-            Ascii,
-            &[lit(ScalarValue::Utf8(Some("💯a".to_string())))],
-            Ok(Some(128175)),
-            i32,
-            Int32,
-            Int32Array
-        );
-        test_function!(
-            Ascii,
-            &[lit(ScalarValue::Utf8(Some("".to_string())))],
-            Ok(Some(0)),
-            i32,
-            Int32,
-            Int32Array
-        );
-        test_function!(
-            Ascii,
-            &[lit(ScalarValue::Utf8(None))],
-            Ok(None),
-            i32,
-            Int32,
-            Int32Array
-        );
-        test_function!(
-            BitLength,
-            &[lit(ScalarValue::Utf8(Some("chars".to_string())))],
-            Ok(Some(40)),
-            i32,
-            Int32,
-            Int32Array
-        );
-        test_function!(
-            BitLength,
-            &[lit(ScalarValue::Utf8(Some("josé".to_string())))],
-            Ok(Some(40)),
-            i32,
-            Int32,
-            Int32Array
-        );
-        test_function!(
-            BitLength,
-            &[lit(ScalarValue::Utf8(Some("".to_string())))],
-            Ok(Some(0)),
-            i32,
-            Int32,
-            Int32Array
-        );
-        test_function!(
-            Btrim,
-            &[lit(ScalarValue::Utf8(Some(" trim ".to_string())))],
-            Ok(Some("trim")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            Btrim,
-            &[lit(ScalarValue::Utf8(Some(" trim".to_string())))],
-            Ok(Some("trim")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            Btrim,
-            &[lit(ScalarValue::Utf8(Some("trim ".to_string())))],
-            Ok(Some("trim")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            Btrim,
-            &[lit(ScalarValue::Utf8(Some("\n trim \n".to_string())))],
-            Ok(Some("\n trim \n")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            Btrim,
-            &[
-                lit(ScalarValue::Utf8(Some("xyxtrimyyx".to_string()))),
-                lit(ScalarValue::Utf8(Some("xyz".to_string()))),
-            ],
-            Ok(Some("trim")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            Btrim,
-            &[
-                lit(ScalarValue::Utf8(Some("\nxyxtrimyyx\n".to_string()))),
-                lit(ScalarValue::Utf8(Some("xyz\n".to_string()))),
-            ],
-            Ok(Some("trim")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            Btrim,
-            &[
-                lit(ScalarValue::Utf8(None)),
-                lit(ScalarValue::Utf8(Some("xyz".to_string()))),
-            ],
-            Ok(None),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            Btrim,
-            &[
-                lit(ScalarValue::Utf8(Some("xyxtrimyyx".to_string()))),
-                lit(ScalarValue::Utf8(None)),
-            ],
-            Ok(None),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            CharacterLength,
-            &[lit(ScalarValue::Utf8(Some("chars".to_string())))],
-            Ok(Some(5)),
-            i32,
-            Int32,
-            Int32Array
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            CharacterLength,
-            &[lit(ScalarValue::Utf8(Some("josé".to_string())))],
-            Ok(Some(4)),
-            i32,
-            Int32,
-            Int32Array
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            CharacterLength,
-            &[lit(ScalarValue::Utf8(Some("".to_string())))],
-            Ok(Some(0)),
-            i32,
-            Int32,
-            Int32Array
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            CharacterLength,
-            &[lit(ScalarValue::Utf8(None))],
-            Ok(None),
-            i32,
-            Int32,
-            Int32Array
-        );
-        #[cfg(not(feature = "unicode_expressions"))]
-        test_function!(
-            CharacterLength,
-            &[lit(ScalarValue::Utf8(Some("josé".to_string())))],
-            Err(DataFusionError::Internal(
-                "function character_length requires compilation with feature flag: unicode_expressions.".to_string()
-            )),
-            i32,
-            Int32,
-            Int32Array
-        );
-        test_function!(
-            Chr,
-            &[lit(ScalarValue::Int64(Some(128175)))],
-            Ok(Some("💯")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            Chr,
-            &[lit(ScalarValue::Int64(None))],
-            Ok(None),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            Chr,
-            &[lit(ScalarValue::Int64(Some(120)))],
-            Ok(Some("x")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            Chr,
-            &[lit(ScalarValue::Int64(Some(128175)))],
-            Ok(Some("💯")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            Chr,
-            &[lit(ScalarValue::Int64(None))],
-            Ok(None),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            Chr,
-            &[lit(ScalarValue::Int64(Some(0)))],
-            Err(DataFusionError::Execution(
-                "null character not permitted.".to_string(),
-            )),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            Chr,
-            &[lit(ScalarValue::Int64(Some(i64::MAX)))],
-            Err(DataFusionError::Execution(
-                "requested character too large for encoding.".to_string(),
-            )),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            Concat,
-            &[
-                lit(ScalarValue::Utf8(Some("aa".to_string()))),
-                lit(ScalarValue::Utf8(Some("bb".to_string()))),
-                lit(ScalarValue::Utf8(Some("cc".to_string()))),
-            ],
-            Ok(Some("aabbcc")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            Concat,
-            &[
-                lit(ScalarValue::Utf8(Some("aa".to_string()))),
-                lit(ScalarValue::Utf8(None)),
-                lit(ScalarValue::Utf8(Some("cc".to_string()))),
-            ],
-            Ok(Some("aacc")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            Concat,
-            &[lit(ScalarValue::Utf8(None))],
-            Ok(Some("")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            ConcatWithSeparator,
-            &[
-                lit(ScalarValue::Utf8(Some("|".to_string()))),
-                lit(ScalarValue::Utf8(Some("aa".to_string()))),
-                lit(ScalarValue::Utf8(Some("bb".to_string()))),
-                lit(ScalarValue::Utf8(Some("cc".to_string()))),
-            ],
-            Ok(Some("aa|bb|cc")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            ConcatWithSeparator,
-            &[
-                lit(ScalarValue::Utf8(Some("|".to_string()))),
-                lit(ScalarValue::Utf8(None)),
-            ],
-            Ok(Some("")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            ConcatWithSeparator,
-            &[
-                lit(ScalarValue::Utf8(None)),
-                lit(ScalarValue::Utf8(Some("aa".to_string()))),
-                lit(ScalarValue::Utf8(Some("bb".to_string()))),
-                lit(ScalarValue::Utf8(Some("cc".to_string()))),
-            ],
-            Ok(None),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            ConcatWithSeparator,
-            &[
-                lit(ScalarValue::Utf8(Some("|".to_string()))),
-                lit(ScalarValue::Utf8(Some("aa".to_string()))),
-                lit(ScalarValue::Utf8(None)),
-                lit(ScalarValue::Utf8(Some("cc".to_string()))),
-            ],
-            Ok(Some("aa|cc")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            Exp,
-            &[lit(ScalarValue::Int32(Some(1)))],
-            Ok(Some((1.0_f64).exp())),
-            f64,
-            Float64,
-            Float64Array
-        );
-        test_function!(
-            Exp,
-            &[lit(ScalarValue::UInt32(Some(1)))],
-            Ok(Some((1.0_f64).exp())),
-            f64,
-            Float64,
-            Float64Array
-        );
-        test_function!(
-            Exp,
-            &[lit(ScalarValue::UInt64(Some(1)))],
-            Ok(Some((1.0_f64).exp())),
-            f64,
-            Float64,
-            Float64Array
-        );
-        test_function!(
-            Exp,
-            &[lit(ScalarValue::Float64(Some(1.0)))],
-            Ok(Some((1.0_f64).exp())),
-            f64,
-            Float64,
-            Float64Array
-        );
-        test_function!(
-            Exp,
-            &[lit(ScalarValue::Float32(Some(1.0)))],
-            Ok(Some((1.0_f32).exp() as f64)),
-            f64,
-            Float64,
-            Float64Array
-        );
-        test_function!(
-            InitCap,
-            &[lit(ScalarValue::Utf8(Some("hi THOMAS".to_string())))],
-            Ok(Some("Hi Thomas")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            InitCap,
-            &[lit(ScalarValue::Utf8(Some("".to_string())))],
-            Ok(Some("")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            InitCap,
-            &[lit(ScalarValue::Utf8(Some("".to_string())))],
-            Ok(Some("")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            InitCap,
-            &[lit(ScalarValue::Utf8(None))],
-            Ok(None),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Left,
-            &[
-                lit(ScalarValue::Utf8(Some("abcde".to_string()))),
-                lit(ScalarValue::Int8(Some(2))),
-            ],
-            Ok(Some("ab")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Left,
-            &[
-                lit(ScalarValue::Utf8(Some("abcde".to_string()))),
-                lit(ScalarValue::Int64(Some(200))),
-            ],
-            Ok(Some("abcde")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Left,
-            &[
-                lit(ScalarValue::Utf8(Some("abcde".to_string()))),
-                lit(ScalarValue::Int64(Some(-2))),
-            ],
-            Ok(Some("abc")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Left,
-            &[
-                lit(ScalarValue::Utf8(Some("abcde".to_string()))),
-                lit(ScalarValue::Int64(Some(-200))),
-            ],
-            Ok(Some("")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Left,
-            &[
-                lit(ScalarValue::Utf8(Some("abcde".to_string()))),
-                lit(ScalarValue::Int64(Some(0))),
-            ],
-            Ok(Some("")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Left,
-            &[
-                lit(ScalarValue::Utf8(None)),
-                lit(ScalarValue::Int64(Some(2))),
-            ],
-            Ok(None),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Left,
-            &[
-                lit(ScalarValue::Utf8(Some("abcde".to_string()))),
-                lit(ScalarValue::Int64(None)),
-            ],
-            Ok(None),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Left,
-            &[
-                lit(ScalarValue::Utf8(Some("joséésoj".to_string()))),
-                lit(ScalarValue::Int64(Some(5))),
-            ],
-            Ok(Some("joséé")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Left,
-            &[
-                lit(ScalarValue::Utf8(Some("joséésoj".to_string()))),
-                lit(ScalarValue::Int64(Some(-3))),
-            ],
-            Ok(Some("joséé")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(not(feature = "unicode_expressions"))]
-        test_function!(
-            Left,
-            &[
-                lit(ScalarValue::Utf8(Some("abcde".to_string()))),
-                lit(ScalarValue::Int8(Some(2))),
-            ],
-            Err(DataFusionError::Internal(
-                "function left requires compilation with feature flag: unicode_expressions.".to_string()
-            )),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Lpad,
-            &[
-                lit(ScalarValue::Utf8(Some("josé".to_string()))),
-                lit(ScalarValue::Int64(Some(5))),
-            ],
-            Ok(Some(" josé")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Lpad,
-            &[
-                lit(ScalarValue::Utf8(Some("hi".to_string()))),
-                lit(ScalarValue::Int64(Some(5))),
-            ],
-            Ok(Some("   hi")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Lpad,
-            &[
-                lit(ScalarValue::Utf8(Some("hi".to_string()))),
-                lit(ScalarValue::Int64(Some(0))),
-            ],
-            Ok(Some("")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Lpad,
-            &[
-                lit(ScalarValue::Utf8(Some("hi".to_string()))),
-                lit(ScalarValue::Int64(None)),
-            ],
-            Ok(None),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Lpad,
-            &[
-                lit(ScalarValue::Utf8(None)),
-                lit(ScalarValue::Int64(Some(5))),
-            ],
-            Ok(None),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Lpad,
-            &[
-                lit(ScalarValue::Utf8(Some("hi".to_string()))),
-                lit(ScalarValue::Int64(Some(5))),
-                lit(ScalarValue::Utf8(Some("xy".to_string()))),
-            ],
-            Ok(Some("xyxhi")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Lpad,
-            &[
-                lit(ScalarValue::Utf8(Some("hi".to_string()))),
-                lit(ScalarValue::Int64(Some(21))),
-                lit(ScalarValue::Utf8(Some("abcdef".to_string()))),
-            ],
-            Ok(Some("abcdefabcdefabcdefahi")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Lpad,
-            &[
-                lit(ScalarValue::Utf8(Some("hi".to_string()))),
-                lit(ScalarValue::Int64(Some(5))),
-                lit(ScalarValue::Utf8(Some(" ".to_string()))),
-            ],
-            Ok(Some("   hi")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Lpad,
-            &[
-                lit(ScalarValue::Utf8(Some("hi".to_string()))),
-                lit(ScalarValue::Int64(Some(5))),
-                lit(ScalarValue::Utf8(Some("".to_string()))),
-            ],
-            Ok(Some("hi")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Lpad,
-            &[
-                lit(ScalarValue::Utf8(None)),
-                lit(ScalarValue::Int64(Some(5))),
-                lit(ScalarValue::Utf8(Some("xy".to_string()))),
-            ],
-            Ok(None),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Lpad,
-            &[
-                lit(ScalarValue::Utf8(Some("hi".to_string()))),
-                lit(ScalarValue::Int64(None)),
-                lit(ScalarValue::Utf8(Some("xy".to_string()))),
-            ],
-            Ok(None),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Lpad,
-            &[
-                lit(ScalarValue::Utf8(Some("hi".to_string()))),
-                lit(ScalarValue::Int64(Some(5))),
-                lit(ScalarValue::Utf8(None)),
-            ],
-            Ok(None),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Lpad,
-            &[
-                lit(ScalarValue::Utf8(Some("josé".to_string()))),
-                lit(ScalarValue::Int64(Some(10))),
-                lit(ScalarValue::Utf8(Some("xy".to_string()))),
-            ],
-            Ok(Some("xyxyxyjosé")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Lpad,
-            &[
-                lit(ScalarValue::Utf8(Some("josé".to_string()))),
-                lit(ScalarValue::Int64(Some(10))),
-                lit(ScalarValue::Utf8(Some("éñ".to_string()))),
-            ],
-            Ok(Some("éñéñéñjosé")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(not(feature = "unicode_expressions"))]
-        test_function!(
-            Lpad,
-            &[
-                lit(ScalarValue::Utf8(Some("josé".to_string()))),
-                lit(ScalarValue::Int64(Some(5))),
-            ],
-            Err(DataFusionError::Internal(
-                "function lpad requires compilation with feature flag: unicode_expressions.".to_string()
-            )),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            Ltrim,
-            &[lit(ScalarValue::Utf8(Some(" trim".to_string())))],
-            Ok(Some("trim")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            Ltrim,
-            &[lit(ScalarValue::Utf8(Some(" trim ".to_string())))],
-            Ok(Some("trim ")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            Ltrim,
-            &[lit(ScalarValue::Utf8(Some("trim ".to_string())))],
-            Ok(Some("trim ")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            Ltrim,
-            &[lit(ScalarValue::Utf8(Some("trim".to_string())))],
-            Ok(Some("trim")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            Ltrim,
-            &[lit(ScalarValue::Utf8(Some("\n trim ".to_string())))],
-            Ok(Some("\n trim ")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            Ltrim,
-            &[lit(ScalarValue::Utf8(None))],
-            Ok(None),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "crypto_expressions")]
-        test_function!(
-            MD5,
-            &[lit(ScalarValue::Utf8(Some("tom".to_string())))],
-            Ok(Some("34b7da764b21d298ef307d04d8152dc5")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "crypto_expressions")]
-        test_function!(
-            MD5,
-            &[lit(ScalarValue::Utf8(Some("".to_string())))],
-            Ok(Some("d41d8cd98f00b204e9800998ecf8427e")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "crypto_expressions")]
-        test_function!(
-            MD5,
-            &[lit(ScalarValue::Utf8(None))],
-            Ok(None),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(not(feature = "crypto_expressions"))]
-        test_function!(
-            MD5,
-            &[lit(ScalarValue::Utf8(Some("tom".to_string())))],
-            Err(DataFusionError::Internal(
-                "function md5 requires compilation with feature flag: crypto_expressions.".to_string()
-            )),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            OctetLength,
-            &[lit(ScalarValue::Utf8(Some("chars".to_string())))],
-            Ok(Some(5)),
-            i32,
-            Int32,
-            Int32Array
-        );
-        test_function!(
-            OctetLength,
-            &[lit(ScalarValue::Utf8(Some("josé".to_string())))],
-            Ok(Some(5)),
-            i32,
-            Int32,
-            Int32Array
-        );
-        test_function!(
-            OctetLength,
-            &[lit(ScalarValue::Utf8(Some("".to_string())))],
-            Ok(Some(0)),
-            i32,
-            Int32,
-            Int32Array
-        );
-        test_function!(
-            OctetLength,
-            &[lit(ScalarValue::Utf8(None))],
-            Ok(None),
-            i32,
-            Int32,
-            Int32Array
-        );
-        #[cfg(feature = "regex_expressions")]
-        test_function!(
-            RegexpReplace,
-            &[
-                lit(ScalarValue::Utf8(Some("Thomas".to_string()))),
-                lit(ScalarValue::Utf8(Some(".[mN]a.".to_string()))),
-                lit(ScalarValue::Utf8(Some("M".to_string()))),
-            ],
-            Ok(Some("ThM")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "regex_expressions")]
-        test_function!(
-            RegexpReplace,
-            &[
-                lit(ScalarValue::Utf8(Some("foobarbaz".to_string()))),
-                lit(ScalarValue::Utf8(Some("b..".to_string()))),
-                lit(ScalarValue::Utf8(Some("X".to_string()))),
-            ],
-            Ok(Some("fooXbaz")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "regex_expressions")]
-        test_function!(
-            RegexpReplace,
-            &[
-                lit(ScalarValue::Utf8(Some("foobarbaz".to_string()))),
-                lit(ScalarValue::Utf8(Some("b..".to_string()))),
-                lit(ScalarValue::Utf8(Some("X".to_string()))),
-                lit(ScalarValue::Utf8(Some("g".to_string()))),
-            ],
-            Ok(Some("fooXX")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "regex_expressions")]
-        test_function!(
-            RegexpReplace,
-            &[
-                lit(ScalarValue::Utf8(Some("foobarbaz".to_string()))),
-                lit(ScalarValue::Utf8(Some("b(..)".to_string()))),
-                lit(ScalarValue::Utf8(Some("X\\1Y".to_string()))),
-                lit(ScalarValue::Utf8(Some("g".to_string()))),
-            ],
-            Ok(Some("fooXarYXazY")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "regex_expressions")]
-        test_function!(
-            RegexpReplace,
-            &[
-                lit(ScalarValue::Utf8(None)),
-                lit(ScalarValue::Utf8(Some("b(..)".to_string()))),
-                lit(ScalarValue::Utf8(Some("X\\1Y".to_string()))),
-                lit(ScalarValue::Utf8(Some("g".to_string()))),
-            ],
-            Ok(None),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "regex_expressions")]
-        test_function!(
-            RegexpReplace,
-            &[
-                lit(ScalarValue::Utf8(Some("foobarbaz".to_string()))),
-                lit(ScalarValue::Utf8(None)),
-                lit(ScalarValue::Utf8(Some("X\\1Y".to_string()))),
-                lit(ScalarValue::Utf8(Some("g".to_string()))),
-            ],
-            Ok(None),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "regex_expressions")]
-        test_function!(
-            RegexpReplace,
-            &[
-                lit(ScalarValue::Utf8(Some("foobarbaz".to_string()))),
-                lit(ScalarValue::Utf8(Some("b(..)".to_string()))),
-                lit(ScalarValue::Utf8(None)),
-                lit(ScalarValue::Utf8(Some("g".to_string()))),
-            ],
-            Ok(None),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "regex_expressions")]
-        test_function!(
-            RegexpReplace,
-            &[
-                lit(ScalarValue::Utf8(Some("foobarbaz".to_string()))),
-                lit(ScalarValue::Utf8(Some("b(..)".to_string()))),
-                lit(ScalarValue::Utf8(Some("X\\1Y".to_string()))),
-                lit(ScalarValue::Utf8(None)),
-            ],
-            Ok(None),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "regex_expressions")]
-        test_function!(
-            RegexpReplace,
-            &[
-                lit(ScalarValue::Utf8(Some("ABCabcABC".to_string()))),
-                lit(ScalarValue::Utf8(Some("(abc)".to_string()))),
-                lit(ScalarValue::Utf8(Some("X".to_string()))),
-                lit(ScalarValue::Utf8(Some("gi".to_string()))),
-            ],
-            Ok(Some("XXX")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "regex_expressions")]
-        test_function!(
-            RegexpReplace,
-            &[
-                lit(ScalarValue::Utf8(Some("ABCabcABC".to_string()))),
-                lit(ScalarValue::Utf8(Some("(abc)".to_string()))),
-                lit(ScalarValue::Utf8(Some("X".to_string()))),
-                lit(ScalarValue::Utf8(Some("i".to_string()))),
-            ],
-            Ok(Some("XabcABC")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(not(feature = "regex_expressions"))]
-        test_function!(
-            RegexpReplace,
-            &[
-                lit(ScalarValue::Utf8(Some("foobarbaz".to_string()))),
-                lit(ScalarValue::Utf8(Some("b..".to_string()))),
-                lit(ScalarValue::Utf8(Some("X".to_string()))),
-            ],
-            Err(DataFusionError::Internal(
-                "function regexp_replace requires compilation with feature flag: regex_expressions.".to_string()
-            )),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            Repeat,
-            &[
-                lit(ScalarValue::Utf8(Some("Pg".to_string()))),
-                lit(ScalarValue::Int64(Some(4))),
-            ],
-            Ok(Some("PgPgPgPg")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            Repeat,
-            &[
-                lit(ScalarValue::Utf8(None)),
-                lit(ScalarValue::Int64(Some(4))),
-            ],
-            Ok(None),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            Repeat,
-            &[
-                lit(ScalarValue::Utf8(Some("Pg".to_string()))),
-                lit(ScalarValue::Int64(None)),
-            ],
-            Ok(None),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Reverse,
-            &[lit(ScalarValue::Utf8(Some("abcde".to_string())))],
-            Ok(Some("edcba")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Reverse,
-            &[lit(ScalarValue::Utf8(Some("loẅks".to_string())))],
-            Ok(Some("skẅol")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Reverse,
-            &[lit(ScalarValue::Utf8(Some("loẅks".to_string())))],
-            Ok(Some("skẅol")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Reverse,
-            &[lit(ScalarValue::Utf8(None))],
-            Ok(None),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(not(feature = "unicode_expressions"))]
-        test_function!(
-            Reverse,
-            &[lit(ScalarValue::Utf8(Some("abcde".to_string())))],
-            Err(DataFusionError::Internal(
-                "function reverse requires compilation with feature flag: unicode_expressions.".to_string()
-            )),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Right,
-            &[
-                lit(ScalarValue::Utf8(Some("abcde".to_string()))),
-                lit(ScalarValue::Int8(Some(2))),
-            ],
-            Ok(Some("de")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Right,
-            &[
-                lit(ScalarValue::Utf8(Some("abcde".to_string()))),
-                lit(ScalarValue::Int64(Some(200))),
-            ],
-            Ok(Some("abcde")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Right,
-            &[
-                lit(ScalarValue::Utf8(Some("abcde".to_string()))),
-                lit(ScalarValue::Int64(Some(-2))),
-            ],
-            Ok(Some("cde")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Right,
-            &[
-                lit(ScalarValue::Utf8(Some("abcde".to_string()))),
-                lit(ScalarValue::Int64(Some(-200))),
-            ],
-            Ok(Some("")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Right,
-            &[
-                lit(ScalarValue::Utf8(Some("abcde".to_string()))),
-                lit(ScalarValue::Int64(Some(0))),
-            ],
-            Ok(Some("")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Right,
-            &[
-                lit(ScalarValue::Utf8(None)),
-                lit(ScalarValue::Int64(Some(2))),
-            ],
-            Ok(None),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Right,
-            &[
-                lit(ScalarValue::Utf8(Some("abcde".to_string()))),
-                lit(ScalarValue::Int64(None)),
-            ],
-            Ok(None),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Right,
-            &[
-                lit(ScalarValue::Utf8(Some("joséésoj".to_string()))),
-                lit(ScalarValue::Int64(Some(5))),
-            ],
-            Ok(Some("éésoj")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Right,
-            &[
-                lit(ScalarValue::Utf8(Some("joséésoj".to_string()))),
-                lit(ScalarValue::Int64(Some(-3))),
-            ],
-            Ok(Some("éésoj")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(not(feature = "unicode_expressions"))]
-        test_function!(
-            Right,
-            &[
-                lit(ScalarValue::Utf8(Some("abcde".to_string()))),
-                lit(ScalarValue::Int8(Some(2))),
-            ],
-            Err(DataFusionError::Internal(
-                "function right requires compilation with feature flag: unicode_expressions.".to_string()
-            )),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Rpad,
-            &[
-                lit(ScalarValue::Utf8(Some("josé".to_string()))),
-                lit(ScalarValue::Int64(Some(5))),
-            ],
-            Ok(Some("josé ")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Rpad,
-            &[
-                lit(ScalarValue::Utf8(Some("hi".to_string()))),
-                lit(ScalarValue::Int64(Some(5))),
-            ],
-            Ok(Some("hi   ")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Rpad,
-            &[
-                lit(ScalarValue::Utf8(Some("hi".to_string()))),
-                lit(ScalarValue::Int64(Some(0))),
-            ],
-            Ok(Some("")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Rpad,
-            &[
-                lit(ScalarValue::Utf8(Some("hi".to_string()))),
-                lit(ScalarValue::Int64(None)),
-            ],
-            Ok(None),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Rpad,
-            &[
-                lit(ScalarValue::Utf8(None)),
-                lit(ScalarValue::Int64(Some(5))),
-            ],
-            Ok(None),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Rpad,
-            &[
-                lit(ScalarValue::Utf8(Some("hi".to_string()))),
-                lit(ScalarValue::Int64(Some(5))),
-                lit(ScalarValue::Utf8(Some("xy".to_string()))),
-            ],
-            Ok(Some("hixyx")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Rpad,
-            &[
-                lit(ScalarValue::Utf8(Some("hi".to_string()))),
-                lit(ScalarValue::Int64(Some(21))),
-                lit(ScalarValue::Utf8(Some("abcdef".to_string()))),
-            ],
-            Ok(Some("hiabcdefabcdefabcdefa")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Rpad,
-            &[
-                lit(ScalarValue::Utf8(Some("hi".to_string()))),
-                lit(ScalarValue::Int64(Some(5))),
-                lit(ScalarValue::Utf8(Some(" ".to_string()))),
-            ],
-            Ok(Some("hi   ")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Rpad,
-            &[
-                lit(ScalarValue::Utf8(Some("hi".to_string()))),
-                lit(ScalarValue::Int64(Some(5))),
-                lit(ScalarValue::Utf8(Some("".to_string()))),
-            ],
-            Ok(Some("hi")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Rpad,
-            &[
-                lit(ScalarValue::Utf8(None)),
-                lit(ScalarValue::Int64(Some(5))),
-                lit(ScalarValue::Utf8(Some("xy".to_string()))),
-            ],
-            Ok(None),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Rpad,
-            &[
-                lit(ScalarValue::Utf8(Some("hi".to_string()))),
-                lit(ScalarValue::Int64(None)),
-                lit(ScalarValue::Utf8(Some("xy".to_string()))),
-            ],
-            Ok(None),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Rpad,
-            &[
-                lit(ScalarValue::Utf8(Some("hi".to_string()))),
-                lit(ScalarValue::Int64(Some(5))),
-                lit(ScalarValue::Utf8(None)),
-            ],
-            Ok(None),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Rpad,
-            &[
-                lit(ScalarValue::Utf8(Some("josé".to_string()))),
-                lit(ScalarValue::Int64(Some(10))),
-                lit(ScalarValue::Utf8(Some("xy".to_string()))),
-            ],
-            Ok(Some("joséxyxyxy")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Rpad,
-            &[
-                lit(ScalarValue::Utf8(Some("josé".to_string()))),
-                lit(ScalarValue::Int64(Some(10))),
-                lit(ScalarValue::Utf8(Some("éñ".to_string()))),
-            ],
-            Ok(Some("josééñéñéñ")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(not(feature = "unicode_expressions"))]
-        test_function!(
-            Rpad,
-            &[
-                lit(ScalarValue::Utf8(Some("josé".to_string()))),
-                lit(ScalarValue::Int64(Some(5))),
-            ],
-            Err(DataFusionError::Internal(
-                "function rpad requires compilation with feature flag: unicode_expressions.".to_string()
-            )),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            Rtrim,
-            &[lit(ScalarValue::Utf8(Some("trim ".to_string())))],
-            Ok(Some("trim")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            Rtrim,
-            &[lit(ScalarValue::Utf8(Some(" trim ".to_string())))],
-            Ok(Some(" trim")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            Rtrim,
-            &[lit(ScalarValue::Utf8(Some(" trim \n".to_string())))],
-            Ok(Some(" trim \n")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            Rtrim,
-            &[lit(ScalarValue::Utf8(Some(" trim".to_string())))],
-            Ok(Some(" trim")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            Rtrim,
-            &[lit(ScalarValue::Utf8(Some("trim".to_string())))],
-            Ok(Some("trim")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            Rtrim,
-            &[lit(ScalarValue::Utf8(None))],
-            Ok(None),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "crypto_expressions")]
-        test_function!(
-            SHA224,
-            &[lit(ScalarValue::Utf8(Some("tom".to_string())))],
-            Ok(Some(&[
-                11u8, 246u8, 203u8, 98u8, 100u8, 156u8, 66u8, 169u8, 174u8, 56u8, 118u8,
-                171u8, 111u8, 109u8, 146u8, 173u8, 54u8, 203u8, 84u8, 20u8, 228u8, 149u8,
-                248u8, 135u8, 50u8, 146u8, 190u8, 77u8
-            ])),
-            &[u8],
-            Binary,
-            BinaryArray
-        );
-        #[cfg(feature = "crypto_expressions")]
-        test_function!(
-            SHA224,
-            &[lit(ScalarValue::Utf8(Some("".to_string())))],
-            Ok(Some(&[
-                209u8, 74u8, 2u8, 140u8, 42u8, 58u8, 43u8, 201u8, 71u8, 97u8, 2u8, 187u8,
-                40u8, 130u8, 52u8, 196u8, 21u8, 162u8, 176u8, 31u8, 130u8, 142u8, 166u8,
-                42u8, 197u8, 179u8, 228u8, 47u8
-            ])),
-            &[u8],
-            Binary,
-            BinaryArray
-        );
-        #[cfg(feature = "crypto_expressions")]
-        test_function!(
-            SHA224,
-            &[lit(ScalarValue::Utf8(None))],
-            Ok(None),
-            &[u8],
-            Binary,
-            BinaryArray
-        );
-        #[cfg(not(feature = "crypto_expressions"))]
-        test_function!(
-            SHA224,
-            &[lit(ScalarValue::Utf8(Some("tom".to_string())))],
-            Err(DataFusionError::Internal(
-                "function sha224 requires compilation with feature flag: crypto_expressions.".to_string()
-            )),
-            &[u8],
-            Binary,
-            BinaryArray
-        );
-        #[cfg(feature = "crypto_expressions")]
-        test_function!(
-            SHA256,
-            &[lit(ScalarValue::Utf8(Some("tom".to_string())))],
-            Ok(Some(&[
-                225u8, 96u8, 143u8, 117u8, 197u8, 215u8, 129u8, 63u8, 61u8, 64u8, 49u8,
-                203u8, 48u8, 191u8, 183u8, 134u8, 80u8, 125u8, 152u8, 19u8, 117u8, 56u8,
-                255u8, 142u8, 18u8, 138u8, 111u8, 247u8, 78u8, 132u8, 230u8, 67u8
-            ])),
-            &[u8],
-            Binary,
-            BinaryArray
-        );
-        #[cfg(feature = "crypto_expressions")]
-        test_function!(
-            SHA256,
-            &[lit(ScalarValue::Utf8(Some("".to_string())))],
-            Ok(Some(&[
-                227u8, 176u8, 196u8, 66u8, 152u8, 252u8, 28u8, 20u8, 154u8, 251u8, 244u8,
-                200u8, 153u8, 111u8, 185u8, 36u8, 39u8, 174u8, 65u8, 228u8, 100u8, 155u8,
-                147u8, 76u8, 164u8, 149u8, 153u8, 27u8, 120u8, 82u8, 184u8, 85u8
-            ])),
-            &[u8],
-            Binary,
-            BinaryArray
-        );
-        #[cfg(feature = "crypto_expressions")]
-        test_function!(
-            SHA256,
-            &[lit(ScalarValue::Utf8(None))],
-            Ok(None),
-            &[u8],
-            Binary,
-            BinaryArray
-        );
-        #[cfg(not(feature = "crypto_expressions"))]
-        test_function!(
-            SHA256,
-            &[lit(ScalarValue::Utf8(Some("tom".to_string())))],
-            Err(DataFusionError::Internal(
-                "function sha256 requires compilation with feature flag: crypto_expressions.".to_string()
-            )),
-            &[u8],
-            Binary,
-            BinaryArray
-        );
-        #[cfg(feature = "crypto_expressions")]
-        test_function!(
-            SHA384,
-            &[lit(ScalarValue::Utf8(Some("tom".to_string())))],
-            Ok(Some(&[
-                9u8, 111u8, 91u8, 104u8, 170u8, 119u8, 132u8, 142u8, 79u8, 223u8, 92u8,
-                28u8, 11u8, 53u8, 13u8, 226u8, 219u8, 250u8, 214u8, 15u8, 253u8, 124u8,
-                37u8, 217u8, 234u8, 7u8, 198u8, 193u8, 155u8, 138u8, 77u8, 85u8, 169u8,
-                24u8, 126u8, 177u8, 23u8, 197u8, 87u8, 136u8, 63u8, 88u8, 193u8, 109u8,
-                250u8, 195u8, 227u8, 67u8
-            ])),
-            &[u8],
-            Binary,
-            BinaryArray
-        );
-        #[cfg(feature = "crypto_expressions")]
-        test_function!(
-            SHA384,
-            &[lit(ScalarValue::Utf8(Some("".to_string())))],
-            Ok(Some(&[
-                56u8, 176u8, 96u8, 167u8, 81u8, 172u8, 150u8, 56u8, 76u8, 217u8, 50u8,
-                126u8, 177u8, 177u8, 227u8, 106u8, 33u8, 253u8, 183u8, 17u8, 20u8, 190u8,
-                7u8, 67u8, 76u8, 12u8, 199u8, 191u8, 99u8, 246u8, 225u8, 218u8, 39u8,
-                78u8, 222u8, 191u8, 231u8, 111u8, 101u8, 251u8, 213u8, 26u8, 210u8,
-                241u8, 72u8, 152u8, 185u8, 91u8
-            ])),
-            &[u8],
-            Binary,
-            BinaryArray
-        );
-        #[cfg(feature = "crypto_expressions")]
-        test_function!(
-            SHA384,
-            &[lit(ScalarValue::Utf8(None))],
-            Ok(None),
-            &[u8],
-            Binary,
-            BinaryArray
-        );
-        #[cfg(not(feature = "crypto_expressions"))]
-        test_function!(
-            SHA384,
-            &[lit(ScalarValue::Utf8(Some("tom".to_string())))],
-            Err(DataFusionError::Internal(
-                "function sha384 requires compilation with feature flag: crypto_expressions.".to_string()
-            )),
-            &[u8],
-            Binary,
-            BinaryArray
-        );
-        #[cfg(feature = "crypto_expressions")]
-        test_function!(
-            SHA512,
-            &[lit(ScalarValue::Utf8(Some("tom".to_string())))],
-            Ok(Some(&[
-                110u8, 27u8, 155u8, 63u8, 232u8, 64u8, 104u8, 14u8, 55u8, 5u8, 31u8,
-                122u8, 213u8, 233u8, 89u8, 214u8, 243u8, 154u8, 208u8, 248u8, 136u8,
-                93u8, 133u8, 81u8, 102u8, 245u8, 92u8, 101u8, 148u8, 105u8, 211u8, 200u8,
-                183u8, 129u8, 24u8, 196u8, 74u8, 42u8, 73u8, 199u8, 45u8, 219u8, 72u8,
-                28u8, 214u8, 216u8, 115u8, 16u8, 52u8, 225u8, 28u8, 192u8, 48u8, 7u8,
-                11u8, 168u8, 67u8, 169u8, 11u8, 52u8, 149u8, 203u8, 141u8, 62u8
-            ])),
-            &[u8],
-            Binary,
-            BinaryArray
-        );
-        #[cfg(feature = "crypto_expressions")]
-        test_function!(
-            SHA512,
-            &[lit(ScalarValue::Utf8(Some("".to_string())))],
-            Ok(Some(&[
-                207u8, 131u8, 225u8, 53u8, 126u8, 239u8, 184u8, 189u8, 241u8, 84u8, 40u8,
-                80u8, 214u8, 109u8, 128u8, 7u8, 214u8, 32u8, 228u8, 5u8, 11u8, 87u8,
-                21u8, 220u8, 131u8, 244u8, 169u8, 33u8, 211u8, 108u8, 233u8, 206u8, 71u8,
-                208u8, 209u8, 60u8, 93u8, 133u8, 242u8, 176u8, 255u8, 131u8, 24u8, 210u8,
-                135u8, 126u8, 236u8, 47u8, 99u8, 185u8, 49u8, 189u8, 71u8, 65u8, 122u8,
-                129u8, 165u8, 56u8, 50u8, 122u8, 249u8, 39u8, 218u8, 62u8
-            ])),
-            &[u8],
-            Binary,
-            BinaryArray
-        );
-        #[cfg(feature = "crypto_expressions")]
-        test_function!(
-            SHA512,
-            &[lit(ScalarValue::Utf8(None))],
-            Ok(None),
-            &[u8],
-            Binary,
-            BinaryArray
-        );
-        #[cfg(not(feature = "crypto_expressions"))]
-        test_function!(
-            SHA512,
-            &[lit(ScalarValue::Utf8(Some("tom".to_string())))],
-            Err(DataFusionError::Internal(
-                "function sha512 requires compilation with feature flag: crypto_expressions.".to_string()
-            )),
-            &[u8],
-            Binary,
-            BinaryArray
-        );
-        test_function!(
-            SplitPart,
-            &[
-                lit(ScalarValue::Utf8(Some("abc~@~def~@~ghi".to_string()))),
-                lit(ScalarValue::Utf8(Some("~@~".to_string()))),
-                lit(ScalarValue::Int64(Some(2))),
-            ],
-            Ok(Some("def")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            SplitPart,
-            &[
-                lit(ScalarValue::Utf8(Some("abc~@~def~@~ghi".to_string()))),
-                lit(ScalarValue::Utf8(Some("~@~".to_string()))),
-                lit(ScalarValue::Int64(Some(20))),
-            ],
-            Ok(Some("")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            SplitPart,
-            &[
-                lit(ScalarValue::Utf8(Some("abc~@~def~@~ghi".to_string()))),
-                lit(ScalarValue::Utf8(Some("~@~".to_string()))),
-                lit(ScalarValue::Int64(Some(-1))),
-            ],
-            Err(DataFusionError::Execution(
-                "field position must be greater than zero".to_string(),
-            )),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            StartsWith,
-            &[
-                lit(ScalarValue::Utf8(Some("alphabet".to_string()))),
-                lit(ScalarValue::Utf8(Some("alph".to_string()))),
-            ],
-            Ok(Some(true)),
-            bool,
-            Boolean,
-            BooleanArray
-        );
-        test_function!(
-            StartsWith,
-            &[
-                lit(ScalarValue::Utf8(Some("alphabet".to_string()))),
-                lit(ScalarValue::Utf8(Some("blph".to_string()))),
-            ],
-            Ok(Some(false)),
-            bool,
-            Boolean,
-            BooleanArray
-        );
-        test_function!(
-            StartsWith,
-            &[
-                lit(ScalarValue::Utf8(None)),
-                lit(ScalarValue::Utf8(Some("alph".to_string()))),
-            ],
-            Ok(None),
-            bool,
-            Boolean,
-            BooleanArray
-        );
-        test_function!(
-            StartsWith,
-            &[
-                lit(ScalarValue::Utf8(Some("alphabet".to_string()))),
-                lit(ScalarValue::Utf8(None)),
-            ],
-            Ok(None),
-            bool,
-            Boolean,
-            BooleanArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Strpos,
-            &[
-                lit(ScalarValue::Utf8(Some("abc".to_string()))),
-                lit(ScalarValue::Utf8(Some("c".to_string()))),
-            ],
-            Ok(Some(3)),
-            i32,
-            Int32,
-            Int32Array
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Strpos,
-            &[
-                lit(ScalarValue::Utf8(Some("josé".to_string()))),
-                lit(ScalarValue::Utf8(Some("é".to_string()))),
-            ],
-            Ok(Some(4)),
-            i32,
-            Int32,
-            Int32Array
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Strpos,
-            &[
-                lit(ScalarValue::Utf8(Some("joséésoj".to_string()))),
-                lit(ScalarValue::Utf8(Some("so".to_string()))),
-            ],
-            Ok(Some(6)),
-            i32,
-            Int32,
-            Int32Array
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Strpos,
-            &[
-                lit(ScalarValue::Utf8(Some("joséésoj".to_string()))),
-                lit(ScalarValue::Utf8(Some("abc".to_string()))),
-            ],
-            Ok(Some(0)),
-            i32,
-            Int32,
-            Int32Array
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Strpos,
-            &[
-                lit(ScalarValue::Utf8(None)),
-                lit(ScalarValue::Utf8(Some("abc".to_string()))),
-            ],
-            Ok(None),
-            i32,
-            Int32,
-            Int32Array
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Strpos,
-            &[
-                lit(ScalarValue::Utf8(Some("joséésoj".to_string()))),
-                lit(ScalarValue::Utf8(None)),
-            ],
-            Ok(None),
-            i32,
-            Int32,
-            Int32Array
-        );
-        #[cfg(not(feature = "unicode_expressions"))]
-        test_function!(
-            Strpos,
-            &[
-                lit(ScalarValue::Utf8(Some("joséésoj".to_string()))),
-                lit(ScalarValue::Utf8(None)),
-            ],
-            Err(DataFusionError::Internal(
-                "function strpos requires compilation with feature flag: unicode_expressions.".to_string()
-            )),
-            i32,
-            Int32,
-            Int32Array
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Substr,
-            &[
-                lit(ScalarValue::Utf8(Some("alphabet".to_string()))),
-                lit(ScalarValue::Int64(Some(0))),
-            ],
-            Ok(Some("alphabet")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Substr,
-            &[
-                lit(ScalarValue::Utf8(Some("joséésoj".to_string()))),
-                lit(ScalarValue::Int64(Some(5))),
-            ],
-            Ok(Some("ésoj")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Substr,
-            &[
-                lit(ScalarValue::Utf8(Some("alphabet".to_string()))),
-                lit(ScalarValue::Int64(Some(1))),
-            ],
-            Ok(Some("alphabet")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Substr,
-            &[
-                lit(ScalarValue::Utf8(Some("alphabet".to_string()))),
-                lit(ScalarValue::Int64(Some(2))),
-            ],
-            Ok(Some("lphabet")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Substr,
-            &[
-                lit(ScalarValue::Utf8(Some("alphabet".to_string()))),
-                lit(ScalarValue::Int64(Some(3))),
-            ],
-            Ok(Some("phabet")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Substr,
-            &[
-                lit(ScalarValue::Utf8(Some("alphabet".to_string()))),
-                lit(ScalarValue::Int64(Some(-3))),
-            ],
-            Ok(Some("alphabet")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Substr,
-            &[
-                lit(ScalarValue::Utf8(Some("alphabet".to_string()))),
-                lit(ScalarValue::Int64(Some(30))),
-            ],
-            Ok(Some("")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Substr,
-            &[
-                lit(ScalarValue::Utf8(Some("alphabet".to_string()))),
-                lit(ScalarValue::Int64(None)),
-            ],
-            Ok(None),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Substr,
-            &[
-                lit(ScalarValue::Utf8(Some("alphabet".to_string()))),
-                lit(ScalarValue::Int64(Some(3))),
-                lit(ScalarValue::Int64(Some(2))),
-            ],
-            Ok(Some("ph")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Substr,
-            &[
-                lit(ScalarValue::Utf8(Some("alphabet".to_string()))),
-                lit(ScalarValue::Int64(Some(3))),
-                lit(ScalarValue::Int64(Some(20))),
-            ],
-            Ok(Some("phabet")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Substr,
-            &[
-                lit(ScalarValue::Utf8(Some("alphabet".to_string()))),
-                lit(ScalarValue::Int64(None)),
-                lit(ScalarValue::Int64(Some(20))),
-            ],
-            Ok(None),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Substr,
-            &[
-                lit(ScalarValue::Utf8(Some("alphabet".to_string()))),
-                lit(ScalarValue::Int64(Some(3))),
-                lit(ScalarValue::Int64(None)),
-            ],
-            Ok(None),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Substr,
-            &[
-                lit(ScalarValue::Utf8(Some("alphabet".to_string()))),
-                lit(ScalarValue::Int64(Some(1))),
-                lit(ScalarValue::Int64(Some(-1))),
-            ],
-            Err(DataFusionError::Execution(
-                "negative substring length not allowed".to_string(),
-            )),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Substr,
-            &[
-                lit(ScalarValue::Utf8(Some("joséésoj".to_string()))),
-                lit(ScalarValue::Int64(Some(5))),
-                lit(ScalarValue::Int64(Some(2))),
-            ],
-            Ok(Some("és")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(not(feature = "unicode_expressions"))]
-        test_function!(
-            Substr,
-            &[
-                lit(ScalarValue::Utf8(Some("alphabet".to_string()))),
-                lit(ScalarValue::Int64(Some(0))),
-            ],
-            Err(DataFusionError::Internal(
-                "function substr requires compilation with feature flag: unicode_expressions.".to_string()
-            )),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Translate,
-            &[
-                lit(ScalarValue::Utf8(Some("12345".to_string()))),
-                lit(ScalarValue::Utf8(Some("143".to_string()))),
-                lit(ScalarValue::Utf8(Some("ax".to_string()))),
-            ],
-            Ok(Some("a2x5")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Translate,
-            &[
-                lit(ScalarValue::Utf8(None)),
-                lit(ScalarValue::Utf8(Some("143".to_string()))),
-                lit(ScalarValue::Utf8(Some("ax".to_string()))),
-            ],
-            Ok(None),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Translate,
-            &[
-                lit(ScalarValue::Utf8(Some("12345".to_string()))),
-                lit(ScalarValue::Utf8(None)),
-                lit(ScalarValue::Utf8(Some("ax".to_string()))),
-            ],
-            Ok(None),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Translate,
-            &[
-                lit(ScalarValue::Utf8(Some("12345".to_string()))),
-                lit(ScalarValue::Utf8(Some("143".to_string()))),
-                lit(ScalarValue::Utf8(None)),
-            ],
-            Ok(None),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(feature = "unicode_expressions")]
-        test_function!(
-            Translate,
-            &[
-                lit(ScalarValue::Utf8(Some("é2íñ5".to_string()))),
-                lit(ScalarValue::Utf8(Some("éñí".to_string()))),
-                lit(ScalarValue::Utf8(Some("óü".to_string()))),
-            ],
-            Ok(Some("ó2ü5")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        #[cfg(not(feature = "unicode_expressions"))]
-        test_function!(
-            Translate,
-            &[
-                lit(ScalarValue::Utf8(Some("12345".to_string()))),
-                lit(ScalarValue::Utf8(Some("143".to_string()))),
-                lit(ScalarValue::Utf8(Some("ax".to_string()))),
-            ],
-            Err(DataFusionError::Internal(
-                "function translate requires compilation with feature flag: unicode_expressions.".to_string()
-            )),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            Trim,
-            &[lit(ScalarValue::Utf8(Some(" trim ".to_string())))],
-            Ok(Some("trim")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            Trim,
-            &[lit(ScalarValue::Utf8(Some("trim ".to_string())))],
-            Ok(Some("trim")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            Trim,
-            &[lit(ScalarValue::Utf8(Some(" trim".to_string())))],
-            Ok(Some("trim")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            Trim,
-            &[lit(ScalarValue::Utf8(None))],
-            Ok(None),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            Upper,
-            &[lit(ScalarValue::Utf8(Some("upper".to_string())))],
-            Ok(Some("UPPER")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            Upper,
-            &[lit(ScalarValue::Utf8(Some("UPPER".to_string())))],
-            Ok(Some("UPPER")),
-            &str,
-            Utf8,
-            StringArray
-        );
-        test_function!(
-            Upper,
-            &[lit(ScalarValue::Utf8(None))],
-            Ok(None),
-            &str,
-            Utf8,
-            StringArray
-        );
-        Ok(())
-    }
-
-    #[test]
-    fn test_concat_error() -> Result<()> {
-        let result = return_type(&BuiltinScalarFunction::Concat, &[]);
-        if result.is_ok() {
-            Err(DataFusionError::Plan(
-                "Function 'concat' cannot accept zero arguments".to_string(),
-            ))
-        } else {
-            Ok(())
-        }
-    }
-
-    fn generic_test_array(
-        value1: ArrayRef,
-        value2: ArrayRef,
-        expected_type: DataType,
-        expected: &str,
-    ) -> Result<()> {
-        // any type works here: we evaluate against a literal of `value`
-        let schema = Schema::new(vec![
-            Field::new("a", value1.data_type().clone(), false),
-            Field::new("b", value2.data_type().clone(), false),
-        ]);
-        let columns: Vec<ArrayRef> = vec![value1, value2];
-
-        let expr = create_physical_expr(
-            &BuiltinScalarFunction::Array,
-            &[col("a"), col("b")],
-            &schema,
-        )?;
-
-        // type is correct
-        assert_eq!(
-            expr.data_type(&schema)?,
-            // type equals to a common coercion
-            DataType::FixedSizeList(Box::new(Field::new("item", expected_type, true)), 2)
-        );
-
-        // evaluate works
-        let batch = RecordBatch::try_new(Arc::new(schema.clone()), columns)?;
-        let result = expr.evaluate(&batch)?.into_array(batch.num_rows());
-
-        // downcast works
-        let result = result
-            .as_any()
-            .downcast_ref::<FixedSizeListArray>()
-            .unwrap();
-
-        // value is correct
-        assert_eq!(format!("{:?}", result.value(0)), expected);
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_array() -> Result<()> {
-        generic_test_array(
-            Arc::new(StringArray::from(vec!["aa"])),
-            Arc::new(StringArray::from(vec!["bb"])),
-            DataType::Utf8,
-            "StringArray\n[\n  \"aa\",\n  \"bb\",\n]",
-        )?;
-
-        // different types, to validate that casting happens
-        generic_test_array(
-            Arc::new(UInt32Array::from(vec![1u32])),
-            Arc::new(UInt64Array::from(vec![1u64])),
-            DataType::UInt64,
-            "PrimitiveArray<UInt64>\n[\n  1,\n  1,\n]",
-        )?;
-
-        // different types (another order), to validate that casting happens
-        generic_test_array(
-            Arc::new(UInt64Array::from(vec![1u64])),
-            Arc::new(UInt32Array::from(vec![1u32])),
-            DataType::UInt64,
-            "PrimitiveArray<UInt64>\n[\n  1,\n  1,\n]",
-        )
-    }
-
-    #[test]
-    #[cfg(feature = "regex_expressions")]
-    fn test_regexp_match() -> Result<()> {
-        let schema = Schema::new(vec![Field::new("a", DataType::Utf8, false)]);
-
-        // concat(value, value)
-        let col_value: ArrayRef = Arc::new(StringArray::from(vec!["aaa-555"]));
-        let pattern = lit(ScalarValue::Utf8(Some(r".*-(\d*)".to_string())));
-        let columns: Vec<ArrayRef> = vec![col_value];
-        let expr = create_physical_expr(
-            &BuiltinScalarFunction::RegexpMatch,
-            &[col("a"), pattern],
-            &schema,
-        )?;
-
-        // type is correct
-        assert_eq!(
-            expr.data_type(&schema)?,
-            DataType::List(Box::new(Field::new("item", DataType::Utf8, true)))
-        );
-
-        // evaluate works
-        let batch = RecordBatch::try_new(Arc::new(schema.clone()), columns)?;
-        let result = expr.evaluate(&batch)?.into_array(batch.num_rows());
-
-        // downcast works
-        let result = result.as_any().downcast_ref::<ListArray>().unwrap();
-        let first_row = result.value(0);
-        let first_row = first_row.as_any().downcast_ref::<StringArray>().unwrap();
-
-        // value is correct
-        let expected = "555".to_string();
-        assert_eq!(first_row.value(0), expected);
-
-        Ok(())
-    }
-
-    #[test]
-    #[cfg(feature = "regex_expressions")]
-    fn test_regexp_match_all_literals() -> Result<()> {
-        let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]);
-
-        // concat(value, value)
-        let col_value = lit(ScalarValue::Utf8(Some("aaa-555".to_string())));
-        let pattern = lit(ScalarValue::Utf8(Some(r".*-(\d*)".to_string())));
-        let columns: Vec<ArrayRef> = vec![Arc::new(Int32Array::from(vec![1]))];
-        let expr = create_physical_expr(
-            &BuiltinScalarFunction::RegexpMatch,
-            &[col_value, pattern],
-            &schema,
-        )?;
-
-        // type is correct
-        assert_eq!(
-            expr.data_type(&schema)?,
-            DataType::List(Box::new(Field::new("item", DataType::Utf8, true)))
-        );
-
-        // evaluate works
-        let batch = RecordBatch::try_new(Arc::new(schema.clone()), columns)?;
-        let result = expr.evaluate(&batch)?.into_array(batch.num_rows());
-
-        // downcast works
-        let result = result.as_any().downcast_ref::<ListArray>().unwrap();
-        let first_row = result.value(0);
-        let first_row = first_row.as_any().downcast_ref::<StringArray>().unwrap();
-
-        // value is correct
-        let expected = "555".to_string();
-        assert_eq!(first_row.value(0), expected);
-
-        Ok(())
-    }
-}
diff --git a/rust/datafusion/src/physical_plan/group_scalar.rs b/rust/datafusion/src/physical_plan/group_scalar.rs
deleted file mode 100644
index f4987ae3a7d..00000000000
--- a/rust/datafusion/src/physical_plan/group_scalar.rs
+++ /dev/null
@@ -1,212 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Defines scalars used to construct groups, ex. in GROUP BY clauses.
-
-use ordered_float::OrderedFloat;
-use std::convert::{From, TryFrom};
-
-use crate::error::{DataFusionError, Result};
-use crate::scalar::ScalarValue;
-
-/// Enumeration of types that can be used in a GROUP BY expression
-#[derive(Debug, PartialEq, Eq, Hash, Clone)]
-pub(crate) enum GroupByScalar {
-    Float32(OrderedFloat<f32>),
-    Float64(OrderedFloat<f64>),
-    UInt8(u8),
-    UInt16(u16),
-    UInt32(u32),
-    UInt64(u64),
-    Int8(i8),
-    Int16(i16),
-    Int32(i32),
-    Int64(i64),
-    Utf8(Box<String>),
-    Boolean(bool),
-    TimeMillisecond(i64),
-    TimeMicrosecond(i64),
-    TimeNanosecond(i64),
-    Date32(i32),
-}
-
-impl TryFrom<&ScalarValue> for GroupByScalar {
-    type Error = DataFusionError;
-
-    fn try_from(scalar_value: &ScalarValue) -> Result<Self> {
-        Ok(match scalar_value {
-            ScalarValue::Float32(Some(v)) => {
-                GroupByScalar::Float32(OrderedFloat::from(*v))
-            }
-            ScalarValue::Float64(Some(v)) => {
-                GroupByScalar::Float64(OrderedFloat::from(*v))
-            }
-            ScalarValue::Boolean(Some(v)) => GroupByScalar::Boolean(*v),
-            ScalarValue::Int8(Some(v)) => GroupByScalar::Int8(*v),
-            ScalarValue::Int16(Some(v)) => GroupByScalar::Int16(*v),
-            ScalarValue::Int32(Some(v)) => GroupByScalar::Int32(*v),
-            ScalarValue::Int64(Some(v)) => GroupByScalar::Int64(*v),
-            ScalarValue::UInt8(Some(v)) => GroupByScalar::UInt8(*v),
-            ScalarValue::UInt16(Some(v)) => GroupByScalar::UInt16(*v),
-            ScalarValue::UInt32(Some(v)) => GroupByScalar::UInt32(*v),
-            ScalarValue::UInt64(Some(v)) => GroupByScalar::UInt64(*v),
-            ScalarValue::TimestampMillisecond(Some(v)) => {
-                GroupByScalar::TimeMillisecond(*v)
-            }
-            ScalarValue::TimestampMicrosecond(Some(v)) => {
-                GroupByScalar::TimeMicrosecond(*v)
-            }
-            ScalarValue::TimestampNanosecond(Some(v)) => {
-                GroupByScalar::TimeNanosecond(*v)
-            }
-            ScalarValue::Utf8(Some(v)) => GroupByScalar::Utf8(Box::new(v.clone())),
-            ScalarValue::Float32(None)
-            | ScalarValue::Float64(None)
-            | ScalarValue::Boolean(None)
-            | ScalarValue::Int8(None)
-            | ScalarValue::Int16(None)
-            | ScalarValue::Int32(None)
-            | ScalarValue::Int64(None)
-            | ScalarValue::UInt8(None)
-            | ScalarValue::UInt16(None)
-            | ScalarValue::UInt32(None)
-            | ScalarValue::UInt64(None)
-            | ScalarValue::Utf8(None) => {
-                return Err(DataFusionError::Internal(format!(
-                    "Cannot convert a ScalarValue holding NULL ({:?})",
-                    scalar_value
-                )));
-            }
-            v => {
-                return Err(DataFusionError::Internal(format!(
-                    "Cannot convert a ScalarValue with associated DataType {:?}",
-                    v.get_datatype()
-                )))
-            }
-        })
-    }
-}
-
-impl From<&GroupByScalar> for ScalarValue {
-    fn from(group_by_scalar: &GroupByScalar) -> Self {
-        match group_by_scalar {
-            GroupByScalar::Float32(v) => ScalarValue::Float32(Some((*v).into())),
-            GroupByScalar::Float64(v) => ScalarValue::Float64(Some((*v).into())),
-            GroupByScalar::Boolean(v) => ScalarValue::Boolean(Some(*v)),
-            GroupByScalar::Int8(v) => ScalarValue::Int8(Some(*v)),
-            GroupByScalar::Int16(v) => ScalarValue::Int16(Some(*v)),
-            GroupByScalar::Int32(v) => ScalarValue::Int32(Some(*v)),
-            GroupByScalar::Int64(v) => ScalarValue::Int64(Some(*v)),
-            GroupByScalar::UInt8(v) => ScalarValue::UInt8(Some(*v)),
-            GroupByScalar::UInt16(v) => ScalarValue::UInt16(Some(*v)),
-            GroupByScalar::UInt32(v) => ScalarValue::UInt32(Some(*v)),
-            GroupByScalar::UInt64(v) => ScalarValue::UInt64(Some(*v)),
-            GroupByScalar::Utf8(v) => ScalarValue::Utf8(Some(v.to_string())),
-            GroupByScalar::TimeMillisecond(v) => {
-                ScalarValue::TimestampMillisecond(Some(*v))
-            }
-            GroupByScalar::TimeMicrosecond(v) => {
-                ScalarValue::TimestampMicrosecond(Some(*v))
-            }
-            GroupByScalar::TimeNanosecond(v) => {
-                ScalarValue::TimestampNanosecond(Some(*v))
-            }
-            GroupByScalar::Date32(v) => ScalarValue::Date32(Some(*v)),
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    use crate::error::DataFusionError;
-
-    macro_rules! scalar_eq_test {
-        ($TYPE:expr, $VALUE:expr) => {{
-            let scalar_value = $TYPE($VALUE);
-            let a = GroupByScalar::try_from(&scalar_value).unwrap();
-
-            let scalar_value = $TYPE($VALUE);
-            let b = GroupByScalar::try_from(&scalar_value).unwrap();
-
-            assert_eq!(a, b);
-        }};
-    }
-
-    #[test]
-    fn test_scalar_ne_non_std() {
-        // Test only Scalars with non native Eq, Hash
-        scalar_eq_test!(ScalarValue::Float32, Some(1.0));
-        scalar_eq_test!(ScalarValue::Float64, Some(1.0));
-    }
-
-    macro_rules! scalar_ne_test {
-        ($TYPE:expr, $LVALUE:expr, $RVALUE:expr) => {{
-            let scalar_value = $TYPE($LVALUE);
-            let a = GroupByScalar::try_from(&scalar_value).unwrap();
-
-            let scalar_value = $TYPE($RVALUE);
-            let b = GroupByScalar::try_from(&scalar_value).unwrap();
-
-            assert_ne!(a, b);
-        }};
-    }
-
-    #[test]
-    fn test_scalar_eq_non_std() {
-        // Test only Scalars with non native Eq, Hash
-        scalar_ne_test!(ScalarValue::Float32, Some(1.0), Some(2.0));
-        scalar_ne_test!(ScalarValue::Float64, Some(1.0), Some(2.0));
-    }
-
-    #[test]
-    fn from_scalar_holding_none() {
-        let scalar_value = ScalarValue::Int8(None);
-        let result = GroupByScalar::try_from(&scalar_value);
-
-        match result {
-            Err(DataFusionError::Internal(error_message)) => assert_eq!(
-                error_message,
-                String::from("Cannot convert a ScalarValue holding NULL (Int8(NULL))")
-            ),
-            _ => panic!("Unexpected result"),
-        }
-    }
-
-    #[test]
-    fn from_scalar_unsupported() {
-        // Use any ScalarValue type not supported by GroupByScalar.
-        let scalar_value = ScalarValue::LargeUtf8(Some("1.1".to_string()));
-        let result = GroupByScalar::try_from(&scalar_value);
-
-        match result {
-            Err(DataFusionError::Internal(error_message)) => assert_eq!(
-                error_message,
-                String::from(
-                    "Cannot convert a ScalarValue with associated DataType LargeUtf8"
-                )
-            ),
-            _ => panic!("Unexpected result"),
-        }
-    }
-
-    #[test]
-    fn size_of_group_by_scalar() {
-        assert_eq!(std::mem::size_of::<GroupByScalar>(), 16);
-    }
-}
diff --git a/rust/datafusion/src/physical_plan/hash_aggregate.rs b/rust/datafusion/src/physical_plan/hash_aggregate.rs
deleted file mode 100644
index 234265022ef..00000000000
--- a/rust/datafusion/src/physical_plan/hash_aggregate.rs
+++ /dev/null
@@ -1,1395 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Defines the execution plan for the hash aggregate operation
-
-use std::any::Any;
-use std::sync::{Arc, Mutex};
-use std::task::{Context, Poll};
-
-use ahash::RandomState;
-use futures::{
-    stream::{Stream, StreamExt},
-    Future,
-};
-
-use crate::error::{DataFusionError, Result};
-use crate::physical_plan::{Accumulator, AggregateExpr, SQLMetric};
-use crate::physical_plan::{Distribution, ExecutionPlan, Partitioning, PhysicalExpr};
-
-use arrow::{
-    array::{Array, UInt32Builder},
-    error::{ArrowError, Result as ArrowResult},
-};
-use arrow::{
-    array::{
-        ArrayRef, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array,
-        Int8Array, StringArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array,
-    },
-    compute,
-};
-use arrow::{
-    array::{BooleanArray, Date32Array, DictionaryArray},
-    compute::cast,
-    datatypes::{
-        ArrowDictionaryKeyType, ArrowNativeType, Int16Type, Int32Type, Int64Type,
-        Int8Type, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
-    },
-};
-use arrow::{
-    datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit},
-    record_batch::RecordBatch,
-};
-use hashbrown::HashMap;
-use ordered_float::OrderedFloat;
-use pin_project_lite::pin_project;
-
-use arrow::array::{
-    TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
-};
-use async_trait::async_trait;
-
-use super::{
-    expressions::Column, group_scalar::GroupByScalar, RecordBatchStream,
-    SendableRecordBatchStream,
-};
-
-/// Hash aggregate modes
-#[derive(Debug, Copy, Clone)]
-pub enum AggregateMode {
-    /// Partial aggregate that can be applied in parallel across input partitions
-    Partial,
-    /// Final aggregate that produces a single partition of output
-    Final,
-}
-
-/// Hash aggregate execution plan
-#[derive(Debug)]
-pub struct HashAggregateExec {
-    /// Aggregation mode (full, partial)
-    mode: AggregateMode,
-    /// Grouping expressions
-    group_expr: Vec<(Arc<dyn PhysicalExpr>, String)>,
-    /// Aggregate expressions
-    aggr_expr: Vec<Arc<dyn AggregateExpr>>,
-    /// Input plan, could be a partial aggregate or the input to the aggregate
-    input: Arc<dyn ExecutionPlan>,
-    /// Schema after the aggregate is applied
-    schema: SchemaRef,
-    /// Input schema before any aggregation is applied. For partial aggregate this will be the
-    /// same as input.schema() but for the final aggregate it will be the same as the input
-    /// to the partial aggregate
-    input_schema: SchemaRef,
-    /// Metric to track number of output rows
-    output_rows: Arc<Mutex<SQLMetric>>,
-}
-
-fn create_schema(
-    input_schema: &Schema,
-    group_expr: &[(Arc<dyn PhysicalExpr>, String)],
-    aggr_expr: &[Arc<dyn AggregateExpr>],
-    mode: AggregateMode,
-) -> Result<Schema> {
-    let mut fields = Vec::with_capacity(group_expr.len() + aggr_expr.len());
-    for (expr, name) in group_expr {
-        fields.push(Field::new(
-            name,
-            expr.data_type(&input_schema)?,
-            expr.nullable(&input_schema)?,
-        ))
-    }
-
-    match mode {
-        AggregateMode::Partial => {
-            // in partial mode, the fields of the accumulator's state
-            for expr in aggr_expr {
-                fields.extend(expr.state_fields()?.iter().cloned())
-            }
-        }
-        AggregateMode::Final => {
-            // in final mode, the field with the final result of the accumulator
-            for expr in aggr_expr {
-                fields.push(expr.field()?)
-            }
-        }
-    }
-
-    Ok(Schema::new(fields))
-}
-
-impl HashAggregateExec {
-    /// Create a new hash aggregate execution plan
-    pub fn try_new(
-        mode: AggregateMode,
-        group_expr: Vec<(Arc<dyn PhysicalExpr>, String)>,
-        aggr_expr: Vec<Arc<dyn AggregateExpr>>,
-        input: Arc<dyn ExecutionPlan>,
-        input_schema: SchemaRef,
-    ) -> Result<Self> {
-        let schema = create_schema(&input.schema(), &group_expr, &aggr_expr, mode)?;
-
-        let schema = Arc::new(schema);
-
-        let output_rows = SQLMetric::counter("outputRows");
-
-        Ok(HashAggregateExec {
-            mode,
-            group_expr,
-            aggr_expr,
-            input,
-            schema,
-            input_schema,
-            output_rows,
-        })
-    }
-
-    /// Aggregation mode (full, partial)
-    pub fn mode(&self) -> &AggregateMode {
-        &self.mode
-    }
-
-    /// Grouping expressions
-    pub fn group_expr(&self) -> &[(Arc<dyn PhysicalExpr>, String)] {
-        &self.group_expr
-    }
-
-    /// Aggregate expressions
-    pub fn aggr_expr(&self) -> &[Arc<dyn AggregateExpr>] {
-        &self.aggr_expr
-    }
-
-    /// Input plan
-    pub fn input(&self) -> &Arc<dyn ExecutionPlan> {
-        &self.input
-    }
-
-    /// Get the input schema before any aggregates are applied
-    pub fn input_schema(&self) -> SchemaRef {
-        self.input_schema.clone()
-    }
-}
-
-#[async_trait]
-impl ExecutionPlan for HashAggregateExec {
-    /// Return a reference to Any that can be used for downcasting
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-
-    fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
-        vec![self.input.clone()]
-    }
-
-    fn required_child_distribution(&self) -> Distribution {
-        match &self.mode {
-            AggregateMode::Partial => Distribution::UnspecifiedDistribution,
-            AggregateMode::Final => Distribution::SinglePartition,
-        }
-    }
-
-    /// Get the output partitioning of this plan
-    fn output_partitioning(&self) -> Partitioning {
-        self.input.output_partitioning()
-    }
-
-    async fn execute(&self, partition: usize) -> Result<SendableRecordBatchStream> {
-        let input = self.input.execute(partition).await?;
-        let group_expr = self.group_expr.iter().map(|x| x.0.clone()).collect();
-
-        if self.group_expr.is_empty() {
-            Ok(Box::pin(HashAggregateStream::new(
-                self.mode,
-                self.schema.clone(),
-                self.aggr_expr.clone(),
-                input,
-            )))
-        } else {
-            Ok(Box::pin(GroupedHashAggregateStream::new(
-                self.mode,
-                self.schema.clone(),
-                group_expr,
-                self.aggr_expr.clone(),
-                input,
-                self.output_rows.clone(),
-            )))
-        }
-    }
-
-    fn with_new_children(
-        &self,
-        children: Vec<Arc<dyn ExecutionPlan>>,
-    ) -> Result<Arc<dyn ExecutionPlan>> {
-        match children.len() {
-            1 => Ok(Arc::new(HashAggregateExec::try_new(
-                self.mode,
-                self.group_expr.clone(),
-                self.aggr_expr.clone(),
-                children[0].clone(),
-                self.input_schema.clone(),
-            )?)),
-            _ => Err(DataFusionError::Internal(
-                "HashAggregateExec wrong number of children".to_string(),
-            )),
-        }
-    }
-
-    fn metrics(&self) -> HashMap<String, SQLMetric> {
-        let mut metrics = HashMap::new();
-        metrics.insert(
-            "outputRows".to_owned(),
-            self.output_rows.lock().unwrap().clone(),
-        );
-        metrics
-    }
-}
-
-/*
-The architecture is the following:
-
-1. An accumulator has state that is updated on each batch.
-2. At the end of the aggregation (e.g. end of batches in a partition), the accumulator converts its state to a RecordBatch of a single row
-3. The RecordBatches of all accumulators are merged (`concatenate` in `rust/arrow`) together to a single RecordBatch.
-4. The state's RecordBatch is `merge`d to a new state
-5. The state is mapped to the final value
-
-Why:
-
-* Accumulators' state can be statically typed, but it is more efficient to transmit data from the accumulators via `Array`
-* The `merge` operation must have access to the state of the aggregators because it uses it to correctly merge
-* It uses Arrow's native dynamically typed object, `Array`.
-* Arrow shines in batch operations and both `merge` and `concatenate` of uniform types are very performant.
-
-Example: average
-
-* the state is `n: u32` and `sum: f64`
-* For every batch, we update them accordingly.
-* At the end of the accumulation (of a partition), we convert `n` and `sum` to a RecordBatch of 1 row and two columns: `[n, sum]`
-* The RecordBatch is (sent back / transmitted over network)
-* Once all N record batches arrive, `merge` is performed, which builds a RecordBatch with N rows and 2 columns.
-* Finally, `get_value` returns an array with one entry computed from the state
-*/
-pin_project! {
-    struct GroupedHashAggregateStream {
-        schema: SchemaRef,
-        #[pin]
-        output: futures::channel::oneshot::Receiver<ArrowResult<RecordBatch>>,
-        finished: bool,
-        output_rows: Arc<Mutex<SQLMetric>>,
-    }
-}
-
-fn group_aggregate_batch(
-    mode: &AggregateMode,
-    group_expr: &[Arc<dyn PhysicalExpr>],
-    aggr_expr: &[Arc<dyn AggregateExpr>],
-    batch: RecordBatch,
-    mut accumulators: Accumulators,
-    aggregate_expressions: &[Vec<Arc<dyn PhysicalExpr>>],
-) -> Result<Accumulators> {
-    // evaluate the grouping expressions
-    let group_values = evaluate(group_expr, &batch)?;
-
-    // evaluate the aggregation expressions.
-    // We could evaluate them after the `take`, but since we need to evaluate all
-    // of them anyways, it is more performant to do it while they are together.
-    let aggr_input_values = evaluate_many(aggregate_expressions, &batch)?;
-
-    // create vector large enough to hold the grouping key
-    // this is an optimization to avoid allocating `key` on every row.
-    // it will be overwritten on every iteration of the loop below
-    let mut group_by_values = Vec::with_capacity(group_values.len());
-    for _ in 0..group_values.len() {
-        group_by_values.push(GroupByScalar::UInt32(0));
-    }
-
-    let mut group_by_values = group_by_values.into_boxed_slice();
-
-    let mut key = Vec::with_capacity(group_values.len());
-
-    // 1.1 construct the key from the group values
-    // 1.2 construct the mapping key if it does not exist
-    // 1.3 add the row' index to `indices`
-
-    // Make sure we can create the accumulators or otherwise return an error
-    create_accumulators(aggr_expr).map_err(DataFusionError::into_arrow_external_error)?;
-
-    // Keys received in this batch
-    let mut batch_keys = vec![];
-
-    for row in 0..batch.num_rows() {
-        // 1.1
-        create_key(&group_values, row, &mut key)
-            .map_err(DataFusionError::into_arrow_external_error)?;
-
-        accumulators
-            .raw_entry_mut()
-            .from_key(&key)
-            // 1.3
-            .and_modify(|_, (_, _, v)| {
-                if v.is_empty() {
-                    batch_keys.push(key.clone())
-                };
-                v.push(row as u32)
-            })
-            // 1.2
-            .or_insert_with(|| {
-                // We can safely unwrap here as we checked we can create an accumulator before
-                let accumulator_set = create_accumulators(aggr_expr).unwrap();
-                batch_keys.push(key.clone());
-                let _ = create_group_by_values(&group_values, row, &mut group_by_values);
-                (
-                    key.clone(),
-                    (group_by_values.clone(), accumulator_set, vec![row as u32]),
-                )
-            });
-    }
-
-    // Collect all indices + offsets based on keys in this vec
-    let mut batch_indices: UInt32Builder = UInt32Builder::new(0);
-    let mut offsets = vec![0];
-    let mut offset_so_far = 0;
-    for key in batch_keys.iter() {
-        let (_, _, indices) = accumulators.get_mut(key).unwrap();
-        batch_indices.append_slice(&indices)?;
-        offset_so_far += indices.len();
-        offsets.push(offset_so_far);
-    }
-    let batch_indices = batch_indices.finish();
-
-    // `Take` all values based on indices into Arrays
-    let values: Vec<Vec<Arc<dyn Array>>> = aggr_input_values
-        .iter()
-        .map(|array| {
-            array
-                .iter()
-                .map(|array| {
-                    compute::take(
-                        array.as_ref(),
-                        &batch_indices,
-                        None, // None: no index check
-                    )
-                    .unwrap()
-                })
-                .collect()
-            // 2.3
-        })
-        .collect();
-
-    // 2.1 for each key in this batch
-    // 2.2 for each aggregation
-    // 2.3 `slice` from each of its arrays the keys' values
-    // 2.4 update / merge the accumulator with the values
-    // 2.5 clear indices
-    batch_keys
-        .iter_mut()
-        .zip(offsets.windows(2))
-        .try_for_each(|(key, offsets)| {
-            let (_, accumulator_set, indices) = accumulators.get_mut(key).unwrap();
-            // 2.2
-            accumulator_set
-                .iter_mut()
-                .zip(values.iter())
-                .map(|(accumulator, aggr_array)| {
-                    (
-                        accumulator,
-                        aggr_array
-                            .iter()
-                            .map(|array| {
-                                // 2.3
-                                array.slice(offsets[0], offsets[1] - offsets[0])
-                            })
-                            .collect::<Vec<ArrayRef>>(),
-                    )
-                })
-                .try_for_each(|(accumulator, values)| match mode {
-                    AggregateMode::Partial => accumulator.update_batch(&values),
-                    AggregateMode::Final => {
-                        // note: the aggregation here is over states, not values, thus the merge
-                        accumulator.merge_batch(&values)
-                    }
-                })
-                // 2.5
-                .and({
-                    indices.clear();
-                    Ok(())
-                })
-        })?;
-    Ok(accumulators)
-}
-
-/// Appends a sequence of [u8] bytes for the value in `col[row]` to
-/// `vec` to be used as a key into the hash map for a dictionary type
-///
-/// Note that ideally, for dictionary encoded columns, we would be
-/// able to simply use the dictionary idicies themselves (no need to
-/// look up values) or possibly simply build the hash table entirely
-/// on the dictionary indexes.
-///
-/// This aproach would likely work (very) well for the common case,
-/// but it also has to to handle the case where the dictionary itself
-/// is not the same across all record batches (and thus indexes in one
-/// record batch may not correspond to the same index in another)
-fn dictionary_create_key_for_col<K: ArrowDictionaryKeyType>(
-    col: &ArrayRef,
-    row: usize,
-    vec: &mut Vec<u8>,
-) -> Result<()> {
-    let dict_col = col.as_any().downcast_ref::<DictionaryArray<K>>().unwrap();
-
-    // look up the index in the values dictionary
-    let keys_col = dict_col.keys_array();
-    let values_index = keys_col.value(row).to_usize().ok_or_else(|| {
-        DataFusionError::Internal(format!(
-            "Can not convert index to usize in dictionary of type creating group by value {:?}",
-            keys_col.data_type()
-        ))
-    })?;
-
-    create_key_for_col(&dict_col.values(), values_index, vec)
-}
-
-/// Appends a sequence of [u8] bytes for the value in `col[row]` to
-/// `vec` to be used as a key into the hash map
-fn create_key_for_col(col: &ArrayRef, row: usize, vec: &mut Vec<u8>) -> Result<()> {
-    match col.data_type() {
-        DataType::Boolean => {
-            let array = col.as_any().downcast_ref::<BooleanArray>().unwrap();
-            vec.extend_from_slice(&[array.value(row) as u8]);
-        }
-        DataType::Float32 => {
-            let array = col.as_any().downcast_ref::<Float32Array>().unwrap();
-            vec.extend_from_slice(&array.value(row).to_le_bytes());
-        }
-        DataType::Float64 => {
-            let array = col.as_any().downcast_ref::<Float64Array>().unwrap();
-            vec.extend_from_slice(&array.value(row).to_le_bytes());
-        }
-        DataType::UInt8 => {
-            let array = col.as_any().downcast_ref::<UInt8Array>().unwrap();
-            vec.extend_from_slice(&array.value(row).to_le_bytes());
-        }
-        DataType::UInt16 => {
-            let array = col.as_any().downcast_ref::<UInt16Array>().unwrap();
-            vec.extend_from_slice(&array.value(row).to_le_bytes());
-        }
-        DataType::UInt32 => {
-            let array = col.as_any().downcast_ref::<UInt32Array>().unwrap();
-            vec.extend_from_slice(&array.value(row).to_le_bytes());
-        }
-        DataType::UInt64 => {
-            let array = col.as_any().downcast_ref::<UInt64Array>().unwrap();
-            vec.extend_from_slice(&array.value(row).to_le_bytes());
-        }
-        DataType::Int8 => {
-            let array = col.as_any().downcast_ref::<Int8Array>().unwrap();
-            vec.extend_from_slice(&array.value(row).to_le_bytes());
-        }
-        DataType::Int16 => {
-            let array = col.as_any().downcast_ref::<Int16Array>().unwrap();
-            vec.extend(array.value(row).to_le_bytes().iter());
-        }
-        DataType::Int32 => {
-            let array = col.as_any().downcast_ref::<Int32Array>().unwrap();
-            vec.extend_from_slice(&array.value(row).to_le_bytes());
-        }
-        DataType::Int64 => {
-            let array = col.as_any().downcast_ref::<Int64Array>().unwrap();
-            vec.extend_from_slice(&array.value(row).to_le_bytes());
-        }
-        DataType::Timestamp(TimeUnit::Millisecond, None) => {
-            let array = col
-                .as_any()
-                .downcast_ref::<TimestampMillisecondArray>()
-                .unwrap();
-            vec.extend_from_slice(&array.value(row).to_le_bytes());
-        }
-        DataType::Timestamp(TimeUnit::Microsecond, None) => {
-            let array = col
-                .as_any()
-                .downcast_ref::<TimestampMicrosecondArray>()
-                .unwrap();
-            vec.extend_from_slice(&array.value(row).to_le_bytes());
-        }
-        DataType::Timestamp(TimeUnit::Nanosecond, None) => {
-            let array = col
-                .as_any()
-                .downcast_ref::<TimestampNanosecondArray>()
-                .unwrap();
-            vec.extend_from_slice(&array.value(row).to_le_bytes());
-        }
-        DataType::Utf8 => {
-            let array = col.as_any().downcast_ref::<StringArray>().unwrap();
-            let value = array.value(row);
-            // store the size
-            vec.extend_from_slice(&value.len().to_le_bytes());
-            // store the string value
-            vec.extend_from_slice(value.as_bytes());
-        }
-        DataType::Date32 => {
-            let array = col.as_any().downcast_ref::<Date32Array>().unwrap();
-            vec.extend_from_slice(&array.value(row).to_le_bytes());
-        }
-        DataType::Dictionary(index_type, _) => match **index_type {
-            DataType::Int8 => {
-                dictionary_create_key_for_col::<Int8Type>(col, row, vec)?;
-            }
-            DataType::Int16 => {
-                dictionary_create_key_for_col::<Int16Type>(col, row, vec)?;
-            }
-            DataType::Int32 => {
-                dictionary_create_key_for_col::<Int32Type>(col, row, vec)?;
-            }
-            DataType::Int64 => {
-                dictionary_create_key_for_col::<Int64Type>(col, row, vec)?;
-            }
-            DataType::UInt8 => {
-                dictionary_create_key_for_col::<UInt8Type>(col, row, vec)?;
-            }
-            DataType::UInt16 => {
-                dictionary_create_key_for_col::<UInt16Type>(col, row, vec)?;
-            }
-            DataType::UInt32 => {
-                dictionary_create_key_for_col::<UInt32Type>(col, row, vec)?;
-            }
-            DataType::UInt64 => {
-                dictionary_create_key_for_col::<UInt64Type>(col, row, vec)?;
-            }
-            _ => return Err(DataFusionError::Internal(format!(
-                "Unsupported GROUP BY type (dictionary index type not supported creating key) {}",
-                col.data_type(),
-            ))),
-        },
-        _ => {
-            // This is internal because we should have caught this before.
-            return Err(DataFusionError::Internal(format!(
-                "Unsupported GROUP BY type creating key {}",
-                col.data_type(),
-            )));
-        }
-    }
-    Ok(())
-}
-
-/// Create a key `Vec<u8>` that is used as key for the hashmap
-pub(crate) fn create_key(
-    group_by_keys: &[ArrayRef],
-    row: usize,
-    vec: &mut Vec<u8>,
-) -> Result<()> {
-    vec.clear();
-    for col in group_by_keys {
-        create_key_for_col(col, row, vec)?
-    }
-    Ok(())
-}
-
-async fn compute_grouped_hash_aggregate(
-    mode: AggregateMode,
-    schema: SchemaRef,
-    group_expr: Vec<Arc<dyn PhysicalExpr>>,
-    aggr_expr: Vec<Arc<dyn AggregateExpr>>,
-    mut input: SendableRecordBatchStream,
-) -> ArrowResult<RecordBatch> {
-    // the expressions to evaluate the batch, one vec of expressions per aggregation
-    let aggregate_expressions = aggregate_expressions(&aggr_expr, &mode)
-        .map_err(DataFusionError::into_arrow_external_error)?;
-
-    // mapping key -> (set of accumulators, indices of the key in the batch)
-    // * the indexes are updated at each row
-    // * the accumulators are updated at the end of each batch
-    // * the indexes are `clear`ed at the end of each batch
-    //let mut accumulators: Accumulators = FnvHashMap::default();
-
-    // iterate over all input batches and update the accumulators
-    let mut accumulators = Accumulators::default();
-    while let Some(batch) = input.next().await {
-        let batch = batch?;
-        accumulators = group_aggregate_batch(
-            &mode,
-            &group_expr,
-            &aggr_expr,
-            batch,
-            accumulators,
-            &aggregate_expressions,
-        )
-        .map_err(DataFusionError::into_arrow_external_error)?;
-    }
-
-    create_batch_from_map(&mode, &accumulators, group_expr.len(), &schema)
-}
-
-impl GroupedHashAggregateStream {
-    /// Create a new HashAggregateStream
-    pub fn new(
-        mode: AggregateMode,
-        schema: SchemaRef,
-        group_expr: Vec<Arc<dyn PhysicalExpr>>,
-        aggr_expr: Vec<Arc<dyn AggregateExpr>>,
-        input: SendableRecordBatchStream,
-        output_rows: Arc<Mutex<SQLMetric>>,
-    ) -> Self {
-        let (tx, rx) = futures::channel::oneshot::channel();
-
-        let schema_clone = schema.clone();
-        tokio::spawn(async move {
-            let result = compute_grouped_hash_aggregate(
-                mode,
-                schema_clone,
-                group_expr,
-                aggr_expr,
-                input,
-            )
-            .await;
-            tx.send(result)
-        });
-
-        GroupedHashAggregateStream {
-            schema,
-            output: rx,
-            finished: false,
-            output_rows,
-        }
-    }
-}
-
-type AccumulatorItem = Box<dyn Accumulator>;
-type Accumulators =
-    HashMap<Vec<u8>, (Box<[GroupByScalar]>, Vec<AccumulatorItem>, Vec<u32>), RandomState>;
-
-impl Stream for GroupedHashAggregateStream {
-    type Item = ArrowResult<RecordBatch>;
-
-    fn poll_next(
-        self: std::pin::Pin<&mut Self>,
-        cx: &mut Context<'_>,
-    ) -> Poll<Option<Self::Item>> {
-        if self.finished {
-            return Poll::Ready(None);
-        }
-
-        let output_rows = self.output_rows.clone();
-
-        // is the output ready?
-        let this = self.project();
-        let output_poll = this.output.poll(cx);
-
-        match output_poll {
-            Poll::Ready(result) => {
-                *this.finished = true;
-
-                // check for error in receiving channel and unwrap actual result
-                let result = match result {
-                    Err(e) => Err(ArrowError::ExternalError(Box::new(e))), // error receiving
-                    Ok(result) => result,
-                };
-
-                if let Ok(batch) = &result {
-                    let mut output_rows = output_rows.lock().unwrap();
-                    output_rows.add(batch.num_rows())
-                }
-
-                Poll::Ready(Some(result))
-            }
-            Poll::Pending => Poll::Pending,
-        }
-    }
-}
-
-impl RecordBatchStream for GroupedHashAggregateStream {
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-}
-
-/// Evaluates expressions against a record batch.
-fn evaluate(
-    expr: &[Arc<dyn PhysicalExpr>],
-    batch: &RecordBatch,
-) -> Result<Vec<ArrayRef>> {
-    expr.iter()
-        .map(|expr| expr.evaluate(&batch))
-        .map(|r| r.map(|v| v.into_array(batch.num_rows())))
-        .collect::<Result<Vec<_>>>()
-}
-
-/// Evaluates expressions against a record batch.
-fn evaluate_many(
-    expr: &[Vec<Arc<dyn PhysicalExpr>>],
-    batch: &RecordBatch,
-) -> Result<Vec<Vec<ArrayRef>>> {
-    expr.iter()
-        .map(|expr| evaluate(expr, batch))
-        .collect::<Result<Vec<_>>>()
-}
-
-/// uses `state_fields` to build a vec of expressions required to merge the AggregateExpr' accumulator's state.
-fn merge_expressions(
-    expr: &Arc<dyn AggregateExpr>,
-) -> Result<Vec<Arc<dyn PhysicalExpr>>> {
-    Ok(expr
-        .state_fields()?
-        .iter()
-        .map(|f| Arc::new(Column::new(f.name())) as Arc<dyn PhysicalExpr>)
-        .collect::<Vec<_>>())
-}
-
-/// returns physical expressions to evaluate against a batch
-/// The expressions are different depending on `mode`:
-/// * Partial: AggregateExpr::expressions
-/// * Final: columns of `AggregateExpr::state_fields()`
-/// The return value is to be understood as:
-/// * index 0 is the aggregation
-/// * index 1 is the expression i of the aggregation
-fn aggregate_expressions(
-    aggr_expr: &[Arc<dyn AggregateExpr>],
-    mode: &AggregateMode,
-) -> Result<Vec<Vec<Arc<dyn PhysicalExpr>>>> {
-    match mode {
-        AggregateMode::Partial => {
-            Ok(aggr_expr.iter().map(|agg| agg.expressions()).collect())
-        }
-        // in this mode, we build the merge expressions of the aggregation
-        AggregateMode::Final => Ok(aggr_expr
-            .iter()
-            .map(|agg| merge_expressions(agg))
-            .collect::<Result<Vec<_>>>()?),
-    }
-}
-
-pin_project! {
-    struct HashAggregateStream {
-        schema: SchemaRef,
-        #[pin]
-        output: futures::channel::oneshot::Receiver<ArrowResult<RecordBatch>>,
-        finished: bool,
-    }
-}
-
-async fn compute_hash_aggregate(
-    mode: AggregateMode,
-    schema: SchemaRef,
-    aggr_expr: Vec<Arc<dyn AggregateExpr>>,
-    mut input: SendableRecordBatchStream,
-) -> ArrowResult<RecordBatch> {
-    let mut accumulators = create_accumulators(&aggr_expr)
-        .map_err(DataFusionError::into_arrow_external_error)?;
-
-    let expressions = aggregate_expressions(&aggr_expr, &mode)
-        .map_err(DataFusionError::into_arrow_external_error)?;
-
-    let expressions = Arc::new(expressions);
-
-    // 1 for each batch, update / merge accumulators with the expressions' values
-    // future is ready when all batches are computed
-    while let Some(batch) = input.next().await {
-        let batch = batch?;
-        aggregate_batch(&mode, &batch, &mut accumulators, &expressions)
-            .map_err(DataFusionError::into_arrow_external_error)?;
-    }
-
-    // 2. convert values to a record batch
-    finalize_aggregation(&accumulators, &mode)
-        .map(|columns| RecordBatch::try_new(schema.clone(), columns))
-        .map_err(DataFusionError::into_arrow_external_error)?
-}
-
-impl HashAggregateStream {
-    /// Create a new HashAggregateStream
-    pub fn new(
-        mode: AggregateMode,
-        schema: SchemaRef,
-        aggr_expr: Vec<Arc<dyn AggregateExpr>>,
-        input: SendableRecordBatchStream,
-    ) -> Self {
-        let (tx, rx) = futures::channel::oneshot::channel();
-
-        let schema_clone = schema.clone();
-        tokio::spawn(async move {
-            let result =
-                compute_hash_aggregate(mode, schema_clone, aggr_expr, input).await;
-            tx.send(result)
-        });
-
-        HashAggregateStream {
-            schema,
-            output: rx,
-            finished: false,
-        }
-    }
-}
-
-fn aggregate_batch(
-    mode: &AggregateMode,
-    batch: &RecordBatch,
-    accumulators: &mut [AccumulatorItem],
-    expressions: &[Vec<Arc<dyn PhysicalExpr>>],
-) -> Result<()> {
-    // 1.1 iterate accumulators and respective expressions together
-    // 1.2 evaluate expressions
-    // 1.3 update / merge accumulators with the expressions' values
-
-    // 1.1
-    accumulators
-        .iter_mut()
-        .zip(expressions)
-        .try_for_each(|(accum, expr)| {
-            // 1.2
-            let values = &expr
-                .iter()
-                .map(|e| e.evaluate(batch))
-                .map(|r| r.map(|v| v.into_array(batch.num_rows())))
-                .collect::<Result<Vec<_>>>()?;
-
-            // 1.3
-            match mode {
-                AggregateMode::Partial => accum.update_batch(values),
-                AggregateMode::Final => accum.merge_batch(values),
-            }
-        })
-}
-
-impl Stream for HashAggregateStream {
-    type Item = ArrowResult<RecordBatch>;
-
-    fn poll_next(
-        self: std::pin::Pin<&mut Self>,
-        cx: &mut Context<'_>,
-    ) -> Poll<Option<Self::Item>> {
-        if self.finished {
-            return Poll::Ready(None);
-        }
-
-        // is the output ready?
-        let this = self.project();
-        let output_poll = this.output.poll(cx);
-
-        match output_poll {
-            Poll::Ready(result) => {
-                *this.finished = true;
-
-                // check for error in receiving channel and unwrap actual result
-                let result = match result {
-                    Err(e) => Err(ArrowError::ExternalError(Box::new(e))), // error receiving
-                    Ok(result) => result,
-                };
-
-                Poll::Ready(Some(result))
-            }
-            Poll::Pending => Poll::Pending,
-        }
-    }
-}
-
-impl RecordBatchStream for HashAggregateStream {
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-}
-
-/// Given Vec<Vec<ArrayRef>>, concatenates the inners `Vec<ArrayRef>` into `ArrayRef`, returning `Vec<ArrayRef>`
-/// This assumes that `arrays` is not empty.
-fn concatenate(arrays: Vec<Vec<ArrayRef>>) -> ArrowResult<Vec<ArrayRef>> {
-    (0..arrays[0].len())
-        .map(|column| {
-            let array_list = arrays
-                .iter()
-                .map(|a| a[column].as_ref())
-                .collect::<Vec<_>>();
-            compute::concat(&array_list)
-        })
-        .collect::<ArrowResult<Vec<_>>>()
-}
-
-/// Create a RecordBatch with all group keys and accumulator' states or values.
-fn create_batch_from_map(
-    mode: &AggregateMode,
-    accumulators: &Accumulators,
-    num_group_expr: usize,
-    output_schema: &Schema,
-) -> ArrowResult<RecordBatch> {
-    // 1. for each key
-    // 2. create single-row ArrayRef with all group expressions
-    // 3. create single-row ArrayRef with all aggregate states or values
-    // 4. collect all in a vector per key of vec<ArrayRef>, vec[i][j]
-    // 5. concatenate the arrays over the second index [j] into a single vec<ArrayRef>.
-    let arrays = accumulators
-        .iter()
-        .map(|(_, (group_by_values, accumulator_set, _))| {
-            // 2.
-            let mut groups = (0..num_group_expr)
-                .map(|i| match &group_by_values[i] {
-                    GroupByScalar::Float32(n) => {
-                        Arc::new(Float32Array::from(vec![(*n).into()] as Vec<f32>))
-                            as ArrayRef
-                    }
-                    GroupByScalar::Float64(n) => {
-                        Arc::new(Float64Array::from(vec![(*n).into()] as Vec<f64>))
-                            as ArrayRef
-                    }
-                    GroupByScalar::Int8(n) => {
-                        Arc::new(Int8Array::from(vec![*n])) as ArrayRef
-                    }
-                    GroupByScalar::Int16(n) => Arc::new(Int16Array::from(vec![*n])),
-                    GroupByScalar::Int32(n) => Arc::new(Int32Array::from(vec![*n])),
-                    GroupByScalar::Int64(n) => Arc::new(Int64Array::from(vec![*n])),
-                    GroupByScalar::UInt8(n) => Arc::new(UInt8Array::from(vec![*n])),
-                    GroupByScalar::UInt16(n) => Arc::new(UInt16Array::from(vec![*n])),
-                    GroupByScalar::UInt32(n) => Arc::new(UInt32Array::from(vec![*n])),
-                    GroupByScalar::UInt64(n) => Arc::new(UInt64Array::from(vec![*n])),
-                    GroupByScalar::Utf8(str) => {
-                        Arc::new(StringArray::from(vec![&***str]))
-                    }
-                    GroupByScalar::Boolean(b) => Arc::new(BooleanArray::from(vec![*b])),
-                    GroupByScalar::TimeMillisecond(n) => {
-                        Arc::new(TimestampMillisecondArray::from(vec![*n]))
-                    }
-                    GroupByScalar::TimeMicrosecond(n) => {
-                        Arc::new(TimestampMicrosecondArray::from(vec![*n]))
-                    }
-                    GroupByScalar::TimeNanosecond(n) => {
-                        Arc::new(TimestampNanosecondArray::from_vec(vec![*n], None))
-                    }
-                    GroupByScalar::Date32(n) => Arc::new(Date32Array::from(vec![*n])),
-                })
-                .collect::<Vec<ArrayRef>>();
-
-            // 3.
-            groups.extend(
-                finalize_aggregation(accumulator_set, mode)
-                    .map_err(DataFusionError::into_arrow_external_error)?,
-            );
-
-            Ok(groups)
-        })
-        // 4.
-        .collect::<ArrowResult<Vec<Vec<ArrayRef>>>>()?;
-
-    let batch = if !arrays.is_empty() {
-        // 5.
-        let columns = concatenate(arrays)?;
-
-        // cast output if needed (e.g. for types like Dictionary where
-        // the intermediate GroupByScalar type was not the same as the
-        // output
-        let columns = columns
-            .iter()
-            .zip(output_schema.fields().iter())
-            .map(|(col, desired_field)| cast(col, desired_field.data_type()))
-            .collect::<ArrowResult<Vec<_>>>()?;
-
-        RecordBatch::try_new(Arc::new(output_schema.to_owned()), columns)?
-    } else {
-        RecordBatch::new_empty(Arc::new(output_schema.to_owned()))
-    };
-    Ok(batch)
-}
-
-fn create_accumulators(
-    aggr_expr: &[Arc<dyn AggregateExpr>],
-) -> Result<Vec<AccumulatorItem>> {
-    aggr_expr
-        .iter()
-        .map(|expr| expr.create_accumulator())
-        .collect::<Result<Vec<_>>>()
-}
-
-/// returns a vector of ArrayRefs, where each entry corresponds to either the
-/// final value (mode = Final) or states (mode = Partial)
-fn finalize_aggregation(
-    accumulators: &[AccumulatorItem],
-    mode: &AggregateMode,
-) -> Result<Vec<ArrayRef>> {
-    match mode {
-        AggregateMode::Partial => {
-            // build the vector of states
-            let a = accumulators
-                .iter()
-                .map(|accumulator| accumulator.state())
-                .map(|value| {
-                    value.map(|e| {
-                        e.iter().map(|v| v.to_array()).collect::<Vec<ArrayRef>>()
-                    })
-                })
-                .collect::<Result<Vec<_>>>()?;
-            Ok(a.iter().flatten().cloned().collect::<Vec<_>>())
-        }
-        AggregateMode::Final => {
-            // merge the state to the final value
-            accumulators
-                .iter()
-                .map(|accumulator| accumulator.evaluate().map(|v| v.to_array()))
-                .collect::<Result<Vec<ArrayRef>>>()
-        }
-    }
-}
-
-/// Extract the value in `col[row]` from a dictionary a GroupByScalar
-fn dictionary_create_group_by_value<K: ArrowDictionaryKeyType>(
-    col: &ArrayRef,
-    row: usize,
-) -> Result<GroupByScalar> {
-    let dict_col = col.as_any().downcast_ref::<DictionaryArray<K>>().unwrap();
-
-    // look up the index in the values dictionary
-    let keys_col = dict_col.keys_array();
-    let values_index = keys_col.value(row).to_usize().ok_or_else(|| {
-        DataFusionError::Internal(format!(
-            "Can not convert index to usize in dictionary of type creating group by value {:?}",
-            keys_col.data_type()
-        ))
-    })?;
-
-    create_group_by_value(&dict_col.values(), values_index)
-}
-
-/// Extract the value in `col[row]` as a GroupByScalar
-fn create_group_by_value(col: &ArrayRef, row: usize) -> Result<GroupByScalar> {
-    match col.data_type() {
-        DataType::Float32 => {
-            let array = col.as_any().downcast_ref::<Float32Array>().unwrap();
-            Ok(GroupByScalar::Float32(OrderedFloat::from(array.value(row))))
-        }
-        DataType::Float64 => {
-            let array = col.as_any().downcast_ref::<Float64Array>().unwrap();
-            Ok(GroupByScalar::Float64(OrderedFloat::from(array.value(row))))
-        }
-        DataType::UInt8 => {
-            let array = col.as_any().downcast_ref::<UInt8Array>().unwrap();
-            Ok(GroupByScalar::UInt8(array.value(row)))
-        }
-        DataType::UInt16 => {
-            let array = col.as_any().downcast_ref::<UInt16Array>().unwrap();
-            Ok(GroupByScalar::UInt16(array.value(row)))
-        }
-        DataType::UInt32 => {
-            let array = col.as_any().downcast_ref::<UInt32Array>().unwrap();
-            Ok(GroupByScalar::UInt32(array.value(row)))
-        }
-        DataType::UInt64 => {
-            let array = col.as_any().downcast_ref::<UInt64Array>().unwrap();
-            Ok(GroupByScalar::UInt64(array.value(row)))
-        }
-        DataType::Int8 => {
-            let array = col.as_any().downcast_ref::<Int8Array>().unwrap();
-            Ok(GroupByScalar::Int8(array.value(row)))
-        }
-        DataType::Int16 => {
-            let array = col.as_any().downcast_ref::<Int16Array>().unwrap();
-            Ok(GroupByScalar::Int16(array.value(row)))
-        }
-        DataType::Int32 => {
-            let array = col.as_any().downcast_ref::<Int32Array>().unwrap();
-            Ok(GroupByScalar::Int32(array.value(row)))
-        }
-        DataType::Int64 => {
-            let array = col.as_any().downcast_ref::<Int64Array>().unwrap();
-            Ok(GroupByScalar::Int64(array.value(row)))
-        }
-        DataType::Utf8 => {
-            let array = col.as_any().downcast_ref::<StringArray>().unwrap();
-            Ok(GroupByScalar::Utf8(Box::new(array.value(row).into())))
-        }
-        DataType::Boolean => {
-            let array = col.as_any().downcast_ref::<BooleanArray>().unwrap();
-            Ok(GroupByScalar::Boolean(array.value(row)))
-        }
-        DataType::Timestamp(TimeUnit::Millisecond, None) => {
-            let array = col
-                .as_any()
-                .downcast_ref::<TimestampMillisecondArray>()
-                .unwrap();
-            Ok(GroupByScalar::TimeMillisecond(array.value(row)))
-        }
-        DataType::Timestamp(TimeUnit::Microsecond, None) => {
-            let array = col
-                .as_any()
-                .downcast_ref::<TimestampMicrosecondArray>()
-                .unwrap();
-            Ok(GroupByScalar::TimeMicrosecond(array.value(row)))
-        }
-        DataType::Timestamp(TimeUnit::Nanosecond, None) => {
-            let array = col
-                .as_any()
-                .downcast_ref::<TimestampNanosecondArray>()
-                .unwrap();
-            Ok(GroupByScalar::TimeNanosecond(array.value(row)))
-        }
-        DataType::Date32 => {
-            let array = col.as_any().downcast_ref::<Date32Array>().unwrap();
-            Ok(GroupByScalar::Date32(array.value(row)))
-        }
-        DataType::Dictionary(index_type, _) => match **index_type {
-            DataType::Int8 => dictionary_create_group_by_value::<Int8Type>(col, row),
-            DataType::Int16 => dictionary_create_group_by_value::<Int16Type>(col, row),
-            DataType::Int32 => dictionary_create_group_by_value::<Int32Type>(col, row),
-            DataType::Int64 => dictionary_create_group_by_value::<Int64Type>(col, row),
-            DataType::UInt8 => dictionary_create_group_by_value::<UInt8Type>(col, row),
-            DataType::UInt16 => dictionary_create_group_by_value::<UInt16Type>(col, row),
-            DataType::UInt32 => dictionary_create_group_by_value::<UInt32Type>(col, row),
-            DataType::UInt64 => dictionary_create_group_by_value::<UInt64Type>(col, row),
-            _ => Err(DataFusionError::NotImplemented(format!(
-                "Unsupported GROUP BY type (dictionary index type not supported) {}",
-                col.data_type(),
-            ))),
-        },
-        _ => Err(DataFusionError::NotImplemented(format!(
-            "Unsupported GROUP BY type {}",
-            col.data_type(),
-        ))),
-    }
-}
-
-/// Extract the values in `group_by_keys` arrow arrays into the target vector
-/// as GroupByScalar values
-pub(crate) fn create_group_by_values(
-    group_by_keys: &[ArrayRef],
-    row: usize,
-    vec: &mut Box<[GroupByScalar]>,
-) -> Result<()> {
-    for (i, col) in group_by_keys.iter().enumerate() {
-        vec[i] = create_group_by_value(col, row)?
-    }
-    Ok(())
-}
-
-#[cfg(test)]
-mod tests {
-
-    use arrow::array::Float64Array;
-
-    use super::*;
-    use crate::physical_plan::expressions::{col, Avg};
-    use crate::{assert_batches_sorted_eq, physical_plan::common};
-
-    use crate::physical_plan::merge::MergeExec;
-
-    /// some mock data to aggregates
-    fn some_data() -> (Arc<Schema>, Vec<RecordBatch>) {
-        // define a schema.
-        let schema = Arc::new(Schema::new(vec![
-            Field::new("a", DataType::UInt32, false),
-            Field::new("b", DataType::Float64, false),
-        ]));
-
-        // define data.
-        (
-            schema.clone(),
-            vec![
-                RecordBatch::try_new(
-                    schema.clone(),
-                    vec![
-                        Arc::new(UInt32Array::from(vec![2, 3, 4, 4])),
-                        Arc::new(Float64Array::from(vec![1.0, 2.0, 3.0, 4.0])),
-                    ],
-                )
-                .unwrap(),
-                RecordBatch::try_new(
-                    schema,
-                    vec![
-                        Arc::new(UInt32Array::from(vec![2, 3, 3, 4])),
-                        Arc::new(Float64Array::from(vec![1.0, 2.0, 3.0, 4.0])),
-                    ],
-                )
-                .unwrap(),
-            ],
-        )
-    }
-
-    /// build the aggregates on the data from some_data() and check the results
-    async fn check_aggregates(input: Arc<dyn ExecutionPlan>) -> Result<()> {
-        let groups: Vec<(Arc<dyn PhysicalExpr>, String)> =
-            vec![(col("a"), "a".to_string())];
-
-        let aggregates: Vec<Arc<dyn AggregateExpr>> = vec![Arc::new(Avg::new(
-            col("b"),
-            "AVG(b)".to_string(),
-            DataType::Float64,
-        ))];
-
-        let input_schema = input.schema();
-        let partial_aggregate = Arc::new(HashAggregateExec::try_new(
-            AggregateMode::Partial,
-            groups.clone(),
-            aggregates.clone(),
-            input,
-            input_schema.clone(),
-        )?);
-
-        let result = common::collect(partial_aggregate.execute(0).await?).await?;
-
-        let expected = vec![
-            "+---+---------------+-------------+",
-            "| a | AVG(b)[count] | AVG(b)[sum] |",
-            "+---+---------------+-------------+",
-            "| 2 | 2             | 2           |",
-            "| 3 | 3             | 7           |",
-            "| 4 | 3             | 11          |",
-            "+---+---------------+-------------+",
-        ];
-        assert_batches_sorted_eq!(expected, &result);
-
-        let merge = Arc::new(MergeExec::new(partial_aggregate));
-
-        let final_group: Vec<Arc<dyn PhysicalExpr>> =
-            (0..groups.len()).map(|i| col(&groups[i].1)).collect();
-
-        let merged_aggregate = Arc::new(HashAggregateExec::try_new(
-            AggregateMode::Final,
-            final_group
-                .iter()
-                .enumerate()
-                .map(|(i, expr)| (expr.clone(), groups[i].1.clone()))
-                .collect(),
-            aggregates,
-            merge,
-            input_schema,
-        )?);
-
-        let result = common::collect(merged_aggregate.execute(0).await?).await?;
-        assert_eq!(result.len(), 1);
-
-        let batch = &result[0];
-        assert_eq!(batch.num_columns(), 2);
-        assert_eq!(batch.num_rows(), 3);
-
-        let expected = vec![
-            "+---+--------------------+",
-            "| a | AVG(b)             |",
-            "+---+--------------------+",
-            "| 2 | 1                  |",
-            "| 3 | 2.3333333333333335 |", // 3, (2 + 3 + 2) / 3
-            "| 4 | 3.6666666666666665 |", // 4, (3 + 4 + 4) / 3
-            "+---+--------------------+",
-        ];
-
-        assert_batches_sorted_eq!(&expected, &result);
-
-        let metrics = merged_aggregate.metrics();
-        let output_rows = metrics.get("outputRows").unwrap();
-        assert_eq!(3, output_rows.value());
-
-        Ok(())
-    }
-
-    /// Define a test source that can yield back to runtime before returning its first item ///
-
-    #[derive(Debug)]
-    struct TestYieldingExec {
-        /// True if this exec should yield back to runtime the first time it is polled
-        pub yield_first: bool,
-    }
-
-    #[async_trait]
-    impl ExecutionPlan for TestYieldingExec {
-        fn as_any(&self) -> &dyn Any {
-            self
-        }
-        fn schema(&self) -> SchemaRef {
-            some_data().0
-        }
-
-        fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
-            vec![]
-        }
-
-        fn output_partitioning(&self) -> Partitioning {
-            Partitioning::UnknownPartitioning(1)
-        }
-
-        fn with_new_children(
-            &self,
-            _: Vec<Arc<dyn ExecutionPlan>>,
-        ) -> Result<Arc<dyn ExecutionPlan>> {
-            Err(DataFusionError::Internal(format!(
-                "Children cannot be replaced in {:?}",
-                self
-            )))
-        }
-
-        async fn execute(&self, _partition: usize) -> Result<SendableRecordBatchStream> {
-            let stream;
-            if self.yield_first {
-                stream = TestYieldingStream::New;
-            } else {
-                stream = TestYieldingStream::Yielded;
-            }
-            Ok(Box::pin(stream))
-        }
-    }
-
-    /// A stream using the demo data. If inited as new, it will first yield to runtime before returning records
-    enum TestYieldingStream {
-        New,
-        Yielded,
-        ReturnedBatch1,
-        ReturnedBatch2,
-    }
-
-    impl Stream for TestYieldingStream {
-        type Item = ArrowResult<RecordBatch>;
-
-        fn poll_next(
-            mut self: std::pin::Pin<&mut Self>,
-            cx: &mut Context<'_>,
-        ) -> Poll<Option<Self::Item>> {
-            match &*self {
-                TestYieldingStream::New => {
-                    *(self.as_mut()) = TestYieldingStream::Yielded;
-                    cx.waker().wake_by_ref();
-                    Poll::Pending
-                }
-                TestYieldingStream::Yielded => {
-                    *(self.as_mut()) = TestYieldingStream::ReturnedBatch1;
-                    Poll::Ready(Some(Ok(some_data().1[0].clone())))
-                }
-                TestYieldingStream::ReturnedBatch1 => {
-                    *(self.as_mut()) = TestYieldingStream::ReturnedBatch2;
-                    Poll::Ready(Some(Ok(some_data().1[1].clone())))
-                }
-                TestYieldingStream::ReturnedBatch2 => Poll::Ready(None),
-            }
-        }
-    }
-
-    impl RecordBatchStream for TestYieldingStream {
-        fn schema(&self) -> SchemaRef {
-            some_data().0
-        }
-    }
-
-    //// Tests ////
-
-    #[tokio::test]
-    async fn aggregate_source_not_yielding() -> Result<()> {
-        let input: Arc<dyn ExecutionPlan> =
-            Arc::new(TestYieldingExec { yield_first: false });
-
-        check_aggregates(input).await
-    }
-
-    #[tokio::test]
-    async fn aggregate_source_with_yielding() -> Result<()> {
-        let input: Arc<dyn ExecutionPlan> =
-            Arc::new(TestYieldingExec { yield_first: true });
-
-        check_aggregates(input).await
-    }
-}
diff --git a/rust/datafusion/src/physical_plan/hash_join.rs b/rust/datafusion/src/physical_plan/hash_join.rs
deleted file mode 100644
index 401fe6580a9..00000000000
--- a/rust/datafusion/src/physical_plan/hash_join.rs
+++ /dev/null
@@ -1,1265 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Defines the join plan for executing partitions in parallel and then joining the results
-//! into a set of partitions.
-
-use ahash::CallHasher;
-use ahash::RandomState;
-
-use arrow::{
-    array::{
-        ArrayData, ArrayRef, BooleanArray, LargeStringArray, PrimitiveArray,
-        TimestampMicrosecondArray, TimestampNanosecondArray, UInt32BufferBuilder,
-        UInt32Builder, UInt64BufferBuilder, UInt64Builder,
-    },
-    compute,
-    datatypes::{TimeUnit, UInt32Type, UInt64Type},
-};
-use smallvec::{smallvec, SmallVec};
-use std::time::Instant;
-use std::{any::Any, collections::HashSet};
-use std::{hash::Hasher, sync::Arc};
-
-use async_trait::async_trait;
-use futures::{Stream, StreamExt, TryStreamExt};
-use hashbrown::HashMap;
-use tokio::sync::Mutex;
-
-use arrow::array::Array;
-use arrow::datatypes::DataType;
-use arrow::datatypes::{Schema, SchemaRef};
-use arrow::error::Result as ArrowResult;
-use arrow::record_batch::RecordBatch;
-
-use arrow::array::{
-    Int16Array, Int32Array, Int64Array, Int8Array, StringArray, UInt16Array, UInt32Array,
-    UInt64Array, UInt8Array,
-};
-
-use super::expressions::col;
-use super::{
-    hash_utils::{build_join_schema, check_join_is_valid, JoinOn, JoinType},
-    merge::MergeExec,
-};
-use crate::error::{DataFusionError, Result};
-
-use super::{ExecutionPlan, Partitioning, RecordBatchStream, SendableRecordBatchStream};
-use crate::physical_plan::coalesce_batches::concat_batches;
-use log::debug;
-
-// Maps a `u64` hash value based on the left ["on" values] to a list of indices with this key's value.
-// E.g. 1 -> [3, 6, 8] indicates that the column values map to rows 3, 6 and 8 for hash value 1
-// As the key is a hash value, we need to check possible hash collisions in the probe stage
-type JoinHashMap = HashMap<u64, SmallVec<[u64; 1]>, IdHashBuilder>;
-type JoinLeftData = Arc<(JoinHashMap, RecordBatch)>;
-
-/// join execution plan executes partitions in parallel and combines them into a set of
-/// partitions.
-#[derive(Debug)]
-pub struct HashJoinExec {
-    /// left (build) side which gets hashed
-    left: Arc<dyn ExecutionPlan>,
-    /// right (probe) side which are filtered by the hash table
-    right: Arc<dyn ExecutionPlan>,
-    /// Set of common columns used to join on
-    on: Vec<(String, String)>,
-    /// How the join is performed
-    join_type: JoinType,
-    /// The schema once the join is applied
-    schema: SchemaRef,
-    /// Build-side
-    build_side: Arc<Mutex<Option<JoinLeftData>>>,
-    /// Shares the `RandomState` for the hashing algorithm
-    random_state: RandomState,
-    /// Partitioning mode to use
-    mode: PartitionMode,
-}
-
-#[derive(Clone, Copy, Debug, PartialEq)]
-/// Partitioning mode to use for hash join
-pub enum PartitionMode {
-    /// Left/right children are partitioned using the left and right keys
-    Partitioned,
-    /// Left side will collected into one partition
-    CollectLeft,
-}
-
-/// Information about the index and placement (left or right) of the columns
-struct ColumnIndex {
-    /// Index of the column
-    index: usize,
-    /// Whether the column is at the left or right side
-    is_left: bool,
-}
-
-impl HashJoinExec {
-    /// Tries to create a new [HashJoinExec].
-    /// # Error
-    /// This function errors when it is not possible to join the left and right sides on keys `on`.
-    pub fn try_new(
-        left: Arc<dyn ExecutionPlan>,
-        right: Arc<dyn ExecutionPlan>,
-        on: &JoinOn,
-        join_type: &JoinType,
-        partition_mode: PartitionMode,
-    ) -> Result<Self> {
-        let left_schema = left.schema();
-        let right_schema = right.schema();
-        check_join_is_valid(&left_schema, &right_schema, &on)?;
-
-        let schema = Arc::new(build_join_schema(
-            &left_schema,
-            &right_schema,
-            on,
-            &join_type,
-        ));
-
-        let on = on
-            .iter()
-            .map(|(l, r)| (l.to_string(), r.to_string()))
-            .collect();
-
-        let random_state = RandomState::with_seeds(0, 0, 0, 0);
-
-        Ok(HashJoinExec {
-            left,
-            right,
-            on,
-            join_type: *join_type,
-            schema,
-            build_side: Arc::new(Mutex::new(None)),
-            random_state,
-            mode: partition_mode,
-        })
-    }
-
-    /// left (build) side which gets hashed
-    pub fn left(&self) -> &Arc<dyn ExecutionPlan> {
-        &self.left
-    }
-
-    /// right (probe) side which are filtered by the hash table
-    pub fn right(&self) -> &Arc<dyn ExecutionPlan> {
-        &self.right
-    }
-
-    /// Set of common columns used to join on
-    pub fn on(&self) -> &[(String, String)] {
-        &self.on
-    }
-
-    /// How the join is performed
-    pub fn join_type(&self) -> &JoinType {
-        &self.join_type
-    }
-
-    /// Calculates column indices and left/right placement on input / output schemas and jointype
-    fn column_indices_from_schema(&self) -> ArrowResult<Vec<ColumnIndex>> {
-        let (primary_is_left, primary_schema, secondary_schema) = match self.join_type {
-            JoinType::Inner | JoinType::Left => {
-                (true, self.left.schema(), self.right.schema())
-            }
-            JoinType::Right => (false, self.right.schema(), self.left.schema()),
-        };
-        let mut column_indices = Vec::with_capacity(self.schema.fields().len());
-        for field in self.schema.fields() {
-            let (is_primary, index) = match primary_schema.index_of(field.name()) {
-                    Ok(i) => Ok((true, i)),
-                    Err(_) => {
-                        match secondary_schema.index_of(field.name()) {
-                            Ok(i) => Ok((false, i)),
-                            _ => Err(DataFusionError::Internal(
-                                format!("During execution, the column {} was not found in neither the left or right side of the join", field.name()).to_string()
-                            ))
-                        }
-                    }
-                }.map_err(DataFusionError::into_arrow_external_error)?;
-
-            let is_left =
-                is_primary && primary_is_left || !is_primary && !primary_is_left;
-            column_indices.push(ColumnIndex { index, is_left });
-        }
-
-        Ok(column_indices)
-    }
-}
-
-#[async_trait]
-impl ExecutionPlan for HashJoinExec {
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-
-    fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
-        vec![self.left.clone(), self.right.clone()]
-    }
-
-    fn with_new_children(
-        &self,
-        children: Vec<Arc<dyn ExecutionPlan>>,
-    ) -> Result<Arc<dyn ExecutionPlan>> {
-        match children.len() {
-            2 => Ok(Arc::new(HashJoinExec::try_new(
-                children[0].clone(),
-                children[1].clone(),
-                &self.on,
-                &self.join_type,
-                self.mode,
-            )?)),
-            _ => Err(DataFusionError::Internal(
-                "HashJoinExec wrong number of children".to_string(),
-            )),
-        }
-    }
-
-    fn output_partitioning(&self) -> Partitioning {
-        self.right.output_partitioning()
-    }
-
-    async fn execute(&self, partition: usize) -> Result<SendableRecordBatchStream> {
-        let on_left = self.on.iter().map(|on| on.0.clone()).collect::<Vec<_>>();
-        // we only want to compute the build side once for PartitionMode::CollectLeft
-        let left_data = {
-            match self.mode {
-                PartitionMode::CollectLeft => {
-                    let mut build_side = self.build_side.lock().await;
-
-                    match build_side.as_ref() {
-                        Some(stream) => stream.clone(),
-                        None => {
-                            let start = Instant::now();
-
-                            // merge all left parts into a single stream
-                            let merge = MergeExec::new(self.left.clone());
-                            let stream = merge.execute(0).await?;
-
-                            // This operation performs 2 steps at once:
-                            // 1. creates a [JoinHashMap] of all batches from the stream
-                            // 2. stores the batches in a vector.
-                            let initial = (
-                                JoinHashMap::with_hasher(IdHashBuilder {}),
-                                Vec::new(),
-                                0,
-                                Vec::new(),
-                            );
-                            let (hashmap, batches, num_rows, _) = stream
-                                .try_fold(initial, |mut acc, batch| async {
-                                    let hash = &mut acc.0;
-                                    let values = &mut acc.1;
-                                    let offset = acc.2;
-                                    acc.3.clear();
-                                    acc.3.resize(batch.num_rows(), 0);
-                                    update_hash(
-                                        &on_left,
-                                        &batch,
-                                        hash,
-                                        offset,
-                                        &self.random_state,
-                                        &mut acc.3,
-                                    )
-                                    .unwrap();
-                                    acc.2 += batch.num_rows();
-                                    values.push(batch);
-                                    Ok(acc)
-                                })
-                                .await?;
-
-                            // Merge all batches into a single batch, so we
-                            // can directly index into the arrays
-                            let single_batch =
-                                concat_batches(&self.left.schema(), &batches, num_rows)?;
-
-                            let left_side = Arc::new((hashmap, single_batch));
-
-                            *build_side = Some(left_side.clone());
-
-                            debug!(
-                            "Built build-side of hash join containing {} rows in {} ms",
-                            num_rows,
-                            start.elapsed().as_millis()
-                        );
-
-                            left_side
-                        }
-                    }
-                }
-                PartitionMode::Partitioned => {
-                    let start = Instant::now();
-
-                    // Load 1 partition of left side in memory
-                    let stream = self.left.execute(partition).await?;
-
-                    // This operation performs 2 steps at once:
-                    // 1. creates a [JoinHashMap] of all batches from the stream
-                    // 2. stores the batches in a vector.
-                    let initial = (
-                        JoinHashMap::with_hasher(IdHashBuilder {}),
-                        Vec::new(),
-                        0,
-                        Vec::new(),
-                    );
-                    let (hashmap, batches, num_rows, _) = stream
-                        .try_fold(initial, |mut acc, batch| async {
-                            let hash = &mut acc.0;
-                            let values = &mut acc.1;
-                            let offset = acc.2;
-                            acc.3.clear();
-                            acc.3.resize(batch.num_rows(), 0);
-                            update_hash(
-                                &on_left,
-                                &batch,
-                                hash,
-                                offset,
-                                &self.random_state,
-                                &mut acc.3,
-                            )
-                            .unwrap();
-                            acc.2 += batch.num_rows();
-                            values.push(batch);
-                            Ok(acc)
-                        })
-                        .await?;
-
-                    // Merge all batches into a single batch, so we
-                    // can directly index into the arrays
-                    let single_batch =
-                        concat_batches(&self.left.schema(), &batches, num_rows)?;
-
-                    let left_side = Arc::new((hashmap, single_batch));
-
-                    debug!(
-                        "Built build-side {} of hash join containing {} rows in {} ms",
-                        partition,
-                        num_rows,
-                        start.elapsed().as_millis()
-                    );
-
-                    left_side
-                }
-            }
-        };
-
-        // we have the batches and the hash map with their keys. We can how create a stream
-        // over the right that uses this information to issue new batches.
-
-        let stream = self.right.execute(partition).await?;
-        let on_right = self.on.iter().map(|on| on.1.clone()).collect::<Vec<_>>();
-
-        let column_indices = self.column_indices_from_schema()?;
-        Ok(Box::pin(HashJoinStream {
-            schema: self.schema.clone(),
-            on_left,
-            on_right,
-            join_type: self.join_type,
-            left_data,
-            right: stream,
-            column_indices,
-            num_input_batches: 0,
-            num_input_rows: 0,
-            num_output_batches: 0,
-            num_output_rows: 0,
-            join_time: 0,
-            random_state: self.random_state.clone(),
-        }))
-    }
-}
-
-/// Updates `hash` with new entries from [RecordBatch] evaluated against the expressions `on`,
-/// assuming that the [RecordBatch] corresponds to the `index`th
-fn update_hash(
-    on: &[String],
-    batch: &RecordBatch,
-    hash: &mut JoinHashMap,
-    offset: usize,
-    random_state: &RandomState,
-    hashes_buffer: &mut Vec<u64>,
-) -> Result<()> {
-    // evaluate the keys
-    let keys_values = on
-        .iter()
-        .map(|name| Ok(col(name).evaluate(batch)?.into_array(batch.num_rows())))
-        .collect::<Result<Vec<_>>>()?;
-
-    // update the hash map
-    let hash_values = create_hashes(&keys_values, &random_state, hashes_buffer)?;
-
-    // insert hashes to key of the hashmap
-    for (row, hash_value) in hash_values.iter().enumerate() {
-        hash.raw_entry_mut()
-            .from_key_hashed_nocheck(*hash_value, hash_value)
-            .and_modify(|_, v| v.push((row + offset) as u64))
-            .or_insert_with(|| (*hash_value, smallvec![(row + offset) as u64]));
-    }
-    Ok(())
-}
-
-/// A stream that issues [RecordBatch]es as they arrive from the right  of the join.
-struct HashJoinStream {
-    /// Input schema
-    schema: Arc<Schema>,
-    /// columns from the left
-    on_left: Vec<String>,
-    /// columns from the right used to compute the hash
-    on_right: Vec<String>,
-    /// type of the join
-    join_type: JoinType,
-    /// information from the left
-    left_data: JoinLeftData,
-    /// right
-    right: SendableRecordBatchStream,
-    /// Information of index and left / right placement of columns
-    column_indices: Vec<ColumnIndex>,
-    /// number of input batches
-    num_input_batches: usize,
-    /// number of input rows
-    num_input_rows: usize,
-    /// number of batches produced
-    num_output_batches: usize,
-    /// number of rows produced
-    num_output_rows: usize,
-    /// total time for joining probe-side batches to the build-side batches
-    join_time: usize,
-    /// Random state used for hashing initialization
-    random_state: RandomState,
-}
-
-impl RecordBatchStream for HashJoinStream {
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-}
-
-/// Returns a new [RecordBatch] by combining the `left` and `right` according to `indices`.
-/// The resulting batch has [Schema] `schema`.
-/// # Error
-/// This function errors when:
-/// *
-fn build_batch_from_indices(
-    schema: &Schema,
-    left: &RecordBatch,
-    right: &RecordBatch,
-    left_indices: UInt64Array,
-    right_indices: UInt32Array,
-    column_indices: &[ColumnIndex],
-) -> ArrowResult<RecordBatch> {
-    // build the columns of the new [RecordBatch]:
-    // 1. pick whether the column is from the left or right
-    // 2. based on the pick, `take` items from the different RecordBatches
-    let mut columns: Vec<Arc<dyn Array>> = Vec::with_capacity(schema.fields().len());
-
-    for column_index in column_indices {
-        let array = if column_index.is_left {
-            let array = left.column(column_index.index);
-            compute::take(array.as_ref(), &left_indices, None)?
-        } else {
-            let array = right.column(column_index.index);
-            compute::take(array.as_ref(), &right_indices, None)?
-        };
-        columns.push(array);
-    }
-    RecordBatch::try_new(Arc::new(schema.clone()), columns)
-}
-
-#[allow(clippy::too_many_arguments)]
-fn build_batch(
-    batch: &RecordBatch,
-    left_data: &JoinLeftData,
-    on_left: &[String],
-    on_right: &[String],
-    join_type: JoinType,
-    schema: &Schema,
-    column_indices: &[ColumnIndex],
-    random_state: &RandomState,
-) -> ArrowResult<RecordBatch> {
-    let (left_indices, right_indices) = build_join_indexes(
-        &left_data,
-        &batch,
-        join_type,
-        on_left,
-        on_right,
-        random_state,
-    )
-    .unwrap();
-
-    build_batch_from_indices(
-        schema,
-        &left_data.1,
-        batch,
-        left_indices,
-        right_indices,
-        column_indices,
-    )
-}
-
-/// returns a vector with (index from left, index from right).
-/// The size of this vector corresponds to the total size of a joined batch
-// For a join on column A:
-// left       right
-//     batch 1
-// A B         A D
-// ---------------
-// 1 a         3 6
-// 2 b         1 2
-// 3 c         2 4
-//     batch 2
-// A B         A D
-// ---------------
-// 1 a         5 10
-// 2 b         2 2
-// 4 d         1 1
-// indices (batch, batch_row)
-// left       right
-// (0, 2)     (0, 0)
-// (0, 0)     (0, 1)
-// (0, 1)     (0, 2)
-// (1, 0)     (0, 1)
-// (1, 1)     (0, 2)
-// (0, 1)     (1, 1)
-// (0, 0)     (1, 2)
-// (1, 1)     (1, 1)
-// (1, 0)     (1, 2)
-fn build_join_indexes(
-    left_data: &JoinLeftData,
-    right: &RecordBatch,
-    join_type: JoinType,
-    left_on: &[String],
-    right_on: &[String],
-    random_state: &RandomState,
-) -> Result<(UInt64Array, UInt32Array)> {
-    let keys_values = right_on
-        .iter()
-        .map(|name| Ok(col(name).evaluate(right)?.into_array(right.num_rows())))
-        .collect::<Result<Vec<_>>>()?;
-    let left_join_values = left_on
-        .iter()
-        .map(|name| {
-            Ok(col(name)
-                .evaluate(&left_data.1)?
-                .into_array(left_data.1.num_rows()))
-        })
-        .collect::<Result<Vec<_>>>()?;
-    let hashes_buffer = &mut vec![0; keys_values[0].len()];
-    let hash_values = create_hashes(&keys_values, &random_state, hashes_buffer)?;
-    let left = &left_data.0;
-
-    match join_type {
-        JoinType::Inner => {
-            // Using a buffer builder to avoid slower normal builder
-            let mut left_indices = UInt64BufferBuilder::new(0);
-            let mut right_indices = UInt32BufferBuilder::new(0);
-
-            // Visit all of the right rows
-            for (row, hash_value) in hash_values.iter().enumerate() {
-                // Get the hash and find it in the build index
-
-                // For every item on the left and right we check if it matches
-                // This possibly contains rows with hash collisions,
-                // So we have to check here whether rows are equal or not
-                if let Some(indices) = left.get(hash_value) {
-                    for &i in indices {
-                        // Check hash collisions
-                        if equal_rows(i as usize, row, &left_join_values, &keys_values)? {
-                            left_indices.append(i);
-                            right_indices.append(row as u32);
-                        }
-                    }
-                }
-            }
-            let left = ArrayData::builder(DataType::UInt64)
-                .len(left_indices.len())
-                .add_buffer(left_indices.finish())
-                .build();
-            let right = ArrayData::builder(DataType::UInt32)
-                .len(right_indices.len())
-                .add_buffer(right_indices.finish())
-                .build();
-
-            Ok((
-                PrimitiveArray::<UInt64Type>::from(left),
-                PrimitiveArray::<UInt32Type>::from(right),
-            ))
-        }
-        JoinType::Left => {
-            let mut left_indices = UInt64Builder::new(0);
-            let mut right_indices = UInt32Builder::new(0);
-
-            // Keep track of which item is visited in the build input
-            // TODO: this can be stored more efficiently with a marker
-            //       https://issues.apache.org/jira/browse/ARROW-11116
-            // TODO: Fix LEFT join with multiple right batches
-            //       https://issues.apache.org/jira/browse/ARROW-10971
-            let mut is_visited = HashSet::new();
-
-            // First visit all of the rows
-            for (row, hash_value) in hash_values.iter().enumerate() {
-                if let Some(indices) = left.get(hash_value) {
-                    for &i in indices {
-                        // Collision check
-                        if equal_rows(i as usize, row, &left_join_values, &keys_values)? {
-                            left_indices.append_value(i)?;
-                            right_indices.append_value(row as u32)?;
-                            is_visited.insert(i);
-                        }
-                    }
-                };
-            }
-            // Add the remaining left rows to the result set with None on the right side
-            for (_, indices) in left {
-                for i in indices.iter() {
-                    if !is_visited.contains(i) {
-                        left_indices.append_slice(&indices)?;
-                        right_indices.append_null()?;
-                    }
-                }
-            }
-            Ok((left_indices.finish(), right_indices.finish()))
-        }
-        JoinType::Right => {
-            let mut left_indices = UInt64Builder::new(0);
-            let mut right_indices = UInt32Builder::new(0);
-
-            for (row, hash_value) in hash_values.iter().enumerate() {
-                match left.get(hash_value) {
-                    Some(indices) => {
-                        for &i in indices {
-                            if equal_rows(
-                                i as usize,
-                                row,
-                                &left_join_values,
-                                &keys_values,
-                            )? {
-                                left_indices.append_value(i)?;
-                                right_indices.append_value(row as u32)?;
-                            }
-                        }
-                    }
-                    None => {
-                        // when no match, add the row with None for the left side
-                        left_indices.append_null()?;
-                        right_indices.append_value(row as u32)?;
-                    }
-                }
-            }
-            Ok((left_indices.finish(), right_indices.finish()))
-        }
-    }
-}
-use core::hash::BuildHasher;
-
-/// `Hasher` that returns the same `u64` value as a hash, to avoid re-hashing
-/// it when inserting/indexing or regrowing the `HashMap`
-struct IdHasher {
-    hash: u64,
-}
-
-impl Hasher for IdHasher {
-    fn finish(&self) -> u64 {
-        self.hash
-    }
-
-    fn write_u64(&mut self, i: u64) {
-        self.hash = i;
-    }
-
-    fn write(&mut self, _bytes: &[u8]) {
-        unreachable!("IdHasher should only be used for u64 keys")
-    }
-}
-
-#[derive(Debug)]
-struct IdHashBuilder {}
-
-impl BuildHasher for IdHashBuilder {
-    type Hasher = IdHasher;
-
-    fn build_hasher(&self) -> Self::Hasher {
-        IdHasher { hash: 0 }
-    }
-}
-
-// Combines two hashes into one hash
-fn combine_hashes(l: u64, r: u64) -> u64 {
-    let hash = (17 * 37u64).wrapping_add(l);
-    hash.wrapping_mul(37).wrapping_add(r)
-}
-
-macro_rules! equal_rows_elem {
-    ($array_type:ident, $l: ident, $r: ident, $left: ident, $right: ident) => {{
-        let left_array = $l.as_any().downcast_ref::<$array_type>().unwrap();
-        let right_array = $r.as_any().downcast_ref::<$array_type>().unwrap();
-
-        match (left_array.is_null($left), left_array.is_null($right)) {
-            (true, true) => true,
-            (false, false) => left_array.value($left) == right_array.value($right),
-            _ => false,
-        }
-    }};
-}
-
-/// Left and right row have equal values
-fn equal_rows(
-    left: usize,
-    right: usize,
-    left_arrays: &[ArrayRef],
-    right_arrays: &[ArrayRef],
-) -> Result<bool> {
-    let mut err = None;
-    let res = left_arrays
-        .iter()
-        .zip(right_arrays)
-        .all(|(l, r)| match l.data_type() {
-            DataType::Null => true,
-            DataType::Boolean => equal_rows_elem!(BooleanArray, l, r, left, right),
-            DataType::Int8 => equal_rows_elem!(Int8Array, l, r, left, right),
-            DataType::Int16 => equal_rows_elem!(Int16Array, l, r, left, right),
-            DataType::Int32 => equal_rows_elem!(Int32Array, l, r, left, right),
-            DataType::Int64 => equal_rows_elem!(Int64Array, l, r, left, right),
-            DataType::UInt8 => equal_rows_elem!(UInt8Array, l, r, left, right),
-            DataType::UInt16 => equal_rows_elem!(UInt16Array, l, r, left, right),
-            DataType::UInt32 => equal_rows_elem!(UInt32Array, l, r, left, right),
-            DataType::UInt64 => equal_rows_elem!(UInt64Array, l, r, left, right),
-            DataType::Timestamp(_, None) => {
-                equal_rows_elem!(Int64Array, l, r, left, right)
-            }
-            DataType::Utf8 => equal_rows_elem!(StringArray, l, r, left, right),
-            DataType::LargeUtf8 => equal_rows_elem!(LargeStringArray, l, r, left, right),
-            _ => {
-                // This is internal because we should have caught this before.
-                err = Some(Err(DataFusionError::Internal(
-                    "Unsupported data type in hasher".to_string(),
-                )));
-                false
-            }
-        });
-
-    err.unwrap_or(Ok(res))
-}
-
-macro_rules! hash_array {
-    ($array_type:ident, $column: ident, $ty: ident, $hashes: ident, $random_state: ident) => {
-        let array = $column.as_any().downcast_ref::<$array_type>().unwrap();
-        if array.null_count() == 0 {
-            for (i, hash) in $hashes.iter_mut().enumerate() {
-                *hash =
-                    combine_hashes($ty::get_hash(&array.value(i), $random_state), *hash);
-            }
-        } else {
-            for (i, hash) in $hashes.iter_mut().enumerate() {
-                if !array.is_null(i) {
-                    *hash = combine_hashes(
-                        $ty::get_hash(&array.value(i), $random_state),
-                        *hash,
-                    );
-                }
-            }
-        }
-    };
-}
-
-/// Creates hash values for every element in the row based on the values in the columns
-pub fn create_hashes<'a>(
-    arrays: &[ArrayRef],
-    random_state: &RandomState,
-    hashes_buffer: &'a mut Vec<u64>,
-) -> Result<&'a mut Vec<u64>> {
-    for col in arrays {
-        match col.data_type() {
-            DataType::UInt8 => {
-                hash_array!(UInt8Array, col, u8, hashes_buffer, random_state);
-            }
-            DataType::UInt16 => {
-                hash_array!(UInt16Array, col, u16, hashes_buffer, random_state);
-            }
-            DataType::UInt32 => {
-                hash_array!(UInt32Array, col, u32, hashes_buffer, random_state);
-            }
-            DataType::UInt64 => {
-                hash_array!(UInt64Array, col, u64, hashes_buffer, random_state);
-            }
-            DataType::Int8 => {
-                hash_array!(Int8Array, col, i8, hashes_buffer, random_state);
-            }
-            DataType::Int16 => {
-                hash_array!(Int16Array, col, i16, hashes_buffer, random_state);
-            }
-            DataType::Int32 => {
-                hash_array!(Int32Array, col, i32, hashes_buffer, random_state);
-            }
-            DataType::Int64 => {
-                hash_array!(Int64Array, col, i64, hashes_buffer, random_state);
-            }
-            DataType::Timestamp(TimeUnit::Microsecond, None) => {
-                hash_array!(
-                    TimestampMicrosecondArray,
-                    col,
-                    i64,
-                    hashes_buffer,
-                    random_state
-                );
-            }
-            DataType::Timestamp(TimeUnit::Nanosecond, None) => {
-                hash_array!(
-                    TimestampNanosecondArray,
-                    col,
-                    i64,
-                    hashes_buffer,
-                    random_state
-                );
-            }
-            DataType::Boolean => {
-                hash_array!(BooleanArray, col, u8, hashes_buffer, random_state);
-            }
-            DataType::Utf8 => {
-                hash_array!(StringArray, col, str, hashes_buffer, random_state);
-            }
-            _ => {
-                // This is internal because we should have caught this before.
-                return Err(DataFusionError::Internal(
-                    "Unsupported data type in hasher".to_string(),
-                ));
-            }
-        }
-    }
-    Ok(hashes_buffer)
-}
-
-impl Stream for HashJoinStream {
-    type Item = ArrowResult<RecordBatch>;
-
-    fn poll_next(
-        mut self: std::pin::Pin<&mut Self>,
-        cx: &mut std::task::Context<'_>,
-    ) -> std::task::Poll<Option<Self::Item>> {
-        self.right
-            .poll_next_unpin(cx)
-            .map(|maybe_batch| match maybe_batch {
-                Some(Ok(batch)) => {
-                    let start = Instant::now();
-                    let result = build_batch(
-                        &batch,
-                        &self.left_data,
-                        &self.on_left,
-                        &self.on_right,
-                        self.join_type,
-                        &self.schema,
-                        &self.column_indices,
-                        &self.random_state,
-                    );
-                    self.num_input_batches += 1;
-                    self.num_input_rows += batch.num_rows();
-                    if let Ok(ref batch) = result {
-                        self.join_time += start.elapsed().as_millis() as usize;
-                        self.num_output_batches += 1;
-                        self.num_output_rows += batch.num_rows();
-                    }
-                    Some(result)
-                }
-                other => {
-                    debug!(
-                        "Processed {} probe-side input batches containing {} rows and \
-                        produced {} output batches containing {} rows in {} ms",
-                        self.num_input_batches,
-                        self.num_input_rows,
-                        self.num_output_batches,
-                        self.num_output_rows,
-                        self.join_time
-                    );
-                    other
-                }
-            })
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use crate::{
-        assert_batches_sorted_eq,
-        physical_plan::{common, memory::MemoryExec},
-        test::{build_table_i32, columns},
-    };
-
-    use super::*;
-    use std::sync::Arc;
-
-    fn build_table(
-        a: (&str, &Vec<i32>),
-        b: (&str, &Vec<i32>),
-        c: (&str, &Vec<i32>),
-    ) -> Arc<dyn ExecutionPlan> {
-        let batch = build_table_i32(a, b, c);
-        let schema = batch.schema();
-        Arc::new(MemoryExec::try_new(&[vec![batch]], schema, None).unwrap())
-    }
-
-    fn join(
-        left: Arc<dyn ExecutionPlan>,
-        right: Arc<dyn ExecutionPlan>,
-        on: &[(&str, &str)],
-        join_type: &JoinType,
-    ) -> Result<HashJoinExec> {
-        let on: Vec<_> = on
-            .iter()
-            .map(|(l, r)| (l.to_string(), r.to_string()))
-            .collect();
-        HashJoinExec::try_new(left, right, &on, join_type, PartitionMode::CollectLeft)
-    }
-
-    #[tokio::test]
-    async fn join_inner_one() -> Result<()> {
-        let left = build_table(
-            ("a1", &vec![1, 2, 3]),
-            ("b1", &vec![4, 5, 5]), // this has a repetition
-            ("c1", &vec![7, 8, 9]),
-        );
-        let right = build_table(
-            ("a2", &vec![10, 20, 30]),
-            ("b1", &vec![4, 5, 6]),
-            ("c2", &vec![70, 80, 90]),
-        );
-        let on = &[("b1", "b1")];
-
-        let join = join(left, right, on, &JoinType::Inner)?;
-
-        let columns = columns(&join.schema());
-        assert_eq!(columns, vec!["a1", "b1", "c1", "a2", "c2"]);
-
-        let stream = join.execute(0).await?;
-        let batches = common::collect(stream).await?;
-
-        let expected = vec![
-            "+----+----+----+----+----+",
-            "| a1 | b1 | c1 | a2 | c2 |",
-            "+----+----+----+----+----+",
-            "| 1  | 4  | 7  | 10 | 70 |",
-            "| 2  | 5  | 8  | 20 | 80 |",
-            "| 3  | 5  | 9  | 20 | 80 |",
-            "+----+----+----+----+----+",
-        ];
-        assert_batches_sorted_eq!(expected, &batches);
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn join_inner_one_no_shared_column_names() -> Result<()> {
-        let left = build_table(
-            ("a1", &vec![1, 2, 3]),
-            ("b1", &vec![4, 5, 5]), // this has a repetition
-            ("c1", &vec![7, 8, 9]),
-        );
-        let right = build_table(
-            ("a2", &vec![10, 20, 30]),
-            ("b2", &vec![4, 5, 6]),
-            ("c2", &vec![70, 80, 90]),
-        );
-        let on = &[("b1", "b2")];
-
-        let join = join(left, right, on, &JoinType::Inner)?;
-
-        let columns = columns(&join.schema());
-        assert_eq!(columns, vec!["a1", "b1", "c1", "a2", "b2", "c2"]);
-
-        let stream = join.execute(0).await?;
-        let batches = common::collect(stream).await?;
-
-        let expected = vec![
-            "+----+----+----+----+----+----+",
-            "| a1 | b1 | c1 | a2 | b2 | c2 |",
-            "+----+----+----+----+----+----+",
-            "| 1  | 4  | 7  | 10 | 4  | 70 |",
-            "| 2  | 5  | 8  | 20 | 5  | 80 |",
-            "| 3  | 5  | 9  | 20 | 5  | 80 |",
-            "+----+----+----+----+----+----+",
-        ];
-
-        assert_batches_sorted_eq!(expected, &batches);
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn join_inner_two() -> Result<()> {
-        let left = build_table(
-            ("a1", &vec![1, 2, 2]),
-            ("b2", &vec![1, 2, 2]),
-            ("c1", &vec![7, 8, 9]),
-        );
-        let right = build_table(
-            ("a1", &vec![1, 2, 3]),
-            ("b2", &vec![1, 2, 2]),
-            ("c2", &vec![70, 80, 90]),
-        );
-        let on = &[("a1", "a1"), ("b2", "b2")];
-
-        let join = join(left, right, on, &JoinType::Inner)?;
-
-        let columns = columns(&join.schema());
-        assert_eq!(columns, vec!["a1", "b2", "c1", "c2"]);
-
-        let stream = join.execute(0).await?;
-        let batches = common::collect(stream).await?;
-        assert_eq!(batches.len(), 1);
-
-        let expected = vec![
-            "+----+----+----+----+",
-            "| a1 | b2 | c1 | c2 |",
-            "+----+----+----+----+",
-            "| 1  | 1  | 7  | 70 |",
-            "| 2  | 2  | 8  | 80 |",
-            "| 2  | 2  | 9  | 80 |",
-            "+----+----+----+----+",
-        ];
-
-        assert_batches_sorted_eq!(expected, &batches);
-
-        Ok(())
-    }
-
-    /// Test where the left has 2 parts, the right with 1 part => 1 part
-    #[tokio::test]
-    async fn join_inner_one_two_parts_left() -> Result<()> {
-        let batch1 = build_table_i32(
-            ("a1", &vec![1, 2]),
-            ("b2", &vec![1, 2]),
-            ("c1", &vec![7, 8]),
-        );
-        let batch2 =
-            build_table_i32(("a1", &vec![2]), ("b2", &vec![2]), ("c1", &vec![9]));
-        let schema = batch1.schema();
-        let left = Arc::new(
-            MemoryExec::try_new(&[vec![batch1], vec![batch2]], schema, None).unwrap(),
-        );
-
-        let right = build_table(
-            ("a1", &vec![1, 2, 3]),
-            ("b2", &vec![1, 2, 2]),
-            ("c2", &vec![70, 80, 90]),
-        );
-        let on = &[("a1", "a1"), ("b2", "b2")];
-
-        let join = join(left, right, on, &JoinType::Inner)?;
-
-        let columns = columns(&join.schema());
-        assert_eq!(columns, vec!["a1", "b2", "c1", "c2"]);
-
-        let stream = join.execute(0).await?;
-        let batches = common::collect(stream).await?;
-        assert_eq!(batches.len(), 1);
-
-        let expected = vec![
-            "+----+----+----+----+",
-            "| a1 | b2 | c1 | c2 |",
-            "+----+----+----+----+",
-            "| 1  | 1  | 7  | 70 |",
-            "| 2  | 2  | 8  | 80 |",
-            "| 2  | 2  | 9  | 80 |",
-            "+----+----+----+----+",
-        ];
-
-        assert_batches_sorted_eq!(expected, &batches);
-
-        Ok(())
-    }
-
-    /// Test where the left has 1 part, the right has 2 parts => 2 parts
-    #[tokio::test]
-    async fn join_inner_one_two_parts_right() -> Result<()> {
-        let left = build_table(
-            ("a1", &vec![1, 2, 3]),
-            ("b1", &vec![4, 5, 5]), // this has a repetition
-            ("c1", &vec![7, 8, 9]),
-        );
-
-        let batch1 = build_table_i32(
-            ("a2", &vec![10, 20]),
-            ("b1", &vec![4, 6]),
-            ("c2", &vec![70, 80]),
-        );
-        let batch2 =
-            build_table_i32(("a2", &vec![30]), ("b1", &vec![5]), ("c2", &vec![90]));
-        let schema = batch1.schema();
-        let right = Arc::new(
-            MemoryExec::try_new(&[vec![batch1], vec![batch2]], schema, None).unwrap(),
-        );
-
-        let on = &[("b1", "b1")];
-
-        let join = join(left, right, on, &JoinType::Inner)?;
-
-        let columns = columns(&join.schema());
-        assert_eq!(columns, vec!["a1", "b1", "c1", "a2", "c2"]);
-
-        // first part
-        let stream = join.execute(0).await?;
-        let batches = common::collect(stream).await?;
-        assert_eq!(batches.len(), 1);
-
-        let expected = vec![
-            "+----+----+----+----+----+",
-            "| a1 | b1 | c1 | a2 | c2 |",
-            "+----+----+----+----+----+",
-            "| 1  | 4  | 7  | 10 | 70 |",
-            "+----+----+----+----+----+",
-        ];
-        assert_batches_sorted_eq!(expected, &batches);
-
-        // second part
-        let stream = join.execute(1).await?;
-        let batches = common::collect(stream).await?;
-        assert_eq!(batches.len(), 1);
-        let expected = vec![
-            "+----+----+----+----+----+",
-            "| a1 | b1 | c1 | a2 | c2 |",
-            "+----+----+----+----+----+",
-            "| 2  | 5  | 8  | 30 | 90 |",
-            "| 3  | 5  | 9  | 30 | 90 |",
-            "+----+----+----+----+----+",
-        ];
-
-        assert_batches_sorted_eq!(expected, &batches);
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn join_left_one() -> Result<()> {
-        let left = build_table(
-            ("a1", &vec![1, 2, 3]),
-            ("b1", &vec![4, 5, 7]), // 7 does not exist on the right
-            ("c1", &vec![7, 8, 9]),
-        );
-        let right = build_table(
-            ("a2", &vec![10, 20, 30]),
-            ("b1", &vec![4, 5, 6]),
-            ("c2", &vec![70, 80, 90]),
-        );
-        let on = &[("b1", "b1")];
-
-        let join = join(left, right, on, &JoinType::Left)?;
-
-        let columns = columns(&join.schema());
-        assert_eq!(columns, vec!["a1", "b1", "c1", "a2", "c2"]);
-
-        let stream = join.execute(0).await?;
-        let batches = common::collect(stream).await?;
-
-        let expected = vec![
-            "+----+----+----+----+----+",
-            "| a1 | b1 | c1 | a2 | c2 |",
-            "+----+----+----+----+----+",
-            "| 1  | 4  | 7  | 10 | 70 |",
-            "| 2  | 5  | 8  | 20 | 80 |",
-            "| 3  | 7  | 9  |    |    |",
-            "+----+----+----+----+----+",
-        ];
-        assert_batches_sorted_eq!(expected, &batches);
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn join_right_one() -> Result<()> {
-        let left = build_table(
-            ("a1", &vec![1, 2, 3]),
-            ("b1", &vec![4, 5, 7]),
-            ("c1", &vec![7, 8, 9]),
-        );
-        let right = build_table(
-            ("a2", &vec![10, 20, 30]),
-            ("b1", &vec![4, 5, 6]), // 6 does not exist on the left
-            ("c2", &vec![70, 80, 90]),
-        );
-        let on = &[("b1", "b1")];
-
-        let join = join(left, right, on, &JoinType::Right)?;
-
-        let columns = columns(&join.schema());
-        assert_eq!(columns, vec!["a1", "c1", "a2", "b1", "c2"]);
-
-        let stream = join.execute(0).await?;
-        let batches = common::collect(stream).await?;
-
-        let expected = vec![
-            "+----+----+----+----+----+",
-            "| a1 | c1 | a2 | b1 | c2 |",
-            "+----+----+----+----+----+",
-            "|    |    | 30 | 6  | 90 |",
-            "| 1  | 7  | 10 | 4  | 70 |",
-            "| 2  | 8  | 20 | 5  | 80 |",
-            "+----+----+----+----+----+",
-        ];
-
-        assert_batches_sorted_eq!(expected, &batches);
-
-        Ok(())
-    }
-
-    #[test]
-    fn join_with_hash_collision() -> Result<()> {
-        let mut hashmap_left = HashMap::with_hasher(IdHashBuilder {});
-        let left = build_table_i32(
-            ("a", &vec![10, 20]),
-            ("x", &vec![100, 200]),
-            ("y", &vec![200, 300]),
-        );
-
-        let random_state = RandomState::new();
-        let hashes_buff = &mut vec![0; left.num_rows()];
-        let hashes =
-            create_hashes(&[left.columns()[0].clone()], &random_state, hashes_buff)?;
-
-        // Create hash collisions
-        hashmap_left.insert(hashes[0], smallvec![0, 1]);
-        hashmap_left.insert(hashes[1], smallvec![0, 1]);
-
-        let right = build_table_i32(
-            ("a", &vec![10, 20]),
-            ("b", &vec![0, 0]),
-            ("c", &vec![30, 40]),
-        );
-
-        let left_data = JoinLeftData::new((hashmap_left, left));
-        let (l, r) = build_join_indexes(
-            &left_data,
-            &right,
-            JoinType::Inner,
-            &["a".to_string()],
-            &["a".to_string()],
-            &random_state,
-        )?;
-
-        let mut left_ids = UInt64Builder::new(0);
-        left_ids.append_value(0)?;
-        left_ids.append_value(1)?;
-
-        let mut right_ids = UInt32Builder::new(0);
-
-        right_ids.append_value(0)?;
-        right_ids.append_value(1)?;
-
-        assert_eq!(left_ids.finish(), l);
-
-        assert_eq!(right_ids.finish(), r);
-
-        Ok(())
-    }
-}
diff --git a/rust/datafusion/src/physical_plan/hash_utils.rs b/rust/datafusion/src/physical_plan/hash_utils.rs
deleted file mode 100644
index b26ff9bb5fc..00000000000
--- a/rust/datafusion/src/physical_plan/hash_utils.rs
+++ /dev/null
@@ -1,201 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Functionality used both on logical and physical plans
-
-use crate::error::{DataFusionError, Result};
-use arrow::datatypes::{Field, Schema};
-use std::collections::HashSet;
-
-/// All valid types of joins.
-#[derive(Clone, Copy, Debug)]
-pub enum JoinType {
-    /// Inner join
-    Inner,
-    /// Left
-    Left,
-    /// Right
-    Right,
-}
-
-/// The on clause of the join, as vector of (left, right) columns.
-pub type JoinOn = [(String, String)];
-
-/// Checks whether the schemas "left" and "right" and columns "on" represent a valid join.
-/// They are valid whenever their columns' intersection equals the set `on`
-pub fn check_join_is_valid(left: &Schema, right: &Schema, on: &JoinOn) -> Result<()> {
-    let left: HashSet<String> = left.fields().iter().map(|f| f.name().clone()).collect();
-    let right: HashSet<String> =
-        right.fields().iter().map(|f| f.name().clone()).collect();
-
-    check_join_set_is_valid(&left, &right, on)
-}
-
-/// Checks whether the sets left, right and on compose a valid join.
-/// They are valid whenever their intersection equals the set `on`
-fn check_join_set_is_valid(
-    left: &HashSet<String>,
-    right: &HashSet<String>,
-    on: &JoinOn,
-) -> Result<()> {
-    if on.is_empty() {
-        return Err(DataFusionError::Plan(
-            "The 'on' clause of a join cannot be empty".to_string(),
-        ));
-    }
-    let on_left = &on.iter().map(|on| on.0.to_string()).collect::<HashSet<_>>();
-    let left_missing = on_left.difference(left).collect::<HashSet<_>>();
-
-    let on_right = &on.iter().map(|on| on.1.to_string()).collect::<HashSet<_>>();
-    let right_missing = on_right.difference(right).collect::<HashSet<_>>();
-
-    if !left_missing.is_empty() | !right_missing.is_empty() {
-        return Err(DataFusionError::Plan(format!(
-                "The left or right side of the join does not have all columns on \"on\": \nMissing on the left: {:?}\nMissing on the right: {:?}",
-                left_missing,
-                right_missing,
-            )));
-    };
-
-    let remaining = right
-        .difference(on_right)
-        .cloned()
-        .collect::<HashSet<String>>();
-
-    let collisions = left.intersection(&remaining).collect::<HashSet<_>>();
-
-    if !collisions.is_empty() {
-        return Err(DataFusionError::Plan(format!(
-                "The left schema and the right schema have the following columns with the same name without being on the ON statement: {:?}. Consider aliasing them.",
-                collisions,
-            )));
-    };
-
-    Ok(())
-}
-
-/// Creates a schema for a join operation.
-/// The fields from the left side are first
-pub fn build_join_schema(
-    left: &Schema,
-    right: &Schema,
-    on: &JoinOn,
-    join_type: &JoinType,
-) -> Schema {
-    let fields: Vec<Field> = match join_type {
-        JoinType::Inner | JoinType::Left => {
-            // remove right-side join keys if they have the same names as the left-side
-            let duplicate_keys = &on
-                .iter()
-                .filter(|(l, r)| l == r)
-                .map(|on| on.1.to_string())
-                .collect::<HashSet<_>>();
-
-            let left_fields = left.fields().iter();
-
-            let right_fields = right
-                .fields()
-                .iter()
-                .filter(|f| !duplicate_keys.contains(f.name()));
-
-            // left then right
-            left_fields.chain(right_fields).cloned().collect()
-        }
-        JoinType::Right => {
-            // remove left-side join keys if they have the same names as the right-side
-            let duplicate_keys = &on
-                .iter()
-                .filter(|(l, r)| l == r)
-                .map(|on| on.1.to_string())
-                .collect::<HashSet<_>>();
-
-            let left_fields = left
-                .fields()
-                .iter()
-                .filter(|f| !duplicate_keys.contains(f.name()));
-
-            let right_fields = right.fields().iter();
-
-            // left then right
-            left_fields.chain(right_fields).cloned().collect()
-        }
-    };
-    Schema::new(fields)
-}
-
-#[cfg(test)]
-mod tests {
-
-    use super::*;
-
-    fn check(left: &[&str], right: &[&str], on: &[(&str, &str)]) -> Result<()> {
-        let left = left.iter().map(|x| x.to_string()).collect::<HashSet<_>>();
-        let right = right.iter().map(|x| x.to_string()).collect::<HashSet<_>>();
-        let on: Vec<_> = on
-            .iter()
-            .map(|(l, r)| (l.to_string(), r.to_string()))
-            .collect();
-        check_join_set_is_valid(&left, &right, &on)
-    }
-
-    #[test]
-    fn check_valid() -> Result<()> {
-        let left = vec!["a", "b1"];
-        let right = vec!["a", "b2"];
-        let on = &[("a", "a")];
-
-        check(&left, &right, on)?;
-        Ok(())
-    }
-
-    #[test]
-    fn check_not_in_right() {
-        let left = vec!["a", "b"];
-        let right = vec!["b"];
-        let on = &[("a", "a")];
-
-        assert!(check(&left, &right, on).is_err());
-    }
-
-    #[test]
-    fn check_not_in_left() {
-        let left = vec!["b"];
-        let right = vec!["a"];
-        let on = &[("a", "a")];
-
-        assert!(check(&left, &right, on).is_err());
-    }
-
-    #[test]
-    fn check_collision() {
-        // column "a" would appear both in left and right
-        let left = vec!["a", "c"];
-        let right = vec!["a", "b"];
-        let on = &[("a", "b")];
-
-        assert!(check(&left, &right, on).is_err());
-    }
-
-    #[test]
-    fn check_in_right() {
-        let left = vec!["a", "c"];
-        let right = vec!["b"];
-        let on = &[("a", "b")];
-
-        assert!(check(&left, &right, on).is_ok());
-    }
-}
diff --git a/rust/datafusion/src/physical_plan/limit.rs b/rust/datafusion/src/physical_plan/limit.rs
deleted file mode 100644
index c091196483f..00000000000
--- a/rust/datafusion/src/physical_plan/limit.rs
+++ /dev/null
@@ -1,338 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Defines the LIMIT plan
-
-use std::any::Any;
-use std::pin::Pin;
-use std::sync::Arc;
-use std::task::{Context, Poll};
-
-use futures::stream::Stream;
-use futures::stream::StreamExt;
-
-use crate::error::{DataFusionError, Result};
-use crate::physical_plan::{Distribution, ExecutionPlan, Partitioning};
-use arrow::array::ArrayRef;
-use arrow::compute::limit;
-use arrow::datatypes::SchemaRef;
-use arrow::error::Result as ArrowResult;
-use arrow::record_batch::RecordBatch;
-
-use super::{RecordBatchStream, SendableRecordBatchStream};
-
-use async_trait::async_trait;
-
-/// Limit execution plan
-#[derive(Debug)]
-pub struct GlobalLimitExec {
-    /// Input execution plan
-    input: Arc<dyn ExecutionPlan>,
-    /// Maximum number of rows to return
-    limit: usize,
-}
-
-impl GlobalLimitExec {
-    /// Create a new MergeExec
-    pub fn new(input: Arc<dyn ExecutionPlan>, limit: usize) -> Self {
-        GlobalLimitExec { input, limit }
-    }
-
-    /// Input execution plan
-    pub fn input(&self) -> &Arc<dyn ExecutionPlan> {
-        &self.input
-    }
-
-    /// Maximum number of rows to return
-    pub fn limit(&self) -> usize {
-        self.limit
-    }
-}
-
-#[async_trait]
-impl ExecutionPlan for GlobalLimitExec {
-    /// Return a reference to Any that can be used for downcasting
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn schema(&self) -> SchemaRef {
-        self.input.schema()
-    }
-
-    fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
-        vec![self.input.clone()]
-    }
-
-    fn required_child_distribution(&self) -> Distribution {
-        Distribution::SinglePartition
-    }
-
-    /// Get the output partitioning of this plan
-    fn output_partitioning(&self) -> Partitioning {
-        Partitioning::UnknownPartitioning(1)
-    }
-
-    fn with_new_children(
-        &self,
-        children: Vec<Arc<dyn ExecutionPlan>>,
-    ) -> Result<Arc<dyn ExecutionPlan>> {
-        match children.len() {
-            1 => Ok(Arc::new(GlobalLimitExec::new(
-                children[0].clone(),
-                self.limit,
-            ))),
-            _ => Err(DataFusionError::Internal(
-                "GlobalLimitExec wrong number of children".to_string(),
-            )),
-        }
-    }
-
-    async fn execute(&self, partition: usize) -> Result<SendableRecordBatchStream> {
-        // GlobalLimitExec has a single output partition
-        if 0 != partition {
-            return Err(DataFusionError::Internal(format!(
-                "GlobalLimitExec invalid partition {}",
-                partition
-            )));
-        }
-
-        // GlobalLimitExec requires a single input partition
-        if 1 != self.input.output_partitioning().partition_count() {
-            return Err(DataFusionError::Internal(
-                "GlobalLimitExec requires a single input partition".to_owned(),
-            ));
-        }
-
-        let stream = self.input.execute(0).await?;
-        Ok(Box::pin(LimitStream::new(stream, self.limit)))
-    }
-}
-
-/// LocalLimitExec applies a limit to a single partition
-#[derive(Debug)]
-pub struct LocalLimitExec {
-    /// Input execution plan
-    input: Arc<dyn ExecutionPlan>,
-    /// Maximum number of rows to return
-    limit: usize,
-}
-
-impl LocalLimitExec {
-    /// Create a new LocalLimitExec partition
-    pub fn new(input: Arc<dyn ExecutionPlan>, limit: usize) -> Self {
-        Self { input, limit }
-    }
-
-    /// Input execution plan
-    pub fn input(&self) -> &Arc<dyn ExecutionPlan> {
-        &self.input
-    }
-
-    /// Maximum number of rows to return
-    pub fn limit(&self) -> usize {
-        self.limit
-    }
-}
-
-#[async_trait]
-impl ExecutionPlan for LocalLimitExec {
-    /// Return a reference to Any that can be used for downcasting
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn schema(&self) -> SchemaRef {
-        self.input.schema()
-    }
-
-    fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
-        vec![self.input.clone()]
-    }
-
-    fn output_partitioning(&self) -> Partitioning {
-        self.input.output_partitioning()
-    }
-
-    fn with_new_children(
-        &self,
-        children: Vec<Arc<dyn ExecutionPlan>>,
-    ) -> Result<Arc<dyn ExecutionPlan>> {
-        match children.len() {
-            1 => Ok(Arc::new(LocalLimitExec::new(
-                children[0].clone(),
-                self.limit,
-            ))),
-            _ => Err(DataFusionError::Internal(
-                "LocalLimitExec wrong number of children".to_string(),
-            )),
-        }
-    }
-
-    async fn execute(&self, partition: usize) -> Result<SendableRecordBatchStream> {
-        let stream = self.input.execute(partition).await?;
-        Ok(Box::pin(LimitStream::new(stream, self.limit)))
-    }
-}
-
-/// Truncate a RecordBatch to maximum of n rows
-pub fn truncate_batch(batch: &RecordBatch, n: usize) -> RecordBatch {
-    let limited_columns: Vec<ArrayRef> = (0..batch.num_columns())
-        .map(|i| limit(batch.column(i), n))
-        .collect();
-
-    RecordBatch::try_new(batch.schema(), limited_columns).unwrap()
-}
-
-/// A Limit stream limits the stream to up to `limit` rows.
-struct LimitStream {
-    /// The maximum number of rows to produce
-    limit: usize,
-    /// The input to read from. This is set to None once the limit is
-    /// reached to enable early termination
-    input: Option<SendableRecordBatchStream>,
-    /// Copy of the input schema
-    schema: SchemaRef,
-    // the current number of rows which have been produced
-    current_len: usize,
-}
-
-impl LimitStream {
-    fn new(input: SendableRecordBatchStream, limit: usize) -> Self {
-        let schema = input.schema();
-        Self {
-            limit,
-            input: Some(input),
-            schema,
-            current_len: 0,
-        }
-    }
-
-    fn stream_limit(&mut self, batch: RecordBatch) -> Option<RecordBatch> {
-        if self.current_len == self.limit {
-            self.input = None; // clear input so it can be dropped early
-            None
-        } else if self.current_len + batch.num_rows() <= self.limit {
-            self.current_len += batch.num_rows();
-            Some(batch)
-        } else {
-            let batch_rows = self.limit - self.current_len;
-            self.current_len = self.limit;
-            self.input = None; // clear input so it can be dropped early
-            Some(truncate_batch(&batch, batch_rows))
-        }
-    }
-}
-
-impl Stream for LimitStream {
-    type Item = ArrowResult<RecordBatch>;
-
-    fn poll_next(
-        mut self: Pin<&mut Self>,
-        cx: &mut Context<'_>,
-    ) -> Poll<Option<Self::Item>> {
-        match &mut self.input {
-            Some(input) => input.poll_next_unpin(cx).map(|x| match x {
-                Some(Ok(batch)) => Ok(self.stream_limit(batch)).transpose(),
-                other => other,
-            }),
-            // input has been cleared
-            None => Poll::Ready(None),
-        }
-    }
-}
-
-impl RecordBatchStream for LimitStream {
-    /// Get the schema
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-}
-
-#[cfg(test)]
-mod tests {
-
-    use common::collect;
-
-    use super::*;
-    use crate::physical_plan::common;
-    use crate::physical_plan::csv::{CsvExec, CsvReadOptions};
-    use crate::physical_plan::merge::MergeExec;
-    use crate::test;
-
-    #[tokio::test]
-    async fn limit() -> Result<()> {
-        let schema = test::aggr_test_schema();
-
-        let num_partitions = 4;
-        let path =
-            test::create_partitioned_csv("aggregate_test_100.csv", num_partitions)?;
-
-        let csv = CsvExec::try_new(
-            &path,
-            CsvReadOptions::new().schema(&schema),
-            None,
-            1024,
-            None,
-        )?;
-
-        // input should have 4 partitions
-        assert_eq!(csv.output_partitioning().partition_count(), num_partitions);
-
-        let limit = GlobalLimitExec::new(Arc::new(MergeExec::new(Arc::new(csv))), 7);
-
-        // the result should contain 4 batches (one per input partition)
-        let iter = limit.execute(0).await?;
-        let batches = common::collect(iter).await?;
-
-        // there should be a total of 100 rows
-        let row_count: usize = batches.iter().map(|batch| batch.num_rows()).sum();
-        assert_eq!(row_count, 7);
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn limit_early_shutdown() -> Result<()> {
-        let batches = vec![
-            test::make_partition(5),
-            test::make_partition(10),
-            test::make_partition(15),
-            test::make_partition(20),
-            test::make_partition(25),
-        ];
-        let input = test::exec::TestStream::new(batches);
-
-        let index = input.index();
-        assert_eq!(index.value(), 0);
-
-        // limit of six needs to consume the entire first record batch
-        // (5 rows) and 1 row from the second (1 row)
-        let limit_stream = LimitStream::new(Box::pin(input), 6);
-        assert_eq!(index.value(), 0);
-
-        let results = collect(Box::pin(limit_stream)).await.unwrap();
-        let num_rows: usize = results.into_iter().map(|b| b.num_rows()).sum();
-        // Only 6 rows should have been produced
-        assert_eq!(num_rows, 6);
-
-        // Only the first two batches should be consumed
-        assert_eq!(index.value(), 2);
-
-        Ok(())
-    }
-}
diff --git a/rust/datafusion/src/physical_plan/math_expressions.rs b/rust/datafusion/src/physical_plan/math_expressions.rs
deleted file mode 100644
index 382a15f8ccf..00000000000
--- a/rust/datafusion/src/physical_plan/math_expressions.rs
+++ /dev/null
@@ -1,118 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Math expressions
-
-use arrow::array::{make_array, Array, ArrayData, Float32Array, Float64Array};
-use arrow::buffer::Buffer;
-use arrow::datatypes::{DataType, ToByteSlice};
-
-use super::{ColumnarValue, ScalarValue};
-use crate::error::{DataFusionError, Result};
-
-macro_rules! compute_op {
-    ($ARRAY:expr, $FUNC:ident, $TYPE:ident) => {{
-        let len = $ARRAY.len();
-        let result = (0..len)
-            .map(|i| $ARRAY.value(i).$FUNC() as f64)
-            .collect::<Vec<f64>>();
-        let data = ArrayData::new(
-            DataType::Float64,
-            len,
-            Some($ARRAY.null_count()),
-            $ARRAY.data().null_buffer().cloned(),
-            0,
-            vec![Buffer::from(result.to_byte_slice())],
-            vec![],
-        );
-        Ok(make_array(data))
-    }};
-}
-
-macro_rules! downcast_compute_op {
-    ($ARRAY:expr, $NAME:expr, $FUNC:ident, $TYPE:ident) => {{
-        let n = $ARRAY.as_any().downcast_ref::<$TYPE>();
-        match n {
-            Some(array) => compute_op!(array, $FUNC, $TYPE),
-            _ => Err(DataFusionError::Internal(format!(
-                "Invalid data type for {}",
-                $NAME
-            ))),
-        }
-    }};
-}
-
-macro_rules! unary_primitive_array_op {
-    ($VALUE:expr, $NAME:expr, $FUNC:ident) => {{
-        match ($VALUE) {
-            ColumnarValue::Array(array) => match array.data_type() {
-                DataType::Float32 => {
-                    let result = downcast_compute_op!(array, $NAME, $FUNC, Float32Array);
-                    Ok(ColumnarValue::Array(result?))
-                }
-                DataType::Float64 => {
-                    let result = downcast_compute_op!(array, $NAME, $FUNC, Float64Array);
-                    Ok(ColumnarValue::Array(result?))
-                }
-                other => Err(DataFusionError::Internal(format!(
-                    "Unsupported data type {:?} for function {}",
-                    other, $NAME,
-                ))),
-            },
-            ColumnarValue::Scalar(a) => match a {
-                ScalarValue::Float32(a) => Ok(ColumnarValue::Scalar(
-                    ScalarValue::Float64(a.map(|x| x.$FUNC() as f64)),
-                )),
-                ScalarValue::Float64(a) => Ok(ColumnarValue::Scalar(
-                    ScalarValue::Float64(a.map(|x| x.$FUNC())),
-                )),
-                _ => Err(DataFusionError::Internal(format!(
-                    "Unsupported data type {:?} for function {}",
-                    ($VALUE).data_type(),
-                    $NAME,
-                ))),
-            },
-        }
-    }};
-}
-
-macro_rules! math_unary_function {
-    ($NAME:expr, $FUNC:ident) => {
-        /// mathematical function that accepts f32 or f64 and returns f64
-        pub fn $FUNC(args: &[ColumnarValue]) -> Result<ColumnarValue> {
-            unary_primitive_array_op!(&args[0], $NAME, $FUNC)
-        }
-    };
-}
-
-math_unary_function!("sqrt", sqrt);
-math_unary_function!("sin", sin);
-math_unary_function!("cos", cos);
-math_unary_function!("tan", tan);
-math_unary_function!("asin", asin);
-math_unary_function!("acos", acos);
-math_unary_function!("atan", atan);
-math_unary_function!("floor", floor);
-math_unary_function!("ceil", ceil);
-math_unary_function!("round", round);
-math_unary_function!("trunc", trunc);
-math_unary_function!("abs", abs);
-math_unary_function!("signum", signum);
-math_unary_function!("exp", exp);
-math_unary_function!("log", ln);
-math_unary_function!("log2", log2);
-math_unary_function!("log10", log10);
diff --git a/rust/datafusion/src/physical_plan/memory.rs b/rust/datafusion/src/physical_plan/memory.rs
deleted file mode 100644
index bef9bcc62df..00000000000
--- a/rust/datafusion/src/physical_plan/memory.rs
+++ /dev/null
@@ -1,161 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Execution plan for reading in-memory batches of data
-
-use std::any::Any;
-use std::sync::Arc;
-use std::task::{Context, Poll};
-
-use super::{ExecutionPlan, Partitioning, RecordBatchStream, SendableRecordBatchStream};
-use crate::error::{DataFusionError, Result};
-use arrow::datatypes::SchemaRef;
-use arrow::error::Result as ArrowResult;
-use arrow::record_batch::RecordBatch;
-
-use async_trait::async_trait;
-use futures::Stream;
-
-/// Execution plan for reading in-memory batches of data
-#[derive(Debug)]
-pub struct MemoryExec {
-    /// The partitions to query
-    partitions: Vec<Vec<RecordBatch>>,
-    /// Schema representing the data after the optional projection is applied
-    schema: SchemaRef,
-    /// Optional projection
-    projection: Option<Vec<usize>>,
-}
-
-#[async_trait]
-impl ExecutionPlan for MemoryExec {
-    /// Return a reference to Any that can be used for downcasting
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    /// Get the schema for this execution plan
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-
-    fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
-        // this is a leaf node and has no children
-        vec![]
-    }
-
-    /// Get the output partitioning of this plan
-    fn output_partitioning(&self) -> Partitioning {
-        Partitioning::UnknownPartitioning(self.partitions.len())
-    }
-
-    fn with_new_children(
-        &self,
-        _: Vec<Arc<dyn ExecutionPlan>>,
-    ) -> Result<Arc<dyn ExecutionPlan>> {
-        Err(DataFusionError::Internal(format!(
-            "Children cannot be replaced in {:?}",
-            self
-        )))
-    }
-
-    async fn execute(&self, partition: usize) -> Result<SendableRecordBatchStream> {
-        Ok(Box::pin(MemoryStream::try_new(
-            self.partitions[partition].clone(),
-            self.schema.clone(),
-            self.projection.clone(),
-        )?))
-    }
-}
-
-impl MemoryExec {
-    /// Create a new execution plan for reading in-memory record batches
-    pub fn try_new(
-        partitions: &[Vec<RecordBatch>],
-        schema: SchemaRef,
-        projection: Option<Vec<usize>>,
-    ) -> Result<Self> {
-        Ok(Self {
-            partitions: partitions.to_vec(),
-            schema,
-            projection,
-        })
-    }
-}
-
-/// Iterator over batches
-pub(crate) struct MemoryStream {
-    /// Vector of record batches
-    data: Vec<RecordBatch>,
-    /// Schema representing the data
-    schema: SchemaRef,
-    /// Optional projection for which columns to load
-    projection: Option<Vec<usize>>,
-    /// Index into the data
-    index: usize,
-}
-
-impl MemoryStream {
-    /// Create an iterator for a vector of record batches
-    pub fn try_new(
-        data: Vec<RecordBatch>,
-        schema: SchemaRef,
-        projection: Option<Vec<usize>>,
-    ) -> Result<Self> {
-        Ok(Self {
-            data,
-            schema,
-            projection,
-            index: 0,
-        })
-    }
-}
-
-impl Stream for MemoryStream {
-    type Item = ArrowResult<RecordBatch>;
-
-    fn poll_next(
-        mut self: std::pin::Pin<&mut Self>,
-        _: &mut Context<'_>,
-    ) -> Poll<Option<Self::Item>> {
-        Poll::Ready(if self.index < self.data.len() {
-            self.index += 1;
-            let batch = &self.data[self.index - 1];
-            // apply projection
-            match &self.projection {
-                Some(columns) => Some(RecordBatch::try_new(
-                    self.schema.clone(),
-                    columns.iter().map(|i| batch.column(*i).clone()).collect(),
-                )),
-                None => Some(Ok(batch.clone())),
-            }
-        } else {
-            None
-        })
-    }
-
-    fn size_hint(&self) -> (usize, Option<usize>) {
-        (self.data.len(), Some(self.data.len()))
-    }
-}
-
-impl RecordBatchStream for MemoryStream {
-    /// Get the schema
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-}
diff --git a/rust/datafusion/src/physical_plan/merge.rs b/rust/datafusion/src/physical_plan/merge.rs
deleted file mode 100644
index c66532b73cc..00000000000
--- a/rust/datafusion/src/physical_plan/merge.rs
+++ /dev/null
@@ -1,225 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Defines the merge plan for executing partitions in parallel and then merging the results
-//! into a single partition
-
-use std::any::Any;
-use std::sync::Arc;
-
-use futures::channel::mpsc;
-use futures::sink::SinkExt;
-use futures::stream::StreamExt;
-use futures::Stream;
-
-use async_trait::async_trait;
-
-use arrow::record_batch::RecordBatch;
-use arrow::{
-    datatypes::SchemaRef,
-    error::{ArrowError, Result as ArrowResult},
-};
-
-use super::RecordBatchStream;
-use crate::error::{DataFusionError, Result};
-use crate::physical_plan::ExecutionPlan;
-use crate::physical_plan::Partitioning;
-
-use super::SendableRecordBatchStream;
-use pin_project_lite::pin_project;
-
-/// Merge execution plan executes partitions in parallel and combines them into a single
-/// partition. No guarantees are made about the order of the resulting partition.
-#[derive(Debug)]
-pub struct MergeExec {
-    /// Input execution plan
-    input: Arc<dyn ExecutionPlan>,
-}
-
-impl MergeExec {
-    /// Create a new MergeExec
-    pub fn new(input: Arc<dyn ExecutionPlan>) -> Self {
-        MergeExec { input }
-    }
-
-    /// Input execution plan
-    pub fn input(&self) -> &Arc<dyn ExecutionPlan> {
-        &self.input
-    }
-}
-
-#[async_trait]
-impl ExecutionPlan for MergeExec {
-    /// Return a reference to Any that can be used for downcasting
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn schema(&self) -> SchemaRef {
-        self.input.schema()
-    }
-
-    fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
-        vec![self.input.clone()]
-    }
-
-    /// Get the output partitioning of this plan
-    fn output_partitioning(&self) -> Partitioning {
-        Partitioning::UnknownPartitioning(1)
-    }
-
-    fn with_new_children(
-        &self,
-        children: Vec<Arc<dyn ExecutionPlan>>,
-    ) -> Result<Arc<dyn ExecutionPlan>> {
-        match children.len() {
-            1 => Ok(Arc::new(MergeExec::new(children[0].clone()))),
-            _ => Err(DataFusionError::Internal(
-                "MergeExec wrong number of children".to_string(),
-            )),
-        }
-    }
-
-    async fn execute(&self, partition: usize) -> Result<SendableRecordBatchStream> {
-        // MergeExec produces a single partition
-        if 0 != partition {
-            return Err(DataFusionError::Internal(format!(
-                "MergeExec invalid partition {}",
-                partition
-            )));
-        }
-
-        let input_partitions = self.input.output_partitioning().partition_count();
-        match input_partitions {
-            0 => Err(DataFusionError::Internal(
-                "MergeExec requires at least one input partition".to_owned(),
-            )),
-            1 => {
-                // bypass any threading if there is a single partition
-                self.input.execute(0).await
-            }
-            _ => {
-                // use a stream that allows each sender to put in at
-                // least one result in an attempt to maximize
-                // parallelism.
-                let (sender, receiver) =
-                    mpsc::channel::<ArrowResult<RecordBatch>>(input_partitions);
-
-                // spawn independent tasks whose resulting streams (of batches)
-                // are sent to the channel for consumption.
-                for part_i in 0..input_partitions {
-                    let input = self.input.clone();
-                    let mut sender = sender.clone();
-                    tokio::spawn(async move {
-                        let mut stream = match input.execute(part_i).await {
-                            Err(e) => {
-                                // If send fails, plan being torn
-                                // down, no place to send the error
-                                let arrow_error = ArrowError::ExternalError(Box::new(e));
-                                sender.send(Err(arrow_error)).await.ok();
-                                return;
-                            }
-                            Ok(stream) => stream,
-                        };
-
-                        while let Some(item) = stream.next().await {
-                            // If send fails, plan being torn down,
-                            // there is no place to send the error
-                            sender.send(item).await.ok();
-                        }
-                    });
-                }
-
-                Ok(Box::pin(MergeStream {
-                    input: receiver,
-                    schema: self.schema(),
-                }))
-            }
-        }
-    }
-}
-
-pin_project! {
-    struct MergeStream {
-        schema: SchemaRef,
-        #[pin]
-        input: mpsc::Receiver<ArrowResult<RecordBatch>>,
-    }
-}
-
-impl Stream for MergeStream {
-    type Item = ArrowResult<RecordBatch>;
-
-    fn poll_next(
-        self: std::pin::Pin<&mut Self>,
-        cx: &mut std::task::Context<'_>,
-    ) -> std::task::Poll<Option<Self::Item>> {
-        let this = self.project();
-        this.input.poll_next(cx)
-    }
-}
-
-impl RecordBatchStream for MergeStream {
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-}
-
-#[cfg(test)]
-mod tests {
-
-    use super::*;
-    use crate::physical_plan::common;
-    use crate::physical_plan::csv::{CsvExec, CsvReadOptions};
-    use crate::test;
-
-    #[tokio::test]
-    async fn merge() -> Result<()> {
-        let schema = test::aggr_test_schema();
-
-        let num_partitions = 4;
-        let path =
-            test::create_partitioned_csv("aggregate_test_100.csv", num_partitions)?;
-
-        let csv = CsvExec::try_new(
-            &path,
-            CsvReadOptions::new().schema(&schema),
-            None,
-            1024,
-            None,
-        )?;
-
-        // input should have 4 partitions
-        assert_eq!(csv.output_partitioning().partition_count(), num_partitions);
-
-        let merge = MergeExec::new(Arc::new(csv));
-
-        // output of MergeExec should have a single partition
-        assert_eq!(merge.output_partitioning().partition_count(), 1);
-
-        // the result should contain 4 batches (one per input partition)
-        let iter = merge.execute(0).await?;
-        let batches = common::collect(iter).await?;
-        assert_eq!(batches.len(), num_partitions);
-
-        // there should be a total of 100 rows
-        let row_count: usize = batches.iter().map(|batch| batch.num_rows()).sum();
-        assert_eq!(row_count, 100);
-
-        Ok(())
-    }
-}
diff --git a/rust/datafusion/src/physical_plan/mod.rs b/rust/datafusion/src/physical_plan/mod.rs
deleted file mode 100644
index 5036dcb921b..00000000000
--- a/rust/datafusion/src/physical_plan/mod.rs
+++ /dev/null
@@ -1,369 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Traits for physical query plan, supporting parallel execution for partitioned relations.
-
-use std::fmt::{Debug, Display};
-use std::sync::{Arc, Mutex};
-use std::{any::Any, pin::Pin};
-
-use crate::execution::context::ExecutionContextState;
-use crate::logical_plan::LogicalPlan;
-use crate::{error::Result, scalar::ScalarValue};
-use arrow::datatypes::{DataType, Schema, SchemaRef};
-use arrow::error::Result as ArrowResult;
-use arrow::record_batch::RecordBatch;
-use arrow::{array::ArrayRef, datatypes::Field};
-
-use async_trait::async_trait;
-use futures::stream::Stream;
-
-use self::merge::MergeExec;
-use hashbrown::HashMap;
-
-/// Trait for types that stream [arrow::record_batch::RecordBatch]
-pub trait RecordBatchStream: Stream<Item = ArrowResult<RecordBatch>> {
-    /// Returns the schema of this `RecordBatchStream`.
-    ///
-    /// Implementation of this trait should guarantee that all `RecordBatch`'s returned by this
-    /// stream should have the same schema as returned from this method.
-    fn schema(&self) -> SchemaRef;
-}
-
-/// Trait for a stream of record batches.
-pub type SendableRecordBatchStream = Pin<Box<dyn RecordBatchStream + Send + Sync>>;
-
-/// SQL metric type
-#[derive(Debug, Clone)]
-pub enum MetricType {
-    /// Simple counter
-    Counter,
-    /// Wall clock time in nanoseconds
-    TimeNanos,
-}
-
-/// SQL metric such as counter (number of input or output rows) or timing information about
-/// a physical operator.
-#[derive(Debug, Clone)]
-pub struct SQLMetric {
-    /// Metric name
-    name: String,
-    /// Metric value
-    value: usize,
-    /// Metric type
-    metric_type: MetricType,
-}
-
-impl SQLMetric {
-    /// Create a new metric for tracking a counter
-    pub fn counter(name: &str) -> Arc<Mutex<SQLMetric>> {
-        Arc::new(Mutex::new(SQLMetric::new(name, MetricType::Counter)))
-    }
-
-    /// Create a new metric for tracking time in nanoseconds
-    pub fn time_nanos(name: &str) -> Arc<Mutex<SQLMetric>> {
-        Arc::new(Mutex::new(SQLMetric::new(name, MetricType::TimeNanos)))
-    }
-
-    /// Create a new SQLMetric
-    pub fn new(name: &str, metric_type: MetricType) -> Self {
-        Self {
-            name: name.to_owned(),
-            value: 0,
-            metric_type,
-        }
-    }
-
-    /// Add to the value
-    pub fn add(&mut self, n: usize) {
-        self.value += n;
-    }
-
-    /// Get the current value
-    pub fn value(&self) -> usize {
-        self.value
-    }
-}
-
-/// Physical query planner that converts a `LogicalPlan` to an
-/// `ExecutionPlan` suitable for execution.
-pub trait PhysicalPlanner {
-    /// Create a physical plan from a logical plan
-    fn create_physical_plan(
-        &self,
-        logical_plan: &LogicalPlan,
-        ctx_state: &ExecutionContextState,
-    ) -> Result<Arc<dyn ExecutionPlan>>;
-}
-
-/// Partition-aware execution plan for a relation
-#[async_trait]
-pub trait ExecutionPlan: Debug + Send + Sync {
-    /// Returns the execution plan as [`Any`](std::any::Any) so that it can be
-    /// downcast to a specific implementation.
-    fn as_any(&self) -> &dyn Any;
-    /// Get the schema for this execution plan
-    fn schema(&self) -> SchemaRef;
-    /// Specifies the output partitioning scheme of this plan
-    fn output_partitioning(&self) -> Partitioning;
-    /// Specifies the data distribution requirements of all the children for this operator
-    fn required_child_distribution(&self) -> Distribution {
-        Distribution::UnspecifiedDistribution
-    }
-    /// Get a list of child execution plans that provide the input for this plan. The returned list
-    /// will be empty for leaf nodes, will contain a single value for unary nodes, or two
-    /// values for binary nodes (such as joins).
-    fn children(&self) -> Vec<Arc<dyn ExecutionPlan>>;
-    /// Returns a new plan where all children were replaced by new plans.
-    /// The size of `children` must be equal to the size of `ExecutionPlan::children()`.
-    fn with_new_children(
-        &self,
-        children: Vec<Arc<dyn ExecutionPlan>>,
-    ) -> Result<Arc<dyn ExecutionPlan>>;
-
-    /// creates an iterator
-    async fn execute(&self, partition: usize) -> Result<SendableRecordBatchStream>;
-
-    /// Return a snapshot of the metrics collected during execution
-    fn metrics(&self) -> HashMap<String, SQLMetric> {
-        HashMap::new()
-    }
-}
-
-/// Execute the [ExecutionPlan] and collect the results in memory
-pub async fn collect(plan: Arc<dyn ExecutionPlan>) -> Result<Vec<RecordBatch>> {
-    match plan.output_partitioning().partition_count() {
-        0 => Ok(vec![]),
-        1 => {
-            let it = plan.execute(0).await?;
-            common::collect(it).await
-        }
-        _ => {
-            // merge into a single partition
-            let plan = MergeExec::new(plan.clone());
-            // MergeExec must produce a single partition
-            assert_eq!(1, plan.output_partitioning().partition_count());
-            common::collect(plan.execute(0).await?).await
-        }
-    }
-}
-
-/// Execute the [ExecutionPlan] and collect the results in memory
-pub async fn collect_partitioned(
-    plan: Arc<dyn ExecutionPlan>,
-) -> Result<Vec<Vec<RecordBatch>>> {
-    match plan.output_partitioning().partition_count() {
-        0 => Ok(vec![]),
-        1 => {
-            let it = plan.execute(0).await?;
-            Ok(vec![common::collect(it).await?])
-        }
-        _ => {
-            let mut partitions = vec![];
-            for i in 0..plan.output_partitioning().partition_count() {
-                partitions.push(common::collect(plan.execute(i).await?).await?)
-            }
-            Ok(partitions)
-        }
-    }
-}
-
-/// Partitioning schemes supported by operators.
-#[derive(Debug, Clone)]
-pub enum Partitioning {
-    /// Allocate batches using a round-robin algorithm and the specified number of partitions
-    RoundRobinBatch(usize),
-    /// Allocate rows based on a hash of one of more expressions and the specified
-    /// number of partitions
-    /// This partitioning scheme is not yet fully supported. See [ARROW-11011](https://issues.apache.org/jira/browse/ARROW-11011)
-    Hash(Vec<Arc<dyn PhysicalExpr>>, usize),
-    /// Unknown partitioning scheme with a known number of partitions
-    UnknownPartitioning(usize),
-}
-
-impl Partitioning {
-    /// Returns the number of partitions in this partitioning scheme
-    pub fn partition_count(&self) -> usize {
-        use Partitioning::*;
-        match self {
-            RoundRobinBatch(n) => *n,
-            Hash(_, n) => *n,
-            UnknownPartitioning(n) => *n,
-        }
-    }
-}
-
-/// Distribution schemes
-#[derive(Debug, Clone, PartialEq)]
-pub enum Distribution {
-    /// Unspecified distribution
-    UnspecifiedDistribution,
-    /// A single partition is required
-    SinglePartition,
-}
-
-/// Represents the result from an expression
-#[derive(Clone)]
-pub enum ColumnarValue {
-    /// Array of values
-    Array(ArrayRef),
-    /// A single value
-    Scalar(ScalarValue),
-}
-
-impl ColumnarValue {
-    fn data_type(&self) -> DataType {
-        match self {
-            ColumnarValue::Array(array_value) => array_value.data_type().clone(),
-            ColumnarValue::Scalar(scalar_value) => scalar_value.get_datatype(),
-        }
-    }
-
-    fn into_array(self, num_rows: usize) -> ArrayRef {
-        match self {
-            ColumnarValue::Array(array) => array,
-            ColumnarValue::Scalar(scalar) => scalar.to_array_of_size(num_rows),
-        }
-    }
-}
-
-/// Expression that can be evaluated against a RecordBatch
-/// A Physical expression knows its type, nullability and how to evaluate itself.
-pub trait PhysicalExpr: Send + Sync + Display + Debug {
-    /// Returns the physical expression as [`Any`](std::any::Any) so that it can be
-    /// downcast to a specific implementation.
-    fn as_any(&self) -> &dyn Any;
-    /// Get the data type of this expression, given the schema of the input
-    fn data_type(&self, input_schema: &Schema) -> Result<DataType>;
-    /// Determine whether this expression is nullable, given the schema of the input
-    fn nullable(&self, input_schema: &Schema) -> Result<bool>;
-    /// Evaluate an expression against a RecordBatch
-    fn evaluate(&self, batch: &RecordBatch) -> Result<ColumnarValue>;
-}
-
-/// An aggregate expression that:
-/// * knows its resulting field
-/// * knows how to create its accumulator
-/// * knows its accumulator's state's field
-/// * knows the expressions from whose its accumulator will receive values
-pub trait AggregateExpr: Send + Sync + Debug {
-    /// Returns the aggregate expression as [`Any`](std::any::Any) so that it can be
-    /// downcast to a specific implementation.
-    fn as_any(&self) -> &dyn Any;
-    /// the field of the final result of this aggregation.
-    fn field(&self) -> Result<Field>;
-
-    /// the accumulator used to accumulate values from the expressions.
-    /// the accumulator expects the same number of arguments as `expressions` and must
-    /// return states with the same description as `state_fields`
-    fn create_accumulator(&self) -> Result<Box<dyn Accumulator>>;
-
-    /// the fields that encapsulate the Accumulator's state
-    /// the number of fields here equals the number of states that the accumulator contains
-    fn state_fields(&self) -> Result<Vec<Field>>;
-
-    /// expressions that are passed to the Accumulator.
-    /// Single-column aggregations such as `sum` return a single value, others (e.g. `cov`) return many.
-    fn expressions(&self) -> Vec<Arc<dyn PhysicalExpr>>;
-}
-
-/// An accumulator represents a stateful object that lives throughout the evaluation of multiple rows and
-/// generically accumulates values. An accumulator knows how to:
-/// * update its state from inputs via `update`
-/// * convert its internal state to a vector of scalar values
-/// * update its state from multiple accumulators' states via `merge`
-/// * compute the final value from its internal state via `evaluate`
-pub trait Accumulator: Send + Sync + Debug {
-    /// Returns the state of the accumulator at the end of the accumulation.
-    // in the case of an average on which we track `sum` and `n`, this function should return a vector
-    // of two values, sum and n.
-    fn state(&self) -> Result<Vec<ScalarValue>>;
-
-    /// updates the accumulator's state from a vector of scalars.
-    fn update(&mut self, values: &[ScalarValue]) -> Result<()>;
-
-    /// updates the accumulator's state from a vector of arrays.
-    fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
-        if values.is_empty() {
-            return Ok(());
-        };
-        (0..values[0].len()).try_for_each(|index| {
-            let v = values
-                .iter()
-                .map(|array| ScalarValue::try_from_array(array, index))
-                .collect::<Result<Vec<_>>>()?;
-            self.update(&v)
-        })
-    }
-
-    /// updates the accumulator's state from a vector of scalars.
-    fn merge(&mut self, states: &[ScalarValue]) -> Result<()>;
-
-    /// updates the accumulator's state from a vector of states.
-    fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> {
-        if states.is_empty() {
-            return Ok(());
-        };
-        (0..states[0].len()).try_for_each(|index| {
-            let v = states
-                .iter()
-                .map(|array| ScalarValue::try_from_array(array, index))
-                .collect::<Result<Vec<_>>>()?;
-            self.merge(&v)
-        })
-    }
-
-    /// returns its value based on its current state.
-    fn evaluate(&self) -> Result<ScalarValue>;
-}
-
-pub mod aggregates;
-pub mod array_expressions;
-pub mod coalesce_batches;
-pub mod common;
-#[cfg(feature = "crypto_expressions")]
-pub mod crypto_expressions;
-pub mod csv;
-pub mod datetime_expressions;
-pub mod distinct_expressions;
-pub mod empty;
-pub mod explain;
-pub mod expressions;
-pub mod filter;
-pub mod functions;
-pub mod group_scalar;
-pub mod hash_aggregate;
-pub mod hash_join;
-pub mod hash_utils;
-pub mod limit;
-pub mod math_expressions;
-pub mod memory;
-pub mod merge;
-pub mod parquet;
-pub mod planner;
-pub mod projection;
-#[cfg(feature = "regex_expressions")]
-pub mod regex_expressions;
-pub mod repartition;
-pub mod sort;
-pub mod string_expressions;
-pub mod type_coercion;
-pub mod udaf;
-pub mod udf;
-#[cfg(feature = "unicode_expressions")]
-pub mod unicode_expressions;
-pub mod union;
diff --git a/rust/datafusion/src/physical_plan/parquet.rs b/rust/datafusion/src/physical_plan/parquet.rs
deleted file mode 100644
index d41d6968fee..00000000000
--- a/rust/datafusion/src/physical_plan/parquet.rs
+++ /dev/null
@@ -1,1535 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Execution plan for reading Parquet files
-
-use std::fmt;
-use std::fs::File;
-use std::sync::Arc;
-use std::task::{Context, Poll};
-use std::{
-    any::Any,
-    collections::{HashMap, HashSet},
-};
-
-use super::{
-    planner::DefaultPhysicalPlanner, ColumnarValue, PhysicalExpr, RecordBatchStream,
-    SendableRecordBatchStream,
-};
-use crate::{
-    catalog::catalog::MemoryCatalogList,
-    physical_plan::{common, ExecutionPlan, Partitioning},
-};
-use crate::{
-    error::{DataFusionError, Result},
-    execution::context::ExecutionContextState,
-    logical_plan::{Expr, Operator},
-    optimizer::utils,
-    prelude::ExecutionConfig,
-};
-use arrow::record_batch::RecordBatch;
-use arrow::{
-    array::new_null_array,
-    error::{ArrowError, Result as ArrowResult},
-};
-use arrow::{
-    array::{make_array, ArrayData, ArrayRef, BooleanArray, BooleanBufferBuilder},
-    buffer::MutableBuffer,
-    datatypes::{DataType, Field, Schema, SchemaRef},
-};
-use parquet::file::{
-    metadata::RowGroupMetaData,
-    reader::{FileReader, SerializedFileReader},
-    statistics::Statistics as ParquetStatistics,
-};
-
-use fmt::Debug;
-use parquet::arrow::{ArrowReader, ParquetFileArrowReader};
-use tokio::{
-    sync::mpsc::{channel, Receiver, Sender},
-    task,
-};
-use tokio_stream::wrappers::ReceiverStream;
-
-use crate::datasource::datasource::{ColumnStatistics, Statistics};
-use async_trait::async_trait;
-use futures::stream::{Stream, StreamExt};
-
-/// Execution plan for scanning one or more Parquet partitions
-#[derive(Debug, Clone)]
-pub struct ParquetExec {
-    /// Parquet partitions to read
-    partitions: Vec<ParquetPartition>,
-    /// Schema after projection is applied
-    schema: SchemaRef,
-    /// Projection for which columns to load
-    projection: Vec<usize>,
-    /// Batch size
-    batch_size: usize,
-    /// Statistics for the data set (sum of statistics for all partitions)
-    statistics: Statistics,
-    /// Optional predicate builder
-    predicate_builder: Option<RowGroupPredicateBuilder>,
-    /// Optional limit of the number of rows
-    limit: Option<usize>,
-}
-
-/// Represents one partition of a Parquet data set and this currently means one Parquet file.
-///
-/// In the future it would be good to support subsets of files based on ranges of row groups
-/// so that we can better parallelize reads of large files across available cores (see
-/// [ARROW-10995](https://issues.apache.org/jira/browse/ARROW-10995)).
-///
-/// We may also want to support reading Parquet files that are partitioned based on a key and
-/// in this case we would want this partition struct to represent multiple files for a given
-/// partition key (see [ARROW-11019](https://issues.apache.org/jira/browse/ARROW-11019)).
-#[derive(Debug, Clone)]
-pub struct ParquetPartition {
-    /// The Parquet filename for this partition
-    pub filenames: Vec<String>,
-    /// Statistics for this partition
-    pub statistics: Statistics,
-}
-
-impl ParquetExec {
-    /// Create a new Parquet reader execution plan based on the specified Parquet filename or
-    /// directory containing Parquet files
-    pub fn try_from_path(
-        path: &str,
-        projection: Option<Vec<usize>>,
-        predicate: Option<Expr>,
-        batch_size: usize,
-        max_concurrency: usize,
-        limit: Option<usize>,
-    ) -> Result<Self> {
-        // build a list of filenames from the specified path, which could be a single file or
-        // a directory containing one or more parquet files
-        let mut filenames: Vec<String> = vec![];
-        common::build_file_list(path, &mut filenames, ".parquet")?;
-        if filenames.is_empty() {
-            Err(DataFusionError::Plan(format!(
-                "No Parquet files found at path {}",
-                path
-            )))
-        } else {
-            let filenames = filenames
-                .iter()
-                .map(|filename| filename.as_str())
-                .collect::<Vec<&str>>();
-            Self::try_from_files(
-                &filenames,
-                projection,
-                predicate,
-                batch_size,
-                max_concurrency,
-                limit,
-            )
-        }
-    }
-
-    /// Create a new Parquet reader execution plan based on the specified list of Parquet
-    /// files
-    pub fn try_from_files(
-        filenames: &[&str],
-        projection: Option<Vec<usize>>,
-        predicate: Option<Expr>,
-        batch_size: usize,
-        max_concurrency: usize,
-        limit: Option<usize>,
-    ) -> Result<Self> {
-        // build a list of Parquet partitions with statistics and gather all unique schemas
-        // used in this data set
-        let mut schemas: Vec<Schema> = vec![];
-        let mut partitions = Vec::with_capacity(max_concurrency);
-        let filenames: Vec<String> = filenames.iter().map(|s| s.to_string()).collect();
-        let chunks = split_files(&filenames, max_concurrency);
-        let mut num_rows = 0;
-        let mut total_byte_size = 0;
-        let mut null_counts = Vec::new();
-        let mut limit_exhausted = false;
-        for chunk in chunks {
-            let mut filenames: Vec<String> =
-                chunk.iter().map(|x| x.to_string()).collect();
-            let mut total_files = 0;
-            for filename in &filenames {
-                total_files += 1;
-                let file = File::open(filename)?;
-                let file_reader = Arc::new(SerializedFileReader::new(file)?);
-                let mut arrow_reader = ParquetFileArrowReader::new(file_reader);
-                let meta_data = arrow_reader.get_metadata();
-                // collect all the unique schemas in this data set
-                let schema = arrow_reader.get_schema()?;
-                let num_fields = schema.fields().len();
-                if schemas.is_empty() || schema != schemas[0] {
-                    schemas.push(schema);
-                    null_counts = vec![0; num_fields]
-                }
-                for row_group_meta in meta_data.row_groups() {
-                    num_rows += row_group_meta.num_rows();
-                    total_byte_size += row_group_meta.total_byte_size();
-
-                    // Currently assumes every Parquet file has same schema
-                    // https://issues.apache.org/jira/browse/ARROW-11017
-                    let columns_null_counts = row_group_meta
-                        .columns()
-                        .iter()
-                        .flat_map(|c| c.statistics().map(|stats| stats.null_count()));
-
-                    for (i, cnt) in columns_null_counts.enumerate() {
-                        null_counts[i] += cnt
-                    }
-                    if limit.map(|x| num_rows >= x as i64).unwrap_or(false) {
-                        limit_exhausted = true;
-                        break;
-                    }
-                }
-            }
-
-            let column_stats = null_counts
-                .iter()
-                .map(|null_count| ColumnStatistics {
-                    null_count: Some(*null_count as usize),
-                    max_value: None,
-                    min_value: None,
-                    distinct_count: None,
-                })
-                .collect();
-
-            let statistics = Statistics {
-                num_rows: Some(num_rows as usize),
-                total_byte_size: Some(total_byte_size as usize),
-                column_statistics: Some(column_stats),
-            };
-            // remove files that are not needed in case of limit
-            filenames.truncate(total_files);
-            partitions.push(ParquetPartition {
-                filenames,
-                statistics,
-            });
-            if limit_exhausted {
-                break;
-            }
-        }
-
-        // we currently get the schema information from the first file rather than do
-        // schema merging and this is a limitation.
-        // See https://issues.apache.org/jira/browse/ARROW-11017
-        if schemas.len() > 1 {
-            return Err(DataFusionError::Plan(format!(
-                "The Parquet files have {} different schemas and DataFusion does \
-                not yet support schema merging",
-                schemas.len()
-            )));
-        }
-        let schema = schemas[0].clone();
-        let predicate_builder = predicate.and_then(|predicate_expr| {
-            RowGroupPredicateBuilder::try_new(&predicate_expr, schema.clone()).ok()
-        });
-
-        Ok(Self::new(
-            partitions,
-            schema,
-            projection,
-            predicate_builder,
-            batch_size,
-            limit,
-        ))
-    }
-
-    /// Create a new Parquet reader execution plan with provided partitions and schema
-    pub fn new(
-        partitions: Vec<ParquetPartition>,
-        schema: Schema,
-        projection: Option<Vec<usize>>,
-        predicate_builder: Option<RowGroupPredicateBuilder>,
-        batch_size: usize,
-        limit: Option<usize>,
-    ) -> Self {
-        let projection = match projection {
-            Some(p) => p,
-            None => (0..schema.fields().len()).collect(),
-        };
-
-        let projected_schema = Schema::new(
-            projection
-                .iter()
-                .map(|i| schema.field(*i).clone())
-                .collect(),
-        );
-
-        // sum the statistics
-        let mut num_rows: Option<usize> = None;
-        let mut total_byte_size: Option<usize> = None;
-        let mut null_counts: Vec<usize> = vec![0; schema.fields().len()];
-        let mut has_null_counts = false;
-        for part in &partitions {
-            if let Some(n) = part.statistics.num_rows {
-                num_rows = Some(num_rows.unwrap_or(0) + n)
-            }
-            if let Some(n) = part.statistics.total_byte_size {
-                total_byte_size = Some(total_byte_size.unwrap_or(0) + n)
-            }
-            if let Some(x) = &part.statistics.column_statistics {
-                let part_nulls: Vec<Option<usize>> =
-                    x.iter().map(|c| c.null_count).collect();
-                has_null_counts = true;
-
-                for &i in projection.iter() {
-                    null_counts[i] = part_nulls[i].unwrap_or(0);
-                }
-            }
-        }
-        let column_stats = if has_null_counts {
-            Some(
-                null_counts
-                    .iter()
-                    .map(|null_count| ColumnStatistics {
-                        null_count: Some(*null_count),
-                        distinct_count: None,
-                        max_value: None,
-                        min_value: None,
-                    })
-                    .collect(),
-            )
-        } else {
-            None
-        };
-
-        let statistics = Statistics {
-            num_rows,
-            total_byte_size,
-            column_statistics: column_stats,
-        };
-        Self {
-            partitions,
-            schema: Arc::new(projected_schema),
-            projection,
-            predicate_builder,
-            batch_size,
-            statistics,
-            limit,
-        }
-    }
-
-    /// Parquet partitions to read
-    pub fn partitions(&self) -> &[ParquetPartition] {
-        &self.partitions
-    }
-
-    /// Projection for which columns to load
-    pub fn projection(&self) -> &[usize] {
-        &self.projection
-    }
-
-    /// Batch size
-    pub fn batch_size(&self) -> usize {
-        self.batch_size
-    }
-
-    /// Statistics for the data set (sum of statistics for all partitions)
-    pub fn statistics(&self) -> &Statistics {
-        &self.statistics
-    }
-}
-
-impl ParquetPartition {
-    /// Create a new parquet partition
-    pub fn new(filenames: Vec<String>, statistics: Statistics) -> Self {
-        Self {
-            filenames,
-            statistics,
-        }
-    }
-
-    /// The Parquet filename for this partition
-    pub fn filenames(&self) -> &[String] {
-        &self.filenames
-    }
-
-    /// Statistics for this partition
-    pub fn statistics(&self) -> &Statistics {
-        &self.statistics
-    }
-}
-
-#[derive(Debug, Clone)]
-/// Predicate builder used for generating of predicate functions, used to filter row group metadata
-pub struct RowGroupPredicateBuilder {
-    parquet_schema: Schema,
-    predicate_expr: Arc<dyn PhysicalExpr>,
-    stat_column_req: Vec<(String, StatisticsType, Field)>,
-}
-
-impl RowGroupPredicateBuilder {
-    /// Try to create a new instance of PredicateExpressionBuilder.
-    /// This will translate the filter expression into a statistics predicate expression
-    /// (for example (column / 2) = 4 becomes (column_min / 2) <= 4 && 4 <= (column_max / 2)),
-    /// then convert it to a DataFusion PhysicalExpression and cache it for later use by build_row_group_predicate.
-    pub fn try_new(expr: &Expr, parquet_schema: Schema) -> Result<Self> {
-        // build predicate expression once
-        let mut stat_column_req = Vec::<(String, StatisticsType, Field)>::new();
-        let logical_predicate_expr =
-            build_predicate_expression(expr, &parquet_schema, &mut stat_column_req)?;
-        // println!(
-        //     "RowGroupPredicateBuilder::try_new, logical_predicate_expr: {:?}",
-        //     logical_predicate_expr
-        // );
-        // build physical predicate expression
-        let stat_fields = stat_column_req
-            .iter()
-            .map(|(_, _, f)| f.clone())
-            .collect::<Vec<_>>();
-        let stat_schema = Schema::new(stat_fields);
-        let execution_context_state = ExecutionContextState {
-            catalog_list: Arc::new(MemoryCatalogList::new()),
-            scalar_functions: HashMap::new(),
-            var_provider: HashMap::new(),
-            aggregate_functions: HashMap::new(),
-            config: ExecutionConfig::new(),
-        };
-        let predicate_expr = DefaultPhysicalPlanner::default().create_physical_expr(
-            &logical_predicate_expr,
-            &stat_schema,
-            &execution_context_state,
-        )?;
-        // println!(
-        //     "RowGroupPredicateBuilder::try_new, predicate_expr: {:?}",
-        //     predicate_expr
-        // );
-        Ok(Self {
-            parquet_schema,
-            predicate_expr,
-            stat_column_req,
-        })
-    }
-
-    /// Generate a predicate function used to filter row group metadata.
-    /// This function takes a list of all row groups as parameter,
-    /// so that DataFusion's physical expressions can be re-used by
-    /// generating a RecordBatch, containing statistics arrays,
-    /// on which the physical predicate expression is executed to generate a row group filter array.
-    /// The generated filter array is then used in the returned closure to filter row groups.
-    pub fn build_row_group_predicate(
-        &self,
-        row_group_metadata: &[RowGroupMetaData],
-    ) -> Box<dyn Fn(&RowGroupMetaData, usize) -> bool> {
-        // build statistics record batch
-        let predicate_result = build_statistics_record_batch(
-            row_group_metadata,
-            &self.parquet_schema,
-            &self.stat_column_req,
-        )
-        .and_then(|statistics_batch| {
-            // execute predicate expression
-            self.predicate_expr.evaluate(&statistics_batch)
-        })
-        .and_then(|v| match v {
-            ColumnarValue::Array(array) => Ok(array),
-            ColumnarValue::Scalar(_) => Err(DataFusionError::Plan(
-                "predicate expression didn't return an array".to_string(),
-            )),
-        });
-
-        let predicate_array = match predicate_result {
-            Ok(array) => array,
-            // row group filter array could not be built
-            // return a closure which will not filter out any row groups
-            _ => return Box::new(|_r, _i| true),
-        };
-
-        let predicate_array = predicate_array.as_any().downcast_ref::<BooleanArray>();
-        match predicate_array {
-            // return row group predicate function
-            Some(array) => {
-                // when the result of the predicate expression for a row group is null / undefined,
-                // e.g. due to missing statistics, this row group can't be filtered out,
-                // so replace with true
-                let predicate_values =
-                    array.iter().map(|x| x.unwrap_or(true)).collect::<Vec<_>>();
-                Box::new(move |_, i| predicate_values[i])
-            }
-            // predicate result is not a BooleanArray
-            // return a closure which will not filter out any row groups
-            _ => Box::new(|_r, _i| true),
-        }
-    }
-}
-
-/// Build a RecordBatch from a list of RowGroupMetadata structs,
-/// creating arrays, one for each statistics column,
-/// as requested in the stat_column_req parameter.
-fn build_statistics_record_batch(
-    row_groups: &[RowGroupMetaData],
-    parquet_schema: &Schema,
-    stat_column_req: &[(String, StatisticsType, Field)],
-) -> Result<RecordBatch> {
-    let mut fields = Vec::<Field>::new();
-    let mut arrays = Vec::<ArrayRef>::new();
-    for (column_name, statistics_type, stat_field) in stat_column_req {
-        if let Some((column_index, _)) = parquet_schema.column_with_name(column_name) {
-            let statistics = row_groups
-                .iter()
-                .map(|g| g.column(column_index).statistics())
-                .collect::<Vec<_>>();
-            let array = build_statistics_array(
-                &statistics,
-                *statistics_type,
-                stat_field.data_type(),
-            );
-            fields.push(stat_field.clone());
-            arrays.push(array);
-        }
-    }
-    let schema = Arc::new(Schema::new(fields));
-    RecordBatch::try_new(schema, arrays)
-        .map_err(|err| DataFusionError::Plan(err.to_string()))
-}
-
-struct StatisticsExpressionBuilder<'a> {
-    column_name: String,
-    column_expr: &'a Expr,
-    scalar_expr: &'a Expr,
-    parquet_field: &'a Field,
-    stat_column_req: &'a mut Vec<(String, StatisticsType, Field)>,
-    reverse_operator: bool,
-}
-
-impl<'a> StatisticsExpressionBuilder<'a> {
-    fn try_new(
-        left: &'a Expr,
-        right: &'a Expr,
-        parquet_schema: &'a Schema,
-        stat_column_req: &'a mut Vec<(String, StatisticsType, Field)>,
-    ) -> Result<Self> {
-        // find column name; input could be a more complicated expression
-        let mut left_columns = HashSet::<String>::new();
-        utils::expr_to_column_names(left, &mut left_columns)?;
-        let mut right_columns = HashSet::<String>::new();
-        utils::expr_to_column_names(right, &mut right_columns)?;
-        let (column_expr, scalar_expr, column_names, reverse_operator) =
-            match (left_columns.len(), right_columns.len()) {
-                (1, 0) => (left, right, left_columns, false),
-                (0, 1) => (right, left, right_columns, true),
-                _ => {
-                    // if more than one column used in expression - not supported
-                    return Err(DataFusionError::Plan(
-                        "Multi-column expressions are not currently supported"
-                            .to_string(),
-                    ));
-                }
-            };
-        let column_name = column_names.iter().next().unwrap().clone();
-        let field = match parquet_schema.column_with_name(&column_name) {
-            Some((_, f)) => f,
-            _ => {
-                // field not found in parquet schema
-                return Err(DataFusionError::Plan(
-                    "Field not found in parquet schema".to_string(),
-                ));
-            }
-        };
-
-        Ok(Self {
-            column_name,
-            column_expr,
-            scalar_expr,
-            parquet_field: field,
-            stat_column_req,
-            reverse_operator,
-        })
-    }
-
-    fn correct_operator(&self, op: Operator) -> Operator {
-        if !self.reverse_operator {
-            return op;
-        }
-
-        match op {
-            Operator::Lt => Operator::Gt,
-            Operator::Gt => Operator::Lt,
-            Operator::LtEq => Operator::GtEq,
-            Operator::GtEq => Operator::LtEq,
-            _ => op,
-        }
-    }
-
-    // fn column_expr(&self) -> &Expr {
-    //     self.column_expr
-    // }
-
-    fn scalar_expr(&self) -> &Expr {
-        self.scalar_expr
-    }
-
-    // fn column_name(&self) -> &String {
-    //     &self.column_name
-    // }
-
-    fn is_stat_column_missing(&self, statistics_type: StatisticsType) -> bool {
-        self.stat_column_req
-            .iter()
-            .filter(|(c, t, _f)| c == &self.column_name && t == &statistics_type)
-            .count()
-            == 0
-    }
-
-    fn stat_column_expr(
-        &mut self,
-        stat_type: StatisticsType,
-        suffix: &str,
-    ) -> Result<Expr> {
-        let stat_column_name = format!("{}_{}", self.column_name, suffix);
-        let stat_field = Field::new(
-            stat_column_name.as_str(),
-            self.parquet_field.data_type().clone(),
-            self.parquet_field.is_nullable(),
-        );
-        if self.is_stat_column_missing(stat_type) {
-            // only add statistics column if not previously added
-            self.stat_column_req
-                .push((self.column_name.clone(), stat_type, stat_field));
-        }
-        rewrite_column_expr(
-            self.column_expr,
-            self.column_name.as_str(),
-            stat_column_name.as_str(),
-        )
-    }
-
-    fn min_column_expr(&mut self) -> Result<Expr> {
-        self.stat_column_expr(StatisticsType::Min, "min")
-    }
-
-    fn max_column_expr(&mut self) -> Result<Expr> {
-        self.stat_column_expr(StatisticsType::Max, "max")
-    }
-}
-
-/// replaces a column with an old name with a new name in an expression
-fn rewrite_column_expr(
-    expr: &Expr,
-    column_old_name: &str,
-    column_new_name: &str,
-) -> Result<Expr> {
-    let expressions = utils::expr_sub_expressions(&expr)?;
-    let expressions = expressions
-        .iter()
-        .map(|e| rewrite_column_expr(e, column_old_name, column_new_name))
-        .collect::<Result<Vec<_>>>()?;
-
-    if let Expr::Column(name) = expr {
-        if name == column_old_name {
-            return Ok(Expr::Column(column_new_name.to_string()));
-        }
-    }
-    utils::rewrite_expression(&expr, &expressions)
-}
-
-/// Translate logical filter expression into parquet statistics predicate expression
-fn build_predicate_expression(
-    expr: &Expr,
-    parquet_schema: &Schema,
-    stat_column_req: &mut Vec<(String, StatisticsType, Field)>,
-) -> Result<Expr> {
-    use crate::logical_plan;
-    // predicate expression can only be a binary expression
-    let (left, op, right) = match expr {
-        Expr::BinaryExpr { left, op, right } => (left, *op, right),
-        _ => {
-            // unsupported expression - replace with TRUE
-            // this can still be useful when multiple conditions are joined using AND
-            // such as: column > 10 AND TRUE
-            return Ok(logical_plan::lit(true));
-        }
-    };
-
-    if op == Operator::And || op == Operator::Or {
-        let left_expr =
-            build_predicate_expression(left, parquet_schema, stat_column_req)?;
-        let right_expr =
-            build_predicate_expression(right, parquet_schema, stat_column_req)?;
-        return Ok(logical_plan::binary_expr(left_expr, op, right_expr));
-    }
-
-    let expr_builder = StatisticsExpressionBuilder::try_new(
-        left,
-        right,
-        parquet_schema,
-        stat_column_req,
-    );
-    let mut expr_builder = match expr_builder {
-        Ok(builder) => builder,
-        // allow partial failure in predicate expression generation
-        // this can still produce a useful predicate when multiple conditions are joined using AND
-        Err(_) => {
-            return Ok(logical_plan::lit(true));
-        }
-    };
-    let corrected_op = expr_builder.correct_operator(op);
-    let statistics_expr = match corrected_op {
-        Operator::Eq => {
-            // column = literal => (min, max) = literal => min <= literal && literal <= max
-            // (column / 2) = 4 => (column_min / 2) <= 4 && 4 <= (column_max / 2)
-            let min_column_expr = expr_builder.min_column_expr()?;
-            let max_column_expr = expr_builder.max_column_expr()?;
-            min_column_expr
-                .lt_eq(expr_builder.scalar_expr().clone())
-                .and(expr_builder.scalar_expr().clone().lt_eq(max_column_expr))
-        }
-        Operator::Gt => {
-            // column > literal => (min, max) > literal => max > literal
-            expr_builder
-                .max_column_expr()?
-                .gt(expr_builder.scalar_expr().clone())
-        }
-        Operator::GtEq => {
-            // column >= literal => (min, max) >= literal => max >= literal
-            expr_builder
-                .max_column_expr()?
-                .gt_eq(expr_builder.scalar_expr().clone())
-        }
-        Operator::Lt => {
-            // column < literal => (min, max) < literal => min < literal
-            expr_builder
-                .min_column_expr()?
-                .lt(expr_builder.scalar_expr().clone())
-        }
-        Operator::LtEq => {
-            // column <= literal => (min, max) <= literal => min <= literal
-            expr_builder
-                .min_column_expr()?
-                .lt_eq(expr_builder.scalar_expr().clone())
-        }
-        // other expressions are not supported
-        _ => logical_plan::lit(true),
-    };
-    Ok(statistics_expr)
-}
-
-#[derive(Debug, Copy, Clone, PartialEq)]
-enum StatisticsType {
-    Min,
-    Max,
-}
-
-fn build_statistics_array(
-    statistics: &[Option<&ParquetStatistics>],
-    statistics_type: StatisticsType,
-    data_type: &DataType,
-) -> ArrayRef {
-    let statistics_count = statistics.len();
-    let first_group_stats = statistics.iter().find(|s| s.is_some());
-    let first_group_stats = if let Some(Some(statistics)) = first_group_stats {
-        // found first row group with statistics defined
-        statistics
-    } else {
-        // no row group has statistics defined
-        return new_null_array(data_type, statistics_count);
-    };
-
-    let (data_size, arrow_type) = match first_group_stats {
-        ParquetStatistics::Int32(_) => (std::mem::size_of::<i32>(), DataType::Int32),
-        ParquetStatistics::Int64(_) => (std::mem::size_of::<i64>(), DataType::Int64),
-        ParquetStatistics::Float(_) => (std::mem::size_of::<f32>(), DataType::Float32),
-        ParquetStatistics::Double(_) => (std::mem::size_of::<f64>(), DataType::Float64),
-        ParquetStatistics::ByteArray(_) if data_type == &DataType::Utf8 => {
-            (0, DataType::Utf8)
-        }
-        _ => {
-            // type of statistics not supported
-            return new_null_array(data_type, statistics_count);
-        }
-    };
-
-    let statistics = statistics.iter().map(|s| {
-        s.filter(|s| s.has_min_max_set())
-            .map(|s| match statistics_type {
-                StatisticsType::Min => s.min_bytes(),
-                StatisticsType::Max => s.max_bytes(),
-            })
-    });
-
-    if arrow_type == DataType::Utf8 {
-        let data_size = statistics
-            .clone()
-            .map(|x| x.map(|b| b.len()).unwrap_or(0))
-            .sum();
-        let mut builder =
-            arrow::array::StringBuilder::with_capacity(statistics_count, data_size);
-        let string_statistics =
-            statistics.map(|x| x.and_then(|bytes| std::str::from_utf8(bytes).ok()));
-        for maybe_string in string_statistics {
-            match maybe_string {
-                Some(string_value) => builder.append_value(string_value).unwrap(),
-                None => builder.append_null().unwrap(),
-            };
-        }
-        return Arc::new(builder.finish());
-    }
-
-    let mut data_buffer = MutableBuffer::new(statistics_count * data_size);
-    let mut bitmap_builder = BooleanBufferBuilder::new(statistics_count);
-    let mut null_count = 0;
-    for s in statistics {
-        if let Some(stat_data) = s {
-            bitmap_builder.append(true);
-            data_buffer.extend_from_slice(stat_data);
-        } else {
-            bitmap_builder.append(false);
-            data_buffer.resize(data_buffer.len() + data_size, 0);
-            null_count += 1;
-        }
-    }
-
-    let mut builder = ArrayData::builder(arrow_type)
-        .len(statistics_count)
-        .add_buffer(data_buffer.into());
-    if null_count > 0 {
-        builder = builder.null_bit_buffer(bitmap_builder.finish());
-    }
-    let array_data = builder.build();
-    let statistics_array = make_array(array_data);
-    if statistics_array.data_type() == data_type {
-        return statistics_array;
-    }
-    // cast statistics array to required data type
-    arrow::compute::cast(&statistics_array, data_type)
-        .unwrap_or_else(|_| new_null_array(data_type, statistics_count))
-}
-
-#[async_trait]
-impl ExecutionPlan for ParquetExec {
-    /// Return a reference to Any that can be used for downcasting
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-
-    fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
-        // this is a leaf node and has no children
-        vec![]
-    }
-
-    /// Get the output partitioning of this plan
-    fn output_partitioning(&self) -> Partitioning {
-        Partitioning::UnknownPartitioning(self.partitions.len())
-    }
-
-    fn with_new_children(
-        &self,
-        children: Vec<Arc<dyn ExecutionPlan>>,
-    ) -> Result<Arc<dyn ExecutionPlan>> {
-        if children.is_empty() {
-            Ok(Arc::new(self.clone()))
-        } else {
-            Err(DataFusionError::Internal(format!(
-                "Children cannot be replaced in {:?}",
-                self
-            )))
-        }
-    }
-
-    async fn execute(&self, partition: usize) -> Result<SendableRecordBatchStream> {
-        // because the parquet implementation is not thread-safe, it is necessary to execute
-        // on a thread and communicate with channels
-        let (response_tx, response_rx): (
-            Sender<ArrowResult<RecordBatch>>,
-            Receiver<ArrowResult<RecordBatch>>,
-        ) = channel(2);
-
-        let filenames = self.partitions[partition].filenames.clone();
-        let projection = self.projection.clone();
-        let predicate_builder = self.predicate_builder.clone();
-        let batch_size = self.batch_size;
-        let limit = self.limit;
-
-        task::spawn_blocking(move || {
-            if let Err(e) = read_files(
-                &filenames,
-                &projection,
-                &predicate_builder,
-                batch_size,
-                response_tx,
-                limit,
-            ) {
-                println!("Parquet reader thread terminated due to error: {:?}", e);
-            }
-        });
-
-        Ok(Box::pin(ParquetStream {
-            schema: self.schema.clone(),
-            inner: ReceiverStream::new(response_rx),
-        }))
-    }
-}
-
-fn send_result(
-    response_tx: &Sender<ArrowResult<RecordBatch>>,
-    result: ArrowResult<RecordBatch>,
-) -> Result<()> {
-    // Note this function is running on its own blockng tokio thread so blocking here is ok.
-    response_tx
-        .blocking_send(result)
-        .map_err(|e| DataFusionError::Execution(e.to_string()))?;
-    Ok(())
-}
-
-fn read_files(
-    filenames: &[String],
-    projection: &[usize],
-    predicate_builder: &Option<RowGroupPredicateBuilder>,
-    batch_size: usize,
-    response_tx: Sender<ArrowResult<RecordBatch>>,
-    limit: Option<usize>,
-) -> Result<()> {
-    let mut total_rows = 0;
-    'outer: for filename in filenames {
-        let file = File::open(&filename)?;
-        let mut file_reader = SerializedFileReader::new(file)?;
-        if let Some(predicate_builder) = predicate_builder {
-            let row_group_predicate = predicate_builder
-                .build_row_group_predicate(file_reader.metadata().row_groups());
-            file_reader.filter_row_groups(&row_group_predicate);
-        }
-        let mut arrow_reader = ParquetFileArrowReader::new(Arc::new(file_reader));
-        let mut batch_reader = arrow_reader
-            .get_record_reader_by_columns(projection.to_owned(), batch_size)?;
-        loop {
-            match batch_reader.next() {
-                Some(Ok(batch)) => {
-                    //println!("ParquetExec got new batch from {}", filename);
-                    total_rows += batch.num_rows();
-                    send_result(&response_tx, Ok(batch))?;
-                    if limit.map(|l| total_rows >= l).unwrap_or(false) {
-                        break 'outer;
-                    }
-                }
-                None => {
-                    break;
-                }
-                Some(Err(e)) => {
-                    let err_msg = format!(
-                        "Error reading batch from {}: {}",
-                        filename,
-                        e.to_string()
-                    );
-                    // send error to operator
-                    send_result(
-                        &response_tx,
-                        Err(ArrowError::ParquetError(err_msg.clone())),
-                    )?;
-                    // terminate thread with error
-                    return Err(DataFusionError::Execution(err_msg));
-                }
-            }
-        }
-    }
-
-    // finished reading files (dropping response_tx will close
-    // channel)
-    Ok(())
-}
-
-fn split_files(filenames: &[String], n: usize) -> Vec<&[String]> {
-    let mut chunk_size = filenames.len() / n;
-    if filenames.len() % n > 0 {
-        chunk_size += 1;
-    }
-    filenames.chunks(chunk_size).collect()
-}
-
-struct ParquetStream {
-    schema: SchemaRef,
-    inner: ReceiverStream<ArrowResult<RecordBatch>>,
-}
-
-impl Stream for ParquetStream {
-    type Item = ArrowResult<RecordBatch>;
-
-    fn poll_next(
-        mut self: std::pin::Pin<&mut Self>,
-        cx: &mut Context<'_>,
-    ) -> Poll<Option<Self::Item>> {
-        self.inner.poll_next_unpin(cx)
-    }
-}
-
-impl RecordBatchStream for ParquetStream {
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use arrow::array::{Int32Array, StringArray};
-    use futures::StreamExt;
-    use parquet::basic::Type as PhysicalType;
-    use parquet::schema::types::SchemaDescPtr;
-
-    #[test]
-    fn test_split_files() {
-        let filenames = vec![
-            "a".to_string(),
-            "b".to_string(),
-            "c".to_string(),
-            "d".to_string(),
-            "e".to_string(),
-        ];
-
-        let chunks = split_files(&filenames, 1);
-        assert_eq!(1, chunks.len());
-        assert_eq!(5, chunks[0].len());
-
-        let chunks = split_files(&filenames, 2);
-        assert_eq!(2, chunks.len());
-        assert_eq!(3, chunks[0].len());
-        assert_eq!(2, chunks[1].len());
-
-        let chunks = split_files(&filenames, 5);
-        assert_eq!(5, chunks.len());
-        assert_eq!(1, chunks[0].len());
-        assert_eq!(1, chunks[1].len());
-        assert_eq!(1, chunks[2].len());
-        assert_eq!(1, chunks[3].len());
-        assert_eq!(1, chunks[4].len());
-
-        let chunks = split_files(&filenames, 123);
-        assert_eq!(5, chunks.len());
-        assert_eq!(1, chunks[0].len());
-        assert_eq!(1, chunks[1].len());
-        assert_eq!(1, chunks[2].len());
-        assert_eq!(1, chunks[3].len());
-        assert_eq!(1, chunks[4].len());
-    }
-
-    #[tokio::test]
-    async fn test() -> Result<()> {
-        let testdata = arrow::util::test_util::parquet_test_data();
-        let filename = format!("{}/alltypes_plain.parquet", testdata);
-        let parquet_exec = ParquetExec::try_from_path(
-            &filename,
-            Some(vec![0, 1, 2]),
-            None,
-            1024,
-            4,
-            None,
-        )?;
-        assert_eq!(parquet_exec.output_partitioning().partition_count(), 1);
-
-        let mut results = parquet_exec.execute(0).await?;
-        let batch = results.next().await.unwrap()?;
-
-        assert_eq!(8, batch.num_rows());
-        assert_eq!(3, batch.num_columns());
-
-        let schema = batch.schema();
-        let field_names: Vec<&str> =
-            schema.fields().iter().map(|f| f.name().as_str()).collect();
-        assert_eq!(vec!["id", "bool_col", "tinyint_col"], field_names);
-
-        let batch = results.next().await;
-        assert!(batch.is_none());
-
-        let batch = results.next().await;
-        assert!(batch.is_none());
-
-        let batch = results.next().await;
-        assert!(batch.is_none());
-
-        Ok(())
-    }
-
-    #[test]
-    fn build_statistics_array_int32() {
-        // build row group metadata array
-        let s1 = ParquetStatistics::int32(None, Some(10), None, 0, false);
-        let s2 = ParquetStatistics::int32(Some(2), Some(20), None, 0, false);
-        let s3 = ParquetStatistics::int32(Some(3), Some(30), None, 0, false);
-        let statistics = vec![Some(&s1), Some(&s2), Some(&s3)];
-
-        let statistics_array =
-            build_statistics_array(&statistics, StatisticsType::Min, &DataType::Int32);
-        let int32_array = statistics_array
-            .as_any()
-            .downcast_ref::<Int32Array>()
-            .unwrap();
-        let int32_vec = int32_array.into_iter().collect::<Vec<_>>();
-        assert_eq!(int32_vec, vec![None, Some(2), Some(3)]);
-
-        let statistics_array =
-            build_statistics_array(&statistics, StatisticsType::Max, &DataType::Int32);
-        let int32_array = statistics_array
-            .as_any()
-            .downcast_ref::<Int32Array>()
-            .unwrap();
-        let int32_vec = int32_array.into_iter().collect::<Vec<_>>();
-        // here the first max value is None and not the Some(10) value which was actually set
-        // because the min value is None
-        assert_eq!(int32_vec, vec![None, Some(20), Some(30)]);
-    }
-
-    #[test]
-    fn build_statistics_array_utf8() {
-        // build row group metadata array
-        let s1 = ParquetStatistics::byte_array(None, Some("10".into()), None, 0, false);
-        let s2 = ParquetStatistics::byte_array(
-            Some("2".into()),
-            Some("20".into()),
-            None,
-            0,
-            false,
-        );
-        let s3 = ParquetStatistics::byte_array(
-            Some("3".into()),
-            Some("30".into()),
-            None,
-            0,
-            false,
-        );
-        let statistics = vec![Some(&s1), Some(&s2), Some(&s3)];
-
-        let statistics_array =
-            build_statistics_array(&statistics, StatisticsType::Min, &DataType::Utf8);
-        let string_array = statistics_array
-            .as_any()
-            .downcast_ref::<StringArray>()
-            .unwrap();
-        let string_vec = string_array.into_iter().collect::<Vec<_>>();
-        assert_eq!(string_vec, vec![None, Some("2"), Some("3")]);
-
-        let statistics_array =
-            build_statistics_array(&statistics, StatisticsType::Max, &DataType::Utf8);
-        let string_array = statistics_array
-            .as_any()
-            .downcast_ref::<StringArray>()
-            .unwrap();
-        let string_vec = string_array.into_iter().collect::<Vec<_>>();
-        // here the first max value is None and not the Some("10") value which was actually set
-        // because the min value is None
-        assert_eq!(string_vec, vec![None, Some("20"), Some("30")]);
-    }
-
-    #[test]
-    fn build_statistics_array_empty_stats() {
-        let data_type = DataType::Int32;
-        let statistics = vec![];
-        let statistics_array =
-            build_statistics_array(&statistics, StatisticsType::Min, &data_type);
-        assert_eq!(statistics_array.len(), 0);
-
-        let statistics = vec![None, None];
-        let statistics_array =
-            build_statistics_array(&statistics, StatisticsType::Min, &data_type);
-        assert_eq!(statistics_array.len(), statistics.len());
-        assert_eq!(statistics_array.data_type(), &data_type);
-        for i in 0..statistics_array.len() {
-            assert_eq!(statistics_array.is_null(i), true);
-            assert_eq!(statistics_array.is_valid(i), false);
-        }
-    }
-
-    #[test]
-    fn build_statistics_array_unsupported_type() {
-        // boolean is not currently a supported type for statistics
-        let s1 = ParquetStatistics::boolean(Some(false), Some(true), None, 0, false);
-        let s2 = ParquetStatistics::boolean(Some(false), Some(true), None, 0, false);
-        let statistics = vec![Some(&s1), Some(&s2)];
-        let data_type = DataType::Boolean;
-        let statistics_array =
-            build_statistics_array(&statistics, StatisticsType::Min, &data_type);
-        assert_eq!(statistics_array.len(), statistics.len());
-        assert_eq!(statistics_array.data_type(), &data_type);
-        for i in 0..statistics_array.len() {
-            assert_eq!(statistics_array.is_null(i), true);
-            assert_eq!(statistics_array.is_valid(i), false);
-        }
-    }
-
-    #[test]
-    fn row_group_predicate_eq() -> Result<()> {
-        use crate::logical_plan::{col, lit};
-        let schema = Schema::new(vec![Field::new("c1", DataType::Int32, false)]);
-        let expected_expr = "#c1_min LtEq Int32(1) And Int32(1) LtEq #c1_max";
-
-        // test column on the left
-        let expr = col("c1").eq(lit(1));
-        let predicate_expr = build_predicate_expression(&expr, &schema, &mut vec![])?;
-        assert_eq!(format!("{:?}", predicate_expr), expected_expr);
-
-        // test column on the right
-        let expr = lit(1).eq(col("c1"));
-        let predicate_expr = build_predicate_expression(&expr, &schema, &mut vec![])?;
-        assert_eq!(format!("{:?}", predicate_expr), expected_expr);
-
-        Ok(())
-    }
-
-    #[test]
-    fn row_group_predicate_gt() -> Result<()> {
-        use crate::logical_plan::{col, lit};
-        let schema = Schema::new(vec![Field::new("c1", DataType::Int32, false)]);
-        let expected_expr = "#c1_max Gt Int32(1)";
-
-        // test column on the left
-        let expr = col("c1").gt(lit(1));
-        let predicate_expr = build_predicate_expression(&expr, &schema, &mut vec![])?;
-        assert_eq!(format!("{:?}", predicate_expr), expected_expr);
-
-        // test column on the right
-        let expr = lit(1).lt(col("c1"));
-        let predicate_expr = build_predicate_expression(&expr, &schema, &mut vec![])?;
-        assert_eq!(format!("{:?}", predicate_expr), expected_expr);
-
-        Ok(())
-    }
-
-    #[test]
-    fn row_group_predicate_gt_eq() -> Result<()> {
-        use crate::logical_plan::{col, lit};
-        let schema = Schema::new(vec![Field::new("c1", DataType::Int32, false)]);
-        let expected_expr = "#c1_max GtEq Int32(1)";
-
-        // test column on the left
-        let expr = col("c1").gt_eq(lit(1));
-        let predicate_expr = build_predicate_expression(&expr, &schema, &mut vec![])?;
-        assert_eq!(format!("{:?}", predicate_expr), expected_expr);
-        // test column on the right
-        let expr = lit(1).lt_eq(col("c1"));
-        let predicate_expr = build_predicate_expression(&expr, &schema, &mut vec![])?;
-        assert_eq!(format!("{:?}", predicate_expr), expected_expr);
-
-        Ok(())
-    }
-
-    #[test]
-    fn row_group_predicate_lt() -> Result<()> {
-        use crate::logical_plan::{col, lit};
-        let schema = Schema::new(vec![Field::new("c1", DataType::Int32, false)]);
-        let expected_expr = "#c1_min Lt Int32(1)";
-
-        // test column on the left
-        let expr = col("c1").lt(lit(1));
-        let predicate_expr = build_predicate_expression(&expr, &schema, &mut vec![])?;
-        assert_eq!(format!("{:?}", predicate_expr), expected_expr);
-
-        // test column on the right
-        let expr = lit(1).gt(col("c1"));
-        let predicate_expr = build_predicate_expression(&expr, &schema, &mut vec![])?;
-        assert_eq!(format!("{:?}", predicate_expr), expected_expr);
-
-        Ok(())
-    }
-
-    #[test]
-    fn row_group_predicate_lt_eq() -> Result<()> {
-        use crate::logical_plan::{col, lit};
-        let schema = Schema::new(vec![Field::new("c1", DataType::Int32, false)]);
-        let expected_expr = "#c1_min LtEq Int32(1)";
-
-        // test column on the left
-        let expr = col("c1").lt_eq(lit(1));
-        let predicate_expr = build_predicate_expression(&expr, &schema, &mut vec![])?;
-        assert_eq!(format!("{:?}", predicate_expr), expected_expr);
-        // test column on the right
-        let expr = lit(1).gt_eq(col("c1"));
-        let predicate_expr = build_predicate_expression(&expr, &schema, &mut vec![])?;
-        assert_eq!(format!("{:?}", predicate_expr), expected_expr);
-
-        Ok(())
-    }
-
-    #[test]
-    fn row_group_predicate_and() -> Result<()> {
-        use crate::logical_plan::{col, lit};
-        let schema = Schema::new(vec![
-            Field::new("c1", DataType::Int32, false),
-            Field::new("c2", DataType::Int32, false),
-            Field::new("c3", DataType::Int32, false),
-        ]);
-        // test AND operator joining supported c1 < 1 expression and unsupported c2 > c3 expression
-        let expr = col("c1").lt(lit(1)).and(col("c2").lt(col("c3")));
-        let expected_expr = "#c1_min Lt Int32(1) And Boolean(true)";
-        let predicate_expr = build_predicate_expression(&expr, &schema, &mut vec![])?;
-        assert_eq!(format!("{:?}", predicate_expr), expected_expr);
-
-        Ok(())
-    }
-
-    #[test]
-    fn row_group_predicate_or() -> Result<()> {
-        use crate::logical_plan::{col, lit};
-        let schema = Schema::new(vec![
-            Field::new("c1", DataType::Int32, false),
-            Field::new("c2", DataType::Int32, false),
-        ]);
-        // test OR operator joining supported c1 < 1 expression and unsupported c2 % 2 expression
-        let expr = col("c1").lt(lit(1)).or(col("c2").modulus(lit(2)));
-        let expected_expr = "#c1_min Lt Int32(1) Or Boolean(true)";
-        let predicate_expr = build_predicate_expression(&expr, &schema, &mut vec![])?;
-        assert_eq!(format!("{:?}", predicate_expr), expected_expr);
-
-        Ok(())
-    }
-
-    #[test]
-    fn row_group_predicate_stat_column_req() -> Result<()> {
-        use crate::logical_plan::{col, lit};
-        let schema = Schema::new(vec![
-            Field::new("c1", DataType::Int32, false),
-            Field::new("c2", DataType::Int32, false),
-        ]);
-        let mut stat_column_req = vec![];
-        // c1 < 1 and (c2 = 2 or c2 = 3)
-        let expr = col("c1")
-            .lt(lit(1))
-            .and(col("c2").eq(lit(2)).or(col("c2").eq(lit(3))));
-        let expected_expr = "#c1_min Lt Int32(1) And #c2_min LtEq Int32(2) And Int32(2) LtEq #c2_max Or #c2_min LtEq Int32(3) And Int32(3) LtEq #c2_max";
-        let predicate_expr =
-            build_predicate_expression(&expr, &schema, &mut stat_column_req)?;
-        assert_eq!(format!("{:?}", predicate_expr), expected_expr);
-        // c1 < 1 should add c1_min
-        let c1_min_field = Field::new("c1_min", DataType::Int32, false);
-        assert_eq!(
-            stat_column_req[0],
-            ("c1".to_owned(), StatisticsType::Min, c1_min_field)
-        );
-        // c2 = 2 should add c2_min and c2_max
-        let c2_min_field = Field::new("c2_min", DataType::Int32, false);
-        assert_eq!(
-            stat_column_req[1],
-            ("c2".to_owned(), StatisticsType::Min, c2_min_field)
-        );
-        let c2_max_field = Field::new("c2_max", DataType::Int32, false);
-        assert_eq!(
-            stat_column_req[2],
-            ("c2".to_owned(), StatisticsType::Max, c2_max_field)
-        );
-        // c2 = 3 shouldn't add any new statistics fields
-        assert_eq!(stat_column_req.len(), 3);
-
-        Ok(())
-    }
-
-    #[test]
-    fn row_group_predicate_builder_simple_expr() -> Result<()> {
-        use crate::logical_plan::{col, lit};
-        // int > 1 => c1_max > 1
-        let expr = col("c1").gt(lit(15));
-        let schema = Schema::new(vec![Field::new("c1", DataType::Int32, false)]);
-        let predicate_builder = RowGroupPredicateBuilder::try_new(&expr, schema)?;
-
-        let schema_descr = get_test_schema_descr(vec![("c1", PhysicalType::INT32)]);
-        let rgm1 = get_row_group_meta_data(
-            &schema_descr,
-            vec![ParquetStatistics::int32(Some(1), Some(10), None, 0, false)],
-        );
-        let rgm2 = get_row_group_meta_data(
-            &schema_descr,
-            vec![ParquetStatistics::int32(Some(11), Some(20), None, 0, false)],
-        );
-        let row_group_metadata = vec![rgm1, rgm2];
-        let row_group_predicate =
-            predicate_builder.build_row_group_predicate(&row_group_metadata);
-        let row_group_filter = row_group_metadata
-            .iter()
-            .enumerate()
-            .map(|(i, g)| row_group_predicate(g, i))
-            .collect::<Vec<_>>();
-        assert_eq!(row_group_filter, vec![false, true]);
-
-        Ok(())
-    }
-
-    #[test]
-    fn row_group_predicate_builder_missing_stats() -> Result<()> {
-        use crate::logical_plan::{col, lit};
-        // int > 1 => c1_max > 1
-        let expr = col("c1").gt(lit(15));
-        let schema = Schema::new(vec![Field::new("c1", DataType::Int32, false)]);
-        let predicate_builder = RowGroupPredicateBuilder::try_new(&expr, schema)?;
-
-        let schema_descr = get_test_schema_descr(vec![("c1", PhysicalType::INT32)]);
-        let rgm1 = get_row_group_meta_data(
-            &schema_descr,
-            vec![ParquetStatistics::int32(None, None, None, 0, false)],
-        );
-        let rgm2 = get_row_group_meta_data(
-            &schema_descr,
-            vec![ParquetStatistics::int32(Some(11), Some(20), None, 0, false)],
-        );
-        let row_group_metadata = vec![rgm1, rgm2];
-        let row_group_predicate =
-            predicate_builder.build_row_group_predicate(&row_group_metadata);
-        let row_group_filter = row_group_metadata
-            .iter()
-            .enumerate()
-            .map(|(i, g)| row_group_predicate(g, i))
-            .collect::<Vec<_>>();
-        // missing statistics for first row group mean that the result from the predicate expression
-        // is null / undefined so the first row group can't be filtered out
-        assert_eq!(row_group_filter, vec![true, true]);
-
-        Ok(())
-    }
-
-    #[test]
-    fn row_group_predicate_builder_partial_expr() -> Result<()> {
-        use crate::logical_plan::{col, lit};
-        // test row group predicate with partially supported expression
-        // int > 1 and int % 2 => c1_max > 1 and true
-        let expr = col("c1").gt(lit(15)).and(col("c2").modulus(lit(2)));
-        let schema = Schema::new(vec![
-            Field::new("c1", DataType::Int32, false),
-            Field::new("c2", DataType::Int32, false),
-        ]);
-        let predicate_builder = RowGroupPredicateBuilder::try_new(&expr, schema.clone())?;
-
-        let schema_descr = get_test_schema_descr(vec![
-            ("c1", PhysicalType::INT32),
-            ("c2", PhysicalType::INT32),
-        ]);
-        let rgm1 = get_row_group_meta_data(
-            &schema_descr,
-            vec![
-                ParquetStatistics::int32(Some(1), Some(10), None, 0, false),
-                ParquetStatistics::int32(Some(1), Some(10), None, 0, false),
-            ],
-        );
-        let rgm2 = get_row_group_meta_data(
-            &schema_descr,
-            vec![
-                ParquetStatistics::int32(Some(11), Some(20), None, 0, false),
-                ParquetStatistics::int32(Some(11), Some(20), None, 0, false),
-            ],
-        );
-        let row_group_metadata = vec![rgm1, rgm2];
-        let row_group_predicate =
-            predicate_builder.build_row_group_predicate(&row_group_metadata);
-        let row_group_filter = row_group_metadata
-            .iter()
-            .enumerate()
-            .map(|(i, g)| row_group_predicate(g, i))
-            .collect::<Vec<_>>();
-        // the first row group is still filtered out because the predicate expression can be partially evaluated
-        // when conditions are joined using AND
-        assert_eq!(row_group_filter, vec![false, true]);
-
-        // if conditions in predicate are joined with OR and an unsupported expression is used
-        // this bypasses the entire predicate expression and no row groups are filtered out
-        let expr = col("c1").gt(lit(15)).or(col("c2").modulus(lit(2)));
-        let predicate_builder = RowGroupPredicateBuilder::try_new(&expr, schema)?;
-        let row_group_predicate =
-            predicate_builder.build_row_group_predicate(&row_group_metadata);
-        let row_group_filter = row_group_metadata
-            .iter()
-            .enumerate()
-            .map(|(i, g)| row_group_predicate(g, i))
-            .collect::<Vec<_>>();
-        assert_eq!(row_group_filter, vec![true, true]);
-
-        Ok(())
-    }
-
-    #[test]
-    fn row_group_predicate_builder_unsupported_type() -> Result<()> {
-        use crate::logical_plan::{col, lit};
-        // test row group predicate with unsupported statistics type (boolean)
-        // where a null array is generated for some statistics columns
-        // int > 1 and bool = true => c1_max > 1 and null
-        let expr = col("c1").gt(lit(15)).and(col("c2").eq(lit(true)));
-        let schema = Schema::new(vec![
-            Field::new("c1", DataType::Int32, false),
-            Field::new("c2", DataType::Boolean, false),
-        ]);
-        let predicate_builder = RowGroupPredicateBuilder::try_new(&expr, schema)?;
-
-        let schema_descr = get_test_schema_descr(vec![
-            ("c1", PhysicalType::INT32),
-            ("c2", PhysicalType::BOOLEAN),
-        ]);
-        let rgm1 = get_row_group_meta_data(
-            &schema_descr,
-            vec![
-                ParquetStatistics::int32(Some(1), Some(10), None, 0, false),
-                ParquetStatistics::boolean(Some(false), Some(true), None, 0, false),
-            ],
-        );
-        let rgm2 = get_row_group_meta_data(
-            &schema_descr,
-            vec![
-                ParquetStatistics::int32(Some(11), Some(20), None, 0, false),
-                ParquetStatistics::boolean(Some(false), Some(true), None, 0, false),
-            ],
-        );
-        let row_group_metadata = vec![rgm1, rgm2];
-        let row_group_predicate =
-            predicate_builder.build_row_group_predicate(&row_group_metadata);
-        let row_group_filter = row_group_metadata
-            .iter()
-            .enumerate()
-            .map(|(i, g)| row_group_predicate(g, i))
-            .collect::<Vec<_>>();
-        // no row group is filtered out because the predicate expression can't be evaluated
-        // when a null array is generated for a statistics column,
-        // because the null values propagate to the end result, making the predicate result undefined
-        assert_eq!(row_group_filter, vec![true, true]);
-
-        Ok(())
-    }
-
-    fn get_row_group_meta_data(
-        schema_descr: &SchemaDescPtr,
-        column_statistics: Vec<ParquetStatistics>,
-    ) -> RowGroupMetaData {
-        use parquet::file::metadata::ColumnChunkMetaData;
-        let mut columns = vec![];
-        for (i, s) in column_statistics.iter().enumerate() {
-            let column = ColumnChunkMetaData::builder(schema_descr.column(i))
-                .set_statistics(s.clone())
-                .build()
-                .unwrap();
-            columns.push(column);
-        }
-        RowGroupMetaData::builder(schema_descr.clone())
-            .set_num_rows(1000)
-            .set_total_byte_size(2000)
-            .set_column_metadata(columns)
-            .build()
-            .unwrap()
-    }
-
-    fn get_test_schema_descr(fields: Vec<(&str, PhysicalType)>) -> SchemaDescPtr {
-        use parquet::schema::types::{SchemaDescriptor, Type as SchemaType};
-        let mut schema_fields = fields
-            .iter()
-            .map(|(n, t)| {
-                Arc::new(SchemaType::primitive_type_builder(n, *t).build().unwrap())
-            })
-            .collect::<Vec<_>>();
-        let schema = SchemaType::group_type_builder("schema")
-            .with_fields(&mut schema_fields)
-            .build()
-            .unwrap();
-
-        Arc::new(SchemaDescriptor::new(Arc::new(schema)))
-    }
-}
diff --git a/rust/datafusion/src/physical_plan/planner.rs b/rust/datafusion/src/physical_plan/planner.rs
deleted file mode 100644
index f9279ae48f0..00000000000
--- a/rust/datafusion/src/physical_plan/planner.rs
+++ /dev/null
@@ -1,1106 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Physical query planner
-
-use std::sync::Arc;
-
-use super::{
-    aggregates, empty::EmptyExec, expressions::binary, functions,
-    hash_join::PartitionMode, udaf, union::UnionExec,
-};
-use crate::error::{DataFusionError, Result};
-use crate::execution::context::ExecutionContextState;
-use crate::logical_plan::{
-    DFSchema, Expr, LogicalPlan, Operator, Partitioning as LogicalPartitioning, PlanType,
-    StringifiedPlan, UserDefinedLogicalNode,
-};
-use crate::physical_plan::explain::ExplainExec;
-use crate::physical_plan::expressions;
-use crate::physical_plan::expressions::{CaseExpr, Column, Literal, PhysicalSortExpr};
-use crate::physical_plan::filter::FilterExec;
-use crate::physical_plan::hash_aggregate::{AggregateMode, HashAggregateExec};
-use crate::physical_plan::hash_join::HashJoinExec;
-use crate::physical_plan::limit::{GlobalLimitExec, LocalLimitExec};
-use crate::physical_plan::projection::ProjectionExec;
-use crate::physical_plan::repartition::RepartitionExec;
-use crate::physical_plan::sort::SortExec;
-use crate::physical_plan::udf;
-use crate::physical_plan::{hash_utils, Partitioning};
-use crate::physical_plan::{AggregateExpr, ExecutionPlan, PhysicalExpr, PhysicalPlanner};
-use crate::prelude::JoinType;
-use crate::scalar::ScalarValue;
-use crate::variable::VarType;
-use arrow::compute::can_cast_types;
-
-use arrow::compute::SortOptions;
-use arrow::datatypes::{Schema, SchemaRef};
-use expressions::col;
-use log::debug;
-
-/// This trait exposes the ability to plan an [`ExecutionPlan`] out of a [`LogicalPlan`].
-pub trait ExtensionPlanner {
-    /// Create a physical plan for a [`UserDefinedLogicalNode`].
-    /// This errors when the planner knows how to plan the concrete implementation of `node`
-    /// but errors while doing so, and `None` when the planner does not know how to plan the `node`
-    /// and wants to delegate the planning to another [`ExtensionPlanner`].
-    fn plan_extension(
-        &self,
-        node: &dyn UserDefinedLogicalNode,
-        inputs: &[Arc<dyn ExecutionPlan>],
-        ctx_state: &ExecutionContextState,
-    ) -> Result<Option<Arc<dyn ExecutionPlan>>>;
-}
-
-/// Default single node physical query planner that converts a
-/// `LogicalPlan` to an `ExecutionPlan` suitable for execution.
-pub struct DefaultPhysicalPlanner {
-    extension_planners: Vec<Arc<dyn ExtensionPlanner + Send + Sync>>,
-}
-
-impl Default for DefaultPhysicalPlanner {
-    fn default() -> Self {
-        Self {
-            extension_planners: vec![],
-        }
-    }
-}
-
-impl PhysicalPlanner for DefaultPhysicalPlanner {
-    /// Create a physical plan from a logical plan
-    fn create_physical_plan(
-        &self,
-        logical_plan: &LogicalPlan,
-        ctx_state: &ExecutionContextState,
-    ) -> Result<Arc<dyn ExecutionPlan>> {
-        let plan = self.create_initial_plan(logical_plan, ctx_state)?;
-        self.optimize_plan(plan, ctx_state)
-    }
-}
-
-impl DefaultPhysicalPlanner {
-    /// Create a physical planner that uses `extension_planners` to
-    /// plan user-defined logical nodes [`LogicalPlan::Extension`].
-    /// The planner uses the first [`ExtensionPlanner`] to return a non-`None`
-    /// plan.
-    pub fn with_extension_planners(
-        extension_planners: Vec<Arc<dyn ExtensionPlanner + Send + Sync>>,
-    ) -> Self {
-        Self { extension_planners }
-    }
-
-    /// Optimize a physical plan
-    fn optimize_plan(
-        &self,
-        plan: Arc<dyn ExecutionPlan>,
-        ctx_state: &ExecutionContextState,
-    ) -> Result<Arc<dyn ExecutionPlan>> {
-        let optimizers = &ctx_state.config.physical_optimizers;
-        debug!("Physical plan:\n{:?}", plan);
-
-        let mut new_plan = plan;
-        for optimizer in optimizers {
-            new_plan = optimizer.optimize(new_plan, &ctx_state.config)?;
-        }
-        debug!("Optimized physical plan:\n{:?}", new_plan);
-        Ok(new_plan)
-    }
-
-    /// Create a physical plan from a logical plan
-    fn create_initial_plan(
-        &self,
-        logical_plan: &LogicalPlan,
-        ctx_state: &ExecutionContextState,
-    ) -> Result<Arc<dyn ExecutionPlan>> {
-        let batch_size = ctx_state.config.batch_size;
-
-        match logical_plan {
-            LogicalPlan::TableScan {
-                source,
-                projection,
-                filters,
-                limit,
-                ..
-            } => source.scan(projection, batch_size, filters, *limit),
-            LogicalPlan::Aggregate {
-                input,
-                group_expr,
-                aggr_expr,
-                ..
-            } => {
-                // Initially need to perform the aggregate and then merge the partitions
-                let input_exec = self.create_initial_plan(input, ctx_state)?;
-                let input_schema = input_exec.schema();
-                let physical_input_schema = input_exec.as_ref().schema();
-                let logical_input_schema = input.as_ref().schema();
-
-                let groups = group_expr
-                    .iter()
-                    .map(|e| {
-                        tuple_err((
-                            self.create_physical_expr(
-                                e,
-                                &physical_input_schema,
-                                ctx_state,
-                            ),
-                            e.name(&logical_input_schema),
-                        ))
-                    })
-                    .collect::<Result<Vec<_>>>()?;
-                let aggregates = aggr_expr
-                    .iter()
-                    .map(|e| {
-                        self.create_aggregate_expr(
-                            e,
-                            &logical_input_schema,
-                            &physical_input_schema,
-                            ctx_state,
-                        )
-                    })
-                    .collect::<Result<Vec<_>>>()?;
-
-                let initial_aggr = Arc::new(HashAggregateExec::try_new(
-                    AggregateMode::Partial,
-                    groups.clone(),
-                    aggregates.clone(),
-                    input_exec,
-                    input_schema.clone(),
-                )?);
-
-                let final_group: Vec<Arc<dyn PhysicalExpr>> =
-                    (0..groups.len()).map(|i| col(&groups[i].1)).collect();
-
-                // construct a second aggregation, keeping the final column name equal to the first aggregation
-                // and the expressions corresponding to the respective aggregate
-                Ok(Arc::new(HashAggregateExec::try_new(
-                    AggregateMode::Final,
-                    final_group
-                        .iter()
-                        .enumerate()
-                        .map(|(i, expr)| (expr.clone(), groups[i].1.clone()))
-                        .collect(),
-                    aggregates,
-                    initial_aggr,
-                    input_schema,
-                )?))
-            }
-            LogicalPlan::Projection { input, expr, .. } => {
-                let input_exec = self.create_initial_plan(input, ctx_state)?;
-                let input_schema = input.as_ref().schema();
-                let runtime_expr = expr
-                    .iter()
-                    .map(|e| {
-                        tuple_err((
-                            self.create_physical_expr(
-                                e,
-                                &input_exec.schema(),
-                                &ctx_state,
-                            ),
-                            e.name(&input_schema),
-                        ))
-                    })
-                    .collect::<Result<Vec<_>>>()?;
-                Ok(Arc::new(ProjectionExec::try_new(runtime_expr, input_exec)?))
-            }
-            LogicalPlan::Filter {
-                input, predicate, ..
-            } => {
-                let input = self.create_initial_plan(input, ctx_state)?;
-                let input_schema = input.as_ref().schema();
-                let runtime_expr =
-                    self.create_physical_expr(predicate, &input_schema, ctx_state)?;
-                Ok(Arc::new(FilterExec::try_new(runtime_expr, input)?))
-            }
-            LogicalPlan::Union { inputs, .. } => {
-                let physical_plans = inputs
-                    .iter()
-                    .map(|input| self.create_initial_plan(input, ctx_state))
-                    .collect::<Result<Vec<_>>>()?;
-                Ok(Arc::new(UnionExec::new(physical_plans)))
-            }
-            LogicalPlan::Repartition {
-                input,
-                partitioning_scheme,
-            } => {
-                let input = self.create_initial_plan(input, ctx_state)?;
-                let input_schema = input.schema();
-                let physical_partitioning = match partitioning_scheme {
-                    LogicalPartitioning::RoundRobinBatch(n) => {
-                        Partitioning::RoundRobinBatch(*n)
-                    }
-                    LogicalPartitioning::Hash(expr, n) => {
-                        let runtime_expr = expr
-                            .iter()
-                            .map(|e| {
-                                self.create_physical_expr(e, &input_schema, &ctx_state)
-                            })
-                            .collect::<Result<Vec<_>>>()?;
-                        Partitioning::Hash(runtime_expr, *n)
-                    }
-                };
-                Ok(Arc::new(RepartitionExec::try_new(
-                    input,
-                    physical_partitioning,
-                )?))
-            }
-            LogicalPlan::Sort { expr, input, .. } => {
-                let input = self.create_initial_plan(input, ctx_state)?;
-                let input_schema = input.as_ref().schema();
-
-                let sort_expr = expr
-                    .iter()
-                    .map(|e| match e {
-                        Expr::Sort {
-                            expr,
-                            asc,
-                            nulls_first,
-                        } => self.create_physical_sort_expr(
-                            expr,
-                            &input_schema,
-                            SortOptions {
-                                descending: !*asc,
-                                nulls_first: *nulls_first,
-                            },
-                            ctx_state,
-                        ),
-                        _ => Err(DataFusionError::Plan(
-                            "Sort only accepts sort expressions".to_string(),
-                        )),
-                    })
-                    .collect::<Result<Vec<_>>>()?;
-
-                Ok(Arc::new(SortExec::try_new(sort_expr, input)?))
-            }
-            LogicalPlan::Join {
-                left,
-                right,
-                on: keys,
-                join_type,
-                ..
-            } => {
-                let left = self.create_initial_plan(left, ctx_state)?;
-                let right = self.create_initial_plan(right, ctx_state)?;
-                let physical_join_type = match join_type {
-                    JoinType::Inner => hash_utils::JoinType::Inner,
-                    JoinType::Left => hash_utils::JoinType::Left,
-                    JoinType::Right => hash_utils::JoinType::Right,
-                };
-                if ctx_state.config.concurrency > 1 && ctx_state.config.repartition_joins
-                {
-                    let left_expr = keys.iter().map(|x| col(&x.0)).collect();
-                    let right_expr = keys.iter().map(|x| col(&x.1)).collect();
-
-                    // Use hash partition by defualt to parallelize hash joins
-                    Ok(Arc::new(HashJoinExec::try_new(
-                        Arc::new(RepartitionExec::try_new(
-                            left,
-                            Partitioning::Hash(left_expr, ctx_state.config.concurrency),
-                        )?),
-                        Arc::new(RepartitionExec::try_new(
-                            right,
-                            Partitioning::Hash(right_expr, ctx_state.config.concurrency),
-                        )?),
-                        &keys,
-                        &physical_join_type,
-                        PartitionMode::Partitioned,
-                    )?))
-                } else {
-                    Ok(Arc::new(HashJoinExec::try_new(
-                        left,
-                        right,
-                        &keys,
-                        &physical_join_type,
-                        PartitionMode::CollectLeft,
-                    )?))
-                }
-            }
-            LogicalPlan::EmptyRelation {
-                produce_one_row,
-                schema,
-            } => Ok(Arc::new(EmptyExec::new(
-                *produce_one_row,
-                SchemaRef::new(schema.as_ref().to_owned().into()),
-            ))),
-            LogicalPlan::Limit { input, n, .. } => {
-                let limit = *n;
-                let input = self.create_initial_plan(input, ctx_state)?;
-
-                // GlobalLimitExec requires a single partition for input
-                let input = if input.output_partitioning().partition_count() == 1 {
-                    input
-                } else {
-                    // Apply a LocalLimitExec to each partition. The optimizer will also insert
-                    // a MergeExec between the GlobalLimitExec and LocalLimitExec
-                    Arc::new(LocalLimitExec::new(input, limit))
-                };
-
-                Ok(Arc::new(GlobalLimitExec::new(input, limit)))
-            }
-            LogicalPlan::CreateExternalTable { .. } => {
-                // There is no default plan for "CREATE EXTERNAL
-                // TABLE" -- it must be handled at a higher level (so
-                // that the appropriate table can be registered with
-                // the context)
-                Err(DataFusionError::Internal(
-                    "Unsupported logical plan: CreateExternalTable".to_string(),
-                ))
-            }
-            LogicalPlan::Explain {
-                verbose,
-                plan,
-                stringified_plans,
-                schema,
-            } => {
-                let input = self.create_initial_plan(plan, ctx_state)?;
-
-                let mut stringified_plans = stringified_plans
-                    .iter()
-                    .filter(|s| s.should_display(*verbose))
-                    .cloned()
-                    .collect::<Vec<_>>();
-
-                // add in the physical plan if requested
-                if *verbose {
-                    stringified_plans.push(StringifiedPlan::new(
-                        PlanType::PhysicalPlan,
-                        format!("{:#?}", input),
-                    ));
-                }
-                Ok(Arc::new(ExplainExec::new(
-                    SchemaRef::new(schema.as_ref().to_owned().into()),
-                    stringified_plans,
-                )))
-            }
-            LogicalPlan::Extension { node } => {
-                let inputs = node
-                    .inputs()
-                    .into_iter()
-                    .map(|input_plan| self.create_initial_plan(input_plan, ctx_state))
-                    .collect::<Result<Vec<_>>>()?;
-
-                let maybe_plan = self.extension_planners.iter().try_fold(
-                    None,
-                    |maybe_plan, planner| {
-                        if let Some(plan) = maybe_plan {
-                            Ok(Some(plan))
-                        } else {
-                            planner.plan_extension(node.as_ref(), &inputs, ctx_state)
-                        }
-                    },
-                )?;
-                let plan = maybe_plan.ok_or_else(|| DataFusionError::Plan(format!(
-                    "No installed planner was able to convert the custom node to an execution plan: {:?}", node
-                )))?;
-
-                // Ensure the ExecutionPlan's  schema matches the
-                // declared logical schema to catch and warn about
-                // logic errors when creating user defined plans.
-                if plan.schema() != node.schema().as_ref().to_owned().into() {
-                    Err(DataFusionError::Plan(format!(
-                        "Extension planner for {:?} created an ExecutionPlan with mismatched schema. \
-                         LogicalPlan schema: {:?}, ExecutionPlan schema: {:?}",
-                        node, node.schema(), plan.schema()
-                    )))
-                } else {
-                    Ok(plan)
-                }
-            }
-        }
-    }
-
-    /// Create a physical expression from a logical expression
-    pub fn create_physical_expr(
-        &self,
-        e: &Expr,
-        input_schema: &Schema,
-        ctx_state: &ExecutionContextState,
-    ) -> Result<Arc<dyn PhysicalExpr>> {
-        match e {
-            Expr::Alias(expr, ..) => {
-                Ok(self.create_physical_expr(expr, input_schema, ctx_state)?)
-            }
-            Expr::Column(name) => {
-                // check that name exists
-                input_schema.field_with_name(&name)?;
-                Ok(Arc::new(Column::new(name)))
-            }
-            Expr::Literal(value) => Ok(Arc::new(Literal::new(value.clone()))),
-            Expr::ScalarVariable(variable_names) => {
-                if &variable_names[0][0..2] == "@@" {
-                    match ctx_state.var_provider.get(&VarType::System) {
-                        Some(provider) => {
-                            let scalar_value =
-                                provider.get_value(variable_names.clone())?;
-                            Ok(Arc::new(Literal::new(scalar_value)))
-                        }
-                        _ => Err(DataFusionError::Plan(
-                            "No system variable provider found".to_string(),
-                        )),
-                    }
-                } else {
-                    match ctx_state.var_provider.get(&VarType::UserDefined) {
-                        Some(provider) => {
-                            let scalar_value =
-                                provider.get_value(variable_names.clone())?;
-                            Ok(Arc::new(Literal::new(scalar_value)))
-                        }
-                        _ => Err(DataFusionError::Plan(
-                            "No user defined variable provider found".to_string(),
-                        )),
-                    }
-                }
-            }
-            Expr::BinaryExpr { left, op, right } => {
-                let lhs = self.create_physical_expr(left, input_schema, ctx_state)?;
-                let rhs = self.create_physical_expr(right, input_schema, ctx_state)?;
-                binary(lhs, *op, rhs, input_schema)
-            }
-            Expr::Case {
-                expr,
-                when_then_expr,
-                else_expr,
-                ..
-            } => {
-                let expr: Option<Arc<dyn PhysicalExpr>> = if let Some(e) = expr {
-                    Some(self.create_physical_expr(
-                        e.as_ref(),
-                        input_schema,
-                        ctx_state,
-                    )?)
-                } else {
-                    None
-                };
-                let when_expr = when_then_expr
-                    .iter()
-                    .map(|(w, _)| {
-                        self.create_physical_expr(w.as_ref(), input_schema, ctx_state)
-                    })
-                    .collect::<Result<Vec<_>>>()?;
-                let then_expr = when_then_expr
-                    .iter()
-                    .map(|(_, t)| {
-                        self.create_physical_expr(t.as_ref(), input_schema, ctx_state)
-                    })
-                    .collect::<Result<Vec<_>>>()?;
-                let when_then_expr: Vec<(Arc<dyn PhysicalExpr>, Arc<dyn PhysicalExpr>)> =
-                    when_expr
-                        .iter()
-                        .zip(then_expr.iter())
-                        .map(|(w, t)| (w.clone(), t.clone()))
-                        .collect();
-                let else_expr: Option<Arc<dyn PhysicalExpr>> = if let Some(e) = else_expr
-                {
-                    Some(self.create_physical_expr(
-                        e.as_ref(),
-                        input_schema,
-                        ctx_state,
-                    )?)
-                } else {
-                    None
-                };
-                Ok(Arc::new(CaseExpr::try_new(
-                    expr,
-                    &when_then_expr,
-                    else_expr,
-                )?))
-            }
-            Expr::Cast { expr, data_type } => expressions::cast(
-                self.create_physical_expr(expr, input_schema, ctx_state)?,
-                input_schema,
-                data_type.clone(),
-            ),
-            Expr::TryCast { expr, data_type } => expressions::try_cast(
-                self.create_physical_expr(expr, input_schema, ctx_state)?,
-                input_schema,
-                data_type.clone(),
-            ),
-            Expr::Not(expr) => expressions::not(
-                self.create_physical_expr(expr, input_schema, ctx_state)?,
-                input_schema,
-            ),
-            Expr::Negative(expr) => expressions::negative(
-                self.create_physical_expr(expr, input_schema, ctx_state)?,
-                input_schema,
-            ),
-            Expr::IsNull(expr) => expressions::is_null(self.create_physical_expr(
-                expr,
-                input_schema,
-                ctx_state,
-            )?),
-            Expr::IsNotNull(expr) => expressions::is_not_null(
-                self.create_physical_expr(expr, input_schema, ctx_state)?,
-            ),
-            Expr::ScalarFunction { fun, args } => {
-                let physical_args = args
-                    .iter()
-                    .map(|e| self.create_physical_expr(e, input_schema, ctx_state))
-                    .collect::<Result<Vec<_>>>()?;
-                functions::create_physical_expr(fun, &physical_args, input_schema)
-            }
-            Expr::ScalarUDF { fun, args } => {
-                let mut physical_args = vec![];
-                for e in args {
-                    physical_args.push(self.create_physical_expr(
-                        e,
-                        input_schema,
-                        ctx_state,
-                    )?);
-                }
-
-                udf::create_physical_expr(
-                    fun.clone().as_ref(),
-                    &physical_args,
-                    input_schema,
-                )
-            }
-            Expr::Between {
-                expr,
-                negated,
-                low,
-                high,
-            } => {
-                let value_expr =
-                    self.create_physical_expr(expr, input_schema, ctx_state)?;
-                let low_expr = self.create_physical_expr(low, input_schema, ctx_state)?;
-                let high_expr =
-                    self.create_physical_expr(high, input_schema, ctx_state)?;
-
-                // rewrite the between into the two binary operators
-                let binary_expr = binary(
-                    binary(value_expr.clone(), Operator::GtEq, low_expr, input_schema)?,
-                    Operator::And,
-                    binary(value_expr.clone(), Operator::LtEq, high_expr, input_schema)?,
-                    input_schema,
-                );
-
-                if *negated {
-                    expressions::not(binary_expr?, input_schema)
-                } else {
-                    binary_expr
-                }
-            }
-            Expr::InList {
-                expr,
-                list,
-                negated,
-            } => match expr.as_ref() {
-                Expr::Literal(ScalarValue::Utf8(None)) => {
-                    Ok(expressions::lit(ScalarValue::Boolean(None)))
-                }
-                _ => {
-                    let value_expr =
-                        self.create_physical_expr(expr, input_schema, ctx_state)?;
-                    let value_expr_data_type = value_expr.data_type(input_schema)?;
-
-                    let list_exprs =
-                        list.iter()
-                            .map(|expr| match expr {
-                                Expr::Literal(ScalarValue::Utf8(None)) => self
-                                    .create_physical_expr(expr, input_schema, ctx_state),
-                                _ => {
-                                    let list_expr = self.create_physical_expr(
-                                        expr,
-                                        input_schema,
-                                        ctx_state,
-                                    )?;
-                                    let list_expr_data_type =
-                                        list_expr.data_type(input_schema)?;
-
-                                    if list_expr_data_type == value_expr_data_type {
-                                        Ok(list_expr)
-                                    } else if can_cast_types(
-                                        &list_expr_data_type,
-                                        &value_expr_data_type,
-                                    ) {
-                                        expressions::cast(
-                                            list_expr,
-                                            input_schema,
-                                            value_expr.data_type(input_schema)?,
-                                        )
-                                    } else {
-                                        Err(DataFusionError::Plan(format!(
-                                            "Unsupported CAST from {:?} to {:?}",
-                                            list_expr_data_type, value_expr_data_type
-                                        )))
-                                    }
-                                }
-                            })
-                            .collect::<Result<Vec<_>>>()?;
-
-                    expressions::in_list(value_expr, list_exprs, negated)
-                }
-            },
-            other => Err(DataFusionError::NotImplemented(format!(
-                "Physical plan does not support logical expression {:?}",
-                other
-            ))),
-        }
-    }
-
-    /// Create an aggregate expression from a logical expression
-    pub fn create_aggregate_expr(
-        &self,
-        e: &Expr,
-        logical_input_schema: &DFSchema,
-        physical_input_schema: &Schema,
-        ctx_state: &ExecutionContextState,
-    ) -> Result<Arc<dyn AggregateExpr>> {
-        // unpack aliased logical expressions, e.g. "sum(col) as total"
-        let (name, e) = match e {
-            Expr::Alias(sub_expr, alias) => (alias.clone(), sub_expr.as_ref()),
-            _ => (e.name(logical_input_schema)?, e),
-        };
-
-        match e {
-            Expr::AggregateFunction {
-                fun,
-                distinct,
-                args,
-                ..
-            } => {
-                let args = args
-                    .iter()
-                    .map(|e| {
-                        self.create_physical_expr(e, physical_input_schema, ctx_state)
-                    })
-                    .collect::<Result<Vec<_>>>()?;
-                aggregates::create_aggregate_expr(
-                    fun,
-                    *distinct,
-                    &args,
-                    physical_input_schema,
-                    name,
-                )
-            }
-            Expr::AggregateUDF { fun, args, .. } => {
-                let args = args
-                    .iter()
-                    .map(|e| {
-                        self.create_physical_expr(e, physical_input_schema, ctx_state)
-                    })
-                    .collect::<Result<Vec<_>>>()?;
-
-                udaf::create_aggregate_expr(fun, &args, physical_input_schema, name)
-            }
-            other => Err(DataFusionError::Internal(format!(
-                "Invalid aggregate expression '{:?}'",
-                other
-            ))),
-        }
-    }
-
-    /// Create an aggregate expression from a logical expression
-    pub fn create_physical_sort_expr(
-        &self,
-        e: &Expr,
-        input_schema: &Schema,
-        options: SortOptions,
-        ctx_state: &ExecutionContextState,
-    ) -> Result<PhysicalSortExpr> {
-        Ok(PhysicalSortExpr {
-            expr: self.create_physical_expr(e, input_schema, ctx_state)?,
-            options,
-        })
-    }
-}
-
-fn tuple_err<T, R>(value: (Result<T>, Result<R>)) -> Result<(T, R)> {
-    match value {
-        (Ok(e), Ok(e1)) => Ok((e, e1)),
-        (Err(e), Ok(_)) => Err(e),
-        (Ok(_), Err(e1)) => Err(e1),
-        (Err(e), Err(_)) => Err(e),
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::physical_plan::{csv::CsvReadOptions, expressions, Partitioning};
-    use crate::prelude::ExecutionConfig;
-    use crate::scalar::ScalarValue;
-    use crate::{
-        catalog::catalog::MemoryCatalogList,
-        logical_plan::{DFField, DFSchema, DFSchemaRef},
-    };
-    use crate::{
-        logical_plan::{col, lit, sum, LogicalPlanBuilder},
-        physical_plan::SendableRecordBatchStream,
-    };
-    use arrow::datatypes::{DataType, Field, SchemaRef};
-    use async_trait::async_trait;
-    use fmt::Debug;
-    use std::{any::Any, collections::HashMap, fmt};
-
-    fn make_ctx_state() -> ExecutionContextState {
-        ExecutionContextState {
-            catalog_list: Arc::new(MemoryCatalogList::new()),
-            scalar_functions: HashMap::new(),
-            var_provider: HashMap::new(),
-            aggregate_functions: HashMap::new(),
-            config: ExecutionConfig::new(),
-        }
-    }
-
-    fn plan(logical_plan: &LogicalPlan) -> Result<Arc<dyn ExecutionPlan>> {
-        let ctx_state = make_ctx_state();
-        let planner = DefaultPhysicalPlanner::default();
-        planner.create_physical_plan(logical_plan, &ctx_state)
-    }
-
-    #[test]
-    fn test_all_operators() -> Result<()> {
-        let testdata = arrow::util::test_util::arrow_test_data();
-        let path = format!("{}/csv/aggregate_test_100.csv", testdata);
-
-        let options = CsvReadOptions::new().schema_infer_max_records(100);
-        let logical_plan = LogicalPlanBuilder::scan_csv(&path, options, None)?
-            // filter clause needs the type coercion rule applied
-            .filter(col("c7").lt(lit(5_u8)))?
-            .project(vec![col("c1"), col("c2")])?
-            .aggregate(vec![col("c1")], vec![sum(col("c2"))])?
-            .sort(vec![col("c1").sort(true, true)])?
-            .limit(10)?
-            .build()?;
-
-        let plan = plan(&logical_plan)?;
-
-        // verify that the plan correctly casts u8 to i64
-        // the cast here is implicit so has CastOptions with safe=true
-        let expected = "BinaryExpr { left: Column { name: \"c7\" }, op: Lt, right: TryCastExpr { expr: Literal { value: UInt8(5) }, cast_type: Int64 } }";
-        assert!(format!("{:?}", plan).contains(expected));
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_create_not() -> Result<()> {
-        let schema = Schema::new(vec![Field::new("a", DataType::Boolean, true)]);
-
-        let planner = DefaultPhysicalPlanner::default();
-
-        let expr =
-            planner.create_physical_expr(&col("a").not(), &schema, &make_ctx_state())?;
-        let expected = expressions::not(expressions::col("a"), &schema)?;
-
-        assert_eq!(format!("{:?}", expr), format!("{:?}", expected));
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_with_csv_plan() -> Result<()> {
-        let testdata = arrow::util::test_util::arrow_test_data();
-        let path = format!("{}/csv/aggregate_test_100.csv", testdata);
-
-        let options = CsvReadOptions::new().schema_infer_max_records(100);
-        let logical_plan = LogicalPlanBuilder::scan_csv(&path, options, None)?
-            .filter(col("c7").lt(col("c12")))?
-            .build()?;
-
-        let plan = plan(&logical_plan)?;
-
-        // c12 is f64, c7 is u8 -> cast c7 to f64
-        // the cast here is implicit so has CastOptions with safe=true
-        let expected = "predicate: BinaryExpr { left: TryCastExpr { expr: Column { name: \"c7\" }, cast_type: Float64 }, op: Lt, right: Column { name: \"c12\" } }";
-        assert!(format!("{:?}", plan).contains(expected));
-        Ok(())
-    }
-
-    #[test]
-    fn errors() -> Result<()> {
-        let testdata = arrow::util::test_util::arrow_test_data();
-        let path = format!("{}/csv/aggregate_test_100.csv", testdata);
-        let options = CsvReadOptions::new().schema_infer_max_records(100);
-
-        let bool_expr = col("c1").eq(col("c1"));
-        let cases = vec![
-            // utf8 < u32
-            col("c1").lt(col("c2")),
-            // utf8 AND utf8
-            col("c1").and(col("c1")),
-            // u8 AND u8
-            col("c3").and(col("c3")),
-            // utf8 = u32
-            col("c1").eq(col("c2")),
-            // utf8 = bool
-            col("c1").eq(bool_expr.clone()),
-            // u32 AND bool
-            col("c2").and(bool_expr),
-            // utf8 LIKE u32
-            col("c1").like(col("c2")),
-        ];
-        for case in cases {
-            let logical_plan = LogicalPlanBuilder::scan_csv(&path, options, None)?
-                .project(vec![case.clone()]);
-            let message = format!(
-                "Expression {:?} expected to error due to impossible coercion",
-                case
-            );
-            assert!(logical_plan.is_err(), "{}", message);
-        }
-        Ok(())
-    }
-
-    #[test]
-    fn default_extension_planner() {
-        let ctx_state = make_ctx_state();
-        let planner = DefaultPhysicalPlanner::default();
-        let logical_plan = LogicalPlan::Extension {
-            node: Arc::new(NoOpExtensionNode::default()),
-        };
-        let plan = planner.create_physical_plan(&logical_plan, &ctx_state);
-
-        let expected_error =
-            "No installed planner was able to convert the custom node to an execution plan: NoOp";
-        match plan {
-            Ok(_) => panic!("Expected planning failure"),
-            Err(e) => assert!(
-                e.to_string().contains(expected_error),
-                "Error '{}' did not contain expected error '{}'",
-                e.to_string(),
-                expected_error
-            ),
-        }
-    }
-
-    #[test]
-    fn bad_extension_planner() {
-        // Test that creating an execution plan whose schema doesn't
-        // match the logical plan's schema generates an error.
-        let ctx_state = make_ctx_state();
-        let planner = DefaultPhysicalPlanner::with_extension_planners(vec![Arc::new(
-            BadExtensionPlanner {},
-        )]);
-
-        let logical_plan = LogicalPlan::Extension {
-            node: Arc::new(NoOpExtensionNode::default()),
-        };
-        let plan = planner.create_physical_plan(&logical_plan, &ctx_state);
-
-        let expected_error: &str = "Error during planning: \
-        Extension planner for NoOp created an ExecutionPlan with mismatched schema. \
-        LogicalPlan schema: DFSchema { fields: [\
-            DFField { qualifier: None, field: Field { \
-                name: \"a\", \
-                data_type: Int32, \
-                nullable: false, \
-                dict_id: 0, \
-                dict_is_ordered: false, \
-                metadata: None } }\
-        ] }, \
-        ExecutionPlan schema: Schema { fields: [\
-            Field { \
-                name: \"b\", \
-                data_type: Int32, \
-                nullable: false, \
-                dict_id: 0, \
-                dict_is_ordered: false, \
-                metadata: None }\
-        ], metadata: {} }";
-        match plan {
-            Ok(_) => panic!("Expected planning failure"),
-            Err(e) => assert!(
-                e.to_string().contains(expected_error),
-                "Error '{}' did not contain expected error '{}'",
-                e.to_string(),
-                expected_error
-            ),
-        }
-    }
-
-    #[test]
-    fn in_list_types() -> Result<()> {
-        let testdata = arrow::util::test_util::arrow_test_data();
-        let path = format!("{}/csv/aggregate_test_100.csv", testdata);
-        let options = CsvReadOptions::new().schema_infer_max_records(100);
-
-        // expression: "a in ('a', 1)"
-        let list = vec![
-            Expr::Literal(ScalarValue::Utf8(Some("a".to_string()))),
-            Expr::Literal(ScalarValue::Int64(Some(1))),
-        ];
-        let logical_plan = LogicalPlanBuilder::scan_csv(&path, options, None)?
-            // filter clause needs the type coercion rule applied
-            .filter(col("c12").lt(lit(0.05)))?
-            .project(vec![col("c1").in_list(list, false)])?
-            .build()?;
-        let execution_plan = plan(&logical_plan)?;
-        // verify that the plan correctly adds cast from Int64(1) to Utf8
-        let expected = "InListExpr { expr: Column { name: \"c1\" }, list: [Literal { value: Utf8(\"a\") }, CastExpr { expr: Literal { value: Int64(1) }, cast_type: Utf8, cast_options: CastOptions { safe: false } }], negated: false }";
-        println!("{:?}", execution_plan);
-        assert!(format!("{:?}", execution_plan).contains(expected));
-
-        // expression: "a in (true, 'a')"
-        let list = vec![
-            Expr::Literal(ScalarValue::Boolean(Some(true))),
-            Expr::Literal(ScalarValue::Utf8(Some("a".to_string()))),
-        ];
-        let logical_plan = LogicalPlanBuilder::scan_csv(&path, options, None)?
-            // filter clause needs the type coercion rule applied
-            .filter(col("c12").lt(lit(0.05)))?
-            .project(vec![col("c12").lt_eq(lit(0.025)).in_list(list, false)])?
-            .build()?;
-        let execution_plan = plan(&logical_plan);
-
-        let expected_error = "Unsupported CAST from Utf8 to Boolean";
-        match execution_plan {
-            Ok(_) => panic!("Expected planning failure"),
-            Err(e) => assert!(
-                e.to_string().contains(expected_error),
-                "Error '{}' did not contain expected error '{}'",
-                e.to_string(),
-                expected_error
-            ),
-        }
-
-        Ok(())
-    }
-
-    #[test]
-    fn hash_agg_input_schema() -> Result<()> {
-        let testdata = arrow::util::test_util::arrow_test_data();
-        let path = format!("{}/csv/aggregate_test_100.csv", testdata);
-
-        let options = CsvReadOptions::new().schema_infer_max_records(100);
-        let logical_plan = LogicalPlanBuilder::scan_csv(&path, options, None)?
-            .aggregate(vec![col("c1")], vec![sum(col("c2"))])?
-            .build()?;
-
-        let execution_plan = plan(&logical_plan)?;
-        let final_hash_agg = execution_plan
-            .as_any()
-            .downcast_ref::<HashAggregateExec>()
-            .expect("hash aggregate");
-        assert_eq!("SUM(c2)", final_hash_agg.schema().field(1).name());
-        // we need access to the input to the partial aggregate so that other projects can
-        // implement serde
-        assert_eq!("c2", final_hash_agg.input_schema().field(1).name());
-
-        Ok(())
-    }
-
-    /// An example extension node that doesn't do anything
-    struct NoOpExtensionNode {
-        schema: DFSchemaRef,
-    }
-
-    impl Default for NoOpExtensionNode {
-        fn default() -> Self {
-            Self {
-                schema: DFSchemaRef::new(
-                    DFSchema::new(vec![DFField::new(None, "a", DataType::Int32, false)])
-                        .unwrap(),
-                ),
-            }
-        }
-    }
-
-    impl Debug for NoOpExtensionNode {
-        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-            write!(f, "NoOp")
-        }
-    }
-
-    impl UserDefinedLogicalNode for NoOpExtensionNode {
-        fn as_any(&self) -> &dyn Any {
-            self
-        }
-
-        fn inputs(&self) -> Vec<&LogicalPlan> {
-            vec![]
-        }
-
-        fn schema(&self) -> &DFSchemaRef {
-            &self.schema
-        }
-
-        fn expressions(&self) -> Vec<Expr> {
-            vec![]
-        }
-
-        fn fmt_for_explain(&self, f: &mut fmt::Formatter) -> fmt::Result {
-            write!(f, "NoOp")
-        }
-
-        fn from_template(
-            &self,
-            _exprs: &[Expr],
-            _inputs: &[LogicalPlan],
-        ) -> Arc<dyn UserDefinedLogicalNode + Send + Sync> {
-            unimplemented!("NoOp");
-        }
-    }
-
-    #[derive(Debug)]
-    struct NoOpExecutionPlan {
-        schema: SchemaRef,
-    }
-
-    #[async_trait]
-    impl ExecutionPlan for NoOpExecutionPlan {
-        /// Return a reference to Any that can be used for downcasting
-        fn as_any(&self) -> &dyn Any {
-            self
-        }
-
-        fn schema(&self) -> SchemaRef {
-            self.schema.clone()
-        }
-
-        fn output_partitioning(&self) -> Partitioning {
-            Partitioning::UnknownPartitioning(1)
-        }
-
-        fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
-            vec![]
-        }
-
-        fn with_new_children(
-            &self,
-            _children: Vec<Arc<dyn ExecutionPlan>>,
-        ) -> Result<Arc<dyn ExecutionPlan>> {
-            unimplemented!("NoOpExecutionPlan::with_new_children");
-        }
-
-        async fn execute(&self, _partition: usize) -> Result<SendableRecordBatchStream> {
-            unimplemented!("NoOpExecutionPlan::execute");
-        }
-    }
-
-    //  Produces an execution plan where the schema is mismatched from
-    //  the logical plan node.
-    struct BadExtensionPlanner {}
-
-    impl ExtensionPlanner for BadExtensionPlanner {
-        /// Create a physical plan for an extension node
-        fn plan_extension(
-            &self,
-            _node: &dyn UserDefinedLogicalNode,
-            _inputs: &[Arc<dyn ExecutionPlan>],
-            _ctx_state: &ExecutionContextState,
-        ) -> Result<Option<Arc<dyn ExecutionPlan>>> {
-            Ok(Some(Arc::new(NoOpExecutionPlan {
-                schema: SchemaRef::new(Schema::new(vec![Field::new(
-                    "b",
-                    DataType::Int32,
-                    false,
-                )])),
-            })))
-        }
-    }
-}
diff --git a/rust/datafusion/src/physical_plan/projection.rs b/rust/datafusion/src/physical_plan/projection.rs
deleted file mode 100644
index a881beb453a..00000000000
--- a/rust/datafusion/src/physical_plan/projection.rs
+++ /dev/null
@@ -1,232 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Defines the projection execution plan. A projection determines which columns or expressions
-//! are returned from a query. The SQL statement `SELECT a, b, a+b FROM t1` is an example
-//! of a projection on table `t1` where the expressions `a`, `b`, and `a+b` are the
-//! projection expressions. `SELECT` without `FROM` will only evaluate expressions.
-
-use std::any::Any;
-use std::pin::Pin;
-use std::sync::Arc;
-use std::task::{Context, Poll};
-
-use crate::error::{DataFusionError, Result};
-use crate::physical_plan::{ExecutionPlan, Partitioning, PhysicalExpr};
-use arrow::datatypes::{Field, Schema, SchemaRef};
-use arrow::error::Result as ArrowResult;
-use arrow::record_batch::RecordBatch;
-
-use super::{RecordBatchStream, SendableRecordBatchStream};
-use async_trait::async_trait;
-
-use futures::stream::Stream;
-use futures::stream::StreamExt;
-
-/// Execution plan for a projection
-#[derive(Debug)]
-pub struct ProjectionExec {
-    /// The projection expressions stored as tuples of (expression, output column name)
-    expr: Vec<(Arc<dyn PhysicalExpr>, String)>,
-    /// The schema once the projection has been applied to the input
-    schema: SchemaRef,
-    /// The input plan
-    input: Arc<dyn ExecutionPlan>,
-}
-
-impl ProjectionExec {
-    /// Create a projection on an input
-    pub fn try_new(
-        expr: Vec<(Arc<dyn PhysicalExpr>, String)>,
-        input: Arc<dyn ExecutionPlan>,
-    ) -> Result<Self> {
-        let input_schema = input.schema();
-
-        let fields: Result<Vec<_>> = expr
-            .iter()
-            .map(|(e, name)| {
-                Ok(Field::new(
-                    name,
-                    e.data_type(&input_schema)?,
-                    e.nullable(&input_schema)?,
-                ))
-            })
-            .collect();
-
-        let schema = Arc::new(Schema::new(fields?));
-
-        Ok(Self {
-            expr,
-            schema,
-            input: input.clone(),
-        })
-    }
-
-    /// The projection expressions stored as tuples of (expression, output column name)
-    pub fn expr(&self) -> &[(Arc<dyn PhysicalExpr>, String)] {
-        &self.expr
-    }
-
-    /// The input plan
-    pub fn input(&self) -> &Arc<dyn ExecutionPlan> {
-        &self.input
-    }
-}
-
-#[async_trait]
-impl ExecutionPlan for ProjectionExec {
-    /// Return a reference to Any that can be used for downcasting
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    /// Get the schema for this execution plan
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-
-    fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
-        vec![self.input.clone()]
-    }
-
-    /// Get the output partitioning of this plan
-    fn output_partitioning(&self) -> Partitioning {
-        self.input.output_partitioning()
-    }
-
-    fn with_new_children(
-        &self,
-        children: Vec<Arc<dyn ExecutionPlan>>,
-    ) -> Result<Arc<dyn ExecutionPlan>> {
-        match children.len() {
-            1 => Ok(Arc::new(ProjectionExec::try_new(
-                self.expr.clone(),
-                children[0].clone(),
-            )?)),
-            _ => Err(DataFusionError::Internal(
-                "ProjectionExec wrong number of children".to_string(),
-            )),
-        }
-    }
-
-    async fn execute(&self, partition: usize) -> Result<SendableRecordBatchStream> {
-        Ok(Box::pin(ProjectionStream {
-            schema: self.schema.clone(),
-            expr: self.expr.iter().map(|x| x.0.clone()).collect(),
-            input: self.input.execute(partition).await?,
-        }))
-    }
-}
-
-fn batch_project(
-    batch: &RecordBatch,
-    expressions: &[Arc<dyn PhysicalExpr>],
-    schema: &SchemaRef,
-) -> ArrowResult<RecordBatch> {
-    expressions
-        .iter()
-        .map(|expr| expr.evaluate(&batch))
-        .map(|r| r.map(|v| v.into_array(batch.num_rows())))
-        .collect::<Result<Vec<_>>>()
-        .map_or_else(
-            |e| Err(DataFusionError::into_arrow_external_error(e)),
-            |arrays| RecordBatch::try_new(schema.clone(), arrays),
-        )
-}
-
-/// Projection iterator
-struct ProjectionStream {
-    schema: SchemaRef,
-    expr: Vec<Arc<dyn PhysicalExpr>>,
-    input: SendableRecordBatchStream,
-}
-
-impl Stream for ProjectionStream {
-    type Item = ArrowResult<RecordBatch>;
-
-    fn poll_next(
-        mut self: Pin<&mut Self>,
-        cx: &mut Context<'_>,
-    ) -> Poll<Option<Self::Item>> {
-        self.input.poll_next_unpin(cx).map(|x| match x {
-            Some(Ok(batch)) => Some(batch_project(&batch, &self.expr, &self.schema)),
-            other => other,
-        })
-    }
-
-    fn size_hint(&self) -> (usize, Option<usize>) {
-        // same number of record batches
-        self.input.size_hint()
-    }
-}
-
-impl RecordBatchStream for ProjectionStream {
-    /// Get the schema
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-}
-
-#[cfg(test)]
-mod tests {
-
-    use super::*;
-    use crate::physical_plan::csv::{CsvExec, CsvReadOptions};
-    use crate::physical_plan::expressions::col;
-    use crate::test;
-    use futures::future;
-
-    #[tokio::test]
-    async fn project_first_column() -> Result<()> {
-        let schema = test::aggr_test_schema();
-
-        let partitions = 4;
-        let path = test::create_partitioned_csv("aggregate_test_100.csv", partitions)?;
-
-        let csv = CsvExec::try_new(
-            &path,
-            CsvReadOptions::new().schema(&schema),
-            None,
-            1024,
-            None,
-        )?;
-
-        // pick column c1 and name it column c1 in the output schema
-        let projection =
-            ProjectionExec::try_new(vec![(col("c1"), "c1".to_string())], Arc::new(csv))?;
-
-        let mut partition_count = 0;
-        let mut row_count = 0;
-        for partition in 0..projection.output_partitioning().partition_count() {
-            partition_count += 1;
-            let stream = projection.execute(partition).await?;
-
-            row_count += stream
-                .map(|batch| {
-                    let batch = batch.unwrap();
-                    assert_eq!(1, batch.num_columns());
-                    batch.num_rows()
-                })
-                .fold(0, |acc, x| future::ready(acc + x))
-                .await;
-        }
-        assert_eq!(partitions, partition_count);
-        assert_eq!(100, row_count);
-
-        Ok(())
-    }
-}
diff --git a/rust/datafusion/src/physical_plan/regex_expressions.rs b/rust/datafusion/src/physical_plan/regex_expressions.rs
deleted file mode 100644
index b526e7259ef..00000000000
--- a/rust/datafusion/src/physical_plan/regex_expressions.rs
+++ /dev/null
@@ -1,172 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// Some of these functions reference the Postgres documentation
-// or implementation to ensure compatibility and are subject to
-// the Postgres license.
-
-//! Regex expressions
-
-use std::any::type_name;
-use std::sync::Arc;
-
-use crate::error::{DataFusionError, Result};
-use arrow::array::{ArrayRef, GenericStringArray, StringOffsetSizeTrait};
-use arrow::compute;
-use hashbrown::HashMap;
-use regex::Regex;
-
-macro_rules! downcast_string_arg {
-    ($ARG:expr, $NAME:expr, $T:ident) => {{
-        $ARG.as_any()
-            .downcast_ref::<GenericStringArray<T>>()
-            .ok_or_else(|| {
-                DataFusionError::Internal(format!(
-                    "could not cast {} to {}",
-                    $NAME,
-                    type_name::<GenericStringArray<T>>()
-                ))
-            })?
-    }};
-}
-
-/// extract a specific group from a string column, using a regular expression
-pub fn regexp_match<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
-    match args.len() {
-        2 => compute::regexp_match(downcast_string_arg!(args[0], "string", T), downcast_string_arg!(args[1], "pattern", T), None)
-        .map_err(DataFusionError::ArrowError),
-        3 => compute::regexp_match(downcast_string_arg!(args[0], "string", T), downcast_string_arg!(args[1], "pattern", T),  Some(downcast_string_arg!(args[1], "flags", T)))
-        .map_err(DataFusionError::ArrowError),
-        other => Err(DataFusionError::Internal(format!(
-            "regexp_match was called with {} arguments. It requires at least 2 and at most 3.",
-            other
-        ))),
-    }
-}
-
-/// replace POSIX capture groups (like \1) with Rust Regex group (like ${1})
-/// used by regexp_replace
-fn regex_replace_posix_groups(replacement: &str) -> String {
-    lazy_static! {
-        static ref CAPTURE_GROUPS_RE: Regex = Regex::new("(\\\\)(\\d*)").unwrap();
-    }
-    CAPTURE_GROUPS_RE
-        .replace_all(replacement, "$${$2}")
-        .into_owned()
-}
-
-/// Replaces substring(s) matching a POSIX regular expression.
-///
-/// example: `regexp_replace('Thomas', '.[mN]a.', 'M') = 'ThM'`
-pub fn regexp_replace<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
-    // creating Regex is expensive so create hashmap for memoization
-    let mut patterns: HashMap<String, Regex> = HashMap::new();
-
-    match args.len() {
-        3 => {
-            let string_array = downcast_string_arg!(args[0], "string", T);
-            let pattern_array = downcast_string_arg!(args[1], "pattern", T);
-            let replacement_array = downcast_string_arg!(args[2], "replacement", T);
-
-            let result = string_array
-            .iter()
-            .zip(pattern_array.iter())
-            .zip(replacement_array.iter())
-            .map(|((string, pattern), replacement)| match (string, pattern, replacement) {
-                (Some(string), Some(pattern), Some(replacement)) => {
-                    let replacement = regex_replace_posix_groups(replacement);
-
-                    // if patterns hashmap already has regexp then use else else create and return
-                    let re = match patterns.get(pattern) {
-                        Some(re) => Ok(re.clone()),
-                        None => {
-                            match Regex::new(pattern) {
-                                Ok(re) => {
-                                    patterns.insert(pattern.to_string(), re.clone());
-                                    Ok(re)
-                                },
-                                Err(err) => Err(DataFusionError::Execution(err.to_string())),
-                            }
-                        }
-                    };
-
-                    Some(re.map(|re| re.replace(string, replacement.as_str()))).transpose()
-                }
-            _ => Ok(None)
-            })
-            .collect::<Result<GenericStringArray<T>>>()?;
-
-            Ok(Arc::new(result) as ArrayRef)
-        }
-        4 => {
-            let string_array = downcast_string_arg!(args[0], "string", T);
-            let pattern_array = downcast_string_arg!(args[1], "pattern", T);
-            let replacement_array = downcast_string_arg!(args[2], "replacement", T);
-            let flags_array = downcast_string_arg!(args[3], "flags", T);
-
-            let result = string_array
-            .iter()
-            .zip(pattern_array.iter())
-            .zip(replacement_array.iter())
-            .zip(flags_array.iter())
-            .map(|(((string, pattern), replacement), flags)| match (string, pattern, replacement, flags) {
-                (Some(string), Some(pattern), Some(replacement), Some(flags)) => {
-                    let replacement = regex_replace_posix_groups(replacement);
-
-                    // format flags into rust pattern
-                    let (pattern, replace_all) = if flags == "g" {
-                        (pattern.to_string(), true)
-                    } else if flags.contains('g') {
-                        (format!("(?{}){}", flags.to_string().replace("g", ""), pattern), true)
-                    } else {
-                        (format!("(?{}){}", flags, pattern), false)
-                    };
-
-                    // if patterns hashmap already has regexp then use else else create and return
-                    let re = match patterns.get(&pattern) {
-                        Some(re) => Ok(re.clone()),
-                        None => {
-                            match Regex::new(pattern.as_str()) {
-                                Ok(re) => {
-                                    patterns.insert(pattern, re.clone());
-                                    Ok(re)
-                                },
-                                Err(err) => Err(DataFusionError::Execution(err.to_string())),
-                            }
-                        }
-                    };
-
-                    Some(re.map(|re| {
-                        if replace_all {
-                            re.replace_all(string, replacement.as_str())
-                        } else {
-                            re.replace(string, replacement.as_str())
-                        }
-                    })).transpose()
-                }
-            _ => Ok(None)
-            })
-            .collect::<Result<GenericStringArray<T>>>()?;
-
-            Ok(Arc::new(result) as ArrayRef)
-        }
-        other => Err(DataFusionError::Internal(format!(
-            "regexp_replace was called with {} arguments. It requires at least 3 and at most 4.",
-            other
-        ))),
-    }
-}
diff --git a/rust/datafusion/src/physical_plan/repartition.rs b/rust/datafusion/src/physical_plan/repartition.rs
deleted file mode 100644
index 7243550127b..00000000000
--- a/rust/datafusion/src/physical_plan/repartition.rs
+++ /dev/null
@@ -1,461 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! The repartition operator maps N input partitions to M output partitions based on a
-//! partitioning scheme.
-
-use std::pin::Pin;
-use std::sync::Arc;
-use std::task::{Context, Poll};
-use std::{any::Any, collections::HashMap, vec};
-
-use crate::error::{DataFusionError, Result};
-use crate::physical_plan::{ExecutionPlan, Partitioning};
-use arrow::record_batch::RecordBatch;
-use arrow::{array::Array, error::Result as ArrowResult};
-use arrow::{compute::take, datatypes::SchemaRef};
-use tokio_stream::wrappers::UnboundedReceiverStream;
-
-use super::{hash_join::create_hashes, RecordBatchStream, SendableRecordBatchStream};
-use async_trait::async_trait;
-
-use futures::stream::Stream;
-use futures::StreamExt;
-use tokio::sync::{
-    mpsc::{UnboundedReceiver, UnboundedSender},
-    Mutex,
-};
-use tokio::task::JoinHandle;
-
-type MaybeBatch = Option<ArrowResult<RecordBatch>>;
-
-/// The repartition operator maps N input partitions to M output partitions based on a
-/// partitioning scheme. No guarantees are made about the order of the resulting partitions.
-#[derive(Debug)]
-pub struct RepartitionExec {
-    /// Input execution plan
-    input: Arc<dyn ExecutionPlan>,
-    /// Partitioning scheme to use
-    partitioning: Partitioning,
-    /// Channels for sending batches from input partitions to output partitions.
-    /// Key is the partition number
-    channels: Arc<
-        Mutex<
-            HashMap<usize, (UnboundedSender<MaybeBatch>, UnboundedReceiver<MaybeBatch>)>,
-        >,
-    >,
-}
-
-impl RepartitionExec {
-    /// Input execution plan
-    pub fn input(&self) -> &Arc<dyn ExecutionPlan> {
-        &self.input
-    }
-
-    /// Partitioning scheme to use
-    pub fn partitioning(&self) -> &Partitioning {
-        &self.partitioning
-    }
-}
-
-#[async_trait]
-impl ExecutionPlan for RepartitionExec {
-    /// Return a reference to Any that can be used for downcasting
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    /// Get the schema for this execution plan
-    fn schema(&self) -> SchemaRef {
-        self.input.schema()
-    }
-
-    fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
-        vec![self.input.clone()]
-    }
-
-    fn with_new_children(
-        &self,
-        children: Vec<Arc<dyn ExecutionPlan>>,
-    ) -> Result<Arc<dyn ExecutionPlan>> {
-        match children.len() {
-            1 => Ok(Arc::new(RepartitionExec::try_new(
-                children[0].clone(),
-                self.partitioning.clone(),
-            )?)),
-            _ => Err(DataFusionError::Internal(
-                "RepartitionExec wrong number of children".to_string(),
-            )),
-        }
-    }
-
-    fn output_partitioning(&self) -> Partitioning {
-        self.partitioning.clone()
-    }
-
-    async fn execute(&self, partition: usize) -> Result<SendableRecordBatchStream> {
-        // lock mutexes
-        let mut channels = self.channels.lock().await;
-
-        let num_input_partitions = self.input.output_partitioning().partition_count();
-        let num_output_partitions = self.partitioning.partition_count();
-
-        // if this is the first partition to be invoked then we need to set up initial state
-        if channels.is_empty() {
-            // create one channel per *output* partition
-            for partition in 0..num_output_partitions {
-                // Note that this operator uses unbounded channels to avoid deadlocks because
-                // the output partitions can be read in any order and this could cause input
-                // partitions to be blocked when sending data to output UnboundedReceivers that are not
-                // being read yet. This may cause high memory usage if the next operator is
-                // reading output partitions in order rather than concurrently. One workaround
-                // for this would be to add spill-to-disk capabilities.
-                let (sender, receiver) = tokio::sync::mpsc::unbounded_channel::<
-                    Option<ArrowResult<RecordBatch>>,
-                >();
-                channels.insert(partition, (sender, receiver));
-            }
-            // Use fixed random state
-            let random = ahash::RandomState::with_seeds(0, 0, 0, 0);
-
-            // launch one async task per *input* partition
-            for i in 0..num_input_partitions {
-                let random_state = random.clone();
-                let input = self.input.clone();
-                let mut txs: HashMap<_, _> = channels
-                    .iter()
-                    .map(|(partition, (tx, _rx))| (*partition, tx.clone()))
-                    .collect();
-                let partitioning = self.partitioning.clone();
-                let _: JoinHandle<Result<()>> = tokio::spawn(async move {
-                    let mut stream = input.execute(i).await?;
-                    let mut counter = 0;
-                    let hashes_buf = &mut vec![];
-
-                    while let Some(result) = stream.next().await {
-                        match &partitioning {
-                            Partitioning::RoundRobinBatch(_) => {
-                                let output_partition = counter % num_output_partitions;
-                                let tx = txs.get_mut(&output_partition).unwrap();
-                                tx.send(Some(result)).map_err(|e| {
-                                    DataFusionError::Execution(e.to_string())
-                                })?;
-                            }
-                            Partitioning::Hash(exprs, _) => {
-                                let input_batch = result?;
-                                let arrays = exprs
-                                    .iter()
-                                    .map(|expr| {
-                                        Ok(expr
-                                            .evaluate(&input_batch)?
-                                            .into_array(input_batch.num_rows()))
-                                    })
-                                    .collect::<Result<Vec<_>>>()?;
-                                hashes_buf.clear();
-                                hashes_buf.resize(arrays[0].len(), 0);
-                                // Hash arrays and compute buckets based on number of partitions
-                                let hashes =
-                                    create_hashes(&arrays, &random_state, hashes_buf)?;
-                                let mut indices = vec![vec![]; num_output_partitions];
-                                for (index, hash) in hashes.iter().enumerate() {
-                                    indices
-                                        [(*hash % num_output_partitions as u64) as usize]
-                                        .push(index as u64)
-                                }
-                                for (num_output_partition, partition_indices) in
-                                    indices.into_iter().enumerate()
-                                {
-                                    let indices = partition_indices.into();
-                                    // Produce batches based on indices
-                                    let columns = input_batch
-                                        .columns()
-                                        .iter()
-                                        .map(|c| {
-                                            take(c.as_ref(), &indices, None).map_err(
-                                                |e| {
-                                                    DataFusionError::Execution(
-                                                        e.to_string(),
-                                                    )
-                                                },
-                                            )
-                                        })
-                                        .collect::<Result<Vec<Arc<dyn Array>>>>()?;
-                                    let output_batch = RecordBatch::try_new(
-                                        input_batch.schema(),
-                                        columns,
-                                    );
-                                    let tx = txs.get_mut(&num_output_partition).unwrap();
-                                    tx.send(Some(output_batch)).map_err(|e| {
-                                        DataFusionError::Execution(e.to_string())
-                                    })?;
-                                }
-                            }
-                            other => {
-                                // this should be unreachable as long as the validation logic
-                                // in the constructor is kept up-to-date
-                                return Err(DataFusionError::NotImplemented(format!(
-                                    "Unsupported repartitioning scheme {:?}",
-                                    other
-                                )));
-                            }
-                        }
-                        counter += 1;
-                    }
-
-                    // notify each output partition that this input partition has no more data
-                    for (_, tx) in txs {
-                        tx.send(None)
-                            .map_err(|e| DataFusionError::Execution(e.to_string()))?;
-                    }
-                    Ok(())
-                });
-            }
-        }
-
-        // now return stream for the specified *output* partition which will
-        // read from the channel
-        Ok(Box::pin(RepartitionStream {
-            num_input_partitions,
-            num_input_partitions_processed: 0,
-            schema: self.input.schema(),
-            input: UnboundedReceiverStream::new(channels.remove(&partition).unwrap().1),
-        }))
-    }
-}
-
-impl RepartitionExec {
-    /// Create a new RepartitionExec
-    pub fn try_new(
-        input: Arc<dyn ExecutionPlan>,
-        partitioning: Partitioning,
-    ) -> Result<Self> {
-        Ok(RepartitionExec {
-            input,
-            partitioning,
-            channels: Arc::new(Mutex::new(HashMap::new())),
-        })
-    }
-}
-
-struct RepartitionStream {
-    /// Number of input partitions that will be sending batches to this output channel
-    num_input_partitions: usize,
-    /// Number of input partitions that have finished sending batches to this output channel
-    num_input_partitions_processed: usize,
-    /// Schema
-    schema: SchemaRef,
-    /// channel containing the repartitioned batches
-    input: UnboundedReceiverStream<Option<ArrowResult<RecordBatch>>>,
-}
-
-impl Stream for RepartitionStream {
-    type Item = ArrowResult<RecordBatch>;
-
-    fn poll_next(
-        mut self: Pin<&mut Self>,
-        cx: &mut Context<'_>,
-    ) -> Poll<Option<Self::Item>> {
-        match self.input.poll_next_unpin(cx) {
-            Poll::Ready(Some(Some(v))) => Poll::Ready(Some(v)),
-            Poll::Ready(Some(None)) => {
-                self.num_input_partitions_processed += 1;
-                if self.num_input_partitions == self.num_input_partitions_processed {
-                    // all input partitions have finished sending batches
-                    Poll::Ready(None)
-                } else {
-                    // other partitions still have data to send
-                    self.poll_next(cx)
-                }
-            }
-            Poll::Ready(None) => Poll::Ready(None),
-            Poll::Pending => Poll::Pending,
-        }
-    }
-}
-
-impl RecordBatchStream for RepartitionStream {
-    /// Get the schema
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::physical_plan::memory::MemoryExec;
-    use arrow::array::UInt32Array;
-    use arrow::datatypes::{DataType, Field, Schema};
-    use arrow::record_batch::RecordBatch;
-
-    #[tokio::test]
-    async fn one_to_many_round_robin() -> Result<()> {
-        // define input partitions
-        let schema = test_schema();
-        let partition = create_vec_batches(&schema, 50);
-        let partitions = vec![partition];
-
-        // repartition from 1 input to 4 output
-        let output_partitions =
-            repartition(&schema, partitions, Partitioning::RoundRobinBatch(4)).await?;
-
-        assert_eq!(4, output_partitions.len());
-        assert_eq!(13, output_partitions[0].len());
-        assert_eq!(13, output_partitions[1].len());
-        assert_eq!(12, output_partitions[2].len());
-        assert_eq!(12, output_partitions[3].len());
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn many_to_one_round_robin() -> Result<()> {
-        // define input partitions
-        let schema = test_schema();
-        let partition = create_vec_batches(&schema, 50);
-        let partitions = vec![partition.clone(), partition.clone(), partition.clone()];
-
-        // repartition from 3 input to 1 output
-        let output_partitions =
-            repartition(&schema, partitions, Partitioning::RoundRobinBatch(1)).await?;
-
-        assert_eq!(1, output_partitions.len());
-        assert_eq!(150, output_partitions[0].len());
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn many_to_many_round_robin() -> Result<()> {
-        // define input partitions
-        let schema = test_schema();
-        let partition = create_vec_batches(&schema, 50);
-        let partitions = vec![partition.clone(), partition.clone(), partition.clone()];
-
-        // repartition from 3 input to 5 output
-        let output_partitions =
-            repartition(&schema, partitions, Partitioning::RoundRobinBatch(5)).await?;
-
-        assert_eq!(5, output_partitions.len());
-        assert_eq!(30, output_partitions[0].len());
-        assert_eq!(30, output_partitions[1].len());
-        assert_eq!(30, output_partitions[2].len());
-        assert_eq!(30, output_partitions[3].len());
-        assert_eq!(30, output_partitions[4].len());
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn many_to_many_hash_partition() -> Result<()> {
-        // define input partitions
-        let schema = test_schema();
-        let partition = create_vec_batches(&schema, 50);
-        let partitions = vec![partition.clone(), partition.clone(), partition.clone()];
-
-        let output_partitions = repartition(
-            &schema,
-            partitions,
-            Partitioning::Hash(
-                vec![Arc::new(crate::physical_plan::expressions::Column::new(
-                    &"c0",
-                ))],
-                8,
-            ),
-        )
-        .await?;
-
-        let total_rows: usize = output_partitions.iter().map(|x| x.len()).sum();
-
-        assert_eq!(8, output_partitions.len());
-        assert_eq!(total_rows, 8 * 50 * 3);
-
-        Ok(())
-    }
-
-    fn test_schema() -> Arc<Schema> {
-        Arc::new(Schema::new(vec![Field::new("c0", DataType::UInt32, false)]))
-    }
-
-    fn create_vec_batches(schema: &Arc<Schema>, n: usize) -> Vec<RecordBatch> {
-        let batch = create_batch(schema);
-        let mut vec = Vec::with_capacity(n);
-        for _ in 0..n {
-            vec.push(batch.clone());
-        }
-        vec
-    }
-
-    fn create_batch(schema: &Arc<Schema>) -> RecordBatch {
-        RecordBatch::try_new(
-            schema.clone(),
-            vec![Arc::new(UInt32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8]))],
-        )
-        .unwrap()
-    }
-
-    async fn repartition(
-        schema: &SchemaRef,
-        input_partitions: Vec<Vec<RecordBatch>>,
-        partitioning: Partitioning,
-    ) -> Result<Vec<Vec<RecordBatch>>> {
-        // create physical plan
-        let exec = MemoryExec::try_new(&input_partitions, schema.clone(), None)?;
-        let exec = RepartitionExec::try_new(Arc::new(exec), partitioning)?;
-
-        // execute and collect results
-        let mut output_partitions = vec![];
-        for i in 0..exec.partitioning.partition_count() {
-            // execute this *output* partition and collect all batches
-            let mut stream = exec.execute(i).await?;
-            let mut batches = vec![];
-            while let Some(result) = stream.next().await {
-                batches.push(result?);
-            }
-            output_partitions.push(batches);
-        }
-        Ok(output_partitions)
-    }
-
-    #[tokio::test]
-    async fn many_to_many_round_robin_within_tokio_task() -> Result<()> {
-        let join_handle: JoinHandle<Result<Vec<Vec<RecordBatch>>>> =
-            tokio::spawn(async move {
-                // define input partitions
-                let schema = test_schema();
-                let partition = create_vec_batches(&schema, 50);
-                let partitions =
-                    vec![partition.clone(), partition.clone(), partition.clone()];
-
-                // repartition from 3 input to 5 output
-                repartition(&schema, partitions, Partitioning::RoundRobinBatch(5)).await
-            });
-
-        let output_partitions = join_handle
-            .await
-            .map_err(|e| DataFusionError::Internal(e.to_string()))??;
-
-        assert_eq!(5, output_partitions.len());
-        assert_eq!(30, output_partitions[0].len());
-        assert_eq!(30, output_partitions[1].len());
-        assert_eq!(30, output_partitions[2].len());
-        assert_eq!(30, output_partitions[3].len());
-        assert_eq!(30, output_partitions[4].len());
-
-        Ok(())
-    }
-}
diff --git a/rust/datafusion/src/physical_plan/sort.rs b/rust/datafusion/src/physical_plan/sort.rs
deleted file mode 100644
index 26855b354db..00000000000
--- a/rust/datafusion/src/physical_plan/sort.rs
+++ /dev/null
@@ -1,478 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Defines the SORT plan
-
-use std::any::Any;
-use std::pin::Pin;
-use std::sync::{Arc, Mutex};
-use std::task::{Context, Poll};
-use std::time::Instant;
-
-use async_trait::async_trait;
-use futures::stream::Stream;
-use futures::Future;
-use hashbrown::HashMap;
-
-use pin_project_lite::pin_project;
-
-pub use arrow::compute::SortOptions;
-use arrow::compute::{concat, lexsort_to_indices, take, SortColumn, TakeOptions};
-use arrow::datatypes::SchemaRef;
-use arrow::error::Result as ArrowResult;
-use arrow::record_batch::RecordBatch;
-use arrow::{array::ArrayRef, error::ArrowError};
-
-use super::{RecordBatchStream, SendableRecordBatchStream};
-use crate::error::{DataFusionError, Result};
-use crate::physical_plan::expressions::PhysicalSortExpr;
-use crate::physical_plan::{
-    common, Distribution, ExecutionPlan, Partitioning, SQLMetric,
-};
-
-/// Sort execution plan
-#[derive(Debug)]
-pub struct SortExec {
-    /// Input schema
-    input: Arc<dyn ExecutionPlan>,
-    /// Sort expressions
-    expr: Vec<PhysicalSortExpr>,
-    /// Output rows
-    output_rows: Arc<Mutex<SQLMetric>>,
-    /// Time to sort batches
-    sort_time_nanos: Arc<Mutex<SQLMetric>>,
-}
-
-impl SortExec {
-    /// Create a new sort execution plan
-    pub fn try_new(
-        expr: Vec<PhysicalSortExpr>,
-        input: Arc<dyn ExecutionPlan>,
-    ) -> Result<Self> {
-        Ok(Self {
-            expr,
-            input,
-            output_rows: SQLMetric::counter("outputRows"),
-            sort_time_nanos: SQLMetric::time_nanos("sortTime"),
-        })
-    }
-
-    /// Input schema
-    pub fn input(&self) -> &Arc<dyn ExecutionPlan> {
-        &self.input
-    }
-
-    /// Sort expressions
-    pub fn expr(&self) -> &[PhysicalSortExpr] {
-        &self.expr
-    }
-}
-
-#[async_trait]
-impl ExecutionPlan for SortExec {
-    /// Return a reference to Any that can be used for downcasting
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn schema(&self) -> SchemaRef {
-        self.input.schema()
-    }
-
-    fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
-        vec![self.input.clone()]
-    }
-
-    /// Get the output partitioning of this plan
-    fn output_partitioning(&self) -> Partitioning {
-        Partitioning::UnknownPartitioning(1)
-    }
-
-    fn required_child_distribution(&self) -> Distribution {
-        Distribution::SinglePartition
-    }
-
-    fn with_new_children(
-        &self,
-        children: Vec<Arc<dyn ExecutionPlan>>,
-    ) -> Result<Arc<dyn ExecutionPlan>> {
-        match children.len() {
-            1 => Ok(Arc::new(SortExec::try_new(
-                self.expr.clone(),
-                children[0].clone(),
-            )?)),
-            _ => Err(DataFusionError::Internal(
-                "SortExec wrong number of children".to_string(),
-            )),
-        }
-    }
-
-    async fn execute(&self, partition: usize) -> Result<SendableRecordBatchStream> {
-        if 0 != partition {
-            return Err(DataFusionError::Internal(format!(
-                "SortExec invalid partition {}",
-                partition
-            )));
-        }
-
-        // sort needs to operate on a single partition currently
-        if 1 != self.input.output_partitioning().partition_count() {
-            return Err(DataFusionError::Internal(
-                "SortExec requires a single input partition".to_owned(),
-            ));
-        }
-        let input = self.input.execute(0).await?;
-
-        Ok(Box::pin(SortStream::new(
-            input,
-            self.expr.clone(),
-            self.output_rows.clone(),
-            self.sort_time_nanos.clone(),
-        )))
-    }
-
-    fn metrics(&self) -> HashMap<String, SQLMetric> {
-        let mut metrics = HashMap::new();
-        metrics.insert(
-            "outputRows".to_owned(),
-            self.output_rows.lock().unwrap().clone(),
-        );
-        metrics.insert(
-            "sortTime".to_owned(),
-            self.sort_time_nanos.lock().unwrap().clone(),
-        );
-        metrics
-    }
-}
-
-fn sort_batches(
-    batches: &[RecordBatch],
-    schema: &SchemaRef,
-    expr: &[PhysicalSortExpr],
-) -> ArrowResult<Option<RecordBatch>> {
-    if batches.is_empty() {
-        return Ok(None);
-    }
-    // combine all record batches into one for each column
-    let combined_batch = RecordBatch::try_new(
-        schema.clone(),
-        schema
-            .fields()
-            .iter()
-            .enumerate()
-            .map(|(i, _)| {
-                concat(
-                    &batches
-                        .iter()
-                        .map(|batch| batch.column(i).as_ref())
-                        .collect::<Vec<_>>(),
-                )
-            })
-            .collect::<ArrowResult<Vec<ArrayRef>>>()?,
-    )?;
-
-    // sort combined record batch
-    // TODO: pushup the limit expression to sort
-    let indices = lexsort_to_indices(
-        &expr
-            .iter()
-            .map(|e| e.evaluate_to_sort_column(&combined_batch))
-            .collect::<Result<Vec<SortColumn>>>()
-            .map_err(DataFusionError::into_arrow_external_error)?,
-        None,
-    )?;
-
-    // reorder all rows based on sorted indices
-    let sorted_batch = RecordBatch::try_new(
-        schema.clone(),
-        combined_batch
-            .columns()
-            .iter()
-            .map(|column| {
-                take(
-                    column.as_ref(),
-                    &indices,
-                    // disable bound check overhead since indices are already generated from
-                    // the same record batch
-                    Some(TakeOptions {
-                        check_bounds: false,
-                    }),
-                )
-            })
-            .collect::<ArrowResult<Vec<ArrayRef>>>()?,
-    );
-    sorted_batch.map(Some)
-}
-
-pin_project! {
-    struct SortStream {
-        #[pin]
-        output: futures::channel::oneshot::Receiver<ArrowResult<Option<RecordBatch>>>,
-        finished: bool,
-        schema: SchemaRef,
-        output_rows: Arc<Mutex<SQLMetric>>,
-    }
-}
-
-impl SortStream {
-    fn new(
-        input: SendableRecordBatchStream,
-        expr: Vec<PhysicalSortExpr>,
-        output_rows: Arc<Mutex<SQLMetric>>,
-        sort_time: Arc<Mutex<SQLMetric>>,
-    ) -> Self {
-        let (tx, rx) = futures::channel::oneshot::channel();
-
-        let schema = input.schema();
-        tokio::spawn(async move {
-            let schema = input.schema();
-            let sorted_batch = common::collect(input)
-                .await
-                .map_err(DataFusionError::into_arrow_external_error)
-                .and_then(move |batches| {
-                    let now = Instant::now();
-                    let result = sort_batches(&batches, &schema, &expr);
-                    let mut sort_time = sort_time.lock().unwrap();
-                    sort_time.add(now.elapsed().as_nanos() as usize);
-                    result
-                });
-
-            tx.send(sorted_batch)
-        });
-
-        Self {
-            output: rx,
-            finished: false,
-            schema,
-            output_rows,
-        }
-    }
-}
-
-impl Stream for SortStream {
-    type Item = ArrowResult<RecordBatch>;
-
-    fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
-        let output_rows = self.output_rows.clone();
-
-        if self.finished {
-            return Poll::Ready(None);
-        }
-
-        // is the output ready?
-        let this = self.project();
-        let output_poll = this.output.poll(cx);
-
-        match output_poll {
-            Poll::Ready(result) => {
-                *this.finished = true;
-
-                // check for error in receiving channel and unwrap actual result
-                let result = match result {
-                    Err(e) => Some(Err(ArrowError::ExternalError(Box::new(e)))), // error receiving
-                    Ok(result) => result.transpose(),
-                };
-
-                if let Some(Ok(batch)) = &result {
-                    let mut output_rows = output_rows.lock().unwrap();
-                    output_rows.add(batch.num_rows());
-                }
-
-                Poll::Ready(result)
-            }
-            Poll::Pending => Poll::Pending,
-        }
-    }
-}
-
-impl RecordBatchStream for SortStream {
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::physical_plan::expressions::col;
-    use crate::physical_plan::memory::MemoryExec;
-    use crate::physical_plan::merge::MergeExec;
-    use crate::physical_plan::{
-        collect,
-        csv::{CsvExec, CsvReadOptions},
-    };
-    use crate::test;
-    use arrow::array::*;
-    use arrow::datatypes::*;
-
-    #[tokio::test]
-    async fn test_sort() -> Result<()> {
-        let schema = test::aggr_test_schema();
-        let partitions = 4;
-        let path = test::create_partitioned_csv("aggregate_test_100.csv", partitions)?;
-        let csv = CsvExec::try_new(
-            &path,
-            CsvReadOptions::new().schema(&schema),
-            None,
-            1024,
-            None,
-        )?;
-
-        let sort_exec = Arc::new(SortExec::try_new(
-            vec![
-                // c1 string column
-                PhysicalSortExpr {
-                    expr: col("c1"),
-                    options: SortOptions::default(),
-                },
-                // c2 uin32 column
-                PhysicalSortExpr {
-                    expr: col("c2"),
-                    options: SortOptions::default(),
-                },
-                // c7 uin8 column
-                PhysicalSortExpr {
-                    expr: col("c7"),
-                    options: SortOptions::default(),
-                },
-            ],
-            Arc::new(MergeExec::new(Arc::new(csv))),
-        )?);
-
-        let result: Vec<RecordBatch> = collect(sort_exec).await?;
-        assert_eq!(result.len(), 1);
-
-        let columns = result[0].columns();
-
-        let c1 = as_string_array(&columns[0]);
-        assert_eq!(c1.value(0), "a");
-        assert_eq!(c1.value(c1.len() - 1), "e");
-
-        let c2 = as_primitive_array::<UInt32Type>(&columns[1]);
-        assert_eq!(c2.value(0), 1);
-        assert_eq!(c2.value(c2.len() - 1), 5,);
-
-        let c7 = as_primitive_array::<UInt8Type>(&columns[6]);
-        assert_eq!(c7.value(0), 15);
-        assert_eq!(c7.value(c7.len() - 1), 254,);
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn test_lex_sort_by_float() -> Result<()> {
-        let schema = Arc::new(Schema::new(vec![
-            Field::new("a", DataType::Float32, true),
-            Field::new("b", DataType::Float64, true),
-        ]));
-
-        // define data.
-        let batch = RecordBatch::try_new(
-            schema.clone(),
-            vec![
-                Arc::new(Float32Array::from(vec![
-                    Some(f32::NAN),
-                    None,
-                    None,
-                    Some(f32::NAN),
-                    Some(1.0_f32),
-                    Some(1.0_f32),
-                    Some(2.0_f32),
-                    Some(3.0_f32),
-                ])),
-                Arc::new(Float64Array::from(vec![
-                    Some(200.0_f64),
-                    Some(20.0_f64),
-                    Some(10.0_f64),
-                    Some(100.0_f64),
-                    Some(f64::NAN),
-                    None,
-                    None,
-                    Some(f64::NAN),
-                ])),
-            ],
-        )?;
-
-        let sort_exec = Arc::new(SortExec::try_new(
-            vec![
-                PhysicalSortExpr {
-                    expr: col("a"),
-                    options: SortOptions {
-                        descending: true,
-                        nulls_first: true,
-                    },
-                },
-                PhysicalSortExpr {
-                    expr: col("b"),
-                    options: SortOptions {
-                        descending: false,
-                        nulls_first: false,
-                    },
-                },
-            ],
-            Arc::new(MemoryExec::try_new(&[vec![batch]], schema, None)?),
-        )?);
-
-        assert_eq!(DataType::Float32, *sort_exec.schema().field(0).data_type());
-        assert_eq!(DataType::Float64, *sort_exec.schema().field(1).data_type());
-
-        let result: Vec<RecordBatch> = collect(sort_exec.clone()).await?;
-        assert!(sort_exec.metrics().get("sortTime").unwrap().value > 0);
-        assert_eq!(sort_exec.metrics().get("outputRows").unwrap().value, 8);
-        assert_eq!(result.len(), 1);
-
-        let columns = result[0].columns();
-
-        assert_eq!(DataType::Float32, *columns[0].data_type());
-        assert_eq!(DataType::Float64, *columns[1].data_type());
-
-        let a = as_primitive_array::<Float32Type>(&columns[0]);
-        let b = as_primitive_array::<Float64Type>(&columns[1]);
-
-        // convert result to strings to allow comparing to expected result containing NaN
-        let result: Vec<(Option<String>, Option<String>)> = (0..result[0].num_rows())
-            .map(|i| {
-                let aval = if a.is_valid(i) {
-                    Some(a.value(i).to_string())
-                } else {
-                    None
-                };
-                let bval = if b.is_valid(i) {
-                    Some(b.value(i).to_string())
-                } else {
-                    None
-                };
-                (aval, bval)
-            })
-            .collect();
-
-        let expected: Vec<(Option<String>, Option<String>)> = vec![
-            (None, Some("10".to_owned())),
-            (None, Some("20".to_owned())),
-            (Some("NaN".to_owned()), Some("100".to_owned())),
-            (Some("NaN".to_owned()), Some("200".to_owned())),
-            (Some("3".to_owned()), Some("NaN".to_owned())),
-            (Some("2".to_owned()), None),
-            (Some("1".to_owned()), Some("NaN".to_owned())),
-            (Some("1".to_owned()), None),
-        ];
-
-        assert_eq!(expected, result);
-
-        Ok(())
-    }
-}
diff --git a/rust/datafusion/src/physical_plan/string_expressions.rs b/rust/datafusion/src/physical_plan/string_expressions.rs
deleted file mode 100644
index 882fe30502f..00000000000
--- a/rust/datafusion/src/physical_plan/string_expressions.rs
+++ /dev/null
@@ -1,595 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// Some of these functions reference the Postgres documentation
-// or implementation to ensure compatibility and are subject to
-// the Postgres license.
-
-//! String expressions
-
-use std::any::type_name;
-use std::sync::Arc;
-
-use crate::{
-    error::{DataFusionError, Result},
-    scalar::ScalarValue,
-};
-use arrow::{
-    array::{
-        Array, ArrayRef, BooleanArray, GenericStringArray, Int32Array, Int64Array,
-        PrimitiveArray, StringArray, StringOffsetSizeTrait,
-    },
-    datatypes::{ArrowNativeType, ArrowPrimitiveType, DataType},
-};
-
-use super::ColumnarValue;
-
-macro_rules! downcast_string_arg {
-    ($ARG:expr, $NAME:expr, $T:ident) => {{
-        $ARG.as_any()
-            .downcast_ref::<GenericStringArray<T>>()
-            .ok_or_else(|| {
-                DataFusionError::Internal(format!(
-                    "could not cast {} to {}",
-                    $NAME,
-                    type_name::<GenericStringArray<T>>()
-                ))
-            })?
-    }};
-}
-
-macro_rules! downcast_primitive_array_arg {
-    ($ARG:expr, $NAME:expr, $T:ident) => {{
-        $ARG.as_any()
-            .downcast_ref::<PrimitiveArray<T>>()
-            .ok_or_else(|| {
-                DataFusionError::Internal(format!(
-                    "could not cast {} to {}",
-                    $NAME,
-                    type_name::<PrimitiveArray<T>>()
-                ))
-            })?
-    }};
-}
-
-macro_rules! downcast_arg {
-    ($ARG:expr, $NAME:expr, $ARRAY_TYPE:ident) => {{
-        $ARG.as_any().downcast_ref::<$ARRAY_TYPE>().ok_or_else(|| {
-            DataFusionError::Internal(format!(
-                "could not cast {} to {}",
-                $NAME,
-                type_name::<$ARRAY_TYPE>()
-            ))
-        })?
-    }};
-}
-
-macro_rules! downcast_vec {
-    ($ARGS:expr, $ARRAY_TYPE:ident) => {{
-        $ARGS
-            .iter()
-            .map(|e| match e.as_any().downcast_ref::<$ARRAY_TYPE>() {
-                Some(array) => Ok(array),
-                _ => Err(DataFusionError::Internal("failed to downcast".to_string())),
-            })
-    }};
-}
-
-/// applies a unary expression to `args[0]` that is expected to be downcastable to
-/// a `GenericStringArray` and returns a `GenericStringArray` (which may have a different offset)
-/// # Errors
-/// This function errors when:
-/// * the number of arguments is not 1
-/// * the first argument is not castable to a `GenericStringArray`
-pub(crate) fn unary_string_function<'a, T, O, F, R>(
-    args: &[&'a dyn Array],
-    op: F,
-    name: &str,
-) -> Result<GenericStringArray<O>>
-where
-    R: AsRef<str>,
-    O: StringOffsetSizeTrait,
-    T: StringOffsetSizeTrait,
-    F: Fn(&'a str) -> R,
-{
-    if args.len() != 1 {
-        return Err(DataFusionError::Internal(format!(
-            "{:?} args were supplied but {} takes exactly one argument",
-            args.len(),
-            name,
-        )));
-    }
-
-    let string_array = downcast_string_arg!(args[0], "string", T);
-
-    // first map is the iterator, second is for the `Option<_>`
-    Ok(string_array
-        .iter()
-        .map(|string| string.map(|string| op(string)))
-        .collect())
-}
-
-fn handle<'a, F, R>(args: &'a [ColumnarValue], op: F, name: &str) -> Result<ColumnarValue>
-where
-    R: AsRef<str>,
-    F: Fn(&'a str) -> R,
-{
-    match &args[0] {
-        ColumnarValue::Array(a) => match a.data_type() {
-            DataType::Utf8 => {
-                Ok(ColumnarValue::Array(Arc::new(unary_string_function::<
-                    i32,
-                    i32,
-                    _,
-                    _,
-                >(
-                    &[a.as_ref()], op, name
-                )?)))
-            }
-            DataType::LargeUtf8 => {
-                Ok(ColumnarValue::Array(Arc::new(unary_string_function::<
-                    i64,
-                    i64,
-                    _,
-                    _,
-                >(
-                    &[a.as_ref()], op, name
-                )?)))
-            }
-            other => Err(DataFusionError::Internal(format!(
-                "Unsupported data type {:?} for function {}",
-                other, name,
-            ))),
-        },
-        ColumnarValue::Scalar(scalar) => match scalar {
-            ScalarValue::Utf8(a) => {
-                let result = a.as_ref().map(|x| (op)(x).as_ref().to_string());
-                Ok(ColumnarValue::Scalar(ScalarValue::Utf8(result)))
-            }
-            ScalarValue::LargeUtf8(a) => {
-                let result = a.as_ref().map(|x| (op)(x).as_ref().to_string());
-                Ok(ColumnarValue::Scalar(ScalarValue::LargeUtf8(result)))
-            }
-            other => Err(DataFusionError::Internal(format!(
-                "Unsupported data type {:?} for function {}",
-                other, name,
-            ))),
-        },
-    }
-}
-
-/// Returns the numeric code of the first character of the argument.
-/// ascii('x') = 120
-pub fn ascii<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
-    let string_array = downcast_string_arg!(args[0], "string", T);
-
-    let result = string_array
-        .iter()
-        .map(|string| {
-            string.map(|string: &str| {
-                let mut chars = string.chars();
-                chars.next().map_or(0, |v| v as i32)
-            })
-        })
-        .collect::<Int32Array>();
-
-    Ok(Arc::new(result) as ArrayRef)
-}
-
-/// Removes the longest string containing only characters in characters (a space by default) from the start and end of string.
-/// btrim('xyxtrimyyx', 'xyz') = 'trim'
-pub fn btrim<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
-    match args.len() {
-        1 => {
-            let string_array = downcast_string_arg!(args[0], "string", T);
-
-            let result = string_array
-                .iter()
-                .map(|string| {
-                    string.map(|string: &str| {
-                        string.trim_start_matches(' ').trim_end_matches(' ')
-                    })
-                })
-                .collect::<GenericStringArray<T>>();
-
-            Ok(Arc::new(result) as ArrayRef)
-        }
-        2 => {
-            let string_array = downcast_string_arg!(args[0], "string", T);
-            let characters_array = downcast_string_arg!(args[1], "characters", T);
-
-            let result = string_array
-                .iter()
-                .zip(characters_array.iter())
-                .map(|(string, characters)| match (string, characters) {
-                    (None, _) => None,
-                    (_, None) => None,
-                    (Some(string), Some(characters)) => {
-                        let chars: Vec<char> = characters.chars().collect();
-                        Some(
-                            string
-                                .trim_start_matches(&chars[..])
-                                .trim_end_matches(&chars[..]),
-                        )
-                    }
-                })
-                .collect::<GenericStringArray<T>>();
-
-            Ok(Arc::new(result) as ArrayRef)
-        }
-        other => Err(DataFusionError::Internal(format!(
-            "btrim was called with {} arguments. It requires at least 1 and at most 2.",
-            other
-        ))),
-    }
-}
-
-/// Returns the character with the given code. chr(0) is disallowed because text data types cannot store that character.
-/// chr(65) = 'A'
-pub fn chr(args: &[ArrayRef]) -> Result<ArrayRef> {
-    let integer_array = downcast_arg!(args[0], "integer", Int64Array);
-
-    // first map is the iterator, second is for the `Option<_>`
-    let result = integer_array
-        .iter()
-        .map(|integer: Option<i64>| {
-            integer
-                .map(|integer| {
-                    if integer == 0 {
-                        Err(DataFusionError::Execution(
-                            "null character not permitted.".to_string(),
-                        ))
-                    } else {
-                        match core::char::from_u32(integer as u32) {
-                            Some(integer) => Ok(integer.to_string()),
-                            None => Err(DataFusionError::Execution(
-                                "requested character too large for encoding.".to_string(),
-                            )),
-                        }
-                    }
-                })
-                .transpose()
-        })
-        .collect::<Result<StringArray>>()?;
-
-    Ok(Arc::new(result) as ArrayRef)
-}
-
-/// Concatenates the text representations of all the arguments. NULL arguments are ignored.
-/// concat('abcde', 2, NULL, 22) = 'abcde222'
-pub fn concat(args: &[ColumnarValue]) -> Result<ColumnarValue> {
-    // do not accept 0 arguments.
-    if args.is_empty() {
-        return Err(DataFusionError::Internal(format!(
-            "concat was called with {} arguments. It requires at least 1.",
-            args.len()
-        )));
-    }
-
-    // first, decide whether to return a scalar or a vector.
-    let mut return_array = args.iter().filter_map(|x| match x {
-        ColumnarValue::Array(array) => Some(array.len()),
-        _ => None,
-    });
-    if let Some(size) = return_array.next() {
-        let result = (0..size)
-            .map(|index| {
-                let mut owned_string: String = "".to_owned();
-                for arg in args {
-                    match arg {
-                        ColumnarValue::Scalar(ScalarValue::Utf8(maybe_value)) => {
-                            if let Some(value) = maybe_value {
-                                owned_string.push_str(value);
-                            }
-                        }
-                        ColumnarValue::Array(v) => {
-                            if v.is_valid(index) {
-                                let v = v.as_any().downcast_ref::<StringArray>().unwrap();
-                                owned_string.push_str(&v.value(index));
-                            }
-                        }
-                        _ => unreachable!(),
-                    }
-                }
-                Some(owned_string)
-            })
-            .collect::<StringArray>();
-
-        Ok(ColumnarValue::Array(Arc::new(result)))
-    } else {
-        // short avenue with only scalars
-        let initial = Some("".to_string());
-        let result = args.iter().fold(initial, |mut acc, rhs| {
-            if let Some(ref mut inner) = acc {
-                match rhs {
-                    ColumnarValue::Scalar(ScalarValue::Utf8(Some(v))) => {
-                        inner.push_str(v);
-                    }
-                    ColumnarValue::Scalar(ScalarValue::Utf8(None)) => {}
-                    _ => unreachable!(""),
-                };
-            };
-            acc
-        });
-        Ok(ColumnarValue::Scalar(ScalarValue::Utf8(result)))
-    }
-}
-
-/// Concatenates all but the first argument, with separators. The first argument is used as the separator string, and should not be NULL. Other NULL arguments are ignored.
-/// concat_ws(',', 'abcde', 2, NULL, 22) = 'abcde,2,22'
-pub fn concat_ws(args: &[ArrayRef]) -> Result<ArrayRef> {
-    // downcast all arguments to strings
-    let args = downcast_vec!(args, StringArray).collect::<Result<Vec<&StringArray>>>()?;
-
-    // do not accept 0 or 1 arguments.
-    if args.len() < 2 {
-        return Err(DataFusionError::Internal(format!(
-            "concat_ws was called with {} arguments. It requires at least 2.",
-            args.len()
-        )));
-    }
-
-    // first map is the iterator, second is for the `Option<_>`
-    let result = args[0]
-        .iter()
-        .enumerate()
-        .map(|(index, x)| {
-            x.map(|sep: &str| {
-                let mut owned_string: String = "".to_owned();
-                for arg_index in 1..args.len() {
-                    let arg = &args[arg_index];
-                    if !arg.is_null(index) {
-                        owned_string.push_str(&arg.value(index));
-                        // if not last push separator
-                        if arg_index != args.len() - 1 {
-                            owned_string.push_str(&sep);
-                        }
-                    }
-                }
-                owned_string
-            })
-        })
-        .collect::<StringArray>();
-
-    Ok(Arc::new(result) as ArrayRef)
-}
-
-/// Converts the first letter of each word to upper case and the rest to lower case. Words are sequences of alphanumeric characters separated by non-alphanumeric characters.
-/// initcap('hi THOMAS') = 'Hi Thomas'
-pub fn initcap<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
-    let string_array = downcast_string_arg!(args[0], "string", T);
-
-    // first map is the iterator, second is for the `Option<_>`
-    let result = string_array
-        .iter()
-        .map(|string| {
-            string.map(|string: &str| {
-                let mut char_vector = Vec::<char>::new();
-                let mut previous_character_letter_or_number = false;
-                for c in string.chars() {
-                    if previous_character_letter_or_number {
-                        char_vector.push(c.to_ascii_lowercase());
-                    } else {
-                        char_vector.push(c.to_ascii_uppercase());
-                    }
-                    previous_character_letter_or_number = ('A'..='Z').contains(&c)
-                        || ('a'..='z').contains(&c)
-                        || ('0'..='9').contains(&c);
-                }
-                char_vector.iter().collect::<String>()
-            })
-        })
-        .collect::<GenericStringArray<T>>();
-
-    Ok(Arc::new(result) as ArrayRef)
-}
-
-/// Converts the string to all lower case.
-/// lower('TOM') = 'tom'
-pub fn lower(args: &[ColumnarValue]) -> Result<ColumnarValue> {
-    handle(args, |string| string.to_ascii_lowercase(), "lower")
-}
-
-/// Removes the longest string containing only characters in characters (a space by default) from the start of string.
-/// ltrim('zzzytest', 'xyz') = 'test'
-pub fn ltrim<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
-    match args.len() {
-        1 => {
-            let string_array = downcast_string_arg!(args[0], "string", T);
-
-            let result = string_array
-                .iter()
-                .map(|string| string.map(|string: &str| string.trim_start_matches(' ')))
-                .collect::<GenericStringArray<T>>();
-
-            Ok(Arc::new(result) as ArrayRef)
-        }
-        2 => {
-            let string_array = downcast_string_arg!(args[0], "string", T);
-            let characters_array = downcast_string_arg!(args[1], "characters", T);
-
-            let result = string_array
-                .iter()
-                .zip(characters_array.iter())
-                .map(|(string, characters)| match (string, characters) {
-                    (Some(string), Some(characters)) => {
-                        let chars: Vec<char> = characters.chars().collect();
-                        Some(string.trim_start_matches(&chars[..]))
-                    }
-                    _ => None,
-                })
-                .collect::<GenericStringArray<T>>();
-
-            Ok(Arc::new(result) as ArrayRef)
-        }
-        other => Err(DataFusionError::Internal(format!(
-            "ltrim was called with {} arguments. It requires at least 1 and at most 2.",
-            other
-        ))),
-    }
-}
-
-/// Repeats string the specified number of times.
-/// repeat('Pg', 4) = 'PgPgPgPg'
-pub fn repeat<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
-    let string_array = downcast_string_arg!(args[0], "string", T);
-    let number_array = downcast_arg!(args[1], "number", Int64Array);
-
-    let result = string_array
-        .iter()
-        .zip(number_array.iter())
-        .map(|(string, number)| match (string, number) {
-            (Some(string), Some(number)) => Some(string.repeat(number as usize)),
-            _ => None,
-        })
-        .collect::<GenericStringArray<T>>();
-
-    Ok(Arc::new(result) as ArrayRef)
-}
-
-/// Replaces all occurrences in string of substring from with substring to.
-/// replace('abcdefabcdef', 'cd', 'XX') = 'abXXefabXXef'
-pub fn replace<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
-    let string_array = downcast_string_arg!(args[0], "string", T);
-    let from_array = downcast_string_arg!(args[1], "from", T);
-    let to_array = downcast_string_arg!(args[2], "to", T);
-
-    let result = string_array
-        .iter()
-        .zip(from_array.iter())
-        .zip(to_array.iter())
-        .map(|((string, from), to)| match (string, from, to) {
-            (Some(string), Some(from), Some(to)) => Some(string.replace(from, to)),
-            _ => None,
-        })
-        .collect::<GenericStringArray<T>>();
-
-    Ok(Arc::new(result) as ArrayRef)
-}
-
-/// Removes the longest string containing only characters in characters (a space by default) from the end of string.
-/// rtrim('testxxzx', 'xyz') = 'test'
-pub fn rtrim<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
-    match args.len() {
-        1 => {
-            let string_array = downcast_string_arg!(args[0], "string", T);
-
-            let result = string_array
-                .iter()
-                .map(|string| string.map(|string: &str| string.trim_end_matches(' ')))
-                .collect::<GenericStringArray<T>>();
-
-            Ok(Arc::new(result) as ArrayRef)
-        }
-        2 => {
-            let string_array = downcast_string_arg!(args[0], "string", T);
-            let characters_array = downcast_string_arg!(args[1], "characters", T);
-
-            let result = string_array
-                .iter()
-                .zip(characters_array.iter())
-                .map(|(string, characters)| match (string, characters) {
-                    (Some(string), Some(characters)) => {
-                        let chars: Vec<char> = characters.chars().collect();
-                        Some(string.trim_end_matches(&chars[..]))
-                    }
-                    _ => None,
-                })
-                .collect::<GenericStringArray<T>>();
-
-            Ok(Arc::new(result) as ArrayRef)
-        }
-        other => Err(DataFusionError::Internal(format!(
-            "rtrim was called with {} arguments. It requires at least 1 and at most 2.",
-            other
-        ))),
-    }
-}
-
-/// Splits string at occurrences of delimiter and returns the n'th field (counting from one).
-/// split_part('abc~@~def~@~ghi', '~@~', 2) = 'def'
-pub fn split_part<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
-    let string_array = downcast_string_arg!(args[0], "string", T);
-    let delimiter_array = downcast_string_arg!(args[1], "delimiter", T);
-    let n_array = downcast_arg!(args[2], "n", Int64Array);
-
-    let result = string_array
-        .iter()
-        .zip(delimiter_array.iter())
-        .zip(n_array.iter())
-        .map(|((string, delimiter), n)| match (string, delimiter, n) {
-            (Some(string), Some(delimiter), Some(n)) => {
-                if n <= 0 {
-                    Err(DataFusionError::Execution(
-                        "field position must be greater than zero".to_string(),
-                    ))
-                } else {
-                    let split_string: Vec<&str> = string.split(delimiter).collect();
-                    match split_string.get(n as usize - 1) {
-                        Some(s) => Ok(Some(*s)),
-                        None => Ok(Some("")),
-                    }
-                }
-            }
-            _ => Ok(None),
-        })
-        .collect::<Result<GenericStringArray<T>>>()?;
-
-    Ok(Arc::new(result) as ArrayRef)
-}
-
-/// Returns true if string starts with prefix.
-/// starts_with('alphabet', 'alph') = 't'
-pub fn starts_with<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
-    let string_array = downcast_string_arg!(args[0], "string", T);
-    let prefix_array = downcast_string_arg!(args[1], "prefix", T);
-
-    let result = string_array
-        .iter()
-        .zip(prefix_array.iter())
-        .map(|(string, prefix)| match (string, prefix) {
-            (Some(string), Some(prefix)) => Some(string.starts_with(prefix)),
-            _ => None,
-        })
-        .collect::<BooleanArray>();
-
-    Ok(Arc::new(result) as ArrayRef)
-}
-
-/// Converts the number to its equivalent hexadecimal representation.
-/// to_hex(2147483647) = '7fffffff'
-pub fn to_hex<T: ArrowPrimitiveType>(args: &[ArrayRef]) -> Result<ArrayRef>
-where
-    T::Native: StringOffsetSizeTrait,
-{
-    let integer_array = downcast_primitive_array_arg!(args[0], "integer", T);
-
-    let result = integer_array
-        .iter()
-        .map(|integer| {
-            integer.map(|integer| format!("{:x}", integer.to_usize().unwrap()))
-        })
-        .collect::<GenericStringArray<i32>>();
-
-    Ok(Arc::new(result) as ArrayRef)
-}
-
-/// Converts the string to all upper case.
-/// upper('tom') = 'TOM'
-pub fn upper(args: &[ColumnarValue]) -> Result<ColumnarValue> {
-    handle(args, |string| string.to_ascii_uppercase(), "upper")
-}
diff --git a/rust/datafusion/src/physical_plan/type_coercion.rs b/rust/datafusion/src/physical_plan/type_coercion.rs
deleted file mode 100644
index 24b51ba6069..00000000000
--- a/rust/datafusion/src/physical_plan/type_coercion.rs
+++ /dev/null
@@ -1,361 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Type coercion rules for functions with multiple valid signatures
-//!
-//! Coercion is performed automatically by DataFusion when the types
-//! of arguments passed to a function do not exacty match the types
-//! required by that function. In this case, DataFusion will attempt to
-//! *coerce* the arguments to types accepted by the function by
-//! inserting CAST operations.
-//!
-//! CAST operations added by coercion are lossless and never discard
-//! information. For example coercion from i32 -> i64 might be
-//! performed because all valid i32 values can be represented using an
-//! i64. However, i64 -> i32 is never performed as there are i64
-//! values which can not be represented by i32 values.
-
-use std::{sync::Arc, vec};
-
-use arrow::datatypes::{DataType, Schema, TimeUnit};
-
-use super::{functions::Signature, PhysicalExpr};
-use crate::error::{DataFusionError, Result};
-use crate::physical_plan::expressions::try_cast;
-
-/// Returns `expressions` coerced to types compatible with
-/// `signature`, if possible.
-///
-/// See the module level documentation for more detail on coercion.
-pub fn coerce(
-    expressions: &[Arc<dyn PhysicalExpr>],
-    schema: &Schema,
-    signature: &Signature,
-) -> Result<Vec<Arc<dyn PhysicalExpr>>> {
-    let current_types = expressions
-        .iter()
-        .map(|e| e.data_type(schema))
-        .collect::<Result<Vec<_>>>()?;
-
-    let new_types = data_types(&current_types, signature)?;
-
-    expressions
-        .iter()
-        .enumerate()
-        .map(|(i, expr)| try_cast(expr.clone(), &schema, new_types[i].clone()))
-        .collect::<Result<Vec<_>>>()
-}
-
-/// Returns the data types that each argument must be coerced to match
-/// `signature`.
-///
-/// See the module level documentation for more detail on coercion.
-pub fn data_types(
-    current_types: &[DataType],
-    signature: &Signature,
-) -> Result<Vec<DataType>> {
-    let valid_types = get_valid_types(signature, current_types)?;
-
-    if valid_types
-        .iter()
-        .any(|data_type| data_type == current_types)
-    {
-        return Ok(current_types.to_vec());
-    }
-
-    for valid_types in valid_types {
-        if let Some(types) = maybe_data_types(&valid_types, &current_types) {
-            return Ok(types);
-        }
-    }
-
-    // none possible -> Error
-    Err(DataFusionError::Plan(format!(
-        "Coercion from {:?} to the signature {:?} failed.",
-        current_types, signature
-    )))
-}
-
-fn get_valid_types(
-    signature: &Signature,
-    current_types: &[DataType],
-) -> Result<Vec<Vec<DataType>>> {
-    let valid_types = match signature {
-        Signature::Variadic(valid_types) => valid_types
-            .iter()
-            .map(|valid_type| current_types.iter().map(|_| valid_type.clone()).collect())
-            .collect(),
-        Signature::Uniform(number, valid_types) => valid_types
-            .iter()
-            .map(|valid_type| (0..*number).map(|_| valid_type.clone()).collect())
-            .collect(),
-        Signature::VariadicEqual => {
-            // one entry with the same len as current_types, whose type is `current_types[0]`.
-            vec![current_types
-                .iter()
-                .map(|_| current_types[0].clone())
-                .collect()]
-        }
-        Signature::Exact(valid_types) => vec![valid_types.clone()],
-        Signature::Any(number) => {
-            if current_types.len() != *number {
-                return Err(DataFusionError::Plan(format!(
-                    "The function expected {} arguments but received {}",
-                    number,
-                    current_types.len()
-                )));
-            }
-            vec![(0..*number).map(|i| current_types[i].clone()).collect()]
-        }
-        Signature::OneOf(types) => {
-            let mut r = vec![];
-            for s in types {
-                r.extend(get_valid_types(s, current_types)?);
-            }
-            r
-        }
-    };
-
-    Ok(valid_types)
-}
-
-/// Try to coerce current_types into valid_types.
-fn maybe_data_types(
-    valid_types: &[DataType],
-    current_types: &[DataType],
-) -> Option<Vec<DataType>> {
-    if valid_types.len() != current_types.len() {
-        return None;
-    }
-
-    let mut new_type = Vec::with_capacity(valid_types.len());
-    for (i, valid_type) in valid_types.iter().enumerate() {
-        let current_type = &current_types[i];
-
-        if current_type == valid_type {
-            new_type.push(current_type.clone())
-        } else {
-            // attempt to coerce
-            if can_coerce_from(valid_type, &current_type) {
-                new_type.push(valid_type.clone())
-            } else {
-                // not possible
-                return None;
-            }
-        }
-    }
-    Some(new_type)
-}
-
-/// Return true if a value of type `type_from` can be coerced
-/// (losslessly converted) into a value of `type_to`
-///
-/// See the module level documentation for more detail on coercion.
-pub fn can_coerce_from(type_into: &DataType, type_from: &DataType) -> bool {
-    use self::DataType::*;
-    match type_into {
-        Int8 => matches!(type_from, Int8),
-        Int16 => matches!(type_from, Int8 | Int16 | UInt8),
-        Int32 => matches!(type_from, Int8 | Int16 | Int32 | UInt8 | UInt16),
-        Int64 => matches!(
-            type_from,
-            Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32
-        ),
-        UInt8 => matches!(type_from, UInt8),
-        UInt16 => matches!(type_from, UInt8 | UInt16),
-        UInt32 => matches!(type_from, UInt8 | UInt16 | UInt32),
-        UInt64 => matches!(type_from, UInt8 | UInt16 | UInt32 | UInt64),
-        Float32 => matches!(
-            type_from,
-            Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32 | UInt64 | Float32
-        ),
-        Float64 => matches!(
-            type_from,
-            Int8 | Int16
-                | Int32
-                | Int64
-                | UInt8
-                | UInt16
-                | UInt32
-                | UInt64
-                | Float32
-                | Float64
-        ),
-        Timestamp(TimeUnit::Nanosecond, None) => matches!(type_from, Timestamp(_, None)),
-        Utf8 => true,
-        _ => false,
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::physical_plan::expressions::col;
-    use arrow::datatypes::{DataType, Field, Schema};
-
-    #[test]
-    fn test_maybe_data_types() {
-        // this vec contains: arg1, arg2, expected result
-        let cases = vec![
-            // 2 entries, same values
-            (
-                vec![DataType::UInt8, DataType::UInt16],
-                vec![DataType::UInt8, DataType::UInt16],
-                Some(vec![DataType::UInt8, DataType::UInt16]),
-            ),
-            // 2 entries, can coerse values
-            (
-                vec![DataType::UInt16, DataType::UInt16],
-                vec![DataType::UInt8, DataType::UInt16],
-                Some(vec![DataType::UInt16, DataType::UInt16]),
-            ),
-            // 0 entries, all good
-            (vec![], vec![], Some(vec![])),
-            // 2 entries, can't coerce
-            (
-                vec![DataType::Boolean, DataType::UInt16],
-                vec![DataType::UInt8, DataType::UInt16],
-                None,
-            ),
-            // u32 -> u16 is possible
-            (
-                vec![DataType::Boolean, DataType::UInt32],
-                vec![DataType::Boolean, DataType::UInt16],
-                Some(vec![DataType::Boolean, DataType::UInt32]),
-            ),
-        ];
-
-        for case in cases {
-            assert_eq!(maybe_data_types(&case.0, &case.1), case.2)
-        }
-    }
-
-    #[test]
-    fn test_coerce() -> Result<()> {
-        // create a schema
-        let schema = |t: Vec<DataType>| {
-            Schema::new(
-                t.iter()
-                    .enumerate()
-                    .map(|(i, t)| Field::new(&*format!("c{}", i), t.clone(), true))
-                    .collect(),
-            )
-        };
-
-        // create a vector of expressions
-        let expressions = |t: Vec<DataType>, schema| -> Result<Vec<_>> {
-            t.iter()
-                .enumerate()
-                .map(|(i, t)| try_cast(col(&format!("c{}", i)), &schema, t.clone()))
-                .collect::<Result<Vec<_>>>()
-        };
-
-        // create a case: input + expected result
-        let case =
-            |observed: Vec<DataType>, valid, expected: Vec<DataType>| -> Result<_> {
-                let schema = schema(observed.clone());
-                let expr = expressions(observed, schema.clone())?;
-                let expected = expressions(expected, schema.clone())?;
-                Ok((expr.clone(), schema, valid, expected))
-            };
-
-        let cases = vec![
-            // u16 -> u32
-            case(
-                vec![DataType::UInt16],
-                Signature::Uniform(1, vec![DataType::UInt32]),
-                vec![DataType::UInt32],
-            )?,
-            // same type
-            case(
-                vec![DataType::UInt32, DataType::UInt32],
-                Signature::Uniform(2, vec![DataType::UInt32]),
-                vec![DataType::UInt32, DataType::UInt32],
-            )?,
-            case(
-                vec![DataType::UInt32],
-                Signature::Uniform(1, vec![DataType::Float32, DataType::Float64]),
-                vec![DataType::Float32],
-            )?,
-            // u32 -> f32
-            case(
-                vec![DataType::UInt32, DataType::UInt32],
-                Signature::Variadic(vec![DataType::Float32]),
-                vec![DataType::Float32, DataType::Float32],
-            )?,
-            // u32 -> f32
-            case(
-                vec![DataType::Float32, DataType::UInt32],
-                Signature::VariadicEqual,
-                vec![DataType::Float32, DataType::Float32],
-            )?,
-            // common type is u64
-            case(
-                vec![DataType::UInt32, DataType::UInt64],
-                Signature::Variadic(vec![DataType::UInt32, DataType::UInt64]),
-                vec![DataType::UInt64, DataType::UInt64],
-            )?,
-            // f32 -> f32
-            case(
-                vec![DataType::Float32],
-                Signature::Any(1),
-                vec![DataType::Float32],
-            )?,
-        ];
-
-        for case in cases {
-            let observed = format!("{:?}", coerce(&case.0, &case.1, &case.2)?);
-            let expected = format!("{:?}", case.3);
-            assert_eq!(observed, expected);
-        }
-
-        // now cases that are expected to fail
-        let cases = vec![
-            // we do not know how to cast bool to UInt16 => fail
-            case(
-                vec![DataType::Boolean],
-                Signature::Uniform(1, vec![DataType::UInt16]),
-                vec![],
-            )?,
-            // u32 and bool are not uniform
-            case(
-                vec![DataType::UInt32, DataType::Boolean],
-                Signature::VariadicEqual,
-                vec![],
-            )?,
-            // bool is not castable to u32
-            case(
-                vec![DataType::Boolean, DataType::Boolean],
-                Signature::Variadic(vec![DataType::UInt32]),
-                vec![],
-            )?,
-            // expected two arguments
-            case(vec![DataType::UInt32], Signature::Any(2), vec![])?,
-        ];
-
-        for case in cases {
-            if coerce(&case.0, &case.1, &case.2).is_ok() {
-                return Err(DataFusionError::Plan(format!(
-                    "Error was expected in {:?}",
-                    case
-                )));
-            }
-        }
-
-        Ok(())
-    }
-}
diff --git a/rust/datafusion/src/physical_plan/udaf.rs b/rust/datafusion/src/physical_plan/udaf.rs
deleted file mode 100644
index 3dc6aa402f5..00000000000
--- a/rust/datafusion/src/physical_plan/udaf.rs
+++ /dev/null
@@ -1,168 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! This module contains functions and structs supporting user-defined aggregate functions.
-
-use fmt::{Debug, Formatter};
-use std::any::Any;
-use std::fmt;
-
-use arrow::{
-    datatypes::Field,
-    datatypes::{DataType, Schema},
-};
-
-use crate::physical_plan::PhysicalExpr;
-use crate::{error::Result, logical_plan::Expr};
-
-use super::{
-    aggregates::AccumulatorFunctionImplementation,
-    aggregates::StateTypeFunction,
-    expressions::format_state_name,
-    functions::{ReturnTypeFunction, Signature},
-    type_coercion::coerce,
-    Accumulator, AggregateExpr,
-};
-use std::sync::Arc;
-
-/// Logical representation of a user-defined aggregate function (UDAF)
-/// A UDAF is different from a UDF in that it is stateful across batches.
-#[derive(Clone)]
-pub struct AggregateUDF {
-    /// name
-    pub name: String,
-    /// signature
-    pub signature: Signature,
-    /// Return type
-    pub return_type: ReturnTypeFunction,
-    /// actual implementation
-    pub accumulator: AccumulatorFunctionImplementation,
-    /// the accumulator's state's description as a function of the return type
-    pub state_type: StateTypeFunction,
-}
-
-impl Debug for AggregateUDF {
-    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
-        f.debug_struct("AggregateUDF")
-            .field("name", &self.name)
-            .field("signature", &self.signature)
-            .field("fun", &"<FUNC>")
-            .finish()
-    }
-}
-
-impl PartialEq for AggregateUDF {
-    fn eq(&self, other: &Self) -> bool {
-        self.name == other.name && self.signature == other.signature
-    }
-}
-
-impl AggregateUDF {
-    /// Create a new AggregateUDF
-    pub fn new(
-        name: &str,
-        signature: &Signature,
-        return_type: &ReturnTypeFunction,
-        accumulator: &AccumulatorFunctionImplementation,
-        state_type: &StateTypeFunction,
-    ) -> Self {
-        Self {
-            name: name.to_owned(),
-            signature: signature.clone(),
-            return_type: return_type.clone(),
-            accumulator: accumulator.clone(),
-            state_type: state_type.clone(),
-        }
-    }
-
-    /// creates a logical expression with a call of the UDAF
-    /// This utility allows using the UDAF without requiring access to the registry.
-    pub fn call(&self, args: Vec<Expr>) -> Expr {
-        Expr::AggregateUDF {
-            fun: Arc::new(self.clone()),
-            args,
-        }
-    }
-}
-
-/// Creates a physical expression of the UDAF, that includes all necessary type coercion.
-/// This function errors when `args`' can't be coerced to a valid argument type of the UDAF.
-pub fn create_aggregate_expr(
-    fun: &AggregateUDF,
-    args: &[Arc<dyn PhysicalExpr>],
-    input_schema: &Schema,
-    name: String,
-) -> Result<Arc<dyn AggregateExpr>> {
-    // coerce
-    let args = coerce(args, input_schema, &fun.signature)?;
-
-    let arg_types = args
-        .iter()
-        .map(|arg| arg.data_type(input_schema))
-        .collect::<Result<Vec<_>>>()?;
-
-    Ok(Arc::new(AggregateFunctionExpr {
-        fun: fun.clone(),
-        args: args.clone(),
-        data_type: (fun.return_type)(&arg_types)?.as_ref().clone(),
-        name,
-    }))
-}
-
-/// Physical aggregate expression of a UDAF.
-#[derive(Debug)]
-pub struct AggregateFunctionExpr {
-    fun: AggregateUDF,
-    args: Vec<Arc<dyn PhysicalExpr>>,
-    data_type: DataType,
-    name: String,
-}
-
-impl AggregateExpr for AggregateFunctionExpr {
-    /// Return a reference to Any that can be used for downcasting
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn expressions(&self) -> Vec<Arc<dyn PhysicalExpr>> {
-        self.args.clone()
-    }
-
-    fn state_fields(&self) -> Result<Vec<Field>> {
-        let fields = (self.fun.state_type)(&self.data_type)?
-            .iter()
-            .enumerate()
-            .map(|(i, data_type)| {
-                Field::new(
-                    &format_state_name(&self.name, &format!("{}", i)),
-                    data_type.clone(),
-                    true,
-                )
-            })
-            .collect::<Vec<Field>>();
-
-        Ok(fields)
-    }
-
-    fn field(&self) -> Result<Field> {
-        Ok(Field::new(&self.name, self.data_type.clone(), true))
-    }
-
-    fn create_accumulator(&self) -> Result<Box<dyn Accumulator>> {
-        (self.fun.accumulator)()
-    }
-}
diff --git a/rust/datafusion/src/physical_plan/udf.rs b/rust/datafusion/src/physical_plan/udf.rs
deleted file mode 100644
index 9189da47bd6..00000000000
--- a/rust/datafusion/src/physical_plan/udf.rs
+++ /dev/null
@@ -1,112 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! UDF support
-
-use fmt::{Debug, Formatter};
-use std::fmt;
-
-use arrow::datatypes::Schema;
-
-use crate::error::Result;
-use crate::{logical_plan::Expr, physical_plan::PhysicalExpr};
-
-use super::{
-    functions::{
-        ReturnTypeFunction, ScalarFunctionExpr, ScalarFunctionImplementation, Signature,
-    },
-    type_coercion::coerce,
-};
-use std::sync::Arc;
-
-/// Logical representation of a UDF.
-#[derive(Clone)]
-pub struct ScalarUDF {
-    /// name
-    pub name: String,
-    /// signature
-    pub signature: Signature,
-    /// Return type
-    pub return_type: ReturnTypeFunction,
-    /// actual implementation
-    pub fun: ScalarFunctionImplementation,
-}
-
-impl Debug for ScalarUDF {
-    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
-        f.debug_struct("ScalarUDF")
-            .field("name", &self.name)
-            .field("signature", &self.signature)
-            .field("fun", &"<FUNC>")
-            .finish()
-    }
-}
-
-impl PartialEq for ScalarUDF {
-    fn eq(&self, other: &Self) -> bool {
-        self.name == other.name && self.signature == other.signature
-    }
-}
-
-impl ScalarUDF {
-    /// Create a new ScalarUDF
-    pub fn new(
-        name: &str,
-        signature: &Signature,
-        return_type: &ReturnTypeFunction,
-        fun: &ScalarFunctionImplementation,
-    ) -> Self {
-        Self {
-            name: name.to_owned(),
-            signature: signature.clone(),
-            return_type: return_type.clone(),
-            fun: fun.clone(),
-        }
-    }
-
-    /// creates a logical expression with a call of the UDF
-    /// This utility allows using the UDF without requiring access to the registry.
-    pub fn call(&self, args: Vec<Expr>) -> Expr {
-        Expr::ScalarUDF {
-            fun: Arc::new(self.clone()),
-            args,
-        }
-    }
-}
-
-/// Create a physical expression of the UDF.
-/// This function errors when `args`' can't be coerced to a valid argument type of the UDF.
-pub fn create_physical_expr(
-    fun: &ScalarUDF,
-    args: &[Arc<dyn PhysicalExpr>],
-    input_schema: &Schema,
-) -> Result<Arc<dyn PhysicalExpr>> {
-    // coerce
-    let args = coerce(args, input_schema, &fun.signature)?;
-
-    let arg_types = args
-        .iter()
-        .map(|e| e.data_type(input_schema))
-        .collect::<Result<Vec<_>>>()?;
-
-    Ok(Arc::new(ScalarFunctionExpr::new(
-        &fun.name,
-        fun.fun.clone(),
-        args,
-        (fun.return_type)(&arg_types)?.as_ref(),
-    )))
-}
diff --git a/rust/datafusion/src/physical_plan/unicode_expressions.rs b/rust/datafusion/src/physical_plan/unicode_expressions.rs
deleted file mode 100644
index 787ea7ea267..00000000000
--- a/rust/datafusion/src/physical_plan/unicode_expressions.rs
+++ /dev/null
@@ -1,532 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// Some of these functions reference the Postgres documentation
-// or implementation to ensure compatibility and are subject to
-// the Postgres license.
-
-//! Unicode expressions
-
-use std::any::type_name;
-use std::cmp::Ordering;
-use std::sync::Arc;
-
-use crate::error::{DataFusionError, Result};
-use arrow::{
-    array::{
-        ArrayRef, GenericStringArray, Int64Array, PrimitiveArray, StringOffsetSizeTrait,
-    },
-    datatypes::{ArrowNativeType, ArrowPrimitiveType},
-};
-use hashbrown::HashMap;
-use unicode_segmentation::UnicodeSegmentation;
-
-macro_rules! downcast_string_arg {
-    ($ARG:expr, $NAME:expr, $T:ident) => {{
-        $ARG.as_any()
-            .downcast_ref::<GenericStringArray<T>>()
-            .ok_or_else(|| {
-                DataFusionError::Internal(format!(
-                    "could not cast {} to {}",
-                    $NAME,
-                    type_name::<GenericStringArray<T>>()
-                ))
-            })?
-    }};
-}
-
-macro_rules! downcast_arg {
-    ($ARG:expr, $NAME:expr, $ARRAY_TYPE:ident) => {{
-        $ARG.as_any().downcast_ref::<$ARRAY_TYPE>().ok_or_else(|| {
-            DataFusionError::Internal(format!(
-                "could not cast {} to {}",
-                $NAME,
-                type_name::<$ARRAY_TYPE>()
-            ))
-        })?
-    }};
-}
-
-/// Returns number of characters in the string.
-/// character_length('josé') = 4
-pub fn character_length<T: ArrowPrimitiveType>(args: &[ArrayRef]) -> Result<ArrayRef>
-where
-    T::Native: StringOffsetSizeTrait,
-{
-    let string_array: &GenericStringArray<T::Native> = args[0]
-        .as_any()
-        .downcast_ref::<GenericStringArray<T::Native>>()
-        .ok_or_else(|| {
-            DataFusionError::Internal("could not cast string to StringArray".to_string())
-        })?;
-
-    let result = string_array
-        .iter()
-        .map(|string| {
-            string.map(|string: &str| {
-                T::Native::from_usize(string.graphemes(true).count()).expect(
-                    "should not fail as graphemes.count will always return integer",
-                )
-            })
-        })
-        .collect::<PrimitiveArray<T>>();
-
-    Ok(Arc::new(result) as ArrayRef)
-}
-
-/// Returns first n characters in the string, or when n is negative, returns all but last |n| characters.
-/// left('abcde', 2) = 'ab'
-pub fn left<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
-    let string_array = downcast_string_arg!(args[0], "string", T);
-    let n_array = downcast_arg!(args[1], "n", Int64Array);
-
-    let result = string_array
-        .iter()
-        .zip(n_array.iter())
-        .map(|(string, n)| match (string, n) {
-            (Some(string), Some(n)) => match n.cmp(&0) {
-                Ordering::Less => {
-                    let graphemes = string.graphemes(true);
-                    let len = graphemes.clone().count() as i64;
-                    match n.abs().cmp(&len) {
-                        Ordering::Less => {
-                            Some(graphemes.take((len + n) as usize).collect::<String>())
-                        }
-                        Ordering::Equal => Some("".to_string()),
-                        Ordering::Greater => Some("".to_string()),
-                    }
-                }
-                Ordering::Equal => Some("".to_string()),
-                Ordering::Greater => {
-                    Some(string.graphemes(true).take(n as usize).collect::<String>())
-                }
-            },
-            _ => None,
-        })
-        .collect::<GenericStringArray<T>>();
-
-    Ok(Arc::new(result) as ArrayRef)
-}
-
-/// Extends the string to length 'length' by prepending the characters fill (a space by default). If the string is already longer than length then it is truncated (on the right).
-/// lpad('hi', 5, 'xy') = 'xyxhi'
-pub fn lpad<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
-    match args.len() {
-        2 => {
-            let string_array = downcast_string_arg!(args[0], "string", T);
-            let length_array = downcast_arg!(args[1], "length", Int64Array);
-
-            let result = string_array
-                .iter()
-                .zip(length_array.iter())
-                .map(|(string, length)| match (string, length) {
-                    (Some(string), Some(length)) => {
-                        let length = length as usize;
-                        if length == 0 {
-                            Some("".to_string())
-                        } else {
-                            let graphemes = string.graphemes(true).collect::<Vec<&str>>();
-                            if length < graphemes.len() {
-                                Some(graphemes[..length].concat())
-                            } else {
-                                let mut s = string.to_string();
-                                s.insert_str(
-                                    0,
-                                    " ".repeat(length - graphemes.len()).as_str(),
-                                );
-                                Some(s)
-                            }
-                        }
-                    }
-                    _ => None,
-                })
-                .collect::<GenericStringArray<T>>();
-
-            Ok(Arc::new(result) as ArrayRef)
-        }
-        3 => {
-            let string_array = downcast_string_arg!(args[0], "string", T);
-            let length_array = downcast_arg!(args[1], "length", Int64Array);
-            let fill_array = downcast_string_arg!(args[2], "fill", T);
-
-            let result = string_array
-                .iter()
-                .zip(length_array.iter())
-                .zip(fill_array.iter())
-                .map(|((string, length), fill)| match (string, length, fill) {
-                    (Some(string), Some(length), Some(fill)) => {
-                        let length = length as usize;
-
-                        if length == 0 {
-                            Some("".to_string())
-                        } else {
-                            let graphemes = string.graphemes(true).collect::<Vec<&str>>();
-                            let fill_chars = fill.chars().collect::<Vec<char>>();
-
-                            if length < graphemes.len() {
-                                Some(graphemes[..length].concat())
-                            } else if fill_chars.is_empty() {
-                                Some(string.to_string())
-                            } else {
-                                let mut s = string.to_string();
-                                let mut char_vector =
-                                    Vec::<char>::with_capacity(length - graphemes.len());
-                                for l in 0..length - graphemes.len() {
-                                    char_vector.push(
-                                        *fill_chars.get(l % fill_chars.len()).unwrap(),
-                                    );
-                                }
-                                s.insert_str(
-                                    0,
-                                    char_vector.iter().collect::<String>().as_str(),
-                                );
-                                Some(s)
-                            }
-                        }
-                    }
-                    _ => None,
-                })
-                .collect::<GenericStringArray<T>>();
-
-            Ok(Arc::new(result) as ArrayRef)
-        }
-        other => Err(DataFusionError::Internal(format!(
-            "lpad was called with {} arguments. It requires at least 2 and at most 3.",
-            other
-        ))),
-    }
-}
-
-/// Reverses the order of the characters in the string.
-/// reverse('abcde') = 'edcba'
-pub fn reverse<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
-    let string_array = downcast_string_arg!(args[0], "string", T);
-
-    let result = string_array
-        .iter()
-        .map(|string| {
-            string.map(|string: &str| string.graphemes(true).rev().collect::<String>())
-        })
-        .collect::<GenericStringArray<T>>();
-
-    Ok(Arc::new(result) as ArrayRef)
-}
-
-/// Returns last n characters in the string, or when n is negative, returns all but first |n| characters.
-/// right('abcde', 2) = 'de'
-pub fn right<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
-    let string_array = downcast_string_arg!(args[0], "string", T);
-    let n_array = downcast_arg!(args[1], "n", Int64Array);
-
-    let result = string_array
-        .iter()
-        .zip(n_array.iter())
-        .map(|(string, n)| match (string, n) {
-            (Some(string), Some(n)) => match n.cmp(&0) {
-                Ordering::Less => {
-                    let graphemes = string.graphemes(true).rev();
-                    let len = graphemes.clone().count() as i64;
-                    match n.abs().cmp(&len) {
-                        Ordering::Less => Some(
-                            graphemes
-                                .take((len + n) as usize)
-                                .collect::<Vec<&str>>()
-                                .iter()
-                                .rev()
-                                .copied()
-                                .collect::<String>(),
-                        ),
-                        Ordering::Equal => Some("".to_string()),
-                        Ordering::Greater => Some("".to_string()),
-                    }
-                }
-                Ordering::Equal => Some("".to_string()),
-                Ordering::Greater => Some(
-                    string
-                        .graphemes(true)
-                        .rev()
-                        .take(n as usize)
-                        .collect::<Vec<&str>>()
-                        .iter()
-                        .rev()
-                        .copied()
-                        .collect::<String>(),
-                ),
-            },
-            _ => None,
-        })
-        .collect::<GenericStringArray<T>>();
-
-    Ok(Arc::new(result) as ArrayRef)
-}
-
-/// Extends the string to length 'length' by appending the characters fill (a space by default). If the string is already longer than length then it is truncated.
-/// rpad('hi', 5, 'xy') = 'hixyx'
-pub fn rpad<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
-    match args.len() {
-        2 => {
-            let string_array = downcast_string_arg!(args[0], "string", T);
-            let length_array = downcast_arg!(args[1], "length", Int64Array);
-
-            let result = string_array
-                .iter()
-                .zip(length_array.iter())
-                .map(|(string, length)| match (string, length) {
-                    (Some(string), Some(length)) => {
-                        let length = length as usize;
-                        if length == 0 {
-                            Some("".to_string())
-                        } else {
-                            let graphemes = string.graphemes(true).collect::<Vec<&str>>();
-                            if length < graphemes.len() {
-                                Some(graphemes[..length].concat())
-                            } else {
-                                let mut s = string.to_string();
-                                s.push_str(" ".repeat(length - graphemes.len()).as_str());
-                                Some(s)
-                            }
-                        }
-                    }
-                    _ => None,
-                })
-                .collect::<GenericStringArray<T>>();
-
-            Ok(Arc::new(result) as ArrayRef)
-        }
-        3 => {
-            let string_array = downcast_string_arg!(args[0], "string", T);
-            let length_array = downcast_arg!(args[1], "length", Int64Array);
-            let fill_array = downcast_string_arg!(args[2], "fill", T);
-
-            let result = string_array
-                .iter()
-                .zip(length_array.iter())
-                .zip(fill_array.iter())
-                .map(|((string, length), fill)| match (string, length, fill) {
-                    (Some(string), Some(length), Some(fill)) => {
-                        let length = length as usize;
-                        let graphemes = string.graphemes(true).collect::<Vec<&str>>();
-                        let fill_chars = fill.chars().collect::<Vec<char>>();
-
-                        if length < graphemes.len() {
-                            Some(graphemes[..length].concat())
-                        } else if fill_chars.is_empty() {
-                            Some(string.to_string())
-                        } else {
-                            let mut s = string.to_string();
-                            let mut char_vector =
-                                Vec::<char>::with_capacity(length - graphemes.len());
-                            for l in 0..length - graphemes.len() {
-                                char_vector
-                                    .push(*fill_chars.get(l % fill_chars.len()).unwrap());
-                            }
-                            s.push_str(char_vector.iter().collect::<String>().as_str());
-                            Some(s)
-                        }
-                    }
-                    _ => None,
-                })
-                .collect::<GenericStringArray<T>>();
-
-            Ok(Arc::new(result) as ArrayRef)
-        }
-        other => Err(DataFusionError::Internal(format!(
-            "rpad was called with {} arguments. It requires at least 2 and at most 3.",
-            other
-        ))),
-    }
-}
-
-/// Returns starting index of specified substring within string, or zero if it's not present. (Same as position(substring in string), but note the reversed argument order.)
-/// strpos('high', 'ig') = 2
-pub fn strpos<T: ArrowPrimitiveType>(args: &[ArrayRef]) -> Result<ArrayRef>
-where
-    T::Native: StringOffsetSizeTrait,
-{
-    let string_array: &GenericStringArray<T::Native> = args[0]
-        .as_any()
-        .downcast_ref::<GenericStringArray<T::Native>>()
-        .ok_or_else(|| {
-            DataFusionError::Internal("could not cast string to StringArray".to_string())
-        })?;
-
-    let substring_array: &GenericStringArray<T::Native> = args[1]
-        .as_any()
-        .downcast_ref::<GenericStringArray<T::Native>>()
-        .ok_or_else(|| {
-            DataFusionError::Internal(
-                "could not cast substring to StringArray".to_string(),
-            )
-        })?;
-
-    let result = string_array
-        .iter()
-        .zip(substring_array.iter())
-        .map(|(string, substring)| match (string, substring) {
-            (Some(string), Some(substring)) => {
-                // the rfind method returns the byte index of the substring which may or may not be the same as the character index due to UTF8 encoding
-                // this method first finds the matching byte using rfind
-                // then maps that to the character index by matching on the grapheme_index of the byte_index
-                Some(
-                    T::Native::from_usize(string.to_string().rfind(substring).map_or(
-                        0,
-                        |byte_offset| {
-                            string
-                                .grapheme_indices(true)
-                                .collect::<Vec<(usize, &str)>>()
-                                .iter()
-                                .enumerate()
-                                .filter(|(_, (offset, _))| *offset == byte_offset)
-                                .map(|(index, _)| index)
-                                .collect::<Vec<usize>>()
-                                .first()
-                                .expect("should not fail as grapheme_indices and byte offsets are tightly coupled")
-                                .to_owned()
-                                + 1
-                        },
-                    ))
-                    .expect("should not fail due to map_or default value")
-                )
-            }
-            _ => None,
-        })
-        .collect::<PrimitiveArray<T>>();
-
-    Ok(Arc::new(result) as ArrayRef)
-}
-
-/// Extracts the substring of string starting at the start'th character, and extending for count characters if that is specified. (Same as substring(string from start for count).)
-/// substr('alphabet', 3) = 'phabet'
-/// substr('alphabet', 3, 2) = 'ph'
-pub fn substr<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
-    match args.len() {
-        2 => {
-            let string_array = downcast_string_arg!(args[0], "string", T);
-            let start_array = downcast_arg!(args[1], "start", Int64Array);
-
-            let result = string_array
-                .iter()
-                .zip(start_array.iter())
-                .map(|(string, start)| match (string, start) {
-                    (Some(string), Some(start)) => {
-                        if start <= 0 {
-                            Some(string.to_string())
-                        } else {
-                            let graphemes = string.graphemes(true).collect::<Vec<&str>>();
-                            let start_pos = start as usize - 1;
-                            if graphemes.len() < start_pos {
-                                Some("".to_string())
-                            } else {
-                                Some(graphemes[start_pos..].concat())
-                            }
-                        }
-                    }
-                    _ => None,
-                })
-                .collect::<GenericStringArray<T>>();
-
-            Ok(Arc::new(result) as ArrayRef)
-        }
-        3 => {
-            let string_array = downcast_string_arg!(args[0], "string", T);
-            let start_array = downcast_arg!(args[1], "start", Int64Array);
-            let count_array = downcast_arg!(args[2], "count", Int64Array);
-
-            let result = string_array
-                .iter()
-                .zip(start_array.iter())
-                .zip(count_array.iter())
-                .map(|((string, start), count)| match (string, start, count) {
-                    (Some(string), Some(start), Some(count)) => {
-                        if count < 0 {
-                            Err(DataFusionError::Execution(
-                                "negative substring length not allowed".to_string(),
-                            ))
-                        } else if start <= 0 {
-                            Ok(Some(string.to_string()))
-                        } else {
-                            let graphemes = string.graphemes(true).collect::<Vec<&str>>();
-                            let start_pos = start as usize - 1;
-                            let count_usize = count as usize;
-                            if graphemes.len() < start_pos {
-                                Ok(Some("".to_string()))
-                            } else if graphemes.len() < start_pos + count_usize {
-                                Ok(Some(graphemes[start_pos..].concat()))
-                            } else {
-                                Ok(Some(
-                                    graphemes[start_pos..start_pos + count_usize]
-                                        .concat(),
-                                ))
-                            }
-                        }
-                    }
-                    _ => Ok(None),
-                })
-                .collect::<Result<GenericStringArray<T>>>()?;
-
-            Ok(Arc::new(result) as ArrayRef)
-        }
-        other => Err(DataFusionError::Internal(format!(
-            "substr was called with {} arguments. It requires 2 or 3.",
-            other
-        ))),
-    }
-}
-
-/// Replaces each character in string that matches a character in the from set with the corresponding character in the to set. If from is longer than to, occurrences of the extra characters in from are deleted.
-/// translate('12345', '143', 'ax') = 'a2x5'
-pub fn translate<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
-    let string_array = downcast_string_arg!(args[0], "string", T);
-    let from_array = downcast_string_arg!(args[1], "from", T);
-    let to_array = downcast_string_arg!(args[2], "to", T);
-
-    let result = string_array
-        .iter()
-        .zip(from_array.iter())
-        .zip(to_array.iter())
-        .map(|((string, from), to)| match (string, from, to) {
-            (Some(string), Some(from), Some(to)) => {
-                // create a hashmap of [char, index] to change from O(n) to O(1) for from list
-                let from_map: HashMap<&str, usize> = from
-                    .graphemes(true)
-                    .collect::<Vec<&str>>()
-                    .iter()
-                    .enumerate()
-                    .map(|(index, c)| (c.to_owned(), index))
-                    .collect();
-
-                let to = to.graphemes(true).collect::<Vec<&str>>();
-
-                Some(
-                    string
-                        .graphemes(true)
-                        .collect::<Vec<&str>>()
-                        .iter()
-                        .flat_map(|c| match from_map.get(*c) {
-                            Some(n) => to.get(*n).copied(),
-                            None => Some(*c),
-                        })
-                        .collect::<Vec<&str>>()
-                        .concat(),
-                )
-            }
-            _ => None,
-        })
-        .collect::<GenericStringArray<T>>();
-
-    Ok(Arc::new(result) as ArrayRef)
-}
diff --git a/rust/datafusion/src/physical_plan/union.rs b/rust/datafusion/src/physical_plan/union.rs
deleted file mode 100644
index cbab728a842..00000000000
--- a/rust/datafusion/src/physical_plan/union.rs
+++ /dev/null
@@ -1,143 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// Some of these functions reference the Postgres documentation
-// or implementation to ensure compatibility and are subject to
-// the Postgres license.
-
-//! The Union operator combines multiple inputs with the same schema
-
-use std::{any::Any, sync::Arc};
-
-use arrow::datatypes::SchemaRef;
-
-use super::{ExecutionPlan, Partitioning, SendableRecordBatchStream};
-use crate::error::Result;
-use async_trait::async_trait;
-
-/// UNION ALL execution plan
-#[derive(Debug)]
-pub struct UnionExec {
-    /// Input execution plan
-    inputs: Vec<Arc<dyn ExecutionPlan>>,
-}
-
-impl UnionExec {
-    /// Create a new UnionExec
-    pub fn new(inputs: Vec<Arc<dyn ExecutionPlan>>) -> Self {
-        UnionExec { inputs }
-    }
-}
-
-#[async_trait]
-impl ExecutionPlan for UnionExec {
-    /// Return a reference to Any that can be used for downcasting
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn schema(&self) -> SchemaRef {
-        self.inputs[0].schema()
-    }
-
-    fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
-        self.inputs.clone()
-    }
-
-    /// Output of the union is the combination of all output partitions of the inputs
-    fn output_partitioning(&self) -> Partitioning {
-        // Sums all the output partitions
-        let num_partitions = self
-            .inputs
-            .iter()
-            .map(|plan| plan.output_partitioning().partition_count())
-            .sum();
-        // TODO: this loses partitioning info in case of same partitioning scheme (for example `Partitioning::Hash`)
-        // https://issues.apache.org/jira/browse/ARROW-11991
-        Partitioning::UnknownPartitioning(num_partitions)
-    }
-
-    fn with_new_children(
-        &self,
-        children: Vec<Arc<dyn ExecutionPlan>>,
-    ) -> Result<Arc<dyn ExecutionPlan>> {
-        Ok(Arc::new(UnionExec::new(children)))
-    }
-
-    async fn execute(&self, mut partition: usize) -> Result<SendableRecordBatchStream> {
-        // find partition to execute
-        for input in self.inputs.iter() {
-            // Calculate whether partition belongs to the current partition
-            if partition < input.output_partitioning().partition_count() {
-                return input.execute(partition).await;
-            } else {
-                partition -= input.output_partitioning().partition_count();
-            }
-        }
-
-        Err(crate::error::DataFusionError::Execution(format!(
-            "Partition {} not found in Union",
-            partition
-        )))
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::physical_plan::{
-        collect,
-        csv::{CsvExec, CsvReadOptions},
-    };
-    use crate::test;
-    use arrow::record_batch::RecordBatch;
-
-    #[tokio::test]
-    async fn test_union_partitions() -> Result<()> {
-        let schema = test::aggr_test_schema();
-
-        // Create csv's with different partitioning
-        let path = test::create_partitioned_csv("aggregate_test_100.csv", 4)?;
-        let path2 = test::create_partitioned_csv("aggregate_test_100.csv", 5)?;
-
-        let csv = CsvExec::try_new(
-            &path,
-            CsvReadOptions::new().schema(&schema),
-            None,
-            1024,
-            None,
-        )?;
-
-        let csv2 = CsvExec::try_new(
-            &path2,
-            CsvReadOptions::new().schema(&schema),
-            None,
-            1024,
-            None,
-        )?;
-
-        let union_exec = Arc::new(UnionExec::new(vec![Arc::new(csv), Arc::new(csv2)]));
-
-        // Should have 9 partitions and 9 output batches
-        assert_eq!(union_exec.output_partitioning().partition_count(), 9);
-
-        let result: Vec<RecordBatch> = collect(union_exec).await?;
-        assert_eq!(result.len(), 9);
-
-        Ok(())
-    }
-}
diff --git a/rust/datafusion/src/prelude.rs b/rust/datafusion/src/prelude.rs
deleted file mode 100644
index 0edc82a98af..00000000000
--- a/rust/datafusion/src/prelude.rs
+++ /dev/null
@@ -1,37 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.pub},
-
-//! A "prelude" for users of the datafusion crate.
-//!
-//! Like the standard library's prelude, this module simplifies importing of
-//! common items. Unlike the standard prelude, the contents of this module must
-//! be imported manually:
-//!
-//! ```
-//! use datafusion::prelude::*;
-//! ```
-
-pub use crate::dataframe::DataFrame;
-pub use crate::execution::context::{ExecutionConfig, ExecutionContext};
-pub use crate::logical_plan::{
-    array, ascii, avg, bit_length, btrim, character_length, chr, col, concat, concat_ws,
-    count, create_udf, in_list, initcap, left, length, lit, lower, lpad, ltrim, max, md5,
-    min, octet_length, regexp_replace, repeat, replace, reverse, right, rpad, rtrim,
-    sha224, sha256, sha384, sha512, split_part, starts_with, strpos, substr, sum, to_hex,
-    translate, trim, upper, JoinType, Partitioning,
-};
-pub use crate::physical_plan::csv::CsvReadOptions;
diff --git a/rust/datafusion/src/scalar.rs b/rust/datafusion/src/scalar.rs
deleted file mode 100644
index 833f707e971..00000000000
--- a/rust/datafusion/src/scalar.rs
+++ /dev/null
@@ -1,821 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! This module provides ScalarValue, an enum that can be used for storage of single elements
-
-use std::{convert::TryFrom, fmt, iter::repeat, sync::Arc};
-
-use arrow::datatypes::{DataType, Field, IntervalUnit, TimeUnit};
-use arrow::{
-    array::*,
-    datatypes::{ArrowNativeType, Float32Type, TimestampNanosecondType},
-};
-use arrow::{
-    array::{
-        ArrayRef, Int16Builder, Int32Builder, Int64Builder, Int8Builder, ListBuilder,
-        TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
-        UInt16Builder, UInt32Builder, UInt64Builder, UInt8Builder,
-    },
-    datatypes::{
-        TimestampMicrosecondType, TimestampMillisecondType, TimestampSecondType,
-    },
-};
-
-use crate::error::{DataFusionError, Result};
-
-/// Represents a dynamically typed, nullable single value.
-/// This is the single-valued counter-part of arrow’s `Array`.
-#[derive(Clone, PartialEq)]
-pub enum ScalarValue {
-    /// true or false value
-    Boolean(Option<bool>),
-    /// 32bit float
-    Float32(Option<f32>),
-    /// 64bit float
-    Float64(Option<f64>),
-    /// signed 8bit int
-    Int8(Option<i8>),
-    /// signed 16bit int
-    Int16(Option<i16>),
-    /// signed 32bit int
-    Int32(Option<i32>),
-    /// signed 64bit int
-    Int64(Option<i64>),
-    /// unsigned 8bit int
-    UInt8(Option<u8>),
-    /// unsigned 16bit int
-    UInt16(Option<u16>),
-    /// unsigned 32bit int
-    UInt32(Option<u32>),
-    /// unsigned 64bit int
-    UInt64(Option<u64>),
-    /// utf-8 encoded string.
-    Utf8(Option<String>),
-    /// utf-8 encoded string representing a LargeString's arrow type.
-    LargeUtf8(Option<String>),
-    /// binary
-    Binary(Option<Vec<u8>>),
-    /// large binary
-    LargeBinary(Option<Vec<u8>>),
-    /// list of nested ScalarValue
-    List(Option<Vec<ScalarValue>>, DataType),
-    /// Date stored as a signed 32bit int
-    Date32(Option<i32>),
-    /// Date stored as a signed 64bit int
-    Date64(Option<i64>),
-    /// Timestamp Second
-    TimestampSecond(Option<i64>),
-    /// Timestamp Milliseconds
-    TimestampMillisecond(Option<i64>),
-    /// Timestamp Microseconds
-    TimestampMicrosecond(Option<i64>),
-    /// Timestamp Nanoseconds
-    TimestampNanosecond(Option<i64>),
-    /// Interval with YearMonth unit
-    IntervalYearMonth(Option<i32>),
-    /// Interval with DayTime unit
-    IntervalDayTime(Option<i64>),
-}
-
-macro_rules! typed_cast {
-    ($array:expr, $index:expr, $ARRAYTYPE:ident, $SCALAR:ident) => {{
-        let array = $array.as_any().downcast_ref::<$ARRAYTYPE>().unwrap();
-        ScalarValue::$SCALAR(match array.is_null($index) {
-            true => None,
-            false => Some(array.value($index).into()),
-        })
-    }};
-}
-
-macro_rules! build_list {
-    ($VALUE_BUILDER_TY:ident, $SCALAR_TY:ident, $VALUES:expr, $SIZE:expr) => {{
-        match $VALUES {
-            // the return on the macro is necessary, to short-circuit and return ArrayRef
-            None => {
-                return new_null_array(
-                    &DataType::List(Box::new(Field::new(
-                        "item",
-                        DataType::$SCALAR_TY,
-                        true,
-                    ))),
-                    $SIZE,
-                )
-            }
-            Some(values) => {
-                let mut builder = ListBuilder::new($VALUE_BUILDER_TY::new(values.len()));
-
-                for _ in 0..$SIZE {
-                    for scalar_value in values {
-                        match scalar_value {
-                            ScalarValue::$SCALAR_TY(Some(v)) => {
-                                builder.values().append_value(v.clone()).unwrap()
-                            }
-                            ScalarValue::$SCALAR_TY(None) => {
-                                builder.values().append_null().unwrap();
-                            }
-                            _ => panic!("Incompatible ScalarValue for list"),
-                        };
-                    }
-                    builder.append(true).unwrap();
-                }
-
-                builder.finish()
-            }
-        }
-    }};
-}
-
-impl ScalarValue {
-    /// Getter for the `DataType` of the value
-    pub fn get_datatype(&self) -> DataType {
-        match self {
-            ScalarValue::Boolean(_) => DataType::Boolean,
-            ScalarValue::UInt8(_) => DataType::UInt8,
-            ScalarValue::UInt16(_) => DataType::UInt16,
-            ScalarValue::UInt32(_) => DataType::UInt32,
-            ScalarValue::UInt64(_) => DataType::UInt64,
-            ScalarValue::Int8(_) => DataType::Int8,
-            ScalarValue::Int16(_) => DataType::Int16,
-            ScalarValue::Int32(_) => DataType::Int32,
-            ScalarValue::Int64(_) => DataType::Int64,
-            ScalarValue::TimestampSecond(_) => {
-                DataType::Timestamp(TimeUnit::Second, None)
-            }
-            ScalarValue::TimestampMillisecond(_) => {
-                DataType::Timestamp(TimeUnit::Millisecond, None)
-            }
-            ScalarValue::TimestampMicrosecond(_) => {
-                DataType::Timestamp(TimeUnit::Microsecond, None)
-            }
-            ScalarValue::TimestampNanosecond(_) => {
-                DataType::Timestamp(TimeUnit::Nanosecond, None)
-            }
-            ScalarValue::Float32(_) => DataType::Float32,
-            ScalarValue::Float64(_) => DataType::Float64,
-            ScalarValue::Utf8(_) => DataType::Utf8,
-            ScalarValue::LargeUtf8(_) => DataType::LargeUtf8,
-            ScalarValue::Binary(_) => DataType::Binary,
-            ScalarValue::LargeBinary(_) => DataType::LargeBinary,
-            ScalarValue::List(_, data_type) => {
-                DataType::List(Box::new(Field::new("item", data_type.clone(), true)))
-            }
-            ScalarValue::Date32(_) => DataType::Date32,
-            ScalarValue::Date64(_) => DataType::Date64,
-            ScalarValue::IntervalYearMonth(_) => {
-                DataType::Interval(IntervalUnit::YearMonth)
-            }
-            ScalarValue::IntervalDayTime(_) => DataType::Interval(IntervalUnit::DayTime),
-        }
-    }
-
-    /// Calculate arithmetic negation for a scalar value
-    pub fn arithmetic_negate(&self) -> Self {
-        match self {
-            ScalarValue::Boolean(None)
-            | ScalarValue::Int8(None)
-            | ScalarValue::Int16(None)
-            | ScalarValue::Int32(None)
-            | ScalarValue::Int64(None)
-            | ScalarValue::Float32(None) => self.clone(),
-            ScalarValue::Float64(Some(v)) => ScalarValue::Float64(Some(-v)),
-            ScalarValue::Float32(Some(v)) => ScalarValue::Float32(Some(-v)),
-            ScalarValue::Int8(Some(v)) => ScalarValue::Int8(Some(-v)),
-            ScalarValue::Int16(Some(v)) => ScalarValue::Int16(Some(-v)),
-            ScalarValue::Int32(Some(v)) => ScalarValue::Int32(Some(-v)),
-            ScalarValue::Int64(Some(v)) => ScalarValue::Int64(Some(-v)),
-            _ => panic!("Cannot run arithmetic negate on scalar value: {:?}", self),
-        }
-    }
-
-    /// whether this value is null or not.
-    pub fn is_null(&self) -> bool {
-        matches!(
-            *self,
-            ScalarValue::Boolean(None)
-                | ScalarValue::UInt8(None)
-                | ScalarValue::UInt16(None)
-                | ScalarValue::UInt32(None)
-                | ScalarValue::UInt64(None)
-                | ScalarValue::Int8(None)
-                | ScalarValue::Int16(None)
-                | ScalarValue::Int32(None)
-                | ScalarValue::Int64(None)
-                | ScalarValue::Float32(None)
-                | ScalarValue::Float64(None)
-                | ScalarValue::Utf8(None)
-                | ScalarValue::LargeUtf8(None)
-                | ScalarValue::List(None, _)
-                | ScalarValue::TimestampMillisecond(None)
-                | ScalarValue::TimestampMicrosecond(None)
-                | ScalarValue::TimestampNanosecond(None)
-        )
-    }
-
-    /// Converts a scalar value into an 1-row array.
-    pub fn to_array(&self) -> ArrayRef {
-        self.to_array_of_size(1)
-    }
-
-    /// Converts a scalar value into an array of `size` rows.
-    pub fn to_array_of_size(&self, size: usize) -> ArrayRef {
-        match self {
-            ScalarValue::Boolean(e) => {
-                Arc::new(BooleanArray::from(vec![*e; size])) as ArrayRef
-            }
-            ScalarValue::Float64(e) => match e {
-                Some(value) => Arc::new(Float64Array::from_value(*value, size)),
-                None => new_null_array(&DataType::Float64, size),
-            },
-            ScalarValue::Float32(e) => match e {
-                Some(value) => Arc::new(Float32Array::from_value(*value, size)),
-                None => new_null_array(&DataType::Float32, size),
-            },
-            ScalarValue::Int8(e) => match e {
-                Some(value) => Arc::new(Int8Array::from_value(*value, size)),
-                None => new_null_array(&DataType::Int8, size),
-            },
-            ScalarValue::Int16(e) => match e {
-                Some(value) => Arc::new(Int16Array::from_value(*value, size)),
-                None => new_null_array(&DataType::Int16, size),
-            },
-            ScalarValue::Int32(e) => match e {
-                Some(value) => Arc::new(Int32Array::from_value(*value, size)),
-                None => new_null_array(&DataType::Int32, size),
-            },
-            ScalarValue::Int64(e) => match e {
-                Some(value) => Arc::new(Int64Array::from_value(*value, size)),
-                None => new_null_array(&DataType::Int64, size),
-            },
-            ScalarValue::UInt8(e) => match e {
-                Some(value) => Arc::new(UInt8Array::from_value(*value, size)),
-                None => new_null_array(&DataType::UInt8, size),
-            },
-            ScalarValue::UInt16(e) => match e {
-                Some(value) => Arc::new(UInt16Array::from_value(*value, size)),
-                None => new_null_array(&DataType::UInt16, size),
-            },
-            ScalarValue::UInt32(e) => match e {
-                Some(value) => Arc::new(UInt32Array::from_value(*value, size)),
-                None => new_null_array(&DataType::UInt32, size),
-            },
-            ScalarValue::UInt64(e) => match e {
-                Some(value) => Arc::new(UInt64Array::from_value(*value, size)),
-                None => new_null_array(&DataType::UInt64, size),
-            },
-            ScalarValue::TimestampSecond(e) => match e {
-                Some(value) => Arc::new(TimestampSecondArray::from_iter_values(
-                    repeat(*value).take(size),
-                )),
-                None => {
-                    new_null_array(&DataType::Timestamp(TimeUnit::Second, None), size)
-                }
-            },
-            ScalarValue::TimestampMillisecond(e) => match e {
-                Some(value) => Arc::new(TimestampMillisecondArray::from_iter_values(
-                    repeat(*value).take(size),
-                )),
-                None => new_null_array(
-                    &DataType::Timestamp(TimeUnit::Millisecond, None),
-                    size,
-                ),
-            },
-            ScalarValue::TimestampMicrosecond(e) => match e {
-                Some(value) => {
-                    Arc::new(TimestampMicrosecondArray::from_value(*value, size))
-                }
-                None => new_null_array(
-                    &DataType::Timestamp(TimeUnit::Microsecond, None),
-                    size,
-                ),
-            },
-            ScalarValue::TimestampNanosecond(e) => match e {
-                Some(value) => {
-                    Arc::new(TimestampNanosecondArray::from_value(*value, size))
-                }
-                None => {
-                    new_null_array(&DataType::Timestamp(TimeUnit::Nanosecond, None), size)
-                }
-            },
-            ScalarValue::Utf8(e) => match e {
-                Some(value) => {
-                    Arc::new(StringArray::from_iter_values(repeat(value).take(size)))
-                }
-                None => new_null_array(&DataType::Utf8, size),
-            },
-            ScalarValue::LargeUtf8(e) => match e {
-                Some(value) => {
-                    Arc::new(LargeStringArray::from_iter_values(repeat(value).take(size)))
-                }
-                None => new_null_array(&DataType::LargeUtf8, size),
-            },
-            ScalarValue::Binary(e) => match e {
-                Some(value) => Arc::new(
-                    repeat(Some(value.as_slice()))
-                        .take(size)
-                        .collect::<BinaryArray>(),
-                ),
-                None => {
-                    Arc::new(repeat(None::<&str>).take(size).collect::<BinaryArray>())
-                }
-            },
-            ScalarValue::LargeBinary(e) => match e {
-                Some(value) => Arc::new(
-                    repeat(Some(value.as_slice()))
-                        .take(size)
-                        .collect::<LargeBinaryArray>(),
-                ),
-                None => Arc::new(
-                    repeat(None::<&str>)
-                        .take(size)
-                        .collect::<LargeBinaryArray>(),
-                ),
-            },
-            ScalarValue::List(values, data_type) => Arc::new(match data_type {
-                DataType::Int8 => build_list!(Int8Builder, Int8, values, size),
-                DataType::Int16 => build_list!(Int16Builder, Int16, values, size),
-                DataType::Int32 => build_list!(Int32Builder, Int32, values, size),
-                DataType::Int64 => build_list!(Int64Builder, Int64, values, size),
-                DataType::UInt8 => build_list!(UInt8Builder, UInt8, values, size),
-                DataType::UInt16 => build_list!(UInt16Builder, UInt16, values, size),
-                DataType::UInt32 => build_list!(UInt32Builder, UInt32, values, size),
-                DataType::UInt64 => build_list!(UInt64Builder, UInt64, values, size),
-                DataType::Utf8 => build_list!(StringBuilder, Utf8, values, size),
-                DataType::LargeUtf8 => {
-                    build_list!(LargeStringBuilder, LargeUtf8, values, size)
-                }
-                _ => panic!("Unexpected DataType for list"),
-            }),
-            ScalarValue::Date32(e) => match e {
-                Some(value) => Arc::new(Date32Array::from_value(*value, size)),
-                None => new_null_array(&DataType::Date32, size),
-            },
-            ScalarValue::Date64(e) => match e {
-                Some(value) => Arc::new(Date64Array::from_value(*value, size)),
-                None => new_null_array(&DataType::Date64, size),
-            },
-            ScalarValue::IntervalDayTime(e) => match e {
-                Some(value) => Arc::new(IntervalDayTimeArray::from_value(*value, size)),
-                None => new_null_array(&DataType::Interval(IntervalUnit::DayTime), size),
-            },
-            ScalarValue::IntervalYearMonth(e) => match e {
-                Some(value) => Arc::new(IntervalYearMonthArray::from_value(*value, size)),
-                None => {
-                    new_null_array(&DataType::Interval(IntervalUnit::YearMonth), size)
-                }
-            },
-        }
-    }
-
-    /// Converts a value in `array` at `index` into a ScalarValue
-    pub fn try_from_array(array: &ArrayRef, index: usize) -> Result<Self> {
-        Ok(match array.data_type() {
-            DataType::Boolean => typed_cast!(array, index, BooleanArray, Boolean),
-            DataType::Float64 => typed_cast!(array, index, Float64Array, Float64),
-            DataType::Float32 => typed_cast!(array, index, Float32Array, Float32),
-            DataType::UInt64 => typed_cast!(array, index, UInt64Array, UInt64),
-            DataType::UInt32 => typed_cast!(array, index, UInt32Array, UInt32),
-            DataType::UInt16 => typed_cast!(array, index, UInt16Array, UInt16),
-            DataType::UInt8 => typed_cast!(array, index, UInt8Array, UInt8),
-            DataType::Int64 => typed_cast!(array, index, Int64Array, Int64),
-            DataType::Int32 => typed_cast!(array, index, Int32Array, Int32),
-            DataType::Int16 => typed_cast!(array, index, Int16Array, Int16),
-            DataType::Int8 => typed_cast!(array, index, Int8Array, Int8),
-            DataType::Utf8 => typed_cast!(array, index, StringArray, Utf8),
-            DataType::LargeUtf8 => typed_cast!(array, index, LargeStringArray, LargeUtf8),
-            DataType::List(nested_type) => {
-                let list_array =
-                    array.as_any().downcast_ref::<ListArray>().ok_or_else(|| {
-                        DataFusionError::Internal(
-                            "Failed to downcast ListArray".to_string(),
-                        )
-                    })?;
-                let value = match list_array.is_null(index) {
-                    true => None,
-                    false => {
-                        let nested_array = list_array.value(index);
-                        let scalar_vec = (0..nested_array.len())
-                            .map(|i| ScalarValue::try_from_array(&nested_array, i))
-                            .collect::<Result<Vec<_>>>()?;
-                        Some(scalar_vec)
-                    }
-                };
-                ScalarValue::List(value, nested_type.data_type().clone())
-            }
-            DataType::Date32 => {
-                typed_cast!(array, index, Date32Array, Date32)
-            }
-            DataType::Date64 => {
-                typed_cast!(array, index, Date64Array, Date64)
-            }
-            DataType::Timestamp(TimeUnit::Second, _) => {
-                typed_cast!(array, index, TimestampSecondArray, TimestampSecond)
-            }
-            DataType::Timestamp(TimeUnit::Millisecond, _) => {
-                typed_cast!(
-                    array,
-                    index,
-                    TimestampMillisecondArray,
-                    TimestampMillisecond
-                )
-            }
-            DataType::Timestamp(TimeUnit::Microsecond, _) => {
-                typed_cast!(
-                    array,
-                    index,
-                    TimestampMicrosecondArray,
-                    TimestampMicrosecond
-                )
-            }
-            DataType::Timestamp(TimeUnit::Nanosecond, _) => {
-                typed_cast!(array, index, TimestampNanosecondArray, TimestampNanosecond)
-            }
-            other => {
-                return Err(DataFusionError::NotImplemented(format!(
-                    "Can't create a scalar of array of type \"{:?}\"",
-                    other
-                )))
-            }
-        })
-    }
-}
-
-impl From<f64> for ScalarValue {
-    fn from(value: f64) -> Self {
-        ScalarValue::Float64(Some(value))
-    }
-}
-
-impl From<f32> for ScalarValue {
-    fn from(value: f32) -> Self {
-        ScalarValue::Float32(Some(value))
-    }
-}
-
-impl From<i8> for ScalarValue {
-    fn from(value: i8) -> Self {
-        ScalarValue::Int8(Some(value))
-    }
-}
-
-impl From<i16> for ScalarValue {
-    fn from(value: i16) -> Self {
-        ScalarValue::Int16(Some(value))
-    }
-}
-
-impl From<i32> for ScalarValue {
-    fn from(value: i32) -> Self {
-        ScalarValue::Int32(Some(value))
-    }
-}
-
-impl From<i64> for ScalarValue {
-    fn from(value: i64) -> Self {
-        ScalarValue::Int64(Some(value))
-    }
-}
-
-impl From<bool> for ScalarValue {
-    fn from(value: bool) -> Self {
-        ScalarValue::Boolean(Some(value))
-    }
-}
-
-impl From<u8> for ScalarValue {
-    fn from(value: u8) -> Self {
-        ScalarValue::UInt8(Some(value))
-    }
-}
-
-impl From<u16> for ScalarValue {
-    fn from(value: u16) -> Self {
-        ScalarValue::UInt16(Some(value))
-    }
-}
-
-impl From<u32> for ScalarValue {
-    fn from(value: u32) -> Self {
-        ScalarValue::UInt32(Some(value))
-    }
-}
-
-impl From<u64> for ScalarValue {
-    fn from(value: u64) -> Self {
-        ScalarValue::UInt64(Some(value))
-    }
-}
-
-macro_rules! impl_try_from {
-    ($SCALAR:ident, $NATIVE:ident) => {
-        impl TryFrom<ScalarValue> for $NATIVE {
-            type Error = DataFusionError;
-
-            fn try_from(value: ScalarValue) -> Result<Self> {
-                match value {
-                    ScalarValue::$SCALAR(Some(inner_value)) => Ok(inner_value),
-                    _ => Err(DataFusionError::Internal(format!(
-                        "Cannot convert {:?} to {}",
-                        value,
-                        std::any::type_name::<Self>()
-                    ))),
-                }
-            }
-        }
-    };
-}
-
-impl_try_from!(Int8, i8);
-impl_try_from!(Int16, i16);
-
-// special implementation for i32 because of Date32
-impl TryFrom<ScalarValue> for i32 {
-    type Error = DataFusionError;
-
-    fn try_from(value: ScalarValue) -> Result<Self> {
-        match value {
-            ScalarValue::Int32(Some(inner_value))
-            | ScalarValue::Date32(Some(inner_value)) => Ok(inner_value),
-            _ => Err(DataFusionError::Internal(format!(
-                "Cannot convert {:?} to {}",
-                value,
-                std::any::type_name::<Self>()
-            ))),
-        }
-    }
-}
-
-// special implementation for i64 because of TimeNanosecond
-impl TryFrom<ScalarValue> for i64 {
-    type Error = DataFusionError;
-
-    fn try_from(value: ScalarValue) -> Result<Self> {
-        match value {
-            ScalarValue::Int64(Some(inner_value))
-            | ScalarValue::TimestampNanosecond(Some(inner_value)) => Ok(inner_value),
-            _ => Err(DataFusionError::Internal(format!(
-                "Cannot convert {:?} to {}",
-                value,
-                std::any::type_name::<Self>()
-            ))),
-        }
-    }
-}
-
-impl_try_from!(UInt8, u8);
-impl_try_from!(UInt16, u16);
-impl_try_from!(UInt32, u32);
-impl_try_from!(UInt64, u64);
-impl_try_from!(Float32, f32);
-impl_try_from!(Float64, f64);
-impl_try_from!(Boolean, bool);
-
-impl TryFrom<&DataType> for ScalarValue {
-    type Error = DataFusionError;
-
-    fn try_from(datatype: &DataType) -> Result<Self> {
-        Ok(match datatype {
-            DataType::Boolean => ScalarValue::Boolean(None),
-            DataType::Float64 => ScalarValue::Float64(None),
-            DataType::Float32 => ScalarValue::Float32(None),
-            DataType::Int8 => ScalarValue::Int8(None),
-            DataType::Int16 => ScalarValue::Int16(None),
-            DataType::Int32 => ScalarValue::Int32(None),
-            DataType::Int64 => ScalarValue::Int64(None),
-            DataType::UInt8 => ScalarValue::UInt8(None),
-            DataType::UInt16 => ScalarValue::UInt16(None),
-            DataType::UInt32 => ScalarValue::UInt32(None),
-            DataType::UInt64 => ScalarValue::UInt64(None),
-            DataType::Utf8 => ScalarValue::Utf8(None),
-            DataType::LargeUtf8 => ScalarValue::LargeUtf8(None),
-            DataType::Timestamp(TimeUnit::Second, _) => {
-                ScalarValue::TimestampSecond(None)
-            }
-            DataType::Timestamp(TimeUnit::Millisecond, _) => {
-                ScalarValue::TimestampMillisecond(None)
-            }
-            DataType::Timestamp(TimeUnit::Microsecond, _) => {
-                ScalarValue::TimestampMicrosecond(None)
-            }
-            DataType::Timestamp(TimeUnit::Nanosecond, _) => {
-                ScalarValue::TimestampNanosecond(None)
-            }
-            DataType::List(ref nested_type) => {
-                ScalarValue::List(None, nested_type.data_type().clone())
-            }
-            _ => {
-                return Err(DataFusionError::NotImplemented(format!(
-                    "Can't create a scalar of type \"{:?}\"",
-                    datatype
-                )))
-            }
-        })
-    }
-}
-
-macro_rules! format_option {
-    ($F:expr, $EXPR:expr) => {{
-        match $EXPR {
-            Some(e) => write!($F, "{}", e),
-            None => write!($F, "NULL"),
-        }
-    }};
-}
-
-impl fmt::Display for ScalarValue {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        match self {
-            ScalarValue::Boolean(e) => format_option!(f, e)?,
-            ScalarValue::Float32(e) => format_option!(f, e)?,
-            ScalarValue::Float64(e) => format_option!(f, e)?,
-            ScalarValue::Int8(e) => format_option!(f, e)?,
-            ScalarValue::Int16(e) => format_option!(f, e)?,
-            ScalarValue::Int32(e) => format_option!(f, e)?,
-            ScalarValue::Int64(e) => format_option!(f, e)?,
-            ScalarValue::UInt8(e) => format_option!(f, e)?,
-            ScalarValue::UInt16(e) => format_option!(f, e)?,
-            ScalarValue::UInt32(e) => format_option!(f, e)?,
-            ScalarValue::UInt64(e) => format_option!(f, e)?,
-            ScalarValue::TimestampSecond(e) => format_option!(f, e)?,
-            ScalarValue::TimestampMillisecond(e) => format_option!(f, e)?,
-            ScalarValue::TimestampMicrosecond(e) => format_option!(f, e)?,
-            ScalarValue::TimestampNanosecond(e) => format_option!(f, e)?,
-            ScalarValue::Utf8(e) => format_option!(f, e)?,
-            ScalarValue::LargeUtf8(e) => format_option!(f, e)?,
-            ScalarValue::Binary(e) => match e {
-                Some(l) => write!(
-                    f,
-                    "{}",
-                    l.iter()
-                        .map(|v| format!("{}", v))
-                        .collect::<Vec<_>>()
-                        .join(",")
-                )?,
-                None => write!(f, "NULL")?,
-            },
-            ScalarValue::LargeBinary(e) => match e {
-                Some(l) => write!(
-                    f,
-                    "{}",
-                    l.iter()
-                        .map(|v| format!("{}", v))
-                        .collect::<Vec<_>>()
-                        .join(",")
-                )?,
-                None => write!(f, "NULL")?,
-            },
-            ScalarValue::List(e, _) => match e {
-                Some(l) => write!(
-                    f,
-                    "{}",
-                    l.iter()
-                        .map(|v| format!("{}", v))
-                        .collect::<Vec<_>>()
-                        .join(",")
-                )?,
-                None => write!(f, "NULL")?,
-            },
-            ScalarValue::Date32(e) => format_option!(f, e)?,
-            ScalarValue::Date64(e) => format_option!(f, e)?,
-            ScalarValue::IntervalDayTime(e) => format_option!(f, e)?,
-            ScalarValue::IntervalYearMonth(e) => format_option!(f, e)?,
-        };
-        Ok(())
-    }
-}
-
-impl fmt::Debug for ScalarValue {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        match self {
-            ScalarValue::Boolean(_) => write!(f, "Boolean({})", self),
-            ScalarValue::Float32(_) => write!(f, "Float32({})", self),
-            ScalarValue::Float64(_) => write!(f, "Float64({})", self),
-            ScalarValue::Int8(_) => write!(f, "Int8({})", self),
-            ScalarValue::Int16(_) => write!(f, "Int16({})", self),
-            ScalarValue::Int32(_) => write!(f, "Int32({})", self),
-            ScalarValue::Int64(_) => write!(f, "Int64({})", self),
-            ScalarValue::UInt8(_) => write!(f, "UInt8({})", self),
-            ScalarValue::UInt16(_) => write!(f, "UInt16({})", self),
-            ScalarValue::UInt32(_) => write!(f, "UInt32({})", self),
-            ScalarValue::UInt64(_) => write!(f, "UInt64({})", self),
-            ScalarValue::TimestampSecond(_) => write!(f, "TimestampSecond({})", self),
-            ScalarValue::TimestampMillisecond(_) => {
-                write!(f, "TimestampMillisecond({})", self)
-            }
-            ScalarValue::TimestampMicrosecond(_) => {
-                write!(f, "TimestampMicrosecond({})", self)
-            }
-            ScalarValue::TimestampNanosecond(_) => {
-                write!(f, "TimestampNanosecond({})", self)
-            }
-            ScalarValue::Utf8(None) => write!(f, "Utf8({})", self),
-            ScalarValue::Utf8(Some(_)) => write!(f, "Utf8(\"{}\")", self),
-            ScalarValue::LargeUtf8(None) => write!(f, "LargeUtf8({})", self),
-            ScalarValue::LargeUtf8(Some(_)) => write!(f, "LargeUtf8(\"{}\")", self),
-            ScalarValue::Binary(None) => write!(f, "Binary({})", self),
-            ScalarValue::Binary(Some(_)) => write!(f, "Binary(\"{}\")", self),
-            ScalarValue::LargeBinary(None) => write!(f, "LargeBinary({})", self),
-            ScalarValue::LargeBinary(Some(_)) => write!(f, "LargeBinary(\"{}\")", self),
-            ScalarValue::List(_, _) => write!(f, "List([{}])", self),
-            ScalarValue::Date32(_) => write!(f, "Date32(\"{}\")", self),
-            ScalarValue::Date64(_) => write!(f, "Date64(\"{}\")", self),
-            ScalarValue::IntervalDayTime(_) => {
-                write!(f, "IntervalDayTime(\"{}\")", self)
-            }
-            ScalarValue::IntervalYearMonth(_) => {
-                write!(f, "IntervalYearMonth(\"{}\")", self)
-            }
-        }
-    }
-}
-
-/// Trait used to map a NativeTime to a ScalarType.
-pub trait ScalarType<T: ArrowNativeType> {
-    /// returns a scalar from an optional T
-    fn scalar(r: Option<T>) -> ScalarValue;
-}
-
-impl ScalarType<f32> for Float32Type {
-    fn scalar(r: Option<f32>) -> ScalarValue {
-        ScalarValue::Float32(r)
-    }
-}
-
-impl ScalarType<i64> for TimestampSecondType {
-    fn scalar(r: Option<i64>) -> ScalarValue {
-        ScalarValue::TimestampSecond(r)
-    }
-}
-
-impl ScalarType<i64> for TimestampMillisecondType {
-    fn scalar(r: Option<i64>) -> ScalarValue {
-        ScalarValue::TimestampMillisecond(r)
-    }
-}
-
-impl ScalarType<i64> for TimestampMicrosecondType {
-    fn scalar(r: Option<i64>) -> ScalarValue {
-        ScalarValue::TimestampMicrosecond(r)
-    }
-}
-
-impl ScalarType<i64> for TimestampNanosecondType {
-    fn scalar(r: Option<i64>) -> ScalarValue {
-        ScalarValue::TimestampNanosecond(r)
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn scalar_list_null_to_array() {
-        let list_array_ref = ScalarValue::List(None, DataType::UInt64).to_array();
-        let list_array = list_array_ref.as_any().downcast_ref::<ListArray>().unwrap();
-
-        assert!(list_array.is_null(0));
-        assert_eq!(list_array.len(), 1);
-        assert_eq!(list_array.values().len(), 0);
-    }
-
-    #[test]
-    fn scalar_list_to_array() {
-        let list_array_ref = ScalarValue::List(
-            Some(vec![
-                ScalarValue::UInt64(Some(100)),
-                ScalarValue::UInt64(None),
-                ScalarValue::UInt64(Some(101)),
-            ]),
-            DataType::UInt64,
-        )
-        .to_array();
-
-        let list_array = list_array_ref.as_any().downcast_ref::<ListArray>().unwrap();
-        assert_eq!(list_array.len(), 1);
-        assert_eq!(list_array.values().len(), 3);
-
-        let prim_array_ref = list_array.value(0);
-        let prim_array = prim_array_ref
-            .as_any()
-            .downcast_ref::<UInt64Array>()
-            .unwrap();
-        assert_eq!(prim_array.len(), 3);
-        assert_eq!(prim_array.value(0), 100);
-        assert!(prim_array.is_null(1));
-        assert_eq!(prim_array.value(2), 101);
-    }
-}
diff --git a/rust/datafusion/src/sql/mod.rs b/rust/datafusion/src/sql/mod.rs
deleted file mode 100644
index 456ad4c2e36..00000000000
--- a/rust/datafusion/src/sql/mod.rs
+++ /dev/null
@@ -1,23 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! This module provides a SQL parser that translates SQL queries into an abstract syntax
-//! tree (AST), and a SQL query planner that creates a logical plan from the AST.
-
-pub mod parser;
-pub mod planner;
-mod utils;
diff --git a/rust/datafusion/src/sql/parser.rs b/rust/datafusion/src/sql/parser.rs
deleted file mode 100644
index 3637e882d2f..00000000000
--- a/rust/datafusion/src/sql/parser.rs
+++ /dev/null
@@ -1,380 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! SQL Parser
-//!
-//! Declares a SQL parser based on sqlparser that handles custom formats that we need.
-
-use sqlparser::{
-    ast::{ColumnDef, ColumnOptionDef, Statement as SQLStatement, TableConstraint},
-    dialect::{keywords::Keyword, Dialect, GenericDialect},
-    parser::{Parser, ParserError},
-    tokenizer::{Token, Tokenizer},
-};
-
-// Use `Parser::expected` instead, if possible
-macro_rules! parser_err {
-    ($MSG:expr) => {
-        Err(ParserError::ParserError($MSG.to_string()))
-    };
-}
-
-/// Types of files to parse as DataFrames
-#[derive(Debug, Clone, Copy, PartialEq)]
-pub enum FileType {
-    /// Newline-delimited JSON
-    NdJson,
-    /// Apache Parquet columnar storage
-    Parquet,
-    /// Comma separated values
-    CSV,
-}
-
-/// DataFusion extension DDL for `CREATE EXTERNAL TABLE`
-#[derive(Debug, Clone, PartialEq)]
-pub struct CreateExternalTable {
-    /// Table name
-    pub name: String,
-    /// Optional schema
-    pub columns: Vec<ColumnDef>,
-    /// File type (Parquet, NDJSON, CSV)
-    pub file_type: FileType,
-    /// CSV Header row?
-    pub has_header: bool,
-    /// Path to file
-    pub location: String,
-}
-
-/// DataFusion Statement representations.
-///
-/// Tokens parsed by `DFParser` are converted into these values.
-#[derive(Debug, Clone, PartialEq)]
-pub enum Statement {
-    /// ANSI SQL AST node
-    Statement(SQLStatement),
-    /// Extension: `CREATE EXTERNAL TABLE`
-    CreateExternalTable(CreateExternalTable),
-}
-
-/// SQL Parser
-pub struct DFParser<'a> {
-    parser: Parser<'a>,
-}
-
-impl<'a> DFParser<'a> {
-    /// Parse the specified tokens
-    pub fn new(sql: &str) -> Result<Self, ParserError> {
-        let dialect = &GenericDialect {};
-        DFParser::new_with_dialect(sql, dialect)
-    }
-
-    /// Parse the specified tokens with dialect
-    pub fn new_with_dialect(
-        sql: &str,
-        dialect: &'a dyn Dialect,
-    ) -> Result<Self, ParserError> {
-        let mut tokenizer = Tokenizer::new(dialect, sql);
-        let tokens = tokenizer.tokenize()?;
-
-        Ok(DFParser {
-            parser: Parser::new(tokens, dialect),
-        })
-    }
-
-    /// Parse a SQL statement and produce a set of statements with dialect
-    pub fn parse_sql(sql: &str) -> Result<Vec<Statement>, ParserError> {
-        let dialect = &GenericDialect {};
-        DFParser::parse_sql_with_dialect(sql, dialect)
-    }
-
-    /// Parse a SQL statement and produce a set of statements
-    pub fn parse_sql_with_dialect(
-        sql: &str,
-        dialect: &dyn Dialect,
-    ) -> Result<Vec<Statement>, ParserError> {
-        let mut parser = DFParser::new_with_dialect(sql, dialect)?;
-        let mut stmts = Vec::new();
-        let mut expecting_statement_delimiter = false;
-        loop {
-            // ignore empty statements (between successive statement delimiters)
-            while parser.parser.consume_token(&Token::SemiColon) {
-                expecting_statement_delimiter = false;
-            }
-
-            if parser.parser.peek_token() == Token::EOF {
-                break;
-            }
-            if expecting_statement_delimiter {
-                return parser.expected("end of statement", parser.parser.peek_token());
-            }
-
-            let statement = parser.parse_statement()?;
-            stmts.push(statement);
-            expecting_statement_delimiter = true;
-        }
-        Ok(stmts)
-    }
-
-    /// Report unexpected token
-    fn expected<T>(&self, expected: &str, found: Token) -> Result<T, ParserError> {
-        parser_err!(format!("Expected {}, found: {}", expected, found))
-    }
-
-    /// Parse a new expression
-    pub fn parse_statement(&mut self) -> Result<Statement, ParserError> {
-        match self.parser.peek_token() {
-            Token::Word(w) => {
-                match w.keyword {
-                    Keyword::CREATE => {
-                        // move one token forward
-                        self.parser.next_token();
-                        // use custom parsing
-                        self.parse_create()
-                    }
-                    _ => {
-                        // use the native parser
-                        Ok(Statement::Statement(self.parser.parse_statement()?))
-                    }
-                }
-            }
-            _ => {
-                // use the native parser
-                Ok(Statement::Statement(self.parser.parse_statement()?))
-            }
-        }
-    }
-
-    /// Parse a SQL CREATE statement
-    pub fn parse_create(&mut self) -> Result<Statement, ParserError> {
-        if self.parser.parse_keyword(Keyword::EXTERNAL) {
-            self.parse_create_external_table()
-        } else {
-            Ok(Statement::Statement(self.parser.parse_create()?))
-        }
-    }
-
-    // This is a copy of the equivalent implementation in sqlparser.
-    fn parse_columns(
-        &mut self,
-    ) -> Result<(Vec<ColumnDef>, Vec<TableConstraint>), ParserError> {
-        let mut columns = vec![];
-        let mut constraints = vec![];
-        if !self.parser.consume_token(&Token::LParen)
-            || self.parser.consume_token(&Token::RParen)
-        {
-            return Ok((columns, constraints));
-        }
-
-        loop {
-            if let Some(constraint) = self.parser.parse_optional_table_constraint()? {
-                constraints.push(constraint);
-            } else if let Token::Word(_) = self.parser.peek_token() {
-                let column_def = self.parse_column_def()?;
-                columns.push(column_def);
-            } else {
-                return self.expected(
-                    "column name or constraint definition",
-                    self.parser.peek_token(),
-                );
-            }
-            let comma = self.parser.consume_token(&Token::Comma);
-            if self.parser.consume_token(&Token::RParen) {
-                // allow a trailing comma, even though it's not in standard
-                break;
-            } else if !comma {
-                return self.expected(
-                    "',' or ')' after column definition",
-                    self.parser.peek_token(),
-                );
-            }
-        }
-
-        Ok((columns, constraints))
-    }
-
-    fn parse_column_def(&mut self) -> Result<ColumnDef, ParserError> {
-        let name = self.parser.parse_identifier()?;
-        let data_type = self.parser.parse_data_type()?;
-        let collation = if self.parser.parse_keyword(Keyword::COLLATE) {
-            Some(self.parser.parse_object_name()?)
-        } else {
-            None
-        };
-        let mut options = vec![];
-        loop {
-            if self.parser.parse_keyword(Keyword::CONSTRAINT) {
-                let name = Some(self.parser.parse_identifier()?);
-                if let Some(option) = self.parser.parse_optional_column_option()? {
-                    options.push(ColumnOptionDef { name, option });
-                } else {
-                    return self.expected(
-                        "constraint details after CONSTRAINT <name>",
-                        self.parser.peek_token(),
-                    );
-                }
-            } else if let Some(option) = self.parser.parse_optional_column_option()? {
-                options.push(ColumnOptionDef { name: None, option });
-            } else {
-                break;
-            };
-        }
-        Ok(ColumnDef {
-            name,
-            data_type,
-            collation,
-            options,
-        })
-    }
-
-    fn parse_create_external_table(&mut self) -> Result<Statement, ParserError> {
-        self.parser.expect_keyword(Keyword::TABLE)?;
-        let table_name = self.parser.parse_object_name()?;
-        let (columns, _) = self.parse_columns()?;
-        self.parser
-            .expect_keywords(&[Keyword::STORED, Keyword::AS])?;
-
-        // THIS is the main difference: we parse a different file format.
-        let file_type = self.parse_file_format()?;
-
-        let has_header = self.parse_csv_has_header();
-
-        self.parser.expect_keyword(Keyword::LOCATION)?;
-        let location = self.parser.parse_literal_string()?;
-
-        let create = CreateExternalTable {
-            name: table_name.to_string(),
-            columns,
-            file_type,
-            has_header,
-            location,
-        };
-        Ok(Statement::CreateExternalTable(create))
-    }
-
-    /// Parses the set of valid formats
-    fn parse_file_format(&mut self) -> Result<FileType, ParserError> {
-        match self.parser.next_token() {
-            Token::Word(w) => match &*w.value {
-                "PARQUET" => Ok(FileType::Parquet),
-                "NDJSON" => Ok(FileType::NdJson),
-                "CSV" => Ok(FileType::CSV),
-                _ => self.expected("one of PARQUET, NDJSON, or CSV", Token::Word(w)),
-            },
-            unexpected => self.expected("one of PARQUET, NDJSON, or CSV", unexpected),
-        }
-    }
-
-    fn consume_token(&mut self, expected: &str) -> bool {
-        if self.parser.peek_token().to_string() == *expected {
-            self.parser.next_token();
-            true
-        } else {
-            false
-        }
-    }
-
-    fn parse_csv_has_header(&mut self) -> bool {
-        self.consume_token("WITH")
-            & self.consume_token("HEADER")
-            & self.consume_token("ROW")
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use sqlparser::ast::{DataType, Ident};
-
-    fn expect_parse_ok(sql: &str, expected: Statement) -> Result<(), ParserError> {
-        let statements = DFParser::parse_sql(sql)?;
-        assert_eq!(
-            statements.len(),
-            1,
-            "Expected to parse exactly one statement"
-        );
-        assert_eq!(statements[0], expected);
-        Ok(())
-    }
-
-    /// Parses sql and asserts that the expected error message was found
-    fn expect_parse_error(sql: &str, expected_error: &str) {
-        match DFParser::parse_sql(sql) {
-            Ok(statements) => {
-                panic!(
-                    "Expected parse error for '{}', but was successful: {:?}",
-                    sql, statements
-                );
-            }
-            Err(e) => {
-                let error_message = e.to_string();
-                assert!(
-                    error_message.contains(expected_error),
-                    "Expected error '{}' not found in actual error '{}'",
-                    expected_error,
-                    error_message
-                );
-            }
-        }
-    }
-
-    fn make_column_def(name: impl Into<String>, data_type: DataType) -> ColumnDef {
-        ColumnDef {
-            name: Ident {
-                value: name.into(),
-                quote_style: None,
-            },
-            data_type,
-            collation: None,
-            options: vec![],
-        }
-    }
-
-    #[test]
-    fn create_external_table() -> Result<(), ParserError> {
-        // positive case
-        let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv'";
-        let expected = Statement::CreateExternalTable(CreateExternalTable {
-            name: "t".into(),
-            columns: vec![make_column_def("c1", DataType::Int)],
-            file_type: FileType::CSV,
-            has_header: false,
-            location: "foo.csv".into(),
-        });
-        expect_parse_ok(sql, expected)?;
-
-        // positive case: it is ok for parquet files not to have columns specified
-        let sql = "CREATE EXTERNAL TABLE t STORED AS PARQUET LOCATION 'foo.parquet'";
-        let expected = Statement::CreateExternalTable(CreateExternalTable {
-            name: "t".into(),
-            columns: vec![],
-            file_type: FileType::Parquet,
-            has_header: false,
-            location: "foo.parquet".into(),
-        });
-        expect_parse_ok(sql, expected)?;
-
-        // Error cases: Invalid type
-        let sql =
-            "CREATE EXTERNAL TABLE t(c1 int) STORED AS UNKNOWN_TYPE LOCATION 'foo.csv'";
-        expect_parse_error(
-            sql,
-            "Expected one of PARQUET, NDJSON, or CSV, found: UNKNOWN_TYPE",
-        );
-
-        Ok(())
-    }
-}
diff --git a/rust/datafusion/src/sql/planner.rs b/rust/datafusion/src/sql/planner.rs
deleted file mode 100644
index f3cba232a23..00000000000
--- a/rust/datafusion/src/sql/planner.rs
+++ /dev/null
@@ -1,2723 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! SQL Query Planner (produces logical plan from SQL AST)
-
-use std::convert::TryInto;
-use std::str::FromStr;
-use std::sync::Arc;
-
-use crate::catalog::TableReference;
-use crate::datasource::TableProvider;
-use crate::logical_plan::Expr::Alias;
-use crate::logical_plan::{
-    and, lit, DFSchema, Expr, LogicalPlan, LogicalPlanBuilder, Operator, PlanType,
-    StringifiedPlan, ToDFSchema,
-};
-use crate::scalar::ScalarValue;
-use crate::{
-    error::{DataFusionError, Result},
-    physical_plan::udaf::AggregateUDF,
-};
-use crate::{
-    physical_plan::udf::ScalarUDF,
-    physical_plan::{aggregates, functions},
-    sql::parser::{CreateExternalTable, FileType, Statement as DFStatement},
-};
-
-use arrow::datatypes::*;
-use hashbrown::HashMap;
-
-use crate::prelude::JoinType;
-use sqlparser::ast::{
-    BinaryOperator, DataType as SQLDataType, DateTimeField, Expr as SQLExpr, FunctionArg,
-    Ident, Join, JoinConstraint, JoinOperator, ObjectName, Query, Select, SelectItem,
-    SetExpr, SetOperator, ShowStatementFilter, TableFactor, TableWithJoins,
-    UnaryOperator, Value,
-};
-use sqlparser::ast::{ColumnDef as SQLColumnDef, ColumnOption};
-use sqlparser::ast::{OrderByExpr, Statement};
-use sqlparser::parser::ParserError::ParserError;
-
-use super::{
-    parser::DFParser,
-    utils::{
-        can_columns_satisfy_exprs, expand_wildcard, expr_as_column_expr, extract_aliases,
-        find_aggregate_exprs, find_column_exprs, rebase_expr, resolve_aliases_to_exprs,
-    },
-};
-
-/// The ContextProvider trait allows the query planner to obtain meta-data about tables and
-/// functions referenced in SQL statements
-pub trait ContextProvider {
-    /// Getter for a datasource
-    fn get_table_provider(&self, name: TableReference) -> Option<Arc<dyn TableProvider>>;
-    /// Getter for a UDF description
-    fn get_function_meta(&self, name: &str) -> Option<Arc<ScalarUDF>>;
-    /// Getter for a UDAF description
-    fn get_aggregate_meta(&self, name: &str) -> Option<Arc<AggregateUDF>>;
-}
-
-/// SQL query planner
-pub struct SqlToRel<'a, S: ContextProvider> {
-    schema_provider: &'a S,
-}
-
-impl<'a, S: ContextProvider> SqlToRel<'a, S> {
-    /// Create a new query planner
-    pub fn new(schema_provider: &'a S) -> Self {
-        SqlToRel { schema_provider }
-    }
-
-    /// Generate a logical plan from an DataFusion SQL statement
-    pub fn statement_to_plan(&self, statement: &DFStatement) -> Result<LogicalPlan> {
-        match statement {
-            DFStatement::CreateExternalTable(s) => self.external_table_to_plan(&s),
-            DFStatement::Statement(s) => self.sql_statement_to_plan(&s),
-        }
-    }
-
-    /// Generate a logical plan from an SQL statement
-    pub fn sql_statement_to_plan(&self, sql: &Statement) -> Result<LogicalPlan> {
-        match sql {
-            Statement::Explain {
-                verbose,
-                statement,
-                analyze: _,
-            } => self.explain_statement_to_plan(*verbose, &statement),
-            Statement::Query(query) => self.query_to_plan(&query),
-            Statement::ShowVariable { variable } => self.show_variable_to_plan(&variable),
-            Statement::ShowColumns {
-                extended,
-                full,
-                table_name,
-                filter,
-            } => self.show_columns_to_plan(*extended, *full, table_name, filter.as_ref()),
-            _ => Err(DataFusionError::NotImplemented(
-                "Only SELECT statements are implemented".to_string(),
-            )),
-        }
-    }
-
-    /// Generate a logic plan from an SQL query
-    pub fn query_to_plan(&self, query: &Query) -> Result<LogicalPlan> {
-        self.query_to_plan_with_alias(query, None, &mut HashMap::new())
-    }
-
-    /// Generate a logic plan from an SQL query with optional alias
-    pub fn query_to_plan_with_alias(
-        &self,
-        query: &Query,
-        alias: Option<String>,
-        ctes: &mut HashMap<String, LogicalPlan>,
-    ) -> Result<LogicalPlan> {
-        let set_expr = &query.body;
-        if let Some(with) = &query.with {
-            // Process CTEs from top to bottom
-            // do not allow self-references
-            for cte in &with.cte_tables {
-                // create logical plan & pass backreferencing CTEs
-                let logical_plan = self.query_to_plan_with_alias(
-                    &cte.query,
-                    Some(cte.alias.name.value.clone()),
-                    &mut ctes.clone(),
-                )?;
-                ctes.insert(cte.alias.name.value.clone(), logical_plan);
-            }
-        }
-        let plan = self.set_expr_to_plan(set_expr, alias, ctes)?;
-
-        let plan = self.order_by(&plan, &query.order_by)?;
-
-        self.limit(&plan, &query.limit)
-    }
-
-    fn set_expr_to_plan(
-        &self,
-        set_expr: &SetExpr,
-        alias: Option<String>,
-        ctes: &mut HashMap<String, LogicalPlan>,
-    ) -> Result<LogicalPlan> {
-        match set_expr {
-            SetExpr::Select(s) => self.select_to_plan(s.as_ref(), ctes),
-            SetExpr::SetOperation {
-                op,
-                left,
-                right,
-                all,
-            } => match (op, all) {
-                (SetOperator::Union, true) => {
-                    let left_plan = self.set_expr_to_plan(left.as_ref(), None, ctes)?;
-                    let right_plan = self.set_expr_to_plan(right.as_ref(), None, ctes)?;
-                    let inputs = vec![left_plan, right_plan]
-                        .into_iter()
-                        .flat_map(|p| match p {
-                            LogicalPlan::Union { inputs, .. } => inputs,
-                            x => vec![x],
-                        })
-                        .collect::<Vec<_>>();
-                    if inputs.is_empty() {
-                        return Err(DataFusionError::Plan(format!(
-                            "Empty UNION: {}",
-                            set_expr
-                        )));
-                    }
-                    if !inputs.iter().all(|s| s.schema() == inputs[0].schema()) {
-                        return Err(DataFusionError::Plan(
-                            "UNION ALL schemas are expected to be the same".to_string(),
-                        ));
-                    }
-                    Ok(LogicalPlan::Union {
-                        schema: inputs[0].schema().clone(),
-                        inputs,
-                        alias,
-                    })
-                }
-                _ => Err(DataFusionError::NotImplemented(format!(
-                    "Only UNION ALL is supported, found {}",
-                    op
-                ))),
-            },
-            _ => Err(DataFusionError::NotImplemented(format!(
-                "Query {} not implemented yet",
-                set_expr
-            ))),
-        }
-    }
-
-    /// Generate a logical plan from a CREATE EXTERNAL TABLE statement
-    pub fn external_table_to_plan(
-        &self,
-        statement: &CreateExternalTable,
-    ) -> Result<LogicalPlan> {
-        let CreateExternalTable {
-            name,
-            columns,
-            file_type,
-            has_header,
-            location,
-        } = statement;
-
-        // semantic checks
-        match *file_type {
-            FileType::CSV => {
-                if columns.is_empty() {
-                    return Err(DataFusionError::Plan(
-                        "Column definitions required for CSV files. None found".into(),
-                    ));
-                }
-            }
-            FileType::Parquet => {
-                if !columns.is_empty() {
-                    return Err(DataFusionError::Plan(
-                        "Column definitions can not be specified for PARQUET files."
-                            .into(),
-                    ));
-                }
-            }
-            FileType::NdJson => {}
-        };
-
-        let schema = self.build_schema(&columns)?;
-
-        Ok(LogicalPlan::CreateExternalTable {
-            schema: schema.to_dfschema_ref()?,
-            name: name.clone(),
-            location: location.clone(),
-            file_type: *file_type,
-            has_header: *has_header,
-        })
-    }
-
-    /// Generate a plan for EXPLAIN ... that will print out a plan
-    ///
-    pub fn explain_statement_to_plan(
-        &self,
-        verbose: bool,
-        statement: &Statement,
-    ) -> Result<LogicalPlan> {
-        let plan = self.sql_statement_to_plan(&statement)?;
-
-        let stringified_plans = vec![StringifiedPlan::new(
-            PlanType::LogicalPlan,
-            format!("{:#?}", plan),
-        )];
-
-        let schema = LogicalPlan::explain_schema();
-        let plan = Arc::new(plan);
-
-        Ok(LogicalPlan::Explain {
-            verbose,
-            plan,
-            stringified_plans,
-            schema: schema.to_dfschema_ref()?,
-        })
-    }
-
-    fn build_schema(&self, columns: &[SQLColumnDef]) -> Result<Schema> {
-        let mut fields = Vec::new();
-
-        for column in columns {
-            let data_type = self.make_data_type(&column.data_type)?;
-            let allow_null = column
-                .options
-                .iter()
-                .any(|x| x.option == ColumnOption::Null);
-            fields.push(Field::new(&column.name.value, data_type, allow_null));
-        }
-
-        Ok(Schema::new(fields))
-    }
-
-    /// Maps the SQL type to the corresponding Arrow `DataType`
-    fn make_data_type(&self, sql_type: &SQLDataType) -> Result<DataType> {
-        match sql_type {
-            SQLDataType::BigInt => Ok(DataType::Int64),
-            SQLDataType::Int => Ok(DataType::Int32),
-            SQLDataType::SmallInt => Ok(DataType::Int16),
-            SQLDataType::Char(_) | SQLDataType::Varchar(_) | SQLDataType::Text => {
-                Ok(DataType::Utf8)
-            }
-            SQLDataType::Decimal(_, _) => Ok(DataType::Float64),
-            SQLDataType::Float(_) => Ok(DataType::Float32),
-            SQLDataType::Real | SQLDataType::Double => Ok(DataType::Float64),
-            SQLDataType::Boolean => Ok(DataType::Boolean),
-            SQLDataType::Date => Ok(DataType::Date32),
-            SQLDataType::Time => Ok(DataType::Time64(TimeUnit::Millisecond)),
-            SQLDataType::Timestamp => Ok(DataType::Timestamp(TimeUnit::Nanosecond, None)),
-            _ => Err(DataFusionError::NotImplemented(format!(
-                "The SQL data type {:?} is not implemented",
-                sql_type
-            ))),
-        }
-    }
-
-    fn plan_from_tables(
-        &self,
-        from: &[TableWithJoins],
-        ctes: &mut HashMap<String, LogicalPlan>,
-    ) -> Result<Vec<LogicalPlan>> {
-        match from.len() {
-            0 => Ok(vec![LogicalPlanBuilder::empty(true).build()?]),
-            _ => from
-                .iter()
-                .map(|t| self.plan_table_with_joins(t, ctes))
-                .collect::<Result<Vec<_>>>(),
-        }
-    }
-
-    fn plan_table_with_joins(
-        &self,
-        t: &TableWithJoins,
-        ctes: &mut HashMap<String, LogicalPlan>,
-    ) -> Result<LogicalPlan> {
-        let left = self.create_relation(&t.relation, ctes)?;
-        match t.joins.len() {
-            0 => Ok(left),
-            n => {
-                let mut left = self.parse_relation_join(&left, &t.joins[0], ctes)?;
-                for i in 1..n {
-                    left = self.parse_relation_join(&left, &t.joins[i], ctes)?;
-                }
-                Ok(left)
-            }
-        }
-    }
-
-    fn parse_relation_join(
-        &self,
-        left: &LogicalPlan,
-        join: &Join,
-        ctes: &mut HashMap<String, LogicalPlan>,
-    ) -> Result<LogicalPlan> {
-        let right = self.create_relation(&join.relation, ctes)?;
-        match &join.join_operator {
-            JoinOperator::LeftOuter(constraint) => {
-                self.parse_join(left, &right, constraint, JoinType::Left)
-            }
-            JoinOperator::RightOuter(constraint) => {
-                self.parse_join(left, &right, constraint, JoinType::Right)
-            }
-            JoinOperator::Inner(constraint) => {
-                self.parse_join(left, &right, constraint, JoinType::Inner)
-            }
-            other => Err(DataFusionError::NotImplemented(format!(
-                "Unsupported JOIN operator {:?}",
-                other
-            ))),
-        }
-    }
-
-    fn parse_join(
-        &self,
-        left: &LogicalPlan,
-        right: &LogicalPlan,
-        constraint: &JoinConstraint,
-        join_type: JoinType,
-    ) -> Result<LogicalPlan> {
-        match constraint {
-            JoinConstraint::On(sql_expr) => {
-                let mut keys: Vec<(String, String)> = vec![];
-                let join_schema = left.schema().join(&right.schema())?;
-
-                // parse ON expression
-                let expr = self.sql_to_rex(sql_expr, &join_schema)?;
-
-                // extract join keys
-                extract_join_keys(&expr, &mut keys)?;
-                let left_keys: Vec<&str> =
-                    keys.iter().map(|pair| pair.0.as_str()).collect();
-                let right_keys: Vec<&str> =
-                    keys.iter().map(|pair| pair.1.as_str()).collect();
-
-                // return the logical plan representing the join
-                LogicalPlanBuilder::from(&left)
-                    .join(&right, join_type, &left_keys, &right_keys)?
-                    .build()
-            }
-            JoinConstraint::Using(idents) => {
-                let keys: Vec<&str> = idents.iter().map(|x| x.value.as_str()).collect();
-                LogicalPlanBuilder::from(&left)
-                    .join(&right, join_type, &keys, &keys)?
-                    .build()
-            }
-            JoinConstraint::Natural => {
-                // https://issues.apache.org/jira/browse/ARROW-10727
-                Err(DataFusionError::NotImplemented(
-                    "NATURAL JOIN is not supported (https://issues.apache.org/jira/browse/ARROW-10727)".to_string(),
-                ))
-            }
-            JoinConstraint::None => Err(DataFusionError::NotImplemented(
-                "NONE contraint is not supported".to_string(),
-            )),
-        }
-    }
-
-    fn create_relation(
-        &self,
-        relation: &TableFactor,
-        ctes: &mut HashMap<String, LogicalPlan>,
-    ) -> Result<LogicalPlan> {
-        match relation {
-            TableFactor::Table { name, .. } => {
-                let table_name = name.to_string();
-                let cte = ctes.get(&table_name);
-                match (
-                    cte,
-                    self.schema_provider.get_table_provider(name.try_into()?),
-                ) {
-                    (Some(cte_plan), _) => Ok(cte_plan.clone()),
-                    (_, Some(provider)) => {
-                        LogicalPlanBuilder::scan(&table_name, provider, None)?.build()
-                    }
-                    (_, None) => Err(DataFusionError::Plan(format!(
-                        "Table or CTE with name '{}' not found",
-                        name
-                    ))),
-                }
-            }
-            TableFactor::Derived {
-                subquery, alias, ..
-            } => self.query_to_plan_with_alias(
-                subquery,
-                alias.as_ref().map(|a| a.name.value.to_string()),
-                ctes,
-            ),
-            TableFactor::NestedJoin(table_with_joins) => {
-                self.plan_table_with_joins(table_with_joins, ctes)
-            }
-            // @todo Support TableFactory::TableFunction?
-            _ => Err(DataFusionError::NotImplemented(format!(
-                "Unsupported ast node {:?} in create_relation",
-                relation
-            ))),
-        }
-    }
-
-    /// Generate a logic plan from an SQL select
-    fn select_to_plan(
-        &self,
-        select: &Select,
-        ctes: &mut HashMap<String, LogicalPlan>,
-    ) -> Result<LogicalPlan> {
-        let plans = self.plan_from_tables(&select.from, ctes)?;
-
-        let plan = match &select.selection {
-            Some(predicate_expr) => {
-                // build join schema
-                let mut fields = vec![];
-                for plan in &plans {
-                    fields.extend_from_slice(&plan.schema().fields());
-                }
-                let join_schema = DFSchema::new(fields)?;
-
-                let filter_expr = self.sql_to_rex(predicate_expr, &join_schema)?;
-
-                // look for expressions of the form `<column> = <column>`
-                let mut possible_join_keys = vec![];
-                extract_possible_join_keys(&filter_expr, &mut possible_join_keys)?;
-
-                let mut all_join_keys = vec![];
-                let mut left = plans[0].clone();
-                for right in plans.iter().skip(1) {
-                    let left_schema = left.schema();
-                    let right_schema = right.schema();
-                    let mut join_keys = vec![];
-                    for (l, r) in &possible_join_keys {
-                        if left_schema.field_with_unqualified_name(l).is_ok()
-                            && right_schema.field_with_unqualified_name(r).is_ok()
-                        {
-                            join_keys.push((l.as_str(), r.as_str()));
-                        } else if left_schema.field_with_unqualified_name(r).is_ok()
-                            && right_schema.field_with_unqualified_name(l).is_ok()
-                        {
-                            join_keys.push((r.as_str(), l.as_str()));
-                        }
-                    }
-                    if join_keys.is_empty() {
-                        return Err(DataFusionError::NotImplemented(
-                            "Cartesian joins are not supported".to_string(),
-                        ));
-                    } else {
-                        let left_keys: Vec<_> =
-                            join_keys.iter().map(|(l, _)| *l).collect();
-                        let right_keys: Vec<_> =
-                            join_keys.iter().map(|(_, r)| *r).collect();
-                        let builder = LogicalPlanBuilder::from(&left);
-                        left = builder
-                            .join(right, JoinType::Inner, &left_keys, &right_keys)?
-                            .build()?;
-                    }
-                    all_join_keys.extend_from_slice(&join_keys);
-                }
-
-                // remove join expressions from filter
-                match remove_join_expressions(&filter_expr, &all_join_keys)? {
-                    Some(filter_expr) => {
-                        LogicalPlanBuilder::from(&left).filter(filter_expr)?.build()
-                    }
-                    _ => Ok(left),
-                }
-            }
-            None => {
-                if plans.len() == 1 {
-                    Ok(plans[0].clone())
-                } else {
-                    Err(DataFusionError::NotImplemented(
-                        "Cartesian joins are not supported".to_string(),
-                    ))
-                }
-            }
-        };
-        let plan = plan?;
-
-        // The SELECT expressions, with wildcards expanded.
-        let select_exprs = self.prepare_select_exprs(&plan, &select.projection)?;
-
-        // Optionally the HAVING expression.
-        let having_expr_opt = select
-            .having
-            .as_ref()
-            .map::<Result<Expr>, _>(|having_expr| {
-                let having_expr = self.sql_expr_to_logical_expr(having_expr)?;
-
-                // This step "dereferences" any aliases in the HAVING clause.
-                //
-                // This is how we support queries with HAVING expressions that
-                // refer to aliased columns.
-                //
-                // For example:
-                //
-                //   SELECT c1 AS m FROM t HAVING m > 10;
-                //   SELECT c1, MAX(c2) AS m FROM t GROUP BY c1 HAVING m > 10;
-                //
-                // are rewritten as, respectively:
-                //
-                //   SELECT c1 AS m FROM t HAVING c1 > 10;
-                //   SELECT c1, MAX(c2) AS m FROM t GROUP BY c1 HAVING MAX(c2) > 10;
-                //
-                let having_expr = resolve_aliases_to_exprs(
-                    &having_expr,
-                    &extract_aliases(&select_exprs),
-                )?;
-
-                Ok(having_expr)
-            })
-            .transpose()?;
-
-        // The outer expressions we will search through for
-        // aggregates. Aggregates may be sourced from the SELECT...
-        let mut aggr_expr_haystack = select_exprs.clone();
-
-        // ... or from the HAVING.
-        if let Some(having_expr) = &having_expr_opt {
-            aggr_expr_haystack.push(having_expr.clone());
-        }
-
-        // All of the aggregate expressions (deduplicated).
-        let aggr_exprs = find_aggregate_exprs(&aggr_expr_haystack);
-
-        let (plan, select_exprs_post_aggr, having_expr_post_aggr_opt) =
-            if !select.group_by.is_empty() || !aggr_exprs.is_empty() {
-                self.aggregate(
-                    &plan,
-                    &select_exprs,
-                    &having_expr_opt,
-                    &select.group_by,
-                    aggr_exprs,
-                )?
-            } else {
-                if let Some(having_expr) = &having_expr_opt {
-                    let available_columns = select_exprs
-                        .iter()
-                        .map(|expr| expr_as_column_expr(expr, &plan))
-                        .collect::<Result<Vec<Expr>>>()?;
-
-                    // Ensure the HAVING expression is using only columns
-                    // provided by the SELECT.
-                    if !can_columns_satisfy_exprs(
-                        &available_columns,
-                        &[having_expr.clone()],
-                    )? {
-                        return Err(DataFusionError::Plan(
-                            "Having references column(s) not provided by the select"
-                                .to_owned(),
-                        ));
-                    }
-                }
-
-                (plan, select_exprs, having_expr_opt)
-            };
-
-        let plan = if let Some(having_expr_post_aggr) = having_expr_post_aggr_opt {
-            LogicalPlanBuilder::from(&plan)
-                .filter(having_expr_post_aggr)?
-                .build()?
-        } else {
-            plan
-        };
-
-        self.project(&plan, select_exprs_post_aggr, false)
-    }
-
-    /// Returns the `Expr`'s corresponding to a SQL query's SELECT expressions.
-    ///
-    /// Wildcards are expanded into the concrete list of columns.
-    fn prepare_select_exprs(
-        &self,
-        plan: &LogicalPlan,
-        projection: &[SelectItem],
-    ) -> Result<Vec<Expr>> {
-        let input_schema = plan.schema();
-
-        Ok(projection
-            .iter()
-            .map(|expr| self.sql_select_to_rex(&expr, &input_schema))
-            .collect::<Result<Vec<Expr>>>()?
-            .iter()
-            .flat_map(|expr| expand_wildcard(&expr, &input_schema))
-            .collect::<Vec<Expr>>())
-    }
-
-    /// Wrap a plan in a projection
-    ///
-    /// If the `force` argument is `false`, the projection is applied only when
-    /// necessary, i.e., when the input fields are different than the
-    /// projection. Note that if the input fields are the same, but out of
-    /// order, the projection will be applied.
-    fn project(
-        &self,
-        input: &LogicalPlan,
-        expr: Vec<Expr>,
-        force: bool,
-    ) -> Result<LogicalPlan> {
-        self.validate_schema_satisfies_exprs(&input.schema(), &expr)?;
-        let plan = LogicalPlanBuilder::from(input).project(expr)?.build()?;
-
-        let project = force
-            || match input {
-                LogicalPlan::TableScan { .. } => true,
-                _ => plan.schema().fields() != input.schema().fields(),
-            };
-
-        if project {
-            Ok(plan)
-        } else {
-            Ok(input.clone())
-        }
-    }
-
-    fn aggregate(
-        &self,
-        input: &LogicalPlan,
-        select_exprs: &[Expr],
-        having_expr_opt: &Option<Expr>,
-        group_by: &[SQLExpr],
-        aggr_exprs: Vec<Expr>,
-    ) -> Result<(LogicalPlan, Vec<Expr>, Option<Expr>)> {
-        let group_by_exprs = group_by
-            .iter()
-            .map(|e| self.sql_to_rex(e, &input.schema()))
-            .collect::<Result<Vec<Expr>>>()?;
-
-        let aggr_projection_exprs = group_by_exprs
-            .iter()
-            .chain(aggr_exprs.iter())
-            .cloned()
-            .collect::<Vec<Expr>>();
-
-        let plan = LogicalPlanBuilder::from(&input)
-            .aggregate(group_by_exprs, aggr_exprs)?
-            .build()?;
-
-        // After aggregation, these are all of the columns that will be
-        // available to next phases of planning.
-        let column_exprs_post_aggr = aggr_projection_exprs
-            .iter()
-            .map(|expr| expr_as_column_expr(expr, input))
-            .collect::<Result<Vec<Expr>>>()?;
-
-        // Rewrite the SELECT expression to use the columns produced by the
-        // aggregation.
-        let select_exprs_post_aggr = select_exprs
-            .iter()
-            .map(|expr| rebase_expr(expr, &aggr_projection_exprs, input))
-            .collect::<Result<Vec<Expr>>>()?;
-
-        if !can_columns_satisfy_exprs(&column_exprs_post_aggr, &select_exprs_post_aggr)? {
-            return Err(DataFusionError::Plan(
-                "Projection references non-aggregate values".to_owned(),
-            ));
-        }
-
-        // Rewrite the HAVING expression to use the columns produced by the
-        // aggregation.
-        let having_expr_post_aggr_opt = if let Some(having_expr) = having_expr_opt {
-            let having_expr_post_aggr =
-                rebase_expr(having_expr, &aggr_projection_exprs, input)?;
-
-            if !can_columns_satisfy_exprs(
-                &column_exprs_post_aggr,
-                &[having_expr_post_aggr.clone()],
-            )? {
-                return Err(DataFusionError::Plan(
-                    "Having references non-aggregate values".to_owned(),
-                ));
-            }
-
-            Some(having_expr_post_aggr)
-        } else {
-            None
-        };
-
-        Ok((plan, select_exprs_post_aggr, having_expr_post_aggr_opt))
-    }
-
-    /// Wrap a plan in a limit
-    fn limit(&self, input: &LogicalPlan, limit: &Option<SQLExpr>) -> Result<LogicalPlan> {
-        match *limit {
-            Some(ref limit_expr) => {
-                let n = match self.sql_to_rex(&limit_expr, &input.schema())? {
-                    Expr::Literal(ScalarValue::Int64(Some(n))) => Ok(n as usize),
-                    _ => Err(DataFusionError::Plan(
-                        "Unexpected expression for LIMIT clause".to_string(),
-                    )),
-                }?;
-
-                LogicalPlanBuilder::from(&input).limit(n)?.build()
-            }
-            _ => Ok(input.clone()),
-        }
-    }
-
-    /// Wrap the logical in a sort
-    fn order_by(
-        &self,
-        plan: &LogicalPlan,
-        order_by: &[OrderByExpr],
-    ) -> Result<LogicalPlan> {
-        if order_by.is_empty() {
-            return Ok(plan.clone());
-        }
-
-        let input_schema = plan.schema();
-        let order_by_rex: Result<Vec<Expr>> = order_by
-            .iter()
-            .map(|e| {
-                Ok(Expr::Sort {
-                    expr: Box::new(self.sql_to_rex(&e.expr, &input_schema)?),
-                    // by default asc
-                    asc: e.asc.unwrap_or(true),
-                    // by default nulls first to be consistent with spark
-                    nulls_first: e.nulls_first.unwrap_or(true),
-                })
-            })
-            .collect();
-
-        LogicalPlanBuilder::from(&plan).sort(order_by_rex?)?.build()
-    }
-
-    /// Validate the schema provides all of the columns referenced in the expressions.
-    fn validate_schema_satisfies_exprs(
-        &self,
-        schema: &DFSchema,
-        exprs: &[Expr],
-    ) -> Result<()> {
-        find_column_exprs(exprs)
-            .iter()
-            .try_for_each(|col| match col {
-                Expr::Column(name) => {
-                    schema.field_with_unqualified_name(&name).map_err(|_| {
-                        DataFusionError::Plan(format!(
-                            "Invalid identifier '{}' for schema {}",
-                            name,
-                            schema.to_string()
-                        ))
-                    })?;
-                    Ok(())
-                }
-                _ => Err(DataFusionError::Internal("Not a column".to_string())),
-            })
-    }
-
-    /// Generate a relational expression from a select SQL expression
-    fn sql_select_to_rex(&self, sql: &SelectItem, schema: &DFSchema) -> Result<Expr> {
-        match sql {
-            SelectItem::UnnamedExpr(expr) => self.sql_to_rex(expr, schema),
-            SelectItem::ExprWithAlias { expr, alias } => Ok(Alias(
-                Box::new(self.sql_to_rex(&expr, schema)?),
-                alias.value.clone(),
-            )),
-            SelectItem::Wildcard => Ok(Expr::Wildcard),
-            SelectItem::QualifiedWildcard(_) => Err(DataFusionError::NotImplemented(
-                "Qualified wildcards are not supported".to_string(),
-            )),
-        }
-    }
-
-    /// Generate a relational expression from a SQL expression
-    pub fn sql_to_rex(&self, sql: &SQLExpr, schema: &DFSchema) -> Result<Expr> {
-        let expr = self.sql_expr_to_logical_expr(sql)?;
-        self.validate_schema_satisfies_exprs(schema, &[expr.clone()])?;
-        Ok(expr)
-    }
-
-    fn sql_fn_arg_to_logical_expr(&self, sql: &FunctionArg) -> Result<Expr> {
-        match sql {
-            FunctionArg::Named { name: _, arg } => self.sql_expr_to_logical_expr(arg),
-            FunctionArg::Unnamed(value) => self.sql_expr_to_logical_expr(value),
-        }
-    }
-
-    fn sql_expr_to_logical_expr(&self, sql: &SQLExpr) -> Result<Expr> {
-        match sql {
-            SQLExpr::Value(Value::Number(n, _)) => match n.parse::<i64>() {
-                Ok(n) => Ok(lit(n)),
-                Err(_) => Ok(lit(n.parse::<f64>().unwrap())),
-            },
-            SQLExpr::Value(Value::SingleQuotedString(ref s)) => Ok(lit(s.clone())),
-
-            SQLExpr::Value(Value::Boolean(n)) => Ok(lit(*n)),
-
-            SQLExpr::Value(Value::Null) => Ok(Expr::Literal(ScalarValue::Utf8(None))),
-            SQLExpr::Extract { field, expr } => Ok(Expr::ScalarFunction {
-                fun: functions::BuiltinScalarFunction::DatePart,
-                args: vec![
-                    Expr::Literal(ScalarValue::Utf8(Some(format!("{}", field)))),
-                    self.sql_expr_to_logical_expr(expr)?,
-                ],
-            }),
-
-            SQLExpr::Value(Value::Interval {
-                value,
-                leading_field,
-                leading_precision,
-                last_field,
-                fractional_seconds_precision,
-            }) => self.sql_interval_to_literal(
-                value,
-                leading_field,
-                leading_precision,
-                last_field,
-                fractional_seconds_precision,
-            ),
-
-            SQLExpr::Identifier(ref id) => {
-                if &id.value[0..1] == "@" {
-                    let var_names = vec![id.value.clone()];
-                    Ok(Expr::ScalarVariable(var_names))
-                } else {
-                    Ok(Expr::Column(id.value.to_string()))
-                }
-            }
-
-            SQLExpr::CompoundIdentifier(ids) => {
-                let mut var_names = vec![];
-                for id in ids {
-                    var_names.push(id.value.clone());
-                }
-                if &var_names[0][0..1] == "@" {
-                    Ok(Expr::ScalarVariable(var_names))
-                } else {
-                    Err(DataFusionError::NotImplemented(format!(
-                        "Unsupported compound identifier '{:?}'",
-                        var_names,
-                    )))
-                }
-            }
-
-            SQLExpr::Wildcard => Ok(Expr::Wildcard),
-
-            SQLExpr::Case {
-                operand,
-                conditions,
-                results,
-                else_result,
-            } => {
-                let expr = if let Some(e) = operand {
-                    Some(Box::new(self.sql_expr_to_logical_expr(e)?))
-                } else {
-                    None
-                };
-                let when_expr = conditions
-                    .iter()
-                    .map(|e| self.sql_expr_to_logical_expr(e))
-                    .collect::<Result<Vec<_>>>()?;
-                let then_expr = results
-                    .iter()
-                    .map(|e| self.sql_expr_to_logical_expr(e))
-                    .collect::<Result<Vec<_>>>()?;
-                let else_expr = if let Some(e) = else_result {
-                    Some(Box::new(self.sql_expr_to_logical_expr(e)?))
-                } else {
-                    None
-                };
-
-                Ok(Expr::Case {
-                    expr,
-                    when_then_expr: when_expr
-                        .iter()
-                        .zip(then_expr.iter())
-                        .map(|(w, t)| (Box::new(w.to_owned()), Box::new(t.to_owned())))
-                        .collect(),
-                    else_expr,
-                })
-            }
-
-            SQLExpr::Cast {
-                ref expr,
-                ref data_type,
-            } => Ok(Expr::Cast {
-                expr: Box::new(self.sql_expr_to_logical_expr(&expr)?),
-                data_type: convert_data_type(data_type)?,
-            }),
-
-            SQLExpr::TryCast {
-                ref expr,
-                ref data_type,
-            } => Ok(Expr::TryCast {
-                expr: Box::new(self.sql_expr_to_logical_expr(&expr)?),
-                data_type: convert_data_type(data_type)?,
-            }),
-
-            SQLExpr::TypedString {
-                ref data_type,
-                ref value,
-            } => Ok(Expr::Cast {
-                expr: Box::new(lit(&**value)),
-                data_type: convert_data_type(data_type)?,
-            }),
-
-            SQLExpr::IsNull(ref expr) => {
-                Ok(Expr::IsNull(Box::new(self.sql_expr_to_logical_expr(expr)?)))
-            }
-
-            SQLExpr::IsNotNull(ref expr) => Ok(Expr::IsNotNull(Box::new(
-                self.sql_expr_to_logical_expr(expr)?,
-            ))),
-
-            SQLExpr::UnaryOp { ref op, ref expr } => match op {
-                UnaryOperator::Not => {
-                    Ok(Expr::Not(Box::new(self.sql_expr_to_logical_expr(expr)?)))
-                }
-                UnaryOperator::Plus => Ok(self.sql_expr_to_logical_expr(expr)?),
-                UnaryOperator::Minus => {
-                    match expr.as_ref() {
-                        // optimization: if it's a number literal, we applly the negative operator
-                        // here directly to calculate the new literal.
-                        SQLExpr::Value(Value::Number(n,_)) => match n.parse::<i64>() {
-                            Ok(n) => Ok(lit(-n)),
-                            Err(_) => Ok(lit(-n
-                                .parse::<f64>()
-                                .map_err(|_e| {
-                                    DataFusionError::Internal(format!(
-                                        "negative operator can be only applied to integer and float operands, got: {}",
-                                    n))
-                                })?)),
-                        },
-                        // not a literal, apply negative operator on expression
-                        _ => Ok(Expr::Negative(Box::new(self.sql_expr_to_logical_expr(expr)?))),
-                    }
-                }
-                _ => Err(DataFusionError::NotImplemented(format!(
-                    "Unsupported SQL unary operator {:?}",
-                    op
-                ))),
-            },
-
-            SQLExpr::Between {
-                ref expr,
-                ref negated,
-                ref low,
-                ref high,
-            } => Ok(Expr::Between {
-                expr: Box::new(self.sql_expr_to_logical_expr(&expr)?),
-                negated: *negated,
-                low: Box::new(self.sql_expr_to_logical_expr(&low)?),
-                high: Box::new(self.sql_expr_to_logical_expr(&high)?),
-            }),
-
-            SQLExpr::InList {
-                ref expr,
-                ref list,
-                ref negated,
-            } => {
-                let list_expr = list
-                    .iter()
-                    .map(|e| self.sql_expr_to_logical_expr(e))
-                    .collect::<Result<Vec<_>>>()?;
-
-                Ok(Expr::InList {
-                    expr: Box::new(self.sql_expr_to_logical_expr(&expr)?),
-                    list: list_expr,
-                    negated: *negated,
-                })
-            }
-
-            SQLExpr::BinaryOp {
-                ref left,
-                ref op,
-                ref right,
-            } => {
-                let operator = match *op {
-                    BinaryOperator::Gt => Ok(Operator::Gt),
-                    BinaryOperator::GtEq => Ok(Operator::GtEq),
-                    BinaryOperator::Lt => Ok(Operator::Lt),
-                    BinaryOperator::LtEq => Ok(Operator::LtEq),
-                    BinaryOperator::Eq => Ok(Operator::Eq),
-                    BinaryOperator::NotEq => Ok(Operator::NotEq),
-                    BinaryOperator::Plus => Ok(Operator::Plus),
-                    BinaryOperator::Minus => Ok(Operator::Minus),
-                    BinaryOperator::Multiply => Ok(Operator::Multiply),
-                    BinaryOperator::Divide => Ok(Operator::Divide),
-                    BinaryOperator::Modulus => Ok(Operator::Modulus),
-                    BinaryOperator::And => Ok(Operator::And),
-                    BinaryOperator::Or => Ok(Operator::Or),
-                    BinaryOperator::Like => Ok(Operator::Like),
-                    BinaryOperator::NotLike => Ok(Operator::NotLike),
-                    _ => Err(DataFusionError::NotImplemented(format!(
-                        "Unsupported SQL binary operator {:?}",
-                        op
-                    ))),
-                }?;
-
-                Ok(Expr::BinaryExpr {
-                    left: Box::new(self.sql_expr_to_logical_expr(&left)?),
-                    op: operator,
-                    right: Box::new(self.sql_expr_to_logical_expr(&right)?),
-                })
-            }
-
-            SQLExpr::Function(function) => {
-                let name = if function.name.0.len() > 1 {
-                    // DF doesn't handle compound identifiers
-                    // (e.g. "foo.bar") for function names yet
-                    function.name.to_string()
-                } else {
-                    // if there is a quote style, then don't normalize
-                    // the name, otherwise normalize to lowercase
-                    let ident = &function.name.0[0];
-                    match ident.quote_style {
-                        Some(_) => ident.value.clone(),
-                        None => ident.value.to_ascii_lowercase(),
-                    }
-                };
-
-                // first, scalar built-in
-                if let Ok(fun) = functions::BuiltinScalarFunction::from_str(&name) {
-                    let args = function
-                        .args
-                        .iter()
-                        .map(|a| self.sql_fn_arg_to_logical_expr(a))
-                        .collect::<Result<Vec<Expr>>>()?;
-
-                    return Ok(Expr::ScalarFunction { fun, args });
-                };
-
-                // next, aggregate built-ins
-                if let Ok(fun) = aggregates::AggregateFunction::from_str(&name) {
-                    let args = if fun == aggregates::AggregateFunction::Count {
-                        function
-                            .args
-                            .iter()
-                            .map(|a| match a {
-                                FunctionArg::Unnamed(SQLExpr::Value(Value::Number(
-                                    _,
-                                    _,
-                                ))) => Ok(lit(1_u8)),
-                                FunctionArg::Unnamed(SQLExpr::Wildcard) => Ok(lit(1_u8)),
-                                _ => self.sql_fn_arg_to_logical_expr(a),
-                            })
-                            .collect::<Result<Vec<Expr>>>()?
-                    } else {
-                        function
-                            .args
-                            .iter()
-                            .map(|a| self.sql_fn_arg_to_logical_expr(a))
-                            .collect::<Result<Vec<Expr>>>()?
-                    };
-
-                    return Ok(Expr::AggregateFunction {
-                        fun,
-                        distinct: function.distinct,
-                        args,
-                    });
-                };
-
-                // finally, user-defined functions (UDF) and UDAF
-                match self.schema_provider.get_function_meta(&name) {
-                    Some(fm) => {
-                        let args = function
-                            .args
-                            .iter()
-                            .map(|a| self.sql_fn_arg_to_logical_expr(a))
-                            .collect::<Result<Vec<Expr>>>()?;
-
-                        Ok(Expr::ScalarUDF { fun: fm, args })
-                    }
-                    None => match self.schema_provider.get_aggregate_meta(&name) {
-                        Some(fm) => {
-                            let args = function
-                                .args
-                                .iter()
-                                .map(|a| self.sql_fn_arg_to_logical_expr(a))
-                                .collect::<Result<Vec<Expr>>>()?;
-
-                            Ok(Expr::AggregateUDF { fun: fm, args })
-                        }
-                        _ => Err(DataFusionError::Plan(format!(
-                            "Invalid function '{}'",
-                            name
-                        ))),
-                    },
-                }
-            }
-
-            SQLExpr::Nested(e) => self.sql_expr_to_logical_expr(&e),
-
-            _ => Err(DataFusionError::NotImplemented(format!(
-                "Unsupported ast node {:?} in sqltorel",
-                sql
-            ))),
-        }
-    }
-
-    fn sql_interval_to_literal(
-        &self,
-        value: &str,
-        leading_field: &Option<DateTimeField>,
-        leading_precision: &Option<u64>,
-        last_field: &Option<DateTimeField>,
-        fractional_seconds_precision: &Option<u64>,
-    ) -> Result<Expr> {
-        if leading_field.is_some() {
-            return Err(DataFusionError::NotImplemented(format!(
-                "Unsupported Interval Expression with leading_field {:?}",
-                leading_field
-            )));
-        }
-
-        if leading_precision.is_some() {
-            return Err(DataFusionError::NotImplemented(format!(
-                "Unsupported Interval Expression with leading_precision {:?}",
-                leading_precision
-            )));
-        }
-
-        if last_field.is_some() {
-            return Err(DataFusionError::NotImplemented(format!(
-                "Unsupported Interval Expression with last_field {:?}",
-                last_field
-            )));
-        }
-
-        if fractional_seconds_precision.is_some() {
-            return Err(DataFusionError::NotImplemented(format!(
-                "Unsupported Interval Expression with fractional_seconds_precision {:?}",
-                fractional_seconds_precision
-            )));
-        }
-
-        const SECONDS_PER_HOUR: f32 = 3_600_f32;
-        const MILLIS_PER_SECOND: f32 = 1_000_f32;
-
-        // We are storing parts as integers, it's why we need to align parts fractional
-        // INTERVAL '0.5 MONTH' = 15 days, INTERVAL '1.5 MONTH' = 1 month 15 days
-        // INTERVAL '0.5 DAY' = 12 hours, INTERVAL '1.5 DAY' = 1 day 12 hours
-        let align_interval_parts = |month_part: f32,
-                                    mut day_part: f32,
-                                    mut milles_part: f32|
-         -> (i32, i32, f32) {
-            // Convert fractional month to days, It's not supported by Arrow types, but anyway
-            day_part += (month_part - (month_part as i32) as f32) * 30_f32;
-
-            // Convert fractional days to hours
-            milles_part += (day_part - ((day_part as i32) as f32))
-                * 24_f32
-                * SECONDS_PER_HOUR
-                * MILLIS_PER_SECOND;
-
-            (month_part as i32, day_part as i32, milles_part)
-        };
-
-        let calculate_from_part = |interval_period_str: &str,
-                                   interval_type: &str|
-         -> Result<(i32, i32, f32)> {
-            // @todo It's better to use Decimal in order to protect rounding errors
-            // Wait https://github.com/apache/arrow/pull/9232
-            let interval_period = match f32::from_str(interval_period_str) {
-                Ok(n) => n,
-                Err(_) => {
-                    return Err(DataFusionError::SQL(ParserError(format!(
-                        "Unsupported Interval Expression with value {:?}",
-                        value
-                    ))))
-                }
-            };
-
-            if interval_period > (i32::MAX as f32) {
-                return Err(DataFusionError::NotImplemented(format!(
-                    "Interval field value out of range: {:?}",
-                    value
-                )));
-            }
-
-            match interval_type.to_lowercase().as_str() {
-                "year" => Ok(align_interval_parts(interval_period * 12_f32, 0.0, 0.0)),
-                "month" => Ok(align_interval_parts(interval_period, 0.0, 0.0)),
-                "day" | "days" => Ok(align_interval_parts(0.0, interval_period, 0.0)),
-                "hour" | "hours" => {
-                    Ok((0, 0, interval_period * SECONDS_PER_HOUR * MILLIS_PER_SECOND))
-                }
-                "minutes" | "minute" => {
-                    Ok((0, 0, interval_period * 60_f32 * MILLIS_PER_SECOND))
-                }
-                "seconds" | "second" => Ok((0, 0, interval_period * MILLIS_PER_SECOND)),
-                "milliseconds" | "millisecond" => Ok((0, 0, interval_period)),
-                _ => Err(DataFusionError::NotImplemented(format!(
-                    "Invalid input syntax for type interval: {:?}",
-                    value
-                ))),
-            }
-        };
-
-        let mut result_month: i64 = 0;
-        let mut result_days: i64 = 0;
-        let mut result_millis: i64 = 0;
-
-        let mut parts = value.split_whitespace();
-
-        loop {
-            let interval_period_str = parts.next();
-            if interval_period_str.is_none() {
-                break;
-            }
-
-            let (diff_month, diff_days, diff_millis) = calculate_from_part(
-                interval_period_str.unwrap(),
-                parts.next().unwrap_or("second"),
-            )?;
-
-            result_month += diff_month as i64;
-
-            if result_month > (i32::MAX as i64) {
-                return Err(DataFusionError::NotImplemented(format!(
-                    "Interval field value out of range: {:?}",
-                    value
-                )));
-            }
-
-            result_days += diff_days as i64;
-
-            if result_days > (i32::MAX as i64) {
-                return Err(DataFusionError::NotImplemented(format!(
-                    "Interval field value out of range: {:?}",
-                    value
-                )));
-            }
-
-            result_millis += diff_millis as i64;
-
-            if result_millis > (i32::MAX as i64) {
-                return Err(DataFusionError::NotImplemented(format!(
-                    "Interval field value out of range: {:?}",
-                    value
-                )));
-            }
-        }
-
-        // Interval is tricky thing
-        // 1 day is not 24 hours because timezones, 1 year != 365/364! 30 days != 1 month
-        // The true way to store and calculate intervals is to store it as it defined
-        // Due the fact that Arrow supports only two types YearMonth (month) and DayTime (day, time)
-        // It's not possible to store complex intervals
-        // It's possible to do select (NOW() + INTERVAL '1 year') + INTERVAL '1 day'; as workaround
-        if result_month != 0 && (result_days != 0 || result_millis != 0) {
-            return Err(DataFusionError::NotImplemented(format!(
-                "DF does not support intervals that have both a Year/Month part as well as Days/Hours/Mins/Seconds: {:?}. Hint: try breaking the interval into two parts, one with Year/Month and the other with Days/Hours/Mins/Seconds - e.g. (NOW() + INTERVAL '1 year') + INTERVAL '1 day'",
-                value
-            )));
-        }
-
-        if result_month != 0 {
-            return Ok(Expr::Literal(ScalarValue::IntervalYearMonth(Some(
-                result_month as i32,
-            ))));
-        }
-
-        let result: i64 = (result_days << 32) | result_millis;
-        Ok(Expr::Literal(ScalarValue::IntervalDayTime(Some(result))))
-    }
-
-    fn show_variable_to_plan(&self, variable: &[Ident]) -> Result<LogicalPlan> {
-        // Special case SHOW TABLES
-        let variable = ObjectName(variable.to_vec()).to_string();
-        if variable.as_str().eq_ignore_ascii_case("tables") {
-            if self.has_table("information_schema", "tables") {
-                let rewrite =
-                    DFParser::parse_sql("SELECT * FROM information_schema.tables;")?;
-                self.statement_to_plan(&rewrite[0])
-            } else {
-                Err(DataFusionError::Plan(
-                    "SHOW TABLES is not supported unless information_schema is enabled"
-                        .to_string(),
-                ))
-            }
-        } else {
-            Err(DataFusionError::NotImplemented(format!(
-                "SHOW {} not implemented. Supported syntax: SHOW <TABLES>",
-                variable
-            )))
-        }
-    }
-
-    fn show_columns_to_plan(
-        &self,
-        extended: bool,
-        full: bool,
-        table_name: &ObjectName,
-        filter: Option<&ShowStatementFilter>,
-    ) -> Result<LogicalPlan> {
-        if filter.is_some() {
-            return Err(DataFusionError::Plan(
-                "SHOW COLUMNS with WHERE or LIKE is not supported".to_string(),
-            ));
-        }
-
-        if !self.has_table("information_schema", "columns") {
-            return Err(DataFusionError::Plan(
-                "SHOW COLUMNS is not supported unless information_schema is enabled"
-                    .to_string(),
-            ));
-        }
-
-        if self
-            .schema_provider
-            .get_table_provider(table_name.try_into()?)
-            .is_none()
-        {
-            return Err(DataFusionError::Plan(format!(
-                "Unknown relation for SHOW COLUMNS: {}",
-                table_name
-            )));
-        }
-
-        // Figure out the where clause
-        let columns = vec!["table_name", "table_schema", "table_catalog"].into_iter();
-        let where_clause = table_name
-            .0
-            .iter()
-            .rev()
-            .zip(columns)
-            .map(|(ident, column_name)| {
-                format!(r#"{} = '{}'"#, column_name, ident.to_string())
-            })
-            .collect::<Vec<_>>()
-            .join(" AND ");
-
-        // treat both FULL and EXTENDED as the same
-        let select_list = if full || extended {
-            "*"
-        } else {
-            "table_catalog, table_schema, table_name, column_name, data_type, is_nullable"
-        };
-
-        let query = format!(
-            "SELECT {} FROM information_schema.columns WHERE {}",
-            select_list, where_clause
-        );
-
-        let rewrite = DFParser::parse_sql(&query)?;
-        self.statement_to_plan(&rewrite[0])
-    }
-
-    /// Return true if there is a table provider available for "schema.table"
-    fn has_table(&self, schema: &str, table: &str) -> bool {
-        let tables_reference = TableReference::Partial { schema, table };
-        self.schema_provider
-            .get_table_provider(tables_reference)
-            .is_some()
-    }
-}
-
-/// Remove join expressions from a filter expression
-fn remove_join_expressions(
-    expr: &Expr,
-    join_columns: &[(&str, &str)],
-) -> Result<Option<Expr>> {
-    match expr {
-        Expr::BinaryExpr { left, op, right } => match op {
-            Operator::Eq => match (left.as_ref(), right.as_ref()) {
-                (Expr::Column(l), Expr::Column(r)) => {
-                    if join_columns.contains(&(l, r)) || join_columns.contains(&(r, l)) {
-                        Ok(None)
-                    } else {
-                        Ok(Some(expr.clone()))
-                    }
-                }
-                _ => Ok(Some(expr.clone())),
-            },
-            Operator::And => {
-                let l = remove_join_expressions(left, join_columns)?;
-                let r = remove_join_expressions(right, join_columns)?;
-                match (l, r) {
-                    (Some(ll), Some(rr)) => Ok(Some(and(ll, rr))),
-                    (Some(ll), _) => Ok(Some(ll)),
-                    (_, Some(rr)) => Ok(Some(rr)),
-                    _ => Ok(None),
-                }
-            }
-            _ => Ok(Some(expr.clone())),
-        },
-        _ => Ok(Some(expr.clone())),
-    }
-}
-
-/// Parse equijoin ON condition which could be a single Eq or multiple conjunctive Eqs
-///
-/// Examples
-///
-/// foo = bar
-/// foo = bar AND bar = baz AND ...
-///
-fn extract_join_keys(expr: &Expr, accum: &mut Vec<(String, String)>) -> Result<()> {
-    match expr {
-        Expr::BinaryExpr { left, op, right } => match op {
-            Operator::Eq => match (left.as_ref(), right.as_ref()) {
-                (Expr::Column(l), Expr::Column(r)) => {
-                    accum.push((l.to_owned(), r.to_owned()));
-                    Ok(())
-                }
-                other => Err(DataFusionError::SQL(ParserError(format!(
-                    "Unsupported expression '{:?}' in JOIN condition",
-                    other
-                )))),
-            },
-            Operator::And => {
-                extract_join_keys(left, accum)?;
-                extract_join_keys(right, accum)
-            }
-            other => Err(DataFusionError::SQL(ParserError(format!(
-                "Unsupported expression '{:?}' in JOIN condition",
-                other
-            )))),
-        },
-        other => Err(DataFusionError::SQL(ParserError(format!(
-            "Unsupported expression '{:?}' in JOIN condition",
-            other
-        )))),
-    }
-}
-
-/// Extract join keys from a WHERE clause
-fn extract_possible_join_keys(
-    expr: &Expr,
-    accum: &mut Vec<(String, String)>,
-) -> Result<()> {
-    match expr {
-        Expr::BinaryExpr { left, op, right } => match op {
-            Operator::Eq => match (left.as_ref(), right.as_ref()) {
-                (Expr::Column(l), Expr::Column(r)) => {
-                    accum.push((l.to_owned(), r.to_owned()));
-                    Ok(())
-                }
-                _ => Ok(()),
-            },
-            Operator::And => {
-                extract_possible_join_keys(left, accum)?;
-                extract_possible_join_keys(right, accum)
-            }
-            _ => Ok(()),
-        },
-        _ => Ok(()),
-    }
-}
-
-/// Convert SQL data type to relational representation of data type
-pub fn convert_data_type(sql: &SQLDataType) -> Result<DataType> {
-    match sql {
-        SQLDataType::Boolean => Ok(DataType::Boolean),
-        SQLDataType::SmallInt => Ok(DataType::Int16),
-        SQLDataType::Int => Ok(DataType::Int32),
-        SQLDataType::BigInt => Ok(DataType::Int64),
-        SQLDataType::Float(_) | SQLDataType::Real => Ok(DataType::Float64),
-        SQLDataType::Double => Ok(DataType::Float64),
-        SQLDataType::Char(_) | SQLDataType::Varchar(_) => Ok(DataType::Utf8),
-        SQLDataType::Timestamp => Ok(DataType::Timestamp(TimeUnit::Nanosecond, None)),
-        SQLDataType::Date => Ok(DataType::Date32),
-        other => Err(DataFusionError::NotImplemented(format!(
-            "Unsupported SQL type {:?}",
-            other
-        ))),
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::datasource::empty::EmptyTable;
-    use crate::{logical_plan::create_udf, sql::parser::DFParser};
-    use functions::ScalarFunctionImplementation;
-
-    const PERSON_COLUMN_NAMES: &str =
-        "id, first_name, last_name, age, state, salary, birth_date";
-
-    #[test]
-    fn select_no_relation() {
-        quick_test(
-            "SELECT 1",
-            "Projection: Int64(1)\
-             \n  EmptyRelation",
-        );
-    }
-
-    #[test]
-    fn select_column_does_not_exist() {
-        let sql = "SELECT doesnotexist FROM person";
-        let err = logical_plan(sql).expect_err("query should have failed");
-        assert_eq!(
-            format!(
-                "Plan(\"Invalid identifier \\\'doesnotexist\\\' for schema {}\")",
-                PERSON_COLUMN_NAMES
-            ),
-            format!("{:?}", err)
-        );
-    }
-
-    #[test]
-    fn select_repeated_column() {
-        let sql = "SELECT age, age FROM person";
-        let err = logical_plan(sql).expect_err("query should have failed");
-        assert_eq!(
-            "Plan(\"Projections require unique expression names but the expression \\\"#age\\\" at position 0 and \\\"#age\\\" at position 1 have the same name. Consider aliasing (\\\"AS\\\") one of them.\")",
-            format!("{:?}", err)
-        );
-    }
-
-    #[test]
-    fn select_wildcard_with_repeated_column() {
-        let sql = "SELECT *, age FROM person";
-        let err = logical_plan(sql).expect_err("query should have failed");
-        assert_eq!(
-            "Plan(\"Projections require unique expression names but the expression \\\"#age\\\" at position 3 and \\\"#age\\\" at position 7 have the same name. Consider aliasing (\\\"AS\\\") one of them.\")",
-            format!("{:?}", err)
-        );
-    }
-
-    #[test]
-    fn select_wildcard_with_repeated_column_but_is_aliased() {
-        quick_test(
-            "SELECT *, first_name AS fn from person",
-            "Projection: #id, #first_name, #last_name, #age, #state, #salary, #birth_date, #first_name AS fn\
-            \n  TableScan: person projection=None",
-        );
-    }
-
-    #[test]
-    fn select_scalar_func_with_literal_no_relation() {
-        quick_test(
-            "SELECT sqrt(9)",
-            "Projection: sqrt(Int64(9))\
-             \n  EmptyRelation",
-        );
-    }
-
-    #[test]
-    fn select_simple_filter() {
-        let sql = "SELECT id, first_name, last_name \
-                   FROM person WHERE state = 'CO'";
-        let expected = "Projection: #id, #first_name, #last_name\
-                        \n  Filter: #state Eq Utf8(\"CO\")\
-                        \n    TableScan: person projection=None";
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn select_filter_column_does_not_exist() {
-        let sql = "SELECT first_name FROM person WHERE doesnotexist = 'A'";
-        let err = logical_plan(sql).expect_err("query should have failed");
-        assert_eq!(
-            format!(
-                "Plan(\"Invalid identifier \\\'doesnotexist\\\' for schema {}\")",
-                PERSON_COLUMN_NAMES
-            ),
-            format!("{:?}", err)
-        );
-    }
-
-    #[test]
-    fn select_filter_cannot_use_alias() {
-        let sql = "SELECT first_name AS x FROM person WHERE x = 'A'";
-        let err = logical_plan(sql).expect_err("query should have failed");
-        assert_eq!(
-            format!(
-                "Plan(\"Invalid identifier \\\'x\\\' for schema {}\")",
-                PERSON_COLUMN_NAMES
-            ),
-            format!("{:?}", err)
-        );
-    }
-
-    #[test]
-    fn select_neg_filter() {
-        let sql = "SELECT id, first_name, last_name \
-                   FROM person WHERE NOT state";
-        let expected = "Projection: #id, #first_name, #last_name\
-                        \n  Filter: NOT #state\
-                        \n    TableScan: person projection=None";
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn select_compound_filter() {
-        let sql = "SELECT id, first_name, last_name \
-                   FROM person WHERE state = 'CO' AND age >= 21 AND age <= 65";
-        let expected = "Projection: #id, #first_name, #last_name\
-            \n  Filter: #state Eq Utf8(\"CO\") And #age GtEq Int64(21) And #age LtEq Int64(65)\
-            \n    TableScan: person projection=None";
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn test_timestamp_filter() {
-        let sql =
-            "SELECT state FROM person WHERE birth_date < CAST (158412331400600000 as timestamp)";
-
-        let expected = "Projection: #state\
-            \n  Filter: #birth_date Lt CAST(Int64(158412331400600000) AS Timestamp(Nanosecond, None))\
-            \n    TableScan: person projection=None";
-
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn test_date_filter() {
-        let sql =
-            "SELECT state FROM person WHERE birth_date < CAST ('2020-01-01' as date)";
-
-        let expected = "Projection: #state\
-            \n  Filter: #birth_date Lt CAST(Utf8(\"2020-01-01\") AS Date32)\
-            \n    TableScan: person projection=None";
-
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn select_all_boolean_operators() {
-        let sql = "SELECT age, first_name, last_name \
-                   FROM person \
-                   WHERE age = 21 \
-                   AND age != 21 \
-                   AND age > 21 \
-                   AND age >= 21 \
-                   AND age < 65 \
-                   AND age <= 65";
-        let expected = "Projection: #age, #first_name, #last_name\
-                        \n  Filter: #age Eq Int64(21) \
-                        And #age NotEq Int64(21) \
-                        And #age Gt Int64(21) \
-                        And #age GtEq Int64(21) \
-                        And #age Lt Int64(65) \
-                        And #age LtEq Int64(65)\
-                        \n    TableScan: person projection=None";
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn select_between() {
-        let sql = "SELECT state FROM person WHERE age BETWEEN 21 AND 65";
-        let expected = "Projection: #state\
-            \n  Filter: #age BETWEEN Int64(21) AND Int64(65)\
-            \n    TableScan: person projection=None";
-
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn select_between_negated() {
-        let sql = "SELECT state FROM person WHERE age NOT BETWEEN 21 AND 65";
-        let expected = "Projection: #state\
-            \n  Filter: #age NOT BETWEEN Int64(21) AND Int64(65)\
-            \n    TableScan: person projection=None";
-
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn select_nested() {
-        let sql = "SELECT fn2, last_name
-                   FROM (
-                     SELECT fn1 as fn2, last_name, birth_date
-                     FROM (
-                       SELECT first_name AS fn1, last_name, birth_date, age
-                       FROM person
-                     )
-                   )";
-        let expected = "Projection: #fn2, #last_name\
-                        \n  Projection: #fn1 AS fn2, #last_name, #birth_date\
-                        \n    Projection: #first_name AS fn1, #last_name, #birth_date, #age\
-                        \n      TableScan: person projection=None";
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn select_nested_with_filters() {
-        let sql = "SELECT fn1, age
-                   FROM (
-                     SELECT first_name AS fn1, age
-                     FROM person
-                     WHERE age > 20
-                   )
-                   WHERE fn1 = 'X' AND age < 30";
-
-        let expected = "Filter: #fn1 Eq Utf8(\"X\") And #age Lt Int64(30)\
-                        \n  Projection: #first_name AS fn1, #age\
-                        \n    Filter: #age Gt Int64(20)\
-                        \n      TableScan: person projection=None";
-
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn select_with_having() {
-        let sql = "SELECT id, age
-                   FROM person
-                   HAVING age > 100 AND age < 200";
-        let expected = "Projection: #id, #age\
-                        \n  Filter: #age Gt Int64(100) And #age Lt Int64(200)\
-                        \n    TableScan: person projection=None";
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn select_with_having_referencing_column_not_in_select() {
-        let sql = "SELECT id, age
-                   FROM person
-                   HAVING first_name = 'M'";
-        let err = logical_plan(sql).expect_err("query should have failed");
-        assert_eq!(
-            "Plan(\"Having references column(s) not provided by the select\")",
-            format!("{:?}", err)
-        );
-    }
-
-    #[test]
-    fn select_with_having_referencing_column_nested_in_select_expression() {
-        let sql = "SELECT id, age + 1
-                   FROM person
-                   HAVING age > 100";
-        let err = logical_plan(sql).expect_err("query should have failed");
-        assert_eq!(
-            "Plan(\"Having references column(s) not provided by the select\")",
-            format!("{:?}", err)
-        );
-    }
-
-    #[test]
-    fn select_with_having_with_aggregate_not_in_select() {
-        let sql = "SELECT first_name
-                   FROM person
-                   HAVING MAX(age) > 100";
-        let err = logical_plan(sql).expect_err("query should have failed");
-        assert_eq!(
-            "Plan(\"Projection references non-aggregate values\")",
-            format!("{:?}", err)
-        );
-    }
-
-    #[test]
-    fn select_aggregate_with_having_that_reuses_aggregate() {
-        let sql = "SELECT MAX(age)
-                   FROM person
-                   HAVING MAX(age) < 30";
-        let expected = "Filter: #MAX(age) Lt Int64(30)\
-                        \n  Aggregate: groupBy=[[]], aggr=[[MAX(#age)]]\
-                        \n    TableScan: person projection=None";
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn select_aggregate_with_having_with_aggregate_not_in_select() {
-        let sql = "SELECT MAX(age)
-                   FROM person
-                   HAVING MAX(first_name) > 'M'";
-        let expected = "Projection: #MAX(age)\
-                        \n  Filter: #MAX(first_name) Gt Utf8(\"M\")\
-                        \n    Aggregate: groupBy=[[]], aggr=[[MAX(#age), MAX(#first_name)]]\
-                        \n      TableScan: person projection=None";
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn select_aggregate_with_having_referencing_column_not_in_select() {
-        let sql = "SELECT COUNT(*)
-                   FROM person
-                   HAVING first_name = 'M'";
-        let err = logical_plan(sql).expect_err("query should have failed");
-        assert_eq!(
-            "Plan(\"Having references non-aggregate values\")",
-            format!("{:?}", err)
-        );
-    }
-
-    #[test]
-    fn select_aggregate_aliased_with_having_referencing_aggregate_by_its_alias() {
-        let sql = "SELECT MAX(age) as max_age
-                   FROM person
-                   HAVING max_age < 30";
-        let expected = "Projection: #MAX(age) AS max_age\
-                        \n  Filter: #MAX(age) Lt Int64(30)\
-                        \n    Aggregate: groupBy=[[]], aggr=[[MAX(#age)]]\
-                        \n      TableScan: person projection=None";
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn select_aggregate_aliased_with_having_that_reuses_aggregate_but_not_by_its_alias() {
-        let sql = "SELECT MAX(age) as max_age
-                   FROM person
-                   HAVING MAX(age) < 30";
-        let expected = "Projection: #MAX(age) AS max_age\
-                        \n  Filter: #MAX(age) Lt Int64(30)\
-                        \n    Aggregate: groupBy=[[]], aggr=[[MAX(#age)]]\
-                        \n      TableScan: person projection=None";
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn select_aggregate_with_group_by_with_having() {
-        let sql = "SELECT first_name, MAX(age)
-                   FROM person
-                   GROUP BY first_name
-                   HAVING first_name = 'M'";
-        let expected = "Filter: #first_name Eq Utf8(\"M\")\
-                        \n  Aggregate: groupBy=[[#first_name]], aggr=[[MAX(#age)]]\
-                        \n    TableScan: person projection=None";
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn select_aggregate_with_group_by_with_having_and_where() {
-        let sql = "SELECT first_name, MAX(age)
-                   FROM person
-                   WHERE id > 5
-                   GROUP BY first_name
-                   HAVING MAX(age) < 100";
-        let expected = "Filter: #MAX(age) Lt Int64(100)\
-                        \n  Aggregate: groupBy=[[#first_name]], aggr=[[MAX(#age)]]\
-                        \n    Filter: #id Gt Int64(5)\
-                        \n      TableScan: person projection=None";
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn select_aggregate_with_group_by_with_having_and_where_filtering_on_aggregate_column(
-    ) {
-        let sql = "SELECT first_name, MAX(age)
-                   FROM person
-                   WHERE id > 5 AND age > 18
-                   GROUP BY first_name
-                   HAVING MAX(age) < 100";
-        let expected = "Filter: #MAX(age) Lt Int64(100)\
-                        \n  Aggregate: groupBy=[[#first_name]], aggr=[[MAX(#age)]]\
-                        \n    Filter: #id Gt Int64(5) And #age Gt Int64(18)\
-                        \n      TableScan: person projection=None";
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn select_aggregate_with_group_by_with_having_using_column_by_alias() {
-        let sql = "SELECT first_name AS fn, MAX(age)
-                   FROM person
-                   GROUP BY first_name
-                   HAVING MAX(age) > 2 AND fn = 'M'";
-        let expected = "Projection: #first_name AS fn, #MAX(age)\
-                        \n  Filter: #MAX(age) Gt Int64(2) And #first_name Eq Utf8(\"M\")\
-                        \n    Aggregate: groupBy=[[#first_name]], aggr=[[MAX(#age)]]\
-                        \n      TableScan: person projection=None";
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn select_aggregate_with_group_by_with_having_using_columns_with_and_without_their_aliases(
-    ) {
-        let sql = "SELECT first_name AS fn, MAX(age) AS max_age
-                   FROM person
-                   GROUP BY first_name
-                   HAVING MAX(age) > 2 AND max_age < 5 AND first_name = 'M' AND fn = 'N'";
-        let expected = "Projection: #first_name AS fn, #MAX(age) AS max_age\
-                        \n  Filter: #MAX(age) Gt Int64(2) And #MAX(age) Lt Int64(5) And #first_name Eq Utf8(\"M\") And #first_name Eq Utf8(\"N\")\
-                        \n    Aggregate: groupBy=[[#first_name]], aggr=[[MAX(#age)]]\
-                        \n      TableScan: person projection=None";
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn select_aggregate_with_group_by_with_having_that_reuses_aggregate() {
-        let sql = "SELECT first_name, MAX(age)
-                   FROM person
-                   GROUP BY first_name
-                   HAVING MAX(age) > 100";
-        let expected = "Filter: #MAX(age) Gt Int64(100)\
-                        \n  Aggregate: groupBy=[[#first_name]], aggr=[[MAX(#age)]]\
-                        \n    TableScan: person projection=None";
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn select_aggregate_with_group_by_with_having_referencing_column_not_in_group_by() {
-        let sql = "SELECT first_name, MAX(age)
-                   FROM person
-                   GROUP BY first_name
-                   HAVING MAX(age) > 10 AND last_name = 'M'";
-        let err = logical_plan(sql).expect_err("query should have failed");
-        assert_eq!(
-            "Plan(\"Having references non-aggregate values\")",
-            format!("{:?}", err)
-        );
-    }
-
-    #[test]
-    fn select_aggregate_with_group_by_with_having_that_reuses_aggregate_multiple_times() {
-        let sql = "SELECT first_name, MAX(age)
-                   FROM person
-                   GROUP BY first_name
-                   HAVING MAX(age) > 100 AND MAX(age) < 200";
-        let expected = "Filter: #MAX(age) Gt Int64(100) And #MAX(age) Lt Int64(200)\
-                        \n  Aggregate: groupBy=[[#first_name]], aggr=[[MAX(#age)]]\
-                        \n    TableScan: person projection=None";
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn select_aggregate_with_group_by_with_having_using_aggreagate_not_in_select() {
-        let sql = "SELECT first_name, MAX(age)
-                   FROM person
-                   GROUP BY first_name
-                   HAVING MAX(age) > 100 AND MIN(id) < 50";
-        let expected = "Projection: #first_name, #MAX(age)\
-                        \n  Filter: #MAX(age) Gt Int64(100) And #MIN(id) Lt Int64(50)\
-                        \n    Aggregate: groupBy=[[#first_name]], aggr=[[MAX(#age), MIN(#id)]]\
-                        \n      TableScan: person projection=None";
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn select_aggregate_aliased_with_group_by_with_having_referencing_aggregate_by_its_alias(
-    ) {
-        let sql = "SELECT first_name, MAX(age) AS max_age
-                   FROM person
-                   GROUP BY first_name
-                   HAVING max_age > 100";
-        let expected = "Projection: #first_name, #MAX(age) AS max_age\
-                        \n  Filter: #MAX(age) Gt Int64(100)\
-                        \n    Aggregate: groupBy=[[#first_name]], aggr=[[MAX(#age)]]\
-                        \n      TableScan: person projection=None";
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn select_aggregate_compound_aliased_with_group_by_with_having_referencing_compound_aggregate_by_its_alias(
-    ) {
-        let sql = "SELECT first_name, MAX(age) + 1 AS max_age_plus_one
-                   FROM person
-                   GROUP BY first_name
-                   HAVING max_age_plus_one > 100";
-        let expected =
-            "Projection: #first_name, #MAX(age) Plus Int64(1) AS max_age_plus_one\
-                        \n  Filter: #MAX(age) Plus Int64(1) Gt Int64(100)\
-                        \n    Aggregate: groupBy=[[#first_name]], aggr=[[MAX(#age)]]\
-                        \n      TableScan: person projection=None";
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn select_aggregate_with_group_by_with_having_using_derived_column_aggreagate_not_in_select(
-    ) {
-        let sql = "SELECT first_name, MAX(age)
-                   FROM person
-                   GROUP BY first_name
-                   HAVING MAX(age) > 100 AND MIN(id - 2) < 50";
-        let expected = "Projection: #first_name, #MAX(age)\
-                        \n  Filter: #MAX(age) Gt Int64(100) And #MIN(id Minus Int64(2)) Lt Int64(50)\
-                        \n    Aggregate: groupBy=[[#first_name]], aggr=[[MAX(#age), MIN(#id Minus Int64(2))]]\
-                        \n      TableScan: person projection=None";
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn select_aggregate_with_group_by_with_having_using_count_star_not_in_select() {
-        let sql = "SELECT first_name, MAX(age)
-                   FROM person
-                   GROUP BY first_name
-                   HAVING MAX(age) > 100 AND COUNT(*) < 50";
-        let expected = "Projection: #first_name, #MAX(age)\
-                        \n  Filter: #MAX(age) Gt Int64(100) And #COUNT(UInt8(1)) Lt Int64(50)\
-                        \n    Aggregate: groupBy=[[#first_name]], aggr=[[MAX(#age), COUNT(UInt8(1))]]\
-                        \n      TableScan: person projection=None";
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn select_binary_expr() {
-        let sql = "SELECT age + salary from person";
-        let expected = "Projection: #age Plus #salary\
-                        \n  TableScan: person projection=None";
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn select_binary_expr_nested() {
-        let sql = "SELECT (age + salary)/2 from person";
-        let expected = "Projection: #age Plus #salary Divide Int64(2)\
-                        \n  TableScan: person projection=None";
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn select_wildcard_with_groupby() {
-        quick_test(
-            "SELECT * FROM person GROUP BY id, first_name, last_name, age, state, salary, birth_date",
-            "Aggregate: groupBy=[[#id, #first_name, #last_name, #age, #state, #salary, #birth_date]], aggr=[[]]\
-             \n  TableScan: person projection=None",
-        );
-        quick_test(
-            "SELECT * FROM (SELECT first_name, last_name FROM person) GROUP BY first_name, last_name",
-            "Aggregate: groupBy=[[#first_name, #last_name]], aggr=[[]]\
-             \n  Projection: #first_name, #last_name\
-             \n    TableScan: person projection=None",
-        );
-    }
-
-    #[test]
-    fn select_simple_aggregate() {
-        quick_test(
-            "SELECT MIN(age) FROM person",
-            "Aggregate: groupBy=[[]], aggr=[[MIN(#age)]]\
-             \n  TableScan: person projection=None",
-        );
-    }
-
-    #[test]
-    fn test_sum_aggregate() {
-        quick_test(
-            "SELECT SUM(age) from person",
-            "Aggregate: groupBy=[[]], aggr=[[SUM(#age)]]\
-             \n  TableScan: person projection=None",
-        );
-    }
-
-    #[test]
-    fn select_simple_aggregate_column_does_not_exist() {
-        let sql = "SELECT MIN(doesnotexist) FROM person";
-        let err = logical_plan(sql).expect_err("query should have failed");
-        assert_eq!(
-            format!(
-                "Plan(\"Invalid identifier \\\'doesnotexist\\\' for schema {}\")",
-                PERSON_COLUMN_NAMES
-            ),
-            format!("{:?}", err)
-        );
-    }
-
-    #[test]
-    fn select_simple_aggregate_repeated_aggregate() {
-        let sql = "SELECT MIN(age), MIN(age) FROM person";
-        let err = logical_plan(sql).expect_err("query should have failed");
-        assert_eq!(
-            "Plan(\"Projections require unique expression names but the expression \\\"#MIN(age)\\\" at position 0 and \\\"#MIN(age)\\\" at position 1 have the same name. Consider aliasing (\\\"AS\\\") one of them.\")",
-            format!("{:?}", err)
-        );
-    }
-
-    #[test]
-    fn select_simple_aggregate_repeated_aggregate_with_single_alias() {
-        quick_test(
-            "SELECT MIN(age), MIN(age) AS a FROM person",
-            "Projection: #MIN(age), #MIN(age) AS a\
-             \n  Aggregate: groupBy=[[]], aggr=[[MIN(#age)]]\
-             \n    TableScan: person projection=None",
-        );
-    }
-
-    #[test]
-    fn select_simple_aggregate_repeated_aggregate_with_unique_aliases() {
-        quick_test(
-            "SELECT MIN(age) AS a, MIN(age) AS b FROM person",
-            "Projection: #MIN(age) AS a, #MIN(age) AS b\
-             \n  Aggregate: groupBy=[[]], aggr=[[MIN(#age)]]\
-             \n    TableScan: person projection=None",
-        );
-    }
-
-    #[test]
-    fn select_simple_aggregate_repeated_aggregate_with_repeated_aliases() {
-        let sql = "SELECT MIN(age) AS a, MIN(age) AS a FROM person";
-        let err = logical_plan(sql).expect_err("query should have failed");
-        assert_eq!(
-            "Plan(\"Projections require unique expression names but the expression \\\"#MIN(age) AS a\\\" at position 0 and \\\"#MIN(age) AS a\\\" at position 1 have the same name. Consider aliasing (\\\"AS\\\") one of them.\")",
-            format!("{:?}", err)
-        );
-    }
-
-    #[test]
-    fn select_simple_aggregate_with_groupby() {
-        quick_test(
-            "SELECT state, MIN(age), MAX(age) FROM person GROUP BY state",
-            "Aggregate: groupBy=[[#state]], aggr=[[MIN(#age), MAX(#age)]]\
-             \n  TableScan: person projection=None",
-        );
-    }
-
-    #[test]
-    fn select_simple_aggregate_with_groupby_with_aliases() {
-        quick_test(
-            "SELECT state AS a, MIN(age) AS b FROM person GROUP BY state",
-            "Projection: #state AS a, #MIN(age) AS b\
-             \n  Aggregate: groupBy=[[#state]], aggr=[[MIN(#age)]]\
-             \n    TableScan: person projection=None",
-        );
-    }
-
-    #[test]
-    fn select_simple_aggregate_with_groupby_with_aliases_repeated() {
-        let sql = "SELECT state AS a, MIN(age) AS a FROM person GROUP BY state";
-        let err = logical_plan(sql).expect_err("query should have failed");
-        assert_eq!(
-            "Plan(\"Projections require unique expression names but the expression \\\"#state AS a\\\" at position 0 and \\\"#MIN(age) AS a\\\" at position 1 have the same name. Consider aliasing (\\\"AS\\\") one of them.\")",
-            format!("{:?}", err)
-        );
-    }
-
-    #[test]
-    fn select_simple_aggregate_with_groupby_column_unselected() {
-        quick_test(
-            "SELECT MIN(age), MAX(age) FROM person GROUP BY state",
-            "Projection: #MIN(age), #MAX(age)\
-             \n  Aggregate: groupBy=[[#state]], aggr=[[MIN(#age), MAX(#age)]]\
-             \n    TableScan: person projection=None",
-        );
-    }
-
-    #[test]
-    fn select_simple_aggregate_with_groupby_and_column_in_group_by_does_not_exist() {
-        let sql = "SELECT SUM(age) FROM person GROUP BY doesnotexist";
-        let err = logical_plan(sql).expect_err("query should have failed");
-        assert_eq!(
-            format!(
-                "Plan(\"Invalid identifier \\\'doesnotexist\\\' for schema {}\")",
-                PERSON_COLUMN_NAMES
-            ),
-            format!("{:?}", err)
-        );
-    }
-
-    #[test]
-    fn select_simple_aggregate_with_groupby_and_column_in_aggregate_does_not_exist() {
-        let sql = "SELECT SUM(doesnotexist) FROM person GROUP BY first_name";
-        let err = logical_plan(sql).expect_err("query should have failed");
-        assert_eq!(
-            format!(
-                "Plan(\"Invalid identifier \\\'doesnotexist\\\' for schema {}\")",
-                PERSON_COLUMN_NAMES
-            ),
-            format!("{:?}", err)
-        );
-    }
-
-    #[test]
-    fn select_interval_out_of_range() {
-        let sql = "SELECT INTERVAL '100000000000000000 day'";
-        let err = logical_plan(sql).expect_err("query should have failed");
-        assert_eq!(
-            "NotImplemented(\"Interval field value out of range: \\\"100000000000000000 day\\\"\")",
-            format!("{:?}", err)
-        );
-    }
-
-    #[test]
-    fn select_unsupported_complex_interval() {
-        let sql = "SELECT INTERVAL '1 year 1 day'";
-        let err = logical_plan(sql).expect_err("query should have failed");
-        assert_eq!(
-            "NotImplemented(\"DF does not support intervals that have both a Year/Month part as well as Days/Hours/Mins/Seconds: \\\"1 year 1 day\\\". Hint: try breaking the interval into two parts, one with Year/Month and the other with Days/Hours/Mins/Seconds - e.g. (NOW() + INTERVAL \\\'1 year\\\') + INTERVAL \\\'1 day\\\'\")",
-            format!("{:?}", err)
-        );
-    }
-
-    #[test]
-    fn select_simple_aggregate_with_groupby_and_column_is_in_aggregate_and_groupby() {
-        quick_test(
-            "SELECT MAX(first_name) FROM person GROUP BY first_name",
-            "Projection: #MAX(first_name)\
-             \n  Aggregate: groupBy=[[#first_name]], aggr=[[MAX(#first_name)]]\
-             \n    TableScan: person projection=None",
-        );
-    }
-
-    #[test]
-    fn select_simple_aggregate_with_groupby_cannot_use_alias() {
-        let sql = "SELECT state AS x, MAX(age) FROM person GROUP BY x";
-        let err = logical_plan(sql).expect_err("query should have failed");
-        assert_eq!(
-            format!(
-                "Plan(\"Invalid identifier \\\'x\\\' for schema {}\")",
-                PERSON_COLUMN_NAMES
-            ),
-            format!("{:?}", err)
-        );
-    }
-
-    #[test]
-    fn select_simple_aggregate_with_groupby_aggregate_repeated() {
-        let sql = "SELECT state, MIN(age), MIN(age) FROM person GROUP BY state";
-        let err = logical_plan(sql).expect_err("query should have failed");
-        assert_eq!(
-            "Plan(\"Projections require unique expression names but the expression \\\"#MIN(age)\\\" at position 1 and \\\"#MIN(age)\\\" at position 2 have the same name. Consider aliasing (\\\"AS\\\") one of them.\")",
-            format!("{:?}", err)
-        );
-    }
-
-    #[test]
-    fn select_simple_aggregate_with_groupby_aggregate_repeated_and_one_has_alias() {
-        quick_test(
-            "SELECT state, MIN(age), MIN(age) AS ma FROM person GROUP BY state",
-            "Projection: #state, #MIN(age), #MIN(age) AS ma\
-             \n  Aggregate: groupBy=[[#state]], aggr=[[MIN(#age)]]\
-             \n    TableScan: person projection=None",
-        )
-    }
-    #[test]
-    fn select_simple_aggregate_with_groupby_non_column_expression_unselected() {
-        quick_test(
-            "SELECT MIN(first_name) FROM person GROUP BY age + 1",
-            "Projection: #MIN(first_name)\
-             \n  Aggregate: groupBy=[[#age Plus Int64(1)]], aggr=[[MIN(#first_name)]]\
-             \n    TableScan: person projection=None",
-        );
-    }
-
-    #[test]
-    fn select_simple_aggregate_with_groupby_non_column_expression_selected_and_resolvable(
-    ) {
-        quick_test(
-            "SELECT age + 1, MIN(first_name) FROM person GROUP BY age + 1",
-            "Aggregate: groupBy=[[#age Plus Int64(1)]], aggr=[[MIN(#first_name)]]\
-             \n  TableScan: person projection=None",
-        );
-        quick_test(
-            "SELECT MIN(first_name), age + 1 FROM person GROUP BY age + 1",
-            "Projection: #MIN(first_name), #age Plus Int64(1)\
-             \n  Aggregate: groupBy=[[#age Plus Int64(1)]], aggr=[[MIN(#first_name)]]\
-             \n    TableScan: person projection=None",
-        );
-    }
-
-    #[test]
-    fn select_simple_aggregate_with_groupby_non_column_expression_nested_and_resolvable()
-    {
-        quick_test(
-            "SELECT ((age + 1) / 2) * (age + 1), MIN(first_name) FROM person GROUP BY age + 1",
-            "Projection: #age Plus Int64(1) Divide Int64(2) Multiply #age Plus Int64(1), #MIN(first_name)\
-             \n  Aggregate: groupBy=[[#age Plus Int64(1)]], aggr=[[MIN(#first_name)]]\
-             \n    TableScan: person projection=None",
-        );
-    }
-
-    #[test]
-    fn select_simple_aggregate_with_groupby_non_column_expression_nested_and_not_resolvable(
-    ) {
-        // The query should fail, because age + 9 is not in the group by.
-        let sql =
-            "SELECT ((age + 1) / 2) * (age + 9), MIN(first_name) FROM person GROUP BY age + 1";
-        let err = logical_plan(sql).expect_err("query should have failed");
-        assert_eq!(
-            "Plan(\"Projection references non-aggregate values\")",
-            format!("{:?}", err)
-        );
-    }
-
-    #[test]
-    fn select_simple_aggregate_with_groupby_non_column_expression_and_its_column_selected(
-    ) {
-        let sql = "SELECT age, MIN(first_name) FROM person GROUP BY age + 1";
-        let err = logical_plan(sql).expect_err("query should have failed");
-        assert_eq!(
-            "Plan(\"Projection references non-aggregate values\")",
-            format!("{:?}", err)
-        );
-    }
-
-    #[test]
-    fn select_simple_aggregate_nested_in_binary_expr_with_groupby() {
-        quick_test(
-            "SELECT state, MIN(age) < 10 FROM person GROUP BY state",
-            "Projection: #state, #MIN(age) Lt Int64(10)\
-             \n  Aggregate: groupBy=[[#state]], aggr=[[MIN(#age)]]\
-             \n    TableScan: person projection=None",
-        );
-    }
-
-    #[test]
-    fn select_simple_aggregate_and_nested_groupby_column() {
-        quick_test(
-            "SELECT age + 1, MAX(first_name) FROM person GROUP BY age",
-            "Projection: #age Plus Int64(1), #MAX(first_name)\
-             \n  Aggregate: groupBy=[[#age]], aggr=[[MAX(#first_name)]]\
-             \n    TableScan: person projection=None",
-        );
-    }
-
-    #[test]
-    fn select_aggregate_compounded_with_groupby_column() {
-        quick_test(
-            "SELECT age + MIN(salary) FROM person GROUP BY age",
-            "Projection: #age Plus #MIN(salary)\
-             \n  Aggregate: groupBy=[[#age]], aggr=[[MIN(#salary)]]\
-             \n    TableScan: person projection=None",
-        );
-    }
-
-    #[test]
-    fn select_aggregate_with_non_column_inner_expression_with_groupby() {
-        quick_test(
-            "SELECT state, MIN(age + 1) FROM person GROUP BY state",
-            "Aggregate: groupBy=[[#state]], aggr=[[MIN(#age Plus Int64(1))]]\
-             \n  TableScan: person projection=None",
-        );
-    }
-
-    #[test]
-    fn test_wildcard() {
-        quick_test(
-            "SELECT * from person",
-            "Projection: #id, #first_name, #last_name, #age, #state, #salary, #birth_date\
-            \n  TableScan: person projection=None",
-        );
-    }
-
-    #[test]
-    fn select_count_one() {
-        let sql = "SELECT COUNT(1) FROM person";
-        let expected = "Aggregate: groupBy=[[]], aggr=[[COUNT(UInt8(1))]]\
-                        \n  TableScan: person projection=None";
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn select_count_column() {
-        let sql = "SELECT COUNT(id) FROM person";
-        let expected = "Aggregate: groupBy=[[]], aggr=[[COUNT(#id)]]\
-                        \n  TableScan: person projection=None";
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn select_scalar_func() {
-        let sql = "SELECT sqrt(age) FROM person";
-        let expected = "Projection: sqrt(#age)\
-                        \n  TableScan: person projection=None";
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn select_aliased_scalar_func() {
-        let sql = "SELECT sqrt(age) AS square_people FROM person";
-        let expected = "Projection: sqrt(#age) AS square_people\
-                        \n  TableScan: person projection=None";
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn select_where_nullif_division() {
-        let sql = "SELECT c3/(c4+c5) \
-                   FROM aggregate_test_100 WHERE c3/nullif(c4+c5, 0) > 0.1";
-        let expected = "Projection: #c3 Divide #c4 Plus #c5\
-            \n  Filter: #c3 Divide nullif(#c4 Plus #c5, Int64(0)) Gt Float64(0.1)\
-            \n    TableScan: aggregate_test_100 projection=None";
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn select_where_with_negative_operator() {
-        let sql = "SELECT c3 FROM aggregate_test_100 WHERE c3 > -0.1 AND -c4 > 0";
-        let expected = "Projection: #c3\
-            \n  Filter: #c3 Gt Float64(-0.1) And (- #c4) Gt Int64(0)\
-            \n    TableScan: aggregate_test_100 projection=None";
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn select_where_with_positive_operator() {
-        let sql = "SELECT c3 FROM aggregate_test_100 WHERE c3 > +0.1 AND +c4 > 0";
-        let expected = "Projection: #c3\
-            \n  Filter: #c3 Gt Float64(0.1) And #c4 Gt Int64(0)\
-            \n    TableScan: aggregate_test_100 projection=None";
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn select_order_by() {
-        let sql = "SELECT id FROM person ORDER BY id";
-        let expected = "Sort: #id ASC NULLS FIRST\
-                        \n  Projection: #id\
-                        \n    TableScan: person projection=None";
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn select_order_by_desc() {
-        let sql = "SELECT id FROM person ORDER BY id DESC";
-        let expected = "Sort: #id DESC NULLS FIRST\
-                        \n  Projection: #id\
-                        \n    TableScan: person projection=None";
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn select_order_by_nulls_last() {
-        quick_test(
-            "SELECT id FROM person ORDER BY id DESC NULLS LAST",
-            "Sort: #id DESC NULLS LAST\
-            \n  Projection: #id\
-            \n    TableScan: person projection=None",
-        );
-
-        quick_test(
-            "SELECT id FROM person ORDER BY id NULLS LAST",
-            "Sort: #id ASC NULLS LAST\
-            \n  Projection: #id\
-            \n    TableScan: person projection=None",
-        );
-    }
-
-    #[test]
-    fn select_group_by() {
-        let sql = "SELECT state FROM person GROUP BY state";
-        let expected = "Aggregate: groupBy=[[#state]], aggr=[[]]\
-                        \n  TableScan: person projection=None";
-
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn select_group_by_columns_not_in_select() {
-        let sql = "SELECT MAX(age) FROM person GROUP BY state";
-        let expected = "Projection: #MAX(age)\
-                        \n  Aggregate: groupBy=[[#state]], aggr=[[MAX(#age)]]\
-                        \n    TableScan: person projection=None";
-
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn select_group_by_count_star() {
-        let sql = "SELECT state, COUNT(*) FROM person GROUP BY state";
-        let expected = "Aggregate: groupBy=[[#state]], aggr=[[COUNT(UInt8(1))]]\
-                        \n  TableScan: person projection=None";
-
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn select_group_by_needs_projection() {
-        let sql = "SELECT COUNT(state), state FROM person GROUP BY state";
-        let expected = "\
-        Projection: #COUNT(state), #state\
-        \n  Aggregate: groupBy=[[#state]], aggr=[[COUNT(#state)]]\
-        \n    TableScan: person projection=None";
-
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn select_7480_1() {
-        let sql = "SELECT c1, MIN(c12) FROM aggregate_test_100 GROUP BY c1, c13";
-        let expected = "Projection: #c1, #MIN(c12)\
-                       \n  Aggregate: groupBy=[[#c1, #c13]], aggr=[[MIN(#c12)]]\
-                       \n    TableScan: aggregate_test_100 projection=None";
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn select_7480_2() {
-        let sql = "SELECT c1, c13, MIN(c12) FROM aggregate_test_100 GROUP BY c1";
-        let err = logical_plan(sql).expect_err("query should have failed");
-        assert_eq!(
-            "Plan(\"Projection references non-aggregate values\")",
-            format!("{:?}", err)
-        );
-    }
-
-    #[test]
-    fn create_external_table_csv() {
-        let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv'";
-        let expected = "CreateExternalTable: \"t\"";
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn create_external_table_csv_no_schema() {
-        let sql = "CREATE EXTERNAL TABLE t STORED AS CSV LOCATION 'foo.csv'";
-        let err = logical_plan(sql).expect_err("query should have failed");
-        assert_eq!(
-            "Plan(\"Column definitions required for CSV files. None found\")",
-            format!("{:?}", err)
-        );
-    }
-
-    #[test]
-    fn create_external_table_parquet() {
-        let sql =
-            "CREATE EXTERNAL TABLE t(c1 int) STORED AS PARQUET LOCATION 'foo.parquet'";
-        let err = logical_plan(sql).expect_err("query should have failed");
-        assert_eq!(
-            "Plan(\"Column definitions can not be specified for PARQUET files.\")",
-            format!("{:?}", err)
-        );
-    }
-
-    #[test]
-    fn create_external_table_parquet_no_schema() {
-        let sql = "CREATE EXTERNAL TABLE t STORED AS PARQUET LOCATION 'foo.parquet'";
-        let expected = "CreateExternalTable: \"t\"";
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn equijoin_explicit_syntax() {
-        let sql = "SELECT id, order_id \
-            FROM person \
-            JOIN orders \
-            ON id = customer_id";
-        let expected = "Projection: #id, #order_id\
-        \n  Join: id = customer_id\
-        \n    TableScan: person projection=None\
-        \n    TableScan: orders projection=None";
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn equijoin_explicit_syntax_3_tables() {
-        let sql = "SELECT id, order_id, l_description \
-            FROM person \
-            JOIN orders ON id = customer_id \
-            JOIN lineitem ON o_item_id = l_item_id";
-        let expected = "Projection: #id, #order_id, #l_description\
-            \n  Join: o_item_id = l_item_id\
-            \n    Join: id = customer_id\
-            \n      TableScan: person projection=None\
-            \n      TableScan: orders projection=None\
-            \n    TableScan: lineitem projection=None";
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn boolean_literal_in_condition_expression() {
-        let sql = "SELECT order_id \
-        FROM orders \
-        WHERE delivered = false OR delivered = true";
-        let expected = "Projection: #order_id\
-            \n  Filter: #delivered Eq Boolean(false) Or #delivered Eq Boolean(true)\
-            \n    TableScan: orders projection=None";
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn union() {
-        let sql = "SELECT order_id from orders UNION ALL SELECT order_id FROM orders";
-        let expected = "Union\
-            \n  Projection: #order_id\
-            \n    TableScan: orders projection=None\
-            \n  Projection: #order_id\
-            \n    TableScan: orders projection=None";
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn union_4_combined_in_one() {
-        let sql = "SELECT order_id from orders
-                    UNION ALL SELECT order_id FROM orders
-                    UNION ALL SELECT order_id FROM orders
-                    UNION ALL SELECT order_id FROM orders";
-        let expected = "Union\
-            \n  Projection: #order_id\
-            \n    TableScan: orders projection=None\
-            \n  Projection: #order_id\
-            \n    TableScan: orders projection=None\
-            \n  Projection: #order_id\
-            \n    TableScan: orders projection=None\
-            \n  Projection: #order_id\
-            \n    TableScan: orders projection=None";
-        quick_test(sql, expected);
-    }
-
-    #[test]
-    fn union_schemas_should_be_same() {
-        let sql = "SELECT order_id from orders UNION ALL SELECT customer_id FROM orders";
-        let err = logical_plan(sql).expect_err("query should have failed");
-        assert_eq!(
-            "Plan(\"UNION ALL schemas are expected to be the same\")",
-            format!("{:?}", err)
-        );
-    }
-
-    #[test]
-    fn only_union_all_supported() {
-        let sql = "SELECT order_id from orders EXCEPT SELECT order_id FROM orders";
-        let err = logical_plan(sql).expect_err("query should have failed");
-        assert_eq!(
-            "NotImplemented(\"Only UNION ALL is supported, found EXCEPT\")",
-            format!("{:?}", err)
-        );
-    }
-
-    #[test]
-    fn select_typedstring() {
-        let sql = "SELECT date '2020-12-10' AS date FROM person";
-        let expected = "Projection: CAST(Utf8(\"2020-12-10\") AS Date32) AS date\
-            \n  TableScan: person projection=None";
-        quick_test(sql, expected);
-    }
-
-    fn logical_plan(sql: &str) -> Result<LogicalPlan> {
-        let planner = SqlToRel::new(&MockContextProvider {});
-        let result = DFParser::parse_sql(&sql);
-        let ast = result.unwrap();
-        planner.statement_to_plan(&ast[0])
-    }
-
-    /// Create logical plan, write with formatter, compare to expected output
-    fn quick_test(sql: &str, expected: &str) {
-        let plan = logical_plan(sql).unwrap();
-        assert_eq!(expected, format!("{:?}", plan));
-    }
-
-    struct MockContextProvider {}
-
-    impl ContextProvider for MockContextProvider {
-        fn get_table_provider(
-            &self,
-            name: TableReference,
-        ) -> Option<Arc<dyn TableProvider>> {
-            let schema = match name.table() {
-                "person" => Some(Schema::new(vec![
-                    Field::new("id", DataType::UInt32, false),
-                    Field::new("first_name", DataType::Utf8, false),
-                    Field::new("last_name", DataType::Utf8, false),
-                    Field::new("age", DataType::Int32, false),
-                    Field::new("state", DataType::Utf8, false),
-                    Field::new("salary", DataType::Float64, false),
-                    Field::new(
-                        "birth_date",
-                        DataType::Timestamp(TimeUnit::Nanosecond, None),
-                        false,
-                    ),
-                ])),
-                "orders" => Some(Schema::new(vec![
-                    Field::new("order_id", DataType::UInt32, false),
-                    Field::new("customer_id", DataType::UInt32, false),
-                    Field::new("o_item_id", DataType::Utf8, false),
-                    Field::new("qty", DataType::Int32, false),
-                    Field::new("price", DataType::Float64, false),
-                    Field::new("delivered", DataType::Boolean, false),
-                ])),
-                "lineitem" => Some(Schema::new(vec![
-                    Field::new("l_item_id", DataType::UInt32, false),
-                    Field::new("l_description", DataType::Utf8, false),
-                ])),
-                "aggregate_test_100" => Some(Schema::new(vec![
-                    Field::new("c1", DataType::Utf8, false),
-                    Field::new("c2", DataType::UInt32, false),
-                    Field::new("c3", DataType::Int8, false),
-                    Field::new("c4", DataType::Int16, false),
-                    Field::new("c5", DataType::Int32, false),
-                    Field::new("c6", DataType::Int64, false),
-                    Field::new("c7", DataType::UInt8, false),
-                    Field::new("c8", DataType::UInt16, false),
-                    Field::new("c9", DataType::UInt32, false),
-                    Field::new("c10", DataType::UInt64, false),
-                    Field::new("c11", DataType::Float32, false),
-                    Field::new("c12", DataType::Float64, false),
-                    Field::new("c13", DataType::Utf8, false),
-                ])),
-                _ => None,
-            };
-            schema.map(|s| -> Arc<dyn TableProvider> {
-                Arc::new(EmptyTable::new(Arc::new(s)))
-            })
-        }
-
-        fn get_function_meta(&self, name: &str) -> Option<Arc<ScalarUDF>> {
-            let f: ScalarFunctionImplementation =
-                Arc::new(|_| Err(DataFusionError::NotImplemented("".to_string())));
-            match name {
-                "my_sqrt" => Some(Arc::new(create_udf(
-                    "my_sqrt",
-                    vec![DataType::Float64],
-                    Arc::new(DataType::Float64),
-                    f,
-                ))),
-                _ => None,
-            }
-        }
-
-        fn get_aggregate_meta(&self, _name: &str) -> Option<Arc<AggregateUDF>> {
-            unimplemented!()
-        }
-    }
-}
diff --git a/rust/datafusion/src/sql/utils.rs b/rust/datafusion/src/sql/utils.rs
deleted file mode 100644
index f41643d2ab4..00000000000
--- a/rust/datafusion/src/sql/utils.rs
+++ /dev/null
@@ -1,376 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use crate::logical_plan::{DFSchema, Expr, LogicalPlan};
-use crate::{
-    error::{DataFusionError, Result},
-    logical_plan::{ExpressionVisitor, Recursion},
-};
-use std::collections::HashMap;
-
-/// Resolves an `Expr::Wildcard` to a collection of `Expr::Column`'s.
-pub(crate) fn expand_wildcard(expr: &Expr, schema: &DFSchema) -> Vec<Expr> {
-    match expr {
-        Expr::Wildcard => schema
-            .fields()
-            .iter()
-            .map(|f| Expr::Column(f.name().to_string()))
-            .collect::<Vec<Expr>>(),
-        _ => vec![expr.clone()],
-    }
-}
-
-/// Collect all deeply nested `Expr::AggregateFunction` and
-/// `Expr::AggregateUDF`. They are returned in order of occurrence (depth
-/// first), with duplicates omitted.
-pub(crate) fn find_aggregate_exprs(exprs: &[Expr]) -> Vec<Expr> {
-    find_exprs_in_exprs(exprs, &|nested_expr| {
-        matches!(
-            nested_expr,
-            Expr::AggregateFunction { .. } | Expr::AggregateUDF { .. }
-        )
-    })
-}
-
-/// Collect all deeply nested `Expr::Column`'s. They are returned in order of
-/// appearance (depth first), with duplicates omitted.
-pub(crate) fn find_column_exprs(exprs: &[Expr]) -> Vec<Expr> {
-    find_exprs_in_exprs(exprs, &|nested_expr| matches!(nested_expr, Expr::Column(_)))
-}
-
-/// Search the provided `Expr`'s, and all of their nested `Expr`, for any that
-/// pass the provided test. The returned `Expr`'s are deduplicated and returned
-/// in order of appearance (depth first).
-fn find_exprs_in_exprs<F>(exprs: &[Expr], test_fn: &F) -> Vec<Expr>
-where
-    F: Fn(&Expr) -> bool,
-{
-    exprs
-        .iter()
-        .flat_map(|expr| find_exprs_in_expr(expr, test_fn))
-        .fold(vec![], |mut acc, expr| {
-            if !acc.contains(&expr) {
-                acc.push(expr)
-            }
-            acc
-        })
-}
-
-// Visitor that find expressions that match a particular predicate
-struct Finder<'a, F>
-where
-    F: Fn(&Expr) -> bool,
-{
-    test_fn: &'a F,
-    exprs: Vec<Expr>,
-}
-
-impl<'a, F> Finder<'a, F>
-where
-    F: Fn(&Expr) -> bool,
-{
-    /// Create a new finder with the `test_fn`
-    fn new(test_fn: &'a F) -> Self {
-        Self {
-            test_fn,
-            exprs: Vec::new(),
-        }
-    }
-}
-
-impl<'a, F> ExpressionVisitor for Finder<'a, F>
-where
-    F: Fn(&Expr) -> bool,
-{
-    fn pre_visit(mut self, expr: &Expr) -> Result<Recursion<Self>> {
-        if (self.test_fn)(expr) {
-            if !(self.exprs.contains(expr)) {
-                self.exprs.push(expr.clone())
-            }
-            // stop recursing down this expr once we find a match
-            return Ok(Recursion::Stop(self));
-        }
-
-        Ok(Recursion::Continue(self))
-    }
-}
-
-/// Search an `Expr`, and all of its nested `Expr`'s, for any that pass the
-/// provided test. The returned `Expr`'s are deduplicated and returned in order
-/// of appearance (depth first).
-fn find_exprs_in_expr<F>(expr: &Expr, test_fn: &F) -> Vec<Expr>
-where
-    F: Fn(&Expr) -> bool,
-{
-    let Finder { exprs, .. } = expr
-        .accept(Finder::new(test_fn))
-        // pre_visit always returns OK, so this will always too
-        .expect("no way to return error during recursion");
-    exprs
-}
-
-/// Convert any `Expr` to an `Expr::Column`.
-pub(crate) fn expr_as_column_expr(expr: &Expr, plan: &LogicalPlan) -> Result<Expr> {
-    match expr {
-        Expr::Column(_) => Ok(expr.clone()),
-        _ => Ok(Expr::Column(expr.name(&plan.schema())?)),
-    }
-}
-
-/// Rebuilds an `Expr` as a projection on top of a collection of `Expr`'s.
-///
-/// For example, the expression `a + b < 1` would require, as input, the 2
-/// individual columns, `a` and `b`. But, if the base expressions already
-/// contain the `a + b` result, then that may be used in lieu of the `a` and
-/// `b` columns.
-///
-/// This is useful in the context of a query like:
-///
-/// SELECT a + b < 1 ... GROUP BY a + b
-///
-/// where post-aggregation, `a + b` need not be a projection against the
-/// individual columns `a` and `b`, but rather it is a projection against the
-/// `a + b` found in the GROUP BY.
-pub(crate) fn rebase_expr(
-    expr: &Expr,
-    base_exprs: &[Expr],
-    plan: &LogicalPlan,
-) -> Result<Expr> {
-    clone_with_replacement(expr, &|nested_expr| {
-        if base_exprs.contains(nested_expr) {
-            Ok(Some(expr_as_column_expr(nested_expr, plan)?))
-        } else {
-            Ok(None)
-        }
-    })
-}
-
-/// Determines if the set of `Expr`'s are a valid projection on the input
-/// `Expr::Column`'s.
-pub(crate) fn can_columns_satisfy_exprs(
-    columns: &[Expr],
-    exprs: &[Expr],
-) -> Result<bool> {
-    columns.iter().try_for_each(|c| match c {
-        Expr::Column(_) => Ok(()),
-        _ => Err(DataFusionError::Internal(
-            "Expr::Column are required".to_string(),
-        )),
-    })?;
-
-    Ok(find_column_exprs(exprs).iter().all(|c| columns.contains(c)))
-}
-
-/// Returns a cloned `Expr`, but any of the `Expr`'s in the tree may be
-/// replaced/customized by the replacement function.
-///
-/// The replacement function is called repeatedly with `Expr`, starting with
-/// the argument `expr`, then descending depth-first through its
-/// descendants. The function chooses to replace or keep (clone) each `Expr`.
-///
-/// The function's return type is `Result<Option<Expr>>>`, where:
-///
-/// * `Ok(Some(replacement_expr))`: A replacement `Expr` is provided; it is
-///       swapped in at the particular node in the tree. Any nested `Expr` are
-///       not subject to cloning/replacement.
-/// * `Ok(None)`: A replacement `Expr` is not provided. The `Expr` is
-///       recreated, with all of its nested `Expr`'s subject to
-///       cloning/replacement.
-/// * `Err(err)`: Any error returned by the function is returned as-is by
-///       `clone_with_replacement()`.
-fn clone_with_replacement<F>(expr: &Expr, replacement_fn: &F) -> Result<Expr>
-where
-    F: Fn(&Expr) -> Result<Option<Expr>>,
-{
-    let replacement_opt = replacement_fn(expr)?;
-
-    match replacement_opt {
-        // If we were provided a replacement, use the replacement. Do not
-        // descend further.
-        Some(replacement) => Ok(replacement),
-        // No replacement was provided, clone the node and recursively call
-        // clone_with_replacement() on any nested expressions.
-        None => match expr {
-            Expr::AggregateFunction {
-                fun,
-                args,
-                distinct,
-            } => Ok(Expr::AggregateFunction {
-                fun: fun.clone(),
-                args: args
-                    .iter()
-                    .map(|e| clone_with_replacement(e, replacement_fn))
-                    .collect::<Result<Vec<Expr>>>()?,
-                distinct: *distinct,
-            }),
-            Expr::AggregateUDF { fun, args } => Ok(Expr::AggregateUDF {
-                fun: fun.clone(),
-                args: args
-                    .iter()
-                    .map(|e| clone_with_replacement(e, replacement_fn))
-                    .collect::<Result<Vec<Expr>>>()?,
-            }),
-            Expr::Alias(nested_expr, alias_name) => Ok(Expr::Alias(
-                Box::new(clone_with_replacement(&**nested_expr, replacement_fn)?),
-                alias_name.clone(),
-            )),
-            Expr::Between {
-                expr: nested_expr,
-                negated,
-                low,
-                high,
-            } => Ok(Expr::Between {
-                expr: Box::new(clone_with_replacement(&**nested_expr, replacement_fn)?),
-                negated: *negated,
-                low: Box::new(clone_with_replacement(&**low, replacement_fn)?),
-                high: Box::new(clone_with_replacement(&**high, replacement_fn)?),
-            }),
-            Expr::InList {
-                expr: nested_expr,
-                list,
-                negated,
-            } => Ok(Expr::InList {
-                expr: Box::new(clone_with_replacement(&**nested_expr, replacement_fn)?),
-                list: list
-                    .iter()
-                    .map(|e| clone_with_replacement(e, replacement_fn))
-                    .collect::<Result<Vec<Expr>>>()?,
-                negated: *negated,
-            }),
-            Expr::BinaryExpr { left, right, op } => Ok(Expr::BinaryExpr {
-                left: Box::new(clone_with_replacement(&**left, replacement_fn)?),
-                op: *op,
-                right: Box::new(clone_with_replacement(&**right, replacement_fn)?),
-            }),
-            Expr::Case {
-                expr: case_expr_opt,
-                when_then_expr,
-                else_expr: else_expr_opt,
-            } => Ok(Expr::Case {
-                expr: match case_expr_opt {
-                    Some(case_expr) => Some(Box::new(clone_with_replacement(
-                        &**case_expr,
-                        replacement_fn,
-                    )?)),
-                    None => None,
-                },
-                when_then_expr: when_then_expr
-                    .iter()
-                    .map(|(a, b)| {
-                        Ok((
-                            Box::new(clone_with_replacement(&**a, replacement_fn)?),
-                            Box::new(clone_with_replacement(&**b, replacement_fn)?),
-                        ))
-                    })
-                    .collect::<Result<Vec<(_, _)>>>()?,
-                else_expr: match else_expr_opt {
-                    Some(else_expr) => Some(Box::new(clone_with_replacement(
-                        &**else_expr,
-                        replacement_fn,
-                    )?)),
-                    None => None,
-                },
-            }),
-            Expr::ScalarFunction { fun, args } => Ok(Expr::ScalarFunction {
-                fun: fun.clone(),
-                args: args
-                    .iter()
-                    .map(|e| clone_with_replacement(e, replacement_fn))
-                    .collect::<Result<Vec<Expr>>>()?,
-            }),
-            Expr::ScalarUDF { fun, args } => Ok(Expr::ScalarUDF {
-                fun: fun.clone(),
-                args: args
-                    .iter()
-                    .map(|arg| clone_with_replacement(arg, replacement_fn))
-                    .collect::<Result<Vec<Expr>>>()?,
-            }),
-            Expr::Negative(nested_expr) => Ok(Expr::Negative(Box::new(
-                clone_with_replacement(&**nested_expr, replacement_fn)?,
-            ))),
-            Expr::Not(nested_expr) => Ok(Expr::Not(Box::new(clone_with_replacement(
-                &**nested_expr,
-                replacement_fn,
-            )?))),
-            Expr::IsNotNull(nested_expr) => Ok(Expr::IsNotNull(Box::new(
-                clone_with_replacement(&**nested_expr, replacement_fn)?,
-            ))),
-            Expr::IsNull(nested_expr) => Ok(Expr::IsNull(Box::new(
-                clone_with_replacement(&**nested_expr, replacement_fn)?,
-            ))),
-            Expr::Cast {
-                expr: nested_expr,
-                data_type,
-            } => Ok(Expr::Cast {
-                expr: Box::new(clone_with_replacement(&**nested_expr, replacement_fn)?),
-                data_type: data_type.clone(),
-            }),
-            Expr::TryCast {
-                expr: nested_expr,
-                data_type,
-            } => Ok(Expr::TryCast {
-                expr: Box::new(clone_with_replacement(&**nested_expr, replacement_fn)?),
-                data_type: data_type.clone(),
-            }),
-            Expr::Sort {
-                expr: nested_expr,
-                asc,
-                nulls_first,
-            } => Ok(Expr::Sort {
-                expr: Box::new(clone_with_replacement(&**nested_expr, replacement_fn)?),
-                asc: *asc,
-                nulls_first: *nulls_first,
-            }),
-            Expr::Column(_) | Expr::Literal(_) | Expr::ScalarVariable(_) => {
-                Ok(expr.clone())
-            }
-            Expr::Wildcard => Ok(Expr::Wildcard),
-        },
-    }
-}
-
-/// Returns mapping of each alias (`String`) to the expression (`Expr`) it is
-/// aliasing.
-pub(crate) fn extract_aliases(exprs: &[Expr]) -> HashMap<String, Expr> {
-    exprs
-        .iter()
-        .filter_map(|expr| match expr {
-            Expr::Alias(nested_expr, alias_name) => {
-                Some((alias_name.clone(), *nested_expr.clone()))
-            }
-            _ => None,
-        })
-        .collect::<HashMap<String, Expr>>()
-}
-
-/// Rebuilds an `Expr` with columns that refer to aliases replaced by the
-/// alias' underlying `Expr`.
-pub(crate) fn resolve_aliases_to_exprs(
-    expr: &Expr,
-    aliases: &HashMap<String, Expr>,
-) -> Result<Expr> {
-    clone_with_replacement(expr, &|nested_expr| match nested_expr {
-        Expr::Column(name) => {
-            if let Some(aliased_expr) = aliases.get(name) {
-                Ok(Some(aliased_expr.clone()))
-            } else {
-                Ok(None)
-            }
-        }
-        _ => Ok(None),
-    })
-}
diff --git a/rust/datafusion/src/test/exec.rs b/rust/datafusion/src/test/exec.rs
deleted file mode 100644
index 04cd29530c0..00000000000
--- a/rust/datafusion/src/test/exec.rs
+++ /dev/null
@@ -1,102 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Simple iterator over batches for use in testing
-
-use std::task::{Context, Poll};
-
-use arrow::{
-    datatypes::SchemaRef, error::Result as ArrowResult, record_batch::RecordBatch,
-};
-use futures::Stream;
-
-use crate::physical_plan::RecordBatchStream;
-
-/// Index into the data that has been returned so far
-#[derive(Debug, Default, Clone)]
-pub struct BatchIndex {
-    inner: std::sync::Arc<std::sync::Mutex<usize>>,
-}
-
-impl BatchIndex {
-    /// Return the current index
-    pub fn value(&self) -> usize {
-        let inner = self.inner.lock().unwrap();
-        *inner
-    }
-
-    // increment the current index by one
-    pub fn incr(&self) {
-        let mut inner = self.inner.lock().unwrap();
-        *inner += 1;
-    }
-}
-
-/// Iterator over batches
-#[derive(Debug, Default)]
-pub(crate) struct TestStream {
-    /// Vector of record batches
-    data: Vec<RecordBatch>,
-    /// Index into the data that has been returned so far
-    index: BatchIndex,
-}
-
-impl TestStream {
-    /// Create an iterator for a vector of record batches. Assumes at
-    /// least one entry in data (for the schema)
-    pub fn new(data: Vec<RecordBatch>) -> Self {
-        Self {
-            data,
-            ..Default::default()
-        }
-    }
-
-    /// Return a handle to the index counter for this stream
-    pub fn index(&self) -> BatchIndex {
-        self.index.clone()
-    }
-}
-
-impl Stream for TestStream {
-    type Item = ArrowResult<RecordBatch>;
-
-    fn poll_next(
-        self: std::pin::Pin<&mut Self>,
-        _: &mut Context<'_>,
-    ) -> Poll<Option<Self::Item>> {
-        let next_batch = self.index.value();
-
-        Poll::Ready(if next_batch < self.data.len() {
-            let next_batch = self.index.value();
-            self.index.incr();
-            Some(Ok(self.data[next_batch].clone()))
-        } else {
-            None
-        })
-    }
-
-    fn size_hint(&self) -> (usize, Option<usize>) {
-        (self.data.len(), Some(self.data.len()))
-    }
-}
-
-impl RecordBatchStream for TestStream {
-    /// Get the schema
-    fn schema(&self) -> SchemaRef {
-        self.data[0].schema()
-    }
-}
diff --git a/rust/datafusion/src/test/mod.rs b/rust/datafusion/src/test/mod.rs
deleted file mode 100644
index 926a6922616..00000000000
--- a/rust/datafusion/src/test/mod.rs
+++ /dev/null
@@ -1,346 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Common unit test utility methods
-
-use crate::datasource::{MemTable, TableProvider};
-use crate::error::Result;
-use crate::logical_plan::{LogicalPlan, LogicalPlanBuilder};
-use array::{
-    Array, ArrayRef, StringArray, TimestampMicrosecondArray, TimestampMillisecondArray,
-    TimestampNanosecondArray, TimestampSecondArray,
-};
-use arrow::array::{self, Int32Array};
-use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
-use arrow::record_batch::RecordBatch;
-use std::fs::File;
-use std::io::prelude::*;
-use std::io::{BufReader, BufWriter};
-use std::sync::Arc;
-use tempfile::TempDir;
-
-pub fn create_table_dual() -> Arc<dyn TableProvider> {
-    let dual_schema = Arc::new(Schema::new(vec![
-        Field::new("id", DataType::Int32, false),
-        Field::new("name", DataType::Utf8, false),
-    ]));
-    let batch = RecordBatch::try_new(
-        dual_schema.clone(),
-        vec![
-            Arc::new(array::Int32Array::from(vec![1])),
-            Arc::new(array::StringArray::from(vec!["a"])),
-        ],
-    )
-    .unwrap();
-    let provider = MemTable::try_new(dual_schema, vec![vec![batch]]).unwrap();
-    Arc::new(provider)
-}
-
-/// Generated partitioned copy of a CSV file
-pub fn create_partitioned_csv(filename: &str, partitions: usize) -> Result<String> {
-    let testdata = arrow::util::test_util::arrow_test_data();
-    let path = format!("{}/csv/{}", testdata, filename);
-
-    let tmp_dir = TempDir::new()?;
-
-    let mut writers = vec![];
-    for i in 0..partitions {
-        let filename = format!("partition-{}.csv", i);
-        let filename = tmp_dir.path().join(&filename);
-
-        let writer = BufWriter::new(File::create(&filename).unwrap());
-        writers.push(writer);
-    }
-
-    let f = File::open(&path)?;
-    let f = BufReader::new(f);
-    for (i, line) in f.lines().enumerate() {
-        let line = line.unwrap();
-
-        if i == 0 {
-            // write header to all partitions
-            for w in writers.iter_mut() {
-                w.write_all(line.as_bytes()).unwrap();
-                w.write_all(b"\n").unwrap();
-            }
-        } else {
-            // write data line to single partition
-            let partition = i % partitions;
-            writers[partition].write_all(line.as_bytes()).unwrap();
-            writers[partition].write_all(b"\n").unwrap();
-        }
-    }
-    for w in writers.iter_mut() {
-        w.flush().unwrap();
-    }
-
-    Ok(tmp_dir.into_path().to_str().unwrap().to_string())
-}
-
-/// Get the schema for the aggregate_test_* csv files
-pub fn aggr_test_schema() -> SchemaRef {
-    Arc::new(Schema::new(vec![
-        Field::new("c1", DataType::Utf8, false),
-        Field::new("c2", DataType::UInt32, false),
-        Field::new("c3", DataType::Int8, false),
-        Field::new("c4", DataType::Int16, false),
-        Field::new("c5", DataType::Int32, false),
-        Field::new("c6", DataType::Int64, false),
-        Field::new("c7", DataType::UInt8, false),
-        Field::new("c8", DataType::UInt16, false),
-        Field::new("c9", DataType::UInt32, false),
-        Field::new("c10", DataType::UInt64, false),
-        Field::new("c11", DataType::Float32, false),
-        Field::new("c12", DataType::Float64, false),
-        Field::new("c13", DataType::Utf8, false),
-    ]))
-}
-
-/// some tests share a common table
-pub fn test_table_scan() -> Result<LogicalPlan> {
-    let schema = Schema::new(vec![
-        Field::new("a", DataType::UInt32, false),
-        Field::new("b", DataType::UInt32, false),
-        Field::new("c", DataType::UInt32, false),
-    ]);
-    LogicalPlanBuilder::scan_empty("test", &schema, None)?.build()
-}
-
-pub fn assert_fields_eq(plan: &LogicalPlan, expected: Vec<&str>) {
-    let actual: Vec<String> = plan
-        .schema()
-        .fields()
-        .iter()
-        .map(|f| f.name().clone())
-        .collect();
-    assert_eq!(actual, expected);
-}
-
-/// returns a table with 3 columns of i32 in memory
-pub fn build_table_i32(
-    a: (&str, &Vec<i32>),
-    b: (&str, &Vec<i32>),
-    c: (&str, &Vec<i32>),
-) -> RecordBatch {
-    let schema = Schema::new(vec![
-        Field::new(a.0, DataType::Int32, false),
-        Field::new(b.0, DataType::Int32, false),
-        Field::new(c.0, DataType::Int32, false),
-    ]);
-
-    RecordBatch::try_new(
-        Arc::new(schema),
-        vec![
-            Arc::new(Int32Array::from(a.1.clone())),
-            Arc::new(Int32Array::from(b.1.clone())),
-            Arc::new(Int32Array::from(c.1.clone())),
-        ],
-    )
-    .unwrap()
-}
-
-/// Returns the column names on the schema
-pub fn columns(schema: &Schema) -> Vec<String> {
-    schema.fields().iter().map(|f| f.name().clone()).collect()
-}
-
-/// Return a new table provider that has a single Int32 column with
-/// values between `seq_start` and `seq_end`
-pub fn table_with_sequence(
-    seq_start: i32,
-    seq_end: i32,
-) -> Result<Arc<dyn TableProvider>> {
-    let schema = Arc::new(Schema::new(vec![Field::new("i", DataType::Int32, true)]));
-    let arr = Arc::new(Int32Array::from((seq_start..=seq_end).collect::<Vec<_>>()));
-    let partitions = vec![vec![RecordBatch::try_new(
-        schema.clone(),
-        vec![arr as ArrayRef],
-    )?]];
-    Ok(Arc::new(MemTable::try_new(schema, partitions)?))
-}
-
-/// Return a RecordBatch with a single Int32 array with values (0..sz)
-pub fn make_partition(sz: i32) -> RecordBatch {
-    let seq_start = 0;
-    let seq_end = sz;
-    let values = (seq_start..seq_end).collect::<Vec<_>>();
-    let schema = Arc::new(Schema::new(vec![Field::new("i", DataType::Int32, true)]));
-    let arr = Arc::new(Int32Array::from(values));
-    let arr = arr as ArrayRef;
-
-    RecordBatch::try_new(schema, vec![arr]).unwrap()
-}
-
-/// Return a new table provider containing all of the supported timestamp types
-pub fn table_with_timestamps() -> Arc<dyn TableProvider> {
-    let batch = make_timestamps();
-    let schema = batch.schema();
-    let partitions = vec![vec![batch]];
-    Arc::new(MemTable::try_new(schema, partitions).unwrap())
-}
-
-/// Return  record batch with all of the supported timestamp types
-/// values
-///
-/// Columns are named:
-/// "nanos" --> TimestampNanosecondArray
-/// "micros" --> TimestampMicrosecondArray
-/// "millis" --> TimestampMillisecondArray
-/// "secs" --> TimestampSecondArray
-/// "names" --> StringArray
-pub fn make_timestamps() -> RecordBatch {
-    let ts_strings = vec![
-        Some("2018-11-13T17:11:10.011375885995"),
-        Some("2011-12-13T11:13:10.12345"),
-        None,
-        Some("2021-1-1T05:11:10.432"),
-    ];
-
-    let ts_nanos = ts_strings
-        .into_iter()
-        .map(|t| {
-            t.map(|t| {
-                t.parse::<chrono::NaiveDateTime>()
-                    .unwrap()
-                    .timestamp_nanos()
-            })
-        })
-        .collect::<Vec<_>>();
-
-    let ts_micros = ts_nanos
-        .iter()
-        .map(|t| t.as_ref().map(|ts_nanos| ts_nanos / 1000))
-        .collect::<Vec<_>>();
-
-    let ts_millis = ts_nanos
-        .iter()
-        .map(|t| t.as_ref().map(|ts_nanos| ts_nanos / 1000000))
-        .collect::<Vec<_>>();
-
-    let ts_secs = ts_nanos
-        .iter()
-        .map(|t| t.as_ref().map(|ts_nanos| ts_nanos / 1000000000))
-        .collect::<Vec<_>>();
-
-    let names = ts_nanos
-        .iter()
-        .enumerate()
-        .map(|(i, _)| format!("Row {}", i))
-        .collect::<Vec<_>>();
-
-    let arr_nanos = TimestampNanosecondArray::from_opt_vec(ts_nanos, None);
-    let arr_micros = TimestampMicrosecondArray::from_opt_vec(ts_micros, None);
-    let arr_millis = TimestampMillisecondArray::from_opt_vec(ts_millis, None);
-    let arr_secs = TimestampSecondArray::from_opt_vec(ts_secs, None);
-
-    let names = names.iter().map(|s| s.as_str()).collect::<Vec<_>>();
-    let arr_names = StringArray::from(names);
-
-    let schema = Schema::new(vec![
-        Field::new("nanos", arr_nanos.data_type().clone(), false),
-        Field::new("micros", arr_micros.data_type().clone(), false),
-        Field::new("millis", arr_millis.data_type().clone(), false),
-        Field::new("secs", arr_secs.data_type().clone(), false),
-        Field::new("name", arr_names.data_type().clone(), false),
-    ]);
-    let schema = Arc::new(schema);
-
-    RecordBatch::try_new(
-        schema,
-        vec![
-            Arc::new(arr_nanos),
-            Arc::new(arr_micros),
-            Arc::new(arr_millis),
-            Arc::new(arr_secs),
-            Arc::new(arr_names),
-        ],
-    )
-    .unwrap()
-}
-
-pub mod exec;
-pub mod user_defined;
-pub mod variable;
-
-/// Compares formatted output of a record batch with an expected
-/// vector of strings, with the result of pretty formatting record
-/// batches. This is a macro so errors appear on the correct line
-///
-/// Designed so that failure output can be directly copy/pasted
-/// into the test code as expected results.
-///
-/// Expects to be called about like this:
-///
-/// `assert_batch_eq!(expected_lines: &[&str], batches: &[RecordBatch])`
-#[macro_export]
-macro_rules! assert_batches_eq {
-    ($EXPECTED_LINES: expr, $CHUNKS: expr) => {
-        let expected_lines: Vec<String> =
-            $EXPECTED_LINES.iter().map(|&s| s.into()).collect();
-
-        let formatted = arrow::util::pretty::pretty_format_batches($CHUNKS).unwrap();
-
-        let actual_lines: Vec<&str> = formatted.trim().lines().collect();
-
-        assert_eq!(
-            expected_lines, actual_lines,
-            "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n",
-            expected_lines, actual_lines
-        );
-    };
-}
-
-/// Compares formatted output of a record batch with an expected
-/// vector of strings in a way that order does not matter.
-/// This is a macro so errors appear on the correct line
-///
-/// Designed so that failure output can be directly copy/pasted
-/// into the test code as expected results.
-///
-/// Expects to be called about like this:
-///
-/// `assert_batch_sorted_eq!(expected_lines: &[&str], batches: &[RecordBatch])`
-#[macro_export]
-macro_rules! assert_batches_sorted_eq {
-    ($EXPECTED_LINES: expr, $CHUNKS: expr) => {
-        let mut expected_lines: Vec<String> =
-            $EXPECTED_LINES.iter().map(|&s| s.into()).collect();
-
-        // sort except for header + footer
-        let num_lines = expected_lines.len();
-        if num_lines > 3 {
-            expected_lines.as_mut_slice()[2..num_lines - 1].sort_unstable()
-        }
-
-        let formatted = arrow::util::pretty::pretty_format_batches($CHUNKS).unwrap();
-        // fix for windows: \r\n -->
-
-        let mut actual_lines: Vec<&str> = formatted.trim().lines().collect();
-
-        // sort except for header + footer
-        let num_lines = actual_lines.len();
-        if num_lines > 3 {
-            actual_lines.as_mut_slice()[2..num_lines - 1].sort_unstable()
-        }
-
-        assert_eq!(
-            expected_lines, actual_lines,
-            "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n",
-            expected_lines, actual_lines
-        );
-    };
-}
diff --git a/rust/datafusion/src/test/user_defined.rs b/rust/datafusion/src/test/user_defined.rs
deleted file mode 100644
index 9a850d52759..00000000000
--- a/rust/datafusion/src/test/user_defined.rs
+++ /dev/null
@@ -1,76 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Simple user defined logical plan node for testing
-
-use std::{
-    any::Any,
-    fmt::{self, Debug},
-    sync::Arc,
-};
-
-use crate::logical_plan::{DFSchemaRef, Expr, LogicalPlan, UserDefinedLogicalNode};
-
-/// Create a new user defined plan node, for testing
-pub fn new(input: LogicalPlan) -> LogicalPlan {
-    let node = Arc::new(TestUserDefinedPlanNode { input });
-    LogicalPlan::Extension { node }
-}
-
-struct TestUserDefinedPlanNode {
-    input: LogicalPlan,
-}
-
-impl Debug for TestUserDefinedPlanNode {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        self.fmt_for_explain(f)
-    }
-}
-
-impl UserDefinedLogicalNode for TestUserDefinedPlanNode {
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn inputs(&self) -> Vec<&LogicalPlan> {
-        vec![&self.input]
-    }
-
-    fn schema(&self) -> &DFSchemaRef {
-        self.input.schema()
-    }
-
-    fn expressions(&self) -> Vec<Expr> {
-        vec![]
-    }
-
-    fn fmt_for_explain(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "TestUserDefined")
-    }
-
-    fn from_template(
-        &self,
-        exprs: &[Expr],
-        inputs: &[LogicalPlan],
-    ) -> Arc<dyn UserDefinedLogicalNode + Send + Sync> {
-        assert_eq!(inputs.len(), 1, "input size inconsistent");
-        assert_eq!(exprs.len(), 0, "expression size inconsistent");
-        Arc::new(TestUserDefinedPlanNode {
-            input: inputs[0].clone(),
-        })
-    }
-}
diff --git a/rust/datafusion/src/test/variable.rs b/rust/datafusion/src/test/variable.rs
deleted file mode 100644
index 47d1370e801..00000000000
--- a/rust/datafusion/src/test/variable.rs
+++ /dev/null
@@ -1,58 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! System variable provider
-
-use crate::error::Result;
-use crate::scalar::ScalarValue;
-use crate::variable::VarProvider;
-
-/// System variable
-pub struct SystemVar {}
-
-impl SystemVar {
-    /// new system variable
-    pub fn new() -> Self {
-        Self {}
-    }
-}
-
-impl VarProvider for SystemVar {
-    /// get system variable value
-    fn get_value(&self, var_names: Vec<String>) -> Result<ScalarValue> {
-        let s = format!("{}-{}", "system-var".to_string(), var_names.concat());
-        Ok(ScalarValue::Utf8(Some(s)))
-    }
-}
-
-/// user defined variable
-pub struct UserDefinedVar {}
-
-impl UserDefinedVar {
-    /// new user defined variable
-    pub fn new() -> Self {
-        Self {}
-    }
-}
-
-impl VarProvider for UserDefinedVar {
-    /// Get user defined variable value
-    fn get_value(&self, var_names: Vec<String>) -> Result<ScalarValue> {
-        let s = format!("{}-{}", "user-defined-var".to_string(), var_names.concat());
-        Ok(ScalarValue::Utf8(Some(s)))
-    }
-}
diff --git a/rust/datafusion/src/variable/mod.rs b/rust/datafusion/src/variable/mod.rs
deleted file mode 100644
index db9ff7f0a60..00000000000
--- a/rust/datafusion/src/variable/mod.rs
+++ /dev/null
@@ -1,36 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Variable provider
-
-use crate::error::Result;
-use crate::scalar::ScalarValue;
-
-/// Variable type, system/user defined
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub enum VarType {
-    /// System variable, like @@version
-    System,
-    /// User defined variable, like @name
-    UserDefined,
-}
-
-/// A var provider for @variable
-pub trait VarProvider {
-    /// Get variable value
-    fn get_value(&self, var_names: Vec<String>) -> Result<ScalarValue>;
-}
diff --git a/rust/datafusion/tests/aggregate_simple.csv b/rust/datafusion/tests/aggregate_simple.csv
deleted file mode 100644
index 7a0256cb7db..00000000000
--- a/rust/datafusion/tests/aggregate_simple.csv
+++ /dev/null
@@ -1,16 +0,0 @@
-c1,c2,c3
-0.00001,0.000000000001,true
-0.00002,0.000000000002,false
-0.00002,0.000000000002,false
-0.00003,0.000000000003,true
-0.00003,0.000000000003,true
-0.00003,0.000000000003,true
-0.00004,0.000000000004,false
-0.00004,0.000000000004,false
-0.00004,0.000000000004,false
-0.00004,0.000000000004,false
-0.00005,0.000000000005,true
-0.00005,0.000000000005,true
-0.00005,0.000000000005,true
-0.00005,0.000000000005,true
-0.00005,0.000000000005,true
\ No newline at end of file
diff --git a/rust/datafusion/tests/custom_sources.rs b/rust/datafusion/tests/custom_sources.rs
deleted file mode 100644
index a00dd6ac282..00000000000
--- a/rust/datafusion/tests/custom_sources.rs
+++ /dev/null
@@ -1,200 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use arrow::array::Int32Array;
-use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
-use arrow::error::Result as ArrowResult;
-use arrow::record_batch::RecordBatch;
-
-use datafusion::error::{DataFusionError, Result};
-use datafusion::{
-    datasource::{datasource::Statistics, TableProvider},
-    physical_plan::collect,
-};
-
-use datafusion::execution::context::ExecutionContext;
-use datafusion::logical_plan::{col, Expr, LogicalPlan, LogicalPlanBuilder};
-use datafusion::physical_plan::{
-    ExecutionPlan, Partitioning, RecordBatchStream, SendableRecordBatchStream,
-};
-
-use futures::stream::Stream;
-use std::any::Any;
-use std::pin::Pin;
-use std::sync::Arc;
-use std::task::{Context, Poll};
-
-use async_trait::async_trait;
-
-//// Custom source dataframe tests ////
-
-struct CustomTableProvider;
-#[derive(Debug, Clone)]
-struct CustomExecutionPlan {
-    projection: Option<Vec<usize>>,
-}
-struct TestCustomRecordBatchStream {
-    /// the nb of batches of TEST_CUSTOM_RECORD_BATCH generated
-    nb_batch: i32,
-}
-macro_rules! TEST_CUSTOM_SCHEMA_REF {
-    () => {
-        Arc::new(Schema::new(vec![
-            Field::new("c1", DataType::Int32, false),
-            Field::new("c2", DataType::Int32, false),
-        ]))
-    };
-}
-macro_rules! TEST_CUSTOM_RECORD_BATCH {
-    () => {
-        RecordBatch::try_new(
-            TEST_CUSTOM_SCHEMA_REF!(),
-            vec![
-                Arc::new(Int32Array::from(vec![1, 10, 10, 100])),
-                Arc::new(Int32Array::from(vec![2, 12, 12, 120])),
-            ],
-        )
-    };
-}
-
-impl RecordBatchStream for TestCustomRecordBatchStream {
-    fn schema(&self) -> SchemaRef {
-        TEST_CUSTOM_SCHEMA_REF!()
-    }
-}
-
-impl Stream for TestCustomRecordBatchStream {
-    type Item = ArrowResult<RecordBatch>;
-
-    fn poll_next(
-        self: Pin<&mut Self>,
-        _cx: &mut Context<'_>,
-    ) -> Poll<Option<Self::Item>> {
-        if self.nb_batch > 0 {
-            self.get_mut().nb_batch -= 1;
-            Poll::Ready(Some(TEST_CUSTOM_RECORD_BATCH!()))
-        } else {
-            Poll::Ready(None)
-        }
-    }
-}
-
-#[async_trait]
-impl ExecutionPlan for CustomExecutionPlan {
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-    fn schema(&self) -> SchemaRef {
-        let schema = TEST_CUSTOM_SCHEMA_REF!();
-        match &self.projection {
-            None => schema,
-            Some(p) => Arc::new(Schema::new(
-                p.iter().map(|i| schema.field(*i).clone()).collect(),
-            )),
-        }
-    }
-    fn output_partitioning(&self) -> Partitioning {
-        Partitioning::UnknownPartitioning(1)
-    }
-    fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
-        vec![]
-    }
-    fn with_new_children(
-        &self,
-        children: Vec<Arc<dyn ExecutionPlan>>,
-    ) -> Result<Arc<dyn ExecutionPlan>> {
-        if children.is_empty() {
-            Ok(Arc::new(self.clone()))
-        } else {
-            Err(DataFusionError::Internal(
-                "Children cannot be replaced in CustomExecutionPlan".to_owned(),
-            ))
-        }
-    }
-    async fn execute(&self, _partition: usize) -> Result<SendableRecordBatchStream> {
-        Ok(Box::pin(TestCustomRecordBatchStream { nb_batch: 1 }))
-    }
-}
-
-impl TableProvider for CustomTableProvider {
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn schema(&self) -> SchemaRef {
-        TEST_CUSTOM_SCHEMA_REF!()
-    }
-
-    fn scan(
-        &self,
-        projection: &Option<Vec<usize>>,
-        _batch_size: usize,
-        _filters: &[Expr],
-        _limit: Option<usize>,
-    ) -> Result<Arc<dyn ExecutionPlan>> {
-        Ok(Arc::new(CustomExecutionPlan {
-            projection: projection.clone(),
-        }))
-    }
-
-    fn statistics(&self) -> Statistics {
-        Statistics::default()
-    }
-}
-
-#[tokio::test]
-async fn custom_source_dataframe() -> Result<()> {
-    let mut ctx = ExecutionContext::new();
-
-    let table = ctx.read_table(Arc::new(CustomTableProvider))?;
-    let logical_plan = LogicalPlanBuilder::from(&table.to_logical_plan())
-        .project(vec![col("c2")])?
-        .build()?;
-
-    let optimized_plan = ctx.optimize(&logical_plan)?;
-    match &optimized_plan {
-        LogicalPlan::Projection { input, .. } => match &**input {
-            LogicalPlan::TableScan {
-                source,
-                projected_schema,
-                ..
-            } => {
-                assert_eq!(source.schema().fields().len(), 2);
-                assert_eq!(projected_schema.fields().len(), 1);
-            }
-            _ => panic!("input to projection should be TableScan"),
-        },
-        _ => panic!("expect optimized_plan to be projection"),
-    }
-
-    let expected = "Projection: #c2\
-        \n  TableScan: projection=Some([1])";
-    assert_eq!(format!("{:?}", optimized_plan), expected);
-
-    let physical_plan = ctx.create_physical_plan(&optimized_plan)?;
-
-    assert_eq!(1, physical_plan.schema().fields().len());
-    assert_eq!("c2", physical_plan.schema().field(0).name().as_str());
-
-    let batches = collect(physical_plan).await?;
-    let origin_rec_batch = TEST_CUSTOM_RECORD_BATCH!()?;
-    assert_eq!(1, batches.len());
-    assert_eq!(1, batches[0].num_columns());
-    assert_eq!(origin_rec_batch.num_rows(), batches[0].num_rows());
-
-    Ok(())
-}
diff --git a/rust/datafusion/tests/customer.csv b/rust/datafusion/tests/customer.csv
deleted file mode 100644
index 2abcd659046..00000000000
--- a/rust/datafusion/tests/customer.csv
+++ /dev/null
@@ -1,4 +0,0 @@
-andrew,100
-jorge,200
-andy,150
-paul,300
diff --git a/rust/datafusion/tests/dataframe.rs b/rust/datafusion/tests/dataframe.rs
deleted file mode 100644
index b93e21f4aba..00000000000
--- a/rust/datafusion/tests/dataframe.rs
+++ /dev/null
@@ -1,79 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::sync::Arc;
-
-use arrow::datatypes::{DataType, Field, Schema};
-use arrow::{
-    array::{Int32Array, StringArray},
-    record_batch::RecordBatch,
-};
-
-use datafusion::error::Result;
-use datafusion::{datasource::MemTable, prelude::JoinType};
-
-use datafusion::execution::context::ExecutionContext;
-
-#[tokio::test]
-async fn join() -> Result<()> {
-    let schema1 = Arc::new(Schema::new(vec![
-        Field::new("a", DataType::Utf8, false),
-        Field::new("b", DataType::Int32, false),
-    ]));
-    let schema2 = Arc::new(Schema::new(vec![
-        Field::new("a", DataType::Utf8, false),
-        Field::new("c", DataType::Int32, false),
-    ]));
-
-    // define data.
-    let batch1 = RecordBatch::try_new(
-        schema1.clone(),
-        vec![
-            Arc::new(StringArray::from(vec!["a", "b", "c", "d"])),
-            Arc::new(Int32Array::from(vec![1, 10, 10, 100])),
-        ],
-    )?;
-    // define data.
-    let batch2 = RecordBatch::try_new(
-        schema2.clone(),
-        vec![
-            Arc::new(StringArray::from(vec!["a", "b", "c", "d"])),
-            Arc::new(Int32Array::from(vec![1, 10, 10, 100])),
-        ],
-    )?;
-
-    let mut ctx = ExecutionContext::new();
-
-    let table1 = MemTable::try_new(schema1, vec![vec![batch1]])?;
-    let table2 = MemTable::try_new(schema2, vec![vec![batch2]])?;
-
-    ctx.register_table("aa", Arc::new(table1))?;
-
-    let df1 = ctx.table("aa")?;
-
-    ctx.register_table("aaa", Arc::new(table2))?;
-
-    let df2 = ctx.table("aaa")?;
-
-    let a = df1.join(df2, JoinType::Inner, &["a"], &["a"])?;
-
-    let batches = a.collect().await?;
-
-    assert_eq!(batches.iter().map(|b| b.num_rows()).sum::<usize>(), 4);
-
-    Ok(())
-}
diff --git a/rust/datafusion/tests/example.csv b/rust/datafusion/tests/example.csv
deleted file mode 100644
index 0eadb69396b..00000000000
--- a/rust/datafusion/tests/example.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-a,b,c
-1,2,3
\ No newline at end of file
diff --git a/rust/datafusion/tests/provider_filter_pushdown.rs b/rust/datafusion/tests/provider_filter_pushdown.rs
deleted file mode 100644
index 0bf67bea8b9..00000000000
--- a/rust/datafusion/tests/provider_filter_pushdown.rs
+++ /dev/null
@@ -1,177 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use arrow::array::{as_primitive_array, Int32Builder, UInt64Array};
-use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
-use arrow::record_batch::RecordBatch;
-use async_trait::async_trait;
-use datafusion::datasource::datasource::{
-    Statistics, TableProvider, TableProviderFilterPushDown,
-};
-use datafusion::error::Result;
-use datafusion::execution::context::ExecutionContext;
-use datafusion::logical_plan::Expr;
-use datafusion::physical_plan::common::SizedRecordBatchStream;
-use datafusion::physical_plan::{ExecutionPlan, Partitioning, SendableRecordBatchStream};
-use datafusion::prelude::*;
-use datafusion::scalar::ScalarValue;
-use std::sync::Arc;
-
-fn create_batch(value: i32, num_rows: usize) -> Result<RecordBatch> {
-    let mut builder = Int32Builder::new(num_rows);
-    for _ in 0..num_rows {
-        builder.append_value(value)?;
-    }
-
-    Ok(RecordBatch::try_new(
-        Arc::new(Schema::new(vec![Field::new(
-            "flag",
-            DataType::Int32,
-            false,
-        )])),
-        vec![Arc::new(builder.finish())],
-    )?)
-}
-
-#[derive(Debug)]
-struct CustomPlan {
-    schema: SchemaRef,
-    batches: Vec<Arc<RecordBatch>>,
-}
-
-#[async_trait]
-impl ExecutionPlan for CustomPlan {
-    fn as_any(&self) -> &dyn std::any::Any {
-        self
-    }
-
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-
-    fn output_partitioning(&self) -> Partitioning {
-        Partitioning::UnknownPartitioning(1)
-    }
-
-    fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
-        vec![]
-    }
-
-    fn with_new_children(
-        &self,
-        _: Vec<Arc<dyn ExecutionPlan>>,
-    ) -> Result<Arc<dyn ExecutionPlan>> {
-        unreachable!()
-    }
-
-    async fn execute(&self, _: usize) -> Result<SendableRecordBatchStream> {
-        Ok(Box::pin(SizedRecordBatchStream::new(
-            self.schema(),
-            self.batches.clone(),
-        )))
-    }
-}
-
-#[derive(Clone)]
-struct CustomProvider {
-    zero_batch: RecordBatch,
-    one_batch: RecordBatch,
-}
-
-impl TableProvider for CustomProvider {
-    fn as_any(&self) -> &dyn std::any::Any {
-        self
-    }
-
-    fn schema(&self) -> SchemaRef {
-        self.zero_batch.schema()
-    }
-
-    fn scan(
-        &self,
-        _: &Option<Vec<usize>>,
-        _: usize,
-        filters: &[Expr],
-        _: Option<usize>,
-    ) -> Result<Arc<dyn ExecutionPlan>> {
-        match &filters[0] {
-            Expr::BinaryExpr { right, .. } => {
-                let int_value = match &**right {
-                    Expr::Literal(ScalarValue::Int64(i)) => i.unwrap(),
-                    _ => unimplemented!(),
-                };
-
-                Ok(Arc::new(CustomPlan {
-                    schema: self.zero_batch.schema(),
-                    batches: match int_value {
-                        0 => vec![Arc::new(self.zero_batch.clone())],
-                        1 => vec![Arc::new(self.one_batch.clone())],
-                        _ => vec![],
-                    },
-                }))
-            }
-            _ => Ok(Arc::new(CustomPlan {
-                schema: self.zero_batch.schema(),
-                batches: vec![],
-            })),
-        }
-    }
-
-    fn statistics(&self) -> Statistics {
-        Statistics::default()
-    }
-
-    fn supports_filter_pushdown(&self, _: &Expr) -> Result<TableProviderFilterPushDown> {
-        Ok(TableProviderFilterPushDown::Exact)
-    }
-}
-
-async fn assert_provider_row_count(value: i64, expected_count: u64) -> Result<()> {
-    let provider = CustomProvider {
-        zero_batch: create_batch(0, 10)?,
-        one_batch: create_batch(1, 5)?,
-    };
-
-    let mut ctx = ExecutionContext::new();
-    let df = ctx
-        .read_table(Arc::new(provider.clone()))?
-        .filter(col("flag").eq(lit(value)))?
-        .aggregate(vec![], vec![count(col("flag"))])?;
-
-    let results = df.collect().await?;
-    let result_col: &UInt64Array = as_primitive_array(results[0].column(0));
-    assert_eq!(result_col.value(0), expected_count);
-
-    ctx.register_table("data", Arc::new(provider))?;
-    let sql_results = ctx
-        .sql(&format!("select count(*) from data where flag = {}", value))?
-        .collect()
-        .await?;
-
-    let sql_result_col: &UInt64Array = as_primitive_array(sql_results[0].column(0));
-    assert_eq!(sql_result_col.value(0), expected_count);
-
-    Ok(())
-}
-
-#[tokio::test]
-async fn test_filter_pushdown_results() -> Result<()> {
-    assert_provider_row_count(0, 10).await?;
-    assert_provider_row_count(1, 5).await?;
-    assert_provider_row_count(2, 0).await?;
-    Ok(())
-}
diff --git a/rust/datafusion/tests/sql.rs b/rust/datafusion/tests/sql.rs
deleted file mode 100644
index f4d4e65f3a4..00000000000
--- a/rust/datafusion/tests/sql.rs
+++ /dev/null
@@ -1,2707 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::convert::TryFrom;
-use std::sync::Arc;
-
-use chrono::prelude::*;
-use chrono::Duration;
-
-extern crate arrow;
-extern crate datafusion;
-
-use arrow::{array::*, datatypes::TimeUnit};
-use arrow::{datatypes::Int32Type, datatypes::Int64Type, record_batch::RecordBatch};
-use arrow::{
-    datatypes::{DataType, Field, Schema, SchemaRef},
-    util::display::array_value_to_string,
-};
-
-use datafusion::execution::context::ExecutionContext;
-use datafusion::logical_plan::LogicalPlan;
-use datafusion::prelude::create_udf;
-use datafusion::{
-    datasource::{csv::CsvReadOptions, MemTable},
-    physical_plan::collect,
-};
-use datafusion::{
-    error::{DataFusionError, Result},
-    physical_plan::ColumnarValue,
-};
-
-#[tokio::test]
-async fn nyc() -> Result<()> {
-    // schema for nyxtaxi csv files
-    let schema = Schema::new(vec![
-        Field::new("VendorID", DataType::Utf8, true),
-        Field::new("tpep_pickup_datetime", DataType::Utf8, true),
-        Field::new("tpep_dropoff_datetime", DataType::Utf8, true),
-        Field::new("passenger_count", DataType::Utf8, true),
-        Field::new("trip_distance", DataType::Float64, true),
-        Field::new("RatecodeID", DataType::Utf8, true),
-        Field::new("store_and_fwd_flag", DataType::Utf8, true),
-        Field::new("PULocationID", DataType::Utf8, true),
-        Field::new("DOLocationID", DataType::Utf8, true),
-        Field::new("payment_type", DataType::Utf8, true),
-        Field::new("fare_amount", DataType::Float64, true),
-        Field::new("extra", DataType::Float64, true),
-        Field::new("mta_tax", DataType::Float64, true),
-        Field::new("tip_amount", DataType::Float64, true),
-        Field::new("tolls_amount", DataType::Float64, true),
-        Field::new("improvement_surcharge", DataType::Float64, true),
-        Field::new("total_amount", DataType::Float64, true),
-    ]);
-
-    let mut ctx = ExecutionContext::new();
-    ctx.register_csv(
-        "tripdata",
-        "file.csv",
-        CsvReadOptions::new().schema(&schema),
-    )?;
-
-    let logical_plan = ctx.create_logical_plan(
-        "SELECT passenger_count, MIN(fare_amount), MAX(fare_amount) \
-         FROM tripdata GROUP BY passenger_count",
-    )?;
-
-    let optimized_plan = ctx.optimize(&logical_plan)?;
-
-    match &optimized_plan {
-        LogicalPlan::Aggregate { input, .. } => match input.as_ref() {
-            LogicalPlan::TableScan {
-                ref projected_schema,
-                ..
-            } => {
-                assert_eq!(2, projected_schema.fields().len());
-                assert_eq!(projected_schema.field(0).name(), "passenger_count");
-                assert_eq!(projected_schema.field(1).name(), "fare_amount");
-            }
-            _ => unreachable!(),
-        },
-        _ => unreachable!(false),
-    }
-
-    Ok(())
-}
-
-#[tokio::test]
-async fn parquet_query() {
-    let mut ctx = ExecutionContext::new();
-    register_alltypes_parquet(&mut ctx);
-    // NOTE that string_col is actually a binary column and does not have the UTF8 logical type
-    // so we need an explicit cast
-    let sql = "SELECT id, CAST(string_col AS varchar) FROM alltypes_plain";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![
-        vec!["4", "0"],
-        vec!["5", "1"],
-        vec!["6", "0"],
-        vec!["7", "1"],
-        vec!["2", "0"],
-        vec!["3", "1"],
-        vec!["0", "0"],
-        vec!["1", "1"],
-    ];
-
-    assert_eq!(expected, actual);
-}
-
-#[tokio::test]
-async fn parquet_single_nan_schema() {
-    let mut ctx = ExecutionContext::new();
-    let testdata = arrow::util::test_util::parquet_test_data();
-    ctx.register_parquet("single_nan", &format!("{}/single_nan.parquet", testdata))
-        .unwrap();
-    let sql = "SELECT mycol FROM single_nan";
-    let plan = ctx.create_logical_plan(&sql).unwrap();
-    let plan = ctx.optimize(&plan).unwrap();
-    let plan = ctx.create_physical_plan(&plan).unwrap();
-    let results = collect(plan).await.unwrap();
-    for batch in results {
-        assert_eq!(1, batch.num_rows());
-        assert_eq!(1, batch.num_columns());
-    }
-}
-
-#[tokio::test]
-#[ignore = "Test ignored, will be enabled as part of the nested Parquet reader"]
-async fn parquet_list_columns() {
-    let mut ctx = ExecutionContext::new();
-    let testdata = arrow::util::test_util::parquet_test_data();
-    ctx.register_parquet(
-        "list_columns",
-        &format!("{}/list_columns.parquet", testdata),
-    )
-    .unwrap();
-
-    let schema = Arc::new(Schema::new(vec![
-        Field::new(
-            "int64_list",
-            DataType::List(Box::new(Field::new("item", DataType::Int64, true))),
-            true,
-        ),
-        Field::new(
-            "utf8_list",
-            DataType::List(Box::new(Field::new("item", DataType::Utf8, true))),
-            true,
-        ),
-    ]));
-
-    let sql = "SELECT int64_list, utf8_list FROM list_columns";
-    let plan = ctx.create_logical_plan(&sql).unwrap();
-    let plan = ctx.optimize(&plan).unwrap();
-    let plan = ctx.create_physical_plan(&plan).unwrap();
-    let results = collect(plan).await.unwrap();
-
-    //   int64_list              utf8_list
-    // 0  [1, 2, 3]        [abc, efg, hij]
-    // 1  [None, 1]                   None
-    // 2        [4]  [efg, None, hij, xyz]
-
-    assert_eq!(1, results.len());
-    let batch = &results[0];
-    assert_eq!(3, batch.num_rows());
-    assert_eq!(2, batch.num_columns());
-    assert_eq!(schema, batch.schema());
-
-    let int_list_array = batch
-        .column(0)
-        .as_any()
-        .downcast_ref::<ListArray>()
-        .unwrap();
-    let utf8_list_array = batch
-        .column(1)
-        .as_any()
-        .downcast_ref::<ListArray>()
-        .unwrap();
-
-    assert_eq!(
-        int_list_array
-            .value(0)
-            .as_any()
-            .downcast_ref::<PrimitiveArray<Int64Type>>()
-            .unwrap(),
-        &PrimitiveArray::<Int64Type>::from(vec![Some(1), Some(2), Some(3),])
-    );
-
-    assert_eq!(
-        utf8_list_array
-            .value(0)
-            .as_any()
-            .downcast_ref::<StringArray>()
-            .unwrap(),
-        &StringArray::try_from(vec![Some("abc"), Some("efg"), Some("hij"),]).unwrap()
-    );
-
-    assert_eq!(
-        int_list_array
-            .value(1)
-            .as_any()
-            .downcast_ref::<PrimitiveArray<Int64Type>>()
-            .unwrap(),
-        &PrimitiveArray::<Int64Type>::from(vec![None, Some(1),])
-    );
-
-    assert!(utf8_list_array.is_null(1));
-
-    assert_eq!(
-        int_list_array
-            .value(2)
-            .as_any()
-            .downcast_ref::<PrimitiveArray<Int64Type>>()
-            .unwrap(),
-        &PrimitiveArray::<Int64Type>::from(vec![Some(4),])
-    );
-
-    let result = utf8_list_array.value(2);
-    let result = result.as_any().downcast_ref::<StringArray>().unwrap();
-
-    assert_eq!(result.value(0), "efg");
-    assert!(result.is_null(1));
-    assert_eq!(result.value(2), "hij");
-    assert_eq!(result.value(3), "xyz");
-}
-
-#[tokio::test]
-async fn csv_select_nested() -> Result<()> {
-    let mut ctx = ExecutionContext::new();
-    register_aggregate_csv(&mut ctx)?;
-    let sql = "SELECT o1, o2, c3
-               FROM (
-                 SELECT c1 AS o1, c2 + 1 AS o2, c3
-                 FROM (
-                   SELECT c1, c2, c3, c4
-                   FROM aggregate_test_100
-                   WHERE c1 = 'a' AND c2 >= 4
-                   ORDER BY c2 ASC, c3 ASC
-                 )
-               )";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![
-        vec!["a", "5", "-101"],
-        vec!["a", "5", "-54"],
-        vec!["a", "5", "-38"],
-        vec!["a", "5", "65"],
-        vec!["a", "6", "-101"],
-        vec!["a", "6", "-31"],
-        vec!["a", "6", "36"],
-    ];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn csv_count_star() -> Result<()> {
-    let mut ctx = ExecutionContext::new();
-    register_aggregate_csv(&mut ctx)?;
-    let sql = "SELECT COUNT(*), COUNT(1) AS c, COUNT(c1) FROM aggregate_test_100";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![vec!["100", "100", "100"]];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_with_predicate() -> Result<()> {
-    let mut ctx = ExecutionContext::new();
-    register_aggregate_csv(&mut ctx)?;
-    let sql = "SELECT c1, c12 FROM aggregate_test_100 WHERE c12 > 0.376 AND c12 < 0.4";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![
-        vec!["e", "0.39144436569161134"],
-        vec!["d", "0.38870280983958583"],
-    ];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_with_negative_predicate() -> Result<()> {
-    let mut ctx = ExecutionContext::new();
-    register_aggregate_csv(&mut ctx)?;
-    let sql = "SELECT c1, c4 FROM aggregate_test_100 WHERE c3 < -55 AND -c4 > 30000";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![vec!["e", "-31500"], vec!["c", "-30187"]];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_with_negated_predicate() -> Result<()> {
-    let mut ctx = ExecutionContext::new();
-    register_aggregate_csv(&mut ctx)?;
-    let sql = "SELECT COUNT(1) FROM aggregate_test_100 WHERE NOT(c1 != 'a')";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![vec!["21"]];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_with_is_not_null_predicate() -> Result<()> {
-    let mut ctx = ExecutionContext::new();
-    register_aggregate_csv(&mut ctx)?;
-    let sql = "SELECT COUNT(1) FROM aggregate_test_100 WHERE c1 IS NOT NULL";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![vec!["100"]];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_with_is_null_predicate() -> Result<()> {
-    let mut ctx = ExecutionContext::new();
-    register_aggregate_csv(&mut ctx)?;
-    let sql = "SELECT COUNT(1) FROM aggregate_test_100 WHERE c1 IS NULL";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![vec!["0"]];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_group_by_int_min_max() -> Result<()> {
-    let mut ctx = ExecutionContext::new();
-    register_aggregate_csv(&mut ctx)?;
-    let sql = "SELECT c2, MIN(c12), MAX(c12) FROM aggregate_test_100 GROUP BY c2";
-    let mut actual = execute(&mut ctx, sql).await;
-    actual.sort();
-    let expected = vec![
-        vec!["1", "0.05636955101974106", "0.9965400387585364"],
-        vec!["2", "0.16301110515739792", "0.991517828651004"],
-        vec!["3", "0.047343434291126085", "0.9293883502480845"],
-        vec!["4", "0.02182578039211991", "0.9237877978193884"],
-        vec!["5", "0.01479305307777301", "0.9723580396501548"],
-    ];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_group_by_float32() -> Result<()> {
-    let mut ctx = ExecutionContext::new();
-    register_aggregate_simple_csv(&mut ctx)?;
-
-    let sql =
-        "SELECT COUNT(*) as cnt, c1 FROM aggregate_simple GROUP BY c1 ORDER BY cnt DESC";
-    let actual = execute(&mut ctx, sql).await;
-
-    let expected = vec![
-        vec!["5", "0.00005"],
-        vec!["4", "0.00004"],
-        vec!["3", "0.00003"],
-        vec!["2", "0.00002"],
-        vec!["1", "0.00001"],
-    ];
-    assert_eq!(expected, actual);
-
-    Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_group_by_float64() -> Result<()> {
-    let mut ctx = ExecutionContext::new();
-    register_aggregate_simple_csv(&mut ctx)?;
-
-    let sql =
-        "SELECT COUNT(*) as cnt, c2 FROM aggregate_simple GROUP BY c2 ORDER BY cnt DESC";
-    let actual = execute(&mut ctx, sql).await;
-
-    let expected = vec![
-        vec!["5", "0.000000000005"],
-        vec!["4", "0.000000000004"],
-        vec!["3", "0.000000000003"],
-        vec!["2", "0.000000000002"],
-        vec!["1", "0.000000000001"],
-    ];
-    assert_eq!(expected, actual);
-
-    Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_group_by_boolean() -> Result<()> {
-    let mut ctx = ExecutionContext::new();
-    register_aggregate_simple_csv(&mut ctx)?;
-
-    let sql =
-        "SELECT COUNT(*) as cnt, c3 FROM aggregate_simple GROUP BY c3 ORDER BY cnt DESC";
-    let actual = execute(&mut ctx, sql).await;
-
-    let expected = vec![vec!["9", "true"], vec!["6", "false"]];
-    assert_eq!(expected, actual);
-
-    Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_group_by_two_columns() -> Result<()> {
-    let mut ctx = ExecutionContext::new();
-    register_aggregate_csv(&mut ctx)?;
-    let sql = "SELECT c1, c2, MIN(c3) FROM aggregate_test_100 GROUP BY c1, c2";
-    let mut actual = execute(&mut ctx, sql).await;
-    actual.sort();
-    let expected = vec![
-        vec!["a", "1", "-85"],
-        vec!["a", "2", "-48"],
-        vec!["a", "3", "-72"],
-        vec!["a", "4", "-101"],
-        vec!["a", "5", "-101"],
-        vec!["b", "1", "12"],
-        vec!["b", "2", "-60"],
-        vec!["b", "3", "-101"],
-        vec!["b", "4", "-117"],
-        vec!["b", "5", "-82"],
-        vec!["c", "1", "-24"],
-        vec!["c", "2", "-117"],
-        vec!["c", "3", "-2"],
-        vec!["c", "4", "-90"],
-        vec!["c", "5", "-94"],
-        vec!["d", "1", "-99"],
-        vec!["d", "2", "93"],
-        vec!["d", "3", "-76"],
-        vec!["d", "4", "5"],
-        vec!["d", "5", "-59"],
-        vec!["e", "1", "36"],
-        vec!["e", "2", "-61"],
-        vec!["e", "3", "-95"],
-        vec!["e", "4", "-56"],
-        vec!["e", "5", "-86"],
-    ];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_group_by_and_having() -> Result<()> {
-    let mut ctx = ExecutionContext::new();
-    register_aggregate_csv(&mut ctx)?;
-    let sql = "SELECT c1, MIN(c3) AS m FROM aggregate_test_100 GROUP BY c1 HAVING m < -100 AND MAX(c3) > 70";
-    let mut actual = execute(&mut ctx, sql).await;
-    actual.sort();
-    let expected = vec![vec!["a", "-101"], vec!["c", "-117"]];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_group_by_and_having_and_where() -> Result<()> {
-    let mut ctx = ExecutionContext::new();
-    register_aggregate_csv(&mut ctx)?;
-    let sql = "SELECT c1, MIN(c3) AS m
-               FROM aggregate_test_100
-               WHERE c1 IN ('a', 'b')
-               GROUP BY c1
-               HAVING m < -100 AND MAX(c3) > 70";
-    let mut actual = execute(&mut ctx, sql).await;
-    actual.sort();
-    let expected = vec![vec!["a", "-101"]];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_having_without_group_by() -> Result<()> {
-    let mut ctx = ExecutionContext::new();
-    register_aggregate_csv(&mut ctx)?;
-    let sql = "SELECT c1, c2, c3 FROM aggregate_test_100 HAVING c2 >= 4 AND c3 > 90";
-    let mut actual = execute(&mut ctx, sql).await;
-    actual.sort();
-    let expected = vec![
-        vec!["c", "4", "123"],
-        vec!["c", "5", "118"],
-        vec!["d", "4", "102"],
-        vec!["e", "4", "96"],
-        vec!["e", "4", "97"],
-    ];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_avg_sqrt() -> Result<()> {
-    let mut ctx = create_ctx()?;
-    register_aggregate_csv(&mut ctx)?;
-    let sql = "SELECT avg(custom_sqrt(c12)) FROM aggregate_test_100";
-    let mut actual = execute(&mut ctx, sql).await;
-    actual.sort();
-    let expected = vec![vec!["0.6706002946036462"]];
-    assert_float_eq(&expected, &actual);
-    Ok(())
-}
-
-/// test that casting happens on udfs.
-/// c11 is f32, but `custom_sqrt` requires f64. Casting happens but the logical plan and
-/// physical plan have the same schema.
-#[tokio::test]
-async fn csv_query_custom_udf_with_cast() -> Result<()> {
-    let mut ctx = create_ctx()?;
-    register_aggregate_csv(&mut ctx)?;
-    let sql = "SELECT avg(custom_sqrt(c11)) FROM aggregate_test_100";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![vec!["0.6584408483418833"]];
-    assert_float_eq(&expected, &actual);
-    Ok(())
-}
-
-/// sqrt(f32) is slightly different than sqrt(CAST(f32 AS double)))
-#[tokio::test]
-async fn sqrt_f32_vs_f64() -> Result<()> {
-    let mut ctx = create_ctx()?;
-    register_aggregate_csv(&mut ctx)?;
-    // sqrt(f32)'s plan passes
-    let sql = "SELECT avg(sqrt(c11)) FROM aggregate_test_100";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![vec!["0.6584408485889435"]];
-
-    assert_eq!(actual, expected);
-    let sql = "SELECT avg(sqrt(CAST(c11 AS double))) FROM aggregate_test_100";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![vec!["0.6584408483418833"]];
-    assert_float_eq(&expected, &actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_error() -> Result<()> {
-    // sin(utf8) should error
-    let mut ctx = create_ctx()?;
-    register_aggregate_csv(&mut ctx)?;
-    let sql = "SELECT sin(c1) FROM aggregate_test_100";
-    let plan = ctx.create_logical_plan(&sql);
-    assert!(plan.is_err());
-    Ok(())
-}
-
-// this query used to deadlock due to the call udf(udf())
-#[tokio::test]
-async fn csv_query_sqrt_sqrt() -> Result<()> {
-    let mut ctx = create_ctx()?;
-    register_aggregate_csv(&mut ctx)?;
-    let sql = "SELECT sqrt(sqrt(c12)) FROM aggregate_test_100 LIMIT 1";
-    let actual = execute(&mut ctx, sql).await;
-    // sqrt(sqrt(c12=0.9294097332465232)) = 0.9818650561397431
-    let expected = vec![vec!["0.9818650561397431"]];
-    assert_float_eq(&expected, &actual);
-    Ok(())
-}
-
-#[allow(clippy::unnecessary_wraps)]
-fn create_ctx() -> Result<ExecutionContext> {
-    let mut ctx = ExecutionContext::new();
-
-    // register a custom UDF
-    ctx.register_udf(create_udf(
-        "custom_sqrt",
-        vec![DataType::Float64],
-        Arc::new(DataType::Float64),
-        Arc::new(custom_sqrt),
-    ));
-
-    Ok(ctx)
-}
-
-fn custom_sqrt(args: &[ColumnarValue]) -> Result<ColumnarValue> {
-    let arg = &args[0];
-    if let ColumnarValue::Array(v) = arg {
-        let input = v
-            .as_any()
-            .downcast_ref::<Float64Array>()
-            .expect("cast failed");
-
-        let array: Float64Array = input.iter().map(|v| v.map(|x| x.sqrt())).collect();
-        Ok(ColumnarValue::Array(Arc::new(array)))
-    } else {
-        unimplemented!()
-    }
-}
-
-#[tokio::test]
-async fn csv_query_avg() -> Result<()> {
-    let mut ctx = ExecutionContext::new();
-    register_aggregate_csv(&mut ctx)?;
-    let sql = "SELECT avg(c12) FROM aggregate_test_100";
-    let mut actual = execute(&mut ctx, sql).await;
-    actual.sort();
-    let expected = vec![vec!["0.5089725099127211"]];
-    assert_float_eq(&expected, &actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_group_by_avg() -> Result<()> {
-    let mut ctx = ExecutionContext::new();
-    register_aggregate_csv(&mut ctx)?;
-    let sql = "SELECT c1, avg(c12) FROM aggregate_test_100 GROUP BY c1";
-    let mut actual = execute(&mut ctx, sql).await;
-    actual.sort();
-    let expected = vec![
-        vec!["a", "0.48754517466109415"],
-        vec!["b", "0.41040709263815384"],
-        vec!["c", "0.6600456536439784"],
-        vec!["d", "0.48855379387549824"],
-        vec!["e", "0.48600669271341534"],
-    ];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_group_by_avg_with_projection() -> Result<()> {
-    let mut ctx = ExecutionContext::new();
-    register_aggregate_csv(&mut ctx)?;
-    let sql = "SELECT avg(c12), c1 FROM aggregate_test_100 GROUP BY c1";
-    let mut actual = execute(&mut ctx, sql).await;
-    actual.sort();
-    let expected = vec![
-        vec!["0.41040709263815384", "b"],
-        vec!["0.48600669271341534", "e"],
-        vec!["0.48754517466109415", "a"],
-        vec!["0.48855379387549824", "d"],
-        vec!["0.6600456536439784", "c"],
-    ];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_avg_multi_batch() -> Result<()> {
-    let mut ctx = ExecutionContext::new();
-    register_aggregate_csv(&mut ctx)?;
-    let sql = "SELECT avg(c12) FROM aggregate_test_100";
-    let plan = ctx.create_logical_plan(&sql).unwrap();
-    let plan = ctx.optimize(&plan).unwrap();
-    let plan = ctx.create_physical_plan(&plan).unwrap();
-    let results = collect(plan).await.unwrap();
-    let batch = &results[0];
-    let column = batch.column(0);
-    let array = column.as_any().downcast_ref::<Float64Array>().unwrap();
-    let actual = array.value(0);
-    let expected = 0.5089725;
-    // Due to float number's accuracy, different batch size will lead to different
-    // answers.
-    assert!((expected - actual).abs() < 0.01);
-    Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_nullif_divide_by_0() -> Result<()> {
-    let mut ctx = ExecutionContext::new();
-    register_aggregate_csv(&mut ctx)?;
-    let sql = "SELECT c8/nullif(c7, 0) FROM aggregate_test_100";
-    let actual = execute(&mut ctx, sql).await;
-    let actual = &actual[80..90]; // We just want to compare rows 80-89
-    let expected = vec![
-        vec!["258"],
-        vec!["664"],
-        vec!["NULL"],
-        vec!["22"],
-        vec!["164"],
-        vec!["448"],
-        vec!["365"],
-        vec!["1640"],
-        vec!["671"],
-        vec!["203"],
-    ];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_count() -> Result<()> {
-    let mut ctx = ExecutionContext::new();
-    register_aggregate_csv(&mut ctx)?;
-    let sql = "SELECT count(c12) FROM aggregate_test_100";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![vec!["100"]];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_group_by_int_count() -> Result<()> {
-    let mut ctx = ExecutionContext::new();
-    register_aggregate_csv(&mut ctx)?;
-    let sql = "SELECT c1, count(c12) FROM aggregate_test_100 GROUP BY c1";
-    let mut actual = execute(&mut ctx, sql).await;
-    actual.sort();
-    let expected = vec![
-        vec!["a", "21"],
-        vec!["b", "19"],
-        vec!["c", "21"],
-        vec!["d", "18"],
-        vec!["e", "21"],
-    ];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_group_with_aliased_aggregate() -> Result<()> {
-    let mut ctx = ExecutionContext::new();
-    register_aggregate_csv(&mut ctx)?;
-    let sql = "SELECT c1, count(c12) AS count FROM aggregate_test_100 GROUP BY c1";
-    let mut actual = execute(&mut ctx, sql).await;
-    actual.sort();
-    let expected = vec![
-        vec!["a", "21"],
-        vec!["b", "19"],
-        vec!["c", "21"],
-        vec!["d", "18"],
-        vec!["e", "21"],
-    ];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_group_by_string_min_max() -> Result<()> {
-    let mut ctx = ExecutionContext::new();
-    register_aggregate_csv(&mut ctx)?;
-    let sql = "SELECT c1, MIN(c12), MAX(c12) FROM aggregate_test_100 GROUP BY c1";
-    let mut actual = execute(&mut ctx, sql).await;
-    actual.sort();
-    let expected = vec![
-        vec!["a", "0.02182578039211991", "0.9800193410444061"],
-        vec!["b", "0.04893135681998029", "0.9185813970744787"],
-        vec!["c", "0.0494924465469434", "0.991517828651004"],
-        vec!["d", "0.061029375346466685", "0.9748360509016578"],
-        vec!["e", "0.01479305307777301", "0.9965400387585364"],
-    ];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_cast() -> Result<()> {
-    let mut ctx = ExecutionContext::new();
-    register_aggregate_csv(&mut ctx)?;
-    let sql = "SELECT CAST(c12 AS float) FROM aggregate_test_100 WHERE c12 > 0.376 AND c12 < 0.4";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![vec!["0.39144436569161134"], vec!["0.38870280983958583"]];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_cast_literal() -> Result<()> {
-    let mut ctx = ExecutionContext::new();
-    register_aggregate_csv(&mut ctx)?;
-    let sql =
-        "SELECT c12, CAST(1 AS float) FROM aggregate_test_100 WHERE c12 > CAST(0 AS float) LIMIT 2";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![
-        vec!["0.9294097332465232", "1"],
-        vec!["0.3114712539863804", "1"],
-    ];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn union_all() -> Result<()> {
-    let mut ctx = ExecutionContext::new();
-    let sql = "SELECT 1 as x UNION ALL SELECT 2 as x";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![vec!["1"], vec!["2"]];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn csv_union_all() -> Result<()> {
-    let mut ctx = ExecutionContext::new();
-    register_aggregate_csv(&mut ctx)?;
-    let sql =
-        "SELECT c1 FROM aggregate_test_100 UNION ALL SELECT c1 FROM aggregate_test_100";
-    let actual = execute(&mut ctx, sql).await;
-    assert_eq!(actual.len(), 200);
-    Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_limit() -> Result<()> {
-    let mut ctx = ExecutionContext::new();
-    register_aggregate_csv(&mut ctx)?;
-    let sql = "SELECT c1 FROM aggregate_test_100 LIMIT 2";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![vec!["c"], vec!["d"]];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_limit_bigger_than_nbr_of_rows() -> Result<()> {
-    let mut ctx = ExecutionContext::new();
-    register_aggregate_csv(&mut ctx)?;
-    let sql = "SELECT c2 FROM aggregate_test_100 LIMIT 200";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![
-        vec!["2"],
-        vec!["5"],
-        vec!["1"],
-        vec!["1"],
-        vec!["5"],
-        vec!["4"],
-        vec!["3"],
-        vec!["3"],
-        vec!["1"],
-        vec!["4"],
-        vec!["1"],
-        vec!["4"],
-        vec!["3"],
-        vec!["2"],
-        vec!["1"],
-        vec!["1"],
-        vec!["2"],
-        vec!["1"],
-        vec!["3"],
-        vec!["2"],
-        vec!["4"],
-        vec!["1"],
-        vec!["5"],
-        vec!["4"],
-        vec!["2"],
-        vec!["1"],
-        vec!["4"],
-        vec!["5"],
-        vec!["2"],
-        vec!["3"],
-        vec!["4"],
-        vec!["2"],
-        vec!["1"],
-        vec!["5"],
-        vec!["3"],
-        vec!["1"],
-        vec!["2"],
-        vec!["3"],
-        vec!["3"],
-        vec!["3"],
-        vec!["2"],
-        vec!["4"],
-        vec!["1"],
-        vec!["3"],
-        vec!["2"],
-        vec!["5"],
-        vec!["2"],
-        vec!["1"],
-        vec!["4"],
-        vec!["1"],
-        vec!["4"],
-        vec!["2"],
-        vec!["5"],
-        vec!["4"],
-        vec!["2"],
-        vec!["3"],
-        vec!["4"],
-        vec!["4"],
-        vec!["4"],
-        vec!["5"],
-        vec!["4"],
-        vec!["2"],
-        vec!["1"],
-        vec!["2"],
-        vec!["4"],
-        vec!["2"],
-        vec!["3"],
-        vec!["5"],
-        vec!["1"],
-        vec!["1"],
-        vec!["4"],
-        vec!["2"],
-        vec!["1"],
-        vec!["2"],
-        vec!["1"],
-        vec!["1"],
-        vec!["5"],
-        vec!["4"],
-        vec!["5"],
-        vec!["2"],
-        vec!["3"],
-        vec!["2"],
-        vec!["4"],
-        vec!["1"],
-        vec!["3"],
-        vec!["4"],
-        vec!["3"],
-        vec!["2"],
-        vec!["5"],
-        vec!["3"],
-        vec!["3"],
-        vec!["2"],
-        vec!["5"],
-        vec!["5"],
-        vec!["4"],
-        vec!["1"],
-        vec!["3"],
-        vec!["3"],
-        vec!["4"],
-        vec!["4"],
-    ];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_limit_with_same_nbr_of_rows() -> Result<()> {
-    let mut ctx = ExecutionContext::new();
-    register_aggregate_csv(&mut ctx)?;
-    let sql = "SELECT c2 FROM aggregate_test_100 LIMIT 100";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![
-        vec!["2"],
-        vec!["5"],
-        vec!["1"],
-        vec!["1"],
-        vec!["5"],
-        vec!["4"],
-        vec!["3"],
-        vec!["3"],
-        vec!["1"],
-        vec!["4"],
-        vec!["1"],
-        vec!["4"],
-        vec!["3"],
-        vec!["2"],
-        vec!["1"],
-        vec!["1"],
-        vec!["2"],
-        vec!["1"],
-        vec!["3"],
-        vec!["2"],
-        vec!["4"],
-        vec!["1"],
-        vec!["5"],
-        vec!["4"],
-        vec!["2"],
-        vec!["1"],
-        vec!["4"],
-        vec!["5"],
-        vec!["2"],
-        vec!["3"],
-        vec!["4"],
-        vec!["2"],
-        vec!["1"],
-        vec!["5"],
-        vec!["3"],
-        vec!["1"],
-        vec!["2"],
-        vec!["3"],
-        vec!["3"],
-        vec!["3"],
-        vec!["2"],
-        vec!["4"],
-        vec!["1"],
-        vec!["3"],
-        vec!["2"],
-        vec!["5"],
-        vec!["2"],
-        vec!["1"],
-        vec!["4"],
-        vec!["1"],
-        vec!["4"],
-        vec!["2"],
-        vec!["5"],
-        vec!["4"],
-        vec!["2"],
-        vec!["3"],
-        vec!["4"],
-        vec!["4"],
-        vec!["4"],
-        vec!["5"],
-        vec!["4"],
-        vec!["2"],
-        vec!["1"],
-        vec!["2"],
-        vec!["4"],
-        vec!["2"],
-        vec!["3"],
-        vec!["5"],
-        vec!["1"],
-        vec!["1"],
-        vec!["4"],
-        vec!["2"],
-        vec!["1"],
-        vec!["2"],
-        vec!["1"],
-        vec!["1"],
-        vec!["5"],
-        vec!["4"],
-        vec!["5"],
-        vec!["2"],
-        vec!["3"],
-        vec!["2"],
-        vec!["4"],
-        vec!["1"],
-        vec!["3"],
-        vec!["4"],
-        vec!["3"],
-        vec!["2"],
-        vec!["5"],
-        vec!["3"],
-        vec!["3"],
-        vec!["2"],
-        vec!["5"],
-        vec!["5"],
-        vec!["4"],
-        vec!["1"],
-        vec!["3"],
-        vec!["3"],
-        vec!["4"],
-        vec!["4"],
-    ];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_limit_zero() -> Result<()> {
-    let mut ctx = ExecutionContext::new();
-    register_aggregate_csv(&mut ctx)?;
-    let sql = "SELECT c1 FROM aggregate_test_100 LIMIT 0";
-    let actual = execute(&mut ctx, sql).await;
-    let expected: Vec<Vec<String>> = vec![];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_create_external_table() {
-    let mut ctx = ExecutionContext::new();
-    register_aggregate_csv_by_sql(&mut ctx).await;
-    let sql = "SELECT c1, c2, c3, c4, c5, c6, c7, c8, c9, 10, c11, c12, c13 FROM aggregate_test_100 LIMIT 1";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![vec![
-        "c",
-        "2",
-        "1",
-        "18109",
-        "2033001162",
-        "-6513304855495910254",
-        "25",
-        "43062",
-        "1491205016",
-        "10",
-        "0.110830784",
-        "0.9294097332465232",
-        "6WfVFBVGJSQb7FhA7E0lBwdvjfZnSW",
-    ]];
-    assert_eq!(expected, actual);
-}
-
-#[tokio::test]
-async fn csv_query_external_table_count() {
-    let mut ctx = ExecutionContext::new();
-    register_aggregate_csv_by_sql(&mut ctx).await;
-    let sql = "SELECT COUNT(c12) FROM aggregate_test_100";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![vec!["100"]];
-    assert_eq!(expected, actual);
-}
-
-#[tokio::test]
-async fn csv_query_external_table_sum() {
-    let mut ctx = ExecutionContext::new();
-    // cast smallint and int to bigint to avoid overflow during calculation
-    register_aggregate_csv_by_sql(&mut ctx).await;
-    let sql =
-        "SELECT SUM(CAST(c7 AS BIGINT)), SUM(CAST(c8 AS BIGINT)) FROM aggregate_test_100";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![vec!["13060", "3017641"]];
-    assert_eq!(expected, actual);
-}
-
-#[tokio::test]
-async fn csv_query_count_star() {
-    let mut ctx = ExecutionContext::new();
-    register_aggregate_csv_by_sql(&mut ctx).await;
-    let sql = "SELECT COUNT(*) FROM aggregate_test_100";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![vec!["100"]];
-    assert_eq!(expected, actual);
-}
-
-#[tokio::test]
-async fn csv_query_count_one() {
-    let mut ctx = ExecutionContext::new();
-    register_aggregate_csv_by_sql(&mut ctx).await;
-    let sql = "SELECT COUNT(1) FROM aggregate_test_100";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![vec!["100"]];
-    assert_eq!(expected, actual);
-}
-
-#[tokio::test]
-async fn case_when() -> Result<()> {
-    let mut ctx = create_case_context()?;
-    let sql = "SELECT \
-        CASE WHEN c1 = 'a' THEN 1 \
-             WHEN c1 = 'b' THEN 2 \
-             END \
-        FROM t1";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![vec!["1"], vec!["2"], vec!["NULL"], vec!["NULL"]];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn case_when_else() -> Result<()> {
-    let mut ctx = create_case_context()?;
-    let sql = "SELECT \
-        CASE WHEN c1 = 'a' THEN 1 \
-             WHEN c1 = 'b' THEN 2 \
-             ELSE 999 END \
-        FROM t1";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![vec!["1"], vec!["2"], vec!["999"], vec!["999"]];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn case_when_with_base_expr() -> Result<()> {
-    let mut ctx = create_case_context()?;
-    let sql = "SELECT \
-        CASE c1 WHEN 'a' THEN 1 \
-             WHEN 'b' THEN 2 \
-             END \
-        FROM t1";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![vec!["1"], vec!["2"], vec!["NULL"], vec!["NULL"]];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn case_when_else_with_base_expr() -> Result<()> {
-    let mut ctx = create_case_context()?;
-    let sql = "SELECT \
-        CASE c1 WHEN 'a' THEN 1 \
-             WHEN 'b' THEN 2 \
-             ELSE 999 END \
-        FROM t1";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![vec!["1"], vec!["2"], vec!["999"], vec!["999"]];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-fn create_case_context() -> Result<ExecutionContext> {
-    let mut ctx = ExecutionContext::new();
-    let schema = Arc::new(Schema::new(vec![Field::new("c1", DataType::Utf8, true)]));
-    let data = RecordBatch::try_new(
-        schema.clone(),
-        vec![Arc::new(StringArray::from(vec![
-            Some("a"),
-            Some("b"),
-            Some("c"),
-            None,
-        ]))],
-    )?;
-    let table = MemTable::try_new(schema, vec![vec![data]])?;
-    ctx.register_table("t1", Arc::new(table))?;
-    Ok(ctx)
-}
-
-#[tokio::test]
-async fn equijoin() -> Result<()> {
-    let mut ctx = create_join_context("t1_id", "t2_id")?;
-    let sql =
-        "SELECT t1_id, t1_name, t2_name FROM t1 JOIN t2 ON t1_id = t2_id ORDER BY t1_id";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![
-        vec!["11", "a", "z"],
-        vec!["22", "b", "y"],
-        vec!["44", "d", "x"],
-    ];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn left_join() -> Result<()> {
-    let mut ctx = create_join_context("t1_id", "t2_id")?;
-    let sql = "SELECT t1_id, t1_name, t2_name FROM t1 LEFT JOIN t2 ON t1_id = t2_id ORDER BY t1_id";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![
-        vec!["11", "a", "z"],
-        vec!["22", "b", "y"],
-        vec!["33", "c", "NULL"],
-        vec!["44", "d", "x"],
-    ];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn right_join() -> Result<()> {
-    let mut ctx = create_join_context("t1_id", "t2_id")?;
-    let sql =
-        "SELECT t1_id, t1_name, t2_name FROM t1 RIGHT JOIN t2 ON t1_id = t2_id ORDER BY t1_id";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![
-        vec!["NULL", "NULL", "w"],
-        vec!["11", "a", "z"],
-        vec!["22", "b", "y"],
-        vec!["44", "d", "x"],
-    ];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn left_join_using() -> Result<()> {
-    let mut ctx = create_join_context("id", "id")?;
-    let sql = "SELECT id, t1_name, t2_name FROM t1 LEFT JOIN t2 USING (id) ORDER BY id";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![
-        vec!["11", "a", "z"],
-        vec!["22", "b", "y"],
-        vec!["33", "c", "NULL"],
-        vec!["44", "d", "x"],
-    ];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn equijoin_implicit_syntax() -> Result<()> {
-    let mut ctx = create_join_context("t1_id", "t2_id")?;
-    let sql =
-        "SELECT t1_id, t1_name, t2_name FROM t1, t2 WHERE t1_id = t2_id ORDER BY t1_id";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![
-        vec!["11", "a", "z"],
-        vec!["22", "b", "y"],
-        vec!["44", "d", "x"],
-    ];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn equijoin_implicit_syntax_with_filter() -> Result<()> {
-    let mut ctx = create_join_context("t1_id", "t2_id")?;
-    let sql = "SELECT t1_id, t1_name, t2_name \
-        FROM t1, t2 \
-        WHERE t1_id > 0 \
-        AND t1_id = t2_id \
-        AND t2_id < 99 \
-        ORDER BY t1_id";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![
-        vec!["11", "a", "z"],
-        vec!["22", "b", "y"],
-        vec!["44", "d", "x"],
-    ];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn equijoin_implicit_syntax_reversed() -> Result<()> {
-    let mut ctx = create_join_context("t1_id", "t2_id")?;
-    let sql =
-        "SELECT t1_id, t1_name, t2_name FROM t1, t2 WHERE t2_id = t1_id ORDER BY t1_id";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![
-        vec!["11", "a", "z"],
-        vec!["22", "b", "y"],
-        vec!["44", "d", "x"],
-    ];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn cartesian_join() -> Result<()> {
-    let ctx = create_join_context("t1_id", "t2_id")?;
-    let sql = "SELECT t1_id, t1_name, t2_name FROM t1, t2 ORDER BY t1_id";
-    let maybe_plan = ctx.create_logical_plan(&sql);
-    assert_eq!(
-        "This feature is not implemented: Cartesian joins are not supported",
-        &format!("{}", maybe_plan.err().unwrap())
-    );
-    Ok(())
-}
-
-fn create_join_context(
-    column_left: &str,
-    column_right: &str,
-) -> Result<ExecutionContext> {
-    let mut ctx = ExecutionContext::new();
-
-    let t1_schema = Arc::new(Schema::new(vec![
-        Field::new(column_left, DataType::UInt32, true),
-        Field::new("t1_name", DataType::Utf8, true),
-    ]));
-    let t1_data = RecordBatch::try_new(
-        t1_schema.clone(),
-        vec![
-            Arc::new(UInt32Array::from(vec![11, 22, 33, 44])),
-            Arc::new(StringArray::from(vec![
-                Some("a"),
-                Some("b"),
-                Some("c"),
-                Some("d"),
-            ])),
-        ],
-    )?;
-    let t1_table = MemTable::try_new(t1_schema, vec![vec![t1_data]])?;
-    ctx.register_table("t1", Arc::new(t1_table))?;
-
-    let t2_schema = Arc::new(Schema::new(vec![
-        Field::new(column_right, DataType::UInt32, true),
-        Field::new("t2_name", DataType::Utf8, true),
-    ]));
-    let t2_data = RecordBatch::try_new(
-        t2_schema.clone(),
-        vec![
-            Arc::new(UInt32Array::from(vec![11, 22, 44, 55])),
-            Arc::new(StringArray::from(vec![
-                Some("z"),
-                Some("y"),
-                Some("x"),
-                Some("w"),
-            ])),
-        ],
-    )?;
-    let t2_table = MemTable::try_new(t2_schema, vec![vec![t2_data]])?;
-    ctx.register_table("t2", Arc::new(t2_table))?;
-
-    Ok(ctx)
-}
-
-fn create_join_context_qualified() -> Result<ExecutionContext> {
-    let mut ctx = ExecutionContext::new();
-
-    let t1_schema = Arc::new(Schema::new(vec![
-        Field::new("a", DataType::UInt32, true),
-        Field::new("b", DataType::UInt32, true),
-        Field::new("c", DataType::UInt32, true),
-    ]));
-    let t1_data = RecordBatch::try_new(
-        t1_schema.clone(),
-        vec![
-            Arc::new(UInt32Array::from(vec![1, 2, 3, 4])),
-            Arc::new(UInt32Array::from(vec![10, 20, 30, 40])),
-            Arc::new(UInt32Array::from(vec![50, 60, 70, 80])),
-        ],
-    )?;
-    let t1_table = MemTable::try_new(t1_schema, vec![vec![t1_data]])?;
-    ctx.register_table("t1", Arc::new(t1_table))?;
-
-    let t2_schema = Arc::new(Schema::new(vec![
-        Field::new("a", DataType::UInt32, true),
-        Field::new("b", DataType::UInt32, true),
-        Field::new("c", DataType::UInt32, true),
-    ]));
-    let t2_data = RecordBatch::try_new(
-        t2_schema.clone(),
-        vec![
-            Arc::new(UInt32Array::from(vec![1, 2, 9, 4])),
-            Arc::new(UInt32Array::from(vec![100, 200, 300, 400])),
-            Arc::new(UInt32Array::from(vec![500, 600, 700, 800])),
-        ],
-    )?;
-    let t2_table = MemTable::try_new(t2_schema, vec![vec![t2_data]])?;
-    ctx.register_table("t2", Arc::new(t2_table))?;
-
-    Ok(ctx)
-}
-
-#[tokio::test]
-async fn csv_explain() {
-    let mut ctx = ExecutionContext::new();
-    register_aggregate_csv_by_sql(&mut ctx).await;
-    let sql = "EXPLAIN SELECT c1 FROM aggregate_test_100 where c2 > 10";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![
-        vec![
-            "logical_plan",
-            "Projection: #c1\n  Filter: #c2 Gt Int64(10)\n    TableScan: aggregate_test_100 projection=None"
-        ]
-    ];
-    assert_eq!(expected, actual);
-
-    // Also, expect same result with lowercase explain
-    let sql = "explain SELECT c1 FROM aggregate_test_100 where c2 > 10";
-    let actual = execute(&mut ctx, sql).await;
-    assert_eq!(expected, actual);
-}
-
-#[tokio::test]
-async fn csv_explain_verbose() {
-    let mut ctx = ExecutionContext::new();
-    register_aggregate_csv_by_sql(&mut ctx).await;
-    let sql = "EXPLAIN VERBOSE SELECT c1 FROM aggregate_test_100 where c2 > 10";
-    let actual = execute(&mut ctx, sql).await;
-
-    // flatten to a single string
-    let actual = actual.into_iter().map(|r| r.join("\t")).collect::<String>();
-
-    // Don't actually test the contents of the debuging output (as
-    // that may change and keeping this test updated will be a
-    // pain). Instead just check for a few key pieces.
-    assert!(actual.contains("logical_plan"), "Actual: '{}'", actual);
-    assert!(actual.contains("physical_plan"), "Actual: '{}'", actual);
-    assert!(actual.contains("#c2 Gt Int64(10)"), "Actual: '{}'", actual);
-}
-
-fn aggr_test_schema() -> SchemaRef {
-    Arc::new(Schema::new(vec![
-        Field::new("c1", DataType::Utf8, false),
-        Field::new("c2", DataType::UInt32, false),
-        Field::new("c3", DataType::Int8, false),
-        Field::new("c4", DataType::Int16, false),
-        Field::new("c5", DataType::Int32, false),
-        Field::new("c6", DataType::Int64, false),
-        Field::new("c7", DataType::UInt8, false),
-        Field::new("c8", DataType::UInt16, false),
-        Field::new("c9", DataType::UInt32, false),
-        Field::new("c10", DataType::UInt64, false),
-        Field::new("c11", DataType::Float32, false),
-        Field::new("c12", DataType::Float64, false),
-        Field::new("c13", DataType::Utf8, false),
-    ]))
-}
-
-async fn register_aggregate_csv_by_sql(ctx: &mut ExecutionContext) {
-    let testdata = arrow::util::test_util::arrow_test_data();
-
-    // TODO: The following c9 should be migrated to UInt32 and c10 should be UInt64 once
-    // unsigned is supported.
-    let df = ctx
-        .sql(&format!(
-            "
-    CREATE EXTERNAL TABLE aggregate_test_100 (
-        c1  VARCHAR NOT NULL,
-        c2  INT NOT NULL,
-        c3  SMALLINT NOT NULL,
-        c4  SMALLINT NOT NULL,
-        c5  INT NOT NULL,
-        c6  BIGINT NOT NULL,
-        c7  SMALLINT NOT NULL,
-        c8  INT NOT NULL,
-        c9  BIGINT NOT NULL,
-        c10 VARCHAR NOT NULL,
-        c11 FLOAT NOT NULL,
-        c12 DOUBLE NOT NULL,
-        c13 VARCHAR NOT NULL
-    )
-    STORED AS CSV
-    WITH HEADER ROW
-    LOCATION '{}/csv/aggregate_test_100.csv'
-    ",
-            testdata
-        ))
-        .expect("Creating dataframe for CREATE EXTERNAL TABLE");
-
-    // Mimic the CLI and execute the resulting plan -- even though it
-    // is effectively a no-op (returns zero rows)
-    let results = df.collect().await.expect("Executing CREATE EXTERNAL TABLE");
-    assert!(
-        results.is_empty(),
-        "Expected no rows from executing CREATE EXTERNAL TABLE"
-    );
-}
-
-fn register_aggregate_csv(ctx: &mut ExecutionContext) -> Result<()> {
-    let testdata = arrow::util::test_util::arrow_test_data();
-    let schema = aggr_test_schema();
-    ctx.register_csv(
-        "aggregate_test_100",
-        &format!("{}/csv/aggregate_test_100.csv", testdata),
-        CsvReadOptions::new().schema(&schema),
-    )?;
-    Ok(())
-}
-
-fn register_aggregate_simple_csv(ctx: &mut ExecutionContext) -> Result<()> {
-    // It's not possible to use aggregate_test_100, not enought similar values to test grouping on floats
-    let schema = Arc::new(Schema::new(vec![
-        Field::new("c1", DataType::Float32, false),
-        Field::new("c2", DataType::Float64, false),
-        Field::new("c3", DataType::Boolean, false),
-    ]));
-
-    ctx.register_csv(
-        "aggregate_simple",
-        "tests/aggregate_simple.csv",
-        CsvReadOptions::new().schema(&schema),
-    )?;
-    Ok(())
-}
-
-fn register_alltypes_parquet(ctx: &mut ExecutionContext) {
-    let testdata = arrow::util::test_util::parquet_test_data();
-    ctx.register_parquet(
-        "alltypes_plain",
-        &format!("{}/alltypes_plain.parquet", testdata),
-    )
-    .unwrap();
-}
-
-/// Execute query and return result set as 2-d table of Vecs
-/// `result[row][column]`
-async fn execute(ctx: &mut ExecutionContext, sql: &str) -> Vec<Vec<String>> {
-    let msg = format!("Creating logical plan for '{}'", sql);
-    let plan = ctx.create_logical_plan(&sql).expect(&msg);
-    let logical_schema = plan.schema();
-
-    let msg = format!("Optimizing logical plan for '{}': {:?}", sql, plan);
-    let plan = ctx.optimize(&plan).expect(&msg);
-    let optimized_logical_schema = plan.schema();
-
-    let msg = format!("Creating physical plan for '{}': {:?}", sql, plan);
-    let plan = ctx.create_physical_plan(&plan).expect(&msg);
-
-    let msg = format!("Executing physical plan for '{}': {:?}", sql, plan);
-    let results = collect(plan).await.expect(&msg);
-
-    assert_eq!(logical_schema.as_ref(), optimized_logical_schema.as_ref());
-
-    result_vec(&results)
-}
-
-/// Specialised String representation
-fn col_str(column: &ArrayRef, row_index: usize) -> String {
-    if column.is_null(row_index) {
-        return "NULL".to_string();
-    }
-
-    // Special case ListArray as there is no pretty print support for it yet
-    if let DataType::FixedSizeList(_, n) = column.data_type() {
-        let array = column
-            .as_any()
-            .downcast_ref::<FixedSizeListArray>()
-            .unwrap()
-            .value(row_index);
-
-        let mut r = Vec::with_capacity(*n as usize);
-        for i in 0..*n {
-            r.push(col_str(&array, i as usize));
-        }
-        return format!("[{}]", r.join(","));
-    }
-
-    array_value_to_string(column, row_index)
-        .ok()
-        .unwrap_or_else(|| "???".to_string())
-}
-
-/// Converts the results into a 2d array of strings, `result[row][column]`
-/// Special cases nulls to NULL for testing
-fn result_vec(results: &[RecordBatch]) -> Vec<Vec<String>> {
-    let mut result = vec![];
-    for batch in results {
-        for row_index in 0..batch.num_rows() {
-            let row_vec = batch
-                .columns()
-                .iter()
-                .map(|column| col_str(column, row_index))
-                .collect();
-            result.push(row_vec);
-        }
-    }
-    result
-}
-
-async fn generic_query_length<T: 'static + Array + From<Vec<&'static str>>>(
-    datatype: DataType,
-) -> Result<()> {
-    let schema = Arc::new(Schema::new(vec![Field::new("c1", datatype, false)]));
-
-    let data = RecordBatch::try_new(
-        schema.clone(),
-        vec![Arc::new(T::from(vec!["", "a", "aa", "aaa"]))],
-    )?;
-
-    let table = MemTable::try_new(schema, vec![vec![data]])?;
-
-    let mut ctx = ExecutionContext::new();
-    ctx.register_table("test", Arc::new(table))?;
-    let sql = "SELECT length(c1) FROM test";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![vec!["0"], vec!["1"], vec!["2"], vec!["3"]];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-#[cfg_attr(not(feature = "unicode_expressions"), ignore)]
-async fn query_length() -> Result<()> {
-    generic_query_length::<StringArray>(DataType::Utf8).await
-}
-
-#[tokio::test]
-#[cfg_attr(not(feature = "unicode_expressions"), ignore)]
-async fn query_large_length() -> Result<()> {
-    generic_query_length::<LargeStringArray>(DataType::LargeUtf8).await
-}
-
-#[tokio::test]
-async fn query_not() -> Result<()> {
-    let schema = Arc::new(Schema::new(vec![Field::new("c1", DataType::Boolean, true)]));
-
-    let data = RecordBatch::try_new(
-        schema.clone(),
-        vec![Arc::new(BooleanArray::from(vec![
-            Some(false),
-            None,
-            Some(true),
-        ]))],
-    )?;
-
-    let table = MemTable::try_new(schema, vec![vec![data]])?;
-
-    let mut ctx = ExecutionContext::new();
-    ctx.register_table("test", Arc::new(table))?;
-    let sql = "SELECT NOT c1 FROM test";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![vec!["true"], vec!["NULL"], vec!["false"]];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn query_concat() -> Result<()> {
-    let schema = Arc::new(Schema::new(vec![
-        Field::new("c1", DataType::Utf8, false),
-        Field::new("c2", DataType::Int32, true),
-    ]));
-
-    let data = RecordBatch::try_new(
-        schema.clone(),
-        vec![
-            Arc::new(StringArray::from(vec!["", "a", "aa", "aaa"])),
-            Arc::new(Int32Array::from(vec![Some(0), Some(1), None, Some(3)])),
-        ],
-    )?;
-
-    let table = MemTable::try_new(schema, vec![vec![data]])?;
-
-    let mut ctx = ExecutionContext::new();
-    ctx.register_table("test", Arc::new(table))?;
-    let sql = "SELECT concat(c1, '-hi-', cast(c2 as varchar)) FROM test";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![
-        vec!["-hi-0"],
-        vec!["a-hi-1"],
-        vec!["aa-hi-"],
-        vec!["aaa-hi-3"],
-    ];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn query_array() -> Result<()> {
-    let schema = Arc::new(Schema::new(vec![
-        Field::new("c1", DataType::Utf8, false),
-        Field::new("c2", DataType::Int32, true),
-    ]));
-
-    let data = RecordBatch::try_new(
-        schema.clone(),
-        vec![
-            Arc::new(StringArray::from(vec!["", "a", "aa", "aaa"])),
-            Arc::new(Int32Array::from(vec![Some(0), Some(1), None, Some(3)])),
-        ],
-    )?;
-
-    let table = MemTable::try_new(schema, vec![vec![data]])?;
-
-    let mut ctx = ExecutionContext::new();
-    ctx.register_table("test", Arc::new(table))?;
-    let sql = "SELECT array(c1, cast(c2 as varchar)) FROM test";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![
-        vec!["[,0]"],
-        vec!["[a,1]"],
-        vec!["[aa,NULL]"],
-        vec!["[aaa,3]"],
-    ];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_sum_cast() {
-    let mut ctx = ExecutionContext::new();
-    register_aggregate_csv_by_sql(&mut ctx).await;
-    // c8 = i32; c9 = i64
-    let sql = "SELECT c8 + c9 FROM aggregate_test_100";
-    // check that the physical and logical schemas are equal
-    execute(&mut ctx, sql).await;
-}
-
-#[tokio::test]
-async fn query_where_neg_num() -> Result<()> {
-    let mut ctx = ExecutionContext::new();
-    register_aggregate_csv_by_sql(&mut ctx).await;
-
-    // Negative numbers do not parse correctly as of Arrow 2.0.0
-    let sql = "select c7, c8 from aggregate_test_100 where c7 >= -2 and c7 < 10";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![
-        vec!["7", "45465"],
-        vec!["5", "40622"],
-        vec!["0", "61069"],
-        vec!["2", "20120"],
-        vec!["4", "39363"],
-    ];
-    assert_eq!(expected, actual);
-
-    // Also check floating point neg numbers
-    let sql = "select c7, c8 from aggregate_test_100 where c7 >= -2.9 and c7 < 10";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![
-        vec!["7", "45465"],
-        vec!["5", "40622"],
-        vec!["0", "61069"],
-        vec!["2", "20120"],
-        vec!["4", "39363"],
-    ];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn like() -> Result<()> {
-    let mut ctx = ExecutionContext::new();
-    register_aggregate_csv_by_sql(&mut ctx).await;
-    let sql = "SELECT COUNT(c1) FROM aggregate_test_100 WHERE c13 LIKE '%FB%'";
-    // check that the physical and logical schemas are equal
-    let actual = execute(&mut ctx, sql).await;
-
-    let expected = vec![vec!["1"]];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-fn make_timestamp_nano_table() -> Result<Arc<MemTable>> {
-    let schema = Arc::new(Schema::new(vec![
-        Field::new("ts", DataType::Timestamp(TimeUnit::Nanosecond, None), false),
-        Field::new("value", DataType::Int32, true),
-    ]));
-
-    let mut builder = TimestampNanosecondArray::builder(3);
-
-    builder.append_value(1599572549190855000)?; // 2020-09-08T13:42:29.190855+00:00
-    builder.append_value(1599568949190855000)?; // 2020-09-08T12:42:29.190855+00:00
-    builder.append_value(1599565349190855000)?; // 2020-09-08T11:42:29.190855+00:00
-
-    let data = RecordBatch::try_new(
-        schema.clone(),
-        vec![
-            Arc::new(builder.finish()),
-            Arc::new(Int32Array::from(vec![Some(1), Some(2), Some(3)])),
-        ],
-    )?;
-    let table = MemTable::try_new(schema, vec![vec![data]])?;
-    Ok(Arc::new(table))
-}
-
-#[tokio::test]
-async fn to_timestamp() -> Result<()> {
-    let mut ctx = ExecutionContext::new();
-    ctx.register_table("ts_data", make_timestamp_nano_table()?)?;
-
-    let sql = "SELECT COUNT(*) FROM ts_data where ts > to_timestamp('2020-09-08T12:00:00+00:00')";
-    let actual = execute(&mut ctx, sql).await;
-
-    let expected = vec![vec!["2"]];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn query_is_null() -> Result<()> {
-    let schema = Arc::new(Schema::new(vec![Field::new("c1", DataType::Float64, true)]));
-
-    let data = RecordBatch::try_new(
-        schema.clone(),
-        vec![Arc::new(Float64Array::from(vec![
-            Some(1.0),
-            None,
-            Some(f64::NAN),
-        ]))],
-    )?;
-
-    let table = MemTable::try_new(schema, vec![vec![data]])?;
-
-    let mut ctx = ExecutionContext::new();
-    ctx.register_table("test", Arc::new(table))?;
-    let sql = "SELECT c1 IS NULL FROM test";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![vec!["false"], vec!["true"], vec!["false"]];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn query_is_not_null() -> Result<()> {
-    let schema = Arc::new(Schema::new(vec![Field::new("c1", DataType::Float64, true)]));
-
-    let data = RecordBatch::try_new(
-        schema.clone(),
-        vec![Arc::new(Float64Array::from(vec![
-            Some(1.0),
-            None,
-            Some(f64::NAN),
-        ]))],
-    )?;
-
-    let table = MemTable::try_new(schema, vec![vec![data]])?;
-
-    let mut ctx = ExecutionContext::new();
-    ctx.register_table("test", Arc::new(table))?;
-    let sql = "SELECT c1 IS NOT NULL FROM test";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![vec!["true"], vec!["false"], vec!["true"]];
-
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn query_count_distinct() -> Result<()> {
-    let schema = Arc::new(Schema::new(vec![Field::new("c1", DataType::Int32, true)]));
-
-    let data = RecordBatch::try_new(
-        schema.clone(),
-        vec![Arc::new(Int32Array::from(vec![
-            Some(0),
-            Some(1),
-            None,
-            Some(3),
-            Some(3),
-        ]))],
-    )?;
-
-    let table = MemTable::try_new(schema, vec![vec![data]])?;
-
-    let mut ctx = ExecutionContext::new();
-    ctx.register_table("test", Arc::new(table))?;
-    let sql = "SELECT COUNT(DISTINCT c1) FROM test";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![vec!["3".to_string()]];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn query_on_string_dictionary() -> Result<()> {
-    // Test to ensure DataFusion can operate on dictionary types
-    // Use StringDictionary (32 bit indexes = keys)
-    let field_type =
-        DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8));
-    let schema = Arc::new(Schema::new(vec![Field::new("d1", field_type, true)]));
-
-    let keys_builder = PrimitiveBuilder::<Int32Type>::new(10);
-    let values_builder = StringBuilder::new(10);
-    let mut builder = StringDictionaryBuilder::new(keys_builder, values_builder);
-
-    builder.append("one")?;
-    builder.append_null()?;
-    builder.append("three")?;
-    let array = Arc::new(builder.finish());
-
-    let data = RecordBatch::try_new(schema.clone(), vec![array])?;
-
-    let table = MemTable::try_new(schema, vec![vec![data]])?;
-    let mut ctx = ExecutionContext::new();
-    ctx.register_table("test", Arc::new(table))?;
-
-    // Basic SELECT
-    let sql = "SELECT * FROM test";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![vec!["one"], vec!["NULL"], vec!["three"]];
-    assert_eq!(expected, actual);
-
-    // basic filtering
-    let sql = "SELECT * FROM test WHERE d1 IS NOT NULL";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![vec!["one"], vec!["three"]];
-    assert_eq!(expected, actual);
-
-    // filtering with constant
-    let sql = "SELECT * FROM test WHERE d1 = 'three'";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![vec!["three"]];
-    assert_eq!(expected, actual);
-
-    // Expression evaluation
-    let sql = "SELECT concat(d1, '-foo') FROM test";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![vec!["one-foo"], vec!["-foo"], vec!["three-foo"]];
-    assert_eq!(expected, actual);
-
-    // aggregation
-    let sql = "SELECT COUNT(d1) FROM test";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![vec!["2"]];
-    assert_eq!(expected, actual);
-
-    Ok(())
-}
-
-#[tokio::test]
-async fn query_without_from() -> Result<()> {
-    // Test for SELECT <expression> without FROM.
-    // Should evaluate expressions in project position.
-    let mut ctx = ExecutionContext::new();
-
-    let sql = "SELECT 1";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![vec!["1"]];
-    assert_eq!(expected, actual);
-
-    let sql = "SELECT 1+2, 3/4, cos(0)";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![vec!["3", "0", "1"]];
-    assert_eq!(expected, actual);
-
-    Ok(())
-}
-
-#[tokio::test]
-async fn query_cte() -> Result<()> {
-    // Test for SELECT <expression> without FROM.
-    // Should evaluate expressions in project position.
-    let mut ctx = ExecutionContext::new();
-
-    // simple with
-    let sql = "WITH t AS (SELECT 1) SELECT * FROM t";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![vec!["1"]];
-    assert_eq!(expected, actual);
-
-    // with + union
-    let sql =
-        "WITH t AS (SELECT 1 AS a), u AS (SELECT 2 AS a) SELECT * FROM t UNION ALL SELECT * FROM u";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![vec!["1"], vec!["2"]];
-    assert_eq!(expected, actual);
-
-    // with + join
-    let sql = "WITH t AS (SELECT 1 AS id1), u AS (SELECT 1 AS id2, 5 as x) SELECT x FROM t JOIN u ON (id1 = id2)";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![vec!["5"]];
-    assert_eq!(expected, actual);
-
-    // backward reference
-    let sql = "WITH t AS (SELECT 1 AS id1), u AS (SELECT * FROM t) SELECT * from u";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![vec!["1"]];
-    assert_eq!(expected, actual);
-
-    Ok(())
-}
-
-#[tokio::test]
-async fn query_cte_incorrect() -> Result<()> {
-    let ctx = ExecutionContext::new();
-
-    // self reference
-    let sql = "WITH t AS (SELECT * FROM t) SELECT * from u";
-    let plan = ctx.create_logical_plan(&sql);
-    assert!(plan.is_err());
-    assert_eq!(
-        format!("{}", plan.unwrap_err()),
-        "Error during planning: Table or CTE with name \'t\' not found"
-    );
-
-    // forward referencing
-    let sql = "WITH t AS (SELECT * FROM u), u AS (SELECT 1) SELECT * from u";
-    let plan = ctx.create_logical_plan(&sql);
-    assert!(plan.is_err());
-    assert_eq!(
-        format!("{}", plan.unwrap_err()),
-        "Error during planning: Table or CTE with name \'u\' not found"
-    );
-
-    // wrapping should hide u
-    let sql = "WITH t AS (WITH u as (SELECT 1) SELECT 1) SELECT * from u";
-    let plan = ctx.create_logical_plan(&sql);
-    assert!(plan.is_err());
-    assert_eq!(
-        format!("{}", plan.unwrap_err()),
-        "Error during planning: Table or CTE with name \'u\' not found"
-    );
-
-    Ok(())
-}
-
-#[tokio::test]
-async fn query_scalar_minus_array() -> Result<()> {
-    let schema = Arc::new(Schema::new(vec![Field::new("c1", DataType::Int32, true)]));
-
-    let data = RecordBatch::try_new(
-        schema.clone(),
-        vec![Arc::new(Int32Array::from(vec![
-            Some(0),
-            Some(1),
-            None,
-            Some(3),
-        ]))],
-    )?;
-
-    let table = MemTable::try_new(schema, vec![vec![data]])?;
-
-    let mut ctx = ExecutionContext::new();
-    ctx.register_table("test", Arc::new(table))?;
-    let sql = "SELECT 4 - c1 FROM test";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![vec!["4"], vec!["3"], vec!["NULL"], vec!["1"]];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-fn assert_float_eq<T>(expected: &[Vec<T>], received: &[Vec<String>])
-where
-    T: AsRef<str>,
-{
-    expected
-        .iter()
-        .flatten()
-        .zip(received.iter().flatten())
-        .for_each(|(l, r)| {
-            let (l, r) = (
-                l.as_ref().parse::<f64>().unwrap(),
-                r.as_str().parse::<f64>().unwrap(),
-            );
-            assert!((l - r).abs() <= 2.0 * f64::EPSILON);
-        });
-}
-
-#[tokio::test]
-async fn csv_between_expr() -> Result<()> {
-    let mut ctx = ExecutionContext::new();
-    register_aggregate_csv(&mut ctx)?;
-    let sql = "SELECT c4 FROM aggregate_test_100 WHERE c12 BETWEEN 0.995 AND 1.0";
-    let mut actual = execute(&mut ctx, sql).await;
-    actual.sort();
-    let expected = vec![vec!["10837"]];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn csv_between_expr_negated() -> Result<()> {
-    let mut ctx = ExecutionContext::new();
-    register_aggregate_csv(&mut ctx)?;
-    let sql = "SELECT c4 FROM aggregate_test_100 WHERE c12 NOT BETWEEN 0 AND 0.995";
-    let mut actual = execute(&mut ctx, sql).await;
-    actual.sort();
-    let expected = vec![vec!["10837"]];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn csv_group_by_date() -> Result<()> {
-    let mut ctx = ExecutionContext::new();
-    let schema = Arc::new(Schema::new(vec![
-        Field::new("date", DataType::Date32, false),
-        Field::new("cnt", DataType::Int32, false),
-    ]));
-    let data = RecordBatch::try_new(
-        schema.clone(),
-        vec![
-            Arc::new(Date32Array::from(vec![
-                Some(100),
-                Some(100),
-                Some(100),
-                Some(101),
-                Some(101),
-                Some(101),
-            ])),
-            Arc::new(Int32Array::from(vec![
-                Some(1),
-                Some(2),
-                Some(3),
-                Some(3),
-                Some(3),
-                Some(3),
-            ])),
-        ],
-    )?;
-    let table = MemTable::try_new(schema, vec![vec![data]])?;
-
-    ctx.register_table("dates", Arc::new(table))?;
-    let sql = "SELECT SUM(cnt) FROM dates GROUP BY date";
-    let actual = execute(&mut ctx, sql).await;
-    let mut actual: Vec<String> = actual.iter().flatten().cloned().collect();
-    actual.sort();
-    let expected = vec!["6", "9"];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn group_by_timestamp_millis() -> Result<()> {
-    let mut ctx = ExecutionContext::new();
-
-    let schema = Arc::new(Schema::new(vec![
-        Field::new(
-            "timestamp",
-            DataType::Timestamp(TimeUnit::Millisecond, None),
-            false,
-        ),
-        Field::new("count", DataType::Int32, false),
-    ]));
-    let base_dt = Utc.ymd(2018, 7, 1).and_hms(6, 0, 0); // 2018-Jul-01 06:00
-    let hour1 = Duration::hours(1);
-    let timestamps = vec![
-        base_dt.timestamp_millis(),
-        (base_dt + hour1).timestamp_millis(),
-        base_dt.timestamp_millis(),
-        base_dt.timestamp_millis(),
-        (base_dt + hour1).timestamp_millis(),
-        (base_dt + hour1).timestamp_millis(),
-    ];
-    let data = RecordBatch::try_new(
-        schema.clone(),
-        vec![
-            Arc::new(TimestampMillisecondArray::from(timestamps)),
-            Arc::new(Int32Array::from(vec![10, 20, 30, 40, 50, 60])),
-        ],
-    )?;
-    let t1_table = MemTable::try_new(schema, vec![vec![data]])?;
-    ctx.register_table("t1", Arc::new(t1_table)).unwrap();
-
-    let sql =
-        "SELECT timestamp, SUM(count) FROM t1 GROUP BY timestamp ORDER BY timestamp ASC";
-    let actual = execute(&mut ctx, sql).await;
-    let actual: Vec<String> = actual.iter().map(|row| row[1].clone()).collect();
-    let expected = vec!["80", "130"];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-macro_rules! test_expression {
-    ($SQL:expr, $EXPECTED:expr) => {
-        let mut ctx = ExecutionContext::new();
-        let sql = format!("SELECT {}", $SQL);
-        let actual = execute(&mut ctx, sql.as_str()).await;
-        assert_eq!($EXPECTED, actual[0][0]);
-    };
-}
-
-#[tokio::test]
-async fn test_boolean_expressions() -> Result<()> {
-    test_expression!("true", "true");
-    test_expression!("false", "false");
-    Ok(())
-}
-
-#[tokio::test]
-#[cfg_attr(not(feature = "crypto_expressions"), ignore)]
-async fn test_crypto_expressions() -> Result<()> {
-    test_expression!("md5('tom')", "34b7da764b21d298ef307d04d8152dc5");
-    test_expression!("md5('')", "d41d8cd98f00b204e9800998ecf8427e");
-    test_expression!("md5(NULL)", "NULL");
-    test_expression!(
-        "sha224('tom')",
-        "0bf6cb62649c42a9ae3876ab6f6d92ad36cb5414e495f8873292be4d"
-    );
-    test_expression!(
-        "sha224('')",
-        "d14a028c2a3a2bc9476102bb288234c415a2b01f828ea62ac5b3e42f"
-    );
-    test_expression!("sha224(NULL)", "NULL");
-    test_expression!(
-        "sha256('tom')",
-        "e1608f75c5d7813f3d4031cb30bfb786507d98137538ff8e128a6ff74e84e643"
-    );
-    test_expression!(
-        "sha256('')",
-        "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
-    );
-    test_expression!("sha256(NULL)", "NULL");
-    test_expression!("sha384('tom')", "096f5b68aa77848e4fdf5c1c0b350de2dbfad60ffd7c25d9ea07c6c19b8a4d55a9187eb117c557883f58c16dfac3e343");
-    test_expression!("sha384('')", "38b060a751ac96384cd9327eb1b1e36a21fdb71114be07434c0cc7bf63f6e1da274edebfe76f65fbd51ad2f14898b95b");
-    test_expression!("sha384(NULL)", "NULL");
-    test_expression!("sha512('tom')", "6e1b9b3fe840680e37051f7ad5e959d6f39ad0f8885d855166f55c659469d3c8b78118c44a2a49c72ddb481cd6d8731034e11cc030070ba843a90b3495cb8d3e");
-    test_expression!("sha512('')", "cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e");
-    test_expression!("sha512(NULL)", "NULL");
-    Ok(())
-}
-
-#[tokio::test]
-async fn test_interval_expressions() -> Result<()> {
-    test_expression!(
-        "interval '1'",
-        "0 years 0 mons 0 days 0 hours 0 mins 1.00 secs"
-    );
-    test_expression!(
-        "interval '1 second'",
-        "0 years 0 mons 0 days 0 hours 0 mins 1.00 secs"
-    );
-    test_expression!(
-        "interval '500 milliseconds'",
-        "0 years 0 mons 0 days 0 hours 0 mins 0.500 secs"
-    );
-    test_expression!(
-        "interval '5 second'",
-        "0 years 0 mons 0 days 0 hours 0 mins 5.00 secs"
-    );
-    test_expression!(
-        "interval '0.5 minute'",
-        "0 years 0 mons 0 days 0 hours 0 mins 30.00 secs"
-    );
-    test_expression!(
-        "interval '.5 minute'",
-        "0 years 0 mons 0 days 0 hours 0 mins 30.00 secs"
-    );
-    test_expression!(
-        "interval '5 minute'",
-        "0 years 0 mons 0 days 0 hours 5 mins 0.00 secs"
-    );
-    test_expression!(
-        "interval '5 minute 1 second'",
-        "0 years 0 mons 0 days 0 hours 5 mins 1.00 secs"
-    );
-    test_expression!(
-        "interval '1 hour'",
-        "0 years 0 mons 0 days 1 hours 0 mins 0.00 secs"
-    );
-    test_expression!(
-        "interval '5 hour'",
-        "0 years 0 mons 0 days 5 hours 0 mins 0.00 secs"
-    );
-    test_expression!(
-        "interval '1 day'",
-        "0 years 0 mons 1 days 0 hours 0 mins 0.00 secs"
-    );
-    test_expression!(
-        "interval '1 day 1'",
-        "0 years 0 mons 1 days 0 hours 0 mins 1.00 secs"
-    );
-    test_expression!(
-        "interval '0.5'",
-        "0 years 0 mons 0 days 0 hours 0 mins 0.500 secs"
-    );
-    test_expression!(
-        "interval '0.5 day 1'",
-        "0 years 0 mons 0 days 12 hours 0 mins 1.00 secs"
-    );
-    test_expression!(
-        "interval '0.49 day'",
-        "0 years 0 mons 0 days 11 hours 45 mins 36.00 secs"
-    );
-    test_expression!(
-        "interval '0.499 day'",
-        "0 years 0 mons 0 days 11 hours 58 mins 33.596 secs"
-    );
-    test_expression!(
-        "interval '0.4999 day'",
-        "0 years 0 mons 0 days 11 hours 59 mins 51.364 secs"
-    );
-    test_expression!(
-        "interval '0.49999 day'",
-        "0 years 0 mons 0 days 11 hours 59 mins 59.136 secs"
-    );
-    test_expression!(
-        "interval '0.49999999999 day'",
-        "0 years 0 mons 0 days 12 hours 0 mins 0.00 secs"
-    );
-    test_expression!(
-        "interval '5 day'",
-        "0 years 0 mons 5 days 0 hours 0 mins 0.00 secs"
-    );
-    test_expression!(
-        "interval '5 day 4 hours 3 minutes 2 seconds 100 milliseconds'",
-        "0 years 0 mons 5 days 4 hours 3 mins 2.100 secs"
-    );
-    test_expression!(
-        "interval '0.5 month'",
-        "0 years 0 mons 15 days 0 hours 0 mins 0.00 secs"
-    );
-    test_expression!(
-        "interval '1 month'",
-        "0 years 1 mons 0 days 0 hours 0 mins 0.00 secs"
-    );
-    test_expression!(
-        "interval '5 month'",
-        "0 years 5 mons 0 days 0 hours 0 mins 0.00 secs"
-    );
-    test_expression!(
-        "interval '13 month'",
-        "1 years 1 mons 0 days 0 hours 0 mins 0.00 secs"
-    );
-    test_expression!(
-        "interval '0.5 year'",
-        "0 years 6 mons 0 days 0 hours 0 mins 0.00 secs"
-    );
-    test_expression!(
-        "interval '1 year'",
-        "1 years 0 mons 0 days 0 hours 0 mins 0.00 secs"
-    );
-    test_expression!(
-        "interval '2 year'",
-        "2 years 0 mons 0 days 0 hours 0 mins 0.00 secs"
-    );
-    Ok(())
-}
-
-#[tokio::test]
-async fn test_string_expressions() -> Result<()> {
-    test_expression!("ascii('')", "0");
-    test_expression!("ascii('x')", "120");
-    test_expression!("ascii(NULL)", "NULL");
-    test_expression!("bit_length('')", "0");
-    test_expression!("bit_length('chars')", "40");
-    test_expression!("bit_length('josé')", "40");
-    test_expression!("bit_length(NULL)", "NULL");
-    test_expression!("btrim(' xyxtrimyyx ', NULL)", "NULL");
-    test_expression!("btrim(' xyxtrimyyx ')", "xyxtrimyyx");
-    test_expression!("btrim('\n xyxtrimyyx \n')", "\n xyxtrimyyx \n");
-    test_expression!("btrim('xyxtrimyyx', 'xyz')", "trim");
-    test_expression!("btrim('\nxyxtrimyyx\n', 'xyz\n')", "trim");
-    test_expression!("btrim(NULL, 'xyz')", "NULL");
-    test_expression!("chr(CAST(120 AS int))", "x");
-    test_expression!("chr(CAST(128175 AS int))", "💯");
-    test_expression!("chr(CAST(NULL AS int))", "NULL");
-    test_expression!("concat('a','b','c')", "abc");
-    test_expression!("concat('abcde', 2, NULL, 22)", "abcde222");
-    test_expression!("concat(NULL)", "");
-    test_expression!("concat_ws(',', 'abcde', 2, NULL, 22)", "abcde,2,22");
-    test_expression!("concat_ws('|','a','b','c')", "a|b|c");
-    test_expression!("concat_ws('|',NULL)", "");
-    test_expression!("concat_ws(NULL,'a',NULL,'b','c')", "NULL");
-    test_expression!("initcap('')", "");
-    test_expression!("initcap('hi THOMAS')", "Hi Thomas");
-    test_expression!("initcap(NULL)", "NULL");
-    test_expression!("lower('')", "");
-    test_expression!("lower('TOM')", "tom");
-    test_expression!("lower(NULL)", "NULL");
-    test_expression!("ltrim(' zzzytest ', NULL)", "NULL");
-    test_expression!("ltrim(' zzzytest ')", "zzzytest ");
-    test_expression!("ltrim('zzzytest', 'xyz')", "test");
-    test_expression!("ltrim(NULL, 'xyz')", "NULL");
-    test_expression!("octet_length('')", "0");
-    test_expression!("octet_length('chars')", "5");
-    test_expression!("octet_length('josé')", "5");
-    test_expression!("octet_length(NULL)", "NULL");
-    test_expression!("repeat('Pg', 4)", "PgPgPgPg");
-    test_expression!("repeat('Pg', CAST(NULL AS INT))", "NULL");
-    test_expression!("repeat(NULL, 4)", "NULL");
-    test_expression!("replace('abcdefabcdef', 'cd', 'XX')", "abXXefabXXef");
-    test_expression!("replace('abcdefabcdef', 'cd', NULL)", "NULL");
-    test_expression!("replace('abcdefabcdef', 'notmatch', 'XX')", "abcdefabcdef");
-    test_expression!("replace('abcdefabcdef', NULL, 'XX')", "NULL");
-    test_expression!("replace(NULL, 'cd', 'XX')", "NULL");
-    test_expression!("rtrim(' testxxzx ')", " testxxzx");
-    test_expression!("rtrim(' zzzytest ', NULL)", "NULL");
-    test_expression!("rtrim('testxxzx', 'xyz')", "test");
-    test_expression!("rtrim(NULL, 'xyz')", "NULL");
-    test_expression!("split_part('abc~@~def~@~ghi', '~@~', 2)", "def");
-    test_expression!("split_part('abc~@~def~@~ghi', '~@~', 20)", "");
-    test_expression!("split_part(NULL, '~@~', 20)", "NULL");
-    test_expression!("split_part('abc~@~def~@~ghi', NULL, 20)", "NULL");
-    test_expression!(
-        "split_part('abc~@~def~@~ghi', '~@~', CAST(NULL AS INT))",
-        "NULL"
-    );
-    test_expression!("starts_with('alphabet', 'alph')", "true");
-    test_expression!("starts_with('alphabet', 'blph')", "false");
-    test_expression!("starts_with(NULL, 'blph')", "NULL");
-    test_expression!("starts_with('alphabet', NULL)", "NULL");
-    test_expression!("to_hex(2147483647)", "7fffffff");
-    test_expression!("to_hex(9223372036854775807)", "7fffffffffffffff");
-    test_expression!("to_hex(CAST(NULL AS int))", "NULL");
-    test_expression!("trim(' tom ')", "tom");
-    test_expression!("trim(' tom')", "tom");
-    test_expression!("trim('')", "");
-    test_expression!("trim('tom ')", "tom");
-    test_expression!("upper('')", "");
-    test_expression!("upper('tom')", "TOM");
-    test_expression!("upper(NULL)", "NULL");
-    Ok(())
-}
-
-#[tokio::test]
-#[cfg_attr(not(feature = "unicode_expressions"), ignore)]
-async fn test_unicode_expressions() -> Result<()> {
-    test_expression!("char_length('')", "0");
-    test_expression!("char_length('chars')", "5");
-    test_expression!("char_length('josé')", "4");
-    test_expression!("char_length(NULL)", "NULL");
-    test_expression!("character_length('')", "0");
-    test_expression!("character_length('chars')", "5");
-    test_expression!("character_length('josé')", "4");
-    test_expression!("character_length(NULL)", "NULL");
-    test_expression!("left('abcde', -2)", "abc");
-    test_expression!("left('abcde', -200)", "");
-    test_expression!("left('abcde', 0)", "");
-    test_expression!("left('abcde', 2)", "ab");
-    test_expression!("left('abcde', 200)", "abcde");
-    test_expression!("left('abcde', CAST(NULL AS INT))", "NULL");
-    test_expression!("left(NULL, 2)", "NULL");
-    test_expression!("left(NULL, CAST(NULL AS INT))", "NULL");
-    test_expression!("length('')", "0");
-    test_expression!("length('chars')", "5");
-    test_expression!("length('josé')", "4");
-    test_expression!("length(NULL)", "NULL");
-    test_expression!("lpad('hi', 5, 'xy')", "xyxhi");
-    test_expression!("lpad('hi', 0)", "");
-    test_expression!("lpad('hi', 21, 'abcdef')", "abcdefabcdefabcdefahi");
-    test_expression!("lpad('hi', 5, 'xy')", "xyxhi");
-    test_expression!("lpad('hi', 5, NULL)", "NULL");
-    test_expression!("lpad('hi', 5)", "   hi");
-    test_expression!("lpad('hi', CAST(NULL AS INT), 'xy')", "NULL");
-    test_expression!("lpad('hi', CAST(NULL AS INT))", "NULL");
-    test_expression!("lpad('xyxhi', 3)", "xyx");
-    test_expression!("lpad(NULL, 0)", "NULL");
-    test_expression!("lpad(NULL, 5, 'xy')", "NULL");
-    test_expression!("reverse('abcde')", "edcba");
-    test_expression!("reverse('loẅks')", "skẅol");
-    test_expression!("reverse(NULL)", "NULL");
-    test_expression!("right('abcde', -2)", "cde");
-    test_expression!("right('abcde', -200)", "");
-    test_expression!("right('abcde', 0)", "");
-    test_expression!("right('abcde', 2)", "de");
-    test_expression!("right('abcde', 200)", "abcde");
-    test_expression!("right('abcde', CAST(NULL AS INT))", "NULL");
-    test_expression!("right(NULL, 2)", "NULL");
-    test_expression!("right(NULL, CAST(NULL AS INT))", "NULL");
-    test_expression!("rpad('hi', 5, 'xy')", "hixyx");
-    test_expression!("rpad('hi', 0)", "");
-    test_expression!("rpad('hi', 21, 'abcdef')", "hiabcdefabcdefabcdefa");
-    test_expression!("rpad('hi', 5, 'xy')", "hixyx");
-    test_expression!("rpad('hi', 5, NULL)", "NULL");
-    test_expression!("rpad('hi', 5)", "hi   ");
-    test_expression!("rpad('hi', CAST(NULL AS INT), 'xy')", "NULL");
-    test_expression!("rpad('hi', CAST(NULL AS INT))", "NULL");
-    test_expression!("rpad('xyxhi', 3)", "xyx");
-    test_expression!("strpos('abc', 'c')", "3");
-    test_expression!("strpos('josé', 'é')", "4");
-    test_expression!("strpos('joséésoj', 'so')", "6");
-    test_expression!("strpos('joséésoj', 'abc')", "0");
-    test_expression!("strpos(NULL, 'abc')", "NULL");
-    test_expression!("strpos('joséésoj', NULL)", "NULL");
-    test_expression!("substr('alphabet', -3)", "alphabet");
-    test_expression!("substr('alphabet', 0)", "alphabet");
-    test_expression!("substr('alphabet', 1)", "alphabet");
-    test_expression!("substr('alphabet', 2)", "lphabet");
-    test_expression!("substr('alphabet', 3)", "phabet");
-    test_expression!("substr('alphabet', 30)", "");
-    test_expression!("substr('alphabet', CAST(NULL AS int))", "NULL");
-    test_expression!("substr('alphabet', 3, 2)", "ph");
-    test_expression!("substr('alphabet', 3, 20)", "phabet");
-    test_expression!("substr('alphabet', CAST(NULL AS int), 20)", "NULL");
-    test_expression!("substr('alphabet', 3, CAST(NULL AS int))", "NULL");
-    test_expression!("translate('12345', '143', 'ax')", "a2x5");
-    test_expression!("translate(NULL, '143', 'ax')", "NULL");
-    test_expression!("translate('12345', NULL, 'ax')", "NULL");
-    test_expression!("translate('12345', '143', NULL)", "NULL");
-    Ok(())
-}
-
-#[tokio::test]
-#[cfg_attr(not(feature = "regex_expressions"), ignore)]
-async fn test_regex_expressions() -> Result<()> {
-    test_expression!("regexp_replace('ABCabcABC', '(abc)', 'X', 'gi')", "XXX");
-    test_expression!("regexp_replace('ABCabcABC', '(abc)', 'X', 'i')", "XabcABC");
-    test_expression!("regexp_replace('foobarbaz', 'b..', 'X', 'g')", "fooXX");
-    test_expression!("regexp_replace('foobarbaz', 'b..', 'X')", "fooXbaz");
-    test_expression!(
-        "regexp_replace('foobarbaz', 'b(..)', 'X\\1Y', 'g')",
-        "fooXarYXazY"
-    );
-    test_expression!(
-        "regexp_replace('foobarbaz', 'b(..)', 'X\\1Y', NULL)",
-        "NULL"
-    );
-    test_expression!("regexp_replace('foobarbaz', 'b(..)', NULL, 'g')", "NULL");
-    test_expression!("regexp_replace('foobarbaz', NULL, 'X\\1Y', 'g')", "NULL");
-    test_expression!("regexp_replace('Thomas', '.[mN]a.', 'M')", "ThM");
-    test_expression!("regexp_replace(NULL, 'b(..)', 'X\\1Y', 'g')", "NULL");
-    test_expression!("regexp_match('foobarbequebaz', '')", "[]");
-    test_expression!(
-        "regexp_match('foobarbequebaz', '(bar)(beque)')",
-        "[bar, beque]"
-    );
-    test_expression!("regexp_match('foobarbequebaz', '(ba3r)(bequ34e)')", "NULL");
-    test_expression!("regexp_match('aaa-0', '.*-(\\d)')", "[0]");
-    test_expression!("regexp_match('bb-1', '.*-(\\d)')", "[1]");
-    test_expression!("regexp_match('aa', '.*-(\\d)')", "NULL");
-    test_expression!("regexp_match(NULL, '.*-(\\d)')", "NULL");
-    test_expression!("regexp_match('aaa-0', NULL)", "NULL");
-    Ok(())
-}
-
-#[tokio::test]
-async fn test_extract_date_part() -> Result<()> {
-    test_expression!("date_part('hour', CAST('2020-01-01' AS DATE))", "0");
-    test_expression!("EXTRACT(HOUR FROM CAST('2020-01-01' AS DATE))", "0");
-    test_expression!(
-        "EXTRACT(HOUR FROM to_timestamp('2020-09-08T12:00:00+00:00'))",
-        "12"
-    );
-    test_expression!("date_part('YEAR', CAST('2000-01-01' AS DATE))", "2000");
-    test_expression!(
-        "EXTRACT(year FROM to_timestamp('2020-09-08T12:00:00+00:00'))",
-        "2020"
-    );
-    Ok(())
-}
-
-#[tokio::test]
-async fn test_in_list_scalar() -> Result<()> {
-    test_expression!("'a' IN ('a','b')", "true");
-    test_expression!("'c' IN ('a','b')", "false");
-    test_expression!("'c' NOT IN ('a','b')", "true");
-    test_expression!("'a' NOT IN ('a','b')", "false");
-    test_expression!("NULL IN ('a','b')", "NULL");
-    test_expression!("NULL NOT IN ('a','b')", "NULL");
-    test_expression!("'a' IN ('a','b',NULL)", "true");
-    test_expression!("'c' IN ('a','b',NULL)", "NULL");
-    test_expression!("'a' NOT IN ('a','b',NULL)", "false");
-    test_expression!("'c' NOT IN ('a','b',NULL)", "NULL");
-    test_expression!("0 IN (0,1,2)", "true");
-    test_expression!("3 IN (0,1,2)", "false");
-    test_expression!("3 NOT IN (0,1,2)", "true");
-    test_expression!("0 NOT IN (0,1,2)", "false");
-    test_expression!("NULL IN (0,1,2)", "NULL");
-    test_expression!("NULL NOT IN (0,1,2)", "NULL");
-    test_expression!("0 IN (0,1,2,NULL)", "true");
-    test_expression!("3 IN (0,1,2,NULL)", "NULL");
-    test_expression!("0 NOT IN (0,1,2,NULL)", "false");
-    test_expression!("3 NOT IN (0,1,2,NULL)", "NULL");
-    test_expression!("0.0 IN (0.0,0.1,0.2)", "true");
-    test_expression!("0.3 IN (0.0,0.1,0.2)", "false");
-    test_expression!("0.3 NOT IN (0.0,0.1,0.2)", "true");
-    test_expression!("0.0 NOT IN (0.0,0.1,0.2)", "false");
-    test_expression!("NULL IN (0.0,0.1,0.2)", "NULL");
-    test_expression!("NULL NOT IN (0.0,0.1,0.2)", "NULL");
-    test_expression!("0.0 IN (0.0,0.1,0.2,NULL)", "true");
-    test_expression!("0.3 IN (0.0,0.1,0.2,NULL)", "NULL");
-    test_expression!("0.0 NOT IN (0.0,0.1,0.2,NULL)", "false");
-    test_expression!("0.3 NOT IN (0.0,0.1,0.2,NULL)", "NULL");
-    test_expression!("'1' IN ('a','b',1)", "true");
-    test_expression!("'2' IN ('a','b',1)", "false");
-    test_expression!("'2' NOT IN ('a','b',1)", "true");
-    test_expression!("'1' NOT IN ('a','b',1)", "false");
-    test_expression!("NULL IN ('a','b',1)", "NULL");
-    test_expression!("NULL NOT IN ('a','b',1)", "NULL");
-    test_expression!("'1' IN ('a','b',NULL,1)", "true");
-    test_expression!("'2' IN ('a','b',NULL,1)", "NULL");
-    test_expression!("'1' NOT IN ('a','b',NULL,1)", "false");
-    test_expression!("'2' NOT IN ('a','b',NULL,1)", "NULL");
-    Ok(())
-}
-
-#[tokio::test]
-async fn in_list_array() -> Result<()> {
-    let mut ctx = ExecutionContext::new();
-    register_aggregate_csv_by_sql(&mut ctx).await;
-    let sql = "SELECT
-            c1 IN ('a', 'c') AS utf8_in_true
-            ,c1 IN ('x', 'y') AS utf8_in_false
-            ,c1 NOT IN ('x', 'y') AS utf8_not_in_true
-            ,c1 NOT IN ('a', 'c') AS utf8_not_in_false
-            ,NULL IN ('a', 'c') AS utf8_in_null
-        FROM aggregate_test_100 WHERE c12 < 0.05";
-    let actual = execute(&mut ctx, sql).await;
-    let expected = vec![
-        vec!["true", "false", "true", "false", "NULL"],
-        vec!["true", "false", "true", "false", "NULL"],
-        vec!["true", "false", "true", "false", "NULL"],
-        vec!["false", "false", "true", "true", "NULL"],
-        vec!["false", "false", "true", "true", "NULL"],
-        vec!["false", "false", "true", "true", "NULL"],
-        vec!["false", "false", "true", "true", "NULL"],
-    ];
-    assert_eq!(expected, actual);
-    Ok(())
-}
-
-// TODO Tests to prove correct implementation of INNER JOIN's with qualified names.
-//  https://issues.apache.org/jira/projects/ARROW/issues/ARROW-11432.
-#[tokio::test]
-#[ignore]
-async fn inner_join_qualified_names() -> Result<()> {
-    // Setup the statements that test qualified names function correctly.
-    let equivalent_sql = [
-        "SELECT t1.a, t1.b, t1.c, t2.a, t2.b, t2.c
-            FROM t1
-            INNER JOIN t2 ON t1.a = t2.a
-            ORDER BY t1.a",
-        "SELECT t1.a, t1.b, t1.c, t2.a, t2.b, t2.c
-            FROM t1
-            INNER JOIN t2 ON t2.a = t1.a
-            ORDER BY t1.a",
-    ];
-
-    let expected = vec![
-        vec!["1", "10", "50", "1", "100", "500"],
-        vec!["2", "20", "60", "2", "20", "600"],
-        vec!["4", "40", "80", "4", "400", "800"],
-    ];
-
-    for sql in equivalent_sql.iter() {
-        let mut ctx = create_join_context_qualified()?;
-        let actual = execute(&mut ctx, sql).await;
-        assert_eq!(expected, actual);
-    }
-    Ok(())
-}
-
-#[tokio::test]
-#[ignore = "https://issues.apache.org/jira/browse/ARROW-12266"]
-async fn inner_join_nulls() {
-    let sql = "SELECT * FROM (SELECT null AS id1) t1
-            INNER JOIN (SELECT null AS id2) t2 ON id1 = id2";
-
-    let expected: &[&[&str]] = &[&[]];
-
-    let mut ctx = create_join_context_qualified().unwrap();
-    let actual = execute(&mut ctx, sql).await;
-
-    // left and right shouldn't match anything
-    assert_eq!(expected, actual);
-}
-
-#[tokio::test]
-async fn qualified_table_references() -> Result<()> {
-    let mut ctx = ExecutionContext::new();
-    register_aggregate_csv(&mut ctx)?;
-
-    for table_ref in &[
-        "aggregate_test_100",
-        "public.aggregate_test_100",
-        "datafusion.public.aggregate_test_100",
-    ] {
-        let sql = format!("SELECT COUNT(*) FROM {}", table_ref);
-        let results = execute(&mut ctx, &sql).await;
-        assert_eq!(results, vec![vec!["100"]]);
-    }
-    Ok(())
-}
-
-#[tokio::test]
-async fn invalid_qualified_table_references() -> Result<()> {
-    let mut ctx = ExecutionContext::new();
-    register_aggregate_csv(&mut ctx)?;
-
-    for table_ref in &[
-        "nonexistentschema.aggregate_test_100",
-        "nonexistentcatalog.public.aggregate_test_100",
-        "way.too.many.namespaces.as.ident.prefixes.aggregate_test_100",
-    ] {
-        let sql = format!("SELECT COUNT(*) FROM {}", table_ref);
-        assert!(matches!(ctx.sql(&sql), Err(DataFusionError::Plan(_))));
-    }
-    Ok(())
-}
-
-#[tokio::test]
-async fn test_cast_expressions() -> Result<()> {
-    test_expression!("CAST('0' AS INT)", "0");
-    test_expression!("CAST(NULL AS INT)", "NULL");
-    test_expression!("TRY_CAST('0' AS INT)", "0");
-    test_expression!("TRY_CAST('x' AS INT)", "NULL");
-    Ok(())
-}
-
-#[tokio::test]
-async fn test_cast_expressions_error() -> Result<()> {
-    // sin(utf8) should error
-    let mut ctx = create_ctx()?;
-    register_aggregate_csv(&mut ctx)?;
-    let sql = "SELECT CAST(c1 AS INT) FROM aggregate_test_100";
-    let plan = ctx.create_logical_plan(&sql).unwrap();
-    let plan = ctx.optimize(&plan).unwrap();
-    let plan = ctx.create_physical_plan(&plan).unwrap();
-    let result = collect(plan).await;
-
-    match result {
-        Ok(_) => panic!("expected error"),
-        Err(e) => {
-            assert!(e.to_string().contains(
-                "Cast error: Cannot cast string 'c' to value of arrow::datatypes::types::Int32Type type"
-            ))
-        }
-    }
-
-    Ok(())
-}
diff --git a/rust/datafusion/tests/user_defined_plan.rs b/rust/datafusion/tests/user_defined_plan.rs
deleted file mode 100644
index f9f24430104..00000000000
--- a/rust/datafusion/tests/user_defined_plan.rs
+++ /dev/null
@@ -1,512 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! This module contains an end to end demonstration of creating
-//! a user defined operator in DataFusion.
-//!
-//! Specifically, it shows how to define a `TopKNode` that implements
-//! `ExtensionPlanNode`, add an OptimizerRule to rewrite a
-//! `LogicalPlan` to use that node a `LogicalPlan`, create an
-//! `ExecutionPlan` and finally produce results.
-//!
-//! # TopK Background:
-//!
-//! A "Top K" node is a common query optimization which is used for
-//! queries such as "find the top 3 customers by revenue". The
-//! (simplified) SQL for such a query might be:
-//!
-//! ```sql
-//! CREATE EXTERNAL TABLE sales(customer_id VARCHAR, revenue BIGINT)
-//!   STORED AS CSV location 'tests/customer.csv';
-//!
-//! SELECT customer_id, revenue FROM sales ORDER BY revenue DESC limit 3;
-//! ```
-//!
-//! And a naive plan would be:
-//!
-//! ```
-//! > explain SELECT customer_id, revenue FROM sales ORDER BY revenue DESC limit 3;
-//! +--------------+----------------------------------------+
-//! | plan_type    | plan                                   |
-//! +--------------+----------------------------------------+
-//! | logical_plan | Limit: 3                               |
-//! |              |   Sort: #revenue DESC NULLS FIRST      |
-//! |              |     Projection: #customer_id, #revenue |
-//! |              |       TableScan: sales projection=None |
-//! +--------------+----------------------------------------+
-//! ```
-//!
-//! While this plan produces the correct answer, the careful reader
-//! will note it fully sorts the input before discarding everything
-//! other than the top 3 elements.
-//!
-//! The same answer can be produced by simply keeping track of the top
-//! N elements, reducing the total amount of required buffer memory.
-//!
-
-use futures::{Stream, StreamExt};
-
-use arrow::{
-    array::{Int64Array, StringArray},
-    datatypes::SchemaRef,
-    error::ArrowError,
-    record_batch::RecordBatch,
-    util::pretty::pretty_format_batches,
-};
-use datafusion::{
-    error::{DataFusionError, Result},
-    execution::context::ExecutionContextState,
-    execution::context::QueryPlanner,
-    logical_plan::{Expr, LogicalPlan, UserDefinedLogicalNode},
-    optimizer::{optimizer::OptimizerRule, utils::optimize_children},
-    physical_plan::{
-        planner::{DefaultPhysicalPlanner, ExtensionPlanner},
-        Distribution, ExecutionPlan, Partitioning, PhysicalPlanner, RecordBatchStream,
-        SendableRecordBatchStream,
-    },
-    prelude::{ExecutionConfig, ExecutionContext},
-};
-use fmt::Debug;
-use std::task::{Context, Poll};
-use std::{any::Any, collections::BTreeMap, fmt, sync::Arc};
-
-use async_trait::async_trait;
-use datafusion::logical_plan::DFSchemaRef;
-
-/// Execute the specified sql and return the resulting record batches
-/// pretty printed as a String.
-async fn exec_sql(ctx: &mut ExecutionContext, sql: &str) -> Result<String> {
-    let df = ctx.sql(sql)?;
-    let batches = df.collect().await?;
-    pretty_format_batches(&batches).map_err(DataFusionError::ArrowError)
-}
-
-/// Create a test table.
-async fn setup_table(mut ctx: ExecutionContext) -> Result<ExecutionContext> {
-    let sql = "CREATE EXTERNAL TABLE sales(customer_id VARCHAR, revenue BIGINT) STORED AS CSV location 'tests/customer.csv'";
-
-    let expected = vec!["++", "++"];
-
-    let s = exec_sql(&mut ctx, sql).await?;
-    let actual = s.lines().collect::<Vec<_>>();
-
-    assert_eq!(expected, actual, "Creating table");
-    Ok(ctx)
-}
-
-const QUERY: &str =
-    "SELECT customer_id, revenue FROM sales ORDER BY revenue DESC limit 3";
-
-// Run the query using the specified execution context and compare it
-// to the known result
-async fn run_and_compare_query(
-    mut ctx: ExecutionContext,
-    description: &str,
-) -> Result<()> {
-    let expected = vec![
-        "+-------------+---------+",
-        "| customer_id | revenue |",
-        "+-------------+---------+",
-        "| paul        | 300     |",
-        "| jorge       | 200     |",
-        "| andy        | 150     |",
-        "+-------------+---------+",
-    ];
-
-    let s = exec_sql(&mut ctx, QUERY).await?;
-    let actual = s.lines().collect::<Vec<_>>();
-
-    assert_eq!(
-        expected,
-        actual,
-        "output mismatch for {}. Expectedn\n{}Actual:\n{}",
-        description,
-        expected.join("\n"),
-        s
-    );
-    Ok(())
-}
-
-#[tokio::test]
-// Run the query using default planners and optimizer
-async fn normal_query() -> Result<()> {
-    let ctx = setup_table(ExecutionContext::new()).await?;
-    run_and_compare_query(ctx, "Default context").await
-}
-
-#[tokio::test]
-// Run the query using topk optimization
-async fn topk_query() -> Result<()> {
-    // Note the only difference is that the top
-    let ctx = setup_table(make_topk_context()).await?;
-    run_and_compare_query(ctx, "Topk context").await
-}
-
-#[tokio::test]
-// Run EXPLAIN PLAN and show the plan was in fact rewritten
-async fn topk_plan() -> Result<()> {
-    let mut ctx = setup_table(make_topk_context()).await?;
-
-    let expected = vec![
-        "| logical_plan after topk                 | TopK: k=3                                      |",
-        "|                                         |   Projection: #customer_id, #revenue           |",
-        "|                                         |     TableScan: sales projection=Some([0, 1])   |",
-    ].join("\n");
-
-    let explain_query = format!("EXPLAIN VERBOSE {}", QUERY);
-    let actual_output = exec_sql(&mut ctx, &explain_query).await?;
-
-    // normalize newlines (output on windows uses \r\n)
-    let actual_output = actual_output.replace("\r\n", "\n");
-
-    assert!(actual_output.contains(&expected) , "Expected output not present in actual output\nExpected:\n---------\n{}\nActual:\n--------\n{}", expected, actual_output);
-    Ok(())
-}
-
-fn make_topk_context() -> ExecutionContext {
-    let config = ExecutionConfig::new()
-        .with_query_planner(Arc::new(TopKQueryPlanner {}))
-        .with_concurrency(48)
-        .add_optimizer_rule(Arc::new(TopKOptimizerRule {}));
-
-    ExecutionContext::with_config(config)
-}
-
-// ------ The implementation of the TopK code follows -----
-
-struct TopKQueryPlanner {}
-
-impl QueryPlanner for TopKQueryPlanner {
-    /// Given a `LogicalPlan` created from above, create an
-    /// `ExecutionPlan` suitable for execution
-    fn create_physical_plan(
-        &self,
-        logical_plan: &LogicalPlan,
-        ctx_state: &ExecutionContextState,
-    ) -> Result<Arc<dyn ExecutionPlan>> {
-        // Teach the default physical planner how to plan TopK nodes.
-        let physical_planner =
-            DefaultPhysicalPlanner::with_extension_planners(vec![Arc::new(
-                TopKPlanner {},
-            )]);
-        // Delegate most work of physical planning to the default physical planner
-        physical_planner.create_physical_plan(logical_plan, ctx_state)
-    }
-}
-
-struct TopKOptimizerRule {}
-impl OptimizerRule for TopKOptimizerRule {
-    // Example rewrite pass to insert a user defined LogicalPlanNode
-    fn optimize(&self, plan: &LogicalPlan) -> Result<LogicalPlan> {
-        // Note: this code simply looks for the pattern of a Limit followed by a
-        // Sort and replaces it by a TopK node. It does not handle many
-        // edge cases (e.g multiple sort columns, sort ASC / DESC), etc.
-        if let LogicalPlan::Limit { ref n, ref input } = plan {
-            if let LogicalPlan::Sort {
-                ref expr,
-                ref input,
-            } = **input
-            {
-                if expr.len() == 1 {
-                    // we found a sort with a single sort expr, replace with a a TopK
-                    return Ok(LogicalPlan::Extension {
-                        node: Arc::new(TopKPlanNode {
-                            k: *n,
-                            input: self.optimize(input.as_ref())?,
-                            expr: expr[0].clone(),
-                        }),
-                    });
-                }
-            }
-        }
-
-        // If we didn't find the Limit/Sort combination, recurse as
-        // normal and build the result.
-        optimize_children(self, plan)
-    }
-
-    fn name(&self) -> &str {
-        "topk"
-    }
-}
-
-struct TopKPlanNode {
-    k: usize,
-    input: LogicalPlan,
-    /// The sort expression (this example only supports a single sort
-    /// expr)
-    expr: Expr,
-}
-
-impl Debug for TopKPlanNode {
-    /// For TopK, use explain format for the Debug format. Other types
-    /// of nodes may
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        self.fmt_for_explain(f)
-    }
-}
-
-impl UserDefinedLogicalNode for TopKPlanNode {
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn inputs(&self) -> Vec<&LogicalPlan> {
-        vec![&self.input]
-    }
-
-    /// Schema for TopK is the same as the input
-    fn schema(&self) -> &DFSchemaRef {
-        self.input.schema()
-    }
-
-    fn expressions(&self) -> Vec<Expr> {
-        vec![self.expr.clone()]
-    }
-
-    /// For example: `TopK: k=10`
-    fn fmt_for_explain(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "TopK: k={}", self.k)
-    }
-
-    fn from_template(
-        &self,
-        exprs: &[Expr],
-        inputs: &[LogicalPlan],
-    ) -> Arc<dyn UserDefinedLogicalNode + Send + Sync> {
-        assert_eq!(inputs.len(), 1, "input size inconsistent");
-        assert_eq!(exprs.len(), 1, "expression size inconsistent");
-        Arc::new(TopKPlanNode {
-            k: self.k,
-            input: inputs[0].clone(),
-            expr: exprs[0].clone(),
-        })
-    }
-}
-
-/// Physical planner for TopK nodes
-struct TopKPlanner {}
-
-impl ExtensionPlanner for TopKPlanner {
-    /// Create a physical plan for an extension node
-    fn plan_extension(
-        &self,
-        node: &dyn UserDefinedLogicalNode,
-        inputs: &[Arc<dyn ExecutionPlan>],
-        _ctx_state: &ExecutionContextState,
-    ) -> Result<Option<Arc<dyn ExecutionPlan>>> {
-        Ok(
-            if let Some(topk_node) = node.as_any().downcast_ref::<TopKPlanNode>() {
-                assert_eq!(inputs.len(), 1, "Inconsistent number of inputs");
-                // figure out input name
-                Some(Arc::new(TopKExec {
-                    input: inputs[0].clone(),
-                    k: topk_node.k,
-                }))
-            } else {
-                None
-            },
-        )
-    }
-}
-
-/// Physical operator that implements TopK for u64 data types. This
-/// code is not general and is meant as an illustration only
-struct TopKExec {
-    input: Arc<dyn ExecutionPlan>,
-    /// The maxium number of values
-    k: usize,
-}
-
-impl Debug for TopKExec {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "TopKExec")
-    }
-}
-
-#[async_trait]
-impl ExecutionPlan for TopKExec {
-    /// Return a reference to Any that can be used for downcasting
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn schema(&self) -> SchemaRef {
-        self.input.schema()
-    }
-
-    fn output_partitioning(&self) -> Partitioning {
-        Partitioning::UnknownPartitioning(1)
-    }
-
-    fn required_child_distribution(&self) -> Distribution {
-        Distribution::SinglePartition
-    }
-
-    fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
-        vec![self.input.clone()]
-    }
-
-    fn with_new_children(
-        &self,
-        children: Vec<Arc<dyn ExecutionPlan>>,
-    ) -> Result<Arc<dyn ExecutionPlan>> {
-        match children.len() {
-            1 => Ok(Arc::new(TopKExec {
-                input: children[0].clone(),
-                k: self.k,
-            })),
-            _ => Err(DataFusionError::Internal(
-                "TopKExec wrong number of children".to_string(),
-            )),
-        }
-    }
-
-    /// Execute one partition and return an iterator over RecordBatch
-    async fn execute(&self, partition: usize) -> Result<SendableRecordBatchStream> {
-        if 0 != partition {
-            return Err(DataFusionError::Internal(format!(
-                "TopKExec invalid partition {}",
-                partition
-            )));
-        }
-
-        Ok(Box::pin(TopKReader {
-            input: self.input.execute(partition).await?,
-            k: self.k,
-            done: false,
-            state: BTreeMap::new(),
-        }))
-    }
-}
-
-// A very specialized TopK implementation
-struct TopKReader {
-    /// The input to read data from
-    input: SendableRecordBatchStream,
-    /// Maximum number of output values
-    k: usize,
-    /// Have we produced the output yet?
-    done: bool,
-    /// Output
-    state: BTreeMap<i64, String>,
-}
-
-/// Keeps track of the revenue from customer_id and stores if it
-/// is the top values we have seen so far.
-fn add_row(
-    top_values: &mut BTreeMap<i64, String>,
-    customer_id: &str,
-    revenue: i64,
-    k: &usize,
-) {
-    top_values.insert(revenue, customer_id.into());
-    // only keep top k
-    while top_values.len() > *k {
-        remove_lowest_value(top_values)
-    }
-}
-
-fn remove_lowest_value(top_values: &mut BTreeMap<i64, String>) {
-    if !top_values.is_empty() {
-        let smallest_revenue = {
-            let (revenue, _) = top_values.iter().next().unwrap();
-            *revenue
-        };
-        top_values.remove(&smallest_revenue);
-    }
-}
-
-#[allow(clippy::unnecessary_wraps)]
-fn accumulate_batch(
-    input_batch: &RecordBatch,
-    mut top_values: BTreeMap<i64, String>,
-    k: &usize,
-) -> BTreeMap<i64, String> {
-    let num_rows = input_batch.num_rows();
-    // Assuming the input columns are
-    // column[0]: customer_id / UTF8
-    // column[1]: revenue: Int64
-    let customer_id = input_batch
-        .column(0)
-        .as_any()
-        .downcast_ref::<StringArray>()
-        .expect("Column 0 is not customer_id");
-
-    let revenue = input_batch
-        .column(1)
-        .as_any()
-        .downcast_ref::<Int64Array>()
-        .expect("Column 1 is not revenue");
-
-    for row in 0..num_rows {
-        add_row(
-            &mut top_values,
-            customer_id.value(row),
-            revenue.value(row),
-            k,
-        );
-    }
-    top_values
-}
-
-impl Stream for TopKReader {
-    type Item = std::result::Result<RecordBatch, ArrowError>;
-
-    fn poll_next(
-        mut self: std::pin::Pin<&mut Self>,
-        cx: &mut Context<'_>,
-    ) -> Poll<Option<Self::Item>> {
-        if self.done {
-            return Poll::Ready(None);
-        }
-        // this aggregates and thus returns a single RecordBatch.
-
-        // take this as immutable
-        let k = self.k;
-        let schema = self.schema();
-        let poll = self.input.poll_next_unpin(cx);
-
-        match poll {
-            Poll::Ready(Some(Ok(batch))) => {
-                self.state = accumulate_batch(&batch, self.state.clone(), &k);
-                Poll::Ready(Some(Ok(RecordBatch::new_empty(schema))))
-            }
-            Poll::Ready(None) => {
-                self.done = true;
-                let (revenue, customer): (Vec<i64>, Vec<&String>) =
-                    self.state.iter().rev().unzip();
-
-                let customer: Vec<&str> = customer.iter().map(|&s| &**s).collect();
-                Poll::Ready(Some(RecordBatch::try_new(
-                    schema,
-                    vec![
-                        Arc::new(StringArray::from(customer)),
-                        Arc::new(Int64Array::from(revenue)),
-                    ],
-                )))
-            }
-            other => other,
-        }
-    }
-}
-
-impl RecordBatchStream for TopKReader {
-    fn schema(&self) -> SchemaRef {
-        self.input.schema()
-    }
-}
diff --git a/rust/integration-testing/Cargo.toml b/rust/integration-testing/Cargo.toml
deleted file mode 100644
index 9c170457e35..00000000000
--- a/rust/integration-testing/Cargo.toml
+++ /dev/null
@@ -1,45 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-[package]
-name = "arrow-integration-testing"
-description = "Binaries used in the Arrow integration tests"
-version = "5.0.0-SNAPSHOT"
-homepage = "https://github.com/apache/arrow"
-repository = "https://github.com/apache/arrow"
-authors = ["Apache Arrow <dev@arrow.apache.org>"]
-license = "Apache-2.0"
-edition = "2018"
-publish = false
-
-[features]
-logging = ["tracing-subscriber"]
-
-[dependencies]
-arrow = { path = "../arrow" }
-arrow-flight = { path = "../arrow-flight" }
-async-trait = "0.1.41"
-clap = "2.33"
-futures = "0.3"
-hex = "0.4"
-prost = "0.7"
-serde = { version = "1.0", features = ["rc"] }
-serde_derive = "1.0"
-serde_json = { version = "1.0", features = ["preserve_order"] }
-tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread"] }
-tonic = "0.4"
-tracing-subscriber = { version = "0.2.15", optional = true }
diff --git a/rust/integration-testing/README.md b/rust/integration-testing/README.md
deleted file mode 100644
index 66248deb346..00000000000
--- a/rust/integration-testing/README.md
+++ /dev/null
@@ -1,30 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-# Apache Arrow Rust Integration Testing
-
-See [Integration.rst](../../docs/source/format/Integration.rst) for an overview of integration testing.
-
-This crate contains the following binaries, which are invoked by Archery during integration testing with other Arrow implementations.
-
-| Binary | Purpose |
-|--------|---------|
-| arrow-file-to-stream | Converts an Arrow file to an Arrow stream |
-| arrow-stream-to-file | Converts an Arrow stream to an Arrow file |
-| arrow-json-integration-test | Converts between Arrow and JSON formats |
diff --git a/rust/integration-testing/src/bin/arrow-file-to-stream.rs b/rust/integration-testing/src/bin/arrow-file-to-stream.rs
deleted file mode 100644
index d6bb0428c0f..00000000000
--- a/rust/integration-testing/src/bin/arrow-file-to-stream.rs
+++ /dev/null
@@ -1,43 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::env;
-use std::fs::File;
-use std::io::{self, BufReader};
-
-use arrow::error::Result;
-use arrow::ipc::reader::FileReader;
-use arrow::ipc::writer::StreamWriter;
-
-fn main() -> Result<()> {
-    let args: Vec<String> = env::args().collect();
-    let filename = &args[1];
-    let f = File::open(filename)?;
-    let reader = BufReader::new(f);
-    let mut reader = FileReader::try_new(reader)?;
-    let schema = reader.schema();
-
-    let mut writer = StreamWriter::try_new(io::stdout(), &schema)?;
-
-    reader.try_for_each(|batch| {
-        let batch = batch?;
-        writer.write(&batch)
-    })?;
-    writer.finish()?;
-
-    Ok(())
-}
diff --git a/rust/integration-testing/src/bin/arrow-json-integration-test.rs b/rust/integration-testing/src/bin/arrow-json-integration-test.rs
deleted file mode 100644
index 257802028b2..00000000000
--- a/rust/integration-testing/src/bin/arrow-json-integration-test.rs
+++ /dev/null
@@ -1,180 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::fs::File;
-
-use clap::{App, Arg};
-
-use arrow::error::{ArrowError, Result};
-use arrow::ipc::reader::FileReader;
-use arrow::ipc::writer::FileWriter;
-use arrow::util::integration_util::*;
-use arrow_integration_testing::read_json_file;
-
-fn main() -> Result<()> {
-    let matches = App::new("rust arrow-json-integration-test")
-        .arg(Arg::with_name("integration")
-            .long("integration"))
-        .arg(Arg::with_name("arrow")
-            .long("arrow")
-            .help("path to ARROW file")
-            .takes_value(true))
-        .arg(Arg::with_name("json")
-            .long("json")
-            .help("path to JSON file")
-            .takes_value(true))
-        .arg(Arg::with_name("mode")
-            .long("mode")
-            .help("mode of integration testing tool (ARROW_TO_JSON, JSON_TO_ARROW, VALIDATE)")
-            .takes_value(true)
-            .default_value("VALIDATE"))
-        .arg(Arg::with_name("verbose")
-            .long("verbose")
-            .help("enable/disable verbose mode"))
-        .get_matches();
-
-    let arrow_file = matches
-        .value_of("arrow")
-        .expect("must provide path to arrow file");
-    let json_file = matches
-        .value_of("json")
-        .expect("must provide path to json file");
-    let mode = matches.value_of("mode").unwrap();
-    let verbose = true; //matches.value_of("verbose").is_some();
-
-    match mode {
-        "JSON_TO_ARROW" => json_to_arrow(json_file, arrow_file, verbose),
-        "ARROW_TO_JSON" => arrow_to_json(arrow_file, json_file, verbose),
-        "VALIDATE" => validate(arrow_file, json_file, verbose),
-        _ => panic!("mode {} not supported", mode),
-    }
-}
-
-fn json_to_arrow(json_name: &str, arrow_name: &str, verbose: bool) -> Result<()> {
-    if verbose {
-        eprintln!("Converting {} to {}", json_name, arrow_name);
-    }
-
-    let json_file = read_json_file(json_name)?;
-
-    let arrow_file = File::create(arrow_name)?;
-    let mut writer = FileWriter::try_new(arrow_file, &json_file.schema)?;
-
-    for b in json_file.batches {
-        writer.write(&b)?;
-    }
-
-    writer.finish()?;
-
-    Ok(())
-}
-
-fn arrow_to_json(arrow_name: &str, json_name: &str, verbose: bool) -> Result<()> {
-    if verbose {
-        eprintln!("Converting {} to {}", arrow_name, json_name);
-    }
-
-    let arrow_file = File::open(arrow_name)?;
-    let reader = FileReader::try_new(arrow_file)?;
-
-    let mut fields: Vec<ArrowJsonField> = vec![];
-    for f in reader.schema().fields() {
-        fields.push(ArrowJsonField::from(f));
-    }
-    let schema = ArrowJsonSchema { fields };
-
-    let batches = reader
-        .map(|batch| Ok(ArrowJsonBatch::from_batch(&batch?)))
-        .collect::<Result<Vec<_>>>()?;
-
-    let arrow_json = ArrowJson {
-        schema,
-        batches,
-        dictionaries: None,
-    };
-
-    let json_file = File::create(json_name)?;
-    serde_json::to_writer(&json_file, &arrow_json).unwrap();
-
-    Ok(())
-}
-
-fn validate(arrow_name: &str, json_name: &str, verbose: bool) -> Result<()> {
-    if verbose {
-        eprintln!("Validating {} and {}", arrow_name, json_name);
-    }
-
-    // open JSON file
-    let json_file = read_json_file(json_name)?;
-
-    // open Arrow file
-    let arrow_file = File::open(arrow_name)?;
-    let mut arrow_reader = FileReader::try_new(arrow_file)?;
-    let arrow_schema = arrow_reader.schema().as_ref().to_owned();
-
-    // compare schemas
-    if json_file.schema != arrow_schema {
-        return Err(ArrowError::ComputeError(format!(
-            "Schemas do not match. JSON: {:?}. Arrow: {:?}",
-            json_file.schema, arrow_schema
-        )));
-    }
-
-    let json_batches = &json_file.batches;
-
-    // compare number of batches
-    assert!(
-        json_batches.len() == arrow_reader.num_batches(),
-        "JSON batches and Arrow batches are unequal"
-    );
-
-    if verbose {
-        eprintln!(
-            "Schemas match. JSON file has {} batches.",
-            json_batches.len()
-        );
-    }
-
-    for json_batch in json_batches {
-        if let Some(Ok(arrow_batch)) = arrow_reader.next() {
-            // compare batches
-            let num_columns = arrow_batch.num_columns();
-            assert!(num_columns == json_batch.num_columns());
-            assert!(arrow_batch.num_rows() == json_batch.num_rows());
-
-            for i in 0..num_columns {
-                assert_eq!(
-                    arrow_batch.column(i).data(),
-                    json_batch.column(i).data(),
-                    "Arrow and JSON batch columns not the same"
-                );
-            }
-        } else {
-            return Err(ArrowError::ComputeError(
-                "no more arrow batches left".to_owned(),
-            ));
-        }
-    }
-
-    if arrow_reader.next().is_some() {
-        return Err(ArrowError::ComputeError(
-            "no more json batches left".to_owned(),
-        ));
-    }
-
-    Ok(())
-}
diff --git a/rust/integration-testing/src/bin/arrow-stream-to-file.rs b/rust/integration-testing/src/bin/arrow-stream-to-file.rs
deleted file mode 100644
index f81d42e6eda..00000000000
--- a/rust/integration-testing/src/bin/arrow-stream-to-file.rs
+++ /dev/null
@@ -1,34 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::io;
-
-use arrow::error::Result;
-use arrow::ipc::reader::StreamReader;
-use arrow::ipc::writer::FileWriter;
-
-fn main() -> Result<()> {
-    let mut arrow_stream_reader = StreamReader::try_new(io::stdin())?;
-    let schema = arrow_stream_reader.schema();
-
-    let mut writer = FileWriter::try_new(io::stdout(), &schema)?;
-
-    arrow_stream_reader.try_for_each(|batch| writer.write(&batch?))?;
-    writer.finish()?;
-
-    Ok(())
-}
diff --git a/rust/integration-testing/src/bin/flight-test-integration-client.rs b/rust/integration-testing/src/bin/flight-test-integration-client.rs
deleted file mode 100644
index 1901553109f..00000000000
--- a/rust/integration-testing/src/bin/flight-test-integration-client.rs
+++ /dev/null
@@ -1,62 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use arrow_integration_testing::flight_client_scenarios;
-
-use clap::{App, Arg};
-
-type Error = Box<dyn std::error::Error + Send + Sync + 'static>;
-type Result<T = (), E = Error> = std::result::Result<T, E>;
-
-#[tokio::main]
-async fn main() -> Result {
-    #[cfg(feature = "logging")]
-    tracing_subscriber::fmt::init();
-
-    let matches = App::new("rust flight-test-integration-client")
-        .arg(Arg::with_name("host").long("host").takes_value(true))
-        .arg(Arg::with_name("port").long("port").takes_value(true))
-        .arg(Arg::with_name("path").long("path").takes_value(true))
-        .arg(
-            Arg::with_name("scenario")
-                .long("scenario")
-                .takes_value(true),
-        )
-        .get_matches();
-
-    let host = matches.value_of("host").expect("Host is required");
-    let port = matches.value_of("port").expect("Port is required");
-
-    match matches.value_of("scenario") {
-        Some("middleware") => {
-            flight_client_scenarios::middleware::run_scenario(host, port).await?
-        }
-        Some("auth:basic_proto") => {
-            flight_client_scenarios::auth_basic_proto::run_scenario(host, port).await?
-        }
-        Some(scenario_name) => unimplemented!("Scenario not found: {}", scenario_name),
-        None => {
-            let path = matches
-                .value_of("path")
-                .expect("Path is required if scenario is not specified");
-            flight_client_scenarios::integration_test::run_scenario(host, port, path)
-                .await?;
-        }
-    }
-
-    Ok(())
-}
diff --git a/rust/integration-testing/src/bin/flight-test-integration-server.rs b/rust/integration-testing/src/bin/flight-test-integration-server.rs
deleted file mode 100644
index b1b280743c3..00000000000
--- a/rust/integration-testing/src/bin/flight-test-integration-server.rs
+++ /dev/null
@@ -1,55 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use clap::{App, Arg};
-
-use arrow_integration_testing::flight_server_scenarios;
-
-type Error = Box<dyn std::error::Error + Send + Sync + 'static>;
-type Result<T = (), E = Error> = std::result::Result<T, E>;
-
-#[tokio::main]
-async fn main() -> Result {
-    #[cfg(feature = "logging")]
-    tracing_subscriber::fmt::init();
-
-    let matches = App::new("rust flight-test-integration-server")
-        .about("Integration testing server for Flight.")
-        .arg(Arg::with_name("port").long("port").takes_value(true))
-        .arg(
-            Arg::with_name("scenario")
-                .long("scenario")
-                .takes_value(true),
-        )
-        .get_matches();
-
-    let port = matches.value_of("port").unwrap_or("0");
-
-    match matches.value_of("scenario") {
-        Some("middleware") => {
-            flight_server_scenarios::middleware::scenario_setup(port).await?
-        }
-        Some("auth:basic_proto") => {
-            flight_server_scenarios::auth_basic_proto::scenario_setup(port).await?
-        }
-        Some(scenario_name) => unimplemented!("Scenario not found: {}", scenario_name),
-        None => {
-            flight_server_scenarios::integration_test::scenario_setup(port).await?;
-        }
-    }
-    Ok(())
-}
diff --git a/rust/integration-testing/src/flight_client_scenarios.rs b/rust/integration-testing/src/flight_client_scenarios.rs
deleted file mode 100644
index 66cced5f4c2..00000000000
--- a/rust/integration-testing/src/flight_client_scenarios.rs
+++ /dev/null
@@ -1,20 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-pub mod auth_basic_proto;
-pub mod integration_test;
-pub mod middleware;
diff --git a/rust/integration-testing/src/flight_client_scenarios/auth_basic_proto.rs b/rust/integration-testing/src/flight_client_scenarios/auth_basic_proto.rs
deleted file mode 100644
index 5e8cd467198..00000000000
--- a/rust/integration-testing/src/flight_client_scenarios/auth_basic_proto.rs
+++ /dev/null
@@ -1,109 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use crate::{AUTH_PASSWORD, AUTH_USERNAME};
-
-use arrow_flight::{
-    flight_service_client::FlightServiceClient, BasicAuth, HandshakeRequest,
-};
-use futures::{stream, StreamExt};
-use prost::Message;
-use tonic::{metadata::MetadataValue, Request, Status};
-
-type Error = Box<dyn std::error::Error + Send + Sync + 'static>;
-type Result<T = (), E = Error> = std::result::Result<T, E>;
-
-type Client = FlightServiceClient<tonic::transport::Channel>;
-
-pub async fn run_scenario(host: &str, port: &str) -> Result {
-    let url = format!("http://{}:{}", host, port);
-    let mut client = FlightServiceClient::connect(url).await?;
-
-    let action = arrow_flight::Action::default();
-
-    let resp = client.do_action(Request::new(action.clone())).await;
-    // This client is unauthenticated and should fail.
-    match resp {
-        Err(e) => {
-            if e.code() != tonic::Code::Unauthenticated {
-                return Err(Box::new(Status::internal(format!(
-                    "Expected UNAUTHENTICATED but got {:?}",
-                    e
-                ))));
-            }
-        }
-        Ok(other) => {
-            return Err(Box::new(Status::internal(format!(
-                "Expected UNAUTHENTICATED but got {:?}",
-                other
-            ))));
-        }
-    }
-
-    let token = authenticate(&mut client, AUTH_USERNAME, AUTH_PASSWORD)
-        .await
-        .expect("must respond successfully from handshake");
-
-    let mut request = Request::new(action);
-    let metadata = request.metadata_mut();
-    metadata.insert_bin(
-        "auth-token-bin",
-        MetadataValue::from_bytes(token.as_bytes()),
-    );
-
-    let resp = client.do_action(request).await?;
-    let mut resp = resp.into_inner();
-
-    let r = resp
-        .next()
-        .await
-        .expect("No response received")
-        .expect("Invalid response received");
-
-    let body = String::from_utf8(r.body).unwrap();
-    assert_eq!(body, AUTH_USERNAME);
-
-    Ok(())
-}
-
-async fn authenticate(
-    client: &mut Client,
-    username: &str,
-    password: &str,
-) -> Result<String> {
-    let auth = BasicAuth {
-        username: username.into(),
-        password: password.into(),
-    };
-    let mut payload = vec![];
-    auth.encode(&mut payload)?;
-
-    let req = stream::once(async {
-        HandshakeRequest {
-            payload,
-            ..HandshakeRequest::default()
-        }
-    });
-
-    let rx = client.handshake(Request::new(req)).await?;
-    let mut rx = rx.into_inner();
-
-    let r = rx.next().await.expect("must respond from handshake")?;
-    assert!(rx.next().await.is_none(), "must not respond a second time");
-
-    Ok(String::from_utf8(r.payload).unwrap())
-}
diff --git a/rust/integration-testing/src/flight_client_scenarios/integration_test.rs b/rust/integration-testing/src/flight_client_scenarios/integration_test.rs
deleted file mode 100644
index ff61b5ce2db..00000000000
--- a/rust/integration-testing/src/flight_client_scenarios/integration_test.rs
+++ /dev/null
@@ -1,271 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use crate::{read_json_file, ArrowFile};
-
-use arrow::{
-    array::ArrayRef,
-    datatypes::SchemaRef,
-    ipc::{self, reader, writer},
-    record_batch::RecordBatch,
-};
-use arrow_flight::{
-    flight_descriptor::DescriptorType, flight_service_client::FlightServiceClient,
-    utils::flight_data_to_arrow_batch, FlightData, FlightDescriptor, Location, Ticket,
-};
-use futures::{channel::mpsc, sink::SinkExt, stream, StreamExt};
-use tonic::{Request, Streaming};
-
-use std::sync::Arc;
-
-type Error = Box<dyn std::error::Error + Send + Sync + 'static>;
-type Result<T = (), E = Error> = std::result::Result<T, E>;
-
-type Client = FlightServiceClient<tonic::transport::Channel>;
-
-pub async fn run_scenario(host: &str, port: &str, path: &str) -> Result {
-    let url = format!("http://{}:{}", host, port);
-
-    let client = FlightServiceClient::connect(url).await?;
-
-    let ArrowFile {
-        schema, batches, ..
-    } = read_json_file(path)?;
-
-    let schema = Arc::new(schema);
-
-    let mut descriptor = FlightDescriptor::default();
-    descriptor.set_type(DescriptorType::Path);
-    descriptor.path = vec![path.to_string()];
-
-    upload_data(
-        client.clone(),
-        schema.clone(),
-        descriptor.clone(),
-        batches.clone(),
-    )
-    .await?;
-    verify_data(client, descriptor, schema, &batches).await?;
-
-    Ok(())
-}
-
-async fn upload_data(
-    mut client: Client,
-    schema: SchemaRef,
-    descriptor: FlightDescriptor,
-    original_data: Vec<RecordBatch>,
-) -> Result {
-    let (mut upload_tx, upload_rx) = mpsc::channel(10);
-
-    let options = arrow::ipc::writer::IpcWriteOptions::default();
-    let mut schema_flight_data =
-        arrow_flight::utils::flight_data_from_arrow_schema(&schema, &options);
-    schema_flight_data.flight_descriptor = Some(descriptor.clone());
-    upload_tx.send(schema_flight_data).await?;
-
-    let mut original_data_iter = original_data.iter().enumerate();
-
-    if let Some((counter, first_batch)) = original_data_iter.next() {
-        let metadata = counter.to_string().into_bytes();
-        // Preload the first batch into the channel before starting the request
-        send_batch(&mut upload_tx, &metadata, first_batch, &options).await?;
-
-        let outer = client.do_put(Request::new(upload_rx)).await?;
-        let mut inner = outer.into_inner();
-
-        let r = inner
-            .next()
-            .await
-            .expect("No response received")
-            .expect("Invalid response received");
-        assert_eq!(metadata, r.app_metadata);
-
-        // Stream the rest of the batches
-        for (counter, batch) in original_data_iter {
-            let metadata = counter.to_string().into_bytes();
-            send_batch(&mut upload_tx, &metadata, batch, &options).await?;
-
-            let r = inner
-                .next()
-                .await
-                .expect("No response received")
-                .expect("Invalid response received");
-            assert_eq!(metadata, r.app_metadata);
-        }
-        drop(upload_tx);
-        assert!(
-            inner.next().await.is_none(),
-            "Should not receive more results"
-        );
-    } else {
-        drop(upload_tx);
-        client.do_put(Request::new(upload_rx)).await?;
-    }
-
-    Ok(())
-}
-
-async fn send_batch(
-    upload_tx: &mut mpsc::Sender<FlightData>,
-    metadata: &[u8],
-    batch: &RecordBatch,
-    options: &writer::IpcWriteOptions,
-) -> Result {
-    let (dictionary_flight_data, mut batch_flight_data) =
-        arrow_flight::utils::flight_data_from_arrow_batch(batch, &options);
-
-    upload_tx
-        .send_all(&mut stream::iter(dictionary_flight_data).map(Ok))
-        .await?;
-
-    // Only the record batch's FlightData gets app_metadata
-    batch_flight_data.app_metadata = metadata.to_vec();
-    upload_tx.send(batch_flight_data).await?;
-    Ok(())
-}
-
-async fn verify_data(
-    mut client: Client,
-    descriptor: FlightDescriptor,
-    expected_schema: SchemaRef,
-    expected_data: &[RecordBatch],
-) -> Result {
-    let resp = client.get_flight_info(Request::new(descriptor)).await?;
-    let info = resp.into_inner();
-
-    assert!(
-        !info.endpoint.is_empty(),
-        "No endpoints returned from Flight server",
-    );
-    for endpoint in info.endpoint {
-        let ticket = endpoint
-            .ticket
-            .expect("No ticket returned from Flight server");
-
-        assert!(
-            !endpoint.location.is_empty(),
-            "No locations returned from Flight server",
-        );
-        for location in endpoint.location {
-            consume_flight_location(
-                location,
-                ticket.clone(),
-                &expected_data,
-                expected_schema.clone(),
-            )
-            .await?;
-        }
-    }
-
-    Ok(())
-}
-
-async fn consume_flight_location(
-    location: Location,
-    ticket: Ticket,
-    expected_data: &[RecordBatch],
-    schema: SchemaRef,
-) -> Result {
-    let mut location = location;
-    // The other Flight implementations use the `grpc+tcp` scheme, but the Rust http libs
-    // don't recognize this as valid.
-    location.uri = location.uri.replace("grpc+tcp://", "grpc://");
-
-    let mut client = FlightServiceClient::connect(location.uri).await?;
-    let resp = client.do_get(ticket).await?;
-    let mut resp = resp.into_inner();
-
-    // We already have the schema from the FlightInfo, but the server sends it again as the
-    // first FlightData. Ignore this one.
-    let _schema_again = resp.next().await.unwrap();
-
-    let mut dictionaries_by_field = vec![None; schema.fields().len()];
-
-    for (counter, expected_batch) in expected_data.iter().enumerate() {
-        let data = receive_batch_flight_data(
-            &mut resp,
-            schema.clone(),
-            &mut dictionaries_by_field,
-        )
-        .await
-        .unwrap_or_else(|| {
-            panic!(
-                "Got fewer batches than expected, received so far: {} expected: {}",
-                counter,
-                expected_data.len(),
-            )
-        });
-
-        let metadata = counter.to_string().into_bytes();
-        assert_eq!(metadata, data.app_metadata);
-
-        let actual_batch =
-            flight_data_to_arrow_batch(&data, schema.clone(), &dictionaries_by_field)
-                .expect("Unable to convert flight data to Arrow batch");
-
-        assert_eq!(expected_batch.schema(), actual_batch.schema());
-        assert_eq!(expected_batch.num_columns(), actual_batch.num_columns());
-        assert_eq!(expected_batch.num_rows(), actual_batch.num_rows());
-        let schema = expected_batch.schema();
-        for i in 0..expected_batch.num_columns() {
-            let field = schema.field(i);
-            let field_name = field.name();
-
-            let expected_data = expected_batch.column(i).data();
-            let actual_data = actual_batch.column(i).data();
-
-            assert_eq!(expected_data, actual_data, "Data for field {}", field_name);
-        }
-    }
-
-    assert!(
-        resp.next().await.is_none(),
-        "Got more batches than the expected: {}",
-        expected_data.len(),
-    );
-
-    Ok(())
-}
-
-async fn receive_batch_flight_data(
-    resp: &mut Streaming<FlightData>,
-    schema: SchemaRef,
-    dictionaries_by_field: &mut [Option<ArrayRef>],
-) -> Option<FlightData> {
-    let mut data = resp.next().await?.ok()?;
-    let mut message = arrow::ipc::root_as_message(&data.data_header[..])
-        .expect("Error parsing first message");
-
-    while message.header_type() == ipc::MessageHeader::DictionaryBatch {
-        reader::read_dictionary(
-            &data.data_body,
-            message
-                .header_as_dictionary_batch()
-                .expect("Error parsing dictionary"),
-            &schema,
-            dictionaries_by_field,
-        )
-        .expect("Error reading dictionary");
-
-        data = resp.next().await?.ok()?;
-        message = arrow::ipc::root_as_message(&data.data_header[..])
-            .expect("Error parsing message");
-    }
-
-    Some(data)
-}
diff --git a/rust/integration-testing/src/flight_client_scenarios/middleware.rs b/rust/integration-testing/src/flight_client_scenarios/middleware.rs
deleted file mode 100644
index cbca879dca5..00000000000
--- a/rust/integration-testing/src/flight_client_scenarios/middleware.rs
+++ /dev/null
@@ -1,83 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use arrow_flight::{
-    flight_descriptor::DescriptorType, flight_service_client::FlightServiceClient,
-    FlightDescriptor,
-};
-use tonic::{Request, Status};
-
-type Error = Box<dyn std::error::Error + Send + Sync + 'static>;
-type Result<T = (), E = Error> = std::result::Result<T, E>;
-
-pub async fn run_scenario(host: &str, port: &str) -> Result {
-    let url = format!("http://{}:{}", host, port);
-    let conn = tonic::transport::Endpoint::new(url)?.connect().await?;
-    let mut client = FlightServiceClient::with_interceptor(conn, middleware_interceptor);
-
-    let mut descriptor = FlightDescriptor::default();
-    descriptor.set_type(DescriptorType::Cmd);
-    descriptor.cmd = b"".to_vec();
-
-    // This call is expected to fail.
-    match client
-        .get_flight_info(Request::new(descriptor.clone()))
-        .await
-    {
-        Ok(_) => return Err(Box::new(Status::internal("Expected call to fail"))),
-        Err(e) => {
-            let headers = e.metadata();
-            let middleware_header = headers.get("x-middleware");
-            let value = middleware_header.map(|v| v.to_str().unwrap()).unwrap_or("");
-
-            if value != "expected value" {
-                let msg = format!(
-                    "On failing call: Expected to receive header 'x-middleware: expected value', \
-                     but instead got: '{}'",
-                    value
-                );
-                return Err(Box::new(Status::internal(msg)));
-            }
-        }
-    }
-
-    // This call should succeed
-    descriptor.cmd = b"success".to_vec();
-    let resp = client.get_flight_info(Request::new(descriptor)).await?;
-
-    let headers = resp.metadata();
-    let middleware_header = headers.get("x-middleware");
-    let value = middleware_header.map(|v| v.to_str().unwrap()).unwrap_or("");
-
-    if value != "expected value" {
-        let msg = format!(
-            "On success call: Expected to receive header 'x-middleware: expected value', \
-            but instead got: '{}'",
-            value
-        );
-        return Err(Box::new(Status::internal(msg)));
-    }
-
-    Ok(())
-}
-
-#[allow(clippy::unnecessary_wraps)]
-fn middleware_interceptor(mut req: Request<()>) -> Result<Request<()>, Status> {
-    let metadata = req.metadata_mut();
-    metadata.insert("x-middleware", "expected value".parse().unwrap());
-    Ok(req)
-}
diff --git a/rust/integration-testing/src/flight_server_scenarios.rs b/rust/integration-testing/src/flight_server_scenarios.rs
deleted file mode 100644
index 9163b692086..00000000000
--- a/rust/integration-testing/src/flight_server_scenarios.rs
+++ /dev/null
@@ -1,48 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::net::SocketAddr;
-
-use arrow_flight::{FlightEndpoint, Location, Ticket};
-use tokio::net::TcpListener;
-
-pub mod auth_basic_proto;
-pub mod integration_test;
-pub mod middleware;
-
-type Error = Box<dyn std::error::Error + Send + Sync + 'static>;
-type Result<T = (), E = Error> = std::result::Result<T, E>;
-
-pub async fn listen_on(port: &str) -> Result<SocketAddr> {
-    let addr: SocketAddr = format!("0.0.0.0:{}", port).parse()?;
-
-    let listener = TcpListener::bind(addr).await?;
-    let addr = listener.local_addr()?;
-
-    Ok(addr)
-}
-
-pub fn endpoint(ticket: &str, location_uri: impl Into<String>) -> FlightEndpoint {
-    FlightEndpoint {
-        ticket: Some(Ticket {
-            ticket: ticket.as_bytes().to_vec(),
-        }),
-        location: vec![Location {
-            uri: location_uri.into(),
-        }],
-    }
-}
diff --git a/rust/integration-testing/src/flight_server_scenarios/auth_basic_proto.rs b/rust/integration-testing/src/flight_server_scenarios/auth_basic_proto.rs
deleted file mode 100644
index ea7ad3c3385..00000000000
--- a/rust/integration-testing/src/flight_server_scenarios/auth_basic_proto.rs
+++ /dev/null
@@ -1,225 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::pin::Pin;
-use std::sync::Arc;
-
-use arrow_flight::{
-    flight_service_server::FlightService, flight_service_server::FlightServiceServer,
-    Action, ActionType, BasicAuth, Criteria, Empty, FlightData, FlightDescriptor,
-    FlightInfo, HandshakeRequest, HandshakeResponse, PutResult, SchemaResult, Ticket,
-};
-use futures::{channel::mpsc, sink::SinkExt, Stream, StreamExt};
-use tokio::sync::Mutex;
-use tonic::{
-    metadata::MetadataMap, transport::Server, Request, Response, Status, Streaming,
-};
-type TonicStream<T> = Pin<Box<dyn Stream<Item = T> + Send + Sync + 'static>>;
-
-type Error = Box<dyn std::error::Error + Send + Sync + 'static>;
-type Result<T = (), E = Error> = std::result::Result<T, E>;
-
-use prost::Message;
-
-use crate::{AUTH_PASSWORD, AUTH_USERNAME};
-
-pub async fn scenario_setup(port: &str) -> Result {
-    let service = AuthBasicProtoScenarioImpl {
-        username: AUTH_USERNAME.into(),
-        password: AUTH_PASSWORD.into(),
-        peer_identity: Arc::new(Mutex::new(None)),
-    };
-    let addr = super::listen_on(port).await?;
-    let svc = FlightServiceServer::new(service);
-
-    let server = Server::builder().add_service(svc).serve(addr);
-
-    // NOTE: Log output used in tests to signal server is ready
-    println!("Server listening on localhost:{}", addr.port());
-    server.await?;
-    Ok(())
-}
-
-#[derive(Clone)]
-pub struct AuthBasicProtoScenarioImpl {
-    username: Arc<str>,
-    password: Arc<str>,
-    peer_identity: Arc<Mutex<Option<String>>>,
-}
-
-impl AuthBasicProtoScenarioImpl {
-    async fn check_auth(
-        &self,
-        metadata: &MetadataMap,
-    ) -> Result<GrpcServerCallContext, Status> {
-        let token = metadata
-            .get_bin("auth-token-bin")
-            .and_then(|v| v.to_bytes().ok())
-            .and_then(|b| String::from_utf8(b.to_vec()).ok());
-        self.is_valid(token).await
-    }
-
-    async fn is_valid(
-        &self,
-        token: Option<String>,
-    ) -> Result<GrpcServerCallContext, Status> {
-        match token {
-            Some(t) if t == *self.username => Ok(GrpcServerCallContext {
-                peer_identity: self.username.to_string(),
-            }),
-            _ => Err(Status::unauthenticated("Invalid token")),
-        }
-    }
-}
-
-struct GrpcServerCallContext {
-    peer_identity: String,
-}
-
-impl GrpcServerCallContext {
-    pub fn peer_identity(&self) -> &str {
-        &self.peer_identity
-    }
-}
-
-#[tonic::async_trait]
-impl FlightService for AuthBasicProtoScenarioImpl {
-    type HandshakeStream = TonicStream<Result<HandshakeResponse, Status>>;
-    type ListFlightsStream = TonicStream<Result<FlightInfo, Status>>;
-    type DoGetStream = TonicStream<Result<FlightData, Status>>;
-    type DoPutStream = TonicStream<Result<PutResult, Status>>;
-    type DoActionStream = TonicStream<Result<arrow_flight::Result, Status>>;
-    type ListActionsStream = TonicStream<Result<ActionType, Status>>;
-    type DoExchangeStream = TonicStream<Result<FlightData, Status>>;
-
-    async fn get_schema(
-        &self,
-        request: Request<FlightDescriptor>,
-    ) -> Result<Response<SchemaResult>, Status> {
-        self.check_auth(request.metadata()).await?;
-        Err(Status::unimplemented("Not yet implemented"))
-    }
-
-    async fn do_get(
-        &self,
-        request: Request<Ticket>,
-    ) -> Result<Response<Self::DoGetStream>, Status> {
-        self.check_auth(request.metadata()).await?;
-        Err(Status::unimplemented("Not yet implemented"))
-    }
-
-    async fn handshake(
-        &self,
-        request: Request<Streaming<HandshakeRequest>>,
-    ) -> Result<Response<Self::HandshakeStream>, Status> {
-        let (tx, rx) = mpsc::channel(10);
-
-        tokio::spawn({
-            let username = self.username.clone();
-            let password = self.password.clone();
-
-            async move {
-                let requests = request.into_inner();
-
-                requests
-                    .for_each(move |req| {
-                        let mut tx = tx.clone();
-                        let req = req.expect("Error reading handshake request");
-                        let HandshakeRequest { payload, .. } = req;
-
-                        let auth = BasicAuth::decode(&*payload)
-                            .expect("Error parsing handshake request");
-
-                        let resp = if *auth.username == *username
-                            && *auth.password == *password
-                        {
-                            Ok(HandshakeResponse {
-                                payload: username.as_bytes().to_vec(),
-                                ..HandshakeResponse::default()
-                            })
-                        } else {
-                            Err(Status::unauthenticated(format!(
-                                "Don't know user {}",
-                                auth.username
-                            )))
-                        };
-
-                        async move {
-                            tx.send(resp)
-                                .await
-                                .expect("Error sending handshake response");
-                        }
-                    })
-                    .await;
-            }
-        });
-
-        Ok(Response::new(Box::pin(rx)))
-    }
-
-    async fn list_flights(
-        &self,
-        request: Request<Criteria>,
-    ) -> Result<Response<Self::ListFlightsStream>, Status> {
-        self.check_auth(request.metadata()).await?;
-        Err(Status::unimplemented("Not yet implemented"))
-    }
-
-    async fn get_flight_info(
-        &self,
-        request: Request<FlightDescriptor>,
-    ) -> Result<Response<FlightInfo>, Status> {
-        self.check_auth(request.metadata()).await?;
-        Err(Status::unimplemented("Not yet implemented"))
-    }
-
-    async fn do_put(
-        &self,
-        request: Request<Streaming<FlightData>>,
-    ) -> Result<Response<Self::DoPutStream>, Status> {
-        self.check_auth(request.metadata()).await?;
-        Err(Status::unimplemented("Not yet implemented"))
-    }
-
-    async fn do_action(
-        &self,
-        request: Request<Action>,
-    ) -> Result<Response<Self::DoActionStream>, Status> {
-        let flight_context = self.check_auth(request.metadata()).await?;
-        // Respond with the authenticated username.
-        let buf = flight_context.peer_identity().as_bytes().to_vec();
-        let result = arrow_flight::Result { body: buf };
-        let output = futures::stream::once(async { Ok(result) });
-        Ok(Response::new(Box::pin(output) as Self::DoActionStream))
-    }
-
-    async fn list_actions(
-        &self,
-        request: Request<Empty>,
-    ) -> Result<Response<Self::ListActionsStream>, Status> {
-        self.check_auth(request.metadata()).await?;
-        Err(Status::unimplemented("Not yet implemented"))
-    }
-
-    async fn do_exchange(
-        &self,
-        request: Request<Streaming<FlightData>>,
-    ) -> Result<Response<Self::DoExchangeStream>, Status> {
-        self.check_auth(request.metadata()).await?;
-        Err(Status::unimplemented("Not yet implemented"))
-    }
-}
diff --git a/rust/integration-testing/src/flight_server_scenarios/integration_test.rs b/rust/integration-testing/src/flight_server_scenarios/integration_test.rs
deleted file mode 100644
index ee42a47c9a4..00000000000
--- a/rust/integration-testing/src/flight_server_scenarios/integration_test.rs
+++ /dev/null
@@ -1,385 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::collections::HashMap;
-use std::convert::TryFrom;
-use std::pin::Pin;
-use std::sync::Arc;
-
-use arrow::{
-    array::ArrayRef,
-    datatypes::Schema,
-    datatypes::SchemaRef,
-    ipc::{self, reader},
-    record_batch::RecordBatch,
-};
-use arrow_flight::{
-    flight_descriptor::DescriptorType, flight_service_server::FlightService,
-    flight_service_server::FlightServiceServer, Action, ActionType, Criteria, Empty,
-    FlightData, FlightDescriptor, FlightEndpoint, FlightInfo, HandshakeRequest,
-    HandshakeResponse, PutResult, SchemaResult, Ticket,
-};
-use futures::{channel::mpsc, sink::SinkExt, Stream, StreamExt};
-use tokio::sync::Mutex;
-use tonic::{transport::Server, Request, Response, Status, Streaming};
-
-type TonicStream<T> = Pin<Box<dyn Stream<Item = T> + Send + Sync + 'static>>;
-
-type Error = Box<dyn std::error::Error + Send + Sync + 'static>;
-type Result<T = (), E = Error> = std::result::Result<T, E>;
-
-pub async fn scenario_setup(port: &str) -> Result {
-    let addr = super::listen_on(port).await?;
-
-    let service = FlightServiceImpl {
-        server_location: format!("grpc+tcp://{}", addr),
-        ..Default::default()
-    };
-    let svc = FlightServiceServer::new(service);
-
-    let server = Server::builder().add_service(svc).serve(addr);
-
-    // NOTE: Log output used in tests to signal server is ready
-    println!("Server listening on localhost:{}", addr.port());
-    server.await?;
-    Ok(())
-}
-
-#[derive(Debug, Clone)]
-struct IntegrationDataset {
-    schema: Schema,
-    chunks: Vec<RecordBatch>,
-}
-
-#[derive(Clone, Default)]
-pub struct FlightServiceImpl {
-    server_location: String,
-    uploaded_chunks: Arc<Mutex<HashMap<String, IntegrationDataset>>>,
-}
-
-impl FlightServiceImpl {
-    fn endpoint_from_path(&self, path: &str) -> FlightEndpoint {
-        super::endpoint(path, &self.server_location)
-    }
-}
-
-#[tonic::async_trait]
-impl FlightService for FlightServiceImpl {
-    type HandshakeStream = TonicStream<Result<HandshakeResponse, Status>>;
-    type ListFlightsStream = TonicStream<Result<FlightInfo, Status>>;
-    type DoGetStream = TonicStream<Result<FlightData, Status>>;
-    type DoPutStream = TonicStream<Result<PutResult, Status>>;
-    type DoActionStream = TonicStream<Result<arrow_flight::Result, Status>>;
-    type ListActionsStream = TonicStream<Result<ActionType, Status>>;
-    type DoExchangeStream = TonicStream<Result<FlightData, Status>>;
-
-    async fn get_schema(
-        &self,
-        _request: Request<FlightDescriptor>,
-    ) -> Result<Response<SchemaResult>, Status> {
-        Err(Status::unimplemented("Not yet implemented"))
-    }
-
-    async fn do_get(
-        &self,
-        request: Request<Ticket>,
-    ) -> Result<Response<Self::DoGetStream>, Status> {
-        let ticket = request.into_inner();
-
-        let key = String::from_utf8(ticket.ticket.to_vec())
-            .map_err(|e| Status::invalid_argument(format!("Invalid ticket: {:?}", e)))?;
-
-        let uploaded_chunks = self.uploaded_chunks.lock().await;
-
-        let flight = uploaded_chunks.get(&key).ok_or_else(|| {
-            Status::not_found(format!("Could not find flight. {}", key))
-        })?;
-
-        let options = arrow::ipc::writer::IpcWriteOptions::default();
-
-        let schema = std::iter::once({
-            Ok(arrow_flight::utils::flight_data_from_arrow_schema(
-                &flight.schema,
-                &options,
-            ))
-        });
-
-        let batches = flight
-            .chunks
-            .iter()
-            .enumerate()
-            .flat_map(|(counter, batch)| {
-                let (dictionary_flight_data, mut batch_flight_data) =
-                    arrow_flight::utils::flight_data_from_arrow_batch(batch, &options);
-
-                // Only the record batch's FlightData gets app_metadata
-                let metadata = counter.to_string().into_bytes();
-                batch_flight_data.app_metadata = metadata;
-
-                dictionary_flight_data
-                    .into_iter()
-                    .chain(std::iter::once(batch_flight_data))
-                    .map(Ok)
-            });
-
-        let output = futures::stream::iter(schema.chain(batches).collect::<Vec<_>>());
-
-        Ok(Response::new(Box::pin(output) as Self::DoGetStream))
-    }
-
-    async fn handshake(
-        &self,
-        _request: Request<Streaming<HandshakeRequest>>,
-    ) -> Result<Response<Self::HandshakeStream>, Status> {
-        Err(Status::unimplemented("Not yet implemented"))
-    }
-
-    async fn list_flights(
-        &self,
-        _request: Request<Criteria>,
-    ) -> Result<Response<Self::ListFlightsStream>, Status> {
-        Err(Status::unimplemented("Not yet implemented"))
-    }
-
-    async fn get_flight_info(
-        &self,
-        request: Request<FlightDescriptor>,
-    ) -> Result<Response<FlightInfo>, Status> {
-        let descriptor = request.into_inner();
-
-        match descriptor.r#type {
-            t if t == DescriptorType::Path as i32 => {
-                let path = &descriptor.path;
-                if path.is_empty() {
-                    return Err(Status::invalid_argument("Invalid path"));
-                }
-
-                let uploaded_chunks = self.uploaded_chunks.lock().await;
-                let flight = uploaded_chunks.get(&path[0]).ok_or_else(|| {
-                    Status::not_found(format!("Could not find flight. {}", path[0]))
-                })?;
-
-                let endpoint = self.endpoint_from_path(&path[0]);
-
-                let total_records: usize =
-                    flight.chunks.iter().map(|chunk| chunk.num_rows()).sum();
-
-                let options = arrow::ipc::writer::IpcWriteOptions::default();
-                let schema = arrow_flight::utils::ipc_message_from_arrow_schema(
-                    &flight.schema,
-                    &options,
-                )
-                .expect(
-                    "Could not generate schema bytes from schema stored by a DoPut; \
-                         this should be impossible",
-                );
-
-                let info = FlightInfo {
-                    schema,
-                    flight_descriptor: Some(descriptor.clone()),
-                    endpoint: vec![endpoint],
-                    total_records: total_records as i64,
-                    total_bytes: -1,
-                };
-
-                Ok(Response::new(info))
-            }
-            other => Err(Status::unimplemented(format!("Request type: {}", other))),
-        }
-    }
-
-    async fn do_put(
-        &self,
-        request: Request<Streaming<FlightData>>,
-    ) -> Result<Response<Self::DoPutStream>, Status> {
-        let mut input_stream = request.into_inner();
-        let flight_data = input_stream
-            .message()
-            .await?
-            .ok_or_else(|| Status::invalid_argument("Must send some FlightData"))?;
-
-        let descriptor = flight_data
-            .flight_descriptor
-            .clone()
-            .ok_or_else(|| Status::invalid_argument("Must have a descriptor"))?;
-
-        if descriptor.r#type != DescriptorType::Path as i32 || descriptor.path.is_empty()
-        {
-            return Err(Status::invalid_argument("Must specify a path"));
-        }
-
-        let key = descriptor.path[0].clone();
-
-        let schema = Schema::try_from(&flight_data)
-            .map_err(|e| Status::invalid_argument(format!("Invalid schema: {:?}", e)))?;
-        let schema_ref = Arc::new(schema.clone());
-
-        let (response_tx, response_rx) = mpsc::channel(10);
-
-        let uploaded_chunks = self.uploaded_chunks.clone();
-
-        tokio::spawn(async {
-            let mut error_tx = response_tx.clone();
-            if let Err(e) = save_uploaded_chunks(
-                uploaded_chunks,
-                schema_ref,
-                input_stream,
-                response_tx,
-                schema,
-                key,
-            )
-            .await
-            {
-                error_tx.send(Err(e)).await.expect("Error sending error")
-            }
-        });
-
-        Ok(Response::new(Box::pin(response_rx) as Self::DoPutStream))
-    }
-
-    async fn do_action(
-        &self,
-        _request: Request<Action>,
-    ) -> Result<Response<Self::DoActionStream>, Status> {
-        Err(Status::unimplemented("Not yet implemented"))
-    }
-
-    async fn list_actions(
-        &self,
-        _request: Request<Empty>,
-    ) -> Result<Response<Self::ListActionsStream>, Status> {
-        Err(Status::unimplemented("Not yet implemented"))
-    }
-
-    async fn do_exchange(
-        &self,
-        _request: Request<Streaming<FlightData>>,
-    ) -> Result<Response<Self::DoExchangeStream>, Status> {
-        Err(Status::unimplemented("Not yet implemented"))
-    }
-}
-
-async fn send_app_metadata(
-    tx: &mut mpsc::Sender<Result<PutResult, Status>>,
-    app_metadata: &[u8],
-) -> Result<(), Status> {
-    tx.send(Ok(PutResult {
-        app_metadata: app_metadata.to_vec(),
-    }))
-    .await
-    .map_err(|e| Status::internal(format!("Could not send PutResult: {:?}", e)))
-}
-
-async fn record_batch_from_message(
-    message: ipc::Message<'_>,
-    data_body: &[u8],
-    schema_ref: SchemaRef,
-    dictionaries_by_field: &[Option<ArrayRef>],
-) -> Result<RecordBatch, Status> {
-    let ipc_batch = message.header_as_record_batch().ok_or_else(|| {
-        Status::internal("Could not parse message header as record batch")
-    })?;
-
-    let arrow_batch_result = reader::read_record_batch(
-        data_body,
-        ipc_batch,
-        schema_ref,
-        &dictionaries_by_field,
-    );
-
-    arrow_batch_result.map_err(|e| {
-        Status::internal(format!("Could not convert to RecordBatch: {:?}", e))
-    })
-}
-
-async fn dictionary_from_message(
-    message: ipc::Message<'_>,
-    data_body: &[u8],
-    schema_ref: SchemaRef,
-    dictionaries_by_field: &mut [Option<ArrayRef>],
-) -> Result<(), Status> {
-    let ipc_batch = message.header_as_dictionary_batch().ok_or_else(|| {
-        Status::internal("Could not parse message header as dictionary batch")
-    })?;
-
-    let dictionary_batch_result =
-        reader::read_dictionary(data_body, ipc_batch, &schema_ref, dictionaries_by_field);
-    dictionary_batch_result.map_err(|e| {
-        Status::internal(format!("Could not convert to Dictionary: {:?}", e))
-    })
-}
-
-async fn save_uploaded_chunks(
-    uploaded_chunks: Arc<Mutex<HashMap<String, IntegrationDataset>>>,
-    schema_ref: Arc<Schema>,
-    mut input_stream: Streaming<FlightData>,
-    mut response_tx: mpsc::Sender<Result<PutResult, Status>>,
-    schema: Schema,
-    key: String,
-) -> Result<(), Status> {
-    let mut chunks = vec![];
-    let mut uploaded_chunks = uploaded_chunks.lock().await;
-
-    let mut dictionaries_by_field = vec![None; schema_ref.fields().len()];
-
-    while let Some(Ok(data)) = input_stream.next().await {
-        let message = arrow::ipc::root_as_message(&data.data_header[..])
-            .map_err(|e| Status::internal(format!("Could not parse message: {:?}", e)))?;
-
-        match message.header_type() {
-            ipc::MessageHeader::Schema => {
-                return Err(Status::internal(
-                    "Not expecting a schema when messages are read",
-                ))
-            }
-            ipc::MessageHeader::RecordBatch => {
-                send_app_metadata(&mut response_tx, &data.app_metadata).await?;
-
-                let batch = record_batch_from_message(
-                    message,
-                    &data.data_body,
-                    schema_ref.clone(),
-                    &dictionaries_by_field,
-                )
-                .await?;
-
-                chunks.push(batch);
-            }
-            ipc::MessageHeader::DictionaryBatch => {
-                dictionary_from_message(
-                    message,
-                    &data.data_body,
-                    schema_ref.clone(),
-                    &mut dictionaries_by_field,
-                )
-                .await?;
-            }
-            t => {
-                return Err(Status::internal(format!(
-                    "Reading types other than record batches not yet supported, \
-                                              unable to read {:?}",
-                    t
-                )));
-            }
-        }
-    }
-
-    let dataset = IntegrationDataset { schema, chunks };
-    uploaded_chunks.insert(key, dataset);
-
-    Ok(())
-}
diff --git a/rust/integration-testing/src/flight_server_scenarios/middleware.rs b/rust/integration-testing/src/flight_server_scenarios/middleware.rs
deleted file mode 100644
index 1416acc4088..00000000000
--- a/rust/integration-testing/src/flight_server_scenarios/middleware.rs
+++ /dev/null
@@ -1,150 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::pin::Pin;
-
-use arrow_flight::{
-    flight_descriptor::DescriptorType, flight_service_server::FlightService,
-    flight_service_server::FlightServiceServer, Action, ActionType, Criteria, Empty,
-    FlightData, FlightDescriptor, FlightInfo, HandshakeRequest, HandshakeResponse,
-    PutResult, SchemaResult, Ticket,
-};
-use futures::Stream;
-use tonic::{transport::Server, Request, Response, Status, Streaming};
-
-type TonicStream<T> = Pin<Box<dyn Stream<Item = T> + Send + Sync + 'static>>;
-
-type Error = Box<dyn std::error::Error + Send + Sync + 'static>;
-type Result<T = (), E = Error> = std::result::Result<T, E>;
-
-pub async fn scenario_setup(port: &str) -> Result {
-    let service = MiddlewareScenarioImpl {};
-    let svc = FlightServiceServer::new(service);
-    let addr = super::listen_on(port).await?;
-
-    let server = Server::builder().add_service(svc).serve(addr);
-
-    // NOTE: Log output used in tests to signal server is ready
-    println!("Server listening on localhost:{}", addr.port());
-    server.await?;
-    Ok(())
-}
-
-#[derive(Clone, Default)]
-pub struct MiddlewareScenarioImpl {}
-
-#[tonic::async_trait]
-impl FlightService for MiddlewareScenarioImpl {
-    type HandshakeStream = TonicStream<Result<HandshakeResponse, Status>>;
-    type ListFlightsStream = TonicStream<Result<FlightInfo, Status>>;
-    type DoGetStream = TonicStream<Result<FlightData, Status>>;
-    type DoPutStream = TonicStream<Result<PutResult, Status>>;
-    type DoActionStream = TonicStream<Result<arrow_flight::Result, Status>>;
-    type ListActionsStream = TonicStream<Result<ActionType, Status>>;
-    type DoExchangeStream = TonicStream<Result<FlightData, Status>>;
-
-    async fn get_schema(
-        &self,
-        _request: Request<FlightDescriptor>,
-    ) -> Result<Response<SchemaResult>, Status> {
-        Err(Status::unimplemented("Not yet implemented"))
-    }
-
-    async fn do_get(
-        &self,
-        _request: Request<Ticket>,
-    ) -> Result<Response<Self::DoGetStream>, Status> {
-        Err(Status::unimplemented("Not yet implemented"))
-    }
-
-    async fn handshake(
-        &self,
-        _request: Request<Streaming<HandshakeRequest>>,
-    ) -> Result<Response<Self::HandshakeStream>, Status> {
-        Err(Status::unimplemented("Not yet implemented"))
-    }
-
-    async fn list_flights(
-        &self,
-        _request: Request<Criteria>,
-    ) -> Result<Response<Self::ListFlightsStream>, Status> {
-        Err(Status::unimplemented("Not yet implemented"))
-    }
-
-    async fn get_flight_info(
-        &self,
-        request: Request<FlightDescriptor>,
-    ) -> Result<Response<FlightInfo>, Status> {
-        let middleware_header = request.metadata().get("x-middleware").cloned();
-
-        let descriptor = request.into_inner();
-
-        if descriptor.r#type == DescriptorType::Cmd as i32 && descriptor.cmd == b"success"
-        {
-            // Return a fake location - the test doesn't read it
-            let endpoint = super::endpoint("foo", "grpc+tcp://localhost:10010");
-
-            let info = FlightInfo {
-                flight_descriptor: Some(descriptor),
-                endpoint: vec![endpoint],
-                ..Default::default()
-            };
-
-            let mut response = Response::new(info);
-            if let Some(value) = middleware_header {
-                response.metadata_mut().insert("x-middleware", value);
-            }
-
-            return Ok(response);
-        }
-
-        let mut status = Status::unknown("Unknown");
-        if let Some(value) = middleware_header {
-            status.metadata_mut().insert("x-middleware", value);
-        }
-
-        Err(status)
-    }
-
-    async fn do_put(
-        &self,
-        _request: Request<Streaming<FlightData>>,
-    ) -> Result<Response<Self::DoPutStream>, Status> {
-        Err(Status::unimplemented("Not yet implemented"))
-    }
-
-    async fn do_action(
-        &self,
-        _request: Request<Action>,
-    ) -> Result<Response<Self::DoActionStream>, Status> {
-        Err(Status::unimplemented("Not yet implemented"))
-    }
-
-    async fn list_actions(
-        &self,
-        _request: Request<Empty>,
-    ) -> Result<Response<Self::ListActionsStream>, Status> {
-        Err(Status::unimplemented("Not yet implemented"))
-    }
-
-    async fn do_exchange(
-        &self,
-        _request: Request<Streaming<FlightData>>,
-    ) -> Result<Response<Self::DoExchangeStream>, Status> {
-        Err(Status::unimplemented("Not yet implemented"))
-    }
-}
diff --git a/rust/integration-testing/src/lib.rs b/rust/integration-testing/src/lib.rs
deleted file mode 100644
index 22eed0395c5..00000000000
--- a/rust/integration-testing/src/lib.rs
+++ /dev/null
@@ -1,601 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Common code used in the integration test binaries
-
-use hex::decode;
-use serde_json::Value;
-
-use arrow::util::integration_util::ArrowJsonBatch;
-
-use arrow::array::*;
-use arrow::datatypes::{DataType, Field, IntervalUnit, Schema};
-use arrow::error::{ArrowError, Result};
-use arrow::record_batch::RecordBatch;
-use arrow::{
-    buffer::Buffer,
-    buffer::MutableBuffer,
-    datatypes::ToByteSlice,
-    util::{bit_util, integration_util::*},
-};
-
-use std::collections::HashMap;
-use std::fs::File;
-use std::io::BufReader;
-use std::sync::Arc;
-
-/// The expected username for the basic auth integration test.
-pub const AUTH_USERNAME: &str = "arrow";
-/// The expected password for the basic auth integration test.
-pub const AUTH_PASSWORD: &str = "flight";
-
-pub mod flight_client_scenarios;
-pub mod flight_server_scenarios;
-
-pub struct ArrowFile {
-    pub schema: Schema,
-    // we can evolve this into a concrete Arrow type
-    // this is temporarily not being read from
-    pub _dictionaries: HashMap<i64, ArrowJsonDictionaryBatch>,
-    pub batches: Vec<RecordBatch>,
-}
-
-pub fn read_json_file(json_name: &str) -> Result<ArrowFile> {
-    let json_file = File::open(json_name)?;
-    let reader = BufReader::new(json_file);
-    let arrow_json: Value = serde_json::from_reader(reader).unwrap();
-    let schema = Schema::from(&arrow_json["schema"])?;
-    // read dictionaries
-    let mut dictionaries = HashMap::new();
-    if let Some(dicts) = arrow_json.get("dictionaries") {
-        for d in dicts
-            .as_array()
-            .expect("Unable to get dictionaries as array")
-        {
-            let json_dict: ArrowJsonDictionaryBatch = serde_json::from_value(d.clone())
-                .expect("Unable to get dictionary from JSON");
-            // TODO: convert to a concrete Arrow type
-            dictionaries.insert(json_dict.id, json_dict);
-        }
-    }
-
-    let mut batches = vec![];
-    for b in arrow_json["batches"].as_array().unwrap() {
-        let json_batch: ArrowJsonBatch = serde_json::from_value(b.clone()).unwrap();
-        let batch = record_batch_from_json(&schema, json_batch, Some(&dictionaries))?;
-        batches.push(batch);
-    }
-    Ok(ArrowFile {
-        schema,
-        _dictionaries: dictionaries,
-        batches,
-    })
-}
-
-fn record_batch_from_json(
-    schema: &Schema,
-    json_batch: ArrowJsonBatch,
-    json_dictionaries: Option<&HashMap<i64, ArrowJsonDictionaryBatch>>,
-) -> Result<RecordBatch> {
-    let mut columns = vec![];
-
-    for (field, json_col) in schema.fields().iter().zip(json_batch.columns) {
-        let col = array_from_json(field, json_col, json_dictionaries)?;
-        columns.push(col);
-    }
-
-    RecordBatch::try_new(Arc::new(schema.clone()), columns)
-}
-
-/// Construct an Arrow array from a partially typed JSON column
-fn array_from_json(
-    field: &Field,
-    json_col: ArrowJsonColumn,
-    dictionaries: Option<&HashMap<i64, ArrowJsonDictionaryBatch>>,
-) -> Result<ArrayRef> {
-    match field.data_type() {
-        DataType::Null => Ok(Arc::new(NullArray::new(json_col.count))),
-        DataType::Boolean => {
-            let mut b = BooleanBuilder::new(json_col.count);
-            for (is_valid, value) in json_col
-                .validity
-                .as_ref()
-                .unwrap()
-                .iter()
-                .zip(json_col.data.unwrap())
-            {
-                match is_valid {
-                    1 => b.append_value(value.as_bool().unwrap()),
-                    _ => b.append_null(),
-                }?;
-            }
-            Ok(Arc::new(b.finish()))
-        }
-        DataType::Int8 => {
-            let mut b = Int8Builder::new(json_col.count);
-            for (is_valid, value) in json_col
-                .validity
-                .as_ref()
-                .unwrap()
-                .iter()
-                .zip(json_col.data.unwrap())
-            {
-                match is_valid {
-                    1 => b.append_value(value.as_i64().ok_or_else(|| {
-                        ArrowError::JsonError(format!(
-                            "Unable to get {:?} as int64",
-                            value
-                        ))
-                    })? as i8),
-                    _ => b.append_null(),
-                }?;
-            }
-            Ok(Arc::new(b.finish()))
-        }
-        DataType::Int16 => {
-            let mut b = Int16Builder::new(json_col.count);
-            for (is_valid, value) in json_col
-                .validity
-                .as_ref()
-                .unwrap()
-                .iter()
-                .zip(json_col.data.unwrap())
-            {
-                match is_valid {
-                    1 => b.append_value(value.as_i64().unwrap() as i16),
-                    _ => b.append_null(),
-                }?;
-            }
-            Ok(Arc::new(b.finish()))
-        }
-        DataType::Int32
-        | DataType::Date32
-        | DataType::Time32(_)
-        | DataType::Interval(IntervalUnit::YearMonth) => {
-            let mut b = Int32Builder::new(json_col.count);
-            for (is_valid, value) in json_col
-                .validity
-                .as_ref()
-                .unwrap()
-                .iter()
-                .zip(json_col.data.unwrap())
-            {
-                match is_valid {
-                    1 => b.append_value(value.as_i64().unwrap() as i32),
-                    _ => b.append_null(),
-                }?;
-            }
-            let array = Arc::new(b.finish()) as ArrayRef;
-            arrow::compute::cast(&array, field.data_type())
-        }
-        DataType::Int64
-        | DataType::Date64
-        | DataType::Time64(_)
-        | DataType::Timestamp(_, _)
-        | DataType::Duration(_)
-        | DataType::Interval(IntervalUnit::DayTime) => {
-            let mut b = Int64Builder::new(json_col.count);
-            for (is_valid, value) in json_col
-                .validity
-                .as_ref()
-                .unwrap()
-                .iter()
-                .zip(json_col.data.unwrap())
-            {
-                match is_valid {
-                    1 => b.append_value(match value {
-                        Value::Number(n) => n.as_i64().unwrap(),
-                        Value::String(s) => {
-                            s.parse().expect("Unable to parse string as i64")
-                        }
-                        _ => panic!("Unable to parse {:?} as number", value),
-                    }),
-                    _ => b.append_null(),
-                }?;
-            }
-            let array = Arc::new(b.finish()) as ArrayRef;
-            arrow::compute::cast(&array, field.data_type())
-        }
-        DataType::UInt8 => {
-            let mut b = UInt8Builder::new(json_col.count);
-            for (is_valid, value) in json_col
-                .validity
-                .as_ref()
-                .unwrap()
-                .iter()
-                .zip(json_col.data.unwrap())
-            {
-                match is_valid {
-                    1 => b.append_value(value.as_u64().unwrap() as u8),
-                    _ => b.append_null(),
-                }?;
-            }
-            Ok(Arc::new(b.finish()))
-        }
-        DataType::UInt16 => {
-            let mut b = UInt16Builder::new(json_col.count);
-            for (is_valid, value) in json_col
-                .validity
-                .as_ref()
-                .unwrap()
-                .iter()
-                .zip(json_col.data.unwrap())
-            {
-                match is_valid {
-                    1 => b.append_value(value.as_u64().unwrap() as u16),
-                    _ => b.append_null(),
-                }?;
-            }
-            Ok(Arc::new(b.finish()))
-        }
-        DataType::UInt32 => {
-            let mut b = UInt32Builder::new(json_col.count);
-            for (is_valid, value) in json_col
-                .validity
-                .as_ref()
-                .unwrap()
-                .iter()
-                .zip(json_col.data.unwrap())
-            {
-                match is_valid {
-                    1 => b.append_value(value.as_u64().unwrap() as u32),
-                    _ => b.append_null(),
-                }?;
-            }
-            Ok(Arc::new(b.finish()))
-        }
-        DataType::UInt64 => {
-            let mut b = UInt64Builder::new(json_col.count);
-            for (is_valid, value) in json_col
-                .validity
-                .as_ref()
-                .unwrap()
-                .iter()
-                .zip(json_col.data.unwrap())
-            {
-                match is_valid {
-                    1 => b.append_value(
-                        value
-                            .as_str()
-                            .unwrap()
-                            .parse()
-                            .expect("Unable to parse string as u64"),
-                    ),
-                    _ => b.append_null(),
-                }?;
-            }
-            Ok(Arc::new(b.finish()))
-        }
-        DataType::Float32 => {
-            let mut b = Float32Builder::new(json_col.count);
-            for (is_valid, value) in json_col
-                .validity
-                .as_ref()
-                .unwrap()
-                .iter()
-                .zip(json_col.data.unwrap())
-            {
-                match is_valid {
-                    1 => b.append_value(value.as_f64().unwrap() as f32),
-                    _ => b.append_null(),
-                }?;
-            }
-            Ok(Arc::new(b.finish()))
-        }
-        DataType::Float64 => {
-            let mut b = Float64Builder::new(json_col.count);
-            for (is_valid, value) in json_col
-                .validity
-                .as_ref()
-                .unwrap()
-                .iter()
-                .zip(json_col.data.unwrap())
-            {
-                match is_valid {
-                    1 => b.append_value(value.as_f64().unwrap()),
-                    _ => b.append_null(),
-                }?;
-            }
-            Ok(Arc::new(b.finish()))
-        }
-        DataType::Binary => {
-            let mut b = BinaryBuilder::new(json_col.count);
-            for (is_valid, value) in json_col
-                .validity
-                .as_ref()
-                .unwrap()
-                .iter()
-                .zip(json_col.data.unwrap())
-            {
-                match is_valid {
-                    1 => {
-                        let v = decode(value.as_str().unwrap()).unwrap();
-                        b.append_value(&v)
-                    }
-                    _ => b.append_null(),
-                }?;
-            }
-            Ok(Arc::new(b.finish()))
-        }
-        DataType::LargeBinary => {
-            let mut b = LargeBinaryBuilder::new(json_col.count);
-            for (is_valid, value) in json_col
-                .validity
-                .as_ref()
-                .unwrap()
-                .iter()
-                .zip(json_col.data.unwrap())
-            {
-                match is_valid {
-                    1 => {
-                        let v = decode(value.as_str().unwrap()).unwrap();
-                        b.append_value(&v)
-                    }
-                    _ => b.append_null(),
-                }?;
-            }
-            Ok(Arc::new(b.finish()))
-        }
-        DataType::Utf8 => {
-            let mut b = StringBuilder::new(json_col.count);
-            for (is_valid, value) in json_col
-                .validity
-                .as_ref()
-                .unwrap()
-                .iter()
-                .zip(json_col.data.unwrap())
-            {
-                match is_valid {
-                    1 => b.append_value(value.as_str().unwrap()),
-                    _ => b.append_null(),
-                }?;
-            }
-            Ok(Arc::new(b.finish()))
-        }
-        DataType::LargeUtf8 => {
-            let mut b = LargeStringBuilder::new(json_col.count);
-            for (is_valid, value) in json_col
-                .validity
-                .as_ref()
-                .unwrap()
-                .iter()
-                .zip(json_col.data.unwrap())
-            {
-                match is_valid {
-                    1 => b.append_value(value.as_str().unwrap()),
-                    _ => b.append_null(),
-                }?;
-            }
-            Ok(Arc::new(b.finish()))
-        }
-        DataType::FixedSizeBinary(len) => {
-            let mut b = FixedSizeBinaryBuilder::new(json_col.count, *len);
-            for (is_valid, value) in json_col
-                .validity
-                .as_ref()
-                .unwrap()
-                .iter()
-                .zip(json_col.data.unwrap())
-            {
-                match is_valid {
-                    1 => {
-                        let v = hex::decode(value.as_str().unwrap()).unwrap();
-                        b.append_value(&v)
-                    }
-                    _ => b.append_null(),
-                }?;
-            }
-            Ok(Arc::new(b.finish()))
-        }
-        DataType::List(child_field) => {
-            let null_buf = create_null_buf(&json_col);
-            let children = json_col.children.clone().unwrap();
-            let child_array = array_from_json(
-                &child_field,
-                children.get(0).unwrap().clone(),
-                dictionaries,
-            )?;
-            let offsets: Vec<i32> = json_col
-                .offset
-                .unwrap()
-                .iter()
-                .map(|v| v.as_i64().unwrap() as i32)
-                .collect();
-            let list_data = ArrayData::builder(field.data_type().clone())
-                .len(json_col.count)
-                .offset(0)
-                .add_buffer(Buffer::from(&offsets.to_byte_slice()))
-                .add_child_data(child_array.data().clone())
-                .null_bit_buffer(null_buf)
-                .build();
-            Ok(Arc::new(ListArray::from(list_data)))
-        }
-        DataType::LargeList(child_field) => {
-            let null_buf = create_null_buf(&json_col);
-            let children = json_col.children.clone().unwrap();
-            let child_array = array_from_json(
-                &child_field,
-                children.get(0).unwrap().clone(),
-                dictionaries,
-            )?;
-            let offsets: Vec<i64> = json_col
-                .offset
-                .unwrap()
-                .iter()
-                .map(|v| match v {
-                    Value::Number(n) => n.as_i64().unwrap(),
-                    Value::String(s) => s.parse::<i64>().unwrap(),
-                    _ => panic!("64-bit offset must be either string or number"),
-                })
-                .collect();
-            let list_data = ArrayData::builder(field.data_type().clone())
-                .len(json_col.count)
-                .offset(0)
-                .add_buffer(Buffer::from(&offsets.to_byte_slice()))
-                .add_child_data(child_array.data().clone())
-                .null_bit_buffer(null_buf)
-                .build();
-            Ok(Arc::new(LargeListArray::from(list_data)))
-        }
-        DataType::FixedSizeList(child_field, _) => {
-            let children = json_col.children.clone().unwrap();
-            let child_array = array_from_json(
-                &child_field,
-                children.get(0).unwrap().clone(),
-                dictionaries,
-            )?;
-            let null_buf = create_null_buf(&json_col);
-            let list_data = ArrayData::builder(field.data_type().clone())
-                .len(json_col.count)
-                .add_child_data(child_array.data().clone())
-                .null_bit_buffer(null_buf)
-                .build();
-            Ok(Arc::new(FixedSizeListArray::from(list_data)))
-        }
-        DataType::Struct(fields) => {
-            // construct struct with null data
-            let null_buf = create_null_buf(&json_col);
-            let mut array_data = ArrayData::builder(field.data_type().clone())
-                .len(json_col.count)
-                .null_bit_buffer(null_buf);
-
-            for (field, col) in fields.iter().zip(json_col.children.unwrap()) {
-                let array = array_from_json(field, col, dictionaries)?;
-                array_data = array_data.add_child_data(array.data().clone());
-            }
-
-            let array = StructArray::from(array_data.build());
-            Ok(Arc::new(array))
-        }
-        DataType::Dictionary(key_type, value_type) => {
-            let dict_id = field.dict_id().ok_or_else(|| {
-                ArrowError::JsonError(format!(
-                    "Unable to find dict_id for field {:?}",
-                    field
-                ))
-            })?;
-            // find dictionary
-            let dictionary = dictionaries
-                .ok_or_else(|| {
-                    ArrowError::JsonError(format!(
-                        "Unable to find any dictionaries for field {:?}",
-                        field
-                    ))
-                })?
-                .get(&dict_id);
-            match dictionary {
-                Some(dictionary) => dictionary_array_from_json(
-                    field, json_col, key_type, value_type, dictionary,
-                ),
-                None => Err(ArrowError::JsonError(format!(
-                    "Unable to find dictionary for field {:?}",
-                    field
-                ))),
-            }
-        }
-        t => Err(ArrowError::JsonError(format!(
-            "data type {:?} not supported",
-            t
-        ))),
-    }
-}
-
-fn dictionary_array_from_json(
-    field: &Field,
-    json_col: ArrowJsonColumn,
-    dict_key: &DataType,
-    dict_value: &DataType,
-    dictionary: &ArrowJsonDictionaryBatch,
-) -> Result<ArrayRef> {
-    match dict_key {
-        DataType::Int8
-        | DataType::Int16
-        | DataType::Int32
-        | DataType::Int64
-        | DataType::UInt8
-        | DataType::UInt16
-        | DataType::UInt32
-        | DataType::UInt64 => {
-            let null_buf = create_null_buf(&json_col);
-
-            // build the key data into a buffer, then construct values separately
-            let key_field = Field::new_dict(
-                "key",
-                dict_key.clone(),
-                field.is_nullable(),
-                field
-                    .dict_id()
-                    .expect("Dictionary fields must have a dict_id value"),
-                field
-                    .dict_is_ordered()
-                    .expect("Dictionary fields must have a dict_is_ordered value"),
-            );
-            let keys = array_from_json(&key_field, json_col, None)?;
-            // note: not enough info on nullability of dictionary
-            let value_field = Field::new("value", dict_value.clone(), true);
-            println!("dictionary value type: {:?}", dict_value);
-            let values =
-                array_from_json(&value_field, dictionary.data.columns[0].clone(), None)?;
-
-            // convert key and value to dictionary data
-            let dict_data = ArrayData::builder(field.data_type().clone())
-                .len(keys.len())
-                .add_buffer(keys.data().buffers()[0].clone())
-                .null_bit_buffer(null_buf)
-                .add_child_data(values.data().clone())
-                .build();
-
-            let array = match dict_key {
-                DataType::Int8 => {
-                    Arc::new(Int8DictionaryArray::from(dict_data)) as ArrayRef
-                }
-                DataType::Int16 => Arc::new(Int16DictionaryArray::from(dict_data)),
-                DataType::Int32 => Arc::new(Int32DictionaryArray::from(dict_data)),
-                DataType::Int64 => Arc::new(Int64DictionaryArray::from(dict_data)),
-                DataType::UInt8 => Arc::new(UInt8DictionaryArray::from(dict_data)),
-                DataType::UInt16 => Arc::new(UInt16DictionaryArray::from(dict_data)),
-                DataType::UInt32 => Arc::new(UInt32DictionaryArray::from(dict_data)),
-                DataType::UInt64 => Arc::new(UInt64DictionaryArray::from(dict_data)),
-                _ => unreachable!(),
-            };
-            Ok(array)
-        }
-        _ => Err(ArrowError::JsonError(format!(
-            "Dictionary key type {:?} not supported",
-            dict_key
-        ))),
-    }
-}
-
-/// A helper to create a null buffer from a Vec<bool>
-fn create_null_buf(json_col: &ArrowJsonColumn) -> Buffer {
-    let num_bytes = bit_util::ceil(json_col.count, 8);
-    let mut null_buf = MutableBuffer::new(num_bytes).with_bitset(num_bytes, false);
-    json_col
-        .validity
-        .clone()
-        .unwrap()
-        .iter()
-        .enumerate()
-        .for_each(|(i, v)| {
-            let null_slice = null_buf.as_slice_mut();
-            if *v != 0 {
-                bit_util::set_bit(null_slice, i);
-            }
-        });
-    null_buf.into()
-}
diff --git a/rust/parquet/Cargo.toml b/rust/parquet/Cargo.toml
deleted file mode 100644
index b0c91555de0..00000000000
--- a/rust/parquet/Cargo.toml
+++ /dev/null
@@ -1,78 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-[package]
-name = "parquet"
-version = "5.0.0-SNAPSHOT"
-license = "Apache-2.0"
-description = "Apache Parquet implementation in Rust"
-homepage = "https://github.com/apache/arrow"
-repository = "https://github.com/apache/arrow"
-authors = ["Apache Arrow <dev@arrow.apache.org>"]
-keywords = [ "arrow", "parquet", "hadoop" ]
-readme = "README.md"
-build = "build.rs"
-edition = "2018"
-
-[dependencies]
-# update note: pin `parquet-format` to specific version until it does not break at minor
-# version, see ARROW-11187.
-parquet-format = "~2.6.1"
-byteorder = "1"
-thrift = "0.13"
-snap = { version = "1.0", optional = true }
-brotli = { version = "3.3", optional = true }
-flate2 = { version = "1.0", optional = true }
-lz4 = { version = "1.23", optional = true }
-zstd = { version = "0.7", optional = true }
-chrono = "0.4"
-num-bigint = "0.3"
-arrow = { path = "../arrow", version = "5.0.0-SNAPSHOT", optional = true }
-base64 = { version = "0.12", optional = true }
-clap = { version = "2.33.3", optional = true }
-serde_json = { version = "1.0", features = ["preserve_order"], optional = true }
-
-[dev-dependencies]
-criterion = "0.3"
-rand = "0.8"
-snap = "1.0"
-brotli = "3.3"
-flate2 = "1.0"
-lz4 = "1.23"
-zstd = "0.7"
-arrow = { path = "../arrow", version = "5.0.0-SNAPSHOT" }
-serde_json = { version = "1.0", features = ["preserve_order"] }
-
-[features]
-default = ["arrow", "snap", "brotli", "flate2", "lz4", "zstd", "base64"]
-cli = ["serde_json", "base64", "clap"]
-
-[[ bin ]]
-name = "parquet-read"
-required-features = ["cli"]
-
-[[ bin ]]
-name = "parquet-schema"
-required-features = ["cli"]
-
-[[ bin ]]
-name = "parquet-rowcount"
-required-features = ["cli"]
-
-[[bench]]
-name = "arrow_writer"
-harness = false
diff --git a/rust/parquet/README.md b/rust/parquet/README.md
deleted file mode 100644
index 836a23bbc12..00000000000
--- a/rust/parquet/README.md
+++ /dev/null
@@ -1,126 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-# An Apache Parquet implementation in Rust
-
-## Usage
-Add this to your Cargo.toml:
-```toml
-[dependencies]
-parquet = "5.0.0-SNAPSHOT"
-```
-
-and this to your crate root:
-```rust
-extern crate parquet;
-```
-
-Example usage of reading data:
-```rust
-use std::fs::File;
-use std::path::Path;
-use parquet::file::reader::{FileReader, SerializedFileReader};
-
-let file = File::open(&Path::new("/path/to/file")).unwrap();
-let reader = SerializedFileReader::new(file).unwrap();
-let mut iter = reader.get_row_iter(None).unwrap();
-while let Some(record) = iter.next() {
-    println!("{}", record);
-}
-```
-See [crate documentation](https://docs.rs/crate/parquet/5.0.0-SNAPSHOT) on available API.
-
-## Upgrading from versions prior to 4.0
-
-If you are upgrading from version 3.0 or previous of this crate, you
-likely need to change your code to use [`ConvertedType`] rather than
-[`LogicalType`] to preserve existing behaviour in your code.
-
-Version 2.4.0 of the Parquet format introduced a `LogicalType` to replace the existing `ConvertedType`.
-This crate used `parquet::basic::LogicalType` to map to the `ConvertedType`, but this has been renamed to `parquet::basic::ConvertedType` from version 4.0 of this crate.
-
-The `ConvertedType` is deprecated in the format, but is still written
-to preserve backward compatibility.
-It is preferred that `LogicalType` is used, as it supports nanosecond
-precision timestamps without using the deprecated `Int96` Parquet type.
-
-## Supported Parquet Version
-- Parquet-format 2.6.0
-
-To update Parquet format to a newer version, check if [parquet-format](https://github.com/sunchao/parquet-format-rs)
-version is available. Then simply update version of `parquet-format` crate in Cargo.toml.
-
-## Features
-- [X] All encodings supported
-- [X] All compression codecs supported
-- [X] Read support
-  - [X] Primitive column value readers
-  - [X] Row record reader
-  - [X] Arrow record reader
-- [ ] Statistics support
-- [X] Write support
-  - [X] Primitive column value writers
-  - [ ] Row record writer
-  - [X] Arrow record writer
-- [ ] Predicate pushdown
-- [X] Parquet format 2.6.0 support
-
-## Requirements
-
-Parquet requires LLVM.  Our windows CI image includes LLVM but to build the libraries locally windows
-users will have to install LLVM. Follow [this](https://github.com/appveyor/ci/issues/2651) link for info.
-
-## Build
-Run `cargo build` or `cargo build --release` to build in release mode.
-Some features take advantage of SSE4.2 instructions, which can be
-enabled by adding `RUSTFLAGS="-C target-feature=+sse4.2"` before the
-`cargo build` command.
-
-## Test
-Run `cargo test` for unit tests. To also run tests related to the binaries, use `cargo test --features cli`.
-
-## Binaries
-The following binaries are provided (use `cargo install --features cli` to install them):
-- **parquet-schema** for printing Parquet file schema and metadata.
-`Usage: parquet-schema <file-path>`, where `file-path` is the path to a Parquet file. Use `-v/--verbose` flag
-to print full metadata or schema only (when not specified only schema will be printed).
-
-- **parquet-read** for reading records from a Parquet file.
-`Usage: parquet-read <file-path> [num-records]`, where `file-path` is the path to a Parquet file,
-and `num-records` is the number of records to read from a file (when not specified all records will
-be printed). Use `-j/--json` to print records in JSON lines format.
-
-- **parquet-rowcount** for reporting the number of records in one or more Parquet files.
-`Usage: parquet-rowcount <file-paths>...`, where `<file-paths>...` is a space separated list of one or more
-files to read.
-
-If you see `Library not loaded` error, please make sure `LD_LIBRARY_PATH` is set properly:
-```
-export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$(rustc --print sysroot)/lib
-```
-
-## Benchmarks
-Run `cargo bench` for benchmarks.
-
-## Docs
-To build documentation, run `cargo doc --no-deps`.
-To compile and view in the browser, run `cargo doc --no-deps --open`.
-
-## License
-Licensed under the Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0.
diff --git a/rust/parquet/benches/arrow_writer.rs b/rust/parquet/benches/arrow_writer.rs
deleted file mode 100644
index 069ed39d103..00000000000
--- a/rust/parquet/benches/arrow_writer.rs
+++ /dev/null
@@ -1,202 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#[macro_use]
-extern crate criterion;
-use criterion::{Criterion, Throughput};
-
-extern crate arrow;
-extern crate parquet;
-
-use std::sync::Arc;
-
-use arrow::datatypes::*;
-use arrow::{record_batch::RecordBatch, util::data_gen::*};
-use parquet::{
-    arrow::ArrowWriter, errors::Result, file::writer::InMemoryWriteableCursor,
-};
-
-fn create_primitive_bench_batch(
-    size: usize,
-    null_density: f32,
-    true_density: f32,
-) -> Result<RecordBatch> {
-    let fields = vec![
-        Field::new("_1", DataType::Int8, true),
-        Field::new("_2", DataType::Int16, true),
-        Field::new("_3", DataType::Int32, true),
-        Field::new("_4", DataType::Int64, true),
-        Field::new("_5", DataType::UInt8, true),
-        Field::new("_6", DataType::UInt16, true),
-        Field::new("_7", DataType::UInt32, true),
-        Field::new("_8", DataType::UInt64, true),
-        Field::new("_9", DataType::Float32, true),
-        Field::new("_10", DataType::Float64, true),
-        Field::new("_11", DataType::Date32, true),
-        Field::new("_12", DataType::Date64, true),
-        Field::new("_13", DataType::Time32(TimeUnit::Second), true),
-        Field::new("_14", DataType::Time32(TimeUnit::Millisecond), true),
-        Field::new("_15", DataType::Time64(TimeUnit::Microsecond), true),
-        Field::new("_16", DataType::Time64(TimeUnit::Nanosecond), true),
-        Field::new("_17", DataType::Utf8, true),
-        Field::new("_18", DataType::LargeUtf8, true),
-        Field::new("_19", DataType::Boolean, true),
-    ];
-    let schema = Schema::new(fields);
-    Ok(create_random_batch(
-        Arc::new(schema),
-        size,
-        null_density,
-        true_density,
-    )?)
-}
-
-fn _create_nested_bench_batch(
-    size: usize,
-    null_density: f32,
-    true_density: f32,
-) -> Result<RecordBatch> {
-    let fields = vec![
-        Field::new(
-            "_1",
-            DataType::Struct(vec![
-                Field::new("_1", DataType::Int8, true),
-                Field::new(
-                    "_2",
-                    DataType::Struct(vec![
-                        Field::new("_1", DataType::Int8, true),
-                        Field::new(
-                            "_1",
-                            DataType::Struct(vec![
-                                Field::new("_1", DataType::Int8, true),
-                                Field::new("_2", DataType::Utf8, true),
-                            ]),
-                            true,
-                        ),
-                        Field::new("_2", DataType::UInt8, true),
-                    ]),
-                    true,
-                ),
-            ]),
-            true,
-        ),
-        Field::new(
-            "_2",
-            DataType::LargeList(Box::new(Field::new(
-                "item",
-                DataType::List(Box::new(Field::new(
-                    "item",
-                    DataType::Struct(vec![
-                        Field::new(
-                            "_1",
-                            DataType::Struct(vec![
-                                Field::new("_1", DataType::Int8, true),
-                                Field::new("_2", DataType::Int16, true),
-                                Field::new("_3", DataType::Int32, true),
-                            ]),
-                            true,
-                        ),
-                        Field::new(
-                            "_2",
-                            DataType::List(Box::new(Field::new(
-                                "",
-                                DataType::FixedSizeBinary(2),
-                                true,
-                            ))),
-                            true,
-                        ),
-                    ]),
-                    true,
-                ))),
-                true,
-            ))),
-            true,
-        ),
-    ];
-    let schema = Schema::new(fields);
-    Ok(create_random_batch(
-        Arc::new(schema),
-        size,
-        null_density,
-        true_density,
-    )?)
-}
-
-#[inline]
-fn write_batch(batch: &RecordBatch) -> Result<()> {
-    // Write batch to an in-memory writer
-    let cursor = InMemoryWriteableCursor::default();
-    let mut writer = ArrowWriter::try_new(cursor, batch.schema(), None)?;
-
-    writer.write(&batch)?;
-    writer.close()?;
-    Ok(())
-}
-
-fn bench_primitive_writer(c: &mut Criterion) {
-    let batch = create_primitive_bench_batch(1024, 0.25, 0.75).unwrap();
-    let mut group = c.benchmark_group("write_batch primitive");
-    group.throughput(Throughput::Bytes(
-        batch
-            .columns()
-            .iter()
-            .map(|f| f.get_array_memory_size() as u64)
-            .sum(),
-    ));
-    group.bench_function("1024 values", |b| b.iter(|| write_batch(&batch).unwrap()));
-
-    let batch = create_primitive_bench_batch(4096, 0.25, 0.75).unwrap();
-    group.throughput(Throughput::Bytes(
-        batch
-            .columns()
-            .iter()
-            .map(|f| f.get_array_memory_size() as u64)
-            .sum(),
-    ));
-    group.bench_function("4096 values", |b| b.iter(|| write_batch(&batch).unwrap()));
-
-    group.finish();
-}
-
-// This bench triggers a write error, it is ignored for now
-fn _bench_nested_writer(c: &mut Criterion) {
-    let batch = _create_nested_bench_batch(1024, 0.25, 0.75).unwrap();
-    let mut group = c.benchmark_group("write_batch nested");
-    group.throughput(Throughput::Bytes(
-        batch
-            .columns()
-            .iter()
-            .map(|f| f.get_array_memory_size() as u64)
-            .sum(),
-    ));
-    group.bench_function("1024 values", |b| b.iter(|| write_batch(&batch).unwrap()));
-
-    let batch = create_primitive_bench_batch(4096, 0.25, 0.75).unwrap();
-    group.throughput(Throughput::Bytes(
-        batch
-            .columns()
-            .iter()
-            .map(|f| f.get_array_memory_size() as u64)
-            .sum(),
-    ));
-    group.bench_function("4096 values", |b| b.iter(|| write_batch(&batch).unwrap()));
-
-    group.finish();
-}
-
-criterion_group!(benches, bench_primitive_writer);
-criterion_main!(benches);
diff --git a/rust/parquet/build.rs b/rust/parquet/build.rs
deleted file mode 100644
index b42b2a4babf..00000000000
--- a/rust/parquet/build.rs
+++ /dev/null
@@ -1,43 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::process::Command;
-
-fn main() {
-    // Set Parquet version, build hash and "created by" string.
-    let version = env!("CARGO_PKG_VERSION");
-    let mut created_by = format!("parquet-rs version {}", version);
-    if let Ok(git_hash) = run(Command::new("git").arg("rev-parse").arg("HEAD")) {
-        created_by.push_str(format!(" (build {})", git_hash).as_str());
-        println!("cargo:rustc-env=PARQUET_BUILD={}", git_hash);
-    }
-    println!("cargo:rustc-env=PARQUET_VERSION={}", version);
-    println!("cargo:rustc-env=PARQUET_CREATED_BY={}", created_by);
-}
-
-/// Runs command and returns either content of stdout for successful execution,
-/// or an error message otherwise.
-fn run(command: &mut Command) -> Result<String, String> {
-    println!("Running: `{:?}`", command);
-    match command.output() {
-        Ok(ref output) if output.status.success() => {
-            Ok(String::from_utf8_lossy(&output.stdout).trim().to_string())
-        }
-        Ok(ref output) => Err(format!("Failed: `{:?}` ({})", command, output.status)),
-        Err(error) => Err(format!("Failed: `{:?}` ({})", command, error)),
-    }
-}
diff --git a/rust/parquet/src/arrow/array_reader.rs b/rust/parquet/src/arrow/array_reader.rs
deleted file mode 100644
index a906147a8f9..00000000000
--- a/rust/parquet/src/arrow/array_reader.rs
+++ /dev/null
@@ -1,2530 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::cmp::{max, min};
-use std::collections::{HashMap, HashSet};
-use std::marker::PhantomData;
-use std::mem::size_of;
-use std::result::Result::Ok;
-use std::sync::Arc;
-use std::vec::Vec;
-
-use arrow::array::{
-    new_empty_array, Array, ArrayData, ArrayDataBuilder, ArrayRef, BinaryArray,
-    BinaryBuilder, BooleanArray, BooleanBufferBuilder, BooleanBuilder, DecimalBuilder,
-    FixedSizeBinaryArray, FixedSizeBinaryBuilder, GenericListArray, Int16BufferBuilder,
-    Int32Array, Int64Array, OffsetSizeTrait, PrimitiveArray, PrimitiveBuilder,
-    StringArray, StringBuilder, StructArray,
-};
-use arrow::buffer::{Buffer, MutableBuffer};
-use arrow::datatypes::{
-    ArrowPrimitiveType, BooleanType as ArrowBooleanType, DataType as ArrowType,
-    Date32Type as ArrowDate32Type, Date64Type as ArrowDate64Type,
-    DurationMicrosecondType as ArrowDurationMicrosecondType,
-    DurationMillisecondType as ArrowDurationMillisecondType,
-    DurationNanosecondType as ArrowDurationNanosecondType,
-    DurationSecondType as ArrowDurationSecondType, Field,
-    Float32Type as ArrowFloat32Type, Float64Type as ArrowFloat64Type,
-    Int16Type as ArrowInt16Type, Int32Type as ArrowInt32Type,
-    Int64Type as ArrowInt64Type, Int8Type as ArrowInt8Type, IntervalUnit, Schema,
-    Time32MillisecondType as ArrowTime32MillisecondType,
-    Time32SecondType as ArrowTime32SecondType,
-    Time64MicrosecondType as ArrowTime64MicrosecondType,
-    Time64NanosecondType as ArrowTime64NanosecondType, TimeUnit as ArrowTimeUnit,
-    TimestampMicrosecondType as ArrowTimestampMicrosecondType,
-    TimestampMillisecondType as ArrowTimestampMillisecondType,
-    TimestampNanosecondType as ArrowTimestampNanosecondType,
-    TimestampSecondType as ArrowTimestampSecondType, ToByteSlice,
-    UInt16Type as ArrowUInt16Type, UInt32Type as ArrowUInt32Type,
-    UInt64Type as ArrowUInt64Type, UInt8Type as ArrowUInt8Type,
-};
-use arrow::util::bit_util;
-
-use crate::arrow::converter::{
-    BinaryArrayConverter, BinaryConverter, Converter, DecimalArrayConverter,
-    DecimalConverter, FixedLenBinaryConverter, FixedSizeArrayConverter,
-    Int96ArrayConverter, Int96Converter, IntervalDayTimeArrayConverter,
-    IntervalDayTimeConverter, IntervalYearMonthArrayConverter,
-    IntervalYearMonthConverter, LargeBinaryArrayConverter, LargeBinaryConverter,
-    LargeUtf8ArrayConverter, LargeUtf8Converter, Utf8ArrayConverter, Utf8Converter,
-};
-use crate::arrow::record_reader::RecordReader;
-use crate::arrow::schema::parquet_to_arrow_field;
-use crate::basic::{ConvertedType, Repetition, Type as PhysicalType};
-use crate::column::page::PageIterator;
-use crate::column::reader::ColumnReaderImpl;
-use crate::data_type::{
-    BoolType, ByteArrayType, DataType, DoubleType, FixedLenByteArrayType, FloatType,
-    Int32Type, Int64Type, Int96Type,
-};
-use crate::errors::{ParquetError, ParquetError::ArrowError, Result};
-use crate::file::reader::{FilePageIterator, FileReader};
-use crate::schema::types::{
-    ColumnDescPtr, ColumnDescriptor, ColumnPath, SchemaDescPtr, Type, TypePtr,
-};
-use crate::schema::visitor::TypeVisitor;
-use std::any::Any;
-
-/// Array reader reads parquet data into arrow array.
-pub trait ArrayReader {
-    fn as_any(&self) -> &dyn Any;
-
-    /// Returns the arrow type of this array reader.
-    fn get_data_type(&self) -> &ArrowType;
-
-    /// Reads at most `batch_size` records into an arrow array and return it.
-    fn next_batch(&mut self, batch_size: usize) -> Result<ArrayRef>;
-
-    /// Returns the definition levels of data from last call of `next_batch`.
-    /// The result is used by parent array reader to calculate its own definition
-    /// levels and repetition levels, so that its parent can calculate null bitmap.
-    fn get_def_levels(&self) -> Option<&[i16]>;
-
-    /// Return the repetition levels of data from last call of `next_batch`.
-    /// The result is used by parent array reader to calculate its own definition
-    /// levels and repetition levels, so that its parent can calculate null bitmap.
-    fn get_rep_levels(&self) -> Option<&[i16]>;
-}
-
-/// A NullArrayReader reads Parquet columns stored as null int32s with an Arrow
-/// NullArray type.
-pub struct NullArrayReader<T: DataType> {
-    data_type: ArrowType,
-    pages: Box<dyn PageIterator>,
-    def_levels_buffer: Option<Buffer>,
-    rep_levels_buffer: Option<Buffer>,
-    column_desc: ColumnDescPtr,
-    record_reader: RecordReader<T>,
-    _type_marker: PhantomData<T>,
-}
-
-impl<T: DataType> NullArrayReader<T> {
-    /// Construct null array reader.
-    pub fn new(
-        mut pages: Box<dyn PageIterator>,
-        column_desc: ColumnDescPtr,
-    ) -> Result<Self> {
-        let mut record_reader = RecordReader::<T>::new(column_desc.clone());
-        if let Some(page_reader) = pages.next() {
-            record_reader.set_page_reader(page_reader?)?;
-        }
-
-        Ok(Self {
-            data_type: ArrowType::Null,
-            pages,
-            def_levels_buffer: None,
-            rep_levels_buffer: None,
-            column_desc,
-            record_reader,
-            _type_marker: PhantomData,
-        })
-    }
-}
-
-/// Implementation of primitive array reader.
-impl<T: DataType> ArrayReader for NullArrayReader<T> {
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    /// Returns data type of primitive array.
-    fn get_data_type(&self) -> &ArrowType {
-        &self.data_type
-    }
-
-    /// Reads at most `batch_size` records into array.
-    fn next_batch(&mut self, batch_size: usize) -> Result<ArrayRef> {
-        let mut records_read = 0usize;
-        while records_read < batch_size {
-            let records_to_read = batch_size - records_read;
-
-            // NB can be 0 if at end of page
-            let records_read_once = self.record_reader.read_records(records_to_read)?;
-            records_read += records_read_once;
-
-            // Record reader exhausted
-            if records_read_once < records_to_read {
-                if let Some(page_reader) = self.pages.next() {
-                    // Read from new page reader
-                    self.record_reader.set_page_reader(page_reader?)?;
-                } else {
-                    // Page reader also exhausted
-                    break;
-                }
-            }
-        }
-
-        // convert to arrays
-        let array = arrow::array::NullArray::new(records_read);
-
-        // save definition and repetition buffers
-        self.def_levels_buffer = self.record_reader.consume_def_levels()?;
-        self.rep_levels_buffer = self.record_reader.consume_rep_levels()?;
-        self.record_reader.reset();
-        Ok(Arc::new(array))
-    }
-
-    fn get_def_levels(&self) -> Option<&[i16]> {
-        self.def_levels_buffer
-            .as_ref()
-            .map(|buf| unsafe { buf.typed_data() })
-    }
-
-    fn get_rep_levels(&self) -> Option<&[i16]> {
-        self.rep_levels_buffer
-            .as_ref()
-            .map(|buf| unsafe { buf.typed_data() })
-    }
-}
-
-/// Primitive array readers are leaves of array reader tree. They accept page iterator
-/// and read them into primitive arrays.
-pub struct PrimitiveArrayReader<T: DataType> {
-    data_type: ArrowType,
-    pages: Box<dyn PageIterator>,
-    def_levels_buffer: Option<Buffer>,
-    rep_levels_buffer: Option<Buffer>,
-    column_desc: ColumnDescPtr,
-    record_reader: RecordReader<T>,
-    _type_marker: PhantomData<T>,
-}
-
-impl<T: DataType> PrimitiveArrayReader<T> {
-    /// Construct primitive array reader.
-    pub fn new(
-        mut pages: Box<dyn PageIterator>,
-        column_desc: ColumnDescPtr,
-        arrow_type: Option<ArrowType>,
-    ) -> Result<Self> {
-        // Check if Arrow type is specified, else create it from Parquet type
-        let data_type = match arrow_type {
-            Some(t) => t,
-            None => parquet_to_arrow_field(column_desc.as_ref())?
-                .data_type()
-                .clone(),
-        };
-
-        let mut record_reader = RecordReader::<T>::new(column_desc.clone());
-        if let Some(page_reader) = pages.next() {
-            record_reader.set_page_reader(page_reader?)?;
-        }
-
-        Ok(Self {
-            data_type,
-            pages,
-            def_levels_buffer: None,
-            rep_levels_buffer: None,
-            column_desc,
-            record_reader,
-            _type_marker: PhantomData,
-        })
-    }
-}
-
-/// Implementation of primitive array reader.
-impl<T: DataType> ArrayReader for PrimitiveArrayReader<T> {
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    /// Returns data type of primitive array.
-    fn get_data_type(&self) -> &ArrowType {
-        &self.data_type
-    }
-
-    /// Reads at most `batch_size` records into array.
-    fn next_batch(&mut self, batch_size: usize) -> Result<ArrayRef> {
-        let mut records_read = 0usize;
-        while records_read < batch_size {
-            let records_to_read = batch_size - records_read;
-
-            // NB can be 0 if at end of page
-            let records_read_once = self.record_reader.read_records(records_to_read)?;
-            records_read += records_read_once;
-
-            // Record reader exhausted
-            if records_read_once < records_to_read {
-                if let Some(page_reader) = self.pages.next() {
-                    // Read from new page reader
-                    self.record_reader.set_page_reader(page_reader?)?;
-                } else {
-                    // Page reader also exhausted
-                    break;
-                }
-            }
-        }
-
-        let arrow_data_type = match T::get_physical_type() {
-            PhysicalType::BOOLEAN => ArrowBooleanType::DATA_TYPE,
-            PhysicalType::INT32 => ArrowInt32Type::DATA_TYPE,
-            PhysicalType::INT64 => ArrowInt64Type::DATA_TYPE,
-            PhysicalType::FLOAT => ArrowFloat32Type::DATA_TYPE,
-            PhysicalType::DOUBLE => ArrowFloat64Type::DATA_TYPE,
-            PhysicalType::INT96
-            | PhysicalType::BYTE_ARRAY
-            | PhysicalType::FIXED_LEN_BYTE_ARRAY => {
-                unreachable!(
-                    "PrimitiveArrayReaders don't support complex physical types"
-                );
-            }
-        };
-
-        // Convert to arrays by using the Parquet phyisical type.
-        // The physical types are then cast to Arrow types if necessary
-
-        let mut record_data = self.record_reader.consume_record_data()?;
-
-        if T::get_physical_type() == PhysicalType::BOOLEAN {
-            let mut boolean_buffer = BooleanBufferBuilder::new(record_data.len());
-
-            for e in record_data.as_slice() {
-                boolean_buffer.append(*e > 0);
-            }
-            record_data = boolean_buffer.finish();
-        }
-
-        let mut array_data = ArrayDataBuilder::new(arrow_data_type)
-            .len(self.record_reader.num_values())
-            .add_buffer(record_data);
-
-        if let Some(b) = self.record_reader.consume_bitmap_buffer()? {
-            array_data = array_data.null_bit_buffer(b);
-        }
-
-        let array = match T::get_physical_type() {
-            PhysicalType::BOOLEAN => {
-                Arc::new(BooleanArray::from(array_data.build())) as ArrayRef
-            }
-            PhysicalType::INT32 => {
-                Arc::new(PrimitiveArray::<ArrowInt32Type>::from(array_data.build()))
-                    as ArrayRef
-            }
-            PhysicalType::INT64 => {
-                Arc::new(PrimitiveArray::<ArrowInt64Type>::from(array_data.build()))
-                    as ArrayRef
-            }
-            PhysicalType::FLOAT => {
-                Arc::new(PrimitiveArray::<ArrowFloat32Type>::from(array_data.build()))
-                    as ArrayRef
-            }
-            PhysicalType::DOUBLE => {
-                Arc::new(PrimitiveArray::<ArrowFloat64Type>::from(array_data.build()))
-                    as ArrayRef
-            }
-            PhysicalType::INT96
-            | PhysicalType::BYTE_ARRAY
-            | PhysicalType::FIXED_LEN_BYTE_ARRAY => {
-                unreachable!(
-                    "PrimitiveArrayReaders don't support complex physical types"
-                );
-            }
-        };
-
-        // cast to Arrow type
-        // We make a strong assumption here that the casts should be infallible.
-        // If the cast fails because of incompatible datatypes, then there might
-        // be a bigger problem with how Arrow schemas are converted to Parquet.
-        //
-        // As there is not always a 1:1 mapping between Arrow and Parquet, there
-        // are datatypes which we must convert explicitly.
-        // These are:
-        // - date64: we should cast int32 to date32, then date32 to date64.
-        let target_type = self.get_data_type();
-        let array = match target_type {
-            ArrowType::Date64 => {
-                // this is cheap as it internally reinterprets the data
-                let a = arrow::compute::cast(&array, &ArrowType::Date32)?;
-                arrow::compute::cast(&a, target_type)?
-            }
-            ArrowType::Decimal(p, s) => {
-                let mut builder = DecimalBuilder::new(array.len(), *p, *s);
-                match array.data_type() {
-                    ArrowType::Int32 => {
-                        let values = array.as_any().downcast_ref::<Int32Array>().unwrap();
-                        for maybe_value in values.iter() {
-                            match maybe_value {
-                                Some(value) => builder.append_value(value as i128)?,
-                                None => builder.append_null()?,
-                            }
-                        }
-                    }
-                    ArrowType::Int64 => {
-                        let values = array.as_any().downcast_ref::<Int64Array>().unwrap();
-                        for maybe_value in values.iter() {
-                            match maybe_value {
-                                Some(value) => builder.append_value(value as i128)?,
-                                None => builder.append_null()?,
-                            }
-                        }
-                    }
-                    _ => {
-                        return Err(ArrowError(format!(
-                            "Cannot convert {:?} to decimal",
-                            array.data_type()
-                        )))
-                    }
-                }
-                Arc::new(builder.finish()) as ArrayRef
-            }
-            _ => arrow::compute::cast(&array, target_type)?,
-        };
-
-        // save definition and repetition buffers
-        self.def_levels_buffer = self.record_reader.consume_def_levels()?;
-        self.rep_levels_buffer = self.record_reader.consume_rep_levels()?;
-        self.record_reader.reset();
-        Ok(array)
-    }
-
-    fn get_def_levels(&self) -> Option<&[i16]> {
-        self.def_levels_buffer
-            .as_ref()
-            .map(|buf| unsafe { buf.typed_data() })
-    }
-
-    fn get_rep_levels(&self) -> Option<&[i16]> {
-        self.rep_levels_buffer
-            .as_ref()
-            .map(|buf| unsafe { buf.typed_data() })
-    }
-}
-
-/// Primitive array readers are leaves of array reader tree. They accept page iterator
-/// and read them into primitive arrays.
-pub struct ComplexObjectArrayReader<T, C>
-where
-    T: DataType,
-    C: Converter<Vec<Option<T::T>>, ArrayRef> + 'static,
-{
-    data_type: ArrowType,
-    pages: Box<dyn PageIterator>,
-    def_levels_buffer: Option<Vec<i16>>,
-    rep_levels_buffer: Option<Vec<i16>>,
-    column_desc: ColumnDescPtr,
-    column_reader: Option<ColumnReaderImpl<T>>,
-    converter: C,
-    _parquet_type_marker: PhantomData<T>,
-    _converter_marker: PhantomData<C>,
-}
-
-impl<T, C> ArrayReader for ComplexObjectArrayReader<T, C>
-where
-    T: DataType,
-    C: Converter<Vec<Option<T::T>>, ArrayRef> + 'static,
-{
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn get_data_type(&self) -> &ArrowType {
-        &self.data_type
-    }
-
-    fn next_batch(&mut self, batch_size: usize) -> Result<ArrayRef> {
-        // Try to initialize column reader
-        if self.column_reader.is_none() {
-            self.next_column_reader()?;
-        }
-
-        let mut data_buffer: Vec<T::T> = Vec::with_capacity(batch_size);
-        data_buffer.resize_with(batch_size, T::T::default);
-
-        let mut def_levels_buffer = if self.column_desc.max_def_level() > 0 {
-            let mut buf: Vec<i16> = Vec::with_capacity(batch_size);
-            buf.resize_with(batch_size, || 0);
-            Some(buf)
-        } else {
-            None
-        };
-
-        let mut rep_levels_buffer = if self.column_desc.max_rep_level() > 0 {
-            let mut buf: Vec<i16> = Vec::with_capacity(batch_size);
-            buf.resize_with(batch_size, || 0);
-            Some(buf)
-        } else {
-            None
-        };
-
-        let mut num_read = 0;
-
-        while self.column_reader.is_some() && num_read < batch_size {
-            let num_to_read = batch_size - num_read;
-            let cur_data_buf = &mut data_buffer[num_read..];
-            let cur_def_levels_buf =
-                def_levels_buffer.as_mut().map(|b| &mut b[num_read..]);
-            let cur_rep_levels_buf =
-                rep_levels_buffer.as_mut().map(|b| &mut b[num_read..]);
-            let (data_read, levels_read) =
-                self.column_reader.as_mut().unwrap().read_batch(
-                    num_to_read,
-                    cur_def_levels_buf,
-                    cur_rep_levels_buf,
-                    cur_data_buf,
-                )?;
-
-            // Fill space
-            if levels_read > data_read {
-                def_levels_buffer.iter().for_each(|def_levels_buffer| {
-                    let (mut level_pos, mut data_pos) = (levels_read, data_read);
-                    while level_pos > 0 && data_pos > 0 {
-                        if def_levels_buffer[num_read + level_pos - 1]
-                            == self.column_desc.max_def_level()
-                        {
-                            cur_data_buf.swap(level_pos - 1, data_pos - 1);
-                            level_pos -= 1;
-                            data_pos -= 1;
-                        } else {
-                            level_pos -= 1;
-                        }
-                    }
-                });
-            }
-
-            let values_read = max(levels_read, data_read);
-            num_read += values_read;
-            // current page exhausted && page iterator exhausted
-            if values_read < num_to_read && !self.next_column_reader()? {
-                break;
-            }
-        }
-
-        data_buffer.truncate(num_read);
-        def_levels_buffer
-            .iter_mut()
-            .for_each(|buf| buf.truncate(num_read));
-        rep_levels_buffer
-            .iter_mut()
-            .for_each(|buf| buf.truncate(num_read));
-
-        self.def_levels_buffer = def_levels_buffer;
-        self.rep_levels_buffer = rep_levels_buffer;
-
-        let data: Vec<Option<T::T>> = if self.def_levels_buffer.is_some() {
-            data_buffer
-                .into_iter()
-                .zip(self.def_levels_buffer.as_ref().unwrap().iter())
-                .map(|(t, def_level)| {
-                    if *def_level == self.column_desc.max_def_level() {
-                        Some(t)
-                    } else {
-                        None
-                    }
-                })
-                .collect()
-        } else {
-            data_buffer.into_iter().map(Some).collect()
-        };
-
-        let mut array = self.converter.convert(data)?;
-
-        if let ArrowType::Dictionary(_, _) = self.data_type {
-            array = arrow::compute::cast(&array, &self.data_type)?;
-        }
-
-        Ok(array)
-    }
-
-    fn get_def_levels(&self) -> Option<&[i16]> {
-        self.def_levels_buffer.as_deref()
-    }
-
-    fn get_rep_levels(&self) -> Option<&[i16]> {
-        self.rep_levels_buffer.as_deref()
-    }
-}
-
-impl<T, C> ComplexObjectArrayReader<T, C>
-where
-    T: DataType,
-    C: Converter<Vec<Option<T::T>>, ArrayRef> + 'static,
-{
-    fn new(
-        pages: Box<dyn PageIterator>,
-        column_desc: ColumnDescPtr,
-        converter: C,
-        arrow_type: Option<ArrowType>,
-    ) -> Result<Self> {
-        let data_type = match arrow_type {
-            Some(t) => t,
-            None => parquet_to_arrow_field(column_desc.as_ref())?
-                .data_type()
-                .clone(),
-        };
-
-        Ok(Self {
-            data_type,
-            pages,
-            def_levels_buffer: None,
-            rep_levels_buffer: None,
-            column_desc,
-            column_reader: None,
-            converter,
-            _parquet_type_marker: PhantomData,
-            _converter_marker: PhantomData,
-        })
-    }
-
-    fn next_column_reader(&mut self) -> Result<bool> {
-        Ok(match self.pages.next() {
-            Some(page) => {
-                self.column_reader =
-                    Some(ColumnReaderImpl::<T>::new(self.column_desc.clone(), page?));
-                true
-            }
-            None => false,
-        })
-    }
-}
-
-/// Implementation of list array reader.
-pub struct ListArrayReader<OffsetSize: OffsetSizeTrait> {
-    item_reader: Box<dyn ArrayReader>,
-    data_type: ArrowType,
-    item_type: ArrowType,
-    list_def_level: i16,
-    list_rep_level: i16,
-    def_level_buffer: Option<Buffer>,
-    rep_level_buffer: Option<Buffer>,
-    _marker: PhantomData<OffsetSize>,
-}
-
-impl<OffsetSize: OffsetSizeTrait> ListArrayReader<OffsetSize> {
-    /// Construct list array reader.
-    pub fn new(
-        item_reader: Box<dyn ArrayReader>,
-        data_type: ArrowType,
-        item_type: ArrowType,
-        def_level: i16,
-        rep_level: i16,
-    ) -> Self {
-        Self {
-            item_reader,
-            data_type,
-            item_type,
-            list_def_level: def_level,
-            list_rep_level: rep_level,
-            def_level_buffer: None,
-            rep_level_buffer: None,
-            _marker: PhantomData,
-        }
-    }
-}
-
-macro_rules! remove_primitive_array_indices {
-    ($arr: expr, $item_type:ty, $indices:expr) => {{
-        let array_data = match $arr.as_any().downcast_ref::<PrimitiveArray<$item_type>>() {
-            Some(a) => a,
-            _ => return Err(ParquetError::General(format!("Error generating next batch for ListArray: {:?} cannot be downcast to PrimitiveArray", $arr))),
-        };
-        let mut builder = PrimitiveBuilder::<$item_type>::new($arr.len());
-        for i in 0..array_data.len() {
-            if !$indices.contains(&i) {
-                if array_data.is_null(i) {
-                    builder.append_null()?;
-                } else {
-                    builder.append_value(array_data.value(i))?;
-                }
-            }
-        }
-        Ok(Arc::new(builder.finish()))
-    }};
-}
-
-macro_rules! remove_array_indices_custom_builder {
-    ($arr: expr, $array_type:ty, $item_builder:ident, $indices:expr) => {{
-        let array_data = match $arr.as_any().downcast_ref::<$array_type>() {
-            Some(a) => a,
-            _ => return Err(ParquetError::General(format!("Error generating next batch for ListArray: {:?} cannot be downcast to PrimitiveArray", $arr))),
-        };
-        let mut builder = $item_builder::new(array_data.len());
-
-        for i in 0..array_data.len() {
-            if !$indices.contains(&i) {
-                if array_data.is_null(i) {
-                    builder.append_null()?;
-                } else {
-                    builder.append_value(array_data.value(i))?;
-                }
-            }
-        }
-        Ok(Arc::new(builder.finish()))
-    }};
-}
-
-macro_rules! remove_fixed_size_binary_array_indices {
-    ($arr: expr, $array_type:ty, $item_builder:ident, $indices:expr, $len:expr) => {{
-        let array_data = match $arr.as_any().downcast_ref::<$array_type>() {
-            Some(a) => a,
-            _ => return Err(ParquetError::General(format!("Error generating next batch for ListArray: {:?} cannot be downcast to PrimitiveArray", $arr))),
-        };
-        let mut builder = FixedSizeBinaryBuilder::new(array_data.len(), $len);
-        for i in 0..array_data.len() {
-            if !$indices.contains(&i) {
-                if array_data.is_null(i) {
-                    builder.append_null()?;
-                } else {
-                    builder.append_value(array_data.value(i))?;
-                }
-            }
-        }
-        Ok(Arc::new(builder.finish()))
-    }};
-}
-
-fn remove_indices(
-    arr: ArrayRef,
-    item_type: ArrowType,
-    indices: Vec<usize>,
-) -> Result<ArrayRef> {
-    match item_type {
-        ArrowType::UInt8 => remove_primitive_array_indices!(arr, ArrowUInt8Type, indices),
-        ArrowType::UInt16 => {
-            remove_primitive_array_indices!(arr, ArrowUInt16Type, indices)
-        }
-        ArrowType::UInt32 => {
-            remove_primitive_array_indices!(arr, ArrowUInt32Type, indices)
-        }
-        ArrowType::UInt64 => {
-            remove_primitive_array_indices!(arr, ArrowUInt64Type, indices)
-        }
-        ArrowType::Int8 => remove_primitive_array_indices!(arr, ArrowInt8Type, indices),
-        ArrowType::Int16 => remove_primitive_array_indices!(arr, ArrowInt16Type, indices),
-        ArrowType::Int32 => remove_primitive_array_indices!(arr, ArrowInt32Type, indices),
-        ArrowType::Int64 => remove_primitive_array_indices!(arr, ArrowInt64Type, indices),
-        ArrowType::Float32 => {
-            remove_primitive_array_indices!(arr, ArrowFloat32Type, indices)
-        }
-        ArrowType::Float64 => {
-            remove_primitive_array_indices!(arr, ArrowFloat64Type, indices)
-        }
-        ArrowType::Boolean => {
-            remove_array_indices_custom_builder!(
-                arr,
-                BooleanArray,
-                BooleanBuilder,
-                indices
-            )
-        }
-        ArrowType::Date32 => {
-            remove_primitive_array_indices!(arr, ArrowDate32Type, indices)
-        }
-        ArrowType::Date64 => {
-            remove_primitive_array_indices!(arr, ArrowDate64Type, indices)
-        }
-        ArrowType::Time32(ArrowTimeUnit::Second) => {
-            remove_primitive_array_indices!(arr, ArrowTime32SecondType, indices)
-        }
-        ArrowType::Time32(ArrowTimeUnit::Millisecond) => {
-            remove_primitive_array_indices!(arr, ArrowTime32MillisecondType, indices)
-        }
-        ArrowType::Time64(ArrowTimeUnit::Microsecond) => {
-            remove_primitive_array_indices!(arr, ArrowTime64MicrosecondType, indices)
-        }
-        ArrowType::Time64(ArrowTimeUnit::Nanosecond) => {
-            remove_primitive_array_indices!(arr, ArrowTime64NanosecondType, indices)
-        }
-        ArrowType::Duration(ArrowTimeUnit::Second) => {
-            remove_primitive_array_indices!(arr, ArrowDurationSecondType, indices)
-        }
-        ArrowType::Duration(ArrowTimeUnit::Millisecond) => {
-            remove_primitive_array_indices!(arr, ArrowDurationMillisecondType, indices)
-        }
-        ArrowType::Duration(ArrowTimeUnit::Microsecond) => {
-            remove_primitive_array_indices!(arr, ArrowDurationMicrosecondType, indices)
-        }
-        ArrowType::Duration(ArrowTimeUnit::Nanosecond) => {
-            remove_primitive_array_indices!(arr, ArrowDurationNanosecondType, indices)
-        }
-        ArrowType::Timestamp(ArrowTimeUnit::Second, _) => {
-            remove_primitive_array_indices!(arr, ArrowTimestampSecondType, indices)
-        }
-        ArrowType::Timestamp(ArrowTimeUnit::Millisecond, _) => {
-            remove_primitive_array_indices!(arr, ArrowTimestampMillisecondType, indices)
-        }
-        ArrowType::Timestamp(ArrowTimeUnit::Microsecond, _) => {
-            remove_primitive_array_indices!(arr, ArrowTimestampMicrosecondType, indices)
-        }
-        ArrowType::Timestamp(ArrowTimeUnit::Nanosecond, _) => {
-            remove_primitive_array_indices!(arr, ArrowTimestampNanosecondType, indices)
-        }
-        ArrowType::Utf8 => {
-            remove_array_indices_custom_builder!(arr, StringArray, StringBuilder, indices)
-        }
-        ArrowType::Binary => {
-            remove_array_indices_custom_builder!(arr, BinaryArray, BinaryBuilder, indices)
-        }
-        ArrowType::FixedSizeBinary(size) => remove_fixed_size_binary_array_indices!(
-            arr,
-            FixedSizeBinaryArray,
-            FixedSizeBinaryBuilder,
-            indices,
-            size
-        ),
-        _ => Err(ParquetError::General(format!(
-            "ListArray of type List({:?}) is not supported by array_reader",
-            item_type
-        ))),
-    }
-}
-
-/// Implementation of ListArrayReader. Nested lists and lists of structs are not yet supported.
-impl<OffsetSize: OffsetSizeTrait> ArrayReader for ListArrayReader<OffsetSize> {
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    /// Returns data type.
-    /// This must be a List.
-    fn get_data_type(&self) -> &ArrowType {
-        &self.data_type
-    }
-
-    fn next_batch(&mut self, batch_size: usize) -> Result<ArrayRef> {
-        let next_batch_array = self.item_reader.next_batch(batch_size)?;
-        let item_type = self.item_reader.get_data_type().clone();
-
-        if next_batch_array.len() == 0 {
-            return Ok(new_empty_array(&self.data_type));
-        }
-        let def_levels = self
-            .item_reader
-            .get_def_levels()
-            .ok_or_else(|| ArrowError("item_reader def levels are None.".to_string()))?;
-        let rep_levels = self
-            .item_reader
-            .get_rep_levels()
-            .ok_or_else(|| ArrowError("item_reader rep levels are None.".to_string()))?;
-
-        if !((def_levels.len() == rep_levels.len())
-            && (rep_levels.len() == next_batch_array.len()))
-        {
-            return Err(ArrowError(
-                "Expected item_reader def_levels and rep_levels to be same length as batch".to_string(),
-            ));
-        }
-
-        // List definitions can be encoded as 4 values:
-        // - n + 0: the list slot is null
-        // - n + 1: the list slot is not null, but is empty (i.e. [])
-        // - n + 2: the list slot is not null, but its child is empty (i.e. [ null ])
-        // - n + 3: the list slot is not null, and its child is not empty
-        // Where n is the max definition level of the list's parent.
-        // If a Parquet schema's only leaf is the list, then n = 0.
-
-        // TODO: ARROW-10391 - add a test case with a non-nullable child, check if max is 3
-        let list_field_type = match self.get_data_type() {
-            ArrowType::List(field)
-            | ArrowType::FixedSizeList(field, _)
-            | ArrowType::LargeList(field) => field,
-            _ => {
-                // Panic: this is safe as we only write lists from list datatypes
-                unreachable!()
-            }
-        };
-        let max_list_def_range = if list_field_type.is_nullable() { 3 } else { 2 };
-        let max_list_definition = *(def_levels.iter().max().unwrap());
-        // TODO: ARROW-10391 - Find a reliable way of validating deeply-nested lists
-        // debug_assert!(
-        //     max_list_definition >= max_list_def_range,
-        //     "Lift definition max less than range"
-        // );
-        let list_null_def = max_list_definition - max_list_def_range;
-        let list_empty_def = max_list_definition - 1;
-        let mut null_list_indices: Vec<usize> = Vec::new();
-        for i in 0..def_levels.len() {
-            if def_levels[i] == list_null_def {
-                null_list_indices.push(i);
-            }
-        }
-        let batch_values = match null_list_indices.len() {
-            0 => next_batch_array.clone(),
-            _ => remove_indices(next_batch_array.clone(), item_type, null_list_indices)?,
-        };
-
-        // null list has def_level = 0
-        // empty list has def_level = 1
-        // null item in a list has def_level = 2
-        // non-null item has def_level = 3
-        // first item in each list has rep_level = 0, subsequent items have rep_level = 1
-
-        let mut offsets: Vec<OffsetSize> = Vec::new();
-        let mut cur_offset = OffsetSize::zero();
-        for i in 0..rep_levels.len() {
-            if rep_levels[i] == 0 {
-                offsets.push(cur_offset)
-            }
-            if def_levels[i] >= list_empty_def {
-                cur_offset += OffsetSize::one();
-            }
-        }
-        offsets.push(cur_offset);
-
-        let num_bytes = bit_util::ceil(offsets.len(), 8);
-        let mut null_buf = MutableBuffer::new(num_bytes).with_bitset(num_bytes, false);
-        let null_slice = null_buf.as_slice_mut();
-        let mut list_index = 0;
-        for i in 0..rep_levels.len() {
-            if rep_levels[i] == 0 && def_levels[i] != 0 {
-                bit_util::set_bit(null_slice, list_index);
-            }
-            if rep_levels[i] == 0 {
-                list_index += 1;
-            }
-        }
-        let value_offsets = Buffer::from(&offsets.to_byte_slice());
-
-        let list_data = ArrayData::builder(self.get_data_type().clone())
-            .len(offsets.len() - 1)
-            .add_buffer(value_offsets)
-            .add_child_data(batch_values.data().clone())
-            .null_bit_buffer(null_buf.into())
-            .offset(next_batch_array.offset())
-            .build();
-
-        let result_array = GenericListArray::<OffsetSize>::from(list_data);
-        Ok(Arc::new(result_array))
-    }
-
-    fn get_def_levels(&self) -> Option<&[i16]> {
-        self.def_level_buffer
-            .as_ref()
-            .map(|buf| unsafe { buf.typed_data() })
-    }
-
-    fn get_rep_levels(&self) -> Option<&[i16]> {
-        self.rep_level_buffer
-            .as_ref()
-            .map(|buf| unsafe { buf.typed_data() })
-    }
-}
-
-/// Implementation of struct array reader.
-pub struct StructArrayReader {
-    children: Vec<Box<dyn ArrayReader>>,
-    data_type: ArrowType,
-    struct_def_level: i16,
-    struct_rep_level: i16,
-    def_level_buffer: Option<Buffer>,
-    rep_level_buffer: Option<Buffer>,
-}
-
-impl StructArrayReader {
-    /// Construct struct array reader.
-    pub fn new(
-        data_type: ArrowType,
-        children: Vec<Box<dyn ArrayReader>>,
-        def_level: i16,
-        rep_level: i16,
-    ) -> Self {
-        Self {
-            data_type,
-            children,
-            struct_def_level: def_level,
-            struct_rep_level: rep_level,
-            def_level_buffer: None,
-            rep_level_buffer: None,
-        }
-    }
-}
-
-impl ArrayReader for StructArrayReader {
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    /// Returns data type.
-    /// This must be a struct.
-    fn get_data_type(&self) -> &ArrowType {
-        &self.data_type
-    }
-
-    /// Read `batch_size` struct records.
-    ///
-    /// Definition levels of struct array is calculated as following:
-    /// ```ignore
-    /// def_levels[i] = min(child1_def_levels[i], child2_def_levels[i], ...,
-    /// childn_def_levels[i]);
-    /// ```
-    ///
-    /// Repetition levels of struct array is calculated as following:
-    /// ```ignore
-    /// rep_levels[i] = child1_rep_levels[i];
-    /// ```
-    ///
-    /// The null bitmap of struct array is calculated from def_levels:
-    /// ```ignore
-    /// null_bitmap[i] = (def_levels[i] >= self.def_level);
-    /// ```
-    fn next_batch(&mut self, batch_size: usize) -> Result<ArrayRef> {
-        if self.children.is_empty() {
-            self.def_level_buffer = None;
-            self.rep_level_buffer = None;
-            return Ok(Arc::new(StructArray::from(Vec::new())));
-        }
-
-        let children_array = self
-            .children
-            .iter_mut()
-            .map(|reader| reader.next_batch(batch_size))
-            .try_fold(
-                Vec::new(),
-                |mut result, child_array| -> Result<Vec<ArrayRef>> {
-                    result.push(child_array?);
-                    Ok(result)
-                },
-            )?;
-
-        // check that array child data has same size
-        let children_array_len =
-            children_array.first().map(|arr| arr.len()).ok_or_else(|| {
-                general_err!("Struct array reader should have at least one child!")
-            })?;
-
-        let all_children_len_eq = children_array
-            .iter()
-            .all(|arr| arr.len() == children_array_len);
-        if !all_children_len_eq {
-            return Err(general_err!("Not all children array length are the same!"));
-        }
-
-        // calculate struct def level data
-        let buffer_size = children_array_len * size_of::<i16>();
-        let mut def_level_data_buffer = MutableBuffer::new(buffer_size);
-        def_level_data_buffer.resize(buffer_size, 0);
-
-        let def_level_data = def_level_data_buffer.typed_data_mut();
-
-        def_level_data
-            .iter_mut()
-            .for_each(|v| *v = self.struct_def_level);
-
-        for child in &self.children {
-            if let Some(current_child_def_levels) = child.get_def_levels() {
-                if current_child_def_levels.len() != children_array_len {
-                    return Err(general_err!("Child array length are not equal!"));
-                } else {
-                    for i in 0..children_array_len {
-                        def_level_data[i] =
-                            min(def_level_data[i], current_child_def_levels[i]);
-                    }
-                }
-            }
-        }
-
-        // calculate bitmap for current array
-        let mut bitmap_builder = BooleanBufferBuilder::new(children_array_len);
-        for def_level in def_level_data {
-            let not_null = *def_level >= self.struct_def_level;
-            bitmap_builder.append(not_null);
-        }
-
-        // Now we can build array data
-        let array_data = ArrayDataBuilder::new(self.data_type.clone())
-            .len(children_array_len)
-            .null_bit_buffer(bitmap_builder.finish())
-            .child_data(
-                children_array
-                    .iter()
-                    .map(|x| x.data().clone())
-                    .collect::<Vec<ArrayData>>(),
-            )
-            .build();
-
-        // calculate struct rep level data, since struct doesn't add to repetition
-        // levels, here we just need to keep repetition levels of first array
-        // TODO: Verify that all children array reader has same repetition levels
-        let rep_level_data = self
-            .children
-            .first()
-            .ok_or_else(|| {
-                general_err!("Struct array reader should have at least one child!")
-            })?
-            .get_rep_levels()
-            .map(|data| -> Result<Buffer> {
-                let mut buffer = Int16BufferBuilder::new(children_array_len);
-                buffer.append_slice(data);
-                Ok(buffer.finish())
-            })
-            .transpose()?;
-
-        self.def_level_buffer = Some(def_level_data_buffer.into());
-        self.rep_level_buffer = rep_level_data;
-        Ok(Arc::new(StructArray::from(array_data)))
-    }
-
-    fn get_def_levels(&self) -> Option<&[i16]> {
-        self.def_level_buffer
-            .as_ref()
-            .map(|buf| unsafe { buf.typed_data() })
-    }
-
-    fn get_rep_levels(&self) -> Option<&[i16]> {
-        self.rep_level_buffer
-            .as_ref()
-            .map(|buf| unsafe { buf.typed_data() })
-    }
-}
-
-/// Create array reader from parquet schema, column indices, and parquet file reader.
-pub fn build_array_reader<T>(
-    parquet_schema: SchemaDescPtr,
-    arrow_schema: Schema,
-    column_indices: T,
-    file_reader: Arc<dyn FileReader>,
-) -> Result<Box<dyn ArrayReader>>
-where
-    T: IntoIterator<Item = usize>,
-{
-    let mut leaves = HashMap::<*const Type, usize>::new();
-
-    let mut filtered_root_names = HashSet::<String>::new();
-
-    for c in column_indices {
-        let column = parquet_schema.column(c).self_type() as *const Type;
-
-        leaves.insert(column, c);
-
-        let root = parquet_schema.get_column_root_ptr(c);
-        filtered_root_names.insert(root.name().to_string());
-    }
-
-    if leaves.is_empty() {
-        return Err(general_err!("Can't build array reader without columns!"));
-    }
-
-    // Only pass root fields that take part in the projection
-    // to avoid traversal of columns that are not read.
-    // TODO: also prune unread parts of the tree in child structures
-    let filtered_root_fields = parquet_schema
-        .root_schema()
-        .get_fields()
-        .iter()
-        .filter(|field| filtered_root_names.contains(field.name()))
-        .cloned()
-        .collect::<Vec<_>>();
-
-    let proj = Type::GroupType {
-        basic_info: parquet_schema.root_schema().get_basic_info().clone(),
-        fields: filtered_root_fields,
-    };
-
-    ArrayReaderBuilder::new(
-        Arc::new(proj),
-        Arc::new(arrow_schema),
-        Arc::new(leaves),
-        file_reader,
-    )
-    .build_array_reader()
-}
-
-/// Used to build array reader.
-struct ArrayReaderBuilder {
-    root_schema: TypePtr,
-    arrow_schema: Arc<Schema>,
-    // Key: columns that need to be included in final array builder
-    // Value: column index in schema
-    columns_included: Arc<HashMap<*const Type, usize>>,
-    file_reader: Arc<dyn FileReader>,
-}
-
-/// Used in type visitor.
-#[derive(Clone)]
-struct ArrayReaderBuilderContext {
-    def_level: i16,
-    rep_level: i16,
-    path: ColumnPath,
-}
-
-impl Default for ArrayReaderBuilderContext {
-    fn default() -> Self {
-        Self {
-            def_level: 0i16,
-            rep_level: 0i16,
-            path: ColumnPath::new(Vec::new()),
-        }
-    }
-}
-
-/// Create array reader by visiting schema.
-impl<'a> TypeVisitor<Option<Box<dyn ArrayReader>>, &'a ArrayReaderBuilderContext>
-    for ArrayReaderBuilder
-{
-    /// Build array reader for primitive type.
-    /// Currently we don't have a list reader implementation, so repeated type is not
-    /// supported yet.
-    fn visit_primitive(
-        &mut self,
-        cur_type: TypePtr,
-        context: &'a ArrayReaderBuilderContext,
-    ) -> Result<Option<Box<dyn ArrayReader>>> {
-        if self.is_included(cur_type.as_ref()) {
-            let mut new_context = context.clone();
-            new_context.path.append(vec![cur_type.name().to_string()]);
-
-            match cur_type.get_basic_info().repetition() {
-                Repetition::REPEATED => {
-                    new_context.def_level += 1;
-                    new_context.rep_level += 1;
-                }
-                Repetition::OPTIONAL => {
-                    new_context.def_level += 1;
-                }
-                _ => (),
-            }
-
-            let reader =
-                self.build_for_primitive_type_inner(cur_type.clone(), &new_context)?;
-
-            if cur_type.get_basic_info().repetition() == Repetition::REPEATED {
-                Err(ArrowError(
-                    "Reading repeated field is not supported yet!".to_string(),
-                ))
-            } else {
-                Ok(Some(reader))
-            }
-        } else {
-            Ok(None)
-        }
-    }
-
-    /// Build array reader for struct type.
-    fn visit_struct(
-        &mut self,
-        cur_type: Arc<Type>,
-        context: &'a ArrayReaderBuilderContext,
-    ) -> Result<Option<Box<dyn ArrayReader>>> {
-        let mut new_context = context.clone();
-        new_context.path.append(vec![cur_type.name().to_string()]);
-
-        if cur_type.get_basic_info().has_repetition() {
-            match cur_type.get_basic_info().repetition() {
-                Repetition::REPEATED => {
-                    new_context.def_level += 1;
-                    new_context.rep_level += 1;
-                }
-                Repetition::OPTIONAL => {
-                    new_context.def_level += 1;
-                }
-                _ => (),
-            }
-        }
-
-        if let Some(reader) = self.build_for_struct_type_inner(&cur_type, &new_context)? {
-            if cur_type.get_basic_info().has_repetition()
-                && cur_type.get_basic_info().repetition() == Repetition::REPEATED
-            {
-                Err(ArrowError(
-                    "Reading repeated field is not supported yet!".to_string(),
-                ))
-            } else {
-                Ok(Some(reader))
-            }
-        } else {
-            Ok(None)
-        }
-    }
-
-    /// Build array reader for map type.
-    /// Currently this is not supported.
-    fn visit_map(
-        &mut self,
-        _cur_type: Arc<Type>,
-        _context: &'a ArrayReaderBuilderContext,
-    ) -> Result<Option<Box<dyn ArrayReader>>> {
-        Err(ArrowError(
-            "Reading parquet map array into arrow is not supported yet!".to_string(),
-        ))
-    }
-
-    /// Build array reader for list type.
-    fn visit_list_with_item(
-        &mut self,
-        list_type: Arc<Type>,
-        item_type: Arc<Type>,
-        context: &'a ArrayReaderBuilderContext,
-    ) -> Result<Option<Box<dyn ArrayReader>>> {
-        let list_child = &list_type
-            .get_fields()
-            .first()
-            .ok_or_else(|| ArrowError("List field must have a child.".to_string()))?;
-        let mut new_context = context.clone();
-
-        new_context.path.append(vec![list_type.name().to_string()]);
-
-        match list_type.get_basic_info().repetition() {
-            Repetition::REPEATED => {
-                new_context.def_level += 1;
-                new_context.rep_level += 1;
-            }
-            Repetition::OPTIONAL => {
-                new_context.def_level += 1;
-            }
-            _ => (),
-        }
-
-        match list_child.get_basic_info().repetition() {
-            Repetition::REPEATED => {
-                new_context.def_level += 1;
-                new_context.rep_level += 1;
-            }
-            Repetition::OPTIONAL => {
-                new_context.def_level += 1;
-            }
-            _ => (),
-        }
-
-        let item_reader = self
-            .dispatch(item_type.clone(), &new_context)
-            .unwrap()
-            .unwrap();
-
-        let item_reader_type = item_reader.get_data_type().clone();
-
-        match item_reader_type {
-            ArrowType::List(_)
-            | ArrowType::FixedSizeList(_, _)
-            | ArrowType::Struct(_)
-            | ArrowType::Dictionary(_, _) => Err(ArrowError(format!(
-                "reading List({:?}) into arrow not supported yet",
-                item_type
-            ))),
-            _ => {
-                let arrow_type = self
-                    .arrow_schema
-                    .field_with_name(list_type.name())
-                    .ok()
-                    .map(|f| f.data_type().to_owned())
-                    .unwrap_or_else(|| {
-                        ArrowType::List(Box::new(Field::new(
-                            list_type.name(),
-                            item_reader_type.clone(),
-                            list_type.is_optional(),
-                        )))
-                    });
-
-                let list_array_reader: Box<dyn ArrayReader> = match arrow_type {
-                    ArrowType::List(_) => Box::new(ListArrayReader::<i32>::new(
-                        item_reader,
-                        arrow_type,
-                        item_reader_type,
-                        new_context.def_level,
-                        new_context.rep_level,
-                    )),
-                    ArrowType::LargeList(_) => Box::new(ListArrayReader::<i64>::new(
-                        item_reader,
-                        arrow_type,
-                        item_reader_type,
-                        new_context.def_level,
-                        new_context.rep_level,
-                    )),
-
-                    _ => {
-                        return Err(ArrowError(format!(
-                        "creating ListArrayReader with type {:?} should be unreachable",
-                        arrow_type
-                    )))
-                    }
-                };
-
-                Ok(Some(list_array_reader))
-            }
-        }
-    }
-}
-
-impl<'a> ArrayReaderBuilder {
-    /// Construct array reader builder.
-    fn new(
-        root_schema: TypePtr,
-        arrow_schema: Arc<Schema>,
-        columns_included: Arc<HashMap<*const Type, usize>>,
-        file_reader: Arc<dyn FileReader>,
-    ) -> Self {
-        Self {
-            root_schema,
-            arrow_schema,
-            columns_included,
-            file_reader,
-        }
-    }
-
-    /// Main entry point.
-    fn build_array_reader(&mut self) -> Result<Box<dyn ArrayReader>> {
-        let context = ArrayReaderBuilderContext::default();
-
-        self.visit_struct(self.root_schema.clone(), &context)
-            .and_then(|reader_opt| {
-                reader_opt.ok_or_else(|| general_err!("Failed to build array reader!"))
-            })
-    }
-
-    // Utility functions
-
-    /// Check whether one column in included in this array reader builder.
-    fn is_included(&self, t: &Type) -> bool {
-        self.columns_included.contains_key(&(t as *const Type))
-    }
-
-    /// Creates primitive array reader for each primitive type.
-    fn build_for_primitive_type_inner(
-        &self,
-        cur_type: TypePtr,
-        context: &'a ArrayReaderBuilderContext,
-    ) -> Result<Box<dyn ArrayReader>> {
-        let column_desc = Arc::new(ColumnDescriptor::new(
-            cur_type.clone(),
-            context.def_level,
-            context.rep_level,
-            context.path.clone(),
-        ));
-        let page_iterator = Box::new(FilePageIterator::new(
-            self.columns_included[&(cur_type.as_ref() as *const Type)],
-            self.file_reader.clone(),
-        )?);
-
-        let arrow_type: Option<ArrowType> = match self.get_arrow_field(&cur_type, context)
-        {
-            Some(f) => Some(f.data_type().clone()),
-            _ => None,
-        };
-
-        match cur_type.get_physical_type() {
-            PhysicalType::BOOLEAN => Ok(Box::new(PrimitiveArrayReader::<BoolType>::new(
-                page_iterator,
-                column_desc,
-                arrow_type,
-            )?)),
-            PhysicalType::INT32 => {
-                if let Some(ArrowType::Null) = arrow_type {
-                    Ok(Box::new(NullArrayReader::<Int32Type>::new(
-                        page_iterator,
-                        column_desc,
-                    )?))
-                } else {
-                    Ok(Box::new(PrimitiveArrayReader::<Int32Type>::new(
-                        page_iterator,
-                        column_desc,
-                        arrow_type,
-                    )?))
-                }
-            }
-            PhysicalType::INT64 => Ok(Box::new(PrimitiveArrayReader::<Int64Type>::new(
-                page_iterator,
-                column_desc,
-                arrow_type,
-            )?)),
-            PhysicalType::INT96 => {
-                // get the optional timezone information from arrow type
-                let timezone = arrow_type
-                    .as_ref()
-                    .map(|data_type| {
-                        if let ArrowType::Timestamp(_, tz) = data_type {
-                            tz.clone()
-                        } else {
-                            None
-                        }
-                    })
-                    .flatten();
-                let converter = Int96Converter::new(Int96ArrayConverter { timezone });
-                Ok(Box::new(ComplexObjectArrayReader::<
-                    Int96Type,
-                    Int96Converter,
-                >::new(
-                    page_iterator,
-                    column_desc,
-                    converter,
-                    arrow_type,
-                )?))
-            }
-            PhysicalType::FLOAT => Ok(Box::new(PrimitiveArrayReader::<FloatType>::new(
-                page_iterator,
-                column_desc,
-                arrow_type,
-            )?)),
-            PhysicalType::DOUBLE => {
-                Ok(Box::new(PrimitiveArrayReader::<DoubleType>::new(
-                    page_iterator,
-                    column_desc,
-                    arrow_type,
-                )?))
-            }
-            PhysicalType::BYTE_ARRAY => {
-                if cur_type.get_basic_info().converted_type() == ConvertedType::UTF8 {
-                    if let Some(ArrowType::LargeUtf8) = arrow_type {
-                        let converter =
-                            LargeUtf8Converter::new(LargeUtf8ArrayConverter {});
-                        Ok(Box::new(ComplexObjectArrayReader::<
-                            ByteArrayType,
-                            LargeUtf8Converter,
-                        >::new(
-                            page_iterator,
-                            column_desc,
-                            converter,
-                            arrow_type,
-                        )?))
-                    } else {
-                        let converter = Utf8Converter::new(Utf8ArrayConverter {});
-                        Ok(Box::new(ComplexObjectArrayReader::<
-                            ByteArrayType,
-                            Utf8Converter,
-                        >::new(
-                            page_iterator,
-                            column_desc,
-                            converter,
-                            arrow_type,
-                        )?))
-                    }
-                } else if let Some(ArrowType::LargeBinary) = arrow_type {
-                    let converter =
-                        LargeBinaryConverter::new(LargeBinaryArrayConverter {});
-                    Ok(Box::new(ComplexObjectArrayReader::<
-                        ByteArrayType,
-                        LargeBinaryConverter,
-                    >::new(
-                        page_iterator,
-                        column_desc,
-                        converter,
-                        arrow_type,
-                    )?))
-                } else {
-                    let converter = BinaryConverter::new(BinaryArrayConverter {});
-                    Ok(Box::new(ComplexObjectArrayReader::<
-                        ByteArrayType,
-                        BinaryConverter,
-                    >::new(
-                        page_iterator,
-                        column_desc,
-                        converter,
-                        arrow_type,
-                    )?))
-                }
-            }
-            PhysicalType::FIXED_LEN_BYTE_ARRAY
-                if cur_type.get_basic_info().converted_type()
-                    == ConvertedType::DECIMAL =>
-            {
-                let converter = DecimalConverter::new(DecimalArrayConverter::new(
-                    cur_type.get_precision(),
-                    cur_type.get_scale(),
-                ));
-                Ok(Box::new(ComplexObjectArrayReader::<
-                    FixedLenByteArrayType,
-                    DecimalConverter,
-                >::new(
-                    page_iterator,
-                    column_desc,
-                    converter,
-                    arrow_type,
-                )?))
-            }
-            PhysicalType::FIXED_LEN_BYTE_ARRAY => {
-                if cur_type.get_basic_info().converted_type() == ConvertedType::INTERVAL {
-                    let byte_width = match *cur_type {
-                        Type::PrimitiveType {
-                            ref type_length, ..
-                        } => *type_length,
-                        _ => {
-                            return Err(ArrowError(
-                                "Expected a physical type, not a group type".to_string(),
-                            ))
-                        }
-                    };
-                    if byte_width != 12 {
-                        return Err(ArrowError(format!(
-                            "Parquet interval type should have length of 12, found {}",
-                            byte_width
-                        )));
-                    }
-                    match arrow_type {
-                        Some(ArrowType::Interval(IntervalUnit::DayTime)) => {
-                            let converter = IntervalDayTimeConverter::new(
-                                IntervalDayTimeArrayConverter {},
-                            );
-                            Ok(Box::new(ComplexObjectArrayReader::<
-                                FixedLenByteArrayType,
-                                IntervalDayTimeConverter,
-                            >::new(
-                                page_iterator,
-                                column_desc,
-                                converter,
-                                arrow_type,
-                            )?))
-                        }
-                        Some(ArrowType::Interval(IntervalUnit::YearMonth)) => {
-                            let converter = IntervalYearMonthConverter::new(
-                                IntervalYearMonthArrayConverter {},
-                            );
-                            Ok(Box::new(ComplexObjectArrayReader::<
-                                FixedLenByteArrayType,
-                                IntervalYearMonthConverter,
-                            >::new(
-                                page_iterator,
-                                column_desc,
-                                converter,
-                                arrow_type,
-                            )?))
-                        }
-                        Some(t) => Err(ArrowError(format!(
-                            "Cannot write a Parquet interval to {:?}",
-                            t
-                        ))),
-                        None => {
-                            // we do not support an interval not matched to an Arrow type,
-                            // because we risk data loss as we won't know which of the 12 bytes
-                            // are or should be populated
-                            Err(ArrowError(
-                                "Cannot write a Parquet interval with no Arrow type specified.
-                                There is a risk of data loss as Arrow either supports YearMonth or
-                                DayTime precision. Without the Arrow type, we cannot infer the type.
-                                ".to_string()
-                            ))
-                        }
-                    }
-                } else {
-                    let byte_width = match *cur_type {
-                        Type::PrimitiveType {
-                            ref type_length, ..
-                        } => *type_length,
-                        _ => {
-                            return Err(ArrowError(
-                                "Expected a physical type, not a group type".to_string(),
-                            ))
-                        }
-                    };
-                    let converter = FixedLenBinaryConverter::new(
-                        FixedSizeArrayConverter::new(byte_width),
-                    );
-                    Ok(Box::new(ComplexObjectArrayReader::<
-                        FixedLenByteArrayType,
-                        FixedLenBinaryConverter,
-                    >::new(
-                        page_iterator,
-                        column_desc,
-                        converter,
-                        arrow_type,
-                    )?))
-                }
-            }
-        }
-    }
-
-    /// Constructs struct array reader without considering repetition.
-    fn build_for_struct_type_inner(
-        &mut self,
-        cur_type: &Type,
-        context: &'a ArrayReaderBuilderContext,
-    ) -> Result<Option<Box<dyn ArrayReader>>> {
-        let mut fields = Vec::with_capacity(cur_type.get_fields().len());
-        let mut children_reader = Vec::with_capacity(cur_type.get_fields().len());
-
-        for child in cur_type.get_fields() {
-            let mut struct_context = context.clone();
-            if let Some(child_reader) = self.dispatch(child.clone(), context)? {
-                // TODO: this results in calling get_arrow_field twice, it could be reused
-                // from child_reader above, by making child_reader carry its `Field`
-                struct_context.path.append(vec![child.name().to_string()]);
-                let field = match self.get_arrow_field(child, &struct_context) {
-                    Some(f) => f.clone(),
-                    _ => Field::new(
-                        child.name(),
-                        child_reader.get_data_type().clone(),
-                        child.is_optional(),
-                    ),
-                };
-                fields.push(field);
-                children_reader.push(child_reader);
-            }
-        }
-
-        if !fields.is_empty() {
-            let arrow_type = ArrowType::Struct(fields);
-            Ok(Some(Box::new(StructArrayReader::new(
-                arrow_type,
-                children_reader,
-                context.def_level,
-                context.rep_level,
-            ))))
-        } else {
-            Ok(None)
-        }
-    }
-
-    fn get_arrow_field(
-        &self,
-        cur_type: &Type,
-        context: &'a ArrayReaderBuilderContext,
-    ) -> Option<&Field> {
-        let parts: Vec<&str> = context
-            .path
-            .parts()
-            .iter()
-            .map(|x| -> &str { x })
-            .collect::<Vec<&str>>();
-
-        // If the parts length is one it'll have the top level "schema" type. If
-        // it's two then it'll be a top-level type that we can get from the arrow
-        // schema directly.
-        if parts.len() <= 2 {
-            self.arrow_schema.field_with_name(cur_type.name()).ok()
-        } else {
-            // If it's greater than two then we need to traverse the type path
-            // until we find the actual field we're looking for.
-            let mut field: Option<&Field> = None;
-
-            for (i, part) in parts.iter().enumerate().skip(1) {
-                if i == 1 {
-                    field = self.arrow_schema.field_with_name(part).ok();
-                } else if let Some(f) = field {
-                    if let ArrowType::Struct(fields) = f.data_type() {
-                        field = fields.iter().find(|f| f.name() == part)
-                    } else {
-                        field = None
-                    }
-                } else {
-                    field = None
-                }
-            }
-            field
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::arrow::converter::Utf8Converter;
-    use crate::arrow::schema::parquet_to_arrow_schema;
-    use crate::basic::{Encoding, Type as PhysicalType};
-    use crate::column::page::{Page, PageReader};
-    use crate::data_type::{ByteArray, DataType, Int32Type, Int64Type};
-    use crate::errors::Result;
-    use crate::file::reader::{FileReader, SerializedFileReader};
-    use crate::schema::parser::parse_message_type;
-    use crate::schema::types::{ColumnDescPtr, SchemaDescriptor};
-    use crate::util::test_common::page_util::{
-        DataPageBuilder, DataPageBuilderImpl, InMemoryPageIterator,
-    };
-    use crate::util::test_common::{get_test_file, make_pages};
-    use arrow::array::{
-        Array, ArrayRef, LargeListArray, ListArray, PrimitiveArray, StringArray,
-        StructArray,
-    };
-    use arrow::datatypes::{
-        ArrowPrimitiveType, DataType as ArrowType, Date32Type as ArrowDate32, Field,
-        Int32Type as ArrowInt32, Int64Type as ArrowInt64,
-        Time32MillisecondType as ArrowTime32MillisecondArray,
-        Time64MicrosecondType as ArrowTime64MicrosecondArray,
-        TimestampMicrosecondType as ArrowTimestampMicrosecondType,
-        TimestampMillisecondType as ArrowTimestampMillisecondType,
-    };
-    use rand::distributions::uniform::SampleUniform;
-    use rand::{thread_rng, Rng};
-    use std::any::Any;
-    use std::collections::VecDeque;
-    use std::sync::Arc;
-
-    fn make_column_chunks<T: DataType>(
-        column_desc: ColumnDescPtr,
-        encoding: Encoding,
-        num_levels: usize,
-        min_value: T::T,
-        max_value: T::T,
-        def_levels: &mut Vec<i16>,
-        rep_levels: &mut Vec<i16>,
-        values: &mut Vec<T::T>,
-        page_lists: &mut Vec<Vec<Page>>,
-        use_v2: bool,
-        num_chunks: usize,
-    ) where
-        T::T: PartialOrd + SampleUniform + Copy,
-    {
-        for _i in 0..num_chunks {
-            let mut pages = VecDeque::new();
-            let mut data = Vec::new();
-            let mut page_def_levels = Vec::new();
-            let mut page_rep_levels = Vec::new();
-
-            make_pages::<T>(
-                column_desc.clone(),
-                encoding,
-                1,
-                num_levels,
-                min_value,
-                max_value,
-                &mut page_def_levels,
-                &mut page_rep_levels,
-                &mut data,
-                &mut pages,
-                use_v2,
-            );
-
-            def_levels.append(&mut page_def_levels);
-            rep_levels.append(&mut page_rep_levels);
-            values.append(&mut data);
-            page_lists.push(Vec::from(pages));
-        }
-    }
-
-    #[test]
-    fn test_primitive_array_reader_empty_pages() {
-        // Construct column schema
-        let message_type = "
-        message test_schema {
-          REQUIRED INT32 leaf;
-        }
-        ";
-
-        let schema = parse_message_type(message_type)
-            .map(|t| Arc::new(SchemaDescriptor::new(Arc::new(t))))
-            .unwrap();
-
-        let column_desc = schema.column(0);
-        let page_iterator = EmptyPageIterator::new(schema);
-
-        let mut array_reader = PrimitiveArrayReader::<Int32Type>::new(
-            Box::new(page_iterator),
-            column_desc,
-            None,
-        )
-        .unwrap();
-
-        // expect no values to be read
-        let array = array_reader.next_batch(50).unwrap();
-        assert!(array.is_empty());
-    }
-
-    #[test]
-    fn test_primitive_array_reader_data() {
-        // Construct column schema
-        let message_type = "
-        message test_schema {
-          REQUIRED INT32 leaf;
-        }
-        ";
-
-        let schema = parse_message_type(message_type)
-            .map(|t| Arc::new(SchemaDescriptor::new(Arc::new(t))))
-            .unwrap();
-
-        let column_desc = schema.column(0);
-
-        // Construct page iterator
-        {
-            let mut data = Vec::new();
-            let mut page_lists = Vec::new();
-            make_column_chunks::<Int32Type>(
-                column_desc.clone(),
-                Encoding::PLAIN,
-                100,
-                1,
-                200,
-                &mut Vec::new(),
-                &mut Vec::new(),
-                &mut data,
-                &mut page_lists,
-                true,
-                2,
-            );
-            let page_iterator =
-                InMemoryPageIterator::new(schema, column_desc.clone(), page_lists);
-
-            let mut array_reader = PrimitiveArrayReader::<Int32Type>::new(
-                Box::new(page_iterator),
-                column_desc,
-                None,
-            )
-            .unwrap();
-
-            // Read first 50 values, which are all from the first column chunk
-            let array = array_reader.next_batch(50).unwrap();
-            let array = array
-                .as_any()
-                .downcast_ref::<PrimitiveArray<ArrowInt32>>()
-                .unwrap();
-
-            assert_eq!(
-                &PrimitiveArray::<ArrowInt32>::from(data[0..50].to_vec()),
-                array
-            );
-
-            // Read next 100 values, the first 50 ones are from the first column chunk,
-            // and the last 50 ones are from the second column chunk
-            let array = array_reader.next_batch(100).unwrap();
-            let array = array
-                .as_any()
-                .downcast_ref::<PrimitiveArray<ArrowInt32>>()
-                .unwrap();
-
-            assert_eq!(
-                &PrimitiveArray::<ArrowInt32>::from(data[50..150].to_vec()),
-                array
-            );
-
-            // Try to read 100 values, however there are only 50 values
-            let array = array_reader.next_batch(100).unwrap();
-            let array = array
-                .as_any()
-                .downcast_ref::<PrimitiveArray<ArrowInt32>>()
-                .unwrap();
-
-            assert_eq!(
-                &PrimitiveArray::<ArrowInt32>::from(data[150..200].to_vec()),
-                array
-            );
-        }
-    }
-
-    macro_rules! test_primitive_array_reader_one_type {
-        ($arrow_parquet_type:ty, $physical_type:expr, $converted_type_str:expr, $result_arrow_type:ty, $result_arrow_cast_type:ty, $result_primitive_type:ty) => {{
-            let message_type = format!(
-                "
-            message test_schema {{
-              REQUIRED {:?} leaf ({});
-          }}
-            ",
-                $physical_type, $converted_type_str
-            );
-            let schema = parse_message_type(&message_type)
-                .map(|t| Arc::new(SchemaDescriptor::new(Arc::new(t))))
-                .unwrap();
-
-            let column_desc = schema.column(0);
-
-            // Construct page iterator
-            {
-                let mut data = Vec::new();
-                let mut page_lists = Vec::new();
-                make_column_chunks::<$arrow_parquet_type>(
-                    column_desc.clone(),
-                    Encoding::PLAIN,
-                    100,
-                    1,
-                    200,
-                    &mut Vec::new(),
-                    &mut Vec::new(),
-                    &mut data,
-                    &mut page_lists,
-                    true,
-                    2,
-                );
-                let page_iterator = InMemoryPageIterator::new(
-                    schema.clone(),
-                    column_desc.clone(),
-                    page_lists,
-                );
-                let mut array_reader = PrimitiveArrayReader::<$arrow_parquet_type>::new(
-                    Box::new(page_iterator),
-                    column_desc.clone(),
-                    None,
-                )
-                .expect("Unable to get array reader");
-
-                let array = array_reader
-                    .next_batch(50)
-                    .expect("Unable to get batch from reader");
-
-                let result_data_type = <$result_arrow_type>::DATA_TYPE;
-                let array = array
-                    .as_any()
-                    .downcast_ref::<PrimitiveArray<$result_arrow_type>>()
-                    .expect(
-                        format!(
-                            "Unable to downcast {:?} to {:?}",
-                            array.data_type(),
-                            result_data_type
-                        )
-                        .as_str(),
-                    );
-
-                // create expected array as primitive, and cast to result type
-                let expected = PrimitiveArray::<$result_arrow_cast_type>::from(
-                    data[0..50]
-                        .iter()
-                        .map(|x| *x as $result_primitive_type)
-                        .collect::<Vec<$result_primitive_type>>(),
-                );
-                let expected = Arc::new(expected) as ArrayRef;
-                let expected = arrow::compute::cast(&expected, &result_data_type)
-                    .expect("Unable to cast expected array");
-                assert_eq!(expected.data_type(), &result_data_type);
-                let expected = expected
-                    .as_any()
-                    .downcast_ref::<PrimitiveArray<$result_arrow_type>>()
-                    .expect(
-                        format!(
-                            "Unable to downcast expected {:?} to {:?}",
-                            expected.data_type(),
-                            result_data_type
-                        )
-                        .as_str(),
-                    );
-                assert_eq!(expected, array);
-            }
-        }};
-    }
-
-    #[test]
-    fn test_primitive_array_reader_temporal_types() {
-        test_primitive_array_reader_one_type!(
-            Int32Type,
-            PhysicalType::INT32,
-            "DATE",
-            ArrowDate32,
-            ArrowInt32,
-            i32
-        );
-        test_primitive_array_reader_one_type!(
-            Int32Type,
-            PhysicalType::INT32,
-            "TIME_MILLIS",
-            ArrowTime32MillisecondArray,
-            ArrowInt32,
-            i32
-        );
-        test_primitive_array_reader_one_type!(
-            Int64Type,
-            PhysicalType::INT64,
-            "TIME_MICROS",
-            ArrowTime64MicrosecondArray,
-            ArrowInt64,
-            i64
-        );
-        test_primitive_array_reader_one_type!(
-            Int64Type,
-            PhysicalType::INT64,
-            "TIMESTAMP_MILLIS",
-            ArrowTimestampMillisecondType,
-            ArrowInt64,
-            i64
-        );
-        test_primitive_array_reader_one_type!(
-            Int64Type,
-            PhysicalType::INT64,
-            "TIMESTAMP_MICROS",
-            ArrowTimestampMicrosecondType,
-            ArrowInt64,
-            i64
-        );
-    }
-
-    #[test]
-    fn test_primitive_array_reader_def_and_rep_levels() {
-        // Construct column schema
-        let message_type = "
-        message test_schema {
-            REPEATED Group test_mid {
-                OPTIONAL INT32 leaf;
-            }
-        }
-        ";
-
-        let schema = parse_message_type(message_type)
-            .map(|t| Arc::new(SchemaDescriptor::new(Arc::new(t))))
-            .unwrap();
-
-        let column_desc = schema.column(0);
-
-        // Construct page iterator
-        {
-            let mut def_levels = Vec::new();
-            let mut rep_levels = Vec::new();
-            let mut page_lists = Vec::new();
-            make_column_chunks::<Int32Type>(
-                column_desc.clone(),
-                Encoding::PLAIN,
-                100,
-                1,
-                200,
-                &mut def_levels,
-                &mut rep_levels,
-                &mut Vec::new(),
-                &mut page_lists,
-                true,
-                2,
-            );
-
-            let page_iterator =
-                InMemoryPageIterator::new(schema, column_desc.clone(), page_lists);
-
-            let mut array_reader = PrimitiveArrayReader::<Int32Type>::new(
-                Box::new(page_iterator),
-                column_desc,
-                None,
-            )
-            .unwrap();
-
-            let mut accu_len: usize = 0;
-
-            // Read first 50 values, which are all from the first column chunk
-            let array = array_reader.next_batch(50).unwrap();
-            assert_eq!(
-                Some(&def_levels[accu_len..(accu_len + array.len())]),
-                array_reader.get_def_levels()
-            );
-            assert_eq!(
-                Some(&rep_levels[accu_len..(accu_len + array.len())]),
-                array_reader.get_rep_levels()
-            );
-            accu_len += array.len();
-
-            // Read next 100 values, the first 50 ones are from the first column chunk,
-            // and the last 50 ones are from the second column chunk
-            let array = array_reader.next_batch(100).unwrap();
-            assert_eq!(
-                Some(&def_levels[accu_len..(accu_len + array.len())]),
-                array_reader.get_def_levels()
-            );
-            assert_eq!(
-                Some(&rep_levels[accu_len..(accu_len + array.len())]),
-                array_reader.get_rep_levels()
-            );
-            accu_len += array.len();
-
-            // Try to read 100 values, however there are only 50 values
-            let array = array_reader.next_batch(100).unwrap();
-            assert_eq!(
-                Some(&def_levels[accu_len..(accu_len + array.len())]),
-                array_reader.get_def_levels()
-            );
-            assert_eq!(
-                Some(&rep_levels[accu_len..(accu_len + array.len())]),
-                array_reader.get_rep_levels()
-            );
-        }
-    }
-
-    #[test]
-    fn test_complex_array_reader_no_pages() {
-        let message_type = "
-        message test_schema {
-            REPEATED Group test_mid {
-                OPTIONAL BYTE_ARRAY leaf (UTF8);
-            }
-        }
-        ";
-        let schema = parse_message_type(message_type)
-            .map(|t| Arc::new(SchemaDescriptor::new(Arc::new(t))))
-            .unwrap();
-        let column_desc = schema.column(0);
-        let pages: Vec<Vec<Page>> = Vec::new();
-        let page_iterator = InMemoryPageIterator::new(schema, column_desc.clone(), pages);
-
-        let converter = Utf8Converter::new(Utf8ArrayConverter {});
-        let mut array_reader =
-            ComplexObjectArrayReader::<ByteArrayType, Utf8Converter>::new(
-                Box::new(page_iterator),
-                column_desc,
-                converter,
-                None,
-            )
-            .unwrap();
-
-        let values_per_page = 100; // this value is arbitrary in this test - the result should always be an array of 0 length
-        let array = array_reader.next_batch(values_per_page).unwrap();
-        assert_eq!(array.len(), 0);
-    }
-
-    #[test]
-    fn test_complex_array_reader_def_and_rep_levels() {
-        // Construct column schema
-        let message_type = "
-        message test_schema {
-            REPEATED Group test_mid {
-                OPTIONAL BYTE_ARRAY leaf (UTF8);
-            }
-        }
-        ";
-        let num_pages = 2;
-        let values_per_page = 100;
-        let str_base = "Hello World";
-
-        let schema = parse_message_type(message_type)
-            .map(|t| Arc::new(SchemaDescriptor::new(Arc::new(t))))
-            .unwrap();
-
-        let max_def_level = schema.column(0).max_def_level();
-        let max_rep_level = schema.column(0).max_rep_level();
-
-        assert_eq!(max_def_level, 2);
-        assert_eq!(max_rep_level, 1);
-
-        let mut rng = thread_rng();
-        let column_desc = schema.column(0);
-        let mut pages: Vec<Vec<Page>> = Vec::new();
-
-        let mut rep_levels = Vec::with_capacity(num_pages * values_per_page);
-        let mut def_levels = Vec::with_capacity(num_pages * values_per_page);
-        let mut all_values = Vec::with_capacity(num_pages * values_per_page);
-
-        for i in 0..num_pages {
-            let mut values = Vec::with_capacity(values_per_page);
-
-            for _ in 0..values_per_page {
-                let def_level = rng.gen_range(0..max_def_level + 1);
-                let rep_level = rng.gen_range(0..max_rep_level + 1);
-                if def_level == max_def_level {
-                    let len = rng.gen_range(1..str_base.len());
-                    let slice = &str_base[..len];
-                    values.push(ByteArray::from(slice));
-                    all_values.push(Some(slice.to_string()));
-                } else {
-                    all_values.push(None)
-                }
-                rep_levels.push(rep_level);
-                def_levels.push(def_level)
-            }
-
-            let range = i * values_per_page..(i + 1) * values_per_page;
-            let mut pb =
-                DataPageBuilderImpl::new(column_desc.clone(), values.len() as u32, true);
-
-            pb.add_rep_levels(max_rep_level, &rep_levels.as_slice()[range.clone()]);
-            pb.add_def_levels(max_def_level, &def_levels.as_slice()[range]);
-            pb.add_values::<ByteArrayType>(Encoding::PLAIN, values.as_slice());
-
-            let data_page = pb.consume();
-            pages.push(vec![data_page]);
-        }
-
-        let page_iterator = InMemoryPageIterator::new(schema, column_desc.clone(), pages);
-
-        let converter = Utf8Converter::new(Utf8ArrayConverter {});
-        let mut array_reader =
-            ComplexObjectArrayReader::<ByteArrayType, Utf8Converter>::new(
-                Box::new(page_iterator),
-                column_desc,
-                converter,
-                None,
-            )
-            .unwrap();
-
-        let mut accu_len: usize = 0;
-
-        let array = array_reader.next_batch(values_per_page / 2).unwrap();
-        assert_eq!(array.len(), values_per_page / 2);
-        assert_eq!(
-            Some(&def_levels[accu_len..(accu_len + array.len())]),
-            array_reader.get_def_levels()
-        );
-        assert_eq!(
-            Some(&rep_levels[accu_len..(accu_len + array.len())]),
-            array_reader.get_rep_levels()
-        );
-        accu_len += array.len();
-
-        // Read next values_per_page values, the first values_per_page/2 ones are from the first column chunk,
-        // and the last values_per_page/2 ones are from the second column chunk
-        let array = array_reader.next_batch(values_per_page).unwrap();
-        assert_eq!(array.len(), values_per_page);
-        assert_eq!(
-            Some(&def_levels[accu_len..(accu_len + array.len())]),
-            array_reader.get_def_levels()
-        );
-        assert_eq!(
-            Some(&rep_levels[accu_len..(accu_len + array.len())]),
-            array_reader.get_rep_levels()
-        );
-        let strings = array.as_any().downcast_ref::<StringArray>().unwrap();
-        for i in 0..array.len() {
-            if array.is_valid(i) {
-                assert_eq!(
-                    all_values[i + accu_len].as_ref().unwrap().as_str(),
-                    strings.value(i)
-                )
-            } else {
-                assert_eq!(all_values[i + accu_len], None)
-            }
-        }
-        accu_len += array.len();
-
-        // Try to read values_per_page values, however there are only values_per_page/2 values
-        let array = array_reader.next_batch(values_per_page).unwrap();
-        assert_eq!(array.len(), values_per_page / 2);
-        assert_eq!(
-            Some(&def_levels[accu_len..(accu_len + array.len())]),
-            array_reader.get_def_levels()
-        );
-        assert_eq!(
-            Some(&rep_levels[accu_len..(accu_len + array.len())]),
-            array_reader.get_rep_levels()
-        );
-    }
-
-    /// Array reader for test.
-    struct InMemoryArrayReader {
-        data_type: ArrowType,
-        array: ArrayRef,
-        def_levels: Option<Vec<i16>>,
-        rep_levels: Option<Vec<i16>>,
-    }
-
-    impl InMemoryArrayReader {
-        pub fn new(
-            data_type: ArrowType,
-            array: ArrayRef,
-            def_levels: Option<Vec<i16>>,
-            rep_levels: Option<Vec<i16>>,
-        ) -> Self {
-            Self {
-                data_type,
-                array,
-                def_levels,
-                rep_levels,
-            }
-        }
-    }
-
-    impl ArrayReader for InMemoryArrayReader {
-        fn as_any(&self) -> &dyn Any {
-            self
-        }
-
-        fn get_data_type(&self) -> &ArrowType {
-            &self.data_type
-        }
-
-        fn next_batch(&mut self, _batch_size: usize) -> Result<ArrayRef> {
-            Ok(self.array.clone())
-        }
-
-        fn get_def_levels(&self) -> Option<&[i16]> {
-            self.def_levels.as_deref()
-        }
-
-        fn get_rep_levels(&self) -> Option<&[i16]> {
-            self.rep_levels.as_deref()
-        }
-    }
-
-    /// Iterator for testing reading empty columns
-    struct EmptyPageIterator {
-        schema: SchemaDescPtr,
-    }
-
-    impl EmptyPageIterator {
-        fn new(schema: SchemaDescPtr) -> Self {
-            EmptyPageIterator { schema }
-        }
-    }
-
-    impl Iterator for EmptyPageIterator {
-        type Item = Result<Box<dyn PageReader>>;
-
-        fn next(&mut self) -> Option<Self::Item> {
-            None
-        }
-    }
-
-    impl PageIterator for EmptyPageIterator {
-        fn schema(&mut self) -> Result<SchemaDescPtr> {
-            Ok(self.schema.clone())
-        }
-
-        fn column_schema(&mut self) -> Result<ColumnDescPtr> {
-            Ok(self.schema.column(0))
-        }
-    }
-
-    #[test]
-    fn test_struct_array_reader() {
-        let array_1 = Arc::new(PrimitiveArray::<ArrowInt32>::from(vec![1, 2, 3, 4, 5]));
-        let array_reader_1 = InMemoryArrayReader::new(
-            ArrowType::Int32,
-            array_1.clone(),
-            Some(vec![0, 1, 2, 3, 1]),
-            Some(vec![1, 1, 1, 1, 1]),
-        );
-
-        let array_2 = Arc::new(PrimitiveArray::<ArrowInt32>::from(vec![5, 4, 3, 2, 1]));
-        let array_reader_2 = InMemoryArrayReader::new(
-            ArrowType::Int32,
-            array_2.clone(),
-            Some(vec![0, 1, 3, 1, 2]),
-            Some(vec![1, 1, 1, 1, 1]),
-        );
-
-        let struct_type = ArrowType::Struct(vec![
-            Field::new("f1", array_1.data_type().clone(), true),
-            Field::new("f2", array_2.data_type().clone(), true),
-        ]);
-
-        let mut struct_array_reader = StructArrayReader::new(
-            struct_type,
-            vec![Box::new(array_reader_1), Box::new(array_reader_2)],
-            1,
-            1,
-        );
-
-        let struct_array = struct_array_reader.next_batch(5).unwrap();
-        let struct_array = struct_array.as_any().downcast_ref::<StructArray>().unwrap();
-
-        assert_eq!(5, struct_array.len());
-        assert_eq!(
-            vec![true, false, false, false, false],
-            (0..5)
-                .map(|idx| struct_array.data_ref().is_null(idx))
-                .collect::<Vec<bool>>()
-        );
-        assert_eq!(
-            Some(vec![0, 1, 1, 1, 1].as_slice()),
-            struct_array_reader.get_def_levels()
-        );
-        assert_eq!(
-            Some(vec![1, 1, 1, 1, 1].as_slice()),
-            struct_array_reader.get_rep_levels()
-        );
-    }
-
-    #[test]
-    fn test_create_array_reader() {
-        let file = get_test_file("nulls.snappy.parquet");
-        let file_reader = Arc::new(SerializedFileReader::new(file).unwrap());
-
-        let file_metadata = file_reader.metadata().file_metadata();
-        let arrow_schema = parquet_to_arrow_schema(
-            file_metadata.schema_descr(),
-            file_metadata.key_value_metadata(),
-        )
-        .unwrap();
-
-        let array_reader = build_array_reader(
-            file_reader.metadata().file_metadata().schema_descr_ptr(),
-            arrow_schema,
-            vec![0usize].into_iter(),
-            file_reader,
-        )
-        .unwrap();
-
-        // Create arrow types
-        let arrow_type = ArrowType::Struct(vec![Field::new(
-            "b_struct",
-            ArrowType::Struct(vec![Field::new("b_c_int", ArrowType::Int32, true)]),
-            true,
-        )]);
-
-        assert_eq!(array_reader.get_data_type(), &arrow_type);
-    }
-
-    #[test]
-    fn test_list_array_reader() {
-        // [[1, null, 2], null, [3, 4]]
-        let array = Arc::new(PrimitiveArray::<ArrowInt32>::from(vec![
-            Some(1),
-            None,
-            Some(2),
-            None,
-            Some(3),
-            Some(4),
-        ]));
-        let item_array_reader = InMemoryArrayReader::new(
-            ArrowType::Int32,
-            array,
-            Some(vec![3, 2, 3, 0, 3, 3]),
-            Some(vec![0, 1, 1, 0, 0, 1]),
-        );
-
-        let mut list_array_reader = ListArrayReader::<i32>::new(
-            Box::new(item_array_reader),
-            ArrowType::List(Box::new(Field::new("item", ArrowType::Int32, true))),
-            ArrowType::Int32,
-            1,
-            1,
-        );
-
-        let next_batch = list_array_reader.next_batch(1024).unwrap();
-        let list_array = next_batch.as_any().downcast_ref::<ListArray>().unwrap();
-
-        assert_eq!(3, list_array.len());
-        // This passes as I expect
-        assert_eq!(1, list_array.null_count());
-
-        assert_eq!(
-            list_array
-                .value(0)
-                .as_any()
-                .downcast_ref::<PrimitiveArray<ArrowInt32>>()
-                .unwrap(),
-            &PrimitiveArray::<ArrowInt32>::from(vec![Some(1), None, Some(2)])
-        );
-
-        assert!(list_array.is_null(1));
-
-        assert_eq!(
-            list_array
-                .value(2)
-                .as_any()
-                .downcast_ref::<PrimitiveArray<ArrowInt32>>()
-                .unwrap(),
-            &PrimitiveArray::<ArrowInt32>::from(vec![Some(3), Some(4)])
-        );
-    }
-
-    #[test]
-    fn test_large_list_array_reader() {
-        // [[1, null, 2], null, [3, 4]]
-        let array = Arc::new(PrimitiveArray::<ArrowInt32>::from(vec![
-            Some(1),
-            None,
-            Some(2),
-            None,
-            Some(3),
-            Some(4),
-        ]));
-        let item_array_reader = InMemoryArrayReader::new(
-            ArrowType::Int32,
-            array,
-            Some(vec![3, 2, 3, 0, 3, 3]),
-            Some(vec![0, 1, 1, 0, 0, 1]),
-        );
-
-        let mut list_array_reader = ListArrayReader::<i64>::new(
-            Box::new(item_array_reader),
-            ArrowType::LargeList(Box::new(Field::new("item", ArrowType::Int32, true))),
-            ArrowType::Int32,
-            1,
-            1,
-        );
-
-        let next_batch = list_array_reader.next_batch(1024).unwrap();
-        let list_array = next_batch
-            .as_any()
-            .downcast_ref::<LargeListArray>()
-            .unwrap();
-
-        assert_eq!(3, list_array.len());
-
-        assert_eq!(
-            list_array
-                .value(0)
-                .as_any()
-                .downcast_ref::<PrimitiveArray<ArrowInt32>>()
-                .unwrap(),
-            &PrimitiveArray::<ArrowInt32>::from(vec![Some(1), None, Some(2)])
-        );
-
-        assert!(list_array.is_null(1));
-
-        assert_eq!(
-            list_array
-                .value(2)
-                .as_any()
-                .downcast_ref::<PrimitiveArray<ArrowInt32>>()
-                .unwrap(),
-            &PrimitiveArray::<ArrowInt32>::from(vec![Some(3), Some(4)])
-        );
-    }
-}
diff --git a/rust/parquet/src/arrow/arrow_reader.rs b/rust/parquet/src/arrow/arrow_reader.rs
deleted file mode 100644
index 83fb0a2f7e9..00000000000
--- a/rust/parquet/src/arrow/arrow_reader.rs
+++ /dev/null
@@ -1,671 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Contains reader which reads parquet data into arrow array.
-
-use crate::arrow::array_reader::{build_array_reader, ArrayReader, StructArrayReader};
-use crate::arrow::schema::parquet_to_arrow_schema;
-use crate::arrow::schema::{
-    parquet_to_arrow_schema_by_columns, parquet_to_arrow_schema_by_root_columns,
-};
-use crate::errors::{ParquetError, Result};
-use crate::file::metadata::ParquetMetaData;
-use crate::file::reader::FileReader;
-use arrow::datatypes::{DataType as ArrowType, Schema, SchemaRef};
-use arrow::error::Result as ArrowResult;
-use arrow::record_batch::{RecordBatch, RecordBatchReader};
-use arrow::{array::StructArray, error::ArrowError};
-use std::sync::Arc;
-
-/// Arrow reader api.
-/// With this api, user can get arrow schema from parquet file, and read parquet data
-/// into arrow arrays.
-pub trait ArrowReader {
-    type RecordReader: RecordBatchReader;
-
-    /// Read parquet schema and convert it into arrow schema.
-    fn get_schema(&mut self) -> Result<Schema>;
-
-    /// Read parquet schema and convert it into arrow schema.
-    /// This schema only includes columns identified by `column_indices`.
-    /// To select leaf columns (i.e. `a.b.c` instead of `a`), set `leaf_columns = true`
-    fn get_schema_by_columns<T>(
-        &mut self,
-        column_indices: T,
-        leaf_columns: bool,
-    ) -> Result<Schema>
-    where
-        T: IntoIterator<Item = usize>;
-
-    /// Returns record batch reader from whole parquet file.
-    ///
-    /// # Arguments
-    ///
-    /// `batch_size`: The size of each record batch returned from this reader. Only the
-    /// last batch may contain records less than this size, otherwise record batches
-    /// returned from this reader should contains exactly `batch_size` elements.
-    fn get_record_reader(&mut self, batch_size: usize) -> Result<Self::RecordReader>;
-
-    /// Returns record batch reader whose record batch contains columns identified by
-    /// `column_indices`.
-    ///
-    /// # Arguments
-    ///
-    /// `column_indices`: The columns that should be included in record batches.
-    /// `batch_size`: Please refer to `get_record_reader`.
-    fn get_record_reader_by_columns<T>(
-        &mut self,
-        column_indices: T,
-        batch_size: usize,
-    ) -> Result<Self::RecordReader>
-    where
-        T: IntoIterator<Item = usize>;
-}
-
-pub struct ParquetFileArrowReader {
-    file_reader: Arc<dyn FileReader>,
-}
-
-impl ArrowReader for ParquetFileArrowReader {
-    type RecordReader = ParquetRecordBatchReader;
-
-    fn get_schema(&mut self) -> Result<Schema> {
-        let file_metadata = self.file_reader.metadata().file_metadata();
-        parquet_to_arrow_schema(
-            file_metadata.schema_descr(),
-            file_metadata.key_value_metadata(),
-        )
-    }
-
-    fn get_schema_by_columns<T>(
-        &mut self,
-        column_indices: T,
-        leaf_columns: bool,
-    ) -> Result<Schema>
-    where
-        T: IntoIterator<Item = usize>,
-    {
-        let file_metadata = self.file_reader.metadata().file_metadata();
-        if leaf_columns {
-            parquet_to_arrow_schema_by_columns(
-                file_metadata.schema_descr(),
-                column_indices,
-                file_metadata.key_value_metadata(),
-            )
-        } else {
-            parquet_to_arrow_schema_by_root_columns(
-                file_metadata.schema_descr(),
-                column_indices,
-                file_metadata.key_value_metadata(),
-            )
-        }
-    }
-
-    fn get_record_reader(
-        &mut self,
-        batch_size: usize,
-    ) -> Result<ParquetRecordBatchReader> {
-        let column_indices = 0..self
-            .file_reader
-            .metadata()
-            .file_metadata()
-            .schema_descr()
-            .num_columns();
-
-        self.get_record_reader_by_columns(column_indices, batch_size)
-    }
-
-    fn get_record_reader_by_columns<T>(
-        &mut self,
-        column_indices: T,
-        batch_size: usize,
-    ) -> Result<ParquetRecordBatchReader>
-    where
-        T: IntoIterator<Item = usize>,
-    {
-        let array_reader = build_array_reader(
-            self.file_reader
-                .metadata()
-                .file_metadata()
-                .schema_descr_ptr(),
-            self.get_schema()?,
-            column_indices,
-            self.file_reader.clone(),
-        )?;
-
-        ParquetRecordBatchReader::try_new(batch_size, array_reader)
-    }
-}
-
-impl ParquetFileArrowReader {
-    pub fn new(file_reader: Arc<dyn FileReader>) -> Self {
-        Self { file_reader }
-    }
-
-    // Expose the reader metadata
-    pub fn get_metadata(&mut self) -> ParquetMetaData {
-        self.file_reader.metadata().clone()
-    }
-}
-
-pub struct ParquetRecordBatchReader {
-    batch_size: usize,
-    array_reader: Box<dyn ArrayReader>,
-    schema: SchemaRef,
-}
-
-impl Iterator for ParquetRecordBatchReader {
-    type Item = ArrowResult<RecordBatch>;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        match self.array_reader.next_batch(self.batch_size) {
-            Err(error) => Some(Err(error.into())),
-            Ok(array) => {
-                let struct_array =
-                    array.as_any().downcast_ref::<StructArray>().ok_or_else(|| {
-                        ArrowError::ParquetError(
-                            "Struct array reader should return struct array".to_string(),
-                        )
-                    });
-                match struct_array {
-                    Err(err) => Some(Err(err)),
-                    Ok(e) => {
-                        match RecordBatch::try_new(self.schema.clone(), e.columns_ref()) {
-                            Err(err) => Some(Err(err)),
-                            Ok(record_batch) => {
-                                if record_batch.num_rows() > 0 {
-                                    Some(Ok(record_batch))
-                                } else {
-                                    None
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-        }
-    }
-}
-
-impl RecordBatchReader for ParquetRecordBatchReader {
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-}
-
-impl ParquetRecordBatchReader {
-    pub fn try_new(
-        batch_size: usize,
-        array_reader: Box<dyn ArrayReader>,
-    ) -> Result<Self> {
-        // Check that array reader is struct array reader
-        array_reader
-            .as_any()
-            .downcast_ref::<StructArrayReader>()
-            .ok_or_else(|| general_err!("The input must be struct array reader!"))?;
-
-        let schema = match array_reader.get_data_type() {
-            ArrowType::Struct(ref fields) => Schema::new(fields.clone()),
-            _ => unreachable!("Struct array reader's data type is not struct!"),
-        };
-
-        Ok(Self {
-            batch_size,
-            array_reader,
-            schema: Arc::new(schema),
-        })
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use crate::arrow::arrow_reader::{ArrowReader, ParquetFileArrowReader};
-    use crate::arrow::converter::{
-        Converter, FixedSizeArrayConverter, FromConverter, IntervalDayTimeArrayConverter,
-        Utf8ArrayConverter,
-    };
-    use crate::column::writer::get_typed_column_writer_mut;
-    use crate::data_type::{
-        BoolType, ByteArray, ByteArrayType, DataType, FixedLenByteArray,
-        FixedLenByteArrayType, Int32Type,
-    };
-    use crate::errors::Result;
-    use crate::file::properties::WriterProperties;
-    use crate::file::reader::{FileReader, SerializedFileReader};
-    use crate::file::writer::{FileWriter, SerializedFileWriter};
-    use crate::schema::parser::parse_message_type;
-    use crate::schema::types::TypePtr;
-    use crate::util::test_common::{get_temp_filename, RandGen};
-    use arrow::array::*;
-    use arrow::record_batch::RecordBatchReader;
-    use rand::RngCore;
-    use serde_json::json;
-    use serde_json::Value::{Array as JArray, Null as JNull, Object as JObject};
-    use std::cmp::min;
-    use std::convert::TryFrom;
-    use std::fs::File;
-    use std::path::{Path, PathBuf};
-    use std::sync::Arc;
-
-    #[test]
-    fn test_arrow_reader_all_columns() {
-        let json_values = get_json_array("parquet/generated_simple_numerics/blogs.json");
-
-        let parquet_file_reader =
-            get_test_reader("parquet/generated_simple_numerics/blogs.parquet");
-
-        let max_len = parquet_file_reader.metadata().file_metadata().num_rows() as usize;
-
-        let mut arrow_reader = ParquetFileArrowReader::new(parquet_file_reader);
-
-        let mut record_batch_reader = arrow_reader
-            .get_record_reader(60)
-            .expect("Failed to read into array!");
-
-        // Verify that the schema was correctly parsed
-        let original_schema = arrow_reader.get_schema().unwrap().fields().clone();
-        assert_eq!(original_schema, *record_batch_reader.schema().fields());
-
-        compare_batch_json(&mut record_batch_reader, json_values, max_len);
-    }
-
-    #[test]
-    fn test_arrow_reader_single_column() {
-        let json_values = get_json_array("parquet/generated_simple_numerics/blogs.json");
-
-        let projected_json_values = json_values
-            .into_iter()
-            .map(|value| match value {
-                JObject(fields) => {
-                    json!({ "blog_id": fields.get("blog_id").unwrap_or(&JNull).clone()})
-                }
-                _ => panic!("Input should be json object array!"),
-            })
-            .collect::<Vec<_>>();
-
-        let parquet_file_reader =
-            get_test_reader("parquet/generated_simple_numerics/blogs.parquet");
-
-        let max_len = parquet_file_reader.metadata().file_metadata().num_rows() as usize;
-
-        let mut arrow_reader = ParquetFileArrowReader::new(parquet_file_reader);
-
-        let mut record_batch_reader = arrow_reader
-            .get_record_reader_by_columns(vec![2], 60)
-            .expect("Failed to read into array!");
-
-        // Verify that the schema was correctly parsed
-        let original_schema = arrow_reader.get_schema().unwrap().fields().clone();
-        assert_eq!(1, record_batch_reader.schema().fields().len());
-        assert_eq!(original_schema[1], record_batch_reader.schema().fields()[0]);
-
-        compare_batch_json(&mut record_batch_reader, projected_json_values, max_len);
-    }
-
-    #[test]
-    fn test_bool_single_column_reader_test() {
-        let message_type = "
-        message test_schema {
-          REQUIRED BOOLEAN leaf;
-        }
-        ";
-
-        let converter = FromConverter::new();
-        run_single_column_reader_tests::<
-            BoolType,
-            BooleanArray,
-            FromConverter<Vec<Option<bool>>, BooleanArray>,
-            BoolType,
-        >(2, message_type, &converter);
-    }
-
-    struct RandFixedLenGen {}
-
-    impl RandGen<FixedLenByteArrayType> for RandFixedLenGen {
-        fn gen(len: i32) -> FixedLenByteArray {
-            let mut v = vec![0u8; len as usize];
-            rand::thread_rng().fill_bytes(&mut v);
-            ByteArray::from(v).into()
-        }
-    }
-
-    #[test]
-    fn test_fixed_length_binary_column_reader() {
-        let message_type = "
-        message test_schema {
-          REQUIRED FIXED_LEN_BYTE_ARRAY (20) leaf;
-        }
-        ";
-
-        let converter = FixedSizeArrayConverter::new(20);
-        run_single_column_reader_tests::<
-            FixedLenByteArrayType,
-            FixedSizeBinaryArray,
-            FixedSizeArrayConverter,
-            RandFixedLenGen,
-        >(20, message_type, &converter);
-    }
-
-    #[test]
-    fn test_interval_day_time_column_reader() {
-        let message_type = "
-        message test_schema {
-          REQUIRED FIXED_LEN_BYTE_ARRAY (12) leaf (INTERVAL);
-        }
-        ";
-
-        let converter = IntervalDayTimeArrayConverter {};
-        run_single_column_reader_tests::<
-            FixedLenByteArrayType,
-            IntervalDayTimeArray,
-            IntervalDayTimeArrayConverter,
-            RandFixedLenGen,
-        >(12, message_type, &converter);
-    }
-
-    struct RandUtf8Gen {}
-
-    impl RandGen<ByteArrayType> for RandUtf8Gen {
-        fn gen(len: i32) -> ByteArray {
-            Int32Type::gen(len).to_string().as_str().into()
-        }
-    }
-
-    #[test]
-    fn test_utf8_single_column_reader_test() {
-        let message_type = "
-        message test_schema {
-          REQUIRED BINARY leaf (UTF8);
-        }
-        ";
-
-        let converter = Utf8ArrayConverter {};
-        run_single_column_reader_tests::<
-            ByteArrayType,
-            StringArray,
-            Utf8ArrayConverter,
-            RandUtf8Gen,
-        >(2, message_type, &converter);
-    }
-
-    #[test]
-    fn test_read_decimal_file() {
-        use arrow::array::DecimalArray;
-        let testdata = arrow::util::test_util::parquet_test_data();
-        let file_variants = vec![("fixed_length", 25), ("int32", 4), ("int64", 10)];
-        for (prefix, target_precision) in file_variants {
-            let path = format!("{}/{}_decimal.parquet", testdata, prefix);
-            let parquet_reader =
-                SerializedFileReader::try_from(File::open(&path).unwrap()).unwrap();
-            let mut arrow_reader = ParquetFileArrowReader::new(Arc::new(parquet_reader));
-
-            let mut record_reader = arrow_reader.get_record_reader(32).unwrap();
-
-            let batch = record_reader.next().unwrap().unwrap();
-            assert_eq!(batch.num_rows(), 24);
-            let col = batch
-                .column(0)
-                .as_any()
-                .downcast_ref::<DecimalArray>()
-                .unwrap();
-
-            let expected = 1..25;
-
-            assert_eq!(col.precision(), target_precision);
-            assert_eq!(col.scale(), 2);
-
-            for (i, v) in expected.enumerate() {
-                assert_eq!(col.value(i), v * 100_i128);
-            }
-        }
-    }
-
-    /// Parameters for single_column_reader_test
-    #[derive(Debug)]
-    struct TestOptions {
-        /// Number of row group to write to parquet (row group size =
-        /// num_row_groups / num_rows)
-        num_row_groups: usize,
-        /// Total number of rows
-        num_rows: usize,
-        /// Size of batches to read back
-        record_batch_size: usize,
-        /// Total number of batches to attempt to read.
-        /// `record_batch_size` * `num_iterations` should be greater
-        /// than `num_rows` to ensure the data can be read back completely
-        num_iterations: usize,
-    }
-
-    /// Create a parquet file and then read it using
-    /// `ParquetFileArrowReader` using a standard set of parameters
-    /// `opts`.
-    ///
-    /// `rand_max` represents the maximum size of value to pass to to
-    /// value generator
-    fn run_single_column_reader_tests<T, A, C, G>(
-        rand_max: i32,
-        message_type: &str,
-        converter: &C,
-    ) where
-        T: DataType,
-        G: RandGen<T>,
-        A: PartialEq + Array + 'static,
-        C: Converter<Vec<Option<T::T>>, A> + 'static,
-    {
-        let all_options = vec![
-            // choose record_batch_batch (15) so batches cross row
-            // group boundaries (50 rows in 2 row groups) cases.
-            TestOptions {
-                num_row_groups: 2,
-                num_rows: 100,
-                record_batch_size: 15,
-                num_iterations: 50,
-            },
-            // choose record_batch_batch (5) so batches sometime fall
-            // on row group boundaries and (25 rows in 3 row groups
-            // --> row groups of 10, 10, and 5). Tests buffer
-            // refilling edge cases.
-            TestOptions {
-                num_row_groups: 3,
-                num_rows: 25,
-                record_batch_size: 5,
-                num_iterations: 50,
-            },
-            // Choose record_batch_size (25) so all batches fall
-            // exactly on row group boundary (25). Tests buffer
-            // refilling edge cases.
-            TestOptions {
-                num_row_groups: 4,
-                num_rows: 100,
-                record_batch_size: 25,
-                num_iterations: 50,
-            },
-        ];
-
-        all_options.into_iter().for_each(|opts| {
-            // Print out options to facilitate debugging failures on CI
-            println!("Running with Test Options: {:?}", opts);
-            single_column_reader_test::<T, A, C, G>(
-                opts,
-                rand_max,
-                message_type,
-                converter,
-            )
-        });
-    }
-
-    /// Create a parquet file and then read it using
-    /// `ParquetFileArrowReader` using the parameters described in
-    /// `opts`.
-    fn single_column_reader_test<T, A, C, G>(
-        opts: TestOptions,
-        rand_max: i32,
-        message_type: &str,
-        converter: &C,
-    ) where
-        T: DataType,
-        G: RandGen<T>,
-        A: PartialEq + Array + 'static,
-        C: Converter<Vec<Option<T::T>>, A> + 'static,
-    {
-        let values: Vec<Vec<T::T>> = (0..opts.num_row_groups)
-            .map(|_| G::gen_vec(rand_max, opts.num_rows))
-            .collect();
-
-        let path = get_temp_filename();
-
-        let schema = parse_message_type(message_type).map(Arc::new).unwrap();
-
-        generate_single_column_file_with_data::<T>(&values, path.as_path(), schema)
-            .unwrap();
-
-        let parquet_reader =
-            SerializedFileReader::try_from(File::open(&path).unwrap()).unwrap();
-        let mut arrow_reader = ParquetFileArrowReader::new(Arc::new(parquet_reader));
-
-        let mut record_reader = arrow_reader
-            .get_record_reader(opts.record_batch_size)
-            .unwrap();
-
-        let expected_data: Vec<Option<T::T>> = values
-            .iter()
-            .flat_map(|v| v.iter())
-            .map(|b| Some(b.clone()))
-            .collect();
-
-        for i in 0..opts.num_iterations {
-            let start = i * opts.record_batch_size;
-
-            let batch = record_reader.next();
-            if start < expected_data.len() {
-                let end = min(start + opts.record_batch_size, expected_data.len());
-                assert!(batch.is_some());
-
-                let mut data = vec![];
-                data.extend_from_slice(&expected_data[start..end]);
-
-                assert_eq!(
-                    &converter.convert(data).unwrap(),
-                    batch
-                        .unwrap()
-                        .unwrap()
-                        .column(0)
-                        .as_any()
-                        .downcast_ref::<A>()
-                        .unwrap()
-                );
-            } else {
-                assert!(batch.is_none());
-            }
-        }
-    }
-
-    fn generate_single_column_file_with_data<T: DataType>(
-        values: &[Vec<T::T>],
-        path: &Path,
-        schema: TypePtr,
-    ) -> Result<parquet_format::FileMetaData> {
-        let file = File::create(path)?;
-        let writer_props = Arc::new(WriterProperties::builder().build());
-
-        let mut writer = SerializedFileWriter::new(file, schema, writer_props)?;
-
-        for v in values {
-            let mut row_group_writer = writer.next_row_group()?;
-            let mut column_writer = row_group_writer
-                .next_column()?
-                .expect("Column writer is none!");
-
-            get_typed_column_writer_mut::<T>(&mut column_writer)
-                .write_batch(v, None, None)?;
-
-            row_group_writer.close_column(column_writer)?;
-            writer.close_row_group(row_group_writer)?
-        }
-
-        writer.close()
-    }
-
-    fn get_test_reader(file_name: &str) -> Arc<dyn FileReader> {
-        let file = get_test_file(file_name);
-
-        let reader =
-            SerializedFileReader::new(file).expect("Failed to create serialized reader");
-
-        Arc::new(reader)
-    }
-
-    fn get_test_file(file_name: &str) -> File {
-        let mut path = PathBuf::new();
-        path.push(arrow::util::test_util::arrow_test_data());
-        path.push(file_name);
-
-        File::open(path.as_path()).expect("File not found!")
-    }
-
-    fn get_json_array(filename: &str) -> Vec<serde_json::Value> {
-        match serde_json::from_reader(get_test_file(filename))
-            .expect("Failed to read json value from file!")
-        {
-            JArray(values) => values,
-            _ => panic!("Input should be json array!"),
-        }
-    }
-
-    fn compare_batch_json(
-        record_batch_reader: &mut dyn RecordBatchReader,
-        json_values: Vec<serde_json::Value>,
-        max_len: usize,
-    ) {
-        for i in 0..20 {
-            let array: Option<StructArray> = record_batch_reader
-                .next()
-                .map(|r| r.expect("Failed to read record batch!").into());
-
-            let (start, end) = (i * 60_usize, (i + 1) * 60_usize);
-
-            if start < max_len {
-                assert!(array.is_some());
-                assert_ne!(0, array.as_ref().unwrap().len());
-                let end = min(end, max_len);
-                let json = JArray(Vec::from(&json_values[start..end]));
-                assert_eq!(array.unwrap(), json)
-            } else {
-                assert!(array.is_none());
-            }
-        }
-    }
-
-    #[test]
-    fn test_read_structs() {
-        // This particular test file has columns of struct types where there is
-        // a column that has the same name as one of the struct fields
-        // (see: ARROW-11452)
-        let testdata = arrow::util::test_util::parquet_test_data();
-        let path = format!("{}/nested_structs.rust.parquet", testdata);
-        let parquet_file_reader =
-            SerializedFileReader::try_from(File::open(&path).unwrap()).unwrap();
-        let mut arrow_reader = ParquetFileArrowReader::new(Arc::new(parquet_file_reader));
-        let record_batch_reader = arrow_reader
-            .get_record_reader(60)
-            .expect("Failed to read into array!");
-
-        for batch in record_batch_reader {
-            batch.unwrap();
-        }
-    }
-}
diff --git a/rust/parquet/src/arrow/arrow_writer.rs b/rust/parquet/src/arrow/arrow_writer.rs
deleted file mode 100644
index e8eaf334e2b..00000000000
--- a/rust/parquet/src/arrow/arrow_writer.rs
+++ /dev/null
@@ -1,1402 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Contains writer which writes arrow data into parquet data.
-
-use std::sync::Arc;
-
-use arrow::array as arrow_array;
-use arrow::datatypes::{DataType as ArrowDataType, IntervalUnit, SchemaRef};
-use arrow::record_batch::RecordBatch;
-use arrow_array::Array;
-
-use super::levels::LevelInfo;
-use super::schema::{
-    add_encoded_arrow_schema_to_metadata, decimal_length_from_precision,
-};
-
-use crate::column::writer::ColumnWriter;
-use crate::errors::{ParquetError, Result};
-use crate::file::properties::WriterProperties;
-use crate::{
-    data_type::*,
-    file::writer::{FileWriter, ParquetWriter, RowGroupWriter, SerializedFileWriter},
-};
-
-/// Arrow writer
-///
-/// Writes Arrow `RecordBatch`es to a Parquet writer
-pub struct ArrowWriter<W: ParquetWriter> {
-    /// Underlying Parquet writer
-    writer: SerializedFileWriter<W>,
-    /// A copy of the Arrow schema.
-    ///
-    /// The schema is used to verify that each record batch written has the correct schema
-    arrow_schema: SchemaRef,
-}
-
-impl<W: 'static + ParquetWriter> ArrowWriter<W> {
-    /// Try to create a new Arrow writer
-    ///
-    /// The writer will fail if:
-    ///  * a `SerializedFileWriter` cannot be created from the ParquetWriter
-    ///  * the Arrow schema contains unsupported datatypes such as Unions
-    pub fn try_new(
-        writer: W,
-        arrow_schema: SchemaRef,
-        props: Option<WriterProperties>,
-    ) -> Result<Self> {
-        let schema = crate::arrow::arrow_to_parquet_schema(&arrow_schema)?;
-        // add serialized arrow schema
-        let mut props = props.unwrap_or_else(|| WriterProperties::builder().build());
-        add_encoded_arrow_schema_to_metadata(&arrow_schema, &mut props);
-
-        let file_writer = SerializedFileWriter::new(
-            writer.try_clone()?,
-            schema.root_schema_ptr(),
-            Arc::new(props),
-        )?;
-
-        Ok(Self {
-            writer: file_writer,
-            arrow_schema,
-        })
-    }
-
-    /// Write a RecordBatch to writer
-    ///
-    /// *NOTE:* The writer currently does not support all Arrow data types
-    pub fn write(&mut self, batch: &RecordBatch) -> Result<()> {
-        // validate batch schema against writer's supplied schema
-        if self.arrow_schema != batch.schema() {
-            return Err(ParquetError::ArrowError(
-                "Record batch schema does not match writer schema".to_string(),
-            ));
-        }
-        // compute the definition and repetition levels of the batch
-        let batch_level = LevelInfo::new_from_batch(batch);
-        let mut row_group_writer = self.writer.next_row_group()?;
-        for (array, field) in batch.columns().iter().zip(batch.schema().fields()) {
-            let mut levels = batch_level.calculate_array_levels(array, field);
-            write_leaves(&mut row_group_writer, array, &mut levels)?;
-        }
-
-        self.writer.close_row_group(row_group_writer)
-    }
-
-    /// Close and finalize the underlying Parquet writer
-    pub fn close(&mut self) -> Result<parquet_format::FileMetaData> {
-        self.writer.close()
-    }
-}
-
-/// Convenience method to get the next ColumnWriter from the RowGroupWriter
-#[inline]
-#[allow(clippy::borrowed_box)]
-fn get_col_writer(
-    row_group_writer: &mut Box<dyn RowGroupWriter>,
-) -> Result<ColumnWriter> {
-    let col_writer = row_group_writer
-        .next_column()?
-        .expect("Unable to get column writer");
-    Ok(col_writer)
-}
-
-#[allow(clippy::borrowed_box)]
-fn write_leaves(
-    mut row_group_writer: &mut Box<dyn RowGroupWriter>,
-    array: &arrow_array::ArrayRef,
-    mut levels: &mut Vec<LevelInfo>,
-) -> Result<()> {
-    match array.data_type() {
-        ArrowDataType::Null
-        | ArrowDataType::Boolean
-        | ArrowDataType::Int8
-        | ArrowDataType::Int16
-        | ArrowDataType::Int32
-        | ArrowDataType::Int64
-        | ArrowDataType::UInt8
-        | ArrowDataType::UInt16
-        | ArrowDataType::UInt32
-        | ArrowDataType::UInt64
-        | ArrowDataType::Float32
-        | ArrowDataType::Float64
-        | ArrowDataType::Timestamp(_, _)
-        | ArrowDataType::Date32
-        | ArrowDataType::Date64
-        | ArrowDataType::Time32(_)
-        | ArrowDataType::Time64(_)
-        | ArrowDataType::Duration(_)
-        | ArrowDataType::Interval(_)
-        | ArrowDataType::LargeBinary
-        | ArrowDataType::Binary
-        | ArrowDataType::Utf8
-        | ArrowDataType::LargeUtf8
-        | ArrowDataType::Decimal(_, _)
-        | ArrowDataType::FixedSizeBinary(_) => {
-            let mut col_writer = get_col_writer(&mut row_group_writer)?;
-            write_leaf(
-                &mut col_writer,
-                array,
-                levels.pop().expect("Levels exhausted"),
-            )?;
-            row_group_writer.close_column(col_writer)?;
-            Ok(())
-        }
-        ArrowDataType::List(_) | ArrowDataType::LargeList(_) => {
-            // write the child list
-            let data = array.data();
-            let child_array = arrow_array::make_array(data.child_data()[0].clone());
-            write_leaves(&mut row_group_writer, &child_array, &mut levels)?;
-            Ok(())
-        }
-        ArrowDataType::Struct(_) => {
-            let struct_array: &arrow_array::StructArray = array
-                .as_any()
-                .downcast_ref::<arrow_array::StructArray>()
-                .expect("Unable to get struct array");
-            for field in struct_array.columns() {
-                write_leaves(&mut row_group_writer, field, &mut levels)?;
-            }
-            Ok(())
-        }
-        ArrowDataType::Dictionary(_, value_type) => {
-            // cast dictionary to a primitive
-            let array = arrow::compute::cast(array, value_type)?;
-
-            let mut col_writer = get_col_writer(&mut row_group_writer)?;
-            write_leaf(
-                &mut col_writer,
-                &array,
-                levels.pop().expect("Levels exhausted"),
-            )?;
-            row_group_writer.close_column(col_writer)?;
-            Ok(())
-        }
-        ArrowDataType::Float16 => Err(ParquetError::ArrowError(
-            "Float16 arrays not supported".to_string(),
-        )),
-        ArrowDataType::FixedSizeList(_, _) | ArrowDataType::Union(_) => {
-            Err(ParquetError::NYI(
-                format!(
-                    "Attempting to write an Arrow type {:?} to parquet that is not yet implemented", 
-                    array.data_type()
-                )
-            ))
-        }
-    }
-}
-
-fn write_leaf(
-    writer: &mut ColumnWriter,
-    column: &arrow_array::ArrayRef,
-    levels: LevelInfo,
-) -> Result<i64> {
-    let indices = levels.filter_array_indices();
-    let written = match writer {
-        ColumnWriter::Int32ColumnWriter(ref mut typed) => {
-            // If the column is a Date64, we cast it to a Date32, and then interpret that as Int32
-            let array = if let ArrowDataType::Date64 = column.data_type() {
-                let array = arrow::compute::cast(column, &ArrowDataType::Date32)?;
-                arrow::compute::cast(&array, &ArrowDataType::Int32)?
-            } else {
-                arrow::compute::cast(column, &ArrowDataType::Int32)?
-            };
-            let array = array
-                .as_any()
-                .downcast_ref::<arrow_array::Int32Array>()
-                .expect("Unable to get int32 array");
-            typed.write_batch(
-                get_numeric_array_slice::<Int32Type, _>(&array, &indices).as_slice(),
-                Some(levels.definition.as_slice()),
-                levels.repetition.as_deref(),
-            )?
-        }
-        ColumnWriter::BoolColumnWriter(ref mut typed) => {
-            let array = column
-                .as_any()
-                .downcast_ref::<arrow_array::BooleanArray>()
-                .expect("Unable to get boolean array");
-            typed.write_batch(
-                get_bool_array_slice(&array, &indices).as_slice(),
-                Some(levels.definition.as_slice()),
-                levels.repetition.as_deref(),
-            )?
-        }
-        ColumnWriter::Int64ColumnWriter(ref mut typed) => {
-            let values = match column.data_type() {
-                ArrowDataType::Int64 => {
-                    let array = column
-                        .as_any()
-                        .downcast_ref::<arrow_array::Int64Array>()
-                        .expect("Unable to get i64 array");
-                    get_numeric_array_slice::<Int64Type, _>(&array, &indices)
-                }
-                _ => {
-                    let array = arrow::compute::cast(column, &ArrowDataType::Int64)?;
-                    let array = array
-                        .as_any()
-                        .downcast_ref::<arrow_array::Int64Array>()
-                        .expect("Unable to get i64 array");
-                    get_numeric_array_slice::<Int64Type, _>(&array, &indices)
-                }
-            };
-            typed.write_batch(
-                values.as_slice(),
-                Some(levels.definition.as_slice()),
-                levels.repetition.as_deref(),
-            )?
-        }
-        ColumnWriter::Int96ColumnWriter(ref mut _typed) => {
-            unreachable!("Currently unreachable because data type not supported")
-        }
-        ColumnWriter::FloatColumnWriter(ref mut typed) => {
-            let array = column
-                .as_any()
-                .downcast_ref::<arrow_array::Float32Array>()
-                .expect("Unable to get Float32 array");
-            typed.write_batch(
-                get_numeric_array_slice::<FloatType, _>(&array, &indices).as_slice(),
-                Some(levels.definition.as_slice()),
-                levels.repetition.as_deref(),
-            )?
-        }
-        ColumnWriter::DoubleColumnWriter(ref mut typed) => {
-            let array = column
-                .as_any()
-                .downcast_ref::<arrow_array::Float64Array>()
-                .expect("Unable to get Float64 array");
-            typed.write_batch(
-                get_numeric_array_slice::<DoubleType, _>(&array, &indices).as_slice(),
-                Some(levels.definition.as_slice()),
-                levels.repetition.as_deref(),
-            )?
-        }
-        ColumnWriter::ByteArrayColumnWriter(ref mut typed) => match column.data_type() {
-            ArrowDataType::Binary => {
-                let array = column
-                    .as_any()
-                    .downcast_ref::<arrow_array::BinaryArray>()
-                    .expect("Unable to get BinaryArray array");
-                typed.write_batch(
-                    get_binary_array(&array).as_slice(),
-                    Some(levels.definition.as_slice()),
-                    levels.repetition.as_deref(),
-                )?
-            }
-            ArrowDataType::Utf8 => {
-                let array = column
-                    .as_any()
-                    .downcast_ref::<arrow_array::StringArray>()
-                    .expect("Unable to get LargeBinaryArray array");
-                typed.write_batch(
-                    get_string_array(&array).as_slice(),
-                    Some(levels.definition.as_slice()),
-                    levels.repetition.as_deref(),
-                )?
-            }
-            ArrowDataType::LargeBinary => {
-                let array = column
-                    .as_any()
-                    .downcast_ref::<arrow_array::LargeBinaryArray>()
-                    .expect("Unable to get LargeBinaryArray array");
-                typed.write_batch(
-                    get_large_binary_array(&array).as_slice(),
-                    Some(levels.definition.as_slice()),
-                    levels.repetition.as_deref(),
-                )?
-            }
-            ArrowDataType::LargeUtf8 => {
-                let array = column
-                    .as_any()
-                    .downcast_ref::<arrow_array::LargeStringArray>()
-                    .expect("Unable to get LargeUtf8 array");
-                typed.write_batch(
-                    get_large_string_array(&array).as_slice(),
-                    Some(levels.definition.as_slice()),
-                    levels.repetition.as_deref(),
-                )?
-            }
-            _ => unreachable!("Currently unreachable because data type not supported"),
-        },
-        ColumnWriter::FixedLenByteArrayColumnWriter(ref mut typed) => {
-            let bytes = match column.data_type() {
-                ArrowDataType::Interval(interval_unit) => match interval_unit {
-                    IntervalUnit::YearMonth => {
-                        let array = column
-                            .as_any()
-                            .downcast_ref::<arrow_array::IntervalYearMonthArray>()
-                            .unwrap();
-                        get_interval_ym_array_slice(&array, &indices)
-                    }
-                    IntervalUnit::DayTime => {
-                        let array = column
-                            .as_any()
-                            .downcast_ref::<arrow_array::IntervalDayTimeArray>()
-                            .unwrap();
-                        get_interval_dt_array_slice(&array, &indices)
-                    }
-                },
-                ArrowDataType::FixedSizeBinary(_) => {
-                    let array = column
-                        .as_any()
-                        .downcast_ref::<arrow_array::FixedSizeBinaryArray>()
-                        .unwrap();
-                    get_fsb_array_slice(&array, &indices)
-                }
-                ArrowDataType::Decimal(_, _) => {
-                    let array = column
-                        .as_any()
-                        .downcast_ref::<arrow_array::DecimalArray>()
-                        .unwrap();
-                    get_decimal_array_slice(&array, &indices)
-                }
-                _ => {
-                    return Err(ParquetError::NYI(
-                        "Attempting to write an Arrow type that is not yet implemented"
-                            .to_string(),
-                    ));
-                }
-            };
-            typed.write_batch(
-                bytes.as_slice(),
-                Some(levels.definition.as_slice()),
-                levels.repetition.as_deref(),
-            )?
-        }
-    };
-    Ok(written as i64)
-}
-
-macro_rules! def_get_binary_array_fn {
-    ($name:ident, $ty:ty) => {
-        fn $name(array: &$ty) -> Vec<ByteArray> {
-            let mut values = Vec::with_capacity(array.len() - array.null_count());
-            for i in 0..array.len() {
-                if array.is_valid(i) {
-                    let bytes: Vec<u8> = array.value(i).into();
-                    let bytes = ByteArray::from(bytes);
-                    values.push(bytes);
-                }
-            }
-            values
-        }
-    };
-}
-
-def_get_binary_array_fn!(get_binary_array, arrow_array::BinaryArray);
-def_get_binary_array_fn!(get_string_array, arrow_array::StringArray);
-def_get_binary_array_fn!(get_large_binary_array, arrow_array::LargeBinaryArray);
-def_get_binary_array_fn!(get_large_string_array, arrow_array::LargeStringArray);
-
-/// Get the underlying numeric array slice, skipping any null values.
-/// If there are no null values, it might be quicker to get the slice directly instead of
-/// calling this function.
-fn get_numeric_array_slice<T, A>(
-    array: &arrow_array::PrimitiveArray<A>,
-    indices: &[usize],
-) -> Vec<T::T>
-where
-    T: DataType,
-    A: arrow::datatypes::ArrowNumericType,
-    T::T: From<A::Native>,
-{
-    let mut values = Vec::with_capacity(indices.len());
-    for i in indices {
-        values.push(array.value(*i).into())
-    }
-    values
-}
-
-fn get_bool_array_slice(
-    array: &arrow_array::BooleanArray,
-    indices: &[usize],
-) -> Vec<bool> {
-    let mut values = Vec::with_capacity(indices.len());
-    for i in indices {
-        values.push(array.value(*i))
-    }
-    values
-}
-
-/// Returns 12-byte values representing 3 values of months, days and milliseconds (4-bytes each).
-/// An Arrow YearMonth interval only stores months, thus only the first 4 bytes are populated.
-fn get_interval_ym_array_slice(
-    array: &arrow_array::IntervalYearMonthArray,
-    indices: &[usize],
-) -> Vec<FixedLenByteArray> {
-    let mut values = Vec::with_capacity(indices.len());
-    for i in indices {
-        let mut value = array.value(*i).to_le_bytes().to_vec();
-        let mut suffix = vec![0; 8];
-        value.append(&mut suffix);
-        values.push(FixedLenByteArray::from(ByteArray::from(value)))
-    }
-    values
-}
-
-/// Returns 12-byte values representing 3 values of months, days and milliseconds (4-bytes each).
-/// An Arrow DayTime interval only stores days and millis, thus the first 4 bytes are not populated.
-fn get_interval_dt_array_slice(
-    array: &arrow_array::IntervalDayTimeArray,
-    indices: &[usize],
-) -> Vec<FixedLenByteArray> {
-    let mut values = Vec::with_capacity(indices.len());
-    for i in indices {
-        let mut prefix = vec![0; 4];
-        let mut value = array.value(*i).to_le_bytes().to_vec();
-        prefix.append(&mut value);
-        debug_assert_eq!(prefix.len(), 12);
-        values.push(FixedLenByteArray::from(ByteArray::from(prefix)));
-    }
-    values
-}
-
-fn get_decimal_array_slice(
-    array: &arrow_array::DecimalArray,
-    indices: &[usize],
-) -> Vec<FixedLenByteArray> {
-    let mut values = Vec::with_capacity(indices.len());
-    let size = decimal_length_from_precision(array.precision());
-    for i in indices {
-        let as_be_bytes = array.value(*i).to_be_bytes();
-        let resized_value = as_be_bytes[(16 - size)..].to_vec();
-        values.push(FixedLenByteArray::from(ByteArray::from(resized_value)));
-    }
-    values
-}
-
-fn get_fsb_array_slice(
-    array: &arrow_array::FixedSizeBinaryArray,
-    indices: &[usize],
-) -> Vec<FixedLenByteArray> {
-    let mut values = Vec::with_capacity(indices.len());
-    for i in indices {
-        let value = array.value(*i).to_vec();
-        values.push(FixedLenByteArray::from(ByteArray::from(value)))
-    }
-    values
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    use std::io::Seek;
-    use std::sync::Arc;
-
-    use arrow::datatypes::ToByteSlice;
-    use arrow::datatypes::{DataType, Field, Schema, UInt32Type, UInt8Type};
-    use arrow::record_batch::RecordBatch;
-    use arrow::{array::*, buffer::Buffer};
-
-    use crate::arrow::{ArrowReader, ParquetFileArrowReader};
-    use crate::file::{reader::SerializedFileReader, writer::InMemoryWriteableCursor};
-    use crate::util::test_common::get_temp_file;
-
-    #[test]
-    fn arrow_writer() {
-        // define schema
-        let schema = Schema::new(vec![
-            Field::new("a", DataType::Int32, false),
-            Field::new("b", DataType::Int32, true),
-        ]);
-
-        // create some data
-        let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
-        let b = Int32Array::from(vec![Some(1), None, None, Some(4), Some(5)]);
-
-        // build a record batch
-        let batch = RecordBatch::try_new(
-            Arc::new(schema.clone()),
-            vec![Arc::new(a), Arc::new(b)],
-        )
-        .unwrap();
-
-        let file = get_temp_file("test_arrow_writer.parquet", &[]);
-        let mut writer = ArrowWriter::try_new(file, Arc::new(schema), None).unwrap();
-        writer.write(&batch).unwrap();
-        writer.close().unwrap();
-    }
-
-    #[test]
-    fn roundtrip_bytes() {
-        // define schema
-        let schema = Arc::new(Schema::new(vec![
-            Field::new("a", DataType::Int32, false),
-            Field::new("b", DataType::Int32, true),
-        ]));
-
-        // create some data
-        let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
-        let b = Int32Array::from(vec![Some(1), None, None, Some(4), Some(5)]);
-
-        // build a record batch
-        let expected_batch =
-            RecordBatch::try_new(schema.clone(), vec![Arc::new(a), Arc::new(b)]).unwrap();
-
-        let cursor = InMemoryWriteableCursor::default();
-
-        {
-            let mut writer = ArrowWriter::try_new(cursor.clone(), schema, None).unwrap();
-            writer.write(&expected_batch).unwrap();
-            writer.close().unwrap();
-        }
-
-        let buffer = cursor.into_inner().unwrap();
-
-        let cursor = crate::file::serialized_reader::SliceableCursor::new(buffer);
-        let reader = SerializedFileReader::new(cursor).unwrap();
-        let mut arrow_reader = ParquetFileArrowReader::new(Arc::new(reader));
-        let mut record_batch_reader = arrow_reader.get_record_reader(1024).unwrap();
-
-        let actual_batch = record_batch_reader
-            .next()
-            .expect("No batch found")
-            .expect("Unable to get batch");
-
-        assert_eq!(expected_batch.schema(), actual_batch.schema());
-        assert_eq!(expected_batch.num_columns(), actual_batch.num_columns());
-        assert_eq!(expected_batch.num_rows(), actual_batch.num_rows());
-        for i in 0..expected_batch.num_columns() {
-            let expected_data = expected_batch.column(i).data().clone();
-            let actual_data = actual_batch.column(i).data().clone();
-
-            assert_eq!(expected_data, actual_data);
-        }
-    }
-
-    #[test]
-    fn arrow_writer_non_null() {
-        // define schema
-        let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]);
-
-        // create some data
-        let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
-
-        // build a record batch
-        let batch =
-            RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(a)]).unwrap();
-
-        let file = get_temp_file("test_arrow_writer_non_null.parquet", &[]);
-        let mut writer = ArrowWriter::try_new(file, Arc::new(schema), None).unwrap();
-        writer.write(&batch).unwrap();
-        writer.close().unwrap();
-    }
-
-    #[test]
-    fn arrow_writer_list() {
-        // define schema
-        let schema = Schema::new(vec![Field::new(
-            "a",
-            DataType::List(Box::new(Field::new("item", DataType::Int32, true))),
-            false,
-        )]);
-
-        // create some data
-        let a_values = Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
-
-        // Construct a buffer for value offsets, for the nested array:
-        //  [[1], [2, 3], null, [4, 5, 6], [7, 8, 9, 10]]
-        let a_value_offsets =
-            arrow::buffer::Buffer::from(&[0, 1, 3, 3, 6, 10].to_byte_slice());
-
-        // Construct a list array from the above two
-        let a_list_data = ArrayData::builder(DataType::List(Box::new(Field::new(
-            "item",
-            DataType::Int32,
-            true,
-        ))))
-        .len(5)
-        .add_buffer(a_value_offsets)
-        .add_child_data(a_values.data().clone())
-        .null_bit_buffer(Buffer::from(vec![0b00011011]))
-        .build();
-        let a = ListArray::from(a_list_data);
-
-        // build a record batch
-        let batch =
-            RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(a)]).unwrap();
-
-        // I think this setup is incorrect because this should pass
-        assert_eq!(batch.column(0).data().null_count(), 1);
-
-        let file = get_temp_file("test_arrow_writer_list.parquet", &[]);
-        let mut writer = ArrowWriter::try_new(file, Arc::new(schema), None).unwrap();
-        writer.write(&batch).unwrap();
-        writer.close().unwrap();
-    }
-
-    #[test]
-    fn arrow_writer_binary() {
-        let string_field = Field::new("a", DataType::Utf8, false);
-        let binary_field = Field::new("b", DataType::Binary, false);
-        let schema = Schema::new(vec![string_field, binary_field]);
-
-        let raw_string_values = vec!["foo", "bar", "baz", "quux"];
-        let raw_binary_values = vec![
-            b"foo".to_vec(),
-            b"bar".to_vec(),
-            b"baz".to_vec(),
-            b"quux".to_vec(),
-        ];
-        let raw_binary_value_refs = raw_binary_values
-            .iter()
-            .map(|x| x.as_slice())
-            .collect::<Vec<_>>();
-
-        let string_values = StringArray::from(raw_string_values.clone());
-        let binary_values = BinaryArray::from(raw_binary_value_refs);
-        let batch = RecordBatch::try_new(
-            Arc::new(schema.clone()),
-            vec![Arc::new(string_values), Arc::new(binary_values)],
-        )
-        .unwrap();
-
-        let mut file = get_temp_file("test_arrow_writer_binary.parquet", &[]);
-        let mut writer =
-            ArrowWriter::try_new(file.try_clone().unwrap(), Arc::new(schema), None)
-                .unwrap();
-        writer.write(&batch).unwrap();
-        writer.close().unwrap();
-
-        file.seek(std::io::SeekFrom::Start(0)).unwrap();
-        let file_reader = SerializedFileReader::new(file).unwrap();
-        let mut arrow_reader = ParquetFileArrowReader::new(Arc::new(file_reader));
-        let mut record_batch_reader = arrow_reader.get_record_reader(1024).unwrap();
-
-        let batch = record_batch_reader.next().unwrap().unwrap();
-        let string_col = batch
-            .column(0)
-            .as_any()
-            .downcast_ref::<StringArray>()
-            .unwrap();
-        let binary_col = batch
-            .column(1)
-            .as_any()
-            .downcast_ref::<BinaryArray>()
-            .unwrap();
-
-        for i in 0..batch.num_rows() {
-            assert_eq!(string_col.value(i), raw_string_values[i]);
-            assert_eq!(binary_col.value(i), raw_binary_values[i].as_slice());
-        }
-    }
-
-    #[test]
-    fn arrow_writer_decimal() {
-        let decimal_field = Field::new("a", DataType::Decimal(5, 2), false);
-        let schema = Schema::new(vec![decimal_field]);
-
-        let mut dec_builder = DecimalBuilder::new(4, 5, 2);
-        dec_builder.append_value(10_000).unwrap();
-        dec_builder.append_value(50_000).unwrap();
-        dec_builder.append_value(0).unwrap();
-        dec_builder.append_value(-100).unwrap();
-
-        let raw_decimal_i128_values: Vec<i128> = vec![10_000, 50_000, 0, -100];
-        let decimal_values = dec_builder.finish();
-        let batch = RecordBatch::try_new(
-            Arc::new(schema.clone()),
-            vec![Arc::new(decimal_values)],
-        )
-        .unwrap();
-
-        let mut file = get_temp_file("test_arrow_writer_decimal.parquet", &[]);
-        let mut writer =
-            ArrowWriter::try_new(file.try_clone().unwrap(), Arc::new(schema), None)
-                .unwrap();
-        writer.write(&batch).unwrap();
-        writer.close().unwrap();
-
-        file.seek(std::io::SeekFrom::Start(0)).unwrap();
-        let file_reader = SerializedFileReader::new(file).unwrap();
-        let mut arrow_reader = ParquetFileArrowReader::new(Arc::new(file_reader));
-        let mut record_batch_reader = arrow_reader.get_record_reader(1024).unwrap();
-
-        let batch = record_batch_reader.next().unwrap().unwrap();
-        let decimal_col = batch
-            .column(0)
-            .as_any()
-            .downcast_ref::<DecimalArray>()
-            .unwrap();
-
-        for i in 0..batch.num_rows() {
-            assert_eq!(decimal_col.value(i), raw_decimal_i128_values[i]);
-        }
-    }
-
-    #[test]
-    #[ignore = "See ARROW-11294, data is correct but list field name is incorrect"]
-    fn arrow_writer_complex() {
-        // define schema
-        let struct_field_d = Field::new("d", DataType::Float64, true);
-        let struct_field_f = Field::new("f", DataType::Float32, true);
-        let struct_field_g = Field::new(
-            "g",
-            DataType::List(Box::new(Field::new("item", DataType::Int16, true))),
-            true,
-        );
-        let struct_field_e = Field::new(
-            "e",
-            DataType::Struct(vec![struct_field_f.clone(), struct_field_g.clone()]),
-            true,
-        );
-        let schema = Schema::new(vec![
-            Field::new("a", DataType::Int32, false),
-            Field::new("b", DataType::Int32, true),
-            Field::new(
-                "c",
-                DataType::Struct(vec![struct_field_d.clone(), struct_field_e.clone()]),
-                true, // NB: this test fails if value is false. Why?
-            ),
-        ]);
-
-        // create some data
-        let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
-        let b = Int32Array::from(vec![Some(1), None, None, Some(4), Some(5)]);
-        let d = Float64Array::from(vec![None, None, None, Some(1.0), None]);
-        let f = Float32Array::from(vec![Some(0.0), None, Some(333.3), None, Some(5.25)]);
-
-        let g_value = Int16Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
-
-        // Construct a buffer for value offsets, for the nested array:
-        //  [[1], [2, 3], [], [4, 5, 6], [7, 8, 9, 10]]
-        let g_value_offsets =
-            arrow::buffer::Buffer::from(&[0, 1, 3, 3, 6, 10].to_byte_slice());
-
-        // Construct a list array from the above two
-        let g_list_data = ArrayData::builder(struct_field_g.data_type().clone())
-            .len(5)
-            .add_buffer(g_value_offsets)
-            .add_child_data(g_value.data().clone())
-            // .null_bit_buffer(Buffer::from(vec![0b00011011])) // TODO: add to test after resolving other issues
-            .build();
-        let g = ListArray::from(g_list_data);
-
-        let e = StructArray::from(vec![
-            (struct_field_f, Arc::new(f) as ArrayRef),
-            (struct_field_g, Arc::new(g) as ArrayRef),
-        ]);
-
-        let c = StructArray::from(vec![
-            (struct_field_d, Arc::new(d) as ArrayRef),
-            (struct_field_e, Arc::new(e) as ArrayRef),
-        ]);
-
-        // build a record batch
-        let batch = RecordBatch::try_new(
-            Arc::new(schema),
-            vec![Arc::new(a), Arc::new(b), Arc::new(c)],
-        )
-        .unwrap();
-
-        roundtrip("test_arrow_writer_complex.parquet", batch);
-    }
-
-    #[test]
-    fn arrow_writer_2_level_struct() {
-        // tests writing <struct<struct<primitive>>
-        let field_c = Field::new("c", DataType::Int32, true);
-        let field_b = Field::new("b", DataType::Struct(vec![field_c]), true);
-        let field_a = Field::new("a", DataType::Struct(vec![field_b.clone()]), true);
-        let schema = Schema::new(vec![field_a.clone()]);
-
-        // create data
-        let c = Int32Array::from(vec![Some(1), None, Some(3), None, None, Some(6)]);
-        let b_data = ArrayDataBuilder::new(field_b.data_type().clone())
-            .len(6)
-            .null_bit_buffer(Buffer::from(vec![0b00100111]))
-            .add_child_data(c.data().clone())
-            .build();
-        let b = StructArray::from(b_data);
-        let a_data = ArrayDataBuilder::new(field_a.data_type().clone())
-            .len(6)
-            .null_bit_buffer(Buffer::from(vec![0b00101111]))
-            .add_child_data(b.data().clone())
-            .build();
-        let a = StructArray::from(a_data);
-
-        assert_eq!(a.null_count(), 1);
-        assert_eq!(a.column(0).null_count(), 2);
-
-        // build a racord batch
-        let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a)]).unwrap();
-
-        roundtrip("test_arrow_writer_2_level_struct.parquet", batch);
-    }
-
-    #[test]
-    fn arrow_writer_2_level_struct_non_null() {
-        // tests writing <struct<struct<primitive>>
-        let field_c = Field::new("c", DataType::Int32, false);
-        let field_b = Field::new("b", DataType::Struct(vec![field_c]), false);
-        let field_a = Field::new("a", DataType::Struct(vec![field_b.clone()]), false);
-        let schema = Schema::new(vec![field_a.clone()]);
-
-        // create data
-        let c = Int32Array::from(vec![1, 2, 3, 4, 5, 6]);
-        let b_data = ArrayDataBuilder::new(field_b.data_type().clone())
-            .len(6)
-            .add_child_data(c.data().clone())
-            .build();
-        let b = StructArray::from(b_data);
-        let a_data = ArrayDataBuilder::new(field_a.data_type().clone())
-            .len(6)
-            .add_child_data(b.data().clone())
-            .build();
-        let a = StructArray::from(a_data);
-
-        assert_eq!(a.null_count(), 0);
-        assert_eq!(a.column(0).null_count(), 0);
-
-        // build a racord batch
-        let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a)]).unwrap();
-
-        roundtrip("test_arrow_writer_2_level_struct_non_null.parquet", batch);
-    }
-
-    #[test]
-    #[ignore = "The levels generated are correct, but because of field_a being non-nullable, we cannot write record"]
-    fn arrow_writer_2_level_struct_mixed_null() {
-        // tests writing <struct<struct<primitive>>
-        let field_c = Field::new("c", DataType::Int32, false);
-        let field_b = Field::new("b", DataType::Struct(vec![field_c]), true);
-        let field_a = Field::new("a", DataType::Struct(vec![field_b.clone()]), false);
-        let schema = Schema::new(vec![field_a.clone()]);
-
-        // create data
-        let c = Int32Array::from(vec![1, 2, 3, 4, 5, 6]);
-        let b_data = ArrayDataBuilder::new(field_b.data_type().clone())
-            .len(6)
-            .null_bit_buffer(Buffer::from(vec![0b00100111]))
-            .add_child_data(c.data().clone())
-            .build();
-        let b = StructArray::from(b_data);
-        // a intentionally has no null buffer, to test that this is handled correctly
-        let a_data = ArrayDataBuilder::new(field_a.data_type().clone())
-            .len(6)
-            .add_child_data(b.data().clone())
-            .build();
-        let a = StructArray::from(a_data);
-
-        assert_eq!(a.null_count(), 0);
-        assert_eq!(a.column(0).null_count(), 2);
-
-        // build a racord batch
-        let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a)]).unwrap();
-
-        roundtrip("test_arrow_writer_2_level_struct_mixed_null.parquet", batch);
-    }
-
-    const SMALL_SIZE: usize = 4;
-
-    fn roundtrip(filename: &str, expected_batch: RecordBatch) {
-        let file = get_temp_file(filename, &[]);
-
-        let mut writer = ArrowWriter::try_new(
-            file.try_clone().unwrap(),
-            expected_batch.schema(),
-            None,
-        )
-        .expect("Unable to write file");
-        writer.write(&expected_batch).unwrap();
-        writer.close().unwrap();
-
-        let reader = SerializedFileReader::new(file).unwrap();
-        let mut arrow_reader = ParquetFileArrowReader::new(Arc::new(reader));
-        let mut record_batch_reader = arrow_reader.get_record_reader(1024).unwrap();
-
-        let actual_batch = record_batch_reader
-            .next()
-            .expect("No batch found")
-            .expect("Unable to get batch");
-
-        assert_eq!(expected_batch.schema(), actual_batch.schema());
-        assert_eq!(expected_batch.num_columns(), actual_batch.num_columns());
-        assert_eq!(expected_batch.num_rows(), actual_batch.num_rows());
-        for i in 0..expected_batch.num_columns() {
-            let expected_data = expected_batch.column(i).data();
-            let actual_data = actual_batch.column(i).data();
-
-            assert_eq!(expected_data, actual_data);
-            // assert_eq!(expected_data, actual_data, "L: {:#?}\nR: {:#?}", expected_data, actual_data);
-        }
-    }
-
-    fn one_column_roundtrip(filename: &str, values: ArrayRef, nullable: bool) {
-        let schema = Schema::new(vec![Field::new(
-            "col",
-            values.data_type().clone(),
-            nullable,
-        )]);
-        let expected_batch =
-            RecordBatch::try_new(Arc::new(schema), vec![values]).unwrap();
-
-        roundtrip(filename, expected_batch);
-    }
-
-    fn values_required<A, I>(iter: I, filename: &str)
-    where
-        A: From<Vec<I::Item>> + Array + 'static,
-        I: IntoIterator,
-    {
-        let raw_values: Vec<_> = iter.into_iter().collect();
-        let values = Arc::new(A::from(raw_values));
-        one_column_roundtrip(filename, values, false);
-    }
-
-    fn values_optional<A, I>(iter: I, filename: &str)
-    where
-        A: From<Vec<Option<I::Item>>> + Array + 'static,
-        I: IntoIterator,
-    {
-        let optional_raw_values: Vec<_> = iter
-            .into_iter()
-            .enumerate()
-            .map(|(i, v)| if i % 2 == 0 { None } else { Some(v) })
-            .collect();
-        let optional_values = Arc::new(A::from(optional_raw_values));
-        one_column_roundtrip(filename, optional_values, true);
-    }
-
-    fn required_and_optional<A, I>(iter: I, filename: &str)
-    where
-        A: From<Vec<I::Item>> + From<Vec<Option<I::Item>>> + Array + 'static,
-        I: IntoIterator + Clone,
-    {
-        values_required::<A, I>(iter.clone(), filename);
-        values_optional::<A, I>(iter, filename);
-    }
-
-    #[test]
-    fn all_null_primitive_single_column() {
-        let values = Arc::new(Int32Array::from(vec![None; SMALL_SIZE]));
-        one_column_roundtrip("all_null_primitive_single_column", values, true);
-    }
-    #[test]
-    fn null_single_column() {
-        let values = Arc::new(NullArray::new(SMALL_SIZE));
-        one_column_roundtrip("null_single_column", values, true);
-        // null arrays are always nullable, a test with non-nullable nulls fails
-    }
-
-    #[test]
-    fn bool_single_column() {
-        required_and_optional::<BooleanArray, _>(
-            [true, false].iter().cycle().copied().take(SMALL_SIZE),
-            "bool_single_column",
-        );
-    }
-
-    #[test]
-    fn i8_single_column() {
-        required_and_optional::<Int8Array, _>(0..SMALL_SIZE as i8, "i8_single_column");
-    }
-
-    #[test]
-    fn i16_single_column() {
-        required_and_optional::<Int16Array, _>(0..SMALL_SIZE as i16, "i16_single_column");
-    }
-
-    #[test]
-    fn i32_single_column() {
-        required_and_optional::<Int32Array, _>(0..SMALL_SIZE as i32, "i32_single_column");
-    }
-
-    #[test]
-    fn i64_single_column() {
-        required_and_optional::<Int64Array, _>(0..SMALL_SIZE as i64, "i64_single_column");
-    }
-
-    #[test]
-    fn u8_single_column() {
-        required_and_optional::<UInt8Array, _>(0..SMALL_SIZE as u8, "u8_single_column");
-    }
-
-    #[test]
-    fn u16_single_column() {
-        required_and_optional::<UInt16Array, _>(
-            0..SMALL_SIZE as u16,
-            "u16_single_column",
-        );
-    }
-
-    #[test]
-    fn u32_single_column() {
-        required_and_optional::<UInt32Array, _>(
-            0..SMALL_SIZE as u32,
-            "u32_single_column",
-        );
-    }
-
-    #[test]
-    fn u64_single_column() {
-        required_and_optional::<UInt64Array, _>(
-            0..SMALL_SIZE as u64,
-            "u64_single_column",
-        );
-    }
-
-    #[test]
-    fn f32_single_column() {
-        required_and_optional::<Float32Array, _>(
-            (0..SMALL_SIZE).map(|i| i as f32),
-            "f32_single_column",
-        );
-    }
-
-    #[test]
-    fn f64_single_column() {
-        required_and_optional::<Float64Array, _>(
-            (0..SMALL_SIZE).map(|i| i as f64),
-            "f64_single_column",
-        );
-    }
-
-    // The timestamp array types don't implement From<Vec<T>> because they need the timezone
-    // argument, and they also doesn't support building from a Vec<Option<T>>, so call
-    // one_column_roundtrip manually instead of calling required_and_optional for these tests.
-
-    #[test]
-    fn timestamp_second_single_column() {
-        let raw_values: Vec<_> = (0..SMALL_SIZE as i64).collect();
-        let values = Arc::new(TimestampSecondArray::from_vec(raw_values, None));
-
-        one_column_roundtrip("timestamp_second_single_column", values, false);
-    }
-
-    #[test]
-    fn timestamp_millisecond_single_column() {
-        let raw_values: Vec<_> = (0..SMALL_SIZE as i64).collect();
-        let values = Arc::new(TimestampMillisecondArray::from_vec(raw_values, None));
-
-        one_column_roundtrip("timestamp_millisecond_single_column", values, false);
-    }
-
-    #[test]
-    fn timestamp_microsecond_single_column() {
-        let raw_values: Vec<_> = (0..SMALL_SIZE as i64).collect();
-        let values = Arc::new(TimestampMicrosecondArray::from_vec(raw_values, None));
-
-        one_column_roundtrip("timestamp_microsecond_single_column", values, false);
-    }
-
-    #[test]
-    fn timestamp_nanosecond_single_column() {
-        let raw_values: Vec<_> = (0..SMALL_SIZE as i64).collect();
-        let values = Arc::new(TimestampNanosecondArray::from_vec(raw_values, None));
-
-        one_column_roundtrip("timestamp_nanosecond_single_column", values, false);
-    }
-
-    #[test]
-    fn date32_single_column() {
-        required_and_optional::<Date32Array, _>(
-            0..SMALL_SIZE as i32,
-            "date32_single_column",
-        );
-    }
-
-    #[test]
-    fn date64_single_column() {
-        // Date64 must be a multiple of 86400000, see ARROW-10925
-        required_and_optional::<Date64Array, _>(
-            (0..(SMALL_SIZE as i64 * 86400000)).step_by(86400000),
-            "date64_single_column",
-        );
-    }
-
-    #[test]
-    fn time32_second_single_column() {
-        required_and_optional::<Time32SecondArray, _>(
-            0..SMALL_SIZE as i32,
-            "time32_second_single_column",
-        );
-    }
-
-    #[test]
-    fn time32_millisecond_single_column() {
-        required_and_optional::<Time32MillisecondArray, _>(
-            0..SMALL_SIZE as i32,
-            "time32_millisecond_single_column",
-        );
-    }
-
-    #[test]
-    fn time64_microsecond_single_column() {
-        required_and_optional::<Time64MicrosecondArray, _>(
-            0..SMALL_SIZE as i64,
-            "time64_microsecond_single_column",
-        );
-    }
-
-    #[test]
-    fn time64_nanosecond_single_column() {
-        required_and_optional::<Time64NanosecondArray, _>(
-            0..SMALL_SIZE as i64,
-            "time64_nanosecond_single_column",
-        );
-    }
-
-    #[test]
-    #[should_panic(expected = "Converting Duration to parquet not supported")]
-    fn duration_second_single_column() {
-        required_and_optional::<DurationSecondArray, _>(
-            0..SMALL_SIZE as i64,
-            "duration_second_single_column",
-        );
-    }
-
-    #[test]
-    #[should_panic(expected = "Converting Duration to parquet not supported")]
-    fn duration_millisecond_single_column() {
-        required_and_optional::<DurationMillisecondArray, _>(
-            0..SMALL_SIZE as i64,
-            "duration_millisecond_single_column",
-        );
-    }
-
-    #[test]
-    #[should_panic(expected = "Converting Duration to parquet not supported")]
-    fn duration_microsecond_single_column() {
-        required_and_optional::<DurationMicrosecondArray, _>(
-            0..SMALL_SIZE as i64,
-            "duration_microsecond_single_column",
-        );
-    }
-
-    #[test]
-    #[should_panic(expected = "Converting Duration to parquet not supported")]
-    fn duration_nanosecond_single_column() {
-        required_and_optional::<DurationNanosecondArray, _>(
-            0..SMALL_SIZE as i64,
-            "duration_nanosecond_single_column",
-        );
-    }
-
-    #[test]
-    fn interval_year_month_single_column() {
-        required_and_optional::<IntervalYearMonthArray, _>(
-            0..SMALL_SIZE as i32,
-            "interval_year_month_single_column",
-        );
-    }
-
-    #[test]
-    fn interval_day_time_single_column() {
-        required_and_optional::<IntervalDayTimeArray, _>(
-            0..SMALL_SIZE as i64,
-            "interval_day_time_single_column",
-        );
-    }
-
-    #[test]
-    fn binary_single_column() {
-        let one_vec: Vec<u8> = (0..SMALL_SIZE as u8).collect();
-        let many_vecs: Vec<_> = std::iter::repeat(one_vec).take(SMALL_SIZE).collect();
-        let many_vecs_iter = many_vecs.iter().map(|v| v.as_slice());
-
-        // BinaryArrays can't be built from Vec<Option<&str>>, so only call `values_required`
-        values_required::<BinaryArray, _>(many_vecs_iter, "binary_single_column");
-    }
-
-    #[test]
-    fn large_binary_single_column() {
-        let one_vec: Vec<u8> = (0..SMALL_SIZE as u8).collect();
-        let many_vecs: Vec<_> = std::iter::repeat(one_vec).take(SMALL_SIZE).collect();
-        let many_vecs_iter = many_vecs.iter().map(|v| v.as_slice());
-
-        // LargeBinaryArrays can't be built from Vec<Option<&str>>, so only call `values_required`
-        values_required::<LargeBinaryArray, _>(
-            many_vecs_iter,
-            "large_binary_single_column",
-        );
-    }
-
-    #[test]
-    fn fixed_size_binary_single_column() {
-        let mut builder = FixedSizeBinaryBuilder::new(16, 4);
-        builder.append_value(b"0123").unwrap();
-        builder.append_null().unwrap();
-        builder.append_value(b"8910").unwrap();
-        builder.append_value(b"1112").unwrap();
-        let array = Arc::new(builder.finish());
-
-        one_column_roundtrip("fixed_size_binary_single_column", array, true);
-    }
-
-    #[test]
-    fn string_single_column() {
-        let raw_values: Vec<_> = (0..SMALL_SIZE).map(|i| i.to_string()).collect();
-        let raw_strs = raw_values.iter().map(|s| s.as_str());
-
-        required_and_optional::<StringArray, _>(raw_strs, "string_single_column");
-    }
-
-    #[test]
-    fn large_string_single_column() {
-        let raw_values: Vec<_> = (0..SMALL_SIZE).map(|i| i.to_string()).collect();
-        let raw_strs = raw_values.iter().map(|s| s.as_str());
-
-        required_and_optional::<LargeStringArray, _>(
-            raw_strs,
-            "large_string_single_column",
-        );
-    }
-
-    #[test]
-    fn list_single_column() {
-        let a_values = Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
-        let a_value_offsets =
-            arrow::buffer::Buffer::from(&[0, 1, 3, 3, 6, 10].to_byte_slice());
-        let a_list_data = ArrayData::builder(DataType::List(Box::new(Field::new(
-            "item",
-            DataType::Int32,
-            true, // TODO: why does this fail when false? Is it related to logical nulls?
-        ))))
-        .len(5)
-        .add_buffer(a_value_offsets)
-        .null_bit_buffer(Buffer::from(vec![0b00011011]))
-        .add_child_data(a_values.data().clone())
-        .build();
-
-        assert_eq!(a_list_data.null_count(), 1);
-
-        let a = ListArray::from(a_list_data);
-        let values = Arc::new(a);
-
-        one_column_roundtrip("list_single_column", values, true);
-    }
-
-    #[test]
-    fn large_list_single_column() {
-        let a_values = Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
-        let a_value_offsets =
-            arrow::buffer::Buffer::from(&[0i64, 1, 3, 3, 6, 10].to_byte_slice());
-        let a_list_data = ArrayData::builder(DataType::LargeList(Box::new(Field::new(
-            "large_item",
-            DataType::Int32,
-            true,
-        ))))
-        .len(5)
-        .add_buffer(a_value_offsets)
-        .add_child_data(a_values.data().clone())
-        .null_bit_buffer(Buffer::from(vec![0b00011011]))
-        .build();
-
-        // I think this setup is incorrect because this should pass
-        assert_eq!(a_list_data.null_count(), 1);
-
-        let a = LargeListArray::from(a_list_data);
-        let values = Arc::new(a);
-
-        one_column_roundtrip("large_list_single_column", values, true);
-    }
-
-    #[test]
-    fn struct_single_column() {
-        let a_values = Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
-        let struct_field_a = Field::new("f", DataType::Int32, false);
-        let s = StructArray::from(vec![(struct_field_a, Arc::new(a_values) as ArrayRef)]);
-
-        let values = Arc::new(s);
-        one_column_roundtrip("struct_single_column", values, false);
-    }
-
-    #[test]
-    fn arrow_writer_string_dictionary() {
-        // define schema
-        let schema = Arc::new(Schema::new(vec![Field::new_dict(
-            "dictionary",
-            DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
-            true,
-            42,
-            true,
-        )]));
-
-        // create some data
-        let d: Int32DictionaryArray = [Some("alpha"), None, Some("beta"), Some("alpha")]
-            .iter()
-            .copied()
-            .collect();
-
-        // build a record batch
-        let expected_batch = RecordBatch::try_new(schema, vec![Arc::new(d)]).unwrap();
-
-        roundtrip(
-            "test_arrow_writer_string_dictionary.parquet",
-            expected_batch,
-        );
-    }
-
-    #[test]
-    fn arrow_writer_primitive_dictionary() {
-        // define schema
-        let schema = Arc::new(Schema::new(vec![Field::new_dict(
-            "dictionary",
-            DataType::Dictionary(Box::new(DataType::UInt8), Box::new(DataType::UInt32)),
-            true,
-            42,
-            true,
-        )]));
-
-        // create some data
-        let key_builder = PrimitiveBuilder::<UInt8Type>::new(3);
-        let value_builder = PrimitiveBuilder::<UInt32Type>::new(2);
-        let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder);
-        builder.append(12345678).unwrap();
-        builder.append_null().unwrap();
-        builder.append(22345678).unwrap();
-        builder.append(12345678).unwrap();
-        let d = builder.finish();
-
-        // build a record batch
-        let expected_batch = RecordBatch::try_new(schema, vec![Arc::new(d)]).unwrap();
-
-        roundtrip(
-            "test_arrow_writer_primitive_dictionary.parquet",
-            expected_batch,
-        );
-    }
-
-    #[test]
-    fn arrow_writer_string_dictionary_unsigned_index() {
-        // define schema
-        let schema = Arc::new(Schema::new(vec![Field::new_dict(
-            "dictionary",
-            DataType::Dictionary(Box::new(DataType::UInt8), Box::new(DataType::Utf8)),
-            true,
-            42,
-            true,
-        )]));
-
-        // create some data
-        let d: UInt8DictionaryArray = [Some("alpha"), None, Some("beta"), Some("alpha")]
-            .iter()
-            .copied()
-            .collect();
-
-        // build a record batch
-        let expected_batch = RecordBatch::try_new(schema, vec![Arc::new(d)]).unwrap();
-
-        roundtrip(
-            "test_arrow_writer_string_dictionary_unsigned_index.parquet",
-            expected_batch,
-        );
-    }
-}
diff --git a/rust/parquet/src/arrow/converter.rs b/rust/parquet/src/arrow/converter.rs
deleted file mode 100644
index 1672be9c046..00000000000
--- a/rust/parquet/src/arrow/converter.rs
+++ /dev/null
@@ -1,454 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use crate::data_type::{ByteArray, DataType, FixedLenByteArray, Int96};
-// TODO: clean up imports (best done when there are few moving parts)
-use arrow::array::{
-    Array, ArrayRef, BinaryBuilder, DecimalBuilder, FixedSizeBinaryBuilder,
-    IntervalDayTimeArray, IntervalDayTimeBuilder, IntervalYearMonthArray,
-    IntervalYearMonthBuilder, LargeBinaryBuilder, LargeStringBuilder, PrimitiveBuilder,
-    PrimitiveDictionaryBuilder, StringBuilder, StringDictionaryBuilder,
-};
-use arrow::compute::cast;
-use std::convert::{From, TryInto};
-use std::sync::Arc;
-
-use crate::errors::Result;
-use arrow::datatypes::{ArrowDictionaryKeyType, ArrowPrimitiveType};
-
-use arrow::array::{
-    BinaryArray, DecimalArray, DictionaryArray, FixedSizeBinaryArray, LargeBinaryArray,
-    LargeStringArray, PrimitiveArray, StringArray, TimestampNanosecondArray,
-};
-use std::marker::PhantomData;
-
-use crate::data_type::Int32Type as ParquetInt32Type;
-use arrow::datatypes::Int32Type;
-
-/// A converter is used to consume record reader's content and convert it to arrow
-/// primitive array.
-pub trait Converter<S, T> {
-    /// This method converts record reader's buffered content into arrow array.
-    /// It will consume record reader's data, but will not reset record reader's
-    /// state.
-    fn convert(&self, source: S) -> Result<T>;
-}
-
-pub struct FixedSizeArrayConverter {
-    byte_width: i32,
-}
-
-impl FixedSizeArrayConverter {
-    pub fn new(byte_width: i32) -> Self {
-        Self { byte_width }
-    }
-}
-
-impl Converter<Vec<Option<FixedLenByteArray>>, FixedSizeBinaryArray>
-    for FixedSizeArrayConverter
-{
-    fn convert(
-        &self,
-        source: Vec<Option<FixedLenByteArray>>,
-    ) -> Result<FixedSizeBinaryArray> {
-        let mut builder = FixedSizeBinaryBuilder::new(source.len(), self.byte_width);
-        for v in source {
-            match v {
-                Some(array) => builder.append_value(array.data()),
-                None => builder.append_null(),
-            }?
-        }
-
-        Ok(builder.finish())
-    }
-}
-
-pub struct DecimalArrayConverter {
-    precision: i32,
-    scale: i32,
-}
-
-impl DecimalArrayConverter {
-    pub fn new(precision: i32, scale: i32) -> Self {
-        Self { precision, scale }
-    }
-
-    fn from_bytes_to_i128(b: &[u8]) -> i128 {
-        assert!(b.len() <= 16, "DecimalArray supports only up to size 16");
-        let first_bit = b[0] & 128u8 == 128u8;
-        let mut result = if first_bit { [255u8; 16] } else { [0u8; 16] };
-        for (i, v) in b.iter().enumerate() {
-            result[i + (16 - b.len())] = *v;
-        }
-        i128::from_be_bytes(result)
-    }
-}
-
-impl Converter<Vec<Option<FixedLenByteArray>>, DecimalArray> for DecimalArrayConverter {
-    fn convert(&self, source: Vec<Option<FixedLenByteArray>>) -> Result<DecimalArray> {
-        let mut builder = DecimalBuilder::new(
-            source.len(),
-            self.precision as usize,
-            self.scale as usize,
-        );
-        for v in source {
-            match v {
-                Some(array) => {
-                    builder.append_value(Self::from_bytes_to_i128(array.data()))
-                }
-                None => builder.append_null(),
-            }?
-        }
-
-        Ok(builder.finish())
-    }
-}
-/// An Arrow Interval converter, which reads the first 4 bytes of a Parquet interval,
-/// and interprets it as an i32 value representing the Arrow YearMonth value
-pub struct IntervalYearMonthArrayConverter {}
-
-impl Converter<Vec<Option<FixedLenByteArray>>, IntervalYearMonthArray>
-    for IntervalYearMonthArrayConverter
-{
-    fn convert(
-        &self,
-        source: Vec<Option<FixedLenByteArray>>,
-    ) -> Result<IntervalYearMonthArray> {
-        let mut builder = IntervalYearMonthBuilder::new(source.len());
-        for v in source {
-            match v {
-                Some(array) => builder.append_value(i32::from_le_bytes(
-                    array.data()[0..4].try_into().unwrap(),
-                )),
-                None => builder.append_null(),
-            }?
-        }
-
-        Ok(builder.finish())
-    }
-}
-
-/// An Arrow Interval converter, which reads the last 8 bytes of a Parquet interval,
-/// and interprets it as an i32 value representing the Arrow DayTime value
-pub struct IntervalDayTimeArrayConverter {}
-
-impl Converter<Vec<Option<FixedLenByteArray>>, IntervalDayTimeArray>
-    for IntervalDayTimeArrayConverter
-{
-    fn convert(
-        &self,
-        source: Vec<Option<FixedLenByteArray>>,
-    ) -> Result<IntervalDayTimeArray> {
-        let mut builder = IntervalDayTimeBuilder::new(source.len());
-        for v in source {
-            match v {
-                Some(array) => builder.append_value(i64::from_le_bytes(
-                    array.data()[4..12].try_into().unwrap(),
-                )),
-                None => builder.append_null(),
-            }?
-        }
-
-        Ok(builder.finish())
-    }
-}
-
-pub struct Int96ArrayConverter {
-    pub timezone: Option<String>,
-}
-
-impl Converter<Vec<Option<Int96>>, TimestampNanosecondArray> for Int96ArrayConverter {
-    fn convert(&self, source: Vec<Option<Int96>>) -> Result<TimestampNanosecondArray> {
-        Ok(TimestampNanosecondArray::from_opt_vec(
-            source
-                .into_iter()
-                .map(|int96| int96.map(|val| val.to_i64() * 1_000_000))
-                .collect(),
-            self.timezone.clone(),
-        ))
-    }
-}
-
-pub struct Utf8ArrayConverter {}
-
-impl Converter<Vec<Option<ByteArray>>, StringArray> for Utf8ArrayConverter {
-    fn convert(&self, source: Vec<Option<ByteArray>>) -> Result<StringArray> {
-        let data_size = source
-            .iter()
-            .map(|x| x.as_ref().map(|b| b.len()).unwrap_or(0))
-            .sum();
-
-        let mut builder = StringBuilder::with_capacity(source.len(), data_size);
-        for v in source {
-            match v {
-                Some(array) => builder.append_value(array.as_utf8()?),
-                None => builder.append_null(),
-            }?
-        }
-
-        Ok(builder.finish())
-    }
-}
-
-pub struct LargeUtf8ArrayConverter {}
-
-impl Converter<Vec<Option<ByteArray>>, LargeStringArray> for LargeUtf8ArrayConverter {
-    fn convert(&self, source: Vec<Option<ByteArray>>) -> Result<LargeStringArray> {
-        let data_size = source
-            .iter()
-            .map(|x| x.as_ref().map(|b| b.len()).unwrap_or(0))
-            .sum();
-
-        let mut builder = LargeStringBuilder::with_capacity(source.len(), data_size);
-        for v in source {
-            match v {
-                Some(array) => builder.append_value(array.as_utf8()?),
-                None => builder.append_null(),
-            }?
-        }
-
-        Ok(builder.finish())
-    }
-}
-
-pub struct BinaryArrayConverter {}
-
-impl Converter<Vec<Option<ByteArray>>, BinaryArray> for BinaryArrayConverter {
-    fn convert(&self, source: Vec<Option<ByteArray>>) -> Result<BinaryArray> {
-        let mut builder = BinaryBuilder::new(source.len());
-        for v in source {
-            match v {
-                Some(array) => builder.append_value(array.data()),
-                None => builder.append_null(),
-            }?
-        }
-
-        Ok(builder.finish())
-    }
-}
-
-pub struct LargeBinaryArrayConverter {}
-
-impl Converter<Vec<Option<ByteArray>>, LargeBinaryArray> for LargeBinaryArrayConverter {
-    fn convert(&self, source: Vec<Option<ByteArray>>) -> Result<LargeBinaryArray> {
-        let mut builder = LargeBinaryBuilder::new(source.len());
-        for v in source {
-            match v {
-                Some(array) => builder.append_value(array.data()),
-                None => builder.append_null(),
-            }?
-        }
-
-        Ok(builder.finish())
-    }
-}
-
-pub struct StringDictionaryArrayConverter {}
-
-impl<K: ArrowDictionaryKeyType> Converter<Vec<Option<ByteArray>>, DictionaryArray<K>>
-    for StringDictionaryArrayConverter
-{
-    fn convert(&self, source: Vec<Option<ByteArray>>) -> Result<DictionaryArray<K>> {
-        let data_size = source
-            .iter()
-            .map(|x| x.as_ref().map(|b| b.len()).unwrap_or(0))
-            .sum();
-
-        let keys_builder = PrimitiveBuilder::<K>::new(source.len());
-        let values_builder = StringBuilder::with_capacity(source.len(), data_size);
-
-        let mut builder = StringDictionaryBuilder::new(keys_builder, values_builder);
-        for v in source {
-            match v {
-                Some(array) => {
-                    let _ = builder.append(array.as_utf8()?)?;
-                }
-                None => builder.append_null()?,
-            }
-        }
-
-        Ok(builder.finish())
-    }
-}
-
-pub struct DictionaryArrayConverter<DictValueSourceType, DictValueTargetType, ParquetType>
-{
-    _dict_value_source_marker: PhantomData<DictValueSourceType>,
-    _dict_value_target_marker: PhantomData<DictValueTargetType>,
-    _parquet_marker: PhantomData<ParquetType>,
-}
-
-impl<DictValueSourceType, DictValueTargetType, ParquetType>
-    DictionaryArrayConverter<DictValueSourceType, DictValueTargetType, ParquetType>
-{
-    pub fn new() -> Self {
-        Self {
-            _dict_value_source_marker: PhantomData,
-            _dict_value_target_marker: PhantomData,
-            _parquet_marker: PhantomData,
-        }
-    }
-}
-
-impl<K, DictValueSourceType, DictValueTargetType, ParquetType>
-    Converter<Vec<Option<<ParquetType as DataType>::T>>, DictionaryArray<K>>
-    for DictionaryArrayConverter<DictValueSourceType, DictValueTargetType, ParquetType>
-where
-    K: ArrowPrimitiveType,
-    DictValueSourceType: ArrowPrimitiveType,
-    DictValueTargetType: ArrowPrimitiveType,
-    ParquetType: DataType,
-    PrimitiveArray<DictValueSourceType>: From<Vec<Option<<ParquetType as DataType>::T>>>,
-{
-    fn convert(
-        &self,
-        source: Vec<Option<<ParquetType as DataType>::T>>,
-    ) -> Result<DictionaryArray<K>> {
-        let keys_builder = PrimitiveBuilder::<K>::new(source.len());
-        let values_builder = PrimitiveBuilder::<DictValueTargetType>::new(source.len());
-
-        let mut builder = PrimitiveDictionaryBuilder::new(keys_builder, values_builder);
-
-        let source_array: Arc<dyn Array> =
-            Arc::new(PrimitiveArray::<DictValueSourceType>::from(source));
-        let target_array = cast(&source_array, &DictValueTargetType::DATA_TYPE)?;
-        let target = target_array
-            .as_any()
-            .downcast_ref::<PrimitiveArray<DictValueTargetType>>()
-            .unwrap();
-
-        for i in 0..target.len() {
-            if target.is_null(i) {
-                builder.append_null()?;
-            } else {
-                let _ = builder.append(target.value(i))?;
-            }
-        }
-
-        Ok(builder.finish())
-    }
-}
-
-pub type Utf8Converter =
-    ArrayRefConverter<Vec<Option<ByteArray>>, StringArray, Utf8ArrayConverter>;
-pub type LargeUtf8Converter =
-    ArrayRefConverter<Vec<Option<ByteArray>>, LargeStringArray, LargeUtf8ArrayConverter>;
-pub type BinaryConverter =
-    ArrayRefConverter<Vec<Option<ByteArray>>, BinaryArray, BinaryArrayConverter>;
-pub type LargeBinaryConverter = ArrayRefConverter<
-    Vec<Option<ByteArray>>,
-    LargeBinaryArray,
-    LargeBinaryArrayConverter,
->;
-pub type StringDictionaryConverter<T> = ArrayRefConverter<
-    Vec<Option<ByteArray>>,
-    DictionaryArray<T>,
-    StringDictionaryArrayConverter,
->;
-pub type DictionaryConverter<K, SV, TV, P> = ArrayRefConverter<
-    Vec<Option<<P as DataType>::T>>,
-    DictionaryArray<K>,
-    DictionaryArrayConverter<SV, TV, P>,
->;
-pub type PrimitiveDictionaryConverter<K, V> = ArrayRefConverter<
-    Vec<Option<<ParquetInt32Type as DataType>::T>>,
-    DictionaryArray<K>,
-    DictionaryArrayConverter<Int32Type, V, ParquetInt32Type>,
->;
-
-pub type Int96Converter =
-    ArrayRefConverter<Vec<Option<Int96>>, TimestampNanosecondArray, Int96ArrayConverter>;
-
-pub type FixedLenBinaryConverter = ArrayRefConverter<
-    Vec<Option<FixedLenByteArray>>,
-    FixedSizeBinaryArray,
-    FixedSizeArrayConverter,
->;
-pub type IntervalYearMonthConverter = ArrayRefConverter<
-    Vec<Option<FixedLenByteArray>>,
-    IntervalYearMonthArray,
-    IntervalYearMonthArrayConverter,
->;
-pub type IntervalDayTimeConverter = ArrayRefConverter<
-    Vec<Option<FixedLenByteArray>>,
-    IntervalDayTimeArray,
-    IntervalDayTimeArrayConverter,
->;
-
-pub type DecimalConverter = ArrayRefConverter<
-    Vec<Option<FixedLenByteArray>>,
-    DecimalArray,
-    DecimalArrayConverter,
->;
-
-pub struct FromConverter<S, T> {
-    _source: PhantomData<S>,
-    _dest: PhantomData<T>,
-}
-
-impl<S, T> FromConverter<S, T>
-where
-    T: From<S>,
-{
-    pub fn new() -> Self {
-        Self {
-            _source: PhantomData,
-            _dest: PhantomData,
-        }
-    }
-}
-
-impl<S, T> Converter<S, T> for FromConverter<S, T>
-where
-    T: From<S>,
-{
-    fn convert(&self, source: S) -> Result<T> {
-        Ok(T::from(source))
-    }
-}
-
-pub struct ArrayRefConverter<S, A, C> {
-    _source: PhantomData<S>,
-    _array: PhantomData<A>,
-    converter: C,
-}
-
-impl<S, A, C> ArrayRefConverter<S, A, C>
-where
-    A: Array + 'static,
-    C: Converter<S, A> + 'static,
-{
-    pub fn new(converter: C) -> Self {
-        Self {
-            _source: PhantomData,
-            _array: PhantomData,
-            converter,
-        }
-    }
-}
-
-impl<S, A, C> Converter<S, ArrayRef> for ArrayRefConverter<S, A, C>
-where
-    A: Array + 'static,
-    C: Converter<S, A> + 'static,
-{
-    fn convert(&self, source: S) -> Result<ArrayRef> {
-        self.converter
-            .convert(source)
-            .map(|array| Arc::new(array) as ArrayRef)
-    }
-}
diff --git a/rust/parquet/src/arrow/levels.rs b/rust/parquet/src/arrow/levels.rs
deleted file mode 100644
index 2168670bb59..00000000000
--- a/rust/parquet/src/arrow/levels.rs
+++ /dev/null
@@ -1,1411 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Parquet definition and repetition levels
-//!
-//! Contains the algorithm for computing definition and repetition levels.
-//! The algorithm works by tracking the slots of an array that should
-//! ultimately be populated when writing to Parquet.
-//! Parquet achieves nesting through definition levels and repetition levels \[1\].
-//! Definition levels specify how many optional fields in the part for the column
-//! are defined.
-//! Repetition levels specify at what repeated field (list) in the path a column
-//! is defined.
-//!
-//! In a nested data structure such as `a.b.c`, one can see levels as defining
-//! whether a record is defined at `a`, `a.b`, or `a.b.c`.
-//! Optional fields are nullable fields, thus if all 3 fields
-//! are nullable, the maximum definition could be = 3 if there are no lists.
-//!
-//! The algorithm in this module computes the necessary information to enable
-//! the writer to keep track of which columns are at which levels, and to extract
-//! the correct values at the correct slots from Arrow arrays.
-//!
-//! It works by walking a record batch's arrays, keeping track of what values
-//! are non-null, their positions and computing what their levels are.
-//!
-//! \[1\] [parquet-format#nested-encoding](https://github.com/apache/parquet-format#nested-encoding)
-
-use arrow::array::{make_array, ArrayRef, StructArray};
-use arrow::datatypes::{DataType, Field};
-use arrow::record_batch::RecordBatch;
-
-/// Keeps track of the level information per array that is needed to write an Arrow array to Parquet.
-///
-/// When a nested schema is traversed, intermediate [LevelInfo] structs are created to track
-/// the state of parent arrays. When a primitive Arrow array is encountered, a final [LevelInfo]
-/// is created, and this is what is used to index into the array when writing data to Parquet.
-#[derive(Debug, Eq, PartialEq, Clone)]
-pub(crate) struct LevelInfo {
-    /// Array's definition levels
-    pub definition: Vec<i16>,
-    /// Array's optional repetition levels
-    pub repetition: Option<Vec<i16>>,
-    /// Array's offsets, 64-bit is used to accommodate large offset arrays
-    pub array_offsets: Vec<i64>,
-    // TODO: Convert to an Arrow Buffer after ARROW-10766 is merged.
-    /// Array's logical validity mask, whcih gets unpacked for list children.
-    /// If the parent of an array is null, all children are logically treated as
-    /// null. This mask keeps track of that.
-    ///
-    pub array_mask: Vec<bool>,
-    /// The maximum definition at this level, 0 at the record batch
-    pub max_definition: i16,
-    /// Whether this array or any of its parents is a list
-    pub is_list: bool,
-    /// Whether the current array is nullable (affects definition levels)
-    pub is_nullable: bool,
-}
-
-impl LevelInfo {
-    /// Create a new [LevelInfo] from a record batch.
-    ///
-    /// This is a convenience function to populate the starting point of the traversal.
-    pub(crate) fn new_from_batch(batch: &RecordBatch) -> Self {
-        let num_rows = batch.num_rows();
-        Self {
-            // a batch has no definition level yet
-            definition: vec![0; num_rows],
-            // a batch has no repetition as it is not a list
-            repetition: None,
-            // a batch has sequential offsets, should be num_rows + 1
-            array_offsets: (0..=(num_rows as i64)).collect(),
-            // all values at a batch-level are non-null
-            array_mask: vec![true; num_rows],
-            max_definition: 0,
-            is_list: false,
-            // a batch is treated as nullable even though it has no nulls,
-            // this is required to compute nested type levels correctly
-            is_nullable: false,
-        }
-    }
-
-    /// Compute nested levels of the Arrow array, recursing into lists and structs.
-    ///
-    /// Returns a list of `LevelInfo`, where each level is for nested primitive arrays.
-    pub(crate) fn calculate_array_levels(
-        &self,
-        array: &ArrayRef,
-        field: &Field,
-    ) -> Vec<Self> {
-        let (array_offsets, array_mask) = Self::get_array_offsets_and_masks(array);
-        match array.data_type() {
-            DataType::Null => vec![Self {
-                definition: self.definition.clone(),
-                repetition: self.repetition.clone(),
-                array_offsets: self.array_offsets.clone(),
-                array_mask,
-                max_definition: self.max_definition.max(1),
-                is_list: self.is_list,
-                is_nullable: true, // always nullable as all values are nulls
-            }],
-            DataType::Boolean
-            | DataType::Int8
-            | DataType::Int16
-            | DataType::Int32
-            | DataType::Int64
-            | DataType::UInt8
-            | DataType::UInt16
-            | DataType::UInt32
-            | DataType::UInt64
-            | DataType::Float16
-            | DataType::Float32
-            | DataType::Float64
-            | DataType::Utf8
-            | DataType::LargeUtf8
-            | DataType::Timestamp(_, _)
-            | DataType::Date32
-            | DataType::Date64
-            | DataType::Time32(_)
-            | DataType::Time64(_)
-            | DataType::Duration(_)
-            | DataType::Interval(_)
-            | DataType::Binary
-            | DataType::LargeBinary
-            | DataType::Decimal(_, _)
-            | DataType::FixedSizeBinary(_) => {
-                // we return a vector of 1 value to represent the primitive
-                vec![self.calculate_child_levels(
-                    array_offsets,
-                    array_mask,
-                    false,
-                    field.is_nullable(),
-                )]
-            }
-            DataType::List(list_field) | DataType::LargeList(list_field) => {
-                // Calculate the list level
-                let list_level = self.calculate_child_levels(
-                    array_offsets,
-                    array_mask,
-                    true,
-                    field.is_nullable(),
-                );
-
-                // Construct the child array of the list, and get its offset + mask
-                let array_data = array.data();
-                let child_data = array_data.child_data().get(0).unwrap();
-                let child_array = make_array(child_data.clone());
-                let (child_offsets, child_mask) =
-                    Self::get_array_offsets_and_masks(&child_array);
-
-                match child_array.data_type() {
-                    // TODO: The behaviour of a <list<null>> is untested
-                    DataType::Null => vec![list_level],
-                    DataType::Boolean
-                    | DataType::Int8
-                    | DataType::Int16
-                    | DataType::Int32
-                    | DataType::Int64
-                    | DataType::UInt8
-                    | DataType::UInt16
-                    | DataType::UInt32
-                    | DataType::UInt64
-                    | DataType::Float16
-                    | DataType::Float32
-                    | DataType::Float64
-                    | DataType::Timestamp(_, _)
-                    | DataType::Date32
-                    | DataType::Date64
-                    | DataType::Time32(_)
-                    | DataType::Time64(_)
-                    | DataType::Duration(_)
-                    | DataType::Interval(_)
-                    | DataType::Binary
-                    | DataType::LargeBinary
-                    | DataType::Utf8
-                    | DataType::LargeUtf8
-                    | DataType::Dictionary(_, _)
-                    | DataType::Decimal(_, _)
-                    | DataType::FixedSizeBinary(_) => {
-                        vec![list_level.calculate_child_levels(
-                            child_offsets,
-                            child_mask,
-                            false,
-                            list_field.is_nullable(),
-                        )]
-                    }
-                    DataType::List(_) | DataType::LargeList(_) | DataType::Struct(_) => {
-                        list_level.calculate_array_levels(&child_array, list_field)
-                    }
-                    DataType::FixedSizeList(_, _) => unimplemented!(),
-                    DataType::Union(_) => unimplemented!(),
-                }
-            }
-            DataType::FixedSizeList(_, _) => unimplemented!(),
-            DataType::Struct(struct_fields) => {
-                let struct_array: &StructArray = array
-                    .as_any()
-                    .downcast_ref::<StructArray>()
-                    .expect("Unable to get struct array");
-                let struct_level = self.calculate_child_levels(
-                    array_offsets,
-                    array_mask,
-                    false,
-                    field.is_nullable(),
-                );
-                let mut struct_levels = vec![];
-                struct_array
-                    .columns()
-                    .into_iter()
-                    .zip(struct_fields)
-                    .for_each(|(child_array, child_field)| {
-                        let mut levels =
-                            struct_level.calculate_array_levels(child_array, child_field);
-                        struct_levels.append(&mut levels);
-                    });
-                struct_levels
-            }
-            DataType::Union(_) => unimplemented!(),
-            DataType::Dictionary(_, _) => {
-                // Need to check for these cases not implemented in C++:
-                // - "Writing DictionaryArray with nested dictionary type not yet supported"
-                // - "Writing DictionaryArray with null encoded in dictionary type not yet supported"
-                // vec![self.get_primitive_def_levels(array, field, array_mask)]
-                vec![self.calculate_child_levels(
-                    array_offsets,
-                    array_mask,
-                    false,
-                    field.is_nullable(),
-                )]
-            }
-        }
-    }
-
-    /// Calculate child/leaf array levels.
-    ///
-    /// The algorithm works by incrementing definitions of array values based on whether:
-    /// - a value is optional or required (is_nullable)
-    /// - a list value is repeated + optional or required (is_list)
-    ///
-    /// A record batch always starts at a populated definition = level 0.
-    /// When a batch only has a primitive, i.e. `<batch<primitive[a]>>, column `a`
-    /// can only have a maximum level of 1 if it is not null.
-    /// If it is not null, we increment by 1, such that the null slots will = level 1.
-    /// The above applies to types that have no repetition (anything not a list or map).
-    ///
-    /// If a batch has lists, then we increment by up to 2 levels:
-    /// - 1 level for the list (repeated)
-    /// - 1 level if the list itself is nullable (optional)
-    ///
-    /// A list's child then gets incremented using the above rules.
-    ///
-    /// *Exceptions*
-    ///
-    /// There are 2 exceptions from the above rules:
-    ///
-    /// 1. When at the root of the schema: We always increment the
-    /// level regardless of whether the child is nullable or not. If we do not do
-    /// this, we could have a non-nullable array having a definition of 0.
-    ///
-    /// 2. List parent, non-list child: We always increment the level in this case,
-    /// regardless of whether the child is nullable or not.
-    ///
-    /// *Examples*
-    ///
-    /// A batch with only a primitive that's non-nullable. `<primitive[required]>`:
-    /// * We don't increment the definition level as the array is not optional.
-    /// * This would leave us with a definition of 0, so the first exception applies.
-    /// * The definition level becomes 1.
-    ///
-    /// A batch with only a primitive that's nullable. `<primitive[optional]>`:
-    /// * The definition level becomes 1, as we increment it once.
-    ///
-    /// A batch with a single non-nullable list (both list and child not null):
-    /// * We calculate the level twice, for the list, and for the child.
-    /// * At the list, the level becomes 1, where 0 indicates that the list is
-    ///  empty, and 1 says it's not (determined through offsets).
-    /// * At the primitive level, the second exception applies. The level becomes 2.
-    fn calculate_child_levels(
-        &self,
-        // we use 64-bit offsets to also accommodate large arrays
-        array_offsets: Vec<i64>,
-        array_mask: Vec<bool>,
-        is_list: bool,
-        is_nullable: bool,
-    ) -> Self {
-        let min_len = *(array_offsets.last().unwrap()) as usize;
-        let mut definition = Vec::with_capacity(min_len);
-        let mut repetition = Vec::with_capacity(min_len);
-        let mut merged_array_mask = Vec::with_capacity(min_len);
-
-        // determine the total level increment based on data types
-        let max_definition = match is_list {
-            false => {
-                // first exception, start of a batch, and not list
-                if self.max_definition == 0 {
-                    1
-                } else if self.is_list {
-                    // second exception, always increment after a list
-                    self.max_definition + 1
-                } else {
-                    self.max_definition + is_nullable as i16
-                }
-            }
-            true => self.max_definition + 1 + is_nullable as i16,
-        };
-
-        match (self.is_list, is_list) {
-            (false, false) => {
-                self.definition
-                    .iter()
-                    .zip(array_mask.into_iter().zip(&self.array_mask))
-                    .for_each(|(def, (child_mask, parent_mask))| {
-                        merged_array_mask.push(*parent_mask && child_mask);
-                        match (parent_mask, child_mask) {
-                            (true, true) => {
-                                definition.push(max_definition);
-                            }
-                            (true, false) => {
-                                // The child is only legally null if its array is nullable.
-                                // Thus parent's max_definition is lower
-                                definition.push(if *def <= self.max_definition {
-                                    *def
-                                } else {
-                                    self.max_definition
-                                });
-                            }
-                            // if the parent was false, retain its definitions
-                            (false, _) => {
-                                definition.push(*def);
-                            }
-                        }
-                    });
-
-                debug_assert_eq!(definition.len(), merged_array_mask.len());
-
-                Self {
-                    definition,
-                    repetition: self.repetition.clone(), // it's None
-                    array_offsets,
-                    array_mask: merged_array_mask,
-                    max_definition,
-                    is_list: false,
-                    is_nullable,
-                }
-            }
-            (true, true) => {
-                // parent is a list or descendant of a list, and child is a list
-                let reps = self.repetition.clone().unwrap();
-                // Calculate the 2 list hierarchy definitions in advance
-                // List is not empty, but null
-                let l2 = max_definition - is_nullable as i16;
-                // List is not empty, and not null
-                let l3 = max_definition;
-
-                let mut nulls_seen = 0;
-
-                self.array_offsets.windows(2).for_each(|w| {
-                    let start = w[0] as usize;
-                    let end = w[1] as usize;
-                    let parent_len = end - start;
-
-                    if parent_len == 0 {
-                        // If the parent length is 0, there won't be a slot for the child
-                        let index = start + nulls_seen;
-                        definition.push(self.definition[index]);
-                        repetition.push(0);
-                        merged_array_mask.push(self.array_mask[index]);
-                        nulls_seen += 1;
-                    } else {
-                        (start..end).for_each(|parent_index| {
-                            let index = parent_index + nulls_seen;
-
-                            // parent is either defined at this level, or earlier
-                            let parent_def = self.definition[index];
-                            let parent_rep = reps[index];
-                            let parent_mask = self.array_mask[index];
-
-                            // valid parent, index into children
-                            let child_start = array_offsets[parent_index] as usize;
-                            let child_end = array_offsets[parent_index + 1] as usize;
-                            let child_len = child_end - child_start;
-                            let child_mask = array_mask[parent_index];
-                            let merged_mask = parent_mask && child_mask;
-
-                            if child_len == 0 {
-                                definition.push(parent_def);
-                                repetition.push(parent_rep);
-                                merged_array_mask.push(merged_mask);
-                            } else {
-                                (child_start..child_end).for_each(|child_index| {
-                                    let rep = match (
-                                        parent_index == start,
-                                        child_index == child_start,
-                                    ) {
-                                        (true, true) => parent_rep,
-                                        (true, false) => parent_rep + 2,
-                                        (false, true) => parent_rep,
-                                        (false, false) => parent_rep + 1,
-                                    };
-
-                                    definition.push(if !parent_mask {
-                                        parent_def
-                                    } else if child_mask {
-                                        l3
-                                    } else {
-                                        l2
-                                    });
-                                    repetition.push(rep);
-                                    merged_array_mask.push(merged_mask);
-                                });
-                            }
-                        });
-                    }
-                });
-
-                debug_assert_eq!(definition.len(), merged_array_mask.len());
-
-                Self {
-                    definition,
-                    repetition: Some(repetition),
-                    array_offsets,
-                    array_mask: merged_array_mask,
-                    max_definition,
-                    is_list: true,
-                    is_nullable,
-                }
-            }
-            (true, false) => {
-                // List and primitive (or struct).
-                // The list can have more values than the primitive, indicating that there
-                // are slots where the list is empty. We use a counter to track this behaviour.
-                let mut nulls_seen = 0;
-
-                // let child_max_definition = list_max_definition + is_nullable as i16;
-                // child values are a function of parent list offsets
-                let reps = self.repetition.as_deref().unwrap();
-                self.array_offsets.windows(2).for_each(|w| {
-                    let start = w[0] as usize;
-                    let end = w[1] as usize;
-                    let parent_len = end - start;
-
-                    if parent_len == 0 {
-                        let index = start + nulls_seen;
-                        definition.push(self.definition[index]);
-                        repetition.push(reps[index]);
-                        merged_array_mask.push(self.array_mask[index]);
-                        nulls_seen += 1;
-                    } else {
-                        // iterate through the array, adjusting child definitions for nulls
-                        (start..end).for_each(|child_index| {
-                            let index = child_index + nulls_seen;
-                            let child_mask = array_mask[child_index];
-                            let parent_mask = self.array_mask[index];
-                            let parent_def = self.definition[index];
-
-                            if !parent_mask || parent_def < self.max_definition {
-                                definition.push(parent_def);
-                                repetition.push(reps[index]);
-                                merged_array_mask.push(parent_mask);
-                            } else {
-                                definition.push(max_definition - !child_mask as i16);
-                                repetition.push(reps[index]);
-                                merged_array_mask.push(child_mask);
-                            }
-                        });
-                    }
-                });
-
-                debug_assert_eq!(definition.len(), merged_array_mask.len());
-
-                Self {
-                    definition,
-                    repetition: Some(repetition),
-                    array_offsets: self.array_offsets.clone(),
-                    array_mask: merged_array_mask,
-                    max_definition,
-                    is_list: true,
-                    is_nullable,
-                }
-            }
-            (false, true) => {
-                // Encountering a list for the first time.
-                // Calculate the 2 list hierarchy definitions in advance
-
-                // List is not empty, but null (if nullable)
-                let l2 = max_definition - is_nullable as i16;
-                // List is not empty, and not null
-                let l3 = max_definition;
-
-                self.definition
-                    .iter()
-                    .enumerate()
-                    .for_each(|(parent_index, def)| {
-                        let child_from = array_offsets[parent_index];
-                        let child_to = array_offsets[parent_index + 1];
-                        let child_len = child_to - child_from;
-                        let child_mask = array_mask[parent_index];
-                        let parent_mask = self.array_mask[parent_index];
-
-                        match (parent_mask, child_len) {
-                            (true, 0) => {
-                                // empty slot that is valid, i.e. {"parent": {"child": [] } }
-                                definition.push(if child_mask {
-                                    l2
-                                } else {
-                                    self.max_definition
-                                });
-                                repetition.push(0);
-                                merged_array_mask.push(child_mask);
-                            }
-                            (false, 0) => {
-                                definition.push(*def);
-                                repetition.push(0);
-                                merged_array_mask.push(child_mask);
-                            }
-                            (true, _) => {
-                                (child_from..child_to).for_each(|child_index| {
-                                    definition.push(if child_mask { l3 } else { l2 });
-                                    // mark the first child slot as 0, and the next as 1
-                                    repetition.push(if child_index == child_from {
-                                        0
-                                    } else {
-                                        1
-                                    });
-                                    merged_array_mask.push(child_mask);
-                                });
-                            }
-                            (false, _) => {
-                                (child_from..child_to).for_each(|child_index| {
-                                    definition.push(*def);
-                                    // mark the first child slot as 0, and the next as 1
-                                    repetition.push(if child_index == child_from {
-                                        0
-                                    } else {
-                                        1
-                                    });
-                                    merged_array_mask.push(false);
-                                });
-                            }
-                        }
-                    });
-
-                debug_assert_eq!(definition.len(), merged_array_mask.len());
-
-                Self {
-                    definition,
-                    repetition: Some(repetition),
-                    array_offsets,
-                    array_mask: merged_array_mask,
-                    max_definition,
-                    is_list: true,
-                    is_nullable,
-                }
-            }
-        }
-    }
-
-    /// Get the offsets of an array as 64-bit values, and validity masks as booleans
-    /// - Primitive, binary and struct arrays' offsets will be a sequence, masks obtained
-    ///   from validity bitmap
-    /// - List array offsets will be the value offsets, masks are computed from offsets
-    fn get_array_offsets_and_masks(array: &ArrayRef) -> (Vec<i64>, Vec<bool>) {
-        match array.data_type() {
-            DataType::Null
-            | DataType::Boolean
-            | DataType::Int8
-            | DataType::Int16
-            | DataType::Int32
-            | DataType::Int64
-            | DataType::UInt8
-            | DataType::UInt16
-            | DataType::UInt32
-            | DataType::UInt64
-            | DataType::Float16
-            | DataType::Float32
-            | DataType::Float64
-            | DataType::Timestamp(_, _)
-            | DataType::Date32
-            | DataType::Date64
-            | DataType::Time32(_)
-            | DataType::Time64(_)
-            | DataType::Duration(_)
-            | DataType::Interval(_)
-            | DataType::Binary
-            | DataType::LargeBinary
-            | DataType::Utf8
-            | DataType::LargeUtf8
-            | DataType::Struct(_)
-            | DataType::Dictionary(_, _)
-            | DataType::Decimal(_, _) => {
-                let array_mask = match array.data().null_buffer() {
-                    Some(buf) => get_bool_array_slice(buf, array.offset(), array.len()),
-                    None => vec![true; array.len()],
-                };
-                ((0..=(array.len() as i64)).collect(), array_mask)
-            }
-            DataType::List(_) => {
-                let data = array.data();
-                let offsets = unsafe { data.buffers()[0].typed_data::<i32>() };
-                let offsets = offsets
-                    .to_vec()
-                    .into_iter()
-                    .map(|v| v as i64)
-                    .collect::<Vec<i64>>();
-                let masks = offsets.windows(2).map(|w| w[1] > w[0]).collect();
-                (offsets, masks)
-            }
-            DataType::LargeList(_) => {
-                let offsets =
-                    unsafe { array.data().buffers()[0].typed_data::<i64>() }.to_vec();
-                let masks = offsets.windows(2).map(|w| w[1] > w[0]).collect();
-                (offsets, masks)
-            }
-            DataType::FixedSizeBinary(value_len) => {
-                let array_mask = match array.data().null_buffer() {
-                    Some(buf) => get_bool_array_slice(buf, array.offset(), array.len()),
-                    None => vec![true; array.len()],
-                };
-                let value_len = *value_len as i64;
-                (
-                    (0..=(array.len() as i64)).map(|v| v * value_len).collect(),
-                    array_mask,
-                )
-            }
-            DataType::FixedSizeList(_, _) | DataType::Union(_) => {
-                unimplemented!("Getting offsets not yet implemented")
-            }
-        }
-    }
-
-    /// Given a level's information, calculate the offsets required to index an array correctly.
-    pub(crate) fn filter_array_indices(&self) -> Vec<usize> {
-        // happy path if not dealing with lists
-        if !self.is_list {
-            return self
-                .definition
-                .iter()
-                .enumerate()
-                .filter_map(|(i, def)| {
-                    if *def == self.max_definition {
-                        Some(i)
-                    } else {
-                        None
-                    }
-                })
-                .collect();
-        }
-        let mut filtered = vec![];
-        // remove slots that are false from definition_mask
-        let mut index = 0;
-        self.definition.iter().for_each(|def| {
-            if *def == self.max_definition {
-                filtered.push(index);
-            }
-            if *def >= self.max_definition - self.is_nullable as i16 {
-                index += 1;
-            }
-        });
-        filtered
-    }
-}
-
-/// Convert an Arrow buffer to a boolean array slice
-/// TODO: this was created for buffers, so might not work for bool array, might be slow too
-#[inline]
-fn get_bool_array_slice(
-    buffer: &arrow::buffer::Buffer,
-    offset: usize,
-    len: usize,
-) -> Vec<bool> {
-    let data = buffer.as_slice();
-    (offset..(len + offset))
-        .map(|i| arrow::util::bit_util::get_bit(data, i))
-        .collect()
-}
-
-#[cfg(test)]
-mod tests {
-    use std::sync::Arc;
-
-    use arrow::{
-        array::ListArray,
-        array::{Array, ArrayData, Int32Array},
-        buffer::Buffer,
-        datatypes::Schema,
-    };
-    use arrow::{
-        array::{Float32Array, Float64Array, Int16Array},
-        datatypes::ToByteSlice,
-    };
-
-    use super::*;
-
-    #[test]
-    fn test_calculate_array_levels_twitter_example() {
-        // based on the example at https://blog.twitter.com/engineering/en_us/a/2013/dremel-made-simple-with-parquet.html
-        // [[a, b, c], [d, e, f, g]], [[h], [i,j]]
-        let parent_levels = LevelInfo {
-            definition: vec![0, 0],
-            repetition: None,
-            array_offsets: vec![0, 1, 2], // 2 records, root offsets always sequential
-            array_mask: vec![true, true], // both lists defined
-            max_definition: 0,
-            is_list: false,     // root is never list
-            is_nullable: false, // root in example is non-nullable
-        };
-        // offset into array, each level1 has 2 values
-        let array_offsets = vec![0, 2, 4];
-        let array_mask = vec![true, true];
-
-        // calculate level1 levels
-        let levels = parent_levels.calculate_child_levels(
-            array_offsets.clone(),
-            array_mask,
-            true,
-            false,
-        );
-        //
-        let expected_levels = LevelInfo {
-            definition: vec![1, 1, 1, 1],
-            repetition: Some(vec![0, 1, 0, 1]),
-            array_offsets,
-            array_mask: vec![true, true, true, true],
-            max_definition: 1,
-            is_list: true,
-            is_nullable: false,
-        };
-        // the separate asserts make it easier to see what's failing
-        assert_eq!(&levels.definition, &expected_levels.definition);
-        assert_eq!(&levels.repetition, &expected_levels.repetition);
-        assert_eq!(&levels.array_mask, &expected_levels.array_mask);
-        assert_eq!(&levels.array_offsets, &expected_levels.array_offsets);
-        assert_eq!(&levels.max_definition, &expected_levels.max_definition);
-        assert_eq!(&levels.is_list, &expected_levels.is_list);
-        assert_eq!(&levels.is_nullable, &expected_levels.is_nullable);
-        // this assert is to help if there are more variables added to the struct
-        assert_eq!(&levels, &expected_levels);
-
-        // level2
-        let parent_levels = levels;
-        let array_offsets = vec![0, 3, 7, 8, 10];
-        let array_mask = vec![true, true, true, true];
-        let levels = parent_levels.calculate_child_levels(
-            array_offsets.clone(),
-            array_mask,
-            true,
-            false,
-        );
-        let expected_levels = LevelInfo {
-            definition: vec![2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
-            repetition: Some(vec![0, 2, 2, 1, 2, 2, 2, 0, 1, 2]),
-            array_offsets,
-            array_mask: vec![true; 10],
-            max_definition: 2,
-            is_list: true,
-            is_nullable: false,
-        };
-        assert_eq!(&levels.definition, &expected_levels.definition);
-        assert_eq!(&levels.repetition, &expected_levels.repetition);
-        assert_eq!(&levels.array_mask, &expected_levels.array_mask);
-        assert_eq!(&levels.max_definition, &expected_levels.max_definition);
-        assert_eq!(&levels.array_offsets, &expected_levels.array_offsets);
-        assert_eq!(&levels.is_list, &expected_levels.is_list);
-        assert_eq!(&levels.is_nullable, &expected_levels.is_nullable);
-        assert_eq!(&levels, &expected_levels);
-    }
-
-    #[test]
-    fn test_calculate_one_level_1() {
-        // This test calculates the levels for a non-null primitive array
-        let parent_levels = LevelInfo {
-            definition: vec![0; 10],
-            repetition: None,
-            array_offsets: (0..=10).collect(),
-            array_mask: vec![true; 10],
-            max_definition: 0,
-            is_list: false,
-            is_nullable: false,
-        };
-        let array_offsets: Vec<i64> = (0..=10).collect();
-        let array_mask = vec![true; 10];
-
-        let levels = parent_levels.calculate_child_levels(
-            array_offsets.clone(),
-            array_mask.clone(),
-            false,
-            false,
-        );
-        let expected_levels = LevelInfo {
-            definition: vec![1; 10],
-            repetition: None,
-            array_offsets,
-            array_mask,
-            max_definition: 1,
-            is_list: false,
-            is_nullable: false,
-        };
-        assert_eq!(&levels, &expected_levels);
-    }
-
-    #[test]
-    fn test_calculate_one_level_2() {
-        // This test calculates the levels for a non-null primitive array
-        let parent_levels = LevelInfo {
-            definition: vec![0; 5],
-            repetition: None,
-            array_offsets: (0..=5).collect(),
-            array_mask: vec![true, true, true, true, true],
-            max_definition: 0,
-            is_list: false,
-            is_nullable: false,
-        };
-        let array_offsets: Vec<i64> = (0..=5).collect();
-        let array_mask = vec![true, false, true, true, false];
-
-        let levels = parent_levels.calculate_child_levels(
-            array_offsets.clone(),
-            array_mask.clone(),
-            false,
-            true,
-        );
-        let expected_levels = LevelInfo {
-            definition: vec![1, 0, 1, 1, 0],
-            repetition: None,
-            array_offsets,
-            array_mask,
-            max_definition: 1,
-            is_list: false,
-            is_nullable: true,
-        };
-        assert_eq!(&levels, &expected_levels);
-    }
-
-    #[test]
-    fn test_calculate_array_levels_1() {
-        // if all array values are defined (e.g. batch<list<_>>)
-        // [[0], [1], [2], [3], [4]]
-        let parent_levels = LevelInfo {
-            definition: vec![0; 5],
-            repetition: None,
-            array_offsets: vec![0, 1, 2, 3, 4, 5],
-            array_mask: vec![true, true, true, true, true],
-            max_definition: 0,
-            is_list: false,
-            is_nullable: false,
-        };
-        let array_offsets = vec![0, 2, 2, 4, 8, 11];
-        let array_mask = vec![true, false, true, true, true];
-
-        let levels = parent_levels.calculate_child_levels(
-            array_offsets.clone(),
-            array_mask,
-            true,
-            true,
-        );
-        // array: [[0, 0], _1_, [2, 2], [3, 3, 3, 3], [4, 4, 4]]
-        // all values are defined as we do not have nulls on the root (batch)
-        // repetition:
-        //   0: 0, 1
-        //   1:
-        //   2: 0, 1
-        //   3: 0, 1, 1, 1
-        //   4: 0, 1, 1
-        let expected_levels = LevelInfo {
-            definition: vec![2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2],
-            repetition: Some(vec![0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1]),
-            array_offsets,
-            array_mask: vec![
-                true, true, false, true, true, true, true, true, true, true, true, true,
-            ],
-            max_definition: 2,
-            is_list: true,
-            is_nullable: true,
-        };
-        assert_eq!(&levels.definition, &expected_levels.definition);
-        assert_eq!(&levels.repetition, &expected_levels.repetition);
-        assert_eq!(&levels.array_offsets, &expected_levels.array_offsets);
-        assert_eq!(&levels.max_definition, &expected_levels.max_definition);
-        assert_eq!(&levels.is_list, &expected_levels.is_list);
-        assert_eq!(&levels.is_nullable, &expected_levels.is_nullable);
-        assert_eq!(&levels, &expected_levels);
-    }
-
-    #[test]
-    fn test_calculate_array_levels_2() {
-        // If some values are null
-        //
-        // This emulates an array in the form: <struct<list<?>>
-        // with values:
-        // - 0: [0, 1], but is null because of the struct
-        // - 1: []
-        // - 2: [2, 3], but is null because of the struct
-        // - 3: [4, 5, 6, 7]
-        // - 4: [8, 9, 10]
-        //
-        // If the first values of a list are null due to a parent, we have to still account for them
-        // while indexing, because they would affect the way the child is indexed
-        // i.e. in the above example, we have to know that [0, 1] has to be skipped
-        let parent_levels = LevelInfo {
-            definition: vec![0, 1, 0, 1, 1],
-            repetition: None,
-            array_offsets: vec![0, 1, 2, 3, 4, 5],
-            array_mask: vec![false, true, false, true, true],
-            max_definition: 1,
-            is_list: false,
-            is_nullable: true,
-        };
-        let array_offsets = vec![0, 2, 2, 4, 8, 11];
-        let array_mask = vec![true, false, true, true, true];
-
-        let levels = parent_levels.calculate_child_levels(
-            array_offsets.clone(),
-            array_mask,
-            true,
-            true,
-        );
-        let expected_levels = LevelInfo {
-            // 0 1 [2] are 0 (not defined at level 1)
-            // [2] is 1, but has 0 slots so is not populated (defined at level 1 only)
-            // 2 3 [4] are 0
-            // 4 5 6 7 [8] are 1 (defined at level 1 only)
-            // 8 9 10 [11] are 2 (defined at both levels)
-            definition: vec![0, 0, 1, 0, 0, 3, 3, 3, 3, 3, 3, 3],
-            repetition: Some(vec![0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1]),
-            array_offsets,
-            array_mask: vec![
-                false, false, false, false, false, true, true, true, true, true, true,
-                true,
-            ],
-            max_definition: 3,
-            is_nullable: true,
-            is_list: true,
-        };
-        assert_eq!(&levels.definition, &expected_levels.definition);
-        assert_eq!(&levels.repetition, &expected_levels.repetition);
-        assert_eq!(&levels.array_offsets, &expected_levels.array_offsets);
-        assert_eq!(&levels.max_definition, &expected_levels.max_definition);
-        assert_eq!(&levels.is_list, &expected_levels.is_list);
-        assert_eq!(&levels.is_nullable, &expected_levels.is_nullable);
-        assert_eq!(&levels, &expected_levels);
-
-        // nested lists (using previous test)
-        let nested_parent_levels = levels;
-        let array_offsets = vec![0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22];
-        let array_mask = vec![
-            true, true, true, true, true, true, true, true, true, true, true,
-        ];
-        let levels = nested_parent_levels.calculate_child_levels(
-            array_offsets.clone(),
-            array_mask,
-            true,
-            true,
-        );
-        let expected_levels = LevelInfo {
-            // (def: 0) 0 1 [2] are 0 (take parent)
-            // (def: 0) 2 3 [4] are 0 (take parent)
-            // (def: 0) 4 5 [6] are 0 (take parent)
-            // (def: 0) 6 7 [8] are 0 (take parent)
-            // (def: 1) 8 9 [10] are 1 (take parent)
-            // (def: 1) 10 11 [12] are 1 (take parent)
-            // (def: 1) 12 23 [14] are 1 (take parent)
-            // (def: 1) 14 15 [16] are 1 (take parent)
-            // (def: 2) 16 17 [18] are 2 (defined at all levels)
-            // (def: 2) 18 19 [20] are 2 (defined at all levels)
-            // (def: 2) 20 21 [22] are 2 (defined at all levels)
-            //
-            // 0 1 [2] are 0 (not defined at level 1)
-            // [2] is 1, but has 0 slots so is not populated (defined at level 1 only)
-            // 2 3 [4] are 0
-            // 4 5 6 7 [8] are 1 (defined at level 1 only)
-            // 8 9 10 [11] are 2 (defined at both levels)
-            //
-            // 0: [[100, 101], [102, 103]]
-            // 1: []
-            // 2: [[104, 105], [106, 107]]
-            // 3: [[108, 109], [110, 111], [112, 113], [114, 115]]
-            // 4: [[116, 117], [118, 119], [120, 121]]
-            definition: vec![
-                0, 0, 0, 0, 1, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-            ],
-            repetition: Some(vec![
-                0, 2, 1, 2, 0, 0, 2, 1, 2, 0, 2, 1, 2, 1, 2, 1, 2, 0, 2, 1, 2, 1, 2,
-            ]),
-            array_offsets,
-            array_mask: vec![
-                false, false, false, false, false, false, false, false, false, true,
-                true, true, true, true, true, true, true, true, true, true, true, true,
-                true,
-            ],
-            max_definition: 5,
-            is_nullable: true,
-            is_list: true,
-        };
-        assert_eq!(&levels.definition, &expected_levels.definition);
-        assert_eq!(&levels.repetition, &expected_levels.repetition);
-        assert_eq!(&levels.array_offsets, &expected_levels.array_offsets);
-        assert_eq!(&levels.array_mask, &expected_levels.array_mask);
-        assert_eq!(&levels.max_definition, &expected_levels.max_definition);
-        assert_eq!(&levels.is_list, &expected_levels.is_list);
-        assert_eq!(&levels.is_nullable, &expected_levels.is_nullable);
-        assert_eq!(&levels, &expected_levels);
-    }
-
-    #[test]
-    fn test_calculate_array_levels_nested_list() {
-        // if all array values are defined (e.g. batch<list<_>>)
-        // The array at this level looks like:
-        // 0: [a]
-        // 1: [a]
-        // 2: [a]
-        // 3: [a]
-        let parent_levels = LevelInfo {
-            definition: vec![1, 1, 1, 1],
-            repetition: None,
-            array_offsets: vec![0, 1, 2, 3, 4],
-            array_mask: vec![true, true, true, true],
-            max_definition: 1,
-            is_list: false,
-            is_nullable: false,
-        };
-        // 0: null ([], but mask is false, so it's not just an empty list)
-        // 1: [1, 2, 3]
-        // 2: [4, 5]
-        // 3: [6, 7]
-        let array_offsets = vec![0, 1, 4, 6, 8];
-        let array_mask = vec![false, true, true, true];
-
-        let levels = parent_levels.calculate_child_levels(
-            array_offsets.clone(),
-            array_mask,
-            true,
-            true,
-        );
-        // 0: [null], level 1 is defined, but not 2
-        // 1: [1, 2, 3]
-        // 2: [4, 5]
-        // 3: [6, 7]
-        let expected_levels = LevelInfo {
-            definition: vec![2, 3, 3, 3, 3, 3, 3, 3],
-            repetition: Some(vec![0, 0, 1, 1, 0, 1, 0, 1]),
-            array_offsets,
-            array_mask: vec![false, true, true, true, true, true, true, true],
-            max_definition: 3,
-            is_list: true,
-            is_nullable: true,
-        };
-        assert_eq!(&levels.definition, &expected_levels.definition);
-        assert_eq!(&levels.repetition, &expected_levels.repetition);
-        assert_eq!(&levels.array_offsets, &expected_levels.array_offsets);
-        assert_eq!(&levels.max_definition, &expected_levels.max_definition);
-        assert_eq!(&levels.is_list, &expected_levels.is_list);
-        assert_eq!(&levels.is_nullable, &expected_levels.is_nullable);
-        assert_eq!(&levels, &expected_levels);
-
-        // nested lists (using previous test)
-        let nested_parent_levels = levels;
-        // 0: [null] (was a populated null slot at the parent)
-        // 1: [201]
-        // 2: [202, 203]
-        // 3: null ([])
-        // 4: [204, 205, 206]
-        // 5: [207, 208, 209, 210]
-        // 6: [] (tests a non-null empty list slot)
-        // 7: [211, 212, 213, 214, 215]
-        let array_offsets = vec![0, 1, 2, 4, 4, 7, 11, 11, 16];
-        // logically, the fist slot of the mask is false
-        let array_mask = vec![true, true, true, false, true, true, true, true];
-        let levels = nested_parent_levels.calculate_child_levels(
-            array_offsets.clone(),
-            array_mask,
-            true,
-            true,
-        );
-        // We have 7 array values, and at least 15 primitives (from array_offsets)
-        // 0: (-)[null], parent was null, no value populated here
-        // 1: (0)[201], (1)[202, 203], (2)[[null]]
-        // 2: (3)[204, 205, 206], (4)[207, 208, 209, 210]
-        // 3: (5)[[]], (6)[211, 212, 213, 214, 215]
-        //
-        // In a JSON syntax with the schema: <struct<list<list<primitive>>>>, this translates into:
-        // 0: {"struct": [ null ]}
-        // 1: {"struct": [ [201], [202, 203], [] ]}
-        // 2: {"struct": [ [204, 205, 206], [207, 208, 209, 210] ]}
-        // 3: {"struct": [ [], [211, 212, 213, 214, 215] ]}
-        let expected_levels = LevelInfo {
-            definition: vec![2, 5, 5, 5, 3, 5, 5, 5, 5, 5, 5, 5, 3, 5, 5, 5, 5, 5],
-            repetition: Some(vec![0, 0, 1, 2, 1, 0, 2, 2, 1, 2, 2, 2, 0, 1, 2, 2, 2, 2]),
-            array_mask: vec![
-                false, true, true, true, false, true, true, true, true, true, true, true,
-                true, true, true, true, true, true,
-            ],
-            array_offsets,
-            is_list: true,
-            is_nullable: true,
-            max_definition: 5,
-        };
-        assert_eq!(&levels.definition, &expected_levels.definition);
-        assert_eq!(&levels.repetition, &expected_levels.repetition);
-        assert_eq!(&levels.array_offsets, &expected_levels.array_offsets);
-        assert_eq!(&levels.array_mask, &expected_levels.array_mask);
-        assert_eq!(&levels.max_definition, &expected_levels.max_definition);
-        assert_eq!(&levels.is_list, &expected_levels.is_list);
-        assert_eq!(&levels.is_nullable, &expected_levels.is_nullable);
-        assert_eq!(&levels, &expected_levels);
-    }
-
-    #[test]
-    fn test_calculate_nested_struct_levels() {
-        // tests a <struct[a]<struct[b]<int[c]>>
-        // array:
-        //  - {a: {b: {c: 1}}}
-        //  - {a: {b: {c: null}}}
-        //  - {a: {b: {c: 3}}}
-        //  - {a: {b: null}}
-        //  - {a: null}}
-        //  - {a: {b: {c: 6}}}
-        let a_levels = LevelInfo {
-            definition: vec![1, 1, 1, 1, 0, 1],
-            repetition: None,
-            array_offsets: (0..=6).collect(),
-            array_mask: vec![true, true, true, true, false, true],
-            max_definition: 1,
-            is_list: false,
-            is_nullable: true,
-        };
-        // b's offset and mask
-        let b_offsets: Vec<i64> = (0..=6).collect();
-        let b_mask = vec![true, true, true, false, false, true];
-        // b's expected levels
-        let b_expected_levels = LevelInfo {
-            definition: vec![2, 2, 2, 1, 0, 2],
-            repetition: None,
-            array_offsets: (0..=6).collect(),
-            array_mask: vec![true, true, true, false, false, true],
-            max_definition: 2,
-            is_list: false,
-            is_nullable: true,
-        };
-        let b_levels =
-            a_levels.calculate_child_levels(b_offsets.clone(), b_mask, false, true);
-        assert_eq!(&b_expected_levels, &b_levels);
-
-        // c's offset and mask
-        let c_offsets = b_offsets;
-        let c_mask = vec![true, false, true, false, false, true];
-        // c's expected levels
-        let c_expected_levels = LevelInfo {
-            definition: vec![3, 2, 3, 1, 0, 3],
-            repetition: None,
-            array_offsets: c_offsets.clone(),
-            array_mask: vec![true, false, true, false, false, true],
-            max_definition: 3,
-            is_list: false,
-            is_nullable: true,
-        };
-        let c_levels = b_levels.calculate_child_levels(c_offsets, c_mask, false, true);
-        assert_eq!(&c_expected_levels, &c_levels);
-    }
-
-    #[test]
-    fn list_single_column() {
-        // this tests the level generation from the arrow_writer equivalent test
-
-        let a_values = Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
-        let a_value_offsets =
-            arrow::buffer::Buffer::from(&[0, 1, 3, 3, 6, 10].to_byte_slice());
-        let a_list_type =
-            DataType::List(Box::new(Field::new("item", DataType::Int32, true)));
-        let a_list_data = ArrayData::builder(a_list_type.clone())
-            .len(5)
-            .add_buffer(a_value_offsets)
-            .null_bit_buffer(Buffer::from(vec![0b00011011]))
-            .add_child_data(a_values.data().clone())
-            .build();
-
-        assert_eq!(a_list_data.null_count(), 1);
-
-        let a = ListArray::from(a_list_data);
-        let values = Arc::new(a);
-
-        let schema = Schema::new(vec![Field::new("item", a_list_type, true)]);
-
-        let batch = RecordBatch::try_new(Arc::new(schema), vec![values]).unwrap();
-
-        let expected_batch_level = LevelInfo {
-            definition: vec![0; 5],
-            repetition: None,
-            array_offsets: (0..=5).collect(),
-            array_mask: vec![true, true, true, true, true],
-            max_definition: 0,
-            is_list: false,
-            is_nullable: false,
-        };
-
-        let batch_level = LevelInfo::new_from_batch(&batch);
-        assert_eq!(&batch_level, &expected_batch_level);
-
-        // calculate the list's level
-        let mut levels = vec![];
-        batch
-            .columns()
-            .iter()
-            .zip(batch.schema().fields())
-            .for_each(|(array, field)| {
-                let mut array_levels = batch_level.calculate_array_levels(array, field);
-                levels.append(&mut array_levels);
-            });
-        assert_eq!(levels.len(), 1);
-
-        let list_level = levels.get(0).unwrap();
-
-        let expected_level = LevelInfo {
-            definition: vec![3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3],
-            repetition: Some(vec![0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1]),
-            array_offsets: vec![0, 1, 3, 3, 6, 10],
-            array_mask: vec![
-                true, true, true, false, true, true, true, true, true, true, true,
-            ],
-            max_definition: 3,
-            is_list: true,
-            is_nullable: true,
-        };
-        assert_eq!(&list_level.definition, &expected_level.definition);
-        assert_eq!(&list_level.repetition, &expected_level.repetition);
-        assert_eq!(&list_level.array_offsets, &expected_level.array_offsets);
-        assert_eq!(&list_level.array_mask, &expected_level.array_mask);
-        assert_eq!(&list_level.max_definition, &expected_level.max_definition);
-        assert_eq!(&list_level.is_list, &expected_level.is_list);
-        assert_eq!(&list_level.is_nullable, &expected_level.is_nullable);
-        assert_eq!(list_level, &expected_level);
-    }
-
-    #[test]
-    fn mixed_struct_list() {
-        // this tests the level generation from the equivalent arrow_writer_complex test
-
-        // define schema
-        let struct_field_d = Field::new("d", DataType::Float64, true);
-        let struct_field_f = Field::new("f", DataType::Float32, true);
-        let struct_field_g = Field::new(
-            "g",
-            DataType::List(Box::new(Field::new("items", DataType::Int16, false))),
-            false,
-        );
-        let struct_field_e = Field::new(
-            "e",
-            DataType::Struct(vec![struct_field_f.clone(), struct_field_g.clone()]),
-            true,
-        );
-        let schema = Schema::new(vec![
-            Field::new("a", DataType::Int32, false),
-            Field::new("b", DataType::Int32, true),
-            Field::new(
-                "c",
-                DataType::Struct(vec![struct_field_d.clone(), struct_field_e.clone()]),
-                false,
-            ),
-        ]);
-
-        // create some data
-        let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
-        let b = Int32Array::from(vec![Some(1), None, None, Some(4), Some(5)]);
-        let d = Float64Array::from(vec![None, None, None, Some(1.0), None]);
-        let f = Float32Array::from(vec![Some(0.0), None, Some(333.3), None, Some(5.25)]);
-
-        let g_value = Int16Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
-
-        // Construct a buffer for value offsets, for the nested array:
-        //  [[1], [2, 3], null, [4, 5, 6], [7, 8, 9, 10]]
-        let g_value_offsets =
-            arrow::buffer::Buffer::from(&[0, 1, 3, 3, 6, 10].to_byte_slice());
-
-        // Construct a list array from the above two
-        let g_list_data = ArrayData::builder(struct_field_g.data_type().clone())
-            .len(5)
-            .add_buffer(g_value_offsets)
-            .add_child_data(g_value.data().clone())
-            .build();
-        let g = ListArray::from(g_list_data);
-
-        let e = StructArray::from(vec![
-            (struct_field_f, Arc::new(f) as ArrayRef),
-            (struct_field_g, Arc::new(g) as ArrayRef),
-        ]);
-
-        let c = StructArray::from(vec![
-            (struct_field_d, Arc::new(d) as ArrayRef),
-            (struct_field_e, Arc::new(e) as ArrayRef),
-        ]);
-
-        // build a record batch
-        let batch = RecordBatch::try_new(
-            Arc::new(schema),
-            vec![Arc::new(a), Arc::new(b), Arc::new(c)],
-        )
-        .unwrap();
-
-        //////////////////////////////////////////////
-        let expected_batch_level = LevelInfo {
-            definition: vec![0; 5],
-            repetition: None,
-            array_offsets: (0..=5).collect(),
-            array_mask: vec![true, true, true, true, true],
-            max_definition: 0,
-            is_list: false,
-            is_nullable: false,
-        };
-
-        let batch_level = LevelInfo::new_from_batch(&batch);
-        assert_eq!(&batch_level, &expected_batch_level);
-
-        // calculate the list's level
-        let mut levels = vec![];
-        batch
-            .columns()
-            .iter()
-            .zip(batch.schema().fields())
-            .for_each(|(array, field)| {
-                let mut array_levels = batch_level.calculate_array_levels(array, field);
-                levels.append(&mut array_levels);
-            });
-        assert_eq!(levels.len(), 5);
-
-        // test "a" levels
-        let list_level = levels.get(0).unwrap();
-
-        let expected_level = LevelInfo {
-            definition: vec![1, 1, 1, 1, 1],
-            repetition: None,
-            array_offsets: vec![0, 1, 2, 3, 4, 5],
-            array_mask: vec![true, true, true, true, true],
-            max_definition: 1,
-            is_list: false,
-            is_nullable: false,
-        };
-        assert_eq!(list_level, &expected_level);
-
-        // test "b" levels
-        let list_level = levels.get(1).unwrap();
-
-        let expected_level = LevelInfo {
-            definition: vec![1, 0, 0, 1, 1],
-            repetition: None,
-            array_offsets: vec![0, 1, 2, 3, 4, 5],
-            array_mask: vec![true, false, false, true, true],
-            max_definition: 1,
-            is_list: false,
-            is_nullable: true,
-        };
-        assert_eq!(list_level, &expected_level);
-
-        // test "d" levels
-        let list_level = levels.get(2).unwrap();
-
-        let expected_level = LevelInfo {
-            definition: vec![1, 1, 1, 2, 1],
-            repetition: None,
-            array_offsets: vec![0, 1, 2, 3, 4, 5],
-            array_mask: vec![false, false, false, true, false],
-            max_definition: 2,
-            is_list: false,
-            is_nullable: true,
-        };
-        assert_eq!(list_level, &expected_level);
-
-        // test "f" levels
-        let list_level = levels.get(3).unwrap();
-
-        let expected_level = LevelInfo {
-            definition: vec![3, 2, 3, 2, 3],
-            repetition: None,
-            array_offsets: vec![0, 1, 2, 3, 4, 5],
-            array_mask: vec![true, false, true, false, true],
-            max_definition: 3,
-            is_list: false,
-            is_nullable: true,
-        };
-        assert_eq!(list_level, &expected_level);
-    }
-
-    #[test]
-    fn test_filter_array_indices() {
-        let level = LevelInfo {
-            definition: vec![3, 3, 3, 1, 3, 3, 3],
-            repetition: Some(vec![0, 1, 1, 0, 0, 1, 1]),
-            array_offsets: vec![0, 3, 3, 6],
-            array_mask: vec![true, true, true, false, true, true, true],
-            max_definition: 3,
-            is_list: true,
-            is_nullable: true,
-        };
-
-        let expected = vec![0, 1, 2, 3, 4, 5];
-        let filter = level.filter_array_indices();
-        assert_eq!(expected, filter);
-    }
-}
diff --git a/rust/parquet/src/arrow/mod.rs b/rust/parquet/src/arrow/mod.rs
deleted file mode 100644
index b1aa39ebafa..00000000000
--- a/rust/parquet/src/arrow/mod.rs
+++ /dev/null
@@ -1,69 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! [Apache Arrow](http://arrow.apache.org/) is a cross-language development platform for
-//! in-memory data.
-//!
-//! This mod provides API for converting between arrow and parquet.
-//!
-//! # Example of reading parquet file into arrow record batch
-//!
-//! ```rust, no_run
-//! use arrow::record_batch::RecordBatchReader;
-//! use parquet::file::reader::SerializedFileReader;
-//! use parquet::arrow::{ParquetFileArrowReader, ArrowReader};
-//! use std::sync::Arc;
-//! use std::fs::File;
-//!
-//! let file = File::open("parquet.file").unwrap();
-//! let file_reader = SerializedFileReader::new(file).unwrap();
-//! let mut arrow_reader = ParquetFileArrowReader::new(Arc::new(file_reader));
-//!
-//! println!("Converted arrow schema is: {}", arrow_reader.get_schema().unwrap());
-//! println!("Arrow schema after projection is: {}",
-//!    arrow_reader.get_schema_by_columns(vec![2, 4, 6], true).unwrap());
-//!
-//! let mut record_batch_reader = arrow_reader.get_record_reader(2048).unwrap();
-//!
-//! for maybe_record_batch in record_batch_reader {
-//!    let record_batch = maybe_record_batch.unwrap();
-//!    if record_batch.num_rows() > 0 {
-//!        println!("Read {} records.", record_batch.num_rows());
-//!    } else {
-//!        println!("End of file!");
-//!    }
-//!}
-//! ```
-
-pub(in crate::arrow) mod array_reader;
-pub mod arrow_reader;
-pub mod arrow_writer;
-pub(in crate::arrow) mod converter;
-pub(in crate::arrow) mod levels;
-pub(in crate::arrow) mod record_reader;
-pub mod schema;
-
-pub use self::arrow_reader::ArrowReader;
-pub use self::arrow_reader::ParquetFileArrowReader;
-pub use self::arrow_writer::ArrowWriter;
-pub use self::schema::{
-    arrow_to_parquet_schema, parquet_to_arrow_schema, parquet_to_arrow_schema_by_columns,
-    parquet_to_arrow_schema_by_root_columns,
-};
-
-/// Schema metadata key used to store serialized Arrow IPC schema
-pub const ARROW_SCHEMA_META_KEY: &str = "ARROW:schema";
diff --git a/rust/parquet/src/arrow/record_reader.rs b/rust/parquet/src/arrow/record_reader.rs
deleted file mode 100644
index 7e3b6a847e7..00000000000
--- a/rust/parquet/src/arrow/record_reader.rs
+++ /dev/null
@@ -1,794 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::cmp::{max, min};
-use std::mem::{replace, size_of};
-
-use crate::column::{page::PageReader, reader::ColumnReaderImpl};
-use crate::data_type::DataType;
-use crate::errors::{ParquetError, Result};
-use crate::schema::types::ColumnDescPtr;
-use arrow::array::BooleanBufferBuilder;
-use arrow::bitmap::Bitmap;
-use arrow::buffer::{Buffer, MutableBuffer};
-
-const MIN_BATCH_SIZE: usize = 1024;
-
-/// A `RecordReader` is a stateful column reader that delimits semantic records.
-pub struct RecordReader<T: DataType> {
-    column_desc: ColumnDescPtr,
-
-    records: MutableBuffer,
-    def_levels: Option<MutableBuffer>,
-    rep_levels: Option<MutableBuffer>,
-    null_bitmap: Option<BooleanBufferBuilder>,
-    column_reader: Option<ColumnReaderImpl<T>>,
-
-    /// Number of records accumulated in records
-    num_records: usize,
-    /// Number of values `num_records` contains.
-    num_values: usize,
-
-    values_seen: usize,
-    /// Starts from 1, number of values have been written to buffer
-    values_written: usize,
-    in_middle_of_record: bool,
-}
-
-impl<T: DataType> RecordReader<T> {
-    pub fn new(column_schema: ColumnDescPtr) -> Self {
-        let (def_levels, null_map) = if column_schema.max_def_level() > 0 {
-            (
-                Some(MutableBuffer::new(MIN_BATCH_SIZE)),
-                Some(BooleanBufferBuilder::new(0)),
-            )
-        } else {
-            (None, None)
-        };
-
-        let rep_levels = if column_schema.max_rep_level() > 0 {
-            Some(MutableBuffer::new(MIN_BATCH_SIZE))
-        } else {
-            None
-        };
-
-        Self {
-            records: MutableBuffer::new(MIN_BATCH_SIZE),
-            def_levels,
-            rep_levels,
-            null_bitmap: null_map,
-            column_reader: None,
-            column_desc: column_schema,
-            num_records: 0,
-            num_values: 0,
-            values_seen: 0,
-            values_written: 0,
-            in_middle_of_record: false,
-        }
-    }
-
-    /// Set the current page reader.
-    pub fn set_page_reader(&mut self, page_reader: Box<dyn PageReader>) -> Result<()> {
-        self.column_reader =
-            Some(ColumnReaderImpl::new(self.column_desc.clone(), page_reader));
-        Ok(())
-    }
-
-    /// Try to read `num_records` of column data into internal buffer.
-    ///
-    /// # Returns
-    ///
-    /// Number of actual records read.
-    pub fn read_records(&mut self, num_records: usize) -> Result<usize> {
-        if self.column_reader.is_none() {
-            return Ok(0);
-        }
-
-        let mut records_read = 0;
-
-        // Used to mark whether we have reached the end of current
-        // column chunk
-        let mut end_of_column = false;
-
-        loop {
-            // Try to find some records from buffers that has been read into memory
-            // but not counted as seen records.
-            records_read += self.split_records(num_records - records_read)?;
-
-            // Since page reader contains complete records, so if we reached end of a
-            // page reader, we should reach the end of a record
-            if end_of_column
-                && self.values_seen >= self.values_written
-                && self.in_middle_of_record
-            {
-                self.num_records += 1;
-                self.num_values = self.values_seen;
-                self.in_middle_of_record = false;
-                records_read += 1;
-            }
-
-            if (records_read >= num_records) || end_of_column {
-                break;
-            }
-
-            let batch_size = max(num_records - records_read, MIN_BATCH_SIZE);
-
-            // Try to more value from parquet pages
-            let values_read = self.read_one_batch(batch_size)?;
-            if values_read < batch_size {
-                end_of_column = true;
-            }
-        }
-
-        Ok(records_read)
-    }
-
-    /// Returns number of records stored in buffer.
-    pub fn num_records(&self) -> usize {
-        self.num_records
-    }
-
-    /// Return number of values stored in buffer.
-    /// If the parquet column is not repeated, it should be equals to `num_records`,
-    /// otherwise it should be larger than or equal to `num_records`.
-    pub fn num_values(&self) -> usize {
-        self.num_values
-    }
-
-    /// Returns definition level data.
-    /// The implementation has side effects. It will create a new buffer to hold those
-    /// definition level values that have already been read into memory but not counted
-    /// as record values, e.g. those from `self.num_values` to `self.values_written`.
-    pub fn consume_def_levels(&mut self) -> Result<Option<Buffer>> {
-        let new_buffer = if let Some(ref mut def_levels_buf) = &mut self.def_levels {
-            let num_left_values = self.values_written - self.num_values;
-            // create an empty buffer, as it will be resized below
-            let mut new_buffer = MutableBuffer::new(0);
-            let num_bytes = num_left_values * size_of::<i16>();
-            let new_len = self.num_values * size_of::<i16>();
-
-            new_buffer.resize(num_bytes, 0);
-
-            let new_def_levels = new_buffer.as_slice_mut();
-            let left_def_levels = &def_levels_buf.as_slice_mut()[new_len..];
-
-            new_def_levels[0..num_bytes].copy_from_slice(&left_def_levels[0..num_bytes]);
-
-            def_levels_buf.resize(new_len, 0);
-            Some(new_buffer)
-        } else {
-            None
-        };
-
-        Ok(replace(&mut self.def_levels, new_buffer).map(|x| x.into()))
-    }
-
-    /// Return repetition level data.
-    /// The side effect is similar to `consume_def_levels`.
-    pub fn consume_rep_levels(&mut self) -> Result<Option<Buffer>> {
-        // TODO: Optimize to reduce the copy
-        let new_buffer = if let Some(ref mut rep_levels_buf) = &mut self.rep_levels {
-            let num_left_values = self.values_written - self.num_values;
-            // create an empty buffer, as it will be resized below
-            let mut new_buffer = MutableBuffer::new(0);
-            let num_bytes = num_left_values * size_of::<i16>();
-            let new_len = self.num_values * size_of::<i16>();
-
-            new_buffer.resize(num_bytes, 0);
-
-            let new_rep_levels = new_buffer.as_slice_mut();
-            let left_rep_levels = &rep_levels_buf.as_slice_mut()[new_len..];
-
-            new_rep_levels[0..num_bytes].copy_from_slice(&left_rep_levels[0..num_bytes]);
-
-            rep_levels_buf.resize(new_len, 0);
-
-            Some(new_buffer)
-        } else {
-            None
-        };
-
-        Ok(replace(&mut self.rep_levels, new_buffer).map(|x| x.into()))
-    }
-
-    /// Returns currently stored buffer data.
-    /// The side effect is similar to `consume_def_levels`.
-    pub fn consume_record_data(&mut self) -> Result<Buffer> {
-        // TODO: Optimize to reduce the copy
-        let num_left_values = self.values_written - self.num_values;
-        // create an empty buffer, as it will be resized below
-        let mut new_buffer = MutableBuffer::new(0);
-        let num_bytes = num_left_values * T::get_type_size();
-        let new_len = self.num_values * T::get_type_size();
-
-        new_buffer.resize(num_bytes, 0);
-
-        let new_records = new_buffer.as_slice_mut();
-        let left_records = &mut self.records.as_slice_mut()[new_len..];
-
-        new_records[0..num_bytes].copy_from_slice(&left_records[0..num_bytes]);
-
-        self.records.resize(new_len, 0);
-
-        Ok(replace(&mut self.records, new_buffer).into())
-    }
-
-    /// Returns currently stored null bitmap data.
-    /// The side effect is similar to `consume_def_levels`.
-    pub fn consume_bitmap_buffer(&mut self) -> Result<Option<Buffer>> {
-        // TODO: Optimize to reduce the copy
-        if self.column_desc.max_def_level() > 0 {
-            assert!(self.null_bitmap.is_some());
-            let num_left_values = self.values_written - self.num_values;
-            let new_bitmap_builder = Some(BooleanBufferBuilder::new(max(
-                MIN_BATCH_SIZE,
-                num_left_values,
-            )));
-
-            let old_bitmap = replace(&mut self.null_bitmap, new_bitmap_builder)
-                .map(|mut builder| builder.finish())
-                .unwrap();
-
-            let old_bitmap = Bitmap::from(old_bitmap);
-
-            for i in self.num_values..self.values_written {
-                self.null_bitmap
-                    .as_mut()
-                    .unwrap()
-                    .append(old_bitmap.is_set(i));
-            }
-
-            Ok(Some(old_bitmap.into_buffer()))
-        } else {
-            Ok(None)
-        }
-    }
-
-    /// Reset state of record reader.
-    /// Should be called after consuming data, e.g. `consume_rep_levels`,
-    /// `consume_rep_levels`, `consume_record_data` and `consume_bitmap_buffer`.
-    pub fn reset(&mut self) {
-        self.values_written -= self.num_values;
-        self.num_records = 0;
-        self.num_values = 0;
-        self.values_seen = 0;
-        self.in_middle_of_record = false;
-    }
-
-    /// Returns bitmap data.
-    pub fn consume_bitmap(&mut self) -> Result<Option<Bitmap>> {
-        self.consume_bitmap_buffer()
-            .map(|buffer| buffer.map(Bitmap::from))
-    }
-
-    /// Try to read one batch of data.
-    fn read_one_batch(&mut self, batch_size: usize) -> Result<usize> {
-        // Reserve spaces
-        self.records
-            .resize(self.records.len() + batch_size * T::get_type_size(), 0);
-        if let Some(ref mut buf) = self.rep_levels {
-            buf.resize(buf.len() + batch_size * size_of::<i16>(), 0);
-        }
-        if let Some(ref mut buf) = self.def_levels {
-            buf.resize(buf.len() + batch_size * size_of::<i16>(), 0);
-        }
-
-        let values_written = self.values_written;
-
-        // Convert mutable buffer spaces to mutable slices
-        let (prefix, values, suffix) =
-            unsafe { self.records.as_slice_mut().align_to_mut::<T::T>() };
-        assert!(prefix.is_empty() && suffix.is_empty());
-        let values = &mut values[values_written..];
-
-        let def_levels = self.def_levels.as_mut().map(|buf| {
-            let (prefix, def_levels, suffix) =
-                unsafe { buf.as_slice_mut().align_to_mut::<i16>() };
-            assert!(prefix.is_empty() && suffix.is_empty());
-            &mut def_levels[values_written..]
-        });
-
-        let rep_levels = self.rep_levels.as_mut().map(|buf| {
-            let (prefix, rep_levels, suffix) =
-                unsafe { buf.as_slice_mut().align_to_mut::<i16>() };
-            assert!(prefix.is_empty() && suffix.is_empty());
-            &mut rep_levels[values_written..]
-        });
-
-        let (values_read, levels_read) = self
-            .column_reader
-            .as_mut()
-            .unwrap()
-            .read_batch(batch_size, def_levels, rep_levels, values)?;
-
-        // get new references for the def levels.
-        let def_levels = self.def_levels.as_ref().map(|buf| {
-            let (prefix, def_levels, suffix) =
-                unsafe { buf.as_slice().align_to::<i16>() };
-            assert!(prefix.is_empty() && suffix.is_empty());
-            &def_levels[values_written..]
-        });
-
-        let max_def_level = self.column_desc.max_def_level();
-
-        if values_read < levels_read {
-            let def_levels = def_levels.ok_or_else(|| {
-                general_err!(
-                    "Definition levels should exist when data is less than levels!"
-                )
-            })?;
-
-            // Fill spaces in column data with default values
-            let mut values_pos = values_read;
-            let mut level_pos = levels_read;
-
-            while level_pos > values_pos {
-                if def_levels[level_pos - 1] == max_def_level {
-                    // This values is not empty
-                    // We use swap rather than assign here because T::T doesn't
-                    // implement Copy
-                    values.swap(level_pos - 1, values_pos - 1);
-                    values_pos -= 1;
-                } else {
-                    values[level_pos - 1] = T::T::default();
-                }
-
-                level_pos -= 1;
-            }
-        }
-
-        // Fill in bitmap data
-        if let Some(null_buffer) = self.null_bitmap.as_mut() {
-            let def_levels = def_levels.ok_or_else(|| {
-                general_err!(
-                    "Definition levels should exist when data is less than levels!"
-                )
-            })?;
-            (0..levels_read)
-                .for_each(|idx| null_buffer.append(def_levels[idx] == max_def_level));
-        }
-
-        let values_read = max(values_read, levels_read);
-        self.set_values_written(self.values_written + values_read)?;
-        Ok(values_read)
-    }
-
-    /// Split values into records according repetition definition and returns number of
-    /// records read.
-    #[allow(clippy::unnecessary_wraps)]
-    fn split_records(&mut self, records_to_read: usize) -> Result<usize> {
-        let rep_levels = self.rep_levels.as_ref().map(|buf| {
-            let (prefix, rep_levels, suffix) =
-                unsafe { buf.as_slice().align_to::<i16>() };
-            assert!(prefix.is_empty() && suffix.is_empty());
-            rep_levels
-        });
-
-        match rep_levels {
-            Some(buf) => {
-                let mut records_read = 0;
-
-                while (self.values_seen < self.values_written)
-                    && (records_read < records_to_read)
-                {
-                    if buf[self.values_seen] == 0 {
-                        if self.in_middle_of_record {
-                            records_read += 1;
-                            self.num_records += 1;
-                            self.num_values = self.values_seen;
-                        }
-                        self.in_middle_of_record = true;
-                    }
-                    self.values_seen += 1;
-                }
-
-                Ok(records_read)
-            }
-            None => {
-                let records_read =
-                    min(records_to_read, self.values_written - self.values_seen);
-                self.num_records += records_read;
-                self.num_values += records_read;
-                self.values_seen += records_read;
-                self.in_middle_of_record = false;
-
-                Ok(records_read)
-            }
-        }
-    }
-
-    #[allow(clippy::unnecessary_wraps)]
-    fn set_values_written(&mut self, new_values_written: usize) -> Result<()> {
-        self.values_written = new_values_written;
-        self.records
-            .resize(self.values_written * T::get_type_size(), 0);
-
-        let new_levels_len = self.values_written * size_of::<i16>();
-
-        if let Some(ref mut buf) = self.rep_levels {
-            buf.resize(new_levels_len, 0)
-        };
-
-        if let Some(ref mut buf) = self.def_levels {
-            buf.resize(new_levels_len, 0)
-        };
-
-        Ok(())
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::RecordReader;
-    use crate::basic::Encoding;
-    use crate::column::page::Page;
-    use crate::column::page::PageReader;
-    use crate::data_type::Int32Type;
-    use crate::errors::Result;
-    use crate::schema::parser::parse_message_type;
-    use crate::schema::types::SchemaDescriptor;
-    use crate::util::test_common::page_util::{DataPageBuilder, DataPageBuilderImpl};
-    use arrow::array::{BooleanBufferBuilder, Int16BufferBuilder, Int32BufferBuilder};
-    use arrow::bitmap::Bitmap;
-    use std::sync::Arc;
-
-    struct TestPageReader {
-        pages: Box<dyn Iterator<Item = Page>>,
-    }
-
-    impl TestPageReader {
-        pub fn new(pages: Vec<Page>) -> Self {
-            Self {
-                pages: Box::new(pages.into_iter()),
-            }
-        }
-    }
-
-    impl PageReader for TestPageReader {
-        fn get_next_page(&mut self) -> Result<Option<Page>> {
-            Ok(self.pages.next())
-        }
-    }
-
-    #[test]
-    fn test_read_required_records() {
-        // Construct column schema
-        let message_type = "
-        message test_schema {
-          REQUIRED INT32 leaf;
-        }
-        ";
-        let desc = parse_message_type(message_type)
-            .map(|t| SchemaDescriptor::new(Arc::new(t)))
-            .map(|s| s.column(0))
-            .unwrap();
-
-        // Construct record reader
-        let mut record_reader = RecordReader::<Int32Type>::new(desc.clone());
-
-        // First page
-
-        // Records data:
-        // test_schema
-        //   leaf: 4
-        // test_schema
-        //   leaf: 7
-        // test_schema
-        //   leaf: 6
-        // test_schema
-        //   left: 3
-        // test_schema
-        //   left: 2
-        {
-            let values = [4, 7, 6, 3, 2];
-            let mut pb = DataPageBuilderImpl::new(desc.clone(), 5, true);
-            pb.add_values::<Int32Type>(Encoding::PLAIN, &values);
-            let page = pb.consume();
-
-            let page_reader = Box::new(TestPageReader::new(vec![page]));
-            record_reader.set_page_reader(page_reader).unwrap();
-            assert_eq!(2, record_reader.read_records(2).unwrap());
-            assert_eq!(2, record_reader.num_records());
-            assert_eq!(2, record_reader.num_values());
-            assert_eq!(3, record_reader.read_records(3).unwrap());
-            assert_eq!(5, record_reader.num_records());
-            assert_eq!(5, record_reader.num_values());
-        }
-
-        // Second page
-
-        // Records data:
-        // test_schema
-        //   leaf: 8
-        // test_schema
-        //   leaf: 9
-        {
-            let values = [8, 9];
-            let mut pb = DataPageBuilderImpl::new(desc, 2, true);
-            pb.add_values::<Int32Type>(Encoding::PLAIN, &values);
-            let page = pb.consume();
-
-            let page_reader = Box::new(TestPageReader::new(vec![page]));
-            record_reader.set_page_reader(page_reader).unwrap();
-            assert_eq!(2, record_reader.read_records(10).unwrap());
-            assert_eq!(7, record_reader.num_records());
-            assert_eq!(7, record_reader.num_values());
-        }
-
-        let mut bb = Int32BufferBuilder::new(7);
-        bb.append_slice(&[4, 7, 6, 3, 2, 8, 9]);
-        let expected_buffer = bb.finish();
-        assert_eq!(
-            expected_buffer,
-            record_reader.consume_record_data().unwrap()
-        );
-        assert_eq!(None, record_reader.consume_def_levels().unwrap());
-        assert_eq!(None, record_reader.consume_bitmap().unwrap());
-    }
-
-    #[test]
-    fn test_read_optional_records() {
-        // Construct column schema
-        let message_type = "
-        message test_schema {
-          OPTIONAL Group test_struct {
-            OPTIONAL INT32 leaf;
-          }
-        }
-        ";
-
-        let desc = parse_message_type(message_type)
-            .map(|t| SchemaDescriptor::new(Arc::new(t)))
-            .map(|s| s.column(0))
-            .unwrap();
-
-        // Construct record reader
-        let mut record_reader = RecordReader::<Int32Type>::new(desc.clone());
-
-        // First page
-
-        // Records data:
-        // test_schema
-        //   test_struct
-        // test_schema
-        //   test_struct
-        //     left: 7
-        // test_schema
-        // test_schema
-        //   test_struct
-        //     leaf: 6
-        // test_schema
-        //   test_struct
-        //     leaf: 6
-        {
-            let values = [7, 6, 3];
-            //empty, non-empty, empty, non-empty, non-empty
-            let def_levels = [1i16, 2i16, 0i16, 2i16, 2i16];
-            let mut pb = DataPageBuilderImpl::new(desc.clone(), 5, true);
-            pb.add_def_levels(2, &def_levels);
-            pb.add_values::<Int32Type>(Encoding::PLAIN, &values);
-            let page = pb.consume();
-
-            let page_reader = Box::new(TestPageReader::new(vec![page]));
-            record_reader.set_page_reader(page_reader).unwrap();
-            assert_eq!(2, record_reader.read_records(2).unwrap());
-            assert_eq!(2, record_reader.num_records());
-            assert_eq!(2, record_reader.num_values());
-            assert_eq!(3, record_reader.read_records(3).unwrap());
-            assert_eq!(5, record_reader.num_records());
-            assert_eq!(5, record_reader.num_values());
-        }
-
-        // Second page
-
-        // Records data:
-        // test_schema
-        // test_schema
-        //   test_struct
-        //     left: 8
-        {
-            let values = [8];
-            //empty, non-empty
-            let def_levels = [0i16, 2i16];
-            let mut pb = DataPageBuilderImpl::new(desc, 2, true);
-            pb.add_def_levels(2, &def_levels);
-            pb.add_values::<Int32Type>(Encoding::PLAIN, &values);
-            let page = pb.consume();
-
-            let page_reader = Box::new(TestPageReader::new(vec![page]));
-            record_reader.set_page_reader(page_reader).unwrap();
-            assert_eq!(2, record_reader.read_records(10).unwrap());
-            assert_eq!(7, record_reader.num_records());
-            assert_eq!(7, record_reader.num_values());
-        }
-
-        // Verify result record data
-        let mut bb = Int32BufferBuilder::new(7);
-        bb.append_slice(&[0, 7, 0, 6, 3, 0, 8]);
-        let expected_buffer = bb.finish();
-        assert_eq!(
-            expected_buffer,
-            record_reader.consume_record_data().unwrap()
-        );
-
-        // Verify result def levels
-        let mut bb = Int16BufferBuilder::new(7);
-        bb.append_slice(&[1i16, 2i16, 0i16, 2i16, 2i16, 0i16, 2i16]);
-        let expected_def_levels = bb.finish();
-        assert_eq!(
-            Some(expected_def_levels),
-            record_reader.consume_def_levels().unwrap()
-        );
-
-        // Verify bitmap
-        let mut bb = BooleanBufferBuilder::new(7);
-        bb.append_slice(&[false, true, false, true, true, false, true]);
-        let expected_bitmap = Bitmap::from(bb.finish());
-        assert_eq!(
-            Some(expected_bitmap),
-            record_reader.consume_bitmap().unwrap()
-        );
-    }
-
-    #[test]
-    fn test_read_repeated_records() {
-        // Construct column schema
-        let message_type = "
-        message test_schema {
-          REPEATED Group test_struct {
-            REPEATED  INT32 leaf;
-          }
-        }
-        ";
-
-        let desc = parse_message_type(message_type)
-            .map(|t| SchemaDescriptor::new(Arc::new(t)))
-            .map(|s| s.column(0))
-            .unwrap();
-
-        // Construct record reader
-        let mut record_reader = RecordReader::<Int32Type>::new(desc.clone());
-
-        // First page
-
-        // Records data:
-        // test_schema
-        //   test_struct
-        //     leaf: 4
-        // test_schema
-        // test_schema
-        //   test_struct
-        //   test_struct
-        //     leaf: 7
-        //     leaf: 6
-        //     leaf: 3
-        //   test_struct
-        //     leaf: 2
-        {
-            let values = [4, 7, 6, 3, 2];
-            let def_levels = [2i16, 0i16, 1i16, 2i16, 2i16, 2i16, 2i16];
-            let rep_levels = [0i16, 0i16, 0i16, 1i16, 2i16, 2i16, 1i16];
-            let mut pb = DataPageBuilderImpl::new(desc.clone(), 7, true);
-            pb.add_rep_levels(2, &rep_levels);
-            pb.add_def_levels(2, &def_levels);
-            pb.add_values::<Int32Type>(Encoding::PLAIN, &values);
-            let page = pb.consume();
-
-            let page_reader = Box::new(TestPageReader::new(vec![page]));
-            record_reader.set_page_reader(page_reader).unwrap();
-
-            assert_eq!(1, record_reader.read_records(1).unwrap());
-            assert_eq!(1, record_reader.num_records());
-            assert_eq!(1, record_reader.num_values());
-            assert_eq!(2, record_reader.read_records(3).unwrap());
-            assert_eq!(3, record_reader.num_records());
-            assert_eq!(7, record_reader.num_values());
-        }
-
-        // Second page
-
-        // Records data:
-        // test_schema
-        //   test_struct
-        //     leaf: 8
-        //     leaf: 9
-        {
-            let values = [8, 9];
-            let def_levels = [2i16, 2i16];
-            let rep_levels = [0i16, 2i16];
-            let mut pb = DataPageBuilderImpl::new(desc, 2, true);
-            pb.add_rep_levels(2, &rep_levels);
-            pb.add_def_levels(2, &def_levels);
-            pb.add_values::<Int32Type>(Encoding::PLAIN, &values);
-            let page = pb.consume();
-
-            let page_reader = Box::new(TestPageReader::new(vec![page]));
-            record_reader.set_page_reader(page_reader).unwrap();
-
-            assert_eq!(1, record_reader.read_records(10).unwrap());
-            assert_eq!(4, record_reader.num_records());
-            assert_eq!(9, record_reader.num_values());
-        }
-
-        // Verify result record data
-        let mut bb = Int32BufferBuilder::new(9);
-        bb.append_slice(&[4, 0, 0, 7, 6, 3, 2, 8, 9]);
-        let expected_buffer = bb.finish();
-        assert_eq!(
-            expected_buffer,
-            record_reader.consume_record_data().unwrap()
-        );
-
-        // Verify result def levels
-        let mut bb = Int16BufferBuilder::new(9);
-        bb.append_slice(&[2i16, 0i16, 1i16, 2i16, 2i16, 2i16, 2i16, 2i16, 2i16]);
-        let expected_def_levels = bb.finish();
-        assert_eq!(
-            Some(expected_def_levels),
-            record_reader.consume_def_levels().unwrap()
-        );
-
-        // Verify bitmap
-        let mut bb = BooleanBufferBuilder::new(9);
-        bb.append_slice(&[true, false, false, true, true, true, true, true, true]);
-        let expected_bitmap = Bitmap::from(bb.finish());
-        assert_eq!(
-            Some(expected_bitmap),
-            record_reader.consume_bitmap().unwrap()
-        );
-    }
-
-    #[test]
-    fn test_read_more_than_one_batch() {
-        // Construct column schema
-        let message_type = "
-        message test_schema {
-          REPEATED  INT32 leaf;
-        }
-        ";
-
-        let desc = parse_message_type(message_type)
-            .map(|t| SchemaDescriptor::new(Arc::new(t)))
-            .map(|s| s.column(0))
-            .unwrap();
-
-        // Construct record reader
-        let mut record_reader = RecordReader::<Int32Type>::new(desc.clone());
-
-        {
-            let values = [100; 5000];
-            let def_levels = [1i16; 5000];
-            let mut rep_levels = [1i16; 5000];
-            for idx in 0..1000 {
-                rep_levels[idx * 5] = 0i16;
-            }
-
-            let mut pb = DataPageBuilderImpl::new(desc, 5000, true);
-            pb.add_rep_levels(1, &rep_levels);
-            pb.add_def_levels(1, &def_levels);
-            pb.add_values::<Int32Type>(Encoding::PLAIN, &values);
-            let page = pb.consume();
-
-            let page_reader = Box::new(TestPageReader::new(vec![page]));
-            record_reader.set_page_reader(page_reader).unwrap();
-
-            assert_eq!(1000, record_reader.read_records(1000).unwrap());
-            assert_eq!(1000, record_reader.num_records());
-            assert_eq!(5000, record_reader.num_values());
-        }
-    }
-}
diff --git a/rust/parquet/src/arrow/schema.rs b/rust/parquet/src/arrow/schema.rs
deleted file mode 100644
index b15bb7e4140..00000000000
--- a/rust/parquet/src/arrow/schema.rs
+++ /dev/null
@@ -1,1945 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Provides API for converting parquet schema to arrow schema and vice versa.
-//!
-//! The main interfaces for converting parquet schema to arrow schema  are
-//! `parquet_to_arrow_schema`, `parquet_to_arrow_schema_by_columns` and
-//! `parquet_to_arrow_field`.
-//!
-//! The interfaces for converting arrow schema to parquet schema is coming.
-
-use std::collections::{HashMap, HashSet};
-use std::sync::Arc;
-
-use arrow::datatypes::{DataType, Field, IntervalUnit, Schema, TimeUnit};
-use arrow::ipc::writer;
-
-use crate::errors::{ParquetError::ArrowError, Result};
-use crate::file::{metadata::KeyValue, properties::WriterProperties};
-use crate::schema::types::{ColumnDescriptor, SchemaDescriptor, Type, TypePtr};
-use crate::{
-    basic::{
-        ConvertedType, DecimalType, IntType, LogicalType, Repetition, TimeType,
-        TimeUnit as ParquetTimeUnit, TimestampType, Type as PhysicalType,
-    },
-    errors::ParquetError,
-};
-
-/// Convert Parquet schema to Arrow schema including optional metadata.
-/// Attempts to decode any existing Arrow schema metadata, falling back
-/// to converting the Parquet schema column-wise
-pub fn parquet_to_arrow_schema(
-    parquet_schema: &SchemaDescriptor,
-    key_value_metadata: &Option<Vec<KeyValue>>,
-) -> Result<Schema> {
-    let mut metadata = parse_key_value_metadata(key_value_metadata).unwrap_or_default();
-    metadata
-        .remove(super::ARROW_SCHEMA_META_KEY)
-        .map(|encoded| get_arrow_schema_from_metadata(&encoded))
-        .unwrap_or(parquet_to_arrow_schema_by_columns(
-            parquet_schema,
-            0..parquet_schema.columns().len(),
-            key_value_metadata,
-        ))
-}
-
-/// Convert parquet schema to arrow schema including optional metadata,
-/// only preserving some root columns.
-/// This is useful if we have columns `a.b`, `a.c.e` and `a.d`,
-/// and want `a` with all its child fields
-pub fn parquet_to_arrow_schema_by_root_columns<T>(
-    parquet_schema: &SchemaDescriptor,
-    column_indices: T,
-    key_value_metadata: &Option<Vec<KeyValue>>,
-) -> Result<Schema>
-where
-    T: IntoIterator<Item = usize>,
-{
-    // Reconstruct the index ranges of the parent columns
-    // An Arrow struct gets represented by 1+ columns based on how many child fields the
-    // struct has. This means that getting fields 1 and 2 might return the struct twice,
-    // if field 1 is the struct having say 3 fields, and field 2 is a primitive.
-    //
-    // The below gets the parent columns, and counts the number of child fields in each parent,
-    // such that we would end up with:
-    // - field 1 - columns: [0, 1, 2]
-    // - field 2 - columns: [3]
-    let mut parent_columns = vec![];
-    let mut curr_name = "";
-    let mut prev_name = "";
-    let mut indices = vec![];
-    (0..(parquet_schema.num_columns())).for_each(|i| {
-        let p_type = parquet_schema.get_column_root(i);
-        curr_name = p_type.get_basic_info().name();
-        if prev_name.is_empty() {
-            // first index
-            indices.push(i);
-            prev_name = curr_name;
-        } else if curr_name != prev_name {
-            prev_name = curr_name;
-            parent_columns.push((curr_name.to_string(), indices.clone()));
-            indices = vec![i];
-        } else {
-            indices.push(i);
-        }
-    });
-    // push the last column if indices has values
-    if !indices.is_empty() {
-        parent_columns.push((curr_name.to_string(), indices));
-    }
-
-    // gather the required leaf columns
-    let leaf_columns = column_indices
-        .into_iter()
-        .flat_map(|i| parent_columns[i].1.clone());
-
-    parquet_to_arrow_schema_by_columns(parquet_schema, leaf_columns, key_value_metadata)
-}
-
-/// Convert parquet schema to arrow schema including optional metadata,
-/// only preserving some leaf columns.
-pub fn parquet_to_arrow_schema_by_columns<T>(
-    parquet_schema: &SchemaDescriptor,
-    column_indices: T,
-    key_value_metadata: &Option<Vec<KeyValue>>,
-) -> Result<Schema>
-where
-    T: IntoIterator<Item = usize>,
-{
-    let mut metadata = parse_key_value_metadata(key_value_metadata).unwrap_or_default();
-    let arrow_schema_metadata = metadata
-        .remove(super::ARROW_SCHEMA_META_KEY)
-        .map(|encoded| get_arrow_schema_from_metadata(&encoded))
-        .map_or(Ok(None), |v| v.map(Some))?;
-
-    // add the Arrow metadata to the Parquet metadata
-    if let Some(arrow_schema) = &arrow_schema_metadata {
-        arrow_schema.metadata().iter().for_each(|(k, v)| {
-            metadata.insert(k.clone(), v.clone());
-        });
-    }
-
-    let mut base_nodes = Vec::new();
-    let mut base_nodes_set = HashSet::new();
-    let mut leaves = HashSet::new();
-
-    enum FieldType<'a> {
-        Parquet(&'a Type),
-        Arrow(Field),
-    }
-
-    for c in column_indices {
-        let column = parquet_schema.column(c);
-        let name = column.name();
-
-        if let Some(field) = arrow_schema_metadata
-            .as_ref()
-            .and_then(|schema| schema.field_with_name(name).ok().cloned())
-        {
-            base_nodes.push(FieldType::Arrow(field));
-        } else {
-            let column = column.self_type() as *const Type;
-            let root = parquet_schema.get_column_root(c);
-            let root_raw_ptr = root as *const Type;
-
-            leaves.insert(column);
-            if !base_nodes_set.contains(&root_raw_ptr) {
-                base_nodes.push(FieldType::Parquet(root));
-                base_nodes_set.insert(root_raw_ptr);
-            }
-        }
-    }
-
-    base_nodes
-        .into_iter()
-        .map(|t| match t {
-            FieldType::Parquet(t) => ParquetTypeConverter::new(t, &leaves).to_field(),
-            FieldType::Arrow(f) => Ok(Some(f)),
-        })
-        .collect::<Result<Vec<Option<Field>>>>()
-        .map(|result| result.into_iter().filter_map(|f| f).collect::<Vec<Field>>())
-        .map(|fields| Schema::new_with_metadata(fields, metadata))
-}
-
-/// Try to convert Arrow schema metadata into a schema
-fn get_arrow_schema_from_metadata(encoded_meta: &str) -> Result<Schema> {
-    let decoded = base64::decode(encoded_meta);
-    match decoded {
-        Ok(bytes) => {
-            let slice = if bytes[0..4] == [255u8; 4] {
-                &bytes[8..]
-            } else {
-                bytes.as_slice()
-            };
-            match arrow::ipc::root_as_message(slice) {
-                Ok(message) => message
-                    .header_as_schema()
-                    .map(arrow::ipc::convert::fb_to_schema)
-                    .ok_or(ArrowError("the message is not Arrow Schema".to_string())),
-                Err(err) => {
-                    // The flatbuffers implementation returns an error on verification error.
-                    Err(ArrowError(format!(
-                        "Unable to get root as message stored in {}: {:?}",
-                        super::ARROW_SCHEMA_META_KEY,
-                        err
-                    )))
-                }
-            }
-        }
-        Err(err) => {
-            // The C++ implementation returns an error if the schema can't be parsed.
-            Err(ArrowError(format!(
-                "Unable to decode the encoded schema stored in {}, {:?}",
-                super::ARROW_SCHEMA_META_KEY,
-                err
-            )))
-        }
-    }
-}
-
-/// Encodes the Arrow schema into the IPC format, and base64 encodes it
-fn encode_arrow_schema(schema: &Schema) -> String {
-    let options = writer::IpcWriteOptions::default();
-    let data_gen = arrow::ipc::writer::IpcDataGenerator::default();
-    let mut serialized_schema = data_gen.schema_to_bytes(&schema, &options);
-
-    // manually prepending the length to the schema as arrow uses the legacy IPC format
-    // TODO: change after addressing ARROW-9777
-    let schema_len = serialized_schema.ipc_message.len();
-    let mut len_prefix_schema = Vec::with_capacity(schema_len + 8);
-    len_prefix_schema.append(&mut vec![255u8, 255, 255, 255]);
-    len_prefix_schema.append((schema_len as u32).to_le_bytes().to_vec().as_mut());
-    len_prefix_schema.append(&mut serialized_schema.ipc_message);
-
-    base64::encode(&len_prefix_schema)
-}
-
-/// Mutates writer metadata by storing the encoded Arrow schema.
-/// If there is an existing Arrow schema metadata, it is replaced.
-pub(crate) fn add_encoded_arrow_schema_to_metadata(
-    schema: &Schema,
-    props: &mut WriterProperties,
-) {
-    let encoded = encode_arrow_schema(schema);
-
-    let schema_kv = KeyValue {
-        key: super::ARROW_SCHEMA_META_KEY.to_string(),
-        value: Some(encoded),
-    };
-
-    let mut meta = props.key_value_metadata.clone().unwrap_or_default();
-    // check if ARROW:schema exists, and overwrite it
-    let schema_meta = meta
-        .iter()
-        .enumerate()
-        .find(|(_, kv)| kv.key.as_str() == super::ARROW_SCHEMA_META_KEY);
-    match schema_meta {
-        Some((i, _)) => {
-            meta.remove(i);
-            meta.push(schema_kv);
-        }
-        None => {
-            meta.push(schema_kv);
-        }
-    }
-    props.key_value_metadata = Some(meta);
-}
-
-/// Convert arrow schema to parquet schema
-pub fn arrow_to_parquet_schema(schema: &Schema) -> Result<SchemaDescriptor> {
-    let fields: Result<Vec<TypePtr>> = schema
-        .fields()
-        .iter()
-        .map(|field| arrow_to_parquet_type(field).map(Arc::new))
-        .collect();
-    let group = Type::group_type_builder("arrow_schema")
-        .with_fields(&mut fields?)
-        .build()?;
-    Ok(SchemaDescriptor::new(Arc::new(group)))
-}
-
-fn parse_key_value_metadata(
-    key_value_metadata: &Option<Vec<KeyValue>>,
-) -> Option<HashMap<String, String>> {
-    match key_value_metadata {
-        Some(key_values) => {
-            let map: HashMap<String, String> = key_values
-                .iter()
-                .filter_map(|kv| {
-                    kv.value
-                        .as_ref()
-                        .map(|value| (kv.key.clone(), value.clone()))
-                })
-                .collect();
-
-            if map.is_empty() {
-                None
-            } else {
-                Some(map)
-            }
-        }
-        None => None,
-    }
-}
-
-/// Convert parquet column schema to arrow field.
-pub fn parquet_to_arrow_field(parquet_column: &ColumnDescriptor) -> Result<Field> {
-    let schema = parquet_column.self_type();
-
-    let mut leaves = HashSet::new();
-    leaves.insert(parquet_column.self_type() as *const Type);
-
-    ParquetTypeConverter::new(schema, &leaves)
-        .to_field()
-        .map(|opt| opt.unwrap())
-}
-
-pub fn decimal_length_from_precision(precision: usize) -> usize {
-    (10.0_f64.powi(precision as i32).log2() / 8.0).ceil() as usize
-}
-
-/// Convert an arrow field to a parquet `Type`
-fn arrow_to_parquet_type(field: &Field) -> Result<Type> {
-    let name = field.name().as_str();
-    let repetition = if field.is_nullable() {
-        Repetition::OPTIONAL
-    } else {
-        Repetition::REQUIRED
-    };
-    // create type from field
-    match field.data_type() {
-        DataType::Null => Type::primitive_type_builder(name, PhysicalType::INT32)
-            .with_logical_type(Some(LogicalType::UNKNOWN(Default::default())))
-            .with_repetition(repetition)
-            .build(),
-        DataType::Boolean => Type::primitive_type_builder(name, PhysicalType::BOOLEAN)
-            .with_repetition(repetition)
-            .build(),
-        DataType::Int8 => Type::primitive_type_builder(name, PhysicalType::INT32)
-            .with_logical_type(Some(LogicalType::INTEGER(IntType {
-                bit_width: 8,
-                is_signed: true,
-            })))
-            .with_repetition(repetition)
-            .build(),
-        DataType::Int16 => Type::primitive_type_builder(name, PhysicalType::INT32)
-            .with_logical_type(Some(LogicalType::INTEGER(IntType {
-                bit_width: 16,
-                is_signed: true,
-            })))
-            .with_repetition(repetition)
-            .build(),
-        DataType::Int32 => Type::primitive_type_builder(name, PhysicalType::INT32)
-            .with_repetition(repetition)
-            .build(),
-        DataType::Int64 => Type::primitive_type_builder(name, PhysicalType::INT64)
-            .with_repetition(repetition)
-            .build(),
-        DataType::UInt8 => Type::primitive_type_builder(name, PhysicalType::INT32)
-            .with_logical_type(Some(LogicalType::INTEGER(IntType {
-                bit_width: 8,
-                is_signed: false,
-            })))
-            .with_repetition(repetition)
-            .build(),
-        DataType::UInt16 => Type::primitive_type_builder(name, PhysicalType::INT32)
-            .with_logical_type(Some(LogicalType::INTEGER(IntType {
-                bit_width: 16,
-                is_signed: false,
-            })))
-            .with_repetition(repetition)
-            .build(),
-        DataType::UInt32 => Type::primitive_type_builder(name, PhysicalType::INT32)
-            .with_logical_type(Some(LogicalType::INTEGER(IntType {
-                bit_width: 32,
-                is_signed: false,
-            })))
-            .with_repetition(repetition)
-            .build(),
-        DataType::UInt64 => Type::primitive_type_builder(name, PhysicalType::INT64)
-            .with_logical_type(Some(LogicalType::INTEGER(IntType {
-                bit_width: 64,
-                is_signed: false,
-            })))
-            .with_repetition(repetition)
-            .build(),
-        DataType::Float16 => Err(ArrowError("Float16 arrays not supported".to_string())),
-        DataType::Float32 => Type::primitive_type_builder(name, PhysicalType::FLOAT)
-            .with_repetition(repetition)
-            .build(),
-        DataType::Float64 => Type::primitive_type_builder(name, PhysicalType::DOUBLE)
-            .with_repetition(repetition)
-            .build(),
-        DataType::Timestamp(time_unit, zone) => Type::primitive_type_builder(
-            name,
-            PhysicalType::INT64,
-        )
-        .with_logical_type(Some(LogicalType::TIMESTAMP(TimestampType {
-            is_adjusted_to_u_t_c: matches!(zone, Some(z) if !z.as_str().is_empty()),
-            unit: match time_unit {
-                TimeUnit::Second => ParquetTimeUnit::MILLIS(Default::default()),
-                TimeUnit::Millisecond => ParquetTimeUnit::MILLIS(Default::default()),
-                TimeUnit::Microsecond => ParquetTimeUnit::MICROS(Default::default()),
-                TimeUnit::Nanosecond => ParquetTimeUnit::NANOS(Default::default()),
-            },
-        })))
-        .with_repetition(repetition)
-        .build(),
-        DataType::Date32 => Type::primitive_type_builder(name, PhysicalType::INT32)
-            .with_logical_type(Some(LogicalType::DATE(Default::default())))
-            .with_repetition(repetition)
-            .build(),
-        // date64 is cast to date32
-        DataType::Date64 => Type::primitive_type_builder(name, PhysicalType::INT32)
-            .with_logical_type(Some(LogicalType::DATE(Default::default())))
-            .with_repetition(repetition)
-            .build(),
-        DataType::Time32(_) => Type::primitive_type_builder(name, PhysicalType::INT32)
-            .with_logical_type(Some(LogicalType::TIME(TimeType {
-                is_adjusted_to_u_t_c: false,
-                unit: ParquetTimeUnit::MILLIS(Default::default()),
-            })))
-            .with_repetition(repetition)
-            .build(),
-        DataType::Time64(unit) => Type::primitive_type_builder(name, PhysicalType::INT64)
-            .with_logical_type(Some(LogicalType::TIME(TimeType {
-                is_adjusted_to_u_t_c: false,
-                unit: match unit {
-                    TimeUnit::Microsecond => ParquetTimeUnit::MICROS(Default::default()),
-                    TimeUnit::Nanosecond => ParquetTimeUnit::NANOS(Default::default()),
-                    u => unreachable!("Invalid unit for Time64: {:?}", u),
-                },
-            })))
-            .with_repetition(repetition)
-            .build(),
-        DataType::Duration(_) => Err(ArrowError(
-            "Converting Duration to parquet not supported".to_string(),
-        )),
-        DataType::Interval(_) => {
-            Type::primitive_type_builder(name, PhysicalType::FIXED_LEN_BYTE_ARRAY)
-                .with_converted_type(ConvertedType::INTERVAL)
-                .with_repetition(repetition)
-                .with_length(12)
-                .build()
-        }
-        DataType::Binary | DataType::LargeBinary => {
-            Type::primitive_type_builder(name, PhysicalType::BYTE_ARRAY)
-                .with_repetition(repetition)
-                .build()
-        }
-        DataType::FixedSizeBinary(length) => {
-            Type::primitive_type_builder(name, PhysicalType::FIXED_LEN_BYTE_ARRAY)
-                .with_repetition(repetition)
-                .with_length(*length)
-                .build()
-        }
-        DataType::Decimal(precision, scale) => {
-            // Decimal precision determines the Parquet physical type to use.
-            // TODO(ARROW-12018): Enable the below after ARROW-10818 Decimal support
-            //
-            // let (physical_type, length) = if *precision > 1 && *precision <= 9 {
-            //     (PhysicalType::INT32, -1)
-            // } else if *precision <= 18 {
-            //     (PhysicalType::INT64, -1)
-            // } else {
-            //     (
-            //         PhysicalType::FIXED_LEN_BYTE_ARRAY,
-            //         decimal_length_from_precision(*precision) as i32,
-            //     )
-            // };
-            Type::primitive_type_builder(name, PhysicalType::FIXED_LEN_BYTE_ARRAY)
-                .with_repetition(repetition)
-                .with_length(decimal_length_from_precision(*precision) as i32)
-                .with_logical_type(Some(LogicalType::DECIMAL(DecimalType {
-                    scale: *scale as i32,
-                    precision: *precision as i32,
-                })))
-                .with_precision(*precision as i32)
-                .with_scale(*scale as i32)
-                .build()
-        }
-        DataType::Utf8 | DataType::LargeUtf8 => {
-            Type::primitive_type_builder(name, PhysicalType::BYTE_ARRAY)
-                .with_logical_type(Some(LogicalType::STRING(Default::default())))
-                .with_repetition(repetition)
-                .build()
-        }
-        DataType::List(f) | DataType::FixedSizeList(f, _) | DataType::LargeList(f) => {
-            Type::group_type_builder(name)
-                .with_fields(&mut vec![Arc::new(
-                    Type::group_type_builder("list")
-                        .with_fields(&mut vec![Arc::new(arrow_to_parquet_type(f)?)])
-                        .with_repetition(Repetition::REPEATED)
-                        .build()?,
-                )])
-                .with_logical_type(Some(LogicalType::LIST(Default::default())))
-                .with_repetition(repetition)
-                .build()
-        }
-        DataType::Struct(fields) => {
-            if fields.is_empty() {
-                return Err(ArrowError(
-                    "Parquet does not support writing empty structs".to_string(),
-                ));
-            }
-            // recursively convert children to types/nodes
-            let fields: Result<Vec<TypePtr>> = fields
-                .iter()
-                .map(|f| arrow_to_parquet_type(f).map(Arc::new))
-                .collect();
-            Type::group_type_builder(name)
-                .with_fields(&mut fields?)
-                .with_repetition(repetition)
-                .build()
-        }
-        DataType::Union(_) => unimplemented!("See ARROW-8817."),
-        DataType::Dictionary(_, ref value) => {
-            // Dictionary encoding not handled at the schema level
-            let dict_field = Field::new(name, *value.clone(), field.is_nullable());
-            arrow_to_parquet_type(&dict_field)
-        }
-    }
-}
-/// This struct is used to group methods and data structures used to convert parquet
-/// schema together.
-struct ParquetTypeConverter<'a> {
-    schema: &'a Type,
-    /// This is the columns that need to be converted to arrow schema.
-    columns_to_convert: &'a HashSet<*const Type>,
-}
-
-impl<'a> ParquetTypeConverter<'a> {
-    fn new(schema: &'a Type, columns_to_convert: &'a HashSet<*const Type>) -> Self {
-        Self {
-            schema,
-            columns_to_convert,
-        }
-    }
-
-    fn clone_with_schema(&self, other: &'a Type) -> Self {
-        Self {
-            schema: other,
-            columns_to_convert: self.columns_to_convert,
-        }
-    }
-}
-
-impl ParquetTypeConverter<'_> {
-    // Public interfaces.
-
-    /// Converts parquet schema to arrow data type.
-    ///
-    /// This function discards schema name.
-    ///
-    /// If this schema is a primitive type and not included in the leaves, the result is
-    /// Ok(None).
-    ///
-    /// If this schema is a group type and none of its children is reserved in the
-    /// conversion, the result is Ok(None).
-    fn to_data_type(&self) -> Result<Option<DataType>> {
-        match self.schema {
-            Type::PrimitiveType { .. } => self.to_primitive_type(),
-            Type::GroupType { .. } => self.to_group_type(),
-        }
-    }
-
-    /// Converts parquet schema to arrow field.
-    ///
-    /// This method is roughly the same as
-    /// [`to_data_type`](`ParquetTypeConverter::to_data_type`), except it reserves schema
-    /// name.
-    fn to_field(&self) -> Result<Option<Field>> {
-        self.to_data_type().map(|opt| {
-            opt.map(|dt| Field::new(self.schema.name(), dt, self.is_nullable()))
-        })
-    }
-
-    // Utility functions.
-
-    /// Checks whether this schema is nullable.
-    fn is_nullable(&self) -> bool {
-        let basic_info = self.schema.get_basic_info();
-        if basic_info.has_repetition() {
-            match basic_info.repetition() {
-                Repetition::OPTIONAL => true,
-                Repetition::REPEATED => true,
-                Repetition::REQUIRED => false,
-            }
-        } else {
-            false
-        }
-    }
-
-    fn is_repeated(&self) -> bool {
-        let basic_info = self.schema.get_basic_info();
-
-        basic_info.has_repetition() && basic_info.repetition() == Repetition::REPEATED
-    }
-
-    fn is_self_included(&self) -> bool {
-        self.columns_to_convert
-            .contains(&(self.schema as *const Type))
-    }
-
-    // Functions for primitive types.
-
-    /// Entry point for converting parquet primitive type to arrow type.
-    ///
-    /// This function takes care of repetition.
-    fn to_primitive_type(&self) -> Result<Option<DataType>> {
-        if self.is_self_included() {
-            self.to_primitive_type_inner().map(|dt| {
-                if self.is_repeated() {
-                    Some(DataType::List(Box::new(Field::new(
-                        self.schema.name(),
-                        dt,
-                        self.is_nullable(),
-                    ))))
-                } else {
-                    Some(dt)
-                }
-            })
-        } else {
-            Ok(None)
-        }
-    }
-
-    /// Converting parquet primitive type to arrow data type.
-    fn to_primitive_type_inner(&self) -> Result<DataType> {
-        match self.schema.get_physical_type() {
-            PhysicalType::BOOLEAN => Ok(DataType::Boolean),
-            PhysicalType::INT32 => self.from_int32(),
-            PhysicalType::INT64 => self.from_int64(),
-            PhysicalType::INT96 => Ok(DataType::Timestamp(TimeUnit::Nanosecond, None)),
-            PhysicalType::FLOAT => Ok(DataType::Float32),
-            PhysicalType::DOUBLE => Ok(DataType::Float64),
-            PhysicalType::BYTE_ARRAY => self.from_byte_array(),
-            PhysicalType::FIXED_LEN_BYTE_ARRAY => self.from_fixed_len_byte_array(),
-        }
-    }
-
-    fn from_int32(&self) -> Result<DataType> {
-        match (
-            self.schema.get_basic_info().logical_type(),
-            self.schema.get_basic_info().converted_type(),
-        ) {
-            (None, ConvertedType::NONE) => Ok(DataType::Int32),
-            (Some(LogicalType::INTEGER(t)), _) => match (t.bit_width, t.is_signed) {
-                (8, true) => Ok(DataType::Int8),
-                (16, true) => Ok(DataType::Int16),
-                (32, true) => Ok(DataType::Int32),
-                (8, false) => Ok(DataType::UInt8),
-                (16, false) => Ok(DataType::UInt16),
-                (32, false) => Ok(DataType::UInt32),
-                _ => Err(ArrowError(format!(
-                    "Cannot create INT32 physical type from {:?}",
-                    t
-                ))),
-            },
-            (Some(LogicalType::DECIMAL(_)), _) => Ok(self.to_decimal()),
-            (Some(LogicalType::DATE(_)), _) => Ok(DataType::Date32),
-            (Some(LogicalType::TIME(t)), _) => match t.unit {
-                ParquetTimeUnit::MILLIS(_) => Ok(DataType::Time32(TimeUnit::Millisecond)),
-                _ => Err(ArrowError(format!(
-                    "Cannot create INT32 physical type from {:?}",
-                    t.unit
-                ))),
-            },
-            (None, ConvertedType::UINT_8) => Ok(DataType::UInt8),
-            (None, ConvertedType::UINT_16) => Ok(DataType::UInt16),
-            (None, ConvertedType::UINT_32) => Ok(DataType::UInt32),
-            (None, ConvertedType::INT_8) => Ok(DataType::Int8),
-            (None, ConvertedType::INT_16) => Ok(DataType::Int16),
-            (None, ConvertedType::INT_32) => Ok(DataType::Int32),
-            (None, ConvertedType::DATE) => Ok(DataType::Date32),
-            (None, ConvertedType::TIME_MILLIS) => {
-                Ok(DataType::Time32(TimeUnit::Millisecond))
-            }
-            (None, ConvertedType::DECIMAL) => Ok(self.to_decimal()),
-            (logical, converted) => Err(ArrowError(format!(
-                "Unable to convert parquet INT32 logical type {:?} or converted type {}",
-                logical, converted
-            ))),
-        }
-    }
-
-    fn from_int64(&self) -> Result<DataType> {
-        match (
-            self.schema.get_basic_info().logical_type(),
-            self.schema.get_basic_info().converted_type(),
-        ) {
-            (None, ConvertedType::NONE) => Ok(DataType::Int64),
-            (Some(LogicalType::INTEGER(t)), _) if t.bit_width == 64 => {
-                match t.is_signed {
-                    true => Ok(DataType::Int64),
-                    false => Ok(DataType::UInt64),
-                }
-            }
-            (Some(LogicalType::TIME(t)), _) => match t.unit {
-                ParquetTimeUnit::MILLIS(_) => Err(ArrowError(
-                    "Cannot create INT64 from MILLIS time unit".to_string(),
-                )),
-                ParquetTimeUnit::MICROS(_) => Ok(DataType::Time64(TimeUnit::Microsecond)),
-                ParquetTimeUnit::NANOS(_) => Ok(DataType::Time64(TimeUnit::Nanosecond)),
-            },
-            (Some(LogicalType::TIMESTAMP(t)), _) => Ok(DataType::Timestamp(
-                match t.unit {
-                    ParquetTimeUnit::MILLIS(_) => TimeUnit::Millisecond,
-                    ParquetTimeUnit::MICROS(_) => TimeUnit::Microsecond,
-                    ParquetTimeUnit::NANOS(_) => TimeUnit::Nanosecond,
-                },
-                if t.is_adjusted_to_u_t_c {
-                    Some("UTC".to_string())
-                } else {
-                    None
-                },
-            )),
-            (None, ConvertedType::INT_64) => Ok(DataType::Int64),
-            (None, ConvertedType::UINT_64) => Ok(DataType::UInt64),
-            (None, ConvertedType::TIME_MICROS) => {
-                Ok(DataType::Time64(TimeUnit::Microsecond))
-            }
-            (None, ConvertedType::TIMESTAMP_MILLIS) => {
-                Ok(DataType::Timestamp(TimeUnit::Millisecond, None))
-            }
-            (None, ConvertedType::TIMESTAMP_MICROS) => {
-                Ok(DataType::Timestamp(TimeUnit::Microsecond, None))
-            }
-            (Some(LogicalType::DECIMAL(_)), _) => Ok(self.to_decimal()),
-            (None, ConvertedType::DECIMAL) => Ok(self.to_decimal()),
-            (logical, converted) => Err(ArrowError(format!(
-                "Unable to convert parquet INT64 logical type {:?} or converted type {}",
-                logical, converted
-            ))),
-        }
-    }
-
-    fn from_fixed_len_byte_array(&self) -> Result<DataType> {
-        match (
-            self.schema.get_basic_info().logical_type(),
-            self.schema.get_basic_info().converted_type(),
-        ) {
-            (Some(LogicalType::DECIMAL(_)), _) => Ok(self.to_decimal()),
-            (None, ConvertedType::DECIMAL) => Ok(self.to_decimal()),
-            (None, ConvertedType::INTERVAL) => {
-                // There is currently no reliable way of determining which IntervalUnit
-                // to return. Thus without the original Arrow schema, the results
-                // would be incorrect if all 12 bytes of the interval are populated
-                Ok(DataType::Interval(IntervalUnit::DayTime))
-            }
-            _ => {
-                let byte_width = match self.schema {
-                    Type::PrimitiveType {
-                        ref type_length, ..
-                    } => *type_length,
-                    _ => {
-                        return Err(ArrowError(
-                            "Expected a physical type, not a group type".to_string(),
-                        ))
-                    }
-                };
-
-                Ok(DataType::FixedSizeBinary(byte_width))
-            }
-        }
-    }
-
-    fn to_decimal(&self) -> DataType {
-        assert!(self.schema.is_primitive());
-        DataType::Decimal(
-            self.schema.get_precision() as usize,
-            self.schema.get_scale() as usize,
-        )
-    }
-
-    fn from_byte_array(&self) -> Result<DataType> {
-        match (self.schema.get_basic_info().logical_type(), self.schema.get_basic_info().converted_type()) {
-            (Some(LogicalType::STRING(_)), _) => Ok(DataType::Utf8),
-            (Some(LogicalType::JSON(_)), _) => Ok(DataType::Binary),
-            (Some(LogicalType::BSON(_)), _) => Ok(DataType::Binary),
-            (Some(LogicalType::ENUM(_)), _) => Ok(DataType::Binary),
-            (None, ConvertedType::NONE) => Ok(DataType::Binary),
-            (None, ConvertedType::JSON) => Ok(DataType::Binary),
-            (None, ConvertedType::BSON) => Ok(DataType::Binary),
-            (None, ConvertedType::ENUM) => Ok(DataType::Binary),
-            (None, ConvertedType::UTF8) => Ok(DataType::Utf8),
-            (logical, converted) => Err(ArrowError(format!(
-                "Unable to convert parquet BYTE_ARRAY logical type {:?} or converted type {}",
-                logical, converted
-            ))),
-        }
-    }
-
-    // Functions for group types.
-
-    /// Entry point for converting parquet group type.
-    ///
-    /// This function takes care of logical type and repetition.
-    fn to_group_type(&self) -> Result<Option<DataType>> {
-        if self.is_repeated() {
-            self.to_struct().map(|opt| {
-                opt.map(|dt| {
-                    DataType::List(Box::new(Field::new(
-                        self.schema.name(),
-                        dt,
-                        self.is_nullable(),
-                    )))
-                })
-            })
-        } else {
-            match (
-                self.schema.get_basic_info().logical_type(),
-                self.schema.get_basic_info().converted_type(),
-            ) {
-                (Some(LogicalType::LIST(_)), _) => self.to_list(),
-                (None, ConvertedType::LIST) => self.to_list(),
-                _ => self.to_struct(),
-            }
-        }
-    }
-
-    /// Converts a parquet group type to arrow struct.
-    fn to_struct(&self) -> Result<Option<DataType>> {
-        match self.schema {
-            Type::PrimitiveType { .. } => Err(ParquetError::General(format!(
-                "{:?} is a struct type, and can't be processed as primitive.",
-                self.schema
-            ))),
-            Type::GroupType {
-                basic_info: _,
-                fields,
-            } => fields
-                .iter()
-                .map(|field_ptr| self.clone_with_schema(field_ptr).to_field())
-                .collect::<Result<Vec<Option<Field>>>>()
-                .map(|result| {
-                    result.into_iter().filter_map(|f| f).collect::<Vec<Field>>()
-                })
-                .map(|fields| {
-                    if fields.is_empty() {
-                        None
-                    } else {
-                        Some(DataType::Struct(fields))
-                    }
-                }),
-        }
-    }
-
-    /// Converts a parquet list to arrow list.
-    ///
-    /// To fully understand this algorithm, please refer to
-    /// [parquet doc](https://github.com/apache/parquet-format/blob/master/LogicalTypes.md).
-    fn to_list(&self) -> Result<Option<DataType>> {
-        match self.schema {
-            Type::PrimitiveType { .. } => Err(ParquetError::General(format!(
-                "{:?} is a list type and can't be processed as primitive.",
-                self.schema
-            ))),
-            Type::GroupType {
-                basic_info: _,
-                fields,
-            } if fields.len() == 1 => {
-                let list_item = fields.first().unwrap();
-                let item_converter = self.clone_with_schema(list_item);
-
-                let item_type = match list_item.as_ref() {
-                    Type::PrimitiveType { .. } => {
-                        if item_converter.is_repeated() {
-                            item_converter.to_primitive_type_inner().map(Some)
-                        } else {
-                            Err(ArrowError(
-                                "Primitive element type of list must be repeated."
-                                    .to_string(),
-                            ))
-                        }
-                    }
-                    Type::GroupType {
-                        basic_info: _,
-                        fields,
-                    } => {
-                        if fields.len() > 1 {
-                            item_converter.to_struct()
-                        } else if fields.len() == 1
-                            && list_item.name() != "array"
-                            && list_item.name() != format!("{}_tuple", self.schema.name())
-                        {
-                            let nested_item = fields.first().unwrap();
-                            let nested_item_converter =
-                                self.clone_with_schema(nested_item);
-
-                            nested_item_converter.to_data_type()
-                        } else {
-                            item_converter.to_struct()
-                        }
-                    }
-                };
-
-                // Check that the name of the list child is "list", in which case we
-                // get the child nullability and name (normally "element") from the nested
-                // group type.
-                // Without this step, the child incorrectly inherits the parent's optionality
-                let (list_item_name, item_is_optional) = match &item_converter.schema {
-                    Type::GroupType { basic_info, fields }
-                        if basic_info.name() == "list" && fields.len() == 1 =>
-                    {
-                        let field = fields.first().unwrap();
-                        (field.name(), field.is_optional())
-                    }
-                    _ => (list_item.name(), list_item.is_optional()),
-                };
-
-                item_type.map(|opt| {
-                    opt.map(|dt| {
-                        DataType::List(Box::new(Field::new(
-                            list_item_name,
-                            dt,
-                            item_is_optional,
-                        )))
-                    })
-                })
-            }
-            _ => Err(ArrowError(
-                "Group element type of list can only contain one field.".to_string(),
-            )),
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    use std::{collections::HashMap, convert::TryFrom, sync::Arc};
-
-    use arrow::datatypes::{DataType, Field, IntervalUnit, TimeUnit};
-
-    use crate::file::{metadata::KeyValue, reader::SerializedFileReader};
-    use crate::{
-        arrow::{ArrowReader, ArrowWriter, ParquetFileArrowReader},
-        schema::{parser::parse_message_type, types::SchemaDescriptor},
-        util::test_common::get_temp_file,
-    };
-
-    #[test]
-    fn test_flat_primitives() {
-        let message_type = "
-        message test_schema {
-            REQUIRED BOOLEAN boolean;
-            REQUIRED INT32   int8  (INT_8);
-            REQUIRED INT32   int16 (INT_16);
-            REQUIRED INT32   uint8 (INTEGER(8,false));
-            REQUIRED INT32   uint16 (INTEGER(16,false));
-            REQUIRED INT32   int32;
-            REQUIRED INT64   int64 ;
-            OPTIONAL DOUBLE  double;
-            OPTIONAL FLOAT   float;
-            OPTIONAL BINARY  string (UTF8);
-            OPTIONAL BINARY  string_2 (STRING);
-        }
-        ";
-        let parquet_group_type = parse_message_type(message_type).unwrap();
-
-        let parquet_schema = SchemaDescriptor::new(Arc::new(parquet_group_type));
-        let converted_arrow_schema =
-            parquet_to_arrow_schema(&parquet_schema, &None).unwrap();
-
-        let arrow_fields = vec![
-            Field::new("boolean", DataType::Boolean, false),
-            Field::new("int8", DataType::Int8, false),
-            Field::new("int16", DataType::Int16, false),
-            Field::new("uint8", DataType::UInt8, false),
-            Field::new("uint16", DataType::UInt16, false),
-            Field::new("int32", DataType::Int32, false),
-            Field::new("int64", DataType::Int64, false),
-            Field::new("double", DataType::Float64, true),
-            Field::new("float", DataType::Float32, true),
-            Field::new("string", DataType::Utf8, true),
-            Field::new("string_2", DataType::Utf8, true),
-        ];
-
-        assert_eq!(&arrow_fields, converted_arrow_schema.fields());
-    }
-
-    #[test]
-    fn test_byte_array_fields() {
-        let message_type = "
-        message test_schema {
-            REQUIRED BYTE_ARRAY binary;
-            REQUIRED FIXED_LEN_BYTE_ARRAY (20) fixed_binary;
-        }
-        ";
-
-        let parquet_group_type = parse_message_type(message_type).unwrap();
-
-        let parquet_schema = SchemaDescriptor::new(Arc::new(parquet_group_type));
-        let converted_arrow_schema =
-            parquet_to_arrow_schema(&parquet_schema, &None).unwrap();
-
-        let arrow_fields = vec![
-            Field::new("binary", DataType::Binary, false),
-            Field::new("fixed_binary", DataType::FixedSizeBinary(20), false),
-        ];
-        assert_eq!(&arrow_fields, converted_arrow_schema.fields());
-    }
-
-    #[test]
-    fn test_duplicate_fields() {
-        let message_type = "
-        message test_schema {
-            REQUIRED BOOLEAN boolean;
-            REQUIRED INT32 int8 (INT_8);
-        }
-        ";
-
-        let parquet_group_type = parse_message_type(message_type).unwrap();
-
-        let parquet_schema = SchemaDescriptor::new(Arc::new(parquet_group_type));
-        let converted_arrow_schema =
-            parquet_to_arrow_schema(&parquet_schema, &None).unwrap();
-
-        let arrow_fields = vec![
-            Field::new("boolean", DataType::Boolean, false),
-            Field::new("int8", DataType::Int8, false),
-        ];
-        assert_eq!(&arrow_fields, converted_arrow_schema.fields());
-
-        let converted_arrow_schema = parquet_to_arrow_schema_by_columns(
-            &parquet_schema,
-            vec![0usize, 1usize],
-            &None,
-        )
-        .unwrap();
-        assert_eq!(&arrow_fields, converted_arrow_schema.fields());
-    }
-
-    #[test]
-    fn test_parquet_lists() {
-        let mut arrow_fields = Vec::new();
-
-        // LIST encoding example taken from parquet-format/LogicalTypes.md
-        let message_type = "
-        message test_schema {
-          REQUIRED GROUP my_list (LIST) {
-            REPEATED GROUP list {
-              OPTIONAL BINARY element (UTF8);
-            }
-          }
-          OPTIONAL GROUP my_list (LIST) {
-            REPEATED GROUP list {
-              REQUIRED BINARY element (UTF8);
-            }
-          }
-          OPTIONAL GROUP array_of_arrays (LIST) {
-            REPEATED GROUP list {
-              REQUIRED GROUP element (LIST) {
-                REPEATED GROUP list {
-                  REQUIRED INT32 element;
-                }
-              }
-            }
-          }
-          OPTIONAL GROUP my_list (LIST) {
-            REPEATED GROUP element {
-              REQUIRED BINARY str (UTF8);
-            }
-          }
-          OPTIONAL GROUP my_list (LIST) {
-            REPEATED INT32 element;
-          }
-          OPTIONAL GROUP my_list (LIST) {
-            REPEATED GROUP element {
-              REQUIRED BINARY str (UTF8);
-              REQUIRED INT32 num;
-            }
-          }
-          OPTIONAL GROUP my_list (LIST) {
-            REPEATED GROUP array {
-              REQUIRED BINARY str (UTF8);
-            }
-
-          }
-          OPTIONAL GROUP my_list (LIST) {
-            REPEATED GROUP my_list_tuple {
-              REQUIRED BINARY str (UTF8);
-            }
-          }
-          REPEATED INT32 name;
-        }
-        ";
-
-        // // List<String> (list non-null, elements nullable)
-        // required group my_list (LIST) {
-        //   repeated group list {
-        //     optional binary element (UTF8);
-        //   }
-        // }
-        {
-            arrow_fields.push(Field::new(
-                "my_list",
-                DataType::List(Box::new(Field::new("element", DataType::Utf8, true))),
-                false,
-            ));
-        }
-
-        // // List<String> (list nullable, elements non-null)
-        // optional group my_list (LIST) {
-        //   repeated group list {
-        //     required binary element (UTF8);
-        //   }
-        // }
-        {
-            arrow_fields.push(Field::new(
-                "my_list",
-                DataType::List(Box::new(Field::new("element", DataType::Utf8, false))),
-                true,
-            ));
-        }
-
-        // Element types can be nested structures. For example, a list of lists:
-        //
-        // // List<List<Integer>>
-        // optional group array_of_arrays (LIST) {
-        //   repeated group list {
-        //     required group element (LIST) {
-        //       repeated group list {
-        //         required int32 element;
-        //       }
-        //     }
-        //   }
-        // }
-        {
-            let arrow_inner_list =
-                DataType::List(Box::new(Field::new("element", DataType::Int32, false)));
-            arrow_fields.push(Field::new(
-                "array_of_arrays",
-                DataType::List(Box::new(Field::new("element", arrow_inner_list, false))),
-                true,
-            ));
-        }
-
-        // // List<String> (list nullable, elements non-null)
-        // optional group my_list (LIST) {
-        //   repeated group element {
-        //     required binary str (UTF8);
-        //   };
-        // }
-        {
-            arrow_fields.push(Field::new(
-                "my_list",
-                DataType::List(Box::new(Field::new("element", DataType::Utf8, true))),
-                true,
-            ));
-        }
-
-        // // List<Integer> (nullable list, non-null elements)
-        // optional group my_list (LIST) {
-        //   repeated int32 element;
-        // }
-        {
-            arrow_fields.push(Field::new(
-                "my_list",
-                DataType::List(Box::new(Field::new("element", DataType::Int32, true))),
-                true,
-            ));
-        }
-
-        // // List<Tuple<String, Integer>> (nullable list, non-null elements)
-        // optional group my_list (LIST) {
-        //   repeated group element {
-        //     required binary str (UTF8);
-        //     required int32 num;
-        //   };
-        // }
-        {
-            let arrow_struct = DataType::Struct(vec![
-                Field::new("str", DataType::Utf8, false),
-                Field::new("num", DataType::Int32, false),
-            ]);
-            arrow_fields.push(Field::new(
-                "my_list",
-                DataType::List(Box::new(Field::new("element", arrow_struct, true))),
-                true,
-            ));
-        }
-
-        // // List<OneTuple<String>> (nullable list, non-null elements)
-        // optional group my_list (LIST) {
-        //   repeated group array {
-        //     required binary str (UTF8);
-        //   };
-        // }
-        // Special case: group is named array
-        {
-            let arrow_struct =
-                DataType::Struct(vec![Field::new("str", DataType::Utf8, false)]);
-            arrow_fields.push(Field::new(
-                "my_list",
-                DataType::List(Box::new(Field::new("array", arrow_struct, true))),
-                true,
-            ));
-        }
-
-        // // List<OneTuple<String>> (nullable list, non-null elements)
-        // optional group my_list (LIST) {
-        //   repeated group my_list_tuple {
-        //     required binary str (UTF8);
-        //   };
-        // }
-        // Special case: group named ends in _tuple
-        {
-            let arrow_struct =
-                DataType::Struct(vec![Field::new("str", DataType::Utf8, false)]);
-            arrow_fields.push(Field::new(
-                "my_list",
-                DataType::List(Box::new(Field::new("my_list_tuple", arrow_struct, true))),
-                true,
-            ));
-        }
-
-        // One-level encoding: Only allows required lists with required cells
-        //   repeated value_type name
-        {
-            arrow_fields.push(Field::new(
-                "name",
-                DataType::List(Box::new(Field::new("name", DataType::Int32, true))),
-                true,
-            ));
-        }
-
-        let parquet_group_type = parse_message_type(message_type).unwrap();
-
-        let parquet_schema = SchemaDescriptor::new(Arc::new(parquet_group_type));
-        let converted_arrow_schema =
-            parquet_to_arrow_schema(&parquet_schema, &None).unwrap();
-        let converted_fields = converted_arrow_schema.fields();
-
-        assert_eq!(arrow_fields.len(), converted_fields.len());
-        for i in 0..arrow_fields.len() {
-            assert_eq!(arrow_fields[i], converted_fields[i]);
-        }
-    }
-
-    #[test]
-    fn test_parquet_list_nullable() {
-        let mut arrow_fields = Vec::new();
-
-        let message_type = "
-        message test_schema {
-          REQUIRED GROUP my_list1 (LIST) {
-            REPEATED GROUP list {
-              OPTIONAL BINARY element (UTF8);
-            }
-          }
-          OPTIONAL GROUP my_list2 (LIST) {
-            REPEATED GROUP list {
-              REQUIRED BINARY element (UTF8);
-            }
-          }
-          REQUIRED GROUP my_list3 (LIST) {
-            REPEATED GROUP list {
-              REQUIRED BINARY element (UTF8);
-            }
-          }
-        }
-        ";
-
-        // // List<String> (list non-null, elements nullable)
-        // required group my_list1 (LIST) {
-        //   repeated group list {
-        //     optional binary element (UTF8);
-        //   }
-        // }
-        {
-            arrow_fields.push(Field::new(
-                "my_list1",
-                DataType::List(Box::new(Field::new("element", DataType::Utf8, true))),
-                false,
-            ));
-        }
-
-        // // List<String> (list nullable, elements non-null)
-        // optional group my_list2 (LIST) {
-        //   repeated group list {
-        //     required binary element (UTF8);
-        //   }
-        // }
-        {
-            arrow_fields.push(Field::new(
-                "my_list2",
-                DataType::List(Box::new(Field::new("element", DataType::Utf8, false))),
-                true,
-            ));
-        }
-
-        // // List<String> (list non-null, elements non-null)
-        // repeated group my_list3 (LIST) {
-        //   repeated group list {
-        //     required binary element (UTF8);
-        //   }
-        // }
-        {
-            arrow_fields.push(Field::new(
-                "my_list3",
-                DataType::List(Box::new(Field::new("element", DataType::Utf8, false))),
-                false,
-            ));
-        }
-
-        let parquet_group_type = parse_message_type(message_type).unwrap();
-
-        let parquet_schema = SchemaDescriptor::new(Arc::new(parquet_group_type));
-        let converted_arrow_schema =
-            parquet_to_arrow_schema(&parquet_schema, &None).unwrap();
-        let converted_fields = converted_arrow_schema.fields();
-
-        assert_eq!(arrow_fields.len(), converted_fields.len());
-        for i in 0..arrow_fields.len() {
-            assert_eq!(arrow_fields[i], converted_fields[i]);
-        }
-    }
-
-    #[test]
-    fn test_nested_schema() {
-        let mut arrow_fields = Vec::new();
-        {
-            let group1_fields = vec![
-                Field::new("leaf1", DataType::Boolean, false),
-                Field::new("leaf2", DataType::Int32, false),
-            ];
-            let group1_struct =
-                Field::new("group1", DataType::Struct(group1_fields), false);
-            arrow_fields.push(group1_struct);
-
-            let leaf3_field = Field::new("leaf3", DataType::Int64, false);
-            arrow_fields.push(leaf3_field);
-        }
-
-        let message_type = "
-        message test_schema {
-          REQUIRED GROUP group1 {
-            REQUIRED BOOLEAN leaf1;
-            REQUIRED INT32 leaf2;
-          }
-          REQUIRED INT64 leaf3;
-        }
-        ";
-        let parquet_group_type = parse_message_type(message_type).unwrap();
-
-        let parquet_schema = SchemaDescriptor::new(Arc::new(parquet_group_type));
-        let converted_arrow_schema =
-            parquet_to_arrow_schema(&parquet_schema, &None).unwrap();
-        let converted_fields = converted_arrow_schema.fields();
-
-        assert_eq!(arrow_fields.len(), converted_fields.len());
-        for i in 0..arrow_fields.len() {
-            assert_eq!(arrow_fields[i], converted_fields[i]);
-        }
-    }
-
-    #[test]
-    fn test_nested_schema_partial() {
-        let mut arrow_fields = Vec::new();
-        {
-            let group1_fields = vec![Field::new("leaf1", DataType::Int64, false)];
-            let group1 = Field::new("group1", DataType::Struct(group1_fields), false);
-            arrow_fields.push(group1);
-
-            let group2_fields = vec![Field::new("leaf4", DataType::Int64, false)];
-            let group2 = Field::new("group2", DataType::Struct(group2_fields), false);
-            arrow_fields.push(group2);
-
-            arrow_fields.push(Field::new("leaf5", DataType::Int64, false));
-        }
-
-        let message_type = "
-        message test_schema {
-          REQUIRED GROUP group1 {
-            REQUIRED INT64 leaf1;
-            REQUIRED INT64 leaf2;
-          }
-          REQUIRED  GROUP group2 {
-            REQUIRED INT64 leaf3;
-            REQUIRED INT64 leaf4;
-          }
-          REQUIRED INT64 leaf5;
-        }
-        ";
-        let parquet_group_type = parse_message_type(message_type).unwrap();
-
-        // Expected partial arrow schema (columns 0, 3, 4):
-        // required group group1 {
-        //   required int64 leaf1;
-        // }
-        // required group group2 {
-        //   required int64 leaf4;
-        // }
-        // required int64 leaf5;
-
-        let parquet_schema = SchemaDescriptor::new(Arc::new(parquet_group_type));
-        let converted_arrow_schema =
-            parquet_to_arrow_schema_by_columns(&parquet_schema, vec![0, 3, 4], &None)
-                .unwrap();
-        let converted_fields = converted_arrow_schema.fields();
-
-        assert_eq!(arrow_fields.len(), converted_fields.len());
-        for i in 0..arrow_fields.len() {
-            assert_eq!(arrow_fields[i], converted_fields[i]);
-        }
-    }
-
-    #[test]
-    fn test_nested_schema_partial_ordering() {
-        let mut arrow_fields = Vec::new();
-        {
-            let group2_fields = vec![Field::new("leaf4", DataType::Int64, false)];
-            let group2 = Field::new("group2", DataType::Struct(group2_fields), false);
-            arrow_fields.push(group2);
-
-            arrow_fields.push(Field::new("leaf5", DataType::Int64, false));
-
-            let group1_fields = vec![Field::new("leaf1", DataType::Int64, false)];
-            let group1 = Field::new("group1", DataType::Struct(group1_fields), false);
-            arrow_fields.push(group1);
-        }
-
-        let message_type = "
-        message test_schema {
-          REQUIRED GROUP group1 {
-            REQUIRED INT64 leaf1;
-            REQUIRED INT64 leaf2;
-          }
-          REQUIRED  GROUP group2 {
-            REQUIRED INT64 leaf3;
-            REQUIRED INT64 leaf4;
-          }
-          REQUIRED INT64 leaf5;
-        }
-        ";
-        let parquet_group_type = parse_message_type(message_type).unwrap();
-
-        // Expected partial arrow schema (columns 3, 4, 0):
-        // required group group1 {
-        //   required int64 leaf1;
-        // }
-        // required group group2 {
-        //   required int64 leaf4;
-        // }
-        // required int64 leaf5;
-
-        let parquet_schema = SchemaDescriptor::new(Arc::new(parquet_group_type));
-        let converted_arrow_schema =
-            parquet_to_arrow_schema_by_columns(&parquet_schema, vec![3, 4, 0], &None)
-                .unwrap();
-        let converted_fields = converted_arrow_schema.fields();
-
-        assert_eq!(arrow_fields.len(), converted_fields.len());
-        for i in 0..arrow_fields.len() {
-            assert_eq!(arrow_fields[i], converted_fields[i]);
-        }
-    }
-
-    #[test]
-    fn test_repeated_nested_schema() {
-        let mut arrow_fields = Vec::new();
-        {
-            arrow_fields.push(Field::new("leaf1", DataType::Int32, true));
-
-            let inner_group_list = Field::new(
-                "innerGroup",
-                DataType::List(Box::new(Field::new(
-                    "innerGroup",
-                    DataType::Struct(vec![Field::new("leaf3", DataType::Int32, true)]),
-                    true,
-                ))),
-                true,
-            );
-
-            let outer_group_list = Field::new(
-                "outerGroup",
-                DataType::List(Box::new(Field::new(
-                    "outerGroup",
-                    DataType::Struct(vec![
-                        Field::new("leaf2", DataType::Int32, true),
-                        inner_group_list,
-                    ]),
-                    true,
-                ))),
-                true,
-            );
-            arrow_fields.push(outer_group_list);
-        }
-
-        let message_type = "
-        message test_schema {
-          OPTIONAL INT32 leaf1;
-          REPEATED GROUP outerGroup {
-            OPTIONAL INT32 leaf2;
-            REPEATED GROUP innerGroup {
-              OPTIONAL INT32 leaf3;
-            }
-          }
-        }
-        ";
-        let parquet_group_type = parse_message_type(message_type).unwrap();
-
-        let parquet_schema = SchemaDescriptor::new(Arc::new(parquet_group_type));
-        let converted_arrow_schema =
-            parquet_to_arrow_schema(&parquet_schema, &None).unwrap();
-        let converted_fields = converted_arrow_schema.fields();
-
-        assert_eq!(arrow_fields.len(), converted_fields.len());
-        for i in 0..arrow_fields.len() {
-            assert_eq!(arrow_fields[i], converted_fields[i]);
-        }
-    }
-
-    #[test]
-    fn test_column_desc_to_field() {
-        let message_type = "
-        message test_schema {
-            REQUIRED BOOLEAN boolean;
-            REQUIRED INT32   int8  (INT_8);
-            REQUIRED INT32   uint8 (INTEGER(8,false));
-            REQUIRED INT32   int16 (INT_16);
-            REQUIRED INT32   uint16 (INTEGER(16,false));
-            REQUIRED INT32   int32;
-            REQUIRED INT64   int64;
-            OPTIONAL DOUBLE  double;
-            OPTIONAL FLOAT   float;
-            OPTIONAL BINARY  string (UTF8);
-            REPEATED BOOLEAN bools;
-            OPTIONAL INT32   date       (DATE);
-            OPTIONAL INT32   time_milli (TIME_MILLIS);
-            OPTIONAL INT64   time_micro (TIME_MICROS);
-            OPTIONAL INT64   time_nano (TIME(NANOS,false));
-            OPTIONAL INT64   ts_milli (TIMESTAMP_MILLIS);
-            REQUIRED INT64   ts_micro (TIMESTAMP_MICROS);
-            REQUIRED INT64   ts_nano (TIMESTAMP(NANOS,true));
-        }
-        ";
-        let parquet_group_type = parse_message_type(message_type).unwrap();
-
-        let parquet_schema = SchemaDescriptor::new(Arc::new(parquet_group_type));
-        let converted_arrow_fields = parquet_schema
-            .columns()
-            .iter()
-            .map(|c| parquet_to_arrow_field(c).unwrap())
-            .collect::<Vec<Field>>();
-
-        let arrow_fields = vec![
-            Field::new("boolean", DataType::Boolean, false),
-            Field::new("int8", DataType::Int8, false),
-            Field::new("uint8", DataType::UInt8, false),
-            Field::new("int16", DataType::Int16, false),
-            Field::new("uint16", DataType::UInt16, false),
-            Field::new("int32", DataType::Int32, false),
-            Field::new("int64", DataType::Int64, false),
-            Field::new("double", DataType::Float64, true),
-            Field::new("float", DataType::Float32, true),
-            Field::new("string", DataType::Utf8, true),
-            Field::new(
-                "bools",
-                DataType::List(Box::new(Field::new("bools", DataType::Boolean, true))),
-                true,
-            ),
-            Field::new("date", DataType::Date32, true),
-            Field::new("time_milli", DataType::Time32(TimeUnit::Millisecond), true),
-            Field::new("time_micro", DataType::Time64(TimeUnit::Microsecond), true),
-            Field::new("time_nano", DataType::Time64(TimeUnit::Nanosecond), true),
-            Field::new(
-                "ts_milli",
-                DataType::Timestamp(TimeUnit::Millisecond, None),
-                true,
-            ),
-            Field::new(
-                "ts_micro",
-                DataType::Timestamp(TimeUnit::Microsecond, None),
-                false,
-            ),
-            Field::new(
-                "ts_nano",
-                DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".to_string())),
-                false,
-            ),
-        ];
-
-        assert_eq!(arrow_fields, converted_arrow_fields);
-    }
-
-    #[test]
-    fn test_field_to_column_desc() {
-        let message_type = "
-        message arrow_schema {
-            REQUIRED BOOLEAN boolean;
-            REQUIRED INT32   int8  (INT_8);
-            REQUIRED INT32   int16 (INTEGER(16,true));
-            REQUIRED INT32   int32;
-            REQUIRED INT64   int64;
-            OPTIONAL DOUBLE  double;
-            OPTIONAL FLOAT   float;
-            OPTIONAL BINARY  string (STRING);
-            OPTIONAL GROUP   bools (LIST) {
-                REPEATED GROUP list {
-                    OPTIONAL BOOLEAN element;
-                }
-            }
-            REQUIRED GROUP   bools_non_null (LIST) {
-                REPEATED GROUP list {
-                    REQUIRED BOOLEAN element;
-                }
-            }
-            OPTIONAL INT32   date       (DATE);
-            OPTIONAL INT32   time_milli (TIME(MILLIS,false));
-            OPTIONAL INT64   time_micro (TIME_MICROS);
-            OPTIONAL INT64   ts_milli (TIMESTAMP_MILLIS);
-            REQUIRED INT64   ts_micro (TIMESTAMP(MICROS,false));
-            REQUIRED GROUP struct {
-                REQUIRED BOOLEAN bools;
-                REQUIRED INT32 uint32 (INTEGER(32,false));
-                REQUIRED GROUP   int32 (LIST) {
-                    REPEATED GROUP list {
-                        OPTIONAL INT32 element;
-                    }
-                }
-            }
-            REQUIRED BINARY  dictionary_strings (STRING);
-        }
-        ";
-        let parquet_group_type = parse_message_type(message_type).unwrap();
-
-        let parquet_schema = SchemaDescriptor::new(Arc::new(parquet_group_type));
-
-        let arrow_fields = vec![
-            Field::new("boolean", DataType::Boolean, false),
-            Field::new("int8", DataType::Int8, false),
-            Field::new("int16", DataType::Int16, false),
-            Field::new("int32", DataType::Int32, false),
-            Field::new("int64", DataType::Int64, false),
-            Field::new("double", DataType::Float64, true),
-            Field::new("float", DataType::Float32, true),
-            Field::new("string", DataType::Utf8, true),
-            Field::new(
-                "bools",
-                DataType::List(Box::new(Field::new("element", DataType::Boolean, true))),
-                true,
-            ),
-            Field::new(
-                "bools_non_null",
-                DataType::List(Box::new(Field::new("element", DataType::Boolean, false))),
-                false,
-            ),
-            Field::new("date", DataType::Date32, true),
-            Field::new("time_milli", DataType::Time32(TimeUnit::Millisecond), true),
-            Field::new("time_micro", DataType::Time64(TimeUnit::Microsecond), true),
-            Field::new(
-                "ts_milli",
-                DataType::Timestamp(TimeUnit::Millisecond, None),
-                true,
-            ),
-            Field::new(
-                "ts_micro",
-                DataType::Timestamp(TimeUnit::Microsecond, None),
-                false,
-            ),
-            Field::new(
-                "struct",
-                DataType::Struct(vec![
-                    Field::new("bools", DataType::Boolean, false),
-                    Field::new("uint32", DataType::UInt32, false),
-                    Field::new(
-                        "int32",
-                        DataType::List(Box::new(Field::new(
-                            "element",
-                            DataType::Int32,
-                            true,
-                        ))),
-                        false,
-                    ),
-                ]),
-                false,
-            ),
-            Field::new(
-                "dictionary_strings",
-                DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
-                false,
-            ),
-        ];
-        let arrow_schema = Schema::new(arrow_fields);
-        let converted_arrow_schema = arrow_to_parquet_schema(&arrow_schema).unwrap();
-
-        assert_eq!(
-            parquet_schema.columns().len(),
-            converted_arrow_schema.columns().len()
-        );
-        parquet_schema
-            .columns()
-            .iter()
-            .zip(converted_arrow_schema.columns())
-            .for_each(|(a, b)| {
-                // Only check logical type if it's set on the Parquet side.
-                // This is because the Arrow conversion always sets logical type,
-                // even if there wasn't originally one.
-                // This is not an issue, but is an inconvenience for this test.
-                match a.logical_type() {
-                    Some(_) => {
-                        assert_eq!(a, b)
-                    }
-                    None => {
-                        assert_eq!(a.name(), b.name());
-                        assert_eq!(a.physical_type(), b.physical_type());
-                        assert_eq!(a.converted_type(), b.converted_type());
-                    }
-                };
-            });
-    }
-
-    #[test]
-    #[should_panic(expected = "Parquet does not support writing empty structs")]
-    fn test_empty_struct_field() {
-        let arrow_fields = vec![Field::new("struct", DataType::Struct(vec![]), false)];
-        let arrow_schema = Schema::new(arrow_fields);
-        let converted_arrow_schema = arrow_to_parquet_schema(&arrow_schema);
-
-        assert!(converted_arrow_schema.is_err());
-        converted_arrow_schema.unwrap();
-    }
-
-    #[test]
-    fn test_metadata() {
-        let message_type = "
-        message test_schema {
-            OPTIONAL BINARY  string (STRING);
-        }
-        ";
-        let parquet_group_type = parse_message_type(message_type).unwrap();
-
-        let mut key_value_metadata: Vec<KeyValue> = Vec::new();
-        key_value_metadata.push(KeyValue::new("foo".to_owned(), Some("bar".to_owned())));
-        key_value_metadata.push(KeyValue::new("baz".to_owned(), None));
-
-        let mut expected_metadata: HashMap<String, String> = HashMap::new();
-        expected_metadata.insert("foo".to_owned(), "bar".to_owned());
-
-        let parquet_schema = SchemaDescriptor::new(Arc::new(parquet_group_type));
-        let converted_arrow_schema =
-            parquet_to_arrow_schema(&parquet_schema, &Some(key_value_metadata)).unwrap();
-
-        assert_eq!(converted_arrow_schema.metadata(), &expected_metadata);
-    }
-
-    #[test]
-    fn test_arrow_schema_roundtrip() -> Result<()> {
-        // This tests the roundtrip of an Arrow schema
-        // Fields that are commented out fail roundtrip tests or are unsupported by the writer
-        let metadata: HashMap<String, String> =
-            [("Key".to_string(), "Value".to_string())]
-                .iter()
-                .cloned()
-                .collect();
-
-        let schema = Schema::new_with_metadata(
-            vec![
-                Field::new("c1", DataType::Utf8, false),
-                Field::new("c2", DataType::Binary, false),
-                Field::new("c3", DataType::FixedSizeBinary(3), false),
-                Field::new("c4", DataType::Boolean, false),
-                Field::new("c5", DataType::Date32, false),
-                Field::new("c6", DataType::Date64, false),
-                Field::new("c7", DataType::Time32(TimeUnit::Second), false),
-                Field::new("c8", DataType::Time32(TimeUnit::Millisecond), false),
-                Field::new("c13", DataType::Time64(TimeUnit::Microsecond), false),
-                Field::new("c14", DataType::Time64(TimeUnit::Nanosecond), false),
-                Field::new("c15", DataType::Timestamp(TimeUnit::Second, None), false),
-                Field::new(
-                    "c16",
-                    DataType::Timestamp(TimeUnit::Millisecond, Some("UTC".to_string())),
-                    false,
-                ),
-                Field::new(
-                    "c17",
-                    DataType::Timestamp(
-                        TimeUnit::Microsecond,
-                        Some("Africa/Johannesburg".to_string()),
-                    ),
-                    false,
-                ),
-                Field::new(
-                    "c18",
-                    DataType::Timestamp(TimeUnit::Nanosecond, None),
-                    false,
-                ),
-                Field::new("c19", DataType::Interval(IntervalUnit::DayTime), false),
-                Field::new("c20", DataType::Interval(IntervalUnit::YearMonth), false),
-                Field::new(
-                    "c21",
-                    DataType::List(Box::new(Field::new("list", DataType::Boolean, true))),
-                    false,
-                ),
-                // Field::new(
-                //     "c22",
-                //     DataType::FixedSizeList(Box::new(DataType::Boolean), 5),
-                //     false,
-                // ),
-                // Field::new(
-                //     "c23",
-                //     DataType::List(Box::new(DataType::LargeList(Box::new(
-                //         DataType::Struct(vec![
-                //             Field::new("a", DataType::Int16, true),
-                //             Field::new("b", DataType::Float64, false),
-                //         ]),
-                //     )))),
-                //     true,
-                // ),
-                Field::new(
-                    "c24",
-                    DataType::Struct(vec![
-                        Field::new("a", DataType::Utf8, false),
-                        Field::new("b", DataType::UInt16, false),
-                    ]),
-                    false,
-                ),
-                Field::new("c25", DataType::Interval(IntervalUnit::YearMonth), true),
-                Field::new("c26", DataType::Interval(IntervalUnit::DayTime), true),
-                // Field::new("c27", DataType::Duration(TimeUnit::Second), false),
-                // Field::new("c28", DataType::Duration(TimeUnit::Millisecond), false),
-                // Field::new("c29", DataType::Duration(TimeUnit::Microsecond), false),
-                // Field::new("c30", DataType::Duration(TimeUnit::Nanosecond), false),
-                Field::new_dict(
-                    "c31",
-                    DataType::Dictionary(
-                        Box::new(DataType::Int32),
-                        Box::new(DataType::Utf8),
-                    ),
-                    true,
-                    123,
-                    true,
-                ),
-                Field::new("c32", DataType::LargeBinary, true),
-                Field::new("c33", DataType::LargeUtf8, true),
-                // Field::new(
-                //     "c34",
-                //     DataType::LargeList(Box::new(DataType::List(Box::new(
-                //         DataType::Struct(vec![
-                //             Field::new("a", DataType::Int16, true),
-                //             Field::new("b", DataType::Float64, true),
-                //         ]),
-                //     )))),
-                //     true,
-                // ),
-                Field::new("c35", DataType::Null, true),
-                Field::new("c36", DataType::Decimal(2, 1), false),
-                Field::new("c37", DataType::Decimal(50, 20), false),
-                Field::new("c38", DataType::Decimal(18, 12), true),
-            ],
-            metadata,
-        );
-
-        // write to an empty parquet file so that schema is serialized
-        let file = get_temp_file("test_arrow_schema_roundtrip.parquet", &[]);
-        let mut writer = ArrowWriter::try_new(
-            file.try_clone().unwrap(),
-            Arc::new(schema.clone()),
-            None,
-        )?;
-        writer.close()?;
-
-        // read file back
-        let parquet_reader = SerializedFileReader::try_from(file)?;
-        let mut arrow_reader = ParquetFileArrowReader::new(Arc::new(parquet_reader));
-        let read_schema = arrow_reader.get_schema()?;
-        assert_eq!(schema, read_schema);
-
-        // read all fields by columns
-        let partial_read_schema =
-            arrow_reader.get_schema_by_columns(0..(schema.fields().len()), false)?;
-        assert_eq!(schema, partial_read_schema);
-
-        Ok(())
-    }
-
-    #[test]
-    #[ignore = "Roundtrip of lists currently fails because we don't check their types correctly in the Arrow schema"]
-    fn test_arrow_schema_roundtrip_lists() -> Result<()> {
-        let metadata: HashMap<String, String> =
-            [("Key".to_string(), "Value".to_string())]
-                .iter()
-                .cloned()
-                .collect();
-
-        let schema = Schema::new_with_metadata(
-            vec![
-                Field::new(
-                    "c21",
-                    DataType::List(Box::new(Field::new(
-                        "array",
-                        DataType::Boolean,
-                        true,
-                    ))),
-                    false,
-                ),
-                Field::new(
-                    "c22",
-                    DataType::FixedSizeList(
-                        Box::new(Field::new("items", DataType::Boolean, false)),
-                        5,
-                    ),
-                    false,
-                ),
-                Field::new(
-                    "c23",
-                    DataType::List(Box::new(Field::new(
-                        "items",
-                        DataType::LargeList(Box::new(Field::new(
-                            "items",
-                            DataType::Struct(vec![
-                                Field::new("a", DataType::Int16, true),
-                                Field::new("b", DataType::Float64, false),
-                            ]),
-                            true,
-                        ))),
-                        true,
-                    ))),
-                    true,
-                ),
-            ],
-            metadata,
-        );
-
-        // write to an empty parquet file so that schema is serialized
-        let file = get_temp_file("test_arrow_schema_roundtrip_lists.parquet", &[]);
-        let mut writer = ArrowWriter::try_new(
-            file.try_clone().unwrap(),
-            Arc::new(schema.clone()),
-            None,
-        )?;
-        writer.close()?;
-
-        // read file back
-        let parquet_reader = SerializedFileReader::try_from(file)?;
-        let mut arrow_reader = ParquetFileArrowReader::new(Arc::new(parquet_reader));
-        let read_schema = arrow_reader.get_schema()?;
-        assert_eq!(schema, read_schema);
-
-        // read all fields by columns
-        let partial_read_schema =
-            arrow_reader.get_schema_by_columns(0..(schema.fields().len()), false)?;
-        assert_eq!(schema, partial_read_schema);
-
-        Ok(())
-    }
-}
diff --git a/rust/parquet/src/basic.rs b/rust/parquet/src/basic.rs
deleted file mode 100644
index 631257e0ed1..00000000000
--- a/rust/parquet/src/basic.rs
+++ /dev/null
@@ -1,1969 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Contains Rust mappings for Thrift definition.
-//! Refer to `parquet.thrift` file to see raw definitions.
-
-use std::{convert, fmt, result, str};
-
-use parquet_format as parquet;
-
-use crate::errors::ParquetError;
-
-// Re-export parquet_format types used in this module
-pub use parquet_format::{
-    BsonType, DateType, DecimalType, EnumType, IntType, JsonType, ListType, MapType,
-    NullType, StringType, TimeType, TimeUnit, TimestampType, UUIDType,
-};
-
-// ----------------------------------------------------------------------
-// Types from the Thrift definition
-
-// ----------------------------------------------------------------------
-// Mirrors `parquet::Type`
-
-/// Types supported by Parquet.
-/// These physical types are intended to be used in combination with the encodings to
-/// control the on disk storage format.
-/// For example INT16 is not included as a type since a good encoding of INT32
-/// would handle this.
-#[derive(Debug, Clone, Copy, PartialEq)]
-pub enum Type {
-    BOOLEAN,
-    INT32,
-    INT64,
-    INT96,
-    FLOAT,
-    DOUBLE,
-    BYTE_ARRAY,
-    FIXED_LEN_BYTE_ARRAY,
-}
-
-// ----------------------------------------------------------------------
-// Mirrors `parquet::ConvertedType`
-
-/// Common types (converted types) used by frameworks when using Parquet.
-/// This helps map between types in those frameworks to the base types in Parquet.
-/// This is only metadata and not needed to read or write the data.
-///
-/// This struct was renamed from `LogicalType` in version 4.0.0.
-/// If targeting Parquet format 2.4.0 or above, please use [LogicalType] instead.
-#[derive(Debug, Clone, Copy, PartialEq)]
-pub enum ConvertedType {
-    NONE,
-    /// A BYTE_ARRAY actually contains UTF8 encoded chars.
-    UTF8,
-
-    /// A map is converted as an optional field containing a repeated key/value pair.
-    MAP,
-
-    /// A key/value pair is converted into a group of two fields.
-    MAP_KEY_VALUE,
-
-    /// A list is converted into an optional field containing a repeated field for its
-    /// values.
-    LIST,
-
-    /// An enum is converted into a binary field
-    ENUM,
-
-    /// A decimal value.
-    /// This may be used to annotate binary or fixed primitive types. The
-    /// underlying byte array stores the unscaled value encoded as two's
-    /// complement using big-endian byte order (the most significant byte is the
-    /// zeroth element).
-    ///
-    /// This must be accompanied by a (maximum) precision and a scale in the
-    /// SchemaElement. The precision specifies the number of digits in the decimal
-    /// and the scale stores the location of the decimal point. For example 1.23
-    /// would have precision 3 (3 total digits) and scale 2 (the decimal point is
-    /// 2 digits over).
-    DECIMAL,
-
-    /// A date stored as days since Unix epoch, encoded as the INT32 physical type.
-    DATE,
-
-    /// The total number of milliseconds since midnight. The value is stored as an INT32
-    /// physical type.
-    TIME_MILLIS,
-
-    /// The total number of microseconds since midnight. The value is stored as an INT64
-    /// physical type.
-    TIME_MICROS,
-
-    /// Date and time recorded as milliseconds since the Unix epoch.
-    /// Recorded as a physical type of INT64.
-    TIMESTAMP_MILLIS,
-
-    /// Date and time recorded as microseconds since the Unix epoch.
-    /// The value is stored as an INT64 physical type.
-    TIMESTAMP_MICROS,
-
-    /// An unsigned 8 bit integer value stored as INT32 physical type.
-    UINT_8,
-
-    /// An unsigned 16 bit integer value stored as INT32 physical type.
-    UINT_16,
-
-    /// An unsigned 32 bit integer value stored as INT32 physical type.
-    UINT_32,
-
-    /// An unsigned 64 bit integer value stored as INT64 physical type.
-    UINT_64,
-
-    /// A signed 8 bit integer value stored as INT32 physical type.
-    INT_8,
-
-    /// A signed 16 bit integer value stored as INT32 physical type.
-    INT_16,
-
-    /// A signed 32 bit integer value stored as INT32 physical type.
-    INT_32,
-
-    /// A signed 64 bit integer value stored as INT64 physical type.
-    INT_64,
-
-    /// A JSON document embedded within a single UTF8 column.
-    JSON,
-
-    /// A BSON document embedded within a single BINARY column.
-    BSON,
-
-    /// An interval of time.
-    ///
-    /// This type annotates data stored as a FIXED_LEN_BYTE_ARRAY of length 12.
-    /// This data is composed of three separate little endian unsigned integers.
-    /// Each stores a component of a duration of time. The first integer identifies
-    /// the number of months associated with the duration, the second identifies
-    /// the number of days associated with the duration and the third identifies
-    /// the number of milliseconds associated with the provided duration.
-    /// This duration of time is independent of any particular timezone or date.
-    INTERVAL,
-}
-
-// ----------------------------------------------------------------------
-// Mirrors `parquet::LogicalType`
-
-/// Logical types used by version 2.4.0+ of the Parquet format.
-///
-/// This is an *entirely new* struct as of version
-/// 4.0.0. The struct previously named `LogicalType` was renamed to
-/// [`ConvertedType`]. Please see the README.md for more details.
-#[derive(Debug, Clone, PartialEq)]
-pub enum LogicalType {
-    STRING(StringType),
-    MAP(MapType),
-    LIST(ListType),
-    ENUM(EnumType),
-    DECIMAL(DecimalType),
-    DATE(DateType),
-    TIME(TimeType),
-    TIMESTAMP(TimestampType),
-    INTEGER(IntType),
-    UNKNOWN(NullType),
-    JSON(JsonType),
-    BSON(BsonType),
-    UUID(UUIDType),
-}
-
-// ----------------------------------------------------------------------
-// Mirrors `parquet::FieldRepetitionType`
-
-/// Representation of field types in schema.
-#[derive(Debug, Clone, Copy, PartialEq)]
-pub enum Repetition {
-    /// Field is required (can not be null) and each record has exactly 1 value.
-    REQUIRED,
-    /// Field is optional (can be null) and each record has 0 or 1 values.
-    OPTIONAL,
-    /// Field is repeated and can contain 0 or more values.
-    REPEATED,
-}
-
-// ----------------------------------------------------------------------
-// Mirrors `parquet::Encoding`
-
-/// Encodings supported by Parquet.
-/// Not all encodings are valid for all types. These enums are also used to specify the
-/// encoding of definition and repetition levels.
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
-pub enum Encoding {
-    /// Default byte encoding.
-    /// - BOOLEAN - 1 bit per value, 0 is false; 1 is true.
-    /// - INT32 - 4 bytes per value, stored as little-endian.
-    /// - INT64 - 8 bytes per value, stored as little-endian.
-    /// - FLOAT - 4 bytes per value, stored as little-endian.
-    /// - DOUBLE - 8 bytes per value, stored as little-endian.
-    /// - BYTE_ARRAY - 4 byte length stored as little endian, followed by bytes.
-    /// - FIXED_LEN_BYTE_ARRAY - just the bytes are stored.
-    PLAIN,
-
-    /// **Deprecated** dictionary encoding.
-    ///
-    /// The values in the dictionary are encoded using PLAIN encoding.
-    /// Since it is deprecated, RLE_DICTIONARY encoding is used for a data page, and
-    /// PLAIN encoding is used for dictionary page.
-    PLAIN_DICTIONARY,
-
-    /// Group packed run length encoding.
-    ///
-    /// Usable for definition/repetition levels encoding and boolean values.
-    RLE,
-
-    /// Bit packed encoding.
-    ///
-    /// This can only be used if the data has a known max width.
-    /// Usable for definition/repetition levels encoding.
-    BIT_PACKED,
-
-    /// Delta encoding for integers, either INT32 or INT64.
-    ///
-    /// Works best on sorted data.
-    DELTA_BINARY_PACKED,
-
-    /// Encoding for byte arrays to separate the length values and the data.
-    ///
-    /// The lengths are encoded using DELTA_BINARY_PACKED encoding.
-    DELTA_LENGTH_BYTE_ARRAY,
-
-    /// Incremental encoding for byte arrays.
-    ///
-    /// Prefix lengths are encoded using DELTA_BINARY_PACKED encoding.
-    /// Suffixes are stored using DELTA_LENGTH_BYTE_ARRAY encoding.
-    DELTA_BYTE_ARRAY,
-
-    /// Dictionary encoding.
-    ///
-    /// The ids are encoded using the RLE encoding.
-    RLE_DICTIONARY,
-}
-
-// ----------------------------------------------------------------------
-// Mirrors `parquet::CompressionCodec`
-
-/// Supported compression algorithms.
-#[derive(Debug, Clone, Copy, PartialEq)]
-pub enum Compression {
-    UNCOMPRESSED,
-    SNAPPY,
-    GZIP,
-    LZO,
-    BROTLI,
-    LZ4,
-    ZSTD,
-}
-
-// ----------------------------------------------------------------------
-// Mirrors `parquet::PageType`
-
-/// Available data pages for Parquet file format.
-/// Note that some of the page types may not be supported.
-#[derive(Debug, Clone, Copy, PartialEq)]
-pub enum PageType {
-    DATA_PAGE,
-    INDEX_PAGE,
-    DICTIONARY_PAGE,
-    DATA_PAGE_V2,
-}
-
-// ----------------------------------------------------------------------
-// Mirrors `parquet::ColumnOrder`
-
-/// Sort order for page and column statistics.
-///
-/// Types are associated with sort orders and column stats are aggregated using a sort
-/// order, and a sort order should be considered when comparing values with statistics
-/// min/max.
-///
-/// See reference in
-/// <https://github.com/apache/parquet-cpp/blob/master/src/parquet/types.h>
-#[derive(Debug, Clone, Copy, PartialEq)]
-pub enum SortOrder {
-    /// Signed (either value or legacy byte-wise) comparison.
-    SIGNED,
-    /// Unsigned (depending on physical type either value or byte-wise) comparison.
-    UNSIGNED,
-    /// Comparison is undefined.
-    UNDEFINED,
-}
-
-/// Column order that specifies what method was used to aggregate min/max values for
-/// statistics.
-///
-/// If column order is undefined, then it is the legacy behaviour and all values should
-/// be compared as signed values/bytes.
-#[derive(Debug, Clone, Copy, PartialEq)]
-pub enum ColumnOrder {
-    /// Column uses the order defined by its logical or physical type
-    /// (if there is no logical type), parquet-format 2.4.0+.
-    TYPE_DEFINED_ORDER(SortOrder),
-    /// Undefined column order, means legacy behaviour before parquet-format 2.4.0.
-    /// Sort order is always SIGNED.
-    UNDEFINED,
-}
-
-impl ColumnOrder {
-    /// Returns sort order for a physical/logical type.
-    pub fn get_sort_order(
-        logical_type: Option<LogicalType>,
-        converted_type: ConvertedType,
-        physical_type: Type,
-    ) -> SortOrder {
-        // TODO: Should this take converted and logical type, for compatibility?
-        match logical_type {
-            Some(logical) => match logical {
-                LogicalType::STRING(_)
-                | LogicalType::ENUM(_)
-                | LogicalType::JSON(_)
-                | LogicalType::BSON(_) => SortOrder::UNSIGNED,
-                LogicalType::INTEGER(t) => match t.is_signed {
-                    true => SortOrder::SIGNED,
-                    false => SortOrder::UNSIGNED,
-                },
-                LogicalType::MAP(_) | LogicalType::LIST(_) => SortOrder::UNDEFINED,
-                LogicalType::DECIMAL(_) => SortOrder::SIGNED,
-                LogicalType::DATE(_) => SortOrder::SIGNED,
-                LogicalType::TIME(_) => SortOrder::SIGNED,
-                LogicalType::TIMESTAMP(_) => SortOrder::SIGNED,
-                LogicalType::UNKNOWN(_) => SortOrder::UNDEFINED,
-                LogicalType::UUID(_) => SortOrder::UNSIGNED,
-            },
-            // Fall back to converted type
-            None => Self::get_converted_sort_order(converted_type, physical_type),
-        }
-    }
-
-    fn get_converted_sort_order(
-        converted_type: ConvertedType,
-        physical_type: Type,
-    ) -> SortOrder {
-        match converted_type {
-            // Unsigned byte-wise comparison.
-            ConvertedType::UTF8
-            | ConvertedType::JSON
-            | ConvertedType::BSON
-            | ConvertedType::ENUM => SortOrder::UNSIGNED,
-
-            ConvertedType::INT_8
-            | ConvertedType::INT_16
-            | ConvertedType::INT_32
-            | ConvertedType::INT_64 => SortOrder::SIGNED,
-
-            ConvertedType::UINT_8
-            | ConvertedType::UINT_16
-            | ConvertedType::UINT_32
-            | ConvertedType::UINT_64 => SortOrder::UNSIGNED,
-
-            // Signed comparison of the represented value.
-            ConvertedType::DECIMAL => SortOrder::SIGNED,
-
-            ConvertedType::DATE => SortOrder::SIGNED,
-
-            ConvertedType::TIME_MILLIS
-            | ConvertedType::TIME_MICROS
-            | ConvertedType::TIMESTAMP_MILLIS
-            | ConvertedType::TIMESTAMP_MICROS => SortOrder::SIGNED,
-
-            ConvertedType::INTERVAL => SortOrder::UNDEFINED,
-
-            ConvertedType::LIST | ConvertedType::MAP | ConvertedType::MAP_KEY_VALUE => {
-                SortOrder::UNDEFINED
-            }
-
-            // Fall back to physical type.
-            ConvertedType::NONE => Self::get_default_sort_order(physical_type),
-        }
-    }
-
-    /// Returns default sort order based on physical type.
-    fn get_default_sort_order(physical_type: Type) -> SortOrder {
-        match physical_type {
-            // Order: false, true
-            Type::BOOLEAN => SortOrder::UNSIGNED,
-            Type::INT32 | Type::INT64 => SortOrder::SIGNED,
-            Type::INT96 => SortOrder::UNDEFINED,
-            // Notes to remember when comparing float/double values:
-            // If the min is a NaN, it should be ignored.
-            // If the max is a NaN, it should be ignored.
-            // If the min is +0, the row group may contain -0 values as well.
-            // If the max is -0, the row group may contain +0 values as well.
-            // When looking for NaN values, min and max should be ignored.
-            Type::FLOAT | Type::DOUBLE => SortOrder::SIGNED,
-            // Unsigned byte-wise comparison
-            Type::BYTE_ARRAY | Type::FIXED_LEN_BYTE_ARRAY => SortOrder::UNSIGNED,
-        }
-    }
-
-    /// Returns sort order associated with this column order.
-    pub fn sort_order(&self) -> SortOrder {
-        match *self {
-            ColumnOrder::TYPE_DEFINED_ORDER(order) => order,
-            ColumnOrder::UNDEFINED => SortOrder::SIGNED,
-        }
-    }
-}
-
-impl fmt::Display for Type {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "{:?}", self)
-    }
-}
-
-impl fmt::Display for ConvertedType {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "{:?}", self)
-    }
-}
-
-impl fmt::Display for Repetition {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "{:?}", self)
-    }
-}
-
-impl fmt::Display for Encoding {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "{:?}", self)
-    }
-}
-
-impl fmt::Display for Compression {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "{:?}", self)
-    }
-}
-
-impl fmt::Display for PageType {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "{:?}", self)
-    }
-}
-
-impl fmt::Display for SortOrder {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "{:?}", self)
-    }
-}
-
-impl fmt::Display for ColumnOrder {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "{:?}", self)
-    }
-}
-
-// ----------------------------------------------------------------------
-// parquet::Type <=> Type conversion
-
-impl convert::From<parquet::Type> for Type {
-    fn from(value: parquet::Type) -> Self {
-        match value {
-            parquet::Type::Boolean => Type::BOOLEAN,
-            parquet::Type::Int32 => Type::INT32,
-            parquet::Type::Int64 => Type::INT64,
-            parquet::Type::Int96 => Type::INT96,
-            parquet::Type::Float => Type::FLOAT,
-            parquet::Type::Double => Type::DOUBLE,
-            parquet::Type::ByteArray => Type::BYTE_ARRAY,
-            parquet::Type::FixedLenByteArray => Type::FIXED_LEN_BYTE_ARRAY,
-        }
-    }
-}
-
-impl convert::From<Type> for parquet::Type {
-    fn from(value: Type) -> Self {
-        match value {
-            Type::BOOLEAN => parquet::Type::Boolean,
-            Type::INT32 => parquet::Type::Int32,
-            Type::INT64 => parquet::Type::Int64,
-            Type::INT96 => parquet::Type::Int96,
-            Type::FLOAT => parquet::Type::Float,
-            Type::DOUBLE => parquet::Type::Double,
-            Type::BYTE_ARRAY => parquet::Type::ByteArray,
-            Type::FIXED_LEN_BYTE_ARRAY => parquet::Type::FixedLenByteArray,
-        }
-    }
-}
-
-// ----------------------------------------------------------------------
-// parquet::ConvertedType <=> ConvertedType conversion
-
-impl convert::From<Option<parquet::ConvertedType>> for ConvertedType {
-    fn from(option: Option<parquet::ConvertedType>) -> Self {
-        match option {
-            None => ConvertedType::NONE,
-            Some(value) => match value {
-                parquet::ConvertedType::Utf8 => ConvertedType::UTF8,
-                parquet::ConvertedType::Map => ConvertedType::MAP,
-                parquet::ConvertedType::MapKeyValue => ConvertedType::MAP_KEY_VALUE,
-                parquet::ConvertedType::List => ConvertedType::LIST,
-                parquet::ConvertedType::Enum => ConvertedType::ENUM,
-                parquet::ConvertedType::Decimal => ConvertedType::DECIMAL,
-                parquet::ConvertedType::Date => ConvertedType::DATE,
-                parquet::ConvertedType::TimeMillis => ConvertedType::TIME_MILLIS,
-                parquet::ConvertedType::TimeMicros => ConvertedType::TIME_MICROS,
-                parquet::ConvertedType::TimestampMillis => {
-                    ConvertedType::TIMESTAMP_MILLIS
-                }
-                parquet::ConvertedType::TimestampMicros => {
-                    ConvertedType::TIMESTAMP_MICROS
-                }
-                parquet::ConvertedType::Uint8 => ConvertedType::UINT_8,
-                parquet::ConvertedType::Uint16 => ConvertedType::UINT_16,
-                parquet::ConvertedType::Uint32 => ConvertedType::UINT_32,
-                parquet::ConvertedType::Uint64 => ConvertedType::UINT_64,
-                parquet::ConvertedType::Int8 => ConvertedType::INT_8,
-                parquet::ConvertedType::Int16 => ConvertedType::INT_16,
-                parquet::ConvertedType::Int32 => ConvertedType::INT_32,
-                parquet::ConvertedType::Int64 => ConvertedType::INT_64,
-                parquet::ConvertedType::Json => ConvertedType::JSON,
-                parquet::ConvertedType::Bson => ConvertedType::BSON,
-                parquet::ConvertedType::Interval => ConvertedType::INTERVAL,
-            },
-        }
-    }
-}
-
-impl convert::From<ConvertedType> for Option<parquet::ConvertedType> {
-    fn from(value: ConvertedType) -> Self {
-        match value {
-            ConvertedType::NONE => None,
-            ConvertedType::UTF8 => Some(parquet::ConvertedType::Utf8),
-            ConvertedType::MAP => Some(parquet::ConvertedType::Map),
-            ConvertedType::MAP_KEY_VALUE => Some(parquet::ConvertedType::MapKeyValue),
-            ConvertedType::LIST => Some(parquet::ConvertedType::List),
-            ConvertedType::ENUM => Some(parquet::ConvertedType::Enum),
-            ConvertedType::DECIMAL => Some(parquet::ConvertedType::Decimal),
-            ConvertedType::DATE => Some(parquet::ConvertedType::Date),
-            ConvertedType::TIME_MILLIS => Some(parquet::ConvertedType::TimeMillis),
-            ConvertedType::TIME_MICROS => Some(parquet::ConvertedType::TimeMicros),
-            ConvertedType::TIMESTAMP_MILLIS => {
-                Some(parquet::ConvertedType::TimestampMillis)
-            }
-            ConvertedType::TIMESTAMP_MICROS => {
-                Some(parquet::ConvertedType::TimestampMicros)
-            }
-            ConvertedType::UINT_8 => Some(parquet::ConvertedType::Uint8),
-            ConvertedType::UINT_16 => Some(parquet::ConvertedType::Uint16),
-            ConvertedType::UINT_32 => Some(parquet::ConvertedType::Uint32),
-            ConvertedType::UINT_64 => Some(parquet::ConvertedType::Uint64),
-            ConvertedType::INT_8 => Some(parquet::ConvertedType::Int8),
-            ConvertedType::INT_16 => Some(parquet::ConvertedType::Int16),
-            ConvertedType::INT_32 => Some(parquet::ConvertedType::Int32),
-            ConvertedType::INT_64 => Some(parquet::ConvertedType::Int64),
-            ConvertedType::JSON => Some(parquet::ConvertedType::Json),
-            ConvertedType::BSON => Some(parquet::ConvertedType::Bson),
-            ConvertedType::INTERVAL => Some(parquet::ConvertedType::Interval),
-        }
-    }
-}
-
-// ----------------------------------------------------------------------
-// parquet::LogicalType <=> LogicalType conversion
-
-impl convert::From<parquet::LogicalType> for LogicalType {
-    fn from(value: parquet::LogicalType) -> Self {
-        match value {
-            parquet::LogicalType::STRING(t) => LogicalType::STRING(t),
-            parquet::LogicalType::MAP(t) => LogicalType::MAP(t),
-            parquet::LogicalType::LIST(t) => LogicalType::LIST(t),
-            parquet::LogicalType::ENUM(t) => LogicalType::ENUM(t),
-            parquet::LogicalType::DECIMAL(t) => LogicalType::DECIMAL(t),
-            parquet::LogicalType::DATE(t) => LogicalType::DATE(t),
-            parquet::LogicalType::TIME(t) => LogicalType::TIME(t),
-            parquet::LogicalType::TIMESTAMP(t) => LogicalType::TIMESTAMP(t),
-            parquet::LogicalType::INTEGER(t) => LogicalType::INTEGER(t),
-            parquet::LogicalType::UNKNOWN(t) => LogicalType::UNKNOWN(t),
-            parquet::LogicalType::JSON(t) => LogicalType::JSON(t),
-            parquet::LogicalType::BSON(t) => LogicalType::BSON(t),
-            parquet::LogicalType::UUID(t) => LogicalType::UUID(t),
-        }
-    }
-}
-
-impl convert::From<LogicalType> for parquet::LogicalType {
-    fn from(value: LogicalType) -> Self {
-        match value {
-            LogicalType::STRING(t) => parquet::LogicalType::STRING(t),
-            LogicalType::MAP(t) => parquet::LogicalType::MAP(t),
-            LogicalType::LIST(t) => parquet::LogicalType::LIST(t),
-            LogicalType::ENUM(t) => parquet::LogicalType::ENUM(t),
-            LogicalType::DECIMAL(t) => parquet::LogicalType::DECIMAL(t),
-            LogicalType::DATE(t) => parquet::LogicalType::DATE(t),
-            LogicalType::TIME(t) => parquet::LogicalType::TIME(t),
-            LogicalType::TIMESTAMP(t) => parquet::LogicalType::TIMESTAMP(t),
-            LogicalType::INTEGER(t) => parquet::LogicalType::INTEGER(t),
-            LogicalType::UNKNOWN(t) => parquet::LogicalType::UNKNOWN(t),
-            LogicalType::JSON(t) => parquet::LogicalType::JSON(t),
-            LogicalType::BSON(t) => parquet::LogicalType::BSON(t),
-            LogicalType::UUID(t) => parquet::LogicalType::UUID(t),
-        }
-    }
-}
-
-// ----------------------------------------------------------------------
-// LogicalType <=> ConvertedType conversion
-
-// Note: To prevent type loss when converting from ConvertedType to LogicalType,
-// the conversion from ConvertedType -> LogicalType is not implemented.
-// Such type loss includes:
-// - Not knowing the decimal scale and precision of ConvertedType
-// - Time and timestamp nanosecond precision, that is not supported in ConvertedType.
-
-impl From<Option<LogicalType>> for ConvertedType {
-    fn from(value: Option<LogicalType>) -> Self {
-        match value {
-            Some(value) => match value {
-                LogicalType::STRING(_) => ConvertedType::UTF8,
-                LogicalType::MAP(_) => ConvertedType::MAP,
-                LogicalType::LIST(_) => ConvertedType::LIST,
-                LogicalType::ENUM(_) => ConvertedType::ENUM,
-                LogicalType::DECIMAL(_) => ConvertedType::DECIMAL,
-                LogicalType::DATE(_) => ConvertedType::DATE,
-                LogicalType::TIME(t) => match t.unit {
-                    TimeUnit::MILLIS(_) => ConvertedType::TIME_MILLIS,
-                    TimeUnit::MICROS(_) => ConvertedType::TIME_MICROS,
-                    TimeUnit::NANOS(_) => ConvertedType::NONE,
-                },
-                LogicalType::TIMESTAMP(t) => match t.unit {
-                    TimeUnit::MILLIS(_) => ConvertedType::TIMESTAMP_MILLIS,
-                    TimeUnit::MICROS(_) => ConvertedType::TIMESTAMP_MICROS,
-                    TimeUnit::NANOS(_) => ConvertedType::NONE,
-                },
-                LogicalType::INTEGER(t) => match (t.bit_width, t.is_signed) {
-                    (8, true) => ConvertedType::INT_8,
-                    (16, true) => ConvertedType::INT_16,
-                    (32, true) => ConvertedType::INT_32,
-                    (64, true) => ConvertedType::INT_64,
-                    (8, false) => ConvertedType::UINT_8,
-                    (16, false) => ConvertedType::UINT_16,
-                    (32, false) => ConvertedType::UINT_32,
-                    (64, false) => ConvertedType::UINT_64,
-                    t => panic!("Integer type {:?} is not supported", t),
-                },
-                LogicalType::UNKNOWN(_) => ConvertedType::NONE,
-                LogicalType::JSON(_) => ConvertedType::JSON,
-                LogicalType::BSON(_) => ConvertedType::BSON,
-                LogicalType::UUID(_) => ConvertedType::NONE,
-            },
-            None => ConvertedType::NONE,
-        }
-    }
-}
-
-// ----------------------------------------------------------------------
-// parquet::FieldRepetitionType <=> Repetition conversion
-
-impl convert::From<parquet::FieldRepetitionType> for Repetition {
-    fn from(value: parquet::FieldRepetitionType) -> Self {
-        match value {
-            parquet::FieldRepetitionType::Required => Repetition::REQUIRED,
-            parquet::FieldRepetitionType::Optional => Repetition::OPTIONAL,
-            parquet::FieldRepetitionType::Repeated => Repetition::REPEATED,
-        }
-    }
-}
-
-impl convert::From<Repetition> for parquet::FieldRepetitionType {
-    fn from(value: Repetition) -> Self {
-        match value {
-            Repetition::REQUIRED => parquet::FieldRepetitionType::Required,
-            Repetition::OPTIONAL => parquet::FieldRepetitionType::Optional,
-            Repetition::REPEATED => parquet::FieldRepetitionType::Repeated,
-        }
-    }
-}
-
-// ----------------------------------------------------------------------
-// parquet::Encoding <=> Encoding conversion
-
-impl convert::From<parquet::Encoding> for Encoding {
-    fn from(value: parquet::Encoding) -> Self {
-        match value {
-            parquet::Encoding::Plain => Encoding::PLAIN,
-            parquet::Encoding::PlainDictionary => Encoding::PLAIN_DICTIONARY,
-            parquet::Encoding::Rle => Encoding::RLE,
-            parquet::Encoding::BitPacked => Encoding::BIT_PACKED,
-            parquet::Encoding::DeltaBinaryPacked => Encoding::DELTA_BINARY_PACKED,
-            parquet::Encoding::DeltaLengthByteArray => Encoding::DELTA_LENGTH_BYTE_ARRAY,
-            parquet::Encoding::DeltaByteArray => Encoding::DELTA_BYTE_ARRAY,
-            parquet::Encoding::RleDictionary => Encoding::RLE_DICTIONARY,
-        }
-    }
-}
-
-impl convert::From<Encoding> for parquet::Encoding {
-    fn from(value: Encoding) -> Self {
-        match value {
-            Encoding::PLAIN => parquet::Encoding::Plain,
-            Encoding::PLAIN_DICTIONARY => parquet::Encoding::PlainDictionary,
-            Encoding::RLE => parquet::Encoding::Rle,
-            Encoding::BIT_PACKED => parquet::Encoding::BitPacked,
-            Encoding::DELTA_BINARY_PACKED => parquet::Encoding::DeltaBinaryPacked,
-            Encoding::DELTA_LENGTH_BYTE_ARRAY => parquet::Encoding::DeltaLengthByteArray,
-            Encoding::DELTA_BYTE_ARRAY => parquet::Encoding::DeltaByteArray,
-            Encoding::RLE_DICTIONARY => parquet::Encoding::RleDictionary,
-        }
-    }
-}
-
-// ----------------------------------------------------------------------
-// parquet::CompressionCodec <=> Compression conversion
-
-impl convert::From<parquet::CompressionCodec> for Compression {
-    fn from(value: parquet::CompressionCodec) -> Self {
-        match value {
-            parquet::CompressionCodec::Uncompressed => Compression::UNCOMPRESSED,
-            parquet::CompressionCodec::Snappy => Compression::SNAPPY,
-            parquet::CompressionCodec::Gzip => Compression::GZIP,
-            parquet::CompressionCodec::Lzo => Compression::LZO,
-            parquet::CompressionCodec::Brotli => Compression::BROTLI,
-            parquet::CompressionCodec::Lz4 => Compression::LZ4,
-            parquet::CompressionCodec::Zstd => Compression::ZSTD,
-        }
-    }
-}
-
-impl convert::From<Compression> for parquet::CompressionCodec {
-    fn from(value: Compression) -> Self {
-        match value {
-            Compression::UNCOMPRESSED => parquet::CompressionCodec::Uncompressed,
-            Compression::SNAPPY => parquet::CompressionCodec::Snappy,
-            Compression::GZIP => parquet::CompressionCodec::Gzip,
-            Compression::LZO => parquet::CompressionCodec::Lzo,
-            Compression::BROTLI => parquet::CompressionCodec::Brotli,
-            Compression::LZ4 => parquet::CompressionCodec::Lz4,
-            Compression::ZSTD => parquet::CompressionCodec::Zstd,
-        }
-    }
-}
-
-// ----------------------------------------------------------------------
-// parquet::PageType <=> PageType conversion
-
-impl convert::From<parquet::PageType> for PageType {
-    fn from(value: parquet::PageType) -> Self {
-        match value {
-            parquet::PageType::DataPage => PageType::DATA_PAGE,
-            parquet::PageType::IndexPage => PageType::INDEX_PAGE,
-            parquet::PageType::DictionaryPage => PageType::DICTIONARY_PAGE,
-            parquet::PageType::DataPageV2 => PageType::DATA_PAGE_V2,
-        }
-    }
-}
-
-impl convert::From<PageType> for parquet::PageType {
-    fn from(value: PageType) -> Self {
-        match value {
-            PageType::DATA_PAGE => parquet::PageType::DataPage,
-            PageType::INDEX_PAGE => parquet::PageType::IndexPage,
-            PageType::DICTIONARY_PAGE => parquet::PageType::DictionaryPage,
-            PageType::DATA_PAGE_V2 => parquet::PageType::DataPageV2,
-        }
-    }
-}
-
-// ----------------------------------------------------------------------
-// String conversions for schema parsing.
-
-impl str::FromStr for Repetition {
-    type Err = ParquetError;
-
-    fn from_str(s: &str) -> result::Result<Self, Self::Err> {
-        match s {
-            "REQUIRED" => Ok(Repetition::REQUIRED),
-            "OPTIONAL" => Ok(Repetition::OPTIONAL),
-            "REPEATED" => Ok(Repetition::REPEATED),
-            other => Err(general_err!("Invalid repetition {}", other)),
-        }
-    }
-}
-
-impl str::FromStr for Type {
-    type Err = ParquetError;
-
-    fn from_str(s: &str) -> result::Result<Self, Self::Err> {
-        match s {
-            "BOOLEAN" => Ok(Type::BOOLEAN),
-            "INT32" => Ok(Type::INT32),
-            "INT64" => Ok(Type::INT64),
-            "INT96" => Ok(Type::INT96),
-            "FLOAT" => Ok(Type::FLOAT),
-            "DOUBLE" => Ok(Type::DOUBLE),
-            "BYTE_ARRAY" | "BINARY" => Ok(Type::BYTE_ARRAY),
-            "FIXED_LEN_BYTE_ARRAY" => Ok(Type::FIXED_LEN_BYTE_ARRAY),
-            other => Err(general_err!("Invalid type {}", other)),
-        }
-    }
-}
-
-impl str::FromStr for ConvertedType {
-    type Err = ParquetError;
-
-    fn from_str(s: &str) -> result::Result<Self, Self::Err> {
-        match s {
-            "NONE" => Ok(ConvertedType::NONE),
-            "UTF8" => Ok(ConvertedType::UTF8),
-            "MAP" => Ok(ConvertedType::MAP),
-            "MAP_KEY_VALUE" => Ok(ConvertedType::MAP_KEY_VALUE),
-            "LIST" => Ok(ConvertedType::LIST),
-            "ENUM" => Ok(ConvertedType::ENUM),
-            "DECIMAL" => Ok(ConvertedType::DECIMAL),
-            "DATE" => Ok(ConvertedType::DATE),
-            "TIME_MILLIS" => Ok(ConvertedType::TIME_MILLIS),
-            "TIME_MICROS" => Ok(ConvertedType::TIME_MICROS),
-            "TIMESTAMP_MILLIS" => Ok(ConvertedType::TIMESTAMP_MILLIS),
-            "TIMESTAMP_MICROS" => Ok(ConvertedType::TIMESTAMP_MICROS),
-            "UINT_8" => Ok(ConvertedType::UINT_8),
-            "UINT_16" => Ok(ConvertedType::UINT_16),
-            "UINT_32" => Ok(ConvertedType::UINT_32),
-            "UINT_64" => Ok(ConvertedType::UINT_64),
-            "INT_8" => Ok(ConvertedType::INT_8),
-            "INT_16" => Ok(ConvertedType::INT_16),
-            "INT_32" => Ok(ConvertedType::INT_32),
-            "INT_64" => Ok(ConvertedType::INT_64),
-            "JSON" => Ok(ConvertedType::JSON),
-            "BSON" => Ok(ConvertedType::BSON),
-            "INTERVAL" => Ok(ConvertedType::INTERVAL),
-            other => Err(general_err!("Invalid converted type {}", other)),
-        }
-    }
-}
-
-impl str::FromStr for LogicalType {
-    type Err = ParquetError;
-
-    fn from_str(s: &str) -> result::Result<Self, Self::Err> {
-        match s {
-            // The type is a placeholder that gets updated elsewhere
-            "INTEGER" => Ok(LogicalType::INTEGER(IntType {
-                bit_width: 8,
-                is_signed: false,
-            })),
-            "MAP" => Ok(LogicalType::MAP(MapType {})),
-            "LIST" => Ok(LogicalType::LIST(ListType {})),
-            "ENUM" => Ok(LogicalType::ENUM(EnumType {})),
-            "DECIMAL" => Ok(LogicalType::DECIMAL(DecimalType {
-                precision: -1,
-                scale: -1,
-            })),
-            "DATE" => Ok(LogicalType::DATE(DateType {})),
-            "TIME" => Ok(LogicalType::TIME(TimeType {
-                is_adjusted_to_u_t_c: false,
-                unit: TimeUnit::MILLIS(parquet::MilliSeconds {}),
-            })),
-            "TIMESTAMP" => Ok(LogicalType::TIMESTAMP(TimestampType {
-                is_adjusted_to_u_t_c: false,
-                unit: TimeUnit::MILLIS(parquet::MilliSeconds {}),
-            })),
-            "STRING" => Ok(LogicalType::STRING(StringType {})),
-            "JSON" => Ok(LogicalType::JSON(JsonType {})),
-            "BSON" => Ok(LogicalType::BSON(BsonType {})),
-            "UUID" => Ok(LogicalType::UUID(UUIDType {})),
-            "UNKNOWN" => Ok(LogicalType::UNKNOWN(NullType {})),
-            "INTERVAL" => Err(general_err!("Interval logical type not yet supported")),
-            other => Err(general_err!("Invalid logical type {}", other)),
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_display_type() {
-        assert_eq!(Type::BOOLEAN.to_string(), "BOOLEAN");
-        assert_eq!(Type::INT32.to_string(), "INT32");
-        assert_eq!(Type::INT64.to_string(), "INT64");
-        assert_eq!(Type::INT96.to_string(), "INT96");
-        assert_eq!(Type::FLOAT.to_string(), "FLOAT");
-        assert_eq!(Type::DOUBLE.to_string(), "DOUBLE");
-        assert_eq!(Type::BYTE_ARRAY.to_string(), "BYTE_ARRAY");
-        assert_eq!(
-            Type::FIXED_LEN_BYTE_ARRAY.to_string(),
-            "FIXED_LEN_BYTE_ARRAY"
-        );
-    }
-
-    #[test]
-    fn test_from_type() {
-        assert_eq!(Type::from(parquet::Type::Boolean), Type::BOOLEAN);
-        assert_eq!(Type::from(parquet::Type::Int32), Type::INT32);
-        assert_eq!(Type::from(parquet::Type::Int64), Type::INT64);
-        assert_eq!(Type::from(parquet::Type::Int96), Type::INT96);
-        assert_eq!(Type::from(parquet::Type::Float), Type::FLOAT);
-        assert_eq!(Type::from(parquet::Type::Double), Type::DOUBLE);
-        assert_eq!(Type::from(parquet::Type::ByteArray), Type::BYTE_ARRAY);
-        assert_eq!(
-            Type::from(parquet::Type::FixedLenByteArray),
-            Type::FIXED_LEN_BYTE_ARRAY
-        );
-    }
-
-    #[test]
-    fn test_into_type() {
-        assert_eq!(parquet::Type::Boolean, Type::BOOLEAN.into());
-        assert_eq!(parquet::Type::Int32, Type::INT32.into());
-        assert_eq!(parquet::Type::Int64, Type::INT64.into());
-        assert_eq!(parquet::Type::Int96, Type::INT96.into());
-        assert_eq!(parquet::Type::Float, Type::FLOAT.into());
-        assert_eq!(parquet::Type::Double, Type::DOUBLE.into());
-        assert_eq!(parquet::Type::ByteArray, Type::BYTE_ARRAY.into());
-        assert_eq!(
-            parquet::Type::FixedLenByteArray,
-            Type::FIXED_LEN_BYTE_ARRAY.into()
-        );
-    }
-
-    #[test]
-    fn test_from_string_into_type() {
-        assert_eq!(
-            Type::BOOLEAN.to_string().parse::<Type>().unwrap(),
-            Type::BOOLEAN
-        );
-        assert_eq!(
-            Type::INT32.to_string().parse::<Type>().unwrap(),
-            Type::INT32
-        );
-        assert_eq!(
-            Type::INT64.to_string().parse::<Type>().unwrap(),
-            Type::INT64
-        );
-        assert_eq!(
-            Type::INT96.to_string().parse::<Type>().unwrap(),
-            Type::INT96
-        );
-        assert_eq!(
-            Type::FLOAT.to_string().parse::<Type>().unwrap(),
-            Type::FLOAT
-        );
-        assert_eq!(
-            Type::DOUBLE.to_string().parse::<Type>().unwrap(),
-            Type::DOUBLE
-        );
-        assert_eq!(
-            Type::BYTE_ARRAY.to_string().parse::<Type>().unwrap(),
-            Type::BYTE_ARRAY
-        );
-        assert_eq!("BINARY".parse::<Type>().unwrap(), Type::BYTE_ARRAY);
-        assert_eq!(
-            Type::FIXED_LEN_BYTE_ARRAY
-                .to_string()
-                .parse::<Type>()
-                .unwrap(),
-            Type::FIXED_LEN_BYTE_ARRAY
-        );
-    }
-
-    #[test]
-    fn test_display_converted_type() {
-        assert_eq!(ConvertedType::NONE.to_string(), "NONE");
-        assert_eq!(ConvertedType::UTF8.to_string(), "UTF8");
-        assert_eq!(ConvertedType::MAP.to_string(), "MAP");
-        assert_eq!(ConvertedType::MAP_KEY_VALUE.to_string(), "MAP_KEY_VALUE");
-        assert_eq!(ConvertedType::LIST.to_string(), "LIST");
-        assert_eq!(ConvertedType::ENUM.to_string(), "ENUM");
-        assert_eq!(ConvertedType::DECIMAL.to_string(), "DECIMAL");
-        assert_eq!(ConvertedType::DATE.to_string(), "DATE");
-        assert_eq!(ConvertedType::TIME_MILLIS.to_string(), "TIME_MILLIS");
-        assert_eq!(ConvertedType::DATE.to_string(), "DATE");
-        assert_eq!(ConvertedType::TIME_MICROS.to_string(), "TIME_MICROS");
-        assert_eq!(
-            ConvertedType::TIMESTAMP_MILLIS.to_string(),
-            "TIMESTAMP_MILLIS"
-        );
-        assert_eq!(
-            ConvertedType::TIMESTAMP_MICROS.to_string(),
-            "TIMESTAMP_MICROS"
-        );
-        assert_eq!(ConvertedType::UINT_8.to_string(), "UINT_8");
-        assert_eq!(ConvertedType::UINT_16.to_string(), "UINT_16");
-        assert_eq!(ConvertedType::UINT_32.to_string(), "UINT_32");
-        assert_eq!(ConvertedType::UINT_64.to_string(), "UINT_64");
-        assert_eq!(ConvertedType::INT_8.to_string(), "INT_8");
-        assert_eq!(ConvertedType::INT_16.to_string(), "INT_16");
-        assert_eq!(ConvertedType::INT_32.to_string(), "INT_32");
-        assert_eq!(ConvertedType::INT_64.to_string(), "INT_64");
-        assert_eq!(ConvertedType::JSON.to_string(), "JSON");
-        assert_eq!(ConvertedType::BSON.to_string(), "BSON");
-        assert_eq!(ConvertedType::INTERVAL.to_string(), "INTERVAL");
-    }
-
-    #[test]
-    fn test_from_converted_type() {
-        let parquet_conv_none: Option<parquet::ConvertedType> = None;
-        assert_eq!(ConvertedType::from(parquet_conv_none), ConvertedType::NONE);
-        assert_eq!(
-            ConvertedType::from(Some(parquet::ConvertedType::Utf8)),
-            ConvertedType::UTF8
-        );
-        assert_eq!(
-            ConvertedType::from(Some(parquet::ConvertedType::Map)),
-            ConvertedType::MAP
-        );
-        assert_eq!(
-            ConvertedType::from(Some(parquet::ConvertedType::MapKeyValue)),
-            ConvertedType::MAP_KEY_VALUE
-        );
-        assert_eq!(
-            ConvertedType::from(Some(parquet::ConvertedType::List)),
-            ConvertedType::LIST
-        );
-        assert_eq!(
-            ConvertedType::from(Some(parquet::ConvertedType::Enum)),
-            ConvertedType::ENUM
-        );
-        assert_eq!(
-            ConvertedType::from(Some(parquet::ConvertedType::Decimal)),
-            ConvertedType::DECIMAL
-        );
-        assert_eq!(
-            ConvertedType::from(Some(parquet::ConvertedType::Date)),
-            ConvertedType::DATE
-        );
-        assert_eq!(
-            ConvertedType::from(Some(parquet::ConvertedType::TimeMillis)),
-            ConvertedType::TIME_MILLIS
-        );
-        assert_eq!(
-            ConvertedType::from(Some(parquet::ConvertedType::TimeMicros)),
-            ConvertedType::TIME_MICROS
-        );
-        assert_eq!(
-            ConvertedType::from(Some(parquet::ConvertedType::TimestampMillis)),
-            ConvertedType::TIMESTAMP_MILLIS
-        );
-        assert_eq!(
-            ConvertedType::from(Some(parquet::ConvertedType::TimestampMicros)),
-            ConvertedType::TIMESTAMP_MICROS
-        );
-        assert_eq!(
-            ConvertedType::from(Some(parquet::ConvertedType::Uint8)),
-            ConvertedType::UINT_8
-        );
-        assert_eq!(
-            ConvertedType::from(Some(parquet::ConvertedType::Uint16)),
-            ConvertedType::UINT_16
-        );
-        assert_eq!(
-            ConvertedType::from(Some(parquet::ConvertedType::Uint32)),
-            ConvertedType::UINT_32
-        );
-        assert_eq!(
-            ConvertedType::from(Some(parquet::ConvertedType::Uint64)),
-            ConvertedType::UINT_64
-        );
-        assert_eq!(
-            ConvertedType::from(Some(parquet::ConvertedType::Int8)),
-            ConvertedType::INT_8
-        );
-        assert_eq!(
-            ConvertedType::from(Some(parquet::ConvertedType::Int16)),
-            ConvertedType::INT_16
-        );
-        assert_eq!(
-            ConvertedType::from(Some(parquet::ConvertedType::Int32)),
-            ConvertedType::INT_32
-        );
-        assert_eq!(
-            ConvertedType::from(Some(parquet::ConvertedType::Int64)),
-            ConvertedType::INT_64
-        );
-        assert_eq!(
-            ConvertedType::from(Some(parquet::ConvertedType::Json)),
-            ConvertedType::JSON
-        );
-        assert_eq!(
-            ConvertedType::from(Some(parquet::ConvertedType::Bson)),
-            ConvertedType::BSON
-        );
-        assert_eq!(
-            ConvertedType::from(Some(parquet::ConvertedType::Interval)),
-            ConvertedType::INTERVAL
-        );
-    }
-
-    #[test]
-    fn test_into_converted_type() {
-        let converted_type: Option<parquet::ConvertedType> = None;
-        assert_eq!(converted_type, ConvertedType::NONE.into());
-        assert_eq!(
-            Some(parquet::ConvertedType::Utf8),
-            ConvertedType::UTF8.into()
-        );
-        assert_eq!(Some(parquet::ConvertedType::Map), ConvertedType::MAP.into());
-        assert_eq!(
-            Some(parquet::ConvertedType::MapKeyValue),
-            ConvertedType::MAP_KEY_VALUE.into()
-        );
-        assert_eq!(
-            Some(parquet::ConvertedType::List),
-            ConvertedType::LIST.into()
-        );
-        assert_eq!(
-            Some(parquet::ConvertedType::Enum),
-            ConvertedType::ENUM.into()
-        );
-        assert_eq!(
-            Some(parquet::ConvertedType::Decimal),
-            ConvertedType::DECIMAL.into()
-        );
-        assert_eq!(
-            Some(parquet::ConvertedType::Date),
-            ConvertedType::DATE.into()
-        );
-        assert_eq!(
-            Some(parquet::ConvertedType::TimeMillis),
-            ConvertedType::TIME_MILLIS.into()
-        );
-        assert_eq!(
-            Some(parquet::ConvertedType::TimeMicros),
-            ConvertedType::TIME_MICROS.into()
-        );
-        assert_eq!(
-            Some(parquet::ConvertedType::TimestampMillis),
-            ConvertedType::TIMESTAMP_MILLIS.into()
-        );
-        assert_eq!(
-            Some(parquet::ConvertedType::TimestampMicros),
-            ConvertedType::TIMESTAMP_MICROS.into()
-        );
-        assert_eq!(
-            Some(parquet::ConvertedType::Uint8),
-            ConvertedType::UINT_8.into()
-        );
-        assert_eq!(
-            Some(parquet::ConvertedType::Uint16),
-            ConvertedType::UINT_16.into()
-        );
-        assert_eq!(
-            Some(parquet::ConvertedType::Uint32),
-            ConvertedType::UINT_32.into()
-        );
-        assert_eq!(
-            Some(parquet::ConvertedType::Uint64),
-            ConvertedType::UINT_64.into()
-        );
-        assert_eq!(
-            Some(parquet::ConvertedType::Int8),
-            ConvertedType::INT_8.into()
-        );
-        assert_eq!(
-            Some(parquet::ConvertedType::Int16),
-            ConvertedType::INT_16.into()
-        );
-        assert_eq!(
-            Some(parquet::ConvertedType::Int32),
-            ConvertedType::INT_32.into()
-        );
-        assert_eq!(
-            Some(parquet::ConvertedType::Int64),
-            ConvertedType::INT_64.into()
-        );
-        assert_eq!(
-            Some(parquet::ConvertedType::Json),
-            ConvertedType::JSON.into()
-        );
-        assert_eq!(
-            Some(parquet::ConvertedType::Bson),
-            ConvertedType::BSON.into()
-        );
-        assert_eq!(
-            Some(parquet::ConvertedType::Interval),
-            ConvertedType::INTERVAL.into()
-        );
-    }
-
-    #[test]
-    fn test_from_string_into_converted_type() {
-        assert_eq!(
-            ConvertedType::NONE
-                .to_string()
-                .parse::<ConvertedType>()
-                .unwrap(),
-            ConvertedType::NONE
-        );
-        assert_eq!(
-            ConvertedType::UTF8
-                .to_string()
-                .parse::<ConvertedType>()
-                .unwrap(),
-            ConvertedType::UTF8
-        );
-        assert_eq!(
-            ConvertedType::MAP
-                .to_string()
-                .parse::<ConvertedType>()
-                .unwrap(),
-            ConvertedType::MAP
-        );
-        assert_eq!(
-            ConvertedType::MAP_KEY_VALUE
-                .to_string()
-                .parse::<ConvertedType>()
-                .unwrap(),
-            ConvertedType::MAP_KEY_VALUE
-        );
-        assert_eq!(
-            ConvertedType::LIST
-                .to_string()
-                .parse::<ConvertedType>()
-                .unwrap(),
-            ConvertedType::LIST
-        );
-        assert_eq!(
-            ConvertedType::ENUM
-                .to_string()
-                .parse::<ConvertedType>()
-                .unwrap(),
-            ConvertedType::ENUM
-        );
-        assert_eq!(
-            ConvertedType::DECIMAL
-                .to_string()
-                .parse::<ConvertedType>()
-                .unwrap(),
-            ConvertedType::DECIMAL
-        );
-        assert_eq!(
-            ConvertedType::DATE
-                .to_string()
-                .parse::<ConvertedType>()
-                .unwrap(),
-            ConvertedType::DATE
-        );
-        assert_eq!(
-            ConvertedType::TIME_MILLIS
-                .to_string()
-                .parse::<ConvertedType>()
-                .unwrap(),
-            ConvertedType::TIME_MILLIS
-        );
-        assert_eq!(
-            ConvertedType::TIME_MICROS
-                .to_string()
-                .parse::<ConvertedType>()
-                .unwrap(),
-            ConvertedType::TIME_MICROS
-        );
-        assert_eq!(
-            ConvertedType::TIMESTAMP_MILLIS
-                .to_string()
-                .parse::<ConvertedType>()
-                .unwrap(),
-            ConvertedType::TIMESTAMP_MILLIS
-        );
-        assert_eq!(
-            ConvertedType::TIMESTAMP_MICROS
-                .to_string()
-                .parse::<ConvertedType>()
-                .unwrap(),
-            ConvertedType::TIMESTAMP_MICROS
-        );
-        assert_eq!(
-            ConvertedType::UINT_8
-                .to_string()
-                .parse::<ConvertedType>()
-                .unwrap(),
-            ConvertedType::UINT_8
-        );
-        assert_eq!(
-            ConvertedType::UINT_16
-                .to_string()
-                .parse::<ConvertedType>()
-                .unwrap(),
-            ConvertedType::UINT_16
-        );
-        assert_eq!(
-            ConvertedType::UINT_32
-                .to_string()
-                .parse::<ConvertedType>()
-                .unwrap(),
-            ConvertedType::UINT_32
-        );
-        assert_eq!(
-            ConvertedType::UINT_64
-                .to_string()
-                .parse::<ConvertedType>()
-                .unwrap(),
-            ConvertedType::UINT_64
-        );
-        assert_eq!(
-            ConvertedType::INT_8
-                .to_string()
-                .parse::<ConvertedType>()
-                .unwrap(),
-            ConvertedType::INT_8
-        );
-        assert_eq!(
-            ConvertedType::INT_16
-                .to_string()
-                .parse::<ConvertedType>()
-                .unwrap(),
-            ConvertedType::INT_16
-        );
-        assert_eq!(
-            ConvertedType::INT_32
-                .to_string()
-                .parse::<ConvertedType>()
-                .unwrap(),
-            ConvertedType::INT_32
-        );
-        assert_eq!(
-            ConvertedType::INT_64
-                .to_string()
-                .parse::<ConvertedType>()
-                .unwrap(),
-            ConvertedType::INT_64
-        );
-        assert_eq!(
-            ConvertedType::JSON
-                .to_string()
-                .parse::<ConvertedType>()
-                .unwrap(),
-            ConvertedType::JSON
-        );
-        assert_eq!(
-            ConvertedType::BSON
-                .to_string()
-                .parse::<ConvertedType>()
-                .unwrap(),
-            ConvertedType::BSON
-        );
-        assert_eq!(
-            ConvertedType::INTERVAL
-                .to_string()
-                .parse::<ConvertedType>()
-                .unwrap(),
-            ConvertedType::INTERVAL
-        );
-    }
-
-    #[test]
-    fn test_logical_to_converted_type() {
-        let logical_none: Option<LogicalType> = None;
-        assert_eq!(ConvertedType::from(logical_none), ConvertedType::NONE);
-        assert_eq!(
-            ConvertedType::from(Some(LogicalType::DECIMAL(DecimalType {
-                precision: 20,
-                scale: 5
-            }))),
-            ConvertedType::DECIMAL
-        );
-        assert_eq!(
-            ConvertedType::from(Some(LogicalType::BSON(Default::default()))),
-            ConvertedType::BSON
-        );
-        assert_eq!(
-            ConvertedType::from(Some(LogicalType::JSON(Default::default()))),
-            ConvertedType::JSON
-        );
-        assert_eq!(
-            ConvertedType::from(Some(LogicalType::STRING(Default::default()))),
-            ConvertedType::UTF8
-        );
-        assert_eq!(
-            ConvertedType::from(Some(LogicalType::DATE(Default::default()))),
-            ConvertedType::DATE
-        );
-        assert_eq!(
-            ConvertedType::from(Some(LogicalType::TIME(TimeType {
-                unit: TimeUnit::MILLIS(Default::default()),
-                is_adjusted_to_u_t_c: true,
-            }))),
-            ConvertedType::TIME_MILLIS
-        );
-        assert_eq!(
-            ConvertedType::from(Some(LogicalType::TIME(TimeType {
-                unit: TimeUnit::MICROS(Default::default()),
-                is_adjusted_to_u_t_c: true,
-            }))),
-            ConvertedType::TIME_MICROS
-        );
-        assert_eq!(
-            ConvertedType::from(Some(LogicalType::TIME(TimeType {
-                unit: TimeUnit::NANOS(Default::default()),
-                is_adjusted_to_u_t_c: false,
-            }))),
-            ConvertedType::NONE
-        );
-        assert_eq!(
-            ConvertedType::from(Some(LogicalType::TIMESTAMP(TimestampType {
-                unit: TimeUnit::MILLIS(Default::default()),
-                is_adjusted_to_u_t_c: true,
-            }))),
-            ConvertedType::TIMESTAMP_MILLIS
-        );
-        assert_eq!(
-            ConvertedType::from(Some(LogicalType::TIMESTAMP(TimestampType {
-                unit: TimeUnit::MICROS(Default::default()),
-                is_adjusted_to_u_t_c: false,
-            }))),
-            ConvertedType::TIMESTAMP_MICROS
-        );
-        assert_eq!(
-            ConvertedType::from(Some(LogicalType::TIMESTAMP(TimestampType {
-                unit: TimeUnit::NANOS(Default::default()),
-                is_adjusted_to_u_t_c: false,
-            }))),
-            ConvertedType::NONE
-        );
-        assert_eq!(
-            ConvertedType::from(Some(LogicalType::INTEGER(IntType {
-                bit_width: 8,
-                is_signed: false
-            }))),
-            ConvertedType::UINT_8
-        );
-        assert_eq!(
-            ConvertedType::from(Some(LogicalType::INTEGER(IntType {
-                bit_width: 8,
-                is_signed: true
-            }))),
-            ConvertedType::INT_8
-        );
-        assert_eq!(
-            ConvertedType::from(Some(LogicalType::INTEGER(IntType {
-                bit_width: 16,
-                is_signed: false
-            }))),
-            ConvertedType::UINT_16
-        );
-        assert_eq!(
-            ConvertedType::from(Some(LogicalType::INTEGER(IntType {
-                bit_width: 16,
-                is_signed: true
-            }))),
-            ConvertedType::INT_16
-        );
-        assert_eq!(
-            ConvertedType::from(Some(LogicalType::INTEGER(IntType {
-                bit_width: 32,
-                is_signed: false
-            }))),
-            ConvertedType::UINT_32
-        );
-        assert_eq!(
-            ConvertedType::from(Some(LogicalType::INTEGER(IntType {
-                bit_width: 32,
-                is_signed: true
-            }))),
-            ConvertedType::INT_32
-        );
-        assert_eq!(
-            ConvertedType::from(Some(LogicalType::INTEGER(IntType {
-                bit_width: 64,
-                is_signed: false
-            }))),
-            ConvertedType::UINT_64
-        );
-        assert_eq!(
-            ConvertedType::from(Some(LogicalType::INTEGER(IntType {
-                bit_width: 64,
-                is_signed: true
-            }))),
-            ConvertedType::INT_64
-        );
-        assert_eq!(
-            ConvertedType::from(Some(LogicalType::LIST(Default::default()))),
-            ConvertedType::LIST
-        );
-        assert_eq!(
-            ConvertedType::from(Some(LogicalType::MAP(Default::default()))),
-            ConvertedType::MAP
-        );
-        assert_eq!(
-            ConvertedType::from(Some(LogicalType::UUID(Default::default()))),
-            ConvertedType::NONE
-        );
-        assert_eq!(
-            ConvertedType::from(Some(LogicalType::ENUM(Default::default()))),
-            ConvertedType::ENUM
-        );
-        assert_eq!(
-            ConvertedType::from(Some(LogicalType::UNKNOWN(Default::default()))),
-            ConvertedType::NONE
-        );
-    }
-
-    #[test]
-    fn test_display_repetition() {
-        assert_eq!(Repetition::REQUIRED.to_string(), "REQUIRED");
-        assert_eq!(Repetition::OPTIONAL.to_string(), "OPTIONAL");
-        assert_eq!(Repetition::REPEATED.to_string(), "REPEATED");
-    }
-
-    #[test]
-    fn test_from_repetition() {
-        assert_eq!(
-            Repetition::from(parquet::FieldRepetitionType::Required),
-            Repetition::REQUIRED
-        );
-        assert_eq!(
-            Repetition::from(parquet::FieldRepetitionType::Optional),
-            Repetition::OPTIONAL
-        );
-        assert_eq!(
-            Repetition::from(parquet::FieldRepetitionType::Repeated),
-            Repetition::REPEATED
-        );
-    }
-
-    #[test]
-    fn test_into_repetition() {
-        assert_eq!(
-            parquet::FieldRepetitionType::Required,
-            Repetition::REQUIRED.into()
-        );
-        assert_eq!(
-            parquet::FieldRepetitionType::Optional,
-            Repetition::OPTIONAL.into()
-        );
-        assert_eq!(
-            parquet::FieldRepetitionType::Repeated,
-            Repetition::REPEATED.into()
-        );
-    }
-
-    #[test]
-    fn test_from_string_into_repetition() {
-        assert_eq!(
-            Repetition::REQUIRED
-                .to_string()
-                .parse::<Repetition>()
-                .unwrap(),
-            Repetition::REQUIRED
-        );
-        assert_eq!(
-            Repetition::OPTIONAL
-                .to_string()
-                .parse::<Repetition>()
-                .unwrap(),
-            Repetition::OPTIONAL
-        );
-        assert_eq!(
-            Repetition::REPEATED
-                .to_string()
-                .parse::<Repetition>()
-                .unwrap(),
-            Repetition::REPEATED
-        );
-    }
-
-    #[test]
-    fn test_display_encoding() {
-        assert_eq!(Encoding::PLAIN.to_string(), "PLAIN");
-        assert_eq!(Encoding::PLAIN_DICTIONARY.to_string(), "PLAIN_DICTIONARY");
-        assert_eq!(Encoding::RLE.to_string(), "RLE");
-        assert_eq!(Encoding::BIT_PACKED.to_string(), "BIT_PACKED");
-        assert_eq!(
-            Encoding::DELTA_BINARY_PACKED.to_string(),
-            "DELTA_BINARY_PACKED"
-        );
-        assert_eq!(
-            Encoding::DELTA_LENGTH_BYTE_ARRAY.to_string(),
-            "DELTA_LENGTH_BYTE_ARRAY"
-        );
-        assert_eq!(Encoding::DELTA_BYTE_ARRAY.to_string(), "DELTA_BYTE_ARRAY");
-        assert_eq!(Encoding::RLE_DICTIONARY.to_string(), "RLE_DICTIONARY");
-    }
-
-    #[test]
-    fn test_from_encoding() {
-        assert_eq!(Encoding::from(parquet::Encoding::Plain), Encoding::PLAIN);
-        assert_eq!(
-            Encoding::from(parquet::Encoding::PlainDictionary),
-            Encoding::PLAIN_DICTIONARY
-        );
-        assert_eq!(Encoding::from(parquet::Encoding::Rle), Encoding::RLE);
-        assert_eq!(
-            Encoding::from(parquet::Encoding::BitPacked),
-            Encoding::BIT_PACKED
-        );
-        assert_eq!(
-            Encoding::from(parquet::Encoding::DeltaBinaryPacked),
-            Encoding::DELTA_BINARY_PACKED
-        );
-        assert_eq!(
-            Encoding::from(parquet::Encoding::DeltaLengthByteArray),
-            Encoding::DELTA_LENGTH_BYTE_ARRAY
-        );
-        assert_eq!(
-            Encoding::from(parquet::Encoding::DeltaByteArray),
-            Encoding::DELTA_BYTE_ARRAY
-        );
-    }
-
-    #[test]
-    fn test_into_encoding() {
-        assert_eq!(parquet::Encoding::Plain, Encoding::PLAIN.into());
-        assert_eq!(
-            parquet::Encoding::PlainDictionary,
-            Encoding::PLAIN_DICTIONARY.into()
-        );
-        assert_eq!(parquet::Encoding::Rle, Encoding::RLE.into());
-        assert_eq!(parquet::Encoding::BitPacked, Encoding::BIT_PACKED.into());
-        assert_eq!(
-            parquet::Encoding::DeltaBinaryPacked,
-            Encoding::DELTA_BINARY_PACKED.into()
-        );
-        assert_eq!(
-            parquet::Encoding::DeltaLengthByteArray,
-            Encoding::DELTA_LENGTH_BYTE_ARRAY.into()
-        );
-        assert_eq!(
-            parquet::Encoding::DeltaByteArray,
-            Encoding::DELTA_BYTE_ARRAY.into()
-        );
-    }
-
-    #[test]
-    fn test_display_compression() {
-        assert_eq!(Compression::UNCOMPRESSED.to_string(), "UNCOMPRESSED");
-        assert_eq!(Compression::SNAPPY.to_string(), "SNAPPY");
-        assert_eq!(Compression::GZIP.to_string(), "GZIP");
-        assert_eq!(Compression::LZO.to_string(), "LZO");
-        assert_eq!(Compression::BROTLI.to_string(), "BROTLI");
-        assert_eq!(Compression::LZ4.to_string(), "LZ4");
-        assert_eq!(Compression::ZSTD.to_string(), "ZSTD");
-    }
-
-    #[test]
-    fn test_from_compression() {
-        assert_eq!(
-            Compression::from(parquet::CompressionCodec::Uncompressed),
-            Compression::UNCOMPRESSED
-        );
-        assert_eq!(
-            Compression::from(parquet::CompressionCodec::Snappy),
-            Compression::SNAPPY
-        );
-        assert_eq!(
-            Compression::from(parquet::CompressionCodec::Gzip),
-            Compression::GZIP
-        );
-        assert_eq!(
-            Compression::from(parquet::CompressionCodec::Lzo),
-            Compression::LZO
-        );
-        assert_eq!(
-            Compression::from(parquet::CompressionCodec::Brotli),
-            Compression::BROTLI
-        );
-        assert_eq!(
-            Compression::from(parquet::CompressionCodec::Lz4),
-            Compression::LZ4
-        );
-        assert_eq!(
-            Compression::from(parquet::CompressionCodec::Zstd),
-            Compression::ZSTD
-        );
-    }
-
-    #[test]
-    fn test_into_compression() {
-        assert_eq!(
-            parquet::CompressionCodec::Uncompressed,
-            Compression::UNCOMPRESSED.into()
-        );
-        assert_eq!(
-            parquet::CompressionCodec::Snappy,
-            Compression::SNAPPY.into()
-        );
-        assert_eq!(parquet::CompressionCodec::Gzip, Compression::GZIP.into());
-        assert_eq!(parquet::CompressionCodec::Lzo, Compression::LZO.into());
-        assert_eq!(
-            parquet::CompressionCodec::Brotli,
-            Compression::BROTLI.into()
-        );
-        assert_eq!(parquet::CompressionCodec::Lz4, Compression::LZ4.into());
-        assert_eq!(parquet::CompressionCodec::Zstd, Compression::ZSTD.into());
-    }
-
-    #[test]
-    fn test_display_page_type() {
-        assert_eq!(PageType::DATA_PAGE.to_string(), "DATA_PAGE");
-        assert_eq!(PageType::INDEX_PAGE.to_string(), "INDEX_PAGE");
-        assert_eq!(PageType::DICTIONARY_PAGE.to_string(), "DICTIONARY_PAGE");
-        assert_eq!(PageType::DATA_PAGE_V2.to_string(), "DATA_PAGE_V2");
-    }
-
-    #[test]
-    fn test_from_page_type() {
-        assert_eq!(
-            PageType::from(parquet::PageType::DataPage),
-            PageType::DATA_PAGE
-        );
-        assert_eq!(
-            PageType::from(parquet::PageType::IndexPage),
-            PageType::INDEX_PAGE
-        );
-        assert_eq!(
-            PageType::from(parquet::PageType::DictionaryPage),
-            PageType::DICTIONARY_PAGE
-        );
-        assert_eq!(
-            PageType::from(parquet::PageType::DataPageV2),
-            PageType::DATA_PAGE_V2
-        );
-    }
-
-    #[test]
-    fn test_into_page_type() {
-        assert_eq!(parquet::PageType::DataPage, PageType::DATA_PAGE.into());
-        assert_eq!(parquet::PageType::IndexPage, PageType::INDEX_PAGE.into());
-        assert_eq!(
-            parquet::PageType::DictionaryPage,
-            PageType::DICTIONARY_PAGE.into()
-        );
-        assert_eq!(parquet::PageType::DataPageV2, PageType::DATA_PAGE_V2.into());
-    }
-
-    #[test]
-    fn test_display_sort_order() {
-        assert_eq!(SortOrder::SIGNED.to_string(), "SIGNED");
-        assert_eq!(SortOrder::UNSIGNED.to_string(), "UNSIGNED");
-        assert_eq!(SortOrder::UNDEFINED.to_string(), "UNDEFINED");
-    }
-
-    #[test]
-    fn test_display_column_order() {
-        assert_eq!(
-            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED).to_string(),
-            "TYPE_DEFINED_ORDER(SIGNED)"
-        );
-        assert_eq!(
-            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNSIGNED).to_string(),
-            "TYPE_DEFINED_ORDER(UNSIGNED)"
-        );
-        assert_eq!(
-            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNDEFINED).to_string(),
-            "TYPE_DEFINED_ORDER(UNDEFINED)"
-        );
-        assert_eq!(ColumnOrder::UNDEFINED.to_string(), "UNDEFINED");
-    }
-
-    #[test]
-    fn test_column_order_get_logical_type_sort_order() {
-        // Helper to check the order in a list of values.
-        // Only logical type is checked.
-        fn check_sort_order(types: Vec<LogicalType>, expected_order: SortOrder) {
-            for tpe in types {
-                assert_eq!(
-                    ColumnOrder::get_sort_order(
-                        Some(tpe),
-                        ConvertedType::NONE,
-                        Type::BYTE_ARRAY
-                    ),
-                    expected_order
-                );
-            }
-        }
-
-        // Unsigned comparison (physical type does not matter)
-        let unsigned = vec![
-            LogicalType::STRING(Default::default()),
-            LogicalType::JSON(Default::default()),
-            LogicalType::BSON(Default::default()),
-            LogicalType::ENUM(Default::default()),
-            LogicalType::UUID(Default::default()),
-            LogicalType::INTEGER(IntType {
-                bit_width: 8,
-                is_signed: false,
-            }),
-            LogicalType::INTEGER(IntType {
-                bit_width: 16,
-                is_signed: false,
-            }),
-            LogicalType::INTEGER(IntType {
-                bit_width: 32,
-                is_signed: false,
-            }),
-            LogicalType::INTEGER(IntType {
-                bit_width: 64,
-                is_signed: false,
-            }),
-        ];
-        check_sort_order(unsigned, SortOrder::UNSIGNED);
-
-        // Signed comparison (physical type does not matter)
-        let signed = vec![
-            LogicalType::INTEGER(IntType {
-                bit_width: 8,
-                is_signed: true,
-            }),
-            LogicalType::INTEGER(IntType {
-                bit_width: 8,
-                is_signed: true,
-            }),
-            LogicalType::INTEGER(IntType {
-                bit_width: 8,
-                is_signed: true,
-            }),
-            LogicalType::INTEGER(IntType {
-                bit_width: 8,
-                is_signed: true,
-            }),
-            LogicalType::DECIMAL(DecimalType {
-                scale: 20,
-                precision: 4,
-            }),
-            LogicalType::DATE(Default::default()),
-            LogicalType::TIME(TimeType {
-                is_adjusted_to_u_t_c: false,
-                unit: TimeUnit::MILLIS(Default::default()),
-            }),
-            LogicalType::TIME(TimeType {
-                is_adjusted_to_u_t_c: false,
-                unit: TimeUnit::MICROS(Default::default()),
-            }),
-            LogicalType::TIME(TimeType {
-                is_adjusted_to_u_t_c: true,
-                unit: TimeUnit::NANOS(Default::default()),
-            }),
-            LogicalType::TIMESTAMP(TimestampType {
-                is_adjusted_to_u_t_c: false,
-                unit: TimeUnit::MILLIS(Default::default()),
-            }),
-            LogicalType::TIMESTAMP(TimestampType {
-                is_adjusted_to_u_t_c: false,
-                unit: TimeUnit::MICROS(Default::default()),
-            }),
-            LogicalType::TIMESTAMP(TimestampType {
-                is_adjusted_to_u_t_c: true,
-                unit: TimeUnit::NANOS(Default::default()),
-            }),
-        ];
-        check_sort_order(signed, SortOrder::SIGNED);
-
-        // Undefined comparison
-        let undefined = vec![
-            LogicalType::LIST(Default::default()),
-            LogicalType::MAP(Default::default()),
-        ];
-        check_sort_order(undefined, SortOrder::UNDEFINED);
-    }
-
-    #[test]
-    fn test_column_order_get_coverted_type_sort_order() {
-        // Helper to check the order in a list of values.
-        // Only converted type is checked.
-        fn check_sort_order(types: Vec<ConvertedType>, expected_order: SortOrder) {
-            for tpe in types {
-                assert_eq!(
-                    ColumnOrder::get_sort_order(None, tpe, Type::BYTE_ARRAY),
-                    expected_order
-                );
-            }
-        }
-
-        // Unsigned comparison (physical type does not matter)
-        let unsigned = vec![
-            ConvertedType::UTF8,
-            ConvertedType::JSON,
-            ConvertedType::BSON,
-            ConvertedType::ENUM,
-            ConvertedType::UINT_8,
-            ConvertedType::UINT_16,
-            ConvertedType::UINT_32,
-            ConvertedType::UINT_64,
-        ];
-        check_sort_order(unsigned, SortOrder::UNSIGNED);
-
-        // Signed comparison (physical type does not matter)
-        let signed = vec![
-            ConvertedType::INT_8,
-            ConvertedType::INT_16,
-            ConvertedType::INT_32,
-            ConvertedType::INT_64,
-            ConvertedType::DECIMAL,
-            ConvertedType::DATE,
-            ConvertedType::TIME_MILLIS,
-            ConvertedType::TIME_MICROS,
-            ConvertedType::TIMESTAMP_MILLIS,
-            ConvertedType::TIMESTAMP_MICROS,
-        ];
-        check_sort_order(signed, SortOrder::SIGNED);
-
-        // Undefined comparison
-        let undefined = vec![
-            ConvertedType::LIST,
-            ConvertedType::MAP,
-            ConvertedType::MAP_KEY_VALUE,
-            ConvertedType::INTERVAL,
-        ];
-        check_sort_order(undefined, SortOrder::UNDEFINED);
-
-        // Check None logical type
-        // This should return a sort order for byte array type.
-        check_sort_order(vec![ConvertedType::NONE], SortOrder::UNSIGNED);
-    }
-
-    #[test]
-    fn test_column_order_get_default_sort_order() {
-        // Comparison based on physical type
-        assert_eq!(
-            ColumnOrder::get_default_sort_order(Type::BOOLEAN),
-            SortOrder::UNSIGNED
-        );
-        assert_eq!(
-            ColumnOrder::get_default_sort_order(Type::INT32),
-            SortOrder::SIGNED
-        );
-        assert_eq!(
-            ColumnOrder::get_default_sort_order(Type::INT64),
-            SortOrder::SIGNED
-        );
-        assert_eq!(
-            ColumnOrder::get_default_sort_order(Type::INT96),
-            SortOrder::UNDEFINED
-        );
-        assert_eq!(
-            ColumnOrder::get_default_sort_order(Type::FLOAT),
-            SortOrder::SIGNED
-        );
-        assert_eq!(
-            ColumnOrder::get_default_sort_order(Type::DOUBLE),
-            SortOrder::SIGNED
-        );
-        assert_eq!(
-            ColumnOrder::get_default_sort_order(Type::BYTE_ARRAY),
-            SortOrder::UNSIGNED
-        );
-        assert_eq!(
-            ColumnOrder::get_default_sort_order(Type::FIXED_LEN_BYTE_ARRAY),
-            SortOrder::UNSIGNED
-        );
-    }
-
-    #[test]
-    fn test_column_order_sort_order() {
-        assert_eq!(
-            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED).sort_order(),
-            SortOrder::SIGNED
-        );
-        assert_eq!(
-            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNSIGNED).sort_order(),
-            SortOrder::UNSIGNED
-        );
-        assert_eq!(
-            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNDEFINED).sort_order(),
-            SortOrder::UNDEFINED
-        );
-        assert_eq!(ColumnOrder::UNDEFINED.sort_order(), SortOrder::SIGNED);
-    }
-}
diff --git a/rust/parquet/src/bin/parquet-read.rs b/rust/parquet/src/bin/parquet-read.rs
deleted file mode 100644
index aa3b8272dad..00000000000
--- a/rust/parquet/src/bin/parquet-read.rs
+++ /dev/null
@@ -1,128 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Binary file to read data from a Parquet file.
-//!
-//! # Install
-//!
-//! `parquet-read` can be installed using `cargo`:
-//! ```
-//! cargo install parquet
-//! ```
-//! After this `parquet-read` should be globally available:
-//! ```
-//! parquet-read XYZ.parquet
-//! ```
-//!
-//! The binary can also be built from the source code and run as follows:
-//! ```
-//! cargo run --bin parquet-read XYZ.parquet
-//! ```
-//!
-//! # Usage
-//! ```
-//! parquet-read <file-path> [num-records]
-//! ```
-//!
-//! ## Flags
-//!     -h, --help       Prints help information
-//!     -j, --json       Print Parquet file in JSON lines Format
-//!     -V, --version    Prints version information
-//!
-//! ## Args
-//!     <file-path>      Path to a Parquet file
-//!     <num-records>    Number of records to read. When not provided, all records are read.
-//!
-//! Note that `parquet-read` reads full file schema, no projection or filtering is
-//! applied.
-
-extern crate parquet;
-
-use std::{env, fs::File, path::Path};
-
-use clap::{crate_authors, crate_version, App, Arg};
-
-use parquet::file::reader::{FileReader, SerializedFileReader};
-use parquet::record::Row;
-
-fn main() {
-    let app = App::new("parquet-read")
-        .version(crate_version!())
-        .author(crate_authors!())
-        .about("Read data from a Parquet file and print output in console, in either built-in or JSON format")
-        .arg(
-            Arg::with_name("file_path")
-                .value_name("file-path")
-                .required(true)
-                .index(1)
-                .help("Path to a parquet file"),
-        )
-        .arg(
-            Arg::with_name("num_records")
-                .value_name("num-records")
-                .index(2)
-                .help(
-                    "Number of records to read. When not provided, all records are read.",
-                ),
-        )
-        .arg(
-            Arg::with_name("json")
-                .short("j")
-                .long("json")
-                .takes_value(false)
-                .help("Print Parquet file in JSON lines format"),
-        );
-
-    let matches = app.get_matches();
-    let filename = matches.value_of("file_path").unwrap();
-    let num_records: Option<usize> = if matches.is_present("num_records") {
-        match matches.value_of("num_records").unwrap().parse() {
-            Ok(value) => Some(value),
-            Err(e) => panic!("Error when reading value for [num-records], {}", e),
-        }
-    } else {
-        None
-    };
-
-    let json = matches.is_present("json");
-    let path = Path::new(&filename);
-    let file = File::open(&path).unwrap();
-    let parquet_reader = SerializedFileReader::new(file).unwrap();
-
-    // Use full schema as projected schema
-    let mut iter = parquet_reader.get_row_iter(None).unwrap();
-
-    let mut start = 0;
-    let end = num_records.unwrap_or(0);
-    let all_records = num_records.is_none();
-
-    while all_records || start < end {
-        match iter.next() {
-            Some(row) => print_row(&row, json),
-            None => break,
-        }
-        start += 1;
-    }
-}
-
-fn print_row(row: &Row, json: bool) {
-    if json {
-        println!("{}", row.to_json_value())
-    } else {
-        println!("{}", row.to_string());
-    }
-}
diff --git a/rust/parquet/src/bin/parquet-rowcount.rs b/rust/parquet/src/bin/parquet-rowcount.rs
deleted file mode 100644
index 3c61bab882a..00000000000
--- a/rust/parquet/src/bin/parquet-rowcount.rs
+++ /dev/null
@@ -1,87 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Binary file to return the number of rows found from Parquet file(s).
-//!
-//! # Install
-//!
-//! `parquet-rowcount` can be installed using `cargo`:
-//! ```
-//! cargo install parquet
-//! ```
-//! After this `parquet-rowcount` should be globally available:
-//! ```
-//! parquet-rowcount XYZ.parquet
-//! ```
-//!
-//! The binary can also be built from the source code and run as follows:
-//! ```
-//! cargo run --bin parquet-rowcount XYZ.parquet ABC.parquet ZXC.parquet
-//! ```
-//!
-//! # Usage
-//! ```
-//! parquet-rowcount <file-paths>...
-//! ```
-//!
-//! ## Flags
-//!     -h, --help       Prints help information
-//!     -V, --version    Prints version information
-//!
-//! ## Args
-//!     <file-paths>...    List of Parquet files to read from
-//!
-//! Note that `parquet-rowcount` reads full file schema, no projection or filtering is
-//! applied.
-
-extern crate parquet;
-
-use std::{env, fs::File, path::Path};
-
-use clap::{crate_authors, crate_version, App, Arg};
-
-use parquet::file::reader::{FileReader, SerializedFileReader};
-
-fn main() {
-    let matches = App::new("parquet-rowcount")
-        .version(crate_version!())
-        .author(crate_authors!())
-        .about("Return number of rows in Parquet file")
-        .arg(
-            Arg::with_name("file_paths")
-                .value_name("file-paths")
-                .required(true)
-                .multiple(true)
-                .help("List of Parquet files to read from separated by space"),
-        )
-        .get_matches();
-
-    let filenames: Vec<&str> = matches.values_of("file_paths").unwrap().collect();
-    for filename in &filenames {
-        let path = Path::new(filename);
-        let file = File::open(path).unwrap();
-        let parquet_reader = SerializedFileReader::new(file).unwrap();
-        let row_group_metadata = parquet_reader.metadata().row_groups();
-        let mut total_num_rows = 0;
-
-        for group_metadata in row_group_metadata {
-            total_num_rows += group_metadata.num_rows();
-        }
-
-        eprintln!("File {}: rowcount={}", filename, total_num_rows);
-    }
-}
diff --git a/rust/parquet/src/bin/parquet-schema.rs b/rust/parquet/src/bin/parquet-schema.rs
deleted file mode 100644
index 1b806372b10..00000000000
--- a/rust/parquet/src/bin/parquet-schema.rs
+++ /dev/null
@@ -1,104 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Binary file to print the schema and metadata of a Parquet file.
-//!
-//! # Install
-//!
-//! `parquet-schema` can be installed using `cargo`:
-//! ```
-//! cargo install parquet
-//! ```
-//! After this `parquet-schema` should be globally available:
-//! ```
-//! parquet-schema XYZ.parquet
-//! ```
-//!
-//! The binary can also be built from the source code and run as follows:
-//! ```
-//! cargo run --bin parquet-schema XYZ.parquet
-//! ```
-//!
-//! # Usage
-//! ```
-//! parquet-schema [FLAGS] <file-path>
-//! ```
-//!
-//! ## Flags
-//!     -h, --help       Prints help information
-//!     -V, --version    Prints version information
-//!     -v, --verbose    Enable printing full file metadata
-//!
-//! ## Args
-//!     <file-path>    Path to a Parquet file
-//!
-//! Note that `verbose` is an optional boolean flag that allows to print schema only,
-//! when not provided or print full file metadata when provided.
-
-extern crate parquet;
-
-use std::{env, fs::File, path::Path};
-
-use clap::{crate_authors, crate_version, App, Arg};
-
-use parquet::{
-    file::reader::{FileReader, SerializedFileReader},
-    schema::printer::{print_file_metadata, print_parquet_metadata},
-};
-
-fn main() {
-    let matches = App::new("parquet-schema")
-        .version(crate_version!())
-        .author(crate_authors!())
-        .arg(
-            Arg::with_name("file_path")
-                .value_name("file-path")
-                .required(true)
-                .index(1)
-                .help("Path to a Parquet file"),
-        )
-        .arg(
-            Arg::with_name("verbose")
-                .short("v")
-                .long("verbose")
-                .takes_value(false)
-                .help("Enable printing full file metadata"),
-        )
-        .get_matches();
-
-    let filename = matches.value_of("file_path").unwrap();
-    let path = Path::new(&filename);
-    let file = match File::open(&path) {
-        Err(e) => panic!("Error when opening file {}: {}", path.display(), e),
-        Ok(f) => f,
-    };
-    let verbose = matches.is_present("verbose");
-
-    match SerializedFileReader::new(file) {
-        Err(e) => panic!("Error when parsing Parquet file: {}", e),
-        Ok(parquet_reader) => {
-            let metadata = parquet_reader.metadata();
-            println!("Metadata for file: {}", &filename);
-            println!();
-            if verbose {
-                print_parquet_metadata(&mut std::io::stdout(), &metadata);
-            } else {
-                print_file_metadata(&mut std::io::stdout(), &metadata.file_metadata());
-            }
-        }
-    }
-}
diff --git a/rust/parquet/src/column/mod.rs b/rust/parquet/src/column/mod.rs
deleted file mode 100644
index 7ed7bfc256e..00000000000
--- a/rust/parquet/src/column/mod.rs
+++ /dev/null
@@ -1,124 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Low level column reader and writer APIs.
-//!
-//! This API is designed for reading and writing column values, definition and repetition
-//! levels directly.
-//!
-//! # Example of writing and reading data
-//!
-//! Data has the following format:
-//! ```text
-//! +---------------+
-//! |         values|
-//! +---------------+
-//! |[1, 2]         |
-//! |[3, null, null]|
-//! +---------------+
-//! ```
-//!
-//! The example uses column writer and reader APIs to write raw values, definition and
-//! repetition levels and read them to verify write/read correctness.
-//!
-//! ```rust,no_run
-//! use std::{fs, path::Path, sync::Arc};
-//!
-//! use parquet::{
-//!     column::{reader::ColumnReader, writer::ColumnWriter},
-//!     file::{
-//!         properties::WriterProperties,
-//!         reader::{FileReader, SerializedFileReader},
-//!         writer::{FileWriter, SerializedFileWriter},
-//!     },
-//!     schema::parser::parse_message_type,
-//! };
-//!
-//! let path = Path::new("/path/to/column_sample.parquet");
-//!
-//! // Writing data using column writer API.
-//!
-//! let message_type = "
-//!   message schema {
-//!     optional group values (LIST) {
-//!       repeated group list {
-//!         optional INT32 element;
-//!       }
-//!     }
-//!   }
-//! ";
-//! let schema = Arc::new(parse_message_type(message_type).unwrap());
-//! let props = Arc::new(WriterProperties::builder().build());
-//! let file = fs::File::create(path).unwrap();
-//! let mut writer = SerializedFileWriter::new(file, schema, props).unwrap();
-//! let mut row_group_writer = writer.next_row_group().unwrap();
-//! while let Some(mut col_writer) = row_group_writer.next_column().unwrap() {
-//!     match col_writer {
-//!         // You can also use `get_typed_column_writer` method to extract typed writer.
-//!         ColumnWriter::Int32ColumnWriter(ref mut typed_writer) => {
-//!             typed_writer
-//!                 .write_batch(&[1, 2, 3], Some(&[3, 3, 3, 2, 2]), Some(&[0, 1, 0, 1, 1]))
-//!                 .unwrap();
-//!         }
-//!         _ => {}
-//!     }
-//!     row_group_writer.close_column(col_writer).unwrap();
-//! }
-//! writer.close_row_group(row_group_writer).unwrap();
-//! writer.close().unwrap();
-//!
-//! // Reading data using column reader API.
-//!
-//! let file = fs::File::open(path).unwrap();
-//! let reader = SerializedFileReader::new(file).unwrap();
-//! let metadata = reader.metadata();
-//!
-//! let mut res = Ok((0, 0));
-//! let mut values = vec![0; 8];
-//! let mut def_levels = vec![0; 8];
-//! let mut rep_levels = vec![0; 8];
-//!
-//! for i in 0..metadata.num_row_groups() {
-//!     let row_group_reader = reader.get_row_group(i).unwrap();
-//!     let row_group_metadata = metadata.row_group(i);
-//!
-//!     for j in 0..row_group_metadata.num_columns() {
-//!         let mut column_reader = row_group_reader.get_column_reader(j).unwrap();
-//!         match column_reader {
-//!             // You can also use `get_typed_column_reader` method to extract typed reader.
-//!             ColumnReader::Int32ColumnReader(ref mut typed_reader) => {
-//!                 res = typed_reader.read_batch(
-//!                     8, // batch size
-//!                     Some(&mut def_levels),
-//!                     Some(&mut rep_levels),
-//!                     &mut values,
-//!                 );
-//!             }
-//!             _ => {}
-//!         }
-//!     }
-//! }
-//!
-//! assert_eq!(res, Ok((3, 5)));
-//! assert_eq!(values, vec![1, 2, 3, 0, 0, 0, 0, 0]);
-//! assert_eq!(def_levels, vec![3, 3, 3, 2, 2, 0, 0, 0]);
-//! assert_eq!(rep_levels, vec![0, 1, 0, 1, 1, 0, 0, 0]);
-//! ```
-
-pub mod page;
-pub mod reader;
-pub mod writer;
diff --git a/rust/parquet/src/column/page.rs b/rust/parquet/src/column/page.rs
deleted file mode 100644
index b3515780884..00000000000
--- a/rust/parquet/src/column/page.rs
+++ /dev/null
@@ -1,306 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Contains Parquet Page definitions and page reader interface.
-
-use crate::basic::{Encoding, PageType};
-use crate::errors::Result;
-use crate::file::{metadata::ColumnChunkMetaData, statistics::Statistics};
-use crate::schema::types::{ColumnDescPtr, SchemaDescPtr};
-use crate::util::memory::ByteBufferPtr;
-
-/// Parquet Page definition.
-///
-/// List of supported pages.
-/// These are 1-to-1 mapped from the equivalent Thrift definitions, except `buf` which
-/// used to store uncompressed bytes of the page.
-pub enum Page {
-    DataPage {
-        buf: ByteBufferPtr,
-        num_values: u32,
-        encoding: Encoding,
-        def_level_encoding: Encoding,
-        rep_level_encoding: Encoding,
-        statistics: Option<Statistics>,
-    },
-    DataPageV2 {
-        buf: ByteBufferPtr,
-        num_values: u32,
-        encoding: Encoding,
-        num_nulls: u32,
-        num_rows: u32,
-        def_levels_byte_len: u32,
-        rep_levels_byte_len: u32,
-        is_compressed: bool,
-        statistics: Option<Statistics>,
-    },
-    DictionaryPage {
-        buf: ByteBufferPtr,
-        num_values: u32,
-        encoding: Encoding,
-        is_sorted: bool,
-    },
-}
-
-impl Page {
-    /// Returns [`PageType`](crate::basic::PageType) for this page.
-    pub fn page_type(&self) -> PageType {
-        match self {
-            Page::DataPage { .. } => PageType::DATA_PAGE,
-            Page::DataPageV2 { .. } => PageType::DATA_PAGE_V2,
-            Page::DictionaryPage { .. } => PageType::DICTIONARY_PAGE,
-        }
-    }
-
-    /// Returns internal byte buffer reference for this page.
-    pub fn buffer(&self) -> &ByteBufferPtr {
-        match self {
-            Page::DataPage { ref buf, .. } => &buf,
-            Page::DataPageV2 { ref buf, .. } => &buf,
-            Page::DictionaryPage { ref buf, .. } => &buf,
-        }
-    }
-
-    /// Returns number of values in this page.
-    pub fn num_values(&self) -> u32 {
-        match self {
-            Page::DataPage { num_values, .. } => *num_values,
-            Page::DataPageV2 { num_values, .. } => *num_values,
-            Page::DictionaryPage { num_values, .. } => *num_values,
-        }
-    }
-
-    /// Returns this page [`Encoding`](crate::basic::Encoding).
-    pub fn encoding(&self) -> Encoding {
-        match self {
-            Page::DataPage { encoding, .. } => *encoding,
-            Page::DataPageV2 { encoding, .. } => *encoding,
-            Page::DictionaryPage { encoding, .. } => *encoding,
-        }
-    }
-
-    /// Returns optional [`Statistics`](crate::file::statistics::Statistics).
-    pub fn statistics(&self) -> Option<&Statistics> {
-        match self {
-            Page::DataPage { ref statistics, .. } => statistics.as_ref(),
-            Page::DataPageV2 { ref statistics, .. } => statistics.as_ref(),
-            Page::DictionaryPage { .. } => None,
-        }
-    }
-}
-
-/// Helper struct to represent pages with potentially compressed buffer (data page v1) or
-/// compressed and concatenated buffer (def levels + rep levels + compressed values for
-/// data page v2).
-///
-/// The difference with `Page` is that `Page` buffer is always uncompressed.
-pub struct CompressedPage {
-    compressed_page: Page,
-    uncompressed_size: usize,
-}
-
-impl CompressedPage {
-    /// Creates `CompressedPage` from a page with potentially compressed buffer and
-    /// uncompressed size.
-    pub fn new(compressed_page: Page, uncompressed_size: usize) -> Self {
-        Self {
-            compressed_page,
-            uncompressed_size,
-        }
-    }
-
-    /// Returns page type.
-    pub fn page_type(&self) -> PageType {
-        self.compressed_page.page_type()
-    }
-
-    /// Returns underlying page with potentially compressed buffer.
-    pub fn compressed_page(&self) -> &Page {
-        &self.compressed_page
-    }
-
-    /// Returns uncompressed size in bytes.
-    pub fn uncompressed_size(&self) -> usize {
-        self.uncompressed_size
-    }
-
-    /// Returns compressed size in bytes.
-    ///
-    /// Note that it is assumed that buffer is compressed, but it may not be. In this
-    /// case compressed size will be equal to uncompressed size.
-    pub fn compressed_size(&self) -> usize {
-        self.compressed_page.buffer().len()
-    }
-
-    /// Number of values in page.
-    pub fn num_values(&self) -> u32 {
-        self.compressed_page.num_values()
-    }
-
-    /// Returns encoding for values in page.
-    pub fn encoding(&self) -> Encoding {
-        self.compressed_page.encoding()
-    }
-
-    /// Returns slice of compressed buffer in the page.
-    pub fn data(&self) -> &[u8] {
-        self.compressed_page.buffer().data()
-    }
-}
-
-/// Contains page write metrics.
-pub struct PageWriteSpec {
-    pub page_type: PageType,
-    pub uncompressed_size: usize,
-    pub compressed_size: usize,
-    pub num_values: u32,
-    pub offset: u64,
-    pub bytes_written: u64,
-}
-
-impl PageWriteSpec {
-    /// Creates new spec with default page write metrics.
-    pub fn new() -> Self {
-        Self {
-            page_type: PageType::DATA_PAGE,
-            uncompressed_size: 0,
-            compressed_size: 0,
-            num_values: 0,
-            offset: 0,
-            bytes_written: 0,
-        }
-    }
-}
-
-/// API for reading pages from a column chunk.
-/// This offers a iterator like API to get the next page.
-pub trait PageReader {
-    /// Gets the next page in the column chunk associated with this reader.
-    /// Returns `None` if there are no pages left.
-    fn get_next_page(&mut self) -> Result<Option<Page>>;
-}
-
-/// API for writing pages in a column chunk.
-///
-/// It is reasonable to assume that all pages will be written in the correct order, e.g.
-/// dictionary page followed by data pages, or a set of data pages, etc.
-pub trait PageWriter {
-    /// Writes a page into the output stream/sink.
-    /// Returns `PageWriteSpec` that contains information about written page metrics,
-    /// including number of bytes, size, number of values, offset, etc.
-    ///
-    /// This method is called for every compressed page we write into underlying buffer,
-    /// either data page or dictionary page.
-    fn write_page(&mut self, page: CompressedPage) -> Result<PageWriteSpec>;
-
-    /// Writes column chunk metadata into the output stream/sink.
-    ///
-    /// This method is called once before page writer is closed, normally when writes are
-    /// finalised in column writer.
-    fn write_metadata(&mut self, metadata: &ColumnChunkMetaData) -> Result<()>;
-
-    /// Closes resources and flushes underlying sink.
-    /// Page writer should not be used after this method is called.
-    fn close(&mut self) -> Result<()>;
-}
-
-/// An iterator over pages of some specific column in a parquet file.
-pub trait PageIterator: Iterator<Item = Result<Box<dyn PageReader>>> {
-    /// Get schema of parquet file.
-    fn schema(&mut self) -> Result<SchemaDescPtr>;
-
-    /// Get column schema of this page iterator.
-    fn column_schema(&mut self) -> Result<ColumnDescPtr>;
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_page() {
-        let data_page = Page::DataPage {
-            buf: ByteBufferPtr::new(vec![0, 1, 2]),
-            num_values: 10,
-            encoding: Encoding::PLAIN,
-            def_level_encoding: Encoding::RLE,
-            rep_level_encoding: Encoding::RLE,
-            statistics: Some(Statistics::int32(Some(1), Some(2), None, 1, true)),
-        };
-        assert_eq!(data_page.page_type(), PageType::DATA_PAGE);
-        assert_eq!(data_page.buffer().data(), vec![0, 1, 2].as_slice());
-        assert_eq!(data_page.num_values(), 10);
-        assert_eq!(data_page.encoding(), Encoding::PLAIN);
-        assert_eq!(
-            data_page.statistics(),
-            Some(&Statistics::int32(Some(1), Some(2), None, 1, true))
-        );
-
-        let data_page_v2 = Page::DataPageV2 {
-            buf: ByteBufferPtr::new(vec![0, 1, 2]),
-            num_values: 10,
-            encoding: Encoding::PLAIN,
-            num_nulls: 5,
-            num_rows: 20,
-            def_levels_byte_len: 30,
-            rep_levels_byte_len: 40,
-            is_compressed: false,
-            statistics: Some(Statistics::int32(Some(1), Some(2), None, 1, true)),
-        };
-        assert_eq!(data_page_v2.page_type(), PageType::DATA_PAGE_V2);
-        assert_eq!(data_page_v2.buffer().data(), vec![0, 1, 2].as_slice());
-        assert_eq!(data_page_v2.num_values(), 10);
-        assert_eq!(data_page_v2.encoding(), Encoding::PLAIN);
-        assert_eq!(
-            data_page_v2.statistics(),
-            Some(&Statistics::int32(Some(1), Some(2), None, 1, true))
-        );
-
-        let dict_page = Page::DictionaryPage {
-            buf: ByteBufferPtr::new(vec![0, 1, 2]),
-            num_values: 10,
-            encoding: Encoding::PLAIN,
-            is_sorted: false,
-        };
-        assert_eq!(dict_page.page_type(), PageType::DICTIONARY_PAGE);
-        assert_eq!(dict_page.buffer().data(), vec![0, 1, 2].as_slice());
-        assert_eq!(dict_page.num_values(), 10);
-        assert_eq!(dict_page.encoding(), Encoding::PLAIN);
-        assert_eq!(dict_page.statistics(), None);
-    }
-
-    #[test]
-    fn test_compressed_page() {
-        let data_page = Page::DataPage {
-            buf: ByteBufferPtr::new(vec![0, 1, 2]),
-            num_values: 10,
-            encoding: Encoding::PLAIN,
-            def_level_encoding: Encoding::RLE,
-            rep_level_encoding: Encoding::RLE,
-            statistics: Some(Statistics::int32(Some(1), Some(2), None, 1, true)),
-        };
-
-        let cpage = CompressedPage::new(data_page, 5);
-
-        assert_eq!(cpage.page_type(), PageType::DATA_PAGE);
-        assert_eq!(cpage.uncompressed_size(), 5);
-        assert_eq!(cpage.compressed_size(), 3);
-        assert_eq!(cpage.num_values(), 10);
-        assert_eq!(cpage.encoding(), Encoding::PLAIN);
-        assert_eq!(cpage.data(), &[0, 1, 2]);
-    }
-}
diff --git a/rust/parquet/src/column/reader.rs b/rust/parquet/src/column/reader.rs
deleted file mode 100644
index 1181565bdcf..00000000000
--- a/rust/parquet/src/column/reader.rs
+++ /dev/null
@@ -1,1356 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Contains column reader API.
-
-use std::{
-    cmp::{max, min},
-    collections::HashMap,
-};
-
-use super::page::{Page, PageReader};
-use crate::basic::*;
-use crate::data_type::*;
-use crate::encodings::{
-    decoding::{get_decoder, Decoder, DictDecoder, PlainDecoder},
-    levels::LevelDecoder,
-};
-use crate::errors::{ParquetError, Result};
-use crate::schema::types::ColumnDescPtr;
-use crate::util::memory::ByteBufferPtr;
-
-/// Column reader for a Parquet type.
-pub enum ColumnReader {
-    BoolColumnReader(ColumnReaderImpl<BoolType>),
-    Int32ColumnReader(ColumnReaderImpl<Int32Type>),
-    Int64ColumnReader(ColumnReaderImpl<Int64Type>),
-    Int96ColumnReader(ColumnReaderImpl<Int96Type>),
-    FloatColumnReader(ColumnReaderImpl<FloatType>),
-    DoubleColumnReader(ColumnReaderImpl<DoubleType>),
-    ByteArrayColumnReader(ColumnReaderImpl<ByteArrayType>),
-    FixedLenByteArrayColumnReader(ColumnReaderImpl<FixedLenByteArrayType>),
-}
-
-/// Gets a specific column reader corresponding to column descriptor `col_descr`. The
-/// column reader will read from pages in `col_page_reader`.
-pub fn get_column_reader(
-    col_descr: ColumnDescPtr,
-    col_page_reader: Box<dyn PageReader>,
-) -> ColumnReader {
-    match col_descr.physical_type() {
-        Type::BOOLEAN => ColumnReader::BoolColumnReader(ColumnReaderImpl::new(
-            col_descr,
-            col_page_reader,
-        )),
-        Type::INT32 => ColumnReader::Int32ColumnReader(ColumnReaderImpl::new(
-            col_descr,
-            col_page_reader,
-        )),
-        Type::INT64 => ColumnReader::Int64ColumnReader(ColumnReaderImpl::new(
-            col_descr,
-            col_page_reader,
-        )),
-        Type::INT96 => ColumnReader::Int96ColumnReader(ColumnReaderImpl::new(
-            col_descr,
-            col_page_reader,
-        )),
-        Type::FLOAT => ColumnReader::FloatColumnReader(ColumnReaderImpl::new(
-            col_descr,
-            col_page_reader,
-        )),
-        Type::DOUBLE => ColumnReader::DoubleColumnReader(ColumnReaderImpl::new(
-            col_descr,
-            col_page_reader,
-        )),
-        Type::BYTE_ARRAY => ColumnReader::ByteArrayColumnReader(ColumnReaderImpl::new(
-            col_descr,
-            col_page_reader,
-        )),
-        Type::FIXED_LEN_BYTE_ARRAY => ColumnReader::FixedLenByteArrayColumnReader(
-            ColumnReaderImpl::new(col_descr, col_page_reader),
-        ),
-    }
-}
-
-/// Gets a typed column reader for the specific type `T`, by "up-casting" `col_reader` of
-/// non-generic type to a generic column reader type `ColumnReaderImpl`.
-///
-/// Panics if actual enum value for `col_reader` does not match the type `T`.
-pub fn get_typed_column_reader<T: DataType>(
-    col_reader: ColumnReader,
-) -> ColumnReaderImpl<T> {
-    T::get_column_reader(col_reader).unwrap_or_else(|| {
-        panic!(
-            "Failed to convert column reader into a typed column reader for `{}` type",
-            T::get_physical_type()
-        )
-    })
-}
-
-/// Typed value reader for a particular primitive column.
-pub struct ColumnReaderImpl<T: DataType> {
-    descr: ColumnDescPtr,
-    def_level_decoder: Option<LevelDecoder>,
-    rep_level_decoder: Option<LevelDecoder>,
-    page_reader: Box<dyn PageReader>,
-    current_encoding: Option<Encoding>,
-
-    // The total number of values stored in the data page.
-    num_buffered_values: u32,
-
-    // The number of values from the current data page that has been decoded into memory
-    // so far.
-    num_decoded_values: u32,
-
-    // Cache of decoders for existing encodings
-    decoders: HashMap<Encoding, Box<dyn Decoder<T>>>,
-}
-
-impl<T: DataType> ColumnReaderImpl<T> {
-    /// Creates new column reader based on column descriptor and page reader.
-    pub fn new(descr: ColumnDescPtr, page_reader: Box<dyn PageReader>) -> Self {
-        Self {
-            descr,
-            def_level_decoder: None,
-            rep_level_decoder: None,
-            page_reader,
-            current_encoding: None,
-            num_buffered_values: 0,
-            num_decoded_values: 0,
-            decoders: HashMap::new(),
-        }
-    }
-
-    /// Reads a batch of values of at most `batch_size`.
-    ///
-    /// This will try to read from the row group, and fills up at most `batch_size` values
-    /// for `def_levels`, `rep_levels` and `values`. It will stop either when the row
-    /// group is depleted or `batch_size` values has been read, or there is no space
-    /// in the input slices (values/definition levels/repetition levels).
-    ///
-    /// Note that in case the field being read is not required, `values` could contain
-    /// less values than `def_levels`. Also note that this will skip reading def / rep
-    /// levels if the field is required / not repeated, respectively.
-    ///
-    /// If `def_levels` or `rep_levels` is `None`, this will also skip reading the
-    /// respective levels. This is useful when the caller of this function knows in
-    /// advance that the field is required and non-repeated, therefore can avoid
-    /// allocating memory for the levels data. Note that if field has definition
-    /// levels, but caller provides None, there might be inconsistency between
-    /// levels/values (see comments below).
-    ///
-    /// Returns a tuple where the first element is the actual number of values read,
-    /// and the second element is the actual number of levels read.
-    #[inline]
-    pub fn read_batch(
-        &mut self,
-        batch_size: usize,
-        mut def_levels: Option<&mut [i16]>,
-        mut rep_levels: Option<&mut [i16]>,
-        values: &mut [T::T],
-    ) -> Result<(usize, usize)> {
-        let mut values_read = 0;
-        let mut levels_read = 0;
-
-        // Compute the smallest batch size we can read based on provided slices
-        let mut batch_size = min(batch_size, values.len());
-        if let Some(ref levels) = def_levels {
-            batch_size = min(batch_size, levels.len());
-        }
-        if let Some(ref levels) = rep_levels {
-            batch_size = min(batch_size, levels.len());
-        }
-
-        // Read exhaustively all pages until we read all batch_size values/levels
-        // or there are no more values/levels to read.
-        while max(values_read, levels_read) < batch_size {
-            if !self.has_next()? {
-                break;
-            }
-
-            // Batch size for the current iteration
-            let iter_batch_size = {
-                // Compute approximate value based on values decoded so far
-                let mut adjusted_size = min(
-                    batch_size,
-                    (self.num_buffered_values - self.num_decoded_values) as usize,
-                );
-
-                // Adjust batch size by taking into account how much data there
-                // to read. As batch_size is also smaller than value and level
-                // slices (if available), this ensures that available space is not
-                // exceeded.
-                adjusted_size = min(adjusted_size, batch_size - values_read);
-                adjusted_size = min(adjusted_size, batch_size - levels_read);
-
-                adjusted_size
-            };
-
-            let mut values_to_read = 0;
-            let mut num_def_levels = 0;
-            let mut num_rep_levels = 0;
-
-            // If the field is required and non-repeated, there are no definition levels
-            if self.descr.max_def_level() > 0 && def_levels.as_ref().is_some() {
-                if let Some(ref mut levels) = def_levels {
-                    num_def_levels = self.read_def_levels(
-                        &mut levels[levels_read..levels_read + iter_batch_size],
-                    )?;
-                    for i in levels_read..levels_read + num_def_levels {
-                        if levels[i] == self.descr.max_def_level() {
-                            values_to_read += 1;
-                        }
-                    }
-                }
-            } else {
-                // If max definition level == 0, then it is REQUIRED field, read all
-                // values. If definition levels are not provided, we still
-                // read all values.
-                values_to_read = iter_batch_size;
-            }
-
-            if self.descr.max_rep_level() > 0 && rep_levels.is_some() {
-                if let Some(ref mut levels) = rep_levels {
-                    num_rep_levels = self.read_rep_levels(
-                        &mut levels[levels_read..levels_read + iter_batch_size],
-                    )?;
-
-                    // If definition levels are defined, check that rep levels == def
-                    // levels
-                    if def_levels.is_some() {
-                        assert_eq!(
-                            num_def_levels, num_rep_levels,
-                            "Number of decoded rep / def levels did not match"
-                        );
-                    }
-                }
-            }
-
-            // At this point we have read values, definition and repetition levels.
-            // If both definition and repetition levels are defined, their counts
-            // should be equal. Values count is always less or equal to definition levels.
-            //
-            // Note that if field is not required, but no definition levels are provided,
-            // we would read values of batch size and (if provided, of course) repetition
-            // levels of batch size - [!] they will not be synced, because only definition
-            // levels enforce number of non-null values to read.
-
-            let curr_values_read =
-                self.read_values(&mut values[values_read..values_read + values_to_read])?;
-
-            // Update all "return" counters and internal state.
-
-            // This is to account for when def or rep levels are not provided
-            let curr_levels_read = max(num_def_levels, num_rep_levels);
-            self.num_decoded_values += max(curr_levels_read, curr_values_read) as u32;
-            levels_read += curr_levels_read;
-            values_read += curr_values_read;
-        }
-
-        Ok((values_read, levels_read))
-    }
-
-    /// Reads a new page and set up the decoders for levels, values or dictionary.
-    /// Returns false if there's no page left.
-    fn read_new_page(&mut self) -> Result<bool> {
-        #[allow(while_true)]
-        while true {
-            match self.page_reader.get_next_page()? {
-                // No more page to read
-                None => return Ok(false),
-                Some(current_page) => {
-                    match current_page {
-                        // 1. Dictionary page: configure dictionary for this page.
-                        p @ Page::DictionaryPage { .. } => {
-                            self.configure_dictionary(p)?;
-                            continue;
-                        }
-                        // 2. Data page v1
-                        Page::DataPage {
-                            buf,
-                            num_values,
-                            encoding,
-                            def_level_encoding,
-                            rep_level_encoding,
-                            statistics: _,
-                        } => {
-                            self.num_buffered_values = num_values;
-                            self.num_decoded_values = 0;
-
-                            let mut buffer_ptr = buf;
-
-                            if self.descr.max_rep_level() > 0 {
-                                let mut rep_decoder = LevelDecoder::v1(
-                                    rep_level_encoding,
-                                    self.descr.max_rep_level(),
-                                );
-                                let total_bytes = rep_decoder.set_data(
-                                    self.num_buffered_values as usize,
-                                    buffer_ptr.all(),
-                                );
-                                buffer_ptr = buffer_ptr.start_from(total_bytes);
-                                self.rep_level_decoder = Some(rep_decoder);
-                            }
-
-                            if self.descr.max_def_level() > 0 {
-                                let mut def_decoder = LevelDecoder::v1(
-                                    def_level_encoding,
-                                    self.descr.max_def_level(),
-                                );
-                                let total_bytes = def_decoder.set_data(
-                                    self.num_buffered_values as usize,
-                                    buffer_ptr.all(),
-                                );
-                                buffer_ptr = buffer_ptr.start_from(total_bytes);
-                                self.def_level_decoder = Some(def_decoder);
-                            }
-
-                            // Data page v1 does not have offset, all content of buffer
-                            // should be passed
-                            self.set_current_page_encoding(
-                                encoding,
-                                &buffer_ptr,
-                                0,
-                                num_values as usize,
-                            )?;
-                            return Ok(true);
-                        }
-                        // 3. Data page v2
-                        Page::DataPageV2 {
-                            buf,
-                            num_values,
-                            encoding,
-                            num_nulls: _,
-                            num_rows: _,
-                            def_levels_byte_len,
-                            rep_levels_byte_len,
-                            is_compressed: _,
-                            statistics: _,
-                        } => {
-                            self.num_buffered_values = num_values;
-                            self.num_decoded_values = 0;
-
-                            let mut offset = 0;
-
-                            // DataPage v2 only supports RLE encoding for repetition
-                            // levels
-                            if self.descr.max_rep_level() > 0 {
-                                let mut rep_decoder =
-                                    LevelDecoder::v2(self.descr.max_rep_level());
-                                let bytes_read = rep_decoder.set_data_range(
-                                    self.num_buffered_values as usize,
-                                    &buf,
-                                    offset,
-                                    rep_levels_byte_len as usize,
-                                );
-                                offset += bytes_read;
-                                self.rep_level_decoder = Some(rep_decoder);
-                            }
-
-                            // DataPage v2 only supports RLE encoding for definition
-                            // levels
-                            if self.descr.max_def_level() > 0 {
-                                let mut def_decoder =
-                                    LevelDecoder::v2(self.descr.max_def_level());
-                                let bytes_read = def_decoder.set_data_range(
-                                    self.num_buffered_values as usize,
-                                    &buf,
-                                    offset,
-                                    def_levels_byte_len as usize,
-                                );
-                                offset += bytes_read;
-                                self.def_level_decoder = Some(def_decoder);
-                            }
-
-                            self.set_current_page_encoding(
-                                encoding,
-                                &buf,
-                                offset,
-                                num_values as usize,
-                            )?;
-                            return Ok(true);
-                        }
-                    };
-                }
-            }
-        }
-
-        Ok(true)
-    }
-
-    /// Resolves and updates encoding and set decoder for the current page
-    fn set_current_page_encoding(
-        &mut self,
-        mut encoding: Encoding,
-        buffer_ptr: &ByteBufferPtr,
-        offset: usize,
-        len: usize,
-    ) -> Result<()> {
-        if encoding == Encoding::PLAIN_DICTIONARY {
-            encoding = Encoding::RLE_DICTIONARY;
-        }
-
-        let decoder = if encoding == Encoding::RLE_DICTIONARY {
-            self.decoders
-                .get_mut(&encoding)
-                .expect("Decoder for dict should have been set")
-        } else {
-            // Search cache for data page decoder
-            #[allow(clippy::map_entry)]
-            if !self.decoders.contains_key(&encoding) {
-                // Initialize decoder for this page
-                let data_decoder = get_decoder::<T>(self.descr.clone(), encoding)?;
-                self.decoders.insert(encoding, data_decoder);
-            }
-            self.decoders.get_mut(&encoding).unwrap()
-        };
-
-        decoder.set_data(buffer_ptr.start_from(offset), len as usize)?;
-        self.current_encoding = Some(encoding);
-        Ok(())
-    }
-
-    #[inline]
-    fn has_next(&mut self) -> Result<bool> {
-        if self.num_buffered_values == 0
-            || self.num_buffered_values == self.num_decoded_values
-        {
-            // TODO: should we return false if read_new_page() = true and
-            // num_buffered_values = 0?
-            if !self.read_new_page()? {
-                Ok(false)
-            } else {
-                Ok(self.num_buffered_values != 0)
-            }
-        } else {
-            Ok(true)
-        }
-    }
-
-    #[inline]
-    fn read_rep_levels(&mut self, buffer: &mut [i16]) -> Result<usize> {
-        let level_decoder = self
-            .rep_level_decoder
-            .as_mut()
-            .expect("rep_level_decoder be set");
-        level_decoder.get(buffer)
-    }
-
-    #[inline]
-    fn read_def_levels(&mut self, buffer: &mut [i16]) -> Result<usize> {
-        let level_decoder = self
-            .def_level_decoder
-            .as_mut()
-            .expect("def_level_decoder be set");
-        level_decoder.get(buffer)
-    }
-
-    #[inline]
-    fn read_values(&mut self, buffer: &mut [T::T]) -> Result<usize> {
-        let encoding = self
-            .current_encoding
-            .expect("current_encoding should be set");
-        let current_decoder = self
-            .decoders
-            .get_mut(&encoding)
-            .unwrap_or_else(|| panic!("decoder for encoding {} should be set", encoding));
-        current_decoder.get(buffer)
-    }
-
-    #[inline]
-    fn configure_dictionary(&mut self, page: Page) -> Result<bool> {
-        let mut encoding = page.encoding();
-        if encoding == Encoding::PLAIN || encoding == Encoding::PLAIN_DICTIONARY {
-            encoding = Encoding::RLE_DICTIONARY
-        }
-
-        if self.decoders.contains_key(&encoding) {
-            return Err(general_err!("Column cannot have more than one dictionary"));
-        }
-
-        if encoding == Encoding::RLE_DICTIONARY {
-            let mut dictionary = PlainDecoder::<T>::new(self.descr.type_length());
-            let num_values = page.num_values();
-            dictionary.set_data(page.buffer().clone(), num_values as usize)?;
-
-            let mut decoder = DictDecoder::new();
-            decoder.set_dict(Box::new(dictionary))?;
-            self.decoders.insert(encoding, Box::new(decoder));
-            Ok(true)
-        } else {
-            Err(nyi_err!(
-                "Invalid/Unsupported encoding type for dictionary: {}",
-                encoding
-            ))
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    use rand::distributions::uniform::SampleUniform;
-    use std::{collections::VecDeque, sync::Arc, vec::IntoIter};
-
-    use crate::basic::Type as PhysicalType;
-    use crate::column::page::Page;
-    use crate::schema::types::{ColumnDescriptor, ColumnPath, Type as SchemaType};
-    use crate::util::test_common::make_pages;
-
-    const NUM_LEVELS: usize = 128;
-    const NUM_PAGES: usize = 2;
-    const MAX_DEF_LEVEL: i16 = 5;
-    const MAX_REP_LEVEL: i16 = 5;
-
-    // Macro to generate test cases
-    macro_rules! test {
-        // branch for generating i32 cases
-        ($test_func:ident, i32, $func:ident, $def_level:expr, $rep_level:expr,
-     $num_pages:expr, $num_levels:expr, $batch_size:expr, $min:expr, $max:expr) => {
-            test_internal!(
-                $test_func,
-                Int32Type,
-                get_test_int32_type,
-                $func,
-                $def_level,
-                $rep_level,
-                $num_pages,
-                $num_levels,
-                $batch_size,
-                $min,
-                $max
-            );
-        };
-        // branch for generating i64 cases
-        ($test_func:ident, i64, $func:ident, $def_level:expr, $rep_level:expr,
-     $num_pages:expr, $num_levels:expr, $batch_size:expr, $min:expr, $max:expr) => {
-            test_internal!(
-                $test_func,
-                Int64Type,
-                get_test_int64_type,
-                $func,
-                $def_level,
-                $rep_level,
-                $num_pages,
-                $num_levels,
-                $batch_size,
-                $min,
-                $max
-            );
-        };
-    }
-
-    macro_rules! test_internal {
-        ($test_func:ident, $ty:ident, $pty:ident, $func:ident, $def_level:expr,
-     $rep_level:expr, $num_pages:expr, $num_levels:expr, $batch_size:expr,
-     $min:expr, $max:expr) => {
-            #[test]
-            fn $test_func() {
-                let desc = Arc::new(ColumnDescriptor::new(
-                    Arc::new($pty()),
-                    $def_level,
-                    $rep_level,
-                    ColumnPath::new(Vec::new()),
-                ));
-                let mut tester = ColumnReaderTester::<$ty>::new();
-                tester.$func(desc, $num_pages, $num_levels, $batch_size, $min, $max);
-            }
-        };
-    }
-
-    test!(
-        test_read_plain_v1_int32,
-        i32,
-        plain_v1,
-        MAX_DEF_LEVEL,
-        MAX_REP_LEVEL,
-        NUM_PAGES,
-        NUM_LEVELS,
-        16,
-        std::i32::MIN,
-        std::i32::MAX
-    );
-    test!(
-        test_read_plain_v2_int32,
-        i32,
-        plain_v2,
-        MAX_DEF_LEVEL,
-        MAX_REP_LEVEL,
-        NUM_PAGES,
-        NUM_LEVELS,
-        16,
-        std::i32::MIN,
-        std::i32::MAX
-    );
-
-    test!(
-        test_read_plain_v1_int32_uneven,
-        i32,
-        plain_v1,
-        MAX_DEF_LEVEL,
-        MAX_REP_LEVEL,
-        NUM_PAGES,
-        NUM_LEVELS,
-        17,
-        std::i32::MIN,
-        std::i32::MAX
-    );
-    test!(
-        test_read_plain_v2_int32_uneven,
-        i32,
-        plain_v2,
-        MAX_DEF_LEVEL,
-        MAX_REP_LEVEL,
-        NUM_PAGES,
-        NUM_LEVELS,
-        17,
-        std::i32::MIN,
-        std::i32::MAX
-    );
-
-    test!(
-        test_read_plain_v1_int32_multi_page,
-        i32,
-        plain_v1,
-        MAX_DEF_LEVEL,
-        MAX_REP_LEVEL,
-        NUM_PAGES,
-        NUM_LEVELS,
-        512,
-        std::i32::MIN,
-        std::i32::MAX
-    );
-    test!(
-        test_read_plain_v2_int32_multi_page,
-        i32,
-        plain_v2,
-        MAX_DEF_LEVEL,
-        MAX_REP_LEVEL,
-        NUM_PAGES,
-        NUM_LEVELS,
-        512,
-        std::i32::MIN,
-        std::i32::MAX
-    );
-
-    // test cases when column descriptor has MAX_DEF_LEVEL = 0 and MAX_REP_LEVEL = 0
-    test!(
-        test_read_plain_v1_int32_required_non_repeated,
-        i32,
-        plain_v1,
-        0,
-        0,
-        NUM_PAGES,
-        NUM_LEVELS,
-        16,
-        std::i32::MIN,
-        std::i32::MAX
-    );
-    test!(
-        test_read_plain_v2_int32_required_non_repeated,
-        i32,
-        plain_v2,
-        0,
-        0,
-        NUM_PAGES,
-        NUM_LEVELS,
-        16,
-        std::i32::MIN,
-        std::i32::MAX
-    );
-
-    test!(
-        test_read_plain_v1_int64,
-        i64,
-        plain_v1,
-        1,
-        1,
-        NUM_PAGES,
-        NUM_LEVELS,
-        16,
-        std::i64::MIN,
-        std::i64::MAX
-    );
-    test!(
-        test_read_plain_v2_int64,
-        i64,
-        plain_v2,
-        1,
-        1,
-        NUM_PAGES,
-        NUM_LEVELS,
-        16,
-        std::i64::MIN,
-        std::i64::MAX
-    );
-
-    test!(
-        test_read_plain_v1_int64_uneven,
-        i64,
-        plain_v1,
-        1,
-        1,
-        NUM_PAGES,
-        NUM_LEVELS,
-        17,
-        std::i64::MIN,
-        std::i64::MAX
-    );
-    test!(
-        test_read_plain_v2_int64_uneven,
-        i64,
-        plain_v2,
-        1,
-        1,
-        NUM_PAGES,
-        NUM_LEVELS,
-        17,
-        std::i64::MIN,
-        std::i64::MAX
-    );
-
-    test!(
-        test_read_plain_v1_int64_multi_page,
-        i64,
-        plain_v1,
-        1,
-        1,
-        NUM_PAGES,
-        NUM_LEVELS,
-        512,
-        std::i64::MIN,
-        std::i64::MAX
-    );
-    test!(
-        test_read_plain_v2_int64_multi_page,
-        i64,
-        plain_v2,
-        1,
-        1,
-        NUM_PAGES,
-        NUM_LEVELS,
-        512,
-        std::i64::MIN,
-        std::i64::MAX
-    );
-
-    // test cases when column descriptor has MAX_DEF_LEVEL = 0 and MAX_REP_LEVEL = 0
-    test!(
-        test_read_plain_v1_int64_required_non_repeated,
-        i64,
-        plain_v1,
-        0,
-        0,
-        NUM_PAGES,
-        NUM_LEVELS,
-        16,
-        std::i64::MIN,
-        std::i64::MAX
-    );
-    test!(
-        test_read_plain_v2_int64_required_non_repeated,
-        i64,
-        plain_v2,
-        0,
-        0,
-        NUM_PAGES,
-        NUM_LEVELS,
-        16,
-        std::i64::MIN,
-        std::i64::MAX
-    );
-
-    test!(
-        test_read_dict_v1_int32_small,
-        i32,
-        dict_v1,
-        MAX_DEF_LEVEL,
-        MAX_REP_LEVEL,
-        2,
-        2,
-        16,
-        0,
-        3
-    );
-    test!(
-        test_read_dict_v2_int32_small,
-        i32,
-        dict_v2,
-        MAX_DEF_LEVEL,
-        MAX_REP_LEVEL,
-        2,
-        2,
-        16,
-        0,
-        3
-    );
-
-    test!(
-        test_read_dict_v1_int32,
-        i32,
-        dict_v1,
-        MAX_DEF_LEVEL,
-        MAX_REP_LEVEL,
-        NUM_PAGES,
-        NUM_LEVELS,
-        16,
-        0,
-        3
-    );
-    test!(
-        test_read_dict_v2_int32,
-        i32,
-        dict_v2,
-        MAX_DEF_LEVEL,
-        MAX_REP_LEVEL,
-        NUM_PAGES,
-        NUM_LEVELS,
-        16,
-        0,
-        3
-    );
-
-    test!(
-        test_read_dict_v1_int32_uneven,
-        i32,
-        dict_v1,
-        MAX_DEF_LEVEL,
-        MAX_REP_LEVEL,
-        NUM_PAGES,
-        NUM_LEVELS,
-        17,
-        0,
-        3
-    );
-    test!(
-        test_read_dict_v2_int32_uneven,
-        i32,
-        dict_v2,
-        MAX_DEF_LEVEL,
-        MAX_REP_LEVEL,
-        NUM_PAGES,
-        NUM_LEVELS,
-        17,
-        0,
-        3
-    );
-
-    test!(
-        test_read_dict_v1_int32_multi_page,
-        i32,
-        dict_v1,
-        MAX_DEF_LEVEL,
-        MAX_REP_LEVEL,
-        NUM_PAGES,
-        NUM_LEVELS,
-        512,
-        0,
-        3
-    );
-    test!(
-        test_read_dict_v2_int32_multi_page,
-        i32,
-        dict_v2,
-        MAX_DEF_LEVEL,
-        MAX_REP_LEVEL,
-        NUM_PAGES,
-        NUM_LEVELS,
-        512,
-        0,
-        3
-    );
-
-    test!(
-        test_read_dict_v1_int64,
-        i64,
-        dict_v1,
-        MAX_DEF_LEVEL,
-        MAX_REP_LEVEL,
-        NUM_PAGES,
-        NUM_LEVELS,
-        16,
-        0,
-        3
-    );
-    test!(
-        test_read_dict_v2_int64,
-        i64,
-        dict_v2,
-        MAX_DEF_LEVEL,
-        MAX_REP_LEVEL,
-        NUM_PAGES,
-        NUM_LEVELS,
-        16,
-        0,
-        3
-    );
-
-    #[test]
-    fn test_read_batch_values_only() {
-        test_read_batch_int32(16, &mut [0; 10], None, None); // < batch_size
-        test_read_batch_int32(16, &mut [0; 16], None, None); // == batch_size
-        test_read_batch_int32(16, &mut [0; 51], None, None); // > batch_size
-    }
-
-    #[test]
-    fn test_read_batch_values_def_levels() {
-        test_read_batch_int32(16, &mut [0; 10], Some(&mut [0; 10]), None);
-        test_read_batch_int32(16, &mut [0; 16], Some(&mut [0; 16]), None);
-        test_read_batch_int32(16, &mut [0; 51], Some(&mut [0; 51]), None);
-    }
-
-    #[test]
-    fn test_read_batch_values_rep_levels() {
-        test_read_batch_int32(16, &mut [0; 10], None, Some(&mut [0; 10]));
-        test_read_batch_int32(16, &mut [0; 16], None, Some(&mut [0; 16]));
-        test_read_batch_int32(16, &mut [0; 51], None, Some(&mut [0; 51]));
-    }
-
-    #[test]
-    fn test_read_batch_different_buf_sizes() {
-        test_read_batch_int32(16, &mut [0; 8], Some(&mut [0; 9]), Some(&mut [0; 7]));
-        test_read_batch_int32(16, &mut [0; 1], Some(&mut [0; 9]), Some(&mut [0; 3]));
-    }
-
-    #[test]
-    fn test_read_batch_values_def_rep_levels() {
-        test_read_batch_int32(
-            128,
-            &mut [0; 128],
-            Some(&mut [0; 128]),
-            Some(&mut [0; 128]),
-        );
-    }
-
-    #[test]
-    fn test_read_batch_adjust_after_buffering_page() {
-        // This test covers scenario when buffering new page results in setting number
-        // of decoded values to 0, resulting on reading `batch_size` of values, but it is
-        // larger than we can insert into slice (affects values and levels).
-        //
-        // Note: values are chosen to reproduce the issue.
-        //
-        let primitive_type = get_test_int32_type();
-        let desc = Arc::new(ColumnDescriptor::new(
-            Arc::new(primitive_type),
-            1,
-            1,
-            ColumnPath::new(Vec::new()),
-        ));
-
-        let num_pages = 2;
-        let num_levels = 4;
-        let batch_size = 5;
-        let values = &mut vec![0; 7];
-        let def_levels = &mut vec![0; 7];
-        let rep_levels = &mut vec![0; 7];
-
-        let mut tester = ColumnReaderTester::<Int32Type>::new();
-        tester.test_read_batch(
-            desc,
-            Encoding::RLE_DICTIONARY,
-            num_pages,
-            num_levels,
-            batch_size,
-            std::i32::MIN,
-            std::i32::MAX,
-            values,
-            Some(def_levels),
-            Some(rep_levels),
-            false,
-        );
-    }
-
-    // ----------------------------------------------------------------------
-    // Helper methods to make pages and test
-    //
-    // # Overview
-    //
-    // Most of the test functionality is implemented in `ColumnReaderTester`, which
-    // provides some general data page test methods:
-    // - `test_read_batch_general`
-    // - `test_read_batch`
-    //
-    // There are also some high level wrappers that are part of `ColumnReaderTester`:
-    // - `plain_v1` -> call `test_read_batch_general` with data page v1 and plain encoding
-    // - `plain_v2` -> call `test_read_batch_general` with data page v2 and plain encoding
-    // - `dict_v1` -> call `test_read_batch_general` with data page v1 + dictionary page
-    // - `dict_v2` -> call `test_read_batch_general` with data page v2 + dictionary page
-    //
-    // And even higher level wrappers that simplify testing of almost the same test cases:
-    // - `get_test_int32_type`, provides dummy schema type
-    // - `get_test_int64_type`, provides dummy schema type
-    // - `test_read_batch_int32`, wrapper for `read_batch` tests, since they are basically
-    //   the same, just different def/rep levels and batch size.
-    //
-    // # Page assembly
-    //
-    // Page construction and generation of values, definition and repetition levels
-    // happens in `make_pages` function.
-    // All values are randomly generated based on provided min/max, levels are calculated
-    // based on provided max level for column descriptor (which is basically either int32
-    // or int64 type in tests) and `levels_per_page` variable.
-    //
-    // We use `DataPageBuilder` and its implementation `DataPageBuilderImpl` to actually
-    // turn values, definition and repetition levels into data pages (either v1 or v2).
-    //
-    // Those data pages are then stored as part of `TestPageReader` (we just pass vector
-    // of generated pages directly), which implements `PageReader` interface.
-    //
-    // # Comparison
-    //
-    // This allows us to pass test page reader into column reader, so we can test
-    // functionality of column reader - see `test_read_batch`, where we create column
-    // reader -> typed column reader, buffer values in `read_batch` method and compare
-    // output with generated data.
-
-    // Returns dummy Parquet `Type` for primitive field, because most of our tests use
-    // INT32 physical type.
-    fn get_test_int32_type() -> SchemaType {
-        SchemaType::primitive_type_builder("a", PhysicalType::INT32)
-            .with_repetition(Repetition::REQUIRED)
-            .with_converted_type(ConvertedType::INT_32)
-            .with_length(-1)
-            .build()
-            .expect("build() should be OK")
-    }
-
-    // Returns dummy Parquet `Type` for INT64 physical type.
-    fn get_test_int64_type() -> SchemaType {
-        SchemaType::primitive_type_builder("a", PhysicalType::INT64)
-            .with_repetition(Repetition::REQUIRED)
-            .with_converted_type(ConvertedType::INT_64)
-            .with_length(-1)
-            .build()
-            .expect("build() should be OK")
-    }
-
-    // Tests `read_batch()` functionality for INT32.
-    //
-    // This is a high level wrapper on `ColumnReaderTester` that allows us to specify some
-    // boilerplate code for setting up definition/repetition levels and column descriptor.
-    fn test_read_batch_int32(
-        batch_size: usize,
-        values: &mut [i32],
-        def_levels: Option<&mut [i16]>,
-        rep_levels: Option<&mut [i16]>,
-    ) {
-        let primitive_type = get_test_int32_type();
-        // make field is required based on provided slices of levels
-        let max_def_level = if def_levels.is_some() {
-            MAX_DEF_LEVEL
-        } else {
-            0
-        };
-        let max_rep_level = if def_levels.is_some() {
-            MAX_REP_LEVEL
-        } else {
-            0
-        };
-
-        let desc = Arc::new(ColumnDescriptor::new(
-            Arc::new(primitive_type),
-            max_def_level,
-            max_rep_level,
-            ColumnPath::new(Vec::new()),
-        ));
-        let mut tester = ColumnReaderTester::<Int32Type>::new();
-        tester.test_read_batch(
-            desc,
-            Encoding::RLE_DICTIONARY,
-            NUM_PAGES,
-            NUM_LEVELS,
-            batch_size,
-            std::i32::MIN,
-            std::i32::MAX,
-            values,
-            def_levels,
-            rep_levels,
-            false,
-        );
-    }
-
-    struct ColumnReaderTester<T: DataType>
-    where
-        T::T: PartialOrd + SampleUniform + Copy,
-    {
-        rep_levels: Vec<i16>,
-        def_levels: Vec<i16>,
-        values: Vec<T::T>,
-    }
-
-    impl<T: DataType> ColumnReaderTester<T>
-    where
-        T::T: PartialOrd + SampleUniform + Copy,
-    {
-        pub fn new() -> Self {
-            Self {
-                rep_levels: Vec::new(),
-                def_levels: Vec::new(),
-                values: Vec::new(),
-            }
-        }
-
-        // Method to generate and test data pages v1
-        fn plain_v1(
-            &mut self,
-            desc: ColumnDescPtr,
-            num_pages: usize,
-            num_levels: usize,
-            batch_size: usize,
-            min: T::T,
-            max: T::T,
-        ) {
-            self.test_read_batch_general(
-                desc,
-                Encoding::PLAIN,
-                num_pages,
-                num_levels,
-                batch_size,
-                min,
-                max,
-                false,
-            );
-        }
-
-        // Method to generate and test data pages v2
-        fn plain_v2(
-            &mut self,
-            desc: ColumnDescPtr,
-            num_pages: usize,
-            num_levels: usize,
-            batch_size: usize,
-            min: T::T,
-            max: T::T,
-        ) {
-            self.test_read_batch_general(
-                desc,
-                Encoding::PLAIN,
-                num_pages,
-                num_levels,
-                batch_size,
-                min,
-                max,
-                true,
-            );
-        }
-
-        // Method to generate and test dictionary page + data pages v1
-        fn dict_v1(
-            &mut self,
-            desc: ColumnDescPtr,
-            num_pages: usize,
-            num_levels: usize,
-            batch_size: usize,
-            min: T::T,
-            max: T::T,
-        ) {
-            self.test_read_batch_general(
-                desc,
-                Encoding::RLE_DICTIONARY,
-                num_pages,
-                num_levels,
-                batch_size,
-                min,
-                max,
-                false,
-            );
-        }
-
-        // Method to generate and test dictionary page + data pages v2
-        fn dict_v2(
-            &mut self,
-            desc: ColumnDescPtr,
-            num_pages: usize,
-            num_levels: usize,
-            batch_size: usize,
-            min: T::T,
-            max: T::T,
-        ) {
-            self.test_read_batch_general(
-                desc,
-                Encoding::RLE_DICTIONARY,
-                num_pages,
-                num_levels,
-                batch_size,
-                min,
-                max,
-                true,
-            );
-        }
-
-        // Helper function for the general case of `read_batch()` where `values`,
-        // `def_levels` and `rep_levels` are always provided with enough space.
-        fn test_read_batch_general(
-            &mut self,
-            desc: ColumnDescPtr,
-            encoding: Encoding,
-            num_pages: usize,
-            num_levels: usize,
-            batch_size: usize,
-            min: T::T,
-            max: T::T,
-            use_v2: bool,
-        ) {
-            let mut def_levels = vec![0; num_levels * num_pages];
-            let mut rep_levels = vec![0; num_levels * num_pages];
-            let mut values = vec![T::T::default(); num_levels * num_pages];
-            self.test_read_batch(
-                desc,
-                encoding,
-                num_pages,
-                num_levels,
-                batch_size,
-                min,
-                max,
-                &mut values,
-                Some(&mut def_levels),
-                Some(&mut rep_levels),
-                use_v2,
-            );
-        }
-
-        // Helper function to test `read_batch()` method with custom buffers for values,
-        // definition and repetition levels.
-        fn test_read_batch(
-            &mut self,
-            desc: ColumnDescPtr,
-            encoding: Encoding,
-            num_pages: usize,
-            num_levels: usize,
-            batch_size: usize,
-            min: T::T,
-            max: T::T,
-            values: &mut [T::T],
-            mut def_levels: Option<&mut [i16]>,
-            mut rep_levels: Option<&mut [i16]>,
-            use_v2: bool,
-        ) {
-            let mut pages = VecDeque::new();
-            make_pages::<T>(
-                desc.clone(),
-                encoding,
-                num_pages,
-                num_levels,
-                min,
-                max,
-                &mut self.def_levels,
-                &mut self.rep_levels,
-                &mut self.values,
-                &mut pages,
-                use_v2,
-            );
-            let max_def_level = desc.max_def_level();
-            let page_reader = TestPageReader::new(Vec::from(pages));
-            let column_reader: ColumnReader =
-                get_column_reader(desc, Box::new(page_reader));
-            let mut typed_column_reader = get_typed_column_reader::<T>(column_reader);
-
-            let mut curr_values_read = 0;
-            let mut curr_levels_read = 0;
-            let mut done = false;
-            while !done {
-                let actual_def_levels =
-                    def_levels.as_mut().map(|vec| &mut vec[curr_levels_read..]);
-                let actual_rep_levels =
-                    rep_levels.as_mut().map(|vec| &mut vec[curr_levels_read..]);
-
-                let (values_read, levels_read) = typed_column_reader
-                    .read_batch(
-                        batch_size,
-                        actual_def_levels,
-                        actual_rep_levels,
-                        &mut values[curr_values_read..],
-                    )
-                    .expect("read_batch() should be OK");
-
-                if values_read == 0 && levels_read == 0 {
-                    done = true;
-                }
-
-                curr_values_read += values_read;
-                curr_levels_read += levels_read;
-            }
-
-            assert!(
-                values.len() >= curr_values_read,
-                "values.len() >= values_read"
-            );
-            assert_eq!(
-                &values[0..curr_values_read],
-                &self.values[0..curr_values_read],
-                "values content doesn't match"
-            );
-
-            if let Some(ref levels) = def_levels {
-                assert!(
-                    levels.len() >= curr_levels_read,
-                    "def_levels.len() >= levels_read"
-                );
-                assert_eq!(
-                    &levels[0..curr_levels_read],
-                    &self.def_levels[0..curr_levels_read],
-                    "definition levels content doesn't match"
-                );
-            }
-
-            if let Some(ref levels) = rep_levels {
-                assert!(
-                    levels.len() >= curr_levels_read,
-                    "rep_levels.len() >= levels_read"
-                );
-                assert_eq!(
-                    &levels[0..curr_levels_read],
-                    &self.rep_levels[0..curr_levels_read],
-                    "repetition levels content doesn't match"
-                );
-            }
-
-            if def_levels.is_none() && rep_levels.is_none() {
-                assert!(
-                    curr_levels_read == 0,
-                    "expected to read 0 levels, found {}",
-                    curr_levels_read
-                );
-            } else if def_levels.is_some() && max_def_level > 0 {
-                assert!(
-                    curr_levels_read >= curr_values_read,
-                    "expected levels read to be greater than values read"
-                );
-            }
-        }
-    }
-
-    struct TestPageReader {
-        pages: IntoIter<Page>,
-    }
-
-    impl TestPageReader {
-        pub fn new(pages: Vec<Page>) -> Self {
-            Self {
-                pages: pages.into_iter(),
-            }
-        }
-    }
-
-    impl PageReader for TestPageReader {
-        fn get_next_page(&mut self) -> Result<Option<Page>> {
-            Ok(self.pages.next())
-        }
-    }
-}
diff --git a/rust/parquet/src/column/writer.rs b/rust/parquet/src/column/writer.rs
deleted file mode 100644
index 0b56594c0b6..00000000000
--- a/rust/parquet/src/column/writer.rs
+++ /dev/null
@@ -1,1908 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Contains column writer API.
-use std::{cmp, collections::VecDeque, convert::TryFrom, marker::PhantomData, sync::Arc};
-
-use crate::basic::{Compression, Encoding, PageType, Type};
-use crate::column::page::{CompressedPage, Page, PageWriteSpec, PageWriter};
-use crate::compression::{create_codec, Codec};
-use crate::data_type::AsBytes;
-use crate::data_type::*;
-use crate::encodings::{
-    encoding::{get_encoder, DictEncoder, Encoder},
-    levels::{max_buffer_size, LevelEncoder},
-};
-use crate::errors::{ParquetError, Result};
-use crate::file::statistics::Statistics;
-use crate::file::{
-    metadata::ColumnChunkMetaData,
-    properties::{WriterProperties, WriterPropertiesPtr, WriterVersion},
-};
-use crate::schema::types::ColumnDescPtr;
-use crate::util::bit_util::FromBytes;
-use crate::util::memory::{ByteBufferPtr, MemTracker};
-
-/// Column writer for a Parquet type.
-pub enum ColumnWriter {
-    BoolColumnWriter(ColumnWriterImpl<BoolType>),
-    Int32ColumnWriter(ColumnWriterImpl<Int32Type>),
-    Int64ColumnWriter(ColumnWriterImpl<Int64Type>),
-    Int96ColumnWriter(ColumnWriterImpl<Int96Type>),
-    FloatColumnWriter(ColumnWriterImpl<FloatType>),
-    DoubleColumnWriter(ColumnWriterImpl<DoubleType>),
-    ByteArrayColumnWriter(ColumnWriterImpl<ByteArrayType>),
-    FixedLenByteArrayColumnWriter(ColumnWriterImpl<FixedLenByteArrayType>),
-}
-
-pub enum Level {
-    Page,
-    Column,
-}
-
-macro_rules! gen_stats_section {
-    ($physical_ty: ty, $stat_fn: ident, $min: ident, $max: ident, $distinct: ident, $nulls: ident) => {{
-        let min = $min.as_ref().and_then(|v| {
-            Some(read_num_bytes!(
-                $physical_ty,
-                v.as_bytes().len(),
-                &v.as_bytes()
-            ))
-        });
-        let max = $max.as_ref().and_then(|v| {
-            Some(read_num_bytes!(
-                $physical_ty,
-                v.as_bytes().len(),
-                &v.as_bytes()
-            ))
-        });
-        Statistics::$stat_fn(min, max, $distinct, $nulls, false)
-    }};
-}
-
-/// Gets a specific column writer corresponding to column descriptor `descr`.
-pub fn get_column_writer(
-    descr: ColumnDescPtr,
-    props: WriterPropertiesPtr,
-    page_writer: Box<dyn PageWriter>,
-) -> ColumnWriter {
-    match descr.physical_type() {
-        Type::BOOLEAN => ColumnWriter::BoolColumnWriter(ColumnWriterImpl::new(
-            descr,
-            props,
-            page_writer,
-        )),
-        Type::INT32 => ColumnWriter::Int32ColumnWriter(ColumnWriterImpl::new(
-            descr,
-            props,
-            page_writer,
-        )),
-        Type::INT64 => ColumnWriter::Int64ColumnWriter(ColumnWriterImpl::new(
-            descr,
-            props,
-            page_writer,
-        )),
-        Type::INT96 => ColumnWriter::Int96ColumnWriter(ColumnWriterImpl::new(
-            descr,
-            props,
-            page_writer,
-        )),
-        Type::FLOAT => ColumnWriter::FloatColumnWriter(ColumnWriterImpl::new(
-            descr,
-            props,
-            page_writer,
-        )),
-        Type::DOUBLE => ColumnWriter::DoubleColumnWriter(ColumnWriterImpl::new(
-            descr,
-            props,
-            page_writer,
-        )),
-        Type::BYTE_ARRAY => ColumnWriter::ByteArrayColumnWriter(ColumnWriterImpl::new(
-            descr,
-            props,
-            page_writer,
-        )),
-        Type::FIXED_LEN_BYTE_ARRAY => ColumnWriter::FixedLenByteArrayColumnWriter(
-            ColumnWriterImpl::new(descr, props, page_writer),
-        ),
-    }
-}
-
-/// Gets a typed column writer for the specific type `T`, by "up-casting" `col_writer` of
-/// non-generic type to a generic column writer type `ColumnWriterImpl`.
-///
-/// Panics if actual enum value for `col_writer` does not match the type `T`.
-pub fn get_typed_column_writer<T: DataType>(
-    col_writer: ColumnWriter,
-) -> ColumnWriterImpl<T> {
-    T::get_column_writer(col_writer).unwrap_or_else(|| {
-        panic!(
-            "Failed to convert column writer into a typed column writer for `{}` type",
-            T::get_physical_type()
-        )
-    })
-}
-
-/// Similar to `get_typed_column_writer` but returns a reference.
-pub fn get_typed_column_writer_ref<T: DataType>(
-    col_writer: &ColumnWriter,
-) -> &ColumnWriterImpl<T> {
-    T::get_column_writer_ref(col_writer).unwrap_or_else(|| {
-        panic!(
-            "Failed to convert column writer into a typed column writer for `{}` type",
-            T::get_physical_type()
-        )
-    })
-}
-
-/// Similar to `get_typed_column_writer` but returns a reference.
-pub fn get_typed_column_writer_mut<T: DataType>(
-    col_writer: &mut ColumnWriter,
-) -> &mut ColumnWriterImpl<T> {
-    T::get_column_writer_mut(col_writer).unwrap_or_else(|| {
-        panic!(
-            "Failed to convert column writer into a typed column writer for `{}` type",
-            T::get_physical_type()
-        )
-    })
-}
-
-/// Typed column writer for a primitive column.
-pub struct ColumnWriterImpl<T: DataType> {
-    // Column writer properties
-    descr: ColumnDescPtr,
-    props: WriterPropertiesPtr,
-    page_writer: Box<dyn PageWriter>,
-    has_dictionary: bool,
-    dict_encoder: Option<DictEncoder<T>>,
-    encoder: Box<dyn Encoder<T>>,
-    codec: Compression,
-    compressor: Option<Box<dyn Codec>>,
-    // Metrics per page
-    num_buffered_values: u32,
-    num_buffered_encoded_values: u32,
-    num_buffered_rows: u32,
-    min_page_value: Option<T::T>,
-    max_page_value: Option<T::T>,
-    num_page_nulls: u64,
-    page_distinct_count: Option<u64>,
-    // Metrics per column writer
-    total_bytes_written: u64,
-    total_rows_written: u64,
-    total_uncompressed_size: u64,
-    total_compressed_size: u64,
-    total_num_values: u64,
-    dictionary_page_offset: Option<u64>,
-    data_page_offset: Option<u64>,
-    min_column_value: Option<T::T>,
-    max_column_value: Option<T::T>,
-    num_column_nulls: u64,
-    column_distinct_count: Option<u64>,
-    // Reused buffers
-    def_levels_sink: Vec<i16>,
-    rep_levels_sink: Vec<i16>,
-    data_pages: VecDeque<CompressedPage>,
-    _phantom: PhantomData<T>,
-}
-
-impl<T: DataType> ColumnWriterImpl<T> {
-    pub fn new(
-        descr: ColumnDescPtr,
-        props: WriterPropertiesPtr,
-        page_writer: Box<dyn PageWriter>,
-    ) -> Self {
-        let codec = props.compression(descr.path());
-        let compressor = create_codec(codec).unwrap();
-
-        // Optionally set dictionary encoder.
-        let dict_encoder = if props.dictionary_enabled(descr.path())
-            && has_dictionary_support(T::get_physical_type(), &props)
-        {
-            Some(DictEncoder::new(descr.clone(), Arc::new(MemTracker::new())))
-        } else {
-            None
-        };
-
-        // Whether or not this column writer has a dictionary encoding.
-        let has_dictionary = dict_encoder.is_some();
-
-        // Set either main encoder or fallback encoder.
-        let fallback_encoder = get_encoder(
-            descr.clone(),
-            props
-                .encoding(descr.path())
-                .unwrap_or_else(|| fallback_encoding(T::get_physical_type(), &props)),
-            Arc::new(MemTracker::new()),
-        )
-        .unwrap();
-
-        Self {
-            descr,
-            props,
-            page_writer,
-            has_dictionary,
-            dict_encoder,
-            encoder: fallback_encoder,
-            codec,
-            compressor,
-            num_buffered_values: 0,
-            num_buffered_encoded_values: 0,
-            num_buffered_rows: 0,
-            total_bytes_written: 0,
-            total_rows_written: 0,
-            total_uncompressed_size: 0,
-            total_compressed_size: 0,
-            total_num_values: 0,
-            dictionary_page_offset: None,
-            data_page_offset: None,
-            def_levels_sink: vec![],
-            rep_levels_sink: vec![],
-            data_pages: VecDeque::new(),
-            min_page_value: None,
-            max_page_value: None,
-            num_page_nulls: 0,
-            page_distinct_count: None,
-            min_column_value: None,
-            max_column_value: None,
-            num_column_nulls: 0,
-            column_distinct_count: None,
-            _phantom: PhantomData,
-        }
-    }
-
-    fn write_batch_internal(
-        &mut self,
-        values: &[T::T],
-        def_levels: Option<&[i16]>,
-        rep_levels: Option<&[i16]>,
-        min: &Option<T::T>,
-        max: &Option<T::T>,
-        null_count: Option<u64>,
-        distinct_count: Option<u64>,
-    ) -> Result<usize> {
-        // We check for DataPage limits only after we have inserted the values. If a user
-        // writes a large number of values, the DataPage size can be well above the limit.
-        //
-        // The purpose of this chunking is to bound this. Even if a user writes large
-        // number of values, the chunking will ensure that we add data page at a
-        // reasonable pagesize limit.
-
-        // TODO: find out why we don't account for size of levels when we estimate page
-        // size.
-
-        // Find out the minimal length to prevent index out of bound errors.
-        let mut min_len = values.len();
-        if let Some(levels) = def_levels {
-            min_len = cmp::min(min_len, levels.len());
-        }
-        if let Some(levels) = rep_levels {
-            min_len = cmp::min(min_len, levels.len());
-        }
-
-        // Find out number of batches to process.
-        let write_batch_size = self.props.write_batch_size();
-        let num_batches = min_len / write_batch_size;
-
-        // Process pre-calculated statistics
-        match (min, max) {
-            (Some(min), Some(max)) => {
-                if self.min_column_value.as_ref().map_or(true, |v| v > min) {
-                    self.min_column_value = Some(min.clone());
-                }
-                if self.max_column_value.as_ref().map_or(true, |v| v < max) {
-                    self.max_column_value = Some(max.clone());
-                }
-            }
-            (None, Some(_)) | (Some(_), None) => {
-                panic!("min/max should be both set or both None")
-            }
-            (None, None) => {}
-        }
-
-        if let Some(distinct) = distinct_count {
-            self.column_distinct_count =
-                Some(self.column_distinct_count.unwrap_or(0) + distinct);
-        }
-
-        if let Some(nulls) = null_count {
-            self.num_column_nulls += nulls;
-        }
-
-        let calculate_page_stats = (min.is_none() || max.is_none())
-            && null_count.is_none()
-            && distinct_count.is_none();
-
-        let mut values_offset = 0;
-        let mut levels_offset = 0;
-        for _ in 0..num_batches {
-            values_offset += self.write_mini_batch(
-                &values[values_offset..values_offset + write_batch_size],
-                def_levels.map(|lv| &lv[levels_offset..levels_offset + write_batch_size]),
-                rep_levels.map(|lv| &lv[levels_offset..levels_offset + write_batch_size]),
-                calculate_page_stats,
-            )?;
-            levels_offset += write_batch_size;
-        }
-
-        values_offset += self.write_mini_batch(
-            &values[values_offset..],
-            def_levels.map(|lv| &lv[levels_offset..]),
-            rep_levels.map(|lv| &lv[levels_offset..]),
-            calculate_page_stats,
-        )?;
-
-        // Return total number of values processed.
-        Ok(values_offset)
-    }
-
-    /// Writes batch of values, definition levels and repetition levels.
-    /// Returns number of values processed (written).
-    ///
-    /// If definition and repetition levels are provided, we write fully those levels and
-    /// select how many values to write (this number will be returned), since number of
-    /// actual written values may be smaller than provided values.
-    ///
-    /// If only values are provided, then all values are written and the length of
-    /// of the values buffer is returned.
-    ///
-    /// Definition and/or repetition levels can be omitted, if values are
-    /// non-nullable and/or non-repeated.
-    pub fn write_batch(
-        &mut self,
-        values: &[T::T],
-        def_levels: Option<&[i16]>,
-        rep_levels: Option<&[i16]>,
-    ) -> Result<usize> {
-        self.write_batch_internal(
-            values, def_levels, rep_levels, &None, &None, None, None,
-        )
-    }
-
-    /// Writer may optionally provide pre-calculated statistics for this batch, in which case we do
-    /// not calculate page level statistics as this will defeat the purpose of speeding up the write
-    /// process with pre-calculated statistics.
-    pub fn write_batch_with_statistics(
-        &mut self,
-        values: &[T::T],
-        def_levels: Option<&[i16]>,
-        rep_levels: Option<&[i16]>,
-        min: &Option<T::T>,
-        max: &Option<T::T>,
-        nulls_count: Option<u64>,
-        distinct_count: Option<u64>,
-    ) -> Result<usize> {
-        self.write_batch_internal(
-            values,
-            def_levels,
-            rep_levels,
-            min,
-            max,
-            nulls_count,
-            distinct_count,
-        )
-    }
-
-    /// Returns total number of bytes written by this column writer so far.
-    /// This value is also returned when column writer is closed.
-    pub fn get_total_bytes_written(&self) -> u64 {
-        self.total_bytes_written
-    }
-
-    /// Returns total number of rows written by this column writer so far.
-    /// This value is also returned when column writer is closed.
-    pub fn get_total_rows_written(&self) -> u64 {
-        self.total_rows_written
-    }
-
-    /// Finalises writes and closes the column writer.
-    /// Returns total bytes written, total rows written and column chunk metadata.
-    pub fn close(mut self) -> Result<(u64, u64, ColumnChunkMetaData)> {
-        if self.dict_encoder.is_some() {
-            self.write_dictionary_page()?;
-        }
-        self.flush_data_pages()?;
-        let metadata = self.write_column_metadata()?;
-        self.dict_encoder = None;
-        self.page_writer.close()?;
-
-        Ok((self.total_bytes_written, self.total_rows_written, metadata))
-    }
-
-    /// Writes mini batch of values, definition and repetition levels.
-    /// This allows fine-grained processing of values and maintaining a reasonable
-    /// page size.
-    fn write_mini_batch(
-        &mut self,
-        values: &[T::T],
-        def_levels: Option<&[i16]>,
-        rep_levels: Option<&[i16]>,
-        calculate_page_stats: bool,
-    ) -> Result<usize> {
-        let mut values_to_write = 0;
-
-        // Check if number of definition levels is the same as number of repetition
-        // levels.
-        if let (Some(def), Some(rep)) = (def_levels, rep_levels) {
-            if def.len() != rep.len() {
-                return Err(general_err!(
-                    "Inconsistent length of definition and repetition levels: {} != {}",
-                    def.len(),
-                    rep.len()
-                ));
-            }
-        }
-
-        // Process definition levels and determine how many values to write.
-        let num_values = if self.descr.max_def_level() > 0 {
-            let levels = def_levels.ok_or_else(|| {
-                general_err!(
-                    "Definition levels are required, because max definition level = {}",
-                    self.descr.max_def_level()
-                )
-            })?;
-
-            for &level in levels {
-                if level == self.descr.max_def_level() {
-                    values_to_write += 1;
-                } else if calculate_page_stats {
-                    self.num_page_nulls += 1
-                }
-            }
-
-            self.write_definition_levels(levels);
-            u32::try_from(levels.len()).unwrap()
-        } else {
-            values_to_write = values.len();
-            u32::try_from(values_to_write).unwrap()
-        };
-
-        // Process repetition levels and determine how many rows we are about to process.
-        if self.descr.max_rep_level() > 0 {
-            // A row could contain more than one value.
-            let levels = rep_levels.ok_or_else(|| {
-                general_err!(
-                    "Repetition levels are required, because max repetition level = {}",
-                    self.descr.max_rep_level()
-                )
-            })?;
-
-            // Count the occasions where we start a new row
-            for &level in levels {
-                self.num_buffered_rows += (level == 0) as u32
-            }
-
-            self.write_repetition_levels(levels);
-        } else {
-            // Each value is exactly one row.
-            // Equals to the number of values, we count nulls as well.
-            self.num_buffered_rows += num_values;
-        }
-
-        // Check that we have enough values to write.
-        let values_to_write = values.get(0..values_to_write).ok_or_else(|| {
-            general_err!(
-                "Expected to write {} values, but have only {}",
-                values_to_write,
-                values.len()
-            )
-        })?;
-
-        if calculate_page_stats {
-            for val in values_to_write {
-                self.update_page_min_max(val);
-            }
-        }
-
-        self.write_values(values_to_write)?;
-
-        self.num_buffered_values += num_values;
-        self.num_buffered_encoded_values += u32::try_from(values_to_write.len()).unwrap();
-
-        if self.should_add_data_page() {
-            self.add_data_page(calculate_page_stats)?;
-        }
-
-        if self.should_dict_fallback() {
-            self.dict_fallback()?;
-        }
-
-        Ok(values_to_write.len())
-    }
-
-    #[inline]
-    fn write_definition_levels(&mut self, def_levels: &[i16]) {
-        self.def_levels_sink.extend_from_slice(def_levels);
-    }
-
-    #[inline]
-    fn write_repetition_levels(&mut self, rep_levels: &[i16]) {
-        self.rep_levels_sink.extend_from_slice(rep_levels);
-    }
-
-    #[inline]
-    fn write_values(&mut self, values: &[T::T]) -> Result<()> {
-        match self.dict_encoder {
-            Some(ref mut encoder) => encoder.put(values),
-            None => self.encoder.put(values),
-        }
-    }
-
-    /// Returns true if we need to fall back to non-dictionary encoding.
-    ///
-    /// We can only fall back if dictionary encoder is set and we have exceeded dictionary
-    /// size.
-    #[inline]
-    fn should_dict_fallback(&self) -> bool {
-        match self.dict_encoder {
-            Some(ref encoder) => {
-                encoder.dict_encoded_size() >= self.props.dictionary_pagesize_limit()
-            }
-            None => false,
-        }
-    }
-
-    /// Returns true if there is enough data for a data page, false otherwise.
-    #[inline]
-    fn should_add_data_page(&self) -> bool {
-        match self.dict_encoder {
-            Some(ref encoder) => {
-                encoder.estimated_data_encoded_size() >= self.props.data_pagesize_limit()
-            }
-            None => {
-                self.encoder.estimated_data_encoded_size()
-                    >= self.props.data_pagesize_limit()
-            }
-        }
-    }
-
-    /// Performs dictionary fallback.
-    /// Prepares and writes dictionary and all data pages into page writer.
-    fn dict_fallback(&mut self) -> Result<()> {
-        // At this point we know that we need to fall back.
-        self.write_dictionary_page()?;
-        self.flush_data_pages()?;
-        self.dict_encoder = None;
-        Ok(())
-    }
-
-    /// Adds data page.
-    /// Data page is either buffered in case of dictionary encoding or written directly.
-    fn add_data_page(&mut self, calculate_page_stat: bool) -> Result<()> {
-        // Extract encoded values
-        let value_bytes = match self.dict_encoder {
-            Some(ref mut encoder) => encoder.write_indices()?,
-            None => self.encoder.flush_buffer()?,
-        };
-
-        // Select encoding based on current encoder and writer version (v1 or v2).
-        let encoding = if self.dict_encoder.is_some() {
-            self.props.dictionary_data_page_encoding()
-        } else {
-            self.encoder.encoding()
-        };
-
-        let max_def_level = self.descr.max_def_level();
-        let max_rep_level = self.descr.max_rep_level();
-
-        let page_statistics = if calculate_page_stat {
-            self.update_column_min_max();
-            self.num_column_nulls += self.num_page_nulls;
-            Some(self.make_page_statistics())
-        } else {
-            None
-        };
-
-        let compressed_page = match self.props.writer_version() {
-            WriterVersion::PARQUET_1_0 => {
-                let mut buffer = vec![];
-
-                if max_rep_level > 0 {
-                    buffer.extend_from_slice(
-                        &self.encode_levels_v1(
-                            Encoding::RLE,
-                            &self.rep_levels_sink[..],
-                            max_rep_level,
-                        )?[..],
-                    );
-                }
-
-                if max_def_level > 0 {
-                    buffer.extend_from_slice(
-                        &self.encode_levels_v1(
-                            Encoding::RLE,
-                            &self.def_levels_sink[..],
-                            max_def_level,
-                        )?[..],
-                    );
-                }
-
-                buffer.extend_from_slice(value_bytes.data());
-                let uncompressed_size = buffer.len();
-
-                if let Some(ref mut cmpr) = self.compressor {
-                    let mut compressed_buf = Vec::with_capacity(value_bytes.data().len());
-                    cmpr.compress(&buffer[..], &mut compressed_buf)?;
-                    buffer = compressed_buf;
-                }
-
-                let data_page = Page::DataPage {
-                    buf: ByteBufferPtr::new(buffer),
-                    num_values: self.num_buffered_values,
-                    encoding,
-                    def_level_encoding: Encoding::RLE,
-                    rep_level_encoding: Encoding::RLE,
-                    statistics: page_statistics,
-                };
-
-                CompressedPage::new(data_page, uncompressed_size)
-            }
-            WriterVersion::PARQUET_2_0 => {
-                let mut rep_levels_byte_len = 0;
-                let mut def_levels_byte_len = 0;
-                let mut buffer = vec![];
-
-                if max_rep_level > 0 {
-                    let levels =
-                        self.encode_levels_v2(&self.rep_levels_sink[..], max_rep_level)?;
-                    rep_levels_byte_len = levels.len();
-                    buffer.extend_from_slice(&levels[..]);
-                }
-
-                if max_def_level > 0 {
-                    let levels =
-                        self.encode_levels_v2(&self.def_levels_sink[..], max_def_level)?;
-                    def_levels_byte_len = levels.len();
-                    buffer.extend_from_slice(&levels[..]);
-                }
-
-                let uncompressed_size =
-                    rep_levels_byte_len + def_levels_byte_len + value_bytes.len();
-
-                // Data Page v2 compresses values only.
-                match self.compressor {
-                    Some(ref mut cmpr) => {
-                        cmpr.compress(value_bytes.data(), &mut buffer)?;
-                    }
-                    None => buffer.extend_from_slice(value_bytes.data()),
-                }
-
-                let data_page = Page::DataPageV2 {
-                    buf: ByteBufferPtr::new(buffer),
-                    num_values: self.num_buffered_values,
-                    encoding,
-                    num_nulls: self.num_buffered_values
-                        - self.num_buffered_encoded_values,
-                    num_rows: self.num_buffered_rows,
-                    def_levels_byte_len: def_levels_byte_len as u32,
-                    rep_levels_byte_len: rep_levels_byte_len as u32,
-                    is_compressed: self.compressor.is_some(),
-                    statistics: page_statistics,
-                };
-
-                CompressedPage::new(data_page, uncompressed_size)
-            }
-        };
-
-        // Check if we need to buffer data page or flush it to the sink directly.
-        if self.dict_encoder.is_some() {
-            self.data_pages.push_back(compressed_page);
-        } else {
-            self.write_data_page(compressed_page)?;
-        }
-
-        // Update total number of rows.
-        self.total_rows_written += self.num_buffered_rows as u64;
-
-        // Reset state.
-        self.rep_levels_sink.clear();
-        self.def_levels_sink.clear();
-        self.num_buffered_values = 0;
-        self.num_buffered_encoded_values = 0;
-        self.num_buffered_rows = 0;
-        self.min_page_value = None;
-        self.max_page_value = None;
-        self.num_page_nulls = 0;
-        self.page_distinct_count = None;
-
-        Ok(())
-    }
-
-    /// Finalises any outstanding data pages and flushes buffered data pages from
-    /// dictionary encoding into underlying sink.
-    #[inline]
-    fn flush_data_pages(&mut self) -> Result<()> {
-        // Write all outstanding data to a new page.
-        let calculate_page_stats =
-            self.min_page_value.is_some() && self.max_page_value.is_some();
-        if self.num_buffered_values > 0 {
-            self.add_data_page(calculate_page_stats)?;
-        }
-
-        while let Some(page) = self.data_pages.pop_front() {
-            self.write_data_page(page)?;
-        }
-
-        Ok(())
-    }
-
-    /// Assembles and writes column chunk metadata.
-    fn write_column_metadata(&mut self) -> Result<ColumnChunkMetaData> {
-        let total_compressed_size = self.total_compressed_size as i64;
-        let total_uncompressed_size = self.total_uncompressed_size as i64;
-        let num_values = self.total_num_values as i64;
-        let dict_page_offset = self.dictionary_page_offset.map(|v| v as i64);
-        // If data page offset is not set, then no pages have been written
-        let data_page_offset = self.data_page_offset.unwrap_or(0) as i64;
-
-        let file_offset;
-        let mut encodings = Vec::new();
-
-        if self.has_dictionary {
-            assert!(dict_page_offset.is_some(), "Dictionary offset is not set");
-            file_offset = dict_page_offset.unwrap() + total_compressed_size;
-            // NOTE: This should be in sync with writing dictionary pages.
-            encodings.push(self.props.dictionary_page_encoding());
-            encodings.push(self.props.dictionary_data_page_encoding());
-            // Fallback to alternative encoding, add it to the list.
-            if self.dict_encoder.is_none() {
-                encodings.push(self.encoder.encoding());
-            }
-        } else {
-            file_offset = data_page_offset + total_compressed_size;
-            encodings.push(self.encoder.encoding());
-        }
-        // We use only RLE level encoding for data page v1 and data page v2.
-        encodings.push(Encoding::RLE);
-
-        let statistics = self.make_column_statistics();
-        let metadata = ColumnChunkMetaData::builder(self.descr.clone())
-            .set_compression(self.codec)
-            .set_encodings(encodings)
-            .set_file_offset(file_offset)
-            .set_total_compressed_size(total_compressed_size)
-            .set_total_uncompressed_size(total_uncompressed_size)
-            .set_num_values(num_values)
-            .set_data_page_offset(data_page_offset)
-            .set_dictionary_page_offset(dict_page_offset)
-            .set_statistics(statistics)
-            .build()?;
-
-        self.page_writer.write_metadata(&metadata)?;
-
-        Ok(metadata)
-    }
-
-    /// Encodes definition or repetition levels for Data Page v1.
-    #[inline]
-    fn encode_levels_v1(
-        &self,
-        encoding: Encoding,
-        levels: &[i16],
-        max_level: i16,
-    ) -> Result<Vec<u8>> {
-        let size = max_buffer_size(encoding, max_level, levels.len());
-        let mut encoder = LevelEncoder::v1(encoding, max_level, vec![0; size]);
-        encoder.put(&levels)?;
-        encoder.consume()
-    }
-
-    /// Encodes definition or repetition levels for Data Page v2.
-    /// Encoding is always RLE.
-    #[inline]
-    fn encode_levels_v2(&self, levels: &[i16], max_level: i16) -> Result<Vec<u8>> {
-        let size = max_buffer_size(Encoding::RLE, max_level, levels.len());
-        let mut encoder = LevelEncoder::v2(max_level, vec![0; size]);
-        encoder.put(&levels)?;
-        encoder.consume()
-    }
-
-    /// Writes compressed data page into underlying sink and updates global metrics.
-    #[inline]
-    fn write_data_page(&mut self, page: CompressedPage) -> Result<()> {
-        let page_spec = self.page_writer.write_page(page)?;
-        self.update_metrics_for_page(page_spec);
-        Ok(())
-    }
-
-    /// Writes dictionary page into underlying sink.
-    #[inline]
-    fn write_dictionary_page(&mut self) -> Result<()> {
-        let compressed_page = {
-            let encoder = self
-                .dict_encoder
-                .as_ref()
-                .ok_or_else(|| general_err!("Dictionary encoder is not set"))?;
-
-            let is_sorted = encoder.is_sorted();
-            let num_values = encoder.num_entries();
-            let mut values_buf = encoder.write_dict()?;
-            let uncompressed_size = values_buf.len();
-
-            if let Some(ref mut cmpr) = self.compressor {
-                let mut output_buf = Vec::with_capacity(uncompressed_size);
-                cmpr.compress(values_buf.data(), &mut output_buf)?;
-                values_buf = ByteBufferPtr::new(output_buf);
-            }
-
-            let dict_page = Page::DictionaryPage {
-                buf: values_buf,
-                num_values: num_values as u32,
-                encoding: self.props.dictionary_page_encoding(),
-                is_sorted,
-            };
-            CompressedPage::new(dict_page, uncompressed_size)
-        };
-
-        let page_spec = self.page_writer.write_page(compressed_page)?;
-        self.update_metrics_for_page(page_spec);
-        Ok(())
-    }
-
-    /// Updates column writer metrics with each page metadata.
-    #[inline]
-    fn update_metrics_for_page(&mut self, page_spec: PageWriteSpec) {
-        self.total_uncompressed_size += page_spec.uncompressed_size as u64;
-        self.total_compressed_size += page_spec.compressed_size as u64;
-        self.total_num_values += page_spec.num_values as u64;
-        self.total_bytes_written += page_spec.bytes_written;
-
-        match page_spec.page_type {
-            PageType::DATA_PAGE | PageType::DATA_PAGE_V2 => {
-                if self.data_page_offset.is_none() {
-                    self.data_page_offset = Some(page_spec.offset);
-                }
-            }
-            PageType::DICTIONARY_PAGE => {
-                assert!(
-                    self.dictionary_page_offset.is_none(),
-                    "Dictionary offset is already set"
-                );
-                self.dictionary_page_offset = Some(page_spec.offset);
-            }
-            _ => {}
-        }
-    }
-
-    /// Returns reference to the underlying page writer.
-    /// This method is intended to use in tests only.
-    fn get_page_writer_ref(&self) -> &dyn PageWriter {
-        self.page_writer.as_ref()
-    }
-
-    fn make_column_statistics(&self) -> Statistics {
-        self.make_typed_statistics(Level::Column)
-    }
-
-    fn make_page_statistics(&self) -> Statistics {
-        self.make_typed_statistics(Level::Page)
-    }
-
-    pub fn make_typed_statistics(&self, level: Level) -> Statistics {
-        let (min, max, distinct, nulls) = match level {
-            Level::Page => (
-                self.min_page_value.as_ref(),
-                self.max_page_value.as_ref(),
-                self.page_distinct_count,
-                self.num_page_nulls,
-            ),
-            Level::Column => (
-                self.min_column_value.as_ref(),
-                self.max_column_value.as_ref(),
-                self.column_distinct_count,
-                self.num_column_nulls,
-            ),
-        };
-        match self.descr.physical_type() {
-            Type::INT32 => gen_stats_section!(i32, int32, min, max, distinct, nulls),
-            Type::BOOLEAN => gen_stats_section!(i32, int32, min, max, distinct, nulls),
-            Type::INT64 => gen_stats_section!(i64, int64, min, max, distinct, nulls),
-            Type::INT96 => gen_stats_section!(Int96, int96, min, max, distinct, nulls),
-            Type::FLOAT => gen_stats_section!(f32, float, min, max, distinct, nulls),
-            Type::DOUBLE => gen_stats_section!(f64, double, min, max, distinct, nulls),
-            Type::BYTE_ARRAY | Type::FIXED_LEN_BYTE_ARRAY => {
-                let min = min.as_ref().map(|v| ByteArray::from(v.as_bytes().to_vec()));
-                let max = max.as_ref().map(|v| ByteArray::from(v.as_bytes().to_vec()));
-                Statistics::byte_array(min, max, distinct, nulls, false)
-            }
-        }
-    }
-
-    fn update_page_min_max(&mut self, val: &T::T) {
-        if self.min_page_value.as_ref().map_or(true, |min| min > val) {
-            self.min_page_value = Some(val.clone());
-        }
-        if self.max_page_value.as_ref().map_or(true, |max| max < val) {
-            self.max_page_value = Some(val.clone());
-        }
-    }
-
-    fn update_column_min_max(&mut self) {
-        if self
-            .min_column_value
-            .as_ref()
-            .map_or(true, |min| min > self.min_page_value.as_ref().unwrap())
-        {
-            self.min_column_value = self.min_page_value.clone();
-        }
-        if self
-            .max_column_value
-            .as_ref()
-            .map_or(true, |max| max < self.max_page_value.as_ref().unwrap())
-        {
-            self.max_column_value = self.max_page_value.clone();
-        }
-    }
-}
-
-// ----------------------------------------------------------------------
-// Encoding support for column writer.
-// This mirrors parquet-mr default encodings for writes. See:
-// https://github.com/apache/parquet-mr/blob/master/parquet-column/src/main/java/org/apache/parquet/column/values/factory/DefaultV1ValuesWriterFactory.java
-// https://github.com/apache/parquet-mr/blob/master/parquet-column/src/main/java/org/apache/parquet/column/values/factory/DefaultV2ValuesWriterFactory.java
-
-/// Trait to define default encoding for types, including whether or not the type
-/// supports dictionary encoding.
-trait EncodingWriteSupport {
-    /// Returns true if dictionary is supported for column writer, false otherwise.
-    fn has_dictionary_support(props: &WriterProperties) -> bool;
-}
-
-/// Returns encoding for a column when no other encoding is provided in writer properties.
-fn fallback_encoding(kind: Type, props: &WriterProperties) -> Encoding {
-    match (kind, props.writer_version()) {
-        (Type::BOOLEAN, WriterVersion::PARQUET_2_0) => Encoding::RLE,
-        (Type::INT32, WriterVersion::PARQUET_2_0) => Encoding::DELTA_BINARY_PACKED,
-        (Type::INT64, WriterVersion::PARQUET_2_0) => Encoding::DELTA_BINARY_PACKED,
-        (Type::BYTE_ARRAY, WriterVersion::PARQUET_2_0) => Encoding::DELTA_BYTE_ARRAY,
-        (Type::FIXED_LEN_BYTE_ARRAY, WriterVersion::PARQUET_2_0) => {
-            Encoding::DELTA_BYTE_ARRAY
-        }
-        _ => Encoding::PLAIN,
-    }
-}
-
-/// Returns true if dictionary is supported for column writer, false otherwise.
-fn has_dictionary_support(kind: Type, props: &WriterProperties) -> bool {
-    match (kind, props.writer_version()) {
-        // Booleans do not support dict encoding and should use a fallback encoding.
-        (Type::BOOLEAN, _) => false,
-        // Dictionary encoding was not enabled in PARQUET 1.0
-        (Type::FIXED_LEN_BYTE_ARRAY, WriterVersion::PARQUET_1_0) => false,
-        (Type::FIXED_LEN_BYTE_ARRAY, WriterVersion::PARQUET_2_0) => true,
-        _ => true,
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use rand::distributions::uniform::SampleUniform;
-
-    use crate::column::{
-        page::PageReader,
-        reader::{get_column_reader, get_typed_column_reader, ColumnReaderImpl},
-    };
-    use crate::file::{
-        properties::WriterProperties, reader::SerializedPageReader,
-        writer::SerializedPageWriter,
-    };
-    use crate::schema::types::{ColumnDescriptor, ColumnPath, Type as SchemaType};
-    use crate::util::{
-        io::{FileSink, FileSource},
-        test_common::{get_temp_file, random_numbers_range},
-    };
-
-    use super::*;
-
-    #[test]
-    fn test_column_writer_inconsistent_def_rep_length() {
-        let page_writer = get_test_page_writer();
-        let props = Arc::new(WriterProperties::builder().build());
-        let mut writer = get_test_column_writer::<Int32Type>(page_writer, 1, 1, props);
-        let res = writer.write_batch(&[1, 2, 3, 4], Some(&[1, 1, 1]), Some(&[0, 0]));
-        assert!(res.is_err());
-        if let Err(err) = res {
-            assert_eq!(
-                format!("{}", err),
-                "Parquet error: Inconsistent length of definition and repetition levels: 3 != 2"
-            );
-        }
-    }
-
-    #[test]
-    fn test_column_writer_invalid_def_levels() {
-        let page_writer = get_test_page_writer();
-        let props = Arc::new(WriterProperties::builder().build());
-        let mut writer = get_test_column_writer::<Int32Type>(page_writer, 1, 0, props);
-        let res = writer.write_batch(&[1, 2, 3, 4], None, None);
-        assert!(res.is_err());
-        if let Err(err) = res {
-            assert_eq!(
-                format!("{}", err),
-                "Parquet error: Definition levels are required, because max definition level = 1"
-            );
-        }
-    }
-
-    #[test]
-    fn test_column_writer_invalid_rep_levels() {
-        let page_writer = get_test_page_writer();
-        let props = Arc::new(WriterProperties::builder().build());
-        let mut writer = get_test_column_writer::<Int32Type>(page_writer, 0, 1, props);
-        let res = writer.write_batch(&[1, 2, 3, 4], None, None);
-        assert!(res.is_err());
-        if let Err(err) = res {
-            assert_eq!(
-                format!("{}", err),
-                "Parquet error: Repetition levels are required, because max repetition level = 1"
-            );
-        }
-    }
-
-    #[test]
-    fn test_column_writer_not_enough_values_to_write() {
-        let page_writer = get_test_page_writer();
-        let props = Arc::new(WriterProperties::builder().build());
-        let mut writer = get_test_column_writer::<Int32Type>(page_writer, 1, 0, props);
-        let res = writer.write_batch(&[1, 2], Some(&[1, 1, 1, 1]), None);
-        assert!(res.is_err());
-        if let Err(err) = res {
-            assert_eq!(
-                format!("{}", err),
-                "Parquet error: Expected to write 4 values, but have only 2"
-            );
-        }
-    }
-
-    #[test]
-    #[should_panic(expected = "Dictionary offset is already set")]
-    fn test_column_writer_write_only_one_dictionary_page() {
-        let page_writer = get_test_page_writer();
-        let props = Arc::new(WriterProperties::builder().build());
-        let mut writer = get_test_column_writer::<Int32Type>(page_writer, 0, 0, props);
-        writer.write_batch(&[1, 2, 3, 4], None, None).unwrap();
-        // First page should be correctly written.
-        let res = writer.write_dictionary_page();
-        assert!(res.is_ok());
-        writer.write_dictionary_page().unwrap();
-    }
-
-    #[test]
-    fn test_column_writer_error_when_writing_disabled_dictionary() {
-        let page_writer = get_test_page_writer();
-        let props = Arc::new(
-            WriterProperties::builder()
-                .set_dictionary_enabled(false)
-                .build(),
-        );
-        let mut writer = get_test_column_writer::<Int32Type>(page_writer, 0, 0, props);
-        writer.write_batch(&[1, 2, 3, 4], None, None).unwrap();
-        let res = writer.write_dictionary_page();
-        assert!(res.is_err());
-        if let Err(err) = res {
-            assert_eq!(
-                format!("{}", err),
-                "Parquet error: Dictionary encoder is not set"
-            );
-        }
-    }
-
-    #[test]
-    fn test_column_writer_boolean_type_does_not_support_dictionary() {
-        let page_writer = get_test_page_writer();
-        let props = Arc::new(
-            WriterProperties::builder()
-                .set_dictionary_enabled(true)
-                .build(),
-        );
-        let mut writer = get_test_column_writer::<BoolType>(page_writer, 0, 0, props);
-        writer
-            .write_batch(&[true, false, true, false], None, None)
-            .unwrap();
-
-        let (bytes_written, rows_written, metadata) = writer.close().unwrap();
-        // PlainEncoder uses bit writer to write boolean values, which all fit into 1
-        // byte.
-        assert_eq!(bytes_written, 1);
-        assert_eq!(rows_written, 4);
-        assert_eq!(metadata.encodings(), &vec![Encoding::PLAIN, Encoding::RLE]);
-        assert_eq!(metadata.num_values(), 4); // just values
-        assert_eq!(metadata.dictionary_page_offset(), None);
-    }
-
-    #[test]
-    fn test_column_writer_default_encoding_support_bool() {
-        check_encoding_write_support::<BoolType>(
-            WriterVersion::PARQUET_1_0,
-            true,
-            &[true, false],
-            None,
-            &[Encoding::PLAIN, Encoding::RLE],
-        );
-        check_encoding_write_support::<BoolType>(
-            WriterVersion::PARQUET_1_0,
-            false,
-            &[true, false],
-            None,
-            &[Encoding::PLAIN, Encoding::RLE],
-        );
-        check_encoding_write_support::<BoolType>(
-            WriterVersion::PARQUET_2_0,
-            true,
-            &[true, false],
-            None,
-            &[Encoding::RLE, Encoding::RLE],
-        );
-        check_encoding_write_support::<BoolType>(
-            WriterVersion::PARQUET_2_0,
-            false,
-            &[true, false],
-            None,
-            &[Encoding::RLE, Encoding::RLE],
-        );
-    }
-
-    #[test]
-    fn test_column_writer_default_encoding_support_int32() {
-        check_encoding_write_support::<Int32Type>(
-            WriterVersion::PARQUET_1_0,
-            true,
-            &[1, 2],
-            Some(0),
-            &[Encoding::PLAIN, Encoding::RLE_DICTIONARY, Encoding::RLE],
-        );
-        check_encoding_write_support::<Int32Type>(
-            WriterVersion::PARQUET_1_0,
-            false,
-            &[1, 2],
-            None,
-            &[Encoding::PLAIN, Encoding::RLE],
-        );
-        check_encoding_write_support::<Int32Type>(
-            WriterVersion::PARQUET_2_0,
-            true,
-            &[1, 2],
-            Some(0),
-            &[Encoding::PLAIN, Encoding::RLE_DICTIONARY, Encoding::RLE],
-        );
-        check_encoding_write_support::<Int32Type>(
-            WriterVersion::PARQUET_2_0,
-            false,
-            &[1, 2],
-            None,
-            &[Encoding::DELTA_BINARY_PACKED, Encoding::RLE],
-        );
-    }
-
-    #[test]
-    fn test_column_writer_default_encoding_support_int64() {
-        check_encoding_write_support::<Int64Type>(
-            WriterVersion::PARQUET_1_0,
-            true,
-            &[1, 2],
-            Some(0),
-            &[Encoding::PLAIN, Encoding::RLE_DICTIONARY, Encoding::RLE],
-        );
-        check_encoding_write_support::<Int64Type>(
-            WriterVersion::PARQUET_1_0,
-            false,
-            &[1, 2],
-            None,
-            &[Encoding::PLAIN, Encoding::RLE],
-        );
-        check_encoding_write_support::<Int64Type>(
-            WriterVersion::PARQUET_2_0,
-            true,
-            &[1, 2],
-            Some(0),
-            &[Encoding::PLAIN, Encoding::RLE_DICTIONARY, Encoding::RLE],
-        );
-        check_encoding_write_support::<Int64Type>(
-            WriterVersion::PARQUET_2_0,
-            false,
-            &[1, 2],
-            None,
-            &[Encoding::DELTA_BINARY_PACKED, Encoding::RLE],
-        );
-    }
-
-    #[test]
-    fn test_column_writer_default_encoding_support_int96() {
-        check_encoding_write_support::<Int96Type>(
-            WriterVersion::PARQUET_1_0,
-            true,
-            &[Int96::from(vec![1, 2, 3])],
-            Some(0),
-            &[Encoding::PLAIN, Encoding::RLE_DICTIONARY, Encoding::RLE],
-        );
-        check_encoding_write_support::<Int96Type>(
-            WriterVersion::PARQUET_1_0,
-            false,
-            &[Int96::from(vec![1, 2, 3])],
-            None,
-            &[Encoding::PLAIN, Encoding::RLE],
-        );
-        check_encoding_write_support::<Int96Type>(
-            WriterVersion::PARQUET_2_0,
-            true,
-            &[Int96::from(vec![1, 2, 3])],
-            Some(0),
-            &[Encoding::PLAIN, Encoding::RLE_DICTIONARY, Encoding::RLE],
-        );
-        check_encoding_write_support::<Int96Type>(
-            WriterVersion::PARQUET_2_0,
-            false,
-            &[Int96::from(vec![1, 2, 3])],
-            None,
-            &[Encoding::PLAIN, Encoding::RLE],
-        );
-    }
-
-    #[test]
-    fn test_column_writer_default_encoding_support_float() {
-        check_encoding_write_support::<FloatType>(
-            WriterVersion::PARQUET_1_0,
-            true,
-            &[1.0, 2.0],
-            Some(0),
-            &[Encoding::PLAIN, Encoding::RLE_DICTIONARY, Encoding::RLE],
-        );
-        check_encoding_write_support::<FloatType>(
-            WriterVersion::PARQUET_1_0,
-            false,
-            &[1.0, 2.0],
-            None,
-            &[Encoding::PLAIN, Encoding::RLE],
-        );
-        check_encoding_write_support::<FloatType>(
-            WriterVersion::PARQUET_2_0,
-            true,
-            &[1.0, 2.0],
-            Some(0),
-            &[Encoding::PLAIN, Encoding::RLE_DICTIONARY, Encoding::RLE],
-        );
-        check_encoding_write_support::<FloatType>(
-            WriterVersion::PARQUET_2_0,
-            false,
-            &[1.0, 2.0],
-            None,
-            &[Encoding::PLAIN, Encoding::RLE],
-        );
-    }
-
-    #[test]
-    fn test_column_writer_default_encoding_support_double() {
-        check_encoding_write_support::<DoubleType>(
-            WriterVersion::PARQUET_1_0,
-            true,
-            &[1.0, 2.0],
-            Some(0),
-            &[Encoding::PLAIN, Encoding::RLE_DICTIONARY, Encoding::RLE],
-        );
-        check_encoding_write_support::<DoubleType>(
-            WriterVersion::PARQUET_1_0,
-            false,
-            &[1.0, 2.0],
-            None,
-            &[Encoding::PLAIN, Encoding::RLE],
-        );
-        check_encoding_write_support::<DoubleType>(
-            WriterVersion::PARQUET_2_0,
-            true,
-            &[1.0, 2.0],
-            Some(0),
-            &[Encoding::PLAIN, Encoding::RLE_DICTIONARY, Encoding::RLE],
-        );
-        check_encoding_write_support::<DoubleType>(
-            WriterVersion::PARQUET_2_0,
-            false,
-            &[1.0, 2.0],
-            None,
-            &[Encoding::PLAIN, Encoding::RLE],
-        );
-    }
-
-    #[test]
-    fn test_column_writer_default_encoding_support_byte_array() {
-        check_encoding_write_support::<ByteArrayType>(
-            WriterVersion::PARQUET_1_0,
-            true,
-            &[ByteArray::from(vec![1u8])],
-            Some(0),
-            &[Encoding::PLAIN, Encoding::RLE_DICTIONARY, Encoding::RLE],
-        );
-        check_encoding_write_support::<ByteArrayType>(
-            WriterVersion::PARQUET_1_0,
-            false,
-            &[ByteArray::from(vec![1u8])],
-            None,
-            &[Encoding::PLAIN, Encoding::RLE],
-        );
-        check_encoding_write_support::<ByteArrayType>(
-            WriterVersion::PARQUET_2_0,
-            true,
-            &[ByteArray::from(vec![1u8])],
-            Some(0),
-            &[Encoding::PLAIN, Encoding::RLE_DICTIONARY, Encoding::RLE],
-        );
-        check_encoding_write_support::<ByteArrayType>(
-            WriterVersion::PARQUET_2_0,
-            false,
-            &[ByteArray::from(vec![1u8])],
-            None,
-            &[Encoding::DELTA_BYTE_ARRAY, Encoding::RLE],
-        );
-    }
-
-    #[test]
-    fn test_column_writer_default_encoding_support_fixed_len_byte_array() {
-        check_encoding_write_support::<FixedLenByteArrayType>(
-            WriterVersion::PARQUET_1_0,
-            true,
-            &[ByteArray::from(vec![1u8]).into()],
-            None,
-            &[Encoding::PLAIN, Encoding::RLE],
-        );
-        check_encoding_write_support::<FixedLenByteArrayType>(
-            WriterVersion::PARQUET_1_0,
-            false,
-            &[ByteArray::from(vec![1u8]).into()],
-            None,
-            &[Encoding::PLAIN, Encoding::RLE],
-        );
-        check_encoding_write_support::<FixedLenByteArrayType>(
-            WriterVersion::PARQUET_2_0,
-            true,
-            &[ByteArray::from(vec![1u8]).into()],
-            Some(0),
-            &[Encoding::PLAIN, Encoding::RLE_DICTIONARY, Encoding::RLE],
-        );
-        check_encoding_write_support::<FixedLenByteArrayType>(
-            WriterVersion::PARQUET_2_0,
-            false,
-            &[ByteArray::from(vec![1u8]).into()],
-            None,
-            &[Encoding::DELTA_BYTE_ARRAY, Encoding::RLE],
-        );
-    }
-
-    #[test]
-    fn test_column_writer_check_metadata() {
-        let page_writer = get_test_page_writer();
-        let props = Arc::new(WriterProperties::builder().build());
-        let mut writer = get_test_column_writer::<Int32Type>(page_writer, 0, 0, props);
-        writer.write_batch(&[1, 2, 3, 4], None, None).unwrap();
-
-        let (bytes_written, rows_written, metadata) = writer.close().unwrap();
-        assert_eq!(bytes_written, 20);
-        assert_eq!(rows_written, 4);
-        assert_eq!(
-            metadata.encodings(),
-            &vec![Encoding::PLAIN, Encoding::RLE_DICTIONARY, Encoding::RLE]
-        );
-        assert_eq!(metadata.num_values(), 8); // dictionary + value indexes
-        assert_eq!(metadata.compressed_size(), 20);
-        assert_eq!(metadata.uncompressed_size(), 20);
-        assert_eq!(metadata.data_page_offset(), 0);
-        assert_eq!(metadata.dictionary_page_offset(), Some(0));
-        if let Some(stats) = metadata.statistics() {
-            assert!(stats.has_min_max_set());
-            assert_eq!(stats.null_count(), 0);
-            assert_eq!(stats.distinct_count(), None);
-            if let Statistics::Int32(stats) = stats {
-                assert_eq!(stats.min(), &1);
-                assert_eq!(stats.max(), &4);
-            } else {
-                panic!("expecting Statistics::Int32");
-            }
-        } else {
-            panic!("metadata missing statistics");
-        }
-    }
-
-    #[test]
-    fn test_column_writer_precalculated_statistics() {
-        let page_writer = get_test_page_writer();
-        let props = Arc::new(WriterProperties::builder().build());
-        let mut writer = get_test_column_writer::<Int32Type>(page_writer, 0, 0, props);
-        writer
-            .write_batch_with_statistics(
-                &[1, 2, 3, 4],
-                None,
-                None,
-                &Some(-17),
-                &Some(9000),
-                Some(21),
-                Some(55),
-            )
-            .unwrap();
-
-        let (bytes_written, rows_written, metadata) = writer.close().unwrap();
-        assert_eq!(bytes_written, 20);
-        assert_eq!(rows_written, 4);
-        assert_eq!(
-            metadata.encodings(),
-            &vec![Encoding::PLAIN, Encoding::RLE_DICTIONARY, Encoding::RLE]
-        );
-        assert_eq!(metadata.num_values(), 8); // dictionary + value indexes
-        assert_eq!(metadata.compressed_size(), 20);
-        assert_eq!(metadata.uncompressed_size(), 20);
-        assert_eq!(metadata.data_page_offset(), 0);
-        assert_eq!(metadata.dictionary_page_offset(), Some(0));
-        if let Some(stats) = metadata.statistics() {
-            assert!(stats.has_min_max_set());
-            assert_eq!(stats.null_count(), 21);
-            assert_eq!(stats.distinct_count().unwrap_or(0), 55);
-            if let Statistics::Int32(stats) = stats {
-                assert_eq!(stats.min(), &-17);
-                assert_eq!(stats.max(), &9000);
-            } else {
-                panic!("expecting Statistics::Int32");
-            }
-        } else {
-            panic!("metadata missing statistics");
-        }
-    }
-
-    #[test]
-    fn test_column_writer_empty_column_roundtrip() {
-        let props = WriterProperties::builder().build();
-        column_roundtrip::<Int32Type>("test_col_writer_rnd_1", props, &[], None, None);
-    }
-
-    #[test]
-    fn test_column_writer_non_nullable_values_roundtrip() {
-        let props = WriterProperties::builder().build();
-        column_roundtrip_random::<Int32Type>(
-            "test_col_writer_rnd_2",
-            props,
-            1024,
-            std::i32::MIN,
-            std::i32::MAX,
-            0,
-            0,
-        );
-    }
-
-    #[test]
-    fn test_column_writer_nullable_non_repeated_values_roundtrip() {
-        let props = WriterProperties::builder().build();
-        column_roundtrip_random::<Int32Type>(
-            "test_column_writer_nullable_non_repeated_values_roundtrip",
-            props,
-            1024,
-            std::i32::MIN,
-            std::i32::MAX,
-            10,
-            0,
-        );
-    }
-
-    #[test]
-    fn test_column_writer_nullable_repeated_values_roundtrip() {
-        let props = WriterProperties::builder().build();
-        column_roundtrip_random::<Int32Type>(
-            "test_col_writer_rnd_3",
-            props,
-            1024,
-            std::i32::MIN,
-            std::i32::MAX,
-            10,
-            10,
-        );
-    }
-
-    #[test]
-    fn test_column_writer_dictionary_fallback_small_data_page() {
-        let props = WriterProperties::builder()
-            .set_dictionary_pagesize_limit(32)
-            .set_data_pagesize_limit(32)
-            .build();
-        column_roundtrip_random::<Int32Type>(
-            "test_col_writer_rnd_4",
-            props,
-            1024,
-            std::i32::MIN,
-            std::i32::MAX,
-            10,
-            10,
-        );
-    }
-
-    #[test]
-    fn test_column_writer_small_write_batch_size() {
-        for i in &[1usize, 2, 5, 10, 11, 1023] {
-            let props = WriterProperties::builder().set_write_batch_size(*i).build();
-
-            column_roundtrip_random::<Int32Type>(
-                "test_col_writer_rnd_5",
-                props,
-                1024,
-                std::i32::MIN,
-                std::i32::MAX,
-                10,
-                10,
-            );
-        }
-    }
-
-    #[test]
-    fn test_column_writer_dictionary_disabled_v1() {
-        let props = WriterProperties::builder()
-            .set_writer_version(WriterVersion::PARQUET_1_0)
-            .set_dictionary_enabled(false)
-            .build();
-        column_roundtrip_random::<Int32Type>(
-            "test_col_writer_rnd_6",
-            props,
-            1024,
-            std::i32::MIN,
-            std::i32::MAX,
-            10,
-            10,
-        );
-    }
-
-    #[test]
-    fn test_column_writer_dictionary_disabled_v2() {
-        let props = WriterProperties::builder()
-            .set_writer_version(WriterVersion::PARQUET_2_0)
-            .set_dictionary_enabled(false)
-            .build();
-        column_roundtrip_random::<Int32Type>(
-            "test_col_writer_rnd_7",
-            props,
-            1024,
-            std::i32::MIN,
-            std::i32::MAX,
-            10,
-            10,
-        );
-    }
-
-    #[test]
-    fn test_column_writer_compression_v1() {
-        let props = WriterProperties::builder()
-            .set_writer_version(WriterVersion::PARQUET_1_0)
-            .set_compression(Compression::SNAPPY)
-            .build();
-        column_roundtrip_random::<Int32Type>(
-            "test_col_writer_rnd_8",
-            props,
-            2048,
-            std::i32::MIN,
-            std::i32::MAX,
-            10,
-            10,
-        );
-    }
-
-    #[test]
-    fn test_column_writer_compression_v2() {
-        let props = WriterProperties::builder()
-            .set_writer_version(WriterVersion::PARQUET_2_0)
-            .set_compression(Compression::SNAPPY)
-            .build();
-        column_roundtrip_random::<Int32Type>(
-            "test_col_writer_rnd_9",
-            props,
-            2048,
-            std::i32::MIN,
-            std::i32::MAX,
-            10,
-            10,
-        );
-    }
-
-    #[test]
-    fn test_column_writer_add_data_pages_with_dict() {
-        // ARROW-5129: Test verifies that we add data page in case of dictionary encoding
-        // and no fallback occurred so far.
-        let file = get_temp_file("test_column_writer_add_data_pages_with_dict", &[]);
-        let sink = FileSink::new(&file);
-        let page_writer = Box::new(SerializedPageWriter::new(sink));
-        let props = Arc::new(
-            WriterProperties::builder()
-                .set_data_pagesize_limit(15) // actually each page will have size 15-18 bytes
-                .set_write_batch_size(3) // write 3 values at a time
-                .build(),
-        );
-        let data = &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
-        let mut writer = get_test_column_writer::<Int32Type>(page_writer, 0, 0, props);
-        writer.write_batch(data, None, None).unwrap();
-        let (bytes_written, _, _) = writer.close().unwrap();
-
-        // Read pages and check the sequence
-        let source = FileSource::new(&file, 0, bytes_written as usize);
-        let mut page_reader = Box::new(
-            SerializedPageReader::new(
-                source,
-                data.len() as i64,
-                Compression::UNCOMPRESSED,
-                Int32Type::get_physical_type(),
-            )
-            .unwrap(),
-        );
-        let mut res = Vec::new();
-        while let Some(page) = page_reader.get_next_page().unwrap() {
-            res.push((page.page_type(), page.num_values()));
-        }
-        assert_eq!(
-            res,
-            vec![
-                (PageType::DICTIONARY_PAGE, 10),
-                (PageType::DATA_PAGE, 3),
-                (PageType::DATA_PAGE, 3),
-                (PageType::DATA_PAGE, 3),
-                (PageType::DATA_PAGE, 1)
-            ]
-        );
-    }
-
-    /// Performs write-read roundtrip with randomly generated values and levels.
-    /// `max_size` is maximum number of values or levels (if `max_def_level` > 0) to write
-    /// for a column.
-    fn column_roundtrip_random<T: DataType>(
-        file_name: &str,
-        props: WriterProperties,
-        max_size: usize,
-        min_value: T::T,
-        max_value: T::T,
-        max_def_level: i16,
-        max_rep_level: i16,
-    ) where
-        T::T: PartialOrd + SampleUniform + Copy,
-    {
-        let mut num_values: usize = 0;
-
-        let mut buf: Vec<i16> = Vec::new();
-        let def_levels = if max_def_level > 0 {
-            random_numbers_range(max_size, 0, max_def_level + 1, &mut buf);
-            for &dl in &buf[..] {
-                if dl == max_def_level {
-                    num_values += 1;
-                }
-            }
-            Some(&buf[..])
-        } else {
-            num_values = max_size;
-            None
-        };
-
-        let mut buf: Vec<i16> = Vec::new();
-        let rep_levels = if max_rep_level > 0 {
-            random_numbers_range(max_size, 0, max_rep_level + 1, &mut buf);
-            Some(&buf[..])
-        } else {
-            None
-        };
-
-        let mut values: Vec<T::T> = Vec::new();
-        random_numbers_range(num_values, min_value, max_value, &mut values);
-
-        column_roundtrip::<T>(file_name, props, &values[..], def_levels, rep_levels);
-    }
-
-    /// Performs write-read roundtrip and asserts written values and levels.
-    fn column_roundtrip<'a, T: DataType>(
-        file_name: &'a str,
-        props: WriterProperties,
-        values: &[T::T],
-        def_levels: Option<&[i16]>,
-        rep_levels: Option<&[i16]>,
-    ) {
-        let file = get_temp_file(file_name, &[]);
-        let sink = FileSink::new(&file);
-        let page_writer = Box::new(SerializedPageWriter::new(sink));
-
-        let max_def_level = match def_levels {
-            Some(buf) => *buf.iter().max().unwrap_or(&0i16),
-            None => 0i16,
-        };
-
-        let max_rep_level = match rep_levels {
-            Some(buf) => *buf.iter().max().unwrap_or(&0i16),
-            None => 0i16,
-        };
-
-        let mut max_batch_size = values.len();
-        if let Some(levels) = def_levels {
-            max_batch_size = cmp::max(max_batch_size, levels.len());
-        }
-        if let Some(levels) = rep_levels {
-            max_batch_size = cmp::max(max_batch_size, levels.len());
-        }
-
-        let mut writer = get_test_column_writer::<T>(
-            page_writer,
-            max_def_level,
-            max_rep_level,
-            Arc::new(props),
-        );
-
-        let values_written = writer.write_batch(values, def_levels, rep_levels).unwrap();
-        assert_eq!(values_written, values.len());
-        let (bytes_written, rows_written, column_metadata) = writer.close().unwrap();
-
-        let source = FileSource::new(&file, 0, bytes_written as usize);
-        let page_reader = Box::new(
-            SerializedPageReader::new(
-                source,
-                column_metadata.num_values(),
-                column_metadata.compression(),
-                T::get_physical_type(),
-            )
-            .unwrap(),
-        );
-        let reader =
-            get_test_column_reader::<T>(page_reader, max_def_level, max_rep_level);
-
-        let mut actual_values = vec![T::T::default(); max_batch_size];
-        let mut actual_def_levels = def_levels.map(|_| vec![0i16; max_batch_size]);
-        let mut actual_rep_levels = rep_levels.map(|_| vec![0i16; max_batch_size]);
-
-        let (values_read, levels_read) = read_fully(
-            reader,
-            max_batch_size,
-            actual_def_levels.as_mut(),
-            actual_rep_levels.as_mut(),
-            actual_values.as_mut_slice(),
-        );
-
-        // Assert values, definition and repetition levels.
-
-        assert_eq!(&actual_values[..values_read], values);
-        match actual_def_levels {
-            Some(ref vec) => assert_eq!(Some(&vec[..levels_read]), def_levels),
-            None => assert_eq!(None, def_levels),
-        }
-        match actual_rep_levels {
-            Some(ref vec) => assert_eq!(Some(&vec[..levels_read]), rep_levels),
-            None => assert_eq!(None, rep_levels),
-        }
-
-        // Assert written rows.
-
-        if let Some(levels) = actual_rep_levels {
-            let mut actual_rows_written = 0;
-            for l in levels {
-                if l == 0 {
-                    actual_rows_written += 1;
-                }
-            }
-            assert_eq!(actual_rows_written, rows_written);
-        } else if actual_def_levels.is_some() {
-            assert_eq!(levels_read as u64, rows_written);
-        } else {
-            assert_eq!(values_read as u64, rows_written);
-        }
-    }
-
-    /// Performs write of provided values and returns column metadata of those values.
-    /// Used to test encoding support for column writer.
-    fn column_write_and_get_metadata<T: DataType>(
-        props: WriterProperties,
-        values: &[T::T],
-    ) -> ColumnChunkMetaData {
-        let page_writer = get_test_page_writer();
-        let props = Arc::new(props);
-        let mut writer = get_test_column_writer::<T>(page_writer, 0, 0, props);
-        writer.write_batch(values, None, None).unwrap();
-        let (_, _, metadata) = writer.close().unwrap();
-        metadata
-    }
-
-    // Function to use in tests for EncodingWriteSupport. This checks that dictionary
-    // offset and encodings to make sure that column writer uses provided by trait
-    // encodings.
-    fn check_encoding_write_support<T: DataType>(
-        version: WriterVersion,
-        dict_enabled: bool,
-        data: &[T::T],
-        dictionary_page_offset: Option<i64>,
-        encodings: &[Encoding],
-    ) {
-        let props = WriterProperties::builder()
-            .set_writer_version(version)
-            .set_dictionary_enabled(dict_enabled)
-            .build();
-        let meta = column_write_and_get_metadata::<T>(props, data);
-        assert_eq!(meta.dictionary_page_offset(), dictionary_page_offset);
-        assert_eq!(meta.encodings(), &encodings);
-    }
-
-    /// Reads one batch of data, considering that batch is large enough to capture all of
-    /// the values and levels.
-    fn read_fully<T: DataType>(
-        mut reader: ColumnReaderImpl<T>,
-        batch_size: usize,
-        mut def_levels: Option<&mut Vec<i16>>,
-        mut rep_levels: Option<&mut Vec<i16>>,
-        values: &mut [T::T],
-    ) -> (usize, usize) {
-        let actual_def_levels = def_levels.as_mut().map(|vec| &mut vec[..]);
-        let actual_rep_levels = rep_levels.as_mut().map(|vec| &mut vec[..]);
-        reader
-            .read_batch(batch_size, actual_def_levels, actual_rep_levels, values)
-            .unwrap()
-    }
-
-    /// Returns column writer.
-    fn get_test_column_writer<T: DataType>(
-        page_writer: Box<dyn PageWriter>,
-        max_def_level: i16,
-        max_rep_level: i16,
-        props: WriterPropertiesPtr,
-    ) -> ColumnWriterImpl<T> {
-        let descr = Arc::new(get_test_column_descr::<T>(max_def_level, max_rep_level));
-        let column_writer = get_column_writer(descr, props, page_writer);
-        get_typed_column_writer::<T>(column_writer)
-    }
-
-    /// Returns column reader.
-    fn get_test_column_reader<T: DataType>(
-        page_reader: Box<dyn PageReader>,
-        max_def_level: i16,
-        max_rep_level: i16,
-    ) -> ColumnReaderImpl<T> {
-        let descr = Arc::new(get_test_column_descr::<T>(max_def_level, max_rep_level));
-        let column_reader = get_column_reader(descr, page_reader);
-        get_typed_column_reader::<T>(column_reader)
-    }
-
-    /// Returns descriptor for primitive column.
-    fn get_test_column_descr<T: DataType>(
-        max_def_level: i16,
-        max_rep_level: i16,
-    ) -> ColumnDescriptor {
-        let path = ColumnPath::from("col");
-        let tpe = SchemaType::primitive_type_builder("col", T::get_physical_type())
-            // length is set for "encoding support" tests for FIXED_LEN_BYTE_ARRAY type,
-            // it should be no-op for other types
-            .with_length(1)
-            .build()
-            .unwrap();
-        ColumnDescriptor::new(Arc::new(tpe), max_def_level, max_rep_level, path)
-    }
-
-    /// Returns page writer that collects pages without serializing them.
-    fn get_test_page_writer() -> Box<dyn PageWriter> {
-        Box::new(TestPageWriter {})
-    }
-
-    struct TestPageWriter {}
-
-    impl PageWriter for TestPageWriter {
-        fn write_page(&mut self, page: CompressedPage) -> Result<PageWriteSpec> {
-            let mut res = PageWriteSpec::new();
-            res.page_type = page.page_type();
-            res.uncompressed_size = page.uncompressed_size();
-            res.compressed_size = page.compressed_size();
-            res.num_values = page.num_values();
-            res.offset = 0;
-            res.bytes_written = page.data().len() as u64;
-            Ok(res)
-        }
-
-        fn write_metadata(&mut self, _metadata: &ColumnChunkMetaData) -> Result<()> {
-            Ok(())
-        }
-
-        fn close(&mut self) -> Result<()> {
-            Ok(())
-        }
-    }
-}
diff --git a/rust/parquet/src/compression.rs b/rust/parquet/src/compression.rs
deleted file mode 100644
index a1155971fbd..00000000000
--- a/rust/parquet/src/compression.rs
+++ /dev/null
@@ -1,393 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Contains codec interface and supported codec implementations.
-//!
-//! See [`Compression`](crate::basic::Compression) enum for all available compression
-//! algorithms.
-//!
-//! # Example
-//!
-//! ```no_run
-//! use parquet::{basic::Compression, compression::create_codec};
-//!
-//! let mut codec = match create_codec(Compression::SNAPPY) {
-//!     Ok(Some(codec)) => codec,
-//!     _ => panic!(),
-//! };
-//!
-//! let data = vec![b'p', b'a', b'r', b'q', b'u', b'e', b't'];
-//! let mut compressed = vec![];
-//! codec.compress(&data[..], &mut compressed).unwrap();
-//!
-//! let mut output = vec![];
-//! codec.decompress(&compressed[..], &mut output).unwrap();
-//!
-//! assert_eq!(output, data);
-//! ```
-
-use crate::basic::Compression as CodecType;
-use crate::errors::{ParquetError, Result};
-
-/// Parquet compression codec interface.
-pub trait Codec {
-    /// Compresses data stored in slice `input_buf` and writes the compressed result
-    /// to `output_buf`.
-    /// Note that you'll need to call `clear()` before reusing the same `output_buf`
-    /// across different `compress` calls.
-    fn compress(&mut self, input_buf: &[u8], output_buf: &mut Vec<u8>) -> Result<()>;
-
-    /// Decompresses data stored in slice `input_buf` and writes output to `output_buf`.
-    /// Returns the total number of bytes written.
-    fn decompress(&mut self, input_buf: &[u8], output_buf: &mut Vec<u8>)
-        -> Result<usize>;
-}
-
-/// Given the compression type `codec`, returns a codec used to compress and decompress
-/// bytes for the compression type.
-/// This returns `None` if the codec type is `UNCOMPRESSED`.
-pub fn create_codec(codec: CodecType) -> Result<Option<Box<dyn Codec>>> {
-    match codec {
-        #[cfg(any(feature = "brotli", test))]
-        CodecType::BROTLI => Ok(Some(Box::new(BrotliCodec::new()))),
-        #[cfg(any(feature = "flate2", test))]
-        CodecType::GZIP => Ok(Some(Box::new(GZipCodec::new()))),
-        #[cfg(any(feature = "snap", test))]
-        CodecType::SNAPPY => Ok(Some(Box::new(SnappyCodec::new()))),
-        #[cfg(any(feature = "lz4", test))]
-        CodecType::LZ4 => Ok(Some(Box::new(LZ4Codec::new()))),
-        #[cfg(any(feature = "zstd", test))]
-        CodecType::ZSTD => Ok(Some(Box::new(ZSTDCodec::new()))),
-        CodecType::UNCOMPRESSED => Ok(None),
-        _ => Err(nyi_err!("The codec type {} is not supported yet", codec)),
-    }
-}
-
-#[cfg(any(feature = "snap", test))]
-mod snappy_codec {
-    use snap::raw::{decompress_len, max_compress_len, Decoder, Encoder};
-
-    use crate::compression::Codec;
-    use crate::errors::Result;
-
-    /// Codec for Snappy compression format.
-    pub struct SnappyCodec {
-        decoder: Decoder,
-        encoder: Encoder,
-    }
-
-    impl SnappyCodec {
-        /// Creates new Snappy compression codec.
-        pub(crate) fn new() -> Self {
-            Self {
-                decoder: Decoder::new(),
-                encoder: Encoder::new(),
-            }
-        }
-    }
-
-    impl Codec for SnappyCodec {
-        fn decompress(
-            &mut self,
-            input_buf: &[u8],
-            output_buf: &mut Vec<u8>,
-        ) -> Result<usize> {
-            let len = decompress_len(input_buf)?;
-            output_buf.resize(len, 0);
-            self.decoder
-                .decompress(input_buf, output_buf)
-                .map_err(|e| e.into())
-        }
-
-        fn compress(&mut self, input_buf: &[u8], output_buf: &mut Vec<u8>) -> Result<()> {
-            let output_buf_len = output_buf.len();
-            let required_len = max_compress_len(input_buf.len());
-            output_buf.resize(output_buf_len + required_len, 0);
-            let n = self
-                .encoder
-                .compress(input_buf, &mut output_buf[output_buf_len..])?;
-            output_buf.truncate(output_buf_len + n);
-            Ok(())
-        }
-    }
-}
-#[cfg(any(feature = "snap", test))]
-pub use snappy_codec::*;
-
-#[cfg(any(feature = "flate2", test))]
-mod gzip_codec {
-
-    use std::io::{Read, Write};
-
-    use flate2::{read, write, Compression};
-
-    use crate::compression::Codec;
-    use crate::errors::Result;
-
-    /// Codec for GZIP compression algorithm.
-    pub struct GZipCodec {}
-
-    impl GZipCodec {
-        /// Creates new GZIP compression codec.
-        pub(crate) fn new() -> Self {
-            Self {}
-        }
-    }
-
-    impl Codec for GZipCodec {
-        fn decompress(
-            &mut self,
-            input_buf: &[u8],
-            output_buf: &mut Vec<u8>,
-        ) -> Result<usize> {
-            let mut decoder = read::GzDecoder::new(input_buf);
-            decoder.read_to_end(output_buf).map_err(|e| e.into())
-        }
-
-        fn compress(&mut self, input_buf: &[u8], output_buf: &mut Vec<u8>) -> Result<()> {
-            let mut encoder = write::GzEncoder::new(output_buf, Compression::default());
-            encoder.write_all(input_buf)?;
-            encoder.try_finish().map_err(|e| e.into())
-        }
-    }
-}
-#[cfg(any(feature = "flate2", test))]
-pub use gzip_codec::*;
-
-#[cfg(any(feature = "brotli", test))]
-mod brotli_codec {
-
-    use std::io::{Read, Write};
-
-    use crate::compression::Codec;
-    use crate::errors::Result;
-
-    const BROTLI_DEFAULT_BUFFER_SIZE: usize = 4096;
-    const BROTLI_DEFAULT_COMPRESSION_QUALITY: u32 = 1; // supported levels 0-9
-    const BROTLI_DEFAULT_LG_WINDOW_SIZE: u32 = 22; // recommended between 20-22
-
-    /// Codec for Brotli compression algorithm.
-    pub struct BrotliCodec {}
-
-    impl BrotliCodec {
-        /// Creates new Brotli compression codec.
-        pub(crate) fn new() -> Self {
-            Self {}
-        }
-    }
-
-    impl Codec for BrotliCodec {
-        fn decompress(
-            &mut self,
-            input_buf: &[u8],
-            output_buf: &mut Vec<u8>,
-        ) -> Result<usize> {
-            brotli::Decompressor::new(input_buf, BROTLI_DEFAULT_BUFFER_SIZE)
-                .read_to_end(output_buf)
-                .map_err(|e| e.into())
-        }
-
-        fn compress(&mut self, input_buf: &[u8], output_buf: &mut Vec<u8>) -> Result<()> {
-            let mut encoder = brotli::CompressorWriter::new(
-                output_buf,
-                BROTLI_DEFAULT_BUFFER_SIZE,
-                BROTLI_DEFAULT_COMPRESSION_QUALITY,
-                BROTLI_DEFAULT_LG_WINDOW_SIZE,
-            );
-            encoder.write_all(input_buf)?;
-            encoder.flush().map_err(|e| e.into())
-        }
-    }
-}
-#[cfg(any(feature = "brotli", test))]
-pub use brotli_codec::*;
-
-#[cfg(any(feature = "lz4", test))]
-mod lz4_codec {
-    use std::io::{Read, Write};
-
-    use crate::compression::Codec;
-    use crate::errors::Result;
-
-    const LZ4_BUFFER_SIZE: usize = 4096;
-
-    /// Codec for LZ4 compression algorithm.
-    pub struct LZ4Codec {}
-
-    impl LZ4Codec {
-        /// Creates new LZ4 compression codec.
-        pub(crate) fn new() -> Self {
-            Self {}
-        }
-    }
-
-    impl Codec for LZ4Codec {
-        fn decompress(
-            &mut self,
-            input_buf: &[u8],
-            output_buf: &mut Vec<u8>,
-        ) -> Result<usize> {
-            let mut decoder = lz4::Decoder::new(input_buf)?;
-            let mut buffer: [u8; LZ4_BUFFER_SIZE] = [0; LZ4_BUFFER_SIZE];
-            let mut total_len = 0;
-            loop {
-                let len = decoder.read(&mut buffer)?;
-                if len == 0 {
-                    break;
-                }
-                total_len += len;
-                output_buf.write_all(&buffer[0..len])?;
-            }
-            Ok(total_len)
-        }
-
-        fn compress(&mut self, input_buf: &[u8], output_buf: &mut Vec<u8>) -> Result<()> {
-            let mut encoder = lz4::EncoderBuilder::new().build(output_buf)?;
-            let mut from = 0;
-            loop {
-                let to = std::cmp::min(from + LZ4_BUFFER_SIZE, input_buf.len());
-                encoder.write_all(&input_buf[from..to])?;
-                from += LZ4_BUFFER_SIZE;
-                if from >= input_buf.len() {
-                    break;
-                }
-            }
-            encoder.finish().1.map_err(|e| e.into())
-        }
-    }
-}
-#[cfg(any(feature = "lz4", test))]
-pub use lz4_codec::*;
-
-#[cfg(any(feature = "zstd", test))]
-mod zstd_codec {
-    use std::io::{self, Write};
-
-    use crate::compression::Codec;
-    use crate::errors::Result;
-
-    /// Codec for Zstandard compression algorithm.
-    pub struct ZSTDCodec {}
-
-    impl ZSTDCodec {
-        /// Creates new Zstandard compression codec.
-        pub(crate) fn new() -> Self {
-            Self {}
-        }
-    }
-
-    /// Compression level (1-21) for ZSTD. Choose 1 here for better compression speed.
-    const ZSTD_COMPRESSION_LEVEL: i32 = 1;
-
-    impl Codec for ZSTDCodec {
-        fn decompress(
-            &mut self,
-            input_buf: &[u8],
-            output_buf: &mut Vec<u8>,
-        ) -> Result<usize> {
-            let mut decoder = zstd::Decoder::new(input_buf)?;
-            match io::copy(&mut decoder, output_buf) {
-                Ok(n) => Ok(n as usize),
-                Err(e) => Err(e.into()),
-            }
-        }
-
-        fn compress(&mut self, input_buf: &[u8], output_buf: &mut Vec<u8>) -> Result<()> {
-            let mut encoder = zstd::Encoder::new(output_buf, ZSTD_COMPRESSION_LEVEL)?;
-            encoder.write_all(input_buf)?;
-            match encoder.finish() {
-                Ok(_) => Ok(()),
-                Err(e) => Err(e.into()),
-            }
-        }
-    }
-}
-#[cfg(any(feature = "zstd", test))]
-pub use zstd_codec::*;
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    use crate::util::test_common::*;
-
-    fn test_roundtrip(c: CodecType, data: &[u8]) {
-        let mut c1 = create_codec(c).unwrap().unwrap();
-        let mut c2 = create_codec(c).unwrap().unwrap();
-
-        // Compress with c1
-        let mut compressed = Vec::new();
-        let mut decompressed = Vec::new();
-        c1.compress(data, &mut compressed)
-            .expect("Error when compressing");
-
-        // Decompress with c2
-        let mut decompressed_size = c2
-            .decompress(compressed.as_slice(), &mut decompressed)
-            .expect("Error when decompressing");
-        assert_eq!(data.len(), decompressed_size);
-        decompressed.truncate(decompressed_size);
-        assert_eq!(data, decompressed.as_slice());
-
-        compressed.clear();
-
-        // Compress with c2
-        c2.compress(data, &mut compressed)
-            .expect("Error when compressing");
-
-        // Decompress with c1
-        decompressed_size = c1
-            .decompress(compressed.as_slice(), &mut decompressed)
-            .expect("Error when decompressing");
-        assert_eq!(data.len(), decompressed_size);
-        decompressed.truncate(decompressed_size);
-        assert_eq!(data, decompressed.as_slice());
-    }
-
-    fn test_codec(c: CodecType) {
-        let sizes = vec![100, 10000, 100000];
-        for size in sizes {
-            let data = random_bytes(size);
-            test_roundtrip(c, &data);
-        }
-    }
-
-    #[test]
-    fn test_codec_snappy() {
-        test_codec(CodecType::SNAPPY);
-    }
-
-    #[test]
-    fn test_codec_gzip() {
-        test_codec(CodecType::GZIP);
-    }
-
-    #[test]
-    fn test_codec_brotli() {
-        test_codec(CodecType::BROTLI);
-    }
-
-    #[test]
-    fn test_codec_lz4() {
-        test_codec(CodecType::LZ4);
-    }
-
-    #[test]
-    fn test_codec_zstd() {
-        test_codec(CodecType::ZSTD);
-    }
-}
diff --git a/rust/parquet/src/data_type.rs b/rust/parquet/src/data_type.rs
deleted file mode 100644
index aa1def3db97..00000000000
--- a/rust/parquet/src/data_type.rs
+++ /dev/null
@@ -1,1358 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Data types that connect Parquet physical types with their Rust-specific
-//! representations.
-use std::cmp::Ordering;
-use std::fmt;
-use std::mem;
-use std::ops::{Deref, DerefMut};
-use std::str::from_utf8;
-
-use byteorder::{BigEndian, ByteOrder};
-
-use crate::basic::Type;
-use crate::column::reader::{ColumnReader, ColumnReaderImpl};
-use crate::column::writer::{ColumnWriter, ColumnWriterImpl};
-use crate::errors::{ParquetError, Result};
-use crate::util::{
-    bit_util::{from_ne_slice, FromBytes},
-    memory::{ByteBuffer, ByteBufferPtr},
-};
-
-/// Rust representation for logical type INT96, value is backed by an array of `u32`.
-/// The type only takes 12 bytes, without extra padding.
-#[derive(Clone, Debug, PartialOrd)]
-pub struct Int96 {
-    value: Option<[u32; 3]>,
-}
-
-impl Int96 {
-    /// Creates new INT96 type struct with no data set.
-    pub fn new() -> Self {
-        Self { value: None }
-    }
-
-    /// Returns underlying data as slice of [`u32`].
-    #[inline]
-    pub fn data(&self) -> &[u32] {
-        self.value
-            .as_ref()
-            .expect("set_data should have been called")
-    }
-
-    /// Sets data for this INT96 type.
-    #[inline]
-    pub fn set_data(&mut self, elem0: u32, elem1: u32, elem2: u32) {
-        self.value = Some([elem0, elem1, elem2]);
-    }
-
-    /// Converts this INT96 into an i64 representing the number of MILLISECONDS since Epoch
-    pub fn to_i64(&self) -> i64 {
-        const JULIAN_DAY_OF_EPOCH: i64 = 2_440_588;
-        const SECONDS_PER_DAY: i64 = 86_400;
-        const MILLIS_PER_SECOND: i64 = 1_000;
-
-        let day = self.data()[2] as i64;
-        let nanoseconds = ((self.data()[1] as i64) << 32) + self.data()[0] as i64;
-        let seconds = (day - JULIAN_DAY_OF_EPOCH) * SECONDS_PER_DAY;
-
-        seconds * MILLIS_PER_SECOND + nanoseconds / 1_000_000
-    }
-}
-
-impl Default for Int96 {
-    fn default() -> Self {
-        Self { value: None }
-    }
-}
-
-impl PartialEq for Int96 {
-    fn eq(&self, other: &Int96) -> bool {
-        match (&self.value, &other.value) {
-            (Some(v1), Some(v2)) => v1 == v2,
-            (None, None) => true,
-            _ => false,
-        }
-    }
-}
-
-impl From<Vec<u32>> for Int96 {
-    fn from(buf: Vec<u32>) -> Self {
-        assert_eq!(buf.len(), 3);
-        let mut result = Self::new();
-        result.set_data(buf[0], buf[1], buf[2]);
-        result
-    }
-}
-
-impl fmt::Display for Int96 {
-    #[cold]
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "{:?}", self.data())
-    }
-}
-
-/// Rust representation for BYTE_ARRAY and FIXED_LEN_BYTE_ARRAY Parquet physical types.
-/// Value is backed by a byte buffer.
-#[derive(Clone, Debug)]
-pub struct ByteArray {
-    data: Option<ByteBufferPtr>,
-}
-
-impl PartialOrd for ByteArray {
-    fn partial_cmp(&self, other: &ByteArray) -> Option<Ordering> {
-        if self.data.is_some() && other.data.is_some() {
-            match self.len().cmp(&other.len()) {
-                Ordering::Greater => Some(Ordering::Greater),
-                Ordering::Less => Some(Ordering::Less),
-                Ordering::Equal => {
-                    for (v1, v2) in self.data().iter().zip(other.data().iter()) {
-                        match v1.cmp(v2) {
-                            Ordering::Greater => return Some(Ordering::Greater),
-                            Ordering::Less => return Some(Ordering::Less),
-                            _ => {}
-                        }
-                    }
-                    Some(Ordering::Equal)
-                }
-            }
-        } else {
-            None
-        }
-    }
-}
-
-impl ByteArray {
-    /// Creates new byte array with no data set.
-    #[inline]
-    pub fn new() -> Self {
-        ByteArray { data: None }
-    }
-
-    /// Gets length of the underlying byte buffer.
-    #[inline]
-    pub fn len(&self) -> usize {
-        assert!(self.data.is_some());
-        self.data.as_ref().unwrap().len()
-    }
-
-    /// Checks if the underlying buffer is empty.
-    #[inline]
-    pub fn is_empty(&self) -> bool {
-        self.len() == 0
-    }
-
-    /// Returns slice of data.
-    #[inline]
-    pub fn data(&self) -> &[u8] {
-        self.data
-            .as_ref()
-            .expect("set_data should have been called")
-            .as_ref()
-    }
-
-    /// Set data from another byte buffer.
-    #[inline]
-    pub fn set_data(&mut self, data: ByteBufferPtr) {
-        self.data = Some(data);
-    }
-
-    /// Returns `ByteArray` instance with slice of values for a data.
-    #[inline]
-    pub fn slice(&self, start: usize, len: usize) -> Self {
-        Self::from(
-            self.data
-                .as_ref()
-                .expect("set_data should have been called")
-                .range(start, len),
-        )
-    }
-
-    pub fn as_utf8(&self) -> Result<&str> {
-        self.data
-            .as_ref()
-            .map(|ptr| ptr.as_ref())
-            .ok_or_else(|| general_err!("Can't convert empty byte array to utf8"))
-            .and_then(|bytes| from_utf8(bytes).map_err(|e| e.into()))
-    }
-}
-
-impl From<Vec<u8>> for ByteArray {
-    fn from(buf: Vec<u8>) -> ByteArray {
-        Self {
-            data: Some(ByteBufferPtr::new(buf)),
-        }
-    }
-}
-
-impl<'a> From<&'a str> for ByteArray {
-    fn from(s: &'a str) -> ByteArray {
-        let mut v = Vec::new();
-        v.extend_from_slice(s.as_bytes());
-        Self {
-            data: Some(ByteBufferPtr::new(v)),
-        }
-    }
-}
-
-impl From<ByteBufferPtr> for ByteArray {
-    fn from(ptr: ByteBufferPtr) -> ByteArray {
-        Self { data: Some(ptr) }
-    }
-}
-
-impl From<ByteBuffer> for ByteArray {
-    fn from(mut buf: ByteBuffer) -> ByteArray {
-        Self {
-            data: Some(buf.consume()),
-        }
-    }
-}
-
-impl Default for ByteArray {
-    fn default() -> Self {
-        ByteArray { data: None }
-    }
-}
-
-impl PartialEq for ByteArray {
-    fn eq(&self, other: &ByteArray) -> bool {
-        match (&self.data, &other.data) {
-            (Some(d1), Some(d2)) => d1.as_ref() == d2.as_ref(),
-            (None, None) => true,
-            _ => false,
-        }
-    }
-}
-
-impl fmt::Display for ByteArray {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "{:?}", self.data())
-    }
-}
-
-/// Wrapper type for performance reasons, this represents `FIXED_LEN_BYTE_ARRAY` but in all other
-/// considerations behaves the same as `ByteArray`
-///
-/// # Performance notes:
-/// This type is a little unfortunate, without it the compiler generates code that takes quite a
-/// big hit on the CPU pipeline. Essentially the previous version stalls awaiting the result of
-/// `T::get_physical_type() == Type::FIXED_LEN_BYTE_ARRAY`.
-///
-/// Its debatable if this is wanted, it is out of spec for what parquet documents as its base
-/// types, although there are code paths in the Rust (and potentially the C++) versions that
-/// warrant this.
-///
-/// With this wrapper type the compiler generates more targetted code paths matching the higher
-/// level logical types, removing the data-hazard from all decoding and encoding paths.
-#[repr(transparent)]
-#[derive(Clone, Debug, Default)]
-pub struct FixedLenByteArray(ByteArray);
-
-impl PartialEq for FixedLenByteArray {
-    fn eq(&self, other: &FixedLenByteArray) -> bool {
-        self.0.eq(&other.0)
-    }
-}
-
-impl PartialEq<ByteArray> for FixedLenByteArray {
-    fn eq(&self, other: &ByteArray) -> bool {
-        self.0.eq(other)
-    }
-}
-
-impl PartialEq<FixedLenByteArray> for ByteArray {
-    fn eq(&self, other: &FixedLenByteArray) -> bool {
-        self.eq(&other.0)
-    }
-}
-
-impl fmt::Display for FixedLenByteArray {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        self.0.fmt(f)
-    }
-}
-
-impl PartialOrd for FixedLenByteArray {
-    fn partial_cmp(&self, other: &FixedLenByteArray) -> Option<Ordering> {
-        self.0.partial_cmp(&other.0)
-    }
-}
-
-impl PartialOrd<FixedLenByteArray> for ByteArray {
-    fn partial_cmp(&self, other: &FixedLenByteArray) -> Option<Ordering> {
-        self.partial_cmp(&other.0)
-    }
-}
-
-impl PartialOrd<ByteArray> for FixedLenByteArray {
-    fn partial_cmp(&self, other: &ByteArray) -> Option<Ordering> {
-        self.0.partial_cmp(other)
-    }
-}
-
-impl Deref for FixedLenByteArray {
-    type Target = ByteArray;
-
-    fn deref(&self) -> &Self::Target {
-        &self.0
-    }
-}
-
-impl DerefMut for FixedLenByteArray {
-    fn deref_mut(&mut self) -> &mut Self::Target {
-        &mut self.0
-    }
-}
-
-impl From<ByteArray> for FixedLenByteArray {
-    fn from(other: ByteArray) -> Self {
-        Self(other)
-    }
-}
-
-impl From<FixedLenByteArray> for ByteArray {
-    fn from(other: FixedLenByteArray) -> Self {
-        other.0
-    }
-}
-
-/// Rust representation for Decimal values.
-///
-/// This is not a representation of Parquet physical type, but rather a wrapper for
-/// DECIMAL logical type, and serves as container for raw parts of decimal values:
-/// unscaled value in bytes, precision and scale.
-#[derive(Clone, Debug)]
-pub enum Decimal {
-    /// Decimal backed by `i32`.
-    Int32 {
-        value: [u8; 4],
-        precision: i32,
-        scale: i32,
-    },
-    /// Decimal backed by `i64`.
-    Int64 {
-        value: [u8; 8],
-        precision: i32,
-        scale: i32,
-    },
-    /// Decimal backed by byte array.
-    Bytes {
-        value: ByteArray,
-        precision: i32,
-        scale: i32,
-    },
-}
-
-impl Decimal {
-    /// Creates new decimal value from `i32`.
-    pub fn from_i32(value: i32, precision: i32, scale: i32) -> Self {
-        let mut bytes = [0; 4];
-        BigEndian::write_i32(&mut bytes, value);
-        Decimal::Int32 {
-            value: bytes,
-            precision,
-            scale,
-        }
-    }
-
-    /// Creates new decimal value from `i64`.
-    pub fn from_i64(value: i64, precision: i32, scale: i32) -> Self {
-        let mut bytes = [0; 8];
-        BigEndian::write_i64(&mut bytes, value);
-        Decimal::Int64 {
-            value: bytes,
-            precision,
-            scale,
-        }
-    }
-
-    /// Creates new decimal value from `ByteArray`.
-    pub fn from_bytes(value: ByteArray, precision: i32, scale: i32) -> Self {
-        Decimal::Bytes {
-            value,
-            precision,
-            scale,
-        }
-    }
-
-    /// Returns bytes of unscaled value.
-    pub fn data(&self) -> &[u8] {
-        match *self {
-            Decimal::Int32 { ref value, .. } => value,
-            Decimal::Int64 { ref value, .. } => value,
-            Decimal::Bytes { ref value, .. } => value.data(),
-        }
-    }
-
-    /// Returns decimal precision.
-    pub fn precision(&self) -> i32 {
-        match *self {
-            Decimal::Int32 { precision, .. } => precision,
-            Decimal::Int64 { precision, .. } => precision,
-            Decimal::Bytes { precision, .. } => precision,
-        }
-    }
-
-    /// Returns decimal scale.
-    pub fn scale(&self) -> i32 {
-        match *self {
-            Decimal::Int32 { scale, .. } => scale,
-            Decimal::Int64 { scale, .. } => scale,
-            Decimal::Bytes { scale, .. } => scale,
-        }
-    }
-}
-
-impl Default for Decimal {
-    fn default() -> Self {
-        Self::from_i32(0, 0, 0)
-    }
-}
-
-impl PartialEq for Decimal {
-    fn eq(&self, other: &Decimal) -> bool {
-        self.precision() == other.precision()
-            && self.scale() == other.scale()
-            && self.data() == other.data()
-    }
-}
-
-/// Converts an instance of data type to a slice of bytes as `u8`.
-pub trait AsBytes {
-    /// Returns slice of bytes for this data type.
-    fn as_bytes(&self) -> &[u8];
-}
-
-/// Converts an slice of a data type to a slice of bytes.
-pub trait SliceAsBytes: Sized {
-    /// Returns slice of bytes for a slice of this data type.
-    fn slice_as_bytes(self_: &[Self]) -> &[u8];
-    /// Return the internal representation as a mutable slice
-    ///
-    /// # Safety
-    /// If modified you are _required_ to ensure the internal representation
-    /// is valid and correct for the actual raw data
-    unsafe fn slice_as_bytes_mut(self_: &mut [Self]) -> &mut [u8];
-}
-
-impl AsBytes for [u8] {
-    fn as_bytes(&self) -> &[u8] {
-        self
-    }
-}
-
-macro_rules! gen_as_bytes {
-    ($source_ty:ident) => {
-        impl AsBytes for $source_ty {
-            #[allow(clippy::size_of_in_element_count)]
-            fn as_bytes(&self) -> &[u8] {
-                unsafe {
-                    std::slice::from_raw_parts(
-                        self as *const $source_ty as *const u8,
-                        std::mem::size_of::<$source_ty>(),
-                    )
-                }
-            }
-        }
-
-        impl SliceAsBytes for $source_ty {
-            #[inline]
-            #[allow(clippy::size_of_in_element_count)]
-            fn slice_as_bytes(self_: &[Self]) -> &[u8] {
-                unsafe {
-                    std::slice::from_raw_parts(
-                        self_.as_ptr() as *const u8,
-                        std::mem::size_of::<$source_ty>() * self_.len(),
-                    )
-                }
-            }
-
-            #[inline]
-            #[allow(clippy::size_of_in_element_count)]
-            unsafe fn slice_as_bytes_mut(self_: &mut [Self]) -> &mut [u8] {
-                std::slice::from_raw_parts_mut(
-                    self_.as_mut_ptr() as *mut u8,
-                    std::mem::size_of::<$source_ty>() * self_.len(),
-                )
-            }
-        }
-    };
-}
-
-gen_as_bytes!(i8);
-gen_as_bytes!(i16);
-gen_as_bytes!(i32);
-gen_as_bytes!(i64);
-gen_as_bytes!(u8);
-gen_as_bytes!(u16);
-gen_as_bytes!(u32);
-gen_as_bytes!(u64);
-gen_as_bytes!(f32);
-gen_as_bytes!(f64);
-
-macro_rules! unimplemented_slice_as_bytes {
-    ($ty: ty) => {
-        impl SliceAsBytes for $ty {
-            fn slice_as_bytes(_self: &[Self]) -> &[u8] {
-                unimplemented!()
-            }
-
-            unsafe fn slice_as_bytes_mut(_self: &mut [Self]) -> &mut [u8] {
-                unimplemented!()
-            }
-        }
-    };
-}
-
-// TODO - Can Int96 and bool be implemented in these terms?
-unimplemented_slice_as_bytes!(Int96);
-unimplemented_slice_as_bytes!(bool);
-unimplemented_slice_as_bytes!(ByteArray);
-unimplemented_slice_as_bytes!(FixedLenByteArray);
-
-impl AsBytes for bool {
-    fn as_bytes(&self) -> &[u8] {
-        unsafe { std::slice::from_raw_parts(self as *const bool as *const u8, 1) }
-    }
-}
-
-impl AsBytes for Int96 {
-    fn as_bytes(&self) -> &[u8] {
-        unsafe {
-            std::slice::from_raw_parts(self.data() as *const [u32] as *const u8, 12)
-        }
-    }
-}
-
-impl AsBytes for ByteArray {
-    fn as_bytes(&self) -> &[u8] {
-        self.data()
-    }
-}
-
-impl AsBytes for FixedLenByteArray {
-    fn as_bytes(&self) -> &[u8] {
-        self.data()
-    }
-}
-
-impl AsBytes for Decimal {
-    fn as_bytes(&self) -> &[u8] {
-        self.data()
-    }
-}
-
-impl AsBytes for Vec<u8> {
-    fn as_bytes(&self) -> &[u8] {
-        self.as_slice()
-    }
-}
-
-impl<'a> AsBytes for &'a str {
-    fn as_bytes(&self) -> &[u8] {
-        (self as &str).as_bytes()
-    }
-}
-
-impl AsBytes for str {
-    fn as_bytes(&self) -> &[u8] {
-        (self as &str).as_bytes()
-    }
-}
-
-pub(crate) mod private {
-    use crate::encodings::decoding::PlainDecoderDetails;
-    use crate::util::bit_util::{BitReader, BitWriter};
-    use crate::util::memory::ByteBufferPtr;
-
-    use byteorder::ByteOrder;
-    use std::convert::TryInto;
-
-    use super::{ParquetError, Result, SliceAsBytes};
-
-    pub type BitIndex = u64;
-
-    /// Sealed trait to start to remove specialisation from implementations
-    ///
-    /// This is done to force the associated value type to be unimplementable outside of this
-    /// crate, and thus hint to the type system (and end user) traits are public for the contract
-    /// and not for extension.
-    pub trait ParquetValueType:
-        std::cmp::PartialEq
-        + std::fmt::Debug
-        + std::fmt::Display
-        + std::default::Default
-        + std::clone::Clone
-        + super::AsBytes
-        + super::FromBytes
-        + super::SliceAsBytes
-        + PartialOrd
-    {
-        /// Encode the value directly from a higher level encoder
-        fn encode<W: std::io::Write>(
-            values: &[Self],
-            writer: &mut W,
-            bit_writer: &mut BitWriter,
-        ) -> Result<()>;
-
-        /// Establish the data that will be decoded in a buffer
-        fn set_data(
-            decoder: &mut PlainDecoderDetails,
-            data: ByteBufferPtr,
-            num_values: usize,
-        );
-
-        /// Decode the value from a given buffer for a higher level decoder
-        fn decode(
-            buffer: &mut [Self],
-            decoder: &mut PlainDecoderDetails,
-        ) -> Result<usize>;
-
-        /// Return the encoded size for a type
-        fn dict_encoding_size(&self) -> (usize, usize) {
-            (std::mem::size_of::<Self>(), 1)
-        }
-
-        /// Return the value as i64 if possible
-        ///
-        /// This is essentially the same as `std::convert::TryInto<i64>` but can
-        /// implemented for `f32` and `f64`, types that would fail orphan rules
-        fn as_i64(&self) -> Result<i64> {
-            Err(general_err!("Type cannot be converted to i64"))
-        }
-
-        /// Return the value as u64 if possible
-        ///
-        /// This is essentially the same as `std::convert::TryInto<u64>` but can
-        /// implemented for `f32` and `f64`, types that would fail orphan rules
-        fn as_u64(&self) -> Result<u64> {
-            self.as_i64()
-                .map_err(|_| general_err!("Type cannot be converted to u64"))
-                .map(|x| x as u64)
-        }
-
-        /// Return the value as an Any to allow for downcasts without transmutation
-        fn as_any(&self) -> &dyn std::any::Any;
-
-        /// Return the value as an mutable Any to allow for downcasts without transmutation
-        fn as_mut_any(&mut self) -> &mut dyn std::any::Any;
-    }
-
-    impl ParquetValueType for bool {
-        #[inline]
-        fn encode<W: std::io::Write>(
-            values: &[Self],
-            _: &mut W,
-            bit_writer: &mut BitWriter,
-        ) -> Result<()> {
-            for value in values {
-                bit_writer.put_value(*value as u64, 1);
-            }
-            Ok(())
-        }
-
-        #[inline]
-        fn set_data(
-            decoder: &mut PlainDecoderDetails,
-            data: ByteBufferPtr,
-            num_values: usize,
-        ) {
-            decoder.bit_reader.replace(BitReader::new(data));
-            decoder.num_values = num_values;
-        }
-
-        #[inline]
-        fn decode(
-            buffer: &mut [Self],
-            decoder: &mut PlainDecoderDetails,
-        ) -> Result<usize> {
-            let bit_reader = decoder.bit_reader.as_mut().unwrap();
-            let num_values = std::cmp::min(buffer.len(), decoder.num_values);
-            let values_read = bit_reader.get_batch(&mut buffer[..num_values], 1);
-            decoder.num_values -= values_read;
-            Ok(values_read)
-        }
-
-        #[inline]
-        fn as_i64(&self) -> Result<i64> {
-            Ok(*self as i64)
-        }
-
-        #[inline]
-        fn as_any(&self) -> &dyn std::any::Any {
-            self
-        }
-
-        #[inline]
-        fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
-            self
-        }
-    }
-
-    /// Hopelessly unsafe function that emulates `num::as_ne_bytes`
-    ///
-    /// It is not recommended to use this outside of this private module as, while it
-    /// _should_ work for primitive values, it is little better than a transmutation
-    /// and can act as a backdoor into mis-interpreting types as arbitary byte slices
-    #[inline]
-    fn as_raw<'a, T>(value: *const T) -> &'a [u8] {
-        unsafe {
-            let value = value as *const u8;
-            std::slice::from_raw_parts(value, std::mem::size_of::<T>())
-        }
-    }
-
-    macro_rules! impl_from_raw {
-        ($ty: ty, $self: ident => $as_i64: block) => {
-            impl ParquetValueType for $ty {
-                #[inline]
-                fn encode<W: std::io::Write>(values: &[Self], writer: &mut W, _: &mut BitWriter) -> Result<()> {
-                    let raw = unsafe {
-                        std::slice::from_raw_parts(
-                            values.as_ptr() as *const u8,
-                            std::mem::size_of::<$ty>() * values.len(),
-                        )
-                    };
-                    writer.write_all(raw)?;
-
-                    Ok(())
-                }
-
-                #[inline]
-                fn set_data(decoder: &mut PlainDecoderDetails, data: ByteBufferPtr, num_values: usize) {
-                    decoder.data.replace(data);
-                    decoder.start = 0;
-                    decoder.num_values = num_values;
-                }
-
-                #[inline]
-                fn decode(buffer: &mut [Self], decoder: &mut PlainDecoderDetails) -> Result<usize> {
-                    let data = decoder.data.as_ref().expect("set_data should have been called");
-                    let num_values = std::cmp::min(buffer.len(), decoder.num_values);
-                    let bytes_left = data.len() - decoder.start;
-                    let bytes_to_decode = std::mem::size_of::<Self>() * num_values;
-
-                    if bytes_left < bytes_to_decode {
-                        return Err(eof_err!("Not enough bytes to decode"));
-                    }
-
-                    // SAFETY: Raw types should be as per the standard rust bit-vectors
-                    unsafe {
-                        let raw_buffer = &mut Self::slice_as_bytes_mut(buffer)[..bytes_to_decode];
-                        raw_buffer.copy_from_slice(data.range(decoder.start, bytes_to_decode).as_ref());
-                    };
-                    decoder.start += bytes_to_decode;
-                    decoder.num_values -= num_values;
-
-                    Ok(num_values)
-                }
-
-                #[inline]
-                fn as_i64(&$self) -> Result<i64> {
-                    $as_i64
-                }
-
-                #[inline]
-                fn as_any(&self) -> &dyn std::any::Any {
-                    self
-                }
-
-                #[inline]
-                fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
-                    self
-                }
-            }
-        }
-    }
-
-    impl_from_raw!(i32, self => { Ok(*self as i64) });
-    impl_from_raw!(i64, self => { Ok(*self) });
-    impl_from_raw!(f32, self => { Err(general_err!("Type cannot be converted to i64")) });
-    impl_from_raw!(f64, self => { Err(general_err!("Type cannot be converted to i64")) });
-
-    impl ParquetValueType for super::Int96 {
-        #[inline]
-        fn encode<W: std::io::Write>(
-            values: &[Self],
-            writer: &mut W,
-            _: &mut BitWriter,
-        ) -> Result<()> {
-            for value in values {
-                let raw = unsafe {
-                    std::slice::from_raw_parts(
-                        value.data() as *const [u32] as *const u8,
-                        12,
-                    )
-                };
-                writer.write_all(raw)?;
-            }
-            Ok(())
-        }
-
-        #[inline]
-        fn set_data(
-            decoder: &mut PlainDecoderDetails,
-            data: ByteBufferPtr,
-            num_values: usize,
-        ) {
-            decoder.data.replace(data);
-            decoder.start = 0;
-            decoder.num_values = num_values;
-        }
-
-        #[inline]
-        fn decode(
-            buffer: &mut [Self],
-            decoder: &mut PlainDecoderDetails,
-        ) -> Result<usize> {
-            // TODO - Remove the duplication between this and the general slice method
-            let data = decoder
-                .data
-                .as_ref()
-                .expect("set_data should have been called");
-            let num_values = std::cmp::min(buffer.len(), decoder.num_values);
-            let bytes_left = data.len() - decoder.start;
-            let bytes_to_decode = 12 * num_values;
-
-            if bytes_left < bytes_to_decode {
-                return Err(eof_err!("Not enough bytes to decode"));
-            }
-
-            let data_range = data.range(decoder.start, bytes_to_decode);
-            let bytes: &[u8] = data_range.data();
-            decoder.start += bytes_to_decode;
-
-            let mut pos = 0; // position in byte array
-            for i in 0..num_values {
-                let elem0 = byteorder::LittleEndian::read_u32(&bytes[pos..pos + 4]);
-                let elem1 = byteorder::LittleEndian::read_u32(&bytes[pos + 4..pos + 8]);
-                let elem2 = byteorder::LittleEndian::read_u32(&bytes[pos + 8..pos + 12]);
-
-                buffer[i]
-                    .as_mut_any()
-                    .downcast_mut::<Self>()
-                    .unwrap()
-                    .set_data(elem0, elem1, elem2);
-
-                pos += 12;
-            }
-            decoder.num_values -= num_values;
-
-            Ok(num_values)
-        }
-
-        #[inline]
-        fn as_any(&self) -> &dyn std::any::Any {
-            self
-        }
-
-        #[inline]
-        fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
-            self
-        }
-    }
-
-    // TODO - Why does macro importing fail?
-    /// Reads `$size` of bytes from `$src`, and reinterprets them as type `$ty`, in
-    /// little-endian order. `$ty` must implement the `Default` trait. Otherwise this won't
-    /// compile.
-    /// This is copied and modified from byteorder crate.
-    macro_rules! read_num_bytes {
-        ($ty:ty, $size:expr, $src:expr) => {{
-            assert!($size <= $src.len());
-            let mut buffer =
-                <$ty as $crate::util::bit_util::FromBytes>::Buffer::default();
-            buffer.as_mut()[..$size].copy_from_slice(&$src[..$size]);
-            <$ty>::from_ne_bytes(buffer)
-        }};
-    }
-
-    impl ParquetValueType for super::ByteArray {
-        #[inline]
-        fn encode<W: std::io::Write>(
-            values: &[Self],
-            writer: &mut W,
-            _: &mut BitWriter,
-        ) -> Result<()> {
-            for value in values {
-                let len: u32 = value.len().try_into().unwrap();
-                writer.write_all(&len.to_ne_bytes())?;
-                let raw = value.data();
-                writer.write_all(raw)?;
-            }
-            Ok(())
-        }
-
-        #[inline]
-        fn set_data(
-            decoder: &mut PlainDecoderDetails,
-            data: ByteBufferPtr,
-            num_values: usize,
-        ) {
-            decoder.data.replace(data);
-            decoder.start = 0;
-            decoder.num_values = num_values;
-        }
-
-        #[inline]
-        fn decode(
-            buffer: &mut [Self],
-            decoder: &mut PlainDecoderDetails,
-        ) -> Result<usize> {
-            let data = decoder
-                .data
-                .as_mut()
-                .expect("set_data should have been called");
-            let num_values = std::cmp::min(buffer.len(), decoder.num_values);
-            for i in 0..num_values {
-                let len: usize =
-                    read_num_bytes!(u32, 4, data.start_from(decoder.start).as_ref())
-                        as usize;
-                decoder.start += std::mem::size_of::<u32>();
-
-                if data.len() < decoder.start + len {
-                    return Err(eof_err!("Not enough bytes to decode"));
-                }
-
-                let val: &mut Self = buffer[i].as_mut_any().downcast_mut().unwrap();
-
-                val.set_data(data.range(decoder.start, len));
-                decoder.start += len;
-            }
-            decoder.num_values -= num_values;
-
-            Ok(num_values)
-        }
-
-        #[inline]
-        fn dict_encoding_size(&self) -> (usize, usize) {
-            (std::mem::size_of::<u32>(), self.len())
-        }
-
-        #[inline]
-        fn as_any(&self) -> &dyn std::any::Any {
-            self
-        }
-
-        #[inline]
-        fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
-            self
-        }
-    }
-
-    impl ParquetValueType for super::FixedLenByteArray {
-        #[inline]
-        fn encode<W: std::io::Write>(
-            values: &[Self],
-            writer: &mut W,
-            _: &mut BitWriter,
-        ) -> Result<()> {
-            for value in values {
-                let raw = value.data();
-                writer.write_all(raw)?;
-            }
-            Ok(())
-        }
-
-        #[inline]
-        fn set_data(
-            decoder: &mut PlainDecoderDetails,
-            data: ByteBufferPtr,
-            num_values: usize,
-        ) {
-            decoder.data.replace(data);
-            decoder.start = 0;
-            decoder.num_values = num_values;
-        }
-
-        #[inline]
-        fn decode(
-            buffer: &mut [Self],
-            decoder: &mut PlainDecoderDetails,
-        ) -> Result<usize> {
-            assert!(decoder.type_length > 0);
-
-            let data = decoder
-                .data
-                .as_mut()
-                .expect("set_data should have been called");
-            let num_values = std::cmp::min(buffer.len(), decoder.num_values);
-            for i in 0..num_values {
-                let len = decoder.type_length as usize;
-
-                if data.len() < decoder.start + len {
-                    return Err(eof_err!("Not enough bytes to decode"));
-                }
-
-                let val: &mut Self = buffer[i].as_mut_any().downcast_mut().unwrap();
-
-                val.set_data(data.range(decoder.start, len));
-                decoder.start += len;
-            }
-            decoder.num_values -= num_values;
-
-            Ok(num_values)
-        }
-
-        #[inline]
-        fn dict_encoding_size(&self) -> (usize, usize) {
-            (std::mem::size_of::<u32>(), self.len())
-        }
-
-        #[inline]
-        fn as_any(&self) -> &dyn std::any::Any {
-            self
-        }
-
-        #[inline]
-        fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
-            self
-        }
-    }
-}
-
-/// Contains the Parquet physical type information as well as the Rust primitive type
-/// presentation.
-pub trait DataType: 'static {
-    type T: private::ParquetValueType;
-
-    /// Returns Parquet physical type.
-    fn get_physical_type() -> Type;
-
-    /// Returns size in bytes for Rust representation of the physical type.
-    fn get_type_size() -> usize;
-
-    fn get_column_reader(column_writer: ColumnReader) -> Option<ColumnReaderImpl<Self>>
-    where
-        Self: Sized;
-
-    fn get_column_writer(column_writer: ColumnWriter) -> Option<ColumnWriterImpl<Self>>
-    where
-        Self: Sized;
-
-    fn get_column_writer_ref(
-        column_writer: &ColumnWriter,
-    ) -> Option<&ColumnWriterImpl<Self>>
-    where
-        Self: Sized;
-
-    fn get_column_writer_mut(
-        column_writer: &mut ColumnWriter,
-    ) -> Option<&mut ColumnWriterImpl<Self>>
-    where
-        Self: Sized;
-}
-
-// Workaround bug in specialization
-pub trait SliceAsBytesDataType: DataType
-where
-    Self::T: SliceAsBytes,
-{
-}
-
-impl<T> SliceAsBytesDataType for T
-where
-    T: DataType,
-    <T as DataType>::T: SliceAsBytes,
-{
-}
-
-macro_rules! make_type {
-    ($name:ident, $physical_ty:path, $reader_ident: ident, $writer_ident: ident, $native_ty:ty, $size:expr) => {
-        #[derive(Clone)]
-        pub struct $name {}
-
-        impl DataType for $name {
-            type T = $native_ty;
-
-            fn get_physical_type() -> Type {
-                $physical_ty
-            }
-
-            fn get_type_size() -> usize {
-                $size
-            }
-
-            fn get_column_reader(
-                column_writer: ColumnReader,
-            ) -> Option<ColumnReaderImpl<Self>> {
-                match column_writer {
-                    ColumnReader::$reader_ident(w) => Some(w),
-                    _ => None,
-                }
-            }
-
-            fn get_column_writer(
-                column_writer: ColumnWriter,
-            ) -> Option<ColumnWriterImpl<Self>> {
-                match column_writer {
-                    ColumnWriter::$writer_ident(w) => Some(w),
-                    _ => None,
-                }
-            }
-
-            fn get_column_writer_ref(
-                column_writer: &ColumnWriter,
-            ) -> Option<&ColumnWriterImpl<Self>> {
-                match column_writer {
-                    ColumnWriter::$writer_ident(w) => Some(w),
-                    _ => None,
-                }
-            }
-
-            fn get_column_writer_mut(
-                column_writer: &mut ColumnWriter,
-            ) -> Option<&mut ColumnWriterImpl<Self>> {
-                match column_writer {
-                    ColumnWriter::$writer_ident(w) => Some(w),
-                    _ => None,
-                }
-            }
-        }
-    };
-}
-
-// Generate struct definitions for all physical types
-
-make_type!(
-    BoolType,
-    Type::BOOLEAN,
-    BoolColumnReader,
-    BoolColumnWriter,
-    bool,
-    1
-);
-make_type!(
-    Int32Type,
-    Type::INT32,
-    Int32ColumnReader,
-    Int32ColumnWriter,
-    i32,
-    4
-);
-make_type!(
-    Int64Type,
-    Type::INT64,
-    Int64ColumnReader,
-    Int64ColumnWriter,
-    i64,
-    8
-);
-make_type!(
-    Int96Type,
-    Type::INT96,
-    Int96ColumnReader,
-    Int96ColumnWriter,
-    Int96,
-    mem::size_of::<Int96>()
-);
-make_type!(
-    FloatType,
-    Type::FLOAT,
-    FloatColumnReader,
-    FloatColumnWriter,
-    f32,
-    4
-);
-make_type!(
-    DoubleType,
-    Type::DOUBLE,
-    DoubleColumnReader,
-    DoubleColumnWriter,
-    f64,
-    8
-);
-make_type!(
-    ByteArrayType,
-    Type::BYTE_ARRAY,
-    ByteArrayColumnReader,
-    ByteArrayColumnWriter,
-    ByteArray,
-    mem::size_of::<ByteArray>()
-);
-make_type!(
-    FixedLenByteArrayType,
-    Type::FIXED_LEN_BYTE_ARRAY,
-    FixedLenByteArrayColumnReader,
-    FixedLenByteArrayColumnWriter,
-    FixedLenByteArray,
-    mem::size_of::<FixedLenByteArray>()
-);
-
-impl FromBytes for Int96 {
-    type Buffer = [u8; 12];
-    fn from_le_bytes(_bs: Self::Buffer) -> Self {
-        unimplemented!()
-    }
-    fn from_be_bytes(_bs: Self::Buffer) -> Self {
-        unimplemented!()
-    }
-    fn from_ne_bytes(bs: Self::Buffer) -> Self {
-        let mut i = Int96::new();
-        i.set_data(
-            from_ne_slice(&bs[0..4]),
-            from_ne_slice(&bs[4..8]),
-            from_ne_slice(&bs[8..12]),
-        );
-        i
-    }
-}
-
-// FIXME Needed to satisfy the constraint of many decoding functions but ByteArray does not
-// appear to actual be converted directly from bytes
-impl FromBytes for ByteArray {
-    type Buffer = [u8; 8];
-    fn from_le_bytes(_bs: Self::Buffer) -> Self {
-        unreachable!()
-    }
-    fn from_be_bytes(_bs: Self::Buffer) -> Self {
-        unreachable!()
-    }
-    fn from_ne_bytes(bs: Self::Buffer) -> Self {
-        ByteArray::from(bs.to_vec())
-    }
-}
-
-impl FromBytes for FixedLenByteArray {
-    type Buffer = [u8; 8];
-
-    fn from_le_bytes(_bs: Self::Buffer) -> Self {
-        unreachable!()
-    }
-    fn from_be_bytes(_bs: Self::Buffer) -> Self {
-        unreachable!()
-    }
-    fn from_ne_bytes(bs: Self::Buffer) -> Self {
-        Self(ByteArray::from(bs.to_vec()))
-    }
-}
-
-/// Macro to reduce repetition in making type assertions on the physical type against `T`
-macro_rules! ensure_phys_ty {
-    ($($ty: pat)|+ , $err: literal) => {
-        match T::get_physical_type() {
-            $($ty => (),)*
-            _ => panic!($err),
-        };
-    }
-}
-
-#[cfg(test)]
-#[allow(clippy::float_cmp, clippy::approx_constant)]
-mod tests {
-    use super::*;
-
-    #[test]
-    #[allow(clippy::string_lit_as_bytes)]
-    fn test_as_bytes() {
-        assert_eq!(false.as_bytes(), &[0]);
-        assert_eq!(true.as_bytes(), &[1]);
-        assert_eq!(7_i32.as_bytes(), &[7, 0, 0, 0]);
-        assert_eq!(555_i32.as_bytes(), &[43, 2, 0, 0]);
-        assert_eq!(555_u32.as_bytes(), &[43, 2, 0, 0]);
-        assert_eq!(i32::max_value().as_bytes(), &[255, 255, 255, 127]);
-        assert_eq!(i32::min_value().as_bytes(), &[0, 0, 0, 128]);
-        assert_eq!(7_i64.as_bytes(), &[7, 0, 0, 0, 0, 0, 0, 0]);
-        assert_eq!(555_i64.as_bytes(), &[43, 2, 0, 0, 0, 0, 0, 0]);
-        assert_eq!(
-            (i64::max_value()).as_bytes(),
-            &[255, 255, 255, 255, 255, 255, 255, 127]
-        );
-        assert_eq!((i64::min_value()).as_bytes(), &[0, 0, 0, 0, 0, 0, 0, 128]);
-        assert_eq!(3.14_f32.as_bytes(), &[195, 245, 72, 64]);
-        assert_eq!(3.14_f64.as_bytes(), &[31, 133, 235, 81, 184, 30, 9, 64]);
-        assert_eq!("hello".as_bytes(), &[b'h', b'e', b'l', b'l', b'o']);
-        assert_eq!(
-            Vec::from("hello".as_bytes()).as_bytes(),
-            &[b'h', b'e', b'l', b'l', b'o']
-        );
-
-        // Test Int96
-        let i96 = Int96::from(vec![1, 2, 3]);
-        assert_eq!(i96.as_bytes(), &[1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0]);
-
-        // Test ByteArray
-        let ba = ByteArray::from(vec![1, 2, 3]);
-        assert_eq!(ba.as_bytes(), &[1, 2, 3]);
-
-        // Test Decimal
-        let decimal = Decimal::from_i32(123, 5, 2);
-        assert_eq!(decimal.as_bytes(), &[0, 0, 0, 123]);
-        let decimal = Decimal::from_i64(123, 5, 2);
-        assert_eq!(decimal.as_bytes(), &[0, 0, 0, 0, 0, 0, 0, 123]);
-        let decimal = Decimal::from_bytes(ByteArray::from(vec![1, 2, 3]), 5, 2);
-        assert_eq!(decimal.as_bytes(), &[1, 2, 3]);
-    }
-
-    #[test]
-    fn test_int96_from() {
-        assert_eq!(
-            Int96::from(vec![1, 12345, 1234567890]).data(),
-            &[1, 12345, 1234567890]
-        );
-    }
-
-    #[test]
-    fn test_byte_array_from() {
-        assert_eq!(
-            ByteArray::from(vec![b'A', b'B', b'C']).data(),
-            &[b'A', b'B', b'C']
-        );
-        assert_eq!(ByteArray::from("ABC").data(), &[b'A', b'B', b'C']);
-        assert_eq!(
-            ByteArray::from(ByteBufferPtr::new(vec![1u8, 2u8, 3u8, 4u8, 5u8])).data(),
-            &[1u8, 2u8, 3u8, 4u8, 5u8]
-        );
-        let mut buf = ByteBuffer::new();
-        buf.set_data(vec![6u8, 7u8, 8u8, 9u8, 10u8]);
-        assert_eq!(ByteArray::from(buf).data(), &[6u8, 7u8, 8u8, 9u8, 10u8]);
-    }
-
-    #[test]
-    fn test_decimal_partial_eq() {
-        assert_eq!(Decimal::default(), Decimal::from_i32(0, 0, 0));
-        assert_eq!(Decimal::from_i32(222, 5, 2), Decimal::from_i32(222, 5, 2));
-        assert_eq!(
-            Decimal::from_bytes(ByteArray::from(vec![0, 0, 0, 3]), 5, 2),
-            Decimal::from_i32(3, 5, 2)
-        );
-
-        assert!(Decimal::from_i32(222, 5, 2) != Decimal::from_i32(111, 5, 2));
-        assert!(Decimal::from_i32(222, 5, 2) != Decimal::from_i32(222, 6, 2));
-        assert!(Decimal::from_i32(222, 5, 2) != Decimal::from_i32(222, 5, 3));
-
-        assert!(Decimal::from_i64(222, 5, 2) != Decimal::from_i32(222, 5, 2));
-    }
-
-    #[test]
-    fn test_byte_array_ord() {
-        let ba1 = ByteArray::from(vec![1, 2, 3]);
-        let ba11 = ByteArray::from(vec![1, 2, 3]);
-        let ba2 = ByteArray::from(vec![3, 4]);
-        let ba3 = ByteArray::from(vec![1, 2, 4]);
-        let ba4 = ByteArray::from(vec![]);
-        let ba5 = ByteArray::from(vec![2, 2, 3]);
-
-        assert!(ba1 > ba2);
-        assert!(ba3 > ba1);
-        assert!(ba1 > ba4);
-        assert_eq!(ba1, ba11);
-        assert!(ba5 > ba1);
-    }
-}
diff --git a/rust/parquet/src/encodings/decoding.rs b/rust/parquet/src/encodings/decoding.rs
deleted file mode 100644
index b73ebf0285c..00000000000
--- a/rust/parquet/src/encodings/decoding.rs
+++ /dev/null
@@ -1,1387 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Contains all supported decoders for Parquet.
-
-use std::{cmp, marker::PhantomData, mem};
-
-use super::rle::RleDecoder;
-
-use crate::basic::*;
-use crate::data_type::private::*;
-use crate::data_type::*;
-use crate::errors::{ParquetError, Result};
-use crate::schema::types::ColumnDescPtr;
-use crate::util::{
-    bit_util::{self, BitReader, FromBytes},
-    memory::{ByteBuffer, ByteBufferPtr},
-};
-
-// ----------------------------------------------------------------------
-// Decoders
-
-/// A Parquet decoder for the data type `T`.
-pub trait Decoder<T: DataType> {
-    /// Sets the data to decode to be `data`, which should contain `num_values` of values
-    /// to decode.
-    fn set_data(&mut self, data: ByteBufferPtr, num_values: usize) -> Result<()>;
-
-    /// Consumes values from this decoder and write the results to `buffer`. This will try
-    /// to fill up `buffer`.
-    ///
-    /// Returns the actual number of values decoded, which should be equal to
-    /// `buffer.len()` unless the remaining number of values is less than
-    /// `buffer.len()`.
-    fn get(&mut self, buffer: &mut [T::T]) -> Result<usize>;
-
-    /// Consume values from this decoder and write the results to `buffer`, leaving
-    /// "spaces" for null values.
-    ///
-    /// `null_count` is the number of nulls we expect to see in `buffer`, after reading.
-    /// `valid_bits` stores the valid bit for each value in the buffer. It should contain
-    ///   at least number of bits that equal to `buffer.len()`.
-    ///
-    /// Returns the actual number of values decoded.
-    ///
-    /// # Panics
-    ///
-    /// Panics if `null_count` is greater than `buffer.len()`.
-    fn get_spaced(
-        &mut self,
-        buffer: &mut [T::T],
-        null_count: usize,
-        valid_bits: &[u8],
-    ) -> Result<usize> {
-        assert!(buffer.len() >= null_count);
-
-        // TODO: check validity of the input arguments?
-        if null_count == 0 {
-            return self.get(buffer);
-        }
-
-        let num_values = buffer.len();
-        let values_to_read = num_values - null_count;
-        let values_read = self.get(buffer)?;
-        if values_read != values_to_read {
-            return Err(general_err!(
-                "Number of values read: {}, doesn't match expected: {}",
-                values_read,
-                values_to_read
-            ));
-        }
-        let mut values_to_move = values_read;
-        for i in (0..num_values).rev() {
-            if bit_util::get_bit(valid_bits, i) {
-                values_to_move -= 1;
-                buffer.swap(i, values_to_move);
-            }
-        }
-
-        Ok(num_values)
-    }
-
-    /// Returns the number of values left in this decoder stream.
-    fn values_left(&self) -> usize;
-
-    /// Returns the encoding for this decoder.
-    fn encoding(&self) -> Encoding;
-}
-
-/// Gets a decoder for the column descriptor `descr` and encoding type `encoding`.
-///
-/// NOTE: the primitive type in `descr` MUST match the data type `T`, otherwise
-/// disastrous consequence could occur.
-pub fn get_decoder<T: DataType>(
-    descr: ColumnDescPtr,
-    encoding: Encoding,
-) -> Result<Box<dyn Decoder<T>>> {
-    let decoder: Box<dyn Decoder<T>> = match encoding {
-        Encoding::PLAIN => Box::new(PlainDecoder::new(descr.type_length())),
-        Encoding::RLE_DICTIONARY | Encoding::PLAIN_DICTIONARY => {
-            return Err(general_err!(
-                "Cannot initialize this encoding through this function"
-            ));
-        }
-        Encoding::RLE => Box::new(RleValueDecoder::new()),
-        Encoding::DELTA_BINARY_PACKED => Box::new(DeltaBitPackDecoder::new()),
-        Encoding::DELTA_LENGTH_BYTE_ARRAY => Box::new(DeltaLengthByteArrayDecoder::new()),
-        Encoding::DELTA_BYTE_ARRAY => Box::new(DeltaByteArrayDecoder::new()),
-        e => return Err(nyi_err!("Encoding {} is not supported", e)),
-    };
-    Ok(decoder)
-}
-
-// ----------------------------------------------------------------------
-// PLAIN Decoding
-
-#[derive(Default)]
-pub struct PlainDecoderDetails {
-    // The remaining number of values in the byte array
-    pub(crate) num_values: usize,
-
-    // The current starting index in the byte array. Not used when `T` is bool.
-    pub(crate) start: usize,
-
-    // The length for the type `T`. Only used when `T` is `FixedLenByteArrayType`
-    pub(crate) type_length: i32,
-
-    // The byte array to decode from. Not set if `T` is bool.
-    pub(crate) data: Option<ByteBufferPtr>,
-
-    // Read `data` bit by bit. Only set if `T` is bool.
-    pub(crate) bit_reader: Option<BitReader>,
-}
-
-/// Plain decoding that supports all types.
-/// Values are encoded back to back. For native types, data is encoded as little endian.
-/// Floating point types are encoded in IEEE.
-/// See [`PlainEncoder`](crate::encoding::PlainEncoder) for more information.
-pub struct PlainDecoder<T: DataType> {
-    // The binary details needed for decoding
-    inner: PlainDecoderDetails,
-
-    // To allow `T` in the generic parameter for this struct. This doesn't take any
-    // space.
-    _phantom: PhantomData<T>,
-}
-
-impl<T: DataType> PlainDecoder<T> {
-    /// Creates new plain decoder.
-    pub fn new(type_length: i32) -> Self {
-        PlainDecoder {
-            inner: PlainDecoderDetails {
-                type_length,
-                num_values: 0,
-                start: 0,
-                data: None,
-                bit_reader: None,
-            },
-            _phantom: PhantomData,
-        }
-    }
-}
-
-impl<T: DataType> Decoder<T> for PlainDecoder<T> {
-    #[inline]
-    fn set_data(&mut self, data: ByteBufferPtr, num_values: usize) -> Result<()> {
-        T::T::set_data(&mut self.inner, data, num_values);
-        Ok(())
-    }
-
-    #[inline]
-    fn values_left(&self) -> usize {
-        self.inner.num_values
-    }
-
-    #[inline]
-    fn encoding(&self) -> Encoding {
-        Encoding::PLAIN
-    }
-
-    #[inline]
-    fn get(&mut self, buffer: &mut [T::T]) -> Result<usize> {
-        T::T::decode(buffer, &mut self.inner)
-    }
-}
-
-// ----------------------------------------------------------------------
-// RLE_DICTIONARY/PLAIN_DICTIONARY Decoding
-
-/// Dictionary decoder.
-/// The dictionary encoding builds a dictionary of values encountered in a given column.
-/// The dictionary is be stored in a dictionary page per column chunk.
-/// See [`DictEncoder`](crate::encoding::DictEncoder) for more information.
-pub struct DictDecoder<T: DataType> {
-    // The dictionary, which maps ids to the values
-    dictionary: Vec<T::T>,
-
-    // Whether `dictionary` has been initialized
-    has_dictionary: bool,
-
-    // The decoder for the value ids
-    rle_decoder: Option<RleDecoder>,
-
-    // Number of values left in the data stream
-    num_values: usize,
-}
-
-impl<T: DataType> DictDecoder<T> {
-    /// Creates new dictionary decoder.
-    pub fn new() -> Self {
-        Self {
-            dictionary: vec![],
-            has_dictionary: false,
-            rle_decoder: None,
-            num_values: 0,
-        }
-    }
-
-    /// Decodes and sets values for dictionary using `decoder` decoder.
-    pub fn set_dict(&mut self, mut decoder: Box<dyn Decoder<T>>) -> Result<()> {
-        let num_values = decoder.values_left();
-        self.dictionary.resize(num_values, T::T::default());
-        let _ = decoder.get(&mut self.dictionary)?;
-        self.has_dictionary = true;
-        Ok(())
-    }
-}
-
-impl<T: DataType> Decoder<T> for DictDecoder<T> {
-    fn set_data(&mut self, data: ByteBufferPtr, num_values: usize) -> Result<()> {
-        // First byte in `data` is bit width
-        let bit_width = data.as_ref()[0];
-        let mut rle_decoder = RleDecoder::new(bit_width);
-        rle_decoder.set_data(data.start_from(1));
-        self.num_values = num_values;
-        self.rle_decoder = Some(rle_decoder);
-        Ok(())
-    }
-
-    fn get(&mut self, buffer: &mut [T::T]) -> Result<usize> {
-        assert!(self.rle_decoder.is_some());
-        assert!(self.has_dictionary, "Must call set_dict() first!");
-
-        let rle = self.rle_decoder.as_mut().unwrap();
-        let num_values = cmp::min(buffer.len(), self.num_values);
-        rle.get_batch_with_dict(&self.dictionary[..], buffer, num_values)
-    }
-
-    /// Number of values left in this decoder stream
-    fn values_left(&self) -> usize {
-        self.num_values
-    }
-
-    fn encoding(&self) -> Encoding {
-        Encoding::RLE_DICTIONARY
-    }
-}
-
-// ----------------------------------------------------------------------
-// RLE Decoding
-
-/// RLE/Bit-Packing hybrid decoding for values.
-/// Currently is used only for data pages v2 and supports boolean types.
-/// See [`RleValueEncoder`](crate::encoding::RleValueEncoder) for more information.
-pub struct RleValueDecoder<T: DataType> {
-    values_left: usize,
-    decoder: RleDecoder,
-    _phantom: PhantomData<T>,
-}
-
-impl<T: DataType> RleValueDecoder<T> {
-    pub fn new() -> Self {
-        Self {
-            values_left: 0,
-            decoder: RleDecoder::new(1),
-            _phantom: PhantomData,
-        }
-    }
-}
-
-impl<T: DataType> Decoder<T> for RleValueDecoder<T> {
-    #[inline]
-    fn set_data(&mut self, data: ByteBufferPtr, num_values: usize) -> Result<()> {
-        // Only support RLE value reader for boolean values with bit width of 1.
-        ensure_phys_ty!(Type::BOOLEAN, "RleValueDecoder only supports BoolType");
-
-        // We still need to remove prefix of i32 from the stream.
-        const I32_SIZE: usize = mem::size_of::<i32>();
-        let data_size = read_num_bytes!(i32, I32_SIZE, data.as_ref()) as usize;
-        self.decoder = RleDecoder::new(1);
-        self.decoder.set_data(data.range(I32_SIZE, data_size));
-        self.values_left = num_values;
-        Ok(())
-    }
-
-    #[inline]
-    fn values_left(&self) -> usize {
-        self.values_left
-    }
-
-    #[inline]
-    fn encoding(&self) -> Encoding {
-        Encoding::RLE
-    }
-
-    #[inline]
-    fn get(&mut self, buffer: &mut [T::T]) -> Result<usize> {
-        let num_values = cmp::min(buffer.len(), self.values_left);
-        let values_read = self.decoder.get_batch(&mut buffer[..num_values])?;
-        self.values_left -= values_read;
-        Ok(values_read)
-    }
-}
-
-// ----------------------------------------------------------------------
-// DELTA_BINARY_PACKED Decoding
-
-/// Delta binary packed decoder.
-/// Supports INT32 and INT64 types.
-/// See [`DeltaBitPackEncoder`](crate::encoding::DeltaBitPackEncoder) for more
-/// information.
-pub struct DeltaBitPackDecoder<T: DataType> {
-    bit_reader: BitReader,
-    initialized: bool,
-
-    // Header info
-    num_values: usize,
-    num_mini_blocks: i64,
-    values_per_mini_block: usize,
-    values_current_mini_block: usize,
-    first_value: i64,
-    first_value_read: bool,
-
-    // Per block info
-    min_delta: i64,
-    mini_block_idx: usize,
-    delta_bit_width: u8,
-    delta_bit_widths: ByteBuffer,
-    deltas_in_mini_block: Vec<T::T>, // eagerly loaded deltas for a mini block
-    use_batch: bool,
-
-    current_value: i64,
-
-    _phantom: PhantomData<T>,
-}
-
-impl<T: DataType> DeltaBitPackDecoder<T> {
-    /// Creates new delta bit packed decoder.
-    pub fn new() -> Self {
-        Self {
-            bit_reader: BitReader::from(vec![]),
-            initialized: false,
-            num_values: 0,
-            num_mini_blocks: 0,
-            values_per_mini_block: 0,
-            values_current_mini_block: 0,
-            first_value: 0,
-            first_value_read: false,
-            min_delta: 0,
-            mini_block_idx: 0,
-            delta_bit_width: 0,
-            delta_bit_widths: ByteBuffer::new(),
-            deltas_in_mini_block: vec![],
-            use_batch: mem::size_of::<T::T>() == 4,
-            current_value: 0,
-            _phantom: PhantomData,
-        }
-    }
-
-    /// Returns underlying bit reader offset.
-    pub fn get_offset(&self) -> usize {
-        assert!(self.initialized, "Bit reader is not initialized");
-        self.bit_reader.get_byte_offset()
-    }
-
-    /// Initializes new mini block.
-    #[inline]
-    fn init_block(&mut self) -> Result<()> {
-        self.min_delta = self
-            .bit_reader
-            .get_zigzag_vlq_int()
-            .ok_or_else(|| eof_err!("Not enough data to decode 'min_delta'"))?;
-
-        let mut widths = vec![];
-        for _ in 0..self.num_mini_blocks {
-            let w = self
-                .bit_reader
-                .get_aligned::<u8>(1)
-                .ok_or_else(|| eof_err!("Not enough data to decode 'width'"))?;
-            widths.push(w);
-        }
-
-        self.delta_bit_widths.set_data(widths);
-        self.mini_block_idx = 0;
-        self.delta_bit_width = self.delta_bit_widths.data()[0];
-        self.values_current_mini_block = self.values_per_mini_block;
-        Ok(())
-    }
-
-    /// Loads delta into mini block.
-    #[inline]
-    fn load_deltas_in_mini_block(&mut self) -> Result<()>
-    where
-        T::T: FromBytes,
-    {
-        self.deltas_in_mini_block.clear();
-        if self.use_batch {
-            self.deltas_in_mini_block
-                .resize(self.values_current_mini_block, T::T::default());
-            let loaded = self.bit_reader.get_batch::<T::T>(
-                &mut self.deltas_in_mini_block[..],
-                self.delta_bit_width as usize,
-            );
-            assert!(loaded == self.values_current_mini_block);
-        } else {
-            for _ in 0..self.values_current_mini_block {
-                // TODO: load one batch at a time similar to int32
-                let delta = self
-                    .bit_reader
-                    .get_value::<T::T>(self.delta_bit_width as usize)
-                    .ok_or_else(|| eof_err!("Not enough data to decode 'delta'"))?;
-                self.deltas_in_mini_block.push(delta);
-            }
-        }
-
-        Ok(())
-    }
-}
-
-impl<T: DataType> Decoder<T> for DeltaBitPackDecoder<T> {
-    // # of total values is derived from encoding
-    #[inline]
-    fn set_data(&mut self, data: ByteBufferPtr, _index: usize) -> Result<()> {
-        self.bit_reader = BitReader::new(data);
-        self.initialized = true;
-
-        let block_size = self
-            .bit_reader
-            .get_vlq_int()
-            .ok_or_else(|| eof_err!("Not enough data to decode 'block_size'"))?;
-        self.num_mini_blocks = self
-            .bit_reader
-            .get_vlq_int()
-            .ok_or_else(|| eof_err!("Not enough data to decode 'num_mini_blocks'"))?;
-        self.num_values = self
-            .bit_reader
-            .get_vlq_int()
-            .ok_or_else(|| eof_err!("Not enough data to decode 'num_values'"))?
-            as usize;
-        self.first_value = self
-            .bit_reader
-            .get_zigzag_vlq_int()
-            .ok_or_else(|| eof_err!("Not enough data to decode 'first_value'"))?;
-
-        // Reset decoding state
-        self.first_value_read = false;
-        self.mini_block_idx = 0;
-        self.delta_bit_widths.clear();
-        self.values_current_mini_block = 0;
-
-        self.values_per_mini_block = (block_size / self.num_mini_blocks) as usize;
-        assert!(self.values_per_mini_block % 8 == 0);
-
-        Ok(())
-    }
-
-    fn get(&mut self, buffer: &mut [T::T]) -> Result<usize> {
-        assert!(self.initialized, "Bit reader is not initialized");
-
-        let num_values = cmp::min(buffer.len(), self.num_values);
-        for i in 0..num_values {
-            if !self.first_value_read {
-                self.set_decoded_value(buffer, i, self.first_value);
-                self.current_value = self.first_value;
-                self.first_value_read = true;
-                continue;
-            }
-
-            if self.values_current_mini_block == 0 {
-                self.mini_block_idx += 1;
-                if self.mini_block_idx < self.delta_bit_widths.size() {
-                    self.delta_bit_width =
-                        self.delta_bit_widths.data()[self.mini_block_idx];
-                    self.values_current_mini_block = self.values_per_mini_block;
-                } else {
-                    self.init_block()?;
-                }
-                self.load_deltas_in_mini_block()?;
-            }
-
-            // we decrement values in current mini block, so we need to invert index for
-            // delta
-            let delta = self.get_delta(
-                self.deltas_in_mini_block.len() - self.values_current_mini_block,
-            );
-            // It is OK for deltas to contain "overflowed" values after encoding,
-            // e.g. i64::MAX - i64::MIN, so we use `wrapping_add` to "overflow" again and
-            // restore original value.
-            self.current_value = self.current_value.wrapping_add(self.min_delta);
-            self.current_value = self.current_value.wrapping_add(delta as i64);
-            self.set_decoded_value(buffer, i, self.current_value);
-            self.values_current_mini_block -= 1;
-        }
-
-        self.num_values -= num_values;
-        Ok(num_values)
-    }
-
-    fn values_left(&self) -> usize {
-        self.num_values
-    }
-
-    fn encoding(&self) -> Encoding {
-        Encoding::DELTA_BINARY_PACKED
-    }
-}
-
-/// Helper trait to define specific conversions when decoding values
-trait DeltaBitPackDecoderConversion<T: DataType> {
-    /// Sets decoded value based on type `T`.
-    fn get_delta(&self, index: usize) -> i64;
-
-    fn set_decoded_value(&self, buffer: &mut [T::T], index: usize, value: i64);
-}
-
-impl<T: DataType> DeltaBitPackDecoderConversion<T> for DeltaBitPackDecoder<T> {
-    #[inline]
-    fn get_delta(&self, index: usize) -> i64 {
-        ensure_phys_ty!(
-            Type::INT32 | Type::INT64,
-            "DeltaBitPackDecoder only supports Int32Type and Int64Type"
-        );
-        self.deltas_in_mini_block[index].as_i64().unwrap()
-    }
-
-    #[inline]
-    fn set_decoded_value(&self, buffer: &mut [T::T], index: usize, value: i64) {
-        match T::get_physical_type() {
-            Type::INT32 => {
-                let val = buffer[index].as_mut_any().downcast_mut::<i32>().unwrap();
-
-                *val = value as i32;
-            }
-            Type::INT64 => {
-                let val = buffer[index].as_mut_any().downcast_mut::<i64>().unwrap();
-
-                *val = value;
-            }
-            _ => panic!("DeltaBitPackDecoder only supports Int32Type and Int64Type"),
-        };
-    }
-}
-
-// ----------------------------------------------------------------------
-// DELTA_LENGTH_BYTE_ARRAY Decoding
-
-/// Delta length byte array decoder.
-/// Only applied to byte arrays to separate the length values and the data, the lengths
-/// are encoded using DELTA_BINARY_PACKED encoding.
-/// See [`DeltaLengthByteArrayEncoder`](crate::encoding::DeltaLengthByteArrayEncoder)
-/// for more information.
-pub struct DeltaLengthByteArrayDecoder<T: DataType> {
-    // Lengths for each byte array in `data`
-    // TODO: add memory tracker to this
-    lengths: Vec<i32>,
-
-    // Current index into `lengths`
-    current_idx: usize,
-
-    // Concatenated byte array data
-    data: Option<ByteBufferPtr>,
-
-    // Offset into `data`, always point to the beginning of next byte array.
-    offset: usize,
-
-    // Number of values left in this decoder stream
-    num_values: usize,
-
-    // Placeholder to allow `T` as generic parameter
-    _phantom: PhantomData<T>,
-}
-
-impl<T: DataType> DeltaLengthByteArrayDecoder<T> {
-    /// Creates new delta length byte array decoder.
-    pub fn new() -> Self {
-        Self {
-            lengths: vec![],
-            current_idx: 0,
-            data: None,
-            offset: 0,
-            num_values: 0,
-            _phantom: PhantomData,
-        }
-    }
-}
-
-impl<T: DataType> Decoder<T> for DeltaLengthByteArrayDecoder<T> {
-    fn set_data(&mut self, data: ByteBufferPtr, num_values: usize) -> Result<()> {
-        match T::get_physical_type() {
-            Type::BYTE_ARRAY => {
-                let mut len_decoder = DeltaBitPackDecoder::<Int32Type>::new();
-                len_decoder.set_data(data.all(), num_values)?;
-                let num_lengths = len_decoder.values_left();
-                self.lengths.resize(num_lengths, 0);
-                len_decoder.get(&mut self.lengths[..])?;
-
-                self.data = Some(data.start_from(len_decoder.get_offset()));
-                self.offset = 0;
-                self.current_idx = 0;
-                self.num_values = num_lengths;
-                Ok(())
-            }
-            _ => Err(general_err!(
-                "DeltaLengthByteArrayDecoder only support ByteArrayType"
-            )),
-        }
-    }
-
-    fn get(&mut self, buffer: &mut [T::T]) -> Result<usize> {
-        match T::get_physical_type() {
-            Type::BYTE_ARRAY => {
-                assert!(self.data.is_some());
-
-                let data = self.data.as_ref().unwrap();
-                let num_values = cmp::min(buffer.len(), self.num_values);
-                for i in 0..num_values {
-                    let len = self.lengths[self.current_idx] as usize;
-
-                    buffer[i]
-                        .as_mut_any()
-                        .downcast_mut::<ByteArray>()
-                        .unwrap()
-                        .set_data(data.range(self.offset, len));
-
-                    self.offset += len;
-                    self.current_idx += 1;
-                }
-
-                self.num_values -= num_values;
-                Ok(num_values)
-            }
-            _ => Err(general_err!(
-                "DeltaLengthByteArrayDecoder only support ByteArrayType"
-            )),
-        }
-    }
-
-    fn values_left(&self) -> usize {
-        self.num_values
-    }
-
-    fn encoding(&self) -> Encoding {
-        Encoding::DELTA_LENGTH_BYTE_ARRAY
-    }
-}
-
-// ----------------------------------------------------------------------
-// DELTA_BYTE_ARRAY Decoding
-
-/// Delta byte array decoder.
-/// Prefix lengths are encoded using `DELTA_BINARY_PACKED` encoding, Suffixes are stored
-/// using `DELTA_LENGTH_BYTE_ARRAY` encoding.
-/// See [`DeltaByteArrayEncoder`](crate::encoding::DeltaByteArrayEncoder) for more
-/// information.
-pub struct DeltaByteArrayDecoder<T: DataType> {
-    // Prefix lengths for each byte array
-    // TODO: add memory tracker to this
-    prefix_lengths: Vec<i32>,
-
-    // The current index into `prefix_lengths`,
-    current_idx: usize,
-
-    // Decoder for all suffixes, the # of which should be the same as
-    // `prefix_lengths.len()`
-    suffix_decoder: Option<DeltaLengthByteArrayDecoder<ByteArrayType>>,
-
-    // The last byte array, used to derive the current prefix
-    previous_value: Vec<u8>,
-
-    // Number of values left
-    num_values: usize,
-
-    // Placeholder to allow `T` as generic parameter
-    _phantom: PhantomData<T>,
-}
-
-impl<T: DataType> DeltaByteArrayDecoder<T> {
-    /// Creates new delta byte array decoder.
-    pub fn new() -> Self {
-        Self {
-            prefix_lengths: vec![],
-            current_idx: 0,
-            suffix_decoder: None,
-            previous_value: vec![],
-            num_values: 0,
-            _phantom: PhantomData,
-        }
-    }
-}
-
-impl<'m, T: DataType> Decoder<T> for DeltaByteArrayDecoder<T> {
-    fn set_data(&mut self, data: ByteBufferPtr, num_values: usize) -> Result<()> {
-        match T::get_physical_type() {
-            Type::BYTE_ARRAY | Type::FIXED_LEN_BYTE_ARRAY => {
-                let mut prefix_len_decoder = DeltaBitPackDecoder::<Int32Type>::new();
-                prefix_len_decoder.set_data(data.all(), num_values)?;
-                let num_prefixes = prefix_len_decoder.values_left();
-                self.prefix_lengths.resize(num_prefixes, 0);
-                prefix_len_decoder.get(&mut self.prefix_lengths[..])?;
-
-                let mut suffix_decoder = DeltaLengthByteArrayDecoder::new();
-                suffix_decoder
-                    .set_data(data.start_from(prefix_len_decoder.get_offset()), num_values)?;
-                self.suffix_decoder = Some(suffix_decoder);
-                self.num_values = num_prefixes;
-                self.current_idx = 0;
-                self.previous_value.clear();
-                Ok(())
-            }
-            _ => {
-                Err(general_err!(
-                    "DeltaByteArrayDecoder only supports ByteArrayType and FixedLenByteArrayType"
-                ))
-            }
-        }
-    }
-
-    fn get(&mut self, buffer: &mut [T::T]) -> Result<usize> {
-        match T::get_physical_type() {
-            ty @ Type::BYTE_ARRAY | ty @ Type::FIXED_LEN_BYTE_ARRAY => {
-                let num_values = cmp::min(buffer.len(), self.num_values);
-                let mut v: [ByteArray; 1] = [ByteArray::new(); 1];
-                for i in 0..num_values {
-                    // Process suffix
-                    // TODO: this is awkward - maybe we should add a non-vectorized API?
-                    let suffix_decoder = self.suffix_decoder.as_mut().expect("decoder not initialized");
-                    suffix_decoder.get(&mut v[..])?;
-                    let suffix = v[0].data();
-
-                    // Extract current prefix length, can be 0
-                    let prefix_len = self.prefix_lengths[self.current_idx] as usize;
-
-                    // Concatenate prefix with suffix
-                    let mut result = Vec::new();
-                    result.extend_from_slice(&self.previous_value[0..prefix_len]);
-                    result.extend_from_slice(suffix);
-
-                    let data = ByteBufferPtr::new(result.clone());
-
-                    match ty {
-                        Type::BYTE_ARRAY => buffer[i]
-                            .as_mut_any()
-                            .downcast_mut::<ByteArray>()
-                            .unwrap()
-                            .set_data(data),
-                        Type::FIXED_LEN_BYTE_ARRAY => buffer[i]
-                            .as_mut_any()
-                            .downcast_mut::<FixedLenByteArray>()
-                            .unwrap()
-                            .set_data(data),
-                        _ => unreachable!(),
-                    };
-
-                    self.previous_value = result;
-                    self.current_idx += 1;
-                }
-
-                self.num_values -= num_values;
-                Ok(num_values)
-            }
-            _ => {
-                Err(general_err!(
-                    "DeltaByteArrayDecoder only supports ByteArrayType and FixedLenByteArrayType"
-                ))
-            }
-        }
-    }
-
-    fn values_left(&self) -> usize {
-        self.num_values
-    }
-
-    fn encoding(&self) -> Encoding {
-        Encoding::DELTA_BYTE_ARRAY
-    }
-}
-
-#[cfg(test)]
-#[allow(clippy::approx_constant)]
-mod tests {
-    use super::{super::encoding::*, *};
-
-    use std::sync::Arc;
-
-    use crate::schema::types::{
-        ColumnDescPtr, ColumnDescriptor, ColumnPath, Type as SchemaType,
-    };
-    use crate::util::{
-        bit_util::set_array_bit, memory::MemTracker, test_common::RandGen,
-    };
-
-    #[test]
-    fn test_get_decoders() {
-        // supported encodings
-        create_and_check_decoder::<Int32Type>(Encoding::PLAIN, None);
-        create_and_check_decoder::<Int32Type>(Encoding::DELTA_BINARY_PACKED, None);
-        create_and_check_decoder::<Int32Type>(Encoding::DELTA_LENGTH_BYTE_ARRAY, None);
-        create_and_check_decoder::<Int32Type>(Encoding::DELTA_BYTE_ARRAY, None);
-        create_and_check_decoder::<BoolType>(Encoding::RLE, None);
-
-        // error when initializing
-        create_and_check_decoder::<Int32Type>(
-            Encoding::RLE_DICTIONARY,
-            Some(general_err!(
-                "Cannot initialize this encoding through this function"
-            )),
-        );
-        create_and_check_decoder::<Int32Type>(
-            Encoding::PLAIN_DICTIONARY,
-            Some(general_err!(
-                "Cannot initialize this encoding through this function"
-            )),
-        );
-
-        // unsupported
-        create_and_check_decoder::<Int32Type>(
-            Encoding::BIT_PACKED,
-            Some(nyi_err!("Encoding BIT_PACKED is not supported")),
-        );
-    }
-
-    #[test]
-    fn test_plain_decode_int32() {
-        let data = vec![42, 18, 52];
-        let data_bytes = Int32Type::to_byte_array(&data[..]);
-        let mut buffer = vec![0; 3];
-        test_plain_decode::<Int32Type>(
-            ByteBufferPtr::new(data_bytes),
-            3,
-            -1,
-            &mut buffer[..],
-            &data[..],
-        );
-    }
-
-    #[test]
-    fn test_plain_decode_int32_spaced() {
-        let data = [42, 18, 52];
-        let expected_data = [0, 42, 0, 18, 0, 0, 52, 0];
-        let data_bytes = Int32Type::to_byte_array(&data[..]);
-        let mut buffer = vec![0; 8];
-        let num_nulls = 5;
-        let valid_bits = [0b01001010];
-        test_plain_decode_spaced::<Int32Type>(
-            ByteBufferPtr::new(data_bytes),
-            3,
-            -1,
-            &mut buffer[..],
-            num_nulls,
-            &valid_bits,
-            &expected_data[..],
-        );
-    }
-
-    #[test]
-    fn test_plain_decode_int64() {
-        let data = vec![42, 18, 52];
-        let data_bytes = Int64Type::to_byte_array(&data[..]);
-        let mut buffer = vec![0; 3];
-        test_plain_decode::<Int64Type>(
-            ByteBufferPtr::new(data_bytes),
-            3,
-            -1,
-            &mut buffer[..],
-            &data[..],
-        );
-    }
-
-    #[test]
-    fn test_plain_decode_float() {
-        let data = vec![3.14, 2.414, 12.51];
-        let data_bytes = FloatType::to_byte_array(&data[..]);
-        let mut buffer = vec![0.0; 3];
-        test_plain_decode::<FloatType>(
-            ByteBufferPtr::new(data_bytes),
-            3,
-            -1,
-            &mut buffer[..],
-            &data[..],
-        );
-    }
-
-    #[test]
-    fn test_plain_decode_double() {
-        let data = vec![3.14f64, 2.414f64, 12.51f64];
-        let data_bytes = DoubleType::to_byte_array(&data[..]);
-        let mut buffer = vec![0.0f64; 3];
-        test_plain_decode::<DoubleType>(
-            ByteBufferPtr::new(data_bytes),
-            3,
-            -1,
-            &mut buffer[..],
-            &data[..],
-        );
-    }
-
-    #[test]
-    fn test_plain_decode_int96() {
-        let mut data = vec![Int96::new(); 4];
-        data[0].set_data(11, 22, 33);
-        data[1].set_data(44, 55, 66);
-        data[2].set_data(10, 20, 30);
-        data[3].set_data(40, 50, 60);
-        let data_bytes = Int96Type::to_byte_array(&data[..]);
-        let mut buffer = vec![Int96::new(); 4];
-        test_plain_decode::<Int96Type>(
-            ByteBufferPtr::new(data_bytes),
-            4,
-            -1,
-            &mut buffer[..],
-            &data[..],
-        );
-    }
-
-    #[test]
-    fn test_plain_decode_bool() {
-        let data = vec![
-            false, true, false, false, true, false, true, true, false, true,
-        ];
-        let data_bytes = BoolType::to_byte_array(&data[..]);
-        let mut buffer = vec![false; 10];
-        test_plain_decode::<BoolType>(
-            ByteBufferPtr::new(data_bytes),
-            10,
-            -1,
-            &mut buffer[..],
-            &data[..],
-        );
-    }
-
-    #[test]
-    fn test_plain_decode_byte_array() {
-        let mut data = vec![ByteArray::new(); 2];
-        data[0].set_data(ByteBufferPtr::new(String::from("hello").into_bytes()));
-        data[1].set_data(ByteBufferPtr::new(String::from("parquet").into_bytes()));
-        let data_bytes = ByteArrayType::to_byte_array(&data[..]);
-        let mut buffer = vec![ByteArray::new(); 2];
-        test_plain_decode::<ByteArrayType>(
-            ByteBufferPtr::new(data_bytes),
-            2,
-            -1,
-            &mut buffer[..],
-            &data[..],
-        );
-    }
-
-    #[test]
-    fn test_plain_decode_fixed_len_byte_array() {
-        let mut data = vec![FixedLenByteArray::default(); 3];
-        data[0].set_data(ByteBufferPtr::new(String::from("bird").into_bytes()));
-        data[1].set_data(ByteBufferPtr::new(String::from("come").into_bytes()));
-        data[2].set_data(ByteBufferPtr::new(String::from("flow").into_bytes()));
-        let data_bytes = FixedLenByteArrayType::to_byte_array(&data[..]);
-        let mut buffer = vec![FixedLenByteArray::default(); 3];
-        test_plain_decode::<FixedLenByteArrayType>(
-            ByteBufferPtr::new(data_bytes),
-            3,
-            4,
-            &mut buffer[..],
-            &data[..],
-        );
-    }
-
-    fn test_plain_decode<T: DataType>(
-        data: ByteBufferPtr,
-        num_values: usize,
-        type_length: i32,
-        buffer: &mut [T::T],
-        expected: &[T::T],
-    ) {
-        let mut decoder: PlainDecoder<T> = PlainDecoder::new(type_length);
-        let result = decoder.set_data(data, num_values);
-        assert!(result.is_ok());
-        let result = decoder.get(buffer);
-        assert!(result.is_ok());
-        assert_eq!(decoder.values_left(), 0);
-        assert_eq!(buffer, expected);
-    }
-
-    fn test_plain_decode_spaced<T: DataType>(
-        data: ByteBufferPtr,
-        num_values: usize,
-        type_length: i32,
-        buffer: &mut [T::T],
-        num_nulls: usize,
-        valid_bits: &[u8],
-        expected: &[T::T],
-    ) {
-        let mut decoder: PlainDecoder<T> = PlainDecoder::new(type_length);
-        let result = decoder.set_data(data, num_values);
-        assert!(result.is_ok());
-        let result = decoder.get_spaced(buffer, num_nulls, valid_bits);
-        assert!(result.is_ok());
-        assert_eq!(num_values + num_nulls, result.unwrap());
-        assert_eq!(decoder.values_left(), 0);
-        assert_eq!(buffer, expected);
-    }
-
-    #[test]
-    #[should_panic(expected = "RleValueEncoder only supports BoolType")]
-    fn test_rle_value_encode_int32_not_supported() {
-        let mut encoder = RleValueEncoder::<Int32Type>::new();
-        encoder.put(&[1, 2, 3, 4]).unwrap();
-    }
-
-    #[test]
-    #[should_panic(expected = "RleValueDecoder only supports BoolType")]
-    fn test_rle_value_decode_int32_not_supported() {
-        let mut decoder = RleValueDecoder::<Int32Type>::new();
-        decoder
-            .set_data(ByteBufferPtr::new(vec![5, 0, 0, 0]), 1)
-            .unwrap();
-    }
-
-    #[test]
-    fn test_rle_value_decode_bool_decode() {
-        // Test multiple 'put' calls on the same encoder
-        let data = vec![
-            BoolType::gen_vec(-1, 256),
-            BoolType::gen_vec(-1, 257),
-            BoolType::gen_vec(-1, 126),
-        ];
-        test_rle_value_decode::<BoolType>(data);
-    }
-
-    #[test]
-    #[should_panic(expected = "Bit reader is not initialized")]
-    fn test_delta_bit_packed_not_initialized_offset() {
-        // Fail if set_data() is not called before get_offset()
-        let decoder = DeltaBitPackDecoder::<Int32Type>::new();
-        decoder.get_offset();
-    }
-
-    #[test]
-    #[should_panic(expected = "Bit reader is not initialized")]
-    fn test_delta_bit_packed_not_initialized_get() {
-        // Fail if set_data() is not called before get()
-        let mut decoder = DeltaBitPackDecoder::<Int32Type>::new();
-        let mut buffer = vec![];
-        decoder.get(&mut buffer).unwrap();
-    }
-
-    #[test]
-    fn test_delta_bit_packed_int32_empty() {
-        let data = vec![vec![0; 0]];
-        test_delta_bit_packed_decode::<Int32Type>(data);
-    }
-
-    #[test]
-    fn test_delta_bit_packed_int32_repeat() {
-        let block_data = vec![
-            1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2,
-            3, 4, 5, 6, 7, 8,
-        ];
-        test_delta_bit_packed_decode::<Int32Type>(vec![block_data]);
-    }
-
-    #[test]
-    fn test_delta_bit_packed_int32_uneven() {
-        let block_data = vec![1, -2, 3, -4, 5, 6, 7, 8, 9, 10, 11];
-        test_delta_bit_packed_decode::<Int32Type>(vec![block_data]);
-    }
-
-    #[test]
-    fn test_delta_bit_packed_int32_same_values() {
-        let block_data = vec![
-            127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
-            127,
-        ];
-        test_delta_bit_packed_decode::<Int32Type>(vec![block_data]);
-
-        let block_data = vec![
-            -127, -127, -127, -127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
-            -127, -127, -127,
-        ];
-        test_delta_bit_packed_decode::<Int32Type>(vec![block_data]);
-    }
-
-    #[test]
-    fn test_delta_bit_packed_int32_min_max() {
-        let block_data = vec![
-            i32::min_value(),
-            i32::max_value(),
-            i32::min_value(),
-            i32::max_value(),
-            i32::min_value(),
-            i32::max_value(),
-            i32::min_value(),
-            i32::max_value(),
-        ];
-        test_delta_bit_packed_decode::<Int32Type>(vec![block_data]);
-    }
-
-    #[test]
-    fn test_delta_bit_packed_int32_multiple_blocks() {
-        // Test multiple 'put' calls on the same encoder
-        let data = vec![
-            Int32Type::gen_vec(-1, 64),
-            Int32Type::gen_vec(-1, 128),
-            Int32Type::gen_vec(-1, 64),
-        ];
-        test_delta_bit_packed_decode::<Int32Type>(data);
-    }
-
-    #[test]
-    fn test_delta_bit_packed_int32_data_across_blocks() {
-        // Test multiple 'put' calls on the same encoder
-        let data = vec![Int32Type::gen_vec(-1, 256), Int32Type::gen_vec(-1, 257)];
-        test_delta_bit_packed_decode::<Int32Type>(data);
-    }
-
-    #[test]
-    fn test_delta_bit_packed_int32_with_empty_blocks() {
-        let data = vec![
-            Int32Type::gen_vec(-1, 128),
-            vec![0; 0],
-            Int32Type::gen_vec(-1, 64),
-        ];
-        test_delta_bit_packed_decode::<Int32Type>(data);
-    }
-
-    #[test]
-    fn test_delta_bit_packed_int64_empty() {
-        let data = vec![vec![0; 0]];
-        test_delta_bit_packed_decode::<Int64Type>(data);
-    }
-
-    #[test]
-    fn test_delta_bit_packed_int64_min_max() {
-        let block_data = vec![
-            i64::min_value(),
-            i64::max_value(),
-            i64::min_value(),
-            i64::max_value(),
-            i64::min_value(),
-            i64::max_value(),
-            i64::min_value(),
-            i64::max_value(),
-        ];
-        test_delta_bit_packed_decode::<Int64Type>(vec![block_data]);
-    }
-
-    #[test]
-    fn test_delta_bit_packed_int64_multiple_blocks() {
-        // Test multiple 'put' calls on the same encoder
-        let data = vec![
-            Int64Type::gen_vec(-1, 64),
-            Int64Type::gen_vec(-1, 128),
-            Int64Type::gen_vec(-1, 64),
-        ];
-        test_delta_bit_packed_decode::<Int64Type>(data);
-    }
-
-    #[test]
-    fn test_delta_bit_packed_decoder_sample() {
-        let data_bytes = vec![
-            128, 1, 4, 3, 58, 28, 6, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-            0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        ];
-        let buffer = ByteBufferPtr::new(data_bytes);
-        let mut decoder: DeltaBitPackDecoder<Int32Type> = DeltaBitPackDecoder::new();
-        decoder.set_data(buffer, 3).unwrap();
-        // check exact offsets, because when reading partial values we end up with
-        // some data not being read from bit reader
-        assert_eq!(decoder.get_offset(), 5);
-        let mut result = vec![0, 0, 0];
-        decoder.get(&mut result).unwrap();
-        assert_eq!(decoder.get_offset(), 34);
-        assert_eq!(result, vec![29, 43, 89]);
-    }
-
-    #[test]
-    fn test_delta_byte_array_same_arrays() {
-        let data = vec![
-            vec![ByteArray::from(vec![1, 2, 3, 4, 5, 6])],
-            vec![
-                ByteArray::from(vec![1, 2, 3, 4, 5, 6]),
-                ByteArray::from(vec![1, 2, 3, 4, 5, 6]),
-            ],
-            vec![
-                ByteArray::from(vec![1, 2, 3, 4, 5, 6]),
-                ByteArray::from(vec![1, 2, 3, 4, 5, 6]),
-            ],
-        ];
-        test_delta_byte_array_decode(data);
-    }
-
-    #[test]
-    fn test_delta_byte_array_unique_arrays() {
-        let data = vec![
-            vec![ByteArray::from(vec![1])],
-            vec![ByteArray::from(vec![2, 3]), ByteArray::from(vec![4, 5, 6])],
-            vec![
-                ByteArray::from(vec![7, 8]),
-                ByteArray::from(vec![9, 0, 1, 2]),
-            ],
-        ];
-        test_delta_byte_array_decode(data);
-    }
-
-    #[test]
-    fn test_delta_byte_array_single_array() {
-        let data = vec![vec![ByteArray::from(vec![1, 2, 3, 4, 5, 6])]];
-        test_delta_byte_array_decode(data);
-    }
-
-    fn test_rle_value_decode<T: DataType>(data: Vec<Vec<T::T>>) {
-        test_encode_decode::<T>(data, Encoding::RLE);
-    }
-
-    fn test_delta_bit_packed_decode<T: DataType>(data: Vec<Vec<T::T>>) {
-        test_encode_decode::<T>(data, Encoding::DELTA_BINARY_PACKED);
-    }
-
-    fn test_delta_byte_array_decode(data: Vec<Vec<ByteArray>>) {
-        test_encode_decode::<ByteArrayType>(data, Encoding::DELTA_BYTE_ARRAY);
-    }
-
-    // Input data represents vector of data slices to write (test multiple `put()` calls)
-    // For example,
-    //   vec![vec![1, 2, 3]] invokes `put()` once and writes {1, 2, 3}
-    //   vec![vec![1, 2], vec![3]] invokes `put()` twice and writes {1, 2, 3}
-    fn test_encode_decode<T: DataType>(data: Vec<Vec<T::T>>, encoding: Encoding) {
-        // Type length should not really matter for encode/decode test,
-        // otherwise change it based on type
-        let col_descr = create_test_col_desc_ptr(-1, T::get_physical_type());
-
-        // Encode data
-        let mut encoder =
-            get_encoder::<T>(col_descr.clone(), encoding, Arc::new(MemTracker::new()))
-                .expect("get encoder");
-
-        for v in &data[..] {
-            encoder.put(&v[..]).expect("ok to encode");
-        }
-        let bytes = encoder.flush_buffer().expect("ok to flush buffer");
-
-        // Flatten expected data as contiguous array of values
-        let expected: Vec<T::T> = data.iter().flat_map(|s| s.clone()).collect();
-
-        // Decode data and compare with original
-        let mut decoder = get_decoder::<T>(col_descr, encoding).expect("get decoder");
-
-        let mut result = vec![T::T::default(); expected.len()];
-        decoder
-            .set_data(bytes, expected.len())
-            .expect("ok to set data");
-        let mut result_num_values = 0;
-        while decoder.values_left() > 0 {
-            result_num_values += decoder
-                .get(&mut result[result_num_values..])
-                .expect("ok to decode");
-        }
-        assert_eq!(result_num_values, expected.len());
-        assert_eq!(result, expected);
-    }
-
-    fn create_and_check_decoder<T: DataType>(
-        encoding: Encoding,
-        err: Option<ParquetError>,
-    ) {
-        let descr = create_test_col_desc_ptr(-1, T::get_physical_type());
-        let decoder = get_decoder::<T>(descr, encoding);
-        match err {
-            Some(parquet_error) => {
-                assert!(decoder.is_err());
-                assert_eq!(decoder.err().unwrap(), parquet_error);
-            }
-            None => {
-                assert!(decoder.is_ok());
-                assert_eq!(decoder.unwrap().encoding(), encoding);
-            }
-        }
-    }
-
-    // Creates test column descriptor.
-    fn create_test_col_desc_ptr(type_len: i32, t: Type) -> ColumnDescPtr {
-        let ty = SchemaType::primitive_type_builder("t", t)
-            .with_length(type_len)
-            .build()
-            .unwrap();
-        Arc::new(ColumnDescriptor::new(
-            Arc::new(ty),
-            0,
-            0,
-            ColumnPath::new(vec![]),
-        ))
-    }
-
-    fn usize_to_bytes(v: usize) -> [u8; 4] {
-        (v as u32).to_ne_bytes()
-    }
-
-    /// A util trait to convert slices of different types to byte arrays
-    trait ToByteArray<T: DataType> {
-        #[allow(clippy::wrong_self_convention)]
-        fn to_byte_array(data: &[T::T]) -> Vec<u8>;
-    }
-
-    macro_rules! to_byte_array_impl {
-        ($ty: ty) => {
-            impl ToByteArray<$ty> for $ty {
-                fn to_byte_array(data: &[<$ty as DataType>::T]) -> Vec<u8> {
-                    <$ty as DataType>::T::slice_as_bytes(data).to_vec()
-                }
-            }
-        };
-    }
-
-    to_byte_array_impl!(Int32Type);
-    to_byte_array_impl!(Int64Type);
-    to_byte_array_impl!(FloatType);
-    to_byte_array_impl!(DoubleType);
-
-    impl ToByteArray<BoolType> for BoolType {
-        fn to_byte_array(data: &[bool]) -> Vec<u8> {
-            let mut v = vec![];
-            for i in 0..data.len() {
-                if i % 8 == 0 {
-                    v.push(0);
-                }
-                if data[i] {
-                    set_array_bit(&mut v[..], i);
-                }
-            }
-            v
-        }
-    }
-
-    impl ToByteArray<Int96Type> for Int96Type {
-        fn to_byte_array(data: &[Int96]) -> Vec<u8> {
-            let mut v = vec![];
-            for d in data {
-                v.extend_from_slice(d.as_bytes());
-            }
-            v
-        }
-    }
-
-    impl ToByteArray<ByteArrayType> for ByteArrayType {
-        fn to_byte_array(data: &[ByteArray]) -> Vec<u8> {
-            let mut v = vec![];
-            for d in data {
-                let buf = d.data();
-                let len = &usize_to_bytes(buf.len());
-                v.extend_from_slice(len);
-                v.extend(buf);
-            }
-            v
-        }
-    }
-
-    impl ToByteArray<FixedLenByteArrayType> for FixedLenByteArrayType {
-        fn to_byte_array(data: &[FixedLenByteArray]) -> Vec<u8> {
-            let mut v = vec![];
-            for d in data {
-                let buf = d.data();
-                v.extend(buf);
-            }
-            v
-        }
-    }
-}
diff --git a/rust/parquet/src/encodings/encoding.rs b/rust/parquet/src/encodings/encoding.rs
deleted file mode 100644
index d04273817e1..00000000000
--- a/rust/parquet/src/encodings/encoding.rs
+++ /dev/null
@@ -1,1334 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Contains all supported encoders for Parquet.
-
-use std::{cmp, io::Write, marker::PhantomData};
-
-use crate::basic::*;
-use crate::data_type::private::ParquetValueType;
-use crate::data_type::*;
-use crate::encodings::rle::RleEncoder;
-use crate::errors::{ParquetError, Result};
-use crate::schema::types::ColumnDescPtr;
-use crate::util::{
-    bit_util::{self, log2, num_required_bits, BitWriter},
-    hash_util,
-    memory::{Buffer, ByteBuffer, ByteBufferPtr, MemTrackerPtr},
-};
-
-// ----------------------------------------------------------------------
-// Encoders
-
-/// An Parquet encoder for the data type `T`.
-///
-/// Currently this allocates internal buffers for the encoded values. After done putting
-/// values, caller should call `flush_buffer()` to get an immutable buffer pointer.
-pub trait Encoder<T: DataType> {
-    /// Encodes data from `values`.
-    fn put(&mut self, values: &[T::T]) -> Result<()>;
-
-    /// Encodes data from `values`, which contains spaces for null values, that is
-    /// identified by `valid_bits`.
-    ///
-    /// Returns the number of non-null values encoded.
-    fn put_spaced(&mut self, values: &[T::T], valid_bits: &[u8]) -> Result<usize> {
-        let num_values = values.len();
-        let mut buffer = Vec::with_capacity(num_values);
-        // TODO: this is pretty inefficient. Revisit in future.
-        for i in 0..num_values {
-            if bit_util::get_bit(valid_bits, i) {
-                buffer.push(values[i].clone());
-            }
-        }
-        self.put(&buffer[..])?;
-        Ok(buffer.len())
-    }
-
-    /// Returns the encoding type of this encoder.
-    fn encoding(&self) -> Encoding;
-
-    /// Returns an estimate of the encoded data, in bytes.
-    /// Method call must be O(1).
-    fn estimated_data_encoded_size(&self) -> usize;
-
-    /// Flushes the underlying byte buffer that's being processed by this encoder, and
-    /// return the immutable copy of it. This will also reset the internal state.
-    fn flush_buffer(&mut self) -> Result<ByteBufferPtr>;
-}
-
-/// Gets a encoder for the particular data type `T` and encoding `encoding`. Memory usage
-/// for the encoder instance is tracked by `mem_tracker`.
-pub fn get_encoder<T: DataType>(
-    desc: ColumnDescPtr,
-    encoding: Encoding,
-    mem_tracker: MemTrackerPtr,
-) -> Result<Box<dyn Encoder<T>>> {
-    let encoder: Box<dyn Encoder<T>> = match encoding {
-        Encoding::PLAIN => Box::new(PlainEncoder::new(desc, mem_tracker, vec![])),
-        Encoding::RLE_DICTIONARY | Encoding::PLAIN_DICTIONARY => {
-            return Err(general_err!(
-                "Cannot initialize this encoding through this function"
-            ));
-        }
-        Encoding::RLE => Box::new(RleValueEncoder::new()),
-        Encoding::DELTA_BINARY_PACKED => Box::new(DeltaBitPackEncoder::new()),
-        Encoding::DELTA_LENGTH_BYTE_ARRAY => Box::new(DeltaLengthByteArrayEncoder::new()),
-        Encoding::DELTA_BYTE_ARRAY => Box::new(DeltaByteArrayEncoder::new()),
-        e => return Err(nyi_err!("Encoding {} is not supported", e)),
-    };
-    Ok(encoder)
-}
-
-// ----------------------------------------------------------------------
-// Plain encoding
-
-/// Plain encoding that supports all types.
-/// Values are encoded back to back.
-/// The plain encoding is used whenever a more efficient encoding can not be used.
-/// It stores the data in the following format:
-/// - BOOLEAN - 1 bit per value, 0 is false; 1 is true.
-/// - INT32 - 4 bytes per value, stored as little-endian.
-/// - INT64 - 8 bytes per value, stored as little-endian.
-/// - FLOAT - 4 bytes per value, stored as IEEE little-endian.
-/// - DOUBLE - 8 bytes per value, stored as IEEE little-endian.
-/// - BYTE_ARRAY - 4 byte length stored as little endian, followed by bytes.
-/// - FIXED_LEN_BYTE_ARRAY - just the bytes are stored.
-pub struct PlainEncoder<T: DataType> {
-    buffer: ByteBuffer,
-    bit_writer: BitWriter,
-    desc: ColumnDescPtr,
-    _phantom: PhantomData<T>,
-}
-
-impl<T: DataType> PlainEncoder<T> {
-    /// Creates new plain encoder.
-    pub fn new(desc: ColumnDescPtr, mem_tracker: MemTrackerPtr, vec: Vec<u8>) -> Self {
-        let mut byte_buffer = ByteBuffer::new().with_mem_tracker(mem_tracker);
-        byte_buffer.set_data(vec);
-        Self {
-            buffer: byte_buffer,
-            bit_writer: BitWriter::new(256),
-            desc,
-            _phantom: PhantomData,
-        }
-    }
-}
-
-impl<T: DataType> Encoder<T> for PlainEncoder<T> {
-    // Performance Note:
-    // As far as can be seen these functions are rarely called and as such we can hint to the
-    // compiler that they dont need to be folded into hot locations in the final output.
-    #[cold]
-    fn encoding(&self) -> Encoding {
-        Encoding::PLAIN
-    }
-
-    fn estimated_data_encoded_size(&self) -> usize {
-        self.buffer.size() + self.bit_writer.bytes_written()
-    }
-
-    #[inline]
-    fn flush_buffer(&mut self) -> Result<ByteBufferPtr> {
-        self.buffer.write_all(self.bit_writer.flush_buffer())?;
-        self.buffer.flush()?;
-        self.bit_writer.clear();
-
-        Ok(self.buffer.consume())
-    }
-
-    #[inline]
-    fn put(&mut self, values: &[T::T]) -> Result<()> {
-        T::T::encode(values, &mut self.buffer, &mut self.bit_writer)?;
-        Ok(())
-    }
-}
-
-// ----------------------------------------------------------------------
-// Dictionary encoding
-
-const INITIAL_HASH_TABLE_SIZE: usize = 1024;
-const MAX_HASH_LOAD: f32 = 0.7;
-const HASH_SLOT_EMPTY: i32 = -1;
-
-/// Dictionary encoder.
-/// The dictionary encoding builds a dictionary of values encountered in a given column.
-/// The dictionary page is written first, before the data pages of the column chunk.
-///
-/// Dictionary page format: the entries in the dictionary - in dictionary order -
-/// using the plain encoding.
-///
-/// Data page format: the bit width used to encode the entry ids stored as 1 byte
-/// (max bit width = 32), followed by the values encoded using RLE/Bit packed described
-/// above (with the given bit width).
-pub struct DictEncoder<T: DataType> {
-    // Descriptor for the column to be encoded.
-    desc: ColumnDescPtr,
-
-    // Size of the table. **Must be** a power of 2.
-    hash_table_size: usize,
-
-    // Store `hash_table_size` - 1, so that `j & mod_bitmask` is equivalent to
-    // `j % hash_table_size`, but uses far fewer CPU cycles.
-    mod_bitmask: u32,
-
-    // Stores indices which map (many-to-one) to the values in the `uniques` array.
-    // Here we are using fix-sized array with linear probing.
-    // A slot with `HASH_SLOT_EMPTY` indicates the slot is not currently occupied.
-    hash_slots: Buffer<i32>,
-
-    // Indices that have not yet be written out by `write_indices()`.
-    buffered_indices: Buffer<i32>,
-
-    // The unique observed values.
-    uniques: Buffer<T::T>,
-
-    // Size in bytes needed to encode this dictionary.
-    uniques_size_in_bytes: usize,
-
-    // Tracking memory usage for the various data structures in this struct.
-    mem_tracker: MemTrackerPtr,
-}
-
-impl<T: DataType> DictEncoder<T> {
-    /// Creates new dictionary encoder.
-    pub fn new(desc: ColumnDescPtr, mem_tracker: MemTrackerPtr) -> Self {
-        let mut slots = Buffer::new().with_mem_tracker(mem_tracker.clone());
-        slots.resize(INITIAL_HASH_TABLE_SIZE, -1);
-        Self {
-            desc,
-            hash_table_size: INITIAL_HASH_TABLE_SIZE,
-            mod_bitmask: (INITIAL_HASH_TABLE_SIZE - 1) as u32,
-            hash_slots: slots,
-            buffered_indices: Buffer::new().with_mem_tracker(mem_tracker.clone()),
-            uniques: Buffer::new().with_mem_tracker(mem_tracker.clone()),
-            uniques_size_in_bytes: 0,
-            mem_tracker,
-        }
-    }
-
-    /// Returns true if dictionary entries are sorted, false otherwise.
-    #[inline]
-    pub fn is_sorted(&self) -> bool {
-        // Sorting is not supported currently.
-        false
-    }
-
-    /// Returns number of unique values (keys) in the dictionary.
-    pub fn num_entries(&self) -> usize {
-        self.uniques.size()
-    }
-
-    /// Returns size of unique values (keys) in the dictionary, in bytes.
-    pub fn dict_encoded_size(&self) -> usize {
-        self.uniques_size_in_bytes
-    }
-
-    /// Writes out the dictionary values with PLAIN encoding in a byte buffer, and return
-    /// the result.
-    #[inline]
-    pub fn write_dict(&self) -> Result<ByteBufferPtr> {
-        let mut plain_encoder =
-            PlainEncoder::<T>::new(self.desc.clone(), self.mem_tracker.clone(), vec![]);
-        plain_encoder.put(self.uniques.data())?;
-        plain_encoder.flush_buffer()
-    }
-
-    /// Writes out the dictionary values with RLE encoding in a byte buffer, and return
-    /// the result.
-    pub fn write_indices(&mut self) -> Result<ByteBufferPtr> {
-        // TODO: the caller should allocate the buffer
-        let buffer_len = self.estimated_data_encoded_size();
-        let mut buffer: Vec<u8> = vec![0; buffer_len as usize];
-        buffer[0] = self.bit_width() as u8;
-        self.mem_tracker.alloc(buffer.capacity() as i64);
-
-        // Write bit width in the first byte
-        buffer.write_all((self.bit_width() as u8).as_bytes())?;
-        let mut encoder = RleEncoder::new_from_buf(self.bit_width(), buffer, 1);
-        for index in self.buffered_indices.data() {
-            if !encoder.put(*index as u64)? {
-                return Err(general_err!("Encoder doesn't have enough space"));
-            }
-        }
-        self.buffered_indices.clear();
-        Ok(ByteBufferPtr::new(encoder.consume()?))
-    }
-
-    #[inline]
-    #[allow(clippy::unnecessary_wraps)]
-    fn put_one(&mut self, value: &T::T) -> Result<()> {
-        let mut j = (hash_util::hash(value, 0) & self.mod_bitmask) as usize;
-        let mut index = self.hash_slots[j];
-
-        while index != HASH_SLOT_EMPTY && self.uniques[index as usize] != *value {
-            j += 1;
-            if j == self.hash_table_size {
-                j = 0;
-            }
-            index = self.hash_slots[j];
-        }
-
-        if index == HASH_SLOT_EMPTY {
-            index = self.insert_fresh_slot(j, value.clone());
-        }
-
-        self.buffered_indices.push(index);
-        Ok(())
-    }
-
-    #[inline(never)]
-    fn insert_fresh_slot(&mut self, slot: usize, value: T::T) -> i32 {
-        let index = self.uniques.size() as i32;
-        self.hash_slots[slot] = index;
-
-        let (base_size, num_elements) = value.dict_encoding_size();
-
-        let unique_size = match T::get_physical_type() {
-            Type::BYTE_ARRAY => base_size + num_elements,
-            Type::FIXED_LEN_BYTE_ARRAY => self.desc.type_length() as usize,
-            _ => base_size,
-        };
-
-        self.uniques_size_in_bytes += unique_size;
-        self.uniques.push(value);
-
-        if self.uniques.size() > (self.hash_table_size as f32 * MAX_HASH_LOAD) as usize {
-            self.double_table_size();
-        }
-
-        index
-    }
-
-    #[inline]
-    fn bit_width(&self) -> u8 {
-        let num_entries = self.uniques.size();
-        if num_entries == 0 {
-            0
-        } else if num_entries == 1 {
-            1
-        } else {
-            log2(num_entries as u64) as u8
-        }
-    }
-
-    fn double_table_size(&mut self) {
-        let new_size = self.hash_table_size * 2;
-        let mut new_hash_slots = Buffer::new().with_mem_tracker(self.mem_tracker.clone());
-        new_hash_slots.resize(new_size, HASH_SLOT_EMPTY);
-        for i in 0..self.hash_table_size {
-            let index = self.hash_slots[i];
-            if index == HASH_SLOT_EMPTY {
-                continue;
-            }
-            let value = &self.uniques[index as usize];
-            let mut j = (hash_util::hash(value, 0) & ((new_size - 1) as u32)) as usize;
-            let mut slot = new_hash_slots[j];
-            while slot != HASH_SLOT_EMPTY && self.uniques[slot as usize] != *value {
-                j += 1;
-                if j == new_size {
-                    j = 0;
-                }
-                slot = new_hash_slots[j];
-            }
-
-            new_hash_slots[j] = index;
-        }
-
-        self.hash_table_size = new_size;
-        self.mod_bitmask = (new_size - 1) as u32;
-        self.hash_slots = new_hash_slots;
-    }
-}
-
-impl<T: DataType> Encoder<T> for DictEncoder<T> {
-    #[inline]
-    fn put(&mut self, values: &[T::T]) -> Result<()> {
-        for i in values {
-            self.put_one(&i)?
-        }
-        Ok(())
-    }
-
-    // Performance Note:
-    // As far as can be seen these functions are rarely called and as such we can hint to the
-    // compiler that they dont need to be folded into hot locations in the final output.
-    #[cold]
-    fn encoding(&self) -> Encoding {
-        Encoding::PLAIN_DICTIONARY
-    }
-
-    #[inline]
-    fn estimated_data_encoded_size(&self) -> usize {
-        let bit_width = self.bit_width();
-        1 + RleEncoder::min_buffer_size(bit_width)
-            + RleEncoder::max_buffer_size(bit_width, self.buffered_indices.size())
-    }
-
-    #[inline]
-    fn flush_buffer(&mut self) -> Result<ByteBufferPtr> {
-        self.write_indices()
-    }
-}
-
-// ----------------------------------------------------------------------
-// RLE encoding
-
-const DEFAULT_RLE_BUFFER_LEN: usize = 1024;
-
-/// RLE/Bit-Packing hybrid encoding for values.
-/// Currently is used only for data pages v2 and supports boolean types.
-pub struct RleValueEncoder<T: DataType> {
-    // Buffer with raw values that we collect,
-    // when flushing buffer they are encoded using RLE encoder
-    encoder: Option<RleEncoder>,
-    _phantom: PhantomData<T>,
-}
-
-impl<T: DataType> RleValueEncoder<T> {
-    /// Creates new rle value encoder.
-    pub fn new() -> Self {
-        Self {
-            encoder: None,
-            _phantom: PhantomData,
-        }
-    }
-}
-
-impl<T: DataType> Encoder<T> for RleValueEncoder<T> {
-    #[inline]
-    fn put(&mut self, values: &[T::T]) -> Result<()> {
-        ensure_phys_ty!(Type::BOOLEAN, "RleValueEncoder only supports BoolType");
-
-        if self.encoder.is_none() {
-            self.encoder = Some(RleEncoder::new(1, DEFAULT_RLE_BUFFER_LEN));
-        }
-        let rle_encoder = self.encoder.as_mut().unwrap();
-        for value in values {
-            let value = value.as_u64()?;
-            if !rle_encoder.put(value)? {
-                return Err(general_err!("RLE buffer is full"));
-            }
-        }
-        Ok(())
-    }
-
-    // Performance Note:
-    // As far as can be seen these functions are rarely called and as such we can hint to the
-    // compiler that they dont need to be folded into hot locations in the final output.
-    #[cold]
-    fn encoding(&self) -> Encoding {
-        Encoding::RLE
-    }
-
-    #[inline]
-    fn estimated_data_encoded_size(&self) -> usize {
-        match self.encoder {
-            Some(ref enc) => enc.len(),
-            None => 0,
-        }
-    }
-
-    #[inline]
-    fn flush_buffer(&mut self) -> Result<ByteBufferPtr> {
-        ensure_phys_ty!(Type::BOOLEAN, "RleValueEncoder only supports BoolType");
-        let rle_encoder = self
-            .encoder
-            .as_mut()
-            .expect("RLE value encoder is not initialized");
-
-        // Flush all encoder buffers and raw values
-        let encoded_data = {
-            let buf = rle_encoder.flush_buffer()?;
-
-            // Note that buf does not have any offset, all data is encoded bytes
-            let len = (buf.len() as i32).to_le();
-            let len_bytes = len.as_bytes();
-            let mut encoded_data = vec![];
-            encoded_data.extend_from_slice(len_bytes);
-            encoded_data.extend_from_slice(buf);
-            encoded_data
-        };
-        // Reset rle encoder for the next batch
-        rle_encoder.clear();
-
-        Ok(ByteBufferPtr::new(encoded_data))
-    }
-}
-
-// ----------------------------------------------------------------------
-// DELTA_BINARY_PACKED encoding
-
-const MAX_PAGE_HEADER_WRITER_SIZE: usize = 32;
-const MAX_BIT_WRITER_SIZE: usize = 10 * 1024 * 1024;
-const DEFAULT_BLOCK_SIZE: usize = 128;
-const DEFAULT_NUM_MINI_BLOCKS: usize = 4;
-
-/// Delta bit packed encoder.
-/// Consists of a header followed by blocks of delta encoded values binary packed.
-///
-/// Delta-binary-packing:
-/// ```shell
-///   [page-header] [block 1], [block 2], ... [block N]
-/// ```
-///
-/// Each page header consists of:
-/// ```shell
-///   [block size] [number of miniblocks in a block] [total value count] [first value]
-/// ```
-///
-/// Each block consists of:
-/// ```shell
-///   [min delta] [list of bitwidths of miniblocks] [miniblocks]
-/// ```
-///
-/// Current implementation writes values in `put` method, multiple calls to `put` to
-/// existing block or start new block if block size is exceeded. Calling `flush_buffer`
-/// writes out all data and resets internal state, including page header.
-///
-/// Supports only INT32 and INT64.
-pub struct DeltaBitPackEncoder<T: DataType> {
-    page_header_writer: BitWriter,
-    bit_writer: BitWriter,
-    total_values: usize,
-    first_value: i64,
-    current_value: i64,
-    block_size: usize,
-    mini_block_size: usize,
-    num_mini_blocks: usize,
-    values_in_block: usize,
-    deltas: Vec<i64>,
-    _phantom: PhantomData<T>,
-}
-
-impl<T: DataType> DeltaBitPackEncoder<T> {
-    /// Creates new delta bit packed encoder.
-    pub fn new() -> Self {
-        let block_size = DEFAULT_BLOCK_SIZE;
-        let num_mini_blocks = DEFAULT_NUM_MINI_BLOCKS;
-        let mini_block_size = block_size / num_mini_blocks;
-        assert!(mini_block_size % 8 == 0);
-        Self::assert_supported_type();
-
-        DeltaBitPackEncoder {
-            page_header_writer: BitWriter::new(MAX_PAGE_HEADER_WRITER_SIZE),
-            bit_writer: BitWriter::new(MAX_BIT_WRITER_SIZE),
-            total_values: 0,
-            first_value: 0,
-            current_value: 0, // current value to keep adding deltas
-            block_size,       // can write fewer values than block size for last block
-            mini_block_size,
-            num_mini_blocks,
-            values_in_block: 0, // will be at most block_size
-            deltas: vec![0; block_size],
-            _phantom: PhantomData,
-        }
-    }
-
-    /// Writes page header for blocks, this method is invoked when we are done encoding
-    /// values. It is also okay to encode when no values have been provided
-    fn write_page_header(&mut self) {
-        // We ignore the result of each 'put' operation, because
-        // MAX_PAGE_HEADER_WRITER_SIZE is chosen to fit all header values and
-        // guarantees that writes will not fail.
-
-        // Write the size of each block
-        self.page_header_writer.put_vlq_int(self.block_size as u64);
-        // Write the number of mini blocks
-        self.page_header_writer
-            .put_vlq_int(self.num_mini_blocks as u64);
-        // Write the number of all values (including non-encoded first value)
-        self.page_header_writer
-            .put_vlq_int(self.total_values as u64);
-        // Write first value
-        self.page_header_writer.put_zigzag_vlq_int(self.first_value);
-    }
-
-    // Write current delta buffer (<= 'block size' values) into bit writer
-    #[inline(never)]
-    fn flush_block_values(&mut self) -> Result<()> {
-        if self.values_in_block == 0 {
-            return Ok(());
-        }
-
-        let mut min_delta = i64::max_value();
-        for i in 0..self.values_in_block {
-            min_delta = cmp::min(min_delta, self.deltas[i]);
-        }
-
-        // Write min delta
-        self.bit_writer.put_zigzag_vlq_int(min_delta);
-
-        // Slice to store bit width for each mini block
-        let offset = self.bit_writer.skip(self.num_mini_blocks)?;
-
-        for i in 0..self.num_mini_blocks {
-            // Find how many values we need to encode - either block size or whatever
-            // values left
-            let n = cmp::min(self.mini_block_size, self.values_in_block);
-            if n == 0 {
-                break;
-            }
-
-            // Compute the max delta in current mini block
-            let mut max_delta = i64::min_value();
-            for j in 0..n {
-                max_delta =
-                    cmp::max(max_delta, self.deltas[i * self.mini_block_size + j]);
-            }
-
-            // Compute bit width to store (max_delta - min_delta)
-            let bit_width = num_required_bits(self.subtract_u64(max_delta, min_delta));
-            self.bit_writer.write_at(offset + i, bit_width as u8);
-
-            // Encode values in current mini block using min_delta and bit_width
-            for j in 0..n {
-                let packed_value = self
-                    .subtract_u64(self.deltas[i * self.mini_block_size + j], min_delta);
-                self.bit_writer.put_value(packed_value, bit_width);
-            }
-
-            // Pad the last block (n < mini_block_size)
-            for _ in n..self.mini_block_size {
-                self.bit_writer.put_value(0, bit_width);
-            }
-
-            self.values_in_block -= n;
-        }
-
-        assert!(
-            self.values_in_block == 0,
-            "Expected 0 values in block, found {}",
-            self.values_in_block
-        );
-        Ok(())
-    }
-}
-
-// Implementation is shared between Int32Type and Int64Type,
-// see `DeltaBitPackEncoderConversion` below for specifics.
-impl<T: DataType> Encoder<T> for DeltaBitPackEncoder<T> {
-    fn put(&mut self, values: &[T::T]) -> Result<()> {
-        if values.is_empty() {
-            return Ok(());
-        }
-
-        // Define values to encode, initialize state
-        let mut idx = if self.total_values == 0 {
-            self.first_value = self.as_i64(values, 0);
-            self.current_value = self.first_value;
-            1
-        } else {
-            0
-        };
-        // Add all values (including first value)
-        self.total_values += values.len();
-
-        // Write block
-        while idx < values.len() {
-            let value = self.as_i64(values, idx);
-            self.deltas[self.values_in_block] = self.subtract(value, self.current_value);
-            self.current_value = value;
-            idx += 1;
-            self.values_in_block += 1;
-            if self.values_in_block == self.block_size {
-                self.flush_block_values()?;
-            }
-        }
-        Ok(())
-    }
-
-    // Performance Note:
-    // As far as can be seen these functions are rarely called and as such we can hint to the
-    // compiler that they dont need to be folded into hot locations in the final output.
-    #[cold]
-    fn encoding(&self) -> Encoding {
-        Encoding::DELTA_BINARY_PACKED
-    }
-
-    fn estimated_data_encoded_size(&self) -> usize {
-        self.bit_writer.bytes_written()
-    }
-
-    fn flush_buffer(&mut self) -> Result<ByteBufferPtr> {
-        // Write remaining values
-        self.flush_block_values()?;
-        // Write page header with total values
-        self.write_page_header();
-
-        let mut buffer = ByteBuffer::new();
-        buffer.write_all(self.page_header_writer.flush_buffer())?;
-        buffer.write_all(self.bit_writer.flush_buffer())?;
-        buffer.flush()?;
-
-        // Reset state
-        self.page_header_writer.clear();
-        self.bit_writer.clear();
-        self.total_values = 0;
-        self.first_value = 0;
-        self.current_value = 0;
-        self.values_in_block = 0;
-
-        Ok(buffer.consume())
-    }
-}
-
-/// Helper trait to define specific conversions and subtractions when computing deltas
-trait DeltaBitPackEncoderConversion<T: DataType> {
-    // Method should panic if type is not supported, otherwise no-op
-    fn assert_supported_type();
-
-    fn as_i64(&self, values: &[T::T], index: usize) -> i64;
-
-    fn subtract(&self, left: i64, right: i64) -> i64;
-
-    fn subtract_u64(&self, left: i64, right: i64) -> u64;
-}
-
-impl<T: DataType> DeltaBitPackEncoderConversion<T> for DeltaBitPackEncoder<T> {
-    #[inline]
-    fn assert_supported_type() {
-        ensure_phys_ty!(
-            Type::INT32 | Type::INT64,
-            "DeltaBitPackDecoder only supports Int32Type and Int64Type"
-        );
-    }
-
-    #[inline]
-    fn as_i64(&self, values: &[T::T], index: usize) -> i64 {
-        values[index]
-            .as_i64()
-            .expect("DeltaBitPackDecoder only supports Int32Type and Int64Type")
-    }
-
-    #[inline]
-    fn subtract(&self, left: i64, right: i64) -> i64 {
-        // It is okay for values to overflow, wrapping_sub wrapping around at the boundary
-        match T::get_physical_type() {
-            Type::INT32 => (left as i32).wrapping_sub(right as i32) as i64,
-            Type::INT64 => left.wrapping_sub(right),
-            _ => panic!("DeltaBitPackDecoder only supports Int32Type and Int64Type"),
-        }
-    }
-
-    #[inline]
-    fn subtract_u64(&self, left: i64, right: i64) -> u64 {
-        match T::get_physical_type() {
-            // Conversion of i32 -> u32 -> u64 is to avoid non-zero left most bytes in int repr
-            Type::INT32 => (left as i32).wrapping_sub(right as i32) as u32 as u64,
-            Type::INT64 => left.wrapping_sub(right) as u64,
-            _ => panic!("DeltaBitPackDecoder only supports Int32Type and Int64Type"),
-        }
-    }
-}
-
-// ----------------------------------------------------------------------
-// DELTA_LENGTH_BYTE_ARRAY encoding
-
-/// Encoding for byte arrays to separate the length values and the data.
-/// The lengths are encoded using DELTA_BINARY_PACKED encoding, data is
-/// stored as raw bytes.
-pub struct DeltaLengthByteArrayEncoder<T: DataType> {
-    // length encoder
-    len_encoder: DeltaBitPackEncoder<Int32Type>,
-    // byte array data
-    data: Vec<ByteArray>,
-    // data size in bytes of encoded values
-    encoded_size: usize,
-    _phantom: PhantomData<T>,
-}
-
-impl<T: DataType> DeltaLengthByteArrayEncoder<T> {
-    /// Creates new delta length byte array encoder.
-    pub fn new() -> Self {
-        Self {
-            len_encoder: DeltaBitPackEncoder::new(),
-            data: vec![],
-            encoded_size: 0,
-            _phantom: PhantomData,
-        }
-    }
-}
-
-impl<T: DataType> Encoder<T> for DeltaLengthByteArrayEncoder<T> {
-    fn put(&mut self, values: &[T::T]) -> Result<()> {
-        ensure_phys_ty!(
-            Type::BYTE_ARRAY | Type::FIXED_LEN_BYTE_ARRAY,
-            "DeltaLengthByteArrayEncoder only supports ByteArrayType"
-        );
-
-        let val_it = || {
-            values
-                .iter()
-                .map(|x| x.as_any().downcast_ref::<ByteArray>().unwrap())
-        };
-
-        let lengths: Vec<i32> =
-            val_it().map(|byte_array| byte_array.len() as i32).collect();
-        self.len_encoder.put(&lengths)?;
-        for byte_array in val_it() {
-            self.encoded_size += byte_array.len();
-            self.data.push(byte_array.clone());
-        }
-
-        Ok(())
-    }
-
-    // Performance Note:
-    // As far as can be seen these functions are rarely called and as such we can hint to the
-    // compiler that they dont need to be folded into hot locations in the final output.
-    #[cold]
-    fn encoding(&self) -> Encoding {
-        Encoding::DELTA_LENGTH_BYTE_ARRAY
-    }
-
-    fn estimated_data_encoded_size(&self) -> usize {
-        self.len_encoder.estimated_data_encoded_size() + self.encoded_size
-    }
-
-    fn flush_buffer(&mut self) -> Result<ByteBufferPtr> {
-        ensure_phys_ty!(
-            Type::BYTE_ARRAY | Type::FIXED_LEN_BYTE_ARRAY,
-            "DeltaLengthByteArrayEncoder only supports ByteArrayType"
-        );
-
-        let mut total_bytes = vec![];
-        let lengths = self.len_encoder.flush_buffer()?;
-        total_bytes.extend_from_slice(lengths.data());
-        self.data.iter().for_each(|byte_array| {
-            total_bytes.extend_from_slice(byte_array.data());
-        });
-        self.data.clear();
-        self.encoded_size = 0;
-
-        Ok(ByteBufferPtr::new(total_bytes))
-    }
-}
-
-// ----------------------------------------------------------------------
-// DELTA_BYTE_ARRAY encoding
-
-/// Encoding for byte arrays, prefix lengths are encoded using DELTA_BINARY_PACKED
-/// encoding, followed by suffixes with DELTA_LENGTH_BYTE_ARRAY encoding.
-pub struct DeltaByteArrayEncoder<T: DataType> {
-    prefix_len_encoder: DeltaBitPackEncoder<Int32Type>,
-    suffix_writer: DeltaLengthByteArrayEncoder<ByteArrayType>,
-    previous: Vec<u8>,
-    _phantom: PhantomData<T>,
-}
-
-impl<T: DataType> DeltaByteArrayEncoder<T> {
-    /// Creates new delta byte array encoder.
-    pub fn new() -> Self {
-        Self {
-            prefix_len_encoder: DeltaBitPackEncoder::new(),
-            suffix_writer: DeltaLengthByteArrayEncoder::new(),
-            previous: vec![],
-            _phantom: PhantomData,
-        }
-    }
-}
-
-impl<T: DataType> Encoder<T> for DeltaByteArrayEncoder<T> {
-    fn put(&mut self, values: &[T::T]) -> Result<()> {
-        let mut prefix_lengths: Vec<i32> = vec![];
-        let mut suffixes: Vec<ByteArray> = vec![];
-
-        let values = values.iter()
-            .map(|x| x.as_any())
-            .map(|x| match T::get_physical_type() {
-                Type::BYTE_ARRAY => x.downcast_ref::<ByteArray>().unwrap(),
-                Type::FIXED_LEN_BYTE_ARRAY => x.downcast_ref::<FixedLenByteArray>().unwrap(),
-                _ => panic!(
-                    "DeltaByteArrayEncoder only supports ByteArrayType and FixedLenByteArrayType"
-                )
-            });
-
-        for byte_array in values {
-            let current = byte_array.data();
-            // Maximum prefix length that is shared between previous value and current
-            // value
-            let prefix_len = cmp::min(self.previous.len(), current.len());
-            let mut match_len = 0;
-            while match_len < prefix_len && self.previous[match_len] == current[match_len]
-            {
-                match_len += 1;
-            }
-            prefix_lengths.push(match_len as i32);
-            suffixes.push(byte_array.slice(match_len, byte_array.len() - match_len));
-            // Update previous for the next prefix
-            self.previous.clear();
-            self.previous.extend_from_slice(current);
-        }
-        self.prefix_len_encoder.put(&prefix_lengths)?;
-        self.suffix_writer.put(&suffixes)?;
-
-        Ok(())
-    }
-
-    // Performance Note:
-    // As far as can be seen these functions are rarely called and as such we can hint to the
-    // compiler that they dont need to be folded into hot locations in the final output.
-    #[cold]
-    fn encoding(&self) -> Encoding {
-        Encoding::DELTA_BYTE_ARRAY
-    }
-
-    fn estimated_data_encoded_size(&self) -> usize {
-        self.prefix_len_encoder.estimated_data_encoded_size()
-            + self.suffix_writer.estimated_data_encoded_size()
-    }
-
-    fn flush_buffer(&mut self) -> Result<ByteBufferPtr> {
-        match T::get_physical_type() {
-            Type::BYTE_ARRAY | Type::FIXED_LEN_BYTE_ARRAY => {
-                // TODO: investigate if we can merge lengths and suffixes
-                // without copying data into new vector.
-                let mut total_bytes = vec![];
-                // Insert lengths ...
-                let lengths = self.prefix_len_encoder.flush_buffer()?;
-                total_bytes.extend_from_slice(lengths.data());
-                // ... followed by suffixes
-                let suffixes = self.suffix_writer.flush_buffer()?;
-                total_bytes.extend_from_slice(suffixes.data());
-
-                self.previous.clear();
-                Ok(ByteBufferPtr::new(total_bytes))
-            }
-            _ => panic!(
-                "DeltaByteArrayEncoder only supports ByteArrayType and FixedLenByteArrayType"
-            )
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    use std::sync::Arc;
-
-    use crate::decoding::{get_decoder, Decoder, DictDecoder, PlainDecoder};
-    use crate::schema::types::{
-        ColumnDescPtr, ColumnDescriptor, ColumnPath, Type as SchemaType,
-    };
-    use crate::util::{
-        memory::MemTracker,
-        test_common::{random_bytes, RandGen},
-    };
-
-    const TEST_SET_SIZE: usize = 1024;
-
-    #[test]
-    fn test_get_encoders() {
-        // supported encodings
-        create_and_check_encoder::<Int32Type>(Encoding::PLAIN, None);
-        create_and_check_encoder::<Int32Type>(Encoding::DELTA_BINARY_PACKED, None);
-        create_and_check_encoder::<Int32Type>(Encoding::DELTA_LENGTH_BYTE_ARRAY, None);
-        create_and_check_encoder::<Int32Type>(Encoding::DELTA_BYTE_ARRAY, None);
-        create_and_check_encoder::<BoolType>(Encoding::RLE, None);
-
-        // error when initializing
-        create_and_check_encoder::<Int32Type>(
-            Encoding::RLE_DICTIONARY,
-            Some(general_err!(
-                "Cannot initialize this encoding through this function"
-            )),
-        );
-        create_and_check_encoder::<Int32Type>(
-            Encoding::PLAIN_DICTIONARY,
-            Some(general_err!(
-                "Cannot initialize this encoding through this function"
-            )),
-        );
-
-        // unsupported
-        create_and_check_encoder::<Int32Type>(
-            Encoding::BIT_PACKED,
-            Some(nyi_err!("Encoding BIT_PACKED is not supported")),
-        );
-    }
-
-    #[test]
-    fn test_bool() {
-        BoolType::test(Encoding::PLAIN, TEST_SET_SIZE, -1);
-        BoolType::test(Encoding::PLAIN_DICTIONARY, TEST_SET_SIZE, -1);
-        BoolType::test(Encoding::RLE, TEST_SET_SIZE, -1);
-    }
-
-    #[test]
-    fn test_i32() {
-        Int32Type::test(Encoding::PLAIN, TEST_SET_SIZE, -1);
-        Int32Type::test(Encoding::PLAIN_DICTIONARY, TEST_SET_SIZE, -1);
-        Int32Type::test(Encoding::DELTA_BINARY_PACKED, TEST_SET_SIZE, -1);
-    }
-
-    #[test]
-    fn test_i64() {
-        Int64Type::test(Encoding::PLAIN, TEST_SET_SIZE, -1);
-        Int64Type::test(Encoding::PLAIN_DICTIONARY, TEST_SET_SIZE, -1);
-        Int64Type::test(Encoding::DELTA_BINARY_PACKED, TEST_SET_SIZE, -1);
-    }
-
-    #[test]
-    fn test_i96() {
-        Int96Type::test(Encoding::PLAIN, TEST_SET_SIZE, -1);
-        Int96Type::test(Encoding::PLAIN_DICTIONARY, TEST_SET_SIZE, -1);
-    }
-
-    #[test]
-    fn test_float() {
-        FloatType::test(Encoding::PLAIN, TEST_SET_SIZE, -1);
-        FloatType::test(Encoding::PLAIN_DICTIONARY, TEST_SET_SIZE, -1);
-    }
-
-    #[test]
-    fn test_double() {
-        DoubleType::test(Encoding::PLAIN, TEST_SET_SIZE, -1);
-        DoubleType::test(Encoding::PLAIN_DICTIONARY, TEST_SET_SIZE, -1);
-    }
-
-    #[test]
-    fn test_byte_array() {
-        ByteArrayType::test(Encoding::PLAIN, TEST_SET_SIZE, -1);
-        ByteArrayType::test(Encoding::PLAIN_DICTIONARY, TEST_SET_SIZE, -1);
-        ByteArrayType::test(Encoding::DELTA_LENGTH_BYTE_ARRAY, TEST_SET_SIZE, -1);
-        ByteArrayType::test(Encoding::DELTA_BYTE_ARRAY, TEST_SET_SIZE, -1);
-    }
-
-    #[test]
-    fn test_fixed_lenbyte_array() {
-        FixedLenByteArrayType::test(Encoding::PLAIN, TEST_SET_SIZE, 100);
-        FixedLenByteArrayType::test(Encoding::PLAIN_DICTIONARY, TEST_SET_SIZE, 100);
-        FixedLenByteArrayType::test(Encoding::DELTA_BYTE_ARRAY, TEST_SET_SIZE, 100);
-    }
-
-    #[test]
-    fn test_dict_encoded_size() {
-        fn run_test<T: DataType>(
-            type_length: i32,
-            values: &[T::T],
-            expected_size: usize,
-        ) {
-            let mut encoder = create_test_dict_encoder::<T>(type_length);
-            assert_eq!(encoder.dict_encoded_size(), 0);
-            encoder.put(values).unwrap();
-            assert_eq!(encoder.dict_encoded_size(), expected_size);
-            // We do not reset encoded size of the dictionary keys after flush_buffer
-            encoder.flush_buffer().unwrap();
-            assert_eq!(encoder.dict_encoded_size(), expected_size);
-        }
-
-        // Only 2 variations of values 1 byte each
-        run_test::<BoolType>(-1, &[true, false, true, false, true], 2);
-        run_test::<Int32Type>(-1, &[1i32, 2i32, 3i32, 4i32, 5i32], 20);
-        run_test::<Int64Type>(-1, &[1i64, 2i64, 3i64, 4i64, 5i64], 40);
-        run_test::<FloatType>(-1, &[1f32, 2f32, 3f32, 4f32, 5f32], 20);
-        run_test::<DoubleType>(-1, &[1f64, 2f64, 3f64, 4f64, 5f64], 40);
-        // Int96: len + reference
-        run_test::<Int96Type>(
-            -1,
-            &[Int96::from(vec![1, 2, 3]), Int96::from(vec![2, 3, 4])],
-            32,
-        );
-        run_test::<ByteArrayType>(
-            -1,
-            &[ByteArray::from("abcd"), ByteArray::from("efj")],
-            15,
-        );
-        run_test::<FixedLenByteArrayType>(
-            2,
-            &[ByteArray::from("ab").into(), ByteArray::from("bc").into()],
-            4,
-        );
-    }
-
-    #[test]
-    fn test_estimated_data_encoded_size() {
-        fn run_test<T: DataType>(
-            encoding: Encoding,
-            type_length: i32,
-            values: &[T::T],
-            initial_size: usize,
-            max_size: usize,
-            flush_size: usize,
-        ) {
-            let mut encoder = match encoding {
-                Encoding::PLAIN_DICTIONARY | Encoding::RLE_DICTIONARY => {
-                    Box::new(create_test_dict_encoder::<T>(type_length))
-                }
-                _ => create_test_encoder::<T>(type_length, encoding),
-            };
-            assert_eq!(encoder.estimated_data_encoded_size(), initial_size);
-
-            encoder.put(values).unwrap();
-            assert_eq!(encoder.estimated_data_encoded_size(), max_size);
-
-            encoder.flush_buffer().unwrap();
-            assert_eq!(encoder.estimated_data_encoded_size(), flush_size);
-        }
-
-        // PLAIN
-        run_test::<Int32Type>(Encoding::PLAIN, -1, &[123; 1024], 0, 4096, 0);
-
-        // DICTIONARY
-        // NOTE: The final size is almost the same because the dictionary entries are
-        // preserved after encoded values have been written.
-        run_test::<Int32Type>(Encoding::RLE_DICTIONARY, -1, &[123, 1024], 11, 68, 66);
-
-        // DELTA_BINARY_PACKED
-        run_test::<Int32Type>(Encoding::DELTA_BINARY_PACKED, -1, &[123; 1024], 0, 35, 0);
-
-        // RLE
-        let mut values = vec![];
-        values.extend_from_slice(&[true; 16]);
-        values.extend_from_slice(&[false; 16]);
-        run_test::<BoolType>(Encoding::RLE, -1, &values, 0, 2, 0);
-
-        // DELTA_LENGTH_BYTE_ARRAY
-        run_test::<ByteArrayType>(
-            Encoding::DELTA_LENGTH_BYTE_ARRAY,
-            -1,
-            &[ByteArray::from("ab"), ByteArray::from("abc")],
-            0,
-            5, // only value bytes, length encoder is not flushed yet
-            0,
-        );
-
-        // DELTA_BYTE_ARRAY
-        run_test::<ByteArrayType>(
-            Encoding::DELTA_BYTE_ARRAY,
-            -1,
-            &[ByteArray::from("ab"), ByteArray::from("abc")],
-            0,
-            3, // only suffix bytes, length encoder is not flushed yet
-            0,
-        );
-    }
-
-    // See: https://github.com/sunchao/parquet-rs/issues/47
-    #[test]
-    fn test_issue_47() {
-        let mut encoder =
-            create_test_encoder::<ByteArrayType>(0, Encoding::DELTA_BYTE_ARRAY);
-        let mut decoder =
-            create_test_decoder::<ByteArrayType>(0, Encoding::DELTA_BYTE_ARRAY);
-
-        let mut input = vec![];
-        input.push(ByteArray::from("aa"));
-        input.push(ByteArray::from("aaa"));
-        input.push(ByteArray::from("aa"));
-        input.push(ByteArray::from("aaa"));
-        let mut output = vec![ByteArray::default(); input.len()];
-
-        let mut result =
-            put_and_get(&mut encoder, &mut decoder, &input[..2], &mut output[..2]);
-        assert!(
-            result.is_ok(),
-            "first put_and_get() failed with: {}",
-            result.unwrap_err()
-        );
-        result = put_and_get(&mut encoder, &mut decoder, &input[2..], &mut output[2..]);
-        assert!(
-            result.is_ok(),
-            "second put_and_get() failed with: {}",
-            result.unwrap_err()
-        );
-        assert_eq!(output, input);
-    }
-
-    trait EncodingTester<T: DataType> {
-        fn test(enc: Encoding, total: usize, type_length: i32) {
-            let result = match enc {
-                Encoding::PLAIN_DICTIONARY | Encoding::RLE_DICTIONARY => {
-                    Self::test_dict_internal(total, type_length)
-                }
-                enc => Self::test_internal(enc, total, type_length),
-            };
-
-            assert!(
-                result.is_ok(),
-                "Expected result to be OK but got err:\n {}",
-                result.unwrap_err()
-            );
-        }
-
-        fn test_internal(enc: Encoding, total: usize, type_length: i32) -> Result<()>;
-
-        fn test_dict_internal(total: usize, type_length: i32) -> Result<()>;
-    }
-
-    impl<T: DataType + RandGen<T>> EncodingTester<T> for T {
-        fn test_internal(enc: Encoding, total: usize, type_length: i32) -> Result<()> {
-            let mut encoder = create_test_encoder::<T>(type_length, enc);
-            let mut decoder = create_test_decoder::<T>(type_length, enc);
-            let mut values = <T as RandGen<T>>::gen_vec(type_length, total);
-            let mut result_data = vec![T::T::default(); total];
-
-            // Test put/get spaced.
-            let num_bytes = bit_util::ceil(total as i64, 8);
-            let valid_bits = random_bytes(num_bytes as usize);
-            let values_written = encoder.put_spaced(&values[..], &valid_bits[..])?;
-            let data = encoder.flush_buffer()?;
-            decoder.set_data(data, values_written)?;
-            let _ = decoder.get_spaced(
-                &mut result_data[..],
-                values.len() - values_written,
-                &valid_bits[..],
-            )?;
-
-            // Check equality
-            for i in 0..total {
-                if bit_util::get_bit(&valid_bits[..], i) {
-                    assert_eq!(result_data[i], values[i]);
-                } else {
-                    assert_eq!(result_data[i], T::T::default());
-                }
-            }
-
-            let mut actual_total = put_and_get(
-                &mut encoder,
-                &mut decoder,
-                &values[..],
-                &mut result_data[..],
-            )?;
-            assert_eq!(actual_total, total);
-            assert_eq!(result_data, values);
-
-            // Encode more data after flush and test with decoder
-
-            values = <T as RandGen<T>>::gen_vec(type_length, total);
-            actual_total = put_and_get(
-                &mut encoder,
-                &mut decoder,
-                &values[..],
-                &mut result_data[..],
-            )?;
-            assert_eq!(actual_total, total);
-            assert_eq!(result_data, values);
-
-            Ok(())
-        }
-
-        fn test_dict_internal(total: usize, type_length: i32) -> Result<()> {
-            let mut encoder = create_test_dict_encoder::<T>(type_length);
-            let mut values = <T as RandGen<T>>::gen_vec(type_length, total);
-            encoder.put(&values[..])?;
-
-            let mut data = encoder.flush_buffer()?;
-            let mut decoder = create_test_dict_decoder::<T>();
-            let mut dict_decoder = PlainDecoder::<T>::new(type_length);
-            dict_decoder.set_data(encoder.write_dict()?, encoder.num_entries())?;
-            decoder.set_dict(Box::new(dict_decoder))?;
-            let mut result_data = vec![T::T::default(); total];
-            decoder.set_data(data, total)?;
-            let mut actual_total = decoder.get(&mut result_data)?;
-
-            assert_eq!(actual_total, total);
-            assert_eq!(result_data, values);
-
-            // Encode more data after flush and test with decoder
-
-            values = <T as RandGen<T>>::gen_vec(type_length, total);
-            encoder.put(&values[..])?;
-            data = encoder.flush_buffer()?;
-
-            let mut dict_decoder = PlainDecoder::<T>::new(type_length);
-            dict_decoder.set_data(encoder.write_dict()?, encoder.num_entries())?;
-            decoder.set_dict(Box::new(dict_decoder))?;
-            decoder.set_data(data, total)?;
-            actual_total = decoder.get(&mut result_data)?;
-
-            assert_eq!(actual_total, total);
-            assert_eq!(result_data, values);
-
-            Ok(())
-        }
-    }
-
-    fn put_and_get<T: DataType>(
-        encoder: &mut Box<dyn Encoder<T>>,
-        decoder: &mut Box<dyn Decoder<T>>,
-        input: &[T::T],
-        output: &mut [T::T],
-    ) -> Result<usize> {
-        encoder.put(input)?;
-        let data = encoder.flush_buffer()?;
-        decoder.set_data(data, input.len())?;
-        decoder.get(output)
-    }
-
-    fn create_and_check_encoder<T: DataType>(
-        encoding: Encoding,
-        err: Option<ParquetError>,
-    ) {
-        let descr = create_test_col_desc_ptr(-1, T::get_physical_type());
-        let mem_tracker = Arc::new(MemTracker::new());
-        let encoder = get_encoder::<T>(descr, encoding, mem_tracker);
-        match err {
-            Some(parquet_error) => {
-                assert!(encoder.is_err());
-                assert_eq!(encoder.err().unwrap(), parquet_error);
-            }
-            None => {
-                assert!(encoder.is_ok());
-                assert_eq!(encoder.unwrap().encoding(), encoding);
-            }
-        }
-    }
-
-    // Creates test column descriptor.
-    fn create_test_col_desc_ptr(type_len: i32, t: Type) -> ColumnDescPtr {
-        let ty = SchemaType::primitive_type_builder("t", t)
-            .with_length(type_len)
-            .build()
-            .unwrap();
-        Arc::new(ColumnDescriptor::new(
-            Arc::new(ty),
-            0,
-            0,
-            ColumnPath::new(vec![]),
-        ))
-    }
-
-    fn create_test_encoder<T: DataType>(
-        type_len: i32,
-        enc: Encoding,
-    ) -> Box<dyn Encoder<T>> {
-        let desc = create_test_col_desc_ptr(type_len, T::get_physical_type());
-        let mem_tracker = Arc::new(MemTracker::new());
-        get_encoder(desc, enc, mem_tracker).unwrap()
-    }
-
-    fn create_test_decoder<T: DataType>(
-        type_len: i32,
-        enc: Encoding,
-    ) -> Box<dyn Decoder<T>> {
-        let desc = create_test_col_desc_ptr(type_len, T::get_physical_type());
-        get_decoder(desc, enc).unwrap()
-    }
-
-    fn create_test_dict_encoder<T: DataType>(type_len: i32) -> DictEncoder<T> {
-        let desc = create_test_col_desc_ptr(type_len, T::get_physical_type());
-        let mem_tracker = Arc::new(MemTracker::new());
-        DictEncoder::<T>::new(desc, mem_tracker)
-    }
-
-    fn create_test_dict_decoder<T: DataType>() -> DictDecoder<T> {
-        DictDecoder::<T>::new()
-    }
-}
diff --git a/rust/parquet/src/encodings/levels.rs b/rust/parquet/src/encodings/levels.rs
deleted file mode 100644
index 6727589f17e..00000000000
--- a/rust/parquet/src/encodings/levels.rs
+++ /dev/null
@@ -1,563 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::{cmp, mem};
-
-use super::rle::{RleDecoder, RleEncoder};
-
-use crate::basic::Encoding;
-use crate::data_type::AsBytes;
-use crate::errors::{ParquetError, Result};
-use crate::util::{
-    bit_util::{ceil, log2, BitReader, BitWriter},
-    memory::ByteBufferPtr,
-};
-
-/// Computes max buffer size for level encoder/decoder based on encoding, max
-/// repetition/definition level and number of total buffered values (includes null
-/// values).
-#[inline]
-pub fn max_buffer_size(
-    encoding: Encoding,
-    max_level: i16,
-    num_buffered_values: usize,
-) -> usize {
-    let bit_width = log2(max_level as u64 + 1) as u8;
-    match encoding {
-        Encoding::RLE => {
-            RleEncoder::max_buffer_size(bit_width, num_buffered_values)
-                + RleEncoder::min_buffer_size(bit_width)
-        }
-        Encoding::BIT_PACKED => {
-            ceil((num_buffered_values * bit_width as usize) as i64, 8) as usize
-        }
-        _ => panic!("Unsupported encoding type {}", encoding),
-    }
-}
-
-/// Encoder for definition/repetition levels.
-/// Currently only supports RLE and BIT_PACKED (dev/null) encoding, including v2.
-pub enum LevelEncoder {
-    RLE(RleEncoder),
-    RLE_V2(RleEncoder),
-    BIT_PACKED(u8, BitWriter),
-}
-
-impl LevelEncoder {
-    /// Creates new level encoder based on encoding, max level and underlying byte buffer.
-    /// For bit packed encoding it is assumed that buffer is already allocated with
-    /// `levels::max_buffer_size` method.
-    ///
-    /// Used to encode levels for Data Page v1.
-    ///
-    /// Panics, if encoding is not supported.
-    pub fn v1(encoding: Encoding, max_level: i16, byte_buffer: Vec<u8>) -> Self {
-        let bit_width = log2(max_level as u64 + 1) as u8;
-        match encoding {
-            Encoding::RLE => LevelEncoder::RLE(RleEncoder::new_from_buf(
-                bit_width,
-                byte_buffer,
-                mem::size_of::<i32>(),
-            )),
-            Encoding::BIT_PACKED => {
-                // Here we set full byte buffer without adjusting for num_buffered_values,
-                // because byte buffer will already be allocated with size from
-                // `max_buffer_size()` method.
-                LevelEncoder::BIT_PACKED(
-                    bit_width,
-                    BitWriter::new_from_buf(byte_buffer, 0),
-                )
-            }
-            _ => panic!("Unsupported encoding type {}", encoding),
-        }
-    }
-
-    /// Creates new level encoder based on RLE encoding. Used to encode Data Page v2
-    /// repetition and definition levels.
-    pub fn v2(max_level: i16, byte_buffer: Vec<u8>) -> Self {
-        let bit_width = log2(max_level as u64 + 1) as u8;
-        LevelEncoder::RLE_V2(RleEncoder::new_from_buf(bit_width, byte_buffer, 0))
-    }
-
-    /// Put/encode levels vector into this level encoder.
-    /// Returns number of encoded values that are less than or equal to length of the
-    /// input buffer.
-    ///
-    /// RLE and BIT_PACKED level encoders return Err() when internal buffer overflows or
-    /// flush fails.
-    #[inline]
-    pub fn put(&mut self, buffer: &[i16]) -> Result<usize> {
-        let mut num_encoded = 0;
-        match *self {
-            LevelEncoder::RLE(ref mut encoder)
-            | LevelEncoder::RLE_V2(ref mut encoder) => {
-                for value in buffer {
-                    if !encoder.put(*value as u64)? {
-                        return Err(general_err!("RLE buffer is full"));
-                    }
-                    num_encoded += 1;
-                }
-                encoder.flush()?;
-            }
-            LevelEncoder::BIT_PACKED(bit_width, ref mut encoder) => {
-                for value in buffer {
-                    if !encoder.put_value(*value as u64, bit_width as usize) {
-                        return Err(general_err!("Not enough bytes left"));
-                    }
-                    num_encoded += 1;
-                }
-                encoder.flush();
-            }
-        }
-        Ok(num_encoded)
-    }
-
-    /// Finalizes level encoder, flush all intermediate buffers and return resulting
-    /// encoded buffer. Returned buffer is already truncated to encoded bytes only.
-    #[inline]
-    pub fn consume(self) -> Result<Vec<u8>> {
-        match self {
-            LevelEncoder::RLE(encoder) => {
-                let mut encoded_data = encoder.consume()?;
-                // Account for the buffer offset
-                let encoded_len = encoded_data.len() - mem::size_of::<i32>();
-                let len = (encoded_len as i32).to_le();
-                let len_bytes = len.as_bytes();
-                encoded_data[0..len_bytes.len()].copy_from_slice(len_bytes);
-                Ok(encoded_data)
-            }
-            LevelEncoder::RLE_V2(encoder) => encoder.consume(),
-            LevelEncoder::BIT_PACKED(_, encoder) => Ok(encoder.consume()),
-        }
-    }
-}
-
-/// Decoder for definition/repetition levels.
-/// Currently only supports RLE and BIT_PACKED encoding for Data Page v1 and
-/// RLE for Data Page v2.
-pub enum LevelDecoder {
-    RLE(Option<usize>, RleDecoder),
-    RLE_V2(Option<usize>, RleDecoder),
-    BIT_PACKED(Option<usize>, u8, BitReader),
-}
-
-impl LevelDecoder {
-    /// Creates new level decoder based on encoding and max definition/repetition level.
-    /// This method only initializes level decoder, `set_data` method must be called
-    /// before reading any value.
-    ///
-    /// Used to encode levels for Data Page v1.
-    ///
-    /// Panics if encoding is not supported
-    pub fn v1(encoding: Encoding, max_level: i16) -> Self {
-        let bit_width = log2(max_level as u64 + 1) as u8;
-        match encoding {
-            Encoding::RLE => LevelDecoder::RLE(None, RleDecoder::new(bit_width)),
-            Encoding::BIT_PACKED => {
-                LevelDecoder::BIT_PACKED(None, bit_width, BitReader::from(Vec::new()))
-            }
-            _ => panic!("Unsupported encoding type {}", encoding),
-        }
-    }
-
-    /// Creates new level decoder based on RLE encoding.
-    /// Used to decode Data Page v2 repetition and definition levels.
-    ///
-    /// To set data for this decoder, use `set_data_range` method.
-    pub fn v2(max_level: i16) -> Self {
-        let bit_width = log2(max_level as u64 + 1) as u8;
-        LevelDecoder::RLE_V2(None, RleDecoder::new(bit_width))
-    }
-
-    /// Sets data for this level decoder, and returns total number of bytes set.
-    /// This is used for Data Page v1 levels.
-    ///
-    /// `data` is encoded data as byte buffer, `num_buffered_values` represents total
-    /// number of values that is expected.
-    ///
-    /// Both RLE and BIT_PACKED level decoders set `num_buffered_values` as total number
-    /// of values that they can return and track num values.
-    #[inline]
-    pub fn set_data(&mut self, num_buffered_values: usize, data: ByteBufferPtr) -> usize {
-        match *self {
-            LevelDecoder::RLE(ref mut num_values, ref mut decoder) => {
-                *num_values = Some(num_buffered_values);
-                let i32_size = mem::size_of::<i32>();
-                let data_size = read_num_bytes!(i32, i32_size, data.as_ref()) as usize;
-                decoder.set_data(data.range(i32_size, data_size));
-                i32_size + data_size
-            }
-            LevelDecoder::BIT_PACKED(ref mut num_values, bit_width, ref mut decoder) => {
-                *num_values = Some(num_buffered_values);
-                // Set appropriate number of bytes: if max size is larger than buffer -
-                // set full buffer
-                let num_bytes =
-                    ceil((num_buffered_values * bit_width as usize) as i64, 8);
-                let data_size = cmp::min(num_bytes as usize, data.len());
-                decoder.reset(data.range(data.start(), data_size));
-                data_size
-            }
-            _ => panic!(),
-        }
-    }
-
-    /// Sets byte array explicitly when start position `start` and length `len` are known
-    /// in advance. Only supported by RLE level decoder and used for Data Page v2 levels.
-    /// Returns number of total bytes set for this decoder (len).
-    #[inline]
-    pub fn set_data_range(
-        &mut self,
-        num_buffered_values: usize,
-        data: &ByteBufferPtr,
-        start: usize,
-        len: usize,
-    ) -> usize {
-        match *self {
-            LevelDecoder::RLE_V2(ref mut num_values, ref mut decoder) => {
-                decoder.set_data(data.range(start, len));
-                *num_values = Some(num_buffered_values);
-                len
-            }
-            _ => panic!(
-                "set_data_range() method is only supported by RLE v2 encoding type"
-            ),
-        }
-    }
-
-    /// Returns true if data is set for decoder, false otherwise.
-    #[inline]
-    pub fn is_data_set(&self) -> bool {
-        match self {
-            LevelDecoder::RLE(ref num_values, _) => num_values.is_some(),
-            LevelDecoder::RLE_V2(ref num_values, _) => num_values.is_some(),
-            LevelDecoder::BIT_PACKED(ref num_values, ..) => num_values.is_some(),
-        }
-    }
-
-    /// Decodes values and puts them into `buffer`.
-    /// Returns number of values that were successfully decoded (less than or equal to
-    /// buffer length).
-    #[inline]
-    pub fn get(&mut self, buffer: &mut [i16]) -> Result<usize> {
-        assert!(self.is_data_set(), "No data set for decoding");
-        match *self {
-            LevelDecoder::RLE(ref mut num_values, ref mut decoder)
-            | LevelDecoder::RLE_V2(ref mut num_values, ref mut decoder) => {
-                // Max length we can read
-                let len = cmp::min(num_values.unwrap(), buffer.len());
-                let values_read = decoder.get_batch::<i16>(&mut buffer[0..len])?;
-                *num_values = num_values.map(|len| len - values_read);
-                Ok(values_read)
-            }
-            LevelDecoder::BIT_PACKED(ref mut num_values, bit_width, ref mut decoder) => {
-                // When extracting values from bit reader, it might return more values
-                // than left because of padding to a full byte, we use
-                // num_values to track precise number of values.
-                let len = cmp::min(num_values.unwrap(), buffer.len());
-                let values_read =
-                    decoder.get_batch::<i16>(&mut buffer[..len], bit_width as usize);
-                *num_values = num_values.map(|len| len - values_read);
-                Ok(values_read)
-            }
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    use crate::util::test_common::random_numbers_range;
-
-    fn test_internal_roundtrip(enc: Encoding, levels: &[i16], max_level: i16, v2: bool) {
-        let size = max_buffer_size(enc, max_level, levels.len());
-        let mut encoder = if v2 {
-            LevelEncoder::v2(max_level, vec![0; size])
-        } else {
-            LevelEncoder::v1(enc, max_level, vec![0; size])
-        };
-        encoder.put(&levels).expect("put() should be OK");
-        let encoded_levels = encoder.consume().expect("consume() should be OK");
-
-        let byte_buf = ByteBufferPtr::new(encoded_levels);
-        let mut decoder;
-        if v2 {
-            decoder = LevelDecoder::v2(max_level);
-            decoder.set_data_range(levels.len(), &byte_buf, 0, byte_buf.len());
-        } else {
-            decoder = LevelDecoder::v1(enc, max_level);
-            decoder.set_data(levels.len(), byte_buf);
-        };
-
-        let mut buffer = vec![0; levels.len()];
-        let num_decoded = decoder.get(&mut buffer).expect("get() should be OK");
-        assert_eq!(num_decoded, levels.len());
-        assert_eq!(buffer, levels);
-    }
-
-    // Performs incremental read until all bytes are read
-    fn test_internal_roundtrip_incremental(
-        enc: Encoding,
-        levels: &[i16],
-        max_level: i16,
-        v2: bool,
-    ) {
-        let size = max_buffer_size(enc, max_level, levels.len());
-        let mut encoder = if v2 {
-            LevelEncoder::v2(max_level, vec![0; size])
-        } else {
-            LevelEncoder::v1(enc, max_level, vec![0; size])
-        };
-        encoder.put(&levels).expect("put() should be OK");
-        let encoded_levels = encoder.consume().expect("consume() should be OK");
-
-        let byte_buf = ByteBufferPtr::new(encoded_levels);
-        let mut decoder;
-        if v2 {
-            decoder = LevelDecoder::v2(max_level);
-            decoder.set_data_range(levels.len(), &byte_buf, 0, byte_buf.len());
-        } else {
-            decoder = LevelDecoder::v1(enc, max_level);
-            decoder.set_data(levels.len(), byte_buf);
-        }
-
-        let mut buffer = vec![0; levels.len() * 2];
-        let mut total_decoded = 0;
-        let mut safe_stop = levels.len() * 2; // still terminate in case of issues in the code
-        while safe_stop > 0 {
-            safe_stop -= 1;
-            let num_decoded = decoder
-                .get(&mut buffer[total_decoded..total_decoded + 1])
-                .expect("get() should be OK");
-            if num_decoded == 0 {
-                break;
-            }
-            total_decoded += num_decoded;
-        }
-        assert!(
-            safe_stop > 0,
-            "Failed to read values incrementally, reached safe stop"
-        );
-        assert_eq!(total_decoded, levels.len());
-        assert_eq!(&buffer[0..levels.len()], levels);
-    }
-
-    // Tests encoding/decoding of values when output buffer is larger than number of
-    // encoded values
-    fn test_internal_roundtrip_underflow(
-        enc: Encoding,
-        levels: &[i16],
-        max_level: i16,
-        v2: bool,
-    ) {
-        let size = max_buffer_size(enc, max_level, levels.len());
-        let mut encoder = if v2 {
-            LevelEncoder::v2(max_level, vec![0; size])
-        } else {
-            LevelEncoder::v1(enc, max_level, vec![0; size])
-        };
-        // Encode only one value
-        let num_encoded = encoder.put(&levels[0..1]).expect("put() should be OK");
-        let encoded_levels = encoder.consume().expect("consume() should be OK");
-        assert_eq!(num_encoded, 1);
-
-        let byte_buf = ByteBufferPtr::new(encoded_levels);
-        let mut decoder;
-        // Set one encoded value as `num_buffered_values`
-        if v2 {
-            decoder = LevelDecoder::v2(max_level);
-            decoder.set_data_range(1, &byte_buf, 0, byte_buf.len());
-        } else {
-            decoder = LevelDecoder::v1(enc, max_level);
-            decoder.set_data(1, byte_buf);
-        }
-
-        let mut buffer = vec![0; levels.len()];
-        let num_decoded = decoder.get(&mut buffer).expect("get() should be OK");
-        assert_eq!(num_decoded, num_encoded);
-        assert_eq!(buffer[0..num_decoded], levels[0..num_decoded]);
-    }
-
-    // Tests when encoded values are larger than encoder's buffer
-    fn test_internal_roundtrip_overflow(
-        enc: Encoding,
-        levels: &[i16],
-        max_level: i16,
-        v2: bool,
-    ) {
-        let size = max_buffer_size(enc, max_level, levels.len());
-        let mut encoder = if v2 {
-            LevelEncoder::v2(max_level, vec![0; size])
-        } else {
-            LevelEncoder::v1(enc, max_level, vec![0; size])
-        };
-        let mut found_err = false;
-        // Insert a large number of values, so we run out of space
-        for _ in 0..100 {
-            if let Err(err) = encoder.put(&levels) {
-                assert!(format!("{}", err).contains("Not enough bytes left"));
-                found_err = true;
-                break;
-            };
-        }
-        if !found_err {
-            panic!("Failed test: no buffer overflow");
-        }
-    }
-
-    #[test]
-    fn test_roundtrip_one() {
-        let levels = vec![0, 1, 1, 1, 1, 0, 0, 0, 0, 1];
-        let max_level = 1;
-        test_internal_roundtrip(Encoding::RLE, &levels, max_level, false);
-        test_internal_roundtrip(Encoding::BIT_PACKED, &levels, max_level, false);
-        test_internal_roundtrip(Encoding::RLE, &levels, max_level, true);
-    }
-
-    #[test]
-    fn test_roundtrip() {
-        let levels = vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
-        let max_level = 10;
-        test_internal_roundtrip(Encoding::RLE, &levels, max_level, false);
-        test_internal_roundtrip(Encoding::BIT_PACKED, &levels, max_level, false);
-        test_internal_roundtrip(Encoding::RLE, &levels, max_level, true);
-    }
-
-    #[test]
-    fn test_roundtrip_incremental() {
-        let levels = vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
-        let max_level = 10;
-        test_internal_roundtrip_incremental(Encoding::RLE, &levels, max_level, false);
-        test_internal_roundtrip_incremental(
-            Encoding::BIT_PACKED,
-            &levels,
-            max_level,
-            false,
-        );
-        test_internal_roundtrip_incremental(Encoding::RLE, &levels, max_level, true);
-    }
-
-    #[test]
-    fn test_roundtrip_all_zeros() {
-        let levels = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
-        let max_level = 1;
-        test_internal_roundtrip(Encoding::RLE, &levels, max_level, false);
-        test_internal_roundtrip(Encoding::BIT_PACKED, &levels, max_level, false);
-        test_internal_roundtrip(Encoding::RLE, &levels, max_level, true);
-    }
-
-    #[test]
-    fn test_roundtrip_random() {
-        // This test is mainly for bit packed level encoder/decoder
-        let mut levels = Vec::new();
-        let max_level = 5;
-        random_numbers_range::<i16>(120, 0, max_level, &mut levels);
-        test_internal_roundtrip(Encoding::RLE, &levels, max_level, false);
-        test_internal_roundtrip(Encoding::BIT_PACKED, &levels, max_level, false);
-        test_internal_roundtrip(Encoding::RLE, &levels, max_level, true);
-    }
-
-    #[test]
-    fn test_roundtrip_underflow() {
-        let levels = vec![1, 1, 2, 3, 2, 1, 1, 2, 3, 1];
-        let max_level = 3;
-        test_internal_roundtrip_underflow(Encoding::RLE, &levels, max_level, false);
-        test_internal_roundtrip_underflow(
-            Encoding::BIT_PACKED,
-            &levels,
-            max_level,
-            false,
-        );
-        test_internal_roundtrip_underflow(Encoding::RLE, &levels, max_level, true);
-    }
-
-    #[test]
-    fn test_roundtrip_overflow() {
-        let levels = vec![1, 1, 2, 3, 2, 1, 1, 2, 3, 1];
-        let max_level = 3;
-        test_internal_roundtrip_overflow(Encoding::RLE, &levels, max_level, false);
-        test_internal_roundtrip_overflow(Encoding::BIT_PACKED, &levels, max_level, false);
-        test_internal_roundtrip_overflow(Encoding::RLE, &levels, max_level, true);
-    }
-
-    #[test]
-    fn test_rle_decoder_set_data_range() {
-        // Buffer containing both repetition and definition levels
-        let buffer = ByteBufferPtr::new(vec![5, 198, 2, 5, 42, 168, 10, 0, 2, 3, 36, 73]);
-
-        let max_rep_level = 1;
-        let mut decoder = LevelDecoder::v2(max_rep_level);
-        assert_eq!(decoder.set_data_range(10, &buffer, 0, 3), 3);
-        let mut result = vec![0; 10];
-        let num_decoded = decoder.get(&mut result).expect("get() should be OK");
-        assert_eq!(num_decoded, 10);
-        assert_eq!(result, vec![0, 1, 1, 0, 0, 0, 1, 1, 0, 1]);
-
-        let max_def_level = 2;
-        let mut decoder = LevelDecoder::v2(max_def_level);
-        assert_eq!(decoder.set_data_range(10, &buffer, 3, 5), 5);
-        let mut result = vec![0; 10];
-        let num_decoded = decoder.get(&mut result).expect("get() should be OK");
-        assert_eq!(num_decoded, 10);
-        assert_eq!(result, vec![2, 2, 2, 0, 0, 2, 2, 2, 2, 2]);
-    }
-
-    #[test]
-    #[should_panic(
-        expected = "set_data_range() method is only supported by RLE v2 encoding type"
-    )]
-    fn test_bit_packed_decoder_set_data_range() {
-        // Buffer containing both repetition and definition levels
-        let buffer = ByteBufferPtr::new(vec![1, 2, 3, 4, 5]);
-        let max_level = 1;
-        let mut decoder = LevelDecoder::v1(Encoding::BIT_PACKED, max_level);
-        decoder.set_data_range(10, &buffer, 0, 3);
-    }
-
-    #[test]
-    fn test_bit_packed_decoder_set_data() {
-        // Test the maximum size that is assigned based on number of values and buffer
-        // length
-        let buffer = ByteBufferPtr::new(vec![1, 2, 3, 4, 5]);
-        let max_level = 1;
-        let mut decoder = LevelDecoder::v1(Encoding::BIT_PACKED, max_level);
-        // This should reset to entire buffer
-        assert_eq!(decoder.set_data(1024, buffer.all()), buffer.len());
-        // This should set smallest num bytes
-        assert_eq!(decoder.set_data(3, buffer.all()), 1);
-    }
-
-    #[test]
-    #[should_panic(expected = "No data set for decoding")]
-    fn test_rle_level_decoder_get_no_set_data() {
-        // `get()` normally panics because bit_reader is not set for RLE decoding
-        // we have explicit check now in set_data
-        let max_rep_level = 2;
-        let mut decoder = LevelDecoder::v1(Encoding::RLE, max_rep_level);
-        let mut buffer = vec![0; 16];
-        decoder.get(&mut buffer).unwrap();
-    }
-
-    #[test]
-    #[should_panic(expected = "No data set for decoding")]
-    fn test_bit_packed_level_decoder_get_no_set_data() {
-        let max_rep_level = 2;
-        let mut decoder = LevelDecoder::v1(Encoding::BIT_PACKED, max_rep_level);
-        let mut buffer = vec![0; 16];
-        decoder.get(&mut buffer).unwrap();
-    }
-}
diff --git a/rust/parquet/src/encodings/mod.rs b/rust/parquet/src/encodings/mod.rs
deleted file mode 100644
index 33b1e233d89..00000000000
--- a/rust/parquet/src/encodings/mod.rs
+++ /dev/null
@@ -1,21 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-pub mod decoding;
-pub mod encoding;
-pub mod levels;
-mod rle;
diff --git a/rust/parquet/src/encodings/rle.rs b/rust/parquet/src/encodings/rle.rs
deleted file mode 100644
index b2a23da7c0b..00000000000
--- a/rust/parquet/src/encodings/rle.rs
+++ /dev/null
@@ -1,831 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::{cmp, mem::size_of};
-
-use crate::errors::{ParquetError, Result};
-use crate::util::{
-    bit_util::{self, from_ne_slice, BitReader, BitWriter, FromBytes},
-    memory::ByteBufferPtr,
-};
-
-/// Rle/Bit-Packing Hybrid Encoding
-/// The grammar for this encoding looks like the following (copied verbatim
-/// from <https://github.com/Parquet/parquet-format/blob/master/Encodings.md>):
-///
-/// rle-bit-packed-hybrid: <length> <encoded-data>
-/// length := length of the <encoded-data> in bytes stored as 4 bytes little endian
-/// encoded-data := <run>*
-/// run := <bit-packed-run> | <rle-run>
-/// bit-packed-run := <bit-packed-header> <bit-packed-values>
-/// bit-packed-header := varint-encode(<bit-pack-count> << 1 | 1)
-/// we always bit-pack a multiple of 8 values at a time, so we only store the number of
-/// values / 8
-/// bit-pack-count := (number of values in this run) / 8
-/// bit-packed-values := *see 1 below*
-/// rle-run := <rle-header> <repeated-value>
-/// rle-header := varint-encode( (number of times repeated) << 1)
-/// repeated-value := value that is repeated, using a fixed-width of
-/// round-up-to-next-byte(bit-width)
-
-/// Maximum groups per bit-packed run. Current value is 64.
-const MAX_GROUPS_PER_BIT_PACKED_RUN: usize = 1 << 6;
-const MAX_VALUES_PER_BIT_PACKED_RUN: usize = MAX_GROUPS_PER_BIT_PACKED_RUN * 8;
-const MAX_WRITER_BUF_SIZE: usize = 1 << 10;
-
-/// A RLE/Bit-Packing hybrid encoder.
-// TODO: tracking memory usage
-pub struct RleEncoder {
-    // Number of bits needed to encode the value. Must be in the range of [0, 64].
-    bit_width: u8,
-
-    // Underlying writer which holds an internal buffer.
-    bit_writer: BitWriter,
-
-    // The maximum byte size a single run can take.
-    max_run_byte_size: usize,
-
-    // Buffered values for bit-packed runs.
-    buffered_values: [u64; 8],
-
-    // Number of current buffered values. Must be less than 8.
-    num_buffered_values: usize,
-
-    // The current (also last) value that was written and the count of how many
-    // times in a row that value has been seen.
-    current_value: u64,
-
-    // The number of repetitions for `current_value`. If this gets too high we'd
-    // switch to use RLE encoding.
-    repeat_count: usize,
-
-    // Number of bit-packed values in the current run. This doesn't include values
-    // in `buffered_values`.
-    bit_packed_count: usize,
-
-    // The position of the indicator byte in the `bit_writer`.
-    indicator_byte_pos: i64,
-}
-
-impl RleEncoder {
-    pub fn new(bit_width: u8, buffer_len: usize) -> Self {
-        let buffer = vec![0; buffer_len];
-        RleEncoder::new_from_buf(bit_width, buffer, 0)
-    }
-
-    /// Initialize the encoder from existing `buffer` and the starting offset `start`.
-    pub fn new_from_buf(bit_width: u8, buffer: Vec<u8>, start: usize) -> Self {
-        assert!(bit_width <= 64, "bit_width ({}) out of range.", bit_width);
-        let max_run_byte_size = RleEncoder::min_buffer_size(bit_width);
-        assert!(
-            buffer.len() >= max_run_byte_size,
-            "buffer length {} must be greater than {}",
-            buffer.len(),
-            max_run_byte_size
-        );
-        let bit_writer = BitWriter::new_from_buf(buffer, start);
-        RleEncoder {
-            bit_width,
-            bit_writer,
-            max_run_byte_size,
-            buffered_values: [0; 8],
-            num_buffered_values: 0,
-            current_value: 0,
-            repeat_count: 0,
-            bit_packed_count: 0,
-            indicator_byte_pos: -1,
-        }
-    }
-
-    /// Returns the minimum buffer size needed to use the encoder for `bit_width`.
-    /// This is the maximum length of a single run for `bit_width`.
-    pub fn min_buffer_size(bit_width: u8) -> usize {
-        let max_bit_packed_run_size = 1 + bit_util::ceil(
-            (MAX_VALUES_PER_BIT_PACKED_RUN * bit_width as usize) as i64,
-            8,
-        );
-        let max_rle_run_size =
-            bit_util::MAX_VLQ_BYTE_LEN + bit_util::ceil(bit_width as i64, 8) as usize;
-        std::cmp::max(max_bit_packed_run_size as usize, max_rle_run_size)
-    }
-
-    /// Returns the maximum buffer size takes to encode `num_values` values with
-    /// `bit_width`.
-    pub fn max_buffer_size(bit_width: u8, num_values: usize) -> usize {
-        // First the maximum size for bit-packed run
-        let bytes_per_run = bit_width;
-        let num_runs = bit_util::ceil(num_values as i64, 8) as usize;
-        let bit_packed_max_size = num_runs + num_runs * bytes_per_run as usize;
-
-        // Second the maximum size for RLE run
-        let min_rle_run_size = 1 + bit_util::ceil(bit_width as i64, 8) as usize;
-        let rle_max_size =
-            bit_util::ceil(num_values as i64, 8) as usize * min_rle_run_size;
-        std::cmp::max(bit_packed_max_size, rle_max_size) as usize
-    }
-
-    /// Encodes `value`, which must be representable with `bit_width` bits.
-    /// Returns true if the value fits in buffer, false if it doesn't, or
-    /// error if something is wrong.
-    #[inline]
-    pub fn put(&mut self, value: u64) -> Result<bool> {
-        // This function buffers 8 values at a time. After seeing 8 values, it
-        // decides whether the current run should be encoded in bit-packed or RLE.
-        if self.current_value == value {
-            self.repeat_count += 1;
-            if self.repeat_count > 8 {
-                // A continuation of last value. No need to buffer.
-                return Ok(true);
-            }
-        } else {
-            if self.repeat_count >= 8 {
-                // The current RLE run has ended and we've gathered enough. Flush first.
-                assert_eq!(self.bit_packed_count, 0);
-                self.flush_rle_run()?;
-            }
-            self.repeat_count = 1;
-            self.current_value = value;
-        }
-
-        self.buffered_values[self.num_buffered_values] = value;
-        self.num_buffered_values += 1;
-        if self.num_buffered_values == 8 {
-            // Buffered values are full. Flush them.
-            assert_eq!(self.bit_packed_count % 8, 0);
-            self.flush_buffered_values()?;
-        }
-
-        Ok(true)
-    }
-
-    #[inline]
-    pub fn buffer(&self) -> &[u8] {
-        self.bit_writer.buffer()
-    }
-
-    #[inline]
-    pub fn len(&self) -> usize {
-        self.bit_writer.bytes_written()
-    }
-
-    #[inline]
-    pub fn consume(mut self) -> Result<Vec<u8>> {
-        self.flush()?;
-        Ok(self.bit_writer.consume())
-    }
-
-    /// Borrow equivalent of the `consume` method.
-    /// Call `clear()` after invoking this method.
-    #[inline]
-    pub fn flush_buffer(&mut self) -> Result<&[u8]> {
-        self.flush()?;
-        Ok(self.bit_writer.flush_buffer())
-    }
-
-    /// Clears the internal state so this encoder can be reused (e.g., after becoming
-    /// full).
-    #[inline]
-    pub fn clear(&mut self) {
-        self.bit_writer.clear();
-        self.num_buffered_values = 0;
-        self.current_value = 0;
-        self.repeat_count = 0;
-        self.bit_packed_count = 0;
-        self.indicator_byte_pos = -1;
-    }
-
-    /// Flushes all remaining values and return the final byte buffer maintained by the
-    /// internal writer.
-    #[inline]
-    pub fn flush(&mut self) -> Result<()> {
-        if self.bit_packed_count > 0
-            || self.repeat_count > 0
-            || self.num_buffered_values > 0
-        {
-            let all_repeat = self.bit_packed_count == 0
-                && (self.repeat_count == self.num_buffered_values
-                    || self.num_buffered_values == 0);
-            if self.repeat_count > 0 && all_repeat {
-                self.flush_rle_run()?;
-            } else {
-                // Buffer the last group of bit-packed values to 8 by padding with 0s.
-                if self.num_buffered_values > 0 {
-                    while self.num_buffered_values < 8 {
-                        self.buffered_values[self.num_buffered_values] = 0;
-                        self.num_buffered_values += 1;
-                    }
-                }
-                self.bit_packed_count += self.num_buffered_values;
-                self.flush_bit_packed_run(true)?;
-                self.repeat_count = 0;
-            }
-        }
-        Ok(())
-    }
-
-    fn flush_rle_run(&mut self) -> Result<()> {
-        assert!(self.repeat_count > 0);
-        let indicator_value = self.repeat_count << 1;
-        let mut result = self.bit_writer.put_vlq_int(indicator_value as u64);
-        result &= self.bit_writer.put_aligned(
-            self.current_value,
-            bit_util::ceil(self.bit_width as i64, 8) as usize,
-        );
-        if !result {
-            return Err(general_err!("Failed to write RLE run"));
-        }
-        self.num_buffered_values = 0;
-        self.repeat_count = 0;
-        Ok(())
-    }
-
-    fn flush_bit_packed_run(&mut self, update_indicator_byte: bool) -> Result<()> {
-        if self.indicator_byte_pos < 0 {
-            self.indicator_byte_pos = self.bit_writer.skip(1)? as i64;
-        }
-
-        // Write all buffered values as bit-packed literals
-        for i in 0..self.num_buffered_values {
-            let _ = self
-                .bit_writer
-                .put_value(self.buffered_values[i], self.bit_width as usize);
-        }
-        self.num_buffered_values = 0;
-        if update_indicator_byte {
-            // Write the indicator byte to the reserved position in `bit_writer`
-            let num_groups = self.bit_packed_count / 8;
-            let indicator_byte = ((num_groups << 1) | 1) as u8;
-            if !self.bit_writer.put_aligned_offset(
-                indicator_byte,
-                1,
-                self.indicator_byte_pos as usize,
-            ) {
-                return Err(general_err!("Not enough space to write indicator byte"));
-            }
-            self.indicator_byte_pos = -1;
-            self.bit_packed_count = 0;
-        }
-        Ok(())
-    }
-
-    #[inline(never)]
-    fn flush_buffered_values(&mut self) -> Result<()> {
-        if self.repeat_count >= 8 {
-            self.num_buffered_values = 0;
-            if self.bit_packed_count > 0 {
-                // In this case we choose RLE encoding. Flush the current buffered values
-                // as bit-packed encoding.
-                assert_eq!(self.bit_packed_count % 8, 0);
-                self.flush_bit_packed_run(true)?
-            }
-            return Ok(());
-        }
-
-        self.bit_packed_count += self.num_buffered_values;
-        let num_groups = self.bit_packed_count / 8;
-        if num_groups + 1 >= MAX_GROUPS_PER_BIT_PACKED_RUN {
-            // We've reached the maximum value that can be hold in a single bit-packed
-            // run.
-            assert!(self.indicator_byte_pos >= 0);
-            self.flush_bit_packed_run(true)?;
-        } else {
-            self.flush_bit_packed_run(false)?;
-        }
-        self.repeat_count = 0;
-        Ok(())
-    }
-}
-
-/// A RLE/Bit-Packing hybrid decoder.
-pub struct RleDecoder {
-    // Number of bits used to encode the value. Must be between [0, 64].
-    bit_width: u8,
-
-    // Bit reader loaded with input buffer.
-    bit_reader: Option<BitReader>,
-
-    // Buffer used when `bit_reader` is not `None`, for batch reading.
-    index_buf: [i32; 1024],
-
-    // The remaining number of values in RLE for this run
-    rle_left: u32,
-
-    // The remaining number of values in Bit-Packing for this run
-    bit_packed_left: u32,
-
-    // The current value for the case of RLE mode
-    current_value: Option<u64>,
-}
-
-impl RleDecoder {
-    pub fn new(bit_width: u8) -> Self {
-        RleDecoder {
-            bit_width,
-            rle_left: 0,
-            bit_packed_left: 0,
-            bit_reader: None,
-            index_buf: [0; 1024],
-            current_value: None,
-        }
-    }
-
-    #[inline]
-    pub fn set_data(&mut self, data: ByteBufferPtr) {
-        if let Some(ref mut bit_reader) = self.bit_reader {
-            bit_reader.reset(data);
-        } else {
-            self.bit_reader = Some(BitReader::new(data));
-        }
-
-        let _ = self.reload();
-    }
-
-    // These functions inline badly, they tend to inline and then create very large loop unrolls
-    // that damage L1d-cache occupancy. This results in a ~18% performance drop
-    #[inline(never)]
-    pub fn get<T: FromBytes>(&mut self) -> Result<Option<T>> {
-        assert!(size_of::<T>() <= 8);
-
-        while self.rle_left == 0 && self.bit_packed_left == 0 {
-            if !self.reload() {
-                return Ok(None);
-            }
-        }
-
-        let value = if self.rle_left > 0 {
-            let rle_value = from_ne_slice(
-                &self
-                    .current_value
-                    .as_mut()
-                    .expect("current_value should be Some")
-                    .to_ne_bytes(),
-            );
-            self.rle_left -= 1;
-            rle_value
-        } else {
-            // self.bit_packed_left > 0
-            let bit_reader = self.bit_reader.as_mut().expect("bit_reader should be Some");
-            let bit_packed_value = bit_reader
-                .get_value(self.bit_width as usize)
-                .ok_or_else(|| eof_err!("Not enough data for 'bit_packed_value'"))?;
-            self.bit_packed_left -= 1;
-            bit_packed_value
-        };
-
-        Ok(Some(value))
-    }
-
-    #[inline(never)]
-    pub fn get_batch<T: FromBytes>(&mut self, buffer: &mut [T]) -> Result<usize> {
-        assert!(size_of::<T>() <= 8);
-
-        let mut values_read = 0;
-        while values_read < buffer.len() {
-            if self.rle_left > 0 {
-                let num_values =
-                    cmp::min(buffer.len() - values_read, self.rle_left as usize);
-                for i in 0..num_values {
-                    let repeated_value = from_ne_slice(
-                        &self.current_value.as_mut().unwrap().to_ne_bytes(),
-                    );
-                    buffer[values_read + i] = repeated_value;
-                }
-                self.rle_left -= num_values as u32;
-                values_read += num_values;
-            } else if self.bit_packed_left > 0 {
-                let mut num_values =
-                    cmp::min(buffer.len() - values_read, self.bit_packed_left as usize);
-                let bit_reader =
-                    self.bit_reader.as_mut().expect("bit_reader should be set");
-
-                num_values = bit_reader.get_batch::<T>(
-                    &mut buffer[values_read..values_read + num_values],
-                    self.bit_width as usize,
-                );
-                self.bit_packed_left -= num_values as u32;
-                values_read += num_values;
-            } else if !self.reload() {
-                break;
-            }
-        }
-
-        Ok(values_read)
-    }
-
-    #[inline(never)]
-    pub fn get_batch_with_dict<T>(
-        &mut self,
-        dict: &[T],
-        buffer: &mut [T],
-        max_values: usize,
-    ) -> Result<usize>
-    where
-        T: Default + Clone,
-    {
-        assert!(buffer.len() >= max_values);
-
-        let mut values_read = 0;
-        while values_read < max_values {
-            if self.rle_left > 0 {
-                let num_values =
-                    cmp::min(max_values - values_read, self.rle_left as usize);
-                let dict_idx = self.current_value.unwrap() as usize;
-                for i in 0..num_values {
-                    buffer[values_read + i].clone_from(&dict[dict_idx]);
-                }
-                self.rle_left -= num_values as u32;
-                values_read += num_values;
-            } else if self.bit_packed_left > 0 {
-                let bit_reader =
-                    self.bit_reader.as_mut().expect("bit_reader should be set");
-
-                let mut num_values =
-                    cmp::min(max_values - values_read, self.bit_packed_left as usize);
-
-                num_values = cmp::min(num_values, self.index_buf.len());
-                loop {
-                    num_values = bit_reader.get_batch::<i32>(
-                        &mut self.index_buf[..num_values],
-                        self.bit_width as usize,
-                    );
-                    for i in 0..num_values {
-                        buffer[values_read + i]
-                            .clone_from(&dict[self.index_buf[i] as usize])
-                    }
-                    self.bit_packed_left -= num_values as u32;
-                    values_read += num_values;
-                    if num_values < self.index_buf.len() {
-                        break;
-                    }
-                }
-            } else if !self.reload() {
-                break;
-            }
-        }
-
-        Ok(values_read)
-    }
-
-    #[inline]
-    fn reload(&mut self) -> bool {
-        let bit_reader = self.bit_reader.as_mut().expect("bit_reader should be set");
-
-        if let Some(indicator_value) = bit_reader.get_vlq_int() {
-            if indicator_value & 1 == 1 {
-                self.bit_packed_left = ((indicator_value >> 1) * 8) as u32;
-            } else {
-                self.rle_left = (indicator_value >> 1) as u32;
-                let value_width = bit_util::ceil(self.bit_width as i64, 8);
-                self.current_value = bit_reader.get_aligned::<u64>(value_width as usize);
-                assert!(self.current_value.is_some());
-            }
-            true
-        } else {
-            false
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    use rand::{self, distributions::Standard, thread_rng, Rng, SeedableRng};
-
-    use crate::util::memory::ByteBufferPtr;
-
-    const MAX_WIDTH: usize = 32;
-
-    #[test]
-    fn test_rle_decode_int32() {
-        // Test data: 0-7 with bit width 3
-        // 00000011 10001000 11000110 11111010
-        let data = ByteBufferPtr::new(vec![0x03, 0x88, 0xC6, 0xFA]);
-        let mut decoder: RleDecoder = RleDecoder::new(3);
-        decoder.set_data(data);
-        let mut buffer = vec![0; 8];
-        let expected = vec![0, 1, 2, 3, 4, 5, 6, 7];
-        let result = decoder.get_batch::<i32>(&mut buffer);
-        assert!(result.is_ok());
-        assert_eq!(buffer, expected);
-    }
-
-    #[test]
-    fn test_rle_consume_flush_buffer() {
-        let data = vec![1, 1, 1, 2, 2, 3, 3, 3];
-        let mut encoder1 = RleEncoder::new(3, 256);
-        let mut encoder2 = RleEncoder::new(3, 256);
-        for value in data {
-            encoder1.put(value as u64).unwrap();
-            encoder2.put(value as u64).unwrap();
-        }
-        let res1 = encoder1.flush_buffer().unwrap();
-        let res2 = encoder2.consume().unwrap();
-        assert_eq!(res1, &res2[..]);
-    }
-
-    #[test]
-    fn test_rle_decode_bool() {
-        // RLE test data: 50 1s followed by 50 0s
-        // 01100100 00000001 01100100 00000000
-        let data1 = ByteBufferPtr::new(vec![0x64, 0x01, 0x64, 0x00]);
-
-        // Bit-packing test data: alternating 1s and 0s, 100 total
-        // 100 / 8 = 13 groups
-        // 00011011 10101010 ... 00001010
-        let data2 = ByteBufferPtr::new(vec![
-            0x1B, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA,
-            0x0A,
-        ]);
-
-        let mut decoder: RleDecoder = RleDecoder::new(1);
-        decoder.set_data(data1);
-        let mut buffer = vec![false; 100];
-        let mut expected = vec![];
-        for i in 0..100 {
-            if i < 50 {
-                expected.push(true);
-            } else {
-                expected.push(false);
-            }
-        }
-        let result = decoder.get_batch::<bool>(&mut buffer);
-        assert!(result.is_ok());
-        assert_eq!(buffer, expected);
-
-        decoder.set_data(data2);
-        let mut buffer = vec![false; 100];
-        let mut expected = vec![];
-        for i in 0..100 {
-            if i % 2 == 0 {
-                expected.push(false);
-            } else {
-                expected.push(true);
-            }
-        }
-        let result = decoder.get_batch::<bool>(&mut buffer);
-        assert!(result.is_ok());
-        assert_eq!(buffer, expected);
-    }
-
-    #[test]
-    fn test_rle_decode_with_dict_int32() {
-        // Test RLE encoding: 3 0s followed by 4 1s followed by 5 2s
-        // 00000110 00000000 00001000 00000001 00001010 00000010
-        let dict = vec![10, 20, 30];
-        let data = ByteBufferPtr::new(vec![0x06, 0x00, 0x08, 0x01, 0x0A, 0x02]);
-        let mut decoder: RleDecoder = RleDecoder::new(3);
-        decoder.set_data(data);
-        let mut buffer = vec![0; 12];
-        let expected = vec![10, 10, 10, 20, 20, 20, 20, 30, 30, 30, 30, 30];
-        let result = decoder.get_batch_with_dict::<i32>(&dict, &mut buffer, 12);
-        assert!(result.is_ok());
-        assert_eq!(buffer, expected);
-
-        // Test bit-pack encoding: 345345345455 (2 groups: 8 and 4)
-        // 011 100 101 011 100 101 011 100 101 100 101 101
-        // 00000011 01100011 11000111 10001110 00000011 01100101 00001011
-        let dict = vec!["aaa", "bbb", "ccc", "ddd", "eee", "fff"];
-        let data = ByteBufferPtr::new(vec![0x03, 0x63, 0xC7, 0x8E, 0x03, 0x65, 0x0B]);
-        let mut decoder: RleDecoder = RleDecoder::new(3);
-        decoder.set_data(data);
-        let mut buffer = vec![""; 12];
-        let expected = vec![
-            "ddd", "eee", "fff", "ddd", "eee", "fff", "ddd", "eee", "fff", "eee", "fff",
-            "fff",
-        ];
-        let result = decoder.get_batch_with_dict::<&str>(
-            dict.as_slice(),
-            buffer.as_mut_slice(),
-            12,
-        );
-        assert!(result.is_ok());
-        assert_eq!(buffer, expected);
-    }
-
-    fn validate_rle(
-        values: &[i64],
-        bit_width: u8,
-        expected_encoding: Option<&[u8]>,
-        expected_len: i32,
-    ) {
-        let buffer_len = 64 * 1024;
-        let mut encoder = RleEncoder::new(bit_width, buffer_len);
-        for v in values {
-            let result = encoder.put(*v as u64);
-            assert!(result.is_ok());
-        }
-        let buffer = ByteBufferPtr::new(encoder.consume().expect("Expect consume() OK"));
-        if expected_len != -1 {
-            assert_eq!(buffer.len(), expected_len as usize);
-        }
-        if let Some(b) = expected_encoding {
-            assert_eq!(buffer.as_ref(), b);
-        }
-
-        // Verify read
-        let mut decoder = RleDecoder::new(bit_width);
-        decoder.set_data(buffer.all());
-        for v in values {
-            let val: i64 = decoder
-                .get()
-                .expect("get() should be OK")
-                .expect("get() should return more value");
-            assert_eq!(val, *v);
-        }
-
-        // Verify batch read
-        decoder.set_data(buffer);
-        let mut values_read: Vec<i64> = vec![0; values.len()];
-        decoder
-            .get_batch(&mut values_read[..])
-            .expect("get_batch() should be OK");
-        assert_eq!(&values_read[..], values);
-    }
-
-    #[test]
-    fn test_rle_specific_sequences() {
-        let mut expected_buffer = Vec::new();
-        let mut values = Vec::new();
-        for _ in 0..50 {
-            values.push(0);
-        }
-        for _ in 0..50 {
-            values.push(1);
-        }
-        expected_buffer.push(50 << 1);
-        expected_buffer.push(0);
-        expected_buffer.push(50 << 1);
-        expected_buffer.push(1);
-
-        for width in 1..9 {
-            validate_rle(&values[..], width, Some(&expected_buffer[..]), 4);
-        }
-        for width in 9..MAX_WIDTH + 1 {
-            validate_rle(
-                &values[..],
-                width as u8,
-                None,
-                2 * (1 + bit_util::ceil(width as i64, 8) as i32),
-            );
-        }
-
-        // Test 100 0's and 1's alternating
-        values.clear();
-        expected_buffer.clear();
-        for i in 0..101 {
-            values.push(i % 2);
-        }
-        let num_groups = bit_util::ceil(100, 8) as u8;
-        expected_buffer.push(((num_groups << 1) as u8) | 1);
-        for _ in 1..(100 / 8) + 1 {
-            expected_buffer.push(0b10101010);
-        }
-        // For the last 4 0 and 1's, padded with 0.
-        expected_buffer.push(0b00001010);
-        validate_rle(
-            &values,
-            1,
-            Some(&expected_buffer[..]),
-            1 + num_groups as i32,
-        );
-        for width in 2..MAX_WIDTH + 1 {
-            let num_values = bit_util::ceil(100, 8) * 8;
-            validate_rle(
-                &values,
-                width as u8,
-                None,
-                1 + bit_util::ceil(width as i64 * num_values, 8) as i32,
-            );
-        }
-    }
-
-    // `validate_rle` on `num_vals` with width `bit_width`. If `value` is -1, that value
-    // is used, otherwise alternating values are used.
-    fn test_rle_values(bit_width: usize, num_vals: usize, value: i32) {
-        let mod_val = if bit_width == 64 {
-            1
-        } else {
-            1u64 << bit_width
-        };
-        let mut values: Vec<i64> = vec![];
-        for v in 0..num_vals {
-            let val = if value == -1 {
-                v as i64 % mod_val as i64
-            } else {
-                value as i64
-            };
-            values.push(val);
-        }
-        validate_rle(&values, bit_width as u8, None, -1);
-    }
-
-    #[test]
-    fn test_values() {
-        for width in 1..MAX_WIDTH + 1 {
-            test_rle_values(width, 1, -1);
-            test_rle_values(width, 1024, -1);
-            test_rle_values(width, 1024, 0);
-            test_rle_values(width, 1024, 1);
-        }
-    }
-
-    #[test]
-    fn test_rle_specific_roundtrip() {
-        let bit_width = 1;
-        let buffer_len = RleEncoder::min_buffer_size(bit_width);
-        let values: Vec<i16> = vec![0, 1, 1, 1, 1, 0, 0, 0, 0, 1];
-        let mut encoder = RleEncoder::new(bit_width, buffer_len);
-        for v in &values {
-            assert!(encoder.put(*v as u64).expect("put() should be OK"));
-        }
-        let buffer = encoder.consume().expect("consume() should be OK");
-        let mut decoder = RleDecoder::new(bit_width);
-        decoder.set_data(ByteBufferPtr::new(buffer));
-        let mut actual_values: Vec<i16> = vec![0; values.len()];
-        decoder
-            .get_batch(&mut actual_values)
-            .expect("get_batch() should be OK");
-        assert_eq!(actual_values, values);
-    }
-
-    fn test_round_trip(values: &[i32], bit_width: u8) {
-        let buffer_len = 64 * 1024;
-        let mut encoder = RleEncoder::new(bit_width, buffer_len);
-        for v in values {
-            let result = encoder.put(*v as u64).expect("put() should be OK");
-            assert!(result, "put() should not return false");
-        }
-
-        let buffer =
-            ByteBufferPtr::new(encoder.consume().expect("consume() should be OK"));
-
-        // Verify read
-        let mut decoder = RleDecoder::new(bit_width);
-        decoder.set_data(buffer.all());
-        for v in values {
-            let val = decoder
-                .get::<i32>()
-                .expect("get() should be OK")
-                .expect("get() should return value");
-            assert_eq!(val, *v);
-        }
-
-        // Verify batch read
-        let mut decoder = RleDecoder::new(bit_width);
-        decoder.set_data(buffer);
-        let mut values_read: Vec<i32> = vec![0; values.len()];
-        decoder
-            .get_batch(&mut values_read[..])
-            .expect("get_batch() should be OK");
-        assert_eq!(&values_read[..], values);
-    }
-
-    #[test]
-    fn test_random() {
-        let seed_len = 32;
-        let niters = 50;
-        let ngroups = 1000;
-        let max_group_size = 15;
-        let mut values = vec![];
-
-        for _ in 0..niters {
-            values.clear();
-            let rng = thread_rng();
-            let seed_vec: Vec<u8> =
-                rng.sample_iter::<u8, _>(&Standard).take(seed_len).collect();
-            let mut seed = [0u8; 32];
-            seed.copy_from_slice(&seed_vec[0..seed_len]);
-            let mut gen = rand::rngs::StdRng::from_seed(seed);
-
-            let mut parity = false;
-            for _ in 0..ngroups {
-                let mut group_size = gen.gen_range(1..20);
-                if group_size > max_group_size {
-                    group_size = 1;
-                }
-                for _ in 0..group_size {
-                    values.push(parity as i32);
-                }
-                parity = !parity;
-            }
-            let bit_width = bit_util::num_required_bits(values.len() as u64);
-            assert!(bit_width < 64);
-            test_round_trip(&values[..], bit_width as u8);
-        }
-    }
-}
diff --git a/rust/parquet/src/errors.rs b/rust/parquet/src/errors.rs
deleted file mode 100644
index 021c1f063f8..00000000000
--- a/rust/parquet/src/errors.rs
+++ /dev/null
@@ -1,146 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Common Parquet errors and macros.
-
-use std::{cell, convert, io, result, str};
-
-#[cfg(any(feature = "arrow", test))]
-use arrow::error::ArrowError;
-
-#[derive(Debug, PartialEq)]
-pub enum ParquetError {
-    /// General Parquet error.
-    /// Returned when code violates normal workflow of working with Parquet files.
-    General(String),
-    /// "Not yet implemented" Parquet error.
-    /// Returned when functionality is not yet available.
-    NYI(String),
-    /// "End of file" Parquet error.
-    /// Returned when IO related failures occur, e.g. when there are not enough bytes to
-    /// decode.
-    EOF(String),
-    #[cfg(any(feature = "arrow", test))]
-    /// Arrow error.
-    /// Returned when reading into arrow or writing from arrow.
-    ArrowError(String),
-    IndexOutOfBound(usize, usize),
-}
-
-impl std::fmt::Display for ParquetError {
-    fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
-        match *self {
-            ParquetError::General(ref message) => {
-                write!(fmt, "Parquet error: {}", message)
-            }
-            ParquetError::NYI(ref message) => write!(fmt, "NYI: {}", message),
-            ParquetError::EOF(ref message) => write!(fmt, "EOF: {}", message),
-            #[cfg(any(feature = "arrow", test))]
-            ParquetError::ArrowError(ref message) => write!(fmt, "Arrow: {}", message),
-            ParquetError::IndexOutOfBound(ref index, ref bound) => {
-                write!(fmt, "Index {} out of bound: {}", index, bound)
-            }
-        }
-    }
-}
-
-impl std::error::Error for ParquetError {
-    fn cause(&self) -> Option<&dyn ::std::error::Error> {
-        None
-    }
-}
-
-impl From<io::Error> for ParquetError {
-    fn from(e: io::Error) -> ParquetError {
-        ParquetError::General(format!("underlying IO error: {}", e))
-    }
-}
-
-#[cfg(any(feature = "snap", test))]
-impl From<snap::Error> for ParquetError {
-    fn from(e: snap::Error) -> ParquetError {
-        ParquetError::General(format!("underlying snap error: {}", e))
-    }
-}
-
-impl From<thrift::Error> for ParquetError {
-    fn from(e: thrift::Error) -> ParquetError {
-        ParquetError::General(format!("underlying Thrift error: {}", e))
-    }
-}
-
-impl From<cell::BorrowMutError> for ParquetError {
-    fn from(e: cell::BorrowMutError) -> ParquetError {
-        ParquetError::General(format!("underlying borrow error: {}", e))
-    }
-}
-
-impl From<str::Utf8Error> for ParquetError {
-    fn from(e: str::Utf8Error) -> ParquetError {
-        ParquetError::General(format!("underlying utf8 error: {}", e))
-    }
-}
-
-#[cfg(any(feature = "arrow", test))]
-impl From<ArrowError> for ParquetError {
-    fn from(e: ArrowError) -> ParquetError {
-        ParquetError::ArrowError(format!("underlying Arrow error: {}", e))
-    }
-}
-
-/// A specialized `Result` for Parquet errors.
-pub type Result<T> = result::Result<T, ParquetError>;
-
-// ----------------------------------------------------------------------
-// Conversion from `ParquetError` to other types of `Error`s
-
-impl convert::From<ParquetError> for io::Error {
-    fn from(e: ParquetError) -> Self {
-        io::Error::new(io::ErrorKind::Other, e)
-    }
-}
-
-// ----------------------------------------------------------------------
-// Convenient macros for different errors
-
-macro_rules! general_err {
-    ($fmt:expr) => (ParquetError::General($fmt.to_owned()));
-    ($fmt:expr, $($args:expr),*) => (ParquetError::General(format!($fmt, $($args),*)));
-    ($e:expr, $fmt:expr) => (ParquetError::General($fmt.to_owned(), $e));
-    ($e:ident, $fmt:expr, $($args:tt),*) => (
-        ParquetError::General(&format!($fmt, $($args),*), $e));
-}
-
-macro_rules! nyi_err {
-    ($fmt:expr) => (ParquetError::NYI($fmt.to_owned()));
-    ($fmt:expr, $($args:expr),*) => (ParquetError::NYI(format!($fmt, $($args),*)));
-}
-
-macro_rules! eof_err {
-    ($fmt:expr) => (ParquetError::EOF($fmt.to_owned()));
-    ($fmt:expr, $($args:expr),*) => (ParquetError::EOF(format!($fmt, $($args),*)));
-}
-
-// ----------------------------------------------------------------------
-// Convert parquet error into other errors
-
-#[cfg(any(feature = "arrow", test))]
-impl Into<ArrowError> for ParquetError {
-    fn into(self) -> ArrowError {
-        ArrowError::ParquetError(format!("{}", self))
-    }
-}
diff --git a/rust/parquet/src/file/footer.rs b/rust/parquet/src/file/footer.rs
deleted file mode 100644
index 2e572944868..00000000000
--- a/rust/parquet/src/file/footer.rs
+++ /dev/null
@@ -1,263 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::{
-    cmp::min,
-    io::{Cursor, Read, Seek, SeekFrom},
-    sync::Arc,
-};
-
-use byteorder::{ByteOrder, LittleEndian};
-use parquet_format::{ColumnOrder as TColumnOrder, FileMetaData as TFileMetaData};
-use thrift::protocol::TCompactInputProtocol;
-
-use crate::basic::ColumnOrder;
-
-use crate::errors::{ParquetError, Result};
-use crate::file::{
-    metadata::*, reader::ChunkReader, DEFAULT_FOOTER_READ_SIZE, FOOTER_SIZE,
-    PARQUET_MAGIC,
-};
-
-use crate::schema::types::{self, SchemaDescriptor};
-
-/// Layout of Parquet file
-/// +---------------------------+-----+---+
-/// |      Rest of file         |  B  | A |
-/// +---------------------------+-----+---+
-/// where A: parquet footer, B: parquet metadata.
-///
-/// The reader first reads DEFAULT_FOOTER_SIZE bytes from the end of the file.
-/// If it is not enough according to the length indicated in the footer, it reads more bytes.
-pub fn parse_metadata<R: ChunkReader>(chunk_reader: &R) -> Result<ParquetMetaData> {
-    // check file is large enough to hold footer
-    let file_size = chunk_reader.len();
-    if file_size < (FOOTER_SIZE as u64) {
-        return Err(general_err!(
-            "Invalid Parquet file. Size is smaller than footer"
-        ));
-    }
-
-    // read and cache up to DEFAULT_FOOTER_READ_SIZE bytes from the end and process the footer
-    let default_end_len = min(DEFAULT_FOOTER_READ_SIZE, chunk_reader.len() as usize);
-    let mut default_end_reader = chunk_reader
-        .get_read(chunk_reader.len() - default_end_len as u64, default_end_len)?;
-    let mut default_len_end_buf = vec![0; default_end_len];
-    default_end_reader.read_exact(&mut default_len_end_buf)?;
-
-    // check this is indeed a parquet file
-    if default_len_end_buf[default_end_len - 4..] != PARQUET_MAGIC {
-        return Err(general_err!("Invalid Parquet file. Corrupt footer"));
-    }
-
-    // get the metadata length from the footer
-    let metadata_len = LittleEndian::read_i32(
-        &default_len_end_buf[default_end_len - 8..default_end_len - 4],
-    ) as i64;
-    if metadata_len < 0 {
-        return Err(general_err!(
-            "Invalid Parquet file. Metadata length is less than zero ({})",
-            metadata_len
-        ));
-    }
-    let footer_metadata_len = FOOTER_SIZE + metadata_len as usize;
-
-    // build up the reader covering the entire metadata
-    let mut default_end_cursor = Cursor::new(default_len_end_buf);
-    let metadata_read: Box<dyn Read>;
-    if footer_metadata_len > file_size as usize {
-        return Err(general_err!(
-            "Invalid Parquet file. Metadata start is less than zero ({})",
-            file_size as i64 - footer_metadata_len as i64
-        ));
-    } else if footer_metadata_len < DEFAULT_FOOTER_READ_SIZE {
-        // the whole metadata is in the bytes we already read
-        default_end_cursor.seek(SeekFrom::End(-(footer_metadata_len as i64)))?;
-        metadata_read = Box::new(default_end_cursor);
-    } else {
-        // the end of file read by default is not long enough, read missing bytes
-        let complementary_end_read = chunk_reader.get_read(
-            file_size - footer_metadata_len as u64,
-            FOOTER_SIZE + metadata_len as usize - default_end_len,
-        )?;
-        metadata_read = Box::new(complementary_end_read.chain(default_end_cursor));
-    }
-
-    // TODO: row group filtering
-    let mut prot = TCompactInputProtocol::new(metadata_read);
-    let t_file_metadata: TFileMetaData = TFileMetaData::read_from_in_protocol(&mut prot)
-        .map_err(|e| ParquetError::General(format!("Could not parse metadata: {}", e)))?;
-    let schema = types::from_thrift(&t_file_metadata.schema)?;
-    let schema_descr = Arc::new(SchemaDescriptor::new(schema));
-    let mut row_groups = Vec::new();
-    for rg in t_file_metadata.row_groups {
-        row_groups.push(RowGroupMetaData::from_thrift(schema_descr.clone(), rg)?);
-    }
-    let column_orders = parse_column_orders(t_file_metadata.column_orders, &schema_descr);
-
-    let file_metadata = FileMetaData::new(
-        t_file_metadata.version,
-        t_file_metadata.num_rows,
-        t_file_metadata.created_by,
-        t_file_metadata.key_value_metadata,
-        schema_descr,
-        column_orders,
-    );
-    Ok(ParquetMetaData::new(file_metadata, row_groups))
-}
-
-/// Parses column orders from Thrift definition.
-/// If no column orders are defined, returns `None`.
-fn parse_column_orders(
-    t_column_orders: Option<Vec<TColumnOrder>>,
-    schema_descr: &SchemaDescriptor,
-) -> Option<Vec<ColumnOrder>> {
-    match t_column_orders {
-        Some(orders) => {
-            // Should always be the case
-            assert_eq!(
-                orders.len(),
-                schema_descr.num_columns(),
-                "Column order length mismatch"
-            );
-            let mut res = Vec::new();
-            for (i, column) in schema_descr.columns().iter().enumerate() {
-                match orders[i] {
-                    TColumnOrder::TYPEORDER(_) => {
-                        let sort_order = ColumnOrder::get_sort_order(
-                            column.logical_type(),
-                            column.converted_type(),
-                            column.physical_type(),
-                        );
-                        res.push(ColumnOrder::TYPE_DEFINED_ORDER(sort_order));
-                    }
-                }
-            }
-            Some(res)
-        }
-        None => None,
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    use crate::basic::SortOrder;
-    use crate::basic::Type;
-    use crate::schema::types::Type as SchemaType;
-    use crate::util::test_common::get_temp_file;
-    use parquet_format::TypeDefinedOrder;
-
-    #[test]
-    fn test_parse_metadata_size_smaller_than_footer() {
-        let test_file = get_temp_file("corrupt-1.parquet", &[]);
-        let reader_result = parse_metadata(&test_file);
-        assert!(reader_result.is_err());
-        assert_eq!(
-            reader_result.err().unwrap(),
-            general_err!("Invalid Parquet file. Size is smaller than footer")
-        );
-    }
-
-    #[test]
-    fn test_parse_metadata_corrupt_footer() {
-        let test_file = get_temp_file("corrupt-2.parquet", &[1, 2, 3, 4, 5, 6, 7, 8]);
-        let reader_result = parse_metadata(&test_file);
-        assert!(reader_result.is_err());
-        assert_eq!(
-            reader_result.err().unwrap(),
-            general_err!("Invalid Parquet file. Corrupt footer")
-        );
-    }
-
-    #[test]
-    fn test_parse_metadata_invalid_length() {
-        let test_file =
-            get_temp_file("corrupt-3.parquet", &[0, 0, 0, 255, b'P', b'A', b'R', b'1']);
-        let reader_result = parse_metadata(&test_file);
-        assert!(reader_result.is_err());
-        assert_eq!(
-            reader_result.err().unwrap(),
-            general_err!(
-                "Invalid Parquet file. Metadata length is less than zero (-16777216)"
-            )
-        );
-    }
-
-    #[test]
-    fn test_parse_metadata_invalid_start() {
-        let test_file =
-            get_temp_file("corrupt-4.parquet", &[255, 0, 0, 0, b'P', b'A', b'R', b'1']);
-        let reader_result = parse_metadata(&test_file);
-        assert!(reader_result.is_err());
-        assert_eq!(
-            reader_result.err().unwrap(),
-            general_err!("Invalid Parquet file. Metadata start is less than zero (-255)")
-        );
-    }
-
-    #[test]
-    fn test_metadata_column_orders_parse() {
-        // Define simple schema, we do not need to provide logical types.
-        let mut fields = vec![
-            Arc::new(
-                SchemaType::primitive_type_builder("col1", Type::INT32)
-                    .build()
-                    .unwrap(),
-            ),
-            Arc::new(
-                SchemaType::primitive_type_builder("col2", Type::FLOAT)
-                    .build()
-                    .unwrap(),
-            ),
-        ];
-        let schema = SchemaType::group_type_builder("schema")
-            .with_fields(&mut fields)
-            .build()
-            .unwrap();
-        let schema_descr = SchemaDescriptor::new(Arc::new(schema));
-
-        let t_column_orders = Some(vec![
-            TColumnOrder::TYPEORDER(TypeDefinedOrder::new()),
-            TColumnOrder::TYPEORDER(TypeDefinedOrder::new()),
-        ]);
-
-        assert_eq!(
-            parse_column_orders(t_column_orders, &schema_descr),
-            Some(vec![
-                ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED),
-                ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED)
-            ])
-        );
-
-        // Test when no column orders are defined.
-        assert_eq!(parse_column_orders(None, &schema_descr), None);
-    }
-
-    #[test]
-    #[should_panic(expected = "Column order length mismatch")]
-    fn test_metadata_column_orders_len_mismatch() {
-        let schema = SchemaType::group_type_builder("schema").build().unwrap();
-        let schema_descr = SchemaDescriptor::new(Arc::new(schema));
-
-        let t_column_orders =
-            Some(vec![TColumnOrder::TYPEORDER(TypeDefinedOrder::new())]);
-
-        parse_column_orders(t_column_orders, &schema_descr);
-    }
-}
diff --git a/rust/parquet/src/file/metadata.rs b/rust/parquet/src/file/metadata.rs
deleted file mode 100644
index 150c42c578a..00000000000
--- a/rust/parquet/src/file/metadata.rs
+++ /dev/null
@@ -1,789 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Contains information about available Parquet metadata.
-//!
-//! The hierarchy of metadata is as follows:
-//!
-//! [`ParquetMetaData`](struct.ParquetMetaData.html) contains
-//! [`FileMetaData`](struct.FileMetaData.html) and zero or more
-//! [`RowGroupMetaData`](struct.RowGroupMetaData.html) for each row group.
-//!
-//! [`FileMetaData`](struct.FileMetaData.html) includes file version, application specific
-//! metadata.
-//!
-//! Each [`RowGroupMetaData`](struct.RowGroupMetaData.html) contains information about row
-//! group and one or more [`ColumnChunkMetaData`](struct.ColumnChunkMetaData.html) for
-//! each column chunk.
-//!
-//! [`ColumnChunkMetaData`](struct.ColumnChunkMetaData.html) has information about column
-//! chunk (primitive leaf column), including encoding/compression, number of values, etc.
-
-use std::sync::Arc;
-
-use parquet_format::{ColumnChunk, ColumnMetaData, RowGroup};
-
-use crate::basic::{ColumnOrder, Compression, Encoding, Type};
-use crate::errors::{ParquetError, Result};
-use crate::file::statistics::{self, Statistics};
-use crate::schema::types::{
-    ColumnDescPtr, ColumnDescriptor, ColumnPath, SchemaDescPtr, SchemaDescriptor,
-    Type as SchemaType,
-};
-
-/// Global Parquet metadata.
-#[derive(Debug, Clone)]
-pub struct ParquetMetaData {
-    file_metadata: FileMetaData,
-    row_groups: Vec<RowGroupMetaData>,
-}
-
-impl ParquetMetaData {
-    /// Creates Parquet metadata from file metadata and a list of row group metadata `Arc`s
-    /// for each available row group.
-    pub fn new(file_metadata: FileMetaData, row_groups: Vec<RowGroupMetaData>) -> Self {
-        ParquetMetaData {
-            file_metadata,
-            row_groups,
-        }
-    }
-
-    /// Returns file metadata as reference.
-    pub fn file_metadata(&self) -> &FileMetaData {
-        &self.file_metadata
-    }
-
-    /// Returns number of row groups in this file.
-    pub fn num_row_groups(&self) -> usize {
-        self.row_groups.len()
-    }
-
-    /// Returns row group metadata for `i`th position.
-    /// Position should be less than number of row groups `num_row_groups`.
-    pub fn row_group(&self, i: usize) -> &RowGroupMetaData {
-        &self.row_groups[i]
-    }
-
-    /// Returns slice of row groups in this file.
-    pub fn row_groups(&self) -> &[RowGroupMetaData] {
-        &self.row_groups
-    }
-}
-
-pub type KeyValue = parquet_format::KeyValue;
-
-/// Reference counted pointer for [`FileMetaData`].
-pub type FileMetaDataPtr = Arc<FileMetaData>;
-
-/// Metadata for a Parquet file.
-#[derive(Debug, Clone)]
-pub struct FileMetaData {
-    version: i32,
-    num_rows: i64,
-    created_by: Option<String>,
-    key_value_metadata: Option<Vec<KeyValue>>,
-    schema_descr: SchemaDescPtr,
-    column_orders: Option<Vec<ColumnOrder>>,
-}
-
-impl FileMetaData {
-    /// Creates new file metadata.
-    pub fn new(
-        version: i32,
-        num_rows: i64,
-        created_by: Option<String>,
-        key_value_metadata: Option<Vec<KeyValue>>,
-        schema_descr: SchemaDescPtr,
-        column_orders: Option<Vec<ColumnOrder>>,
-    ) -> Self {
-        FileMetaData {
-            version,
-            num_rows,
-            created_by,
-            key_value_metadata,
-            schema_descr,
-            column_orders,
-        }
-    }
-
-    /// Returns version of this file.
-    pub fn version(&self) -> i32 {
-        self.version
-    }
-
-    /// Returns number of rows in the file.
-    pub fn num_rows(&self) -> i64 {
-        self.num_rows
-    }
-
-    /// String message for application that wrote this file.
-    ///
-    /// This should have the following format:
-    /// `<application> version <application version> (build <application build hash>)`.
-    ///
-    /// ```shell
-    /// parquet-mr version 1.8.0 (build 0fda28af84b9746396014ad6a415b90592a98b3b)
-    /// ```
-    pub fn created_by(&self) -> &Option<String> {
-        &self.created_by
-    }
-
-    /// Returns key_value_metadata of this file.
-    pub fn key_value_metadata(&self) -> &Option<Vec<KeyValue>> {
-        &self.key_value_metadata
-    }
-
-    /// Returns Parquet ['Type`] that describes schema in this file.
-    pub fn schema(&self) -> &SchemaType {
-        self.schema_descr.root_schema()
-    }
-
-    /// Returns a reference to schema descriptor.
-    pub fn schema_descr(&self) -> &SchemaDescriptor {
-        &self.schema_descr
-    }
-
-    /// Returns reference counted clone for schema descriptor.
-    pub fn schema_descr_ptr(&self) -> SchemaDescPtr {
-        self.schema_descr.clone()
-    }
-
-    /// Column (sort) order used for `min` and `max` values of each column in this file.
-    ///
-    /// Each column order corresponds to one column, determined by its position in the
-    /// list, matching the position of the column in the schema.
-    ///
-    /// When `None` is returned, there are no column orders available, and each column
-    /// should be assumed to have undefined (legacy) column order.
-    pub fn column_orders(&self) -> Option<&Vec<ColumnOrder>> {
-        self.column_orders.as_ref()
-    }
-
-    /// Returns column order for `i`th column in this file.
-    /// If column orders are not available, returns undefined (legacy) column order.
-    pub fn column_order(&self, i: usize) -> ColumnOrder {
-        self.column_orders
-            .as_ref()
-            .map(|data| data[i])
-            .unwrap_or(ColumnOrder::UNDEFINED)
-    }
-}
-
-/// Reference counted pointer for [`RowGroupMetaData`].
-pub type RowGroupMetaDataPtr = Arc<RowGroupMetaData>;
-
-/// Metadata for a row group.
-#[derive(Debug, Clone)]
-pub struct RowGroupMetaData {
-    columns: Vec<ColumnChunkMetaData>,
-    num_rows: i64,
-    total_byte_size: i64,
-    schema_descr: SchemaDescPtr,
-}
-
-impl RowGroupMetaData {
-    /// Returns builer for row group metadata.
-    pub fn builder(schema_descr: SchemaDescPtr) -> RowGroupMetaDataBuilder {
-        RowGroupMetaDataBuilder::new(schema_descr)
-    }
-
-    /// Number of columns in this row group.
-    pub fn num_columns(&self) -> usize {
-        self.columns.len()
-    }
-
-    /// Returns column chunk metadata for `i`th column.
-    pub fn column(&self, i: usize) -> &ColumnChunkMetaData {
-        &self.columns[i]
-    }
-
-    /// Returns slice of column chunk metadata.
-    pub fn columns(&self) -> &[ColumnChunkMetaData] {
-        &self.columns
-    }
-
-    /// Number of rows in this row group.
-    pub fn num_rows(&self) -> i64 {
-        self.num_rows
-    }
-
-    /// Total byte size of all uncompressed column data in this row group.
-    pub fn total_byte_size(&self) -> i64 {
-        self.total_byte_size
-    }
-
-    /// Total size of all compressed column data in this row group.
-    pub fn compressed_size(&self) -> i64 {
-        self.columns.iter().map(|c| c.total_compressed_size).sum()
-    }
-
-    /// Returns reference to a schema descriptor.
-    pub fn schema_descr(&self) -> &SchemaDescriptor {
-        self.schema_descr.as_ref()
-    }
-
-    /// Returns reference counted clone of schema descriptor.
-    pub fn schema_descr_ptr(&self) -> SchemaDescPtr {
-        self.schema_descr.clone()
-    }
-
-    /// Method to convert from Thrift.
-    pub fn from_thrift(
-        schema_descr: SchemaDescPtr,
-        mut rg: RowGroup,
-    ) -> Result<RowGroupMetaData> {
-        assert_eq!(schema_descr.num_columns(), rg.columns.len());
-        let total_byte_size = rg.total_byte_size;
-        let num_rows = rg.num_rows;
-        let mut columns = vec![];
-        for (c, d) in rg.columns.drain(0..).zip(schema_descr.columns()) {
-            let cc = ColumnChunkMetaData::from_thrift(d.clone(), c)?;
-            columns.push(cc);
-        }
-        Ok(RowGroupMetaData {
-            columns,
-            num_rows,
-            total_byte_size,
-            schema_descr,
-        })
-    }
-
-    /// Method to convert to Thrift.
-    pub fn to_thrift(&self) -> RowGroup {
-        RowGroup {
-            columns: self.columns().iter().map(|v| v.to_thrift()).collect(),
-            total_byte_size: self.total_byte_size,
-            num_rows: self.num_rows,
-            sorting_columns: None,
-        }
-    }
-}
-
-/// Builder for row group metadata.
-pub struct RowGroupMetaDataBuilder {
-    columns: Vec<ColumnChunkMetaData>,
-    schema_descr: SchemaDescPtr,
-    num_rows: i64,
-    total_byte_size: i64,
-}
-
-impl RowGroupMetaDataBuilder {
-    /// Creates new builder from schema descriptor.
-    fn new(schema_descr: SchemaDescPtr) -> Self {
-        Self {
-            columns: Vec::with_capacity(schema_descr.num_columns()),
-            schema_descr,
-            num_rows: 0,
-            total_byte_size: 0,
-        }
-    }
-
-    /// Sets number of rows in this row group.
-    pub fn set_num_rows(mut self, value: i64) -> Self {
-        self.num_rows = value;
-        self
-    }
-
-    /// Sets total size in bytes for this row group.
-    pub fn set_total_byte_size(mut self, value: i64) -> Self {
-        self.total_byte_size = value;
-        self
-    }
-
-    /// Sets column metadata for this row group.
-    pub fn set_column_metadata(mut self, value: Vec<ColumnChunkMetaData>) -> Self {
-        self.columns = value;
-        self
-    }
-
-    /// Builds row group metadata.
-    pub fn build(self) -> Result<RowGroupMetaData> {
-        if self.schema_descr.num_columns() != self.columns.len() {
-            return Err(general_err!(
-                "Column length mismatch: {} != {}",
-                self.schema_descr.num_columns(),
-                self.columns.len()
-            ));
-        }
-
-        Ok(RowGroupMetaData {
-            columns: self.columns,
-            num_rows: self.num_rows,
-            total_byte_size: self.total_byte_size,
-            schema_descr: self.schema_descr,
-        })
-    }
-}
-
-/// Metadata for a column chunk.
-#[derive(Debug, Clone)]
-pub struct ColumnChunkMetaData {
-    column_type: Type,
-    column_path: ColumnPath,
-    column_descr: ColumnDescPtr,
-    encodings: Vec<Encoding>,
-    file_path: Option<String>,
-    file_offset: i64,
-    num_values: i64,
-    compression: Compression,
-    total_compressed_size: i64,
-    total_uncompressed_size: i64,
-    data_page_offset: i64,
-    index_page_offset: Option<i64>,
-    dictionary_page_offset: Option<i64>,
-    statistics: Option<Statistics>,
-}
-
-/// Represents common operations for a column chunk.
-impl ColumnChunkMetaData {
-    /// Returns builder for column chunk metadata.
-    pub fn builder(column_descr: ColumnDescPtr) -> ColumnChunkMetaDataBuilder {
-        ColumnChunkMetaDataBuilder::new(column_descr)
-    }
-
-    /// File where the column chunk is stored.
-    ///
-    /// If not set, assumed to belong to the same file as the metadata.
-    /// This path is relative to the current file.
-    pub fn file_path(&self) -> Option<&String> {
-        self.file_path.as_ref()
-    }
-
-    /// Byte offset in `file_path()`.
-    pub fn file_offset(&self) -> i64 {
-        self.file_offset
-    }
-
-    /// Type of this column. Must be primitive.
-    pub fn column_type(&self) -> Type {
-        self.column_type
-    }
-
-    /// Path (or identifier) of this column.
-    pub fn column_path(&self) -> &ColumnPath {
-        &self.column_path
-    }
-
-    /// Descriptor for this column.
-    pub fn column_descr(&self) -> &ColumnDescriptor {
-        self.column_descr.as_ref()
-    }
-
-    /// Reference counted clone of descriptor for this column.
-    pub fn column_descr_ptr(&self) -> ColumnDescPtr {
-        self.column_descr.clone()
-    }
-
-    /// All encodings used for this column.
-    pub fn encodings(&self) -> &Vec<Encoding> {
-        &self.encodings
-    }
-
-    /// Total number of values in this column chunk.
-    pub fn num_values(&self) -> i64 {
-        self.num_values
-    }
-
-    /// Compression for this column.
-    pub fn compression(&self) -> Compression {
-        self.compression
-    }
-
-    /// Returns the total compressed data size of this column chunk.
-    pub fn compressed_size(&self) -> i64 {
-        self.total_compressed_size
-    }
-
-    /// Returns the total uncompressed data size of this column chunk.
-    pub fn uncompressed_size(&self) -> i64 {
-        self.total_uncompressed_size
-    }
-
-    /// Returns the offset for the column data.
-    pub fn data_page_offset(&self) -> i64 {
-        self.data_page_offset
-    }
-
-    /// Returns `true` if this column chunk contains a index page, `false` otherwise.
-    pub fn has_index_page(&self) -> bool {
-        self.index_page_offset.is_some()
-    }
-
-    /// Returns the offset for the index page.
-    pub fn index_page_offset(&self) -> Option<i64> {
-        self.index_page_offset
-    }
-
-    /// Returns `true` if this column chunk contains a dictionary page, `false` otherwise.
-    pub fn has_dictionary_page(&self) -> bool {
-        self.dictionary_page_offset.is_some()
-    }
-
-    /// Returns the offset for the dictionary page, if any.
-    pub fn dictionary_page_offset(&self) -> Option<i64> {
-        self.dictionary_page_offset
-    }
-
-    /// Returns the offset and length in bytes of the column chunk within the file
-    pub fn byte_range(&self) -> (u64, u64) {
-        let col_start = if self.has_dictionary_page() {
-            self.dictionary_page_offset().unwrap()
-        } else {
-            self.data_page_offset()
-        };
-        let col_len = self.compressed_size();
-        assert!(
-            col_start >= 0 && col_len >= 0,
-            "column start and length should not be negative"
-        );
-        (col_start as u64, col_len as u64)
-    }
-
-    /// Returns statistics that are set for this column chunk,
-    /// or `None` if no statistics are available.
-    pub fn statistics(&self) -> Option<&Statistics> {
-        self.statistics.as_ref()
-    }
-
-    /// Method to convert from Thrift.
-    pub fn from_thrift(column_descr: ColumnDescPtr, cc: ColumnChunk) -> Result<Self> {
-        if cc.meta_data.is_none() {
-            return Err(general_err!("Expected to have column metadata"));
-        }
-        let mut col_metadata: ColumnMetaData = cc.meta_data.unwrap();
-        let column_type = Type::from(col_metadata.type_);
-        let column_path = ColumnPath::new(col_metadata.path_in_schema);
-        let encodings = col_metadata
-            .encodings
-            .drain(0..)
-            .map(Encoding::from)
-            .collect();
-        let compression = Compression::from(col_metadata.codec);
-        let file_path = cc.file_path;
-        let file_offset = cc.file_offset;
-        let num_values = col_metadata.num_values;
-        let total_compressed_size = col_metadata.total_compressed_size;
-        let total_uncompressed_size = col_metadata.total_uncompressed_size;
-        let data_page_offset = col_metadata.data_page_offset;
-        let index_page_offset = col_metadata.index_page_offset;
-        let dictionary_page_offset = col_metadata.dictionary_page_offset;
-        let statistics = statistics::from_thrift(column_type, col_metadata.statistics);
-        let result = ColumnChunkMetaData {
-            column_type,
-            column_path,
-            column_descr,
-            encodings,
-            file_path,
-            file_offset,
-            num_values,
-            compression,
-            total_compressed_size,
-            total_uncompressed_size,
-            data_page_offset,
-            index_page_offset,
-            dictionary_page_offset,
-            statistics,
-        };
-        Ok(result)
-    }
-
-    /// Method to convert to Thrift.
-    pub fn to_thrift(&self) -> ColumnChunk {
-        let column_metadata = ColumnMetaData {
-            type_: self.column_type.into(),
-            encodings: self.encodings().iter().map(|&v| v.into()).collect(),
-            path_in_schema: Vec::from(self.column_path.as_ref()),
-            codec: self.compression.into(),
-            num_values: self.num_values,
-            total_uncompressed_size: self.total_uncompressed_size,
-            total_compressed_size: self.total_compressed_size,
-            key_value_metadata: None,
-            data_page_offset: self.data_page_offset,
-            index_page_offset: self.index_page_offset,
-            dictionary_page_offset: self.dictionary_page_offset,
-            statistics: statistics::to_thrift(self.statistics.as_ref()),
-            encoding_stats: None,
-        };
-
-        ColumnChunk {
-            file_path: self.file_path().cloned(),
-            file_offset: self.file_offset,
-            meta_data: Some(column_metadata),
-            offset_index_offset: None,
-            offset_index_length: None,
-            column_index_offset: None,
-            column_index_length: None,
-        }
-    }
-}
-
-/// Builder for column chunk metadata.
-pub struct ColumnChunkMetaDataBuilder {
-    column_descr: ColumnDescPtr,
-    encodings: Vec<Encoding>,
-    file_path: Option<String>,
-    file_offset: i64,
-    num_values: i64,
-    compression: Compression,
-    total_compressed_size: i64,
-    total_uncompressed_size: i64,
-    data_page_offset: i64,
-    index_page_offset: Option<i64>,
-    dictionary_page_offset: Option<i64>,
-    statistics: Option<Statistics>,
-}
-
-impl ColumnChunkMetaDataBuilder {
-    /// Creates new column chunk metadata builder.
-    fn new(column_descr: ColumnDescPtr) -> Self {
-        Self {
-            column_descr,
-            encodings: Vec::new(),
-            file_path: None,
-            file_offset: 0,
-            num_values: 0,
-            compression: Compression::UNCOMPRESSED,
-            total_compressed_size: 0,
-            total_uncompressed_size: 0,
-            data_page_offset: 0,
-            index_page_offset: None,
-            dictionary_page_offset: None,
-            statistics: None,
-        }
-    }
-
-    /// Sets list of encodings for this column chunk.
-    pub fn set_encodings(mut self, encodings: Vec<Encoding>) -> Self {
-        self.encodings = encodings;
-        self
-    }
-
-    /// Sets optional file path for this column chunk.
-    pub fn set_file_path(mut self, value: String) -> Self {
-        self.file_path = Some(value);
-        self
-    }
-
-    /// Sets file offset in bytes.
-    pub fn set_file_offset(mut self, value: i64) -> Self {
-        self.file_offset = value;
-        self
-    }
-
-    /// Sets number of values.
-    pub fn set_num_values(mut self, value: i64) -> Self {
-        self.num_values = value;
-        self
-    }
-
-    /// Sets compression.
-    pub fn set_compression(mut self, value: Compression) -> Self {
-        self.compression = value;
-        self
-    }
-
-    /// Sets total compressed size in bytes.
-    pub fn set_total_compressed_size(mut self, value: i64) -> Self {
-        self.total_compressed_size = value;
-        self
-    }
-
-    /// Sets total uncompressed size in bytes.
-    pub fn set_total_uncompressed_size(mut self, value: i64) -> Self {
-        self.total_uncompressed_size = value;
-        self
-    }
-
-    /// Sets data page offset in bytes.
-    pub fn set_data_page_offset(mut self, value: i64) -> Self {
-        self.data_page_offset = value;
-        self
-    }
-
-    /// Sets optional dictionary page ofset in bytes.
-    pub fn set_dictionary_page_offset(mut self, value: Option<i64>) -> Self {
-        self.dictionary_page_offset = value;
-        self
-    }
-
-    /// Sets optional index page offset in bytes.
-    pub fn set_index_page_offset(mut self, value: Option<i64>) -> Self {
-        self.index_page_offset = value;
-        self
-    }
-
-    /// Sets statistics for this column chunk.
-    pub fn set_statistics(mut self, value: Statistics) -> Self {
-        self.statistics = Some(value);
-        self
-    }
-
-    /// Builds column chunk metadata.
-    pub fn build(self) -> Result<ColumnChunkMetaData> {
-        Ok(ColumnChunkMetaData {
-            column_type: self.column_descr.physical_type(),
-            column_path: self.column_descr.path().clone(),
-            column_descr: self.column_descr,
-            encodings: self.encodings,
-            file_path: self.file_path,
-            file_offset: self.file_offset,
-            num_values: self.num_values,
-            compression: self.compression,
-            total_compressed_size: self.total_compressed_size,
-            total_uncompressed_size: self.total_uncompressed_size,
-            data_page_offset: self.data_page_offset,
-            index_page_offset: self.index_page_offset,
-            dictionary_page_offset: self.dictionary_page_offset,
-            statistics: self.statistics,
-        })
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_row_group_metadata_thrift_conversion() {
-        let schema_descr = get_test_schema_descr();
-
-        let mut columns = vec![];
-        for ptr in schema_descr.columns() {
-            let column = ColumnChunkMetaData::builder(ptr.clone()).build().unwrap();
-            columns.push(column);
-        }
-        let row_group_meta = RowGroupMetaData::builder(schema_descr.clone())
-            .set_num_rows(1000)
-            .set_total_byte_size(2000)
-            .set_column_metadata(columns)
-            .build()
-            .unwrap();
-
-        let row_group_exp = row_group_meta.to_thrift();
-        let row_group_res =
-            RowGroupMetaData::from_thrift(schema_descr, row_group_exp.clone())
-                .unwrap()
-                .to_thrift();
-
-        assert_eq!(row_group_res, row_group_exp);
-    }
-
-    #[test]
-    fn test_row_group_metadata_thrift_conversion_empty() {
-        let schema_descr = get_test_schema_descr();
-
-        let row_group_meta = RowGroupMetaData::builder(schema_descr).build();
-
-        assert!(row_group_meta.is_err());
-        if let Err(e) = row_group_meta {
-            assert_eq!(
-                format!("{}", e),
-                "Parquet error: Column length mismatch: 2 != 0"
-            );
-        }
-    }
-
-    #[test]
-    fn test_column_chunk_metadata_thrift_conversion() {
-        let column_descr = get_test_schema_descr().column(0);
-
-        let col_metadata = ColumnChunkMetaData::builder(column_descr.clone())
-            .set_encodings(vec![Encoding::PLAIN, Encoding::RLE])
-            .set_file_path("file_path".to_owned())
-            .set_file_offset(100)
-            .set_num_values(1000)
-            .set_compression(Compression::SNAPPY)
-            .set_total_compressed_size(2000)
-            .set_total_uncompressed_size(3000)
-            .set_data_page_offset(4000)
-            .set_dictionary_page_offset(Some(5000))
-            .build()
-            .unwrap();
-
-        let col_chunk_exp = col_metadata.to_thrift();
-
-        let col_chunk_res =
-            ColumnChunkMetaData::from_thrift(column_descr, col_chunk_exp.clone())
-                .unwrap()
-                .to_thrift();
-
-        assert_eq!(col_chunk_res, col_chunk_exp);
-    }
-
-    #[test]
-    fn test_column_chunk_metadata_thrift_conversion_empty() {
-        let column_descr = get_test_schema_descr().column(0);
-
-        let col_metadata = ColumnChunkMetaData::builder(column_descr.clone())
-            .build()
-            .unwrap();
-
-        let col_chunk_exp = col_metadata.to_thrift();
-        let col_chunk_res =
-            ColumnChunkMetaData::from_thrift(column_descr, col_chunk_exp.clone())
-                .unwrap()
-                .to_thrift();
-
-        assert_eq!(col_chunk_res, col_chunk_exp);
-    }
-
-    #[test]
-    fn test_compressed_size() {
-        let schema_descr = get_test_schema_descr();
-
-        let mut columns = vec![];
-        for column_descr in schema_descr.columns() {
-            let column = ColumnChunkMetaData::builder(column_descr.clone())
-                .set_total_compressed_size(500)
-                .set_total_uncompressed_size(700)
-                .build()
-                .unwrap();
-            columns.push(column);
-        }
-        let row_group_meta = RowGroupMetaData::builder(schema_descr)
-            .set_num_rows(1000)
-            .set_column_metadata(columns)
-            .build()
-            .unwrap();
-
-        let compressed_size_res: i64 = row_group_meta.compressed_size();
-        let compressed_size_exp: i64 = 1000;
-
-        assert_eq!(compressed_size_res, compressed_size_exp);
-    }
-
-    /// Returns sample schema descriptor so we can create column metadata.
-    fn get_test_schema_descr() -> SchemaDescPtr {
-        let schema = SchemaType::group_type_builder("schema")
-            .with_fields(&mut vec![
-                Arc::new(
-                    SchemaType::primitive_type_builder("a", Type::INT32)
-                        .build()
-                        .unwrap(),
-                ),
-                Arc::new(
-                    SchemaType::primitive_type_builder("b", Type::INT32)
-                        .build()
-                        .unwrap(),
-                ),
-            ])
-            .build()
-            .unwrap();
-
-        Arc::new(SchemaDescriptor::new(Arc::new(schema)))
-    }
-}
diff --git a/rust/parquet/src/file/mod.rs b/rust/parquet/src/file/mod.rs
deleted file mode 100644
index f85de98ccab..00000000000
--- a/rust/parquet/src/file/mod.rs
+++ /dev/null
@@ -1,110 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Main entrypoint for working with Parquet API.
-//!
-//! Provides access to file and row group readers and writers, record API, metadata, etc.
-//!
-//! See [`reader::SerializedFileReader`](reader/struct.SerializedFileReader.html) or
-//! [`writer::SerializedFileWriter`](writer/struct.SerializedFileWriter.html) for a
-//! starting reference, [`metadata::ParquetMetaData`](metadata/index.html) for file
-//! metadata, and [`statistics`](statistics/index.html) for working with statistics.
-//!
-//! # Example of writing a new file
-//!
-//! ```rust,no_run
-//! use std::{fs, path::Path, sync::Arc};
-//!
-//! use parquet::{
-//!     file::{
-//!         properties::WriterProperties,
-//!         writer::{FileWriter, SerializedFileWriter},
-//!     },
-//!     schema::parser::parse_message_type,
-//! };
-//!
-//! let path = Path::new("/path/to/sample.parquet");
-//!
-//! let message_type = "
-//!   message schema {
-//!     REQUIRED INT32 b;
-//!   }
-//! ";
-//! let schema = Arc::new(parse_message_type(message_type).unwrap());
-//! let props = Arc::new(WriterProperties::builder().build());
-//! let file = fs::File::create(&path).unwrap();
-//! let mut writer = SerializedFileWriter::new(file, schema, props).unwrap();
-//! let mut row_group_writer = writer.next_row_group().unwrap();
-//! while let Some(mut col_writer) = row_group_writer.next_column().unwrap() {
-//!     // ... write values to a column writer
-//!     row_group_writer.close_column(col_writer).unwrap();
-//! }
-//! writer.close_row_group(row_group_writer).unwrap();
-//! writer.close().unwrap();
-//!
-//! let bytes = fs::read(&path).unwrap();
-//! assert_eq!(&bytes[0..4], &[b'P', b'A', b'R', b'1']);
-//! ```
-//! # Example of reading an existing file
-//!
-//! ```rust,no_run
-//! use parquet::file::reader::{FileReader, SerializedFileReader};
-//! use std::{fs::File, path::Path};
-//!
-//! let path = Path::new("/path/to/sample.parquet");
-//! if let Ok(file) = File::open(&path) {
-//!     let reader = SerializedFileReader::new(file).unwrap();
-//!
-//!     let parquet_metadata = reader.metadata();
-//!     assert_eq!(parquet_metadata.num_row_groups(), 1);
-//!
-//!     let row_group_reader = reader.get_row_group(0).unwrap();
-//!     assert_eq!(row_group_reader.num_columns(), 1);
-//! }
-//! ```
-//! # Example of reading multiple files
-//!
-//! ```rust,no_run
-//! use parquet::file::reader::SerializedFileReader;
-//! use std::convert::TryFrom;
-//!
-//! let paths = vec![
-//!     "/path/to/sample.parquet/part-1.snappy.parquet",
-//!     "/path/to/sample.parquet/part-2.snappy.parquet"
-//! ];
-//! // Create a reader for each file and flat map rows
-//! let rows = paths.iter()
-//!     .map(|p| SerializedFileReader::try_from(*p).unwrap())
-//!     .flat_map(|r| r.into_iter());
-//!
-//! for row in rows {
-//!     println!("{}", row);
-//! }
-//! ```
-pub mod footer;
-pub mod metadata;
-pub mod properties;
-pub mod reader;
-pub mod serialized_reader;
-pub mod statistics;
-pub mod writer;
-
-const FOOTER_SIZE: usize = 8;
-const PARQUET_MAGIC: [u8; 4] = [b'P', b'A', b'R', b'1'];
-
-/// The number of bytes read at the end of the parquet file on first read
-const DEFAULT_FOOTER_READ_SIZE: usize = 64 * 1024;
diff --git a/rust/parquet/src/file/properties.rs b/rust/parquet/src/file/properties.rs
deleted file mode 100644
index b0b25f9b952..00000000000
--- a/rust/parquet/src/file/properties.rs
+++ /dev/null
@@ -1,679 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Writer properties.
-//!
-//! # Usage
-//!
-//! ```rust
-//! use parquet::{
-//!     basic::{Compression, Encoding},
-//!     file::properties::*,
-//!     schema::types::ColumnPath,
-//! };
-//!
-//! // Create properties with default configuration.
-//! let props = WriterProperties::builder().build();
-//!
-//! // Use properties builder to set certain options and assemble the configuration.
-//! let props = WriterProperties::builder()
-//!     .set_writer_version(WriterVersion::PARQUET_1_0)
-//!     .set_encoding(Encoding::PLAIN)
-//!     .set_column_encoding(ColumnPath::from("col1"), Encoding::DELTA_BINARY_PACKED)
-//!     .set_compression(Compression::SNAPPY)
-//!     .build();
-//!
-//! assert_eq!(props.writer_version(), WriterVersion::PARQUET_1_0);
-//! assert_eq!(
-//!     props.encoding(&ColumnPath::from("col1")),
-//!     Some(Encoding::DELTA_BINARY_PACKED)
-//! );
-//! assert_eq!(
-//!     props.encoding(&ColumnPath::from("col2")),
-//!     Some(Encoding::PLAIN)
-//! );
-//! ```
-
-use std::{collections::HashMap, sync::Arc};
-
-use crate::basic::{Compression, Encoding};
-use crate::file::metadata::KeyValue;
-use crate::schema::types::ColumnPath;
-
-const DEFAULT_PAGE_SIZE: usize = 1024 * 1024;
-const DEFAULT_WRITE_BATCH_SIZE: usize = 1024;
-const DEFAULT_WRITER_VERSION: WriterVersion = WriterVersion::PARQUET_1_0;
-const DEFAULT_COMPRESSION: Compression = Compression::UNCOMPRESSED;
-const DEFAULT_DICTIONARY_ENABLED: bool = true;
-const DEFAULT_DICTIONARY_PAGE_SIZE_LIMIT: usize = DEFAULT_PAGE_SIZE;
-const DEFAULT_STATISTICS_ENABLED: bool = true;
-const DEFAULT_MAX_STATISTICS_SIZE: usize = 4096;
-const DEFAULT_MAX_ROW_GROUP_SIZE: usize = 128 * 1024 * 1024;
-const DEFAULT_CREATED_BY: &str = env!("PARQUET_CREATED_BY");
-
-/// Parquet writer version.
-///
-/// Basic constant, which is not part of the Thrift definition.
-#[derive(Debug, Clone, Copy, PartialEq)]
-pub enum WriterVersion {
-    PARQUET_1_0,
-    PARQUET_2_0,
-}
-
-impl WriterVersion {
-    /// Returns writer version as `i32`.
-    pub fn as_num(&self) -> i32 {
-        match self {
-            WriterVersion::PARQUET_1_0 => 1,
-            WriterVersion::PARQUET_2_0 => 2,
-        }
-    }
-}
-
-/// Reference counted writer properties.
-pub type WriterPropertiesPtr = Arc<WriterProperties>;
-
-/// Writer properties.
-///
-/// All properties except the key-value metadata are immutable,
-/// use [`WriterPropertiesBuilder`] to assemble these properties.
-#[derive(Debug, Clone)]
-pub struct WriterProperties {
-    data_pagesize_limit: usize,
-    dictionary_pagesize_limit: usize,
-    write_batch_size: usize,
-    max_row_group_size: usize,
-    writer_version: WriterVersion,
-    created_by: String,
-    pub(crate) key_value_metadata: Option<Vec<KeyValue>>,
-    default_column_properties: ColumnProperties,
-    column_properties: HashMap<ColumnPath, ColumnProperties>,
-}
-
-impl WriterProperties {
-    /// Returns builder for writer properties with default values.
-    pub fn builder() -> WriterPropertiesBuilder {
-        WriterPropertiesBuilder::with_defaults()
-    }
-
-    /// Returns data page size limit.
-    pub fn data_pagesize_limit(&self) -> usize {
-        self.data_pagesize_limit
-    }
-
-    /// Returns dictionary page size limit.
-    pub fn dictionary_pagesize_limit(&self) -> usize {
-        self.dictionary_pagesize_limit
-    }
-
-    /// Returns configured batch size for writes.
-    ///
-    /// When writing a batch of data, this setting allows to split it internally into
-    /// smaller batches so we can better estimate the size of a page currently being
-    /// written.
-    pub fn write_batch_size(&self) -> usize {
-        self.write_batch_size
-    }
-
-    /// Returns max size for a row group.
-    pub fn max_row_group_size(&self) -> usize {
-        self.max_row_group_size
-    }
-
-    /// Returns configured writer version.
-    pub fn writer_version(&self) -> WriterVersion {
-        self.writer_version
-    }
-
-    /// Returns `created_by` string.
-    pub fn created_by(&self) -> &str {
-        &self.created_by
-    }
-
-    /// Returns `key_value_metadata` KeyValue pairs.
-    pub fn key_value_metadata(&self) -> &Option<Vec<KeyValue>> {
-        &self.key_value_metadata
-    }
-
-    /// Returns encoding for a data page, when dictionary encoding is enabled.
-    /// This is not configurable.
-    #[inline]
-    pub fn dictionary_data_page_encoding(&self) -> Encoding {
-        // PLAIN_DICTIONARY encoding is deprecated in writer version 1.
-        // Dictionary values are encoded using RLE_DICTIONARY encoding.
-        Encoding::RLE_DICTIONARY
-    }
-
-    /// Returns encoding for dictionary page, when dictionary encoding is enabled.
-    /// This is not configurable.
-    #[inline]
-    pub fn dictionary_page_encoding(&self) -> Encoding {
-        // PLAIN_DICTIONARY is deprecated in writer version 1.
-        // Dictionary is encoded using plain encoding.
-        Encoding::PLAIN
-    }
-
-    /// Returns encoding for a column, if set.
-    /// In case when dictionary is enabled, returns fallback encoding.
-    ///
-    /// If encoding is not set, then column writer will choose the best encoding
-    /// based on the column type.
-    pub fn encoding(&self, col: &ColumnPath) -> Option<Encoding> {
-        self.column_properties
-            .get(col)
-            .and_then(|c| c.encoding())
-            .or_else(|| self.default_column_properties.encoding())
-    }
-
-    /// Returns compression codec for a column.
-    pub fn compression(&self, col: &ColumnPath) -> Compression {
-        self.column_properties
-            .get(col)
-            .and_then(|c| c.compression())
-            .or_else(|| self.default_column_properties.compression())
-            .unwrap_or(DEFAULT_COMPRESSION)
-    }
-
-    /// Returns `true` if dictionary encoding is enabled for a column.
-    pub fn dictionary_enabled(&self, col: &ColumnPath) -> bool {
-        self.column_properties
-            .get(col)
-            .and_then(|c| c.dictionary_enabled())
-            .or_else(|| self.default_column_properties.dictionary_enabled())
-            .unwrap_or(DEFAULT_DICTIONARY_ENABLED)
-    }
-
-    /// Returns `true` if statistics are enabled for a column.
-    pub fn statistics_enabled(&self, col: &ColumnPath) -> bool {
-        self.column_properties
-            .get(col)
-            .and_then(|c| c.statistics_enabled())
-            .or_else(|| self.default_column_properties.statistics_enabled())
-            .unwrap_or(DEFAULT_STATISTICS_ENABLED)
-    }
-
-    /// Returns max size for statistics.
-    /// Only applicable if statistics are enabled.
-    pub fn max_statistics_size(&self, col: &ColumnPath) -> usize {
-        self.column_properties
-            .get(col)
-            .and_then(|c| c.max_statistics_size())
-            .or_else(|| self.default_column_properties.max_statistics_size())
-            .unwrap_or(DEFAULT_MAX_STATISTICS_SIZE)
-    }
-}
-
-/// Writer properties builder.
-pub struct WriterPropertiesBuilder {
-    data_pagesize_limit: usize,
-    dictionary_pagesize_limit: usize,
-    write_batch_size: usize,
-    max_row_group_size: usize,
-    writer_version: WriterVersion,
-    created_by: String,
-    key_value_metadata: Option<Vec<KeyValue>>,
-    default_column_properties: ColumnProperties,
-    column_properties: HashMap<ColumnPath, ColumnProperties>,
-}
-
-impl WriterPropertiesBuilder {
-    /// Returns default state of the builder.
-    fn with_defaults() -> Self {
-        Self {
-            data_pagesize_limit: DEFAULT_PAGE_SIZE,
-            dictionary_pagesize_limit: DEFAULT_DICTIONARY_PAGE_SIZE_LIMIT,
-            write_batch_size: DEFAULT_WRITE_BATCH_SIZE,
-            max_row_group_size: DEFAULT_MAX_ROW_GROUP_SIZE,
-            writer_version: DEFAULT_WRITER_VERSION,
-            created_by: DEFAULT_CREATED_BY.to_string(),
-            key_value_metadata: None,
-            default_column_properties: ColumnProperties::new(),
-            column_properties: HashMap::new(),
-        }
-    }
-
-    /// Finalizes the configuration and returns immutable writer properties struct.
-    pub fn build(self) -> WriterProperties {
-        WriterProperties {
-            data_pagesize_limit: self.data_pagesize_limit,
-            dictionary_pagesize_limit: self.dictionary_pagesize_limit,
-            write_batch_size: self.write_batch_size,
-            max_row_group_size: self.max_row_group_size,
-            writer_version: self.writer_version,
-            created_by: self.created_by,
-            key_value_metadata: self.key_value_metadata,
-            default_column_properties: self.default_column_properties,
-            column_properties: self.column_properties,
-        }
-    }
-
-    // ----------------------------------------------------------------------
-    // Writer properties related to a file
-
-    /// Sets writer version.
-    pub fn set_writer_version(mut self, value: WriterVersion) -> Self {
-        self.writer_version = value;
-        self
-    }
-
-    /// Sets data page size limit.
-    pub fn set_data_pagesize_limit(mut self, value: usize) -> Self {
-        self.data_pagesize_limit = value;
-        self
-    }
-
-    /// Sets dictionary page size limit.
-    pub fn set_dictionary_pagesize_limit(mut self, value: usize) -> Self {
-        self.dictionary_pagesize_limit = value;
-        self
-    }
-
-    /// Sets write batch size.
-    pub fn set_write_batch_size(mut self, value: usize) -> Self {
-        self.write_batch_size = value;
-        self
-    }
-
-    /// Sets max size for a row group.
-    pub fn set_max_row_group_size(mut self, value: usize) -> Self {
-        self.max_row_group_size = value;
-        self
-    }
-
-    /// Sets "created by" property.
-    pub fn set_created_by(mut self, value: String) -> Self {
-        self.created_by = value;
-        self
-    }
-
-    /// Sets "key_value_metadata" property.
-    pub fn set_key_value_metadata(mut self, value: Option<Vec<KeyValue>>) -> Self {
-        self.key_value_metadata = value;
-        self
-    }
-
-    // ----------------------------------------------------------------------
-    // Setters for any column (global)
-
-    /// Sets encoding for any column.
-    ///
-    /// If dictionary is not enabled, this is treated as a primary encoding for all
-    /// columns. In case when dictionary is enabled for any column, this value is
-    /// considered to be a fallback encoding for that column.
-    ///
-    /// Panics if user tries to set dictionary encoding here, regardless of dictionary
-    /// encoding flag being set.
-    pub fn set_encoding(mut self, value: Encoding) -> Self {
-        self.default_column_properties.set_encoding(value);
-        self
-    }
-
-    /// Sets compression codec for any column.
-    pub fn set_compression(mut self, value: Compression) -> Self {
-        self.default_column_properties.set_compression(value);
-        self
-    }
-
-    /// Sets flag to enable/disable dictionary encoding for any column.
-    ///
-    /// Use this method to set dictionary encoding, instead of explicitly specifying
-    /// encoding in `set_encoding` method.
-    pub fn set_dictionary_enabled(mut self, value: bool) -> Self {
-        self.default_column_properties.set_dictionary_enabled(value);
-        self
-    }
-
-    /// Sets flag to enable/disable statistics for any column.
-    pub fn set_statistics_enabled(mut self, value: bool) -> Self {
-        self.default_column_properties.set_statistics_enabled(value);
-        self
-    }
-
-    /// Sets max statistics size for any column.
-    /// Applicable only if statistics are enabled.
-    pub fn set_max_statistics_size(mut self, value: usize) -> Self {
-        self.default_column_properties
-            .set_max_statistics_size(value);
-        self
-    }
-
-    // ----------------------------------------------------------------------
-    // Setters for a specific column
-
-    /// Helper method to get existing or new mutable reference of column properties.
-    #[inline]
-    fn get_mut_props(&mut self, col: ColumnPath) -> &mut ColumnProperties {
-        self.column_properties
-            .entry(col)
-            .or_insert(ColumnProperties::new())
-    }
-
-    /// Sets encoding for a column.
-    /// Takes precedence over globally defined settings.
-    ///
-    /// If dictionary is not enabled, this is treated as a primary encoding for this
-    /// column. In case when dictionary is enabled for this column, either through
-    /// global defaults or explicitly, this value is considered to be a fallback
-    /// encoding for this column.
-    ///
-    /// Panics if user tries to set dictionary encoding here, regardless of dictionary
-    /// encoding flag being set.
-    pub fn set_column_encoding(mut self, col: ColumnPath, value: Encoding) -> Self {
-        self.get_mut_props(col).set_encoding(value);
-        self
-    }
-
-    /// Sets compression codec for a column.
-    /// Takes precedence over globally defined settings.
-    pub fn set_column_compression(mut self, col: ColumnPath, value: Compression) -> Self {
-        self.get_mut_props(col).set_compression(value);
-        self
-    }
-
-    /// Sets flag to enable/disable dictionary encoding for a column.
-    /// Takes precedence over globally defined settings.
-    pub fn set_column_dictionary_enabled(mut self, col: ColumnPath, value: bool) -> Self {
-        self.get_mut_props(col).set_dictionary_enabled(value);
-        self
-    }
-
-    /// Sets flag to enable/disable statistics for a column.
-    /// Takes precedence over globally defined settings.
-    pub fn set_column_statistics_enabled(mut self, col: ColumnPath, value: bool) -> Self {
-        self.get_mut_props(col).set_statistics_enabled(value);
-        self
-    }
-
-    /// Sets max size for statistics for a column.
-    /// Takes precedence over globally defined settings.
-    pub fn set_column_max_statistics_size(
-        mut self,
-        col: ColumnPath,
-        value: usize,
-    ) -> Self {
-        self.get_mut_props(col).set_max_statistics_size(value);
-        self
-    }
-}
-
-/// Container for column properties that can be changed as part of writer.
-///
-/// If a field is `None`, it means that no specific value has been set for this column,
-/// so some subsequent or default value must be used.
-#[derive(Debug, Clone, PartialEq)]
-struct ColumnProperties {
-    encoding: Option<Encoding>,
-    codec: Option<Compression>,
-    dictionary_enabled: Option<bool>,
-    statistics_enabled: Option<bool>,
-    max_statistics_size: Option<usize>,
-}
-
-impl ColumnProperties {
-    /// Initialise column properties with default values.
-    fn new() -> Self {
-        Self {
-            encoding: None,
-            codec: None,
-            dictionary_enabled: None,
-            statistics_enabled: None,
-            max_statistics_size: None,
-        }
-    }
-
-    /// Sets encoding for this column.
-    ///
-    /// If dictionary is not enabled, this is treated as a primary encoding for a column.
-    /// In case when dictionary is enabled for a column, this value is considered to
-    /// be a fallback encoding.
-    ///
-    /// Panics if user tries to set dictionary encoding here, regardless of dictionary
-    /// encoding flag being set. Use `set_dictionary_enabled` method to enable dictionary
-    /// for a column.
-    fn set_encoding(&mut self, value: Encoding) {
-        if value == Encoding::PLAIN_DICTIONARY || value == Encoding::RLE_DICTIONARY {
-            panic!("Dictionary encoding can not be used as fallback encoding");
-        }
-        self.encoding = Some(value);
-    }
-
-    /// Sets compression codec for this column.
-    fn set_compression(&mut self, value: Compression) {
-        self.codec = Some(value);
-    }
-
-    /// Sets whether or not dictionary encoding is enabled for this column.
-    fn set_dictionary_enabled(&mut self, enabled: bool) {
-        self.dictionary_enabled = Some(enabled);
-    }
-
-    /// Sets whether or not statistics are enabled for this column.
-    fn set_statistics_enabled(&mut self, enabled: bool) {
-        self.statistics_enabled = Some(enabled);
-    }
-
-    /// Sets max size for statistics for this column.
-    fn set_max_statistics_size(&mut self, value: usize) {
-        self.max_statistics_size = Some(value);
-    }
-
-    /// Returns optional encoding for this column.
-    fn encoding(&self) -> Option<Encoding> {
-        self.encoding
-    }
-
-    /// Returns optional compression codec for this column.
-    fn compression(&self) -> Option<Compression> {
-        self.codec
-    }
-
-    /// Returns `Some(true)` if dictionary encoding is enabled for this column, if
-    /// disabled then returns `Some(false)`. If result is `None`, then no setting has
-    /// been provided.
-    fn dictionary_enabled(&self) -> Option<bool> {
-        self.dictionary_enabled
-    }
-
-    /// Returns `Some(true)` if statistics are enabled for this column, if disabled then
-    /// returns `Some(false)`. If result is `None`, then no setting has been provided.
-    fn statistics_enabled(&self) -> Option<bool> {
-        self.statistics_enabled
-    }
-
-    /// Returns optional max size in bytes for statistics.
-    fn max_statistics_size(&self) -> Option<usize> {
-        self.max_statistics_size
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_writer_version() {
-        assert_eq!(WriterVersion::PARQUET_1_0.as_num(), 1);
-        assert_eq!(WriterVersion::PARQUET_2_0.as_num(), 2);
-    }
-
-    #[test]
-    fn test_writer_properties_default_settings() {
-        let props = WriterProperties::builder().build();
-        assert_eq!(props.data_pagesize_limit(), DEFAULT_PAGE_SIZE);
-        assert_eq!(
-            props.dictionary_pagesize_limit(),
-            DEFAULT_DICTIONARY_PAGE_SIZE_LIMIT
-        );
-        assert_eq!(props.write_batch_size(), DEFAULT_WRITE_BATCH_SIZE);
-        assert_eq!(props.max_row_group_size(), DEFAULT_MAX_ROW_GROUP_SIZE);
-        assert_eq!(props.writer_version(), DEFAULT_WRITER_VERSION);
-        assert_eq!(props.created_by(), DEFAULT_CREATED_BY);
-        assert_eq!(props.key_value_metadata(), &None);
-        assert_eq!(props.encoding(&ColumnPath::from("col")), None);
-        assert_eq!(
-            props.compression(&ColumnPath::from("col")),
-            DEFAULT_COMPRESSION
-        );
-        assert_eq!(
-            props.dictionary_enabled(&ColumnPath::from("col")),
-            DEFAULT_DICTIONARY_ENABLED
-        );
-        assert_eq!(
-            props.statistics_enabled(&ColumnPath::from("col")),
-            DEFAULT_STATISTICS_ENABLED
-        );
-        assert_eq!(
-            props.max_statistics_size(&ColumnPath::from("col")),
-            DEFAULT_MAX_STATISTICS_SIZE
-        );
-    }
-
-    #[test]
-    fn test_writer_properties_dictionary_encoding() {
-        // dictionary encoding is not configurable, and it should be the same for both
-        // writer version 1 and 2.
-        for version in &[WriterVersion::PARQUET_1_0, WriterVersion::PARQUET_2_0] {
-            let props = WriterProperties::builder()
-                .set_writer_version(*version)
-                .build();
-            assert_eq!(props.dictionary_page_encoding(), Encoding::PLAIN);
-            assert_eq!(
-                props.dictionary_data_page_encoding(),
-                Encoding::RLE_DICTIONARY
-            );
-        }
-    }
-
-    #[test]
-    #[should_panic(expected = "Dictionary encoding can not be used as fallback encoding")]
-    fn test_writer_properties_panic_when_plain_dictionary_is_fallback() {
-        // Should panic when user specifies dictionary encoding as fallback encoding.
-        WriterProperties::builder()
-            .set_encoding(Encoding::PLAIN_DICTIONARY)
-            .build();
-    }
-
-    #[test]
-    #[should_panic(expected = "Dictionary encoding can not be used as fallback encoding")]
-    fn test_writer_properties_panic_when_rle_dictionary_is_fallback() {
-        // Should panic when user specifies dictionary encoding as fallback encoding.
-        WriterProperties::builder()
-            .set_encoding(Encoding::RLE_DICTIONARY)
-            .build();
-    }
-
-    #[test]
-    #[should_panic(expected = "Dictionary encoding can not be used as fallback encoding")]
-    fn test_writer_properties_panic_when_dictionary_is_enabled() {
-        WriterProperties::builder()
-            .set_dictionary_enabled(true)
-            .set_column_encoding(ColumnPath::from("col"), Encoding::RLE_DICTIONARY)
-            .build();
-    }
-
-    #[test]
-    #[should_panic(expected = "Dictionary encoding can not be used as fallback encoding")]
-    fn test_writer_properties_panic_when_dictionary_is_disabled() {
-        WriterProperties::builder()
-            .set_dictionary_enabled(false)
-            .set_column_encoding(ColumnPath::from("col"), Encoding::RLE_DICTIONARY)
-            .build();
-    }
-
-    #[test]
-    fn test_writer_properties_builder() {
-        let props = WriterProperties::builder()
-            // file settings
-            .set_writer_version(WriterVersion::PARQUET_2_0)
-            .set_data_pagesize_limit(10)
-            .set_dictionary_pagesize_limit(20)
-            .set_write_batch_size(30)
-            .set_max_row_group_size(40)
-            .set_created_by("default".to_owned())
-            .set_key_value_metadata(Some(vec![KeyValue::new(
-                "key".to_string(),
-                "value".to_string(),
-            )]))
-            // global column settings
-            .set_encoding(Encoding::DELTA_BINARY_PACKED)
-            .set_compression(Compression::GZIP)
-            .set_dictionary_enabled(false)
-            .set_statistics_enabled(false)
-            .set_max_statistics_size(50)
-            // specific column settings
-            .set_column_encoding(ColumnPath::from("col"), Encoding::RLE)
-            .set_column_compression(ColumnPath::from("col"), Compression::SNAPPY)
-            .set_column_dictionary_enabled(ColumnPath::from("col"), true)
-            .set_column_statistics_enabled(ColumnPath::from("col"), true)
-            .set_column_max_statistics_size(ColumnPath::from("col"), 123)
-            .build();
-
-        assert_eq!(props.writer_version(), WriterVersion::PARQUET_2_0);
-        assert_eq!(props.data_pagesize_limit(), 10);
-        assert_eq!(props.dictionary_pagesize_limit(), 20);
-        assert_eq!(props.write_batch_size(), 30);
-        assert_eq!(props.max_row_group_size(), 40);
-        assert_eq!(props.created_by(), "default");
-        assert_eq!(
-            props.key_value_metadata(),
-            &Some(vec![KeyValue::new("key".to_string(), "value".to_string(),)])
-        );
-
-        assert_eq!(
-            props.encoding(&ColumnPath::from("a")),
-            Some(Encoding::DELTA_BINARY_PACKED)
-        );
-        assert_eq!(props.compression(&ColumnPath::from("a")), Compression::GZIP);
-        assert_eq!(props.dictionary_enabled(&ColumnPath::from("a")), false);
-        assert_eq!(props.statistics_enabled(&ColumnPath::from("a")), false);
-        assert_eq!(props.max_statistics_size(&ColumnPath::from("a")), 50);
-
-        assert_eq!(
-            props.encoding(&ColumnPath::from("col")),
-            Some(Encoding::RLE)
-        );
-        assert_eq!(
-            props.compression(&ColumnPath::from("col")),
-            Compression::SNAPPY
-        );
-        assert_eq!(props.dictionary_enabled(&ColumnPath::from("col")), true);
-        assert_eq!(props.statistics_enabled(&ColumnPath::from("col")), true);
-        assert_eq!(props.max_statistics_size(&ColumnPath::from("col")), 123);
-    }
-
-    #[test]
-    fn test_writer_properties_builder_partial_defaults() {
-        let props = WriterProperties::builder()
-            .set_encoding(Encoding::DELTA_BINARY_PACKED)
-            .set_compression(Compression::GZIP)
-            .set_column_encoding(ColumnPath::from("col"), Encoding::RLE)
-            .build();
-
-        assert_eq!(
-            props.encoding(&ColumnPath::from("col")),
-            Some(Encoding::RLE)
-        );
-        assert_eq!(
-            props.compression(&ColumnPath::from("col")),
-            Compression::GZIP
-        );
-        assert_eq!(
-            props.dictionary_enabled(&ColumnPath::from("col")),
-            DEFAULT_DICTIONARY_ENABLED
-        );
-    }
-}
diff --git a/rust/parquet/src/file/reader.rs b/rust/parquet/src/file/reader.rs
deleted file mode 100644
index aa8ba83a6c0..00000000000
--- a/rust/parquet/src/file/reader.rs
+++ /dev/null
@@ -1,206 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Contains file reader API and provides methods to access file metadata, row group
-//! readers to read individual column chunks, or access record iterator.
-
-use std::{boxed::Box, io::Read, sync::Arc};
-
-use crate::column::page::PageIterator;
-use crate::column::{page::PageReader, reader::ColumnReader};
-use crate::errors::{ParquetError, Result};
-use crate::file::metadata::*;
-pub use crate::file::serialized_reader::{SerializedFileReader, SerializedPageReader};
-use crate::record::reader::RowIter;
-use crate::schema::types::{ColumnDescPtr, SchemaDescPtr, Type as SchemaType};
-
-use crate::basic::Type;
-
-use crate::column::reader::ColumnReaderImpl;
-
-/// Length should return the total number of bytes in the input source.
-/// It's mainly used to read the metadata, which is at the end of the source.
-#[allow(clippy::len_without_is_empty)]
-pub trait Length {
-    /// Returns the amount of bytes of the inner source.
-    fn len(&self) -> u64;
-}
-
-/// The ChunkReader trait generates readers of chunks of a source.
-/// For a file system reader, each chunk might contain a clone of File bounded on a given range.
-/// For an object store reader, each read can be mapped to a range request.
-pub trait ChunkReader: Length {
-    type T: Read;
-    /// get a serialy readeable slice of the current reader
-    /// This should fail if the slice exceeds the current bounds
-    fn get_read(&self, start: u64, length: usize) -> Result<Self::T>;
-}
-
-// ----------------------------------------------------------------------
-// APIs for file & row group readers
-
-/// Parquet file reader API. With this, user can get metadata information about the
-/// Parquet file, can get reader for each row group, and access record iterator.
-pub trait FileReader {
-    /// Get metadata information about this file.
-    fn metadata(&self) -> &ParquetMetaData;
-
-    /// Get the total number of row groups for this file.
-    fn num_row_groups(&self) -> usize;
-
-    /// Get the `i`th row group reader. Note this doesn't do bound check.
-    fn get_row_group(&self, i: usize) -> Result<Box<dyn RowGroupReader + '_>>;
-
-    /// Get full iterator of `Row`s from a file (over all row groups).
-    ///
-    /// Iterator will automatically load the next row group to advance.
-    ///
-    /// Projected schema can be a subset of or equal to the file schema, when it is None,
-    /// full file schema is assumed.
-    fn get_row_iter(&self, projection: Option<SchemaType>) -> Result<RowIter>;
-}
-
-/// Parquet row group reader API. With this, user can get metadata information about the
-/// row group, as well as readers for each individual column chunk.
-pub trait RowGroupReader {
-    /// Get metadata information about this row group.
-    fn metadata(&self) -> &RowGroupMetaData;
-
-    /// Get the total number of column chunks in this row group.
-    fn num_columns(&self) -> usize;
-
-    /// Get page reader for the `i`th column chunk.
-    fn get_column_page_reader(&self, i: usize) -> Result<Box<dyn PageReader>>;
-
-    /// Get value reader for the `i`th column chunk.
-    fn get_column_reader(&self, i: usize) -> Result<ColumnReader> {
-        let schema_descr = self.metadata().schema_descr();
-        let col_descr = schema_descr.column(i);
-        let col_page_reader = self.get_column_page_reader(i)?;
-        let col_reader = match col_descr.physical_type() {
-            Type::BOOLEAN => ColumnReader::BoolColumnReader(ColumnReaderImpl::new(
-                col_descr,
-                col_page_reader,
-            )),
-            Type::INT32 => ColumnReader::Int32ColumnReader(ColumnReaderImpl::new(
-                col_descr,
-                col_page_reader,
-            )),
-            Type::INT64 => ColumnReader::Int64ColumnReader(ColumnReaderImpl::new(
-                col_descr,
-                col_page_reader,
-            )),
-            Type::INT96 => ColumnReader::Int96ColumnReader(ColumnReaderImpl::new(
-                col_descr,
-                col_page_reader,
-            )),
-            Type::FLOAT => ColumnReader::FloatColumnReader(ColumnReaderImpl::new(
-                col_descr,
-                col_page_reader,
-            )),
-            Type::DOUBLE => ColumnReader::DoubleColumnReader(ColumnReaderImpl::new(
-                col_descr,
-                col_page_reader,
-            )),
-            Type::BYTE_ARRAY => ColumnReader::ByteArrayColumnReader(
-                ColumnReaderImpl::new(col_descr, col_page_reader),
-            ),
-            Type::FIXED_LEN_BYTE_ARRAY => ColumnReader::FixedLenByteArrayColumnReader(
-                ColumnReaderImpl::new(col_descr, col_page_reader),
-            ),
-        };
-        Ok(col_reader)
-    }
-
-    /// Get iterator of `Row`s from this row group.
-    ///
-    /// Projected schema can be a subset of or equal to the file schema, when it is None,
-    /// full file schema is assumed.
-    fn get_row_iter(&self, projection: Option<SchemaType>) -> Result<RowIter>;
-}
-
-// ----------------------------------------------------------------------
-// Iterator
-
-/// Implementation of page iterator for parquet file.
-pub struct FilePageIterator {
-    column_index: usize,
-    row_group_indices: Box<dyn Iterator<Item = usize>>,
-    file_reader: Arc<dyn FileReader>,
-}
-
-impl FilePageIterator {
-    /// Creates a page iterator for all row groups in file.
-    pub fn new(column_index: usize, file_reader: Arc<dyn FileReader>) -> Result<Self> {
-        let num_row_groups = file_reader.metadata().num_row_groups();
-
-        let row_group_indices = Box::new(0..num_row_groups);
-
-        Self::with_row_groups(column_index, row_group_indices, file_reader)
-    }
-
-    /// Create page iterator from parquet file reader with only some row groups.
-    pub fn with_row_groups(
-        column_index: usize,
-        row_group_indices: Box<dyn Iterator<Item = usize>>,
-        file_reader: Arc<dyn FileReader>,
-    ) -> Result<Self> {
-        // Check that column_index is valid
-        let num_columns = file_reader
-            .metadata()
-            .file_metadata()
-            .schema_descr()
-            .num_columns();
-
-        if column_index >= num_columns {
-            return Err(ParquetError::IndexOutOfBound(column_index, num_columns));
-        }
-
-        // We don't check iterators here because iterator may be infinite
-        Ok(Self {
-            column_index,
-            row_group_indices,
-            file_reader,
-        })
-    }
-}
-
-impl Iterator for FilePageIterator {
-    type Item = Result<Box<dyn PageReader>>;
-
-    fn next(&mut self) -> Option<Result<Box<dyn PageReader>>> {
-        self.row_group_indices.next().map(|row_group_index| {
-            self.file_reader
-                .get_row_group(row_group_index)
-                .and_then(|r| r.get_column_page_reader(self.column_index))
-        })
-    }
-}
-
-impl PageIterator for FilePageIterator {
-    fn schema(&mut self) -> Result<SchemaDescPtr> {
-        Ok(self
-            .file_reader
-            .metadata()
-            .file_metadata()
-            .schema_descr_ptr())
-    }
-
-    fn column_schema(&mut self) -> Result<ColumnDescPtr> {
-        self.schema().map(|s| s.column(self.column_index))
-    }
-}
diff --git a/rust/parquet/src/file/serialized_reader.rs b/rust/parquet/src/file/serialized_reader.rs
deleted file mode 100644
index 0877e622ce4..00000000000
--- a/rust/parquet/src/file/serialized_reader.rs
+++ /dev/null
@@ -1,771 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Contains implementations of the reader traits FileReader, RowGroupReader and PageReader
-//! Also contains implementations of the ChunkReader for files (with buffering) and byte arrays (RAM)
-
-use std::{convert::TryFrom, fs::File, io::Read, path::Path, sync::Arc};
-
-use parquet_format::{PageHeader, PageType};
-use thrift::protocol::TCompactInputProtocol;
-
-use crate::basic::{Compression, Encoding, Type};
-use crate::column::page::{Page, PageReader};
-use crate::compression::{create_codec, Codec};
-use crate::errors::{ParquetError, Result};
-use crate::file::{footer, metadata::*, reader::*, statistics};
-use crate::record::reader::RowIter;
-use crate::record::Row;
-use crate::schema::types::Type as SchemaType;
-use crate::util::{io::TryClone, memory::ByteBufferPtr};
-
-// export `SliceableCursor` and `FileSource` publically so clients can
-// re-use the logic in their own ParquetFileWriter wrappers
-pub use crate::util::{cursor::SliceableCursor, io::FileSource};
-
-// ----------------------------------------------------------------------
-// Implementations of traits facilitating the creation of a new reader
-
-impl Length for File {
-    fn len(&self) -> u64 {
-        self.metadata().map(|m| m.len()).unwrap_or(0u64)
-    }
-}
-
-impl TryClone for File {
-    fn try_clone(&self) -> std::io::Result<Self> {
-        self.try_clone()
-    }
-}
-
-impl ChunkReader for File {
-    type T = FileSource<File>;
-
-    fn get_read(&self, start: u64, length: usize) -> Result<Self::T> {
-        Ok(FileSource::new(self, start, length))
-    }
-}
-
-impl Length for SliceableCursor {
-    fn len(&self) -> u64 {
-        SliceableCursor::len(self)
-    }
-}
-
-impl ChunkReader for SliceableCursor {
-    type T = SliceableCursor;
-
-    fn get_read(&self, start: u64, length: usize) -> Result<Self::T> {
-        self.slice(start, length).map_err(|e| e.into())
-    }
-}
-
-impl TryFrom<File> for SerializedFileReader<File> {
-    type Error = ParquetError;
-
-    fn try_from(file: File) -> Result<Self> {
-        Self::new(file)
-    }
-}
-
-impl<'a> TryFrom<&'a Path> for SerializedFileReader<File> {
-    type Error = ParquetError;
-
-    fn try_from(path: &Path) -> Result<Self> {
-        let file = File::open(path)?;
-        Self::try_from(file)
-    }
-}
-
-impl TryFrom<String> for SerializedFileReader<File> {
-    type Error = ParquetError;
-
-    fn try_from(path: String) -> Result<Self> {
-        Self::try_from(Path::new(&path))
-    }
-}
-
-impl<'a> TryFrom<&'a str> for SerializedFileReader<File> {
-    type Error = ParquetError;
-
-    fn try_from(path: &str) -> Result<Self> {
-        Self::try_from(Path::new(&path))
-    }
-}
-
-/// Conversion into a [`RowIter`](crate::record::reader::RowIter)
-/// using the full file schema over all row groups.
-impl IntoIterator for SerializedFileReader<File> {
-    type Item = Row;
-    type IntoIter = RowIter<'static>;
-
-    fn into_iter(self) -> Self::IntoIter {
-        RowIter::from_file_into(Box::new(self))
-    }
-}
-
-// ----------------------------------------------------------------------
-// Implementations of file & row group readers
-
-/// A serialized implementation for Parquet [`FileReader`].
-pub struct SerializedFileReader<R: ChunkReader> {
-    chunk_reader: Arc<R>,
-    metadata: ParquetMetaData,
-}
-
-impl<R: 'static + ChunkReader> SerializedFileReader<R> {
-    /// Creates file reader from a Parquet file.
-    /// Returns error if Parquet file does not exist or is corrupt.
-    pub fn new(chunk_reader: R) -> Result<Self> {
-        let metadata = footer::parse_metadata(&chunk_reader)?;
-        Ok(Self {
-            chunk_reader: Arc::new(chunk_reader),
-            metadata,
-        })
-    }
-
-    /// Filters row group metadata to only those row groups,
-    /// for which the predicate function returns true
-    pub fn filter_row_groups(
-        &mut self,
-        predicate: &dyn Fn(&RowGroupMetaData, usize) -> bool,
-    ) {
-        let mut filtered_row_groups = Vec::<RowGroupMetaData>::new();
-        for (i, row_group_metadata) in self.metadata.row_groups().iter().enumerate() {
-            if predicate(row_group_metadata, i) {
-                filtered_row_groups.push(row_group_metadata.clone());
-            }
-        }
-        self.metadata = ParquetMetaData::new(
-            self.metadata.file_metadata().clone(),
-            filtered_row_groups,
-        );
-    }
-}
-
-impl<R: 'static + ChunkReader> FileReader for SerializedFileReader<R> {
-    fn metadata(&self) -> &ParquetMetaData {
-        &self.metadata
-    }
-
-    fn num_row_groups(&self) -> usize {
-        self.metadata.num_row_groups()
-    }
-
-    fn get_row_group(&self, i: usize) -> Result<Box<dyn RowGroupReader + '_>> {
-        let row_group_metadata = self.metadata.row_group(i);
-        // Row groups should be processed sequentially.
-        let f = Arc::clone(&self.chunk_reader);
-        Ok(Box::new(SerializedRowGroupReader::new(
-            f,
-            row_group_metadata,
-        )))
-    }
-
-    fn get_row_iter(&self, projection: Option<SchemaType>) -> Result<RowIter> {
-        RowIter::from_file(projection, self)
-    }
-}
-
-/// A serialized implementation for Parquet [`RowGroupReader`].
-pub struct SerializedRowGroupReader<'a, R: ChunkReader> {
-    chunk_reader: Arc<R>,
-    metadata: &'a RowGroupMetaData,
-}
-
-impl<'a, R: ChunkReader> SerializedRowGroupReader<'a, R> {
-    /// Creates new row group reader from a file and row group metadata.
-    fn new(chunk_reader: Arc<R>, metadata: &'a RowGroupMetaData) -> Self {
-        Self {
-            chunk_reader,
-            metadata,
-        }
-    }
-}
-
-impl<'a, R: 'static + ChunkReader> RowGroupReader for SerializedRowGroupReader<'a, R> {
-    fn metadata(&self) -> &RowGroupMetaData {
-        &self.metadata
-    }
-
-    fn num_columns(&self) -> usize {
-        self.metadata.num_columns()
-    }
-
-    // TODO: fix PARQUET-816
-    fn get_column_page_reader(&self, i: usize) -> Result<Box<dyn PageReader>> {
-        let col = self.metadata.column(i);
-        let (col_start, col_length) = col.byte_range();
-        let file_chunk = self.chunk_reader.get_read(col_start, col_length as usize)?;
-        let page_reader = SerializedPageReader::new(
-            file_chunk,
-            col.num_values(),
-            col.compression(),
-            col.column_descr().physical_type(),
-        )?;
-        Ok(Box::new(page_reader))
-    }
-
-    fn get_row_iter(&self, projection: Option<SchemaType>) -> Result<RowIter> {
-        RowIter::from_row_group(projection, self)
-    }
-}
-
-/// A serialized implementation for Parquet [`PageReader`].
-pub struct SerializedPageReader<T: Read> {
-    // The file source buffer which references exactly the bytes for the column trunk
-    // to be read by this page reader.
-    buf: T,
-
-    // The compression codec for this column chunk. Only set for non-PLAIN codec.
-    decompressor: Option<Box<dyn Codec>>,
-
-    // The number of values we have seen so far.
-    seen_num_values: i64,
-
-    // The number of total values in this column chunk.
-    total_num_values: i64,
-
-    // Column chunk type.
-    physical_type: Type,
-}
-
-impl<T: Read> SerializedPageReader<T> {
-    /// Creates a new serialized page reader from file source.
-    pub fn new(
-        buf: T,
-        total_num_values: i64,
-        compression: Compression,
-        physical_type: Type,
-    ) -> Result<Self> {
-        let decompressor = create_codec(compression)?;
-        let result = Self {
-            buf,
-            total_num_values,
-            seen_num_values: 0,
-            decompressor,
-            physical_type,
-        };
-        Ok(result)
-    }
-
-    /// Reads Page header from Thrift.
-    fn read_page_header(&mut self) -> Result<PageHeader> {
-        let mut prot = TCompactInputProtocol::new(&mut self.buf);
-        let page_header = PageHeader::read_from_in_protocol(&mut prot)?;
-        Ok(page_header)
-    }
-}
-
-impl<T: Read> PageReader for SerializedPageReader<T> {
-    fn get_next_page(&mut self) -> Result<Option<Page>> {
-        while self.seen_num_values < self.total_num_values {
-            let page_header = self.read_page_header()?;
-
-            // When processing data page v2, depending on enabled compression for the
-            // page, we should account for uncompressed data ('offset') of
-            // repetition and definition levels.
-            //
-            // We always use 0 offset for other pages other than v2, `true` flag means
-            // that compression will be applied if decompressor is defined
-            let mut offset: usize = 0;
-            let mut can_decompress = true;
-
-            if let Some(ref header_v2) = page_header.data_page_header_v2 {
-                offset = (header_v2.definition_levels_byte_length
-                    + header_v2.repetition_levels_byte_length)
-                    as usize;
-                // When is_compressed flag is missing the page is considered compressed
-                can_decompress = header_v2.is_compressed.unwrap_or(true);
-            }
-
-            let compressed_len = page_header.compressed_page_size as usize - offset;
-            let uncompressed_len = page_header.uncompressed_page_size as usize - offset;
-            // We still need to read all bytes from buffered stream
-            let mut buffer = vec![0; offset + compressed_len];
-            self.buf.read_exact(&mut buffer)?;
-
-            // TODO: page header could be huge because of statistics. We should set a
-            // maximum page header size and abort if that is exceeded.
-            if let Some(decompressor) = self.decompressor.as_mut() {
-                if can_decompress {
-                    let mut decompressed_buffer = Vec::with_capacity(uncompressed_len);
-                    let decompressed_size = decompressor
-                        .decompress(&buffer[offset..], &mut decompressed_buffer)?;
-                    if decompressed_size != uncompressed_len {
-                        return Err(general_err!(
-              "Actual decompressed size doesn't match the expected one ({} vs {})",
-              decompressed_size,
-              uncompressed_len
-            ));
-                    }
-                    if offset == 0 {
-                        buffer = decompressed_buffer;
-                    } else {
-                        // Prepend saved offsets to the buffer
-                        buffer.truncate(offset);
-                        buffer.append(&mut decompressed_buffer);
-                    }
-                }
-            }
-
-            let result = match page_header.type_ {
-                PageType::DictionaryPage => {
-                    assert!(page_header.dictionary_page_header.is_some());
-                    let dict_header =
-                        page_header.dictionary_page_header.as_ref().unwrap();
-                    let is_sorted = dict_header.is_sorted.unwrap_or(false);
-                    Page::DictionaryPage {
-                        buf: ByteBufferPtr::new(buffer),
-                        num_values: dict_header.num_values as u32,
-                        encoding: Encoding::from(dict_header.encoding),
-                        is_sorted,
-                    }
-                }
-                PageType::DataPage => {
-                    assert!(page_header.data_page_header.is_some());
-                    let header = page_header.data_page_header.unwrap();
-                    self.seen_num_values += header.num_values as i64;
-                    Page::DataPage {
-                        buf: ByteBufferPtr::new(buffer),
-                        num_values: header.num_values as u32,
-                        encoding: Encoding::from(header.encoding),
-                        def_level_encoding: Encoding::from(
-                            header.definition_level_encoding,
-                        ),
-                        rep_level_encoding: Encoding::from(
-                            header.repetition_level_encoding,
-                        ),
-                        statistics: statistics::from_thrift(
-                            self.physical_type,
-                            header.statistics,
-                        ),
-                    }
-                }
-                PageType::DataPageV2 => {
-                    assert!(page_header.data_page_header_v2.is_some());
-                    let header = page_header.data_page_header_v2.unwrap();
-                    let is_compressed = header.is_compressed.unwrap_or(true);
-                    self.seen_num_values += header.num_values as i64;
-                    Page::DataPageV2 {
-                        buf: ByteBufferPtr::new(buffer),
-                        num_values: header.num_values as u32,
-                        encoding: Encoding::from(header.encoding),
-                        num_nulls: header.num_nulls as u32,
-                        num_rows: header.num_rows as u32,
-                        def_levels_byte_len: header.definition_levels_byte_length as u32,
-                        rep_levels_byte_len: header.repetition_levels_byte_length as u32,
-                        is_compressed,
-                        statistics: statistics::from_thrift(
-                            self.physical_type,
-                            header.statistics,
-                        ),
-                    }
-                }
-                _ => {
-                    // For unknown page type (e.g., INDEX_PAGE), skip and read next.
-                    continue;
-                }
-            };
-            return Ok(Some(result));
-        }
-
-        // We are at the end of this column chunk and no more page left. Return None.
-        Ok(None)
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::basic::ColumnOrder;
-    use crate::record::RowAccessor;
-    use crate::schema::parser::parse_message_type;
-    use crate::util::test_common::{get_test_file, get_test_path};
-    use std::sync::Arc;
-
-    #[test]
-    fn test_cursor_and_file_has_the_same_behaviour() {
-        let mut buf: Vec<u8> = Vec::new();
-        get_test_file("alltypes_plain.parquet")
-            .read_to_end(&mut buf)
-            .unwrap();
-        let cursor = SliceableCursor::new(buf);
-        let read_from_cursor = SerializedFileReader::new(cursor).unwrap();
-
-        let test_file = get_test_file("alltypes_plain.parquet");
-        let read_from_file = SerializedFileReader::new(test_file).unwrap();
-
-        let file_iter = read_from_file.get_row_iter(None).unwrap();
-        let cursor_iter = read_from_cursor.get_row_iter(None).unwrap();
-
-        assert!(file_iter.eq(cursor_iter));
-    }
-
-    #[test]
-    fn test_file_reader_try_from() {
-        // Valid file path
-        let test_file = get_test_file("alltypes_plain.parquet");
-        let test_path_buf = get_test_path("alltypes_plain.parquet");
-        let test_path = test_path_buf.as_path();
-        let test_path_str = test_path.to_str().unwrap();
-
-        let reader = SerializedFileReader::try_from(test_file);
-        assert!(reader.is_ok());
-
-        let reader = SerializedFileReader::try_from(test_path);
-        assert!(reader.is_ok());
-
-        let reader = SerializedFileReader::try_from(test_path_str);
-        assert!(reader.is_ok());
-
-        let reader = SerializedFileReader::try_from(test_path_str.to_string());
-        assert!(reader.is_ok());
-
-        // Invalid file path
-        let test_path = Path::new("invalid.parquet");
-        let test_path_str = test_path.to_str().unwrap();
-
-        let reader = SerializedFileReader::try_from(test_path);
-        assert!(reader.is_err());
-
-        let reader = SerializedFileReader::try_from(test_path_str);
-        assert!(reader.is_err());
-
-        let reader = SerializedFileReader::try_from(test_path_str.to_string());
-        assert!(reader.is_err());
-    }
-
-    #[test]
-    fn test_file_reader_into_iter() {
-        let path = get_test_path("alltypes_plain.parquet");
-        let vec = vec![path.clone(), path]
-            .iter()
-            .map(|p| SerializedFileReader::try_from(p.as_path()).unwrap())
-            .flat_map(|r| r.into_iter())
-            .flat_map(|r| r.get_int(0))
-            .collect::<Vec<_>>();
-
-        // rows in the parquet file are not sorted by "id"
-        // each file contains [id:4, id:5, id:6, id:7, id:2, id:3, id:0, id:1]
-        assert_eq!(vec, vec![4, 5, 6, 7, 2, 3, 0, 1, 4, 5, 6, 7, 2, 3, 0, 1]);
-    }
-
-    #[test]
-    fn test_file_reader_into_iter_project() {
-        let path = get_test_path("alltypes_plain.parquet");
-        let result = vec![path]
-            .iter()
-            .map(|p| SerializedFileReader::try_from(p.as_path()).unwrap())
-            .flat_map(|r| {
-                let schema = "message schema { OPTIONAL INT32 id; }";
-                let proj = parse_message_type(&schema).ok();
-
-                r.into_iter().project(proj).unwrap()
-            })
-            .map(|r| format!("{}", r))
-            .collect::<Vec<_>>()
-            .join(",");
-
-        assert_eq!(
-            result,
-            "{id: 4},{id: 5},{id: 6},{id: 7},{id: 2},{id: 3},{id: 0},{id: 1}"
-        );
-    }
-
-    #[test]
-    fn test_reuse_file_chunk() {
-        // This test covers the case of maintaining the correct start position in a file
-        // stream for each column reader after initializing and moving to the next one
-        // (without necessarily reading the entire column).
-        let test_file = get_test_file("alltypes_plain.parquet");
-        let reader = SerializedFileReader::new(test_file).unwrap();
-        let row_group = reader.get_row_group(0).unwrap();
-
-        let mut page_readers = Vec::new();
-        for i in 0..row_group.num_columns() {
-            page_readers.push(row_group.get_column_page_reader(i).unwrap());
-        }
-
-        // Now buffer each col reader, we do not expect any failures like:
-        // General("underlying Thrift error: end of file")
-        for mut page_reader in page_readers {
-            assert!(page_reader.get_next_page().is_ok());
-        }
-    }
-
-    #[test]
-    fn test_file_reader() {
-        let test_file = get_test_file("alltypes_plain.parquet");
-        let reader_result = SerializedFileReader::new(test_file);
-        assert!(reader_result.is_ok());
-        let reader = reader_result.unwrap();
-
-        // Test contents in Parquet metadata
-        let metadata = reader.metadata();
-        assert_eq!(metadata.num_row_groups(), 1);
-
-        // Test contents in file metadata
-        let file_metadata = metadata.file_metadata();
-        assert!(file_metadata.created_by().is_some());
-        assert_eq!(
-      file_metadata.created_by().as_ref().unwrap(),
-      "impala version 1.3.0-INTERNAL (build 8a48ddb1eff84592b3fc06bc6f51ec120e1fffc9)"
-    );
-        assert!(file_metadata.key_value_metadata().is_none());
-        assert_eq!(file_metadata.num_rows(), 8);
-        assert_eq!(file_metadata.version(), 1);
-        assert_eq!(file_metadata.column_orders(), None);
-
-        // Test contents in row group metadata
-        let row_group_metadata = metadata.row_group(0);
-        assert_eq!(row_group_metadata.num_columns(), 11);
-        assert_eq!(row_group_metadata.num_rows(), 8);
-        assert_eq!(row_group_metadata.total_byte_size(), 671);
-        // Check each column order
-        for i in 0..row_group_metadata.num_columns() {
-            assert_eq!(file_metadata.column_order(i), ColumnOrder::UNDEFINED);
-        }
-
-        // Test row group reader
-        let row_group_reader_result = reader.get_row_group(0);
-        assert!(row_group_reader_result.is_ok());
-        let row_group_reader: Box<dyn RowGroupReader> = row_group_reader_result.unwrap();
-        assert_eq!(
-            row_group_reader.num_columns(),
-            row_group_metadata.num_columns()
-        );
-        assert_eq!(
-            row_group_reader.metadata().total_byte_size(),
-            row_group_metadata.total_byte_size()
-        );
-
-        // Test page readers
-        // TODO: test for every column
-        let page_reader_0_result = row_group_reader.get_column_page_reader(0);
-        assert!(page_reader_0_result.is_ok());
-        let mut page_reader_0: Box<dyn PageReader> = page_reader_0_result.unwrap();
-        let mut page_count = 0;
-        while let Ok(Some(page)) = page_reader_0.get_next_page() {
-            let is_expected_page = match page {
-                Page::DictionaryPage {
-                    buf,
-                    num_values,
-                    encoding,
-                    is_sorted,
-                } => {
-                    assert_eq!(buf.len(), 32);
-                    assert_eq!(num_values, 8);
-                    assert_eq!(encoding, Encoding::PLAIN_DICTIONARY);
-                    assert_eq!(is_sorted, false);
-                    true
-                }
-                Page::DataPage {
-                    buf,
-                    num_values,
-                    encoding,
-                    def_level_encoding,
-                    rep_level_encoding,
-                    statistics,
-                } => {
-                    assert_eq!(buf.len(), 11);
-                    assert_eq!(num_values, 8);
-                    assert_eq!(encoding, Encoding::PLAIN_DICTIONARY);
-                    assert_eq!(def_level_encoding, Encoding::RLE);
-                    assert_eq!(rep_level_encoding, Encoding::BIT_PACKED);
-                    assert!(statistics.is_none());
-                    true
-                }
-                _ => false,
-            };
-            assert!(is_expected_page);
-            page_count += 1;
-        }
-        assert_eq!(page_count, 2);
-    }
-
-    #[test]
-    fn test_file_reader_datapage_v2() {
-        let test_file = get_test_file("datapage_v2.snappy.parquet");
-        let reader_result = SerializedFileReader::new(test_file);
-        assert!(reader_result.is_ok());
-        let reader = reader_result.unwrap();
-
-        // Test contents in Parquet metadata
-        let metadata = reader.metadata();
-        assert_eq!(metadata.num_row_groups(), 1);
-
-        // Test contents in file metadata
-        let file_metadata = metadata.file_metadata();
-        assert!(file_metadata.created_by().is_some());
-        assert_eq!(
-            file_metadata.created_by().as_ref().unwrap(),
-            "parquet-mr version 1.8.1 (build 4aba4dae7bb0d4edbcf7923ae1339f28fd3f7fcf)"
-        );
-        assert!(file_metadata.key_value_metadata().is_some());
-        assert_eq!(
-            file_metadata.key_value_metadata().to_owned().unwrap().len(),
-            1
-        );
-
-        assert_eq!(file_metadata.num_rows(), 5);
-        assert_eq!(file_metadata.version(), 1);
-        assert_eq!(file_metadata.column_orders(), None);
-
-        let row_group_metadata = metadata.row_group(0);
-
-        // Check each column order
-        for i in 0..row_group_metadata.num_columns() {
-            assert_eq!(file_metadata.column_order(i), ColumnOrder::UNDEFINED);
-        }
-
-        // Test row group reader
-        let row_group_reader_result = reader.get_row_group(0);
-        assert!(row_group_reader_result.is_ok());
-        let row_group_reader: Box<dyn RowGroupReader> = row_group_reader_result.unwrap();
-        assert_eq!(
-            row_group_reader.num_columns(),
-            row_group_metadata.num_columns()
-        );
-        assert_eq!(
-            row_group_reader.metadata().total_byte_size(),
-            row_group_metadata.total_byte_size()
-        );
-
-        // Test page readers
-        // TODO: test for every column
-        let page_reader_0_result = row_group_reader.get_column_page_reader(0);
-        assert!(page_reader_0_result.is_ok());
-        let mut page_reader_0: Box<dyn PageReader> = page_reader_0_result.unwrap();
-        let mut page_count = 0;
-        while let Ok(Some(page)) = page_reader_0.get_next_page() {
-            let is_expected_page = match page {
-                Page::DictionaryPage {
-                    buf,
-                    num_values,
-                    encoding,
-                    is_sorted,
-                } => {
-                    assert_eq!(buf.len(), 7);
-                    assert_eq!(num_values, 1);
-                    assert_eq!(encoding, Encoding::PLAIN);
-                    assert_eq!(is_sorted, false);
-                    true
-                }
-                Page::DataPageV2 {
-                    buf,
-                    num_values,
-                    encoding,
-                    num_nulls,
-                    num_rows,
-                    def_levels_byte_len,
-                    rep_levels_byte_len,
-                    is_compressed,
-                    statistics,
-                } => {
-                    assert_eq!(buf.len(), 4);
-                    assert_eq!(num_values, 5);
-                    assert_eq!(encoding, Encoding::RLE_DICTIONARY);
-                    assert_eq!(num_nulls, 1);
-                    assert_eq!(num_rows, 5);
-                    assert_eq!(def_levels_byte_len, 2);
-                    assert_eq!(rep_levels_byte_len, 0);
-                    assert_eq!(is_compressed, true);
-                    assert!(statistics.is_some());
-                    true
-                }
-                _ => false,
-            };
-            assert!(is_expected_page);
-            page_count += 1;
-        }
-        assert_eq!(page_count, 2);
-    }
-
-    #[test]
-    fn test_page_iterator() {
-        let file = get_test_file("alltypes_plain.parquet");
-        let file_reader = Arc::new(SerializedFileReader::new(file).unwrap());
-
-        let mut page_iterator = FilePageIterator::new(0, file_reader.clone()).unwrap();
-
-        // read first page
-        let page = page_iterator.next();
-        assert!(page.is_some());
-        assert!(page.unwrap().is_ok());
-
-        // reach end of file
-        let page = page_iterator.next();
-        assert!(page.is_none());
-
-        let row_group_indices = Box::new(0..1);
-        let mut page_iterator =
-            FilePageIterator::with_row_groups(0, row_group_indices, file_reader).unwrap();
-
-        // read first page
-        let page = page_iterator.next();
-        assert!(page.is_some());
-        assert!(page.unwrap().is_ok());
-
-        // reach end of file
-        let page = page_iterator.next();
-        assert!(page.is_none());
-    }
-
-    #[test]
-    fn test_file_reader_key_value_metadata() {
-        let file = get_test_file("binary.parquet");
-        let file_reader = Arc::new(SerializedFileReader::new(file).unwrap());
-
-        let metadata = file_reader
-            .metadata
-            .file_metadata()
-            .key_value_metadata()
-            .as_ref()
-            .unwrap();
-
-        assert_eq!(metadata.len(), 3);
-
-        assert_eq!(metadata.get(0).unwrap().key, "parquet.proto.descriptor");
-
-        assert_eq!(metadata.get(1).unwrap().key, "writer.model.name");
-        assert_eq!(metadata.get(1).unwrap().value, Some("protobuf".to_owned()));
-
-        assert_eq!(metadata.get(2).unwrap().key, "parquet.proto.class");
-        assert_eq!(
-            metadata.get(2).unwrap().value,
-            Some("foo.baz.Foobaz$Event".to_owned())
-        );
-    }
-
-    #[test]
-    fn test_file_reader_filter_row_groups() -> Result<()> {
-        let test_file = get_test_file("alltypes_plain.parquet");
-        let mut reader = SerializedFileReader::new(test_file)?;
-
-        // test initial number of row groups
-        let metadata = reader.metadata();
-        assert_eq!(metadata.num_row_groups(), 1);
-
-        // test filtering out all row groups
-        reader.filter_row_groups(&|_, _| false);
-        let metadata = reader.metadata();
-        assert_eq!(metadata.num_row_groups(), 0);
-
-        Ok(())
-    }
-}
diff --git a/rust/parquet/src/file/statistics.rs b/rust/parquet/src/file/statistics.rs
deleted file mode 100644
index 4f5d0e94bef..00000000000
--- a/rust/parquet/src/file/statistics.rs
+++ /dev/null
@@ -1,664 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Contains definitions for working with Parquet statistics.
-//!
-//! Though some common methods are available on enum, use pattern match to extract
-//! actual min and max values from statistics, see below:
-//!
-//! ```rust
-//! use parquet::file::statistics::Statistics;
-//!
-//! let stats = Statistics::int32(Some(1), Some(10), None, 3, true);
-//! assert_eq!(stats.null_count(), 3);
-//! assert!(stats.has_min_max_set());
-//! assert!(stats.is_min_max_deprecated());
-//!
-//! match stats {
-//!     Statistics::Int32(ref typed) => {
-//!         assert_eq!(*typed.min(), 1);
-//!         assert_eq!(*typed.max(), 10);
-//!     }
-//!     _ => {}
-//! }
-//! ```
-
-use std::{cmp, fmt};
-
-use byteorder::{ByteOrder, LittleEndian};
-use parquet_format::Statistics as TStatistics;
-
-use crate::basic::Type;
-use crate::data_type::*;
-use crate::util::bit_util::from_ne_slice;
-
-// Macro to generate methods create Statistics.
-macro_rules! statistics_new_func {
-    ($func:ident, $vtype:ty, $stat:ident) => {
-        pub fn $func(
-            min: $vtype,
-            max: $vtype,
-            distinct: Option<u64>,
-            nulls: u64,
-            is_deprecated: bool,
-        ) -> Self {
-            Statistics::$stat(TypedStatistics::new(
-                min,
-                max,
-                distinct,
-                nulls,
-                is_deprecated,
-            ))
-        }
-    };
-}
-
-// Macro to generate getter functions for Statistics.
-macro_rules! statistics_enum_func {
-    ($self:ident, $func:ident) => {{
-        match *$self {
-            Statistics::Boolean(ref typed) => typed.$func(),
-            Statistics::Int32(ref typed) => typed.$func(),
-            Statistics::Int64(ref typed) => typed.$func(),
-            Statistics::Int96(ref typed) => typed.$func(),
-            Statistics::Float(ref typed) => typed.$func(),
-            Statistics::Double(ref typed) => typed.$func(),
-            Statistics::ByteArray(ref typed) => typed.$func(),
-            Statistics::FixedLenByteArray(ref typed) => typed.$func(),
-        }
-    }};
-}
-
-/// Converts Thrift definition into `Statistics`.
-pub fn from_thrift(
-    physical_type: Type,
-    thrift_stats: Option<TStatistics>,
-) -> Option<Statistics> {
-    match thrift_stats {
-        Some(stats) => {
-            // Number of nulls recorded, when it is not available, we just mark it as 0.
-            let null_count = stats.null_count.unwrap_or(0);
-            assert!(
-                null_count >= 0,
-                "Statistics null count is negative ({})",
-                null_count
-            );
-
-            // Generic null count.
-            let null_count = null_count as u64;
-            // Generic distinct count (count of distinct values occurring)
-            let distinct_count = stats.distinct_count.map(|value| value as u64);
-            // Whether or not statistics use deprecated min/max fields.
-            let old_format = stats.min_value.is_none() && stats.max_value.is_none();
-            // Generic min value as bytes.
-            let min = if old_format {
-                stats.min
-            } else {
-                stats.min_value
-            };
-            // Generic max value as bytes.
-            let max = if old_format {
-                stats.max
-            } else {
-                stats.max_value
-            };
-
-            // Values are encoded using PLAIN encoding definition, except that
-            // variable-length byte arrays do not include a length prefix.
-            //
-            // Instead of using actual decoder, we manually convert values.
-            let res = match physical_type {
-                Type::BOOLEAN => Statistics::boolean(
-                    min.map(|data| data[0] != 0),
-                    max.map(|data| data[0] != 0),
-                    distinct_count,
-                    null_count,
-                    old_format,
-                ),
-                Type::INT32 => Statistics::int32(
-                    min.map(|data| LittleEndian::read_i32(&data)),
-                    max.map(|data| LittleEndian::read_i32(&data)),
-                    distinct_count,
-                    null_count,
-                    old_format,
-                ),
-                Type::INT64 => Statistics::int64(
-                    min.map(|data| LittleEndian::read_i64(&data)),
-                    max.map(|data| LittleEndian::read_i64(&data)),
-                    distinct_count,
-                    null_count,
-                    old_format,
-                ),
-                Type::INT96 => {
-                    // INT96 statistics may not be correct, because comparison is signed
-                    // byte-wise, not actual timestamps. It is recommended to ignore
-                    // min/max statistics for INT96 columns.
-                    let min = min.map(|data| {
-                        assert_eq!(data.len(), 12);
-                        from_ne_slice::<Int96>(&data)
-                    });
-                    let max = max.map(|data| {
-                        assert_eq!(data.len(), 12);
-                        from_ne_slice::<Int96>(&data)
-                    });
-                    Statistics::int96(min, max, distinct_count, null_count, old_format)
-                }
-                Type::FLOAT => Statistics::float(
-                    min.map(|data| LittleEndian::read_f32(&data)),
-                    max.map(|data| LittleEndian::read_f32(&data)),
-                    distinct_count,
-                    null_count,
-                    old_format,
-                ),
-                Type::DOUBLE => Statistics::double(
-                    min.map(|data| LittleEndian::read_f64(&data)),
-                    max.map(|data| LittleEndian::read_f64(&data)),
-                    distinct_count,
-                    null_count,
-                    old_format,
-                ),
-                Type::BYTE_ARRAY => Statistics::byte_array(
-                    min.map(ByteArray::from),
-                    max.map(ByteArray::from),
-                    distinct_count,
-                    null_count,
-                    old_format,
-                ),
-                Type::FIXED_LEN_BYTE_ARRAY => Statistics::fixed_len_byte_array(
-                    min.map(ByteArray::from).map(FixedLenByteArray::from),
-                    max.map(ByteArray::from).map(FixedLenByteArray::from),
-                    distinct_count,
-                    null_count,
-                    old_format,
-                ),
-            };
-
-            Some(res)
-        }
-        None => None,
-    }
-}
-
-// Convert Statistics into Thrift definition.
-pub fn to_thrift(stats: Option<&Statistics>) -> Option<TStatistics> {
-    let stats = stats?;
-
-    let mut thrift_stats = TStatistics {
-        max: None,
-        min: None,
-        null_count: if stats.has_nulls() {
-            Some(stats.null_count() as i64)
-        } else {
-            None
-        },
-        distinct_count: stats.distinct_count().map(|value| value as i64),
-        max_value: None,
-        min_value: None,
-    };
-
-    // Get min/max if set.
-    let (min, max) = if stats.has_min_max_set() {
-        (
-            Some(stats.min_bytes().to_vec()),
-            Some(stats.max_bytes().to_vec()),
-        )
-    } else {
-        (None, None)
-    };
-
-    if stats.is_min_max_deprecated() {
-        thrift_stats.min = min;
-        thrift_stats.max = max;
-    } else {
-        thrift_stats.min_value = min;
-        thrift_stats.max_value = max;
-    }
-
-    Some(thrift_stats)
-}
-
-/// Statistics for a column chunk and data page.
-#[derive(Debug, Clone, PartialEq)]
-pub enum Statistics {
-    Boolean(TypedStatistics<BoolType>),
-    Int32(TypedStatistics<Int32Type>),
-    Int64(TypedStatistics<Int64Type>),
-    Int96(TypedStatistics<Int96Type>),
-    Float(TypedStatistics<FloatType>),
-    Double(TypedStatistics<DoubleType>),
-    ByteArray(TypedStatistics<ByteArrayType>),
-    FixedLenByteArray(TypedStatistics<FixedLenByteArrayType>),
-}
-
-impl Statistics {
-    statistics_new_func![boolean, Option<bool>, Boolean];
-
-    statistics_new_func![int32, Option<i32>, Int32];
-
-    statistics_new_func![int64, Option<i64>, Int64];
-
-    statistics_new_func![int96, Option<Int96>, Int96];
-
-    statistics_new_func![float, Option<f32>, Float];
-
-    statistics_new_func![double, Option<f64>, Double];
-
-    statistics_new_func![byte_array, Option<ByteArray>, ByteArray];
-
-    statistics_new_func![
-        fixed_len_byte_array,
-        Option<FixedLenByteArray>,
-        FixedLenByteArray
-    ];
-
-    /// Returns `true` if statistics have old `min` and `max` fields set.
-    /// This means that the column order is likely to be undefined, which, for old files
-    /// could mean a signed sort order of values.
-    ///
-    /// Refer to [`ColumnOrder`](crate::basic::ColumnOrder) and
-    /// [`SortOrder`](crate::basic::SortOrder) for more information.
-    pub fn is_min_max_deprecated(&self) -> bool {
-        statistics_enum_func![self, is_min_max_deprecated]
-    }
-
-    /// Returns optional value of number of distinct values occurring.
-    /// When it is `None`, the value should be ignored.
-    pub fn distinct_count(&self) -> Option<u64> {
-        statistics_enum_func![self, distinct_count]
-    }
-
-    /// Returns number of null values for the column.
-    /// Note that this includes all nulls when column is part of the complex type.
-    pub fn null_count(&self) -> u64 {
-        statistics_enum_func![self, null_count]
-    }
-
-    /// Returns `true` if statistics collected any null values, `false` otherwise.
-    pub fn has_nulls(&self) -> bool {
-        self.null_count() > 0
-    }
-
-    /// Returns `true` if min value and max value are set.
-    /// Normally both min/max values will be set to `Some(value)` or `None`.
-    pub fn has_min_max_set(&self) -> bool {
-        statistics_enum_func![self, has_min_max_set]
-    }
-
-    /// Returns slice of bytes that represent min value.
-    /// Panics if min value is not set.
-    pub fn min_bytes(&self) -> &[u8] {
-        statistics_enum_func![self, min_bytes]
-    }
-
-    /// Returns slice of bytes that represent max value.
-    /// Panics if max value is not set.
-    pub fn max_bytes(&self) -> &[u8] {
-        statistics_enum_func![self, max_bytes]
-    }
-
-    /// Returns physical type associated with statistics.
-    pub fn physical_type(&self) -> Type {
-        match self {
-            Statistics::Boolean(_) => Type::BOOLEAN,
-            Statistics::Int32(_) => Type::INT32,
-            Statistics::Int64(_) => Type::INT64,
-            Statistics::Int96(_) => Type::INT96,
-            Statistics::Float(_) => Type::FLOAT,
-            Statistics::Double(_) => Type::DOUBLE,
-            Statistics::ByteArray(_) => Type::BYTE_ARRAY,
-            Statistics::FixedLenByteArray(_) => Type::FIXED_LEN_BYTE_ARRAY,
-        }
-    }
-}
-
-impl fmt::Display for Statistics {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        match self {
-            Statistics::Boolean(typed) => write!(f, "{}", typed),
-            Statistics::Int32(typed) => write!(f, "{}", typed),
-            Statistics::Int64(typed) => write!(f, "{}", typed),
-            Statistics::Int96(typed) => write!(f, "{}", typed),
-            Statistics::Float(typed) => write!(f, "{}", typed),
-            Statistics::Double(typed) => write!(f, "{}", typed),
-            Statistics::ByteArray(typed) => write!(f, "{}", typed),
-            Statistics::FixedLenByteArray(typed) => write!(f, "{}", typed),
-        }
-    }
-}
-
-/// Typed implementation for [`Statistics`].
-#[derive(Clone)]
-pub struct TypedStatistics<T: DataType> {
-    min: Option<T::T>,
-    max: Option<T::T>,
-    // Distinct count could be omitted in some cases
-    distinct_count: Option<u64>,
-    null_count: u64,
-    is_min_max_deprecated: bool,
-}
-
-impl<T: DataType> TypedStatistics<T> {
-    /// Creates new typed statistics.
-    pub fn new(
-        min: Option<T::T>,
-        max: Option<T::T>,
-        distinct_count: Option<u64>,
-        null_count: u64,
-        is_min_max_deprecated: bool,
-    ) -> Self {
-        Self {
-            min,
-            max,
-            distinct_count,
-            null_count,
-            is_min_max_deprecated,
-        }
-    }
-
-    /// Returns min value of the statistics.
-    ///
-    /// Panics if min value is not set, e.g. all values are `null`.
-    /// Use `has_min_max_set` method to check that.
-    pub fn min(&self) -> &T::T {
-        self.min.as_ref().unwrap()
-    }
-
-    /// Returns max value of the statistics.
-    ///
-    /// Panics if max value is not set, e.g. all values are `null`.
-    /// Use `has_min_max_set` method to check that.
-    pub fn max(&self) -> &T::T {
-        self.max.as_ref().unwrap()
-    }
-
-    /// Returns min value as bytes of the statistics.
-    ///
-    /// Panics if min value is not set, use `has_min_max_set` method to check
-    /// if values are set.
-    pub fn min_bytes(&self) -> &[u8] {
-        self.min().as_bytes()
-    }
-
-    /// Returns max value as bytes of the statistics.
-    ///
-    /// Panics if max value is not set, use `has_min_max_set` method to check
-    /// if values are set.
-    pub fn max_bytes(&self) -> &[u8] {
-        self.max().as_bytes()
-    }
-
-    /// Whether or not min and max values are set.
-    /// Normally both min/max values will be set to `Some(value)` or `None`.
-    fn has_min_max_set(&self) -> bool {
-        self.min.is_some() && self.max.is_some()
-    }
-
-    /// Returns optional value of number of distinct values occurring.
-    fn distinct_count(&self) -> Option<u64> {
-        self.distinct_count
-    }
-
-    /// Returns null count.
-    fn null_count(&self) -> u64 {
-        self.null_count
-    }
-
-    /// Returns `true` if statistics were created using old min/max fields.
-    fn is_min_max_deprecated(&self) -> bool {
-        self.is_min_max_deprecated
-    }
-}
-
-impl<T: DataType> fmt::Display for TypedStatistics<T> {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "{{")?;
-        write!(f, "min: ")?;
-        match self.min {
-            Some(ref value) => write!(f, "{}", value)?,
-            None => write!(f, "N/A")?,
-        }
-        write!(f, ", max: ")?;
-        match self.max {
-            Some(ref value) => write!(f, "{}", value)?,
-            None => write!(f, "N/A")?,
-        }
-        write!(f, ", distinct_count: ")?;
-        match self.distinct_count {
-            Some(value) => write!(f, "{}", value)?,
-            None => write!(f, "N/A")?,
-        }
-        write!(f, ", null_count: {}", self.null_count)?;
-        write!(f, ", min_max_deprecated: {}", self.is_min_max_deprecated)?;
-        write!(f, "}}")
-    }
-}
-
-impl<T: DataType> fmt::Debug for TypedStatistics<T> {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(
-            f,
-            "{{min: {:?}, max: {:?}, distinct_count: {:?}, null_count: {}, \
-             min_max_deprecated: {}}}",
-            self.min,
-            self.max,
-            self.distinct_count,
-            self.null_count,
-            self.is_min_max_deprecated
-        )
-    }
-}
-
-impl<T: DataType> cmp::PartialEq for TypedStatistics<T> {
-    fn eq(&self, other: &TypedStatistics<T>) -> bool {
-        self.min == other.min
-            && self.max == other.max
-            && self.distinct_count == other.distinct_count
-            && self.null_count == other.null_count
-            && self.is_min_max_deprecated == other.is_min_max_deprecated
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_statistics_min_max_bytes() {
-        let stats = Statistics::int32(Some(-123), Some(234), None, 1, false);
-        assert!(stats.has_min_max_set());
-        assert_eq!(stats.min_bytes(), (-123).as_bytes());
-        assert_eq!(stats.max_bytes(), 234.as_bytes());
-
-        let stats = Statistics::byte_array(
-            Some(ByteArray::from(vec![1, 2, 3])),
-            Some(ByteArray::from(vec![3, 4, 5])),
-            None,
-            1,
-            true,
-        );
-        assert!(stats.has_min_max_set());
-        assert_eq!(stats.min_bytes(), &[1, 2, 3]);
-        assert_eq!(stats.max_bytes(), &[3, 4, 5]);
-    }
-
-    #[test]
-    #[should_panic(expected = "Statistics null count is negative (-10)")]
-    fn test_statistics_negative_null_count() {
-        let thrift_stats = TStatistics {
-            max: None,
-            min: None,
-            null_count: Some(-10),
-            distinct_count: None,
-            max_value: None,
-            min_value: None,
-        };
-
-        from_thrift(Type::INT32, Some(thrift_stats));
-    }
-
-    #[test]
-    fn test_statistics_thrift_none() {
-        assert_eq!(from_thrift(Type::INT32, None), None);
-        assert_eq!(from_thrift(Type::BYTE_ARRAY, None), None);
-    }
-
-    #[test]
-    fn test_statistics_debug() {
-        let stats = Statistics::int32(Some(1), Some(12), None, 12, true);
-        assert_eq!(
-            format!("{:?}", stats),
-            "Int32({min: Some(1), max: Some(12), distinct_count: None, null_count: 12, \
-             min_max_deprecated: true})"
-        );
-
-        let stats = Statistics::int32(None, None, None, 7, false);
-        assert_eq!(
-            format!("{:?}", stats),
-            "Int32({min: None, max: None, distinct_count: None, null_count: 7, \
-             min_max_deprecated: false})"
-        )
-    }
-
-    #[test]
-    fn test_statistics_display() {
-        let stats = Statistics::int32(Some(1), Some(12), None, 12, true);
-        assert_eq!(
-            format!("{}", stats),
-            "{min: 1, max: 12, distinct_count: N/A, null_count: 12, min_max_deprecated: true}"
-        );
-
-        let stats = Statistics::int64(None, None, None, 7, false);
-        assert_eq!(
-            format!("{}", stats),
-            "{min: N/A, max: N/A, distinct_count: N/A, null_count: 7, min_max_deprecated: \
-             false}"
-        );
-
-        let stats = Statistics::int96(
-            Some(Int96::from(vec![1, 0, 0])),
-            Some(Int96::from(vec![2, 3, 4])),
-            None,
-            3,
-            true,
-        );
-        assert_eq!(
-            format!("{}", stats),
-            "{min: [1, 0, 0], max: [2, 3, 4], distinct_count: N/A, null_count: 3, \
-             min_max_deprecated: true}"
-        );
-
-        let stats = Statistics::byte_array(
-            Some(ByteArray::from(vec![1u8])),
-            Some(ByteArray::from(vec![2u8])),
-            Some(5),
-            7,
-            false,
-        );
-        assert_eq!(
-            format!("{}", stats),
-            "{min: [1], max: [2], distinct_count: 5, null_count: 7, min_max_deprecated: false}"
-        );
-    }
-
-    #[test]
-    fn test_statistics_partial_eq() {
-        let expected = Statistics::int32(Some(12), Some(45), None, 11, true);
-
-        assert!(Statistics::int32(Some(12), Some(45), None, 11, true) == expected);
-        assert!(Statistics::int32(Some(11), Some(45), None, 11, true) != expected);
-        assert!(Statistics::int32(Some(12), Some(44), None, 11, true) != expected);
-        assert!(Statistics::int32(Some(12), Some(45), None, 23, true) != expected);
-        assert!(Statistics::int32(Some(12), Some(45), None, 11, false) != expected);
-
-        assert!(
-            Statistics::int32(Some(12), Some(45), None, 11, false)
-                != Statistics::int64(Some(12), Some(45), None, 11, false)
-        );
-
-        assert!(
-            Statistics::boolean(Some(false), Some(true), None, 0, true)
-                != Statistics::double(Some(1.2), Some(4.5), None, 0, true)
-        );
-
-        assert!(
-            Statistics::byte_array(
-                Some(ByteArray::from(vec![1, 2, 3])),
-                Some(ByteArray::from(vec![1, 2, 3])),
-                None,
-                0,
-                true
-            ) != Statistics::fixed_len_byte_array(
-                Some(ByteArray::from(vec![1, 2, 3]).into()),
-                Some(ByteArray::from(vec![1, 2, 3]).into()),
-                None,
-                0,
-                true
-            )
-        );
-    }
-
-    #[test]
-    fn test_statistics_from_thrift() {
-        // Helper method to check statistics conversion.
-        fn check_stats(stats: Statistics) {
-            let tpe = stats.physical_type();
-            let thrift_stats = to_thrift(Some(&stats));
-            assert_eq!(from_thrift(tpe, thrift_stats), Some(stats));
-        }
-
-        check_stats(Statistics::boolean(Some(false), Some(true), None, 7, true));
-        check_stats(Statistics::boolean(Some(false), Some(true), None, 7, true));
-        check_stats(Statistics::boolean(Some(false), Some(true), None, 0, false));
-        check_stats(Statistics::boolean(Some(true), Some(true), None, 7, true));
-        check_stats(Statistics::boolean(Some(false), Some(false), None, 7, true));
-        check_stats(Statistics::boolean(None, None, None, 7, true));
-
-        check_stats(Statistics::int32(Some(-100), Some(500), None, 7, true));
-        check_stats(Statistics::int32(Some(-100), Some(500), None, 0, false));
-        check_stats(Statistics::int32(None, None, None, 7, true));
-
-        check_stats(Statistics::int64(Some(-100), Some(200), None, 7, true));
-        check_stats(Statistics::int64(Some(-100), Some(200), None, 0, false));
-        check_stats(Statistics::int64(None, None, None, 7, true));
-
-        check_stats(Statistics::float(Some(1.2), Some(3.4), None, 7, true));
-        check_stats(Statistics::float(Some(1.2), Some(3.4), None, 0, false));
-        check_stats(Statistics::float(None, None, None, 7, true));
-
-        check_stats(Statistics::double(Some(1.2), Some(3.4), None, 7, true));
-        check_stats(Statistics::double(Some(1.2), Some(3.4), None, 0, false));
-        check_stats(Statistics::double(None, None, None, 7, true));
-
-        check_stats(Statistics::byte_array(
-            Some(ByteArray::from(vec![1, 2, 3])),
-            Some(ByteArray::from(vec![3, 4, 5])),
-            None,
-            7,
-            true,
-        ));
-        check_stats(Statistics::byte_array(None, None, None, 7, true));
-
-        check_stats(Statistics::fixed_len_byte_array(
-            Some(ByteArray::from(vec![1, 2, 3]).into()),
-            Some(ByteArray::from(vec![3, 4, 5]).into()),
-            None,
-            7,
-            true,
-        ));
-        check_stats(Statistics::fixed_len_byte_array(None, None, None, 7, true));
-    }
-}
diff --git a/rust/parquet/src/file/writer.rs b/rust/parquet/src/file/writer.rs
deleted file mode 100644
index e1c2dc6b616..00000000000
--- a/rust/parquet/src/file/writer.rs
+++ /dev/null
@@ -1,1164 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Contains file writer API, and provides methods to write row groups and columns by
-//! using row group writers and column writers respectively.
-
-use std::{
-    io::{Seek, SeekFrom, Write},
-    sync::Arc,
-};
-
-use byteorder::{ByteOrder, LittleEndian};
-use parquet_format as parquet;
-use thrift::protocol::{TCompactOutputProtocol, TOutputProtocol};
-
-use crate::basic::PageType;
-use crate::column::{
-    page::{CompressedPage, Page, PageWriteSpec, PageWriter},
-    writer::{get_column_writer, ColumnWriter},
-};
-use crate::errors::{ParquetError, Result};
-use crate::file::{
-    metadata::*, properties::WriterPropertiesPtr,
-    statistics::to_thrift as statistics_to_thrift, FOOTER_SIZE, PARQUET_MAGIC,
-};
-use crate::schema::types::{self, SchemaDescPtr, SchemaDescriptor, TypePtr};
-use crate::util::io::{FileSink, Position};
-
-// Exposed publically so client code can implement [`ParquetWriter`]
-pub use crate::util::io::TryClone;
-
-// Exposed publically for convenience of writing Parquet to a buffer of bytes
-pub use crate::util::cursor::InMemoryWriteableCursor;
-
-// ----------------------------------------------------------------------
-// APIs for file & row group writers
-
-/// Parquet file writer API.
-/// Provides methods to write row groups sequentially.
-///
-/// The main workflow should be as following:
-/// - Create file writer, this will open a new file and potentially write some metadata.
-/// - Request a new row group writer by calling `next_row_group`.
-/// - Once finished writing row group, close row group writer by passing it into
-/// `close_row_group` method - this will finalise row group metadata and update metrics.
-/// - Write subsequent row groups, if necessary.
-/// - After all row groups have been written, close the file writer using `close` method.
-pub trait FileWriter {
-    /// Creates new row group from this file writer.
-    /// In case of IO error or Thrift error, returns `Err`.
-    ///
-    /// There is no limit on a number of row groups in a file; however, row groups have
-    /// to be written sequentially. Every time the next row group is requested, the
-    /// previous row group must be finalised and closed using `close_row_group` method.
-    fn next_row_group(&mut self) -> Result<Box<dyn RowGroupWriter>>;
-
-    /// Finalises and closes row group that was created using `next_row_group` method.
-    /// After calling this method, the next row group is available for writes.
-    fn close_row_group(
-        &mut self,
-        row_group_writer: Box<dyn RowGroupWriter>,
-    ) -> Result<()>;
-
-    /// Closes and finalises file writer, returning the file metadata.
-    ///
-    /// All row groups must be appended before this method is called.
-    /// No writes are allowed after this point.
-    ///
-    /// Can be called multiple times. It is up to implementation to either result in
-    /// no-op, or return an `Err` for subsequent calls.
-    fn close(&mut self) -> Result<parquet::FileMetaData>;
-}
-
-/// Parquet row group writer API.
-/// Provides methods to access column writers in an iterator-like fashion, order is
-/// guaranteed to match the order of schema leaves (column descriptors).
-///
-/// All columns should be written sequentially; the main workflow is:
-/// - Request the next column using `next_column` method - this will return `None` if no
-/// more columns are available to write.
-/// - Once done writing a column, close column writer with `close_column` method - this
-/// will finalise column chunk metadata and update row group metrics.
-/// - Once all columns have been written, close row group writer with `close` method -
-/// it will return row group metadata and is no-op on already closed row group.
-pub trait RowGroupWriter {
-    /// Returns the next column writer, if available; otherwise returns `None`.
-    /// In case of any IO error or Thrift error, or if row group writer has already been
-    /// closed returns `Err`.
-    ///
-    /// To request the next column writer, the previous one must be finalised and closed
-    /// using `close_column`.
-    fn next_column(&mut self) -> Result<Option<ColumnWriter>>;
-
-    /// Closes column writer that was created using `next_column` method.
-    /// This should be called before requesting the next column writer.
-    fn close_column(&mut self, column_writer: ColumnWriter) -> Result<()>;
-
-    /// Closes this row group writer and returns row group metadata.
-    /// After calling this method row group writer must not be used.
-    ///
-    /// It is recommended to call this method before requesting another row group, but it
-    /// will be closed automatically before returning a new row group.
-    ///
-    /// Can be called multiple times. In subsequent calls will result in no-op and return
-    /// already created row group metadata.
-    fn close(&mut self) -> Result<RowGroupMetaDataPtr>;
-}
-
-// ----------------------------------------------------------------------
-// Serialized impl for file & row group writers
-
-pub trait ParquetWriter: Write + Seek + TryClone {}
-impl<T: Write + Seek + TryClone> ParquetWriter for T {}
-
-/// A serialized implementation for Parquet [`FileWriter`].
-/// See documentation on file writer for more information.
-pub struct SerializedFileWriter<W: ParquetWriter> {
-    buf: W,
-    schema: TypePtr,
-    descr: SchemaDescPtr,
-    props: WriterPropertiesPtr,
-    total_num_rows: i64,
-    row_groups: Vec<RowGroupMetaDataPtr>,
-    previous_writer_closed: bool,
-    is_closed: bool,
-}
-
-impl<W: ParquetWriter> SerializedFileWriter<W> {
-    /// Creates new file writer.
-    pub fn new(
-        mut buf: W,
-        schema: TypePtr,
-        properties: WriterPropertiesPtr,
-    ) -> Result<Self> {
-        Self::start_file(&mut buf)?;
-        Ok(Self {
-            buf,
-            schema: schema.clone(),
-            descr: Arc::new(SchemaDescriptor::new(schema)),
-            props: properties,
-            total_num_rows: 0,
-            row_groups: Vec::new(),
-            previous_writer_closed: true,
-            is_closed: false,
-        })
-    }
-
-    /// Writes magic bytes at the beginning of the file.
-    fn start_file(buf: &mut W) -> Result<()> {
-        buf.write_all(&PARQUET_MAGIC)?;
-        Ok(())
-    }
-
-    /// Finalises active row group writer, otherwise no-op.
-    fn finalise_row_group_writer(
-        &mut self,
-        mut row_group_writer: Box<dyn RowGroupWriter>,
-    ) -> Result<()> {
-        let row_group_metadata = row_group_writer.close()?;
-        self.total_num_rows += row_group_metadata.num_rows();
-        self.row_groups.push(row_group_metadata);
-        Ok(())
-    }
-
-    /// Assembles and writes metadata at the end of the file.
-    fn write_metadata(&mut self) -> Result<parquet::FileMetaData> {
-        let file_metadata = parquet::FileMetaData {
-            version: self.props.writer_version().as_num(),
-            schema: types::to_thrift(self.schema.as_ref())?,
-            num_rows: self.total_num_rows as i64,
-            row_groups: self
-                .row_groups
-                .as_slice()
-                .iter()
-                .map(|v| v.to_thrift())
-                .collect(),
-            key_value_metadata: self.props.key_value_metadata().to_owned(),
-            created_by: Some(self.props.created_by().to_owned()),
-            column_orders: None,
-        };
-
-        // Write file metadata
-        let start_pos = self.buf.seek(SeekFrom::Current(0))?;
-        {
-            let mut protocol = TCompactOutputProtocol::new(&mut self.buf);
-            file_metadata.write_to_out_protocol(&mut protocol)?;
-            protocol.flush()?;
-        }
-        let end_pos = self.buf.seek(SeekFrom::Current(0))?;
-
-        // Write footer
-        let mut footer_buffer: [u8; FOOTER_SIZE] = [0; FOOTER_SIZE];
-        let metadata_len = (end_pos - start_pos) as i32;
-        LittleEndian::write_i32(&mut footer_buffer, metadata_len);
-        (&mut footer_buffer[4..]).write_all(&PARQUET_MAGIC)?;
-        self.buf.write_all(&footer_buffer)?;
-        Ok(file_metadata)
-    }
-
-    #[inline]
-    fn assert_closed(&self) -> Result<()> {
-        if self.is_closed {
-            Err(general_err!("File writer is closed"))
-        } else {
-            Ok(())
-        }
-    }
-
-    #[inline]
-    fn assert_previous_writer_closed(&self) -> Result<()> {
-        if !self.previous_writer_closed {
-            Err(general_err!("Previous row group writer was not closed"))
-        } else {
-            Ok(())
-        }
-    }
-}
-
-impl<W: 'static + ParquetWriter> FileWriter for SerializedFileWriter<W> {
-    #[inline]
-    fn next_row_group(&mut self) -> Result<Box<dyn RowGroupWriter>> {
-        self.assert_closed()?;
-        self.assert_previous_writer_closed()?;
-        let row_group_writer = SerializedRowGroupWriter::new(
-            self.descr.clone(),
-            self.props.clone(),
-            &self.buf,
-        );
-        self.previous_writer_closed = false;
-        Ok(Box::new(row_group_writer))
-    }
-
-    #[inline]
-    fn close_row_group(
-        &mut self,
-        row_group_writer: Box<dyn RowGroupWriter>,
-    ) -> Result<()> {
-        self.assert_closed()?;
-        let res = self.finalise_row_group_writer(row_group_writer);
-        self.previous_writer_closed = res.is_ok();
-        res
-    }
-
-    #[inline]
-    fn close(&mut self) -> Result<parquet::FileMetaData> {
-        self.assert_closed()?;
-        self.assert_previous_writer_closed()?;
-        let metadata = self.write_metadata()?;
-        self.is_closed = true;
-        Ok(metadata)
-    }
-}
-
-/// A serialized implementation for Parquet [`RowGroupWriter`].
-/// Coordinates writing of a row group with column writers.
-/// See documentation on row group writer for more information.
-pub struct SerializedRowGroupWriter<W: ParquetWriter> {
-    descr: SchemaDescPtr,
-    props: WriterPropertiesPtr,
-    buf: W,
-    total_rows_written: Option<u64>,
-    total_bytes_written: u64,
-    column_index: usize,
-    previous_writer_closed: bool,
-    row_group_metadata: Option<RowGroupMetaDataPtr>,
-    column_chunks: Vec<ColumnChunkMetaData>,
-}
-
-impl<W: 'static + ParquetWriter> SerializedRowGroupWriter<W> {
-    pub fn new(
-        schema_descr: SchemaDescPtr,
-        properties: WriterPropertiesPtr,
-        buf: &W,
-    ) -> Self {
-        let num_columns = schema_descr.num_columns();
-        Self {
-            descr: schema_descr,
-            props: properties,
-            buf: buf.try_clone().unwrap(),
-            total_rows_written: None,
-            total_bytes_written: 0,
-            column_index: 0,
-            previous_writer_closed: true,
-            row_group_metadata: None,
-            column_chunks: Vec::with_capacity(num_columns),
-        }
-    }
-
-    /// Checks and finalises current column writer.
-    fn finalise_column_writer(&mut self, writer: ColumnWriter) -> Result<()> {
-        let (bytes_written, rows_written, metadata) = match writer {
-            ColumnWriter::BoolColumnWriter(typed) => typed.close()?,
-            ColumnWriter::Int32ColumnWriter(typed) => typed.close()?,
-            ColumnWriter::Int64ColumnWriter(typed) => typed.close()?,
-            ColumnWriter::Int96ColumnWriter(typed) => typed.close()?,
-            ColumnWriter::FloatColumnWriter(typed) => typed.close()?,
-            ColumnWriter::DoubleColumnWriter(typed) => typed.close()?,
-            ColumnWriter::ByteArrayColumnWriter(typed) => typed.close()?,
-            ColumnWriter::FixedLenByteArrayColumnWriter(typed) => typed.close()?,
-        };
-
-        // Update row group writer metrics
-        self.total_bytes_written += bytes_written;
-        self.column_chunks.push(metadata);
-        if let Some(rows) = self.total_rows_written {
-            if rows != rows_written {
-                return Err(general_err!(
-                    "Incorrect number of rows, expected {} != {} rows",
-                    rows,
-                    rows_written
-                ));
-            }
-        } else {
-            self.total_rows_written = Some(rows_written);
-        }
-
-        Ok(())
-    }
-
-    #[inline]
-    fn assert_closed(&self) -> Result<()> {
-        if self.row_group_metadata.is_some() {
-            Err(general_err!("Row group writer is closed"))
-        } else {
-            Ok(())
-        }
-    }
-
-    #[inline]
-    fn assert_previous_writer_closed(&self) -> Result<()> {
-        if !self.previous_writer_closed {
-            Err(general_err!("Previous column writer was not closed"))
-        } else {
-            Ok(())
-        }
-    }
-}
-
-impl<W: 'static + ParquetWriter> RowGroupWriter for SerializedRowGroupWriter<W> {
-    #[inline]
-    fn next_column(&mut self) -> Result<Option<ColumnWriter>> {
-        self.assert_closed()?;
-        self.assert_previous_writer_closed()?;
-
-        if self.column_index >= self.descr.num_columns() {
-            return Ok(None);
-        }
-        let sink = FileSink::new(&self.buf);
-        let page_writer = Box::new(SerializedPageWriter::new(sink));
-        let column_writer = get_column_writer(
-            self.descr.column(self.column_index),
-            self.props.clone(),
-            page_writer,
-        );
-        self.column_index += 1;
-        self.previous_writer_closed = false;
-
-        Ok(Some(column_writer))
-    }
-
-    #[inline]
-    fn close_column(&mut self, column_writer: ColumnWriter) -> Result<()> {
-        let res = self.finalise_column_writer(column_writer);
-        self.previous_writer_closed = res.is_ok();
-        res
-    }
-
-    #[inline]
-    fn close(&mut self) -> Result<RowGroupMetaDataPtr> {
-        if self.row_group_metadata.is_none() {
-            self.assert_previous_writer_closed()?;
-
-            let column_chunks = std::mem::take(&mut self.column_chunks);
-            let row_group_metadata = RowGroupMetaData::builder(self.descr.clone())
-                .set_column_metadata(column_chunks)
-                .set_total_byte_size(self.total_bytes_written as i64)
-                .set_num_rows(self.total_rows_written.unwrap_or(0) as i64)
-                .build()?;
-
-            self.row_group_metadata = Some(Arc::new(row_group_metadata));
-        }
-
-        let metadata = self.row_group_metadata.as_ref().unwrap().clone();
-        Ok(metadata)
-    }
-}
-
-/// A serialized implementation for Parquet [`PageWriter`].
-/// Writes and serializes pages and metadata into output stream.
-///
-/// `SerializedPageWriter` should not be used after calling `close()`.
-pub struct SerializedPageWriter<T: Write + Position> {
-    sink: T,
-}
-
-impl<T: Write + Position> SerializedPageWriter<T> {
-    /// Creates new page writer.
-    pub fn new(sink: T) -> Self {
-        Self { sink }
-    }
-
-    /// Serializes page header into Thrift.
-    /// Returns number of bytes that have been written into the sink.
-    #[inline]
-    fn serialize_page_header(&mut self, header: parquet::PageHeader) -> Result<usize> {
-        let start_pos = self.sink.pos();
-        {
-            let mut protocol = TCompactOutputProtocol::new(&mut self.sink);
-            header.write_to_out_protocol(&mut protocol)?;
-            protocol.flush()?;
-        }
-        Ok((self.sink.pos() - start_pos) as usize)
-    }
-
-    /// Serializes column chunk into Thrift.
-    /// Returns Ok() if there are not errors serializing and writing data into the sink.
-    #[inline]
-    fn serialize_column_chunk(&mut self, chunk: parquet::ColumnChunk) -> Result<()> {
-        let mut protocol = TCompactOutputProtocol::new(&mut self.sink);
-        chunk.write_to_out_protocol(&mut protocol)?;
-        protocol.flush()?;
-        Ok(())
-    }
-}
-
-impl<T: Write + Position> PageWriter for SerializedPageWriter<T> {
-    fn write_page(&mut self, page: CompressedPage) -> Result<PageWriteSpec> {
-        let uncompressed_size = page.uncompressed_size();
-        let compressed_size = page.compressed_size();
-        let num_values = page.num_values();
-        let encoding = page.encoding();
-        let page_type = page.page_type();
-
-        let mut page_header = parquet::PageHeader {
-            type_: page_type.into(),
-            uncompressed_page_size: uncompressed_size as i32,
-            compressed_page_size: compressed_size as i32,
-            // TODO: Add support for crc checksum
-            crc: None,
-            data_page_header: None,
-            index_page_header: None,
-            dictionary_page_header: None,
-            data_page_header_v2: None,
-        };
-
-        match *page.compressed_page() {
-            Page::DataPage {
-                def_level_encoding,
-                rep_level_encoding,
-                ref statistics,
-                ..
-            } => {
-                let data_page_header = parquet::DataPageHeader {
-                    num_values: num_values as i32,
-                    encoding: encoding.into(),
-                    definition_level_encoding: def_level_encoding.into(),
-                    repetition_level_encoding: rep_level_encoding.into(),
-                    statistics: statistics_to_thrift(statistics.as_ref()),
-                };
-                page_header.data_page_header = Some(data_page_header);
-            }
-            Page::DataPageV2 {
-                num_nulls,
-                num_rows,
-                def_levels_byte_len,
-                rep_levels_byte_len,
-                is_compressed,
-                ref statistics,
-                ..
-            } => {
-                let data_page_header_v2 = parquet::DataPageHeaderV2 {
-                    num_values: num_values as i32,
-                    num_nulls: num_nulls as i32,
-                    num_rows: num_rows as i32,
-                    encoding: encoding.into(),
-                    definition_levels_byte_length: def_levels_byte_len as i32,
-                    repetition_levels_byte_length: rep_levels_byte_len as i32,
-                    is_compressed: Some(is_compressed),
-                    statistics: statistics_to_thrift(statistics.as_ref()),
-                };
-                page_header.data_page_header_v2 = Some(data_page_header_v2);
-            }
-            Page::DictionaryPage { is_sorted, .. } => {
-                let dictionary_page_header = parquet::DictionaryPageHeader {
-                    num_values: num_values as i32,
-                    encoding: encoding.into(),
-                    is_sorted: Some(is_sorted),
-                };
-                page_header.dictionary_page_header = Some(dictionary_page_header);
-            }
-        }
-
-        let start_pos = self.sink.pos();
-
-        let header_size = self.serialize_page_header(page_header)?;
-        self.sink.write_all(page.data())?;
-
-        let mut spec = PageWriteSpec::new();
-        spec.page_type = page_type;
-        spec.uncompressed_size = uncompressed_size + header_size;
-        spec.compressed_size = compressed_size + header_size;
-        spec.offset = start_pos;
-        spec.bytes_written = self.sink.pos() - start_pos;
-        // Number of values is incremented for data pages only
-        if page_type == PageType::DATA_PAGE || page_type == PageType::DATA_PAGE_V2 {
-            spec.num_values = num_values;
-        }
-
-        Ok(spec)
-    }
-
-    fn write_metadata(&mut self, metadata: &ColumnChunkMetaData) -> Result<()> {
-        self.serialize_column_chunk(metadata.to_thrift())
-    }
-
-    fn close(&mut self) -> Result<()> {
-        self.sink.flush()?;
-        Ok(())
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    use std::{fs::File, io::Cursor};
-
-    use crate::basic::{Compression, Encoding, IntType, LogicalType, Repetition, Type};
-    use crate::column::page::PageReader;
-    use crate::compression::{create_codec, Codec};
-    use crate::file::{
-        properties::{WriterProperties, WriterVersion},
-        reader::{FileReader, SerializedFileReader, SerializedPageReader},
-        statistics::{from_thrift, to_thrift, Statistics},
-    };
-    use crate::record::RowAccessor;
-    use crate::util::{memory::ByteBufferPtr, test_common::get_temp_file};
-
-    #[test]
-    fn test_file_writer_error_after_close() {
-        let file = get_temp_file("test_file_writer_error_after_close", &[]);
-        let schema = Arc::new(types::Type::group_type_builder("schema").build().unwrap());
-        let props = Arc::new(WriterProperties::builder().build());
-        let mut writer = SerializedFileWriter::new(file, schema, props).unwrap();
-        writer.close().unwrap();
-        {
-            let res = writer.next_row_group();
-            assert!(res.is_err());
-            if let Err(err) = res {
-                assert_eq!(format!("{}", err), "Parquet error: File writer is closed");
-            }
-        }
-        {
-            let res = writer.close();
-            assert!(res.is_err());
-            if let Err(err) = res {
-                assert_eq!(format!("{}", err), "Parquet error: File writer is closed");
-            }
-        }
-    }
-
-    #[test]
-    fn test_row_group_writer_error_after_close() {
-        let file = get_temp_file("test_file_writer_row_group_error_after_close", &[]);
-        let schema = Arc::new(types::Type::group_type_builder("schema").build().unwrap());
-        let props = Arc::new(WriterProperties::builder().build());
-        let mut writer = SerializedFileWriter::new(file, schema, props).unwrap();
-        let mut row_group_writer = writer.next_row_group().unwrap();
-        row_group_writer.close().unwrap();
-
-        let res = row_group_writer.next_column();
-        assert!(res.is_err());
-        if let Err(err) = res {
-            assert_eq!(
-                format!("{}", err),
-                "Parquet error: Row group writer is closed"
-            );
-        }
-    }
-
-    #[test]
-    fn test_row_group_writer_error_not_all_columns_written() {
-        let file =
-            get_temp_file("test_row_group_writer_error_not_all_columns_written", &[]);
-        let schema = Arc::new(
-            types::Type::group_type_builder("schema")
-                .with_fields(&mut vec![Arc::new(
-                    types::Type::primitive_type_builder("col1", Type::INT32)
-                        .build()
-                        .unwrap(),
-                )])
-                .build()
-                .unwrap(),
-        );
-        let props = Arc::new(WriterProperties::builder().build());
-        let mut writer = SerializedFileWriter::new(file, schema, props).unwrap();
-        let mut row_group_writer = writer.next_row_group().unwrap();
-        let res = row_group_writer.close();
-        assert!(res.is_err());
-        if let Err(err) = res {
-            assert_eq!(
-                format!("{}", err),
-                "Parquet error: Column length mismatch: 1 != 0"
-            );
-        }
-    }
-
-    #[test]
-    fn test_row_group_writer_num_records_mismatch() {
-        let file = get_temp_file("test_row_group_writer_num_records_mismatch", &[]);
-        let schema = Arc::new(
-            types::Type::group_type_builder("schema")
-                .with_fields(&mut vec![
-                    Arc::new(
-                        types::Type::primitive_type_builder("col1", Type::INT32)
-                            .with_repetition(Repetition::REQUIRED)
-                            .build()
-                            .unwrap(),
-                    ),
-                    Arc::new(
-                        types::Type::primitive_type_builder("col2", Type::INT32)
-                            .with_repetition(Repetition::REQUIRED)
-                            .build()
-                            .unwrap(),
-                    ),
-                ])
-                .build()
-                .unwrap(),
-        );
-        let props = Arc::new(WriterProperties::builder().build());
-        let mut writer = SerializedFileWriter::new(file, schema, props).unwrap();
-        let mut row_group_writer = writer.next_row_group().unwrap();
-
-        let mut col_writer = row_group_writer.next_column().unwrap().unwrap();
-        if let ColumnWriter::Int32ColumnWriter(ref mut typed) = col_writer {
-            typed.write_batch(&[1, 2, 3], None, None).unwrap();
-        }
-        row_group_writer.close_column(col_writer).unwrap();
-
-        let mut col_writer = row_group_writer.next_column().unwrap().unwrap();
-        if let ColumnWriter::Int32ColumnWriter(ref mut typed) = col_writer {
-            typed.write_batch(&[1, 2], None, None).unwrap();
-        }
-
-        let res = row_group_writer.close_column(col_writer);
-        assert!(res.is_err());
-        if let Err(err) = res {
-            assert_eq!(
-                format!("{}", err),
-                "Parquet error: Incorrect number of rows, expected 3 != 2 rows"
-            );
-        }
-    }
-
-    #[test]
-    fn test_file_writer_empty_file() {
-        let file = get_temp_file("test_file_writer_write_empty_file", &[]);
-
-        let schema = Arc::new(
-            types::Type::group_type_builder("schema")
-                .with_fields(&mut vec![Arc::new(
-                    types::Type::primitive_type_builder("col1", Type::INT32)
-                        .build()
-                        .unwrap(),
-                )])
-                .build()
-                .unwrap(),
-        );
-        let props = Arc::new(WriterProperties::builder().build());
-        let mut writer =
-            SerializedFileWriter::new(file.try_clone().unwrap(), schema, props).unwrap();
-        writer.close().unwrap();
-
-        let reader = SerializedFileReader::new(file).unwrap();
-        assert_eq!(reader.get_row_iter(None).unwrap().count(), 0);
-    }
-
-    #[test]
-    fn test_file_writer_with_metadata() {
-        let file = get_temp_file("test_file_writer_write_with_metadata", &[]);
-
-        let schema = Arc::new(
-            types::Type::group_type_builder("schema")
-                .with_fields(&mut vec![Arc::new(
-                    types::Type::primitive_type_builder("col1", Type::INT32)
-                        .build()
-                        .unwrap(),
-                )])
-                .build()
-                .unwrap(),
-        );
-        let props = Arc::new(
-            WriterProperties::builder()
-                .set_key_value_metadata(Some(vec![KeyValue::new(
-                    "key".to_string(),
-                    "value".to_string(),
-                )]))
-                .build(),
-        );
-        let mut writer =
-            SerializedFileWriter::new(file.try_clone().unwrap(), schema, props).unwrap();
-        writer.close().unwrap();
-
-        let reader = SerializedFileReader::new(file).unwrap();
-        assert_eq!(
-            reader
-                .metadata()
-                .file_metadata()
-                .key_value_metadata()
-                .to_owned()
-                .unwrap()
-                .len(),
-            1
-        );
-    }
-
-    #[test]
-    fn test_file_writer_v2_with_metadata() {
-        let file = get_temp_file("test_file_writer_v2_write_with_metadata", &[]);
-        let field_logical_type = Some(LogicalType::INTEGER(IntType {
-            bit_width: 8,
-            is_signed: false,
-        }));
-        let field = Arc::new(
-            types::Type::primitive_type_builder("col1", Type::INT32)
-                .with_logical_type(field_logical_type.clone())
-                .with_converted_type(field_logical_type.into())
-                .build()
-                .unwrap(),
-        );
-        let schema = Arc::new(
-            types::Type::group_type_builder("schema")
-                .with_fields(&mut vec![field.clone()])
-                .build()
-                .unwrap(),
-        );
-        let props = Arc::new(
-            WriterProperties::builder()
-                .set_key_value_metadata(Some(vec![KeyValue::new(
-                    "key".to_string(),
-                    "value".to_string(),
-                )]))
-                .set_writer_version(WriterVersion::PARQUET_2_0)
-                .build(),
-        );
-        let mut writer =
-            SerializedFileWriter::new(file.try_clone().unwrap(), schema, props).unwrap();
-        writer.close().unwrap();
-
-        let reader = SerializedFileReader::new(file).unwrap();
-
-        assert_eq!(
-            reader
-                .metadata()
-                .file_metadata()
-                .key_value_metadata()
-                .to_owned()
-                .unwrap()
-                .len(),
-            1
-        );
-
-        // ARROW-11803: Test that the converted and logical types have been populated
-        let fields = reader.metadata().file_metadata().schema().get_fields();
-        assert_eq!(fields.len(), 1);
-        let read_field = fields.get(0).unwrap();
-        assert_eq!(read_field, &field);
-    }
-
-    #[test]
-    fn test_file_writer_empty_row_groups() {
-        let file = get_temp_file("test_file_writer_write_empty_row_groups", &[]);
-        test_file_roundtrip(file, vec![]);
-    }
-
-    #[test]
-    fn test_file_writer_single_row_group() {
-        let file = get_temp_file("test_file_writer_write_single_row_group", &[]);
-        test_file_roundtrip(file, vec![vec![1, 2, 3, 4, 5]]);
-    }
-
-    #[test]
-    fn test_file_writer_multiple_row_groups() {
-        let file = get_temp_file("test_file_writer_write_multiple_row_groups", &[]);
-        test_file_roundtrip(
-            file,
-            vec![
-                vec![1, 2, 3, 4, 5],
-                vec![1, 2, 3],
-                vec![1],
-                vec![1, 2, 3, 4, 5, 6],
-            ],
-        );
-    }
-
-    #[test]
-    fn test_file_writer_multiple_large_row_groups() {
-        let file = get_temp_file("test_file_writer_multiple_large_row_groups", &[]);
-        test_file_roundtrip(
-            file,
-            vec![vec![123; 1024], vec![124; 1000], vec![125; 15], vec![]],
-        );
-    }
-
-    #[test]
-    fn test_page_writer_data_pages() {
-        let pages = vec![
-            Page::DataPage {
-                buf: ByteBufferPtr::new(vec![1, 2, 3, 4, 5, 6, 7, 8]),
-                num_values: 10,
-                encoding: Encoding::DELTA_BINARY_PACKED,
-                def_level_encoding: Encoding::RLE,
-                rep_level_encoding: Encoding::RLE,
-                statistics: Some(Statistics::int32(Some(1), Some(3), None, 7, true)),
-            },
-            Page::DataPageV2 {
-                buf: ByteBufferPtr::new(vec![4; 128]),
-                num_values: 10,
-                encoding: Encoding::DELTA_BINARY_PACKED,
-                num_nulls: 2,
-                num_rows: 12,
-                def_levels_byte_len: 24,
-                rep_levels_byte_len: 32,
-                is_compressed: false,
-                statistics: Some(Statistics::int32(Some(1), Some(3), None, 7, true)),
-            },
-        ];
-
-        test_page_roundtrip(&pages[..], Compression::SNAPPY, Type::INT32);
-        test_page_roundtrip(&pages[..], Compression::UNCOMPRESSED, Type::INT32);
-    }
-
-    #[test]
-    fn test_page_writer_dict_pages() {
-        let pages = vec![
-            Page::DictionaryPage {
-                buf: ByteBufferPtr::new(vec![1, 2, 3, 4, 5]),
-                num_values: 5,
-                encoding: Encoding::RLE_DICTIONARY,
-                is_sorted: false,
-            },
-            Page::DataPage {
-                buf: ByteBufferPtr::new(vec![1, 2, 3, 4, 5, 6, 7, 8]),
-                num_values: 10,
-                encoding: Encoding::DELTA_BINARY_PACKED,
-                def_level_encoding: Encoding::RLE,
-                rep_level_encoding: Encoding::RLE,
-                statistics: Some(Statistics::int32(Some(1), Some(3), None, 7, true)),
-            },
-            Page::DataPageV2 {
-                buf: ByteBufferPtr::new(vec![4; 128]),
-                num_values: 10,
-                encoding: Encoding::DELTA_BINARY_PACKED,
-                num_nulls: 2,
-                num_rows: 12,
-                def_levels_byte_len: 24,
-                rep_levels_byte_len: 32,
-                is_compressed: false,
-                statistics: None,
-            },
-        ];
-
-        test_page_roundtrip(&pages[..], Compression::SNAPPY, Type::INT32);
-        test_page_roundtrip(&pages[..], Compression::UNCOMPRESSED, Type::INT32);
-    }
-
-    /// Tests writing and reading pages.
-    /// Physical type is for statistics only, should match any defined statistics type in
-    /// pages.
-    fn test_page_roundtrip(pages: &[Page], codec: Compression, physical_type: Type) {
-        let mut compressed_pages = vec![];
-        let mut total_num_values = 0i64;
-        let mut compressor = create_codec(codec).unwrap();
-
-        for page in pages {
-            let uncompressed_len = page.buffer().len();
-
-            let compressed_page = match *page {
-                Page::DataPage {
-                    ref buf,
-                    num_values,
-                    encoding,
-                    def_level_encoding,
-                    rep_level_encoding,
-                    ref statistics,
-                } => {
-                    total_num_values += num_values as i64;
-                    let output_buf = compress_helper(compressor.as_mut(), buf.data());
-
-                    Page::DataPage {
-                        buf: ByteBufferPtr::new(output_buf),
-                        num_values,
-                        encoding,
-                        def_level_encoding,
-                        rep_level_encoding,
-                        statistics: from_thrift(
-                            physical_type,
-                            to_thrift(statistics.as_ref()),
-                        ),
-                    }
-                }
-                Page::DataPageV2 {
-                    ref buf,
-                    num_values,
-                    encoding,
-                    num_nulls,
-                    num_rows,
-                    def_levels_byte_len,
-                    rep_levels_byte_len,
-                    ref statistics,
-                    ..
-                } => {
-                    total_num_values += num_values as i64;
-                    let offset = (def_levels_byte_len + rep_levels_byte_len) as usize;
-                    let cmp_buf =
-                        compress_helper(compressor.as_mut(), &buf.data()[offset..]);
-                    let mut output_buf = Vec::from(&buf.data()[..offset]);
-                    output_buf.extend_from_slice(&cmp_buf[..]);
-
-                    Page::DataPageV2 {
-                        buf: ByteBufferPtr::new(output_buf),
-                        num_values,
-                        encoding,
-                        num_nulls,
-                        num_rows,
-                        def_levels_byte_len,
-                        rep_levels_byte_len,
-                        is_compressed: compressor.is_some(),
-                        statistics: from_thrift(
-                            physical_type,
-                            to_thrift(statistics.as_ref()),
-                        ),
-                    }
-                }
-                Page::DictionaryPage {
-                    ref buf,
-                    num_values,
-                    encoding,
-                    is_sorted,
-                } => {
-                    let output_buf = compress_helper(compressor.as_mut(), buf.data());
-
-                    Page::DictionaryPage {
-                        buf: ByteBufferPtr::new(output_buf),
-                        num_values,
-                        encoding,
-                        is_sorted,
-                    }
-                }
-            };
-
-            let compressed_page = CompressedPage::new(compressed_page, uncompressed_len);
-            compressed_pages.push(compressed_page);
-        }
-
-        let mut buffer: Vec<u8> = vec![];
-        let mut result_pages: Vec<Page> = vec![];
-        {
-            let cursor = Cursor::new(&mut buffer);
-            let mut page_writer = SerializedPageWriter::new(cursor);
-
-            for page in compressed_pages {
-                page_writer.write_page(page).unwrap();
-            }
-            page_writer.close().unwrap();
-        }
-        {
-            let mut page_reader = SerializedPageReader::new(
-                Cursor::new(&buffer),
-                total_num_values,
-                codec,
-                physical_type,
-            )
-            .unwrap();
-
-            while let Some(page) = page_reader.get_next_page().unwrap() {
-                result_pages.push(page);
-            }
-        }
-
-        assert_eq!(result_pages.len(), pages.len());
-        for i in 0..result_pages.len() {
-            assert_page(&result_pages[i], &pages[i]);
-        }
-    }
-
-    /// Helper function to compress a slice
-    fn compress_helper(compressor: Option<&mut Box<dyn Codec>>, data: &[u8]) -> Vec<u8> {
-        let mut output_buf = vec![];
-        if let Some(cmpr) = compressor {
-            cmpr.compress(data, &mut output_buf).unwrap();
-        } else {
-            output_buf.extend_from_slice(data);
-        }
-        output_buf
-    }
-
-    /// Check if pages match.
-    fn assert_page(left: &Page, right: &Page) {
-        assert_eq!(left.page_type(), right.page_type());
-        assert_eq!(left.buffer().data(), right.buffer().data());
-        assert_eq!(left.num_values(), right.num_values());
-        assert_eq!(left.encoding(), right.encoding());
-        assert_eq!(to_thrift(left.statistics()), to_thrift(right.statistics()));
-    }
-
-    /// File write-read roundtrip.
-    /// `data` consists of arrays of values for each row group.
-    fn test_file_roundtrip(file: File, data: Vec<Vec<i32>>) {
-        let schema = Arc::new(
-            types::Type::group_type_builder("schema")
-                .with_fields(&mut vec![Arc::new(
-                    types::Type::primitive_type_builder("col1", Type::INT32)
-                        .with_repetition(Repetition::REQUIRED)
-                        .build()
-                        .unwrap(),
-                )])
-                .build()
-                .unwrap(),
-        );
-        let props = Arc::new(WriterProperties::builder().build());
-        let mut file_writer = assert_send(
-            SerializedFileWriter::new(file.try_clone().unwrap(), schema, props).unwrap(),
-        );
-        let mut rows: i64 = 0;
-
-        for subset in &data {
-            let mut row_group_writer = file_writer.next_row_group().unwrap();
-            let col_writer = row_group_writer.next_column().unwrap();
-            if let Some(mut writer) = col_writer {
-                match writer {
-                    ColumnWriter::Int32ColumnWriter(ref mut typed) => {
-                        rows +=
-                            typed.write_batch(&subset[..], None, None).unwrap() as i64;
-                    }
-                    _ => {
-                        unimplemented!();
-                    }
-                }
-                row_group_writer.close_column(writer).unwrap();
-            }
-            file_writer.close_row_group(row_group_writer).unwrap();
-        }
-
-        file_writer.close().unwrap();
-
-        let reader = assert_send(SerializedFileReader::new(file).unwrap());
-        assert_eq!(reader.num_row_groups(), data.len());
-        assert_eq!(
-            reader.metadata().file_metadata().num_rows(),
-            rows,
-            "row count in metadata not equal to number of rows written"
-        );
-        for i in 0..reader.num_row_groups() {
-            let row_group_reader = reader.get_row_group(i).unwrap();
-            let iter = row_group_reader.get_row_iter(None).unwrap();
-            let res = iter
-                .map(|elem| elem.get_int(0).unwrap())
-                .collect::<Vec<i32>>();
-            assert_eq!(res, data[i]);
-        }
-    }
-
-    fn assert_send<T: Send>(t: T) -> T {
-        t
-    }
-
-    #[test]
-    fn test_bytes_writer_empty_row_groups() {
-        test_bytes_roundtrip(vec![]);
-    }
-
-    #[test]
-    fn test_bytes_writer_single_row_group() {
-        test_bytes_roundtrip(vec![vec![1, 2, 3, 4, 5]]);
-    }
-
-    #[test]
-    fn test_bytes_writer_multiple_row_groups() {
-        test_bytes_roundtrip(vec![
-            vec![1, 2, 3, 4, 5],
-            vec![1, 2, 3],
-            vec![1],
-            vec![1, 2, 3, 4, 5, 6],
-        ]);
-    }
-
-    fn test_bytes_roundtrip(data: Vec<Vec<i32>>) {
-        let cursor = InMemoryWriteableCursor::default();
-
-        let schema = Arc::new(
-            types::Type::group_type_builder("schema")
-                .with_fields(&mut vec![Arc::new(
-                    types::Type::primitive_type_builder("col1", Type::INT32)
-                        .with_repetition(Repetition::REQUIRED)
-                        .build()
-                        .unwrap(),
-                )])
-                .build()
-                .unwrap(),
-        );
-
-        let mut rows: i64 = 0;
-        {
-            let props = Arc::new(WriterProperties::builder().build());
-            let mut writer =
-                SerializedFileWriter::new(cursor.clone(), schema, props).unwrap();
-
-            for subset in &data {
-                let mut row_group_writer = writer.next_row_group().unwrap();
-                let col_writer = row_group_writer.next_column().unwrap();
-                if let Some(mut writer) = col_writer {
-                    match writer {
-                        ColumnWriter::Int32ColumnWriter(ref mut typed) => {
-                            rows += typed.write_batch(&subset[..], None, None).unwrap()
-                                as i64;
-                        }
-                        _ => {
-                            unimplemented!();
-                        }
-                    }
-                    row_group_writer.close_column(writer).unwrap();
-                }
-                writer.close_row_group(row_group_writer).unwrap();
-            }
-
-            writer.close().unwrap();
-        }
-
-        let buffer = cursor.into_inner().unwrap();
-
-        let reading_cursor = crate::file::serialized_reader::SliceableCursor::new(buffer);
-        let reader = SerializedFileReader::new(reading_cursor).unwrap();
-
-        assert_eq!(reader.num_row_groups(), data.len());
-        assert_eq!(
-            reader.metadata().file_metadata().num_rows(),
-            rows,
-            "row count in metadata not equal to number of rows written"
-        );
-        for i in 0..reader.num_row_groups() {
-            let row_group_reader = reader.get_row_group(i).unwrap();
-            let iter = row_group_reader.get_row_iter(None).unwrap();
-            let res = iter
-                .map(|elem| elem.get_int(0).unwrap())
-                .collect::<Vec<i32>>();
-            assert_eq!(res, data[i]);
-        }
-    }
-}
diff --git a/rust/parquet/src/lib.rs b/rust/parquet/src/lib.rs
deleted file mode 100644
index a931b95622d..00000000000
--- a/rust/parquet/src/lib.rs
+++ /dev/null
@@ -1,57 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#![allow(incomplete_features)]
-#![allow(dead_code)]
-#![allow(non_camel_case_types)]
-#![allow(
-    clippy::approx_constant,
-    clippy::cast_ptr_alignment,
-    clippy::float_cmp,
-    clippy::float_equality_without_abs,
-    clippy::from_over_into,
-    clippy::many_single_char_names,
-    clippy::needless_range_loop,
-    clippy::new_without_default,
-    clippy::or_fun_call,
-    clippy::same_item_push,
-    clippy::too_many_arguments,
-    clippy::transmute_ptr_to_ptr,
-    clippy::upper_case_acronyms,
-    clippy::vec_init_then_push
-)]
-
-#[macro_use]
-pub mod errors;
-pub mod basic;
-#[macro_use]
-pub mod data_type;
-
-// Exported for external use, such as benchmarks
-pub use self::encodings::{decoding, encoding};
-pub use self::util::memory;
-
-#[macro_use]
-mod util;
-#[cfg(any(feature = "arrow", test))]
-pub mod arrow;
-pub mod column;
-pub mod compression;
-mod encodings;
-pub mod file;
-pub mod record;
-pub mod schema;
diff --git a/rust/parquet/src/record/api.rs b/rust/parquet/src/record/api.rs
deleted file mode 100644
index 411016e7ce8..00000000000
--- a/rust/parquet/src/record/api.rs
+++ /dev/null
@@ -1,1846 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Contains Row enum that is used to represent record in Rust.
-
-use std::fmt;
-
-use chrono::{TimeZone, Utc};
-use num_bigint::{BigInt, Sign};
-
-use crate::basic::{ConvertedType, Type as PhysicalType};
-use crate::data_type::{ByteArray, Decimal, Int96};
-use crate::errors::{ParquetError, Result};
-use crate::schema::types::ColumnDescPtr;
-
-#[cfg(feature = "cli")]
-use serde_json::Value;
-
-/// Macro as a shortcut to generate 'not yet implemented' panic error.
-macro_rules! nyi {
-    ($column_descr:ident, $value:ident) => {{
-        unimplemented!(
-            "Conversion for physical type {}, converted type {}, value {:?}",
-            $column_descr.physical_type(),
-            $column_descr.converted_type(),
-            $value
-        );
-    }};
-}
-
-/// `Row` represents a nested Parquet record.
-#[derive(Clone, Debug, PartialEq)]
-pub struct Row {
-    fields: Vec<(String, Field)>,
-}
-
-#[allow(clippy::len_without_is_empty)]
-impl Row {
-    /// Get the number of fields in this row.
-    pub fn len(&self) -> usize {
-        self.fields.len()
-    }
-
-    /// Get an iterator to go through all columns in the row.
-    ///
-    /// # Example
-    ///
-    /// ```no_run
-    /// use std::fs::File;
-    /// use parquet::record::Row;
-    /// use parquet::file::reader::{FileReader, SerializedFileReader};
-    ///
-    /// let file = File::open("/path/to/file").unwrap();
-    /// let reader = SerializedFileReader::new(file).unwrap();
-    /// let row: Row = reader.get_row_iter(None).unwrap().next().unwrap();
-    /// for (idx, (name, field)) in row.get_column_iter().enumerate() {
-    ///     println!("column index: {}, column name: {}, column value: {}", idx, name, field);
-    /// }
-    /// ```
-    pub fn get_column_iter(&self) -> RowColumnIter {
-        RowColumnIter {
-            fields: &self.fields,
-            curr: 0,
-            count: self.fields.len(),
-        }
-    }
-
-    #[cfg(feature = "cli")]
-    pub fn to_json_value(&self) -> Value {
-        Value::Object(
-            self.fields
-                .iter()
-                .map(|(key, field)| (key.to_owned(), field.to_json_value()))
-                .collect(),
-        )
-    }
-}
-
-pub struct RowColumnIter<'a> {
-    fields: &'a Vec<(String, Field)>,
-    curr: usize,
-    count: usize,
-}
-
-impl<'a> Iterator for RowColumnIter<'a> {
-    type Item = (&'a String, &'a Field);
-
-    fn next(&mut self) -> Option<Self::Item> {
-        let idx = self.curr;
-        if idx >= self.count {
-            return None;
-        }
-        self.curr += 1;
-        Some((&self.fields[idx].0, &self.fields[idx].1))
-    }
-}
-
-/// Trait for type-safe convenient access to fields within a Row.
-pub trait RowAccessor {
-    fn get_bool(&self, i: usize) -> Result<bool>;
-    fn get_byte(&self, i: usize) -> Result<i8>;
-    fn get_short(&self, i: usize) -> Result<i16>;
-    fn get_int(&self, i: usize) -> Result<i32>;
-    fn get_long(&self, i: usize) -> Result<i64>;
-    fn get_ubyte(&self, i: usize) -> Result<u8>;
-    fn get_ushort(&self, i: usize) -> Result<u16>;
-    fn get_uint(&self, i: usize) -> Result<u32>;
-    fn get_ulong(&self, i: usize) -> Result<u64>;
-    fn get_float(&self, i: usize) -> Result<f32>;
-    fn get_double(&self, i: usize) -> Result<f64>;
-    fn get_timestamp_millis(&self, i: usize) -> Result<u64>;
-    fn get_timestamp_micros(&self, i: usize) -> Result<u64>;
-    fn get_decimal(&self, i: usize) -> Result<&Decimal>;
-    fn get_string(&self, i: usize) -> Result<&String>;
-    fn get_bytes(&self, i: usize) -> Result<&ByteArray>;
-    fn get_group(&self, i: usize) -> Result<&Row>;
-    fn get_list(&self, i: usize) -> Result<&List>;
-    fn get_map(&self, i: usize) -> Result<&Map>;
-}
-
-/// Trait for formating fields within a Row.
-pub trait RowFormatter {
-    fn fmt(&self, i: usize) -> &dyn fmt::Display;
-}
-
-/// Macro to generate type-safe get_xxx methods for primitive types,
-/// e.g. `get_bool`, `get_short`.
-macro_rules! row_primitive_accessor {
-    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
-        fn $METHOD(&self, i: usize) -> Result<$TY> {
-            match self.fields[i].1 {
-                Field::$VARIANT(v) => Ok(v),
-                _ => Err(general_err!(
-                    "Cannot access {} as {}",
-                    self.fields[i].1.get_type_name(),
-                    stringify!($VARIANT)
-                )),
-            }
-        }
-    };
-}
-
-/// Macro to generate type-safe get_xxx methods for reference types,
-/// e.g. `get_list`, `get_map`.
-macro_rules! row_complex_accessor {
-    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
-        fn $METHOD(&self, i: usize) -> Result<&$TY> {
-            match self.fields[i].1 {
-                Field::$VARIANT(ref v) => Ok(v),
-                _ => Err(general_err!(
-                    "Cannot access {} as {}",
-                    self.fields[i].1.get_type_name(),
-                    stringify!($VARIANT)
-                )),
-            }
-        }
-    };
-}
-
-impl RowFormatter for Row {
-    /// Get Display reference for a given field.
-    fn fmt(&self, i: usize) -> &dyn fmt::Display {
-        &self.fields[i].1
-    }
-}
-
-impl RowAccessor for Row {
-    row_primitive_accessor!(get_bool, Bool, bool);
-
-    row_primitive_accessor!(get_byte, Byte, i8);
-
-    row_primitive_accessor!(get_short, Short, i16);
-
-    row_primitive_accessor!(get_int, Int, i32);
-
-    row_primitive_accessor!(get_long, Long, i64);
-
-    row_primitive_accessor!(get_ubyte, UByte, u8);
-
-    row_primitive_accessor!(get_ushort, UShort, u16);
-
-    row_primitive_accessor!(get_uint, UInt, u32);
-
-    row_primitive_accessor!(get_ulong, ULong, u64);
-
-    row_primitive_accessor!(get_float, Float, f32);
-
-    row_primitive_accessor!(get_double, Double, f64);
-
-    row_primitive_accessor!(get_timestamp_millis, TimestampMillis, u64);
-
-    row_primitive_accessor!(get_timestamp_micros, TimestampMicros, u64);
-
-    row_complex_accessor!(get_decimal, Decimal, Decimal);
-
-    row_complex_accessor!(get_string, Str, String);
-
-    row_complex_accessor!(get_bytes, Bytes, ByteArray);
-
-    row_complex_accessor!(get_group, Group, Row);
-
-    row_complex_accessor!(get_list, ListInternal, List);
-
-    row_complex_accessor!(get_map, MapInternal, Map);
-}
-
-/// Constructs a `Row` from the list of `fields` and returns it.
-#[inline]
-pub fn make_row(fields: Vec<(String, Field)>) -> Row {
-    Row { fields }
-}
-
-impl fmt::Display for Row {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "{{")?;
-        for (i, &(ref key, ref value)) in self.fields.iter().enumerate() {
-            key.fmt(f)?;
-            write!(f, ": ")?;
-            value.fmt(f)?;
-            if i < self.fields.len() - 1 {
-                write!(f, ", ")?;
-            }
-        }
-        write!(f, "}}")
-    }
-}
-
-/// `List` represents a list which contains an array of elements.
-#[derive(Clone, Debug, PartialEq)]
-pub struct List {
-    elements: Vec<Field>,
-}
-
-#[allow(clippy::len_without_is_empty)]
-impl List {
-    /// Get the number of fields in this row
-    pub fn len(&self) -> usize {
-        self.elements.len()
-    }
-
-    pub fn elements(&self) -> &[Field] {
-        self.elements.as_slice()
-    }
-}
-
-/// Constructs a `List` from the list of `fields` and returns it.
-#[inline]
-pub fn make_list(elements: Vec<Field>) -> List {
-    List { elements }
-}
-
-/// Trait for type-safe access of an index for a `List`.
-/// Note that the get_XXX methods do not do bound checking.
-pub trait ListAccessor {
-    fn get_bool(&self, i: usize) -> Result<bool>;
-    fn get_byte(&self, i: usize) -> Result<i8>;
-    fn get_short(&self, i: usize) -> Result<i16>;
-    fn get_int(&self, i: usize) -> Result<i32>;
-    fn get_long(&self, i: usize) -> Result<i64>;
-    fn get_ubyte(&self, i: usize) -> Result<u8>;
-    fn get_ushort(&self, i: usize) -> Result<u16>;
-    fn get_uint(&self, i: usize) -> Result<u32>;
-    fn get_ulong(&self, i: usize) -> Result<u64>;
-    fn get_float(&self, i: usize) -> Result<f32>;
-    fn get_double(&self, i: usize) -> Result<f64>;
-    fn get_timestamp_millis(&self, i: usize) -> Result<u64>;
-    fn get_timestamp_micros(&self, i: usize) -> Result<u64>;
-    fn get_decimal(&self, i: usize) -> Result<&Decimal>;
-    fn get_string(&self, i: usize) -> Result<&String>;
-    fn get_bytes(&self, i: usize) -> Result<&ByteArray>;
-    fn get_group(&self, i: usize) -> Result<&Row>;
-    fn get_list(&self, i: usize) -> Result<&List>;
-    fn get_map(&self, i: usize) -> Result<&Map>;
-}
-
-/// Macro to generate type-safe get_xxx methods for primitive types,
-/// e.g. get_bool, get_short
-macro_rules! list_primitive_accessor {
-    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
-        fn $METHOD(&self, i: usize) -> Result<$TY> {
-            match self.elements[i] {
-                Field::$VARIANT(v) => Ok(v),
-                _ => Err(general_err!(
-                    "Cannot access {} as {}",
-                    self.elements[i].get_type_name(),
-                    stringify!($VARIANT)
-                )),
-            }
-        }
-    };
-}
-
-/// Macro to generate type-safe get_xxx methods for reference types
-/// e.g. get_list, get_map
-macro_rules! list_complex_accessor {
-    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
-        fn $METHOD(&self, i: usize) -> Result<&$TY> {
-            match self.elements[i] {
-                Field::$VARIANT(ref v) => Ok(v),
-                _ => Err(general_err!(
-                    "Cannot access {} as {}",
-                    self.elements[i].get_type_name(),
-                    stringify!($VARIANT)
-                )),
-            }
-        }
-    };
-}
-
-impl ListAccessor for List {
-    list_primitive_accessor!(get_bool, Bool, bool);
-
-    list_primitive_accessor!(get_byte, Byte, i8);
-
-    list_primitive_accessor!(get_short, Short, i16);
-
-    list_primitive_accessor!(get_int, Int, i32);
-
-    list_primitive_accessor!(get_long, Long, i64);
-
-    list_primitive_accessor!(get_ubyte, UByte, u8);
-
-    list_primitive_accessor!(get_ushort, UShort, u16);
-
-    list_primitive_accessor!(get_uint, UInt, u32);
-
-    list_primitive_accessor!(get_ulong, ULong, u64);
-
-    list_primitive_accessor!(get_float, Float, f32);
-
-    list_primitive_accessor!(get_double, Double, f64);
-
-    list_primitive_accessor!(get_timestamp_millis, TimestampMillis, u64);
-
-    list_primitive_accessor!(get_timestamp_micros, TimestampMicros, u64);
-
-    list_complex_accessor!(get_decimal, Decimal, Decimal);
-
-    list_complex_accessor!(get_string, Str, String);
-
-    list_complex_accessor!(get_bytes, Bytes, ByteArray);
-
-    list_complex_accessor!(get_group, Group, Row);
-
-    list_complex_accessor!(get_list, ListInternal, List);
-
-    list_complex_accessor!(get_map, MapInternal, Map);
-}
-
-/// `Map` represents a map which contains a list of key->value pairs.
-#[derive(Clone, Debug, PartialEq)]
-pub struct Map {
-    entries: Vec<(Field, Field)>,
-}
-
-#[allow(clippy::len_without_is_empty)]
-impl Map {
-    /// Get the number of fields in this row
-    pub fn len(&self) -> usize {
-        self.entries.len()
-    }
-
-    pub fn entries(&self) -> &[(Field, Field)] {
-        self.entries.as_slice()
-    }
-}
-
-/// Constructs a `Map` from the list of `entries` and returns it.
-#[inline]
-pub fn make_map(entries: Vec<(Field, Field)>) -> Map {
-    Map { entries }
-}
-
-/// Trait for type-safe access of an index for a `Map`
-pub trait MapAccessor {
-    fn get_keys<'a>(&'a self) -> Box<dyn ListAccessor + 'a>;
-    fn get_values<'a>(&'a self) -> Box<dyn ListAccessor + 'a>;
-}
-
-struct MapList<'a> {
-    elements: Vec<&'a Field>,
-}
-
-/// Macro to generate type-safe get_xxx methods for primitive types,
-/// e.g. get_bool, get_short
-macro_rules! map_list_primitive_accessor {
-    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
-        fn $METHOD(&self, i: usize) -> Result<$TY> {
-            match self.elements[i] {
-                Field::$VARIANT(v) => Ok(*v),
-                _ => Err(general_err!(
-                    "Cannot access {} as {}",
-                    self.elements[i].get_type_name(),
-                    stringify!($VARIANT)
-                )),
-            }
-        }
-    };
-}
-
-impl<'a> ListAccessor for MapList<'a> {
-    map_list_primitive_accessor!(get_bool, Bool, bool);
-
-    map_list_primitive_accessor!(get_byte, Byte, i8);
-
-    map_list_primitive_accessor!(get_short, Short, i16);
-
-    map_list_primitive_accessor!(get_int, Int, i32);
-
-    map_list_primitive_accessor!(get_long, Long, i64);
-
-    map_list_primitive_accessor!(get_ubyte, UByte, u8);
-
-    map_list_primitive_accessor!(get_ushort, UShort, u16);
-
-    map_list_primitive_accessor!(get_uint, UInt, u32);
-
-    map_list_primitive_accessor!(get_ulong, ULong, u64);
-
-    map_list_primitive_accessor!(get_float, Float, f32);
-
-    map_list_primitive_accessor!(get_double, Double, f64);
-
-    map_list_primitive_accessor!(get_timestamp_millis, TimestampMillis, u64);
-
-    map_list_primitive_accessor!(get_timestamp_micros, TimestampMicros, u64);
-
-    list_complex_accessor!(get_decimal, Decimal, Decimal);
-
-    list_complex_accessor!(get_string, Str, String);
-
-    list_complex_accessor!(get_bytes, Bytes, ByteArray);
-
-    list_complex_accessor!(get_group, Group, Row);
-
-    list_complex_accessor!(get_list, ListInternal, List);
-
-    list_complex_accessor!(get_map, MapInternal, Map);
-}
-
-impl MapAccessor for Map {
-    fn get_keys<'a>(&'a self) -> Box<dyn ListAccessor + 'a> {
-        let map_list = MapList {
-            elements: self.entries.iter().map(|v| &v.0).collect(),
-        };
-        Box::new(map_list)
-    }
-
-    fn get_values<'a>(&'a self) -> Box<dyn ListAccessor + 'a> {
-        let map_list = MapList {
-            elements: self.entries.iter().map(|v| &v.1).collect(),
-        };
-        Box::new(map_list)
-    }
-}
-
-/// API to represent a single field in a `Row`.
-#[derive(Clone, Debug, PartialEq)]
-pub enum Field {
-    // Primitive types
-    /// Null value.
-    Null,
-    /// Boolean value (`true`, `false`).
-    Bool(bool),
-    /// Signed integer INT_8.
-    Byte(i8),
-    /// Signed integer INT_16.
-    Short(i16),
-    /// Signed integer INT_32.
-    Int(i32),
-    /// Signed integer INT_64.
-    Long(i64),
-    // Unsigned integer UINT_8.
-    UByte(u8),
-    // Unsigned integer UINT_16.
-    UShort(u16),
-    // Unsigned integer UINT_32.
-    UInt(u32),
-    // Unsigned integer UINT_64.
-    ULong(u64),
-    /// IEEE 32-bit floating point value.
-    Float(f32),
-    /// IEEE 64-bit floating point value.
-    Double(f64),
-    /// Decimal value.
-    Decimal(Decimal),
-    /// UTF-8 encoded character string.
-    Str(String),
-    /// General binary value.
-    Bytes(ByteArray),
-    /// Date without a time of day, stores the number of days from the
-    /// Unix epoch, 1 January 1970.
-    Date(u32),
-    /// Milliseconds from the Unix epoch, 1 January 1970.
-    TimestampMillis(u64),
-    /// Microseconds from the Unix epoch, 1 Janiary 1970.
-    TimestampMicros(u64),
-
-    // ----------------------------------------------------------------------
-    // Complex types
-    /// Struct, child elements are tuples of field-value pairs.
-    Group(Row),
-    /// List of elements.
-    ListInternal(List),
-    /// List of key-value pairs.
-    MapInternal(Map),
-}
-
-impl Field {
-    /// Get the type name.
-    fn get_type_name(&self) -> &'static str {
-        match *self {
-            Field::Null => "Null",
-            Field::Bool(_) => "Bool",
-            Field::Byte(_) => "Byte",
-            Field::Short(_) => "Short",
-            Field::Int(_) => "Int",
-            Field::Long(_) => "Long",
-            Field::UByte(_) => "UByte",
-            Field::UShort(_) => "UShort",
-            Field::UInt(_) => "UInt",
-            Field::ULong(_) => "ULong",
-            Field::Float(_) => "Float",
-            Field::Double(_) => "Double",
-            Field::Decimal(_) => "Decimal",
-            Field::Date(_) => "Date",
-            Field::Str(_) => "Str",
-            Field::Bytes(_) => "Bytes",
-            Field::TimestampMillis(_) => "TimestampMillis",
-            Field::TimestampMicros(_) => "TimestampMicros",
-            Field::Group(_) => "Group",
-            Field::ListInternal(_) => "ListInternal",
-            Field::MapInternal(_) => "MapInternal",
-        }
-    }
-
-    /// Determines if this Row represents a primitive value.
-    pub fn is_primitive(&self) -> bool {
-        !matches!(
-            *self,
-            Field::Group(_) | Field::ListInternal(_) | Field::MapInternal(_)
-        )
-    }
-
-    /// Converts Parquet BOOLEAN type with logical type into `bool` value.
-    #[inline]
-    pub fn convert_bool(_descr: &ColumnDescPtr, value: bool) -> Self {
-        Field::Bool(value)
-    }
-
-    /// Converts Parquet INT32 type with converted type into `i32` value.
-    #[inline]
-    pub fn convert_int32(descr: &ColumnDescPtr, value: i32) -> Self {
-        match descr.converted_type() {
-            ConvertedType::INT_8 => Field::Byte(value as i8),
-            ConvertedType::INT_16 => Field::Short(value as i16),
-            ConvertedType::INT_32 | ConvertedType::NONE => Field::Int(value),
-            ConvertedType::UINT_8 => Field::UByte(value as u8),
-            ConvertedType::UINT_16 => Field::UShort(value as u16),
-            ConvertedType::UINT_32 => Field::UInt(value as u32),
-            ConvertedType::DATE => Field::Date(value as u32),
-            ConvertedType::DECIMAL => Field::Decimal(Decimal::from_i32(
-                value,
-                descr.type_precision(),
-                descr.type_scale(),
-            )),
-            _ => nyi!(descr, value),
-        }
-    }
-
-    /// Converts Parquet INT64 type with converted type into `i64` value.
-    #[inline]
-    pub fn convert_int64(descr: &ColumnDescPtr, value: i64) -> Self {
-        match descr.converted_type() {
-            ConvertedType::INT_64 | ConvertedType::NONE => Field::Long(value),
-            ConvertedType::UINT_64 => Field::ULong(value as u64),
-            ConvertedType::TIMESTAMP_MILLIS => Field::TimestampMillis(value as u64),
-            ConvertedType::TIMESTAMP_MICROS => Field::TimestampMicros(value as u64),
-            ConvertedType::DECIMAL => Field::Decimal(Decimal::from_i64(
-                value,
-                descr.type_precision(),
-                descr.type_scale(),
-            )),
-            _ => nyi!(descr, value),
-        }
-    }
-
-    /// Converts Parquet INT96 (nanosecond timestamps) type and logical type into
-    /// `Timestamp` value.
-    #[inline]
-    pub fn convert_int96(_descr: &ColumnDescPtr, value: Int96) -> Self {
-        Field::TimestampMillis(value.to_i64() as u64)
-    }
-
-    /// Converts Parquet FLOAT type with logical type into `f32` value.
-    #[inline]
-    pub fn convert_float(_descr: &ColumnDescPtr, value: f32) -> Self {
-        Field::Float(value)
-    }
-
-    /// Converts Parquet DOUBLE type with converted type into `f64` value.
-    #[inline]
-    pub fn convert_double(_descr: &ColumnDescPtr, value: f64) -> Self {
-        Field::Double(value)
-    }
-
-    /// Converts Parquet BYTE_ARRAY type with converted type into either UTF8 string or
-    /// array of bytes.
-    #[inline]
-    pub fn convert_byte_array(descr: &ColumnDescPtr, value: ByteArray) -> Self {
-        match descr.physical_type() {
-            PhysicalType::BYTE_ARRAY => match descr.converted_type() {
-                ConvertedType::UTF8 | ConvertedType::ENUM | ConvertedType::JSON => {
-                    let value = String::from_utf8(value.data().to_vec()).unwrap();
-                    Field::Str(value)
-                }
-                ConvertedType::BSON | ConvertedType::NONE => Field::Bytes(value),
-                ConvertedType::DECIMAL => Field::Decimal(Decimal::from_bytes(
-                    value,
-                    descr.type_precision(),
-                    descr.type_scale(),
-                )),
-                _ => nyi!(descr, value),
-            },
-            PhysicalType::FIXED_LEN_BYTE_ARRAY => match descr.converted_type() {
-                ConvertedType::DECIMAL => Field::Decimal(Decimal::from_bytes(
-                    value,
-                    descr.type_precision(),
-                    descr.type_scale(),
-                )),
-                ConvertedType::NONE => Field::Bytes(value),
-                _ => nyi!(descr, value),
-            },
-            _ => nyi!(descr, value),
-        }
-    }
-
-    #[cfg(feature = "cli")]
-    pub fn to_json_value(&self) -> Value {
-        match &self {
-            Field::Null => Value::Null,
-            Field::Bool(b) => Value::Bool(*b),
-            Field::Byte(n) => Value::Number(serde_json::Number::from(*n)),
-            Field::Short(n) => Value::Number(serde_json::Number::from(*n)),
-            Field::Int(n) => Value::Number(serde_json::Number::from(*n)),
-            Field::Long(n) => Value::Number(serde_json::Number::from(*n)),
-            Field::UByte(n) => Value::Number(serde_json::Number::from(*n)),
-            Field::UShort(n) => Value::Number(serde_json::Number::from(*n)),
-            Field::UInt(n) => Value::Number(serde_json::Number::from(*n)),
-            Field::ULong(n) => Value::Number(serde_json::Number::from(*n)),
-            Field::Float(n) => serde_json::Number::from_f64(f64::from(*n))
-                .map(Value::Number)
-                .unwrap_or(Value::Null),
-            Field::Double(n) => serde_json::Number::from_f64(*n)
-                .map(Value::Number)
-                .unwrap_or(Value::Null),
-            Field::Decimal(n) => Value::String(convert_decimal_to_string(&n)),
-            Field::Str(s) => Value::String(s.to_owned()),
-            Field::Bytes(b) => Value::String(base64::encode(b.data())),
-            Field::Date(d) => Value::String(convert_date_to_string(*d)),
-            Field::TimestampMillis(ts) => {
-                Value::String(convert_timestamp_millis_to_string(*ts))
-            }
-            Field::TimestampMicros(ts) => {
-                Value::String(convert_timestamp_micros_to_string(*ts))
-            }
-            Field::Group(row) => row.to_json_value(),
-            Field::ListInternal(fields) => {
-                Value::Array(fields.elements.iter().map(|f| f.to_json_value()).collect())
-            }
-            Field::MapInternal(map) => Value::Object(
-                map.entries
-                    .iter()
-                    .map(|(key_field, value_field)| {
-                        let key_val = key_field.to_json_value();
-                        let key_str = key_val
-                            .as_str()
-                            .map(|s| s.to_owned())
-                            .unwrap_or_else(|| key_val.to_string());
-                        (key_str, value_field.to_json_value())
-                    })
-                    .collect(),
-            ),
-        }
-    }
-}
-
-impl fmt::Display for Field {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        match *self {
-            Field::Null => write!(f, "null"),
-            Field::Bool(value) => write!(f, "{}", value),
-            Field::Byte(value) => write!(f, "{}", value),
-            Field::Short(value) => write!(f, "{}", value),
-            Field::Int(value) => write!(f, "{}", value),
-            Field::Long(value) => write!(f, "{}", value),
-            Field::UByte(value) => write!(f, "{}", value),
-            Field::UShort(value) => write!(f, "{}", value),
-            Field::UInt(value) => write!(f, "{}", value),
-            Field::ULong(value) => write!(f, "{}", value),
-            Field::Float(value) => {
-                if !(1e-15..=1e19).contains(&value) {
-                    write!(f, "{:E}", value)
-                } else {
-                    write!(f, "{:?}", value)
-                }
-            }
-            Field::Double(value) => {
-                if !(1e-15..=1e19).contains(&value) {
-                    write!(f, "{:E}", value)
-                } else {
-                    write!(f, "{:?}", value)
-                }
-            }
-            Field::Decimal(ref value) => {
-                write!(f, "{}", convert_decimal_to_string(value))
-            }
-            Field::Str(ref value) => write!(f, "\"{}\"", value),
-            Field::Bytes(ref value) => write!(f, "{:?}", value.data()),
-            Field::Date(value) => write!(f, "{}", convert_date_to_string(value)),
-            Field::TimestampMillis(value) => {
-                write!(f, "{}", convert_timestamp_millis_to_string(value))
-            }
-            Field::TimestampMicros(value) => {
-                write!(f, "{}", convert_timestamp_micros_to_string(value))
-            }
-            Field::Group(ref fields) => write!(f, "{}", fields),
-            Field::ListInternal(ref list) => {
-                let elems = &list.elements;
-                write!(f, "[")?;
-                for (i, field) in elems.iter().enumerate() {
-                    field.fmt(f)?;
-                    if i < elems.len() - 1 {
-                        write!(f, ", ")?;
-                    }
-                }
-                write!(f, "]")
-            }
-            Field::MapInternal(ref map) => {
-                let entries = &map.entries;
-                write!(f, "{{")?;
-                for (i, &(ref key, ref value)) in entries.iter().enumerate() {
-                    key.fmt(f)?;
-                    write!(f, " -> ")?;
-                    value.fmt(f)?;
-                    if i < entries.len() - 1 {
-                        write!(f, ", ")?;
-                    }
-                }
-                write!(f, "}}")
-            }
-        }
-    }
-}
-
-/// Helper method to convert Parquet date into a string.
-/// Input `value` is a number of days since the epoch in UTC.
-/// Date is displayed in local timezone.
-#[inline]
-fn convert_date_to_string(value: u32) -> String {
-    static NUM_SECONDS_IN_DAY: i64 = 60 * 60 * 24;
-    let dt = Utc.timestamp(value as i64 * NUM_SECONDS_IN_DAY, 0).date();
-    format!("{}", dt.format("%Y-%m-%d %:z"))
-}
-
-/// Helper method to convert Parquet timestamp into a string.
-/// Input `value` is a number of milliseconds since the epoch in UTC.
-/// Datetime is displayed in local timezone.
-#[inline]
-fn convert_timestamp_millis_to_string(value: u64) -> String {
-    let dt = Utc.timestamp((value / 1000) as i64, 0);
-    format!("{}", dt.format("%Y-%m-%d %H:%M:%S %:z"))
-}
-
-/// Helper method to convert Parquet timestamp into a string.
-/// Input `value` is a number of microseconds since the epoch in UTC.
-/// Datetime is displayed in local timezone.
-#[inline]
-fn convert_timestamp_micros_to_string(value: u64) -> String {
-    convert_timestamp_millis_to_string(value / 1000)
-}
-
-/// Helper method to convert Parquet decimal into a string.
-/// We assert that `scale >= 0` and `precision > scale`, but this will be enforced
-/// when constructing Parquet schema.
-#[inline]
-fn convert_decimal_to_string(decimal: &Decimal) -> String {
-    assert!(decimal.scale() >= 0 && decimal.precision() > decimal.scale());
-
-    // Specify as signed bytes to resolve sign as part of conversion.
-    let num = BigInt::from_signed_bytes_be(decimal.data());
-
-    // Offset of the first digit in a string.
-    let negative = if num.sign() == Sign::Minus { 1 } else { 0 };
-    let mut num_str = num.to_string();
-    let mut point = num_str.len() as i32 - decimal.scale() - negative;
-
-    // Convert to string form without scientific notation.
-    if point <= 0 {
-        // Zeros need to be prepended to the unscaled value.
-        while point < 0 {
-            num_str.insert(negative as usize, '0');
-            point += 1;
-        }
-        num_str.insert_str(negative as usize, "0.");
-    } else {
-        // No zeroes need to be prepended to the unscaled value, simply insert decimal
-        // point.
-        num_str.insert((point + negative) as usize, '.');
-    }
-
-    num_str
-}
-
-#[cfg(test)]
-#[allow(clippy::approx_constant, clippy::many_single_char_names)]
-mod tests {
-    use super::*;
-
-    use std::sync::Arc;
-
-    use crate::schema::types::{ColumnDescriptor, ColumnPath, PrimitiveTypeBuilder};
-
-    /// Creates test column descriptor based on provided type parameters.
-    macro_rules! make_column_descr {
-        ($physical_type:expr, $logical_type:expr) => {{
-            let tpe = PrimitiveTypeBuilder::new("col", $physical_type)
-                .with_converted_type($logical_type)
-                .build()
-                .unwrap();
-            Arc::new(ColumnDescriptor::new(
-                Arc::new(tpe),
-                0,
-                0,
-                ColumnPath::from("col"),
-            ))
-        }};
-        ($physical_type:expr, $logical_type:expr, $len:expr, $prec:expr, $scale:expr) => {{
-            let tpe = PrimitiveTypeBuilder::new("col", $physical_type)
-                .with_converted_type($logical_type)
-                .with_length($len)
-                .with_precision($prec)
-                .with_scale($scale)
-                .build()
-                .unwrap();
-            Arc::new(ColumnDescriptor::new(
-                Arc::new(tpe),
-                0,
-                0,
-                ColumnPath::from("col"),
-            ))
-        }};
-    }
-
-    #[test]
-    fn test_row_convert_bool() {
-        // BOOLEAN value does not depend on logical type
-        let descr = make_column_descr![PhysicalType::BOOLEAN, ConvertedType::NONE];
-
-        let row = Field::convert_bool(&descr, true);
-        assert_eq!(row, Field::Bool(true));
-
-        let row = Field::convert_bool(&descr, false);
-        assert_eq!(row, Field::Bool(false));
-    }
-
-    #[test]
-    fn test_row_convert_int32() {
-        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::INT_8];
-        let row = Field::convert_int32(&descr, 111);
-        assert_eq!(row, Field::Byte(111));
-
-        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::INT_16];
-        let row = Field::convert_int32(&descr, 222);
-        assert_eq!(row, Field::Short(222));
-
-        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::INT_32];
-        let row = Field::convert_int32(&descr, 333);
-        assert_eq!(row, Field::Int(333));
-
-        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::UINT_8];
-        let row = Field::convert_int32(&descr, -1);
-        assert_eq!(row, Field::UByte(255));
-
-        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::UINT_16];
-        let row = Field::convert_int32(&descr, 256);
-        assert_eq!(row, Field::UShort(256));
-
-        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::UINT_32];
-        let row = Field::convert_int32(&descr, 1234);
-        assert_eq!(row, Field::UInt(1234));
-
-        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::NONE];
-        let row = Field::convert_int32(&descr, 444);
-        assert_eq!(row, Field::Int(444));
-
-        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::DATE];
-        let row = Field::convert_int32(&descr, 14611);
-        assert_eq!(row, Field::Date(14611));
-
-        let descr =
-            make_column_descr![PhysicalType::INT32, ConvertedType::DECIMAL, 0, 8, 2];
-        let row = Field::convert_int32(&descr, 444);
-        assert_eq!(row, Field::Decimal(Decimal::from_i32(444, 8, 2)));
-    }
-
-    #[test]
-    fn test_row_convert_int64() {
-        let descr = make_column_descr![PhysicalType::INT64, ConvertedType::INT_64];
-        let row = Field::convert_int64(&descr, 1111);
-        assert_eq!(row, Field::Long(1111));
-
-        let descr = make_column_descr![PhysicalType::INT64, ConvertedType::UINT_64];
-        let row = Field::convert_int64(&descr, 78239823);
-        assert_eq!(row, Field::ULong(78239823));
-
-        let descr =
-            make_column_descr![PhysicalType::INT64, ConvertedType::TIMESTAMP_MILLIS];
-        let row = Field::convert_int64(&descr, 1541186529153);
-        assert_eq!(row, Field::TimestampMillis(1541186529153));
-
-        let descr =
-            make_column_descr![PhysicalType::INT64, ConvertedType::TIMESTAMP_MICROS];
-        let row = Field::convert_int64(&descr, 1541186529153123);
-        assert_eq!(row, Field::TimestampMicros(1541186529153123));
-
-        let descr = make_column_descr![PhysicalType::INT64, ConvertedType::NONE];
-        let row = Field::convert_int64(&descr, 2222);
-        assert_eq!(row, Field::Long(2222));
-
-        let descr =
-            make_column_descr![PhysicalType::INT64, ConvertedType::DECIMAL, 0, 8, 2];
-        let row = Field::convert_int64(&descr, 3333);
-        assert_eq!(row, Field::Decimal(Decimal::from_i64(3333, 8, 2)));
-    }
-
-    #[test]
-    fn test_row_convert_int96() {
-        // INT96 value does not depend on logical type
-        let descr = make_column_descr![PhysicalType::INT96, ConvertedType::NONE];
-
-        let value = Int96::from(vec![0, 0, 2454923]);
-        let row = Field::convert_int96(&descr, value);
-        assert_eq!(row, Field::TimestampMillis(1238544000000));
-
-        let value = Int96::from(vec![4165425152, 13, 2454923]);
-        let row = Field::convert_int96(&descr, value);
-        assert_eq!(row, Field::TimestampMillis(1238544060000));
-    }
-
-    #[test]
-    fn test_row_convert_float() {
-        // FLOAT value does not depend on logical type
-        let descr = make_column_descr![PhysicalType::FLOAT, ConvertedType::NONE];
-        let row = Field::convert_float(&descr, 2.31);
-        assert_eq!(row, Field::Float(2.31));
-    }
-
-    #[test]
-    fn test_row_convert_double() {
-        // DOUBLE value does not depend on logical type
-        let descr = make_column_descr![PhysicalType::DOUBLE, ConvertedType::NONE];
-        let row = Field::convert_double(&descr, 1.56);
-        assert_eq!(row, Field::Double(1.56));
-    }
-
-    #[test]
-    fn test_row_convert_byte_array() {
-        // UTF8
-        let descr = make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::UTF8];
-        let value = ByteArray::from(vec![b'A', b'B', b'C', b'D']);
-        let row = Field::convert_byte_array(&descr, value);
-        assert_eq!(row, Field::Str("ABCD".to_string()));
-
-        // ENUM
-        let descr = make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::ENUM];
-        let value = ByteArray::from(vec![b'1', b'2', b'3']);
-        let row = Field::convert_byte_array(&descr, value);
-        assert_eq!(row, Field::Str("123".to_string()));
-
-        // JSON
-        let descr = make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::JSON];
-        let value = ByteArray::from(vec![b'{', b'"', b'a', b'"', b':', b'1', b'}']);
-        let row = Field::convert_byte_array(&descr, value);
-        assert_eq!(row, Field::Str("{\"a\":1}".to_string()));
-
-        // NONE
-        let descr = make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::NONE];
-        let value = ByteArray::from(vec![1, 2, 3, 4, 5]);
-        let row = Field::convert_byte_array(&descr, value.clone());
-        assert_eq!(row, Field::Bytes(value));
-
-        // BSON
-        let descr = make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::BSON];
-        let value = ByteArray::from(vec![1, 2, 3, 4, 5]);
-        let row = Field::convert_byte_array(&descr, value.clone());
-        assert_eq!(row, Field::Bytes(value));
-
-        // DECIMAL
-        let descr =
-            make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::DECIMAL, 0, 8, 2];
-        let value = ByteArray::from(vec![207, 200]);
-        let row = Field::convert_byte_array(&descr, value.clone());
-        assert_eq!(row, Field::Decimal(Decimal::from_bytes(value, 8, 2)));
-
-        // DECIMAL (FIXED_LEN_BYTE_ARRAY)
-        let descr = make_column_descr![
-            PhysicalType::FIXED_LEN_BYTE_ARRAY,
-            ConvertedType::DECIMAL,
-            8,
-            17,
-            5
-        ];
-        let value = ByteArray::from(vec![0, 0, 0, 0, 0, 4, 147, 224]);
-        let row = Field::convert_byte_array(&descr, value.clone());
-        assert_eq!(row, Field::Decimal(Decimal::from_bytes(value, 17, 5)));
-
-        // NONE (FIXED_LEN_BYTE_ARRAY)
-        let descr = make_column_descr![
-            PhysicalType::FIXED_LEN_BYTE_ARRAY,
-            ConvertedType::NONE,
-            6,
-            0,
-            0
-        ];
-        let value = ByteArray::from(vec![1, 2, 3, 4, 5, 6]);
-        let row = Field::convert_byte_array(&descr, value.clone());
-        assert_eq!(row, Field::Bytes(value));
-    }
-
-    #[test]
-    fn test_convert_date_to_string() {
-        fn check_date_conversion(y: u32, m: u32, d: u32) {
-            let datetime = chrono::NaiveDate::from_ymd(y as i32, m, d).and_hms(0, 0, 0);
-            let dt = Utc.from_utc_datetime(&datetime);
-            let res = convert_date_to_string((dt.timestamp() / 60 / 60 / 24) as u32);
-            let exp = format!("{}", dt.format("%Y-%m-%d %:z"));
-            assert_eq!(res, exp);
-        }
-
-        check_date_conversion(2010, 1, 2);
-        check_date_conversion(2014, 5, 1);
-        check_date_conversion(2016, 2, 29);
-        check_date_conversion(2017, 9, 12);
-        check_date_conversion(2018, 3, 31);
-    }
-
-    #[test]
-    fn test_convert_timestamp_to_string() {
-        fn check_datetime_conversion(y: u32, m: u32, d: u32, h: u32, mi: u32, s: u32) {
-            let datetime = chrono::NaiveDate::from_ymd(y as i32, m, d).and_hms(h, mi, s);
-            let dt = Utc.from_utc_datetime(&datetime);
-            let res = convert_timestamp_millis_to_string(dt.timestamp_millis() as u64);
-            let exp = format!("{}", dt.format("%Y-%m-%d %H:%M:%S %:z"));
-            assert_eq!(res, exp);
-        }
-
-        check_datetime_conversion(2010, 1, 2, 13, 12, 54);
-        check_datetime_conversion(2011, 1, 3, 8, 23, 1);
-        check_datetime_conversion(2012, 4, 5, 11, 6, 32);
-        check_datetime_conversion(2013, 5, 12, 16, 38, 0);
-        check_datetime_conversion(2014, 11, 28, 21, 15, 12);
-    }
-
-    #[test]
-    fn test_convert_float_to_string() {
-        assert_eq!(format!("{}", Field::Float(1.0)), "1.0");
-        assert_eq!(format!("{}", Field::Float(9.63)), "9.63");
-        assert_eq!(format!("{}", Field::Float(1e-15)), "0.000000000000001");
-        assert_eq!(format!("{}", Field::Float(1e-16)), "1E-16");
-        assert_eq!(format!("{}", Field::Float(1e19)), "10000000000000000000.0");
-        assert_eq!(format!("{}", Field::Float(1e20)), "1E20");
-        assert_eq!(format!("{}", Field::Float(1.7976931E30)), "1.7976931E30");
-        assert_eq!(format!("{}", Field::Float(-1.7976931E30)), "-1.7976931E30");
-    }
-
-    #[test]
-    fn test_convert_double_to_string() {
-        assert_eq!(format!("{}", Field::Double(1.0)), "1.0");
-        assert_eq!(format!("{}", Field::Double(9.63)), "9.63");
-        assert_eq!(format!("{}", Field::Double(1e-15)), "0.000000000000001");
-        assert_eq!(format!("{}", Field::Double(1e-16)), "1E-16");
-        assert_eq!(format!("{}", Field::Double(1e19)), "10000000000000000000.0");
-        assert_eq!(format!("{}", Field::Double(1e20)), "1E20");
-        assert_eq!(
-            format!("{}", Field::Double(1.79769313486E308)),
-            "1.79769313486E308"
-        );
-        assert_eq!(
-            format!("{}", Field::Double(-1.79769313486E308)),
-            "-1.79769313486E308"
-        );
-    }
-
-    #[test]
-    fn test_convert_decimal_to_string() {
-        // Helper method to compare decimal
-        fn check_decimal(bytes: Vec<u8>, precision: i32, scale: i32, res: &str) {
-            let decimal = Decimal::from_bytes(ByteArray::from(bytes), precision, scale);
-            assert_eq!(convert_decimal_to_string(&decimal), res);
-        }
-
-        // This example previously used to fail in some engines
-        check_decimal(
-            vec![0, 0, 0, 0, 0, 0, 0, 0, 13, 224, 182, 179, 167, 100, 0, 0],
-            38,
-            18,
-            "1.000000000000000000",
-        );
-        check_decimal(
-            vec![
-                249, 233, 247, 16, 185, 192, 202, 223, 215, 165, 192, 166, 67, 72,
-            ],
-            36,
-            28,
-            "-12344.0242342304923409234234293432",
-        );
-        check_decimal(vec![0, 0, 0, 0, 0, 4, 147, 224], 17, 5, "3.00000");
-        check_decimal(vec![0, 0, 0, 0, 1, 201, 195, 140], 18, 2, "300000.12");
-        check_decimal(vec![207, 200], 10, 2, "-123.44");
-        check_decimal(vec![207, 200], 10, 8, "-0.00012344");
-    }
-
-    #[test]
-    fn test_row_display() {
-        // Primitive types
-        assert_eq!(format!("{}", Field::Null), "null");
-        assert_eq!(format!("{}", Field::Bool(true)), "true");
-        assert_eq!(format!("{}", Field::Bool(false)), "false");
-        assert_eq!(format!("{}", Field::Byte(1)), "1");
-        assert_eq!(format!("{}", Field::Short(2)), "2");
-        assert_eq!(format!("{}", Field::Int(3)), "3");
-        assert_eq!(format!("{}", Field::Long(4)), "4");
-        assert_eq!(format!("{}", Field::UByte(1)), "1");
-        assert_eq!(format!("{}", Field::UShort(2)), "2");
-        assert_eq!(format!("{}", Field::UInt(3)), "3");
-        assert_eq!(format!("{}", Field::ULong(4)), "4");
-        assert_eq!(format!("{}", Field::Float(5.0)), "5.0");
-        assert_eq!(format!("{}", Field::Float(5.1234)), "5.1234");
-        assert_eq!(format!("{}", Field::Double(6.0)), "6.0");
-        assert_eq!(format!("{}", Field::Double(6.1234)), "6.1234");
-        assert_eq!(format!("{}", Field::Str("abc".to_string())), "\"abc\"");
-        assert_eq!(
-            format!("{}", Field::Bytes(ByteArray::from(vec![1, 2, 3]))),
-            "[1, 2, 3]"
-        );
-        assert_eq!(
-            format!("{}", Field::Date(14611)),
-            convert_date_to_string(14611)
-        );
-        assert_eq!(
-            format!("{}", Field::TimestampMillis(1262391174000)),
-            convert_timestamp_millis_to_string(1262391174000)
-        );
-        assert_eq!(
-            format!("{}", Field::TimestampMicros(1262391174000000)),
-            convert_timestamp_micros_to_string(1262391174000000)
-        );
-        assert_eq!(
-            format!("{}", Field::Decimal(Decimal::from_i32(4, 8, 2))),
-            convert_decimal_to_string(&Decimal::from_i32(4, 8, 2))
-        );
-
-        // Complex types
-        let fields = vec![
-            ("x".to_string(), Field::Null),
-            ("Y".to_string(), Field::Int(2)),
-            ("z".to_string(), Field::Float(3.1)),
-            ("a".to_string(), Field::Str("abc".to_string())),
-        ];
-        let row = Field::Group(make_row(fields));
-        assert_eq!(format!("{}", row), "{x: null, Y: 2, z: 3.1, a: \"abc\"}");
-
-        let row = Field::ListInternal(make_list(vec![
-            Field::Int(2),
-            Field::Int(1),
-            Field::Null,
-            Field::Int(12),
-        ]));
-        assert_eq!(format!("{}", row), "[2, 1, null, 12]");
-
-        let row = Field::MapInternal(make_map(vec![
-            (Field::Int(1), Field::Float(1.2)),
-            (Field::Int(2), Field::Float(4.5)),
-            (Field::Int(3), Field::Float(2.3)),
-        ]));
-        assert_eq!(format!("{}", row), "{1 -> 1.2, 2 -> 4.5, 3 -> 2.3}");
-    }
-
-    #[test]
-    fn test_is_primitive() {
-        // primitives
-        assert!(Field::Null.is_primitive());
-        assert!(Field::Bool(true).is_primitive());
-        assert!(Field::Bool(false).is_primitive());
-        assert!(Field::Byte(1).is_primitive());
-        assert!(Field::Short(2).is_primitive());
-        assert!(Field::Int(3).is_primitive());
-        assert!(Field::Long(4).is_primitive());
-        assert!(Field::UByte(1).is_primitive());
-        assert!(Field::UShort(2).is_primitive());
-        assert!(Field::UInt(3).is_primitive());
-        assert!(Field::ULong(4).is_primitive());
-        assert!(Field::Float(5.0).is_primitive());
-        assert!(Field::Float(5.1234).is_primitive());
-        assert!(Field::Double(6.0).is_primitive());
-        assert!(Field::Double(6.1234).is_primitive());
-        assert!(Field::Str("abc".to_string()).is_primitive());
-        assert!(Field::Bytes(ByteArray::from(vec![1, 2, 3])).is_primitive());
-        assert!(Field::TimestampMillis(12345678).is_primitive());
-        assert!(Field::TimestampMicros(12345678901).is_primitive());
-        assert!(Field::Decimal(Decimal::from_i32(4, 8, 2)).is_primitive());
-
-        // complex types
-        assert_eq!(
-            false,
-            Field::Group(make_row(vec![
-                ("x".to_string(), Field::Null),
-                ("Y".to_string(), Field::Int(2)),
-                ("z".to_string(), Field::Float(3.1)),
-                ("a".to_string(), Field::Str("abc".to_string()))
-            ]))
-            .is_primitive()
-        );
-
-        assert_eq!(
-            false,
-            Field::ListInternal(make_list(vec![
-                Field::Int(2),
-                Field::Int(1),
-                Field::Null,
-                Field::Int(12)
-            ]))
-            .is_primitive()
-        );
-
-        assert_eq!(
-            false,
-            Field::MapInternal(make_map(vec![
-                (Field::Int(1), Field::Float(1.2)),
-                (Field::Int(2), Field::Float(4.5)),
-                (Field::Int(3), Field::Float(2.3))
-            ]))
-            .is_primitive()
-        );
-    }
-
-    #[test]
-    fn test_row_primitive_field_fmt() {
-        // Primitives types
-        let row = make_row(vec![
-            ("00".to_string(), Field::Null),
-            ("01".to_string(), Field::Bool(false)),
-            ("02".to_string(), Field::Byte(3)),
-            ("03".to_string(), Field::Short(4)),
-            ("04".to_string(), Field::Int(5)),
-            ("05".to_string(), Field::Long(6)),
-            ("06".to_string(), Field::UByte(7)),
-            ("07".to_string(), Field::UShort(8)),
-            ("08".to_string(), Field::UInt(9)),
-            ("09".to_string(), Field::ULong(10)),
-            ("10".to_string(), Field::Float(11.1)),
-            ("11".to_string(), Field::Double(12.1)),
-            ("12".to_string(), Field::Str("abc".to_string())),
-            (
-                "13".to_string(),
-                Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5])),
-            ),
-            ("14".to_string(), Field::Date(14611)),
-            ("15".to_string(), Field::TimestampMillis(1262391174000)),
-            ("16".to_string(), Field::TimestampMicros(1262391174000000)),
-            ("17".to_string(), Field::Decimal(Decimal::from_i32(4, 7, 2))),
-        ]);
-
-        assert_eq!("null", format!("{}", row.fmt(0)));
-        assert_eq!("false", format!("{}", row.fmt(1)));
-        assert_eq!("3", format!("{}", row.fmt(2)));
-        assert_eq!("4", format!("{}", row.fmt(3)));
-        assert_eq!("5", format!("{}", row.fmt(4)));
-        assert_eq!("6", format!("{}", row.fmt(5)));
-        assert_eq!("7", format!("{}", row.fmt(6)));
-        assert_eq!("8", format!("{}", row.fmt(7)));
-        assert_eq!("9", format!("{}", row.fmt(8)));
-        assert_eq!("10", format!("{}", row.fmt(9)));
-        assert_eq!("11.1", format!("{}", row.fmt(10)));
-        assert_eq!("12.1", format!("{}", row.fmt(11)));
-        assert_eq!("\"abc\"", format!("{}", row.fmt(12)));
-        assert_eq!("[1, 2, 3, 4, 5]", format!("{}", row.fmt(13)));
-        assert_eq!(convert_date_to_string(14611), format!("{}", row.fmt(14)));
-        assert_eq!(
-            convert_timestamp_millis_to_string(1262391174000),
-            format!("{}", row.fmt(15))
-        );
-        assert_eq!(
-            convert_timestamp_micros_to_string(1262391174000000),
-            format!("{}", row.fmt(16))
-        );
-        assert_eq!("0.04", format!("{}", row.fmt(17)));
-    }
-
-    #[test]
-    fn test_row_complex_field_fmt() {
-        // Complex types
-        let row = make_row(vec![
-            (
-                "00".to_string(),
-                Field::Group(make_row(vec![
-                    ("x".to_string(), Field::Null),
-                    ("Y".to_string(), Field::Int(2)),
-                ])),
-            ),
-            (
-                "01".to_string(),
-                Field::ListInternal(make_list(vec![
-                    Field::Int(2),
-                    Field::Int(1),
-                    Field::Null,
-                    Field::Int(12),
-                ])),
-            ),
-            (
-                "02".to_string(),
-                Field::MapInternal(make_map(vec![
-                    (Field::Int(1), Field::Float(1.2)),
-                    (Field::Int(2), Field::Float(4.5)),
-                    (Field::Int(3), Field::Float(2.3)),
-                ])),
-            ),
-        ]);
-
-        assert_eq!("{x: null, Y: 2}", format!("{}", row.fmt(0)));
-        assert_eq!("[2, 1, null, 12]", format!("{}", row.fmt(1)));
-        assert_eq!("{1 -> 1.2, 2 -> 4.5, 3 -> 2.3}", format!("{}", row.fmt(2)));
-    }
-
-    #[test]
-    fn test_row_primitive_accessors() {
-        // primitives
-        let row = make_row(vec![
-            ("a".to_string(), Field::Null),
-            ("b".to_string(), Field::Bool(false)),
-            ("c".to_string(), Field::Byte(3)),
-            ("d".to_string(), Field::Short(4)),
-            ("e".to_string(), Field::Int(5)),
-            ("f".to_string(), Field::Long(6)),
-            ("g".to_string(), Field::UByte(3)),
-            ("h".to_string(), Field::UShort(4)),
-            ("i".to_string(), Field::UInt(5)),
-            ("j".to_string(), Field::ULong(6)),
-            ("k".to_string(), Field::Float(7.1)),
-            ("l".to_string(), Field::Double(8.1)),
-            ("m".to_string(), Field::Str("abc".to_string())),
-            (
-                "n".to_string(),
-                Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5])),
-            ),
-            ("o".to_string(), Field::Decimal(Decimal::from_i32(4, 7, 2))),
-        ]);
-
-        assert_eq!(false, row.get_bool(1).unwrap());
-        assert_eq!(3, row.get_byte(2).unwrap());
-        assert_eq!(4, row.get_short(3).unwrap());
-        assert_eq!(5, row.get_int(4).unwrap());
-        assert_eq!(6, row.get_long(5).unwrap());
-        assert_eq!(3, row.get_ubyte(6).unwrap());
-        assert_eq!(4, row.get_ushort(7).unwrap());
-        assert_eq!(5, row.get_uint(8).unwrap());
-        assert_eq!(6, row.get_ulong(9).unwrap());
-        assert!(7.1 - row.get_float(10).unwrap() < f32::EPSILON);
-        assert!(8.1 - row.get_double(11).unwrap() < f64::EPSILON);
-        assert_eq!("abc", row.get_string(12).unwrap());
-        assert_eq!(5, row.get_bytes(13).unwrap().len());
-        assert_eq!(7, row.get_decimal(14).unwrap().precision());
-    }
-
-    #[test]
-    fn test_row_primitive_invalid_accessors() {
-        // primitives
-        let row = make_row(vec![
-            ("a".to_string(), Field::Null),
-            ("b".to_string(), Field::Bool(false)),
-            ("c".to_string(), Field::Byte(3)),
-            ("d".to_string(), Field::Short(4)),
-            ("e".to_string(), Field::Int(5)),
-            ("f".to_string(), Field::Long(6)),
-            ("g".to_string(), Field::UByte(3)),
-            ("h".to_string(), Field::UShort(4)),
-            ("i".to_string(), Field::UInt(5)),
-            ("j".to_string(), Field::ULong(6)),
-            ("k".to_string(), Field::Float(7.1)),
-            ("l".to_string(), Field::Double(8.1)),
-            ("m".to_string(), Field::Str("abc".to_string())),
-            (
-                "n".to_string(),
-                Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5])),
-            ),
-            ("o".to_string(), Field::Decimal(Decimal::from_i32(4, 7, 2))),
-        ]);
-
-        for i in 0..row.len() {
-            assert!(row.get_group(i).is_err());
-        }
-    }
-
-    #[test]
-    fn test_row_complex_accessors() {
-        let row = make_row(vec![
-            (
-                "a".to_string(),
-                Field::Group(make_row(vec![
-                    ("x".to_string(), Field::Null),
-                    ("Y".to_string(), Field::Int(2)),
-                ])),
-            ),
-            (
-                "b".to_string(),
-                Field::ListInternal(make_list(vec![
-                    Field::Int(2),
-                    Field::Int(1),
-                    Field::Null,
-                    Field::Int(12),
-                ])),
-            ),
-            (
-                "c".to_string(),
-                Field::MapInternal(make_map(vec![
-                    (Field::Int(1), Field::Float(1.2)),
-                    (Field::Int(2), Field::Float(4.5)),
-                    (Field::Int(3), Field::Float(2.3)),
-                ])),
-            ),
-        ]);
-
-        assert_eq!(2, row.get_group(0).unwrap().len());
-        assert_eq!(4, row.get_list(1).unwrap().len());
-        assert_eq!(3, row.get_map(2).unwrap().len());
-    }
-
-    #[test]
-    fn test_row_complex_invalid_accessors() {
-        let row = make_row(vec![
-            (
-                "a".to_string(),
-                Field::Group(make_row(vec![
-                    ("x".to_string(), Field::Null),
-                    ("Y".to_string(), Field::Int(2)),
-                ])),
-            ),
-            (
-                "b".to_string(),
-                Field::ListInternal(make_list(vec![
-                    Field::Int(2),
-                    Field::Int(1),
-                    Field::Null,
-                    Field::Int(12),
-                ])),
-            ),
-            (
-                "c".to_string(),
-                Field::MapInternal(make_map(vec![
-                    (Field::Int(1), Field::Float(1.2)),
-                    (Field::Int(2), Field::Float(4.5)),
-                    (Field::Int(3), Field::Float(2.3)),
-                ])),
-            ),
-        ]);
-
-        assert_eq!(
-            ParquetError::General("Cannot access Group as Float".to_string()),
-            row.get_float(0).unwrap_err()
-        );
-        assert_eq!(
-            ParquetError::General("Cannot access ListInternal as Float".to_string()),
-            row.get_float(1).unwrap_err()
-        );
-        assert_eq!(
-            ParquetError::General("Cannot access MapInternal as Float".to_string()),
-            row.get_float(2).unwrap_err()
-        );
-    }
-
-    #[test]
-    fn test_list_primitive_accessors() {
-        // primitives
-        let list = make_list(vec![Field::Bool(false)]);
-        assert_eq!(false, list.get_bool(0).unwrap());
-
-        let list = make_list(vec![Field::Byte(3), Field::Byte(4)]);
-        assert_eq!(4, list.get_byte(1).unwrap());
-
-        let list = make_list(vec![Field::Short(4), Field::Short(5), Field::Short(6)]);
-        assert_eq!(6, list.get_short(2).unwrap());
-
-        let list = make_list(vec![Field::Int(5)]);
-        assert_eq!(5, list.get_int(0).unwrap());
-
-        let list = make_list(vec![Field::Long(6), Field::Long(7)]);
-        assert_eq!(7, list.get_long(1).unwrap());
-
-        let list = make_list(vec![Field::UByte(3), Field::UByte(4)]);
-        assert_eq!(4, list.get_ubyte(1).unwrap());
-
-        let list = make_list(vec![Field::UShort(4), Field::UShort(5), Field::UShort(6)]);
-        assert_eq!(6, list.get_ushort(2).unwrap());
-
-        let list = make_list(vec![Field::UInt(5)]);
-        assert_eq!(5, list.get_uint(0).unwrap());
-
-        let list = make_list(vec![Field::ULong(6), Field::ULong(7)]);
-        assert_eq!(7, list.get_ulong(1).unwrap());
-
-        let list = make_list(vec![
-            Field::Float(8.1),
-            Field::Float(9.2),
-            Field::Float(10.3),
-        ]);
-        assert!(10.3 - list.get_float(2).unwrap() < f32::EPSILON);
-
-        let list = make_list(vec![Field::Double(3.1415)]);
-        assert!(3.1415 - list.get_double(0).unwrap() < f64::EPSILON);
-
-        let list = make_list(vec![Field::Str("abc".to_string())]);
-        assert_eq!(&"abc".to_string(), list.get_string(0).unwrap());
-
-        let list = make_list(vec![Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5]))]);
-        assert_eq!(&[1, 2, 3, 4, 5], list.get_bytes(0).unwrap().data());
-
-        let list = make_list(vec![Field::Decimal(Decimal::from_i32(4, 5, 2))]);
-        assert_eq!(&[0, 0, 0, 4], list.get_decimal(0).unwrap().data());
-    }
-
-    #[test]
-    fn test_list_primitive_invalid_accessors() {
-        // primitives
-        let list = make_list(vec![Field::Bool(false)]);
-        assert!(list.get_byte(0).is_err());
-
-        let list = make_list(vec![Field::Byte(3), Field::Byte(4)]);
-        assert!(list.get_short(1).is_err());
-
-        let list = make_list(vec![Field::Short(4), Field::Short(5), Field::Short(6)]);
-        assert!(list.get_int(2).is_err());
-
-        let list = make_list(vec![Field::Int(5)]);
-        assert!(list.get_long(0).is_err());
-
-        let list = make_list(vec![Field::Long(6), Field::Long(7)]);
-        assert!(list.get_float(1).is_err());
-
-        let list = make_list(vec![Field::UByte(3), Field::UByte(4)]);
-        assert!(list.get_short(1).is_err());
-
-        let list = make_list(vec![Field::UShort(4), Field::UShort(5), Field::UShort(6)]);
-        assert!(list.get_int(2).is_err());
-
-        let list = make_list(vec![Field::UInt(5)]);
-        assert!(list.get_long(0).is_err());
-
-        let list = make_list(vec![Field::ULong(6), Field::ULong(7)]);
-        assert!(list.get_float(1).is_err());
-
-        let list = make_list(vec![
-            Field::Float(8.1),
-            Field::Float(9.2),
-            Field::Float(10.3),
-        ]);
-        assert!(list.get_double(2).is_err());
-
-        let list = make_list(vec![Field::Double(3.1415)]);
-        assert!(list.get_string(0).is_err());
-
-        let list = make_list(vec![Field::Str("abc".to_string())]);
-        assert!(list.get_bytes(0).is_err());
-
-        let list = make_list(vec![Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5]))]);
-        assert!(list.get_bool(0).is_err());
-
-        let list = make_list(vec![Field::Decimal(Decimal::from_i32(4, 5, 2))]);
-        assert!(list.get_bool(0).is_err());
-    }
-
-    #[test]
-    fn test_list_complex_accessors() {
-        let list = make_list(vec![Field::Group(make_row(vec![
-            ("x".to_string(), Field::Null),
-            ("Y".to_string(), Field::Int(2)),
-        ]))]);
-        assert_eq!(2, list.get_group(0).unwrap().len());
-
-        let list = make_list(vec![Field::ListInternal(make_list(vec![
-            Field::Int(2),
-            Field::Int(1),
-            Field::Null,
-            Field::Int(12),
-        ]))]);
-        assert_eq!(4, list.get_list(0).unwrap().len());
-
-        let list = make_list(vec![Field::MapInternal(make_map(vec![
-            (Field::Int(1), Field::Float(1.2)),
-            (Field::Int(2), Field::Float(4.5)),
-            (Field::Int(3), Field::Float(2.3)),
-        ]))]);
-        assert_eq!(3, list.get_map(0).unwrap().len());
-    }
-
-    #[test]
-    fn test_list_complex_invalid_accessors() {
-        let list = make_list(vec![Field::Group(make_row(vec![
-            ("x".to_string(), Field::Null),
-            ("Y".to_string(), Field::Int(2)),
-        ]))]);
-        assert_eq!(
-            general_err!("Cannot access Group as Float".to_string()),
-            list.get_float(0).unwrap_err()
-        );
-
-        let list = make_list(vec![Field::ListInternal(make_list(vec![
-            Field::Int(2),
-            Field::Int(1),
-            Field::Null,
-            Field::Int(12),
-        ]))]);
-        assert_eq!(
-            general_err!("Cannot access ListInternal as Float".to_string()),
-            list.get_float(0).unwrap_err()
-        );
-
-        let list = make_list(vec![Field::MapInternal(make_map(vec![
-            (Field::Int(1), Field::Float(1.2)),
-            (Field::Int(2), Field::Float(4.5)),
-            (Field::Int(3), Field::Float(2.3)),
-        ]))]);
-        assert_eq!(
-            general_err!("Cannot access MapInternal as Float".to_string()),
-            list.get_float(0).unwrap_err()
-        );
-    }
-
-    #[test]
-    fn test_map_accessors() {
-        // a map from int to string
-        let map = make_map(vec![
-            (Field::Int(1), Field::Str("a".to_string())),
-            (Field::Int(2), Field::Str("b".to_string())),
-            (Field::Int(3), Field::Str("c".to_string())),
-            (Field::Int(4), Field::Str("d".to_string())),
-            (Field::Int(5), Field::Str("e".to_string())),
-        ]);
-
-        assert_eq!(5, map.len());
-        for i in 0..5 {
-            assert_eq!((i + 1) as i32, map.get_keys().get_int(i).unwrap());
-            assert_eq!(
-                &((i as u8 + b'a') as char).to_string(),
-                map.get_values().get_string(i).unwrap()
-            );
-        }
-    }
-
-    #[test]
-    #[cfg(feature = "cli")]
-    fn test_to_json_value() {
-        assert_eq!(Field::Null.to_json_value(), Value::Null);
-        assert_eq!(Field::Bool(true).to_json_value(), Value::Bool(true));
-        assert_eq!(Field::Bool(false).to_json_value(), Value::Bool(false));
-        assert_eq!(
-            Field::Byte(1).to_json_value(),
-            Value::Number(serde_json::Number::from(1))
-        );
-        assert_eq!(
-            Field::Short(2).to_json_value(),
-            Value::Number(serde_json::Number::from(2))
-        );
-        assert_eq!(
-            Field::Int(3).to_json_value(),
-            Value::Number(serde_json::Number::from(3))
-        );
-        assert_eq!(
-            Field::Long(4).to_json_value(),
-            Value::Number(serde_json::Number::from(4))
-        );
-        assert_eq!(
-            Field::UByte(1).to_json_value(),
-            Value::Number(serde_json::Number::from(1))
-        );
-        assert_eq!(
-            Field::UShort(2).to_json_value(),
-            Value::Number(serde_json::Number::from(2))
-        );
-        assert_eq!(
-            Field::UInt(3).to_json_value(),
-            Value::Number(serde_json::Number::from(3))
-        );
-        assert_eq!(
-            Field::ULong(4).to_json_value(),
-            Value::Number(serde_json::Number::from(4))
-        );
-        assert_eq!(
-            Field::Float(5.0).to_json_value(),
-            Value::Number(serde_json::Number::from_f64(f64::from(5.0 as f32)).unwrap())
-        );
-        assert_eq!(
-            Field::Float(5.1234).to_json_value(),
-            Value::Number(
-                serde_json::Number::from_f64(f64::from(5.1234 as f32)).unwrap()
-            )
-        );
-        assert_eq!(
-            Field::Double(6.0).to_json_value(),
-            Value::Number(serde_json::Number::from_f64(6.0 as f64).unwrap())
-        );
-        assert_eq!(
-            Field::Double(6.1234).to_json_value(),
-            Value::Number(serde_json::Number::from_f64(6.1234 as f64).unwrap())
-        );
-        assert_eq!(
-            Field::Str("abc".to_string()).to_json_value(),
-            Value::String(String::from("abc"))
-        );
-        assert_eq!(
-            Field::Decimal(Decimal::from_i32(4, 8, 2)).to_json_value(),
-            Value::String(String::from("0.04"))
-        );
-        assert_eq!(
-            Field::Bytes(ByteArray::from(vec![1, 2, 3])).to_json_value(),
-            Value::String(String::from("AQID"))
-        );
-        assert_eq!(
-            Field::TimestampMillis(12345678).to_json_value(),
-            Value::String("1970-01-01 03:25:45 +00:00".to_string())
-        );
-        assert_eq!(
-            Field::TimestampMicros(12345678901).to_json_value(),
-            Value::String(convert_timestamp_micros_to_string(12345678901))
-        );
-
-        let fields = vec![
-            ("X".to_string(), Field::Int(1)),
-            ("Y".to_string(), Field::Double(2.2)),
-            ("Z".to_string(), Field::Str("abc".to_string())),
-        ];
-        let row = Field::Group(make_row(fields));
-        assert_eq!(
-            row.to_json_value(),
-            serde_json::json!({"X": 1, "Y": 2.2, "Z": "abc"})
-        );
-
-        let row = Field::ListInternal(make_list(vec![
-            Field::Int(1),
-            Field::Int(12),
-            Field::Null,
-        ]));
-        let array = vec![
-            Value::Number(serde_json::Number::from(1)),
-            Value::Number(serde_json::Number::from(12)),
-            Value::Null,
-        ];
-        assert_eq!(row.to_json_value(), Value::Array(array));
-
-        let row = Field::MapInternal(make_map(vec![
-            (Field::Str("k1".to_string()), Field::Double(1.2)),
-            (Field::Str("k2".to_string()), Field::Double(3.4)),
-            (Field::Str("k3".to_string()), Field::Double(4.5)),
-        ]));
-        assert_eq!(
-            row.to_json_value(),
-            serde_json::json!({"k1": 1.2, "k2": 3.4, "k3": 4.5})
-        );
-    }
-}
-
-#[cfg(test)]
-#[allow(clippy::approx_constant, clippy::many_single_char_names)]
-mod api_tests {
-    use super::{make_list, make_map, make_row};
-    use crate::record::Field;
-
-    #[test]
-    fn test_field_visibility() {
-        let row = make_row(vec![(
-            "a".to_string(),
-            Field::Group(make_row(vec![
-                ("x".to_string(), Field::Null),
-                ("Y".to_string(), Field::Int(2)),
-            ])),
-        )]);
-
-        match row.get_column_iter().next() {
-            Some(column) => {
-                assert_eq!("a", column.0);
-                match column.1 {
-                    Field::Group(r) => {
-                        assert_eq!(
-                            &make_row(vec![
-                                ("x".to_string(), Field::Null),
-                                ("Y".to_string(), Field::Int(2)),
-                            ]),
-                            r
-                        );
-                    }
-                    _ => panic!("Expected the first column to be Field::Group"),
-                }
-            }
-            None => panic!("Expected at least one column"),
-        }
-    }
-
-    #[test]
-    fn test_list_element_access() {
-        let expected = vec![
-            Field::Int(1),
-            Field::Group(make_row(vec![
-                ("x".to_string(), Field::Null),
-                ("Y".to_string(), Field::Int(2)),
-            ])),
-        ];
-
-        let list = make_list(expected.clone());
-        assert_eq!(expected.as_slice(), list.elements());
-    }
-
-    #[test]
-    fn test_map_entry_access() {
-        let expected = vec![
-            (Field::Str("one".to_owned()), Field::Int(1)),
-            (Field::Str("two".to_owned()), Field::Int(2)),
-        ];
-
-        let map = make_map(expected.clone());
-        assert_eq!(expected.as_slice(), map.entries());
-    }
-}
diff --git a/rust/parquet/src/record/mod.rs b/rust/parquet/src/record/mod.rs
deleted file mode 100644
index fb4abb539d9..00000000000
--- a/rust/parquet/src/record/mod.rs
+++ /dev/null
@@ -1,28 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Contains record-based API for reading Parquet files.
-
-mod api;
-pub mod reader;
-mod record_writer;
-mod triplet;
-
-pub use self::{
-    api::{Field, List, ListAccessor, Map, MapAccessor, Row, RowAccessor},
-    record_writer::RecordWriter,
-};
diff --git a/rust/parquet/src/record/reader.rs b/rust/parquet/src/record/reader.rs
deleted file mode 100644
index 691afe8c203..00000000000
--- a/rust/parquet/src/record/reader.rs
+++ /dev/null
@@ -1,1667 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Contains implementation of record assembly and converting Parquet types into
-//! [`Row`](crate::record::Row)s.
-
-use std::{collections::HashMap, fmt, sync::Arc};
-
-use crate::basic::{ConvertedType, Repetition};
-use crate::errors::{ParquetError, Result};
-use crate::file::reader::{FileReader, RowGroupReader};
-use crate::record::{
-    api::{make_list, make_map, make_row, Field, Row},
-    triplet::TripletIter,
-};
-use crate::schema::types::{ColumnPath, SchemaDescPtr, SchemaDescriptor, Type, TypePtr};
-
-/// Default batch size for a reader
-const DEFAULT_BATCH_SIZE: usize = 1024;
-
-/// Tree builder for `Reader` enum.
-/// Serves as a container of options for building a reader tree and a builder, and
-/// accessing a records iterator [`RowIter`].
-pub struct TreeBuilder {
-    // Batch size (>= 1) for triplet iterators
-    batch_size: usize,
-}
-
-impl TreeBuilder {
-    /// Creates new tree builder with default parameters.
-    pub fn new() -> Self {
-        Self {
-            batch_size: DEFAULT_BATCH_SIZE,
-        }
-    }
-
-    /// Sets batch size for this tree builder.
-    pub fn with_batch_size(mut self, batch_size: usize) -> Self {
-        self.batch_size = batch_size;
-        self
-    }
-
-    /// Creates new root reader for provided schema and row group.
-    pub fn build(
-        &self,
-        descr: SchemaDescPtr,
-        row_group_reader: &dyn RowGroupReader,
-    ) -> Reader {
-        // Prepare lookup table of column path -> original column index
-        // This allows to prune columns and map schema leaf nodes to the column readers
-        let mut paths: HashMap<ColumnPath, usize> = HashMap::new();
-        let row_group_metadata = row_group_reader.metadata();
-
-        for col_index in 0..row_group_reader.num_columns() {
-            let col_meta = row_group_metadata.column(col_index);
-            let col_path = col_meta.column_path().clone();
-            paths.insert(col_path, col_index);
-        }
-
-        // Build child readers for the message type
-        let mut readers = Vec::new();
-        let mut path = Vec::new();
-
-        for field in descr.root_schema().get_fields() {
-            let reader = self.reader_tree(
-                field.clone(),
-                &mut path,
-                0,
-                0,
-                &paths,
-                row_group_reader,
-            );
-            readers.push(reader);
-        }
-
-        // Return group reader for message type,
-        // it is always required with definition level 0
-        Reader::GroupReader(None, 0, readers)
-    }
-
-    /// Creates iterator of `Row`s directly from schema descriptor and row group.
-    pub fn as_iter(
-        &self,
-        descr: SchemaDescPtr,
-        row_group_reader: &dyn RowGroupReader,
-    ) -> ReaderIter {
-        let num_records = row_group_reader.metadata().num_rows() as usize;
-        ReaderIter::new(self.build(descr, row_group_reader), num_records)
-    }
-
-    /// Builds tree of readers for the current schema recursively.
-    fn reader_tree(
-        &self,
-        field: TypePtr,
-        mut path: &mut Vec<String>,
-        mut curr_def_level: i16,
-        mut curr_rep_level: i16,
-        paths: &HashMap<ColumnPath, usize>,
-        row_group_reader: &dyn RowGroupReader,
-    ) -> Reader {
-        assert!(field.get_basic_info().has_repetition());
-        // Update current definition and repetition levels for this type
-        let repetition = field.get_basic_info().repetition();
-        match repetition {
-            Repetition::OPTIONAL => {
-                curr_def_level += 1;
-            }
-            Repetition::REPEATED => {
-                curr_def_level += 1;
-                curr_rep_level += 1;
-            }
-            _ => {}
-        }
-
-        path.push(String::from(field.name()));
-        let reader = if field.is_primitive() {
-            let col_path = ColumnPath::new(path.to_vec());
-            let orig_index = *paths.get(&col_path).unwrap();
-            let col_descr = row_group_reader
-                .metadata()
-                .column(orig_index)
-                .column_descr_ptr();
-            let col_reader = row_group_reader.get_column_reader(orig_index).unwrap();
-            let column = TripletIter::new(col_descr, col_reader, self.batch_size);
-            Reader::PrimitiveReader(field, column)
-        } else {
-            match field.get_basic_info().converted_type() {
-                // List types
-                ConvertedType::LIST => {
-                    assert_eq!(
-                        field.get_fields().len(),
-                        1,
-                        "Invalid list type {:?}",
-                        field
-                    );
-
-                    let repeated_field = field.get_fields()[0].clone();
-                    assert_eq!(
-                        repeated_field.get_basic_info().repetition(),
-                        Repetition::REPEATED,
-                        "Invalid list type {:?}",
-                        field
-                    );
-
-                    if Reader::is_element_type(&repeated_field) {
-                        // Support for backward compatible lists
-                        let reader = self.reader_tree(
-                            repeated_field,
-                            &mut path,
-                            curr_def_level,
-                            curr_rep_level,
-                            paths,
-                            row_group_reader,
-                        );
-
-                        Reader::RepeatedReader(
-                            field,
-                            curr_def_level,
-                            curr_rep_level,
-                            Box::new(reader),
-                        )
-                    } else {
-                        let child_field = repeated_field.get_fields()[0].clone();
-
-                        path.push(String::from(repeated_field.name()));
-
-                        let reader = self.reader_tree(
-                            child_field,
-                            &mut path,
-                            curr_def_level + 1,
-                            curr_rep_level + 1,
-                            paths,
-                            row_group_reader,
-                        );
-
-                        path.pop();
-
-                        Reader::RepeatedReader(
-                            field,
-                            curr_def_level,
-                            curr_rep_level,
-                            Box::new(reader),
-                        )
-                    }
-                }
-                // Map types (key-value pairs)
-                ConvertedType::MAP | ConvertedType::MAP_KEY_VALUE => {
-                    assert_eq!(
-                        field.get_fields().len(),
-                        1,
-                        "Invalid map type: {:?}",
-                        field
-                    );
-                    assert!(
-                        !field.get_fields()[0].is_primitive(),
-                        "Invalid map type: {:?}",
-                        field
-                    );
-
-                    let key_value_type = field.get_fields()[0].clone();
-                    assert_eq!(
-                        key_value_type.get_basic_info().repetition(),
-                        Repetition::REPEATED,
-                        "Invalid map type: {:?}",
-                        field
-                    );
-                    assert_eq!(
-                        key_value_type.get_fields().len(),
-                        2,
-                        "Invalid map type: {:?}",
-                        field
-                    );
-
-                    path.push(String::from(key_value_type.name()));
-
-                    let key_type = &key_value_type.get_fields()[0];
-                    assert!(
-                        key_type.is_primitive(),
-                        "Map key type is expected to be a primitive type, but found {:?}",
-                        key_type
-                    );
-                    let key_reader = self.reader_tree(
-                        key_type.clone(),
-                        &mut path,
-                        curr_def_level + 1,
-                        curr_rep_level + 1,
-                        paths,
-                        row_group_reader,
-                    );
-
-                    let value_type = &key_value_type.get_fields()[1];
-                    let value_reader = self.reader_tree(
-                        value_type.clone(),
-                        &mut path,
-                        curr_def_level + 1,
-                        curr_rep_level + 1,
-                        paths,
-                        row_group_reader,
-                    );
-
-                    path.pop();
-
-                    Reader::KeyValueReader(
-                        field,
-                        curr_def_level,
-                        curr_rep_level,
-                        Box::new(key_reader),
-                        Box::new(value_reader),
-                    )
-                }
-                // A repeated field that is neither contained by a `LIST`- or
-                // `MAP`-annotated group nor annotated by `LIST` or `MAP`
-                // should be interpreted as a required list of required
-                // elements where the element type is the type of the field.
-                _ if repetition == Repetition::REPEATED => {
-                    let required_field = Type::group_type_builder(field.name())
-                        .with_repetition(Repetition::REQUIRED)
-                        .with_converted_type(field.get_basic_info().converted_type())
-                        .with_fields(&mut Vec::from(field.get_fields()))
-                        .build()
-                        .unwrap();
-
-                    path.pop();
-
-                    let reader = self.reader_tree(
-                        Arc::new(required_field),
-                        &mut path,
-                        curr_def_level,
-                        curr_rep_level,
-                        paths,
-                        row_group_reader,
-                    );
-
-                    Reader::RepeatedReader(
-                        field,
-                        curr_def_level - 1,
-                        curr_rep_level - 1,
-                        Box::new(reader),
-                    )
-                }
-                // Group types (structs)
-                _ => {
-                    let mut readers = Vec::new();
-                    for child in field.get_fields() {
-                        let reader = self.reader_tree(
-                            child.clone(),
-                            &mut path,
-                            curr_def_level,
-                            curr_rep_level,
-                            paths,
-                            row_group_reader,
-                        );
-                        readers.push(reader);
-                    }
-                    Reader::GroupReader(Some(field), curr_def_level, readers)
-                }
-            }
-        };
-        path.pop();
-
-        Reader::option(repetition, curr_def_level, reader)
-    }
-}
-
-/// Reader tree for record assembly
-pub enum Reader {
-    // Primitive reader with type information and triplet iterator
-    PrimitiveReader(TypePtr, TripletIter),
-    // Optional reader with definition level of a parent and a reader
-    OptionReader(i16, Box<Reader>),
-    // Group (struct) reader with type information, definition level and list of child
-    // readers. When it represents message type, type information is None
-    GroupReader(Option<TypePtr>, i16, Vec<Reader>),
-    // Reader for repeated values, e.g. lists, contains type information, definition
-    // level, repetition level and a child reader
-    RepeatedReader(TypePtr, i16, i16, Box<Reader>),
-    // Reader of key-value pairs, e.g. maps, contains type information, definition
-    // level, repetition level, child reader for keys and child reader for values
-    KeyValueReader(TypePtr, i16, i16, Box<Reader>, Box<Reader>),
-}
-
-impl Reader {
-    /// Wraps reader in option reader based on repetition.
-    fn option(repetition: Repetition, def_level: i16, reader: Reader) -> Self {
-        if repetition == Repetition::OPTIONAL {
-            Reader::OptionReader(def_level - 1, Box::new(reader))
-        } else {
-            reader
-        }
-    }
-
-    /// Returns true if repeated type is an element type for the list.
-    /// Used to determine legacy list types.
-    /// This method is copied from Spark Parquet reader and is based on the reference:
-    /// <https://github.com/apache/parquet-format/blob/master/LogicalTypes.md>
-    ///   #backward-compatibility-rules
-    fn is_element_type(repeated_type: &Type) -> bool {
-        // For legacy 2-level list types with primitive element type, e.g.:
-        //
-        //    // ARRAY<INT> (nullable list, non-null elements)
-        //    optional group my_list (LIST) {
-        //      repeated int32 element;
-        //    }
-        //
-        repeated_type.is_primitive() ||
-    // For legacy 2-level list types whose element type is a group type with 2 or more
-    // fields, e.g.:
-    //
-    //    // ARRAY<STRUCT<str: STRING, num: INT>> (nullable list, non-null elements)
-    //    optional group my_list (LIST) {
-    //      repeated group element {
-    //        required binary str (UTF8);
-    //        required int32 num;
-    //      };
-    //    }
-    //
-    repeated_type.is_group() && repeated_type.get_fields().len() > 1 ||
-    // For legacy 2-level list types generated by parquet-avro (Parquet version < 1.6.0),
-    // e.g.:
-    //
-    //    // ARRAY<STRUCT<str: STRING>> (nullable list, non-null elements)
-    //    optional group my_list (LIST) {
-    //      repeated group array {
-    //        required binary str (UTF8);
-    //      };
-    //    }
-    //
-    repeated_type.name() == "array" ||
-    // For Parquet data generated by parquet-thrift, e.g.:
-    //
-    //    // ARRAY<STRUCT<str: STRING>> (nullable list, non-null elements)
-    //    optional group my_list (LIST) {
-    //      repeated group my_list_tuple {
-    //        required binary str (UTF8);
-    //      };
-    //    }
-    //
-    repeated_type.name().ends_with("_tuple")
-    }
-
-    /// Reads current record as `Row` from the reader tree.
-    /// Automatically advances all necessary readers.
-    /// This must be called on the root level reader (i.e., for Message type).
-    /// Otherwise, it will panic.
-    fn read(&mut self) -> Row {
-        match *self {
-            Reader::GroupReader(_, _, ref mut readers) => {
-                let mut fields = Vec::new();
-                for reader in readers {
-                    fields.push((String::from(reader.field_name()), reader.read_field()));
-                }
-                make_row(fields)
-            }
-            _ => panic!("Cannot call read() on {}", self),
-        }
-    }
-
-    /// Reads current record as `Field` from the reader tree.
-    /// Automatically advances all necessary readers.
-    fn read_field(&mut self) -> Field {
-        match *self {
-            Reader::PrimitiveReader(_, ref mut column) => {
-                let value = column.current_value();
-                column.read_next().unwrap();
-                value
-            }
-            Reader::OptionReader(def_level, ref mut reader) => {
-                if reader.current_def_level() > def_level {
-                    reader.read_field()
-                } else {
-                    reader.advance_columns();
-                    Field::Null
-                }
-            }
-            Reader::GroupReader(_, def_level, ref mut readers) => {
-                let mut fields = Vec::new();
-                for reader in readers {
-                    if reader.repetition() != Repetition::OPTIONAL
-                        || reader.current_def_level() > def_level
-                    {
-                        fields.push((
-                            String::from(reader.field_name()),
-                            reader.read_field(),
-                        ));
-                    } else {
-                        reader.advance_columns();
-                        fields.push((String::from(reader.field_name()), Field::Null));
-                    }
-                }
-                let row = make_row(fields);
-                Field::Group(row)
-            }
-            Reader::RepeatedReader(_, def_level, rep_level, ref mut reader) => {
-                let mut elements = Vec::new();
-                loop {
-                    if reader.current_def_level() > def_level {
-                        elements.push(reader.read_field());
-                    } else {
-                        reader.advance_columns();
-                        // If the current definition level is equal to the definition
-                        // level of this repeated type, then the
-                        // result is an empty list and the repetition level
-                        // will always be <= rl.
-                        break;
-                    }
-
-                    // This covers case when we are out of repetition levels and should
-                    // close the group, or there are no values left to
-                    // buffer.
-                    if !reader.has_next() || reader.current_rep_level() <= rep_level {
-                        break;
-                    }
-                }
-                Field::ListInternal(make_list(elements))
-            }
-            Reader::KeyValueReader(
-                _,
-                def_level,
-                rep_level,
-                ref mut keys,
-                ref mut values,
-            ) => {
-                let mut pairs = Vec::new();
-                loop {
-                    if keys.current_def_level() > def_level {
-                        pairs.push((keys.read_field(), values.read_field()));
-                    } else {
-                        keys.advance_columns();
-                        values.advance_columns();
-                        // If the current definition level is equal to the definition
-                        // level of this repeated type, then the
-                        // result is an empty list and the repetition level
-                        // will always be <= rl.
-                        break;
-                    }
-
-                    // This covers case when we are out of repetition levels and should
-                    // close the group, or there are no values left to
-                    // buffer.
-                    if !keys.has_next() || keys.current_rep_level() <= rep_level {
-                        break;
-                    }
-                }
-
-                Field::MapInternal(make_map(pairs))
-            }
-        }
-    }
-
-    /// Returns field name for the current reader.
-    fn field_name(&self) -> &str {
-        match *self {
-            Reader::PrimitiveReader(ref field, _) => field.name(),
-            Reader::OptionReader(_, ref reader) => reader.field_name(),
-            Reader::GroupReader(ref opt, ..) => match opt {
-                Some(ref field) => field.name(),
-                None => panic!("Field is None for group reader"),
-            },
-            Reader::RepeatedReader(ref field, ..) => field.name(),
-            Reader::KeyValueReader(ref field, ..) => field.name(),
-        }
-    }
-
-    /// Returns repetition for the current reader.
-    fn repetition(&self) -> Repetition {
-        match *self {
-            Reader::PrimitiveReader(ref field, _) => field.get_basic_info().repetition(),
-            Reader::OptionReader(_, ref reader) => reader.repetition(),
-            Reader::GroupReader(ref opt, ..) => match opt {
-                Some(ref field) => field.get_basic_info().repetition(),
-                None => panic!("Field is None for group reader"),
-            },
-            Reader::RepeatedReader(ref field, ..) => field.get_basic_info().repetition(),
-            Reader::KeyValueReader(ref field, ..) => field.get_basic_info().repetition(),
-        }
-    }
-
-    /// Returns true, if current reader has more values, false otherwise.
-    /// Method does not advance internal iterator.
-    fn has_next(&self) -> bool {
-        match *self {
-            Reader::PrimitiveReader(_, ref column) => column.has_next(),
-            Reader::OptionReader(_, ref reader) => reader.has_next(),
-            Reader::GroupReader(_, _, ref readers) => readers.first().unwrap().has_next(),
-            Reader::RepeatedReader(_, _, _, ref reader) => reader.has_next(),
-            Reader::KeyValueReader(_, _, _, ref keys, _) => keys.has_next(),
-        }
-    }
-
-    /// Returns current definition level,
-    /// Method does not advance internal iterator.
-    fn current_def_level(&self) -> i16 {
-        match *self {
-            Reader::PrimitiveReader(_, ref column) => column.current_def_level(),
-            Reader::OptionReader(_, ref reader) => reader.current_def_level(),
-            Reader::GroupReader(_, _, ref readers) => match readers.first() {
-                Some(reader) => reader.current_def_level(),
-                None => panic!("Current definition level: empty group reader"),
-            },
-            Reader::RepeatedReader(_, _, _, ref reader) => reader.current_def_level(),
-            Reader::KeyValueReader(_, _, _, ref keys, _) => keys.current_def_level(),
-        }
-    }
-
-    /// Returns current repetition level.
-    /// Method does not advance internal iterator.
-    fn current_rep_level(&self) -> i16 {
-        match *self {
-            Reader::PrimitiveReader(_, ref column) => column.current_rep_level(),
-            Reader::OptionReader(_, ref reader) => reader.current_rep_level(),
-            Reader::GroupReader(_, _, ref readers) => match readers.first() {
-                Some(reader) => reader.current_rep_level(),
-                None => panic!("Current repetition level: empty group reader"),
-            },
-            Reader::RepeatedReader(_, _, _, ref reader) => reader.current_rep_level(),
-            Reader::KeyValueReader(_, _, _, ref keys, _) => keys.current_rep_level(),
-        }
-    }
-
-    /// Advances leaf columns for the current reader.
-    fn advance_columns(&mut self) {
-        match *self {
-            Reader::PrimitiveReader(_, ref mut column) => {
-                column.read_next().unwrap();
-            }
-            Reader::OptionReader(_, ref mut reader) => {
-                reader.advance_columns();
-            }
-            Reader::GroupReader(_, _, ref mut readers) => {
-                for reader in readers {
-                    reader.advance_columns();
-                }
-            }
-            Reader::RepeatedReader(_, _, _, ref mut reader) => {
-                reader.advance_columns();
-            }
-            Reader::KeyValueReader(_, _, _, ref mut keys, ref mut values) => {
-                keys.advance_columns();
-                values.advance_columns();
-            }
-        }
-    }
-}
-
-impl fmt::Display for Reader {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        let s = match self {
-            Reader::PrimitiveReader(..) => "PrimitiveReader",
-            Reader::OptionReader(..) => "OptionReader",
-            Reader::GroupReader(..) => "GroupReader",
-            Reader::RepeatedReader(..) => "RepeatedReader",
-            Reader::KeyValueReader(..) => "KeyValueReader",
-        };
-        write!(f, "{}", s)
-    }
-}
-
-// ----------------------------------------------------------------------
-// Row iterators
-
-/// The enum Either with variants That represet a reference and a box of
-/// [`FileReader`](crate::file::reader::FileReader).
-enum Either<'a> {
-    Left(&'a dyn FileReader),
-    Right(Box<dyn FileReader>),
-}
-
-impl<'a> Either<'a> {
-    fn reader(&self) -> &dyn FileReader {
-        match *self {
-            Either::Left(r) => r,
-            Either::Right(ref r) => &**r,
-        }
-    }
-}
-
-/// Iterator of [`Row`](crate::record::Row)s.
-/// It is used either for a single row group to iterate over data in that row group, or
-/// an entire file with auto buffering of all row groups.
-pub struct RowIter<'a> {
-    descr: SchemaDescPtr,
-    tree_builder: TreeBuilder,
-    file_reader: Option<Either<'a>>,
-    current_row_group: usize,
-    num_row_groups: usize,
-    row_iter: Option<ReaderIter>,
-}
-
-impl<'a> RowIter<'a> {
-    /// Creates a new iterator of [`Row`](crate::record::Row)s.
-    fn new(
-        file_reader: Option<Either<'a>>,
-        row_iter: Option<ReaderIter>,
-        descr: SchemaDescPtr,
-    ) -> Self {
-        let tree_builder = Self::tree_builder();
-        let num_row_groups = match file_reader {
-            Some(ref r) => r.reader().num_row_groups(),
-            None => 0,
-        };
-
-        Self {
-            descr,
-            file_reader,
-            tree_builder,
-            num_row_groups,
-            row_iter,
-            current_row_group: 0,
-        }
-    }
-
-    /// Creates iterator of [`Row`](crate::record::Row)s for all row groups in a
-    /// file.
-    pub fn from_file(proj: Option<Type>, reader: &'a dyn FileReader) -> Result<Self> {
-        let either = Either::Left(reader);
-        let descr = Self::get_proj_descr(
-            proj,
-            reader.metadata().file_metadata().schema_descr_ptr(),
-        )?;
-
-        Ok(Self::new(Some(either), None, descr))
-    }
-
-    /// Creates iterator of [`Row`](crate::record::Row)s for a specific row group.
-    pub fn from_row_group(
-        proj: Option<Type>,
-        reader: &'a dyn RowGroupReader,
-    ) -> Result<Self> {
-        let descr = Self::get_proj_descr(proj, reader.metadata().schema_descr_ptr())?;
-        let tree_builder = Self::tree_builder();
-        let row_iter = tree_builder.as_iter(descr.clone(), reader);
-
-        // For row group we need to set `current_row_group` >= `num_row_groups`, because
-        // we only have one row group and can't buffer more.
-        Ok(Self::new(None, Some(row_iter), descr))
-    }
-
-    /// Creates a iterator of [`Row`](crate::record::Row)s from a
-    /// [`FileReader`](crate::file::reader::FileReader) using the full file schema.
-    pub fn from_file_into(reader: Box<dyn FileReader>) -> Self {
-        let either = Either::Right(reader);
-        let descr = either
-            .reader()
-            .metadata()
-            .file_metadata()
-            .schema_descr_ptr();
-
-        Self::new(Some(either), None, descr)
-    }
-
-    /// Tries to create a iterator of [`Row`](crate::record::Row)s using projections.
-    /// Returns a error if a file reader is not the source of this iterator.
-    ///
-    /// The Projected schema can be a subset of or equal to the file schema,
-    /// when it is None, full file schema is assumed.
-    pub fn project(self, proj: Option<Type>) -> Result<Self> {
-        match self.file_reader {
-            Some(ref either) => {
-                let schema = either
-                    .reader()
-                    .metadata()
-                    .file_metadata()
-                    .schema_descr_ptr();
-                let descr = Self::get_proj_descr(proj, schema)?;
-
-                Ok(Self::new(self.file_reader, None, descr))
-            }
-            None => Err(general_err!("File reader is required to use projections")),
-        }
-    }
-
-    /// Helper method to get schema descriptor for projected schema.
-    /// If projection is None, then full schema is returned.
-    #[inline]
-    fn get_proj_descr(
-        proj: Option<Type>,
-        root_descr: SchemaDescPtr,
-    ) -> Result<SchemaDescPtr> {
-        match proj {
-            Some(projection) => {
-                // check if projection is part of file schema
-                let root_schema = root_descr.root_schema();
-                if !root_schema.check_contains(&projection) {
-                    return Err(general_err!("Root schema does not contain projection"));
-                }
-                Ok(Arc::new(SchemaDescriptor::new(Arc::new(projection))))
-            }
-            None => Ok(root_descr),
-        }
-    }
-
-    /// Returns common tree builder, so the same settings are applied to both iterators
-    /// from file reader and row group.
-    #[inline]
-    fn tree_builder() -> TreeBuilder {
-        TreeBuilder::new()
-    }
-}
-
-impl<'a> Iterator for RowIter<'a> {
-    type Item = Row;
-
-    fn next(&mut self) -> Option<Row> {
-        let mut row = None;
-        if let Some(ref mut iter) = self.row_iter {
-            row = iter.next();
-        }
-
-        while row.is_none() && self.current_row_group < self.num_row_groups {
-            // We do not expect any failures when accessing a row group, and file reader
-            // must be set for selecting next row group.
-            if let Some(ref either) = self.file_reader {
-                let file_reader = either.reader();
-                let row_group_reader = &*file_reader
-                    .get_row_group(self.current_row_group)
-                    .expect("Row group is required to advance");
-
-                let mut iter = self
-                    .tree_builder
-                    .as_iter(self.descr.clone(), row_group_reader);
-
-                row = iter.next();
-
-                self.current_row_group += 1;
-                self.row_iter = Some(iter);
-            }
-        }
-
-        row
-    }
-}
-
-/// Internal iterator of [`Row`](crate::record::Row)s for a reader.
-pub struct ReaderIter {
-    root_reader: Reader,
-    records_left: usize,
-}
-
-impl ReaderIter {
-    fn new(mut root_reader: Reader, num_records: usize) -> Self {
-        // Prepare root reader by advancing all column vectors
-        root_reader.advance_columns();
-        Self {
-            root_reader,
-            records_left: num_records,
-        }
-    }
-}
-
-impl Iterator for ReaderIter {
-    type Item = Row;
-
-    fn next(&mut self) -> Option<Row> {
-        if self.records_left > 0 {
-            self.records_left -= 1;
-            Some(self.root_reader.read())
-        } else {
-            None
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    use crate::errors::{ParquetError, Result};
-    use crate::file::reader::{FileReader, SerializedFileReader};
-    use crate::record::api::{Field, Row, RowAccessor, RowFormatter};
-    use crate::schema::parser::parse_message_type;
-    use crate::util::test_common::{get_test_file, get_test_path};
-    use std::convert::TryFrom;
-
-    // Convenient macros to assemble row, list, map, and group.
-
-    macro_rules! row {
-        () => {
-            {
-                let result = Vec::new();
-                make_row(result)
-            }
-        };
-        ( $( $e:expr ), + ) => {
-            {
-                let mut result = Vec::new();
-                $(
-                    result.push($e);
-                )*
-                    make_row(result)
-            }
-        }
-    }
-
-    macro_rules! list {
-        () => {
-            {
-                let result = Vec::new();
-                Field::ListInternal(make_list(result))
-            }
-        };
-        ( $( $e:expr ), + ) => {
-            {
-                let mut result = Vec::new();
-                $(
-                    result.push($e);
-                )*
-                    Field::ListInternal(make_list(result))
-            }
-        }
-    }
-
-    macro_rules! map {
-        () => {
-            {
-                let result = Vec::new();
-                Field::MapInternal(make_map(result))
-            }
-        };
-        ( $( $e:expr ), + ) => {
-            {
-                let mut result = Vec::new();
-                $(
-                    result.push($e);
-                )*
-                    Field::MapInternal(make_map(result))
-            }
-        }
-    }
-
-    macro_rules! group {
-        ( $( $e:expr ), * ) => {
-            {
-                Field::Group(row!($( $e ), *))
-            }
-        }
-    }
-
-    #[test]
-    fn test_file_reader_rows_nulls() {
-        let rows = test_file_reader_rows("nulls.snappy.parquet", None).unwrap();
-        let expected_rows = vec![
-            row![(
-                "b_struct".to_string(),
-                group![("b_c_int".to_string(), Field::Null)]
-            )],
-            row![(
-                "b_struct".to_string(),
-                group![("b_c_int".to_string(), Field::Null)]
-            )],
-            row![(
-                "b_struct".to_string(),
-                group![("b_c_int".to_string(), Field::Null)]
-            )],
-            row![(
-                "b_struct".to_string(),
-                group![("b_c_int".to_string(), Field::Null)]
-            )],
-            row![(
-                "b_struct".to_string(),
-                group![("b_c_int".to_string(), Field::Null)]
-            )],
-            row![(
-                "b_struct".to_string(),
-                group![("b_c_int".to_string(), Field::Null)]
-            )],
-            row![(
-                "b_struct".to_string(),
-                group![("b_c_int".to_string(), Field::Null)]
-            )],
-            row![(
-                "b_struct".to_string(),
-                group![("b_c_int".to_string(), Field::Null)]
-            )],
-        ];
-        assert_eq!(rows, expected_rows);
-    }
-
-    #[test]
-    fn test_file_reader_rows_nonnullable() {
-        let rows = test_file_reader_rows("nonnullable.impala.parquet", None).unwrap();
-        let expected_rows = vec![row![
-            ("ID".to_string(), Field::Long(8)),
-            ("Int_Array".to_string(), list![Field::Int(-1)]),
-            (
-                "int_array_array".to_string(),
-                list![list![Field::Int(-1), Field::Int(-2)], list![]]
-            ),
-            (
-                "Int_Map".to_string(),
-                map![(Field::Str("k1".to_string()), Field::Int(-1))]
-            ),
-            (
-                "int_map_array".to_string(),
-                list![
-                    map![],
-                    map![(Field::Str("k1".to_string()), Field::Int(1))],
-                    map![],
-                    map![]
-                ]
-            ),
-            (
-                "nested_Struct".to_string(),
-                group![
-                    ("a".to_string(), Field::Int(-1)),
-                    ("B".to_string(), list![Field::Int(-1)]),
-                    (
-                        "c".to_string(),
-                        group![(
-                            "D".to_string(),
-                            list![list![group![
-                                ("e".to_string(), Field::Int(-1)),
-                                ("f".to_string(), Field::Str("nonnullable".to_string()))
-                            ]]]
-                        )]
-                    ),
-                    ("G".to_string(), map![])
-                ]
-            )
-        ]];
-        assert_eq!(rows, expected_rows);
-    }
-
-    #[test]
-    fn test_file_reader_rows_nullable() {
-        let rows = test_file_reader_rows("nullable.impala.parquet", None).unwrap();
-        let expected_rows = vec![
-            row![
-                ("id".to_string(), Field::Long(1)),
-                (
-                    "int_array".to_string(),
-                    list![Field::Int(1), Field::Int(2), Field::Int(3)]
-                ),
-                (
-                    "int_array_Array".to_string(),
-                    list![
-                        list![Field::Int(1), Field::Int(2)],
-                        list![Field::Int(3), Field::Int(4)]
-                    ]
-                ),
-                (
-                    "int_map".to_string(),
-                    map![
-                        (Field::Str("k1".to_string()), Field::Int(1)),
-                        (Field::Str("k2".to_string()), Field::Int(100))
-                    ]
-                ),
-                (
-                    "int_Map_Array".to_string(),
-                    list![map![(Field::Str("k1".to_string()), Field::Int(1))]]
-                ),
-                (
-                    "nested_struct".to_string(),
-                    group![
-                        ("A".to_string(), Field::Int(1)),
-                        ("b".to_string(), list![Field::Int(1)]),
-                        (
-                            "C".to_string(),
-                            group![(
-                                "d".to_string(),
-                                list![
-                                    list![
-                                        group![
-                                            ("E".to_string(), Field::Int(10)),
-                                            (
-                                                "F".to_string(),
-                                                Field::Str("aaa".to_string())
-                                            )
-                                        ],
-                                        group![
-                                            ("E".to_string(), Field::Int(-10)),
-                                            (
-                                                "F".to_string(),
-                                                Field::Str("bbb".to_string())
-                                            )
-                                        ]
-                                    ],
-                                    list![group![
-                                        ("E".to_string(), Field::Int(11)),
-                                        ("F".to_string(), Field::Str("c".to_string()))
-                                    ]]
-                                ]
-                            )]
-                        ),
-                        (
-                            "g".to_string(),
-                            map![(
-                                Field::Str("foo".to_string()),
-                                group![(
-                                    "H".to_string(),
-                                    group![("i".to_string(), list![Field::Double(1.1)])]
-                                )]
-                            )]
-                        )
-                    ]
-                )
-            ],
-            row![
-                ("id".to_string(), Field::Long(2)),
-                (
-                    "int_array".to_string(),
-                    list![
-                        Field::Null,
-                        Field::Int(1),
-                        Field::Int(2),
-                        Field::Null,
-                        Field::Int(3),
-                        Field::Null
-                    ]
-                ),
-                (
-                    "int_array_Array".to_string(),
-                    list![
-                        list![Field::Null, Field::Int(1), Field::Int(2), Field::Null],
-                        list![Field::Int(3), Field::Null, Field::Int(4)],
-                        list![],
-                        Field::Null
-                    ]
-                ),
-                (
-                    "int_map".to_string(),
-                    map![
-                        (Field::Str("k1".to_string()), Field::Int(2)),
-                        (Field::Str("k2".to_string()), Field::Null)
-                    ]
-                ),
-                (
-                    "int_Map_Array".to_string(),
-                    list![
-                        map![
-                            (Field::Str("k3".to_string()), Field::Null),
-                            (Field::Str("k1".to_string()), Field::Int(1))
-                        ],
-                        Field::Null,
-                        map![]
-                    ]
-                ),
-                (
-                    "nested_struct".to_string(),
-                    group![
-                        ("A".to_string(), Field::Null),
-                        ("b".to_string(), list![Field::Null]),
-                        (
-                            "C".to_string(),
-                            group![(
-                                "d".to_string(),
-                                list![
-                                    list![
-                                        group![
-                                            ("E".to_string(), Field::Null),
-                                            ("F".to_string(), Field::Null)
-                                        ],
-                                        group![
-                                            ("E".to_string(), Field::Int(10)),
-                                            (
-                                                "F".to_string(),
-                                                Field::Str("aaa".to_string())
-                                            )
-                                        ],
-                                        group![
-                                            ("E".to_string(), Field::Null),
-                                            ("F".to_string(), Field::Null)
-                                        ],
-                                        group![
-                                            ("E".to_string(), Field::Int(-10)),
-                                            (
-                                                "F".to_string(),
-                                                Field::Str("bbb".to_string())
-                                            )
-                                        ],
-                                        group![
-                                            ("E".to_string(), Field::Null),
-                                            ("F".to_string(), Field::Null)
-                                        ]
-                                    ],
-                                    list![
-                                        group![
-                                            ("E".to_string(), Field::Int(11)),
-                                            (
-                                                "F".to_string(),
-                                                Field::Str("c".to_string())
-                                            )
-                                        ],
-                                        Field::Null
-                                    ],
-                                    list![],
-                                    Field::Null
-                                ]
-                            )]
-                        ),
-                        (
-                            "g".to_string(),
-                            map![
-                                (
-                                    Field::Str("g1".to_string()),
-                                    group![(
-                                        "H".to_string(),
-                                        group![(
-                                            "i".to_string(),
-                                            list![Field::Double(2.2), Field::Null]
-                                        )]
-                                    )]
-                                ),
-                                (
-                                    Field::Str("g2".to_string()),
-                                    group![(
-                                        "H".to_string(),
-                                        group![("i".to_string(), list![])]
-                                    )]
-                                ),
-                                (Field::Str("g3".to_string()), Field::Null),
-                                (
-                                    Field::Str("g4".to_string()),
-                                    group![(
-                                        "H".to_string(),
-                                        group![("i".to_string(), Field::Null)]
-                                    )]
-                                ),
-                                (
-                                    Field::Str("g5".to_string()),
-                                    group![("H".to_string(), Field::Null)]
-                                )
-                            ]
-                        )
-                    ]
-                )
-            ],
-            row![
-                ("id".to_string(), Field::Long(3)),
-                ("int_array".to_string(), list![]),
-                ("int_array_Array".to_string(), list![Field::Null]),
-                ("int_map".to_string(), map![]),
-                ("int_Map_Array".to_string(), list![Field::Null, Field::Null]),
-                (
-                    "nested_struct".to_string(),
-                    group![
-                        ("A".to_string(), Field::Null),
-                        ("b".to_string(), Field::Null),
-                        ("C".to_string(), group![("d".to_string(), list![])]),
-                        ("g".to_string(), map![])
-                    ]
-                )
-            ],
-            row![
-                ("id".to_string(), Field::Long(4)),
-                ("int_array".to_string(), Field::Null),
-                ("int_array_Array".to_string(), list![]),
-                ("int_map".to_string(), map![]),
-                ("int_Map_Array".to_string(), list![]),
-                (
-                    "nested_struct".to_string(),
-                    group![
-                        ("A".to_string(), Field::Null),
-                        ("b".to_string(), Field::Null),
-                        ("C".to_string(), group![("d".to_string(), Field::Null)]),
-                        ("g".to_string(), Field::Null)
-                    ]
-                )
-            ],
-            row![
-                ("id".to_string(), Field::Long(5)),
-                ("int_array".to_string(), Field::Null),
-                ("int_array_Array".to_string(), Field::Null),
-                ("int_map".to_string(), map![]),
-                ("int_Map_Array".to_string(), Field::Null),
-                (
-                    "nested_struct".to_string(),
-                    group![
-                        ("A".to_string(), Field::Null),
-                        ("b".to_string(), Field::Null),
-                        ("C".to_string(), Field::Null),
-                        (
-                            "g".to_string(),
-                            map![(
-                                Field::Str("foo".to_string()),
-                                group![(
-                                    "H".to_string(),
-                                    group![(
-                                        "i".to_string(),
-                                        list![Field::Double(2.2), Field::Double(3.3)]
-                                    )]
-                                )]
-                            )]
-                        )
-                    ]
-                )
-            ],
-            row![
-                ("id".to_string(), Field::Long(6)),
-                ("int_array".to_string(), Field::Null),
-                ("int_array_Array".to_string(), Field::Null),
-                ("int_map".to_string(), Field::Null),
-                ("int_Map_Array".to_string(), Field::Null),
-                ("nested_struct".to_string(), Field::Null)
-            ],
-            row![
-                ("id".to_string(), Field::Long(7)),
-                ("int_array".to_string(), Field::Null),
-                (
-                    "int_array_Array".to_string(),
-                    list![Field::Null, list![Field::Int(5), Field::Int(6)]]
-                ),
-                (
-                    "int_map".to_string(),
-                    map![
-                        (Field::Str("k1".to_string()), Field::Null),
-                        (Field::Str("k3".to_string()), Field::Null)
-                    ]
-                ),
-                ("int_Map_Array".to_string(), Field::Null),
-                (
-                    "nested_struct".to_string(),
-                    group![
-                        ("A".to_string(), Field::Int(7)),
-                        (
-                            "b".to_string(),
-                            list![Field::Int(2), Field::Int(3), Field::Null]
-                        ),
-                        (
-                            "C".to_string(),
-                            group![(
-                                "d".to_string(),
-                                list![list![], list![Field::Null], Field::Null]
-                            )]
-                        ),
-                        ("g".to_string(), Field::Null)
-                    ]
-                )
-            ],
-        ];
-        assert_eq!(rows, expected_rows);
-    }
-
-    #[test]
-    fn test_file_reader_rows_projection() {
-        let schema = "
-      message spark_schema {
-        REQUIRED DOUBLE c;
-        REQUIRED INT32 b;
-      }
-    ";
-        let schema = parse_message_type(&schema).unwrap();
-        let rows =
-            test_file_reader_rows("nested_maps.snappy.parquet", Some(schema)).unwrap();
-        let expected_rows = vec![
-            row![
-                ("c".to_string(), Field::Double(1.0)),
-                ("b".to_string(), Field::Int(1))
-            ],
-            row![
-                ("c".to_string(), Field::Double(1.0)),
-                ("b".to_string(), Field::Int(1))
-            ],
-            row![
-                ("c".to_string(), Field::Double(1.0)),
-                ("b".to_string(), Field::Int(1))
-            ],
-            row![
-                ("c".to_string(), Field::Double(1.0)),
-                ("b".to_string(), Field::Int(1))
-            ],
-            row![
-                ("c".to_string(), Field::Double(1.0)),
-                ("b".to_string(), Field::Int(1))
-            ],
-            row![
-                ("c".to_string(), Field::Double(1.0)),
-                ("b".to_string(), Field::Int(1))
-            ],
-        ];
-        assert_eq!(rows, expected_rows);
-    }
-
-    #[test]
-    fn test_iter_columns_in_row() {
-        let r = row![
-            ("c".to_string(), Field::Double(1.0)),
-            ("b".to_string(), Field::Int(1))
-        ];
-        let mut result = Vec::new();
-        for (name, record) in r.get_column_iter() {
-            result.push((name, record));
-        }
-        assert_eq!(
-            vec![
-                (&"c".to_string(), &Field::Double(1.0)),
-                (&"b".to_string(), &Field::Int(1))
-            ],
-            result
-        );
-    }
-
-    #[test]
-    fn test_file_reader_rows_projection_map() {
-        let schema = "
-      message spark_schema {
-        OPTIONAL group a (MAP) {
-          REPEATED group key_value {
-            REQUIRED BYTE_ARRAY key (UTF8);
-            OPTIONAL group value (MAP) {
-              REPEATED group key_value {
-                REQUIRED INT32 key;
-                REQUIRED BOOLEAN value;
-              }
-            }
-          }
-        }
-      }
-    ";
-        let schema = parse_message_type(&schema).unwrap();
-        let rows =
-            test_file_reader_rows("nested_maps.snappy.parquet", Some(schema)).unwrap();
-        let expected_rows = vec![
-            row![(
-                "a".to_string(),
-                map![(
-                    Field::Str("a".to_string()),
-                    map![
-                        (Field::Int(1), Field::Bool(true)),
-                        (Field::Int(2), Field::Bool(false))
-                    ]
-                )]
-            )],
-            row![(
-                "a".to_string(),
-                map![(
-                    Field::Str("b".to_string()),
-                    map![(Field::Int(1), Field::Bool(true))]
-                )]
-            )],
-            row![(
-                "a".to_string(),
-                map![(Field::Str("c".to_string()), Field::Null)]
-            )],
-            row![("a".to_string(), map![(Field::Str("d".to_string()), map![])])],
-            row![(
-                "a".to_string(),
-                map![(
-                    Field::Str("e".to_string()),
-                    map![(Field::Int(1), Field::Bool(true))]
-                )]
-            )],
-            row![(
-                "a".to_string(),
-                map![(
-                    Field::Str("f".to_string()),
-                    map![
-                        (Field::Int(3), Field::Bool(true)),
-                        (Field::Int(4), Field::Bool(false)),
-                        (Field::Int(5), Field::Bool(true))
-                    ]
-                )]
-            )],
-        ];
-        assert_eq!(rows, expected_rows);
-    }
-
-    #[test]
-    fn test_file_reader_rows_projection_list() {
-        let schema = "
-      message spark_schema {
-        OPTIONAL group a (LIST) {
-          REPEATED group list {
-            OPTIONAL group element (LIST) {
-              REPEATED group list {
-                OPTIONAL group element (LIST) {
-                  REPEATED group list {
-                    OPTIONAL BYTE_ARRAY element (UTF8);
-                  }
-                }
-              }
-            }
-          }
-        }
-      }
-    ";
-        let schema = parse_message_type(&schema).unwrap();
-        let rows =
-            test_file_reader_rows("nested_lists.snappy.parquet", Some(schema)).unwrap();
-        let expected_rows = vec![
-            row![(
-                "a".to_string(),
-                list![
-                    list![
-                        list![Field::Str("a".to_string()), Field::Str("b".to_string())],
-                        list![Field::Str("c".to_string())]
-                    ],
-                    list![Field::Null, list![Field::Str("d".to_string())]]
-                ]
-            )],
-            row![(
-                "a".to_string(),
-                list![
-                    list![
-                        list![Field::Str("a".to_string()), Field::Str("b".to_string())],
-                        list![Field::Str("c".to_string()), Field::Str("d".to_string())]
-                    ],
-                    list![Field::Null, list![Field::Str("e".to_string())]]
-                ]
-            )],
-            row![(
-                "a".to_string(),
-                list![
-                    list![
-                        list![Field::Str("a".to_string()), Field::Str("b".to_string())],
-                        list![Field::Str("c".to_string()), Field::Str("d".to_string())],
-                        list![Field::Str("e".to_string())]
-                    ],
-                    list![Field::Null, list![Field::Str("f".to_string())]]
-                ]
-            )],
-        ];
-        assert_eq!(rows, expected_rows);
-    }
-
-    #[test]
-    fn test_file_reader_rows_invalid_projection() {
-        let schema = "
-      message spark_schema {
-        REQUIRED INT32 key;
-        REQUIRED BOOLEAN value;
-      }
-    ";
-        let schema = parse_message_type(&schema).unwrap();
-        let res = test_file_reader_rows("nested_maps.snappy.parquet", Some(schema));
-        assert!(res.is_err());
-        assert_eq!(
-            res.unwrap_err(),
-            general_err!("Root schema does not contain projection")
-        );
-    }
-
-    #[test]
-    fn test_row_group_rows_invalid_projection() {
-        let schema = "
-      message spark_schema {
-        REQUIRED INT32 key;
-        REQUIRED BOOLEAN value;
-      }
-    ";
-        let schema = parse_message_type(&schema).unwrap();
-        let res = test_row_group_rows("nested_maps.snappy.parquet", Some(schema));
-        assert!(res.is_err());
-        assert_eq!(
-            res.unwrap_err(),
-            general_err!("Root schema does not contain projection")
-        );
-    }
-
-    #[test]
-    #[should_panic(expected = "Invalid map type")]
-    fn test_file_reader_rows_invalid_map_type() {
-        let schema = "
-      message spark_schema {
-        OPTIONAL group a (MAP) {
-          REPEATED group key_value {
-            REQUIRED BYTE_ARRAY key (UTF8);
-            OPTIONAL group value (MAP) {
-              REPEATED group key_value {
-                REQUIRED INT32 key;
-              }
-            }
-          }
-        }
-      }
-    ";
-        let schema = parse_message_type(&schema).unwrap();
-        test_file_reader_rows("nested_maps.snappy.parquet", Some(schema)).unwrap();
-    }
-
-    #[test]
-    fn test_file_reader_iter() {
-        let path = get_test_path("alltypes_plain.parquet");
-        let vec = vec![path]
-            .iter()
-            .map(|p| SerializedFileReader::try_from(p.as_path()).unwrap())
-            .flat_map(|r| RowIter::from_file_into(Box::new(r)))
-            .flat_map(|r| r.get_int(0))
-            .collect::<Vec<_>>();
-
-        assert_eq!(vec, vec![4, 5, 6, 7, 2, 3, 0, 1]);
-    }
-
-    #[test]
-    fn test_file_reader_iter_projection() {
-        let path = get_test_path("alltypes_plain.parquet");
-        let values = vec![path]
-            .iter()
-            .map(|p| SerializedFileReader::try_from(p.as_path()).unwrap())
-            .flat_map(|r| {
-                let schema = "message schema { OPTIONAL INT32 id; }";
-                let proj = parse_message_type(&schema).ok();
-
-                RowIter::from_file_into(Box::new(r)).project(proj).unwrap()
-            })
-            .map(|r| format!("id:{}", r.fmt(0)))
-            .collect::<Vec<_>>()
-            .join(", ");
-
-        assert_eq!(values, "id:4, id:5, id:6, id:7, id:2, id:3, id:0, id:1");
-    }
-
-    #[test]
-    fn test_file_reader_iter_projection_err() {
-        let schema = "
-      message spark_schema {
-        REQUIRED INT32 key;
-        REQUIRED BOOLEAN value;
-      }
-    ";
-        let proj = parse_message_type(&schema).ok();
-        let path = get_test_path("nested_maps.snappy.parquet");
-        let reader = SerializedFileReader::try_from(path.as_path()).unwrap();
-        let res = RowIter::from_file_into(Box::new(reader)).project(proj);
-
-        assert!(res.is_err());
-        assert_eq!(
-            res.err().unwrap(),
-            general_err!("Root schema does not contain projection")
-        );
-    }
-
-    #[test]
-    fn test_tree_reader_handle_repeated_fields_with_no_annotation() {
-        // Array field `phoneNumbers` does not contain LIST annotation.
-        // We parse it as struct with `phone` repeated field as array.
-        let rows = test_file_reader_rows("repeated_no_annotation.parquet", None).unwrap();
-        let expected_rows = vec![
-            row![
-                ("id".to_string(), Field::Int(1)),
-                ("phoneNumbers".to_string(), Field::Null)
-            ],
-            row![
-                ("id".to_string(), Field::Int(2)),
-                ("phoneNumbers".to_string(), Field::Null)
-            ],
-            row![
-                ("id".to_string(), Field::Int(3)),
-                (
-                    "phoneNumbers".to_string(),
-                    group![("phone".to_string(), list![])]
-                )
-            ],
-            row![
-                ("id".to_string(), Field::Int(4)),
-                (
-                    "phoneNumbers".to_string(),
-                    group![(
-                        "phone".to_string(),
-                        list![group![
-                            ("number".to_string(), Field::Long(5555555555)),
-                            ("kind".to_string(), Field::Null)
-                        ]]
-                    )]
-                )
-            ],
-            row![
-                ("id".to_string(), Field::Int(5)),
-                (
-                    "phoneNumbers".to_string(),
-                    group![(
-                        "phone".to_string(),
-                        list![group![
-                            ("number".to_string(), Field::Long(1111111111)),
-                            ("kind".to_string(), Field::Str("home".to_string()))
-                        ]]
-                    )]
-                )
-            ],
-            row![
-                ("id".to_string(), Field::Int(6)),
-                (
-                    "phoneNumbers".to_string(),
-                    group![(
-                        "phone".to_string(),
-                        list![
-                            group![
-                                ("number".to_string(), Field::Long(1111111111)),
-                                ("kind".to_string(), Field::Str("home".to_string()))
-                            ],
-                            group![
-                                ("number".to_string(), Field::Long(2222222222)),
-                                ("kind".to_string(), Field::Null)
-                            ],
-                            group![
-                                ("number".to_string(), Field::Long(3333333333)),
-                                ("kind".to_string(), Field::Str("mobile".to_string()))
-                            ]
-                        ]
-                    )]
-                )
-            ],
-        ];
-
-        assert_eq!(rows, expected_rows);
-    }
-
-    fn test_file_reader_rows(file_name: &str, schema: Option<Type>) -> Result<Vec<Row>> {
-        let file = get_test_file(file_name);
-        let file_reader: Box<dyn FileReader> = Box::new(SerializedFileReader::new(file)?);
-        let iter = file_reader.get_row_iter(schema)?;
-        Ok(iter.collect())
-    }
-
-    fn test_row_group_rows(file_name: &str, schema: Option<Type>) -> Result<Vec<Row>> {
-        let file = get_test_file(file_name);
-        let file_reader: Box<dyn FileReader> = Box::new(SerializedFileReader::new(file)?);
-        // Check the first row group only, because files will contain only single row
-        // group
-        let row_group_reader = file_reader.get_row_group(0).unwrap();
-        let iter = row_group_reader.get_row_iter(schema)?;
-        Ok(iter.collect())
-    }
-}
diff --git a/rust/parquet/src/record/record_writer.rs b/rust/parquet/src/record/record_writer.rs
deleted file mode 100644
index 56817eb2eca..00000000000
--- a/rust/parquet/src/record/record_writer.rs
+++ /dev/null
@@ -1,26 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use super::super::errors::ParquetError;
-use super::super::file::writer::RowGroupWriter;
-
-pub trait RecordWriter<T> {
-    fn write_to_row_group(
-        &self,
-        row_group_writer: &mut Box<dyn RowGroupWriter>,
-    ) -> Result<(), ParquetError>;
-}
diff --git a/rust/parquet/src/record/triplet.rs b/rust/parquet/src/record/triplet.rs
deleted file mode 100644
index bb4f942fd18..00000000000
--- a/rust/parquet/src/record/triplet.rs
+++ /dev/null
@@ -1,561 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use crate::basic::Type as PhysicalType;
-use crate::column::reader::{get_typed_column_reader, ColumnReader, ColumnReaderImpl};
-use crate::data_type::*;
-use crate::errors::{ParquetError, Result};
-use crate::record::api::Field;
-use crate::schema::types::ColumnDescPtr;
-
-/// Macro to generate simple functions that cover all types of triplet iterator.
-/// $func is a function of a typed triplet iterator and $token is a either {`ref`} or
-/// {`ref`, `mut`}
-macro_rules! triplet_enum_func {
-  ($self:ident, $func:ident, $( $token:tt ),*) => ({
-    match *$self {
-      TripletIter::BoolTripletIter($($token)* typed) => typed.$func(),
-      TripletIter::Int32TripletIter($($token)* typed) => typed.$func(),
-      TripletIter::Int64TripletIter($($token)* typed) => typed.$func(),
-      TripletIter::Int96TripletIter($($token)* typed) => typed.$func(),
-      TripletIter::FloatTripletIter($($token)* typed) => typed.$func(),
-      TripletIter::DoubleTripletIter($($token)* typed) => typed.$func(),
-      TripletIter::ByteArrayTripletIter($($token)* typed) => typed.$func(),
-      TripletIter::FixedLenByteArrayTripletIter($($token)* typed) => typed.$func()
-    }
-  });
-}
-
-/// High level API wrapper on column reader.
-/// Provides per-element access for each primitive column.
-pub enum TripletIter {
-    BoolTripletIter(TypedTripletIter<BoolType>),
-    Int32TripletIter(TypedTripletIter<Int32Type>),
-    Int64TripletIter(TypedTripletIter<Int64Type>),
-    Int96TripletIter(TypedTripletIter<Int96Type>),
-    FloatTripletIter(TypedTripletIter<FloatType>),
-    DoubleTripletIter(TypedTripletIter<DoubleType>),
-    ByteArrayTripletIter(TypedTripletIter<ByteArrayType>),
-    FixedLenByteArrayTripletIter(TypedTripletIter<FixedLenByteArrayType>),
-}
-
-impl TripletIter {
-    /// Creates new triplet for column reader
-    pub fn new(descr: ColumnDescPtr, reader: ColumnReader, batch_size: usize) -> Self {
-        match descr.physical_type() {
-            PhysicalType::BOOLEAN => TripletIter::BoolTripletIter(TypedTripletIter::new(
-                descr, batch_size, reader,
-            )),
-            PhysicalType::INT32 => TripletIter::Int32TripletIter(TypedTripletIter::new(
-                descr, batch_size, reader,
-            )),
-            PhysicalType::INT64 => TripletIter::Int64TripletIter(TypedTripletIter::new(
-                descr, batch_size, reader,
-            )),
-            PhysicalType::INT96 => TripletIter::Int96TripletIter(TypedTripletIter::new(
-                descr, batch_size, reader,
-            )),
-            PhysicalType::FLOAT => TripletIter::FloatTripletIter(TypedTripletIter::new(
-                descr, batch_size, reader,
-            )),
-            PhysicalType::DOUBLE => TripletIter::DoubleTripletIter(
-                TypedTripletIter::new(descr, batch_size, reader),
-            ),
-            PhysicalType::BYTE_ARRAY => TripletIter::ByteArrayTripletIter(
-                TypedTripletIter::new(descr, batch_size, reader),
-            ),
-            PhysicalType::FIXED_LEN_BYTE_ARRAY => {
-                TripletIter::FixedLenByteArrayTripletIter(TypedTripletIter::new(
-                    descr, batch_size, reader,
-                ))
-            }
-        }
-    }
-
-    /// Invokes underlying typed triplet iterator to buffer current value.
-    /// Should be called once - either before `is_null` or `current_value`.
-    #[inline]
-    pub fn read_next(&mut self) -> Result<bool> {
-        triplet_enum_func!(self, read_next, ref, mut)
-    }
-
-    /// Provides check on values/levels left without invoking the underlying typed triplet
-    /// iterator.
-    /// Returns true if more values/levels exist, false otherwise.
-    /// It is always in sync with `read_next` method.
-    #[inline]
-    pub fn has_next(&self) -> bool {
-        triplet_enum_func!(self, has_next, ref)
-    }
-
-    /// Returns current definition level for a leaf triplet iterator
-    #[inline]
-    pub fn current_def_level(&self) -> i16 {
-        triplet_enum_func!(self, current_def_level, ref)
-    }
-
-    /// Returns max definition level for a leaf triplet iterator
-    #[inline]
-    pub fn max_def_level(&self) -> i16 {
-        triplet_enum_func!(self, max_def_level, ref)
-    }
-
-    /// Returns current repetition level for a leaf triplet iterator
-    #[inline]
-    pub fn current_rep_level(&self) -> i16 {
-        triplet_enum_func!(self, current_rep_level, ref)
-    }
-
-    /// Returns max repetition level for a leaf triplet iterator
-    #[inline]
-    pub fn max_rep_level(&self) -> i16 {
-        triplet_enum_func!(self, max_rep_level, ref)
-    }
-
-    /// Returns true, if current value is null.
-    /// Based on the fact that for non-null value current definition level
-    /// equals to max definition level.
-    #[inline]
-    pub fn is_null(&self) -> bool {
-        self.current_def_level() < self.max_def_level()
-    }
-
-    /// Updates non-null value for current row.
-    pub fn current_value(&self) -> Field {
-        assert!(!self.is_null(), "Value is null");
-        match *self {
-            TripletIter::BoolTripletIter(ref typed) => {
-                Field::convert_bool(typed.column_descr(), *typed.current_value())
-            }
-            TripletIter::Int32TripletIter(ref typed) => {
-                Field::convert_int32(typed.column_descr(), *typed.current_value())
-            }
-            TripletIter::Int64TripletIter(ref typed) => {
-                Field::convert_int64(typed.column_descr(), *typed.current_value())
-            }
-            TripletIter::Int96TripletIter(ref typed) => {
-                Field::convert_int96(typed.column_descr(), typed.current_value().clone())
-            }
-            TripletIter::FloatTripletIter(ref typed) => {
-                Field::convert_float(typed.column_descr(), *typed.current_value())
-            }
-            TripletIter::DoubleTripletIter(ref typed) => {
-                Field::convert_double(typed.column_descr(), *typed.current_value())
-            }
-            TripletIter::ByteArrayTripletIter(ref typed) => Field::convert_byte_array(
-                typed.column_descr(),
-                typed.current_value().clone(),
-            ),
-            TripletIter::FixedLenByteArrayTripletIter(ref typed) => {
-                Field::convert_byte_array(
-                    typed.column_descr(),
-                    typed.current_value().clone().into(),
-                )
-            }
-        }
-    }
-}
-
-/// Internal typed triplet iterator as a wrapper for column reader
-/// (primitive leaf column), provides per-element access.
-pub struct TypedTripletIter<T: DataType> {
-    reader: ColumnReaderImpl<T>,
-    column_descr: ColumnDescPtr,
-    batch_size: usize,
-    // type properties
-    max_def_level: i16,
-    max_rep_level: i16,
-    // values and levels
-    values: Vec<T::T>,
-    def_levels: Option<Vec<i16>>,
-    rep_levels: Option<Vec<i16>>,
-    // current index for the triplet (value, def, rep)
-    curr_triplet_index: usize,
-    // how many triplets are left before we need to buffer
-    triplets_left: usize,
-    // helper flag to quickly check if we have more values/levels to read
-    has_next: bool,
-}
-
-impl<T: DataType> TypedTripletIter<T> {
-    /// Creates new typed triplet iterator based on provided column reader.
-    /// Use batch size to specify the amount of values to buffer from column reader.
-    fn new(descr: ColumnDescPtr, batch_size: usize, column_reader: ColumnReader) -> Self {
-        assert!(
-            batch_size > 0,
-            "Expected positive batch size, found: {}",
-            batch_size
-        );
-
-        let max_def_level = descr.max_def_level();
-        let max_rep_level = descr.max_rep_level();
-
-        let def_levels = if max_def_level == 0 {
-            None
-        } else {
-            Some(vec![0; batch_size])
-        };
-        let rep_levels = if max_rep_level == 0 {
-            None
-        } else {
-            Some(vec![0; batch_size])
-        };
-
-        Self {
-            reader: get_typed_column_reader(column_reader),
-            column_descr: descr,
-            batch_size,
-            max_def_level,
-            max_rep_level,
-            values: vec![T::T::default(); batch_size],
-            def_levels,
-            rep_levels,
-            curr_triplet_index: 0,
-            triplets_left: 0,
-            has_next: false,
-        }
-    }
-
-    /// Returns column descriptor reference for the current typed triplet iterator.
-    #[inline]
-    pub fn column_descr(&self) -> &ColumnDescPtr {
-        &self.column_descr
-    }
-
-    /// Returns maximum definition level for the triplet iterator (leaf column).
-    #[inline]
-    fn max_def_level(&self) -> i16 {
-        self.max_def_level
-    }
-
-    /// Returns maximum repetition level for the triplet iterator (leaf column).
-    #[inline]
-    fn max_rep_level(&self) -> i16 {
-        self.max_rep_level
-    }
-
-    /// Returns current value.
-    /// Method does not advance the iterator, therefore can be called multiple times.
-    #[inline]
-    fn current_value(&self) -> &T::T {
-        assert!(
-            self.current_def_level() == self.max_def_level(),
-            "Cannot extract value, max definition level: {}, current level: {}",
-            self.max_def_level(),
-            self.current_def_level()
-        );
-        &self.values[self.curr_triplet_index]
-    }
-
-    /// Returns current definition level.
-    /// If field is required, then maximum definition level is returned.
-    #[inline]
-    fn current_def_level(&self) -> i16 {
-        match self.def_levels {
-            Some(ref vec) => vec[self.curr_triplet_index],
-            None => self.max_def_level,
-        }
-    }
-
-    /// Returns current repetition level.
-    /// If field is required, then maximum repetition level is returned.
-    #[inline]
-    fn current_rep_level(&self) -> i16 {
-        match self.rep_levels {
-            Some(ref vec) => vec[self.curr_triplet_index],
-            None => self.max_rep_level,
-        }
-    }
-
-    /// Quick check if iterator has more values/levels to read.
-    /// It is updated as a result of `read_next` method, so they are synchronized.
-    #[inline]
-    fn has_next(&self) -> bool {
-        self.has_next
-    }
-
-    /// Advances to the next triplet.
-    /// Returns true, if there are more records to read, false there are no records left.
-    fn read_next(&mut self) -> Result<bool> {
-        self.curr_triplet_index += 1;
-
-        if self.curr_triplet_index >= self.triplets_left {
-            let (values_read, levels_read) = {
-                // Get slice of definition levels, if available
-                let def_levels = self.def_levels.as_mut().map(|vec| &mut vec[..]);
-
-                // Get slice of repetition levels, if available
-                let rep_levels = self.rep_levels.as_mut().map(|vec| &mut vec[..]);
-
-                // Buffer triplets
-                self.reader.read_batch(
-                    self.batch_size,
-                    def_levels,
-                    rep_levels,
-                    &mut self.values,
-                )?
-            };
-
-            // No more values or levels to read
-            if values_read == 0 && levels_read == 0 {
-                self.has_next = false;
-                return Ok(false);
-            }
-
-            // We never read values more than levels
-            if levels_read == 0 || values_read == levels_read {
-                // There are no definition levels to read, column is required
-                // or definition levels match values, so it does not require spacing
-                self.curr_triplet_index = 0;
-                self.triplets_left = values_read;
-            } else if values_read < levels_read {
-                // Add spacing for triplets.
-                // The idea is setting values for positions in def_levels when current
-                // definition level equals to maximum definition level.
-                // Values and levels are guaranteed to line up, because of
-                // the column reader method.
-
-                // Note: if values_read == 0, then spacing will not be triggered
-                let mut idx = values_read;
-                let def_levels = self.def_levels.as_ref().unwrap();
-                for i in 0..levels_read {
-                    if def_levels[levels_read - i - 1] == self.max_def_level {
-                        idx -= 1; // This is done to avoid usize becoming a negative value
-                        self.values.swap(levels_read - i - 1, idx);
-                    }
-                }
-                self.curr_triplet_index = 0;
-                self.triplets_left = levels_read;
-            } else {
-                return Err(general_err!(
-                    "Spacing of values/levels is wrong, values_read: {}, levels_read: {}",
-                    values_read,
-                    levels_read
-                ));
-            }
-        }
-
-        self.has_next = true;
-        Ok(true)
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    use crate::file::reader::{FileReader, SerializedFileReader};
-    use crate::schema::types::ColumnPath;
-    use crate::util::test_common::get_test_file;
-
-    #[test]
-    #[should_panic(expected = "Expected positive batch size, found: 0")]
-    fn test_triplet_zero_batch_size() {
-        let column_path =
-            ColumnPath::from(vec!["b_struct".to_string(), "b_c_int".to_string()]);
-        test_column_in_file("nulls.snappy.parquet", 0, &column_path, &[], &[], &[]);
-    }
-
-    #[test]
-    fn test_triplet_null_column() {
-        let path = vec!["b_struct", "b_c_int"];
-        let values = vec![];
-        let def_levels = vec![1, 1, 1, 1, 1, 1, 1, 1];
-        let rep_levels = vec![0, 0, 0, 0, 0, 0, 0, 0];
-        test_triplet_iter(
-            "nulls.snappy.parquet",
-            path,
-            &values,
-            &def_levels,
-            &rep_levels,
-        );
-    }
-
-    #[test]
-    fn test_triplet_required_column() {
-        let path = vec!["ID"];
-        let values = vec![Field::Long(8)];
-        let def_levels = vec![0];
-        let rep_levels = vec![0];
-        test_triplet_iter(
-            "nonnullable.impala.parquet",
-            path,
-            &values,
-            &def_levels,
-            &rep_levels,
-        );
-    }
-
-    #[test]
-    fn test_triplet_optional_column() {
-        let path = vec!["nested_struct", "A"];
-        let values = vec![Field::Int(1), Field::Int(7)];
-        let def_levels = vec![2, 1, 1, 1, 1, 0, 2];
-        let rep_levels = vec![0, 0, 0, 0, 0, 0, 0];
-        test_triplet_iter(
-            "nullable.impala.parquet",
-            path,
-            &values,
-            &def_levels,
-            &rep_levels,
-        );
-    }
-
-    #[test]
-    fn test_triplet_optional_list_column() {
-        let path = vec!["a", "list", "element", "list", "element", "list", "element"];
-        let values = vec![
-            Field::Str("a".to_string()),
-            Field::Str("b".to_string()),
-            Field::Str("c".to_string()),
-            Field::Str("d".to_string()),
-            Field::Str("a".to_string()),
-            Field::Str("b".to_string()),
-            Field::Str("c".to_string()),
-            Field::Str("d".to_string()),
-            Field::Str("e".to_string()),
-            Field::Str("a".to_string()),
-            Field::Str("b".to_string()),
-            Field::Str("c".to_string()),
-            Field::Str("d".to_string()),
-            Field::Str("e".to_string()),
-            Field::Str("f".to_string()),
-        ];
-        let def_levels = vec![7, 7, 7, 4, 7, 7, 7, 7, 7, 4, 7, 7, 7, 7, 7, 7, 4, 7];
-        let rep_levels = vec![0, 3, 2, 1, 2, 0, 3, 2, 3, 1, 2, 0, 3, 2, 3, 2, 1, 2];
-        test_triplet_iter(
-            "nested_lists.snappy.parquet",
-            path,
-            &values,
-            &def_levels,
-            &rep_levels,
-        );
-    }
-
-    #[test]
-    fn test_triplet_optional_map_column() {
-        let path = vec!["a", "key_value", "value", "key_value", "key"];
-        let values = vec![
-            Field::Int(1),
-            Field::Int(2),
-            Field::Int(1),
-            Field::Int(1),
-            Field::Int(3),
-            Field::Int(4),
-            Field::Int(5),
-        ];
-        let def_levels = vec![4, 4, 4, 2, 3, 4, 4, 4, 4];
-        let rep_levels = vec![0, 2, 0, 0, 0, 0, 0, 2, 2];
-        test_triplet_iter(
-            "nested_maps.snappy.parquet",
-            path,
-            &values,
-            &def_levels,
-            &rep_levels,
-        );
-    }
-
-    // Check triplet iterator across different batch sizes
-    fn test_triplet_iter(
-        file_name: &str,
-        column_path: Vec<&str>,
-        expected_values: &[Field],
-        expected_def_levels: &[i16],
-        expected_rep_levels: &[i16],
-    ) {
-        // Convert path into column path
-        let path: Vec<String> = column_path.iter().map(|x| x.to_string()).collect();
-        let column_path = ColumnPath::from(path);
-
-        let batch_sizes = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 128, 256];
-        for batch_size in batch_sizes {
-            test_column_in_file(
-                file_name,
-                batch_size,
-                &column_path,
-                expected_values,
-                expected_def_levels,
-                expected_rep_levels,
-            );
-        }
-    }
-
-    // Check values of a selectd column in a file
-    fn test_column_in_file(
-        file_name: &str,
-        batch_size: usize,
-        column_path: &ColumnPath,
-        expected_values: &[Field],
-        expected_def_levels: &[i16],
-        expected_rep_levels: &[i16],
-    ) {
-        let file = get_test_file(file_name);
-        let file_reader = SerializedFileReader::new(file).unwrap();
-        let metadata = file_reader.metadata();
-        // Get schema descriptor
-        let file_metadata = metadata.file_metadata();
-        let schema = file_metadata.schema_descr();
-        // Get first row group
-        let row_group_reader = file_reader.get_row_group(0).unwrap();
-
-        for i in 0..schema.num_columns() {
-            let descr = schema.column(i);
-            if descr.path() == column_path {
-                let reader = row_group_reader.get_column_reader(i).unwrap();
-                test_triplet_column(
-                    descr,
-                    reader,
-                    batch_size,
-                    expected_values,
-                    expected_def_levels,
-                    expected_rep_levels,
-                );
-            }
-        }
-    }
-
-    // Check values for individual triplet iterator
-    fn test_triplet_column(
-        descr: ColumnDescPtr,
-        reader: ColumnReader,
-        batch_size: usize,
-        expected_values: &[Field],
-        expected_def_levels: &[i16],
-        expected_rep_levels: &[i16],
-    ) {
-        let mut iter = TripletIter::new(descr.clone(), reader, batch_size);
-        let mut values: Vec<Field> = Vec::new();
-        let mut def_levels: Vec<i16> = Vec::new();
-        let mut rep_levels: Vec<i16> = Vec::new();
-
-        assert_eq!(iter.max_def_level(), descr.max_def_level());
-        assert_eq!(iter.max_rep_level(), descr.max_rep_level());
-
-        while let Ok(true) = iter.read_next() {
-            assert!(iter.has_next());
-            if !iter.is_null() {
-                values.push(iter.current_value());
-            }
-            def_levels.push(iter.current_def_level());
-            rep_levels.push(iter.current_rep_level());
-        }
-
-        assert_eq!(values, expected_values);
-        assert_eq!(def_levels, expected_def_levels);
-        assert_eq!(rep_levels, expected_rep_levels);
-    }
-}
diff --git a/rust/parquet/src/schema/mod.rs b/rust/parquet/src/schema/mod.rs
deleted file mode 100644
index 1ebee2e06e8..00000000000
--- a/rust/parquet/src/schema/mod.rs
+++ /dev/null
@@ -1,67 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Parquet schema definitions and methods to print and parse schema.
-//!
-//! # Example
-//!
-//! ```rust
-//! use parquet::{
-//!     basic::{ConvertedType, Repetition, Type as PhysicalType},
-//!     schema::{parser, printer, types::Type},
-//! };
-//! use std::sync::Arc;
-//!
-//! // Create the following schema:
-//! //
-//! // message schema {
-//! //   OPTIONAL BYTE_ARRAY a (UTF8);
-//! //   REQUIRED INT32 b;
-//! // }
-//!
-//! let field_a = Type::primitive_type_builder("a", PhysicalType::BYTE_ARRAY)
-//!     .with_converted_type(ConvertedType::UTF8)
-//!     .with_repetition(Repetition::OPTIONAL)
-//!     .build()
-//!     .unwrap();
-//!
-//! let field_b = Type::primitive_type_builder("b", PhysicalType::INT32)
-//!     .with_repetition(Repetition::REQUIRED)
-//!     .build()
-//!     .unwrap();
-//!
-//! let schema = Type::group_type_builder("schema")
-//!     .with_fields(&mut vec![Arc::new(field_a), Arc::new(field_b)])
-//!     .build()
-//!     .unwrap();
-//!
-//! let mut buf = Vec::new();
-//!
-//! // Print schema into buffer
-//! printer::print_schema(&mut buf, &schema);
-//!
-//! // Parse schema from the string
-//! let string_schema = String::from_utf8(buf).unwrap();
-//! let parsed_schema = parser::parse_message_type(&string_schema).unwrap();
-//!
-//! assert_eq!(schema, parsed_schema);
-//! ```
-
-pub mod parser;
-pub mod printer;
-pub mod types;
-pub mod visitor;
diff --git a/rust/parquet/src/schema/parser.rs b/rust/parquet/src/schema/parser.rs
deleted file mode 100644
index 3ce347c8745..00000000000
--- a/rust/parquet/src/schema/parser.rs
+++ /dev/null
@@ -1,1241 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Parquet schema parser.
-//! Provides methods to parse and validate string message type into Parquet
-//! [`Type`](crate::schema::types::Type).
-//!
-//! # Example
-//!
-//! ```rust
-//! use parquet::schema::parser::parse_message_type;
-//!
-//! let message_type = "
-//!   message spark_schema {
-//!     OPTIONAL BYTE_ARRAY a (UTF8);
-//!     REQUIRED INT32 b;
-//!     REQUIRED DOUBLE c;
-//!     REQUIRED BOOLEAN d;
-//!     OPTIONAL group e (LIST) {
-//!       REPEATED group list {
-//!         REQUIRED INT32 element;
-//!       }
-//!     }
-//!   }
-//! ";
-//!
-//! let schema = parse_message_type(message_type).expect("Expected valid schema");
-//! println!("{:?}", schema);
-//! ```
-
-use std::sync::Arc;
-
-use crate::basic::{
-    ConvertedType, DecimalType, IntType, LogicalType, Repetition, TimeType, TimeUnit,
-    TimestampType, Type as PhysicalType,
-};
-use crate::errors::{ParquetError, Result};
-use crate::schema::types::{Type, TypePtr};
-
-/// Parses message type as string into a Parquet [`Type`](crate::schema::types::Type)
-/// which, for example, could be used to extract individual columns. Returns Parquet
-/// general error when parsing or validation fails.
-pub fn parse_message_type(message_type: &str) -> Result<Type> {
-    let mut parser = Parser {
-        tokenizer: &mut Tokenizer::from_str(message_type),
-    };
-    parser.parse_message_type()
-}
-
-/// Tokenizer to split message type string into tokens that are separated using characters
-/// defined in `is_schema_delim` method. Tokenizer also preserves delimiters as tokens.
-/// Tokenizer provides Iterator interface to process tokens; it also allows to step back
-/// to reprocess previous tokens.
-struct Tokenizer<'a> {
-    // List of all tokens for a string
-    tokens: Vec<&'a str>,
-    // Current index of vector
-    index: usize,
-}
-
-impl<'a> Tokenizer<'a> {
-    // Create tokenizer from message type string
-    pub fn from_str(string: &'a str) -> Self {
-        let vec = string
-            .split_whitespace()
-            .flat_map(|t| Self::split_token(t))
-            .collect();
-        Tokenizer {
-            tokens: vec,
-            index: 0,
-        }
-    }
-
-    // List of all special characters in schema
-    fn is_schema_delim(c: char) -> bool {
-        c == ';' || c == '{' || c == '}' || c == '(' || c == ')' || c == '=' || c == ','
-    }
-
-    /// Splits string into tokens; input string can already be token or can contain
-    /// delimiters, e.g. required" -> Vec("required") and
-    /// "(UTF8);" -> Vec("(", "UTF8", ")", ";")
-    fn split_token(string: &str) -> Vec<&str> {
-        let mut buffer: Vec<&str> = Vec::new();
-        let mut tail = string;
-        while let Some(index) = tail.find(Self::is_schema_delim) {
-            let (h, t) = tail.split_at(index);
-            if !h.is_empty() {
-                buffer.push(h);
-            }
-            buffer.push(&t[0..1]);
-            tail = &t[1..];
-        }
-        if !tail.is_empty() {
-            buffer.push(tail);
-        }
-        buffer
-    }
-
-    // Move pointer to a previous element
-    fn backtrack(&mut self) {
-        self.index -= 1;
-    }
-}
-
-impl<'a> Iterator for Tokenizer<'a> {
-    type Item = &'a str;
-
-    fn next(&mut self) -> Option<&'a str> {
-        if self.index < self.tokens.len() {
-            self.index += 1;
-            Some(self.tokens[self.index - 1])
-        } else {
-            None
-        }
-    }
-}
-
-/// Internal Schema parser.
-/// Traverses message type using tokenizer and parses each group/primitive type
-/// recursively.
-struct Parser<'a> {
-    tokenizer: &'a mut Tokenizer<'a>,
-}
-
-// Utility function to assert token on validity.
-fn assert_token(token: Option<&str>, expected: &str) -> Result<()> {
-    match token {
-        Some(value) if value == expected => Ok(()),
-        Some(other) => Err(general_err!(
-            "Expected '{}', found token '{}'",
-            expected,
-            other
-        )),
-        None => Err(general_err!(
-            "Expected '{}', but no token found (None)",
-            expected
-        )),
-    }
-}
-
-// Utility function to parse i32 or return general error.
-#[inline]
-fn parse_i32(
-    value: Option<&str>,
-    not_found_msg: &str,
-    parse_fail_msg: &str,
-) -> Result<i32> {
-    value
-        .ok_or_else(|| general_err!(not_found_msg))
-        .and_then(|v| v.parse::<i32>().map_err(|_| general_err!(parse_fail_msg)))
-}
-
-// Utility function to parse boolean or return general error.
-#[inline]
-fn parse_bool(
-    value: Option<&str>,
-    not_found_msg: &str,
-    parse_fail_msg: &str,
-) -> Result<bool> {
-    value
-        .ok_or_else(|| general_err!(not_found_msg))
-        .and_then(|v| {
-            v.to_lowercase()
-                .parse::<bool>()
-                .map_err(|_| general_err!(parse_fail_msg))
-        })
-}
-
-// Utility function to parse TimeUnit or return general error.
-fn parse_timeunit(
-    value: Option<&str>,
-    not_found_msg: &str,
-    parse_fail_msg: &str,
-) -> Result<TimeUnit> {
-    value
-        .ok_or_else(|| general_err!(not_found_msg))
-        .and_then(|v| match v.to_uppercase().as_str() {
-            "MILLIS" => Ok(TimeUnit::MILLIS(Default::default())),
-            "MICROS" => Ok(TimeUnit::MICROS(Default::default())),
-            "NANOS" => Ok(TimeUnit::NANOS(Default::default())),
-            _ => Err(general_err!(parse_fail_msg)),
-        })
-}
-
-impl<'a> Parser<'a> {
-    // Entry function to parse message type, uses internal tokenizer.
-    fn parse_message_type(&mut self) -> Result<Type> {
-        // Check that message type starts with "message".
-        match self.tokenizer.next() {
-            Some("message") => {
-                let name = self
-                    .tokenizer
-                    .next()
-                    .ok_or_else(|| general_err!("Expected name, found None"))?;
-                let mut fields = self.parse_child_types()?;
-                Type::group_type_builder(name)
-                    .with_fields(&mut fields)
-                    .build()
-            }
-            _ => Err(general_err!("Message type does not start with 'message'")),
-        }
-    }
-
-    // Parses child types for a current group type.
-    // This is only invoked on root and group types.
-    fn parse_child_types(&mut self) -> Result<Vec<TypePtr>> {
-        assert_token(self.tokenizer.next(), "{")?;
-        let mut vec = Vec::new();
-        while let Some(value) = self.tokenizer.next() {
-            if value == "}" {
-                break;
-            } else {
-                self.tokenizer.backtrack();
-                vec.push(Arc::new(self.add_type()?));
-            }
-        }
-        Ok(vec)
-    }
-
-    fn add_type(&mut self) -> Result<Type> {
-        // Parse repetition
-        let repetition = self
-            .tokenizer
-            .next()
-            .ok_or_else(|| general_err!("Expected repetition, found None"))
-            .and_then(|v| v.to_uppercase().parse::<Repetition>())?;
-
-        match self.tokenizer.next() {
-            Some(group) if group.to_uppercase() == "GROUP" => {
-                self.add_group_type(Some(repetition))
-            }
-            Some(type_string) => {
-                let physical_type = type_string.to_uppercase().parse::<PhysicalType>()?;
-                self.add_primitive_type(repetition, physical_type)
-            }
-            None => Err(general_err!("Invalid type, could not extract next token")),
-        }
-    }
-
-    fn add_group_type(&mut self, repetition: Option<Repetition>) -> Result<Type> {
-        // Parse name of the group type
-        let name = self
-            .tokenizer
-            .next()
-            .ok_or_else(|| general_err!("Expected name, found None"))?;
-
-        // Parse logical or converted type if exists
-        let (logical_type, converted_type) = if let Some("(") = self.tokenizer.next() {
-            let tpe = self
-                .tokenizer
-                .next()
-                .ok_or_else(|| general_err!("Expected converted type, found None"))
-                .and_then(|v| {
-                    // Try logical type first
-                    let upper = v.to_uppercase();
-                    let logical = upper.parse::<LogicalType>();
-                    match logical {
-                        Ok(logical) => Ok((
-                            Some(logical.clone()),
-                            ConvertedType::from(Some(logical)),
-                        )),
-                        Err(_) => Ok((None, upper.parse::<ConvertedType>()?)),
-                    }
-                })?;
-            assert_token(self.tokenizer.next(), ")")?;
-            tpe
-        } else {
-            self.tokenizer.backtrack();
-            (None, ConvertedType::NONE)
-        };
-
-        // Parse optional id
-        let id = if let Some("=") = self.tokenizer.next() {
-            self.tokenizer.next().and_then(|v| v.parse::<i32>().ok())
-        } else {
-            self.tokenizer.backtrack();
-            None
-        };
-
-        let mut fields = self.parse_child_types()?;
-        let mut builder = Type::group_type_builder(name)
-            .with_logical_type(logical_type)
-            .with_converted_type(converted_type)
-            .with_fields(&mut fields);
-        if let Some(rep) = repetition {
-            builder = builder.with_repetition(rep);
-        }
-        if let Some(id) = id {
-            builder = builder.with_id(id);
-        }
-        builder.build()
-    }
-
-    fn add_primitive_type(
-        &mut self,
-        repetition: Repetition,
-        physical_type: PhysicalType,
-    ) -> Result<Type> {
-        // Read type length if the type is FIXED_LEN_BYTE_ARRAY.
-        let mut length: i32 = -1;
-        if physical_type == PhysicalType::FIXED_LEN_BYTE_ARRAY {
-            assert_token(self.tokenizer.next(), "(")?;
-            length = parse_i32(
-                self.tokenizer.next(),
-                "Expected length for FIXED_LEN_BYTE_ARRAY, found None",
-                "Failed to parse length for FIXED_LEN_BYTE_ARRAY",
-            )?;
-            assert_token(self.tokenizer.next(), ")")?;
-        }
-
-        // Parse name of the primitive type
-        let name = self
-            .tokenizer
-            .next()
-            .ok_or_else(|| general_err!("Expected name, found None"))?;
-
-        // Parse converted type
-        let (logical_type, converted_type, precision, scale) = if let Some("(") =
-            self.tokenizer.next()
-        {
-            let (mut logical, mut converted) = self
-                .tokenizer
-                .next()
-                .ok_or_else(|| {
-                    general_err!("Expected logical or converted type, found None")
-                })
-                .and_then(|v| {
-                    let upper = v.to_uppercase();
-                    let logical = upper.parse::<LogicalType>();
-                    match logical {
-                        Ok(logical) => Ok((
-                            Some(logical.clone()),
-                            ConvertedType::from(Some(logical)),
-                        )),
-                        Err(_) => Ok((None, upper.parse::<ConvertedType>()?)),
-                    }
-                })?;
-
-            // Parse precision and scale for decimals
-            let mut precision: i32 = -1;
-            let mut scale: i32 = -1;
-
-            // Parse the concrete logical type
-            if let Some(tpe) = &logical {
-                match tpe {
-                    LogicalType::DECIMAL(_) => {
-                        if let Some("(") = self.tokenizer.next() {
-                            precision = parse_i32(
-                                self.tokenizer.next(),
-                                "Expected precision, found None",
-                                "Failed to parse precision for DECIMAL type",
-                            )?;
-                            if let Some(",") = self.tokenizer.next() {
-                                scale = parse_i32(
-                                    self.tokenizer.next(),
-                                    "Expected scale, found None",
-                                    "Failed to parse scale for DECIMAL type",
-                                )?;
-                                assert_token(self.tokenizer.next(), ")")?;
-                                logical = Some(LogicalType::DECIMAL(DecimalType {
-                                    precision,
-                                    scale,
-                                }));
-                                converted = ConvertedType::from(logical.clone());
-                            } else {
-                                scale = 0;
-                                logical = Some(LogicalType::DECIMAL(DecimalType {
-                                    precision,
-                                    scale,
-                                }));
-                                converted = ConvertedType::from(logical.clone());
-                            }
-                        }
-                    }
-                    LogicalType::TIME(_) => {
-                        if let Some("(") = self.tokenizer.next() {
-                            let unit = parse_timeunit(
-                                self.tokenizer.next(),
-                                "Invalid timeunit found",
-                                "Failed to parse timeunit for TIME type",
-                            )?;
-                            if let Some(",") = self.tokenizer.next() {
-                                let is_adjusted_to_u_t_c = parse_bool(
-                                    self.tokenizer.next(),
-                                    "Invalid boolean found",
-                                    "Failed to parse timezone info for TIME type",
-                                )?;
-                                assert_token(self.tokenizer.next(), ")")?;
-                                logical = Some(LogicalType::TIME(TimeType {
-                                    unit,
-                                    is_adjusted_to_u_t_c,
-                                }));
-                                converted = ConvertedType::from(logical.clone());
-                            } else {
-                                // Invalid token for unit
-                                self.tokenizer.backtrack();
-                            }
-                        }
-                    }
-                    LogicalType::TIMESTAMP(_) => {
-                        if let Some("(") = self.tokenizer.next() {
-                            let unit = parse_timeunit(
-                                self.tokenizer.next(),
-                                "Invalid timeunit found",
-                                "Failed to parse timeunit for TIMESTAMP type",
-                            )?;
-                            if let Some(",") = self.tokenizer.next() {
-                                let is_adjusted_to_u_t_c = parse_bool(
-                                    self.tokenizer.next(),
-                                    "Invalid boolean found",
-                                    "Failed to parse timezone info for TIMESTAMP type",
-                                )?;
-                                assert_token(self.tokenizer.next(), ")")?;
-                                logical = Some(LogicalType::TIMESTAMP(TimestampType {
-                                    unit,
-                                    is_adjusted_to_u_t_c,
-                                }));
-                                converted = ConvertedType::from(logical.clone());
-                            } else {
-                                // Invalid token for unit
-                                self.tokenizer.backtrack();
-                            }
-                        }
-                    }
-                    LogicalType::INTEGER(_) => {
-                        if let Some("(") = self.tokenizer.next() {
-                            let bit_width = parse_i32(
-                                self.tokenizer.next(),
-                                "Invalid bit_width found",
-                                "Failed to parse bit_width for INTEGER type",
-                            )? as i8;
-                            match physical_type {
-                                PhysicalType::INT32 => {
-                                    match bit_width {
-                                        8 | 16 | 32 => {}
-                                        _ => {
-                                            return Err(general_err!("Incorrect bit width {} for INT32", bit_width))
-                                        }
-                                    }
-                                }
-                                PhysicalType::INT64 => {
-                                    if bit_width != 64 {
-                                        return Err(general_err!("Incorrect bit width {} for INT64", bit_width))
-                                    }
-                                }
-                                _ => {
-                                    return Err(general_err!("Logical type INTEGER cannot be used with physical type {}", physical_type))
-                                }
-                            }
-                            if let Some(",") = self.tokenizer.next() {
-                                let is_signed = parse_bool(
-                                    self.tokenizer.next(),
-                                    "Invalid boolean found",
-                                    "Failed to parse is_signed for INTEGER type",
-                                )?;
-                                assert_token(self.tokenizer.next(), ")")?;
-                                logical = Some(LogicalType::INTEGER(IntType {
-                                    bit_width,
-                                    is_signed,
-                                }));
-                                converted = ConvertedType::from(logical.clone());
-                            } else {
-                                // Invalid token for unit
-                                self.tokenizer.backtrack();
-                            }
-                        }
-                    }
-                    _ => {}
-                }
-            } else if converted == ConvertedType::DECIMAL {
-                if let Some("(") = self.tokenizer.next() {
-                    // Parse precision
-                    precision = parse_i32(
-                        self.tokenizer.next(),
-                        "Expected precision, found None",
-                        "Failed to parse precision for DECIMAL type",
-                    )?;
-
-                    // Parse scale
-                    scale = if let Some(",") = self.tokenizer.next() {
-                        parse_i32(
-                            self.tokenizer.next(),
-                            "Expected scale, found None",
-                            "Failed to parse scale for DECIMAL type",
-                        )?
-                    } else {
-                        // Scale is not provided, set it to 0.
-                        self.tokenizer.backtrack();
-                        0
-                    };
-
-                    assert_token(self.tokenizer.next(), ")")?;
-                } else {
-                    self.tokenizer.backtrack();
-                }
-            }
-
-            assert_token(self.tokenizer.next(), ")")?;
-            (logical, converted, precision, scale)
-        } else {
-            self.tokenizer.backtrack();
-            (None, ConvertedType::NONE, -1, -1)
-        };
-
-        // Parse optional id
-        let id = if let Some("=") = self.tokenizer.next() {
-            self.tokenizer.next().and_then(|v| v.parse::<i32>().ok())
-        } else {
-            self.tokenizer.backtrack();
-            None
-        };
-        assert_token(self.tokenizer.next(), ";")?;
-
-        let mut builder = Type::primitive_type_builder(name, physical_type)
-            .with_repetition(repetition)
-            .with_logical_type(logical_type)
-            .with_converted_type(converted_type)
-            .with_length(length)
-            .with_precision(precision)
-            .with_scale(scale);
-        if let Some(id) = id {
-            builder = builder.with_id(id);
-        }
-        builder.build()
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_tokenize_empty_string() {
-        assert_eq!(Tokenizer::from_str("").next(), None);
-    }
-
-    #[test]
-    fn test_tokenize_delimiters() {
-        let mut iter = Tokenizer::from_str(",;{}()=");
-        assert_eq!(iter.next(), Some(","));
-        assert_eq!(iter.next(), Some(";"));
-        assert_eq!(iter.next(), Some("{"));
-        assert_eq!(iter.next(), Some("}"));
-        assert_eq!(iter.next(), Some("("));
-        assert_eq!(iter.next(), Some(")"));
-        assert_eq!(iter.next(), Some("="));
-        assert_eq!(iter.next(), None);
-    }
-
-    #[test]
-    fn test_tokenize_delimiters_with_whitespaces() {
-        let mut iter = Tokenizer::from_str(" , ; { } ( ) = ");
-        assert_eq!(iter.next(), Some(","));
-        assert_eq!(iter.next(), Some(";"));
-        assert_eq!(iter.next(), Some("{"));
-        assert_eq!(iter.next(), Some("}"));
-        assert_eq!(iter.next(), Some("("));
-        assert_eq!(iter.next(), Some(")"));
-        assert_eq!(iter.next(), Some("="));
-        assert_eq!(iter.next(), None);
-    }
-
-    #[test]
-    fn test_tokenize_words() {
-        let mut iter = Tokenizer::from_str("abc def ghi jkl mno");
-        assert_eq!(iter.next(), Some("abc"));
-        assert_eq!(iter.next(), Some("def"));
-        assert_eq!(iter.next(), Some("ghi"));
-        assert_eq!(iter.next(), Some("jkl"));
-        assert_eq!(iter.next(), Some("mno"));
-        assert_eq!(iter.next(), None);
-    }
-
-    #[test]
-    fn test_tokenize_backtrack() {
-        let mut iter = Tokenizer::from_str("abc;");
-        assert_eq!(iter.next(), Some("abc"));
-        assert_eq!(iter.next(), Some(";"));
-        iter.backtrack();
-        assert_eq!(iter.next(), Some(";"));
-        assert_eq!(iter.next(), None);
-    }
-
-    #[test]
-    fn test_tokenize_message_type() {
-        let schema = "
-    message schema {
-      required int32 a;
-      optional binary c (UTF8);
-      required group d {
-        required int32 a;
-        optional binary c (UTF8);
-      }
-      required group e (LIST) {
-        repeated group list {
-          required int32 element;
-        }
-      }
-    }
-    ";
-        let iter = Tokenizer::from_str(schema);
-        let mut res = Vec::new();
-        for token in iter {
-            res.push(token);
-        }
-        assert_eq!(
-            res,
-            vec![
-                "message", "schema", "{", "required", "int32", "a", ";", "optional",
-                "binary", "c", "(", "UTF8", ")", ";", "required", "group", "d", "{",
-                "required", "int32", "a", ";", "optional", "binary", "c", "(", "UTF8",
-                ")", ";", "}", "required", "group", "e", "(", "LIST", ")", "{",
-                "repeated", "group", "list", "{", "required", "int32", "element", ";",
-                "}", "}", "}"
-            ]
-        );
-    }
-
-    #[test]
-    fn test_assert_token() {
-        assert!(assert_token(Some("a"), "a").is_ok());
-        assert!(assert_token(Some("a"), "b").is_err());
-        assert!(assert_token(None, "b").is_err());
-    }
-
-    #[test]
-    fn test_parse_message_type_invalid() {
-        let mut iter = Tokenizer::from_str("test");
-        let result = Parser {
-            tokenizer: &mut iter,
-        }
-        .parse_message_type();
-        assert!(result.is_err());
-        assert_eq!(
-            result.unwrap_err().to_string(),
-            "Parquet error: Message type does not start with 'message'"
-        );
-    }
-
-    #[test]
-    fn test_parse_message_type_no_name() {
-        let mut iter = Tokenizer::from_str("message");
-        let result = Parser {
-            tokenizer: &mut iter,
-        }
-        .parse_message_type();
-        assert!(result.is_err());
-        assert_eq!(
-            result.unwrap_err().to_string(),
-            "Parquet error: Expected name, found None"
-        );
-    }
-
-    #[test]
-    fn test_parse_message_type_fixed_byte_array() {
-        let schema = "
-    message schema {
-      REQUIRED FIXED_LEN_BYTE_ARRAY col;
-    }
-    ";
-        let mut iter = Tokenizer::from_str(schema);
-        let result = Parser {
-            tokenizer: &mut iter,
-        }
-        .parse_message_type();
-        assert!(result.is_err());
-
-        let schema = "
-    message schema {
-      REQUIRED FIXED_LEN_BYTE_ARRAY(16) col;
-    }
-    ";
-        let mut iter = Tokenizer::from_str(schema);
-        let result = Parser {
-            tokenizer: &mut iter,
-        }
-        .parse_message_type();
-        assert!(result.is_ok());
-    }
-
-    #[test]
-    fn test_parse_message_type_integer() {
-        // Invalid integer syntax
-        let schema = "
-    message root {
-      optional int64 f1 (INTEGER());
-    }
-    ";
-        let mut iter = Tokenizer::from_str(schema);
-        let result = Parser {
-            tokenizer: &mut iter,
-        }
-        .parse_message_type();
-        assert_eq!(
-            result,
-            Err(general_err!("Failed to parse bit_width for INTEGER type"))
-        );
-
-        // Invalid integer syntax, needs both bit-width and UTC sign
-        let schema = "
-    message root {
-      optional int64 f1 (INTEGER(32,));
-    }
-    ";
-        let mut iter = Tokenizer::from_str(schema);
-        let result = Parser {
-            tokenizer: &mut iter,
-        }
-        .parse_message_type();
-        assert_eq!(
-            result,
-            Err(general_err!("Incorrect bit width 32 for INT64"))
-        );
-
-        // Invalid integer because of non-numeric bit width
-        let schema = "
-    message root {
-      optional int32 f1 (INTEGER(eight,true));
-    }
-    ";
-        let mut iter = Tokenizer::from_str(schema);
-        let result = Parser {
-            tokenizer: &mut iter,
-        }
-        .parse_message_type();
-        assert_eq!(
-            result,
-            Err(general_err!("Failed to parse bit_width for INTEGER type"))
-        );
-
-        // Valid types
-        let schema = "
-    message root {
-      optional int32 f1 (INTEGER(8,false));
-      optional int32 f2 (INTEGER(8,true));
-      optional int32 f3 (INTEGER(16,false));
-      optional int32 f4 (INTEGER(16,true));
-      optional int32 f5 (INTEGER(32,false));
-      optional int32 f6 (INTEGER(32,true));
-      optional int64 f7 (INTEGER(64,false));
-      optional int64 f7 (INTEGER(64,true));
-    }
-    ";
-        let mut iter = Tokenizer::from_str(schema);
-        let result = Parser {
-            tokenizer: &mut iter,
-        }
-        .parse_message_type();
-        assert!(result.is_ok());
-    }
-
-    #[test]
-    fn test_parse_message_type_temporal() {
-        // Invalid timestamp syntax
-        let schema = "
-    message root {
-      optional int64 f1 (TIMESTAMP();
-    }
-    ";
-        let mut iter = Tokenizer::from_str(schema);
-        let result = Parser {
-            tokenizer: &mut iter,
-        }
-        .parse_message_type();
-        assert_eq!(
-            result,
-            Err(general_err!("Failed to parse timeunit for TIMESTAMP type"))
-        );
-
-        // Invalid timestamp syntax, needs both unit and UTC adjustment
-        let schema = "
-    message root {
-      optional int64 f1 (TIMESTAMP(MILLIS,));
-    }
-    ";
-        let mut iter = Tokenizer::from_str(schema);
-        let result = Parser {
-            tokenizer: &mut iter,
-        }
-        .parse_message_type();
-        assert_eq!(
-            result,
-            Err(general_err!(
-                "Failed to parse timezone info for TIMESTAMP type"
-            ))
-        );
-
-        // Invalid timestamp because of unknown unit
-        let schema = "
-    message root {
-      optional int64 f1 (TIMESTAMP(YOCTOS,));
-    }
-    ";
-        let mut iter = Tokenizer::from_str(schema);
-        let result = Parser {
-            tokenizer: &mut iter,
-        }
-        .parse_message_type();
-        assert_eq!(
-            result,
-            Err(general_err!("Failed to parse timeunit for TIMESTAMP type"))
-        );
-
-        // Valid types
-        let schema = "
-    message root {
-      optional int32 f1 (DATE);
-      optional int32 f2 (TIME(MILLIS,true));
-      optional int64 f3 (TIME(MICROS,false));
-      optional int64 f4 (TIME(NANOS,true));
-      optional int64 f5 (TIMESTAMP(MILLIS,true));
-      optional int64 f6 (TIMESTAMP(MICROS,true));
-      optional int64 f7 (TIMESTAMP(NANOS,false));
-    }
-    ";
-        let mut iter = Tokenizer::from_str(schema);
-        let result = Parser {
-            tokenizer: &mut iter,
-        }
-        .parse_message_type();
-        assert!(result.is_ok());
-    }
-
-    #[test]
-    fn test_parse_message_type_decimal() {
-        // It is okay for decimal to omit precision and scale with right syntax.
-        // Here we test wrong syntax of decimal type
-
-        // Invalid decimal syntax
-        let schema = "
-    message root {
-      optional int32 f1 (DECIMAL();
-    }
-    ";
-        let mut iter = Tokenizer::from_str(schema);
-        let result = Parser {
-            tokenizer: &mut iter,
-        }
-        .parse_message_type();
-        assert!(result.is_err());
-
-        // Invalid decimal, need precision and scale
-        let schema = "
-    message root {
-      optional int32 f1 (DECIMAL());
-    }
-    ";
-        let mut iter = Tokenizer::from_str(schema);
-        let result = Parser {
-            tokenizer: &mut iter,
-        }
-        .parse_message_type();
-        assert!(result.is_err());
-
-        // Invalid decimal because of `,` - has precision, needs scale
-        let schema = "
-    message root {
-      optional int32 f1 (DECIMAL(8,));
-    }
-    ";
-        let mut iter = Tokenizer::from_str(schema);
-        let result = Parser {
-            tokenizer: &mut iter,
-        }
-        .parse_message_type();
-        assert!(result.is_err());
-
-        // Invalid decimal because, we always require either precision or scale to be
-        // specified as part of converted type
-        let schema = "
-    message root {
-      optional int32 f3 (DECIMAL);
-    }
-    ";
-        let mut iter = Tokenizer::from_str(schema);
-        let result = Parser {
-            tokenizer: &mut iter,
-        }
-        .parse_message_type();
-        assert!(result.is_err());
-
-        // Valid decimal (precision, scale)
-        let schema = "
-    message root {
-      optional int32 f1 (DECIMAL(8, 3));
-      optional int32 f2 (DECIMAL(8));
-    }
-    ";
-        let mut iter = Tokenizer::from_str(schema);
-        let result = Parser {
-            tokenizer: &mut iter,
-        }
-        .parse_message_type();
-        assert!(result.is_ok());
-    }
-
-    #[test]
-    fn test_parse_message_type_compare_1() {
-        let schema = "
-    message root {
-      optional fixed_len_byte_array(5) f1 (DECIMAL(9, 3));
-      optional fixed_len_byte_array (16) f2 (DECIMAL (38, 18));
-    }
-    ";
-        let mut iter = Tokenizer::from_str(schema);
-        let message = Parser {
-            tokenizer: &mut iter,
-        }
-        .parse_message_type()
-        .unwrap();
-
-        let expected = Type::group_type_builder("root")
-            .with_fields(&mut vec![
-                Arc::new(
-                    Type::primitive_type_builder(
-                        "f1",
-                        PhysicalType::FIXED_LEN_BYTE_ARRAY,
-                    )
-                    .with_logical_type(Some(LogicalType::DECIMAL(DecimalType {
-                        precision: 9,
-                        scale: 3,
-                    })))
-                    .with_converted_type(ConvertedType::DECIMAL)
-                    .with_length(5)
-                    .with_precision(9)
-                    .with_scale(3)
-                    .build()
-                    .unwrap(),
-                ),
-                Arc::new(
-                    Type::primitive_type_builder(
-                        "f2",
-                        PhysicalType::FIXED_LEN_BYTE_ARRAY,
-                    )
-                    .with_logical_type(Some(LogicalType::DECIMAL(DecimalType {
-                        precision: 38,
-                        scale: 18,
-                    })))
-                    .with_converted_type(ConvertedType::DECIMAL)
-                    .with_length(16)
-                    .with_precision(38)
-                    .with_scale(18)
-                    .build()
-                    .unwrap(),
-                ),
-            ])
-            .build()
-            .unwrap();
-
-        assert_eq!(message, expected);
-    }
-
-    #[test]
-    fn test_parse_message_type_compare_2() {
-        let schema = "
-    message root {
-      required group a0 {
-        optional group a1 (LIST) {
-          repeated binary a2 (UTF8);
-        }
-
-        optional group b1 (LIST) {
-          repeated group b2 {
-            optional int32 b3;
-            optional double b4;
-          }
-        }
-      }
-    }
-    ";
-        let mut iter = Tokenizer::from_str(schema);
-        let message = Parser {
-            tokenizer: &mut iter,
-        }
-        .parse_message_type()
-        .unwrap();
-
-        let expected = Type::group_type_builder("root")
-            .with_fields(&mut vec![Arc::new(
-                Type::group_type_builder("a0")
-                    .with_repetition(Repetition::REQUIRED)
-                    .with_fields(&mut vec![
-                        Arc::new(
-                            Type::group_type_builder("a1")
-                                .with_repetition(Repetition::OPTIONAL)
-                                .with_logical_type(Some(LogicalType::LIST(
-                                    Default::default(),
-                                )))
-                                .with_converted_type(ConvertedType::LIST)
-                                .with_fields(&mut vec![Arc::new(
-                                    Type::primitive_type_builder(
-                                        "a2",
-                                        PhysicalType::BYTE_ARRAY,
-                                    )
-                                    .with_repetition(Repetition::REPEATED)
-                                    .with_converted_type(ConvertedType::UTF8)
-                                    .build()
-                                    .unwrap(),
-                                )])
-                                .build()
-                                .unwrap(),
-                        ),
-                        Arc::new(
-                            Type::group_type_builder("b1")
-                                .with_repetition(Repetition::OPTIONAL)
-                                .with_logical_type(Some(LogicalType::LIST(
-                                    Default::default(),
-                                )))
-                                .with_converted_type(ConvertedType::LIST)
-                                .with_fields(&mut vec![Arc::new(
-                                    Type::group_type_builder("b2")
-                                        .with_repetition(Repetition::REPEATED)
-                                        .with_fields(&mut vec![
-                                            Arc::new(
-                                                Type::primitive_type_builder(
-                                                    "b3",
-                                                    PhysicalType::INT32,
-                                                )
-                                                .build()
-                                                .unwrap(),
-                                            ),
-                                            Arc::new(
-                                                Type::primitive_type_builder(
-                                                    "b4",
-                                                    PhysicalType::DOUBLE,
-                                                )
-                                                .build()
-                                                .unwrap(),
-                                            ),
-                                        ])
-                                        .build()
-                                        .unwrap(),
-                                )])
-                                .build()
-                                .unwrap(),
-                        ),
-                    ])
-                    .build()
-                    .unwrap(),
-            )])
-            .build()
-            .unwrap();
-
-        assert_eq!(message, expected);
-    }
-
-    #[test]
-    fn test_parse_message_type_compare_3() {
-        let schema = "
-    message root {
-      required int32 _1 (INT_8);
-      required int32 _2 (INT_16);
-      required float _3;
-      required double _4;
-      optional int32 _5 (DATE);
-      optional binary _6 (UTF8);
-    }
-    ";
-        let mut iter = Tokenizer::from_str(schema);
-        let message = Parser {
-            tokenizer: &mut iter,
-        }
-        .parse_message_type()
-        .unwrap();
-
-        let mut fields = vec![
-            Arc::new(
-                Type::primitive_type_builder("_1", PhysicalType::INT32)
-                    .with_repetition(Repetition::REQUIRED)
-                    .with_converted_type(ConvertedType::INT_8)
-                    .build()
-                    .unwrap(),
-            ),
-            Arc::new(
-                Type::primitive_type_builder("_2", PhysicalType::INT32)
-                    .with_repetition(Repetition::REQUIRED)
-                    .with_converted_type(ConvertedType::INT_16)
-                    .build()
-                    .unwrap(),
-            ),
-            Arc::new(
-                Type::primitive_type_builder("_3", PhysicalType::FLOAT)
-                    .with_repetition(Repetition::REQUIRED)
-                    .build()
-                    .unwrap(),
-            ),
-            Arc::new(
-                Type::primitive_type_builder("_4", PhysicalType::DOUBLE)
-                    .with_repetition(Repetition::REQUIRED)
-                    .build()
-                    .unwrap(),
-            ),
-            Arc::new(
-                Type::primitive_type_builder("_5", PhysicalType::INT32)
-                    .with_logical_type(Some(LogicalType::DATE(Default::default())))
-                    .with_converted_type(ConvertedType::DATE)
-                    .build()
-                    .unwrap(),
-            ),
-            Arc::new(
-                Type::primitive_type_builder("_6", PhysicalType::BYTE_ARRAY)
-                    .with_converted_type(ConvertedType::UTF8)
-                    .build()
-                    .unwrap(),
-            ),
-        ];
-
-        let expected = Type::group_type_builder("root")
-            .with_fields(&mut fields)
-            .build()
-            .unwrap();
-        assert_eq!(message, expected);
-    }
-
-    #[test]
-    fn test_parse_message_type_compare_4() {
-        let schema = "
-    message root {
-      required int32 _1 (INTEGER(8,true));
-      required int32 _2 (INTEGER(16,false));
-      required float _3;
-      required double _4;
-      optional int32 _5 (DATE);
-      optional int32 _6 (TIME(MILLIS,false));
-      optional int64 _7 (TIME(MICROS,true));
-      optional int64 _8 (TIMESTAMP(MILLIS,true));
-      optional int64 _9 (TIMESTAMP(NANOS,false));
-      optional binary _10 (STRING);
-    }
-    ";
-        let mut iter = Tokenizer::from_str(schema);
-        let message = Parser {
-            tokenizer: &mut iter,
-        }
-        .parse_message_type()
-        .unwrap();
-
-        let mut fields = vec![
-            Arc::new(
-                Type::primitive_type_builder("_1", PhysicalType::INT32)
-                    .with_repetition(Repetition::REQUIRED)
-                    .with_logical_type(Some(LogicalType::INTEGER(IntType {
-                        bit_width: 8,
-                        is_signed: true,
-                    })))
-                    .build()
-                    .unwrap(),
-            ),
-            Arc::new(
-                Type::primitive_type_builder("_2", PhysicalType::INT32)
-                    .with_repetition(Repetition::REQUIRED)
-                    .with_logical_type(Some(LogicalType::INTEGER(IntType {
-                        bit_width: 16,
-                        is_signed: false,
-                    })))
-                    .build()
-                    .unwrap(),
-            ),
-            Arc::new(
-                Type::primitive_type_builder("_3", PhysicalType::FLOAT)
-                    .with_repetition(Repetition::REQUIRED)
-                    .build()
-                    .unwrap(),
-            ),
-            Arc::new(
-                Type::primitive_type_builder("_4", PhysicalType::DOUBLE)
-                    .with_repetition(Repetition::REQUIRED)
-                    .build()
-                    .unwrap(),
-            ),
-            Arc::new(
-                Type::primitive_type_builder("_5", PhysicalType::INT32)
-                    .with_logical_type(Some(LogicalType::DATE(Default::default())))
-                    .build()
-                    .unwrap(),
-            ),
-            Arc::new(
-                Type::primitive_type_builder("_6", PhysicalType::INT32)
-                    .with_logical_type(Some(LogicalType::TIME(TimeType {
-                        unit: TimeUnit::MILLIS(Default::default()),
-                        is_adjusted_to_u_t_c: false,
-                    })))
-                    .build()
-                    .unwrap(),
-            ),
-            Arc::new(
-                Type::primitive_type_builder("_7", PhysicalType::INT64)
-                    .with_logical_type(Some(LogicalType::TIME(TimeType {
-                        unit: TimeUnit::MICROS(Default::default()),
-                        is_adjusted_to_u_t_c: true,
-                    })))
-                    .build()
-                    .unwrap(),
-            ),
-            Arc::new(
-                Type::primitive_type_builder("_8", PhysicalType::INT64)
-                    .with_logical_type(Some(LogicalType::TIMESTAMP(TimestampType {
-                        unit: TimeUnit::MILLIS(Default::default()),
-                        is_adjusted_to_u_t_c: true,
-                    })))
-                    .build()
-                    .unwrap(),
-            ),
-            Arc::new(
-                Type::primitive_type_builder("_9", PhysicalType::INT64)
-                    .with_logical_type(Some(LogicalType::TIMESTAMP(TimestampType {
-                        unit: TimeUnit::NANOS(Default::default()),
-                        is_adjusted_to_u_t_c: false,
-                    })))
-                    .build()
-                    .unwrap(),
-            ),
-            Arc::new(
-                Type::primitive_type_builder("_10", PhysicalType::BYTE_ARRAY)
-                    .with_logical_type(Some(LogicalType::STRING(Default::default())))
-                    .build()
-                    .unwrap(),
-            ),
-        ];
-
-        let expected = Type::group_type_builder("root")
-            .with_fields(&mut fields)
-            .build()
-            .unwrap();
-        assert_eq!(message, expected);
-    }
-}
diff --git a/rust/parquet/src/schema/printer.rs b/rust/parquet/src/schema/printer.rs
deleted file mode 100644
index b1e739f77b4..00000000000
--- a/rust/parquet/src/schema/printer.rs
+++ /dev/null
@@ -1,827 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Parquet schema printer.
-//! Provides methods to print Parquet file schema and list file metadata.
-//!
-//! # Example
-//!
-//! ```rust
-//! use parquet::{
-//!     file::reader::{FileReader, SerializedFileReader},
-//!     schema::printer::{print_file_metadata, print_parquet_metadata, print_schema},
-//! };
-//! use std::{fs::File, path::Path};
-//!
-//! // Open a file
-//! let path = Path::new("test.parquet");
-//! if let Ok(file) = File::open(&path) {
-//!     let reader = SerializedFileReader::new(file).unwrap();
-//!     let parquet_metadata = reader.metadata();
-//!
-//!     print_parquet_metadata(&mut std::io::stdout(), &parquet_metadata);
-//!     print_file_metadata(&mut std::io::stdout(), &parquet_metadata.file_metadata());
-//!
-//!     print_schema(
-//!         &mut std::io::stdout(),
-//!         &parquet_metadata.file_metadata().schema(),
-//!     );
-//! }
-//! ```
-
-use std::{fmt, io};
-
-use crate::basic::{ConvertedType, LogicalType, TimeUnit, Type as PhysicalType};
-use crate::file::metadata::{
-    ColumnChunkMetaData, FileMetaData, ParquetMetaData, RowGroupMetaData,
-};
-use crate::schema::types::Type;
-
-/// Prints Parquet metadata [`ParquetMetaData`](crate::file::metadata::ParquetMetaData)
-/// information.
-#[allow(unused_must_use)]
-pub fn print_parquet_metadata(out: &mut dyn io::Write, metadata: &ParquetMetaData) {
-    print_file_metadata(out, &metadata.file_metadata());
-    writeln!(out);
-    writeln!(out);
-    writeln!(out, "num of row groups: {}", metadata.num_row_groups());
-    writeln!(out, "row groups:");
-    writeln!(out);
-    for (i, rg) in metadata.row_groups().iter().enumerate() {
-        writeln!(out, "row group {}:", i);
-        print_dashes(out, 80);
-        print_row_group_metadata(out, rg);
-    }
-}
-
-/// Prints file metadata [`FileMetaData`](crate::file::metadata::FileMetaData)
-/// information.
-#[allow(unused_must_use)]
-pub fn print_file_metadata(out: &mut dyn io::Write, file_metadata: &FileMetaData) {
-    writeln!(out, "version: {}", file_metadata.version());
-    writeln!(out, "num of rows: {}", file_metadata.num_rows());
-    if let Some(created_by) = file_metadata.created_by().as_ref() {
-        writeln!(out, "created by: {}", created_by);
-    }
-    if let Some(metadata) = file_metadata.key_value_metadata() {
-        writeln!(out, "metadata:");
-        for kv in metadata.iter() {
-            writeln!(
-                out,
-                "  {}: {}",
-                &kv.key,
-                kv.value.as_ref().unwrap_or(&"".to_owned())
-            );
-        }
-    }
-    let schema = file_metadata.schema();
-    print_schema(out, schema);
-}
-
-/// Prints Parquet [`Type`](crate::schema::types::Type) information.
-#[allow(unused_must_use)]
-pub fn print_schema(out: &mut dyn io::Write, tp: &Type) {
-    // TODO: better if we can pass fmt::Write to Printer.
-    // But how can we make it to accept both io::Write & fmt::Write?
-    let mut s = String::new();
-    {
-        let mut printer = Printer::new(&mut s);
-        printer.print(tp);
-    }
-    writeln!(out, "{}", s);
-}
-
-#[allow(unused_must_use)]
-fn print_row_group_metadata(out: &mut dyn io::Write, rg_metadata: &RowGroupMetaData) {
-    writeln!(out, "total byte size: {}", rg_metadata.total_byte_size());
-    writeln!(out, "num of rows: {}", rg_metadata.num_rows());
-    writeln!(out);
-    writeln!(out, "num of columns: {}", rg_metadata.num_columns());
-    writeln!(out, "columns: ");
-    for (i, cc) in rg_metadata.columns().iter().enumerate() {
-        writeln!(out);
-        writeln!(out, "column {}:", i);
-        print_dashes(out, 80);
-        print_column_chunk_metadata(out, cc);
-    }
-}
-
-#[allow(unused_must_use)]
-fn print_column_chunk_metadata(
-    out: &mut dyn io::Write,
-    cc_metadata: &ColumnChunkMetaData,
-) {
-    writeln!(out, "column type: {}", cc_metadata.column_type());
-    writeln!(out, "column path: {}", cc_metadata.column_path());
-    let encoding_strs: Vec<_> = cc_metadata
-        .encodings()
-        .iter()
-        .map(|e| format!("{}", e))
-        .collect();
-    writeln!(out, "encodings: {}", encoding_strs.join(" "));
-    let file_path_str = match cc_metadata.file_path() {
-        None => "N/A",
-        Some(ref fp) => *fp,
-    };
-    writeln!(out, "file path: {}", file_path_str);
-    writeln!(out, "file offset: {}", cc_metadata.file_offset());
-    writeln!(out, "num of values: {}", cc_metadata.num_values());
-    writeln!(
-        out,
-        "total compressed size (in bytes): {}",
-        cc_metadata.compressed_size()
-    );
-    writeln!(
-        out,
-        "total uncompressed size (in bytes): {}",
-        cc_metadata.uncompressed_size()
-    );
-    writeln!(out, "data page offset: {}", cc_metadata.data_page_offset());
-    let index_page_offset_str = match cc_metadata.index_page_offset() {
-        None => "N/A".to_owned(),
-        Some(ipo) => ipo.to_string(),
-    };
-    writeln!(out, "index page offset: {}", index_page_offset_str);
-    let dict_page_offset_str = match cc_metadata.dictionary_page_offset() {
-        None => "N/A".to_owned(),
-        Some(dpo) => dpo.to_string(),
-    };
-    writeln!(out, "dictionary page offset: {}", dict_page_offset_str);
-    let statistics_str = match cc_metadata.statistics() {
-        None => "N/A".to_owned(),
-        Some(stats) => stats.to_string(),
-    };
-    writeln!(out, "statistics: {}", statistics_str);
-    writeln!(out);
-}
-
-#[allow(unused_must_use)]
-fn print_dashes(out: &mut dyn io::Write, num: i32) {
-    for _ in 0..num {
-        write!(out, "-");
-    }
-    writeln!(out);
-}
-
-const INDENT_WIDTH: i32 = 2;
-
-/// Struct for printing Parquet message type.
-struct Printer<'a> {
-    output: &'a mut dyn fmt::Write,
-    indent: i32,
-}
-
-#[allow(unused_must_use)]
-impl<'a> Printer<'a> {
-    fn new(output: &'a mut dyn fmt::Write) -> Self {
-        Printer { output, indent: 0 }
-    }
-
-    fn print_indent(&mut self) {
-        for _ in 0..self.indent {
-            write!(self.output, " ");
-        }
-    }
-}
-
-#[inline]
-fn print_timeunit(unit: &TimeUnit) -> &str {
-    match unit {
-        TimeUnit::MILLIS(_) => "MILLIS",
-        TimeUnit::MICROS(_) => "MICROS",
-        TimeUnit::NANOS(_) => "NANOS",
-    }
-}
-
-#[inline]
-fn print_logical_and_converted(
-    logical_type: &Option<LogicalType>,
-    converted_type: ConvertedType,
-    precision: i32,
-    scale: i32,
-) -> String {
-    match logical_type {
-        Some(logical_type) => match logical_type {
-            LogicalType::INTEGER(t) => {
-                format!("INTEGER({},{})", t.bit_width, t.is_signed)
-            }
-            LogicalType::DECIMAL(t) => {
-                format!("DECIMAL({},{})", t.precision, t.scale)
-            }
-            LogicalType::TIMESTAMP(t) => {
-                format!(
-                    "TIMESTAMP({},{})",
-                    print_timeunit(&t.unit),
-                    t.is_adjusted_to_u_t_c
-                )
-            }
-            LogicalType::TIME(t) => {
-                format!(
-                    "TIME({},{})",
-                    print_timeunit(&t.unit),
-                    t.is_adjusted_to_u_t_c
-                )
-            }
-            LogicalType::DATE(_) => "DATE".to_string(),
-            LogicalType::BSON(_) => "BSON".to_string(),
-            LogicalType::JSON(_) => "JSON".to_string(),
-            LogicalType::STRING(_) => "STRING".to_string(),
-            LogicalType::UUID(_) => "UUID".to_string(),
-            LogicalType::ENUM(_) => "ENUM".to_string(),
-            LogicalType::LIST(_) => "LIST".to_string(),
-            LogicalType::MAP(_) => "MAP".to_string(),
-            LogicalType::UNKNOWN(_) => "UNKNOWN".to_string(),
-        },
-        None => {
-            // Also print converted type if it is available
-            match converted_type {
-                ConvertedType::NONE => format!(""),
-                decimal @ ConvertedType::DECIMAL => {
-                    // For decimal type we should print precision and scale if they
-                    // are > 0, e.g. DECIMAL(9, 2) -
-                    // DECIMAL(9) - DECIMAL
-                    let precision_scale = match (precision, scale) {
-                        (p, s) if p > 0 && s > 0 => {
-                            format!("{}, {}", p, s)
-                        }
-                        (p, 0) if p > 0 => format!("{}", p),
-                        _ => format!(""),
-                    };
-                    format!("{}{}", decimal, precision_scale)
-                }
-                other_converted_type => {
-                    format!("{}", other_converted_type)
-                }
-            }
-        }
-    }
-}
-
-#[allow(unused_must_use)]
-impl<'a> Printer<'a> {
-    pub fn print(&mut self, tp: &Type) {
-        self.print_indent();
-        match *tp {
-            Type::PrimitiveType {
-                ref basic_info,
-                physical_type,
-                type_length,
-                scale,
-                precision,
-            } => {
-                let phys_type_str = match physical_type {
-                    PhysicalType::FIXED_LEN_BYTE_ARRAY => {
-                        // We need to include length for fixed byte array
-                        format!("{} ({})", physical_type, type_length)
-                    }
-                    _ => format!("{}", physical_type),
-                };
-                // Also print logical type if it is available
-                // If there is a logical type, do not print converted type
-                let logical_type_str = print_logical_and_converted(
-                    &basic_info.logical_type(),
-                    basic_info.converted_type(),
-                    scale,
-                    precision,
-                );
-                if logical_type_str.is_empty() {
-                    write!(
-                        self.output,
-                        "{} {} {};",
-                        basic_info.repetition(),
-                        phys_type_str,
-                        basic_info.name()
-                    );
-                } else {
-                    write!(
-                        self.output,
-                        "{} {} {} ({});",
-                        basic_info.repetition(),
-                        phys_type_str,
-                        basic_info.name(),
-                        logical_type_str
-                    );
-                }
-            }
-            Type::GroupType {
-                ref basic_info,
-                ref fields,
-            } => {
-                if basic_info.has_repetition() {
-                    let r = basic_info.repetition();
-                    write!(self.output, "{} group {} ", r, basic_info.name());
-                    let logical_str = print_logical_and_converted(
-                        &basic_info.logical_type(),
-                        basic_info.converted_type(),
-                        0,
-                        0,
-                    );
-                    if !logical_str.is_empty() {
-                        write!(self.output, "({}) ", logical_str);
-                    }
-                    writeln!(self.output, "{{");
-                } else {
-                    writeln!(self.output, "message {} {{", basic_info.name());
-                }
-
-                self.indent += INDENT_WIDTH;
-                for c in fields {
-                    self.print(&c);
-                    writeln!(self.output);
-                }
-                self.indent -= INDENT_WIDTH;
-                self.print_indent();
-                write!(self.output, "}}");
-            }
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    use std::sync::Arc;
-
-    use crate::basic::{
-        DateType, DecimalType, IntType, LogicalType, Repetition, TimeType, TimestampType,
-        Type as PhysicalType,
-    };
-    use crate::errors::Result;
-    use crate::schema::{parser::parse_message_type, types::Type};
-
-    fn assert_print_parse_message(message: Type) {
-        let mut s = String::new();
-        {
-            let mut p = Printer::new(&mut s);
-            p.print(&message);
-        }
-        println!("{}", &s);
-        let parsed = parse_message_type(&s).unwrap();
-        assert_eq!(message, parsed);
-    }
-
-    #[test]
-    fn test_print_primitive_type() {
-        let mut s = String::new();
-        {
-            let mut p = Printer::new(&mut s);
-            let field = Type::primitive_type_builder("field", PhysicalType::INT32)
-                .with_repetition(Repetition::REQUIRED)
-                .with_converted_type(ConvertedType::INT_32)
-                .build()
-                .unwrap();
-            p.print(&field);
-        }
-        assert_eq!(&mut s, "REQUIRED INT32 field (INT_32);");
-    }
-
-    #[inline]
-    fn build_primitive_type(
-        name: &str,
-        physical_type: PhysicalType,
-        logical_type: Option<LogicalType>,
-        converted_type: ConvertedType,
-        repetition: Repetition,
-    ) -> Result<Type> {
-        Type::primitive_type_builder(name, physical_type)
-            .with_repetition(repetition)
-            .with_logical_type(logical_type)
-            .with_converted_type(converted_type)
-            .build()
-    }
-
-    #[test]
-    fn test_print_logical_types() {
-        let types_and_strings = vec![
-            (
-                build_primitive_type(
-                    "field",
-                    PhysicalType::INT32,
-                    Some(LogicalType::INTEGER(IntType {
-                        bit_width: 32,
-                        is_signed: true,
-                    })),
-                    ConvertedType::NONE,
-                    Repetition::REQUIRED,
-                )
-                .unwrap(),
-                "REQUIRED INT32 field (INTEGER(32,true));",
-            ),
-            (
-                build_primitive_type(
-                    "field",
-                    PhysicalType::INT32,
-                    Some(LogicalType::INTEGER(IntType {
-                        bit_width: 8,
-                        is_signed: false,
-                    })),
-                    ConvertedType::NONE,
-                    Repetition::OPTIONAL,
-                )
-                .unwrap(),
-                "OPTIONAL INT32 field (INTEGER(8,false));",
-            ),
-            (
-                build_primitive_type(
-                    "field",
-                    PhysicalType::INT32,
-                    Some(LogicalType::INTEGER(IntType {
-                        bit_width: 16,
-                        is_signed: true,
-                    })),
-                    ConvertedType::INT_16,
-                    Repetition::REPEATED,
-                )
-                .unwrap(),
-                "REPEATED INT32 field (INTEGER(16,true));",
-            ),
-            (
-                build_primitive_type(
-                    "field",
-                    PhysicalType::INT64,
-                    None,
-                    ConvertedType::NONE,
-                    Repetition::REPEATED,
-                )
-                .unwrap(),
-                "REPEATED INT64 field;",
-            ),
-            (
-                build_primitive_type(
-                    "field",
-                    PhysicalType::FLOAT,
-                    None,
-                    ConvertedType::NONE,
-                    Repetition::REQUIRED,
-                )
-                .unwrap(),
-                "REQUIRED FLOAT field;",
-            ),
-            (
-                build_primitive_type(
-                    "booleans",
-                    PhysicalType::BOOLEAN,
-                    None,
-                    ConvertedType::NONE,
-                    Repetition::OPTIONAL,
-                )
-                .unwrap(),
-                "OPTIONAL BOOLEAN booleans;",
-            ),
-            (
-                build_primitive_type(
-                    "field",
-                    PhysicalType::INT64,
-                    Some(LogicalType::TIMESTAMP(TimestampType {
-                        is_adjusted_to_u_t_c: true,
-                        unit: TimeUnit::MILLIS(Default::default()),
-                    })),
-                    ConvertedType::NONE,
-                    Repetition::REQUIRED,
-                )
-                .unwrap(),
-                "REQUIRED INT64 field (TIMESTAMP(MILLIS,true));",
-            ),
-            (
-                build_primitive_type(
-                    "field",
-                    PhysicalType::INT32,
-                    Some(LogicalType::DATE(DateType {})),
-                    ConvertedType::NONE,
-                    Repetition::OPTIONAL,
-                )
-                .unwrap(),
-                "OPTIONAL INT32 field (DATE);",
-            ),
-            (
-                build_primitive_type(
-                    "field",
-                    PhysicalType::INT32,
-                    Some(LogicalType::TIME(TimeType {
-                        unit: TimeUnit::MILLIS(Default::default()),
-                        is_adjusted_to_u_t_c: false,
-                    })),
-                    ConvertedType::TIME_MILLIS,
-                    Repetition::REQUIRED,
-                )
-                .unwrap(),
-                "REQUIRED INT32 field (TIME(MILLIS,false));",
-            ),
-            (
-                build_primitive_type(
-                    "field",
-                    PhysicalType::BYTE_ARRAY,
-                    None,
-                    ConvertedType::NONE,
-                    Repetition::REQUIRED,
-                )
-                .unwrap(),
-                "REQUIRED BYTE_ARRAY field;",
-            ),
-            (
-                build_primitive_type(
-                    "field",
-                    PhysicalType::BYTE_ARRAY,
-                    None,
-                    ConvertedType::UTF8,
-                    Repetition::REQUIRED,
-                )
-                .unwrap(),
-                "REQUIRED BYTE_ARRAY field (UTF8);",
-            ),
-            (
-                build_primitive_type(
-                    "field",
-                    PhysicalType::BYTE_ARRAY,
-                    Some(LogicalType::JSON(Default::default())),
-                    ConvertedType::JSON,
-                    Repetition::REQUIRED,
-                )
-                .unwrap(),
-                "REQUIRED BYTE_ARRAY field (JSON);",
-            ),
-            (
-                build_primitive_type(
-                    "field",
-                    PhysicalType::BYTE_ARRAY,
-                    Some(LogicalType::BSON(Default::default())),
-                    ConvertedType::BSON,
-                    Repetition::REQUIRED,
-                )
-                .unwrap(),
-                "REQUIRED BYTE_ARRAY field (BSON);",
-            ),
-            (
-                build_primitive_type(
-                    "field",
-                    PhysicalType::BYTE_ARRAY,
-                    Some(LogicalType::STRING(Default::default())),
-                    ConvertedType::NONE,
-                    Repetition::REQUIRED,
-                )
-                .unwrap(),
-                "REQUIRED BYTE_ARRAY field (STRING);",
-            ),
-        ];
-
-        types_and_strings.into_iter().for_each(|(field, expected)| {
-            let mut s = String::new();
-            {
-                let mut p = Printer::new(&mut s);
-                p.print(&field);
-            }
-            assert_eq!(&s, expected)
-        });
-    }
-
-    #[inline]
-    fn decimal_length_from_precision(precision: usize) -> i32 {
-        (10.0_f64.powi(precision as i32).log2() / 8.0).ceil() as i32
-    }
-
-    #[test]
-    fn test_print_flba_logical_types() {
-        let types_and_strings = vec![
-            (
-                Type::primitive_type_builder("field", PhysicalType::FIXED_LEN_BYTE_ARRAY)
-                    .with_logical_type(None)
-                    .with_converted_type(ConvertedType::INTERVAL)
-                    .with_length(12)
-                    .with_repetition(Repetition::REQUIRED)
-                    .build()
-                    .unwrap(),
-                "REQUIRED FIXED_LEN_BYTE_ARRAY (12) field (INTERVAL);",
-            ),
-            (
-                Type::primitive_type_builder("field", PhysicalType::FIXED_LEN_BYTE_ARRAY)
-                    .with_logical_type(Some(LogicalType::UUID(Default::default())))
-                    .with_length(16)
-                    .with_repetition(Repetition::REQUIRED)
-                    .build()
-                    .unwrap(),
-                "REQUIRED FIXED_LEN_BYTE_ARRAY (16) field (UUID);",
-            ),
-            (
-                Type::primitive_type_builder(
-                    "decimal",
-                    PhysicalType::FIXED_LEN_BYTE_ARRAY,
-                )
-                .with_logical_type(Some(LogicalType::DECIMAL(DecimalType {
-                    precision: 32,
-                    scale: 20,
-                })))
-                .with_precision(32)
-                .with_scale(20)
-                .with_length(decimal_length_from_precision(32))
-                .with_repetition(Repetition::REPEATED)
-                .build()
-                .unwrap(),
-                "REPEATED FIXED_LEN_BYTE_ARRAY (14) decimal (DECIMAL(32,20));",
-            ),
-        ];
-
-        types_and_strings.into_iter().for_each(|(field, expected)| {
-            let mut s = String::new();
-            {
-                let mut p = Printer::new(&mut s);
-                p.print(&field);
-            }
-            assert_eq!(&s, expected)
-        });
-    }
-
-    #[test]
-    fn test_print_group_type() {
-        let mut s = String::new();
-        {
-            let mut p = Printer::new(&mut s);
-            let f1 = Type::primitive_type_builder("f1", PhysicalType::INT32)
-                .with_repetition(Repetition::REQUIRED)
-                .with_converted_type(ConvertedType::INT_32)
-                .with_id(0)
-                .build();
-            let f2 = Type::primitive_type_builder("f2", PhysicalType::BYTE_ARRAY)
-                .with_converted_type(ConvertedType::UTF8)
-                .with_id(1)
-                .build();
-            let f3 = Type::primitive_type_builder("f3", PhysicalType::BYTE_ARRAY)
-                .with_logical_type(Some(LogicalType::STRING(Default::default())))
-                .with_id(1)
-                .build();
-            let f4 =
-                Type::primitive_type_builder("f4", PhysicalType::FIXED_LEN_BYTE_ARRAY)
-                    .with_repetition(Repetition::REPEATED)
-                    .with_converted_type(ConvertedType::INTERVAL)
-                    .with_length(12)
-                    .with_id(2)
-                    .build();
-            let mut struct_fields = Vec::new();
-            struct_fields.push(Arc::new(f1.unwrap()));
-            struct_fields.push(Arc::new(f2.unwrap()));
-            struct_fields.push(Arc::new(f3.unwrap()));
-            let field = Type::group_type_builder("field")
-                .with_repetition(Repetition::OPTIONAL)
-                .with_fields(&mut struct_fields)
-                .with_id(1)
-                .build()
-                .unwrap();
-            let mut fields = Vec::new();
-            fields.push(Arc::new(field));
-            fields.push(Arc::new(f4.unwrap()));
-            let message = Type::group_type_builder("schema")
-                .with_fields(&mut fields)
-                .with_id(2)
-                .build()
-                .unwrap();
-            p.print(&message);
-        }
-        let expected = "message schema {
-  OPTIONAL group field {
-    REQUIRED INT32 f1 (INT_32);
-    OPTIONAL BYTE_ARRAY f2 (UTF8);
-    OPTIONAL BYTE_ARRAY f3 (STRING);
-  }
-  REPEATED FIXED_LEN_BYTE_ARRAY (12) f4 (INTERVAL);
-}";
-        assert_eq!(&mut s, expected);
-    }
-
-    #[test]
-    fn test_print_and_parse_primitive() {
-        let a2 = Type::primitive_type_builder("a2", PhysicalType::BYTE_ARRAY)
-            .with_repetition(Repetition::REPEATED)
-            .with_converted_type(ConvertedType::UTF8)
-            .build()
-            .unwrap();
-
-        let a1 = Type::group_type_builder("a1")
-            .with_repetition(Repetition::OPTIONAL)
-            .with_logical_type(Some(LogicalType::LIST(Default::default())))
-            .with_converted_type(ConvertedType::LIST)
-            .with_fields(&mut vec![Arc::new(a2)])
-            .build()
-            .unwrap();
-
-        let b3 = Type::primitive_type_builder("b3", PhysicalType::INT32)
-            .with_repetition(Repetition::OPTIONAL)
-            .build()
-            .unwrap();
-
-        let b4 = Type::primitive_type_builder("b4", PhysicalType::DOUBLE)
-            .with_repetition(Repetition::OPTIONAL)
-            .build()
-            .unwrap();
-
-        let b2 = Type::group_type_builder("b2")
-            .with_repetition(Repetition::REPEATED)
-            .with_converted_type(ConvertedType::NONE)
-            .with_fields(&mut vec![Arc::new(b3), Arc::new(b4)])
-            .build()
-            .unwrap();
-
-        let b1 = Type::group_type_builder("b1")
-            .with_repetition(Repetition::OPTIONAL)
-            .with_logical_type(Some(LogicalType::LIST(Default::default())))
-            .with_converted_type(ConvertedType::LIST)
-            .with_fields(&mut vec![Arc::new(b2)])
-            .build()
-            .unwrap();
-
-        let a0 = Type::group_type_builder("a0")
-            .with_repetition(Repetition::REQUIRED)
-            .with_fields(&mut vec![Arc::new(a1), Arc::new(b1)])
-            .build()
-            .unwrap();
-
-        let message = Type::group_type_builder("root")
-            .with_fields(&mut vec![Arc::new(a0)])
-            .build()
-            .unwrap();
-
-        assert_print_parse_message(message);
-    }
-
-    #[test]
-    fn test_print_and_parse_nested() {
-        let f1 = Type::primitive_type_builder("f1", PhysicalType::INT32)
-            .with_repetition(Repetition::REQUIRED)
-            .with_converted_type(ConvertedType::INT_32)
-            .build()
-            .unwrap();
-
-        let f2 = Type::primitive_type_builder("f2", PhysicalType::BYTE_ARRAY)
-            .with_repetition(Repetition::OPTIONAL)
-            .with_converted_type(ConvertedType::UTF8)
-            .build()
-            .unwrap();
-
-        let field = Type::group_type_builder("field")
-            .with_repetition(Repetition::OPTIONAL)
-            .with_fields(&mut vec![Arc::new(f1), Arc::new(f2)])
-            .build()
-            .unwrap();
-
-        let f3 = Type::primitive_type_builder("f3", PhysicalType::FIXED_LEN_BYTE_ARRAY)
-            .with_repetition(Repetition::REPEATED)
-            .with_converted_type(ConvertedType::INTERVAL)
-            .with_length(12)
-            .build()
-            .unwrap();
-
-        let message = Type::group_type_builder("schema")
-            .with_fields(&mut vec![Arc::new(field), Arc::new(f3)])
-            .build()
-            .unwrap();
-
-        assert_print_parse_message(message);
-    }
-
-    #[test]
-    fn test_print_and_parse_decimal() {
-        let f1 = Type::primitive_type_builder("f1", PhysicalType::INT32)
-            .with_repetition(Repetition::OPTIONAL)
-            .with_logical_type(Some(LogicalType::DECIMAL(DecimalType {
-                precision: 9,
-                scale: 2,
-            })))
-            .with_converted_type(ConvertedType::DECIMAL)
-            .with_precision(9)
-            .with_scale(2)
-            .build()
-            .unwrap();
-
-        let f2 = Type::primitive_type_builder("f2", PhysicalType::INT32)
-            .with_repetition(Repetition::OPTIONAL)
-            .with_logical_type(Some(LogicalType::DECIMAL(DecimalType {
-                precision: 9,
-                scale: 0,
-            })))
-            .with_converted_type(ConvertedType::DECIMAL)
-            .with_precision(9)
-            .with_scale(0)
-            .build()
-            .unwrap();
-
-        let message = Type::group_type_builder("schema")
-            .with_fields(&mut vec![Arc::new(f1), Arc::new(f2)])
-            .build()
-            .unwrap();
-
-        assert_print_parse_message(message);
-    }
-}
diff --git a/rust/parquet/src/schema/types.rs b/rust/parquet/src/schema/types.rs
deleted file mode 100644
index 03b2500a3cd..00000000000
--- a/rust/parquet/src/schema/types.rs
+++ /dev/null
@@ -1,2078 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Contains structs and methods to build Parquet schema and schema descriptors.
-
-use std::{collections::HashMap, convert::From, fmt, sync::Arc};
-
-use parquet_format::SchemaElement;
-
-use crate::basic::{
-    ConvertedType, LogicalType, Repetition, TimeType, TimeUnit, Type as PhysicalType,
-};
-use crate::errors::{ParquetError, Result};
-
-// ----------------------------------------------------------------------
-// Parquet Type definitions
-
-/// Type alias for `Arc<Type>`.
-pub type TypePtr = Arc<Type>;
-/// Type alias for `Arc<SchemaDescriptor>`.
-pub type SchemaDescPtr = Arc<SchemaDescriptor>;
-/// Type alias for `Arc<ColumnDescriptor>`.
-pub type ColumnDescPtr = Arc<ColumnDescriptor>;
-
-/// Representation of a Parquet type.
-/// Used to describe primitive leaf fields and structs, including top-level schema.
-/// Note that the top-level schema type is represented using `GroupType` whose
-/// repetition is `None`.
-#[derive(Clone, Debug, PartialEq)]
-pub enum Type {
-    PrimitiveType {
-        basic_info: BasicTypeInfo,
-        physical_type: PhysicalType,
-        type_length: i32,
-        scale: i32,
-        precision: i32,
-    },
-    GroupType {
-        basic_info: BasicTypeInfo,
-        fields: Vec<TypePtr>,
-    },
-}
-
-impl Type {
-    /// Creates primitive type builder with provided field name and physical type.
-    pub fn primitive_type_builder(
-        name: &str,
-        physical_type: PhysicalType,
-    ) -> PrimitiveTypeBuilder {
-        PrimitiveTypeBuilder::new(name, physical_type)
-    }
-
-    /// Creates group type builder with provided column name.
-    pub fn group_type_builder(name: &str) -> GroupTypeBuilder {
-        GroupTypeBuilder::new(name)
-    }
-
-    /// Returns [`BasicTypeInfo`] information about the type.
-    pub fn get_basic_info(&self) -> &BasicTypeInfo {
-        match *self {
-            Type::PrimitiveType { ref basic_info, .. } => &basic_info,
-            Type::GroupType { ref basic_info, .. } => &basic_info,
-        }
-    }
-
-    /// Returns this type's field name.
-    pub fn name(&self) -> &str {
-        self.get_basic_info().name()
-    }
-
-    /// Gets the fields from this group type.
-    /// Note that this will panic if called on a non-group type.
-    // TODO: should we return `&[&Type]` here?
-    pub fn get_fields(&self) -> &[TypePtr] {
-        match *self {
-            Type::GroupType { ref fields, .. } => &fields[..],
-            _ => panic!("Cannot call get_fields() on a non-group type"),
-        }
-    }
-
-    /// Gets physical type of this primitive type.
-    /// Note that this will panic if called on a non-primitive type.
-    pub fn get_physical_type(&self) -> PhysicalType {
-        match *self {
-            Type::PrimitiveType {
-                basic_info: _,
-                physical_type,
-                ..
-            } => physical_type,
-            _ => panic!("Cannot call get_physical_type() on a non-primitive type"),
-        }
-    }
-
-    /// Gets precision of this primitive type.
-    /// Note that this will panic if called on a non-primitive type.
-    pub fn get_precision(&self) -> i32 {
-        match *self {
-            Type::PrimitiveType { precision, .. } => precision,
-            _ => panic!("Cannot call get_precision() on non-primitive type"),
-        }
-    }
-
-    /// Gets scale of this primitive type.
-    /// Note that this will panic if called on a non-primitive type.
-    pub fn get_scale(&self) -> i32 {
-        match *self {
-            Type::PrimitiveType { scale, .. } => scale,
-            _ => panic!("Cannot call get_scale() on non-primitive type"),
-        }
-    }
-
-    /// Checks if `sub_type` schema is part of current schema.
-    /// This method can be used to check if projected columns are part of the root schema.
-    pub fn check_contains(&self, sub_type: &Type) -> bool {
-        // Names match, and repetitions match or not set for both
-        let basic_match = self.get_basic_info().name()
-            == sub_type.get_basic_info().name()
-            && (self.is_schema() && sub_type.is_schema()
-                || !self.is_schema()
-                    && !sub_type.is_schema()
-                    && self.get_basic_info().repetition()
-                        == sub_type.get_basic_info().repetition());
-
-        match *self {
-            Type::PrimitiveType { .. } if basic_match && sub_type.is_primitive() => {
-                self.get_physical_type() == sub_type.get_physical_type()
-            }
-            Type::GroupType { .. } if basic_match && sub_type.is_group() => {
-                // build hashmap of name -> TypePtr
-                let mut field_map = HashMap::new();
-                for field in self.get_fields() {
-                    field_map.insert(field.name(), field);
-                }
-
-                for field in sub_type.get_fields() {
-                    if !field_map
-                        .get(field.name())
-                        .map(|tpe| tpe.check_contains(field))
-                        .unwrap_or(false)
-                    {
-                        return false;
-                    }
-                }
-                true
-            }
-            _ => false,
-        }
-    }
-
-    /// Returns `true` if this type is a primitive type, `false` otherwise.
-    pub fn is_primitive(&self) -> bool {
-        matches!(*self, Type::PrimitiveType { .. })
-    }
-
-    /// Returns `true` if this type is a group type, `false` otherwise.
-    pub fn is_group(&self) -> bool {
-        matches!(*self, Type::GroupType { .. })
-    }
-
-    /// Returns `true` if this type is the top-level schema type (message type).
-    pub fn is_schema(&self) -> bool {
-        match *self {
-            Type::GroupType { ref basic_info, .. } => !basic_info.has_repetition(),
-            _ => false,
-        }
-    }
-
-    /// Returns `true` if this type is repeated or optional.
-    /// If this type doesn't have repetition defined, we still treat it as optional.
-    pub fn is_optional(&self) -> bool {
-        self.get_basic_info().has_repetition()
-            && self.get_basic_info().repetition() != Repetition::REQUIRED
-    }
-}
-
-/// A builder for primitive types. All attributes are optional
-/// except the name and physical type.
-/// Note that if not specified explicitly, `Repetition::OPTIONAL` is used.
-pub struct PrimitiveTypeBuilder<'a> {
-    name: &'a str,
-    repetition: Repetition,
-    physical_type: PhysicalType,
-    converted_type: ConvertedType,
-    logical_type: Option<LogicalType>,
-    length: i32,
-    precision: i32,
-    scale: i32,
-    id: Option<i32>,
-}
-
-impl<'a> PrimitiveTypeBuilder<'a> {
-    /// Creates new primitive type builder with provided field name and physical type.
-    pub fn new(name: &'a str, physical_type: PhysicalType) -> Self {
-        Self {
-            name,
-            repetition: Repetition::OPTIONAL,
-            physical_type,
-            converted_type: ConvertedType::NONE,
-            logical_type: None,
-            length: -1,
-            precision: -1,
-            scale: -1,
-            id: None,
-        }
-    }
-
-    /// Sets [`Repetition`](crate::basic::Repetition) for this field and returns itself.
-    pub fn with_repetition(mut self, repetition: Repetition) -> Self {
-        self.repetition = repetition;
-        self
-    }
-
-    /// Sets [`ConvertedType`](crate::basic::ConvertedType) for this field and returns itself.
-    pub fn with_converted_type(mut self, converted_type: ConvertedType) -> Self {
-        self.converted_type = converted_type;
-        self
-    }
-
-    /// Sets [`LogicalType`](crate::basic::LogicalType) for this field and returns itself.
-    /// If only the logical type is populated for a primitive type, the converted type
-    /// will be automatically populated, and can thus be omitted.
-    pub fn with_logical_type(mut self, logical_type: Option<LogicalType>) -> Self {
-        self.logical_type = logical_type;
-        self
-    }
-
-    /// Sets type length and returns itself.
-    /// This is only applied to FIXED_LEN_BYTE_ARRAY and INT96 (INTERVAL) types, because
-    /// they maintain fixed size underlying byte array.
-    /// By default, value is `0`.
-    pub fn with_length(mut self, length: i32) -> Self {
-        self.length = length;
-        self
-    }
-
-    /// Sets precision for Parquet DECIMAL physical type and returns itself.
-    /// By default, it equals to `0` and used only for decimal context.
-    pub fn with_precision(mut self, precision: i32) -> Self {
-        self.precision = precision;
-        self
-    }
-
-    /// Sets scale for Parquet DECIMAL physical type and returns itself.
-    /// By default, it equals to `0` and used only for decimal context.
-    pub fn with_scale(mut self, scale: i32) -> Self {
-        self.scale = scale;
-        self
-    }
-
-    /// Sets optional field id and returns itself.
-    pub fn with_id(mut self, id: i32) -> Self {
-        self.id = Some(id);
-        self
-    }
-
-    /// Creates a new `PrimitiveType` instance from the collected attributes.
-    /// Returns `Err` in case of any building conditions are not met.
-    pub fn build(self) -> Result<Type> {
-        let mut basic_info = BasicTypeInfo {
-            name: String::from(self.name),
-            repetition: Some(self.repetition),
-            converted_type: self.converted_type,
-            logical_type: self.logical_type.clone(),
-            id: self.id,
-        };
-
-        // Check length before logical type, since it is used for logical type validation.
-        if self.physical_type == PhysicalType::FIXED_LEN_BYTE_ARRAY && self.length < 0 {
-            return Err(general_err!(
-                "Invalid FIXED_LEN_BYTE_ARRAY length: {}",
-                self.length
-            ));
-        }
-
-        match &self.logical_type {
-            Some(logical_type) => {
-                // If a converted type is populated, check that it is consistent with
-                // its logical type
-                if self.converted_type != ConvertedType::NONE {
-                    if ConvertedType::from(self.logical_type.clone())
-                        != self.converted_type
-                    {
-                        return Err(general_err!(
-                            "Logical type {:?} is imcompatible with converted type {}",
-                            logical_type,
-                            self.converted_type
-                        ));
-                    }
-                } else {
-                    // Populate the converted type for backwards compatibility
-                    basic_info.converted_type = self.logical_type.clone().into();
-                }
-                // Check that logical type and physical type are compatible
-                match (logical_type, self.physical_type) {
-                    (LogicalType::MAP(_), _) | (LogicalType::LIST(_), _) => {
-                        return Err(general_err!(
-                            "{:?} cannot be applied to a primitive type",
-                            logical_type
-                        ));
-                    }
-                    (LogicalType::ENUM(_), PhysicalType::BYTE_ARRAY) => {}
-                    (LogicalType::DECIMAL(t), _) => {
-                        // Check that scale and precision are consistent with legacy values
-                        if t.scale != self.scale {
-                            return Err(general_err!(
-                                "DECIMAL logical type scale {} must match self.scale {}",
-                                t.scale,
-                                self.scale
-                            ));
-                        }
-                        if t.precision != self.precision {
-                            return Err(general_err!(
-                                "DECIMAL logical type precision {} must match self.precision {}",
-                                t.precision,
-                                self.precision
-                            ));
-                        }
-                        self.check_decimal_precision_scale()?;
-                    }
-                    (LogicalType::DATE(_), PhysicalType::INT32) => {}
-                    (
-                        LogicalType::TIME(TimeType {
-                            unit: TimeUnit::MILLIS(_),
-                            ..
-                        }),
-                        PhysicalType::INT32,
-                    ) => {}
-                    (LogicalType::TIME(t), PhysicalType::INT64) => {
-                        if t.unit == TimeUnit::MILLIS(Default::default()) {
-                            return Err(general_err!(
-                                "Cannot use millisecond unit on INT64 type"
-                            ));
-                        }
-                    }
-                    (LogicalType::TIMESTAMP(_), PhysicalType::INT64) => {}
-                    (LogicalType::INTEGER(t), PhysicalType::INT32)
-                        if t.bit_width <= 32 => {}
-                    (LogicalType::INTEGER(t), PhysicalType::INT64)
-                        if t.bit_width == 64 => {}
-                    // Null type
-                    (LogicalType::UNKNOWN(_), PhysicalType::INT32) => {}
-                    (LogicalType::STRING(_), PhysicalType::BYTE_ARRAY) => {}
-                    (LogicalType::JSON(_), PhysicalType::BYTE_ARRAY) => {}
-                    (LogicalType::BSON(_), PhysicalType::BYTE_ARRAY) => {}
-                    (LogicalType::UUID(_), PhysicalType::FIXED_LEN_BYTE_ARRAY) => {}
-                    (a, b) => {
-                        return Err(general_err!(
-                            "Cannot annotate {:?} from {} fields",
-                            a,
-                            b
-                        ))
-                    }
-                }
-            }
-            None => {}
-        }
-
-        match self.converted_type {
-            ConvertedType::NONE => {}
-            ConvertedType::UTF8 | ConvertedType::BSON | ConvertedType::JSON => {
-                if self.physical_type != PhysicalType::BYTE_ARRAY {
-                    return Err(general_err!(
-                        "{} can only annotate BYTE_ARRAY fields",
-                        self.converted_type
-                    ));
-                }
-            }
-            ConvertedType::DECIMAL => {
-                self.check_decimal_precision_scale()?;
-            }
-            ConvertedType::DATE
-            | ConvertedType::TIME_MILLIS
-            | ConvertedType::UINT_8
-            | ConvertedType::UINT_16
-            | ConvertedType::UINT_32
-            | ConvertedType::INT_8
-            | ConvertedType::INT_16
-            | ConvertedType::INT_32 => {
-                if self.physical_type != PhysicalType::INT32 {
-                    return Err(general_err!(
-                        "{} can only annotate INT32",
-                        self.converted_type
-                    ));
-                }
-            }
-            ConvertedType::TIME_MICROS
-            | ConvertedType::TIMESTAMP_MILLIS
-            | ConvertedType::TIMESTAMP_MICROS
-            | ConvertedType::UINT_64
-            | ConvertedType::INT_64 => {
-                if self.physical_type != PhysicalType::INT64 {
-                    return Err(general_err!(
-                        "{} can only annotate INT64",
-                        self.converted_type
-                    ));
-                }
-            }
-            ConvertedType::INTERVAL => {
-                if self.physical_type != PhysicalType::FIXED_LEN_BYTE_ARRAY
-                    || self.length != 12
-                {
-                    return Err(general_err!(
-                        "INTERVAL can only annotate FIXED_LEN_BYTE_ARRAY(12)"
-                    ));
-                }
-            }
-            ConvertedType::ENUM => {
-                if self.physical_type != PhysicalType::BYTE_ARRAY {
-                    return Err(general_err!("ENUM can only annotate BYTE_ARRAY fields"));
-                }
-            }
-            _ => {
-                return Err(general_err!(
-                    "{} cannot be applied to a primitive type",
-                    self.converted_type
-                ));
-            }
-        }
-
-        Ok(Type::PrimitiveType {
-            basic_info,
-            physical_type: self.physical_type,
-            type_length: self.length,
-            scale: self.scale,
-            precision: self.precision,
-        })
-    }
-
-    #[inline]
-    fn check_decimal_precision_scale(&self) -> Result<()> {
-        match self.physical_type {
-            PhysicalType::INT32
-            | PhysicalType::INT64
-            | PhysicalType::BYTE_ARRAY
-            | PhysicalType::FIXED_LEN_BYTE_ARRAY => (),
-            _ => {
-                return Err(general_err!(
-                    "DECIMAL can only annotate INT32, INT64, BYTE_ARRAY and FIXED_LEN_BYTE_ARRAY"
-                ));
-            }
-        }
-
-        // Precision is required and must be a non-zero positive integer.
-        if self.precision < 1 {
-            return Err(general_err!(
-                "Invalid DECIMAL precision: {}",
-                self.precision
-            ));
-        }
-
-        // Scale must be zero or a positive integer less than the precision.
-        if self.scale < 0 {
-            return Err(general_err!("Invalid DECIMAL scale: {}", self.scale));
-        }
-
-        if self.scale >= self.precision {
-            return Err(general_err!(
-            "Invalid DECIMAL: scale ({}) cannot be greater than or equal to precision \
-             ({})",
-            self.scale,
-            self.precision
-        ));
-        }
-
-        // Check precision and scale based on physical type limitations.
-        match self.physical_type {
-            PhysicalType::INT32 => {
-                if self.precision > 9 {
-                    return Err(general_err!(
-                        "Cannot represent INT32 as DECIMAL with precision {}",
-                        self.precision
-                    ));
-                }
-            }
-            PhysicalType::INT64 => {
-                if self.precision > 18 {
-                    return Err(general_err!(
-                        "Cannot represent INT64 as DECIMAL with precision {}",
-                        self.precision
-                    ));
-                }
-            }
-            PhysicalType::FIXED_LEN_BYTE_ARRAY => {
-                let max_precision =
-                    (2f64.powi(8 * self.length - 1) - 1f64).log10().floor() as i32;
-
-                if self.precision > max_precision {
-                    return Err(general_err!(
-                        "Cannot represent FIXED_LEN_BYTE_ARRAY as DECIMAL with length {} and \
-                        precision {}. The max precision can only be {}",
-                        self.length,
-                        self.precision,
-                        max_precision
-                    ));
-                }
-            }
-            _ => (), // For BYTE_ARRAY precision is not limited
-        }
-
-        Ok(())
-    }
-}
-
-/// A builder for group types. All attributes are optional except the name.
-/// Note that if not specified explicitly, `None` is used as the repetition of the group,
-/// which means it is a root (message) type.
-pub struct GroupTypeBuilder<'a> {
-    name: &'a str,
-    repetition: Option<Repetition>,
-    converted_type: ConvertedType,
-    logical_type: Option<LogicalType>,
-    fields: Vec<TypePtr>,
-    id: Option<i32>,
-}
-
-impl<'a> GroupTypeBuilder<'a> {
-    /// Creates new group type builder with provided field name.
-    pub fn new(name: &'a str) -> Self {
-        Self {
-            name,
-            repetition: None,
-            converted_type: ConvertedType::NONE,
-            logical_type: None,
-            fields: Vec::new(),
-            id: None,
-        }
-    }
-
-    /// Sets [`Repetition`](crate::basic::Repetition) for this field and returns itself.
-    pub fn with_repetition(mut self, repetition: Repetition) -> Self {
-        self.repetition = Some(repetition);
-        self
-    }
-
-    /// Sets [`ConvertedType`](crate::basic::ConvertedType) for this field and returns itself.
-    pub fn with_converted_type(mut self, converted_type: ConvertedType) -> Self {
-        self.converted_type = converted_type;
-        self
-    }
-
-    /// Sets [`LogicalType`](crate::basic::LogicalType) for this field and returns itself.
-    pub fn with_logical_type(mut self, logical_type: Option<LogicalType>) -> Self {
-        self.logical_type = logical_type;
-        self
-    }
-
-    /// Sets a list of fields that should be child nodes of this field.
-    /// Returns updated self.
-    pub fn with_fields(mut self, fields: &mut Vec<TypePtr>) -> Self {
-        self.fields.append(fields);
-        self
-    }
-
-    /// Sets optional field id and returns itself.
-    pub fn with_id(mut self, id: i32) -> Self {
-        self.id = Some(id);
-        self
-    }
-
-    /// Creates a new `GroupType` instance from the gathered attributes.
-    pub fn build(self) -> Result<Type> {
-        let mut basic_info = BasicTypeInfo {
-            name: String::from(self.name),
-            repetition: self.repetition,
-            converted_type: self.converted_type,
-            logical_type: self.logical_type.clone(),
-            id: self.id,
-        };
-        // Populate the converted type if only the logical type is populated
-        if self.logical_type.is_some() && self.converted_type == ConvertedType::NONE {
-            basic_info.converted_type = self.logical_type.into();
-        }
-        Ok(Type::GroupType {
-            basic_info,
-            fields: self.fields,
-        })
-    }
-}
-
-/// Basic type info. This contains information such as the name of the type,
-/// the repetition level, the logical type and the kind of the type (group, primitive).
-#[derive(Clone, Debug, PartialEq)]
-pub struct BasicTypeInfo {
-    name: String,
-    repetition: Option<Repetition>,
-    converted_type: ConvertedType,
-    logical_type: Option<LogicalType>,
-    id: Option<i32>,
-}
-
-impl BasicTypeInfo {
-    /// Returns field name.
-    pub fn name(&self) -> &str {
-        &self.name
-    }
-
-    /// Returns `true` if type has repetition field set, `false` otherwise.
-    /// This is mostly applied to group type, because primitive type always has
-    /// repetition set.
-    pub fn has_repetition(&self) -> bool {
-        self.repetition.is_some()
-    }
-
-    /// Returns [`Repetition`](crate::basic::Repetition) value for the type.
-    pub fn repetition(&self) -> Repetition {
-        assert!(self.repetition.is_some());
-        self.repetition.unwrap()
-    }
-
-    /// Returns [`ConvertedType`](crate::basic::ConvertedType) value for the type.
-    pub fn converted_type(&self) -> ConvertedType {
-        self.converted_type
-    }
-
-    /// Returns [`LogicalType`](crate::basic::LogicalType) value for the type.
-    pub fn logical_type(&self) -> Option<LogicalType> {
-        // Unlike ConvertedType, LogicalType cannot implement Copy, thus we clone it
-        self.logical_type.clone()
-    }
-
-    /// Returns `true` if id is set, `false` otherwise.
-    pub fn has_id(&self) -> bool {
-        self.id.is_some()
-    }
-
-    /// Returns id value for the type.
-    pub fn id(&self) -> i32 {
-        assert!(self.id.is_some());
-        self.id.unwrap()
-    }
-}
-
-// ----------------------------------------------------------------------
-// Parquet descriptor definitions
-
-/// Represents a path in a nested schema
-#[derive(Clone, PartialEq, Debug, Eq, Hash)]
-pub struct ColumnPath {
-    parts: Vec<String>,
-}
-
-impl ColumnPath {
-    /// Creates new column path from vector of field names.
-    pub fn new(parts: Vec<String>) -> Self {
-        ColumnPath { parts }
-    }
-
-    /// Returns string representation of this column path.
-    /// ```rust
-    /// use parquet::schema::types::ColumnPath;
-    ///
-    /// let path = ColumnPath::new(vec!["a".to_string(), "b".to_string(), "c".to_string()]);
-    /// assert_eq!(&path.string(), "a.b.c");
-    /// ```
-    pub fn string(&self) -> String {
-        self.parts.join(".")
-    }
-
-    /// Appends more components to end of column path.
-    /// ```rust
-    /// use parquet::schema::types::ColumnPath;
-    ///
-    /// let mut path = ColumnPath::new(vec!["a".to_string(), "b".to_string(), "c"
-    /// .to_string()]);
-    /// assert_eq!(&path.string(), "a.b.c");
-    ///
-    /// path.append(vec!["d".to_string(), "e".to_string()]);
-    /// assert_eq!(&path.string(), "a.b.c.d.e");
-    /// ```
-    pub fn append(&mut self, mut tail: Vec<String>) {
-        self.parts.append(&mut tail);
-    }
-
-    pub fn parts(&self) -> &[String] {
-        &self.parts
-    }
-}
-
-impl fmt::Display for ColumnPath {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "{:?}", self.string())
-    }
-}
-
-impl From<Vec<String>> for ColumnPath {
-    fn from(parts: Vec<String>) -> Self {
-        ColumnPath { parts }
-    }
-}
-
-impl<'a> From<&'a str> for ColumnPath {
-    fn from(single_path: &str) -> Self {
-        let s = String::from(single_path);
-        ColumnPath::from(s)
-    }
-}
-
-impl From<String> for ColumnPath {
-    fn from(single_path: String) -> Self {
-        let v = vec![single_path];
-        ColumnPath { parts: v }
-    }
-}
-
-impl AsRef<[String]> for ColumnPath {
-    fn as_ref(&self) -> &[String] {
-        &self.parts
-    }
-}
-
-/// A descriptor for leaf-level primitive columns.
-/// This encapsulates information such as definition and repetition levels and is used to
-/// re-assemble nested data.
-#[derive(Debug, PartialEq)]
-pub struct ColumnDescriptor {
-    // The "leaf" primitive type of this column
-    primitive_type: TypePtr,
-
-    // The maximum definition level for this column
-    max_def_level: i16,
-
-    // The maximum repetition level for this column
-    max_rep_level: i16,
-
-    // The path of this column. For instance, "a.b.c.d".
-    path: ColumnPath,
-}
-
-impl ColumnDescriptor {
-    /// Creates new descriptor for leaf-level column.
-    pub fn new(
-        primitive_type: TypePtr,
-        max_def_level: i16,
-        max_rep_level: i16,
-        path: ColumnPath,
-    ) -> Self {
-        Self {
-            primitive_type,
-            max_def_level,
-            max_rep_level,
-            path,
-        }
-    }
-
-    /// Returns maximum definition level for this column.
-    pub fn max_def_level(&self) -> i16 {
-        self.max_def_level
-    }
-
-    /// Returns maximum repetition level for this column.
-    pub fn max_rep_level(&self) -> i16 {
-        self.max_rep_level
-    }
-
-    /// Returns [`ColumnPath`] for this column.
-    pub fn path(&self) -> &ColumnPath {
-        &self.path
-    }
-
-    /// Returns self type [`Type`](crate::schema::types::Type) for this leaf column.
-    pub fn self_type(&self) -> &Type {
-        self.primitive_type.as_ref()
-    }
-
-    /// Returns self type [`TypePtr`](crate::schema::types::TypePtr)  for this leaf
-    /// column.
-    pub fn self_type_ptr(&self) -> TypePtr {
-        self.primitive_type.clone()
-    }
-
-    /// Returns column name.
-    pub fn name(&self) -> &str {
-        self.primitive_type.name()
-    }
-
-    /// Returns [`ConvertedType`](crate::basic::ConvertedType) for this column.
-    pub fn converted_type(&self) -> ConvertedType {
-        self.primitive_type.get_basic_info().converted_type()
-    }
-
-    /// Returns [`LogicalType`](crate::basic::LogicalType) for this column.
-    pub fn logical_type(&self) -> Option<LogicalType> {
-        self.primitive_type.get_basic_info().logical_type()
-    }
-
-    /// Returns physical type for this column.
-    /// Note that it will panic if called on a non-primitive type.
-    pub fn physical_type(&self) -> PhysicalType {
-        match self.primitive_type.as_ref() {
-            Type::PrimitiveType { physical_type, .. } => *physical_type,
-            _ => panic!("Expected primitive type!"),
-        }
-    }
-
-    /// Returns type length for this column.
-    /// Note that it will panic if called on a non-primitive type.
-    pub fn type_length(&self) -> i32 {
-        match self.primitive_type.as_ref() {
-            Type::PrimitiveType { type_length, .. } => *type_length,
-            _ => panic!("Expected primitive type!"),
-        }
-    }
-
-    /// Returns type precision for this column.
-    /// Note that it will panic if called on a non-primitive type.
-    pub fn type_precision(&self) -> i32 {
-        match self.primitive_type.as_ref() {
-            Type::PrimitiveType { precision, .. } => *precision,
-            _ => panic!("Expected primitive type!"),
-        }
-    }
-
-    /// Returns type scale for this column.
-    /// Note that it will panic if called on a non-primitive type.
-    pub fn type_scale(&self) -> i32 {
-        match self.primitive_type.as_ref() {
-            Type::PrimitiveType { scale, .. } => *scale,
-            _ => panic!("Expected primitive type!"),
-        }
-    }
-}
-
-/// A schema descriptor. This encapsulates the top-level schemas for all the columns,
-/// as well as all descriptors for all the primitive columns.
-pub struct SchemaDescriptor {
-    // The top-level schema (the "message" type).
-    // This must be a `GroupType` where each field is a root column type in the schema.
-    schema: TypePtr,
-
-    // All the descriptors for primitive columns in this schema, constructed from
-    // `schema` in DFS order.
-    leaves: Vec<ColumnDescPtr>,
-
-    // Mapping from a leaf column's index to the root column type that it
-    // comes from. For instance: the leaf `a.b.c.d` would have a link back to `a`:
-    // -- a  <-----+
-    // -- -- b     |
-    // -- -- -- c  |
-    // -- -- -- -- d
-    leaf_to_base: Vec<TypePtr>,
-}
-
-impl fmt::Debug for SchemaDescriptor {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        // Skip leaves and leaf_to_base as they only a cache information already found in `schema`
-        f.debug_struct("SchemaDescriptor")
-            .field("schema", &self.schema)
-            .finish()
-    }
-}
-
-impl SchemaDescriptor {
-    /// Creates new schema descriptor from Parquet schema.
-    pub fn new(tp: TypePtr) -> Self {
-        assert!(tp.is_group(), "SchemaDescriptor should take a GroupType");
-        let mut leaves = vec![];
-        let mut leaf_to_base = Vec::new();
-        for f in tp.get_fields() {
-            let mut path = vec![];
-            build_tree(f, f, 0, 0, &mut leaves, &mut leaf_to_base, &mut path);
-        }
-
-        Self {
-            schema: tp,
-            leaves,
-            leaf_to_base,
-        }
-    }
-
-    /// Returns [`ColumnDescriptor`] for a field position.
-    pub fn column(&self, i: usize) -> ColumnDescPtr {
-        assert!(
-            i < self.leaves.len(),
-            "Index out of bound: {} not in [0, {})",
-            i,
-            self.leaves.len()
-        );
-        self.leaves[i].clone()
-    }
-
-    /// Returns slice of [`ColumnDescriptor`].
-    pub fn columns(&self) -> &[ColumnDescPtr] {
-        &self.leaves
-    }
-
-    /// Returns number of leaf-level columns.
-    pub fn num_columns(&self) -> usize {
-        self.leaves.len()
-    }
-
-    /// Returns column root [`Type`](crate::schema::types::Type) for a field position.
-    pub fn get_column_root(&self, i: usize) -> &Type {
-        let result = self.column_root_of(i);
-        result.as_ref()
-    }
-
-    /// Returns column root [`Type`](crate::schema::types::Type) pointer for a field
-    /// position.
-    pub fn get_column_root_ptr(&self, i: usize) -> TypePtr {
-        let result = self.column_root_of(i);
-        result.clone()
-    }
-
-    fn column_root_of(&self, i: usize) -> &Arc<Type> {
-        assert!(
-            i < self.leaves.len(),
-            "Index out of bound: {} not in [0, {})",
-            i,
-            self.leaves.len()
-        );
-
-        self.leaf_to_base
-            .get(i)
-            .unwrap_or_else(|| panic!("Expected a value for index {} but found None", i))
-    }
-
-    /// Returns schema as [`Type`](crate::schema::types::Type).
-    pub fn root_schema(&self) -> &Type {
-        self.schema.as_ref()
-    }
-
-    pub fn root_schema_ptr(&self) -> TypePtr {
-        self.schema.clone()
-    }
-
-    /// Returns schema name.
-    pub fn name(&self) -> &str {
-        self.schema.name()
-    }
-}
-
-fn build_tree<'a>(
-    tp: &'a TypePtr,
-    base_tp: &TypePtr,
-    mut max_rep_level: i16,
-    mut max_def_level: i16,
-    leaves: &mut Vec<ColumnDescPtr>,
-    leaf_to_base: &mut Vec<TypePtr>,
-    path_so_far: &mut Vec<&'a str>,
-) {
-    assert!(tp.get_basic_info().has_repetition());
-
-    path_so_far.push(tp.name());
-    match tp.get_basic_info().repetition() {
-        Repetition::OPTIONAL => {
-            max_def_level += 1;
-        }
-        Repetition::REPEATED => {
-            max_def_level += 1;
-            max_rep_level += 1;
-        }
-        _ => {}
-    }
-
-    match tp.as_ref() {
-        Type::PrimitiveType { .. } => {
-            let mut path: Vec<String> = vec![];
-            path.extend(path_so_far.iter().copied().map(String::from));
-            leaves.push(Arc::new(ColumnDescriptor::new(
-                tp.clone(),
-                max_def_level,
-                max_rep_level,
-                ColumnPath::new(path),
-            )));
-            leaf_to_base.push(base_tp.clone());
-        }
-        Type::GroupType { ref fields, .. } => {
-            for f in fields {
-                build_tree(
-                    f,
-                    base_tp,
-                    max_rep_level,
-                    max_def_level,
-                    leaves,
-                    leaf_to_base,
-                    path_so_far,
-                );
-                path_so_far.pop();
-            }
-        }
-    }
-}
-
-/// Method to convert from Thrift.
-pub fn from_thrift(elements: &[SchemaElement]) -> Result<TypePtr> {
-    let mut index = 0;
-    let mut schema_nodes = Vec::new();
-    while index < elements.len() {
-        let t = from_thrift_helper(elements, index)?;
-        index = t.0;
-        schema_nodes.push(t.1);
-    }
-    if schema_nodes.len() != 1 {
-        return Err(general_err!(
-            "Expected exactly one root node, but found {}",
-            schema_nodes.len()
-        ));
-    }
-
-    Ok(schema_nodes.remove(0))
-}
-
-/// Constructs a new Type from the `elements`, starting at index `index`.
-/// The first result is the starting index for the next Type after this one. If it is
-/// equal to `elements.len()`, then this Type is the last one.
-/// The second result is the result Type.
-fn from_thrift_helper(
-    elements: &[SchemaElement],
-    index: usize,
-) -> Result<(usize, TypePtr)> {
-    // Whether or not the current node is root (message type).
-    // There is only one message type node in the schema tree.
-    let is_root_node = index == 0;
-
-    if index > elements.len() {
-        return Err(general_err!(
-            "Index out of bound, index = {}, len = {}",
-            index,
-            elements.len()
-        ));
-    }
-    let element = &elements[index];
-    let converted_type = ConvertedType::from(element.converted_type);
-    // LogicalType is only present in v2 Parquet files. ConvertedType is always
-    // populated, regardless of the version of the file (v1 or v2).
-    let logical_type = element
-        .logical_type
-        .as_ref()
-        .map(|value| LogicalType::from(value.clone()));
-    let field_id = elements[index].field_id;
-    match elements[index].num_children {
-        // From parquet-format:
-        //   The children count is used to construct the nested relationship.
-        //   This field is not set when the element is a primitive type
-        // Sometimes parquet-cpp sets num_children field to 0 for primitive types, so we
-        // have to handle this case too.
-        None | Some(0) => {
-            // primitive type
-            if elements[index].repetition_type.is_none() {
-                return Err(general_err!(
-                    "Repetition level must be defined for a primitive type"
-                ));
-            }
-            let repetition = Repetition::from(elements[index].repetition_type.unwrap());
-            let physical_type = PhysicalType::from(elements[index].type_.unwrap());
-            let length = elements[index].type_length.unwrap_or(-1);
-            let scale = elements[index].scale.unwrap_or(-1);
-            let precision = elements[index].precision.unwrap_or(-1);
-            let name = &elements[index].name;
-            let mut builder = Type::primitive_type_builder(name, physical_type)
-                .with_repetition(repetition)
-                .with_converted_type(converted_type)
-                .with_logical_type(logical_type)
-                .with_length(length)
-                .with_precision(precision)
-                .with_scale(scale);
-            if let Some(id) = field_id {
-                builder = builder.with_id(id);
-            }
-            Ok((index + 1, Arc::new(builder.build()?)))
-        }
-        Some(n) => {
-            let repetition = elements[index].repetition_type.map(Repetition::from);
-            let mut fields = vec![];
-            let mut next_index = index + 1;
-            for _ in 0..n {
-                let child_result = from_thrift_helper(elements, next_index as usize)?;
-                next_index = child_result.0;
-                fields.push(child_result.1);
-            }
-
-            let mut builder = Type::group_type_builder(&elements[index].name)
-                .with_converted_type(converted_type)
-                .with_logical_type(logical_type)
-                .with_fields(&mut fields);
-            if let Some(rep) = repetition {
-                // Sometimes parquet-cpp and parquet-mr set repetition level REQUIRED or
-                // REPEATED for root node.
-                //
-                // We only set repetition for group types that are not top-level message
-                // type. According to parquet-format:
-                //   Root of the schema does not have a repetition_type.
-                //   All other types must have one.
-                if !is_root_node {
-                    builder = builder.with_repetition(rep);
-                }
-            }
-            if let Some(id) = field_id {
-                builder = builder.with_id(id);
-            }
-            Ok((next_index, Arc::new(builder.build().unwrap())))
-        }
-    }
-}
-
-/// Method to convert to Thrift.
-pub fn to_thrift(schema: &Type) -> Result<Vec<SchemaElement>> {
-    if !schema.is_group() {
-        return Err(general_err!("Root schema must be Group type"));
-    }
-    let mut elements: Vec<SchemaElement> = Vec::new();
-    to_thrift_helper(schema, &mut elements);
-    Ok(elements)
-}
-
-/// Constructs list of `SchemaElement` from the schema using depth-first traversal.
-/// Here we assume that schema is always valid and starts with group type.
-fn to_thrift_helper(schema: &Type, elements: &mut Vec<SchemaElement>) {
-    match *schema {
-        Type::PrimitiveType {
-            ref basic_info,
-            physical_type,
-            type_length,
-            scale,
-            precision,
-        } => {
-            let element = SchemaElement {
-                type_: Some(physical_type.into()),
-                type_length: if type_length >= 0 {
-                    Some(type_length)
-                } else {
-                    None
-                },
-                repetition_type: Some(basic_info.repetition().into()),
-                name: basic_info.name().to_owned(),
-                num_children: None,
-                converted_type: basic_info.converted_type().into(),
-                scale: if scale >= 0 { Some(scale) } else { None },
-                precision: if precision >= 0 {
-                    Some(precision)
-                } else {
-                    None
-                },
-                field_id: if basic_info.has_id() {
-                    Some(basic_info.id())
-                } else {
-                    None
-                },
-                logical_type: basic_info.logical_type().map(|value| value.into()),
-            };
-
-            elements.push(element);
-        }
-        Type::GroupType {
-            ref basic_info,
-            ref fields,
-        } => {
-            let repetition = if basic_info.has_repetition() {
-                Some(basic_info.repetition().into())
-            } else {
-                None
-            };
-
-            let element = SchemaElement {
-                type_: None,
-                type_length: None,
-                repetition_type: repetition,
-                name: basic_info.name().to_owned(),
-                num_children: Some(fields.len() as i32),
-                converted_type: basic_info.converted_type().into(),
-                scale: None,
-                precision: None,
-                field_id: if basic_info.has_id() {
-                    Some(basic_info.id())
-                } else {
-                    None
-                },
-                logical_type: basic_info.logical_type().map(|value| value.into()),
-            };
-
-            elements.push(element);
-
-            // Add child elements for a group
-            for field in fields {
-                to_thrift_helper(field, elements);
-            }
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    use crate::basic::{DecimalType, IntType};
-    use crate::schema::parser::parse_message_type;
-
-    // TODO: add tests for v2 types
-
-    #[test]
-    fn test_primitive_type() {
-        let mut result = Type::primitive_type_builder("foo", PhysicalType::INT32)
-            .with_logical_type(Some(LogicalType::INTEGER(IntType {
-                bit_width: 32,
-                is_signed: true,
-            })))
-            .with_id(0)
-            .build();
-        assert!(result.is_ok());
-
-        if let Ok(tp) = result {
-            assert!(tp.is_primitive());
-            assert!(!tp.is_group());
-            let basic_info = tp.get_basic_info();
-            assert_eq!(basic_info.repetition(), Repetition::OPTIONAL);
-            assert_eq!(
-                basic_info.logical_type(),
-                Some(LogicalType::INTEGER(IntType {
-                    bit_width: 32,
-                    is_signed: true
-                }))
-            );
-            assert_eq!(basic_info.converted_type(), ConvertedType::INT_32);
-            assert_eq!(basic_info.id(), 0);
-            match tp {
-                Type::PrimitiveType { physical_type, .. } => {
-                    assert_eq!(physical_type, PhysicalType::INT32);
-                }
-                _ => panic!(),
-            }
-        }
-
-        // Test illegal inputs with logical type
-        result = Type::primitive_type_builder("foo", PhysicalType::INT64)
-            .with_repetition(Repetition::REPEATED)
-            .with_logical_type(Some(LogicalType::INTEGER(IntType {
-                is_signed: true,
-                bit_width: 8,
-            })))
-            .build();
-        assert!(result.is_err());
-        if let Err(e) = result {
-            assert_eq!(
-                format!("{}", e),
-                "Parquet error: Cannot annotate INTEGER(IntType { bit_width: 8, is_signed: true }) from INT64 fields"
-            );
-        }
-
-        // Test illegal inputs with converted type
-        result = Type::primitive_type_builder("foo", PhysicalType::INT64)
-            .with_repetition(Repetition::REPEATED)
-            .with_converted_type(ConvertedType::BSON)
-            .build();
-        assert!(result.is_err());
-        if let Err(e) = result {
-            assert_eq!(
-                format!("{}", e),
-                "Parquet error: BSON can only annotate BYTE_ARRAY fields"
-            );
-        }
-
-        result = Type::primitive_type_builder("foo", PhysicalType::INT96)
-            .with_repetition(Repetition::REQUIRED)
-            .with_converted_type(ConvertedType::DECIMAL)
-            .with_precision(-1)
-            .with_scale(-1)
-            .build();
-        assert!(result.is_err());
-        if let Err(e) = result {
-            assert_eq!(
-                format!("{}", e),
-                "Parquet error: DECIMAL can only annotate INT32, INT64, BYTE_ARRAY and FIXED_LEN_BYTE_ARRAY"
-            );
-        }
-
-        result = Type::primitive_type_builder("foo", PhysicalType::BYTE_ARRAY)
-            .with_repetition(Repetition::REQUIRED)
-            .with_logical_type(Some(LogicalType::DECIMAL(DecimalType {
-                scale: 32,
-                precision: 12,
-            })))
-            .with_precision(-1)
-            .with_scale(-1)
-            .build();
-        assert!(result.is_err());
-        if let Err(e) = result {
-            assert_eq!(
-                format!("{}", e),
-                "Parquet error: DECIMAL logical type scale 32 must match self.scale -1"
-            );
-        }
-
-        result = Type::primitive_type_builder("foo", PhysicalType::BYTE_ARRAY)
-            .with_repetition(Repetition::REQUIRED)
-            .with_converted_type(ConvertedType::DECIMAL)
-            .with_precision(-1)
-            .with_scale(-1)
-            .build();
-        assert!(result.is_err());
-        if let Err(e) = result {
-            assert_eq!(
-                format!("{}", e),
-                "Parquet error: Invalid DECIMAL precision: -1"
-            );
-        }
-
-        result = Type::primitive_type_builder("foo", PhysicalType::BYTE_ARRAY)
-            .with_repetition(Repetition::REQUIRED)
-            .with_converted_type(ConvertedType::DECIMAL)
-            .with_precision(0)
-            .with_scale(-1)
-            .build();
-        assert!(result.is_err());
-        if let Err(e) = result {
-            assert_eq!(
-                format!("{}", e),
-                "Parquet error: Invalid DECIMAL precision: 0"
-            );
-        }
-
-        result = Type::primitive_type_builder("foo", PhysicalType::BYTE_ARRAY)
-            .with_repetition(Repetition::REQUIRED)
-            .with_converted_type(ConvertedType::DECIMAL)
-            .with_precision(1)
-            .with_scale(-1)
-            .build();
-        assert!(result.is_err());
-        if let Err(e) = result {
-            assert_eq!(format!("{}", e), "Parquet error: Invalid DECIMAL scale: -1");
-        }
-
-        result = Type::primitive_type_builder("foo", PhysicalType::BYTE_ARRAY)
-            .with_repetition(Repetition::REQUIRED)
-            .with_converted_type(ConvertedType::DECIMAL)
-            .with_precision(1)
-            .with_scale(2)
-            .build();
-        assert!(result.is_err());
-        if let Err(e) = result {
-            assert_eq!(
-                format!("{}", e),
-                "Parquet error: Invalid DECIMAL: scale (2) cannot be greater than or equal to precision (1)"
-            );
-        }
-
-        result = Type::primitive_type_builder("foo", PhysicalType::INT32)
-            .with_repetition(Repetition::REQUIRED)
-            .with_converted_type(ConvertedType::DECIMAL)
-            .with_precision(18)
-            .with_scale(2)
-            .build();
-        assert!(result.is_err());
-        if let Err(e) = result {
-            assert_eq!(
-                format!("{}", e),
-                "Parquet error: Cannot represent INT32 as DECIMAL with precision 18"
-            );
-        }
-
-        result = Type::primitive_type_builder("foo", PhysicalType::INT64)
-            .with_repetition(Repetition::REQUIRED)
-            .with_converted_type(ConvertedType::DECIMAL)
-            .with_precision(32)
-            .with_scale(2)
-            .build();
-        assert!(result.is_err());
-        if let Err(e) = result {
-            assert_eq!(
-                format!("{}", e),
-                "Parquet error: Cannot represent INT64 as DECIMAL with precision 32"
-            );
-        }
-
-        result = Type::primitive_type_builder("foo", PhysicalType::FIXED_LEN_BYTE_ARRAY)
-            .with_repetition(Repetition::REQUIRED)
-            .with_converted_type(ConvertedType::DECIMAL)
-            .with_length(5)
-            .with_precision(12)
-            .with_scale(2)
-            .build();
-        assert!(result.is_err());
-        if let Err(e) = result {
-            assert_eq!(
-                format!("{}", e),
-                "Parquet error: Cannot represent FIXED_LEN_BYTE_ARRAY as DECIMAL with length 5 and precision 12. The max precision can only be 11"
-            );
-        }
-
-        result = Type::primitive_type_builder("foo", PhysicalType::INT64)
-            .with_repetition(Repetition::REQUIRED)
-            .with_converted_type(ConvertedType::UINT_8)
-            .build();
-        assert!(result.is_err());
-        if let Err(e) = result {
-            assert_eq!(
-                format!("{}", e),
-                "Parquet error: UINT_8 can only annotate INT32"
-            );
-        }
-
-        result = Type::primitive_type_builder("foo", PhysicalType::INT32)
-            .with_repetition(Repetition::REQUIRED)
-            .with_converted_type(ConvertedType::TIME_MICROS)
-            .build();
-        assert!(result.is_err());
-        if let Err(e) = result {
-            assert_eq!(
-                format!("{}", e),
-                "Parquet error: TIME_MICROS can only annotate INT64"
-            );
-        }
-
-        result = Type::primitive_type_builder("foo", PhysicalType::BYTE_ARRAY)
-            .with_repetition(Repetition::REQUIRED)
-            .with_converted_type(ConvertedType::INTERVAL)
-            .build();
-        assert!(result.is_err());
-        if let Err(e) = result {
-            assert_eq!(
-                format!("{}", e),
-                "Parquet error: INTERVAL can only annotate FIXED_LEN_BYTE_ARRAY(12)"
-            );
-        }
-
-        result = Type::primitive_type_builder("foo", PhysicalType::FIXED_LEN_BYTE_ARRAY)
-            .with_repetition(Repetition::REQUIRED)
-            .with_converted_type(ConvertedType::INTERVAL)
-            .with_length(1)
-            .build();
-        assert!(result.is_err());
-        if let Err(e) = result {
-            assert_eq!(
-                format!("{}", e),
-                "Parquet error: INTERVAL can only annotate FIXED_LEN_BYTE_ARRAY(12)"
-            );
-        }
-
-        result = Type::primitive_type_builder("foo", PhysicalType::INT32)
-            .with_repetition(Repetition::REQUIRED)
-            .with_converted_type(ConvertedType::ENUM)
-            .build();
-        assert!(result.is_err());
-        if let Err(e) = result {
-            assert_eq!(
-                format!("{}", e),
-                "Parquet error: ENUM can only annotate BYTE_ARRAY fields"
-            );
-        }
-
-        result = Type::primitive_type_builder("foo", PhysicalType::INT32)
-            .with_repetition(Repetition::REQUIRED)
-            .with_converted_type(ConvertedType::MAP)
-            .build();
-        assert!(result.is_err());
-        if let Err(e) = result {
-            assert_eq!(
-                format!("{}", e),
-                "Parquet error: MAP cannot be applied to a primitive type"
-            );
-        }
-
-        result = Type::primitive_type_builder("foo", PhysicalType::FIXED_LEN_BYTE_ARRAY)
-            .with_repetition(Repetition::REQUIRED)
-            .with_converted_type(ConvertedType::DECIMAL)
-            .with_length(-1)
-            .build();
-        assert!(result.is_err());
-        if let Err(e) = result {
-            assert_eq!(
-                format!("{}", e),
-                "Parquet error: Invalid FIXED_LEN_BYTE_ARRAY length: -1"
-            );
-        }
-    }
-
-    #[test]
-    fn test_group_type() {
-        let f1 = Type::primitive_type_builder("f1", PhysicalType::INT32)
-            .with_converted_type(ConvertedType::INT_32)
-            .with_id(0)
-            .build();
-        assert!(f1.is_ok());
-        let f2 = Type::primitive_type_builder("f2", PhysicalType::BYTE_ARRAY)
-            .with_converted_type(ConvertedType::UTF8)
-            .with_id(1)
-            .build();
-        assert!(f2.is_ok());
-
-        let mut fields = vec![];
-        fields.push(Arc::new(f1.unwrap()));
-        fields.push(Arc::new(f2.unwrap()));
-
-        let result = Type::group_type_builder("foo")
-            .with_repetition(Repetition::REPEATED)
-            .with_logical_type(Some(LogicalType::LIST(Default::default())))
-            .with_fields(&mut fields)
-            .with_id(1)
-            .build();
-        assert!(result.is_ok());
-
-        let tp = result.unwrap();
-        let basic_info = tp.get_basic_info();
-        assert!(tp.is_group());
-        assert!(!tp.is_primitive());
-        assert_eq!(basic_info.repetition(), Repetition::REPEATED);
-        assert_eq!(
-            basic_info.logical_type(),
-            Some(LogicalType::LIST(Default::default()))
-        );
-        assert_eq!(basic_info.converted_type(), ConvertedType::LIST);
-        assert_eq!(basic_info.id(), 1);
-        assert_eq!(tp.get_fields().len(), 2);
-        assert_eq!(tp.get_fields()[0].name(), "f1");
-        assert_eq!(tp.get_fields()[1].name(), "f2");
-    }
-
-    #[test]
-    fn test_column_descriptor() {
-        let result = test_column_descriptor_helper();
-        assert!(
-            result.is_ok(),
-            "Expected result to be OK but got err:\n {}",
-            result.unwrap_err()
-        );
-    }
-
-    fn test_column_descriptor_helper() -> Result<()> {
-        let tp = Type::primitive_type_builder("name", PhysicalType::BYTE_ARRAY)
-            .with_converted_type(ConvertedType::UTF8)
-            .build()?;
-
-        let descr = ColumnDescriptor::new(Arc::new(tp), 4, 1, ColumnPath::from("name"));
-
-        assert_eq!(descr.path(), &ColumnPath::from("name"));
-        assert_eq!(descr.converted_type(), ConvertedType::UTF8);
-        assert_eq!(descr.physical_type(), PhysicalType::BYTE_ARRAY);
-        assert_eq!(descr.max_def_level(), 4);
-        assert_eq!(descr.max_rep_level(), 1);
-        assert_eq!(descr.name(), "name");
-        assert_eq!(descr.type_length(), -1);
-        assert_eq!(descr.type_precision(), -1);
-        assert_eq!(descr.type_scale(), -1);
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_schema_descriptor() {
-        let result = test_schema_descriptor_helper();
-        assert!(
-            result.is_ok(),
-            "Expected result to be OK but got err:\n {}",
-            result.unwrap_err()
-        );
-    }
-
-    // A helper fn to avoid handling the results from type creation
-    fn test_schema_descriptor_helper() -> Result<()> {
-        let mut fields = vec![];
-
-        let inta = Type::primitive_type_builder("a", PhysicalType::INT32)
-            .with_repetition(Repetition::REQUIRED)
-            .with_converted_type(ConvertedType::INT_32)
-            .build()?;
-        fields.push(Arc::new(inta));
-        let intb = Type::primitive_type_builder("b", PhysicalType::INT64)
-            .with_converted_type(ConvertedType::INT_64)
-            .build()?;
-        fields.push(Arc::new(intb));
-        let intc = Type::primitive_type_builder("c", PhysicalType::BYTE_ARRAY)
-            .with_repetition(Repetition::REPEATED)
-            .with_converted_type(ConvertedType::UTF8)
-            .build()?;
-        fields.push(Arc::new(intc));
-
-        // 3-level list encoding
-        let item1 = Type::primitive_type_builder("item1", PhysicalType::INT64)
-            .with_repetition(Repetition::REQUIRED)
-            .with_converted_type(ConvertedType::INT_64)
-            .build()?;
-        let item2 =
-            Type::primitive_type_builder("item2", PhysicalType::BOOLEAN).build()?;
-        let item3 = Type::primitive_type_builder("item3", PhysicalType::INT32)
-            .with_repetition(Repetition::REPEATED)
-            .with_converted_type(ConvertedType::INT_32)
-            .build()?;
-        let list = Type::group_type_builder("records")
-            .with_repetition(Repetition::REPEATED)
-            .with_converted_type(ConvertedType::LIST)
-            .with_fields(&mut vec![Arc::new(item1), Arc::new(item2), Arc::new(item3)])
-            .build()?;
-        let bag = Type::group_type_builder("bag")
-            .with_repetition(Repetition::OPTIONAL)
-            .with_fields(&mut vec![Arc::new(list)])
-            .build()?;
-        fields.push(Arc::new(bag));
-
-        let schema = Type::group_type_builder("schema")
-            .with_repetition(Repetition::REPEATED)
-            .with_fields(&mut fields)
-            .build()?;
-        let descr = SchemaDescriptor::new(Arc::new(schema));
-
-        let nleaves = 6;
-        assert_eq!(descr.num_columns(), nleaves);
-
-        //                             mdef mrep
-        // required int32 a            0    0
-        // optional int64 b            1    0
-        // repeated byte_array c       1    1
-        // optional group bag          1    0
-        //   repeated group records    2    1
-        //     required int64 item1    2    1
-        //     optional boolean item2  3    1
-        //     repeated int32 item3    3    2
-        let ex_max_def_levels = vec![0, 1, 1, 2, 3, 3];
-        let ex_max_rep_levels = vec![0, 0, 1, 1, 1, 2];
-
-        for i in 0..nleaves {
-            let col = descr.column(i);
-            assert_eq!(col.max_def_level(), ex_max_def_levels[i], "{}", i);
-            assert_eq!(col.max_rep_level(), ex_max_rep_levels[i], "{}", i);
-        }
-
-        assert_eq!(descr.column(0).path().string(), "a");
-        assert_eq!(descr.column(1).path().string(), "b");
-        assert_eq!(descr.column(2).path().string(), "c");
-        assert_eq!(descr.column(3).path().string(), "bag.records.item1");
-        assert_eq!(descr.column(4).path().string(), "bag.records.item2");
-        assert_eq!(descr.column(5).path().string(), "bag.records.item3");
-
-        assert_eq!(descr.get_column_root(0).name(), "a");
-        assert_eq!(descr.get_column_root(3).name(), "bag");
-        assert_eq!(descr.get_column_root(4).name(), "bag");
-        assert_eq!(descr.get_column_root(5).name(), "bag");
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_schema_build_tree_def_rep_levels() {
-        let message_type = "
-    message spark_schema {
-      REQUIRED INT32 a;
-      OPTIONAL group b {
-        OPTIONAL INT32 _1;
-        OPTIONAL INT32 _2;
-      }
-      OPTIONAL group c (LIST) {
-        REPEATED group list {
-          OPTIONAL INT32 element;
-        }
-      }
-    }
-    ";
-        let schema = parse_message_type(message_type).expect("should parse schema");
-        let descr = SchemaDescriptor::new(Arc::new(schema));
-        // required int32 a
-        assert_eq!(descr.column(0).max_def_level(), 0);
-        assert_eq!(descr.column(0).max_rep_level(), 0);
-        // optional int32 b._1
-        assert_eq!(descr.column(1).max_def_level(), 2);
-        assert_eq!(descr.column(1).max_rep_level(), 0);
-        // optional int32 b._2
-        assert_eq!(descr.column(2).max_def_level(), 2);
-        assert_eq!(descr.column(2).max_rep_level(), 0);
-        // repeated optional int32 c.list.element
-        assert_eq!(descr.column(3).max_def_level(), 3);
-        assert_eq!(descr.column(3).max_rep_level(), 1);
-    }
-
-    #[test]
-    #[should_panic(expected = "Cannot call get_physical_type() on a non-primitive type")]
-    fn test_get_physical_type_panic() {
-        let list = Type::group_type_builder("records")
-            .with_repetition(Repetition::REPEATED)
-            .build()
-            .unwrap();
-        list.get_physical_type();
-    }
-
-    #[test]
-    fn test_get_physical_type_primitive() {
-        let f = Type::primitive_type_builder("f", PhysicalType::INT64)
-            .build()
-            .unwrap();
-        assert_eq!(f.get_physical_type(), PhysicalType::INT64);
-
-        let f = Type::primitive_type_builder("f", PhysicalType::BYTE_ARRAY)
-            .build()
-            .unwrap();
-        assert_eq!(f.get_physical_type(), PhysicalType::BYTE_ARRAY);
-    }
-
-    #[test]
-    fn test_check_contains_primitive_primitive() {
-        // OK
-        let f1 = Type::primitive_type_builder("f", PhysicalType::INT32)
-            .build()
-            .unwrap();
-        let f2 = Type::primitive_type_builder("f", PhysicalType::INT32)
-            .build()
-            .unwrap();
-        assert!(f1.check_contains(&f2));
-
-        // OK: different logical type does not affect check_contains
-        let f1 = Type::primitive_type_builder("f", PhysicalType::INT32)
-            .with_converted_type(ConvertedType::UINT_8)
-            .build()
-            .unwrap();
-        let f2 = Type::primitive_type_builder("f", PhysicalType::INT32)
-            .with_converted_type(ConvertedType::UINT_16)
-            .build()
-            .unwrap();
-        assert!(f1.check_contains(&f2));
-
-        // KO: different name
-        let f1 = Type::primitive_type_builder("f1", PhysicalType::INT32)
-            .build()
-            .unwrap();
-        let f2 = Type::primitive_type_builder("f2", PhysicalType::INT32)
-            .build()
-            .unwrap();
-        assert!(!f1.check_contains(&f2));
-
-        // KO: different type
-        let f1 = Type::primitive_type_builder("f", PhysicalType::INT32)
-            .build()
-            .unwrap();
-        let f2 = Type::primitive_type_builder("f", PhysicalType::INT64)
-            .build()
-            .unwrap();
-        assert!(!f1.check_contains(&f2));
-
-        // KO: different repetition
-        let f1 = Type::primitive_type_builder("f", PhysicalType::INT32)
-            .with_repetition(Repetition::REQUIRED)
-            .build()
-            .unwrap();
-        let f2 = Type::primitive_type_builder("f", PhysicalType::INT32)
-            .with_repetition(Repetition::OPTIONAL)
-            .build()
-            .unwrap();
-        assert!(!f1.check_contains(&f2));
-    }
-
-    // function to create a new group type for testing
-    fn test_new_group_type(name: &str, repetition: Repetition, types: Vec<Type>) -> Type {
-        let mut fields = Vec::new();
-        for tpe in types {
-            fields.push(Arc::new(tpe))
-        }
-        Type::group_type_builder(name)
-            .with_repetition(repetition)
-            .with_fields(&mut fields)
-            .build()
-            .unwrap()
-    }
-
-    #[test]
-    fn test_check_contains_group_group() {
-        // OK: should match okay with empty fields
-        let f1 = Type::group_type_builder("f").build().unwrap();
-        let f2 = Type::group_type_builder("f").build().unwrap();
-        assert!(f1.check_contains(&f2));
-
-        // OK: fields match
-        let f1 = test_new_group_type(
-            "f",
-            Repetition::REPEATED,
-            vec![
-                Type::primitive_type_builder("f1", PhysicalType::INT32)
-                    .build()
-                    .unwrap(),
-                Type::primitive_type_builder("f2", PhysicalType::INT64)
-                    .build()
-                    .unwrap(),
-            ],
-        );
-        let f2 = test_new_group_type(
-            "f",
-            Repetition::REPEATED,
-            vec![
-                Type::primitive_type_builder("f1", PhysicalType::INT32)
-                    .build()
-                    .unwrap(),
-                Type::primitive_type_builder("f2", PhysicalType::INT64)
-                    .build()
-                    .unwrap(),
-            ],
-        );
-        assert!(f1.check_contains(&f2));
-
-        // OK: subset of fields
-        let f1 = test_new_group_type(
-            "f",
-            Repetition::REPEATED,
-            vec![
-                Type::primitive_type_builder("f1", PhysicalType::INT32)
-                    .build()
-                    .unwrap(),
-                Type::primitive_type_builder("f2", PhysicalType::INT64)
-                    .build()
-                    .unwrap(),
-            ],
-        );
-        let f2 = test_new_group_type(
-            "f",
-            Repetition::REPEATED,
-            vec![Type::primitive_type_builder("f2", PhysicalType::INT64)
-                .build()
-                .unwrap()],
-        );
-        assert!(f1.check_contains(&f2));
-
-        // KO: different name
-        let f1 = Type::group_type_builder("f1").build().unwrap();
-        let f2 = Type::group_type_builder("f2").build().unwrap();
-        assert!(!f1.check_contains(&f2));
-
-        // KO: different repetition
-        let f1 = Type::group_type_builder("f")
-            .with_repetition(Repetition::OPTIONAL)
-            .build()
-            .unwrap();
-        let f2 = Type::group_type_builder("f")
-            .with_repetition(Repetition::REPEATED)
-            .build()
-            .unwrap();
-        assert!(!f1.check_contains(&f2));
-
-        // KO: different fields
-        let f1 = test_new_group_type(
-            "f",
-            Repetition::REPEATED,
-            vec![
-                Type::primitive_type_builder("f1", PhysicalType::INT32)
-                    .build()
-                    .unwrap(),
-                Type::primitive_type_builder("f2", PhysicalType::INT64)
-                    .build()
-                    .unwrap(),
-            ],
-        );
-        let f2 = test_new_group_type(
-            "f",
-            Repetition::REPEATED,
-            vec![
-                Type::primitive_type_builder("f1", PhysicalType::INT32)
-                    .build()
-                    .unwrap(),
-                Type::primitive_type_builder("f2", PhysicalType::BOOLEAN)
-                    .build()
-                    .unwrap(),
-            ],
-        );
-        assert!(!f1.check_contains(&f2));
-
-        // KO: different fields
-        let f1 = test_new_group_type(
-            "f",
-            Repetition::REPEATED,
-            vec![
-                Type::primitive_type_builder("f1", PhysicalType::INT32)
-                    .build()
-                    .unwrap(),
-                Type::primitive_type_builder("f2", PhysicalType::INT64)
-                    .build()
-                    .unwrap(),
-            ],
-        );
-        let f2 = test_new_group_type(
-            "f",
-            Repetition::REPEATED,
-            vec![Type::primitive_type_builder("f3", PhysicalType::INT32)
-                .build()
-                .unwrap()],
-        );
-        assert!(!f1.check_contains(&f2));
-    }
-
-    #[test]
-    fn test_check_contains_group_primitive() {
-        // KO: should not match
-        let f1 = Type::group_type_builder("f").build().unwrap();
-        let f2 = Type::primitive_type_builder("f", PhysicalType::INT64)
-            .build()
-            .unwrap();
-        assert!(!f1.check_contains(&f2));
-        assert!(!f2.check_contains(&f1));
-
-        // KO: should not match when primitive field is part of group type
-        let f1 = test_new_group_type(
-            "f",
-            Repetition::REPEATED,
-            vec![Type::primitive_type_builder("f1", PhysicalType::INT32)
-                .build()
-                .unwrap()],
-        );
-        let f2 = Type::primitive_type_builder("f1", PhysicalType::INT32)
-            .build()
-            .unwrap();
-        assert!(!f1.check_contains(&f2));
-        assert!(!f2.check_contains(&f1));
-
-        // OK: match nested types
-        let f1 = test_new_group_type(
-            "a",
-            Repetition::REPEATED,
-            vec![
-                test_new_group_type(
-                    "b",
-                    Repetition::REPEATED,
-                    vec![Type::primitive_type_builder("c", PhysicalType::INT32)
-                        .build()
-                        .unwrap()],
-                ),
-                Type::primitive_type_builder("d", PhysicalType::INT64)
-                    .build()
-                    .unwrap(),
-                Type::primitive_type_builder("e", PhysicalType::BOOLEAN)
-                    .build()
-                    .unwrap(),
-            ],
-        );
-        let f2 = test_new_group_type(
-            "a",
-            Repetition::REPEATED,
-            vec![test_new_group_type(
-                "b",
-                Repetition::REPEATED,
-                vec![Type::primitive_type_builder("c", PhysicalType::INT32)
-                    .build()
-                    .unwrap()],
-            )],
-        );
-        assert!(f1.check_contains(&f2)); // should match
-        assert!(!f2.check_contains(&f1)); // should fail
-    }
-
-    #[test]
-    fn test_schema_type_thrift_conversion_err() {
-        let schema = Type::primitive_type_builder("col", PhysicalType::INT32)
-            .build()
-            .unwrap();
-        let thrift_schema = to_thrift(&schema);
-        assert!(thrift_schema.is_err());
-        if let Err(e) = thrift_schema {
-            assert_eq!(
-                format!("{}", e),
-                "Parquet error: Root schema must be Group type"
-            );
-        }
-    }
-
-    #[test]
-    fn test_schema_type_thrift_conversion() {
-        let message_type = "
-    message conversions {
-      REQUIRED INT64 id;
-      OPTIONAL group int_array_Array (LIST) {
-        REPEATED group list {
-          OPTIONAL group element (LIST) {
-            REPEATED group list {
-              OPTIONAL INT32 element;
-            }
-          }
-        }
-      }
-      OPTIONAL group int_map (MAP) {
-        REPEATED group map (MAP_KEY_VALUE) {
-          REQUIRED BYTE_ARRAY key (UTF8);
-          OPTIONAL INT32 value;
-        }
-      }
-      OPTIONAL group int_Map_Array (LIST) {
-        REPEATED group list {
-          OPTIONAL group g (MAP) {
-            REPEATED group map (MAP_KEY_VALUE) {
-              REQUIRED BYTE_ARRAY key (UTF8);
-              OPTIONAL group value {
-                OPTIONAL group H {
-                  OPTIONAL group i (LIST) {
-                    REPEATED group list {
-                      OPTIONAL DOUBLE element;
-                    }
-                  }
-                }
-              }
-            }
-          }
-        }
-      }
-      OPTIONAL group nested_struct {
-        OPTIONAL INT32 A;
-        OPTIONAL group b (LIST) {
-          REPEATED group list {
-            REQUIRED FIXED_LEN_BYTE_ARRAY (16) element;
-          }
-        }
-      }
-    }
-    ";
-        let expected_schema = parse_message_type(message_type).unwrap();
-        let thrift_schema = to_thrift(&expected_schema).unwrap();
-        let result_schema = from_thrift(&thrift_schema).unwrap();
-        assert_eq!(result_schema, Arc::new(expected_schema));
-    }
-
-    #[test]
-    fn test_schema_type_thrift_conversion_decimal() {
-        let message_type = "
-    message decimals {
-      OPTIONAL INT32 field0;
-      OPTIONAL INT64 field1 (DECIMAL (18, 2));
-      OPTIONAL FIXED_LEN_BYTE_ARRAY (16) field2 (DECIMAL (38, 18));
-      OPTIONAL BYTE_ARRAY field3 (DECIMAL (9));
-    }
-    ";
-        let expected_schema = parse_message_type(message_type).unwrap();
-        let thrift_schema = to_thrift(&expected_schema).unwrap();
-        let result_schema = from_thrift(&thrift_schema).unwrap();
-        assert_eq!(result_schema, Arc::new(expected_schema));
-    }
-
-    // Tests schema conversion from thrift, when num_children is set to Some(0) for a
-    // primitive type.
-    #[test]
-    fn test_schema_from_thrift_with_num_children_set() {
-        // schema definition written by parquet-cpp version 1.3.2-SNAPSHOT
-        let message_type = "
-    message schema {
-      OPTIONAL BYTE_ARRAY id (UTF8);
-      OPTIONAL BYTE_ARRAY name (UTF8);
-      OPTIONAL BYTE_ARRAY message (UTF8);
-      OPTIONAL INT32 type (UINT_8);
-      OPTIONAL INT64 author_time (TIMESTAMP_MILLIS);
-      OPTIONAL INT64 __index_level_0__;
-    }
-    ";
-
-        let expected_schema = parse_message_type(message_type).unwrap();
-        let mut thrift_schema = to_thrift(&expected_schema).unwrap();
-        // Change all of None to Some(0)
-        for mut elem in &mut thrift_schema[..] {
-            if elem.num_children == None {
-                elem.num_children = Some(0);
-            }
-        }
-
-        let result_schema = from_thrift(&thrift_schema).unwrap();
-        assert_eq!(result_schema, Arc::new(expected_schema));
-    }
-
-    // Sometimes parquet-cpp sets repetition level for the root node, which is against
-    // the format definition, but we need to handle it by setting it back to None.
-    #[test]
-    fn test_schema_from_thrift_root_has_repetition() {
-        // schema definition written by parquet-cpp version 1.3.2-SNAPSHOT
-        let message_type = "
-    message schema {
-      OPTIONAL BYTE_ARRAY a (UTF8);
-      OPTIONAL INT32 b (UINT_8);
-    }
-    ";
-
-        let expected_schema = parse_message_type(message_type).unwrap();
-        let mut thrift_schema = to_thrift(&expected_schema).unwrap();
-        thrift_schema[0].repetition_type = Some(Repetition::REQUIRED.into());
-
-        let result_schema = from_thrift(&thrift_schema).unwrap();
-        assert_eq!(result_schema, Arc::new(expected_schema));
-    }
-}
diff --git a/rust/parquet/src/schema/visitor.rs b/rust/parquet/src/schema/visitor.rs
deleted file mode 100644
index 61bc3be951d..00000000000
--- a/rust/parquet/src/schema/visitor.rs
+++ /dev/null
@@ -1,240 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use crate::basic::{ConvertedType, Repetition};
-use crate::errors::ParquetError::General;
-use crate::errors::Result;
-use crate::schema::types::{Type, TypePtr};
-
-/// A utility trait to help user to traverse against parquet type.
-pub trait TypeVisitor<R, C> {
-    /// Called when a primitive type hit.
-    fn visit_primitive(&mut self, primitive_type: TypePtr, context: C) -> Result<R>;
-
-    /// Default implementation when visiting a list.
-    ///
-    /// It checks list type definition and calls `visit_list_with_item` with extracted
-    /// item type.
-    ///
-    /// To fully understand this algorithm, please refer to
-    /// [parquet doc](https://github.com/apache/parquet-format/blob/master/LogicalTypes.md).
-    fn visit_list(&mut self, list_type: TypePtr, context: C) -> Result<R> {
-        match list_type.as_ref() {
-            Type::PrimitiveType { .. } => panic!(
-                "{:?} is a list type and can't be processed as primitive.",
-                list_type
-            ),
-            Type::GroupType {
-                basic_info: _,
-                fields,
-            } if fields.len() == 1 => {
-                let list_item = fields.first().unwrap();
-
-                match list_item.as_ref() {
-                    Type::PrimitiveType { .. } => {
-                        if list_item.get_basic_info().repetition() == Repetition::REPEATED
-                        {
-                            self.visit_list_with_item(
-                                list_type.clone(),
-                                list_item.clone(),
-                                context,
-                            )
-                        } else {
-                            Err(General(
-                                "Primitive element type of list must be repeated."
-                                    .to_string(),
-                            ))
-                        }
-                    }
-                    Type::GroupType {
-                        basic_info: _,
-                        fields,
-                    } => {
-                        if fields.len() == 1
-                            && list_item.name() != "array"
-                            && list_item.name() != format!("{}_tuple", list_type.name())
-                        {
-                            self.visit_list_with_item(
-                                list_type.clone(),
-                                fields.first().unwrap().clone(),
-                                context,
-                            )
-                        } else {
-                            self.visit_list_with_item(
-                                list_type.clone(),
-                                list_item.clone(),
-                                context,
-                            )
-                        }
-                    }
-                }
-            }
-            _ => Err(General(
-                "Group element type of list can only contain one field.".to_string(),
-            )),
-        }
-    }
-
-    /// Called when a struct type hit.
-    fn visit_struct(&mut self, struct_type: TypePtr, context: C) -> Result<R>;
-
-    /// Called when a map type hit.
-    fn visit_map(&mut self, map_type: TypePtr, context: C) -> Result<R>;
-
-    /// A utility method which detects input type and calls corresponding method.
-    fn dispatch(&mut self, cur_type: TypePtr, context: C) -> Result<R> {
-        if cur_type.is_primitive() {
-            self.visit_primitive(cur_type, context)
-        } else {
-            match cur_type.get_basic_info().converted_type() {
-                ConvertedType::LIST => self.visit_list(cur_type, context),
-                ConvertedType::MAP | ConvertedType::MAP_KEY_VALUE => {
-                    self.visit_map(cur_type, context)
-                }
-                _ => self.visit_struct(cur_type, context),
-            }
-        }
-    }
-
-    /// Called by `visit_list`.
-    fn visit_list_with_item(
-        &mut self,
-        list_type: TypePtr,
-        item_type: TypePtr,
-        context: C,
-    ) -> Result<R>;
-}
-
-#[cfg(test)]
-mod tests {
-    use super::TypeVisitor;
-    use crate::basic::Type as PhysicalType;
-    use crate::errors::Result;
-    use crate::schema::parser::parse_message_type;
-    use crate::schema::types::TypePtr;
-    use std::sync::Arc;
-
-    struct TestVisitorContext {}
-    struct TestVisitor {
-        primitive_visited: bool,
-        struct_visited: bool,
-        list_visited: bool,
-        root_type: TypePtr,
-    }
-
-    impl TypeVisitor<bool, TestVisitorContext> for TestVisitor {
-        fn visit_primitive(
-            &mut self,
-            primitive_type: TypePtr,
-            _context: TestVisitorContext,
-        ) -> Result<bool> {
-            assert_eq!(
-                self.get_field_by_name(primitive_type.name()).as_ref(),
-                primitive_type.as_ref()
-            );
-            self.primitive_visited = true;
-            Ok(true)
-        }
-
-        fn visit_struct(
-            &mut self,
-            struct_type: TypePtr,
-            _context: TestVisitorContext,
-        ) -> Result<bool> {
-            assert_eq!(
-                self.get_field_by_name(struct_type.name()).as_ref(),
-                struct_type.as_ref()
-            );
-            self.struct_visited = true;
-            Ok(true)
-        }
-
-        fn visit_map(
-            &mut self,
-            _map_type: TypePtr,
-            _context: TestVisitorContext,
-        ) -> Result<bool> {
-            unimplemented!()
-        }
-
-        fn visit_list_with_item(
-            &mut self,
-            list_type: TypePtr,
-            item_type: TypePtr,
-            _context: TestVisitorContext,
-        ) -> Result<bool> {
-            assert_eq!(
-                self.get_field_by_name(list_type.name()).as_ref(),
-                list_type.as_ref()
-            );
-            assert_eq!("element", item_type.name());
-            assert_eq!(PhysicalType::INT32, item_type.get_physical_type());
-            self.list_visited = true;
-            Ok(true)
-        }
-    }
-
-    impl TestVisitor {
-        fn new(root: TypePtr) -> Self {
-            Self {
-                primitive_visited: false,
-                struct_visited: false,
-                list_visited: false,
-                root_type: root,
-            }
-        }
-
-        fn get_field_by_name(&self, name: &str) -> TypePtr {
-            self.root_type
-                .get_fields()
-                .iter()
-                .find(|t| t.name() == name)
-                .cloned()
-                .unwrap()
-        }
-    }
-
-    #[test]
-    fn test_visitor() {
-        let message_type = "
-          message spark_schema {
-            REQUIRED INT32 a;
-            OPTIONAL group inner_schema {
-              REQUIRED INT32 b;
-              REQUIRED DOUBLE c;
-            }
-
-            OPTIONAL group e (LIST) {
-              REPEATED group list {
-                REQUIRED INT32 element;
-              }
-            }
-        ";
-
-        let parquet_type = Arc::new(parse_message_type(&message_type).unwrap());
-
-        let mut visitor = TestVisitor::new(parquet_type.clone());
-        for f in parquet_type.get_fields() {
-            let c = TestVisitorContext {};
-            assert!(visitor.dispatch(f.clone(), c).unwrap());
-        }
-
-        assert!(visitor.struct_visited);
-        assert!(visitor.primitive_visited);
-        assert!(visitor.list_visited);
-    }
-}
diff --git a/rust/parquet/src/util/bit_packing.rs b/rust/parquet/src/util/bit_packing.rs
deleted file mode 100644
index 6b9673f6c30..00000000000
--- a/rust/parquet/src/util/bit_packing.rs
+++ /dev/null
@@ -1,3662 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-/// Unpack 32 values with bit width `num_bits` from `in_ptr`, and write to `out_ptr`.
-/// Return the `in_ptr` where the starting offset points to the first byte after all the
-/// bytes that were consumed.
-// TODO: may be better to make these more compact using if-else conditions.
-//  However, this may require const generics:
-//     https://github.com/rust-lang/rust/issues/44580
-//  to eliminate the branching cost.
-// TODO: we should use SIMD instructions to further optimize this. I have explored
-//    https://github.com/tantivy-search/bitpacking
-// but the layout it uses for SIMD is different from Parquet.
-// TODO: support packing as well, which is used for encoding.
-pub unsafe fn unpack32(
-    mut in_ptr: *const u32,
-    out_ptr: *mut u32,
-    num_bits: usize,
-) -> *const u32 {
-    in_ptr = match num_bits {
-        0 => nullunpacker32(in_ptr, out_ptr),
-        1 => unpack1_32(in_ptr, out_ptr),
-        2 => unpack2_32(in_ptr, out_ptr),
-        3 => unpack3_32(in_ptr, out_ptr),
-        4 => unpack4_32(in_ptr, out_ptr),
-        5 => unpack5_32(in_ptr, out_ptr),
-        6 => unpack6_32(in_ptr, out_ptr),
-        7 => unpack7_32(in_ptr, out_ptr),
-        8 => unpack8_32(in_ptr, out_ptr),
-        9 => unpack9_32(in_ptr, out_ptr),
-        10 => unpack10_32(in_ptr, out_ptr),
-        11 => unpack11_32(in_ptr, out_ptr),
-        12 => unpack12_32(in_ptr, out_ptr),
-        13 => unpack13_32(in_ptr, out_ptr),
-        14 => unpack14_32(in_ptr, out_ptr),
-        15 => unpack15_32(in_ptr, out_ptr),
-        16 => unpack16_32(in_ptr, out_ptr),
-        17 => unpack17_32(in_ptr, out_ptr),
-        18 => unpack18_32(in_ptr, out_ptr),
-        19 => unpack19_32(in_ptr, out_ptr),
-        20 => unpack20_32(in_ptr, out_ptr),
-        21 => unpack21_32(in_ptr, out_ptr),
-        22 => unpack22_32(in_ptr, out_ptr),
-        23 => unpack23_32(in_ptr, out_ptr),
-        24 => unpack24_32(in_ptr, out_ptr),
-        25 => unpack25_32(in_ptr, out_ptr),
-        26 => unpack26_32(in_ptr, out_ptr),
-        27 => unpack27_32(in_ptr, out_ptr),
-        28 => unpack28_32(in_ptr, out_ptr),
-        29 => unpack29_32(in_ptr, out_ptr),
-        30 => unpack30_32(in_ptr, out_ptr),
-        31 => unpack31_32(in_ptr, out_ptr),
-        32 => unpack32_32(in_ptr, out_ptr),
-        _ => unimplemented!(),
-    };
-    in_ptr
-}
-
-unsafe fn nullunpacker32(in_buf: *const u32, mut out: *mut u32) -> *const u32 {
-    for _ in 0..32 {
-        *out = 0;
-        out = out.offset(1);
-    }
-    in_buf
-}
-
-unsafe fn unpack1_32(in_buf: *const u32, mut out: *mut u32) -> *const u32 {
-    *out = (*in_buf) & 1;
-    out = out.offset(1);
-    *out = ((*in_buf) >> 1) & 1;
-    out = out.offset(1);
-    *out = ((*in_buf) >> 2) & 1;
-    out = out.offset(1);
-    *out = ((*in_buf) >> 3) & 1;
-    out = out.offset(1);
-    *out = ((*in_buf) >> 4) & 1;
-    out = out.offset(1);
-    *out = ((*in_buf) >> 5) & 1;
-    out = out.offset(1);
-    *out = ((*in_buf) >> 6) & 1;
-    out = out.offset(1);
-    *out = ((*in_buf) >> 7) & 1;
-    out = out.offset(1);
-    *out = ((*in_buf) >> 8) & 1;
-    out = out.offset(1);
-    *out = ((*in_buf) >> 9) & 1;
-    out = out.offset(1);
-    *out = ((*in_buf) >> 10) & 1;
-    out = out.offset(1);
-    *out = ((*in_buf) >> 11) & 1;
-    out = out.offset(1);
-    *out = ((*in_buf) >> 12) & 1;
-    out = out.offset(1);
-    *out = ((*in_buf) >> 13) & 1;
-    out = out.offset(1);
-    *out = ((*in_buf) >> 14) & 1;
-    out = out.offset(1);
-    *out = ((*in_buf) >> 15) & 1;
-    out = out.offset(1);
-    *out = ((*in_buf) >> 16) & 1;
-    out = out.offset(1);
-    *out = ((*in_buf) >> 17) & 1;
-    out = out.offset(1);
-    *out = ((*in_buf) >> 18) & 1;
-    out = out.offset(1);
-    *out = ((*in_buf) >> 19) & 1;
-    out = out.offset(1);
-    *out = ((*in_buf) >> 20) & 1;
-    out = out.offset(1);
-    *out = ((*in_buf) >> 21) & 1;
-    out = out.offset(1);
-    *out = ((*in_buf) >> 22) & 1;
-    out = out.offset(1);
-    *out = ((*in_buf) >> 23) & 1;
-    out = out.offset(1);
-    *out = ((*in_buf) >> 24) & 1;
-    out = out.offset(1);
-    *out = ((*in_buf) >> 25) & 1;
-    out = out.offset(1);
-    *out = ((*in_buf) >> 26) & 1;
-    out = out.offset(1);
-    *out = ((*in_buf) >> 27) & 1;
-    out = out.offset(1);
-    *out = ((*in_buf) >> 28) & 1;
-    out = out.offset(1);
-    *out = ((*in_buf) >> 29) & 1;
-    out = out.offset(1);
-    *out = ((*in_buf) >> 30) & 1;
-    out = out.offset(1);
-    *out = (*in_buf) >> 31;
-
-    in_buf.offset(1)
-}
-
-unsafe fn unpack2_32(mut in_buf: *const u32, mut out: *mut u32) -> *const u32 {
-    *out = (*in_buf) % (1u32 << 2);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 2) % (1u32 << 2);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 4) % (1u32 << 2);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 6) % (1u32 << 2);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 8) % (1u32 << 2);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 10) % (1u32 << 2);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 12) % (1u32 << 2);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 14) % (1u32 << 2);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 16) % (1u32 << 2);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 18) % (1u32 << 2);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 20) % (1u32 << 2);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 22) % (1u32 << 2);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 24) % (1u32 << 2);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 26) % (1u32 << 2);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 28) % (1u32 << 2);
-    out = out.offset(1);
-    *out = (*in_buf) >> 30;
-    out = out.offset(1);
-    in_buf = in_buf.offset(1);
-    *out = (*in_buf) % (1u32 << 2);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 2) % (1u32 << 2);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 4) % (1u32 << 2);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 6) % (1u32 << 2);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 8) % (1u32 << 2);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 10) % (1u32 << 2);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 12) % (1u32 << 2);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 14) % (1u32 << 2);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 16) % (1u32 << 2);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 18) % (1u32 << 2);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 20) % (1u32 << 2);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 22) % (1u32 << 2);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 24) % (1u32 << 2);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 26) % (1u32 << 2);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 28) % (1u32 << 2);
-    out = out.offset(1);
-    *out = (*in_buf) >> 30;
-
-    in_buf.offset(1)
-}
-
-unsafe fn unpack3_32(mut in_buf: *const u32, mut out: *mut u32) -> *const u32 {
-    *out = (*in_buf) % (1u32 << 3);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 3) % (1u32 << 3);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 6) % (1u32 << 3);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 9) % (1u32 << 3);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 12) % (1u32 << 3);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 15) % (1u32 << 3);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 18) % (1u32 << 3);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 21) % (1u32 << 3);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 24) % (1u32 << 3);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 27) % (1u32 << 3);
-    out = out.offset(1);
-    *out = (*in_buf) >> 30;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 1)) << (3 - 1);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 1) % (1u32 << 3);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 4) % (1u32 << 3);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 7) % (1u32 << 3);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 10) % (1u32 << 3);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 13) % (1u32 << 3);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 16) % (1u32 << 3);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 19) % (1u32 << 3);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 22) % (1u32 << 3);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 25) % (1u32 << 3);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 28) % (1u32 << 3);
-    out = out.offset(1);
-    *out = (*in_buf) >> 31;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 2)) << (3 - 2);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 2) % (1u32 << 3);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 5) % (1u32 << 3);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 8) % (1u32 << 3);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 11) % (1u32 << 3);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 14) % (1u32 << 3);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 17) % (1u32 << 3);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 20) % (1u32 << 3);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 23) % (1u32 << 3);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 26) % (1u32 << 3);
-    out = out.offset(1);
-    *out = (*in_buf) >> 29;
-
-    in_buf.offset(1)
-}
-
-unsafe fn unpack4_32(mut in_buf: *const u32, mut out: *mut u32) -> *const u32 {
-    *out = (*in_buf) % (1u32 << 4);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 4) % (1u32 << 4);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 8) % (1u32 << 4);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 12) % (1u32 << 4);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 16) % (1u32 << 4);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 20) % (1u32 << 4);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 24) % (1u32 << 4);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 28) % (1u32 << 4);
-    out = out.offset(1);
-    in_buf = in_buf.offset(1);
-
-    *out = (*in_buf) % (1u32 << 4);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 4) % (1u32 << 4);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 8) % (1u32 << 4);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 12) % (1u32 << 4);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 16) % (1u32 << 4);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 20) % (1u32 << 4);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 24) % (1u32 << 4);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 28) % (1u32 << 4);
-    out = out.offset(1);
-    in_buf = in_buf.offset(1);
-
-    *out = (*in_buf) % (1u32 << 4);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 4) % (1u32 << 4);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 8) % (1u32 << 4);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 12) % (1u32 << 4);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 16) % (1u32 << 4);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 20) % (1u32 << 4);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 24) % (1u32 << 4);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 28) % (1u32 << 4);
-    out = out.offset(1);
-    in_buf = in_buf.offset(1);
-
-    *out = (*in_buf) % (1u32 << 4);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 4) % (1u32 << 4);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 8) % (1u32 << 4);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 12) % (1u32 << 4);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 16) % (1u32 << 4);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 20) % (1u32 << 4);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 24) % (1u32 << 4);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 28) % (1u32 << 4);
-
-    in_buf.offset(1)
-}
-
-unsafe fn unpack5_32(mut in_buf: *const u32, mut out: *mut u32) -> *const u32 {
-    *out = (*in_buf) % (1u32 << 5);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 5) % (1u32 << 5);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 10) % (1u32 << 5);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 15) % (1u32 << 5);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 20) % (1u32 << 5);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 25) % (1u32 << 5);
-    out = out.offset(1);
-    *out = (*in_buf) >> 30;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 3)) << (5 - 3);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 3) % (1u32 << 5);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 8) % (1u32 << 5);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 13) % (1u32 << 5);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 18) % (1u32 << 5);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 23) % (1u32 << 5);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 28) % (1u32 << 5);
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 1)) << (5 - 1);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 1) % (1u32 << 5);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 6) % (1u32 << 5);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 11) % (1u32 << 5);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 16) % (1u32 << 5);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 21) % (1u32 << 5);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 26) % (1u32 << 5);
-    out = out.offset(1);
-    *out = (*in_buf) >> 31;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 4)) << (5 - 4);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 4) % (1u32 << 5);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 9) % (1u32 << 5);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 14) % (1u32 << 5);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 19) % (1u32 << 5);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 24) % (1u32 << 5);
-    out = out.offset(1);
-    *out = (*in_buf) >> 29;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 2)) << (5 - 2);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 2) % (1u32 << 5);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 7) % (1u32 << 5);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 12) % (1u32 << 5);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 17) % (1u32 << 5);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 22) % (1u32 << 5);
-    out = out.offset(1);
-    *out = (*in_buf) >> 27;
-
-    in_buf.offset(1)
-}
-
-unsafe fn unpack6_32(mut in_buf: *const u32, mut out: *mut u32) -> *const u32 {
-    *out = (*in_buf) % (1u32 << 6);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 6) % (1u32 << 6);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 12) % (1u32 << 6);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 18) % (1u32 << 6);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 24) % (1u32 << 6);
-    out = out.offset(1);
-    *out = (*in_buf) >> 30;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 4)) << (6 - 4);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 4) % (1u32 << 6);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 10) % (1u32 << 6);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 16) % (1u32 << 6);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 22) % (1u32 << 6);
-    out = out.offset(1);
-    *out = (*in_buf) >> 28;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 2)) << (6 - 2);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 2) % (1u32 << 6);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 8) % (1u32 << 6);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 14) % (1u32 << 6);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 20) % (1u32 << 6);
-    out = out.offset(1);
-    *out = (*in_buf) >> 26;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = (*in_buf) % (1u32 << 6);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 6) % (1u32 << 6);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 12) % (1u32 << 6);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 18) % (1u32 << 6);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 24) % (1u32 << 6);
-    out = out.offset(1);
-    *out = (*in_buf) >> 30;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 4)) << (6 - 4);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 4) % (1u32 << 6);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 10) % (1u32 << 6);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 16) % (1u32 << 6);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 22) % (1u32 << 6);
-    out = out.offset(1);
-    *out = (*in_buf) >> 28;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 2)) << (6 - 2);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 2) % (1u32 << 6);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 8) % (1u32 << 6);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 14) % (1u32 << 6);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 20) % (1u32 << 6);
-    out = out.offset(1);
-    *out = (*in_buf) >> 26;
-
-    in_buf.offset(1)
-}
-
-unsafe fn unpack7_32(mut in_buf: *const u32, mut out: *mut u32) -> *const u32 {
-    *out = (*in_buf) % (1u32 << 7);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 7) % (1u32 << 7);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 14) % (1u32 << 7);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 21) % (1u32 << 7);
-    out = out.offset(1);
-    *out = (*in_buf) >> 28;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 3)) << (7 - 3);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 3) % (1u32 << 7);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 10) % (1u32 << 7);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 17) % (1u32 << 7);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 24) % (1u32 << 7);
-    out = out.offset(1);
-    *out = (*in_buf) >> 31;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 6)) << (7 - 6);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 6) % (1u32 << 7);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 13) % (1u32 << 7);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 20) % (1u32 << 7);
-    out = out.offset(1);
-    *out = (*in_buf) >> 27;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 2)) << (7 - 2);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 2) % (1u32 << 7);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 9) % (1u32 << 7);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 16) % (1u32 << 7);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 23) % (1u32 << 7);
-    out = out.offset(1);
-    *out = (*in_buf) >> 30;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 5)) << (7 - 5);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 5) % (1u32 << 7);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 12) % (1u32 << 7);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 19) % (1u32 << 7);
-    out = out.offset(1);
-    *out = (*in_buf) >> 26;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 1)) << (7 - 1);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 1) % (1u32 << 7);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 8) % (1u32 << 7);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 15) % (1u32 << 7);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 22) % (1u32 << 7);
-    out = out.offset(1);
-    *out = (*in_buf) >> 29;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 4)) << (7 - 4);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 4) % (1u32 << 7);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 11) % (1u32 << 7);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 18) % (1u32 << 7);
-    out = out.offset(1);
-    *out = (*in_buf) >> 25;
-
-    in_buf.offset(1)
-}
-
-unsafe fn unpack8_32(mut in_buf: *const u32, mut out: *mut u32) -> *const u32 {
-    *out = (*in_buf) % (1u32 << 8);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 8) % (1u32 << 8);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 16) % (1u32 << 8);
-    out = out.offset(1);
-    *out = (*in_buf) >> 24;
-    out = out.offset(1);
-    in_buf = in_buf.offset(1);
-
-    *out = (*in_buf) % (1u32 << 8);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 8) % (1u32 << 8);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 16) % (1u32 << 8);
-    out = out.offset(1);
-    *out = (*in_buf) >> 24;
-    out = out.offset(1);
-    in_buf = in_buf.offset(1);
-
-    *out = (*in_buf) % (1u32 << 8);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 8) % (1u32 << 8);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 16) % (1u32 << 8);
-    out = out.offset(1);
-    *out = (*in_buf) >> 24;
-    out = out.offset(1);
-    in_buf = in_buf.offset(1);
-
-    *out = (*in_buf) % (1u32 << 8);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 8) % (1u32 << 8);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 16) % (1u32 << 8);
-    out = out.offset(1);
-    *out = (*in_buf) >> 24;
-    out = out.offset(1);
-    in_buf = in_buf.offset(1);
-
-    *out = (*in_buf) % (1u32 << 8);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 8) % (1u32 << 8);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 16) % (1u32 << 8);
-    out = out.offset(1);
-    *out = (*in_buf) >> 24;
-    out = out.offset(1);
-    in_buf = in_buf.offset(1);
-
-    *out = (*in_buf) % (1u32 << 8);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 8) % (1u32 << 8);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 16) % (1u32 << 8);
-    out = out.offset(1);
-    *out = (*in_buf) >> 24;
-    out = out.offset(1);
-    in_buf = in_buf.offset(1);
-
-    *out = (*in_buf) % (1u32 << 8);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 8) % (1u32 << 8);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 16) % (1u32 << 8);
-    out = out.offset(1);
-    *out = (*in_buf) >> 24;
-    out = out.offset(1);
-    in_buf = in_buf.offset(1);
-
-    *out = (*in_buf) % (1u32 << 8);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 8) % (1u32 << 8);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 16) % (1u32 << 8);
-    out = out.offset(1);
-    *out = (*in_buf) >> 24;
-
-    in_buf.offset(1)
-}
-
-unsafe fn unpack9_32(mut in_buf: *const u32, mut out: *mut u32) -> *const u32 {
-    *out = (*in_buf) % (1u32 << 9);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 9) % (1u32 << 9);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 18) % (1u32 << 9);
-    out = out.offset(1);
-    *out = (*in_buf) >> 27;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 4)) << (9 - 4);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 4) % (1u32 << 9);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 13) % (1u32 << 9);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 22) % (1u32 << 9);
-    out = out.offset(1);
-    *out = (*in_buf) >> 31;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 8)) << (9 - 8);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 8) % (1u32 << 9);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 17) % (1u32 << 9);
-    out = out.offset(1);
-    *out = (*in_buf) >> 26;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 3)) << (9 - 3);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 3) % (1u32 << 9);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 12) % (1u32 << 9);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 21) % (1u32 << 9);
-    out = out.offset(1);
-    *out = (*in_buf) >> 30;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 7)) << (9 - 7);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 7) % (1u32 << 9);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 16) % (1u32 << 9);
-    out = out.offset(1);
-    *out = (*in_buf) >> 25;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 2)) << (9 - 2);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 2) % (1u32 << 9);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 11) % (1u32 << 9);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 20) % (1u32 << 9);
-    out = out.offset(1);
-    *out = (*in_buf) >> 29;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 6)) << (9 - 6);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 6) % (1u32 << 9);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 15) % (1u32 << 9);
-    out = out.offset(1);
-    *out = (*in_buf) >> 24;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 1)) << (9 - 1);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 1) % (1u32 << 9);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 10) % (1u32 << 9);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 19) % (1u32 << 9);
-    out = out.offset(1);
-    *out = (*in_buf) >> 28;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 5)) << (9 - 5);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 5) % (1u32 << 9);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 14) % (1u32 << 9);
-    out = out.offset(1);
-    *out = (*in_buf) >> 23;
-
-    in_buf.offset(1)
-}
-
-unsafe fn unpack10_32(mut in_buf: *const u32, mut out: *mut u32) -> *const u32 {
-    *out = (*in_buf) % (1u32 << 10);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 10) % (1u32 << 10);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 20) % (1u32 << 10);
-    out = out.offset(1);
-    *out = (*in_buf) >> 30;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 8)) << (10 - 8);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 8) % (1u32 << 10);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 18) % (1u32 << 10);
-    out = out.offset(1);
-    *out = (*in_buf) >> 28;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 6)) << (10 - 6);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 6) % (1u32 << 10);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 16) % (1u32 << 10);
-    out = out.offset(1);
-    *out = (*in_buf) >> 26;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 4)) << (10 - 4);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 4) % (1u32 << 10);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 14) % (1u32 << 10);
-    out = out.offset(1);
-    *out = (*in_buf) >> 24;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 2)) << (10 - 2);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 2) % (1u32 << 10);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 12) % (1u32 << 10);
-    out = out.offset(1);
-    *out = (*in_buf) >> 22;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = (*in_buf) % (1u32 << 10);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 10) % (1u32 << 10);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 20) % (1u32 << 10);
-    out = out.offset(1);
-    *out = (*in_buf) >> 30;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 8)) << (10 - 8);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 8) % (1u32 << 10);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 18) % (1u32 << 10);
-    out = out.offset(1);
-    *out = (*in_buf) >> 28;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 6)) << (10 - 6);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 6) % (1u32 << 10);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 16) % (1u32 << 10);
-    out = out.offset(1);
-    *out = (*in_buf) >> 26;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 4)) << (10 - 4);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 4) % (1u32 << 10);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 14) % (1u32 << 10);
-    out = out.offset(1);
-    *out = (*in_buf) >> 24;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 2)) << (10 - 2);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 2) % (1u32 << 10);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 12) % (1u32 << 10);
-    out = out.offset(1);
-    *out = (*in_buf) >> 22;
-
-    in_buf.offset(1)
-}
-
-unsafe fn unpack11_32(mut in_buf: *const u32, mut out: *mut u32) -> *const u32 {
-    *out = (*in_buf) % (1u32 << 11);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 11) % (1u32 << 11);
-    out = out.offset(1);
-    *out = (*in_buf) >> 22;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 1)) << (11 - 1);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 1) % (1u32 << 11);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 12) % (1u32 << 11);
-    out = out.offset(1);
-    *out = (*in_buf) >> 23;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 2)) << (11 - 2);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 2) % (1u32 << 11);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 13) % (1u32 << 11);
-    out = out.offset(1);
-    *out = (*in_buf) >> 24;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 3)) << (11 - 3);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 3) % (1u32 << 11);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 14) % (1u32 << 11);
-    out = out.offset(1);
-    *out = (*in_buf) >> 25;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 4)) << (11 - 4);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 4) % (1u32 << 11);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 15) % (1u32 << 11);
-    out = out.offset(1);
-    *out = (*in_buf) >> 26;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 5)) << (11 - 5);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 5) % (1u32 << 11);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 16) % (1u32 << 11);
-    out = out.offset(1);
-    *out = (*in_buf) >> 27;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 6)) << (11 - 6);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 6) % (1u32 << 11);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 17) % (1u32 << 11);
-    out = out.offset(1);
-    *out = (*in_buf) >> 28;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 7)) << (11 - 7);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 7) % (1u32 << 11);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 18) % (1u32 << 11);
-    out = out.offset(1);
-    *out = (*in_buf) >> 29;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 8)) << (11 - 8);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 8) % (1u32 << 11);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 19) % (1u32 << 11);
-    out = out.offset(1);
-    *out = (*in_buf) >> 30;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 9)) << (11 - 9);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 9) % (1u32 << 11);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 20) % (1u32 << 11);
-    out = out.offset(1);
-    *out = (*in_buf) >> 31;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 10)) << (11 - 10);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 10) % (1u32 << 11);
-    out = out.offset(1);
-    *out = (*in_buf) >> 21;
-
-    in_buf.offset(1)
-}
-
-unsafe fn unpack12_32(mut in_buf: *const u32, mut out: *mut u32) -> *const u32 {
-    *out = (*in_buf) % (1u32 << 12);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 12) % (1u32 << 12);
-    out = out.offset(1);
-    *out = (*in_buf) >> 24;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 4)) << (12 - 4);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 4) % (1u32 << 12);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 16) % (1u32 << 12);
-    out = out.offset(1);
-    *out = (*in_buf) >> 28;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 8)) << (12 - 8);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 8) % (1u32 << 12);
-    out = out.offset(1);
-    *out = (*in_buf) >> 20;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = (*in_buf) % (1u32 << 12);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 12) % (1u32 << 12);
-    out = out.offset(1);
-    *out = (*in_buf) >> 24;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 4)) << (12 - 4);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 4) % (1u32 << 12);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 16) % (1u32 << 12);
-    out = out.offset(1);
-    *out = (*in_buf) >> 28;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 8)) << (12 - 8);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 8) % (1u32 << 12);
-    out = out.offset(1);
-    *out = (*in_buf) >> 20;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = (*in_buf) % (1u32 << 12);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 12) % (1u32 << 12);
-    out = out.offset(1);
-    *out = (*in_buf) >> 24;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 4)) << (12 - 4);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 4) % (1u32 << 12);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 16) % (1u32 << 12);
-    out = out.offset(1);
-    *out = (*in_buf) >> 28;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 8)) << (12 - 8);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 8) % (1u32 << 12);
-    out = out.offset(1);
-    *out = (*in_buf) >> 20;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = (*in_buf) % (1u32 << 12);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 12) % (1u32 << 12);
-    out = out.offset(1);
-    *out = (*in_buf) >> 24;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 4)) << (12 - 4);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 4) % (1u32 << 12);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 16) % (1u32 << 12);
-    out = out.offset(1);
-    *out = (*in_buf) >> 28;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 8)) << (12 - 8);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 8) % (1u32 << 12);
-    out = out.offset(1);
-    *out = (*in_buf) >> 20;
-
-    in_buf.offset(1)
-}
-
-unsafe fn unpack13_32(mut in_buf: *const u32, mut out: *mut u32) -> *const u32 {
-    *out = (*in_buf) % (1u32 << 13);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 13) % (1u32 << 13);
-    out = out.offset(1);
-    *out = (*in_buf) >> 26;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 7)) << (13 - 7);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 7) % (1u32 << 13);
-    out = out.offset(1);
-    *out = (*in_buf) >> 20;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 1)) << (13 - 1);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 1) % (1u32 << 13);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 14) % (1u32 << 13);
-    out = out.offset(1);
-    *out = (*in_buf) >> 27;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 8)) << (13 - 8);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 8) % (1u32 << 13);
-    out = out.offset(1);
-    *out = (*in_buf) >> 21;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 2)) << (13 - 2);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 2) % (1u32 << 13);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 15) % (1u32 << 13);
-    out = out.offset(1);
-    *out = (*in_buf) >> 28;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 9)) << (13 - 9);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 9) % (1u32 << 13);
-    out = out.offset(1);
-    *out = (*in_buf) >> 22;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 3)) << (13 - 3);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 3) % (1u32 << 13);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 16) % (1u32 << 13);
-    out = out.offset(1);
-    *out = (*in_buf) >> 29;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 10)) << (13 - 10);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 10) % (1u32 << 13);
-    out = out.offset(1);
-    *out = (*in_buf) >> 23;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 4)) << (13 - 4);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 4) % (1u32 << 13);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 17) % (1u32 << 13);
-    out = out.offset(1);
-    *out = (*in_buf) >> 30;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 11)) << (13 - 11);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 11) % (1u32 << 13);
-    out = out.offset(1);
-    *out = (*in_buf) >> 24;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 5)) << (13 - 5);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 5) % (1u32 << 13);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 18) % (1u32 << 13);
-    out = out.offset(1);
-    *out = (*in_buf) >> 31;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 12)) << (13 - 12);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 12) % (1u32 << 13);
-    out = out.offset(1);
-    *out = (*in_buf) >> 25;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 6)) << (13 - 6);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 6) % (1u32 << 13);
-    out = out.offset(1);
-    *out = (*in_buf) >> 19;
-
-    in_buf.offset(1)
-}
-
-unsafe fn unpack14_32(mut in_buf: *const u32, mut out: *mut u32) -> *const u32 {
-    *out = (*in_buf) % (1u32 << 14);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 14) % (1u32 << 14);
-    out = out.offset(1);
-    *out = (*in_buf) >> 28;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 10)) << (14 - 10);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 10) % (1u32 << 14);
-    out = out.offset(1);
-    *out = (*in_buf) >> 24;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 6)) << (14 - 6);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 6) % (1u32 << 14);
-    out = out.offset(1);
-    *out = (*in_buf) >> 20;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 2)) << (14 - 2);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 2) % (1u32 << 14);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 16) % (1u32 << 14);
-    out = out.offset(1);
-    *out = (*in_buf) >> 30;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 12)) << (14 - 12);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 12) % (1u32 << 14);
-    out = out.offset(1);
-    *out = (*in_buf) >> 26;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 8)) << (14 - 8);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 8) % (1u32 << 14);
-    out = out.offset(1);
-    *out = (*in_buf) >> 22;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 4)) << (14 - 4);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 4) % (1u32 << 14);
-    out = out.offset(1);
-    *out = (*in_buf) >> 18;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = (*in_buf) % (1u32 << 14);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 14) % (1u32 << 14);
-    out = out.offset(1);
-    *out = (*in_buf) >> 28;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 10)) << (14 - 10);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 10) % (1u32 << 14);
-    out = out.offset(1);
-    *out = (*in_buf) >> 24;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 6)) << (14 - 6);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 6) % (1u32 << 14);
-    out = out.offset(1);
-    *out = (*in_buf) >> 20;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 2)) << (14 - 2);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 2) % (1u32 << 14);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 16) % (1u32 << 14);
-    out = out.offset(1);
-    *out = (*in_buf) >> 30;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 12)) << (14 - 12);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 12) % (1u32 << 14);
-    out = out.offset(1);
-    *out = (*in_buf) >> 26;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 8)) << (14 - 8);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 8) % (1u32 << 14);
-    out = out.offset(1);
-    *out = (*in_buf) >> 22;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 4)) << (14 - 4);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 4) % (1u32 << 14);
-    out = out.offset(1);
-    *out = (*in_buf) >> 18;
-
-    in_buf.offset(1)
-}
-
-unsafe fn unpack15_32(mut in_buf: *const u32, mut out: *mut u32) -> *const u32 {
-    *out = (*in_buf) % (1u32 << 15);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 15) % (1u32 << 15);
-    out = out.offset(1);
-    *out = (*in_buf) >> 30;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 13)) << (15 - 13);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 13) % (1u32 << 15);
-    out = out.offset(1);
-    *out = (*in_buf) >> 28;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 11)) << (15 - 11);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 11) % (1u32 << 15);
-    out = out.offset(1);
-    *out = (*in_buf) >> 26;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 9)) << (15 - 9);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 9) % (1u32 << 15);
-    out = out.offset(1);
-    *out = (*in_buf) >> 24;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 7)) << (15 - 7);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 7) % (1u32 << 15);
-    out = out.offset(1);
-    *out = (*in_buf) >> 22;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 5)) << (15 - 5);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 5) % (1u32 << 15);
-    out = out.offset(1);
-    *out = (*in_buf) >> 20;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 3)) << (15 - 3);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 3) % (1u32 << 15);
-    out = out.offset(1);
-    *out = (*in_buf) >> 18;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 1)) << (15 - 1);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 1) % (1u32 << 15);
-    out = out.offset(1);
-    *out = ((*in_buf) >> 16) % (1u32 << 15);
-    out = out.offset(1);
-    *out = (*in_buf) >> 31;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 14)) << (15 - 14);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 14) % (1u32 << 15);
-    out = out.offset(1);
-    *out = (*in_buf) >> 29;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 12)) << (15 - 12);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 12) % (1u32 << 15);
-    out = out.offset(1);
-    *out = (*in_buf) >> 27;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 10)) << (15 - 10);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 10) % (1u32 << 15);
-    out = out.offset(1);
-    *out = (*in_buf) >> 25;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 8)) << (15 - 8);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 8) % (1u32 << 15);
-    out = out.offset(1);
-    *out = (*in_buf) >> 23;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 6)) << (15 - 6);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 6) % (1u32 << 15);
-    out = out.offset(1);
-    *out = (*in_buf) >> 21;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 4)) << (15 - 4);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 4) % (1u32 << 15);
-    out = out.offset(1);
-    *out = (*in_buf) >> 19;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 2)) << (15 - 2);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 2) % (1u32 << 15);
-    out = out.offset(1);
-    *out = (*in_buf) >> 17;
-
-    in_buf.offset(1)
-}
-
-unsafe fn unpack16_32(mut in_buf: *const u32, mut out: *mut u32) -> *const u32 {
-    *out = (*in_buf) % (1u32 << 16);
-    out = out.offset(1);
-    *out = (*in_buf) >> 16;
-    out = out.offset(1);
-    in_buf = in_buf.offset(1);
-
-    *out = (*in_buf) % (1u32 << 16);
-    out = out.offset(1);
-    *out = (*in_buf) >> 16;
-    out = out.offset(1);
-    in_buf = in_buf.offset(1);
-
-    *out = (*in_buf) % (1u32 << 16);
-    out = out.offset(1);
-    *out = (*in_buf) >> 16;
-    out = out.offset(1);
-    in_buf = in_buf.offset(1);
-
-    *out = (*in_buf) % (1u32 << 16);
-    out = out.offset(1);
-    *out = (*in_buf) >> 16;
-    out = out.offset(1);
-    in_buf = in_buf.offset(1);
-
-    *out = (*in_buf) % (1u32 << 16);
-    out = out.offset(1);
-    *out = (*in_buf) >> 16;
-    out = out.offset(1);
-    in_buf = in_buf.offset(1);
-
-    *out = (*in_buf) % (1u32 << 16);
-    out = out.offset(1);
-    *out = (*in_buf) >> 16;
-    out = out.offset(1);
-    in_buf = in_buf.offset(1);
-
-    *out = (*in_buf) % (1u32 << 16);
-    out = out.offset(1);
-    *out = (*in_buf) >> 16;
-    out = out.offset(1);
-    in_buf = in_buf.offset(1);
-
-    *out = (*in_buf) % (1u32 << 16);
-    out = out.offset(1);
-    *out = (*in_buf) >> 16;
-    out = out.offset(1);
-    in_buf = in_buf.offset(1);
-
-    *out = (*in_buf) % (1u32 << 16);
-    out = out.offset(1);
-    *out = (*in_buf) >> 16;
-    out = out.offset(1);
-    in_buf = in_buf.offset(1);
-
-    *out = (*in_buf) % (1u32 << 16);
-    out = out.offset(1);
-    *out = (*in_buf) >> 16;
-    out = out.offset(1);
-    in_buf = in_buf.offset(1);
-
-    *out = (*in_buf) % (1u32 << 16);
-    out = out.offset(1);
-    *out = (*in_buf) >> 16;
-    out = out.offset(1);
-    in_buf = in_buf.offset(1);
-
-    *out = (*in_buf) % (1u32 << 16);
-    out = out.offset(1);
-    *out = (*in_buf) >> 16;
-    out = out.offset(1);
-    in_buf = in_buf.offset(1);
-
-    *out = (*in_buf) % (1u32 << 16);
-    out = out.offset(1);
-    *out = (*in_buf) >> 16;
-    out = out.offset(1);
-    in_buf = in_buf.offset(1);
-
-    *out = (*in_buf) % (1u32 << 16);
-    out = out.offset(1);
-    *out = (*in_buf) >> 16;
-    out = out.offset(1);
-    in_buf = in_buf.offset(1);
-
-    *out = (*in_buf) % (1u32 << 16);
-    out = out.offset(1);
-    *out = (*in_buf) >> 16;
-    out = out.offset(1);
-    in_buf = in_buf.offset(1);
-
-    *out = (*in_buf) % (1u32 << 16);
-    out = out.offset(1);
-    *out = (*in_buf) >> 16;
-
-    in_buf.offset(1)
-}
-
-unsafe fn unpack17_32(mut in_buf: *const u32, mut out: *mut u32) -> *const u32 {
-    *out = (*in_buf) % (1u32 << 17);
-    out = out.offset(1);
-    *out = (*in_buf) >> 17;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 2)) << (17 - 2);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 2) % (1u32 << 17);
-    out = out.offset(1);
-    *out = (*in_buf) >> 19;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 4)) << (17 - 4);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 4) % (1u32 << 17);
-    out = out.offset(1);
-    *out = (*in_buf) >> 21;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 6)) << (17 - 6);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 6) % (1u32 << 17);
-    out = out.offset(1);
-    *out = (*in_buf) >> 23;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 8)) << (17 - 8);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 8) % (1u32 << 17);
-    out = out.offset(1);
-    *out = (*in_buf) >> 25;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 10)) << (17 - 10);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 10) % (1u32 << 17);
-    out = out.offset(1);
-    *out = (*in_buf) >> 27;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 12)) << (17 - 12);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 12) % (1u32 << 17);
-    out = out.offset(1);
-    *out = (*in_buf) >> 29;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 14)) << (17 - 14);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 14) % (1u32 << 17);
-    out = out.offset(1);
-    *out = (*in_buf) >> 31;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 16)) << (17 - 16);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 16;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 1)) << (17 - 1);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 1) % (1u32 << 17);
-    out = out.offset(1);
-    *out = (*in_buf) >> 18;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 3)) << (17 - 3);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 3) % (1u32 << 17);
-    out = out.offset(1);
-    *out = (*in_buf) >> 20;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 5)) << (17 - 5);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 5) % (1u32 << 17);
-    out = out.offset(1);
-    *out = (*in_buf) >> 22;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 7)) << (17 - 7);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 7) % (1u32 << 17);
-    out = out.offset(1);
-    *out = (*in_buf) >> 24;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 9)) << (17 - 9);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 9) % (1u32 << 17);
-    out = out.offset(1);
-    *out = (*in_buf) >> 26;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 11)) << (17 - 11);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 11) % (1u32 << 17);
-    out = out.offset(1);
-    *out = (*in_buf) >> 28;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 13)) << (17 - 13);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 13) % (1u32 << 17);
-    out = out.offset(1);
-    *out = (*in_buf) >> 30;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 15)) << (17 - 15);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 15;
-
-    in_buf.offset(1)
-}
-
-unsafe fn unpack18_32(mut in_buf: *const u32, mut out: *mut u32) -> *const u32 {
-    *out = (*in_buf) % (1u32 << 18);
-    out = out.offset(1);
-    *out = (*in_buf) >> 18;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 4)) << (18 - 4);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 4) % (1u32 << 18);
-    out = out.offset(1);
-    *out = (*in_buf) >> 22;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 8)) << (18 - 8);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 8) % (1u32 << 18);
-    out = out.offset(1);
-    *out = (*in_buf) >> 26;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 12)) << (18 - 12);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 12) % (1u32 << 18);
-    out = out.offset(1);
-    *out = (*in_buf) >> 30;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 16)) << (18 - 16);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 16;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 2)) << (18 - 2);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 2) % (1u32 << 18);
-    out = out.offset(1);
-    *out = (*in_buf) >> 20;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 6)) << (18 - 6);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 6) % (1u32 << 18);
-    out = out.offset(1);
-    *out = (*in_buf) >> 24;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 10)) << (18 - 10);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 10) % (1u32 << 18);
-    out = out.offset(1);
-    *out = (*in_buf) >> 28;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 14)) << (18 - 14);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 14;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = (*in_buf) % (1u32 << 18);
-    out = out.offset(1);
-    *out = (*in_buf) >> 18;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 4)) << (18 - 4);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 4) % (1u32 << 18);
-    out = out.offset(1);
-    *out = (*in_buf) >> 22;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 8)) << (18 - 8);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 8) % (1u32 << 18);
-    out = out.offset(1);
-    *out = (*in_buf) >> 26;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 12)) << (18 - 12);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 12) % (1u32 << 18);
-    out = out.offset(1);
-    *out = (*in_buf) >> 30;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 16)) << (18 - 16);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 16;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 2)) << (18 - 2);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 2) % (1u32 << 18);
-    out = out.offset(1);
-    *out = (*in_buf) >> 20;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 6)) << (18 - 6);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 6) % (1u32 << 18);
-    out = out.offset(1);
-    *out = (*in_buf) >> 24;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 10)) << (18 - 10);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 10) % (1u32 << 18);
-    out = out.offset(1);
-    *out = (*in_buf) >> 28;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 14)) << (18 - 14);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 14;
-
-    in_buf.offset(1)
-}
-
-unsafe fn unpack19_32(mut in_buf: *const u32, mut out: *mut u32) -> *const u32 {
-    *out = (*in_buf) % (1u32 << 19);
-    out = out.offset(1);
-    *out = (*in_buf) >> 19;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 6)) << (19 - 6);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 6) % (1u32 << 19);
-    out = out.offset(1);
-    *out = (*in_buf) >> 25;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 12)) << (19 - 12);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 12) % (1u32 << 19);
-    out = out.offset(1);
-    *out = (*in_buf) >> 31;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 18)) << (19 - 18);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 18;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 5)) << (19 - 5);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 5) % (1u32 << 19);
-    out = out.offset(1);
-    *out = (*in_buf) >> 24;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 11)) << (19 - 11);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 11) % (1u32 << 19);
-    out = out.offset(1);
-    *out = (*in_buf) >> 30;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 17)) << (19 - 17);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 17;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 4)) << (19 - 4);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 4) % (1u32 << 19);
-    out = out.offset(1);
-    *out = (*in_buf) >> 23;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 10)) << (19 - 10);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 10) % (1u32 << 19);
-    out = out.offset(1);
-    *out = (*in_buf) >> 29;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 16)) << (19 - 16);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 16;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 3)) << (19 - 3);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 3) % (1u32 << 19);
-    out = out.offset(1);
-    *out = (*in_buf) >> 22;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 9)) << (19 - 9);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 9) % (1u32 << 19);
-    out = out.offset(1);
-    *out = (*in_buf) >> 28;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 15)) << (19 - 15);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 15;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 2)) << (19 - 2);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 2) % (1u32 << 19);
-    out = out.offset(1);
-    *out = (*in_buf) >> 21;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 8)) << (19 - 8);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 8) % (1u32 << 19);
-    out = out.offset(1);
-    *out = (*in_buf) >> 27;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 14)) << (19 - 14);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 14;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 1)) << (19 - 1);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 1) % (1u32 << 19);
-    out = out.offset(1);
-    *out = (*in_buf) >> 20;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 7)) << (19 - 7);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 7) % (1u32 << 19);
-    out = out.offset(1);
-    *out = (*in_buf) >> 26;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 13)) << (19 - 13);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 13;
-
-    in_buf.offset(1)
-}
-
-unsafe fn unpack20_32(mut in_buf: *const u32, mut out: *mut u32) -> *const u32 {
-    *out = (*in_buf) % (1u32 << 20);
-    out = out.offset(1);
-    *out = (*in_buf) >> 20;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 8)) << (20 - 8);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 8) % (1u32 << 20);
-    out = out.offset(1);
-    *out = (*in_buf) >> 28;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 16)) << (20 - 16);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 16;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 4)) << (20 - 4);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 4) % (1u32 << 20);
-    out = out.offset(1);
-    *out = (*in_buf) >> 24;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 12)) << (20 - 12);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 12;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = (*in_buf) % (1u32 << 20);
-    out = out.offset(1);
-    *out = (*in_buf) >> 20;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 8)) << (20 - 8);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 8) % (1u32 << 20);
-    out = out.offset(1);
-    *out = (*in_buf) >> 28;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 16)) << (20 - 16);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 16;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 4)) << (20 - 4);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 4) % (1u32 << 20);
-    out = out.offset(1);
-    *out = (*in_buf) >> 24;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 12)) << (20 - 12);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 12;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = (*in_buf) % (1u32 << 20);
-    out = out.offset(1);
-    *out = (*in_buf) >> 20;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 8)) << (20 - 8);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 8) % (1u32 << 20);
-    out = out.offset(1);
-    *out = (*in_buf) >> 28;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 16)) << (20 - 16);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 16;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 4)) << (20 - 4);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 4) % (1u32 << 20);
-    out = out.offset(1);
-    *out = (*in_buf) >> 24;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 12)) << (20 - 12);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 12;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = (*in_buf) % (1u32 << 20);
-    out = out.offset(1);
-    *out = (*in_buf) >> 20;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 8)) << (20 - 8);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 8) % (1u32 << 20);
-    out = out.offset(1);
-    *out = (*in_buf) >> 28;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 16)) << (20 - 16);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 16;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 4)) << (20 - 4);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 4) % (1u32 << 20);
-    out = out.offset(1);
-    *out = (*in_buf) >> 24;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 12)) << (20 - 12);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 12;
-
-    in_buf.offset(1)
-}
-
-unsafe fn unpack21_32(mut in_buf: *const u32, mut out: *mut u32) -> *const u32 {
-    *out = (*in_buf) % (1u32 << 21);
-    out = out.offset(1);
-    *out = (*in_buf) >> 21;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 10)) << (21 - 10);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 10) % (1u32 << 21);
-    out = out.offset(1);
-    *out = (*in_buf) >> 31;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 20)) << (21 - 20);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 20;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 9)) << (21 - 9);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 9) % (1u32 << 21);
-    out = out.offset(1);
-    *out = (*in_buf) >> 30;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 19)) << (21 - 19);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 19;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 8)) << (21 - 8);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 8) % (1u32 << 21);
-    out = out.offset(1);
-    *out = (*in_buf) >> 29;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 18)) << (21 - 18);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 18;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 7)) << (21 - 7);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 7) % (1u32 << 21);
-    out = out.offset(1);
-    *out = (*in_buf) >> 28;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 17)) << (21 - 17);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 17;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 6)) << (21 - 6);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 6) % (1u32 << 21);
-    out = out.offset(1);
-    *out = (*in_buf) >> 27;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 16)) << (21 - 16);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 16;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 5)) << (21 - 5);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 5) % (1u32 << 21);
-    out = out.offset(1);
-    *out = (*in_buf) >> 26;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 15)) << (21 - 15);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 15;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 4)) << (21 - 4);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 4) % (1u32 << 21);
-    out = out.offset(1);
-    *out = (*in_buf) >> 25;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 14)) << (21 - 14);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 14;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 3)) << (21 - 3);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 3) % (1u32 << 21);
-    out = out.offset(1);
-    *out = (*in_buf) >> 24;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 13)) << (21 - 13);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 13;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 2)) << (21 - 2);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 2) % (1u32 << 21);
-    out = out.offset(1);
-    *out = (*in_buf) >> 23;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 12)) << (21 - 12);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 12;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 1)) << (21 - 1);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 1) % (1u32 << 21);
-    out = out.offset(1);
-    *out = (*in_buf) >> 22;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 11)) << (21 - 11);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 11;
-
-    in_buf.offset(1)
-}
-
-unsafe fn unpack22_32(mut in_buf: *const u32, mut out: *mut u32) -> *const u32 {
-    *out = (*in_buf) % (1u32 << 22);
-    out = out.offset(1);
-    *out = (*in_buf) >> 22;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 12)) << (22 - 12);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 12;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 2)) << (22 - 2);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 2) % (1u32 << 22);
-    out = out.offset(1);
-    *out = (*in_buf) >> 24;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 14)) << (22 - 14);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 14;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 4)) << (22 - 4);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 4) % (1u32 << 22);
-    out = out.offset(1);
-    *out = (*in_buf) >> 26;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 16)) << (22 - 16);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 16;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 6)) << (22 - 6);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 6) % (1u32 << 22);
-    out = out.offset(1);
-    *out = (*in_buf) >> 28;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 18)) << (22 - 18);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 18;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 8)) << (22 - 8);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 8) % (1u32 << 22);
-    out = out.offset(1);
-    *out = (*in_buf) >> 30;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 20)) << (22 - 20);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 20;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 10)) << (22 - 10);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 10;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = (*in_buf) % (1u32 << 22);
-    out = out.offset(1);
-    *out = (*in_buf) >> 22;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 12)) << (22 - 12);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 12;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 2)) << (22 - 2);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 2) % (1u32 << 22);
-    out = out.offset(1);
-    *out = (*in_buf) >> 24;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 14)) << (22 - 14);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 14;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 4)) << (22 - 4);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 4) % (1u32 << 22);
-    out = out.offset(1);
-    *out = (*in_buf) >> 26;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 16)) << (22 - 16);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 16;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 6)) << (22 - 6);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 6) % (1u32 << 22);
-    out = out.offset(1);
-    *out = (*in_buf) >> 28;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 18)) << (22 - 18);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 18;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 8)) << (22 - 8);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 8) % (1u32 << 22);
-    out = out.offset(1);
-    *out = (*in_buf) >> 30;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 20)) << (22 - 20);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 20;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 10)) << (22 - 10);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 10;
-
-    in_buf.offset(1)
-}
-
-unsafe fn unpack23_32(mut in_buf: *const u32, mut out: *mut u32) -> *const u32 {
-    *out = (*in_buf) % (1u32 << 23);
-    out = out.offset(1);
-    *out = (*in_buf) >> 23;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 14)) << (23 - 14);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 14;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 5)) << (23 - 5);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 5) % (1u32 << 23);
-    out = out.offset(1);
-    *out = (*in_buf) >> 28;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 19)) << (23 - 19);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 19;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 10)) << (23 - 10);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 10;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 1)) << (23 - 1);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 1) % (1u32 << 23);
-    out = out.offset(1);
-    *out = (*in_buf) >> 24;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 15)) << (23 - 15);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 15;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 6)) << (23 - 6);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 6) % (1u32 << 23);
-    out = out.offset(1);
-    *out = (*in_buf) >> 29;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 20)) << (23 - 20);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 20;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 11)) << (23 - 11);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 11;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 2)) << (23 - 2);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 2) % (1u32 << 23);
-    out = out.offset(1);
-    *out = (*in_buf) >> 25;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 16)) << (23 - 16);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 16;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 7)) << (23 - 7);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 7) % (1u32 << 23);
-    out = out.offset(1);
-    *out = (*in_buf) >> 30;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 21)) << (23 - 21);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 21;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 12)) << (23 - 12);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 12;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 3)) << (23 - 3);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 3) % (1u32 << 23);
-    out = out.offset(1);
-    *out = (*in_buf) >> 26;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 17)) << (23 - 17);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 17;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 8)) << (23 - 8);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 8) % (1u32 << 23);
-    out = out.offset(1);
-    *out = (*in_buf) >> 31;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 22)) << (23 - 22);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 22;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 13)) << (23 - 13);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 13;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 4)) << (23 - 4);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 4) % (1u32 << 23);
-    out = out.offset(1);
-    *out = (*in_buf) >> 27;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 18)) << (23 - 18);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 18;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 9)) << (23 - 9);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 9;
-
-    in_buf.offset(1)
-}
-
-unsafe fn unpack24_32(mut in_buf: *const u32, mut out: *mut u32) -> *const u32 {
-    *out = (*in_buf) % (1u32 << 24);
-    out = out.offset(1);
-    *out = (*in_buf) >> 24;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 16)) << (24 - 16);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 16;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 8)) << (24 - 8);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 8;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = (*in_buf) % (1u32 << 24);
-    out = out.offset(1);
-    *out = (*in_buf) >> 24;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 16)) << (24 - 16);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 16;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 8)) << (24 - 8);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 8;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = (*in_buf) % (1u32 << 24);
-    out = out.offset(1);
-    *out = (*in_buf) >> 24;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 16)) << (24 - 16);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 16;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 8)) << (24 - 8);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 8;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = (*in_buf) % (1u32 << 24);
-    out = out.offset(1);
-    *out = (*in_buf) >> 24;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 16)) << (24 - 16);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 16;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 8)) << (24 - 8);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 8;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = (*in_buf) % (1u32 << 24);
-    out = out.offset(1);
-    *out = (*in_buf) >> 24;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 16)) << (24 - 16);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 16;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 8)) << (24 - 8);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 8;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = (*in_buf) % (1u32 << 24);
-    out = out.offset(1);
-    *out = (*in_buf) >> 24;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 16)) << (24 - 16);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 16;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 8)) << (24 - 8);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 8;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = (*in_buf) % (1u32 << 24);
-    out = out.offset(1);
-    *out = (*in_buf) >> 24;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 16)) << (24 - 16);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 16;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 8)) << (24 - 8);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 8;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = (*in_buf) % (1u32 << 24);
-    out = out.offset(1);
-    *out = (*in_buf) >> 24;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 16)) << (24 - 16);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 16;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 8)) << (24 - 8);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 8;
-
-    in_buf.offset(1)
-}
-
-unsafe fn unpack25_32(mut in_buf: *const u32, mut out: *mut u32) -> *const u32 {
-    *out = (*in_buf) % (1u32 << 25);
-    out = out.offset(1);
-    *out = (*in_buf) >> 25;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 18)) << (25 - 18);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 18;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 11)) << (25 - 11);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 11;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 4)) << (25 - 4);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 4) % (1u32 << 25);
-    out = out.offset(1);
-    *out = (*in_buf) >> 29;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 22)) << (25 - 22);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 22;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 15)) << (25 - 15);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 15;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 8)) << (25 - 8);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 8;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 1)) << (25 - 1);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 1) % (1u32 << 25);
-    out = out.offset(1);
-    *out = (*in_buf) >> 26;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 19)) << (25 - 19);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 19;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 12)) << (25 - 12);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 12;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 5)) << (25 - 5);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 5) % (1u32 << 25);
-    out = out.offset(1);
-    *out = (*in_buf) >> 30;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 23)) << (25 - 23);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 23;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 16)) << (25 - 16);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 16;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 9)) << (25 - 9);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 9;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 2)) << (25 - 2);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 2) % (1u32 << 25);
-    out = out.offset(1);
-    *out = (*in_buf) >> 27;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 20)) << (25 - 20);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 20;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 13)) << (25 - 13);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 13;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 6)) << (25 - 6);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 6) % (1u32 << 25);
-    out = out.offset(1);
-    *out = (*in_buf) >> 31;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 24)) << (25 - 24);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 24;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 17)) << (25 - 17);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 17;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 10)) << (25 - 10);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 10;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 3)) << (25 - 3);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 3) % (1u32 << 25);
-    out = out.offset(1);
-    *out = (*in_buf) >> 28;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 21)) << (25 - 21);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 21;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 14)) << (25 - 14);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 14;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 7)) << (25 - 7);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 7;
-
-    in_buf.offset(1)
-}
-
-unsafe fn unpack26_32(mut in_buf: *const u32, mut out: *mut u32) -> *const u32 {
-    *out = (*in_buf) % (1u32 << 26);
-    out = out.offset(1);
-    *out = (*in_buf) >> 26;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 20)) << (26 - 20);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 20;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 14)) << (26 - 14);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 14;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 8)) << (26 - 8);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 8;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 2)) << (26 - 2);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 2) % (1u32 << 26);
-    out = out.offset(1);
-    *out = (*in_buf) >> 28;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 22)) << (26 - 22);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 22;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 16)) << (26 - 16);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 16;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 10)) << (26 - 10);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 10;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 4)) << (26 - 4);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 4) % (1u32 << 26);
-    out = out.offset(1);
-    *out = (*in_buf) >> 30;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 24)) << (26 - 24);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 24;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 18)) << (26 - 18);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 18;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 12)) << (26 - 12);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 12;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 6)) << (26 - 6);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 6;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = (*in_buf) % (1u32 << 26);
-    out = out.offset(1);
-    *out = (*in_buf) >> 26;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 20)) << (26 - 20);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 20;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 14)) << (26 - 14);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 14;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 8)) << (26 - 8);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 8;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 2)) << (26 - 2);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 2) % (1u32 << 26);
-    out = out.offset(1);
-    *out = (*in_buf) >> 28;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 22)) << (26 - 22);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 22;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 16)) << (26 - 16);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 16;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 10)) << (26 - 10);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 10;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 4)) << (26 - 4);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 4) % (1u32 << 26);
-    out = out.offset(1);
-    *out = (*in_buf) >> 30;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 24)) << (26 - 24);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 24;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 18)) << (26 - 18);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 18;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 12)) << (26 - 12);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 12;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 6)) << (26 - 6);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 6;
-
-    in_buf.offset(1)
-}
-
-unsafe fn unpack27_32(mut in_buf: *const u32, mut out: *mut u32) -> *const u32 {
-    *out = (*in_buf) % (1u32 << 27);
-    out = out.offset(1);
-    *out = (*in_buf) >> 27;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 22)) << (27 - 22);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 22;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 17)) << (27 - 17);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 17;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 12)) << (27 - 12);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 12;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 7)) << (27 - 7);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 7;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 2)) << (27 - 2);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 2) % (1u32 << 27);
-    out = out.offset(1);
-    *out = (*in_buf) >> 29;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 24)) << (27 - 24);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 24;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 19)) << (27 - 19);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 19;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 14)) << (27 - 14);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 14;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 9)) << (27 - 9);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 9;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 4)) << (27 - 4);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 4) % (1u32 << 27);
-    out = out.offset(1);
-    *out = (*in_buf) >> 31;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 26)) << (27 - 26);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 26;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 21)) << (27 - 21);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 21;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 16)) << (27 - 16);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 16;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 11)) << (27 - 11);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 11;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 6)) << (27 - 6);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 6;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 1)) << (27 - 1);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 1) % (1u32 << 27);
-    out = out.offset(1);
-    *out = (*in_buf) >> 28;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 23)) << (27 - 23);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 23;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 18)) << (27 - 18);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 18;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 13)) << (27 - 13);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 13;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 8)) << (27 - 8);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 8;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 3)) << (27 - 3);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 3) % (1u32 << 27);
-    out = out.offset(1);
-    *out = (*in_buf) >> 30;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 25)) << (27 - 25);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 25;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 20)) << (27 - 20);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 20;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 15)) << (27 - 15);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 15;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 10)) << (27 - 10);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 10;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 5)) << (27 - 5);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 5;
-
-    in_buf.offset(1)
-}
-
-unsafe fn unpack28_32(mut in_buf: *const u32, mut out: *mut u32) -> *const u32 {
-    *out = (*in_buf) % (1u32 << 28);
-    out = out.offset(1);
-    *out = (*in_buf) >> 28;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 24)) << (28 - 24);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 24;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 20)) << (28 - 20);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 20;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 16)) << (28 - 16);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 16;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 12)) << (28 - 12);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 12;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 8)) << (28 - 8);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 8;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 4)) << (28 - 4);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 4;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = (*in_buf) % (1u32 << 28);
-    out = out.offset(1);
-    *out = (*in_buf) >> 28;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 24)) << (28 - 24);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 24;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 20)) << (28 - 20);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 20;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 16)) << (28 - 16);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 16;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 12)) << (28 - 12);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 12;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 8)) << (28 - 8);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 8;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 4)) << (28 - 4);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 4;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = (*in_buf) % (1u32 << 28);
-    out = out.offset(1);
-    *out = (*in_buf) >> 28;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 24)) << (28 - 24);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 24;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 20)) << (28 - 20);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 20;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 16)) << (28 - 16);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 16;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 12)) << (28 - 12);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 12;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 8)) << (28 - 8);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 8;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 4)) << (28 - 4);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 4;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = (*in_buf) % (1u32 << 28);
-    out = out.offset(1);
-    *out = (*in_buf) >> 28;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 24)) << (28 - 24);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 24;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 20)) << (28 - 20);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 20;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 16)) << (28 - 16);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 16;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 12)) << (28 - 12);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 12;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 8)) << (28 - 8);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 8;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 4)) << (28 - 4);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 4;
-
-    in_buf.offset(1)
-}
-
-unsafe fn unpack29_32(mut in_buf: *const u32, mut out: *mut u32) -> *const u32 {
-    *out = (*in_buf) % (1u32 << 29);
-    out = out.offset(1);
-    *out = (*in_buf) >> 29;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 26)) << (29 - 26);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 26;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 23)) << (29 - 23);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 23;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 20)) << (29 - 20);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 20;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 17)) << (29 - 17);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 17;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 14)) << (29 - 14);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 14;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 11)) << (29 - 11);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 11;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 8)) << (29 - 8);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 8;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 5)) << (29 - 5);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 5;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 2)) << (29 - 2);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 2) % (1u32 << 29);
-    out = out.offset(1);
-    *out = (*in_buf) >> 31;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 28)) << (29 - 28);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 28;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 25)) << (29 - 25);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 25;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 22)) << (29 - 22);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 22;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 19)) << (29 - 19);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 19;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 16)) << (29 - 16);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 16;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 13)) << (29 - 13);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 13;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 10)) << (29 - 10);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 10;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 7)) << (29 - 7);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 7;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 4)) << (29 - 4);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 4;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 1)) << (29 - 1);
-    out = out.offset(1);
-
-    *out = ((*in_buf) >> 1) % (1u32 << 29);
-    out = out.offset(1);
-    *out = (*in_buf) >> 30;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 27)) << (29 - 27);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 27;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 24)) << (29 - 24);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 24;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 21)) << (29 - 21);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 21;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 18)) << (29 - 18);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 18;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 15)) << (29 - 15);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 15;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 12)) << (29 - 12);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 12;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 9)) << (29 - 9);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 9;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 6)) << (29 - 6);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 6;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 3)) << (29 - 3);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 3;
-
-    in_buf.offset(1)
-}
-
-unsafe fn unpack30_32(mut in_buf: *const u32, mut out: *mut u32) -> *const u32 {
-    *out = (*in_buf) % (1u32 << 30);
-    out = out.offset(1);
-    *out = (*in_buf) >> 30;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 28)) << (30 - 28);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 28;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 26)) << (30 - 26);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 26;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 24)) << (30 - 24);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 24;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 22)) << (30 - 22);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 22;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 20)) << (30 - 20);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 20;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 18)) << (30 - 18);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 18;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 16)) << (30 - 16);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 16;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 14)) << (30 - 14);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 14;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 12)) << (30 - 12);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 12;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 10)) << (30 - 10);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 10;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 8)) << (30 - 8);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 8;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 6)) << (30 - 6);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 6;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 4)) << (30 - 4);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 4;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 2)) << (30 - 2);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 2;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = (*in_buf) % (1u32 << 30);
-    out = out.offset(1);
-    *out = (*in_buf) >> 30;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 28)) << (30 - 28);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 28;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 26)) << (30 - 26);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 26;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 24)) << (30 - 24);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 24;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 22)) << (30 - 22);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 22;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 20)) << (30 - 20);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 20;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 18)) << (30 - 18);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 18;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 16)) << (30 - 16);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 16;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 14)) << (30 - 14);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 14;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 12)) << (30 - 12);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 12;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 10)) << (30 - 10);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 10;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 8)) << (30 - 8);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 8;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 6)) << (30 - 6);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 6;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 4)) << (30 - 4);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 4;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 2)) << (30 - 2);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 2;
-
-    in_buf.offset(1)
-}
-
-unsafe fn unpack31_32(mut in_buf: *const u32, mut out: *mut u32) -> *const u32 {
-    *out = (*in_buf) % (1u32 << 31);
-    out = out.offset(1);
-    *out = (*in_buf) >> 31;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 30)) << (31 - 30);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 30;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 29)) << (31 - 29);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 29;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 28)) << (31 - 28);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 28;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 27)) << (31 - 27);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 27;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 26)) << (31 - 26);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 26;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 25)) << (31 - 25);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 25;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 24)) << (31 - 24);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 24;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 23)) << (31 - 23);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 23;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 22)) << (31 - 22);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 22;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 21)) << (31 - 21);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 21;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 20)) << (31 - 20);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 20;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 19)) << (31 - 19);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 19;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 18)) << (31 - 18);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 18;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 17)) << (31 - 17);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 17;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 16)) << (31 - 16);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 16;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 15)) << (31 - 15);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 15;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 14)) << (31 - 14);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 14;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 13)) << (31 - 13);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 13;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 12)) << (31 - 12);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 12;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 11)) << (31 - 11);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 11;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 10)) << (31 - 10);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 10;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 9)) << (31 - 9);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 9;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 8)) << (31 - 8);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 8;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 7)) << (31 - 7);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 7;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 6)) << (31 - 6);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 6;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 5)) << (31 - 5);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 5;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 4)) << (31 - 4);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 4;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 3)) << (31 - 3);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 3;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 2)) << (31 - 2);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 2;
-    in_buf = in_buf.offset(1);
-    *out |= ((*in_buf) % (1u32 << 1)) << (31 - 1);
-    out = out.offset(1);
-
-    *out = (*in_buf) >> 1;
-
-    in_buf.offset(1)
-}
-
-unsafe fn unpack32_32(mut in_buf: *const u32, mut out: *mut u32) -> *const u32 {
-    *out = *in_buf;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = *in_buf;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = *in_buf;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = *in_buf;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = *in_buf;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = *in_buf;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = *in_buf;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = *in_buf;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = *in_buf;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = *in_buf;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = *in_buf;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = *in_buf;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = *in_buf;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = *in_buf;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = *in_buf;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = *in_buf;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = *in_buf;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = *in_buf;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = *in_buf;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = *in_buf;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = *in_buf;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = *in_buf;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = *in_buf;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = *in_buf;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = *in_buf;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = *in_buf;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = *in_buf;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = *in_buf;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = *in_buf;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = *in_buf;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = *in_buf;
-    in_buf = in_buf.offset(1);
-    out = out.offset(1);
-
-    *out = *in_buf;
-
-    in_buf.offset(1)
-}
diff --git a/rust/parquet/src/util/bit_util.rs b/rust/parquet/src/util/bit_util.rs
deleted file mode 100644
index 677b669287b..00000000000
--- a/rust/parquet/src/util/bit_util.rs
+++ /dev/null
@@ -1,1143 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::{cmp, mem::size_of};
-
-use crate::data_type::AsBytes;
-use crate::errors::{ParquetError, Result};
-use crate::util::{bit_packing::unpack32, memory::ByteBufferPtr};
-
-#[inline]
-pub fn from_ne_slice<T: FromBytes>(bs: &[u8]) -> T {
-    let mut b = T::Buffer::default();
-    {
-        let b = b.as_mut();
-        let bs = &bs[..b.len()];
-        b.copy_from_slice(bs);
-    }
-    T::from_ne_bytes(b)
-}
-
-pub trait FromBytes: Sized {
-    type Buffer: AsMut<[u8]> + Default;
-    fn from_le_bytes(bs: Self::Buffer) -> Self;
-    fn from_be_bytes(bs: Self::Buffer) -> Self;
-    fn from_ne_bytes(bs: Self::Buffer) -> Self;
-}
-
-macro_rules! from_le_bytes {
-    ($($ty: ty),*) => {
-        $(
-        impl FromBytes for $ty {
-            type Buffer = [u8; size_of::<Self>()];
-            fn from_le_bytes(bs: Self::Buffer) -> Self {
-                <$ty>::from_le_bytes(bs)
-            }
-            fn from_be_bytes(bs: Self::Buffer) -> Self {
-                <$ty>::from_be_bytes(bs)
-            }
-            fn from_ne_bytes(bs: Self::Buffer) -> Self {
-                <$ty>::from_ne_bytes(bs)
-            }
-        }
-        )*
-    };
-}
-
-impl FromBytes for bool {
-    type Buffer = [u8; 1];
-    fn from_le_bytes(bs: Self::Buffer) -> Self {
-        Self::from_ne_bytes(bs)
-    }
-    fn from_be_bytes(bs: Self::Buffer) -> Self {
-        Self::from_ne_bytes(bs)
-    }
-    fn from_ne_bytes(bs: Self::Buffer) -> Self {
-        match bs[0] {
-            0 => false,
-            1 => true,
-            _ => panic!("Invalid byte when reading bool"),
-        }
-    }
-}
-
-from_le_bytes! { u8, u16, u32, u64, i8, i16, i32, i64, f32, f64 }
-
-/// Reads `$size` of bytes from `$src`, and reinterprets them as type `$ty`, in
-/// little-endian order. `$ty` must implement the `Default` trait. Otherwise this won't
-/// compile.
-/// This is copied and modified from byteorder crate.
-macro_rules! read_num_bytes {
-    ($ty:ty, $size:expr, $src:expr) => {{
-        assert!($size <= $src.len());
-        let mut buffer = <$ty as $crate::util::bit_util::FromBytes>::Buffer::default();
-        buffer.as_mut()[..$size].copy_from_slice(&$src[..$size]);
-        <$ty>::from_ne_bytes(buffer)
-    }};
-}
-
-/// Converts value `val` of type `T` to a byte vector, by reading `num_bytes` from `val`.
-/// NOTE: if `val` is less than the size of `T` then it can be truncated.
-#[inline]
-pub fn convert_to_bytes<T>(val: &T, num_bytes: usize) -> Vec<u8>
-where
-    T: ?Sized + AsBytes,
-{
-    let mut bytes: Vec<u8> = vec![0; num_bytes];
-    memcpy_value(val.as_bytes(), num_bytes, &mut bytes);
-    bytes
-}
-
-#[inline]
-pub fn memcpy(source: &[u8], target: &mut [u8]) {
-    assert!(target.len() >= source.len());
-    target[..source.len()].copy_from_slice(source)
-}
-
-#[inline]
-pub fn memcpy_value<T>(source: &T, num_bytes: usize, target: &mut [u8])
-where
-    T: ?Sized + AsBytes,
-{
-    assert!(
-        target.len() >= num_bytes,
-        "Not enough space. Only had {} bytes but need to put {} bytes",
-        target.len(),
-        num_bytes
-    );
-    memcpy(&source.as_bytes()[..num_bytes], target)
-}
-
-/// Returns the ceil of value/divisor
-#[inline]
-pub fn ceil(value: i64, divisor: i64) -> i64 {
-    value / divisor + ((value % divisor != 0) as i64)
-}
-
-/// Returns ceil(log2(x))
-#[inline]
-pub fn log2(mut x: u64) -> i32 {
-    if x == 1 {
-        return 0;
-    }
-    x -= 1;
-    let mut result = 0;
-    while x > 0 {
-        x >>= 1;
-        result += 1;
-    }
-    result
-}
-
-/// Returns the `num_bits` least-significant bits of `v`
-#[inline]
-pub fn trailing_bits(v: u64, num_bits: usize) -> u64 {
-    if num_bits == 0 {
-        return 0;
-    }
-    if num_bits >= 64 {
-        return v;
-    }
-    let n = 64 - num_bits;
-    (v << n) >> n
-}
-
-#[inline]
-pub fn set_array_bit(bits: &mut [u8], i: usize) {
-    bits[i / 8] |= 1 << (i % 8);
-}
-
-#[inline]
-pub fn unset_array_bit(bits: &mut [u8], i: usize) {
-    bits[i / 8] &= !(1 << (i % 8));
-}
-
-/// Returns the minimum number of bits needed to represent the value 'x'
-#[inline]
-pub fn num_required_bits(x: u64) -> usize {
-    for i in (0..64).rev() {
-        if x & (1u64 << i) != 0 {
-            return i + 1;
-        }
-    }
-    0
-}
-
-static BIT_MASK: [u8; 8] = [1, 2, 4, 8, 16, 32, 64, 128];
-
-/// Returns whether bit at position `i` in `data` is set or not
-#[inline]
-pub fn get_bit(data: &[u8], i: usize) -> bool {
-    (data[i >> 3] & BIT_MASK[i & 7]) != 0
-}
-
-/// Utility class for writing bit/byte streams. This class can write data in either
-/// bit packed or byte aligned fashion.
-pub struct BitWriter {
-    buffer: Vec<u8>,
-    max_bytes: usize,
-    buffered_values: u64,
-    byte_offset: usize,
-    bit_offset: usize,
-    start: usize,
-}
-
-impl BitWriter {
-    pub fn new(max_bytes: usize) -> Self {
-        Self {
-            buffer: vec![0; max_bytes],
-            max_bytes,
-            buffered_values: 0,
-            byte_offset: 0,
-            bit_offset: 0,
-            start: 0,
-        }
-    }
-
-    /// Initializes the writer from the existing buffer `buffer` and starting
-    /// offset `start`.
-    pub fn new_from_buf(buffer: Vec<u8>, start: usize) -> Self {
-        assert!(start < buffer.len());
-        let len = buffer.len();
-        Self {
-            buffer,
-            max_bytes: len,
-            buffered_values: 0,
-            byte_offset: start,
-            bit_offset: 0,
-            start,
-        }
-    }
-
-    /// Consumes and returns the current buffer.
-    #[inline]
-    pub fn consume(mut self) -> Vec<u8> {
-        self.flush();
-        self.buffer.truncate(self.byte_offset);
-        self.buffer
-    }
-
-    /// Flushes the internal buffered bits and returns the buffer's content.
-    /// This is a borrow equivalent of `consume` method.
-    #[inline]
-    pub fn flush_buffer(&mut self) -> &[u8] {
-        self.flush();
-        &self.buffer()[0..self.byte_offset]
-    }
-
-    /// Clears the internal state so the buffer can be reused.
-    #[inline]
-    pub fn clear(&mut self) {
-        self.buffered_values = 0;
-        self.byte_offset = self.start;
-        self.bit_offset = 0;
-    }
-
-    /// Flushes the internal buffered bits and the align the buffer to the next byte.
-    #[inline]
-    pub fn flush(&mut self) {
-        let num_bytes = ceil(self.bit_offset as i64, 8) as usize;
-        assert!(self.byte_offset + num_bytes <= self.max_bytes);
-        memcpy_value(
-            &self.buffered_values,
-            num_bytes,
-            &mut self.buffer[self.byte_offset..],
-        );
-        self.buffered_values = 0;
-        self.bit_offset = 0;
-        self.byte_offset += num_bytes;
-    }
-
-    /// Advances the current offset by skipping `num_bytes`, flushing the internal bit
-    /// buffer first.
-    /// This is useful when you want to jump over `num_bytes` bytes and come back later
-    /// to fill these bytes.
-    ///
-    /// Returns error if `num_bytes` is beyond the boundary of the internal buffer.
-    /// Otherwise, returns the old offset.
-    #[inline]
-    pub fn skip(&mut self, num_bytes: usize) -> Result<usize> {
-        self.flush();
-        assert!(self.byte_offset <= self.max_bytes);
-        if self.byte_offset + num_bytes > self.max_bytes {
-            return Err(general_err!(
-                "Not enough bytes left in BitWriter. Need {} but only have {}",
-                self.byte_offset + num_bytes,
-                self.max_bytes
-            ));
-        }
-        let result = self.byte_offset;
-        self.byte_offset += num_bytes;
-        Ok(result)
-    }
-
-    /// Returns a slice containing the next `num_bytes` bytes starting from the current
-    /// offset, and advances the underlying buffer by `num_bytes`.
-    /// This is useful when you want to jump over `num_bytes` bytes and come back later
-    /// to fill these bytes.
-    #[inline]
-    pub fn get_next_byte_ptr(&mut self, num_bytes: usize) -> Result<&mut [u8]> {
-        let offset = self.skip(num_bytes)?;
-        Ok(&mut self.buffer[offset..offset + num_bytes])
-    }
-
-    #[inline]
-    pub fn bytes_written(&self) -> usize {
-        self.byte_offset - self.start + ceil(self.bit_offset as i64, 8) as usize
-    }
-
-    #[inline]
-    pub fn buffer(&self) -> &[u8] {
-        &self.buffer[self.start..]
-    }
-
-    #[inline]
-    pub fn byte_offset(&self) -> usize {
-        self.byte_offset
-    }
-
-    /// Returns the internal buffer length. This is the maximum number of bytes that this
-    /// writer can write. User needs to call `consume` to consume the current buffer
-    /// before more data can be written.
-    #[inline]
-    pub fn buffer_len(&self) -> usize {
-        self.max_bytes
-    }
-
-    pub fn write_at(&mut self, offset: usize, value: u8) {
-        self.buffer[offset] = value;
-    }
-
-    /// Writes the `num_bits` LSB of value `v` to the internal buffer of this writer.
-    /// The `num_bits` must not be greater than 64. This is bit packed.
-    ///
-    /// Returns false if there's not enough room left. True otherwise.
-    #[inline]
-    pub fn put_value(&mut self, v: u64, num_bits: usize) -> bool {
-        assert!(num_bits <= 64);
-        assert_eq!(v.checked_shr(num_bits as u32).unwrap_or(0), 0); // covers case v >> 64
-
-        if self.byte_offset * 8 + self.bit_offset + num_bits > self.max_bytes as usize * 8
-        {
-            return false;
-        }
-
-        self.buffered_values |= v << self.bit_offset;
-        self.bit_offset += num_bits;
-        if self.bit_offset >= 64 {
-            memcpy_value(
-                &self.buffered_values,
-                8,
-                &mut self.buffer[self.byte_offset..],
-            );
-            self.byte_offset += 8;
-            self.bit_offset -= 64;
-            self.buffered_values = 0;
-            // Perform checked right shift: v >> offset, where offset < 64, otherwise we
-            // shift all bits
-            self.buffered_values = v
-                .checked_shr((num_bits - self.bit_offset) as u32)
-                .unwrap_or(0);
-        }
-        assert!(self.bit_offset < 64);
-        true
-    }
-
-    /// Writes `val` of `num_bytes` bytes to the next aligned byte. If size of `T` is
-    /// larger than `num_bytes`, extra higher ordered bytes will be ignored.
-    ///
-    /// Returns false if there's not enough room left. True otherwise.
-    #[inline]
-    pub fn put_aligned<T: AsBytes>(&mut self, val: T, num_bytes: usize) -> bool {
-        let result = self.get_next_byte_ptr(num_bytes);
-        if result.is_err() {
-            // TODO: should we return `Result` for this func?
-            return false;
-        }
-        let mut ptr = result.unwrap();
-        memcpy_value(&val, num_bytes, &mut ptr);
-        true
-    }
-
-    /// Writes `val` of `num_bytes` bytes at the designated `offset`. The `offset` is the
-    /// offset starting from the beginning of the internal buffer that this writer
-    /// maintains. Note that this will overwrite any existing data between `offset` and
-    /// `offset + num_bytes`. Also that if size of `T` is larger than `num_bytes`, extra
-    /// higher ordered bytes will be ignored.
-    ///
-    /// Returns false if there's not enough room left, or the `pos` is not valid.
-    /// True otherwise.
-    #[inline]
-    pub fn put_aligned_offset<T: AsBytes>(
-        &mut self,
-        val: T,
-        num_bytes: usize,
-        offset: usize,
-    ) -> bool {
-        if num_bytes + offset > self.max_bytes {
-            return false;
-        }
-        memcpy_value(
-            &val,
-            num_bytes,
-            &mut self.buffer[offset..offset + num_bytes],
-        );
-        true
-    }
-
-    /// Writes a VLQ encoded integer `v` to this buffer. The value is byte aligned.
-    ///
-    /// Returns false if there's not enough room left. True otherwise.
-    #[inline]
-    pub fn put_vlq_int(&mut self, mut v: u64) -> bool {
-        let mut result = true;
-        while v & 0xFFFFFFFFFFFFFF80 != 0 {
-            result &= self.put_aligned::<u8>(((v & 0x7F) | 0x80) as u8, 1);
-            v >>= 7;
-        }
-        result &= self.put_aligned::<u8>((v & 0x7F) as u8, 1);
-        result
-    }
-
-    /// Writes a zigzag-VLQ encoded (in little endian order) int `v` to this buffer.
-    /// Zigzag-VLQ is a variant of VLQ encoding where negative and positive
-    /// numbers are encoded in a zigzag fashion.
-    /// See: https://developers.google.com/protocol-buffers/docs/encoding
-    ///
-    /// Returns false if there's not enough room left. True otherwise.
-    #[inline]
-    pub fn put_zigzag_vlq_int(&mut self, v: i64) -> bool {
-        let u: u64 = ((v << 1) ^ (v >> 63)) as u64;
-        self.put_vlq_int(u)
-    }
-}
-
-/// Maximum byte length for a VLQ encoded integer
-/// MAX_VLQ_BYTE_LEN = 5 for i32, and MAX_VLQ_BYTE_LEN = 10 for i64
-pub const MAX_VLQ_BYTE_LEN: usize = 10;
-
-pub struct BitReader {
-    // The byte buffer to read from, passed in by client
-    buffer: ByteBufferPtr,
-
-    // Bytes are memcpy'd from `buffer` and values are read from this variable.
-    // This is faster than reading values byte by byte directly from `buffer`
-    buffered_values: u64,
-
-    //
-    // End                                         Start
-    // |............|B|B|B|B|B|B|B|B|..............|
-    //                   ^          ^
-    //                 bit_offset   byte_offset
-    //
-    // Current byte offset in `buffer`
-    byte_offset: usize,
-
-    // Current bit offset in `buffered_values`
-    bit_offset: usize,
-
-    // Total number of bytes in `buffer`
-    total_bytes: usize,
-}
-
-/// Utility class to read bit/byte stream. This class can read bits or bytes that are
-/// either byte aligned or not.
-impl BitReader {
-    pub fn new(buffer: ByteBufferPtr) -> Self {
-        let total_bytes = buffer.len();
-        let num_bytes = cmp::min(8, total_bytes);
-        let buffered_values = read_num_bytes!(u64, num_bytes, buffer.as_ref());
-        BitReader {
-            buffer,
-            buffered_values,
-            byte_offset: 0,
-            bit_offset: 0,
-            total_bytes,
-        }
-    }
-
-    pub fn reset(&mut self, buffer: ByteBufferPtr) {
-        self.buffer = buffer;
-        self.total_bytes = self.buffer.len();
-        let num_bytes = cmp::min(8, self.total_bytes);
-        self.buffered_values = read_num_bytes!(u64, num_bytes, self.buffer.as_ref());
-        self.byte_offset = 0;
-        self.bit_offset = 0;
-    }
-
-    /// Gets the current byte offset
-    #[inline]
-    pub fn get_byte_offset(&self) -> usize {
-        self.byte_offset + ceil(self.bit_offset as i64, 8) as usize
-    }
-
-    /// Reads a value of type `T` and of size `num_bits`.
-    ///
-    /// Returns `None` if there's not enough data available. `Some` otherwise.
-    pub fn get_value<T: FromBytes>(&mut self, num_bits: usize) -> Option<T> {
-        assert!(num_bits <= 64);
-        assert!(num_bits <= size_of::<T>() * 8);
-
-        if self.byte_offset * 8 + self.bit_offset + num_bits > self.total_bytes * 8 {
-            return None;
-        }
-
-        let mut v = trailing_bits(self.buffered_values, self.bit_offset + num_bits)
-            >> self.bit_offset;
-        self.bit_offset += num_bits;
-
-        if self.bit_offset >= 64 {
-            self.byte_offset += 8;
-            self.bit_offset -= 64;
-
-            self.reload_buffer_values();
-            v |= trailing_bits(self.buffered_values, self.bit_offset)
-                .wrapping_shl((num_bits - self.bit_offset) as u32);
-        }
-
-        // TODO: better to avoid copying here
-        Some(from_ne_slice(v.as_bytes()))
-    }
-
-    pub fn get_batch<T: FromBytes>(&mut self, batch: &mut [T], num_bits: usize) -> usize {
-        assert!(num_bits <= 32);
-        assert!(num_bits <= size_of::<T>() * 8);
-
-        let mut values_to_read = batch.len();
-        let needed_bits = num_bits * values_to_read;
-        let remaining_bits = (self.total_bytes - self.byte_offset) * 8 - self.bit_offset;
-        if remaining_bits < needed_bits {
-            values_to_read = remaining_bits / num_bits;
-        }
-
-        let mut i = 0;
-
-        // First align bit offset to byte offset
-        if self.bit_offset != 0 {
-            while i < values_to_read && self.bit_offset != 0 {
-                batch[i] = self
-                    .get_value(num_bits)
-                    .expect("expected to have more data");
-                i += 1;
-            }
-        }
-
-        unsafe {
-            let in_buf = &self.buffer.data()[self.byte_offset..];
-            let mut in_ptr = in_buf as *const [u8] as *const u8 as *const u32;
-            // FIXME assert!(memory::is_ptr_aligned(in_ptr));
-            if size_of::<T>() == 4 {
-                while values_to_read - i >= 32 {
-                    let out_ptr = &mut batch[i..] as *mut [T] as *mut T as *mut u32;
-                    in_ptr = unpack32(in_ptr, out_ptr, num_bits);
-                    self.byte_offset += 4 * num_bits;
-                    i += 32;
-                }
-            } else {
-                let mut out_buf = [0u32; 32];
-                let out_ptr = &mut out_buf as &mut [u32] as *mut [u32] as *mut u32;
-                while values_to_read - i >= 32 {
-                    in_ptr = unpack32(in_ptr, out_ptr, num_bits);
-                    self.byte_offset += 4 * num_bits;
-                    for n in 0..32 {
-                        // We need to copy from smaller size to bigger size to avoid
-                        // overwriting other memory regions.
-                        if size_of::<T>() > size_of::<u32>() {
-                            std::ptr::copy_nonoverlapping(
-                                out_buf[n..].as_ptr() as *const u32,
-                                &mut batch[i] as *mut T as *mut u32,
-                                1,
-                            );
-                        } else {
-                            std::ptr::copy_nonoverlapping(
-                                out_buf[n..].as_ptr() as *const T,
-                                &mut batch[i] as *mut T,
-                                1,
-                            );
-                        }
-                        i += 1;
-                    }
-                }
-            }
-        }
-
-        assert!(values_to_read - i < 32);
-
-        self.reload_buffer_values();
-        while i < values_to_read {
-            batch[i] = self
-                .get_value(num_bits)
-                .expect("expected to have more data");
-            i += 1;
-        }
-
-        values_to_read
-    }
-
-    /// Reads a `num_bytes`-sized value from this buffer and return it.
-    /// `T` needs to be a little-endian native type. The value is assumed to be byte
-    /// aligned so the bit reader will be advanced to the start of the next byte before
-    /// reading the value.
-
-    /// Returns `Some` if there's enough bytes left to form a value of `T`.
-    /// Otherwise `None`.
-    pub fn get_aligned<T: FromBytes>(&mut self, num_bytes: usize) -> Option<T> {
-        let bytes_read = ceil(self.bit_offset as i64, 8) as usize;
-        if self.byte_offset + bytes_read + num_bytes > self.total_bytes {
-            return None;
-        }
-
-        // Advance byte_offset to next unread byte and read num_bytes
-        self.byte_offset += bytes_read;
-        let v = read_num_bytes!(
-            T,
-            num_bytes,
-            self.buffer.start_from(self.byte_offset).as_ref()
-        );
-        self.byte_offset += num_bytes;
-
-        // Reset buffered_values
-        self.bit_offset = 0;
-        self.reload_buffer_values();
-        Some(v)
-    }
-
-    /// Reads a VLQ encoded (in little endian order) int from the stream.
-    /// The encoded int must start at the beginning of a byte.
-    ///
-    /// Returns `None` if there's not enough bytes in the stream. `Some` otherwise.
-    pub fn get_vlq_int(&mut self) -> Option<i64> {
-        let mut shift = 0;
-        let mut v: i64 = 0;
-        while let Some(byte) = self.get_aligned::<u8>(1) {
-            v |= ((byte & 0x7F) as i64) << shift;
-            shift += 7;
-            assert!(
-                shift <= MAX_VLQ_BYTE_LEN * 7,
-                "Num of bytes exceed MAX_VLQ_BYTE_LEN ({})",
-                MAX_VLQ_BYTE_LEN
-            );
-            if byte & 0x80 == 0 {
-                return Some(v);
-            }
-        }
-        None
-    }
-
-    /// Reads a zigzag-VLQ encoded (in little endian order) int from the stream
-    /// Zigzag-VLQ is a variant of VLQ encoding where negative and positive numbers are
-    /// encoded in a zigzag fashion.
-    /// See: https://developers.google.com/protocol-buffers/docs/encoding
-    ///
-    /// Note: the encoded int must start at the beginning of a byte.
-    ///
-    /// Returns `None` if the number of bytes there's not enough bytes in the stream.
-    /// `Some` otherwise.
-    #[inline]
-    pub fn get_zigzag_vlq_int(&mut self) -> Option<i64> {
-        self.get_vlq_int().map(|v| {
-            let u = v as u64;
-            (u >> 1) as i64 ^ -((u & 1) as i64)
-        })
-    }
-
-    fn reload_buffer_values(&mut self) {
-        let bytes_to_read = cmp::min(self.total_bytes - self.byte_offset, 8);
-        self.buffered_values = read_num_bytes!(
-            u64,
-            bytes_to_read,
-            self.buffer.start_from(self.byte_offset).as_ref()
-        );
-    }
-}
-
-impl From<Vec<u8>> for BitReader {
-    #[inline]
-    fn from(buffer: Vec<u8>) -> Self {
-        BitReader::new(ByteBufferPtr::new(buffer))
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::super::test_common::*;
-    use super::*;
-
-    use rand::distributions::{Distribution, Standard};
-    use std::fmt::Debug;
-
-    #[test]
-    fn test_ceil() {
-        assert_eq!(ceil(0, 1), 0);
-        assert_eq!(ceil(1, 1), 1);
-        assert_eq!(ceil(1, 2), 1);
-        assert_eq!(ceil(1, 8), 1);
-        assert_eq!(ceil(7, 8), 1);
-        assert_eq!(ceil(8, 8), 1);
-        assert_eq!(ceil(9, 8), 2);
-        assert_eq!(ceil(9, 9), 1);
-        assert_eq!(ceil(10000000000, 10), 1000000000);
-        assert_eq!(ceil(10, 10000000000), 1);
-        assert_eq!(ceil(10000000000, 1000000000), 10);
-    }
-
-    #[test]
-    fn test_bit_reader_get_byte_offset() {
-        let buffer = vec![255; 10];
-        let mut bit_reader = BitReader::from(buffer);
-        assert_eq!(bit_reader.get_byte_offset(), 0); // offset (0 bytes, 0 bits)
-        bit_reader.get_value::<i32>(6);
-        assert_eq!(bit_reader.get_byte_offset(), 1); // offset (0 bytes, 6 bits)
-        bit_reader.get_value::<i32>(10);
-        assert_eq!(bit_reader.get_byte_offset(), 2); // offset (0 bytes, 16 bits)
-        bit_reader.get_value::<i32>(20);
-        assert_eq!(bit_reader.get_byte_offset(), 5); // offset (0 bytes, 36 bits)
-        bit_reader.get_value::<i32>(30);
-        assert_eq!(bit_reader.get_byte_offset(), 9); // offset (8 bytes, 2 bits)
-    }
-
-    #[test]
-    fn test_bit_reader_get_value() {
-        let buffer = vec![255, 0];
-        let mut bit_reader = BitReader::from(buffer);
-        assert_eq!(bit_reader.get_value::<i32>(1), Some(1));
-        assert_eq!(bit_reader.get_value::<i32>(2), Some(3));
-        assert_eq!(bit_reader.get_value::<i32>(3), Some(7));
-        assert_eq!(bit_reader.get_value::<i32>(4), Some(3));
-    }
-
-    #[test]
-    fn test_bit_reader_get_value_boundary() {
-        let buffer = vec![10, 0, 0, 0, 20, 0, 30, 0, 0, 0, 40, 0];
-        let mut bit_reader = BitReader::from(buffer);
-        assert_eq!(bit_reader.get_value::<i64>(32), Some(10));
-        assert_eq!(bit_reader.get_value::<i64>(16), Some(20));
-        assert_eq!(bit_reader.get_value::<i64>(32), Some(30));
-        assert_eq!(bit_reader.get_value::<i64>(16), Some(40));
-    }
-
-    #[test]
-    fn test_bit_reader_get_aligned() {
-        // 01110101 11001011
-        let buffer = ByteBufferPtr::new(vec![0x75, 0xCB]);
-        let mut bit_reader = BitReader::new(buffer.all());
-        assert_eq!(bit_reader.get_value::<i32>(3), Some(5));
-        assert_eq!(bit_reader.get_aligned::<i32>(1), Some(203));
-        assert_eq!(bit_reader.get_value::<i32>(1), None);
-        bit_reader.reset(buffer.all());
-        assert_eq!(bit_reader.get_aligned::<i32>(3), None);
-    }
-
-    #[test]
-    fn test_bit_reader_get_vlq_int() {
-        // 10001001 00000001 11110010 10110101 00000110
-        let buffer: Vec<u8> = vec![0x89, 0x01, 0xF2, 0xB5, 0x06];
-        let mut bit_reader = BitReader::from(buffer);
-        assert_eq!(bit_reader.get_vlq_int(), Some(137));
-        assert_eq!(bit_reader.get_vlq_int(), Some(105202));
-    }
-
-    #[test]
-    fn test_bit_reader_get_zigzag_vlq_int() {
-        let buffer: Vec<u8> = vec![0, 1, 2, 3];
-        let mut bit_reader = BitReader::from(buffer);
-        assert_eq!(bit_reader.get_zigzag_vlq_int(), Some(0));
-        assert_eq!(bit_reader.get_zigzag_vlq_int(), Some(-1));
-        assert_eq!(bit_reader.get_zigzag_vlq_int(), Some(1));
-        assert_eq!(bit_reader.get_zigzag_vlq_int(), Some(-2));
-    }
-
-    #[test]
-    fn test_set_array_bit() {
-        let mut buffer = vec![0, 0, 0];
-        set_array_bit(&mut buffer[..], 1);
-        assert_eq!(buffer, vec![2, 0, 0]);
-        set_array_bit(&mut buffer[..], 4);
-        assert_eq!(buffer, vec![18, 0, 0]);
-        unset_array_bit(&mut buffer[..], 1);
-        assert_eq!(buffer, vec![16, 0, 0]);
-        set_array_bit(&mut buffer[..], 10);
-        assert_eq!(buffer, vec![16, 4, 0]);
-        set_array_bit(&mut buffer[..], 10);
-        assert_eq!(buffer, vec![16, 4, 0]);
-        set_array_bit(&mut buffer[..], 11);
-        assert_eq!(buffer, vec![16, 12, 0]);
-        unset_array_bit(&mut buffer[..], 10);
-        assert_eq!(buffer, vec![16, 8, 0]);
-    }
-
-    #[test]
-    fn test_num_required_bits() {
-        assert_eq!(num_required_bits(0), 0);
-        assert_eq!(num_required_bits(1), 1);
-        assert_eq!(num_required_bits(2), 2);
-        assert_eq!(num_required_bits(4), 3);
-        assert_eq!(num_required_bits(8), 4);
-        assert_eq!(num_required_bits(10), 4);
-        assert_eq!(num_required_bits(12), 4);
-        assert_eq!(num_required_bits(16), 5);
-    }
-
-    #[test]
-    fn test_get_bit() {
-        // 00001101
-        assert_eq!(true, get_bit(&[0b00001101], 0));
-        assert_eq!(false, get_bit(&[0b00001101], 1));
-        assert_eq!(true, get_bit(&[0b00001101], 2));
-        assert_eq!(true, get_bit(&[0b00001101], 3));
-
-        // 01001001 01010010
-        assert_eq!(true, get_bit(&[0b01001001, 0b01010010], 0));
-        assert_eq!(false, get_bit(&[0b01001001, 0b01010010], 1));
-        assert_eq!(false, get_bit(&[0b01001001, 0b01010010], 2));
-        assert_eq!(true, get_bit(&[0b01001001, 0b01010010], 3));
-        assert_eq!(false, get_bit(&[0b01001001, 0b01010010], 4));
-        assert_eq!(false, get_bit(&[0b01001001, 0b01010010], 5));
-        assert_eq!(true, get_bit(&[0b01001001, 0b01010010], 6));
-        assert_eq!(false, get_bit(&[0b01001001, 0b01010010], 7));
-        assert_eq!(false, get_bit(&[0b01001001, 0b01010010], 8));
-        assert_eq!(true, get_bit(&[0b01001001, 0b01010010], 9));
-        assert_eq!(false, get_bit(&[0b01001001, 0b01010010], 10));
-        assert_eq!(false, get_bit(&[0b01001001, 0b01010010], 11));
-        assert_eq!(true, get_bit(&[0b01001001, 0b01010010], 12));
-        assert_eq!(false, get_bit(&[0b01001001, 0b01010010], 13));
-        assert_eq!(true, get_bit(&[0b01001001, 0b01010010], 14));
-        assert_eq!(false, get_bit(&[0b01001001, 0b01010010], 15));
-    }
-
-    #[test]
-    fn test_log2() {
-        assert_eq!(log2(1), 0);
-        assert_eq!(log2(2), 1);
-        assert_eq!(log2(3), 2);
-        assert_eq!(log2(4), 2);
-        assert_eq!(log2(5), 3);
-        assert_eq!(log2(5), 3);
-        assert_eq!(log2(6), 3);
-        assert_eq!(log2(7), 3);
-        assert_eq!(log2(8), 3);
-        assert_eq!(log2(9), 4);
-    }
-
-    #[test]
-    fn test_skip() {
-        let mut writer = BitWriter::new(5);
-        let old_offset = writer.skip(1).expect("skip() should return OK");
-        writer.put_aligned(42, 4);
-        writer.put_aligned_offset(0x10, 1, old_offset);
-        let result = writer.consume();
-        assert_eq!(result.as_ref(), [0x10, 42, 0, 0, 0]);
-
-        writer = BitWriter::new(4);
-        let result = writer.skip(5);
-        assert!(result.is_err());
-    }
-
-    #[test]
-    fn test_get_next_byte_ptr() {
-        let mut writer = BitWriter::new(5);
-        {
-            let first_byte = writer
-                .get_next_byte_ptr(1)
-                .expect("get_next_byte_ptr() should return OK");
-            first_byte[0] = 0x10;
-        }
-        writer.put_aligned(42, 4);
-        let result = writer.consume();
-        assert_eq!(result.as_ref(), [0x10, 42, 0, 0, 0]);
-    }
-
-    #[test]
-    fn test_consume_flush_buffer() {
-        let mut writer1 = BitWriter::new(3);
-        let mut writer2 = BitWriter::new(3);
-        for i in 1..10 {
-            writer1.put_value(i, 4);
-            writer2.put_value(i, 4);
-        }
-        let res1 = writer1.flush_buffer();
-        let res2 = writer2.consume();
-        assert_eq!(res1, &res2[..]);
-    }
-
-    #[test]
-    fn test_put_get_bool() {
-        let len = 8;
-        let mut writer = BitWriter::new(len);
-
-        for i in 0..8 {
-            let result = writer.put_value(i % 2, 1);
-            assert!(result);
-        }
-
-        writer.flush();
-        {
-            let buffer = writer.buffer();
-            assert_eq!(buffer[0], 0b10101010);
-        }
-
-        // Write 00110011
-        for i in 0..8 {
-            let result = match i {
-                0 | 1 | 4 | 5 => writer.put_value(false as u64, 1),
-                _ => writer.put_value(true as u64, 1),
-            };
-            assert!(result);
-        }
-        writer.flush();
-        {
-            let buffer = writer.buffer();
-            assert_eq!(buffer[0], 0b10101010);
-            assert_eq!(buffer[1], 0b11001100);
-        }
-
-        let mut reader = BitReader::from(writer.consume());
-
-        for i in 0..8 {
-            let val = reader
-                .get_value::<u8>(1)
-                .expect("get_value() should return OK");
-            assert_eq!(val, i % 2);
-        }
-
-        for i in 0..8 {
-            let val = reader
-                .get_value::<bool>(1)
-                .expect("get_value() should return OK");
-            match i {
-                0 | 1 | 4 | 5 => assert_eq!(val, false),
-                _ => assert_eq!(val, true),
-            }
-        }
-    }
-
-    #[test]
-    fn test_put_value_roundtrip() {
-        test_put_value_rand_numbers(32, 2);
-        test_put_value_rand_numbers(32, 3);
-        test_put_value_rand_numbers(32, 4);
-        test_put_value_rand_numbers(32, 5);
-        test_put_value_rand_numbers(32, 6);
-        test_put_value_rand_numbers(32, 7);
-        test_put_value_rand_numbers(32, 8);
-        test_put_value_rand_numbers(64, 16);
-        test_put_value_rand_numbers(64, 24);
-        test_put_value_rand_numbers(64, 32);
-    }
-
-    fn test_put_value_rand_numbers(total: usize, num_bits: usize) {
-        assert!(num_bits < 64);
-        let num_bytes = ceil(num_bits as i64, 8);
-        let mut writer = BitWriter::new(num_bytes as usize * total);
-        let values: Vec<u64> = random_numbers::<u64>(total)
-            .iter()
-            .map(|v| v & ((1 << num_bits) - 1))
-            .collect();
-        (0..total).for_each(|i| {
-            assert!(
-                writer.put_value(values[i] as u64, num_bits),
-                "[{}]: put_value() failed",
-                i
-            );
-        });
-
-        let mut reader = BitReader::from(writer.consume());
-        (0..total).for_each(|i| {
-            let v = reader
-                .get_value::<u64>(num_bits)
-                .expect("get_value() should return OK");
-            assert_eq!(
-                v, values[i],
-                "[{}]: expected {} but got {}",
-                i, values[i], v
-            );
-        });
-    }
-
-    #[test]
-    fn test_get_batch() {
-        const SIZE: &[usize] = &[1, 31, 32, 33, 128, 129];
-        for s in SIZE {
-            for i in 0..33 {
-                match i {
-                    0..=8 => test_get_batch_helper::<u8>(*s, i),
-                    9..=16 => test_get_batch_helper::<u16>(*s, i),
-                    _ => test_get_batch_helper::<u32>(*s, i),
-                }
-            }
-        }
-    }
-
-    fn test_get_batch_helper<T>(total: usize, num_bits: usize)
-    where
-        T: FromBytes + Default + Clone + Debug + Eq,
-    {
-        assert!(num_bits <= 32);
-        let num_bytes = ceil(num_bits as i64, 8);
-        let mut writer = BitWriter::new(num_bytes as usize * total);
-
-        let values: Vec<u32> = random_numbers::<u32>(total)
-            .iter()
-            .map(|v| v & ((1u64 << num_bits) - 1) as u32)
-            .collect();
-
-        // Generic values used to check against actual values read from `get_batch`.
-        let expected_values: Vec<T> =
-            values.iter().map(|v| from_ne_slice(v.as_bytes())).collect();
-
-        (0..total).for_each(|i| {
-            assert!(writer.put_value(values[i] as u64, num_bits));
-        });
-
-        let buf = writer.consume();
-        let mut reader = BitReader::from(buf);
-        let mut batch = vec![T::default(); values.len()];
-        let values_read = reader.get_batch::<T>(&mut batch, num_bits);
-        assert_eq!(values_read, values.len());
-        for i in 0..batch.len() {
-            assert_eq!(
-                batch[i], expected_values[i],
-                "num_bits = {}, index = {}",
-                num_bits, i
-            );
-        }
-    }
-
-    #[test]
-    fn test_put_aligned_roundtrip() {
-        test_put_aligned_rand_numbers::<u8>(4, 3);
-        test_put_aligned_rand_numbers::<u8>(16, 5);
-        test_put_aligned_rand_numbers::<i16>(32, 7);
-        test_put_aligned_rand_numbers::<i16>(32, 9);
-        test_put_aligned_rand_numbers::<i32>(32, 11);
-        test_put_aligned_rand_numbers::<i32>(32, 13);
-        test_put_aligned_rand_numbers::<i64>(32, 17);
-        test_put_aligned_rand_numbers::<i64>(32, 23);
-    }
-
-    fn test_put_aligned_rand_numbers<T>(total: usize, num_bits: usize)
-    where
-        T: Copy + FromBytes + AsBytes + Debug + PartialEq,
-        Standard: Distribution<T>,
-    {
-        assert!(num_bits <= 32);
-        assert!(total % 2 == 0);
-
-        let aligned_value_byte_width = std::mem::size_of::<T>();
-        let value_byte_width = ceil(num_bits as i64, 8) as usize;
-        let mut writer =
-            BitWriter::new((total / 2) * (aligned_value_byte_width + value_byte_width));
-        let values: Vec<u32> = random_numbers::<u32>(total / 2)
-            .iter()
-            .map(|v| v & ((1 << num_bits) - 1))
-            .collect();
-        let aligned_values = random_numbers::<T>(total / 2);
-
-        for i in 0..total {
-            let j = i / 2;
-            if i % 2 == 0 {
-                assert!(
-                    writer.put_value(values[j] as u64, num_bits),
-                    "[{}]: put_value() failed",
-                    i
-                );
-            } else {
-                assert!(
-                    writer.put_aligned::<T>(aligned_values[j], aligned_value_byte_width),
-                    "[{}]: put_aligned() failed",
-                    i
-                );
-            }
-        }
-
-        let mut reader = BitReader::from(writer.consume());
-        for i in 0..total {
-            let j = i / 2;
-            if i % 2 == 0 {
-                let v = reader
-                    .get_value::<u64>(num_bits)
-                    .expect("get_value() should return OK");
-                assert_eq!(
-                    v, values[j] as u64,
-                    "[{}]: expected {} but got {}",
-                    i, values[j], v
-                );
-            } else {
-                let v = reader
-                    .get_aligned::<T>(aligned_value_byte_width)
-                    .expect("get_aligned() should return OK");
-                assert_eq!(
-                    v, aligned_values[j],
-                    "[{}]: expected {:?} but got {:?}",
-                    i, aligned_values[j], v
-                );
-            }
-        }
-    }
-
-    #[test]
-    fn test_put_vlq_int() {
-        let total = 64;
-        let mut writer = BitWriter::new(total * 32);
-        let values = random_numbers::<u32>(total);
-        (0..total).for_each(|i| {
-            assert!(
-                writer.put_vlq_int(values[i] as u64),
-                "[{}]; put_vlq_int() failed",
-                i
-            );
-        });
-
-        let mut reader = BitReader::from(writer.consume());
-        (0..total).for_each(|i| {
-            let v = reader
-                .get_vlq_int()
-                .expect("get_vlq_int() should return OK");
-            assert_eq!(
-                v as u32, values[i],
-                "[{}]: expected {} but got {}",
-                i, values[i], v
-            );
-        });
-    }
-
-    #[test]
-    fn test_put_zigzag_vlq_int() {
-        let total = 64;
-        let mut writer = BitWriter::new(total * 32);
-        let values = random_numbers::<i32>(total);
-        (0..total).for_each(|i| {
-            assert!(
-                writer.put_zigzag_vlq_int(values[i] as i64),
-                "[{}]; put_zigzag_vlq_int() failed",
-                i
-            );
-        });
-
-        let mut reader = BitReader::from(writer.consume());
-        (0..total).for_each(|i| {
-            let v = reader
-                .get_zigzag_vlq_int()
-                .expect("get_zigzag_vlq_int() should return OK");
-            assert_eq!(
-                v as i32, values[i],
-                "[{}]: expected {} but got {}",
-                i, values[i], v
-            );
-        });
-    }
-}
diff --git a/rust/parquet/src/util/cursor.rs b/rust/parquet/src/util/cursor.rs
deleted file mode 100644
index bce8383767c..00000000000
--- a/rust/parquet/src/util/cursor.rs
+++ /dev/null
@@ -1,260 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::io::{self, Cursor, Error, ErrorKind, Read, Seek, SeekFrom, Write};
-use std::sync::{Arc, Mutex};
-use std::{cmp, fmt};
-
-use crate::file::writer::TryClone;
-
-/// This is object to use if your file is already in memory.
-/// The sliceable cursor is similar to std::io::Cursor, except that it makes it easy to create "cursor slices".
-/// To achieve this, it uses Arc instead of shared references. Indeed reference fields are painful
-/// because the lack of Generic Associated Type implies that you would require complex lifetime propagation when
-/// returning such a cursor.
-#[allow(clippy::rc_buffer)]
-pub struct SliceableCursor {
-    inner: Arc<Vec<u8>>,
-    start: u64,
-    length: usize,
-    pos: u64,
-}
-
-impl fmt::Debug for SliceableCursor {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        f.debug_struct("SliceableCursor")
-            .field("start", &self.start)
-            .field("length", &self.length)
-            .field("pos", &self.pos)
-            .field("inner.len", &self.inner.len())
-            .finish()
-    }
-}
-
-impl SliceableCursor {
-    pub fn new(content: Vec<u8>) -> Self {
-        let size = content.len();
-        SliceableCursor {
-            inner: Arc::new(content),
-            start: 0,
-            pos: 0,
-            length: size,
-        }
-    }
-
-    /// Create a slice cursor using the same data as a current one.
-    pub fn slice(&self, start: u64, length: usize) -> io::Result<Self> {
-        let new_start = self.start + start;
-        if new_start >= self.inner.len() as u64
-            || new_start as usize + length > self.inner.len()
-        {
-            return Err(Error::new(ErrorKind::InvalidInput, "out of bound"));
-        }
-        Ok(SliceableCursor {
-            inner: Arc::clone(&self.inner),
-            start: new_start,
-            pos: new_start,
-            length,
-        })
-    }
-
-    fn remaining_slice(&self) -> &[u8] {
-        let end = self.start as usize + self.length;
-        let offset = cmp::min(self.pos, end as u64) as usize;
-        &self.inner[offset..end]
-    }
-
-    /// Get the length of the current cursor slice
-    pub fn len(&self) -> u64 {
-        self.length as u64
-    }
-
-    /// return true if the cursor is empty (self.len() == 0)
-    pub fn is_empty(&self) -> bool {
-        self.len() == 0
-    }
-}
-
-/// Implementation inspired by std::io::Cursor
-impl Read for SliceableCursor {
-    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
-        let n = Read::read(&mut self.remaining_slice(), buf)?;
-        self.pos += n as u64;
-        Ok(n)
-    }
-}
-
-impl Seek for SliceableCursor {
-    fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
-        let new_pos = match pos {
-            SeekFrom::Start(pos) => pos as i64,
-            SeekFrom::End(pos) => self.inner.len() as i64 + pos as i64,
-            SeekFrom::Current(pos) => self.pos as i64 + pos as i64,
-        };
-
-        if new_pos < 0 {
-            Err(Error::new(
-                ErrorKind::InvalidInput,
-                format!(
-                    "Request out of bounds: cur position {} + seek {:?} < 0: {}",
-                    self.pos, pos, new_pos
-                ),
-            ))
-        } else if new_pos >= self.inner.len() as i64 {
-            Err(Error::new(
-                ErrorKind::InvalidInput,
-                format!(
-                    "Request out of bounds: cur position {} + seek {:?} >= length {}: {}",
-                    self.pos,
-                    pos,
-                    self.inner.len(),
-                    new_pos
-                ),
-            ))
-        } else {
-            self.pos = new_pos as u64;
-            Ok(self.start)
-        }
-    }
-}
-
-/// Use this type to write Parquet to memory rather than a file.
-#[derive(Debug, Default, Clone)]
-pub struct InMemoryWriteableCursor {
-    buffer: Arc<Mutex<Cursor<Vec<u8>>>>,
-}
-
-impl InMemoryWriteableCursor {
-    /// Consume this instance and return the underlying buffer as long as there are no other
-    /// references to this instance.
-    pub fn into_inner(self) -> Option<Vec<u8>> {
-        Arc::try_unwrap(self.buffer)
-            .ok()
-            .and_then(|mutex| mutex.into_inner().ok())
-            .map(|cursor| cursor.into_inner())
-    }
-
-    /// Returns a clone of the underlying buffer
-    pub fn data(&self) -> Vec<u8> {
-        let inner = self.buffer.lock().unwrap();
-        inner.get_ref().to_vec()
-    }
-}
-
-impl TryClone for InMemoryWriteableCursor {
-    fn try_clone(&self) -> std::io::Result<Self> {
-        Ok(Self {
-            buffer: self.buffer.clone(),
-        })
-    }
-}
-
-impl Write for InMemoryWriteableCursor {
-    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
-        let mut inner = self.buffer.lock().unwrap();
-        inner.write(buf)
-    }
-
-    fn flush(&mut self) -> std::io::Result<()> {
-        let mut inner = self.buffer.lock().unwrap();
-        inner.flush()
-    }
-}
-
-impl Seek for InMemoryWriteableCursor {
-    fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64> {
-        let mut inner = self.buffer.lock().unwrap();
-        inner.seek(pos)
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    /// Create a SliceableCursor of all u8 values in ascending order
-    fn get_u8_range() -> SliceableCursor {
-        let data: Vec<u8> = (0u8..=255).collect();
-        SliceableCursor::new(data)
-    }
-
-    /// Reads all the bytes in the slice and checks that it matches the u8 range from start to end_included
-    fn check_read_all(mut cursor: SliceableCursor, start: u8, end_included: u8) {
-        let mut target = vec![];
-        let cursor_res = cursor.read_to_end(&mut target);
-        println!("{:?}", cursor_res);
-        assert!(!cursor_res.is_err(), "reading error");
-        assert_eq!((end_included - start) as usize + 1, cursor_res.unwrap());
-        assert_eq!((start..=end_included).collect::<Vec<_>>(), target);
-    }
-
-    #[test]
-    fn read_all_whole() {
-        let cursor = get_u8_range();
-        check_read_all(cursor, 0, 255);
-    }
-
-    #[test]
-    fn read_all_slice() {
-        let cursor = get_u8_range().slice(10, 10).expect("error while slicing");
-        check_read_all(cursor, 10, 19);
-    }
-
-    #[test]
-    fn seek_cursor_start() {
-        let mut cursor = get_u8_range();
-
-        cursor.seek(SeekFrom::Start(5)).unwrap();
-        check_read_all(cursor, 5, 255);
-    }
-
-    #[test]
-    fn seek_cursor_current() {
-        let mut cursor = get_u8_range();
-        cursor.seek(SeekFrom::Start(10)).unwrap();
-        cursor.seek(SeekFrom::Current(10)).unwrap();
-        check_read_all(cursor, 20, 255);
-    }
-
-    #[test]
-    fn seek_cursor_end() {
-        let mut cursor = get_u8_range();
-
-        cursor.seek(SeekFrom::End(-10)).unwrap();
-        check_read_all(cursor, 246, 255);
-    }
-
-    #[test]
-    fn seek_cursor_error_too_long() {
-        let mut cursor = get_u8_range();
-        let res = cursor.seek(SeekFrom::Start(1000));
-        let actual_error = res.expect_err("expected error").to_string();
-        let expected_error =
-            "Request out of bounds: cur position 0 + seek Start(1000) >= length 256: 1000";
-        assert_eq!(actual_error, expected_error);
-    }
-
-    #[test]
-    fn seek_cursor_error_too_short() {
-        let mut cursor = get_u8_range();
-        let res = cursor.seek(SeekFrom::End(-1000));
-        let actual_error = res.expect_err("expected error").to_string();
-        let expected_error =
-            "Request out of bounds: cur position 0 + seek End(-1000) < 0: -744";
-        assert_eq!(actual_error, expected_error);
-    }
-}
diff --git a/rust/parquet/src/util/hash_util.rs b/rust/parquet/src/util/hash_util.rs
deleted file mode 100644
index f7849da41a0..00000000000
--- a/rust/parquet/src/util/hash_util.rs
+++ /dev/null
@@ -1,172 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use crate::data_type::AsBytes;
-
-/// Computes hash value for `data`, with a seed value `seed`.
-/// The data type `T` must implement the `AsBytes` trait.
-pub fn hash<T: AsBytes>(data: &T, seed: u32) -> u32 {
-    hash_(data.as_bytes(), seed)
-}
-
-fn hash_(data: &[u8], seed: u32) -> u32 {
-    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-    unsafe {
-        if is_x86_feature_detected!("sse4.2") {
-            crc32_hash(data, seed)
-        } else {
-            murmur_hash2_64a(data, seed as u64) as u32
-        }
-    }
-
-    #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
-    unsafe {
-        murmur_hash2_64a(data, seed as u64) as u32
-    }
-}
-
-const MURMUR_PRIME: u64 = 0xc6a4a7935bd1e995;
-const MURMUR_R: i32 = 47;
-
-/// Rust implementation of MurmurHash2, 64-bit version for 64-bit platforms
-///
-/// SAFTETY Only safe on platforms which support unaligned loads (like x86_64)
-unsafe fn murmur_hash2_64a(data_bytes: &[u8], seed: u64) -> u64 {
-    let len = data_bytes.len();
-    let len_64 = (len / 8) * 8;
-    let data_bytes_64 = std::slice::from_raw_parts(
-        &data_bytes[0..len_64] as *const [u8] as *const u64,
-        len / 8,
-    );
-
-    let mut h = seed ^ (MURMUR_PRIME.wrapping_mul(data_bytes.len() as u64));
-    for v in data_bytes_64 {
-        let mut k = *v;
-        k = k.wrapping_mul(MURMUR_PRIME);
-        k ^= k >> MURMUR_R;
-        k = k.wrapping_mul(MURMUR_PRIME);
-        h ^= k;
-        h = h.wrapping_mul(MURMUR_PRIME);
-    }
-
-    let data2 = &data_bytes[len_64..];
-
-    let v = len & 7;
-    if v == 7 {
-        h ^= (data2[6] as u64) << 48;
-    }
-    if v >= 6 {
-        h ^= (data2[5] as u64) << 40;
-    }
-    if v >= 5 {
-        h ^= (data2[4] as u64) << 32;
-    }
-    if v >= 4 {
-        h ^= (data2[3] as u64) << 24;
-    }
-    if v >= 3 {
-        h ^= (data2[2] as u64) << 16;
-    }
-    if v >= 2 {
-        h ^= (data2[1] as u64) << 8;
-    }
-    if v >= 1 {
-        h ^= data2[0] as u64;
-    }
-    if v > 0 {
-        h = h.wrapping_mul(MURMUR_PRIME);
-    }
-
-    h ^= h >> MURMUR_R;
-    h = h.wrapping_mul(MURMUR_PRIME);
-    h ^= h >> MURMUR_R;
-    h
-}
-
-/// CRC32 hash implementation using SSE4 instructions. Borrowed from Impala.
-#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-#[target_feature(enable = "sse4.2")]
-unsafe fn crc32_hash(bytes: &[u8], seed: u32) -> u32 {
-    #[cfg(target_arch = "x86")]
-    use std::arch::x86::*;
-    #[cfg(target_arch = "x86_64")]
-    use std::arch::x86_64::*;
-
-    let u32_num_bytes = std::mem::size_of::<u32>();
-    let mut num_bytes = bytes.len();
-    let num_words = num_bytes / u32_num_bytes;
-    num_bytes %= u32_num_bytes;
-
-    let bytes_u32: &[u32] = std::slice::from_raw_parts(
-        &bytes[0..num_words * u32_num_bytes] as *const [u8] as *const u32,
-        num_words,
-    );
-
-    let mut offset = 0;
-    let mut hash = seed;
-    while offset < num_words {
-        hash = _mm_crc32_u32(hash, bytes_u32[offset]);
-        offset += 1;
-    }
-
-    offset = num_words * u32_num_bytes;
-    while offset < num_bytes {
-        hash = _mm_crc32_u8(hash, bytes[offset]);
-        offset += 1;
-    }
-
-    // The lower half of the CRC hash has poor uniformity, so swap the halves
-    // for anyone who only uses the first several bits of the hash.
-    hash = (hash << 16) | (hash >> 16);
-    hash
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_murmur2_64a() {
-        unsafe {
-            let result = murmur_hash2_64a(b"hello", 123);
-            assert_eq!(result, 2597646618390559622);
-
-            let result = murmur_hash2_64a(b"helloworld", 123);
-            assert_eq!(result, 4934371746140206573);
-
-            let result = murmur_hash2_64a(b"helloworldparquet", 123);
-            assert_eq!(result, 2392198230801491746);
-        }
-    }
-
-    #[test]
-    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-    fn test_crc32() {
-        if is_x86_feature_detected!("sse4.2") {
-            unsafe {
-                let result = crc32_hash(b"hello", 123);
-                assert_eq!(result, 2927487359);
-
-                let result = crc32_hash(b"helloworld", 123);
-                assert_eq!(result, 314229527);
-
-                let result = crc32_hash(b"helloworldparquet", 123);
-                assert_eq!(result, 667078870);
-            }
-        }
-    }
-}
diff --git a/rust/parquet/src/util/io.rs b/rust/parquet/src/util/io.rs
deleted file mode 100644
index 44e99ac0a77..00000000000
--- a/rust/parquet/src/util/io.rs
+++ /dev/null
@@ -1,329 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::{cell::RefCell, cmp, fmt, io::*};
-
-use crate::file::{reader::Length, writer::ParquetWriter};
-
-const DEFAULT_BUF_SIZE: usize = 8 * 1024;
-
-// ----------------------------------------------------------------------
-
-/// TryClone tries to clone the type and should maintain the `Seek` position of the given
-/// instance.
-pub trait TryClone: Sized {
-    /// Clones the type returning a new instance or an error if it's not possible
-    /// to clone it.
-    fn try_clone(&self) -> Result<Self>;
-}
-
-/// ParquetReader is the interface which needs to be fulfilled to be able to parse a
-/// parquet source.
-pub trait ParquetReader: Read + Seek + Length + TryClone {}
-impl<T: Read + Seek + Length + TryClone> ParquetReader for T {}
-
-// Read/Write wrappers for `File`.
-
-/// Position trait returns the current position in the stream.
-/// Should be viewed as a lighter version of `Seek` that does not allow seek operations,
-/// and does not require mutable reference for the current position.
-pub trait Position {
-    /// Returns position in the stream.
-    fn pos(&self) -> u64;
-}
-
-/// Struct that represents a slice of a file data with independent start position and
-/// length. Internally clones provided file handle, wraps with a custom implementation
-/// of BufReader that resets position before any read.
-///
-/// This is workaround and alternative for `file.try_clone()` method. It clones `File`
-/// while preserving independent position, which is not available with `try_clone()`.
-///
-/// Designed after `arrow::io::RandomAccessFile` and `std::io::BufReader`
-pub struct FileSource<R: ParquetReader> {
-    reader: RefCell<R>,
-    start: u64,     // start position in a file
-    end: u64,       // end position in a file
-    buf: Vec<u8>,   // buffer where bytes read in advance are stored
-    buf_pos: usize, // current position of the reader in the buffer
-    buf_cap: usize, // current number of bytes read into the buffer
-}
-
-impl<R: ParquetReader> fmt::Debug for FileSource<R> {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        f.debug_struct("FileSource")
-            .field("reader", &"OPAQUE")
-            .field("start", &self.start)
-            .field("end", &self.end)
-            .field("buf.len", &self.buf.len())
-            .field("buf_pos", &self.buf_pos)
-            .field("buf_cap", &self.buf_cap)
-            .finish()
-    }
-}
-
-impl<R: ParquetReader> FileSource<R> {
-    /// Creates new file reader with start and length from a file handle
-    pub fn new(fd: &R, start: u64, length: usize) -> Self {
-        let reader = RefCell::new(fd.try_clone().unwrap());
-        Self {
-            reader,
-            start,
-            end: start + length as u64,
-            buf: vec![0_u8; DEFAULT_BUF_SIZE],
-            buf_pos: 0,
-            buf_cap: 0,
-        }
-    }
-
-    fn fill_inner_buf(&mut self) -> Result<&[u8]> {
-        if self.buf_pos >= self.buf_cap {
-            // If we've reached the end of our internal buffer then we need to fetch
-            // some more data from the underlying reader.
-            // Branch using `>=` instead of the more correct `==`
-            // to tell the compiler that the pos..cap slice is always valid.
-            debug_assert!(self.buf_pos == self.buf_cap);
-            let mut reader = self.reader.borrow_mut();
-            reader.seek(SeekFrom::Start(self.start))?; // always seek to start before reading
-            self.buf_cap = reader.read(&mut self.buf)?;
-            self.buf_pos = 0;
-        }
-        Ok(&self.buf[self.buf_pos..self.buf_cap])
-    }
-
-    fn skip_inner_buf(&mut self, buf: &mut [u8]) -> Result<usize> {
-        // discard buffer
-        self.buf_pos = 0;
-        self.buf_cap = 0;
-        // read directly into param buffer
-        let mut reader = self.reader.borrow_mut();
-        reader.seek(SeekFrom::Start(self.start))?; // always seek to start before reading
-        let nread = reader.read(buf)?;
-        self.start += nread as u64;
-        Ok(nread)
-    }
-}
-
-impl<R: ParquetReader> Read for FileSource<R> {
-    fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
-        let bytes_to_read = cmp::min(buf.len(), (self.end - self.start) as usize);
-        let buf = &mut buf[0..bytes_to_read];
-
-        // If we don't have any buffered data and we're doing a massive read
-        // (larger than our internal buffer), bypass our internal buffer
-        // entirely.
-        if self.buf_pos == self.buf_cap && buf.len() >= self.buf.len() {
-            return self.skip_inner_buf(buf);
-        }
-        let nread = {
-            let mut rem = self.fill_inner_buf()?;
-            // copy the data from the inner buffer to the param buffer
-            rem.read(buf)?
-        };
-        // consume from buffer
-        self.buf_pos = cmp::min(self.buf_pos + nread, self.buf_cap);
-
-        self.start += nread as u64;
-        Ok(nread)
-    }
-}
-
-impl<R: ParquetReader> Position for FileSource<R> {
-    fn pos(&self) -> u64 {
-        self.start
-    }
-}
-
-impl<R: ParquetReader> Length for FileSource<R> {
-    fn len(&self) -> u64 {
-        self.end - self.start
-    }
-}
-
-/// Struct that represents `File` output stream with position tracking.
-/// Used as a sink in file writer.
-pub struct FileSink<W: ParquetWriter> {
-    buf: BufWriter<W>,
-    // This is not necessarily position in the underlying file,
-    // but rather current position in the sink.
-    pos: u64,
-}
-
-impl<W: ParquetWriter> FileSink<W> {
-    /// Creates new file sink.
-    /// Position is set to whatever position file has.
-    pub fn new(buf: &W) -> Self {
-        let mut owned_buf = buf.try_clone().unwrap();
-        let pos = owned_buf.seek(SeekFrom::Current(0)).unwrap();
-        Self {
-            buf: BufWriter::new(owned_buf),
-            pos,
-        }
-    }
-}
-
-impl<W: ParquetWriter> Write for FileSink<W> {
-    fn write(&mut self, buf: &[u8]) -> Result<usize> {
-        let num_bytes = self.buf.write(buf)?;
-        self.pos += num_bytes as u64;
-        Ok(num_bytes)
-    }
-
-    fn flush(&mut self) -> Result<()> {
-        self.buf.flush()
-    }
-}
-
-impl<W: ParquetWriter> Position for FileSink<W> {
-    fn pos(&self) -> u64 {
-        self.pos
-    }
-}
-
-// Position implementation for Cursor to use in various tests.
-impl<'a> Position for Cursor<&'a mut Vec<u8>> {
-    fn pos(&self) -> u64 {
-        self.position()
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    use std::iter;
-
-    use crate::util::test_common::{get_temp_file, get_test_file};
-
-    #[test]
-    fn test_io_read_fully() {
-        let mut buf = vec![0; 8];
-        let mut src = FileSource::new(&get_test_file("alltypes_plain.parquet"), 0, 4);
-
-        let bytes_read = src.read(&mut buf[..]).unwrap();
-        assert_eq!(bytes_read, 4);
-        assert_eq!(buf, vec![b'P', b'A', b'R', b'1', 0, 0, 0, 0]);
-    }
-
-    #[test]
-    fn test_io_read_in_chunks() {
-        let mut buf = vec![0; 4];
-        let mut src = FileSource::new(&get_test_file("alltypes_plain.parquet"), 0, 4);
-
-        let bytes_read = src.read(&mut buf[0..2]).unwrap();
-        assert_eq!(bytes_read, 2);
-        let bytes_read = src.read(&mut buf[2..]).unwrap();
-        assert_eq!(bytes_read, 2);
-        assert_eq!(buf, vec![b'P', b'A', b'R', b'1']);
-    }
-
-    #[test]
-    fn test_io_read_pos() {
-        let mut src = FileSource::new(&get_test_file("alltypes_plain.parquet"), 0, 4);
-
-        let _ = src.read(&mut [0; 1]).unwrap();
-        assert_eq!(src.pos(), 1);
-
-        let _ = src.read(&mut [0; 4]).unwrap();
-        assert_eq!(src.pos(), 4);
-    }
-
-    #[test]
-    fn test_io_read_over_limit() {
-        let mut src = FileSource::new(&get_test_file("alltypes_plain.parquet"), 0, 4);
-
-        // Read all bytes from source
-        let _ = src.read(&mut [0; 128]).unwrap();
-        assert_eq!(src.pos(), 4);
-
-        // Try reading again, should return 0 bytes.
-        let bytes_read = src.read(&mut [0; 128]).unwrap();
-        assert_eq!(bytes_read, 0);
-        assert_eq!(src.pos(), 4);
-    }
-
-    #[test]
-    fn test_io_seek_switch() {
-        let mut buf = vec![0; 4];
-        let mut file = get_test_file("alltypes_plain.parquet");
-        let mut src = FileSource::new(&file, 0, 4);
-
-        file.seek(SeekFrom::Start(5_u64))
-            .expect("File seek to a position");
-
-        let bytes_read = src.read(&mut buf[..]).unwrap();
-        assert_eq!(bytes_read, 4);
-        assert_eq!(buf, vec![b'P', b'A', b'R', b'1']);
-    }
-
-    #[test]
-    fn test_io_write_with_pos() {
-        let mut file = get_temp_file("file_sink_test", &[b'a', b'b', b'c']);
-        file.seek(SeekFrom::Current(3)).unwrap();
-
-        // Write into sink
-        let mut sink = FileSink::new(&file);
-        assert_eq!(sink.pos(), 3);
-
-        sink.write_all(&[b'd', b'e', b'f', b'g']).unwrap();
-        assert_eq!(sink.pos(), 7);
-
-        sink.flush().unwrap();
-        assert_eq!(sink.pos(), file.seek(SeekFrom::Current(0)).unwrap());
-
-        // Read data using file chunk
-        let mut res = vec![0u8; 7];
-        let mut chunk =
-            FileSource::new(&file, 0, file.metadata().unwrap().len() as usize);
-        chunk.read_exact(&mut res[..]).unwrap();
-        assert_eq!(res, vec![b'a', b'b', b'c', b'd', b'e', b'f', b'g']);
-    }
-
-    #[test]
-    fn test_io_large_read() {
-        // Generate repeated 'abcdef' pattern and write it into a file
-        let patterned_data: Vec<u8> = iter::repeat(vec![0, 1, 2, 3, 4, 5])
-            .flatten()
-            .take(3 * DEFAULT_BUF_SIZE)
-            .collect();
-        // always use different temp files as test might be run in parallel
-        let mut file = get_temp_file("large_file_sink_test", &patterned_data);
-
-        // seek the underlying file to the first 'd'
-        file.seek(SeekFrom::Start(3)).unwrap();
-
-        // create the FileSource reader that starts at pos 1 ('b')
-        let mut chunk = FileSource::new(&file, 1, patterned_data.len() - 1);
-
-        // read the 'b' at pos 1
-        let mut res = vec![0u8; 1];
-        chunk.read_exact(&mut res).unwrap();
-        assert_eq!(res, &[1]);
-
-        // the underlying file is sought to 'e'
-        file.seek(SeekFrom::Start(4)).unwrap();
-
-        // now read large chunk that starts with 'c' (after 'b')
-        let mut res = vec![0u8; 2 * DEFAULT_BUF_SIZE];
-        chunk.read_exact(&mut res).unwrap();
-        assert_eq!(
-            res,
-            &patterned_data[2..2 + 2 * DEFAULT_BUF_SIZE],
-            "read buf and original data are not equal"
-        );
-    }
-}
diff --git a/rust/parquet/src/util/memory.rs b/rust/parquet/src/util/memory.rs
deleted file mode 100644
index 57d0c243fe6..00000000000
--- a/rust/parquet/src/util/memory.rs
+++ /dev/null
@@ -1,532 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Utility methods and structs for working with memory.
-
-use std::{
-    fmt::{Debug, Display, Formatter, Result as FmtResult},
-    io::{Result as IoResult, Write},
-    mem,
-    ops::{Index, IndexMut},
-    sync::{
-        atomic::{AtomicI64, Ordering},
-        Arc, Weak,
-    },
-};
-
-// ----------------------------------------------------------------------
-// Memory Tracker classes
-
-/// Reference counted pointer for [`MemTracker`].
-pub type MemTrackerPtr = Arc<MemTracker>;
-/// Non-owning reference for [`MemTracker`].
-pub type WeakMemTrackerPtr = Weak<MemTracker>;
-
-/// Struct to track memory usage information.
-#[derive(Debug)]
-pub struct MemTracker {
-    // In the tuple, the first element is the current memory allocated (in bytes),
-    // and the second element is the maximum memory allocated so far (in bytes).
-    current_memory_usage: AtomicI64,
-    max_memory_usage: AtomicI64,
-}
-
-impl MemTracker {
-    /// Creates new memory tracker.
-    #[inline]
-    pub fn new() -> MemTracker {
-        MemTracker {
-            current_memory_usage: Default::default(),
-            max_memory_usage: Default::default(),
-        }
-    }
-
-    /// Returns the current memory consumption, in bytes.
-    pub fn memory_usage(&self) -> i64 {
-        self.current_memory_usage.load(Ordering::Acquire)
-    }
-
-    /// Returns the maximum memory consumption so far, in bytes.
-    pub fn max_memory_usage(&self) -> i64 {
-        self.max_memory_usage.load(Ordering::Acquire)
-    }
-
-    /// Adds `num_bytes` to the memory consumption tracked by this memory tracker.
-    #[inline]
-    pub fn alloc(&self, num_bytes: i64) {
-        let new_current = self
-            .current_memory_usage
-            .fetch_add(num_bytes, Ordering::Acquire)
-            + num_bytes;
-        self.max_memory_usage
-            .fetch_max(new_current, Ordering::Acquire);
-    }
-}
-
-// ----------------------------------------------------------------------
-// Buffer classes
-
-/// Type alias for [`Buffer`].
-pub type ByteBuffer = Buffer<u8>;
-/// Type alias for [`BufferPtr`].
-pub type ByteBufferPtr = BufferPtr<u8>;
-
-/// A resize-able buffer class with generic member, with optional memory tracker.
-///
-/// Note that a buffer has two attributes:
-/// `capacity` and `size`: the former is the total number of space reserved for
-/// the buffer, while the latter is the actual number of elements.
-/// Invariant: `capacity` >= `size`.
-/// The total allocated bytes for a buffer equals to `capacity * sizeof<T>()`.
-pub struct Buffer<T: Clone> {
-    data: Vec<T>,
-    mem_tracker: Option<MemTrackerPtr>,
-    type_length: usize,
-}
-
-impl<T: Clone> Buffer<T> {
-    /// Creates new empty buffer.
-    pub fn new() -> Self {
-        Buffer {
-            data: vec![],
-            mem_tracker: None,
-            type_length: std::mem::size_of::<T>(),
-        }
-    }
-
-    /// Adds [`MemTracker`] for this buffer.
-    #[inline]
-    pub fn with_mem_tracker(mut self, mc: MemTrackerPtr) -> Self {
-        mc.alloc((self.data.capacity() * self.type_length) as i64);
-        self.mem_tracker = Some(mc);
-        self
-    }
-
-    /// Returns slice of data in this buffer.
-    #[inline]
-    pub fn data(&self) -> &[T] {
-        self.data.as_slice()
-    }
-
-    /// Sets data for this buffer.
-    #[inline]
-    pub fn set_data(&mut self, new_data: Vec<T>) {
-        if let Some(ref mc) = self.mem_tracker {
-            let capacity_diff = new_data.capacity() as i64 - self.data.capacity() as i64;
-            mc.alloc(capacity_diff * self.type_length as i64);
-        }
-        self.data = new_data;
-    }
-
-    /// Resizes underlying data in place to a new length `new_size`.
-    ///
-    /// If `new_size` is less than current length, data is truncated, otherwise, it is
-    /// extended to `new_size` with provided default value `init_value`.
-    ///
-    /// Memory tracker is also updated, if available.
-    #[inline]
-    pub fn resize(&mut self, new_size: usize, init_value: T) {
-        let old_capacity = self.data.capacity();
-        self.data.resize(new_size, init_value);
-        if let Some(ref mc) = self.mem_tracker {
-            let capacity_diff = self.data.capacity() as i64 - old_capacity as i64;
-            mc.alloc(capacity_diff * self.type_length as i64);
-        }
-    }
-
-    /// Clears underlying data.
-    #[inline]
-    pub fn clear(&mut self) {
-        self.data.clear()
-    }
-
-    /// Reserves capacity `additional_capacity` for underlying data vector.
-    ///
-    /// Memory tracker is also updated, if available.
-    #[inline]
-    pub fn reserve(&mut self, additional_capacity: usize) {
-        let old_capacity = self.data.capacity();
-        self.data.reserve(additional_capacity);
-        if self.data.capacity() > old_capacity {
-            if let Some(ref mc) = self.mem_tracker {
-                let capacity_diff = self.data.capacity() as i64 - old_capacity as i64;
-                mc.alloc(capacity_diff * self.type_length as i64);
-            }
-        }
-    }
-
-    /// Returns [`BufferPtr`] with buffer data.
-    /// Buffer data is reset.
-    #[inline]
-    pub fn consume(&mut self) -> BufferPtr<T> {
-        let old_data = mem::replace(&mut self.data, vec![]);
-        let mut result = BufferPtr::new(old_data);
-        if let Some(ref mc) = self.mem_tracker {
-            result = result.with_mem_tracker(mc.clone());
-        }
-        result
-    }
-
-    /// Adds `value` to the buffer.
-    #[inline]
-    pub fn push(&mut self, value: T) {
-        self.data.push(value)
-    }
-
-    /// Returns current capacity for the buffer.
-    #[inline]
-    pub fn capacity(&self) -> usize {
-        self.data.capacity()
-    }
-
-    /// Returns current size for the buffer.
-    #[inline]
-    pub fn size(&self) -> usize {
-        self.data.len()
-    }
-
-    /// Returns `true` if memory tracker is added to buffer, `false` otherwise.
-    #[inline]
-    pub fn is_mem_tracked(&self) -> bool {
-        self.mem_tracker.is_some()
-    }
-
-    /// Returns memory tracker associated with this buffer.
-    /// This may panic, if memory tracker is not set, use method above to check if
-    /// memory tracker is available.
-    #[inline]
-    pub fn mem_tracker(&self) -> &MemTrackerPtr {
-        self.mem_tracker.as_ref().unwrap()
-    }
-}
-
-impl<T: Sized + Clone> Index<usize> for Buffer<T> {
-    type Output = T;
-
-    fn index(&self, index: usize) -> &T {
-        &self.data[index]
-    }
-}
-
-impl<T: Sized + Clone> IndexMut<usize> for Buffer<T> {
-    fn index_mut(&mut self, index: usize) -> &mut T {
-        &mut self.data[index]
-    }
-}
-
-// TODO: implement this for other types
-impl Write for Buffer<u8> {
-    #[inline]
-    fn write(&mut self, buf: &[u8]) -> IoResult<usize> {
-        let old_capacity = self.data.capacity();
-        let bytes_written = self.data.write(buf)?;
-        if let Some(ref mc) = self.mem_tracker {
-            if self.data.capacity() - old_capacity > 0 {
-                mc.alloc((self.data.capacity() - old_capacity) as i64)
-            }
-        }
-        Ok(bytes_written)
-    }
-
-    fn flush(&mut self) -> IoResult<()> {
-        // No-op
-        self.data.flush()
-    }
-}
-
-impl AsRef<[u8]> for Buffer<u8> {
-    fn as_ref(&self) -> &[u8] {
-        self.data.as_slice()
-    }
-}
-
-impl<T: Clone> Drop for Buffer<T> {
-    #[inline]
-    fn drop(&mut self) {
-        if let Some(ref mc) = self.mem_tracker {
-            mc.alloc(-((self.data.capacity() * self.type_length) as i64));
-        }
-    }
-}
-
-// ----------------------------------------------------------------------
-// Immutable Buffer (BufferPtr) classes
-
-/// An representation of a slice on a reference-counting and read-only byte array.
-/// Sub-slices can be further created from this. The byte array will be released
-/// when all slices are dropped.
-#[allow(clippy::rc_buffer)]
-#[derive(Clone, Debug)]
-pub struct BufferPtr<T> {
-    data: Arc<Vec<T>>,
-    start: usize,
-    len: usize,
-    // TODO: will this create too many references? rethink about this.
-    mem_tracker: Option<MemTrackerPtr>,
-}
-
-impl<T> BufferPtr<T> {
-    /// Creates new buffer from a vector.
-    pub fn new(v: Vec<T>) -> Self {
-        let len = v.len();
-        Self {
-            data: Arc::new(v),
-            start: 0,
-            len,
-            mem_tracker: None,
-        }
-    }
-
-    /// Returns slice of data in this buffer.
-    pub fn data(&self) -> &[T] {
-        &self.data[self.start..self.start + self.len]
-    }
-
-    /// Updates this buffer with new `start` position and length `len`.
-    ///
-    /// Range should be within current start position and length.
-    pub fn with_range(mut self, start: usize, len: usize) -> Self {
-        assert!(start <= self.len);
-        assert!(start + len <= self.len);
-        self.start = start;
-        self.len = len;
-        self
-    }
-
-    /// Adds memory tracker to this buffer.
-    pub fn with_mem_tracker(mut self, mc: MemTrackerPtr) -> Self {
-        self.mem_tracker = Some(mc);
-        self
-    }
-
-    /// Returns start position of this buffer.
-    pub fn start(&self) -> usize {
-        self.start
-    }
-
-    /// Returns length of this buffer
-    pub fn len(&self) -> usize {
-        self.len
-    }
-
-    /// Returns whether this buffer is empty
-    pub fn is_empty(&self) -> bool {
-        self.len == 0
-    }
-
-    /// Returns `true` if this buffer has memory tracker, `false` otherwise.
-    pub fn is_mem_tracked(&self) -> bool {
-        self.mem_tracker.is_some()
-    }
-
-    /// Returns a shallow copy of the buffer.
-    /// Reference counted pointer to the data is copied.
-    pub fn all(&self) -> BufferPtr<T> {
-        BufferPtr {
-            data: self.data.clone(),
-            start: self.start,
-            len: self.len,
-            mem_tracker: self.mem_tracker.as_ref().cloned(),
-        }
-    }
-
-    /// Returns a shallow copy of the buffer that starts with `start` position.
-    pub fn start_from(&self, start: usize) -> BufferPtr<T> {
-        assert!(start <= self.len);
-        BufferPtr {
-            data: self.data.clone(),
-            start: self.start + start,
-            len: self.len - start,
-            mem_tracker: self.mem_tracker.as_ref().cloned(),
-        }
-    }
-
-    /// Returns a shallow copy that is a range slice within this buffer.
-    pub fn range(&self, start: usize, len: usize) -> BufferPtr<T> {
-        assert!(start + len <= self.len);
-        BufferPtr {
-            data: self.data.clone(),
-            start: self.start + start,
-            len,
-            mem_tracker: self.mem_tracker.as_ref().cloned(),
-        }
-    }
-}
-
-impl<T: Sized> Index<usize> for BufferPtr<T> {
-    type Output = T;
-
-    fn index(&self, index: usize) -> &T {
-        assert!(index < self.len);
-        &self.data[self.start + index]
-    }
-}
-
-impl<T: Debug> Display for BufferPtr<T> {
-    fn fmt(&self, f: &mut Formatter) -> FmtResult {
-        write!(f, "{:?}", self.data)
-    }
-}
-
-impl<T> Drop for BufferPtr<T> {
-    fn drop(&mut self) {
-        if let Some(ref mc) = self.mem_tracker {
-            if Arc::strong_count(&self.data) == 1 && Arc::weak_count(&self.data) == 0 {
-                mc.alloc(-(self.data.capacity() as i64));
-            }
-        }
-    }
-}
-
-impl AsRef<[u8]> for BufferPtr<u8> {
-    fn as_ref(&self) -> &[u8] {
-        &self.data[self.start..self.start + self.len]
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_byte_buffer_mem_tracker() {
-        let mem_tracker = Arc::new(MemTracker::new());
-
-        let mut buffer = ByteBuffer::new().with_mem_tracker(mem_tracker.clone());
-        buffer.set_data(vec![0; 10]);
-        assert_eq!(mem_tracker.memory_usage(), buffer.capacity() as i64);
-        buffer.set_data(vec![0; 20]);
-        let capacity = buffer.capacity() as i64;
-        assert_eq!(mem_tracker.memory_usage(), capacity);
-
-        let max_capacity = {
-            let mut buffer2 = ByteBuffer::new().with_mem_tracker(mem_tracker.clone());
-            buffer2.reserve(30);
-            assert_eq!(
-                mem_tracker.memory_usage(),
-                buffer2.capacity() as i64 + capacity
-            );
-            buffer2.set_data(vec![0; 100]);
-            assert_eq!(
-                mem_tracker.memory_usage(),
-                buffer2.capacity() as i64 + capacity
-            );
-            buffer2.capacity() as i64 + capacity
-        };
-
-        assert_eq!(mem_tracker.memory_usage(), capacity);
-        assert_eq!(mem_tracker.max_memory_usage(), max_capacity);
-
-        buffer.reserve(40);
-        assert_eq!(mem_tracker.memory_usage(), buffer.capacity() as i64);
-
-        buffer.consume();
-        assert_eq!(mem_tracker.memory_usage(), buffer.capacity() as i64);
-    }
-
-    #[test]
-    fn test_byte_ptr_mem_tracker() {
-        let mem_tracker = Arc::new(MemTracker::new());
-
-        let mut buffer = ByteBuffer::new().with_mem_tracker(mem_tracker.clone());
-        buffer.set_data(vec![0; 60]);
-
-        {
-            let buffer_capacity = buffer.capacity() as i64;
-            let buf_ptr = buffer.consume();
-            assert_eq!(mem_tracker.memory_usage(), buffer_capacity);
-            {
-                let buf_ptr1 = buf_ptr.all();
-                {
-                    let _ = buf_ptr.start_from(20);
-                    assert_eq!(mem_tracker.memory_usage(), buffer_capacity);
-                }
-                assert_eq!(mem_tracker.memory_usage(), buffer_capacity);
-                let _ = buf_ptr1.range(30, 20);
-                assert_eq!(mem_tracker.memory_usage(), buffer_capacity);
-            }
-            assert_eq!(mem_tracker.memory_usage(), buffer_capacity);
-        }
-        assert_eq!(mem_tracker.memory_usage(), buffer.capacity() as i64);
-    }
-
-    #[test]
-    fn test_byte_buffer() {
-        let mut buffer = ByteBuffer::new();
-        assert_eq!(buffer.size(), 0);
-        assert_eq!(buffer.capacity(), 0);
-
-        let mut buffer2 = ByteBuffer::new();
-        buffer2.reserve(40);
-        assert_eq!(buffer2.size(), 0);
-        assert_eq!(buffer2.capacity(), 40);
-
-        buffer.set_data((0..5).collect());
-        assert_eq!(buffer.size(), 5);
-        assert_eq!(buffer[4], 4);
-
-        buffer.set_data((0..20).collect());
-        assert_eq!(buffer.size(), 20);
-        assert_eq!(buffer[10], 10);
-
-        let expected: Vec<u8> = (0..20).collect();
-        {
-            let data = buffer.data();
-            assert_eq!(data, expected.as_slice());
-        }
-
-        buffer.reserve(40);
-        assert!(buffer.capacity() >= 40);
-
-        let byte_ptr = buffer.consume();
-        assert_eq!(buffer.size(), 0);
-        assert_eq!(byte_ptr.as_ref(), expected.as_slice());
-
-        let values: Vec<u8> = (0..30).collect();
-        let _ = buffer.write(values.as_slice());
-        let _ = buffer.flush();
-
-        assert_eq!(buffer.data(), values.as_slice());
-    }
-
-    #[test]
-    fn test_byte_ptr() {
-        let values = (0..50).collect();
-        let ptr = ByteBufferPtr::new(values);
-        assert_eq!(ptr.len(), 50);
-        assert_eq!(ptr.start(), 0);
-        assert_eq!(ptr[40], 40);
-
-        let ptr2 = ptr.all();
-        assert_eq!(ptr2.len(), 50);
-        assert_eq!(ptr2.start(), 0);
-        assert_eq!(ptr2[40], 40);
-
-        let ptr3 = ptr.start_from(20);
-        assert_eq!(ptr3.len(), 30);
-        assert_eq!(ptr3.start(), 20);
-        assert_eq!(ptr3[0], 20);
-
-        let ptr4 = ptr3.range(10, 10);
-        assert_eq!(ptr4.len(), 10);
-        assert_eq!(ptr4.start(), 30);
-        assert_eq!(ptr4[0], 30);
-
-        let expected: Vec<u8> = (30..40).collect();
-        assert_eq!(ptr4.as_ref(), expected.as_slice());
-    }
-}
diff --git a/rust/parquet/src/util/mod.rs b/rust/parquet/src/util/mod.rs
deleted file mode 100644
index af9a1aa1eba..00000000000
--- a/rust/parquet/src/util/mod.rs
+++ /dev/null
@@ -1,27 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-pub mod io;
-pub mod memory;
-#[macro_use]
-pub mod bit_util;
-mod bit_packing;
-pub mod cursor;
-pub mod hash_util;
-
-#[cfg(test)]
-pub mod test_common;
diff --git a/rust/parquet/src/util/test_common/file_util.rs b/rust/parquet/src/util/test_common/file_util.rs
deleted file mode 100644
index 7393b55f1ed..00000000000
--- a/rust/parquet/src/util/test_common/file_util.rs
+++ /dev/null
@@ -1,73 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::{env, fs, io::Write, path::PathBuf, str::FromStr};
-
-/// Returns path to the test parquet file in 'data' directory
-pub fn get_test_path(file_name: &str) -> PathBuf {
-    let mut pathbuf =
-        PathBuf::from_str(&arrow::util::test_util::parquet_test_data()).unwrap();
-    pathbuf.push(file_name);
-    pathbuf
-}
-
-/// Returns file handle for a test parquet file from 'data' directory
-pub fn get_test_file(file_name: &str) -> fs::File {
-    let path = get_test_path(file_name);
-    fs::File::open(path.as_path()).unwrap_or_else(|err| {
-        panic!(
-            "Test file {} could not be opened, did you do `git submodule update`?: {}",
-            path.display(),
-            err
-        )
-    })
-}
-
-/// Returns file handle for a temp file in 'target' directory with a provided content
-pub fn get_temp_file(file_name: &str, content: &[u8]) -> fs::File {
-    // build tmp path to a file in "target/debug/testdata"
-    let mut path_buf = env::current_dir().unwrap();
-    path_buf.push("target");
-    path_buf.push("debug");
-    path_buf.push("testdata");
-    fs::create_dir_all(&path_buf).unwrap();
-    path_buf.push(file_name);
-
-    // write file content
-    let mut tmp_file = fs::File::create(path_buf.as_path()).unwrap();
-    tmp_file.write_all(content).unwrap();
-    tmp_file.sync_all().unwrap();
-
-    // return file handle for both read and write
-    let file = fs::OpenOptions::new()
-        .read(true)
-        .write(true)
-        .open(path_buf.as_path());
-    assert!(file.is_ok());
-    file.unwrap()
-}
-
-pub fn get_temp_filename() -> PathBuf {
-    let mut path_buf = env::current_dir().unwrap();
-    path_buf.push("target");
-    path_buf.push("debug");
-    path_buf.push("testdata");
-    fs::create_dir_all(&path_buf).unwrap();
-    path_buf.push(rand::random::<i16>().to_string());
-
-    path_buf
-}
diff --git a/rust/parquet/src/util/test_common/mod.rs b/rust/parquet/src/util/test_common/mod.rs
deleted file mode 100644
index ed65bbe8a82..00000000000
--- a/rust/parquet/src/util/test_common/mod.rs
+++ /dev/null
@@ -1,33 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-pub mod file_util;
-pub mod page_util;
-pub mod rand_gen;
-
-pub use self::rand_gen::random_bools;
-pub use self::rand_gen::random_bytes;
-pub use self::rand_gen::random_numbers;
-pub use self::rand_gen::random_numbers_range;
-pub use self::rand_gen::RandGen;
-
-pub use self::file_util::get_temp_file;
-pub use self::file_util::get_temp_filename;
-pub use self::file_util::get_test_file;
-pub use self::file_util::get_test_path;
-
-pub use self::page_util::make_pages;
diff --git a/rust/parquet/src/util/test_common/page_util.rs b/rust/parquet/src/util/test_common/page_util.rs
deleted file mode 100644
index 2e0e8e926bc..00000000000
--- a/rust/parquet/src/util/test_common/page_util.rs
+++ /dev/null
@@ -1,313 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use crate::basic::Encoding;
-use crate::column::page::PageReader;
-use crate::column::page::{Page, PageIterator};
-use crate::data_type::DataType;
-use crate::encodings::encoding::{get_encoder, DictEncoder, Encoder};
-use crate::encodings::levels::max_buffer_size;
-use crate::encodings::levels::LevelEncoder;
-use crate::errors::Result;
-use crate::schema::types::{ColumnDescPtr, SchemaDescPtr};
-use crate::util::memory::ByteBufferPtr;
-use crate::util::memory::MemTracker;
-use crate::util::memory::MemTrackerPtr;
-use crate::util::test_common::random_numbers_range;
-use rand::distributions::uniform::SampleUniform;
-use std::collections::VecDeque;
-use std::mem;
-use std::sync::Arc;
-use std::vec::IntoIter;
-
-pub trait DataPageBuilder {
-    fn add_rep_levels(&mut self, max_level: i16, rep_levels: &[i16]);
-    fn add_def_levels(&mut self, max_level: i16, def_levels: &[i16]);
-    fn add_values<T: DataType>(&mut self, encoding: Encoding, values: &[T::T]);
-    fn add_indices(&mut self, indices: ByteBufferPtr);
-    fn consume(self) -> Page;
-}
-
-/// A utility struct for building data pages (v1 or v2). Callers must call:
-///   - add_rep_levels()
-///   - add_def_levels()
-///   - add_values() for normal data page / add_indices() for dictionary data page
-///   - consume()
-/// in order to populate and obtain a data page.
-pub struct DataPageBuilderImpl {
-    desc: ColumnDescPtr,
-    encoding: Option<Encoding>,
-    mem_tracker: MemTrackerPtr,
-    num_values: u32,
-    buffer: Vec<u8>,
-    rep_levels_byte_len: u32,
-    def_levels_byte_len: u32,
-    datapage_v2: bool,
-}
-
-impl DataPageBuilderImpl {
-    // `num_values` is the number of non-null values to put in the data page.
-    // `datapage_v2` flag is used to indicate if the generated data page should use V2
-    // format or not.
-    pub fn new(desc: ColumnDescPtr, num_values: u32, datapage_v2: bool) -> Self {
-        DataPageBuilderImpl {
-            desc,
-            encoding: None,
-            mem_tracker: Arc::new(MemTracker::new()),
-            num_values,
-            buffer: vec![],
-            rep_levels_byte_len: 0,
-            def_levels_byte_len: 0,
-            datapage_v2,
-        }
-    }
-
-    // Adds levels to the buffer and return number of encoded bytes
-    fn add_levels(&mut self, max_level: i16, levels: &[i16]) -> u32 {
-        let size = max_buffer_size(Encoding::RLE, max_level, levels.len());
-        let mut level_encoder = LevelEncoder::v1(Encoding::RLE, max_level, vec![0; size]);
-        level_encoder.put(levels).expect("put() should be OK");
-        let encoded_levels = level_encoder.consume().expect("consume() should be OK");
-        // Actual encoded bytes (without length offset)
-        let encoded_bytes = &encoded_levels[mem::size_of::<i32>()..];
-        if self.datapage_v2 {
-            // Level encoder always initializes with offset of i32, where it stores
-            // length of encoded data; for data page v2 we explicitly
-            // store length, therefore we should skip i32 bytes.
-            self.buffer.extend_from_slice(encoded_bytes);
-        } else {
-            self.buffer.extend_from_slice(encoded_levels.as_slice());
-        }
-        encoded_bytes.len() as u32
-    }
-}
-
-impl DataPageBuilder for DataPageBuilderImpl {
-    fn add_rep_levels(&mut self, max_levels: i16, rep_levels: &[i16]) {
-        self.num_values = rep_levels.len() as u32;
-        self.rep_levels_byte_len = self.add_levels(max_levels, rep_levels);
-    }
-
-    fn add_def_levels(&mut self, max_levels: i16, def_levels: &[i16]) {
-        assert!(
-            self.num_values == def_levels.len() as u32,
-            "Must call `add_rep_levels() first!`"
-        );
-
-        self.def_levels_byte_len = self.add_levels(max_levels, def_levels);
-    }
-
-    fn add_values<T: DataType>(&mut self, encoding: Encoding, values: &[T::T]) {
-        assert!(
-            self.num_values >= values.len() as u32,
-            "num_values: {}, values.len(): {}",
-            self.num_values,
-            values.len()
-        );
-        self.encoding = Some(encoding);
-        let mut encoder: Box<dyn Encoder<T>> =
-            get_encoder::<T>(self.desc.clone(), encoding, self.mem_tracker.clone())
-                .expect("get_encoder() should be OK");
-        encoder.put(values).expect("put() should be OK");
-        let encoded_values = encoder
-            .flush_buffer()
-            .expect("consume_buffer() should be OK");
-        self.buffer.extend_from_slice(encoded_values.data());
-    }
-
-    fn add_indices(&mut self, indices: ByteBufferPtr) {
-        self.encoding = Some(Encoding::RLE_DICTIONARY);
-        self.buffer.extend_from_slice(indices.data());
-    }
-
-    fn consume(self) -> Page {
-        if self.datapage_v2 {
-            Page::DataPageV2 {
-                buf: ByteBufferPtr::new(self.buffer),
-                num_values: self.num_values,
-                encoding: self.encoding.unwrap(),
-                num_nulls: 0, /* set to dummy value - don't need this when reading
-                               * data page */
-                num_rows: self.num_values, /* also don't need this when reading
-                                            * data page */
-                def_levels_byte_len: self.def_levels_byte_len,
-                rep_levels_byte_len: self.rep_levels_byte_len,
-                is_compressed: false,
-                statistics: None, // set to None, we do not need statistics for tests
-            }
-        } else {
-            Page::DataPage {
-                buf: ByteBufferPtr::new(self.buffer),
-                num_values: self.num_values,
-                encoding: self.encoding.unwrap(),
-                def_level_encoding: Encoding::RLE,
-                rep_level_encoding: Encoding::RLE,
-                statistics: None, // set to None, we do not need statistics for tests
-            }
-        }
-    }
-}
-
-/// A utility page reader which stores pages in memory.
-pub struct InMemoryPageReader {
-    pages: Box<dyn Iterator<Item = Page>>,
-}
-
-impl InMemoryPageReader {
-    pub fn new(pages: Vec<Page>) -> Self {
-        Self {
-            pages: Box::new(pages.into_iter()),
-        }
-    }
-}
-
-impl PageReader for InMemoryPageReader {
-    fn get_next_page(&mut self) -> Result<Option<Page>> {
-        Ok(self.pages.next())
-    }
-}
-
-/// A utility page iterator which stores page readers in memory, used for tests.
-pub struct InMemoryPageIterator {
-    schema: SchemaDescPtr,
-    column_desc: ColumnDescPtr,
-    page_readers: IntoIter<Box<dyn PageReader>>,
-}
-
-impl InMemoryPageIterator {
-    pub fn new(
-        schema: SchemaDescPtr,
-        column_desc: ColumnDescPtr,
-        pages: Vec<Vec<Page>>,
-    ) -> Self {
-        let page_readers = pages
-            .into_iter()
-            .map(|pages| Box::new(InMemoryPageReader::new(pages)) as Box<dyn PageReader>)
-            .collect::<Vec<Box<dyn PageReader>>>()
-            .into_iter();
-
-        Self {
-            schema,
-            column_desc,
-            page_readers,
-        }
-    }
-}
-
-impl Iterator for InMemoryPageIterator {
-    type Item = Result<Box<dyn PageReader>>;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        self.page_readers.next().map(Ok)
-    }
-}
-
-impl PageIterator for InMemoryPageIterator {
-    fn schema(&mut self) -> Result<SchemaDescPtr> {
-        Ok(self.schema.clone())
-    }
-
-    fn column_schema(&mut self) -> Result<ColumnDescPtr> {
-        Ok(self.column_desc.clone())
-    }
-}
-
-pub fn make_pages<T: DataType>(
-    desc: ColumnDescPtr,
-    encoding: Encoding,
-    num_pages: usize,
-    levels_per_page: usize,
-    min: T::T,
-    max: T::T,
-    def_levels: &mut Vec<i16>,
-    rep_levels: &mut Vec<i16>,
-    values: &mut Vec<T::T>,
-    pages: &mut VecDeque<Page>,
-    use_v2: bool,
-) where
-    T::T: PartialOrd + SampleUniform + Copy,
-{
-    let mut num_values = 0;
-    let max_def_level = desc.max_def_level();
-    let max_rep_level = desc.max_rep_level();
-
-    let mem_tracker = Arc::new(MemTracker::new());
-    let mut dict_encoder = DictEncoder::<T>::new(desc.clone(), mem_tracker);
-
-    for i in 0..num_pages {
-        let mut num_values_cur_page = 0;
-        let level_range = i * levels_per_page..(i + 1) * levels_per_page;
-
-        if max_def_level > 0 {
-            random_numbers_range(levels_per_page, 0, max_def_level + 1, def_levels);
-            for dl in &def_levels[level_range.clone()] {
-                if *dl == max_def_level {
-                    num_values_cur_page += 1;
-                }
-            }
-        } else {
-            num_values_cur_page = levels_per_page;
-        }
-        if max_rep_level > 0 {
-            random_numbers_range(levels_per_page, 0, max_rep_level + 1, rep_levels);
-        }
-        random_numbers_range(num_values_cur_page, min, max, values);
-
-        // Generate the current page
-
-        let mut pb =
-            DataPageBuilderImpl::new(desc.clone(), num_values_cur_page as u32, use_v2);
-        if max_rep_level > 0 {
-            pb.add_rep_levels(max_rep_level, &rep_levels[level_range.clone()]);
-        }
-        if max_def_level > 0 {
-            pb.add_def_levels(max_def_level, &def_levels[level_range]);
-        }
-
-        let value_range = num_values..num_values + num_values_cur_page;
-        match encoding {
-            Encoding::PLAIN_DICTIONARY | Encoding::RLE_DICTIONARY => {
-                let _ = dict_encoder.put(&values[value_range.clone()]);
-                let indices = dict_encoder
-                    .write_indices()
-                    .expect("write_indices() should be OK");
-                pb.add_indices(indices);
-            }
-            Encoding::PLAIN => {
-                pb.add_values::<T>(encoding, &values[value_range]);
-            }
-            enc => panic!("Unexpected encoding {}", enc),
-        }
-
-        let data_page = pb.consume();
-        pages.push_back(data_page);
-        num_values += num_values_cur_page;
-    }
-
-    if encoding == Encoding::PLAIN_DICTIONARY || encoding == Encoding::RLE_DICTIONARY {
-        let dict = dict_encoder
-            .write_dict()
-            .expect("write_dict() should be OK");
-        let dict_page = Page::DictionaryPage {
-            buf: dict,
-            num_values: dict_encoder.num_entries() as u32,
-            encoding: Encoding::RLE_DICTIONARY,
-            is_sorted: false,
-        };
-        pages.push_front(dict_page);
-    }
-}
diff --git a/rust/parquet/src/util/test_common/rand_gen.rs b/rust/parquet/src/util/test_common/rand_gen.rs
deleted file mode 100644
index ea91b28d496..00000000000
--- a/rust/parquet/src/util/test_common/rand_gen.rs
+++ /dev/null
@@ -1,139 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use rand::{
-    distributions::{uniform::SampleUniform, Distribution, Standard},
-    thread_rng, Rng,
-};
-
-use crate::data_type::*;
-use crate::util::memory::ByteBufferPtr;
-
-/// Random generator of data type `T` values and sequences.
-pub trait RandGen<T: DataType> {
-    fn gen(len: i32) -> T::T;
-
-    fn gen_vec(len: i32, total: usize) -> Vec<T::T> {
-        let mut result = vec![];
-        for _ in 0..total {
-            result.push(Self::gen(len))
-        }
-        result
-    }
-}
-
-impl RandGen<BoolType> for BoolType {
-    fn gen(_: i32) -> bool {
-        thread_rng().gen::<bool>()
-    }
-}
-
-impl RandGen<Int32Type> for Int32Type {
-    fn gen(_: i32) -> i32 {
-        thread_rng().gen::<i32>()
-    }
-}
-
-impl RandGen<Int64Type> for Int64Type {
-    fn gen(_: i32) -> i64 {
-        thread_rng().gen::<i64>()
-    }
-}
-
-impl RandGen<Int96Type> for Int96Type {
-    fn gen(_: i32) -> Int96 {
-        let mut rng = thread_rng();
-        let mut result = Int96::new();
-        result.set_data(rng.gen::<u32>(), rng.gen::<u32>(), rng.gen::<u32>());
-        result
-    }
-}
-
-impl RandGen<FloatType> for FloatType {
-    fn gen(_: i32) -> f32 {
-        thread_rng().gen::<f32>()
-    }
-}
-
-impl RandGen<DoubleType> for DoubleType {
-    fn gen(_: i32) -> f64 {
-        thread_rng().gen::<f64>()
-    }
-}
-
-impl RandGen<ByteArrayType> for ByteArrayType {
-    fn gen(_: i32) -> ByteArray {
-        let mut rng = thread_rng();
-        let mut result = ByteArray::new();
-        let mut value = vec![];
-        let len = rng.gen_range(0..128);
-        for _ in 0..len {
-            value.push(rng.gen_range(0..255));
-        }
-        result.set_data(ByteBufferPtr::new(value));
-        result
-    }
-}
-
-impl RandGen<FixedLenByteArrayType> for FixedLenByteArrayType {
-    fn gen(len: i32) -> FixedLenByteArray {
-        let mut rng = thread_rng();
-        let value_len = if len < 0 {
-            rng.gen_range(0..128)
-        } else {
-            len as usize
-        };
-        let value = random_bytes(value_len);
-        ByteArray::from(value).into()
-    }
-}
-
-pub fn random_bytes(n: usize) -> Vec<u8> {
-    let mut result = vec![];
-    let mut rng = thread_rng();
-    for _ in 0..n {
-        result.push(rng.gen_range(0..255));
-    }
-    result
-}
-
-pub fn random_bools(n: usize) -> Vec<bool> {
-    let mut result = vec![];
-    let mut rng = thread_rng();
-    for _ in 0..n {
-        result.push(rng.gen::<bool>());
-    }
-    result
-}
-
-pub fn random_numbers<T>(n: usize) -> Vec<T>
-where
-    Standard: Distribution<T>,
-{
-    let mut rng = thread_rng();
-    Standard.sample_iter(&mut rng).take(n).collect()
-}
-
-pub fn random_numbers_range<T>(n: usize, low: T, high: T, result: &mut Vec<T>)
-where
-    T: PartialOrd + SampleUniform + Copy,
-{
-    let mut rng = thread_rng();
-    for _ in 0..n {
-        result.push(rng.gen_range(low..high));
-    }
-}
diff --git a/rust/parquet/tests/custom_writer.rs b/rust/parquet/tests/custom_writer.rs
deleted file mode 100644
index 0a57e79d955..00000000000
--- a/rust/parquet/tests/custom_writer.rs
+++ /dev/null
@@ -1,100 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::fs::File;
-use std::{
-    fs,
-    io::{prelude::*, SeekFrom},
-    sync::Arc,
-};
-
-use parquet::file::writer::TryClone;
-use parquet::{
-    basic::Repetition, basic::Type, file::properties::WriterProperties,
-    file::writer::SerializedFileWriter, schema::types,
-};
-use std::env;
-
-// Test creating some sort of custom writer to ensure the
-// appropriate traits are exposed
-struct CustomWriter {
-    file: File,
-}
-
-impl Write for CustomWriter {
-    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
-        self.file.write(buf)
-    }
-    fn flush(&mut self) -> std::io::Result<()> {
-        self.file.flush()
-    }
-}
-
-impl Seek for CustomWriter {
-    fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64> {
-        self.file.seek(pos)
-    }
-}
-
-impl TryClone for CustomWriter {
-    fn try_clone(&self) -> std::io::Result<Self> {
-        use std::io::{Error, ErrorKind};
-        Err(Error::new(ErrorKind::Other, "Clone not supported"))
-    }
-}
-
-#[test]
-fn test_custom_writer() {
-    let schema = Arc::new(
-        types::Type::group_type_builder("schema")
-            .with_fields(&mut vec![Arc::new(
-                types::Type::primitive_type_builder("col1", Type::INT32)
-                    .with_repetition(Repetition::REQUIRED)
-                    .build()
-                    .unwrap(),
-            )])
-            .build()
-            .unwrap(),
-    );
-    let props = Arc::new(WriterProperties::builder().build());
-
-    let file = get_temp_file("test_custom_file_writer");
-    let test_file = file.try_clone().unwrap();
-
-    let writer = CustomWriter { file };
-
-    // test is that this file can be created
-    let file_writer = SerializedFileWriter::new(writer, schema, props).unwrap();
-    std::mem::drop(file_writer);
-
-    // ensure the file now exists and has non zero size
-    let metadata = test_file.metadata().unwrap();
-    assert!(metadata.len() > 0);
-}
-
-/// Returns file handle for a temp file in 'target' directory with a provided content
-fn get_temp_file(file_name: &str) -> fs::File {
-    // build tmp path to a file in "target/debug/testdata"
-    let mut path_buf = env::current_dir().unwrap();
-    path_buf.push("target");
-    path_buf.push("debug");
-    path_buf.push("testdata");
-    fs::create_dir_all(&path_buf).unwrap();
-    path_buf.push(file_name);
-
-    File::create(path_buf).unwrap()
-}
diff --git a/rust/parquet_derive/Cargo.toml b/rust/parquet_derive/Cargo.toml
deleted file mode 100644
index 4fbda38c450..00000000000
--- a/rust/parquet_derive/Cargo.toml
+++ /dev/null
@@ -1,42 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-[package]
-name = "parquet_derive"
-version = "5.0.0-SNAPSHOT"
-license = "Apache-2.0"
-description = "Derive macros for the Rust implementation of Apache Parquet"
-homepage = "https://github.com/apache/arrow"
-repository = "https://github.com/apache/arrow"
-authors = ["Apache Arrow <dev@arrow.apache.org>"]
-keywords = [ "parquet" ]
-readme = "README.md"
-edition = "2018"
-
-[lib]
-proc-macro = true
-
-[features]
-chrono = []
-bigdecimal = []
-uuid = []
-
-[dependencies]
-proc-macro2 = "1.0"
-quote = "1.0"
-syn = { version = "1.0", features = ["full", "extra-traits"] }
-parquet = { path = "../parquet", version = "5.0.0-SNAPSHOT" }
diff --git a/rust/parquet_derive/README.md b/rust/parquet_derive/README.md
deleted file mode 100644
index 2e9e2a04419..00000000000
--- a/rust/parquet_derive/README.md
+++ /dev/null
@@ -1,98 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-# Parquet Derive
-
-A crate for deriving `RecordWriter` for arbitrary, _simple_ structs. This does not generate writers for arbitrarily nested
-structures. It only works for primitives and a few generic structures and
-various levels of reference. Please see features checklist for what is currently
-supported.
-
-Derive also has some support for the chrono time library. You must must enable the `chrono` feature to get this support.
-
-## Usage
-Add this to your Cargo.toml:
-```toml
-[dependencies]
-parquet = "5.0.0-SNAPSHOT"
-parquet_derive = "5.0.0-SNAPSHOT"
-```
-
-and this to your crate root:
-```rust
-extern crate parquet;
-#[macro_use] extern crate parquet_derive;
-```
-
-Example usage of deriving a `RecordWriter` for your struct:
-
-```rust
-use parquet;
-use parquet::record::RecordWriter;
-
-#[derive(ParquetRecordWriter)]
-struct ACompleteRecord<'a> {
-    pub a_bool: bool,
-    pub a_str: &'a str,
-    pub a_string: String,
-    pub a_borrowed_string: &'a String,
-    pub maybe_a_str: Option<&'a str>,
-    pub magic_number: i32,
-    pub low_quality_pi: f32,
-    pub high_quality_pi: f64,
-    pub maybe_pi: Option<f32>,
-    pub maybe_best_pi: Option<f64>,
-}
-
-// Initialize your parquet file
-let mut writer = SerializedFileWriter::new(file, schema, props).unwrap();
-let mut row_group = writer.next_row_group().unwrap();
-
-// Build up your records
-let chunks = vec![ACompleteRecord{...}];
-
-// The derived `RecordWriter` takes over here
-(&chunks[..]).write_to_row_group(&mut row_group);
-
-writer.close_row_group(row_group).unwrap();
-writer.close().unwrap();
-```
-
-## Features
-- [X] Support writing `String`, `&str`, `bool`, `i32`, `f32`, `f64`, `Vec<u8>`
-- [ ] Support writing dictionaries
-- [X] Support writing logical types like timestamp
-- [X] Derive definition_levels for `Option`
-- [ ] Derive definition levels for nested structures
-- [ ] Derive writing tuple struct
-- [ ] Derive writing `tuple` container types
-
-## Requirements
-- Same as `parquet-rs`
-
-## Test
-Testing a `*_derive` crate requires an intermediate crate. Go to `parquet_derive_test` and run `cargo test` for
-unit tests.
-
-## Docs
-To build documentation, run `cargo doc --no-deps`.
-To compile and view in the browser, run `cargo doc --no-deps --open`.
-
-## License
-Licensed under the Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0.
diff --git a/rust/parquet_derive/src/lib.rs b/rust/parquet_derive/src/lib.rs
deleted file mode 100644
index 279d0f77f3e..00000000000
--- a/rust/parquet_derive/src/lib.rs
+++ /dev/null
@@ -1,126 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#![recursion_limit = "128"]
-
-extern crate proc_macro;
-extern crate proc_macro2;
-extern crate syn;
-#[macro_use]
-extern crate quote;
-
-extern crate parquet;
-
-use syn::{parse_macro_input, Data, DataStruct, DeriveInput};
-
-mod parquet_field;
-
-/// Derive flat, simple RecordWriter implementations. Works by parsing
-/// a struct tagged with `#[derive(ParquetRecordWriter)]` and emitting
-/// the correct writing code for each field of the struct. Column writers
-/// are generated in the order they are defined.
-///
-/// It is up to the programmer to keep the order of the struct
-/// fields lined up with the schema.
-///
-/// Example:
-///
-/// ```ignore
-/// use parquet;
-/// use parquet::record::RecordWriter;
-/// use parquet::schema::parser::parse_message_type;
-///
-/// use std::sync::Arc;
-//
-/// #[derive(ParquetRecordWriter)]
-/// struct ACompleteRecord<'a> {
-///   pub a_bool: bool,
-///   pub a_str: &'a str,
-/// }
-///
-/// let schema_str = "message schema {
-///   REQUIRED boolean         a_bool;
-///   REQUIRED BINARY          a_str (UTF8);
-/// }";
-///
-/// pub fn write_some_records() {
-///   let samples = vec![
-///     ACompleteRecord {
-///       a_bool: true,
-///       a_str: "I'm true"
-///     },
-///     ACompleteRecord {
-///       a_bool: false,
-///       a_str: "I'm false"
-///     }
-///   ];
-///
-///  let schema = Arc::new(parse_message_type(schema_str).unwrap());
-///
-///  let props = Arc::new(WriterProperties::builder().build());
-///  let mut writer = SerializedFileWriter::new(file, schema, props).unwrap();
-///
-///  let mut row_group = writer.next_row_group().unwrap();
-///  samples.as_slice().write_to_row_group(&mut row_group).unwrap();
-///  writer.close_row_group(row_group).unwrap();
-///  writer.close().unwrap();
-/// }
-/// ```
-///
-#[proc_macro_derive(ParquetRecordWriter)]
-pub fn parquet_record_writer(input: proc_macro::TokenStream) -> proc_macro::TokenStream {
-    let input: DeriveInput = parse_macro_input!(input as DeriveInput);
-    let fields = match input.data {
-        Data::Struct(DataStruct { fields, .. }) => fields,
-        Data::Enum(_) => unimplemented!("Enum currently is not supported"),
-        Data::Union(_) => unimplemented!("Union currently is not supported"),
-    };
-
-    let field_infos: Vec<_> = fields
-        .iter()
-        .map(|f: &syn::Field| parquet_field::Field::from(f))
-        .collect();
-
-    let writer_snippets: Vec<proc_macro2::TokenStream> =
-        field_infos.iter().map(|x| x.writer_snippet()).collect();
-
-    let derived_for = input.ident;
-    let generics = input.generics;
-
-    (quote! {
-    impl#generics RecordWriter<#derived_for#generics> for &[#derived_for#generics] {
-      fn write_to_row_group(&self, row_group_writer: &mut Box<parquet::file::writer::RowGroupWriter>) -> Result<(), parquet::errors::ParquetError> {
-        let mut row_group_writer = row_group_writer;
-        let records = &self; // Used by all the writer snippets to be more clear
-
-        #(
-          {
-              let mut some_column_writer = row_group_writer.next_column().unwrap();
-              if let Some(mut column_writer) = some_column_writer {
-                  #writer_snippets
-                  row_group_writer.close_column(column_writer)?;
-              } else {
-                  return Err(parquet::errors::ParquetError::General("Failed to get next column".into()))
-              }
-          }
-        );*
-
-        Ok(())
-      }
-    }
-  }).into()
-}
diff --git a/rust/parquet_derive/src/parquet_field.rs b/rust/parquet_derive/src/parquet_field.rs
deleted file mode 100644
index 328f4a6680f..00000000000
--- a/rust/parquet_derive/src/parquet_field.rs
+++ /dev/null
@@ -1,920 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#[derive(Debug, PartialEq)]
-pub struct Field {
-    ident: syn::Ident,
-    ty: Type,
-    is_a_byte_buf: bool,
-    third_party_type: Option<ThirdPartyType>,
-}
-
-/// Use third party libraries, detected
-/// at compile time. These libraries will
-/// be written to parquet as their preferred
-/// physical type.
-///
-///   ChronoNaiveDateTime is written as i64
-///   ChronoNaiveDate is written as i32
-#[derive(Debug, PartialEq)]
-enum ThirdPartyType {
-    ChronoNaiveDateTime,
-    ChronoNaiveDate,
-    Uuid,
-}
-
-impl Field {
-    pub fn from(f: &syn::Field) -> Self {
-        let ty = Type::from(f);
-        let is_a_byte_buf = ty.physical_type() == parquet::basic::Type::BYTE_ARRAY;
-
-        let third_party_type = match &ty.last_part()[..] {
-            "NaiveDateTime" => Some(ThirdPartyType::ChronoNaiveDateTime),
-            "NaiveDate" => Some(ThirdPartyType::ChronoNaiveDate),
-            "Uuid" => Some(ThirdPartyType::Uuid),
-            _ => None,
-        };
-
-        Field {
-            ident: f
-                .ident
-                .clone()
-                .expect("Only structs with named fields are currently supported"),
-            ty,
-            is_a_byte_buf,
-            third_party_type,
-        }
-    }
-
-    /// Takes the parsed field of the struct and emits a valid
-    /// column writer snippet. Should match exactly what you
-    /// would write by hand.
-    ///
-    /// Can only generate writers for basic structs, for example:
-    ///
-    /// struct Record {
-    ///   a_bool: bool,
-    ///   maybe_a_bool: Option<bool>
-    /// }
-    ///
-    /// but not
-    ///
-    /// struct UnsupportedNestedRecord {
-    ///   a_property: bool,
-    ///   nested_record: Record
-    /// }
-    ///
-    /// because this parsing logic is not sophisticated enough for definition
-    /// levels beyond 2.
-    pub fn writer_snippet(&self) -> proc_macro2::TokenStream {
-        let ident = &self.ident;
-        let column_writer = self.ty.column_writer();
-
-        let vals_builder = match &self.ty {
-            Type::TypePath(_) => self.copied_direct_vals(),
-            Type::Option(ref first_type) => match **first_type {
-                Type::TypePath(_) => self.option_into_vals(),
-                Type::Reference(_, ref second_type) => match **second_type {
-                    Type::TypePath(_) => self.option_into_vals(),
-                    _ => unimplemented!("Unsupported type encountered"),
-                },
-                ref f => unimplemented!("Unsupported: {:#?}", f),
-            },
-            Type::Reference(_, ref first_type) => match **first_type {
-                Type::TypePath(_) => self.copied_direct_vals(),
-                Type::Option(ref second_type) => match **second_type {
-                    Type::TypePath(_) => self.option_into_vals(),
-                    Type::Reference(_, ref second_type) => match **second_type {
-                        Type::TypePath(_) => self.option_into_vals(),
-                        _ => unimplemented!("Unsupported type encountered"),
-                    },
-                    ref f => unimplemented!("Unsupported: {:#?}", f),
-                },
-                ref f => unimplemented!("Unsupported: {:#?}", f),
-            },
-            f => unimplemented!("Unsupported: {:#?}", f),
-        };
-
-        let definition_levels = match &self.ty {
-            Type::TypePath(_) => None,
-            Type::Option(ref first_type) => match **first_type {
-                Type::TypePath(_) => Some(self.optional_definition_levels()),
-                Type::Option(_) => unimplemented!("Unsupported nesting encountered"),
-                Type::Reference(_, ref second_type)
-                | Type::Vec(ref second_type)
-                | Type::Array(ref second_type) => match **second_type {
-                    Type::TypePath(_) => Some(self.optional_definition_levels()),
-                    _ => unimplemented!("Unsupported nesting encountered"),
-                },
-            },
-            Type::Reference(_, ref first_type)
-            | Type::Vec(ref first_type)
-            | Type::Array(ref first_type) => match **first_type {
-                Type::TypePath(_) => None,
-                Type::Reference(_, ref second_type)
-                | Type::Vec(ref second_type)
-                | Type::Array(ref second_type)
-                | Type::Option(ref second_type) => match **second_type {
-                    Type::TypePath(_) => Some(self.optional_definition_levels()),
-                    Type::Reference(_, ref third_type) => match **third_type {
-                        Type::TypePath(_) => Some(self.optional_definition_levels()),
-                        _ => unimplemented!("Unsupported definition encountered"),
-                    },
-                    _ => unimplemented!("Unsupported definition encountered"),
-                },
-            },
-        };
-
-        // "vals" is the run of primitive data being written for the column
-        // "definition_levels" is a vector of bools which controls whether a value is missing or present
-        // this TokenStream is only one part of the code for writing a column and
-        // it relies on values calculated in prior code snippets, namely "definition_levels" and "vals_builder".
-        // All the context is put together in this functions final quote and
-        // this expression just switches between non-nullable and nullable write statements
-        let write_batch_expr = if definition_levels.is_some() {
-            quote! {
-                if let #column_writer(ref mut typed) = column_writer {
-                    typed.write_batch(&vals[..], Some(&definition_levels[..]), None)?;
-                } else {
-                    panic!("Schema and struct disagree on type for {}", stringify!{#ident})
-                }
-            }
-        } else {
-            quote! {
-                if let #column_writer(ref mut typed) = column_writer {
-                    typed.write_batch(&vals[..], None, None)?;
-                } else {
-                    panic!("Schema and struct disagree on type for {}", stringify!{#ident})
-                }
-            }
-        };
-
-        quote! {
-            {
-                #definition_levels
-
-                #vals_builder
-
-                #write_batch_expr
-            }
-        }
-    }
-
-    fn option_into_vals(&self) -> proc_macro2::TokenStream {
-        let field_name = &self.ident;
-        let is_a_byte_buf = self.is_a_byte_buf;
-        let is_a_timestamp =
-            self.third_party_type == Some(ThirdPartyType::ChronoNaiveDateTime);
-        let is_a_date = self.third_party_type == Some(ThirdPartyType::ChronoNaiveDate);
-        let is_a_uuid = self.third_party_type == Some(ThirdPartyType::Uuid);
-        let copy_to_vec = !matches!(
-            self.ty.physical_type(),
-            parquet::basic::Type::BYTE_ARRAY | parquet::basic::Type::FIXED_LEN_BYTE_ARRAY
-        );
-
-        let binding = if copy_to_vec {
-            quote! { let Some(inner) = rec.#field_name }
-        } else {
-            quote! { let Some(ref inner) = rec.#field_name }
-        };
-
-        let some = if is_a_timestamp {
-            quote! { Some(inner.timestamp_millis()) }
-        } else if is_a_date {
-            quote! { Some(inner.signed_duration_since(chrono::NaiveDate::from_ymd(1970, 1, 1)).num_days() as i32)  }
-        } else if is_a_uuid {
-            quote! { Some((&inner.to_string()[..]).into()) }
-        } else if is_a_byte_buf {
-            quote! { Some((&inner[..]).into())}
-        } else {
-            quote! { Some(inner) }
-        };
-
-        quote! {
-            let vals: Vec<_> = records.iter().filter_map(|rec| {
-                if #binding {
-                    #some
-                } else {
-                    None
-                }
-            }).collect();
-        }
-    }
-
-    fn copied_direct_vals(&self) -> proc_macro2::TokenStream {
-        let field_name = &self.ident;
-        let is_a_byte_buf = self.is_a_byte_buf;
-        let is_a_timestamp =
-            self.third_party_type == Some(ThirdPartyType::ChronoNaiveDateTime);
-        let is_a_date = self.third_party_type == Some(ThirdPartyType::ChronoNaiveDate);
-        let is_a_uuid = self.third_party_type == Some(ThirdPartyType::Uuid);
-
-        let access = if is_a_timestamp {
-            quote! { rec.#field_name.timestamp_millis() }
-        } else if is_a_date {
-            quote! { rec.#field_name.signed_duration_since(chrono::NaiveDate::from_ymd(1970, 1, 1)).num_days() as i32 }
-        } else if is_a_uuid {
-            quote! { (&rec.#field_name.to_string()[..]).into() }
-        } else if is_a_byte_buf {
-            quote! { (&rec.#field_name[..]).into() }
-        } else {
-            quote! { rec.#field_name }
-        };
-
-        quote! {
-            let vals: Vec<_> = records.iter().map(|rec| #access).collect();
-        }
-    }
-
-    fn optional_definition_levels(&self) -> proc_macro2::TokenStream {
-        let field_name = &self.ident;
-
-        quote! {
-            let definition_levels: Vec<i16> = self
-              .iter()
-              .map(|rec| if rec.#field_name.is_some() { 1 } else { 0 })
-              .collect();
-        }
-    }
-}
-
-#[allow(clippy::enum_variant_names)]
-#[allow(clippy::large_enum_variant)]
-#[derive(Debug, PartialEq)]
-enum Type {
-    Array(Box<Type>),
-    Option(Box<Type>),
-    Vec(Box<Type>),
-    TypePath(syn::Type),
-    Reference(Option<syn::Lifetime>, Box<Type>),
-}
-
-impl Type {
-    /// Takes a rust type and returns the appropriate
-    /// parquet-rs column writer
-    fn column_writer(&self) -> syn::TypePath {
-        use parquet::basic::Type as BasicType;
-
-        match self.physical_type() {
-            BasicType::BOOLEAN => {
-                syn::parse_quote!(parquet::column::writer::ColumnWriter::BoolColumnWriter)
-            }
-            BasicType::INT32 => syn::parse_quote!(
-                parquet::column::writer::ColumnWriter::Int32ColumnWriter
-            ),
-            BasicType::INT64 => syn::parse_quote!(
-                parquet::column::writer::ColumnWriter::Int64ColumnWriter
-            ),
-            BasicType::INT96 => syn::parse_quote!(
-                parquet::column::writer::ColumnWriter::Int96ColumnWriter
-            ),
-            BasicType::FLOAT => syn::parse_quote!(
-                parquet::column::writer::ColumnWriter::FloatColumnWriter
-            ),
-            BasicType::DOUBLE => syn::parse_quote!(
-                parquet::column::writer::ColumnWriter::DoubleColumnWriter
-            ),
-            BasicType::BYTE_ARRAY => syn::parse_quote!(
-                parquet::column::writer::ColumnWriter::ByteArrayColumnWriter
-            ),
-            BasicType::FIXED_LEN_BYTE_ARRAY => syn::parse_quote!(
-                parquet::column::writer::ColumnWriter::FixedLenByteArrayColumnWriter
-            ),
-        }
-    }
-
-    /// Helper to simplify a nested field definition to its leaf type
-    ///
-    /// Ex:
-    ///   Option<&String> => Type::TypePath(String)
-    ///   &Option<i32> => Type::TypePath(i32)
-    ///   Vec<Vec<u8>> => Type::Vec(u8)
-    ///
-    /// Useful in determining the physical type of a field and the
-    /// definition levels.
-    fn leaf_type_recursive(&self) -> &Type {
-        self.leaf_type_recursive_helper(self, None)
-    }
-
-    fn leaf_type_recursive_helper<'a>(
-        &'a self,
-        ty: &'a Type,
-        parent_ty: Option<&'a Type>,
-    ) -> &Type {
-        match ty {
-            Type::TypePath(_) => parent_ty.unwrap_or(ty),
-            Type::Option(ref first_type)
-            | Type::Vec(ref first_type)
-            | Type::Array(ref first_type)
-            | Type::Reference(_, ref first_type) => {
-                self.leaf_type_recursive_helper(first_type, Some(ty))
-            }
-        }
-    }
-
-    /// Helper method to further unwrap leaf_type() to get inner-most
-    /// type information, useful for determining the physical type
-    /// and normalizing the type paths.
-    fn inner_type(&self) -> &syn::Type {
-        let leaf_type = self.leaf_type_recursive();
-
-        match leaf_type {
-            Type::TypePath(ref type_) => type_,
-            Type::Option(ref first_type)
-            | Type::Vec(ref first_type)
-            | Type::Array(ref first_type)
-            | Type::Reference(_, ref first_type) => match **first_type {
-                Type::TypePath(ref type_) => type_,
-                _ => unimplemented!("leaf_type() should only return shallow types"),
-            },
-        }
-    }
-
-    /// Helper to normalize a type path by extracting the
-    /// most identifiable part
-    ///
-    /// Ex:
-    ///   std::string::String => String
-    ///   Vec<u8> => Vec<u8>
-    ///   chrono::NaiveDateTime => NaiveDateTime
-    ///
-    /// Does run the risk of mis-identifying a type if import
-    /// rename is in play. Please note procedural macros always
-    /// run before type resolution so this is a risk the user
-    /// takes on when renaming imports.
-    fn last_part(&self) -> String {
-        let inner_type = self.inner_type();
-        let inner_type_str = (quote! { #inner_type }).to_string();
-
-        inner_type_str
-            .split("::")
-            .last()
-            .unwrap()
-            .trim()
-            .to_string()
-    }
-
-    /// Converts rust types to parquet physical types.
-    ///
-    /// Ex:
-    ///   [u8; 10] => FIXED_LEN_BYTE_ARRAY
-    ///   Vec<u8>  => BYTE_ARRAY
-    ///   String => BYTE_ARRAY
-    ///   i32 => INT32
-    fn physical_type(&self) -> parquet::basic::Type {
-        use parquet::basic::Type as BasicType;
-
-        let last_part = self.last_part();
-        let leaf_type = self.leaf_type_recursive();
-
-        match leaf_type {
-            Type::Array(ref first_type) => {
-                if let Type::TypePath(_) = **first_type {
-                    if last_part == "u8" {
-                        return BasicType::FIXED_LEN_BYTE_ARRAY;
-                    }
-                }
-            }
-            Type::Vec(ref first_type) => {
-                if let Type::TypePath(_) = **first_type {
-                    if last_part == "u8" {
-                        return BasicType::BYTE_ARRAY;
-                    }
-                }
-            }
-            _ => (),
-        }
-
-        match last_part.trim() {
-            "bool" => BasicType::BOOLEAN,
-            "u8" | "u16" | "u32" => BasicType::INT32,
-            "i8" | "i16" | "i32" | "NaiveDate" => BasicType::INT32,
-            "u64" | "i64" | "usize" | "NaiveDateTime" => BasicType::INT64,
-            "f32" => BasicType::FLOAT,
-            "f64" => BasicType::DOUBLE,
-            "String" | "str" | "Uuid" => BasicType::BYTE_ARRAY,
-            f => unimplemented!("{} currently is not supported", f),
-        }
-    }
-
-    /// Convert a parsed rust field AST in to a more easy to manipulate
-    /// parquet_derive::Field
-    fn from(f: &syn::Field) -> Self {
-        Type::from_type(f, &f.ty)
-    }
-
-    fn from_type(f: &syn::Field, ty: &syn::Type) -> Self {
-        match ty {
-            syn::Type::Path(ref p) => Type::from_type_path(f, p),
-            syn::Type::Reference(ref tr) => Type::from_type_reference(f, tr),
-            syn::Type::Array(ref ta) => Type::from_type_array(f, ta),
-            other => unimplemented!(
-                "Unable to derive {:?} - it is currently an unsupported type\n{:#?}",
-                f.ident.as_ref().unwrap(),
-                other
-            ),
-        }
-    }
-
-    fn from_type_path(f: &syn::Field, p: &syn::TypePath) -> Self {
-        let last_segment = p.path.segments.last().unwrap();
-
-        let is_vec =
-            last_segment.ident == syn::Ident::new("Vec", proc_macro2::Span::call_site());
-        let is_option = last_segment.ident
-            == syn::Ident::new("Option", proc_macro2::Span::call_site());
-
-        if is_vec || is_option {
-            let generic_type = match &last_segment.arguments {
-                syn::PathArguments::AngleBracketed(angle_args) => {
-                    assert_eq!(angle_args.args.len(), 1);
-                    let first_arg = &angle_args.args[0];
-
-                    match first_arg {
-                        syn::GenericArgument::Type(ref typath) => typath.clone(),
-                        other => unimplemented!("Unsupported: {:#?}", other),
-                    }
-                }
-                other => unimplemented!("Unsupported: {:#?}", other),
-            };
-
-            if is_vec {
-                Type::Vec(Box::new(Type::from_type(f, &generic_type)))
-            } else {
-                Type::Option(Box::new(Type::from_type(f, &generic_type)))
-            }
-        } else {
-            Type::TypePath(syn::Type::Path(p.clone()))
-        }
-    }
-
-    fn from_type_reference(f: &syn::Field, tr: &syn::TypeReference) -> Self {
-        let lifetime = tr.lifetime.clone();
-        let inner_type = Type::from_type(f, tr.elem.as_ref());
-        Type::Reference(lifetime, Box::new(inner_type))
-    }
-
-    fn from_type_array(f: &syn::Field, ta: &syn::TypeArray) -> Self {
-        let inner_type = Type::from_type(f, ta.elem.as_ref());
-        Type::Array(Box::new(inner_type))
-    }
-}
-
-#[cfg(test)]
-mod test {
-    use super::*;
-    use syn::{self, Data, DataStruct, DeriveInput};
-
-    fn extract_fields(input: proc_macro2::TokenStream) -> Vec<syn::Field> {
-        let input: DeriveInput = syn::parse2(input).unwrap();
-
-        let fields = match input.data {
-            Data::Struct(DataStruct { fields, .. }) => fields,
-            _ => panic!("Input must be a struct"),
-        };
-
-        fields.iter().map(|field| field.to_owned()).collect()
-    }
-
-    #[test]
-    fn test_generating_a_simple_writer_snippet() {
-        let snippet: proc_macro2::TokenStream = quote! {
-          struct ABoringStruct {
-            counter: usize,
-          }
-        };
-
-        let fields = extract_fields(snippet);
-        let counter = Field::from(&fields[0]);
-
-        let snippet = counter.writer_snippet().to_string();
-        assert_eq!(snippet,
-                   (quote!{
-                        {
-                            let vals : Vec < _ > = records . iter ( ) . map ( | rec | rec . counter ) . collect ( );
-
-                            if let parquet::column::writer::ColumnWriter::Int64ColumnWriter ( ref mut typed ) = column_writer {
-                                typed . write_batch ( & vals [ .. ] , None , None ) ?;
-                            }  else {
-                                panic!("Schema and struct disagree on type for {}" , stringify!{ counter } )
-                            }
-                        }
-                   }).to_string()
-        )
-    }
-
-    #[test]
-    fn test_optional_to_writer_snippet() {
-        let struct_def: proc_macro2::TokenStream = quote! {
-          struct StringBorrower<'a> {
-            optional_str: Option<&'a str>,
-            optional_string: &Option<String>,
-            optional_dumb_int: &Option<&i32>,
-          }
-        };
-
-        let fields = extract_fields(struct_def);
-
-        let optional = Field::from(&fields[0]);
-        let snippet = optional.writer_snippet();
-        assert_eq!(snippet.to_string(),
-          (quote! {
-          {
-                let definition_levels : Vec < i16 > = self . iter ( ) . map ( | rec | if rec . optional_str . is_some ( ) { 1 } else { 0 } ) . collect ( ) ;
-
-                let vals: Vec <_> = records.iter().filter_map( |rec| {
-                    if let Some ( ref inner ) = rec . optional_str {
-                        Some ( (&inner[..]).into() )
-                    } else {
-                        None
-                    }
-                }).collect();
-
-                if let parquet::column::writer::ColumnWriter::ByteArrayColumnWriter ( ref mut typed ) = column_writer {
-                    typed . write_batch ( & vals [ .. ] , Some(&definition_levels[..]) , None ) ? ;
-                } else {
-                    panic!("Schema and struct disagree on type for {}" , stringify ! { optional_str } )
-                }
-           }
-            }
-          ).to_string());
-
-        let optional = Field::from(&fields[1]);
-        let snippet = optional.writer_snippet();
-        assert_eq!(snippet.to_string(),
-                   (quote!{
-                   {
-                        let definition_levels : Vec < i16 > = self . iter ( ) . map ( | rec | if rec . optional_string . is_some ( ) { 1 } else { 0 } ) . collect ( ) ;
-
-                        let vals: Vec <_> = records.iter().filter_map( |rec| {
-                            if let Some ( ref inner ) = rec . optional_string {
-                                Some ( (&inner[..]).into() )
-                            } else {
-                                None
-                            }
-                        }).collect();
-
-                        if let parquet::column::writer::ColumnWriter::ByteArrayColumnWriter ( ref mut typed ) = column_writer {
-                            typed . write_batch ( & vals [ .. ] , Some(&definition_levels[..]) , None ) ? ;
-                        } else {
-                            panic!("Schema and struct disagree on type for {}" , stringify ! { optional_string } )
-                        }
-                    }
-        }).to_string());
-
-        let optional = Field::from(&fields[2]);
-        let snippet = optional.writer_snippet();
-        assert_eq!(snippet.to_string(),
-                   (quote!{
-                    {
-                        let definition_levels : Vec < i16 > = self . iter ( ) . map ( | rec | if rec . optional_dumb_int . is_some ( ) { 1 } else { 0 } ) . collect ( ) ;
-
-                        let vals: Vec <_> = records.iter().filter_map( |rec| {
-                            if let Some ( inner ) = rec . optional_dumb_int {
-                                Some ( inner )
-                            } else {
-                                None
-                            }
-                        }).collect();
-
-                        if let parquet::column::writer::ColumnWriter::Int32ColumnWriter ( ref mut typed ) = column_writer {
-                            typed . write_batch ( & vals [ .. ] , Some(&definition_levels[..]) , None ) ? ;
-                        }  else {
-                            panic!("Schema and struct disagree on type for {}" , stringify ! { optional_dumb_int } )
-                        }
-                    }
-        }).to_string());
-    }
-
-    #[test]
-    fn test_converting_to_column_writer_type() {
-        let snippet: proc_macro2::TokenStream = quote! {
-          struct ABasicStruct {
-            yes_no: bool,
-            name: String,
-          }
-        };
-
-        let fields = extract_fields(snippet);
-        let processed: Vec<_> = fields.iter().map(|field| Field::from(field)).collect();
-
-        let column_writers: Vec<_> = processed
-            .iter()
-            .map(|field| field.ty.column_writer())
-            .collect();
-
-        assert_eq!(
-            column_writers,
-            vec![
-                syn::parse_quote!(
-                    parquet::column::writer::ColumnWriter::BoolColumnWriter
-                ),
-                syn::parse_quote!(
-                    parquet::column::writer::ColumnWriter::ByteArrayColumnWriter
-                )
-            ]
-        );
-    }
-
-    #[test]
-    fn convert_basic_struct() {
-        let snippet: proc_macro2::TokenStream = quote! {
-          struct ABasicStruct {
-            yes_no: bool,
-            name: String,
-          }
-        };
-
-        let fields = extract_fields(snippet);
-        let processed: Vec<_> = fields.iter().map(|field| Field::from(field)).collect();
-        assert_eq!(processed.len(), 2);
-
-        assert_eq!(
-            processed,
-            vec![
-                Field {
-                    ident: syn::Ident::new("yes_no", proc_macro2::Span::call_site()),
-                    ty: Type::TypePath(syn::parse_quote!(bool)),
-                    is_a_byte_buf: false,
-                    third_party_type: None,
-                },
-                Field {
-                    ident: syn::Ident::new("name", proc_macro2::Span::call_site()),
-                    ty: Type::TypePath(syn::parse_quote!(String)),
-                    is_a_byte_buf: true,
-                    third_party_type: None,
-                }
-            ]
-        )
-    }
-
-    #[test]
-    fn test_get_inner_type() {
-        let snippet: proc_macro2::TokenStream = quote! {
-          struct LotsOfInnerTypes {
-            a_vec: Vec<u8>,
-            a_option: std::option::Option<bool>,
-            a_silly_string: std::string::String,
-            a_complicated_thing: std::option::Option<std::result::Result<(),()>>,
-          }
-        };
-
-        let fields = extract_fields(snippet);
-        let converted_fields: Vec<_> =
-            fields.iter().map(|field| Type::from(field)).collect();
-        let inner_types: Vec<_> = converted_fields
-            .iter()
-            .map(|field| field.inner_type())
-            .collect();
-        let inner_types_strs: Vec<_> = inner_types
-            .iter()
-            .map(|ty| (quote! { #ty }).to_string())
-            .collect();
-
-        assert_eq!(
-            inner_types_strs,
-            vec![
-                "u8",
-                "bool",
-                "std :: string :: String",
-                "std :: result :: Result < () , () >"
-            ]
-        )
-    }
-
-    #[test]
-    fn test_physical_type() {
-        use parquet::basic::Type as BasicType;
-        let snippet: proc_macro2::TokenStream = quote! {
-          struct LotsOfInnerTypes {
-            a_buf: Vec<u8>,
-            a_number: i32,
-            a_verbose_option: std::option::Option<bool>,
-            a_silly_string: std::string::String,
-            a_fix_byte_buf: [u8; 10],
-            a_complex_option: Option<&Vec<u8>>,
-            a_complex_vec: &Vec<&Option<u8>>,
-          }
-        };
-
-        let fields = extract_fields(snippet);
-        let converted_fields: Vec<_> =
-            fields.iter().map(|field| Type::from(field)).collect();
-        let physical_types: Vec<_> = converted_fields
-            .iter()
-            .map(|ty| ty.physical_type())
-            .collect();
-
-        assert_eq!(
-            physical_types,
-            vec![
-                BasicType::BYTE_ARRAY,
-                BasicType::INT32,
-                BasicType::BOOLEAN,
-                BasicType::BYTE_ARRAY,
-                BasicType::FIXED_LEN_BYTE_ARRAY,
-                BasicType::BYTE_ARRAY,
-                BasicType::INT32
-            ]
-        )
-    }
-
-    #[test]
-    fn test_convert_comprehensive_owned_struct() {
-        let snippet: proc_macro2::TokenStream = quote! {
-          struct VecHolder {
-            a_vec: Vec<u8>,
-            a_option: std::option::Option<bool>,
-            a_silly_string: std::string::String,
-            a_complicated_thing: std::option::Option<std::result::Result<(),()>>,
-          }
-        };
-
-        let fields = extract_fields(snippet);
-        let converted_fields: Vec<_> =
-            fields.iter().map(|field| Type::from(field)).collect();
-
-        assert_eq!(
-            converted_fields,
-            vec![
-                Type::Vec(Box::new(Type::TypePath(syn::parse_quote!(u8)))),
-                Type::Option(Box::new(Type::TypePath(syn::parse_quote!(bool)))),
-                Type::TypePath(syn::parse_quote!(std::string::String)),
-                Type::Option(Box::new(Type::TypePath(
-                    syn::parse_quote!(std::result::Result<(),()>)
-                ))),
-            ]
-        );
-    }
-
-    #[test]
-    fn test_convert_borrowed_struct() {
-        let snippet: proc_macro2::TokenStream = quote! {
-          struct Borrower<'a> {
-            a_str: &'a str,
-            a_borrowed_option: &'a Option<bool>,
-            so_many_borrows: &'a Option<&'a str>,
-          }
-        };
-
-        let fields = extract_fields(snippet);
-        let types: Vec<_> = fields.iter().map(|field| Type::from(field)).collect();
-
-        assert_eq!(
-            types,
-            vec![
-                Type::Reference(
-                    Some(syn::Lifetime::new("'a", proc_macro2::Span::call_site())),
-                    Box::new(Type::TypePath(syn::parse_quote!(str)))
-                ),
-                Type::Reference(
-                    Some(syn::Lifetime::new("'a", proc_macro2::Span::call_site())),
-                    Box::new(Type::Option(Box::new(Type::TypePath(syn::parse_quote!(
-                        bool
-                    )))))
-                ),
-                Type::Reference(
-                    Some(syn::Lifetime::new("'a", proc_macro2::Span::call_site())),
-                    Box::new(Type::Option(Box::new(Type::Reference(
-                        Some(syn::Lifetime::new("'a", proc_macro2::Span::call_site())),
-                        Box::new(Type::TypePath(syn::parse_quote!(str)))
-                    ))))
-                ),
-            ]
-        );
-    }
-
-    #[test]
-    #[cfg(feature = "chrono")]
-    fn test_chrono_timestamp_millis() {
-        let snippet: proc_macro2::TokenStream = quote! {
-          struct ATimestampStruct {
-            henceforth: chrono::NaiveDateTime,
-            maybe_happened: Option<&chrono::NaiveDateTime>,
-          }
-        };
-
-        let fields = extract_fields(snippet);
-        let when = Field::from(&fields[0]);
-        assert_eq!(when.writer_snippet().to_string(),(quote!{
-            {
-                let vals : Vec<_> = records.iter().map(|rec| rec.henceforth.timestamp_millis() ).collect();
-                if let parquet::column::writer::ColumnWriter::Int64ColumnWriter(ref mut typed) = column_writer {
-                    typed.write_batch(&vals[..], None, None) ?;
-                } else {
-                    panic!("Schema and struct disagree on type for {}" , stringify!{ henceforth })
-                }
-            }
-        }).to_string());
-
-        let maybe_happened = Field::from(&fields[1]);
-        assert_eq!(maybe_happened.writer_snippet().to_string(),(quote!{
-            {
-                let definition_levels : Vec<i16> = self.iter().map(|rec| if rec.maybe_happened.is_some() { 1 } else { 0 }).collect();
-                let vals : Vec<_> = records.iter().filter_map(|rec| {
-                    rec.maybe_happened.map(|inner| {  inner.timestamp_millis()  })
-                }).collect();
-
-                if let parquet::column::writer::ColumnWriter::Int64ColumnWriter(ref mut typed) = column_writer {
-                    typed.write_batch(&vals[..], Some(&definition_levels[..]), None) ?;
-                } else {
-                    panic!("Schema and struct disagree on type for {}" , stringify!{ maybe_happened })
-                }
-            }
-        }).to_string());
-    }
-
-    #[test]
-    #[cfg(feature = "chrono")]
-    fn test_chrono_date() {
-        let snippet: proc_macro2::TokenStream = quote! {
-          struct ATimestampStruct {
-            henceforth: chrono::NaiveDate,
-            maybe_happened: Option<&chrono::NaiveDate>,
-          }
-        };
-
-        let fields = extract_fields(snippet);
-        let when = Field::from(&fields[0]);
-        assert_eq!(when.writer_snippet().to_string(),(quote!{
-            {
-                let vals : Vec<_> = records.iter().map(|rec| rec.henceforth.signed_duration_since(chrono::NaiveDate::from_ymd(1970, 1, 1)).num_days() as i32).collect();
-                if let parquet::column::writer::ColumnWriter::Int32ColumnWriter(ref mut typed) = column_writer {
-                    typed.write_batch(&vals[..], None, None) ?;
-                } else {
-                    panic!("Schema and struct disagree on type for {}" , stringify!{ henceforth })
-                }
-            }
-        }).to_string());
-
-        let maybe_happened = Field::from(&fields[1]);
-        assert_eq!(maybe_happened.writer_snippet().to_string(),(quote!{
-            {
-                let definition_levels : Vec<i16> = self.iter().map(|rec| if rec.maybe_happened.is_some() { 1 } else { 0 }).collect();
-                let vals : Vec<_> = records.iter().filter_map(|rec| {
-                    rec.maybe_happened.map(|inner| {  inner.signed_duration_since(chrono::NaiveDate::from_ymd(1970, 1, 1)).num_days() as i32  })
-                }).collect();
-
-                if let parquet::column::writer::ColumnWriter::Int32ColumnWriter(ref mut typed) = column_writer {
-                    typed.write_batch(&vals[..], Some(&definition_levels[..]), None) ?;
-                } else {
-                    panic!("Schema and struct disagree on type for {}" , stringify!{ maybe_happened })
-                }
-            }
-        }).to_string());
-    }
-
-    #[test]
-    #[cfg(feature = "uuid")]
-    fn test_uuid() {
-        let snippet: proc_macro2::TokenStream = quote! {
-          struct ATimestampStruct {
-            unique_id: uuid::Uuid,
-            maybe_unique_id: Option<&uuid::Uuid>,
-          }
-        };
-
-        let fields = extract_fields(snippet);
-        let when = Field::from(&fields[0]);
-        assert_eq!(when.writer_snippet().to_string(),(quote!{
-            {
-                let vals : Vec<_> = records.iter().map(|rec| (&rec.unique_id.to_string()[..]).into() ).collect();
-                if let parquet::column::writer::ColumnWriter::ByteArrayColumnWriter(ref mut typed) = column_writer {
-                    typed.write_batch(&vals[..], None, None) ?;
-                } else {
-                    panic!("Schema and struct disagree on type for {}" , stringify!{ unique_id })
-                }
-            }
-        }).to_string());
-
-        let maybe_happened = Field::from(&fields[1]);
-        assert_eq!(maybe_happened.writer_snippet().to_string(),(quote!{
-            {
-                let definition_levels : Vec<i16> = self.iter().map(|rec| if rec.maybe_unique_id.is_some() { 1 } else { 0 }).collect();
-                let vals : Vec<_> = records.iter().filter_map(|rec| {
-                    rec.maybe_unique_id.map(|ref inner| {  (&inner.to_string()[..]).into()  })
-                }).collect();
-
-                if let parquet::column::writer::ColumnWriter::ByteArrayColumnWriter(ref mut typed) = column_writer {
-                    typed.write_batch(&vals[..], Some(&definition_levels[..]), None) ?;
-                } else {
-                    panic!("Schema and struct disagree on type for {}" , stringify!{ maybe_unique_id })
-                }
-            }
-        }).to_string());
-    }
-}
diff --git a/rust/parquet_derive_test/Cargo.toml b/rust/parquet_derive_test/Cargo.toml
deleted file mode 100644
index 84f88c58d46..00000000000
--- a/rust/parquet_derive_test/Cargo.toml
+++ /dev/null
@@ -1,32 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-[package]
-name = "parquet_derive_test"
-version = "5.0.0-SNAPSHOT"
-license = "Apache-2.0"
-description = "Integration test package for parquet-derive"
-homepage = "https://github.com/apache/arrow"
-repository = "https://github.com/apache/arrow"
-authors = ["Apache Arrow <dev@arrow.apache.org>"]
-keywords = [ "parquet" ]
-edition = "2018"
-publish = false
-
-[dependencies]
-parquet = { path = "../parquet", version = "5.0.0-SNAPSHOT" }
-parquet_derive = { path = "../parquet_derive", version = "5.0.0-SNAPSHOT" }
diff --git a/rust/parquet_derive_test/src/lib.rs b/rust/parquet_derive_test/src/lib.rs
deleted file mode 100644
index b4bfc42cab2..00000000000
--- a/rust/parquet_derive_test/src/lib.rs
+++ /dev/null
@@ -1,131 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#![allow(clippy::approx_constant)]
-
-extern crate parquet;
-
-#[macro_use]
-extern crate parquet_derive;
-
-use parquet::record::RecordWriter;
-
-#[derive(ParquetRecordWriter)]
-struct ACompleteRecord<'a> {
-    pub a_bool: bool,
-    pub a_str: &'a str,
-    pub a_string: String,
-    pub a_borrowed_string: &'a String,
-    pub maybe_a_str: Option<&'a str>,
-    pub maybe_a_string: Option<String>,
-    pub magic_number: i32,
-    pub low_quality_pi: f32,
-    pub high_quality_pi: f64,
-    pub maybe_pi: Option<f32>,
-    pub maybe_best_pi: Option<f64>,
-    pub borrowed_maybe_a_string: &'a Option<String>,
-    pub borrowed_maybe_a_str: &'a Option<&'a str>,
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    use parquet::{
-        file::{
-            properties::WriterProperties,
-            writer::{FileWriter, SerializedFileWriter},
-        },
-        schema::parser::parse_message_type,
-    };
-    use std::{env, fs, io::Write, sync::Arc};
-
-    #[test]
-    fn test_parquet_derive_hello() {
-        let file = get_temp_file("test_parquet_derive_hello", &[]);
-        let schema_str = "message schema {
-            REQUIRED boolean         a_bool;
-            REQUIRED BINARY          a_str (UTF8);
-            REQUIRED BINARY          a_string (UTF8);
-            REQUIRED BINARY          a_borrowed_string (UTF8);
-            OPTIONAL BINARY          a_maybe_str (UTF8);
-            OPTIONAL BINARY          a_maybe_string (UTF8);
-            REQUIRED INT32           magic_number;
-            REQUIRED FLOAT           low_quality_pi;
-            REQUIRED DOUBLE          high_quality_pi;
-            OPTIONAL FLOAT           maybe_pi;
-            OPTIONAL DOUBLE          maybe_best_pi;
-            OPTIONAL BINARY          borrowed_maybe_a_string (UTF8);
-            OPTIONAL BINARY          borrowed_maybe_a_str (UTF8);
-        }";
-
-        let schema = Arc::new(parse_message_type(schema_str).unwrap());
-
-        let props = Arc::new(WriterProperties::builder().build());
-        let mut writer = SerializedFileWriter::new(file, schema, props).unwrap();
-
-        let a_str = "hello mother".to_owned();
-        let a_borrowed_string = "cool news".to_owned();
-        let maybe_a_string = Some("it's true, I'm a string".to_owned());
-        let maybe_a_str = Some(&a_str[..]);
-
-        let drs: Vec<ACompleteRecord> = vec![ACompleteRecord {
-            a_bool: true,
-            a_str: &a_str[..],
-            a_string: "hello father".into(),
-            a_borrowed_string: &a_borrowed_string,
-            maybe_a_str: Some(&a_str[..]),
-            maybe_a_string: Some(a_str.clone()),
-            magic_number: 100,
-            low_quality_pi: 3.14,
-            high_quality_pi: 3.1415,
-            maybe_pi: Some(3.14),
-            maybe_best_pi: Some(3.1415),
-            borrowed_maybe_a_string: &maybe_a_string,
-            borrowed_maybe_a_str: &maybe_a_str,
-        }];
-
-        let mut row_group = writer.next_row_group().unwrap();
-        drs.as_slice().write_to_row_group(&mut row_group).unwrap();
-        writer.close_row_group(row_group).unwrap();
-        writer.close().unwrap();
-    }
-
-    /// Returns file handle for a temp file in 'target' directory with a provided content
-    pub fn get_temp_file(file_name: &str, content: &[u8]) -> fs::File {
-        // build tmp path to a file in "target/debug/testdata"
-        let mut path_buf = env::current_dir().unwrap();
-        path_buf.push("target");
-        path_buf.push("debug");
-        path_buf.push("testdata");
-        fs::create_dir_all(&path_buf).unwrap();
-        path_buf.push(file_name);
-
-        // write file content
-        let mut tmp_file = fs::File::create(path_buf.as_path()).unwrap();
-        tmp_file.write_all(content).unwrap();
-        tmp_file.sync_all().unwrap();
-
-        // return file handle for both read and write
-        let file = fs::OpenOptions::new()
-            .read(true)
-            .write(true)
-            .open(path_buf.as_path());
-        assert!(file.is_ok());
-        file.unwrap()
-    }
-}
diff --git a/rust/pre-commit.sh b/rust/pre-commit.sh
deleted file mode 100755
index 5ce08079324..00000000000
--- a/rust/pre-commit.sh
+++ /dev/null
@@ -1,88 +0,0 @@
-#!/bin/bash
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# This file is git pre-commit hook.
-#
-# Soft link it as git hook under top dir of apache arrow git repository:
-# $ ln -s  ../../rust/pre-commit.sh .git/hooks/pre-commit
-#
-# This file be run directly:
-# $ ./pre-commit.sh
-
-function RED() {
-	echo "\033[0;31m$@\033[0m"
-}
-
-function GREEN() {
-	echo "\033[0;32m$@\033[0m"
-}
-
-function BYELLOW() {
-	echo "\033[1;33m$@\033[0m"
-}
-
-RUST_DIR="rust"
-
-# env GIT_DIR is set by git when run a pre-commit hook.
-if [ -z "${GIT_DIR}" ]; then
-	GIT_DIR=$(git rev-parse --show-toplevel)
-fi
-
-cd ${GIT_DIR}/${RUST_DIR}
-
-NUM_CHANGES=$(git diff --cached --name-only . |
-	grep -e ".*/*.rs$" |
-	awk '{print $1}' |
-	wc -l)
-
-if [ ${NUM_CHANGES} -eq 0 ]; then
-	echo -e "$(GREEN INFO): no staged changes in *.rs, $(GREEN skip cargo fmt/clippy)"
-	exit 0
-fi
-
-# 1. cargo clippy
-
-echo -e "$(GREEN INFO): cargo clippy ..."
-
-# Cargo clippy always return exit code 0, and `tee` doesn't work.
-# So let's just run cargo clippy.
-cargo clippy
-echo -e "$(GREEN INFO): cargo clippy done"
-
-# 2. cargo fmt: format with nightly and stable.
-
-CHANGED_BY_CARGO_FMT=false
-echo -e "$(GREEN INFO): cargo fmt with nightly and stable ..."
-
-for version in nightly stable; do
-	CMD="cargo +${version} fmt"
-	${CMD} --all -q -- --check 2>/dev/null
-	if [ $? -ne 0 ]; then
-		${CMD} --all
-		echo -e "$(BYELLOW WARN): ${CMD} changed some files"
-		CHANGED_BY_CARGO_FMT=true
-	fi
-done
-
-if ${CHANGED_BY_CARGO_FMT}; then
-	echo -e "$(RED FAIL): git commit $(RED ABORTED), please have a look and run git add/commit again"
-	exit 1
-fi
-
-exit 0
diff --git a/rust/rustfmt.toml b/rust/rustfmt.toml
deleted file mode 100644
index c49cccdd9f5..00000000000
--- a/rust/rustfmt.toml
+++ /dev/null
@@ -1,24 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-edition = "2018"
-max_width = 90
-
-# ignore generated files
-# ignore = [
-#    "arrow/src/ipc/gen",
-#]

From 71144650b271794ec3a1d4fcc485df7205fbf0e5 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Mon, 3 May 2021 14:01:21 +0200
Subject: [PATCH 166/719] ARROW-12611: [CI][Python] Add different numpy
 versions to pandas nightly builds
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #10221 from jorisvandenbossche/ARROW-12611-ci-pandas-numpy

Authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 .github/workflows/python.yml       |  1 +
 dev/tasks/tasks.yml                | 13 +++++++------
 python/pyarrow/tests/test_array.py |  4 ++--
 3 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index 4e3eaad98b5..02451bdcd01 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -62,6 +62,7 @@ jobs:
             title: AMD64 Conda Python 3.6 Pandas 0.23
             python: 3.6
             pandas: 0.23
+            numpy: 1.16
           - name: conda-python-3.7-pandas-latest
             cache: conda-python-3.7
             image: conda-python-pandas
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index c0a9fe69d26..f82960bf353 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -968,12 +968,12 @@ tasks:
 
   ############################## Integration tests ############################
 
-{% for python_version, pandas_version, cache_leaf in [("3.6", "0.23", True),
-                                                      ("3.7", "0.24", True),
-                                                      ("3.7", "latest", False),
-                                                      ("3.8", "latest", False),
-                                                      ("3.8", "nightly", False),
-                                                      ("3.7", "master", False)] %}
+{% for python_version, pandas_version, numpy_version, cache_leaf in [("3.6", "0.23", "1.16", True),
+                                                                     ("3.7", "0.24", "1.19", True),
+                                                                     ("3.7", "latest", "latest", False),
+                                                                     ("3.8", "latest", "latest", False),
+                                                                     ("3.8", "nightly", "nightly", False),
+                                                                     ("3.9", "master", "nightly", False)] %}
   test-conda-python-{{ python_version }}-pandas-{{ pandas_version }}:
     ci: github
     template: docker-tests/github.linux.yml
@@ -981,6 +981,7 @@ tasks:
       env:
         PYTHON: {{ python_version }}
         PANDAS: {{ pandas_version }}
+        NUMPY: {{ numpy_version }}
     {% if cache_leaf %}
       # use the latest pandas release, so prevent reusing any cached layers
       flags: --no-leaf-cache
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index 128e6920dd4..d8e75ab3dbf 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -750,7 +750,7 @@ def test_dictionary_to_numpy():
     expected = pa.array(
         [13.7, 11.0, None, 13.7]
     ).to_numpy(zero_copy_only=False)
-    assert np.array_equal(
+    np.testing.assert_allclose(
         afloat2.to_numpy(zero_copy_only=False),
         expected,
         equal_nan=True
@@ -764,7 +764,7 @@ def test_dictionary_to_numpy():
         pa.array([7, 11])
     )
     expected = pa.array([7, 11, None, 7]).to_numpy(zero_copy_only=False)
-    assert np.array_equal(
+    np.testing.assert_allclose(
         aints.to_numpy(zero_copy_only=False),
         expected,
         equal_nan=True

From 78e993bb8c8cb3afe5fd72d33391545ad45d2855 Mon Sep 17 00:00:00 2001
From: Yibo Cai <yibo.cai@arm.com>
Date: Mon, 3 May 2021 16:23:39 +0200
Subject: [PATCH 167/719] ARROW-12240: [Python] Fix invalid-offsetof warning

arrow::csv::ConvertOptions has stl container data members which are
not guaranteed to be standard layout type. It causes invalid-offsetof
warning when compiled with clang-12.

This patch changes directly embedded CCSVConvertOptions to unique_ptr
to fix the issue. Parse and Read options are also updated.

Closes #10154 from cyb70289/12240-invalid-offsetof

Lead-authored-by: Yibo Cai <yibo.cai@arm.com>
Co-authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 python/pyarrow/_csv.pxd              |   6 +-
 python/pyarrow/_csv.pyx              | 132 ++++++++++++++-------------
 python/pyarrow/_dataset.pyx          |   6 +-
 python/pyarrow/includes/libarrow.pxd |   9 ++
 python/pyarrow/tests/test_csv.py     |  16 +++-
 5 files changed, 100 insertions(+), 69 deletions(-)

diff --git a/python/pyarrow/_csv.pxd b/python/pyarrow/_csv.pxd
index f8e12f16bc8..030c4610e5d 100644
--- a/python/pyarrow/_csv.pxd
+++ b/python/pyarrow/_csv.pxd
@@ -23,7 +23,7 @@ from pyarrow.lib cimport _Weakrefable
 
 cdef class ConvertOptions(_Weakrefable):
     cdef:
-        CCSVConvertOptions options
+        unique_ptr[CCSVConvertOptions] options
 
     @staticmethod
     cdef ConvertOptions wrap(CCSVConvertOptions options)
@@ -31,7 +31,7 @@ cdef class ConvertOptions(_Weakrefable):
 
 cdef class ParseOptions(_Weakrefable):
     cdef:
-        CCSVParseOptions options
+        unique_ptr[CCSVParseOptions] options
 
     @staticmethod
     cdef ParseOptions wrap(CCSVParseOptions options)
@@ -39,7 +39,7 @@ cdef class ParseOptions(_Weakrefable):
 
 cdef class ReadOptions(_Weakrefable):
     cdef:
-        CCSVReadOptions options
+        unique_ptr[CCSVReadOptions] options
         public object encoding
 
     @staticmethod
diff --git a/python/pyarrow/_csv.pyx b/python/pyarrow/_csv.pyx
index a330664f641..04b9cfd2bcd 100644
--- a/python/pyarrow/_csv.pyx
+++ b/python/pyarrow/_csv.pyx
@@ -77,10 +77,13 @@ cdef class ReadOptions(_Weakrefable):
     # Avoid mistakingly creating attributes
     __slots__ = ()
 
+    # __init__() is not called when unpickling, initialize storage here
+    def __cinit__(self, *argw, **kwargs):
+        self.options.reset(new CCSVReadOptions(CCSVReadOptions.Defaults()))
+
     def __init__(self, *, use_threads=None, block_size=None, skip_rows=None,
                  column_names=None, autogenerate_column_names=None,
                  encoding='utf8'):
-        self.options = CCSVReadOptions.Defaults()
         if use_threads is not None:
             self.use_threads = use_threads
         if block_size is not None:
@@ -99,11 +102,11 @@ cdef class ReadOptions(_Weakrefable):
         """
         Whether to use multiple threads to accelerate reading.
         """
-        return self.options.use_threads
+        return deref(self.options).use_threads
 
     @use_threads.setter
     def use_threads(self, value):
-        self.options.use_threads = value
+        deref(self.options).use_threads = value
 
     @property
     def block_size(self):
@@ -112,11 +115,11 @@ cdef class ReadOptions(_Weakrefable):
         This will determine multi-threading granularity as well as
         the size of individual record batches or table chunks.
         """
-        return self.options.block_size
+        return deref(self.options).block_size
 
     @block_size.setter
     def block_size(self, value):
-        self.options.block_size = value
+        deref(self.options).block_size = value
 
     @property
     def skip_rows(self):
@@ -124,11 +127,11 @@ cdef class ReadOptions(_Weakrefable):
         The number of rows to skip before the column names (if any)
         and the CSV data.
         """
-        return self.options.skip_rows
+        return deref(self.options).skip_rows
 
     @skip_rows.setter
     def skip_rows(self, value):
-        self.options.skip_rows = value
+        deref(self.options).skip_rows = value
 
     @property
     def column_names(self):
@@ -136,13 +139,13 @@ cdef class ReadOptions(_Weakrefable):
         The column names of the target table.  If empty, fall back on
         `autogenerate_column_names`.
         """
-        return [frombytes(s) for s in self.options.column_names]
+        return [frombytes(s) for s in deref(self.options).column_names]
 
     @column_names.setter
     def column_names(self, value):
-        self.options.column_names.clear()
+        deref(self.options).column_names.clear()
         for item in value:
-            self.options.column_names.push_back(tobytes(item))
+            deref(self.options).column_names.push_back(tobytes(item))
 
     @property
     def autogenerate_column_names(self):
@@ -152,11 +155,11 @@ cdef class ReadOptions(_Weakrefable):
         If false, column names will be read from the first CSV row
         after `skip_rows`.
         """
-        return self.options.autogenerate_column_names
+        return deref(self.options).autogenerate_column_names
 
     @autogenerate_column_names.setter
     def autogenerate_column_names(self, value):
-        self.options.autogenerate_column_names = value
+        deref(self.options).autogenerate_column_names = value
 
     def equals(self, ReadOptions other):
         return (
@@ -172,7 +175,7 @@ cdef class ReadOptions(_Weakrefable):
     @staticmethod
     cdef ReadOptions wrap(CCSVReadOptions options):
         out = ReadOptions()
-        out.options = options
+        out.options.reset(new CCSVReadOptions(move(options)))
         out.encoding = 'utf8'  # No way to know this
         return out
 
@@ -221,10 +224,12 @@ cdef class ParseOptions(_Weakrefable):
     """
     __slots__ = ()
 
+    def __cinit__(self, *argw, **kwargs):
+        self.options.reset(new CCSVParseOptions(CCSVParseOptions.Defaults()))
+
     def __init__(self, *, delimiter=None, quote_char=None, double_quote=None,
                  escape_char=None, newlines_in_values=None,
                  ignore_empty_lines=None):
-        self.options = CCSVParseOptions.Defaults()
         if delimiter is not None:
             self.delimiter = delimiter
         if quote_char is not None:
@@ -243,11 +248,11 @@ cdef class ParseOptions(_Weakrefable):
         """
         The character delimiting individual cells in the CSV data.
         """
-        return chr(self.options.delimiter)
+        return chr(deref(self.options).delimiter)
 
     @delimiter.setter
     def delimiter(self, value):
-        self.options.delimiter = _single_char(value)
+        deref(self.options).delimiter = _single_char(value)
 
     @property
     def quote_char(self):
@@ -255,18 +260,18 @@ cdef class ParseOptions(_Weakrefable):
         The character used optionally for quoting CSV values
         (False if quoting is not allowed).
         """
-        if self.options.quoting:
-            return chr(self.options.quote_char)
+        if deref(self.options).quoting:
+            return chr(deref(self.options).quote_char)
         else:
             return False
 
     @quote_char.setter
     def quote_char(self, value):
         if value is False:
-            self.options.quoting = False
+            deref(self.options).quoting = False
         else:
-            self.options.quote_char = _single_char(value)
-            self.options.quoting = True
+            deref(self.options).quote_char = _single_char(value)
+            deref(self.options).quoting = True
 
     @property
     def double_quote(self):
@@ -274,11 +279,11 @@ cdef class ParseOptions(_Weakrefable):
         Whether two quotes in a quoted CSV value denote a single quote
         in the data.
         """
-        return self.options.double_quote
+        return deref(self.options).double_quote
 
     @double_quote.setter
     def double_quote(self, value):
-        self.options.double_quote = value
+        deref(self.options).double_quote = value
 
     @property
     def escape_char(self):
@@ -286,18 +291,18 @@ cdef class ParseOptions(_Weakrefable):
         The character used optionally for escaping special characters
         (False if escaping is not allowed).
         """
-        if self.options.escaping:
-            return chr(self.options.escape_char)
+        if deref(self.options).escaping:
+            return chr(deref(self.options).escape_char)
         else:
             return False
 
     @escape_char.setter
     def escape_char(self, value):
         if value is False:
-            self.options.escaping = False
+            deref(self.options).escaping = False
         else:
-            self.options.escape_char = _single_char(value)
-            self.options.escaping = True
+            deref(self.options).escape_char = _single_char(value)
+            deref(self.options).escaping = True
 
     @property
     def newlines_in_values(self):
@@ -306,11 +311,11 @@ cdef class ParseOptions(_Weakrefable):
         Setting this to True reduces the performance of multi-threaded
         CSV reading.
         """
-        return self.options.newlines_in_values
+        return deref(self.options).newlines_in_values
 
     @newlines_in_values.setter
     def newlines_in_values(self, value):
-        self.options.newlines_in_values = value
+        deref(self.options).newlines_in_values = value
 
     @property
     def ignore_empty_lines(self):
@@ -319,11 +324,11 @@ cdef class ParseOptions(_Weakrefable):
         If False, an empty line is interpreted as containing a single empty
         value (assuming a one-column CSV file).
         """
-        return self.options.ignore_empty_lines
+        return deref(self.options).ignore_empty_lines
 
     @ignore_empty_lines.setter
     def ignore_empty_lines(self, value):
-        self.options.ignore_empty_lines = value
+        deref(self.options).ignore_empty_lines = value
 
     def equals(self, ParseOptions other):
         return (
@@ -338,7 +343,7 @@ cdef class ParseOptions(_Weakrefable):
     @staticmethod
     cdef ParseOptions wrap(CCSVParseOptions options):
         out = ParseOptions()
-        out.options = options
+        out.options.reset(new CCSVParseOptions(move(options)))
         return out
 
     def __getstate__(self):
@@ -431,12 +436,15 @@ cdef class ConvertOptions(_Weakrefable):
     # Avoid mistakingly creating attributes
     __slots__ = ()
 
+    def __cinit__(self, *argw, **kwargs):
+        self.options.reset(
+            new CCSVConvertOptions(CCSVConvertOptions.Defaults()))
+
     def __init__(self, *, check_utf8=None, column_types=None, null_values=None,
                  true_values=None, false_values=None,
                  strings_can_be_null=None, include_columns=None,
                  include_missing_columns=None, auto_dict_encode=None,
                  auto_dict_max_cardinality=None, timestamp_parsers=None):
-        self.options = CCSVConvertOptions.Defaults()
         if check_utf8 is not None:
             self.check_utf8 = check_utf8
         if column_types is not None:
@@ -465,22 +473,22 @@ cdef class ConvertOptions(_Weakrefable):
         """
         Whether to check UTF8 validity of string columns.
         """
-        return self.options.check_utf8
+        return deref(self.options).check_utf8
 
     @check_utf8.setter
     def check_utf8(self, value):
-        self.options.check_utf8 = value
+        deref(self.options).check_utf8 = value
 
     @property
     def strings_can_be_null(self):
         """
         Whether string / binary columns can have null values.
         """
-        return self.options.strings_can_be_null
+        return deref(self.options).strings_can_be_null
 
     @strings_can_be_null.setter
     def strings_can_be_null(self, value):
-        self.options.strings_can_be_null = value
+        deref(self.options).strings_can_be_null = value
 
     @property
     def column_types(self):
@@ -488,7 +496,7 @@ cdef class ConvertOptions(_Weakrefable):
         Explicitly map column names to column types.
         """
         d = {frombytes(item.first): pyarrow_wrap_data_type(item.second)
-             for item in self.options.column_types}
+             for item in deref(self.options).column_types}
         return d
 
     @column_types.setter
@@ -499,7 +507,7 @@ cdef class ConvertOptions(_Weakrefable):
         if isinstance(value, Mapping):
             value = value.items()
 
-        self.options.column_types.clear()
+        deref(self.options).column_types.clear()
         for item in value:
             if isinstance(item, Field):
                 k = item.name
@@ -508,51 +516,51 @@ cdef class ConvertOptions(_Weakrefable):
                 k, v = item
             typ = pyarrow_unwrap_data_type(ensure_type(v))
             assert typ != NULL
-            self.options.column_types[tobytes(k)] = typ
+            deref(self.options).column_types[tobytes(k)] = typ
 
     @property
     def null_values(self):
         """
         A sequence of strings that denote nulls in the data.
         """
-        return [frombytes(x) for x in self.options.null_values]
+        return [frombytes(x) for x in deref(self.options).null_values]
 
     @null_values.setter
     def null_values(self, value):
-        self.options.null_values = [tobytes(x) for x in value]
+        deref(self.options).null_values = [tobytes(x) for x in value]
 
     @property
     def true_values(self):
         """
         A sequence of strings that denote true booleans in the data.
         """
-        return [frombytes(x) for x in self.options.true_values]
+        return [frombytes(x) for x in deref(self.options).true_values]
 
     @true_values.setter
     def true_values(self, value):
-        self.options.true_values = [tobytes(x) for x in value]
+        deref(self.options).true_values = [tobytes(x) for x in value]
 
     @property
     def false_values(self):
         """
         A sequence of strings that denote false booleans in the data.
         """
-        return [frombytes(x) for x in self.options.false_values]
+        return [frombytes(x) for x in deref(self.options).false_values]
 
     @false_values.setter
     def false_values(self, value):
-        self.options.false_values = [tobytes(x) for x in value]
+        deref(self.options).false_values = [tobytes(x) for x in value]
 
     @property
     def auto_dict_encode(self):
         """
         Whether to try to automatically dict-encode string / binary data.
         """
-        return self.options.auto_dict_encode
+        return deref(self.options).auto_dict_encode
 
     @auto_dict_encode.setter
     def auto_dict_encode(self, value):
-        self.options.auto_dict_encode = value
+        deref(self.options).auto_dict_encode = value
 
     @property
     def auto_dict_max_cardinality(self):
@@ -561,11 +569,11 @@ cdef class ConvertOptions(_Weakrefable):
 
         This value is per chunk.
         """
-        return self.options.auto_dict_max_cardinality
+        return deref(self.options).auto_dict_max_cardinality
 
     @auto_dict_max_cardinality.setter
     def auto_dict_max_cardinality(self, value):
-        self.options.auto_dict_max_cardinality = value
+        deref(self.options).auto_dict_max_cardinality = value
 
     @property
     def include_columns(self):
@@ -575,13 +583,13 @@ cdef class ConvertOptions(_Weakrefable):
         If empty, the Table will include all columns from the CSV file.
         If not empty, only these columns will be included, in this order.
         """
-        return [frombytes(s) for s in self.options.include_columns]
+        return [frombytes(s) for s in deref(self.options).include_columns]
 
     @include_columns.setter
     def include_columns(self, value):
-        self.options.include_columns.clear()
+        deref(self.options).include_columns.clear()
         for item in value:
-            self.options.include_columns.push_back(tobytes(item))
+            deref(self.options).include_columns.push_back(tobytes(item))
 
     @property
     def include_missing_columns(self):
@@ -593,11 +601,11 @@ cdef class ConvertOptions(_Weakrefable):
         or null by default).
         This option is ignored if `include_columns` is empty.
         """
-        return self.options.include_missing_columns
+        return deref(self.options).include_missing_columns
 
     @include_missing_columns.setter
     def include_missing_columns(self, value):
-        self.options.include_missing_columns = value
+        deref(self.options).include_missing_columns = value
 
     @property
     def timestamp_parsers(self):
@@ -612,7 +620,7 @@ cdef class ConvertOptions(_Weakrefable):
             c_string kind
 
         parsers = []
-        for c_parser in self.options.timestamp_parsers:
+        for c_parser in deref(self.options).timestamp_parsers:
             kind = deref(c_parser).kind()
             if kind == b'strptime':
                 parsers.append(frombytes(deref(c_parser).format()))
@@ -635,12 +643,12 @@ cdef class ConvertOptions(_Weakrefable):
             else:
                 raise TypeError("Expected list of str or ISO8601 objects")
 
-        self.options.timestamp_parsers = move(c_parsers)
+        deref(self.options).timestamp_parsers = move(c_parsers)
 
     @staticmethod
     cdef ConvertOptions wrap(CCSVConvertOptions options):
         out = ConvertOptions()
-        out.options = options
+        out.options.reset(new CCSVConvertOptions(move(options)))
         return out
 
     def equals(self, ConvertOptions other):
@@ -694,14 +702,14 @@ cdef _get_read_options(ReadOptions read_options, CCSVReadOptions* out):
     if read_options is None:
         out[0] = CCSVReadOptions.Defaults()
     else:
-        out[0] = read_options.options
+        out[0] = deref(read_options.options)
 
 
 cdef _get_parse_options(ParseOptions parse_options, CCSVParseOptions* out):
     if parse_options is None:
         out[0] = CCSVParseOptions.Defaults()
     else:
-        out[0] = parse_options.options
+        out[0] = deref(parse_options.options)
 
 
 cdef _get_convert_options(ConvertOptions convert_options,
@@ -709,7 +717,7 @@ cdef _get_convert_options(ConvertOptions convert_options,
     if convert_options is None:
         out[0] = CCSVConvertOptions.Defaults()
     else:
-        out[0] = convert_options.options
+        out[0] = deref(convert_options.options)
 
 
 cdef class CSVStreamingReader(RecordBatchReader):
diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx
index 44f016f5218..0b6c695235c 100644
--- a/python/pyarrow/_dataset.pyx
+++ b/python/pyarrow/_dataset.pyx
@@ -1710,7 +1710,7 @@ cdef class CsvFileFormat(FileFormat):
 
     @parse_options.setter
     def parse_options(self, ParseOptions parse_options not None):
-        self.csv_format.parse_options = parse_options.options
+        self.csv_format.parse_options = deref(parse_options.options)
 
     cdef _set_default_fragment_scan_options(self, FragmentScanOptions options):
         if options.type_name == 'csv':
@@ -1760,7 +1760,7 @@ cdef class CsvFragmentScanOptions(FragmentScanOptions):
 
     @convert_options.setter
     def convert_options(self, ConvertOptions convert_options not None):
-        self.csv_options.convert_options = convert_options.options
+        self.csv_options.convert_options = deref(convert_options.options)
 
     @property
     def read_options(self):
@@ -1768,7 +1768,7 @@ cdef class CsvFragmentScanOptions(FragmentScanOptions):
 
     @read_options.setter
     def read_options(self, ReadOptions read_options not None):
-        self.csv_options.read_options = read_options.options
+        self.csv_options.read_options = deref(read_options.options)
 
     def equals(self, CsvFragmentScanOptions other):
         return (
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 8f0f973a791..dec0038a0e4 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -1583,6 +1583,9 @@ cdef extern from "arrow/csv/api.h" namespace "arrow::csv" nogil:
         c_bool newlines_in_values
         c_bool ignore_empty_lines
 
+        CCSVParseOptions()
+        CCSVParseOptions(CCSVParseOptions&&)
+
         @staticmethod
         CCSVParseOptions Defaults()
 
@@ -1601,6 +1604,9 @@ cdef extern from "arrow/csv/api.h" namespace "arrow::csv" nogil:
         vector[c_string] include_columns
         c_bool include_missing_columns
 
+        CCSVConvertOptions()
+        CCSVConvertOptions(CCSVConvertOptions&&)
+
         @staticmethod
         CCSVConvertOptions Defaults()
 
@@ -1611,6 +1617,9 @@ cdef extern from "arrow/csv/api.h" namespace "arrow::csv" nogil:
         vector[c_string] column_names
         c_bool autogenerate_column_names
 
+        CCSVReadOptions()
+        CCSVReadOptions(CCSVReadOptions&&)
+
         @staticmethod
         CCSVReadOptions Defaults()
 
diff --git a/python/pyarrow/tests/test_csv.py b/python/pyarrow/tests/test_csv.py
index 395f9486315..4e95ab3bd60 100644
--- a/python/pyarrow/tests/test_csv.py
+++ b/python/pyarrow/tests/test_csv.py
@@ -97,6 +97,7 @@ def check_options_class(cls, **attr_values):
         assert getattr(opts, name) == value
 
 
+# The various options classes need to be picklable for dataset
 def check_options_class_pickling(cls, **attr_values):
     opts = cls(**attr_values)
     new_opts = pickle.loads(pickle.dumps(opts,
@@ -115,6 +116,12 @@ def test_read_options():
                         autogenerate_column_names=[False, True],
                         encoding=['utf8', 'utf16'])
 
+    check_options_class_pickling(cls, use_threads=True,
+                                 skip_rows=3,
+                                 column_names=["ab", "cd"],
+                                 autogenerate_column_names=False,
+                                 encoding='utf16')
+
     assert opts.block_size > 0
     opts.block_size = 12345
     assert opts.block_size == 12345
@@ -133,7 +140,6 @@ def test_parse_options():
                         newlines_in_values=[False, True],
                         ignore_empty_lines=[True, False])
 
-    # ParseOptions needs to be picklable for dataset
     check_options_class_pickling(cls, delimiter='x',
                                  escape_char='y',
                                  quote_char=False,
@@ -154,6 +160,14 @@ def test_convert_options():
         auto_dict_encode=[False, True],
         timestamp_parsers=[[], [ISO8601, '%y-%m']])
 
+    check_options_class_pickling(
+        cls, check_utf8=True,
+        strings_can_be_null=False,
+        include_columns=['def', 'abc'],
+        include_missing_columns=False,
+        auto_dict_encode=True,
+        timestamp_parsers=[ISO8601, '%y-%m'])
+
     assert opts.auto_dict_max_cardinality > 0
     opts.auto_dict_max_cardinality = 99999
     assert opts.auto_dict_max_cardinality == 99999

From 5159d2b74427e351a8ab7c4a1128aa8f1d7fcf6c Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Mon, 3 May 2021 16:53:16 +0200
Subject: [PATCH 168/719] ARROW-12622: [Python] Fix segfault in read_csv when
 not on main thread

An uninitialized StopToken caused segfaults if you ever called read_csv with cancellation disabled or when not on the main thread (e.g. if used in a Flight server). If we have a 4.0.1 I think this qualifies as a regression.

Closes #10227 from lidavidm/arrow-12622

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 python/pyarrow/error.pxi         | 2 +-
 python/pyarrow/tests/test_csv.py | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/python/pyarrow/error.pxi b/python/pyarrow/error.pxi
index f9e45f238df..2866848272a 100644
--- a/python/pyarrow/error.pxi
+++ b/python/pyarrow/error.pxi
@@ -188,8 +188,8 @@ cdef class SignalStopHandler:
                 if signal.getsignal(sig) not in (signal.SIG_DFL,
                                                  signal.SIG_IGN, None)]
 
+        self._stop_token = StopToken()
         if not self._signals.empty():
-            self._stop_token = StopToken()
             self._stop_token.init(GetResultValue(
                 SetSignalStopSource()).token())
             self._enabled = True
diff --git a/python/pyarrow/tests/test_csv.py b/python/pyarrow/tests/test_csv.py
index 4e95ab3bd60..fef1ac60f37 100644
--- a/python/pyarrow/tests/test_csv.py
+++ b/python/pyarrow/tests/test_csv.py
@@ -956,6 +956,14 @@ def signal_from_thread():
         assert isinstance(e, pa.ArrowCancelled)
         assert e.signum == signal.SIGINT
 
+    def test_cancellation_disabled(self):
+        # ARROW-12622: reader would segfault when the cancelling signal
+        # handler was not enabled (e.g. if disabled, or if not on the
+        # main thread)
+        t = threading.Thread(target=lambda: self.read_bytes(b"f64\n0.1"))
+        t.start()
+        t.join()
+
 
 class TestSerialCSVRead(BaseTestCSVRead, unittest.TestCase):
 

From 3b6bf4e062cc9c9e8c058abcdfc1eaec463c3702 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Mon, 3 May 2021 17:29:16 +0200
Subject: [PATCH 169/719] ARROW-12617: [Python] Align orc.write_table keyword
 order with parquet.write_table

Closes #10223 from jorisvandenbossche/ARROW-12617-orc-write_table-signature

Authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 python/pyarrow/_orc.pyx          |  3 ++-
 python/pyarrow/orc.py            | 16 ++++++++++++----
 python/pyarrow/tests/test_orc.py | 10 +++++++++-
 3 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/python/pyarrow/_orc.pyx b/python/pyarrow/_orc.pyx
index 2640057ab16..e56a62d8def 100644
--- a/python/pyarrow/_orc.pyx
+++ b/python/pyarrow/_orc.pyx
@@ -29,6 +29,7 @@ from pyarrow.lib cimport (check_status, _Weakrefable,
                           Schema, pyarrow_wrap_schema,
                           pyarrow_wrap_batch,
                           RecordBatch,
+                          Table,
                           pyarrow_wrap_table,
                           pyarrow_unwrap_schema,
                           pyarrow_unwrap_table,
@@ -127,7 +128,7 @@ cdef class ORCWriter(_Weakrefable):
             self.writer = move(GetResultValue[unique_ptr[ORCFileWriter]](
                 ORCFileWriter.Open(self.rd_handle.get())))
 
-    def write(self, object table):
+    def write(self, Table table):
         cdef:
             shared_ptr[CTable] sp_table
         sp_table = pyarrow_unwrap_table(table)
diff --git a/python/pyarrow/orc.py b/python/pyarrow/orc.py
index e1009bcca08..13af5a2376a 100644
--- a/python/pyarrow/orc.py
+++ b/python/pyarrow/orc.py
@@ -18,9 +18,10 @@
 
 from itertools import count
 from numbers import Integral
+import warnings
 
 from pyarrow import types
-from pyarrow.lib import Schema
+from pyarrow.lib import Schema, Table
 import pyarrow._orc as _orc
 
 
@@ -184,19 +185,26 @@ def close(self):
         self.writer.close()
 
 
-def write_table(where, table):
+def write_table(table, where):
     """
     Write a table into an ORC file
 
     Parameters
     ----------
+    table : pyarrow.lib.Table
+        The table to be written into the ORC file
     where : str or pyarrow.io.NativeFile
         Writable target. For passing Python file objects or byte buffers,
         see pyarrow.io.PythonFileInterface, pyarrow.io.BufferOutputStream
         or pyarrow.io.FixedSizeBufferWriter.
-    table : pyarrow.lib.Table
-        The table to be written into the ORC file
     """
+    if isinstance(where, Table):
+        warnings.warn(
+            "The order of the arguments has changed. Pass as "
+            "'write_table(table, where)' instead. The old order will raise "
+            "an error in the future.", FutureWarning, stacklevel=2
+        )
+        table, where = where, table
     writer = ORCWriter(where)
     writer.write(table)
     writer.close()
diff --git a/python/pyarrow/tests/test_orc.py b/python/pyarrow/tests/test_orc.py
index 14edad87d71..e71c4529c04 100644
--- a/python/pyarrow/tests/test_orc.py
+++ b/python/pyarrow/tests/test_orc.py
@@ -176,7 +176,15 @@ def test_orcfile_readwrite():
     a = pa.array([1, None, 3, None])
     b = pa.array([None, "Arrow", None, "ORC"])
     table = pa.table({"int64": a, "utf8": b})
-    orc.write_table(buffer_output_stream, table)
+    orc.write_table(table, buffer_output_stream)
+    buffer_reader = pa.BufferReader(buffer_output_stream.getvalue())
+    output_table = orc.ORCFile(buffer_reader).read()
+    assert table.equals(output_table)
+
+    # deprecated keyword order
+    buffer_output_stream = pa.BufferOutputStream()
+    with pytest.warns(FutureWarning):
+        orc.write_table(buffer_output_stream, table)
     buffer_reader = pa.BufferReader(buffer_output_stream.getvalue())
     output_table = orc.ORCFile(buffer_reader).read()
     assert table.equals(output_table)

From ce2861713472818eea264957de4cc83d5a2c567c Mon Sep 17 00:00:00 2001
From: Yibo Cai <yibo.cai@arm.com>
Date: Mon, 3 May 2021 17:37:12 +0200
Subject: [PATCH 170/719] ARROW-12568: [C++][Compute] Fix nullptr deference
 when array contains no nulls

Closes #10184 from cyb70289/12568-cast-crash

Lead-authored-by: Yibo Cai <yibo.cai@arm.com>
Co-authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 .../arrow/compute/kernels/hash_aggregate.cc   |  8 ++-
 .../compute/kernels/hash_aggregate_test.cc    | 23 ++++++
 .../arrow/compute/kernels/scalar_boolean.cc   | 72 ++++++++++++++-----
 .../compute/kernels/scalar_boolean_test.cc    | 18 +++++
 .../compute/kernels/scalar_cast_nested.cc     |  8 ++-
 .../arrow/compute/kernels/scalar_cast_test.cc | 14 ++++
 6 files changed, 119 insertions(+), 24 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate.cc b/cpp/src/arrow/compute/kernels/hash_aggregate.cc
index f45e82e04af..ae7bf9324db 100644
--- a/cpp/src/arrow/compute/kernels/hash_aggregate.cc
+++ b/cpp/src/arrow/compute/kernels/hash_aggregate.cc
@@ -483,9 +483,11 @@ struct GroupedCountImpl : public GroupedAggregator {
     const auto& input = batch[0].array();
 
     if (options_.count_mode == CountOptions::COUNT_NULL) {
-      for (int64_t i = 0, input_i = input->offset; i < input->length; ++i, ++input_i) {
-        auto g = group_ids[i];
-        raw_counts[g] += !BitUtil::GetBit(input->buffers[0]->data(), input_i);
+      if (input->GetNullCount() != 0) {
+        for (int64_t i = 0, input_i = input->offset; i < input->length; ++i, ++input_i) {
+          auto g = group_ids[i];
+          raw_counts[g] += !BitUtil::GetBit(input->buffers[0]->data(), input_i);
+        }
       }
       return Status::OK();
     }
diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc b/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc
index 7858d8bb147..507f1716110 100644
--- a/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc
+++ b/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc
@@ -678,6 +678,29 @@ TEST(GroupBy, ConcreteCaseWithValidateGroupBy) {
   }
 }
 
+// Count nulls/non_nulls from record batch with no nulls
+TEST(GroupBy, CountNull) {
+  auto batch = RecordBatchFromJSON(
+      schema({field("argument", float64()), field("key", utf8())}), R"([
+    [1.0, "alfa"],
+    [2.0, "beta"],
+    [3.0, "gama"]
+  ])");
+
+  CountOptions count_non_null{CountOptions::COUNT_NON_NULL},
+      count_null{CountOptions::COUNT_NULL};
+
+  using internal::Aggregate;
+  for (auto agg : {
+           Aggregate{"hash_count", &count_non_null},
+           Aggregate{"hash_count", &count_null},
+       }) {
+    SCOPED_TRACE(agg.function);
+    ValidateGroupBy({agg}, {batch->GetColumnByName("argument")},
+                    {batch->GetColumnByName("key")});
+  }
+}
+
 TEST(GroupBy, RandomArraySum) {
   for (int64_t length : {1 << 10, 1 << 12, 1 << 15}) {
     for (auto null_probability : {0.0, 0.01, 0.5, 1.0}) {
diff --git a/cpp/src/arrow/compute/kernels/scalar_boolean.cc b/cpp/src/arrow/compute/kernels/scalar_boolean.cc
index d555a81392a..3d47d239888 100644
--- a/cpp/src/arrow/compute/kernels/scalar_boolean.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_boolean.cc
@@ -169,21 +169,32 @@ struct KleeneAnd : Commutative<KleeneAnd> {
     bool right_false = right.is_valid && !checked_cast<const BooleanScalar&>(right).value;
 
     if (right_false) {
-      GetBitmap(*out, 0).SetBitsTo(true);
+      out->null_count = 0;
+      out->buffers[0] = nullptr;
       GetBitmap(*out, 1).SetBitsTo(false);  // all false case
       return Status::OK();
     }
 
     if (right_true) {
-      GetBitmap(*out, 0).CopyFrom(GetBitmap(left, 0));
+      if (left.GetNullCount() == 0) {
+        out->null_count = 0;
+        out->buffers[0] = nullptr;
+      } else {
+        GetBitmap(*out, 0).CopyFrom(GetBitmap(left, 0));
+      }
       GetBitmap(*out, 1).CopyFrom(GetBitmap(left, 1));
       return Status::OK();
     }
 
     // scalar was null: out[i] is valid iff left[i] was false
-    ::arrow::internal::BitmapAndNot(left.buffers[0]->data(), left.offset,
-                                    left.buffers[1]->data(), left.offset, left.length,
-                                    out->offset, out->buffers[0]->mutable_data());
+    if (left.GetNullCount() == 0) {
+      ::arrow::internal::InvertBitmap(left.buffers[1]->data(), left.offset, left.length,
+                                      out->buffers[0]->mutable_data(), out->offset);
+    } else {
+      ::arrow::internal::BitmapAndNot(left.buffers[0]->data(), left.offset,
+                                      left.buffers[1]->data(), left.offset, left.length,
+                                      out->offset, out->buffers[0]->mutable_data());
+    }
     ::arrow::internal::CopyBitmap(left.buffers[1]->data(), left.offset, left.length,
                                   out->buffers[1]->mutable_data(), out->offset);
     return Status::OK();
@@ -192,7 +203,8 @@ struct KleeneAnd : Commutative<KleeneAnd> {
   static Status Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
                      ArrayData* out) {
     if (left.GetNullCount() == 0 && right.GetNullCount() == 0) {
-      GetBitmap(*out, 0).SetBitsTo(true);
+      out->null_count = 0;
+      out->buffers[0] = nullptr;
       return And::Call(ctx, left, right, out);
     }
     auto compute_word = [](uint64_t left_true, uint64_t left_false, uint64_t right_true,
@@ -260,21 +272,32 @@ struct KleeneOr : Commutative<KleeneOr> {
     bool right_false = right.is_valid && !checked_cast<const BooleanScalar&>(right).value;
 
     if (right_true) {
-      GetBitmap(*out, 0).SetBitsTo(true);
+      out->null_count = 0;
+      out->buffers[0] = nullptr;
       GetBitmap(*out, 1).SetBitsTo(true);  // all true case
       return Status::OK();
     }
 
     if (right_false) {
-      GetBitmap(*out, 0).CopyFrom(GetBitmap(left, 0));
+      if (left.GetNullCount() == 0) {
+        out->null_count = 0;
+        out->buffers[0] = nullptr;
+      } else {
+        GetBitmap(*out, 0).CopyFrom(GetBitmap(left, 0));
+      }
       GetBitmap(*out, 1).CopyFrom(GetBitmap(left, 1));
       return Status::OK();
     }
 
     // scalar was null: out[i] is valid iff left[i] was true
-    ::arrow::internal::BitmapAnd(left.buffers[0]->data(), left.offset,
-                                 left.buffers[1]->data(), left.offset, left.length,
-                                 out->offset, out->buffers[0]->mutable_data());
+    if (left.GetNullCount() == 0) {
+      ::arrow::internal::CopyBitmap(left.buffers[1]->data(), left.offset, left.length,
+                                    out->buffers[0]->mutable_data(), out->offset);
+    } else {
+      ::arrow::internal::BitmapAnd(left.buffers[0]->data(), left.offset,
+                                   left.buffers[1]->data(), left.offset, left.length,
+                                   out->offset, out->buffers[0]->mutable_data());
+    }
     ::arrow::internal::CopyBitmap(left.buffers[1]->data(), left.offset, left.length,
                                   out->buffers[1]->mutable_data(), out->offset);
     return Status::OK();
@@ -283,7 +306,8 @@ struct KleeneOr : Commutative<KleeneOr> {
   static Status Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
                      ArrayData* out) {
     if (left.GetNullCount() == 0 && right.GetNullCount() == 0) {
-      GetBitmap(*out, 0).SetBitsTo(true);
+      out->null_count = 0;
+      out->buffers[0] = nullptr;
       return Or::Call(ctx, left, right, out);
     }
 
@@ -373,21 +397,32 @@ struct KleeneAndNot {
     bool left_false = left.is_valid && !checked_cast<const BooleanScalar&>(left).value;
 
     if (left_false) {
-      GetBitmap(*out, 0).SetBitsTo(true);
+      out->null_count = 0;
+      out->buffers[0] = nullptr;
       GetBitmap(*out, 1).SetBitsTo(false);  // all false case
       return Status::OK();
     }
 
     if (left_true) {
-      GetBitmap(*out, 0).CopyFrom(GetBitmap(right, 0));
+      if (right.GetNullCount() == 0) {
+        out->null_count = 0;
+        out->buffers[0] = nullptr;
+      } else {
+        GetBitmap(*out, 0).CopyFrom(GetBitmap(right, 0));
+      }
       GetBitmap(*out, 1).CopyFromInverted(GetBitmap(right, 1));
       return Status::OK();
     }
 
     // scalar was null: out[i] is valid iff right[i] was true
-    ::arrow::internal::BitmapAnd(right.buffers[0]->data(), right.offset,
-                                 right.buffers[1]->data(), right.offset, right.length,
-                                 out->offset, out->buffers[0]->mutable_data());
+    if (right.GetNullCount() == 0) {
+      ::arrow::internal::CopyBitmap(right.buffers[1]->data(), right.offset, right.length,
+                                    out->buffers[0]->mutable_data(), out->offset);
+    } else {
+      ::arrow::internal::BitmapAnd(right.buffers[0]->data(), right.offset,
+                                   right.buffers[1]->data(), right.offset, right.length,
+                                   out->offset, out->buffers[0]->mutable_data());
+    }
     ::arrow::internal::InvertBitmap(right.buffers[1]->data(), right.offset, right.length,
                                     out->buffers[1]->mutable_data(), out->offset);
     return Status::OK();
@@ -401,7 +436,8 @@ struct KleeneAndNot {
   static Status Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
                      ArrayData* out) {
     if (left.GetNullCount() == 0 && right.GetNullCount() == 0) {
-      GetBitmap(*out, 0).SetBitsTo(true);
+      out->null_count = 0;
+      out->buffers[0] = nullptr;
       return AndNot::Call(ctx, left, right, out);
     }
 
diff --git a/cpp/src/arrow/compute/kernels/scalar_boolean_test.cc b/cpp/src/arrow/compute/kernels/scalar_boolean_test.cc
index 7d3f68e2aef..4c11eb6db30 100644
--- a/cpp/src/arrow/compute/kernels/scalar_boolean_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_boolean_test.cc
@@ -110,6 +110,12 @@ TEST(TestBooleanKernel, KleeneAnd) {
   expected = ArrayFromJSON(boolean(), "[true, false, false, null, false]");
   CheckScalarBinary("and_kleene", left, right, expected);
   CheckBooleanScalarArrayBinary("and_kleene", left);
+
+  left = ArrayFromJSON(boolean(), "    [true, true,  false, true]");
+  right = ArrayFromJSON(boolean(), "   [true, false, false, false]");
+  expected = ArrayFromJSON(boolean(), "[true, false, false, false]");
+  CheckScalarBinary("and_kleene", left, right, expected);
+  CheckBooleanScalarArrayBinary("and_kleene", left);
 }
 
 TEST(TestBooleanKernel, KleeneAndNot) {
@@ -121,6 +127,12 @@ TEST(TestBooleanKernel, KleeneAndNot) {
       boolean(), "[false, true,  null, false, false, false, false, null, null]");
   CheckScalarBinary("and_not_kleene", left, right, expected);
   CheckBooleanScalarArrayBinary("and_not_kleene", left);
+
+  left = ArrayFromJSON(boolean(), "    [true,  true,  false, false]");
+  right = ArrayFromJSON(boolean(), "   [true,  false, true,  false]");
+  expected = ArrayFromJSON(boolean(), "[false, true,  false, false]");
+  CheckScalarBinary("and_not_kleene", left, right, expected);
+  CheckBooleanScalarArrayBinary("and_not_kleene", left);
 }
 
 TEST(TestBooleanKernel, KleeneOr) {
@@ -135,6 +147,12 @@ TEST(TestBooleanKernel, KleeneOr) {
   expected = ArrayFromJSON(boolean(), "[true, true,  false, true, null]");
   CheckScalarBinary("or_kleene", left, right, expected);
   CheckBooleanScalarArrayBinary("or_kleene", left);
+
+  left = ArrayFromJSON(boolean(), "    [true, true,  false, false]");
+  right = ArrayFromJSON(boolean(), "   [true, false, false, true]");
+  expected = ArrayFromJSON(boolean(), "[true, true,  false, true]");
+  CheckScalarBinary("or_kleene", left, right, expected);
+  CheckBooleanScalarArrayBinary("or_kleene", left);
 }
 
 }  // namespace compute
diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc b/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc
index 9364120c133..ec92dbb5d60 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc
@@ -65,9 +65,11 @@ Status CastListExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
   Datum values = in_array.child_data[0];
 
   if (in_array.offset != 0) {
-    ARROW_ASSIGN_OR_RAISE(out_array->buffers[0],
-                          CopyBitmap(ctx->memory_pool(), in_array.buffers[0]->data(),
-                                     in_array.offset, in_array.length));
+    if (in_array.buffers[0]) {
+      ARROW_ASSIGN_OR_RAISE(out_array->buffers[0],
+                            CopyBitmap(ctx->memory_pool(), in_array.buffers[0]->data(),
+                                       in_array.offset, in_array.length));
+    }
     ARROW_ASSIGN_OR_RAISE(out_array->buffers[1],
                           ctx->Allocate(sizeof(offset_type) * (in_array.length + 1)));
 
diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
index 6efecbb2ad0..e9618fa5c5d 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
@@ -1702,6 +1702,20 @@ TEST(Cast, ListToList) {
     CheckCast(MakeArray(list_float32), MakeArray(list_int32));
     CheckCast(MakeArray(list_int64), MakeArray(list_int32));
   }
+
+  // No nulls (ARROW-12568)
+  for (auto make_list : std::vector<make_list_t*>{&list, &large_list}) {
+    auto list_int32 = ArrayFromJSON(make_list(int32()),
+                                    "[[0], [1], [2, 3, 4], [5, 6], [], [7], [8, 9]]")
+                          ->data();
+    auto list_int64 = list_int32->Copy();
+    list_int64->type = make_list(int64());
+    list_int64->child_data[0] = Cast(list_int32->child_data[0], int64())->array();
+    ASSERT_OK(MakeArray(list_int64)->ValidateFull());
+
+    CheckCast(MakeArray(list_int32), MakeArray(list_int64));
+    CheckCast(MakeArray(list_int64), MakeArray(list_int32));
+  }
 }
 
 TEST(Cast, ListToListOptionsPassthru) {

From d9a2f91dc08d2413f4c5a61ff48c194c53709939 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Mon, 3 May 2021 12:23:23 -0400
Subject: [PATCH 171/719] ARROW-12612: [C++] Add Expression to type_fwd.h

Closes #10210 from lidavidm/r-minimal

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Benjamin Kietzman <bengilgit@gmail.com>
---
 cpp/src/arrow/compute/exec/expression.h | 2 --
 cpp/src/arrow/compute/type_fwd.h        | 2 ++
 cpp/src/arrow/dataset/type_fwd.h        | 8 +-------
 3 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/cpp/src/arrow/compute/exec/expression.h b/cpp/src/arrow/compute/exec/expression.h
index 3d3ce99c257..98221422ee2 100644
--- a/cpp/src/arrow/compute/exec/expression.h
+++ b/cpp/src/arrow/compute/exec/expression.h
@@ -27,8 +27,6 @@
 #include <vector>
 
 #include "arrow/compute/type_fwd.h"
-#include "arrow/dataset/type_fwd.h"
-#include "arrow/dataset/visibility.h"
 #include "arrow/datum.h"
 #include "arrow/type_fwd.h"
 #include "arrow/util/variant.h"
diff --git a/cpp/src/arrow/compute/type_fwd.h b/cpp/src/arrow/compute/type_fwd.h
index 9888e610aa7..4f4393486ff 100644
--- a/cpp/src/arrow/compute/type_fwd.h
+++ b/cpp/src/arrow/compute/type_fwd.h
@@ -39,5 +39,7 @@ struct VectorKernel;
 
 struct KernelState;
 
+class Expression;
+
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/dataset/type_fwd.h b/cpp/src/arrow/dataset/type_fwd.h
index 0a4148c49e1..67a999456be 100644
--- a/cpp/src/arrow/dataset/type_fwd.h
+++ b/cpp/src/arrow/dataset/type_fwd.h
@@ -22,18 +22,12 @@
 #include <memory>
 #include <vector>
 
+#include "arrow/compute/type_fwd.h"  // IWYU pragma: export
 #include "arrow/dataset/visibility.h"
 #include "arrow/filesystem/type_fwd.h"  // IWYU pragma: export
 #include "arrow/type_fwd.h"             // IWYU pragma: export
 
 namespace arrow {
-namespace compute {
-
-class ExecContext;
-class Expression;
-
-}  // namespace compute
-
 namespace dataset {
 
 class Dataset;

From 9af99c54e50a3654e3b49d7cace64816842dca21 Mon Sep 17 00:00:00 2001
From: Benjamin Kietzman <bengilgit@gmail.com>
Date: Mon, 3 May 2021 13:56:05 -0400
Subject: [PATCH 172/719] ARROW-12614: [C++][Compute] Remove support for Tables
 in ExecuteScalarExpression

Cleanup PR after ARROW-11929 which added untested support for ExecuteScalarExpression(Table).

Closes #10213 from bkietz/12614-Add-support-for-Tables-to

Authored-by: Benjamin Kietzman <bengilgit@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/src/arrow/compute/exec/expression.cc      | 17 -----------------
 cpp/src/arrow/compute/exec/expression_test.cc |  3 ++-
 cpp/src/arrow/dataset/dataset_internal.h      |  5 -----
 cpp/src/arrow/dataset/test_util.h             |  2 +-
 cpp/src/arrow/record_batch.h                  |  2 ++
 cpp/src/arrow/util/iterator.h                 |  6 ++++++
 6 files changed, 11 insertions(+), 24 deletions(-)

diff --git a/cpp/src/arrow/compute/exec/expression.cc b/cpp/src/arrow/compute/exec/expression.cc
index 1f819cf3d04..59def380db5 100644
--- a/cpp/src/arrow/compute/exec/expression.cc
+++ b/cpp/src/arrow/compute/exec/expression.cc
@@ -526,23 +526,6 @@ Result<Datum> ExecuteScalarExpression(const Expression& expr, const Datum& input
         "ExecuteScalarExpression cannot Execute non-scalar expression ", expr.ToString());
   }
 
-  if (input.kind() == Datum::TABLE) {
-    TableBatchReader reader(*input.table());
-    std::shared_ptr<RecordBatch> batch;
-
-    while (true) {
-      RETURN_NOT_OK(reader.ReadNext(&batch));
-      if (batch != nullptr) {
-        break;
-      }
-      ARROW_ASSIGN_OR_RAISE(Datum res, ExecuteScalarExpression(expr, batch));
-      if (res.is_scalar()) {
-        ARROW_ASSIGN_OR_RAISE(res, MakeArrayFromScalar(*res.scalar(), batch->num_rows(),
-                                                       exec_context->memory_pool()));
-      }
-    }
-  }
-
   if (auto lit = expr.literal()) return *lit;
 
   if (auto ref = expr.field_ref()) {
diff --git a/cpp/src/arrow/compute/exec/expression_test.cc b/cpp/src/arrow/compute/exec/expression_test.cc
index ab3fbb4d196..e8b8fb31cd8 100644
--- a/cpp/src/arrow/compute/exec/expression_test.cc
+++ b/cpp/src/arrow/compute/exec/expression_test.cc
@@ -172,6 +172,7 @@ TEST(Expression, ToString) {
   EXPECT_EQ(literal("a").ToString(), "\"a\"");
   EXPECT_EQ(literal("a\nb").ToString(), "\"a\\nb\"");
   EXPECT_EQ(literal(std::make_shared<BooleanScalar>()).ToString(), "null");
+  EXPECT_EQ(literal(std::make_shared<Int64Scalar>()).ToString(), "null");
   EXPECT_EQ(literal(std::make_shared<BinaryScalar>(Buffer::FromString("az"))).ToString(),
             "\"617A\"");
 
@@ -540,7 +541,7 @@ void ExpectExecute(Expression expr, Datum in, Datum* actual_out = NULLPTR) {
   if (in.is_value()) {
     ASSERT_OK_AND_ASSIGN(expr, expr.Bind(in.descr()));
   } else {
-    ASSERT_OK_AND_ASSIGN(expr, expr.Bind(*in.record_batch()->schema()));
+    ASSERT_OK_AND_ASSIGN(expr, expr.Bind(*in.schema()));
   }
 
   ASSERT_OK_AND_ASSIGN(Datum actual, ExecuteScalarExpression(expr, in));
diff --git a/cpp/src/arrow/dataset/dataset_internal.h b/cpp/src/arrow/dataset/dataset_internal.h
index 6527eac07dd..4336f9c157e 100644
--- a/cpp/src/arrow/dataset/dataset_internal.h
+++ b/cpp/src/arrow/dataset/dataset_internal.h
@@ -54,11 +54,6 @@ inline Result<FragmentIterator> GetFragmentsFromDatasets(const DatasetVector& da
   return MakeFlattenIterator(std::move(fragments_it));
 }
 
-inline RecordBatchIterator IteratorFromReader(
-    const std::shared_ptr<RecordBatchReader>& reader) {
-  return MakeFunctionIterator([reader] { return reader->Next(); });
-}
-
 inline std::shared_ptr<Schema> SchemaFromColumnNames(
     const std::shared_ptr<Schema>& input, const std::vector<std::string>& column_names) {
   std::vector<std::shared_ptr<Field>> columns;
diff --git a/cpp/src/arrow/dataset/test_util.h b/cpp/src/arrow/dataset/test_util.h
index 83ae4bbf1e8..42c544dd93e 100644
--- a/cpp/src/arrow/dataset/test_util.h
+++ b/cpp/src/arrow/dataset/test_util.h
@@ -1161,7 +1161,7 @@ class WriteFileSystemDatasetMixin : public MakeFileSystemDatasetMixin {
       std::shared_ptr<Array> actual_struct;
 
       for (auto maybe_batch :
-           IteratorFromReader(std::make_shared<TableBatchReader>(*actual_table))) {
+           MakeIteratorFromReader(std::make_shared<TableBatchReader>(*actual_table))) {
         ASSERT_OK_AND_ASSIGN(auto batch, maybe_batch);
         ASSERT_OK_AND_ASSIGN(actual_struct, batch->ToStructArray());
       }
diff --git a/cpp/src/arrow/record_batch.h b/cpp/src/arrow/record_batch.h
index 59c6d5568e9..a75dd043e5d 100644
--- a/cpp/src/arrow/record_batch.h
+++ b/cpp/src/arrow/record_batch.h
@@ -199,6 +199,8 @@ class ARROW_EXPORT RecordBatch {
 /// \brief Abstract interface for reading stream of record batches
 class ARROW_EXPORT RecordBatchReader {
  public:
+  using ValueType = std::shared_ptr<RecordBatch>;
+
   virtual ~RecordBatchReader() = default;
 
   /// \return the shared schema of the record batches in the stream
diff --git a/cpp/src/arrow/util/iterator.h b/cpp/src/arrow/util/iterator.h
index 4d9e7b18290..b82021e4b21 100644
--- a/cpp/src/arrow/util/iterator.h
+++ b/cpp/src/arrow/util/iterator.h
@@ -560,4 +560,10 @@ Iterator<T> MakeFlattenIterator(Iterator<Iterator<T>> it) {
   return Iterator<T>(FlattenIterator<T>(std::move(it)));
 }
 
+template <typename Reader>
+Iterator<typename Reader::ValueType> MakeIteratorFromReader(
+    const std::shared_ptr<Reader>& reader) {
+  return MakeFunctionIterator([reader] { return reader->Next(); });
+}
+
 }  // namespace arrow

From 8fc91af7ee347688e3903f376bd95a0a5b3a6107 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Mon, 3 May 2021 15:04:07 -0400
Subject: [PATCH 173/719] ARROW-12620: [C++][Dataset] Fix projection during
 writing

We were filtering/projecting twice on accident.

Closes #10229 from lidavidm/arrow-12620

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/src/arrow/dataset/file_test.cc       | 43 ++++++++++++++++++++++++
 cpp/src/arrow/dataset/scanner_internal.h |  6 +---
 2 files changed, 44 insertions(+), 5 deletions(-)

diff --git a/cpp/src/arrow/dataset/file_test.cc b/cpp/src/arrow/dataset/file_test.cc
index 0c8954e6b7b..839b48a0e64 100644
--- a/cpp/src/arrow/dataset/file_test.cc
+++ b/cpp/src/arrow/dataset/file_test.cc
@@ -295,6 +295,49 @@ TEST_F(TestFileSystemDataset, FragmentPartitions) {
                 });
 }
 
+TEST_F(TestFileSystemDataset, WriteProjected) {
+  // Regression test for ARROW-12620
+  auto format = std::make_shared<IpcFileFormat>();
+  auto fs = std::make_shared<fs::internal::MockFileSystem>(fs::kNoTime);
+  FileSystemDatasetWriteOptions write_options;
+  write_options.file_write_options = format->DefaultWriteOptions();
+  write_options.filesystem = fs;
+  write_options.base_dir = "root";
+  write_options.partitioning = std::make_shared<HivePartitioning>(schema({}));
+  write_options.basename_template = "{i}.feather";
+
+  auto dataset_schema = schema({field("a", int64())});
+  RecordBatchVector batches{
+      ConstantArrayGenerator::Zeroes(kRowsPerBatch, dataset_schema)};
+  ASSERT_EQ(0, batches[0]->column(0)->null_count());
+  auto dataset = std::make_shared<InMemoryDataset>(dataset_schema, batches);
+  ASSERT_OK_AND_ASSIGN(auto scanner_builder, dataset->NewScan());
+  ASSERT_OK(scanner_builder->Project(
+      {compute::call("add", {compute::field_ref("a"), compute::literal(1)})},
+      {"a_plus_one"}));
+  ASSERT_OK_AND_ASSIGN(auto scanner, scanner_builder->Finish());
+
+  ASSERT_OK(FileSystemDataset::Write(write_options, scanner));
+
+  ASSERT_OK_AND_ASSIGN(auto dataset_factory, FileSystemDatasetFactory::Make(
+                                                 fs, {"root/0.feather"}, format, {}));
+  ASSERT_OK_AND_ASSIGN(auto written_dataset, dataset_factory->Finish(FinishOptions{}));
+  auto expected_schema = schema({field("a_plus_one", int64())});
+  AssertSchemaEqual(*expected_schema, *written_dataset->schema());
+  ASSERT_OK_AND_ASSIGN(scanner_builder, written_dataset->NewScan());
+  ASSERT_OK_AND_ASSIGN(scanner, scanner_builder->Finish());
+  ASSERT_OK_AND_ASSIGN(auto table, scanner->ToTable());
+  auto col = table->column(0);
+  ASSERT_EQ(0, col->null_count());
+  for (auto chunk : col->chunks()) {
+    auto arr = std::dynamic_pointer_cast<Int64Array>(chunk);
+    for (auto val : *arr) {
+      ASSERT_TRUE(val.has_value());
+      ASSERT_EQ(1, *val);
+    }
+  }
+}
+
 // Tests of subtree pruning
 
 struct TestPathTree {
diff --git a/cpp/src/arrow/dataset/scanner_internal.h b/cpp/src/arrow/dataset/scanner_internal.h
index 56065d9983e..30fb4e07cef 100644
--- a/cpp/src/arrow/dataset/scanner_internal.h
+++ b/cpp/src/arrow/dataset/scanner_internal.h
@@ -169,11 +169,7 @@ class FilterAndProjectScanTask : public ScanTask {
           ARROW_ASSIGN_OR_RAISE(auto projected, FilterAndProjectBatch(batch));
           return visitor(projected);
         };
-    return task_->SafeExecute(executor).Then(
-        [this, filter_and_project_visitor](const RecordBatchVector& rbs) -> Status {
-          ARROW_ASSIGN_OR_RAISE(auto projected_it, ToFilteredAndProjectedIterator(rbs));
-          return projected_it.Visit(filter_and_project_visitor);
-        });
+    return task_->SafeVisit(executor, filter_and_project_visitor);
   }
 
  private:

From 6c591d023b9e2c981746e84c2d78e1ec3eaf9bf0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Mon, 3 May 2021 21:45:01 +0200
Subject: [PATCH 174/719] ARROW-12415: [CI] [Python] Failed building wheel for
 pygit2 on ARM64
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #10180 from pachamaltese/patch-1

Lead-authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Co-authored-by: Pachamaltese <mvargas@dcc.uchile.cl>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 dev/archery/archery/cli.py             |  5 ++++-
 dev/archery/setup.py                   | 10 ++--------
 dev/tasks/conda-recipes/drone-steps.sh | 12 ++++++++----
 3 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py
index 5329e0abbe2..9f9ec6bbc2f 100644
--- a/dev/archery/archery/cli.py
+++ b/dev/archery/archery/cli.py
@@ -1160,7 +1160,10 @@ def release_cherry_pick(obj, version, dry_run, recreate):
 
     @archery.command(
         'crossbow',
-        context_settings={"ignore_unknown_options": True}
+        context_settings={
+            "allow_extra_args": True,
+            "ignore_unknown_options": True,
+        }
     )
     def crossbow():
         raise click.ClickException(
diff --git a/dev/archery/setup.py b/dev/archery/setup.py
index 0537e8b4d31..e3fa47ad14f 100755
--- a/dev/archery/setup.py
+++ b/dev/archery/setup.py
@@ -19,7 +19,7 @@
 import functools
 import operator
 import sys
-from setuptools import setup
+from setuptools import setup, find_packages
 
 if sys.version_info < (3, 6):
     sys.exit('Python < 3.6 is not supported')
@@ -44,13 +44,7 @@
     url='http://github.com/apache/arrow',
     maintainer='Arrow Developers',
     maintainer_email='dev@arrow.apache.org',
-    packages=[
-        'archery',
-        'archery.benchmark',
-        'archery.integration',
-        'archery.lang',
-        'archery.utils'
-    ],
+    packages=find_packages(),
     include_package_data=True,
     install_requires=['click>=7'],
     tests_require=['pytest', 'responses'],
diff --git a/dev/tasks/conda-recipes/drone-steps.sh b/dev/tasks/conda-recipes/drone-steps.sh
index dffdb41b088..3c379b824db 100755
--- a/dev/tasks/conda-recipes/drone-steps.sh
+++ b/dev/tasks/conda-recipes/drone-steps.sh
@@ -12,12 +12,14 @@ conda install -y mamba
 $FEEDSTOCK_ROOT/build_steps.sh ${OUTPUT_DIR}
 
 # Upload as Github release
-mamba install -y anaconda-client shyaml -c conda-forge
+mamba install -y anaconda-client shyaml pygit2 -c conda-forge
+
+pushd $DRONE_WORKSPACE/arrow
+
+pip install dev/archery[crossbow]
 
-pushd $DRONE_WORKSPACE
-pip install -e arrow/dev/archery[crossbow]
 archery crossbow \
-  --queue-path . \
+  --queue-path $DRONE_WORKSPACE \
   --queue-remote ${QUEUE_REMOTE_URL} \
   upload-artifacts \
   --sha ${TASK_BRANCH} \
@@ -27,3 +29,5 @@ archery crossbow \
 if [[ "${UPLOAD_TO_ANACONDA}" == "1" ]]; then
   anaconda -t ${CROSSBOW_ANACONDA_TOKEN} upload --force build_artifacts/linux-aarch64/*.tar.bz2
 fi
+
+popd

From b3e43987c47b2f01b204a2d954f882f7161616ef Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Tue, 4 May 2021 08:17:08 +0200
Subject: [PATCH 175/719] ARROW-12639: [CI][Archery] Archery build fails to
 create branch
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #10231 from kszucs/ARROW-12639

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 .github/workflows/archery.yml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.github/workflows/archery.yml b/.github/workflows/archery.yml
index e5244ece92b..ac2799bee70 100644
--- a/.github/workflows/archery.yml
+++ b/.github/workflows/archery.yml
@@ -44,9 +44,8 @@ jobs:
         with:
           fetch-depth: 0
       - name: Git Fixup
-        if: ${{ github.ref != 'master' }}
         shell: bash
-        run: git branch master origin/master
+        run: git branch master origin/master || true
       - name: Free Up Disk Space
         run: ci/scripts/util_cleanup.sh
       - name: Setup Python

From 02b020c9b06f7ab6741e01b18d0e146d2e4347cd Mon Sep 17 00:00:00 2001
From: Alessandro Molina <amol@turbogears.org>
Date: Tue, 4 May 2021 12:11:46 +0200
Subject: [PATCH 176/719] ARROW-12613: [Python] Support comparison to None in
 Scalar values

Closes #10228 from amol-/ARROW-12613

Authored-by: Alessandro Molina <amol@turbogears.org>
Signed-off-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
---
 python/pyarrow/scalar.pxi            | 2 +-
 python/pyarrow/tests/test_scalars.py | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/python/pyarrow/scalar.pxi b/python/pyarrow/scalar.pxi
index effe60c73b2..9b250e499ea 100644
--- a/python/pyarrow/scalar.pxi
+++ b/python/pyarrow/scalar.pxi
@@ -83,7 +83,7 @@ cdef class Scalar(_Weakrefable):
     def __str__(self):
         return str(self.as_py())
 
-    def equals(self, Scalar other):
+    def equals(self, Scalar other not None):
         return self.wrapped.get().Equals(other.unwrap().get()[0])
 
     def __eq__(self, other):
diff --git a/python/pyarrow/tests/test_scalars.py b/python/pyarrow/tests/test_scalars.py
index 120c9659f8f..176f1c12053 100644
--- a/python/pyarrow/tests/test_scalars.py
+++ b/python/pyarrow/tests/test_scalars.py
@@ -77,6 +77,7 @@ def test_basics(value, ty, klass, deprecated):
     assert s != "else"
     assert hash(s) == hash(s)
     assert s.is_valid is True
+    assert s != None  # noqa: E711
     with pytest.warns(FutureWarning):
         assert isinstance(s, deprecated)
 

From 1455fa0e88f4f8a269966d3a337dda4f9eb8792f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Tue, 4 May 2021 13:22:19 +0200
Subject: [PATCH 177/719] ARROW-12630: [Dev][Integration] conda-integration
 docker build fails
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Disable the rust integration tests by default.

In order to execute the rust integration tests as well pass `ARCHERY_INTEGRATION_WITH_RUST=1` environment variable to the docker container.

Closes #10225 from kszucs/rust-integration

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 .github/workflows/integration.yml |  4 +---
 ci/scripts/integration_arrow.sh   | 11 +++++++----
 ci/scripts/rust_build.sh          | 28 +++++++++++++++++++++-------
 dev/archery/archery/cli.py        |  3 ++-
 docker-compose.yml                |  1 +
 5 files changed, 32 insertions(+), 15 deletions(-)

diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index fb41f36caf8..59007e0ec8a 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -29,7 +29,6 @@ on:
       - 'cpp/**'
       - 'java/**'
       - 'format/**'
-      - 'rust/**'
   pull_request:
     paths:
       - '.github/workflows/integration.yml'
@@ -41,7 +40,6 @@ on:
       - 'cpp/**'
       - 'java/**'
       - 'format/**'
-      - 'rust/**'
 
 env:
   DOCKER_VOLUME_PREFIX: ".docker/"
@@ -82,7 +80,7 @@ jobs:
       - name: Setup Archery
         run: pip install -e dev/archery[docker]
       - name: Execute Docker Build
-        run: archery docker run conda-integration
+        run: archery docker run -e ARCHERY_INTEGRATION_WITH_RUST=1 conda-integration
       - name: Docker Push
         if: success() && github.event_name == 'push' && github.repository == 'apache/arrow'
         continue-on-error: true
diff --git a/ci/scripts/integration_arrow.sh b/ci/scripts/integration_arrow.sh
index 5d2e71916ed..453561d3171 100755
--- a/ci/scripts/integration_arrow.sh
+++ b/ci/scripts/integration_arrow.sh
@@ -20,14 +20,17 @@
 set -ex
 
 arrow_dir=${1}
-source_dir=${1}/cpp
-build_dir=${2}/cpp
-
 gold_dir=$arrow_dir/testing/data/arrow-ipc-stream/integration
 
 pip install -e $arrow_dir/dev/archery
 
-archery integration --with-all --run-flight \
+# Rust can be enabled by exporting ARCHERY_INTEGRATION_WITH_RUST=1
+archery integration \
+    --run-flight \
+    --with-cpp=1 \
+    --with-java=1 \
+    --with-js=1 \
+    --with-go=1 \
     --gold-dirs=$gold_dir/0.14.1 \
     --gold-dirs=$gold_dir/0.17.1 \
     --gold-dirs=$gold_dir/1.0.0-bigendian \
diff --git a/ci/scripts/rust_build.sh b/ci/scripts/rust_build.sh
index 726ecd80f1a..3532ea3d5c6 100755
--- a/ci/scripts/rust_build.sh
+++ b/ci/scripts/rust_build.sh
@@ -17,13 +17,14 @@
 # specific language governing permissions and limitations
 # under the License.
 
-set -ex
+set -e
 
+arrow_dir=${1}
 source_dir=${1}/rust
 
-# This file is used to build the rust binaries needed for the
-# archery integration tests. Testing of the rust implementation
-# in normal CI is handled by github workflows
+# This file is used to build the rust binaries needed for the archery
+# integration tests. Testing of the rust implementation in normal CI is handled
+# by github workflows in the arrow-rs repository.
 
 # Disable full debug symbol generation to speed up CI build / reduce memory required
 export RUSTFLAGS="-C debuginfo=1"
@@ -31,6 +32,22 @@ export RUSTFLAGS="-C debuginfo=1"
 export ARROW_TEST_DATA=${arrow_dir}/testing/data
 export PARQUET_TEST_DATA=${arrow_dir}/cpp/submodules/parquet-testing/data
 
+if [ "${ARCHERY_INTEGRATION_WITH_RUST}" -eq "0" ]; then
+  echo "====================================================================="
+  echo "Not building the Rust implementation."
+  echo "====================================================================="
+  exit 0;
+elif [ ! -d "${source_dir}" ]; then
+  echo "====================================================================="
+  echo "The Rust source is missing. Please clone the arrow-rs repository"
+  echo "to arrow/rust before running the integration tests:"
+  echo "  git clone https://github.com/apache/arrow-rs.git path/to/arrow/rust"
+  echo "====================================================================="
+  exit 1;
+fi
+
+set -x
+
 # show activated toolchain
 rustup show
 
@@ -39,7 +56,4 @@ pushd ${source_dir}
 # build only the integration testing binaries
 cargo build -p arrow-integration-testing
 
-# Remove incremental build artifacts to save space
-rm -rf  target/debug/deps/ target/debug/build/
-
 popd
diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py
index 9f9ec6bbc2f..32ec5ac845b 100644
--- a/dev/archery/archery/cli.py
+++ b/dev/archery/archery/cli.py
@@ -726,7 +726,8 @@ def _set_default(opt, default):
 @click.option('--with-go', type=bool, default=False,
               help='Include Go in integration tests')
 @click.option('--with-rust', type=bool, default=False,
-              help='Include Rust in integration tests')
+              help='Include Rust in integration tests',
+              envvar="ARCHERY_INTEGRATION_WITH_RUST")
 @click.option('--write_generated_json', default=False,
               help='Generate test JSON to indicated path')
 @click.option('--run-flight', is_flag=True, default=False,
diff --git a/docker-compose.yml b/docker-compose.yml
index 4a3092ec04d..4158ee3ff64 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1197,6 +1197,7 @@ services:
       <<: *ccache
       # tell archery where the arrow binaries are located
       ARROW_CPP_EXE_PATH: /build/cpp/debug
+      ARCHERY_INTEGRATION_WITH_RUST: 0
     command:
       ["/arrow/ci/scripts/rust_build.sh /arrow /build &&
         /arrow/ci/scripts/cpp_build.sh /arrow /build &&

From 0ee3b90cac8f8eb3bf512e51a7f941fcead026d9 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Tue, 4 May 2021 08:44:35 -0400
Subject: [PATCH 178/719] ARROW-12631: [Python] Accept Scanner in
 pyarrow.dataset.write_dataset

We were already creating a Scanner to pass to the actual C++ `Write` method, so easy to also pass through a scanner if supplied by the user.

Closes #10224 from jorisvandenbossche/ARROW-12631-dataset-write-scanner

Authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 python/pyarrow/_dataset.pyx          |  8 ++------
 python/pyarrow/dataset.py            | 26 ++++++++++++++++----------
 python/pyarrow/tests/test_dataset.py | 25 +++++++++++++++++++++++++
 3 files changed, 43 insertions(+), 16 deletions(-)

diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx
index 0b6c695235c..cf076f6536b 100644
--- a/python/pyarrow/_dataset.pyx
+++ b/python/pyarrow/_dataset.pyx
@@ -2866,14 +2866,12 @@ def _get_partition_keys(Expression partition_expression):
 
 
 def _filesystemdataset_write(
-    Dataset data not None,
+    Scanner data not None,
     object base_dir not None,
     str basename_template not None,
-    Schema schema not None,
     FileSystem filesystem not None,
     Partitioning partitioning not None,
     FileWriteOptions file_options not None,
-    bint use_threads,
     int max_partitions,
 ):
     """
@@ -2891,8 +2889,6 @@ def _filesystemdataset_write(
     c_options.max_partitions = max_partitions
     c_options.basename_template = tobytes(basename_template)
 
-    scanner = data.scanner(use_threads=use_threads)
-
-    c_scanner = (<Scanner> scanner).unwrap()
+    c_scanner = data.unwrap()
     with nogil:
         check_status(CFileSystemDataset.Write(c_options, c_scanner))
diff --git a/python/pyarrow/dataset.py b/python/pyarrow/dataset.py
index a5d457ced62..97d08844f27 100644
--- a/python/pyarrow/dataset.py
+++ b/python/pyarrow/dataset.py
@@ -733,19 +733,17 @@ def write_dataset(data, base_dir, basename_template=None, format=None,
     """
     from pyarrow.fs import _resolve_filesystem_and_path
 
-    if isinstance(data, Dataset):
-        schema = schema or data.schema
-    elif isinstance(data, (list, tuple)):
+    if isinstance(data, (list, tuple)):
         schema = schema or data[0].schema
         data = InMemoryDataset(data, schema=schema)
     elif isinstance(data, (pa.RecordBatch, pa.ipc.RecordBatchReader,
                            pa.Table)) or _is_iterable(data):
         data = InMemoryDataset(data, schema=schema)
-        schema = schema or data.schema
-    else:
+    elif not isinstance(data, (Dataset, Scanner)):
         raise ValueError(
-            "Only Dataset, Table/RecordBatch, RecordBatchReader, a list "
-            "of Tables/RecordBatches, or iterable of batches are supported."
+            "Only Dataset, Scanner, Table/RecordBatch, RecordBatchReader, "
+            "a list of Tables/RecordBatches, or iterable of batches are "
+            "supported."
         )
 
     if format is None and isinstance(data, FileSystemDataset):
@@ -771,8 +769,16 @@ def write_dataset(data, base_dir, basename_template=None, format=None,
 
     filesystem, base_dir = _resolve_filesystem_and_path(base_dir, filesystem)
 
+    if isinstance(data, Dataset):
+        scanner = data.scanner(use_threads=use_threads)
+    else:
+        # scanner was passed directly by the user, in which case a schema
+        # cannot be passed
+        if schema is not None:
+            raise ValueError("Cannot specify a schema when writing a Scanner")
+        scanner = data
+
     _filesystemdataset_write(
-        data, base_dir, basename_template, schema,
-        filesystem, partitioning, file_options, use_threads,
-        max_partitions
+        scanner, base_dir, basename_template, filesystem, partitioning,
+        file_options, max_partitions
     )
diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py
index 78bbbe2893c..8791c22f103 100644
--- a/python/pyarrow/tests/test_dataset.py
+++ b/python/pyarrow/tests/test_dataset.py
@@ -2999,6 +2999,31 @@ def test_write_iterable(tempdir):
     assert result.equals(table)
 
 
+def test_write_scanner(tempdir):
+    table = pa.table([
+        pa.array(range(20)), pa.array(np.random.randn(20)),
+        pa.array(np.repeat(['a', 'b'], 10))
+    ], names=["f1", "f2", "part"])
+    dataset = ds.dataset(table)
+
+    base_dir = tempdir / 'dataset_from_scanner'
+    ds.write_dataset(dataset.scanner(), base_dir, format="feather")
+    result = ds.dataset(base_dir, format="ipc").to_table()
+    assert result.equals(table)
+
+    # scanner with different projected_schema
+    base_dir = tempdir / 'dataset_from_scanner2'
+    ds.write_dataset(dataset.scanner(columns=["f1"]),
+                     base_dir, format="feather")
+    result = ds.dataset(base_dir, format="ipc").to_table()
+    assert result.equals(table.select(["f1"]))
+
+    # schema not allowed when writing a scanner
+    with pytest.raises(ValueError, match="Cannot specify a schema"):
+        ds.write_dataset(dataset.scanner(), base_dir, schema=table.schema,
+                         format="feather")
+
+
 def test_write_table_partitioned_dict(tempdir):
     # ensure writing table partitioned on a dictionary column works without
     # specifying the dictionary values explicitly

From fc10964bb23c5218757795a0136c0dd58b633971 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Tue, 4 May 2021 16:43:00 +0200
Subject: [PATCH 179/719] ARROW-12522: [C++] Add ReadRangeCache::WaitFor

This was split out of ARROW-11883 since it may also be useful to test with ARROW-11772.

This adds a method to get a Future<> from a ReadRangeCache so it can be easily used in an async context. Also, it adds a config flag to make the cache not perform readahead so that readahead can be handled at a different layer of the stack.

Closes #10145 from lidavidm/async-cache

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/filesystem/s3fs_benchmark.cc |   6 +-
 cpp/src/arrow/io/caching.cc                | 186 ++++++++++++++++-----
 cpp/src/arrow/io/caching.h                 |  39 ++++-
 cpp/src/arrow/io/interfaces.cc             |  10 +-
 cpp/src/arrow/io/memory_test.cc            | 143 ++++++++++++++--
 5 files changed, 318 insertions(+), 66 deletions(-)

diff --git a/cpp/src/arrow/filesystem/s3fs_benchmark.cc b/cpp/src/arrow/filesystem/s3fs_benchmark.cc
index 36564a70d29..869601b844e 100644
--- a/cpp/src/arrow/filesystem/s3fs_benchmark.cc
+++ b/cpp/src/arrow/filesystem/s3fs_benchmark.cc
@@ -260,8 +260,10 @@ static void CoalescedRead(benchmark::State& st, S3FileSystem* fs,
     ASSERT_OK_AND_ASSIGN(size, file->GetSize());
     total_items += 1;
 
-    io::internal::ReadRangeCache cache(file, {},
-                                       io::CacheOptions{8192, 64 * 1024 * 1024});
+    io::internal::ReadRangeCache cache(
+        file, {},
+        io::CacheOptions{/*hole_size_limit=*/8192, /*range_size_limit=*/64 * 1024 * 1024,
+                         /*lazy=*/false});
     std::vector<io::ReadRange> ranges;
 
     int64_t offset = 0;
diff --git a/cpp/src/arrow/io/caching.cc b/cpp/src/arrow/io/caching.cc
index 1a7a55cd1b2..722026ccd9b 100644
--- a/cpp/src/arrow/io/caching.cc
+++ b/cpp/src/arrow/io/caching.cc
@@ -18,6 +18,7 @@
 #include <algorithm>
 #include <atomic>
 #include <cmath>
+#include <mutex>
 #include <utility>
 #include <vector>
 
@@ -33,7 +34,14 @@ namespace io {
 
 CacheOptions CacheOptions::Defaults() {
   return CacheOptions{internal::ReadRangeCache::kDefaultHoleSizeLimit,
-                      internal::ReadRangeCache::kDefaultRangeSizeLimit};
+                      internal::ReadRangeCache::kDefaultRangeSizeLimit,
+                      /*lazy=*/false};
+}
+
+CacheOptions CacheOptions::LazyDefaults() {
+  return CacheOptions{internal::ReadRangeCache::kDefaultHoleSizeLimit,
+                      internal::ReadRangeCache::kDefaultRangeSizeLimit,
+                      /*lazy=*/true};
 }
 
 CacheOptions CacheOptions::MakeFromNetworkMetrics(int64_t time_to_first_byte_millis,
@@ -117,7 +125,7 @@ CacheOptions CacheOptions::MakeFromNetworkMetrics(int64_t time_to_first_byte_mil
                                       (1 - ideal_bandwidth_utilization_frac))));
   DCHECK_GT(range_size_limit, 0) << "Computed range_size_limit must be > 0";
 
-  return {hole_size_limit, range_size_limit};
+  return {hole_size_limit, range_size_limit, false};
 }
 
 namespace internal {
@@ -126,6 +134,10 @@ struct RangeCacheEntry {
   ReadRange range;
   Future<std::shared_ptr<Buffer>> future;
 
+  RangeCacheEntry() = default;
+  RangeCacheEntry(const ReadRange& range_, Future<std::shared_ptr<Buffer>> future_)
+      : range(range_), future(std::move(future_)) {}
+
   friend bool operator<(const RangeCacheEntry& left, const RangeCacheEntry& right) {
     return left.range.offset < right.range.offset;
   }
@@ -139,8 +151,30 @@ struct ReadRangeCache::Impl {
   // Ordered by offset (so as to find a matching region by binary search)
   std::vector<RangeCacheEntry> entries;
 
-  // Add new entries, themselves ordered by offset
-  void AddEntries(std::vector<RangeCacheEntry> new_entries) {
+  virtual ~Impl() = default;
+
+  // Get the future corresponding to a range
+  virtual Future<std::shared_ptr<Buffer>> MaybeRead(RangeCacheEntry* entry) {
+    return entry->future;
+  }
+
+  // Make cache entries for ranges
+  virtual std::vector<RangeCacheEntry> MakeCacheEntries(
+      const std::vector<ReadRange>& ranges) {
+    std::vector<RangeCacheEntry> new_entries;
+    new_entries.reserve(ranges.size());
+    for (const auto& range : ranges) {
+      new_entries.emplace_back(range, file->ReadAsync(ctx, range.offset, range.length));
+    }
+    return new_entries;
+  }
+
+  // Add the given ranges to the cache, coalescing them where possible
+  virtual Status Cache(std::vector<ReadRange> ranges) {
+    ranges = internal::CoalesceReadRanges(std::move(ranges), options.hole_size_limit,
+                                          options.range_size_limit);
+    std::vector<RangeCacheEntry> new_entries = MakeCacheEntries(ranges);
+    // Add new entries, themselves ordered by offset
     if (entries.size() > 0) {
       std::vector<RangeCacheEntry> merged(entries.size() + new_entries.size());
       std::merge(entries.begin(), entries.end(), new_entries.begin(), new_entries.end(),
@@ -149,12 +183,115 @@ struct ReadRangeCache::Impl {
     } else {
       entries = std::move(new_entries);
     }
+    // Prefetch immediately, regardless of executor availability, if possible
+    return file->WillNeed(ranges);
+  }
+
+  // Read the given range from the cache, blocking if needed. Cannot read a range
+  // that spans cache entries.
+  virtual Result<std::shared_ptr<Buffer>> Read(ReadRange range) {
+    if (range.length == 0) {
+      static const uint8_t byte = 0;
+      return std::make_shared<Buffer>(&byte, 0);
+    }
+
+    const auto it = std::lower_bound(
+        entries.begin(), entries.end(), range,
+        [](const RangeCacheEntry& entry, const ReadRange& range) {
+          return entry.range.offset + entry.range.length < range.offset + range.length;
+        });
+    if (it != entries.end() && it->range.Contains(range)) {
+      auto fut = MaybeRead(&*it);
+      ARROW_ASSIGN_OR_RAISE(auto buf, fut.result());
+      return SliceBuffer(std::move(buf), range.offset - it->range.offset, range.length);
+    }
+    return Status::Invalid("ReadRangeCache did not find matching cache entry");
+  }
+
+  virtual Future<> Wait() {
+    std::vector<Future<>> futures;
+    for (auto& entry : entries) {
+      futures.emplace_back(MaybeRead(&entry));
+    }
+    return AllComplete(futures);
+  }
+
+  // Return a Future that completes when the given ranges have been read.
+  virtual Future<> WaitFor(std::vector<ReadRange> ranges) {
+    auto end = std::remove_if(ranges.begin(), ranges.end(),
+                              [](const ReadRange& range) { return range.length == 0; });
+    ranges.resize(end - ranges.begin());
+    std::vector<Future<>> futures;
+    futures.reserve(ranges.size());
+    for (auto& range : ranges) {
+      const auto it = std::lower_bound(
+          entries.begin(), entries.end(), range,
+          [](const RangeCacheEntry& entry, const ReadRange& range) {
+            return entry.range.offset + entry.range.length < range.offset + range.length;
+          });
+      if (it != entries.end() && it->range.Contains(range)) {
+        futures.push_back(Future<>(MaybeRead(&*it)));
+      } else {
+        return Status::Invalid("Range was not requested for caching: offset=",
+                               range.offset, " length=", range.length);
+      }
+    }
+    return AllComplete(futures);
+  }
+};
+
+// Don't read ranges when they're first added. Instead, wait until they're requested
+// (either through Read or WaitFor).
+struct ReadRangeCache::LazyImpl : public ReadRangeCache::Impl {
+  // Protect against concurrent modification of entries[i]->future
+  std::mutex entry_mutex;
+
+  virtual ~LazyImpl() = default;
+
+  Future<std::shared_ptr<Buffer>> MaybeRead(RangeCacheEntry* entry) override {
+    // Called by superclass Read()/WaitFor() so we have the lock
+    if (!entry->future.is_valid()) {
+      entry->future = file->ReadAsync(ctx, entry->range.offset, entry->range.length);
+    }
+    return entry->future;
+  }
+
+  std::vector<RangeCacheEntry> MakeCacheEntries(
+      const std::vector<ReadRange>& ranges) override {
+    std::vector<RangeCacheEntry> new_entries;
+    new_entries.reserve(ranges.size());
+    for (const auto& range : ranges) {
+      // In the lazy variant, don't read data here - later, a call to Read or WaitFor
+      // will call back to MaybeRead (under the lock) which will fill the future.
+      new_entries.emplace_back(range, Future<std::shared_ptr<Buffer>>());
+    }
+    return new_entries;
+  }
+
+  Status Cache(std::vector<ReadRange> ranges) override {
+    std::unique_lock<std::mutex> guard(entry_mutex);
+    return ReadRangeCache::Impl::Cache(std::move(ranges));
+  }
+
+  Result<std::shared_ptr<Buffer>> Read(ReadRange range) override {
+    std::unique_lock<std::mutex> guard(entry_mutex);
+    return ReadRangeCache::Impl::Read(range);
+  }
+
+  Future<> Wait() override {
+    std::unique_lock<std::mutex> guard(entry_mutex);
+    return ReadRangeCache::Impl::Wait();
+  }
+
+  Future<> WaitFor(std::vector<ReadRange> ranges) override {
+    std::unique_lock<std::mutex> guard(entry_mutex);
+    return ReadRangeCache::Impl::WaitFor(std::move(ranges));
   }
 };
 
 ReadRangeCache::ReadRangeCache(std::shared_ptr<RandomAccessFile> file, IOContext ctx,
                                CacheOptions options)
-    : impl_(new Impl()) {
+    : impl_(options.lazy ? new LazyImpl() : new Impl()) {
   impl_->file = std::move(file);
   impl_->ctx = std::move(ctx);
   impl_->options = options;
@@ -163,44 +300,17 @@ ReadRangeCache::ReadRangeCache(std::shared_ptr<RandomAccessFile> file, IOContext
 ReadRangeCache::~ReadRangeCache() = default;
 
 Status ReadRangeCache::Cache(std::vector<ReadRange> ranges) {
-  ranges = internal::CoalesceReadRanges(std::move(ranges), impl_->options.hole_size_limit,
-                                        impl_->options.range_size_limit);
-  std::vector<RangeCacheEntry> entries;
-  entries.reserve(ranges.size());
-  for (const auto& range : ranges) {
-    auto fut = impl_->file->ReadAsync(impl_->ctx, range.offset, range.length);
-    entries.push_back({range, std::move(fut)});
-  }
-
-  impl_->AddEntries(std::move(entries));
-  // Prefetch immediately, regardless of executor availability, if possible
-  return impl_->file->WillNeed(ranges);
+  return impl_->Cache(std::move(ranges));
 }
 
 Result<std::shared_ptr<Buffer>> ReadRangeCache::Read(ReadRange range) {
-  if (range.length == 0) {
-    static const uint8_t byte = 0;
-    return std::make_shared<Buffer>(&byte, 0);
-  }
-
-  const auto it = std::lower_bound(
-      impl_->entries.begin(), impl_->entries.end(), range,
-      [](const RangeCacheEntry& entry, const ReadRange& range) {
-        return entry.range.offset + entry.range.length < range.offset + range.length;
-      });
-  if (it != impl_->entries.end() && it->range.Contains(range)) {
-    ARROW_ASSIGN_OR_RAISE(auto buf, it->future.result());
-    return SliceBuffer(std::move(buf), range.offset - it->range.offset, range.length);
-  }
-  return Status::Invalid("ReadRangeCache did not find matching cache entry");
+  return impl_->Read(range);
 }
 
-Future<> ReadRangeCache::Wait() {
-  std::vector<Future<>> futures;
-  for (const auto& entry : impl_->entries) {
-    futures.emplace_back(entry.future);
-  }
-  return AllComplete(futures);
+Future<> ReadRangeCache::Wait() { return impl_->Wait(); }
+
+Future<> ReadRangeCache::WaitFor(std::vector<ReadRange> ranges) {
+  return impl_->WaitFor(std::move(ranges));
 }
 
 }  // namespace internal
diff --git a/cpp/src/arrow/io/caching.h b/cpp/src/arrow/io/caching.h
index a5b48dd885e..59a9b60e82f 100644
--- a/cpp/src/arrow/io/caching.h
+++ b/cpp/src/arrow/io/caching.h
@@ -34,17 +34,19 @@ struct ARROW_EXPORT CacheOptions {
   static constexpr double kDefaultIdealBandwidthUtilizationFrac = 0.9;
   static constexpr int64_t kDefaultMaxIdealRequestSizeMib = 64;
 
-  /// /brief The maximum distance in bytes between two consecutive
+  /// \brief The maximum distance in bytes between two consecutive
   ///   ranges; beyond this value, ranges are not combined
   int64_t hole_size_limit;
-  /// /brief The maximum size in bytes of a combined range; if
+  /// \brief The maximum size in bytes of a combined range; if
   ///   combining two consecutive ranges would produce a range of a
   ///   size greater than this, they are not combined
   int64_t range_size_limit;
+  /// \brief A lazy cache does not perform any I/O until requested.
+  bool lazy;
 
   bool operator==(const CacheOptions& other) const {
     return hole_size_limit == other.hole_size_limit &&
-           range_size_limit == other.range_size_limit;
+           range_size_limit == other.range_size_limit && lazy == other.lazy;
   }
 
   /// \brief Construct CacheOptions from network storage metrics (e.g. S3).
@@ -67,16 +69,34 @@ struct ARROW_EXPORT CacheOptions {
       int64_t max_ideal_request_size_mib = kDefaultMaxIdealRequestSizeMib);
 
   static CacheOptions Defaults();
+  static CacheOptions LazyDefaults();
 };
 
 namespace internal {
 
 /// \brief A read cache designed to hide IO latencies when reading.
 ///
-/// To use this, you must first pass it the ranges you'll need in the future.
-/// The cache will combine those ranges according to parameters (see constructor)
-/// and start fetching the combined ranges in the background.
-/// You can then individually fetch them using Read().
+/// This class takes multiple byte ranges that an application expects to read, and
+/// coalesces them into fewer, larger read requests, which benefits performance on some
+/// filesystems, particularly remote ones like Amazon S3. By default, it also issues
+/// these read requests in parallel up front.
+///
+/// To use:
+/// 1. Cache() the ranges you expect to read in the future. Ideally, these ranges have
+///    the exact offset and length that will later be read. The cache will combine those
+///    ranges according to parameters (see constructor).
+///
+///    By default, the cache will also start fetching the combined ranges in parallel in
+///    the background, unless CacheOptions.lazy is set.
+///
+/// 2. Call WaitFor() to be notified when the given ranges have been read. If
+///    CacheOptions.lazy is set, I/O will be triggered in the background here instead.
+///    This can be done in parallel (e.g. if parsing a file, call WaitFor() for each
+///    chunk of the file that can be parsed in parallel).
+///
+/// 3. Call Read() to retrieve the actual data for the given ranges.
+///    A synchronous application may skip WaitFor() and just call Read() - it will still
+///    benefit from coalescing and parallel fetching.
 class ARROW_EXPORT ReadRangeCache {
  public:
   static constexpr int64_t kDefaultHoleSizeLimit = 8192;
@@ -103,8 +123,13 @@ class ARROW_EXPORT ReadRangeCache {
   /// \brief Wait until all ranges added so far have been cached.
   Future<> Wait();
 
+  /// \brief Wait until all given ranges have been cached.
+  Future<> WaitFor(std::vector<ReadRange> ranges);
+
  protected:
   struct Impl;
+  struct LazyImpl;
+
   std::unique_ptr<Impl> impl_;
 };
 
diff --git a/cpp/src/arrow/io/interfaces.cc b/cpp/src/arrow/io/interfaces.cc
index d052c016837..670fab415d7 100644
--- a/cpp/src/arrow/io/interfaces.cc
+++ b/cpp/src/arrow/io/interfaces.cc
@@ -357,10 +357,16 @@ struct ReadRangeCombiner {
     // Remove zero-sized ranges
     auto end = std::remove_if(ranges.begin(), ranges.end(),
                               [](const ReadRange& range) { return range.length == 0; });
-    ranges.resize(end - ranges.begin());
     // Sort in position order
-    std::sort(ranges.begin(), ranges.end(),
+    std::sort(ranges.begin(), end,
               [](const ReadRange& a, const ReadRange& b) { return a.offset < b.offset; });
+    // Remove ranges that overlap 100%
+    end = std::unique(ranges.begin(), end,
+                      [](const ReadRange& left, const ReadRange& right) {
+                        return right.offset >= left.offset &&
+                               right.offset + right.length <= left.offset + left.length;
+                      });
+    ranges.resize(end - ranges.begin());
 
     // Skip further processing if ranges is empty after removing zero-sized ranges.
     if (ranges.empty()) {
diff --git a/cpp/src/arrow/io/memory_test.cc b/cpp/src/arrow/io/memory_test.cc
index 00a1dcedb55..6a24b0c764f 100644
--- a/cpp/src/arrow/io/memory_test.cc
+++ b/cpp/src/arrow/io/memory_test.cc
@@ -37,6 +37,7 @@
 #include "arrow/io/transform.h"
 #include "arrow/io/util_internal.h"
 #include "arrow/status.h"
+#include "arrow/testing/future_util.h"
 #include "arrow/testing/gtest_util.h"
 #include "arrow/testing/util.h"
 #include "arrow/util/bit_util.h"
@@ -44,6 +45,7 @@
 #include "arrow/util/future.h"
 #include "arrow/util/iterator.h"
 #include "arrow/util/logging.h"
+#include "arrow/util/parallel.h"
 
 namespace arrow {
 
@@ -690,43 +692,149 @@ TEST(CoalesceReadRanges, Basics) {
   // Same as (*) but unsorted
   check({{140, 100}, {120, 11}, {240, 11}, {110, 10}, {260, 11}},
         {{110, 21}, {140, 100}, {240, 31}});
+
+  // Completely overlapping ranges should be eliminated
+  check({{20, 5}, {20, 5}, {21, 2}}, {{20, 5}});
 }
 
+class CountingBufferReader : public BufferReader {
+ public:
+  using BufferReader::BufferReader;
+  Future<std::shared_ptr<Buffer>> ReadAsync(const IOContext& context, int64_t position,
+                                            int64_t nbytes) override {
+    read_count_++;
+    return BufferReader::ReadAsync(context, position, nbytes);
+  }
+  int64_t read_count() const { return read_count_; }
+
+ private:
+  int64_t read_count_ = 0;
+};
+
 TEST(RangeReadCache, Basics) {
   std::string data = "abcdefghijklmnopqrstuvwxyz";
 
-  auto file = std::make_shared<BufferReader>(Buffer(data));
   CacheOptions options = CacheOptions::Defaults();
   options.hole_size_limit = 2;
   options.range_size_limit = 10;
+
+  for (auto lazy : std::vector<bool>{false, true}) {
+    SCOPED_TRACE(lazy);
+    options.lazy = lazy;
+    auto file = std::make_shared<CountingBufferReader>(Buffer(data));
+    internal::ReadRangeCache cache(file, {}, options);
+
+    ASSERT_OK(cache.Cache({{1, 2}, {3, 2}, {8, 2}, {20, 2}, {25, 0}}));
+    ASSERT_OK(cache.Cache({{10, 4}, {14, 0}, {15, 4}}));
+
+    ASSERT_OK_AND_ASSIGN(auto buf, cache.Read({20, 2}));
+    AssertBufferEqual(*buf, "uv");
+    ASSERT_OK_AND_ASSIGN(buf, cache.Read({1, 2}));
+    AssertBufferEqual(*buf, "bc");
+    ASSERT_OK_AND_ASSIGN(buf, cache.Read({3, 2}));
+    AssertBufferEqual(*buf, "de");
+    ASSERT_OK_AND_ASSIGN(buf, cache.Read({8, 2}));
+    AssertBufferEqual(*buf, "ij");
+    ASSERT_OK_AND_ASSIGN(buf, cache.Read({10, 4}));
+    AssertBufferEqual(*buf, "klmn");
+    ASSERT_OK_AND_ASSIGN(buf, cache.Read({15, 4}));
+    AssertBufferEqual(*buf, "pqrs");
+    ASSERT_FINISHES_OK(cache.WaitFor({{15, 1}, {16, 3}, {25, 0}, {1, 2}}));
+    // Zero-sized
+    ASSERT_OK_AND_ASSIGN(buf, cache.Read({14, 0}));
+    AssertBufferEqual(*buf, "");
+    ASSERT_OK_AND_ASSIGN(buf, cache.Read({25, 0}));
+    AssertBufferEqual(*buf, "");
+
+    // Non-cached ranges
+    ASSERT_RAISES(Invalid, cache.Read({20, 3}));
+    ASSERT_RAISES(Invalid, cache.Read({19, 3}));
+    ASSERT_RAISES(Invalid, cache.Read({0, 3}));
+    ASSERT_RAISES(Invalid, cache.Read({25, 2}));
+    ASSERT_FINISHES_AND_RAISES(Invalid, cache.WaitFor({{25, 2}}));
+    ASSERT_FINISHES_AND_RAISES(Invalid, cache.WaitFor({{1, 2}, {25, 2}}));
+
+    ASSERT_FINISHES_OK(cache.Wait());
+    // 8 ranges should lead to less than 8 reads
+    ASSERT_LT(file->read_count(), 8);
+  }
+}
+
+TEST(RangeReadCache, Concurrency) {
+  std::string data = "abcdefghijklmnopqrstuvwxyz";
+
+  auto file = std::make_shared<BufferReader>(Buffer(data));
+  std::vector<ReadRange> ranges{{1, 2},  {3, 2},  {8, 2},  {20, 2},
+                                {25, 0}, {10, 4}, {14, 0}, {15, 4}};
+
+  for (auto lazy : std::vector<bool>{false, true}) {
+    SCOPED_TRACE(lazy);
+    CacheOptions options = CacheOptions::Defaults();
+    options.hole_size_limit = 2;
+    options.range_size_limit = 10;
+    options.lazy = lazy;
+
+    {
+      internal::ReadRangeCache cache(file, {}, options);
+      ASSERT_OK(cache.Cache(ranges));
+      std::vector<Future<std::shared_ptr<Buffer>>> futures;
+      for (const auto& range : ranges) {
+        futures.push_back(cache.WaitFor({range}).Then(
+            [&cache, range](const detail::Empty&) { return cache.Read(range); }));
+      }
+      for (auto fut : futures) {
+        ASSERT_FINISHES_OK(fut);
+      }
+    }
+    {
+      internal::ReadRangeCache cache(file, {}, options);
+      ASSERT_OK(cache.Cache(ranges));
+      ASSERT_OK(arrow::internal::ParallelFor(
+          static_cast<int>(ranges.size()),
+          [&](int index) { return cache.Read(ranges[index]).status(); }));
+    }
+  }
+}
+
+TEST(RangeReadCache, Lazy) {
+  std::string data = "abcdefghijklmnopqrstuvwxyz";
+
+  auto file = std::make_shared<CountingBufferReader>(Buffer(data));
+  CacheOptions options = CacheOptions::LazyDefaults();
+  options.hole_size_limit = 2;
+  options.range_size_limit = 10;
   internal::ReadRangeCache cache(file, {}, options);
 
   ASSERT_OK(cache.Cache({{1, 2}, {3, 2}, {8, 2}, {20, 2}, {25, 0}}));
   ASSERT_OK(cache.Cache({{10, 4}, {14, 0}, {15, 4}}));
 
+  // Lazy cache doesn't fetch ranges until requested
+  ASSERT_EQ(0, file->read_count());
+
   ASSERT_OK_AND_ASSIGN(auto buf, cache.Read({20, 2}));
   AssertBufferEqual(*buf, "uv");
-  ASSERT_OK_AND_ASSIGN(buf, cache.Read({1, 2}));
-  AssertBufferEqual(*buf, "bc");
-  ASSERT_OK_AND_ASSIGN(buf, cache.Read({3, 2}));
-  AssertBufferEqual(*buf, "de");
-  ASSERT_OK_AND_ASSIGN(buf, cache.Read({8, 2}));
-  AssertBufferEqual(*buf, "ij");
-  ASSERT_OK_AND_ASSIGN(buf, cache.Read({10, 4}));
-  AssertBufferEqual(*buf, "klmn");
-  ASSERT_OK_AND_ASSIGN(buf, cache.Read({15, 4}));
-  AssertBufferEqual(*buf, "pqrs");
-  // Zero-sized
-  ASSERT_OK_AND_ASSIGN(buf, cache.Read({14, 0}));
-  AssertBufferEqual(*buf, "");
-  ASSERT_OK_AND_ASSIGN(buf, cache.Read({25, 0}));
-  AssertBufferEqual(*buf, "");
+  ASSERT_EQ(1, file->read_count());
+
+  ASSERT_OK_AND_ASSIGN(buf, cache.Read({1, 4}));
+  AssertBufferEqual(*buf, "bcde");
+  ASSERT_EQ(2, file->read_count());
+
+  // Requested ranges are still cached
+  ASSERT_OK_AND_ASSIGN(buf, cache.Read({1, 4}));
+  ASSERT_EQ(2, file->read_count());
 
   // Non-cached ranges
   ASSERT_RAISES(Invalid, cache.Read({20, 3}));
   ASSERT_RAISES(Invalid, cache.Read({19, 3}));
   ASSERT_RAISES(Invalid, cache.Read({0, 3}));
   ASSERT_RAISES(Invalid, cache.Read({25, 2}));
+
+  // Can asynchronously kick off a read (though BufferReader::ReadAsync is synchronous so
+  // it will increment the read count here)
+  ASSERT_FINISHES_OK(cache.WaitFor({{10, 2}, {15, 4}}));
+  ASSERT_EQ(3, file->read_count());
+  ASSERT_OK_AND_ASSIGN(buf, cache.Read({10, 2}));
+  ASSERT_EQ(3, file->read_count());
 }
 
 TEST(CacheOptions, Basics) {
@@ -734,7 +842,8 @@ TEST(CacheOptions, Basics) {
                   const double expected_range_size_limit_MiB) -> void {
     const CacheOptions expected = {
         static_cast<int64_t>(std::round(expected_hole_size_limit_MiB * 1024 * 1024)),
-        static_cast<int64_t>(std::round(expected_range_size_limit_MiB * 1024 * 1024))};
+        static_cast<int64_t>(std::round(expected_range_size_limit_MiB * 1024 * 1024)),
+        /*lazy=*/false};
     ASSERT_EQ(actual, expected);
   };
 

From cb29537de79da0d6693e8d44ef0f57434d6d38da Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Tue, 4 May 2021 08:39:48 -0700
Subject: [PATCH 180/719] ARROW-11787: [R] Implement write csv

Closes #10141 from thisisnic/arrow-11787-write_csv

Lead-authored-by: Nic Crane <thisisnic@gmail.com>
Co-authored-by: Neal Richardson <neal.p.richardson@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 cpp/src/arrow/csv/type_fwd.h |  1 +
 r/NAMESPACE                  |  2 +
 r/R/arrowExports.R           | 12 ++++++
 r/R/csv.R                    | 64 ++++++++++++++++++++++++++++++
 r/_pkgdown.yml               |  2 +
 r/man/CsvWriteOptions.Rd     | 22 +++++++++++
 r/man/write_csv_arrow.Rd     | 32 +++++++++++++++
 r/src/arrowExports.cpp       | 54 ++++++++++++++++++++++++++
 r/src/arrow_types.h          |  1 +
 r/src/csv.cpp                | 30 +++++++++++++++
 r/tests/testthat/test-csv.R  | 75 +++++++++++++++++++++++++++++++++++-
 11 files changed, 293 insertions(+), 2 deletions(-)
 create mode 100644 r/man/CsvWriteOptions.Rd
 create mode 100644 r/man/write_csv_arrow.Rd

diff --git a/cpp/src/arrow/csv/type_fwd.h b/cpp/src/arrow/csv/type_fwd.h
index 17fcdbdcc56..c0a53847a90 100644
--- a/cpp/src/arrow/csv/type_fwd.h
+++ b/cpp/src/arrow/csv/type_fwd.h
@@ -22,6 +22,7 @@ class TableReader;
 struct ConvertOptions;
 struct ReadOptions;
 struct ParseOptions;
+struct WriteOptions;
 
 }  // namespace csv
 }  // namespace arrow
diff --git a/r/NAMESPACE b/r/NAMESPACE
index 607177235e9..9a05b87476a 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -122,6 +122,7 @@ export(CsvFragmentScanOptions)
 export(CsvParseOptions)
 export(CsvReadOptions)
 export(CsvTableReader)
+export(CsvWriteOptions)
 export(Dataset)
 export(DatasetFactory)
 export(DateUnit)
@@ -278,6 +279,7 @@ export(unify_schemas)
 export(utf8)
 export(value_counts)
 export(write_arrow)
+export(write_csv_arrow)
 export(write_dataset)
 export(write_feather)
 export(write_ipc_stream)
diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R
index b8d72c30ed6..bf7b3b777c1 100644
--- a/r/R/arrowExports.R
+++ b/r/R/arrowExports.R
@@ -292,6 +292,10 @@ compute__GetFunctionNames <- function(){
     .Call(`_arrow_compute__GetFunctionNames`)
 }
 
+csv___WriteOptions__initialize <- function(options){
+    .Call(`_arrow_csv___WriteOptions__initialize`, options)
+}
+
 csv___ReadOptions__initialize <- function(options){
     .Call(`_arrow_csv___ReadOptions__initialize`, options)
 }
@@ -332,6 +336,14 @@ TimestampParser__MakeISO8601 <- function(){
     .Call(`_arrow_TimestampParser__MakeISO8601`)
 }
 
+csv___WriteCSV__Table <- function(table, write_options, stream){
+    invisible(.Call(`_arrow_csv___WriteCSV__Table`, table, write_options, stream))
+}
+
+csv___WriteCSV__RecordBatch <- function(record_batch, write_options, stream){
+    invisible(.Call(`_arrow_csv___WriteCSV__RecordBatch`, record_batch, write_options, stream))
+}
+
 dataset___Dataset__NewScan <- function(ds){
     .Call(`_arrow_dataset___Dataset__NewScan`, ds)
 }
diff --git a/r/R/csv.R b/r/R/csv.R
index 160c46e4753..3357df52132 100644
--- a/r/R/csv.R
+++ b/r/R/csv.R
@@ -381,6 +381,11 @@ CsvTableReader$create <- function(file,
 #' `TimestampParser$create()` takes an optional `format` string argument.
 #' See [`strptime()`][base::strptime()] for example syntax.
 #' The default is to use an ISO-8601 format parser.
+#' 
+#' The `CsvWriteOptions$create()` factory method takes the following arguments:
+#' - `include_header` Whether to write an initial header line with column names
+#' - `batch_size` Maximum number of rows processed at a time. Default is 1024.
+#' 
 #' @section Active bindings:
 #'
 #' - `column_names`: from `CsvReadOptions`
@@ -408,6 +413,19 @@ CsvReadOptions$create <- function(use_threads = option_use_threads(),
   )
 }
 
+#' @rdname CsvReadOptions
+#' @export
+CsvWriteOptions <- R6Class("CsvWriteOptions", inherit = ArrowObject)
+CsvWriteOptions$create <- function(include_header = TRUE, batch_size = 1024L){
+  assert_that(is_integerish(batch_size, n = 1, finite = TRUE), batch_size > 0)
+  csv___WriteOptions__initialize(
+    list(
+      include_header = include_header,
+      batch_size = as.integer(batch_size)
+    )
+  )
+}
+
 readr_to_csv_read_options <- function(skip, col_names, col_types) {
   if (isTRUE(col_names)) {
     # C++ default to parse is 0-length string array
@@ -585,3 +603,49 @@ readr_to_csv_convert_options <- function(na,
     include_columns = include_columns
   )
 }
+
+#' Write CSV file to disk
+#'
+#' @param x `data.frame`, [RecordBatch], or [Table]
+#' @param sink A string file path, URI, or [OutputStream], or path in a file
+#' system (`SubTreeFileSystem`)
+#' @param include_header Whether to write an initial header line with column names
+#' @param batch_size Maximum number of rows processed at a time. Default is 1024.
+#'
+#' @return The input `x`, invisibly. Note that if `sink` is an [OutputStream],
+#' the stream will be left open.
+#' @export
+#' @examples
+#' \donttest{
+#' tf <- tempfile()
+#' on.exit(unlink(tf))
+#' write_csv_arrow(mtcars, tf)
+#' }
+#' @include arrow-package.R
+write_csv_arrow <- function(x,
+                            sink,
+                            include_header = TRUE,
+                            batch_size = 1024L) {
+  
+  write_options <- CsvWriteOptions$create(include_header, batch_size)
+  
+  x_out <- x
+  if (is.data.frame(x)) {
+    x <- Table$create(x)
+  }
+  
+  assert_is(x, "ArrowTabular")
+  
+  if (!inherits(sink, "OutputStream")) {
+    sink <- make_output_stream(sink)
+    on.exit(sink$close())
+  }
+  
+  if(inherits(x, "RecordBatch")){
+    csv___WriteCSV__RecordBatch(x, write_options, sink)
+  } else if(inherits(x, "Table")){
+    csv___WriteCSV__Table(x, write_options, sink)
+  }
+  
+  invisible(x_out)
+}
diff --git a/r/_pkgdown.yml b/r/_pkgdown.yml
index bb77b416aab..b2266cde758 100644
--- a/r/_pkgdown.yml
+++ b/r/_pkgdown.yml
@@ -98,6 +98,7 @@ reference:
       - write_ipc_stream
       - write_to_raw
       - write_parquet
+      - write_csv_arrow
   - title: C++ reader/writer interface
     contents:
       - ParquetFileReader
@@ -109,6 +110,7 @@ reference:
       - RecordBatchReader
       - RecordBatchWriter
       - CsvReadOptions
+      - CsvWriteOptions
   - title: Arrow data containers
     contents:
       - array
diff --git a/r/man/CsvWriteOptions.Rd b/r/man/CsvWriteOptions.Rd
new file mode 100644
index 00000000000..e83126c9f9a
--- /dev/null
+++ b/r/man/CsvWriteOptions.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/csv.R
+\docType{class}
+\name{CsvWriteOptions}
+\alias{CsvWriteOptions}
+\title{File writer options}
+\description{
+\code{CsvReadOptions}, \code{CsvParseOptions}, \code{CsvConvertOptions},
+\code{JsonReadOptions}, \code{JsonParseOptions}, and \code{TimestampParser} are containers for various
+file reading options. See their usage in \code{\link[=read_csv_arrow]{read_csv_arrow()}} and
+\code{\link[=read_json_arrow]{read_json_arrow()}}, respectively.
+}
+\section{Factory}{
+
+
+The \code{CsvWriteOptions$create()} factory method takes the following arguments:
+\itemize{
+\item \code{include_header} Whether to write an initial header line with column names
+\item \code{batch_size} Maximum number of rows processed at a time. Default is 1024
+}
+}
+
diff --git a/r/man/write_csv_arrow.Rd b/r/man/write_csv_arrow.Rd
new file mode 100644
index 00000000000..f583e487e1f
--- /dev/null
+++ b/r/man/write_csv_arrow.Rd
@@ -0,0 +1,32 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/csv.R
+\name{write_csv_arrow}
+\alias{write_csv_arrow}
+\title{Write CSV file to disk}
+\usage{
+write_csv_arrow(x, sink, include_header = TRUE, batch_size = 1024L)
+}
+\arguments{
+\item{x}{\code{data.frame}, \link{RecordBatch}, or \link{Table}}
+
+\item{sink}{A string file path, URI, or \link{OutputStream}, or path in a file
+system (\code{SubTreeFileSystem})}
+
+\item{include_header}{Whether to write an initial header line with column names}
+
+\item{batch_size}{Maximum number of rows processed at a time. Default is 1024}
+}
+\value{
+The input \code{x}, invisibly. Note that if \code{sink} is an \link{OutputStream},
+the stream will be left open.
+}
+\description{
+Write CSV file to disk
+}
+\examples{
+\donttest{
+tf <- tempfile()
+on.exit(unlink(tf))
+write_csv_arrow(mtcars, tf)
+}
+}
diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp
index 3feef14a873..418b61be08e 100644
--- a/r/src/arrowExports.cpp
+++ b/r/src/arrowExports.cpp
@@ -1143,6 +1143,21 @@ extern "C" SEXP _arrow_compute__GetFunctionNames(){
 }
 #endif
 
+// csv.cpp
+#if defined(ARROW_R_WITH_ARROW)
+std::shared_ptr<arrow::csv::WriteOptions> csv___WriteOptions__initialize(cpp11::list options);
+extern "C" SEXP _arrow_csv___WriteOptions__initialize(SEXP options_sexp){
+BEGIN_CPP11
+	arrow::r::Input<cpp11::list>::type options(options_sexp);
+	return cpp11::as_sexp(csv___WriteOptions__initialize(options));
+END_CPP11
+}
+#else
+extern "C" SEXP _arrow_csv___WriteOptions__initialize(SEXP options_sexp){
+	Rf_error("Cannot call csv___WriteOptions__initialize(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+}
+#endif
+
 // csv.cpp
 #if defined(ARROW_R_WITH_ARROW)
 std::shared_ptr<arrow::csv::ReadOptions> csv___ReadOptions__initialize(cpp11::list options);
@@ -1295,6 +1310,42 @@ extern "C" SEXP _arrow_TimestampParser__MakeISO8601(){
 }
 #endif
 
+// csv.cpp
+#if defined(ARROW_R_WITH_ARROW)
+void csv___WriteCSV__Table(const std::shared_ptr<arrow::Table>& table, const std::shared_ptr<arrow::csv::WriteOptions>& write_options, const std::shared_ptr<arrow::io::OutputStream>& stream);
+extern "C" SEXP _arrow_csv___WriteCSV__Table(SEXP table_sexp, SEXP write_options_sexp, SEXP stream_sexp){
+BEGIN_CPP11
+	arrow::r::Input<const std::shared_ptr<arrow::Table>&>::type table(table_sexp);
+	arrow::r::Input<const std::shared_ptr<arrow::csv::WriteOptions>&>::type write_options(write_options_sexp);
+	arrow::r::Input<const std::shared_ptr<arrow::io::OutputStream>&>::type stream(stream_sexp);
+	csv___WriteCSV__Table(table, write_options, stream);
+	return R_NilValue;
+END_CPP11
+}
+#else
+extern "C" SEXP _arrow_csv___WriteCSV__Table(SEXP table_sexp, SEXP write_options_sexp, SEXP stream_sexp){
+	Rf_error("Cannot call csv___WriteCSV__Table(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+}
+#endif
+
+// csv.cpp
+#if defined(ARROW_R_WITH_ARROW)
+void csv___WriteCSV__RecordBatch(const std::shared_ptr<arrow::RecordBatch>& record_batch, const std::shared_ptr<arrow::csv::WriteOptions>& write_options, const std::shared_ptr<arrow::io::OutputStream>& stream);
+extern "C" SEXP _arrow_csv___WriteCSV__RecordBatch(SEXP record_batch_sexp, SEXP write_options_sexp, SEXP stream_sexp){
+BEGIN_CPP11
+	arrow::r::Input<const std::shared_ptr<arrow::RecordBatch>&>::type record_batch(record_batch_sexp);
+	arrow::r::Input<const std::shared_ptr<arrow::csv::WriteOptions>&>::type write_options(write_options_sexp);
+	arrow::r::Input<const std::shared_ptr<arrow::io::OutputStream>&>::type stream(stream_sexp);
+	csv___WriteCSV__RecordBatch(record_batch, write_options, stream);
+	return R_NilValue;
+END_CPP11
+}
+#else
+extern "C" SEXP _arrow_csv___WriteCSV__RecordBatch(SEXP record_batch_sexp, SEXP write_options_sexp, SEXP stream_sexp){
+	Rf_error("Cannot call csv___WriteCSV__RecordBatch(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+}
+#endif
+
 // dataset.cpp
 #if defined(ARROW_R_WITH_DATASET)
 std::shared_ptr<ds::ScannerBuilder> dataset___Dataset__NewScan(const std::shared_ptr<ds::Dataset>& ds);
@@ -6677,6 +6728,7 @@ static const R_CallMethodDef CallEntries[] = {
 		{ "_arrow_compute__CallFunction", (DL_FUNC) &_arrow_compute__CallFunction, 3}, 
 		{ "_arrow_compute__GroupBy", (DL_FUNC) &_arrow_compute__GroupBy, 3}, 
 		{ "_arrow_compute__GetFunctionNames", (DL_FUNC) &_arrow_compute__GetFunctionNames, 0}, 
+		{ "_arrow_csv___WriteOptions__initialize", (DL_FUNC) &_arrow_csv___WriteOptions__initialize, 1}, 
 		{ "_arrow_csv___ReadOptions__initialize", (DL_FUNC) &_arrow_csv___ReadOptions__initialize, 1}, 
 		{ "_arrow_csv___ParseOptions__initialize", (DL_FUNC) &_arrow_csv___ParseOptions__initialize, 1}, 
 		{ "_arrow_csv___ReadOptions__column_names", (DL_FUNC) &_arrow_csv___ReadOptions__column_names, 1}, 
@@ -6687,6 +6739,8 @@ static const R_CallMethodDef CallEntries[] = {
 		{ "_arrow_TimestampParser__format", (DL_FUNC) &_arrow_TimestampParser__format, 1}, 
 		{ "_arrow_TimestampParser__MakeStrptime", (DL_FUNC) &_arrow_TimestampParser__MakeStrptime, 1}, 
 		{ "_arrow_TimestampParser__MakeISO8601", (DL_FUNC) &_arrow_TimestampParser__MakeISO8601, 0}, 
+		{ "_arrow_csv___WriteCSV__Table", (DL_FUNC) &_arrow_csv___WriteCSV__Table, 3}, 
+		{ "_arrow_csv___WriteCSV__RecordBatch", (DL_FUNC) &_arrow_csv___WriteCSV__RecordBatch, 3}, 
 		{ "_arrow_dataset___Dataset__NewScan", (DL_FUNC) &_arrow_dataset___Dataset__NewScan, 1}, 
 		{ "_arrow_dataset___Dataset__schema", (DL_FUNC) &_arrow_dataset___Dataset__schema, 1}, 
 		{ "_arrow_dataset___Dataset__type_name", (DL_FUNC) &_arrow_dataset___Dataset__type_name, 1}, 
diff --git a/r/src/arrow_types.h b/r/src/arrow_types.h
index 5f7c725ffec..5aa26eebd71 100644
--- a/r/src/arrow_types.h
+++ b/r/src/arrow_types.h
@@ -179,6 +179,7 @@ R6_CLASS_NAME(arrow::csv::ReadOptions, "CsvReadOptions");
 R6_CLASS_NAME(arrow::csv::ParseOptions, "CsvParseOptions");
 R6_CLASS_NAME(arrow::csv::ConvertOptions, "CsvConvertOptions");
 R6_CLASS_NAME(arrow::csv::TableReader, "CsvTableReader");
+R6_CLASS_NAME(arrow::csv::WriteOptions, "CsvWriteOptions");
 
 #if defined(ARROW_R_WITH_PARQUET)
 R6_CLASS_NAME(parquet::ArrowReaderProperties, "ParquetArrowReaderProperties");
diff --git a/r/src/csv.cpp b/r/src/csv.cpp
index 0ce4cd699f8..3df5db87efa 100644
--- a/r/src/csv.cpp
+++ b/r/src/csv.cpp
@@ -20,8 +20,21 @@
 #if defined(ARROW_R_WITH_ARROW)
 
 #include <arrow/csv/reader.h>
+#include <arrow/csv/writer.h>
+#include <arrow/memory_pool.h>
+
 #include <arrow/util/value_parsing.h>
 
+// [[arrow::export]]
+std::shared_ptr<arrow::csv::WriteOptions> csv___WriteOptions__initialize(
+    cpp11::list options) {
+  auto res =
+      std::make_shared<arrow::csv::WriteOptions>(arrow::csv::WriteOptions::Defaults());
+  res->include_header = cpp11::as_cpp<bool>(options["include_header"]);
+  res->batch_size = cpp11::as_cpp<int>(options["batch_size"]);
+  return res;
+}
+
 // [[arrow::export]]
 std::shared_ptr<arrow::csv::ReadOptions> csv___ReadOptions__initialize(
     cpp11::list options) {
@@ -174,4 +187,21 @@ std::shared_ptr<arrow::TimestampParser> TimestampParser__MakeISO8601() {
   return arrow::TimestampParser::MakeISO8601();
 }
 
+// [[arrow::export]]
+void csv___WriteCSV__Table(const std::shared_ptr<arrow::Table>& table,
+                           const std::shared_ptr<arrow::csv::WriteOptions>& write_options,
+                           const std::shared_ptr<arrow::io::OutputStream>& stream) {
+  StopIfNotOk(
+      arrow::csv::WriteCSV(*table, *write_options, gc_memory_pool(), stream.get()));
+}
+
+// [[arrow::export]]
+void csv___WriteCSV__RecordBatch(
+    const std::shared_ptr<arrow::RecordBatch>& record_batch,
+    const std::shared_ptr<arrow::csv::WriteOptions>& write_options,
+    const std::shared_ptr<arrow::io::OutputStream>& stream) {
+  StopIfNotOk(arrow::csv::WriteCSV(*record_batch, *write_options, gc_memory_pool(),
+                                   stream.get()));
+}
+
 #endif
diff --git a/r/tests/testthat/test-csv.R b/r/tests/testthat/test-csv.R
index d27706f060d..a61480fb33a 100644
--- a/r/tests/testthat/test-csv.R
+++ b/r/tests/testthat/test-csv.R
@@ -15,13 +15,14 @@
 # specific language governing permissions and limitations
 # under the License.
 
-context("CsvTableReader")
-
 # Not all types round trip via CSV 100% identical by default
 tbl <- example_data[, c("dbl", "lgl", "false", "chr")]
+tbl_no_dates <- tbl
 # Add a date to test its parsing
 tbl$date <- Sys.Date() + 1:10
 
+csv_file <- tempfile()
+
 test_that("Can read csv file", {
   tf <- tempfile()
   on.exit(unlink(tf))
@@ -256,3 +257,73 @@ test_that("Mix of guessing and declaring types", {
   df <- read_csv_arrow(tf, col_types = "d-?c", col_names = cols, skip = 1)
   expect_identical(df, tbl[, c("dbl", "false", "chr")])
 })
+
+
+test_that("Write a CSV file with header", {
+  tbl_out <- write_csv_arrow(tbl_no_dates, csv_file)
+  expect_true(file.exists(csv_file))
+  expect_identical(tbl_out, tbl_no_dates)
+  
+  tbl_in <- read_csv_arrow(csv_file)
+  expect_identical(tbl_in, tbl_no_dates)
+  
+  skip("Doesn't yet work with date columns due to ARROW-12540")
+  
+  tbl_out <- write_csv_arrow(tbl, csv_file)
+  expect_true(file.exists(csv_file))
+  expect_identical(tbl_out, tbl)
+  
+  tbl_in <- read_csv_arrow(csv_file)
+  expect_identical(tbl_in, tbl)
+})
+
+
+test_that("Write a CSV file with no header", {
+  
+  tbl_out <- write_csv_arrow(tbl_no_dates, csv_file, include_header = FALSE)
+  expect_true(file.exists(csv_file))
+  expect_identical(tbl_out, tbl_no_dates)
+  tbl_in <- read_csv_arrow(csv_file, col_names = FALSE)
+  
+  tbl_expected <- tbl_no_dates
+  names(tbl_expected) <- c("f0", "f1", "f2", "f3")
+  
+  expect_identical(tbl_in, tbl_expected)
+  
+})
+
+test_that("Write a CSV file with different batch sizes", {
+  
+  tbl_out1 <- write_csv_arrow(tbl_no_dates, csv_file, batch_size = 1)
+  expect_true(file.exists(csv_file))
+  expect_identical(tbl_out1, tbl_no_dates)
+  tbl_in1 <- read_csv_arrow(csv_file)
+  expect_identical(tbl_in1, tbl_no_dates)
+  
+  tbl_out2 <- write_csv_arrow(tbl_no_dates, csv_file, batch_size = 2)
+  expect_true(file.exists(csv_file))
+  expect_identical(tbl_out2, tbl_no_dates)
+  tbl_in2 <- read_csv_arrow(csv_file)
+  expect_identical(tbl_in2, tbl_no_dates)
+  
+  tbl_out3 <- write_csv_arrow(tbl_no_dates, csv_file, batch_size = 12)
+  expect_true(file.exists(csv_file))
+  expect_identical(tbl_out3, tbl_no_dates)
+  tbl_in3 <- read_csv_arrow(csv_file)
+  expect_identical(tbl_in3, tbl_no_dates)
+  
+})
+
+test_that("Write a CSV file with invalid input type", {
+  expect_error(
+    write_csv_arrow(Array$create(1:5), csv_file),
+    regexp = 'x must be a "ArrowTabular"'
+    )
+})
+
+test_that("Write a CSV file with invalid batch size", {
+  expect_error(
+    write_csv_arrow(tbl_no_dates, csv_file, batch_size = -1),
+    regexp = 'batch_size not greater than 0'
+  )
+})

From 4eb7a15d47c7efa06a6f4c95af8c9a1e2bcc6ad7 Mon Sep 17 00:00:00 2001
From: Ian Cook <ianmcook@gmail.com>
Date: Tue, 4 May 2021 12:24:35 -0400
Subject: [PATCH 181/719] ARROW-12640: [C++] Fix errors from VS 2019 in
 cpp/src/parquet/types.h

This resolves a problem where Visual Studio 2019 reported syntax errors in `cpp/src/parquet/types.h` after ARROW-11929

Closes #10234 from ianmcook/ARROW-12640

Lead-authored-by: Ian Cook <ianmcook@gmail.com>
Co-authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Ian Cook <ianmcook@gmail.com>
---
 cpp/src/parquet/types.h | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/cpp/src/parquet/types.h b/cpp/src/parquet/types.h
index f3d3abfc918..4529dbe6133 100644
--- a/cpp/src/parquet/types.h
+++ b/cpp/src/parquet/types.h
@@ -29,6 +29,15 @@
 #include "parquet/platform.h"
 #include "parquet/type_fwd.h"
 
+#ifdef _WIN32
+
+// Repetition::OPTIONAL conflicts with a #define, so we undefine it
+#ifdef OPTIONAL
+#undef OPTIONAL
+#endif
+
+#endif  // _WIN32
+
 namespace arrow {
 namespace util {
 

From b7b32341ab815ec757daf747eff913eab2db0d7a Mon Sep 17 00:00:00 2001
From: Weston Pace <weston.pace@gmail.com>
Date: Tue, 4 May 2021 13:54:19 -0700
Subject: [PATCH 182/719] ARROW-12602: [R] Add BuildInfo from C++ to arrow_info

I had to leave C++ flags out of the print function because they were too long and messed up the output but one could still access them with `arrow_info()$build_info$cpp_compiler_flags`.

Current Output:

```
> arrow_info()
Arrow package version: 4.0.0.9000

Capabilities:

dataset    TRUE
parquet    TRUE
s3         TRUE
utf8proc   TRUE
re2        TRUE
snappy     TRUE
gzip       TRUE
brotli    FALSE
zstd       TRUE
lz4        TRUE
lz4_frame  TRUE
lzo       FALSE
bz2       FALSE
jemalloc   TRUE
mimalloc   TRUE

Memory:

Allocator jemalloc
Current    0 bytes
Max        0 bytes

Runtime:

SIMD Level          avx2
Detected SIMD Level avx2

Build:

C++ Library Version                            5.0.0-SNAPSHOT
C++ Compiler                                              GNU
C++ Compiler Version                                    9.3.0
Git ID               7bbfb41980d467188c96983050b06c697e0e46ee
```

Output with compiler flags left in...

```
> arrow_info()
Arrow package version: 4.0.0.9000

Capabilities:

dataset    TRUE
parquet    TRUE
s3         TRUE
utf8proc   TRUE
re2        TRUE
snappy     TRUE
gzip       TRUE
brotli    FALSE
zstd       TRUE
lz4        TRUE
lz4_frame  TRUE
lzo       FALSE
bz2       FALSE
jemalloc   TRUE
mimalloc   TRUE

Memory:

Allocator jemalloc
Current    0 bytes
Max        0 bytes

Runtime:

SIMD Level          avx2
Detected SIMD Level avx2

Build:

C++ Library Version                                                                                                                                                                                                                                                                            5.0.0-SNAPSHOT
C++ Compiler                                                                                                                                                                                                                                                                                              GNU
C++ Compiler Version                                                                                                                                                                                                                                                                                    9.3.0
C++ Compiler Flags   -fvisibility-inlines-hidden -std=c++17 -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /home/pace/anaconda3/envs/conbench2/include -fdiagnostics-color=always -fuse-ld=gold -O3 -DNDEBUG
Git ID                                                                                                                                                                                                                                                               7bbfb41980d467188c96983050b06c697e0e46ee
```

Closes #10200 from westonpace/feature/ARROW-12602--r-add-buildinfo-from-c-to-arrow_info

Lead-authored-by: Weston Pace <weston.pace@gmail.com>
Co-authored-by: Neal Richardson <neal.p.richardson@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 r/R/arrow-package.R                   | 16 ++++++++++
 r/R/arrowExports.R                    | 12 ++++---
 r/src/arrowExports.cpp                | 45 ++++++++++++++++++---------
 r/src/{runtimeinfo.cpp => config.cpp} |  7 +++++
 4 files changed, 61 insertions(+), 19 deletions(-)
 rename r/src/{runtimeinfo.cpp => config.cpp} (82%)

diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R
index 51f4987484c..f6f01fe623a 100644
--- a/r/R/arrow-package.R
+++ b/r/R/arrow-package.R
@@ -144,6 +144,7 @@ arrow_info <- function() {
   if (out$libarrow) {
     pool <- default_memory_pool()
     runtimeinfo <- runtime_info()
+    buildinfo <- build_info()
     compute_funcs <- list_compute_functions()
     out <- c(out, list(
       capabilities = c(
@@ -163,6 +164,15 @@ arrow_info <- function() {
       runtime_info = list(
         simd_level = runtimeinfo[1],
         detected_simd_level = runtimeinfo[2]
+      ),
+      build_info = list(
+        cpp_version = buildinfo[1],
+        cpp_compiler = buildinfo[2],
+        cpp_compiler_version = buildinfo[3],
+        cpp_compiler_flags = buildinfo[4],
+        # git_id is "" if not built from a git checkout
+        # convert that to NULL
+        git_id = if (nzchar(buildinfo[5])) buildinfo[5]
       )
     ))
   }
@@ -217,6 +227,12 @@ print.arrow_info <- function(x, ...) {
       `SIMD Level` = x$runtime_info$simd_level,
       `Detected SIMD Level` = x$runtime_info$detected_simd_level
     ))
+    print_key_values("Build", c(
+      `C++ Library Version` = x$build_info$cpp_version,
+      `C++ Compiler` = x$build_info$cpp_compiler,
+      `C++ Compiler Version` = x$build_info$cpp_compiler_version,
+      `Git ID` = x$build_info$git_id
+    ))
   } else {
     cat("Arrow C++ library not available. See https://arrow.apache.org/docs/r/articles/install.html for troubleshooting.\n")
   }
diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R
index bf7b3b777c1..b5a2f525a01 100644
--- a/r/R/arrowExports.R
+++ b/r/R/arrowExports.R
@@ -292,6 +292,14 @@ compute__GetFunctionNames <- function(){
     .Call(`_arrow_compute__GetFunctionNames`)
 }
 
+build_info <- function(){
+    .Call(`_arrow_build_info`)
+}
+
+runtime_info <- function(){
+    .Call(`_arrow_runtime_info`)
+}
+
 csv___WriteOptions__initialize <- function(options){
     .Call(`_arrow_csv___WriteOptions__initialize`, options)
 }
@@ -1468,10 +1476,6 @@ ipc___RecordBatchStreamWriter__Open <- function(stream, schema, use_legacy_forma
     .Call(`_arrow_ipc___RecordBatchStreamWriter__Open`, stream, schema, use_legacy_format, metadata_version)
 }
 
-runtime_info <- function(){
-    .Call(`_arrow_runtime_info`)
-}
-
 Array__GetScalar <- function(x, i){
     .Call(`_arrow_Array__GetScalar`, x, i)
 }
diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp
index 418b61be08e..ef6d3063771 100644
--- a/r/src/arrowExports.cpp
+++ b/r/src/arrowExports.cpp
@@ -1143,6 +1143,34 @@ extern "C" SEXP _arrow_compute__GetFunctionNames(){
 }
 #endif
 
+// config.cpp
+#if defined(ARROW_R_WITH_ARROW)
+std::vector<std::string> build_info();
+extern "C" SEXP _arrow_build_info(){
+BEGIN_CPP11
+	return cpp11::as_sexp(build_info());
+END_CPP11
+}
+#else
+extern "C" SEXP _arrow_build_info(){
+	Rf_error("Cannot call build_info(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+}
+#endif
+
+// config.cpp
+#if defined(ARROW_R_WITH_ARROW)
+std::vector<std::string> runtime_info();
+extern "C" SEXP _arrow_runtime_info(){
+BEGIN_CPP11
+	return cpp11::as_sexp(runtime_info());
+END_CPP11
+}
+#else
+extern "C" SEXP _arrow_runtime_info(){
+	Rf_error("Cannot call runtime_info(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+}
+#endif
+
 // csv.cpp
 #if defined(ARROW_R_WITH_ARROW)
 std::shared_ptr<arrow::csv::WriteOptions> csv___WriteOptions__initialize(cpp11::list options);
@@ -5774,20 +5802,6 @@ extern "C" SEXP _arrow_ipc___RecordBatchStreamWriter__Open(SEXP stream_sexp, SEX
 }
 #endif
 
-// runtimeinfo.cpp
-#if defined(ARROW_R_WITH_ARROW)
-std::vector<std::string> runtime_info();
-extern "C" SEXP _arrow_runtime_info(){
-BEGIN_CPP11
-	return cpp11::as_sexp(runtime_info());
-END_CPP11
-}
-#else
-extern "C" SEXP _arrow_runtime_info(){
-	Rf_error("Cannot call runtime_info(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
-}
-#endif
-
 // scalar.cpp
 #if defined(ARROW_R_WITH_ARROW)
 std::shared_ptr<arrow::Scalar> Array__GetScalar(const std::shared_ptr<arrow::Array>& x, int64_t i);
@@ -6728,6 +6742,8 @@ static const R_CallMethodDef CallEntries[] = {
 		{ "_arrow_compute__CallFunction", (DL_FUNC) &_arrow_compute__CallFunction, 3}, 
 		{ "_arrow_compute__GroupBy", (DL_FUNC) &_arrow_compute__GroupBy, 3}, 
 		{ "_arrow_compute__GetFunctionNames", (DL_FUNC) &_arrow_compute__GetFunctionNames, 0}, 
+		{ "_arrow_build_info", (DL_FUNC) &_arrow_build_info, 0}, 
+		{ "_arrow_runtime_info", (DL_FUNC) &_arrow_runtime_info, 0}, 
 		{ "_arrow_csv___WriteOptions__initialize", (DL_FUNC) &_arrow_csv___WriteOptions__initialize, 1}, 
 		{ "_arrow_csv___ReadOptions__initialize", (DL_FUNC) &_arrow_csv___ReadOptions__initialize, 1}, 
 		{ "_arrow_csv___ParseOptions__initialize", (DL_FUNC) &_arrow_csv___ParseOptions__initialize, 1}, 
@@ -7022,7 +7038,6 @@ static const R_CallMethodDef CallEntries[] = {
 		{ "_arrow_ipc___RecordBatchWriter__Close", (DL_FUNC) &_arrow_ipc___RecordBatchWriter__Close, 1}, 
 		{ "_arrow_ipc___RecordBatchFileWriter__Open", (DL_FUNC) &_arrow_ipc___RecordBatchFileWriter__Open, 4}, 
 		{ "_arrow_ipc___RecordBatchStreamWriter__Open", (DL_FUNC) &_arrow_ipc___RecordBatchStreamWriter__Open, 4}, 
-		{ "_arrow_runtime_info", (DL_FUNC) &_arrow_runtime_info, 0}, 
 		{ "_arrow_Array__GetScalar", (DL_FUNC) &_arrow_Array__GetScalar, 2}, 
 		{ "_arrow_Scalar__ToString", (DL_FUNC) &_arrow_Scalar__ToString, 1}, 
 		{ "_arrow_StructScalar__field", (DL_FUNC) &_arrow_StructScalar__field, 2}, 
diff --git a/r/src/runtimeinfo.cpp b/r/src/config.cpp
similarity index 82%
rename from r/src/runtimeinfo.cpp
rename to r/src/config.cpp
index c18fa702342..497843573bb 100644
--- a/r/src/runtimeinfo.cpp
+++ b/r/src/config.cpp
@@ -21,6 +21,13 @@
 
 #include <arrow/config.h>
 
+// [[arrow::export]]
+std::vector<std::string> build_info() {
+  auto info = arrow::GetBuildInfo();
+  return {info.version_string, info.compiler_id, info.compiler_version,
+          info.compiler_flags, info.git_id};
+}
+
 // [[arrow::export]]
 std::vector<std::string> runtime_info() {
   auto info = arrow::GetRuntimeInfo();

From 2bd8dc2d4dcbc905d0277e3b232c8129762cf888 Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Tue, 4 May 2021 13:56:54 -0700
Subject: [PATCH 183/719] ARROW-12199: [R] bindings for stddev, variance

Adds bindings for stddev and variance, and also uses these kernels in the dplyr expressions when calls to sd/var

Closes #10215 from thisisnic/ARROW-12199-stddev_variance

Authored-by: Nic Crane <thisisnic@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 r/R/dplyr.R                               |  3 +--
 r/src/compute.cpp                         |  5 +++++
 r/tests/testthat/test-compute-aggregate.R | 18 ++++++++++++++++++
 3 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/r/R/dplyr.R b/r/R/dplyr.R
index 845cb3a1815..c172e9ba065 100644
--- a/r/R/dplyr.R
+++ b/r/R/dplyr.R
@@ -632,7 +632,7 @@ arrow_mask <- function(.data) {
   # Some R functions will still try to evaluate on an Expression
   # and return NA with a warning
   fail <- function(...) stop("Not implemented")
-  for (f in c("mean")) {
+  for (f in c("mean", "sd")) {
     f_env[[f]] <- fail
   }
 
@@ -1006,7 +1006,6 @@ abandon_ship <- function(call, .data, msg = NULL) {
       stop(msg, "\nCall collect() first to pull data into R.", call. = FALSE)
     }
   }
-
   # else, collect and call dplyr method
   if (!is.null(msg)) {
     warning(msg, "; pulling data into R", immediate. = TRUE, call. = FALSE)
diff --git a/r/src/compute.cpp b/r/src/compute.cpp
index 34bc3bea456..c215d661e3a 100644
--- a/r/src/compute.cpp
+++ b/r/src/compute.cpp
@@ -233,6 +233,11 @@ std::shared_ptr<arrow::compute::FunctionOptions> make_compute_options(
                                      max_replacements);
   }
 
+  if (func_name == "variance" || func_name == "stddev") {
+    using Options = arrow::compute::VarianceOptions;
+    return std::make_shared<Options>(cpp11::as_cpp<int64_t>(options["ddof"]));
+  }
+
   return nullptr;
 }
 
diff --git a/r/tests/testthat/test-compute-aggregate.R b/r/tests/testthat/test-compute-aggregate.R
index 0621b7779c7..398b39fb17f 100644
--- a/r/tests/testthat/test-compute-aggregate.R
+++ b/r/tests/testthat/test-compute-aggregate.R
@@ -381,3 +381,21 @@ test_that("all.Array and all.ChunkedArray", {
   expect_vector_equal(all(input, na.rm = TRUE), data_logical)
   
 })
+
+test_that("variance", {
+  data <- c(-37, 267, 88, -120, 9, 101, -65, -23, NA)
+  arr <- Array$create(data)
+  chunked_arr <- ChunkedArray$create(data)
+  
+  expect_equal(call_function("variance", arr, options = list(ddof = 5)), Scalar$create(34596))
+  expect_equal(call_function("variance", chunked_arr, options = list(ddof = 5)), Scalar$create(34596))
+})
+
+test_that("stddev", {
+  data <- c(-37, 267, 88, -120, 9, 101, -65, -23, NA)
+  arr <- Array$create(data)
+  chunked_arr <- ChunkedArray$create(data)
+  
+  expect_equal(call_function("stddev", arr, options = list(ddof = 5)), Scalar$create(186))
+  expect_equal(call_function("stddev", chunked_arr, options = list(ddof = 5)), Scalar$create(186))
+})

From 3a9aea3dd0e2a2b1c68bd9cfc141d83adb9fbc35 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Wed, 5 May 2021 09:32:41 +0200
Subject: [PATCH 184/719] ARROW-12428: [Python] Expose pre_buffer in
 pyarrow.parquet

This allows using the new option without going through the datasets API, such as to read a single file. A simple benchmark is in the JIRA. This helps close the gap between PyArrow and fsspec read performance, as fsspec performs readahead by default.

Closes #10074 from lidavidm/arrow-12428

Lead-authored-by: David Li <li.davidm96@gmail.com>
Co-authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Signed-off-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
---
 python/pyarrow/_parquet.pyx                   |  4 +-
 python/pyarrow/parquet.py                     | 39 +++++++++++++++----
 python/pyarrow/tests/parquet/test_dataset.py  | 21 ++++++++++
 .../tests/parquet/test_parquet_file.py        | 16 ++++++++
 python/setup.py                               |  1 +
 5 files changed, 72 insertions(+), 9 deletions(-)

diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx
index 4b435ba1d1c..0b66ea0e0b0 100644
--- a/python/pyarrow/_parquet.pyx
+++ b/python/pyarrow/_parquet.pyx
@@ -929,7 +929,7 @@ cdef class ParquetReader(_Weakrefable):
 
     def open(self, object source not None, bint use_memory_map=True,
              read_dictionary=None, FileMetaData metadata=None,
-             int buffer_size=0):
+             int buffer_size=0, bint pre_buffer=False):
         cdef:
             shared_ptr[CRandomAccessFile] rd_handle
             shared_ptr[CFileMetaData] c_metadata
@@ -950,6 +950,8 @@ cdef class ParquetReader(_Weakrefable):
         else:
             raise ValueError('Buffer size must be larger than zero')
 
+        arrow_props.set_pre_buffer(pre_buffer)
+
         self.source = source
 
         get_reader(source, use_memory_map, &rd_handle)
diff --git a/python/pyarrow/parquet.py b/python/pyarrow/parquet.py
index 88683d95013..97e431c4179 100644
--- a/python/pyarrow/parquet.py
+++ b/python/pyarrow/parquet.py
@@ -209,13 +209,18 @@ class ParquetFile:
     buffer_size : int, default 0
         If positive, perform read buffering when deserializing individual
         column chunks. Otherwise IO calls are unbuffered.
+    pre_buffer : bool, default False
+        Coalesce and issue file reads in parallel to improve performance on
+        high-latency filesystems (e.g. S3). If True, Arrow will use a
+        background I/O thread pool.
     """
 
     def __init__(self, source, metadata=None, common_metadata=None,
-                 read_dictionary=None, memory_map=False, buffer_size=0):
+                 read_dictionary=None, memory_map=False, buffer_size=0,
+                 pre_buffer=False):
         self.reader = ParquetReader()
         self.reader.open(source, use_memory_map=memory_map,
-                         buffer_size=buffer_size,
+                         buffer_size=buffer_size, pre_buffer=pre_buffer,
                          read_dictionary=read_dictionary, metadata=metadata)
         self.common_metadata = common_metadata
         self._nested_paths_by_prefix = self._build_nested_paths()
@@ -1212,13 +1217,20 @@ class ParquetDataset:
     new Arrow Dataset API). Among other things, this allows to pass
     `filters` for all columns and not only the partition keys, enables
     different partitioning schemes, etc.
+pre_buffer : bool, default True
+    Coalesce and issue file reads in parallel to improve performance on
+    high-latency filesystems (e.g. S3). If True, Arrow will use a
+    background I/O thread pool. This option is only supported for
+    use_legacy_dataset=False. If using a filesystem layer that itself
+    performs readahead (e.g. fsspec's S3FS), disable readahead for best
+    results.
 """.format(_read_docstring_common, _DNF_filter_doc)
 
     def __new__(cls, path_or_paths=None, filesystem=None, schema=None,
                 metadata=None, split_row_groups=False, validate_schema=True,
                 filters=None, metadata_nthreads=1, read_dictionary=None,
                 memory_map=False, buffer_size=0, partitioning="hive",
-                use_legacy_dataset=None):
+                use_legacy_dataset=None, pre_buffer=True):
         if use_legacy_dataset is None:
             # if a new filesystem is passed -> default to new implementation
             if isinstance(filesystem, FileSystem):
@@ -1234,6 +1246,7 @@ def __new__(cls, path_or_paths=None, filesystem=None, schema=None,
                                      read_dictionary=read_dictionary,
                                      memory_map=memory_map,
                                      buffer_size=buffer_size,
+                                     pre_buffer=pre_buffer,
                                      # unsupported keywords
                                      schema=schema, metadata=metadata,
                                      split_row_groups=split_row_groups,
@@ -1246,7 +1259,7 @@ def __init__(self, path_or_paths, filesystem=None, schema=None,
                  metadata=None, split_row_groups=False, validate_schema=True,
                  filters=None, metadata_nthreads=1, read_dictionary=None,
                  memory_map=False, buffer_size=0, partitioning="hive",
-                 use_legacy_dataset=True):
+                 use_legacy_dataset=True, pre_buffer=True):
         if partitioning != "hive":
             raise ValueError(
                 'Only "hive" for hive-like partitioning is supported when '
@@ -1480,7 +1493,8 @@ class _ParquetDatasetV2:
 
     def __init__(self, path_or_paths, filesystem=None, filters=None,
                  partitioning="hive", read_dictionary=None, buffer_size=None,
-                 memory_map=False, ignore_prefixes=None, **kwargs):
+                 memory_map=False, ignore_prefixes=None, pre_buffer=True,
+                 **kwargs):
         import pyarrow.dataset as ds
 
         # Raise error for not supported keywords
@@ -1494,7 +1508,7 @@ def __init__(self, path_or_paths, filesystem=None, filters=None,
                     "Dataset API".format(keyword))
 
         # map format arguments
-        read_options = {}
+        read_options = {"pre_buffer": pre_buffer}
         if buffer_size:
             read_options.update(use_buffered_stream=True,
                                 buffer_size=buffer_size)
@@ -1676,6 +1690,13 @@ def pieces(self):
     keys and only a hive-style directory structure is supported. When
     setting `use_legacy_dataset` to False, also within-file level filtering
     and different partitioning schemes are supported.
+pre_buffer : bool, default True
+    Coalesce and issue file reads in parallel to improve performance on
+    high-latency filesystems (e.g. S3). If True, Arrow will use a
+    background I/O thread pool. This option is only supported for
+    use_legacy_dataset=False. If using a filesystem layer that itself
+    performs readahead (e.g. fsspec's S3FS), disable readahead for best
+    results.
 
     {3}
 
@@ -1689,7 +1710,7 @@ def read_table(source, columns=None, use_threads=True, metadata=None,
                use_pandas_metadata=False, memory_map=False,
                read_dictionary=None, filesystem=None, filters=None,
                buffer_size=0, partitioning="hive", use_legacy_dataset=False,
-               ignore_prefixes=None):
+               ignore_prefixes=None, pre_buffer=True):
     if not use_legacy_dataset:
         if metadata is not None:
             raise ValueError(
@@ -1708,6 +1729,7 @@ def read_table(source, columns=None, use_threads=True, metadata=None,
                 buffer_size=buffer_size,
                 filters=filters,
                 ignore_prefixes=ignore_prefixes,
+                pre_buffer=pre_buffer,
             )
         except ImportError:
             # fall back on ParquetFile for simple cases when pyarrow.dataset
@@ -1728,7 +1750,8 @@ def read_table(source, columns=None, use_threads=True, metadata=None,
             # TODO test that source is not a directory or a list
             dataset = ParquetFile(
                 source, metadata=metadata, read_dictionary=read_dictionary,
-                memory_map=memory_map, buffer_size=buffer_size)
+                memory_map=memory_map, buffer_size=buffer_size,
+                pre_buffer=pre_buffer)
 
         return dataset.read(columns=columns, use_threads=use_threads,
                             use_pandas_metadata=use_pandas_metadata)
diff --git a/python/pyarrow/tests/parquet/test_dataset.py b/python/pyarrow/tests/parquet/test_dataset.py
index fce6ae58af7..70ea37b5af0 100644
--- a/python/pyarrow/tests/parquet/test_dataset.py
+++ b/python/pyarrow/tests/parquet/test_dataset.py
@@ -1021,6 +1021,27 @@ def test_dataset_enable_buffered_stream(tempdir, use_legacy_dataset):
         assert dataset.read().equals(table)
 
 
+@pytest.mark.pandas
+@parametrize_legacy_dataset
+def test_dataset_enable_pre_buffer(tempdir, use_legacy_dataset):
+    dirpath = tempdir / guid()
+    dirpath.mkdir()
+
+    df = _test_dataframe(10, seed=0)
+    path = dirpath / '{}.parquet'.format(0)
+    table = pa.Table.from_pandas(df)
+    _write_table(table, path, version='2.0')
+
+    for pre_buffer in (True, False):
+        dataset = pq.ParquetDataset(
+            dirpath, pre_buffer=pre_buffer,
+            use_legacy_dataset=use_legacy_dataset)
+        assert dataset.read().equals(table)
+        actual = pq.read_table(dirpath, pre_buffer=pre_buffer,
+                               use_legacy_dataset=use_legacy_dataset)
+        assert actual.equals(table)
+
+
 def _make_example_multifile_dataset(base_path, nfiles=10, file_nrows=5):
     test_data = []
     paths = []
diff --git a/python/pyarrow/tests/parquet/test_parquet_file.py b/python/pyarrow/tests/parquet/test_parquet_file.py
index 85f81a3423e..dc9a3bb5274 100644
--- a/python/pyarrow/tests/parquet/test_parquet_file.py
+++ b/python/pyarrow/tests/parquet/test_parquet_file.py
@@ -256,3 +256,19 @@ def get_all_batches(f):
         )
 
         batch_no += 1
+
+
+@pytest.mark.pandas
+@pytest.mark.parametrize('pre_buffer', [False, True])
+def test_pre_buffer(pre_buffer):
+    N, K = 10000, 4
+    df = alltypes_sample(size=N)
+    a_table = pa.Table.from_pandas(df)
+
+    buf = io.BytesIO()
+    _write_table(a_table, buf, row_group_size=N / K,
+                 compression='snappy', version='2.0')
+
+    buf.seek(0)
+    pf = pq.ParquetFile(buf, pre_buffer=pre_buffer)
+    assert pf.read().num_rows == N
diff --git a/python/setup.py b/python/setup.py
index 24d54809a42..fac8e0b32e3 100755
--- a/python/setup.py
+++ b/python/setup.py
@@ -110,6 +110,7 @@ def run(self):
                      ('with-flight', None, 'build the Flight extension'),
                      ('with-dataset', None, 'build the Dataset extension'),
                      ('with-parquet', None, 'build the Parquet extension'),
+                     ('with-s3', None, 'build the Amazon S3 extension'),
                      ('with-static-parquet', None, 'link parquet statically'),
                      ('with-static-boost', None, 'link boost statically'),
                      ('with-plasma', None, 'build the Plasma extension'),

From 900d79eabb473db6fece165283c41b577c4f98b2 Mon Sep 17 00:00:00 2001
From: Alessandro Molina <amol@turbogears.org>
Date: Wed, 5 May 2021 21:01:22 +0900
Subject: [PATCH 185/719] ARROW-12610: [C++] Skip TestS3FSGeneric TestDeleteDir
 and TestDeleteDirContents on Windows as they are flaky

Closes #10208 from amol-/ARROW-12610

Authored-by: Alessandro Molina <amol@turbogears.org>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 cpp/src/arrow/filesystem/test_util.cc | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/cpp/src/arrow/filesystem/test_util.cc b/cpp/src/arrow/filesystem/test_util.cc
index 466b8826aef..2a40c041848 100644
--- a/cpp/src/arrow/filesystem/test_util.cc
+++ b/cpp/src/arrow/filesystem/test_util.cc
@@ -211,6 +211,9 @@ void GenericFileSystemTest::TestCreateDir(FileSystem* fs) {
 }
 
 void GenericFileSystemTest::TestDeleteDir(FileSystem* fs) {
+  if (have_flaky_directory_tree_deletion())
+    GTEST_SKIP() << "Flaky directory deletion on Windows";
+
   ASSERT_OK(fs->CreateDir("AB/CD/EF"));
   ASSERT_OK(fs->CreateDir("AB/GH/IJ"));
   CreateFile(fs, "AB/abc", "");
@@ -237,6 +240,9 @@ void GenericFileSystemTest::TestDeleteDir(FileSystem* fs) {
 }
 
 void GenericFileSystemTest::TestDeleteDirContents(FileSystem* fs) {
+  if (have_flaky_directory_tree_deletion())
+    GTEST_SKIP() << "Flaky directory deletion on Windows";
+
   ASSERT_OK(fs->CreateDir("AB/CD/EF"));
   ASSERT_OK(fs->CreateDir("AB/GH/IJ"));
   CreateFile(fs, "AB/abc", "");

From a73fb8377e9f9e8e15b54e8d00d955ecf5d0334a Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Wed, 5 May 2021 07:50:36 -0500
Subject: [PATCH 186/719] ARROW-12571: [R][CI] Run nightly R with valgrind

Closes #10237 from jonkeane/ARROW-12571-valgrindCI-patch

Authored-by: Jonathan Keane <jkeane@gmail.com>
Signed-off-by: Jonathan Keane <jkeane@gmail.com>
---
 ci/etc/valgrind-cran.supp                 | 34 +++++++++++++++++
 ci/scripts/r_valgrind.sh                  | 45 +++++++++++++++++++++++
 dev/tasks/tasks.yml                       |  8 ++++
 docker-compose.yml                        | 26 +++++++++++++
 r/inst/build_arrow_static.sh              |  4 +-
 r/tests/testthat/helper-skip.R            | 19 ++++++++++
 r/tests/testthat/test-Array.R             |  1 +
 r/tests/testthat/test-arrow.R             |  4 ++
 r/tests/testthat/test-compute-aggregate.R | 12 +++++-
 9 files changed, 149 insertions(+), 4 deletions(-)
 create mode 100644 ci/etc/valgrind-cran.supp
 create mode 100755 ci/scripts/r_valgrind.sh

diff --git a/ci/etc/valgrind-cran.supp b/ci/etc/valgrind-cran.supp
new file mode 100644
index 00000000000..4d292202608
--- /dev/null
+++ b/ci/etc/valgrind-cran.supp
@@ -0,0 +1,34 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+{
+   # `testthat::skip()`s cause a valgrind error that does not show up on CRAN. 
+   <testthat_skip_error>
+   Memcheck:Cond
+   fun:gregexpr_Regexc
+   fun:do_regexpr
+   fun:bcEval
+   fun:Rf_eval
+   fun:R_execClosure
+   fun:Rf_applyClosure
+   fun:bcEval
+   fun:Rf_eval
+   fun:forcePromise
+   fun:FORCE_PROMISE
+   fun:getvar
+   fun:bcEval
+}
diff --git a/ci/scripts/r_valgrind.sh b/ci/scripts/r_valgrind.sh
new file mode 100755
index 00000000000..43f8c26739a
--- /dev/null
+++ b/ci/scripts/r_valgrind.sh
@@ -0,0 +1,45 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+: ${R_BIN:=RDvalgrind}
+
+source_dir=${1}/r
+
+${R_BIN} CMD INSTALL ${source_dir}
+pushd ${source_dir}/tests
+
+export TEST_R_WITH_ARROW=TRUE
+# to generate suppression files run:
+# ${R_BIN} --vanilla -d "valgrind --tool=memcheck --leak-check=full --track-origins=yes --gen-suppressions=all --log-file=memcheck.log" -f testtthat.supp
+${R_BIN} --vanilla -d "valgrind --tool=memcheck --leak-check=full --track-origins=yes --suppressions=/${1}/ci/etc/valgrind-cran.supp" -f testthat.R |& tee testthat.out
+
+# valgrind --error-exitcode=1 should return an erroring exit code that we can catch,
+# but R eats that and returns 0, so we need to look at the output and make sure that
+# we have 0 errors instead.
+if [ $(grep -c "ERROR SUMMARY: 0 errors" testthat.out) != 1 ]; then
+  cat testthat.out
+  echo "Found Valgrind errors"
+  exit 1
+fi
+
+# We might also considering using the greps that LibthGBM uses:
+# https://github.com/microsoft/LightGBM/blob/fa6d356555f9ef888acf5f5e259dca958ca24f6d/.ci/test_r_package_valgrind.sh#L20-L85
+
+popd
\ No newline at end of file
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index f82960bf353..832d16d9f69 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -862,6 +862,14 @@ tasks:
         FEDORA: 33
       run: fedora-python
 
+  test-r-linux-valgrind:
+    ci: azure
+    template: docker-tests/azure.linux.yml
+    params:
+      env:
+        UBUNTU: 18.04
+      run: ubuntu-r-valgrind
+
   test-r-linux-as-cran:
     ci: github
     template: r/github.linux.cran.yml
diff --git a/docker-compose.yml b/docker-compose.yml
index 4158ee3ff64..d9ca731dbd3 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -116,6 +116,7 @@ x-hierarchy:
   - ubuntu-cpp-sanitizer
   - ubuntu-cpp-thread-sanitizer
   - ubuntu-r-sanitizer
+  - ubuntu-r-valgrind
   - python-sdist
   - r
   # helper services
@@ -1050,6 +1051,31 @@ services:
       /bin/bash -c "
         /arrow/ci/scripts/r_sanitize.sh /arrow"
 
+  ubuntu-r-valgrind:
+    # Only 18.04 and amd64 supported
+    # Usage:
+    #   docker-compose build ubuntu-r-valgrind
+    #   docker-compose run ubuntu-r-valgrind
+    image: ${REPO}:amd64-ubuntu-18.04-r-valgrind
+    build:
+      context: .
+      dockerfile: ci/docker/linux-r.dockerfile
+      cache_from:
+        - ${REPO}:amd64-ubuntu-18.04-r-valgrind
+      args:
+        base: wch1/r-debug:latest
+        r_bin: RDvalgrind
+    environment:
+      <<: *ccache
+      # AVX512 not supported by Valgrind (similar to ARROW-9851) some runners support AVX512 and some do not
+      # so some build might pass without this setting, but we want to ensure that we stay to AVX2 regardless of runner.
+      EXTRA_CMAKE_FLAGS: "-DARROW_RUNTIME_SIMD_LEVEL=AVX2"
+    volumes: *ubuntu-volumes
+    command: >
+      /bin/bash -c "
+        /arrow/ci/scripts/r_valgrind.sh /arrow"
+
+
   ################################# Go ########################################
 
   debian-go:
diff --git a/r/inst/build_arrow_static.sh b/r/inst/build_arrow_static.sh
index e9c82a10e40..6fba5be34bc 100755
--- a/r/inst/build_arrow_static.sh
+++ b/r/inst/build_arrow_static.sh
@@ -53,8 +53,8 @@ ${CMAKE} -DARROW_BOOST_USE_SHARED=OFF \
     -DARROW_DATASET=${ARROW_DATASET:-ON} \
     -DARROW_DEPENDENCY_SOURCE=BUNDLED \
     -DARROW_FILESYSTEM=ON \
-    -DARROW_JEMALLOC=${ARROW_JEMALLOC:-ON} \
-    -DARROW_MIMALLOC=${ARROW_MIMALLOC:-$ARROW_DEFAULT_PARAM} \
+    -DARROW_JEMALLOC=${ARROW_JEMALLOC:-$ARROW_DEFAULT_PARAM} \
+    -DARROW_MIMALLOC=${ARROW_MIMALLOC:-ON} \
     -DARROW_JSON=ON \
     -DARROW_PARQUET=${ARROW_PARQUET:-ON} \
     -DARROW_S3=${ARROW_S3:-$ARROW_DEFAULT_PARAM} \
diff --git a/r/tests/testthat/helper-skip.R b/r/tests/testthat/helper-skip.R
index 6fb97da000d..b1c7d66bec8 100644
--- a/r/tests/testthat/helper-skip.R
+++ b/r/tests/testthat/helper-skip.R
@@ -22,6 +22,11 @@ build_features <- c(
 )
 
 skip_if_not_available <- function(feature) {
+  if (feature == "re2") {
+    # RE2 does not support valgrind (on purpose): https://github.com/google/re2/issues/177
+    skip_on_valgrind()
+  }
+
   yes <- feature %in% names(build_features) && build_features[feature]
   if (!yes) {
     skip(paste("Arrow C++ not built with", feature))
@@ -29,6 +34,8 @@ skip_if_not_available <- function(feature) {
 }
 
 skip_if_no_pyarrow <- function() {
+  skip_on_valgrind()
+
   skip_if_not_installed("reticulate")
   if (!reticulate::py_module_available("pyarrow")) {
     skip("pyarrow not available for testing")
@@ -49,6 +56,18 @@ skip_if_not_running_large_memory_tests <- function() {
   )
 }
 
+skip_on_valgrind <- function() {
+  # This does not actually skip on valgrind because we can't exactly detect it.
+  # Instead, it skips on CRAN when the OS is linux + and the R version is development 
+  # (which is where valgrind is run as of this code)
+  linux_dev <- identical(tolower(Sys.info()[["sysname"]]), "linux") &&
+    grepl("devel", R.version.string)
+
+  if (linux_dev) {
+    skip_on_cran()
+  }
+}
+
 process_is_running <- function(x) {
   cmd <- sprintf("ps aux | grep '%s' | grep -v grep", x)
   tryCatch(system(cmd, ignore.stdout = TRUE) == 0, error = function(e) FALSE)
diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R
index e064f81cdfa..26d0a3005e4 100644
--- a/r/tests/testthat/test-Array.R
+++ b/r/tests/testthat/test-Array.R
@@ -53,6 +53,7 @@ test_that("binary Array", {
   expect_array_roundtrip(bin, fixed_size_binary(byte_width = 10))
 
   # degenerate cases
+  skip_on_valgrind() # valgrind errors on these tests ARROW-12638
   bin <- vctrs::new_vctr(
     list(1:10),
     class = "arrow_binary"
diff --git a/r/tests/testthat/test-arrow.R b/r/tests/testthat/test-arrow.R
index 2c901e1c96a..2ab127e4800 100644
--- a/r/tests/testthat/test-arrow.R
+++ b/r/tests/testthat/test-arrow.R
@@ -62,6 +62,10 @@ test_that("check for an ArrowObject in functions use std::shared_ptr", {
 })
 
 test_that("MemoryPool calls gc() to free memory when allocation fails (ARROW-10080)", {
+  # There is a valgrind error on this test because there cannot be memory allocated
+  # which is exactly what this test is checking, but we quiet this
+  skip_on_valgrind()
+
   env <- new.env()
   trace(gc, print = FALSE, tracer = function() {
           env$gc_was_called <- TRUE
diff --git a/r/tests/testthat/test-compute-aggregate.R b/r/tests/testthat/test-compute-aggregate.R
index 398b39fb17f..95f958cf9f3 100644
--- a/r/tests/testthat/test-compute-aggregate.R
+++ b/r/tests/testthat/test-compute-aggregate.R
@@ -37,7 +37,11 @@ test_that("sum.Array", {
 
   floats <- c(floats, NA)
   na <- Array$create(floats)
-  expect_identical(as.numeric(sum(na)), sum(floats))
+  if (!grepl("devel", R.version.string)) {
+    # Valgrind on R-devel confuses NaN and NA_real_
+    # https://r.789695.n4.nabble.com/Difference-in-NA-behavior-in-R-devel-running-under-valgrind-td4768731.html
+    expect_identical(as.numeric(sum(na)), sum(floats))
+  }
   expect_r6_class(sum(na, na.rm = TRUE), "Scalar")
   expect_identical(as.numeric(sum(na, na.rm = TRUE)), sum(floats, na.rm = TRUE))
 
@@ -78,7 +82,11 @@ test_that("mean.Array", {
 
   floats <- c(floats, NA)
   na <- Array$create(floats)
-  expect_identical(as.vector(mean(na)), mean(floats))
+  if (!grepl("devel", R.version.string)) {
+    # Valgrind on R-devel confuses NaN and NA_real_
+    # https://r.789695.n4.nabble.com/Difference-in-NA-behavior-in-R-devel-running-under-valgrind-td4768731.html
+    expect_identical(as.vector(mean(na)), mean(floats))
+  }
   expect_r6_class(mean(na, na.rm = TRUE), "Scalar")
   expect_identical(as.vector(mean(na, na.rm = TRUE)), mean(floats, na.rm = TRUE))
 

From f4dc0560964bd205ee06002411f97b402e30e5a2 Mon Sep 17 00:00:00 2001
From: Ian Cook <ianmcook@gmail.com>
Date: Wed, 5 May 2021 11:38:06 -0500
Subject: [PATCH 187/719] ARROW-12642: [R] LIBARROW_MINIMAL, LIBARROW_DOWNLOAD,
 NOT_CRAN env vars should not be case-sensitive

This makes the environment variables `LIBARROW_MINIMAL`, `LIBARROW_DOWNLOAD`, and `NOT_CRAN` case-insensitive

Closes #10252 from ianmcook/ARROW-12642

Authored-by: Ian Cook <ianmcook@gmail.com>
Signed-off-by: Jonathan Keane <jkeane@gmail.com>
---
 r/configure                  | 3 +++
 r/inst/build_arrow_static.sh | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/r/configure b/r/configure
index 2da2b9f0178..aa7e7a8d01b 100755
--- a/r/configure
+++ b/r/configure
@@ -38,6 +38,9 @@ ARROW_R_DEV=`echo $ARROW_R_DEV | tr '[:upper:]' '[:lower:]'`
 FORCE_AUTOBREW=`echo $FORCE_AUTOBREW | tr '[:upper:]' '[:lower:]'`
 FORCE_BUNDLED_BUILD=`echo $FORCE_BUNDLED_BUILD | tr '[:upper:]' '[:lower:]'`
 ARROW_USE_PKG_CONFIG=`echo $ARROW_USE_PKG_CONFIG | tr '[:upper:]' '[:lower:]'`
+LIBARROW_MINIMAL=`echo $LIBARROW_MINIMAL | tr '[:upper:]' '[:lower:]'`
+LIBARROW_DOWNLOAD=`echo $LIBARROW_DOWNLOAD | tr '[:upper:]' '[:lower:]'`
+NOT_CRAN=`echo $NOT_CRAN | tr '[:upper:]' '[:lower:]'`
 
 VERSION=`grep '^Version' DESCRIPTION | sed s/Version:\ //`
 UNAME=`uname -s`
diff --git a/r/inst/build_arrow_static.sh b/r/inst/build_arrow_static.sh
index 6fba5be34bc..5ae615dae9b 100755
--- a/r/inst/build_arrow_static.sh
+++ b/r/inst/build_arrow_static.sh
@@ -36,6 +36,9 @@ set -x
 SOURCE_DIR="$(cd "${SOURCE_DIR}" && pwd)"
 DEST_DIR="$(mkdir -p "${DEST_DIR}" && cd "${DEST_DIR}" && pwd)"
 
+# Make some env vars case-insensitive
+LIBARROW_MINIMAL=`echo $LIBARROW_MINIMAL | tr '[:upper:]' '[:lower:]'`
+
 if [ "$LIBARROW_MINIMAL" = "false" ]; then
   ARROW_DEFAULT_PARAM="ON"
 else

From 1f4951b15f502bb1df9c321ebd37d3c84c4abf62 Mon Sep 17 00:00:00 2001
From: Ian Cook <ianmcook@gmail.com>
Date: Wed, 5 May 2021 16:30:02 -0400
Subject: [PATCH 188/719] ARROW-12658: [C++] Bump aws-c-common to v0.5.10

This updates aws-c-common to the latest version to fix build errors with new clang versions

Closes #10250 from ianmcook/ARROW-12658

Authored-by: Ian Cook <ianmcook@gmail.com>
Signed-off-by: Ian Cook <ianmcook@gmail.com>
---
 cpp/thirdparty/versions.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt
index 637435d19b4..a2f7f2c7213 100644
--- a/cpp/thirdparty/versions.txt
+++ b/cpp/thirdparty/versions.txt
@@ -27,7 +27,7 @@
 ARROW_ABSL_BUILD_VERSION=0f3bb466b868b523cf1dc9b2aaaed65c77b28862
 ARROW_AWSSDK_BUILD_VERSION=1.8.133
 ARROW_AWS_CHECKSUMS_BUILD_VERSION=v0.1.10
-ARROW_AWS_C_COMMON_BUILD_VERSION=v0.4.59
+ARROW_AWS_C_COMMON_BUILD_VERSION=v0.5.10
 ARROW_AWS_C_EVENT_STREAM_BUILD_VERSION=v0.1.5
 ARROW_BOOST_BUILD_VERSION=1.75.0
 ARROW_BROTLI_BUILD_VERSION=v1.0.9

From b8138b2ed690654535038080c62ed76ec359bd7c Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Thu, 6 May 2021 05:47:27 +0900
Subject: [PATCH 189/719] ARROW-12491: [Packaging][RPM] Add support for Amazon
 Linux 2

Closes #10248 from kou/linux-amazon-linux-2

Authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 dev/release/05-binary-upload.sh               |  38 ++-
 dev/release/binary-task.rb                    |  43 +--
 dev/release/verify-release-candidate.sh       |   3 +-
 dev/release/verify-yum.sh                     |  28 +-
 dev/tasks/linux-packages/Rakefile             |   3 +
 .../yum/Apache-Arrow.repo                     |  10 +-
 .../yum/amazon-linux-2/Dockerfile             |  26 ++
 .../yum/apache-arrow-release.spec.in          |  25 +-
 .../yum/centos-7/Dockerfile                   |   2 -
 .../yum/centos-8/Dockerfile                   |   2 -
 .../linux-packages/apache-arrow/debian/rules  |   1 +
 .../yum/amazon-linux-2/Dockerfile             |  56 ++++
 .../yum/amazon-linux-2/qemu-dummy-static      |  33 +++
 .../apache-arrow/yum/arrow.spec.in            |  62 +++--
 .../apache-arrow/yum/centos-7/Dockerfile      |   4 +-
 .../apache-arrow/yum/centos-8/Dockerfile      |   1 -
 dev/tasks/linux-packages/package-task.rb      |  21 +-
 dev/tasks/linux-packages/yum/build.sh         |  21 +-
 dev/tasks/tasks.yml                           | 256 ++++++++----------
 19 files changed, 372 insertions(+), 263 deletions(-)
 create mode 100644 dev/tasks/linux-packages/apache-arrow-release/yum/amazon-linux-2/Dockerfile
 create mode 100644 dev/tasks/linux-packages/apache-arrow/yum/amazon-linux-2/Dockerfile
 create mode 100755 dev/tasks/linux-packages/apache-arrow/yum/amazon-linux-2/qemu-dummy-static

diff --git a/dev/release/05-binary-upload.sh b/dev/release/05-binary-upload.sh
index 61ac42b6fb5..6f318fc088f 100755
--- a/dev/release/05-binary-upload.sh
+++ b/dev/release/05-binary-upload.sh
@@ -64,6 +64,8 @@ fi
 # To deactivate one category, deactivate the category and all of its dependents.
 # To explicitly select one category, set UPLOAD_DEFAULT=0 UPLOAD_X=1.
 : ${UPLOAD_DEFAULT:=1}
+: ${UPLOAD_AMAZON_LINUX_RPM:=${UPLOAD_DEFAULT}}
+: ${UPLOAD_AMAZON_LINUX_YUM:=${UPLOAD_DEFAULT}}
 : ${UPLOAD_CENTOS_RPM:=${UPLOAD_DEFAULT}}
 : ${UPLOAD_CENTOS_YUM:=${UPLOAD_DEFAULT}}
 : ${UPLOAD_DEBIAN_APT:=${UPLOAD_DEFAULT}}
@@ -76,21 +78,13 @@ fi
 rake_tasks=()
 apt_targets=()
 yum_targets=()
-if [ ${UPLOAD_DEBIAN_DEB} -gt 0 ]; then
-  rake_tasks+=(deb)
-  apt_targets+=(debian)
-fi
-if [ ${UPLOAD_DEBIAN_APT} -gt 0 ]; then
-  rake_tasks+=(apt:rc)
-  apt_targets+=(debian)
-fi
-if [ ${UPLOAD_UBUNTU_DEB} -gt 0 ]; then
-  rake_tasks+=(deb)
-  apt_targets+=(ubuntu)
+if [ ${UPLOAD_AMAZON_LINUX_RPM} -gt 0 ]; then
+  rake_tasks+=(rpm)
+  yum_targets+=(amazon-linux)
 fi
-if [ ${UPLOAD_UBUNTU_APT} -gt 0 ]; then
-  rake_tasks+=(apt:rc)
-  apt_targets+=(ubuntu)
+if [ ${UPLOAD_AMAZON_LINUX_YUM} -gt 0 ]; then
+  rake_tasks+=(yum:rc)
+  yum_targets+=(amazon-linux)
 fi
 if [ ${UPLOAD_CENTOS_RPM} -gt 0 ]; then
   rake_tasks+=(rpm)
@@ -100,12 +94,28 @@ if [ ${UPLOAD_CENTOS_YUM} -gt 0 ]; then
   rake_tasks+=(yum:rc)
   yum_targets+=(centos)
 fi
+if [ ${UPLOAD_DEBIAN_DEB} -gt 0 ]; then
+  rake_tasks+=(deb)
+  apt_targets+=(debian)
+fi
+if [ ${UPLOAD_DEBIAN_APT} -gt 0 ]; then
+  rake_tasks+=(apt:rc)
+  apt_targets+=(debian)
+fi
 if [ ${UPLOAD_NUGET} -gt 0 ]; then
   rake_tasks+=(nuget:rc)
 fi
 if [ ${UPLOAD_PYTHON} -gt 0 ]; then
   rake_tasks+=(python:rc)
 fi
+if [ ${UPLOAD_UBUNTU_DEB} -gt 0 ]; then
+  rake_tasks+=(deb)
+  apt_targets+=(ubuntu)
+fi
+if [ ${UPLOAD_UBUNTU_APT} -gt 0 ]; then
+  rake_tasks+=(apt:rc)
+  apt_targets+=(ubuntu)
+fi
 rake_tasks+=(summary:rc)
 
 tmp_dir=binary/tmp
diff --git a/dev/release/binary-task.rb b/dev/release/binary-task.rb
index 0795f732bcd..8e3327cdeb5 100644
--- a/dev/release/binary-task.rb
+++ b/dev/release/binary-task.rb
@@ -872,17 +872,6 @@ def available_apt_targets
     ]
   end
 
-  def apt_distribution_label(distribution)
-    case distribution
-    when "debian"
-      "Debian"
-    when "ubuntu"
-      "Ubuntu"
-    else
-      distribution
-    end
-  end
-
   def apt_targets
     env_apt_targets = (ENV["APT_TARGETS"] || "").split(",")
     if env_apt_targets.empty?
@@ -991,7 +980,6 @@ def define_deb_tasks
       task :upload do
         apt_distributions.each do |distribution|
           distribution_dir = "#{deb_dir}/#{distribution}"
-          distribution_label = apt_distribution_label(distribution)
           uploader = ArtifactoryUploader.new(distribution: distribution,
                                              rc: rc,
                                              source: distribution_dir,
@@ -1173,7 +1161,6 @@ def define_apt_rc_tasks
         task :upload => apt_rc_repositories_dir do
           apt_distributions.each do |distribution|
             dists_dir = "#{apt_rc_repositories_dir}/#{distribution}/dists"
-            distribution_label = apt_distribution_label(distribution)
             uploader = ArtifactoryUploader.new(distribution: distribution,
                                                rc: rc,
                                                source: dists_dir,
@@ -1211,7 +1198,6 @@ def define_apt_release_tasks
         task :upload => apt_release_repositories_dir do
           apt_distributions.each do |distribution|
             distribution_dir = "#{apt_release_repositories_dir}/#{distribution}"
-            distribution_label = apt_distribution_label(distribution)
             uploader = ArtifactoryUploader.new(distribution: distribution,
                                                source: distribution_dir,
                                                api_key: artifactory_api_key)
@@ -1249,20 +1235,12 @@ def yum_release_repositories_dir
 
   def available_yum_targets
     [
+      ["amazon-linux", "2"],
       ["centos", "7"],
       ["centos", "8"],
     ]
   end
 
-  def yum_distribution_label(distribution)
-    case distribution
-    when "centos"
-      "CentOS"
-    else
-      distribution
-    end
-  end
-
   def yum_targets
     env_yum_targets = (ENV["YUM_TARGETS"] || "").split(",")
     if env_yum_targets.empty?
@@ -1270,7 +1248,7 @@ def yum_targets
     else
       available_yum_targets.select do |distribution, distribution_version|
         env_yum_targets.any? do |env_yum_target|
-          if env_yum_target.include?("-")
+          if /\d/.match?(env_yum_target)
             env_yum_target.start_with?("#{distribution}-#{distribution_version}")
           else
             env_yum_target == distribution
@@ -1432,7 +1410,6 @@ def define_rpm_tasks
       task :upload do
         yum_distributions.each do |distribution|
           distribution_dir = "#{rpm_dir}/#{distribution}"
-          distribution_label = yum_distribution_label(distribution)
           uploader = ArtifactoryUploader.new(distribution: distribution,
                                              rc: rc,
                                              source: distribution_dir,
@@ -1517,7 +1494,6 @@ def define_yum_rc_tasks
         desc "Upload RC Yum repositories"
         task :upload => yum_rc_repositories_dir do
           yum_targets.each do |distribution, distribution_version|
-            distribution_label = yum_distribution_label(distribution)
             base_dir = [
               yum_rc_repositories_dir,
               distribution,
@@ -1569,7 +1545,6 @@ def define_yum_release_tasks
         task :upload => yum_release_repositories_dir do
           yum_distributions.each do |distribution|
             distribution_dir = "#{yum_release_repositories_dir}/#{distribution}"
-            distribution_label = yum_distribution_label(distribution)
             uploader = ArtifactoryUploader.new(distribution: distribution,
                                                source: distribution_dir,
                                                api_key: artifactory_api_key)
@@ -1706,11 +1681,12 @@ def define_summary_tasks
       task :rc do
         puts(<<-SUMMARY)
 Success! The release candidate binaries are available here:
-  https://apache.jfrog.io/artifactory/arrow/debian-rc/
-  https://apache.jfrog.io/artifactory/arrow/ubuntu-rc/
+  https://apache.jfrog.io/artifactory/arrow/amazon-linux-rc/
   https://apache.jfrog.io/artifactory/arrow/centos-rc/
-  https://apache.jfrog.io/artifactory/arrow/python-rc/#{full_version}
+  https://apache.jfrog.io/artifactory/arrow/debian-rc/
   https://apache.jfrog.io/artifactory/arrow/nuget-rc/#{full_version}
+  https://apache.jfrog.io/artifactory/arrow/python-rc/#{full_version}
+  https://apache.jfrog.io/artifactory/arrow/ubuntu-rc/
         SUMMARY
       end
 
@@ -1718,11 +1694,12 @@ def define_summary_tasks
       task :release do
         puts(<<-SUMMARY)
 Success! The release binaries are available here:
-  https://apache.jfrog.io/arrow/debian/
-  https://apache.jfrog.io/arrow/ubuntu/
+  https://apache.jfrog.io/arrow/amazon-linux/
   https://apache.jfrog.io/arrow/centos/
-  https://apache.jfrog.io/arrow/python/#{version}
+  https://apache.jfrog.io/arrow/debian/
   https://apache.jfrog.io/arrow/nuget/#{version}
+  https://apache.jfrog.io/arrow/python/#{version}
+  https://apache.jfrog.io/arrow/ubuntu/
         SUMMARY
       end
     end
diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh
index c421e2f0be6..320654dd800 100755
--- a/dev/release/verify-release-candidate.sh
+++ b/dev/release/verify-release-candidate.sh
@@ -171,7 +171,8 @@ test_apt() {
 }
 
 test_yum() {
-  for target in "centos:7" \
+  for target in "amazonlinux:2" \
+                "centos:7" \
                 "centos:8" \
                 "arm64v8/centos:8"; do
     case "${target}" in
diff --git a/dev/release/verify-yum.sh b/dev/release/verify-yum.sh
index b9c46c43898..71d28f8644f 100755
--- a/dev/release/verify-yum.sh
+++ b/dev/release/verify-yum.sh
@@ -48,8 +48,17 @@ have_flight=yes
 have_gandiva=yes
 have_glib=yes
 have_parquet=yes
+have_python=yes
 install_command="dnf install -y --enablerepo=powertools"
 case "${distribution}-${distribution_version}" in
+  amzn-2)
+    cmake_package=cmake3
+    cmake_command=cmake3
+    have_flight=no
+    have_gandiva=no
+    have_python=no
+    install_command="yum install -y"
+    ;;
   centos-7)
     cmake_package=cmake3
     cmake_command=cmake3
@@ -75,9 +84,19 @@ if [ "${TYPE}" = "local" ]; then
       package_version="${VERSION}-1"
       ;;
   esac
-  package_version+=".el${distribution_version}"
   release_path="${local_prefix}/yum/repositories"
-  release_path+="/centos/${distribution_version}/$(arch)/Packages"
+  case "${distribution}" in
+    amzn)
+      package_version+=".${distribution}${distribution_version}"
+      release_path+="/amazon-linux"
+      amazon-linux-extras install -y epel
+      ;;
+    *)
+      package_version+=".el${distribution_version}"
+      release_path+="/centos"
+      ;;
+  esac
+  release_path+="/${distribution_version}/$(arch)/Packages"
   release_path+="/apache-arrow-release-${package_version}.noarch.rpm"
   ${install_command} "${release_path}"
 else
@@ -122,7 +141,10 @@ if [ "${have_glib}" = "yes" ]; then
   ${install_command} --enablerepo=epel arrow-glib-devel-${package_version}
   ${install_command} --enablerepo=epel arrow-glib-doc-${package_version}
 fi
-${install_command} --enablerepo=epel arrow-python-devel-${package_version}
+
+if [ "${have_python}" = "yes" ]; then
+  ${install_command} --enablerepo=epel arrow-python-devel-${package_version}
+fi
 
 if [ "${have_glib}" = "yes" ]; then
   ${install_command} --enablerepo=epel plasma-glib-devel-${package_version}
diff --git a/dev/tasks/linux-packages/Rakefile b/dev/tasks/linux-packages/Rakefile
index a84a43ae517..53a372cf5c2 100644
--- a/dev/tasks/linux-packages/Rakefile
+++ b/dev/tasks/linux-packages/Rakefile
@@ -109,6 +109,7 @@ class LocalBinaryTask < BinaryTask
 
   def resolve_docker_image(target)
     image = ""
+    target = target.gsub(/\Aamazon-linux/, "amazonlinux")
     case target
     when /-(?:arm64|aarch64)\z/
       target = $PREMATCH
@@ -203,6 +204,8 @@ class LocalBinaryTask < BinaryTask
     # Disable aarch64 targets by default for now
     # because they require some setups on host.
     [
+      "amazon-linux-2",
+      # "amazon-linux-2-aarch64",
       "centos-7",
       "centos-8",
       # "centos-8-aarch64",
diff --git a/dev/tasks/linux-packages/apache-arrow-release/yum/Apache-Arrow.repo b/dev/tasks/linux-packages/apache-arrow-release/yum/Apache-Arrow.repo
index fd77306e6f5..060a935e3b8 100644
--- a/dev/tasks/linux-packages/apache-arrow-release/yum/Apache-Arrow.repo
+++ b/dev/tasks/linux-packages/apache-arrow-release/yum/Apache-Arrow.repo
@@ -16,22 +16,22 @@
 # under the License.
 
 [apache-arrow-amazon-linux]
-name=Apache Arrow for Amazon Linux 2 - $basearch
-baseurl=https://apache.jfrog.io/artifactory/arrow/centos/7/$basearch/
+name=Apache Arrow for Amazon Linux $releasever - $basearch
+baseurl=https://apache.jfrog.io/artifactory/arrow/amazon-linux/$releasever/$basearch/
 gpgcheck=1
-enabled=1
+enabled=0
 gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-Apache-Arrow
 
 [apache-arrow-centos]
 name=Apache Arrow for CentOS $releasever - $basearch
 baseurl=https://apache.jfrog.io/artifactory/arrow/centos/$releasever/$basearch/
 gpgcheck=1
-enabled=1
+enabled=0
 gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-Apache-Arrow
 
 [apache-arrow-rhel]
 name=Apache Arrow for RHEL $releasever - $basearch
 baseurl=https://apache.jfrog.io/artifactory/arrow/centos/$releasever/$basearch/
 gpgcheck=1
-enabled=1
+enabled=0
 gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-Apache-Arrow
diff --git a/dev/tasks/linux-packages/apache-arrow-release/yum/amazon-linux-2/Dockerfile b/dev/tasks/linux-packages/apache-arrow-release/yum/amazon-linux-2/Dockerfile
new file mode 100644
index 00000000000..800df6c68e0
--- /dev/null
+++ b/dev/tasks/linux-packages/apache-arrow-release/yum/amazon-linux-2/Dockerfile
@@ -0,0 +1,26 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+FROM amazonlinux:2
+
+ARG DEBUG
+
+RUN \
+  quiet=$([ "${DEBUG}" = "yes" ] || echo "--quiet") && \
+  yum install -y ${quiet} \
+    rpmdevtools && \
+  yum clean ${quiet} all
diff --git a/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in b/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in
index 22eaa42341f..cab546957d8 100644
--- a/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in
+++ b/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in
@@ -17,9 +17,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-%define _centos_ver %{?centos_ver:%{centos_ver}}%{!?centos_ver:8}
-
-%define use_dnf (%{_centos_ver} >= 8)
+%define use_dnf (%{rhel} >= 8)
 %if %{use_dnf}
 %define yum_repository_enable() (dnf config-manager --set-enabled %1)
 %define yum_repository_disable() (dnf config-manager --set-disabled %1)
@@ -53,12 +51,15 @@ Apache Arrow release files.
 %setup -q
 
 %build
-# We use distribution version explicitly because we can't use symbolic link
-# on Artifactory CentOS uses 7 and 8 but RHEL uses 7Server and 8Server
-# for $releasever. If we can use symbolic link on Artifactory we can use
-# $releasever directly.
-distribution_version=$(cut -d: -f5 /etc/system-release-cpe)
-sed -i'' -e "s/\\\$releasever/${distribution_version}/g" Apache-Arrow.repo
+distribution=$(. /etc/os-release && echo "${ID}")
+if [ "${distribution}" = "rhel" ]; then
+  # We use distribution version explicitly for RHEL because we can't
+  # use symbolic link on Artifactory. CentOS uses 7 and 8 but RHEL uses
+  # 7Server and 8Server for $releasever. If we can use symbolic link
+  # on Artifactory we can use $releasever directly.
+  distribution_version=$(. /etc/os-release && echo "${VERSION_ID}")
+  sed -i'' -e "s/\\\$releasever/${distribution_version}/g" Apache-Arrow.repo
+fi
 
 %install
 rm -rf $RPM_BUILD_ROOT
@@ -81,16 +82,10 @@ rm -rf $RPM_BUILD_ROOT
 %post
 if grep -q 'Amazon Linux release 2' /etc/system-release 2>/dev/null; then
   %{yum_repository_enable apache-arrow-amazon-linux}
-  %{yum_repository_disable apache-arrow-centos}
-  %{yum_repository_disable apache-arrow-rhel}
 elif grep -q 'Red Hat Enterprise Linux' /etc/system-release 2>/dev/null; then
-  %{yum_repository_disable apache-arrow-amazon-linux}
-  %{yum_repository_disable apache-arrow-centos}
   %{yum_repository_enable apache-arrow-rhel}
 else
-  %{yum_repository_disable apache-arrow-amazon-linux}
   %{yum_repository_enable apache-arrow-centos}
-  %{yum_repository_disable apache-arrow-rhel}
 fi
 
 %changelog
diff --git a/dev/tasks/linux-packages/apache-arrow-release/yum/centos-7/Dockerfile b/dev/tasks/linux-packages/apache-arrow-release/yum/centos-7/Dockerfile
index 0396593d7d5..236b0e2972d 100644
--- a/dev/tasks/linux-packages/apache-arrow-release/yum/centos-7/Dockerfile
+++ b/dev/tasks/linux-packages/apache-arrow-release/yum/centos-7/Dockerfile
@@ -21,8 +21,6 @@ ARG DEBUG
 
 RUN \
   quiet=$([ "${DEBUG}" = "yes" ] || echo "--quiet") && \
-  yum install -y ${quiet} epel-release && \
   yum install -y ${quiet} \
-    rpm-build \
     rpmdevtools && \
   yum clean ${quiet} all
diff --git a/dev/tasks/linux-packages/apache-arrow-release/yum/centos-8/Dockerfile b/dev/tasks/linux-packages/apache-arrow-release/yum/centos-8/Dockerfile
index c2131bf8412..e368506302e 100644
--- a/dev/tasks/linux-packages/apache-arrow-release/yum/centos-8/Dockerfile
+++ b/dev/tasks/linux-packages/apache-arrow-release/yum/centos-8/Dockerfile
@@ -21,8 +21,6 @@ ARG DEBUG
 
 RUN \
   quiet=$([ "${DEBUG}" = "yes" ] || echo "--quiet") && \
-  dnf install -y ${quiet} epel-release && \
   dnf install --enablerepo=powertools -y ${quiet} \
-    rpm-build \
     rpmdevtools && \
   dnf clean ${quiet} all
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/rules b/dev/tasks/linux-packages/apache-arrow/debian/rules
index 7b8dff26a08..2de53361559 100755
--- a/dev/tasks/linux-packages/apache-arrow/debian/rules
+++ b/dev/tasks/linux-packages/apache-arrow/debian/rules
@@ -32,6 +32,7 @@ override_dh_auto_configure:
 	  -DARROW_GANDIVA_JAVA=OFF				\
 	  -DARROW_MIMALLOC=ON					\
 	  -DARROW_ORC=ON					\
+	  -DARROW_PACKAGE_KIND=deb				\
 	  -DARROW_PARQUET=ON					\
 	  -DARROW_PLASMA=$${ARROW_PLASMA}			\
 	  -DARROW_PYTHON=ON					\
diff --git a/dev/tasks/linux-packages/apache-arrow/yum/amazon-linux-2/Dockerfile b/dev/tasks/linux-packages/apache-arrow/yum/amazon-linux-2/Dockerfile
new file mode 100644
index 00000000000..40cf3861feb
--- /dev/null
+++ b/dev/tasks/linux-packages/apache-arrow/yum/amazon-linux-2/Dockerfile
@@ -0,0 +1,56 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG FROM=amazonlinux:2
+FROM ${FROM}
+
+COPY qemu-* /usr/bin/
+
+ARG DEBUG
+
+RUN \
+  quiet=$([ "${DEBUG}" = "yes" ] || echo "--quiet") && \
+  yum update -y ${quiet} && \
+  amazon-linux-extras install -y epel && \
+  yum install -y ${quiet} \
+    bison \
+    brotli-devel \
+    bzip2-devel \
+    ccache \
+    cmake3 \
+    flex \
+    gcc-c++ \
+    git \
+    glog-devel \
+    gobject-introspection-devel \
+    gtk-doc \
+    libzstd-devel \
+    lz4-devel \
+    make \
+    openssl-devel \
+    pkg-config \
+    rapidjson-devel \
+    rpmdevtools \
+    snappy-devel \
+    tar \
+    utf8proc-devel \
+    zlib-devel && \
+  # Install ninja-build dependencies in amzn2-core
+  yum install -y ${quiet} ninja-build && \
+  # Install ninja-build from EPEL because ninja-build in amzn2-core is old.
+  yum install -y ${quiet} --disablerepo=amzn2-core ninja-build && \
+  yum clean ${quiet} all
diff --git a/dev/tasks/linux-packages/apache-arrow/yum/amazon-linux-2/qemu-dummy-static b/dev/tasks/linux-packages/apache-arrow/yum/amazon-linux-2/qemu-dummy-static
new file mode 100755
index 00000000000..c42e0962def
--- /dev/null
+++ b/dev/tasks/linux-packages/apache-arrow/yum/amazon-linux-2/qemu-dummy-static
@@ -0,0 +1,33 @@
+#!/bin/sh
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Do nothing. This exists only for not requiring qemu-aarch64-static copy.
+# Recent Debian (buster or later) and Ubuntu (18.10 or later) on amd64 hosts or
+# arm64 host don't require qemu-aarch64-static in Docker image. But old Debian
+# and Ubuntu hosts on amd64 require qemu-aarch64-static in Docker image.
+#
+# We use "COPY qemu* /usr/bin/" in Dockerfile. If we don't put any "qemnu*",
+# the "COPY" is failed. It means that we always require "qemu*" even if we
+# use recent Debian/Ubuntu or arm64 host. If we have this dummy "qemu*" file,
+# the "COPY" isn't failed. It means that we can copy "qemu*" only when we
+# need.
+#
+# See also "script" in dev/tasks/linux-packages/azure.linux.arm64.yml.
+# Azure Pipelines uses old Ubuntu (18.04).
+# So we need to put "qemu-aarch64-static" into this directory.
diff --git a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in
index 842b3b0f014..4d23c55ca4d 100644
--- a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in
+++ b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in
@@ -17,35 +17,38 @@
 # specific language governing permissions and limitations
 # under the License.
 
-%define _centos_ver %{?centos_ver:%{centos_ver}}%{!?centos_ver:8}
+%define _amzn %{?amzn:%{amzn}}%{!?amzn:0}
+%define is_amazon_linux (%{_amzn} != 0)
 
 %define boost_version %( \
-  if [ "%{_centos_ver}" = 7 ]; then \
+  if [ %{rhel} -eq 7 ]; then \
     echo 169; \
   fi)
 %define cmake_version %( \
-  if [ "%{_centos_ver}" -lt 8 ]; then \
+  if [ %{rhel} -lt 8 ]; then \
     echo 3; \
   fi)
 %define python_version %( \
-  if [ "%{_centos_ver}" = 7 ]; then \
+  if [ %{rhel} -eq 7 ]; then \
     echo 36; \
   else \
     echo 3; \
   fi)
 
-%define use_flight (%{_centos_ver} >= 8)
-%define use_gandiva (%{_centos_ver} >= 8 && %{_arch} != "aarch64")
-%define use_mimalloc (%{_centos_ver} >= 8)
-%define use_ninja (%{_centos_ver} >= 8)
+%define use_boost (!%{is_amazon_linux})
+%define use_flight (%{rhel} >= 8)
+%define use_gandiva (%{rhel} >= 8 && %{_arch} != "aarch64")
+%define use_gflags (!%{is_amazon_linux})
+%define use_mimalloc (%{rhel} >= 8)
+%define use_python (!%{is_amazon_linux})
 # TODO: Enable this. This works on local but is fragile on GitHub Actions and
 # Travis CI.
 # %define use_s3 (%{_centos_ver} >= 8)
 %define use_s3 0
 
-%define have_rapidjson (%{_centos_ver} == 7)
-%define have_re2 (%{_centos_ver} >= 8)
-%define have_utf8proc (%{_centos_ver} == 7)
+%define have_rapidjson (%{rhel} == 7)
+%define have_re2 (%{rhel} >= 8)
+%define have_utf8proc (%{rhel} == 7)
 
 Name:		@PACKAGE@
 Version:	@VERSION@
@@ -57,7 +60,9 @@ URL:		https://arrow.apache.org/
 Source0:	https://dist.apache.org/repos/dist/release/@PACKAGE@/@PACKAGE@-%{version}/apache-@PACKAGE@-%{version}.tar.gz
 
 BuildRequires:	bison
+%if %{use_boost}
 BuildRequires:	boost%{boost_version}-devel
+%endif
 BuildRequires:	brotli-devel
 BuildRequires:	bzip2-devel
 BuildRequires:	cmake%{cmake_version}
@@ -66,15 +71,20 @@ BuildRequires:	curl-devel
 %endif
 BuildRequires:	flex
 BuildRequires:	gcc-c++
+%if %{use_gflags}
 BuildRequires:	gflags-devel
+%endif
 BuildRequires:	git
 BuildRequires:	glog-devel
 BuildRequires:	libzstd-devel
 BuildRequires:	lz4-devel
+BuildRequires:	ninja-build
 BuildRequires:	openssl-devel
 BuildRequires:	pkgconfig
+%if %{use_python}
 BuildRequires:	python%{python_version}-devel
 BuildRequires:	python%{python_version}-numpy
+%endif
 %if %{have_rapidjson}
 BuildRequires:	rapidjson-devel
 %endif
@@ -106,19 +116,26 @@ cpp_build_type=release
 mkdir cpp/build
 cd cpp/build
 %cmake3 .. \
+  -DARROW_CSV=ON \
+  -DARROW_DATASET=ON \
 %if %{use_flight}
   -DARROW_FLIGHT=ON \
 %endif
 %if %{use_gandiva}
   -DARROW_GANDIVA=ON \
 %endif
+  -DARROW_HDFS=ON \
+  -DARROW_JSON=ON \
 %if %{use_mimalloc}
   -DARROW_MIMALLOC=ON \
 %endif
   -DARROW_ORC=ON \
+  -DARROW_PACKAGE_KIND=rpm \
   -DARROW_PARQUET=ON \
   -DARROW_PLASMA=ON \
+%if %{use_python}
   -DARROW_PYTHON=ON \
+%endif
 %if %{use_s3}
   -DARROW_S3=ON \
 %endif
@@ -132,17 +149,13 @@ cd cpp/build
   -DARROW_USE_CCACHE=OFF \
   -DCMAKE_UNITY_BUILD=ON \
   -DPARQUET_REQUIRE_ENCRYPTION=ON \
+%if %{use_python}
   -DPythonInterp_FIND_VERSION=ON \
   -DPythonInterp_FIND_VERSION_MAJOR=3 \
-%if %{use_ninja}
-  -GNinja
 %endif
+  -GNinja
 
-%if %{use_ninja}
 ninja %{?_smp_mflags}
-%else
-make %{?_smp_mflags}
-%endif
 cd -
 
 cd c_glib
@@ -154,7 +167,8 @@ meson setup build \
   -Darrow_cpp_build_dir=../cpp/build \
   -Darrow_cpp_build_type=$cpp_build_type \
   -Dgtk_doc=true
-LD_LIBRARY_PATH=$PWD/../cpp/build/$cpp_build_type ninja -C build %{?_smp_mflags}
+LD_LIBRARY_PATH=$PWD/../cpp/build/$cpp_build_type \
+  ninja -C build %{?_smp_mflags}
 cd -
 
 %install
@@ -166,22 +180,17 @@ ninja -C build clean
 cd -
 
 cd cpp/build
-%if %{use_ninja}
 DESTDIR=$RPM_BUILD_ROOT ninja install
 ninja clean
-%else
-make install DESTDIR=$RPM_BUILD_ROOT
-make clean
-%endif
 cd -
 
 %package libs
 Summary:	Runtime libraries for Apache Arrow C++
 License:	Apache-2.0
-Requires:	boost%{boost_version}-system
-Requires:	boost%{boost_version}-filesystem
 Requires:	brotli
+%if %{use_gflags}
 Requires:	gflags
+%endif
 Requires:	glog
 Requires:	libzstd
 Requires:	lz4
@@ -364,6 +373,7 @@ Libraries and header files for Gandiva.
 %{_libdir}/pkgconfig/gandiva.pc
 %endif
 
+%if %{use_python}
 %package python-libs
 Summary:	Python integration library for Apache Arrow
 License:	Apache-2.0
@@ -437,6 +447,7 @@ Apache Arrow Flight.
 %{_libdir}/libarrow_python_flight.so
 %{_libdir}/pkgconfig/arrow-python-flight.pc
 %endif
+%endif
 
 %package -n plasma-libs
 Summary:	Runtime libraries for Plasma in-memory object store
@@ -486,7 +497,6 @@ Libraries and header files for Plasma in-memory object store.
 %package -n parquet-libs
 Summary:	Runtime libraries for Apache Parquet C++
 License:	Apache-2.0
-Requires:	boost%{boost_version}-program-options
 Requires:	%{name}-libs = %{version}-%{release}
 Requires:	openssl
 
diff --git a/dev/tasks/linux-packages/apache-arrow/yum/centos-7/Dockerfile b/dev/tasks/linux-packages/apache-arrow/yum/centos-7/Dockerfile
index d4c56a50235..b668165758f 100644
--- a/dev/tasks/linux-packages/apache-arrow/yum/centos-7/Dockerfile
+++ b/dev/tasks/linux-packages/apache-arrow/yum/centos-7/Dockerfile
@@ -24,7 +24,6 @@ RUN \
   quiet=$([ "${DEBUG}" = "yes" ] || echo "--quiet") && \
   yum update -y ${quiet} && \
   yum install -y ${quiet} epel-release && \
-  yum groupinstall -y ${quiet} "Development Tools" && \
   yum install -y ${quiet} \
     bison \
     boost169-devel \
@@ -33,6 +32,7 @@ RUN \
     ccache \
     cmake3 \
     flex \
+    gcc-c++ \
     gflags-devel \
     git \
     glog-devel \
@@ -40,6 +40,7 @@ RUN \
     gtk-doc \
     libzstd-devel \
     lz4-devel \
+    make \
     ninja-build \
     openssl-devel \
     pkg-config \
@@ -47,7 +48,6 @@ RUN \
     python36-devel \
     python36-numpy \
     rapidjson-devel \
-    rpm-build \
     rpmdevtools \
     snappy-devel \
     tar \
diff --git a/dev/tasks/linux-packages/apache-arrow/yum/centos-8/Dockerfile b/dev/tasks/linux-packages/apache-arrow/yum/centos-8/Dockerfile
index bb30de16b6b..5ced36cab0d 100644
--- a/dev/tasks/linux-packages/apache-arrow/yum/centos-8/Dockerfile
+++ b/dev/tasks/linux-packages/apache-arrow/yum/centos-8/Dockerfile
@@ -54,7 +54,6 @@ RUN \
     python3-pip \
     re2-devel \
     # rapidjson-devel \
-    rpm-build \
     rpmdevtools \
     snappy-devel \
     tar \
diff --git a/dev/tasks/linux-packages/package-task.rb b/dev/tasks/linux-packages/package-task.rb
index b3227bf84b5..0d894a365bd 100644
--- a/dev/tasks/linux-packages/package-task.rb
+++ b/dev/tasks/linux-packages/package-task.rb
@@ -228,6 +228,17 @@ def define_dist_task
     task :dist => [@archive_name]
   end
 
+  def split_target(target)
+    components = target.split("-")
+    if components[0, 2] == ["amazon", "linux"]
+      components[0, 2] = components[0, 2].join("-")
+    end
+    if components.size >= 3
+      components[2..-1] = components[2..-1].join("-")
+    end
+    components
+  end
+
   def enable_apt?
     true
   end
@@ -277,7 +288,7 @@ def apt_dir
   def apt_prepare_debian_dir(tmp_dir, target)
     source_debian_dir = nil
     specific_debian_dir = "debian.#{target}"
-    distribution, code_name, _architecture = target.split("-", 3)
+    distribution, code_name, _architecture = split_target(target)
     platform = [distribution, code_name].join("-")
     platform_debian_dir = "debian.#{platform}"
     if File.exist?(specific_debian_dir)
@@ -326,7 +337,7 @@ def apt_build(console: false)
 
     apt_targets.each do |target|
       cd(apt_dir) do
-        distribution, version, architecture = target.split("-", 3)
+        distribution, version, architecture = split_target(target)
         os = "#{distribution}-#{version}"
         docker_run(os, architecture, console: console)
       end
@@ -393,6 +404,8 @@ def yum_targets_default
     # Disable aarch64 targets by default for now
     # because they require some setups on host.
     [
+      "amazon-linux-2",
+      # "amazon-linux-2-arch64",
       "centos-7",
       # "centos-7-aarch64",
       "centos-8",
@@ -461,7 +474,7 @@ def yum_build(console: false)
 
     yum_targets.each do |target|
       cd(yum_dir) do
-        distribution, version, architecture = target.split("-", 3)
+        distribution, version, architecture = split_target(target)
         os = "#{distribution}-#{version}"
         docker_run(os, architecture, console: console)
       end
@@ -594,7 +607,7 @@ def define_docker_tasks
       push_tasks = []
 
       (apt_targets + yum_targets).each do |target|
-        distribution, version, architecture = target.split("-", 3)
+        distribution, version, architecture = split_target(target)
         os = "#{distribution}-#{version}"
 
         namespace :pull do
diff --git a/dev/tasks/linux-packages/yum/build.sh b/dev/tasks/linux-packages/yum/build.sh
index 01746803adf..8bb8a261795 100755
--- a/dev/tasks/linux-packages/yum/build.sh
+++ b/dev/tasks/linux-packages/yum/build.sh
@@ -33,11 +33,12 @@ rpmbuild_options=
 
 . /host/env.sh
 
-distribution=$(cut -d " " -f 1 /etc/redhat-release | tr "A-Z" "a-z")
-if grep -q Linux /etc/redhat-release; then
-  distribution_version=$(cut -d " " -f 4 /etc/redhat-release)
+if grep -q amazon /etc/system-release-cpe; then
+  distribution=$(cut -d ":" -f 5 /etc/system-release-cpe | tr '_' '-')
+  distribution_version=$(cut -d ":" -f 6 /etc/system-release-cpe)
 else
-  distribution_version=$(cut -d " " -f 3 /etc/redhat-release)
+  distribution=$(cut -d ":" -f 4 /etc/system-release-cpe)
+  distribution_version=$(cut -d ":" -f 5 /etc/system-release-cpe)
 fi
 distribution_version=$(echo ${distribution_version} | sed -e 's/\..*$//g')
 
@@ -124,20 +125,20 @@ run cat <<BUILD > build.sh
 rpmbuild -ba ${rpmbuild_options} rpmbuild/SPECS/${PACKAGE}.spec
 BUILD
 run chmod +x build.sh
-if [ -n "${DEVTOOLSET_VERSION:-}" ]; then
+if [ -n "${SCL:-}" ]; then
   run cat <<WHICH_STRIP > which-strip.sh
 #!/bin/bash
 
 which strip
 WHICH_STRIP
   run chmod +x which-strip.sh
-  run cat <<USE_DEVTOOLSET_STRIP >> ~/.rpmmacros
-%__strip $(run scl enable devtoolset-${DEVTOOLSET_VERSION} ./which-strip.sh)
-USE_DEVTOOLSET_STRIP
+  run cat <<USE_SCL_STRIP >> ~/.rpmmacros
+%__strip $(run scl enable ${SCL} ./which-strip.sh)
+USE_SCL_STRIP
   if [ "${DEBUG:-no}" = "yes" ]; then
-    run scl enable devtoolset-${DEVTOOLSET_VERSION} ./build.sh
+    run scl enable ${SCL} ./build.sh
   else
-    run scl enable devtoolset-${DEVTOOLSET_VERSION} ./build.sh > /dev/null
+    run scl enable ${SCL} ./build.sh > /dev/null
   fi
 else
   if [ "${DEBUG:-no}" = "yes" ]; then
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index 832d16d9f69..d4592afeda1 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -28,19 +28,21 @@ groups:
     - wheel-*
 
   linux:
+    - amazon-linux-*
+    - centos-*
     - debian-*
     - ubuntu-*
-    - centos-*
 
   linux-amd64:
+    - amazon-linux-*-amd64
+    - centos-*-amd64
     - debian-*-amd64
     - ubuntu-*-amd64
-    - centos-*-amd64
 
   linux-arm64:
+    - centos-*-arm64
     - debian-*-arm64
     - ubuntu-*-arm64
-    - centos-*-arm64
 
   gandiva:
     - gandiva-*
@@ -49,13 +51,14 @@ groups:
     - homebrew-*
 
   packaging:
+    - amazon-linux-*
+    - centos-*
     - conda-*
-    - wheel-*
     - debian-*
-    - ubuntu-*
-    - centos-*
-    - python-sdist
     - nuget
+    - python-sdist
+    - ubuntu-*
+    - wheel-*
 
 {############################# Testing tasks #################################}
 
@@ -490,152 +493,115 @@ tasks:
   {% endfor %}
 {% endfor %}
 
-  centos-7-amd64:
-    ci: github
-    template: linux-packages/github.linux.amd64.yml
-    params:
-      target: "centos-7"
-      task_namespace: yum
-      upload_extensions:
-        - .rpm
-    artifacts:
-      - apache-arrow-release-{no_rc_version}-1.el7.noarch.rpm
-      - apache-arrow-release-{no_rc_version}-1.el7.src.rpm
-      - arrow-{no_rc_version}-1.el7.src.rpm
-      - arrow-debuginfo-{no_rc_version}-1.el7.[a-z0-9_]+.rpm
-      - arrow-devel-{no_rc_version}-1.el7.[a-z0-9_]+.rpm
-      - arrow-glib-devel-{no_rc_version}-1.el7.[a-z0-9_]+.rpm
-      - arrow-glib-doc-{no_rc_version}-1.el7.[a-z0-9_]+.rpm
-      - arrow-glib-libs-{no_rc_version}-1.el7.[a-z0-9_]+.rpm
-      - arrow-libs-{no_rc_version}-1.el7.[a-z0-9_]+.rpm
-      - arrow-dataset-devel-{no_rc_version}-1.el7.[a-z0-9_]+.rpm
-      - arrow-dataset-glib-devel-{no_rc_version}-1.el7.[a-z0-9_]+.rpm
-      - arrow-dataset-glib-doc-{no_rc_version}-1.el7.[a-z0-9_]+.rpm
-      - arrow-dataset-glib-libs-{no_rc_version}-1.el7.[a-z0-9_]+.rpm
-      - arrow-dataset-libs-{no_rc_version}-1.el7.[a-z0-9_]+.rpm
-      - arrow-python-devel-{no_rc_version}-1.el7.[a-z0-9_]+.rpm
-      - arrow-python-libs-{no_rc_version}-1.el7.[a-z0-9_]+.rpm
-      - parquet-devel-{no_rc_version}-1.el7.[a-z0-9_]+.rpm
-      - parquet-glib-devel-{no_rc_version}-1.el7.[a-z0-9_]+.rpm
-      - parquet-glib-doc-{no_rc_version}-1.el7.[a-z0-9_]+.rpm
-      - parquet-glib-libs-{no_rc_version}-1.el7.[a-z0-9_]+.rpm
-      - parquet-libs-{no_rc_version}-1.el7.[a-z0-9_]+.rpm
-      - plasma-devel-{no_rc_version}-1.el7.[a-z0-9_]+.rpm
-      - plasma-glib-devel-{no_rc_version}-1.el7.[a-z0-9_]+.rpm
-      - plasma-glib-doc-{no_rc_version}-1.el7.[a-z0-9_]+.rpm
-      - plasma-glib-libs-{no_rc_version}-1.el7.[a-z0-9_]+.rpm
-      - plasma-libs-{no_rc_version}-1.el7.[a-z0-9_]+.rpm
-      - plasma-store-server-{no_rc_version}-1.el7.[a-z0-9_]+.rpm
-
-  centos-8-amd64:
+{% for target in ["amazon-linux-2",
+                  "centos-7",
+                  "centos-8"] %}
+  {% for architecture in ["amd64", "arm64"] %}
+    {% if not (target in ["amazon-linux-2", "centos-7"] and architecture == "arm64") %}
+  {{ target }}-{{ architecture }}:
+      {% if architecture == "amd64" %}
     ci: github
     template: linux-packages/github.linux.amd64.yml
-    params:
-      target: "centos-8"
-      task_namespace: yum
-      upload_extensions:
-        - .rpm
-    artifacts:
-      - apache-arrow-release-{no_rc_version}-1.el8.noarch.rpm
-      - apache-arrow-release-{no_rc_version}-1.el8.src.rpm
-      - arrow-{no_rc_version}-1.el8.src.rpm
-      - arrow-dataset-devel-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - arrow-dataset-glib-doc-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - arrow-dataset-glib-libs-debuginfo-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - arrow-dataset-glib-libs-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - arrow-dataset-libs-debuginfo-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - arrow-dataset-libs-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - arrow-debuginfo-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - arrow-devel-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - arrow-flight-devel-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - arrow-flight-libs-debuginfo-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - arrow-flight-libs-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - arrow-glib-devel-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - arrow-glib-doc-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - arrow-glib-libs-debuginfo-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - arrow-glib-libs-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - arrow-libs-debuginfo-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - arrow-libs-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - arrow-python-devel-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - arrow-python-flight-devel-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - arrow-python-flight-libs-debuginfo-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - arrow-python-flight-libs-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - arrow-python-libs-debuginfo-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - arrow-python-libs-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - gandiva-devel-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - gandiva-glib-devel-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - gandiva-glib-doc-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - gandiva-glib-libs-debuginfo-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - gandiva-glib-libs-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - gandiva-libs-debuginfo-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - gandiva-libs-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - parquet-devel-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - parquet-glib-devel-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - parquet-glib-doc-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - parquet-glib-libs-debuginfo-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - parquet-glib-libs-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - parquet-libs-debuginfo-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - parquet-libs-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - plasma-devel-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - plasma-glib-devel-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - plasma-glib-doc-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - plasma-glib-libs-debuginfo-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - plasma-glib-libs-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - plasma-libs-debuginfo-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - plasma-libs-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - plasma-store-server-debuginfo-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - plasma-store-server-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-
-  centos-8-arm64:
+      {% else %}
     ci: travis
     template: linux-packages/travis.linux.arm64.yml
+      {% endif %}
     params:
-      target: "centos-8-aarch64"
-      task_namespace: yum
+      {% if architecture == "amd64" %}
+      target: "{{ target }}"
+      {% else %}
+      target: "{{ target }}-aarch64"
+      {% endif %}
+      task_namespace: "yum"
       upload_extensions:
         - .rpm
     artifacts:
-      - arrow-{no_rc_version}-1.el8.src.rpm
-      - arrow-dataset-devel-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - arrow-dataset-glib-devel-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - arrow-dataset-glib-doc-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - arrow-dataset-glib-libs-debuginfo-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - arrow-dataset-glib-libs-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - arrow-dataset-libs-debuginfo-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - arrow-dataset-libs-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - arrow-debuginfo-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - arrow-devel-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - arrow-flight-devel-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - arrow-flight-libs-debuginfo-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - arrow-flight-libs-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - arrow-glib-devel-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - arrow-glib-doc-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - arrow-glib-libs-debuginfo-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - arrow-glib-libs-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - arrow-libs-debuginfo-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - arrow-libs-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - arrow-python-devel-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - arrow-python-flight-devel-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - arrow-python-flight-libs-debuginfo-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - arrow-python-flight-libs-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - arrow-python-libs-debuginfo-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - arrow-python-libs-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - parquet-devel-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - parquet-glib-devel-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - parquet-glib-doc-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - parquet-glib-libs-debuginfo-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - parquet-glib-libs-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - parquet-libs-debuginfo-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - parquet-libs-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - plasma-devel-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - plasma-glib-devel-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - plasma-glib-doc-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - plasma-glib-libs-debuginfo-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - plasma-glib-libs-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - plasma-libs-debuginfo-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - plasma-libs-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - plasma-store-server-debuginfo-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
-      - plasma-store-server-{no_rc_version}-1.el8.[a-z0-9_]+.rpm
+      {% if architecture == "amd64" %}
+      - apache-arrow-release-{no_rc_version}-1.[a-z0-9]+.noarch.rpm
+      - apache-arrow-release-{no_rc_version}-1.[a-z0-9]+.src.rpm
+      {% endif %}
+      - arrow-dataset-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      - arrow-dataset-glib-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      - arrow-dataset-glib-doc-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      {% if target == "centos-8" %}
+      - arrow-dataset-glib-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      {% endif %}
+      - arrow-dataset-glib-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      {% if target == "centos-8" %}
+      - arrow-dataset-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      {% endif %}
+      - arrow-dataset-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      - arrow-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      {% if target == "centos-8" %}
+      - arrow-debugsource-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      {% endif %}
+      - arrow-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      {% if target == "centos-8" %}
+      - arrow-flight-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      - arrow-flight-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      - arrow-flight-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      {% endif %}
+      - arrow-glib-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      - arrow-glib-doc-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      {% if target == "centos-8" %}
+      - arrow-glib-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      {% endif %}
+      - arrow-glib-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      {% if target == "centos-8" %}
+      - arrow-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      {% endif %}
+      - arrow-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      {% if target != "amazon-linux-2" %}
+      - arrow-python-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+        {% if target == "centos-8" %}
+      - arrow-python-flight-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      - arrow-python-flight-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      - arrow-python-flight-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      - arrow-python-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+        {% endif %}
+      - arrow-python-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      {% endif %}
+      {% if architecture == "amd64" %}
+      - arrow-{no_rc_version}-1.[a-z0-9]+.src.rpm
+      {% endif %}
+      {% if target == "centos-8" and architecture == "amd64" %}
+      - gandiva-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      - gandiva-glib-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      - gandiva-glib-doc-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      - gandiva-glib-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      - gandiva-glib-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      - gandiva-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      - gandiva-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      {% endif %}
+      - parquet-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      - parquet-glib-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      - parquet-glib-doc-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      {% if target == "centos-8" %}
+      - parquet-glib-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      {% endif %}
+      - parquet-glib-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      {% if target == "centos-8" %}
+      - parquet-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      {% endif %}
+      - parquet-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      - plasma-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      - plasma-glib-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      - plasma-glib-doc-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      {% if target == "centos-8" %}
+      - plasma-glib-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      {% endif %}
+      - plasma-glib-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      {% if target == "centos-8" %}
+      - plasma-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      {% endif %}
+      - plasma-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      {% if target == "centos-8" %}
+      - plasma-store-server-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      {% endif %}
+      - plasma-store-server-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+    {% endif %}
+  {% endfor %}
+{% endfor %}
 
   ############################## Homebrew Tasks ################################
 

From 7a5ec7ba5734cbff02007d7a86b4a9cf27921a95 Mon Sep 17 00:00:00 2001
From: Pachamaltese <mvargas@dcc.uchile.cl>
Date: Wed, 5 May 2021 15:51:59 -0500
Subject: [PATCH 190/719] ARROW-11146: [CI] Remove test-conda-python-3.8-jpype
 build

Closes #10238 from pachamaltese/patch-2

Authored-by: Pachamaltese <mvargas@dcc.uchile.cl>
Signed-off-by: Jonathan Keane <jkeane@gmail.com>
---
 dev/tasks/tasks.yml | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index d4592afeda1..c3a2c5a1b6a 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -91,7 +91,6 @@ groups:
   integration:
     - test-*dask*
     - test-*hdfs*
-    - test-*jpype*
     - test-*kartothek*
     - test-*pandas*
     - test-*spark*
@@ -1027,14 +1026,6 @@ tasks:
       image: conda-python-spark
 {% endfor %}
 
-  test-conda-python-3.8-jpype:
-    ci: github
-    template: docker-tests/github.linux.yml
-    params:
-      env:
-        PYTHON: 3.8
-      image: conda-python-jpype
-
   # Remove the "skipped-" prefix in ARROW-8475
   skipped-test-conda-cpp-hiveserver2:
     ci: github

From 8b4942728e7347dc921a2d423e996fea5f9e2102 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Wed, 5 May 2021 16:58:41 -0400
Subject: [PATCH 191/719] ARROW-9697: [C++][Python][R][Dataset] Add CountRows
 for Scanner

This implements a CountRows method for scanner. It will ask the fragment if it can count rows using only metadata, and otherwise project away columns and count the resulting rows.

Originally, I thought we did not need a special optimization for the metadata-only case, because the Parquet reader will skip I/O and fabricate empty batches if you ask it to read no columns. However, in benchmarking, the overhead of the rest of the pipeline was still significant and so I implemented the optimization after all.

Closes #10060 from lidavidm/arrow-9697

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Benjamin Kietzman <bengilgit@gmail.com>
---
 cpp/src/arrow/compute/exec/expression.cc      |  11 ++
 cpp/src/arrow/compute/exec/expression.h       |   4 +
 cpp/src/arrow/compute/exec/expression_test.cc |  22 ++++
 cpp/src/arrow/dataset/dataset.cc              |   5 +
 cpp/src/arrow/dataset/dataset.h               |   9 ++
 cpp/src/arrow/dataset/file_base.cc            |  17 +++
 cpp/src/arrow/dataset/file_base.h             |   5 +
 cpp/src/arrow/dataset/file_ipc.cc             |  14 ++
 cpp/src/arrow/dataset/file_ipc.h              |   4 +
 cpp/src/arrow/dataset/file_ipc_test.cc        |   1 +
 cpp/src/arrow/dataset/file_parquet.cc         |  80 ++++++++++--
 cpp/src/arrow/dataset/file_parquet.h          |  10 ++
 cpp/src/arrow/dataset/file_parquet_test.cc    |  63 +++++++++
 cpp/src/arrow/dataset/scanner.cc              | 120 +++++++++++++++++-
 cpp/src/arrow/dataset/scanner.h               |   5 +
 cpp/src/arrow/dataset/scanner_test.cc         | 107 +++++++++++++++-
 cpp/src/arrow/dataset/test_util.h             |  36 ++++++
 cpp/src/arrow/ipc/reader.cc                   |  19 +++
 cpp/src/arrow/ipc/reader.h                    |   3 +
 python/pyarrow/_dataset.pyx                   |  76 +++++++++--
 python/pyarrow/includes/libarrow_dataset.pxd  |   1 +
 python/pyarrow/tests/test_dataset.py          |  28 ++++
 r/R/arrowExports.R                            |   4 +
 r/R/dataset-scan.R                            |   3 +-
 r/R/dataset.R                                 |  19 +--
 r/R/dplyr.R                                   |   5 +-
 r/src/arrowExports.cpp                        |  16 +++
 r/src/dataset.cpp                             |   5 +
 r/tests/testthat/test-dataset.R               |  34 ++---
 29 files changed, 656 insertions(+), 70 deletions(-)

diff --git a/cpp/src/arrow/compute/exec/expression.cc b/cpp/src/arrow/compute/exec/expression.cc
index 59def380db5..91bf73166ca 100644
--- a/cpp/src/arrow/compute/exec/expression.cc
+++ b/cpp/src/arrow/compute/exec/expression.cc
@@ -613,6 +613,17 @@ std::vector<FieldRef> FieldsInExpression(const Expression& expr) {
   return fields;
 }
 
+bool ExpressionHasFieldRefs(const Expression& expr) {
+  if (expr.literal()) return false;
+
+  if (auto ref = expr.field_ref()) return true;
+
+  for (const Expression& arg : CallNotNull(expr)->arguments) {
+    if (ExpressionHasFieldRefs(arg)) return true;
+  }
+  return false;
+}
+
 Result<Expression> FoldConstants(Expression expr) {
   return Modify(
       std::move(expr), [](Expression expr) { return expr; },
diff --git a/cpp/src/arrow/compute/exec/expression.h b/cpp/src/arrow/compute/exec/expression.h
index 98221422ee2..f5ca2c2118d 100644
--- a/cpp/src/arrow/compute/exec/expression.h
+++ b/cpp/src/arrow/compute/exec/expression.h
@@ -157,6 +157,10 @@ Expression call(std::string function, std::vector<Expression> arguments,
 ARROW_EXPORT
 std::vector<FieldRef> FieldsInExpression(const Expression&);
 
+/// Check if the expression references any fields.
+ARROW_EXPORT
+bool ExpressionHasFieldRefs(const Expression&);
+
 /// Assemble a mapping from field references to known values.
 ARROW_EXPORT
 Result<std::unordered_map<FieldRef, Datum, FieldRef::Hash>> ExtractKnownFieldValues(
diff --git a/cpp/src/arrow/compute/exec/expression_test.cc b/cpp/src/arrow/compute/exec/expression_test.cc
index e8b8fb31cd8..66212bf99d6 100644
--- a/cpp/src/arrow/compute/exec/expression_test.cc
+++ b/cpp/src/arrow/compute/exec/expression_test.cc
@@ -357,6 +357,28 @@ TEST(Expression, FieldsInExpression) {
                   {"a", "b", "c"});
 }
 
+TEST(Expression, ExpressionHasFieldRefs) {
+  EXPECT_FALSE(ExpressionHasFieldRefs(literal(true)));
+
+  EXPECT_FALSE(ExpressionHasFieldRefs(call("add", {literal(1), literal(3)})));
+
+  EXPECT_TRUE(ExpressionHasFieldRefs(field_ref("a")));
+
+  EXPECT_TRUE(ExpressionHasFieldRefs(equal(field_ref("a"), literal(1))));
+
+  EXPECT_TRUE(ExpressionHasFieldRefs(equal(field_ref("a"), field_ref("b"))));
+
+  EXPECT_TRUE(ExpressionHasFieldRefs(
+      or_(equal(field_ref("a"), literal(1)), equal(field_ref("a"), literal(2)))));
+
+  EXPECT_TRUE(ExpressionHasFieldRefs(
+      or_(equal(field_ref("a"), literal(1)), equal(field_ref("b"), literal(2)))));
+
+  EXPECT_TRUE(ExpressionHasFieldRefs(or_(
+      and_(not_(equal(field_ref("a"), literal(1))), equal(field_ref("b"), literal(2))),
+      not_(less(field_ref("c"), literal(3))))));
+}
+
 TEST(Expression, BindLiteral) {
   for (Datum dat : {
            Datum(3),
diff --git a/cpp/src/arrow/dataset/dataset.cc b/cpp/src/arrow/dataset/dataset.cc
index ab0600dd1a8..bfb3c4b0596 100644
--- a/cpp/src/arrow/dataset/dataset.cc
+++ b/cpp/src/arrow/dataset/dataset.cc
@@ -52,6 +52,11 @@ Result<std::shared_ptr<Schema>> Fragment::ReadPhysicalSchema() {
   return physical_schema_;
 }
 
+Future<util::optional<int64_t>> Fragment::CountRows(compute::Expression,
+                                                    std::shared_ptr<ScanOptions>) {
+  return Future<util::optional<int64_t>>::MakeFinished(util::nullopt);
+}
+
 Result<std::shared_ptr<Schema>> InMemoryFragment::ReadPhysicalSchemaImpl() {
   return physical_schema_;
 }
diff --git a/cpp/src/arrow/dataset/dataset.h b/cpp/src/arrow/dataset/dataset.h
index 40a60ffd48e..30699accd2d 100644
--- a/cpp/src/arrow/dataset/dataset.h
+++ b/cpp/src/arrow/dataset/dataset.h
@@ -30,6 +30,7 @@
 #include "arrow/dataset/visibility.h"
 #include "arrow/util/macros.h"
 #include "arrow/util/mutex.h"
+#include "arrow/util/optional.h"
 
 namespace arrow {
 namespace dataset {
@@ -70,6 +71,14 @@ class ARROW_DS_EXPORT Fragment : public std::enable_shared_from_this<Fragment> {
   virtual Result<RecordBatchGenerator> ScanBatchesAsync(
       const std::shared_ptr<ScanOptions>& options) = 0;
 
+  /// \brief Count the number of rows in this fragment matching the filter using metadata
+  /// only. That is, this method may perform I/O, but will not load data.
+  ///
+  /// If this is not possible, resolve with an empty optional. The fragment can perform
+  /// I/O (e.g. to read metadata) before it deciding whether it can satisfy the request.
+  virtual Future<util::optional<int64_t>> CountRows(compute::Expression predicate,
+                                                    std::shared_ptr<ScanOptions> options);
+
   virtual std::string type_name() const = 0;
   virtual std::string ToString() const { return type_name(); }
 
diff --git a/cpp/src/arrow/dataset/file_base.cc b/cpp/src/arrow/dataset/file_base.cc
index 1f47fc8ae86..b3ee09b5b7d 100644
--- a/cpp/src/arrow/dataset/file_base.cc
+++ b/cpp/src/arrow/dataset/file_base.cc
@@ -84,6 +84,12 @@ Result<std::shared_ptr<io::InputStream>> FileSource::OpenCompressed(
   return io::CompressedInputStream::Make(codec.get(), std::move(file));
 }
 
+Future<util::optional<int64_t>> FileFormat::CountRows(
+    const std::shared_ptr<FileFragment>&, compute::Expression,
+    std::shared_ptr<ScanOptions>) {
+  return Future<util::optional<int64_t>>::MakeFinished(util::nullopt);
+}
+
 Result<std::shared_ptr<FileFragment>> FileFormat::MakeFragment(
     FileSource source, std::shared_ptr<Schema> physical_schema) {
   return MakeFragment(std::move(source), compute::literal(true),
@@ -169,6 +175,17 @@ Result<RecordBatchGenerator> FileFragment::ScanBatchesAsync(
   return format_->ScanBatchesAsync(options, self);
 }
 
+Future<util::optional<int64_t>> FileFragment::CountRows(
+    compute::Expression predicate, std::shared_ptr<ScanOptions> options) {
+  ARROW_ASSIGN_OR_RAISE(predicate, compute::SimplifyWithGuarantee(std::move(predicate),
+                                                                  partition_expression_));
+  if (!predicate.IsSatisfiable()) {
+    return Future<util::optional<int64_t>>::MakeFinished(0);
+  }
+  auto self = internal::checked_pointer_cast<FileFragment>(shared_from_this());
+  return format()->CountRows(self, std::move(predicate), std::move(options));
+}
+
 struct FileSystemDataset::FragmentSubtrees {
   // Forest for skipping fragments based on extracted subtree expressions
   Forest forest;
diff --git a/cpp/src/arrow/dataset/file_base.h b/cpp/src/arrow/dataset/file_base.h
index 41f7933aca9..4119dfb75e1 100644
--- a/cpp/src/arrow/dataset/file_base.h
+++ b/cpp/src/arrow/dataset/file_base.h
@@ -155,6 +155,9 @@ class ARROW_DS_EXPORT FileFormat : public std::enable_shared_from_this<FileForma
   virtual Result<RecordBatchGenerator> ScanBatchesAsync(
       const std::shared_ptr<ScanOptions>& options,
       const std::shared_ptr<FileFragment>& file);
+  virtual Future<util::optional<int64_t>> CountRows(
+      const std::shared_ptr<FileFragment>& file, compute::Expression predicate,
+      std::shared_ptr<ScanOptions> options);
 
   /// \brief Open a fragment
   virtual Result<std::shared_ptr<FileFragment>> MakeFragment(
@@ -184,6 +187,8 @@ class ARROW_DS_EXPORT FileFragment : public Fragment {
   Result<ScanTaskIterator> Scan(std::shared_ptr<ScanOptions> options) override;
   Result<RecordBatchGenerator> ScanBatchesAsync(
       const std::shared_ptr<ScanOptions>& options) override;
+  Future<util::optional<int64_t>> CountRows(
+      compute::Expression predicate, std::shared_ptr<ScanOptions> options) override;
 
   std::string type_name() const override { return format_->type_name(); }
   std::string ToString() const override { return source_.path(); };
diff --git a/cpp/src/arrow/dataset/file_ipc.cc b/cpp/src/arrow/dataset/file_ipc.cc
index 49893cde6d9..a60e31bf7d2 100644
--- a/cpp/src/arrow/dataset/file_ipc.cc
+++ b/cpp/src/arrow/dataset/file_ipc.cc
@@ -173,6 +173,20 @@ Result<ScanTaskIterator> IpcFileFormat::ScanFile(
   return IpcScanTaskIterator::Make(options, fragment);
 }
 
+Future<util::optional<int64_t>> IpcFileFormat::CountRows(
+    const std::shared_ptr<FileFragment>& file, compute::Expression predicate,
+    std::shared_ptr<ScanOptions> options) {
+  if (ExpressionHasFieldRefs(predicate)) {
+    return Future<util::optional<int64_t>>::MakeFinished(util::nullopt);
+  }
+  auto self = internal::checked_pointer_cast<IpcFileFormat>(shared_from_this());
+  return DeferNotOk(options->io_context.executor()->Submit(
+      [self, file]() -> Result<util::optional<int64_t>> {
+        ARROW_ASSIGN_OR_RAISE(auto reader, OpenReader(file->source()));
+        return reader->CountRows();
+      }));
+}
+
 //
 // IpcFileWriter, IpcFileWriteOptions
 //
diff --git a/cpp/src/arrow/dataset/file_ipc.h b/cpp/src/arrow/dataset/file_ipc.h
index 2c65078c754..d1c16a93cf4 100644
--- a/cpp/src/arrow/dataset/file_ipc.h
+++ b/cpp/src/arrow/dataset/file_ipc.h
@@ -56,6 +56,10 @@ class ARROW_DS_EXPORT IpcFileFormat : public FileFormat {
       const std::shared_ptr<ScanOptions>& options,
       const std::shared_ptr<FileFragment>& fragment) const override;
 
+  Future<util::optional<int64_t>> CountRows(
+      const std::shared_ptr<FileFragment>& file, compute::Expression predicate,
+      std::shared_ptr<ScanOptions> options) override;
+
   Result<std::shared_ptr<FileWriter>> MakeWriter(
       std::shared_ptr<io::OutputStream> destination, std::shared_ptr<Schema> schema,
       std::shared_ptr<FileWriteOptions> options) const override;
diff --git a/cpp/src/arrow/dataset/file_ipc_test.cc b/cpp/src/arrow/dataset/file_ipc_test.cc
index 24fed4795da..561ef00ae0b 100644
--- a/cpp/src/arrow/dataset/file_ipc_test.cc
+++ b/cpp/src/arrow/dataset/file_ipc_test.cc
@@ -89,6 +89,7 @@ TEST_F(TestIpcFileFormat, InspectFailureWithRelevantError) {
 }
 TEST_F(TestIpcFileFormat, Inspect) { TestInspect(); }
 TEST_F(TestIpcFileFormat, IsSupported) { TestIsSupported(); }
+TEST_F(TestIpcFileFormat, CountRows) { TestCountRows(); }
 
 class TestIpcFileSystemDataset : public testing::Test,
                                  public WriteFileSystemDatasetMixin {
diff --git a/cpp/src/arrow/dataset/file_parquet.cc b/cpp/src/arrow/dataset/file_parquet.cc
index 8dbc18059b3..94bf5355dc4 100644
--- a/cpp/src/arrow/dataset/file_parquet.cc
+++ b/cpp/src/arrow/dataset/file_parquet.cc
@@ -24,6 +24,7 @@
 #include <utility>
 #include <vector>
 
+#include "arrow/compute/exec.h"
 #include "arrow/dataset/dataset_internal.h"
 #include "arrow/dataset/scanner.h"
 #include "arrow/filesystem/path_util.h"
@@ -185,8 +186,8 @@ static util::optional<compute::Expression> ColumnChunkStatisticsAsExpression(
   auto field_expr = compute::field_ref(field->name());
 
   // Optimize for corner case where all values are nulls
-  if (statistics->num_values() == statistics->null_count()) {
-    return equal(std::move(field_expr), compute::literal(MakeNullScalar(field->type())));
+  if (statistics->num_values() == 0 && statistics->null_count() > 0) {
+    return is_null(std::move(field_expr));
   }
 
   std::shared_ptr<Scalar> min, max;
@@ -197,12 +198,16 @@ static util::optional<compute::Expression> ColumnChunkStatisticsAsExpression(
   auto maybe_min = min->CastTo(field->type());
   auto maybe_max = max->CastTo(field->type());
   if (maybe_min.ok() && maybe_max.ok()) {
-    auto lower_bound =
-        compute::greater_equal(field_expr, compute::literal(maybe_min.MoveValueUnsafe()));
-
-    auto upper_bound = compute::less_equal(std::move(field_expr),
-                                           compute::literal(maybe_max.MoveValueUnsafe()));
+    auto col_min = maybe_min.MoveValueUnsafe();
+    auto col_max = maybe_max.MoveValueUnsafe();
+    if (col_min->Equals(col_max)) {
+      return compute::equal(std::move(field_expr), compute::literal(std::move(col_min)));
+    }
 
+    auto lower_bound =
+        compute::greater_equal(field_expr, compute::literal(std::move(col_min)));
+    auto upper_bound =
+        compute::less_equal(std::move(field_expr), compute::literal(std::move(col_max)));
     return compute::and_(std::move(lower_bound), std::move(upper_bound));
   }
 
@@ -385,6 +390,23 @@ Result<ScanTaskIterator> ParquetFileFormat::ScanFile(
   return MakeVectorIterator(std::move(tasks));
 }
 
+Future<util::optional<int64_t>> ParquetFileFormat::CountRows(
+    const std::shared_ptr<FileFragment>& file, compute::Expression predicate,
+    std::shared_ptr<ScanOptions> options) {
+  auto parquet_file = internal::checked_pointer_cast<ParquetFileFragment>(file);
+  if (parquet_file->metadata()) {
+    ARROW_ASSIGN_OR_RAISE(auto maybe_count,
+                          parquet_file->TryCountRows(std::move(predicate)));
+    return Future<util::optional<int64_t>>::MakeFinished(maybe_count);
+  } else {
+    return DeferNotOk(options->io_context.executor()->Submit(
+        [parquet_file, predicate]() -> Result<util::optional<int64_t>> {
+          RETURN_NOT_OK(parquet_file->EnsureCompleteMetadata());
+          return parquet_file->TryCountRows(predicate);
+        }));
+  }
+}
+
 Result<std::shared_ptr<ParquetFileFragment>> ParquetFileFormat::MakeFragment(
     FileSource source, compute::Expression partition_expression,
     std::shared_ptr<Schema> physical_schema, std::vector<int> row_groups) {
@@ -559,6 +581,21 @@ inline void FoldingAnd(compute::Expression* l, compute::Expression r) {
 
 Result<std::vector<int>> ParquetFileFragment::FilterRowGroups(
     compute::Expression predicate) {
+  std::vector<int> row_groups;
+  ARROW_ASSIGN_OR_RAISE(auto expressions, TestRowGroups(std::move(predicate)));
+
+  auto lock = physical_schema_mutex_.Lock();
+  DCHECK(expressions.empty() || (expressions.size() == row_groups_->size()));
+  for (size_t i = 0; i < expressions.size(); i++) {
+    if (expressions[i].IsSatisfiable()) {
+      row_groups.push_back(row_groups_->at(i));
+    }
+  }
+  return row_groups;
+}
+
+Result<std::vector<compute::Expression>> ParquetFileFragment::TestRowGroups(
+    compute::Expression predicate) {
   auto lock = physical_schema_mutex_.Lock();
 
   DCHECK_NE(metadata_, nullptr);
@@ -566,7 +603,7 @@ Result<std::vector<int>> ParquetFileFragment::FilterRowGroups(
       predicate, SimplifyWithGuarantee(std::move(predicate), partition_expression_));
 
   if (!predicate.IsSatisfiable()) {
-    return std::vector<int>{};
+    return std::vector<compute::Expression>{};
   }
 
   for (const FieldRef& ref : FieldsInExpression(predicate)) {
@@ -592,18 +629,35 @@ Result<std::vector<int>> ParquetFileFragment::FilterRowGroups(
     }
   }
 
-  std::vector<int> row_groups;
+  std::vector<compute::Expression> row_groups(row_groups_->size());
   for (size_t i = 0; i < row_groups_->size(); ++i) {
     ARROW_ASSIGN_OR_RAISE(auto row_group_predicate,
                           SimplifyWithGuarantee(predicate, statistics_expressions_[i]));
-    if (row_group_predicate.IsSatisfiable()) {
-      row_groups.push_back(row_groups_->at(i));
-    }
+    row_groups[i] = std::move(row_group_predicate);
   }
-
   return row_groups;
 }
 
+Result<util::optional<int64_t>> ParquetFileFragment::TryCountRows(
+    compute::Expression predicate) {
+  DCHECK_NE(metadata_, nullptr);
+  if (ExpressionHasFieldRefs(predicate)) {
+    ARROW_ASSIGN_OR_RAISE(auto expressions, TestRowGroups(std::move(predicate)));
+    int64_t rows = 0;
+    for (size_t i = 0; i < row_groups_->size(); i++) {
+      // If the row group is entirely excluded, exclude it from the row count
+      if (!expressions[i].IsSatisfiable()) continue;
+      // Unless the row group is entirely included, bail out of fast path
+      if (expressions[i] != compute::literal(true)) return util::nullopt;
+      BEGIN_PARQUET_CATCH_EXCEPTIONS
+      rows += metadata()->RowGroup((*row_groups_)[i])->num_rows();
+      END_PARQUET_CATCH_EXCEPTIONS
+    }
+    return rows;
+  }
+  return metadata()->num_rows();
+}
+
 //
 // ParquetFragmentScanOptions
 //
diff --git a/cpp/src/arrow/dataset/file_parquet.h b/cpp/src/arrow/dataset/file_parquet.h
index 99e18337ad7..f49637b13a1 100644
--- a/cpp/src/arrow/dataset/file_parquet.h
+++ b/cpp/src/arrow/dataset/file_parquet.h
@@ -99,6 +99,10 @@ class ARROW_DS_EXPORT ParquetFileFormat : public FileFormat {
       const std::shared_ptr<ScanOptions>& options,
       const std::shared_ptr<FileFragment>& file) const override;
 
+  Future<util::optional<int64_t>> CountRows(
+      const std::shared_ptr<FileFragment>& file, compute::Expression predicate,
+      std::shared_ptr<ScanOptions> options) override;
+
   using FileFormat::MakeFragment;
 
   /// \brief Create a Fragment targeting all RowGroups.
@@ -172,6 +176,12 @@ class ARROW_DS_EXPORT ParquetFileFragment : public FileFragment {
 
   /// Return a filtered subset of row group indices.
   Result<std::vector<int>> FilterRowGroups(compute::Expression predicate);
+  /// Simplify the predicate against the statistics of each row group.
+  Result<std::vector<compute::Expression>> TestRowGroups(compute::Expression predicate);
+  /// Try to count rows matching the predicate using metadata. Expects
+  /// metadata to be present, and expects the predicate to have been
+  /// simplified against the partition expression already.
+  Result<util::optional<int64_t>> TryCountRows(compute::Expression predicate);
 
   ParquetFileFormat& parquet_format_;
 
diff --git a/cpp/src/arrow/dataset/file_parquet_test.cc b/cpp/src/arrow/dataset/file_parquet_test.cc
index 599c6240c1c..7722d4da885 100644
--- a/cpp/src/arrow/dataset/file_parquet_test.cc
+++ b/cpp/src/arrow/dataset/file_parquet_test.cc
@@ -220,6 +220,69 @@ TEST_F(TestParquetFileFormat, WriteRecordBatchReaderCustomOptions) {
                     *actual_schema);
 }
 
+TEST_F(TestParquetFileFormat, CountRows) { TestCountRows(); }
+
+TEST_F(TestParquetFileFormat, CountRowsPredicatePushdown) {
+  constexpr int64_t kNumRowGroups = 16;
+  constexpr int64_t kTotalNumRows = kNumRowGroups * (kNumRowGroups + 1) / 2;
+
+  // See PredicatePushdown test below for a description of the generated data
+  auto reader = ArithmeticDatasetFixture::GetRecordBatchReader(kNumRowGroups);
+  auto source = GetFileSource(reader.get());
+  auto options = std::make_shared<ScanOptions>();
+
+  auto fragment = MakeFragment(*source);
+
+  ASSERT_FINISHES_OK_AND_EQ(util::make_optional<int64_t>(kTotalNumRows),
+                            fragment->CountRows(literal(true), options));
+
+  for (int i = 1; i <= kNumRowGroups; i++) {
+    SCOPED_TRACE(i);
+    // The row group for which all values in column i64 == i has i rows
+    auto predicate = less_equal(field_ref("i64"), literal(i));
+    ASSERT_OK_AND_ASSIGN(predicate, predicate.Bind(*reader->schema()));
+    auto expected = i * (i + 1) / 2;
+    ASSERT_FINISHES_OK_AND_EQ(util::make_optional<int64_t>(expected),
+                              fragment->CountRows(predicate, options));
+
+    predicate = and_(less_equal(field_ref("i64"), literal(i)),
+                     greater_equal(field_ref("i64"), literal(i)));
+    ASSERT_OK_AND_ASSIGN(predicate, predicate.Bind(*reader->schema()));
+    ASSERT_FINISHES_OK_AND_EQ(util::make_optional<int64_t>(i),
+                              fragment->CountRows(predicate, options));
+
+    predicate = equal(field_ref("i64"), literal(i));
+    ASSERT_OK_AND_ASSIGN(predicate, predicate.Bind(*reader->schema()));
+    ASSERT_FINISHES_OK_AND_EQ(util::make_optional<int64_t>(i),
+                              fragment->CountRows(predicate, options));
+  }
+
+  // Ensure nulls are properly handled
+  {
+    auto dataset_schema = schema({field("i64", int64())});
+    auto null_batch = RecordBatchFromJSON(dataset_schema, R"([
+[null],
+[null],
+[null]
+])");
+    auto batch = RecordBatchFromJSON(dataset_schema, R"([
+[1],
+[2]
+])");
+    ASSERT_OK_AND_ASSIGN(auto reader,
+                         RecordBatchReader::Make({null_batch, batch}, dataset_schema));
+    auto source = GetFileSource(reader.get());
+    auto fragment = MakeFragment(*source);
+    ASSERT_OK_AND_ASSIGN(
+        auto predicate,
+        greater_equal(field_ref("i64"), literal(1)).Bind(*dataset_schema));
+    ASSERT_FINISHES_OK_AND_EQ(util::make_optional<int64_t>(2),
+                              fragment->CountRows(predicate, options));
+    // TODO(ARROW-12659): SimplifyWithGuarantee can't handle
+    // not(is_null) so trying to count with is_null doesn't work
+  }
+}
+
 class TestParquetFileSystemDataset : public WriteFileSystemDatasetMixin,
                                      public testing::Test {
  public:
diff --git a/cpp/src/arrow/dataset/scanner.cc b/cpp/src/arrow/dataset/scanner.cc
index 41fa7ec5c77..43239a676e5 100644
--- a/cpp/src/arrow/dataset/scanner.cc
+++ b/cpp/src/arrow/dataset/scanner.cc
@@ -153,6 +153,17 @@ Result<EnumeratedRecordBatchIterator> Scanner::AddPositioningToInOrderScan(
       EnumeratingIterator{std::make_shared<State>(std::move(scan), std::move(first))});
 }
 
+Result<int64_t> Scanner::CountRows() {
+  // Naive base implementation
+  ARROW_ASSIGN_OR_RAISE(auto batch_it, ScanBatchesUnordered());
+  int64_t count = 0;
+  RETURN_NOT_OK(batch_it.Visit([&](EnumeratedRecordBatch batch) {
+    count += batch.record_batch.value->num_rows();
+    return Status::OK();
+  }));
+  return count;
+}
+
 struct ScanBatchesState : public std::enable_shared_from_this<ScanBatchesState> {
   explicit ScanBatchesState(ScanTaskIterator scan_task_it,
                             std::shared_ptr<TaskGroup> task_group_)
@@ -286,10 +297,12 @@ class ARROW_DS_EXPORT SyncScanner : public Scanner {
   Result<std::shared_ptr<Table>> ToTable() override;
   Result<TaggedRecordBatchGenerator> ScanBatchesAsync() override;
   Result<EnumeratedRecordBatchGenerator> ScanBatchesUnorderedAsync() override;
+  Result<int64_t> CountRows() override;
 
  protected:
   /// \brief GetFragments returns an iterator over all Fragments in this scan.
   Result<FragmentIterator> GetFragments();
+  Result<TaggedRecordBatchIterator> ScanBatches(ScanTaskIterator scan_task_it);
   Future<std::shared_ptr<Table>> ToTableInternal(internal::Executor* cpu_executor);
   Result<ScanTaskIterator> ScanInternal();
 
@@ -300,6 +313,11 @@ class ARROW_DS_EXPORT SyncScanner : public Scanner {
 
 Result<TaggedRecordBatchIterator> SyncScanner::ScanBatches() {
   ARROW_ASSIGN_OR_RAISE(auto scan_task_it, ScanInternal());
+  return ScanBatches(std::move(scan_task_it));
+}
+
+Result<TaggedRecordBatchIterator> SyncScanner::ScanBatches(
+    ScanTaskIterator scan_task_it) {
   auto task_group = scan_options_->TaskGroup();
   auto state = std::make_shared<ScanBatchesState>(std::move(scan_task_it), task_group);
   for (int i = 0; i < scan_options_->fragment_readahead; i++) {
@@ -388,6 +406,7 @@ class ARROW_DS_EXPORT AsyncScanner : public Scanner,
   Result<EnumeratedRecordBatchIterator> ScanBatchesUnordered() override;
   Result<EnumeratedRecordBatchGenerator> ScanBatchesUnorderedAsync() override;
   Result<std::shared_ptr<Table>> ToTable() override;
+  Result<int64_t> CountRows() override;
 
  private:
   Result<TaggedRecordBatchGenerator> ScanBatchesAsync(internal::Executor* executor);
@@ -466,9 +485,9 @@ inline EnumeratedRecordBatchGenerator FilterAndProjectRecordBatchAsync(
 
 Result<EnumeratedRecordBatchGenerator> FragmentToBatches(
     std::shared_ptr<AsyncScanner> scanner,
-    const Enumerated<std::shared_ptr<Fragment>>& fragment) {
-  ARROW_ASSIGN_OR_RAISE(auto batch_gen,
-                        fragment.value->ScanBatchesAsync(scanner->options()));
+    const Enumerated<std::shared_ptr<Fragment>>& fragment,
+    const std::shared_ptr<ScanOptions>& options) {
+  ARROW_ASSIGN_OR_RAISE(auto batch_gen, fragment.value->ScanBatchesAsync(options));
   auto enumerated_batch_gen = MakeEnumeratedGenerator(std::move(batch_gen));
 
   auto combine_fn =
@@ -488,10 +507,43 @@ Result<AsyncGenerator<EnumeratedRecordBatchGenerator>> FragmentsToBatches(
   return MakeMappedGenerator<EnumeratedRecordBatchGenerator>(
       std::move(enumerated_fragment_gen),
       [scanner](const Enumerated<std::shared_ptr<Fragment>>& fragment) {
-        return FragmentToBatches(scanner, fragment);
+        return FragmentToBatches(scanner, fragment, scanner->options());
       });
 }
 
+Result<AsyncGenerator<AsyncGenerator<util::optional<int64_t>>>> FragmentsToRowCount(
+    std::shared_ptr<AsyncScanner> scanner, FragmentGenerator fragment_gen) {
+  // Must use optional<int64_t> to avoid breaking the pipeline on empty batches
+  auto enumerated_fragment_gen = MakeEnumeratedGenerator(std::move(fragment_gen));
+  auto options = std::make_shared<ScanOptions>(*scanner->options());
+  RETURN_NOT_OK(SetProjection(options.get(), std::vector<std::string>()));
+  auto count_fragment_fn =
+      [scanner, options](const Enumerated<std::shared_ptr<Fragment>>& fragment)
+      -> Result<AsyncGenerator<util::optional<int64_t>>> {
+    auto count_fut = fragment.value->CountRows(options->filter, options);
+    return MakeFromFuture(
+        count_fut.Then([=](util::optional<int64_t> val)
+                           -> Result<AsyncGenerator<util::optional<int64_t>>> {
+          // Fast path
+          if (val.has_value()) {
+            return MakeSingleFutureGenerator(
+                Future<util::optional<int64_t>>::MakeFinished(val));
+          }
+          // Slow path
+          ARROW_ASSIGN_OR_RAISE(auto batch_gen,
+                                FragmentToBatches(scanner, fragment, options));
+          auto count_fn =
+              [](const EnumeratedRecordBatch& enumerated) -> util::optional<int64_t> {
+            return enumerated.record_batch.value->num_rows();
+          };
+          return MakeMappedGenerator<util::optional<int64_t>>(batch_gen,
+                                                              std::move(count_fn));
+        }));
+  };
+  return MakeMappedGenerator<AsyncGenerator<util::optional<int64_t>>>(
+      std::move(enumerated_fragment_gen), std::move(count_fragment_fn));
+}
+
 }  // namespace
 
 Result<FragmentGenerator> AsyncScanner::GetFragments() const {
@@ -651,6 +703,23 @@ Future<std::shared_ptr<Table>> AsyncScanner::ToTableAsync(
       });
 }
 
+Result<int64_t> AsyncScanner::CountRows() {
+  auto self = shared_from_this();
+  ARROW_ASSIGN_OR_RAISE(auto fragment_gen, GetFragments());
+  ARROW_ASSIGN_OR_RAISE(auto count_gen_gen,
+                        FragmentsToRowCount(self, std::move(fragment_gen)));
+  auto count_gen = MakeConcatenatedGenerator(std::move(count_gen_gen));
+  int64_t total = 0;
+  auto sum_fn = [&total](util::optional<int64_t> count) -> Status {
+    if (count.has_value()) total += *count;
+    return Status::OK();
+  };
+  RETURN_NOT_OK(VisitAsyncGenerator<util::optional<int64_t>>(std::move(count_gen),
+                                                             std::move(sum_fn))
+                    .status());
+  return total;
+}
+
 ScannerBuilder::ScannerBuilder(std::shared_ptr<Dataset> dataset)
     : ScannerBuilder(std::move(dataset), std::make_shared<ScanOptions>()) {}
 
@@ -912,5 +981,48 @@ Result<std::shared_ptr<Table>> Scanner::Head(int64_t num_rows) {
   return Table::FromRecordBatches(options()->projected_schema, batches);
 }
 
+Result<int64_t> SyncScanner::CountRows() {
+  // While readers could implement an optimization where they just fabricate empty
+  // batches based on metadata when no columns are selected, skipping I/O (and
+  // indeed, the Parquet reader does this), counting rows using that optimization is
+  // still slower than just hitting metadata directly where possible.
+  ARROW_ASSIGN_OR_RAISE(auto fragment_it, GetFragments());
+  std::vector<Future<int64_t>> futures;
+  FragmentVector fragments;
+  for (auto maybe_fragment : fragment_it) {
+    ARROW_ASSIGN_OR_RAISE(auto fragment, maybe_fragment);
+    auto count_fut = fragment->CountRows(scan_options_->filter, scan_options_);
+    // Take fragments by reference since future must complete before method returns
+    futures.push_back(
+        count_fut.Then([&fragments, fragment](util::optional<int64_t> count) -> int64_t {
+          if (count.has_value()) {
+            return *count;
+          }
+          fragments.push_back(fragment);
+          return 0;
+        }));
+  }
+
+  int64_t count = 0;
+  for (auto& future : futures) {
+    ARROW_ASSIGN_OR_RAISE(auto subcount, future.result());
+    count += subcount;
+  }
+  // Now check for any fragments where we couldn't take the fast path
+  if (!fragments.empty()) {
+    auto options = std::make_shared<ScanOptions>(*scan_options_);
+    RETURN_NOT_OK(SetProjection(options.get(), std::vector<std::string>()));
+    ARROW_ASSIGN_OR_RAISE(
+        auto scan_task_it,
+        GetScanTaskIterator(MakeVectorIterator(std::move(fragments)), options));
+    ARROW_ASSIGN_OR_RAISE(auto batch_it, ScanBatches(std::move(scan_task_it)));
+    RETURN_NOT_OK(batch_it.Visit([&](TaggedRecordBatch batch) {
+      count += batch.record_batch->num_rows();
+      return Status::OK();
+    }));
+  }
+  return count;
+}
+
 }  // namespace dataset
 }  // namespace arrow
diff --git a/cpp/src/arrow/dataset/scanner.h b/cpp/src/arrow/dataset/scanner.h
index 15bd27ab4f3..99833c95bf0 100644
--- a/cpp/src/arrow/dataset/scanner.h
+++ b/cpp/src/arrow/dataset/scanner.h
@@ -285,6 +285,11 @@ class ARROW_DS_EXPORT Scanner {
   virtual Result<std::shared_ptr<Table>> TakeRows(const Array& indices);
   /// \brief Get the first N rows.
   virtual Result<std::shared_ptr<Table>> Head(int64_t num_rows);
+  /// \brief Count rows matching a predicate.
+  ///
+  /// This method will push down the predicate and compute the result based on fragment
+  /// metadata if possible.
+  virtual Result<int64_t> CountRows();
 
   /// \brief Get the options for this scan.
   const std::shared_ptr<ScanOptions>& options() const { return scan_options_; }
diff --git a/cpp/src/arrow/dataset/scanner_test.cc b/cpp/src/arrow/dataset/scanner_test.cc
index 17f4e079ae4..b7c68daa0fc 100644
--- a/cpp/src/arrow/dataset/scanner_test.cc
+++ b/cpp/src/arrow/dataset/scanner_test.cc
@@ -321,6 +321,108 @@ TEST_P(TestScanner, TakeIndices) {
   }
 }
 
+TEST_P(TestScanner, CountRows) {
+  const auto items_per_batch = GetParam().items_per_batch;
+  const auto num_batches = GetParam().num_batches;
+  const auto num_datasets = GetParam().num_child_datasets;
+  SetSchema({field("i32", int32()), field("f64", float64())});
+  ArrayVector arrays(2);
+  ArrayFromVector<Int32Type>(
+      internal::Iota<int32_t>(static_cast<int32_t>(items_per_batch)), &arrays[0]);
+  ArrayFromVector<DoubleType>(
+      internal::Iota<double>(static_cast<double>(items_per_batch)), &arrays[1]);
+  auto batch = RecordBatch::Make(schema_, items_per_batch, arrays);
+  auto scanner = MakeScanner(batch);
+
+  ASSERT_OK_AND_ASSIGN(auto rows, scanner->CountRows());
+  ASSERT_EQ(rows, num_datasets * num_batches * items_per_batch);
+
+  ASSERT_OK_AND_ASSIGN(options_->filter,
+                       greater_equal(field_ref("i32"), literal(64)).Bind(*schema_));
+  ASSERT_OK_AND_ASSIGN(rows, scanner->CountRows());
+  ASSERT_EQ(rows, num_datasets * num_batches * (items_per_batch - 64));
+}
+
+class CountRowsOnlyFragment : public InMemoryFragment {
+ public:
+  using InMemoryFragment::InMemoryFragment;
+
+  Future<util::optional<int64_t>> CountRows(compute::Expression predicate,
+                                            std::shared_ptr<ScanOptions>) override {
+    if (compute::FieldsInExpression(predicate).size() > 0) {
+      return Future<util::optional<int64_t>>::MakeFinished(util::nullopt);
+    }
+    int64_t sum = 0;
+    for (const auto& batch : record_batches_) {
+      sum += batch->num_rows();
+    }
+    return Future<util::optional<int64_t>>::MakeFinished(sum);
+  }
+  Result<ScanTaskIterator> Scan(std::shared_ptr<ScanOptions>) override {
+    return Status::Invalid("Don't scan me!");
+  }
+  Result<RecordBatchGenerator> ScanBatchesAsync(
+      const std::shared_ptr<ScanOptions>&) override {
+    return Status::Invalid("Don't scan me!");
+  }
+};
+
+class ScanOnlyFragment : public InMemoryFragment {
+ public:
+  using InMemoryFragment::InMemoryFragment;
+
+  Future<util::optional<int64_t>> CountRows(compute::Expression predicate,
+                                            std::shared_ptr<ScanOptions>) override {
+    return Future<util::optional<int64_t>>::MakeFinished(util::nullopt);
+  }
+  Result<ScanTaskIterator> Scan(std::shared_ptr<ScanOptions> options) override {
+    auto self = shared_from_this();
+    ScanTaskVector tasks{
+        std::make_shared<InMemoryScanTask>(record_batches_, options, self)};
+    return MakeVectorIterator(std::move(tasks));
+  }
+  Result<RecordBatchGenerator> ScanBatchesAsync(
+      const std::shared_ptr<ScanOptions>&) override {
+    return MakeVectorGenerator(record_batches_);
+  }
+};
+
+// Ensure the pipeline does not break on an empty batch
+TEST_P(TestScanner, CountRowsEmpty) {
+  SetSchema({field("i32", int32()), field("f64", float64())});
+  auto empty_batch = ConstantArrayGenerator::Zeroes(0, schema_);
+  auto batch = ConstantArrayGenerator::Zeroes(GetParam().items_per_batch, schema_);
+  RecordBatchVector batches = {empty_batch, batch};
+  ScannerBuilder builder(
+      std::make_shared<FragmentDataset>(
+          schema_, FragmentVector{std::make_shared<ScanOnlyFragment>(batches)}),
+      options_);
+  ASSERT_OK(builder.UseAsync(GetParam().use_async));
+  ASSERT_OK(builder.UseThreads(GetParam().use_threads));
+  ASSERT_OK_AND_ASSIGN(auto scanner, builder.Finish());
+  ASSERT_OK_AND_EQ(batch->num_rows(), scanner->CountRows());
+}
+
+TEST_P(TestScanner, CountRowsWithMetadata) {
+  SetSchema({field("i32", int32()), field("f64", float64())});
+  auto batch = ConstantArrayGenerator::Zeroes(GetParam().items_per_batch, schema_);
+  RecordBatchVector batches = {batch, batch, batch, batch};
+  ScannerBuilder builder(
+      std::make_shared<FragmentDataset>(
+          schema_, FragmentVector{std::make_shared<CountRowsOnlyFragment>(batches)}),
+      options_);
+  ASSERT_OK(builder.UseAsync(GetParam().use_async));
+  ASSERT_OK(builder.UseThreads(GetParam().use_threads));
+  ASSERT_OK_AND_ASSIGN(auto scanner, builder.Finish());
+  ASSERT_OK_AND_EQ(4 * batch->num_rows(), scanner->CountRows());
+
+  ASSERT_OK(builder.Filter(equal(field_ref("i32"), literal(5))));
+  ASSERT_OK_AND_ASSIGN(scanner, builder.Finish());
+  // Scanner should fall back on reading data and hit the error
+  EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, ::testing::HasSubstr("Don't scan me!"),
+                                  scanner->CountRows());
+}
+
 class FailingFragment : public InMemoryFragment {
  public:
   using InMemoryFragment::InMemoryFragment;
@@ -508,7 +610,10 @@ TEST_P(TestScanner, Head) {
 }
 
 INSTANTIATE_TEST_SUITE_P(TestScannerThreading, TestScanner,
-                         ::testing::ValuesIn(TestScannerParams::Values()));
+                         ::testing::ValuesIn(TestScannerParams::Values()),
+                         [](const ::testing::TestParamInfo<TestScannerParams>& info) {
+                           return std::to_string(info.index) + info.param.ToString();
+                         });
 
 /// These ControlledXyz classes allow for controlling the order in which things are
 /// delivered so that we can test out of order resequencing.  The dataset allows
diff --git a/cpp/src/arrow/dataset/test_util.h b/cpp/src/arrow/dataset/test_util.h
index 42c544dd93e..1e4222eec8c 100644
--- a/cpp/src/arrow/dataset/test_util.h
+++ b/cpp/src/arrow/dataset/test_util.h
@@ -42,6 +42,7 @@
 #include "arrow/filesystem/test_util.h"
 #include "arrow/record_batch.h"
 #include "arrow/table.h"
+#include "arrow/testing/future_util.h"
 #include "arrow/testing/generator.h"
 #include "arrow/testing/gtest_util.h"
 #include "arrow/testing/random.h"
@@ -363,6 +364,13 @@ class FileFormatFixtureMixin : public ::testing::Test {
     return fragment;
   }
 
+  std::shared_ptr<FileFragment> MakeFragment(const FileSource& source,
+                                             compute::Expression partition_expression) {
+    EXPECT_OK_AND_ASSIGN(auto fragment,
+                         format_->MakeFragment(source, partition_expression));
+    return fragment;
+  }
+
   std::shared_ptr<FileSource> GetFileSource(RecordBatchReader* reader) {
     EXPECT_OK_AND_ASSIGN(auto buffer, FormatHelper::Write(reader));
     return std::make_shared<FileSource>(std::move(buffer));
@@ -450,6 +458,34 @@ class FileFormatFixtureMixin : public ::testing::Test {
     auto written = this->WriteToBuffer(reader->schema());
     AssertBufferEqual(*written, *source->buffer());
   }
+  void TestCountRows() {
+    auto options = std::make_shared<ScanOptions>();
+    auto reader = this->GetRecordBatchReader(schema({field("f64", float64())}));
+    auto full_schema = schema({field("f64", float64()), field("part", int64())});
+    auto source = this->GetFileSource(reader.get());
+
+    auto fragment = this->MakeFragment(*source);
+    ASSERT_FINISHES_OK_AND_EQ(util::make_optional<int64_t>(expected_rows()),
+                              fragment->CountRows(literal(true), options));
+
+    fragment = this->MakeFragment(*source, equal(field_ref("part"), literal(2)));
+    ASSERT_FINISHES_OK_AND_EQ(util::make_optional<int64_t>(expected_rows()),
+                              fragment->CountRows(literal(true), options));
+
+    auto predicate = equal(field_ref("part"), literal(1));
+    ASSERT_OK_AND_ASSIGN(predicate, predicate.Bind(*full_schema));
+    ASSERT_FINISHES_OK_AND_EQ(util::make_optional<int64_t>(0),
+                              fragment->CountRows(predicate, options));
+
+    predicate = equal(field_ref("part"), literal(2));
+    ASSERT_OK_AND_ASSIGN(predicate, predicate.Bind(*full_schema));
+    ASSERT_FINISHES_OK_AND_EQ(util::make_optional<int64_t>(expected_rows()),
+                              fragment->CountRows(predicate, options));
+
+    predicate = equal(call("add", {field_ref("f64"), literal(3)}), literal(2));
+    ASSERT_OK_AND_ASSIGN(predicate, predicate.Bind(*full_schema));
+    ASSERT_FINISHES_OK_AND_EQ(util::nullopt, fragment->CountRows(predicate, options));
+  }
 
  protected:
   std::shared_ptr<typename FormatHelper::FormatType> format_;
diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc
index 16ac0fe9c23..06ad0c89f2b 100644
--- a/cpp/src/arrow/ipc/reader.cc
+++ b/cpp/src/arrow/ipc/reader.cc
@@ -995,6 +995,25 @@ class RecordBatchFileReaderImpl : public RecordBatchFileReader {
     return batch;
   }
 
+  Result<int64_t> CountRows() override {
+    int64_t total = 0;
+    for (int i = 0; i < num_record_batches(); i++) {
+      ARROW_ASSIGN_OR_RAISE(auto outer_message,
+                            ReadMessageFromBlock(GetRecordBatchBlock(i)));
+      auto metadata = outer_message->metadata();
+      const flatbuf::Message* message = nullptr;
+      RETURN_NOT_OK(
+          internal::VerifyMessage(metadata->data(), metadata->size(), &message));
+      auto batch = message->header_as_RecordBatch();
+      if (batch == nullptr) {
+        return Status::IOError(
+            "Header-type of flatbuffer-encoded Message is not RecordBatch.");
+      }
+      total += batch->length();
+    }
+    return total;
+  }
+
   Status Open(const std::shared_ptr<io::RandomAccessFile>& file, int64_t footer_offset,
               const IpcReadOptions& options) {
     owned_file_ = file;
diff --git a/cpp/src/arrow/ipc/reader.h b/cpp/src/arrow/ipc/reader.h
index fe9a3b72e16..38f7f2ed8b9 100644
--- a/cpp/src/arrow/ipc/reader.h
+++ b/cpp/src/arrow/ipc/reader.h
@@ -169,6 +169,9 @@ class ARROW_EXPORT RecordBatchFileReader {
 
   /// \brief Return current read statistics
   virtual ReadStats stats() const = 0;
+
+  /// \brief Computes the total number of rows in the file.
+  virtual Result<int64_t> CountRows() = 0;
 };
 
 /// \brief A general listener class to receive events.
diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx
index cf076f6536b..eaeebdcde60 100644
--- a/python/pyarrow/_dataset.pyx
+++ b/python/pyarrow/_dataset.pyx
@@ -410,22 +410,24 @@ cdef class Dataset(_Weakrefable):
 
         Selecting a subset of the columns:
 
-        >>> dataset.scan(columns=["A", "B"])
+        >>> dataset.scanner(columns=["A", "B"]).to_table()
 
         Projecting selected columns using an expression:
 
-        >>> dataset.scan(columns={"A_int": ds.field("A").cast("int64")})
+        >>> dataset.scanner(columns={
+        ...     "A_int": ds.field("A").cast("int64"),
+        ... }).to_table()
 
         Filtering rows while scanning:
 
-        >>> dataset.scan(filter=ds.field("A") > 0)
+        >>> dataset.scanner(filter=ds.field("A") > 0).to_table()
         """
         return Scanner.from_dataset(self, **kwargs)
 
     def to_batches(self, **kwargs):
         """Read the dataset as materialized record batches.
 
-        See scan method parameters documentation.
+        See scanner method parameters documentation.
 
         Returns
         -------
@@ -439,7 +441,7 @@ cdef class Dataset(_Weakrefable):
         Note that this method reads all the selected data from the dataset
         into memory.
 
-        See scan method parameters documentation.
+        See scanner method parameters documentation.
 
         Returns
         -------
@@ -447,10 +449,21 @@ cdef class Dataset(_Weakrefable):
         """
         return self.scanner(**kwargs).to_table()
 
+    def take(self, object indices, **kwargs):
+        """Select rows of data by index.
+
+        See scanner method parameters documentation.
+
+        Returns
+        -------
+        table : Table instance
+        """
+        return self.scanner(**kwargs).take(indices)
+
     def head(self, int num_rows, **kwargs):
         """Load the first N rows of the dataset.
 
-        See scan method parameters documentation.
+        See scanner method parameters documentation.
 
         Returns
         -------
@@ -458,6 +471,17 @@ cdef class Dataset(_Weakrefable):
         """
         return self.scanner(**kwargs).head(num_rows)
 
+    def count_rows(self, **kwargs):
+        """Count rows matching the scanner filter.
+
+        See scanner method parameters documentation.
+
+        Returns
+        -------
+        count : int
+        """
+        return self.scanner(**kwargs).count_rows()
+
     @property
     def schema(self):
         """The common schema of the full Dataset"""
@@ -962,7 +986,7 @@ cdef class Fragment(_Weakrefable):
     def to_batches(self, Schema schema=None, **kwargs):
         """Read the fragment as materialized record batches.
 
-        See scan method parameters documentation.
+        See scanner method parameters documentation.
 
         Returns
         -------
@@ -976,7 +1000,7 @@ cdef class Fragment(_Weakrefable):
         Use this convenience utility with care. This will serially materialize
         the Scan result in memory before creating the Table.
 
-        See scan method parameters documentation.
+        See scanner method parameters documentation.
 
         Returns
         -------
@@ -984,10 +1008,21 @@ cdef class Fragment(_Weakrefable):
         """
         return self.scanner(schema=schema, **kwargs).to_table()
 
+    def take(self, object indices, **kwargs):
+        """Select rows of data by index.
+
+        See scanner method parameters documentation.
+
+        Returns
+        -------
+        table : Table instance
+        """
+        return self.scanner(**kwargs).take(indices)
+
     def head(self, int num_rows, **kwargs):
         """Load the first N rows of the fragment.
 
-        See scan method parameters documentation.
+        See scanner method parameters documentation.
 
         Returns
         -------
@@ -995,6 +1030,17 @@ cdef class Fragment(_Weakrefable):
         """
         return self.scanner(**kwargs).head(num_rows)
 
+    def count_rows(self, **kwargs):
+        """Count rows matching the scanner filter.
+
+        See scanner method parameters documentation.
+
+        Returns
+        -------
+        count : int
+        """
+        return self.scanner(**kwargs).count_rows()
+
 
 cdef class FileFragment(Fragment):
     """A Fragment representing a data file."""
@@ -2838,6 +2884,18 @@ cdef class Scanner(_Weakrefable):
             result = self.scanner.Head(num_rows)
         return pyarrow_wrap_table(GetResultValue(result))
 
+    def count_rows(self):
+        """Count rows matching the scanner filter.
+
+        Returns
+        -------
+        count : int
+        """
+        cdef CResult[int64_t] result
+        with nogil:
+            result = self.scanner.CountRows()
+        return GetResultValue(result)
+
 
 def _get_partition_keys(Expression partition_expression):
     """
diff --git a/python/pyarrow/includes/libarrow_dataset.pxd b/python/pyarrow/includes/libarrow_dataset.pxd
index bff1a2bbb54..7a02e0e4ba6 100644
--- a/python/pyarrow/includes/libarrow_dataset.pxd
+++ b/python/pyarrow/includes/libarrow_dataset.pxd
@@ -113,6 +113,7 @@ cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil:
         CResult[shared_ptr[CTable]] ToTable()
         CResult[shared_ptr[CTable]] TakeRows(const CArray& indices)
         CResult[shared_ptr[CTable]] Head(int64_t num_rows)
+        CResult[int64_t] CountRows()
         CResult[CFragmentIterator] GetFragments()
         const shared_ptr[CScanOptions]& options()
 
diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py
index 8791c22f103..8c9dc226630 100644
--- a/python/pyarrow/tests/test_dataset.py
+++ b/python/pyarrow/tests/test_dataset.py
@@ -355,6 +355,8 @@ def test_scanner(dataset):
     with pytest.raises(pa.ArrowIndexError):
         scanner.take(pa.array([table.num_rows]))
 
+    assert table.num_rows == scanner.count_rows()
+
 
 def test_head(dataset):
     result = dataset.head(0)
@@ -378,6 +380,32 @@ def test_head(dataset):
     assert result == {'i64': list(range(5))}
 
 
+def test_take(dataset):
+    fragment = next(dataset.get_fragments())
+    indices = pa.array([1, 3])
+    assert fragment.take(indices) == fragment.to_table().take(indices)
+    with pytest.raises(IndexError):
+        fragment.take(pa.array([5]))
+
+    indices = pa.array([1, 7])
+    assert dataset.take(indices) == dataset.to_table().take(indices)
+    with pytest.raises(IndexError):
+        dataset.take(pa.array([10]))
+
+
+def test_count_rows(dataset):
+    fragment = next(dataset.get_fragments())
+    assert fragment.count_rows() == 5
+    assert fragment.count_rows(filter=ds.field("i64") == 4) == 1
+
+    assert dataset.count_rows() == 10
+    # Filter on partition key
+    assert dataset.count_rows(filter=ds.field("group") == 1) == 5
+    # Filter on data
+    assert dataset.count_rows(filter=ds.field("i64") >= 3) == 4
+    assert dataset.count_rows(filter=ds.field("i64") < 0) == 0
+
+
 def test_abstract_classes():
     classes = [
         ds.FileFormat,
diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R
index b5a2f525a01..0063836970e 100644
--- a/r/R/arrowExports.R
+++ b/r/R/arrowExports.R
@@ -548,6 +548,10 @@ dataset___Scanner__TakeRows <- function(scanner, indices){
     .Call(`_arrow_dataset___Scanner__TakeRows`, scanner, indices)
 }
 
+dataset___Scanner__CountRows <- function(scanner){
+    .Call(`_arrow_dataset___Scanner__CountRows`, scanner)
+}
+
 Int8__initialize <- function(){
     .Call(`_arrow_Int8__initialize`)
 }
diff --git a/r/R/dataset-scan.R b/r/R/dataset-scan.R
index 750401e1736..84949bbd397 100644
--- a/r/R/dataset-scan.R
+++ b/r/R/dataset-scan.R
@@ -56,7 +56,8 @@
 Scanner <- R6Class("Scanner", inherit = ArrowObject,
   public = list(
     ToTable = function() dataset___Scanner__ToTable(self),
-    ScanBatches = function() dataset___Scanner__ScanBatches(self)
+    ScanBatches = function() dataset___Scanner__ScanBatches(self),
+    CountRows = function() dataset___Scanner__CountRows(self)
   ),
   active = list(
     schema = function() dataset___Scanner__schema(self)
diff --git a/r/R/dataset.R b/r/R/dataset.R
index 266633964b1..745c39af068 100644
--- a/r/R/dataset.R
+++ b/r/R/dataset.R
@@ -173,10 +173,7 @@ Dataset <- R6Class("Dataset", inherit = ArrowObject,
       }
     },
     metadata = function() self$schema$metadata,
-    num_rows = function() {
-      warning("Number of rows unknown; returning NA", call. = FALSE)
-      NA_integer_
-    },
+    num_rows = function() self$NewScan()$Finish()$CountRows(),
     num_cols = function() length(self$schema),
     # @description
     # Return the Dataset's type.
@@ -220,20 +217,6 @@ FileSystemDataset <- R6Class("FileSystemDataset", inherit = Dataset,
     # Return the filesystem of files in this `Dataset`
     filesystem = function() {
       dataset___FileSystemDataset__filesystem(self)
-    },
-    num_rows = function() {
-      if (inherits(self$format, "ParquetFileFormat")) {
-        # It's generally fast enough to skim the files directly
-        sum(map_int(self$files, ~ParquetFileReader$create(.x)$num_rows))
-      } else {
-        # TODO: implement for other file formats
-        warning("Number of rows unknown; returning NA", call. = FALSE)
-        NA_integer_
-        # Could do a scan, picking only the last column, which hopefully is virtual
-        # But this is can be slow
-        # Scanner$create(self, projection = tail(names(self), 1))$ToTable()$num_rows
-        # See also https://issues.apache.org/jira/browse/ARROW-9697
-      }
     }
   )
 )
diff --git a/r/R/dplyr.R b/r/R/dplyr.R
index c172e9ba065..21266d39b26 100644
--- a/r/R/dplyr.R
+++ b/r/R/dplyr.R
@@ -147,9 +147,8 @@ dim.arrow_dplyr_query <- function(x) {
   if (isTRUE(x$filtered)) {
     rows <- x$.data$num_rows
   } else if (query_on_dataset(x)) {
-    warning("Number of rows unknown; returning NA", call. = FALSE)
-    # TODO: https://issues.apache.org/jira/browse/ARROW-9697
-    rows <- NA_integer_
+    scanner <- Scanner$create(x)
+    rows <- scanner$CountRows()
   } else {
     # Evaluate the filter expression to a BooleanArray and count
     rows <- as.integer(sum(eval_array_expression(x$filtered_rows, x$.data), na.rm = TRUE))
diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp
index ef6d3063771..b274ac5f3af 100644
--- a/r/src/arrowExports.cpp
+++ b/r/src/arrowExports.cpp
@@ -2161,6 +2161,21 @@ extern "C" SEXP _arrow_dataset___Scanner__TakeRows(SEXP scanner_sexp, SEXP indic
 }
 #endif
 
+// dataset.cpp
+#if defined(ARROW_R_WITH_DATASET)
+int64_t dataset___Scanner__CountRows(const std::shared_ptr<ds::Scanner>& scanner);
+extern "C" SEXP _arrow_dataset___Scanner__CountRows(SEXP scanner_sexp){
+BEGIN_CPP11
+	arrow::r::Input<const std::shared_ptr<ds::Scanner>&>::type scanner(scanner_sexp);
+	return cpp11::as_sexp(dataset___Scanner__CountRows(scanner));
+END_CPP11
+}
+#else
+extern "C" SEXP _arrow_dataset___Scanner__CountRows(SEXP scanner_sexp){
+	Rf_error("Cannot call dataset___Scanner__CountRows(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+}
+#endif
+
 // datatype.cpp
 #if defined(ARROW_R_WITH_ARROW)
 std::shared_ptr<arrow::DataType> Int8__initialize();
@@ -6806,6 +6821,7 @@ static const R_CallMethodDef CallEntries[] = {
 		{ "_arrow_dataset___ScanTask__get_batches", (DL_FUNC) &_arrow_dataset___ScanTask__get_batches, 1}, 
 		{ "_arrow_dataset___Dataset__Write", (DL_FUNC) &_arrow_dataset___Dataset__Write, 6}, 
 		{ "_arrow_dataset___Scanner__TakeRows", (DL_FUNC) &_arrow_dataset___Scanner__TakeRows, 2}, 
+		{ "_arrow_dataset___Scanner__CountRows", (DL_FUNC) &_arrow_dataset___Scanner__CountRows, 1}, 
 		{ "_arrow_Int8__initialize", (DL_FUNC) &_arrow_Int8__initialize, 0}, 
 		{ "_arrow_Int16__initialize", (DL_FUNC) &_arrow_Int16__initialize, 0}, 
 		{ "_arrow_Int32__initialize", (DL_FUNC) &_arrow_Int32__initialize, 0}, 
diff --git a/r/src/dataset.cpp b/r/src/dataset.cpp
index 7d8ccae6eee..205423318c0 100644
--- a/r/src/dataset.cpp
+++ b/r/src/dataset.cpp
@@ -484,4 +484,9 @@ std::shared_ptr<arrow::Table> dataset___Scanner__TakeRows(
   return ValueOrStop(scanner->TakeRows(*indices));
 }
 
+// [[dataset::export]]
+int64_t dataset___Scanner__CountRows(const std::shared_ptr<ds::Scanner>& scanner) {
+  return ValueOrStop(scanner->CountRows());
+}
+
 #endif
diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R
index 4570c1f5762..1b0dcc07128 100644
--- a/r/tests/testthat/test-dataset.R
+++ b/r/tests/testthat/test-dataset.R
@@ -145,13 +145,11 @@ test_that("dim() correctly determine numbers of rows and columns on arrow_dplyr_
   skip_if_not_available("parquet")
   ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8()))
 
-  expect_warning(
-    expect_identical(
-      ds %>%
-        filter(chr == 'a') %>%
-        dim(),
-      c(NA, 7L)
-    )
+  expect_identical(
+    ds %>%
+      filter(chr == 'a') %>%
+      dim(),
+    c(2L, 7L)
   )
   expect_equal(
     ds %>%
@@ -159,14 +157,12 @@ test_that("dim() correctly determine numbers of rows and columns on arrow_dplyr_
       dim(),
     c(20L, 3L)
   )
-  expect_warning(
-    expect_identical(
-      ds %>%
-        select(chr, fct, int) %>%
-        filter(chr == 'a') %>%
-        dim(),
-      c(NA, 3L)
-    )
+  expect_identical(
+    ds %>%
+      select(chr, fct, int) %>%
+      filter(chr == 'a') %>%
+      dim(),
+    c(2L, 3L)
   )
 })
 
@@ -330,9 +326,7 @@ test_that("IPC/Feather format data", {
   expect_r6_class(ds$format, "IpcFileFormat")
   expect_r6_class(ds$filesystem, "LocalFileSystem")
   expect_identical(names(ds), c(names(df1), "part"))
-  expect_warning(
-    expect_identical(dim(ds), c(NA, 7L))
-  )
+  expect_identical(dim(ds), c(20L, 7L))
 
   expect_equivalent(
     ds %>%
@@ -358,9 +352,7 @@ test_that("CSV dataset", {
   expect_r6_class(ds$format, "CsvFileFormat")
   expect_r6_class(ds$filesystem, "LocalFileSystem")
   expect_identical(names(ds), c(names(df1), "part"))
-  expect_warning(
-    expect_identical(dim(ds), c(NA, 7L))
-  )
+  expect_identical(dim(ds), c(20L, 7L))
   expect_equivalent(
     ds %>%
       select(string = chr, integer = int, part) %>%

From 2e44be43778bd3c2fb75e694bebdec2934c0d225 Mon Sep 17 00:00:00 2001
From: Lorenz Walthert <lorenz.walthert@icloud.com>
Date: Thu, 6 May 2021 00:41:12 +0200
Subject: [PATCH 192/719] MINOR: [R] Typo in argument in NEWS.md

---
 r/NEWS.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/r/NEWS.md b/r/NEWS.md
index 71b69bb69c4..4efb4dbd40f 100644
--- a/r/NEWS.md
+++ b/r/NEWS.md
@@ -27,7 +27,7 @@ Many more `dplyr` verbs are supported on Arrow objects:
 
 * `dplyr::mutate()` is now supported in Arrow for many applications. For queries on `Table` and `RecordBatch` that are not yet supported in Arrow, the implementation falls back to pulling data into an in-memory R `data.frame` first, as in the previous release. For queries on `Dataset` (which can be larger than memory), it raises an error if the function is not implemented. The main `mutate()` features that cannot yet be called on Arrow objects are (1) `mutate()` after `group_by()` (which is typically used in combination with aggregation) and (2) queries that use `dplyr::across()`.
 * `dplyr::transmute()` (which calls `mutate()`)
-* `dplyr::group_by()` now preserves the `.drop()` argument and supports on-the-fly definition of columns
+* `dplyr::group_by()` now preserves the `.drop` argument and supports on-the-fly definition of columns
 * `dplyr::relocate()` to reorder columns
 * `dplyr::arrange()` to sort rows
 * `dplyr::compute()` to evaluate the lazy expressions and return an Arrow Table. This is equivalent to `dplyr::collect(as_data_frame = FALSE)`, which was added in 2.0.0.

From eb19aed0841b27be087908abad352d16c2a456d5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Thu, 6 May 2021 11:00:45 +0200
Subject: [PATCH 193/719] ARROW-12646: [C++][CI][Packaging][Python] Bump vcpkg
 version to its latest release
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- [x] bump vcpkg version
- [x] update vcpkg ports patch
- [x] update windows docker images hosted on ghcr.io
- [x] update crossbow cache builds for macos (https://github.com/ursacomputing/crossbow/pull/4 should be merged after this one)

Closes #10236 from kszucs/vcpkg-release

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 .env                                          |  2 +-
 ci/vcpkg/ports.patch                          | 20 +++++++++----------
 .../python-wheels/github.linux.amd64.yml      |  8 ++++++++
 dev/tasks/python-wheels/github.osx.yml        |  2 +-
 docker-compose.yml                            | 10 +++++-----
 5 files changed, 25 insertions(+), 17 deletions(-)

diff --git a/.env b/.env
index 510e11d9568..16e971c82e6 100644
--- a/.env
+++ b/.env
@@ -70,4 +70,4 @@ DEVTOOLSET_VERSION=-1
 
 # Used for the manylinux and windows wheels, please update the crossbow configuration on update:
 #   https://github.com/ursacomputing/crossbow/blob/master/.github/workflows/cache_vcpkg.yml
-VCPKG=fced4bef1606260f110d74de1ae1975c2b9ac549
+VCPKG="2021.04.30"
diff --git a/ci/vcpkg/ports.patch b/ci/vcpkg/ports.patch
index 14b9678690e..7bcba49c194 100644
--- a/ci/vcpkg/ports.patch
+++ b/ci/vcpkg/ports.patch
@@ -1,5 +1,5 @@
 diff --git a/ports/aws-c-common/portfile.cmake b/ports/aws-c-common/portfile.cmake
-index f3704ef05..3af543058 100644
+index f3704ef05b..3af543058d 100644
 --- a/ports/aws-c-common/portfile.cmake
 +++ b/ports/aws-c-common/portfile.cmake
 @@ -1,8 +1,8 @@
@@ -12,22 +12,22 @@ index f3704ef05..3af543058 100644
 +    SHA512 28256522ac6af544d7464e3e7dcd4dc802ae2b09728bf8f167f86a6487bb756d0cad5eb4a2480610b2967b9c24c4a7f70621894517aa2828ffdeb0479453803b
      HEAD_REF master
      PATCHES
-         disable-error-4068.patch # This patch fixes dependency port compilation failure
+         disable-error-4068.patch # This patch fixes dependency port compilation failure
 diff --git a/ports/curl/portfile.cmake b/ports/curl/portfile.cmake
-index 6e18aecd0..2ccecf33c 100644
+index be66d452be..a5ce325e9d 100644
 --- a/ports/curl/portfile.cmake
 +++ b/ports/curl/portfile.cmake
-@@ -76,6 +76,8 @@ vcpkg_configure_cmake(
+@@ -94,6 +94,8 @@ vcpkg_configure_cmake(
          -DCMAKE_DISABLE_FIND_PACKAGE_Perl=ON
          -DENABLE_DEBUG=ON
          -DCURL_CA_FALLBACK=ON
 +        -DCURL_CA_PATH=none
 +        -DCURL_CA_BUNDLE=none
- )
- 
- vcpkg_install_cmake()
+     OPTIONS_DEBUG
+         ${EXTRA_ARGS_DEBUG}
+     OPTIONS_RELEASE
 diff --git a/ports/snappy/portfile.cmake b/ports/snappy/portfile.cmake
-index 75dd13302..84345c7ca 100644
+index 75dd133027..84345c7caa 100644
 --- a/ports/snappy/portfile.cmake
 +++ b/ports/snappy/portfile.cmake
 @@ -4,6 +4,7 @@ vcpkg_from_github(
@@ -36,11 +36,11 @@ index 75dd13302..84345c7ca 100644
      HEAD_REF master
 +    PATCHES "snappy-disable-bmi.patch"
  )
- 
+
  vcpkg_configure_cmake(
 diff --git a/ports/snappy/snappy-disable-bmi.patch b/ports/snappy/snappy-disable-bmi.patch
 new file mode 100644
-index 000000000..2cbb1533a
+index 0000000000..2cbb1533a8
 --- /dev/null
 +++ b/ports/snappy/snappy-disable-bmi.patch
 @@ -0,0 +1,17 @@
diff --git a/dev/tasks/python-wheels/github.linux.amd64.yml b/dev/tasks/python-wheels/github.linux.amd64.yml
index a6264078715..3b9d1ad9a7b 100644
--- a/dev/tasks/python-wheels/github.linux.amd64.yml
+++ b/dev/tasks/python-wheels/github.linux.amd64.yml
@@ -46,3 +46,11 @@ jobs:
 
       {{ macros.github_upload_releases("arrow/python/repaired_wheels/*.whl")|indent }}
       {{ macros.github_upload_gemfury("arrow/python/repaired_wheels/*.whl")|indent }}
+
+      {% if arrow.branch == 'master' %}
+      - name: Push Docker Image
+        shell: bash
+        run: |
+          archery docker push python-wheel-manylinux-{{ manylinux_version }}
+          archery docker push python-wheel-manylinux-test-unittests
+      {% endif %}
diff --git a/dev/tasks/python-wheels/github.osx.yml b/dev/tasks/python-wheels/github.osx.yml
index af0cc44ef09..e5af0ce0ce8 100644
--- a/dev/tasks/python-wheels/github.osx.yml
+++ b/dev/tasks/python-wheels/github.osx.yml
@@ -54,7 +54,7 @@ jobs:
           doNotSaveCache: true
           appendedCacheKey: "-macos-{{ macos_deployment_target }}"
           vcpkgDirectory: {{ "${{ github.workspace }}/vcpkg" }}
-          vcpkgGitCommitId: fced4bef1606260f110d74de1ae1975c2b9ac549
+          vcpkgGitCommitId: "2021.04.30"
 
       - name: Patch Vcpkg Ports
         run: |
diff --git a/docker-compose.yml b/docker-compose.yml
index d9ca731dbd3..ebcc1bb6039 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -721,7 +721,7 @@ services:
   #    https://quay.io/repository/pypa/manylinux2010_x86_64?tab=tags
   #    only amd64 arch is supported
   python-wheel-manylinux-2010:
-    image: ${REPO}:${ARCH}-python-${PYTHON}-wheel-manylinux-2010
+    image: ${REPO}:${ARCH}-python-${PYTHON}-wheel-manylinux-2010-vcpkg-${VCPKG}
     build:
       args:
         arch_alias: ${ARCH_ALIAS}
@@ -732,7 +732,7 @@ services:
       context: .
       dockerfile: ci/docker/python-wheel-manylinux-201x.dockerfile
       cache_from:
-        - ${REPO}:${ARCH}-python-${PYTHON}-wheel-manylinux-2010
+        - ${REPO}:${ARCH}-python-${PYTHON}-wheel-manylinux-2010-vcpkg-${VCPKG}
     environment:
       <<: *ccache
       MANYLINUX_VERSION: 2010
@@ -744,7 +744,7 @@ services:
   # See available versions at:
   #    https://quay.io/repository/pypa/manylinux2014_x86_64?tab=tags
   python-wheel-manylinux-2014:
-    image: ${REPO}:${ARCH}-python-${PYTHON}-wheel-manylinux-2014
+    image: ${REPO}:${ARCH}-python-${PYTHON}-wheel-manylinux-2014-vcpkg-${VCPKG}
     build:
       args:
         arch_alias: ${ARCH_ALIAS}
@@ -755,7 +755,7 @@ services:
       context: .
       dockerfile: ci/docker/python-wheel-manylinux-201x.dockerfile
       cache_from:
-        - ${REPO}:${ARCH}-python-${PYTHON}-wheel-manylinux-2014
+        - ${REPO}:${ARCH}-python-${PYTHON}-wheel-manylinux-2014-vcpkg-${VCPKG}
     environment:
       <<: *ccache
       MANYLINUX_VERSION: 2014
@@ -787,7 +787,7 @@ services:
     command: /arrow/ci/scripts/python_wheel_manylinux_test.sh unittests
 
   python-wheel-windows-vs2017:
-    image: ${REPO}:python-${PYTHON}-wheel-windows-vs2017
+    image: ${REPO}:python-${PYTHON}-wheel-windows-vs2017-vcpkg-${VCPKG}
     build:
       args:
         vcpkg: ${VCPKG}

From 602a76ac58bc8de60a353648f02cf11891563e77 Mon Sep 17 00:00:00 2001
From: Weston Pace <weston.pace@gmail.com>
Date: Thu, 6 May 2021 07:59:34 -0400
Subject: [PATCH 194/719] ARROW-12355: [C++] Implement efficient async CSV
 scanning

A tiny bit of cleanup and combines ARROW-12392 and ARROW-12289 to create a true async CSV format.

~~Keeping in draft until ARROW-12392, ARROW-12289, and ARROW-12386 are resolved.~~

Closes #10103 from westonpace/feature/ARROW-12355--c-implement-efficient-async-csv-scanning

Authored-by: Weston Pace <weston.pace@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/src/arrow/dataset/file_base.cc |  2 +-
 cpp/src/arrow/dataset/file_base.h  |  2 +-
 cpp/src/arrow/dataset/file_csv.cc  | 36 ++++++++++++++++++------------
 cpp/src/arrow/dataset/file_csv.h   |  4 ++++
 cpp/src/arrow/dataset/scanner.cc   |  6 +++--
 5 files changed, 32 insertions(+), 18 deletions(-)

diff --git a/cpp/src/arrow/dataset/file_base.cc b/cpp/src/arrow/dataset/file_base.cc
index b3ee09b5b7d..f8f4cc38444 100644
--- a/cpp/src/arrow/dataset/file_base.cc
+++ b/cpp/src/arrow/dataset/file_base.cc
@@ -114,7 +114,7 @@ Result<std::shared_ptr<FileFragment>> FileFormat::MakeFragment(
 // formats should provide their own efficient implementation.
 Result<RecordBatchGenerator> FileFormat::ScanBatchesAsync(
     const std::shared_ptr<ScanOptions>& scan_options,
-    const std::shared_ptr<FileFragment>& file) {
+    const std::shared_ptr<FileFragment>& file) const {
   ARROW_ASSIGN_OR_RAISE(auto scan_task_it, ScanFile(scan_options, file));
   struct State {
     State(std::shared_ptr<ScanOptions> scan_options, ScanTaskIterator scan_task_it)
diff --git a/cpp/src/arrow/dataset/file_base.h b/cpp/src/arrow/dataset/file_base.h
index 4119dfb75e1..43085a2d8de 100644
--- a/cpp/src/arrow/dataset/file_base.h
+++ b/cpp/src/arrow/dataset/file_base.h
@@ -154,7 +154,7 @@ class ARROW_DS_EXPORT FileFormat : public std::enable_shared_from_this<FileForma
 
   virtual Result<RecordBatchGenerator> ScanBatchesAsync(
       const std::shared_ptr<ScanOptions>& options,
-      const std::shared_ptr<FileFragment>& file);
+      const std::shared_ptr<FileFragment>& file) const;
   virtual Future<util::optional<int64_t>> CountRows(
       const std::shared_ptr<FileFragment>& file, compute::Expression predicate,
       std::shared_ptr<ScanOptions> options);
diff --git a/cpp/src/arrow/dataset/file_csv.cc b/cpp/src/arrow/dataset/file_csv.cc
index 4dd4fac91d8..eb6d623818e 100644
--- a/cpp/src/arrow/dataset/file_csv.cc
+++ b/cpp/src/arrow/dataset/file_csv.cc
@@ -82,9 +82,10 @@ Result<std::unordered_set<std::string>> GetColumnNames(
 
 static inline Result<csv::ConvertOptions> GetConvertOptions(
     const CsvFileFormat& format, const std::shared_ptr<ScanOptions>& scan_options,
-    const util::string_view first_block, MemoryPool* pool) {
-  ARROW_ASSIGN_OR_RAISE(auto column_names,
-                        GetColumnNames(format.parse_options, first_block, pool));
+    const util::string_view first_block) {
+  ARROW_ASSIGN_OR_RAISE(
+      auto column_names,
+      GetColumnNames(format.parse_options, first_block, scan_options->pool));
 
   ARROW_ASSIGN_OR_RAISE(
       auto csv_scan_options,
@@ -122,8 +123,7 @@ static inline Result<csv::ReadOptions> GetReadOptions(
 
 static inline Future<std::shared_ptr<csv::StreamingReader>> OpenReaderAsync(
     const FileSource& source, const CsvFileFormat& format,
-    const std::shared_ptr<ScanOptions>& scan_options, internal::Executor* cpu_executor,
-    MemoryPool* pool) {
+    const std::shared_ptr<ScanOptions>& scan_options, internal::Executor* cpu_executor) {
   ARROW_ASSIGN_OR_RAISE(auto reader_options, GetReadOptions(format, scan_options));
 
   ARROW_ASSIGN_OR_RAISE(auto input, source.OpenCompressed());
@@ -139,8 +139,8 @@ static inline Future<std::shared_ptr<csv::StreamingReader>> OpenReaderAsync(
         const auto& parse_options = format.parse_options;
         auto convert_options = csv::ConvertOptions::Defaults();
         if (scan_options != nullptr) {
-          ARROW_ASSIGN_OR_RAISE(convert_options, GetConvertOptions(format, scan_options,
-                                                                   first_block, pool));
+          ARROW_ASSIGN_OR_RAISE(convert_options,
+                                GetConvertOptions(format, scan_options, first_block));
         }
 
         auto reader_fut = csv::StreamingReader::MakeAsync(
@@ -162,7 +162,7 @@ static inline Result<std::shared_ptr<csv::StreamingReader>> OpenReader(
     const std::shared_ptr<ScanOptions>& scan_options = nullptr,
     MemoryPool* pool = default_memory_pool()) {
   auto open_reader_fut =
-      OpenReaderAsync(source, format, scan_options, internal::GetCpuThreadPool(), pool);
+      OpenReaderAsync(source, format, scan_options, internal::GetCpuThreadPool());
   return open_reader_fut.result();
 }
 
@@ -186,15 +186,14 @@ class CsvScanTask : public ScanTask {
         source_(fragment->source()) {}
 
   Result<RecordBatchIterator> Execute() override {
-    auto reader_fut = OpenReaderAsync(source_, *format_, options(),
-                                      internal::GetCpuThreadPool(), options()->pool);
+    auto reader_fut =
+        OpenReaderAsync(source_, *format_, options(), internal::GetCpuThreadPool());
     auto reader_gen = GeneratorFromReader(std::move(reader_fut));
     return MakeGeneratorIterator(std::move(reader_gen));
   }
 
   Future<RecordBatchVector> SafeExecute(internal::Executor* executor) override {
-    auto reader_fut =
-        OpenReaderAsync(source_, *format_, options(), executor, options()->pool);
+    auto reader_fut = OpenReaderAsync(source_, *format_, options(), executor);
     auto reader_gen = GeneratorFromReader(std::move(reader_fut));
     return CollectAsyncGenerator(reader_gen);
   }
@@ -202,8 +201,7 @@ class CsvScanTask : public ScanTask {
   Future<> SafeVisit(
       internal::Executor* executor,
       std::function<Status(std::shared_ptr<RecordBatch>)> visitor) override {
-    auto reader_fut =
-        OpenReaderAsync(source_, *format_, options(), executor, options()->pool);
+    auto reader_fut = OpenReaderAsync(source_, *format_, options(), executor);
     auto reader_gen = GeneratorFromReader(std::move(reader_fut));
     return VisitAsyncGenerator(reader_gen, visitor);
   }
@@ -248,5 +246,15 @@ Result<ScanTaskIterator> CsvFileFormat::ScanFile(
   return MakeVectorIterator<std::shared_ptr<ScanTask>>({std::move(task)});
 }
 
+Result<RecordBatchGenerator> CsvFileFormat::ScanBatchesAsync(
+    const std::shared_ptr<ScanOptions>& scan_options,
+    const std::shared_ptr<FileFragment>& file) const {
+  auto this_ = checked_pointer_cast<const CsvFileFormat>(shared_from_this());
+  auto source = file->source();
+  auto reader_fut =
+      OpenReaderAsync(source, *this, scan_options, internal::GetCpuThreadPool());
+  return GeneratorFromReader(std::move(reader_fut));
+}
+
 }  // namespace dataset
 }  // namespace arrow
diff --git a/cpp/src/arrow/dataset/file_csv.h b/cpp/src/arrow/dataset/file_csv.h
index 9289c016afb..0b24e083cca 100644
--- a/cpp/src/arrow/dataset/file_csv.h
+++ b/cpp/src/arrow/dataset/file_csv.h
@@ -57,6 +57,10 @@ class ARROW_DS_EXPORT CsvFileFormat : public FileFormat {
       const std::shared_ptr<ScanOptions>& options,
       const std::shared_ptr<FileFragment>& fragment) const override;
 
+  Result<RecordBatchGenerator> ScanBatchesAsync(
+      const std::shared_ptr<ScanOptions>& scan_options,
+      const std::shared_ptr<FileFragment>& file) const override;
+
   Result<std::shared_ptr<FileWriter>> MakeWriter(
       std::shared_ptr<io::OutputStream> destination, std::shared_ptr<Schema> schema,
       std::shared_ptr<FileWriteOptions> options) const override {
diff --git a/cpp/src/arrow/dataset/scanner.cc b/cpp/src/arrow/dataset/scanner.cc
index 43239a676e5..652cd67c262 100644
--- a/cpp/src/arrow/dataset/scanner.cc
+++ b/cpp/src/arrow/dataset/scanner.cc
@@ -583,8 +583,10 @@ Result<EnumeratedRecordBatchGenerator> AsyncScanner::ScanBatchesUnorderedAsync(
                         FragmentsToBatches(self, std::move(fragment_gen)));
   auto batch_gen_gen_readahead = MakeSerialReadaheadGenerator(
       std::move(batch_gen_gen), scan_options_->fragment_readahead);
-  return MakeMergedGenerator(std::move(batch_gen_gen_readahead),
-                             scan_options_->fragment_readahead);
+  auto merged_batch_gen = MakeMergedGenerator(std::move(batch_gen_gen_readahead),
+                                              scan_options_->fragment_readahead);
+  return MakeReadaheadGenerator(std::move(merged_batch_gen),
+                                scan_options_->fragment_readahead);
 }
 
 Result<TaggedRecordBatchGenerator> AsyncScanner::ScanBatchesAsync() {

From 843ef269317cfcf77fd35e2092434e43be554902 Mon Sep 17 00:00:00 2001
From: Dominik Moritz <domoritz@gmail.com>
Date: Thu, 6 May 2021 08:57:47 -0700
Subject: [PATCH 195/719] MINOR: [JS] Update deprecated jest config key

Closes #10152 from domoritz/jest

Authored-by: Dominik Moritz <domoritz@gmail.com>
Signed-off-by: Brian Hulette <bhulette@google.com>
---
 js/jest.config.js          | 2 +-
 js/jest.coverage.config.js | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/js/jest.config.js b/js/jest.config.js
index 55028d09f96..30f7bb6311f 100644
--- a/js/jest.config.js
+++ b/js/jest.config.js
@@ -24,7 +24,7 @@ module.exports = {
     "globals": {
       "ts-jest": {
         "diagnostics": false,
-        "tsConfig": "test/tsconfig.json"
+        "tsconfig": "test/tsconfig.json"
       }
     },
     "roots": [
diff --git a/js/jest.coverage.config.js b/js/jest.coverage.config.js
index ac98794f800..37917720367 100644
--- a/js/jest.coverage.config.js
+++ b/js/jest.coverage.config.js
@@ -24,7 +24,7 @@ module.exports = {
     globals: {
         'ts-jest': {
             diagnostics: false,
-            tsConfig: 'test/tsconfig.coverage.json'
+            tsconfig: 'test/tsconfig.coverage.json'
         }
     }
 };

From b0e1284089139160f33edce425875c3e5a8ed39a Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Thu, 6 May 2021 13:08:17 -0400
Subject: [PATCH 196/719] ARROW-12231: [C++][Python][Dataset] Isolate one-shot
 data to scanner

This isolates the one-shot portion of InMemoryDataset to Scanner, so that it more clearly is used only for writing data from a source that cannot be re-read.

Closes #10070 from lidavidm/arrow-12231

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Benjamin Kietzman <bengilgit@gmail.com>
---
 cpp/src/arrow/dataset/dataset.cc             | 22 --------
 cpp/src/arrow/dataset/dataset.h              |  1 -
 cpp/src/arrow/dataset/dataset_test.cc        | 17 ------
 cpp/src/arrow/dataset/scanner.cc             | 58 ++++++++++++++++++++
 cpp/src/arrow/dataset/scanner.h              |  8 +++
 cpp/src/arrow/dataset/scanner_test.cc        | 24 ++++++++
 python/pyarrow/_dataset.pyx                  | 54 ++++++++++++++----
 python/pyarrow/dataset.py                    | 12 ++--
 python/pyarrow/includes/libarrow_dataset.pxd |  4 ++
 python/pyarrow/tests/test_dataset.py         | 50 ++++++-----------
 10 files changed, 160 insertions(+), 90 deletions(-)

diff --git a/cpp/src/arrow/dataset/dataset.cc b/cpp/src/arrow/dataset/dataset.cc
index bfb3c4b0596..4305420fd74 100644
--- a/cpp/src/arrow/dataset/dataset.cc
+++ b/cpp/src/arrow/dataset/dataset.cc
@@ -204,28 +204,6 @@ InMemoryDataset::InMemoryDataset(std::shared_ptr<Table> table)
     : Dataset(table->schema()),
       get_batches_(new TableRecordBatchGenerator(std::move(table))) {}
 
-struct ReaderRecordBatchGenerator : InMemoryDataset::RecordBatchGenerator {
-  explicit ReaderRecordBatchGenerator(std::shared_ptr<RecordBatchReader> reader)
-      : reader_(std::move(reader)), consumed_(false) {}
-
-  RecordBatchIterator Get() const final {
-    if (consumed_) {
-      return MakeErrorIterator<std::shared_ptr<RecordBatch>>(Status::Invalid(
-          "RecordBatchReader-backed InMemoryDataset was already consumed"));
-    }
-    consumed_ = true;
-    auto reader = reader_;
-    return MakeFunctionIterator([reader] { return reader->Next(); });
-  }
-
-  std::shared_ptr<RecordBatchReader> reader_;
-  mutable bool consumed_;
-};
-
-InMemoryDataset::InMemoryDataset(std::shared_ptr<RecordBatchReader> reader)
-    : Dataset(reader->schema()),
-      get_batches_(new ReaderRecordBatchGenerator(std::move(reader))) {}
-
 Result<std::shared_ptr<Dataset>> InMemoryDataset::ReplaceSchema(
     std::shared_ptr<Schema> schema) const {
   RETURN_NOT_OK(CheckProjectable(*schema_, *schema));
diff --git a/cpp/src/arrow/dataset/dataset.h b/cpp/src/arrow/dataset/dataset.h
index 30699accd2d..a58e1c425fe 100644
--- a/cpp/src/arrow/dataset/dataset.h
+++ b/cpp/src/arrow/dataset/dataset.h
@@ -215,7 +215,6 @@ class ARROW_DS_EXPORT InMemoryDataset : public Dataset {
 
   /// Convenience constructor taking a Table
   explicit InMemoryDataset(std::shared_ptr<Table> table);
-  explicit InMemoryDataset(std::shared_ptr<RecordBatchReader> reader);
 
   std::string type_name() const override { return "in-memory"; }
 
diff --git a/cpp/src/arrow/dataset/dataset_test.cc b/cpp/src/arrow/dataset/dataset_test.cc
index 7aa0e1a2413..66d69c30c82 100644
--- a/cpp/src/arrow/dataset/dataset_test.cc
+++ b/cpp/src/arrow/dataset/dataset_test.cc
@@ -79,23 +79,6 @@ TEST_F(TestInMemoryDataset, ReplaceSchema) {
                     .status());
 }
 
-TEST_F(TestInMemoryDataset, FromReader) {
-  constexpr int64_t kBatchSize = 1024;
-  constexpr int64_t kNumberBatches = 16;
-
-  SetSchema({field("i32", int32()), field("f64", float64())});
-  auto batch = ConstantArrayGenerator::Zeroes(kBatchSize, schema_);
-  auto source_reader = ConstantArrayGenerator::Repeat(kNumberBatches, batch);
-  auto target_reader = ConstantArrayGenerator::Repeat(kNumberBatches, batch);
-
-  auto dataset = std::make_shared<InMemoryDataset>(source_reader);
-
-  AssertDatasetEquals(target_reader.get(), dataset.get());
-  // Such datasets can only be scanned once
-  ASSERT_OK_AND_ASSIGN(auto fragments, dataset->GetFragments());
-  ASSERT_RAISES(Invalid, fragments.Next());
-}
-
 TEST_F(TestInMemoryDataset, GetFragments) {
   constexpr int64_t kBatchSize = 1024;
   constexpr int64_t kNumberBatches = 16;
diff --git a/cpp/src/arrow/dataset/scanner.cc b/cpp/src/arrow/dataset/scanner.cc
index 652cd67c262..a2bc2146984 100644
--- a/cpp/src/arrow/dataset/scanner.cc
+++ b/cpp/src/arrow/dataset/scanner.cc
@@ -744,6 +744,64 @@ ScannerBuilder::ScannerBuilder(std::shared_ptr<Schema> schema,
   DCHECK_OK(Filter(scan_options_->filter));
 }
 
+namespace {
+class OneShotScanTask : public ScanTask {
+ public:
+  OneShotScanTask(RecordBatchIterator batch_it, std::shared_ptr<ScanOptions> options,
+                  std::shared_ptr<Fragment> fragment)
+      : ScanTask(std::move(options), std::move(fragment)),
+        batch_it_(std::move(batch_it)) {}
+  Result<RecordBatchIterator> Execute() override {
+    if (!batch_it_) return Status::Invalid("OneShotScanTask was already scanned");
+    return std::move(batch_it_);
+  }
+
+ private:
+  RecordBatchIterator batch_it_;
+};
+
+class OneShotFragment : public Fragment {
+ public:
+  OneShotFragment(std::shared_ptr<Schema> schema, RecordBatchIterator batch_it)
+      : Fragment(compute::literal(true), std::move(schema)),
+        batch_it_(std::move(batch_it)) {
+    DCHECK_NE(physical_schema_, nullptr);
+  }
+  Status CheckConsumed() {
+    if (!batch_it_) return Status::Invalid("OneShotFragment was already scanned");
+    return Status::OK();
+  }
+  Result<ScanTaskIterator> Scan(std::shared_ptr<ScanOptions> options) override {
+    RETURN_NOT_OK(CheckConsumed());
+    ScanTaskVector tasks{std::make_shared<OneShotScanTask>(
+        std::move(batch_it_), std::move(options), shared_from_this())};
+    return MakeVectorIterator(std::move(tasks));
+  }
+  Result<RecordBatchGenerator> ScanBatchesAsync(
+      const std::shared_ptr<ScanOptions>& options) override {
+    RETURN_NOT_OK(CheckConsumed());
+    return MakeBackgroundGenerator(std::move(batch_it_), options->io_context.executor());
+  }
+  std::string type_name() const override { return "one-shot"; }
+
+ protected:
+  Result<std::shared_ptr<Schema>> ReadPhysicalSchemaImpl() override {
+    return physical_schema_;
+  }
+
+  RecordBatchIterator batch_it_;
+};
+}  // namespace
+
+std::shared_ptr<ScannerBuilder> ScannerBuilder::FromRecordBatchReader(
+    std::shared_ptr<RecordBatchReader> reader) {
+  auto batch_it = MakeIteratorFromReader(reader);
+  auto fragment =
+      std::make_shared<OneShotFragment>(reader->schema(), std::move(batch_it));
+  return std::make_shared<ScannerBuilder>(reader->schema(), std::move(fragment),
+                                          std::make_shared<ScanOptions>());
+}
+
 const std::shared_ptr<Schema>& ScannerBuilder::schema() const {
   return scan_options_->dataset_schema;
 }
diff --git a/cpp/src/arrow/dataset/scanner.h b/cpp/src/arrow/dataset/scanner.h
index 99833c95bf0..5fdcbe586c0 100644
--- a/cpp/src/arrow/dataset/scanner.h
+++ b/cpp/src/arrow/dataset/scanner.h
@@ -317,6 +317,14 @@ class ARROW_DS_EXPORT ScannerBuilder {
   ScannerBuilder(std::shared_ptr<Schema> schema, std::shared_ptr<Fragment> fragment,
                  std::shared_ptr<ScanOptions> scan_options);
 
+  /// \brief Make a scanner from a record batch reader.
+  ///
+  /// The resulting scanner can be scanned only once. This is intended
+  /// to support writing data from streaming sources or other sources
+  /// that can be iterated only once.
+  static std::shared_ptr<ScannerBuilder> FromRecordBatchReader(
+      std::shared_ptr<RecordBatchReader> reader);
+
   /// \brief Set the subset of columns to materialize.
   ///
   /// Columns which are not referenced may not be read from fragments.
diff --git a/cpp/src/arrow/dataset/scanner_test.cc b/cpp/src/arrow/dataset/scanner_test.cc
index b7c68daa0fc..9b5f49005b1 100644
--- a/cpp/src/arrow/dataset/scanner_test.cc
+++ b/cpp/src/arrow/dataset/scanner_test.cc
@@ -609,6 +609,30 @@ TEST_P(TestScanner, Head) {
   AssertTablesEqual(*expected, *actual);
 }
 
+TEST_P(TestScanner, FromReader) {
+  if (GetParam().use_async) {
+    GTEST_SKIP() << "Async scanner does not support construction from reader";
+  }
+  auto batch_size = GetParam().items_per_batch;
+  auto num_batches = GetParam().num_batches;
+
+  SetSchema({field("i32", int32()), field("f64", float64())});
+  auto batch = ConstantArrayGenerator::Zeroes(batch_size, schema_);
+  auto source_reader = ConstantArrayGenerator::Repeat(num_batches, batch);
+  auto target_reader = ConstantArrayGenerator::Repeat(num_batches, batch);
+
+  auto builder = ScannerBuilder::FromRecordBatchReader(source_reader);
+  ARROW_EXPECT_OK(builder->UseThreads(GetParam().use_threads));
+  ASSERT_OK_AND_ASSIGN(auto scanner, builder->Finish());
+  AssertScannerEquals(target_reader.get(), scanner.get());
+
+  // Such datasets can only be scanned once (but you can get fragments multiple times)
+  ASSERT_OK_AND_ASSIGN(auto batch_it, scanner->ScanBatches());
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      Invalid, ::testing::HasSubstr("OneShotFragment was already scanned"),
+      batch_it.Next());
+}
+
 INSTANTIATE_TEST_SUITE_P(TestScannerThreading, TestScanner,
                          ::testing::ValuesIn(TestScannerParams::Values()),
                          [](const ::testing::TestParamInfo<TestScannerParams>& info) {
diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx
index eaeebdcde60..90633a140d3 100644
--- a/python/pyarrow/_dataset.pyx
+++ b/python/pyarrow/_dataset.pyx
@@ -298,6 +298,7 @@ cdef class Dataset(_Weakrefable):
         classes = {
             'union': UnionDataset,
             'filesystem': FileSystemDataset,
+            'in-memory': InMemoryDataset,
         }
 
         class_ = classes.get(type_name, None)
@@ -535,19 +536,10 @@ cdef class InMemoryDataset(Dataset):
             table = pa.Table.from_batches(batches, schema=schema)
             in_memory_dataset = make_shared[CInMemoryDataset](
                 pyarrow_unwrap_table(table))
-        elif isinstance(source, pa.ipc.RecordBatchReader):
-            reader = source
-            in_memory_dataset = make_shared[CInMemoryDataset](reader.reader)
-        elif _is_iterable(source):
-            if schema is None:
-                raise ValueError('Must provide schema to construct in-memory '
-                                 'dataset from an iterable')
-            reader = pa.ipc.RecordBatchReader.from_batches(schema, source)
-            in_memory_dataset = make_shared[CInMemoryDataset](reader.reader)
         else:
             raise TypeError(
-                'Expected a table, batch, iterable of tables/batches, or a '
-                'record batch reader instead of the given type: ' +
+                'Expected a table, batch, or list of tables/batches '
+                'instead of the given type: ' +
                 type(source).__name__
             )
 
@@ -2797,6 +2789,46 @@ cdef class Scanner(_Weakrefable):
         scanner = GetResultValue(builder.get().Finish())
         return Scanner.wrap(scanner)
 
+    @staticmethod
+    def from_batches(source, Schema schema=None, bint use_threads=True,
+                     MemoryPool memory_pool=None, object columns=None,
+                     Expression filter=None,
+                     int batch_size=_DEFAULT_BATCH_SIZE,
+                     FragmentScanOptions fragment_scan_options=None):
+        """Create a Scanner from an iterator of batches.
+
+        This creates a scanner which can be used only once. It is
+        intended to support writing a dataset (which takes a scanner)
+        from a source which can be read only once (e.g. a
+        RecordBatchReader or generator).
+        """
+        cdef:
+            shared_ptr[CScanOptions] options = make_shared[CScanOptions]()
+            shared_ptr[CScannerBuilder] builder
+            shared_ptr[CScanner] scanner
+            RecordBatchReader reader
+        if isinstance(source, pa.ipc.RecordBatchReader):
+            if schema:
+                raise ValueError('Cannot specify a schema when providing '
+                                 'a RecordBatchReader')
+            reader = source
+        elif _is_iterable(source):
+            if schema is None:
+                raise ValueError('Must provide schema to construct scanner '
+                                 'from an iterable')
+            reader = pa.ipc.RecordBatchReader.from_batches(schema, source)
+        else:
+            raise TypeError('Expected a RecordBatchReader or an iterable of '
+                            'batches instead of the given type: ' +
+                            type(source).__name__)
+        builder = CScannerBuilder.FromRecordBatchReader(reader.reader)
+        _populate_builder(builder, columns=columns, filter=filter,
+                          batch_size=batch_size, use_threads=use_threads,
+                          memory_pool=memory_pool,
+                          fragment_scan_options=fragment_scan_options)
+        scanner = GetResultValue(builder.get().Finish())
+        return Scanner.wrap(scanner)
+
     @property
     def dataset_schema(self):
         """The schema with which batches will be read from fragments."""
diff --git a/python/pyarrow/dataset.py b/python/pyarrow/dataset.py
index 97d08844f27..e80de1688e7 100644
--- a/python/pyarrow/dataset.py
+++ b/python/pyarrow/dataset.py
@@ -669,10 +669,7 @@ def dataset(source, schema=None, format=None, filesystem=None,
                 'of batches or tables. The given list contains the following '
                 'types: {}'.format(type_names)
             )
-    elif isinstance(source, (pa.RecordBatch, pa.ipc.RecordBatchReader,
-                             pa.Table)):
-        return _in_memory_dataset(source, **kwargs)
-    elif _is_iterable(source):
+    elif isinstance(source, (pa.RecordBatch, pa.Table)):
         return _in_memory_dataset(source, **kwargs)
     else:
         raise TypeError(
@@ -736,9 +733,12 @@ def write_dataset(data, base_dir, basename_template=None, format=None,
     if isinstance(data, (list, tuple)):
         schema = schema or data[0].schema
         data = InMemoryDataset(data, schema=schema)
-    elif isinstance(data, (pa.RecordBatch, pa.ipc.RecordBatchReader,
-                           pa.Table)) or _is_iterable(data):
+    elif isinstance(data, (pa.RecordBatch, pa.Table)):
+        schema = schema or data.schema
         data = InMemoryDataset(data, schema=schema)
+    elif isinstance(data, pa.ipc.RecordBatchReader) or _is_iterable(data):
+        data = Scanner.from_batches(data, schema=schema)
+        schema = None
     elif not isinstance(data, (Dataset, Scanner)):
         raise ValueError(
             "Only Dataset, Scanner, Table/RecordBatch, RecordBatchReader, "
diff --git a/python/pyarrow/includes/libarrow_dataset.pxd b/python/pyarrow/includes/libarrow_dataset.pxd
index 7a02e0e4ba6..31b3764245d 100644
--- a/python/pyarrow/includes/libarrow_dataset.pxd
+++ b/python/pyarrow/includes/libarrow_dataset.pxd
@@ -122,6 +122,10 @@ cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil:
                         shared_ptr[CScanOptions] scan_options)
         CScannerBuilder(shared_ptr[CSchema], shared_ptr[CFragment],
                         shared_ptr[CScanOptions] scan_options)
+
+        @staticmethod
+        shared_ptr[CScannerBuilder] FromRecordBatchReader(
+            shared_ptr[CRecordBatchReader] reader)
         CStatus ProjectColumns "Project"(const vector[c_string]& columns)
         CStatus Project(vector[CExpression]& exprs, vector[c_string]& columns)
         CStatus Filter(CExpression filter)
diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py
index 8c9dc226630..e489bcb3a73 100644
--- a/python/pyarrow/tests/test_dataset.py
+++ b/python/pyarrow/tests/test_dataset.py
@@ -1710,9 +1710,10 @@ def test_construct_from_invalid_sources_raise(multisourcefs):
         ds.dataset(None)
 
     expected = (
-        "Must provide schema to construct in-memory dataset from an iterable"
+        "Expected a path-like, list of path-likes or a list of Datasets "
+        "instead of the given type: generator"
     )
-    with pytest.raises(ValueError, match=expected):
+    with pytest.raises(TypeError, match=expected):
         ds.dataset((batch1 for _ in range(3)))
 
     expected = (
@@ -1745,49 +1746,32 @@ def test_construct_from_invalid_sources_raise(multisourcefs):
 def test_construct_in_memory():
     batch = pa.RecordBatch.from_arrays([pa.array(range(10))], names=["a"])
     table = pa.Table.from_batches([batch])
-    reader = pa.ipc.RecordBatchReader.from_batches(batch.schema, [batch])
-    iterable = (batch for _ in range(1))
 
-    for source in (batch, table, reader, [batch], [table]):
-        dataset = ds.dataset(source)
-        assert dataset.to_table() == table
-
-    assert ds.dataset(iterable, schema=batch.schema).to_table().equals(table)
     assert ds.dataset([], schema=pa.schema([])).to_table() == pa.table([])
 
-    # When constructed from batches/tables, should be reusable
     for source in (batch, table, [batch], [table]):
         dataset = ds.dataset(source)
-        assert len(list(dataset.get_fragments())) == 1
-        assert len(list(dataset.get_fragments())) == 1
-        assert dataset.to_table() == table
         assert dataset.to_table() == table
+        assert len(list(dataset.get_fragments())) == 1
         assert next(dataset.get_fragments()).to_table() == table
         assert pa.Table.from_batches(list(dataset.to_batches())) == table
 
+
+def test_scan_iterator():
+    batch = pa.RecordBatch.from_arrays([pa.array(range(10))], names=["a"])
+    table = pa.Table.from_batches([batch])
     # When constructed from readers/iterators, should be one-shot
-    match = "InMemoryDataset was already consumed"
-    for factory in (
-            lambda: pa.ipc.RecordBatchReader.from_batches(
-                batch.schema, [batch]),
-            lambda: (batch for _ in range(1)),
+    match = "OneShotFragment was already scanned"
+    for factory, schema in (
+            (lambda: pa.ipc.RecordBatchReader.from_batches(
+                batch.schema, [batch]), None),
+            (lambda: (batch for _ in range(1)), batch.schema),
     ):
-        dataset = ds.dataset(factory(), schema=batch.schema)
-        # Getting fragments consumes the underlying iterator
-        fragments = list(dataset.get_fragments())
-        assert len(fragments) == 1
-        assert fragments[0].to_table() == table
-        with pytest.raises(pa.ArrowInvalid, match=match):
-            list(dataset.get_fragments())
-        with pytest.raises(pa.ArrowInvalid, match=match):
-            dataset.to_table()
-        # Materializing consumes the underlying iterator
-        dataset = ds.dataset(factory(), schema=batch.schema)
-        assert dataset.to_table() == table
-        with pytest.raises(pa.ArrowInvalid, match=match):
-            list(dataset.get_fragments())
+        # Scanning the fragment consumes the underlying iterator
+        scanner = ds.Scanner.from_batches(factory(), schema=schema)
+        assert scanner.to_table() == table
         with pytest.raises(pa.ArrowInvalid, match=match):
-            dataset.to_table()
+            scanner.to_table()
 
 
 @pytest.mark.parquet

From 0d979bf0cbb10e4977c99a542c266134604e72a5 Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Thu, 6 May 2021 10:35:40 -0700
Subject: [PATCH 197/719] ARROW-12575: [R] Use unary negative kernel

Closes #10196 from thisisnic/ARROW-12575-negative

Authored-by: Nic Crane <thisisnic@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 r/R/expression.R | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/r/R/expression.R b/r/R/expression.R
index 1e104677d8b..30eb0906d43 100644
--- a/r/R/expression.R
+++ b/r/R/expression.R
@@ -56,13 +56,9 @@ build_array_expression <- function(FUN,
                                    args = list(...),
                                    options = empty_named_list()) {
   if (FUN == "-" && length(args) == 1L) {
-    # Unary -, i.e. just make it negative, and somehow this works
     if (inherits(args[[1]], c("ArrowObject", "array_expression"))) {
-      # Make it be 0 - arg
-      # TODO(ARROW-11950): do this in C++ compute
-      args <- list(0L, args[[1]])
+      return(build_array_expression("negate_checked", args[[1]]))
     } else {
-      # Somehow this works
       return(-args[[1]])
     }
   }
@@ -288,12 +284,9 @@ build_dataset_expression <- function(FUN,
                                      args = list(...),
                                      options = empty_named_list()) {
   if (FUN == "-" && length(args) == 1L) {
-    # Unary -, i.e. make it negative
     if (inherits(args[[1]], c("ArrowObject", "Expression"))) {
-      # TODO(ARROW-11950): do this in C++ compute
-      args <- list(0L, args[[1]])
+      return(build_dataset_expression("negate_checked", args[[1]]))
     } else {
-      # Somehow this just works
       return(-args[[1]])
     }
   }

From 264eb5f70b6a4bc68b785a6baddf3ff070a40ed7 Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Thu, 6 May 2021 13:42:51 -0500
Subject: [PATCH 198/719] ARROW-12660: [R] Post-4.0 adjustments for CRAN

Closes #10254 from jonkeane/ARROW-12660-release-cleanup

Lead-authored-by: Jonathan Keane <jkeane@gmail.com>
Co-authored-by: Neal Richardson <neal.p.richardson@gmail.com>
Co-authored-by: Lorenz Walthert <lorenz.walthert@icloud.com>
Signed-off-by: Jonathan Keane <jkeane@gmail.com>
---
 r/NEWS.md                |  8 +++++++-
 r/man/CsvReadOptions.Rd  |  7 +++++++
 r/man/CsvWriteOptions.Rd | 22 ---------------------
 r/man/write_csv_arrow.Rd |  2 +-
 r/tools/nixlibs.R        | 41 ++++++++++++++++++++++++----------------
 5 files changed, 40 insertions(+), 40 deletions(-)
 delete mode 100644 r/man/CsvWriteOptions.Rd

diff --git a/r/NEWS.md b/r/NEWS.md
index 4efb4dbd40f..8e5d00ed623 100644
--- a/r/NEWS.md
+++ b/r/NEWS.md
@@ -19,7 +19,13 @@
 
 # arrow 4.0.0.9000
 
-# arrow 3.0.0.9000
+# arrow 4.0.0.1
+
+ * The mimalloc memory allocator is the default memory allocator when using a static source build of the package on Linux. This is because it has better behavior under valgrind than jemalloc does. A full-featured build (installed with `LIBARROW_MINIMAL=false`) includes both jemalloc and mimalloc, and it has still has jemalloc as default, though this is configurable at runtime with the `ARROW_DEFAULT_MEMORY_POOL` environment variable.
+ * Environment variables `LIBARROW_MINIMAL`, `LIBARROW_DOWNLOAD`, and `NOT_CRAN` are now case-insensitive in the Linux build script.
+ * A build configuration issue in the macOS binary package has been resolved.
+
+# arrow 4.0.0
 
 ## dplyr methods
 
diff --git a/r/man/CsvReadOptions.Rd b/r/man/CsvReadOptions.Rd
index 805330768b3..d088692708b 100644
--- a/r/man/CsvReadOptions.Rd
+++ b/r/man/CsvReadOptions.Rd
@@ -3,6 +3,7 @@
 \docType{class}
 \name{CsvReadOptions}
 \alias{CsvReadOptions}
+\alias{CsvWriteOptions}
 \alias{CsvParseOptions}
 \alias{TimestampParser}
 \alias{CsvConvertOptions}
@@ -89,6 +90,12 @@ starting from the beginning of this vector. Possible values are
 \code{TimestampParser$create()} takes an optional \code{format} string argument.
 See \code{\link[base:strptime]{strptime()}} for example syntax.
 The default is to use an ISO-8601 format parser.
+
+The \code{CsvWriteOptions$create()} factory method takes the following arguments:
+\itemize{
+\item \code{include_header} Whether to write an initial header line with column names
+\item \code{batch_size} Maximum number of rows processed at a time. Default is 1024.
+}
 }
 
 \section{Active bindings}{
diff --git a/r/man/CsvWriteOptions.Rd b/r/man/CsvWriteOptions.Rd
deleted file mode 100644
index e83126c9f9a..00000000000
--- a/r/man/CsvWriteOptions.Rd
+++ /dev/null
@@ -1,22 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/csv.R
-\docType{class}
-\name{CsvWriteOptions}
-\alias{CsvWriteOptions}
-\title{File writer options}
-\description{
-\code{CsvReadOptions}, \code{CsvParseOptions}, \code{CsvConvertOptions},
-\code{JsonReadOptions}, \code{JsonParseOptions}, and \code{TimestampParser} are containers for various
-file reading options. See their usage in \code{\link[=read_csv_arrow]{read_csv_arrow()}} and
-\code{\link[=read_json_arrow]{read_json_arrow()}}, respectively.
-}
-\section{Factory}{
-
-
-The \code{CsvWriteOptions$create()} factory method takes the following arguments:
-\itemize{
-\item \code{include_header} Whether to write an initial header line with column names
-\item \code{batch_size} Maximum number of rows processed at a time. Default is 1024
-}
-}
-
diff --git a/r/man/write_csv_arrow.Rd b/r/man/write_csv_arrow.Rd
index f583e487e1f..dfed1613149 100644
--- a/r/man/write_csv_arrow.Rd
+++ b/r/man/write_csv_arrow.Rd
@@ -14,7 +14,7 @@ system (\code{SubTreeFileSystem})}
 
 \item{include_header}{Whether to write an initial header line with column names}
 
-\item{batch_size}{Maximum number of rows processed at a time. Default is 1024}
+\item{batch_size}{Maximum number of rows processed at a time. Default is 1024.}
 }
 \value{
 The input \code{x}, invisibly. Note that if \code{sink} is an \link{OutputStream},
diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R
index 9daae4b67aa..97cc69fa62d 100644
--- a/r/tools/nixlibs.R
+++ b/r/tools/nixlibs.R
@@ -212,21 +212,30 @@ find_available_binary <- function(os) {
 download_source <- function() {
   tf1 <- tempfile()
   src_dir <- tempfile()
-  if (bintray_download(tf1)) {
-    # First try from bintray
-    cat("*** Successfully retrieved C++ source\n")
-    unzip(tf1, exdir = src_dir)
-    unlink(tf1)
-    src_dir <- paste0(src_dir, "/cpp")
-  } else if (apache_download(tf1)) {
-    # If that fails, try for an official release
-    cat("*** Successfully retrieved C++ source\n")
-    untar(tf1, exdir = src_dir)
-    unlink(tf1)
-    src_dir <- paste0(src_dir, "/apache-arrow-", VERSION, "/cpp")
+
+  # Given VERSION as x.y.z.p
+  p <- package_version(VERSION)[1, 4]
+  if (is.na(p) || p < 1000) {
+    # This is either just x.y.z or it has a small (R-only) patch version
+    # Download from the official Apache release, dropping the p
+    VERSION <- as.character(package_version(VERSION)[1, -4])
+    if (apache_download(VERSION, tf1)) {
+      untar(tf1, exdir = src_dir)
+      unlink(tf1)
+      src_dir <- paste0(src_dir, "/apache-arrow-", VERSION, "/cpp")
+    }
+  } else if (p != 9000) {
+    # This is a custom dev version (x.y.z.9999) or a nightly (x.y.z.20210505)
+    # (Don't try to download on the default dev .9000 version)
+    if (nightly_download(VERSION, tf1)) {
+      unzip(tf1, exdir = src_dir)
+      unlink(tf1)
+      src_dir <- paste0(src_dir, "/cpp")
+    }
   }
 
   if (dir.exists(src_dir)) {
+    cat("*** Successfully retrieved C++ source\n")
     options(.arrow.cleanup = c(getOption(".arrow.cleanup"), src_dir))
     # These scripts need to be executable
     system(
@@ -239,13 +248,13 @@ download_source <- function() {
   }
 }
 
-bintray_download <- function(destfile) {
-  source_url <- paste0(arrow_repo, "src/arrow-", VERSION, ".zip")
+nightly_download <- function(version, destfile) {
+  source_url <- paste0(arrow_repo, "src/arrow-", version, ".zip")
   try_download(source_url, destfile)
 }
 
-apache_download <- function(destfile, n_mirrors = 3) {
-  apache_path <- paste0("arrow/arrow-", VERSION, "/apache-arrow-", VERSION, ".tar.gz")
+apache_download <- function(version, destfile, n_mirrors = 3) {
+  apache_path <- paste0("arrow/arrow-", version, "/apache-arrow-", version, ".tar.gz")
   apache_urls <- c(
     # This returns a different mirror each time
     rep("https://www.apache.org/dyn/closer.lua?action=download&filename=", n_mirrors),

From 43a9dae109ef15f50bfeb68b9108b59ba5eadbd1 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Thu, 6 May 2021 18:34:05 -0400
Subject: [PATCH 199/719] ARROW-12668: [C++][Dataset] Fix segfault in CountRows

The code was obviously incorrect before. This change avoids 1) capturing a variable by reference and 2) mutating a non-thread-safe variable concurrently.

Closes #10259 from lidavidm/arrow-12668

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/src/arrow/dataset/scanner.cc      | 21 ++++++++++-------
 cpp/src/arrow/dataset/scanner_test.cc | 34 +++++++++++++++++++++++++++
 2 files changed, 46 insertions(+), 9 deletions(-)

diff --git a/cpp/src/arrow/dataset/scanner.cc b/cpp/src/arrow/dataset/scanner.cc
index a2bc2146984..0d481a83748 100644
--- a/cpp/src/arrow/dataset/scanner.cc
+++ b/cpp/src/arrow/dataset/scanner.cc
@@ -1047,26 +1047,29 @@ Result<int64_t> SyncScanner::CountRows() {
   // indeed, the Parquet reader does this), counting rows using that optimization is
   // still slower than just hitting metadata directly where possible.
   ARROW_ASSIGN_OR_RAISE(auto fragment_it, GetFragments());
-  std::vector<Future<int64_t>> futures;
-  FragmentVector fragments;
+  // Fragment is non-null iff fast path could not be taken.
+  std::vector<Future<std::pair<int64_t, std::shared_ptr<Fragment>>>> futures;
   for (auto maybe_fragment : fragment_it) {
     ARROW_ASSIGN_OR_RAISE(auto fragment, maybe_fragment);
     auto count_fut = fragment->CountRows(scan_options_->filter, scan_options_);
-    // Take fragments by reference since future must complete before method returns
     futures.push_back(
-        count_fut.Then([&fragments, fragment](util::optional<int64_t> count) -> int64_t {
+        count_fut.Then([fragment](const util::optional<int64_t>& count)
+                           -> std::pair<int64_t, std::shared_ptr<Fragment>> {
           if (count.has_value()) {
-            return *count;
+            return std::make_pair(*count, nullptr);
           }
-          fragments.push_back(fragment);
-          return 0;
+          return std::make_pair(0, std::move(fragment));
         }));
   }
 
   int64_t count = 0;
+  FragmentVector fragments;
   for (auto& future : futures) {
-    ARROW_ASSIGN_OR_RAISE(auto subcount, future.result());
-    count += subcount;
+    ARROW_ASSIGN_OR_RAISE(auto count_result, future.result());
+    count += count_result.first;
+    if (count_result.second) {
+      fragments.push_back(std::move(count_result.second));
+    }
   }
   // Now check for any fragments where we couldn't take the fast path
   if (!fragments.empty()) {
diff --git a/cpp/src/arrow/dataset/scanner_test.cc b/cpp/src/arrow/dataset/scanner_test.cc
index 9b5f49005b1..cb90a8cbd45 100644
--- a/cpp/src/arrow/dataset/scanner_test.cc
+++ b/cpp/src/arrow/dataset/scanner_test.cc
@@ -403,6 +403,40 @@ TEST_P(TestScanner, CountRowsEmpty) {
   ASSERT_OK_AND_EQ(batch->num_rows(), scanner->CountRows());
 }
 
+// Regression test for ARROW-12668: ensure failures are properly handled
+class CountFailFragment : public InMemoryFragment {
+ public:
+  explicit CountFailFragment(RecordBatchVector record_batches)
+      : InMemoryFragment(std::move(record_batches)),
+        count(Future<util::optional<int64_t>>::Make()) {}
+
+  Future<util::optional<int64_t>> CountRows(compute::Expression,
+                                            std::shared_ptr<ScanOptions>) override {
+    return count;
+  }
+
+  Future<util::optional<int64_t>> count;
+};
+TEST_P(TestScanner, CountRowsFailure) {
+  SetSchema({field("i32", int32()), field("f64", float64())});
+  auto batch = ConstantArrayGenerator::Zeroes(GetParam().items_per_batch, schema_);
+  RecordBatchVector batches = {batch};
+  auto fragment1 = std::make_shared<CountFailFragment>(batches);
+  auto fragment2 = std::make_shared<CountFailFragment>(batches);
+  ScannerBuilder builder(
+      std::make_shared<FragmentDataset>(schema_, FragmentVector{fragment1, fragment2}),
+      options_);
+  ASSERT_OK(builder.UseAsync(GetParam().use_async));
+  ASSERT_OK(builder.UseThreads(GetParam().use_threads));
+  ASSERT_OK_AND_ASSIGN(auto scanner, builder.Finish());
+  fragment1->count.MarkFinished(Status::Invalid(""));
+  // Should immediately stop the count
+  ASSERT_RAISES(Invalid, scanner->CountRows());
+  // Fragment 2 doesn't complete until after the count stops - should not break anything
+  // under ASan, etc.
+  fragment2->count.MarkFinished(util::nullopt);
+}
+
 TEST_P(TestScanner, CountRowsWithMetadata) {
   SetSchema({field("i32", int32()), field("f64", float64())});
   auto batch = ConstantArrayGenerator::Zeroes(GetParam().items_per_batch, schema_);

From 60cfcf860e786859f822f206f660a27aae78125a Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Fri, 7 May 2021 01:38:36 +0000
Subject: [PATCH 200/719] ARROW-12648: [C++][FlightRPC] Enable TLS for Flight
 benchmark

This allows you to enable TLS with a self-signed certificate in the Flight benchmark. Quick testing shows about a 20% performance impact.

Closes #10242 from lidavidm/arrow-12648

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Yibo Cai <yibo.cai@arm.com>
---
 cpp/src/arrow/flight/flight_benchmark.cc | 52 +++++++++++++++++-------
 cpp/src/arrow/flight/perf_server.cc      | 48 ++++++++++++++++++----
 cpp/src/arrow/flight/test_util.cc        | 12 +++---
 cpp/src/arrow/flight/test_util.h         |  3 +-
 4 files changed, 86 insertions(+), 29 deletions(-)

diff --git a/cpp/src/arrow/flight/flight_benchmark.cc b/cpp/src/arrow/flight/flight_benchmark.cc
index 288f03c1055..1b5f27d3121 100644
--- a/cpp/src/arrow/flight/flight_benchmark.cc
+++ b/cpp/src/arrow/flight/flight_benchmark.cc
@@ -62,6 +62,8 @@ DEFINE_string(compression, "",
 DEFINE_string(
     data_file, "",
     "Instead of random data, use data from the given IPC file. Only affects -test_put.");
+DEFINE_string(cert_file, "", "Path to TLS certificate");
+DEFINE_string(key_file, "", "Path to TLS private key (used when spawning a server)");
 
 namespace perf = arrow::flight::perf;
 
@@ -258,8 +260,9 @@ arrow::Result<PerformanceResult> RunDoPutTest(FlightClient* client,
   return PerformanceResult{static_cast<int64_t>(batches.size()), num_records, num_bytes};
 }
 
-Status DoSinglePerfRun(FlightClient* client, const FlightCallOptions& call_options,
-                       bool test_put, PerformanceStats* stats) {
+Status DoSinglePerfRun(FlightClient* client, const FlightClientOptions client_options,
+                       const FlightCallOptions& call_options, bool test_put,
+                       PerformanceStats* stats) {
   // schema not needed
   perf::Perf perf;
   perf.set_stream_count(FLAGS_num_streams);
@@ -282,11 +285,11 @@ Status DoSinglePerfRun(FlightClient* client, const FlightCallOptions& call_optio
   int64_t start_total_records = stats->total_records;
 
   auto test_loop = test_put ? &RunDoPutTest : &RunDoGetTest;
-  auto ConsumeStream = [&stats, &test_loop,
+  auto ConsumeStream = [&stats, &test_loop, &client_options,
                         &call_options](const FlightEndpoint& endpoint) {
-    // TODO(wesm): Use location from endpoint, same host/port for now
     std::unique_ptr<FlightClient> client;
-    RETURN_NOT_OK(FlightClient::Connect(endpoint.locations.front(), &client));
+    RETURN_NOT_OK(
+        FlightClient::Connect(endpoint.locations.front(), client_options, &client));
 
     perf::Token token;
     token.ParseFromString(endpoint.ticket.ticket);
@@ -326,14 +329,15 @@ Status DoSinglePerfRun(FlightClient* client, const FlightCallOptions& call_optio
   return Status::OK();
 }
 
-Status RunPerformanceTest(FlightClient* client, const FlightCallOptions& call_options,
-                          bool test_put) {
+Status RunPerformanceTest(FlightClient* client, const FlightClientOptions& client_options,
+                          const FlightCallOptions& call_options, bool test_put) {
   StopWatch timer;
   timer.Start();
 
   PerformanceStats stats;
   for (int i = 0; i < FLAGS_num_perf_runs; ++i) {
-    RETURN_NOT_OK(DoSinglePerfRun(client, call_options, test_put, &stats));
+    RETURN_NOT_OK(
+        DoSinglePerfRun(client, client_options, call_options, test_put, &stats));
   }
 
   // Elapsed time in seconds
@@ -422,6 +426,7 @@ int main(int argc, char** argv) {
 
   std::unique_ptr<arrow::flight::TestServer> server;
   arrow::flight::Location location;
+  auto options = arrow::flight::FlightClientOptions::Defaults();
   if (FLAGS_test_unix || !FLAGS_server_unix.empty()) {
     if (FLAGS_server_unix == "") {
       FLAGS_server_unix = "/tmp/flight-bench-spawn.sock";
@@ -440,22 +445,41 @@ int main(int argc, char** argv) {
       std::cout << "Using spawned TCP server" << std::endl;
       server.reset(
           new arrow::flight::TestServer("arrow-flight-perf-server", FLAGS_server_port));
-      server->Start();
+      std::vector<std::string> args;
+      if (!FLAGS_cert_file.empty() || !FLAGS_key_file.empty()) {
+        if (!FLAGS_cert_file.empty() && !FLAGS_key_file.empty()) {
+          std::cout << "Enabling TLS for spawned server" << std::endl;
+          args.push_back("-cert_file");
+          args.push_back(FLAGS_cert_file);
+          args.push_back("-key_file");
+          args.push_back(FLAGS_key_file);
+        } else {
+          std::cerr << "If providing TLS cert/key, must provide both" << std::endl;
+          return 1;
+        }
+      }
+      server->Start(args);
     } else {
       std::cout << "Using standalone TCP server" << std::endl;
     }
     std::cout << "Server host: " << FLAGS_server_host << std::endl
               << "Server port: " << FLAGS_server_port << std::endl;
-    ABORT_NOT_OK(arrow::flight::Location::ForGrpcTcp(FLAGS_server_host, FLAGS_server_port,
-                                                     &location));
+    if (FLAGS_cert_file.empty()) {
+      ABORT_NOT_OK(arrow::flight::Location::ForGrpcTcp(FLAGS_server_host,
+                                                       FLAGS_server_port, &location));
+    } else {
+      ABORT_NOT_OK(arrow::flight::Location::ForGrpcTls(FLAGS_server_host,
+                                                       FLAGS_server_port, &location));
+      options.disable_server_verification = true;
+    }
   }
 
   std::unique_ptr<arrow::flight::FlightClient> client;
-  ABORT_NOT_OK(arrow::flight::FlightClient::Connect(location, &client));
+  ABORT_NOT_OK(arrow::flight::FlightClient::Connect(location, options, &client));
   ABORT_NOT_OK(arrow::flight::WaitForReady(client.get(), call_options));
 
-  arrow::Status s =
-      arrow::flight::RunPerformanceTest(client.get(), call_options, FLAGS_test_put);
+  arrow::Status s = arrow::flight::RunPerformanceTest(client.get(), options, call_options,
+                                                      FLAGS_test_put);
 
   if (server) {
     server->Stop();
diff --git a/cpp/src/arrow/flight/perf_server.cc b/cpp/src/arrow/flight/perf_server.cc
index b9814035b3b..7efd034ad25 100644
--- a/cpp/src/arrow/flight/perf_server.cc
+++ b/cpp/src/arrow/flight/perf_server.cc
@@ -19,6 +19,7 @@
 
 #include <signal.h>
 #include <cstdint>
+#include <fstream>
 #include <iostream>
 #include <memory>
 #include <string>
@@ -41,6 +42,8 @@
 DEFINE_string(server_host, "localhost", "Host where the server is running on");
 DEFINE_int32(port, 31337, "Server port to listen on");
 DEFINE_string(server_unix, "", "Unix socket path where the server is running on");
+DEFINE_string(cert_file, "", "Path to TLS certificate");
+DEFINE_string(key_file, "", "Path to TLS private key");
 
 namespace perf = arrow::flight::perf;
 namespace proto = arrow::flight::protocol;
@@ -142,15 +145,12 @@ Status GetPerfBatches(const perf::Token& token, const std::shared_ptr<Schema>& s
 class FlightPerfServer : public FlightServerBase {
  public:
   FlightPerfServer() : location_() {
-    if (FLAGS_server_unix.empty()) {
-      DCHECK_OK(Location::ForGrpcTcp(FLAGS_server_host, FLAGS_port, &location_));
-    } else {
-      DCHECK_OK(Location::ForGrpcUnix(FLAGS_server_unix, &location_));
-    }
     perf_schema_ = schema({field("a", int64()), field("b", int64()), field("c", int64()),
                            field("d", int64())});
   }
 
+  void SetLocation(Location location) { location_ = location; }
+
   Status GetFlightInfo(const ServerCallContext& context, const FlightDescriptor& request,
                        std::unique_ptr<FlightInfo>* info) override {
     perf::Perf perf_request;
@@ -233,13 +233,42 @@ int main(int argc, char** argv) {
 
   g_server.reset(new arrow::flight::FlightPerfServer);
 
-  arrow::flight::Location location;
+  arrow::flight::Location bind_location;
+  arrow::flight::Location connect_location;
   if (FLAGS_server_unix.empty()) {
-    ARROW_CHECK_OK(arrow::flight::Location::ForGrpcTcp("0.0.0.0", FLAGS_port, &location));
+    if (!FLAGS_cert_file.empty() || !FLAGS_key_file.empty()) {
+      if (!FLAGS_cert_file.empty() && !FLAGS_key_file.empty()) {
+        ARROW_CHECK_OK(
+            arrow::flight::Location::ForGrpcTls("0.0.0.0", FLAGS_port, &bind_location));
+        ARROW_CHECK_OK(arrow::flight::Location::ForGrpcTls(FLAGS_server_host, FLAGS_port,
+                                                           &connect_location));
+      } else {
+        std::cerr << "If providing TLS cert/key, must provide both" << std::endl;
+        return 1;
+      }
+    } else {
+      ARROW_CHECK_OK(
+          arrow::flight::Location::ForGrpcTcp("0.0.0.0", FLAGS_port, &bind_location));
+      ARROW_CHECK_OK(arrow::flight::Location::ForGrpcTcp(FLAGS_server_host, FLAGS_port,
+                                                         &connect_location));
+    }
   } else {
-    ARROW_CHECK_OK(arrow::flight::Location::ForGrpcUnix(FLAGS_server_unix, &location));
+    ARROW_CHECK_OK(
+        arrow::flight::Location::ForGrpcUnix(FLAGS_server_unix, &bind_location));
+    ARROW_CHECK_OK(
+        arrow::flight::Location::ForGrpcUnix(FLAGS_server_unix, &connect_location));
+  }
+  arrow::flight::FlightServerOptions options(bind_location);
+  if (!FLAGS_cert_file.empty() && !FLAGS_key_file.empty()) {
+    std::cout << "Enabling TLS" << std::endl;
+    std::ifstream cert_file(FLAGS_cert_file);
+    std::string cert((std::istreambuf_iterator<char>(cert_file)),
+                     (std::istreambuf_iterator<char>()));
+    std::ifstream key_file(FLAGS_key_file);
+    std::string key((std::istreambuf_iterator<char>(key_file)),
+                    (std::istreambuf_iterator<char>()));
+    options.tls_certificates.push_back(arrow::flight::CertKeyPair{cert, key});
   }
-  arrow::flight::FlightServerOptions options(location);
 
   ARROW_CHECK_OK(g_server->Init(options));
   // Exit with a clean error code (0) on SIGTERM
@@ -250,6 +279,7 @@ int main(int argc, char** argv) {
   } else {
     std::cout << "Server unix socket: " << FLAGS_server_unix << std::endl;
   }
+  g_server->SetLocation(connect_location);
   ARROW_CHECK_OK(g_server->Serve());
   return 0;
 }
diff --git a/cpp/src/arrow/flight/test_util.cc b/cpp/src/arrow/flight/test_util.cc
index 51267c5c0c1..6136b4367c0 100644
--- a/cpp/src/arrow/flight/test_util.cc
+++ b/cpp/src/arrow/flight/test_util.cc
@@ -86,7 +86,7 @@ Status ResolveCurrentExecutable(fs::path* out) {
 
 }  // namespace
 
-void TestServer::Start() {
+void TestServer::Start(const std::vector<std::string>& extra_args) {
   namespace fs = boost::filesystem;
 
   std::string str_port = std::to_string(port_);
@@ -104,11 +104,13 @@ void TestServer::Start() {
 
   try {
     if (unix_sock_.empty()) {
-      server_process_ = std::make_shared<bp::child>(
-          bp::search_path(executable_name_, search_path), "-port", str_port);
+      server_process_ =
+          std::make_shared<bp::child>(bp::search_path(executable_name_, search_path),
+                                      "-port", str_port, bp::args(extra_args));
     } else {
-      server_process_ = std::make_shared<bp::child>(
-          bp::search_path(executable_name_, search_path), "-server_unix", unix_sock_);
+      server_process_ =
+          std::make_shared<bp::child>(bp::search_path(executable_name_, search_path),
+                                      "-server_unix", unix_sock_, bp::args(extra_args));
     }
   } catch (...) {
     std::stringstream ss;
diff --git a/cpp/src/arrow/flight/test_util.h b/cpp/src/arrow/flight/test_util.h
index 21c48eabf7e..fff82df3d2c 100644
--- a/cpp/src/arrow/flight/test_util.h
+++ b/cpp/src/arrow/flight/test_util.h
@@ -56,7 +56,8 @@ class ARROW_FLIGHT_EXPORT TestServer {
   TestServer(const std::string& executable_name, const std::string& unix_sock)
       : executable_name_(executable_name), unix_sock_(unix_sock) {}
 
-  void Start();
+  void Start(const std::vector<std::string>& extra_args);
+  void Start() { Start({}); }
 
   int Stop();
 

From 979a08039fa85e8f32da30d2c2b3e2ab9dca78eb Mon Sep 17 00:00:00 2001
From: "Uwe L. Korn" <uwe.korn@quantco.com>
Date: Fri, 7 May 2021 11:41:39 +0200
Subject: [PATCH 201/719] ARROW-12649: [Python/Packaging] Move conda-aarch64 to
 Azure with cross-compilation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #10243 from xhochy/ARROW-12649

Lead-authored-by: Uwe L. Korn <uwe.korn@quantco.com>
Co-authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Uwe L. Korn <uwe.korn@quantco.com>
---
 ...rch64_numpy1.17python3.6.____cpython.yaml} | 30 ++++++-------
 ...rch64_numpy1.17python3.7.____cpython.yaml} | 30 ++++++-------
 ...rch64_numpy1.17python3.8.____cpython.yaml} | 30 ++++++-------
 ...rch64_numpy1.19python3.9.____cpython.yaml} | 28 ++++++------
 dev/tasks/conda-recipes/drone-steps.sh        | 33 --------------
 dev/tasks/conda-recipes/drone.yml             | 43 -------------------
 dev/tasks/tasks.yml                           |  6 +--
 7 files changed, 58 insertions(+), 142 deletions(-)
 rename dev/tasks/conda-recipes/.ci_support/{linux_aarch64_python3.6.____cpython.yaml => linux_aarch64_numpy1.17python3.6.____cpython.yaml} (78%)
 rename dev/tasks/conda-recipes/.ci_support/{linux_aarch64_python3.7.____cpython.yaml => linux_aarch64_numpy1.17python3.7.____cpython.yaml} (78%)
 rename dev/tasks/conda-recipes/.ci_support/{linux_aarch64_python3.8.____cpython.yaml => linux_aarch64_numpy1.17python3.8.____cpython.yaml} (78%)
 rename dev/tasks/conda-recipes/.ci_support/{linux_aarch64_python3.9.____cpython.yaml => linux_aarch64_numpy1.19python3.9.____cpython.yaml} (79%)
 delete mode 100755 dev/tasks/conda-recipes/drone-steps.sh
 delete mode 100644 dev/tasks/conda-recipes/drone.yml

diff --git a/dev/tasks/conda-recipes/.ci_support/linux_aarch64_python3.6.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.6.____cpython.yaml
similarity index 78%
rename from dev/tasks/conda-recipes/.ci_support/linux_aarch64_python3.6.____cpython.yaml
rename to dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.6.____cpython.yaml
index f2d3ceaac68..34a64ceb979 100644
--- a/dev/tasks/conda-recipes/.ci_support/linux_aarch64_python3.6.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.6.____cpython.yaml
@@ -1,15 +1,13 @@
 BUILD:
 - aarch64-conda_cos7-linux-gnu
 aws_sdk_cpp:
-- 1.8.63
-boost_cpp:
-- 1.74.0
+- 1.8.186
 bzip2:
 - '1'
 c_compiler:
 - gcc
 c_compiler_version:
-- '7'
+- '9'
 cdt_arch:
 - aarch64
 cdt_name:
@@ -23,26 +21,24 @@ cuda_compiler_version:
 cxx_compiler:
 - gxx
 cxx_compiler_version:
-- '7'
+- '9'
 docker_image:
-- condaforge/linux-anvil-aarch64
+- quay.io/condaforge/linux-anvil-comp7
 gflags:
 - '2.2'
 glog:
 - 0.4.0
 grpc_cpp:
-- '1.32'
+- '1.37'
 libprotobuf:
-- '3.13'
+- '3.15'
 lz4_c:
-- 1.9.2
+- 1.9.3
 numpy:
-- '1.16'
+- '1.17'
 orc:
-- 1.6.5
+- 1.6.7
 pin_run_as_build:
-  boost-cpp:
-    max_pin: x.x.x
   bzip2:
     max_pin: x
   lz4-c:
@@ -55,16 +51,18 @@ pin_run_as_build:
 python:
 - 3.6.* *_cpython
 re2:
-- 2020.10.01
+- 2021.04.01
 snappy:
 - '1'
 target_platform:
 - linux-aarch64
+thrift_cpp:
+- 0.14.1
 zip_keys:
 - - c_compiler_version
   - cxx_compiler_version
-- - numpy
-  - python
+- - python
+  - numpy
 zlib:
 - '1.2'
 zstd:
diff --git a/dev/tasks/conda-recipes/.ci_support/linux_aarch64_python3.7.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.7.____cpython.yaml
similarity index 78%
rename from dev/tasks/conda-recipes/.ci_support/linux_aarch64_python3.7.____cpython.yaml
rename to dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.7.____cpython.yaml
index 611c39c907c..052c58122a6 100644
--- a/dev/tasks/conda-recipes/.ci_support/linux_aarch64_python3.7.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.7.____cpython.yaml
@@ -1,15 +1,13 @@
 BUILD:
 - aarch64-conda_cos7-linux-gnu
 aws_sdk_cpp:
-- 1.8.63
-boost_cpp:
-- 1.74.0
+- 1.8.186
 bzip2:
 - '1'
 c_compiler:
 - gcc
 c_compiler_version:
-- '7'
+- '9'
 cdt_arch:
 - aarch64
 cdt_name:
@@ -23,26 +21,24 @@ cuda_compiler_version:
 cxx_compiler:
 - gxx
 cxx_compiler_version:
-- '7'
+- '9'
 docker_image:
-- condaforge/linux-anvil-aarch64
+- quay.io/condaforge/linux-anvil-comp7
 gflags:
 - '2.2'
 glog:
 - 0.4.0
 grpc_cpp:
-- '1.32'
+- '1.37'
 libprotobuf:
-- '3.13'
+- '3.15'
 lz4_c:
-- 1.9.2
+- 1.9.3
 numpy:
-- '1.16'
+- '1.17'
 orc:
-- 1.6.5
+- 1.6.7
 pin_run_as_build:
-  boost-cpp:
-    max_pin: x.x.x
   bzip2:
     max_pin: x
   lz4-c:
@@ -55,16 +51,18 @@ pin_run_as_build:
 python:
 - 3.7.* *_cpython
 re2:
-- 2020.10.01
+- 2021.04.01
 snappy:
 - '1'
 target_platform:
 - linux-aarch64
+thrift_cpp:
+- 0.14.1
 zip_keys:
 - - c_compiler_version
   - cxx_compiler_version
-- - numpy
-  - python
+- - python
+  - numpy
 zlib:
 - '1.2'
 zstd:
diff --git a/dev/tasks/conda-recipes/.ci_support/linux_aarch64_python3.8.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.8.____cpython.yaml
similarity index 78%
rename from dev/tasks/conda-recipes/.ci_support/linux_aarch64_python3.8.____cpython.yaml
rename to dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.8.____cpython.yaml
index 2f0fc0e2306..a61e0c42b55 100644
--- a/dev/tasks/conda-recipes/.ci_support/linux_aarch64_python3.8.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.8.____cpython.yaml
@@ -1,15 +1,13 @@
 BUILD:
 - aarch64-conda_cos7-linux-gnu
 aws_sdk_cpp:
-- 1.8.63
-boost_cpp:
-- 1.74.0
+- 1.8.186
 bzip2:
 - '1'
 c_compiler:
 - gcc
 c_compiler_version:
-- '7'
+- '9'
 cdt_arch:
 - aarch64
 cdt_name:
@@ -23,26 +21,24 @@ cuda_compiler_version:
 cxx_compiler:
 - gxx
 cxx_compiler_version:
-- '7'
+- '9'
 docker_image:
-- condaforge/linux-anvil-aarch64
+- quay.io/condaforge/linux-anvil-comp7
 gflags:
 - '2.2'
 glog:
 - 0.4.0
 grpc_cpp:
-- '1.32'
+- '1.37'
 libprotobuf:
-- '3.13'
+- '3.15'
 lz4_c:
-- 1.9.2
+- 1.9.3
 numpy:
-- '1.16'
+- '1.17'
 orc:
-- 1.6.5
+- 1.6.7
 pin_run_as_build:
-  boost-cpp:
-    max_pin: x.x.x
   bzip2:
     max_pin: x
   lz4-c:
@@ -55,16 +51,18 @@ pin_run_as_build:
 python:
 - 3.8.* *_cpython
 re2:
-- 2020.10.01
+- 2021.04.01
 snappy:
 - '1'
 target_platform:
 - linux-aarch64
+thrift_cpp:
+- 0.14.1
 zip_keys:
 - - c_compiler_version
   - cxx_compiler_version
-- - numpy
-  - python
+- - python
+  - numpy
 zlib:
 - '1.2'
 zstd:
diff --git a/dev/tasks/conda-recipes/.ci_support/linux_aarch64_python3.9.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.19python3.9.____cpython.yaml
similarity index 79%
rename from dev/tasks/conda-recipes/.ci_support/linux_aarch64_python3.9.____cpython.yaml
rename to dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.19python3.9.____cpython.yaml
index 2ec87205a0f..0eccab25e47 100644
--- a/dev/tasks/conda-recipes/.ci_support/linux_aarch64_python3.9.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.19python3.9.____cpython.yaml
@@ -1,15 +1,13 @@
 BUILD:
 - aarch64-conda_cos7-linux-gnu
 aws_sdk_cpp:
-- 1.8.63
-boost_cpp:
-- 1.74.0
+- 1.8.186
 bzip2:
 - '1'
 c_compiler:
 - gcc
 c_compiler_version:
-- '7'
+- '9'
 cdt_arch:
 - aarch64
 cdt_name:
@@ -23,26 +21,24 @@ cuda_compiler_version:
 cxx_compiler:
 - gxx
 cxx_compiler_version:
-- '7'
+- '9'
 docker_image:
-- condaforge/linux-anvil-aarch64
+- quay.io/condaforge/linux-anvil-comp7
 gflags:
 - '2.2'
 glog:
 - 0.4.0
 grpc_cpp:
-- '1.32'
+- '1.37'
 libprotobuf:
-- '3.13'
+- '3.15'
 lz4_c:
-- 1.9.2
+- 1.9.3
 numpy:
 - '1.19'
 orc:
-- 1.6.5
+- 1.6.7
 pin_run_as_build:
-  boost-cpp:
-    max_pin: x.x.x
   bzip2:
     max_pin: x
   lz4-c:
@@ -55,16 +51,18 @@ pin_run_as_build:
 python:
 - 3.9.* *_cpython
 re2:
-- 2020.10.01
+- 2021.04.01
 snappy:
 - '1'
 target_platform:
 - linux-aarch64
+thrift_cpp:
+- 0.14.1
 zip_keys:
 - - c_compiler_version
   - cxx_compiler_version
-- - numpy
-  - python
+- - python
+  - numpy
 zlib:
 - '1.2'
 zstd:
diff --git a/dev/tasks/conda-recipes/drone-steps.sh b/dev/tasks/conda-recipes/drone-steps.sh
deleted file mode 100755
index 3c379b824db..00000000000
--- a/dev/tasks/conda-recipes/drone-steps.sh
+++ /dev/null
@@ -1,33 +0,0 @@
-#!/bin/bash
-
-set -ex
-
-OUTPUT_DIR=$1
-QUEUE_REMOTE_URL=$2
-TASK_BRANCH=$3
-TASK_TAG=$4
-UPLOAD_TO_ANACONDA=$5
-
-conda install -y mamba
-$FEEDSTOCK_ROOT/build_steps.sh ${OUTPUT_DIR}
-
-# Upload as Github release
-mamba install -y anaconda-client shyaml pygit2 -c conda-forge
-
-pushd $DRONE_WORKSPACE/arrow
-
-pip install dev/archery[crossbow]
-
-archery crossbow \
-  --queue-path $DRONE_WORKSPACE \
-  --queue-remote ${QUEUE_REMOTE_URL} \
-  upload-artifacts \
-  --sha ${TASK_BRANCH} \
-  --tag ${TASK_TAG} \
-  --pattern "${OUTPUT_DIR}/linux-aarch64/*.tar.bz2"
-
-if [[ "${UPLOAD_TO_ANACONDA}" == "1" ]]; then
-  anaconda -t ${CROSSBOW_ANACONDA_TOKEN} upload --force build_artifacts/linux-aarch64/*.tar.bz2
-fi
-
-popd
diff --git a/dev/tasks/conda-recipes/drone.yml b/dev/tasks/conda-recipes/drone.yml
deleted file mode 100644
index a461c79b9d4..00000000000
--- a/dev/tasks/conda-recipes/drone.yml
+++ /dev/null
@@ -1,43 +0,0 @@
----
-kind: pipeline
-name: {{ config }}
-
-platform:
-  os: linux
-  arch: arm64
-
-# Omit double builds with crossbow
-trigger:
-  event:
-    - push
-
-steps:
-- name: Install and build
-  image: condaforge/linux-anvil-aarch64
-  environment:
-    CONFIG: {{ config }}
-    UPLOAD_PACKAGES: False
-    ARROW_VERSION: {{ arrow.no_rc_version }}
-    PLATFORM: linux-aarch64
-    BINSTAR_TOKEN:
-      from_secret: BINSTAR_TOKEN
-    FEEDSTOCK_TOKEN:
-      from_secret: FEEDSTOCK_TOKEN
-    STAGING_BINSTAR_TOKEN:
-      from_secret: STAGING_BINSTAR_TOKEN
-    CROSSBOW_GITHUB_TOKEN:
-      from_secret: CROSSBOW_GITHUB_TOKEN
-    CROSSBOW_ANACONDA_TOKEN:
-      from_secret: CROSSBOW_ANACONDA_TOKEN
-  commands:
-    - export RECIPE_ROOT="$FEEDSTOCK_ROOT/arrow-cpp"
-    - export CI=drone
-    - export GIT_BRANCH="{{ arrow.branch }}"
-    - export FEEDSTOCK_NAME=arrow-cpp
-    - export FEEDSTOCK_ROOT="$DRONE_WORKSPACE/arrow/dev/tasks/conda-recipes"
-    - sed -i '$ichown -R conda:conda "$FEEDSTOCK_ROOT"' /opt/docker/bin/entrypoint
-    - yum install -y git
-    - git clone --no-checkout {{ arrow.remote }} arrow
-    - pushd arrow && git fetch -t {{ arrow.remote }} {{ arrow.branch }} && git checkout FETCH_HEAD && git submodule update --init --recursive && popd
-    - mkdir -p $(pwd)/build_artifacts && chmod a+rwx $(pwd)/build_artifacts
-    - /opt/docker/bin/entrypoint $FEEDSTOCK_ROOT/drone-steps.sh $(pwd)/build_artifacts {{ queue_remote_url }} {{ task.branch }} {{ task.tag }} {% if arrow.branch == 'master' %}1{% else %}0{% endif %}
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index c3a2c5a1b6a..9fb5fbc3938 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -218,10 +218,10 @@ tasks:
       - pyarrow-{no_rc_version}-py{{ pyver }}(h[a-z0-9]+)_0_cuda.tar.bz2
 
   conda-linux-gcc-py{{ pyver }}-arm64:
-    ci: drone
-    template: conda-recipes/drone.yml
+    ci: azure
+    template: conda-recipes/azure.linux.yml
     params:
-      config: linux_aarch64_python{{ python_version }}.____cpython
+      config: linux_aarch64_numpy{{ numpy_version }}python{{ python_version }}.____cpython
     artifacts:
       - arrow-cpp-{no_rc_version}-py{{ pyver }}(h[a-z0-9]+)_0_cpu.tar.bz2
       - pyarrow-{no_rc_version}-py{{ pyver }}(h[a-z0-9]+)_0_cpu.tar.bz2

From 66cd3734b7318ff7dc4ff3025ec112744540255a Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Fri, 7 May 2021 11:31:59 -0400
Subject: [PATCH 202/719] ARROW-11515: [R] Bindings for strsplit

This PR adds bindings for both `strsplit()` and `stringr::str_split()` for dplyr

Closes #10190 from thisisnic/ARROW-11515-strsplit

Lead-authored-by: Nic Crane <thisisnic@gmail.com>
Co-authored-by: Ian Cook <ianmcook@gmail.com>
Co-authored-by: Nic <thisisnic@gmail.com>
Signed-off-by: Ian Cook <ianmcook@gmail.com>
---
 r/R/compute.R                                 |   2 +
 r/R/dplyr.R                                   |  66 ++++-
 r/man/call_function.Rd                        |   4 +
 r/man/contains_regex.Rd                       |  18 ++
 r/man/get_stringr_pattern_options.Rd          |  22 ++
 r/src/compute.cpp                             |  27 +++
 .../testthat/test-dplyr-string-functions.R    | 225 +++++++++++++++---
 7 files changed, 326 insertions(+), 38 deletions(-)
 create mode 100644 r/man/contains_regex.Rd
 create mode 100644 r/man/get_stringr_pattern_options.Rd

diff --git a/r/R/compute.R b/r/R/compute.R
index 0641bf1615c..c3783ba3295 100644
--- a/r/R/compute.R
+++ b/r/R/compute.R
@@ -27,6 +27,8 @@
 #' `RecordBatch`, or `Table`.
 #' @param args list arguments as an alternative to specifying in `...`
 #' @param options named list of C++ function options.
+#' @details When passing indices in `...`, `args`, or `options`, express them as
+#' 0-based integers (consistent with C++).
 #' @return An `Array`, `ChunkedArray`, `Scalar`, `RecordBatch`, or `Table`, whatever the compute function results in.
 #' @seealso [Arrow C++ documentation](https://arrow.apache.org/docs/cpp/compute.html) for the functions and their respective options.
 #' @examples
diff --git a/r/R/dplyr.R b/r/R/dplyr.R
index 21266d39b26..4e66c227bea 100644
--- a/r/R/dplyr.R
+++ b/r/R/dplyr.R
@@ -476,6 +476,8 @@ build_function_list <- function(FUN) {
     gsub = arrow_r_string_replace_function(FUN, -1L),
     str_replace = arrow_stringr_string_replace_function(FUN, 1L),
     str_replace_all = arrow_stringr_string_replace_function(FUN, -1L),
+    strsplit = arrow_r_string_split_function(FUN),
+    str_split = arrow_stringr_string_split_function(FUN),
     between = function(x, left, right) {
       x >= left & x <= right
     },
@@ -539,6 +541,44 @@ arrow_stringr_string_replace_function <- function(FUN, max_replacements) {
   }
 }
 
+arrow_r_string_split_function <- function(FUN, reverse = FALSE, max_splits = -1) {
+  function(x, split, fixed = FALSE, perl = FALSE, useBytes = FALSE) {
+    
+    assert_that(is.string(split))
+    
+    # if !fixed but no regex metachars in split pattern, allow to proceed as split isn't regex
+    if (!fixed && contains_regex(split)) {
+      stop("Regular expression matching not supported in strsplit for Arrow", call. = FALSE)
+    }
+    if (fixed && perl) {
+      warning("Argument 'perl = TRUE' will be ignored", call. = FALSE)
+    }
+    FUN("split_pattern", x, options = list(pattern = split, reverse = reverse, max_splits = max_splits))
+  }
+}
+
+arrow_stringr_string_split_function <- function(FUN, reverse = FALSE) {
+  function(string, pattern, n = Inf, simplify = FALSE) {
+    opts <- get_stringr_pattern_options(enexpr(pattern))
+    if (!opts$fixed && contains_regex(opts$pattern)) {
+      stop("Regular expression matching not supported in str_split() for Arrow", call. = FALSE)
+    }
+    if (opts$ignore_case) {
+      stop("Case-insensitive string splitting not supported in Arrow", call. = FALSE)
+    }
+    if (n == 0) {
+      stop("Splitting strings into zero parts not supported in Arrow" , call. = FALSE)
+    }
+    if (identical(n, Inf)) {
+      n <- 0L
+    }
+    if (simplify) {
+      warning("Argument 'simplify = TRUE' will be ignored", call. = FALSE)
+    }
+    FUN("split_pattern", string, options = list(pattern = opts$pattern, reverse = reverse, max_splits = n - 1L))
+  }
+}
+
 # format `pattern` as needed for case insensitivity and literal matching by RE2
 format_string_pattern <- function(pattern, ignore.case, fixed) {
   # Arrow lacks native support for case-insensitive literal string matching and
@@ -571,9 +611,18 @@ format_string_replacement <- function(replacement, ignore.case, fixed) {
   replacement
 }
 
-# this function assigns definitions for the stringr pattern modifier functions
-# (fixed, regex, etc.) in itself, and uses them to evaluate the quoted
-# expression `pattern`
+#' Get `stringr` pattern options
+#'
+#' This function assigns definitions for the `stringr` pattern modifier
+#' functions (`fixed()`, `regex()`, etc.) inside itself, and uses them to
+#' evaluate the quoted expression `pattern`, returning a list that is used
+#' to control pattern matching behavior in internal `arrow` functions.
+#'
+#' @param pattern Unevaluated expression containing a call to a `stringr`
+#' pattern modifier function
+#'
+#' @return List containing elements `pattern`, `fixed`, and `ignore_case`
+#' @keywords internal
 get_stringr_pattern_options <- function(pattern) {
   fixed <- function(pattern, ignore_case = FALSE, ...) {
     check_dots(...)
@@ -605,7 +654,7 @@ get_stringr_pattern_options <- function(pattern) {
   }
   ensure_opts <- function(opts) {
     if (is.character(opts)) {
-      opts <- list(pattern = opts, fixed = TRUE, ignore_case = FALSE)
+      opts <- list(pattern = opts, fixed = FALSE, ignore_case = FALSE)
     }
     opts
   }
@@ -1097,3 +1146,12 @@ not_implemented_for_dataset <- function(method) {
     call. = FALSE
   )
 }
+
+#' Does this string contain regex metacharacters?
+#' 
+#' @param string String to be tested
+#' @keywords internal
+#' @return Logical: does `string` contain regex metacharacters?
+contains_regex <- function(string) {
+  grepl("[.\\|()[{^$*+?]", string)
+}
diff --git a/r/man/call_function.Rd b/r/man/call_function.Rd
index 4ab9fd7e942..e89fd00576e 100644
--- a/r/man/call_function.Rd
+++ b/r/man/call_function.Rd
@@ -31,6 +31,10 @@ Many Arrow compute functions are mapped to R methods,
 and in a \code{dplyr} evaluation context, \link[=list_compute_functions]{all Arrow functions}
 are callable with an \code{arrow_} prefix.
 }
+\details{
+When passing indices in \code{...}, \code{args}, or \code{options}, express them as
+0-based integers (consistent with C++).
+}
 \examples{
 \donttest{
 a <- Array$create(c(1L, 2L, 3L, NA, 5L))
diff --git a/r/man/contains_regex.Rd b/r/man/contains_regex.Rd
new file mode 100644
index 00000000000..d8fee96d99b
--- /dev/null
+++ b/r/man/contains_regex.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dplyr.R
+\name{contains_regex}
+\alias{contains_regex}
+\title{Does this string contain regex metacharacters?}
+\usage{
+contains_regex(string)
+}
+\arguments{
+\item{string}{String to be tested}
+}
+\value{
+Logical: does \code{string} contain regex metacharacters?
+}
+\description{
+Does this string contain regex metacharacters?
+}
+\keyword{internal}
diff --git a/r/man/get_stringr_pattern_options.Rd b/r/man/get_stringr_pattern_options.Rd
new file mode 100644
index 00000000000..79a9a72b7cf
--- /dev/null
+++ b/r/man/get_stringr_pattern_options.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dplyr.R
+\name{get_stringr_pattern_options}
+\alias{get_stringr_pattern_options}
+\title{Get \code{stringr} pattern options}
+\usage{
+get_stringr_pattern_options(pattern)
+}
+\arguments{
+\item{pattern}{Unevaluated expression containing a call to a \code{stringr}
+pattern modifier function}
+}
+\value{
+List containing elements \code{pattern}, \code{fixed}, and \code{ignore_case}
+}
+\description{
+This function assigns definitions for the \code{stringr} pattern modifier
+functions (\code{fixed()}, \code{regex()}, etc.) inside itself, and uses them to
+evaluate the quoted expression \code{pattern}, returning a list that is used
+to control pattern matching behavior in internal \code{arrow} functions.
+}
+\keyword{internal}
diff --git a/r/src/compute.cpp b/r/src/compute.cpp
index c215d661e3a..0ffe53578c4 100644
--- a/r/src/compute.cpp
+++ b/r/src/compute.cpp
@@ -233,6 +233,33 @@ std::shared_ptr<arrow::compute::FunctionOptions> make_compute_options(
                                      max_replacements);
   }
 
+  if (func_name == "split_pattern") {
+    using Options = arrow::compute::SplitPatternOptions;
+    int64_t max_splits = -1;
+    if (!Rf_isNull(options["max_splits"])) {
+      max_splits = cpp11::as_cpp<int64_t>(options["max_splits"]);
+    }
+    bool reverse = false;
+    if (!Rf_isNull(options["reverse"])) {
+      reverse = cpp11::as_cpp<bool>(options["reverse"]);
+    }
+    return std::make_shared<Options>(cpp11::as_cpp<std::string>(options["pattern"]),
+                                     max_splits, reverse);
+  }
+
+  if (func_name == "utf8_split_whitespace" || func_name == "ascii_split_whitespace") {
+    using Options = arrow::compute::SplitOptions;
+    int64_t max_splits = -1;
+    if (!Rf_isNull(options["max_splits"])) {
+      max_splits = cpp11::as_cpp<int64_t>(options["max_splits"]);
+    }
+    bool reverse = false;
+    if (!Rf_isNull(options["reverse"])) {
+      reverse = cpp11::as_cpp<bool>(options["reverse"]);
+    }
+    return std::make_shared<Options>(max_splits, reverse);
+  }
+
   if (func_name == "variance" || func_name == "stddev") {
     using Options = arrow::compute::VarianceOptions;
     return std::make_shared<Options>(cpp11::as_cpp<int64_t>(options["ddof"]));
diff --git a/r/tests/testthat/test-dplyr-string-functions.R b/r/tests/testthat/test-dplyr-string-functions.R
index 5faf2436f55..64351a83ea7 100644
--- a/r/tests/testthat/test-dplyr-string-functions.R
+++ b/r/tests/testthat/test-dplyr-string-functions.R
@@ -52,7 +52,7 @@ skip_if_not_available("re2")
 test_that("grepl", {
   df <- tibble(x = c("Foo", "bar"))
 
-  for(fixed in c(TRUE, FALSE)) {
+  for (fixed in c(TRUE, FALSE)) {
 
     expect_dplyr_equal(
       input %>%
@@ -150,7 +150,7 @@ test_that("str_detect", {
 test_that("sub and gsub", {
   df <- tibble(x = c("Foo", "bar"))
 
-  for(fixed in c(TRUE, FALSE)) {
+  for (fixed in c(TRUE, FALSE)) {
 
     expect_dplyr_equal(
       input %>%
@@ -206,12 +206,27 @@ test_that("sub and gsub with ignore.case = TRUE and fixed = TRUE", {
 test_that("str_replace and str_replace_all", {
   df <- tibble(x = c("Foo", "bar"))
 
+  expect_dplyr_equal(
+    input %>%
+      transmute(x = str_replace_all(x, "^F", "baz")) %>%
+      collect(),
+    df
+  )
+  
   expect_dplyr_equal(
     input %>%
       transmute(x = str_replace_all(x, regex("^F"), "baz")) %>%
       collect(),
     df
   )
+  
+  expect_dplyr_equal(
+    input %>%
+      mutate(x = str_replace(x, "^F[a-z]{2}", "baz")) %>%
+      collect(),
+    df
+  )
+  
   expect_dplyr_equal(
     input %>%
       transmute(x = str_replace(x, regex("^f[A-Z]{2}", ignore_case = TRUE), "baz")) %>%
@@ -239,7 +254,172 @@ test_that("str_replace and str_replace_all", {
 
 })
 
-test_that("backreferences in pattern", {
+test_that("strsplit and str_split", {
+
+  df <- tibble(x = c("Foo and bar", "baz and qux and quux"))
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(x = strsplit(x, "and")) %>%
+      collect(),
+    df
+  )
+  expect_dplyr_equal(
+    input %>%
+      mutate(x = strsplit(x, "and.*", fixed = TRUE)) %>%
+      collect(),
+    df
+  )
+  expect_dplyr_equal(
+    input %>%
+      mutate(x = str_split(x, "and")) %>%
+      collect(),
+    df
+  )
+  expect_dplyr_equal(
+    input %>%
+      mutate(x = str_split(x, "and", n = 2)) %>%
+      collect(),
+    df
+  )
+  expect_dplyr_equal(
+    input %>%
+      mutate(x = str_split(x, fixed("and"), n = 2)) %>%
+      collect(),
+    df
+  )
+  expect_dplyr_equal(
+    input %>%
+      mutate(x = str_split(x, regex("and"), n = 2)) %>%
+      collect(),
+    df
+  )
+
+})
+
+test_that("arrow_*_split_whitespace functions", {
+
+  # use only ASCII whitespace characters
+  df_ascii <- tibble(x = c("Foo\nand bar", "baz\tand qux and quux"))
+
+  # use only non-ASCII whitespace characters
+  df_utf8 <- tibble(x = c("Foo\u00A0and\u2000bar", "baz\u2006and\u1680qux\u3000and\u2008quux"))
+
+  df_split <- tibble(x = list(c("Foo", "and", "bar"), c("baz", "and", "qux", "and", "quux")))
+
+  expect_equivalent(
+    df_ascii %>%
+      Table$create() %>%
+      mutate(x = arrow_ascii_split_whitespace(x)) %>%
+      collect(),
+    df_split
+  )
+  expect_equivalent(
+    df_utf8 %>%
+      Table$create() %>%
+      mutate(x = arrow_utf8_split_whitespace(x)) %>%
+      collect(),
+    df_split
+  )
+
+})
+
+test_that("errors and warnings in string splitting", {
+  df <- tibble(x = c("Foo and bar", "baz and qux and quux"))
+
+  # These conditions generate an error, but abandon_ship() catches the error,
+  # issues a warning, and pulls the data into R
+  expect_warning(
+    df %>%
+      Table$create() %>%
+      mutate(x = strsplit(x, "and.*", fixed = FALSE)) %>%
+      collect(),
+    regexp = "not supported"
+  )
+  expect_warning(
+    df %>%
+      Table$create() %>%
+      mutate(x = str_split(x, "and.?")) %>%
+      collect()
+  )
+  expect_warning(
+    df %>%
+      Table$create() %>%
+      mutate(x = str_split(x, regex("and.?"), n = 2)) %>%
+      collect(),
+    regexp = "not supported"
+  )
+  expect_warning(
+    df %>%
+      Table$create() %>%
+      mutate(x = str_split(x, fixed("and", ignore_case = TRUE))) %>%
+      collect(),
+    "not supported"
+  )
+  expect_warning(
+    df %>%
+      Table$create() %>%
+      mutate(x = str_split(x, coll("and.?"))) %>%
+      collect(),
+    regexp = "not supported"
+  )
+  expect_warning(
+    df %>%
+      Table$create() %>%
+      mutate(x = str_split(x, boundary(type = "word"))) %>%
+      collect(),
+    regexp = "not supported"
+  )
+  expect_warning(
+    df %>%
+      Table$create() %>%
+      mutate(x = str_split(x, "and", n = 0)) %>%
+      collect(),
+    regexp = "not supported"
+  )
+
+  # This condition generates a warning
+  expect_warning(
+    df %>%
+      Table$create() %>%
+      mutate(x = str_split(x, fixed("and"), simplify = TRUE)) %>%
+      collect(),
+    "ignored"
+  )
+
+})
+
+test_that("errors and warnings in string detection and replacement", {
+  df <- tibble(x = c("Foo", "bar"))
+
+  # These conditions generate an error, but abandon_ship() catches the error,
+  # issues a warning, and pulls the data into R
+  expect_warning(
+    df %>%
+      Table$create() %>%
+      filter(str_detect(x, boundary(type = "character"))) %>%
+      collect(),
+    regexp = "not implemented"
+  )
+  expect_warning(
+    df %>%
+      Table$create() %>%
+      mutate(x = str_replace_all(x, coll("o", locale = "en"), "ó")) %>%
+      collect(),
+    regexp = "not supported"
+  )
+
+  # This condition generates a warning
+  expect_warning(
+    df %>%
+      Table$create() %>%
+      transmute(x = str_replace_all(x, regex("o", multiline = TRUE), "u")),
+    "Ignoring pattern modifier argument not supported in Arrow: \"multiline\""
+  )
+
+})
+
+test_that("backreferences in pattern in string detection", {
   skip("RE2 does not support backreferences in pattern (https://github.com/google/re2/issues/101)")
   df <- tibble(x = c("Foo", "bar"))
 
@@ -251,7 +431,7 @@ test_that("backreferences in pattern", {
   )
 })
 
-test_that("backreferences (substitutions) in replacement", {
+test_that("backreferences (substitutions) in string replacement", {
   df <- tibble(x = c("Foo", "bar"))
 
   expect_dplyr_equal(
@@ -265,6 +445,12 @@ test_that("backreferences (substitutions) in replacement", {
       collect(),
     tibble(url = "https://arrow.apache.org/docs/r/")
   )
+  expect_dplyr_equal(
+    input %>%
+      transmute(x = str_replace(x, "^(\\w)o(.*)", "\\1\\2p")) %>%
+      collect(),
+    df
+  )
   expect_dplyr_equal(
     input %>%
       transmute(x = str_replace(x, regex("^(\\w)o(.*)", ignore_case = TRUE), "\\1\\2p")) %>%
@@ -279,7 +465,7 @@ test_that("backreferences (substitutions) in replacement", {
   )
 })
 
-test_that("edge cases", {
+test_that("edge cases in string detection and replacement", {
 
   # in case-insensitive fixed match/replace, test that "\\E" in the search
   # string and backslashes in the replacement string are interpreted literally.
@@ -316,32 +502,3 @@ test_that("edge cases", {
   )
 
 })
-
-test_that("errors and warnings", {
-  df <- tibble(x = c("Foo", "bar"))
-
-  # These conditions generate an error, but abandon_ship() catches the error,
-  # issues a warning, and pulls the data into R
-  expect_warning(
-    df %>%
-      Table$create() %>%
-      filter(str_detect(x, boundary(type = "character"))) %>%
-      collect(),
-    "not implemented"
-  )
-  expect_warning(
-    df %>%
-      Table$create() %>%
-      mutate(x = str_replace_all(x, coll("o", locale = "en"), "ó")) %>%
-      collect(),
-    "not supported"
-  )
-
-  # This condition generates a warning
-  expect_warning(
-    df %>%
-      Table$create() %>%
-      transmute(x = str_replace_all(x, regex("o", multiline = TRUE), "u")),
-    "Ignoring pattern modifier argument not supported in Arrow: \"multiline\""
-  )
-})

From 4e0f0cf79cf836a29e4bfd4a7b2d692f8b50bffe Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Fri, 7 May 2021 11:33:21 -0400
Subject: [PATCH 203/719] ARROW-11769: [R] Pull groups from grouped_df into
 RecordBatch or Table

Closes #10261 from thisisnic/ARROW-11769_grouped_recordbatch

Lead-authored-by: Nic Crane <thisisnic@gmail.com>
Co-authored-by: Nic <thisisnic@gmail.com>
Signed-off-by: Ian Cook <ianmcook@gmail.com>
---
 r/R/record-batch.R                  |  9 ++++++++-
 r/R/table.R                         |  7 +++++++
 r/tests/testthat/test-RecordBatch.R | 19 ++++++++++++++++++-
 r/tests/testthat/test-Table.R       | 19 ++++++++++++++++++-
 4 files changed, 51 insertions(+), 3 deletions(-)

diff --git a/r/R/record-batch.R b/r/R/record-batch.R
index db5c6177854..2ad3408d706 100644
--- a/r/R/record-batch.R
+++ b/r/R/record-batch.R
@@ -148,13 +148,20 @@ RecordBatch$create <- function(..., schema = NULL) {
   if (length(arrays) == 1 && inherits(arrays[[1]], c("raw", "Buffer", "InputStream", "Message"))) {
     return(RecordBatch$from_message(arrays[[1]], schema))
   }
-  # Else, list of arrays
+  
+  # Else, a list of arrays or data.frames
   # making sure there are always names
   if (is.null(names(arrays))) {
     names(arrays) <- rep_len("", length(arrays))
   }
   stopifnot(length(arrays) > 0)
 
+  # Preserve any grouping
+  if (length(arrays) == 1 && inherits(arrays[[1]], "grouped_df")) {
+    out <- RecordBatch__from_arrays(schema, arrays)
+    return(dplyr::group_by(out, !!!dplyr::groups(arrays[[1]])))
+  }
+  
   # TODO: should this also assert that they're all Arrays?
   RecordBatch__from_arrays(schema, arrays)
 }
diff --git a/r/R/table.R b/r/R/table.R
index fdf3f5cc20d..2c432ac8983 100644
--- a/r/R/table.R
+++ b/r/R/table.R
@@ -168,6 +168,13 @@ Table$create <- function(..., schema = NULL) {
     names(dots) <- rep_len("", length(dots))
   }
   stopifnot(length(dots) > 0)
+  
+  # Preserve any grouping
+  if (length(dots) == 1 && inherits(dots[[1]], "grouped_df")) {
+    out <- Table__from_dots(dots, schema)
+    return(dplyr::group_by(out, !!!dplyr::groups(dots[[1]])))
+  }
+  
   if (all_record_batches(dots)) {
     Table__from_record_batches(dots, schema)
   } else {
diff --git a/r/tests/testthat/test-RecordBatch.R b/r/tests/testthat/test-RecordBatch.R
index c3797914741..d60ed4fbaba 100644
--- a/r/tests/testthat/test-RecordBatch.R
+++ b/r/tests/testthat/test-RecordBatch.R
@@ -15,7 +15,6 @@
 # specific language governing permissions and limitations
 # under the License.
 
-context("RecordBatch")
 
 test_that("RecordBatch", {
   # Note that we're reusing `tbl` and `batch` throughout the tests in this file
@@ -499,3 +498,21 @@ test_that("Handling string data with embedded nuls", {
     )
   })
 })
+
+test_that("ARROW-11769 - grouping preserved in record batch creation", {
+  
+  tbl <- tibble::tibble(
+    int = 1:10,
+    fct = factor(rep(c("A", "B"), 5)),
+    fct2 = factor(rep(c("C", "D"), each = 5)),
+  )
+  
+  expect_identical(
+    tbl %>%
+      dplyr::group_by(fct, fct2) %>%
+      record_batch() %>%
+      dplyr::group_vars(),
+    c("fct", "fct2")
+  )
+  
+})
diff --git a/r/tests/testthat/test-Table.R b/r/tests/testthat/test-Table.R
index 3788d416426..b88b1ba65e3 100644
--- a/r/tests/testthat/test-Table.R
+++ b/r/tests/testthat/test-Table.R
@@ -15,7 +15,6 @@
 # specific language governing permissions and limitations
 # under the License.
 
-context("Table")
 
 test_that("read_table handles various input streams (ARROW-3450, ARROW-3505)", {
   tbl <- tibble::tibble(
@@ -475,3 +474,21 @@ test_that("Table$create() with different length columns", {
   expect_error(Table$create(a=1:5, b = 42), msg)
   expect_error(Table$create(a=1:5, b = 1:6), msg)
 })
+
+test_that("ARROW-11769 - grouping preserved in table creation", {
+  
+  tbl <- tibble::tibble(
+    int = 1:10,
+    fct = factor(rep(c("A", "B"), 5)),
+    fct2 = factor(rep(c("C", "D"), each = 5)),
+  )
+  
+  expect_identical(
+    tbl %>%
+      dplyr::group_by(fct, fct2) %>%
+      Table$create() %>%
+      dplyr::group_vars(),
+    c("fct", "fct2")
+  )
+  
+})

From e2e7732c50f6d2d51b4232438a78e1cfb269eab4 Mon Sep 17 00:00:00 2001
From: Ian Cook <ianmcook@gmail.com>
Date: Fri, 7 May 2021 20:01:49 -0400
Subject: [PATCH 204/719] ARROW-12692: [R] Improve tests and comments for
 strsplit() bindings

This resolves a few outstanding comments raised in https://github.com/apache/arrow/pull/10190/

Closes #10271 from ianmcook/ARROW-12692

Authored-by: Ian Cook <ianmcook@gmail.com>
Signed-off-by: Ian Cook <ianmcook@gmail.com>
---
 r/R/dplyr.R                                   | 24 ++++++++++++++---
 .../testthat/test-dplyr-string-functions.R    | 27 ++++++++++++++++---
 2 files changed, 44 insertions(+), 7 deletions(-)

diff --git a/r/R/dplyr.R b/r/R/dplyr.R
index 4e66c227bea..264c4929f72 100644
--- a/r/R/dplyr.R
+++ b/r/R/dplyr.R
@@ -543,16 +543,28 @@ arrow_stringr_string_replace_function <- function(FUN, max_replacements) {
 
 arrow_r_string_split_function <- function(FUN, reverse = FALSE, max_splits = -1) {
   function(x, split, fixed = FALSE, perl = FALSE, useBytes = FALSE) {
-    
+
     assert_that(is.string(split))
-    
-    # if !fixed but no regex metachars in split pattern, allow to proceed as split isn't regex
+
+    # The Arrow C++ library does not support splitting a string by a regular
+    # expression pattern (ARROW-12608) but the default behavior of
+    # base::strsplit() is to interpret the split pattern as a regex
+    # (fixed = FALSE). R users commonly pass non-regex split patterns to
+    # strsplit() without bothering to set fixed = TRUE. It would be annoying if
+    # that didn't work here. So: if fixed = FALSE, let's check the split pattern
+    # to see if it is a regex (if it contains any regex metacharacters). If not,
+    # then allow to proceed.
     if (!fixed && contains_regex(split)) {
       stop("Regular expression matching not supported in strsplit for Arrow", call. = FALSE)
     }
+    # warn when the user specifies both fixed = TRUE and perl = TRUE, for
+    # consistency with the behavior of base::strsplit()
     if (fixed && perl) {
       warning("Argument 'perl = TRUE' will be ignored", call. = FALSE)
     }
+    # since split is not a regex, proceed without any warnings or errors
+    # regardless of the value of perl, for consistency with the behavior of
+    # base::strsplit()
     FUN("split_pattern", x, options = list(pattern = split, reverse = reverse, max_splits = max_splits))
   }
 }
@@ -575,6 +587,10 @@ arrow_stringr_string_split_function <- function(FUN, reverse = FALSE) {
     if (simplify) {
       warning("Argument 'simplify = TRUE' will be ignored", call. = FALSE)
     }
+    # The max_splits option in the Arrow C++ library controls the maximum number
+    # of places at which the string is split, whereas the argument n to
+    # str_split() controls the maximum number of pieces to return. So we must
+    # subtract 1 from n to get max_splits.
     FUN("split_pattern", string, options = list(pattern = opts$pattern, reverse = reverse, max_splits = n - 1L))
   }
 }
@@ -1148,7 +1164,7 @@ not_implemented_for_dataset <- function(method) {
 }
 
 #' Does this string contain regex metacharacters?
-#' 
+#'
 #' @param string String to be tested
 #' @keywords internal
 #' @return Logical: does `string` contain regex metacharacters?
diff --git a/r/tests/testthat/test-dplyr-string-functions.R b/r/tests/testthat/test-dplyr-string-functions.R
index 64351a83ea7..d7df83cc7a6 100644
--- a/r/tests/testthat/test-dplyr-string-functions.R
+++ b/r/tests/testthat/test-dplyr-string-functions.R
@@ -212,21 +212,21 @@ test_that("str_replace and str_replace_all", {
       collect(),
     df
   )
-  
+
   expect_dplyr_equal(
     input %>%
       transmute(x = str_replace_all(x, regex("^F"), "baz")) %>%
       collect(),
     df
   )
-  
+
   expect_dplyr_equal(
     input %>%
       mutate(x = str_replace(x, "^F[a-z]{2}", "baz")) %>%
       collect(),
     df
   )
-  
+
   expect_dplyr_equal(
     input %>%
       transmute(x = str_replace(x, regex("^f[A-Z]{2}", ignore_case = TRUE), "baz")) %>%
@@ -307,6 +307,7 @@ test_that("arrow_*_split_whitespace functions", {
 
   df_split <- tibble(x = list(c("Foo", "and", "bar"), c("baz", "and", "qux", "and", "quux")))
 
+  # use default option values
   expect_equivalent(
     df_ascii %>%
       Table$create() %>%
@@ -322,6 +323,26 @@ test_that("arrow_*_split_whitespace functions", {
     df_split
   )
 
+  # specify non-default option values
+  expect_equivalent(
+    df_ascii %>%
+      Table$create() %>%
+      mutate(
+        x = arrow_ascii_split_whitespace(x, options = list(max_splits = 1, reverse = TRUE))
+      ) %>%
+      collect(),
+    tibble(x = list(c("Foo\nand", "bar"), c("baz\tand qux and", "quux")))
+  )
+  expect_equivalent(
+    df_utf8 %>%
+      Table$create() %>%
+      mutate(
+        x = arrow_utf8_split_whitespace(x, options = list(max_splits = 1, reverse = TRUE))
+      ) %>%
+      collect(),
+    tibble(x = list(c("Foo\u00A0and", "bar"), c("baz\u2006and\u1680qux\u3000and", "quux")))
+  )
+
 })
 
 test_that("errors and warnings in string splitting", {

From 050f72c84f0ced88ad3a246162b76c4dbd8afcc4 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Sat, 8 May 2021 19:01:07 +0800
Subject: [PATCH 205/719] ARROW-12672: [C++] Fix fill_null kernel to set
 null_count + cast kernel to handle no-bitmap with unknown null_count case

* The "fill_null" kernel was returning a slightly malformed ArrayData with no validity bitmap but unknown null_count. The null_count is now set to 0 if the fill_value was valid.
* The "cast" kernel is made robust to handle such ArrayData with unknown null_count but no validity bitmap

Closes #10263 from jorisvandenbossche/ARROW-12672

Authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Signed-off-by: Yibo Cai <yibo.cai@arm.com>
---
 cpp/src/arrow/compute/exec.cc                 |  2 +-
 .../arrow/compute/kernels/scalar_cast_test.cc | 14 +++++++++++
 .../arrow/compute/kernels/scalar_fill_null.cc |  2 ++
 .../compute/kernels/scalar_fill_null_test.cc  | 25 +++++++++++++++----
 python/pyarrow/tests/test_compute.py          |  7 ++++++
 5 files changed, 44 insertions(+), 6 deletions(-)

diff --git a/cpp/src/arrow/compute/exec.cc b/cpp/src/arrow/compute/exec.cc
index 6df845f0f44..0b1f6b5658e 100644
--- a/cpp/src/arrow/compute/exec.cc
+++ b/cpp/src/arrow/compute/exec.cc
@@ -269,7 +269,7 @@ struct NullGeneralization {
 
     // Do not count the bits if they haven't been counted already
     const int64_t known_null_count = arr.null_count.load();
-    if (known_null_count == 0) {
+    if ((known_null_count == 0) || (arr.buffers[0] == NULLPTR)) {
       return ALL_VALID;
     }
 
diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
index e9618fa5c5d..ef22fa8cb72 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
@@ -1769,6 +1769,20 @@ TEST(Cast, EmptyCasts) {
   }
 }
 
+TEST(Cast, CastWithNoValidityBitmapButUnknownNullCount) {
+  // ARROW-12672 segfault when casting slightly malformed array
+  // (no validity bitmap but atomic null count non-zero)
+  auto values = ArrayFromJSON(boolean(), "[true, true, false]");
+
+  ASSERT_OK_AND_ASSIGN(auto expected, Cast(*values, int8()));
+
+  ASSERT_EQ(values->data()->buffers[0], NULLPTR);
+  values->data()->null_count = kUnknownNullCount;
+  ASSERT_OK_AND_ASSIGN(auto result, Cast(*values, int8()));
+
+  AssertArraysEqual(*expected, *result);
+}
+
 // ----------------------------------------------------------------------
 // Test casting from NullType
 
diff --git a/cpp/src/arrow/compute/kernels/scalar_fill_null.cc b/cpp/src/arrow/compute/kernels/scalar_fill_null.cc
index f52e0045470..85af0e5b1cd 100644
--- a/cpp/src/arrow/compute/kernels/scalar_fill_null.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_fill_null.cc
@@ -80,6 +80,7 @@ struct FillNullFunctor<Type, enable_if_t<is_number_type<Type>::value>> {
         in_values += block.length;
       }
       output->buffers[1] = out_buf;
+      output->null_count = 0;
     } else {
       *output = data;
     }
@@ -131,6 +132,7 @@ struct FillNullFunctor<Type, enable_if_t<is_boolean_type<Type>::value>> {
         out_offset += block.length;
       }
       output->buffers[1] = out_buf;
+      output->null_count = 0;
     } else {
       *output = data;
     }
diff --git a/cpp/src/arrow/compute/kernels/scalar_fill_null_test.cc b/cpp/src/arrow/compute/kernels/scalar_fill_null_test.cc
index de1e0802343..a0b6fdc63a9 100644
--- a/cpp/src/arrow/compute/kernels/scalar_fill_null_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_fill_null_test.cc
@@ -33,12 +33,17 @@
 namespace arrow {
 namespace compute {
 
-void CheckFillNull(const Array& input, const Datum& fill_value, const Array& expected) {
+void CheckFillNull(const Array& input, const Datum& fill_value, const Array& expected,
+                   bool all_valid = true) {
   auto Check = [&](const Array& input, const Array& expected) {
     ASSERT_OK_AND_ASSIGN(Datum datum_out, FillNull(input, fill_value));
     std::shared_ptr<Array> result = datum_out.make_array();
     ASSERT_OK(result->ValidateFull());
     AssertArraysEqual(expected, *result, /*verbose=*/true);
+    if (all_valid) {
+      // Check null count of ArrayData is set, not the computed Array.null_count
+      ASSERT_EQ(result->data()->null_count, 0);
+    }
   };
 
   Check(input, expected);
@@ -48,10 +53,11 @@ void CheckFillNull(const Array& input, const Datum& fill_value, const Array& exp
 }
 
 void CheckFillNull(const std::shared_ptr<DataType>& type, const std::string& in_values,
-                   const Datum& fill_value, const std::string& out_values) {
+                   const Datum& fill_value, const std::string& out_values,
+                   bool all_valid = true) {
   std::shared_ptr<Array> input = ArrayFromJSON(type, in_values);
   std::shared_ptr<Array> expected = ArrayFromJSON(type, out_values);
-  CheckFillNull(*input, fill_value, *expected);
+  CheckFillNull(*input, fill_value, *expected, all_valid);
 }
 
 class TestFillNullKernel : public ::testing::Test {};
@@ -67,7 +73,8 @@ typedef ::testing::Types<Int8Type, UInt8Type, Int16Type, UInt16Type, Int32Type,
 TEST_F(TestFillNullKernel, FillNullInvalidScalar) {
   auto scalar = std::make_shared<Int8Scalar>(3);
   scalar->is_valid = false;
-  CheckFillNull(int8(), "[1, null, 3, 2]", Datum(scalar), "[1, null, 3, 2]");
+  CheckFillNull(int8(), "[1, null, 3, 2]", Datum(scalar), "[1, null, 3, 2]",
+                /*all_valid=*/false);
 }
 
 TYPED_TEST_SUITE(TestFillNullPrimitive, PrimitiveTypes);
@@ -106,7 +113,8 @@ TYPED_TEST(TestFillNullPrimitive, FillNull) {
 
 TEST_F(TestFillNullKernel, FillNullNull) {
   auto datum = Datum(std::make_shared<NullScalar>());
-  CheckFillNull(null(), "[null, null, null, null]", datum, "[null, null, null, null]");
+  CheckFillNull(null(), "[null, null, null, null]", datum, "[null, null, null, null]",
+                /*all_valid=*/false);
 }
 
 TEST_F(TestFillNullKernel, FillNullBoolean) {
@@ -164,5 +172,12 @@ TEST_F(TestFillNullKernel, FillNullString) {
                 R"(["foo", "bar", "arrow"])");
 }
 
+TEST_F(TestFillNullKernel, FillNullSetsZeroNullCount) {
+  auto arr = ArrayFromJSON(int32(), "[1, null, 3, 4]");
+  auto fill_value = Datum(std::make_shared<Int32Scalar>(2, int32()));
+  std::shared_ptr<ArrayData> result = (*FillNull(arr, fill_value)).array();
+  ASSERT_EQ(result->null_count, 0);
+}
+
 }  // namespace compute
 }  // namespace arrow
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index 5ad0d2db91b..8e045fb4f2d 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -1248,3 +1248,10 @@ def test_tdigest():
     arr = pa.chunked_array([pa.array([1, 2]), pa.array([3, 4])])
     result = pc.tdigest(arr, q=[0, 0.5, 1])
     assert result.to_pylist() == [1, 2.5, 4]
+
+
+def test_fill_null_segfault():
+    # ARROW-12672
+    arr = pa.array([None], pa.bool_()).fill_null(False)
+    result = arr.cast(pa.int8())
+    assert result == pa.array([0], pa.int8())

From dae3fcccca9bba114913b2b09564127bc0ee779e Mon Sep 17 00:00:00 2001
From: Anthony Louis <anthony@simbioseventures.com>
Date: Sun, 9 May 2021 07:02:38 +0900
Subject: [PATCH 206/719] ARROW-12484: [CI] Change jinja macros to not require
 CROSSBOW_TOKEN to upload artifacts in Github Actions

The Pull Requests change the script to upload the artifacts and generate a new release to use the token provide by the Github Actions environment.

Closes #10273 from anthonylouisbsb/feature/remove-crossbow-token-dependency

Authored-by: Anthony Louis <anthony@simbioseventures.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 dev/archery/archery/cli.py | 4 +---
 dev/tasks/macros.jinja     | 2 +-
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py
index 32ec5ac845b..e3ad0f299ef 100644
--- a/dev/archery/archery/cli.py
+++ b/dev/archery/archery/cli.py
@@ -789,9 +789,7 @@ def integration(with_all=False, random_seed=12345, **args):
               default='-', required=True)
 @click.option('--arrow-token', envvar='ARROW_GITHUB_TOKEN',
               help='OAuth token for responding comment in the arrow repo')
-@click.option('--crossbow-token', '-ct', envvar='CROSSBOW_GITHUB_TOKEN',
-              help='OAuth token for pushing to the crossow repository')
-def trigger_bot(event_name, event_payload, arrow_token, crossbow_token):
+def trigger_bot(event_name, event_payload, arrow_token):
     from .bot import CommentBot, actions
 
     event_payload = json.loads(event_payload.read())
diff --git a/dev/tasks/macros.jinja b/dev/tasks/macros.jinja
index bfbd6ec2588..59a7d8588a5 100644
--- a/dev/tasks/macros.jinja
+++ b/dev/tasks/macros.jinja
@@ -87,7 +87,7 @@ on:
       {% endfor %}
     {% endif %}
     env:
-      CROSSBOW_GITHUB_TOKEN: {{ '${{ secrets.CROSSBOW_GITHUB_TOKEN }}' }}
+      CROSSBOW_GITHUB_TOKEN: {{ '${{ secrets.GITHUB_TOKEN }}' }}
 {% endmacro %}
 
 {%- macro github_upload_gemfury(pattern) -%}

From ec51aec4d15035f4d9d6a1c4346d0a2b9a37fb75 Mon Sep 17 00:00:00 2001
From: Dominik Moritz <domoritz@gmail.com>
Date: Sun, 9 May 2021 00:59:59 +0200
Subject: [PATCH 207/719] MINOR: [Docs] Remove AppVeyor badge (#10280)

---
 README.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/README.md b/README.md
index efe63e1b269..7d10b81c6e4 100644
--- a/README.md
+++ b/README.md
@@ -19,7 +19,6 @@
 
 # Apache Arrow
 
-[![Build Status](https://ci.appveyor.com/api/projects/status/github/apache/arrow/branch/master?svg=true)](https://ci.appveyor.com/project/ApacheSoftwareFoundation/arrow/branch/master)
 [![Fuzzing Status](https://oss-fuzz-build-logs.storage.googleapis.com/badges/arrow.svg)](https://bugs.chromium.org/p/oss-fuzz/issues/list?sort=-opened&can=1&q=proj:arrow)
 [![License](http://img.shields.io/:license-Apache%202-blue.svg)](https://github.com/apache/arrow/blob/master/LICENSE.txt)
 [![Twitter Follow](https://img.shields.io/twitter/follow/apachearrow.svg?style=social&label=Follow)](https://twitter.com/apachearrow)

From 8648080a95847d5fdf3f619ad41f6e4b627077a3 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Sun, 9 May 2021 08:19:46 -0700
Subject: [PATCH 208/719] ARROW-12694: [C++] Fix segfault under RTools35
 toolchain

I'm still not sure why exactly this happens, or why it only fails (consistently) in some tests.

Closes #10282 from lidavidm/arrow-12694

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 cpp/src/arrow/dataset/file_parquet.cc | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/cpp/src/arrow/dataset/file_parquet.cc b/cpp/src/arrow/dataset/file_parquet.cc
index 94bf5355dc4..356492cd164 100644
--- a/cpp/src/arrow/dataset/file_parquet.cc
+++ b/cpp/src/arrow/dataset/file_parquet.cc
@@ -642,7 +642,16 @@ Result<util::optional<int64_t>> ParquetFileFragment::TryCountRows(
     compute::Expression predicate) {
   DCHECK_NE(metadata_, nullptr);
   if (ExpressionHasFieldRefs(predicate)) {
+#if defined(__GNUC__) && (__GNUC__ < 5)
+    // ARROW-12694: with GCC 4.9 (RTools 35) we sometimes segfault here if we move(result)
+    auto result = TestRowGroups(std::move(predicate));
+    if (!result.ok()) {
+      return result.status();
+    }
+    auto expressions = result.ValueUnsafe();
+#else
     ARROW_ASSIGN_OR_RAISE(auto expressions, TestRowGroups(std::move(predicate)));
+#endif
     int64_t rows = 0;
     for (size_t i = 0; i < row_groups_->size(); i++) {
       // If the row group is entirely excluded, exclude it from the row count

From 12dc1b56598087f5770af6823cc6a0a47dd9db06 Mon Sep 17 00:00:00 2001
From: Zachary Blackwood <zachary.blackwood@dtn.com>
Date: Mon, 10 May 2021 13:56:42 +0200
Subject: [PATCH 209/719] ARROW-12472: [Python] Properly convert paths to
 strings (using __fspath__)

When passing objects to `read_table` which implement `__fspath__`, make sure to use this method when converting the paths to the a string (by using the `_stringify_path` method, rather than simply using `str`). This should allow passing of OpenFile objects from fsspec to `read_table`

Closes #10104 from blackary/properly-stringify-parquet-paths

Lead-authored-by: Zachary Blackwood <zachary.blackwood@dtn.com>
Co-authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Co-authored-by: Zachary Blackwood <zblackwo@gmail.com>
Signed-off-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
---
 python/pyarrow/parquet.py                  | 20 +++++++++++++++++++-
 python/pyarrow/tests/parquet/test_basic.py | 21 ++++++++++++++++++++-
 python/pyarrow/tests/test_dataset.py       | 22 +++++++++++++++++++++-
 python/pyarrow/tests/util.py               |  8 ++++++++
 4 files changed, 68 insertions(+), 3 deletions(-)

diff --git a/python/pyarrow/parquet.py b/python/pyarrow/parquet.py
index 97e431c4179..fd07aeddb9e 100644
--- a/python/pyarrow/parquet.py
+++ b/python/pyarrow/parquet.py
@@ -1486,6 +1486,12 @@ def _make_manifest(path_or_paths, fs, pathsep='/', metadata_nthreads=1,
     return pieces, partitions, common_metadata_path, metadata_path
 
 
+def _is_local_file_system(fs):
+    return isinstance(fs, LocalFileSystem) or isinstance(
+        fs, legacyfs.LocalFileSystem
+    )
+
+
 class _ParquetDatasetV2:
     """
     ParquetDataset shim using the Dataset API under the hood.
@@ -1528,6 +1534,18 @@ def __init__(self, path_or_paths, filesystem=None, filters=None,
             # path can in principle be URI for any filesystem)
             filesystem = LocalFileSystem(use_mmap=memory_map)
 
+        # This needs to be checked after _ensure_filesystem, because that
+        # handles the case of an fsspec LocalFileSystem
+        if (
+            hasattr(path_or_paths, "__fspath__") and
+            filesystem is not None and
+            not _is_local_file_system(filesystem)
+        ):
+            raise TypeError(
+                "Path-like objects with __fspath__ must only be used with "
+                f"local file systems, not {type(filesystem)}"
+            )
+
         # check for single fragment dataset
         single_file = None
         if isinstance(path_or_paths, list):
@@ -1535,7 +1553,7 @@ def __init__(self, path_or_paths, filesystem=None, filters=None,
                 single_file = path_or_paths[0]
         else:
             if _is_path_like(path_or_paths):
-                path_or_paths = str(path_or_paths)
+                path_or_paths = _stringify_path(path_or_paths)
                 if filesystem is None:
                     # path might be a URI describing the FileSystem as well
                     try:
diff --git a/python/pyarrow/tests/parquet/test_basic.py b/python/pyarrow/tests/parquet/test_basic.py
index 670c7c6ed1f..ebf9d44b3bb 100644
--- a/python/pyarrow/tests/parquet/test_basic.py
+++ b/python/pyarrow/tests/parquet/test_basic.py
@@ -23,7 +23,7 @@
 
 import pyarrow as pa
 from pyarrow import fs
-from pyarrow.filesystem import LocalFileSystem
+from pyarrow.filesystem import LocalFileSystem, FileSystem
 from pyarrow.tests import util
 from pyarrow.tests.parquet.common import (_check_roundtrip, _roundtrip_table,
                                           parametrize_legacy_dataset)
@@ -236,6 +236,25 @@ def test_multiple_path_types(tempdir, use_legacy_dataset):
     tm.assert_frame_equal(df, df_read)
 
 
+@parametrize_legacy_dataset
+def test_fspath(tempdir, use_legacy_dataset):
+    # ARROW-12472 support __fspath__ objects without using str()
+    path = tempdir / "test.parquet"
+    table = pa.table({"a": [1, 2, 3]})
+    _write_table(table, path)
+
+    fs_protocol_obj = util.FSProtocolClass(path)
+
+    result = _read_table(
+        fs_protocol_obj, use_legacy_dataset=use_legacy_dataset
+    )
+    assert result.equals(table)
+
+    # combined with non-local filesystem raises
+    with pytest.raises(TypeError):
+        _read_table(fs_protocol_obj, filesystem=FileSystem())
+
+
 @pytest.mark.dataset
 @parametrize_legacy_dataset
 @pytest.mark.parametrize("filesystem", [
diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py
index e489bcb3a73..b6409d8df6c 100644
--- a/python/pyarrow/tests/test_dataset.py
+++ b/python/pyarrow/tests/test_dataset.py
@@ -28,7 +28,7 @@
 import pyarrow as pa
 import pyarrow.csv
 import pyarrow.fs as fs
-from pyarrow.tests.util import change_cwd, _filesystem_uri
+from pyarrow.tests.util import change_cwd, _filesystem_uri, FSProtocolClass
 
 try:
     import pandas as pd
@@ -1571,6 +1571,26 @@ def test_open_dataset_list_of_files(tempdir):
         assert result.equals(table)
 
 
+@pytest.mark.parquet
+def test_open_dataset_filesystem_fspath(tempdir):
+    # single file
+    table, path = _create_single_file(tempdir)
+
+    fspath = FSProtocolClass(path)
+
+    # filesystem inferred from path
+    dataset1 = ds.dataset(fspath)
+    assert dataset1.schema.equals(table.schema)
+
+    # filesystem specified
+    dataset2 = ds.dataset(fspath, filesystem=fs.LocalFileSystem())
+    assert dataset2.schema.equals(table.schema)
+
+    # passing different filesystem
+    with pytest.raises(TypeError):
+        ds.dataset(fspath, filesystem=fs._MockFileSystem())
+
+
 def test_construct_from_single_file(tempdir):
     directory = tempdir / 'single-file'
     directory.mkdir()
diff --git a/python/pyarrow/tests/util.py b/python/pyarrow/tests/util.py
index 8f29e5853f8..ea43b7c4e64 100644
--- a/python/pyarrow/tests/util.py
+++ b/python/pyarrow/tests/util.py
@@ -229,3 +229,11 @@ def _filesystem_uri(path):
     else:
         uri = 'file://{}'.format(path)
     return uri
+
+
+class FSProtocolClass:
+    def __init__(self, path):
+        self._path = path
+
+    def __fspath__(self):
+        return str(self._path)

From bce0872f52b481d1c92d62c86b2cb94cb5f0501c Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Mon, 10 May 2021 17:17:32 +0200
Subject: [PATCH 210/719] ARROW-12663: [C++] Fix a cuda 11.2 compiler segfault

With the nvcc 11.2 compiler we have a segfault when we have a copy and move assignment operator :
```
using Impl::operator=;
```
before a move-assignment operator:

 ```
Variant& operator=(Variant&& other) noexcept {
  this->destroy();
  other.move_to(this);
  return *this;
}
```

A minimal repro :

With a segfault : https://godbolt.org/z/h9eYv6zas

Without a segfault : https://godbolt.org/z/oWhK5qPd8

In this PR, as a workaround, we have essentially re-ordered the move-assignment of `Variant` before `using Impl::operator=;`.

Closes #10257 from galipremsagar/patch-2

Authored-by: GALI PREM SAGAR <sagarprem75@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/util/variant.h | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/cpp/src/arrow/util/variant.h b/cpp/src/arrow/util/variant.h
index 89f39ab8917..962254a9b0f 100644
--- a/cpp/src/arrow/util/variant.h
+++ b/cpp/src/arrow/util/variant.h
@@ -262,18 +262,17 @@ class Variant : detail::VariantImpl<Variant<T...>, T...>,
 
   Variant(const Variant& other) = default;
   Variant& operator=(const Variant& other) = default;
-
-  using Impl::Impl;
-  using Impl::operator=;
-
-  Variant(Variant&& other) noexcept { other.move_to(this); }
-
   Variant& operator=(Variant&& other) noexcept {
     this->destroy();
     other.move_to(this);
     return *this;
   }
 
+  using Impl::Impl;
+  using Impl::operator=;
+
+  Variant(Variant&& other) noexcept { other.move_to(this); }
+
   ~Variant() {
     static_assert(offsetof(Variant, data_) == 0, "(void*)&Variant::data_ == (void*)this");
     this->destroy();

From da6cf6d8b8c975c406dfe84038ff8537a1517995 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Mon, 10 May 2021 18:05:25 +0200
Subject: [PATCH 211/719] ARROW-12645: [Python] Fix numpydoc validation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Numpydoc helps identifying issues with our docstrings, for example we can list the undocumented parameters using the following command:

```bash
archery numpydoc -a PR01
```

parts of the output:

```console
pyarrow.parquet.write_table
PR01: Parameters {'use_compliant_nested_type', 'where', 'row_group_size', 'use_byte_stream_split', 'compression_level', '**kwargs'} not documented

pyarrow.parquet.write_metadata
PR01: Parameters {'where', 'metadata_collector'} not documented

pyarrow.parquet.read_table
PR01: Parameters {'source', 'columns'} not documented

pyarrow.parquet.read_pandas
PR01: Parameters {'source', '**kwargs', 'columns'} not documented

pyarrow.parquet.PartitionSet
PR01: Parameters {'keys', 'name'} not documented

pyarrow.parquet.PartitionSet.get_index
PR01: Parameters {'key'} not documented

pyarrow.parquet.ParquetWriter
PR01: Parameters {'writer_engine_version', 'use_compliant_nested_type', 'use_byte_stream_split', 'compression_level'} not documented
```

```console
pyarrow._flight.RecordBatchStream
-> pyarrow._flight.RecordBatchStream(data_source, options=None)
PR01: Parameters {'options', 'data_source'} not documented

pyarrow._flight.Location
-> pyarrow._flight.Location(uri)
PR01: Parameters {'uri'} not documented

pyarrow._flight.for_grpc_unix
-> pyarrow._flight.Location.for_grpc_unix(path)
PR01: Parameters {'path'} not documented

pyarrow._flight.for_grpc_tls
-> pyarrow._flight.Location.for_grpc_tls(host, port)
PR01: Parameters {'host', 'port'} not documented

pyarrow._flight.for_grpc_tcp
-> pyarrow._flight.Location.for_grpc_tcp(host, port)
PR01: Parameters {'host', 'port'} not documented

pyarrow._flight.GeneratorStream
-> pyarrow._flight.GeneratorStream(schema, generator, options=None)
PR01: Parameters {'options', 'schema', 'generator'} not documented

pyarrow._flight.FlightWriteSizeExceededError
-> pyarrow._flight.A write operation exceeded the client-configured limit.
PR01: Parameters {'actual', 'limit', 'message'} not documented
```

Closes #10241 from kszucs/numpydoc-validation

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 dev/archery/archery/cli.py         |  2 +-
 dev/archery/archery/lang/python.py | 11 ++++++++---
 dev/archery/setup.py               |  1 +
 3 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py
index e3ad0f299ef..1e70ee29128 100644
--- a/dev/archery/archery/cli.py
+++ b/dev/archery/archery/cli.py
@@ -348,7 +348,7 @@ def numpydoc(src, symbols, allow_rule, disallow_rule):
     disallow_rule = disallow_rule or {'GL01', 'SA01', 'EX01', 'ES01'}
     try:
         results = python_numpydoc(symbols, allow_rules=allow_rule,
-                                  disallow_rule=disallow_rule)
+                                  disallow_rules=disallow_rule)
         for result in results:
             result.ok()
     except LintValidationException:
diff --git a/dev/archery/archery/lang/python.py b/dev/archery/archery/lang/python.py
index 4952d5f2305..c6ebbe65004 100644
--- a/dev/archery/archery/lang/python.py
+++ b/dev/archery/archery/lang/python.py
@@ -26,6 +26,7 @@
 else:
     have_numpydoc = True
 
+from ..utils.logger import logger
 from ..utils.command import Command, capture_stdout, default_bin
 
 
@@ -105,8 +106,7 @@ def __init__(self, symbols=None):
         if not have_numpydoc:
             raise RuntimeError(
                 'Numpydoc is not available, install the development version '
-                'with command: pip install '
-                'git+https://github.com/numpy/numpydoc'
+                'with command: pip install numpydoc==1.1.0'
             )
         self.symbols = set(symbols or {'pyarrow'})
 
@@ -192,7 +192,12 @@ def validate(self, from_package='', allow_rules=None,
         results = []
 
         def callback(obj):
-            result = validate(obj)
+            try:
+                result = validate(obj)
+            except OSError as e:
+                symbol = f"{obj.__module__}.{obj.__name__}"
+                logger.warning(f"Unable to validate `{symbol}` due to `{e}`")
+                return
 
             errors = []
             for errcode, errmsg in result.get('errors', []):
diff --git a/dev/archery/setup.py b/dev/archery/setup.py
index e3fa47ad14f..5ff9e214791 100755
--- a/dev/archery/setup.py
+++ b/dev/archery/setup.py
@@ -28,6 +28,7 @@
 jinja_req = 'jinja2>=2.11'
 
 extras = {
+    'lint': ['numpydoc==1.1.0', 'autopep8', 'flake8', 'cmake_format==0.5.2'],
     'benchmark': ['pandas'],
     'docker': ['ruamel.yaml', 'python-dotenv'],
     'release': [jinja_req, 'jira', 'semver', 'gitpython'],

From bf16c36952e45da4b237b2127c9e5346e5d271fd Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Mon, 10 May 2021 18:48:01 +0200
Subject: [PATCH 212/719] ARROW-12083: [C++][Dataset] Use given column types
 when determining CSV fragment schema

This lets you specify the types of some columns without having to specify the full schema.

Closes #10284 from lidavidm/arrow-12083

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/dataset/file_csv.cc      | 47 +++++++++++++-------------
 cpp/src/arrow/dataset/file_csv_test.cc | 20 +++++++++++
 2 files changed, 44 insertions(+), 23 deletions(-)

diff --git a/cpp/src/arrow/dataset/file_csv.cc b/cpp/src/arrow/dataset/file_csv.cc
index eb6d623818e..9c11afec264 100644
--- a/cpp/src/arrow/dataset/file_csv.cc
+++ b/cpp/src/arrow/dataset/file_csv.cc
@@ -81,17 +81,21 @@ Result<std::unordered_set<std::string>> GetColumnNames(
 }
 
 static inline Result<csv::ConvertOptions> GetConvertOptions(
-    const CsvFileFormat& format, const std::shared_ptr<ScanOptions>& scan_options,
+    const CsvFileFormat& format, const ScanOptions* scan_options,
     const util::string_view first_block) {
   ARROW_ASSIGN_OR_RAISE(
       auto column_names,
-      GetColumnNames(format.parse_options, first_block, scan_options->pool));
+      GetColumnNames(format.parse_options, first_block,
+                     scan_options ? scan_options->pool : default_memory_pool()));
 
   ARROW_ASSIGN_OR_RAISE(
       auto csv_scan_options,
       GetFragmentScanOptions<CsvFragmentScanOptions>(
-          kCsvTypeName, scan_options.get(), format.default_fragment_scan_options));
+          kCsvTypeName, scan_options, format.default_fragment_scan_options));
   auto convert_options = csv_scan_options->convert_options;
+
+  if (!scan_options) return convert_options;
+
   auto materialized = scan_options->MaterializedFields();
   std::unordered_set<std::string> materialized_fields(materialized.begin(),
                                                       materialized.end());
@@ -133,34 +137,31 @@ static inline Future<std::shared_ptr<csv::StreamingReader>> OpenReaderAsync(
 
   // Grab the first block and use it to determine the schema and create a reader.  The
   // input->Peek call blocks so we run the whole thing on the I/O thread pool.
-  return DeferNotOk(input->io_context().executor()->Submit(
+  auto reader_fut = DeferNotOk(input->io_context().executor()->Submit(
       [=]() -> Future<std::shared_ptr<csv::StreamingReader>> {
         ARROW_ASSIGN_OR_RAISE(auto first_block, input->Peek(reader_options.block_size));
         const auto& parse_options = format.parse_options;
-        auto convert_options = csv::ConvertOptions::Defaults();
-        if (scan_options != nullptr) {
-          ARROW_ASSIGN_OR_RAISE(convert_options,
-                                GetConvertOptions(format, scan_options, first_block));
-        }
-
-        auto reader_fut = csv::StreamingReader::MakeAsync(
-            io::default_io_context(), std::move(input), cpu_executor, reader_options,
-            parse_options, convert_options);
-        // Adds the filename to the error
-        return reader_fut.Then(
-            [](const std::shared_ptr<csv::StreamingReader>& maybe_reader)
-                -> Result<std::shared_ptr<csv::StreamingReader>> { return maybe_reader; },
-            [source](const Status& err) -> Result<std::shared_ptr<csv::StreamingReader>> {
-              return err.WithMessage("Could not open CSV input source '", source.path(),
-                                     "': ", err);
-            });
+        ARROW_ASSIGN_OR_RAISE(
+            auto convert_options,
+            GetConvertOptions(format, scan_options ? scan_options.get() : nullptr,
+                              first_block));
+        return csv::StreamingReader::MakeAsync(io::default_io_context(), std::move(input),
+                                               cpu_executor, reader_options,
+                                               parse_options, convert_options);
       }));
+  return reader_fut.Then(
+      // Adds the filename to the error
+      [](const std::shared_ptr<csv::StreamingReader>& maybe_reader)
+          -> Result<std::shared_ptr<csv::StreamingReader>> { return maybe_reader; },
+      [source](const Status& err) -> Result<std::shared_ptr<csv::StreamingReader>> {
+        return err.WithMessage("Could not open CSV input source '", source.path(),
+                               "': ", err);
+      });
 }
 
 static inline Result<std::shared_ptr<csv::StreamingReader>> OpenReader(
     const FileSource& source, const CsvFileFormat& format,
-    const std::shared_ptr<ScanOptions>& scan_options = nullptr,
-    MemoryPool* pool = default_memory_pool()) {
+    const std::shared_ptr<ScanOptions>& scan_options = nullptr) {
   auto open_reader_fut =
       OpenReaderAsync(source, format, scan_options, internal::GetCpuThreadPool());
   return open_reader_fut.result();
diff --git a/cpp/src/arrow/dataset/file_csv_test.cc b/cpp/src/arrow/dataset/file_csv_test.cc
index eff7f28fbed..489fea4ca56 100644
--- a/cpp/src/arrow/dataset/file_csv_test.cc
+++ b/cpp/src/arrow/dataset/file_csv_test.cc
@@ -223,6 +223,26 @@ N/A
   EXPECT_EQ(*actual, Schema({field("f64", float64())}));
 }
 
+TEST_P(TestCsvFileFormat, InspectWithCustomConvertOptions) {
+  // Regression test for ARROW-12083
+  auto source = GetFileSource(R"(actually_string
+1.0
+
+N/A
+2)");
+  auto defaults = std::make_shared<CsvFragmentScanOptions>();
+  format_->default_fragment_scan_options = defaults;
+
+  ASSERT_OK_AND_ASSIGN(auto actual, format_->Inspect(*source.get()));
+  // Default type inferred
+  EXPECT_EQ(*actual, Schema({field("actually_string", float64())}));
+
+  // Override the inferred type
+  defaults->convert_options.column_types["actually_string"] = utf8();
+  ASSERT_OK_AND_ASSIGN(actual, format_->Inspect(*source.get()));
+  EXPECT_EQ(*actual, Schema({field("actually_string", utf8())}));
+}
+
 TEST_P(TestCsvFileFormat, IsSupported) {
   TestIsSupported();
   bool supported;

From 1948a617622edd650e7ed84e3938f75bd0168f89 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Mon, 10 May 2021 19:05:02 +0200
Subject: [PATCH 213/719] ARROW-12606: [C++][Compute] Fix Quantile and Mode on
 arrays with offset

Fixed CopyNonNullValues utility to handle arrays with offset correctly.
This patch also fixed quantile kernel random tests for integer types.

Closes #10262 from cyb70289/12606-array-offset

Lead-authored-by: Antoine Pitrou <antoine@python.org>
Co-authored-by: Yibo Cai <yibo.cai@arm.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 .../arrow/compute/kernels/aggregate_mode.cc   |   2 +-
 .../compute/kernels/aggregate_quantile.cc     |   2 +-
 .../arrow/compute/kernels/aggregate_test.cc   | 179 ++++++++++++++----
 cpp/src/arrow/compute/kernels/util_internal.h |  17 +-
 4 files changed, 152 insertions(+), 48 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/aggregate_mode.cc b/cpp/src/arrow/compute/kernels/aggregate_mode.cc
index f7538ac5249..95362335261 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_mode.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_mode.cc
@@ -206,7 +206,7 @@ struct SortModer {
     const int64_t in_length = datum.length() - datum.null_count();
     if (in_length > 0) {
       in_buffer.resize(in_length);
-      CopyNonNullValues<sizeof(CType)>(datum, in_buffer.data());
+      CopyNonNullValues(datum, in_buffer.data());
 
       // drop nan
       if (is_floating_type<T>::value) {
diff --git a/cpp/src/arrow/compute/kernels/aggregate_quantile.cc b/cpp/src/arrow/compute/kernels/aggregate_quantile.cc
index 2bb026dbdbd..0b7821273cc 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_quantile.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_quantile.cc
@@ -87,7 +87,7 @@ struct SortQuantiler {
     const int64_t in_length = datum.length() - datum.null_count();
     if (in_length > 0) {
       in_buffer.resize(in_length);
-      CopyNonNullValues<sizeof(CType)>(datum, in_buffer.data());
+      CopyNonNullValues(datum, in_buffer.data());
 
       // drop nan
       if (is_floating_type<InType>::value) {
diff --git a/cpp/src/arrow/compute/kernels/aggregate_test.cc b/cpp/src/arrow/compute/kernels/aggregate_test.cc
index da945252de9..455a3edaaf8 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_test.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_test.cc
@@ -1054,11 +1054,7 @@ ModeResult<ArrowType> NaiveMode(const Array& array) {
 }
 
 template <typename ArrowType, typename CTYPE = typename ArrowType::c_type>
-void CheckModeWithRange(CTYPE range_min, CTYPE range_max) {
-  auto rand = random::RandomArrayGenerator(0x5487655);
-  // 32K items (>= counting mode cutoff) within range, 10% null
-  auto array = rand.Numeric<ArrowType>(32 * 1024, range_min, range_max, 0.1);
-
+void VerifyMode(const std::shared_ptr<Array>& array) {
   auto expected = NaiveMode<ArrowType>(*array);
   ASSERT_OK_AND_ASSIGN(Datum out, Mode(array));
   ASSERT_OK(out.make_array()->ValidateFull());
@@ -1072,6 +1068,31 @@ void CheckModeWithRange(CTYPE range_min, CTYPE range_max) {
   ASSERT_EQ(out_counts[0], expected.count);
 }
 
+template <typename ArrowType, typename CTYPE = typename ArrowType::c_type>
+void CheckModeWithRange(CTYPE range_min, CTYPE range_max) {
+  auto rand = random::RandomArrayGenerator(0x5487655);
+  // 32K items (>= counting mode cutoff) within range, 10% null
+  auto array = rand.Numeric<ArrowType>(32 * 1024, range_min, range_max, 0.1);
+  VerifyMode<ArrowType>(array);
+}
+
+template <typename ArrowType, typename CTYPE = typename ArrowType::c_type>
+void CheckModeWithRangeSliced(CTYPE range_min, CTYPE range_max) {
+  auto rand = random::RandomArrayGenerator(0x5487655);
+  auto array = rand.Numeric<ArrowType>(32 * 1024, range_min, range_max, 0.1);
+
+  const int64_t array_size = array->length();
+  const std::vector<std::array<int64_t, 2>> offset_size{
+      {0, 40},
+      {array_size - 40, 40},
+      {array_size / 3, array_size / 6},
+      {array_size * 9 / 10, array_size / 10},
+  };
+  for (const auto& os : offset_size) {
+    VerifyMode<ArrowType>(array->Slice(os[0], os[1]));
+  }
+}
+
 TEST_F(TestInt32ModeKernel, SmallValueRange) {
   // Small value range => should exercise counter-based Mode implementation
   CheckModeWithRange<ArrowType>(-100, 100);
@@ -1082,6 +1103,11 @@ TEST_F(TestInt32ModeKernel, LargeValueRange) {
   CheckModeWithRange<ArrowType>(-10000000, 10000000);
 }
 
+TEST_F(TestInt32ModeKernel, Sliced) {
+  CheckModeWithRangeSliced<ArrowType>(-100, 100);
+  CheckModeWithRangeSliced<ArrowType>(-10000000, 10000000);
+}
+
 //
 // Variance/Stddev
 //
@@ -1580,7 +1606,10 @@ TEST_F(TestInt64QuantileKernel, Int64) {
 #undef O
 
 #ifndef __MINGW32__
-class TestRandomQuantileKernel : public TestPrimitiveQuantileKernel<DoubleType> {
+template <typename ArrowType>
+class TestRandomQuantileKernel : public TestPrimitiveQuantileKernel<ArrowType> {
+  using CType = typename ArrowType::c_type;
+
  public:
   void CheckQuantiles(int64_t array_size, int64_t num_quantiles) {
     std::shared_ptr<Array> array;
@@ -1589,17 +1618,77 @@ class TestRandomQuantileKernel : public TestPrimitiveQuantileKernel<DoubleType>
     GenerateTestData(array_size, num_quantiles, -100, 200, &array, &quantiles);
 
     this->AssertQuantilesAre(array, QuantileOptions{quantiles},
-                             NaiveQuantile(*array, quantiles, interpolations_));
+                             NaiveQuantile(array, quantiles, this->interpolations_));
+  }
+
+  void CheckQuantilesSliced(int64_t array_size, int64_t num_quantiles) {
+    std::shared_ptr<Array> array;
+    std::vector<double> quantiles;
+    GenerateTestData(array_size, num_quantiles, -100, 200, &array, &quantiles);
+
+    const std::vector<std::array<int64_t, 2>> offset_size{
+        {0, array_size - 1},
+        {1, array_size - 1},
+        {array_size / 3, array_size / 2},
+        {array_size * 9 / 10, array_size / 10},
+    };
+    for (const auto& os : offset_size) {
+      auto sliced = array->Slice(os[0], os[1]);
+      this->AssertQuantilesAre(sliced, QuantileOptions{quantiles},
+                               NaiveQuantile(sliced, quantiles, this->interpolations_));
+    }
   }
 
   void CheckTDigests(const std::vector<int>& chunk_sizes, int64_t num_quantiles) {
+    std::shared_ptr<ChunkedArray> chunked;
+    std::vector<double> quantiles;
+    GenerateChunked(chunk_sizes, num_quantiles, &chunked, &quantiles);
+
+    VerifyTDigest(chunked, quantiles);
+  }
+
+  void CheckTDigestsSliced(const std::vector<int>& chunk_sizes, int64_t num_quantiles) {
+    std::shared_ptr<ChunkedArray> chunked;
+    std::vector<double> quantiles;
+    GenerateChunked(chunk_sizes, num_quantiles, &chunked, &quantiles);
+
+    const int64_t size = chunked->length();
+    const std::vector<std::array<int64_t, 2>> offset_size{
+        {0, size - 1},
+        {1, size - 1},
+        {size / 3, size / 2},
+        {size * 9 / 10, size / 10},
+    };
+    for (const auto& os : offset_size) {
+      VerifyTDigest(chunked->Slice(os[0], os[1]), quantiles);
+    }
+  }
+
+ private:
+  void GenerateTestData(int64_t array_size, int64_t num_quantiles, int min, int max,
+                        std::shared_ptr<Array>* array, std::vector<double>* quantiles) {
+    auto rand = random::RandomArrayGenerator(0x5487658);
+    if (is_floating_type<ArrowType>::value) {
+      *array = rand.Float64(array_size, min, max, /*null_prob=*/0.1, /*nan_prob=*/0.2);
+    } else {
+      *array = rand.Int64(array_size, min, max, /*null_prob=*/0.1);
+    }
+
+    random_real(num_quantiles, 0x5487658, 0.0, 1.0, quantiles);
+    // make sure to exercise 0 and 1 quantiles
+    *std::min_element(quantiles->begin(), quantiles->end()) = 0;
+    *std::max_element(quantiles->begin(), quantiles->end()) = 1;
+  }
+
+  void GenerateChunked(const std::vector<int>& chunk_sizes, int64_t num_quantiles,
+                       std::shared_ptr<ChunkedArray>* chunked,
+                       std::vector<double>* quantiles) {
     int total_size = 0;
     for (int size : chunk_sizes) {
       total_size += size;
     }
     std::shared_ptr<Array> array;
-    std::vector<double> quantiles;
-    GenerateTestData(total_size, num_quantiles, 100, 123456789, &array, &quantiles);
+    GenerateTestData(total_size, num_quantiles, 100, 123456789, &array, quantiles);
 
     total_size = 0;
     ArrayVector array_vector;
@@ -1607,8 +1696,11 @@ class TestRandomQuantileKernel : public TestPrimitiveQuantileKernel<DoubleType>
       array_vector.emplace_back(array->Slice(total_size, size));
       total_size += size;
     }
-    auto chunked = *ChunkedArray::Make(array_vector);
+    *chunked = ChunkedArray::Make(array_vector).ValueOrDie();
+  }
 
+  void VerifyTDigest(const std::shared_ptr<ChunkedArray>& chunked,
+                     std::vector<double>& quantiles) {
     TDigestOptions options(quantiles);
     ASSERT_OK_AND_ASSIGN(Datum out, TDigest(chunked, options));
     const auto& out_array = out.make_array();
@@ -1619,7 +1711,7 @@ class TestRandomQuantileKernel : public TestPrimitiveQuantileKernel<DoubleType>
 
     // linear interpolated exact quantile as reference
     std::vector<std::vector<Datum>> exact =
-        NaiveQuantile(*array, quantiles, {QuantileOptions::LINEAR});
+        NaiveQuantile(*chunked, quantiles, {QuantileOptions::LINEAR});
     const double* approx = out_array->data()->GetValues<double>(1);
     for (size_t i = 0; i < quantiles.size(); ++i) {
       const auto& exact_scalar = checked_pointer_cast<DoubleScalar>(exact[i][0].scalar());
@@ -1628,29 +1720,26 @@ class TestRandomQuantileKernel : public TestPrimitiveQuantileKernel<DoubleType>
     }
   }
 
- private:
-  void GenerateTestData(int64_t array_size, int64_t num_quantiles, int min, int max,
-                        std::shared_ptr<Array>* array, std::vector<double>* quantiles) {
-    auto rand = random::RandomArrayGenerator(0x5487658);
-    *array = rand.Float64(array_size, min, max, /*null_prob=*/0.1, /*nan_prob=*/0.2);
-
-    random_real(num_quantiles, 0x5487658, 0.0, 1.0, quantiles);
-    // make sure to exercise 0 and 1 quantiles
-    *std::min_element(quantiles->begin(), quantiles->end()) = 0;
-    *std::max_element(quantiles->begin(), quantiles->end()) = 1;
+  std::vector<std::vector<Datum>> NaiveQuantile(
+      const std::shared_ptr<Array>& array, const std::vector<double>& quantiles,
+      const std::vector<enum QuantileOptions::Interpolation>& interpolations) {
+    return NaiveQuantile(ChunkedArray(array), quantiles, interpolations);
   }
 
   std::vector<std::vector<Datum>> NaiveQuantile(
-      const Array& array, const std::vector<double>& quantiles,
+      const ChunkedArray& chunked, const std::vector<double>& quantiles,
       const std::vector<enum QuantileOptions::Interpolation>& interpolations) {
-    // copy and sort input array
-    std::vector<double> input(array.length() - array.null_count());
-    const double* values = array.data()->GetValues<double>(1);
-    const auto bitmap = array.null_bitmap_data();
+    // copy and sort input chunked array
     int64_t index = 0;
-    for (int64_t i = 0; i < array.length(); ++i) {
-      if (BitUtil::GetBit(bitmap, i) && !std::isnan(values[i])) {
-        input[index++] = values[i];
+    std::vector<CType> input(chunked.length() - chunked.null_count());
+    for (const auto& array : chunked.chunks()) {
+      const CType* values = array->data()->GetValues<CType>(1);
+      const auto bitmap = array->null_bitmap_data();
+      for (int64_t i = 0; i < array->length(); ++i) {
+        if ((!bitmap || BitUtil::GetBit(bitmap, array->data()->offset + i)) &&
+            !std::isnan(static_cast<double>(values[i]))) {
+          input[index++] = values[i];
+        }
       }
     }
     input.resize(index);
@@ -1667,7 +1756,7 @@ class TestRandomQuantileKernel : public TestPrimitiveQuantileKernel<DoubleType>
     return output;
   }
 
-  Datum GetQuantile(const std::vector<double>& input, double q,
+  Datum GetQuantile(const std::vector<CType>& input, double q,
                     enum QuantileOptions::Interpolation interp) {
     const double index = (input.size() - 1) * q;
     const uint64_t lower_index = static_cast<uint64_t>(index);
@@ -1688,14 +1777,14 @@ class TestRandomQuantileKernel : public TestPrimitiveQuantileKernel<DoubleType>
         }
       case QuantileOptions::LINEAR:
         if (fraction == 0) {
-          return Datum(input[lower_index]);
+          return Datum(input[lower_index] * 1.0);
         } else {
           return Datum(fraction * input[lower_index + 1] +
                        (1 - fraction) * input[lower_index]);
         }
       case QuantileOptions::MIDPOINT:
         if (fraction == 0) {
-          return Datum(input[lower_index]);
+          return Datum(input[lower_index] * 1.0);
         } else {
           return Datum(input[lower_index] / 2.0 + input[lower_index + 1] / 2.0);
         }
@@ -1705,24 +1794,42 @@ class TestRandomQuantileKernel : public TestPrimitiveQuantileKernel<DoubleType>
   }
 };
 
-TEST_F(TestRandomQuantileKernel, Normal) {
+class TestRandomInt64QuantileKernel : public TestRandomQuantileKernel<Int64Type> {};
+
+TEST_F(TestRandomInt64QuantileKernel, Normal) {
   // exercise copy and sort approach: size < 65536
   this->CheckQuantiles(/*array_size=*/10000, /*num_quantiles=*/100);
 }
 
-TEST_F(TestRandomQuantileKernel, Overlapped) {
+TEST_F(TestRandomInt64QuantileKernel, Overlapped) {
   // much more quantiles than array size => many overlaps
   this->CheckQuantiles(/*array_size=*/999, /*num_quantiles=*/9999);
 }
 
-TEST_F(TestRandomQuantileKernel, Histogram) {
+TEST_F(TestRandomInt64QuantileKernel, Histogram) {
   // exercise histogram approach: size >= 65536, range <= 65536
   this->CheckQuantiles(/*array_size=*/80000, /*num_quantiles=*/100);
 }
 
-TEST_F(TestRandomQuantileKernel, TDigest) {
+TEST_F(TestRandomInt64QuantileKernel, Sliced) {
+  this->CheckQuantilesSliced(1000, 10);   // sort
+  this->CheckQuantilesSliced(66000, 10);  // count
+}
+
+class TestRandomFloatQuantileKernel : public TestRandomQuantileKernel<DoubleType> {};
+
+TEST_F(TestRandomFloatQuantileKernel, Exact) {
+  this->CheckQuantiles(/*array_size=*/1000, /*num_quantiles=*/100);
+}
+
+TEST_F(TestRandomFloatQuantileKernel, TDigest) {
   this->CheckTDigests(/*chunk_sizes=*/{12345, 6789, 8765, 4321}, /*num_quantiles=*/100);
 }
+
+TEST_F(TestRandomFloatQuantileKernel, Sliced) {
+  this->CheckQuantilesSliced(1000, 10);
+  this->CheckTDigestsSliced({200, 600}, 10);
+}
 #endif
 
 class TestTDigestKernel : public ::testing::Test {};
diff --git a/cpp/src/arrow/compute/kernels/util_internal.h b/cpp/src/arrow/compute/kernels/util_internal.h
index 326de2f56f5..f230bfbbd6d 100644
--- a/cpp/src/arrow/compute/kernels/util_internal.h
+++ b/cpp/src/arrow/compute/kernels/util_internal.h
@@ -125,29 +125,26 @@ int64_t CountValues(uint64_t* counts, const Datum& datum, T min) {
 }
 
 // Copy numerical array values to a buffer, ignore nulls.
-template <size_t SizeOfCType>
-ARROW_NOINLINE int64_t CopyNonNullValues(const ArrayData& data, void* out) {
-  uint8_t* u8_buffer = reinterpret_cast<uint8_t*>(out);
+template <typename T>
+ARROW_NOINLINE int64_t CopyNonNullValues(const ArrayData& data, T* out) {
   const int64_t n = data.length - data.GetNullCount();
   if (n > 0) {
     int64_t index = 0;
-    const uint8_t* u8_values = data.GetValues<uint8_t>(1);
+    const T* values = data.GetValues<T>(1);
     arrow::internal::VisitSetBitRunsVoid(
         data.buffers[0], data.offset, data.length, [&](int64_t pos, int64_t len) {
-          memcpy(u8_buffer + index * SizeOfCType, u8_values + pos * SizeOfCType,
-                 len * SizeOfCType);
+          memcpy(out + index, values + pos, len * sizeof(T));
           index += len;
         });
   }
   return n;
 }
 
-template <size_t SizeOfCType>
-int64_t CopyNonNullValues(const Datum& datum, void* out) {
-  uint8_t* u8_buffer = reinterpret_cast<uint8_t*>(out);
+template <typename T>
+int64_t CopyNonNullValues(const Datum& datum, T* out) {
   int64_t n = 0;
   for (const auto& array : datum.chunks()) {
-    n += CopyNonNullValues<SizeOfCType>(*array->data(), u8_buffer + n * SizeOfCType);
+    n += CopyNonNullValues(*array->data(), out + n);
   }
   return n;
 }

From ebf019159f0d455bd70e01af8c8a144800fb0bef Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Mon, 10 May 2021 12:47:31 -0500
Subject: [PATCH 214/719] ARROW-12653: [Archery] allow me to add a comment to
 crossbow requests

Closes #10247 from jonkeane/ARROW-12653

Authored-by: Jonathan Keane <jkeane@gmail.com>
Signed-off-by: Jonathan Keane <jkeane@gmail.com>
---
 dev/archery/archery/bot.py            |  8 +++++++-
 dev/archery/archery/tests/test_bot.py | 14 ++++++++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/dev/archery/archery/bot.py b/dev/archery/archery/bot.py
index c69cf9112da..cb3d1da14fd 100644
--- a/dev/archery/archery/bot.py
+++ b/dev/archery/archery/bot.py
@@ -110,7 +110,13 @@ def parse_command(self, payload):
         elif not comment['body'].lstrip().startswith(mention):
             raise EventError("The bot is not mentioned")
 
-        return payload['comment']['body'].split(mention)[-1].strip()
+        # Parse the comment, removing the bot mentioned (and everything
+        # before it)
+        command = payload['comment']['body'].split(mention)[-1]
+
+        # then split on newlines and keep only the first line
+        # (ignoring all other lines)
+        return command.split("\n")[0].strip()
 
     def handle(self, event, payload):
         try:
diff --git a/dev/archery/archery/tests/test_bot.py b/dev/archery/archery/tests/test_bot.py
index e00853ceb2c..e84fb7e273a 100644
--- a/dev/archery/archery/tests/test_bot.py
+++ b/dev/archery/archery/tests/test_bot.py
@@ -156,6 +156,7 @@ def handler(command, **kwargs):
 
 @pytest.mark.parametrize(('command', 'reaction'), [
     ('@ursabot build', '+1'),
+    ('@ursabot build\nwith a comment', '+1'),
     ('@ursabot listen', '-1'),
 ])
 def test_issue_comment_with_commands(load_fixture, responses, command,
@@ -199,3 +200,16 @@ def handler(command, **kwargs):
 
     post = responses.calls[3]
     assert json.loads(post.request.body) == {'content': reaction}
+
+
+def test_issue_comment_with_commands_bot_not_first(load_fixture, responses):
+    # when the @-mention is not first, this is a no-op
+    handler = Mock()
+
+    payload = load_fixture('event-issue-comment-build-command.json')
+    payload["comment"]["body"] = 'with a comment\n@ursabot build'
+
+    bot = CommentBot(name='ursabot', token='', handler=handler)
+    bot.handle('issue_comment', payload)
+
+    handler.assert_not_called()

From 90fa130cfa13fb70ec9f1b7080bdb4a216192b3c Mon Sep 17 00:00:00 2001
From: Adam Johnson <me@adamj.eu>
Date: Mon, 10 May 2021 14:07:35 -0400
Subject: [PATCH 215/719] MINOR: [Python] Document Python 3.9 support

Missed in #8386.

Closes #10286 from adamchainz/patch-1

Authored-by: Adam Johnson <me@adamj.eu>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 docs/source/python/install.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/python/install.rst b/docs/source/python/install.rst
index 60e81a1ab35..3c23d8a0f5b 100644
--- a/docs/source/python/install.rst
+++ b/docs/source/python/install.rst
@@ -28,7 +28,7 @@ using a 64-bit system.
 Python Compatibility
 --------------------
 
-PyArrow is currently compatible with Python 3.6, 3.7 and 3.8.
+PyArrow is currently compatible with Python 3.6, 3.7, 3.8, and 3.9.
 
 Using Conda
 -----------

From 6c61a969349366f1c165794934ab75b8bad07d62 Mon Sep 17 00:00:00 2001
From: Dominik Moritz <domoritz@gmail.com>
Date: Tue, 11 May 2021 09:35:02 +0200
Subject: [PATCH 216/719] MINOR: document how to run benchmarks (#10291)

---
 js/DEVELOP.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/js/DEVELOP.md b/js/DEVELOP.md
index 952a5f25155..186032c3ec8 100644
--- a/js/DEVELOP.md
+++ b/js/DEVELOP.md
@@ -70,6 +70,10 @@ To run tests directly on the sources without bundling, use the `src` target (e.g
 
 Uses [lerna](https://github.com/lerna/lerna) to publish each build target to npm with [conventional](https://conventionalcommits.org/) [changelogs](https://github.com/conventional-changelog/conventional-changelog/tree/master/packages/conventional-changelog-cli).
 
+# Running the Performance Benchmarks
+
+First, compile the bundles with `yarn build` and generate perf data with `yarn create:perfdata`. Then you can run the benchmarks with `yarn perf`. You can change the target you want to test by changing the imports in `perf/index.js`.
+
 # Updating the Arrow format flatbuffers generated code
 
 1. Once generated, the flatbuffers format code needs to be adjusted for our build scripts (assumes `gnu-sed`):

From 303a2907448559aea8308c5f354d364d3916ed85 Mon Sep 17 00:00:00 2001
From: Yibo Cai <yibo.cai@arm.com>
Date: Tue, 11 May 2021 11:11:15 +0200
Subject: [PATCH 217/719] ARROW-12533: [C++] Add random real distribution
 function

Clang with gnu libstdc++ produces code very slow in generating random
real numbers on Arm64.

This patch implements three random utilities based on clang libc++:
- std::generate_canonical
- std::random_real_distribution
- std::bernoulli_distribution

It brings ~100x speedup on Arm64 and ~8x on x86_64 in generating
random reals when build arrow with clang + gnu libstdc++.
No influence to gcc + libstdc++, or clang + libc++.

Closes #10283 from cyb70289/12533-random-real

Authored-by: Yibo Cai <yibo.cai@arm.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/testing/random.cc      | 10 ++--
 cpp/src/arrow/testing/random.h       |  3 +-
 cpp/src/arrow/testing/random_test.cc | 72 ++++++++++++++++++++++++
 cpp/src/arrow/testing/uniform_real.h | 84 ++++++++++++++++++++++++++++
 cpp/src/arrow/testing/util.cc        |  4 +-
 5 files changed, 166 insertions(+), 7 deletions(-)
 create mode 100644 cpp/src/arrow/testing/uniform_real.h

diff --git a/cpp/src/arrow/testing/random.cc b/cpp/src/arrow/testing/random.cc
index f8f5d6f10f0..ab0d4f33245 100644
--- a/cpp/src/arrow/testing/random.cc
+++ b/cpp/src/arrow/testing/random.cc
@@ -82,7 +82,7 @@ struct GenerateOptions {
     }
     pcg32_fast rng(seed_++);
     DistributionType dist(min_, max_);
-    std::bernoulli_distribution nan_dist(nan_probability_);
+    ::arrow::random::bernoulli_distribution nan_dist(nan_probability_);
     const ValueType nan_value = std::numeric_limits<ValueType>::quiet_NaN();
 
     // A static cast is required due to the int16 -> int8 handling.
@@ -102,7 +102,7 @@ struct GenerateOptions {
   void GenerateBitmap(uint8_t* buffer, size_t n, int64_t* null_count) {
     int64_t count = 0;
     pcg32_fast rng(seed_++);
-    std::bernoulli_distribution dist(1.0 - probability_);
+    ::arrow::random::bernoulli_distribution dist(1.0 - probability_);
 
     for (size_t i = 0; i < n; i++) {
       if (dist(rng)) {
@@ -211,7 +211,8 @@ PRIMITIVE_RAND_INTEGER_IMPL(Float16, int16_t, HalfFloatType)
 std::shared_ptr<Array> RandomArrayGenerator::Float32(int64_t size, float min, float max,
                                                      double null_probability,
                                                      double nan_probability) {
-  using OptionType = GenerateOptions<float, std::uniform_real_distribution<float>>;
+  using OptionType =
+      GenerateOptions<float, ::arrow::random::uniform_real_distribution<float>>;
   OptionType options(seed(), min, max, null_probability, nan_probability);
   return GenerateNumericArray<FloatType, OptionType>(size, options);
 }
@@ -219,7 +220,8 @@ std::shared_ptr<Array> RandomArrayGenerator::Float32(int64_t size, float min, fl
 std::shared_ptr<Array> RandomArrayGenerator::Float64(int64_t size, double min, double max,
                                                      double null_probability,
                                                      double nan_probability) {
-  using OptionType = GenerateOptions<double, std::uniform_real_distribution<double>>;
+  using OptionType =
+      GenerateOptions<double, ::arrow::random::uniform_real_distribution<double>>;
   OptionType options(seed(), min, max, null_probability, nan_probability);
   return GenerateNumericArray<DoubleType, OptionType>(size, options);
 }
diff --git a/cpp/src/arrow/testing/random.h b/cpp/src/arrow/testing/random.h
index 11bf9017c56..9d7b4854679 100644
--- a/cpp/src/arrow/testing/random.h
+++ b/cpp/src/arrow/testing/random.h
@@ -25,6 +25,7 @@
 #include <random>
 #include <vector>
 
+#include "arrow/testing/uniform_real.h"
 #include "arrow/testing/visibility.h"
 #include "arrow/type.h"
 
@@ -455,7 +456,7 @@ template <typename T, typename U>
 void random_real(int64_t n, uint32_t seed, T min_value, T max_value,
                  std::vector<U>* out) {
   std::default_random_engine gen(seed);
-  std::uniform_real_distribution<T> d(min_value, max_value);
+  ::arrow::random::uniform_real_distribution<T> d(min_value, max_value);
   out->resize(n, static_cast<T>(0));
   std::generate(out->begin(), out->end(), [&d, &gen] { return static_cast<U>(d(gen)); });
 }
diff --git a/cpp/src/arrow/testing/random_test.cc b/cpp/src/arrow/testing/random_test.cc
index c1e5a83a31a..851088a11b9 100644
--- a/cpp/src/arrow/testing/random_test.cc
+++ b/cpp/src/arrow/testing/random_test.cc
@@ -23,6 +23,7 @@
 #include "arrow/type.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/key_value_metadata.h"
+#include "arrow/util/pcg_random.h"
 
 namespace arrow {
 
@@ -352,5 +353,76 @@ TEST(RandomList, Basics) {
   }
 }
 
+template <typename T>
+class UniformRealTest : public ::testing::Test {
+ protected:
+  void VerifyDist(int seed, T a, T b) {
+    pcg32_fast rng(seed);
+    ::arrow::random::uniform_real_distribution<T> dist(a, b);
+
+    const int kCount = 5000;
+    T min = std::numeric_limits<T>::max();
+    T max = std::numeric_limits<T>::lowest();
+    double sum = 0;
+    double square_sum = 0;
+    for (int i = 0; i < kCount; ++i) {
+      const T v = dist(rng);
+      min = std::min(min, v);
+      max = std::max(max, v);
+      sum += v;
+      square_sum += static_cast<double>(v) * v;
+    }
+
+    ASSERT_GE(min, a);
+    ASSERT_LT(max, b);
+
+    // verify E(X), E(X^2) is near theory
+    const double E_X = (a + b) / 2.0;
+    const double E_X2 = 1.0 / 12 * (a - b) * (a - b) + E_X * E_X;
+    ASSERT_NEAR(sum / kCount, E_X, std::abs(E_X) * 0.02);
+    ASSERT_NEAR(square_sum / kCount, E_X2, E_X2 * 0.02);
+  }
+};
+
+using RealCTypes = ::testing::Types<float, double>;
+TYPED_TEST_SUITE(UniformRealTest, RealCTypes);
+
+TYPED_TEST(UniformRealTest, Basic) {
+  int seed = 42;
+  this->VerifyDist(seed++, 0, 1);
+  this->VerifyDist(seed++, -3, 1);
+  this->VerifyDist(seed++, -123456, 654321);
+}
+
+TEST(BernoulliTest, Basic) {
+  int seed = 42;
+
+  // count #trues (values less than p), p = 0 ~ 1
+  auto count = [&seed](double p, int total) {
+    pcg32_fast rng(seed++);
+    ::arrow::random::bernoulli_distribution dist(p);
+    int cnt = 0;
+    for (int i = 0; i < total; ++i) {
+      cnt += dist(rng);
+    }
+    return cnt;
+  };
+
+  ASSERT_EQ(count(0, 1000), 0);
+  ASSERT_EQ(count(1, 1000), 1000);
+
+  // verify #trues is near p*total
+  auto verify = [&count](double p, int total, double dev) {
+    const int cnt = count(p, total);
+    const int min = std::max(0, static_cast<int>(total * p * (1 - dev)));
+    const int max = std::min(total, static_cast<int>(total * p * (1 + dev)));
+    ASSERT_TRUE(cnt >= min && cnt <= max);
+  };
+
+  for (double p = 0.1; p < 0.95; p += 0.1) {
+    verify(p, 5000, 0.1);
+  }
+}
+
 }  // namespace random
 }  // namespace arrow
diff --git a/cpp/src/arrow/testing/uniform_real.h b/cpp/src/arrow/testing/uniform_real.h
new file mode 100644
index 00000000000..155cb16b641
--- /dev/null
+++ b/cpp/src/arrow/testing/uniform_real.h
@@ -0,0 +1,84 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Random real generation is very slow on Arm if built with clang + libstdc++
+// due to software emulated long double arithmetic.
+// This file ports some random real libs from llvm libc++ library, which are
+// free from long double calculation.
+// It improves performance significantly on both Arm (~100x) and x86 (~8x) in
+// generating random reals when built with clang + gnu libstdc++.
+// Based on: https://github.com/llvm/llvm-project/tree/main/libcxx
+
+#pragma once
+
+#include <limits>
+
+#include <arrow/util/bit_util.h>
+
+namespace arrow {
+namespace random {
+
+namespace detail {
+
+// std::generate_canonical, simplified
+// https://en.cppreference.com/w/cpp/numeric/random/generate_canonical
+template <typename RealType, typename Rng>
+RealType generate_canonical(Rng& rng) {
+  const size_t b = std::numeric_limits<RealType>::digits;
+  const size_t log2R = 63 - ::arrow::BitUtil::CountLeadingZeros(
+                                static_cast<uint64_t>(Rng::max() - Rng::min()) + 1);
+  const size_t k = b / log2R + (b % log2R != 0) + (b == 0);
+  const RealType r = static_cast<RealType>(Rng::max() - Rng::min()) + 1;
+  RealType base = r;
+  RealType sp = static_cast<RealType>(rng() - Rng::min());
+  for (size_t i = 1; i < k; ++i, base *= r) {
+    sp += (rng() - Rng::min()) * base;
+  }
+  return sp / base;
+}
+
+}  // namespace detail
+
+// std::uniform_real_distribution, simplified
+// https://en.cppreference.com/w/cpp/numeric/random/uniform_real_distribution
+template <typename RealType = double>
+struct uniform_real_distribution {
+  const RealType a, b;
+
+  explicit uniform_real_distribution(RealType a = 0, RealType b = 1) : a(a), b(b) {}
+
+  template <typename Rng>
+  RealType operator()(Rng& rng) {
+    return (b - a) * detail::generate_canonical<RealType>(rng) + a;
+  }
+};
+
+// std::bernoulli_distribution, simplified
+// https://en.cppreference.com/w/cpp/numeric/random/bernoulli_distribution
+struct bernoulli_distribution {
+  const double p;
+
+  explicit bernoulli_distribution(double p = 0.5) : p(p) {}
+
+  template <class Rng>
+  bool operator()(Rng& rng) {
+    return detail::generate_canonical<double>(rng) < p;
+  }
+};
+
+}  // namespace random
+}  // namespace arrow
diff --git a/cpp/src/arrow/testing/util.cc b/cpp/src/arrow/testing/util.cc
index a7085e5772d..9e3e2717479 100644
--- a/cpp/src/arrow/testing/util.cc
+++ b/cpp/src/arrow/testing/util.cc
@@ -55,7 +55,7 @@ uint64_t random_seed() {
 void random_null_bytes(int64_t n, double pct_null, uint8_t* null_bytes) {
   const int random_seed = 0;
   pcg32_fast gen(random_seed);
-  std::uniform_real_distribution<double> d(0.0, 1.0);
+  ::arrow::random::uniform_real_distribution<double> d(0.0, 1.0);
   std::generate(null_bytes, null_bytes + n,
                 [&d, &gen, &pct_null] { return d(gen) > pct_null; });
 }
@@ -63,7 +63,7 @@ void random_null_bytes(int64_t n, double pct_null, uint8_t* null_bytes) {
 void random_is_valid(int64_t n, double pct_null, std::vector<bool>* is_valid,
                      int random_seed) {
   pcg32_fast gen(random_seed);
-  std::uniform_real_distribution<double> d(0.0, 1.0);
+  ::arrow::random::uniform_real_distribution<double> d(0.0, 1.0);
   is_valid->resize(n, false);
   std::generate(is_valid->begin(), is_valid->end(),
                 [&d, &gen, &pct_null] { return d(gen) > pct_null; });

From 553f3d8211271e8eb576c9668e53dd5dc53c480a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Tue, 11 May 2021 16:06:01 +0200
Subject: [PATCH 218/719] ARROW-12721: [CI] Fix path for uploading aarch64
 conda artifacts from the nightly builds
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #10288 from kszucs/conda-upload-error

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 dev/tasks/conda-recipes/azure.linux.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dev/tasks/conda-recipes/azure.linux.yml b/dev/tasks/conda-recipes/azure.linux.yml
index 161fd14e90c..79fe7ae4518 100755
--- a/dev/tasks/conda-recipes/azure.linux.yml
+++ b/dev/tasks/conda-recipes/azure.linux.yml
@@ -34,5 +34,5 @@ jobs:
       CI=azure arrow/dev/tasks/conda-recipes/run_docker_build.sh $(pwd)/build_artifacts
     displayName: Run docker build
 
-  {{ macros.azure_upload_releases("build_artifacts/linux-64/*.tar.bz2") }}
-  {{ macros.azure_upload_anaconda("build_artifacts/linux-64/*.tar.bz2") }}
+  {{ macros.azure_upload_releases("build_artifacts/*/*.tar.bz2") }}
+  {{ macros.azure_upload_anaconda("build_artifacts/*/*.tar.bz2") }}

From e8a96153594e201617dc95524ea3aed50a7ae9a8 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Tue, 11 May 2021 11:36:41 -0400
Subject: [PATCH 219/719] MINOR: Fix pyarrow.parquet.read_table docstring
 (#10293)

---
 python/pyarrow/parquet.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/pyarrow/parquet.py b/python/pyarrow/parquet.py
index fd07aeddb9e..22763680cd1 100644
--- a/python/pyarrow/parquet.py
+++ b/python/pyarrow/parquet.py
@@ -1708,6 +1708,8 @@ def pieces(self):
     keys and only a hive-style directory structure is supported. When
     setting `use_legacy_dataset` to False, also within-file level filtering
     and different partitioning schemes are supported.
+
+    {3}
 pre_buffer : bool, default True
     Coalesce and issue file reads in parallel to improve performance on
     high-latency filesystems (e.g. S3). If True, Arrow will use a
@@ -1716,8 +1718,6 @@ def pieces(self):
     performs readahead (e.g. fsspec's S3FS), disable readahead for best
     results.
 
-    {3}
-
 Returns
 -------
 {2}

From 4a121b620020df3d9947cacd1f56991300bf33d8 Mon Sep 17 00:00:00 2001
From: Nick Bruno <nbruno@users.noreply.github.com>
Date: Tue, 11 May 2021 08:44:16 -0700
Subject: [PATCH 220/719] ARROW-11173: [Java] Add map type in complex reader /
 writer

This pull request adds support for Map types in FieldReader and FieldWriter.

Initial unit tests show how to add the following nested types:
- A list of maps: `List<Map<Integer, Long>>`
- Nesting a map as the value of another map: `Map<Long, Map<Long, Long>>`
- Nesting maps as both the key and value of another map: `Map<Map<Integer, Integer>, Map<Long, Long>>`

Appreciate any feedback or suggestions for improvement.

Closes #9151 from nbruno/ARROW-11173

Authored-by: Nick Bruno <nbruno@users.noreply.github.com>
Signed-off-by: Micah Kornfield <emkornfield@gmail.com>
---
 .../src/main/codegen/includes/vv_imports.ftl  |   1 +
 .../templates/AbstractFieldReader.java        |   4 +
 .../templates/AbstractFieldWriter.java        |  57 ++-
 .../AbstractPromotableFieldWriter.java        |  61 ++-
 .../main/codegen/templates/BaseReader.java    |  10 +
 .../main/codegen/templates/BaseWriter.java    |  15 +
 .../codegen/templates/DenseUnionReader.java   |  12 +
 .../codegen/templates/DenseUnionVector.java   |  16 +
 .../codegen/templates/DenseUnionWriter.java   |  40 ++
 .../main/codegen/templates/StructWriters.java |  40 ++
 .../templates/UnionFixedSizeListWriter.java   |  23 +
 .../codegen/templates/UnionListWriter.java    |  23 +
 .../codegen/templates/UnionMapWriter.java     |  12 +
 .../main/codegen/templates/UnionReader.java   |  13 +
 .../main/codegen/templates/UnionVector.java   |  28 ++
 .../main/codegen/templates/UnionWriter.java   |  86 ++++
 .../complex/impl/AbstractBaseReader.java      |   6 +
 .../vector/complex/impl/PromotableWriter.java |  13 +-
 .../vector/complex/reader/FieldReader.java    |   5 +-
 .../vector/complex/writer/FieldWriter.java    |   3 +-
 .../apache/arrow/vector/TestMapVector.java    | 411 ++++++++++++++++++
 .../apache/arrow/vector/TestUnionVector.java  |  63 +++
 .../complex/writer/TestComplexWriter.java     |  77 ++++
 23 files changed, 1003 insertions(+), 16 deletions(-)

diff --git a/java/vector/src/main/codegen/includes/vv_imports.ftl b/java/vector/src/main/codegen/includes/vv_imports.ftl
index 2acd4361553..c9a8820b258 100644
--- a/java/vector/src/main/codegen/includes/vv_imports.ftl
+++ b/java/vector/src/main/codegen/includes/vv_imports.ftl
@@ -36,6 +36,7 @@ import org.apache.arrow.vector.complex.impl.*;
 import org.apache.arrow.vector.complex.writer.*;
 import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter;
 import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter;
+import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter;
 import org.apache.arrow.vector.util.JsonStringArrayList;
 
 import java.util.Arrays;
diff --git a/java/vector/src/main/codegen/templates/AbstractFieldReader.java b/java/vector/src/main/codegen/templates/AbstractFieldReader.java
index 05c1296a424..84bcbdf49fc 100644
--- a/java/vector/src/main/codegen/templates/AbstractFieldReader.java
+++ b/java/vector/src/main/codegen/templates/AbstractFieldReader.java
@@ -77,6 +77,10 @@ public void copyAsField(String name, ListWriter writer) {
     fail("CopyAsFieldList");
   }
 
+  public void copyAsField(String name, MapWriter writer) {
+    fail("CopyAsFieldMap");
+  }
+
   <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first />
   <#assign boxedType = (minor.boxedType!type.boxedType) />
   public void read(${name}Holder holder) {
diff --git a/java/vector/src/main/codegen/templates/AbstractFieldWriter.java b/java/vector/src/main/codegen/templates/AbstractFieldWriter.java
index bce842d5911..1f80f25266b 100644
--- a/java/vector/src/main/codegen/templates/AbstractFieldWriter.java
+++ b/java/vector/src/main/codegen/templates/AbstractFieldWriter.java
@@ -56,12 +56,42 @@ public void end() {
 
   @Override
   public void startList() {
-    throw new IllegalStateException(String.format("You tried to start when you are using a ValueWriter of type %s.", this.getClass().getSimpleName()));
+    throw new IllegalStateException(String.format("You tried to start a list when you are using a ValueWriter of type %s.", this.getClass().getSimpleName()));
   }
 
   @Override
   public void endList() {
-    throw new IllegalStateException(String.format("You tried to end when you are using a ValueWriter of type %s.", this.getClass().getSimpleName()));
+    throw new IllegalStateException(String.format("You tried to end a list when you are using a ValueWriter of type %s.", this.getClass().getSimpleName()));
+  }
+
+  @Override
+  public void startMap() {
+    throw new IllegalStateException(String.format("You tried to start a map when you are using a ValueWriter of type %s.", this.getClass().getSimpleName()));
+  }
+
+  @Override
+  public void endMap() {
+    throw new IllegalStateException(String.format("You tried to end a map when you are using a ValueWriter of type %s.", this.getClass().getSimpleName()));
+  }
+
+  @Override
+  public void startEntry() {
+    throw new IllegalStateException(String.format("You tried to start a map entry when you are using a ValueWriter of type %s.", this.getClass().getSimpleName()));
+  }
+
+  @Override
+  public MapWriter key() {
+    throw new IllegalStateException(String.format("You tried to start a map key when you are using a ValueWriter of type %s.", this.getClass().getSimpleName()));
+  }
+
+  @Override
+  public MapWriter value() {
+    throw new IllegalStateException(String.format("You tried to start a map value when you are using a ValueWriter of type %s.", this.getClass().getSimpleName()));
+  }
+
+  @Override
+  public void endEntry() {
+    throw new IllegalStateException(String.format("You tried to end a map entry when you are using a ValueWriter of type %s.", this.getClass().getSimpleName()));
   }
 
   <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first />
@@ -123,6 +153,12 @@ public ListWriter list() {
     return null;
   }
 
+  @Override
+  public MapWriter map() {
+    fail("Map");
+    return null;
+  }
+
   @Override
   public StructWriter struct(String name) {
     fail("Struct");
@@ -135,6 +171,23 @@ public ListWriter list(String name) {
     return null;
   }
 
+  @Override
+  public MapWriter map(String name) {
+    fail("Map");
+    return null;
+  }
+
+  @Override
+  public MapWriter map(boolean keysSorted) {
+    fail("Map");
+    return null;
+  }
+
+  @Override
+  public MapWriter map(String name, boolean keysSorted) {
+    fail("Map");
+    return null;
+  }
   <#list vv.types as type><#list type.minor as minor>
   <#assign lowerName = minor.class?uncap_first />
   <#if lowerName == "int" ><#assign lowerName = "integer" /></#if>
diff --git a/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java b/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java
index 6b14dbf2a57..264e8502185 100644
--- a/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java
+++ b/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java
@@ -15,11 +15,6 @@
  * limitations under the License.
  */
 
-import org.apache.arrow.memory.ArrowBuf;
-import org.apache.arrow.vector.types.Types;
-import org.apache.arrow.vector.types.pojo.ArrowType;
-import org.apache.drill.common.types.TypeProtos.MinorType;
-
 <@pp.dropOutputFile />
 <@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/AbstractPromotableFieldWriter.java" />
 
@@ -44,7 +39,11 @@ abstract class AbstractPromotableFieldWriter extends AbstractFieldWriter {
    * @param type the type of the values we want to write
    * @return the corresponding field writer
    */
-  abstract protected FieldWriter getWriter(MinorType type);
+  protected FieldWriter getWriter(MinorType type) {
+    return getWriter(type, null);
+  }
+
+  abstract protected FieldWriter getWriter(MinorType type, ArrowType arrowType);
 
   /**
    * @return the current FieldWriter
@@ -73,6 +72,37 @@ public void endList() {
     setPosition(idx() + 1);
   }
 
+  @Override
+  public void startMap() {
+    getWriter(MinorType.MAP).startMap();
+  }
+
+  @Override
+  public void endMap() {
+    getWriter(MinorType.MAP).endMap();
+    setPosition(idx() + 1);
+  }
+
+  @Override
+  public void startEntry() {
+    getWriter(MinorType.MAP).startEntry();
+  }
+
+  @Override
+  public MapWriter key() {
+    return getWriter(MinorType.MAP).key();
+  }
+
+  @Override
+  public MapWriter value() {
+    return getWriter(MinorType.MAP).value();
+  }
+
+  @Override
+  public void endEntry() {
+    getWriter(MinorType.MAP).endEntry();
+  }
+
   <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first />
     <#assign fields = minor.fields!type.fields />
   <#if minor.class != "Decimal" && minor.class != "Decimal256">
@@ -144,6 +174,16 @@ public ListWriter list() {
     return getWriter(MinorType.LIST).list();
   }
 
+  @Override
+  public MapWriter map() {
+    return getWriter(MinorType.LIST).map();
+  }
+
+  @Override
+  public MapWriter map(boolean keysSorted) {
+    return getWriter(MinorType.MAP, new ArrowType.Map(keysSorted));
+  }
+
   @Override
   public StructWriter struct(String name) {
     return getWriter(MinorType.STRUCT).struct(name);
@@ -154,6 +194,15 @@ public ListWriter list(String name) {
     return getWriter(MinorType.STRUCT).list(name);
   }
 
+  @Override
+  public MapWriter map(String name) {
+    return getWriter(MinorType.STRUCT).map(name);
+  }
+
+  @Override
+  public MapWriter map(String name, boolean keysSorted) {
+    return getWriter(MinorType.STRUCT).map(name, keysSorted);
+  }
   <#list vv.types as type><#list type.minor as minor>
   <#assign lowerName = minor.class?uncap_first />
   <#if lowerName == "int" ><#assign lowerName = "integer" /></#if>
diff --git a/java/vector/src/main/codegen/templates/BaseReader.java b/java/vector/src/main/codegen/templates/BaseReader.java
index 670644c0b17..85d582a53bf 100644
--- a/java/vector/src/main/codegen/templates/BaseReader.java
+++ b/java/vector/src/main/codegen/templates/BaseReader.java
@@ -60,6 +60,16 @@ public interface RepeatedListReader extends ListReader{
     int size();
     void copyAsValue(ListWriter writer);
   }
+
+  public interface MapReader extends BaseReader{
+    FieldReader reader();
+  }
+
+  public interface RepeatedMapReader extends MapReader{
+    boolean next();
+    int size();
+    void copyAsValue(MapWriter writer);
+  }
   
   public interface ScalarReader extends  
   <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first /> ${name}Reader, </#list></#list> 
diff --git a/java/vector/src/main/codegen/templates/BaseWriter.java b/java/vector/src/main/codegen/templates/BaseWriter.java
index 9c9f7ee89bc..4d63fb73e98 100644
--- a/java/vector/src/main/codegen/templates/BaseWriter.java
+++ b/java/vector/src/main/codegen/templates/BaseWriter.java
@@ -62,6 +62,8 @@ public interface StructWriter extends BaseWriter {
     void copyReaderToField(String name, FieldReader reader);
     StructWriter struct(String name);
     ListWriter list(String name);
+    MapWriter map(String name);
+    MapWriter map(String name, boolean keysSorted);
     void start();
     void end();
   }
@@ -71,6 +73,8 @@ public interface ListWriter extends BaseWriter {
     void endList();
     StructWriter struct();
     ListWriter list();
+    MapWriter map();
+    MapWriter map(boolean keysSorted);
     void copyReader(FieldReader reader);
 
     <#list vv.types as type><#list type.minor as minor>
@@ -82,6 +86,17 @@ public interface ListWriter extends BaseWriter {
     </#list></#list>
   }
 
+  public interface MapWriter extends ListWriter {
+    void startMap();
+    void endMap();
+
+    void startEntry();
+    void endEntry();
+
+    MapWriter key();
+    MapWriter value();
+  }
+
   public interface ScalarWriter extends
   <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first /> ${name}Writer, </#list></#list> BaseWriter {}
 
diff --git a/java/vector/src/main/codegen/templates/DenseUnionReader.java b/java/vector/src/main/codegen/templates/DenseUnionReader.java
index f7e161ac86f..d3e9b582406 100644
--- a/java/vector/src/main/codegen/templates/DenseUnionReader.java
+++ b/java/vector/src/main/codegen/templates/DenseUnionReader.java
@@ -129,6 +129,18 @@ private FieldReader getList(byte typeId) {
     return listReader;
   }
 
+  private UnionMapReader mapReader;
+
+  private FieldReader getMap(byte typeId) {
+    UnionMapReader mapReader = (UnionMapReader) readers[typeId];
+    if (mapReader == null) {
+      mapReader = new UnionMapReader((MapVector) data.getVectorByType(typeId));
+      mapReader.setPosition(idx());
+      readers[typeId] = mapReader;
+    }
+    return mapReader;
+  }
+
   @Override
   public java.util.Iterator<String> iterator() {
     throw new UnsupportedOperationException();
diff --git a/java/vector/src/main/codegen/templates/DenseUnionVector.java b/java/vector/src/main/codegen/templates/DenseUnionVector.java
index cf56b514fae..fff8b8114ac 100644
--- a/java/vector/src/main/codegen/templates/DenseUnionVector.java
+++ b/java/vector/src/main/codegen/templates/DenseUnionVector.java
@@ -342,6 +342,22 @@ public ListVector getList(byte typeId) {
     return listVector;
   }
 
+  public MapVector getMap(byte typeId) {
+    MapVector mapVector = typeId < 0 ? null : (MapVector) childVectors[typeId];
+    if (mapVector == null) {
+      int vectorCount = internalStruct.size();
+      mapVector = addOrGet(typeId, MinorType.MAP, MapVector.class);
+      if (internalStruct.size() > vectorCount) {
+        mapVector.allocateNew();
+        childVectors[typeId] = mapVector;
+        if (callBack != null) {
+          callBack.doWork();
+        }
+      }
+    }
+    return mapVector;
+  }
+
   public byte getTypeId(int index) {
     return typeBuffer.getByte(index * TYPE_WIDTH);
   }
diff --git a/java/vector/src/main/codegen/templates/DenseUnionWriter.java b/java/vector/src/main/codegen/templates/DenseUnionWriter.java
index 769b84268af..e69a62a9e0f 100644
--- a/java/vector/src/main/codegen/templates/DenseUnionWriter.java
+++ b/java/vector/src/main/codegen/templates/DenseUnionWriter.java
@@ -111,6 +111,20 @@ public ListWriter asList(byte typeId) {
     return getListWriter(typeId);
   }
 
+  private MapWriter getMapWriter(byte typeId) {
+    MapWriter mapWriter = (MapWriter) writers[typeId];
+    if (mapWriter == null) {
+      mapWriter = new UnionMapWriter((MapVector) data.getVectorByType(typeId));
+      writers[typeId] = mapWriter;
+    }
+    return mapWriter;
+  }
+
+  public MapWriter asMap(byte typeId) {
+    data.setTypeId(idx(), typeId);
+    return getMapWriter(typeId);
+  }
+
   BaseWriter getWriter(byte typeId) {
     MinorType minorType = data.getVectorByType(typeId).getMinorType();
     switch (minorType) {
@@ -118,6 +132,8 @@ BaseWriter getWriter(byte typeId) {
         return getStructWriter(typeId);
       case LIST:
         return getListWriter(typeId);
+      case MAP:
+        return getMapWriter(typeId);
     <#list vv.types as type>
       <#list type.minor as minor>
         <#assign name = minor.class?cap_first />
@@ -195,6 +211,30 @@ public ListWriter list(String name) {
     return getStructWriter(typeId).list(name);
   }
 
+  @Override
+  public MapWriter map() {
+    byte typeId = data.getTypeId(idx());
+    data.setTypeId(idx(), typeId);
+    getListWriter(typeId).setPosition(data.getOffset(idx()));
+    return getMapWriter(typeId).map();
+  }
+
+  @Override
+  public MapWriter map(String name) {
+    byte typeId = data.getTypeId(idx());
+    data.setTypeId(idx(), typeId);
+    getStructWriter(typeId).setPosition(data.getOffset(idx()));
+    return getStructWriter(typeId).map(name);
+  }
+
+  @Override
+  public MapWriter map(String name, boolean keysSorted) {
+    byte typeId = data.getTypeId(idx());
+    data.setTypeId(idx(), typeId);
+    getStructWriter(typeId).setPosition(data.getOffset(idx()));
+    return getStructWriter(typeId).map(name, keysSorted);
+  }
+
   @Override
   public StructWriter struct(String name) {
     byte typeId = data.getTypeId(idx());
diff --git a/java/vector/src/main/codegen/templates/StructWriters.java b/java/vector/src/main/codegen/templates/StructWriters.java
index b908d1058fb..69693c63011 100644
--- a/java/vector/src/main/codegen/templates/StructWriters.java
+++ b/java/vector/src/main/codegen/templates/StructWriters.java
@@ -64,6 +64,11 @@ public class ${mode}StructWriter extends AbstractFieldWriter {
       case LIST:
         list(child.getName());
         break;
+      case MAP: {
+        ArrowType.Map arrowType = (ArrowType.Map) child.getType();
+        map(child.getName(), arrowType.getKeysSorted());
+        break;
+      }
       case UNION:
         FieldType fieldType = new FieldType(addVectorAsNullable, MinorType.UNION.getType(), null, null);
         UnionWriter writer = new UnionWriter(container.addOrGet(child.getName(), fieldType, UnionVector.class), getNullableStructWriterFactory());
@@ -184,6 +189,41 @@ public ListWriter list(String name) {
     return writer;
   }
 
+  @Override
+  public MapWriter map(String name) {
+    return map(name, false);
+  }
+
+  @Override
+  public MapWriter map(String name, boolean keysSorted) {
+    FieldWriter writer = fields.get(handleCase(name));
+    if(writer == null) {
+      ValueVector vector;
+      ValueVector currentVector = container.getChild(name);
+      MapVector v = container.addOrGet(name,
+          new FieldType(addVectorAsNullable,
+            new ArrowType.Map(keysSorted)
+          ,null, null),
+          MapVector.class);
+      writer = new PromotableWriter(v, container, getNullableStructWriterFactory());
+      vector = v;
+      if (currentVector == null || currentVector != vector) {
+        if(this.initialCapacity > 0) {
+          vector.setInitialCapacity(this.initialCapacity);
+        }
+        vector.allocateNewSafe();
+      }
+      writer.setPosition(idx());
+      fields.put(handleCase(name), writer);
+    } else {
+      if (writer instanceof PromotableWriter) {
+        // ensure writers are initialized
+        ((PromotableWriter)writer).getWriter(MinorType.MAP, new ArrowType.Map(keysSorted));
+      }
+    }
+    return writer;
+  }
+
   public void setValueCount(int count) {
     container.setValueCount(count);
   }
diff --git a/java/vector/src/main/codegen/templates/UnionFixedSizeListWriter.java b/java/vector/src/main/codegen/templates/UnionFixedSizeListWriter.java
index f04b4db3208..55c661bfc60 100644
--- a/java/vector/src/main/codegen/templates/UnionFixedSizeListWriter.java
+++ b/java/vector/src/main/codegen/templates/UnionFixedSizeListWriter.java
@@ -169,6 +169,29 @@ public StructWriter struct(String name) {
     return structWriter;
   }
 
+  @Override
+  public MapWriter map() {
+    return writer;
+  }
+
+  @Override
+  public MapWriter map(String name) {
+    MapWriter mapWriter = writer.map(name);
+    return mapWriter;
+  }
+
+  @Override
+  public MapWriter map(boolean keysSorted) {
+    writer.map(keysSorted);
+    return writer;
+  }
+
+  @Override
+  public MapWriter map(String name, boolean keysSorted) {
+    MapWriter mapWriter = writer.map(name, keysSorted);
+    return mapWriter;
+  }
+
   @Override
   public void startList() {
     int start = vector.startNewValue(idx());
diff --git a/java/vector/src/main/codegen/templates/UnionListWriter.java b/java/vector/src/main/codegen/templates/UnionListWriter.java
index 15c601942c6..926276b5eb4 100644
--- a/java/vector/src/main/codegen/templates/UnionListWriter.java
+++ b/java/vector/src/main/codegen/templates/UnionListWriter.java
@@ -176,6 +176,29 @@ public StructWriter struct(String name) {
     return structWriter;
   }
 
+  @Override
+  public MapWriter map() {
+    return writer;
+  }
+
+  @Override
+  public MapWriter map(String name) {
+    MapWriter mapWriter = writer.map(name);
+    return mapWriter;
+  }
+
+  @Override
+  public MapWriter map(boolean keysSorted) {
+    writer.map(keysSorted);
+    return writer;
+  }
+
+  @Override
+  public MapWriter map(String name, boolean keysSorted) {
+    MapWriter mapWriter = writer.map(name, keysSorted);
+    return mapWriter;
+  }
+
   <#if listName == "LargeList">
   @Override
   public void startList() {
diff --git a/java/vector/src/main/codegen/templates/UnionMapWriter.java b/java/vector/src/main/codegen/templates/UnionMapWriter.java
index cec73c45f5c..606f880377b 100644
--- a/java/vector/src/main/codegen/templates/UnionMapWriter.java
+++ b/java/vector/src/main/codegen/templates/UnionMapWriter.java
@@ -207,4 +207,16 @@ public ListWriter list() {
         return super.list();
     }
   }
+
+  @Override
+  public MapWriter map(boolean keysSorted) {
+    switch (mode) {
+      case KEY:
+        return entryWriter.map(MapVector.KEY_NAME, keysSorted);
+      case VALUE:
+        return entryWriter.map(MapVector.VALUE_NAME, keysSorted);
+      default:
+        return super.map();
+    }
+  }
 }
diff --git a/java/vector/src/main/codegen/templates/UnionReader.java b/java/vector/src/main/codegen/templates/UnionReader.java
index 6ed03fa2117..0e263cc91fc 100644
--- a/java/vector/src/main/codegen/templates/UnionReader.java
+++ b/java/vector/src/main/codegen/templates/UnionReader.java
@@ -84,6 +84,8 @@ private FieldReader getReaderForIndex(int index) {
       return (FieldReader) getStruct();
     case LIST:
       return (FieldReader) getList();
+    case MAP:
+      return (FieldReader) getMap();
     <#list vv.types as type>
       <#list type.minor as minor>
         <#assign name = minor.class?cap_first />
@@ -121,6 +123,17 @@ private FieldReader getList() {
     return listReader;
   }
 
+  private UnionMapReader mapReader;
+
+  private FieldReader getMap() {
+    if (mapReader == null) {
+      mapReader = new UnionMapReader(data.getMap());
+      mapReader.setPosition(idx());
+      readers[MinorType.MAP.ordinal()] = mapReader;
+    }
+    return mapReader;
+  }
+
   @Override
   public java.util.Iterator<String> iterator() {
     return getStruct().iterator();
diff --git a/java/vector/src/main/codegen/templates/UnionVector.java b/java/vector/src/main/codegen/templates/UnionVector.java
index f33f44bbc60..0d9130da0e0 100644
--- a/java/vector/src/main/codegen/templates/UnionVector.java
+++ b/java/vector/src/main/codegen/templates/UnionVector.java
@@ -95,6 +95,7 @@ public class UnionVector implements FieldVector {
 
   private StructVector structVector;
   private ListVector listVector;
+  private MapVector mapVector;
 
   private FieldReader reader;
 
@@ -319,6 +320,31 @@ public ListVector getList() {
     return listVector;
   }
 
+  public MapVector getMap() {
+    if (mapVector == null) {
+      throw new IllegalArgumentException("No map present. Provide ArrowType argument to create a new vector");
+    }
+    return mapVector;
+  }
+
+  public MapVector getMap(ArrowType arrowType) {
+    return getMap(null, arrowType);
+  }
+
+  public MapVector getMap(String name, ArrowType arrowType) {
+    if (mapVector == null) {
+      int vectorCount = internalStruct.size();
+      mapVector = addOrGet(name, MinorType.MAP, arrowType, MapVector.class);
+      if (internalStruct.size() > vectorCount) {
+        mapVector.allocateNew();
+        if (callBack != null) {
+          callBack.doWork();
+        }
+      }
+    }
+    return mapVector;
+  }
+
   public int getTypeValue(int index) {
     return typeBuffer.getByte(index * TYPE_WIDTH);
   }
@@ -647,6 +673,8 @@ public ValueVector getVectorByType(int typeId, ArrowType arrowType) {
           return getStruct();
         case LIST:
           return getList();
+        case MAP:
+          return getMap(name, arrowType);
         default:
           throw new UnsupportedOperationException("Cannot support type: " + MinorType.values()[typeId]);
       }
diff --git a/java/vector/src/main/codegen/templates/UnionWriter.java b/java/vector/src/main/codegen/templates/UnionWriter.java
index 59322d42fde..fc4fd7dd798 100644
--- a/java/vector/src/main/codegen/templates/UnionWriter.java
+++ b/java/vector/src/main/codegen/templates/UnionWriter.java
@@ -15,6 +15,7 @@
  * limitations under the License.
  */
 
+import org.apache.arrow.util.Preconditions;
 import org.apache.arrow.vector.complex.impl.NullableStructWriterFactory;
 import org.apache.arrow.vector.types.Types;
 
@@ -39,6 +40,7 @@ public class UnionWriter extends AbstractFieldWriter implements FieldWriter {
   UnionVector data;
   private StructWriter structWriter;
   private UnionListWriter listWriter;
+  private UnionMapWriter mapWriter;
   private List<BaseWriter> writers = new java.util.ArrayList<>();
   private final NullableStructWriterFactory nullableStructWriterFactory;
 
@@ -82,6 +84,37 @@ public void endList() {
     getListWriter().endList();
   }
 
+  @Override
+  public void startMap() {
+    getMapWriter().startMap();
+    data.setType(idx(), MinorType.MAP);
+  }
+
+  @Override
+  public void endMap() {
+    getMapWriter().endMap();
+  }
+
+  @Override
+  public void startEntry() {
+    getMapWriter().startEntry();
+  }
+
+  @Override
+  public MapWriter key() {
+    return getMapWriter().key();
+  }
+
+  @Override
+  public MapWriter value() {
+    return getMapWriter().value();
+  }
+
+  @Override
+  public void endEntry() {
+    getMapWriter().endEntry();
+  }
+
   private StructWriter getStructWriter() {
     if (structWriter == null) {
       structWriter = nullableStructWriterFactory.build(data.getStruct());
@@ -110,6 +143,29 @@ public ListWriter asList() {
     return getListWriter();
   }
 
+  private MapWriter getMapWriter() {
+    if (mapWriter == null) {
+      mapWriter = new UnionMapWriter(data.getMap(new ArrowType.Map(false)));
+      mapWriter.setPosition(idx());
+      writers.add(mapWriter);
+    }
+    return mapWriter;
+  }
+
+  private MapWriter getMapWriter(ArrowType arrowType) {
+    if (mapWriter == null) {
+      mapWriter = new UnionMapWriter(data.getMap(arrowType));
+      mapWriter.setPosition(idx());
+      writers.add(mapWriter);
+    }
+    return mapWriter;
+  }
+
+  public MapWriter asMap(ArrowType arrowType) {
+    data.setType(idx(), MinorType.MAP);
+    return getMapWriter(arrowType);
+  }
+
   BaseWriter getWriter(MinorType minorType) {
     return getWriter(minorType, null);
   }
@@ -120,6 +176,8 @@ BaseWriter getWriter(MinorType minorType, ArrowType arrowType) {
       return getStructWriter();
     case LIST:
       return getListWriter();
+    case MAP:
+      return getMapWriter(arrowType);
     <#list vv.types as type>
       <#list type.minor as minor>
         <#assign name = minor.class?cap_first />
@@ -221,6 +279,34 @@ public StructWriter struct(String name) {
     return getStructWriter().struct(name);
   }
 
+  @Override
+  public MapWriter map() {
+    data.setType(idx(), MinorType.MAP);
+    getListWriter().setPosition(idx());
+    return getListWriter().map();
+  }
+
+  @Override
+  public MapWriter map(boolean keysSorted) {
+    data.setType(idx(), MinorType.MAP);
+    getListWriter().setPosition(idx());
+    return getListWriter().map(keysSorted);
+  }
+
+  @Override
+  public MapWriter map(String name) {
+    data.setType(idx(), MinorType.MAP);
+    getStructWriter().setPosition(idx());
+    return getStructWriter().map(name);
+  }
+
+  @Override
+  public MapWriter map(String name, boolean keysSorted) {
+    data.setType(idx(), MinorType.MAP);
+    getStructWriter().setPosition(idx());
+    return getStructWriter().map(name, keysSorted);
+  }
+
   <#list vv.types as type><#list type.minor as minor>
   <#assign lowerName = minor.class?uncap_first />
   <#if lowerName == "int" ><#assign lowerName = "integer" /></#if>
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractBaseReader.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractBaseReader.java
index b40d13e4743..c80fcb89d0c 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractBaseReader.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractBaseReader.java
@@ -21,6 +21,7 @@
 
 import org.apache.arrow.vector.complex.reader.FieldReader;
 import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter;
+import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter;
 import org.apache.arrow.vector.complex.writer.FieldWriter;
 import org.apache.arrow.vector.holders.DenseUnionHolder;
 import org.apache.arrow.vector.holders.UnionHolder;
@@ -109,4 +110,9 @@ public void copyAsValue(DenseUnionWriter writer) {
   public void copyAsValue(ListWriter writer) {
     ComplexCopier.copy(this, (FieldWriter) writer);
   }
+
+  @Override
+  public void copyAsValue(MapWriter writer) {
+    ComplexCopier.copy(this, (FieldWriter) writer);
+  }
 }
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java
index d60e5b430f6..06b064fdaac 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java
@@ -27,6 +27,7 @@
 import org.apache.arrow.vector.complex.FixedSizeListVector;
 import org.apache.arrow.vector.complex.LargeListVector;
 import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.MapVector;
 import org.apache.arrow.vector.complex.StructVector;
 import org.apache.arrow.vector.complex.UnionVector;
 import org.apache.arrow.vector.complex.writer.FieldWriter;
@@ -214,6 +215,9 @@ private void setWriter(ValueVector v) {
       case LIST:
         writer = new UnionListWriter((ListVector) vector, nullableStructWriterFactory);
         break;
+      case MAP:
+        writer = new UnionMapWriter((MapVector) vector);
+        break;
       case UNION:
         writer = new UnionWriter((UnionVector) vector, nullableStructWriterFactory);
         break;
@@ -243,13 +247,10 @@ public void setPosition(int index) {
     }
   }
 
-  protected FieldWriter getWriter(MinorType type) {
-    return getWriter(type, null);
-  }
-
+  @Override
   protected FieldWriter getWriter(MinorType type, ArrowType arrowType) {
     if (state == State.UNION) {
-      if (type == MinorType.DECIMAL) {
+      if (type == MinorType.DECIMAL || type == MinorType.MAP) {
         ((UnionWriter) writer).getWriter(type, arrowType);
       } else {
         ((UnionWriter) writer).getWriter(type);
@@ -276,7 +277,7 @@ protected FieldWriter getWriter(MinorType type, ArrowType arrowType) {
       writer.setPosition(position);
     } else if (type != this.type) {
       promoteToUnion();
-      if (type == MinorType.DECIMAL) {
+      if (type == MinorType.DECIMAL || type == MinorType.MAP) {
         ((UnionWriter) writer).getWriter(type, arrowType);
       } else {
         ((UnionWriter) writer).getWriter(type);
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/reader/FieldReader.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/reader/FieldReader.java
index 8825bc35edb..a888abbaa7d 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/complex/reader/FieldReader.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/reader/FieldReader.java
@@ -18,7 +18,9 @@
 package org.apache.arrow.vector.complex.reader;
 
 import org.apache.arrow.vector.complex.reader.BaseReader.ListReader;
+import org.apache.arrow.vector.complex.reader.BaseReader.MapReader;
 import org.apache.arrow.vector.complex.reader.BaseReader.RepeatedListReader;
+import org.apache.arrow.vector.complex.reader.BaseReader.RepeatedMapReader;
 import org.apache.arrow.vector.complex.reader.BaseReader.RepeatedStructReader;
 import org.apache.arrow.vector.complex.reader.BaseReader.ScalarReader;
 import org.apache.arrow.vector.complex.reader.BaseReader.StructReader;
@@ -28,5 +30,6 @@
  * Composite of all Reader types (e.g. {@link StructReader}, {@link ScalarReader}, etc).  Each reader type
  * is in essence a way of iterating over a {@link org.apache.arrow.vector.ValueVector}.
  */
-public interface FieldReader extends StructReader, ListReader, ScalarReader, RepeatedStructReader, RepeatedListReader {
+public interface FieldReader extends StructReader, ListReader, MapReader, ScalarReader,
+    RepeatedStructReader, RepeatedListReader, RepeatedMapReader {
 }
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/writer/FieldWriter.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/writer/FieldWriter.java
index bf05a0ace30..a3cb7108a11 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/complex/writer/FieldWriter.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/writer/FieldWriter.java
@@ -18,6 +18,7 @@
 package org.apache.arrow.vector.complex.writer;
 
 import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter;
+import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter;
 import org.apache.arrow.vector.complex.writer.BaseWriter.ScalarWriter;
 import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter;
 
@@ -25,7 +26,7 @@
  * Composite of all writer types.  Writers are convenience classes for incrementally
  * adding values to {@linkplain org.apache.arrow.vector.ValueVector}s.
  */
-public interface FieldWriter extends StructWriter, ListWriter, ScalarWriter {
+public interface FieldWriter extends StructWriter, ListWriter, MapWriter, ScalarWriter {
   void allocate();
 
   void clear();
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java
index 3b85ed6fe69..6d699a456bd 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java
@@ -33,6 +33,7 @@
 import org.apache.arrow.vector.complex.impl.UnionMapWriter;
 import org.apache.arrow.vector.complex.reader.FieldReader;
 import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter;
+import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter;
 import org.apache.arrow.vector.types.Types.MinorType;
 import org.apache.arrow.vector.types.pojo.ArrowType;
 import org.apache.arrow.vector.types.pojo.FieldType;
@@ -624,6 +625,416 @@ public void testMapWithListValue() throws Exception {
     }
   }
 
+  @Test
+  public void testMapWithMapValue() throws Exception {
+    try (MapVector mapVector = MapVector.empty("sourceVector", allocator, false)) {
+
+      UnionMapWriter mapWriter = mapVector.getWriter();
+      MapWriter valueWriter;
+
+      // we are essentially writing Map<Long, Map<Long, Long>>
+      // populate map vector with the following three records
+      // [
+      //    null,
+      //    [1:[50: 100, 200:400], 2:[75: 175, 150: 250]],
+      //    [3:[10: 20], 4:[15: 20], 5:[25: 30, 35: null]]
+      // ]
+
+      /* write null at index 0 */
+      mapWriter.setPosition(0);
+      mapWriter.writeNull();
+
+      /* write one or more maps at index 1 */
+      mapWriter.setPosition(1);
+      mapWriter.startMap();
+
+      mapWriter.startEntry();
+      mapWriter.key().bigInt().writeBigInt(1);
+      valueWriter = mapWriter.value().map(false);
+      valueWriter.startMap();
+      writeEntry(valueWriter, 50, 100L);
+      writeEntry(valueWriter, 200, 400L);
+      valueWriter.endMap();
+      mapWriter.endEntry();
+
+      mapWriter.startEntry();
+      mapWriter.key().bigInt().writeBigInt(2);
+      valueWriter = mapWriter.value().map(false);
+      valueWriter.startMap();
+      writeEntry(valueWriter, 75, 175L);
+      writeEntry(valueWriter, 150, 250L);
+      valueWriter.endMap();
+      mapWriter.endEntry();
+
+      mapWriter.endMap();
+
+      /* write one or more maps at index 2 */
+      mapWriter.setPosition(2);
+      mapWriter.startMap();
+
+      mapWriter.startEntry();
+      mapWriter.key().bigInt().writeBigInt(3);
+      valueWriter = mapWriter.value().map(true);
+      valueWriter.startMap();
+      writeEntry(valueWriter, 10, 20L);
+      valueWriter.endMap();
+      mapWriter.endEntry();
+
+      mapWriter.startEntry();
+      mapWriter.key().bigInt().writeBigInt(4);
+      valueWriter = mapWriter.value().map(false);
+      valueWriter.startMap();
+      writeEntry(valueWriter, 15, 20L);
+      valueWriter.endMap();
+      mapWriter.endEntry();
+
+      mapWriter.startEntry();
+      mapWriter.key().bigInt().writeBigInt(5);
+      valueWriter = mapWriter.value().map(false);
+      valueWriter.startMap();
+      writeEntry(valueWriter, 25, 30L);
+      writeEntry(valueWriter, 35, (Long) null);
+      valueWriter.endMap();
+      mapWriter.endEntry();
+
+      mapWriter.endMap();
+
+      assertEquals(2, mapVector.getLastSet());
+
+      mapWriter.setValueCount(3);
+
+      assertEquals(3, mapVector.getValueCount());
+
+      // Get mapVector element at index 0
+      Object result = mapVector.getObject(0);
+      assertNull(result);
+
+      // Get mapVector element at index 1
+      result = mapVector.getObject(1);
+      ArrayList<?> resultSet = (ArrayList<?>) result;
+
+      // 2 map entries at index 0
+      assertEquals(2, resultSet.size());
+
+      // First Map entry
+      Map<?, ?> resultStruct = (Map<?, ?>) resultSet.get(0);
+      assertEquals(1L, getResultKey(resultStruct));
+      ArrayList<Map<?, ?>> list = (ArrayList<Map<?, ?>>) getResultValue(resultStruct);
+      assertEquals(2, list.size()); // value is a list of 2 two maps
+      Map<?, ?> innerMap = list.get(0);
+      assertEquals(50L, getResultKey(innerMap));
+      assertEquals(100L, getResultValue(innerMap));
+      innerMap = list.get(1);
+      assertEquals(200L, getResultKey(innerMap));
+      assertEquals(400L, getResultValue(innerMap));
+
+      // Second Map entry
+      resultStruct = (Map<?, ?>) resultSet.get(1);
+      assertEquals(2L, getResultKey(resultStruct));
+      list = (ArrayList<Map<?, ?>>) getResultValue(resultStruct);
+      assertEquals(2, list.size()); // value is a list of two maps
+      innerMap = list.get(0);
+      assertEquals(75L, getResultKey(innerMap));
+      assertEquals(175L, getResultValue(innerMap));
+      innerMap = list.get(1);
+      assertEquals(150L, getResultKey(innerMap));
+      assertEquals(250L, getResultValue(innerMap));
+
+      // Get mapVector element at index 2
+      result = mapVector.getObject(2);
+      resultSet = (ArrayList<?>) result;
+
+      // 3 map entries at index 1
+      assertEquals(3, resultSet.size());
+
+      // First Map entry
+      resultStruct = (Map<?, ?>) resultSet.get(0);
+      assertEquals(3L, getResultKey(resultStruct));
+      list = (ArrayList<Map<?, ?>>) getResultValue(resultStruct);
+      assertEquals(1, list.size()); // value is a list of maps with 1 element
+      innerMap = list.get(0);
+      assertEquals(10L, getResultKey(innerMap));
+      assertEquals(20L, getResultValue(innerMap));
+
+      // Second Map entry
+      resultStruct = (Map<?, ?>) resultSet.get(1);
+      assertEquals(4L, getResultKey(resultStruct));
+      list = (ArrayList<Map<?, ?>>) getResultValue(resultStruct);
+      assertEquals(1, list.size()); // value is a list of maps with 1 element
+      innerMap = list.get(0);
+      assertEquals(15L, getResultKey(innerMap));
+      assertEquals(20L, getResultValue(innerMap));
+
+      // Third Map entry
+      resultStruct = (Map<?, ?>) resultSet.get(2);
+      assertEquals(5L, getResultKey(resultStruct));
+      list = (ArrayList<Map<?, ?>>) getResultValue(resultStruct);
+      assertEquals(2, list.size()); // value is a list of maps with 2 elements
+      innerMap = list.get(0);
+      assertEquals(25L, getResultKey(innerMap));
+      assertEquals(30L, getResultValue(innerMap));
+      innerMap = list.get(1);
+      assertEquals(35L, getResultKey(innerMap));
+      assertNull(innerMap.get(MapVector.VALUE_NAME));
+
+      /* check underlying bitVector */
+      assertTrue(mapVector.isNull(0));
+      assertFalse(mapVector.isNull(1));
+      assertFalse(mapVector.isNull(2));
+
+      /* check underlying offsets */
+      final ArrowBuf offsetBuffer = mapVector.getOffsetBuffer();
+
+      /* mapVector has 0 entries at index 0, 2 entries at index 1, and 3 entries at index 2 */
+      assertEquals(0, offsetBuffer.getInt(0 * MapVector.OFFSET_WIDTH));
+      assertEquals(0, offsetBuffer.getInt(1 * MapVector.OFFSET_WIDTH));
+      assertEquals(2, offsetBuffer.getInt(2 * MapVector.OFFSET_WIDTH));
+      assertEquals(5, offsetBuffer.getInt(3 * MapVector.OFFSET_WIDTH));
+    }
+  }
+
+  @Test
+  public void testMapWithMapKeyAndMapValue() throws Exception {
+    try (MapVector mapVector = MapVector.empty("sourceVector", allocator, false)) {
+
+      UnionMapWriter mapWriter = mapVector.getWriter();
+      MapWriter keyWriter;
+      MapWriter valueWriter;
+
+      // we are essentially writing Map<Map<Integer, Integer>, Map<Long, Long>>
+      // populate map vector with the following two records
+      // [
+      //    [[5: 10, 20: 40]:[50: 100, 200: 400], [50: 100]:[75: 175, 150: 250]],
+      //    [[1: 2]:[10: 20], [30: 40]:[15: 20], [50: 60, 70: null]:[25: 30, 35: null], [5: null]: null]
+      // ]
+
+      mapWriter.setPosition(0);
+      mapWriter.startMap();
+
+      mapWriter.startEntry();
+      keyWriter = mapWriter.key().map(false);
+      keyWriter.startMap();
+      writeEntry(keyWriter, 5, 10);
+      writeEntry(keyWriter, 20, 40);
+      keyWriter.endMap();
+      valueWriter = mapWriter.value().map(false);
+      valueWriter.startMap();
+      writeEntry(valueWriter, 50, 100L);
+      writeEntry(valueWriter, 200, 400L);
+      valueWriter.endMap();
+      mapWriter.endEntry();
+
+      mapWriter.startEntry();
+      keyWriter = mapWriter.key().map(false);
+      keyWriter.startMap();
+      writeEntry(keyWriter, 50, 100);
+      keyWriter.endMap();
+      valueWriter = mapWriter.value().map(false);
+      valueWriter.startMap();
+      writeEntry(valueWriter, 75, 175L);
+      writeEntry(valueWriter, 150, 250L);
+      valueWriter.endMap();
+      mapWriter.endEntry();
+
+      mapWriter.endMap();
+
+      /* write one or more maps at index 1 */
+      mapWriter.setPosition(1);
+      mapWriter.startMap();
+
+      mapWriter.startEntry();
+      keyWriter = mapWriter.key().map(false);
+      keyWriter.startMap();
+      writeEntry(keyWriter, 1, 2);
+      keyWriter.endMap();
+      valueWriter = mapWriter.value().map(true);
+      valueWriter.startMap();
+      writeEntry(valueWriter, 10, 20L);
+      valueWriter.endMap();
+      mapWriter.endEntry();
+
+      mapWriter.startEntry();
+      keyWriter = mapWriter.key().map(false);
+      keyWriter.startMap();
+      writeEntry(keyWriter, 30, 40);
+      keyWriter.endMap();
+      valueWriter = mapWriter.value().map(false);
+      valueWriter.startMap();
+      writeEntry(valueWriter, 15, 20L);
+      valueWriter.endMap();
+      mapWriter.endEntry();
+
+      mapWriter.startEntry();
+      keyWriter = mapWriter.key().map(false);
+      keyWriter.startMap();
+      writeEntry(keyWriter, 50, 60);
+      writeEntry(keyWriter, 70, (Integer) null);
+      keyWriter.endMap();
+      valueWriter = mapWriter.value().map(false);
+      valueWriter.startMap();
+      writeEntry(valueWriter, 25, 30L);
+      writeEntry(valueWriter, 35, (Long) null);
+      valueWriter.endMap();
+      mapWriter.endEntry();
+
+      mapWriter.startEntry();
+      keyWriter = mapWriter.key().map(false);
+      keyWriter.startMap();
+      writeEntry(keyWriter, 5, (Integer) null);
+      keyWriter.endMap();
+      valueWriter = mapWriter.value().map(false);
+      valueWriter.writeNull();
+      mapWriter.endEntry();
+
+      mapWriter.endMap();
+
+      assertEquals(1, mapVector.getLastSet());
+
+      mapWriter.setValueCount(2);
+
+      assertEquals(2, mapVector.getValueCount());
+
+      // Get mapVector element at index 0
+      Object result = mapVector.getObject(0);
+      ArrayList<?> resultSet = (ArrayList<?>) result;
+
+      // 2 map entries at index 0
+      assertEquals(2, resultSet.size());
+
+      // First Map entry
+      Map<?, ArrayList<Map<?, ?>>> resultStruct = (Map<?, ArrayList<Map<?, ?>>>) resultSet.get(0);
+      ArrayList<Map<?, ?>> list = getResultKey(resultStruct);
+      assertEquals(2, list.size()); // key is a list of 2 two maps
+      Map<?, ?> innerMap = list.get(0);
+      assertEquals(5, getResultKey(innerMap));
+      assertEquals(10, getResultValue(innerMap));
+      innerMap = list.get(1);
+      assertEquals(20, getResultKey(innerMap));
+      assertEquals(40, getResultValue(innerMap));
+
+      list = getResultValue(resultStruct);
+      assertEquals(2, list.size()); // value is a list of 2 two maps
+      innerMap = list.get(0);
+      assertEquals(50L, getResultKey(innerMap));
+      assertEquals(100L, getResultValue(innerMap));
+      innerMap = list.get(1);
+      assertEquals(200L, getResultKey(innerMap));
+      assertEquals(400L, getResultValue(innerMap));
+
+      // Second Map entry
+      resultStruct = (Map<?, ArrayList<Map<?, ?>>>) resultSet.get(1);
+      list = getResultKey(resultStruct);
+      assertEquals(1, list.size()); // key is a list of 1 two map
+      innerMap = list.get(0);
+      assertEquals(50, getResultKey(innerMap));
+      assertEquals(100, getResultValue(innerMap));
+
+      list = getResultValue(resultStruct);
+      assertEquals(2, list.size()); // value is a list of two maps
+      innerMap = list.get(0);
+      assertEquals(75L, getResultKey(innerMap));
+      assertEquals(175L, getResultValue(innerMap));
+      innerMap = list.get(1);
+      assertEquals(150L, getResultKey(innerMap));
+      assertEquals(250L, getResultValue(innerMap));
+
+      // Get mapVector element at index 1
+      result = mapVector.getObject(1);
+      resultSet = (ArrayList<?>) result;
+
+      // 4 map entries at index 1
+      assertEquals(4, resultSet.size());
+
+      // First Map entry
+      resultStruct = (Map<?, ArrayList<Map<?, ?>>>) resultSet.get(0);
+      list = getResultKey(resultStruct);
+      assertEquals(1, list.size()); // key is a list of 1 map
+      innerMap = list.get(0);
+      assertEquals(1, getResultKey(innerMap));
+      assertEquals(2, getResultValue(innerMap));
+
+      list = getResultValue(resultStruct);
+      assertEquals(1, list.size()); // value is a list of maps with 1 element
+      innerMap = list.get(0);
+      assertEquals(10L, getResultKey(innerMap));
+      assertEquals(20L, getResultValue(innerMap));
+
+      // Second Map entry
+      resultStruct = (Map<?, ArrayList<Map<?, ?>>>) resultSet.get(1);
+      list = getResultKey(resultStruct);
+      assertEquals(1, list.size()); // key is a list of 1 map
+      innerMap = list.get(0);
+      assertEquals(30, getResultKey(innerMap));
+      assertEquals(40, getResultValue(innerMap));
+
+      list = getResultValue(resultStruct);
+      assertEquals(1, list.size()); // value is a list of maps with 1 element
+      innerMap = list.get(0);
+      assertEquals(15L, getResultKey(innerMap));
+      assertEquals(20L, getResultValue(innerMap));
+
+      // Third Map entry
+      resultStruct = (Map<?, ArrayList<Map<?, ?>>>) resultSet.get(2);
+      list = getResultKey(resultStruct);
+      assertEquals(2, list.size()); // key is a list of two maps
+      innerMap = list.get(0);
+      assertEquals(50, getResultKey(innerMap));
+      assertEquals(60, getResultValue(innerMap));
+      innerMap = list.get(1);
+      assertEquals(70, getResultKey(innerMap));
+      assertNull(innerMap.get(MapVector.VALUE_NAME));
+
+      list = getResultValue(resultStruct);
+      assertEquals(2, list.size()); // value is a list of maps with 2 elements
+      innerMap = list.get(0);
+      assertEquals(25L, getResultKey(innerMap));
+      assertEquals(30L, getResultValue(innerMap));
+      innerMap = list.get(1);
+      assertEquals(35L, getResultKey(innerMap));
+      assertNull(innerMap.get(MapVector.VALUE_NAME));
+
+      // Fourth Map entry
+      resultStruct = (Map<?, ArrayList<Map<?, ?>>>) resultSet.get(3);
+      list = getResultKey(resultStruct);
+      assertEquals(1, list.size()); // key is a list of two maps
+      innerMap = list.get(0);
+      assertEquals(5, getResultKey(innerMap));
+      assertNull(innerMap.get(MapVector.VALUE_NAME));
+
+      assertNull(resultStruct.get(MapVector.VALUE_NAME));
+
+      /* check underlying bitVector */
+      assertFalse(mapVector.isNull(0));
+      assertFalse(mapVector.isNull(1));
+
+      /* check underlying offsets */
+      final ArrowBuf offsetBuffer = mapVector.getOffsetBuffer();
+
+      /* mapVector has 2 entries at index 0 and 4 entries at index 1 */
+      assertEquals(0, offsetBuffer.getInt(0 * MapVector.OFFSET_WIDTH));
+      assertEquals(2, offsetBuffer.getInt(1 * MapVector.OFFSET_WIDTH));
+      assertEquals(6, offsetBuffer.getInt(2 * MapVector.OFFSET_WIDTH));
+    }
+  }
+
+  private void writeEntry(MapWriter writer, long key, Long value) {
+    writer.startEntry();
+    writer.key().bigInt().writeBigInt(key);
+    if (value != null) {
+      writer.value().bigInt().writeBigInt(value);
+    }
+    writer.endEntry();
+  }
+
+  private void writeEntry(MapWriter writer, int key, Integer value) {
+    writer.startEntry();
+    writer.key().integer().writeInt(key);
+    if (value != null) {
+      writer.value().integer().writeInt(value);
+    }
+    writer.endEntry();
+  }
+
   @Test
   public void testClearAndReuse() {
     try (final MapVector vector = MapVector.empty("map", allocator, false)) {
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestUnionVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestUnionVector.java
index 15d81ab6799..defa82f8e41 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestUnionVector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestUnionVector.java
@@ -29,7 +29,9 @@
 
 import org.apache.arrow.memory.ArrowBuf;
 import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.MapVector;
 import org.apache.arrow.vector.complex.UnionVector;
+import org.apache.arrow.vector.complex.impl.UnionWriter;
 import org.apache.arrow.vector.holders.NullableBitHolder;
 import org.apache.arrow.vector.holders.NullableFloat4Holder;
 import org.apache.arrow.vector.holders.NullableIntHolder;
@@ -91,6 +93,67 @@ public void testUnionVector() throws Exception {
     }
   }
 
+  @Test
+  public void testUnionVectorMapValue() throws Exception {
+    try (UnionVector unionVector = new UnionVector(EMPTY_SCHEMA_PATH, allocator, null)) {
+      unionVector.allocateNew();
+
+      UnionWriter writer = (UnionWriter) unionVector.getWriter();
+
+      // populate map vector with the following two records
+      // [
+      //    null,
+      //    [[1: 2], [3: 4], [5: null]]
+      // ]
+
+      writer.setPosition(0);
+      writer.writeNull();
+
+      writer.setPosition(1);
+      writer.startMap();
+
+      writer.startEntry();
+      writer.key().integer().writeInt(1);
+      writer.value().integer().writeInt(2);
+      writer.endEntry();
+
+      writer.startEntry();
+      writer.key().integer().writeInt(3);
+      writer.value().integer().writeInt(4);
+      writer.endEntry();
+
+      writer.startEntry();
+      writer.key().integer().writeInt(5);
+      writer.endEntry();
+
+      writer.endMap();
+
+      unionVector.setValueCount(2);
+
+      // check that what we wrote is correct
+      assertEquals(2, unionVector.getValueCount());
+
+      // first entry
+      assertNull(unionVector.getObject(0));
+
+      // second entry
+      List<Map<String, Integer>> resultList = (List<Map<String, Integer>>) unionVector.getObject(1);
+      assertEquals(3, resultList.size());
+
+      Map<String, Integer> resultMap = resultList.get(0);
+      assertEquals(1, (int) resultMap.get(MapVector.KEY_NAME));
+      assertEquals(2, (int) resultMap.get(MapVector.VALUE_NAME));
+
+      resultMap = resultList.get(1);
+      assertEquals(3, (int) resultMap.get(MapVector.KEY_NAME));
+      assertEquals(4, (int) resultMap.get(MapVector.VALUE_NAME));
+
+      resultMap = resultList.get(2);
+      assertEquals(5, (int) resultMap.get(MapVector.KEY_NAME));
+      assertNull(resultMap.get(MapVector.VALUE_NAME));
+    }
+  }
+
   @Test
   public void testTransfer() throws Exception {
     try (UnionVector srcVector = new UnionVector(EMPTY_SCHEMA_PATH, allocator, null)) {
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java
index 043022e96b2..d44ada2f30f 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java
@@ -36,6 +36,7 @@
 import org.apache.arrow.vector.IntVector;
 import org.apache.arrow.vector.SchemaChangeCallBack;
 import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.MapVector;
 import org.apache.arrow.vector.complex.NonNullableStructVector;
 import org.apache.arrow.vector.complex.StructVector;
 import org.apache.arrow.vector.complex.UnionVector;
@@ -44,6 +45,7 @@
 import org.apache.arrow.vector.complex.impl.SingleStructWriter;
 import org.apache.arrow.vector.complex.impl.UnionListReader;
 import org.apache.arrow.vector.complex.impl.UnionListWriter;
+import org.apache.arrow.vector.complex.impl.UnionMapReader;
 import org.apache.arrow.vector.complex.impl.UnionReader;
 import org.apache.arrow.vector.complex.impl.UnionWriter;
 import org.apache.arrow.vector.complex.reader.BaseReader.StructReader;
@@ -54,6 +56,7 @@
 import org.apache.arrow.vector.complex.reader.IntReader;
 import org.apache.arrow.vector.complex.writer.BaseWriter.ComplexWriter;
 import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter;
+import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter;
 import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter;
 import org.apache.arrow.vector.holders.DecimalHolder;
 import org.apache.arrow.vector.holders.IntHolder;
@@ -546,6 +549,59 @@ private void checkUnionList(ListVector listVector) {
     }
   }
 
+  @Test
+  public void testListMapType() {
+    try (ListVector listVector = ListVector.empty("list", allocator)) {
+      listVector.allocateNew();
+      UnionListWriter listWriter = new UnionListWriter(listVector);
+      MapWriter innerMapWriter = listWriter.map(true);
+
+      for (int i = 0; i < COUNT; i++) {
+        listWriter.startList();
+        for (int j = 0; j < i % 7; j++) {
+          innerMapWriter.startMap();
+          for (int k = 0; k < i % 13; k++) {
+            innerMapWriter.startEntry();
+            innerMapWriter.key().integer().writeInt(k);
+            if (k % 2 == 0) {
+              innerMapWriter.value().bigInt().writeBigInt(k);
+            }
+            innerMapWriter.endEntry();
+          }
+          innerMapWriter.endMap();
+        }
+        listWriter.endList();
+      }
+      listWriter.setValueCount(COUNT);
+      checkListMap(listVector);
+
+      // Verify that the map vector has keysSorted = true
+      MapVector mapVector = (MapVector) listVector.getDataVector();
+      ArrowType arrowType = mapVector.getField().getFieldType().getType();
+      assertTrue(((ArrowType.Map) arrowType).getKeysSorted());
+    }
+  }
+
+  private void checkListMap(ListVector listVector) {
+    UnionListReader listReader = new UnionListReader(listVector);
+    for (int i = 0; i < COUNT; i++) {
+      listReader.setPosition(i);
+      for (int j = 0; j < i % 7; j++) {
+        listReader.next();
+        UnionMapReader mapReader = (UnionMapReader) listReader.reader();
+        for (int k = 0; k < i % 13; k++) {
+          mapReader.next();
+          Assert.assertEquals("record key: " + i, k, mapReader.key().readInteger().intValue());
+          if (k % 2 == 0) {
+            Assert.assertEquals("record value: " + i, k, mapReader.value().readLong().longValue());
+          } else {
+            Assert.assertNull("record value: " + i, mapReader.value().readLong());
+          }
+        }
+      }
+    }
+  }
+
   @Test
   public void simpleUnion() {
     UnionVector vector = new UnionVector("union", allocator, null);
@@ -1022,6 +1078,7 @@ public void testSingleStructWriter1() {
       Float4Writer float4Writer = singleStructWriter.float4("float4Field");
       Float8Writer float8Writer = singleStructWriter.float8("float8Field");
       ListWriter listWriter = singleStructWriter.list("listField");
+      MapWriter mapWriter = singleStructWriter.map("mapField", false);
 
       int intValue = 100;
       long bigIntValue = 10000;
@@ -1044,6 +1101,18 @@ public void testSingleStructWriter1() {
         listWriter.integer().writeInt(intValue + i + 3);
         listWriter.endList();
 
+        mapWriter.setPosition(i);
+        mapWriter.startMap();
+        mapWriter.startEntry();
+        mapWriter.key().integer().writeInt(intValue + i);
+        mapWriter.value().integer().writeInt(intValue + i + 1);
+        mapWriter.endEntry();
+        mapWriter.startEntry();
+        mapWriter.key().integer().writeInt(intValue + i + 2);
+        mapWriter.value().integer().writeInt(intValue + i + 3);
+        mapWriter.endEntry();
+        mapWriter.endMap();
+
         singleStructWriter.end();
       }
 
@@ -1070,6 +1139,7 @@ public void testSingleStructWriter1() {
       Float4Reader float4Reader = singleStructReader.reader("float4Field");
       Float8Reader float8Reader = singleStructReader.reader("float8Field");
       UnionListReader listReader = (UnionListReader) singleStructReader.reader("listField");
+      UnionMapReader mapReader = (UnionMapReader) singleStructReader.reader("mapField");
 
       for (int i = 0; i < initialCapacity; i++) {
         intReader.setPosition(i);
@@ -1077,6 +1147,7 @@ public void testSingleStructWriter1() {
         float4Reader.setPosition(i);
         float8Reader.setPosition(i);
         listReader.setPosition(i);
+        mapReader.setPosition(i);
 
         assertEquals(intValue + i, intReader.readInteger().intValue());
         assertEquals(bigIntValue + (long) i, bigIntReader.readLong().longValue());
@@ -1087,6 +1158,12 @@ public void testSingleStructWriter1() {
           listReader.next();
           assertEquals(intValue + i + j, listReader.reader().readInteger().intValue());
         }
+
+        for (int k = 0; k < 4; k += 2) {
+          mapReader.next();
+          assertEquals(intValue + k + i, mapReader.key().readInteger().intValue());
+          assertEquals(intValue + k + i + 1, mapReader.value().readInteger().intValue());
+        }
       }
     }
 

From f06c50fad9022932d702f0c2eb997f9cc50e3e9c Mon Sep 17 00:00:00 2001
From: Matthew Topol <mtopol@factset.com>
Date: Tue, 11 May 2021 08:50:19 -0700
Subject: [PATCH 221/719] ARROW-12684: [Go][Flight] fix nil pointer
 dereference, add test.

Closes #10265 from zeroshade/arrow-12684

Authored-by: Matthew Topol <mtopol@factset.com>
Signed-off-by: Micah Kornfield <emkornfield@gmail.com>
---
 go/arrow/flight/flight_test.go         | 32 ++++++++++++++++++++++++++
 go/arrow/flight/record_batch_reader.go | 10 ++++----
 2 files changed, 38 insertions(+), 4 deletions(-)

diff --git a/go/arrow/flight/flight_test.go b/go/arrow/flight/flight_test.go
index 61bbc3bbba5..41d6ebe2c26 100644
--- a/go/arrow/flight/flight_test.go
+++ b/go/arrow/flight/flight_test.go
@@ -374,3 +374,35 @@ func TestFlightWithAppMetadata(t *testing.T) {
 		idx++
 	}
 }
+
+type flightErrorReturn struct {}
+
+func (f *flightErrorReturn) DoGet(_ *flight.Ticket, _ flight.FlightService_DoGetServer) error {
+	return status.Error(codes.NotFound, "nofound")
+}
+
+func TestReaderError(t *testing.T) {
+	f := &flightErrorReturn{}
+	s := flight.NewFlightServer(nil)
+	s.RegisterFlightService(&flight.FlightServiceService{DoGet: f.DoGet})
+	s.Init("localhost:0")
+
+	go s.Serve()
+	defer s.Shutdown()
+
+	client, err := flight.NewFlightClient(s.Addr().String(), nil, grpc.WithInsecure())
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer client.Close()
+
+	fdata, err := client.DoGet(context.Background(), &flight.Ticket{})
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	_, err = flight.NewRecordReader(fdata)
+	if err == nil {
+		t.Fatal("should have errored")
+	}
+}
diff --git a/go/arrow/flight/record_batch_reader.go b/go/arrow/flight/record_batch_reader.go
index 582187ee2b0..19b70127f44 100644
--- a/go/arrow/flight/record_batch_reader.go
+++ b/go/arrow/flight/record_batch_reader.go
@@ -45,9 +45,11 @@ type dataMessageReader struct {
 func (d *dataMessageReader) Message() (*ipc.Message, error) {
 	fd, err := d.rdr.Recv()
 	if err != nil {
-		// clear the previous message in the error case
-		d.msg.Release()
-		d.msg = nil
+		if d.msg != nil {
+			// clear the previous message in the error case
+			d.msg.Release()
+			d.msg = nil
+		}
 		d.lastAppMetadata = nil
 		return nil, err
 	}
@@ -68,8 +70,8 @@ func (d *dataMessageReader) Release() {
 		if d.msg != nil {
 			d.msg.Release()
 			d.msg = nil
-			d.lastAppMetadata = nil
 		}
+		d.lastAppMetadata = nil
 	}
 }
 

From 22506b9798f85499341e75477d2fa4fea92a4d98 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Tue, 11 May 2021 12:16:54 -0400
Subject: [PATCH 222/719] ARROW-12670: [C++] Fix extract_regex output after
 non-matching values

Closes #10287 from pitrou/ARROW-12670-extract-regex-nulls

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Benjamin Kietzman <bengilgit@gmail.com>
---
 cpp/src/arrow/compute/kernels/scalar_string.cc      | 9 ++-------
 cpp/src/arrow/compute/kernels/scalar_string_test.cc | 4 ++++
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/scalar_string.cc b/cpp/src/arrow/compute/kernels/scalar_string.cc
index 065c512c552..be1cc83d158 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string.cc
@@ -1635,12 +1635,7 @@ struct ExtractRegex : public ExtractRegexBase {
             checked_cast<BuilderType*>(struct_builder->field_builder(i)));
       }
 
-      auto visit_null = [&]() {
-        for (int i = 0; i < group_count; i++) {
-          RETURN_NOT_OK(field_builders[i]->AppendEmptyValue());
-        }
-        return struct_builder->AppendNull();
-      };
+      auto visit_null = [&]() { return struct_builder->AppendNull(); };
       auto visit_value = [&](util::string_view s) {
         if (Match(s)) {
           for (int i = 0; i < group_count; i++) {
@@ -1648,7 +1643,7 @@ struct ExtractRegex : public ExtractRegexBase {
           }
           return struct_builder->Append();
         } else {
-          return visit_null();
+          return struct_builder->AppendNull();
         }
       };
       const ArrayData& input = *batch[0].array();
diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
index 577493913b5..cb74b1449b5 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
@@ -539,6 +539,10 @@ TYPED_TEST(TestStringKernels, ExtractRegex) {
       "extract_regex", R"(["a1", "b2", "c3", null])", type,
       R"([{"letter": "a", "digit": "1"}, {"letter": "b", "digit": "2"}, null, null])",
       &options);
+  this->CheckUnary(
+      "extract_regex", R"(["a1", "c3", null, "b2"])", type,
+      R"([{"letter": "a", "digit": "1"}, null, null, {"letter": "b", "digit": "2"}])",
+      &options);
   this->CheckUnary("extract_regex", R"(["a1", "b2"])", type,
                    R"([{"letter": "a", "digit": "1"}, {"letter": "b", "digit": "2"}])",
                    &options);

From 046a30b72c7259b5288e6a4df3f7ec03d0787b81 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Tue, 11 May 2021 12:51:08 -0400
Subject: [PATCH 223/719] ARROW-11928: [C++] Execution engine API

Closes #10204 from pitrou/ARROW-11928-engine-hierarchy-v2

Lead-authored-by: "Antoine Pitrou <antoine@python.org>"
Co-authored-by: Benjamin Kietzman <bengilgit@gmail.com>
Co-authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Benjamin Kietzman <bengilgit@gmail.com>
---
 ci/appveyor-cpp-build.bat                 |   1 +
 ci/scripts/cpp_build.sh                   |   1 +
 cpp/CMakeLists.txt                        |   4 +
 cpp/src/arrow/CMakeLists.txt              |   4 +
 cpp/src/arrow/compute/exec/CMakeLists.txt |   2 +
 cpp/src/arrow/compute/exec/exec_plan.cc   | 218 ++++++++++++
 cpp/src/arrow/compute/exec/exec_plan.h    | 246 +++++++++++++
 cpp/src/arrow/compute/exec/plan_test.cc   | 402 ++++++++++++++++++++++
 cpp/src/arrow/compute/exec/test_util.cc   | 400 +++++++++++++++++++++
 cpp/src/arrow/compute/exec/test_util.h    |  70 ++++
 cpp/src/arrow/compute/type_fwd.h          |   1 +
 cpp/src/arrow/type_fwd.h                  |   1 +
 cpp/src/arrow/util/iterator_test.cc       |   1 +
 13 files changed, 1351 insertions(+)
 create mode 100644 cpp/src/arrow/compute/exec/exec_plan.cc
 create mode 100644 cpp/src/arrow/compute/exec/exec_plan.h
 create mode 100644 cpp/src/arrow/compute/exec/plan_test.cc
 create mode 100644 cpp/src/arrow/compute/exec/test_util.cc
 create mode 100644 cpp/src/arrow/compute/exec/test_util.h

diff --git a/ci/appveyor-cpp-build.bat b/ci/appveyor-cpp-build.bat
index 6b930939660..534f73c2d50 100644
--- a/ci/appveyor-cpp-build.bat
+++ b/ci/appveyor-cpp-build.bat
@@ -97,6 +97,7 @@ cmake -G "%GENERATOR%" %CMAKE_ARGS% ^
       -DARROW_CXXFLAGS="%ARROW_CXXFLAGS%" ^
       -DARROW_DATASET=ON ^
       -DARROW_ENABLE_TIMING_TESTS=OFF ^
+      -DARROW_ENGINE=ON ^
       -DARROW_FLIGHT=%ARROW_BUILD_FLIGHT% ^
       -DARROW_GANDIVA=%ARROW_BUILD_GANDIVA% ^
       -DARROW_MIMALLOC=ON ^
diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh
index 8a1e4f32f3a..d47a6696e8f 100755
--- a/ci/scripts/cpp_build.sh
+++ b/ci/scripts/cpp_build.sh
@@ -59,6 +59,7 @@ cmake -G "${CMAKE_GENERATOR:-Ninja}" \
       -DARROW_CUDA=${ARROW_CUDA:-OFF} \
       -DARROW_CXXFLAGS=${ARROW_CXXFLAGS:-} \
       -DARROW_DATASET=${ARROW_DATASET:-ON} \
+      -DARROW_ENGINE=${ARROW_ENGINE:-ON} \
       -DARROW_DEPENDENCY_SOURCE=${ARROW_DEPENDENCY_SOURCE:-AUTO} \
       -DARROW_EXTRA_ERROR_CONTEXT=${ARROW_EXTRA_ERROR_CONTEXT:-OFF} \
       -DARROW_ENABLE_TIMING_TESTS=${ARROW_ENABLE_TIMING_TESTS:-ON} \
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index a6946403deb..a31af74f68e 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -343,6 +343,10 @@ if(ARROW_CUDA
   set(ARROW_IPC ON)
 endif()
 
+if(ARROW_ENGINE)
+  set(ARROW_COMPUTE ON)
+endif()
+
 if(ARROW_DATASET)
   set(ARROW_COMPUTE ON)
   set(ARROW_FILESYSTEM ON)
diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index 62ea94b8d02..bee14ae4ce3 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -367,6 +367,7 @@ if(ARROW_COMPUTE)
               compute/api_vector.cc
               compute/cast.cc
               compute/exec.cc
+              compute/exec/exec_plan.cc
               compute/exec/expression.cc
               compute/function.cc
               compute/kernel.cc
@@ -405,6 +406,7 @@ if(ARROW_COMPUTE)
     set_source_files_properties(compute/kernels/aggregate_basic_avx2.cc PROPERTIES
                                 COMPILE_FLAGS ${ARROW_AVX2_FLAG})
   endif()
+
   if(ARROW_HAVE_RUNTIME_AVX512)
     list(APPEND ARROW_SRCS compute/kernels/aggregate_basic_avx512.cc)
     set_source_files_properties(compute/kernels/aggregate_basic_avx512.cc PROPERTIES
@@ -412,6 +414,8 @@ if(ARROW_COMPUTE)
     set_source_files_properties(compute/kernels/aggregate_basic_avx512.cc PROPERTIES
                                 COMPILE_FLAGS ${ARROW_AVX512_FLAG})
   endif()
+
+  list(APPEND ARROW_TESTING_SRCS compute/exec/test_util.cc)
 endif()
 
 if(ARROW_FILESYSTEM)
diff --git a/cpp/src/arrow/compute/exec/CMakeLists.txt b/cpp/src/arrow/compute/exec/CMakeLists.txt
index a10c1dad469..ac6ddc51dff 100644
--- a/cpp/src/arrow/compute/exec/CMakeLists.txt
+++ b/cpp/src/arrow/compute/exec/CMakeLists.txt
@@ -19,4 +19,6 @@ arrow_install_all_headers("arrow/compute/exec")
 
 add_arrow_compute_test(expression_test PREFIX "arrow-compute")
 
+add_arrow_compute_test(plan_test PREFIX "arrow-compute")
+
 add_arrow_benchmark(expression_benchmark PREFIX "arrow-compute")
diff --git a/cpp/src/arrow/compute/exec/exec_plan.cc b/cpp/src/arrow/compute/exec/exec_plan.cc
new file mode 100644
index 00000000000..f765ceccf0c
--- /dev/null
+++ b/cpp/src/arrow/compute/exec/exec_plan.cc
@@ -0,0 +1,218 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/compute/exec/exec_plan.h"
+
+#include <unordered_set>
+
+#include "arrow/datum.h"
+#include "arrow/result.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/optional.h"
+
+namespace arrow {
+
+using internal::checked_cast;
+
+namespace compute {
+
+namespace {
+
+struct ExecPlanImpl : public ExecPlan {
+  ExecPlanImpl() = default;
+
+  ~ExecPlanImpl() override = default;
+
+  ExecNode* AddNode(std::unique_ptr<ExecNode> node) {
+    if (node->num_inputs() == 0) {
+      sources_.push_back(node.get());
+    }
+    if (node->num_outputs() == 0) {
+      sinks_.push_back(node.get());
+    }
+    nodes_.push_back(std::move(node));
+    return nodes_.back().get();
+  }
+
+  Status Validate() const {
+    if (nodes_.empty()) {
+      return Status::Invalid("ExecPlan has no node");
+    }
+    for (const auto& node : nodes_) {
+      RETURN_NOT_OK(node->Validate());
+    }
+    return Status::OK();
+  }
+
+  Status StartProducing() {
+    ARROW_ASSIGN_OR_RAISE(auto sorted_nodes, ReverseTopoSort());
+    Status st;
+    auto it = sorted_nodes.begin();
+    while (it != sorted_nodes.end() && st.ok()) {
+      st &= (*it++)->StartProducing();
+    }
+    if (!st.ok()) {
+      // Stop nodes that successfully started, in reverse order
+      // (`it` now points after the node that failed starting, so need to rewind)
+      --it;
+      while (it != sorted_nodes.begin()) {
+        (*--it)->StopProducing();
+      }
+    }
+    return st;
+  }
+
+  Result<NodeVector> ReverseTopoSort() {
+    struct TopoSort {
+      const std::vector<std::unique_ptr<ExecNode>>& nodes;
+      std::unordered_set<ExecNode*> visited;
+      std::unordered_set<ExecNode*> visiting;
+      NodeVector sorted;
+
+      explicit TopoSort(const std::vector<std::unique_ptr<ExecNode>>& nodes)
+          : nodes(nodes) {
+        visited.reserve(nodes.size());
+        sorted.reserve(nodes.size());
+      }
+
+      Status Sort() {
+        for (const auto& node : nodes) {
+          RETURN_NOT_OK(Visit(node.get()));
+        }
+        DCHECK_EQ(sorted.size(), nodes.size());
+        DCHECK_EQ(visited.size(), nodes.size());
+        DCHECK_EQ(visiting.size(), 0);
+        return Status::OK();
+      }
+
+      Status Visit(ExecNode* node) {
+        if (visited.count(node) != 0) {
+          return Status::OK();
+        }
+
+        auto it_success = visiting.insert(node);
+        if (!it_success.second) {
+          // Insertion failed => node is already being visited
+          return Status::Invalid("Cycle detected in execution plan");
+        }
+
+        for (auto input : node->inputs()) {
+          // Ensure that producers are inserted before this consumer
+          RETURN_NOT_OK(Visit(input));
+        }
+
+        visiting.erase(it_success.first);
+        visited.insert(node);
+        sorted.push_back(node);
+        return Status::OK();
+      }
+
+      NodeVector Reverse() {
+        std::reverse(sorted.begin(), sorted.end());
+        return std::move(sorted);
+      }
+    } topo_sort(nodes_);
+
+    RETURN_NOT_OK(topo_sort.Sort());
+    return topo_sort.Reverse();
+  }
+
+  std::vector<std::unique_ptr<ExecNode>> nodes_;
+  NodeVector sources_, sinks_;
+};
+
+ExecPlanImpl* ToDerived(ExecPlan* ptr) { return checked_cast<ExecPlanImpl*>(ptr); }
+
+const ExecPlanImpl* ToDerived(const ExecPlan* ptr) {
+  return checked_cast<const ExecPlanImpl*>(ptr);
+}
+
+util::optional<int> GetNodeIndex(const std::vector<ExecNode*>& nodes,
+                                 const ExecNode* node) {
+  for (int i = 0; i < static_cast<int>(nodes.size()); ++i) {
+    if (nodes[i] == node) return i;
+  }
+  return util::nullopt;
+}
+
+}  // namespace
+
+Result<std::shared_ptr<ExecPlan>> ExecPlan::Make() {
+  return std::make_shared<ExecPlanImpl>();
+}
+
+ExecNode* ExecPlan::AddNode(std::unique_ptr<ExecNode> node) {
+  return ToDerived(this)->AddNode(std::move(node));
+}
+
+const ExecPlan::NodeVector& ExecPlan::sources() const {
+  return ToDerived(this)->sources_;
+}
+
+const ExecPlan::NodeVector& ExecPlan::sinks() const { return ToDerived(this)->sinks_; }
+
+Status ExecPlan::Validate() { return ToDerived(this)->Validate(); }
+
+Status ExecPlan::StartProducing() { return ToDerived(this)->StartProducing(); }
+
+ExecNode::ExecNode(ExecPlan* plan, std::string label,
+                   std::vector<BatchDescr> input_descrs,
+                   std::vector<std::string> input_labels, BatchDescr output_descr,
+                   int num_outputs)
+    : plan_(plan),
+      label_(std::move(label)),
+      input_descrs_(std::move(input_descrs)),
+      input_labels_(std::move(input_labels)),
+      output_descr_(std::move(output_descr)),
+      num_outputs_(num_outputs) {}
+
+Status ExecNode::Validate() const {
+  if (inputs_.size() != input_descrs_.size()) {
+    return Status::Invalid("Invalid number of inputs for '", label(), "' (expected ",
+                           num_inputs(), ", actual ", inputs_.size(), ")");
+  }
+
+  if (static_cast<int>(outputs_.size()) != num_outputs_) {
+    return Status::Invalid("Invalid number of outputs for '", label(), "' (expected ",
+                           num_outputs(), ", actual ", outputs_.size(), ")");
+  }
+
+  DCHECK_EQ(input_descrs_.size(), input_labels_.size());
+
+  for (auto out : outputs_) {
+    auto input_index = GetNodeIndex(out->inputs(), this);
+    if (!input_index) {
+      return Status::Invalid("Node '", label(), "' outputs to node '", out->label(),
+                             "' but is not listed as an input.");
+    }
+
+    const auto& in_descr = out->input_descrs_[*input_index];
+    if (in_descr != output_descr_) {
+      return Status::Invalid(
+          "Node '", label(), "' (bound to input ", input_labels_[*input_index],
+          ") produces batches with type '", ValueDescr::ToString(output_descr_),
+          "' inconsistent with consumer '", out->label(), "' which accepts '",
+          ValueDescr::ToString(in_descr), "'");
+    }
+  }
+
+  return Status::OK();
+}
+
+}  // namespace compute
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/exec/exec_plan.h b/cpp/src/arrow/compute/exec/exec_plan.h
new file mode 100644
index 00000000000..0d2faea0ddc
--- /dev/null
+++ b/cpp/src/arrow/compute/exec/exec_plan.h
@@ -0,0 +1,246 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "arrow/compute/type_fwd.h"
+#include "arrow/type_fwd.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/visibility.h"
+
+// NOTES:
+// - ExecBatches only have arrays or scalars
+// - data streams may be ordered, so add input number?
+// - node to combine input needs to reorder
+
+namespace arrow {
+namespace compute {
+
+class ExecNode;
+
+class ARROW_EXPORT ExecPlan : public std::enable_shared_from_this<ExecPlan> {
+ public:
+  using NodeVector = std::vector<ExecNode*>;
+
+  virtual ~ExecPlan() = default;
+
+  /// Make an empty exec plan
+  static Result<std::shared_ptr<ExecPlan>> Make();
+
+  ExecNode* AddNode(std::unique_ptr<ExecNode> node);
+
+  template <typename Node, typename... Args>
+  ExecNode* EmplaceNode(Args&&... args) {
+    return AddNode(std::unique_ptr<Node>(new Node{std::forward<Args>(args)...}));
+  }
+
+  /// The initial inputs
+  const NodeVector& sources() const;
+
+  /// The final outputs
+  const NodeVector& sinks() const;
+
+  // XXX API question:
+  // There are clearly two phases in the ExecPlan lifecycle:
+  // - one construction phase where AddNode() and ExecNode::AddInput() is called
+  //   (with optional validation at the end)
+  // - one execution phase where the nodes are topo-sorted and then started
+  //
+  // => Should we separate out those APIs? e.g. have a ExecPlanBuilder
+  // for the first phase.
+
+  Status Validate();
+
+  /// Start producing on all nodes
+  ///
+  /// Nodes are started in reverse topological order, such that any node
+  /// is started before all of its inputs.
+  Status StartProducing();
+
+  // XXX should we also have `void StopProducing()`?
+
+ protected:
+  ExecPlan() = default;
+};
+
+class ARROW_EXPORT ExecNode {
+ public:
+  using NodeVector = std::vector<ExecNode*>;
+  using BatchDescr = std::vector<ValueDescr>;
+
+  virtual ~ExecNode() = default;
+
+  virtual const char* kind_name() = 0;
+
+  // The number of inputs/outputs expected by this node
+  int num_inputs() const { return static_cast<int>(input_descrs_.size()); }
+  int num_outputs() const { return num_outputs_; }
+
+  /// This node's predecessors in the exec plan
+  const NodeVector& inputs() const { return inputs_; }
+
+  /// The datatypes accepted by this node for each input
+  const std::vector<BatchDescr>& input_descrs() const { return input_descrs_; }
+
+  /// \brief Labels identifying the function of each input.
+  ///
+  /// For example, FilterNode accepts "target" and "filter" inputs.
+  const std::vector<std::string>& input_labels() const { return input_labels_; }
+
+  /// This node's successors in the exec plan
+  const NodeVector& outputs() const { return outputs_; }
+
+  /// The datatypes for batches produced by this node
+  const BatchDescr& output_descr() const { return output_descr_; }
+
+  /// This node's exec plan
+  ExecPlan* plan() { return plan_; }
+
+  /// \brief An optional label, for display and debugging
+  ///
+  /// There is no guarantee that this value is non-empty or unique.
+  const std::string& label() const { return label_; }
+
+  void AddInput(ExecNode* input) {
+    inputs_.push_back(input);
+    input->outputs_.push_back(this);
+  }
+
+  Status Validate() const;
+
+  /// Upstream API:
+  /// These functions are called by input nodes that want to inform this node
+  /// about an updated condition (a new input batch, an error, an impeding
+  /// end of stream).
+  ///
+  /// Implementation rules:
+  /// - these may be called anytime after StartProducing() has succeeded
+  ///   (and even during or after StopProducing())
+  /// - these may be called concurrently
+  /// - these are allowed to call back into PauseProducing(), ResumeProducing()
+  ///   and StopProducing()
+
+  /// Transfer input batch to ExecNode
+  virtual void InputReceived(ExecNode* input, int seq_num, compute::ExecBatch batch) = 0;
+
+  /// Signal error to ExecNode
+  virtual void ErrorReceived(ExecNode* input, Status error) = 0;
+
+  /// Mark the inputs finished after the given number of batches.
+  ///
+  /// This may be called before all inputs are received.  This simply fixes
+  /// the total number of incoming batches for an input, so that the ExecNode
+  /// knows when it has received all input, regardless of order.
+  virtual void InputFinished(ExecNode* input, int seq_stop) = 0;
+
+  /// Lifecycle API:
+  /// - start / stop to initiate and terminate production
+  /// - pause / resume to apply backpressure
+  ///
+  /// Implementation rules:
+  /// - StartProducing() should not recurse into the inputs, as it is
+  ///   handled by ExecPlan::StartProducing()
+  /// - PauseProducing(), ResumeProducing(), StopProducing() may be called
+  ///   concurrently (but only after StartProducing() has returned successfully)
+  /// - PauseProducing(), ResumeProducing(), StopProducing() may be called
+  ///   by the downstream nodes' InputReceived(), ErrorReceived(), InputFinished()
+  ///   methods
+  /// - StopProducing() should recurse into the inputs
+  /// - StopProducing() must be idempotent
+
+  // XXX What happens if StartProducing() calls an output's InputReceived()
+  // synchronously, and InputReceived() decides to call back into StopProducing()
+  // (or PauseProducing()) because it received enough data?
+  //
+  // Right now, since synchronous calls happen in both directions (input to
+  // output and then output to input), a node must be careful to be reentrant
+  // against synchronous calls from its output, *and* also concurrent calls from
+  // other threads.  The most reliable solution is to update the internal state
+  // first, and notify outputs only at the end.
+  //
+  // Alternate rules:
+  // - StartProducing(), ResumeProducing() can call synchronously into
+  //   its ouputs' consuming methods (InputReceived() etc.)
+  // - InputReceived(), ErrorReceived(), InputFinished() can call asynchronously
+  //   into its inputs' PauseProducing(), StopProducing()
+  //
+  // Alternate API:
+  // - InputReceived(), ErrorReceived(), InputFinished() return a ProductionHint
+  //   enum: either None (default), PauseProducing, ResumeProducing, StopProducing
+  // - A method allows passing a ProductionHint asynchronously from an output node
+  //   (replacing PauseProducing(), ResumeProducing(), StopProducing())
+
+  /// \brief Start producing
+  ///
+  /// This must only be called once.  If this fails, then other lifecycle
+  /// methods must not be called.
+  ///
+  /// This is typically called automatically by ExecPlan::StartProducing().
+  virtual Status StartProducing() = 0;
+
+  /// \brief Pause producing temporarily
+  ///
+  /// This call is a hint that an output node is currently not willing
+  /// to receive data.
+  ///
+  /// This may be called any number of times after StartProducing() succeeds.
+  /// However, the node is still free to produce data (which may be difficult
+  /// to prevent anyway if data is produced using multiple threads).
+  virtual void PauseProducing(ExecNode* output) = 0;
+
+  /// \brief Resume producing after a temporary pause
+  ///
+  /// This call is a hint that an output node is willing to receive data again.
+  ///
+  /// This may be called any number of times after StartProducing() succeeds.
+  /// This may also be called concurrently with PauseProducing(), which suggests
+  /// the implementation may use an atomic counter.
+  virtual void ResumeProducing(ExecNode* output) = 0;
+
+  /// \brief Stop producing definitively to a single output
+  ///
+  /// This call is a hint that an output node has completed and is not willing
+  /// to not receive any further data.
+  virtual void StopProducing(ExecNode* output) = 0;
+
+  /// \brief Stop producing definitively
+  virtual void StopProducing() = 0;
+
+ protected:
+  ExecNode(ExecPlan* plan, std::string label, std::vector<BatchDescr> input_descrs,
+           std::vector<std::string> input_labels, BatchDescr output_descr,
+           int num_outputs);
+
+  ExecPlan* plan_;
+
+  std::string label_;
+
+  std::vector<BatchDescr> input_descrs_;
+  std::vector<std::string> input_labels_;
+  NodeVector inputs_;
+
+  BatchDescr output_descr_;
+  int num_outputs_;
+  NodeVector outputs_;
+};
+
+}  // namespace compute
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/exec/plan_test.cc b/cpp/src/arrow/compute/exec/plan_test.cc
new file mode 100644
index 00000000000..d809409b28d
--- /dev/null
+++ b/cpp/src/arrow/compute/exec/plan_test.cc
@@ -0,0 +1,402 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gmock/gmock-matchers.h>
+
+#include <functional>
+#include <memory>
+
+#include "arrow/compute/exec/exec_plan.h"
+#include "arrow/compute/exec/test_util.h"
+#include "arrow/record_batch.h"
+#include "arrow/testing/future_util.h"
+#include "arrow/testing/gtest_util.h"
+#include "arrow/testing/random.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/thread_pool.h"
+
+namespace arrow {
+
+using internal::Executor;
+
+namespace compute {
+
+void AssertBatchesEqual(const RecordBatchVector& expected,
+                        const RecordBatchVector& actual) {
+  ASSERT_EQ(expected.size(), actual.size());
+  for (size_t i = 0; i < expected.size(); ++i) {
+    AssertBatchesEqual(*expected[i], *actual[i]);
+  }
+}
+
+TEST(ExecPlanConstruction, Empty) {
+  ASSERT_OK_AND_ASSIGN(auto plan, ExecPlan::Make());
+
+  ASSERT_RAISES(Invalid, plan->Validate());
+}
+
+TEST(ExecPlanConstruction, SingleNode) {
+  ASSERT_OK_AND_ASSIGN(auto plan, ExecPlan::Make());
+  auto node = MakeDummyNode(plan.get(), "dummy", /*num_inputs=*/0, /*num_outputs=*/0);
+  ASSERT_OK(plan->Validate());
+  ASSERT_THAT(plan->sources(), ::testing::ElementsAre(node));
+  ASSERT_THAT(plan->sinks(), ::testing::ElementsAre(node));
+
+  ASSERT_OK_AND_ASSIGN(plan, ExecPlan::Make());
+  node = MakeDummyNode(plan.get(), "dummy", /*num_inputs=*/1, /*num_outputs=*/0);
+  // Input not bound
+  ASSERT_RAISES(Invalid, plan->Validate());
+
+  ASSERT_OK_AND_ASSIGN(plan, ExecPlan::Make());
+  node = MakeDummyNode(plan.get(), "dummy", /*num_inputs=*/0, /*num_outputs=*/1);
+  // Output not bound
+  ASSERT_RAISES(Invalid, plan->Validate());
+}
+
+TEST(ExecPlanConstruction, SourceSink) {
+  ASSERT_OK_AND_ASSIGN(auto plan, ExecPlan::Make());
+  auto source = MakeDummyNode(plan.get(), "source", /*num_inputs=*/0, /*num_outputs=*/1);
+  auto sink = MakeDummyNode(plan.get(), "sink", /*num_inputs=*/1, /*num_outputs=*/0);
+  // Input / output not bound
+  ASSERT_RAISES(Invalid, plan->Validate());
+
+  sink->AddInput(source);
+  ASSERT_OK(plan->Validate());
+  EXPECT_THAT(plan->sources(), ::testing::ElementsAre(source));
+  EXPECT_THAT(plan->sinks(), ::testing::ElementsAre(sink));
+}
+
+TEST(ExecPlanConstruction, MultipleNode) {
+  ASSERT_OK_AND_ASSIGN(auto plan, ExecPlan::Make());
+
+  auto source1 =
+      MakeDummyNode(plan.get(), "source1", /*num_inputs=*/0, /*num_outputs=*/2);
+
+  auto source2 =
+      MakeDummyNode(plan.get(), "source2", /*num_inputs=*/0, /*num_outputs=*/1);
+
+  auto process1 =
+      MakeDummyNode(plan.get(), "process1", /*num_inputs=*/1, /*num_outputs=*/2);
+
+  auto process2 =
+      MakeDummyNode(plan.get(), "process1", /*num_inputs=*/2, /*num_outputs=*/1);
+
+  auto process3 =
+      MakeDummyNode(plan.get(), "process3", /*num_inputs=*/3, /*num_outputs=*/1);
+
+  auto sink = MakeDummyNode(plan.get(), "sink", /*num_inputs=*/1, /*num_outputs=*/0);
+
+  sink->AddInput(process3);
+
+  process3->AddInput(process1);
+  process3->AddInput(process2);
+  process3->AddInput(process1);
+
+  process2->AddInput(source1);
+  process2->AddInput(source2);
+
+  process1->AddInput(source1);
+
+  ASSERT_OK(plan->Validate());
+  ASSERT_THAT(plan->sources(), ::testing::ElementsAre(source1, source2));
+  ASSERT_THAT(plan->sinks(), ::testing::ElementsAre(sink));
+}
+
+struct StartStopTracker {
+  std::vector<std::string> started, stopped;
+
+  StartProducingFunc start_producing_func(Status st = Status::OK()) {
+    return [this, st](ExecNode* node) {
+      started.push_back(node->label());
+      return st;
+    };
+  }
+
+  StopProducingFunc stop_producing_func() {
+    return [this](ExecNode* node) { stopped.push_back(node->label()); };
+  }
+};
+
+TEST(ExecPlan, DummyStartProducing) {
+  StartStopTracker t;
+
+  ASSERT_OK_AND_ASSIGN(auto plan, ExecPlan::Make());
+  auto source1 = MakeDummyNode(plan.get(), "source1", /*num_inputs=*/0, /*num_outputs=*/2,
+                               t.start_producing_func(), t.stop_producing_func());
+  auto source2 = MakeDummyNode(plan.get(), "source2", /*num_inputs=*/0, /*num_outputs=*/1,
+                               t.start_producing_func(), t.stop_producing_func());
+  auto process1 =
+      MakeDummyNode(plan.get(), "process1", /*num_inputs=*/1, /*num_outputs=*/2,
+                    t.start_producing_func(), t.stop_producing_func());
+  auto process2 =
+      MakeDummyNode(plan.get(), "process2", /*num_inputs=*/2, /*num_outputs=*/1,
+                    t.start_producing_func(), t.stop_producing_func());
+  auto process3 =
+      MakeDummyNode(plan.get(), "process3", /*num_inputs=*/3, /*num_outputs=*/1,
+                    t.start_producing_func(), t.stop_producing_func());
+
+  auto sink = MakeDummyNode(plan.get(), "sink", /*num_inputs=*/1, /*num_outputs=*/0,
+                            t.start_producing_func(), t.stop_producing_func());
+
+  process1->AddInput(source1);
+  process2->AddInput(process1);
+  process2->AddInput(source2);
+  process3->AddInput(process1);
+  process3->AddInput(source1);
+  process3->AddInput(process2);
+  sink->AddInput(process3);
+
+  ASSERT_OK(plan->Validate());
+  ASSERT_EQ(t.started.size(), 0);
+  ASSERT_EQ(t.stopped.size(), 0);
+
+  ASSERT_OK(plan->StartProducing());
+  // Note that any correct reverse topological order may do
+  ASSERT_THAT(t.started, ::testing::ElementsAre("sink", "process3", "process2",
+                                                "process1", "source2", "source1"));
+  ASSERT_EQ(t.stopped.size(), 0);
+}
+
+TEST(ExecPlan, DummyStartProducingCycle) {
+  // A trivial cycle
+  ASSERT_OK_AND_ASSIGN(auto plan, ExecPlan::Make());
+  auto node = MakeDummyNode(plan.get(), "dummy", /*num_inputs=*/1, /*num_outputs=*/1);
+  node->AddInput(node);
+  ASSERT_OK(plan->Validate());
+  ASSERT_RAISES(Invalid, plan->StartProducing());
+
+  // A less trivial one
+  ASSERT_OK_AND_ASSIGN(plan, ExecPlan::Make());
+  auto source = MakeDummyNode(plan.get(), "source", /*num_inputs=*/0, /*num_outputs=*/1);
+  auto process1 =
+      MakeDummyNode(plan.get(), "process1", /*num_inputs=*/2, /*num_outputs=*/2);
+  auto process2 =
+      MakeDummyNode(plan.get(), "process2", /*num_inputs=*/1, /*num_outputs=*/1);
+  auto process3 =
+      MakeDummyNode(plan.get(), "process3", /*num_inputs=*/2, /*num_outputs=*/2);
+  auto sink = MakeDummyNode(plan.get(), "sink", /*num_inputs=*/1, /*num_outputs=*/0);
+
+  process1->AddInput(source);
+  process2->AddInput(process1);
+  process3->AddInput(process2);
+  process3->AddInput(process1);
+  process1->AddInput(process3);
+  sink->AddInput(process3);
+
+  ASSERT_OK(plan->Validate());
+  ASSERT_RAISES(Invalid, plan->StartProducing());
+}
+
+TEST(ExecPlan, DummyStartProducingError) {
+  StartStopTracker t;
+
+  ASSERT_OK_AND_ASSIGN(auto plan, ExecPlan::Make());
+  auto source1 = MakeDummyNode(plan.get(), "source1", /*num_inputs=*/0, /*num_outputs=*/2,
+                               t.start_producing_func(Status::NotImplemented("zzz")),
+                               t.stop_producing_func());
+  auto source2 = MakeDummyNode(plan.get(), "source2", /*num_inputs=*/0, /*num_outputs=*/1,
+                               t.start_producing_func(), t.stop_producing_func());
+  auto process1 = MakeDummyNode(
+      plan.get(), "process1", /*num_inputs=*/1, /*num_outputs=*/2,
+      t.start_producing_func(Status::IOError("xxx")), t.stop_producing_func());
+  auto process2 =
+      MakeDummyNode(plan.get(), "process2", /*num_inputs=*/2, /*num_outputs=*/1,
+                    t.start_producing_func(), t.stop_producing_func());
+  process1->AddInput(source1);
+  process2->AddInput(process1);
+  process2->AddInput(source2);
+  auto process3 =
+      MakeDummyNode(plan.get(), "process3", /*num_inputs=*/3, /*num_outputs=*/1,
+                    t.start_producing_func(), t.stop_producing_func());
+  process3->AddInput(process1);
+  process3->AddInput(source1);
+  process3->AddInput(process2);
+  auto sink = MakeDummyNode(plan.get(), "sink", /*num_inputs=*/1, /*num_outputs=*/0,
+                            t.start_producing_func(), t.stop_producing_func());
+  sink->AddInput(process3);
+
+  ASSERT_OK(plan->Validate());
+  ASSERT_EQ(t.started.size(), 0);
+  ASSERT_EQ(t.stopped.size(), 0);
+
+  // `process1` raises IOError
+  ASSERT_RAISES(IOError, plan->StartProducing());
+  ASSERT_THAT(t.started,
+              ::testing::ElementsAre("sink", "process3", "process2", "process1"));
+  // Nodes that started successfully were stopped in reverse order
+  ASSERT_THAT(t.stopped, ::testing::ElementsAre("process2", "process3", "sink"));
+}
+
+// TODO move this to gtest_util.h?
+
+class SlowRecordBatchReader : public RecordBatchReader {
+ public:
+  explicit SlowRecordBatchReader(std::shared_ptr<RecordBatchReader> reader)
+      : reader_(std::move(reader)) {}
+
+  std::shared_ptr<Schema> schema() const override { return reader_->schema(); }
+
+  Status ReadNext(std::shared_ptr<RecordBatch>* batch) override {
+    SleepABit();
+    return reader_->ReadNext(batch);
+  }
+
+  static Result<std::shared_ptr<RecordBatchReader>> Make(
+      RecordBatchVector batches, std::shared_ptr<Schema> schema = nullptr) {
+    ARROW_ASSIGN_OR_RAISE(auto reader,
+                          RecordBatchReader::Make(std::move(batches), std::move(schema)));
+    return std::make_shared<SlowRecordBatchReader>(std::move(reader));
+  }
+
+ protected:
+  std::shared_ptr<RecordBatchReader> reader_;
+};
+
+static Result<RecordBatchGenerator> MakeSlowRecordBatchGenerator(
+    RecordBatchVector batches, std::shared_ptr<Schema> schema) {
+  auto gen = MakeVectorGenerator(batches);
+  // TODO move this into testing/async_generator_util.h?
+  auto delayed_gen = MakeMappedGenerator<std::shared_ptr<RecordBatch>>(
+      std::move(gen), [](const std::shared_ptr<RecordBatch>& batch) {
+        auto fut = Future<std::shared_ptr<RecordBatch>>::Make();
+        SleepABitAsync().AddCallback(
+            [fut, batch](const Result<::arrow::detail::Empty>&) mutable {
+              fut.MarkFinished(batch);
+            });
+        return fut;
+      });
+  // Adding readahead implicitly adds parallelism by pulling reentrantly from
+  // the delayed generator
+  return MakeReadaheadGenerator(std::move(delayed_gen), /*max_readahead=*/64);
+}
+
+class TestExecPlanExecution : public ::testing::Test {
+ public:
+  void SetUp() override {
+    ASSERT_OK_AND_ASSIGN(io_executor_, internal::ThreadPool::Make(8));
+  }
+
+  RecordBatchVector MakeRandomBatches(const std::shared_ptr<Schema>& schema,
+                                      int num_batches = 10, int batch_size = 4) {
+    random::RandomArrayGenerator rng(42);
+    RecordBatchVector batches;
+    batches.reserve(num_batches);
+    for (int i = 0; i < num_batches; ++i) {
+      batches.push_back(rng.BatchOf(schema->fields(), batch_size));
+    }
+    return batches;
+  }
+
+  struct CollectorPlan {
+    std::shared_ptr<ExecPlan> plan;
+    RecordBatchCollectNode* sink;
+  };
+
+  Result<CollectorPlan> MakeSourceSink(std::shared_ptr<RecordBatchReader> reader,
+                                       const std::shared_ptr<Schema>& schema) {
+    ARROW_ASSIGN_OR_RAISE(auto plan, ExecPlan::Make());
+    auto source =
+        MakeRecordBatchReaderNode(plan.get(), "source", reader, io_executor_.get());
+    auto sink = MakeRecordBatchCollectNode(plan.get(), "sink", schema);
+    sink->AddInput(source);
+    return CollectorPlan{plan, sink};
+  }
+
+  Result<CollectorPlan> MakeSourceSink(RecordBatchGenerator generator,
+                                       const std::shared_ptr<Schema>& schema) {
+    ARROW_ASSIGN_OR_RAISE(auto plan, ExecPlan::Make());
+    auto source = MakeRecordBatchReaderNode(plan.get(), "source", schema, generator,
+                                            io_executor_.get());
+    auto sink = MakeRecordBatchCollectNode(plan.get(), "sink", schema);
+    sink->AddInput(source);
+    return CollectorPlan{plan, sink};
+  }
+
+  Result<CollectorPlan> MakeSourceSink(const RecordBatchVector& batches,
+                                       const std::shared_ptr<Schema>& schema) {
+    ARROW_ASSIGN_OR_RAISE(auto reader, RecordBatchReader::Make(batches, schema));
+    return MakeSourceSink(std::move(reader), schema);
+  }
+
+  Result<RecordBatchVector> StartAndCollect(ExecPlan* plan,
+                                            RecordBatchCollectNode* sink) {
+    RETURN_NOT_OK(plan->StartProducing());
+    auto fut = CollectAsyncGenerator(sink->generator());
+    return fut.result();
+  }
+
+  template <typename RecordBatchReaderFactory>
+  void TestSourceSink(RecordBatchReaderFactory reader_factory) {
+    auto schema = ::arrow::schema({field("a", int32()), field("b", boolean())});
+    RecordBatchVector batches{
+        RecordBatchFromJSON(schema, R"([{"a": null, "b": true},
+                                        {"a": 4,    "b": false}])"),
+        RecordBatchFromJSON(schema, R"([{"a": 5,    "b": null},
+                                        {"a": 6,    "b": false},
+                                        {"a": 7,    "b": false}])"),
+    };
+
+    ASSERT_OK_AND_ASSIGN(auto reader, reader_factory(batches, schema));
+    ASSERT_OK_AND_ASSIGN(auto cp, MakeSourceSink(reader, schema));
+    ASSERT_OK(cp.plan->Validate());
+
+    ASSERT_OK_AND_ASSIGN(auto got_batches, StartAndCollect(cp.plan.get(), cp.sink));
+    AssertBatchesEqual(batches, got_batches);
+  }
+
+  template <typename RecordBatchReaderFactory>
+  void TestStressSourceSink(int num_batches, RecordBatchReaderFactory batch_factory) {
+    auto schema = ::arrow::schema({field("a", int32()), field("b", boolean())});
+    auto batches = MakeRandomBatches(schema, num_batches);
+
+    ASSERT_OK_AND_ASSIGN(auto reader, batch_factory(batches, schema));
+    ASSERT_OK_AND_ASSIGN(auto cp, MakeSourceSink(reader, schema));
+    ASSERT_OK(cp.plan->Validate());
+
+    ASSERT_OK_AND_ASSIGN(auto got_batches, StartAndCollect(cp.plan.get(), cp.sink));
+    AssertBatchesEqual(batches, got_batches);
+  }
+
+ protected:
+  std::shared_ptr<Executor> io_executor_;
+};
+
+TEST_F(TestExecPlanExecution, SourceSink) { TestSourceSink(RecordBatchReader::Make); }
+
+TEST_F(TestExecPlanExecution, SlowSourceSink) {
+  TestSourceSink(SlowRecordBatchReader::Make);
+}
+
+TEST_F(TestExecPlanExecution, SlowSourceSinkParallel) {
+  TestSourceSink(MakeSlowRecordBatchGenerator);
+}
+
+TEST_F(TestExecPlanExecution, StressSourceSink) {
+  TestStressSourceSink(/*num_batches=*/200, RecordBatchReader::Make);
+}
+
+TEST_F(TestExecPlanExecution, StressSlowSourceSink) {
+  // This doesn't create parallelism as the RecordBatchReader is iterated serially.
+  TestStressSourceSink(/*num_batches=*/30, SlowRecordBatchReader::Make);
+}
+
+TEST_F(TestExecPlanExecution, StressSlowSourceSinkParallel) {
+  TestStressSourceSink(/*num_batches=*/300, MakeSlowRecordBatchGenerator);
+}
+
+}  // namespace compute
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/exec/test_util.cc b/cpp/src/arrow/compute/exec/test_util.cc
new file mode 100644
index 00000000000..f2cd7d2a740
--- /dev/null
+++ b/cpp/src/arrow/compute/exec/test_util.cc
@@ -0,0 +1,400 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/compute/exec/test_util.h"
+
+#include <algorithm>
+#include <functional>
+#include <iterator>
+#include <memory>
+#include <mutex>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include <gmock/gmock-matchers.h>
+#include <gtest/gtest.h>
+
+#include "arrow/compute/exec.h"
+#include "arrow/compute/exec/exec_plan.h"
+#include "arrow/datum.h"
+#include "arrow/record_batch.h"
+#include "arrow/type.h"
+#include "arrow/util/async_generator.h"
+#include "arrow/util/iterator.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/optional.h"
+
+namespace arrow {
+
+using internal::Executor;
+
+namespace compute {
+namespace {
+
+// TODO expose this as `static ValueDescr::FromSchemaColumns`?
+std::vector<ValueDescr> DescrFromSchemaColumns(const Schema& schema) {
+  std::vector<ValueDescr> descr(schema.num_fields());
+  std::transform(schema.fields().begin(), schema.fields().end(), descr.begin(),
+                 [](const std::shared_ptr<Field>& field) {
+                   return ValueDescr::Array(field->type());
+                 });
+  return descr;
+}
+
+struct DummyNode : ExecNode {
+  DummyNode(ExecPlan* plan, std::string label, int num_inputs, int num_outputs,
+            StartProducingFunc start_producing, StopProducingFunc stop_producing)
+      : ExecNode(plan, std::move(label), std::vector<BatchDescr>(num_inputs, descr()), {},
+                 descr(), num_outputs),
+        start_producing_(std::move(start_producing)),
+        stop_producing_(std::move(stop_producing)) {
+    for (int i = 0; i < num_inputs; ++i) {
+      input_labels_.push_back(std::to_string(i));
+    }
+  }
+
+  const char* kind_name() override { return "Dummy"; }
+
+  void InputReceived(ExecNode* input, int seq_num, compute::ExecBatch batch) override {}
+
+  void ErrorReceived(ExecNode* input, Status error) override {}
+
+  void InputFinished(ExecNode* input, int seq_stop) override {}
+
+  Status StartProducing() override {
+    if (start_producing_) {
+      RETURN_NOT_OK(start_producing_(this));
+    }
+    started_ = true;
+    return Status::OK();
+  }
+
+  void PauseProducing(ExecNode* output) override {
+    ASSERT_GE(num_outputs(), 0) << "Sink nodes should not experience backpressure";
+    AssertIsOutput(output);
+  }
+
+  void ResumeProducing(ExecNode* output) override {
+    ASSERT_GE(num_outputs(), 0) << "Sink nodes should not experience backpressure";
+    AssertIsOutput(output);
+  }
+
+  void StopProducing(ExecNode* output) override {
+    ASSERT_GE(num_outputs(), 0) << "Sink nodes should not experience backpressure";
+    AssertIsOutput(output);
+    StopProducing();
+  }
+
+  void StopProducing() override {
+    if (started_) {
+      started_ = false;
+      for (const auto& input : inputs_) {
+        input->StopProducing(this);
+      }
+      if (stop_producing_) {
+        stop_producing_(this);
+      }
+    }
+  }
+
+ private:
+  void AssertIsOutput(ExecNode* output) {
+    ASSERT_NE(std::find(outputs_.begin(), outputs_.end(), output), outputs_.end());
+  }
+
+  BatchDescr descr() const { return std::vector<ValueDescr>{ValueDescr(null())}; }
+
+  StartProducingFunc start_producing_;
+  StopProducingFunc stop_producing_;
+  bool started_ = false;
+};
+
+struct RecordBatchReaderNode : ExecNode {
+  RecordBatchReaderNode(ExecPlan* plan, std::string label,
+                        std::shared_ptr<RecordBatchReader> reader, Executor* io_executor)
+      : ExecNode(plan, std::move(label), {}, {},
+                 DescrFromSchemaColumns(*reader->schema()), /*num_outputs=*/1),
+        schema_(reader->schema()),
+        reader_(std::move(reader)),
+        io_executor_(io_executor) {}
+
+  RecordBatchReaderNode(ExecPlan* plan, std::string label, std::shared_ptr<Schema> schema,
+                        RecordBatchGenerator generator, Executor* io_executor)
+      : ExecNode(plan, std::move(label), {}, {}, DescrFromSchemaColumns(*schema),
+                 /*num_outputs=*/1),
+        schema_(std::move(schema)),
+        generator_(std::move(generator)),
+        io_executor_(io_executor) {}
+
+  const char* kind_name() override { return "RecordBatchReader"; }
+
+  void InputReceived(ExecNode* input, int seq_num, compute::ExecBatch batch) override {}
+
+  void ErrorReceived(ExecNode* input, Status error) override {}
+
+  void InputFinished(ExecNode* input, int seq_stop) override {}
+
+  Status StartProducing() override {
+    next_batch_index_ = 0;
+    if (!generator_) {
+      auto it = MakeIteratorFromReader(reader_);
+      ARROW_ASSIGN_OR_RAISE(generator_,
+                            MakeBackgroundGenerator(std::move(it), io_executor_));
+    }
+    GenerateOne(std::unique_lock<std::mutex>{mutex_});
+    return Status::OK();
+  }
+
+  void PauseProducing(ExecNode* output) override {}
+
+  void ResumeProducing(ExecNode* output) override {}
+
+  void StopProducing(ExecNode* output) override {
+    ASSERT_EQ(output, outputs_[0]);
+    std::unique_lock<std::mutex> lock(mutex_);
+    generator_ = nullptr;  // null function
+  }
+
+  void StopProducing() override { StopProducing(outputs_[0]); }
+
+ private:
+  void GenerateOne(std::unique_lock<std::mutex>&& lock) {
+    if (!generator_) {
+      // Stopped
+      return;
+    }
+    auto plan = this->plan()->shared_from_this();
+    auto fut = generator_();
+    const auto batch_index = next_batch_index_++;
+
+    lock.unlock();
+    // TODO we want to transfer always here
+    io_executor_->Transfer(std::move(fut))
+        .AddCallback(
+            [plan, batch_index, this](const Result<std::shared_ptr<RecordBatch>>& res) {
+              std::unique_lock<std::mutex> lock(mutex_);
+              if (!res.ok()) {
+                for (auto out : outputs_) {
+                  out->ErrorReceived(this, res.status());
+                }
+                return;
+              }
+              const auto& batch = *res;
+              if (IsIterationEnd(batch)) {
+                lock.unlock();
+                for (auto out : outputs_) {
+                  out->InputFinished(this, batch_index);
+                }
+              } else {
+                lock.unlock();
+                for (auto out : outputs_) {
+                  out->InputReceived(this, batch_index, compute::ExecBatch(*batch));
+                }
+                lock.lock();
+                GenerateOne(std::move(lock));
+              }
+            });
+  }
+
+  std::mutex mutex_;
+  const std::shared_ptr<Schema> schema_;
+  const std::shared_ptr<RecordBatchReader> reader_;
+  RecordBatchGenerator generator_;
+  int next_batch_index_;
+
+  Executor* const io_executor_;
+};
+
+struct RecordBatchCollectNodeImpl : public RecordBatchCollectNode {
+  RecordBatchCollectNodeImpl(ExecPlan* plan, std::string label,
+                             std::shared_ptr<Schema> schema)
+      : RecordBatchCollectNode(plan, std::move(label), {DescrFromSchemaColumns(*schema)},
+                               {"batches_to_collect"}, {}, 0),
+        schema_(std::move(schema)) {}
+
+  RecordBatchGenerator generator() override { return generator_; }
+
+  const char* kind_name() override { return "RecordBatchReader"; }
+
+  Status StartProducing() override {
+    num_received_ = 0;
+    num_emitted_ = 0;
+    emit_stop_ = -1;
+    stopped_ = false;
+    producer_.emplace(generator_.producer());
+    return Status::OK();
+  }
+
+  // sink nodes have no outputs from which to feel backpressure
+  void ResumeProducing(ExecNode* output) override {
+    FAIL() << "no outputs; this should never be called";
+  }
+  void PauseProducing(ExecNode* output) override {
+    FAIL() << "no outputs; this should never be called";
+  }
+  void StopProducing(ExecNode* output) override {
+    FAIL() << "no outputs; this should never be called";
+  }
+
+  void StopProducing() override {
+    std::unique_lock<std::mutex> lock(mutex_);
+    StopProducingUnlocked();
+  }
+
+  void InputReceived(ExecNode* input, int seq_num,
+                     compute::ExecBatch exec_batch) override {
+    std::unique_lock<std::mutex> lock(mutex_);
+    if (stopped_) {
+      return;
+    }
+    auto maybe_batch = MakeBatch(std::move(exec_batch));
+    if (!maybe_batch.ok()) {
+      lock.unlock();
+      producer_->Push(std::move(maybe_batch));
+      return;
+    }
+
+    // TODO would be nice to factor this out in a ReorderQueue
+    auto batch = *std::move(maybe_batch);
+    if (seq_num <= static_cast<int>(received_batches_.size())) {
+      received_batches_.resize(seq_num + 1, nullptr);
+    }
+    DCHECK_EQ(received_batches_[seq_num], nullptr);
+    received_batches_[seq_num] = std::move(batch);
+    ++num_received_;
+
+    if (seq_num != num_emitted_) {
+      // Cannot emit yet as there is a hole at `num_emitted_`
+      DCHECK_GT(seq_num, num_emitted_);
+      DCHECK_EQ(received_batches_[num_emitted_], nullptr);
+      return;
+    }
+    if (num_received_ == emit_stop_) {
+      StopProducingUnlocked();
+    }
+
+    // Emit batches in order as far as possible
+    // First collect these batches, then unlock before producing.
+    const auto seq_start = seq_num;
+    while (seq_num < static_cast<int>(received_batches_.size()) &&
+           received_batches_[seq_num] != nullptr) {
+      ++seq_num;
+    }
+    DCHECK_GT(seq_num, seq_start);
+    // By moving the values now, we make sure another thread won't emit the same values
+    // below
+    RecordBatchVector to_emit(
+        std::make_move_iterator(received_batches_.begin() + seq_start),
+        std::make_move_iterator(received_batches_.begin() + seq_num));
+
+    lock.unlock();
+    for (auto&& batch : to_emit) {
+      producer_->Push(std::move(batch));
+    }
+    lock.lock();
+
+    DCHECK_EQ(seq_start, num_emitted_);  // num_emitted_ wasn't bumped in the meantime
+    num_emitted_ = seq_num;
+  }
+
+  void ErrorReceived(ExecNode* input, Status error) override {
+    // XXX do we care about properly sequencing the error?
+    producer_->Push(std::move(error));
+    std::unique_lock<std::mutex> lock(mutex_);
+    StopProducingUnlocked();
+  }
+
+  void InputFinished(ExecNode* input, int seq_stop) override {
+    std::unique_lock<std::mutex> lock(mutex_);
+    DCHECK_GE(seq_stop, static_cast<int>(received_batches_.size()));
+    received_batches_.reserve(seq_stop);
+    emit_stop_ = seq_stop;
+    if (emit_stop_ == num_received_) {
+      DCHECK_EQ(emit_stop_, num_emitted_);
+      StopProducingUnlocked();
+    }
+  }
+
+ private:
+  void StopProducingUnlocked() {
+    if (!stopped_) {
+      stopped_ = true;
+      producer_->Close();
+      inputs_[0]->StopProducing(this);
+    }
+  }
+
+  // TODO factor this out as ExecBatch::ToRecordBatch()?
+  Result<std::shared_ptr<RecordBatch>> MakeBatch(compute::ExecBatch&& exec_batch) {
+    ArrayDataVector columns;
+    columns.reserve(exec_batch.values.size());
+    for (auto&& value : exec_batch.values) {
+      if (!value.is_array()) {
+        return Status::TypeError("Expected array input");
+      }
+      columns.push_back(std::move(value).array());
+    }
+    return RecordBatch::Make(schema_, exec_batch.length, std::move(columns));
+  }
+
+  const std::shared_ptr<Schema> schema_;
+
+  std::mutex mutex_;
+  RecordBatchVector received_batches_;
+  int num_received_;
+  int num_emitted_;
+  int emit_stop_;
+  bool stopped_;
+
+  PushGenerator<std::shared_ptr<RecordBatch>> generator_;
+  util::optional<PushGenerator<std::shared_ptr<RecordBatch>>::Producer> producer_;
+};
+
+}  // namespace
+
+ExecNode* MakeRecordBatchReaderNode(ExecPlan* plan, std::string label,
+                                    std::shared_ptr<RecordBatchReader> reader,
+                                    Executor* io_executor) {
+  return plan->EmplaceNode<RecordBatchReaderNode>(plan, std::move(label),
+                                                  std::move(reader), io_executor);
+}
+
+ExecNode* MakeRecordBatchReaderNode(ExecPlan* plan, std::string label,
+                                    std::shared_ptr<Schema> schema,
+                                    RecordBatchGenerator generator,
+                                    ::arrow::internal::Executor* io_executor) {
+  return plan->EmplaceNode<RecordBatchReaderNode>(
+      plan, std::move(label), std::move(schema), std::move(generator), io_executor);
+}
+
+ExecNode* MakeDummyNode(ExecPlan* plan, std::string label, int num_inputs,
+                        int num_outputs, StartProducingFunc start_producing,
+                        StopProducingFunc stop_producing) {
+  return plan->EmplaceNode<DummyNode>(plan, std::move(label), num_inputs, num_outputs,
+                                      std::move(start_producing),
+                                      std::move(stop_producing));
+}
+
+RecordBatchCollectNode* MakeRecordBatchCollectNode(
+    ExecPlan* plan, std::string label, const std::shared_ptr<Schema>& schema) {
+  return internal::checked_cast<RecordBatchCollectNode*>(
+      plan->EmplaceNode<RecordBatchCollectNodeImpl>(plan, std::move(label), schema));
+}
+
+}  // namespace compute
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/exec/test_util.h b/cpp/src/arrow/compute/exec/test_util.h
new file mode 100644
index 00000000000..c2dc785a501
--- /dev/null
+++ b/cpp/src/arrow/compute/exec/test_util.h
@@ -0,0 +1,70 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <functional>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "arrow/compute/exec/exec_plan.h"
+#include "arrow/record_batch.h"
+#include "arrow/testing/visibility.h"
+#include "arrow/util/async_generator.h"
+#include "arrow/util/type_fwd.h"
+
+namespace arrow {
+namespace compute {
+
+using StartProducingFunc = std::function<Status(ExecNode*)>;
+using StopProducingFunc = std::function<void(ExecNode*)>;
+
+// Make a dummy node that has no execution behaviour
+ARROW_TESTING_EXPORT
+ExecNode* MakeDummyNode(ExecPlan* plan, std::string label, int num_inputs,
+                        int num_outputs, StartProducingFunc = {}, StopProducingFunc = {});
+
+using RecordBatchGenerator = AsyncGenerator<std::shared_ptr<RecordBatch>>;
+
+// Make a source node (no inputs) that produces record batches by reading in the
+// background from a RecordBatchReader.
+ARROW_TESTING_EXPORT
+ExecNode* MakeRecordBatchReaderNode(ExecPlan* plan, std::string label,
+                                    std::shared_ptr<RecordBatchReader> reader,
+                                    ::arrow::internal::Executor* io_executor);
+
+ARROW_TESTING_EXPORT
+ExecNode* MakeRecordBatchReaderNode(ExecPlan* plan, std::string label,
+                                    std::shared_ptr<Schema> schema,
+                                    RecordBatchGenerator generator,
+                                    ::arrow::internal::Executor* io_executor);
+
+class RecordBatchCollectNode : public ExecNode {
+ public:
+  virtual RecordBatchGenerator generator() = 0;
+
+ protected:
+  using ExecNode::ExecNode;
+};
+
+ARROW_TESTING_EXPORT
+RecordBatchCollectNode* MakeRecordBatchCollectNode(ExecPlan* plan, std::string label,
+                                                   const std::shared_ptr<Schema>& schema);
+
+}  // namespace compute
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/type_fwd.h b/cpp/src/arrow/compute/type_fwd.h
index 4f4393486ff..5370837f1b9 100644
--- a/cpp/src/arrow/compute/type_fwd.h
+++ b/cpp/src/arrow/compute/type_fwd.h
@@ -29,6 +29,7 @@ struct FunctionOptions;
 
 struct CastOptions;
 
+struct ExecBatch;
 class ExecContext;
 class KernelContext;
 
diff --git a/cpp/src/arrow/type_fwd.h b/cpp/src/arrow/type_fwd.h
index 7eb318c8b41..d541209a314 100644
--- a/cpp/src/arrow/type_fwd.h
+++ b/cpp/src/arrow/type_fwd.h
@@ -81,6 +81,7 @@ class RecordBatchReader;
 class Table;
 
 struct Datum;
+struct ValueDescr;
 
 using ChunkedArrayVector = std::vector<std::shared_ptr<ChunkedArray>>;
 using RecordBatchVector = std::vector<std::shared_ptr<RecordBatch>>;
diff --git a/cpp/src/arrow/util/iterator_test.cc b/cpp/src/arrow/util/iterator_test.cc
index 60b57dea1e2..ab62fcb7034 100644
--- a/cpp/src/arrow/util/iterator_test.cc
+++ b/cpp/src/arrow/util/iterator_test.cc
@@ -31,6 +31,7 @@
 #include "arrow/util/iterator.h"
 #include "arrow/util/test_common.h"
 #include "arrow/util/vector.h"
+
 namespace arrow {
 
 template <typename T>

From a67dc7de44f86fdc3032655833928de446e2edf8 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Tue, 11 May 2021 15:13:29 -0400
Subject: [PATCH 224/719] ARROW-11909: [C++] Remove MakeIteratorGenerator

It's already unused so let's remove the temptation to use it.

Closes #10292 from lidavidm/arrow-11909

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/src/arrow/util/async_generator.h | 28 ----------------------------
 1 file changed, 28 deletions(-)

diff --git a/cpp/src/arrow/util/async_generator.h b/cpp/src/arrow/util/async_generator.h
index fd5d0d28e9d..3827e1645f9 100644
--- a/cpp/src/arrow/util/async_generator.h
+++ b/cpp/src/arrow/util/async_generator.h
@@ -1220,34 +1220,6 @@ AsyncGenerator<T> MakeTransferredGenerator(AsyncGenerator<T> source,
                                            internal::Executor* executor) {
   return TransferringGenerator<T>(std::move(source), executor);
 }
-/// \see MakeIteratorGenerator
-template <typename T>
-class IteratorGenerator {
- public:
-  explicit IteratorGenerator(Iterator<T> it) : it_(std::move(it)) {}
-
-  Future<T> operator()() { return Future<T>::MakeFinished(it_.Next()); }
-
- private:
-  Iterator<T> it_;
-};
-
-/// \brief Constructs a generator that yields futures from an iterator.
-///
-/// Note: Do not use this if you can avoid it.  This blocks in an async
-/// context which is a bad idea.  If you're converting sync-I/O to async
-/// then use MakeBackgroundGenerator.  Otherwise, convert the underlying
-/// source to async.  This function is only around until we can conver the
-/// remaining table readers to async.  Once all uses of this generator have
-/// been removed it should be removed(ARROW-11909).
-///
-/// This generator is not async-reentrant
-///
-/// This generator will not queue
-template <typename T>
-AsyncGenerator<T> MakeIteratorGenerator(Iterator<T> it) {
-  return IteratorGenerator<T>(std::move(it));
-}
 
 /// \see MakeBackgroundGenerator
 template <typename T>

From 3fe2df7b00291752e49beb3ee671a39df9a69cf4 Mon Sep 17 00:00:00 2001
From: Yibo Cai <yibo.cai@arm.com>
Date: Wed, 12 May 2021 13:19:49 +0200
Subject: [PATCH 225/719] ARROW-12749: [C++] Construct RecordBatch/Table/Schema
 with rvalue arguments

Closes #10301 from cyb70289/12749-rvalue

Authored-by: Yibo Cai <yibo.cai@arm.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/adapters/orc/adapter.cc             |  2 +-
 cpp/src/arrow/compute/kernels/vector_selection.cc |  8 ++++----
 cpp/src/arrow/csv/reader.cc                       |  2 +-
 cpp/src/arrow/ipc/feather.cc                      | 10 ++++++----
 cpp/src/arrow/ipc/reader.cc                       |  2 +-
 cpp/src/arrow/ipc/writer.cc                       |  2 +-
 cpp/src/arrow/record_batch.cc                     | 12 +++++-------
 cpp/src/arrow/stl.h                               |  2 +-
 cpp/src/arrow/table.cc                            | 15 +++++++--------
 cpp/src/arrow/table_builder.cc                    |  4 ++--
 10 files changed, 29 insertions(+), 30 deletions(-)

diff --git a/cpp/src/arrow/adapters/orc/adapter.cc b/cpp/src/arrow/adapters/orc/adapter.cc
index c67bc3c65b9..4548b9923a7 100644
--- a/cpp/src/arrow/adapters/orc/adapter.cc
+++ b/cpp/src/arrow/adapters/orc/adapter.cc
@@ -252,7 +252,7 @@ class ORCFileReader::Impl {
       }
     }
 
-    *out = std::make_shared<Schema>(fields, metadata);
+    *out = std::make_shared<Schema>(std::move(fields), std::move(metadata));
     return Status::OK();
   }
 
diff --git a/cpp/src/arrow/compute/kernels/vector_selection.cc b/cpp/src/arrow/compute/kernels/vector_selection.cc
index fc7a78a2305..6376ae10404 100644
--- a/cpp/src/arrow/compute/kernels/vector_selection.cc
+++ b/cpp/src/arrow/compute/kernels/vector_selection.cc
@@ -1793,7 +1793,7 @@ Result<std::shared_ptr<RecordBatch>> FilterRecordBatch(const RecordBatch& batch,
                                           TakeOptions::NoBoundsCheck(), ctx));
     columns[i] = out.make_array();
   }
-  return RecordBatch::Make(batch.schema(), indices->length, columns);
+  return RecordBatch::Make(batch.schema(), indices->length, std::move(columns));
 }
 
 Result<std::shared_ptr<Table>> FilterTable(const Table& table, const Datum& filter,
@@ -1986,7 +1986,7 @@ Result<std::shared_ptr<RecordBatch>> TakeRA(const RecordBatch& batch,
   for (int j = 0; j < ncols; j++) {
     ARROW_ASSIGN_OR_RAISE(columns[j], TakeAA(*batch.column(j), indices, options, ctx));
   }
-  return RecordBatch::Make(batch.schema(), nrows, columns);
+  return RecordBatch::Make(batch.schema(), nrows, std::move(columns));
 }
 
 Result<std::shared_ptr<Table>> TakeTA(const Table& table, const Array& indices,
@@ -1997,7 +1997,7 @@ Result<std::shared_ptr<Table>> TakeTA(const Table& table, const Array& indices,
   for (int j = 0; j < ncols; j++) {
     ARROW_ASSIGN_OR_RAISE(columns[j], TakeCA(*table.column(j), indices, options, ctx));
   }
-  return Table::Make(table.schema(), columns);
+  return Table::Make(table.schema(), std::move(columns));
 }
 
 Result<std::shared_ptr<Table>> TakeTC(const Table& table, const ChunkedArray& indices,
@@ -2007,7 +2007,7 @@ Result<std::shared_ptr<Table>> TakeTC(const Table& table, const ChunkedArray& in
   for (int j = 0; j < ncols; j++) {
     ARROW_ASSIGN_OR_RAISE(columns[j], TakeCC(*table.column(j), indices, options, ctx));
   }
-  return Table::Make(table.schema(), columns);
+  return Table::Make(table.schema(), std::move(columns));
 }
 
 static auto kDefaultTakeOptions = TakeOptions::Defaults();
diff --git a/cpp/src/arrow/csv/reader.cc b/cpp/src/arrow/csv/reader.cc
index c4352360e6b..baa40dcf46e 100644
--- a/cpp/src/arrow/csv/reader.cc
+++ b/cpp/src/arrow/csv/reader.cc
@@ -573,7 +573,7 @@ class BaseTableReader : public ReaderMixin, public csv::TableReader {
       fields.push_back(::arrow::field(column.name, array->type()));
       columns.emplace_back(std::move(array));
     }
-    return Table::Make(schema(fields), columns);
+    return Table::Make(schema(std::move(fields)), std::move(columns));
   }
 
   // Column builders for target Table (in ConversionSchema order)
diff --git a/cpp/src/arrow/ipc/feather.cc b/cpp/src/arrow/ipc/feather.cc
index 7561945d5f5..b1c30eec0b3 100644
--- a/cpp/src/arrow/ipc/feather.cc
+++ b/cpp/src/arrow/ipc/feather.cc
@@ -180,7 +180,7 @@ class ReaderV1 : public Reader {
           GetDataType(col->values(), col->metadata_type(), col->metadata(), &type));
       fields.push_back(::arrow::field(col->name()->str(), type));
     }
-    schema_ = ::arrow::schema(fields);
+    schema_ = ::arrow::schema(std::move(fields));
     return Status::OK();
   }
 
@@ -343,7 +343,7 @@ class ReaderV1 : public Reader {
       columns.emplace_back();
       RETURN_NOT_OK(GetColumn(i, &columns.back()));
     }
-    *out = Table::Make(this->schema(), columns, this->num_rows());
+    *out = Table::Make(this->schema(), std::move(columns), this->num_rows());
     return Status::OK();
   }
 
@@ -360,7 +360,8 @@ class ReaderV1 : public Reader {
       RETURN_NOT_OK(GetColumn(field_index, &columns.back()));
       fields.push_back(my_schema->field(field_index));
     }
-    *out = Table::Make(::arrow::schema(fields), columns, this->num_rows());
+    *out = Table::Make(::arrow::schema(std::move(fields)), std::move(columns),
+                       this->num_rows());
     return Status::OK();
   }
 
@@ -379,7 +380,8 @@ class ReaderV1 : public Reader {
       RETURN_NOT_OK(GetColumn(field_index, &columns.back()));
       fields.push_back(sch->field(field_index));
     }
-    *out = Table::Make(::arrow::schema(fields), columns, this->num_rows());
+    *out = Table::Make(::arrow::schema(std::move(fields)), std::move(columns),
+                       this->num_rows());
     return Status::OK();
   }
 
diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc
index 06ad0c89f2b..45a3d3e3cd8 100644
--- a/cpp/src/arrow/ipc/reader.cc
+++ b/cpp/src/arrow/ipc/reader.cc
@@ -518,7 +518,7 @@ Result<std::shared_ptr<RecordBatch>> LoadRecordBatchSubset(
                             arrow::internal::SwapEndianArrayData(filtered_columns[i]));
     }
   }
-  return RecordBatch::Make(filtered_schema, metadata->length(),
+  return RecordBatch::Make(std::move(filtered_schema), metadata->length(),
                            std::move(filtered_columns));
 }
 
diff --git a/cpp/src/arrow/ipc/writer.cc b/cpp/src/arrow/ipc/writer.cc
index c14ff5ec9bc..7b9254b7e59 100644
--- a/cpp/src/arrow/ipc/writer.cc
+++ b/cpp/src/arrow/ipc/writer.cc
@@ -557,7 +557,7 @@ class DictionarySerializer : public RecordBatchSerializer {
   Status Assemble(const std::shared_ptr<Array>& dictionary) {
     // Make a dummy record batch. A bit tedious as we have to make a schema
     auto schema = arrow::schema({arrow::field("dictionary", dictionary->type())});
-    auto batch = RecordBatch::Make(schema, dictionary->length(), {dictionary});
+    auto batch = RecordBatch::Make(std::move(schema), dictionary->length(), {dictionary});
     return RecordBatchSerializer::Assemble(*batch);
   }
 
diff --git a/cpp/src/arrow/record_batch.cc b/cpp/src/arrow/record_batch.cc
index 2e3e0f263ec..ce86ab1389a 100644
--- a/cpp/src/arrow/record_batch.cc
+++ b/cpp/src/arrow/record_batch.cc
@@ -100,8 +100,7 @@ class SimpleRecordBatch : public RecordBatch {
     }
 
     ARROW_ASSIGN_OR_RAISE(auto new_schema, schema_->AddField(i, field));
-
-    return RecordBatch::Make(new_schema, num_rows_,
+    return RecordBatch::Make(std::move(new_schema), num_rows_,
                              internal::AddVectorElement(columns_, i, column->data()));
   }
 
@@ -123,21 +122,20 @@ class SimpleRecordBatch : public RecordBatch {
     }
 
     ARROW_ASSIGN_OR_RAISE(auto new_schema, schema_->SetField(i, field));
-    return RecordBatch::Make(new_schema, num_rows_,
+    return RecordBatch::Make(std::move(new_schema), num_rows_,
                              internal::ReplaceVectorElement(columns_, i, column->data()));
   }
 
   Result<std::shared_ptr<RecordBatch>> RemoveColumn(int i) const override {
     ARROW_ASSIGN_OR_RAISE(auto new_schema, schema_->RemoveField(i));
-
-    return RecordBatch::Make(new_schema, num_rows_,
+    return RecordBatch::Make(std::move(new_schema), num_rows_,
                              internal::DeleteVectorElement(columns_, i));
   }
 
   std::shared_ptr<RecordBatch> ReplaceSchemaMetadata(
       const std::shared_ptr<const KeyValueMetadata>& metadata) const override {
     auto new_schema = schema_->WithMetadata(metadata);
-    return RecordBatch::Make(new_schema, num_rows_, columns_);
+    return RecordBatch::Make(std::move(new_schema), num_rows_, columns_);
   }
 
   std::shared_ptr<RecordBatch> Slice(int64_t offset, int64_t length) const override {
@@ -271,7 +269,7 @@ Result<std::shared_ptr<RecordBatch>> RecordBatch::SelectColumns(
 
   auto new_schema =
       std::make_shared<arrow::Schema>(std::move(fields), schema()->metadata());
-  return RecordBatch::Make(new_schema, num_rows(), std::move(columns));
+  return RecordBatch::Make(std::move(new_schema), num_rows(), std::move(columns));
 }
 
 std::shared_ptr<RecordBatch> RecordBatch::Slice(int64_t offset) const {
diff --git a/cpp/src/arrow/stl.h b/cpp/src/arrow/stl.h
index 146c5706766..a1582ed2967 100644
--- a/cpp/src/arrow/stl.h
+++ b/cpp/src/arrow/stl.h
@@ -424,7 +424,7 @@ Status TableFromTupleRange(MemoryPool* pool, Range&& rows,
     arrays.emplace_back(array);
   }
 
-  *table = Table::Make(schema, arrays);
+  *table = Table::Make(std::move(schema), std::move(arrays));
 
   return Status::OK();
 }
diff --git a/cpp/src/arrow/table.cc b/cpp/src/arrow/table.cc
index 02ba754ca69..f0a2b085448 100644
--- a/cpp/src/arrow/table.cc
+++ b/cpp/src/arrow/table.cc
@@ -99,13 +99,13 @@ class SimpleTable : public Table {
       column = column->Slice(offset, length);
       num_rows = column->length();
     }
-    return Table::Make(schema_, sliced, num_rows);
+    return Table::Make(schema_, std::move(sliced), num_rows);
   }
 
   Result<std::shared_ptr<Table>> RemoveColumn(int i) const override {
     ARROW_ASSIGN_OR_RAISE(auto new_schema, schema_->RemoveField(i));
 
-    return Table::Make(new_schema, internal::DeleteVectorElement(columns_, i),
+    return Table::Make(std::move(new_schema), internal::DeleteVectorElement(columns_, i),
                        this->num_rows());
   }
 
@@ -125,8 +125,7 @@ class SimpleTable : public Table {
     }
 
     ARROW_ASSIGN_OR_RAISE(auto new_schema, schema_->AddField(i, field_arg));
-
-    return Table::Make(new_schema,
+    return Table::Make(std::move(new_schema),
                        internal::AddVectorElement(columns_, i, std::move(col)));
   }
 
@@ -146,14 +145,14 @@ class SimpleTable : public Table {
     }
 
     ARROW_ASSIGN_OR_RAISE(auto new_schema, schema_->SetField(i, field_arg));
-    return Table::Make(new_schema,
+    return Table::Make(std::move(new_schema),
                        internal::ReplaceVectorElement(columns_, i, std::move(col)));
   }
 
   std::shared_ptr<Table> ReplaceSchemaMetadata(
       const std::shared_ptr<const KeyValueMetadata>& metadata) const override {
     auto new_schema = schema_->WithMetadata(metadata);
-    return Table::Make(new_schema, columns_);
+    return Table::Make(std::move(new_schema), columns_);
   }
 
   Result<std::shared_ptr<Table>> Flatten(MemoryPool* pool) const override {
@@ -379,7 +378,7 @@ Result<std::shared_ptr<Table>> Table::SelectColumns(
 
   auto new_schema =
       std::make_shared<arrow::Schema>(std::move(fields), schema()->metadata());
-  return Table::Make(new_schema, std::move(columns), num_rows());
+  return Table::Make(std::move(new_schema), std::move(columns), num_rows());
 }
 
 std::string Table::ToString() const {
@@ -440,7 +439,7 @@ Result<std::shared_ptr<Table>> ConcatenateTables(
     }
     columns[i] = std::make_shared<ChunkedArray>(column_arrays, schema->field(i)->type());
   }
-  return Table::Make(schema, columns);
+  return Table::Make(std::move(schema), std::move(columns));
 }
 
 Result<std::shared_ptr<Table>> PromoteTableToSchema(const std::shared_ptr<Table>& table,
diff --git a/cpp/src/arrow/table_builder.cc b/cpp/src/arrow/table_builder.cc
index 78034c92868..c026c355758 100644
--- a/cpp/src/arrow/table_builder.cc
+++ b/cpp/src/arrow/table_builder.cc
@@ -74,9 +74,9 @@ Status RecordBatchBuilder::Flush(bool reset_builders,
     }
   }
   std::shared_ptr<Schema> schema =
-      std::make_shared<Schema>(schema_fields, schema_->metadata());
+      std::make_shared<Schema>(std::move(schema_fields), schema_->metadata());
 
-  *batch = RecordBatch::Make(schema, length, std::move(fields));
+  *batch = RecordBatch::Make(std::move(schema), length, std::move(fields));
   if (reset_builders) {
     return InitBuilders();
   } else {

From 02d6675cca525d1e5faee1286a704c7d4101ab15 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Wed, 12 May 2021 08:18:06 -0400
Subject: [PATCH 226/719] ARROW-12687: [C++][Python][Dataset] Convert Scanner
 into a RecordBatchReader

This provides compatibility with APIs that expect regular RecordBatchReaders, e.g. exporting via the C Data Interface.

Closes #10268 from lidavidm/arrow-12687

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/src/arrow/dataset/scanner.cc             | 30 ++++++++++++++++++++
 cpp/src/arrow/dataset/scanner.h              |  3 ++
 cpp/src/arrow/dataset/scanner_test.cc        | 17 +++++++++++
 python/pyarrow/_dataset.pyx                  |  7 +++++
 python/pyarrow/includes/libarrow_dataset.pxd |  1 +
 python/pyarrow/tests/test_dataset.py         |  3 +-
 6 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/cpp/src/arrow/dataset/scanner.cc b/cpp/src/arrow/dataset/scanner.cc
index 0d481a83748..4eac2752add 100644
--- a/cpp/src/arrow/dataset/scanner.cc
+++ b/cpp/src/arrow/dataset/scanner.cc
@@ -164,6 +164,36 @@ Result<int64_t> Scanner::CountRows() {
   return count;
 }
 
+namespace {
+class ScannerRecordBatchReader : public RecordBatchReader {
+ public:
+  explicit ScannerRecordBatchReader(std::shared_ptr<Schema> schema,
+                                    TaggedRecordBatchIterator delegate)
+      : schema_(std::move(schema)), delegate_(std::move(delegate)) {}
+
+  std::shared_ptr<Schema> schema() const override { return schema_; }
+  Status ReadNext(std::shared_ptr<RecordBatch>* batch) override {
+    ARROW_ASSIGN_OR_RAISE(auto next, delegate_.Next());
+    if (IsIterationEnd(next)) {
+      *batch = nullptr;
+    } else {
+      *batch = std::move(next.record_batch);
+    }
+    return Status::OK();
+  }
+
+ private:
+  std::shared_ptr<Schema> schema_;
+  TaggedRecordBatchIterator delegate_;
+};
+}  // namespace
+
+Result<std::shared_ptr<RecordBatchReader>> Scanner::ToRecordBatchReader() {
+  ARROW_ASSIGN_OR_RAISE(auto it, ScanBatches());
+  return std::make_shared<ScannerRecordBatchReader>(options()->projected_schema,
+                                                    std::move(it));
+}
+
 struct ScanBatchesState : public std::enable_shared_from_this<ScanBatchesState> {
   explicit ScanBatchesState(ScanTaskIterator scan_task_it,
                             std::shared_ptr<TaskGroup> task_group_)
diff --git a/cpp/src/arrow/dataset/scanner.h b/cpp/src/arrow/dataset/scanner.h
index 5fdcbe586c0..29fd5aad994 100644
--- a/cpp/src/arrow/dataset/scanner.h
+++ b/cpp/src/arrow/dataset/scanner.h
@@ -290,6 +290,9 @@ class ARROW_DS_EXPORT Scanner {
   /// This method will push down the predicate and compute the result based on fragment
   /// metadata if possible.
   virtual Result<int64_t> CountRows();
+  /// \brief Convert the Scanner to a RecordBatchReader so it can be
+  /// easily used with APIs that expect a reader.
+  Result<std::shared_ptr<RecordBatchReader>> ToRecordBatchReader();
 
   /// \brief Get the options for this scan.
   const std::shared_ptr<ScanOptions>& options() const { return scan_options_; }
diff --git a/cpp/src/arrow/dataset/scanner_test.cc b/cpp/src/arrow/dataset/scanner_test.cc
index cb90a8cbd45..04e4de406c6 100644
--- a/cpp/src/arrow/dataset/scanner_test.cc
+++ b/cpp/src/arrow/dataset/scanner_test.cc
@@ -457,6 +457,23 @@ TEST_P(TestScanner, CountRowsWithMetadata) {
                                   scanner->CountRows());
 }
 
+TEST_P(TestScanner, ToRecordBatchReader) {
+  SetSchema({field("i32", int32()), field("f64", float64())});
+  auto batch = ConstantArrayGenerator::Zeroes(GetParam().items_per_batch, schema_);
+  std::vector<std::shared_ptr<RecordBatch>> batches{
+      static_cast<std::size_t>(GetParam().num_batches * GetParam().num_child_datasets),
+      batch};
+
+  ASSERT_OK_AND_ASSIGN(auto expected, Table::FromRecordBatches(batches));
+
+  std::shared_ptr<Table> actual;
+  auto scanner = MakeScanner(batch);
+  ASSERT_OK_AND_ASSIGN(auto reader, scanner->ToRecordBatchReader());
+  scanner.reset();
+  ASSERT_OK(reader->ReadAll(&actual));
+  AssertTablesEqual(*expected, *actual);
+}
+
 class FailingFragment : public InMemoryFragment {
  public:
   using InMemoryFragment::InMemoryFragment;
diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx
index 90633a140d3..3e89ce14045 100644
--- a/python/pyarrow/_dataset.pyx
+++ b/python/pyarrow/_dataset.pyx
@@ -2928,6 +2928,13 @@ cdef class Scanner(_Weakrefable):
             result = self.scanner.CountRows()
         return GetResultValue(result)
 
+    def to_reader(self):
+        """Consume this scanner as a RecordBatchReader."""
+        cdef RecordBatchReader reader
+        reader = RecordBatchReader.__new__(RecordBatchReader)
+        reader.reader = GetResultValue(self.scanner.ToRecordBatchReader())
+        return reader
+
 
 def _get_partition_keys(Expression partition_expression):
     """
diff --git a/python/pyarrow/includes/libarrow_dataset.pxd b/python/pyarrow/includes/libarrow_dataset.pxd
index 31b3764245d..c3cfe01538d 100644
--- a/python/pyarrow/includes/libarrow_dataset.pxd
+++ b/python/pyarrow/includes/libarrow_dataset.pxd
@@ -115,6 +115,7 @@ cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil:
         CResult[shared_ptr[CTable]] Head(int64_t num_rows)
         CResult[int64_t] CountRows()
         CResult[CFragmentIterator] GetFragments()
+        CResult[shared_ptr[CRecordBatchReader]] ToRecordBatchReader()
         const shared_ptr[CScanOptions]& options()
 
     cdef cppclass CScannerBuilder "arrow::dataset::ScannerBuilder":
diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py
index b6409d8df6c..2d83ee1f437 100644
--- a/python/pyarrow/tests/test_dataset.py
+++ b/python/pyarrow/tests/test_dataset.py
@@ -343,11 +343,12 @@ def test_scanner(dataset):
     assert scanner.projected_schema == pa.schema([("i64", pa.int64())])
 
     assert isinstance(scanner, ds.Scanner)
+    table = scanner.to_table()
     for batch in scanner.to_batches():
         assert batch.schema == scanner.projected_schema
         assert batch.num_columns == 1
+    assert table == scanner.to_reader().read_all()
 
-    table = scanner.to_table()
     assert table.schema == scanner.projected_schema
     for i in range(table.num_rows):
         indices = pa.array([i])

From 0a0c9f919dadf787f07ef1c2106cc9bcaff9fb4f Mon Sep 17 00:00:00 2001
From: Weston Pace <weston.pace@gmail.com>
Date: Wed, 12 May 2021 16:53:27 +0200
Subject: [PATCH 227/719] ARROW-12584: [C++][Python] Expose method for
 benchmarking tools to release unused memory from the allocators

@pitrou What are your thoughts on this.  It's not urgent (benchmarking can always use the system allocator in the meantime if they are having RAM issues and I think they reduced already which benchmarks they run on the RAM-limited servers)

With mimalloc the following command has a max RSS of ~16GB.
```
conbench file-read fanniemae_2016Q4 --all=true
```

If I add a call to release_unused between runs then the max RSS drops to ~10GB.

Closes #10186 from westonpace/feature/ARROW-12584--c-python-expose-method-for-benchmarking-tools

Lead-authored-by: Weston Pace <weston.pace@gmail.com>
Co-authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/memory_pool.cc         | 20 ++++++++++++++++++++
 cpp/src/arrow/memory_pool.h          |  7 +++++++
 python/pyarrow/includes/libarrow.pxd |  1 +
 python/pyarrow/memory.pxi            | 15 +++++++++++++++
 python/pyarrow/tests/test_memory.py  |  5 +++++
 5 files changed, 48 insertions(+)

diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc
index 793a2c8e5df..63e3cb93a25 100644
--- a/cpp/src/arrow/memory_pool.cc
+++ b/cpp/src/arrow/memory_pool.cc
@@ -35,6 +35,10 @@
 #include "arrow/util/optional.h"
 #include "arrow/util/string.h"
 
+#ifdef __GLIBC__
+#include <malloc.h>
+#endif
+
 #ifdef ARROW_JEMALLOC
 // Needed to support jemalloc 3 and 4
 #define JEMALLOC_MANGLE
@@ -253,6 +257,14 @@ class SystemAllocator {
 #endif
     }
   }
+
+  static void ReleaseUnused() {
+#ifdef __GLIBC__
+    // The return value of malloc_trim is not an error but to inform
+    // you if memory was actually released or not, which we do not care about here
+    ARROW_UNUSED(malloc_trim(0));
+#endif
+  }
 };
 
 #ifdef ARROW_JEMALLOC
@@ -300,6 +312,10 @@ class JemallocAllocator {
       dallocx(ptr, MALLOCX_ALIGN(kAlignment));
     }
   }
+
+  static void ReleaseUnused() {
+    mallctl("arena." ARROW_STRINGIFY(MALLCTL_ARENAS_ALL) ".purge", NULL, NULL, NULL, 0);
+  }
 };
 
 #endif  // defined(ARROW_JEMALLOC)
@@ -322,6 +338,8 @@ class MimallocAllocator {
     return Status::OK();
   }
 
+  static void ReleaseUnused() { mi_collect(true); }
+
   static Status ReallocateAligned(int64_t old_size, int64_t new_size, uint8_t** ptr) {
     uint8_t* previous_ptr = *ptr;
     if (previous_ptr == zero_size_area) {
@@ -428,6 +446,8 @@ class BaseMemoryPoolImpl : public MemoryPool {
     stats_.UpdateAllocatedBytes(-size);
   }
 
+  void ReleaseUnused() override { Allocator::ReleaseUnused(); }
+
   int64_t bytes_allocated() const override { return stats_.bytes_allocated(); }
 
   int64_t max_memory() const override { return stats_.max_memory(); }
diff --git a/cpp/src/arrow/memory_pool.h b/cpp/src/arrow/memory_pool.h
index fed6640fdf3..81b1b112dc7 100644
--- a/cpp/src/arrow/memory_pool.h
+++ b/cpp/src/arrow/memory_pool.h
@@ -87,6 +87,13 @@ class ARROW_EXPORT MemoryPool {
   ///   faster deallocation if supported by its backend.
   virtual void Free(uint8_t* buffer, int64_t size) = 0;
 
+  /// Return unused memory to the OS
+  ///
+  /// Only applies to allocators that hold onto unused memory.  This will be
+  /// best effort, a memory pool may not implement this feature or may be
+  /// unable to fulfill the request due to fragmentation.
+  virtual void ReleaseUnused() {}
+
   /// The number of bytes that were allocated and not yet free'd through
   /// this allocator.
   virtual int64_t bytes_allocated() const = 0;
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index dec0038a0e4..4ac18d1e905 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -286,6 +286,7 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
         int64_t bytes_allocated()
         int64_t max_memory()
         c_string backend_name()
+        void ReleaseUnused()
 
     cdef cppclass CLoggingMemoryPool" arrow::LoggingMemoryPool"(CMemoryPool):
         CLoggingMemoryPool(CMemoryPool*)
diff --git a/python/pyarrow/memory.pxi b/python/pyarrow/memory.pxi
index 28777ea629e..fc0d32aad56 100644
--- a/python/pyarrow/memory.pxi
+++ b/python/pyarrow/memory.pxi
@@ -36,6 +36,21 @@ cdef class MemoryPool(_Weakrefable):
     cdef void init(self, CMemoryPool* pool):
         self.pool = pool
 
+    def release_unused(self):
+        """
+        Attempt to return to the OS any memory being held onto by the pool.
+
+        This function should not be called except potentially for
+        benchmarking or debugging as it could be expensive and detrimental to
+        performance.
+
+        This is best effort and may not have any effect on some memory pools
+        or in some situations (e.g. fragmentation).
+        """
+        cdef CMemoryPool* pool = c_get_memory_pool()
+        with nogil:
+            pool.ReleaseUnused()
+
     def bytes_allocated(self):
         """
         Return the number of bytes that are currently allocated from this
diff --git a/python/pyarrow/tests/test_memory.py b/python/pyarrow/tests/test_memory.py
index f6af92e63f1..b8dd7344fa9 100644
--- a/python/pyarrow/tests/test_memory.py
+++ b/python/pyarrow/tests/test_memory.py
@@ -104,6 +104,11 @@ def test_default_backend_name():
     assert pool.backend_name in possible_backends
 
 
+def test_release_unused():
+    pool = pa.default_memory_pool()
+    pool.release_unused()
+
+
 def check_env_var(name, expected, *, expect_warning=False):
     code = f"""if 1:
         import pyarrow as pa

From 9bbc986dc0ce1337b4c1ea246dbb8f412b078cd0 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Wed, 12 May 2021 16:54:18 +0200
Subject: [PATCH 228/719] ARROW-12736: [C++] Eliminate forced copy of
 potentially large vector<shared_ptr<>>

This is one of the contributors to the regression in scan times of wide datasets in ARROW-11469 - for every column, we're copying a vector of shared_ptrs of every column, leading to a quadratic number of shared_ptr copies/destructions. And the consuming API expects a reference anyways, so all this copying was pointless.

Closes #10294 from lidavidm/arrow-12736

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/record_batch.cc | 18 +++++++++---------
 cpp/src/arrow/record_batch.h  |  4 ++--
 cpp/src/arrow/table.cc        | 12 ++++--------
 cpp/src/arrow/table.h         |  2 +-
 4 files changed, 16 insertions(+), 20 deletions(-)

diff --git a/cpp/src/arrow/record_batch.cc b/cpp/src/arrow/record_batch.cc
index ce86ab1389a..66f9e932b58 100644
--- a/cpp/src/arrow/record_batch.cc
+++ b/cpp/src/arrow/record_batch.cc
@@ -69,6 +69,14 @@ class SimpleRecordBatch : public RecordBatch {
     boxed_columns_.resize(schema_->num_fields());
   }
 
+  const std::vector<std::shared_ptr<Array>>& columns() const override {
+    for (int i = 0; i < num_columns(); ++i) {
+      // Force all columns to be boxed
+      column(i);
+    }
+    return boxed_columns_;
+  }
+
   std::shared_ptr<Array> column(int i) const override {
     std::shared_ptr<Array> result = internal::atomic_load(&boxed_columns_[i]);
     if (!result) {
@@ -80,7 +88,7 @@ class SimpleRecordBatch : public RecordBatch {
 
   std::shared_ptr<ArrayData> column_data(int i) const override { return columns_[i]; }
 
-  ArrayDataVector column_data() const override { return columns_; }
+  const ArrayDataVector& column_data() const override { return columns_; }
 
   Result<std::shared_ptr<RecordBatch>> AddColumn(
       int i, const std::shared_ptr<Field>& field,
@@ -205,14 +213,6 @@ Result<std::shared_ptr<StructArray>> RecordBatch::ToStructArray() const {
                                        /*offset=*/0);
 }
 
-std::vector<std::shared_ptr<Array>> RecordBatch::columns() const {
-  std::vector<std::shared_ptr<Array>> children(num_columns());
-  for (int i = 0; i < num_columns(); ++i) {
-    children[i] = column(i);
-  }
-  return children;
-}
-
 const std::string& RecordBatch::column_name(int i) const {
   return schema_->field(i)->name();
 }
diff --git a/cpp/src/arrow/record_batch.h b/cpp/src/arrow/record_batch.h
index a75dd043e5d..3dc1f54a083 100644
--- a/cpp/src/arrow/record_batch.h
+++ b/cpp/src/arrow/record_batch.h
@@ -90,7 +90,7 @@ class ARROW_EXPORT RecordBatch {
   const std::shared_ptr<Schema>& schema() const { return schema_; }
 
   /// \brief Retrieve all columns at once
-  std::vector<std::shared_ptr<Array>> columns() const;
+  virtual const std::vector<std::shared_ptr<Array>>& columns() const = 0;
 
   /// \brief Retrieve an array from the record batch
   /// \param[in] i field index, does not boundscheck
@@ -108,7 +108,7 @@ class ARROW_EXPORT RecordBatch {
   virtual std::shared_ptr<ArrayData> column_data(int i) const = 0;
 
   /// \brief Retrieve all arrays' internal data from the record batch.
-  virtual ArrayDataVector column_data() const = 0;
+  virtual const ArrayDataVector& column_data() const = 0;
 
   /// \brief Add column to the record batch, producing a new RecordBatch
   ///
diff --git a/cpp/src/arrow/table.cc b/cpp/src/arrow/table.cc
index f0a2b085448..d4c7802c834 100644
--- a/cpp/src/arrow/table.cc
+++ b/cpp/src/arrow/table.cc
@@ -92,6 +92,10 @@ class SimpleTable : public Table {
 
   std::shared_ptr<ChunkedArray> column(int i) const override { return columns_[i]; }
 
+  const std::vector<std::shared_ptr<ChunkedArray>>& columns() const override {
+    return columns_;
+  }
+
   std::shared_ptr<Table> Slice(int64_t offset, int64_t length) const override {
     auto sliced = columns_;
     int64_t num_rows = length;
@@ -242,14 +246,6 @@ class SimpleTable : public Table {
 
 Table::Table() : num_rows_(0) {}
 
-std::vector<std::shared_ptr<ChunkedArray>> Table::columns() const {
-  std::vector<std::shared_ptr<ChunkedArray>> result;
-  for (int i = 0; i < this->num_columns(); ++i) {
-    result.emplace_back(this->column(i));
-  }
-  return result;
-}
-
 std::vector<std::shared_ptr<Field>> Table::fields() const {
   std::vector<std::shared_ptr<Field>> result;
   for (int i = 0; i < this->num_columns(); ++i) {
diff --git a/cpp/src/arrow/table.h b/cpp/src/arrow/table.h
index c547019c989..901c839843a 100644
--- a/cpp/src/arrow/table.h
+++ b/cpp/src/arrow/table.h
@@ -98,7 +98,7 @@ class ARROW_EXPORT Table {
   virtual std::shared_ptr<ChunkedArray> column(int i) const = 0;
 
   /// \brief Return vector of all columns for table
-  std::vector<std::shared_ptr<ChunkedArray>> columns() const;
+  virtual const std::vector<std::shared_ptr<ChunkedArray>>& columns() const = 0;
 
   /// Return a column's field by index
   std::shared_ptr<Field> field(int i) const { return schema_->field(i); }

From 408d1fa7d00657b9793c7944f19799e6732cbb33 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Wed, 12 May 2021 11:17:11 -0400
Subject: [PATCH 229/719] ARROW-12686: [C++][Python][FlightRPC] Convert Flight
 reader into a regular reader

This provides compatibility with APIs that expect regular RecordBatchReaders, e.g. exporting via the C Data Interface. The Flight interface itself cannot implement RecordBatchReader because getting the schema is not an infallible operation.

Closes #10267 from lidavidm/arrow-12686

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/src/arrow/flight/flight_test.cc         | 11 +++++++
 cpp/src/arrow/flight/types.cc               | 36 +++++++++++++++++++++
 cpp/src/arrow/flight/types.h                |  5 +++
 python/pyarrow/_flight.pyx                  | 10 ++++++
 python/pyarrow/includes/libarrow_flight.pxd |  4 +++
 python/pyarrow/tests/test_flight.py         |  4 +++
 6 files changed, 70 insertions(+)

diff --git a/cpp/src/arrow/flight/flight_test.cc b/cpp/src/arrow/flight/flight_test.cc
index 099d416aae4..35993f1eaa1 100644
--- a/cpp/src/arrow/flight/flight_test.cc
+++ b/cpp/src/arrow/flight/flight_test.cc
@@ -427,12 +427,20 @@ class TestFlightClient : public ::testing::Test {
     std::unique_ptr<FlightStreamReader> stream;
     ASSERT_OK(client_->DoGet(ticket, &stream));
 
+    std::unique_ptr<FlightStreamReader> stream2;
+    ASSERT_OK(client_->DoGet(ticket, &stream2));
+    ASSERT_OK_AND_ASSIGN(auto reader, MakeRecordBatchReader(std::move(stream2)));
+
     FlightStreamChunk chunk;
+    std::shared_ptr<RecordBatch> batch;
     for (int i = 0; i < num_batches; ++i) {
       ASSERT_OK(stream->Next(&chunk));
+      ASSERT_OK(reader->ReadNext(&batch));
       ASSERT_NE(nullptr, chunk.data);
+      ASSERT_NE(nullptr, batch);
 #if !defined(__MINGW32__)
       ASSERT_BATCHES_EQUAL(*expected_batches[i], *chunk.data);
+      ASSERT_BATCHES_EQUAL(*expected_batches[i], *batch);
 #else
       // In MINGW32, the following code does not have the reproducibility at the LSB
       // even when this is called twice with the same seed.
@@ -444,12 +452,15 @@ class TestFlightClient : public ::testing::Test {
       //                 [&dist, &rng] { return static_cast<ValueType>(dist(rng)); });
       //   /* data[1] = 0x40852cdfe23d3976 or 0x40852cdfe23d3975 */
       ASSERT_BATCHES_APPROX_EQUAL(*expected_batches[i], *chunk.data);
+      ASSERT_BATCHES_APPROX_EQUAL(*expected_batches[i], *batch);
 #endif
     }
 
     // Stream exhausted
     ASSERT_OK(stream->Next(&chunk));
+    ASSERT_OK(reader->ReadNext(&batch));
     ASSERT_EQ(nullptr, chunk.data);
+    ASSERT_EQ(nullptr, batch);
   }
 
  protected:
diff --git a/cpp/src/arrow/flight/types.cc b/cpp/src/arrow/flight/types.cc
index 84973f033a3..8139b213a82 100644
--- a/cpp/src/arrow/flight/types.cc
+++ b/cpp/src/arrow/flight/types.cc
@@ -284,6 +284,42 @@ Status MetadataRecordBatchWriter::Begin(const std::shared_ptr<Schema>& schema) {
   return Begin(schema, ipc::IpcWriteOptions::Defaults());
 }
 
+namespace {
+class MetadataRecordBatchReaderAdapter : public RecordBatchReader {
+ public:
+  explicit MetadataRecordBatchReaderAdapter(
+      std::shared_ptr<Schema> schema, std::shared_ptr<MetadataRecordBatchReader> delegate)
+      : schema_(std::move(schema)), delegate_(std::move(delegate)) {}
+  std::shared_ptr<Schema> schema() const override { return schema_; }
+  Status ReadNext(std::shared_ptr<RecordBatch>* batch) override {
+    FlightStreamChunk next;
+    while (true) {
+      RETURN_NOT_OK(delegate_->Next(&next));
+      if (!next.data && !next.app_metadata) {
+        // EOS
+        *batch = nullptr;
+        return Status::OK();
+      } else if (next.data) {
+        *batch = std::move(next.data);
+        return Status::OK();
+      }
+      // Got metadata, but no data (which is valid) - read the next message
+    }
+  }
+
+ private:
+  std::shared_ptr<Schema> schema_;
+  std::shared_ptr<MetadataRecordBatchReader> delegate_;
+};
+};  // namespace
+
+arrow::Result<std::shared_ptr<RecordBatchReader>> MakeRecordBatchReader(
+    std::shared_ptr<MetadataRecordBatchReader> reader) {
+  ARROW_ASSIGN_OR_RAISE(auto schema, reader->GetSchema());
+  return std::make_shared<MetadataRecordBatchReaderAdapter>(std::move(schema),
+                                                            std::move(reader));
+}
+
 SimpleFlightListing::SimpleFlightListing(const std::vector<FlightInfo>& flights)
     : position_(0), flights_(flights) {}
 
diff --git a/cpp/src/arrow/flight/types.h b/cpp/src/arrow/flight/types.h
index 7538e4beb13..cd37318ef1d 100644
--- a/cpp/src/arrow/flight/types.h
+++ b/cpp/src/arrow/flight/types.h
@@ -474,6 +474,11 @@ class ARROW_FLIGHT_EXPORT MetadataRecordBatchReader {
   virtual Status ReadAll(std::shared_ptr<Table>* table);
 };
 
+/// \brief Convert a MetadataRecordBatchReader to a regular RecordBatchReader.
+ARROW_FLIGHT_EXPORT
+arrow::Result<std::shared_ptr<RecordBatchReader>> MakeRecordBatchReader(
+    std::shared_ptr<MetadataRecordBatchReader> reader);
+
 /// \brief An interface to write IPC payloads with metadata.
 class ARROW_FLIGHT_EXPORT MetadataRecordBatchWriter : public ipc::RecordBatchWriter {
  public:
diff --git a/python/pyarrow/_flight.pyx b/python/pyarrow/_flight.pyx
index 7a8dcdbbfa8..e5d80df9380 100644
--- a/python/pyarrow/_flight.pyx
+++ b/python/pyarrow/_flight.pyx
@@ -874,6 +874,16 @@ cdef class _MetadataRecordBatchReader(_Weakrefable, _ReadPandasMixin):
 
         return chunk
 
+    def to_reader(self):
+        """Convert this reader into a regular RecordBatchReader.
+
+        This may fail if the schema cannot be read from the remote end.
+        """
+        cdef RecordBatchReader reader
+        reader = RecordBatchReader.__new__(RecordBatchReader)
+        reader.reader = GetResultValue(MakeRecordBatchReader(self.reader))
+        return reader
+
 
 cdef class MetadataRecordBatchReader(_MetadataRecordBatchReader):
     """The virtual base class for readers for Flight streams."""
diff --git a/python/pyarrow/includes/libarrow_flight.pxd b/python/pyarrow/includes/libarrow_flight.pxd
index 161a8041c31..737babb3fd5 100644
--- a/python/pyarrow/includes/libarrow_flight.pxd
+++ b/python/pyarrow/includes/libarrow_flight.pxd
@@ -151,6 +151,10 @@ cdef extern from "arrow/flight/api.h" namespace "arrow" nogil:
         CStatus Next(CFlightStreamChunk* out)
         CStatus ReadAll(shared_ptr[CTable]* table)
 
+    CResult[shared_ptr[CRecordBatchReader]] MakeRecordBatchReader\
+        " arrow::flight::MakeRecordBatchReader"(
+            shared_ptr[CMetadataRecordBatchReader])
+
     cdef cppclass CMetadataRecordBatchWriter \
             " arrow::flight::MetadataRecordBatchWriter"(CRecordBatchWriter):
         CStatus Begin(shared_ptr[CSchema] schema,
diff --git a/python/pyarrow/tests/test_flight.py b/python/pyarrow/tests/test_flight.py
index 45ba5c2dac9..585fdb2a062 100644
--- a/python/pyarrow/tests/test_flight.py
+++ b/python/pyarrow/tests/test_flight.py
@@ -857,6 +857,10 @@ def test_flight_do_get_ints():
         data = client.do_get(flight.Ticket(b'ints')).read_all()
         assert data.equals(table)
 
+        # Also test via RecordBatchReader interface
+        data = client.do_get(flight.Ticket(b'ints')).to_reader().read_all()
+        assert data.equals(table)
+
     with pytest.raises(flight.FlightServerError,
                        match="expected IpcWriteOptions, got <class 'int'>"):
         with ConstantFlightServer(options=42) as server:

From 699348b172e564099020b552cce6474e35ab113b Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Wed, 12 May 2021 12:00:23 -0500
Subject: [PATCH 230/719] ARROW-12750: [CI] [R] Actually pass parameterized
 docker options to the templates

Closes #10302 from jonkeane/ARROW-12750

Authored-by: Jonathan Keane <jkeane@gmail.com>
Signed-off-by: Jonathan Keane <jkeane@gmail.com>
---
 dev/tasks/tasks.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index 9fb5fbc3938..0644eb249fc 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -869,9 +869,9 @@ tasks:
     ci: azure
     template: r/azure.linux.yml
     params:
-      r_org: rocker
-      r_image: r-base
-      r_tag: latest
+      r_org: {{ r_org }}
+      r_image: {{ r_image }}
+      r_tag: {{ r_tag }}
 {% endfor %}
 
   test-r-rstudio-r-base-3.6-centos7-devtoolset-8:

From 5c9200d672f4193c2daed717ab4243150e976755 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Thu, 13 May 2021 02:33:49 +0000
Subject: [PATCH 231/719] ARROW-12530: [C++] Remove Buffer::mutable_data_

Deduce `mutable_data()` from `data_` and `is_mutable_`.
This simplifies Buffer construction and avoids errors in subclass constructors.

Closes #10303 from pitrou/ARROW-12530-buffer-mutable

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Yibo Cai <yibo.cai@arm.com>
---
 cpp/src/arrow/buffer.cc               | 27 ++++++++++++++-------------
 cpp/src/arrow/buffer.h                | 24 ++++++------------------
 cpp/src/arrow/gpu/cuda_context.cc     |  5 +++--
 cpp/src/arrow/gpu/cuda_context.h      |  3 ++-
 cpp/src/arrow/gpu/cuda_memory.cc      | 21 +++++++++------------
 cpp/src/arrow/io/file.cc              | 11 ++++-------
 cpp/src/arrow/python/common.cc        |  3 ---
 cpp/src/arrow/python/numpy_convert.cc |  6 +-----
 cpp/src/gandiva/jni/jni_common.cc     |  2 +-
 9 files changed, 40 insertions(+), 62 deletions(-)

diff --git a/cpp/src/arrow/buffer.cc b/cpp/src/arrow/buffer.cc
index 9215d9ab544..8275542c542 100644
--- a/cpp/src/arrow/buffer.cc
+++ b/cpp/src/arrow/buffer.cc
@@ -175,14 +175,15 @@ MutableBuffer::MutableBuffer(const std::shared_ptr<Buffer>& parent, const int64_
 // Pool buffer and allocation
 
 /// A Buffer whose lifetime is tied to a particular MemoryPool
-class PoolBuffer : public ResizableBuffer {
+class PoolBuffer final : public ResizableBuffer {
  public:
   explicit PoolBuffer(std::shared_ptr<MemoryManager> mm, MemoryPool* pool)
       : ResizableBuffer(nullptr, 0, std::move(mm)), pool_(pool) {}
 
   ~PoolBuffer() override {
-    if (mutable_data_ != nullptr) {
-      pool_->Free(mutable_data_, capacity_);
+    uint8_t* ptr = mutable_data();
+    if (ptr) {
+      pool_->Free(ptr, capacity_);
     }
   }
 
@@ -190,16 +191,15 @@ class PoolBuffer : public ResizableBuffer {
     if (capacity < 0) {
       return Status::Invalid("Negative buffer capacity: ", capacity);
     }
-    if (!mutable_data_ || capacity > capacity_) {
-      uint8_t* new_data;
+    uint8_t* ptr = mutable_data();
+    if (!ptr || capacity > capacity_) {
       int64_t new_capacity = BitUtil::RoundUpToMultipleOf64(capacity);
-      if (mutable_data_) {
-        RETURN_NOT_OK(pool_->Reallocate(capacity_, new_capacity, &mutable_data_));
+      if (ptr) {
+        RETURN_NOT_OK(pool_->Reallocate(capacity_, new_capacity, &ptr));
       } else {
-        RETURN_NOT_OK(pool_->Allocate(new_capacity, &new_data));
-        mutable_data_ = new_data;
+        RETURN_NOT_OK(pool_->Allocate(new_capacity, &ptr));
       }
-      data_ = mutable_data_;
+      data_ = ptr;
       capacity_ = new_capacity;
     }
     return Status::OK();
@@ -209,14 +209,15 @@ class PoolBuffer : public ResizableBuffer {
     if (ARROW_PREDICT_FALSE(new_size < 0)) {
       return Status::Invalid("Negative buffer resize: ", new_size);
     }
-    if (mutable_data_ && shrink_to_fit && new_size <= size_) {
+    uint8_t* ptr = mutable_data();
+    if (ptr && shrink_to_fit && new_size <= size_) {
       // Buffer is non-null and is not growing, so shrink to the requested size without
       // excess space.
       int64_t new_capacity = BitUtil::RoundUpToMultipleOf64(new_size);
       if (capacity_ != new_capacity) {
         // Buffer hasn't got yet the requested size.
-        RETURN_NOT_OK(pool_->Reallocate(capacity_, new_capacity, &mutable_data_));
-        data_ = mutable_data_;
+        RETURN_NOT_OK(pool_->Reallocate(capacity_, new_capacity, &ptr));
+        data_ = ptr;
         capacity_ = new_capacity;
       }
     } else {
diff --git a/cpp/src/arrow/buffer.h b/cpp/src/arrow/buffer.h
index 1a3bb29e439..6c47a464b1d 100644
--- a/cpp/src/arrow/buffer.h
+++ b/cpp/src/arrow/buffer.h
@@ -56,23 +56,13 @@ class ARROW_EXPORT Buffer {
   ///
   /// \note The passed memory must be kept alive through some other means
   Buffer(const uint8_t* data, int64_t size)
-      : is_mutable_(false),
-        is_cpu_(true),
-        data_(data),
-        mutable_data_(NULLPTR),
-        size_(size),
-        capacity_(size) {
+      : is_mutable_(false), is_cpu_(true), data_(data), size_(size), capacity_(size) {
     SetMemoryManager(default_cpu_memory_manager());
   }
 
   Buffer(const uint8_t* data, int64_t size, std::shared_ptr<MemoryManager> mm,
          std::shared_ptr<Buffer> parent = NULLPTR)
-      : is_mutable_(false),
-        data_(data),
-        mutable_data_(NULLPTR),
-        size_(size),
-        capacity_(size),
-        parent_(parent) {
+      : is_mutable_(false), data_(data), size_(size), capacity_(size), parent_(parent) {
     SetMemoryManager(std::move(mm));
   }
 
@@ -131,7 +121,7 @@ class ARROW_EXPORT Buffer {
 #endif
     // A zero-capacity buffer can have a null data pointer
     if (capacity_ != 0) {
-      memset(mutable_data_ + size_, 0, static_cast<size_t>(capacity_ - size_));
+      memset(mutable_data() + size_, 0, static_cast<size_t>(capacity_ - size_));
     }
   }
 
@@ -205,7 +195,8 @@ class ARROW_EXPORT Buffer {
     CheckCPU();
     CheckMutable();
 #endif
-    return ARROW_PREDICT_TRUE(is_cpu_) ? mutable_data_ : NULLPTR;
+    return ARROW_PREDICT_TRUE(is_cpu_ && is_mutable_) ? const_cast<uint8_t*>(data_)
+                                                      : NULLPTR;
   }
 
   /// \brief Return the device address of the buffer's data
@@ -219,7 +210,7 @@ class ARROW_EXPORT Buffer {
 #ifndef NDEBUG
     CheckMutable();
 #endif
-    return reinterpret_cast<uintptr_t>(mutable_data_);
+    return ARROW_PREDICT_TRUE(is_mutable_) ? reinterpret_cast<uintptr_t>(data_) : 0;
   }
 
   /// \brief Return the buffer's size in bytes
@@ -289,7 +280,6 @@ class ARROW_EXPORT Buffer {
   bool is_mutable_;
   bool is_cpu_;
   const uint8_t* data_;
-  uint8_t* mutable_data_;
   int64_t size_;
   int64_t capacity_;
 
@@ -389,13 +379,11 @@ Result<std::shared_ptr<Buffer>> SliceMutableBufferSafe(
 class ARROW_EXPORT MutableBuffer : public Buffer {
  public:
   MutableBuffer(uint8_t* data, const int64_t size) : Buffer(data, size) {
-    mutable_data_ = data;
     is_mutable_ = true;
   }
 
   MutableBuffer(uint8_t* data, const int64_t size, std::shared_ptr<MemoryManager> mm)
       : Buffer(data, size, std::move(mm)) {
-    mutable_data_ = data;
     is_mutable_ = true;
   }
 
diff --git a/cpp/src/arrow/gpu/cuda_context.cc b/cpp/src/arrow/gpu/cuda_context.cc
index bb0b055e5d2..8cb7e65fa6a 100644
--- a/cpp/src/arrow/gpu/cuda_context.cc
+++ b/cpp/src/arrow/gpu/cuda_context.cc
@@ -160,7 +160,8 @@ class CudaContext::Impl {
     return Status::OK();
   }
 
-  Result<std::shared_ptr<CudaIpcMemHandle>> ExportIpcBuffer(void* data, int64_t size) {
+  Result<std::shared_ptr<CudaIpcMemHandle>> ExportIpcBuffer(const void* data,
+                                                            int64_t size) {
     CUipcMemHandle cu_handle;
     if (size > 0) {
       ContextSaver set_temporary(context_);
@@ -538,7 +539,7 @@ Result<std::shared_ptr<CudaBuffer>> CudaContext::View(uint8_t* data, int64_t nby
   return std::make_shared<CudaBuffer>(data, nbytes, this->shared_from_this(), false);
 }
 
-Result<std::shared_ptr<CudaIpcMemHandle>> CudaContext::ExportIpcBuffer(void* data,
+Result<std::shared_ptr<CudaIpcMemHandle>> CudaContext::ExportIpcBuffer(const void* data,
                                                                        int64_t size) {
   return impl_->ExportIpcBuffer(data, size);
 }
diff --git a/cpp/src/arrow/gpu/cuda_context.h b/cpp/src/arrow/gpu/cuda_context.h
index 246883c9b99..2cff4f57a74 100644
--- a/cpp/src/arrow/gpu/cuda_context.h
+++ b/cpp/src/arrow/gpu/cuda_context.h
@@ -279,7 +279,8 @@ class ARROW_EXPORT CudaContext : public std::enable_shared_from_this<CudaContext
  private:
   CudaContext();
 
-  Result<std::shared_ptr<CudaIpcMemHandle>> ExportIpcBuffer(void* data, int64_t size);
+  Result<std::shared_ptr<CudaIpcMemHandle>> ExportIpcBuffer(const void* data,
+                                                            int64_t size);
   Status CopyHostToDevice(void* dst, const void* src, int64_t nbytes);
   Status CopyHostToDevice(uintptr_t dst, const void* src, int64_t nbytes);
   Status CopyDeviceToHost(void* dst, const void* src, int64_t nbytes);
diff --git a/cpp/src/arrow/gpu/cuda_memory.cc b/cpp/src/arrow/gpu/cuda_memory.cc
index 80304d44cfd..297e4dcf71e 100644
--- a/cpp/src/arrow/gpu/cuda_memory.cc
+++ b/cpp/src/arrow/gpu/cuda_memory.cc
@@ -102,7 +102,6 @@ CudaBuffer::CudaBuffer(uint8_t* data, int64_t size,
                        bool is_ipc)
     : Buffer(data, size), context_(context), own_data_(own_data), is_ipc_(is_ipc) {
   is_mutable_ = true;
-  mutable_data_ = data;
   SetMemoryManager(context_->memory_manager());
 }
 
@@ -118,7 +117,7 @@ Status CudaBuffer::Close() {
     if (is_ipc_) {
       return context_->CloseIpcBuffer(this);
     } else {
-      return context_->Free(mutable_data_, size_);
+      return context_->Free(const_cast<uint8_t*>(data_), size_);
     }
   }
   return Status::OK();
@@ -130,10 +129,7 @@ CudaBuffer::CudaBuffer(const std::shared_ptr<CudaBuffer>& parent, const int64_t
       context_(parent->context()),
       own_data_(false),
       is_ipc_(false) {
-  if (parent->is_mutable()) {
-    is_mutable_ = true;
-    mutable_data_ = const_cast<uint8_t*>(data_);
-  }
+  is_mutable_ = parent->is_mutable();
 }
 
 Result<std::shared_ptr<CudaBuffer>> CudaBuffer::FromBuffer(
@@ -171,7 +167,7 @@ Status CudaBuffer::CopyFromHost(const int64_t position, const void* data,
   if (nbytes > size_ - position) {
     return Status::Invalid("Copy would overflow buffer");
   }
-  return context_->CopyHostToDevice(mutable_data_ + position, data, nbytes);
+  return context_->CopyHostToDevice(const_cast<uint8_t*>(data_) + position, data, nbytes);
 }
 
 Status CudaBuffer::CopyFromDevice(const int64_t position, const void* data,
@@ -179,7 +175,8 @@ Status CudaBuffer::CopyFromDevice(const int64_t position, const void* data,
   if (nbytes > size_ - position) {
     return Status::Invalid("Copy would overflow buffer");
   }
-  return context_->CopyDeviceToDevice(mutable_data_ + position, data, nbytes);
+  return context_->CopyDeviceToDevice(const_cast<uint8_t*>(data_) + position, data,
+                                      nbytes);
 }
 
 Status CudaBuffer::CopyFromAnotherDevice(const std::shared_ptr<CudaContext>& src_ctx,
@@ -188,15 +185,15 @@ Status CudaBuffer::CopyFromAnotherDevice(const std::shared_ptr<CudaContext>& src
   if (nbytes > size_ - position) {
     return Status::Invalid("Copy would overflow buffer");
   }
-  return src_ctx->CopyDeviceToAnotherDevice(context_, mutable_data_ + position, data,
-                                            nbytes);
+  return src_ctx->CopyDeviceToAnotherDevice(
+      context_, const_cast<uint8_t*>(data_) + position, data, nbytes);
 }
 
 Result<std::shared_ptr<CudaIpcMemHandle>> CudaBuffer::ExportForIpc() {
   if (is_ipc_) {
     return Status::Invalid("Buffer has already been exported for IPC");
   }
-  ARROW_ASSIGN_OR_RAISE(auto handle, context_->ExportIpcBuffer(mutable_data_, size_));
+  ARROW_ASSIGN_OR_RAISE(auto handle, context_->ExportIpcBuffer(data_, size_));
   own_data_ = false;
   return handle;
 }
@@ -204,7 +201,7 @@ Result<std::shared_ptr<CudaIpcMemHandle>> CudaBuffer::ExportForIpc() {
 CudaHostBuffer::~CudaHostBuffer() {
   auto maybe_manager = CudaDeviceManager::Instance();
   ARROW_CHECK_OK(maybe_manager.status());
-  ARROW_CHECK_OK((*maybe_manager)->FreeHost(mutable_data_, size_));
+  ARROW_CHECK_OK((*maybe_manager)->FreeHost(const_cast<uint8_t*>(data_), size_));
 }
 
 Result<uintptr_t> CudaHostBuffer::GetDeviceAddress(
diff --git a/cpp/src/arrow/io/file.cc b/cpp/src/arrow/io/file.cc
index 8a4976db4aa..70e15335af2 100644
--- a/cpp/src/arrow/io/file.cc
+++ b/cpp/src/arrow/io/file.cc
@@ -390,15 +390,12 @@ class MemoryMappedFile::MemoryMap
   // An object representing the entire memory-mapped region.
   // It can be sliced in order to return individual subregions, which
   // will then keep the original region alive as long as necessary.
-  class Region : public MutableBuffer {
+  class Region : public Buffer {
    public:
     Region(std::shared_ptr<MemoryMappedFile::MemoryMap> memory_map, uint8_t* data,
            int64_t size)
-        : MutableBuffer(data, size) {
+        : Buffer(data, size) {
       is_mutable_ = memory_map->writable();
-      if (!is_mutable_) {
-        mutable_data_ = nullptr;
-      }
     }
 
     ~Region() {
@@ -542,10 +539,10 @@ class MemoryMappedFile::MemoryMap
 
   void advance(int64_t nbytes) { position_ = position_ + nbytes; }
 
-  uint8_t* head() { return data() + position_; }
-
   uint8_t* data() { return region_ ? region_->data() : nullptr; }
 
+  uint8_t* head() { return data() + position_; }
+
   bool writable() { return file_->mode() != FileMode::READ; }
 
   bool opened() { return file_->is_open(); }
diff --git a/cpp/src/arrow/python/common.cc b/cpp/src/arrow/python/common.cc
index 8ff348509e2..6fe2ed4dae3 100644
--- a/cpp/src/arrow/python/common.cc
+++ b/cpp/src/arrow/python/common.cc
@@ -179,9 +179,6 @@ Status PyBuffer::Init(PyObject* obj) {
     size_ = py_buf_.len;
     capacity_ = py_buf_.len;
     is_mutable_ = !py_buf_.readonly;
-    if (is_mutable_) {
-      mutable_data_ = reinterpret_cast<uint8_t*>(py_buf_.buf);
-    }
     return Status::OK();
   } else {
     return ConvertPyError(StatusCode::Invalid);
diff --git a/cpp/src/arrow/python/numpy_convert.cc b/cpp/src/arrow/python/numpy_convert.cc
index 11ce0e50309..bf4afb2a0a1 100644
--- a/cpp/src/arrow/python/numpy_convert.cc
+++ b/cpp/src/arrow/python/numpy_convert.cc
@@ -48,11 +48,7 @@ NumPyBuffer::NumPyBuffer(PyObject* ao) : Buffer(nullptr, 0) {
     data_ = const_cast<const uint8_t*>(ptr);
     size_ = PyArray_SIZE(ndarray) * PyArray_DESCR(ndarray)->elsize;
     capacity_ = size_;
-
-    if (PyArray_FLAGS(ndarray) & NPY_ARRAY_WRITEABLE) {
-      is_mutable_ = true;
-      mutable_data_ = ptr;
-    }
+    is_mutable_ = !!(PyArray_FLAGS(ndarray) & NPY_ARRAY_WRITEABLE);
   }
 }
 
diff --git a/cpp/src/gandiva/jni/jni_common.cc b/cpp/src/gandiva/jni/jni_common.cc
index 871bd248e65..04953305432 100644
--- a/cpp/src/gandiva/jni/jni_common.cc
+++ b/cpp/src/gandiva/jni/jni_common.cc
@@ -730,7 +730,7 @@ Status JavaResizableBuffer::Resize(const int64_t new_size, bool shrink_to_fit) {
   jlong ret_capacity = env_->GetLongField(ret, vector_expander_ret_capacity_);
   DCHECK_GE(ret_capacity, new_size);
 
-  data_ = mutable_data_ = reinterpret_cast<uint8_t*>(ret_address);
+  data_ = reinterpret_cast<uint8_t*>(ret_address);
   size_ = new_size;
   capacity_ = ret_capacity;
   return Status::OK();

From 950e18b7ca5516b6d041da9b7776f96fd310532a Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Thu, 13 May 2021 13:18:07 +0200
Subject: [PATCH 232/719] ARROW-12757: [Archery] Fix spurious warning when
 running "archery docker run"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #10304 from pitrou/ARROW-12757-archery-rust-warning

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 docker-compose.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/docker-compose.yml b/docker-compose.yml
index ebcc1bb6039..2e93ebd8616 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1281,7 +1281,6 @@ services:
       args:
         base: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-cpp
         clang_tools: ${CLANG_TOOLS}
-        rust: ${RUST}
     environment:
       <<: *ccache
     volumes: *ubuntu-volumes

From 73d833c07ac4cc0e17fcfa3423f45738b4388dc4 Mon Sep 17 00:00:00 2001
From: Yibo Cai <yibo.cai@arm.com>
Date: Thu, 13 May 2021 13:24:26 +0200
Subject: [PATCH 233/719] ARROW-12490: [Dev] Use only miniforge in
 verify-release-candidate.sh
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #10298 from cyb70289/12490-miniforge

Lead-authored-by: Yibo Cai <yibo.cai@arm.com>
Co-authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 dev/release/verify-release-candidate.sh | 15 +++++----------
 dev/tasks/tasks.yml                     |  3 ++-
 dev/tasks/verify-rc/github.linux.yml    |  1 +
 3 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh
index 320654dd800..d03341ce44d 100755
--- a/dev/release/verify-release-candidate.sh
+++ b/dev/release/verify-release-candidate.sh
@@ -217,17 +217,12 @@ setup_tempdir() {
 
 setup_miniconda() {
   # Setup short-lived miniconda for Python and integration tests
-  if [ "$(uname)" == "Darwin" ]; then
-    if [ "$(uname -m)" == "arm64" ]; then
-	MINICONDA_URL=https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-MacOSX-arm64.sh
-    else
-        MINICONDA_URL=https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh
-    fi
-  elif [ "$(uname)" == "Linux" ] && [ "$(uname -m)" == "aarch64" ]; then
-    MINICONDA_URL=https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-aarch64.sh
-  else
-    MINICONDA_URL=https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh
+  OS="$(uname)"
+  if [ "${OS}" == "Darwin" ]; then
+    OS=MacOSX
   fi
+  ARCH="$(uname -m)"
+  MINICONDA_URL="https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-${OS}-${ARCH}.sh"
 
   MINICONDA=$PWD/test-miniconda
 
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index 0644eb249fc..73d9bab23cf 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -667,7 +667,8 @@ tasks:
         TEST_DEFAULT: 0
       artifact: "wheels"
 
-  {% for target in ["csharp",
+  {% for target in ["cpp",
+                    "csharp",
                     "go",
                     "integration",
                     "java",
diff --git a/dev/tasks/verify-rc/github.linux.yml b/dev/tasks/verify-rc/github.linux.yml
index 2bbde2e7fcb..fdd9de668f0 100644
--- a/dev/tasks/verify-rc/github.linux.yml
+++ b/dev/tasks/verify-rc/github.linux.yml
@@ -37,6 +37,7 @@ jobs:
         run: |
           # TODO: don't require removing newer llvms
           sudo apt-get --purge remove -y llvm-9 clang-9
+          sudo apt-get update -y
           sudo apt-get install -y \
             autoconf-archive \
             binfmt-support \

From b2cd20067c081d007d46e8e7da6c210311237ad4 Mon Sep 17 00:00:00 2001
From: Ian Cook <ianmcook@gmail.com>
Date: Thu, 13 May 2021 14:16:36 +0200
Subject: [PATCH 234/719] ARROW-12756: [C++] MSVC build fails with latest gtest
 from vcpkg
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This pins gtest to version 1.10.0 in the vcpkg manifest

Closes #10306 from ianmcook/ARROW-12756

Authored-by: Ian Cook <ianmcook@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 cpp/vcpkg.json                            | 6 +++++-
 dev/tasks/vcpkg-tests/cpp-build-vcpkg.bat | 1 +
 docs/source/developers/cpp/building.rst   | 3 ++-
 docs/source/developers/cpp/windows.rst    | 1 +
 4 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/cpp/vcpkg.json b/cpp/vcpkg.json
index c8b573e9e1a..b724412d397 100644
--- a/cpp/vcpkg.json
+++ b/cpp/vcpkg.json
@@ -36,5 +36,9 @@
     "utf8proc",
     "zlib",
     "zstd"
-  ]
+  ],
+  "overrides": [
+    { "name": "gtest", "version": "1.10.0", "port-version": 4 }
+  ],
+  "builtin-baseline": "a267ab118c09f56f3dae96c9a4b3410820ad2f0b"
 }
diff --git a/dev/tasks/vcpkg-tests/cpp-build-vcpkg.bat b/dev/tasks/vcpkg-tests/cpp-build-vcpkg.bat
index f748f92f3bc..12ff9b4b618 100644
--- a/dev/tasks/vcpkg-tests/cpp-build-vcpkg.bat
+++ b/dev/tasks/vcpkg-tests/cpp-build-vcpkg.bat
@@ -29,6 +29,7 @@ call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Too
 vcpkg install ^
     --triplet x64-windows ^
     --x-manifest-root cpp  ^
+    --feature-flags=versions ^
     --clean-after-build ^
     || exit /B 1
 
diff --git a/docs/source/developers/cpp/building.rst b/docs/source/developers/cpp/building.rst
index dfaee0ff7c0..085a5cd79a3 100644
--- a/docs/source/developers/cpp/building.rst
+++ b/docs/source/developers/cpp/building.rst
@@ -77,6 +77,7 @@ With `vcpkg <https://github.com/Microsoft/vcpkg>`_:
    cd arrow
    vcpkg install \
      --x-manifest-root cpp \
+     --feature-flags=versions \
      --clean-after-build
 
 On MSYS2:
@@ -294,7 +295,7 @@ the build system how to resolve each dependency. There are a few options:
   have this feature
 * ``CONDA``: Use ``$CONDA_PREFIX`` as alternative ``SYSTEM`` PATH
 * ``VCPKG``: Find dependencies installed by vcpkg, and if not found, run
-  ``vpckg install`` to install them
+  ``vcpkg install`` to install them
 * ``BREW``: Use Homebrew default paths as an alternative ``SYSTEM`` path
 
 The default method is ``AUTO`` unless you are developing within an active conda
diff --git a/docs/source/developers/cpp/windows.rst b/docs/source/developers/cpp/windows.rst
index 4de67ba402c..3251dea2dfa 100644
--- a/docs/source/developers/cpp/windows.rst
+++ b/docs/source/developers/cpp/windows.rst
@@ -134,6 +134,7 @@ of Arrow and run the command:
    vcpkg install ^
      --triplet x64-windows ^
      --x-manifest-root cpp  ^
+     --feature-flags=versions ^
      --clean-after-build
 
 On Windows, vcpkg builds dynamic link libraries by default. Use the triplet

From 00a51524a51ac7499c17044d071ae6827924f493 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Thu, 13 May 2021 15:50:37 +0200
Subject: [PATCH 235/719] ARROW-12772: [CI] Merge script test fails due to
 missing dependency
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #10310 from kszucs/ARROW-12772

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 .github/workflows/dev.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml
index fdbb53c29da..4ae6f1dc9bd 100644
--- a/.github/workflows/dev.yml
+++ b/.github/workflows/dev.yml
@@ -85,7 +85,7 @@ jobs:
       - name: Install Dependencies
         shell: bash
         run: |
-          pip install cython setuptools pytest jira
+          pip install cython setuptools six pytest jira
       - name: Run Release Test
         shell: bash
         run: |

From b34c8f68367b28649ad48fe803fddd645ec307f7 Mon Sep 17 00:00:00 2001
From: Matthijs Brobbel <m1brobbel@gmail.com>
Date: Thu, 13 May 2021 10:07:49 -0400
Subject: [PATCH 236/719] MINOR: [Docs] Add link to File.fbs in columnar format
 docs

Add link in Arrow Columnar Format docs to `File.fbs` file in the repository. The other FlatBuffers files (`Schema.fbs` and `Message.fbs`) are also linked.

Closes #10313 from mbrobbel/docs

Authored-by: Matthijs Brobbel <m1brobbel@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 docs/source/format/Columnar.rst | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/source/format/Columnar.rst b/docs/source/format/Columnar.rst
index a42e863d59c..102c3a73317 100644
--- a/docs/source/format/Columnar.rst
+++ b/docs/source/format/Columnar.rst
@@ -1018,7 +1018,7 @@ the stream format. At the end of the file, we write a *footer*
 containing a redundant copy of the schema (which is a part of the
 streaming format) plus memory offsets and sizes for each of the data
 blocks in the file. This enables random access any record batch in the
-file. See ``File.fbs`` for the precise details of the file footer.
+file. See `File.fbs`_ for the precise details of the file footer.
 
 Schematically we have: ::
 
@@ -1208,6 +1208,7 @@ the Arrow spec.
 .. _Flatbuffers protocol definition files: https://github.com/apache/arrow/tree/master/format
 .. _Schema.fbs: https://github.com/apache/arrow/blob/master/format/Schema.fbs
 .. _Message.fbs: https://github.com/apache/arrow/blob/master/format/Message.fbs
+.. _File.fbs: https://github.com/apache/arrow/blob/master/format/File.fbs
 .. _least-significant bit (LSB) numbering: https://en.wikipedia.org/wiki/Bit_numbering
 .. _Intel performance guide: https://software.intel.com/en-us/articles/practical-intel-avx-optimization-on-2nd-generation-intel-core-processors
 .. _Endianness: https://en.wikipedia.org/wiki/Endianness

From 9347731fe611c25f51c8d4831f1198c9438babd5 Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Thu, 13 May 2021 08:47:16 -0700
Subject: [PATCH 237/719] ARROW-12731: [R] Use InMemoryDataset for
 Table/RecordBatch in dplyr code

Discussing with @bkietz on #10166, we realized that we could already evaluate filter/project on Table/RecordBatch by wrapping it in InMemoryDataset and using the Dataset machinery, so I wanted to see how well that worked. Mostly it does, with a couple of caveats:

* You can't dictionary_encode a dataset column. `Error: Invalid: ExecuteScalarExpression cannot Execute non-scalar expression {x=dictionary_encode(x, {NON-REPRESENTABLE OPTIONS})}` (ARROW-12632). I will remove the `as.factor` method and leave a TODO to restore it after that JIRA is resolved.
* with the existing array_expressions, you could supply an additional Array (or R data convertible to an Array) when doing `mutate()`; this is not implemented for Datasets and that's ok. For Tables/RecordBatches, the behavior in this PR is to pull the data into R, which is fine.

There are a lot of changes here, which means the diff is big, but I've tried to group into distinct commits the main action. Highlights:

* https://github.com/apache/arrow/pull/10191/commits/5b501c508e8da7313dce0e361369dc62aa645a8f is the main switch to use InMemoryDataset
* https://github.com/apache/arrow/pull/10191/commits/b31fb5e594bc49628f7a4459109784caafe99cb4 deletes `array_expression`
* https://github.com/apache/arrow/pull/10191/commits/0d3193863fc578d93d9319ea2184e46e9f2f36e1 simplifies the interface for adding functions to the dplyr data_mask; definitely check this one out and see what you think of the new way--I hope it's much simpler to add new functions
* https://github.com/apache/arrow/pull/10191/commits/2e6374f94cbcc236becc3e41797a26127cf06ab0 improves the print method for queries by showing both the expression and the expected type of the output column, per suggestion from @bkietz
* https://github.com/apache/arrow/pull/10191/commits/d12f584e67531e251a1c72a5b67e14361d31f503 just splits up dplyr.R into many files; https://github.com/apache/arrow/pull/10191/commits/34dc1e6589ca622c8b1baeba7ce03c1d2b0b4c28 deletes tests that are duplicated between test-dplyr*.R and test-dataset.R (since they're now going through a common C++ interface).
* https://github.com/apache/arrow/pull/10191/commits/a0914f67319e659348396f106024d69064ea3943 + https://github.com/apache/arrow/pull/10191/commits/eee491a4e9e6735a0f304d1d71306bfd091f702b contain ARROW-12696

Closes #10191 from nealrichardson/dplyr-in-memory

Authored-by: Neal Richardson <neal.p.richardson@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 r/DESCRIPTION                                 |   11 +-
 r/NAMESPACE                                   |    5 +-
 r/R/arrow-datum.R                             |   71 +-
 r/R/arrow-package.R                           |   16 +-
 r/R/arrow-tabular.R                           |    6 +-
 r/R/arrowExports.R                            |    4 +
 r/R/dataset-scan.R                            |   14 +-
 r/R/dataset-write.R                           |    8 -
 r/R/dplyr-arrange.R                           |   93 ++
 r/R/dplyr-collect.R                           |   62 +
 r/R/dplyr-eval.R                              |   99 ++
 r/R/dplyr-filter.R                            |   84 ++
 r/R/dplyr-functions.R                         |  352 ++++++
 r/R/dplyr-group-by.R                          |   65 ++
 r/R/dplyr-mutate.R                            |  117 ++
 r/R/dplyr-select.R                            |  120 ++
 r/R/dplyr-summarize.R                         |   36 +
 r/R/dplyr.R                                   | 1005 +----------------
 r/R/expression.R                              |  194 +---
 r/man/contains_regex.Rd                       |    2 +-
 r/man/get_stringr_pattern_options.Rd          |    2 +-
 r/src/arrowExports.cpp                        |   17 +
 r/src/expression.cpp                          |    8 +
 r/tests/testthat/helper-arrow.R               |    2 +-
 r/tests/testthat/test-RecordBatch.R           |    7 +-
 r/tests/testthat/test-Table.R                 |    7 +-
 r/tests/testthat/test-compute-arith.R         |    3 +-
 r/tests/testthat/test-compute-sort.R          |   17 +-
 r/tests/testthat/test-dataset.R               |  339 +-----
 r/tests/testthat/test-dplyr-arrange.R         |    2 +
 r/tests/testthat/test-dplyr-filter.R          |   57 +-
 r/tests/testthat/test-dplyr-group-by.R        |    2 +
 r/tests/testthat/test-dplyr-mutate.R          |   39 +-
 .../testthat/test-dplyr-string-functions.R    |  114 +-
 r/tests/testthat/test-dplyr.R                 |   28 +-
 r/tests/testthat/test-expression.R            |   56 +-
 r/tests/testthat/test-filesystem.R            |    4 +
 37 files changed, 1360 insertions(+), 1708 deletions(-)
 create mode 100644 r/R/dplyr-arrange.R
 create mode 100644 r/R/dplyr-collect.R
 create mode 100644 r/R/dplyr-eval.R
 create mode 100644 r/R/dplyr-filter.R
 create mode 100644 r/R/dplyr-functions.R
 create mode 100644 r/R/dplyr-group-by.R
 create mode 100644 r/R/dplyr-mutate.R
 create mode 100644 r/R/dplyr-select.R
 create mode 100644 r/R/dplyr-summarize.R

diff --git a/r/DESCRIPTION b/r/DESCRIPTION
index 7f88320fb3d..82ca6fed617 100644
--- a/r/DESCRIPTION
+++ b/r/DESCRIPTION
@@ -78,9 +78,18 @@ Collate:
     'dataset-write.R'
     'deprecated.R'
     'dictionary.R'
+    'dplyr-arrange.R'
+    'dplyr-collect.R'
+    'dplyr-eval.R'
+    'dplyr-filter.R'
+    'expression.R'
+    'dplyr-functions.R'
+    'dplyr-group-by.R'
+    'dplyr-mutate.R'
+    'dplyr-select.R'
+    'dplyr-summarize.R'
     'record-batch.R'
     'table.R'
-    'expression.R'
     'dplyr.R'
     'feather.R'
     'field.R'
diff --git a/r/NAMESPACE b/r/NAMESPACE
index 9a05b87476a..f89d2effea7 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -21,7 +21,6 @@ S3method("[[<-",Schema)
 S3method("names<-",ArrowTabular)
 S3method(Ops,ArrowDatum)
 S3method(Ops,Expression)
-S3method(Ops,array_expression)
 S3method(all,ArrowDatum)
 S3method(all,equal.ArrowObject)
 S3method(any,ArrowDatum)
@@ -37,7 +36,6 @@ S3method(as.list,ArrowTabular)
 S3method(as.list,Schema)
 S3method(as.raw,Buffer)
 S3method(as.vector,ArrowDatum)
-S3method(as.vector,array_expression)
 S3method(c,Dataset)
 S3method(dim,ArrowTabular)
 S3method(dim,Dataset)
@@ -51,7 +49,6 @@ S3method(head,arrow_dplyr_query)
 S3method(is.na,ArrowDatum)
 S3method(is.na,Expression)
 S3method(is.na,Scalar)
-S3method(is.na,array_expression)
 S3method(is.nan,ArrowDatum)
 S3method(is_in,ArrowDatum)
 S3method(is_in,default)
@@ -80,7 +77,6 @@ S3method(names,StructArray)
 S3method(names,Table)
 S3method(names,arrow_dplyr_query)
 S3method(print,"arrow-enum")
-S3method(print,array_expression)
 S3method(print,arrow_dplyr_query)
 S3method(print,arrow_info)
 S3method(print,arrow_r_metadata)
@@ -295,6 +291,7 @@ importFrom(purrr,as_mapper)
 importFrom(purrr,keep)
 importFrom(purrr,map)
 importFrom(purrr,map2)
+importFrom(purrr,map2_chr)
 importFrom(purrr,map_chr)
 importFrom(purrr,map_dfr)
 importFrom(purrr,map_int)
diff --git a/r/R/arrow-datum.R b/r/R/arrow-datum.R
index 4edcb200ea0..f7c1d4d4ed7 100644
--- a/r/R/arrow-datum.R
+++ b/r/R/arrow-datum.R
@@ -46,6 +46,73 @@ as.vector.ArrowDatum <- function(x, mode) {
   )
 }
 
+#' @export
+Ops.ArrowDatum <- function(e1, e2) {
+  if (.Generic == "!") {
+    eval_array_expression(.Generic, e1)
+  } else if (.Generic %in% names(.array_function_map)) {
+    eval_array_expression(.Generic, e1, e2)
+  } else {
+    stop(paste0("Unsupported operation on `", class(e1)[1L], "` : "), .Generic, call. = FALSE)
+  }
+}
+
+# Wrapper around call_function that:
+# (1) maps R function names to Arrow C++ compute ("/" --> "divide_checked")
+# (2) wraps R input args as Array or Scalar
+eval_array_expression <- function(FUN,
+                                  ...,
+                                  args = list(...),
+                                  options = empty_named_list()) {
+  if (FUN == "-" && length(args) == 1L) {
+    if (inherits(args[[1]], "ArrowObject")) {
+      return(eval_array_expression("negate_checked", args[[1]]))
+    } else {
+      return(-args[[1]])
+    }
+  }
+  args <- lapply(args, .wrap_arrow, FUN)
+
+  # In Arrow, "divide" is one function, which does integer division on
+  # integer inputs and floating-point division on floats
+  if (FUN == "/") {
+    # TODO: omg so many ways it's wrong to assume these types
+    args <- map(args, ~.$cast(float64()))
+  } else if (FUN == "%/%") {
+    # In R, integer division works like floor(float division)
+    out <- eval_array_expression("/", args = args, options = options)
+    return(out$cast(int32(), allow_float_truncate = TRUE))
+  } else if (FUN == "%%") {
+    # {e1 - e2 * ( e1 %/% e2 )}
+    # ^^^ form doesn't work because Ops.Array evaluates eagerly,
+    # but we can build that up
+    quotient <- eval_array_expression("%/%", args = args)
+    base <- eval_array_expression("*", quotient, args[[2]])
+    # this cast is to ensure that the result of this and e1 are the same
+    # (autocasting only applies to scalars)
+    base <- base$cast(args[[1]]$type)
+    return(eval_array_expression("-", args[[1]], base))
+  }
+
+  call_function(
+    .array_function_map[[FUN]] %||% FUN,
+    args = args,
+    options = options
+  )
+}
+
+.wrap_arrow <- function(arg, fun) {
+  if (!inherits(arg, "ArrowObject")) {
+    # TODO: Array$create if lengths are equal?
+    if (fun == "%in%") {
+      arg <- Array$create(arg)
+    } else {
+      arg <- Scalar$create(arg)
+    }
+  }
+  arg
+}
+
 #' @export
 na.omit.ArrowDatum <- function(object, ...){
   object$Filter(!is.na(object))
@@ -66,10 +133,6 @@ filter_rows <- function(x, i, keep_na = TRUE, ...) {
   # General purpose function for [ row subsetting with R semantics
   # Based on the input for `i`, calls x$Filter, x$Slice, or x$Take
   nrows <- x$num_rows %||% x$length() # Depends on whether Array or Table-like
-  if (inherits(i, "array_expression")) {
-    # Evaluate it
-    i <- eval_array_expression(i)
-  }
   if (is.logical(i)) {
     if (isTRUE(i)) {
       # Shortcut without doing any work
diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R
index f6f01fe623a..9e8d629e08a 100644
--- a/r/R/arrow-package.R
+++ b/r/R/arrow-package.R
@@ -17,7 +17,7 @@
 
 #' @importFrom stats quantile median na.omit na.exclude na.pass na.fail
 #' @importFrom R6 R6Class
-#' @importFrom purrr as_mapper map map2 map_chr map_dfr map_int map_lgl keep
+#' @importFrom purrr as_mapper map map2 map_chr map2_chr map_dfr map_int map_lgl keep
 #' @importFrom assertthat assert_that is.string
 #' @importFrom rlang list2 %||% is_false abort dots_n warn enquo quo_is_null enquos is_integerish quos eval_tidy new_data_mask syms env new_environment env_bind as_label set_names exec is_bare_character quo_get_expr quo_set_expr .data seq2 is_quosure enexpr enexprs expr
 #' @importFrom tidyselect vars_pull vars_rename vars_select eval_select
@@ -49,8 +49,18 @@
 
   # Create these once, at package build time
   if (arrow_available()) {
-    dplyr_functions$dataset <- build_function_list(build_dataset_expression)
-    dplyr_functions$array <- build_function_list(build_array_expression)
+    # Also include all available Arrow Compute functions,
+    # namespaced as arrow_fun.
+    # We can't do this at install time because list_compute_functions() may error
+    all_arrow_funs <- list_compute_functions()
+    arrow_funcs <- set_names(
+      lapply(all_arrow_funs, function(fun) {
+        force(fun)
+        function(...) build_expr(fun, ...)
+      }),
+      paste0("arrow_", all_arrow_funs)
+    )
+    .cache$functions <- c(nse_funcs, arrow_funcs)
   }
   invisible()
 }
diff --git a/r/R/arrow-tabular.R b/r/R/arrow-tabular.R
index bba5ad5f5e6..2bd0a99534f 100644
--- a/r/R/arrow-tabular.R
+++ b/r/R/arrow-tabular.R
@@ -223,13 +223,13 @@ na.fail.ArrowTabular <- function(object, ...){
 
 #' @export
 na.omit.ArrowTabular <- function(object, ...){
-  not_na <- map(object$columns, ~build_array_expression("is_valid", .x))
+  not_na <- map(object$columns, ~call_function("is_valid", .x))
   not_na_agg <- Reduce("&", not_na)
-  object$Filter(eval_array_expression(not_na_agg))
+  object$Filter(not_na_agg)
 }
 
 #' @export
-na.exclude.ArrowTabular <- na.omit.ArrowTabular 
+na.exclude.ArrowTabular <- na.omit.ArrowTabular
 
 ToString_tabular <- function(x, ...) {
   # Generic to work with both RecordBatch and Table
diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R
index 0063836970e..c026c72899f 100644
--- a/r/R/arrowExports.R
+++ b/r/R/arrowExports.R
@@ -800,6 +800,10 @@ compute___expr__ToString <- function(x){
     .Call(`_arrow_compute___expr__ToString`, x)
 }
 
+compute___expr__type <- function(x, schema){
+    .Call(`_arrow_compute___expr__type`, x, schema)
+}
+
 ipc___WriteFeather__Table <- function(stream, table, version, chunk_size, compression, compression_level){
     invisible(.Call(`_arrow_ipc___WriteFeather__Table`, stream, table, version, chunk_size, compression, compression_level))
 }
diff --git a/r/R/dataset-scan.R b/r/R/dataset-scan.R
index 84949bbd397..a73bfb3dd74 100644
--- a/r/R/dataset-scan.R
+++ b/r/R/dataset-scan.R
@@ -162,16 +162,12 @@ ScannerBuilder <- R6Class("ScannerBuilder", inherit = ArrowObject,
       # cols is either a character vector or a named list of Expressions
       if (is.character(cols)) {
         dataset___ScannerBuilder__ProjectNames(self, cols)
+      } else if (length(cols) == 0) {
+        # Empty projection
+        dataset___ScannerBuilder__ProjectNames(self, character(0))
       } else {
-        # If we have expressions, but they all turn out to be field_refs,
-        # we can still call the simple method
-        field_names <- get_field_names(cols)
-        if (all(nzchar(field_names))) {
-          dataset___ScannerBuilder__ProjectNames(self, field_names)
-        } else {
-          # Else, we are projecting/mutating
-          dataset___ScannerBuilder__ProjectExprs(self, cols, names(cols))
-        }
+        # List of Expressions
+        dataset___ScannerBuilder__ProjectExprs(self, cols, names(cols))
       }
       self
     },
diff --git a/r/R/dataset-write.R b/r/R/dataset-write.R
index 8c9a1efc8d8..90413e9b9ed 100644
--- a/r/R/dataset-write.R
+++ b/r/R/dataset-write.R
@@ -64,14 +64,6 @@ write_dataset <- function(dataset,
                           ...) {
   format <- match.arg(format)
   if (inherits(dataset, "arrow_dplyr_query")) {
-    if (inherits(dataset$.data, "ArrowTabular")) {
-      # collect() to materialize any mutate/rename
-      dataset <- dplyr::collect(dataset, as_data_frame = FALSE)
-    }
-    # We can select a subset of columns but we can't rename them
-    if (!all(get_field_names(dataset) == names(dataset$selected_columns))) {
-      stop("Renaming columns when writing a dataset is not yet supported", call. = FALSE)
-    }
     # partitioning vars need to be in the `select` schema
     dataset <- ensure_group_vars(dataset)
   } else if (inherits(dataset, "grouped_df")) {
diff --git a/r/R/dplyr-arrange.R b/r/R/dplyr-arrange.R
new file mode 100644
index 00000000000..59afa4fe6a0
--- /dev/null
+++ b/r/R/dplyr-arrange.R
@@ -0,0 +1,93 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+# The following S3 methods are registered on load if dplyr is present
+
+arrange.arrow_dplyr_query <- function(.data, ..., .by_group = FALSE) {
+  call <- match.call()
+  exprs <- quos(...)
+  if (.by_group) {
+    # when the data is is grouped and .by_group is TRUE, order the result by
+    # the grouping columns first
+    exprs <- c(quos(!!!dplyr::groups(.data)), exprs)
+  }
+  if (length(exprs) == 0) {
+    # Nothing to do
+    return(.data)
+  }
+  .data <- arrow_dplyr_query(.data)
+  # find and remove any dplyr::desc() and tidy-eval
+  # the arrange expressions inside an Arrow data_mask
+  sorts <- vector("list", length(exprs))
+  descs <- logical(0)
+  mask <- arrow_mask(.data)
+  for (i in seq_along(exprs)) {
+    x <- find_and_remove_desc(exprs[[i]])
+    exprs[[i]] <- x[["quos"]]
+    sorts[[i]] <- arrow_eval(exprs[[i]], mask)
+    if (inherits(sorts[[i]], "try-error")) {
+      msg <- paste('Expression', as_label(exprs[[i]]), 'not supported in Arrow')
+      return(abandon_ship(call, .data, msg))
+    }
+    names(sorts)[i] <- as_label(exprs[[i]])
+    descs[i] <- x[["desc"]]
+  }
+  .data$arrange_vars <- c(sorts, .data$arrange_vars)
+  .data$arrange_desc <- c(descs, .data$arrange_desc)
+  .data
+}
+arrange.Dataset <- arrange.ArrowTabular <- arrange.arrow_dplyr_query
+
+# Helper to handle desc() in arrange()
+# * Takes a quosure as input
+# * Returns a list with two elements:
+#   1. The quosure with any wrapping parentheses and desc() removed
+#   2. A logical value indicating whether desc() was found
+# * Performs some other validation
+find_and_remove_desc <- function(quosure) {
+  expr <- quo_get_expr(quosure)
+  descending <- FALSE
+  if (length(all.vars(expr)) < 1L) {
+    stop(
+      "Expression in arrange() does not contain any field names: ",
+      deparse(expr),
+      call. = FALSE
+    )
+  }
+  # Use a while loop to remove any number of nested pairs of enclosing
+  # parentheses and any number of nested desc() calls. In the case of multiple
+  # nested desc() calls, each one toggles the sort order.
+  while (identical(typeof(expr), "language") && is.call(expr)) {
+    if (identical(expr[[1]], quote(`(`))) {
+      # remove enclosing parentheses
+      expr <- expr[[2]]
+    } else if (identical(expr[[1]], quote(desc))) {
+      # remove desc() and toggle descending
+      expr <- expr[[2]]
+      descending <- !descending
+    } else {
+      break
+    }
+  }
+  return(
+    list(
+      quos = quo_set_expr(quosure, expr),
+      desc = descending
+    )
+  )
+}
\ No newline at end of file
diff --git a/r/R/dplyr-collect.R b/r/R/dplyr-collect.R
new file mode 100644
index 00000000000..55716291dcb
--- /dev/null
+++ b/r/R/dplyr-collect.R
@@ -0,0 +1,62 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+# The following S3 methods are registered on load if dplyr is present
+
+collect.arrow_dplyr_query <- function(x, as_data_frame = TRUE, ...) {
+  x <- ensure_group_vars(x)
+  x <- ensure_arrange_vars(x) # this sets x$temp_columns
+  # Pull only the selected rows and cols into R
+  # See dataset.R for Dataset and Scanner(Builder) classes
+  tab <- Scanner$create(x)$ToTable()
+  # Arrange rows
+  if (length(x$arrange_vars) > 0) {
+    tab <- tab[
+      tab$SortIndices(names(x$arrange_vars), x$arrange_desc),
+      names(x$selected_columns), # this omits x$temp_columns from the result
+      drop = FALSE
+    ]
+  }
+  if (as_data_frame) {
+    df <- as.data.frame(tab)
+    tab$invalidate()
+    restore_dplyr_features(df, x)
+  } else {
+    restore_dplyr_features(tab, x)
+  }
+}
+collect.ArrowTabular <- function(x, as_data_frame = TRUE, ...) {
+  if (as_data_frame) {
+    as.data.frame(x, ...)
+  } else {
+    x
+  }
+}
+collect.Dataset <- function(x, ...) dplyr::collect(arrow_dplyr_query(x), ...)
+
+compute.arrow_dplyr_query <- function(x, ...) dplyr::collect(x, as_data_frame = FALSE)
+compute.ArrowTabular <- function(x, ...) x
+compute.Dataset <- compute.arrow_dplyr_query
+
+pull.arrow_dplyr_query <- function(.data, var = -1) {
+  .data <- arrow_dplyr_query(.data)
+  var <- vars_pull(names(.data), !!enquo(var))
+  .data$selected_columns <- set_names(.data$selected_columns[var], var)
+  dplyr::collect(.data)[[1]]
+}
+pull.Dataset <- pull.ArrowTabular <- pull.arrow_dplyr_query
\ No newline at end of file
diff --git a/r/R/dplyr-eval.R b/r/R/dplyr-eval.R
new file mode 100644
index 00000000000..2d19bd4cb90
--- /dev/null
+++ b/r/R/dplyr-eval.R
@@ -0,0 +1,99 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+arrow_eval <- function (expr, mask) {
+  # filter(), mutate(), etc. work by evaluating the quoted `exprs` to generate Expressions
+  # with references to Arrays (if .data is Table/RecordBatch) or Fields (if
+  # .data is a Dataset).
+
+  # This yields an Expression as long as the `exprs` are implemented in Arrow.
+  # Otherwise, it returns a try-error
+  tryCatch(eval_tidy(expr, mask), error = function(e) {
+    # Look for the cases where bad input was given, i.e. this would fail
+    # in regular dplyr anyway, and let those raise those as errors;
+    # else, for things not supported by Arrow return a "try-error",
+    # which we'll handle differently
+    msg <- conditionMessage(e)
+    patterns <- .cache$i18ized_error_pattern
+    if (is.null(patterns)) {
+      patterns <- i18ize_error_messages()
+      # Memoize it
+      .cache$i18ized_error_pattern <- patterns
+    }
+    if (grepl(patterns, msg)) {
+      stop(e)
+    }
+
+    out <- structure(msg, class = "try-error", condition = e)
+    if (grepl("not supported.*Arrow", msg)) {
+      # One of ours. Mark it so that consumers can handle it differently
+      class(out) <- c("arrow-try-error", class(out))
+    }
+    invisible(out)
+  })
+}
+
+handle_arrow_not_supported <- function(err, lab) {
+  # Look for informative message from the Arrow function version (see above)
+  if (inherits(err, "arrow-try-error")) {
+    # Include it if found
+    paste0('In ', lab, ', ', as.character(err))
+  } else {
+    # Otherwise be opaque (the original error is probably not useful)
+    paste('Expression', lab, 'not supported in Arrow')
+  }
+}
+
+i18ize_error_messages <- function() {
+  # Figure out what the error messages will be with this LANGUAGE
+  # so that we can look for them
+  out <- list(
+    obj = tryCatch(eval(parse(text = "X_____X")), error = function(e) conditionMessage(e)),
+    fun = tryCatch(eval(parse(text = "X_____X()")), error = function(e) conditionMessage(e))
+  )
+  paste(map(out, ~sub("X_____X", ".*", .)), collapse = "|")
+}
+
+# Helper to raise a common error
+arrow_not_supported <- function(msg) {
+  # TODO: raise a classed error?
+  stop(paste(msg, "not supported by Arrow"), call. = FALSE)
+}
+
+# Create a data mask for evaluating a dplyr expression
+arrow_mask <- function(.data) {
+  f_env <- new_environment(.cache$functions)
+
+  # Add functions that need to error hard and clear.
+  # Some R functions will still try to evaluate on an Expression
+  # and return NA with a warning
+  fail <- function(...) stop("Not implemented")
+  for (f in c("mean", "sd")) {
+    f_env[[f]] <- fail
+  }
+
+  # Add the column references and make the mask
+  out <- new_data_mask(
+    new_environment(.data$selected_columns, parent = f_env),
+    f_env
+  )
+  # Then insert the data pronoun
+  # TODO: figure out what rlang::as_data_pronoun does/why we should use it
+  # (because if we do we get `Error: Can't modify the data pronoun` in mutate())
+  out$.data <- .data$selected_columns
+  out
+}
diff --git a/r/R/dplyr-filter.R b/r/R/dplyr-filter.R
new file mode 100644
index 00000000000..3cbc34511a4
--- /dev/null
+++ b/r/R/dplyr-filter.R
@@ -0,0 +1,84 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+# The following S3 methods are registered on load if dplyr is present
+
+filter.arrow_dplyr_query <- function(.data, ..., .preserve = FALSE) {
+  # TODO something with the .preserve argument
+  filts <- quos(...)
+  if (length(filts) == 0) {
+    # Nothing to do
+    return(.data)
+  }
+
+  .data <- arrow_dplyr_query(.data)
+  # tidy-eval the filter expressions inside an Arrow data_mask
+  filters <- lapply(filts, arrow_eval, arrow_mask(.data))
+  bad_filters <- map_lgl(filters, ~inherits(., "try-error"))
+  if (any(bad_filters)) {
+    # This is similar to abandon_ship() except that the filter eval is
+    # vectorized, and we apply filters that _did_ work before abandoning ship
+    # with the rest
+    expr_labs <- map_chr(filts[bad_filters], as_label)
+    if (query_on_dataset(.data)) {
+      # Abort. We don't want to auto-collect if this is a Dataset because that
+      # could blow up, too big.
+      stop(
+        "Filter expression not supported for Arrow Datasets: ",
+        oxford_paste(expr_labs, quote = FALSE),
+        "\nCall collect() first to pull data into R.",
+        call. = FALSE
+      )
+    } else {
+      arrow_errors <- map2_chr(
+        filters[bad_filters], expr_labs,
+        handle_arrow_not_supported
+      )
+      if (length(arrow_errors) == 1) {
+        msg <- paste0(arrow_errors, "; ")
+      } else {
+        msg <- paste0("* ", arrow_errors, "\n", collapse = "")
+      }
+      warning(
+        msg, "pulling data into R",
+        immediate. = TRUE,
+        call. = FALSE
+      )
+      # Set any valid filters first, then collect and then apply the invalid ones in R
+      .data <- set_filters(.data, filters[!bad_filters])
+      return(dplyr::filter(dplyr::collect(.data), !!!filts[bad_filters]))
+    }
+  }
+
+  set_filters(.data, filters)
+}
+filter.Dataset <- filter.ArrowTabular <- filter.arrow_dplyr_query
+
+set_filters <- function(.data, expressions) {
+  if (length(expressions)) {
+    # expressions is a list of Expressions. AND them together and set them on .data
+    new_filter <- Reduce("&", expressions)
+    if (isTRUE(.data$filtered_rows)) {
+      # TRUE is default (i.e. no filter yet), so we don't need to & with it
+      .data$filtered_rows <- new_filter
+    } else {
+      .data$filtered_rows <- .data$filtered_rows & new_filter
+    }
+  }
+  .data
+}
\ No newline at end of file
diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R
new file mode 100644
index 00000000000..bee06a7cb6a
--- /dev/null
+++ b/r/R/dplyr-functions.R
@@ -0,0 +1,352 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+#' @include expression.R
+NULL
+
+# This environment is an internal cache for things including data mask functions
+# We'll populate it at package load time.
+.cache <- NULL
+init_env <- function () {
+  .cache <<- new.env(hash = TRUE)
+}
+init_env()
+
+# nse_funcs is a list of functions that operated on (and return) Expressions
+# These will be the basis for a data_mask inside dplyr methods
+# and will be added to .cache at package load time
+
+# Start with mappings from R function name spellings
+nse_funcs <- lapply(set_names(names(.array_function_map)), function(operator) {
+  force(operator)
+  function(...) build_expr(operator, ...)
+})
+
+# Now add functions to that list where the mapping from R to Arrow isn't 1:1
+# Each of these functions should have the same signature as the R function
+# they're replacing.
+#
+# When to use `build_expr()` vs. `Expression$create()`?
+#
+# Use `build_expr()` if you need to
+# (1) map R function names to Arrow C++ functions
+# (2) wrap R inputs (vectors) as Array/Scalar
+#
+# `Expression$create()` is lower level. Most of the functions below use it
+# because they manage the preparation of the user-provided inputs
+# and don't need to wrap scalars
+
+nse_funcs$cast <- function(x, target_type, safe = TRUE, ...) {
+  opts <- cast_options(safe, ...)
+  opts$to_type <- as_type(target_type)
+  Expression$create("cast", x, options = opts)
+}
+
+nse_funcs$dictionary_encode <- function(x,
+                                        null_encoding_behavior = c("mask", "encode")) {
+  behavior <- toupper(match.arg(null_encoding_behavior))
+  null_encoding_behavior <- NullEncodingBehavior[[behavior]]
+  Expression$create(
+    "dictionary_encode",
+    x,
+    options = list(null_encoding_behavior = null_encoding_behavior)
+  )
+}
+
+nse_funcs$between <- function(x, left, right) {
+  x >= left & x <= right
+}
+
+# as.* type casting functions
+# as.factor() is mapped in expression.R
+nse_funcs$as.character <- function(x) {
+  Expression$create("cast", x, options = cast_options(to_type = string()))
+}
+nse_funcs$as.double <- function(x) {
+  Expression$create("cast", x, options = cast_options(to_type = float64()))
+}
+nse_funcs$as.integer <- function(x) {
+  Expression$create(
+    "cast",
+    x,
+    options = cast_options(
+      to_type = int32(),
+      allow_float_truncate = TRUE,
+      allow_decimal_truncate = TRUE
+    )
+  )
+}
+nse_funcs$as.integer64 <- function(x) {
+  Expression$create(
+    "cast",
+    x,
+    options = cast_options(
+      to_type = int64(),
+      allow_float_truncate = TRUE,
+      allow_decimal_truncate = TRUE
+    )
+  )
+}
+nse_funcs$as.logical <- function(x) {
+  Expression$create("cast", x, options = cast_options(to_type = boolean()))
+}
+nse_funcs$as.numeric <- function(x) {
+  Expression$create("cast", x, options = cast_options(to_type = float64()))
+}
+
+# String functions
+nse_funcs$nchar <- function(x, type = "chars", allowNA = FALSE, keepNA = NA) {
+  if (allowNA) {
+    arrow_not_supported("allowNA = TRUE")
+  }
+  if (is.na(keepNA)) {
+    keepNA <- !identical(type, "width")
+  }
+  if (!keepNA) {
+    # TODO: I think there is a fill_null kernel we could use, set null to 2
+    arrow_not_supported("keepNA = TRUE")
+  }
+  if (identical(type, "bytes")) {
+    Expression$create("binary_length", x)
+  } else {
+    Expression$create("utf8_length", x)
+  }
+}
+
+nse_funcs$str_trim <- function(string, side = c("both", "left", "right")) {
+  side <- match.arg(side)
+  trim_fun <- switch(side,
+    left = "utf8_ltrim_whitespace",
+    right = "utf8_rtrim_whitespace",
+    both = "utf8_trim_whitespace"
+  )
+  Expression$create(trim_fun, string)
+}
+
+nse_funcs$grepl <- function(pattern, x, ignore.case = FALSE, fixed = FALSE) {
+  arrow_fun <- ifelse(fixed && !ignore.case, "match_substring", "match_substring_regex")
+  Expression$create(
+    arrow_fun,
+    x,
+    options = list(pattern = format_string_pattern(pattern, ignore.case, fixed))
+  )
+}
+
+nse_funcs$str_detect <- function(string, pattern, negate = FALSE) {
+  opts <- get_stringr_pattern_options(enexpr(pattern))
+  out <- nse_funcs$grepl(
+    pattern = opts$pattern,
+    x = string,
+    ignore.case = opts$ignore_case,
+    fixed = opts$fixed
+  )
+  if (negate) {
+    out <- !out
+  }
+  out
+}
+
+# Encapsulate some common logic for sub/gsub/str_replace/str_replace_all
+arrow_r_string_replace_function <- function(max_replacements) {
+  function(pattern, replacement, x, ignore.case = FALSE, fixed = FALSE) {
+    Expression$create(
+      ifelse(fixed && !ignore.case, "replace_substring", "replace_substring_regex"),
+      x,
+      options = list(
+        pattern = format_string_pattern(pattern, ignore.case, fixed),
+        replacement = format_string_replacement(replacement, ignore.case, fixed),
+        max_replacements = max_replacements
+      )
+    )
+  }
+}
+
+arrow_stringr_string_replace_function <- function(max_replacements) {
+  function(string, pattern, replacement) {
+    opts <- get_stringr_pattern_options(enexpr(pattern))
+    arrow_r_string_replace_function(max_replacements)(
+      pattern = opts$pattern,
+      replacement = replacement,
+      x = string,
+      ignore.case = opts$ignore_case,
+      fixed = opts$fixed
+    )
+  }
+}
+
+nse_funcs$sub <- arrow_r_string_replace_function(1L)
+nse_funcs$gsub <- arrow_r_string_replace_function(-1L)
+nse_funcs$str_replace <- arrow_stringr_string_replace_function(1L)
+nse_funcs$str_replace_all <- arrow_stringr_string_replace_function(-1L)
+
+nse_funcs$strsplit <- function(x,
+                               split,
+                               fixed = FALSE,
+                               perl = FALSE,
+                               useBytes = FALSE) {
+  assert_that(is.string(split))
+
+  # The Arrow C++ library does not support splitting a string by a regular
+  # expression pattern (ARROW-12608) but the default behavior of
+  # base::strsplit() is to interpret the split pattern as a regex
+  # (fixed = FALSE). R users commonly pass non-regex split patterns to
+  # strsplit() without bothering to set fixed = TRUE. It would be annoying if
+  # that didn't work here. So: if fixed = FALSE, let's check the split pattern
+  # to see if it is a regex (if it contains any regex metacharacters). If not,
+  # then allow to proceed.
+  if (!fixed && contains_regex(split)) {
+    arrow_not_supported("Regular expression matching in strsplit()")
+  }
+  # warn when the user specifies both fixed = TRUE and perl = TRUE, for
+  # consistency with the behavior of base::strsplit()
+  if (fixed && perl) {
+    warning("Argument 'perl = TRUE' will be ignored", call. = FALSE)
+  }
+  # since split is not a regex, proceed without any warnings or errors
+  # regardless of the value of perl, for consistency with the behavior of
+  # base::strsplit()
+  Expression$create(
+    "split_pattern",
+    x,
+    options = list(pattern = split, reverse = FALSE, max_splits = -1L)
+  )
+}
+
+nse_funcs$str_split <- function(string, pattern, n = Inf, simplify = FALSE) {
+  opts <- get_stringr_pattern_options(enexpr(pattern))
+  if (!opts$fixed && contains_regex(opts$pattern)) {
+    arrow_not_supported("Regular expression matching in str_split()")
+  }
+  if (opts$ignore_case) {
+    arrow_not_supported("Case-insensitive string splitting")
+  }
+  if (n == 0) {
+    arrow_not_supported("Splitting strings into zero parts")
+  }
+  if (identical(n, Inf)) {
+    n <- 0L
+  }
+  if (simplify) {
+    warning("Argument 'simplify = TRUE' will be ignored", call. = FALSE)
+  }
+  # The max_splits option in the Arrow C++ library controls the maximum number
+  # of places at which the string is split, whereas the argument n to
+  # str_split() controls the maximum number of pieces to return. So we must
+  # subtract 1 from n to get max_splits.
+  Expression$create(
+    "split_pattern",
+    string,
+    options = list(
+      pattern =
+      opts$pattern,
+      reverse = FALSE,
+      max_splits = n - 1L
+    )
+  )
+}
+
+# String function helpers
+
+# format `pattern` as needed for case insensitivity and literal matching by RE2
+format_string_pattern <- function(pattern, ignore.case, fixed) {
+  # Arrow lacks native support for case-insensitive literal string matching and
+  # replacement, so we use the regular expression engine (RE2) to do this.
+  # https://github.com/google/re2/wiki/Syntax
+  if (ignore.case) {
+    if (fixed) {
+      # Everything between "\Q" and "\E" is treated as literal text.
+      # If the search text contains any literal "\E" strings, make them
+      # lowercase so they won't signal the end of the literal text:
+      pattern <- gsub("\\E", "\\e", pattern, fixed = TRUE)
+      pattern <- paste0("\\Q", pattern, "\\E")
+    }
+    # Prepend "(?i)" for case-insensitive matching
+    pattern <- paste0("(?i)", pattern)
+  }
+  pattern
+}
+
+# format `replacement` as needed for literal replacement by RE2
+format_string_replacement <- function(replacement, ignore.case, fixed) {
+  # Arrow lacks native support for case-insensitive literal string
+  # replacement, so we use the regular expression engine (RE2) to do this.
+  # https://github.com/google/re2/wiki/Syntax
+  if (ignore.case && fixed) {
+    # Escape single backslashes in the regex replacement text so they are
+    # interpreted as literal backslashes:
+    replacement <- gsub("\\", "\\\\", replacement, fixed = TRUE)
+  }
+  replacement
+}
+
+#' Get `stringr` pattern options
+#'
+#' This function assigns definitions for the `stringr` pattern modifier
+#' functions (`fixed()`, `regex()`, etc.) inside itself, and uses them to
+#' evaluate the quoted expression `pattern`, returning a list that is used
+#' to control pattern matching behavior in internal `arrow` functions.
+#'
+#' @param pattern Unevaluated expression containing a call to a `stringr`
+#' pattern modifier function
+#'
+#' @return List containing elements `pattern`, `fixed`, and `ignore_case`
+#' @keywords internal
+get_stringr_pattern_options <- function(pattern) {
+  fixed <- function(pattern, ignore_case = FALSE, ...) {
+    check_dots(...)
+    list(pattern = pattern, fixed = TRUE, ignore_case = ignore_case)
+  }
+  regex <- function(pattern, ignore_case = FALSE, ...) {
+    check_dots(...)
+    list(pattern = pattern, fixed = FALSE, ignore_case = ignore_case)
+  }
+  coll <- function(...) {
+    arrow_not_supported("Pattern modifier `coll()`")
+  }
+  boundary <- function(...) {
+    arrow_not_supported("Pattern modifier `boundary()`")
+  }
+  check_dots <- function(...) {
+    dots <- list(...)
+    if (length(dots)) {
+      warning(
+        "Ignoring pattern modifier ",
+        ngettext(length(dots), "argument ", "arguments "),
+        "not supported in Arrow: ",
+        oxford_paste(names(dots)),
+        call. = FALSE
+      )
+    }
+  }
+  ensure_opts <- function(opts) {
+    if (is.character(opts)) {
+      opts <- list(pattern = opts, fixed = FALSE, ignore_case = FALSE)
+    }
+    opts
+  }
+  ensure_opts(eval(pattern))
+}
+
+#' Does this string contain regex metacharacters?
+#'
+#' @param string String to be tested
+#' @keywords internal
+#' @return Logical: does `string` contain regex metacharacters?
+contains_regex <- function(string) {
+  grepl("[.\\|()[{^$*+?]", string)
+}
diff --git a/r/R/dplyr-group-by.R b/r/R/dplyr-group-by.R
new file mode 100644
index 00000000000..d2cf79253a5
--- /dev/null
+++ b/r/R/dplyr-group-by.R
@@ -0,0 +1,65 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+# The following S3 methods are registered on load if dplyr is present
+
+group_by.arrow_dplyr_query <- function(.data,
+                                       ...,
+                                       .add = FALSE,
+                                       add = .add,
+                                       .drop = dplyr::group_by_drop_default(.data)) {
+  .data <- arrow_dplyr_query(.data)
+  # ... can contain expressions (i.e. can add (or rename?) columns)
+  # Check for those (they show up as named expressions)
+  new_groups <- enquos(...)
+  new_groups <- new_groups[nzchar(names(new_groups))]
+  if (length(new_groups)) {
+    # Add them to the data
+    .data <- dplyr::mutate(.data, !!!new_groups)
+  }
+  if (".add" %in% names(formals(dplyr::group_by))) {
+    # dplyr >= 1.0
+    gv <- dplyr::group_by_prepare(.data, ..., .add = .add)$group_names
+  } else {
+    gv <- dplyr::group_by_prepare(.data, ..., add = add)$group_names
+  }
+  .data$group_by_vars <- gv
+  .data$drop_empty_groups <- ifelse(length(gv), .drop, dplyr::group_by_drop_default(.data))
+  .data
+}
+group_by.Dataset <- group_by.ArrowTabular <- group_by.arrow_dplyr_query
+
+groups.arrow_dplyr_query <- function(x) syms(dplyr::group_vars(x))
+groups.Dataset <- groups.ArrowTabular <- function(x) NULL
+
+group_vars.arrow_dplyr_query <- function(x) x$group_by_vars
+group_vars.Dataset <- group_vars.ArrowTabular <- function(x) NULL
+
+# the logical literal in the two functions below controls the default value of
+# the .drop argument to group_by()
+group_by_drop_default.arrow_dplyr_query <-
+  function(.tbl) .tbl$drop_empty_groups %||% TRUE
+group_by_drop_default.Dataset <- group_by_drop_default.ArrowTabular <-
+  function(.tbl) TRUE
+
+ungroup.arrow_dplyr_query <- function(x, ...) {
+  x$group_by_vars <- character()
+  x$drop_empty_groups <- NULL
+  x
+}
+ungroup.Dataset <- ungroup.ArrowTabular <- force
\ No newline at end of file
diff --git a/r/R/dplyr-mutate.R b/r/R/dplyr-mutate.R
new file mode 100644
index 00000000000..8513a45f6e9
--- /dev/null
+++ b/r/R/dplyr-mutate.R
@@ -0,0 +1,117 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+# The following S3 methods are registered on load if dplyr is present
+
+mutate.arrow_dplyr_query <- function(.data,
+                                     ...,
+                                     .keep = c("all", "used", "unused", "none"),
+                                     .before = NULL,
+                                     .after = NULL) {
+  call <- match.call()
+  exprs <- quos(...)
+
+  .keep <- match.arg(.keep)
+  .before <- enquo(.before)
+  .after <- enquo(.after)
+
+  if (.keep %in% c("all", "unused") && length(exprs) == 0) {
+    # Nothing to do
+    return(.data)
+  }
+
+  .data <- arrow_dplyr_query(.data)
+
+  # Restrict the cases we support for now
+  if (length(dplyr::group_vars(.data)) > 0) {
+    # mutate() on a grouped dataset does calculations within groups
+    # This doesn't matter on scalar ops (arithmetic etc.) but it does
+    # for things with aggregations (e.g. subtracting the mean)
+    return(abandon_ship(call, .data, 'mutate() on grouped data not supported in Arrow'))
+  }
+
+  # Check for unnamed expressions and fix if any
+  unnamed <- !nzchar(names(exprs))
+  # Deparse and take the first element in case they're long expressions
+  names(exprs)[unnamed] <- map_chr(exprs[unnamed], as_label)
+
+  mask <- arrow_mask(.data)
+  results <- list()
+  for (i in seq_along(exprs)) {
+    # Iterate over the indices and not the names because names may be repeated
+    # (which overwrites the previous name)
+    new_var <- names(exprs)[i]
+    results[[new_var]] <- arrow_eval(exprs[[i]], mask)
+    if (inherits(results[[new_var]], "try-error")) {
+      msg <- handle_arrow_not_supported(
+        results[[new_var]],
+        as_label(exprs[[i]])
+      )
+      return(abandon_ship(call, .data, msg))
+    } else if (!inherits(results[[new_var]], "Expression") &&
+               !is.null(results[[new_var]])) {
+      # We need some wrapping to handle literal values
+      if (length(results[[new_var]]) != 1) {
+        msg <- paste0('In ', new_var, " = ", as_label(exprs[[i]]), ", only values of size one are recycled")
+        return(abandon_ship(call, .data, msg))
+      }
+      results[[new_var]] <- Expression$scalar(results[[new_var]])
+    }
+    # Put it in the data mask too
+    mask[[new_var]] <- mask$.data[[new_var]] <- results[[new_var]]
+  }
+
+  old_vars <- names(.data$selected_columns)
+  # Note that this is names(exprs) not names(results):
+  # if results$new_var is NULL, that means we are supposed to remove it
+  new_vars <- names(exprs)
+
+  # Assign the new columns into the .data$selected_columns
+  for (new_var in new_vars) {
+    .data$selected_columns[[new_var]] <- results[[new_var]]
+  }
+
+  # Deduplicate new_vars and remove NULL columns from new_vars
+  new_vars <- intersect(new_vars, names(.data$selected_columns))
+
+  # Respect .before and .after
+  if (!quo_is_null(.before) || !quo_is_null(.after)) {
+    new <- setdiff(new_vars, old_vars)
+    .data <- dplyr::relocate(.data, !!new, .before = !!.before, .after = !!.after)
+  }
+
+  # Respect .keep
+  if (.keep == "none") {
+    .data$selected_columns <- .data$selected_columns[new_vars]
+  } else if (.keep != "all") {
+    # "used" or "unused"
+    used_vars <- unlist(lapply(exprs, all.vars), use.names = FALSE)
+    if (.keep == "used") {
+      .data$selected_columns[setdiff(old_vars, used_vars)] <- NULL
+    } else {
+      # "unused"
+      .data$selected_columns[intersect(old_vars, used_vars)] <- NULL
+    }
+  }
+  # Even if "none", we still keep group vars
+  ensure_group_vars(.data)
+}
+mutate.Dataset <- mutate.ArrowTabular <- mutate.arrow_dplyr_query
+
+transmute.arrow_dplyr_query <- function(.data, ...) dplyr::mutate(.data, ..., .keep = "none")
+transmute.Dataset <- transmute.ArrowTabular <- transmute.arrow_dplyr_query
\ No newline at end of file
diff --git a/r/R/dplyr-select.R b/r/R/dplyr-select.R
new file mode 100644
index 00000000000..3730fe63fec
--- /dev/null
+++ b/r/R/dplyr-select.R
@@ -0,0 +1,120 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+# The following S3 methods are registered on load if dplyr is present
+
+tbl_vars.arrow_dplyr_query <- function(x) names(x$selected_columns)
+
+select.arrow_dplyr_query <- function(.data, ...) {
+  check_select_helpers(enexprs(...))
+  column_select(arrow_dplyr_query(.data), !!!enquos(...))
+}
+select.Dataset <- select.ArrowTabular <- select.arrow_dplyr_query
+
+rename.arrow_dplyr_query <- function(.data, ...) {
+  check_select_helpers(enexprs(...))
+  column_select(arrow_dplyr_query(.data), !!!enquos(...), .FUN = vars_rename)
+}
+rename.Dataset <- rename.ArrowTabular <- rename.arrow_dplyr_query
+
+column_select <- function(.data, ..., .FUN = vars_select) {
+  # .FUN is either tidyselect::vars_select or tidyselect::vars_rename
+  # It operates on the names() of selected_columns, i.e. the column names
+  # factoring in any renaming that may already have happened
+  out <- .FUN(names(.data), !!!enquos(...))
+  # Make sure that the resulting selected columns map back to the original data,
+  # as in when there are multiple renaming steps
+  .data$selected_columns <- set_names(.data$selected_columns[out], names(out))
+
+  # If we've renamed columns, we need to project that renaming into other
+  # query parameters we've collected
+  renamed <- out[names(out) != out]
+  if (length(renamed)) {
+    # Massage group_by
+    gbv <- .data$group_by_vars
+    renamed_groups <- gbv %in% renamed
+    gbv[renamed_groups] <- names(renamed)[match(gbv[renamed_groups], renamed)]
+    .data$group_by_vars <- gbv
+    # No need to massage filters because those contain references to Arrow objects
+  }
+  .data
+}
+
+relocate.arrow_dplyr_query <- function(.data, ..., .before = NULL, .after = NULL) {
+  # The code in this function is adapted from the code in dplyr::relocate.data.frame
+  # at https://github.com/tidyverse/dplyr/blob/master/R/relocate.R
+  # TODO: revisit this after https://github.com/tidyverse/dplyr/issues/5829
+  check_select_helpers(c(enexprs(...), enexpr(.before), enexpr(.after)))
+
+  .data <- arrow_dplyr_query(.data)
+
+  to_move <- eval_select(expr(c(...)), .data$selected_columns)
+
+  .before <- enquo(.before)
+  .after <- enquo(.after)
+  has_before <- !quo_is_null(.before)
+  has_after <- !quo_is_null(.after)
+
+  if (has_before && has_after) {
+    abort("Must supply only one of `.before` and `.after`.")
+  } else if (has_before) {
+    where <- min(unname(eval_select(.before, .data$selected_columns)))
+    if (!where %in% to_move) {
+      to_move <- c(to_move, where)
+    }
+  } else if (has_after) {
+    where <- max(unname(eval_select(.after, .data$selected_columns)))
+    if (!where %in% to_move) {
+      to_move <- c(where, to_move)
+    }
+  } else {
+    where <- 1L
+    if (!where %in% to_move) {
+      to_move <- c(to_move, where)
+    }
+  }
+
+  lhs <- setdiff(seq2(1, where - 1), to_move)
+  rhs <- setdiff(seq2(where + 1, length(.data$selected_columns)), to_move)
+
+  pos <- vec_unique(c(lhs, to_move, rhs))
+  new_names <- names(pos)
+  .data$selected_columns <- .data$selected_columns[pos]
+
+  if (!is.null(new_names)) {
+    names(.data$selected_columns)[new_names != ""] <- new_names[new_names != ""]
+  }
+  .data
+}
+relocate.Dataset <- relocate.ArrowTabular <- relocate.arrow_dplyr_query
+
+check_select_helpers <- function(exprs) {
+  # Throw an error if unsupported tidyselect selection helpers in `exprs`
+  exprs <- lapply(exprs, function(x) if (is_quosure(x)) quo_get_expr(x) else x)
+  unsup_select_helpers <- "where"
+  funs_in_exprs <- unlist(lapply(exprs, all_funs))
+  unsup_funs <- funs_in_exprs[funs_in_exprs %in% unsup_select_helpers]
+  if (length(unsup_funs)) {
+    stop(
+      "Unsupported selection ",
+      ngettext(length(unsup_funs), "helper: ", "helpers: "),
+      oxford_paste(paste0(unsup_funs, "()"), quote = FALSE),
+      call. = FALSE
+    )
+  }
+}
\ No newline at end of file
diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R
new file mode 100644
index 00000000000..ecb459c982c
--- /dev/null
+++ b/r/R/dplyr-summarize.R
@@ -0,0 +1,36 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+# The following S3 methods are registered on load if dplyr is present
+
+summarise.arrow_dplyr_query <- function(.data, ...) {
+  call <- match.call()
+  .data <- arrow_dplyr_query(.data)
+  if (query_on_dataset(.data)) {
+    not_implemented_for_dataset("summarize()")
+  }
+  exprs <- quos(...)
+  # Only retain the columns we need to do our aggregations
+  vars_to_keep <- unique(c(
+    unlist(lapply(exprs, all.vars)),     # vars referenced in summarise
+    dplyr::group_vars(.data)             # vars needed for grouping
+  ))
+  .data <- dplyr::select(.data, vars_to_keep)
+  dplyr::summarise(dplyr::collect(.data), ...)
+}
+summarise.Dataset <- summarise.ArrowTabular <- summarise.arrow_dplyr_query
\ No newline at end of file
diff --git a/r/R/dplyr.R b/r/R/dplyr.R
index 264c4929f72..56be8cff1db 100644
--- a/r/R/dplyr.R
+++ b/r/R/dplyr.R
@@ -30,14 +30,19 @@ arrow_dplyr_query <- function(.data) {
   if (inherits(.data, "arrow_dplyr_query")) {
     return(.data)
   }
+
   structure(
     list(
-      .data = .data$clone(),
+      .data = if (inherits(.data, "Dataset")) {
+        .data$clone()
+      } else {
+        InMemoryDataset$create(.data)
+      },
       # selected_columns is a named list:
       # * contents are references/expressions pointing to the data
       # * names are the names they should be in the end (i.e. this
       #   records any renaming)
-      selected_columns = make_field_refs(names(.data), dataset = inherits(.data, "Dataset")),
+      selected_columns = make_field_refs(names(.data)),
       # filtered_rows will be an Expression
       filtered_rows = TRUE,
       # group_by_vars is a character vector of columns (as renamed)
@@ -58,40 +63,39 @@ arrow_dplyr_query <- function(.data) {
   )
 }
 
+make_field_refs <- function(field_names) {
+  set_names(lapply(field_names, Expression$field_ref), field_names)
+}
+
 #' @export
 print.arrow_dplyr_query <- function(x, ...) {
   schm <- x$.data$schema
-  cols <- get_field_names(x)
-  # If cols are expressions, they won't be in the schema and will be "" in cols
-  fields <- map_chr(cols, function(name) {
+  types <- map_chr(x$selected_columns, function(expr) {
+    name <- expr$field_name
     if (nzchar(name)) {
-      schm$GetFieldByName(name)$ToString()
+      # Just a field_ref, so look up in the schema
+      schm$GetFieldByName(name)$type$ToString()
     } else {
-      "expr"
+      # Expression, so get its type and append the expression
+      paste0(
+        expr$type(schm)$ToString(),
+        " (", expr$ToString(), ")"
+      )
     }
   })
-  # Strip off the field names as they are in the dataset and add the renamed ones
-  fields <- paste(names(cols), sub("^.*?: ", "", fields), sep = ": ", collapse = "\n")
+  fields <- paste(names(types), types, sep = ": ", collapse = "\n")
   cat(class(x$.data)[1], " (query)\n", sep = "")
   cat(fields, "\n", sep = "")
   cat("\n")
   if (!isTRUE(x$filtered_rows)) {
-    if (query_on_dataset(x)) {
-      filter_string <- x$filtered_rows$ToString()
-    } else {
-      filter_string <- .format_array_expression(x$filtered_rows)
-    }
+    filter_string <- x$filtered_rows$ToString()
     cat("* Filter: ", filter_string, "\n", sep = "")
   }
   if (length(x$group_by_vars)) {
     cat("* Grouped by ", paste(x$group_by_vars, collapse = ", "), "\n", sep = "")
   }
   if (length(x$arrange_vars)) {
-    if (query_on_dataset(x)) {
-      arrange_strings <- map_chr(x$arrange_vars, function(x) x$ToString())
-    } else {
-      arrange_strings <- map_chr(x$arrange_vars, .format_array_expression)
-    }
+    arrange_strings <- map_chr(x$arrange_vars, function(x) x$ToString())
     cat(
       "* Sorted by ",
       paste(
@@ -109,33 +113,6 @@ print.arrow_dplyr_query <- function(x, ...) {
   invisible(x)
 }
 
-get_field_names <- function(selected_cols) {
-  if (inherits(selected_cols, "arrow_dplyr_query")) {
-    selected_cols <- selected_cols$selected_columns
-  }
-  map_chr(selected_cols, function(x) {
-    if (inherits(x, "Expression")) {
-      out <- x$field_name
-    } else if (inherits(x, "array_expression")) {
-      out <- x$args$field_name
-    } else {
-      out <- NULL
-    }
-    # If x isn't some kind of field reference, out is NULL,
-    # but we always need to return a string
-    out %||% ""
-  })
-}
-
-make_field_refs <- function(field_names, dataset = TRUE) {
-  if (dataset) {
-    out <- lapply(field_names, Expression$field_ref)
-  } else {
-    out <- lapply(field_names, function(x) array_expression("array_ref", field_name = x))
-  }
-  set_names(out, field_names)
-}
-
 # These are the names reflecting all select/rename, not what is in Arrow
 #' @export
 names.arrow_dplyr_query <- function(x) names(x$selected_columns)
@@ -146,12 +123,8 @@ dim.arrow_dplyr_query <- function(x) {
 
   if (isTRUE(x$filtered)) {
     rows <- x$.data$num_rows
-  } else if (query_on_dataset(x)) {
-    scanner <- Scanner$create(x)
-    rows <- scanner$CountRows()
   } else {
-    # Evaluate the filter expression to a BooleanArray and count
-    rows <- as.integer(sum(eval_array_expression(x$filtered_rows, x$.data), na.rm = TRUE))
+    rows <- Scanner$create(x)$CountRows()
   }
   c(rows, cols)
 }
@@ -163,631 +136,19 @@ as.data.frame.arrow_dplyr_query <- function(x, row.names = NULL, optional = FALS
 
 #' @export
 head.arrow_dplyr_query <- function(x, n = 6L, ...) {
-  if (query_on_dataset(x)) {
-    head.Dataset(x, n, ...)
-  } else {
-    out <- collect.arrow_dplyr_query(x, as_data_frame = FALSE)
-    if (inherits(out, "arrow_dplyr_query")) {
-      out$.data <- head(out$.data, n)
-    } else {
-      out <- head(out, n)
-    }
-    out
-  }
+  out <- head.Dataset(x, n, ...)
+  restore_dplyr_features(out, x)
 }
 
 #' @export
 tail.arrow_dplyr_query <- function(x, n = 6L, ...) {
-  if (query_on_dataset(x)) {
-    tail.Dataset(x, n, ...)
-  } else {
-    out <- collect.arrow_dplyr_query(x, as_data_frame = FALSE)
-    if (inherits(out, "arrow_dplyr_query")) {
-      out$.data <- tail(out$.data, n)
-    } else {
-      out <- tail(out, n)
-    }
-    out
-  }
+  out <- tail.Dataset(x, n, ...)
+  restore_dplyr_features(out, x)
 }
 
 #' @export
-`[.arrow_dplyr_query` <- function(x, i, j, ..., drop = FALSE) {
-  if (query_on_dataset(x)) {
-    `[.Dataset`(x, i, j, ..., drop = FALSE)
-  } else {
-    stop(
-      "[ method not implemented for queries. Call 'collect(x, as_data_frame = FALSE)' first",
-      call. = FALSE
-    )
-  }
-}
-
-# The following S3 methods are registered on load if dplyr is present
-tbl_vars.arrow_dplyr_query <- function(x) names(x$selected_columns)
-
-select.arrow_dplyr_query <- function(.data, ...) {
-  check_select_helpers(enexprs(...))
-  column_select(arrow_dplyr_query(.data), !!!enquos(...))
-}
-select.Dataset <- select.ArrowTabular <- select.arrow_dplyr_query
-
-rename.arrow_dplyr_query <- function(.data, ...) {
-  check_select_helpers(enexprs(...))
-  column_select(arrow_dplyr_query(.data), !!!enquos(...), .FUN = vars_rename)
-}
-rename.Dataset <- rename.ArrowTabular <- rename.arrow_dplyr_query
-
-column_select <- function(.data, ..., .FUN = vars_select) {
-  # .FUN is either tidyselect::vars_select or tidyselect::vars_rename
-  # It operates on the names() of selected_columns, i.e. the column names
-  # factoring in any renaming that may already have happened
-  out <- .FUN(names(.data), !!!enquos(...))
-  # Make sure that the resulting selected columns map back to the original data,
-  # as in when there are multiple renaming steps
-  .data$selected_columns <- set_names(.data$selected_columns[out], names(out))
-
-  # If we've renamed columns, we need to project that renaming into other
-  # query parameters we've collected
-  renamed <- out[names(out) != out]
-  if (length(renamed)) {
-    # Massage group_by
-    gbv <- .data$group_by_vars
-    renamed_groups <- gbv %in% renamed
-    gbv[renamed_groups] <- names(renamed)[match(gbv[renamed_groups], renamed)]
-    .data$group_by_vars <- gbv
-    # No need to massage filters because those contain references to Arrow objects
-  }
-  .data
-}
-
-relocate.arrow_dplyr_query <- function(.data, ..., .before = NULL, .after = NULL) {
-  # The code in this function is adapted from the code in dplyr::relocate.data.frame
-  # at https://github.com/tidyverse/dplyr/blob/master/R/relocate.R
-  # TODO: revisit this after https://github.com/tidyverse/dplyr/issues/5829
-  check_select_helpers(c(enexprs(...), enexpr(.before), enexpr(.after)))
-
-  .data <- arrow_dplyr_query(.data)
-
-  to_move <- eval_select(expr(c(...)), .data$selected_columns)
-
-  .before <- enquo(.before)
-  .after <- enquo(.after)
-  has_before <- !quo_is_null(.before)
-  has_after <- !quo_is_null(.after)
-
-  if (has_before && has_after) {
-    abort("Must supply only one of `.before` and `.after`.")
-  } else if (has_before) {
-    where <- min(unname(eval_select(.before, .data$selected_columns)))
-    if (!where %in% to_move) {
-      to_move <- c(to_move, where)
-    }
-  } else if (has_after) {
-    where <- max(unname(eval_select(.after, .data$selected_columns)))
-    if (!where %in% to_move) {
-      to_move <- c(where, to_move)
-    }
-  } else {
-    where <- 1L
-    if (!where %in% to_move) {
-      to_move <- c(to_move, where)
-    }
-  }
-
-  lhs <- setdiff(seq2(1, where - 1), to_move)
-  rhs <- setdiff(seq2(where + 1, length(.data$selected_columns)), to_move)
-
-  pos <- vec_unique(c(lhs, to_move, rhs))
-  new_names <- names(pos)
-  .data$selected_columns <- .data$selected_columns[pos]
-
-  if (!is.null(new_names)) {
-    names(.data$selected_columns)[new_names != ""] <- new_names[new_names != ""]
-  }
-  .data
-}
-relocate.Dataset <- relocate.ArrowTabular <- relocate.arrow_dplyr_query
-
-check_select_helpers <- function(exprs) {
-  # Throw an error if unsupported tidyselect selection helpers in `exprs`
-  exprs <- lapply(exprs, function(x) if (is_quosure(x)) quo_get_expr(x) else x)
-  unsup_select_helpers <- "where"
-  funs_in_exprs <- unlist(lapply(exprs, all_funs))
-  unsup_funs <- funs_in_exprs[funs_in_exprs %in% unsup_select_helpers]
-  if (length(unsup_funs)) {
-    stop(
-      "Unsupported selection ",
-      ngettext(length(unsup_funs), "helper: ", "helpers: "),
-      oxford_paste(paste0(unsup_funs, "()"), quote = FALSE),
-      call. = FALSE
-    )
-  }
-}
-
-filter.arrow_dplyr_query <- function(.data, ..., .preserve = FALSE) {
-  # TODO something with the .preserve argument
-  filts <- quos(...)
-  if (length(filts) == 0) {
-    # Nothing to do
-    return(.data)
-  }
-
-  .data <- arrow_dplyr_query(.data)
-  # tidy-eval the filter expressions inside an Arrow data_mask
-  filters <- lapply(filts, arrow_eval, arrow_mask(.data))
-  bad_filters <- map_lgl(filters, ~inherits(., "try-error"))
-  if (any(bad_filters)) {
-    bads <- oxford_paste(map_chr(filts, as_label)[bad_filters], quote = FALSE)
-    if (query_on_dataset(.data)) {
-      # Abort. We don't want to auto-collect if this is a Dataset because that
-      # could blow up, too big.
-      stop(
-        "Filter expression not supported for Arrow Datasets: ", bads,
-        "\nCall collect() first to pull data into R.",
-        call. = FALSE
-      )
-    } else {
-      # TODO: only show this in some debug mode?
-      warning(
-        "Filter expression not implemented in Arrow: ", bads, "; pulling data into R",
-        immediate. = TRUE,
-        call. = FALSE
-      )
-      # Set any valid filters first, then collect and then apply the invalid ones in R
-      .data <- set_filters(.data, filters[!bad_filters])
-      return(dplyr::filter(dplyr::collect(.data), !!!filts[bad_filters]))
-    }
-  }
-
-  set_filters(.data, filters)
-}
-filter.Dataset <- filter.ArrowTabular <- filter.arrow_dplyr_query
-
-arrow_eval <- function (expr, mask) {
-  # filter(), mutate(), etc. work by evaluating the quoted `exprs` to generate Expressions
-  # with references to Arrays (if .data is Table/RecordBatch) or Fields (if
-  # .data is a Dataset).
-
-  # This yields an Expression as long as the `exprs` are implemented in Arrow.
-  # Otherwise, it returns a try-error
-  tryCatch(eval_tidy(expr, mask), error = function(e) {
-    # Look for the cases where bad input was given, i.e. this would fail
-    # in regular dplyr anyway, and let those raise those as errors;
-    # else, for things not supported by Arrow return a "try-error",
-    # which we'll handle differently
-    msg <- conditionMessage(e)
-    patterns <- dplyr_functions$i18ized_error_pattern
-    if (is.null(patterns)) {
-      patterns <- i18ize_error_messages()
-      # Memoize it
-      dplyr_functions$i18ized_error_pattern <- patterns
-    }
-    if (grepl(patterns, msg)) {
-      stop(e)
-    }
-    invisible(structure(msg, class = "try-error", condition = e))
-  })
-}
-
-i18ize_error_messages <- function() {
-  # Figure out what the error messages will be with this LANGUAGE
-  # so that we can look for them
-  out <- list(
-    obj = tryCatch(eval(parse(text = "X_____X")), error = function(e) conditionMessage(e)),
-    fun = tryCatch(eval(parse(text = "X_____X()")), error = function(e) conditionMessage(e))
-  )
-  paste(map(out, ~sub("X_____X", ".*", .)), collapse = "|")
-}
-
-# Helper to assemble the functions that go in the NSE data mask
-# The only difference between the Dataset and the Table/RecordBatch versions
-# is that they use a different wrapping function (FUN) to hold the unevaluated
-# expression.
-build_function_list <- function(FUN) {
-  wrapper <- function(operator) {
-    force(operator)
-    function(...) FUN(operator, ...)
-  }
-  all_arrow_funs <- list_compute_functions()
-
-  c(
-    # Include mappings from R function name spellings
-    lapply(set_names(names(.array_function_map)), wrapper),
-    # Plus some special handling where it's not 1:1
-    cast = function(x, target_type, safe = TRUE, ...) {
-      opts <- cast_options(safe, ...)
-      opts$to_type <- as_type(target_type)
-      FUN("cast", x, options = opts)
-    },
-    dictionary_encode = function(x, null_encoding_behavior = c("mask", "encode")) {
-      null_encoding_behavior <-
-        NullEncodingBehavior[[toupper(match.arg(null_encoding_behavior))]]
-      FUN(
-        "dictionary_encode",
-        x,
-        options = list(null_encoding_behavior = null_encoding_behavior)
-      )
-    },
-    # as.factor() is mapped in expression.R
-    as.character = function(x) {
-      FUN("cast", x, options = cast_options(to_type = string()))
-    },
-    as.double = function(x) {
-      FUN("cast", x, options = cast_options(to_type = float64()))
-    },
-    as.integer = function(x) {
-      FUN(
-        "cast",
-        x,
-        options = cast_options(
-          to_type = int32(),
-          allow_float_truncate = TRUE,
-          allow_decimal_truncate = TRUE
-        )
-      )
-    },
-    as.integer64 = function(x) {
-      FUN(
-        "cast",
-        x,
-        options = cast_options(
-          to_type = int64(),
-          allow_float_truncate = TRUE,
-          allow_decimal_truncate = TRUE
-        )
-      )
-    },
-    as.logical = function(x) {
-      FUN("cast", x, options = cast_options(to_type = boolean()))
-    },
-    as.numeric = function(x) {
-      FUN("cast", x, options = cast_options(to_type = float64()))
-    },
-    nchar = function(x, type = "chars", allowNA = FALSE, keepNA = NA) {
-      if (allowNA) {
-        stop("allowNA = TRUE not supported for Arrow", call. = FALSE)
-      }
-      if (is.na(keepNA)) {
-        keepNA <- !identical(type, "width")
-      }
-      if (!keepNA) {
-        # TODO: I think there is a fill_null kernel we could use, set null to 2
-        stop("keepNA = TRUE not supported for Arrow", call. = FALSE)
-      }
-      if (identical(type, "bytes")) {
-        FUN("binary_length", x)
-      } else {
-        FUN("utf8_length", x)
-      }
-    },
-    str_trim = function(string, side = c("both", "left", "right")) {
-      side <- match.arg(side)
-      switch(
-        side,
-        left = FUN("utf8_ltrim_whitespace", string),
-        right = FUN("utf8_rtrim_whitespace", string),
-        both = FUN("utf8_trim_whitespace", string)
-      )
-    },
-    grepl = arrow_r_string_match_function(FUN),
-    str_detect = arrow_stringr_string_match_function(FUN),
-    sub = arrow_r_string_replace_function(FUN, 1L),
-    gsub = arrow_r_string_replace_function(FUN, -1L),
-    str_replace = arrow_stringr_string_replace_function(FUN, 1L),
-    str_replace_all = arrow_stringr_string_replace_function(FUN, -1L),
-    strsplit = arrow_r_string_split_function(FUN),
-    str_split = arrow_stringr_string_split_function(FUN),
-    between = function(x, left, right) {
-      x >= left & x <= right
-    },
-    # Now also include all available Arrow Compute functions,
-    # namespaced as arrow_fun
-    set_names(
-      lapply(all_arrow_funs, wrapper),
-      paste0("arrow_", all_arrow_funs)
-    )
-  )
-}
-
-arrow_r_string_match_function <- function(FUN) {
-  function(pattern, x, ignore.case = FALSE, fixed = FALSE) {
-    FUN(
-      ifelse(fixed && !ignore.case, "match_substring", "match_substring_regex"),
-      x,
-      options = list(pattern = format_string_pattern(pattern, ignore.case, fixed))
-    )
-  }
-}
-
-arrow_stringr_string_match_function <- function(FUN) {
-  function(string, pattern, negate = FALSE) {
-    opts <- get_stringr_pattern_options(enexpr(pattern))
-    out <- arrow_r_string_match_function(FUN)(
-      pattern = opts$pattern,
-      x = string,
-      ignore.case = opts$ignore_case,
-      fixed = opts$fixed
-    )
-    if (negate) out <- FUN("invert", out)
-    out
-  }
-}
-
-arrow_r_string_replace_function <- function(FUN, max_replacements) {
-  function(pattern, replacement, x, ignore.case = FALSE, fixed = FALSE) {
-    FUN(
-      ifelse(fixed && !ignore.case, "replace_substring", "replace_substring_regex"),
-      x,
-      options = list(
-        pattern = format_string_pattern(pattern, ignore.case, fixed),
-        replacement =  format_string_replacement(replacement, ignore.case, fixed),
-        max_replacements = max_replacements
-      )
-    )
-  }
-}
-
-arrow_stringr_string_replace_function <- function(FUN, max_replacements) {
-  function(string, pattern, replacement) {
-    opts <- get_stringr_pattern_options(enexpr(pattern))
-    arrow_r_string_replace_function(FUN, max_replacements)(
-      pattern = opts$pattern,
-      replacement = replacement,
-      x = string,
-      ignore.case = opts$ignore_case,
-      fixed = opts$fixed
-    )
-  }
-}
-
-arrow_r_string_split_function <- function(FUN, reverse = FALSE, max_splits = -1) {
-  function(x, split, fixed = FALSE, perl = FALSE, useBytes = FALSE) {
-
-    assert_that(is.string(split))
-
-    # The Arrow C++ library does not support splitting a string by a regular
-    # expression pattern (ARROW-12608) but the default behavior of
-    # base::strsplit() is to interpret the split pattern as a regex
-    # (fixed = FALSE). R users commonly pass non-regex split patterns to
-    # strsplit() without bothering to set fixed = TRUE. It would be annoying if
-    # that didn't work here. So: if fixed = FALSE, let's check the split pattern
-    # to see if it is a regex (if it contains any regex metacharacters). If not,
-    # then allow to proceed.
-    if (!fixed && contains_regex(split)) {
-      stop("Regular expression matching not supported in strsplit for Arrow", call. = FALSE)
-    }
-    # warn when the user specifies both fixed = TRUE and perl = TRUE, for
-    # consistency with the behavior of base::strsplit()
-    if (fixed && perl) {
-      warning("Argument 'perl = TRUE' will be ignored", call. = FALSE)
-    }
-    # since split is not a regex, proceed without any warnings or errors
-    # regardless of the value of perl, for consistency with the behavior of
-    # base::strsplit()
-    FUN("split_pattern", x, options = list(pattern = split, reverse = reverse, max_splits = max_splits))
-  }
-}
-
-arrow_stringr_string_split_function <- function(FUN, reverse = FALSE) {
-  function(string, pattern, n = Inf, simplify = FALSE) {
-    opts <- get_stringr_pattern_options(enexpr(pattern))
-    if (!opts$fixed && contains_regex(opts$pattern)) {
-      stop("Regular expression matching not supported in str_split() for Arrow", call. = FALSE)
-    }
-    if (opts$ignore_case) {
-      stop("Case-insensitive string splitting not supported in Arrow", call. = FALSE)
-    }
-    if (n == 0) {
-      stop("Splitting strings into zero parts not supported in Arrow" , call. = FALSE)
-    }
-    if (identical(n, Inf)) {
-      n <- 0L
-    }
-    if (simplify) {
-      warning("Argument 'simplify = TRUE' will be ignored", call. = FALSE)
-    }
-    # The max_splits option in the Arrow C++ library controls the maximum number
-    # of places at which the string is split, whereas the argument n to
-    # str_split() controls the maximum number of pieces to return. So we must
-    # subtract 1 from n to get max_splits.
-    FUN("split_pattern", string, options = list(pattern = opts$pattern, reverse = reverse, max_splits = n - 1L))
-  }
-}
-
-# format `pattern` as needed for case insensitivity and literal matching by RE2
-format_string_pattern <- function(pattern, ignore.case, fixed) {
-  # Arrow lacks native support for case-insensitive literal string matching and
-  # replacement, so we use the regular expression engine (RE2) to do this.
-  # https://github.com/google/re2/wiki/Syntax
-  if (ignore.case) {
-    if (fixed) {
-      # Everything between "\Q" and "\E" is treated as literal text.
-      # If the search text contains any literal "\E" strings, make them
-      # lowercase so they won't signal the end of the literal text:
-      pattern <- gsub("\\E", "\\e", pattern, fixed = TRUE)
-      pattern <- paste0("\\Q", pattern, "\\E")
-    }
-    # Prepend "(?i)" for case-insensitive matching
-    pattern <- paste0("(?i)", pattern)
-  }
-  pattern
-}
-
-# format `replacement` as needed for literal replacement by RE2
-format_string_replacement <- function(replacement, ignore.case, fixed) {
-  # Arrow lacks native support for case-insensitive literal string
-  # replacement, so we use the regular expression engine (RE2) to do this.
-  # https://github.com/google/re2/wiki/Syntax
-  if (ignore.case && fixed) {
-    # Escape single backslashes in the regex replacement text so they are
-    # interpreted as literal backslashes:
-    replacement <- gsub("\\", "\\\\", replacement, fixed = TRUE)
-  }
-  replacement
-}
-
-#' Get `stringr` pattern options
-#'
-#' This function assigns definitions for the `stringr` pattern modifier
-#' functions (`fixed()`, `regex()`, etc.) inside itself, and uses them to
-#' evaluate the quoted expression `pattern`, returning a list that is used
-#' to control pattern matching behavior in internal `arrow` functions.
-#'
-#' @param pattern Unevaluated expression containing a call to a `stringr`
-#' pattern modifier function
-#'
-#' @return List containing elements `pattern`, `fixed`, and `ignore_case`
-#' @keywords internal
-get_stringr_pattern_options <- function(pattern) {
-  fixed <- function(pattern, ignore_case = FALSE, ...) {
-    check_dots(...)
-    list(pattern = pattern, fixed = TRUE, ignore_case = ignore_case)
-  }
-  regex <- function(pattern, ignore_case = FALSE, ...) {
-    check_dots(...)
-    list(pattern = pattern, fixed = FALSE, ignore_case = ignore_case)
-  }
-  coll <- boundary <- function(...) {
-    stop(
-      "Pattern modifier `",
-      match.call()[[1]],
-      "()` is not supported in Arrow",
-      call. = FALSE
-    )
-  }
-  check_dots <- function(...) {
-    dots <- list(...)
-    if (length(dots)) {
-      warning(
-        "Ignoring pattern modifier ",
-        ngettext(length(dots), "argument ", "arguments "),
-        "not supported in Arrow: ",
-        oxford_paste(names(dots)),
-        call. = FALSE
-      )
-    }
-  }
-  ensure_opts <- function(opts) {
-    if (is.character(opts)) {
-      opts <- list(pattern = opts, fixed = FALSE, ignore_case = FALSE)
-    }
-    opts
-  }
-  ensure_opts(eval(pattern))
-}
-
-# We'll populate these at package load time.
-dplyr_functions <- NULL
-init_env <- function () {
-  dplyr_functions <<- new.env(hash = TRUE)
-}
-init_env()
-
-# Create a data mask for evaluating a dplyr expression
-arrow_mask <- function(.data) {
-  if (query_on_dataset(.data)) {
-    f_env <- new_environment(dplyr_functions$dataset)
-  } else {
-    f_env <- new_environment(dplyr_functions$array)
-  }
-
-  # Add functions that need to error hard and clear.
-  # Some R functions will still try to evaluate on an Expression
-  # and return NA with a warning
-  fail <- function(...) stop("Not implemented")
-  for (f in c("mean", "sd")) {
-    f_env[[f]] <- fail
-  }
-
-  # Add the column references and make the mask
-  out <- new_data_mask(
-    new_environment(.data$selected_columns, parent = f_env),
-    f_env
-  )
-  # Then insert the data pronoun
-  # TODO: figure out what rlang::as_data_pronoun does/why we should use it
-  # (because if we do we get `Error: Can't modify the data pronoun` in mutate())
-  out$.data <- .data$selected_columns
-  out
-}
-
-set_filters <- function(.data, expressions) {
-  if (length(expressions)) {
-    # expressions is a list of Expressions. AND them together and set them on .data
-    new_filter <- Reduce("&", expressions)
-    if (isTRUE(.data$filtered_rows)) {
-      # TRUE is default (i.e. no filter yet), so we don't need to & with it
-      .data$filtered_rows <- new_filter
-    } else {
-      .data$filtered_rows <- .data$filtered_rows & new_filter
-    }
-  }
-  .data
-}
-
-collect.arrow_dplyr_query <- function(x, as_data_frame = TRUE, ...) {
-  x <- ensure_group_vars(x)
-  x <- ensure_arrange_vars(x) # this sets x$temp_columns
-  # Pull only the selected rows and cols into R
-  if (query_on_dataset(x)) {
-    # See dataset.R for Dataset and Scanner(Builder) classes
-    tab <- Scanner$create(x)$ToTable()
-  } else {
-    # This is a Table or RecordBatch
-
-    # Filter and select the data referenced in selected columns
-    if (isTRUE(x$filtered_rows)) {
-      filter <- TRUE
-    } else {
-      filter <- eval_array_expression(x$filtered_rows, x$.data)
-    }
-    # TODO: shortcut if identical(names(x$.data), find_array_refs(c(x$selected_columns, x$temp_columns)))?
-    tab <- x$.data[
-      filter,
-      find_array_refs(c(x$selected_columns, x$temp_columns)),
-      keep_na = FALSE
-    ]
-    # Now evaluate those expressions on the filtered table
-    cols <- lapply(c(x$selected_columns, x$temp_columns), eval_array_expression, data = tab)
-    if (length(cols) == 0) {
-      tab <- tab[, integer(0)]
-    } else {
-      if (inherits(x$.data, "Table")) {
-        tab <- Table$create(!!!cols)
-      } else {
-        tab <- RecordBatch$create(!!!cols)
-      }
-    }
-  }
-  # Arrange rows
-  if (length(x$arrange_vars) > 0) {
-    tab <- tab[
-      tab$SortIndices(names(x$arrange_vars), x$arrange_desc),
-      names(x$selected_columns), # this omits x$temp_columns from the result
-      drop = FALSE
-    ]
-  }
-  if (as_data_frame) {
-    df <- as.data.frame(tab)
-    tab$invalidate()
-    restore_dplyr_features(df, x)
-  } else {
-    restore_dplyr_features(tab, x)
-  }
-}
-collect.ArrowTabular <- function(x, as_data_frame = TRUE, ...) {
-  if (as_data_frame) {
-    as.data.frame(x, ...)
-  } else {
-    x
-  }
-}
-collect.Dataset <- function(x, ...) dplyr::collect(arrow_dplyr_query(x), ...)
-
-compute.arrow_dplyr_query <- function(x, ...) dplyr::collect(x, as_data_frame = FALSE)
-compute.ArrowTabular <- function(x, ...) x
-compute.Dataset <- compute.arrow_dplyr_query
+`[.arrow_dplyr_query` <- `[.Dataset`
+# TODO: ^ should also probably restore_dplyr_features, and/or that should be moved down
 
 ensure_group_vars <- function(x) {
   if (inherits(x, "arrow_dplyr_query")) {
@@ -797,7 +158,7 @@ ensure_group_vars <- function(x) {
       # Add them back
       x$selected_columns <- c(
         x$selected_columns,
-        make_field_refs(gv, dataset = query_on_dataset(.data))
+        make_field_refs(gv)
       )
     }
   }
@@ -822,13 +183,7 @@ restore_dplyr_features <- function(df, query) {
   # An arrow_dplyr_query holds some attributes that Arrow doesn't know about
   # After calling collect(), make sure these features are carried over
 
-  grouped <- length(query$group_by_vars) > 0
-  renamed <- ncol(df) && !identical(names(df), names(query))
-  if (renamed) {
-    # In case variables were renamed, apply those names
-    names(df) <- names(query)
-  }
-  if (grouped) {
+  if (length(query$group_by_vars) > 0) {
     # Preserve groupings, if present
     if (is.data.frame(df)) {
       df <- dplyr::grouped_df(
@@ -846,217 +201,6 @@ restore_dplyr_features <- function(df, query) {
   df
 }
 
-pull.arrow_dplyr_query <- function(.data, var = -1) {
-  .data <- arrow_dplyr_query(.data)
-  var <- vars_pull(names(.data), !!enquo(var))
-  .data$selected_columns <- set_names(.data$selected_columns[var], var)
-  dplyr::collect(.data)[[1]]
-}
-pull.Dataset <- pull.ArrowTabular <- pull.arrow_dplyr_query
-
-summarise.arrow_dplyr_query <- function(.data, ...) {
-  call <- match.call()
-  .data <- arrow_dplyr_query(.data)
-  if (query_on_dataset(.data)) {
-    not_implemented_for_dataset("summarize()")
-  }
-  exprs <- quos(...)
-  # Only retain the columns we need to do our aggregations
-  vars_to_keep <- unique(c(
-    unlist(lapply(exprs, all.vars)), # vars referenced in summarise
-    dplyr::group_vars(.data)             # vars needed for grouping
-  ))
-  .data <- dplyr::select(.data, vars_to_keep)
-  if (isTRUE(getOption("arrow.summarize", FALSE))) {
-    # Try stuff, if successful return()
-    out <- try(do_arrow_group_by(.data, ...), silent = TRUE)
-    if (inherits(out, "try-error")) {
-      return(abandon_ship(call, .data, format(out)))
-    } else {
-      return(out)
-    }
-  } else {
-    # If unsuccessful or if option not set, do the work in R
-    dplyr::summarise(dplyr::collect(.data), ...)
-  }
-}
-summarise.Dataset <- summarise.ArrowTabular <- summarise.arrow_dplyr_query
-
-do_arrow_group_by <- function(.data, ...) {
-  exprs <- quos(...)
-  mask <- arrow_mask(.data)
-  # Add aggregation wrappers to arrow_mask somehow
-  # (this is not ideal, would overwrite same-named objects)
-  mask$sum <- function(x, na.rm = FALSE) {
-    list(
-      fun = "sum",
-      data = x,
-      options = list(na.rm = na.rm)
-    )
-  }
-  results <- list()
-  for (i in seq_along(exprs)) {
-    # Iterate over the indices and not the names because names may be repeated
-    # (which overwrites the previous name)
-    new_var <- names(exprs)[i]
-    results[[new_var]] <- arrow_eval(exprs[[i]], mask)
-    if (inherits(results[[new_var]], "try-error")) {
-      msg <- paste('Expression', as_label(exprs[[i]]), 'not supported in Arrow')
-      stop(msg, call. = FALSE)
-    }
-    # Put it in the data mask too?
-    #mask[[new_var]] <- mask$.data[[new_var]] <- results[[new_var]]
-  }
-  # Now, from that, split out the array (expressions) and options
-  opts <- lapply(results, function(x) x[c("fun", "options")])
-  inputs <- lapply(results, function(x) eval_array_expression(x$data, .data$.data))
-  grouping_vars <- lapply(.data$group_by_vars, function(x) eval_array_expression(.data$selected_columns[[x]], .data$.data))
-  compute__GroupBy(inputs, grouping_vars, opts)
-}
-
-group_by.arrow_dplyr_query <- function(.data,
-                                       ...,
-                                       .add = FALSE,
-                                       add = .add,
-                                       .drop = dplyr::group_by_drop_default(.data)) {
-  .data <- arrow_dplyr_query(.data)
-  # ... can contain expressions (i.e. can add (or rename?) columns)
-  # Check for those (they show up as named expressions)
-  new_groups <- enquos(...)
-  new_groups <- new_groups[nzchar(names(new_groups))]
-  if (length(new_groups)) {
-    # Add them to the data
-    .data <- dplyr::mutate(.data, !!!new_groups)
-  }
-  if (".add" %in% names(formals(dplyr::group_by))) {
-    # dplyr >= 1.0
-    gv <- dplyr::group_by_prepare(.data, ..., .add = .add)$group_names
-  } else {
-    gv <- dplyr::group_by_prepare(.data, ..., add = add)$group_names
-  }
-  .data$group_by_vars <- gv
-  .data$drop_empty_groups <- ifelse(length(gv), .drop, dplyr::group_by_drop_default(.data))
-  .data
-}
-group_by.Dataset <- group_by.ArrowTabular <- group_by.arrow_dplyr_query
-
-groups.arrow_dplyr_query <- function(x) syms(dplyr::group_vars(x))
-groups.Dataset <- groups.ArrowTabular <- function(x) NULL
-
-group_vars.arrow_dplyr_query <- function(x) x$group_by_vars
-group_vars.Dataset <- group_vars.ArrowTabular <- function(x) NULL
-
-# the logical literal in the two functions below controls the default value of
-# the .drop argument to group_by()
-group_by_drop_default.arrow_dplyr_query <-
-  function(.tbl) .tbl$drop_empty_groups %||% TRUE
-group_by_drop_default.Dataset <- group_by_drop_default.ArrowTabular <-
-  function(.tbl) TRUE
-
-ungroup.arrow_dplyr_query <- function(x, ...) {
-  x$group_by_vars <- character()
-  x$drop_empty_groups <- NULL
-  x
-}
-ungroup.Dataset <- ungroup.ArrowTabular <- force
-
-mutate.arrow_dplyr_query <- function(.data,
-                                     ...,
-                                     .keep = c("all", "used", "unused", "none"),
-                                     .before = NULL,
-                                     .after = NULL) {
-  call <- match.call()
-  exprs <- quos(...)
-
-  .keep <- match.arg(.keep)
-  .before <- enquo(.before)
-  .after <- enquo(.after)
-
-  if (.keep %in% c("all", "unused") && length(exprs) == 0) {
-    # Nothing to do
-    return(.data)
-  }
-
-  .data <- arrow_dplyr_query(.data)
-
-  # Restrict the cases we support for now
-  if (length(dplyr::group_vars(.data)) > 0) {
-    # mutate() on a grouped dataset does calculations within groups
-    # This doesn't matter on scalar ops (arithmetic etc.) but it does
-    # for things with aggregations (e.g. subtracting the mean)
-    return(abandon_ship(call, .data, 'mutate() on grouped data not supported in Arrow'))
-  }
-
-  # Check for unnamed expressions and fix if any
-  unnamed <- !nzchar(names(exprs))
-  # Deparse and take the first element in case they're long expressions
-  names(exprs)[unnamed] <- map_chr(exprs[unnamed], as_label)
-
-  is_dataset <- query_on_dataset(.data)
-  mask <- arrow_mask(.data)
-  results <- list()
-  for (i in seq_along(exprs)) {
-    # Iterate over the indices and not the names because names may be repeated
-    # (which overwrites the previous name)
-    new_var <- names(exprs)[i]
-    results[[new_var]] <- arrow_eval(exprs[[i]], mask)
-    if (inherits(results[[new_var]], "try-error")) {
-      msg <- paste('Expression', as_label(exprs[[i]]), 'not supported in Arrow')
-      return(abandon_ship(call, .data, msg))
-    } else if (is_dataset &&
-               !inherits(results[[new_var]], "Expression") &&
-               !is.null(results[[new_var]])) {
-      # We need some wrapping to handle literal values
-      if (length(results[[new_var]]) != 1) {
-        msg <- paste0('In ', new_var, " = ", as_label(exprs[[i]]), ", only values of size one are recycled")
-        return(abandon_ship(call, .data, msg))
-      }
-      results[[new_var]] <- Expression$scalar(results[[new_var]])
-    }
-    # Put it in the data mask too
-    mask[[new_var]] <- mask$.data[[new_var]] <- results[[new_var]]
-  }
-
-  old_vars <- names(.data$selected_columns)
-  # Note that this is names(exprs) not names(results):
-  # if results$new_var is NULL, that means we are supposed to remove it
-  new_vars <- names(exprs)
-
-  # Assign the new columns into the .data$selected_columns
-  for (new_var in new_vars) {
-    .data$selected_columns[[new_var]] <- results[[new_var]]
-  }
-
-  # Deduplicate new_vars and remove NULL columns from new_vars
-  new_vars <- intersect(new_vars, names(.data$selected_columns))
-
-  # Respect .before and .after
-  if (!quo_is_null(.before) || !quo_is_null(.after)) {
-    new <- setdiff(new_vars, old_vars)
-    .data <- dplyr::relocate(.data, !!new, .before = !!.before, .after = !!.after)
-  }
-
-  # Respect .keep
-  if (.keep == "none") {
-    .data$selected_columns <- .data$selected_columns[new_vars]
-  } else if (.keep != "all") {
-    # "used" or "unused"
-    used_vars <- unlist(lapply(exprs, all.vars), use.names = FALSE)
-    if (.keep == "used") {
-      .data$selected_columns[setdiff(old_vars, used_vars)] <- NULL
-    } else {
-      # "unused"
-      .data$selected_columns[intersect(old_vars, used_vars)] <- NULL
-    }
-  }
-  # Even if "none", we still keep group vars
-  ensure_group_vars(.data)
-}
-mutate.Dataset <- mutate.ArrowTabular <- mutate.arrow_dplyr_query
-
-transmute.arrow_dplyr_query <- function(.data, ...) dplyr::mutate(.data, ..., .keep = "none")
-transmute.Dataset <- transmute.ArrowTabular <- transmute.arrow_dplyr_query
-
 # Helper to handle unsupported dplyr features
 # * For Table/RecordBatch, we collect() and then call the dplyr method in R
 # * For Dataset, we just error
@@ -1079,81 +223,7 @@ abandon_ship <- function(call, .data, msg = NULL) {
   eval.parent(call, 2)
 }
 
-arrange.arrow_dplyr_query <- function(.data, ..., .by_group = FALSE) {
-  call <- match.call()
-  exprs <- quos(...)
-  if (.by_group) {
-    # when the data is is grouped and .by_group is TRUE, order the result by
-    # the grouping columns first
-    exprs <- c(quos(!!!dplyr::groups(.data)), exprs)
-  }
-  if (length(exprs) == 0) {
-    # Nothing to do
-    return(.data)
-  }
-  .data <- arrow_dplyr_query(.data)
-  # find and remove any dplyr::desc() and tidy-eval
-  # the arrange expressions inside an Arrow data_mask
-  sorts <- vector("list", length(exprs))
-  descs <- logical(0)
-  mask <- arrow_mask(.data)
-  for (i in seq_along(exprs)) {
-    x <- find_and_remove_desc(exprs[[i]])
-    exprs[[i]] <- x[["quos"]]
-    sorts[[i]] <- arrow_eval(exprs[[i]], mask)
-    if (inherits(sorts[[i]], "try-error")) {
-      msg <- paste('Expression', as_label(exprs[[i]]), 'not supported in Arrow')
-      return(abandon_ship(call, .data, msg))
-    }
-    names(sorts)[i] <- as_label(exprs[[i]])
-    descs[i] <- x[["desc"]]
-  }
-  .data$arrange_vars <- c(sorts, .data$arrange_vars)
-  .data$arrange_desc <- c(descs, .data$arrange_desc)
-  .data
-}
-arrange.Dataset <- arrange.ArrowTabular <- arrange.arrow_dplyr_query
-
-# Helper to handle desc() in arrange()
-# * Takes a quosure as input
-# * Returns a list with two elements:
-#   1. The quosure with any wrapping parentheses and desc() removed
-#   2. A logical value indicating whether desc() was found
-# * Performs some other validation
-find_and_remove_desc <- function(quosure) {
-  expr <- quo_get_expr(quosure)
-  descending <- FALSE
-  if (length(all.vars(expr)) < 1L) {
-    stop(
-      "Expression in arrange() does not contain any field names: ",
-      deparse(expr),
-      call. = FALSE
-    )
-  }
-  # Use a while loop to remove any number of nested pairs of enclosing
-  # parentheses and any number of nested desc() calls. In the case of multiple
-  # nested desc() calls, each one toggles the sort order.
-  while (identical(typeof(expr), "language") && is.call(expr)) {
-    if (identical(expr[[1]], quote(`(`))) {
-      # remove enclosing parentheses
-      expr <- expr[[2]]
-    } else if (identical(expr[[1]], quote(desc))) {
-      # remove desc() and toggle descending
-      expr <- expr[[2]]
-      descending <- !descending
-    } else {
-      break
-    }
-  }
-  return(
-    list(
-      quos = quo_set_expr(quosure, expr),
-      desc = descending
-    )
-  )
-}
-
-query_on_dataset <- function(x) inherits(x$.data, "Dataset")
+query_on_dataset <- function(x) !inherits(x$.data, "InMemoryDataset")
 
 not_implemented_for_dataset <- function(method) {
   stop(
@@ -1162,12 +232,3 @@ not_implemented_for_dataset <- function(method) {
     call. = FALSE
   )
 }
-
-#' Does this string contain regex metacharacters?
-#'
-#' @param string String to be tested
-#' @keywords internal
-#' @return Logical: does `string` contain regex metacharacters?
-contains_regex <- function(string) {
-  grepl("[.\\|()[{^$*+?]", string)
-}
diff --git a/r/R/expression.R b/r/R/expression.R
index 30eb0906d43..3b24b09bb8b 100644
--- a/r/R/expression.R
+++ b/r/R/expression.R
@@ -17,100 +17,6 @@
 
 #' @include arrowExports.R
 
-array_expression <- function(FUN,
-                             ...,
-                             args = list(...),
-                             options = empty_named_list()) {
-  structure(
-    list(
-      fun = FUN,
-      args = args,
-      options = options
-    ),
-    class = "array_expression"
-  )
-}
-
-#' @export
-Ops.ArrowDatum <- function(e1, e2) {
-  if (.Generic == "!") {
-    eval_array_expression(build_array_expression(.Generic, e1))
-  } else if (.Generic %in% names(.array_function_map)) {
-    eval_array_expression(build_array_expression(.Generic, e1, e2))
-  } else {
-    stop(paste0("Unsupported operation on `", class(e1)[1L], "` : "), .Generic, call. = FALSE)
-  }
-}
-
-#' @export
-Ops.array_expression <- function(e1, e2) {
-  if (.Generic == "!") {
-    build_array_expression(.Generic, e1)
-  } else {
-    build_array_expression(.Generic, e1, e2)
-  }
-}
-
-build_array_expression <- function(FUN,
-                                   ...,
-                                   args = list(...),
-                                   options = empty_named_list()) {
-  if (FUN == "-" && length(args) == 1L) {
-    if (inherits(args[[1]], c("ArrowObject", "array_expression"))) {
-      return(build_array_expression("negate_checked", args[[1]]))
-    } else {
-      return(-args[[1]])
-    }
-  }
-  args <- lapply(args, .wrap_arrow, FUN)
-
-  # In Arrow, "divide" is one function, which does integer division on
-  # integer inputs and floating-point division on floats
-  if (FUN == "/") {
-    # TODO: omg so many ways it's wrong to assume these types
-    args <- lapply(args, cast_array_expression, float64())
-  } else if (FUN == "%/%") {
-    # In R, integer division works like floor(float division)
-    out <- build_array_expression("/", args = args, options = options)
-    return(cast_array_expression(out, int32(), allow_float_truncate = TRUE))
-  } else if (FUN == "%%") {
-    # {e1 - e2 * ( e1 %/% e2 )}
-    # ^^^ form doesn't work because Ops.Array evaluates eagerly,
-    # but we can build that up
-    quotient <- build_array_expression("%/%", args = args)
-    base <- build_array_expression("*", quotient, args[[2]])
-    # this cast is to ensure that the result of this and e1 are the same
-    # (autocasting only applies to scalars)
-    base <- cast_array_expression(base, args[[1]]$type)
-    return(build_array_expression("-", args[[1]], base))
-  }
-
-  array_expression(.array_function_map[[FUN]] %||% FUN, args = args, options = options)
-}
-
-cast_array_expression <- function(x, to_type, safe = TRUE, ...) {
-  opts <- list(
-    to_type = to_type,
-    allow_int_overflow = !safe,
-    allow_time_truncate = !safe,
-    allow_float_truncate = !safe
-  )
-  array_expression("cast", x, options = modifyList(opts, list(...)))
-}
-
-.wrap_arrow <- function(arg, fun) {
-  if (!inherits(arg, c("ArrowObject", "array_expression"))) {
-    # TODO: Array$create if lengths are equal?
-    # TODO: these kernels should autocast like the dataset ones do (e.g. int vs. float)
-    if (fun == "%in%") {
-      arg <- Array$create(arg)
-    } else {
-      arg <- Scalar$create(arg)
-    }
-  }
-  arg
-}
-
 .unary_function_map <- list(
   "!" = "invert",
   "as.factor" = "dictionary_encode",
@@ -150,86 +56,6 @@ cast_array_expression <- function(x, to_type, safe = TRUE, ...) {
 
 .array_function_map <- c(.unary_function_map, .binary_function_map)
 
-eval_array_expression <- function(x, data = NULL) {
-  if (!is.null(data)) {
-    x <- bind_array_refs(x, data)
-  }
-  if (!inherits(x, "array_expression")) {
-    # Nothing to evaluate
-    return(x)
-  }
-  x$args <- lapply(x$args, function (a) {
-    if (inherits(a, "array_expression")) {
-      eval_array_expression(a)
-    } else {
-      a
-    }
-  })
-  if (x$fun == "is_in_meta_binary" && inherits(x$args[[2]], "Scalar")) {
-    x$args[[2]] <- Array$create(x$args[[2]])
-  }
-  call_function(x$fun, args = x$args, options = x$options %||% empty_named_list())
-}
-
-find_array_refs <- function(x) {
-  if (identical(x$fun, "array_ref")) {
-    out <- x$args$field_name
-  } else {
-    out <- lapply(x$args, find_array_refs)
-  }
-  unlist(out)
-}
-
-# Take an array_expression and replace array_refs with arrays/chunkedarrays from data
-bind_array_refs <- function(x, data) {
-  if (inherits(x, "array_expression")) {
-    if (identical(x$fun, "array_ref")) {
-      x <- data[[x$args$field_name]]
-    } else {
-      x$args <- lapply(x$args, bind_array_refs, data)
-    }
-  }
-  x
-}
-
-#' @export
-is.na.array_expression <- function(x) array_expression("is.na", x)
-
-#' @export
-as.vector.array_expression <- function(x, ...) {
-  as.vector(eval_array_expression(x))
-}
-
-#' @export
-print.array_expression <- function(x, ...) {
-  cat(.format_array_expression(x), "\n", sep = "")
-  invisible(x)
-}
-
-.format_array_expression <- function(x) {
-  printed_args <- map_chr(x$args, function(arg) {
-    if (inherits(arg, "Scalar")) {
-      deparse(as.vector(arg))
-    } else if (inherits(arg, "ArrowObject")) {
-      paste0("<", class(arg)[1], ">")
-    } else if (inherits(arg, "array_expression")) {
-      .format_array_expression(arg)
-    } else {
-      # Should not happen
-      deparse(arg)
-    }
-  })
-  if (identical(x$fun, "array_ref")) {
-    x$args$field_name
-  } else {
-    # Prune this for readability
-    function_name <- sub("_kleene", "", x$fun)
-    paste0(function_name, "(", paste(printed_args, collapse = ", "), ")")
-  }
-}
-
-###########
-
 #' Arrow expressions
 #'
 #' @description
@@ -250,6 +76,7 @@ print.array_expression <- function(x, ...) {
 Expression <- R6Class("Expression", inherit = ArrowObject,
   public = list(
     ToString = function() compute___expr__ToString(self),
+    type = function(schema) compute___expr__type(self, schema),
     cast = function(to_type, safe = TRUE, ...) {
       opts <- list(
         to_type = to_type,
@@ -279,13 +106,16 @@ Expression$scalar <- function(x) {
   compute___expr__scalar(Scalar$create(x))
 }
 
-build_dataset_expression <- function(FUN,
-                                     ...,
-                                     args = list(...),
-                                     options = empty_named_list()) {
+# Wrapper around Expression$create that:
+# (1) maps R function names to Arrow C++ compute ("/" --> "divide_checked")
+# (2) wraps R input args as Array or Scalar
+build_expr <- function(FUN,
+                       ...,
+                       args = list(...),
+                       options = empty_named_list()) {
   if (FUN == "-" && length(args) == 1L) {
     if (inherits(args[[1]], c("ArrowObject", "Expression"))) {
-      return(build_dataset_expression("negate_checked", args[[1]]))
+      return(build_expr("negate_checked", args[[1]]))
     } else {
       return(-args[[1]])
     }
@@ -315,7 +145,7 @@ build_dataset_expression <- function(FUN,
       args <- lapply(args, function(x) x$cast(float64()))
     } else if (FUN == "%/%") {
       # In R, integer division works like floor(float division)
-      out <- build_dataset_expression("/", args = args)
+      out <- build_expr("/", args = args)
       return(out$cast(int32(), allow_float_truncate = TRUE))
     } else if (FUN == "%%") {
       return(args[[1]] - args[[2]] * ( args[[1]] %/% args[[2]] ))
@@ -329,9 +159,9 @@ build_dataset_expression <- function(FUN,
 #' @export
 Ops.Expression <- function(e1, e2) {
   if (.Generic == "!") {
-    build_dataset_expression(.Generic, e1)
+    build_expr(.Generic, e1)
   } else {
-    build_dataset_expression(.Generic, e1, e2)
+    build_expr(.Generic, e1, e2)
   }
 }
 
diff --git a/r/man/contains_regex.Rd b/r/man/contains_regex.Rd
index d8fee96d99b..f05f11d0279 100644
--- a/r/man/contains_regex.Rd
+++ b/r/man/contains_regex.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/dplyr.R
+% Please edit documentation in R/dplyr-functions.R
 \name{contains_regex}
 \alias{contains_regex}
 \title{Does this string contain regex metacharacters?}
diff --git a/r/man/get_stringr_pattern_options.Rd b/r/man/get_stringr_pattern_options.Rd
index 79a9a72b7cf..7107b906024 100644
--- a/r/man/get_stringr_pattern_options.Rd
+++ b/r/man/get_stringr_pattern_options.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/dplyr.R
+% Please edit documentation in R/dplyr-functions.R
 \name{get_stringr_pattern_options}
 \alias{get_stringr_pattern_options}
 \title{Get \code{stringr} pattern options}
diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp
index b274ac5f3af..9d75b2da54e 100644
--- a/r/src/arrowExports.cpp
+++ b/r/src/arrowExports.cpp
@@ -3097,6 +3097,22 @@ extern "C" SEXP _arrow_compute___expr__ToString(SEXP x_sexp){
 }
 #endif
 
+// expression.cpp
+#if defined(ARROW_R_WITH_ARROW)
+std::shared_ptr<arrow::DataType> compute___expr__type(const std::shared_ptr<compute::Expression>& x, const std::shared_ptr<arrow::Schema>& schema);
+extern "C" SEXP _arrow_compute___expr__type(SEXP x_sexp, SEXP schema_sexp){
+BEGIN_CPP11
+	arrow::r::Input<const std::shared_ptr<compute::Expression>&>::type x(x_sexp);
+	arrow::r::Input<const std::shared_ptr<arrow::Schema>&>::type schema(schema_sexp);
+	return cpp11::as_sexp(compute___expr__type(x, schema));
+END_CPP11
+}
+#else
+extern "C" SEXP _arrow_compute___expr__type(SEXP x_sexp, SEXP schema_sexp){
+	Rf_error("Cannot call compute___expr__type(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+}
+#endif
+
 // feather.cpp
 #if defined(ARROW_R_WITH_ARROW)
 void ipc___WriteFeather__Table(const std::shared_ptr<arrow::io::OutputStream>& stream, const std::shared_ptr<arrow::Table>& table, int version, int chunk_size, arrow::Compression::type compression, int compression_level);
@@ -6884,6 +6900,7 @@ static const R_CallMethodDef CallEntries[] = {
 		{ "_arrow_compute___expr__get_field_ref_name", (DL_FUNC) &_arrow_compute___expr__get_field_ref_name, 1}, 
 		{ "_arrow_compute___expr__scalar", (DL_FUNC) &_arrow_compute___expr__scalar, 1}, 
 		{ "_arrow_compute___expr__ToString", (DL_FUNC) &_arrow_compute___expr__ToString, 1}, 
+		{ "_arrow_compute___expr__type", (DL_FUNC) &_arrow_compute___expr__type, 2}, 
 		{ "_arrow_ipc___WriteFeather__Table", (DL_FUNC) &_arrow_ipc___WriteFeather__Table, 6}, 
 		{ "_arrow_ipc___feather___Reader__version", (DL_FUNC) &_arrow_ipc___feather___Reader__version, 1}, 
 		{ "_arrow_ipc___feather___Reader__Read", (DL_FUNC) &_arrow_ipc___feather___Reader__Read, 2}, 
diff --git a/r/src/expression.cpp b/r/src/expression.cpp
index 798853edd72..d8745ade479 100644
--- a/r/src/expression.cpp
+++ b/r/src/expression.cpp
@@ -68,4 +68,12 @@ std::string compute___expr__ToString(const std::shared_ptr<compute::Expression>&
   return x->ToString();
 }
 
+// [[arrow::export]]
+std::shared_ptr<arrow::DataType> compute___expr__type(
+    const std::shared_ptr<compute::Expression>& x,
+    const std::shared_ptr<arrow::Schema>& schema) {
+  auto bound = ValueOrStop(x->Bind(*schema));
+  return bound.type();
+}
+
 #endif
diff --git a/r/tests/testthat/helper-arrow.R b/r/tests/testthat/helper-arrow.R
index 89d9bf07ee6..0abbfb6a13a 100644
--- a/r/tests/testthat/helper-arrow.R
+++ b/r/tests/testthat/helper-arrow.R
@@ -47,7 +47,7 @@ with_language <- function(lang, expr) {
   Sys.setenv(LANGUAGE = lang)
   on.exit({
     Sys.setenv(LANGUAGE = old)
-    dplyr_functions$i18ized_error_pattern <<- NULL
+    .cache$i18ized_error_pattern <<- NULL
   })
   if (!identical(before, i18ize_error_messages())) {
     skip(paste("This OS either does not support changing languages to", lang, "or it caches translations"))
diff --git a/r/tests/testthat/test-RecordBatch.R b/r/tests/testthat/test-RecordBatch.R
index d60ed4fbaba..eef79100950 100644
--- a/r/tests/testthat/test-RecordBatch.R
+++ b/r/tests/testthat/test-RecordBatch.R
@@ -500,13 +500,14 @@ test_that("Handling string data with embedded nuls", {
 })
 
 test_that("ARROW-11769 - grouping preserved in record batch creation", {
-  
+  skip_if_not_available("dataset")
+
   tbl <- tibble::tibble(
     int = 1:10,
     fct = factor(rep(c("A", "B"), 5)),
     fct2 = factor(rep(c("C", "D"), each = 5)),
   )
-  
+
   expect_identical(
     tbl %>%
       dplyr::group_by(fct, fct2) %>%
@@ -514,5 +515,5 @@ test_that("ARROW-11769 - grouping preserved in record batch creation", {
       dplyr::group_vars(),
     c("fct", "fct2")
   )
-  
+
 })
diff --git a/r/tests/testthat/test-Table.R b/r/tests/testthat/test-Table.R
index b88b1ba65e3..ba41c2be705 100644
--- a/r/tests/testthat/test-Table.R
+++ b/r/tests/testthat/test-Table.R
@@ -476,13 +476,14 @@ test_that("Table$create() with different length columns", {
 })
 
 test_that("ARROW-11769 - grouping preserved in table creation", {
-  
+  skip_if_not_available("dataset")
+
   tbl <- tibble::tibble(
     int = 1:10,
     fct = factor(rep(c("A", "B"), 5)),
     fct2 = factor(rep(c("C", "D"), each = 5)),
   )
-  
+
   expect_identical(
     tbl %>%
       dplyr::group_by(fct, fct2) %>%
@@ -490,5 +491,5 @@ test_that("ARROW-11769 - grouping preserved in table creation", {
       dplyr::group_vars(),
     c("fct", "fct2")
   )
-  
+
 })
diff --git a/r/tests/testthat/test-compute-arith.R b/r/tests/testthat/test-compute-arith.R
index 0b6d8e8dd17..2586ba865b3 100644
--- a/r/tests/testthat/test-compute-arith.R
+++ b/r/tests/testthat/test-compute-arith.R
@@ -111,6 +111,7 @@ test_that("Power", {
 test_that("Dates casting", {
   a <- Array$create(c(Sys.Date() + 1:4, NA_integer_))
 
-  skip("autocasting should happen in compute kernels; R workaround fails on this ARROW-8919")
+  skip("ARROW-11090 (date/datetime arithmetic)")
+  # Error: NotImplemented: Function add_checked has no kernel matching input types (array[date32[day]], scalar[double])
   expect_equal(a + 2, Array$create(c((Sys.Date() + 1:4 ) + 2), NA_integer_))
 })
diff --git a/r/tests/testthat/test-compute-sort.R b/r/tests/testthat/test-compute-sort.R
index ba38d4ce37e..63977b55414 100644
--- a/r/tests/testthat/test-compute-sort.R
+++ b/r/tests/testthat/test-compute-sort.R
@@ -138,28 +138,21 @@ test_that("sort(vector), sort(Array), sort(ChunkedArray) give equivalent results
 })
 
 test_that("Table$SortIndices()", {
+  x <- Table$create(tbl)
   expect_identical(
-    {
-      x <- tbl %>% Table$create()
-      x$Take(x$SortIndices("chr")) %>% pull(chr)
-    },
+    as.vector(x$Take(x$SortIndices("chr"))$chr),
     sort(tbl$chr, na.last = TRUE)
   )
   expect_identical(
-    {
-      x <- tbl %>% Table$create()
-      x$Take(x$SortIndices(c("int", "dbl"), c(FALSE, FALSE))) %>% collect()
-    },
+    as.data.frame(x$Take(x$SortIndices(c("int", "dbl"), c(FALSE, FALSE)))),
     tbl %>% arrange(int, dbl)
   )
 })
 
 test_that("RecordBatch$SortIndices()", {
+  x <- record_batch(tbl)
   expect_identical(
-    {
-      x <- tbl %>% record_batch()
-      x$Take(x$SortIndices(c("chr", "int", "dbl"), TRUE)) %>% collect()
-    },
+    as.data.frame(x$Take(x$SortIndices(c("chr", "int", "dbl"), TRUE))),
     tbl %>% arrange(desc(chr), desc(int), desc(dbl))
   )
 })
diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R
index 1b0dcc07128..334ff6d06f7 100644
--- a/r/tests/testthat/test-dataset.R
+++ b/r/tests/testthat/test-dataset.R
@@ -613,22 +613,6 @@ test_that("Creating UnionDataset", {
   expect_error(c(ds1, 42), "character")
 })
 
-test_that("InMemoryDataset", {
-  ds <- InMemoryDataset$create(rbind(df1, df2))
-  expect_r6_class(ds, "InMemoryDataset")
-  expect_equivalent(
-    ds %>%
-      select(chr, dbl) %>%
-      filter(dbl > 7 & dbl < 53L) %>%
-      collect() %>%
-      arrange(dbl),
-    rbind(
-      df1[8:10, c("chr", "dbl")],
-      df2[1:2, c("chr", "dbl")]
-    )
-  )
-})
-
 test_that("map_batches", {
   skip_if_not_available("parquet")
   ds <- open_dataset(dataset_dir, partitioning = "part")
@@ -647,18 +631,6 @@ test_that("partitioning = NULL to ignore partition information (but why?)", {
   expect_identical(names(ds), names(df1)) # i.e. not c(names(df1), "group", "other")
 })
 
-test_that("filter() with is.na()", {
-  skip_if_not_available("parquet")
-  ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8()))
-  expect_equivalent(
-    ds %>%
-      select(part, lgl) %>%
-      filter(!is.na(lgl), part == 1) %>%
-      collect(),
-    tibble(part = 1L, lgl = df1$lgl[!is.na(df1$lgl)])
-  )
-})
-
 test_that("filter() with is.nan()", {
   skip_if_not_available("parquet")
   ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8()))
@@ -693,103 +665,6 @@ test_that("filter() with %in%", {
   )
 })
 
-test_that("filter() with negative scalar", {
-  skip_if_not_available("parquet")
-  ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8()))
-  expect_equivalent(
-    ds %>%
-      filter(part == 1) %>%
-      select(chr, int) %>%
-      filter(int > -2) %>%
-      collect(),
-    df1[, c("chr", "int")]
-  )
-
-  expect_equivalent(
-    ds %>%
-      filter(part == 1) %>%
-      select(chr, int) %>%
-      filter(int %in% -2) %>%
-      collect(),
-    df1[FALSE, c("chr", "int")]
-  )
-
-  expect_equivalent(
-    ds %>%
-      filter(part == 1) %>%
-      select(chr, int) %>%
-      filter(-int < -2) %>%
-      collect(),
-    df1[df1$int > 2, c("chr", "int")]
-  )
-})
-
-test_that("filter() with strings", {
-  skip_if_not_available("parquet")
-  ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8()))
-  expect_equivalent(
-    ds %>%
-      select(chr, part) %>%
-      filter(chr == "b", part == 1) %>%
-      collect(),
-    tibble(chr = "b", part = 1)
-  )
-
-  skip_if_not_available("utf8proc")
-  expect_equivalent(
-    ds %>%
-      select(chr, part) %>%
-      filter(toupper(chr) == "B", part == 1) %>%
-      collect(),
-    tibble(chr = "b", part = 1)
-  )
-})
-
-test_that("filter() with arrow compute functions by name", {
-  skip_if_not_available("parquet")
-  ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8()))
-  expect_equivalent(
-    ds %>%
-      select(part, lgl) %>%
-      filter(arrow_is_valid(lgl), arrow_equal(part, 1)) %>%
-      collect(),
-    ds %>%
-       select(part, lgl) %>%
-       filter(!is.na(lgl), part == 1L) %>%
-       collect()
-  )
-})
-
-test_that("filter() with .data", {
-  skip_if_not_available("parquet")
-  ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8()))
-  expect_equivalent(
-    ds %>%
-      select(.data$int, .data$part) %>%
-      filter(.data$int == 3, .data$part == 1) %>%
-      collect(),
-    tibble(int = df1$int[3], part = 1)
-  )
-
-  expect_equivalent(
-    ds %>%
-      select(.data$int, .data$part) %>%
-      filter(.data$int %in% c(6, 4, 3, 103, 107), .data$part == 1) %>%
-      collect(),
-    tibble(int = df1$int[c(3, 4, 6)], part = 1)
-  )
-
-  # and the .env pronoun too!
-  chr <- 1
-  expect_equivalent(
-    ds %>%
-      select(.data$int, .data$part) %>%
-      filter(.data$int %in% c(6, 4, 3, 103, 107), .data$part == .env$chr) %>%
-      collect(),
-    tibble(int = df1$int[c(3, 4, 6)], part = 1)
-  )
-})
-
 test_that("filter() on timestamp columns", {
   skip_if_not_available("parquet")
   ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8()))
@@ -849,109 +724,6 @@ test_that("filter() on date32 columns", {
   )
 })
 
-test_that("filter() with expressions", {
-  skip_if_not_available("parquet")
-  ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8()))
-  expect_r6_class(ds$format, "ParquetFileFormat")
-  expect_r6_class(ds$filesystem, "LocalFileSystem")
-  expect_r6_class(ds, "Dataset")
-  expect_equivalent(
-    ds %>%
-      select(chr, dbl) %>%
-      filter(dbl * 2 > 14 & dbl - 50 < 3L) %>%
-      collect() %>%
-      arrange(dbl),
-    rbind(
-      df1[8:10, c("chr", "dbl")],
-      df2[1:2, c("chr", "dbl")]
-    )
-  )
-
-  # check division's special casing.
-  expect_equivalent(
-    ds %>%
-      select(chr, dbl) %>%
-      filter(dbl / 2 > 3.5 & dbl < 53) %>%
-      collect() %>%
-      arrange(dbl),
-    rbind(
-      df1[8:10, c("chr", "dbl")],
-      df2[1:2, c("chr", "dbl")]
-    )
-  )
-
-  expect_equivalent(
-    ds %>%
-      select(chr, dbl, int) %>%
-      filter(int %/% 2L > 3 & dbl < 53) %>%
-      collect() %>%
-      arrange(dbl),
-    rbind(
-      df1[8:10, c("chr", "dbl", "int")],
-      df2[1:2, c("chr", "dbl", "int")]
-    )
-  )
-
-  expect_equivalent(
-    ds %>%
-      select(chr, dbl, int) %>%
-      filter(int %/% 2 > 3 & dbl < 53) %>%
-      collect() %>%
-      arrange(dbl),
-    rbind(
-      df1[8:10, c("chr", "dbl", "int")],
-      df2[1:2, c("chr", "dbl", "int")]
-    )
-  )
-
-  expect_equivalent(
-    ds %>%
-      select(chr, dbl, int) %>%
-      filter(int %% 2L > 0 & dbl < 53) %>%
-      collect() %>%
-      arrange(dbl),
-    rbind(
-      df1[c(1, 3, 5, 7, 9), c("chr", "dbl", "int")],
-      df2[1, c("chr", "dbl", "int")]
-    )
-  )
-
-  expect_equivalent(
-    ds %>%
-      select(chr, dbl, int) %>%
-      filter(int %% 2L > 0 & dbl < 53) %>%
-      collect() %>%
-      arrange(dbl),
-    rbind(
-      df1[c(1, 3, 5, 7, 9), c("chr", "dbl", "int")],
-      df2[1, c("chr", "dbl", "int")]
-    )
-  )
-
-  expect_equivalent(
-    ds %>%
-      select(chr, dbl, int) %>%
-      filter(int %% 2 > 0 & dbl < 53) %>%
-      collect() %>%
-      arrange(dbl),
-    rbind(
-      df1[c(1, 3, 5, 7, 9), c("chr", "dbl", "int")],
-      df2[1, c("chr", "dbl", "int")]
-    )
-  )
-
-  expect_equivalent(
-    ds %>%
-      select(chr, dbl, int) %>%
-      filter(dbl + int > 15 & dbl < 53L) %>%
-      collect() %>%
-      arrange(dbl),
-    rbind(
-      df1[8:10, c("chr", "dbl", "int")],
-      df2[1:2, c("chr", "dbl", "int")]
-    )
-  )
-})
 
 test_that("mutate()", {
   ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8()))
@@ -965,7 +737,7 @@ test_that("mutate()", {
 chr: string
 dbl: double
 int: int32
-twice: expr
+twice: double (multiply_checked(int, 2))
 
 * Filter: ((multiply_checked(dbl, 2) > 14) and (subtract_checked(dbl, 50) < 3))
 See $.data for the source Arrow object",
@@ -985,26 +757,6 @@ See $.data for the source Arrow object",
   )
 })
 
-test_that("transmute()", {
-  ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8()))
-  mutated <-
-  expect_equivalent(
-    ds %>%
-      select(chr, dbl, int) %>%
-      filter(dbl * 2 > 14 & dbl - 50 < 3L) %>%
-      transmute(twice = int * 2) %>%
-      collect() %>%
-      arrange(twice),
-    rbind(
-      df1[8:10, "int", drop = FALSE],
-      df2[1:2, "int", drop = FALSE]
-    ) %>%
-      transmute(
-        twice = int * 2
-      )
-  )
-})
-
 test_that("mutate() features not yet implemented", {
   expect_error(
     ds %>%
@@ -1015,66 +767,6 @@ test_that("mutate() features not yet implemented", {
   )
 })
 
-
-test_that("mutate() with scalar (length 1) literal inputs", {
-  expect_equal(
-    ds %>%
-      mutate(the_answer = 42) %>%
-      collect() %>%
-      pull(the_answer),
-    rep(42, nrow(ds))
-  )
-
-  expect_error(
-    ds %>% mutate(the_answer = c(42, 42)),
-    "In the_answer = c(42, 42), only values of size one are recycled\nCall collect() first to pull data into R.",
-    fixed = TRUE
-  )
-})
-
-test_that("mutate() with NULL inputs", {
-  expect_equal(
-    ds %>%
-      mutate(int = NULL) %>%
-      collect(),
-    ds %>%
-      select(-int) %>%
-      collect()
-  )
-})
-
-test_that("empty mutate()", {
-  expect_equal(
-    ds %>%
-      mutate() %>%
-      collect(),
-    ds %>%
-      collect()
-  )
-})
-
-test_that("transmute() with NULL inputs", {
-  expect_equal(
-    ds %>%
-      transmute(int = NULL) %>%
-      collect(),
-    ds %>%
-      select() %>%
-      collect()
-  )
-})
-
-test_that("empty transmute()", {
-  expect_equal(
-    ds %>%
-      transmute() %>%
-      collect(),
-    ds %>%
-      select() %>%
-      collect()
-  )
-})
-
 test_that("filter scalar validation doesn't crash (ARROW-7772)", {
   expect_error(
     ds %>%
@@ -1120,7 +812,7 @@ test_that("arrange()", {
 chr: string
 dbl: double
 int: int32
-twice: expr
+twice: double (multiply_checked(int, 2))
 
 * Filter: ((multiply_checked(dbl, 2) > 14) and (subtract_checked(dbl, 50) < 3))
 * Sorted by chr [asc], multiply_checked(int, 2) [desc], add_checked(dbl, int) [asc]
@@ -1189,8 +881,8 @@ test_that("compute()/collect(as_data_frame=FALSE)", {
   # the group_by() prevents compute() from returning a Table...
   expect_is(tab5, "arrow_dplyr_query")
 
-  # ... but $.data is a Table...
-  expect_is(tab5$.data, "Table")
+  # ... but $.data is a Table (InMemoryDataset)...
+  expect_r6_class(tab5$.data, "InMemoryDataset")
   # ... and the mutate() was evaluated
   expect_true("negint" %in% names(tab5$.data))
 
@@ -1549,17 +1241,17 @@ test_that("Dataset writing: dplyr methods", {
   expect_true(dir.exists(dst_dir))
   expect_identical(dir(dst_dir), sort(paste("int", c(1:10, 101:110), sep = "=")))
 
-  # select to specify schema
+  # select to specify schema (and rename)
   dst_dir2 <- tempfile()
   ds %>%
     group_by(int) %>%
-    select(chr, dbl) %>%
+    select(chr, dubs = dbl) %>%
     write_dataset(dst_dir2, format = "feather")
   new_ds <- open_dataset(dst_dir2, format = "feather")
 
   expect_equivalent(
     collect(new_ds) %>% arrange(int),
-    rbind(df1[c("chr", "dbl", "int")], df2[c("chr", "dbl", "int")])
+    rbind(df1[c("chr", "dbl", "int")], df2[c("chr", "dbl", "int")]) %>% rename(dubs = dbl)
   )
 
   # filter to restrict written rows
@@ -1573,6 +1265,19 @@ test_that("Dataset writing: dplyr methods", {
     new_ds %>% select(names(df1)) %>% collect(),
     df1 %>% filter(int == 4)
   )
+
+  # mutate
+  dst_dir3 <- tempfile()
+  ds %>%
+    filter(int == 4) %>%
+    mutate(twice = int * 2) %>%
+    write_dataset(dst_dir3, format = "feather")
+  new_ds <- open_dataset(dst_dir3, format = "feather")
+
+  expect_equivalent(
+    new_ds %>% select(c(names(df1), "twice")) %>% collect(),
+    df1 %>% filter(int == 4) %>% mutate(twice = int * 2)
+  )
 })
 
 test_that("Dataset writing: non-hive", {
@@ -1750,10 +1455,6 @@ test_that("Dataset writing: unsupported features/input validation", {
   expect_error(write_dataset(4), 'dataset must be a "Dataset"')
 
   ds <- open_dataset(hive_dir)
-  expect_error(
-    select(ds, integer = int) %>% write_dataset(ds),
-    "Renaming columns when writing a dataset is not yet supported"
-  )
   expect_error(
     write_dataset(ds, partitioning = c("int", "NOTACOLUMN"), format = "ipc"),
     'Invalid field name: "NOTACOLUMN"'
diff --git a/r/tests/testthat/test-dplyr-arrange.R b/r/tests/testthat/test-dplyr-arrange.R
index b476c032945..45cd687e848 100644
--- a/r/tests/testthat/test-dplyr-arrange.R
+++ b/r/tests/testthat/test-dplyr-arrange.R
@@ -15,6 +15,8 @@
 # specific language governing permissions and limitations
 # under the License.
 
+skip_if_not_available("dataset")
+
 library(dplyr)
 
 # randomize order of rows in test data
diff --git a/r/tests/testthat/test-dplyr-filter.R b/r/tests/testthat/test-dplyr-filter.R
index d1bd3cec607..6bba58a7e06 100644
--- a/r/tests/testthat/test-dplyr-filter.R
+++ b/r/tests/testthat/test-dplyr-filter.R
@@ -15,6 +15,8 @@
 # specific language governing permissions and limitations
 # under the License.
 
+skip_if_not_available("dataset")
+
 library(dplyr)
 library(stringr)
 
@@ -193,7 +195,6 @@ test_that("Negative scalar values", {
   )
 })
 
-
 test_that("filter() with between()", {
   expect_dplyr_equal(
     input %>%
@@ -243,34 +244,6 @@ test_that("filter() with between()", {
 
 test_that("filter() with string ops", {
   skip_if_not_available("utf8proc")
-  skip_if(getRversion() < "3.4.0", "R < 3.4")
-  # Extra instrumentation to ensure that we're calling Arrow compute here
-  # because many base R string functions implicitly call as.character,
-  # which means they still work on Arrays but actually force data into R
-  # 1) wrapper that raises a warning if as.character is called. Can't wrap
-  #    the whole test because as.character apparently gets called in other
-  #    (presumably legitimate) places
-  # 2) Wrap the test in expect_warning(expr, NA) to catch the warning
-  with_no_as_character <- function(expr) {
-    trace(
-      "as.character",
-      tracer = quote(warning("as.character was called")),
-      print = FALSE,
-      where = toupper
-    )
-    on.exit(untrace("as.character", where = toupper))
-    force(expr)
-  }
-
-  expect_warning(
-    expect_dplyr_equal(
-      input %>%
-        filter(dbl > 2, with_no_as_character(toupper(chr)) %in% c("D", "F")) %>%
-        collect(),
-      tbl
-    ),
-  NA)
-
   expect_dplyr_equal(
     input %>%
       filter(dbl > 2, str_length(verses) > 25) %>%
@@ -303,9 +276,9 @@ test_that("filter environment scope", {
 
   skip("Need to substitute in user defined function too")
   # TODO: fix this: this isEqualTo function is eagerly evaluating; it should
-  # instead yield array_expressions. Probably bc the parent env of the function
-  # has the Ops.Array methods defined; we need to move it so that the parent
-  # env is the data mask we use in the dplyr eval
+  # instead yield Expressions. Probably bc the parent env of the function
+  # has the Ops.Expression methods defined; we need to move it so that the
+  # parent env is the data mask we use in the dplyr eval
   isEqualTo <- function(x, y) x == y & !is.na(x)
   expect_dplyr_equal(
     input %>%
@@ -341,7 +314,7 @@ test_that("Filtering on a column that doesn't exist errors correctly", {
   })
 })
 
-test_that("Filtering with a function that doesn't have an Array/expr method still works", {
+test_that("Filtering with unsupported functions", {
   expect_warning(
     expect_dplyr_equal(
       input %>%
@@ -349,7 +322,23 @@ test_that("Filtering with a function that doesn't have an Array/expr method stil
         collect(),
       tbl
     ),
-    'Filter expression not implemented in Arrow: pnorm(dbl) > 0.99; pulling data into R',
+    'Expression pnorm(dbl) > 0.99 not supported in Arrow; pulling data into R',
+    fixed = TRUE
+  )
+  expect_warning(
+    expect_dplyr_equal(
+      input %>%
+        filter(
+          nchar(chr, type = "bytes", allowNA = TRUE) == 1, # bad, Arrow msg
+          int > 2,                                         # good
+          pnorm(dbl) > .99                                 # bad, opaque
+        ) %>%
+        collect(),
+      tbl
+    ),
+'* In nchar(chr, type = "bytes", allowNA = TRUE) == 1, allowNA = TRUE not supported by Arrow
+* Expression pnorm(dbl) > 0.99 not supported in Arrow
+pulling data into R',
     fixed = TRUE
   )
 })
diff --git a/r/tests/testthat/test-dplyr-group-by.R b/r/tests/testthat/test-dplyr-group-by.R
index 6f5d5672d19..8583c2f9024 100644
--- a/r/tests/testthat/test-dplyr-group-by.R
+++ b/r/tests/testthat/test-dplyr-group-by.R
@@ -15,6 +15,8 @@
 # specific language governing permissions and limitations
 # under the License.
 
+skip_if_not_available("dataset")
+
 library(dplyr)
 library(stringr)
 
diff --git a/r/tests/testthat/test-dplyr-mutate.R b/r/tests/testthat/test-dplyr-mutate.R
index 4f202fa5958..98eb4983d32 100644
--- a/r/tests/testthat/test-dplyr-mutate.R
+++ b/r/tests/testthat/test-dplyr-mutate.R
@@ -15,6 +15,8 @@
 # specific language governing permissions and limitations
 # under the License.
 
+skip_if_not_available("dataset")
+
 library(dplyr)
 library(stringr)
 
@@ -116,6 +118,7 @@ test_that("nchar() arguments", {
       collect(),
     tbl
   )
+  # This tests the whole abandon_ship() machinery
   expect_warning(
     expect_dplyr_equal(
       input %>%
@@ -128,7 +131,8 @@ test_that("nchar() arguments", {
         collect(),
       tbl
     ),
-    "not supported"
+    'In nchar(verses, type = "bytes", allowNA = TRUE), allowNA = TRUE not supported by Arrow; pulling data into R',
+    fixed = TRUE
   )
 })
 
@@ -173,7 +177,6 @@ test_that("mutate with reassigning same name", {
 })
 
 test_that("mutate with single value for recycling", {
-  skip("Not implemented (ARROW-11705")
   expect_dplyr_equal(
     input %>%
       select(int, padded_strings) %>%
@@ -338,31 +341,31 @@ test_that("handle bad expressions", {
   })
 })
 
+test_that("Can't just add a vector column with mutate()", {
+  expect_warning(
+    expect_equal(
+      Table$create(tbl) %>%
+        select(int) %>%
+        mutate(again = 1:10),
+      tibble::tibble(int = tbl$int, again = 1:10)
+    ),
+    "In again = 1:10, only values of size one are recycled; pulling data into R"
+  )
+})
+
 test_that("print a mutated table", {
   expect_output(
     Table$create(tbl) %>%
       select(int) %>%
       mutate(twice = int * 2) %>%
       print(),
-'Table (query)
+'InMemoryDataset (query)
 int: int32
-twice: expr
+twice: double (multiply_checked(int, 2))
 
 See $.data for the source Arrow object',
-  fixed = TRUE)
-
-  # Handling non-expressions/edge cases
-  expect_output(
-    Table$create(tbl) %>%
-      select(int) %>%
-      mutate(again = 1:10) %>%
-      print(),
-'Table (query)
-int: int32
-again: expr
-
-See $.data for the source Arrow object',
-  fixed = TRUE)
+    fixed = TRUE
+  )
 })
 
 test_that("mutate and write_dataset", {
diff --git a/r/tests/testthat/test-dplyr-string-functions.R b/r/tests/testthat/test-dplyr-string-functions.R
index d7df83cc7a6..fb5e6752709 100644
--- a/r/tests/testthat/test-dplyr-string-functions.R
+++ b/r/tests/testthat/test-dplyr-string-functions.R
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+skip_if_not_available("dataset")
 skip_if_not_available("utf8proc")
 
 library(dplyr)
@@ -342,99 +343,73 @@ test_that("arrow_*_split_whitespace functions", {
       collect(),
     tibble(x = list(c("Foo\u00A0and", "bar"), c("baz\u2006and\u1680qux\u3000and", "quux")))
   )
-
 })
 
 test_that("errors and warnings in string splitting", {
-  df <- tibble(x = c("Foo and bar", "baz and qux and quux"))
-
   # These conditions generate an error, but abandon_ship() catches the error,
-  # issues a warning, and pulls the data into R
-  expect_warning(
-    df %>%
-      Table$create() %>%
-      mutate(x = strsplit(x, "and.*", fixed = FALSE)) %>%
-      collect(),
-    regexp = "not supported"
+  # issues a warning, and pulls the data into R (if computing on InMemoryDataset)
+  # Elsewhere we test that abandon_ship() works,
+  # so here we can just call the functions directly
+
+  x <- Expression$field_ref("x")
+  expect_error(
+    nse_funcs$strsplit(x, "and.*", fixed = FALSE),
+    'Regular expression matching in strsplit() not supported by Arrow',
+    fixed = TRUE
   )
-  expect_warning(
-    df %>%
-      Table$create() %>%
-      mutate(x = str_split(x, "and.?")) %>%
-      collect()
+  expect_error(
+    nse_funcs$str_split(x, "and.?"),
+    'Regular expression matching in str_split() not supported by Arrow',
+    fixed = TRUE
   )
-  expect_warning(
-    df %>%
-      Table$create() %>%
-      mutate(x = str_split(x, regex("and.?"), n = 2)) %>%
-      collect(),
-    regexp = "not supported"
+  expect_error(
+    nse_funcs$str_split(x, regex("and.*")),
+    'Regular expression matching in str_split() not supported by Arrow',
+    fixed = TRUE
   )
-  expect_warning(
-    df %>%
-      Table$create() %>%
-      mutate(x = str_split(x, fixed("and", ignore_case = TRUE))) %>%
-      collect(),
-    "not supported"
+  expect_error(
+    nse_funcs$str_split(x, fixed("and", ignore_case = TRUE)),
+    "Case-insensitive string splitting not supported by Arrow"
   )
-  expect_warning(
-    df %>%
-      Table$create() %>%
-      mutate(x = str_split(x, coll("and.?"))) %>%
-      collect(),
-    regexp = "not supported"
+  expect_error(
+    nse_funcs$str_split(x, coll("and.?")),
+    "Pattern modifier `coll()` not supported by Arrow",
+    fixed = TRUE
   )
-  expect_warning(
-    df %>%
-      Table$create() %>%
-      mutate(x = str_split(x, boundary(type = "word"))) %>%
-      collect(),
-    regexp = "not supported"
+  expect_error(
+    nse_funcs$str_split(x, boundary(type = "word")),
+    "Pattern modifier `boundary()` not supported by Arrow",
+    fixed = TRUE
   )
-  expect_warning(
-    df %>%
-      Table$create() %>%
-      mutate(x = str_split(x, "and", n = 0)) %>%
-      collect(),
-    regexp = "not supported"
+  expect_error(
+    nse_funcs$str_split(x, "and", n = 0),
+    "Splitting strings into zero parts not supported by Arrow"
   )
 
   # This condition generates a warning
   expect_warning(
-    df %>%
-      Table$create() %>%
-      mutate(x = str_split(x, fixed("and"), simplify = TRUE)) %>%
-      collect(),
-    "ignored"
+    nse_funcs$str_split(x, fixed("and"), simplify = TRUE),
+    "Argument 'simplify = TRUE' will be ignored"
   )
-
 })
 
 test_that("errors and warnings in string detection and replacement", {
-  df <- tibble(x = c("Foo", "bar"))
+  x <- Expression$field_ref("x")
 
-  # These conditions generate an error, but abandon_ship() catches the error,
-  # issues a warning, and pulls the data into R
-  expect_warning(
-    df %>%
-      Table$create() %>%
-      filter(str_detect(x, boundary(type = "character"))) %>%
-      collect(),
-    regexp = "not implemented"
+  expect_error(
+    nse_funcs$str_detect(x, boundary(type = "character")),
+    "Pattern modifier `boundary()` not supported by Arrow",
+    fixed = TRUE
   )
-  expect_warning(
-    df %>%
-      Table$create() %>%
-      mutate(x = str_replace_all(x, coll("o", locale = "en"), "ó")) %>%
-      collect(),
-    regexp = "not supported"
+  expect_error(
+    nse_funcs$str_replace_all(x, coll("o", locale = "en"), "ó"),
+    "Pattern modifier `coll()` not supported by Arrow",
+    fixed = TRUE
   )
 
   # This condition generates a warning
   expect_warning(
-    df %>%
-      Table$create() %>%
-      transmute(x = str_replace_all(x, regex("o", multiline = TRUE), "u")),
+    nse_funcs$str_replace_all(x, regex("o", multiline = TRUE), "u"),
     "Ignoring pattern modifier argument not supported in Arrow: \"multiline\""
   )
 
@@ -521,5 +496,4 @@ test_that("edge cases in string detection and replacement", {
       collect(),
     tibble(x = c("ABC"))
   )
-
 })
diff --git a/r/tests/testthat/test-dplyr.R b/r/tests/testthat/test-dplyr.R
index a02b00f3d95..46d30e37823 100644
--- a/r/tests/testthat/test-dplyr.R
+++ b/r/tests/testthat/test-dplyr.R
@@ -15,6 +15,8 @@
 # specific language governing permissions and limitations
 # under the License.
 
+skip_if_not_available("dataset")
+
 library(dplyr)
 library(stringr)
 
@@ -57,11 +59,11 @@ test_that("Print method", {
       filter(int < 5) %>%
       select(int, chr) %>%
       print(),
-'RecordBatch (query)
+'InMemoryDataset (query)
 int: int32
 chr: string
 
-* Filter: and(and(greater(dbl, 2), or(equal(chr, "d"), equal(chr, "f"))), less(int, 5))
+* Filter: (((dbl > 2) and ((chr == "d") or (chr == "f"))) and (int < 5))
 See $.data for the source Arrow object',
   fixed = TRUE
   )
@@ -187,7 +189,8 @@ test_that("collect(as_data_frame=FALSE)", {
     filter(int > 5) %>%
     collect(as_data_frame = FALSE)
 
-  expect_r6_class(b2, "RecordBatch")
+  # collect(as_data_frame = FALSE) always returns Table now
+  expect_r6_class(b2, "Table")
   expected <- tbl[tbl$int > 5 & !is.na(tbl$int), c("int", "chr")]
   expect_equal(as.data.frame(b2), expected)
 
@@ -195,7 +198,7 @@ test_that("collect(as_data_frame=FALSE)", {
     select(int, strng = chr) %>%
     filter(int > 5) %>%
     collect(as_data_frame = FALSE)
-  expect_r6_class(b3, "RecordBatch")
+  expect_r6_class(b3, "Table")
   expect_equal(as.data.frame(b3), set_names(expected, c("int", "strng")))
 
   b4 <- batch %>%
@@ -217,14 +220,14 @@ test_that("compute()", {
 
   b1 <- batch %>% compute()
 
-  expect_is(b1, "RecordBatch")
+  expect_r6_class(b1, "RecordBatch")
 
   b2 <- batch %>%
     select(int, chr) %>%
     filter(int > 5) %>%
     compute()
 
-  expect_is(b2, "RecordBatch")
+  expect_r6_class(b2, "Table")
   expected <- tbl[tbl$int > 5 & !is.na(tbl$int), c("int", "chr")]
   expect_equal(as.data.frame(b2), expected)
 
@@ -232,7 +235,7 @@ test_that("compute()", {
     select(int, strng = chr) %>%
     filter(int > 5) %>%
     compute()
-  expect_is(b3, "RecordBatch")
+  expect_r6_class(b3, "Table")
   expect_equal(as.data.frame(b3), set_names(expected, c("int", "strng")))
 
   b4 <- batch %>%
@@ -240,7 +243,7 @@ test_that("compute()", {
     filter(int > 5) %>%
     group_by(int) %>%
     compute()
-  expect_is(b4, "arrow_dplyr_query")
+  expect_s3_class(b4, "arrow_dplyr_query")
   expect_equal(
     as.data.frame(b4),
     expected %>%
@@ -257,7 +260,7 @@ test_that("head", {
     filter(int > 5) %>%
     head(2)
 
-  expect_r6_class(b2, "RecordBatch")
+  expect_r6_class(b2, "Table")
   expected <- tbl[tbl$int > 5 & !is.na(tbl$int), c("int", "chr")][1:2, ]
   expect_equal(as.data.frame(b2), expected)
 
@@ -265,7 +268,7 @@ test_that("head", {
     select(int, strng = chr) %>%
     filter(int > 5) %>%
     head(2)
-  expect_r6_class(b3, "RecordBatch")
+  expect_r6_class(b3, "Table")
   expect_equal(as.data.frame(b3), set_names(expected, c("int", "strng")))
 
   b4 <- batch %>%
@@ -290,7 +293,7 @@ test_that("tail", {
     filter(int > 5) %>%
     tail(2)
 
-  expect_r6_class(b2, "RecordBatch")
+  expect_r6_class(b2, "Table")
   expected <- tail(tbl[tbl$int > 5 & !is.na(tbl$int), c("int", "chr")], 2)
   expect_equal(as.data.frame(b2), expected)
 
@@ -298,7 +301,7 @@ test_that("tail", {
     select(int, strng = chr) %>%
     filter(int > 5) %>%
     tail(2)
-  expect_r6_class(b3, "RecordBatch")
+  expect_r6_class(b3, "Table")
   expect_equal(as.data.frame(b3), set_names(expected, c("int", "strng")))
 
   b4 <- batch %>%
@@ -501,6 +504,7 @@ test_that("explicit type conversions with as.*()", {
 })
 
 test_that("as.factor()/dictionary_encode()", {
+  skip("ARROW-12632: ExecuteScalarExpression cannot Execute non-scalar expression {x=dictionary_encode(x, {NON-REPRESENTABLE OPTIONS})}")
   df1 <- tibble(x = c("C", "D", "B", NA, "D", "B", "S", "A", "B", "Z", "B"))
   df2 <- tibble(x = c(5, 5, 5, NA, 2, 3, 6, 8))
 
diff --git a/r/tests/testthat/test-expression.R b/r/tests/testthat/test-expression.R
index dd61b5e3ca2..d0459fde5b5 100644
--- a/r/tests/testthat/test-expression.R
+++ b/r/tests/testthat/test-expression.R
@@ -17,34 +17,6 @@
 
 context("Expressions")
 
-test_that("Can create an expression", {
-  expect_s3_class(build_array_expression(">", Array$create(1:5), 4), "array_expression")
-})
-
-test_that("as.vector(array_expression)", {
-  expect_equal(as.vector(build_array_expression(">", Array$create(1:5), 4)), c(FALSE, FALSE, FALSE, FALSE, TRUE))
-})
-
-test_that("array_expression print method", {
-  expect_output(
-    print(build_array_expression(">", Array$create(1:5), 4)),
-    # Not ideal but it is informative
-    "greater(<Array>, 4)",
-    fixed = TRUE
-  )
-})
-
-test_that("array_refs", {
-  tab <- Table$create(a = 1:5)
-  ex <- build_array_expression(">", array_expression("array_ref", field_name = "a"), 4)
-  expect_s3_class(ex, "array_expression")
-  expect_identical(ex$args[[1]]$args$field_name, "a")
-  expect_identical(find_array_refs(ex), "a")
-  out <- eval_array_expression(ex, tab)
-  expect_r6_class(out, "ChunkedArray")
-  expect_equal(as.vector(out), c(FALSE, FALSE, FALSE, FALSE, TRUE))
-})
-
 test_that("C++ expressions", {
   skip_if_not_available("dataset")
   f <- Expression$field_ref("f")
@@ -76,24 +48,14 @@ test_that("C++ expressions", {
     'Expression\n(f > 4)',
     fixed = TRUE
   )
+  expect_type_equal(
+    f$type(schema(f = float64())),
+    float64()
+  )
+  expect_type_equal(
+    (f > 4)$type(schema(f = float64())),
+    bool()
+  )
   # Interprets that as a list type
   expect_r6_class(f == c(1L, 2L), "Expression")
-})
-
-test_that("Can create an expression", {
-  a <- Array$create(as.numeric(1:5))
-  expr <- array_expression("cast", a, options = list(to_type = int32()))
-  expect_s3_class(expr, "array_expression")
-  expect_equal(eval_array_expression(expr), Array$create(1:5))
-
-  b <- Array$create(0.5:4.5)
-  bad_expr <- array_expression("cast", b, options = list(to_type = int32()))
-  expect_s3_class(bad_expr, "array_expression")
-  expect_error(
-    eval_array_expression(bad_expr),
-    "Invalid: Float value .* was truncated converting"
-  )
-  expr <- array_expression("cast", b, options = list(to_type = int32(), allow_float_truncate = TRUE))
-  expect_s3_class(expr, "array_expression")
-  expect_equal(eval_array_expression(expr), Array$create(0:4))
-})
+})
\ No newline at end of file
diff --git a/r/tests/testthat/test-filesystem.R b/r/tests/testthat/test-filesystem.R
index 344865c077a..df084f35a49 100644
--- a/r/tests/testthat/test-filesystem.R
+++ b/r/tests/testthat/test-filesystem.R
@@ -136,6 +136,7 @@ test_that("LocalFileSystem + Selector", {
 test_that("FileSystem$from_uri", {
   skip_on_cran()
   skip_if_not_available("s3")
+  skip_if_offline()
   fs_and_path <- FileSystem$from_uri("s3://ursa-labs-taxi-data")
   expect_r6_class(fs_and_path$fs, "S3FileSystem")
   expect_identical(fs_and_path$fs$region, "us-east-2")
@@ -144,6 +145,7 @@ test_that("FileSystem$from_uri", {
 test_that("SubTreeFileSystem$create() with URI", {
   skip_on_cran()
   skip_if_not_available("s3")
+  skip_if_offline()
   fs <- SubTreeFileSystem$create("s3://ursa-labs-taxi-data")
   expect_r6_class(fs, "SubTreeFileSystem")
   expect_identical(
@@ -155,6 +157,7 @@ test_that("SubTreeFileSystem$create() with URI", {
 test_that("S3FileSystem", {
   skip_on_cran()
   skip_if_not_available("s3")
+  skip_if_offline()
   s3fs <- S3FileSystem$create()
   expect_r6_class(s3fs, "S3FileSystem")
 })
@@ -162,6 +165,7 @@ test_that("S3FileSystem", {
 test_that("s3_bucket", {
   skip_on_cran()
   skip_if_not_available("s3")
+  skip_if_offline()
   bucket <- s3_bucket("ursa-labs-r-test")
   expect_r6_class(bucket, "SubTreeFileSystem")
   expect_r6_class(bucket$base_fs, "S3FileSystem")

From f1a7c509b277f7089f81d592abcafa20ac39ea6b Mon Sep 17 00:00:00 2001
From: Matthew Topol <mtopol@factset.com>
Date: Thu, 13 May 2021 09:03:51 -0700
Subject: [PATCH 238/719] ARROW-5385: [Go] Implement EXTENSION datatype

Getting the extension metadata recognized for the integration tests with extension types also had the side effect of being a solution for the custom metadata integration tests, so i've also enabled those for Go.

Closes #10203 from zeroshade/extension-type

Authored-by: Matthew Topol <mtopol@factset.com>
Signed-off-by: Micah Kornfield <emkornfield@gmail.com>
---
 dev/archery/archery/integration/datagen.py    |   3 +-
 docs/source/status.rst                        |   4 +-
 go/arrow/array/array.go                       |   2 +-
 go/arrow/array/array_test.go                  |   5 +-
 go/arrow/array/builder.go                     |   2 +
 go/arrow/array/compare.go                     |   6 +
 go/arrow/array/extension.go                   | 236 +++++++++
 go/arrow/array/extension_test.go              |  96 ++++
 go/arrow/compare.go                           |  53 +-
 go/arrow/compare_test.go                      |  20 +-
 go/arrow/datatype_extension.go                | 163 ++++++
 go/arrow/datatype_extension_test.go           |  86 +++
 go/arrow/datatype_nested.go                   |  14 +-
 go/arrow/internal/arrdata/arrdata.go          |  99 ++++
 go/arrow/internal/arrjson/arrjson.go          | 180 ++++++-
 go/arrow/internal/arrjson/arrjson_test.go     | 497 +++++++++++++++++-
 go/arrow/internal/arrjson/reader.go           |   5 +-
 go/arrow/internal/arrjson/writer.go           |  77 +--
 .../internal/testing/types/extension_types.go | 247 +++++++++
 .../cmd/arrow-json-integration-test/main.go   |   4 +
 go/arrow/ipc/file_reader.go                   |   5 +
 go/arrow/ipc/metadata.go                      |  68 ++-
 go/arrow/ipc/metadata_test.go                 |  63 +++
 go/arrow/ipc/reader.go                        |   9 +-
 go/arrow/ipc/writer.go                        |   9 +
 go/arrow/schema.go                            |  29 +
 26 files changed, 1856 insertions(+), 126 deletions(-)
 create mode 100644 go/arrow/array/extension.go
 create mode 100644 go/arrow/array/extension_test.go
 create mode 100644 go/arrow/datatype_extension.go
 create mode 100644 go/arrow/datatype_extension_test.go
 create mode 100644 go/arrow/internal/testing/types/extension_types.go

diff --git a/dev/archery/archery/integration/datagen.py b/dev/archery/archery/integration/datagen.py
index 8e63fe3be86..62fe17bffda 100644
--- a/dev/archery/archery/integration/datagen.py
+++ b/dev/archery/archery/integration/datagen.py
@@ -1564,7 +1564,6 @@ def _temp_path():
         .skip_category('Rust'),
 
         generate_custom_metadata_case()
-        .skip_category('Go')
         .skip_category('JS'),
 
         generate_duplicate_fieldnames_case()
@@ -1586,7 +1585,7 @@ def _temp_path():
         .skip_category('Rust'),
 
         generate_extension_case()
-        .skip_category('Go')    # TODO(ARROW-5385)
+        .skip_category('Go')  # TODO(ARROW-3039): requires dictionaries
         .skip_category('JS')
         .skip_category('Rust'),
     ]
diff --git a/docs/source/status.rst b/docs/source/status.rst
index a06ef40968a..80b21f74e36 100644
--- a/docs/source/status.rst
+++ b/docs/source/status.rst
@@ -92,7 +92,7 @@ Data Types
 +===================+=======+=======+=======+============+=======+=======+=======+
 | Dictionary        | ✓     | ✓ (1) |       | ✓ (1)      |       | ✓ (1) | ✓     |
 +-------------------+-------+-------+-------+------------+-------+-------+-------+
-| Extension         | ✓     | ✓     |       |            |       |       | ✓     |
+| Extension         | ✓     | ✓     | ✓     |            |       |       | ✓     |
 +-------------------+-------+-------+-------+------------+-------+-------+-------+
 
 Notes:
@@ -130,7 +130,7 @@ IPC Format
 +-----------------------------+-------+-------+-------+------------+-------+-------+-------+
 | Endianness conversion       | ✓ (2) |       |       |            |       |       |       |
 +-----------------------------+-------+-------+-------+------------+-------+-------+-------+
-| Custom schema metadata      | ✓     | ✓     |       |            |       |  ✓    | ✓     |
+| Custom schema metadata      | ✓     | ✓     | ✓     |            |       |  ✓    | ✓     |
 +-----------------------------+-------+-------+-------+------------+-------+-------+-------+
 
 Notes:
diff --git a/go/arrow/array/array.go b/go/arrow/array/array.go
index c191c76c7e9..cc2a1a3b983 100644
--- a/go/arrow/array/array.go
+++ b/go/arrow/array/array.go
@@ -198,7 +198,7 @@ func init() {
 		arrow.UNION:             unsupportedArrayType,
 		arrow.DICTIONARY:        unsupportedArrayType,
 		arrow.MAP:               func(data *Data) Interface { return NewMapData(data) },
-		arrow.EXTENSION:         unsupportedArrayType,
+		arrow.EXTENSION:         func(data *Data) Interface { return NewExtensionData(data) },
 		arrow.FIXED_SIZE_LIST:   func(data *Data) Interface { return NewFixedSizeListData(data) },
 		arrow.DURATION:          func(data *Data) Interface { return NewDurationData(data) },
 
diff --git a/go/arrow/array/array_test.go b/go/arrow/array/array_test.go
index 904cbd8088f..3a2101a368f 100644
--- a/go/arrow/array/array_test.go
+++ b/go/arrow/array/array_test.go
@@ -22,6 +22,7 @@ import (
 	"github.com/apache/arrow/go/arrow"
 	"github.com/apache/arrow/go/arrow/array"
 	"github.com/apache/arrow/go/arrow/internal/testing/tools"
+	"github.com/apache/arrow/go/arrow/internal/testing/types"
 	"github.com/apache/arrow/go/arrow/memory"
 	"github.com/stretchr/testify/assert"
 )
@@ -92,10 +93,12 @@ func TestMakeFromData(t *testing.T) {
 			}, 0 /* nulls */, 0 /* offset */)},
 		},
 
+		{name: "extension", d: &testDataType{arrow.EXTENSION}, expPanic: true, expError: "arrow/array: DataType for ExtensionArray must implement arrow.ExtensionType"},
+		{name: "extension", d: types.NewUUIDType()},
+
 		// unsupported types
 		{name: "union", d: &testDataType{arrow.UNION}, expPanic: true, expError: "unsupported data type: UNION"},
 		{name: "dictionary", d: &testDataType{arrow.DICTIONARY}, expPanic: true, expError: "unsupported data type: DICTIONARY"},
-		{name: "extension", d: &testDataType{arrow.Type(28)}, expPanic: true, expError: "unsupported data type: EXTENSION"},
 
 		// invalid types
 		{name: "invalid(-1)", d: &testDataType{arrow.Type(-1)}, expPanic: true, expError: "invalid data type: Type(-1)"},
diff --git a/go/arrow/array/builder.go b/go/arrow/array/builder.go
index fcb8d7670cf..378c6089222 100644
--- a/go/arrow/array/builder.go
+++ b/go/arrow/array/builder.go
@@ -280,6 +280,8 @@ func NewBuilder(mem memory.Allocator, dtype arrow.DataType) Builder {
 		typ := dtype.(*arrow.MapType)
 		return NewMapBuilder(mem, typ.KeyType(), typ.ItemType(), typ.KeysSorted)
 	case arrow.EXTENSION:
+		typ := dtype.(arrow.ExtensionType)
+		return NewExtensionBuilder(mem, typ)
 	case arrow.FIXED_SIZE_LIST:
 		typ := dtype.(*arrow.FixedSizeListType)
 		return NewFixedSizeListBuilder(mem, typ.Len(), typ.Elem())
diff --git a/go/arrow/array/compare.go b/go/arrow/array/compare.go
index a02c5ac87be..f0ff5ed3529 100644
--- a/go/arrow/array/compare.go
+++ b/go/arrow/array/compare.go
@@ -167,6 +167,9 @@ func ArrayEqual(left, right Interface) bool {
 	case *Map:
 		r := right.(*Map)
 		return arrayEqualMap(l, r)
+	case ExtensionArray:
+		r := right.(ExtensionArray)
+		return arrayEqualExtension(l, r)
 	default:
 		panic(xerrors.Errorf("arrow/array: unknown array type %T", l))
 	}
@@ -358,6 +361,9 @@ func arrayApproxEqual(left, right Interface, opt equalOption) bool {
 	case *Map:
 		r := right.(*Map)
 		return arrayApproxEqualList(l.List, r.List, opt)
+	case ExtensionArray:
+		r := right.(ExtensionArray)
+		return arrayApproxEqualExtension(l, r, opt)
 	default:
 		panic(xerrors.Errorf("arrow/array: unknown array type %T", l))
 	}
diff --git a/go/arrow/array/extension.go b/go/arrow/array/extension.go
new file mode 100644
index 00000000000..d79103cc171
--- /dev/null
+++ b/go/arrow/array/extension.go
@@ -0,0 +1,236 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package array
+
+import (
+	"reflect"
+
+	"github.com/apache/arrow/go/arrow"
+	"github.com/apache/arrow/go/arrow/memory"
+	"golang.org/x/xerrors"
+)
+
+// ExtensionArray is the interface that needs to be implemented to handle
+// user-defined extension type arrays. In order to ensure consistency and
+// proper behavior, all ExtensionArray types must embed ExtensionArrayBase
+// in order to meet the interface which provides the default implementation
+// and handling for the array while allowing custom behavior to be built
+// on top of it.
+type ExtensionArray interface {
+	Interface
+	// ExtensionType returns the datatype as per calling DataType(), but
+	// already cast to ExtensionType
+	ExtensionType() arrow.ExtensionType
+	// Storage returns the underlying storage array for this array.
+	Storage() Interface
+
+	// by having a non-exported function in the interface, it means that
+	// consumers must embed ExtensionArrayBase in their structs in order
+	// to fulfill this interface.
+	mustEmbedExtensionArrayBase()
+}
+
+// two extension arrays are equal if their data types are equal and
+// their underlying storage arrays are equal.
+func arrayEqualExtension(l, r ExtensionArray) bool {
+	if !arrow.TypeEqual(l.DataType(), r.DataType()) {
+		return false
+	}
+
+	return ArrayEqual(l.Storage(), r.Storage())
+}
+
+// two extension arrays are approximately equal if their data types are
+// equal and their underlying storage arrays are approximately equal.
+func arrayApproxEqualExtension(l, r ExtensionArray, opt equalOption) bool {
+	if !arrow.TypeEqual(l.DataType(), r.DataType()) {
+		return false
+	}
+
+	return arrayApproxEqual(l.Storage(), r.Storage(), opt)
+}
+
+// NewExtensionArrayWithStorage constructs a new ExtensionArray from the provided
+// ExtensionType and uses the provided storage interface as the underlying storage.
+// This will not release the storage array passed in so consumers should call Release
+// on it manually while the new Extension array will share references to the underlying
+// Data buffers.
+func NewExtensionArrayWithStorage(dt arrow.ExtensionType, storage Interface) Interface {
+	if !arrow.TypeEqual(dt.StorageType(), storage.DataType()) {
+		panic(xerrors.Errorf("arrow/array: storage type %s for extension type %s, does not match expected type %s", storage.DataType(), dt.ExtensionName(), dt.StorageType()))
+	}
+
+	base := ExtensionArrayBase{}
+	base.refCount = 1
+	base.storage = storage
+	storage.Retain()
+
+	storageData := storage.Data()
+	// create a new data instance with the ExtensionType as the datatype but referencing the
+	// same underlying buffers to share them with the storage array.
+	baseData := NewData(dt, storageData.length, storageData.buffers, storageData.childData, storageData.nulls, storageData.offset)
+	defer baseData.Release()
+	base.array.setData(baseData)
+
+	// use the ExtensionType's ArrayType to construct the correctly typed object
+	// to use as the ExtensionArray interface. reflect.New returns a pointer to
+	// the newly created object.
+	arr := reflect.New(base.ExtensionType().ArrayType())
+	// set the embedded ExtensionArrayBase to the value we created above. We know
+	// that this field will exist because the interface requires embedding ExtensionArrayBase
+	// so we don't have to separately check, this will panic if called on an ArrayType
+	// that doesn't embed ExtensionArrayBase which is what we want.
+	arr.Elem().FieldByName("ExtensionArrayBase").Set(reflect.ValueOf(base))
+	return arr.Interface().(ExtensionArray)
+}
+
+// NewExtensionData expects a data with a datatype of arrow.ExtensionType and
+// underlying data built for the storage array.
+func NewExtensionData(data *Data) ExtensionArray {
+	base := ExtensionArrayBase{}
+	base.refCount = 1
+	base.setData(data)
+
+	// use the ExtensionType's ArrayType to construct the correctly typed object
+	// to use as the ExtensionArray interface. reflect.New returns a pointer to
+	// the newly created object.
+	arr := reflect.New(base.ExtensionType().ArrayType())
+	// set the embedded ExtensionArrayBase to the value we created above. We know
+	// that this field will exist because the interface requires embedding ExtensionArrayBase
+	// so we don't have to separately check, this will panic if called on an ArrayType
+	// that doesn't embed ExtensionArrayBase which is what we want.
+	arr.Elem().FieldByName("ExtensionArrayBase").Set(reflect.ValueOf(base))
+	return arr.Interface().(ExtensionArray)
+}
+
+// ExtensionArrayBase is the base struct for user-defined Extension Array types
+// and must be embedded in any user-defined extension arrays like so:
+//
+//   type UserDefinedArray struct {
+//       array.ExtensionArrayBase
+//   }
+//
+type ExtensionArrayBase struct {
+	array
+	storage Interface
+}
+
+// Retain increases the reference count by 1.
+// Retain may be called simultaneously from multiple goroutines.
+func (e *ExtensionArrayBase) Retain() {
+	e.array.Retain()
+	e.storage.Retain()
+}
+
+// Release decreases the reference count by 1.
+// Release may be called simultaneously from multiple goroutines.
+// When the reference count goes to zero, the memory is freed.
+func (e *ExtensionArrayBase) Release() {
+	e.array.Release()
+	e.storage.Release()
+}
+
+// Storage returns the underlying storage array
+func (e *ExtensionArrayBase) Storage() Interface { return e.storage }
+
+// ExtensionType returns the same thing as DataType, just already casted
+// to an ExtensionType interface for convenience.
+func (e *ExtensionArrayBase) ExtensionType() arrow.ExtensionType {
+	return e.DataType().(arrow.ExtensionType)
+}
+
+func (e *ExtensionArrayBase) setData(data *Data) {
+	if data.DataType().ID() != arrow.EXTENSION {
+		panic("arrow/array: must use extension type to construct an extension array")
+	}
+	extType, ok := data.dtype.(arrow.ExtensionType)
+	if !ok {
+		panic("arrow/array: DataType for ExtensionArray must implement arrow.ExtensionType")
+	}
+
+	e.array.setData(data)
+	// our underlying storage needs to reference the same data buffers (no copying)
+	// but should have the storage type's datatype, so we create a Data for it.
+	storageData := NewData(extType.StorageType(), data.length, data.buffers, data.childData, data.nulls, data.offset)
+	defer storageData.Release()
+	e.storage = MakeFromData(storageData)
+}
+
+// no-op function that exists simply to force embedding this in any extension array types.
+func (ExtensionArrayBase) mustEmbedExtensionArrayBase() {}
+
+// ExtensionBuilder is a convenience builder so that NewBuilder and such will still work
+// with extension types properly. Depending on preference it may be cleaner or easier to just use
+// NewExtensionArrayWithStorage and pass a storage array.
+//
+// That said, this allows easily building an extension array by providing the extension
+// type and retrieving the storage builder.
+type ExtensionBuilder struct {
+	Builder
+	dt arrow.ExtensionType
+}
+
+// NewExtensionBuilder returns a builder using the provided memory allocator for the desired
+// extension type. It will internally construct a builder of the storage type for the extension
+// type and keep a copy of the extension type. The underlying type builder can then be retrieved
+// by calling `StorageBuilder` on this and then type asserting it to the desired builder type.
+//
+// After using the storage builder, calling NewArray or NewExtensionArray will construct
+// the appropriate extension array type and set the storage correctly, resetting the builder for
+// reuse.
+//
+// Example
+//
+// Simple example assuming an extension type of a UUID defined as a FixedSizeBinary(16) was registered
+// using the type name "uuid":
+//
+//   uuidType := arrow.GetExtensionType("uuid")
+//   bldr := array.NewExtensionBuilder(memory.DefaultAllocator, uuidType)
+//   defer bldr.Release()
+//   uuidBldr := bldr.StorageBuilder().(*array.FixedSizeBinaryBuilder)
+//   /* build up the fixed size binary array as usual via Append/AppendValues */
+//   uuidArr := bldr.NewExtensionArray()
+//   defer uuidArr.Release()
+//
+// Because the storage builder is embedded in the Extension builder it also means
+// that any of the functions available on the Builder interface can be called on
+// an instance of ExtensionBuilder and will respond appropriately as the storage
+// builder would for generically grabbing the Lenth, Cap, Nulls, reserving, etc.
+func NewExtensionBuilder(mem memory.Allocator, dt arrow.ExtensionType) *ExtensionBuilder {
+	return &ExtensionBuilder{Builder: NewBuilder(mem, dt.StorageType()), dt: dt}
+}
+
+// StorageBuilder returns the builder for the underlying storage type.
+func (b *ExtensionBuilder) StorageBuilder() Builder { return b.Builder }
+
+// NewArray creates a new array from the memory buffers used by the builder
+// and resets the builder so it can be used to build a new array.
+func (b *ExtensionBuilder) NewArray() Interface {
+	return b.NewExtensionArray()
+}
+
+// NewExtensionArray creates an Extension array from the memory buffers used
+// by the builder and resets the ExtensionBuilder so it can be used to build
+// a new ExtensionArray of the same type.
+func (b *ExtensionBuilder) NewExtensionArray() ExtensionArray {
+	storage := b.Builder.NewArray()
+	defer storage.Release()
+
+	data := NewData(b.dt, storage.Len(), storage.Data().buffers, storage.Data().childData, storage.Data().nulls, 0)
+	defer data.Release()
+	return NewExtensionData(data)
+}
diff --git a/go/arrow/array/extension_test.go b/go/arrow/array/extension_test.go
new file mode 100644
index 00000000000..c053d38872f
--- /dev/null
+++ b/go/arrow/array/extension_test.go
@@ -0,0 +1,96 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package array_test
+
+import (
+	"testing"
+
+	"github.com/apache/arrow/go/arrow"
+	"github.com/apache/arrow/go/arrow/array"
+	"github.com/apache/arrow/go/arrow/internal/testing/types"
+	"github.com/apache/arrow/go/arrow/memory"
+	"github.com/stretchr/testify/suite"
+)
+
+type ExtensionTypeTestSuite struct {
+	suite.Suite
+}
+
+func (e *ExtensionTypeTestSuite) SetupTest() {
+	e.NoError(arrow.RegisterExtensionType(types.NewUUIDType()))
+}
+
+func (e *ExtensionTypeTestSuite) TearDownTest() {
+	if arrow.GetExtensionType("uuid") != nil {
+		e.NoError(arrow.UnregisterExtensionType("uuid"))
+	}
+}
+
+func (e *ExtensionTypeTestSuite) TestParametricEquals() {
+	p1Type := types.NewParametric1Type(6)
+	p2Type := types.NewParametric1Type(6)
+	p3Type := types.NewParametric1Type(3)
+
+	e.True(arrow.TypeEqual(p1Type, p2Type))
+	e.False(arrow.TypeEqual(p1Type, p3Type))
+}
+
+func exampleParametric(mem memory.Allocator, dt arrow.DataType, vals []int32, valid []bool) array.Interface {
+	bldr := array.NewBuilder(mem, dt)
+	defer bldr.Release()
+
+	exb := bldr.(*array.ExtensionBuilder)
+	sb := exb.StorageBuilder().(*array.Int32Builder)
+	sb.AppendValues(vals, valid)
+
+	return bldr.NewArray()
+}
+
+func (e *ExtensionTypeTestSuite) TestParametricArrays() {
+	pool := memory.NewCheckedAllocator(memory.NewGoAllocator())
+	defer pool.AssertSize(e.T(), 0)
+
+	p1Type := types.NewParametric1Type(6)
+	p1 := exampleParametric(pool, p1Type, []int32{-1, 1, 2, 3}, []bool{false, true, true, true})
+	defer p1.Release()
+
+	p2Type := types.NewParametric1Type(12)
+	p2 := exampleParametric(pool, p2Type, []int32{2, -1, 3, 4}, []bool{true, false, true, true})
+	defer p2.Release()
+
+	p3Type := types.NewParametric2Type(2)
+	p3 := exampleParametric(pool, p3Type, []int32{5, 6, 7, 8}, nil)
+	defer p3.Release()
+
+	p4Type := types.NewParametric2Type(3)
+	p4 := exampleParametric(pool, p4Type, []int32{5, 6, 7, 9}, nil)
+	defer p4.Release()
+
+	rb := array.NewRecord(arrow.NewSchema([]arrow.Field{
+		{Name: "f0", Type: p1Type, Nullable: true},
+		{Name: "f1", Type: p2Type, Nullable: true},
+		{Name: "f2", Type: p3Type, Nullable: true},
+		{Name: "f3", Type: p4Type, Nullable: true},
+	}, nil), []array.Interface{p1, p2, p3, p4}, -1)
+	defer rb.Release()
+
+	e.True(array.RecordEqual(rb, rb))
+}
+
+func TestExtensionTypes(t *testing.T) {
+	suite.Run(t, new(ExtensionTypeTestSuite))
+}
diff --git a/go/arrow/compare.go b/go/arrow/compare.go
index c2ca4e32141..5acfd940a59 100644
--- a/go/arrow/compare.go
+++ b/go/arrow/compare.go
@@ -46,34 +46,45 @@ func TypeEqual(left, right DataType, opts ...TypeEqualOption) bool {
 
 	switch {
 	case left == nil || right == nil:
-		return false
+		return left == nil && right == nil
 	case left.ID() != right.ID():
 		return false
 	}
 
-	// StructType is the only type that has metadata.
-	l, ok := left.(*StructType)
-	if !ok || cfg.metadata {
-		return reflect.DeepEqual(left, right)
-	}
-
-	r := right.(*StructType)
-	switch {
-	case len(l.fields) != len(r.fields):
-		return false
-	case !reflect.DeepEqual(l.index, r.index):
-		return false
-	}
-	for i := range l.fields {
-		leftField, rightField := l.fields[i], r.fields[i]
-		switch {
-		case leftField.Name != rightField.Name:
+	switch l := left.(type) {
+	case ExtensionType:
+		return l.ExtensionEquals(right.(ExtensionType))
+	case *ListType:
+		if !TypeEqual(l.Elem(), right.(*ListType).Elem(), opts...) {
 			return false
-		case leftField.Nullable != rightField.Nullable:
+		}
+		if cfg.metadata {
+			return l.Meta.Equal(right.(*ListType).Meta)
+		}
+		return true
+	case *StructType:
+		r := right.(*StructType)
+		switch {
+		case len(l.fields) != len(r.fields):
 			return false
-		case !TypeEqual(leftField.Type, rightField.Type, opts...):
+		case !reflect.DeepEqual(l.index, r.index):
 			return false
 		}
+		for i := range l.fields {
+			leftField, rightField := l.fields[i], r.fields[i]
+			switch {
+			case leftField.Name != rightField.Name:
+				return false
+			case leftField.Nullable != rightField.Nullable:
+				return false
+			case !TypeEqual(leftField.Type, rightField.Type, opts...):
+				return false
+			case cfg.metadata && !leftField.Metadata.Equal(rightField.Metadata):
+				return false
+			}
+		}
+		return true
+	default:
+		return reflect.DeepEqual(left, right)
 	}
-	return true
 }
diff --git a/go/arrow/compare_test.go b/go/arrow/compare_test.go
index 89112de9aea..9123036285b 100644
--- a/go/arrow/compare_test.go
+++ b/go/arrow/compare_test.go
@@ -27,7 +27,7 @@ func TestTypeEqual(t *testing.T) {
 		checkMetadata bool
 	}{
 		{
-			nil, nil, false, false,
+			nil, nil, true, false,
 		},
 		{
 			nil, PrimitiveTypes.Uint8, false, false,
@@ -69,25 +69,25 @@ func TestTypeEqual(t *testing.T) {
 			&TimestampType{Unit: Second, TimeZone: "UTC"}, &TimestampType{Unit: Nanosecond, TimeZone: "CET"}, false, false,
 		},
 		{
-			&ListType{PrimitiveTypes.Uint64}, &ListType{PrimitiveTypes.Uint64}, true, false,
+			&ListType{elem: PrimitiveTypes.Uint64}, &ListType{elem: PrimitiveTypes.Uint64}, true, false,
 		},
 		{
-			&ListType{PrimitiveTypes.Uint64}, &ListType{PrimitiveTypes.Uint32}, false, false,
+			&ListType{elem: PrimitiveTypes.Uint64}, &ListType{elem: PrimitiveTypes.Uint32}, false, false,
 		},
 		{
-			&ListType{&Time32Type{Unit: Millisecond}}, &ListType{&Time32Type{Unit: Millisecond}}, true, false,
+			&ListType{elem: &Time32Type{Unit: Millisecond}}, &ListType{elem: &Time32Type{Unit: Millisecond}}, true, false,
 		},
 		{
-			&ListType{&Time32Type{Unit: Millisecond}}, &ListType{&Time32Type{Unit: Second}}, false, false,
+			&ListType{elem: &Time32Type{Unit: Millisecond}}, &ListType{elem: &Time32Type{Unit: Second}}, false, false,
 		},
 		{
-			&ListType{&ListType{PrimitiveTypes.Uint16}}, &ListType{&ListType{PrimitiveTypes.Uint16}}, true, false,
+			&ListType{elem: &ListType{elem: PrimitiveTypes.Uint16}}, &ListType{elem: &ListType{elem: PrimitiveTypes.Uint16}}, true, false,
 		},
 		{
-			&ListType{&ListType{PrimitiveTypes.Uint16}}, &ListType{&ListType{PrimitiveTypes.Uint8}}, false, false,
+			&ListType{elem: &ListType{elem: PrimitiveTypes.Uint16}}, &ListType{elem: &ListType{elem: PrimitiveTypes.Uint8}}, false, false,
 		},
 		{
-			&ListType{&ListType{&ListType{PrimitiveTypes.Uint16}}}, &ListType{&ListType{PrimitiveTypes.Uint8}}, false, false,
+			&ListType{elem: &ListType{elem: &ListType{elem: PrimitiveTypes.Uint16}}}, &ListType{elem: &ListType{elem: PrimitiveTypes.Uint8}}, false, false,
 		},
 		{
 			&StructType{
@@ -222,7 +222,7 @@ func TestTypeEqual(t *testing.T) {
 					Field{Name: "f2", Type: PrimitiveTypes.Float32, Nullable: false},
 				},
 				index: map[string]int{"f1": 0, "f2": 1},
-				meta:  MetadataFrom(map[string]string{"k1": "v1"}),
+				meta:  MetadataFrom(map[string]string{"k1": "v1", "k2": "v2"}),
 			},
 			&StructType{
 				fields: []Field{
@@ -230,7 +230,7 @@ func TestTypeEqual(t *testing.T) {
 					Field{Name: "f2", Type: PrimitiveTypes.Float32, Nullable: false},
 				},
 				index: map[string]int{"f1": 0, "f2": 1},
-				meta:  MetadataFrom(map[string]string{"k1": "v1"}),
+				meta:  MetadataFrom(map[string]string{"k2": "v2", "k1": "v1"}),
 			},
 			true, true,
 		},
diff --git a/go/arrow/datatype_extension.go b/go/arrow/datatype_extension.go
new file mode 100644
index 00000000000..52e68870f84
--- /dev/null
+++ b/go/arrow/datatype_extension.go
@@ -0,0 +1,163 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package arrow
+
+import (
+	"fmt"
+	"reflect"
+	"sync"
+
+	"golang.org/x/xerrors"
+)
+
+var (
+	// global extension type registry, initially left null to avoid paying
+	// the cost if no extension types are used.
+	// the choice to use a sync.Map here is because it's expected that most
+	// use cases would be to register some number of types at initialization
+	// or otherwise and leave them rather than a pattern of repeatedly registering
+	// and unregistering types. As per the documentation for sync.Map
+	// (https://pkg.go.dev/sync#Map), it is specialized for the case where an entry
+	// is written once but read many times which fits our case here as we register
+	// a type once and then have to read it many times when deserializing messages
+	// with that type.
+	extTypeRegistry *sync.Map
+	// used for initializing the registry once and only once
+	initReg sync.Once
+)
+
+// convenience function to ensure that the type registry is initialized once
+// and only once in a goroutine-safe manner.
+func getExtTypeRegistry() *sync.Map {
+	initReg.Do(func() { extTypeRegistry = &sync.Map{} })
+	return extTypeRegistry
+}
+
+// RegisterExtensionType registers the provided ExtensionType by calling ExtensionName
+// to use as a Key for registrying the type. If a type with the same name is already
+// registered then this will return an error saying so, otherwise it will return nil
+// if successful registering the type.
+// This function is safe to call from multiple goroutines simultaneously.
+func RegisterExtensionType(typ ExtensionType) error {
+	name := typ.ExtensionName()
+	registry := getExtTypeRegistry()
+	if _, existed := registry.LoadOrStore(name, typ); existed {
+		return xerrors.Errorf("arrow: type extension with name %s already defined", name)
+	}
+	return nil
+}
+
+// UnregisterExtensionType removes the type with the given name from the registry
+// causing any messages with that type which come in to be expressed with their
+// metadata and underlying type instead of the extension type that isn't known.
+// This function is safe to call from multiple goroutines simultaneously.
+func UnregisterExtensionType(typName string) error {
+	registry := getExtTypeRegistry()
+	if _, loaded := registry.LoadAndDelete(typName); !loaded {
+		return xerrors.Errorf("arrow: no type extension with name %s found", typName)
+	}
+	return nil
+}
+
+// GetExtensionType retrieves and returns the extension type of the given name
+// from the global extension type registry. If the type isn't found it will return
+// nil. This function is safe to call from multiple goroutines concurrently.
+func GetExtensionType(typName string) ExtensionType {
+	registry := getExtTypeRegistry()
+	if val, ok := registry.Load(typName); ok {
+		return val.(ExtensionType)
+	}
+	return nil
+}
+
+// ExtensionType is an interface for handling user-defined types. They must be
+// DataTypes and must embed arrow.ExtensionBase in them in order to work properly
+// ensuring that they always have the expected base behavior.
+//
+// The arrow.ExtensionBase that needs to be embedded implements the DataType interface
+// leaving the remaining functions having to be implemented by the actual user-defined
+// type in order to be handled properly.
+type ExtensionType interface {
+	DataType
+	// ArrayType should return the reflect.TypeOf(ExtensionArrayType{}) where the
+	// ExtensionArrayType is a type that implements the array.ExtensionArray interface.
+	// Such a type must also embed the array.ExtensionArrayBase in it. This will be used
+	// when creating arrays of this ExtensionType by using reflect.New
+	ArrayType() reflect.Type
+	// ExtensionName is what will be used when registering / unregistering this extension
+	// type. Multiple user-defined types can be defined with a parameterized ExtensionType
+	// as long as the parameter is used in the ExtensionName to distinguish the instances
+	// in the global Extension Type registry.
+	// The return from this is also what will be placed in the metadata for IPC communication
+	// under the key ARROW:extension:name
+	ExtensionName() string
+	// StorageType returns the underlying storage type which is used by this extension
+	// type. It is already implemented by the ExtensionBase struct and thus does not need
+	// to be re-implemented by a user-defined type.
+	StorageType() DataType
+	// ExtensionEquals is used to tell whether two ExtensionType instances are equal types.
+	ExtensionEquals(ExtensionType) bool
+	// Serialize should produce any extra metadata necessary for initializing an instance of
+	// this user-defined type. Not all user-defined types require this and it is valid to return
+	// nil from this function or an empty slice. This is used for the IPC format and will be
+	// added to metadata for IPC communication under the key ARROW:extension:metadata
+	// This should be implemented such that it is valid to be called by multiple goroutines
+	// concurrently.
+	Serialize() string
+	// Deserialize is called when reading in extension arrays and types via the IPC format
+	// in order to construct an instance of the appropriate extension type. The data passed in
+	// is pulled from the ARROW:extension:metadata key and may be nil or an empty slice.
+	// If the storage type is incorrect or something else is invalid with the data this should
+	// return nil and an appropriate error.
+	Deserialize(storageType DataType, data string) (ExtensionType, error)
+
+	mustEmbedExtensionBase()
+}
+
+// ExtensionBase is the base struct for user-defined Extension Types which must be
+// embedded in any user-defined types like so:
+//
+//     type UserDefinedType struct {
+//         arrow.ExtensionBase
+//         // any other data
+//     }
+//
+type ExtensionBase struct {
+	// Storage is the underlying storage type
+	Storage DataType
+}
+
+// ID always returns arrow.EXTENSION and should not be overridden
+func (*ExtensionBase) ID() Type { return EXTENSION }
+
+// Name should always return "extension" and should not be overridden
+func (*ExtensionBase) Name() string { return "extension" }
+
+// String by default will return "extension_type<storage=storage_type>" by can be overridden
+// to customize what is printed out when printing this extension type.
+func (e *ExtensionBase) String() string { return fmt.Sprintf("extension_type<storage=%s>", e.Storage) }
+
+// StorageType returns the underlying storage type and exists so that functions
+// written against the ExtensionType interface can access the storage type.
+func (e *ExtensionBase) StorageType() DataType { return e.Storage }
+
+// this no-op exists to ensure that this type must be embedded in any user-defined extension type.
+func (ExtensionBase) mustEmbedExtensionBase() {}
+
+var (
+	_ DataType = (*ExtensionBase)(nil)
+)
diff --git a/go/arrow/datatype_extension_test.go b/go/arrow/datatype_extension_test.go
new file mode 100644
index 00000000000..1963d79fad1
--- /dev/null
+++ b/go/arrow/datatype_extension_test.go
@@ -0,0 +1,86 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package arrow_test
+
+import (
+	"reflect"
+	"testing"
+
+	"github.com/apache/arrow/go/arrow"
+	"github.com/apache/arrow/go/arrow/internal/testing/types"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/suite"
+)
+
+type BadExtensionType struct{}
+
+func (BadExtensionType) ID() arrow.Type                           { return arrow.EXTENSION }
+func (BadExtensionType) ArrayType() reflect.Type                  { return nil }
+func (BadExtensionType) Name() string                             { return "bad" }
+func (BadExtensionType) StorageType() arrow.DataType              { return arrow.Null }
+func (BadExtensionType) ExtensionEquals(arrow.ExtensionType) bool { return false }
+func (BadExtensionType) ExtensionName() string                    { return "bad" }
+func (BadExtensionType) Serialize() string                        { return "" }
+func (BadExtensionType) Deserialize(_ arrow.DataType, _ string) (arrow.ExtensionType, error) {
+	return nil, nil
+}
+
+func TestMustEmbedBase(t *testing.T) {
+	var ext interface{} = &BadExtensionType{}
+	assert.Panics(t, func() {
+		var _ arrow.ExtensionType = ext.(arrow.ExtensionType)
+	})
+}
+
+type ExtensionTypeTestSuite struct {
+	suite.Suite
+}
+
+func (e *ExtensionTypeTestSuite) SetupTest() {
+	e.NoError(arrow.RegisterExtensionType(types.NewUUIDType()))
+}
+
+func (e *ExtensionTypeTestSuite) TearDownTest() {
+	if arrow.GetExtensionType("uuid") != nil {
+		e.NoError(arrow.UnregisterExtensionType("uuid"))
+	}
+}
+
+func (e *ExtensionTypeTestSuite) TestExtensionType() {
+	e.Nil(arrow.GetExtensionType("uuid-unknown"))
+	e.NotNil(arrow.GetExtensionType("uuid"))
+
+	e.Error(arrow.RegisterExtensionType(types.NewUUIDType()))
+	e.Error(arrow.UnregisterExtensionType("uuid-unknown"))
+
+	typ := types.NewUUIDType()
+	e.Implements((*arrow.ExtensionType)(nil), typ)
+	e.Equal(arrow.EXTENSION, typ.ID())
+	e.Equal("extension", typ.Name())
+
+	serialized := typ.Serialize()
+	deserialized, err := typ.Deserialize(&arrow.FixedSizeBinaryType{ByteWidth: 16}, serialized)
+	e.NoError(err)
+
+	e.True(arrow.TypeEqual(deserialized.StorageType(), &arrow.FixedSizeBinaryType{ByteWidth: 16}))
+	e.True(arrow.TypeEqual(deserialized, typ))
+	e.False(arrow.TypeEqual(deserialized, &arrow.FixedSizeBinaryType{ByteWidth: 16}))
+}
+
+func TestExtensionTypes(t *testing.T) {
+	suite.Run(t, new(ExtensionTypeTestSuite))
+}
diff --git a/go/arrow/datatype_nested.go b/go/arrow/datatype_nested.go
index 47ea77899ef..9350c35b655 100644
--- a/go/arrow/datatype_nested.go
+++ b/go/arrow/datatype_nested.go
@@ -18,7 +18,6 @@ package arrow
 
 import (
 	"fmt"
-	"reflect"
 	"strings"
 )
 
@@ -26,6 +25,7 @@ import (
 // a variable-size sequence of values, all having the same relative type.
 type ListType struct {
 	elem DataType // DataType of the list's elements
+	Meta Metadata
 }
 
 // ListOf returns the list type with element type t.
@@ -192,7 +192,17 @@ type Field struct {
 func (f Field) HasMetadata() bool { return f.Metadata.Len() != 0 }
 
 func (f Field) Equal(o Field) bool {
-	return reflect.DeepEqual(f, o)
+	switch {
+	case f.Name != o.Name:
+		return false
+	case f.Nullable != o.Nullable:
+		return false
+	case !TypeEqual(f.Type, o.Type, CheckMetadata()):
+		return false
+	case !f.Metadata.Equal(o.Metadata):
+		return false
+	}
+	return true
 }
 
 func (f Field) String() string {
diff --git a/go/arrow/internal/arrdata/arrdata.go b/go/arrow/internal/arrdata/arrdata.go
index 8f7f59f53b0..69dc8a1a866 100644
--- a/go/arrow/internal/arrdata/arrdata.go
+++ b/go/arrow/internal/arrdata/arrdata.go
@@ -25,6 +25,8 @@ import (
 	"github.com/apache/arrow/go/arrow/array"
 	"github.com/apache/arrow/go/arrow/decimal128"
 	"github.com/apache/arrow/go/arrow/float16"
+	"github.com/apache/arrow/go/arrow/internal/testing/types"
+	"github.com/apache/arrow/go/arrow/ipc"
 	"github.com/apache/arrow/go/arrow/memory"
 )
 
@@ -46,6 +48,7 @@ func init() {
 	Records["durations"] = makeDurationsRecords()
 	Records["decimal128"] = makeDecimal128sRecords()
 	Records["maps"] = makeMapsRecords()
+	Records["extension"] = makeExtensionRecords()
 
 	for k := range Records {
 		RecordNames = append(RecordNames, k)
@@ -813,6 +816,102 @@ func makeMapsRecords() []array.Record {
 	return recs
 }
 
+func makeExtensionRecords() []array.Record {
+	mem := memory.NewGoAllocator()
+
+	p1Type := types.NewParametric1Type(6)
+	p2Type := types.NewParametric1Type(12)
+	p3Type := types.NewParametric2Type(2)
+	p4Type := types.NewParametric2Type(3)
+	p5Type := types.NewExtStructType()
+
+	arrow.RegisterExtensionType(p1Type)
+	arrow.RegisterExtensionType(p3Type)
+	arrow.RegisterExtensionType(p4Type)
+	arrow.RegisterExtensionType(p5Type)
+
+	meta := arrow.NewMetadata(
+		[]string{"k1", "k2"},
+		[]string{"v1", "v2"},
+	)
+
+	unregisteredMeta := arrow.NewMetadata(
+		append(meta.Keys(), ipc.ExtensionTypeKeyName, ipc.ExtensionMetadataKeyName),
+		append(meta.Values(), "unregistered", ""))
+
+	schema := arrow.NewSchema(
+		[]arrow.Field{
+			{Name: "p1", Type: p1Type, Nullable: true, Metadata: meta},
+			{Name: "p2", Type: p2Type, Nullable: true, Metadata: meta},
+			{Name: "p3", Type: p3Type, Nullable: true, Metadata: meta},
+			{Name: "p4", Type: p4Type, Nullable: true, Metadata: meta},
+			{Name: "p5", Type: p5Type, Nullable: true, Metadata: meta},
+			{Name: "unreg", Type: arrow.PrimitiveTypes.Int8, Nullable: true, Metadata: unregisteredMeta},
+		}, nil)
+
+	mask := []bool{true, false, true, true, false}
+	chunks := [][]array.Interface{
+		{
+			extArray(mem, p1Type, []int32{1, -1, 2, 3, -1}, mask),
+			extArray(mem, p2Type, []int32{2, -1, 3, 4, -1}, mask),
+			extArray(mem, p3Type, []int32{5, -1, 6, 7, 8}, mask),
+			extArray(mem, p4Type, []int32{5, -1, 7, 9, -1}, mask),
+			extArray(mem, p5Type, [][]array.Interface{
+				{
+					arrayOf(mem, []int64{1, -1, 2, 3, -1}, mask),
+					arrayOf(mem, []float64{0.1, -1, 0.2, 0.3, -1}, mask),
+				},
+			}, mask),
+			arrayOf(mem, []int8{-1, -2, -3, -4, -5}, mask),
+		},
+		{
+			extArray(mem, p1Type, []int32{10, -1, 20, 30, -1}, mask),
+			extArray(mem, p2Type, []int32{20, -1, 30, 40, -1}, mask),
+			extArray(mem, p3Type, []int32{50, -1, 60, 70, 8}, mask),
+			extArray(mem, p4Type, []int32{50, -1, 70, 90, -1}, mask),
+			extArray(mem, p5Type, [][]array.Interface{
+				{
+					arrayOf(mem, []int64{10, -1, 20, 30, -1}, mask),
+					arrayOf(mem, []float64{0.01, -1, 0.02, 0.03, -1}, mask),
+				},
+			}, mask),
+			arrayOf(mem, []int8{-11, -12, -13, -14, -15}, mask),
+		},
+	}
+
+	defer func() {
+		for _, chunk := range chunks {
+			for _, col := range chunk {
+				col.Release()
+			}
+		}
+	}()
+
+	recs := make([]array.Record, len(chunks))
+	for i, chunk := range chunks {
+		recs[i] = array.NewRecord(schema, chunk, -1)
+	}
+
+	return recs
+}
+
+func extArray(mem memory.Allocator, dt arrow.ExtensionType, a interface{}, valids []bool) array.Interface {
+	var storage array.Interface
+	switch st := dt.StorageType().(type) {
+	case *arrow.StructType:
+		storage = structOf(mem, st, a.([][]array.Interface), valids)
+	case *arrow.MapType:
+		storage = mapOf(mem, false, a.([]array.Interface), valids)
+	case *arrow.ListType:
+		storage = listOf(mem, a.([]array.Interface), valids)
+	default:
+		storage = arrayOf(mem, a, valids)
+	}
+	defer storage.Release()
+
+	return array.NewExtensionArrayWithStorage(dt, storage)
+}
+
 func arrayOf(mem memory.Allocator, a interface{}, valids []bool) array.Interface {
 	if mem == nil {
 		mem = memory.NewGoAllocator()
diff --git a/go/arrow/internal/arrjson/arrjson.go b/go/arrow/internal/arrjson/arrjson.go
index 731e78a5397..57e7cb9f084 100644
--- a/go/arrow/internal/arrjson/arrjson.go
+++ b/go/arrow/internal/arrjson/arrjson.go
@@ -19,6 +19,7 @@
 package arrjson // import "github.com/apache/arrow/go/arrow/internal/arrjson"
 
 import (
+	"bytes"
 	"encoding/hex"
 	"encoding/json"
 	"math/big"
@@ -29,12 +30,57 @@ import (
 	"github.com/apache/arrow/go/arrow/array"
 	"github.com/apache/arrow/go/arrow/decimal128"
 	"github.com/apache/arrow/go/arrow/float16"
+	"github.com/apache/arrow/go/arrow/ipc"
 	"github.com/apache/arrow/go/arrow/memory"
 	"golang.org/x/xerrors"
 )
 
 type Schema struct {
-	Fields []FieldWrapper `json:"fields"`
+	Fields    []FieldWrapper `json:"fields"`
+	arrowMeta arrow.Metadata `json:"-"`
+	Metadata  []metaKV       `json:"metadata,omitempty"`
+}
+
+func (s Schema) MarshalJSON() ([]byte, error) {
+	if s.arrowMeta.Len() > 0 {
+		s.Metadata = make([]metaKV, 0, s.arrowMeta.Len())
+		keys := s.arrowMeta.Keys()
+		vals := s.arrowMeta.Values()
+		for i := range keys {
+			s.Metadata = append(s.Metadata, metaKV{Key: keys[i], Value: vals[i]})
+		}
+	}
+	type alias Schema
+	var buf bytes.Buffer
+	enc := json.NewEncoder(&buf)
+	enc.SetEscapeHTML(false)
+	err := enc.Encode(alias(s))
+	return buf.Bytes(), err
+}
+
+func (s *Schema) UnmarshalJSON(data []byte) error {
+	type Alias Schema
+	aux := &struct {
+		*Alias
+	}{Alias: (*Alias)(s)}
+	if err := json.Unmarshal(data, &aux); err != nil {
+		return err
+	}
+
+	var (
+		mdkeys = make([]string, 0)
+		mdvals = make([]string, 0)
+	)
+
+	for _, kv := range s.Metadata {
+		mdkeys = append(mdkeys, kv.Key)
+		mdvals = append(mdvals, kv.Value)
+	}
+
+	if len(s.Metadata) > 0 {
+		s.arrowMeta = arrow.NewMetadata(mdkeys, mdvals)
+	}
+	return nil
 }
 
 // FieldWrapper gets used in order to hook into the JSON marshalling and
@@ -52,12 +98,37 @@ type Field struct {
 	// leave this as a json RawMessage in order to partially unmarshal as needed
 	// during marshal/unmarshal time so we can determine what the structure is
 	// actually expected to be.
-	Type     json.RawMessage `json:"type"`
-	Nullable bool            `json:"nullable"`
-	Children []FieldWrapper  `json:"children"`
+	Type      json.RawMessage `json:"type"`
+	Nullable  bool            `json:"nullable"`
+	Children  []FieldWrapper  `json:"children"`
+	arrowMeta arrow.Metadata  `json:"-"`
+	Metadata  []metaKV        `json:"metadata,omitempty"`
+}
+
+type metaKV struct {
+	Key   string `json:"key"`
+	Value string `json:"value"`
 }
 
 func (f FieldWrapper) MarshalJSON() ([]byte, error) {
+	// for extension types, add the extension type metadata appropriately
+	// and then marshal as normal for the storage type.
+	if f.arrowType.ID() == arrow.EXTENSION {
+		exType := f.arrowType.(arrow.ExtensionType)
+
+		mdkeys := append(f.arrowMeta.Keys(), ipc.ExtensionTypeKeyName)
+		mdvals := append(f.arrowMeta.Values(), exType.ExtensionName())
+
+		serializedData := exType.Serialize()
+		if len(serializedData) > 0 {
+			mdkeys = append(mdkeys, ipc.ExtensionMetadataKeyName)
+			mdvals = append(mdvals, string(serializedData))
+		}
+
+		f.arrowMeta = arrow.NewMetadata(mdkeys, mdvals)
+		f.arrowType = exType.StorageType()
+	}
+
 	var typ interface{}
 	switch dt := f.arrowType.(type) {
 	case *arrow.NullType:
@@ -154,7 +225,20 @@ func (f FieldWrapper) MarshalJSON() ([]byte, error) {
 	if f.Type, err = json.Marshal(typ); err != nil {
 		return nil, err
 	}
-	return json.Marshal(f.Field)
+
+	// if we have metadata then add the key/value pairs to the json
+	if f.arrowMeta.Len() > 0 {
+		f.Metadata = make([]metaKV, 0, f.arrowMeta.Len())
+		for i := 0; i < f.arrowMeta.Len(); i++ {
+			f.Metadata = append(f.Metadata, metaKV{Key: f.arrowMeta.Keys()[i], Value: f.arrowMeta.Values()[i]})
+		}
+	}
+
+	var buf bytes.Buffer
+	enc := json.NewEncoder(&buf)
+	enc.SetEscapeHTML(false)
+	err = enc.Encode(f.Field)
+	return buf.Bytes(), err
 }
 
 func (f *FieldWrapper) UnmarshalJSON(data []byte) error {
@@ -268,6 +352,8 @@ func (f *FieldWrapper) UnmarshalJSON(data []byte) error {
 		}
 	case "list":
 		f.arrowType = arrow.ListOf(f.Children[0].arrowType)
+		f.arrowType.(*arrow.ListType).Meta = f.Children[0].arrowMeta
+
 	case "map":
 		t := mapJSON{}
 		if err := json.Unmarshal(f.Type, &t); err != nil {
@@ -323,10 +409,71 @@ func (f *FieldWrapper) UnmarshalJSON(data []byte) error {
 		}
 		f.arrowType = &arrow.Decimal128Type{Precision: int32(t.Precision), Scale: int32(t.Scale)}
 	}
-	if f.arrowType != nil {
-		return nil
+
+	if f.arrowType == nil {
+		return xerrors.Errorf("unhandled type unmarshalling from json: %s", tmp.Name)
+	}
+
+	var err error
+	if len(f.Metadata) > 0 { // unmarshal the key/value metadata pairs
+		var (
+			mdkeys         = make([]string, 0, len(f.Metadata))
+			mdvals         = make([]string, 0, len(f.Metadata))
+			extKeyIdx  int = -1
+			extDataIdx int = -1
+		)
+
+		for i, kv := range f.Metadata {
+			switch kv.Key {
+			case ipc.ExtensionTypeKeyName:
+				extKeyIdx = i
+			case ipc.ExtensionMetadataKeyName:
+				extDataIdx = i
+			}
+			mdkeys = append(mdkeys, kv.Key)
+			mdvals = append(mdvals, kv.Value)
+		}
+
+		if extKeyIdx == -1 { // no extension metadata just create the metadata
+			f.arrowMeta = arrow.NewMetadata(mdkeys, mdvals)
+			return nil
+		}
+
+		extType := arrow.GetExtensionType(mdvals[extKeyIdx])
+		if extType == nil { // unregistered extension type, just keep the metadata
+			f.arrowMeta = arrow.NewMetadata(mdkeys, mdvals)
+			return nil
+		}
+
+		var extData string
+		if extDataIdx > -1 {
+			extData = mdvals[extDataIdx]
+			// if both extension type and extension type metadata exist
+			// filter out both keys
+			newkeys := make([]string, 0, len(mdkeys)-2)
+			newvals := make([]string, 0, len(mdvals)-2)
+			for i := range mdkeys {
+				if i != extKeyIdx && i != extDataIdx {
+					newkeys = append(newkeys, mdkeys[i])
+					newvals = append(newvals, mdvals[i])
+				}
+			}
+			mdkeys = newkeys
+			mdvals = newvals
+		} else {
+			// if only extension type key is present, we can simplify filtering it out
+			mdkeys = append(mdkeys[:extKeyIdx], mdkeys[extKeyIdx+1:]...)
+			mdvals = append(mdvals[:extKeyIdx], mdvals[extKeyIdx+1:]...)
+		}
+
+		if f.arrowType, err = extType.Deserialize(f.arrowType, extData); err != nil {
+			return err
+		}
+
+		f.arrowMeta = arrow.NewMetadata(mdkeys, mdvals)
 	}
-	return xerrors.Errorf("unhandled type unmarshalling from json: %s", tmp.Name)
+
+	return err
 }
 
 // the structs below represent various configurations of the Type
@@ -380,12 +527,13 @@ type mapJSON struct {
 
 func schemaToJSON(schema *arrow.Schema) Schema {
 	return Schema{
-		Fields: fieldsToJSON(schema.Fields()),
+		Fields:    fieldsToJSON(schema.Fields()),
+		arrowMeta: schema.Metadata(),
 	}
 }
 
 func schemaFromJSON(schema Schema) *arrow.Schema {
-	return arrow.NewSchema(fieldsFromJSON(schema.Fields), nil)
+	return arrow.NewSchema(fieldsFromJSON(schema.Fields), &schema.arrowMeta)
 }
 
 func fieldsToJSON(fields []arrow.Field) []FieldWrapper {
@@ -396,10 +544,11 @@ func fieldsToJSON(fields []arrow.Field) []FieldWrapper {
 			arrowType: f.Type,
 			Nullable:  f.Nullable,
 			Children:  []FieldWrapper{},
+			arrowMeta: f.Metadata,
 		}}
 		switch dt := f.Type.(type) {
 		case *arrow.ListType:
-			o[i].Children = fieldsToJSON([]arrow.Field{{Name: "item", Type: dt.Elem(), Nullable: f.Nullable}})
+			o[i].Children = fieldsToJSON([]arrow.Field{{Name: "item", Type: dt.Elem(), Nullable: f.Nullable, Metadata: dt.Meta}})
 		case *arrow.FixedSizeListType:
 			o[i].Children = fieldsToJSON([]arrow.Field{{Name: "item", Type: dt.Elem(), Nullable: f.Nullable}})
 		case *arrow.StructType:
@@ -424,6 +573,7 @@ func fieldFromJSON(f Field) arrow.Field {
 		Name:     f.Name,
 		Type:     f.arrowType,
 		Nullable: f.Nullable,
+		Metadata: f.arrowMeta,
 	}
 }
 
@@ -760,6 +910,11 @@ func arrayFromJSON(mem memory.Allocator, dt arrow.DataType, arr Array) array.Int
 		bldr.AppendValues(data, valids)
 		return bldr.NewArray()
 
+	case arrow.ExtensionType:
+		storage := arrayFromJSON(mem, dt.StorageType(), arr)
+		defer storage.Release()
+		return array.NewExtensionArrayWithStorage(dt, storage)
+
 	default:
 		panic(xerrors.Errorf("unknown data type %v %T", dt, dt))
 	}
@@ -1021,6 +1176,9 @@ func arrayToJSON(field arrow.Field, arr array.Interface) Array {
 			Valids: validsToJSON(arr),
 		}
 
+	case array.ExtensionArray:
+		return arrayToJSON(field, arr.Storage())
+
 	default:
 		panic(xerrors.Errorf("unknown array type %T", arr))
 	}
diff --git a/go/arrow/internal/arrjson/arrjson_test.go b/go/arrow/internal/arrjson/arrjson_test.go
index bace8324a53..6ffbfa98fca 100644
--- a/go/arrow/internal/arrjson/arrjson_test.go
+++ b/go/arrow/internal/arrjson/arrjson_test.go
@@ -20,6 +20,7 @@ import (
 	"io"
 	"io/ioutil"
 	"os"
+	"strings"
 	"testing"
 
 	"github.com/apache/arrow/go/arrow/array"
@@ -41,6 +42,7 @@ func TestReadWrite(t *testing.T) {
 	wantJSONs["durations"] = makeDurationsWantJSONs()
 	wantJSONs["decimal128"] = makeDecimal128sWantJSONs()
 	wantJSONs["maps"] = makeMapsWantJSONs()
+	wantJSONs["extension"] = makeExtensionsWantJSONs()
 
 	tempDir, err := ioutil.TempDir("", "go-arrow-read-write-")
 	if err != nil {
@@ -83,7 +85,7 @@ func TestReadWrite(t *testing.T) {
 			}
 
 			fileBytes, _ := ioutil.ReadFile(f.Name())
-			if wantJSONs[name] != string(fileBytes) {
+			if wantJSONs[name] != strings.TrimSpace(string(fileBytes)) {
 				t.Fatalf("not expected JSON pretty output for case: %v", name)
 			}
 
@@ -145,6 +147,20 @@ func makeNullWantJSONs() string {
         "nullable": true,
         "children": []
       }
+    ],
+    "metadata": [
+      {
+        "key": "k1",
+        "value": "v1"
+      },
+      {
+        "key": "k2",
+        "value": "v2"
+      },
+      {
+        "key": "k3",
+        "value": "v3"
+      }
     ]
   },
   "batches": [
@@ -285,6 +301,20 @@ func makePrimitiveWantJSONs() string {
         "nullable": true,
         "children": []
       }
+    ],
+    "metadata": [
+      {
+        "key": "k1",
+        "value": "v1"
+      },
+      {
+        "key": "k2",
+        "value": "v2"
+      },
+      {
+        "key": "k3",
+        "value": "v3"
+      }
     ]
   },
   "batches": [
@@ -3822,3 +3852,468 @@ func makeMapsWantJSONs() string {
   ]
 }`
 }
+
+func makeExtensionsWantJSONs() string {
+	return `{
+  "schema": {
+    "fields": [
+      {
+        "name": "p1",
+        "type": {
+          "name": "int",
+          "isSigned": true,
+          "bitWidth": 32
+        },
+        "nullable": true,
+        "children": [],
+        "metadata": [
+          {
+            "key": "k1",
+            "value": "v1"
+          },
+          {
+            "key": "k2",
+            "value": "v2"
+          },
+          {
+            "key": "ARROW:extension:name",
+            "value": "parametric-type-1"
+          },
+          {
+            "key": "ARROW:extension:metadata",
+            "value": "\u0006\u0000\u0000\u0000"
+          }
+        ]
+      },
+      {
+        "name": "p2",
+        "type": {
+          "name": "int",
+          "isSigned": true,
+          "bitWidth": 32
+        },
+        "nullable": true,
+        "children": [],
+        "metadata": [
+          {
+            "key": "k1",
+            "value": "v1"
+          },
+          {
+            "key": "k2",
+            "value": "v2"
+          },
+          {
+            "key": "ARROW:extension:name",
+            "value": "parametric-type-1"
+          },
+          {
+            "key": "ARROW:extension:metadata",
+            "value": "\u000c\u0000\u0000\u0000"
+          }
+        ]
+      },
+      {
+        "name": "p3",
+        "type": {
+          "name": "int",
+          "isSigned": true,
+          "bitWidth": 32
+        },
+        "nullable": true,
+        "children": [],
+        "metadata": [
+          {
+            "key": "k1",
+            "value": "v1"
+          },
+          {
+            "key": "k2",
+            "value": "v2"
+          },
+          {
+            "key": "ARROW:extension:name",
+            "value": "parametric-type-2<param=2>"
+          },
+          {
+            "key": "ARROW:extension:metadata",
+            "value": "\u0002\u0000\u0000\u0000"
+          }
+        ]
+      },
+      {
+        "name": "p4",
+        "type": {
+          "name": "int",
+          "isSigned": true,
+          "bitWidth": 32
+        },
+        "nullable": true,
+        "children": [],
+        "metadata": [
+          {
+            "key": "k1",
+            "value": "v1"
+          },
+          {
+            "key": "k2",
+            "value": "v2"
+          },
+          {
+            "key": "ARROW:extension:name",
+            "value": "parametric-type-2<param=3>"
+          },
+          {
+            "key": "ARROW:extension:metadata",
+            "value": "\u0003\u0000\u0000\u0000"
+          }
+        ]
+      },
+      {
+        "name": "p5",
+        "type": {
+          "name": "struct"
+        },
+        "nullable": true,
+        "children": [],
+        "metadata": [
+          {
+            "key": "k1",
+            "value": "v1"
+          },
+          {
+            "key": "k2",
+            "value": "v2"
+          },
+          {
+            "key": "ARROW:extension:name",
+            "value": "ext-struct-type"
+          },
+          {
+            "key": "ARROW:extension:metadata",
+            "value": "ext-struct-type-unique-code"
+          }
+        ]
+      },
+      {
+        "name": "unreg",
+        "type": {
+          "name": "int",
+          "isSigned": true,
+          "bitWidth": 8
+        },
+        "nullable": true,
+        "children": [],
+        "metadata": [
+          {
+            "key": "k1",
+            "value": "v1"
+          },
+          {
+            "key": "k2",
+            "value": "v2"
+          },
+          {
+            "key": "ARROW:extension:name",
+            "value": "unregistered"
+          },
+          {
+            "key": "ARROW:extension:metadata",
+            "value": ""
+          }
+        ]
+      }
+    ]
+  },
+  "batches": [
+    {
+      "count": 5,
+      "columns": [
+        {
+          "name": "p1",
+          "count": 5,
+          "VALIDITY": [
+            1,
+            0,
+            1,
+            1,
+            0
+          ],
+          "DATA": [
+            1,
+            -1,
+            2,
+            3,
+            -1
+          ]
+        },
+        {
+          "name": "p2",
+          "count": 5,
+          "VALIDITY": [
+            1,
+            0,
+            1,
+            1,
+            0
+          ],
+          "DATA": [
+            2,
+            -1,
+            3,
+            4,
+            -1
+          ]
+        },
+        {
+          "name": "p3",
+          "count": 5,
+          "VALIDITY": [
+            1,
+            0,
+            1,
+            1,
+            0
+          ],
+          "DATA": [
+            5,
+            -1,
+            6,
+            7,
+            8
+          ]
+        },
+        {
+          "name": "p4",
+          "count": 5,
+          "VALIDITY": [
+            1,
+            0,
+            1,
+            1,
+            0
+          ],
+          "DATA": [
+            5,
+            -1,
+            7,
+            9,
+            -1
+          ]
+        },
+        {
+          "name": "p5",
+          "count": 5,
+          "VALIDITY": [
+            1,
+            0,
+            1,
+            1,
+            0
+          ],
+          "children": [
+            {
+              "name": "a",
+              "count": 5,
+              "VALIDITY": [
+                1,
+                0,
+                1,
+                1,
+                0
+              ],
+              "DATA": [
+                "1",
+                "0",
+                "2",
+                "3",
+                "0"
+              ]
+            },
+            {
+              "name": "b",
+              "count": 5,
+              "VALIDITY": [
+                1,
+                0,
+                1,
+                1,
+                0
+              ],
+              "DATA": [
+                0.1,
+                0,
+                0.2,
+                0.3,
+                0
+              ]
+            }
+          ]
+        },
+        {
+          "name": "unreg",
+          "count": 5,
+          "VALIDITY": [
+            1,
+            0,
+            1,
+            1,
+            0
+          ],
+          "DATA": [
+            -1,
+            -2,
+            -3,
+            -4,
+            -5
+          ]
+        }
+      ]
+    },
+    {
+      "count": 5,
+      "columns": [
+        {
+          "name": "p1",
+          "count": 5,
+          "VALIDITY": [
+            1,
+            0,
+            1,
+            1,
+            0
+          ],
+          "DATA": [
+            10,
+            -1,
+            20,
+            30,
+            -1
+          ]
+        },
+        {
+          "name": "p2",
+          "count": 5,
+          "VALIDITY": [
+            1,
+            0,
+            1,
+            1,
+            0
+          ],
+          "DATA": [
+            20,
+            -1,
+            30,
+            40,
+            -1
+          ]
+        },
+        {
+          "name": "p3",
+          "count": 5,
+          "VALIDITY": [
+            1,
+            0,
+            1,
+            1,
+            0
+          ],
+          "DATA": [
+            50,
+            -1,
+            60,
+            70,
+            8
+          ]
+        },
+        {
+          "name": "p4",
+          "count": 5,
+          "VALIDITY": [
+            1,
+            0,
+            1,
+            1,
+            0
+          ],
+          "DATA": [
+            50,
+            -1,
+            70,
+            90,
+            -1
+          ]
+        },
+        {
+          "name": "p5",
+          "count": 5,
+          "VALIDITY": [
+            1,
+            0,
+            1,
+            1,
+            0
+          ],
+          "children": [
+            {
+              "name": "a",
+              "count": 5,
+              "VALIDITY": [
+                1,
+                0,
+                1,
+                1,
+                0
+              ],
+              "DATA": [
+                "10",
+                "0",
+                "20",
+                "30",
+                "0"
+              ]
+            },
+            {
+              "name": "b",
+              "count": 5,
+              "VALIDITY": [
+                1,
+                0,
+                1,
+                1,
+                0
+              ],
+              "DATA": [
+                0.01,
+                0,
+                0.02,
+                0.03,
+                0
+              ]
+            }
+          ]
+        },
+        {
+          "name": "unreg",
+          "count": 5,
+          "VALIDITY": [
+            1,
+            0,
+            1,
+            1,
+            0
+          ],
+          "DATA": [
+            -11,
+            -12,
+            -13,
+            -14,
+            -15
+          ]
+        }
+      ]
+    }
+  ]
+}`
+}
diff --git a/go/arrow/internal/arrjson/reader.go b/go/arrow/internal/arrjson/reader.go
index cb953286db3..34c49c14f43 100644
--- a/go/arrow/internal/arrjson/reader.go
+++ b/go/arrow/internal/arrjson/reader.go
@@ -39,10 +39,7 @@ type Reader struct {
 func NewReader(r io.Reader, opts ...Option) (*Reader, error) {
 	dec := json.NewDecoder(r)
 	dec.UseNumber()
-	var raw struct {
-		Schema  Schema   `json:"schema"`
-		Records []Record `json:"batches"`
-	}
+	var raw rawJSON
 	err := dec.Decode(&raw)
 	if err != nil {
 		return nil, err
diff --git a/go/arrow/internal/arrjson/writer.go b/go/arrow/internal/arrjson/writer.go
index 72fab46bc48..f5dd00c8f4c 100644
--- a/go/arrow/internal/arrjson/writer.go
+++ b/go/arrow/internal/arrjson/writer.go
@@ -31,84 +31,45 @@ const (
 	jsonRecPrefix = "    "
 )
 
+type rawJSON struct {
+	Schema  Schema   `json:"schema"`
+	Records []Record `json:"batches"`
+}
+
 type Writer struct {
 	w io.Writer
 
-	schema *arrow.Schema
-	nrecs  int64
+	nrecs int64
+	raw   rawJSON
 }
 
 func NewWriter(w io.Writer, schema *arrow.Schema) (*Writer, error) {
 	ww := &Writer{
-		w:      w,
-		schema: schema,
-	}
-	_, err := ww.w.Write([]byte("{\n"))
-	if err != nil {
-		return nil, err
-	}
-
-	err = ww.writeSchema()
-	if err != nil {
-		return nil, err
+		w: w,
 	}
+	ww.raw.Schema = schemaToJSON(schema)
+	ww.raw.Records = make([]Record, 0)
 	return ww, nil
 }
 
 func (w *Writer) Write(rec array.Record) error {
-	switch {
-	case w.nrecs == 0:
-		_, err := w.w.Write([]byte(",\n" + jsonPrefix + `"batches": [` + "\n" + jsonRecPrefix))
-		if err != nil {
-			return err
-		}
-	case w.nrecs > 0:
-		_, err := w.w.Write([]byte(",\n" + jsonRecPrefix))
-		if err != nil {
-			return err
-		}
-	}
-
-	raw, err := json.MarshalIndent(recordToJSON(rec), jsonRecPrefix, jsonIndent)
-	if err != nil {
-		return err
-	}
-
-	_, err = w.w.Write(raw)
-	if err != nil {
-		return err
-	}
-
+	w.raw.Records = append(w.raw.Records, recordToJSON(rec))
 	w.nrecs++
 	return nil
 }
 
-func (w *Writer) writeSchema() error {
-	_, err := w.w.Write([]byte(`  "schema": `))
-	if err != nil {
-		return err
-	}
-	raw, err := json.MarshalIndent(schemaToJSON(w.schema), jsonPrefix, jsonIndent)
-	if err != nil {
-		return err
-	}
-	_, err = w.w.Write(raw)
-	if err != nil {
-		return err
-	}
-
-	return nil
-}
-
 func (w *Writer) Close() error {
 	if w.w == nil {
 		return nil
 	}
-	_, err := w.w.Write([]byte("\n  ]\n}"))
-	if err == nil {
-		w.w = nil
-	}
-	return err
+
+	enc := json.NewEncoder(w.w)
+	enc.SetIndent("", jsonIndent)
+	// ensure that we don't convert <, >, !, etc. to their unicode equivalents
+	// in the output json since we're not using this in an HTML context so that
+	// we can make sure that the json files match.
+	enc.SetEscapeHTML(false)
+	return enc.Encode(w.raw)
 }
 
 var (
diff --git a/go/arrow/internal/testing/types/extension_types.go b/go/arrow/internal/testing/types/extension_types.go
new file mode 100644
index 00000000000..bb0f984a8e1
--- /dev/null
+++ b/go/arrow/internal/testing/types/extension_types.go
@@ -0,0 +1,247 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package types contains user-defined types for use in the tests for the arrow package
+package types
+
+import (
+	"encoding/binary"
+	"fmt"
+	"reflect"
+
+	"github.com/apache/arrow/go/arrow"
+	"github.com/apache/arrow/go/arrow/array"
+	"golang.org/x/xerrors"
+)
+
+// UUIDArray is a simple array which is a FixedSizeBinary(16)
+type UUIDArray struct {
+	array.ExtensionArrayBase
+}
+
+// UUIDType is a simple extension type that represents a FixedSizeBinary(16)
+// to be used for representing UUIDs
+type UUIDType struct {
+	arrow.ExtensionBase
+}
+
+// NewUUIDType is a convenience function to create an instance of UuidType
+// with the correct storage type
+func NewUUIDType() *UUIDType {
+	return &UUIDType{
+		ExtensionBase: arrow.ExtensionBase{
+			Storage: &arrow.FixedSizeBinaryType{ByteWidth: 16}}}
+}
+
+// ArrayType returns TypeOf(UuidArray) for constructing uuid arrays
+func (UUIDType) ArrayType() reflect.Type { return reflect.TypeOf(UUIDArray{}) }
+
+func (UUIDType) ExtensionName() string { return "uuid" }
+
+// Serialize returns "uuid-serialized" for testing proper metadata passing
+func (UUIDType) Serialize() string { return "uuid-serialized" }
+
+// Deserialize expects storageType to be FixedSizeBinaryType{ByteWidth: 16} and the data to be
+// "uuid-serialized" in order to correctly create a UuidType for testing deserialize.
+func (UUIDType) Deserialize(storageType arrow.DataType, data string) (arrow.ExtensionType, error) {
+	if string(data) != "uuid-serialized" {
+		return nil, xerrors.Errorf("type identifier did not match: '%s'", string(data))
+	}
+	if !arrow.TypeEqual(storageType, &arrow.FixedSizeBinaryType{ByteWidth: 16}) {
+		return nil, xerrors.Errorf("invalid storage type for UuidType: %s", storageType.Name())
+	}
+	return NewUUIDType(), nil
+}
+
+// UuidTypes are equal if both are named "uuid"
+func (u UUIDType) ExtensionEquals(other arrow.ExtensionType) bool {
+	return u.ExtensionName() == other.ExtensionName()
+}
+
+// Parametric1Array is a simple int32 array for use with the Parametric1Type
+// in testing a parameterized user-defined extension type.
+type Parametric1Array struct {
+	array.ExtensionArrayBase
+}
+
+// Parametric2Array is another simple int32 array for use with the Parametric2Type
+// also for testing a parameterized user-defined extension type that utilizes
+// the parameter for defining different types based on the param.
+type Parametric2Array struct {
+	array.ExtensionArrayBase
+}
+
+// A type where ExtensionName is always the same
+type Parametric1Type struct {
+	arrow.ExtensionBase
+
+	param int32
+}
+
+func NewParametric1Type(p int32) *Parametric1Type {
+	ret := &Parametric1Type{param: p}
+	ret.ExtensionBase.Storage = arrow.PrimitiveTypes.Int32
+	return ret
+}
+
+func (p *Parametric1Type) String() string { return "extension<" + p.ExtensionName() + ">" }
+
+// ExtensionEquals returns true if other is a *Parametric1Type and has the same param
+func (p *Parametric1Type) ExtensionEquals(other arrow.ExtensionType) bool {
+	o, ok := other.(*Parametric1Type)
+	if !ok {
+		return false
+	}
+	return p.param == o.param
+}
+
+// ExtensionName is always "parametric-type-1"
+func (Parametric1Type) ExtensionName() string { return "parametric-type-1" }
+
+// ArrayType returns the TypeOf(Parametric1Array{})
+func (Parametric1Type) ArrayType() reflect.Type { return reflect.TypeOf(Parametric1Array{}) }
+
+// Serialize returns the param as 4 little endian bytes
+func (p *Parametric1Type) Serialize() string {
+	var buf [4]byte
+	binary.LittleEndian.PutUint32(buf[:], uint32(p.param))
+	return string(buf[:])
+}
+
+// Deserialize requires storage to be an int32 type and data should be a 4 byte little endian int32 value
+func (Parametric1Type) Deserialize(storage arrow.DataType, data string) (arrow.ExtensionType, error) {
+	if len(data) != 4 {
+		return nil, xerrors.Errorf("parametric1type: invalid serialized data size: %d", len(data))
+	}
+
+	if storage.ID() != arrow.INT32 {
+		return nil, xerrors.New("parametric1type: must have int32 as underlying storage type")
+	}
+
+	return &Parametric1Type{arrow.ExtensionBase{Storage: arrow.PrimitiveTypes.Int32}, int32(binary.LittleEndian.Uint32([]byte(data)))}, nil
+}
+
+// a parametric type where the extension name is different for each
+// parameter, and must be registered separately
+type Parametric2Type struct {
+	arrow.ExtensionBase
+
+	param int32
+}
+
+func NewParametric2Type(p int32) *Parametric2Type {
+	ret := &Parametric2Type{param: p}
+	ret.ExtensionBase.Storage = arrow.PrimitiveTypes.Int32
+	return ret
+}
+
+func (p *Parametric2Type) String() string { return "extension<" + p.ExtensionName() + ">" }
+
+// ExtensionEquals returns true if other is a *Parametric2Type and has the same param
+func (p *Parametric2Type) ExtensionEquals(other arrow.ExtensionType) bool {
+	o, ok := other.(*Parametric2Type)
+	if !ok {
+		return false
+	}
+	return p.param == o.param
+}
+
+// ExtensionName incorporates the param in the name requiring different instances of
+// Parametric2Type to be registered separately if they have different params. this is
+// used for testing registration of different types with the same struct type.
+func (p *Parametric2Type) ExtensionName() string {
+	return fmt.Sprintf("parametric-type-2<param=%d>", p.param)
+}
+
+// ArrayType returns TypeOf(Parametric2Array{})
+func (Parametric2Type) ArrayType() reflect.Type { return reflect.TypeOf(Parametric2Array{}) }
+
+// Serialize returns the param as a 4 byte little endian slice
+func (p *Parametric2Type) Serialize() string {
+	var buf [4]byte
+	binary.LittleEndian.PutUint32(buf[:], uint32(p.param))
+	return string(buf[:])
+}
+
+// Deserialize expects storage to be int32 type and data must be a 4 byte little endian slice.
+func (Parametric2Type) Deserialize(storage arrow.DataType, data string) (arrow.ExtensionType, error) {
+	if len(data) != 4 {
+		return nil, xerrors.Errorf("parametric1type: invalid serialized data size: %d", len(data))
+	}
+
+	if storage.ID() != arrow.INT32 {
+		return nil, xerrors.New("parametric1type: must have int32 as underlying storage type")
+	}
+
+	return &Parametric2Type{arrow.ExtensionBase{Storage: arrow.PrimitiveTypes.Int32}, int32(binary.LittleEndian.Uint32([]byte(data)))}, nil
+}
+
+// ExtStructArray is a struct array type for testing an extension type with non-primitive storage
+type ExtStructArray struct {
+	array.ExtensionArrayBase
+}
+
+// ExtStructType is an extension type with a non-primitive storage type containing a struct
+// with fields {a: int64, b: float64}
+type ExtStructType struct {
+	arrow.ExtensionBase
+}
+
+func NewExtStructType() *ExtStructType {
+	return &ExtStructType{
+		ExtensionBase: arrow.ExtensionBase{Storage: arrow.StructOf(
+			arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Int64},
+			arrow.Field{Name: "b", Type: arrow.PrimitiveTypes.Float64},
+		)},
+	}
+}
+
+func (p *ExtStructType) String() string { return "extension<" + p.ExtensionName() + ">" }
+
+// ExtensionName is always "ext-struct-type"
+func (ExtStructType) ExtensionName() string { return "ext-struct-type" }
+
+// ExtensionEquals returns true if other is a *ExtStructType
+func (ExtStructType) ExtensionEquals(other arrow.ExtensionType) bool {
+	_, ok := other.(*ExtStructType)
+	return ok
+}
+
+// ArrayType returns TypeOf(ExtStructType{})
+func (ExtStructType) ArrayType() reflect.Type { return reflect.TypeOf(ExtStructArray{}) }
+
+// Serialize just returns "ext-struct-type-unique-code" to test metadata passing in IPC
+func (ExtStructType) Serialize() string { return "ext-struct-type-unique-code" }
+
+// Deserialize ignores the passed in storage datatype and only checks the serialized data byte slice
+// returning the correct type if it matches "ext-struct-type-unique-code".
+func (ExtStructType) Deserialize(_ arrow.DataType, serialized string) (arrow.ExtensionType, error) {
+	if string(serialized) != "ext-struct-type-unique-code" {
+		return nil, xerrors.New("type identifier did not match")
+	}
+	return NewExtStructType(), nil
+}
+
+var (
+	_ arrow.ExtensionType  = (*UUIDType)(nil)
+	_ arrow.ExtensionType  = (*Parametric1Type)(nil)
+	_ arrow.ExtensionType  = (*Parametric2Type)(nil)
+	_ arrow.ExtensionType  = (*ExtStructType)(nil)
+	_ array.ExtensionArray = (*UUIDArray)(nil)
+	_ array.ExtensionArray = (*Parametric1Array)(nil)
+	_ array.ExtensionArray = (*Parametric2Array)(nil)
+	_ array.ExtensionArray = (*ExtStructArray)(nil)
+)
diff --git a/go/arrow/ipc/cmd/arrow-json-integration-test/main.go b/go/arrow/ipc/cmd/arrow-json-integration-test/main.go
index 79662f76dec..90347b1618f 100644
--- a/go/arrow/ipc/cmd/arrow-json-integration-test/main.go
+++ b/go/arrow/ipc/cmd/arrow-json-integration-test/main.go
@@ -21,9 +21,11 @@ import (
 	"log"
 	"os"
 
+	"github.com/apache/arrow/go/arrow"
 	"github.com/apache/arrow/go/arrow/array"
 	"github.com/apache/arrow/go/arrow/arrio"
 	"github.com/apache/arrow/go/arrow/internal/arrjson"
+	"github.com/apache/arrow/go/arrow/internal/testing/types"
 	"github.com/apache/arrow/go/arrow/ipc"
 	"golang.org/x/xerrors"
 )
@@ -48,6 +50,8 @@ func main() {
 }
 
 func runCommand(jsonName, arrowName, mode string, verbose bool) error {
+	arrow.RegisterExtensionType(types.NewUUIDType())
+
 	if jsonName == "" {
 		return xerrors.Errorf("must specify json file name")
 	}
diff --git a/go/arrow/ipc/file_reader.go b/go/arrow/ipc/file_reader.go
index 66bd58ab0bb..04d736fb227 100644
--- a/go/arrow/ipc/file_reader.go
+++ b/go/arrow/ipc/file_reader.go
@@ -452,6 +452,11 @@ func (ctx *arrayLoaderContext) loadArray(dt arrow.DataType) array.Interface {
 	case *arrow.MapType:
 		return ctx.loadMap(dt)
 
+	case arrow.ExtensionType:
+		storage := ctx.loadArray(dt.StorageType())
+		defer storage.Release()
+		return array.NewExtensionArrayWithStorage(dt, storage)
+
 	default:
 		panic(xerrors.Errorf("array type %T not handled yet", dt))
 	}
diff --git a/go/arrow/ipc/metadata.go b/go/arrow/ipc/metadata.go
index 373e705e3e3..ccc231d17bf 100644
--- a/go/arrow/ipc/metadata.go
+++ b/go/arrow/ipc/metadata.go
@@ -35,8 +35,10 @@ const (
 	currentMetadataVersion = MetadataV5
 	minMetadataVersion     = MetadataV4
 
-	kExtensionTypeKeyName = "arrow_extension_name"
-	kExtensionDataKeyName = "arrow_extension_data"
+	// constants for the extension type metadata keys for the type name and
+	// any extension metadata to be passed to deserialize.
+	ExtensionTypeKeyName     = "ARROW:extension:name"
+	ExtensionMetadataKeyName = "ARROW:extension:metadata"
 
 	// ARROW-109: We set this number arbitrarily to help catch user mistakes. For
 	// deeply nested schemas, it is expected the user will indicate explicitly the
@@ -187,7 +189,7 @@ func fieldFromFB(field *flatbuf.Field, memo *dictMemo) (arrow.Field, error) {
 			children[i] = child
 		}
 
-		o.Type, err = typeFromFB(field, children, o.Metadata)
+		o.Type, err = typeFromFB(field, children, &o.Metadata)
 		if err != nil {
 			return o, xerrors.Errorf("arrow/ipc: could not convert field type: %w", err)
 		}
@@ -345,7 +347,7 @@ func (fv *fieldVisitor) visit(field arrow.Field) {
 
 	case *arrow.ListType:
 		fv.dtype = flatbuf.TypeList
-		fv.kids = append(fv.kids, fieldToFB(fv.b, arrow.Field{Name: "item", Type: dt.Elem(), Nullable: field.Nullable}, fv.memo))
+		fv.kids = append(fv.kids, fieldToFB(fv.b, arrow.Field{Name: "item", Type: dt.Elem(), Nullable: field.Nullable, Metadata: dt.Meta}, fv.memo))
 		flatbuf.ListStart(fv.b)
 		fv.offset = flatbuf.ListEnd(fv.b)
 
@@ -382,6 +384,12 @@ func (fv *fieldVisitor) visit(field arrow.Field) {
 		flatbuf.MapAddKeysSorted(fv.b, dt.KeysSorted)
 		fv.offset = flatbuf.MapEnd(fv.b)
 
+	case arrow.ExtensionType:
+		field.Type = dt.StorageType()
+		fv.visit(field)
+		fv.meta[ExtensionTypeKeyName] = dt.ExtensionName()
+		fv.meta[ExtensionMetadataKeyName] = string(dt.Serialize())
+
 	default:
 		err := xerrors.Errorf("arrow/ipc: invalid data type %v", dt)
 		panic(err) // FIXME(sbinet): implement all data-types.
@@ -484,7 +492,7 @@ func fieldFromFBDict(field *flatbuf.Field) (arrow.Field, error) {
 		return o, xerrors.Errorf("arrow/ipc: metadata for field from dict: %w", err)
 	}
 
-	o.Type, err = typeFromFB(field, kids, meta)
+	o.Type, err = typeFromFB(field, kids, &meta)
 	if err != nil {
 		return o, xerrors.Errorf("arrow/ipc: type for field from dict: %w", err)
 	}
@@ -492,7 +500,7 @@ func fieldFromFBDict(field *flatbuf.Field) (arrow.Field, error) {
 	return o, nil
 }
 
-func typeFromFB(field *flatbuf.Field, children []arrow.Field, md arrow.Metadata) (arrow.DataType, error) {
+func typeFromFB(field *flatbuf.Field, children []arrow.Field, md *arrow.Metadata) (arrow.DataType, error) {
 	var data flatbuffers.Table
 	if !field.Type(&data) {
 		return nil, xerrors.Errorf("arrow/ipc: could not load field type data")
@@ -505,12 +513,52 @@ func typeFromFB(field *flatbuf.Field, children []arrow.Field, md arrow.Metadata)
 
 	// look for extension metadata in custom metadata field.
 	if md.Len() > 0 {
-		i := md.FindKey(kExtensionTypeKeyName)
+		i := md.FindKey(ExtensionTypeKeyName)
 		if i < 0 {
 			return dt, err
 		}
 
-		panic("not implemented") // FIXME(sbinet)
+		extType := arrow.GetExtensionType(md.Values()[i])
+		if extType == nil {
+			// if the extension type is unknown, we do not error here.
+			// simply return the storage type.
+			return dt, err
+		}
+
+		var (
+			data    string
+			dataIdx int
+		)
+
+		if dataIdx = md.FindKey(ExtensionMetadataKeyName); dataIdx >= 0 {
+			data = md.Values()[dataIdx]
+		}
+
+		dt, err = extType.Deserialize(dt, data)
+		if err != nil {
+			return dt, err
+		}
+
+		mdkeys := md.Keys()
+		mdvals := md.Values()
+		if dataIdx < 0 {
+			// if there was no extension metadata, just the name, we only have to
+			// remove the extension name metadata key/value to ensure roundtrip
+			// metadata consistency
+			*md = arrow.NewMetadata(append(mdkeys[:i], mdkeys[i+1:]...), append(mdvals[:i], mdvals[i+1:]...))
+		} else {
+			// if there was extension metadata, we need to remove both the type name
+			// and the extension metadata keys and values.
+			newkeys := make([]string, 0, md.Len()-2)
+			newvals := make([]string, 0, md.Len()-2)
+			for j := range mdkeys {
+				if j != i && j != dataIdx { // copy everything except the extension metadata keys/values
+					newkeys = append(newkeys, mdkeys[j])
+					newvals = append(newvals, mdvals[j])
+				}
+			}
+			*md = arrow.NewMetadata(newkeys, newvals)
+		}
 	}
 
 	return dt, err
@@ -557,7 +605,9 @@ func concreteTypeFromFB(typ flatbuf.Type, data flatbuffers.Table, children []arr
 		if len(children) != 1 {
 			return nil, xerrors.Errorf("arrow/ipc: List must have exactly 1 child field (got=%d)", len(children))
 		}
-		return arrow.ListOf(children[0].Type), nil
+		dt := arrow.ListOf(children[0].Type)
+		dt.Meta = children[0].Metadata
+		return dt, nil
 
 	case flatbuf.TypeFixedSizeList:
 		var dt flatbuf.FixedSizeList
diff --git a/go/arrow/ipc/metadata_test.go b/go/arrow/ipc/metadata_test.go
index 974267239e4..a1408ae1ecf 100644
--- a/go/arrow/ipc/metadata_test.go
+++ b/go/arrow/ipc/metadata_test.go
@@ -22,8 +22,12 @@ import (
 	"testing"
 
 	"github.com/apache/arrow/go/arrow"
+	"github.com/apache/arrow/go/arrow/array"
 	"github.com/apache/arrow/go/arrow/internal/flatbuf"
+	"github.com/apache/arrow/go/arrow/internal/testing/types"
+	"github.com/apache/arrow/go/arrow/memory"
 	flatbuffers "github.com/google/flatbuffers/go"
+	"github.com/stretchr/testify/assert"
 )
 
 func TestRWSchema(t *testing.T) {
@@ -157,3 +161,62 @@ func TestRWFooter(t *testing.T) {
 		})
 	}
 }
+
+func exampleUUID(mem memory.Allocator) array.Interface {
+	extType := types.NewUUIDType()
+	bldr := array.NewExtensionBuilder(mem, extType)
+	defer bldr.Release()
+
+	bldr.Builder.(*array.FixedSizeBinaryBuilder).AppendValues(
+		[][]byte{nil, []byte("abcdefghijklmno0"), []byte("abcdefghijklmno1"), []byte("abcdefghijklmno2")},
+		[]bool{false, true, true, true})
+
+	return bldr.NewArray()
+}
+
+func TestUnrecognizedExtensionType(t *testing.T) {
+	pool := memory.NewCheckedAllocator(memory.NewGoAllocator())
+	defer pool.AssertSize(t, 0)
+
+	// register the uuid type
+	assert.NoError(t, arrow.RegisterExtensionType(types.NewUUIDType()))
+
+	extArr := exampleUUID(pool)
+	defer extArr.Release()
+
+	batch := array.NewRecord(
+		arrow.NewSchema([]arrow.Field{
+			{Name: "f0", Type: extArr.DataType(), Nullable: true}}, nil),
+		[]array.Interface{extArr}, 4)
+	defer batch.Release()
+
+	storageArr := extArr.(array.ExtensionArray).Storage()
+
+	var buf bytes.Buffer
+	wr := NewWriter(&buf, WithAllocator(pool), WithSchema(batch.Schema()))
+	assert.NoError(t, wr.Write(batch))
+	wr.Close()
+
+	// unregister the uuid type before we read back the buffer so it is
+	// unrecognized when reading back the record batch.
+	assert.NoError(t, arrow.UnregisterExtensionType("uuid"))
+	rdr, err := NewReader(&buf, WithAllocator(pool))
+	defer rdr.Release()
+
+	assert.NoError(t, err)
+	assert.True(t, rdr.Next())
+
+	rec := rdr.Record()
+	assert.NotNil(t, rec)
+
+	// create a record batch with the same data, but the field should contain the
+	// extension metadata and be of the storage type instead of being the extension type.
+	extMetadata := arrow.NewMetadata([]string{ExtensionTypeKeyName, ExtensionMetadataKeyName}, []string{"uuid", "uuid-serialized"})
+	batchNoExt := array.NewRecord(
+		arrow.NewSchema([]arrow.Field{
+			{Name: "f0", Type: storageArr.DataType(), Nullable: true, Metadata: extMetadata},
+		}, nil), []array.Interface{storageArr}, 4)
+	defer batchNoExt.Release()
+
+	assert.Truef(t, array.RecordEqual(rec, batchNoExt), "expected: %s\ngot: %s\n", batchNoExt, rec)
+}
diff --git a/go/arrow/ipc/reader.go b/go/arrow/ipc/reader.go
index 42f0857b805..e63f4059e7b 100644
--- a/go/arrow/ipc/reader.go
+++ b/go/arrow/ipc/reader.go
@@ -58,10 +58,11 @@ func NewReaderFromMessageReader(r MessageReader, opts ...Option) (*Reader, error
 	}
 
 	rr := &Reader{
-		r:     r,
-		types: make(dictTypeMap),
-		memo:  newMemo(),
-		mem:   cfg.alloc,
+		r:        r,
+		refCount: 1,
+		types:    make(dictTypeMap),
+		memo:     newMemo(),
+		mem:      cfg.alloc,
 	}
 
 	err := rr.readSchema(cfg.schema)
diff --git a/go/arrow/ipc/writer.go b/go/arrow/ipc/writer.go
index 83dd7128c1f..e9c43bbe900 100644
--- a/go/arrow/ipc/writer.go
+++ b/go/arrow/ipc/writer.go
@@ -316,6 +316,15 @@ func (w *recordEncoder) visit(p *Payload, arr array.Interface) error {
 		return errBigArray
 	}
 
+	if arr.DataType().ID() == arrow.EXTENSION {
+		arr := arr.(array.ExtensionArray)
+		err := w.visit(p, arr.Storage())
+		if err != nil {
+			return xerrors.Errorf("failed visiting storage of for array %T: %w", arr, err)
+		}
+		return nil
+	}
+
 	// add all common elements
 	w.fields = append(w.fields, fieldMetadata{
 		Len:    int64(arr.Len()),
diff --git a/go/arrow/schema.go b/go/arrow/schema.go
index ab9e536e64c..f278308da81 100644
--- a/go/arrow/schema.go
+++ b/go/arrow/schema.go
@@ -104,6 +104,35 @@ func (md Metadata) clone() Metadata {
 	return o
 }
 
+func (md Metadata) sortedIndices() []int {
+	idxes := make([]int, len(md.keys))
+	for i := range idxes {
+		idxes[i] = i
+	}
+
+	sort.Slice(idxes, func(i, j int) bool {
+		return md.keys[idxes[i]] < md.keys[idxes[j]]
+	})
+	return idxes
+}
+
+func (md Metadata) Equal(rhs Metadata) bool {
+	if md.Len() != rhs.Len() {
+		return false
+	}
+
+	idxes := md.sortedIndices()
+	rhsIdxes := rhs.sortedIndices()
+	for i := range idxes {
+		j := idxes[i]
+		k := rhsIdxes[i]
+		if md.keys[j] != rhs.keys[k] || md.values[j] != rhs.values[k] {
+			return false
+		}
+	}
+	return true
+}
+
 // Schema is a sequence of Field values, describing the columns of a table or
 // a record batch.
 type Schema struct {

From 325eb073e0fb6971f3dd027299d37850377b39ea Mon Sep 17 00:00:00 2001
From: Matthew Topol <mtopol@factset.com>
Date: Thu, 13 May 2021 09:05:17 -0700
Subject: [PATCH 239/719] ARROW-12746: [Go][Flight] append instead of
 overwriting outgoing metadata

Closes #10297 from zeroshade/flight-client-metadata

Authored-by: Matthew Topol <mtopol@factset.com>
Signed-off-by: Micah Kornfield <emkornfield@gmail.com>
---
 go/arrow/flight/client_auth.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/go/arrow/flight/client_auth.go b/go/arrow/flight/client_auth.go
index 5f7c151abf7..1c1e38ed7d2 100644
--- a/go/arrow/flight/client_auth.go
+++ b/go/arrow/flight/client_auth.go
@@ -65,7 +65,7 @@ func createClientAuthUnaryInterceptor(auth ClientAuthHandler) grpc.UnaryClientIn
 			return status.Errorf(codes.Unauthenticated, "error retrieving token: %s", err)
 		}
 
-		return invoker(metadata.NewOutgoingContext(ctx, metadata.Pairs(grpcAuthHeader, tok)), method, req, reply, cc, opts...)
+		return invoker(metadata.AppendToOutgoingContext(ctx, grpcAuthHeader, tok), method, req, reply, cc, opts...)
 	}
 }
 
@@ -86,6 +86,6 @@ func createClientAuthStreamInterceptor(auth ClientAuthHandler) grpc.StreamClient
 			return nil, status.Errorf(codes.Unauthenticated, "error retrieving token: %s", err)
 		}
 
-		return streamer(metadata.NewOutgoingContext(ctx, metadata.Pairs(grpcAuthHeader, tok)), desc, cc, method, opts...)
+		return streamer(metadata.AppendToOutgoingContext(ctx, grpcAuthHeader, tok), desc, cc, method, opts...)
 	}
 }

From 4b902f7320fc480bf094c1f4c737f78de2f76e6f Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Fri, 14 May 2021 09:06:34 +0900
Subject: [PATCH 240/719] ARROW-12780: [CI][C++] Install necessary packages for
 MinGW builds

libxml2 is now required to run clang on MinGW. This can be found by `strace`ing clang under MinGW; a dialog will pop up telling you the name of any missing libraries (ldd, notably, may not give you this info).

Closes #10319 from lidavidm/arrow-12780

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 ci/scripts/msys2_setup.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ci/scripts/msys2_setup.sh b/ci/scripts/msys2_setup.sh
index cb6ca30a64e..991d98bbb4a 100755
--- a/ci/scripts/msys2_setup.sh
+++ b/ci/scripts/msys2_setup.sh
@@ -35,6 +35,7 @@ case "${target}" in
     packages+=(${MINGW_PACKAGE_PREFIX}-grpc)
     packages+=(${MINGW_PACKAGE_PREFIX}-gtest)
     packages+=(${MINGW_PACKAGE_PREFIX}-libutf8proc)
+    packages+=(${MINGW_PACKAGE_PREFIX}-libxml2)
     packages+=(${MINGW_PACKAGE_PREFIX}-llvm)
     packages+=(${MINGW_PACKAGE_PREFIX}-lz4)
     packages+=(${MINGW_PACKAGE_PREFIX}-ninja)

From 02518a11ff8976b9dd63f3e3cf222c37aa1551b3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Fri, 14 May 2021 13:00:34 +0200
Subject: [PATCH 241/719] ARROW-12776: [Archery][Integration] Fix decimal case
 generation in write_js_test_json
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The integration build has started to fail on master: https://github.com/apache/arrow/runs/2575265526#step:9:4265

I don't entirely understand the reason why we see this error, in order to call that function we would need to pass `--write_generated_json` to the archery command, but we don't.
The only occurrence of that option in the codebase is in the javascript [test runner](https://github.com/apache/arrow/blob/master/js/gulp/test-task.js#L97), but that seems to use the old `integration_test.py` script which have been deleted since we ported it to archery (cc @trxcllnt @domoritz).

Additionally, I'm unable to reproduce it locally since `archery integration` doesn't call `write_js_test_json` by default.

The implementation is clearly wrong though.

Closes #10314 from kszucs/integration-decimal

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 dev/archery/archery/integration/runner.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/dev/archery/archery/integration/runner.py b/dev/archery/archery/integration/runner.py
index 8aef1637490..6f4c1385abf 100644
--- a/dev/archery/archery/integration/runner.py
+++ b/dev/archery/archery/integration/runner.py
@@ -396,9 +396,12 @@ def write_js_test_json(directory):
     datagen.generate_nested_case().write(
         os.path.join(directory, 'nested.json')
     )
-    datagen.generate_decimal_case().write(
+    datagen.generate_decimal128_case().write(
         os.path.join(directory, 'decimal.json')
     )
+    datagen.generate_decimal256_case().write(
+        os.path.join(directory, 'decimal256.json')
+    )
     datagen.generate_datetime_case().write(
         os.path.join(directory, 'datetime.json')
     )

From dba299c04ed13438bcc4d4dc02e4170423ee2705 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Fri, 14 May 2021 15:40:43 +0200
Subject: [PATCH 242/719] ARROW-12741: [CI] Configure Crossbow GitHub Token for
 Nightly Builds
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This partially reverts commit dae3fcccca9bba114913b2b09564127bc0ee779e.

Closes #10311 from kszucs/ARROW-12741

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 dev/tasks/macros.jinja | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dev/tasks/macros.jinja b/dev/tasks/macros.jinja
index 59a7d8588a5..38bbea4fc60 100644
--- a/dev/tasks/macros.jinja
+++ b/dev/tasks/macros.jinja
@@ -87,7 +87,7 @@ on:
       {% endfor %}
     {% endif %}
     env:
-      CROSSBOW_GITHUB_TOKEN: {{ '${{ secrets.GITHUB_TOKEN }}' }}
+      CROSSBOW_GITHUB_TOKEN: {{ '${{ secrets.CROSSBOW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}' }}
 {% endmacro %}
 
 {%- macro github_upload_gemfury(pattern) -%}

From c5a9f5550e09fb6face43ac58aa184cf6bd14a90 Mon Sep 17 00:00:00 2001
From: Alessandro Molina <amol@turbogears.org>
Date: Fri, 14 May 2021 15:44:42 -0400
Subject: [PATCH 243/719] ARROW-12793: [Python] Fix support for pyarrow debug
 builds

Closes #10324 from amol-/ARROW-12793

Authored-by: Alessandro Molina <amol@turbogears.org>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/cmake_modules/UseCython.cmake | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/cpp/cmake_modules/UseCython.cmake b/cpp/cmake_modules/UseCython.cmake
index 0d4b17d3e57..0b65a7842f9 100644
--- a/cpp/cmake_modules/UseCython.cmake
+++ b/cpp/cmake_modules/UseCython.cmake
@@ -107,8 +107,9 @@ function(compile_pyx
   endif()
 
   if(NOT WIN32)
-    if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug"
-       OR "${CMAKE_BUILD_TYPE}" STREQUAL "RelWithDebInfo")
+    string( TOLOWER "${CMAKE_BUILD_TYPE}" build_type )
+    if("${build_type}" STREQUAL "debug"
+       OR "${build_type}" STREQUAL "relwithdebinfo")
       set(cython_debug_arg "--gdb")
     endif()
   endif()

From b01bcf2db2d493b25185be1d06223ccdd1cf33d0 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Fri, 14 May 2021 15:47:19 -0400
Subject: [PATCH 244/719] ARROW-11772: [C++] Provide reentrant IPC file reader

This provides an async-reentrant generator of record batches from an IPC file reader, intended to support Datasets once it becomes async itself.

IPC messages are read on an IO thread pool, then decoded on the CPU thread pool. All dictionaries must be read at the start, then record batches can be read independently.

Closes #9656 from lidavidm/arrow-11772

Lead-authored-by: David Li <li.davidm96@gmail.com>
Co-authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/src/arrow/buffer.cc                   | 107 -------
 cpp/src/arrow/dataset/file_ipc.cc         | 104 ++++--
 cpp/src/arrow/dataset/file_ipc.h          |   8 +
 cpp/src/arrow/io/memory.cc                |   4 +-
 cpp/src/arrow/io/type_fwd.h               |   1 +
 cpp/src/arrow/ipc/message.cc              |  55 ++++
 cpp/src/arrow/ipc/message.h               |   5 +
 cpp/src/arrow/ipc/read_write_benchmark.cc | 127 ++++++--
 cpp/src/arrow/ipc/read_write_test.cc      |  78 +++--
 cpp/src/arrow/ipc/reader.cc               | 368 ++++++++++++++++++----
 cpp/src/arrow/ipc/reader.h                |  40 ++-
 cpp/src/arrow/memory_pool.cc              | 149 ++++++++-
 cpp/src/arrow/testing/future_util.h       |  11 +
 13 files changed, 801 insertions(+), 256 deletions(-)

diff --git a/cpp/src/arrow/buffer.cc b/cpp/src/arrow/buffer.cc
index 8275542c542..b1b2945d0f5 100644
--- a/cpp/src/arrow/buffer.cc
+++ b/cpp/src/arrow/buffer.cc
@@ -21,7 +21,6 @@
 #include <cstdint>
 #include <utility>
 
-#include "arrow/memory_pool.h"
 #include "arrow/result.h"
 #include "arrow/status.h"
 #include "arrow/util/bit_util.h"
@@ -171,112 +170,6 @@ MutableBuffer::MutableBuffer(const std::shared_ptr<Buffer>& parent, const int64_
   parent_ = parent;
 }
 
-// -----------------------------------------------------------------------
-// Pool buffer and allocation
-
-/// A Buffer whose lifetime is tied to a particular MemoryPool
-class PoolBuffer final : public ResizableBuffer {
- public:
-  explicit PoolBuffer(std::shared_ptr<MemoryManager> mm, MemoryPool* pool)
-      : ResizableBuffer(nullptr, 0, std::move(mm)), pool_(pool) {}
-
-  ~PoolBuffer() override {
-    uint8_t* ptr = mutable_data();
-    if (ptr) {
-      pool_->Free(ptr, capacity_);
-    }
-  }
-
-  Status Reserve(const int64_t capacity) override {
-    if (capacity < 0) {
-      return Status::Invalid("Negative buffer capacity: ", capacity);
-    }
-    uint8_t* ptr = mutable_data();
-    if (!ptr || capacity > capacity_) {
-      int64_t new_capacity = BitUtil::RoundUpToMultipleOf64(capacity);
-      if (ptr) {
-        RETURN_NOT_OK(pool_->Reallocate(capacity_, new_capacity, &ptr));
-      } else {
-        RETURN_NOT_OK(pool_->Allocate(new_capacity, &ptr));
-      }
-      data_ = ptr;
-      capacity_ = new_capacity;
-    }
-    return Status::OK();
-  }
-
-  Status Resize(const int64_t new_size, bool shrink_to_fit = true) override {
-    if (ARROW_PREDICT_FALSE(new_size < 0)) {
-      return Status::Invalid("Negative buffer resize: ", new_size);
-    }
-    uint8_t* ptr = mutable_data();
-    if (ptr && shrink_to_fit && new_size <= size_) {
-      // Buffer is non-null and is not growing, so shrink to the requested size without
-      // excess space.
-      int64_t new_capacity = BitUtil::RoundUpToMultipleOf64(new_size);
-      if (capacity_ != new_capacity) {
-        // Buffer hasn't got yet the requested size.
-        RETURN_NOT_OK(pool_->Reallocate(capacity_, new_capacity, &ptr));
-        data_ = ptr;
-        capacity_ = new_capacity;
-      }
-    } else {
-      RETURN_NOT_OK(Reserve(new_size));
-    }
-    size_ = new_size;
-
-    return Status::OK();
-  }
-
-  static std::shared_ptr<PoolBuffer> MakeShared(MemoryPool* pool) {
-    std::shared_ptr<MemoryManager> mm;
-    if (pool == nullptr) {
-      pool = default_memory_pool();
-      mm = default_cpu_memory_manager();
-    } else {
-      mm = CPUDevice::memory_manager(pool);
-    }
-    return std::make_shared<PoolBuffer>(std::move(mm), pool);
-  }
-
-  static std::unique_ptr<PoolBuffer> MakeUnique(MemoryPool* pool) {
-    std::shared_ptr<MemoryManager> mm;
-    if (pool == nullptr) {
-      pool = default_memory_pool();
-      mm = default_cpu_memory_manager();
-    } else {
-      mm = CPUDevice::memory_manager(pool);
-    }
-    return std::unique_ptr<PoolBuffer>(new PoolBuffer(std::move(mm), pool));
-  }
-
- private:
-  MemoryPool* pool_;
-};
-
-namespace {
-// A utility that does most of the work of the `AllocateBuffer` and
-// `AllocateResizableBuffer` methods. The argument `buffer` should be a smart pointer to
-// a PoolBuffer.
-template <typename BufferPtr, typename PoolBufferPtr>
-inline Result<BufferPtr> ResizePoolBuffer(PoolBufferPtr&& buffer, const int64_t size) {
-  RETURN_NOT_OK(buffer->Resize(size));
-  buffer->ZeroPadding();
-  return std::move(buffer);
-}
-
-}  // namespace
-
-Result<std::unique_ptr<Buffer>> AllocateBuffer(const int64_t size, MemoryPool* pool) {
-  return ResizePoolBuffer<std::unique_ptr<Buffer>>(PoolBuffer::MakeUnique(pool), size);
-}
-
-Result<std::unique_ptr<ResizableBuffer>> AllocateResizableBuffer(const int64_t size,
-                                                                 MemoryPool* pool) {
-  return ResizePoolBuffer<std::unique_ptr<ResizableBuffer>>(PoolBuffer::MakeUnique(pool),
-                                                            size);
-}
-
 Result<std::shared_ptr<Buffer>> AllocateBitmap(int64_t length, MemoryPool* pool) {
   ARROW_ASSIGN_OR_RAISE(auto buf, AllocateBuffer(BitUtil::BytesForBits(length), pool));
   // Zero out any trailing bits
diff --git a/cpp/src/arrow/dataset/file_ipc.cc b/cpp/src/arrow/dataset/file_ipc.cc
index a60e31bf7d2..a8863ee0775 100644
--- a/cpp/src/arrow/dataset/file_ipc.cc
+++ b/cpp/src/arrow/dataset/file_ipc.cc
@@ -59,6 +59,21 @@ static inline Result<std::shared_ptr<ipc::RecordBatchFileReader>> OpenReader(
   return reader;
 }
 
+static inline Future<std::shared_ptr<ipc::RecordBatchFileReader>> OpenReaderAsync(
+    const FileSource& source,
+    const ipc::IpcReadOptions& options = default_read_options()) {
+  ARROW_ASSIGN_OR_RAISE(auto input, source.Open());
+  auto path = source.path();
+  return ipc::RecordBatchFileReader::OpenAsync(std::move(input), options)
+      .Then([](const std::shared_ptr<ipc::RecordBatchFileReader>& reader)
+                -> Result<std::shared_ptr<ipc::RecordBatchFileReader>> { return reader; },
+            [path](const Status& status)
+                -> Result<std::shared_ptr<ipc::RecordBatchFileReader>> {
+              return status.WithMessage("Could not open IPC input source '", path,
+                                        "': ", status.message());
+            });
+}
+
 static inline Result<std::vector<int>> GetIncludedFields(
     const Schema& schema, const std::vector<std::string>& materialized_fields) {
   std::vector<int> included_fields;
@@ -73,6 +88,26 @@ static inline Result<std::vector<int>> GetIncludedFields(
   return included_fields;
 }
 
+static inline Result<ipc::IpcReadOptions> GetReadOptions(
+    const Schema& schema, const FileFormat& format, const ScanOptions& scan_options) {
+  ARROW_ASSIGN_OR_RAISE(
+      auto ipc_scan_options,
+      GetFragmentScanOptions<IpcFragmentScanOptions>(
+          kIpcTypeName, &scan_options, format.default_fragment_scan_options));
+  auto options =
+      ipc_scan_options->options ? *ipc_scan_options->options : default_read_options();
+  options.memory_pool = scan_options.pool;
+  if (!options.included_fields.empty()) {
+    // Cannot set them here
+    ARROW_LOG(WARNING) << "IpcFragmentScanOptions.options->included_fields was set "
+                          "but will be ignored; included_fields are derived from "
+                          "fields referenced by the scan";
+  }
+  ARROW_ASSIGN_OR_RAISE(options.included_fields,
+                        GetIncludedFields(schema, scan_options.MaterializedFields()));
+  return options;
+}
+
 /// \brief A ScanTask backed by an Ipc file.
 class IpcScanTask : public ScanTask {
  public:
@@ -83,28 +118,11 @@ class IpcScanTask : public ScanTask {
   Result<RecordBatchIterator> Execute() override {
     struct Impl {
       static Result<RecordBatchIterator> Make(const FileSource& source,
-                                              FileFormat* format,
-                                              const ScanOptions* scan_options) {
+                                              const FileFormat& format,
+                                              const ScanOptions& scan_options) {
         ARROW_ASSIGN_OR_RAISE(auto reader, OpenReader(source));
-
-        ARROW_ASSIGN_OR_RAISE(
-            auto ipc_scan_options,
-            GetFragmentScanOptions<IpcFragmentScanOptions>(
-                kIpcTypeName, scan_options, format->default_fragment_scan_options));
-        auto options = ipc_scan_options->options ? *ipc_scan_options->options
-                                                 : default_read_options();
-        options.memory_pool = scan_options->pool;
-        options.use_threads = false;
-        if (!options.included_fields.empty()) {
-          // Cannot set them here
-          ARROW_LOG(WARNING) << "IpcFragmentScanOptions.options->included_fields was set "
-                                "but will be ignored; included_fields are derived from "
-                                "fields referenced by the scan";
-        }
-        ARROW_ASSIGN_OR_RAISE(
-            options.included_fields,
-            GetIncludedFields(*reader->schema(), scan_options->MaterializedFields()));
-
+        ARROW_ASSIGN_OR_RAISE(auto options,
+                              GetReadOptions(*reader->schema(), format, scan_options));
         ARROW_ASSIGN_OR_RAISE(reader, OpenReader(source, options));
         return RecordBatchIterator(Impl{std::move(reader), 0});
       }
@@ -121,9 +139,9 @@ class IpcScanTask : public ScanTask {
       int i_;
     };
 
-    return Impl::Make(
-        source_, internal::checked_pointer_cast<FileFragment>(fragment_)->format().get(),
-        options_.get());
+    return Impl::Make(source_,
+                      *internal::checked_pointer_cast<FileFragment>(fragment_)->format(),
+                      *options_);
   }
 
  private:
@@ -173,6 +191,44 @@ Result<ScanTaskIterator> IpcFileFormat::ScanFile(
   return IpcScanTaskIterator::Make(options, fragment);
 }
 
+Result<RecordBatchGenerator> IpcFileFormat::ScanBatchesAsync(
+    const std::shared_ptr<ScanOptions>& options,
+    const std::shared_ptr<FileFragment>& file) const {
+  auto self = shared_from_this();
+  auto source = file->source();
+  auto open_reader = OpenReaderAsync(source);
+  auto reopen_reader = [self, options,
+                        source](std::shared_ptr<ipc::RecordBatchFileReader> reader)
+      -> Future<std::shared_ptr<ipc::RecordBatchFileReader>> {
+    ARROW_ASSIGN_OR_RAISE(auto options,
+                          GetReadOptions(*reader->schema(), *self, *options));
+    return OpenReader(source, options);
+  };
+  auto readahead_level = options->batch_readahead;
+  auto default_fragment_scan_options = this->default_fragment_scan_options;
+  auto open_generator = [=](const std::shared_ptr<ipc::RecordBatchFileReader>& reader)
+      -> Result<RecordBatchGenerator> {
+    ARROW_ASSIGN_OR_RAISE(
+        auto ipc_scan_options,
+        GetFragmentScanOptions<IpcFragmentScanOptions>(kIpcTypeName, options.get(),
+                                                       default_fragment_scan_options));
+
+    RecordBatchGenerator generator;
+    if (ipc_scan_options->cache_options) {
+      // Transferring helps performance when coalescing
+      ARROW_ASSIGN_OR_RAISE(
+          generator, reader->GetRecordBatchGenerator(
+                         /*coalesce=*/true, options->io_context,
+                         *ipc_scan_options->cache_options, internal::GetCpuThreadPool()));
+    } else {
+      ARROW_ASSIGN_OR_RAISE(generator, reader->GetRecordBatchGenerator(
+                                           /*coalesce=*/false, options->io_context));
+    }
+    return MakeReadaheadGenerator(std::move(generator), readahead_level);
+  };
+  return MakeFromFuture(open_reader.Then(reopen_reader).Then(open_generator));
+}
+
 Future<util::optional<int64_t>> IpcFileFormat::CountRows(
     const std::shared_ptr<FileFragment>& file, compute::Expression predicate,
     std::shared_ptr<ScanOptions> options) {
diff --git a/cpp/src/arrow/dataset/file_ipc.h b/cpp/src/arrow/dataset/file_ipc.h
index d1c16a93cf4..3888de027c5 100644
--- a/cpp/src/arrow/dataset/file_ipc.h
+++ b/cpp/src/arrow/dataset/file_ipc.h
@@ -25,6 +25,7 @@
 #include "arrow/dataset/file_base.h"
 #include "arrow/dataset/type_fwd.h"
 #include "arrow/dataset/visibility.h"
+#include "arrow/io/type_fwd.h"
 #include "arrow/ipc/type_fwd.h"
 #include "arrow/result.h"
 
@@ -56,6 +57,10 @@ class ARROW_DS_EXPORT IpcFileFormat : public FileFormat {
       const std::shared_ptr<ScanOptions>& options,
       const std::shared_ptr<FileFragment>& fragment) const override;
 
+  Result<RecordBatchGenerator> ScanBatchesAsync(
+      const std::shared_ptr<ScanOptions>& options,
+      const std::shared_ptr<FileFragment>& file) const override;
+
   Future<util::optional<int64_t>> CountRows(
       const std::shared_ptr<FileFragment>& file, compute::Expression predicate,
       std::shared_ptr<ScanOptions> options) override;
@@ -75,6 +80,9 @@ class ARROW_DS_EXPORT IpcFragmentScanOptions : public FragmentScanOptions {
   /// Options passed to the IPC file reader.
   /// included_fields, memory_pool, and use_threads are ignored.
   std::shared_ptr<ipc::IpcReadOptions> options;
+  /// If present, the async scanner will enable I/O coalescing.
+  /// This is ignored by the sync scanner.
+  std::shared_ptr<io::CacheOptions> cache_options;
 };
 
 class ARROW_DS_EXPORT IpcFileWriteOptions : public FileWriteOptions {
diff --git a/cpp/src/arrow/io/memory.cc b/cpp/src/arrow/io/memory.cc
index a953c8f28a7..7d111183635 100644
--- a/cpp/src/arrow/io/memory.cc
+++ b/cpp/src/arrow/io/memory.cc
@@ -344,8 +344,8 @@ Result<std::shared_ptr<Buffer>> BufferReader::DoReadAt(int64_t position, int64_t
   DCHECK_GE(nbytes, 0);
 
   // Arrange for data to be paged in
-  RETURN_NOT_OK(::arrow::internal::MemoryAdviseWillNeed(
-      {{const_cast<uint8_t*>(data_ + position), static_cast<size_t>(nbytes)}}));
+  // RETURN_NOT_OK(::arrow::internal::MemoryAdviseWillNeed(
+  //     {{const_cast<uint8_t*>(data_ + position), static_cast<size_t>(nbytes)}}));
 
   if (nbytes > 0 && buffer_ != nullptr) {
     return SliceBuffer(buffer_, position, nbytes);
diff --git a/cpp/src/arrow/io/type_fwd.h b/cpp/src/arrow/io/type_fwd.h
index 041b825c988..d8208d39d60 100644
--- a/cpp/src/arrow/io/type_fwd.h
+++ b/cpp/src/arrow/io/type_fwd.h
@@ -27,6 +27,7 @@ struct FileMode {
 };
 
 struct IOContext;
+struct CacheOptions;
 
 /// EXPERIMENTAL: convenience global singleton for default IOContext settings
 ARROW_EXPORT
diff --git a/cpp/src/arrow/ipc/message.cc b/cpp/src/arrow/ipc/message.cc
index 6db8a0f0d3d..197556efcea 100644
--- a/cpp/src/arrow/ipc/message.cc
+++ b/cpp/src/arrow/ipc/message.cc
@@ -33,6 +33,7 @@
 #include "arrow/ipc/util.h"
 #include "arrow/status.h"
 #include "arrow/util/endian.h"
+#include "arrow/util/future.h"
 #include "arrow/util/logging.h"
 #include "arrow/util/ubsan.h"
 
@@ -324,6 +325,60 @@ Result<std::unique_ptr<Message>> ReadMessage(int64_t offset, int32_t metadata_le
   }
 }
 
+Future<std::shared_ptr<Message>> ReadMessageAsync(int64_t offset, int32_t metadata_length,
+                                                  int64_t body_length,
+                                                  io::RandomAccessFile* file,
+                                                  const io::IOContext& context) {
+  struct State {
+    std::unique_ptr<Message> result;
+    std::shared_ptr<MessageDecoderListener> listener;
+    std::shared_ptr<MessageDecoder> decoder;
+  };
+  auto state = std::make_shared<State>();
+  state->listener = std::make_shared<AssignMessageDecoderListener>(&state->result);
+  state->decoder = std::make_shared<MessageDecoder>(state->listener);
+
+  if (metadata_length < state->decoder->next_required_size()) {
+    return Status::Invalid("metadata_length should be at least ",
+                           state->decoder->next_required_size());
+  }
+  return file->ReadAsync(context, offset, metadata_length + body_length)
+      .Then([=](std::shared_ptr<Buffer> metadata) -> Result<std::shared_ptr<Message>> {
+        if (metadata->size() < metadata_length) {
+          return Status::Invalid("Expected to read ", metadata_length,
+                                 " metadata bytes but got ", metadata->size());
+        }
+        ARROW_RETURN_NOT_OK(
+            state->decoder->Consume(SliceBuffer(metadata, 0, metadata_length)));
+        switch (state->decoder->state()) {
+          case MessageDecoder::State::INITIAL:
+            return std::move(state->result);
+          case MessageDecoder::State::METADATA_LENGTH:
+            return Status::Invalid("metadata length is missing. File offset: ", offset,
+                                   ", metadata length: ", metadata_length);
+          case MessageDecoder::State::METADATA:
+            return Status::Invalid("flatbuffer size ",
+                                   state->decoder->next_required_size(),
+                                   " invalid. File offset: ", offset,
+                                   ", metadata length: ", metadata_length);
+          case MessageDecoder::State::BODY: {
+            auto body = SliceBuffer(metadata, metadata_length, body_length);
+            if (body->size() < state->decoder->next_required_size()) {
+              return Status::IOError("Expected to be able to read ",
+                                     state->decoder->next_required_size(),
+                                     " bytes for message body, got ", body->size());
+            }
+            RETURN_NOT_OK(state->decoder->Consume(body));
+            return std::move(state->result);
+          }
+          case MessageDecoder::State::EOS:
+            return Status::Invalid("Unexpected empty message in IPC file format");
+          default:
+            return Status::Invalid("Unexpected state: ", state->decoder->state());
+        }
+      });
+}
+
 Status AlignStream(io::InputStream* stream, int32_t alignment) {
   ARROW_ASSIGN_OR_RAISE(int64_t position, stream->Tell());
   return stream->Advance(PaddedLength(position, alignment) - position);
diff --git a/cpp/src/arrow/ipc/message.h b/cpp/src/arrow/ipc/message.h
index 6a7619d31b3..b2683259cb4 100644
--- a/cpp/src/arrow/ipc/message.h
+++ b/cpp/src/arrow/ipc/message.h
@@ -459,6 +459,11 @@ Result<std::unique_ptr<Message>> ReadMessage(const int64_t offset,
                                              const int32_t metadata_length,
                                              io::RandomAccessFile* file);
 
+ARROW_EXPORT
+Future<std::shared_ptr<Message>> ReadMessageAsync(
+    const int64_t offset, const int32_t metadata_length, const int64_t body_length,
+    io::RandomAccessFile* file, const io::IOContext& context = io::default_io_context());
+
 /// \brief Advance stream to an 8-byte offset if its position is not a multiple
 /// of 8 already
 /// \param[in] stream an input stream
diff --git a/cpp/src/arrow/ipc/read_write_benchmark.cc b/cpp/src/arrow/ipc/read_write_benchmark.cc
index a56dd3579e2..f5cc857acb0 100644
--- a/cpp/src/arrow/ipc/read_write_benchmark.cc
+++ b/cpp/src/arrow/ipc/read_write_benchmark.cc
@@ -21,6 +21,7 @@
 #include <sstream>
 #include <string>
 
+#include "arrow/io/file.h"
 #include "arrow/io/memory.h"
 #include "arrow/ipc/api.h"
 #include "arrow/record_batch.h"
@@ -90,36 +91,6 @@ static void ReadRecordBatch(benchmark::State& state) {  // NOLINT non-const refe
   state.SetBytesProcessed(int64_t(state.iterations()) * kTotalSize);
 }
 
-static void ReadFile(benchmark::State& state) {  // NOLINT non-const reference
-  // 1MB
-  constexpr int64_t kTotalSize = 1 << 20;
-  auto options = ipc::IpcWriteOptions::Defaults();
-
-  std::shared_ptr<ResizableBuffer> buffer = *AllocateResizableBuffer(1024);
-  {
-    // Make Arrow IPC file
-    auto record_batch = MakeRecordBatch(kTotalSize, state.range(0));
-
-    io::BufferOutputStream stream(buffer);
-    auto writer = *ipc::MakeFileWriter(&stream, record_batch->schema(), options);
-    ABORT_NOT_OK(writer->WriteRecordBatch(*record_batch));
-    ABORT_NOT_OK(writer->Close());
-    ABORT_NOT_OK(stream.Close());
-  }
-
-  ipc::DictionaryMemo empty_memo;
-  while (state.KeepRunning()) {
-    io::BufferReader input(buffer);
-    auto reader =
-        *ipc::RecordBatchFileReader::Open(&input, ipc::IpcReadOptions::Defaults());
-    const int num_batches = reader->num_record_batches();
-    for (int i = 0; i < num_batches; ++i) {
-      auto batch = *reader->ReadRecordBatch(i);
-    }
-  }
-  state.SetBytesProcessed(int64_t(state.iterations()) * kTotalSize);
-}
-
 static void ReadStream(benchmark::State& state) {  // NOLINT non-const reference
   // 1MB
   constexpr int64_t kTotalSize = 1 << 20;
@@ -188,9 +159,103 @@ static void DecodeStream(benchmark::State& state) {  // NOLINT non-const referen
   state.SetBytesProcessed(int64_t(state.iterations()) * kTotalSize);
 }
 
+#define GENERATE_COMPRESSED_DATA_IN_MEMORY()                                      \
+  constexpr int64_t kBatchSize = 1 << 20; /* 1 MB */                              \
+  constexpr int64_t kBatches = 16;                                                \
+  auto options = ipc::IpcWriteOptions::Defaults();                                \
+  ASSIGN_OR_ABORT(options.codec,                                                  \
+                  arrow::util::Codec::Create(arrow::Compression::type::ZSTD));    \
+  std::shared_ptr<ResizableBuffer> buffer = *AllocateResizableBuffer(1024);       \
+  {                                                                               \
+    auto record_batch = MakeRecordBatch(kBatchSize, state.range(0));              \
+    io::BufferOutputStream stream(buffer);                                        \
+    auto writer = *ipc::MakeFileWriter(&stream, record_batch->schema(), options); \
+    for (int i = 0; i < kBatches; i++) {                                          \
+      ABORT_NOT_OK(writer->WriteRecordBatch(*record_batch));                      \
+    }                                                                             \
+    ABORT_NOT_OK(writer->Close());                                                \
+    ABORT_NOT_OK(stream.Close());                                                 \
+  }
+
+#define GENERATE_DATA_IN_MEMORY()                                                 \
+  constexpr int64_t kBatchSize = 1 << 20; /* 1 MB */                              \
+  constexpr int64_t kBatches = 1;                                                 \
+  auto options = ipc::IpcWriteOptions::Defaults();                                \
+  std::shared_ptr<ResizableBuffer> buffer = *AllocateResizableBuffer(1024);       \
+  {                                                                               \
+    auto record_batch = MakeRecordBatch(kBatchSize, state.range(0));              \
+    io::BufferOutputStream stream(buffer);                                        \
+    auto writer = *ipc::MakeFileWriter(&stream, record_batch->schema(), options); \
+    ABORT_NOT_OK(writer->WriteRecordBatch(*record_batch));                        \
+    ABORT_NOT_OK(writer->Close());                                                \
+    ABORT_NOT_OK(stream.Close());                                                 \
+  }
+
+#define GENERATE_DATA_TEMP_FILE()                                                 \
+  constexpr int64_t kBatchSize = 1 << 20; /* 1 MB */                              \
+  constexpr int64_t kBatches = 16;                                                \
+  auto options = ipc::IpcWriteOptions::Defaults();                                \
+  ASSIGN_OR_ABORT(auto sink, io::FileOutputStream::Open("/tmp/benchmark.arrow")); \
+  {                                                                               \
+    auto record_batch = MakeRecordBatch(kBatchSize, state.range(0));              \
+    auto writer = *ipc::MakeFileWriter(sink, record_batch->schema(), options);    \
+    ABORT_NOT_OK(writer->WriteRecordBatch(*record_batch));                        \
+    ABORT_NOT_OK(writer->Close());                                                \
+    ABORT_NOT_OK(sink->Close());                                                  \
+  }
+
+#define READ_DATA_IN_MEMORY() auto input = std::make_shared<io::BufferReader>(buffer);
+#define READ_DATA_TEMP_FILE() \
+  ASSIGN_OR_ABORT(auto input, io::ReadableFile::Open("/tmp/benchmark.arrow"));
+#define READ_DATA_MMAP_FILE()                                                    \
+  ASSIGN_OR_ABORT(auto input, io::MemoryMappedFile::Open("/tmp/benchmark.arrow", \
+                                                         io::FileMode::type::READ));
+
+#define READ_SYNC(NAME, GENERATE, READ)                                                 \
+  static void NAME(benchmark::State& state) {                                           \
+    GENERATE();                                                                         \
+    for (auto _ : state) {                                                              \
+      READ();                                                                           \
+      auto reader = *ipc::RecordBatchFileReader::Open(input.get(),                      \
+                                                      ipc::IpcReadOptions::Defaults()); \
+      const int num_batches = reader->num_record_batches();                             \
+      for (int i = 0; i < num_batches; ++i) {                                           \
+        auto batch = *reader->ReadRecordBatch(i);                                       \
+      }                                                                                 \
+    }                                                                                   \
+    state.SetBytesProcessed(int64_t(state.iterations()) * kBatchSize * kBatches);       \
+  }                                                                                     \
+  BENCHMARK(NAME)->RangeMultiplier(4)->Range(1, 1 << 13)->UseRealTime();
+
+#define READ_ASYNC(NAME, GENERATE, READ)                                                \
+  static void NAME##Async(benchmark::State& state) {                                    \
+    GENERATE();                                                                         \
+    for (auto _ : state) {                                                              \
+      READ();                                                                           \
+      auto reader = *ipc::RecordBatchFileReader::Open(input.get(),                      \
+                                                      ipc::IpcReadOptions::Defaults()); \
+      ASSIGN_OR_ABORT(auto generator, reader->GetRecordBatchGenerator());               \
+      const int num_batches = reader->num_record_batches();                             \
+      for (int i = 0; i < num_batches; ++i) {                                           \
+        auto batch = *generator().result();                                             \
+      }                                                                                 \
+    }                                                                                   \
+    state.SetBytesProcessed(int64_t(state.iterations()) * kBatchSize * kBatches);       \
+  }                                                                                     \
+  BENCHMARK(NAME##Async)->RangeMultiplier(4)->Range(1, 1 << 13)->UseRealTime();
+
+#define READ_BENCHMARK(NAME, GENERATE, READ) \
+  READ_SYNC(NAME, GENERATE, READ);           \
+  READ_ASYNC(NAME, GENERATE, READ);
+
+READ_BENCHMARK(ReadFile, GENERATE_DATA_IN_MEMORY, READ_DATA_IN_MEMORY);
+READ_BENCHMARK(ReadTempFile, GENERATE_DATA_TEMP_FILE, READ_DATA_TEMP_FILE);
+READ_BENCHMARK(ReadMmapFile, GENERATE_DATA_TEMP_FILE, READ_DATA_MMAP_FILE);
+READ_BENCHMARK(ReadCompressedFile, GENERATE_COMPRESSED_DATA_IN_MEMORY,
+               READ_DATA_IN_MEMORY);
+
 BENCHMARK(WriteRecordBatch)->RangeMultiplier(4)->Range(1, 1 << 13)->UseRealTime();
 BENCHMARK(ReadRecordBatch)->RangeMultiplier(4)->Range(1, 1 << 13)->UseRealTime();
-BENCHMARK(ReadFile)->RangeMultiplier(4)->Range(1, 1 << 13)->UseRealTime();
 BENCHMARK(ReadStream)->RangeMultiplier(4)->Range(1, 1 << 13)->UseRealTime();
 BENCHMARK(DecodeStream)->RangeMultiplier(4)->Range(1, 1 << 13)->UseRealTime();
 
diff --git a/cpp/src/arrow/ipc/read_write_test.cc b/cpp/src/arrow/ipc/read_write_test.cc
index 2efa79de8e0..9f8d69d2537 100644
--- a/cpp/src/arrow/ipc/read_write_test.cc
+++ b/cpp/src/arrow/ipc/read_write_test.cc
@@ -40,6 +40,7 @@
 #include "arrow/status.h"
 #include "arrow/table.h"
 #include "arrow/testing/extension_type.h"
+#include "arrow/testing/future_util.h"
 #include "arrow/testing/gtest_util.h"
 #include "arrow/testing/random.h"
 #include "arrow/testing/util.h"
@@ -963,24 +964,6 @@ struct FileWriterHelper {
     return Status::OK();
   }
 
-  virtual Status Read(const IpcReadOptions& options, RecordBatchVector* out_batches,
-                      ReadStats* out_stats = nullptr) {
-    auto buf_reader = std::make_shared<io::BufferReader>(buffer_);
-    ARROW_ASSIGN_OR_RAISE(auto reader, RecordBatchFileReader::Open(
-                                           buf_reader.get(), footer_offset_, options));
-
-    EXPECT_EQ(num_batches_written_, reader->num_record_batches());
-    for (int i = 0; i < num_batches_written_; ++i) {
-      ARROW_ASSIGN_OR_RAISE(std::shared_ptr<RecordBatch> chunk,
-                            reader->ReadRecordBatch(i));
-      out_batches->push_back(chunk);
-    }
-    if (out_stats) {
-      *out_stats = reader->stats();
-    }
-    return Status::OK();
-  }
-
   Status ReadSchema(std::shared_ptr<Schema>* out) {
     return ReadSchema(ipc::IpcReadOptions::Defaults(), out);
   }
@@ -1009,6 +992,42 @@ struct FileWriterHelper {
   int64_t footer_offset_;
 };
 
+struct FileGeneratorWriterHelper : public FileWriterHelper {
+  Status ReadBatches(const IpcReadOptions& options, RecordBatchVector* out_batches,
+                     ReadStats* out_stats = nullptr) override {
+    auto buf_reader = std::make_shared<io::BufferReader>(buffer_);
+    AsyncGenerator<std::shared_ptr<RecordBatch>> generator;
+
+    {
+      auto fut =
+          RecordBatchFileReader::OpenAsync(buf_reader.get(), footer_offset_, options);
+      // Do NOT assert OK since some tests check whether this fails properly
+      EXPECT_FINISHES(fut);
+      ARROW_ASSIGN_OR_RAISE(auto reader, fut.result());
+      EXPECT_EQ(num_batches_written_, reader->num_record_batches());
+      // Generator will keep reader alive internally
+      ARROW_ASSIGN_OR_RAISE(generator, reader->GetRecordBatchGenerator());
+    }
+
+    // Generator is async-reentrant
+    std::vector<Future<std::shared_ptr<RecordBatch>>> futures;
+    for (int i = 0; i < num_batches_written_; ++i) {
+      futures.push_back(generator());
+    }
+    auto fut = generator();
+    EXPECT_FINISHES_OK_AND_EQ(nullptr, fut);
+    for (auto& future : futures) {
+      EXPECT_FINISHES_OK_AND_ASSIGN(auto batch, future);
+      out_batches->push_back(batch);
+    }
+
+    // The generator doesn't track stats.
+    EXPECT_EQ(nullptr, out_stats);
+
+    return Status::OK();
+  }
+};
+
 struct StreamWriterHelper {
   static constexpr bool kIsFileFormat = false;
 
@@ -1342,6 +1361,9 @@ class ReaderWriterMixin : public ExtensionTypesMixin {
 class TestFileFormat : public ReaderWriterMixin<FileWriterHelper>,
                        public ::testing::TestWithParam<MakeRecordBatch*> {};
 
+class TestFileFormatGenerator : public ReaderWriterMixin<FileGeneratorWriterHelper>,
+                                public ::testing::TestWithParam<MakeRecordBatch*> {};
+
 class TestStreamFormat : public ReaderWriterMixin<StreamWriterHelper>,
                          public ::testing::TestWithParam<MakeRecordBatch*> {};
 
@@ -1366,6 +1388,16 @@ TEST_P(TestFileFormat, RoundTrip) {
   TestZeroLengthRoundTrip(*GetParam(), options);
 }
 
+TEST_P(TestFileFormatGenerator, RoundTrip) {
+  TestRoundTrip(*GetParam(), IpcWriteOptions::Defaults());
+  TestZeroLengthRoundTrip(*GetParam(), IpcWriteOptions::Defaults());
+
+  IpcWriteOptions options;
+  options.write_legacy_ipc_format = true;
+  TestRoundTrip(*GetParam(), options);
+  TestZeroLengthRoundTrip(*GetParam(), options);
+}
+
 Status MakeDictionaryBatch(std::shared_ptr<RecordBatch>* out) {
   auto f0_type = arrow::dictionary(int32(), utf8());
   auto f1_type = arrow::dictionary(int8(), utf8());
@@ -1571,6 +1603,8 @@ INSTANTIATE_TEST_SUITE_P(GenericIpcRoundTripTests, TestIpcRoundTrip,
                          ::testing::ValuesIn(kBatchCases));
 INSTANTIATE_TEST_SUITE_P(FileRoundTripTests, TestFileFormat,
                          ::testing::ValuesIn(kBatchCases));
+INSTANTIATE_TEST_SUITE_P(FileRoundTripTests, TestFileFormatGenerator,
+                         ::testing::ValuesIn(kBatchCases));
 INSTANTIATE_TEST_SUITE_P(StreamRoundTripTests, TestStreamFormat,
                          ::testing::ValuesIn(kBatchCases));
 INSTANTIATE_TEST_SUITE_P(StreamDecoderDataRoundTripTests, TestStreamDecoderData,
@@ -1635,18 +1669,26 @@ TEST_F(TestStreamFormat, DictionaryRoundTrip) { TestDictionaryRoundtrip(); }
 
 TEST_F(TestFileFormat, DictionaryRoundTrip) { TestDictionaryRoundtrip(); }
 
+TEST_F(TestFileFormatGenerator, DictionaryRoundTrip) { TestDictionaryRoundtrip(); }
+
 TEST_F(TestStreamFormat, DifferentSchema) { TestWriteDifferentSchema(); }
 
 TEST_F(TestFileFormat, DifferentSchema) { TestWriteDifferentSchema(); }
 
+TEST_F(TestFileFormatGenerator, DifferentSchema) { TestWriteDifferentSchema(); }
+
 TEST_F(TestStreamFormat, NoRecordBatches) { TestWriteNoRecordBatches(); }
 
 TEST_F(TestFileFormat, NoRecordBatches) { TestWriteNoRecordBatches(); }
 
+TEST_F(TestFileFormatGenerator, NoRecordBatches) { TestWriteNoRecordBatches(); }
+
 TEST_F(TestStreamFormat, ReadFieldSubset) { TestReadSubsetOfFields(); }
 
 TEST_F(TestFileFormat, ReadFieldSubset) { TestReadSubsetOfFields(); }
 
+TEST_F(TestFileFormatGenerator, ReadFieldSubset) { TestReadSubsetOfFields(); }
+
 TEST(TestRecordBatchStreamReader, EmptyStreamWithDictionaries) {
   // ARROW-6006
   auto f0 = arrow::field("f0", arrow::dictionary(arrow::int8(), arrow::utf8()));
diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc
index 45a3d3e3cd8..7c3115b7c3f 100644
--- a/cpp/src/arrow/ipc/reader.cc
+++ b/cpp/src/arrow/ipc/reader.cc
@@ -31,6 +31,7 @@
 #include "arrow/array.h"
 #include "arrow/buffer.h"
 #include "arrow/extension_type.h"
+#include "arrow/io/caching.h"
 #include "arrow/io/interfaces.h"
 #include "arrow/io/memory.h"
 #include "arrow/ipc/message.h"
@@ -51,6 +52,7 @@
 #include "arrow/util/logging.h"
 #include "arrow/util/parallel.h"
 #include "arrow/util/string.h"
+#include "arrow/util/thread_pool.h"
 #include "arrow/util/ubsan.h"
 #include "arrow/visitor_inline.h"
 
@@ -958,10 +960,94 @@ Result<std::shared_ptr<RecordBatchStreamReader>> RecordBatchStreamReader::Open(
 // ----------------------------------------------------------------------
 // Reader implementation
 
+// Common functions used in both the random-access file reader and the
+// asynchronous generator
 static inline FileBlock FileBlockFromFlatbuffer(const flatbuf::Block* block) {
   return FileBlock{block->offset(), block->metaDataLength(), block->bodyLength()};
 }
 
+static Result<std::unique_ptr<Message>> ReadMessageFromBlock(const FileBlock& block,
+                                                             io::RandomAccessFile* file) {
+  if (!BitUtil::IsMultipleOf8(block.offset) ||
+      !BitUtil::IsMultipleOf8(block.metadata_length) ||
+      !BitUtil::IsMultipleOf8(block.body_length)) {
+    return Status::Invalid("Unaligned block in IPC file");
+  }
+
+  // TODO(wesm): this breaks integration tests, see ARROW-3256
+  // DCHECK_EQ((*out)->body_length(), block.body_length);
+
+  ARROW_ASSIGN_OR_RAISE(auto message,
+                        ReadMessage(block.offset, block.metadata_length, file));
+  return std::move(message);
+}
+
+static Future<std::shared_ptr<Message>> ReadMessageFromBlockAsync(
+    const FileBlock& block, io::RandomAccessFile* file, const io::IOContext& io_context) {
+  if (!BitUtil::IsMultipleOf8(block.offset) ||
+      !BitUtil::IsMultipleOf8(block.metadata_length) ||
+      !BitUtil::IsMultipleOf8(block.body_length)) {
+    return Status::Invalid("Unaligned block in IPC file");
+  }
+
+  // TODO(wesm): this breaks integration tests, see ARROW-3256
+  // DCHECK_EQ((*out)->body_length(), block.body_length);
+
+  return ReadMessageAsync(block.offset, block.metadata_length, block.body_length, file,
+                          io_context);
+}
+
+static Status ReadOneDictionary(Message* message, const IpcReadContext& context) {
+  CHECK_HAS_BODY(*message);
+  ARROW_ASSIGN_OR_RAISE(auto reader, Buffer::GetReader(message->body()));
+  DictionaryKind kind;
+  RETURN_NOT_OK(ReadDictionary(*message->metadata(), context, &kind, reader.get()));
+  if (kind != DictionaryKind::New) {
+    return Status::Invalid(
+        "Unsupported dictionary replacement or "
+        "dictionary delta in IPC file");
+  }
+  return Status::OK();
+}
+
+class RecordBatchFileReaderImpl;
+
+/// A generator of record batches.
+///
+/// All batches are yielded in order.
+class ARROW_EXPORT IpcFileRecordBatchGenerator {
+ public:
+  using Item = std::shared_ptr<RecordBatch>;
+
+  explicit IpcFileRecordBatchGenerator(
+      std::shared_ptr<RecordBatchFileReaderImpl> state,
+      std::shared_ptr<io::internal::ReadRangeCache> cached_source,
+      const io::IOContext& io_context, arrow::internal::Executor* executor)
+      : state_(std::move(state)),
+        cached_source_(std::move(cached_source)),
+        io_context_(io_context),
+        executor_(executor),
+        index_(0) {}
+
+  Future<Item> operator()();
+  Future<std::shared_ptr<Message>> ReadBlock(const FileBlock& block);
+
+  static Status ReadDictionaries(
+      RecordBatchFileReaderImpl* state,
+      std::vector<std::shared_ptr<Message>> dictionary_messages);
+  static Result<std::shared_ptr<RecordBatch>> ReadRecordBatch(
+      RecordBatchFileReaderImpl* state, Message* message);
+
+ private:
+  std::shared_ptr<RecordBatchFileReaderImpl> state_;
+  std::shared_ptr<io::internal::ReadRangeCache> cached_source_;
+  io::IOContext io_context_;
+  arrow::internal::Executor* executor_;
+  int index_;
+  // Odd Future type, but this lets us use All() easily
+  Future<> read_dictionaries_;
+};
+
 class RecordBatchFileReaderImpl : public RecordBatchFileReader {
  public:
   RecordBatchFileReaderImpl() : file_(NULLPTR), footer_offset_(0), footer_(NULLPTR) {}
@@ -1035,13 +1121,70 @@ class RecordBatchFileReaderImpl : public RecordBatchFileReader {
     return Status::OK();
   }
 
+  Future<> OpenAsync(const std::shared_ptr<io::RandomAccessFile>& file,
+                     int64_t footer_offset, const IpcReadOptions& options) {
+    owned_file_ = file;
+    return OpenAsync(file.get(), footer_offset, options);
+  }
+
+  Future<> OpenAsync(io::RandomAccessFile* file, int64_t footer_offset,
+                     const IpcReadOptions& options) {
+    file_ = file;
+    options_ = options;
+    footer_offset_ = footer_offset;
+    auto cpu_executor = ::arrow::internal::GetCpuThreadPool();
+    auto self = std::dynamic_pointer_cast<RecordBatchFileReaderImpl>(shared_from_this());
+    return ReadFooterAsync(cpu_executor)
+        .Then([self, options](const detail::Empty&) -> Status {
+          // Get the schema and record any observed dictionaries
+          RETURN_NOT_OK(UnpackSchemaMessage(
+              self->footer_->schema(), options, &self->dictionary_memo_, &self->schema_,
+              &self->out_schema_, &self->field_inclusion_mask_, &self->swap_endian_));
+          ++self->stats_.num_messages;
+          return Status::OK();
+        });
+  }
+
   std::shared_ptr<Schema> schema() const override { return out_schema_; }
 
   std::shared_ptr<const KeyValueMetadata> metadata() const override { return metadata_; }
 
   ReadStats stats() const override { return stats_; }
 
+  Result<AsyncGenerator<std::shared_ptr<RecordBatch>>> GetRecordBatchGenerator(
+      const bool coalesce, const io::IOContext& io_context,
+      const io::CacheOptions cache_options,
+      arrow::internal::Executor* executor) override {
+    auto state = std::dynamic_pointer_cast<RecordBatchFileReaderImpl>(shared_from_this());
+    std::shared_ptr<io::internal::ReadRangeCache> cached_source;
+    if (coalesce) {
+      if (!owned_file_) return Status::Invalid("Cannot coalesce without an owned file");
+      cached_source = std::make_shared<io::internal::ReadRangeCache>(
+          owned_file_, io_context, cache_options);
+      auto num_dictionaries = this->num_dictionaries();
+      auto num_record_batches = this->num_record_batches();
+      std::vector<io::ReadRange> ranges(num_dictionaries + num_record_batches);
+      for (int i = 0; i < num_dictionaries; i++) {
+        auto block = FileBlockFromFlatbuffer(footer_->dictionaries()->Get(i));
+        ranges[i].offset = block.offset;
+        ranges[i].length = block.metadata_length + block.body_length;
+      }
+      for (int i = 0; i < num_record_batches; i++) {
+        auto block = FileBlockFromFlatbuffer(footer_->recordBatches()->Get(i));
+        ranges[num_dictionaries + i].offset = block.offset;
+        ranges[num_dictionaries + i].length = block.metadata_length + block.body_length;
+      }
+      RETURN_NOT_OK(cached_source->Cache(std::move(ranges)));
+    }
+    return IpcFileRecordBatchGenerator(std::move(state), std::move(cached_source),
+                                       io_context, executor);
+  }
+
  private:
+  friend AsyncGenerator<std::shared_ptr<Message>> MakeMessageGenerator(
+      std::shared_ptr<RecordBatchFileReaderImpl>, const io::IOContext&);
+  friend class IpcFileRecordBatchGenerator;
+
   FileBlock GetRecordBatchBlock(int i) const {
     return FileBlockFromFlatbuffer(footer_->recordBatches()->Get(i));
   }
@@ -1051,42 +1194,28 @@ class RecordBatchFileReaderImpl : public RecordBatchFileReader {
   }
 
   Result<std::unique_ptr<Message>> ReadMessageFromBlock(const FileBlock& block) {
-    if (!BitUtil::IsMultipleOf8(block.offset) ||
-        !BitUtil::IsMultipleOf8(block.metadata_length) ||
-        !BitUtil::IsMultipleOf8(block.body_length)) {
-      return Status::Invalid("Unaligned block in IPC file");
-    }
-
-    // TODO(wesm): this breaks integration tests, see ARROW-3256
-    // DCHECK_EQ((*out)->body_length(), block.body_length);
-
-    ARROW_ASSIGN_OR_RAISE(auto message,
-                          ReadMessage(block.offset, block.metadata_length, file_));
+    ARROW_ASSIGN_OR_RAISE(auto message, arrow::ipc::ReadMessageFromBlock(block, file_));
     ++stats_.num_messages;
     return std::move(message);
   }
 
   Status ReadDictionaries() {
     // Read all the dictionaries
+    IpcReadContext context(&dictionary_memo_, options_, swap_endian_);
     for (int i = 0; i < num_dictionaries(); ++i) {
       ARROW_ASSIGN_OR_RAISE(auto message, ReadMessageFromBlock(GetDictionaryBlock(i)));
-
-      CHECK_HAS_BODY(*message);
-      ARROW_ASSIGN_OR_RAISE(auto reader, Buffer::GetReader(message->body()));
-      DictionaryKind kind;
-      IpcReadContext context(&dictionary_memo_, options_, swap_endian_);
-      RETURN_NOT_OK(ReadDictionary(*message->metadata(), context, &kind, reader.get()));
+      RETURN_NOT_OK(ReadOneDictionary(message.get(), context));
       ++stats_.num_dictionary_batches;
-      if (kind != DictionaryKind::New) {
-        return Status::Invalid(
-            "Unsupported dictionary replacement or "
-            "dictionary delta in IPC file");
-      }
     }
     return Status::OK();
   }
 
   Status ReadFooter() {
+    auto fut = ReadFooterAsync(/*executor=*/nullptr);
+    return fut.status();
+  }
+
+  Future<> ReadFooterAsync(arrow::internal::Executor* executor) {
     const int32_t magic_size = static_cast<int>(strlen(kArrowMagicBytes));
 
     if (footer_offset_ <= magic_size * 2 + 4) {
@@ -1094,45 +1223,53 @@ class RecordBatchFileReaderImpl : public RecordBatchFileReader {
     }
 
     int file_end_size = static_cast<int>(magic_size + sizeof(int32_t));
-    ARROW_ASSIGN_OR_RAISE(auto buffer,
-                          file_->ReadAt(footer_offset_ - file_end_size, file_end_size));
-
-    const int64_t expected_footer_size = magic_size + sizeof(int32_t);
-    if (buffer->size() < expected_footer_size) {
-      return Status::Invalid("Unable to read ", expected_footer_size, "from end of file");
-    }
-
-    if (memcmp(buffer->data() + sizeof(int32_t), kArrowMagicBytes, magic_size)) {
-      return Status::Invalid("Not an Arrow file");
-    }
-
-    int32_t footer_length =
-        BitUtil::FromLittleEndian(*reinterpret_cast<const int32_t*>(buffer->data()));
-
-    if (footer_length <= 0 || footer_length > footer_offset_ - magic_size * 2 - 4) {
-      return Status::Invalid("File is smaller than indicated metadata size");
-    }
-
-    // Now read the footer
-    ARROW_ASSIGN_OR_RAISE(
-        footer_buffer_,
-        file_->ReadAt(footer_offset_ - footer_length - file_end_size, footer_length));
-
-    const auto data = footer_buffer_->data();
-    const auto size = footer_buffer_->size();
-    if (!internal::VerifyFlatbuffers<flatbuf::Footer>(data, size)) {
-      return Status::IOError("Verification of flatbuffer-encoded Footer failed.");
-    }
-    footer_ = flatbuf::GetFooter(data);
-
-    auto fb_metadata = footer_->custom_metadata();
-    if (fb_metadata != nullptr) {
-      std::shared_ptr<KeyValueMetadata> md;
-      RETURN_NOT_OK(internal::GetKeyValueMetadata(fb_metadata, &md));
-      metadata_ = std::move(md);  // const-ify
-    }
-
-    return Status::OK();
+    auto self = std::dynamic_pointer_cast<RecordBatchFileReaderImpl>(shared_from_this());
+    auto read_magic = file_->ReadAsync(footer_offset_ - file_end_size, file_end_size);
+    if (executor) read_magic = executor->Transfer(std::move(read_magic));
+    return read_magic
+        .Then([=](const std::shared_ptr<Buffer>& buffer)
+                  -> Future<std::shared_ptr<Buffer>> {
+          const int64_t expected_footer_size = magic_size + sizeof(int32_t);
+          if (buffer->size() < expected_footer_size) {
+            return Status::Invalid("Unable to read ", expected_footer_size,
+                                   "from end of file");
+          }
+
+          if (memcmp(buffer->data() + sizeof(int32_t), kArrowMagicBytes, magic_size)) {
+            return Status::Invalid("Not an Arrow file");
+          }
+
+          int32_t footer_length = BitUtil::FromLittleEndian(
+              *reinterpret_cast<const int32_t*>(buffer->data()));
+
+          if (footer_length <= 0 ||
+              footer_length > self->footer_offset_ - magic_size * 2 - 4) {
+            return Status::Invalid("File is smaller than indicated metadata size");
+          }
+
+          // Now read the footer
+          auto read_footer = self->file_->ReadAsync(
+              self->footer_offset_ - footer_length - file_end_size, footer_length);
+          if (executor) read_footer = executor->Transfer(std::move(read_footer));
+          return read_footer;
+        })
+        .Then([=](const std::shared_ptr<Buffer>& buffer) -> Status {
+          self->footer_buffer_ = buffer;
+          const auto data = self->footer_buffer_->data();
+          const auto size = self->footer_buffer_->size();
+          if (!internal::VerifyFlatbuffers<flatbuf::Footer>(data, size)) {
+            return Status::IOError("Verification of flatbuffer-encoded Footer failed.");
+          }
+          self->footer_ = flatbuf::GetFooter(data);
+
+          auto fb_metadata = self->footer_->custom_metadata();
+          if (fb_metadata != nullptr) {
+            std::shared_ptr<KeyValueMetadata> md;
+            RETURN_NOT_OK(internal::GetKeyValueMetadata(fb_metadata, &md));
+            self->metadata_ = std::move(md);  // const-ify
+          }
+          return Status::OK();
+        });
   }
 
   int num_dictionaries() const {
@@ -1194,6 +1331,115 @@ Result<std::shared_ptr<RecordBatchFileReader>> RecordBatchFileReader::Open(
   return result;
 }
 
+Future<std::shared_ptr<RecordBatchFileReader>> RecordBatchFileReader::OpenAsync(
+    const std::shared_ptr<io::RandomAccessFile>& file, const IpcReadOptions& options) {
+  ARROW_ASSIGN_OR_RAISE(int64_t footer_offset, file->GetSize());
+  return OpenAsync(std::move(file), footer_offset, options);
+}
+
+Future<std::shared_ptr<RecordBatchFileReader>> RecordBatchFileReader::OpenAsync(
+    io::RandomAccessFile* file, const IpcReadOptions& options) {
+  ARROW_ASSIGN_OR_RAISE(int64_t footer_offset, file->GetSize());
+  return OpenAsync(file, footer_offset, options);
+}
+
+Future<std::shared_ptr<RecordBatchFileReader>> RecordBatchFileReader::OpenAsync(
+    const std::shared_ptr<io::RandomAccessFile>& file, int64_t footer_offset,
+    const IpcReadOptions& options) {
+  auto result = std::make_shared<RecordBatchFileReaderImpl>();
+  return result->OpenAsync(file, footer_offset, options)
+      .Then(
+          [=](...) -> Result<std::shared_ptr<RecordBatchFileReader>> { return result; });
+}
+
+Future<std::shared_ptr<RecordBatchFileReader>> RecordBatchFileReader::OpenAsync(
+    io::RandomAccessFile* file, int64_t footer_offset, const IpcReadOptions& options) {
+  auto result = std::make_shared<RecordBatchFileReaderImpl>();
+  return result->OpenAsync(file, footer_offset, options)
+      .Then(
+          [=](...) -> Result<std::shared_ptr<RecordBatchFileReader>> { return result; });
+}
+
+Future<IpcFileRecordBatchGenerator::Item> IpcFileRecordBatchGenerator::operator()() {
+  auto state = state_;
+  if (!read_dictionaries_.is_valid()) {
+    std::vector<Future<std::shared_ptr<Message>>> messages(state->num_dictionaries());
+    for (int i = 0; i < state->num_dictionaries(); i++) {
+      auto block = FileBlockFromFlatbuffer(state->footer_->dictionaries()->Get(i));
+      messages[i] = ReadBlock(block);
+    }
+    auto read_messages = All(std::move(messages));
+    if (executor_) read_messages = executor_->Transfer(read_messages);
+    read_dictionaries_ = read_messages.Then(
+        [=](const std::vector<Result<std::shared_ptr<Message>>> maybe_messages)
+            -> Status {
+          std::vector<std::shared_ptr<Message>> messages(state->num_dictionaries());
+          for (size_t i = 0; i < messages.size(); i++) {
+            ARROW_ASSIGN_OR_RAISE(messages[i], maybe_messages[i]);
+          }
+          return ReadDictionaries(state.get(), std::move(messages));
+        });
+  }
+  if (index_ >= state_->num_record_batches()) {
+    return Future<Item>::MakeFinished(IterationTraits<Item>::End());
+  }
+  auto block = FileBlockFromFlatbuffer(state->footer_->recordBatches()->Get(index_++));
+  auto read_message = ReadBlock(block);
+  auto read_messages = read_dictionaries_.Then(
+      [read_message](const detail::Empty&) { return read_message; });
+  // Force transfer. This may be wasteful in some cases, but ensures we get off the
+  // I/O threads as soon as possible, and ensures we don't decode record batches
+  // synchronously in the case that the message read has already finished.
+  if (executor_) {
+    auto executor = executor_;
+    return read_messages.Then(
+        [=](const std::shared_ptr<Message>& message) -> Future<Item> {
+          return DeferNotOk(executor->Submit(
+              [=]() { return ReadRecordBatch(state.get(), message.get()); }));
+        });
+  }
+  return read_messages.Then([=](const std::shared_ptr<Message>& message) -> Result<Item> {
+    return ReadRecordBatch(state.get(), message.get());
+  });
+}
+
+Future<std::shared_ptr<Message>> IpcFileRecordBatchGenerator::ReadBlock(
+    const FileBlock& block) {
+  if (cached_source_) {
+    auto cached_source = cached_source_;
+    io::ReadRange range{block.offset, block.metadata_length + block.body_length};
+    auto pool = state_->options_.memory_pool;
+    return cached_source->WaitFor({range}).Then(
+        [cached_source, pool,
+         range](const detail::Empty&) -> Result<std::shared_ptr<Message>> {
+          ARROW_ASSIGN_OR_RAISE(auto buffer, cached_source->Read(range));
+          io::BufferReader stream(std::move(buffer));
+          return ReadMessage(&stream, pool);
+        });
+  } else {
+    return ReadMessageFromBlockAsync(block, state_->file_, io_context_);
+  }
+}
+
+Status IpcFileRecordBatchGenerator::ReadDictionaries(
+    RecordBatchFileReaderImpl* state,
+    std::vector<std::shared_ptr<Message>> dictionary_messages) {
+  IpcReadContext context(&state->dictionary_memo_, state->options_, state->swap_endian_);
+  for (const auto& message : dictionary_messages) {
+    RETURN_NOT_OK(ReadOneDictionary(message.get(), context));
+  }
+  return Status::OK();
+}
+
+Result<std::shared_ptr<RecordBatch>> IpcFileRecordBatchGenerator::ReadRecordBatch(
+    RecordBatchFileReaderImpl* state, Message* message) {
+  CHECK_HAS_BODY(*message);
+  ARROW_ASSIGN_OR_RAISE(auto reader, Buffer::GetReader(message->body()));
+  IpcReadContext context(&state->dictionary_memo_, state->options_, state->swap_endian_);
+  return ReadRecordBatchInternal(*message->metadata(), state->schema_,
+                                 state->field_inclusion_mask_, context, reader.get());
+}
+
 Status Listener::OnEOS() { return Status::OK(); }
 
 Status Listener::OnSchemaDecoded(std::shared_ptr<Schema> schema) { return Status::OK(); }
diff --git a/cpp/src/arrow/ipc/reader.h b/cpp/src/arrow/ipc/reader.h
index 38f7f2ed8b9..6f2157557f3 100644
--- a/cpp/src/arrow/ipc/reader.h
+++ b/cpp/src/arrow/ipc/reader.h
@@ -25,12 +25,14 @@
 #include <utility>
 #include <vector>
 
+#include "arrow/io/caching.h"
 #include "arrow/io/type_fwd.h"
 #include "arrow/ipc/message.h"
 #include "arrow/ipc/options.h"
 #include "arrow/record_batch.h"
 #include "arrow/result.h"
 #include "arrow/type_fwd.h"
+#include "arrow/util/async_generator.h"
 #include "arrow/util/macros.h"
 #include "arrow/util/visibility.h"
 
@@ -99,7 +101,8 @@ class ARROW_EXPORT RecordBatchStreamReader : public RecordBatchReader {
 };
 
 /// \brief Reads the record batch file format
-class ARROW_EXPORT RecordBatchFileReader {
+class ARROW_EXPORT RecordBatchFileReader
+    : public std::enable_shared_from_this<RecordBatchFileReader> {
  public:
   virtual ~RecordBatchFileReader() = default;
 
@@ -147,6 +150,26 @@ class ARROW_EXPORT RecordBatchFileReader {
       const std::shared_ptr<io::RandomAccessFile>& file, int64_t footer_offset,
       const IpcReadOptions& options = IpcReadOptions::Defaults());
 
+  /// \brief Open a file asynchronously (owns the file).
+  static Future<std::shared_ptr<RecordBatchFileReader>> OpenAsync(
+      const std::shared_ptr<io::RandomAccessFile>& file,
+      const IpcReadOptions& options = IpcReadOptions::Defaults());
+
+  /// \brief Open a file asynchronously (borrows the file).
+  static Future<std::shared_ptr<RecordBatchFileReader>> OpenAsync(
+      io::RandomAccessFile* file,
+      const IpcReadOptions& options = IpcReadOptions::Defaults());
+
+  /// \brief Open a file asynchronously (owns the file).
+  static Future<std::shared_ptr<RecordBatchFileReader>> OpenAsync(
+      const std::shared_ptr<io::RandomAccessFile>& file, int64_t footer_offset,
+      const IpcReadOptions& options = IpcReadOptions::Defaults());
+
+  /// \brief Open a file asynchronously (borrows the file).
+  static Future<std::shared_ptr<RecordBatchFileReader>> OpenAsync(
+      io::RandomAccessFile* file, int64_t footer_offset,
+      const IpcReadOptions& options = IpcReadOptions::Defaults());
+
   /// \brief The schema read from the file
   virtual std::shared_ptr<Schema> schema() const = 0;
 
@@ -172,6 +195,21 @@ class ARROW_EXPORT RecordBatchFileReader {
 
   /// \brief Computes the total number of rows in the file.
   virtual Result<int64_t> CountRows() = 0;
+
+  /// \brief Get a reentrant generator of record batches.
+  ///
+  /// \param[in] coalesce If true, enable I/O coalescing.
+  /// \param[in] io_context The IOContext to use (controls which thread pool
+  ///     is used for I/O).
+  /// \param[in] cache_options Options for coalescing (if enabled).
+  /// \param[in] executor Optionally, an executor to use for decoding record
+  ///     batches. This is generally only a benefit for very wide and/or
+  ///     compressed batches.
+  virtual Result<AsyncGenerator<std::shared_ptr<RecordBatch>>> GetRecordBatchGenerator(
+      const bool coalesce = false,
+      const io::IOContext& io_context = io::default_io_context(),
+      const io::CacheOptions cache_options = io::CacheOptions::LazyDefaults(),
+      arrow::internal::Executor* executor = NULLPTR) = 0;
 };
 
 /// \brief A general listener class to receive events.
diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc
index 63e3cb93a25..c80e8f6f680 100644
--- a/cpp/src/arrow/memory_pool.cc
+++ b/cpp/src/arrow/memory_pool.cc
@@ -18,9 +18,10 @@
 #include "arrow/memory_pool.h"
 
 #include <algorithm>  // IWYU pragma: keep
-#include <cstdlib>    // IWYU pragma: keep
-#include <cstring>    // IWYU pragma: keep
-#include <iostream>   // IWYU pragma: keep
+#include <atomic>
+#include <cstdlib>   // IWYU pragma: keep
+#include <cstring>   // IWYU pragma: keep
+#include <iostream>  // IWYU pragma: keep
 #include <limits>
 #include <memory>
 
@@ -28,12 +29,16 @@
 #include <stdlib.h>
 #endif
 
+#include "arrow/buffer.h"
+#include "arrow/io/util_internal.h"
 #include "arrow/result.h"
 #include "arrow/status.h"
+#include "arrow/util/bit_util.h"
 #include "arrow/util/io_util.h"
 #include "arrow/util/logging.h"  // IWYU pragma: keep
 #include "arrow/util/optional.h"
 #include "arrow/util/string.h"
+#include "arrow/util/thread_pool.h"
 
 #ifdef __GLIBC__
 #include <malloc.h>
@@ -494,19 +499,27 @@ std::unique_ptr<MemoryPool> MemoryPool::CreateDefault() {
   }
 }
 
-static SystemMemoryPool system_pool;
+static struct GlobalState {
+  ~GlobalState() { finalizing.store(true, std::memory_order_relaxed); }
+
+  bool is_finalizing() const { return finalizing.load(std::memory_order_relaxed); }
+
+  std::atomic<bool> finalizing{false};  // constructed first, destroyed last
+
+  SystemMemoryPool system_pool;
 #ifdef ARROW_JEMALLOC
-static JemallocMemoryPool jemalloc_pool;
+  JemallocMemoryPool jemalloc_pool;
 #endif
 #ifdef ARROW_MIMALLOC
-static MimallocMemoryPool mimalloc_pool;
+  MimallocMemoryPool mimalloc_pool;
 #endif
+} global_state;
 
-MemoryPool* system_memory_pool() { return &system_pool; }
+MemoryPool* system_memory_pool() { return &global_state.system_pool; }
 
 Status jemalloc_memory_pool(MemoryPool** out) {
 #ifdef ARROW_JEMALLOC
-  *out = &jemalloc_pool;
+  *out = &global_state.jemalloc_pool;
   return Status::OK();
 #else
   return Status::NotImplemented("This Arrow build does not enable jemalloc");
@@ -515,7 +528,7 @@ Status jemalloc_memory_pool(MemoryPool** out) {
 
 Status mimalloc_memory_pool(MemoryPool** out) {
 #ifdef ARROW_MIMALLOC
-  *out = &mimalloc_pool;
+  *out = &global_state.mimalloc_pool;
   return Status::OK();
 #else
   return Status::NotImplemented("This Arrow build does not enable mimalloc");
@@ -526,14 +539,14 @@ MemoryPool* default_memory_pool() {
   auto backend = DefaultBackend();
   switch (backend) {
     case MemoryPoolBackend::System:
-      return &system_pool;
+      return &global_state.system_pool;
 #ifdef ARROW_JEMALLOC
     case MemoryPoolBackend::Jemalloc:
-      return &jemalloc_pool;
+      return &global_state.jemalloc_pool;
 #endif
 #ifdef ARROW_MIMALLOC
     case MemoryPoolBackend::Mimalloc:
-      return &mimalloc_pool;
+      return &global_state.mimalloc_pool;
 #endif
     default:
       ARROW_LOG(FATAL) << "Internal error: cannot create default memory pool";
@@ -669,4 +682,116 @@ std::vector<std::string> SupportedMemoryBackendNames() {
   return supported;
 }
 
+// -----------------------------------------------------------------------
+// Pool buffer and allocation
+
+/// A Buffer whose lifetime is tied to a particular MemoryPool
+class PoolBuffer final : public ResizableBuffer {
+ public:
+  explicit PoolBuffer(std::shared_ptr<MemoryManager> mm, MemoryPool* pool)
+      : ResizableBuffer(nullptr, 0, std::move(mm)), pool_(pool) {}
+
+  ~PoolBuffer() override {
+    // Avoid calling pool_->Free if the global pools are destroyed
+    // (XXX this will not work with user-defined pools)
+
+    // This can happen if a Future is destructing on one thread while or
+    // after memory pools are destructed on the main thread (as there is
+    // no guarantee of destructor order between thread/memory pools)
+    uint8_t* ptr = mutable_data();
+    if (ptr && !global_state.is_finalizing()) {
+      pool_->Free(ptr, capacity_);
+    }
+  }
+
+  Status Reserve(const int64_t capacity) override {
+    if (capacity < 0) {
+      return Status::Invalid("Negative buffer capacity: ", capacity);
+    }
+    uint8_t* ptr = mutable_data();
+    if (!ptr || capacity > capacity_) {
+      int64_t new_capacity = BitUtil::RoundUpToMultipleOf64(capacity);
+      if (ptr) {
+        RETURN_NOT_OK(pool_->Reallocate(capacity_, new_capacity, &ptr));
+      } else {
+        RETURN_NOT_OK(pool_->Allocate(new_capacity, &ptr));
+      }
+      data_ = ptr;
+      capacity_ = new_capacity;
+    }
+    return Status::OK();
+  }
+
+  Status Resize(const int64_t new_size, bool shrink_to_fit = true) override {
+    if (ARROW_PREDICT_FALSE(new_size < 0)) {
+      return Status::Invalid("Negative buffer resize: ", new_size);
+    }
+    uint8_t* ptr = mutable_data();
+    if (ptr && shrink_to_fit && new_size <= size_) {
+      // Buffer is non-null and is not growing, so shrink to the requested size without
+      // excess space.
+      int64_t new_capacity = BitUtil::RoundUpToMultipleOf64(new_size);
+      if (capacity_ != new_capacity) {
+        // Buffer hasn't got yet the requested size.
+        RETURN_NOT_OK(pool_->Reallocate(capacity_, new_capacity, &ptr));
+        data_ = ptr;
+        capacity_ = new_capacity;
+      }
+    } else {
+      RETURN_NOT_OK(Reserve(new_size));
+    }
+    size_ = new_size;
+
+    return Status::OK();
+  }
+
+  static std::shared_ptr<PoolBuffer> MakeShared(MemoryPool* pool) {
+    std::shared_ptr<MemoryManager> mm;
+    if (pool == nullptr) {
+      pool = default_memory_pool();
+      mm = default_cpu_memory_manager();
+    } else {
+      mm = CPUDevice::memory_manager(pool);
+    }
+    return std::make_shared<PoolBuffer>(std::move(mm), pool);
+  }
+
+  static std::unique_ptr<PoolBuffer> MakeUnique(MemoryPool* pool) {
+    std::shared_ptr<MemoryManager> mm;
+    if (pool == nullptr) {
+      pool = default_memory_pool();
+      mm = default_cpu_memory_manager();
+    } else {
+      mm = CPUDevice::memory_manager(pool);
+    }
+    return std::unique_ptr<PoolBuffer>(new PoolBuffer(std::move(mm), pool));
+  }
+
+ private:
+  MemoryPool* pool_;
+};
+
+namespace {
+// A utility that does most of the work of the `AllocateBuffer` and
+// `AllocateResizableBuffer` methods. The argument `buffer` should be a smart pointer to
+// a PoolBuffer.
+template <typename BufferPtr, typename PoolBufferPtr>
+inline Result<BufferPtr> ResizePoolBuffer(PoolBufferPtr&& buffer, const int64_t size) {
+  RETURN_NOT_OK(buffer->Resize(size));
+  buffer->ZeroPadding();
+  return std::move(buffer);
+}
+
+}  // namespace
+
+Result<std::unique_ptr<Buffer>> AllocateBuffer(const int64_t size, MemoryPool* pool) {
+  return ResizePoolBuffer<std::unique_ptr<Buffer>>(PoolBuffer::MakeUnique(pool), size);
+}
+
+Result<std::unique_ptr<ResizableBuffer>> AllocateResizableBuffer(const int64_t size,
+                                                                 MemoryPool* pool) {
+  return ResizePoolBuffer<std::unique_ptr<ResizableBuffer>>(PoolBuffer::MakeUnique(pool),
+                                                            size);
+}
+
 }  // namespace arrow
diff --git a/cpp/src/arrow/testing/future_util.h b/cpp/src/arrow/testing/future_util.h
index 0a20b5f4d57..190e5839bbf 100644
--- a/cpp/src/arrow/testing/future_util.h
+++ b/cpp/src/arrow/testing/future_util.h
@@ -81,10 +81,21 @@
   handle_error(future_name.status());                                                \
   EXPECT_OK_AND_ASSIGN(lhs, future_name.result());
 
+#define EXPECT_FINISHES(expr)   \
+  do {                          \
+    EXPECT_FINISHES_IMPL(expr); \
+  } while (0)
+
 #define EXPECT_FINISHES_OK_AND_ASSIGN(lhs, rexpr) \
   ON_FINISH_ASSIGN_OR_HANDLE_ERROR_IMPL(          \
       ARROW_EXPECT_OK, ARROW_ASSIGN_OR_RAISE_NAME(_fut, __COUNTER__), lhs, rexpr);
 
+#define EXPECT_FINISHES_OK_AND_EQ(expected, expr)        \
+  do {                                                   \
+    EXPECT_FINISHES_OK_AND_ASSIGN(auto _actual, (expr)); \
+    EXPECT_EQ(expected, _actual);                        \
+  } while (0)
+
 namespace arrow {
 
 template <typename T>

From 527c346229f97537dfa51f64dc15672654dec08b Mon Sep 17 00:00:00 2001
From: Anthony Louis <anthony@simbioseventures.com>
Date: Fri, 14 May 2021 22:47:46 +0200
Subject: [PATCH 245/719] ARROW-12699: [CI][Packaging][Java] Generate a jar
 compatible with Linux and MacOS for all Arrow components
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Change the build to generate the Arrow's libraries jar files containing the C++ shared libs both for Linux and macOS.

**Note**: It only generates the artifact jars for the components that depend on C++ libraries at the end of the build: gandiva, adapter/orc, and dataset.

Closes #10300 from anthonylouisbsb/feature/generate-single-jar-for-all-jar-libraries

Authored-by: Anthony Louis <anthony@simbioseventures.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 .../docker/java-bundled-jars.dockerfile       |  33 ++---
 .../java_bundled_jars_check_dependencies.sh   |  42 +++---
 ci/scripts/java_bundled_jars_java_build.sh    |  41 ++++++
 ci/scripts/java_bundled_jars_macos_build.sh   |  92 +++++++++++++
 .../java_bundled_jars_manylinux_build.sh      | 124 ++++++++++++++++++
 dev/tasks/gandiva-jars/build-cpp-linux.sh     |  73 -----------
 dev/tasks/gandiva-jars/build-cpp-osx.sh       |  49 -------
 dev/tasks/{gandiva-jars => jars}/README.md    |  16 +--
 dev/tasks/{gandiva-jars => jars}/github.yml   |  65 ++++-----
 dev/tasks/tasks.yml                           |  15 ++-
 docker-compose.yml                            |  22 +++-
 11 files changed, 363 insertions(+), 209 deletions(-)
 rename dev/tasks/gandiva-jars/build-java.sh => ci/docker/java-bundled-jars.dockerfile (61%)
 mode change 100755 => 100644
 rename dev/tasks/gandiva-jars/check-shared-dependencies.sh => ci/scripts/java_bundled_jars_check_dependencies.sh (60%)
 create mode 100755 ci/scripts/java_bundled_jars_java_build.sh
 create mode 100755 ci/scripts/java_bundled_jars_macos_build.sh
 create mode 100755 ci/scripts/java_bundled_jars_manylinux_build.sh
 delete mode 100755 dev/tasks/gandiva-jars/build-cpp-linux.sh
 delete mode 100755 dev/tasks/gandiva-jars/build-cpp-osx.sh
 rename dev/tasks/{gandiva-jars => jars}/README.md (57%)
 rename dev/tasks/{gandiva-jars => jars}/github.yml (69%)

diff --git a/dev/tasks/gandiva-jars/build-java.sh b/ci/docker/java-bundled-jars.dockerfile
old mode 100755
new mode 100644
similarity index 61%
rename from dev/tasks/gandiva-jars/build-java.sh
rename to ci/docker/java-bundled-jars.dockerfile
index 79af606d3d0..96274b26dd9
--- a/dev/tasks/gandiva-jars/build-java.sh
+++ b/ci/docker/java-bundled-jars.dockerfile
@@ -1,5 +1,3 @@
-#!/bin/bash
-
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
@@ -16,19 +14,24 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
+ARG base
+FROM ${base}
 
-set -e
-
-CPP_BUILD_DIR=$GITHUB_WORKSPACE/arrow/dist/
+# Install the libaries required by the Gandiva to run
+RUN vcpkg install --clean-after-build \
+        llvm \
+        boost-system \
+        boost-date-time \
+        boost-regex \
+        boost-predef \
+        boost-algorithm \
+        boost-locale \
+        boost-format \
+        boost-variant \
+        boost-multiprecision
 
-pushd java
-  # build the entire project
-  mvn clean install -q -DskipTests -P arrow-jni -Darrow.cpp.build.dir=$CPP_BUILD_DIR
-  # test only gandiva
-  mvn test -q -P arrow-jni -pl gandiva -Dgandiva.cpp.build.dir=$CPP_BUILD_DIR
+# Install dependencies
+ARG java=1.8.0
+RUN yum install -y java-$java-openjdk-devel && yum clean all
 
-  if [[ $COPY_JAR_TO_DISTRIBUTION_FOLDER ]] ; then
-    # copy the jars to distribution folder
-    find gandiva/target/ -name "*.jar" -not -name "*tests*" -exec cp  {} $CPP_BUILD_DIR \;
-  fi
-popd
+ENV JAVA_HOME=/usr/lib/jvm/java-$java-openjdk/
\ No newline at end of file
diff --git a/dev/tasks/gandiva-jars/check-shared-dependencies.sh b/ci/scripts/java_bundled_jars_check_dependencies.sh
similarity index 60%
rename from dev/tasks/gandiva-jars/check-shared-dependencies.sh
rename to ci/scripts/java_bundled_jars_check_dependencies.sh
index ce93ff57183..b13d57036fd 100755
--- a/dev/tasks/gandiva-jars/check-shared-dependencies.sh
+++ b/ci/scripts/java_bundled_jars_check_dependencies.sh
@@ -19,40 +19,34 @@
 
 set -e
 
-CPP_BUILD_DIR=$GITHUB_WORKSPACE/arrow/dist/
+function check_dynamic_dependencies(){
+  local so_dep=$1
+  local library=$2
+  shift 2
+  local whitelist=("$@")
 
-if [[ $OS_NAME == "linux" ]]; then
-  SO_DEP=ldd
-  GANDIVA_LIB="$CPP_BUILD_DIR"libgandiva_jni.so
-  WHITELIST=(linux-vdso libz librt libdl libpthread libstdc++ libm libgcc_s libc ld-linux-x86-64)
-else
-  SO_DEP="otool -L"
-  GANDIVA_LIB="$CPP_BUILD_DIR"libgandiva_jni.dylib
-  WHITELIST=(libgandiva_jni libz libncurses libSystem libc++)
-fi
+  # print the shared library dependencies
+  $so_dep "$library" | tee dependencies_temp_file.txt 
 
-# print the shared library dependencies
-$SO_DEP "$GANDIVA_LIB" | tee dependencies_temp_file.txt 
-
-if [[ $CHECK_SHARED_DEPENDENCIES ]] ; then
   # exit if any shared library not in whitelisted set is found
   echo "Checking shared dependencies"
-
   awk '{print $1}' dependencies_temp_file.txt | \
   while read -r line
   do
     found=false
-    
-    for item in "${WHITELIST[@]}"
+  
+    for item in "${whitelist[@]}"
     do
     if [[ "$line" == *"$item"* ]] ; then
-        found=true
+      found=true
     fi
-    done
+  done
 
-    if [[ "$found" == false ]] ; then
-      echo "Unexpected shared dependency found $line"
-      exit 1
-    fi
+  if [[ "$found" == false ]] ; then
+    echo "Unexpected shared dependency found in $library : $line"
+    exit 1
+  fi
   done
-fi
\ No newline at end of file
+
+  rm dependencies_temp_file.txt
+}
\ No newline at end of file
diff --git a/ci/scripts/java_bundled_jars_java_build.sh b/ci/scripts/java_bundled_jars_java_build.sh
new file mode 100755
index 00000000000..8ab502e30c8
--- /dev/null
+++ b/ci/scripts/java_bundled_jars_java_build.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e
+
+arrow_dir=${1}
+cpp_build_dir=${2}
+copy_jar_to_distribution_folder=${3:-true}
+java_dir=${arrow_dir}/java
+
+export ARROW_TEST_DATA=${arrow_dir}/testing/data
+
+pushd $java_dir
+  # build the entire project
+  mvn clean install -DskipTests -P arrow-jni -Darrow.cpp.build.dir=$cpp_build_dir
+  # test jars that have cpp dependencies
+  mvn test -P arrow-jni -pl adapter/orc,gandiva,dataset -Dgandiva.cpp.build.dir=$cpp_build_dir
+
+  if [[ $copy_jar_to_distribution_folder ]] ; then
+    # copy the jars that has cpp dependencies to distribution folder
+    find gandiva/target/ -name "*.jar" -not -name "*tests*" -exec cp  {} $cpp_build_dir \;
+    find adapter/orc/target/ -name "*.jar" -not -name "*tests*" -exec cp  {} $cpp_build_dir \;
+    find dataset/target/ -name "*.jar" -not -name "*tests*" -exec cp  {} $cpp_build_dir \;
+  fi
+popd
diff --git a/ci/scripts/java_bundled_jars_macos_build.sh b/ci/scripts/java_bundled_jars_macos_build.sh
new file mode 100755
index 00000000000..c050c087b85
--- /dev/null
+++ b/ci/scripts/java_bundled_jars_macos_build.sh
@@ -0,0 +1,92 @@
+#!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+arrow_dir=${1}
+build_dir=${2}
+# The directory where the final binaries will be stored when scripts finish
+distribution_dir=${3}
+source_dir=${arrow_dir}/cpp
+
+export ARROW_TEST_DATA="${arrow_dir}/testing/data"
+export PARQUET_TEST_DATA="${source_dir}/submodules/parquet-testing/data"
+export AWS_EC2_METADATA_DISABLED=TRUE
+
+# Builds arrow + gandiva and tests the same.
+mkdir -p "${build_dir}"
+pushd "${build_dir}"
+  CMAKE_FLAGS="-DCMAKE_BUILD_TYPE=Release \
+        -DARROW_GANDIVA=ON \
+        -DARROW_GANDIVA_JAVA=ON \
+        -DARROW_GANDIVA_STATIC_LIBSTDCPP=ON \
+        -DARROW_ORC=ON \
+        -DARROW_JNI=ON \
+        -DARROW_PLASMA=ON \
+        -DARROW_PLASMA_JAVA_CLIENT=ON \
+        -DARROW_BUILD_TESTS=ON \
+        -DARROW_BUILD_UTILITIES=OFF \
+        -DPARQUET_REQUIRE_ENCRYPTION=OFF \
+        -DARROW_PARQUET=ON \
+        -DPARQUET_BUILD_EXAMPLES=OFF \
+        -DPARQUET_BUILD_EXECUTABLES=OFF \
+        -DARROW_FILESYSTEM=ON \
+        -DARROW_DATASET=ON \
+        -DARROW_BOOST_USE_SHARED=OFF \
+        -DARROW_PROTOBUF_USE_SHARED=OFF \
+        -DARROW_GFLAGS_USE_SHARED=OFF \
+        -DARROW_OPENSSL_USE_SHARED=OFF \
+        -DARROW_BROTLI_USE_SHARED=OFF \
+        -DARROW_BZ2_USE_SHARED=OFF \
+        -DARROW_GRPC_USE_SHARED=OFF \
+        -DARROW_LZ4_USE_SHARED=OFF \
+        -DARROW_SNAPPY_USE_SHARED=OFF \
+        -DARROW_THRIFT_USE_SHARED=OFF \
+        -DARROW_UTF8PROC_USE_SHARED=OFF \
+        -DARROW_ZSTD_USE_SHARED=OFF \
+        -DCMAKE_INSTALL_PREFIX=${build_dir} \
+        -DCMAKE_INSTALL_LIBDIR=lib"
+
+  cmake $CMAKE_FLAGS $source_dir
+  make -j4
+  make install
+  ctest
+
+  # Copy all generated libraries to the distribution folder
+  mkdir -p "${distribution_dir}"
+  cp -L ${build_dir}/lib/libgandiva_jni.dylib ${distribution_dir}
+  cp -L ${build_dir}/lib/libarrow_dataset_jni.dylib ${distribution_dir}
+  cp -L ${build_dir}/lib/libarrow_orc_jni.dylib ${distribution_dir}
+popd
+
+#Check if any libraries contains an unwhitelisted shared dependency
+source $arrow_dir/ci/scripts/java_bundled_jars_check_dependencies.sh
+SO_DEP="otool -L"
+
+GANDIVA_LIB=$distribution_dir/libgandiva_jni.dylib
+DATASET_LIB=$distribution_dir/libarrow_dataset_jni.dylib
+ORC_LIB=$distribution_dir/libarrow_orc_jni.dylib
+LIBRARIES=($GANDIVA_LIB $ORC_LIB $DATASET_LIB)
+
+WHITELIST=(libgandiva_jni libarrow_orc_jni libarrow_dataset_jni libz libncurses libSystem libc++)
+
+for library in "${LIBRARIES[@]}"
+do
+  check_dynamic_dependencies $SO_DEP $library "${WHITELIST[@]}"  
+done
\ No newline at end of file
diff --git a/ci/scripts/java_bundled_jars_manylinux_build.sh b/ci/scripts/java_bundled_jars_manylinux_build.sh
new file mode 100755
index 00000000000..934c221f11b
--- /dev/null
+++ b/ci/scripts/java_bundled_jars_manylinux_build.sh
@@ -0,0 +1,124 @@
+#!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Quit on failure
+set -e
+
+arrow_dir=${1}
+build_dir=${2}
+# The directory where the final binaries will be stored when scripts finish
+distribution_dir=${3}
+source_dir=${arrow_dir}/cpp
+
+echo "=== (${PYTHON_VERSION}) Clear output directories and leftovers ==="
+# Clear output directories and leftovers
+rm -rf ${build_dir}
+
+echo "=== (${PYTHON_VERSION}) Building Arrow C++ libraries ==="
+: ${ARROW_DATASET:=ON}
+: ${ARROW_GANDIVA:=ON}
+: ${ARROW_GANDIVA_JAVA:=ON}
+: ${ARROW_FILESYSTEM:=ON}
+: ${ARROW_JEMALLOC:=ON}
+: ${ARROW_RPATH_ORIGIN:=ON}
+: ${ARROW_ORC:=ON}
+: ${ARROW_PARQUET:=ON}
+: ${ARROW_PLASMA:=ON}
+: ${ARROW_PLASMA_JAVA_CLIENT:=ON}
+: ${ARROW_PYTHON:=OFF}
+: ${ARROW_JNI:=ON}
+: ${ARROW_BUILD_TESTS:=ON}
+: ${CMAKE_BUILD_TYPE:=Release}
+: ${CMAKE_UNITY_BUILD:=ON}
+: ${CMAKE_GENERATOR:=Ninja}
+: ${VCPKG_FEATURE_FLAGS:=-manifests}
+: ${VCPKG_TARGET_TRIPLET:=${VCPKG_DEFAULT_TRIPLET:-x64-linux-static-${CMAKE_BUILD_TYPE}}}
+: ${PYTHON_VERSION:=3.7}
+: ${GANDIVA_CXX_FLAGS:=-isystem;/opt/rh/devtoolset-9/root/usr/include/c++/9;-isystem;/opt/rh/devtoolset-9/root/usr/include/c++/9/x86_64-redhat-linux;-isystem;-lpthread}
+
+mkdir -p "${build_dir}"
+pushd "${build_dir}"
+  export ARROW_TEST_DATA="${arrow_dir}/testing/data"
+  export PARQUET_TEST_DATA="${source_dir}/submodules/parquet-testing/data"
+  export AWS_EC2_METADATA_DISABLED=TRUE
+
+  cmake -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
+      -DARROW_DEPENDENCY_SOURCE="VCPKG" \
+      -DCMAKE_INSTALL_PREFIX=${build_dir} \
+      -DCMAKE_INSTALL_LIBDIR=lib \
+      -DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS} \
+      -DARROW_BUILD_SHARED=ON \
+      -DARROW_BOOST_USE_SHARED=OFF \
+      -DARROW_PROTOBUF_USE_SHARED=OFF \
+      -DARROW_OPENSSL_USE_SHARED=OFF \
+      -DARROW_BROTLI_USE_SHARED=OFF \
+      -DARROW_BZ2_USE_SHARED=OFF \
+      -DARROW_GRPC_USE_SHARED=OFF \
+      -DARROW_LZ4_USE_SHARED=OFF \
+      -DARROW_SNAPPY_USE_SHARED=OFF \
+      -DARROW_THRIFT_USE_SHARED=OFF \
+      -DARROW_UTF8PROC_USE_SHARED=OFF \
+      -DARROW_ZSTD_USE_SHARED=OFF \
+      -DARROW_GANDIVA_PC_CXX_FLAGS=${GANDIVA_CXX_FLAGS} \
+      -DARROW_JEMALLOC=${ARROW_JEMALLOC} \
+      -DARROW_RPATH_ORIGIN=${ARROW_RPATH_ORIGIN} \
+      -DARROW_PYTHON=${ARROW_PYTHON} \
+      -DARROW_PARQUET=${ARROW_PARQUET} \
+      -DARROW_DATASET=${ARROW_DATASET} \
+      -DARROW_FILESYSTEM=${ARROW_FILESYSTEM} \
+      -DPARQUET_REQUIRE_ENCRYPTION=OFF \
+      -DPARQUET_BUILD_EXAMPLES=OFF \
+      -DPARQUET_BUILD_EXECUTABLES=OFF \
+      -DPythonInterp_FIND_VERSION=ON \
+      -DPythonInterp_FIND_VERSION_MAJOR=3 \
+      -DARROW_GANDIVA=${ARROW_GANDIVA} \
+      -DARROW_GANDIVA_JAVA=${ARROW_GANDIVA_JAVA} \
+      -DARROW_ORC=${ARROW_ORC} \
+      -DARROW_JNI=${ARROW_JNI} \
+      -DARROW_PLASMA=${ARROW_PLASMA} \
+      -DARROW_PLASMA_JAVA_CLIENT=${ARROW_PLASMA_JAVA_CLIENT} \
+      -DARROW_BUILD_UTILITIES=OFF \
+      -DVCPKG_MANIFEST_MODE=OFF \
+      -DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET} \
+      -GNinja ${source_dir}
+  ninja install
+  CTEST_OUTPUT_ON_FAILURE=1 ninja test
+popd
+
+echo "=== (${PYTHON_VERSION}) Copying libraries to the distribution folder ==="
+mkdir -p "${distribution_dir}"
+cp -L  ${build_dir}/lib/libgandiva_jni.so ${distribution_dir}
+cp -L  ${build_dir}/lib/libarrow_dataset_jni.so ${distribution_dir}
+cp -L  ${build_dir}/lib/libarrow_orc_jni.so ${distribution_dir}
+
+echo "=== (${PYTHON_VERSION}) Checking shared dependencies for libraries ==="
+source $arrow_dir/ci/scripts/java_bundled_jars_check_dependencies.sh
+SO_DEP=ldd
+
+GANDIVA_LIB=$distribution_dir/libgandiva_jni.so
+DATASET_LIB=$distribution_dir/libarrow_dataset_jni.so
+ORC_LIB=$distribution_dir/libarrow_orc_jni.so
+LIBRARIES=($GANDIVA_LIB $ORC_LIB $DATASET_LIB)
+
+WHITELIST=(linux-vdso libz librt libdl libpthread libstdc++ libm libgcc_s libc ld-linux-x86-64)
+
+for library in "${LIBRARIES[@]}"
+do
+  check_dynamic_dependencies $SO_DEP $library "${WHITELIST[@]}"  
+done
\ No newline at end of file
diff --git a/dev/tasks/gandiva-jars/build-cpp-linux.sh b/dev/tasks/gandiva-jars/build-cpp-linux.sh
deleted file mode 100755
index 42651739f84..00000000000
--- a/dev/tasks/gandiva-jars/build-cpp-linux.sh
+++ /dev/null
@@ -1,73 +0,0 @@
-#!/bin/bash
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-source /multibuild/manylinux_utils.sh
-
-# Quit on failure
-set -e
-
-PYTHON_VERSION=3.6
-CPYTHON_PATH="$(cpython_path ${PYTHON_VERSION})"
-PYTHON_INTERPRETER="${CPYTHON_PATH}/bin/python"
-PIP="${CPYTHON_PATH}/bin/pip"
-
-ARROW_BUILD_DIR=/tmp/arrow-build
-mkdir -p "${ARROW_BUILD_DIR}"
-pushd "${ARROW_BUILD_DIR}"
-
-PATH="${CPYTHON_PATH}/bin:${PATH}"
-export ARROW_TEST_DATA="/arrow/testing/data"
-
-cmake -DCMAKE_BUILD_TYPE=Release \
-    -DARROW_DEPENDENCY_SOURCE="SYSTEM" \
-    -DZLIB_ROOT=/usr/local \
-    -DCMAKE_INSTALL_PREFIX=/arrow-dist \
-    -DCMAKE_INSTALL_LIBDIR=lib \
-    -DARROW_BUILD_TESTS=ON \
-    -DARROW_BUILD_SHARED=ON \
-    -DARROW_BOOST_USE_SHARED=OFF \
-    -DARROW_PROTOBUF_USE_SHARED=OFF \
-    -DARROW_OPENSSL_USE_SHARED=OFF \
-    -DARROW_GANDIVA_PC_CXX_FLAGS="-isystem;/opt/rh/devtoolset-2/root/usr/include/c++/4.8.2;-isystem;/opt/rh/devtoolset-2/root/usr/include/c++/4.8.2/x86_64-CentOS-linux/" \
-    -DARROW_JEMALLOC=ON \
-    -DARROW_RPATH_ORIGIN=ON \
-    -DARROW_PYTHON=OFF \
-    -DARROW_PARQUET=OFF \
-    -DARROW_DATASET=OFF \
-    -DARROW_FILESYSTEM=OFF \
-    -DPARQUET_BUILD_ENCRYPTION=OFF \
-    -DPythonInterp_FIND_VERSION=${PYTHON_VERSION} \
-    -DARROW_GANDIVA=ON \
-    -DARROW_GANDIVA_JAVA=ON \
-    -DARROW_GANDIVA_JAVA7=ON \
-    -DBoost_NAMESPACE=arrow_boost \
-    -Dgflags_SOURCE=BUNDLED \
-    -DRapidJSON_SOURCE=BUNDLED \
-    -DRE2_SOURCE=BUNDLED \
-    -DARROW_BUILD_UTILITIES=OFF \
-    -DBoost_NAMESPACE=arrow_boost \
-    -DBOOST_ROOT=/arrow_boost_dist \
-    -GNinja /arrow/cpp
-ninja install
-CTEST_OUTPUT_ON_FAILURE=1 ninja test
-popd
-
-
-# copy the library to distribution
-cp -L  /arrow-dist/lib/libgandiva_jni.so /arrow/dist
diff --git a/dev/tasks/gandiva-jars/build-cpp-osx.sh b/dev/tasks/gandiva-jars/build-cpp-osx.sh
deleted file mode 100755
index cc6ab246d96..00000000000
--- a/dev/tasks/gandiva-jars/build-cpp-osx.sh
+++ /dev/null
@@ -1,49 +0,0 @@
-#!/bin/bash
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-set -e
-
-set -x
-
-# Builds arrow + gandiva and tests the same.
-pushd cpp
-  mkdir build
-  pushd build
-    CMAKE_FLAGS="-DCMAKE_BUILD_TYPE=Release \
-          -DARROW_GANDIVA=ON \
-          -DARROW_GANDIVA_JAVA=ON \
-          -DARROW_GANDIVA_STATIC_LIBSTDCPP=ON \
-          -DARROW_BUILD_TESTS=ON \
-          -DARROW_BUILD_UTILITIES=OFF \
-          -DPARQUET_BUILD_ENCRYPTION=OFF \
-          -DARROW_PARQUET=OFF \
-          -DARROW_FILESYSTEM=OFF \
-          -DARROW_DATASET=OFF \
-          -DARROW_BOOST_USE_SHARED=OFF \
-          -DARROW_PROTOBUF_USE_SHARED=OFF \
-          -DARROW_GFLAGS_USE_SHARED=OFF \
-          -DARROW_OPENSSL_USE_SHARED=OFF"
-
-    cmake $CMAKE_FLAGS ..
-    make -j4
-    ctest
-
-    cp -L release/libgandiva_jni.dylib $GITHUB_WORKSPACE/arrow/dist
-  popd
-popd
diff --git a/dev/tasks/gandiva-jars/README.md b/dev/tasks/jars/README.md
similarity index 57%
rename from dev/tasks/gandiva-jars/README.md
rename to dev/tasks/jars/README.md
index 2f4c694d799..1d61662d44a 100644
--- a/dev/tasks/gandiva-jars/README.md
+++ b/dev/tasks/jars/README.md
@@ -16,14 +16,14 @@ See the License for the specific language governing permissions and
 limitations under the License.
 -->
 
-# Updating manylinux for Gandiva Jar Build.
+# Jars.
 
-Do the following to update arrow manylinux docker image for building Gandiva Jars
+This directory is responsible to generate the jar files for the Arrow components that depend on C++ shared libraries to execute.
 
-- Install java in the manylinux image.
-- To do above, update Dockerfile-x86_64_base under python/manylinux1 to install java.
-- Please note only upto java7 is available in CentOS5, so install java7 in the base.
-- Export JAVA_HOME environment variable.
-- Then update build_boost.sh under python/manylinux1/scripts to build boost statically.
+The Arrow C++ libraries are compiled both on MacOS and Linux distributions, with their dependencies linked statically, and they are added
+in the jars at the end, so the file can be used on both systems.
 
-Please look at https://github.com/praveenbingo/arrow/tree/buildGandivaDocker that already has these changes.
\ No newline at end of file
+## Linux Docker Image
+To compile the C++ libraries in Linux, a docker image is used. 
+It is created used the **ci/docker/java-bundled-jars.dockerfile** file. 
+If it is necessary to add any new dependency, you need to change that file.
\ No newline at end of file
diff --git a/dev/tasks/gandiva-jars/github.yml b/dev/tasks/jars/github.yml
similarity index 69%
rename from dev/tasks/gandiva-jars/github.yml
rename to dev/tasks/jars/github.yml
index a1ac093c47b..773c4fcda36 100644
--- a/dev/tasks/gandiva-jars/github.yml
+++ b/dev/tasks/jars/github.yml
@@ -21,7 +21,7 @@
 
 jobs:
   build-cpp-ubuntu:
-    name: Build C++ Gandiva Libs Ubuntu
+    name: Build C++ Libs Ubuntu
     runs-on: ubuntu-18.04
     steps:
       - name: Checkout Arrow
@@ -30,7 +30,13 @@ jobs:
           git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
           if [ $CROSSBOW_USE_COMMIT_ID = true ]; then git -C arrow checkout {{ arrow.head }}; else git -C arrow checkout FETCH_HEAD; fi
           git -C arrow submodule update --init --recursive
-      - name: Build Gandiva
+      - name: Setup Python
+        uses: actions/setup-python@v1
+        with:
+          python-version: 3.8
+      - name: Setup Archery
+        run: pip install -e arrow/dev/archery[docker]
+      - name: Build C++ Libs
         run: |
           python3 -VV
           cd arrow
@@ -38,18 +44,17 @@ jobs:
           export CC="gcc-4.9" CXX="g++-4.9"
           ulimit -c unlimited -S
           set -e
-          docker run -v $PWD:/arrow quay.io/anthonylouisbsb/arrow:gandivadocker /arrow/dev/tasks/gandiva-jars/build-cpp-linux.sh
-          dev/tasks/gandiva-jars/check-shared-dependencies.sh
-        env:
-          OS_NAME: "linux"
-          CHECK_SHARED_DEPENDENCIES: true
+          archery docker build java-bundled-jars
+          archery docker run java-bundled-jars
+      - name: Compress into single artifact
+        run: tar -cvzf arrow-shared-libs-linux.tar.gz arrow/dist/
       - name: Upload Artifacts
         uses: actions/upload-artifact@v2
         with:
           name: ubuntu-shared-lib
-          path: arrow/dist/libgandiva_jni.so
+          path: arrow-shared-libs-linux.tar.gz
   build-cpp-macos:
-    name: Build C++ Gandiva Libs MacOS
+    name: Build C++ Libs MacOS
     runs-on: macos-latest
     steps:
       - name: Checkout Arrow
@@ -58,25 +63,23 @@ jobs:
           git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
           if [ $CROSSBOW_USE_COMMIT_ID = true ]; then git -C arrow checkout {{ arrow.head }}; else git -C arrow checkout FETCH_HEAD; fi
           git -C arrow submodule update --init --recursive
-      - name: Build Gandiva
+      - name: Build C++ Libs
         run: |
-          cd arrow
-          mkdir -p dist
-          export ARROW_TEST_DATA=$PWD/testing/data
           set -e
-          dev/tasks/gandiva-jars/build-cpp-osx.sh
-          dev/tasks/gandiva-jars/check-shared-dependencies.sh
+          arrow/ci/scripts/java_bundled_jars_macos_build.sh $GITHUB_WORKSPACE/arrow \
+            $GITHUB_WORKSPACE/arrow/cpp-build \
+            $GITHUB_WORKSPACE/arrow/dist
         env:
-          OS_NAME: "osx"
-          CHECK_SHARED_DEPENDENCIES: true
           MACOSX_DEPLOYMENT_TARGET: "10.11"
+      - name: Compress into single artifact
+        run: tar -cvzf arrow-shared-libs-macos.tar.gz arrow/dist/
       - name: Upload Artifacts
         uses: actions/upload-artifact@v2
         with:
           name: macos-shared-lib
-          path: arrow/dist/libgandiva_jni.dylib
+          path: arrow-shared-libs-macos.tar.gz
   package-jar:
-    name: Build Gandiva Jar
+    name: Build Jar Files
     runs-on: macos-latest
     needs: [build-cpp-macos, build-cpp-ubuntu]
     steps:
@@ -87,25 +90,23 @@ jobs:
           if [ $CROSSBOW_USE_COMMIT_ID = true ]; then git -C arrow checkout {{ arrow.head }}; else git -C arrow checkout FETCH_HEAD; fi
           git -C arrow submodule update --init --recursive
           mkdir -p arrow/dist
-      - name: Download Linux Gandiva Library
+      - name: Download Linux C++ Libraries
         uses: actions/download-artifact@v2
         with:
           name: ubuntu-shared-lib
-          path: arrow/dist
-      - name: Download MacOS Gandiva Library
+      - name: Download MacOS C++ Library
         uses: actions/download-artifact@v2
         with:
           name: macos-shared-lib
-          path: arrow/dist
-      - name: Build Gandiva Jar
+      - name: Descompress artifacts
+        run: | 
+          tar -xvzf arrow-shared-libs-macos.tar.gz
+          tar -xvzf arrow-shared-libs-linux.tar.gz
+      - name: Build Jar
         run: |
-          cd arrow
-          export ARROW_TEST_DATA=$PWD/testing/data
           set -e
-          dev/tasks/gandiva-jars/build-java.sh
-        env:
-          OS_NAME: "osx"
-          COPY_JAR_TO_DISTRIBUTION_FOLDER: true
-          MACOSX_DEPLOYMENT_TARGET: "10.11"
-  
+          arrow/ci/scripts/java_bundled_jars_java_build.sh $GITHUB_WORKSPACE/arrow \
+            $GITHUB_WORKSPACE/arrow/dist \
+            true
+          
       {{ macros.github_upload_releases("arrow/dist/*.jar")|indent }}
\ No newline at end of file
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index 73d9bab23cf..b2e4d7a0545 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -44,9 +44,6 @@ groups:
     - debian-*-arm64
     - ubuntu-*-arm64
 
-  gandiva:
-    - gandiva-*
-
   homebrew:
     - homebrew-*
 
@@ -55,6 +52,7 @@ groups:
     - centos-*
     - conda-*
     - debian-*
+    - jars
     - nuget
     - python-sdist
     - ubuntu-*
@@ -127,7 +125,7 @@ groups:
     - ubuntu-*
     - centos-*
     - conda-*
-    - gandiva-*
+    - jars
     # List the homebrews explicitly because we don't care about running homebrew-cpp-autobrew
     - homebrew-cpp
     - homebrew-r-autobrew
@@ -621,13 +619,16 @@ tasks:
     ci: github
     template: r/github.macos.autobrew.yml
 
-  ############################## Gandiva Tasks ################################
+  ############################## Arrow JAR's ##################################
 
-  gandiva-jar:
+  jars:
+    # Build jar's that contains cpp libraries dependencies
     ci: github
-    template: gandiva-jars/github.yml
+    template: jars/github.yml
     artifacts:
       - arrow-gandiva-{no_rc_version}-SNAPSHOT.jar
+      - arrow-orc-{no_rc_version}-SNAPSHOT.jar
+      - arrow-dataset-{no_rc_version}-SNAPSHOT.jar
 
   ############################## NuGet packages ###############################
 
diff --git a/docker-compose.yml b/docker-compose.yml
index 2e93ebd8616..53500b5f2cc 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -123,7 +123,8 @@ x-hierarchy:
   - impala
   - postgres
   - python-wheel-manylinux-2010
-  - python-wheel-manylinux-2014
+  - python-wheel-manylinux-2014:
+    - java-bundled-jars
   - python-wheel-manylinux-test-imports
   - python-wheel-manylinux-test-unittests
   - python-wheel-windows-vs2017
@@ -813,6 +814,25 @@ services:
         source: .
         target: "C:/arrow"
     command: arrow\\ci\\scripts\\python_wheel_windows_test.bat
+  
+  java-bundled-jars:
+    # Docker image 
+    image: ${REPO}:${ARCH}-java-bundled-jars-vcpkg-${VCPKG}
+    build:
+      args:
+        base: ${REPO}:${ARCH}-python-${PYTHON}-wheel-manylinux-2014-vcpkg-${VCPKG}
+        java: 1.8.0
+      context: .
+      dockerfile: ci/docker/java-bundled-jars.dockerfile
+      cache_from:
+        - ${REPO}:${ARCH}-java-bundled-jars-vcpkg-${VCPKG}
+    environment:
+      <<: *ccache
+    volumes:
+      - .:/arrow:delegated
+      - ${DOCKER_VOLUME_PREFIX}python-wheel-manylinux2014-ccache:/ccache:delegated
+    command: 
+      [/arrow/ci/scripts/java_bundled_jars_manylinux_build.sh /arrow /build /arrow/dist]
 
   ##############################  Integration #################################
 

From f47703e5237aca8cc081e140fd8a6120492649db Mon Sep 17 00:00:00 2001
From: Weston Pace <weston.pace@gmail.com>
Date: Fri, 14 May 2021 16:48:25 -0400
Subject: [PATCH 246/719] ARROW-12677: [Python] Add a mask argument to
 pyarrow.StructArray.from_arrays

This allows the user to supply an optional `mask` when creating a struct array.

 * The mask requirements are pretty strict (must be a boolean arrow array without nulls) compared with some of the other functions (e.g. `array.mask` accepts a wide variety of inputs).  I think this should be ok since this use case is probably rarer and there are other plenty of existing ways to convert other datatypes to an arrow array.
 * ~~Unfortunately, StructArray::Make interprets the "null buffer" as more of a validity buffer (1 = valid, 0 = null).  This is the opposite of everywhere else a `mask` is used.  I was torn between inverting the input buffer to mimic the python API and passing through directly to the C interface for simplicity.  I chose the simpler option but could be convinced otherwise.~~ Per request, I now invert the mask to align with the python API.

Closes #10272 from westonpace/feature/ARROW-12677--python-add-a-mask-argument-to-pyarrow-structarra

Authored-by: Weston Pace <weston.pace@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 python/pyarrow/array.pxi           | 57 ++++++++++++++++++++++++++++--
 python/pyarrow/tests/test_array.py | 41 +++++++++++++++++++++
 2 files changed, 95 insertions(+), 3 deletions(-)

diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 1c47ea3accc..df0ee85eee7 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -1593,6 +1593,39 @@ cdef class ListArray(BaseListArray):
         Returns
         -------
         list_array : ListArray
+
+        Examples
+        --------
+        >>> values = pa.array([1, 2, 3, 4])
+        >>> offsets = pa.array([0, 2, 4])
+        >>> pa.ListArray.from_arrays(offsets, values)
+        <pyarrow.lib.ListArray object at 0x7fbde226bf40>
+        [
+          [
+            0,
+            1
+          ],
+          [
+            2,
+            3
+          ]
+        ]
+
+        # nulls in the offsets array become null lists
+        >>> offsets = pa.array([0, None, 2, 4])
+        >>> pa.ListArray.from_arrays(offsets, values)
+        <pyarrow.lib.ListArray object at 0x7fbde226bf40>
+        [
+          [
+            0,
+            1
+          ],
+          null,
+          [
+            2,
+            3
+          ]
+        ]
         """
         cdef:
             Array _offsets, _values
@@ -2153,7 +2186,8 @@ cdef class StructArray(Array):
         return [pyarrow_wrap_array(arr) for arr in arrays]
 
     @staticmethod
-    def from_arrays(arrays, names=None, fields=None):
+    def from_arrays(arrays, names=None, fields=None, mask=None,
+                    memory_pool=None):
         """
         Construct StructArray from collection of arrays representing
         each field in the struct.
@@ -2167,6 +2201,10 @@ cdef class StructArray(Array):
             Field names for each struct child.
         fields : List[Field] (optional)
             Field instances for each struct child.
+        mask : pyarrow.Array[bool] (optional)
+            Indicate which values are null (True) or not null (False).
+        memory_pool : MemoryPool (optional)
+            For memory allocations, if required, otherwise uses default pool.
 
         Returns
         -------
@@ -2174,6 +2212,7 @@ cdef class StructArray(Array):
         """
         cdef:
             shared_ptr[CArray] c_array
+            shared_ptr[CBuffer] c_mask
             vector[shared_ptr[CArray]] c_arrays
             vector[c_string] c_names
             vector[shared_ptr[CField]] c_fields
@@ -2189,6 +2228,18 @@ cdef class StructArray(Array):
         if names is not None and fields is not None:
             raise ValueError('Must pass either names or fields, not both')
 
+        if mask is None:
+            c_mask = shared_ptr[CBuffer]()
+        elif isinstance(mask, Array):
+            if mask.type.id != Type_BOOL:
+                raise ValueError('Mask must be a pyarrow.Array of type bool')
+            if mask.null_count != 0:
+                raise ValueError('Mask must not contain nulls')
+            inverted_mask = _pc().invert(mask, memory_pool=memory_pool)
+            c_mask = pyarrow_unwrap_buffer(inverted_mask.buffers()[1])
+        else:
+            raise ValueError('Mask must be a pyarrow.Array of type bool')
+
         arrays = [asarray(x) for x in arrays]
         for arr in arrays:
             c_array = pyarrow_unwrap_array(arr)
@@ -2215,10 +2266,10 @@ cdef class StructArray(Array):
             # XXX Cannot pass "nullptr" for a shared_ptr<T> argument:
             # https://github.com/cython/cython/issues/3020
             c_result = CStructArray.MakeFromFieldNames(
-                c_arrays, c_names, shared_ptr[CBuffer](), -1, 0)
+                c_arrays, c_names, c_mask, -1, 0)
         else:
             c_result = CStructArray.MakeFromFields(
-                c_arrays, c_fields, shared_ptr[CBuffer](), -1, 0)
+                c_arrays, c_fields, c_mask, -1, 0)
         cdef Array result = pyarrow_wrap_array(GetResultValue(c_result))
         result.validate()
         return result
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index d8e75ab3dbf..54a13ba1ba4 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -668,6 +668,28 @@ def test_struct_from_arrays():
     with pytest.raises(ValueError, match="int64 vs int32"):
         pa.StructArray.from_arrays([a, b, c], fields=[fa2, fb, fc])
 
+    arrays = [a, b, c]
+    fields = [fa, fb, fc]
+    # With mask
+    mask = pa.array([True, False, False])
+    arr = pa.StructArray.from_arrays(arrays, fields=fields, mask=mask)
+    assert arr.to_pylist() == [None] + expected_list[1:]
+
+    arr = pa.StructArray.from_arrays(arrays, names=['a', 'b', 'c'], mask=mask)
+    assert arr.to_pylist() == [None] + expected_list[1:]
+
+    # Bad masks
+    with pytest.raises(ValueError, match='Mask must be'):
+        pa.StructArray.from_arrays(arrays, fields, mask=[True, False, False])
+
+    with pytest.raises(ValueError, match='not contain nulls'):
+        pa.StructArray.from_arrays(
+            arrays, fields, mask=pa.array([True, False, None]))
+
+    with pytest.raises(ValueError, match='Mask must be'):
+        pa.StructArray.from_arrays(
+            arrays, fields, mask=pa.chunked_array([mask]))
+
 
 def test_struct_array_from_chunked():
     # ARROW-11780
@@ -932,6 +954,25 @@ def test_fixed_size_list_from_arrays():
         pa.FixedSizeListArray.from_arrays(values, 5)
 
 
+def test_variable_list_from_arrays():
+    values = pa.array([1, 2, 3, 4], pa.int64())
+    offsets = pa.array([0, 2, 4])
+    result = pa.ListArray.from_arrays(offsets, values)
+    assert result.to_pylist() == [[1, 2], [3, 4]]
+    assert result.type.equals(pa.list_(pa.int64()))
+
+    offsets = pa.array([0, None, 2, 4])
+    result = pa.ListArray.from_arrays(offsets, values)
+    assert result.to_pylist() == [[1, 2], None, [3, 4]]
+
+    # raise if offset out of bounds
+    with pytest.raises(ValueError):
+        pa.ListArray.from_arrays(pa.array([-1, 2, 4]), values)
+
+    with pytest.raises(ValueError):
+        pa.ListArray.from_arrays(pa.array([0, 2, 5]), values)
+
+
 def test_union_from_dense():
     binary = pa.array([b'a', b'b', b'c', b'd'], type='binary')
     int64 = pa.array([1, 2, 3], type='int64')

From 068318fed5a2f3f77cf8cdd3ca2b0a86e0f8bace Mon Sep 17 00:00:00 2001
From: niranda perera <niranda.perera@gmail.com>
Date: Sat, 15 May 2021 14:29:38 +0800
Subject: [PATCH 247/719] ARROW-12774 : [C++][Compute]
 replace_substring_regex() creates invalid arrays => crash

fixing ARROW-12774

Closes #10320 from nirandaperera/ARROW-12774

Authored-by: niranda perera <niranda.perera@gmail.com>
Signed-off-by: Yibo Cai <yibo.cai@arm.com>
---
 cpp/src/arrow/compute/kernels/scalar_string.cc      | 2 +-
 cpp/src/arrow/compute/kernels/scalar_string_test.cc | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/cpp/src/arrow/compute/kernels/scalar_string.cc b/cpp/src/arrow/compute/kernels/scalar_string.cc
index be1cc83d158..65196b2a491 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string.cc
@@ -1332,7 +1332,7 @@ struct ReplaceSubString {
 
     if (batch[0].kind() == Datum::ARRAY) {
       // We already know how many strings we have, so we can use Reserve/UnsafeAppend
-      RETURN_NOT_OK(offset_builder.Reserve(batch[0].array()->length));
+      RETURN_NOT_OK(offset_builder.Reserve(batch[0].array()->length + 1));
       offset_builder.UnsafeAppend(0);  // offsets start at 0
 
       const ArrayData& input = *batch[0].array();
diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
index cb74b1449b5..a59634b7be8 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
@@ -499,6 +499,14 @@ TYPED_TEST(TestStringKernels, ReplaceSubstringRegex) {
   ReplaceSubstringOptions options_regex2{"(a.a)", "aba\\1"};
   this->CheckUnary("replace_substring_regex", R"(["aaaaaa"])", this->type(),
                    R"(["abaaaaabaaaa"])", &options_regex2);
+
+  // ARROW-12774
+  ReplaceSubstringOptions options_regex3{"X", "Y"};
+  this->CheckUnary("replace_substring_regex",
+                   R"(["A","A","A","A","A","A","A","A","A","A","A","A","A","A","A","A"])",
+                   this->type(),
+                   R"(["A","A","A","A","A","A","A","A","A","A","A","A","A","A","A","A"])",
+                   &options_regex3);
 }
 
 TYPED_TEST(TestStringKernels, ReplaceSubstringRegexLimited) {

From b893dbacb70903caef23bce4cd74ca5350a034af Mon Sep 17 00:00:00 2001
From: Dominik Moritz <domoritz@gmail.com>
Date: Sun, 16 May 2021 05:54:36 +0900
Subject: [PATCH 248/719] ARROW-12796: [JS] Support JSON output from benchmarks

Results are now more compact and I'd argue more readble.

<img width="1040" alt="Screen Shot 2021-05-14 at 21 30 32" src="https://user-images.githubusercontent.com/589034/118347939-abf9d500-b4fb-11eb-8281-1e987372a04b.png">

Supports getting the results as JSON. Here is an example snippet.

```json
[
  {
    "name": "name: 'lat', length: 1,000,000, type: Float32, test: gt, value: 0",
    "ops": 246.1673421492707,
    "margin": 0.75,
    "options": {
      "delay": 0.005,
      "initCount": 1,
      "minTime": 0.05,
      "maxTime": 5,
      "minSamples": 5
    },
    "samples": 88,
    "promise": false,
    "details": {
      "min": 0.0038867760769230766,
      "max": 0.004488651,
      "mean": 0.004062277275568182,
      "median": 0.0040335091923076926,
      "standardDeviation": 0.0001450310669260862,
      "marginOfError": 0.000030302312815210857,
      "relativeMarginOfError": 0.7459439806696241,
      "standardErrorOfMean": 0.00001546036368123003,
      "sampleVariance": 2.1034010373718892e-8,
      "sampleResults": [
        0.0038867760769230766,
        0.003890444461538462,
        0.0038912562307692306,
        0.0038914766153846157,
        0.0038915368461538463,
        0.0038922476153846154,
        0.0038974643076923076,
        0.0038987466923076923,
        0.0039004746153846154,
        0.0039018483846153845,
        0.003905098538461539,
        0.0039061484615384614,
        0.00392067376923077,
        0.003926968,
        0.003928343999999999,
        0.0039290314615384615,
        0.003931758230769231,
        0.003938228461538461,
        0.0039384640769230764,
        0.00394105623076923,
        0.003958338923076923,
        0.003959598461538461,
        0.00396191123076923,
        0.0039643883076923075,
        0.0039722566923076925,
        0.003978636230769231,
        0.0039851349230769235,
        0.003986564307692308,
        0.003987425230769231,
        0.003987491461538461,
        0.003988166923076923,
        0.003993636461538461,
        0.003996644538461538,
        0.004000686923076923,
        0.004008064846153846,
        0.004012332538461539,
        0.004016019769230769,
        0.004024013,
        0.004024081076923077,
        0.004024150538461538,
        0.004027658307692307,
        0.0040293960769230775,
        0.00403129323076923,
        0.004033234461538462,
        0.004033783923076923,
        0.004034902076923077,
        0.004039443923076923,
        0.004040901615384616,
        0.004042571,
        0.0040440490769230765,
        0.004044344,
        0.004047865923076923,
        0.004051974769230769,
        0.004051975384615385,
        0.004052478,
        0.0040536076153846155,
        0.004059857076923077,
        0.0040602037692307694,
        0.004066126923076923,
        0.004071497692307692,
        0.0040739736153846155,
        0.0040783087692307695,
        0.004087748,
        0.004093152923076923,
        0.004110979076923077,
        0.004123302846153846,
        0.004127555461538462,
        0.004129532384615385,
        0.004132983538461538,
        0.004133533615384615,
        0.004145048846153846,
        0.004151413769230769,
        0.004195234583333333,
        0.004196755076923077,
        0.004203244384615385,
        0.0042411549230769235,
        0.004246916307692307,
        0.004261487538461539,
        0.0042885311666666665,
        0.0043158081666666665,
        0.0043421300000000005,
        0.004347883,
        0.004364019461538462,
        0.004386421615384615,
        0.0043920053076923074,
        0.004396970333333333,
        0.0044709042307692305,
        0.004488651
      ]
    },
    "completed": true
  },
  {
    "name": "name: 'lng', length: 1,000,000, type: Float32, test: gt, value: 0",
    "ops": 245.46322560133137,
    "margin": 0.97,
    "options": {
      "delay": 0.005,
      "initCount": 1,
      "minTime": 0.05,
      "maxTime": 5,
      "minSamples": 5
    },
    "samples": 89,
    "promise": false,
    "details": {
      "min": 0.0038802434615384613,
      "max": 0.005163148846153847,
      "mean": 0.004073930005401901,
      "median": 0.0040353953846153845,
      "standardDeviation": 0.0001897174677863493,
      "marginOfError": 0.000039415622276126204,
      "relativeMarginOfError": 0.967508578298163,
      "standardErrorOfMean": 0.000020110011365370514,
      "sampleVariance": 3.599271758326448e-8,
      "sampleResults": [
        0.0038802434615384613,
        0.0038827923076923078,
        0.003884043769230769,
        0.003892362769230769,
        0.0039080451538461534,
        0.003917222307692308,
        0.0039233262307692305,
        0.003924073153846154,
        0.003928673461538462,
        0.003929145230769231,
        0.003939217615384615,
        0.003944111384615385,
        0.003945712384615384,
        0.003947506769230769,
        0.003948513461538462,
        0.003957425846153846,
        0.00396102176923077,
        0.0039657037692307695,
        0.003968355923076923,
        0.003968570384615385,
        0.003972680461538461,
        0.003972802076923077,
        0.003976657076923077,
        0.003976809769230769,
        0.0039775893076923075,
        0.003978900692307693,
        0.003979527846153846,
        0.003983187076923077,
        0.003983268538461538,
        0.003983302307692308,
        0.003993201692307692,
        0.003994444692307692,
        0.004002045923076923,
        0.0040066217692307695,
        0.004008421230769231,
        0.004011795461538461,
        0.004020453153846154,
        0.0040281254615384616,
        0.004029364538461538,
        0.004029995615384615,
        0.004032546461538462,
        0.004032629307692308,
        0.004033298846153846,
        0.0040333729230769225,
        0.0040353953846153845,
        0.004042062692307692,
        0.004042307076923077,
        0.004043482153846154,
        0.004047125,
        0.004050561923076923,
        0.004055333538461539,
        0.004062441153846154,
        0.004062654461538462,
        0.004063269769230769,
        0.004065189538461538,
        0.004065284538461538,
        0.004065946461538462,
        0.004067272846153846,
        0.004067796923076923,
        0.004073014307692308,
        0.004092591384615384,
        0.00409801,
        0.0040981576153846155,
        0.004098685615384615,
        0.004106365769230769,
        0.004106560153846154,
        0.004108112461538462,
        0.004113928615384615,
        0.004120177,
        0.004124891538461539,
        0.004126251153846154,
        0.004137276615384615,
        0.004142810461538462,
        0.004148863538461538,
        0.0041514844615384615,
        0.004159885,
        0.004160035692307692,
        0.004167559538461539,
        0.004183368,
        0.004207832692307693,
        0.004226383615384615,
        0.004251580846153846,
        0.00429685025,
        0.004313780615384615,
        0.004316540769230769,
        0.004336609,
        0.004430088307692308,
        0.005025693769230769,
        0.005163148846153847
      ]
    },
    "completed": true
  },
  {
    "name": "name: 'origin', length: 1,000,000, type: Dictionary<Int8, Utf8>, test: eq, value: Seattle",
    "ops": 0.2577444145672938,
    "margin": 16.4,
    "options": {
      "delay": 0.005,
      "initCount": 1,
      "minTime": 0.05,
      "maxTime": 5,
      "minSamples": 5
    },
    "samples": 5,
    "promise": false,
    "details": {
      "min": 3.211415446,
      "max": 4.398277249,
      "mean": 3.8798124944000003,
      "median": 4.105992699,
      "standardDeviation": 0.5123862225874,
      "marginOfError": 0.6361095316489571,
      "relativeMarginOfError": 16.39536788355359,
      "standardErrorOfMean": 0.22914608488795288,
      "sampleVariance": 0.2625396410973846,
      "sampleResults": [
        3.211415446,
        3.467166249,
        4.105992699,
        4.2162108289999995,
        4.398277249
      ]
    },
    "completed": true
  }
]
```

Closes #10331 from domoritz/bench

Authored-by: Dominik Moritz <domoritz@gmail.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 js/DEVELOP.md           |   2 +-
 js/package.json         |   2 +-
 js/perf/config.js       |  22 ++-
 js/perf/index.js        | 376 +++++++++++++++++++---------------------
 js/perf/table_config.js |  48 -----
 js/yarn.lock            | 112 +++++++++++-
 6 files changed, 306 insertions(+), 256 deletions(-)
 delete mode 100644 js/perf/table_config.js

diff --git a/js/DEVELOP.md b/js/DEVELOP.md
index 186032c3ec8..88ec899e5a2 100644
--- a/js/DEVELOP.md
+++ b/js/DEVELOP.md
@@ -72,7 +72,7 @@ Uses [lerna](https://github.com/lerna/lerna) to publish each build target to npm
 
 # Running the Performance Benchmarks
 
-First, compile the bundles with `yarn build` and generate perf data with `yarn create:perfdata`. Then you can run the benchmarks with `yarn perf`. You can change the target you want to test by changing the imports in `perf/index.js`.
+First, compile the bundles with `yarn build` and generate perf data with `yarn create:perfdata`. Then you can run the benchmarks with `yarn perf`. You can change the target you want to test by changing the imports in `perf/index.js`. To print the results to stderr as JSON, add the `--json` flag (e.g. `yarn perf --json 2> perf.json`).
 
 # Updating the Arrow format flatbuffers generated code
 
diff --git a/js/package.json b/js/package.json
index 976df640046..4d05ac416a0 100644
--- a/js/package.json
+++ b/js/package.json
@@ -70,7 +70,7 @@
     "@typescript-eslint/eslint-plugin": "^4.22.0",
     "@typescript-eslint/parser": "^4.22.0",
     "async-done": "1.3.1",
-    "benchmark": "2.1.4",
+    "benny": "3.6.15",
     "cpy": "^8.1.2",
     "cross-env": "^7.0.3",
     "del-cli": "3.0.1",
diff --git a/js/perf/config.js b/js/perf/config.js
index cca10801547..f733c67f933 100644
--- a/js/perf/config.js
+++ b/js/perf/config.js
@@ -20,11 +20,29 @@ const path = require('path');
 const glob = require('glob');
 
 const config = [];
-const filenames = glob.sync(path.resolve(__dirname, `../test/data/cpp/stream`, `*.arrow`));
+const filenames = glob.sync(path.resolve(__dirname, `../test/data/tables/`, `*.arrow`));
+
+const countBys = {
+    tracks: ['origin', 'destination']
+}
+const counts = {
+    tracks: [
+        {column: 'lat',    test: 'gt', value: 0        },
+        {column: 'lng',    test: 'gt', value: 0        },
+        {column: 'origin', test: 'eq', value: 'Seattle'},
+    ]
+}
 
 for (const filename of filenames) {
     const { name } = path.parse(filename);
-    config.push({ name, buffers: [fs.readFileSync(filename)] });
+    if (name in counts) {
+        config.push({
+            name,
+            buffers: [fs.readFileSync(filename)],
+            countBys: countBys[name],
+            counts: counts[name],
+        });
+    }
 }
 
 module.exports = config;
diff --git a/js/perf/index.js b/js/perf/index.js
index 7535c9fe729..57f170ed4e9 100644
--- a/js/perf/index.js
+++ b/js/perf/index.js
@@ -19,230 +19,206 @@
 // const { predicate, Table, RecordBatchReader } = require('../targets/es5/umd');
 // const { predicate, Table, RecordBatchReader } = require('../targets/es5/cjs');
 // const { predicate, Table, RecordBatchReader } = require('../targets/es2015/umd');
-const { predicate, Table, RecordBatchReader } = require('../targets/es2015/cjs');
+const { predicate, Table, DataFrame, RecordBatchReader } = require('../targets/es2015/cjs');
+const kleur = require('kleur');
+const b = require('benny');
 const { col } = predicate;
 
-const Benchmark = require('benchmark');
-
-const suites = [];
-
-for (let { name, buffers } of require('./table_config')) {
-    const parseSuiteName = `Parse "${name}"`;
-    const sliceSuiteName = `Slice "${name}" vectors`;
-    const iterateSuiteName = `Iterate "${name}" vectors`;
-    const getByIndexSuiteName = `Get "${name}" values by index`;
-    const sliceToArraySuiteName = `Slice toArray "${name}" vectors`;
-    suites.push(createTestSuite(parseSuiteName, createFromTableTest(name, buffers)));
-    suites.push(createTestSuite(parseSuiteName, createReadBatchesTest(name, buffers)));
-    const table = Table.from(buffers), schema = table.schema;
-    suites.push(...schema.fields.map((f, i) => createTestSuite(getByIndexSuiteName, createGetByIndexTest(table.getColumnAt(i), f.name))));
-    suites.push(...schema.fields.map((f, i) => createTestSuite(iterateSuiteName, createIterateTest(table.getColumnAt(i), f.name))));
-    suites.push(...schema.fields.map((f, i) => createTestSuite(sliceToArraySuiteName, createSliceToArrayTest(table.getColumnAt(i), f.name))));
-    suites.push(...schema.fields.map((f, i) => createTestSuite(sliceSuiteName, createSliceTest(table.getColumnAt(i), f.name))));
-}
-
-for (let {name, buffers, countBys, counts} of require('./table_config')) {
-    const table = Table.from(buffers);
-
-    const tableIterateSuiteName = `Table Iterate "${name}"`;
-    const dfCountBySuiteName = `DataFrame Count By "${name}"`;
-    const dfFilterCountSuiteName = `DataFrame Filter-Scan Count "${name}"`;
-    const dfDirectCountSuiteName = `DataFrame Direct Count "${name}"`;
-    const dfFilterIterSuiteName = `DataFrame Filter-Iterate "${name}"`;
+const args = process.argv.slice(2);
+const json = args[0] === '--json';
 
-    suites.push(createTestSuite(tableIterateSuiteName, createTableIterateTest(table)));
-    suites.push(...countBys.map((countBy) => createTestSuite(dfCountBySuiteName, createDataFrameCountByTest(table, countBy))));
-    suites.push(...counts.map(({ col, test, value }) => createTestSuite(dfFilterCountSuiteName, createDataFrameFilterCountTest(table, col, test, value))));
-    suites.push(...counts.map(({ col, test, value }) => createTestSuite(dfDirectCountSuiteName, createDataFrameDirectCountTest(table, col, test, value))));
-    suites.push(...counts.map(({ col, test, value }) => createTestSuite(dfFilterIterSuiteName, createDataFrameFilterIterateTest(table, col, test, value))));
+const formatter = new Intl.NumberFormat();
+function formatNumber(number, precision) {
+    const rounded = number > precision * 10 ? Math.round(number) : parseFloat((number).toPrecision(precision));
+    return formatter.format(rounded)
 }
 
-console.log('Running apache-arrow performance tests...\n');
-
-run();
-
-function run() {
-    const suite = suites.shift();
-    suite && suite.on('complete', function() {
-        console.log(suite.name + ':\n' + this.map(function(x) {
-            const str = x.toString();
-            const meanMsPerOp = Math.round(x.stats.mean * 100000)/100;
-            const sliceOf60FPS = Math.round((meanMsPerOp / (1000/60)) * 100000)/1000;
-            return `${str}\n   avg: ${meanMsPerOp}ms\n   ${sliceOf60FPS}% of a frame @ 60FPS ${x.suffix || ''}`;
-        }).join('\n') + '\n');
-        if (suites.length > 0) {
-            setTimeout(run, 1000);
-        }
-    })
-    .run({ async: true });
-}
+const results = []
 
-function createTestSuite(name, test) {
-    return new Benchmark.Suite(name, { async: true }).add(test);
+function cycle(result, _summary) {
+    const duration = result.details.median * 1000;
+    if (json) {
+        results.push(result);
+    }
+    console.log(
+        `${kleur.cyan(result.name)} ${formatNumber(result.ops, 3)} ops/s ±${result.margin.toPrecision(2)}%, ${formatNumber(duration, 2)} ms, ${kleur.gray(result.samples + ' samples')}`,
+    );
 }
 
-function createFromTableTest(name, buffers) {
-    let table;
-    return {
-        async: true,
-        name: `Table.from\n`,
-        fn() { table = Table.from(buffers); }
-    };
-}
+for (const { name, buffers } of require('./config')) {
+    b.suite(
+        `Parse "${name}"`,
 
-function createReadBatchesTest(name, buffers) {
-    let recordBatch;
-    return {
-        async: true,
-        name: `readBatches\n`,
-        fn() { for (recordBatch of RecordBatchReader.from(buffers)) {} }
-    };
-}
+        b.add(`Table.from`, () => {
+            Table.from(buffers);
+        }),
 
-function createSliceTest(vector, name) {
-    let xs;
-    return {
-        async: true,
-        name: `name: '${name}', length: ${vector.length}, type: ${vector.type}\n`,
-        fn() { xs = vector.slice(); }
-    };
-}
+        b.add(`readBatches`, () => {
+            for (recordBatch of RecordBatchReader.from(buffers)) {}
+        }),
 
-function createSliceToArrayTest(vector, name) {
-    let xs;
-    return {
-        async: true,
-        name: `name: '${name}', length: ${vector.length}, type: ${vector.type}\n`,
-        fn() { xs = vector.slice().toArray(); }
-    };
-}
+        b.cycle(cycle)
+    );
 
-function createIterateTest(vector, name) {
-    let value;
-    return {
-        async: true,
-        name: `name: '${name}', length: ${vector.length}, type: ${vector.type}\n`,
-        fn() { for (value of vector) {} }
-    };
-}
+    const table = Table.from(buffers)
+    const schema = table.schema;
 
-function createGetByIndexTest(vector, name) {
-    let value;
-    return {
-        async: true,
-        name: `name: '${name}', length: ${vector.length}, type: ${vector.type}\n`,
-        fn() {
-            for (let i = -1, n = vector.length; ++i < n;) {
-                value = vector.get(i);
+    const suites = [{
+            name: `Get "${name}" values by index`,
+            fn(vector) {
+                for (let i = -1, n = vector.length; ++i < n;) {
+                    value = vector.get(i);
+                }
             }
-        }
-    };
+        }, {
+            name: `Iterate "${name}" vectors`,
+            fn(vector) { for (value of vector) {} }
+        }, {
+            name: `Slice toArray "${name}" vectors`,
+            fn(vector) { xs = vector.slice().toArray(); }
+        }, {
+            name: `Slice "${name}" vectors`,
+            fn(vector) { xs = vector.slice(); }
+        }];
+
+    for (const {name, fn} of suites) {
+        b.suite(
+            name,
+
+            ...schema.fields.map((f, i) => {
+                const vector = table.getColumnAt(i);
+                return b.add(`name: '${f.name}', length: ${formatNumber(vector.length)}, type: ${vector.type}`, () => {
+                    fn(vector)
+                })
+            }),
+
+            b.cycle(cycle)
+        );
+    }
 }
 
-function createTableIterateTest(table) {
-    let value;
-    return {
-        async: true,
-        name: `length: ${table.length}\n`,
-        fn() { for (value of table) {} }
-    };
-}
 
-function createDataFrameDirectCountTest(table, column, test, value) {
-    let sum, colidx = table.schema.fields.findIndex((c)=>c.name === column), op;
-
-    if (test == 'gt') {
-        op = () => {
-            sum = 0;
-            let batches = table.chunks;
-            let numBatches = batches.length;
-            for (let batchIndex = -1; ++batchIndex < numBatches;) {
-                // load batches
-                const batch = batches[batchIndex];
-                const vector = batch.getChildAt(colidx);
-                // yield all indices
-                for (let index = -1, length = batch.length; ++index < length;) {
-                    sum += (vector.get(index) >= value);
-                }
-            }
-            return sum;
-        }
-    } else if (test == 'eq') {
-        op = () => {
-            sum = 0;
-            let batches = table.chunks;
-            let numBatches = batches.length;
-            for (let batchIndex = -1; ++batchIndex < numBatches;) {
-                // load batches
-                const batch = batches[batchIndex];
-                const vector = batch.getChildAt(colidx);
-                // yield all indices
-                for (let index = -1, length = batch.length; ++index < length;) {
-                    sum += (vector.get(index) === value);
-                }
-            }
-            return sum;
-        }
-    } else {
-        throw new Error(`Unrecognized test "${test}"`);
-    }
+for (const { name, buffers, countBys, counts } of require('./config')) {
+    const df = DataFrame.from(buffers);
 
-    return {
-        async: true,
-        name: `name: '${column}', length: ${table.length}, type: ${table.getColumnAt(colidx).type}, test: ${test}, value: ${value}\n`,
-        fn: op
-    };
-}
+    b.suite(
+        `DataFrame Iterate "${name}"`,
 
-function createDataFrameCountByTest(table, column) {
-    let colidx = table.schema.fields.findIndex((c)=> c.name === column);
+        b.add(`length: ${formatNumber(df.length)}`, () => {
+            for (value of df) {}
+        }),
 
-    return {
-        async: true,
-        name: `name: '${column}', length: ${table.length}, type: ${table.getColumnAt(colidx).type}\n`,
-        fn() {
-            table.countBy(column);
-        }
-    };
-}
+        b.cycle(cycle)
+    );
 
-function createDataFrameFilterCountTest(table, column, test, value) {
-    let colidx = table.schema.fields.findIndex((c)=> c.name === column);
-    let df;
+    b.suite(
+        `DataFrame Count By "${name}"`,
 
-    if (test == 'gt') {
-        df = table.filter(col(column).gt(value));
-    } else if (test == 'eq') {
-        df = table.filter(col(column).eq(value));
-    } else {
-        throw new Error(`Unrecognized test "${test}"`);
-    }
+        ...countBys.map((column) => b.add(
+            `name: '${column}', length: ${formatNumber(df.length)}, type: ${df.schema.fields.find((c)=> c.name === column).type}`,
+            () => df.countBy(column)
+        )),
 
-    return {
-        async: true,
-        name: `name: '${column}', length: ${table.length}, type: ${table.getColumnAt(colidx).type}, test: ${test}, value: ${value}\n`,
-        fn() {
-            df.count();
-        }
-    };
-}
+        b.cycle(cycle)
+    );
 
-function createDataFrameFilterIterateTest(table, column, test, value) {
-    let colidx = table.schema.fields.findIndex((c)=> c.name === column);
-    let df;
+    b.suite(
+        `DataFrame Filter-Scan Count "${name}"`,
 
-    if (test == 'gt') {
-        df = table.filter(col(column).gt(value));
-    } else if (test == 'eq') {
-        df = table.filter(col(column).eq(value));
-    } else {
-        throw new Error(`Unrecognized test "${test}"`);
-    }
+        ...counts.map(({ column, test, value }) => b.add(
+            `name: '${column}', length: ${formatNumber(df.length)}, type: ${df.schema.fields.find((c)=> c.name === column).type}, test: ${test}, value: ${value}`,
+            () => {
+                let filteredDf;
+                if (test == 'gt') {
+                    filteredDf = df.filter(col(column).gt(value));
+                } else if (test == 'eq') {
+                    filteredDf = df.filter(col(column).eq(value));
+                } else {
+                    throw new Error(`Unrecognized test "${test}"`);
+                }
 
-    return {
-        async: true,
-        name: `name: '${column}', length: ${table.length}, type: ${table.getColumnAt(colidx).type}, test: ${test}, value: ${value}\n`,
-        fn() { for (value of df) {} }
-    };
-}
+                return () => filteredDf.count();
+            }
+        )),
+
+        b.cycle(cycle)
+    );
+
+    b.suite(
+        `DataFrame Filter-Iterate "${name}"`,
+
+        ...counts.map(({ column, test, value }) => b.add(
+            `name: '${column}', length: ${formatNumber(df.length)}, type: ${df.schema.fields.find((c)=> c.name === column).type}, test: ${test}, value: ${value}`,
+            () => {
+                let filteredDf;
+                if (test == 'gt') {
+                    filteredDf = df.filter(col(column).gt(value));
+                } else if (test == 'eq') {
+                    filteredDf = df.filter(col(column).eq(value));
+                } else {
+                    throw new Error(`Unrecognized test "${test}"`);
+                }
+
+                return () => {
+                    for (value of filteredDf) {}
+                }
+            }
+        )),
+
+        b.cycle(cycle)
+    );
+
+    b.suite(
+        `DataFrame Direct Count "${name}"`,
+
+        ...counts.map(({ column, test, value }) => b.add(
+            `name: '${column}', length: ${formatNumber(df.length)}, type: ${df.schema.fields.find((c)=> c.name === column).type}, test: ${test}, value: ${value}`,
+            () => {
+                let colidx = df.schema.fields.findIndex((c)=> c.name === column);
+
+                if (test == 'gt') {
+                    return () => {
+                        sum = 0;
+                        let batches = df.chunks;
+                        let numBatches = batches.length;
+                        for (let batchIndex = -1; ++batchIndex < numBatches;) {
+                            // load batches
+                            const batch = batches[batchIndex];
+                            const vector = batch.getChildAt(colidx);
+                            // yield all indices
+                            for (let index = -1, length = batch.length; ++index < length;) {
+                                sum += (vector.get(index) >= value);
+                            }
+                        }
+                        return sum;
+                    }
+                } else if (test == 'eq') {
+                    return () => {
+                        sum = 0;
+                        let batches = df.chunks;
+                        let numBatches = batches.length;
+                        for (let batchIndex = -1; ++batchIndex < numBatches;) {
+                            // load batches
+                            const batch = batches[batchIndex];
+                            const vector = batch.getChildAt(colidx);
+                            // yield all indices
+                            for (let index = -1, length = batch.length; ++index < length;) {
+                                sum += (vector.get(index) === value);
+                            }
+                        }
+                        return sum;
+                    }
+                } else {
+                    throw new Error(`Unrecognized test "${test}"`);
+                }
+            }
+        )),
 
+        b.cycle(cycle),
+
+        b.complete(() => {
+            // last benchmark finished
+            json && process.stderr.write(JSON.stringify(results, null, 2))
+        })
+    );
+}
diff --git a/js/perf/table_config.js b/js/perf/table_config.js
deleted file mode 100644
index 2946b5ab26a..00000000000
--- a/js/perf/table_config.js
+++ /dev/null
@@ -1,48 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-const fs = require('fs');
-const path = require('path');
-const glob = require('glob');
-
-const config = [];
-const filenames = glob.sync(path.resolve(__dirname, `../test/data/tables/`, `*.arrow`));
-
-const countBys = {
-    tracks: ['origin', 'destination']
-}
-const counts = {
-    tracks: [
-        {col: 'lat',    test: 'gt', value: 0        },
-        {col: 'lng',    test: 'gt', value: 0        },
-        {col: 'origin', test: 'eq', value: 'Seattle'},
-    ]
-}
-
-for (const filename of filenames) {
-    const { name } = path.parse(filename);
-    if (name in counts) {
-        config.push({
-            name,
-            buffers: [fs.readFileSync(filename)],
-            countBys: countBys[name],
-            counts: counts[name],
-        });
-    }
-}
-
-module.exports = config;
diff --git a/js/yarn.lock b/js/yarn.lock
index 98e5cf4c694..993af36408e 100644
--- a/js/yarn.lock
+++ b/js/yarn.lock
@@ -2,6 +2,40 @@
 # yarn lockfile v1
 
 
+"@arrows/array@^1.4.0":
+  version "1.4.1"
+  resolved "https://registry.yarnpkg.com/@arrows/array/-/array-1.4.1.tgz#a6580a08cee219755ca9a8eb14e956d3c29a5508"
+  integrity sha512-MGYS8xi3c4tTy1ivhrVntFvufoNzje0PchjEz6G/SsWRgUKxL4tKwS6iPdO8vsaJYldagAeWMd5KRD0aX3Q39g==
+  dependencies:
+    "@arrows/composition" "^1.2.2"
+
+"@arrows/composition@^1.0.0", "@arrows/composition@^1.2.2":
+  version "1.2.2"
+  resolved "https://registry.yarnpkg.com/@arrows/composition/-/composition-1.2.2.tgz#d0a213cac8f8c36c1c75856a1e6ed940c27e9169"
+  integrity sha512-9fh1yHwrx32lundiB3SlZ/VwuStPB4QakPsSLrGJFH6rCXvdrd060ivAZ7/2vlqPnEjBkPRRXOcG1YOu19p2GQ==
+
+"@arrows/dispatch@^1.0.2":
+  version "1.0.3"
+  resolved "https://registry.yarnpkg.com/@arrows/dispatch/-/dispatch-1.0.3.tgz#c4c06260f89e9dd4ce280df3712980aa2f3de976"
+  integrity sha512-v/HwvrFonitYZM2PmBlAlCqVqxrkIIoiEuy5bQgn0BdfvlL0ooSBzcPzTMrtzY8eYktPyYcHg8fLbSgyybXEqw==
+  dependencies:
+    "@arrows/composition" "^1.2.2"
+
+"@arrows/error@^1.0.2":
+  version "1.0.2"
+  resolved "https://registry.yarnpkg.com/@arrows/error/-/error-1.0.2.tgz#4e68036f901118ba6f1de88656ef6be49e650414"
+  integrity sha512-yvkiv1ay4Z3+Z6oQsUkedsQm5aFdyPpkBUQs8vejazU/RmANABx6bMMcBPPHI4aW43VPQmXFfBzr/4FExwWTEA==
+
+"@arrows/multimethod@^1.1.6":
+  version "1.1.7"
+  resolved "https://registry.yarnpkg.com/@arrows/multimethod/-/multimethod-1.1.7.tgz#bc7c26c3aa7703fc967e65da4f00718b1428eb4a"
+  integrity sha512-EjHD3XuGAV4G28rm7mu8k7zQJh/EOizh104/p9i2ofGcnL5mgKONFH/Bq6H3SJjM+WDAlKcR9WBpNhaAKCnH2g==
+  dependencies:
+    "@arrows/array" "^1.4.0"
+    "@arrows/composition" "^1.2.2"
+    "@arrows/error" "^1.0.2"
+    fast-deep-equal "^3.1.1"
+
 "@babel/code-frame@7.12.11":
   version "7.12.11"
   resolved "https://registry.yarnpkg.com/@babel/code-frame/-/code-frame-7.12.11.tgz#f4ad435aa263db935b8f10f2c552d23fb716a63f"
@@ -2089,7 +2123,7 @@ ansi-escapes@^3.2.0:
   resolved "https://registry.yarnpkg.com/ansi-escapes/-/ansi-escapes-3.2.0.tgz#8780b98ff9dbf5638152d1f1fe5c1d7b4442976b"
   integrity sha512-cBhpre4ma+U0T1oM5fXg7Dy1Jw7zzwv7lt/GoCpr+hDQJoYnKVPLL4dCvSEFMmQurOQvSrwT7SL/DAlhBI97RQ==
 
-ansi-escapes@^4.2.1:
+ansi-escapes@^4.2.1, ansi-escapes@^4.3.0:
   version "4.3.2"
   resolved "https://registry.yarnpkg.com/ansi-escapes/-/ansi-escapes-4.3.2.tgz#6b2291d1db7d98b6521d5f1efa42d0f3a9feb65e"
   integrity sha512-gKXj5ALrKWQLsYG9jlTRmR/xKluxHV+Z9QEwNIgCfM1/uwPMCuzVVnh5mwTd+OuBZcwSIMbqssNWRm1lE51QaQ==
@@ -2550,7 +2584,7 @@ before-after-hook@^2.0.0:
   resolved "https://registry.yarnpkg.com/before-after-hook/-/before-after-hook-2.2.1.tgz#73540563558687586b52ed217dad6a802ab1549c"
   integrity sha512-/6FKxSTWoJdbsLDF8tdIjaRiFXiE6UHsEHE3OPI/cwPURCVi1ukP0gmLn7XWEiFk5TcwQjjY5PWsU+j+tgXgmw==
 
-benchmark@2.1.4:
+benchmark@^2.1.4:
   version "2.1.4"
   resolved "https://registry.yarnpkg.com/benchmark/-/benchmark-2.1.4.tgz#09f3de31c916425d498cc2ee565a0ebf3c2a5629"
   integrity sha1-CfPeMckWQl1JjMLuVloOvzwqVik=
@@ -2558,6 +2592,22 @@ benchmark@2.1.4:
     lodash "^4.17.4"
     platform "^1.3.3"
 
+benny@3.6.15:
+  version "3.6.15"
+  resolved "https://registry.yarnpkg.com/benny/-/benny-3.6.15.tgz#930826819b89546b274febe803da2d248a676caa"
+  integrity sha512-kq6XVGGYVou3Y8KNPs3SEF881vi5fJ8sIf9w69D2rreiNfRicWVWK6u6/mObMw6BiexoHHumtipn5gcu0Tngng==
+  dependencies:
+    "@arrows/composition" "^1.0.0"
+    "@arrows/dispatch" "^1.0.2"
+    "@arrows/multimethod" "^1.1.6"
+    benchmark "^2.1.4"
+    fs-extra "^9.0.1"
+    json2csv "^5.0.4"
+    kleur "^4.1.3"
+    log-update "^4.0.0"
+    prettier "^2.1.2"
+    stats-median "^1.0.1"
+
 big.js@^5.2.2:
   version "5.2.2"
   resolved "https://registry.yarnpkg.com/big.js/-/big.js-5.2.2.tgz#65f0af382f578bcdc742bd9c281e9cb2d7768328"
@@ -3051,6 +3101,13 @@ cli-cursor@^2.1.0:
   dependencies:
     restore-cursor "^2.0.0"
 
+cli-cursor@^3.1.0:
+  version "3.1.0"
+  resolved "https://registry.yarnpkg.com/cli-cursor/-/cli-cursor-3.1.0.tgz#264305a7ae490d1d03bf0c9ba7c925d1753af307"
+  integrity sha512-I/zHAwsKf9FqGoXM4WWRACob9+SNukZTd94DWF57E4toouRulbCxcUh6RKUEOQlYTHJnzkPMySvPNaaSLNfLZw==
+  dependencies:
+    restore-cursor "^3.1.0"
+
 cli-width@^2.0.0:
   version "2.2.1"
   resolved "https://registry.yarnpkg.com/cli-width/-/cli-width-2.2.1.tgz#b0433d0b4e9c847ef18868a4ef16fd5fc8271c48"
@@ -3232,6 +3289,11 @@ commander@^2.20.0:
   resolved "https://registry.yarnpkg.com/commander/-/commander-2.20.3.tgz#fd485e84c03eb4881c20722ba48035e8531aeb33"
   integrity sha512-GpVkmM8vF2vQUkj2LvZmD35JxeJOLCwJ9cUkugyk2nuhbv3+mJvpLYYt+0+USMxE+oj+ey/lJEnhZw75x/OMcQ==
 
+commander@^6.1.0:
+  version "6.2.1"
+  resolved "https://registry.yarnpkg.com/commander/-/commander-6.2.1.tgz#0792eb682dfbc325999bb2b84fddddba110ac73c"
+  integrity sha512-U7VdrJFnJgo4xjrHpTzu0yrHPGImdsmD95ZlgYSEajAn2JKzDhDTPG9kBTefmObL2w/ngeZnilk+OV9CG3d7UA==
+
 commondir@^1.0.1:
   version "1.0.1"
   resolved "https://registry.yarnpkg.com/commondir/-/commondir-1.0.1.tgz#ddd800da0c66127393cca5950ea968a3aaf1253b"
@@ -4687,7 +4749,7 @@ fs-extra@^8.1.0:
     jsonfile "^4.0.0"
     universalify "^0.1.0"
 
-fs-extra@^9.1.0:
+fs-extra@^9.0.1, fs-extra@^9.1.0:
   version "9.1.0"
   resolved "https://registry.yarnpkg.com/fs-extra/-/fs-extra-9.1.0.tgz#5954460c764a8da2094ba3554bf839e6b9a7c86d"
   integrity sha512-hcg3ZmepS30/7BSFqRvoo3DOMQu7IjqxO5nCDt+zM9XWjb33Wg7ziNT+Qvqbuc3+gWpzO02JubVyk2G4Zvo1OQ==
@@ -6519,6 +6581,15 @@ json-stringify-safe@^5.0.1, json-stringify-safe@~5.0.1:
   resolved "https://registry.yarnpkg.com/json-stringify-safe/-/json-stringify-safe-5.0.1.tgz#1296a2d58fd45f19a0f6ce01d65701e2c735b6eb"
   integrity sha1-Epai1Y/UXxmg9s4B1lcB4sc1tus=
 
+json2csv@^5.0.4:
+  version "5.0.6"
+  resolved "https://registry.yarnpkg.com/json2csv/-/json2csv-5.0.6.tgz#590e0e1b9579e59baa53bda0c0d840f4d8009687"
+  integrity sha512-0/4Lv6IenJV0qj2oBdgPIAmFiKKnh8qh7bmLFJ+/ZZHLjSeiL3fKKGX3UryvKPbxFbhV+JcYo9KUC19GJ/Z/4A==
+  dependencies:
+    commander "^6.1.0"
+    jsonparse "^1.3.1"
+    lodash.get "^4.4.2"
+
 json5@2.x, json5@^2.1.2:
   version "2.2.0"
   resolved "https://registry.yarnpkg.com/json5/-/json5-2.2.0.tgz#2dfefe720c6ba525d9ebd909950f0515316c89a3"
@@ -6549,7 +6620,7 @@ jsonfile@^6.0.1:
   optionalDependencies:
     graceful-fs "^4.1.6"
 
-jsonparse@^1.2.0:
+jsonparse@^1.2.0, jsonparse@^1.3.1:
   version "1.3.1"
   resolved "https://registry.yarnpkg.com/jsonparse/-/jsonparse-1.3.1.tgz#3f4dae4a91fac315f71062f8521cc239f1366280"
   integrity sha1-P02uSpH6wxX3EGL4UhzCOfE2YoA=
@@ -6603,6 +6674,11 @@ kleur@^3.0.3:
   resolved "https://registry.yarnpkg.com/kleur/-/kleur-3.0.3.tgz#a79c9ecc86ee1ce3fa6206d1216c501f147fc07e"
   integrity sha512-eTIzlVOSUR+JxdDFepEYcBMtZ9Qqdef+rnzWdRZuMbOywu5tO2w2N7rqjoANZ5k9vywhL6Br1VRjUIgTQx4E8w==
 
+kleur@^4.1.3:
+  version "4.1.4"
+  resolved "https://registry.yarnpkg.com/kleur/-/kleur-4.1.4.tgz#8c202987d7e577766d039a8cd461934c01cda04d"
+  integrity sha512-8QADVssbrFjivHWQU7KkMgptGTl6WAcSdlbBPY4uNF+mWr6DGcKrvY2w4FQJoXch7+fKMjj0dRrL75vk3k23OA==
+
 last-run@^1.1.0:
   version "1.1.1"
   resolved "https://registry.yarnpkg.com/last-run/-/last-run-1.1.1.tgz#45b96942c17b1c79c772198259ba943bebf8ca5b"
@@ -6835,6 +6911,16 @@ lodash@4.x, lodash@^4.17.12, lodash@^4.17.14, lodash@^4.17.15, lodash@^4.17.19,
   resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.21.tgz#679591c564c3bffaae8454cf0b3df370c3d6911c"
   integrity sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==
 
+log-update@^4.0.0:
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/log-update/-/log-update-4.0.0.tgz#589ecd352471f2a1c0c570287543a64dfd20e0a1"
+  integrity sha512-9fkkDevMefjg0mmzWFBW8YkFP91OrizzkW3diF7CpG+S2EYdy4+TVfGwz1zeF8x7hCx1ovSPTOE9Ngib74qqUg==
+  dependencies:
+    ansi-escapes "^4.3.0"
+    cli-cursor "^3.1.0"
+    slice-ansi "^4.0.0"
+    wrap-ansi "^6.2.0"
+
 loud-rejection@^1.0.0:
   version "1.6.0"
   resolved "https://registry.yarnpkg.com/loud-rejection/-/loud-rejection-1.6.0.tgz#5b46f80147edee578870f086d04821cf998e551f"
@@ -8301,6 +8387,11 @@ prelude-ls@~1.1.2:
   resolved "https://registry.yarnpkg.com/prelude-ls/-/prelude-ls-1.1.2.tgz#21932a549f5e52ffd9a827f570e04be62a97da54"
   integrity sha1-IZMqVJ9eUv/ZqCf1cOBL5iqX2lQ=
 
+prettier@^2.1.2:
+  version "2.3.0"
+  resolved "https://registry.yarnpkg.com/prettier/-/prettier-2.3.0.tgz#b6a5bf1284026ae640f17f7ff5658a7567fc0d18"
+  integrity sha512-kXtO4s0Lz/DW/IJ9QdWhAf7/NmPWQXkFr/r/WkR3vyI+0v8amTDxiaQSLzs8NBlytfLWX/7uQUMIW677yLKl4w==
+
 pretty-format@^26.0.0, pretty-format@^26.6.2:
   version "26.6.2"
   resolved "https://registry.yarnpkg.com/pretty-format/-/pretty-format-26.6.2.tgz#e35c2705f14cb7fe2fe94fa078345b444120fc93"
@@ -8894,6 +8985,14 @@ restore-cursor@^2.0.0:
     onetime "^2.0.0"
     signal-exit "^3.0.2"
 
+restore-cursor@^3.1.0:
+  version "3.1.0"
+  resolved "https://registry.yarnpkg.com/restore-cursor/-/restore-cursor-3.1.0.tgz#39f67c54b3a7a58cea5236d95cf0034239631f7e"
+  integrity sha512-l+sSefzHpj5qimhFSE5a8nufZYAM3sBSVMAPtYkmC+4EH2anSGaEMXSD0izRQbu9nfyQ9y5JrVmp7E8oZrUjvA==
+  dependencies:
+    onetime "^5.1.0"
+    signal-exit "^3.0.2"
+
 ret@~0.1.10:
   version "0.1.15"
   resolved "https://registry.yarnpkg.com/ret/-/ret-0.1.15.tgz#b8a4825d5bdb1fc3f6f53c2bc33f81388681c7bc"
@@ -9446,6 +9545,11 @@ static-extend@^0.1.1:
     define-property "^0.2.5"
     object-copy "^0.1.0"
 
+stats-median@^1.0.1:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/stats-median/-/stats-median-1.0.1.tgz#ca8497cb1014d23d145db4d6fc93c8e815eed3ef"
+  integrity sha512-IYsheLg6dasD3zT/w9+8Iq9tcIQqqu91ZIpJOnIEM25C3X/g4Tl8mhXwW2ZQpbrsJISr9+wizEYgsibN5/b32Q==
+
 stealthy-require@^1.1.1:
   version "1.1.1"
   resolved "https://registry.yarnpkg.com/stealthy-require/-/stealthy-require-1.1.1.tgz#35b09875b4ff49f26a777e509b3090a3226bf24b"

From 2baee44120f6875649623b6f23dbb9dee671e4af Mon Sep 17 00:00:00 2001
From: Dominik Moritz <domoritz@gmail.com>
Date: Sun, 16 May 2021 06:21:11 +0900
Subject: [PATCH 249/719] ARROW-12797: [JS] Update readme with new links and
 remove outdated examples

* Remove MapD specific docs
* Remove outdated companies list
* Update libraries list

Closes #10335 from domoritz/patch-9

Authored-by: Dominik Moritz <domoritz@gmail.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 js/README.md | 51 +++------------------------------------------------
 1 file changed, 3 insertions(+), 48 deletions(-)

diff --git a/js/README.md b/js/README.md
index 9ca215363ed..b51a7896874 100644
--- a/js/README.md
+++ b/js/README.md
@@ -147,47 +147,6 @@ for (let i = -1, n = column.length; ++i < n;) {
 }
 ```
 
-### Usage with MapD Core
-
-```js
-import MapD from 'rxjs-mapd';
-import { Table } from 'apache-arrow';
-
-const port = 9091;
-const host = `localhost`;
-const db = `mapd`;
-const user = `mapd`;
-const password = `HyperInteractive`;
-
-MapD.open(host, port)
-  .connect(db, user, password)
-  .flatMap((session) =>
-    // queryDF returns Arrow buffers
-    session.queryDF(`
-      SELECT origin_city
-      FROM flights
-      WHERE dest_city ILIKE 'dallas'
-      LIMIT 5`
-    ).disconnect()
-  )
-  .map(([schema, records]) =>
-    // Create Arrow Table from results
-    Table.from([schema, records]))
-  .map((table) =>
-    // Stringify the table to CSV with row numbers
-    table.toString({ index: true }))
-  .subscribe((csvStr) =>
-    console.log(csvStr));
-/*
-Index,   origin_city
-    0, Oklahoma City
-    1, Oklahoma City
-    2, Oklahoma City
-    3,   San Antonio
-    4,   San Antonio
-*/
-```
-
 # Getting involved
 
 See [DEVELOP.md](DEVELOP.md)
@@ -255,15 +214,11 @@ Full list of broader Apache Arrow [projects & organizations](https://arrow.apach
 ## Open Source Projects
 
 * [Apache Arrow](https://arrow.apache.org) -- Parent project for Powering Columnar In-Memory Analytics, including affiliated open source projects
-* [rxjs-mapd](https://github.com/graphistry/rxjs-mapd) -- A MapD Core node-driver that returns query results as Arrow columns
 * [Perspective](https://github.com/jpmorganchase/perspective) -- Perspective is a streaming data visualization engine by J.P. Morgan for JavaScript for building real-time & user-configurable analytics entirely in the browser.
 * [Falcon](https://github.com/uwdata/falcon) is a visualization tool for linked interactions across multiple aggregate visualizations of millions or billions of records.
-
-## Companies & Organizations
-
-* [CCRi](https://www.ccri.com/) -- Commonwealth Computer Research Inc, or CCRi, is a Central Virginia based data science and software engineering company
-* [GOAI](https://gpuopenanalytics.com/) -- GPU Open Analytics Initiative standardizes on Arrow as part of creating common data frameworks that enable developers and statistical researchers to accelerate data science on GPUs
-* [Graphistry, Inc.](https://www.graphistry.com/) - An end-to-end GPU accelerated visual investigation platform used by teams for security, anti-fraud, and related investigations. Graphistry uses Arrow in its NodeJS GPU backend and client libraries, and is an early contributing member to GOAI and Arrow\[JS\] working to bring these technologies to the enterprise.
+* [Vega](https://github.com/vega) is an ecosystem of tools for interactive visualizations on the web. The Vega team implemented an [Arrow loader](https://github.com/vega/vega-loader-arrow).
+* [Arquero](https://github.com/uwdata/arquero) is a library for query processing and transformation of array-backed data tables.
+* [OmniSci](https://github.com/omnisci/mapd-connector) is a GPU database. Its JavaScript connector returns Arrow dataframes.
 
 # License
 

From 792107e05f08ed36f5e4dba9a166847a60ad718d Mon Sep 17 00:00:00 2001
From: Dominik Moritz <domoritz@gmail.com>
Date: Sun, 16 May 2021 06:23:30 +0900
Subject: [PATCH 250/719] MINOR: [C++] Fix typo in memory.h

Closes #10336 from domoritz/patch-11

Authored-by: Dominik Moritz <domoritz@gmail.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 cpp/src/arrow/io/memory.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/src/arrow/io/memory.h b/cpp/src/arrow/io/memory.h
index bfebe9945f8..8213439ef74 100644
--- a/cpp/src/arrow/io/memory.h
+++ b/cpp/src/arrow/io/memory.h
@@ -88,7 +88,7 @@ class ARROW_EXPORT BufferOutputStream : public OutputStream {
   uint8_t* mutable_data_;
 };
 
-/// \brief A helper class to tracks the size of allocations
+/// \brief A helper class to track the size of allocations
 ///
 /// Writes to this stream do not copy or retain any data, they just bump
 /// a size counter that can be later used to know exactly which data size

From 659335dfa26c3b24e0b0754492d174a4bf48c1fa Mon Sep 17 00:00:00 2001
From: Dominik Moritz <domoritz@gmail.com>
Date: Sat, 15 May 2021 14:24:59 -0700
Subject: [PATCH 251/719] MINOR: [JS] add VSCode settings (#10337)

---
 js/.gitignore            | 12 +++---------
 js/.vscode/settings.json |  7 +++++++
 2 files changed, 10 insertions(+), 9 deletions(-)
 create mode 100644 js/.vscode/settings.json

diff --git a/js/.gitignore b/js/.gitignore
index 9a11ab8f2cb..799f789d64d 100644
--- a/js/.gitignore
+++ b/js/.gitignore
@@ -23,9 +23,6 @@ npm-debug.log*
 yarn-debug.log*
 yarn-error.log*
 
-.vscode/**
-!.vscode/launch.json
-
 # Runtime data
 pids
 *.pid
@@ -41,12 +38,6 @@ coverage
 # nyc test coverage
 .nyc_output
 
-# Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
-.grunt
-
-# Bower dependency directory (https://bower.io/)
-bower_components
-
 # node-waf configuration
 .lock-wscript
 
@@ -89,3 +80,6 @@ test/data/**/*.arrow
 
 # jest snapshots (too big)
 test/__snapshots__/
+
+# VSCode
+!.vscode
diff --git a/js/.vscode/settings.json b/js/.vscode/settings.json
new file mode 100644
index 00000000000..379ddf14d0f
--- /dev/null
+++ b/js/.vscode/settings.json
@@ -0,0 +1,7 @@
+{
+  "typescript.tsdk": "node_modules/typescript/lib",
+  "editor.trimAutoWhitespace": true,
+  "editor.codeActionsOnSave": {
+    "source.fixAll.eslint": true
+  }
+}

From c23d66a22fbaeabdbf96383bb827d982ccdfc70d Mon Sep 17 00:00:00 2001
From: Dominik Moritz <domoritz@gmail.com>
Date: Sun, 16 May 2021 06:37:48 +0900
Subject: [PATCH 252/719] ARROW-12578: [JS] Remove Buffer in favor of
 TextEncoder API to support bundlers such as Rollup

Bundlers such as Rollup do not recognize the `_Buffer` import, which breaks their builds. This change resolves this issue by removing Buffer in favor of `TextEncoder`. Note that change incurs a performance penalty on Node as `Buffer` is often faster.

Co-authored-by: Adam Lippai <adam@rigo.sk>
Co-authored-by: Paul Taylor <paul.e.taylor@me.com>

Closes #10332 from domoritz/remove-buffer-js

Authored-by: Dominik Moritz <domoritz@gmail.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 js/src/util/utf8.ts | 27 ++++-----------------------
 1 file changed, 4 insertions(+), 23 deletions(-)

diff --git a/js/src/util/utf8.ts b/js/src/util/utf8.ts
index 4e04a8e4a6e..1eee9314ccd 100644
--- a/js/src/util/utf8.ts
+++ b/js/src/util/utf8.ts
@@ -15,34 +15,15 @@
 // specific language governing permissions and limitations
 // under the License.
 
-import { toUint8Array } from './buffer';
 import {
     TextDecoder as TextDecoderPolyfill,
     TextEncoder as TextEncoderPolyfill,
 } from 'text-encoding-utf-8';
 
-/** @ignore @suppress {missingRequire} */
-const _Buffer = eval("typeof Buffer === 'function' ? Buffer : null");
+const decoder = new (typeof TextDecoder !== 'undefined' ? TextDecoder : TextDecoderPolyfill)('utf-8');
 /** @ignore */
-const useNativeEncoders = typeof TextDecoder === 'function' && typeof TextEncoder === 'function';
+export const decodeUtf8 = (buffer?: ArrayBuffer | ArrayBufferView) => decoder.decode(buffer);
 
+const encoder = new (typeof TextEncoder !== 'undefined' ? TextEncoder : TextEncoderPolyfill)();
 /** @ignore */
-export const decodeUtf8 = ((TextDecoder) => {
-    if (useNativeEncoders || !_Buffer) {
-        const decoder = new TextDecoder('utf-8');
-        return (buffer?: ArrayBuffer | ArrayBufferView) => decoder.decode(buffer);
-    }
-    return (input: ArrayBufferLike | ArrayBufferView) => {
-        const { buffer, byteOffset, length } = toUint8Array(input);
-        return _Buffer.from(buffer, byteOffset, length).toString();
-    };
-})(typeof TextDecoder !== 'undefined' ? TextDecoder : TextDecoderPolyfill);
-
-/** @ignore */
-export const encodeUtf8 = ((TextEncoder) => {
-    if (useNativeEncoders || !_Buffer) {
-        const encoder = new TextEncoder();
-        return (value?: string) => encoder.encode(value);
-    }
-    return (input = '') => toUint8Array(_Buffer.from(input, 'utf8'));
-})(typeof TextEncoder !== 'undefined' ? TextEncoder : TextEncoderPolyfill);
+export const encodeUtf8 = (value?: string) => encoder.encode(value);

From 3f292bb3d456bc622b40889cedc02e11134e9002 Mon Sep 17 00:00:00 2001
From: Dominik Moritz <domoritz@gmail.com>
Date: Sun, 16 May 2021 06:47:46 +0900
Subject: [PATCH 253/719] ARROW-12702: [JS] Update webpack and terser

Closes #10275 from domoritz/webpack

Authored-by: Dominik Moritz <domoritz@gmail.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 js/gulp/minify-task.js |   10 +-
 js/package.json        |   21 +-
 js/yarn.lock           | 1580 ++++++++++------------------------------
 3 files changed, 413 insertions(+), 1198 deletions(-)

diff --git a/js/gulp/minify-task.js b/js/gulp/minify-task.js
index 81cb5e5f3f5..ce1457f70b9 100644
--- a/js/gulp/minify-task.js
+++ b/js/gulp/minify-task.js
@@ -46,6 +46,7 @@ const minifyTask = ((cache, commonConfig) => memoizeTask(cache, function minifyJ
 
     const webpackConfigs = [mainExport].map((entry) => ({
         ...targetConfig,
+        mode: 'production',
         name: entry,
         entry: { [entry]: path.resolve(`${src}/${entry}.dom.js`) },
         plugins: [
@@ -62,13 +63,10 @@ const minifyTask = ((cache, commonConfig) => memoizeTask(cache, function minifyJ
             minimize: true,
             minimizer: [
                 new TerserPlugin({
-                    sourceMap: true,
                     terserOptions: {
                         ecma: terserLanguageNames[target],
-                        output: { comments: false, beautify: false },
-                        compress: { unsafe: true },
-                        mangle: true,
-                        safari10: true // <-- works around safari10 bugs, see the "safari10" option here: https://github.com/terser-js/terser#minify-options
+                        output: { comments: false },
+                        compress: { unsafe: true }
                     },
                 })
             ]
@@ -83,7 +81,7 @@ const minifyTask = ((cache, commonConfig) => memoizeTask(cache, function minifyJ
 }))({}, {
     resolve: { mainFields: [`module`, `main`] },
     module: { rules: [{ test: /\.js$/, enforce: `pre`, use: [`source-map-loader`] }] },
-    output: { filename: '[name].js', library: mainExport, libraryTarget: `umd`, umdNamedDefine: true },
+    output: { filename: '[name].js', library: mainExport, libraryTarget: `umd`, umdNamedDefine: true, globalObject: 'this' },
 });
 
 module.exports = minifyTask;
diff --git a/js/package.json b/js/package.json
index 4d05ac416a0..71e181b1dca 100644
--- a/js/package.json
+++ b/js/package.json
@@ -54,7 +54,7 @@
   ],
   "dependencies": {
     "@types/flatbuffers": "^1.10.0",
-    "@types/node": "^14.14.37",
+    "@types/node": "^15.0.2",
     "@types/text-encoding-utf-8": "^1.0.1",
     "command-line-args": "5.1.1",
     "command-line-usage": "6.1.1",
@@ -65,10 +65,10 @@
     "tslib": "^2.2.0"
   },
   "devDependencies": {
-    "@types/glob": "7.1.1",
-    "@types/jest": "26.0.22",
-    "@typescript-eslint/eslint-plugin": "^4.22.0",
-    "@typescript-eslint/parser": "^4.22.0",
+    "@types/glob": "7.1.3",
+    "@types/jest": "26.0.23",
+    "@typescript-eslint/eslint-plugin": "^4.22.1",
+    "@typescript-eslint/parser": "^4.22.1",
     "async-done": "1.3.1",
     "benny": "3.6.15",
     "cpy": "^8.1.2",
@@ -80,9 +80,9 @@
     "glob": "7.1.4",
     "google-closure-compiler": "20210406.0.0",
     "gulp": "4.0.2",
-    "gulp-json-transform": "0.4.6",
-    "gulp-rename": "1.4.0",
-    "gulp-sourcemaps": "2.6.5",
+    "gulp-json-transform": "0.4.7",
+    "gulp-rename": "2.0.0",
+    "gulp-sourcemaps": "3.0.0",
     "gulp-typescript": "5.0.1",
     "ix": "2.5.3",
     "jest": "26.6.3",
@@ -94,15 +94,14 @@
     "npm-run-all": "4.1.5",
     "randomatic": "3.1.1",
     "rxjs": "5.5.11",
-    "source-map-loader": "0.2.4",
-    "terser-webpack-plugin": "4.2.2",
+    "source-map-loader": "2.0.1",
     "ts-jest": "26.5.4",
     "ts-node": "9.1.1",
     "typedoc": "0.20.35",
     "typescript": "4.0.2",
     "web-stream-tools": "0.0.1",
     "web-streams-polyfill": "3.0.3",
-    "webpack": "4.29.0",
+    "webpack": "5.36.2",
     "xml2js": "0.4.19"
   },
   "engines": {
diff --git a/js/yarn.lock b/js/yarn.lock
index 993af36408e..cd8bef320a9 100644
--- a/js/yarn.lock
+++ b/js/yarn.lock
@@ -426,18 +426,18 @@
     unique-filename "^1.1.1"
     which "^1.3.1"
 
-"@gulp-sourcemaps/identity-map@1.X":
-  version "1.0.2"
-  resolved "https://registry.yarnpkg.com/@gulp-sourcemaps/identity-map/-/identity-map-1.0.2.tgz#1e6fe5d8027b1f285dc0d31762f566bccd73d5a9"
-  integrity sha512-ciiioYMLdo16ShmfHBXJBOFm3xPC4AuwO4xeRpFeHz7WK9PYsWCmigagG2XyzZpubK4a3qNKoUBDhbzHfa50LQ==
+"@gulp-sourcemaps/identity-map@^2.0.1":
+  version "2.0.1"
+  resolved "https://registry.yarnpkg.com/@gulp-sourcemaps/identity-map/-/identity-map-2.0.1.tgz#a6e8b1abec8f790ec6be2b8c500e6e68037c0019"
+  integrity sha512-Tb+nSISZku+eQ4X1lAkevcQa+jknn/OVUgZ3XCxEKIsLsqYuPoJwJOPQeaOk75X3WPftb29GWY1eqE7GLsXb1Q==
   dependencies:
-    acorn "^5.0.3"
-    css "^2.2.1"
-    normalize-path "^2.1.1"
+    acorn "^6.4.1"
+    normalize-path "^3.0.0"
+    postcss "^7.0.16"
     source-map "^0.6.0"
-    through2 "^2.0.3"
+    through2 "^3.0.1"
 
-"@gulp-sourcemaps/map-sources@1.X":
+"@gulp-sourcemaps/map-sources@^1.0.0":
   version "1.0.0"
   resolved "https://registry.yarnpkg.com/@gulp-sourcemaps/map-sources/-/map-sources-1.0.0.tgz#890ae7c5d8c877f6d384860215ace9d7ec945bda"
   integrity sha1-iQrnxdjId/bThIYCFazp1+yUW9o=
@@ -1396,14 +1396,6 @@
     "@nodelib/fs.scandir" "2.1.4"
     fastq "^1.6.0"
 
-"@npmcli/move-file@^1.0.1":
-  version "1.1.2"
-  resolved "https://registry.yarnpkg.com/@npmcli/move-file/-/move-file-1.1.2.tgz#1a82c3e372f7cae9253eb66d72543d6b8685c674"
-  integrity sha512-1SUf/Cg2GzGDyaf15aR9St9TWlb+XvbZXWpDx8YKs7MLzMH/BCeopv+y9vzrzgkfykCGuWOlSu3mZhj2+FQcrg==
-  dependencies:
-    mkdirp "^1.0.4"
-    rimraf "^3.0.2"
-
 "@octokit/auth-token@^2.4.0":
   version "2.4.5"
   resolved "https://registry.yarnpkg.com/@octokit/auth-token/-/auth-token-2.4.5.tgz#568ccfb8cb46f36441fac094ce34f7a875b197f3"
@@ -1565,26 +1557,33 @@
   dependencies:
     "@babel/types" "^7.3.0"
 
-"@types/events@*":
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/@types/events/-/events-3.0.0.tgz#2862f3f58a9a7f7c3e78d79f130dd4d71c25c2a7"
-  integrity sha512-EaObqwIvayI5a8dCzhFrjKzVwKLxjoG9T6Ppd5CEo07LRKfQ8Yokw54r5+Wq7FaBQ+yXRvQAYPrHwya1/UFt9g==
+"@types/eslint-scope@^3.7.0":
+  version "3.7.0"
+  resolved "https://registry.yarnpkg.com/@types/eslint-scope/-/eslint-scope-3.7.0.tgz#4792816e31119ebd506902a482caec4951fabd86"
+  integrity sha512-O/ql2+rrCUe2W2rs7wMR+GqPRcgB6UiqN5RhrR5xruFlY7l9YLMn0ZkDzjoHLeiFkR8MCQZVudUuuvQ2BLC9Qw==
+  dependencies:
+    "@types/eslint" "*"
+    "@types/estree" "*"
+
+"@types/eslint@*":
+  version "7.2.10"
+  resolved "https://registry.yarnpkg.com/@types/eslint/-/eslint-7.2.10.tgz#4b7a9368d46c0f8cd5408c23288a59aa2394d917"
+  integrity sha512-kUEPnMKrqbtpCq/KTaGFFKAcz6Ethm2EjCoKIDaCmfRBWLbFuTcOJfTlorwbnboXBzahqWLgUp1BQeKHiJzPUQ==
+  dependencies:
+    "@types/estree" "*"
+    "@types/json-schema" "*"
+
+"@types/estree@*", "@types/estree@^0.0.47":
+  version "0.0.47"
+  resolved "https://registry.yarnpkg.com/@types/estree/-/estree-0.0.47.tgz#d7a51db20f0650efec24cd04994f523d93172ed4"
+  integrity sha512-c5ciR06jK8u9BstrmJyO97m+klJrrhCf9u3rLu3DEAJBirxRqSCvDQoYKmxuYwQI5SZChAWu+tq9oVlGRuzPAg==
 
 "@types/flatbuffers@^1.10.0":
   version "1.10.0"
   resolved "https://registry.yarnpkg.com/@types/flatbuffers/-/flatbuffers-1.10.0.tgz#aa74e30ffdc86445f2f060e1808fc9d56b5603ba"
   integrity sha512-7btbphLrKvo5yl/5CC2OCxUSMx1wV1wvGT1qDXkSt7yi00/YW7E8k6qzXqJHsp+WU0eoG7r6MTQQXI9lIvd0qA==
 
-"@types/glob@7.1.1":
-  version "7.1.1"
-  resolved "https://registry.yarnpkg.com/@types/glob/-/glob-7.1.1.tgz#aa59a1c6e3fbc421e07ccd31a944c30eba521575"
-  integrity sha512-1Bh06cbWJUHMC97acuD6UMG29nMt0Aqz1vF3guLfG+kHHJhy3AyohZFFxYk2f7Q1SQIrNwvncxAE0N/9s70F2w==
-  dependencies:
-    "@types/events" "*"
-    "@types/minimatch" "*"
-    "@types/node" "*"
-
-"@types/glob@^7.1.1":
+"@types/glob@7.1.3", "@types/glob@^7.1.1":
   version "7.1.3"
   resolved "https://registry.yarnpkg.com/@types/glob/-/glob-7.1.3.tgz#e6ba80f36b7daad2c685acd9266382e68985c183"
   integrity sha512-SEYeGAIQIQX8NN6LDKprLjbrd5dARM5EXsd8GI/A5l0apYI1fGMWgPHSe4ZKL4eozlAyI+doUE9XbYS4xCkQ1w==
@@ -1626,15 +1625,15 @@
   dependencies:
     "@types/istanbul-lib-report" "*"
 
-"@types/jest@26.0.22":
-  version "26.0.22"
-  resolved "https://registry.yarnpkg.com/@types/jest/-/jest-26.0.22.tgz#8308a1debdf1b807aa47be2838acdcd91e88fbe6"
-  integrity sha512-eeWwWjlqxvBxc4oQdkueW5OF/gtfSceKk4OnOAGlUSwS/liBRtZppbJuz1YkgbrbfGOoeBHun9fOvXnjNwrSOw==
+"@types/jest@26.0.23":
+  version "26.0.23"
+  resolved "https://registry.yarnpkg.com/@types/jest/-/jest-26.0.23.tgz#a1b7eab3c503b80451d019efb588ec63522ee4e7"
+  integrity sha512-ZHLmWMJ9jJ9PTiT58juykZpL7KjwJywFN3Rr2pTSkyQfydf/rk22yS7W8p5DaVUMQ2BQC7oYiU3FjbTM/mYrOA==
   dependencies:
     jest-diff "^26.0.0"
     pretty-format "^26.0.0"
 
-"@types/json-schema@^7.0.3", "@types/json-schema@^7.0.5":
+"@types/json-schema@*", "@types/json-schema@^7.0.3", "@types/json-schema@^7.0.6":
   version "7.0.7"
   resolved "https://registry.yarnpkg.com/@types/json-schema/-/json-schema-7.0.7.tgz#98a993516c859eb0d5c4c8f098317a9ea68db9ad"
   integrity sha512-cxWFQVseBm6O9Gbw1IWb8r6OS4OhSt3hPZLkFApLjM8TEXROBuQGLAH2i2gZpcXdLBIrpXuTDhH7Vbm1iXmNGA==
@@ -1649,7 +1648,7 @@
   resolved "https://registry.yarnpkg.com/@types/minimist/-/minimist-1.2.1.tgz#283f669ff76d7b8260df8ab7a4262cc83d988256"
   integrity sha512-fZQQafSREFyuZcdWFAExYjBiCL7AUCdgsk80iO0q4yihYYdcIiH28CcuPTGFgLOCC8RlW49GSQxdHwZP+I7CNg==
 
-"@types/node@*", "@types/node@>= 8", "@types/node@^14.14.37":
+"@types/node@*", "@types/node@>= 8":
   version "14.14.37"
   resolved "https://registry.yarnpkg.com/@types/node/-/node-14.14.37.tgz#a3dd8da4eb84a996c36e331df98d82abd76b516e"
   integrity sha512-XYmBiy+ohOR4Lh5jE379fV2IU+6Jn4g5qASinhitfyO71b/sCo6MKsMLF5tc7Zf2CE8hViVQyYSobJNke8OvUw==
@@ -1659,6 +1658,11 @@
   resolved "https://registry.yarnpkg.com/@types/node/-/node-11.15.50.tgz#a8c76622a20320d4a04adf2002b04737c510ef11"
   integrity sha512-kG/ZmA/uD1L1gVD7vVXQB6v+ICZlJgvakrodHiltT3Zq0YjXq5H9tfgop8MsdMGCwrcLJg9QCQDRP4DZsn9T/g==
 
+"@types/node@^15.0.2":
+  version "15.0.2"
+  resolved "https://registry.yarnpkg.com/@types/node/-/node-15.0.2.tgz#51e9c0920d1b45936ea04341aa3e2e58d339fb67"
+  integrity sha512-p68+a+KoxpoB47015IeYZYRrdqMUcpbK8re/zpFB8Ld46LHC1lPEbp3EXgkEhAYEcPvjJF6ZO+869SQ0aH1dcA==
+
 "@types/normalize-package-data@^2.4.0":
   version "2.4.0"
   resolved "https://registry.yarnpkg.com/@types/normalize-package-data/-/normalize-package-data-2.4.0.tgz#e486d0d97396d79beedd0a6e33f4534ff6b4973e"
@@ -1703,13 +1707,13 @@
   dependencies:
     "@types/yargs-parser" "*"
 
-"@typescript-eslint/eslint-plugin@^4.22.0":
-  version "4.22.0"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/eslint-plugin/-/eslint-plugin-4.22.0.tgz#3d5f29bb59e61a9dba1513d491b059e536e16dbc"
-  integrity sha512-U8SP9VOs275iDXaL08Ln1Fa/wLXfj5aTr/1c0t0j6CdbOnxh+TruXu1p4I0NAvdPBQgoPjHsgKn28mOi0FzfoA==
+"@typescript-eslint/eslint-plugin@^4.22.1":
+  version "4.22.1"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/eslint-plugin/-/eslint-plugin-4.22.1.tgz#6bcdbaa4548553ab861b4e5f34936ead1349a543"
+  integrity sha512-kVTAghWDDhsvQ602tHBc6WmQkdaYbkcTwZu+7l24jtJiYvm9l+/y/b2BZANEezxPDiX5MK2ZecE+9BFi/YJryw==
   dependencies:
-    "@typescript-eslint/experimental-utils" "4.22.0"
-    "@typescript-eslint/scope-manager" "4.22.0"
+    "@typescript-eslint/experimental-utils" "4.22.1"
+    "@typescript-eslint/scope-manager" "4.22.1"
     debug "^4.1.1"
     functional-red-black-tree "^1.0.1"
     lodash "^4.17.15"
@@ -1717,15 +1721,15 @@
     semver "^7.3.2"
     tsutils "^3.17.1"
 
-"@typescript-eslint/experimental-utils@4.22.0":
-  version "4.22.0"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/experimental-utils/-/experimental-utils-4.22.0.tgz#68765167cca531178e7b650a53456e6e0bef3b1f"
-  integrity sha512-xJXHHl6TuAxB5AWiVrGhvbGL8/hbiCQ8FiWwObO3r0fnvBdrbWEDy1hlvGQOAWc6qsCWuWMKdVWlLAEMpxnddg==
+"@typescript-eslint/experimental-utils@4.22.1":
+  version "4.22.1"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/experimental-utils/-/experimental-utils-4.22.1.tgz#3938a5c89b27dc9a39b5de63a62ab1623ab27497"
+  integrity sha512-svYlHecSMCQGDO2qN1v477ax/IDQwWhc7PRBiwAdAMJE7GXk5stF4Z9R/8wbRkuX/5e9dHqbIWxjeOjckK3wLQ==
   dependencies:
     "@types/json-schema" "^7.0.3"
-    "@typescript-eslint/scope-manager" "4.22.0"
-    "@typescript-eslint/types" "4.22.0"
-    "@typescript-eslint/typescript-estree" "4.22.0"
+    "@typescript-eslint/scope-manager" "4.22.1"
+    "@typescript-eslint/types" "4.22.1"
+    "@typescript-eslint/typescript-estree" "4.22.1"
     eslint-scope "^5.0.0"
     eslint-utils "^2.0.0"
 
@@ -1741,14 +1745,14 @@
     eslint-scope "^5.0.0"
     eslint-utils "^2.0.0"
 
-"@typescript-eslint/parser@^4.22.0":
-  version "4.22.0"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/parser/-/parser-4.22.0.tgz#e1637327fcf796c641fe55f73530e90b16ac8fe8"
-  integrity sha512-z/bGdBJJZJN76nvAY9DkJANYgK3nlRstRRi74WHm3jjgf2I8AglrSY+6l7ogxOmn55YJ6oKZCLLy+6PW70z15Q==
+"@typescript-eslint/parser@^4.22.1":
+  version "4.22.1"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/parser/-/parser-4.22.1.tgz#a95bda0fd01d994a15fc3e99dc984294f25c19cc"
+  integrity sha512-l+sUJFInWhuMxA6rtirzjooh8cM/AATAe3amvIkqKFeMzkn85V+eLzb1RyuXkHak4dLfYzOmF6DXPyflJvjQnw==
   dependencies:
-    "@typescript-eslint/scope-manager" "4.22.0"
-    "@typescript-eslint/types" "4.22.0"
-    "@typescript-eslint/typescript-estree" "4.22.0"
+    "@typescript-eslint/scope-manager" "4.22.1"
+    "@typescript-eslint/types" "4.22.1"
+    "@typescript-eslint/typescript-estree" "4.22.1"
     debug "^4.1.1"
 
 "@typescript-eslint/scope-manager@4.21.0":
@@ -1759,23 +1763,23 @@
     "@typescript-eslint/types" "4.21.0"
     "@typescript-eslint/visitor-keys" "4.21.0"
 
-"@typescript-eslint/scope-manager@4.22.0":
-  version "4.22.0"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/scope-manager/-/scope-manager-4.22.0.tgz#ed411545e61161a8d702e703a4b7d96ec065b09a"
-  integrity sha512-OcCO7LTdk6ukawUM40wo61WdeoA7NM/zaoq1/2cs13M7GyiF+T4rxuA4xM+6LeHWjWbss7hkGXjFDRcKD4O04Q==
+"@typescript-eslint/scope-manager@4.22.1":
+  version "4.22.1"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/scope-manager/-/scope-manager-4.22.1.tgz#5bb357f94f9cd8b94e6be43dd637eb73b8f355b4"
+  integrity sha512-d5bAiPBiessSmNi8Amq/RuLslvcumxLmyhf1/Xa9IuaoFJ0YtshlJKxhlbY7l2JdEk3wS0EnmnfeJWSvADOe0g==
   dependencies:
-    "@typescript-eslint/types" "4.22.0"
-    "@typescript-eslint/visitor-keys" "4.22.0"
+    "@typescript-eslint/types" "4.22.1"
+    "@typescript-eslint/visitor-keys" "4.22.1"
 
 "@typescript-eslint/types@4.21.0":
   version "4.21.0"
   resolved "https://registry.yarnpkg.com/@typescript-eslint/types/-/types-4.21.0.tgz#abdc3463bda5d31156984fa5bc316789c960edef"
   integrity sha512-+OQaupjGVVc8iXbt6M1oZMwyKQNehAfLYJJ3SdvnofK2qcjfor9pEM62rVjBknhowTkh+2HF+/KdRAc/wGBN2w==
 
-"@typescript-eslint/types@4.22.0":
-  version "4.22.0"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/types/-/types-4.22.0.tgz#0ca6fde5b68daf6dba133f30959cc0688c8dd0b6"
-  integrity sha512-sW/BiXmmyMqDPO2kpOhSy2Py5w6KvRRsKZnV0c4+0nr4GIcedJwXAq+RHNK4lLVEZAJYFltnnk1tJSlbeS9lYA==
+"@typescript-eslint/types@4.22.1":
+  version "4.22.1"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/types/-/types-4.22.1.tgz#bf99c6cec0b4a23d53a61894816927f2adad856a"
+  integrity sha512-2HTkbkdAeI3OOcWbqA8hWf/7z9c6gkmnWNGz0dKSLYLWywUlkOAQ2XcjhlKLj5xBFDf8FgAOF5aQbnLRvgNbCw==
 
 "@typescript-eslint/typescript-estree@4.21.0":
   version "4.21.0"
@@ -1790,13 +1794,13 @@
     semver "^7.3.2"
     tsutils "^3.17.1"
 
-"@typescript-eslint/typescript-estree@4.22.0":
-  version "4.22.0"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/typescript-estree/-/typescript-estree-4.22.0.tgz#b5d95d6d366ff3b72f5168c75775a3e46250d05c"
-  integrity sha512-TkIFeu5JEeSs5ze/4NID+PIcVjgoU3cUQUIZnH3Sb1cEn1lBo7StSV5bwPuJQuoxKXlzAObjYTilOEKRuhR5yg==
+"@typescript-eslint/typescript-estree@4.22.1":
+  version "4.22.1"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/typescript-estree/-/typescript-estree-4.22.1.tgz#dca379eead8cdfd4edc04805e83af6d148c164f9"
+  integrity sha512-p3We0pAPacT+onSGM+sPR+M9CblVqdA9F1JEdIqRVlxK5Qth4ochXQgIyb9daBomyQKAXbygxp1aXQRV0GC79A==
   dependencies:
-    "@typescript-eslint/types" "4.22.0"
-    "@typescript-eslint/visitor-keys" "4.22.0"
+    "@typescript-eslint/types" "4.22.1"
+    "@typescript-eslint/visitor-keys" "4.22.1"
     debug "^4.1.1"
     globby "^11.0.1"
     is-glob "^4.0.1"
@@ -1811,166 +1815,144 @@
     "@typescript-eslint/types" "4.21.0"
     eslint-visitor-keys "^2.0.0"
 
-"@typescript-eslint/visitor-keys@4.22.0":
-  version "4.22.0"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/visitor-keys/-/visitor-keys-4.22.0.tgz#169dae26d3c122935da7528c839f42a8a42f6e47"
-  integrity sha512-nnMu4F+s4o0sll6cBSsTeVsT4cwxB7zECK3dFxzEjPBii9xLpq4yqqsy/FU5zMfan6G60DKZSCXAa3sHJZrcYw==
+"@typescript-eslint/visitor-keys@4.22.1":
+  version "4.22.1"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/visitor-keys/-/visitor-keys-4.22.1.tgz#6045ae25a11662c671f90b3a403d682dfca0b7a6"
+  integrity sha512-WPkOrIRm+WCLZxXQHCi+WG8T2MMTUFR70rWjdWYddLT7cEfb2P4a3O/J2U1FBVsSFTocXLCoXWY6MZGejeStvQ==
   dependencies:
-    "@typescript-eslint/types" "4.22.0"
+    "@typescript-eslint/types" "4.22.1"
     eslint-visitor-keys "^2.0.0"
 
-"@webassemblyjs/ast@1.7.11":
-  version "1.7.11"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/ast/-/ast-1.7.11.tgz#b988582cafbb2b095e8b556526f30c90d057cace"
-  integrity sha512-ZEzy4vjvTzScC+SH8RBssQUawpaInUdMTYwYYLh54/s8TuT0gBLuyUnppKsVyZEi876VmmStKsUs28UxPgdvrA==
-  dependencies:
-    "@webassemblyjs/helper-module-context" "1.7.11"
-    "@webassemblyjs/helper-wasm-bytecode" "1.7.11"
-    "@webassemblyjs/wast-parser" "1.7.11"
-
-"@webassemblyjs/floating-point-hex-parser@1.7.11":
-  version "1.7.11"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/floating-point-hex-parser/-/floating-point-hex-parser-1.7.11.tgz#a69f0af6502eb9a3c045555b1a6129d3d3f2e313"
-  integrity sha512-zY8dSNyYcgzNRNT666/zOoAyImshm3ycKdoLsyDw/Bwo6+/uktb7p4xyApuef1dwEBo/U/SYQzbGBvV+nru2Xg==
-
-"@webassemblyjs/helper-api-error@1.7.11":
-  version "1.7.11"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/helper-api-error/-/helper-api-error-1.7.11.tgz#c7b6bb8105f84039511a2b39ce494f193818a32a"
-  integrity sha512-7r1qXLmiglC+wPNkGuXCvkmalyEstKVwcueZRP2GNC2PAvxbLYwLLPr14rcdJaE4UtHxQKfFkuDFuv91ipqvXg==
-
-"@webassemblyjs/helper-buffer@1.7.11":
-  version "1.7.11"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/helper-buffer/-/helper-buffer-1.7.11.tgz#3122d48dcc6c9456ed982debe16c8f37101df39b"
-  integrity sha512-MynuervdylPPh3ix+mKZloTcL06P8tenNH3sx6s0qE8SLR6DdwnfgA7Hc9NSYeob2jrW5Vql6GVlsQzKQCa13w==
-
-"@webassemblyjs/helper-code-frame@1.7.11":
-  version "1.7.11"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/helper-code-frame/-/helper-code-frame-1.7.11.tgz#cf8f106e746662a0da29bdef635fcd3d1248364b"
-  integrity sha512-T8ESC9KMXFTXA5urJcyor5cn6qWeZ4/zLPyWeEXZ03hj/x9weSokGNkVCdnhSabKGYWxElSdgJ+sFa9G/RdHNw==
-  dependencies:
-    "@webassemblyjs/wast-printer" "1.7.11"
-
-"@webassemblyjs/helper-fsm@1.7.11":
-  version "1.7.11"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/helper-fsm/-/helper-fsm-1.7.11.tgz#df38882a624080d03f7503f93e3f17ac5ac01181"
-  integrity sha512-nsAQWNP1+8Z6tkzdYlXT0kxfa2Z1tRTARd8wYnc/e3Zv3VydVVnaeePgqUzFrpkGUyhUUxOl5ML7f1NuT+gC0A==
-
-"@webassemblyjs/helper-module-context@1.7.11":
-  version "1.7.11"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/helper-module-context/-/helper-module-context-1.7.11.tgz#d874d722e51e62ac202476935d649c802fa0e209"
-  integrity sha512-JxfD5DX8Ygq4PvXDucq0M+sbUFA7BJAv/GGl9ITovqE+idGX+J3QSzJYz+LwQmL7fC3Rs+utvWoJxDb6pmC0qg==
-
-"@webassemblyjs/helper-wasm-bytecode@1.7.11":
-  version "1.7.11"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/helper-wasm-bytecode/-/helper-wasm-bytecode-1.7.11.tgz#dd9a1e817f1c2eb105b4cf1013093cb9f3c9cb06"
-  integrity sha512-cMXeVS9rhoXsI9LLL4tJxBgVD/KMOKXuFqYb5oCJ/opScWpkCMEz9EJtkonaNcnLv2R3K5jIeS4TRj/drde1JQ==
-
-"@webassemblyjs/helper-wasm-section@1.7.11":
-  version "1.7.11"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/helper-wasm-section/-/helper-wasm-section-1.7.11.tgz#9c9ac41ecf9fbcfffc96f6d2675e2de33811e68a"
-  integrity sha512-8ZRY5iZbZdtNFE5UFunB8mmBEAbSI3guwbrsCl4fWdfRiAcvqQpeqd5KHhSWLL5wuxo53zcaGZDBU64qgn4I4Q==
-  dependencies:
-    "@webassemblyjs/ast" "1.7.11"
-    "@webassemblyjs/helper-buffer" "1.7.11"
-    "@webassemblyjs/helper-wasm-bytecode" "1.7.11"
-    "@webassemblyjs/wasm-gen" "1.7.11"
-
-"@webassemblyjs/ieee754@1.7.11":
-  version "1.7.11"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/ieee754/-/ieee754-1.7.11.tgz#c95839eb63757a31880aaec7b6512d4191ac640b"
-  integrity sha512-Mmqx/cS68K1tSrvRLtaV/Lp3NZWzXtOHUW2IvDvl2sihAwJh4ACE0eL6A8FvMyDG9abes3saB6dMimLOs+HMoQ==
+"@webassemblyjs/ast@1.11.0":
+  version "1.11.0"
+  resolved "https://registry.yarnpkg.com/@webassemblyjs/ast/-/ast-1.11.0.tgz#a5aa679efdc9e51707a4207139da57920555961f"
+  integrity sha512-kX2W49LWsbthrmIRMbQZuQDhGtjyqXfEmmHyEi4XWnSZtPmxY0+3anPIzsnRb45VH/J55zlOfWvZuY47aJZTJg==
+  dependencies:
+    "@webassemblyjs/helper-numbers" "1.11.0"
+    "@webassemblyjs/helper-wasm-bytecode" "1.11.0"
+
+"@webassemblyjs/floating-point-hex-parser@1.11.0":
+  version "1.11.0"
+  resolved "https://registry.yarnpkg.com/@webassemblyjs/floating-point-hex-parser/-/floating-point-hex-parser-1.11.0.tgz#34d62052f453cd43101d72eab4966a022587947c"
+  integrity sha512-Q/aVYs/VnPDVYvsCBL/gSgwmfjeCb4LW8+TMrO3cSzJImgv8lxxEPM2JA5jMrivE7LSz3V+PFqtMbls3m1exDA==
+
+"@webassemblyjs/helper-api-error@1.11.0":
+  version "1.11.0"
+  resolved "https://registry.yarnpkg.com/@webassemblyjs/helper-api-error/-/helper-api-error-1.11.0.tgz#aaea8fb3b923f4aaa9b512ff541b013ffb68d2d4"
+  integrity sha512-baT/va95eXiXb2QflSx95QGT5ClzWpGaa8L7JnJbgzoYeaA27FCvuBXU758l+KXWRndEmUXjP0Q5fibhavIn8w==
+
+"@webassemblyjs/helper-buffer@1.11.0":
+  version "1.11.0"
+  resolved "https://registry.yarnpkg.com/@webassemblyjs/helper-buffer/-/helper-buffer-1.11.0.tgz#d026c25d175e388a7dbda9694e91e743cbe9b642"
+  integrity sha512-u9HPBEl4DS+vA8qLQdEQ6N/eJQ7gT7aNvMIo8AAWvAl/xMrcOSiI2M0MAnMCy3jIFke7bEee/JwdX1nUpCtdyA==
+
+"@webassemblyjs/helper-numbers@1.11.0":
+  version "1.11.0"
+  resolved "https://registry.yarnpkg.com/@webassemblyjs/helper-numbers/-/helper-numbers-1.11.0.tgz#7ab04172d54e312cc6ea4286d7d9fa27c88cd4f9"
+  integrity sha512-DhRQKelIj01s5IgdsOJMKLppI+4zpmcMQ3XboFPLwCpSNH6Hqo1ritgHgD0nqHeSYqofA6aBN/NmXuGjM1jEfQ==
+  dependencies:
+    "@webassemblyjs/floating-point-hex-parser" "1.11.0"
+    "@webassemblyjs/helper-api-error" "1.11.0"
+    "@xtuc/long" "4.2.2"
+
+"@webassemblyjs/helper-wasm-bytecode@1.11.0":
+  version "1.11.0"
+  resolved "https://registry.yarnpkg.com/@webassemblyjs/helper-wasm-bytecode/-/helper-wasm-bytecode-1.11.0.tgz#85fdcda4129902fe86f81abf7e7236953ec5a4e1"
+  integrity sha512-MbmhvxXExm542tWREgSFnOVo07fDpsBJg3sIl6fSp9xuu75eGz5lz31q7wTLffwL3Za7XNRCMZy210+tnsUSEA==
+
+"@webassemblyjs/helper-wasm-section@1.11.0":
+  version "1.11.0"
+  resolved "https://registry.yarnpkg.com/@webassemblyjs/helper-wasm-section/-/helper-wasm-section-1.11.0.tgz#9ce2cc89300262509c801b4af113d1ca25c1a75b"
+  integrity sha512-3Eb88hcbfY/FCukrg6i3EH8H2UsD7x8Vy47iVJrP967A9JGqgBVL9aH71SETPx1JrGsOUVLo0c7vMCN22ytJew==
+  dependencies:
+    "@webassemblyjs/ast" "1.11.0"
+    "@webassemblyjs/helper-buffer" "1.11.0"
+    "@webassemblyjs/helper-wasm-bytecode" "1.11.0"
+    "@webassemblyjs/wasm-gen" "1.11.0"
+
+"@webassemblyjs/ieee754@1.11.0":
+  version "1.11.0"
+  resolved "https://registry.yarnpkg.com/@webassemblyjs/ieee754/-/ieee754-1.11.0.tgz#46975d583f9828f5d094ac210e219441c4e6f5cf"
+  integrity sha512-KXzOqpcYQwAfeQ6WbF6HXo+0udBNmw0iXDmEK5sFlmQdmND+tr773Ti8/5T/M6Tl/413ArSJErATd8In3B+WBA==
   dependencies:
     "@xtuc/ieee754" "^1.2.0"
 
-"@webassemblyjs/leb128@1.7.11":
-  version "1.7.11"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/leb128/-/leb128-1.7.11.tgz#d7267a1ee9c4594fd3f7e37298818ec65687db63"
-  integrity sha512-vuGmgZjjp3zjcerQg+JA+tGOncOnJLWVkt8Aze5eWQLwTQGNgVLcyOTqgSCxWTR4J42ijHbBxnuRaL1Rv7XMdw==
-  dependencies:
-    "@xtuc/long" "4.2.1"
-
-"@webassemblyjs/utf8@1.7.11":
-  version "1.7.11"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/utf8/-/utf8-1.7.11.tgz#06d7218ea9fdc94a6793aa92208160db3d26ee82"
-  integrity sha512-C6GFkc7aErQIAH+BMrIdVSmW+6HSe20wg57HEC1uqJP8E/xpMjXqQUxkQw07MhNDSDcGpxI9G5JSNOQCqJk4sA==
-
-"@webassemblyjs/wasm-edit@1.7.11":
-  version "1.7.11"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/wasm-edit/-/wasm-edit-1.7.11.tgz#8c74ca474d4f951d01dbae9bd70814ee22a82005"
-  integrity sha512-FUd97guNGsCZQgeTPKdgxJhBXkUbMTY6hFPf2Y4OedXd48H97J+sOY2Ltaq6WGVpIH8o/TGOVNiVz/SbpEMJGg==
-  dependencies:
-    "@webassemblyjs/ast" "1.7.11"
-    "@webassemblyjs/helper-buffer" "1.7.11"
-    "@webassemblyjs/helper-wasm-bytecode" "1.7.11"
-    "@webassemblyjs/helper-wasm-section" "1.7.11"
-    "@webassemblyjs/wasm-gen" "1.7.11"
-    "@webassemblyjs/wasm-opt" "1.7.11"
-    "@webassemblyjs/wasm-parser" "1.7.11"
-    "@webassemblyjs/wast-printer" "1.7.11"
-
-"@webassemblyjs/wasm-gen@1.7.11":
-  version "1.7.11"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/wasm-gen/-/wasm-gen-1.7.11.tgz#9bbba942f22375686a6fb759afcd7ac9c45da1a8"
-  integrity sha512-U/KDYp7fgAZX5KPfq4NOupK/BmhDc5Kjy2GIqstMhvvdJRcER/kUsMThpWeRP8BMn4LXaKhSTggIJPOeYHwISA==
-  dependencies:
-    "@webassemblyjs/ast" "1.7.11"
-    "@webassemblyjs/helper-wasm-bytecode" "1.7.11"
-    "@webassemblyjs/ieee754" "1.7.11"
-    "@webassemblyjs/leb128" "1.7.11"
-    "@webassemblyjs/utf8" "1.7.11"
-
-"@webassemblyjs/wasm-opt@1.7.11":
-  version "1.7.11"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/wasm-opt/-/wasm-opt-1.7.11.tgz#b331e8e7cef8f8e2f007d42c3a36a0580a7d6ca7"
-  integrity sha512-XynkOwQyiRidh0GLua7SkeHvAPXQV/RxsUeERILmAInZegApOUAIJfRuPYe2F7RcjOC9tW3Cb9juPvAC/sCqvg==
-  dependencies:
-    "@webassemblyjs/ast" "1.7.11"
-    "@webassemblyjs/helper-buffer" "1.7.11"
-    "@webassemblyjs/wasm-gen" "1.7.11"
-    "@webassemblyjs/wasm-parser" "1.7.11"
-
-"@webassemblyjs/wasm-parser@1.7.11":
-  version "1.7.11"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/wasm-parser/-/wasm-parser-1.7.11.tgz#6e3d20fa6a3519f6b084ef9391ad58211efb0a1a"
-  integrity sha512-6lmXRTrrZjYD8Ng8xRyvyXQJYUQKYSXhJqXOBLw24rdiXsHAOlvw5PhesjdcaMadU/pyPQOJ5dHreMjBxwnQKg==
-  dependencies:
-    "@webassemblyjs/ast" "1.7.11"
-    "@webassemblyjs/helper-api-error" "1.7.11"
-    "@webassemblyjs/helper-wasm-bytecode" "1.7.11"
-    "@webassemblyjs/ieee754" "1.7.11"
-    "@webassemblyjs/leb128" "1.7.11"
-    "@webassemblyjs/utf8" "1.7.11"
-
-"@webassemblyjs/wast-parser@1.7.11":
-  version "1.7.11"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/wast-parser/-/wast-parser-1.7.11.tgz#25bd117562ca8c002720ff8116ef9072d9ca869c"
-  integrity sha512-lEyVCg2np15tS+dm7+JJTNhNWq9yTZvi3qEhAIIOaofcYlUp0UR5/tVqOwa/gXYr3gjwSZqw+/lS9dscyLelbQ==
-  dependencies:
-    "@webassemblyjs/ast" "1.7.11"
-    "@webassemblyjs/floating-point-hex-parser" "1.7.11"
-    "@webassemblyjs/helper-api-error" "1.7.11"
-    "@webassemblyjs/helper-code-frame" "1.7.11"
-    "@webassemblyjs/helper-fsm" "1.7.11"
-    "@xtuc/long" "4.2.1"
-
-"@webassemblyjs/wast-printer@1.7.11":
-  version "1.7.11"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/wast-printer/-/wast-printer-1.7.11.tgz#c4245b6de242cb50a2cc950174fdbf65c78d7813"
-  integrity sha512-m5vkAsuJ32QpkdkDOUPGSltrg8Cuk3KBx4YrmAGQwCZPRdUHXxG4phIOuuycLemHFr74sWL9Wthqss4fzdzSwg==
-  dependencies:
-    "@webassemblyjs/ast" "1.7.11"
-    "@webassemblyjs/wast-parser" "1.7.11"
-    "@xtuc/long" "4.2.1"
+"@webassemblyjs/leb128@1.11.0":
+  version "1.11.0"
+  resolved "https://registry.yarnpkg.com/@webassemblyjs/leb128/-/leb128-1.11.0.tgz#f7353de1df38aa201cba9fb88b43f41f75ff403b"
+  integrity sha512-aqbsHa1mSQAbeeNcl38un6qVY++hh8OpCOzxhixSYgbRfNWcxJNJQwe2rezK9XEcssJbbWIkblaJRwGMS9zp+g==
+  dependencies:
+    "@xtuc/long" "4.2.2"
+
+"@webassemblyjs/utf8@1.11.0":
+  version "1.11.0"
+  resolved "https://registry.yarnpkg.com/@webassemblyjs/utf8/-/utf8-1.11.0.tgz#86e48f959cf49e0e5091f069a709b862f5a2cadf"
+  integrity sha512-A/lclGxH6SpSLSyFowMzO/+aDEPU4hvEiooCMXQPcQFPPJaYcPQNKGOCLUySJsYJ4trbpr+Fs08n4jelkVTGVw==
+
+"@webassemblyjs/wasm-edit@1.11.0":
+  version "1.11.0"
+  resolved "https://registry.yarnpkg.com/@webassemblyjs/wasm-edit/-/wasm-edit-1.11.0.tgz#ee4a5c9f677046a210542ae63897094c2027cb78"
+  integrity sha512-JHQ0damXy0G6J9ucyKVXO2j08JVJ2ntkdJlq1UTiUrIgfGMmA7Ik5VdC/L8hBK46kVJgujkBIoMtT8yVr+yVOQ==
+  dependencies:
+    "@webassemblyjs/ast" "1.11.0"
+    "@webassemblyjs/helper-buffer" "1.11.0"
+    "@webassemblyjs/helper-wasm-bytecode" "1.11.0"
+    "@webassemblyjs/helper-wasm-section" "1.11.0"
+    "@webassemblyjs/wasm-gen" "1.11.0"
+    "@webassemblyjs/wasm-opt" "1.11.0"
+    "@webassemblyjs/wasm-parser" "1.11.0"
+    "@webassemblyjs/wast-printer" "1.11.0"
+
+"@webassemblyjs/wasm-gen@1.11.0":
+  version "1.11.0"
+  resolved "https://registry.yarnpkg.com/@webassemblyjs/wasm-gen/-/wasm-gen-1.11.0.tgz#3cdb35e70082d42a35166988dda64f24ceb97abe"
+  integrity sha512-BEUv1aj0WptCZ9kIS30th5ILASUnAPEvE3tVMTrItnZRT9tXCLW2LEXT8ezLw59rqPP9klh9LPmpU+WmRQmCPQ==
+  dependencies:
+    "@webassemblyjs/ast" "1.11.0"
+    "@webassemblyjs/helper-wasm-bytecode" "1.11.0"
+    "@webassemblyjs/ieee754" "1.11.0"
+    "@webassemblyjs/leb128" "1.11.0"
+    "@webassemblyjs/utf8" "1.11.0"
+
+"@webassemblyjs/wasm-opt@1.11.0":
+  version "1.11.0"
+  resolved "https://registry.yarnpkg.com/@webassemblyjs/wasm-opt/-/wasm-opt-1.11.0.tgz#1638ae188137f4bb031f568a413cd24d32f92978"
+  integrity sha512-tHUSP5F4ywyh3hZ0+fDQuWxKx3mJiPeFufg+9gwTpYp324mPCQgnuVKwzLTZVqj0duRDovnPaZqDwoyhIO8kYg==
+  dependencies:
+    "@webassemblyjs/ast" "1.11.0"
+    "@webassemblyjs/helper-buffer" "1.11.0"
+    "@webassemblyjs/wasm-gen" "1.11.0"
+    "@webassemblyjs/wasm-parser" "1.11.0"
+
+"@webassemblyjs/wasm-parser@1.11.0":
+  version "1.11.0"
+  resolved "https://registry.yarnpkg.com/@webassemblyjs/wasm-parser/-/wasm-parser-1.11.0.tgz#3e680b8830d5b13d1ec86cc42f38f3d4a7700754"
+  integrity sha512-6L285Sgu9gphrcpDXINvm0M9BskznnzJTE7gYkjDbxET28shDqp27wpruyx3C2S/dvEwiigBwLA1cz7lNUi0kw==
+  dependencies:
+    "@webassemblyjs/ast" "1.11.0"
+    "@webassemblyjs/helper-api-error" "1.11.0"
+    "@webassemblyjs/helper-wasm-bytecode" "1.11.0"
+    "@webassemblyjs/ieee754" "1.11.0"
+    "@webassemblyjs/leb128" "1.11.0"
+    "@webassemblyjs/utf8" "1.11.0"
+
+"@webassemblyjs/wast-printer@1.11.0":
+  version "1.11.0"
+  resolved "https://registry.yarnpkg.com/@webassemblyjs/wast-printer/-/wast-printer-1.11.0.tgz#680d1f6a5365d6d401974a8e949e05474e1fab7e"
+  integrity sha512-Fg5OX46pRdTgB7rKIUojkh9vXaVN6sGYCnEiJN1GYkb0RPwShZXp6KTDqmoMdQPKhcroOXh3fEzmkWmCYaKYhQ==
+  dependencies:
+    "@webassemblyjs/ast" "1.11.0"
+    "@xtuc/long" "4.2.2"
 
 "@xtuc/ieee754@^1.2.0":
   version "1.2.0"
   resolved "https://registry.yarnpkg.com/@xtuc/ieee754/-/ieee754-1.2.0.tgz#eef014a3145ae477a1cbc00cd1e552336dceb790"
   integrity sha512-DX8nKgqcGwsc0eJSqYt5lwP4DH5FlHnmuWWBRy7X0NcaGR0ZtuyeESgMwTYVEtxmsNGY+qit4QYT/MIYTOTPeA==
 
-"@xtuc/long@4.2.1":
-  version "4.2.1"
-  resolved "https://registry.yarnpkg.com/@xtuc/long/-/long-4.2.1.tgz#5c85d662f76fa1d34575766c5dcd6615abcd30d8"
-  integrity sha512-FZdkNBDqBRHKQ2MEbSC17xnPFOhZxeJ2YGSfr2BKf3sujG49Qe3bB+rGCwQfIaA7WHnGeGkSijX4FuBCdrzW/g==
+"@xtuc/long@4.2.2":
+  version "4.2.2"
+  resolved "https://registry.yarnpkg.com/@xtuc/long/-/long-4.2.2.tgz#d291c6a4e97989b5c61d9acf396ae4fe133a718d"
+  integrity sha512-NuHqBY1PB/D8xU6s/thBgOAiAP7HOYDQ32+BFZILJ8ivkUkAHQnWfn6WhL79Owj1qmUnoN/YPhktdIoucipkAQ==
 
 "@zkochan/cmd-shim@^3.1.0":
   version "3.1.0"
@@ -1999,11 +1981,6 @@ abbrev@1:
   resolved "https://registry.yarnpkg.com/abbrev/-/abbrev-1.1.1.tgz#f8f2c887ad10bf67f634f005b6987fed3179aac8"
   integrity sha512-nne9/IiQ/hzIhY6pdDnbBtz7DjPTKrY00P/zvPSm5pOFkl6xuGrGnXn/VtTNNfNtAfZ9/1RtehkszU9qcTii0Q==
 
-acorn-dynamic-import@^4.0.0:
-  version "4.0.0"
-  resolved "https://registry.yarnpkg.com/acorn-dynamic-import/-/acorn-dynamic-import-4.0.0.tgz#482210140582a36b83c3e342e1cfebcaa9240948"
-  integrity sha512-d3OEjQV4ROpoflsnUA8HozoIR504TFxNivYEUi6uwz0IYhBkTDXGuWlNdMtybRt3nqVx/L6XqMt0FxkXuWKZhw==
-
 acorn-globals@^6.0.0:
   version "6.0.0"
   resolved "https://registry.yarnpkg.com/acorn-globals/-/acorn-globals-6.0.0.tgz#46cdd39f0f8ff08a876619b55f5ac8a6dc770b45"
@@ -2022,12 +1999,7 @@ acorn-walk@^7.1.1:
   resolved "https://registry.yarnpkg.com/acorn-walk/-/acorn-walk-7.2.0.tgz#0de889a601203909b0fbe07b8938dc21d2e967bc"
   integrity sha512-OPdCF6GsMIP+Az+aWfAAOEt2/+iVDKE7oy6lJ098aoe59oAmK76qV6Gw60SbZ8jHuG2wH058GF4pLFbYamYrVA==
 
-acorn@5.X, acorn@^5.0.3:
-  version "5.7.4"
-  resolved "https://registry.yarnpkg.com/acorn/-/acorn-5.7.4.tgz#3e8d8a9947d0599a1796d10225d7432f4a4acf5e"
-  integrity sha512-1D++VG7BhrtvQpNbBzovKNc1FLGGEE/oGe7b9xJm/RFHMBeUaUGpluV9RLjZa47YFdPcDAenEYuq9pQPcMdLJg==
-
-acorn@^6.0.5:
+acorn@^6.4.1:
   version "6.4.2"
   resolved "https://registry.yarnpkg.com/acorn/-/acorn-6.4.2.tgz#35866fd710528e92de10cf06016498e47e39e1e6"
   integrity sha512-XtGIhXwF8YM8bJhGxG5kXgjkEuNGLTkoYqVE+KMR+aspr4KGYmKYg7yUe3KghyQ9yheNwLnjmzh/7+gfDBmHCQ==
@@ -2042,6 +2014,11 @@ acorn@^8.1.0:
   resolved "https://registry.yarnpkg.com/acorn/-/acorn-8.1.0.tgz#52311fd7037ae119cbb134309e901aa46295b3fe"
   integrity sha512-LWCF/Wn0nfHOmJ9rzQApGnxnvgfROzGilS8936rqN/lfcYkY9MYZzdMqN+2NJ4SlTc+m5HiSa+kNfDtI64dwUA==
 
+acorn@^8.2.1:
+  version "8.2.4"
+  resolved "https://registry.yarnpkg.com/acorn/-/acorn-8.2.4.tgz#caba24b08185c3b56e3168e97d15ed17f4d31fd0"
+  integrity sha512-Ibt84YwBDDA890eDiDCEqcbwvHlBvzzDkU2cGBBDDI1QWT12jTiXIOn2CIw5KK4i6N5Z2HUxwYjzriDyqaqqZg==
+
 agent-base@4, agent-base@^4.3.0:
   version "4.3.0"
   resolved "https://registry.yarnpkg.com/agent-base/-/agent-base-4.3.0.tgz#8165f01c436009bccad0b1d122f05ed770efc6ee"
@@ -2071,17 +2048,12 @@ aggregate-error@^3.0.0:
     clean-stack "^2.0.0"
     indent-string "^4.0.0"
 
-ajv-errors@^1.0.0:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/ajv-errors/-/ajv-errors-1.0.1.tgz#f35986aceb91afadec4102fbd85014950cefa64d"
-  integrity sha512-DCRfO/4nQ+89p/RK43i8Ezd41EqdGIU4ld7nGF8OQ14oc/we5rEntLCUa7+jrn3nn83BosfwZA0wb4pon2o8iQ==
-
-ajv-keywords@^3.1.0, ajv-keywords@^3.5.2:
+ajv-keywords@^3.5.2:
   version "3.5.2"
   resolved "https://registry.yarnpkg.com/ajv-keywords/-/ajv-keywords-3.5.2.tgz#31f29da5ab6e00d1c2d329acf7b5929614d5014d"
   integrity sha512-5p6WTN0DdTGVQk6VjcEju19IgaHudalcfabD7yhDGeA6bcQnmL+CpveLJq/3hvfwd1aof6L386Ougkx6RfyMIQ==
 
-ajv@^6.1.0, ajv@^6.10.0, ajv@^6.12.3, ajv@^6.12.4:
+ajv@^6.10.0, ajv@^6.12.3, ajv@^6.12.4, ajv@^6.12.5:
   version "6.12.6"
   resolved "https://registry.yarnpkg.com/ajv/-/ajv-6.12.6.tgz#baf5a62e802b07d977034586f8c3baf5adf26df4"
   integrity sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==
@@ -2189,7 +2161,7 @@ anymatch@^2.0.0:
     micromatch "^3.1.4"
     normalize-path "^2.1.1"
 
-anymatch@^3.0.3, anymatch@~3.1.1:
+anymatch@^3.0.3:
   version "3.1.2"
   resolved "https://registry.yarnpkg.com/anymatch/-/anymatch-3.1.2.tgz#c0557c096af32f106198f4f4e2a383537e378716"
   integrity sha512-P43ePfOAIupkguHUycrc4qJ9kz8ZiuOUijaETwX7THt0Y/GNK7v0aa8rY816xWjZ7rJdA5XdMcpVFTKMq+RvWg==
@@ -2364,16 +2336,6 @@ asap@^2.0.0, asap@~2.0.6:
   resolved "https://registry.yarnpkg.com/asap/-/asap-2.0.6.tgz#e50347611d7e690943208bbdafebcbc2fb866d46"
   integrity sha1-5QNHYR1+aQlDIIu9r+vLwvuGbUY=
 
-asn1.js@^5.2.0:
-  version "5.4.1"
-  resolved "https://registry.yarnpkg.com/asn1.js/-/asn1.js-5.4.1.tgz#11a980b84ebb91781ce35b0fdc2ee294e3783f07"
-  integrity sha512-+I//4cYPccV8LdmBLiX8CYvf9Sp3vQsrqu2QNXRcrbiWvcx/UdlFiqUJJzxRQxgsZmvhXhn4cSKeSmoFjVdupA==
-  dependencies:
-    bn.js "^4.0.0"
-    inherits "^2.0.1"
-    minimalistic-assert "^1.0.0"
-    safer-buffer "^2.1.0"
-
 asn1@~0.2.3:
   version "0.2.4"
   resolved "https://registry.yarnpkg.com/asn1/-/asn1-0.2.4.tgz#8d2475dfab553bb33e77b54e59e880bb8ce23136"
@@ -2386,14 +2348,6 @@ assert-plus@1.0.0, assert-plus@^1.0.0:
   resolved "https://registry.yarnpkg.com/assert-plus/-/assert-plus-1.0.0.tgz#f12e0f3c5d77b0b1cdd9146942e4e96c1e4dd525"
   integrity sha1-8S4PPF13sLHN2RRpQuTpbB5N1SU=
 
-assert@^1.1.1:
-  version "1.5.0"
-  resolved "https://registry.yarnpkg.com/assert/-/assert-1.5.0.tgz#55c109aaf6e0aefdb3dc4b71240c70bf574b18eb"
-  integrity sha512-EDsgawzwoun2CZkCgtxJbv392v4nbk9XDD06zI+kQYoBM/3RBWLlEyJARDOmhAAosBjWACEkKL6S+lIZtcAubA==
-  dependencies:
-    object-assign "^4.1.1"
-    util "0.10.3"
-
 assign-symbols@^1.0.0:
   version "1.0.0"
   resolved "https://registry.yarnpkg.com/assign-symbols/-/assign-symbols-1.0.0.tgz#59667f41fadd4f20ccbc2bb96b8d4f7f78ec0367"
@@ -2436,13 +2390,6 @@ async-settle@^1.0.0:
   dependencies:
     async-done "^1.2.2"
 
-async@^2.5.0:
-  version "2.6.3"
-  resolved "https://registry.yarnpkg.com/async/-/async-2.6.3.tgz#d72625e2344a3656e3a3ad4fa749fa83299d82ff"
-  integrity sha512-zflvls11DCy+dQWzTW2dzuilv8Z5X/pjfmZOWba6TNIVDm+2UDaJmXSOXlasHKfNBs8oo3M0aT50fDEWfKZjXg==
-  dependencies:
-    lodash "^4.17.14"
-
 asynckit@^0.4.0:
   version "0.4.0"
   resolved "https://registry.yarnpkg.com/asynckit/-/asynckit-0.4.0.tgz#c79ed97f7f34cb8f2ba1bc9790bcc366474b4b79"
@@ -2554,11 +2501,6 @@ balanced-match@^1.0.0:
   resolved "https://registry.yarnpkg.com/balanced-match/-/balanced-match-1.0.2.tgz#e83e3a7e3f300b34cb9d87f615fa0cbf357690ee"
   integrity sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==
 
-base64-js@^1.0.2:
-  version "1.5.1"
-  resolved "https://registry.yarnpkg.com/base64-js/-/base64-js-1.5.1.tgz#1b1b440160a5bf7ad40b650f095963481903930a"
-  integrity sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==
-
 base@^0.11.1:
   version "0.11.2"
   resolved "https://registry.yarnpkg.com/base/-/base-0.11.2.tgz#7bde5ced145b6d551a90db87f83c558b4eb48a8f"
@@ -2608,21 +2550,11 @@ benny@3.6.15:
     prettier "^2.1.2"
     stats-median "^1.0.1"
 
-big.js@^5.2.2:
-  version "5.2.2"
-  resolved "https://registry.yarnpkg.com/big.js/-/big.js-5.2.2.tgz#65f0af382f578bcdc742bd9c281e9cb2d7768328"
-  integrity sha512-vyL2OymJxmarO8gxMr0mhChsO9QGwhynfuu4+MHTAW6czfq9humCB7rKpUjDd9YUiDPU4mzpyupFSvOClAwbmQ==
-
 binary-extensions@^1.0.0:
   version "1.13.1"
   resolved "https://registry.yarnpkg.com/binary-extensions/-/binary-extensions-1.13.1.tgz#598afe54755b2868a5330d2aff9d4ebb53209b65"
   integrity sha512-Un7MIEDdUC5gNpcGDV97op1Ywk748MpHcFTHoYs6qnj1Z3j7I53VG3nwZhKzoBZmbdRNnb6WRdFlwl7tSDuZGw==
 
-binary-extensions@^2.0.0:
-  version "2.2.0"
-  resolved "https://registry.yarnpkg.com/binary-extensions/-/binary-extensions-2.2.0.tgz#75f502eeaf9ffde42fc98829645be4ea76bd9e2d"
-  integrity sha512-jDctJ/IVQbZoJykoeHbhXpOlNBqGNcwXJKJog42E5HDPUwQTSdjCHdihjj0DlnheQ7blbT6dHOafNAiS8ooQKA==
-
 bindings@^1.5.0:
   version "1.5.0"
   resolved "https://registry.yarnpkg.com/bindings/-/bindings-1.5.0.tgz#10353c9e945334bc0511a6d90b38fbc7c9c504df"
@@ -2635,16 +2567,6 @@ bluebird@^3.5.1, bluebird@^3.5.3, bluebird@^3.5.5:
   resolved "https://registry.yarnpkg.com/bluebird/-/bluebird-3.7.2.tgz#9f229c15be272454ffa973ace0dbee79a1b0c36f"
   integrity sha512-XpNj6GDQzdfW+r2Wnn7xiSAd7TM3jzkxGXBGTtWKuSXv1xUV+azxAm8jdWZN06QTQk+2N2XB9jRDkvbmQmcRtg==
 
-bn.js@^4.0.0, bn.js@^4.1.0, bn.js@^4.11.9:
-  version "4.12.0"
-  resolved "https://registry.yarnpkg.com/bn.js/-/bn.js-4.12.0.tgz#775b3f278efbb9718eec7361f483fb36fbbfea88"
-  integrity sha512-c98Bf3tPniI+scsdk237ku1Dc3ujXQTSgyiPUDEOe7tRkhrqridvh8klBv0HCEso1OLOYcHuCv/cS6DNxKH+ZA==
-
-bn.js@^5.0.0, bn.js@^5.1.1:
-  version "5.2.0"
-  resolved "https://registry.yarnpkg.com/bn.js/-/bn.js-5.2.0.tgz#358860674396c6997771a9d051fcc1b57d4ae002"
-  integrity sha512-D7iWRBvnZE8ecXiLj/9wbxH7Tk79fAh8IHaTNq1RWRixsS02W+5qS+iE9yq6RYl0asXx5tw0bLhmT5pIfbSquw==
-
 brace-expansion@^1.1.7:
   version "1.1.11"
   resolved "https://registry.yarnpkg.com/brace-expansion/-/brace-expansion-1.1.11.tgz#3c7fcbf529d87226f3d2f52b966ff5271eb441dd"
@@ -2669,84 +2591,18 @@ braces@^2.3.1, braces@^2.3.2:
     split-string "^3.0.2"
     to-regex "^3.0.1"
 
-braces@^3.0.1, braces@~3.0.2:
+braces@^3.0.1:
   version "3.0.2"
   resolved "https://registry.yarnpkg.com/braces/-/braces-3.0.2.tgz#3454e1a462ee8d599e236df336cd9ea4f8afe107"
   integrity sha512-b8um+L1RzM3WDSzvhm6gIz1yfTbBt6YTlcEKAvsmqCZZFw46z626lVj9j1yEPW33H5H+lBQpZMP1k8l+78Ha0A==
   dependencies:
     fill-range "^7.0.1"
 
-brorand@^1.0.1, brorand@^1.1.0:
-  version "1.1.0"
-  resolved "https://registry.yarnpkg.com/brorand/-/brorand-1.1.0.tgz#12c25efe40a45e3c323eb8675a0a0ce57b22371f"
-  integrity sha1-EsJe/kCkXjwyPrhnWgoM5XsiNx8=
-
 browser-process-hrtime@^1.0.0:
   version "1.0.0"
   resolved "https://registry.yarnpkg.com/browser-process-hrtime/-/browser-process-hrtime-1.0.0.tgz#3c9b4b7d782c8121e56f10106d84c0d0ffc94626"
   integrity sha512-9o5UecI3GhkpM6DrXr69PblIuWxPKk9Y0jHBRhdocZ2y7YECBFCsHm79Pr3OyR2AvjhDkabFJaDJMYRazHgsow==
 
-browserify-aes@^1.0.0, browserify-aes@^1.0.4:
-  version "1.2.0"
-  resolved "https://registry.yarnpkg.com/browserify-aes/-/browserify-aes-1.2.0.tgz#326734642f403dabc3003209853bb70ad428ef48"
-  integrity sha512-+7CHXqGuspUn/Sl5aO7Ea0xWGAtETPXNSAjHo48JfLdPWcMng33Xe4znFvQweqc/uzk5zSOI3H52CYnjCfb5hA==
-  dependencies:
-    buffer-xor "^1.0.3"
-    cipher-base "^1.0.0"
-    create-hash "^1.1.0"
-    evp_bytestokey "^1.0.3"
-    inherits "^2.0.1"
-    safe-buffer "^5.0.1"
-
-browserify-cipher@^1.0.0:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/browserify-cipher/-/browserify-cipher-1.0.1.tgz#8d6474c1b870bfdabcd3bcfcc1934a10e94f15f0"
-  integrity sha512-sPhkz0ARKbf4rRQt2hTpAHqn47X3llLkUGn+xEJzLjwY8LRs2p0v7ljvI5EyoRO/mexrNunNECisZs+gw2zz1w==
-  dependencies:
-    browserify-aes "^1.0.4"
-    browserify-des "^1.0.0"
-    evp_bytestokey "^1.0.0"
-
-browserify-des@^1.0.0:
-  version "1.0.2"
-  resolved "https://registry.yarnpkg.com/browserify-des/-/browserify-des-1.0.2.tgz#3af4f1f59839403572f1c66204375f7a7f703e9c"
-  integrity sha512-BioO1xf3hFwz4kc6iBhI3ieDFompMhrMlnDFC4/0/vd5MokpuAc3R+LYbwTA9A5Yc9pq9UYPqffKpW2ObuwX5A==
-  dependencies:
-    cipher-base "^1.0.1"
-    des.js "^1.0.0"
-    inherits "^2.0.1"
-    safe-buffer "^5.1.2"
-
-browserify-rsa@^4.0.0, browserify-rsa@^4.0.1:
-  version "4.1.0"
-  resolved "https://registry.yarnpkg.com/browserify-rsa/-/browserify-rsa-4.1.0.tgz#b2fd06b5b75ae297f7ce2dc651f918f5be158c8d"
-  integrity sha512-AdEER0Hkspgno2aR97SAf6vi0y0k8NuOpGnVH3O99rcA5Q6sh8QxcngtHuJ6uXwnfAXNM4Gn1Gb7/MV1+Ymbog==
-  dependencies:
-    bn.js "^5.0.0"
-    randombytes "^2.0.1"
-
-browserify-sign@^4.0.0:
-  version "4.2.1"
-  resolved "https://registry.yarnpkg.com/browserify-sign/-/browserify-sign-4.2.1.tgz#eaf4add46dd54be3bb3b36c0cf15abbeba7956c3"
-  integrity sha512-/vrA5fguVAKKAVTNJjgSm1tRQDHUU6DbwO9IROu/0WAzC8PKhucDSh18J0RMvVeHAn5puMd+QHC2erPRNf8lmg==
-  dependencies:
-    bn.js "^5.1.1"
-    browserify-rsa "^4.0.1"
-    create-hash "^1.2.0"
-    create-hmac "^1.1.7"
-    elliptic "^6.5.3"
-    inherits "^2.0.4"
-    parse-asn1 "^5.1.5"
-    readable-stream "^3.6.0"
-    safe-buffer "^5.2.0"
-
-browserify-zlib@^0.2.0:
-  version "0.2.0"
-  resolved "https://registry.yarnpkg.com/browserify-zlib/-/browserify-zlib-0.2.0.tgz#2869459d9aa3be245fe8fe2ca1f46e2e7f54d73f"
-  integrity sha512-Z942RysHXmJrhqk88FmKBVq/v5tqmSkDz7p54G/MGyjMnCFFnC79XWNbg+Vta8W6Wb2qtSZTSxIGkJrRpCFEiA==
-  dependencies:
-    pako "~1.0.5"
-
 browserslist@^4.14.5:
   version "4.16.3"
   resolved "https://registry.yarnpkg.com/browserslist/-/browserslist-4.16.3.tgz#340aa46940d7db878748567c5dea24a48ddf3717"
@@ -2787,25 +2643,6 @@ buffer-from@1.x, buffer-from@^1.0.0:
   resolved "https://registry.yarnpkg.com/buffer-from/-/buffer-from-1.1.1.tgz#32713bc028f75c02fdb710d7c7bcec1f2c6070ef"
   integrity sha512-MQcXEUbCKtEo7bhqEs6560Hyd4XaovZlO/k9V3hjVUF/zwW7KBVdSK4gIt/bzwS9MbR5qob+F5jusZsb0YQK2A==
 
-buffer-xor@^1.0.3:
-  version "1.0.3"
-  resolved "https://registry.yarnpkg.com/buffer-xor/-/buffer-xor-1.0.3.tgz#26e61ed1422fb70dd42e6e36729ed51d855fe8d9"
-  integrity sha1-JuYe0UIvtw3ULm42cp7VHYVf6Nk=
-
-buffer@^4.3.0:
-  version "4.9.2"
-  resolved "https://registry.yarnpkg.com/buffer/-/buffer-4.9.2.tgz#230ead344002988644841ab0244af8c44bbe3ef8"
-  integrity sha512-xq+q3SRMOxGivLhBNaUdC64hDTQwejJ+H0T/NB1XMtTVEwNTrfFF3gAxiyW0Bu/xWEGhjVKgUcMhCrUy2+uCWg==
-  dependencies:
-    base64-js "^1.0.2"
-    ieee754 "^1.1.4"
-    isarray "^1.0.0"
-
-builtin-status-codes@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/builtin-status-codes/-/builtin-status-codes-3.0.0.tgz#85982878e21b98e1c66425e03d0174788f569ee8"
-  integrity sha1-hZgoeOIbmOHGZCXgPQF0eI9Wnug=
-
 builtins@^1.0.3:
   version "1.0.3"
   resolved "https://registry.yarnpkg.com/builtins/-/builtins-1.0.3.tgz#cb94faeb61c8696451db36534e1422f94f0aee88"
@@ -2821,7 +2658,7 @@ byte-size@^5.0.1:
   resolved "https://registry.yarnpkg.com/byte-size/-/byte-size-5.0.1.tgz#4b651039a5ecd96767e71a3d7ed380e48bed4191"
   integrity sha512-/XuKeqWocKsYa/cBY1YbSJSWWqTi4cFgr9S6OyM7PBaPbr9zvNGwWP33vt0uqGhwDdN+y3yhbXVILEUpnwEWGw==
 
-cacache@^12.0.0, cacache@^12.0.2, cacache@^12.0.3:
+cacache@^12.0.0, cacache@^12.0.3:
   version "12.0.4"
   resolved "https://registry.yarnpkg.com/cacache/-/cacache-12.0.4.tgz#668bcbd105aeb5f1d92fe25570ec9525c8faa40c"
   integrity sha512-a0tMB40oefvuInr4Cwb3GerbL9xTj1D5yg0T5xrjGCGyfvbxseIXX7BAO/u/hIXdafzOI5JC3wDwHyf24buOAQ==
@@ -2842,29 +2679,6 @@ cacache@^12.0.0, cacache@^12.0.2, cacache@^12.0.3:
     unique-filename "^1.1.1"
     y18n "^4.0.0"
 
-cacache@^15.0.5:
-  version "15.0.6"
-  resolved "https://registry.yarnpkg.com/cacache/-/cacache-15.0.6.tgz#65a8c580fda15b59150fb76bf3f3a8e45d583099"
-  integrity sha512-g1WYDMct/jzW+JdWEyjaX2zoBkZ6ZT9VpOyp2I/VMtDsNLffNat3kqPFfi1eDRSK9/SuKGyORDHcQMcPF8sQ/w==
-  dependencies:
-    "@npmcli/move-file" "^1.0.1"
-    chownr "^2.0.0"
-    fs-minipass "^2.0.0"
-    glob "^7.1.4"
-    infer-owner "^1.0.4"
-    lru-cache "^6.0.0"
-    minipass "^3.1.1"
-    minipass-collect "^1.0.2"
-    minipass-flush "^1.0.5"
-    minipass-pipeline "^1.2.2"
-    mkdirp "^1.0.3"
-    p-map "^4.0.0"
-    promise-inflight "^1.0.1"
-    rimraf "^3.0.2"
-    ssri "^8.0.1"
-    tar "^6.0.2"
-    unique-filename "^1.1.1"
-
 cache-base@^1.0.1:
   version "1.0.1"
   resolved "https://registry.yarnpkg.com/cache-base/-/cache-base-1.0.1.tgz#0a7f46416831c8b662ee36fe4e7c59d76f666ab2"
@@ -3012,7 +2826,7 @@ chardet@^0.7.0:
   resolved "https://registry.yarnpkg.com/chardet/-/chardet-0.7.0.tgz#90094849f0937f2eedc2425d0d28a9e5f0cbad9e"
   integrity sha512-mT8iDcrh03qDGRRmoA2hmBJnxpllMR+0/0qlzjqZES6NdiWDcZkCNAk4rPFZ9Q85r27unkiNNg8ZOiwZXBHwcA==
 
-chokidar@^2.0.0, chokidar@^2.1.8:
+chokidar@^2.0.0:
   version "2.1.8"
   resolved "https://registry.yarnpkg.com/chokidar/-/chokidar-2.1.8.tgz#804b3a7b6a99358c3c5c61e71d8728f041cff917"
   integrity sha512-ZmZUazfOzf0Nve7duiCKD23PFSCs4JPoYyccjUFF3aQkQadqBhfzhjkwBH2mNOG9cTBwhamM37EIsIkZw3nRgg==
@@ -3031,32 +2845,12 @@ chokidar@^2.0.0, chokidar@^2.1.8:
   optionalDependencies:
     fsevents "^1.2.7"
 
-chokidar@^3.4.1:
-  version "3.5.1"
-  resolved "https://registry.yarnpkg.com/chokidar/-/chokidar-3.5.1.tgz#ee9ce7bbebd2b79f49f304799d5468e31e14e68a"
-  integrity sha512-9+s+Od+W0VJJzawDma/gvBNQqkTiqYTWLuZoyAsivsI4AaWTCzHG06/TMjsf1cYe9Cb97UCEhjz7HvnPk2p/tw==
-  dependencies:
-    anymatch "~3.1.1"
-    braces "~3.0.2"
-    glob-parent "~5.1.0"
-    is-binary-path "~2.1.0"
-    is-glob "~4.0.1"
-    normalize-path "~3.0.0"
-    readdirp "~3.5.0"
-  optionalDependencies:
-    fsevents "~2.3.1"
-
 chownr@^1.1.1, chownr@^1.1.2:
   version "1.1.4"
   resolved "https://registry.yarnpkg.com/chownr/-/chownr-1.1.4.tgz#6fc9d7b42d32a583596337666e7d08084da2cc6b"
   integrity sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==
 
-chownr@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/chownr/-/chownr-2.0.0.tgz#15bfbe53d2eab4cf70f18a8cd68ebe5b3cb1dece"
-  integrity sha512-bIomtDF5KGpdogkLd9VspvFzk9KfpyyGlS8YFVZl7TGPBHL5snIOnxeshwVgPteQ9b4Eydl+pVbIyE1DcvCWgQ==
-
-chrome-trace-event@^1.0.0:
+chrome-trace-event@^1.0.2:
   version "1.0.3"
   resolved "https://registry.yarnpkg.com/chrome-trace-event/-/chrome-trace-event-1.0.3.tgz#1015eced4741e15d06664a957dbbf50d041e26ac"
   integrity sha512-p3KULyQg4S7NIHixdwbGX+nFHkoBiA4YQmyWtjb8XngSKV124nJmRysgAeujbUVb15vh+RvFUfCPqU7rXk+hZg==
@@ -3066,14 +2860,6 @@ ci-info@^2.0.0:
   resolved "https://registry.yarnpkg.com/ci-info/-/ci-info-2.0.0.tgz#67a9e964be31a51e15e5010d58e6f12834002f46"
   integrity sha512-5tK7EtrZ0N+OLFMthtqOj4fI2Jeb88C4CAZPu25LDVUgXJ0A3Js4PMGqrn0JU1W0Mh1/Z8wZzYPxqUrXeBboCQ==
 
-cipher-base@^1.0.0, cipher-base@^1.0.1, cipher-base@^1.0.3:
-  version "1.0.4"
-  resolved "https://registry.yarnpkg.com/cipher-base/-/cipher-base-1.0.4.tgz#8760e4ecc272f4c363532f926d874aae2c1397de"
-  integrity sha512-Kkht5ye6ZGmwv40uUDZztayT2ThLQGfnj/T71N/XzeZeo3nf8foyW7zGTsPYkEya3m5f3cAypH+qe7YOrM1U2Q==
-  dependencies:
-    inherits "^2.0.1"
-    safe-buffer "^5.0.1"
-
 cjs-module-lexer@^0.6.0:
   version "0.6.0"
   resolved "https://registry.yarnpkg.com/cjs-module-lexer/-/cjs-module-lexer-0.6.0.tgz#4186fcca0eae175970aee870b9fe2d6cf8d5655f"
@@ -3294,11 +3080,6 @@ commander@^6.1.0:
   resolved "https://registry.yarnpkg.com/commander/-/commander-6.2.1.tgz#0792eb682dfbc325999bb2b84fddddba110ac73c"
   integrity sha512-U7VdrJFnJgo4xjrHpTzu0yrHPGImdsmD95ZlgYSEajAn2JKzDhDTPG9kBTefmObL2w/ngeZnilk+OV9CG3d7UA==
 
-commondir@^1.0.1:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/commondir/-/commondir-1.0.1.tgz#ddd800da0c66127393cca5950ea968a3aaf1253b"
-  integrity sha1-3dgA2gxmEnOTzKWVDqloo6rxJTs=
-
 compare-func@^2.0.0:
   version "2.0.0"
   resolved "https://registry.yarnpkg.com/compare-func/-/compare-func-2.0.0.tgz#fb65e75edbddfd2e568554e8b5b05fff7a51fcb3"
@@ -3345,21 +3126,11 @@ config-chain@^1.1.11:
     ini "^1.3.4"
     proto-list "~1.2.1"
 
-console-browserify@^1.1.0:
-  version "1.2.0"
-  resolved "https://registry.yarnpkg.com/console-browserify/-/console-browserify-1.2.0.tgz#67063cef57ceb6cf4993a2ab3a55840ae8c49336"
-  integrity sha512-ZMkYO/LkF17QvCPqM0gxw8yUzigAOZOSWSHg91FH6orS7vcEj5dVZTidN2fQ14yBSdg97RqhSNwLUXInd52OTA==
-
 console-control-strings@^1.0.0, console-control-strings@~1.1.0:
   version "1.1.0"
   resolved "https://registry.yarnpkg.com/console-control-strings/-/console-control-strings-1.1.0.tgz#3d7cf4464db6446ea644bf4b39507f9851008e8e"
   integrity sha1-PXz0Rk22RG6mRL9LOVB/mFEAjo4=
 
-constants-browserify@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/constants-browserify/-/constants-browserify-1.0.0.tgz#c20b96d8c617748aaf1c16021760cd27fcb8cb75"
-  integrity sha1-wguW2MYXdIqvHBYCF2DNJ/y4y3U=
-
 conventional-changelog-angular@^5.0.3:
   version "5.0.12"
   resolved "https://registry.yarnpkg.com/conventional-changelog-angular/-/conventional-changelog-angular-5.0.12.tgz#c979b8b921cbfe26402eb3da5bbfda02d865a2b9"
@@ -3443,7 +3214,7 @@ conventional-recommended-bump@^5.0.0:
     meow "^4.0.0"
     q "^1.5.1"
 
-convert-source-map@1.X, convert-source-map@^1.4.0, convert-source-map@^1.5.0, convert-source-map@^1.6.0, convert-source-map@^1.7.0:
+convert-source-map@^1.0.0, convert-source-map@^1.4.0, convert-source-map@^1.5.0, convert-source-map@^1.6.0, convert-source-map@^1.7.0:
   version "1.7.0"
   resolved "https://registry.yarnpkg.com/convert-source-map/-/convert-source-map-1.7.0.tgz#17a2cb882d7f77d3490585e2ce6c524424a3a442"
   integrity sha512-4FJkXzKXEDB1snCFZlLP4gpC3JILicCpGbzG9f9G7tGqGCzETQ2hWPrcinA9oU4wtf2biUaEH5065UnMeR33oA==
@@ -3515,37 +3286,6 @@ cpy@^8.1.2:
     p-filter "^2.1.0"
     p-map "^3.0.0"
 
-create-ecdh@^4.0.0:
-  version "4.0.4"
-  resolved "https://registry.yarnpkg.com/create-ecdh/-/create-ecdh-4.0.4.tgz#d6e7f4bffa66736085a0762fd3a632684dabcc4e"
-  integrity sha512-mf+TCx8wWc9VpuxfP2ht0iSISLZnt0JgWlrOKZiNqyUZWnjIaCIVNQArMHnCZKfEYRg6IM7A+NeJoN8gf/Ws0A==
-  dependencies:
-    bn.js "^4.1.0"
-    elliptic "^6.5.3"
-
-create-hash@^1.1.0, create-hash@^1.1.2, create-hash@^1.2.0:
-  version "1.2.0"
-  resolved "https://registry.yarnpkg.com/create-hash/-/create-hash-1.2.0.tgz#889078af11a63756bcfb59bd221996be3a9ef196"
-  integrity sha512-z00bCGNHDG8mHAkP7CtT1qVu+bFQUPjYq/4Iv3C3kWjTFV10zIjfSoeqXo9Asws8gwSHDGj/hl2u4OGIjapeCg==
-  dependencies:
-    cipher-base "^1.0.1"
-    inherits "^2.0.1"
-    md5.js "^1.3.4"
-    ripemd160 "^2.0.1"
-    sha.js "^2.4.0"
-
-create-hmac@^1.1.0, create-hmac@^1.1.4, create-hmac@^1.1.7:
-  version "1.1.7"
-  resolved "https://registry.yarnpkg.com/create-hmac/-/create-hmac-1.1.7.tgz#69170c78b3ab957147b2b8b04572e47ead2243ff"
-  integrity sha512-MJG9liiZ+ogc4TzUwuvbER1JRdgvUFSB5+VR/g5h82fGaIRWMWddtKBHi7/sVhfjQZ6SehlyhvQYrcYkaUIpLg==
-  dependencies:
-    cipher-base "^1.0.3"
-    create-hash "^1.1.0"
-    inherits "^2.0.1"
-    ripemd160 "^2.0.0"
-    safe-buffer "^5.0.1"
-    sha.js "^2.4.8"
-
 create-require@^1.1.0:
   version "1.1.1"
   resolved "https://registry.yarnpkg.com/create-require/-/create-require-1.1.1.tgz#c1d7e8f1e5f6cfc9ff65f9cd352d37348756c333"
@@ -3578,32 +3318,14 @@ cross-spawn@^7.0.0, cross-spawn@^7.0.1, cross-spawn@^7.0.2:
     shebang-command "^2.0.0"
     which "^2.0.1"
 
-crypto-browserify@^3.11.0:
-  version "3.12.0"
-  resolved "https://registry.yarnpkg.com/crypto-browserify/-/crypto-browserify-3.12.0.tgz#396cf9f3137f03e4b8e532c58f698254e00f80ec"
-  integrity sha512-fz4spIh+znjO2VjL+IdhEpRJ3YN6sMzITSBijk6FK2UvTqruSQW+/cCZTSNsMiZNvUeq0CqurF+dAbyiGOY6Wg==
-  dependencies:
-    browserify-cipher "^1.0.0"
-    browserify-sign "^4.0.0"
-    create-ecdh "^4.0.0"
-    create-hash "^1.1.0"
-    create-hmac "^1.1.0"
-    diffie-hellman "^5.0.0"
-    inherits "^2.0.1"
-    pbkdf2 "^3.0.3"
-    public-encrypt "^4.0.0"
-    randombytes "^2.0.0"
-    randomfill "^1.0.3"
-
-css@2.X, css@^2.2.1:
-  version "2.2.4"
-  resolved "https://registry.yarnpkg.com/css/-/css-2.2.4.tgz#c646755c73971f2bba6a601e2cf2fd71b1298929"
-  integrity sha512-oUnjmWpy0niI3x/mPL8dVEI1l7MnG3+HHyRPHf+YFSbK+svOhXpmSOcDURUh2aOCgl2grzrOPt1nHLuCVFULLw==
+css@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/css/-/css-3.0.0.tgz#4447a4d58fdd03367c516ca9f64ae365cee4aa5d"
+  integrity sha512-DG9pFfwOrzc+hawpmqX/dHYHJG+Bsdb0klhyi1sDneOgGOXy9wQIC8hzyVp1e4NRYDBdxcylvywPkkXCHAzTyQ==
   dependencies:
-    inherits "^2.0.3"
+    inherits "^2.0.4"
     source-map "^0.6.1"
-    source-map-resolve "^0.5.2"
-    urix "^0.1.0"
+    source-map-resolve "^0.6.0"
 
 cssom@^0.4.4:
   version "0.4.4"
@@ -3670,7 +3392,7 @@ dateformat@^3.0.0:
   resolved "https://registry.yarnpkg.com/dateformat/-/dateformat-3.0.3.tgz#a6e37499a4d9a9cf85ef5872044d62901c9889ae"
   integrity sha512-jyCETtSl3VMZMWeRo7iY1FL19ges1t55hMo5yaam4Jrsm5EPL89UQkoQRyiI+Yf4k8r2ZpdngkV8hr1lIdjb3Q==
 
-debug-fabulous@1.X:
+debug-fabulous@^1.0.0:
   version "1.1.0"
   resolved "https://registry.yarnpkg.com/debug-fabulous/-/debug-fabulous-1.1.0.tgz#af8a08632465224ef4174a9f06308c3c2a1ebc8e"
   integrity sha512-GZqvGIgKNlUnHUPQhepnUZFIMoi3dgZKQBzKDeL2g7oJF9SNAji/AAu36dusFUas0O+pae74lNeoIPHqXWDkLg==
@@ -3840,14 +3562,6 @@ deprecation@^2.0.0, deprecation@^2.3.1:
   resolved "https://registry.yarnpkg.com/deprecation/-/deprecation-2.3.1.tgz#6368cbdb40abf3373b525ac87e4a260c3a700919"
   integrity sha512-xmHIy4F3scKVwMsQ4WnVaS8bHOx0DmVwRywosKhaILI0ywMDWPtBSku2HNxRvF7jtwDRsoEwYQSfbxj8b7RlJQ==
 
-des.js@^1.0.0:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/des.js/-/des.js-1.0.1.tgz#5382142e1bdc53f85d86d53e5f4aa7deb91e0843"
-  integrity sha512-Q0I4pfFrv2VPd34/vfLrFOoRmlYj3OV50i7fskps1jZWK1kApMWWT9G6RRUeYedLcBDIhnSDaUvJMb3AhUlaEA==
-  dependencies:
-    inherits "^2.0.1"
-    minimalistic-assert "^1.0.0"
-
 detect-file@^1.0.0:
   version "1.0.0"
   resolved "https://registry.yarnpkg.com/detect-file/-/detect-file-1.0.0.tgz#f0d66d03672a825cb1b73bdb3fe62310c8e552b7"
@@ -3858,7 +3572,7 @@ detect-indent@^5.0.0:
   resolved "https://registry.yarnpkg.com/detect-indent/-/detect-indent-5.0.0.tgz#3871cc0a6a002e8c3e5b3cf7f336264675f06b9d"
   integrity sha1-OHHMCmoALow+Wzz38zYmRnXwa50=
 
-detect-newline@2.X:
+detect-newline@^2.0.0:
   version "2.1.0"
   resolved "https://registry.yarnpkg.com/detect-newline/-/detect-newline-2.1.0.tgz#f41f1c10be4b00e87b5f13da680759f2c5bfd3e2"
   integrity sha1-9B8cEL5LAOh7XxPaaAdZ8sW/0+I=
@@ -3886,15 +3600,6 @@ diff@^4.0.1:
   resolved "https://registry.yarnpkg.com/diff/-/diff-4.0.2.tgz#60f3aecb89d5fae520c11aa19efc2bb982aade7d"
   integrity sha512-58lmxKSA4BNyLz+HHMUzlOEpg09FV+ev6ZMe3vJihgdxzgcwZ8VoEEPmALCZG9LmqfVoNMMKpttIYTVG6uDY7A==
 
-diffie-hellman@^5.0.0:
-  version "5.0.3"
-  resolved "https://registry.yarnpkg.com/diffie-hellman/-/diffie-hellman-5.0.3.tgz#40e8ee98f55a2149607146921c63e1ae5f3d2875"
-  integrity sha512-kqag/Nl+f3GwyK25fhUMYj81BUOrZ9IuJsjIcDE5icNM9FJHAVm3VcUDxdLPoQtTuUylWm6ZIknYJwwaPxsUzg==
-  dependencies:
-    bn.js "^4.1.0"
-    miller-rabin "^4.0.0"
-    randombytes "^2.0.0"
-
 dir-glob@^2.2.2:
   version "2.2.2"
   resolved "https://registry.yarnpkg.com/dir-glob/-/dir-glob-2.2.2.tgz#fa09f0694153c8918b18ba0deafae94769fc50c4"
@@ -3916,11 +3621,6 @@ doctrine@^3.0.0:
   dependencies:
     esutils "^2.0.2"
 
-domain-browser@^1.1.1:
-  version "1.2.0"
-  resolved "https://registry.yarnpkg.com/domain-browser/-/domain-browser-1.2.0.tgz#3d31f50191a6749dd1375a7f522e823d42e54eda"
-  integrity sha512-jnjyiM6eRyZl2H+W8Q/zLMA481hzi0eszAaBUzIVnmYVDBbnLxVNnfu1HgEBvCbL+71FrxMl3E6lpKH7Ge3OXA==
-
 domexception@^2.0.1:
   version "2.0.1"
   resolved "https://registry.yarnpkg.com/domexception/-/domexception-2.0.1.tgz#fb44aefba793e1574b0af6aed2801d057529f304"
@@ -3978,19 +3678,6 @@ electron-to-chromium@^1.3.649:
   resolved "https://registry.yarnpkg.com/electron-to-chromium/-/electron-to-chromium-1.3.711.tgz#92c3caf7ffed5e18bf63f66b4b57b4db2409c450"
   integrity sha512-XbklBVCDiUeho0PZQCjC25Ha6uBwqqJeyDhPLwLwfWRAo4x+FZFsmu1pPPkXT+B4MQMQoQULfyaMltDopfeiHQ==
 
-elliptic@^6.5.3:
-  version "6.5.4"
-  resolved "https://registry.yarnpkg.com/elliptic/-/elliptic-6.5.4.tgz#da37cebd31e79a1367e941b592ed1fbebd58abbb"
-  integrity sha512-iLhC6ULemrljPZb+QutR5TQGB+pdW6KGD5RSegS+8sorOZT+rdQFbsQFJgvN3eRqNALqJer4oQ16YvJHlU8hzQ==
-  dependencies:
-    bn.js "^4.11.9"
-    brorand "^1.1.0"
-    hash.js "^1.0.0"
-    hmac-drbg "^1.0.1"
-    inherits "^2.0.4"
-    minimalistic-assert "^1.0.1"
-    minimalistic-crypto-utils "^1.0.1"
-
 emittery@^0.7.1:
   version "0.7.2"
   resolved "https://registry.yarnpkg.com/emittery/-/emittery-0.7.2.tgz#25595908e13af0f5674ab419396e2fb394cdfa82"
@@ -4006,11 +3693,6 @@ emoji-regex@^8.0.0:
   resolved "https://registry.yarnpkg.com/emoji-regex/-/emoji-regex-8.0.0.tgz#e818fd69ce5ccfcb404594f842963bf53164cc37"
   integrity sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==
 
-emojis-list@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/emojis-list/-/emojis-list-3.0.0.tgz#5570662046ad29e2e916e71aae260abdff4f6a78"
-  integrity sha512-/kyM18EfinwXZbno9FyUGeFh87KC8HRQBQGildHZbEuRyWFOmv1U10o9BBp8XVZDVNNuQKyIGIu5ZYAAXJ0V2Q==
-
 encoding@^0.1.11:
   version "0.1.13"
   resolved "https://registry.yarnpkg.com/encoding/-/encoding-0.1.13.tgz#56574afdd791f54a8e9b2785c0582a2d26210fa9"
@@ -4025,14 +3707,13 @@ end-of-stream@^1.0.0, end-of-stream@^1.1.0:
   dependencies:
     once "^1.4.0"
 
-enhanced-resolve@^4.1.0:
-  version "4.5.0"
-  resolved "https://registry.yarnpkg.com/enhanced-resolve/-/enhanced-resolve-4.5.0.tgz#2f3cfd84dbe3b487f18f2db2ef1e064a571ca5ec"
-  integrity sha512-Nv9m36S/vxpsI+Hc4/ZGRs0n9mXqSWGGq49zxb/cJfPAQMbUtttJAlNPS4AQzaBdw/pKskw5bMbekT/Y7W/Wlg==
+enhanced-resolve@^5.8.0:
+  version "5.8.0"
+  resolved "https://registry.yarnpkg.com/enhanced-resolve/-/enhanced-resolve-5.8.0.tgz#d9deae58f9d3773b6a111a5a46831da5be5c9ac0"
+  integrity sha512-Sl3KRpJA8OpprrtaIswVki3cWPiPKxXuFxJXBp+zNb6s6VwNWwFRUdtmzd2ReUut8n+sCPx7QCtQ7w5wfJhSgQ==
   dependencies:
-    graceful-fs "^4.1.2"
-    memory-fs "^0.5.0"
-    tapable "^1.0.0"
+    graceful-fs "^4.2.4"
+    tapable "^2.2.0"
 
 enquirer@^2.3.5:
   version "2.3.6"
@@ -4056,13 +3737,6 @@ err-code@^1.0.0:
   resolved "https://registry.yarnpkg.com/err-code/-/err-code-1.1.2.tgz#06e0116d3028f6aef4806849eb0ea6a748ae6960"
   integrity sha1-BuARbTAo9q70gGhJ6w6mp0iuaWA=
 
-errno@^0.1.3, errno@~0.1.7:
-  version "0.1.8"
-  resolved "https://registry.yarnpkg.com/errno/-/errno-0.1.8.tgz#8bb3e9c7d463be4976ff888f76b4809ebc2e811f"
-  integrity sha512-dJ6oBr5SQ1VSd9qkk7ByRgb/1SH4JZjCHSW/mr63/QcXO9zLVxvJ6Oy13nio03rxpSnVDDjFor75SjVeZWPW/A==
-  dependencies:
-    prr "~1.0.1"
-
 error-ex@^1.2.0, error-ex@^1.3.1:
   version "1.3.2"
   resolved "https://registry.yarnpkg.com/error-ex/-/error-ex-1.3.2.tgz#b4ac40648107fdcdcfae242f428bea8a14d4f1bf"
@@ -4092,6 +3766,11 @@ es-abstract@^1.18.0-next.2:
     string.prototype.trimstart "^1.0.4"
     unbox-primitive "^1.0.0"
 
+es-module-lexer@^0.4.0:
+  version "0.4.1"
+  resolved "https://registry.yarnpkg.com/es-module-lexer/-/es-module-lexer-0.4.1.tgz#dda8c6a14d8f340a24e34331e0fab0cb50438e0e"
+  integrity sha512-ooYciCUtfw6/d2w56UVeqHPcoCFAiJdz5XOkYpv/Txl1HMUozpXjz/2RIQgqwKdXNDPSF1W7mJCFse3G+HDyAA==
+
 es-to-primitive@^1.2.1:
   version "1.2.1"
   resolved "https://registry.yarnpkg.com/es-to-primitive/-/es-to-primitive-1.2.1.tgz#e55cd4c9cdc188bcefb03b366c736323fc5c898a"
@@ -4183,14 +3862,6 @@ eslint-plugin-jest@^24.3.5:
   dependencies:
     "@typescript-eslint/experimental-utils" "^4.0.1"
 
-eslint-scope@^4.0.0:
-  version "4.0.3"
-  resolved "https://registry.yarnpkg.com/eslint-scope/-/eslint-scope-4.0.3.tgz#ca03833310f6889a3264781aa82e63eb9cfe7848"
-  integrity sha512-p7VutNr1O/QrxysMo3E45FjYDTeXBy0iTltPFNSqKAIfjDSXC+4dj+qfyuD8bfAXrW/y6lW3O76VaYNPKfpKrg==
-  dependencies:
-    esrecurse "^4.1.0"
-    estraverse "^4.1.1"
-
 eslint-scope@^5.0.0, eslint-scope@^5.1.1:
   version "5.1.1"
   resolved "https://registry.yarnpkg.com/eslint-scope/-/eslint-scope-5.1.1.tgz#e786e59a66cb92b3f6c1fb0d508aab174848f48c"
@@ -4285,7 +3956,7 @@ esquery@^1.4.0:
   dependencies:
     estraverse "^5.1.0"
 
-esrecurse@^4.1.0, esrecurse@^4.3.0:
+esrecurse@^4.3.0:
   version "4.3.0"
   resolved "https://registry.yarnpkg.com/esrecurse/-/esrecurse-4.3.0.tgz#7ad7964d679abb28bee72cec63758b1c5d2c9921"
   integrity sha512-KmfKL3b6G+RXvP8N1vr3Tq1kL/oCFgn2NYXEtqP8/L3pKapUA4G8cFVaoF3SU323CD4XypR/ffioHmkti6/Tag==
@@ -4320,19 +3991,11 @@ eventemitter3@^3.1.0:
   resolved "https://registry.yarnpkg.com/eventemitter3/-/eventemitter3-3.1.2.tgz#2d3d48f9c346698fce83a85d7d664e98535df6e7"
   integrity sha512-tvtQIeLVHjDkJYnzf2dgVMxfuSGJeM/7UCG17TT4EumTfNtF+0nebF/4zWOIkCreAbtNqhGEboB6BWrwqNaw4Q==
 
-events@^3.0.0:
+events@^3.2.0:
   version "3.3.0"
   resolved "https://registry.yarnpkg.com/events/-/events-3.3.0.tgz#31a95ad0a924e2d2c419a813aeb2c4e878ea7400"
   integrity sha512-mQw+2fkQbALzQ7V0MY0IqdnXNOeTtP4r0lN9z7AAawCXgqea7bDii20AYrIBrFd/Hx0M2Ocz6S111CaFkUcb0Q==
 
-evp_bytestokey@^1.0.0, evp_bytestokey@^1.0.3:
-  version "1.0.3"
-  resolved "https://registry.yarnpkg.com/evp_bytestokey/-/evp_bytestokey-1.0.3.tgz#7fcbdb198dc71959432efe13842684e0525acb02"
-  integrity sha512-/f2Go4TognH/KvCISP7OUsHn85hT9nUkxxA9BEWxFn+Oj9o8ZNLm/40hdlgSLyuOimsrTKLUMEorQexp/aPQeA==
-  dependencies:
-    md5.js "^1.3.4"
-    safe-buffer "^5.1.1"
-
 exec-sh@^0.3.2:
   version "0.3.6"
   resolved "https://registry.yarnpkg.com/exec-sh/-/exec-sh-0.3.6.tgz#ff264f9e325519a60cb5e273692943483cca63bc"
@@ -4582,24 +4245,6 @@ filter-obj@^1.1.0:
   resolved "https://registry.yarnpkg.com/filter-obj/-/filter-obj-1.1.0.tgz#9b311112bc6c6127a16e016c6c5d7f19e0805c5b"
   integrity sha1-mzERErxsYSehbgFsbF1/GeCAXFs=
 
-find-cache-dir@^2.1.0:
-  version "2.1.0"
-  resolved "https://registry.yarnpkg.com/find-cache-dir/-/find-cache-dir-2.1.0.tgz#8d0f94cd13fe43c6c7c261a0d86115ca918c05f7"
-  integrity sha512-Tq6PixE0w/VMFfCgbONnkiQIVol/JJL7nRMi20fqzA4NRs9AfeqMGeRdPi3wIhYkxjeBaWh2rxwapn5Tu3IqOQ==
-  dependencies:
-    commondir "^1.0.1"
-    make-dir "^2.0.0"
-    pkg-dir "^3.0.0"
-
-find-cache-dir@^3.3.1:
-  version "3.3.1"
-  resolved "https://registry.yarnpkg.com/find-cache-dir/-/find-cache-dir-3.3.1.tgz#89b33fad4a4670daa94f855f7fbe31d6d84fe880"
-  integrity sha512-t2GDMt3oGC/v+BMwzmllWDuJF/xcDtE5j/fCGbqDD7OLuJkj0cfh1YSA5VKPvwMeLFLNDBkwOKZ2X85jGLVftQ==
-  dependencies:
-    commondir "^1.0.1"
-    make-dir "^3.0.2"
-    pkg-dir "^4.1.0"
-
 find-replace@^3.0.0:
   version "3.0.0"
   resolved "https://registry.yarnpkg.com/find-replace/-/find-replace-3.0.0.tgz#3e7e23d3b05167a76f770c9fbd5258b0def68c38"
@@ -4766,13 +4411,6 @@ fs-minipass@^1.2.5:
   dependencies:
     minipass "^2.6.0"
 
-fs-minipass@^2.0.0:
-  version "2.1.0"
-  resolved "https://registry.yarnpkg.com/fs-minipass/-/fs-minipass-2.1.0.tgz#7f5036fdbf12c63c169190cbe4199c852271f9fb"
-  integrity sha512-V/JgOLFCS+R6Vcq0slCuaeWEdNC3ouDlJMNIsacH2VtALiu9mV4LPrHc5cDl8k5aw6J8jwgWWpiTo5RYhmIzvg==
-  dependencies:
-    minipass "^3.0.0"
-
 fs-mkdirp-stream@^1.0.0:
   version "1.0.0"
   resolved "https://registry.yarnpkg.com/fs-mkdirp-stream/-/fs-mkdirp-stream-1.0.0.tgz#0b7815fc3201c6a69e14db98ce098c16935259eb"
@@ -4809,7 +4447,7 @@ fsevents@^1.2.7:
     bindings "^1.5.0"
     nan "^2.12.1"
 
-fsevents@^2.1.2, fsevents@~2.3.1:
+fsevents@^2.1.2:
   version "2.3.2"
   resolved "https://registry.yarnpkg.com/fsevents/-/fsevents-2.3.2.tgz#8a526f78b8fdf4623b709e0b975c52c24c02fd1a"
   integrity sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==
@@ -4976,7 +4614,7 @@ glob-parent@^3.1.0:
     is-glob "^3.1.0"
     path-dirname "^1.0.0"
 
-glob-parent@^5.0.0, glob-parent@^5.1.0, glob-parent@~5.1.0:
+glob-parent@^5.0.0, glob-parent@^5.1.0:
   version "5.1.2"
   resolved "https://registry.yarnpkg.com/glob-parent/-/glob-parent-5.1.2.tgz#869832c58034fe68a4093c17dc15e8340d8401c4"
   integrity sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==
@@ -5004,6 +4642,11 @@ glob-to-regexp@^0.3.0:
   resolved "https://registry.yarnpkg.com/glob-to-regexp/-/glob-to-regexp-0.3.0.tgz#8c5a1494d2066c570cc3bfe4496175acc4d502ab"
   integrity sha1-jFoUlNIGbFcMw7/kSWF1rMTVAqs=
 
+glob-to-regexp@^0.4.1:
+  version "0.4.1"
+  resolved "https://registry.yarnpkg.com/glob-to-regexp/-/glob-to-regexp-0.4.1.tgz#c75297087c851b9a578bd217dd59a92f59fe546e"
+  integrity sha512-lkX1HJXwyMcprw/5YUZc2s7DrpAiHB21/V+E1rHUrVNokkvB6bqMzT0VfV6/86ZNabt1k14YOIaT7nDvOX3Iiw==
+
 glob-watcher@^5.0.3:
   version "5.0.5"
   resolved "https://registry.yarnpkg.com/glob-watcher/-/glob-watcher-5.0.5.tgz#aa6bce648332924d9a8489be41e3e5c52d4186dc"
@@ -5162,7 +4805,7 @@ google-closure-compiler@20210406.0.0:
     google-closure-compiler-osx "^20210406.0.0"
     google-closure-compiler-windows "^20210406.0.0"
 
-graceful-fs@4.X, graceful-fs@^4.0.0, graceful-fs@^4.1.11, graceful-fs@^4.1.15, graceful-fs@^4.1.2, graceful-fs@^4.1.6, graceful-fs@^4.2.0, graceful-fs@^4.2.2, graceful-fs@^4.2.4:
+graceful-fs@^4.0.0, graceful-fs@^4.1.11, graceful-fs@^4.1.15, graceful-fs@^4.1.2, graceful-fs@^4.1.6, graceful-fs@^4.2.0, graceful-fs@^4.2.2, graceful-fs@^4.2.4:
   version "4.2.6"
   resolved "https://registry.yarnpkg.com/graceful-fs/-/graceful-fs-4.2.6.tgz#ff040b2b0853b23c3d31027523706f1885d76bee"
   integrity sha512-nTnJ528pbqxYanhpDYsi4Rd8MAeaBA67+RZ10CM1m3bTAVFEDcd5AuA4a6W5YkGZ1iNXHzZz8T6TBKLeBuNriQ==
@@ -5196,10 +4839,10 @@ gulp-cli@^2.2.0:
     v8flags "^3.2.0"
     yargs "^7.1.0"
 
-gulp-json-transform@0.4.6:
-  version "0.4.6"
-  resolved "https://registry.yarnpkg.com/gulp-json-transform/-/gulp-json-transform-0.4.6.tgz#37ab209463df62c9e779887d675fb6025eb07b89"
-  integrity sha512-laPoNiJP/+lAeiyb0lgY3cynOOi7R/QbPvKBEXJY6bm836nYg90pwY4mgwR7w8nFDlXiCToUeaoQCBIc2NudjA==
+gulp-json-transform@0.4.7:
+  version "0.4.7"
+  resolved "https://registry.yarnpkg.com/gulp-json-transform/-/gulp-json-transform-0.4.7.tgz#41c37524c976e41f3d46c06f985b01530a472e34"
+  integrity sha512-Wi0p5GpoLXbTDwaZnw6rgj3FMLW3PscaHaX1okxrTgPWeqnIiMo4aJz7VlG68JYkxPeAXJrPce8AGEfcT2IifA==
   dependencies:
     ansi-colors "^1.0.1"
     fancy-log "^1.3.2"
@@ -5208,27 +4851,27 @@ gulp-json-transform@0.4.6:
     through2 "^2.0.3"
     vinyl "^2.1.0"
 
-gulp-rename@1.4.0:
-  version "1.4.0"
-  resolved "https://registry.yarnpkg.com/gulp-rename/-/gulp-rename-1.4.0.tgz#de1c718e7c4095ae861f7296ef4f3248648240bd"
-  integrity sha512-swzbIGb/arEoFK89tPY58vg3Ok1bw+d35PfUNwWqdo7KM4jkmuGA78JiDNqR+JeZFaeeHnRg9N7aihX3YPmsyg==
-
-gulp-sourcemaps@2.6.5:
-  version "2.6.5"
-  resolved "https://registry.yarnpkg.com/gulp-sourcemaps/-/gulp-sourcemaps-2.6.5.tgz#a3f002d87346d2c0f3aec36af7eb873f23de8ae6"
-  integrity sha512-SYLBRzPTew8T5Suh2U8jCSDKY+4NARua4aqjj8HOysBh2tSgT9u4jc1FYirAdPx1akUxxDeK++fqw6Jg0LkQRg==
-  dependencies:
-    "@gulp-sourcemaps/identity-map" "1.X"
-    "@gulp-sourcemaps/map-sources" "1.X"
-    acorn "5.X"
-    convert-source-map "1.X"
-    css "2.X"
-    debug-fabulous "1.X"
-    detect-newline "2.X"
-    graceful-fs "4.X"
-    source-map "~0.6.0"
-    strip-bom-string "1.X"
-    through2 "2.X"
+gulp-rename@2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/gulp-rename/-/gulp-rename-2.0.0.tgz#9bbc3962b0c0f52fc67cd5eaff6c223ec5b9cf6c"
+  integrity sha512-97Vba4KBzbYmR5VBs9mWmK+HwIf5mj+/zioxfZhOKeXtx5ZjBk57KFlePf5nxq9QsTtFl0ejnHE3zTC9MHXqyQ==
+
+gulp-sourcemaps@3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/gulp-sourcemaps/-/gulp-sourcemaps-3.0.0.tgz#2e154e1a2efed033c0e48013969e6f30337b2743"
+  integrity sha512-RqvUckJkuYqy4VaIH60RMal4ZtG0IbQ6PXMNkNsshEGJ9cldUPRb/YCgboYae+CLAs1HQNb4ADTKCx65HInquQ==
+  dependencies:
+    "@gulp-sourcemaps/identity-map" "^2.0.1"
+    "@gulp-sourcemaps/map-sources" "^1.0.0"
+    acorn "^6.4.1"
+    convert-source-map "^1.0.0"
+    css "^3.0.0"
+    debug-fabulous "^1.0.0"
+    detect-newline "^2.0.0"
+    graceful-fs "^4.0.0"
+    source-map "^0.6.0"
+    strip-bom-string "^1.0.0"
+    through2 "^2.0.0"
 
 gulp-typescript@5.0.1:
   version "5.0.1"
@@ -5359,32 +5002,6 @@ has@^1.0.3:
   dependencies:
     function-bind "^1.1.1"
 
-hash-base@^3.0.0:
-  version "3.1.0"
-  resolved "https://registry.yarnpkg.com/hash-base/-/hash-base-3.1.0.tgz#55c381d9e06e1d2997a883b4a3fddfe7f0d3af33"
-  integrity sha512-1nmYp/rhMDiE7AYkDw+lLwlAzz0AntGIe51F3RfFfEqyQ3feY2eI/NcwC6umIQVOASPMsWJLJScWKSSvzL9IVA==
-  dependencies:
-    inherits "^2.0.4"
-    readable-stream "^3.6.0"
-    safe-buffer "^5.2.0"
-
-hash.js@^1.0.0, hash.js@^1.0.3:
-  version "1.1.7"
-  resolved "https://registry.yarnpkg.com/hash.js/-/hash.js-1.1.7.tgz#0babca538e8d4ee4a0f8988d68866537a003cf42"
-  integrity sha512-taOaskGt4z4SOANNseOviYDvjEJinIkRgmp7LbKP2YTTmVxWBl87s/uzK9r+44BclBSp2X7K1hqeNfz9JbBeXA==
-  dependencies:
-    inherits "^2.0.3"
-    minimalistic-assert "^1.0.1"
-
-hmac-drbg@^1.0.1:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/hmac-drbg/-/hmac-drbg-1.0.1.tgz#d2745701025a6c775a6c545793ed502fc0c649a1"
-  integrity sha1-0nRXAQJabHdabFRXk+1QL8DGSaE=
-  dependencies:
-    hash.js "^1.0.3"
-    minimalistic-assert "^1.0.0"
-    minimalistic-crypto-utils "^1.0.1"
-
 homedir-polyfill@^1.0.1:
   version "1.0.3"
   resolved "https://registry.yarnpkg.com/homedir-polyfill/-/homedir-polyfill-1.0.3.tgz#743298cef4e5af3e194161fbadcc2151d3a058e8"
@@ -5438,11 +5055,6 @@ http-signature@~1.2.0:
     jsprim "^1.2.2"
     sshpk "^1.7.0"
 
-https-browserify@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/https-browserify/-/https-browserify-1.0.0.tgz#ec06c10e0a34c0f2faf199f7fd7fc78fffd03c73"
-  integrity sha1-7AbBDgo0wPL68Zn3/X/Hj//QPHM=
-
 https-proxy-agent@^2.2.3:
   version "2.2.4"
   resolved "https://registry.yarnpkg.com/https-proxy-agent/-/https-proxy-agent-2.2.4.tgz#4ee7a737abd92678a293d9b34a1af4d0d08c787b"
@@ -5477,11 +5089,6 @@ iconv-lite@^0.6.2:
   dependencies:
     safer-buffer ">= 2.1.2 < 3.0.0"
 
-ieee754@^1.1.4:
-  version "1.2.1"
-  resolved "https://registry.yarnpkg.com/ieee754/-/ieee754-1.2.1.tgz#8eb7a10a63fff25d15a57b001586d177d1b0d352"
-  integrity sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==
-
 iferr@^0.1.5:
   version "0.1.5"
   resolved "https://registry.yarnpkg.com/iferr/-/iferr-0.1.5.tgz#c60eed69e6d8fdb6b3104a1fcbca1c192dc5b501"
@@ -5571,21 +5178,11 @@ inflight@^1.0.4:
     once "^1.3.0"
     wrappy "1"
 
-inherits@2, inherits@^2.0.1, inherits@^2.0.3, inherits@^2.0.4, inherits@~2.0.1, inherits@~2.0.3:
+inherits@2, inherits@^2.0.1, inherits@^2.0.3, inherits@^2.0.4, inherits@~2.0.3:
   version "2.0.4"
   resolved "https://registry.yarnpkg.com/inherits/-/inherits-2.0.4.tgz#0fa2c64f932917c3433a0ded55363aae37416b7c"
   integrity sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==
 
-inherits@2.0.1:
-  version "2.0.1"
-  resolved "https://registry.yarnpkg.com/inherits/-/inherits-2.0.1.tgz#b17d08d326b4423e568eff719f91b0b1cbdf69f1"
-  integrity sha1-sX0I0ya0Qj5Wjv9xn5GwscvfafE=
-
-inherits@2.0.3:
-  version "2.0.3"
-  resolved "https://registry.yarnpkg.com/inherits/-/inherits-2.0.3.tgz#633c2c83e3da42a502f52466022480f4208261de"
-  integrity sha1-Yzwsg+PaQqUC9SRmAiSA9CCCYd4=
-
 ini@^1.3.2, ini@^1.3.4:
   version "1.3.8"
   resolved "https://registry.yarnpkg.com/ini/-/ini-1.3.8.tgz#a29da425b48806f34767a4efce397269af28432c"
@@ -5678,13 +5275,6 @@ is-binary-path@^1.0.0:
   dependencies:
     binary-extensions "^1.0.0"
 
-is-binary-path@~2.1.0:
-  version "2.1.0"
-  resolved "https://registry.yarnpkg.com/is-binary-path/-/is-binary-path-2.1.0.tgz#ea1f7f3b80f064236e83470f86c09c254fb45b09"
-  integrity sha512-ZMERYes6pDydyuGidse7OsHxtbI7WVeUEozgR/g7rd0xUimYNlvZRE/K2MgZTjWy725IfelLeVcEM97mmtRGXw==
-  dependencies:
-    binary-extensions "^2.0.0"
-
 is-boolean-object@^1.1.0:
   version "1.1.0"
   resolved "https://registry.yarnpkg.com/is-boolean-object/-/is-boolean-object-1.1.0.tgz#e2aaad3a3a8fca34c28f6eee135b156ed2587ff0"
@@ -5814,7 +5404,7 @@ is-glob@^3.0.0, is-glob@^3.1.0:
   dependencies:
     is-extglob "^2.1.0"
 
-is-glob@^4.0.0, is-glob@^4.0.1, is-glob@~4.0.1:
+is-glob@^4.0.0, is-glob@^4.0.1:
   version "4.0.1"
   resolved "https://registry.yarnpkg.com/is-glob/-/is-glob-4.0.1.tgz#7567dbe9f2f5e2467bc77ab83c4a29482407a5dc"
   integrity sha512-5G0tKtBTFImOqDnLB2hG6Bp2qcKEFduo4tZu9MT/H6NQv/ghhy30o55ufafxJ/LdH79LLs2Kfrn85TLKyA7BUg==
@@ -5978,11 +5568,6 @@ is-windows@^1.0.0, is-windows@^1.0.1, is-windows@^1.0.2:
   resolved "https://registry.yarnpkg.com/is-windows/-/is-windows-1.0.2.tgz#d1850eb9791ecd18e6182ce12a30f396634bb19d"
   integrity sha512-eXK1UInq2bPmjyX6e3VHIzMLobc4J94i4AWn+Hpq3OU5KkrRC96OAcR3PRJ/pGu6m8TRnBHP9dkXQVsT/COVIA==
 
-is-wsl@^1.1.0:
-  version "1.1.0"
-  resolved "https://registry.yarnpkg.com/is-wsl/-/is-wsl-1.1.0.tgz#1f16e4aa22b04d1336b66188a66af3c600c3a66d"
-  integrity sha1-HxbkqiKwTRM2tmGIpmrzxgDDpm0=
-
 is-wsl@^2.2.0:
   version "2.2.0"
   resolved "https://registry.yarnpkg.com/is-wsl/-/is-wsl-2.2.0.tgz#74a4c76e77ca9fd3f932f290c17ea326cd157271"
@@ -5990,7 +5575,7 @@ is-wsl@^2.2.0:
   dependencies:
     is-docker "^2.0.0"
 
-isarray@1.0.0, isarray@^1.0.0, isarray@~1.0.0:
+isarray@1.0.0, isarray@~1.0.0:
   version "1.0.0"
   resolved "https://registry.yarnpkg.com/isarray/-/isarray-1.0.0.tgz#bb935d48582cba168c06834957a54a3e07124f11"
   integrity sha1-u5NdSFgsuhaMBoNJV6VKPgcSTxE=
@@ -6468,7 +6053,7 @@ jest-watcher@^26.6.2:
     jest-util "^26.6.2"
     string-length "^4.0.1"
 
-jest-worker@^26.3.0, jest-worker@^26.6.2:
+jest-worker@^26.6.2:
   version "26.6.2"
   resolved "https://registry.yarnpkg.com/jest-worker/-/jest-worker-26.6.2.tgz#7f72cbc4d643c365e27b9fd775f9d0eaa9c7a8ed"
   integrity sha512-KWYVV1c4i+jbMpaBC+U++4Va0cp8OisU185o73T1vo99hqi7w8tSJfUXYswwqqrjzwxa6KpRK54WhPvwf5w6PQ==
@@ -6597,13 +6182,6 @@ json5@2.x, json5@^2.1.2:
   dependencies:
     minimist "^1.2.5"
 
-json5@^1.0.1:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/json5/-/json5-1.0.1.tgz#779fb0018604fa854eacbf6252180d83543e3dbe"
-  integrity sha512-aKS4WQjPenRxiQsC93MNfjx+nbF4PAdYzmd/1JIj8HYzqfbu86beTuNgXDzPknWk0n0uARlyewZo4s++ES36Ow==
-  dependencies:
-    minimist "^1.2.0"
-
 jsonfile@^4.0.0:
   version "4.0.0"
   resolved "https://registry.yarnpkg.com/jsonfile/-/jsonfile-4.0.0.tgz#8771aae0799b64076b76640fca058f9c10e33ecb"
@@ -6804,19 +6382,10 @@ load-json-file@^5.3.0:
     strip-bom "^3.0.0"
     type-fest "^0.3.0"
 
-loader-runner@^2.3.0:
-  version "2.4.0"
-  resolved "https://registry.yarnpkg.com/loader-runner/-/loader-runner-2.4.0.tgz#ed47066bfe534d7e84c4c7b9998c2a75607d9357"
-  integrity sha512-Jsmr89RcXGIwivFY21FcRrisYZfvLMTWx5kOLc+JTxtpBOG6xML0vzbc6SEQG2FO9/4Fc3wW4LVcB5DmGflaRw==
-
-loader-utils@^1.1.0:
-  version "1.4.0"
-  resolved "https://registry.yarnpkg.com/loader-utils/-/loader-utils-1.4.0.tgz#c579b5e34cb34b1a74edc6c1fb36bfa371d5a613"
-  integrity sha512-qH0WSMBtn/oHuwjy/NucEgbx5dbxxnxup9s4PVXJUDHZBQY+s0NWA9rJf53RBnQZxfch7euUui7hpoAPvALZdA==
-  dependencies:
-    big.js "^5.2.2"
-    emojis-list "^3.0.0"
-    json5 "^1.0.1"
+loader-runner@^4.2.0:
+  version "4.2.0"
+  resolved "https://registry.yarnpkg.com/loader-runner/-/loader-runner-4.2.0.tgz#d7022380d66d14c5fb1d496b89864ebcfd478384"
+  integrity sha512-92+huvxMvYlMzMt0iIOukcwYBFpkYJdpl2xsZ7LrlayO7E8SOv+JJUEK17B/dJIHAOLMfh2dZZ/Y18WgmGtYNw==
 
 locate-path@^2.0.0:
   version "2.0.0"
@@ -6906,7 +6475,7 @@ lodash.uniq@^4.5.0:
   resolved "https://registry.yarnpkg.com/lodash.uniq/-/lodash.uniq-4.5.0.tgz#d0225373aeb652adc1bc82e4945339a842754773"
   integrity sha1-0CJTc662Uq3BvILklFM5qEJ1R3M=
 
-lodash@4.x, lodash@^4.17.12, lodash@^4.17.14, lodash@^4.17.15, lodash@^4.17.19, lodash@^4.17.21, lodash@^4.17.4, lodash@^4.2.1, lodash@^4.7.0:
+lodash@4.x, lodash@^4.17.12, lodash@^4.17.15, lodash@^4.17.19, lodash@^4.17.21, lodash@^4.17.4, lodash@^4.2.1, lodash@^4.7.0:
   version "4.17.21"
   resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.21.tgz#679591c564c3bffaae8454cf0b3df370c3d6911c"
   integrity sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==
@@ -6967,7 +6536,7 @@ make-dir@^1.0.0:
   dependencies:
     pify "^3.0.0"
 
-make-dir@^2.0.0, make-dir@^2.1.0:
+make-dir@^2.1.0:
   version "2.1.0"
   resolved "https://registry.yarnpkg.com/make-dir/-/make-dir-2.1.0.tgz#5f0310e18b8be898cc07009295a30ae41e91e6f5"
   integrity sha512-LS9X+dc8KLxXCb8dni79fLIIUA5VyZoyjSMCwTluaXA0o27cCK0bhXkpgw+sTXVpPy/lSO57ilRixqk0vDmtRA==
@@ -6975,7 +6544,7 @@ make-dir@^2.0.0, make-dir@^2.1.0:
     pify "^4.0.1"
     semver "^5.6.0"
 
-make-dir@^3.0.0, make-dir@^3.0.2:
+make-dir@^3.0.0:
   version "3.1.0"
   resolved "https://registry.yarnpkg.com/make-dir/-/make-dir-3.1.0.tgz#415e967046b3a7f1d185277d84aa58203726a13f"
   integrity sha512-g3FeP20LNwhALb/6Cz6Dd4F2ngze0jz7tbzrD2wAV+o9FeNHe4rL+yK2md0J/fiSf1sa1ADhXqi5+oVwOM/eGw==
@@ -7065,15 +6634,6 @@ math-random@^1.0.1:
   resolved "https://registry.yarnpkg.com/math-random/-/math-random-1.0.4.tgz#5dd6943c938548267016d4e34f057583080c514c"
   integrity sha512-rUxjysqif/BZQH2yhd5Aaq7vXMSx9NdEsQcyA07uEzIvxgI7zIr33gGsh+RU0/XjmQpCW7RsVof1vlkvQVCK5A==
 
-md5.js@^1.3.4:
-  version "1.3.5"
-  resolved "https://registry.yarnpkg.com/md5.js/-/md5.js-1.3.5.tgz#b5d07b8e3216e3e27cd728d72f70d1e6a342005f"
-  integrity sha512-xitP+WxNPcTTOgnTJcrhM0xvdPepipPSf3I8EIpGKeFLjt3PlJLIDG3u8EX53ZIubkb+5U2+3rELYpEhHhzdkg==
-  dependencies:
-    hash-base "^3.0.0"
-    inherits "^2.0.1"
-    safe-buffer "^5.1.2"
-
 memfs@2.15.2:
   version "2.15.2"
   resolved "https://registry.yarnpkg.com/memfs/-/memfs-2.15.2.tgz#199b64580cf849ea641d8fac81d96742bfebd26d"
@@ -7096,22 +6656,6 @@ memoizee@0.4.X:
     next-tick "^1.1.0"
     timers-ext "^0.1.7"
 
-memory-fs@^0.5.0:
-  version "0.5.0"
-  resolved "https://registry.yarnpkg.com/memory-fs/-/memory-fs-0.5.0.tgz#324c01288b88652966d161db77838720845a8e3c"
-  integrity sha512-jA0rdU5KoQMC0e6ppoNRtpp6vjFq6+NY7r8hywnC7V+1Xj/MtHwGIbB1QaK/dunyjWteJzmkpd7ooeWg10T7GA==
-  dependencies:
-    errno "^0.1.3"
-    readable-stream "^2.0.1"
-
-memory-fs@~0.4.1:
-  version "0.4.1"
-  resolved "https://registry.yarnpkg.com/memory-fs/-/memory-fs-0.4.1.tgz#3a9a20b8462523e447cfbc7e8bb80ed667bfc552"
-  integrity sha1-OpoguEYlI+RHz7x+i7gO1me/xVI=
-  dependencies:
-    errno "^0.1.3"
-    readable-stream "^2.0.1"
-
 memorystream@^0.3.1:
   version "0.3.1"
   resolved "https://registry.yarnpkg.com/memorystream/-/memorystream-0.3.1.tgz#86d7090b30ce455d63fbae12dda51a47ddcaf9b2"
@@ -7192,7 +6736,7 @@ merge2@^1.2.3, merge2@^1.3.0:
   resolved "https://registry.yarnpkg.com/merge2/-/merge2-1.4.1.tgz#4368892f885e907455a6fd7dc55c0c9d404990ae"
   integrity sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg==
 
-micromatch@^3.0.4, micromatch@^3.1.10, micromatch@^3.1.4, micromatch@^3.1.8:
+micromatch@^3.0.4, micromatch@^3.1.10, micromatch@^3.1.4:
   version "3.1.10"
   resolved "https://registry.yarnpkg.com/micromatch/-/micromatch-3.1.10.tgz#70859bc95c9840952f359a068a3fc49f9ecfac23"
   integrity sha512-MWikgl9n9M3w+bpsY3He8L+w9eF9338xRl8IAO5viDizwSzziFEyUzo2xrrloB64ADbTf8uA8vRqqttDTOmccg==
@@ -7219,20 +6763,12 @@ micromatch@^4.0.2:
     braces "^3.0.1"
     picomatch "^2.0.5"
 
-miller-rabin@^4.0.0:
-  version "4.0.1"
-  resolved "https://registry.yarnpkg.com/miller-rabin/-/miller-rabin-4.0.1.tgz#f080351c865b0dc562a8462966daa53543c78a4d"
-  integrity sha512-115fLhvZVqWwHPbClyntxEVfVDfl9DLLTuJvq3g2O/Oxi8AiNouAHvDSzHS0viUJc+V5vm3eq91Xwqn9dp4jRA==
-  dependencies:
-    bn.js "^4.0.0"
-    brorand "^1.0.1"
-
 mime-db@1.47.0:
   version "1.47.0"
   resolved "https://registry.yarnpkg.com/mime-db/-/mime-db-1.47.0.tgz#8cb313e59965d3c05cfbf898915a267af46a335c"
   integrity sha512-QBmA/G2y+IfeS4oktet3qRZ+P5kPhCKRXxXnQEudYqUaEioAU1/Lq2us3D/t1Jfo4hE9REQPrbB7K5sOczJVIw==
 
-mime-types@^2.1.12, mime-types@~2.1.19:
+mime-types@^2.1.12, mime-types@^2.1.27, mime-types@~2.1.19:
   version "2.1.30"
   resolved "https://registry.yarnpkg.com/mime-types/-/mime-types-2.1.30.tgz#6e7be8b4c479825f85ed6326695db73f9305d62d"
   integrity sha512-crmjA4bLtR8m9qLpHvgxSChT+XoSlZi8J4n/aIdn3z92e/U47Z0V/yl+Wh9W046GgFVAmoNR/fmdbZYcSSIUeg==
@@ -7254,16 +6790,6 @@ min-indent@^1.0.0:
   resolved "https://registry.yarnpkg.com/min-indent/-/min-indent-1.0.1.tgz#a63f681673b30571fbe8bc25686ae746eefa9869"
   integrity sha512-I9jwMn07Sy/IwOj3zVkVik2JTvgpaykDZEigL6Rx6N9LbMywwUSMtxET+7lVoDLLd3O3IXwJwvuuns8UB/HeAg==
 
-minimalistic-assert@^1.0.0, minimalistic-assert@^1.0.1:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/minimalistic-assert/-/minimalistic-assert-1.0.1.tgz#2e194de044626d4a10e7f7fbc00ce73e83e4d5c7"
-  integrity sha512-UtJcAD4yEaGtjPezWuO9wC4nwUnVH/8/Im3yEHQP4b67cXlD/Qr9hdITCU1xDbSEXg2XKNaP8jsReV7vQd00/A==
-
-minimalistic-crypto-utils@^1.0.1:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/minimalistic-crypto-utils/-/minimalistic-crypto-utils-1.0.1.tgz#f6c00c1c0b082246e5c4d99dfb8c7c083b2b582a"
-  integrity sha1-9sAMHAsIIkblxNmd+4x8CDsrWCo=
-
 minimatch@^3.0.0, minimatch@^3.0.4:
   version "3.0.4"
   resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-3.0.4.tgz#5166e286457f03306064be5497e8dbb0c3d32083"
@@ -7293,27 +6819,6 @@ minimist@1.x, minimist@^1.1.1, minimist@^1.1.3, minimist@^1.2.0, minimist@^1.2.5
   resolved "https://registry.yarnpkg.com/minimist/-/minimist-1.2.5.tgz#67d66014b66a6a8aaa0c083c5fd58df4e4e97602"
   integrity sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==
 
-minipass-collect@^1.0.2:
-  version "1.0.2"
-  resolved "https://registry.yarnpkg.com/minipass-collect/-/minipass-collect-1.0.2.tgz#22b813bf745dc6edba2576b940022ad6edc8c617"
-  integrity sha512-6T6lH0H8OG9kITm/Jm6tdooIbogG9e0tLgpY6mphXSm/A9u8Nq1ryBG+Qspiub9LjWlBPsPS3tWQ/Botq4FdxA==
-  dependencies:
-    minipass "^3.0.0"
-
-minipass-flush@^1.0.5:
-  version "1.0.5"
-  resolved "https://registry.yarnpkg.com/minipass-flush/-/minipass-flush-1.0.5.tgz#82e7135d7e89a50ffe64610a787953c4c4cbb373"
-  integrity sha512-JmQSYYpPUqX5Jyn1mXaRwOda1uQ8HP5KAT/oDSLCzt1BYRhQU0/hDtsB1ufZfEEzMZ9aAVmsBw8+FWsIXlClWw==
-  dependencies:
-    minipass "^3.0.0"
-
-minipass-pipeline@^1.2.2:
-  version "1.2.4"
-  resolved "https://registry.yarnpkg.com/minipass-pipeline/-/minipass-pipeline-1.2.4.tgz#68472f79711c084657c067c5c6ad93cddea8214c"
-  integrity sha512-xuIq7cIOt09RPRJ19gdi4b+RiNvDFYe5JH+ggNvBqGqpQXcru3PcRmOZuHBKWK1Txf9+cQ+HMVN4d6z46LZP7A==
-  dependencies:
-    minipass "^3.0.0"
-
 minipass@^2.3.5, minipass@^2.6.0, minipass@^2.8.6, minipass@^2.9.0:
   version "2.9.0"
   resolved "https://registry.yarnpkg.com/minipass/-/minipass-2.9.0.tgz#e713762e7d3e32fed803115cf93e04bca9fcc9a6"
@@ -7322,13 +6827,6 @@ minipass@^2.3.5, minipass@^2.6.0, minipass@^2.8.6, minipass@^2.9.0:
     safe-buffer "^5.1.2"
     yallist "^3.0.0"
 
-minipass@^3.0.0, minipass@^3.1.1:
-  version "3.1.3"
-  resolved "https://registry.yarnpkg.com/minipass/-/minipass-3.1.3.tgz#7d42ff1f39635482e15f9cdb53184deebd5815fd"
-  integrity sha512-Mgd2GdMVzY+x3IJ+oHnVM+KG3lA5c8tnabyJKmHSaG2kAGpudxuOf8ToDkhumF7UzME7DecbQE9uOZhNm7PuJg==
-  dependencies:
-    yallist "^4.0.0"
-
 minizlib@^1.2.1:
   version "1.3.3"
   resolved "https://registry.yarnpkg.com/minizlib/-/minizlib-1.3.3.tgz#2290de96818a34c29551c8a8d301216bd65a861d"
@@ -7336,14 +6834,6 @@ minizlib@^1.2.1:
   dependencies:
     minipass "^2.9.0"
 
-minizlib@^2.1.1:
-  version "2.1.2"
-  resolved "https://registry.yarnpkg.com/minizlib/-/minizlib-2.1.2.tgz#e90d3466ba209b932451508a11ce3d3632145931"
-  integrity sha512-bAxsR8BVfj60DWXHE3u30oHzfl4G7khkSuPW+qvpd7jFRHm7dLxOjUk1EHACJ/hxLY8phGJ0YhYHZo7jil7Qdg==
-  dependencies:
-    minipass "^3.0.0"
-    yallist "^4.0.0"
-
 mississippi@^3.0.0:
   version "3.0.0"
   resolved "https://registry.yarnpkg.com/mississippi/-/mississippi-3.0.0.tgz#ea0a3291f97e0b5e8776b363d5f0a12d94c67022"
@@ -7375,12 +6865,12 @@ mkdirp-promise@^5.0.1:
   dependencies:
     mkdirp "*"
 
-mkdirp@*, mkdirp@1.0.4, mkdirp@1.x, mkdirp@^1.0.3, mkdirp@^1.0.4:
+mkdirp@*, mkdirp@1.0.4, mkdirp@1.x:
   version "1.0.4"
   resolved "https://registry.yarnpkg.com/mkdirp/-/mkdirp-1.0.4.tgz#3eb5ed62622756d79a5f0e2a221dfebad75c2f7e"
   integrity sha512-vVqVZQyf3WLx2Shd0qJ9xuvqgAyKPLAiqITEtqW0oIUjzo3PePDd6fW9iFz30ef7Ysp/oiWqbhszeGWW2T6Gzw==
 
-mkdirp@^0.5.0, mkdirp@^0.5.1, mkdirp@~0.5.0:
+mkdirp@^0.5.0, mkdirp@^0.5.1:
   version "0.5.5"
   resolved "https://registry.yarnpkg.com/mkdirp/-/mkdirp-0.5.5.tgz#d91cefd62d1436ca0f41620e251288d420099def"
   integrity sha512-NKmAlESf6jMGym1++R0Ra7wvhV+wFW63FaSOFPwRahvea0gMUcGUhVeAg/0BC0wiv9ih5NYPB1Wn1UEI1/L+xQ==
@@ -7488,7 +6978,7 @@ natural-compare@^1.4.0:
   resolved "https://registry.yarnpkg.com/natural-compare/-/natural-compare-1.4.0.tgz#4abebfeed7541f2c27acfb29bdbbd15c8d5ba4f7"
   integrity sha1-Sr6/7tdUHywnrPspvbvRXI1bpPc=
 
-neo-async@^2.5.0, neo-async@^2.6.0:
+neo-async@^2.6.0, neo-async@^2.6.2:
   version "2.6.2"
   resolved "https://registry.yarnpkg.com/neo-async/-/neo-async-2.6.2.tgz#b4aafb93e3aeb2d8174ca53cf163ab7d7308305f"
   integrity sha512-Yd3UES5mWCSqR+qNT93S3UoYUkqAZ9lLg8a7g9rimsWmYGK8cVToA4/sF3RrshdyV3sAGMXVUmpMYOw+dLpOuw==
@@ -7549,35 +7039,6 @@ node-int64@^0.4.0:
   resolved "https://registry.yarnpkg.com/node-int64/-/node-int64-0.4.0.tgz#87a9065cdb355d3182d8f94ce11188b825c68a3b"
   integrity sha1-h6kGXNs1XTGC2PlM4RGIuCXGijs=
 
-node-libs-browser@^2.0.0:
-  version "2.2.1"
-  resolved "https://registry.yarnpkg.com/node-libs-browser/-/node-libs-browser-2.2.1.tgz#b64f513d18338625f90346d27b0d235e631f6425"
-  integrity sha512-h/zcD8H9kaDZ9ALUWwlBUDo6TKF8a7qBSCSEGfjTVIYeqsioSKaAX+BN7NgiMGp6iSIXZ3PxgCu8KS3b71YK5Q==
-  dependencies:
-    assert "^1.1.1"
-    browserify-zlib "^0.2.0"
-    buffer "^4.3.0"
-    console-browserify "^1.1.0"
-    constants-browserify "^1.0.0"
-    crypto-browserify "^3.11.0"
-    domain-browser "^1.1.1"
-    events "^3.0.0"
-    https-browserify "^1.0.0"
-    os-browserify "^0.3.0"
-    path-browserify "0.0.1"
-    process "^0.11.10"
-    punycode "^1.2.4"
-    querystring-es3 "^0.2.0"
-    readable-stream "^2.3.3"
-    stream-browserify "^2.0.1"
-    stream-http "^2.7.2"
-    string_decoder "^1.0.0"
-    timers-browserify "^2.0.4"
-    tty-browserify "0.0.0"
-    url "^0.11.0"
-    util "^0.11.0"
-    vm-browserify "^1.0.1"
-
 node-modules-regexp@^1.0.0:
   version "1.0.0"
   resolved "https://registry.yarnpkg.com/node-modules-regexp/-/node-modules-regexp-1.0.0.tgz#8d9dbe28964a4ac5712e9131642107c71e90ec40"
@@ -7635,7 +7096,7 @@ normalize-path@^2.0.1, normalize-path@^2.1.1:
   dependencies:
     remove-trailing-separator "^1.0.1"
 
-normalize-path@^3.0.0, normalize-path@~3.0.0:
+normalize-path@^3.0.0:
   version "3.0.0"
   resolved "https://registry.yarnpkg.com/normalize-path/-/normalize-path-3.0.0.tgz#0dcd69ff23a1c9b11fd0978316644a0388216a65"
   integrity sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==
@@ -7760,7 +7221,7 @@ oauth-sign@~0.9.0:
   resolved "https://registry.yarnpkg.com/oauth-sign/-/oauth-sign-0.9.0.tgz#47a7b016baa68b5fa0ecf3dee08a85c679ac6455"
   integrity sha512-fexhUFFPTGV8ybAtSIGbV6gOkSv8UtRbDBnAyLQw4QPKkgNlsH2ByPGtMUqdWkos6YCRmAqViwgZrJc/mRDzZQ==
 
-object-assign@4.X, object-assign@^4.0.1, object-assign@^4.1.0, object-assign@^4.1.1:
+object-assign@4.X, object-assign@^4.0.1, object-assign@^4.1.0:
   version "4.1.1"
   resolved "https://registry.yarnpkg.com/object-assign/-/object-assign-4.1.1.tgz#2109adc7965887cfc05cbbd442cac8bfbb360863"
   integrity sha1-IQmtx5ZYh8/AXLvUQsrIv7s2CGM=
@@ -7907,11 +7368,6 @@ ordered-read-streams@^1.0.0:
   dependencies:
     readable-stream "^2.0.1"
 
-os-browserify@^0.3.0:
-  version "0.3.0"
-  resolved "https://registry.yarnpkg.com/os-browserify/-/os-browserify-0.3.0.tgz#854373c7f5c2315914fc9bfc6bd8238fdda1ec27"
-  integrity sha1-hUNzx/XCMVkU/Jv8a9gjj92h7Cc=
-
 os-homedir@^1.0.0:
   version "1.0.2"
   resolved "https://registry.yarnpkg.com/os-homedir/-/os-homedir-1.0.2.tgz#ffbc4988336e0e833de0c168c7ef152121aa7fb3"
@@ -7990,7 +7446,7 @@ p-limit@^2.0.0, p-limit@^2.2.0:
   dependencies:
     p-try "^2.0.0"
 
-p-limit@^3.0.2:
+p-limit@^3.1.0:
   version "3.1.0"
   resolved "https://registry.yarnpkg.com/p-limit/-/p-limit-3.1.0.tgz#e1daccbe78d0d1388ca18c64fea38e3e57e3706b"
   integrity sha512-TYOanM3wGwNGsZN2cVTYPArw454xnXj5qmWF1bEoAc4+cU/ol7GVh7odevjp1FNHduHc3KZMcFduxU5Xc6uJRQ==
@@ -8037,13 +7493,6 @@ p-map@^3.0.0:
   dependencies:
     aggregate-error "^3.0.0"
 
-p-map@^4.0.0:
-  version "4.0.0"
-  resolved "https://registry.yarnpkg.com/p-map/-/p-map-4.0.0.tgz#bb2f95a5eda2ec168ec9274e06a747c3e2904d2b"
-  integrity sha512-/bjOqmgETBYB5BoEeGVea8dmvHb2m9GLy1E9W43yeyfP6QQCZGFNa+XRceJEuDB6zqr+gKpIAmlLebMpykw/MQ==
-  dependencies:
-    aggregate-error "^3.0.0"
-
 p-pipe@^1.2.0:
   version "1.2.0"
   resolved "https://registry.yarnpkg.com/p-pipe/-/p-pipe-1.2.0.tgz#4b1a11399a11520a67790ee5a0c1d5881d6befe9"
@@ -8092,11 +7541,6 @@ pad-left@^2.1.0:
   dependencies:
     repeat-string "^1.5.4"
 
-pako@~1.0.5:
-  version "1.0.11"
-  resolved "https://registry.yarnpkg.com/pako/-/pako-1.0.11.tgz#6c9599d340d54dfd3946380252a35705a6b992bf"
-  integrity sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw==
-
 parallel-transform@^1.1.0:
   version "1.2.0"
   resolved "https://registry.yarnpkg.com/parallel-transform/-/parallel-transform-1.2.0.tgz#9049ca37d6cb2182c3b1d2c720be94d14a5814fc"
@@ -8113,17 +7557,6 @@ parent-module@^1.0.0:
   dependencies:
     callsites "^3.0.0"
 
-parse-asn1@^5.0.0, parse-asn1@^5.1.5:
-  version "5.1.6"
-  resolved "https://registry.yarnpkg.com/parse-asn1/-/parse-asn1-5.1.6.tgz#385080a3ec13cb62a62d39409cb3e88844cdaed4"
-  integrity sha512-RnZRo1EPU6JBnra2vGHj0yhp6ebyjBZpmUCLHWiFhxlzvBCCpAuZ7elsBp1PVAbQN0/04VD/19rfzlBSwLstMw==
-  dependencies:
-    asn1.js "^5.2.0"
-    browserify-aes "^1.0.0"
-    evp_bytestokey "^1.0.0"
-    pbkdf2 "^3.0.3"
-    safe-buffer "^5.1.1"
-
 parse-filepath@^1.0.1:
   version "1.0.2"
   resolved "https://registry.yarnpkg.com/parse-filepath/-/parse-filepath-1.0.2.tgz#a632127f53aaf3d15876f5872f3ffac763d6c891"
@@ -8203,11 +7636,6 @@ pascalcase@^0.1.1:
   resolved "https://registry.yarnpkg.com/pascalcase/-/pascalcase-0.1.1.tgz#b363e55e8006ca6fe21784d2db22bd15d7917f14"
   integrity sha1-s2PlXoAGym/iF4TS2yK9FdeRfxQ=
 
-path-browserify@0.0.1:
-  version "0.0.1"
-  resolved "https://registry.yarnpkg.com/path-browserify/-/path-browserify-0.0.1.tgz#e6c4ddd7ed3aa27c68a20cc4e50e1a4ee83bbc4a"
-  integrity sha512-BapA40NHICOS+USX9SN4tyhq+A2RrN/Ws5F0Z5aMHDp98Fl86lX8Oti8B7uN93L4Ifv4fHOEA+pQw87gmMO/lQ==
-
 path-dirname@^1.0.0:
   version "1.0.2"
   resolved "https://registry.yarnpkg.com/path-dirname/-/path-dirname-1.0.2.tgz#cc33d24d525e099a5388c0336c6e32b9160609e0"
@@ -8283,17 +7711,6 @@ path-type@^4.0.0:
   resolved "https://registry.yarnpkg.com/path-type/-/path-type-4.0.0.tgz#84ed01c0a7ba380afe09d90a8c180dcd9d03043b"
   integrity sha512-gDKb8aZMDeD/tZWs9P6+q0J9Mwkdl6xMV8TjnGP3qJVJ06bdMgkbBlLU8IdfOsIsFz2BW1rNVT3XuNEl8zPAvw==
 
-pbkdf2@^3.0.3:
-  version "3.1.1"
-  resolved "https://registry.yarnpkg.com/pbkdf2/-/pbkdf2-3.1.1.tgz#cb8724b0fada984596856d1a6ebafd3584654b94"
-  integrity sha512-4Ejy1OPxi9f2tt1rRV7Go7zmfDQ+ZectEQz3VGUQhgq62HtIRPDyG/JtnwIxs6x3uNMwo2V7q1fMvKjb+Tnpqg==
-  dependencies:
-    create-hash "^1.1.2"
-    create-hmac "^1.1.4"
-    ripemd160 "^2.0.1"
-    safe-buffer "^5.0.1"
-    sha.js "^2.4.8"
-
 performance-now@^2.1.0:
   version "2.1.0"
   resolved "https://registry.yarnpkg.com/performance-now/-/performance-now-2.1.0.tgz#6309f4e0e5fa913ec1c69307ae364b4b377c9e7b"
@@ -8350,7 +7767,7 @@ pkg-dir@^3.0.0:
   dependencies:
     find-up "^3.0.0"
 
-pkg-dir@^4.1.0, pkg-dir@^4.2.0:
+pkg-dir@^4.2.0:
   version "4.2.0"
   resolved "https://registry.yarnpkg.com/pkg-dir/-/pkg-dir-4.2.0.tgz#f099133df7ede422e81d1d8448270eeb3e4261f3"
   integrity sha512-HRDzbaKjC+AOWVXxAU/x54COGeIv9eb+6CkDSQoNTt4XyWoIJvuPsXizxu/Fr23EiekbtZwmh1IcIG/l/a10GQ==
@@ -8377,6 +7794,15 @@ posix-character-classes@^0.1.0:
   resolved "https://registry.yarnpkg.com/posix-character-classes/-/posix-character-classes-0.1.1.tgz#01eac0fe3b5af71a2a6c02feabb8c1fef7e00eab"
   integrity sha1-AerA/jta9xoqbAL+q7jB/vfgDqs=
 
+postcss@^7.0.16:
+  version "7.0.35"
+  resolved "https://registry.yarnpkg.com/postcss/-/postcss-7.0.35.tgz#d2be00b998f7f211d8a276974079f2e92b970e24"
+  integrity sha512-3QT8bBJeX/S5zKTTjTCIjRF3If4avAT6kqxcASlTWEtAFCb9NH0OUxNDfgZSWdP5fJnBYCMEWkIFfWeugjzYMg==
+  dependencies:
+    chalk "^2.4.2"
+    source-map "^0.6.1"
+    supports-color "^6.1.0"
+
 prelude-ls@^1.2.1:
   version "1.2.1"
   resolved "https://registry.yarnpkg.com/prelude-ls/-/prelude-ls-1.2.1.tgz#debc6489d7a6e6b0e7611888cec880337d316396"
@@ -8417,11 +7843,6 @@ process-nextick-args@^2.0.0, process-nextick-args@~2.0.0:
   resolved "https://registry.yarnpkg.com/process-nextick-args/-/process-nextick-args-2.0.1.tgz#7820d9b16120cc55ca9ae7792680ae7dba6d7fe2"
   integrity sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==
 
-process@^0.11.10:
-  version "0.11.10"
-  resolved "https://registry.yarnpkg.com/process/-/process-0.11.10.tgz#7332300e840161bda3e69a1d1d91a7d4bc16f182"
-  integrity sha1-czIwDoQBYb2j5podHZGn1LwW8YI=
-
 progress@^2.0.0, progress@^2.0.3:
   version "2.0.3"
   resolved "https://registry.yarnpkg.com/progress/-/progress-2.0.3.tgz#7e8cf8d8f5b8f239c1bc68beb4eb78567d572ef8"
@@ -8479,28 +7900,11 @@ protoduck@^5.0.1:
   dependencies:
     genfun "^5.0.0"
 
-prr@~1.0.1:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/prr/-/prr-1.0.1.tgz#d3fc114ba06995a45ec6893f484ceb1d78f5f476"
-  integrity sha1-0/wRS6BplaRexok/SEzrHXj19HY=
-
 psl@^1.1.28, psl@^1.1.33:
   version "1.8.0"
   resolved "https://registry.yarnpkg.com/psl/-/psl-1.8.0.tgz#9326f8bcfb013adcc005fdff056acce020e51c24"
   integrity sha512-RIdOzyoavK+hA18OGGWDqUTsCLhtA7IcZ/6NCs4fFJaHBDab+pDDmDIByWFRQJq2Cd7r1OoQxBGKOaztq+hjIQ==
 
-public-encrypt@^4.0.0:
-  version "4.0.3"
-  resolved "https://registry.yarnpkg.com/public-encrypt/-/public-encrypt-4.0.3.tgz#4fcc9d77a07e48ba7527e7cbe0de33d0701331e0"
-  integrity sha512-zVpa8oKZSz5bTMTFClc1fQOnyyEzpl5ozpi1B5YcvBrdohMjH2rfsBtyXcuNuwjsDIXmBYlF2N5FlJYhR29t8Q==
-  dependencies:
-    bn.js "^4.1.0"
-    browserify-rsa "^4.0.0"
-    create-hash "^1.1.0"
-    parse-asn1 "^5.0.0"
-    randombytes "^2.0.1"
-    safe-buffer "^5.1.2"
-
 pump@^2.0.0:
   version "2.0.1"
   resolved "https://registry.yarnpkg.com/pump/-/pump-2.0.1.tgz#12399add6e4cf7526d973cbc8b5ce2e2908b3909"
@@ -8526,16 +7930,6 @@ pumpify@^1.3.3, pumpify@^1.3.5:
     inherits "^2.0.3"
     pump "^2.0.0"
 
-punycode@1.3.2:
-  version "1.3.2"
-  resolved "https://registry.yarnpkg.com/punycode/-/punycode-1.3.2.tgz#9653a036fb7c1ee42342f2325cceefea3926c48d"
-  integrity sha1-llOgNvt8HuQjQvIyXM7v6jkmxI0=
-
-punycode@^1.2.4:
-  version "1.4.1"
-  resolved "https://registry.yarnpkg.com/punycode/-/punycode-1.4.1.tgz#c0d5a63b2718800ad8e1eb0fa5269c84dd41845e"
-  integrity sha1-wNWmOycYgArY4esPpSachN1BhF4=
-
 punycode@^2.1.0, punycode@^2.1.1:
   version "2.1.1"
   resolved "https://registry.yarnpkg.com/punycode/-/punycode-2.1.1.tgz#b58b010ac40c22c5657616c8d2c2c02c7bf479ec"
@@ -8568,16 +7962,6 @@ query-string@^6.13.8:
     split-on-first "^1.0.0"
     strict-uri-encode "^2.0.0"
 
-querystring-es3@^0.2.0:
-  version "0.2.1"
-  resolved "https://registry.yarnpkg.com/querystring-es3/-/querystring-es3-0.2.1.tgz#9ec61f79049875707d69414596fd907a4d711e73"
-  integrity sha1-nsYfeQSYdXB9aUFFlv2Qek1xHnM=
-
-querystring@0.2.0:
-  version "0.2.0"
-  resolved "https://registry.yarnpkg.com/querystring/-/querystring-0.2.0.tgz#b209849203bb25df820da756e747005878521620"
-  integrity sha1-sgmEkgO7Jd+CDadW50cAWHhSFiA=
-
 queue-microtask@^1.2.2:
   version "1.2.3"
   resolved "https://registry.yarnpkg.com/queue-microtask/-/queue-microtask-1.2.3.tgz#4929228bbc724dfac43e0efb058caf7b6cfb6243"
@@ -8602,21 +7986,13 @@ randomatic@3.1.1:
     kind-of "^6.0.0"
     math-random "^1.0.1"
 
-randombytes@^2.0.0, randombytes@^2.0.1, randombytes@^2.0.5, randombytes@^2.1.0:
+randombytes@^2.1.0:
   version "2.1.0"
   resolved "https://registry.yarnpkg.com/randombytes/-/randombytes-2.1.0.tgz#df6f84372f0270dc65cdf6291349ab7a473d4f2a"
   integrity sha512-vYl3iOX+4CKUWuxGi9Ukhie6fsqXqS9FE2Zaic4tNFD2N2QQaXOMFbuKK4QmDHC0JO6B1Zp41J0LpT0oR68amQ==
   dependencies:
     safe-buffer "^5.1.0"
 
-randomfill@^1.0.3:
-  version "1.0.4"
-  resolved "https://registry.yarnpkg.com/randomfill/-/randomfill-1.0.4.tgz#c92196fc86ab42be983f1bf31778224931d61458"
-  integrity sha512-87lcbR8+MhcWcUiQ+9e+Rwx8MyR2P7qnt15ynUlbm3TU/fjbgz4GsvfSUDTemtCCtVCqb4ZcEFlyPNTh9bBTLw==
-  dependencies:
-    randombytes "^2.0.5"
-    safe-buffer "^5.1.0"
-
 react-is@^17.0.1:
   version "17.0.2"
   resolved "https://registry.yarnpkg.com/react-is/-/react-is-17.0.2.tgz#e691d4a8e9c789365655539ab372762b0efb54f0"
@@ -8721,7 +8097,7 @@ read@1, read@~1.0.1:
     string_decoder "~1.1.1"
     util-deprecate "~1.0.1"
 
-"readable-stream@2 || 3", readable-stream@3, readable-stream@^3.0.0, readable-stream@^3.0.2, readable-stream@^3.6.0:
+"readable-stream@2 || 3", readable-stream@3, readable-stream@^3.0.0, readable-stream@^3.0.2:
   version "3.6.0"
   resolved "https://registry.yarnpkg.com/readable-stream/-/readable-stream-3.6.0.tgz#337bbda3adc0706bd3e024426a286d4b4b2c9198"
   integrity sha512-BViHy7LKeTz4oNnkcLJ+lVSL6vpiFeX6/d3oSH8zCW7UxP2onchk+vTGB143xuFjHS3deTgkKoXXymXqymiIdA==
@@ -8749,13 +8125,6 @@ readdirp@^2.2.1:
     micromatch "^3.1.10"
     readable-stream "^2.0.2"
 
-readdirp@~3.5.0:
-  version "3.5.0"
-  resolved "https://registry.yarnpkg.com/readdirp/-/readdirp-3.5.0.tgz#9ba74c019b15d365278d2e91bb8c48d7b4d42c9e"
-  integrity sha512-cMhu7c/8rdhkHXWsY+osBhfSy0JikwpHK/5+imo+LpeasTF8ouErHrlYkwT0++njiyuDvc7OFY5T3ukvZ8qmFQ==
-  dependencies:
-    picomatch "^2.2.1"
-
 rechoir@^0.6.2:
   version "0.6.2"
   resolved "https://registry.yarnpkg.com/rechoir/-/rechoir-0.6.2.tgz#85204b54dba82d5742e28c96756ef43af50e3384"
@@ -9022,14 +8391,6 @@ rimraf@^3.0.0, rimraf@^3.0.2:
   dependencies:
     glob "^7.1.3"
 
-ripemd160@^2.0.0, ripemd160@^2.0.1:
-  version "2.0.2"
-  resolved "https://registry.yarnpkg.com/ripemd160/-/ripemd160-2.0.2.tgz#a1c1a6f624751577ba5d07914cbc92850585890c"
-  integrity sha512-ii4iagi25WusVoiC4B4lq7pbXfAp3D9v5CwfkY33vffw2+pkDjY1D8GaN7spsxvCSx8dkPqOZCEZyfxcmJG2IA==
-  dependencies:
-    hash-base "^3.0.0"
-    inherits "^2.0.1"
-
 rsvp@^4.8.4:
   version "4.8.5"
   resolved "https://registry.yarnpkg.com/rsvp/-/rsvp-4.8.5.tgz#c8f155311d167f68f21e168df71ec5b083113734"
@@ -9117,30 +8478,13 @@ saxes@^5.0.1:
   dependencies:
     xmlchars "^2.2.0"
 
-schema-utils@^0.4.4:
-  version "0.4.7"
-  resolved "https://registry.yarnpkg.com/schema-utils/-/schema-utils-0.4.7.tgz#ba74f597d2be2ea880131746ee17d0a093c68187"
-  integrity sha512-v/iwU6wvwGK8HbU9yi3/nhGzP0yGSuhQMzL6ySiec1FSrZZDkhm4noOSWzrNFo/jEc+SJY6jRTwuwbSXJPDUnQ==
-  dependencies:
-    ajv "^6.1.0"
-    ajv-keywords "^3.1.0"
-
-schema-utils@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/schema-utils/-/schema-utils-1.0.0.tgz#0b79a93204d7b600d4b2850d1f66c2a34951c770"
-  integrity sha512-i27Mic4KovM/lnGsy8whRCHhc7VicJajAjTrYg11K9zfZXnYIt4k5F+kZkwjnrhKzLic/HLU4j11mjsz2G/75g==
-  dependencies:
-    ajv "^6.1.0"
-    ajv-errors "^1.0.0"
-    ajv-keywords "^3.1.0"
-
-schema-utils@^2.7.1:
-  version "2.7.1"
-  resolved "https://registry.yarnpkg.com/schema-utils/-/schema-utils-2.7.1.tgz#1ca4f32d1b24c590c203b8e7a50bf0ea4cd394d7"
-  integrity sha512-SHiNtMOUGWBQJwzISiVYKu82GiV4QYGePp3odlY1tuKO7gPtphAT5R/py0fA6xtbgLL/RvtJZnU9b8s0F1q0Xg==
+schema-utils@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/schema-utils/-/schema-utils-3.0.0.tgz#67502f6aa2b66a2d4032b4279a2944978a0913ef"
+  integrity sha512-6D82/xSzO094ajanoOSbe4YvXWMfn2A//8Y1+MUqFAJul5Bs+yn36xbK9OtNDcRVSBJ9jjeoXftM6CfztsjOAA==
   dependencies:
-    "@types/json-schema" "^7.0.5"
-    ajv "^6.12.4"
+    "@types/json-schema" "^7.0.6"
+    ajv "^6.12.5"
     ajv-keywords "^3.5.2"
 
 semver-greatest-satisfied-range@^1.1.0:
@@ -9167,13 +8511,6 @@ semver@^6.0.0, semver@^6.2.0, semver@^6.3.0:
   resolved "https://registry.yarnpkg.com/semver/-/semver-6.3.0.tgz#ee0a64c8af5e8ceea67687b133761e1becbd1d3d"
   integrity sha512-b39TBaTSfV6yBrapU89p5fKekE2m/NwnDocOVruQFS1/veMgdzuPcnOM34M6CwxW8jH/lxEa5rBoDeUwu5HHTw==
 
-serialize-javascript@^4.0.0:
-  version "4.0.0"
-  resolved "https://registry.yarnpkg.com/serialize-javascript/-/serialize-javascript-4.0.0.tgz#b525e1238489a5ecfc42afacc3fe99e666f4b1aa"
-  integrity sha512-GaNA54380uFefWghODBWEGisLZFj00nS5ACs6yHa9nLqlLpVLO8ChDGeKRjZnV4Nh4n0Qi7nhYZD/9fCPzEqkw==
-  dependencies:
-    randombytes "^2.1.0"
-
 serialize-javascript@^5.0.1:
   version "5.0.1"
   resolved "https://registry.yarnpkg.com/serialize-javascript/-/serialize-javascript-5.0.1.tgz#7886ec848049a462467a97d3d918ebb2aaf934f4"
@@ -9196,19 +8533,6 @@ set-value@^2.0.0, set-value@^2.0.1:
     is-plain-object "^2.0.3"
     split-string "^3.0.1"
 
-setimmediate@^1.0.4:
-  version "1.0.5"
-  resolved "https://registry.yarnpkg.com/setimmediate/-/setimmediate-1.0.5.tgz#290cbb232e306942d7d7ea9b83732ab7856f8285"
-  integrity sha1-KQy7Iy4waULX1+qbg3Mqt4VvgoU=
-
-sha.js@^2.4.0, sha.js@^2.4.8:
-  version "2.4.11"
-  resolved "https://registry.yarnpkg.com/sha.js/-/sha.js-2.4.11.tgz#37a5cf0b81ecbc6943de109ba2960d1b26584ae7"
-  integrity sha512-QMEp5B7cftE7APOjk5Y6xgrbWu+WkLVQwk8JNjZ8nKRciZaByEW6MubieAiToS7+dwvrjGhH8jRXz3MVd0AYqQ==
-  dependencies:
-    inherits "^2.0.1"
-    safe-buffer "^5.0.1"
-
 shallow-clone@^3.0.0:
   version "3.0.1"
   resolved "https://registry.yarnpkg.com/shallow-clone/-/shallow-clone-3.0.1.tgz#8f2981ad92531f55035b01fb230769a40e02efa3"
@@ -9368,20 +8692,26 @@ sort-keys@^2.0.0:
   dependencies:
     is-plain-obj "^1.0.0"
 
-source-list-map@^2.0.0:
+source-list-map@^2.0.1:
   version "2.0.1"
   resolved "https://registry.yarnpkg.com/source-list-map/-/source-list-map-2.0.1.tgz#3993bd873bfc48479cca9ea3a547835c7c154b34"
   integrity sha512-qnQ7gVMxGNxsiL4lEuJwe/To8UnK7fAnmbGEEH8RpLouuKbeEm0lhbQVFIrNSuB+G7tVrAlVsZgETT5nljf+Iw==
 
-source-map-loader@0.2.4:
-  version "0.2.4"
-  resolved "https://registry.yarnpkg.com/source-map-loader/-/source-map-loader-0.2.4.tgz#c18b0dc6e23bf66f6792437557c569a11e072271"
-  integrity sha512-OU6UJUty+i2JDpTItnizPrlpOIBLmQbWMuBg9q5bVtnHACqw1tn9nNwqJLbv0/00JjnJb/Ee5g5WS5vrRv7zIQ==
+source-map-js@^0.6.2:
+  version "0.6.2"
+  resolved "https://registry.yarnpkg.com/source-map-js/-/source-map-js-0.6.2.tgz#0bb5de631b41cfbda6cfba8bd05a80efdfd2385e"
+  integrity sha512-/3GptzWzu0+0MBQFrDKzw/DvvMTUORvgY6k6jd/VS6iCR4RDTKWH6v6WPwQoUO8667uQEf9Oe38DxAYWY5F/Ug==
+
+source-map-loader@2.0.1:
+  version "2.0.1"
+  resolved "https://registry.yarnpkg.com/source-map-loader/-/source-map-loader-2.0.1.tgz#b4fd0ae7fa7e7d3954300f383f2d6fcc230a4261"
+  integrity sha512-UzOTTQhoNPeTNzOxwFw220RSRzdGSyH4lpNyWjR7Qm34P4/N0W669YSUFdH07+YNeN75h765XLHmNsF/bm97RQ==
   dependencies:
-    async "^2.5.0"
-    loader-utils "^1.1.0"
+    abab "^2.0.5"
+    iconv-lite "^0.6.2"
+    source-map-js "^0.6.2"
 
-source-map-resolve@^0.5.0, source-map-resolve@^0.5.2:
+source-map-resolve@^0.5.0:
   version "0.5.3"
   resolved "https://registry.yarnpkg.com/source-map-resolve/-/source-map-resolve-0.5.3.tgz#190866bece7553e1f8f267a2ee82c606b5509a1a"
   integrity sha512-Htz+RnsXWk5+P2slx5Jh3Q66vhQj1Cllm0zvnaY98+NFx+Dv2CF/f5O/t8x+KaNdrdIAsruNzoh/KpialbqAnw==
@@ -9392,7 +8722,15 @@ source-map-resolve@^0.5.0, source-map-resolve@^0.5.2:
     source-map-url "^0.4.0"
     urix "^0.1.0"
 
-source-map-support@^0.5.17, source-map-support@^0.5.6, source-map-support@~0.5.12, source-map-support@~0.5.19:
+source-map-resolve@^0.6.0:
+  version "0.6.0"
+  resolved "https://registry.yarnpkg.com/source-map-resolve/-/source-map-resolve-0.6.0.tgz#3d9df87e236b53f16d01e58150fc7711138e5ed2"
+  integrity sha512-KXBr9d/fO/bWo97NXsPIAW1bFSBOuCnjbNTBMO7N59hsv5i9yzRDfcYwwt0l04+VqnKC+EwzvJZIP/qkuMgR/w==
+  dependencies:
+    atob "^2.1.2"
+    decode-uri-component "^0.2.0"
+
+source-map-support@^0.5.17, source-map-support@^0.5.6, source-map-support@~0.5.19:
   version "0.5.19"
   resolved "https://registry.yarnpkg.com/source-map-support/-/source-map-support-0.5.19.tgz#a98b62f86dcaf4f67399648c085291ab9e8fed61"
   integrity sha512-Wonm7zOCIJzBGQdB+thsPar0kYuCIzYvxZwlBa87yi/Mdjv7Tip2cyVbLj5o0cFPN4EVkuTwb3GDDyUx2DGnGw==
@@ -9410,7 +8748,7 @@ source-map@^0.5.0, source-map@^0.5.1, source-map@^0.5.6:
   resolved "https://registry.yarnpkg.com/source-map/-/source-map-0.5.7.tgz#8a039d2d1021d22d1ea14c80d8ea468ba2ef3fcc"
   integrity sha1-igOdLRAh0i0eoUyA2OpGi6LvP8w=
 
-source-map@^0.6.0, source-map@^0.6.1, source-map@~0.6.0, source-map@~0.6.1:
+source-map@^0.6.0, source-map@^0.6.1, source-map@~0.6.1:
   version "0.6.1"
   resolved "https://registry.yarnpkg.com/source-map/-/source-map-0.6.1.tgz#74722af32e9614e9c287a8d0bbde48b5e2f1a263"
   integrity sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==
@@ -9511,13 +8849,6 @@ ssri@^6.0.0, ssri@^6.0.1:
   dependencies:
     figgy-pudding "^3.5.1"
 
-ssri@^8.0.1:
-  version "8.0.1"
-  resolved "https://registry.yarnpkg.com/ssri/-/ssri-8.0.1.tgz#638e4e439e2ffbd2cd289776d5ca457c4f51a2af"
-  integrity sha512-97qShzy1AiyxvPNIkLWoGua7xoQzzPjQ0HAH4B0rWKo7SZ6USuPcrUiAFrws0UH8RrbWmgq3LMTObhPIHbbBeQ==
-  dependencies:
-    minipass "^3.1.1"
-
 stack-trace@0.0.10:
   version "0.0.10"
   resolved "https://registry.yarnpkg.com/stack-trace/-/stack-trace-0.0.10.tgz#547c70b347e8d32b4e108ea1a2a159e5fdde19c0"
@@ -9555,14 +8886,6 @@ stealthy-require@^1.1.1:
   resolved "https://registry.yarnpkg.com/stealthy-require/-/stealthy-require-1.1.1.tgz#35b09875b4ff49f26a777e509b3090a3226bf24b"
   integrity sha1-NbCYdbT/SfJqd35QmzCQoyJr8ks=
 
-stream-browserify@^2.0.1:
-  version "2.0.2"
-  resolved "https://registry.yarnpkg.com/stream-browserify/-/stream-browserify-2.0.2.tgz#87521d38a44aa7ee91ce1cd2a47df0cb49dd660b"
-  integrity sha512-nX6hmklHs/gr2FuxYDltq8fJA1GDlxKQCz8O/IM4atRqBH8OORmBNgfvW5gG10GT/qQ9u0CzIvr2X5Pkt6ntqg==
-  dependencies:
-    inherits "~2.0.1"
-    readable-stream "^2.0.2"
-
 stream-each@^1.1.0:
   version "1.2.3"
   resolved "https://registry.yarnpkg.com/stream-each/-/stream-each-1.2.3.tgz#ebe27a0c389b04fbcc233642952e10731afa9bae"
@@ -9576,17 +8899,6 @@ stream-exhaust@^1.0.1:
   resolved "https://registry.yarnpkg.com/stream-exhaust/-/stream-exhaust-1.0.2.tgz#acdac8da59ef2bc1e17a2c0ccf6c320d120e555d"
   integrity sha512-b/qaq/GlBK5xaq1yrK9/zFcyRSTNxmcZwFLGSTG0mXgZl/4Z6GgiyYOXOvY7N3eEvFRAG1bkDRz5EPGSvPYQlw==
 
-stream-http@^2.7.2:
-  version "2.8.3"
-  resolved "https://registry.yarnpkg.com/stream-http/-/stream-http-2.8.3.tgz#b2d242469288a5a27ec4fe8933acf623de6514fc"
-  integrity sha512-+TSkfINHDo4J+ZobQLWiMouQYB+UVYFttRA94FpEzzJ7ZdqcL4uUUQ7WkdkI4DSozGmgBUE/a47L+38PenXhUw==
-  dependencies:
-    builtin-status-codes "^3.0.0"
-    inherits "^2.0.1"
-    readable-stream "^2.3.6"
-    to-arraybuffer "^1.0.0"
-    xtend "^4.0.0"
-
 stream-shift@^1.0.0:
   version "1.0.1"
   resolved "https://registry.yarnpkg.com/stream-shift/-/stream-shift-1.0.1.tgz#d7088281559ab2778424279b0877da3c392d5a3d"
@@ -9665,7 +8977,7 @@ string.prototype.trimstart@^1.0.4:
     call-bind "^1.0.2"
     define-properties "^1.1.3"
 
-string_decoder@^1.0.0, string_decoder@^1.1.1:
+string_decoder@^1.1.1:
   version "1.3.0"
   resolved "https://registry.yarnpkg.com/string_decoder/-/string_decoder-1.3.0.tgz#42f114594a46cf1a8e30b0a84f56c78c3edac21e"
   integrity sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==
@@ -9707,7 +9019,7 @@ strip-ansi@^6.0.0:
   dependencies:
     ansi-regex "^5.0.0"
 
-strip-bom-string@1.X:
+strip-bom-string@^1.0.0:
   version "1.0.0"
   resolved "https://registry.yarnpkg.com/strip-bom-string/-/strip-bom-string-1.0.0.tgz#e5211e9224369fbb81d633a2f00044dc8cedad92"
   integrity sha1-5SEekiQ2n7uB1jOi8ABE3IztrZI=
@@ -9779,6 +9091,13 @@ supports-color@^5.3.0:
   dependencies:
     has-flag "^3.0.0"
 
+supports-color@^6.1.0:
+  version "6.1.0"
+  resolved "https://registry.yarnpkg.com/supports-color/-/supports-color-6.1.0.tgz#0764abc69c63d5ac842dd4867e8d025e880df8f3"
+  integrity sha512-qe1jfm1Mg7Nq/NSh6XE24gPXROEVsWHxC1LIx//XNlD9iw7YZQGjZNjYN7xGaEG6iKdA8EtNFW6R0gjnVXp+wQ==
+  dependencies:
+    has-flag "^3.0.0"
+
 supports-color@^7.0.0, supports-color@^7.1.0:
   version "7.2.0"
   resolved "https://registry.yarnpkg.com/supports-color/-/supports-color-7.2.0.tgz#1b7dcdcb32b8138801b3e478ba6a51caa89648da"
@@ -9837,10 +9156,10 @@ table@^6.0.4:
     slice-ansi "^4.0.0"
     string-width "^4.2.0"
 
-tapable@^1.0.0, tapable@^1.1.0:
-  version "1.1.3"
-  resolved "https://registry.yarnpkg.com/tapable/-/tapable-1.1.3.tgz#a1fccc06b58db61fd7a45da2da44f5f3a3e67ba2"
-  integrity sha512-4WK/bYZmj8xLr+HUCODHGF1ZFzsYffasLUgEiMBY4fgtltdO6B4WJtlSbPaDTLpYTcGVwM2qLnFTICEcNxs3kA==
+tapable@^2.1.1, tapable@^2.2.0:
+  version "2.2.0"
+  resolved "https://registry.yarnpkg.com/tapable/-/tapable-2.2.0.tgz#5c373d281d9c672848213d0e037d1c4165ab426b"
+  integrity sha512-FBk4IesMV1rBxX2tfiK8RAmogtWn53puLOQlvO8XuwlgxcYbP4mVPS9Ph4aeamSyyVjOl24aYWAuc8U5kCVwMw==
 
 tar@^4.4.10, tar@^4.4.12, tar@^4.4.8:
   version "4.4.13"
@@ -9855,18 +9174,6 @@ tar@^4.4.10, tar@^4.4.12, tar@^4.4.8:
     safe-buffer "^5.1.2"
     yallist "^3.0.3"
 
-tar@^6.0.2:
-  version "6.1.0"
-  resolved "https://registry.yarnpkg.com/tar/-/tar-6.1.0.tgz#d1724e9bcc04b977b18d5c573b333a2207229a83"
-  integrity sha512-DUCttfhsnLCjwoDoFcI+B2iJgYa93vBnDUATYEeRx6sntCTdN01VnqsIuTlALXla/LWooNg0yEGeB+Y8WdFxGA==
-  dependencies:
-    chownr "^2.0.0"
-    fs-minipass "^2.0.0"
-    minipass "^3.0.0"
-    minizlib "^2.1.1"
-    mkdirp "^1.0.3"
-    yallist "^4.0.0"
-
 temp-dir@^1.0.0:
   version "1.0.0"
   resolved "https://registry.yarnpkg.com/temp-dir/-/temp-dir-1.0.0.tgz#0a7c0ea26d3a39afa7e0ebea9c1fc0bc4daa011d"
@@ -9892,49 +9199,22 @@ terminal-link@^2.0.0:
     ansi-escapes "^4.2.1"
     supports-hyperlinks "^2.0.0"
 
-terser-webpack-plugin@4.2.2:
-  version "4.2.2"
-  resolved "https://registry.yarnpkg.com/terser-webpack-plugin/-/terser-webpack-plugin-4.2.2.tgz#d86200c700053bba637913fe4310ba1bdeb5568e"
-  integrity sha512-3qAQpykRTD5DReLu5/cwpsg7EZFzP3Q0Hp2XUWJUw2mpq2jfgOKTZr8IZKKnNieRVVo1UauROTdhbQJZveGKtQ==
-  dependencies:
-    cacache "^15.0.5"
-    find-cache-dir "^3.3.1"
-    jest-worker "^26.3.0"
-    p-limit "^3.0.2"
-    schema-utils "^2.7.1"
+terser-webpack-plugin@^5.1.1:
+  version "5.1.1"
+  resolved "https://registry.yarnpkg.com/terser-webpack-plugin/-/terser-webpack-plugin-5.1.1.tgz#7effadee06f7ecfa093dbbd3e9ab23f5f3ed8673"
+  integrity sha512-5XNNXZiR8YO6X6KhSGXfY0QrGrCRlSwAEjIIrlRQR4W8nP69TaJUlh3bkuac6zzgspiGPfKEHcY295MMVExl5Q==
+  dependencies:
+    jest-worker "^26.6.2"
+    p-limit "^3.1.0"
+    schema-utils "^3.0.0"
     serialize-javascript "^5.0.1"
     source-map "^0.6.1"
-    terser "^5.3.2"
-    webpack-sources "^1.4.3"
-
-terser-webpack-plugin@^1.1.0:
-  version "1.4.5"
-  resolved "https://registry.yarnpkg.com/terser-webpack-plugin/-/terser-webpack-plugin-1.4.5.tgz#a217aefaea330e734ffacb6120ec1fa312d6040b"
-  integrity sha512-04Rfe496lN8EYruwi6oPQkG0vo8C+HT49X687FZnpPF0qMAIHONI6HEXYPKDOE8e5HjXTyKfqRd/agHtH0kOtw==
-  dependencies:
-    cacache "^12.0.2"
-    find-cache-dir "^2.1.0"
-    is-wsl "^1.1.0"
-    schema-utils "^1.0.0"
-    serialize-javascript "^4.0.0"
-    source-map "^0.6.1"
-    terser "^4.1.2"
-    webpack-sources "^1.4.0"
-    worker-farm "^1.7.0"
+    terser "^5.5.1"
 
-terser@^4.1.2:
-  version "4.8.0"
-  resolved "https://registry.yarnpkg.com/terser/-/terser-4.8.0.tgz#63056343d7c70bb29f3af665865a46fe03a0df17"
-  integrity sha512-EAPipTNeWsb/3wLPeup1tVPaXfIaU68xMnVdPafIL1TV05OhASArYyIfFvnvJCNrR2NIOvDVNNTFRa+Re2MWyw==
-  dependencies:
-    commander "^2.20.0"
-    source-map "~0.6.1"
-    source-map-support "~0.5.12"
-
-terser@^5.3.2:
-  version "5.6.1"
-  resolved "https://registry.yarnpkg.com/terser/-/terser-5.6.1.tgz#a48eeac5300c0a09b36854bf90d9c26fb201973c"
-  integrity sha512-yv9YLFQQ+3ZqgWCUk+pvNJwgUTdlIxUk1WTN+RnaFJe2L7ipG2csPT0ra2XRm7Cs8cxN7QXmK1rFzEwYEQkzXw==
+terser@^5.5.1:
+  version "5.7.0"
+  resolved "https://registry.yarnpkg.com/terser/-/terser-5.7.0.tgz#a761eeec206bc87b605ab13029876ead938ae693"
+  integrity sha512-HP5/9hp2UaZt5fYkuhNBR8YyRcT8juw8+uFbAme53iN9hblvKnLUTKkmwJG6ocWpIKf8UK4DoeWG4ty0J6S6/g==
   dependencies:
     commander "^2.20.0"
     source-map "~0.7.2"
@@ -9991,7 +9271,7 @@ through2-filter@^3.0.0:
     through2 "~2.0.0"
     xtend "~4.0.0"
 
-through2@2.X, through2@^2.0.0, through2@^2.0.2, through2@^2.0.3, through2@~2.0.0:
+through2@^2.0.0, through2@^2.0.2, through2@^2.0.3, through2@~2.0.0:
   version "2.0.5"
   resolved "https://registry.yarnpkg.com/through2/-/through2-2.0.5.tgz#01c1e39eb31d07cb7d03a96a70823260b23132cd"
   integrity sha512-/mrRod8xqpA+IHSLyGCQ2s8SPHiCDEeQJSep1jqLYeEUClOFG2Qsh+4FU6G9VeqpZnGW/Su8LQGc4YKni5rYSQ==
@@ -9999,7 +9279,7 @@ through2@2.X, through2@^2.0.0, through2@^2.0.2, through2@^2.0.3, through2@~2.0.0
     readable-stream "~2.3.6"
     xtend "~4.0.1"
 
-through2@^3.0.0:
+through2@^3.0.0, through2@^3.0.1:
   version "3.0.2"
   resolved "https://registry.yarnpkg.com/through2/-/through2-3.0.2.tgz#99f88931cfc761ec7678b41d5d7336b5b6a07bf4"
   integrity sha512-enaDQ4MUyP2W6ZyT6EsMzqBPZaM/avg8iuo+l2d3QCs0J+6RaqkHV/2/lOwDTueBHeJ/2LG9lrLW3d5rWPucuQ==
@@ -10024,13 +9304,6 @@ time-stamp@^1.0.0:
   resolved "https://registry.yarnpkg.com/time-stamp/-/time-stamp-1.1.0.tgz#764a5a11af50561921b133f3b44e618687e0f5c3"
   integrity sha1-dkpaEa9QVhkhsTPztE5hhofg9cM=
 
-timers-browserify@^2.0.4:
-  version "2.0.12"
-  resolved "https://registry.yarnpkg.com/timers-browserify/-/timers-browserify-2.0.12.tgz#44a45c11fbf407f34f97bccd1577c652361b00ee"
-  integrity sha512-9phl76Cqm6FhSX9Xe1ZUAMLtm1BLkKj2Qd5ApyWkXzsMRaA7dgr81kf4wJmQf/hAvg8EEyJxDo3du/0KlhPiKQ==
-  dependencies:
-    setimmediate "^1.0.4"
-
 timers-ext@^0.1.7:
   version "0.1.7"
   resolved "https://registry.yarnpkg.com/timers-ext/-/timers-ext-0.1.7.tgz#6f57ad8578e07a3fb9f91d9387d65647555e25c6"
@@ -10059,11 +9332,6 @@ to-absolute-glob@^2.0.0:
     is-absolute "^1.0.0"
     is-negated-glob "^1.0.0"
 
-to-arraybuffer@^1.0.0:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/to-arraybuffer/-/to-arraybuffer-1.0.1.tgz#7d229b1fcc637e466ca081180836a7aabff83f43"
-  integrity sha1-fSKbH8xjfkZsoIEYCDanqr/4P0M=
-
 to-fast-properties@^2.0.0:
   version "2.0.0"
   resolved "https://registry.yarnpkg.com/to-fast-properties/-/to-fast-properties-2.0.0.tgz#dc5e698cbd079265bc73e0377681a4e4e83f616e"
@@ -10204,11 +9472,6 @@ tsutils@^3.17.1:
   dependencies:
     tslib "^1.8.1"
 
-tty-browserify@0.0.0:
-  version "0.0.0"
-  resolved "https://registry.yarnpkg.com/tty-browserify/-/tty-browserify-0.0.0.tgz#a157ba402da24e9bf957f9aa69d524eed42901a6"
-  integrity sha1-oVe6QC2iTpv5V/mqadUk7tQpAaY=
-
 tunnel-agent@^0.6.0:
   version "0.6.0"
   resolved "https://registry.yarnpkg.com/tunnel-agent/-/tunnel-agent-0.6.0.tgz#27a5dea06b36b04a0a9966774b290868f0fc40fd"
@@ -10464,14 +9727,6 @@ urix@^0.1.0:
   resolved "https://registry.yarnpkg.com/urix/-/urix-0.1.0.tgz#da937f7a62e21fec1fd18d49b35c2935067a6c72"
   integrity sha1-2pN/emLiH+wf0Y1Js1wpNQZ6bHI=
 
-url@^0.11.0:
-  version "0.11.0"
-  resolved "https://registry.yarnpkg.com/url/-/url-0.11.0.tgz#3838e97cfc60521eb73c525a8e55bfdd9e2e28f1"
-  integrity sha1-ODjpfPxgUh63PFJajlW/3Z4uKPE=
-  dependencies:
-    punycode "1.3.2"
-    querystring "0.2.0"
-
 use@^3.1.0:
   version "3.1.1"
   resolved "https://registry.yarnpkg.com/use/-/use-3.1.1.tgz#d50c8cac79a19fbc20f2911f56eb973f4e10070f"
@@ -10489,20 +9744,6 @@ util-promisify@^2.1.0:
   dependencies:
     object.getownpropertydescriptors "^2.0.3"
 
-util@0.10.3:
-  version "0.10.3"
-  resolved "https://registry.yarnpkg.com/util/-/util-0.10.3.tgz#7afb1afe50805246489e3db7fe0ed379336ac0f9"
-  integrity sha1-evsa/lCAUkZInj23/g7TeTNqwPk=
-  dependencies:
-    inherits "2.0.1"
-
-util@^0.11.0:
-  version "0.11.1"
-  resolved "https://registry.yarnpkg.com/util/-/util-0.11.1.tgz#3236733720ec64bb27f6e26f421aaa2e1b588d61"
-  integrity sha512-HShAsny+zS2TZfaXxD9tYj4HQGlBezXZMZuM/S5PKLLoZkShZiGk9o5CzukI1LVHZvjdvZ2Sj1aW/Ndn2NB/HQ==
-  dependencies:
-    inherits "2.0.3"
-
 uuid@^3.0.1, uuid@^3.3.2:
   version "3.4.0"
   resolved "https://registry.yarnpkg.com/uuid/-/uuid-3.4.0.tgz#b23e4358afa8a202fe7a100af1f5f883f02007ee"
@@ -10618,11 +9859,6 @@ vinyl@2.x, vinyl@^2.0.0, vinyl@^2.1.0:
     remove-trailing-separator "^1.0.1"
     replace-ext "^1.0.0"
 
-vm-browserify@^1.0.1:
-  version "1.1.2"
-  resolved "https://registry.yarnpkg.com/vm-browserify/-/vm-browserify-1.1.2.tgz#78641c488b8e6ca91a75f511e7a3b32a86e5dda0"
-  integrity sha512-2ham8XPWTONajOR0ohOKOHXkm3+gaBmGut3SRuu75xLd/RRaY6vqgh8NBYYk7+RW3u5AtzPQZG8F10LHkl0lAQ==
-
 vscode-textmate@^5.2.0:
   version "5.4.0"
   resolved "https://registry.yarnpkg.com/vscode-textmate/-/vscode-textmate-5.4.0.tgz#4b25ffc1f14ac3a90faf9a388c67a01d24257cd7"
@@ -10649,23 +9885,13 @@ walker@^1.0.7, walker@~1.0.5:
   dependencies:
     makeerror "1.0.x"
 
-watchpack-chokidar2@^2.0.1:
-  version "2.0.1"
-  resolved "https://registry.yarnpkg.com/watchpack-chokidar2/-/watchpack-chokidar2-2.0.1.tgz#38500072ee6ece66f3769936950ea1771be1c957"
-  integrity sha512-nCFfBIPKr5Sh61s4LPpy1Wtfi0HE8isJ3d2Yb5/Ppw2P2B/3eVSEBjKfN0fmHJSK14+31KwMKmcrzs2GM4P0Ww==
-  dependencies:
-    chokidar "^2.1.8"
-
-watchpack@^1.5.0:
-  version "1.7.5"
-  resolved "https://registry.yarnpkg.com/watchpack/-/watchpack-1.7.5.tgz#1267e6c55e0b9b5be44c2023aed5437a2c26c453"
-  integrity sha512-9P3MWk6SrKjHsGkLT2KHXdQ/9SNkyoJbabxnKOoJepsvJjJG8uYTR3yTPxPQvNDI3w4Nz1xnE0TLHK4RIVe/MQ==
+watchpack@^2.0.0:
+  version "2.1.1"
+  resolved "https://registry.yarnpkg.com/watchpack/-/watchpack-2.1.1.tgz#e99630550fca07df9f90a06056987baa40a689c7"
+  integrity sha512-Oo7LXCmc1eE1AjyuSBmtC3+Wy4HcV8PxWh2kP6fOl8yTlNS7r0K9l1ao2lrrUza7V39Y3D/BbJgY8VeSlc5JKw==
   dependencies:
+    glob-to-regexp "^0.4.1"
     graceful-fs "^4.1.2"
-    neo-async "^2.5.0"
-  optionalDependencies:
-    chokidar "^3.4.1"
-    watchpack-chokidar2 "^2.0.1"
 
 wcwidth@^1.0.0:
   version "1.0.1"
@@ -10699,43 +9925,42 @@ webidl-conversions@^6.1.0:
   resolved "https://registry.yarnpkg.com/webidl-conversions/-/webidl-conversions-6.1.0.tgz#9111b4d7ea80acd40f5270d666621afa78b69514"
   integrity sha512-qBIvFLGiBpLjfwmYAaHPXsn+ho5xZnGvyGvsarywGNc8VyQJUMHJ8OBKGGrPER0okBeMDaan4mNBlgBROxuI8w==
 
-webpack-sources@^1.3.0, webpack-sources@^1.4.0, webpack-sources@^1.4.3:
-  version "1.4.3"
-  resolved "https://registry.yarnpkg.com/webpack-sources/-/webpack-sources-1.4.3.tgz#eedd8ec0b928fbf1cbfe994e22d2d890f330a933"
-  integrity sha512-lgTS3Xhv1lCOKo7SA5TjKXMjpSM4sBjNV5+q2bqesbSPs5FjGmU6jjtBSkX9b4qW87vDIsCIlUPOEhbZrMdjeQ==
+webpack-sources@^2.1.1:
+  version "2.2.0"
+  resolved "https://registry.yarnpkg.com/webpack-sources/-/webpack-sources-2.2.0.tgz#058926f39e3d443193b6c31547229806ffd02bac"
+  integrity sha512-bQsA24JLwcnWGArOKUxYKhX3Mz/nK1Xf6hxullKERyktjNMC4x8koOeaDNTA2fEJ09BdWLbM/iTW0ithREUP0w==
   dependencies:
-    source-list-map "^2.0.0"
-    source-map "~0.6.1"
+    source-list-map "^2.0.1"
+    source-map "^0.6.1"
 
-webpack@4.29.0:
-  version "4.29.0"
-  resolved "https://registry.yarnpkg.com/webpack/-/webpack-4.29.0.tgz#f2cfef83f7ae404ba889ff5d43efd285ca26e750"
-  integrity sha512-pxdGG0keDBtamE1mNvT5zyBdx+7wkh6mh7uzMOo/uRQ/fhsdj5FXkh/j5mapzs060forql1oXqXN9HJGju+y7w==
-  dependencies:
-    "@webassemblyjs/ast" "1.7.11"
-    "@webassemblyjs/helper-module-context" "1.7.11"
-    "@webassemblyjs/wasm-edit" "1.7.11"
-    "@webassemblyjs/wasm-parser" "1.7.11"
-    acorn "^6.0.5"
-    acorn-dynamic-import "^4.0.0"
-    ajv "^6.1.0"
-    ajv-keywords "^3.1.0"
-    chrome-trace-event "^1.0.0"
-    enhanced-resolve "^4.1.0"
-    eslint-scope "^4.0.0"
+webpack@5.36.2:
+  version "5.36.2"
+  resolved "https://registry.yarnpkg.com/webpack/-/webpack-5.36.2.tgz#6ef1fb2453ad52faa61e78d486d353d07cca8a0f"
+  integrity sha512-XJumVnnGoH2dV+Pk1VwgY4YT6AiMKpVoudUFCNOXMIVrEKPUgEwdIfWPjIuGLESAiS8EdIHX5+TiJz/5JccmRg==
+  dependencies:
+    "@types/eslint-scope" "^3.7.0"
+    "@types/estree" "^0.0.47"
+    "@webassemblyjs/ast" "1.11.0"
+    "@webassemblyjs/wasm-edit" "1.11.0"
+    "@webassemblyjs/wasm-parser" "1.11.0"
+    acorn "^8.2.1"
+    browserslist "^4.14.5"
+    chrome-trace-event "^1.0.2"
+    enhanced-resolve "^5.8.0"
+    es-module-lexer "^0.4.0"
+    eslint-scope "^5.1.1"
+    events "^3.2.0"
+    glob-to-regexp "^0.4.1"
+    graceful-fs "^4.2.4"
     json-parse-better-errors "^1.0.2"
-    loader-runner "^2.3.0"
-    loader-utils "^1.1.0"
-    memory-fs "~0.4.1"
-    micromatch "^3.1.8"
-    mkdirp "~0.5.0"
-    neo-async "^2.5.0"
-    node-libs-browser "^2.0.0"
-    schema-utils "^0.4.4"
-    tapable "^1.1.0"
-    terser-webpack-plugin "^1.1.0"
-    watchpack "^1.5.0"
-    webpack-sources "^1.3.0"
+    loader-runner "^4.2.0"
+    mime-types "^2.1.27"
+    neo-async "^2.6.2"
+    schema-utils "^3.0.0"
+    tapable "^2.1.1"
+    terser-webpack-plugin "^5.1.1"
+    watchpack "^2.0.0"
+    webpack-sources "^2.1.1"
 
 whatwg-encoding@^1.0.5:
   version "1.0.5"
@@ -10834,13 +10059,6 @@ wordwrapjs@^4.0.0:
     reduce-flatten "^2.0.0"
     typical "^5.2.0"
 
-worker-farm@^1.7.0:
-  version "1.7.0"
-  resolved "https://registry.yarnpkg.com/worker-farm/-/worker-farm-1.7.0.tgz#26a94c5391bbca926152002f69b84a4bf772e5a8"
-  integrity sha512-rvw3QTZc8lAxyVrqcSGVm5yP/IJ2UcB3U0graE3LCFoZ0Yn2x4EoVSqJKdB/T5M+FLcRPjz4TDacRf3OCfNUzw==
-  dependencies:
-    errno "~0.1.7"
-
 wrap-ansi@^2.0.0:
   version "2.1.0"
   resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-2.1.0.tgz#d8fc3d284dd05794fe84973caecdd1cf824fdd85"
@@ -10951,7 +10169,7 @@ xmlchars@^2.2.0:
   resolved "https://registry.yarnpkg.com/xmlchars/-/xmlchars-2.2.0.tgz#060fe1bcb7f9c76fe2a17db86a9bc3ab894210cb"
   integrity sha512-JZnDKK8B0RCDw84FNdDAIpZK+JuJw+s7Lz8nksI7SIuU3UXJJslUthsi+uWBUYOwPFwW7W7PRLRfUKpxjtjFCw==
 
-xtend@^4.0.0, xtend@~4.0.0, xtend@~4.0.1:
+xtend@~4.0.0, xtend@~4.0.1:
   version "4.0.2"
   resolved "https://registry.yarnpkg.com/xtend/-/xtend-4.0.2.tgz#bb72779f5fa465186b1f438f674fa347fdb5db54"
   integrity sha512-LKYU1iAXJXUgAXn9URjiu+MWhyUXHsvfp7mcuYm9dSUKK0/CjtrUwFAxD82/mCWbtLsGjFIad0wIsod4zrTAEQ==

From f0399316fa4d10a431460e0eba513ca2210cc8b7 Mon Sep 17 00:00:00 2001
From: Dominik Moritz <domoritz@gmail.com>
Date: Sun, 16 May 2021 10:24:44 +0900
Subject: [PATCH 254/719] ARROW-12704: [JS] Support and use optional chaining

Optional chaining makes the code more concise and easier to reason about.

Needs a fork of esm because of https://github.com/standard-things/esm/issues/866.

Closes #10278 from domoritz/modern-js

Lead-authored-by: Dominik Moritz <domoritz@gmail.com>
Co-authored-by: p42-ai[bot] <72252241+p42-ai[bot]@users.noreply.github.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 js/bin/integration.js          |  8 ++---
 js/gulp/closure-task.js        |  2 +-
 js/gulp/minify-task.js         |  3 +-
 js/gulp/test-task.js           |  1 -
 js/gulp/util.js                | 10 ++-----
 js/package.json                |  4 +--
 js/src/bin/arrow2csv.ts        |  4 +--
 js/src/io/node/iterable.ts     |  8 ++---
 js/src/io/node/reader.ts       |  6 ++--
 js/src/io/node/writer.ts       |  6 ++--
 js/src/io/whatwg/iterable.ts   | 12 ++++----
 js/src/ipc/message.ts          | 10 +++----
 js/src/ipc/reader.ts           |  8 ++---
 js/src/table.ts                |  2 +-
 js/src/type.ts                 | 36 +++++++++++------------
 js/src/util/args.ts            |  2 +-
 js/src/util/buffer.ts          |  2 +-
 js/src/visitor/set.ts          |  8 ++---
 js/test/Arrow.ts               |  4 +--
 js/test/unit/builders/utils.ts |  2 +-
 js/yarn.lock                   | 53 +++++++++++++++++-----------------
 21 files changed, 92 insertions(+), 99 deletions(-)

diff --git a/js/bin/integration.js b/js/bin/integration.js
index 2e5f16bdf80..c357c128aa1 100755
--- a/js/bin/integration.js
+++ b/js/bin/integration.js
@@ -63,7 +63,7 @@ const exists = async (p) => {
     }
 })()
 .then((x) => +x || 0, (e) => {
-    e && process.stderr.write(`${e && e.stack || e}\n`);
+    e && process.stderr.write(`${e?.stack || e}\n`);
     return process.exitCode || 1;
 }).then((code) => process.exit(code));
 
@@ -141,7 +141,7 @@ function validateReaderIntegration(jsonData, arrowBuffer) {
         for (const [jsonRecordBatch, binaryRecordBatch] of zip(jsonReader, binaryReader)) {
             compareTableIsh(jsonRecordBatch, binaryRecordBatch);
         }
-    } catch (e) { throw new Error(`${msg}: fail \n ${e && e.stack || e}`); }
+    } catch (e) { throw new Error(`${msg}: fail \n ${e?.stack || e}`); }
     process.stdout.write(`${msg}: pass\n`);
 }
 
@@ -151,7 +151,7 @@ function validateTableFromBuffersIntegration(jsonData, arrowBuffer) {
         const jsonTable = Table.from(jsonData);
         const binaryTable = Table.from(arrowBuffer);
         compareTableIsh(jsonTable, binaryTable);
-    } catch (e) { throw new Error(`${msg}: fail \n ${e && e.stack || e}`); }
+    } catch (e) { throw new Error(`${msg}: fail \n ${e?.stack || e}`); }
     process.stdout.write(`${msg}: pass\n`);
 }
 
@@ -164,7 +164,7 @@ function validateTableToBuffersIntegration(srcFormat, arrowFormat) {
             const srcTable = Table.from(srcFormat === `json` ? jsonData : arrowBuffer);
             const dstTable = Table.from(srcTable.serialize(`binary`, arrowFormat === `stream`));
             compareTableIsh(dstTable, refTable);
-        } catch (e) { throw new Error(`${msg}: fail \n ${e && e.stack || e}`); }
+        } catch (e) { throw new Error(`${msg}: fail \n ${e?.stack || e}`); }
         process.stdout.write(`${msg}: pass\n`);
     };
 }
diff --git a/js/gulp/closure-task.js b/js/gulp/closure-task.js
index d0ecb12d874..4ab9e7416fd 100644
--- a/js/gulp/closure-task.js
+++ b/js/gulp/closure-task.js
@@ -50,7 +50,7 @@ const closureTask = ((cache) => memoizeTask(cache, async function closure(target
     const exportedImports = publicModulePaths(srcAbsolute).reduce((entries, publicModulePath) => [
         ...entries, {
             publicModulePath,
-            exports_: getPublicExportedNames(esmRequire(publicModulePath, { warnings: false }))
+            exports_: getPublicExportedNames(esmRequire(publicModulePath))
         }
     ], []);
 
diff --git a/js/gulp/minify-task.js b/js/gulp/minify-task.js
index ce1457f70b9..d987b72e675 100644
--- a/js/gulp/minify-task.js
+++ b/js/gulp/minify-task.js
@@ -19,7 +19,6 @@ const {
     targetDir,
     mainExport,
     UMDSourceTargets,
-    terserLanguageNames,
     shouldRunInChildProcess,
     spawnGulpCommandInChildProcess,
 } = require('./util');
@@ -64,7 +63,7 @@ const minifyTask = ((cache, commonConfig) => memoizeTask(cache, function minifyJ
             minimizer: [
                 new TerserPlugin({
                     terserOptions: {
-                        ecma: terserLanguageNames[target],
+                        ecma: target,
                         output: { comments: false },
                         compress: { unsafe: true }
                     },
diff --git a/js/gulp/test-task.js b/js/gulp/test-task.js
index 8c1eab1e3b8..cd01131779d 100644
--- a/js/gulp/test-task.js
+++ b/js/gulp/test-task.js
@@ -73,7 +73,6 @@ const ARROW_JAVA_DIR = process.env.ARROW_JAVA_DIR || path.join(ARROW_HOME, 'java
 const CPP_EXE_PATH = process.env.ARROW_CPP_EXE_PATH || path.join(ARROW_HOME, 'cpp/build/debug');
 const ARROW_INTEGRATION_DIR = process.env.ARROW_INTEGRATION_DIR || path.join(ARROW_HOME, 'integration');
 const CPP_JSON_TO_ARROW = path.join(CPP_EXE_PATH, 'arrow-json-integration-test');
-const CPP_STREAM_TO_FILE = path.join(CPP_EXE_PATH, 'arrow-stream-to-file');
 const CPP_FILE_TO_STREAM = path.join(CPP_EXE_PATH, 'arrow-file-to-stream');
 
 const testFilesDir = path.join(ARROW_HOME, 'js/test/data');
diff --git a/js/gulp/util.js b/js/gulp/util.js
index c07f5f3062f..8a72c5356d6 100644
--- a/js/gulp/util.js
+++ b/js/gulp/util.js
@@ -61,6 +61,8 @@ const gCCLanguageNames = {
  es2015: `ECMASCRIPT_2015`,
  es2016: `ECMASCRIPT_2016`,
  es2017: `ECMASCRIPT_2017`,
+ es2018: `ECMASCRIPT_2018`,
+ es2019: `ECMASCRIPT_2019`,
  esnext: `ECMASCRIPT_NEXT`
 };
 
@@ -72,12 +74,6 @@ const UMDSourceTargets = {
  esnext: `esnext`
 };
 
-const terserLanguageNames = {
-    es5: 5, es2015: 6,
- es2016: 7, es2017: 8,
- esnext: 8 // <--- ?
-};
-
 // ES7+ keywords Terser shouldn't mangle
 // Hardcoded here since some are from ES7+, others are
 // only defined in interfaces, so difficult to get by reflection.
@@ -211,7 +207,7 @@ module.exports = {
     mainExport, npmPkgName, npmOrgName, metadataFiles, packageJSONFields,
 
     knownTargets, knownModules, tasksToSkipPerTargetOrFormat,
-    gCCLanguageNames, UMDSourceTargets, terserLanguageNames,
+    gCCLanguageNames, UMDSourceTargets,
 
     taskName, packageName, tsconfigName, targetDir, combinations, observableFromStreams,
     ESKeywords, publicModulePaths, esmRequire, shouldRunInChildProcess, spawnGulpCommandInChildProcess
diff --git a/js/package.json b/js/package.json
index 71e181b1dca..58721f6d6bd 100644
--- a/js/package.json
+++ b/js/package.json
@@ -76,9 +76,9 @@
     "del-cli": "3.0.1",
     "eslint": "^7.24.0",
     "eslint-plugin-jest": "^24.3.5",
-    "esm": "3.2.25",
+    "esm": "https://github.com/jsg2021/esm/releases/download/v3.x.x-pr883/esm-3.x.x-pr883.tgz",
     "glob": "7.1.4",
-    "google-closure-compiler": "20210406.0.0",
+    "google-closure-compiler": "20210505.0.0",
     "gulp": "4.0.2",
     "gulp-json-transform": "0.4.7",
     "gulp-rename": "2.0.0",
diff --git a/js/src/bin/arrow2csv.ts b/js/src/bin/arrow2csv.ts
index 064b6ee5934..d5803cce02c 100644
--- a/js/src/bin/arrow2csv.ts
+++ b/js/src/bin/arrow2csv.ts
@@ -67,7 +67,7 @@ type ToStringState = {
 })()
 .then((x) => +x || 0, (err) => {
     if (err) {
-        console.error(`${err && err.stack || err}`);
+        console.error(`${err?.stack || err}`);
     }
     return process.exitCode || 1;
 }).then((code) => process.exit(code));
@@ -147,7 +147,7 @@ function batchesToString(state: ToStringState, schema: Schema) {
         },
         transform(batch: RecordBatch, _enc: string, cb: (error?: Error, data?: any) => void) {
 
-            batch = !(state.schema && state.schema.length) ? batch : batch.select(...state.schema);
+            batch = !state.schema?.length ? batch : batch.select(...state.schema);
 
             if (state.closed) { return cb(undefined, null); }
 
diff --git a/js/src/io/node/iterable.ts b/js/src/io/node/iterable.ts
index 8bf5ad72a0c..457bc894dad 100644
--- a/js/src/io/node/iterable.ts
+++ b/js/src/io/node/iterable.ts
@@ -54,7 +54,7 @@ class IterableReadable<T extends Uint8Array | any> extends Readable {
         const it = this._iterator;
         let fn: any;
         it && (fn = e != null && it.throw || it.return);
-        fn && fn.call(it, e);
+        fn?.call(it, e);
         cb && cb(null);
     }
     private _pull(size: number, it: SourceIterator<T>) {
@@ -66,7 +66,7 @@ class IterableReadable<T extends Uint8Array | any> extends Readable {
             }
             if (!this.push(r.value) || size <= 0) { break; }
         }
-        if ((r && r.done || !this.readable) && (this.push(null) || true)) {
+        if ((r?.done || !this.readable) && (this.push(null) || true)) {
             it.return && it.return();
         }
         return !this.readable;
@@ -94,7 +94,7 @@ class AsyncIterableReadable<T extends Uint8Array | any> extends Readable {
         const it = this._iterator;
         let fn: any;
         it && (fn = e != null && it.throw || it.return);
-        fn && fn.call(it, e).then(() => cb && cb(null)) || (cb && cb(null));
+        fn?.call(it, e).then(() => cb && cb(null)) || (cb && cb(null));
     }
     private async _pull(size: number, it: AsyncSourceIterator<T>) {
         const bm = this._bytesMode;
@@ -105,7 +105,7 @@ class AsyncIterableReadable<T extends Uint8Array | any> extends Readable {
             }
             if (!this.push(r.value) || size <= 0) { break; }
         }
-        if ((r && r.done || !this.readable) && (this.push(null) || true)) {
+        if ((r?.done || !this.readable) && (this.push(null) || true)) {
             it.return && it.return();
         }
         return !this.readable;
diff --git a/js/src/io/node/reader.ts b/js/src/io/node/reader.ts
index 498fe6a7f68..a51fb0b4036 100644
--- a/js/src/io/node/reader.ts
+++ b/js/src/io/node/reader.ts
@@ -44,12 +44,12 @@ class RecordBatchReaderDuplex<T extends { [key: string]: DataType } = any> exten
     }
     _final(cb?: CB) {
         const aq = this._asyncQueue;
-        aq && aq.close();
+        aq?.close();
         cb && cb();
     }
     _write(x: any, _: string, cb: CB) {
         const aq = this._asyncQueue;
-        aq && aq.write(x);
+        aq?.write(x);
         cb && cb();
         return true;
     }
@@ -77,7 +77,7 @@ class RecordBatchReaderDuplex<T extends { [key: string]: DataType } = any> exten
         while (this.readable && !(r = await reader.next()).done) {
             if (!this.push(r.value) || (size != null && --size <= 0)) { break; }
         }
-        if (!this.readable || (r && r.done && (reader.autoDestroy || (await reader.reset().open()).closed))) {
+        if (!this.readable || (r?.done && (reader.autoDestroy || (await reader.reset().open()).closed))) {
             this.push(null);
             await reader.cancel();
         }
diff --git a/js/src/io/node/writer.ts b/js/src/io/node/writer.ts
index b7e02782880..79d61b9a315 100644
--- a/js/src/io/node/writer.ts
+++ b/js/src/io/node/writer.ts
@@ -40,12 +40,12 @@ class RecordBatchWriterDuplex<T extends { [key: string]: DataType } = any> exten
     }
     _final(cb?: CB) {
         const writer = this._writer;
-        writer && writer.close();
+        writer?.close();
         cb && cb();
     }
     _write(x: any, _: string, cb: CB) {
         const writer = this._writer;
-        writer && writer.write(x);
+        writer?.write(x);
         cb && cb();
         return true;
     }
@@ -68,7 +68,7 @@ class RecordBatchWriterDuplex<T extends { [key: string]: DataType } = any> exten
             }
             if (!this.push(r.value) || size <= 0) { break; }
         }
-        if ((r && r.done || !this.readable)) {
+        if ((r?.done || !this.readable)) {
             this.push(null);
             await reader.cancel();
         }
diff --git a/js/src/io/whatwg/iterable.ts b/js/src/io/whatwg/iterable.ts
index b8428f6852c..ce9e97369f1 100644
--- a/js/src/io/whatwg/iterable.ts
+++ b/js/src/io/whatwg/iterable.ts
@@ -36,14 +36,14 @@ export function toDOMStream<T>(source: Iterable<T> | AsyncIterable<T>, options?:
 function iterableAsReadableDOMStream<T>(source: Iterable<T>, options?: ReadableDOMStreamOptions) {
 
     let it: SourceIterator<T> | null = null;
-    const bm = (options && options.type === 'bytes') || false;
-    const hwm = options && options.highWaterMark || (2 ** 24);
+    const bm = (options?.type === 'bytes') || false;
+    const hwm = options?.highWaterMark || (2 ** 24);
 
     return new ReadableStream<T>({
         ...options as any,
         start(controller) { next(controller, it || (it = source[Symbol.iterator]() as SourceIterator<T>)); },
         pull(controller) { it ? (next(controller, it)) : controller.close(); },
-        cancel() { (it && (it.return && it.return()) || true) && (it = null); }
+        cancel() { (it?.return && it.return() || true) && (it = null); }
     }, { highWaterMark: bm ? hwm : undefined, ...options });
 
     function next(controller: ReadableStreamDefaultController<T>, it: SourceIterator<T>) {
@@ -66,14 +66,14 @@ function iterableAsReadableDOMStream<T>(source: Iterable<T>, options?: ReadableD
 function asyncIterableAsReadableDOMStream<T>(source: AsyncIterable<T>, options?: ReadableDOMStreamOptions) {
 
     let it: AsyncSourceIterator<T> | null = null;
-    const bm = (options && options.type === 'bytes') || false;
-    const hwm = options && options.highWaterMark || (2 ** 24);
+    const bm = (options?.type === 'bytes') || false;
+    const hwm = options?.highWaterMark || (2 ** 24);
 
     return new ReadableStream<T>({
         ...options as any,
         async start(controller) { await next(controller, it || (it = source[Symbol.asyncIterator]() as AsyncSourceIterator<T>)); },
         async pull(controller) { it ? (await next(controller, it)) : controller.close(); },
-        async cancel() { (it && (it.return && await it.return()) || true) && (it = null); },
+        async cancel() { (it?.return && await it.return() || true) && (it = null); },
     }, { highWaterMark: bm ? hwm : undefined, ...options });
 
     async function next(controller: ReadableStreamDefaultController<T>, it: AsyncSourceIterator<T>) {
diff --git a/js/src/ipc/message.ts b/js/src/ipc/message.ts
index 47136b7a6c0..34c0aa3082a 100644
--- a/js/src/ipc/message.ts
+++ b/js/src/ipc/message.ts
@@ -72,7 +72,7 @@ export class MessageReader implements IterableIterator<Message> {
     public readSchema(throwIfNull = false) {
         const type = MessageHeader.Schema;
         const message = this.readMessage(type);
-        const schema = message && message.header();
+        const schema = message?.header();
         if (throwIfNull && !schema) {
             throw new Error(nullMessage(type));
         }
@@ -81,7 +81,7 @@ export class MessageReader implements IterableIterator<Message> {
     protected readMetadataLength(): IteratorResult<number> {
         const buf = this.source.read(PADDING);
         const bb = buf && new ByteBuffer(buf);
-        const len = bb && bb.readInt32(0) || 0;
+        const len = bb?.readInt32(0) || 0;
         return { done: len === 0, value: len };
     }
     protected readMetadata(metadataLength: number): IteratorResult<Message> {
@@ -141,7 +141,7 @@ export class AsyncMessageReader implements AsyncIterableIterator<Message> {
     public async readSchema(throwIfNull = false) {
         const type = MessageHeader.Schema;
         const message = await this.readMessage(type);
-        const schema = message && message.header();
+        const schema = message?.header();
         if (throwIfNull && !schema) {
             throw new Error(nullMessage(type));
         }
@@ -150,7 +150,7 @@ export class AsyncMessageReader implements AsyncIterableIterator<Message> {
     protected async readMetadataLength(): Promise<IteratorResult<number>> {
         const buf = await this.source.read(PADDING);
         const bb = buf && new ByteBuffer(buf);
-        const len = bb && bb.readInt32(0) || 0;
+        const len = bb?.readInt32(0) || 0;
         return { done: len === 0, value: len };
     }
     protected async readMetadata(metadataLength: number): Promise<IteratorResult<Message>> {
@@ -220,7 +220,7 @@ export class JSONMessageReader extends MessageReader {
     public readSchema() {
         const type = MessageHeader.Schema;
         const message = this.readMessage(type);
-        const schema = message && message.header();
+        const schema = message?.header();
         if (!message || !schema) {
             throw new Error(nullMessage(type));
         }
diff --git a/js/src/ipc/reader.ts b/js/src/ipc/reader.ts
index 1ed634c12ec..a150ac1bb3c 100644
--- a/js/src/ipc/reader.ts
+++ b/js/src/ipc/reader.ts
@@ -547,7 +547,7 @@ class RecordBatchFileReaderImpl<T extends { [key: string]: DataType } = any> ext
         const block = this._footer && this._footer.getRecordBatch(index);
         if (block && this._handle.seek(block.offset)) {
             const message = this._reader.readMessage(MessageHeader.RecordBatch);
-            if (message && message.isRecordBatch()) {
+            if (message?.isRecordBatch()) {
                 const header = message.header();
                 const buffer = this._reader.readMessageBody(message.bodyLength);
                 const recordBatch = this._loadRecordBatch(header, buffer);
@@ -560,7 +560,7 @@ class RecordBatchFileReaderImpl<T extends { [key: string]: DataType } = any> ext
         const block = this._footer && this._footer.getDictionaryBatch(index);
         if (block && this._handle.seek(block.offset)) {
             const message = this._reader.readMessage(MessageHeader.DictionaryBatch);
-            if (message && message.isDictionaryBatch()) {
+            if (message?.isDictionaryBatch()) {
                 const header = message.header();
                 const buffer = this._reader.readMessageBody(message.bodyLength);
                 const vector = this._loadDictionaryBatch(header, buffer);
@@ -621,7 +621,7 @@ class AsyncRecordBatchFileReaderImpl<T extends { [key: string]: DataType } = any
         const block = this._footer && this._footer.getRecordBatch(index);
         if (block && (await this._handle.seek(block.offset))) {
             const message = await this._reader.readMessage(MessageHeader.RecordBatch);
-            if (message && message.isRecordBatch()) {
+            if (message?.isRecordBatch()) {
                 const header = message.header();
                 const buffer = await this._reader.readMessageBody(message.bodyLength);
                 const recordBatch = this._loadRecordBatch(header, buffer);
@@ -634,7 +634,7 @@ class AsyncRecordBatchFileReaderImpl<T extends { [key: string]: DataType } = any
         const block = this._footer && this._footer.getDictionaryBatch(index);
         if (block && (await this._handle.seek(block.offset))) {
             const message = await this._reader.readMessage(MessageHeader.DictionaryBatch);
-            if (message && message.isDictionaryBatch()) {
+            if (message?.isDictionaryBatch()) {
                 const header = message.header();
                 const buffer = await this._reader.readMessageBody(message.bodyLength);
                 const vector = this._loadDictionaryBatch(header, buffer);
diff --git a/js/src/table.ts b/js/src/table.ts
index e94e1adf195..0a8d4459e47 100644
--- a/js/src/table.ts
+++ b/js/src/table.ts
@@ -185,7 +185,7 @@ export class Table<T extends { [key: string]: DataType } = any>
 
         const chunks = selectArgs<RecordBatch<T>>(RecordBatch, args);
 
-        if (!schema && !(schema = chunks[0] && chunks[0].schema)) {
+        if (!schema && !(schema = chunks[0]?.schema)) {
             throw new TypeError('Table must be initialized with a Schema or at least one RecordBatch');
         }
 
diff --git a/js/src/type.ts b/js/src/type.ts
index 782b44a279c..7d5c051ad0e 100644
--- a/js/src/type.ts
+++ b/js/src/type.ts
@@ -65,24 +65,24 @@ export abstract class DataType<TType extends Type = Type, TChildren extends { [k
 
     public [Symbol.toStringTag]: string;
 
-    /** @nocollapse */ static            isNull (x: any): x is Null            { return x && x.typeId === Type.Null;            }
-    /** @nocollapse */ static             isInt (x: any): x is Int_            { return x && x.typeId === Type.Int;             }
-    /** @nocollapse */ static           isFloat (x: any): x is Float           { return x && x.typeId === Type.Float;           }
-    /** @nocollapse */ static          isBinary (x: any): x is Binary          { return x && x.typeId === Type.Binary;          }
-    /** @nocollapse */ static            isUtf8 (x: any): x is Utf8            { return x && x.typeId === Type.Utf8;            }
-    /** @nocollapse */ static            isBool (x: any): x is Bool            { return x && x.typeId === Type.Bool;            }
-    /** @nocollapse */ static         isDecimal (x: any): x is Decimal         { return x && x.typeId === Type.Decimal;         }
-    /** @nocollapse */ static            isDate (x: any): x is Date_           { return x && x.typeId === Type.Date;            }
-    /** @nocollapse */ static            isTime (x: any): x is Time_           { return x && x.typeId === Type.Time;            }
-    /** @nocollapse */ static       isTimestamp (x: any): x is Timestamp_      { return x && x.typeId === Type.Timestamp;       }
-    /** @nocollapse */ static        isInterval (x: any): x is Interval_       { return x && x.typeId === Type.Interval;        }
-    /** @nocollapse */ static            isList (x: any): x is List            { return x && x.typeId === Type.List;            }
-    /** @nocollapse */ static          isStruct (x: any): x is Struct          { return x && x.typeId === Type.Struct;          }
-    /** @nocollapse */ static           isUnion (x: any): x is Union_          { return x && x.typeId === Type.Union;           }
-    /** @nocollapse */ static isFixedSizeBinary (x: any): x is FixedSizeBinary { return x && x.typeId === Type.FixedSizeBinary; }
-    /** @nocollapse */ static   isFixedSizeList (x: any): x is FixedSizeList   { return x && x.typeId === Type.FixedSizeList;   }
-    /** @nocollapse */ static             isMap (x: any): x is Map_            { return x && x.typeId === Type.Map;             }
-    /** @nocollapse */ static      isDictionary (x: any): x is Dictionary      { return x && x.typeId === Type.Dictionary;      }
+    /** @nocollapse */ static            isNull (x: any): x is Null            { return x?.typeId === Type.Null;            }
+    /** @nocollapse */ static             isInt (x: any): x is Int_            { return x?.typeId === Type.Int;             }
+    /** @nocollapse */ static           isFloat (x: any): x is Float           { return x?.typeId === Type.Float;           }
+    /** @nocollapse */ static          isBinary (x: any): x is Binary          { return x?.typeId === Type.Binary;          }
+    /** @nocollapse */ static            isUtf8 (x: any): x is Utf8            { return x?.typeId === Type.Utf8;            }
+    /** @nocollapse */ static            isBool (x: any): x is Bool            { return x?.typeId === Type.Bool;            }
+    /** @nocollapse */ static         isDecimal (x: any): x is Decimal         { return x?.typeId === Type.Decimal;         }
+    /** @nocollapse */ static            isDate (x: any): x is Date_           { return x?.typeId === Type.Date;            }
+    /** @nocollapse */ static            isTime (x: any): x is Time_           { return x?.typeId === Type.Time;            }
+    /** @nocollapse */ static       isTimestamp (x: any): x is Timestamp_      { return x?.typeId === Type.Timestamp;       }
+    /** @nocollapse */ static        isInterval (x: any): x is Interval_       { return x?.typeId === Type.Interval;        }
+    /** @nocollapse */ static            isList (x: any): x is List            { return x?.typeId === Type.List;            }
+    /** @nocollapse */ static          isStruct (x: any): x is Struct          { return x?.typeId === Type.Struct;          }
+    /** @nocollapse */ static           isUnion (x: any): x is Union_          { return x?.typeId === Type.Union;           }
+    /** @nocollapse */ static isFixedSizeBinary (x: any): x is FixedSizeBinary { return x?.typeId === Type.FixedSizeBinary; }
+    /** @nocollapse */ static   isFixedSizeList (x: any): x is FixedSizeList   { return x?.typeId === Type.FixedSizeList;   }
+    /** @nocollapse */ static             isMap (x: any): x is Map_            { return x?.typeId === Type.Map;             }
+    /** @nocollapse */ static      isDictionary (x: any): x is Dictionary      { return x?.typeId === Type.Dictionary;      }
 
     public get typeId(): TType { return <any> Type.NONE; }
 
diff --git a/js/src/util/args.ts b/js/src/util/args.ts
index ff56f775dd3..25f571999ff 100644
--- a/js/src/util/args.ts
+++ b/js/src/util/args.ts
@@ -186,7 +186,7 @@ function _selectFieldArgs<T extends { [key: string]: DataType }>(vals: any[], re
             ({ [idx]: field = idx } = keys);
             if (val instanceof DataType && (values[++valueIndex] = val)) {
                 fields[++fieldIndex] = Field.new(field, val as DataType, true) as Field<T[keyof T]>;
-            } else if (val && val.type && (values[++valueIndex] = val)) {
+            } else if (val?.type && (values[++valueIndex] = val)) {
                 val instanceof Data && (values[valueIndex] = val = Vector.new(val) as Vector);
                 fields[++fieldIndex] = Field.new(field, val.type, true) as Field<T[keyof T]>;
             }
diff --git a/js/src/util/buffer.ts b/js/src/util/buffer.ts
index dde131eb5e2..86dae86c6b3 100644
--- a/js/src/util/buffer.ts
+++ b/js/src/util/buffer.ts
@@ -177,7 +177,7 @@ export async function* toArrayBufferViewAsyncIterator<T extends TypedArray>(Arra
         yield* pump((function*(it: Iterator<any>) {
             let r: IteratorResult<any> = <any> null;
             do {
-                r = it.next(yield r && r.value);
+                r = it.next(yield r?.value);
             } while (!r.done);
         })(source[Symbol.iterator]()));
     };
diff --git a/js/src/visitor/set.ts b/js/src/visitor/set.ts
index 0c3cea3e619..77985e5be71 100644
--- a/js/src/visitor/set.ts
+++ b/js/src/visitor/set.ts
@@ -239,10 +239,10 @@ const setMap = <T extends Map_>(vector: VectorType<T>, index: number, value: T['
     }
 };
 
-/** @ignore */ const _setStructArrayValue = (o: number, v: any[]) => (c: Vector | null, _: Field, i: number) => c && c.set(o, v[i]);
-/** @ignore */ const _setStructVectorValue = (o: number, v: Vector) => (c: Vector | null, _: Field, i: number) => c && c.set(o, v.get(i));
-/** @ignore */ const _setStructMapValue = (o: number, v: Map<string, any>) => (c: Vector | null, f: Field, _: number) => c && c.set(o, v.get(f.name));
-/** @ignore */ const _setStructObjectValue = (o: number, v: { [key: string]: any }) => (c: Vector | null, f: Field, _: number) => c && c.set(o, v[f.name]);
+/** @ignore */ const _setStructArrayValue = (o: number, v: any[]) => (c: Vector | null, _: Field, i: number) => c?.set(o, v[i]);
+/** @ignore */ const _setStructVectorValue = (o: number, v: Vector) => (c: Vector | null, _: Field, i: number) => c?.set(o, v.get(i));
+/** @ignore */ const _setStructMapValue = (o: number, v: Map<string, any>) => (c: Vector | null, f: Field, _: number) => c?.set(o, v.get(f.name));
+/** @ignore */ const _setStructObjectValue = (o: number, v: { [key: string]: any }) => (c: Vector | null, f: Field, _: number) => c?.set(o, v[f.name]);
 /** @ignore */
 const setStruct = <T extends Struct>(vector: VectorType<T>, index: number, value: T['TValue']) => {
 
diff --git a/js/test/Arrow.ts b/js/test/Arrow.ts
index f70cb29db05..8fe53b019d2 100644
--- a/js/test/Arrow.ts
+++ b/js/test/Arrow.ts
@@ -29,14 +29,14 @@ Object.defineProperty(Object, Symbol.hasInstance, {
     writable: true,
     configurable: true,
     value(inst: any) {
-        return inst && inst.constructor && inst.constructor.name === 'Object';
+        return inst?.constructor && inst.constructor.name === 'Object';
     }
 });
 Object.defineProperty(ArrayBuffer, Symbol.hasInstance, {
     writable: true,
     configurable: true,
     value(inst: any) {
-        return inst && inst.constructor && inst.constructor.name === 'ArrayBuffer';
+        return inst?.constructor && inst.constructor.name === 'ArrayBuffer';
     }
 });
 
diff --git a/js/test/unit/builders/utils.ts b/js/test/unit/builders/utils.ts
index 975828c1075..7ec8ca714ab 100644
--- a/js/test/unit/builders/utils.ts
+++ b/js/test/unit/builders/utils.ts
@@ -193,7 +193,7 @@ export function validateVector<T extends DataType>(vals: (T['TValue'] | null)[],
         // debugger;
         // vec.get(i);
         throw new Error([
-            `${(vec as any).VectorName}[${i}]: ${e && e.stack || e}`,
+            `${(vec as any).VectorName}[${i}]: ${e?.stack || e}`,
             `nulls: [${nullVals.join(', ')}]`,
             `values: [${vals.join(', ')}]`,
         ].join('\n'));
diff --git a/js/yarn.lock b/js/yarn.lock
index cd8bef320a9..6399e592561 100644
--- a/js/yarn.lock
+++ b/js/yarn.lock
@@ -3930,10 +3930,9 @@ eslint@^7.24.0:
     text-table "^0.2.0"
     v8-compile-cache "^2.0.3"
 
-esm@3.2.25:
+"esm@https://github.com/jsg2021/esm/releases/download/v3.x.x-pr883/esm-3.x.x-pr883.tgz":
   version "3.2.25"
-  resolved "https://registry.yarnpkg.com/esm/-/esm-3.2.25.tgz#342c18c29d56157688ba5ce31f8431fbb795cc10"
-  integrity sha512-U1suiZ2oDVWv4zPO56S0NcR5QriEahGtdN2OR6FiOG4WJvcjBVFB0qI4+eKoWFH483PKGuLuu6V8Z4T5g63UVA==
+  resolved "https://github.com/jsg2021/esm/releases/download/v3.x.x-pr883/esm-3.x.x-pr883.tgz#c463cfa4e14aceea6b7cd7e669ef90de072ea60a"
 
 espree@^7.3.0, espree@^7.3.1:
   version "7.3.1"
@@ -4770,40 +4769,40 @@ glogg@^1.0.0:
   dependencies:
     sparkles "^1.0.0"
 
-google-closure-compiler-java@^20210406.0.0:
-  version "20210406.0.0"
-  resolved "https://registry.yarnpkg.com/google-closure-compiler-java/-/google-closure-compiler-java-20210406.0.0.tgz#f2be1f825e1c08027698e24dc3ad7c762a7b838d"
-  integrity sha512-hVOoFiIenZuicZSLqi4sNdwzWeg9hRi3acpvOy6WPwKQUuUNkSXNtUiiXpKgCY5puDs49onhV7FzAHoQ/908lg==
+google-closure-compiler-java@^20210505.0.0:
+  version "20210505.0.0"
+  resolved "https://registry.yarnpkg.com/google-closure-compiler-java/-/google-closure-compiler-java-20210505.0.0.tgz#f1acdedbff960ad9c81a6b39d3d02876e33b2141"
+  integrity sha512-h+DfQAaaCLFmmtasOS8eyh0M4D+JInTJfEP4byV5R1cnMninpGGLHOG3PNgLLzkXkIO/fu4ILEcVzoGmgJEoMA==
 
-google-closure-compiler-linux@^20210406.0.0:
-  version "20210406.0.0"
-  resolved "https://registry.yarnpkg.com/google-closure-compiler-linux/-/google-closure-compiler-linux-20210406.0.0.tgz#e31ecb9ae6cdfb4e1b02052d8f24ac457e8fd435"
-  integrity sha512-KzE39AD3OOZMkR1TtE3nwPBhB3eEJwH8w4Jm3vx2k4veFhryWASFAnDMfHcASzlzjk05tPjecuFtGrHhVafL+w==
+google-closure-compiler-linux@^20210505.0.0:
+  version "20210505.0.0"
+  resolved "https://registry.yarnpkg.com/google-closure-compiler-linux/-/google-closure-compiler-linux-20210505.0.0.tgz#87ceaa5750d447725b2dd556b01a2e36e5cbf9cd"
+  integrity sha512-ADN2kFfIR1NiR24kLYb4YkX4MeXDJaT5OfRQEkiuIdZMtd28oEkm80LxCGuC7ftKEixoMm3f9/OG01B4U+xsnA==
 
-google-closure-compiler-osx@^20210406.0.0:
-  version "20210406.0.0"
-  resolved "https://registry.yarnpkg.com/google-closure-compiler-osx/-/google-closure-compiler-osx-20210406.0.0.tgz#17049155b2eba6a74b383d6b013d929a8a3a6d6a"
-  integrity sha512-Kph0hewevDC2T3uEQSRFoZAI5oE18ceyx5gUy93B0fd8cbL7vUCVjazBcHKOUiQ/Opq2CT96V0moCSFEhq8d1w==
+google-closure-compiler-osx@^20210505.0.0:
+  version "20210505.0.0"
+  resolved "https://registry.yarnpkg.com/google-closure-compiler-osx/-/google-closure-compiler-osx-20210505.0.0.tgz#1c31cd460cb6b8357a94add25b3500436c69ce26"
+  integrity sha512-JTwdh23aD2pwRU4QZjujxp/+rGfhex3utNWEdUDRMNpUGstUK7XPCDG8jNBtUpyuRiXFnpZa90qButqRgotQBA==
 
-google-closure-compiler-windows@^20210406.0.0:
-  version "20210406.0.0"
-  resolved "https://registry.yarnpkg.com/google-closure-compiler-windows/-/google-closure-compiler-windows-20210406.0.0.tgz#030a8f4bc7d9aa3fbcfe6028a8b8e23bc0cab755"
-  integrity sha512-IlFWn3vv8SLCRcxK6MSfRgnU4we7zy+s6OczmEmH4wymkpRM6aydAaD4Vxz68i00Om0hkT5l2oO3cFq5FiQBLg==
+google-closure-compiler-windows@^20210505.0.0:
+  version "20210505.0.0"
+  resolved "https://registry.yarnpkg.com/google-closure-compiler-windows/-/google-closure-compiler-windows-20210505.0.0.tgz#45559acde54d1b85973c9984253c15bfa98b5cfb"
+  integrity sha512-bKTbg/f4ak72OggEMaH/7oExqOO9dS+TxwGhoovYOt/YaVR/8MDfGdxsOhqoiboiFwYysTPz8bwINjYQK6AwnA==
 
-google-closure-compiler@20210406.0.0:
-  version "20210406.0.0"
-  resolved "https://registry.yarnpkg.com/google-closure-compiler/-/google-closure-compiler-20210406.0.0.tgz#954be1b1347ccfab00dbcaed5c6296133e710e0c"
-  integrity sha512-qaQqEjIneTK5OXYfZmGnWwy5S1nYLeTTphpbc7LzhsvEq4s2xapKCi6fC8VsbCHZvgq8z5VNomMJU97ErRCyGQ==
+google-closure-compiler@20210505.0.0:
+  version "20210505.0.0"
+  resolved "https://registry.yarnpkg.com/google-closure-compiler/-/google-closure-compiler-20210505.0.0.tgz#8a321ac49c9d3f0df30d7e15c2adbb2b11c5dd89"
+  integrity sha512-moeYaj4S6YTdOOvjv1ZLdUld/2YXw7q1GqUUHJJd+rE/uViyesozg8yKQZWcB3tvurhb+qEvFFet8CYoeaQHng==
   dependencies:
     chalk "2.x"
-    google-closure-compiler-java "^20210406.0.0"
+    google-closure-compiler-java "^20210505.0.0"
     minimist "1.x"
     vinyl "2.x"
     vinyl-sourcemaps-apply "^0.2.0"
   optionalDependencies:
-    google-closure-compiler-linux "^20210406.0.0"
-    google-closure-compiler-osx "^20210406.0.0"
-    google-closure-compiler-windows "^20210406.0.0"
+    google-closure-compiler-linux "^20210505.0.0"
+    google-closure-compiler-osx "^20210505.0.0"
+    google-closure-compiler-windows "^20210505.0.0"
 
 graceful-fs@^4.0.0, graceful-fs@^4.1.11, graceful-fs@^4.1.15, graceful-fs@^4.1.2, graceful-fs@^4.1.6, graceful-fs@^4.2.0, graceful-fs@^4.2.2, graceful-fs@^4.2.4:
   version "4.2.6"

From 9067829cf0d5da3184f37844813732b5074add91 Mon Sep 17 00:00:00 2001
From: Joris Peeters <joris.mg.peeters@gmail.com>
Date: Mon, 17 May 2021 11:27:29 +0800
Subject: [PATCH 255/719] ARROW-12679: [Java] JDBC->Arrow for NOT NULL columns.

Prior to this patch, the VectorSchemaRoot's schema coming out of the JDBC adaptor always has all columns as nullable, even when the SQL column is NOT NULL. Even if this has no immediate impact on performance, it throws away information that can be useful downstream.

The fix just replaces the `true` (for nullable) by the actual `isNullableColumn` information.

Closes #10285 from jmgpeeters/ARROW-12679

Authored-by: Joris Peeters <joris.mg.peeters@gmail.com>
Signed-off-by: liyafan82 <fan_li_ya@foxmail.com>
---
 .../arrow/adapter/jdbc/JdbcToArrowUtils.java  |  9 +-
 .../jdbc/h2/JdbcToArrowCharSetTest.java       |  2 +-
 .../jdbc/h2/JdbcToArrowDataTypesTest.java     |  2 +-
 .../adapter/jdbc/h2/JdbcToArrowNullTest.java  |  2 +-
 .../h2/JdbcToArrowOptionalColumnsTest.java    | 91 +++++++++++++++++++
 .../adapter/jdbc/h2/JdbcToArrowTest.java      |  2 +-
 .../jdbc/h2/JdbcToArrowTimeZoneTest.java      |  2 +-
 .../h2/JdbcToArrowVectorIteratorTest.java     |  2 +-
 .../resources/h2/test1_null_and_notnull.yml   | 26 ++++++
 9 files changed, 130 insertions(+), 8 deletions(-)
 create mode 100644 java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowOptionalColumnsTest.java
 create mode 100644 java/adapter/jdbc/src/test/resources/h2/test1_null_and_notnull.yml

diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java
index 8361c11b5ec..9f773e8f664 100644
--- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java
+++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java
@@ -155,7 +155,8 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, JdbcToArrowConfig
 
       final ArrowType arrowType = config.getJdbcToArrowTypeConverter().apply(new JdbcFieldInfo(rsmd, i));
       if (arrowType != null) {
-        final FieldType fieldType = new FieldType(true, arrowType, /* dictionary encoding */ null, metadata);
+        final FieldType fieldType = new FieldType(
+                isColumnNullable(rsmd, i), arrowType, /* dictionary encoding */ null, metadata);
 
         List<Field> children = null;
         if (arrowType.getTypeID() == ArrowType.List.TYPE_TYPE) {
@@ -219,7 +220,11 @@ public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, Calen
   }
 
   static boolean isColumnNullable(ResultSet resultSet, int index) throws SQLException {
-    int nullableValue = resultSet.getMetaData().isNullable(index);
+    return isColumnNullable(resultSet.getMetaData(), index);
+  }
+
+  static boolean isColumnNullable(ResultSetMetaData resultSetMetadata, int index) throws SQLException {
+    int nullableValue = resultSetMetadata.isNullable(index);
     return nullableValue == ResultSetMetaData.columnNullable ||
         nullableValue == ResultSetMetaData.columnNullableUnknown;
   }
diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowCharSetTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowCharSetTest.java
index f4c78d09588..b2ac349b596 100644
--- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowCharSetTest.java
+++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowCharSetTest.java
@@ -62,7 +62,7 @@ public class JdbcToArrowCharSetTest extends AbstractJdbcToArrowTest {
   };
 
   /**
-   * Constructor which populate table object for each test iteration.
+   * Constructor which populates the table object for each test iteration.
    *
    * @param table Table oject
    */
diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowDataTypesTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowDataTypesTest.java
index cafb7a050d7..2be6a83c342 100644
--- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowDataTypesTest.java
+++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowDataTypesTest.java
@@ -117,7 +117,7 @@ public class JdbcToArrowDataTypesTest extends AbstractJdbcToArrowTest {
   };
 
   /**
-   * Constructor which populate table object for each test iteration.
+   * Constructor which populates the table object for each test iteration.
    *
    * @param table Table object
    */
diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowNullTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowNullTest.java
index 57fcf566d7d..fd373091f93 100644
--- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowNullTest.java
+++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowNullTest.java
@@ -94,7 +94,7 @@ public class JdbcToArrowNullTest extends AbstractJdbcToArrowTest {
   };
 
   /**
-   * Constructor which populate table object for each test iteration.
+   * Constructor which populates the table object for each test iteration.
    *
    * @param table Table object
    */
diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowOptionalColumnsTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowOptionalColumnsTest.java
new file mode 100644
index 00000000000..4ab9017e247
--- /dev/null
+++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowOptionalColumnsTest.java
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc.h2;
+
+import static junit.framework.TestCase.assertTrue;
+import static org.junit.Assert.assertFalse;
+
+import java.io.IOException;
+import java.sql.SQLException;
+import java.util.Arrays;
+import java.util.Collection;
+
+import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest;
+import org.apache.arrow.adapter.jdbc.JdbcToArrow;
+import org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper;
+import org.apache.arrow.adapter.jdbc.Table;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+/**
+ * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality for
+ * (non-)optional columns, in particular with regard to the ensuing VectorSchemaRoot's schema.
+ */
+@RunWith(Parameterized.class)
+public class JdbcToArrowOptionalColumnsTest extends AbstractJdbcToArrowTest {
+  private static final String[] testFiles = {
+    "h2/test1_null_and_notnull.yml"
+  };
+
+  /**
+   * Constructor which populates the table object for each test iteration.
+   *
+   * @param table Table object
+   */
+  public JdbcToArrowOptionalColumnsTest(Table table) {
+    this.table = table;
+  }
+
+  /**
+   * Get the test data as a collection of Table objects for each test iteration.
+   *
+   * @return Collection of Table objects
+   * @throws SQLException           on error
+   * @throws ClassNotFoundException on error
+   * @throws IOException            on error
+   */
+  @Parameterized.Parameters
+  public static Collection<Object[]> getTestData() throws SQLException, ClassNotFoundException, IOException {
+    return Arrays.asList(prepareTestData(testFiles, JdbcToArrowOptionalColumnsTest.class));
+  }
+
+  /**
+   * Test Method to test JdbcToArrow Functionality for dealing with nullable and non-nullable columns.
+   */
+  @Test
+  public void testJdbcToArrowValues() throws SQLException, IOException {
+    testDataSets(JdbcToArrow.sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE)));
+  }
+
+  /**
+   * This method calls the assert methods for various DataSets. We verify that a SQL `NULL` column becomes
+   * nullable in the VectorSchemaRoot, and that a SQL `NOT NULL` column becomes non-nullable.
+   *
+   * @param root VectorSchemaRoot for test
+   */
+  public void testDataSets(VectorSchemaRoot root) {
+    JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root);
+
+    assertTrue(root.getSchema().getFields().get(0).isNullable());
+    assertFalse(root.getSchema().getFields().get(1).isNullable());
+  }
+
+}
diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java
index 76bb56da58c..85c42c0f40d 100644
--- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java
+++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java
@@ -109,7 +109,7 @@ public class JdbcToArrowTest extends AbstractJdbcToArrowTest {
   private static final String[] testFiles = {"h2/test1_all_datatypes_h2.yml"};
 
   /**
-   * Constructor which populate table object for each test iteration.
+   * Constructor which populates the table object for each test iteration.
    *
    * @param table Table object
    */
diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTimeZoneTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTimeZoneTest.java
index ef2b406d120..7062fa6aec1 100644
--- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTimeZoneTest.java
+++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTimeZoneTest.java
@@ -78,7 +78,7 @@ public class JdbcToArrowTimeZoneTest extends AbstractJdbcToArrowTest {
   };
 
   /**
-   * Constructor which populate table object for each test iteration.
+   * Constructor which populates the table object for each test iteration.
    *
    * @param table Table object
    */
diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowVectorIteratorTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowVectorIteratorTest.java
index 36a23701580..edd1952be05 100644
--- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowVectorIteratorTest.java
+++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowVectorIteratorTest.java
@@ -74,7 +74,7 @@
 public class JdbcToArrowVectorIteratorTest extends JdbcToArrowTest {
 
   /**
-   * Constructor which populate table object for each test iteration.
+   * Constructor which populates the table object for each test iteration.
    *
    * @param table Table object
    */
diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_null_and_notnull.yml b/java/adapter/jdbc/src/test/resources/h2/test1_null_and_notnull.yml
new file mode 100644
index 00000000000..20e9e5e5ecc
--- /dev/null
+++ b/java/adapter/jdbc/src/test/resources/h2/test1_null_and_notnull.yml
@@ -0,0 +1,26 @@
+#Licensed to the Apache Software Foundation (ASF) under one or more contributor
+#license agreements. See the NOTICE file distributed with this work for additional
+#information regarding copyright ownership. The ASF licenses this file to
+#You under the Apache License, Version 2.0 (the "License"); you may not use
+#this file except in compliance with the License. You may obtain a copy of
+#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+#by applicable law or agreed to in writing, software distributed under the
+#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+#OF ANY KIND, either express or implied. See the License for the specific
+#language governing permissions and limitations under the License.
+
+name: 'table1'
+
+type: 'nullnotnull'
+
+create: 'CREATE TABLE table1 (int_field_null INT, int_field_notnull INT NOT NULL);'
+
+data:
+  - 'INSERT INTO table1 VALUES (0, 0);'
+  - 'INSERT INTO table1 VALUES (1, 1);'
+
+rowCount: '2'
+
+query: 'select int_field_null, int_field_notnull from table1;'
+
+drop: 'DROP table table1;'
\ No newline at end of file

From 497c59604b46c8744107c0538234ffb3523998ef Mon Sep 17 00:00:00 2001
From: Shuai Zhang <zhangshuai.ustc@gmail.com>
Date: Mon, 17 May 2021 08:01:06 -0400
Subject: [PATCH 256/719] ARROW-12773: [Docs] Clarify Java support for ORC and
 Parquet via JNI bindings

See https://issues.apache.org/jira/browse/ARROW-12773 for further details.

Closes #10312 from hcoona/private/zhangshuai.ustc/fix-document-status-3rdparty-format

Authored-by: Shuai Zhang <zhangshuai.ustc@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 docs/source/status.rst | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/docs/source/status.rst b/docs/source/status.rst
index 80b21f74e36..e4e838a393c 100644
--- a/docs/source/status.rst
+++ b/docs/source/status.rst
@@ -221,9 +221,9 @@ Third-Party Data Formats
 +-----------------------------+---------+---------+-------+------------+-------+---------+-------+
 | CSV                         | R       |         | R/W   |            |       | R/W     | R/W   |
 +-----------------------------+---------+---------+-------+------------+-------+---------+-------+
-| ORC                         | R/W     |         |       |            |       |         |       |
+| ORC                         | R/W     | R (2)   |       |            |       |         |       |
 +-----------------------------+---------+---------+-------+------------+-------+---------+-------+
-| Parquet                     | R/W     | R (2)   |       |            |       | R/W (1) |       |
+| Parquet                     | R/W     | R (3)   |       |            |       | R/W (1) |       |
 +-----------------------------+---------+---------+-------+------------+-------+---------+-------+
 
 Notes:
@@ -232,6 +232,8 @@ Notes:
 
 * *W* = Write supported
 
-* \(1) Nested read/write not supported
+* \(1) Nested read/write not supported.
 
-* \(2) Through JNI bindings to datasets.
+* \(2) Through JNI bindings. (Provided by ``org.apache.arrow.orc:arrow-orc``)
+
+* \(3) Through JNI bindings to Arrow C++ Datasets. (Provided by ``org.apache.arrow:arrow-dataset``)

From e4952e4c080506cab131bce26a92c209cd128d39 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Mon, 17 May 2021 10:09:46 -0400
Subject: [PATCH 257/719] ARROW-12769: [Python] Fix slicing array with
 "negative" length (start > stop)

When the normalized slice has a start > stop, we were creating invalid arrays with a negative length (which then errors on subsequent operations)

Closes #10341 from jorisvandenbossche/ARROW-12769

Authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 python/pyarrow/array.pxi           | 5 ++++-
 python/pyarrow/tests/test_array.py | 9 ++++++++-
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index df0ee85eee7..3da5033ac47 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -537,7 +537,8 @@ def _normalize_slice(object arrow_obj, slice key):
         indices = np.arange(start, stop, step)
         return arrow_obj.take(indices)
     else:
-        return arrow_obj.slice(start, stop - start)
+        length = max(stop - start, 0)
+        return arrow_obj.slice(start, length)
 
 
 cdef Py_ssize_t _normalize_index(Py_ssize_t index,
@@ -1103,6 +1104,8 @@ cdef class Array(_PandasConvertible):
         if length is None:
             result = self.ap.Slice(offset)
         else:
+            if length < 0:
+                raise ValueError('Length must be non-negative')
             result = self.ap.Slice(offset, length)
 
         return pyarrow_wrap_array(result)
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index 54a13ba1ba4..086ed4cb160 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -413,6 +413,9 @@ def test_array_slice():
     with pytest.raises(IndexError):
         arr.slice(-1)
 
+    with pytest.raises(ValueError):
+        arr.slice(2, -1)
+
     # Test slice notation
     assert arr[2:].equals(arr.slice(2))
     assert arr[2:5].equals(arr.slice(2, 3))
@@ -421,7 +424,11 @@ def test_array_slice():
     n = len(arr)
     for start in range(-n * 2, n * 2):
         for stop in range(-n * 2, n * 2):
-            assert arr[start:stop].to_pylist() == arr.to_pylist()[start:stop]
+            res = arr[start:stop]
+            res.validate()
+            expected = arr.to_pylist()[start:stop]
+            assert res.to_pylist() == expected
+            assert res.to_numpy().tolist() == expected
 
 
 def test_array_slice_negative_step():

From c783db68a105de7537047521154dfcc2bb37a2ca Mon Sep 17 00:00:00 2001
From: Weston Pace <weston.pace@gmail.com>
Date: Mon, 17 May 2021 10:19:37 -0400
Subject: [PATCH 258/719] ARROW-12004: [C++] Result<detail::Empty> is annoying

Per the JIRA

`Future<>::AddCallback` callbacks receive a `Status`.
`Future<T>::AddCallback` callbacks receive a `Result<T>`
`Future<>::Then` callbacks receive nothing
`Future<T>::Then` callbacks receive `const T&`

To achieve this I had to explicitly specialize the empty `Future` but I introduced `FutureBase` to reduce the amount of duplicated code.  `detail::Empty` is still around (although it got renamed to `internal::Empty` as a side effect of moving into `functional.h`).  It could even be removed if one wanted to create a specialized `FutureImpl` but that doesn't seem to be needed at the moment.

Closes #10205 from westonpace/feature/ARROW-12004--c-result-detail-empty-is-annoying

Authored-by: Weston Pace <weston.pace@gmail.com>
Signed-off-by: Benjamin Kietzman <bengilgit@gmail.com>
---
 cpp/src/arrow/compute/exec/plan_test.cc     |   4 +-
 cpp/src/arrow/csv/reader.cc                 |  25 +-
 cpp/src/arrow/dataset/file_base.cc          |  10 +-
 cpp/src/arrow/dataset/scanner.cc            |  16 +-
 cpp/src/arrow/filesystem/s3fs.cc            |  14 +-
 cpp/src/arrow/io/memory_test.cc             |   4 +-
 cpp/src/arrow/testing/gtest_util.cc         |   4 +-
 cpp/src/arrow/type_fwd.h                    |   6 +-
 cpp/src/arrow/util/async_generator.h        |  44 +--
 cpp/src/arrow/util/async_generator_test.cc  |  19 +-
 cpp/src/arrow/util/functional.h             |  32 ++-
 cpp/src/arrow/util/future.cc                |   8 +-
 cpp/src/arrow/util/future.h                 | 304 ++++++++++++++++----
 cpp/src/arrow/util/future_test.cc           |  69 +++--
 cpp/src/arrow/util/thread_pool.cc           |   5 -
 cpp/src/arrow/util/thread_pool.h            |  36 +--
 cpp/src/arrow/util/thread_pool_benchmark.cc |   6 +-
 cpp/src/arrow/util/thread_pool_test.cc      |  12 +-
 cpp/src/arrow/util/type_fwd.h               |   6 +-
 19 files changed, 405 insertions(+), 219 deletions(-)

diff --git a/cpp/src/arrow/compute/exec/plan_test.cc b/cpp/src/arrow/compute/exec/plan_test.cc
index d809409b28d..86f1879cbe9 100644
--- a/cpp/src/arrow/compute/exec/plan_test.cc
+++ b/cpp/src/arrow/compute/exec/plan_test.cc
@@ -274,9 +274,7 @@ static Result<RecordBatchGenerator> MakeSlowRecordBatchGenerator(
       std::move(gen), [](const std::shared_ptr<RecordBatch>& batch) {
         auto fut = Future<std::shared_ptr<RecordBatch>>::Make();
         SleepABitAsync().AddCallback(
-            [fut, batch](const Result<::arrow::detail::Empty>&) mutable {
-              fut.MarkFinished(batch);
-            });
+            [fut, batch](const Status& status) mutable { fut.MarkFinished(batch); });
         return fut;
       });
   // Adding readahead implicitly adds parallelism by pulling reentrantly from
diff --git a/cpp/src/arrow/csv/reader.cc b/cpp/src/arrow/csv/reader.cc
index baa40dcf46e..f05f8cac9a9 100644
--- a/cpp/src/arrow/csv/reader.cc
+++ b/cpp/src/arrow/csv/reader.cc
@@ -759,8 +759,7 @@ class SerialStreamingReader : public BaseStreamingReader,
             }
             return Status::OK();
           })
-          .Then([self](const ::arrow::detail::Empty& st)
-                    -> Result<std::shared_ptr<RecordBatch>> {
+          .Then([self]() -> Result<std::shared_ptr<RecordBatch>> {
             return self->DecodeBatchAndUpdateSchema();
           });
     }
@@ -788,14 +787,14 @@ class SerialStreamingReader : public BaseStreamingReader,
     }
     auto self = shared_from_this();
     if (!block_generator_) {
-      return SetupReader(self).Then([self](const Result<::arrow::detail::Empty>& res)
-                                        -> Future<std::shared_ptr<RecordBatch>> {
-        if (!res.ok()) {
-          self->eof_ = true;
-          return res.status();
-        }
-        return self->ReadNextSkippingEmpty(self);
-      });
+      return SetupReader(self).Then(
+          [self]() -> Future<std::shared_ptr<RecordBatch>> {
+            return self->ReadNextSkippingEmpty(self);
+          },
+          [self](const Status& err) -> Result<std::shared_ptr<RecordBatch>> {
+            self->eof_ = true;
+            return err;
+          });
     } else {
       return self->ReadNextSkippingEmpty(self);
     }
@@ -925,7 +924,7 @@ class AsyncThreadedTableReader
         internal::TaskGroup::MakeThreaded(cpu_executor_, io_context_.stop_token());
 
     auto self = shared_from_this();
-    return ProcessFirstBuffer().Then([self](std::shared_ptr<Buffer> first_buffer) {
+    return ProcessFirstBuffer().Then([self](const std::shared_ptr<Buffer>& first_buffer) {
       auto block_generator = ThreadedBlockReader::MakeAsyncIterator(
           self->buffer_generator_, MakeChunker(self->parse_options_),
           std::move(first_buffer));
@@ -950,12 +949,12 @@ class AsyncThreadedTableReader
       };
 
       return VisitAsyncGenerator(std::move(block_generator), block_visitor)
-          .Then([self](...) -> Future<> {
+          .Then([self]() -> Future<> {
             // By this point we've added all top level tasks so it is safe to call
             // FinishAsync
             return self->task_group_->FinishAsync();
           })
-          .Then([self](...) -> Result<std::shared_ptr<Table>> {
+          .Then([self]() -> Result<std::shared_ptr<Table>> {
             // Finish conversion, create schema and table
             return self->MakeTable();
           });
diff --git a/cpp/src/arrow/dataset/file_base.cc b/cpp/src/arrow/dataset/file_base.cc
index f8f4cc38444..56b97414602 100644
--- a/cpp/src/arrow/dataset/file_base.cc
+++ b/cpp/src/arrow/dataset/file_base.cc
@@ -508,11 +508,11 @@ Status WriteInternal(const ScanOptions& scan_options, WriteState& state,
           [&](std::shared_ptr<RecordBatch> batch) {
             return WriteNextBatch(state, scan_task->fragment(), std::move(batch));
           };
-      return internal::SerialExecutor::RunInSerialExecutor<detail::Empty>(
-                 [&](internal::Executor* executor) {
-                   return scan_task->SafeVisit(executor, visitor);
-                 })
-          .status();
+      return internal::RunSynchronously<Future<>>(
+          [&](internal::Executor* executor) {
+            return scan_task->SafeVisit(executor, visitor);
+          },
+          /*use_threads=*/false);
     });
   }
   return task_group->Finish();
diff --git a/cpp/src/arrow/dataset/scanner.cc b/cpp/src/arrow/dataset/scanner.cc
index 4eac2752add..d9c03239e83 100644
--- a/cpp/src/arrow/dataset/scanner.cc
+++ b/cpp/src/arrow/dataset/scanner.cc
@@ -698,11 +698,10 @@ struct AsyncTableAssemblyState {
 };
 
 Status AsyncScanner::Scan(std::function<Status(TaggedRecordBatch)> visitor) {
-  return internal::RunSynchronouslyVoid(
-      [this, &visitor](Executor* executor) {
-        return VisitBatchesAsync(visitor, executor);
-      },
-      scan_options_->use_threads);
+  auto top_level_task = [this, &visitor](Executor* executor) {
+    return VisitBatchesAsync(visitor, executor);
+  };
+  return internal::RunSynchronously<Future<>>(top_level_task, scan_options_->use_threads);
 }
 
 Future<> AsyncScanner::VisitBatchesAsync(std::function<Status(TaggedRecordBatch)> visitor,
@@ -729,10 +728,9 @@ Future<std::shared_ptr<Table>> AsyncScanner::ToTableAsync(
   auto table_building_gen = MakeMappedGenerator<EnumeratedRecordBatch>(
       positioned_batch_gen, table_building_task);
 
-  return DiscardAllFromAsyncGenerator(table_building_gen)
-      .Then([state, scan_options](const detail::Empty&) {
-        return Table::FromRecordBatches(scan_options->projected_schema, state->Finish());
-      });
+  return DiscardAllFromAsyncGenerator(table_building_gen).Then([state, scan_options]() {
+    return Table::FromRecordBatches(scan_options->projected_schema, state->Finish());
+  });
 }
 
 Result<int64_t> AsyncScanner::CountRows() {
diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc
index ab6c8fad92a..c22571aaa56 100644
--- a/cpp/src/arrow/filesystem/s3fs.cc
+++ b/cpp/src/arrow/filesystem/s3fs.cc
@@ -1595,8 +1595,7 @@ class S3FileSystem::Impl : public std::enable_shared_from_this<S3FileSystem::Imp
 
     TreeWalker::WalkAsync(client_, io_context_, bucket, key, kListObjectsMaxKeys,
                           handle_results, handle_error, handle_recursion)
-        .AddCallback([collector, producer,
-                      self](const Result<::arrow::detail::Empty>& res) mutable {
+        .AddCallback([collector, producer, self](const Status& status) mutable {
           auto st = collector->Finish(self.get());
           if (!st.ok()) {
             producer.Push(st);
@@ -1645,11 +1644,7 @@ class S3FileSystem::Impl : public std::enable_shared_from_this<S3FileSystem::Imp
     struct DeleteCallback {
       const std::string bucket;
 
-      Status operator()(const Result<S3Model::DeleteObjectsOutcome>& result) {
-        if (!result.ok()) {
-          return result.status();
-        }
-        const auto& outcome = *result;
+      Status operator()(const S3Model::DeleteObjectsOutcome& outcome) {
         if (!outcome.IsSuccess()) {
           return ErrorToStatus(outcome.GetError());
         }
@@ -1754,7 +1749,10 @@ class S3FileSystem::Impl : public std::enable_shared_from_this<S3FileSystem::Imp
   Future<std::vector<std::string>> ListBucketsAsync(io::IOContext ctx) {
     auto self = shared_from_this();
     return DeferNotOk(SubmitIO(ctx, [self]() { return self->client_->ListBuckets(); }))
-        .Then(Impl::ProcessListBuckets);
+        // TODO(ARROW-12655) Change to Then(Impl::ProcessListBuckets)
+        .Then([](const Aws::S3::Model::ListBucketsOutcome& outcome) {
+          return Impl::ProcessListBuckets(outcome);
+        });
   }
 
   Result<std::shared_ptr<ObjectInputFile>> OpenInputFile(const std::string& s,
diff --git a/cpp/src/arrow/io/memory_test.cc b/cpp/src/arrow/io/memory_test.cc
index 6a24b0c764f..be3c3ab6605 100644
--- a/cpp/src/arrow/io/memory_test.cc
+++ b/cpp/src/arrow/io/memory_test.cc
@@ -779,8 +779,8 @@ TEST(RangeReadCache, Concurrency) {
       ASSERT_OK(cache.Cache(ranges));
       std::vector<Future<std::shared_ptr<Buffer>>> futures;
       for (const auto& range : ranges) {
-        futures.push_back(cache.WaitFor({range}).Then(
-            [&cache, range](const detail::Empty&) { return cache.Read(range); }));
+        futures.push_back(
+            cache.WaitFor({range}).Then([&cache, range]() { return cache.Read(range); }));
       }
       for (auto fut : futures) {
         ASSERT_FINISHES_OK(fut);
diff --git a/cpp/src/arrow/testing/gtest_util.cc b/cpp/src/arrow/testing/gtest_util.cc
index 67343b0d86a..ba4fe1e1fe7 100644
--- a/cpp/src/arrow/testing/gtest_util.cc
+++ b/cpp/src/arrow/testing/gtest_util.cc
@@ -637,7 +637,7 @@ Future<> SleepAsync(double seconds) {
   auto out = Future<>::Make();
   std::thread([out, seconds]() mutable {
     SleepFor(seconds);
-    out.MarkFinished(Status::OK());
+    out.MarkFinished();
   }).detach();
   return out;
 }
@@ -646,7 +646,7 @@ Future<> SleepABitAsync() {
   auto out = Future<>::Make();
   std::thread([out]() mutable {
     SleepABit();
-    out.MarkFinished(Status::OK());
+    out.MarkFinished();
   }).detach();
   return out;
 }
diff --git a/cpp/src/arrow/type_fwd.h b/cpp/src/arrow/type_fwd.h
index d541209a314..1c953583c3b 100644
--- a/cpp/src/arrow/type_fwd.h
+++ b/cpp/src/arrow/type_fwd.h
@@ -37,10 +37,10 @@ class Result;
 
 class Status;
 
-namespace detail {
+namespace internal {
 struct Empty;
-}
-template <typename T = detail::Empty>
+}  // namespace internal
+template <typename T = internal::Empty>
 class Future;
 
 namespace util {
diff --git a/cpp/src/arrow/util/async_generator.h b/cpp/src/arrow/util/async_generator.h
index 3827e1645f9..4cd8a3a9c9d 100644
--- a/cpp/src/arrow/util/async_generator.h
+++ b/cpp/src/arrow/util/async_generator.h
@@ -78,9 +78,9 @@ Future<> VisitAsyncGenerator(AsyncGenerator<T> generator,
                              std::function<Status(T)> visitor) {
   struct LoopBody {
     struct Callback {
-      Result<ControlFlow<detail::Empty>> operator()(const T& result) {
+      Result<ControlFlow<>> operator()(const T& result) {
         if (IsIterationEnd(result)) {
-          return Break(detail::Empty());
+          return Break();
         } else {
           auto visited = visitor(result);
           if (visited.ok()) {
@@ -94,7 +94,7 @@ Future<> VisitAsyncGenerator(AsyncGenerator<T> generator,
       std::function<Status(T)> visitor;
     };
 
-    Future<ControlFlow<detail::Empty>> operator()() {
+    Future<ControlFlow<>> operator()() {
       Callback callback{visitor};
       auto next = generator();
       return next.Then(std::move(callback));
@@ -484,13 +484,9 @@ class TransformingGenerator {
           // Otherwise, if not finished immediately, add callback to process results
         } else {
           auto self = this->shared_from_this();
-          return next_fut.Then([self](const Result<T>& next_result) {
-            if (next_result.ok()) {
-              self->last_value_ = *next_result;
-              return (*self)();
-            } else {
-              return Future<V>::MakeFinished(next_result.status());
-            }
+          return next_fut.Then([self](const T& next_result) {
+            self->last_value_ = next_result;
+            return (*self)();
           });
         }
       }
@@ -565,7 +561,7 @@ class SerialReadaheadGenerator {
       // Lazy generator, need to wait for the first ask to prime the pump
       state_->first_ = false;
       auto next = state_->source_();
-      return next.Then(Callback{state_});
+      return next.Then(Callback{state_}, ErrCallback{state_});
     }
 
     // This generator is not async-reentrant.  We won't be called until the last
@@ -600,7 +596,7 @@ class SerialReadaheadGenerator {
           readahead_queue_(max_readahead + 1) {}
 
     Status Pump(const std::shared_ptr<State>& self) {
-      // Can't do readahead_queue.write(source().Then(Callback{self})) because then the
+      // Can't do readahead_queue.write(source().Then(...)) because then the
       // callback might run immediately and add itself to the queue before this gets added
       // to the queue messing up the order.
       auto next_slot = std::make_shared<Future<T>>();
@@ -614,7 +610,7 @@ class SerialReadaheadGenerator {
       // writing.  That is because this callback (the callback for future X) must be
       // finished before future X is marked complete and this source is not pulled
       // reentrantly so it will not poll for future X+1 until this callback has completed.
-      *next_slot = source_().Then(Callback{self});
+      *next_slot = source_().Then(Callback{self}, ErrCallback{self});
       return Status::OK();
     }
 
@@ -634,21 +630,25 @@ class SerialReadaheadGenerator {
   };
 
   struct Callback {
-    Result<T> operator()(const Result<T>& maybe_next) {
-      if (!maybe_next.ok()) {
-        state_->finished_.store(true);
-        return maybe_next;
-      }
-      const auto& next = *maybe_next;
+    Result<T> operator()(const T& next) {
       if (IsIterationEnd(next)) {
         state_->finished_.store(true);
-        return maybe_next;
+        return next;
       }
       auto last_available = state_->spaces_available_.fetch_sub(1);
       if (last_available > 1) {
         ARROW_RETURN_NOT_OK(state_->Pump(state_));
       }
-      return maybe_next;
+      return next;
+    }
+
+    std::shared_ptr<State> state_;
+  };
+
+  struct ErrCallback {
+    Result<T> operator()(const Status& st) {
+      state_->finished_.store(true);
+      return st;
     }
 
     std::shared_ptr<State> state_;
@@ -1308,7 +1308,7 @@ class BackgroundGenerator {
         // If the task is still cleaning up we need to wait for it to finish before
         // restarting.  We also want to block the consumer until we've restarted the
         // reader to avoid multiple restarts
-        return task_finished.Then([state, next](...) {
+        return task_finished.Then([state, next]() {
           // This may appear dangerous (recursive mutex) but we should be guaranteed the
           // outer guard has been released by this point.  We know...
           // * task_finished is not already finished (it would be invalid in that case)
diff --git a/cpp/src/arrow/util/async_generator_test.cc b/cpp/src/arrow/util/async_generator_test.cc
index 474c4f6a0e7..74850b625a2 100644
--- a/cpp/src/arrow/util/async_generator_test.cc
+++ b/cpp/src/arrow/util/async_generator_test.cc
@@ -52,8 +52,7 @@ AsyncGenerator<T> FailsAt(AsyncGenerator<T> src, int failing_index) {
 template <typename T>
 AsyncGenerator<T> SlowdownABit(AsyncGenerator<T> source) {
   return MakeMappedGenerator<T, T>(std::move(source), [](const T& res) -> Future<T> {
-    return SleepABitAsync().Then(
-        [res](const Result<detail::Empty>& empty) { return res; });
+    return SleepABitAsync().Then([res]() { return res; });
   });
 }
 
@@ -164,7 +163,7 @@ class ReentrantChecker {
     std::atomic<bool> valid;
   };
   struct Callback {
-    Future<T> operator()(const Result<T>& result) {
+    Future<T> operator()(const T& result) {
       state_->generated_unfinished_future.store(false);
       return result;
     }
@@ -362,9 +361,7 @@ TEST(TestAsyncUtil, MapAsync) {
   std::vector<TestInt> input = {1, 2, 3};
   auto generator = AsyncVectorIt(input);
   std::function<Future<TestStr>(const TestInt&)> mapper = [](const TestInt& in) {
-    return SleepAsync(1e-3).Then([in](const Result<detail::Empty>& empty) {
-      return TestStr(std::to_string(in.value));
-    });
+    return SleepAsync(1e-3).Then([in]() { return TestStr(std::to_string(in.value)); });
   };
   auto mapped = MakeMappedGenerator(std::move(generator), mapper);
   std::vector<TestStr> expected{"1", "2", "3"};
@@ -383,7 +380,7 @@ TEST(TestAsyncUtil, MapReentrant) {
   Future<> can_proceed = Future<>::Make();
   std::function<Future<TestStr>(const TestInt&)> mapper = [&](const TestInt& in) {
     map_tasks_running.fetch_add(1);
-    return can_proceed.Then([in](...) { return TestStr(std::to_string(in.value)); });
+    return can_proceed.Then([in]() { return TestStr(std::to_string(in.value)); });
   };
   auto mapped = MakeMappedGenerator(std::move(source), mapper);
 
@@ -469,7 +466,7 @@ TEST_P(FromFutureFixture, Basic) {
   auto source = Future<std::vector<TestInt>>::MakeFinished(RangeVector(3));
   if (IsSlow()) {
     source = SleepABitAsync().Then(
-        [](...) -> Result<std::vector<TestInt>> { return RangeVector(3); });
+        []() -> Result<std::vector<TestInt>> { return RangeVector(3); });
   }
   auto slow = IsSlow();
   auto to_gen = source.Then([slow](const std::vector<TestInt>& vec) {
@@ -651,7 +648,7 @@ TEST(TestAsyncUtil, MakeTransferredGenerator) {
       MakeTransferredGenerator<TestInt>(std::move(slow_generator), thread_pool.get());
 
   auto current_thread_id = std::this_thread::get_id();
-  auto fut = transferred().Then([&current_thread_id](const Result<TestInt>& result) {
+  auto fut = transferred().Then([&current_thread_id](const TestInt&) {
     ASSERT_NE(current_thread_id, std::this_thread::get_id());
   });
 
@@ -1009,8 +1006,8 @@ TEST(TestAsyncUtil, SerialReadaheadStressFailing) {
     AsyncGenerator<TestInt> it = BackgroundAsyncVectorIt(RangeVector(NITEMS));
     AsyncGenerator<TestInt> fails_at_ten = [&it]() {
       auto next = it();
-      return next.Then([](const Result<TestInt>& item) -> Result<TestInt> {
-        if (item->value >= 10) {
+      return next.Then([](const TestInt& item) -> Result<TestInt> {
+        if (item.value >= 10) {
           return Status::Invalid("XYZ");
         } else {
           return item;
diff --git a/cpp/src/arrow/util/functional.h b/cpp/src/arrow/util/functional.h
index 3588e8540e8..9da79046fec 100644
--- a/cpp/src/arrow/util/functional.h
+++ b/cpp/src/arrow/util/functional.h
@@ -21,13 +21,23 @@
 #include <tuple>
 #include <type_traits>
 
+#include "arrow/result.h"
 #include "arrow/util/macros.h"
 
 namespace arrow {
 namespace internal {
 
+struct Empty {
+  static Result<Empty> ToResult(Status s) {
+    if (ARROW_PREDICT_TRUE(s.ok())) {
+      return Empty{};
+    }
+    return s;
+  }
+};
+
 /// Helper struct for examining lambdas and other callables.
-/// TODO(bkietz) support function pointers
+/// TODO(ARROW-12655) support function pointers
 struct call_traits {
  public:
   template <typename R, typename... A>
@@ -57,6 +67,16 @@ struct call_traits {
   static typename std::tuple_element<I, std::tuple<A...>>::type argument_type_impl(
       R (F::*)(A...) &&);
 
+  template <typename F, typename R, typename... A>
+  static std::integral_constant<int, sizeof...(A)> argument_count_impl(R (F::*)(A...));
+
+  template <typename F, typename R, typename... A>
+  static std::integral_constant<int, sizeof...(A)> argument_count_impl(R (F::*)(A...)
+                                                                           const);
+
+  template <typename F, typename R, typename... A>
+  static std::integral_constant<int, sizeof...(A)> argument_count_impl(R (F::*)(A...) &&);
+
   /// bool constant indicating whether F is a callable with more than one possible
   /// signature. Will be true_type for objects which define multiple operator() or which
   /// define a template operator()
@@ -76,12 +96,22 @@ struct call_traits {
   template <std::size_t I, typename F>
   using argument_type = decltype(argument_type_impl<I>(&std::decay<F>::type::operator()));
 
+  template <typename F>
+  using argument_count = decltype(argument_count_impl(&std::decay<F>::type::operator()));
+
   template <typename F>
   using return_type = decltype(return_type_impl(&std::decay<F>::type::operator()));
 
   template <typename F, typename T, typename RT = T>
   using enable_if_return =
       typename std::enable_if<std::is_same<return_type<F>, T>::value, RT>;
+
+  template <typename T, typename R = void>
+  using enable_if_empty = typename std::enable_if<std::is_same<T, Empty>::value, R>::type;
+
+  template <typename T, typename R = void>
+  using enable_if_not_empty =
+      typename std::enable_if<!std::is_same<T, Empty>::value, R>::type;
 };
 
 /// A type erased callable object which may only be invoked once.
diff --git a/cpp/src/arrow/util/future.cc b/cpp/src/arrow/util/future.cc
index 90e8db3c6a6..c7d7c37ad33 100644
--- a/cpp/src/arrow/util/future.cc
+++ b/cpp/src/arrow/util/future.cc
@@ -357,16 +357,16 @@ Future<> AllComplete(const std::vector<Future<>>& futures) {
   auto state = std::make_shared<State>(futures.size());
   auto out = Future<>::Make();
   for (const auto& future : futures) {
-    future.AddCallback([state, out](const Result<detail::Empty>& result) mutable {
-      if (!result.ok()) {
+    future.AddCallback([state, out](const Status& status) mutable {
+      if (!status.ok()) {
         std::unique_lock<std::mutex> lock(state->mutex);
         if (!out.is_finished()) {
-          out.MarkFinished(result);
+          out.MarkFinished(status);
         }
         return;
       }
       if (state->n_remaining.fetch_sub(1) != 1) return;
-      out.MarkFinished(Status::OK());
+      out.MarkFinished();
     });
   }
   return out;
diff --git a/cpp/src/arrow/util/future.h b/cpp/src/arrow/util/future.h
index 4c8de912f81..132443176ed 100644
--- a/cpp/src/arrow/util/future.h
+++ b/cpp/src/arrow/util/future.h
@@ -38,15 +38,6 @@ namespace arrow {
 
 namespace detail {
 
-struct Empty {
-  static Result<Empty> ToResult(Status s) {
-    if (ARROW_PREDICT_TRUE(s.ok())) {
-      return Empty{};
-    }
-    return s;
-  }
-};
-
 template <typename>
 struct is_future : std::false_type {};
 
@@ -56,6 +47,59 @@ struct is_future<Future<T>> : std::true_type {};
 template <typename Signature>
 using result_of_t = typename std::result_of<Signature>::type;
 
+// Helper to find the synchronous counterpart for a Future
+template <typename T>
+struct SyncType {
+  using type = Result<T>;
+};
+
+template <>
+struct SyncType<internal::Empty> {
+  using type = Status;
+};
+
+template <typename Fn>
+using first_arg_is_status =
+    std::is_same<typename std::decay<internal::call_traits::argument_type<0, Fn>>::type,
+                 Status>;
+
+template <typename Fn>
+struct has_no_args {
+  static constexpr bool value = internal::call_traits::argument_count<Fn>::value == 0;
+};
+
+/// Creates a callback that can be added to a future to mark a `dest` future finished
+template <typename Source, typename Dest, bool SourceEmpty = Source::is_empty,
+          bool DestEmpty = Dest::is_empty>
+struct MarkNextFinished {};
+
+/// If the source and dest are both empty we can pass on the status
+template <typename Source, typename Dest>
+struct MarkNextFinished<Source, Dest, true, true> {
+  void operator()(const Status& status) && { next.MarkFinished(status); }
+  Dest next;
+};
+
+/// If the source is not empty but the dest is then we can take the
+/// status out of the result
+template <typename Source, typename Dest>
+struct MarkNextFinished<Source, Dest, false, true> {
+  void operator()(const Result<typename Source::ValueType>& res) && {
+    next.MarkFinished(internal::Empty::ToResult(res.status()));
+  }
+  Dest next;
+};
+
+/// If neither are empty we pass on the result
+template <typename Source, typename Dest>
+struct MarkNextFinished<Source, Dest, false, false> {
+  void operator()(const Result<typename Source::ValueType>& res) && {
+    next.MarkFinished(res);
+  }
+  Dest next;
+};
+
+/// Helper that contains information about how to apply a continuation
 struct ContinueFuture {
   template <typename Return>
   struct ForReturnImpl;
@@ -66,6 +110,7 @@ struct ContinueFuture {
   template <typename Signature>
   using ForSignature = ForReturn<result_of_t<Signature>>;
 
+  // If the callback returns void then we return Future<> that always finishes OK.
   template <typename ContinueFunc, typename... Args,
             typename ContinueResult = result_of_t<ContinueFunc && (Args && ...)>,
             typename NextFuture = ForReturn<ContinueResult>>
@@ -75,15 +120,42 @@ struct ContinueFuture {
     next.MarkFinished();
   }
 
+  /// If the callback returns a non-future then we return Future<T>
+  /// and mark the future finished with the callback result.  It will get promoted
+  /// to Result<T> as part of MarkFinished if it isn't already.
+  ///
+  /// If the callback returns Status and we return Future<> then also send the callback
+  /// result as-is to the destination future.
   template <typename ContinueFunc, typename... Args,
             typename ContinueResult = result_of_t<ContinueFunc && (Args && ...)>,
             typename NextFuture = ForReturn<ContinueResult>>
-  typename std::enable_if<!std::is_void<ContinueResult>::value &&
-                          !is_future<ContinueResult>::value>::type
+  typename std::enable_if<
+      !std::is_void<ContinueResult>::value && !is_future<ContinueResult>::value &&
+      (!NextFuture::is_empty || std::is_same<ContinueResult, Status>::value)>::type
   operator()(NextFuture next, ContinueFunc&& f, Args&&... a) const {
     next.MarkFinished(std::forward<ContinueFunc>(f)(std::forward<Args>(a)...));
   }
 
+  /// If the callback returns a Result and the next future is Future<> then we mark
+  /// the future finished with the callback result.
+  ///
+  /// It may seem odd that the next future is Future<> when the callback returns a
+  /// result but this can occur if the OnFailure callback returns a result while the
+  /// OnSuccess callback is void/Status (e.g. you would get this calling the one-arg
+  /// version of Then with an OnSuccess callback that returns void)
+  template <typename ContinueFunc, typename... Args,
+            typename ContinueResult = result_of_t<ContinueFunc && (Args && ...)>,
+            typename NextFuture = ForReturn<ContinueResult>>
+  typename std::enable_if<!std::is_void<ContinueResult>::value &&
+                          !is_future<ContinueResult>::value && NextFuture::is_empty &&
+                          !std::is_same<ContinueResult, Status>::value>::type
+  operator()(NextFuture next, ContinueFunc&& f, Args&&... a) const {
+    next.MarkFinished(std::forward<ContinueFunc>(f)(std::forward<Args>(a)...).status());
+  }
+
+  /// If the callback returns a Future<T> then we return Future<T>.  We create a new
+  /// future and add a callback to the future given to us by the user that forwards the
+  /// result to the future we just created
   template <typename ContinueFunc, typename... Args,
             typename ContinueResult = result_of_t<ContinueFunc && (Args && ...)>,
             typename NextFuture = ForReturn<ContinueResult>>
@@ -91,18 +163,13 @@ struct ContinueFuture {
       NextFuture next, ContinueFunc&& f, Args&&... a) const {
     ContinueResult signal_to_complete_next =
         std::forward<ContinueFunc>(f)(std::forward<Args>(a)...);
-
-    struct MarkNextFinished {
-      void operator()(const Result<typename ContinueResult::ValueType>& result) && {
-        next.MarkFinished(result);
-      }
-      NextFuture next;
-    };
-
-    signal_to_complete_next.AddCallback(MarkNextFinished{std::move(next)});
+    MarkNextFinished<ContinueResult, NextFuture> callback{std::move(next)};
+    signal_to_complete_next.AddCallback(std::move(callback));
   }
 };
 
+/// Helper struct which tells us what kind of Future gets returned from `Then` based on
+/// the return type of the OnSuccess callback
 template <>
 struct ContinueFuture::ForReturnImpl<void> {
   using type = Future<>;
@@ -242,10 +309,11 @@ class ARROW_EXPORT FutureWaiter {
 /// to complete, or wait on multiple Futures at once (using WaitForAll,
 /// WaitForAny or AsCompletedIterator).
 template <typename T>
-class ARROW_MUST_USE_TYPE Future {
+class Future {
  public:
   using ValueType = T;
-
+  using SyncType = typename detail::SyncType<T>::type;
+  static constexpr bool is_empty = std::is_same<T, internal::Empty>::value;
   // The default constructor creates an invalid Future.  Use Future::Make()
   // for a valid Future.  This constructor is mostly for the convenience
   // of being able to presize a vector of Futures.
@@ -331,7 +399,7 @@ class ARROW_MUST_USE_TYPE Future {
 
   /// \brief Mark a Future<> completed with the provided Status.
   template <typename E = ValueType, typename = typename std::enable_if<
-                                        std::is_same<E, detail::Empty>::value>::type>
+                                        std::is_same<E, internal::Empty>::value>::type>
   void MarkFinished(Status s = Status::OK()) {
     return DoMarkFinished(E::ToResult(std::move(s)));
   }
@@ -349,20 +417,15 @@ class ARROW_MUST_USE_TYPE Future {
   }
 
   /// \brief Producer API: instantiate a finished Future
-  static Future MakeFinished(Result<ValueType> res) {
-    Future fut;
-    if (ARROW_PREDICT_TRUE(res.ok())) {
-      fut.impl_ = FutureImpl::MakeFinished(FutureState::SUCCESS);
-    } else {
-      fut.impl_ = FutureImpl::MakeFinished(FutureState::FAILURE);
-    }
-    fut.SetResult(std::move(res));
+  static Future<ValueType> MakeFinished(Result<ValueType> res) {
+    Future<ValueType> fut;
+    fut.InitializeFromResult(std::move(res));
     return fut;
   }
 
   /// \brief Make a finished Future<> with the provided Status.
   template <typename E = ValueType, typename = typename std::enable_if<
-                                        std::is_same<E, detail::Empty>::value>::type>
+                                        std::is_same<E, internal::Empty>::value>::type>
   static Future<> MakeFinished(Status s = Status::OK()) {
     return MakeFinished(E::ToResult(std::move(s)));
   }
@@ -370,8 +433,7 @@ class ARROW_MUST_USE_TYPE Future {
   /// \brief Consumer API: Register a callback to run when this future completes
   ///
   /// The callback should receive the result of the future (const Result<T>&)
-  /// For a void or statusy future this should be
-  /// (const Result<detail::Empty>& result)
+  /// For a void or statusy future this should be (const Status&)
   ///
   /// There is no guarantee to the order in which callbacks will run.  In
   /// particular, callbacks added while the future is being marked complete
@@ -384,18 +446,37 @@ class ARROW_MUST_USE_TYPE Future {
   ///
   /// {
   ///     auto fut = Future<>::Make();
-  ///     fut.AddCallback([fut](...) {});
+  ///     fut.AddCallback([fut]() {});
   /// }
   ///
   /// In this example `fut` falls out of scope but is not destroyed because it holds a
   /// cyclic reference to itself through the callback.
   template <typename OnComplete>
-  void AddCallback(OnComplete on_complete) const {
+  typename std::enable_if<!detail::first_arg_is_status<OnComplete>::value>::type
+  AddCallback(OnComplete on_complete) const {
     // We know impl_ will not be dangling when invoking callbacks because at least one
     // thread will be waiting for MarkFinished to return. Thus it's safe to keep a
     // weak reference to impl_ here
-    impl_->AddCallback(
-        Callback<OnComplete>{WeakFuture<T>(*this), std::move(on_complete)});
+    struct Callback {
+      void operator()() && { std::move(on_complete)(weak_self.get().result()); }
+      WeakFuture<T> weak_self;
+      OnComplete on_complete;
+    };
+    impl_->AddCallback(Callback{WeakFuture<T>(*this), std::move(on_complete)});
+  }
+
+  /// Overload for callbacks accepting a Status
+  template <typename OnComplete>
+  typename std::enable_if<detail::first_arg_is_status<OnComplete>::value>::type
+  AddCallback(OnComplete on_complete) const {
+    static_assert(std::is_same<internal::Empty, ValueType>::value,
+                  "Callbacks for Future<> should accept Status and not Result");
+    struct Callback {
+      void operator()() && { std::move(on_complete)(weak_self.get().status()); }
+      WeakFuture<T> weak_self;
+      OnComplete on_complete;
+    };
+    impl_->AddCallback(Callback{WeakFuture<T>(*this), std::move(on_complete)});
   }
 
   /// \brief Overload of AddCallback that will return false instead of running
@@ -411,11 +492,32 @@ class ARROW_MUST_USE_TYPE Future {
   ///
   /// Returns true if a callback was actually added and false if the callback failed
   /// to add because the future was marked complete.
-  template <typename CallbackFactory>
-  bool TryAddCallback(const CallbackFactory& callback_factory) const {
+  template <typename CallbackFactory,
+            typename OnComplete = detail::result_of_t<CallbackFactory()>>
+  typename std::enable_if<!detail::first_arg_is_status<OnComplete>::value, bool>::type
+  TryAddCallback(const CallbackFactory& callback_factory) const {
+    struct Callback {
+      void operator()() && { std::move(on_complete)(weak_self.get().result()); }
+      WeakFuture<T> weak_self;
+      OnComplete on_complete;
+    };
+    return impl_->TryAddCallback([this, &callback_factory]() {
+      return Callback{WeakFuture<T>(*this), callback_factory()};
+    });
+  }
+
+  template <typename CallbackFactory,
+            typename OnComplete = detail::result_of_t<CallbackFactory()>>
+  typename std::enable_if<detail::first_arg_is_status<OnComplete>::value, bool>::type
+  TryAddCallback(const CallbackFactory& callback_factory) const {
+    struct Callback {
+      void operator()() && { std::move(on_complete)(weak_self.get().status()); }
+      WeakFuture<T> weak_self;
+      OnComplete on_complete;
+    };
+
     return impl_->TryAddCallback([this, &callback_factory]() {
-      return Callback<detail::result_of_t<CallbackFactory()>>{WeakFuture<T>(*this),
-                                                              callback_factory()};
+      return Callback{WeakFuture<T>(*this), callback_factory()};
     });
   }
 
@@ -427,8 +529,9 @@ class ARROW_MUST_USE_TYPE Future {
   /// returning the future.
   ///
   /// Two callbacks are supported:
-  /// - OnSuccess, called against the result (const ValueType&) on successul completion.
-  /// - OnFailure, called against the error (const Status&) on failed completion.
+  /// - OnSuccess, called with the result (const ValueType&) on successul completion.
+  ///              for an empty future this will be called with nothing ()
+  /// - OnFailure, called with the error (const Status&) on failed completion.
   ///
   /// Then() returns a Future whose ValueType is derived from the return type of the
   /// callbacks. If a callback returns:
@@ -454,11 +557,19 @@ class ARROW_MUST_USE_TYPE Future {
   template <typename OnSuccess, typename OnFailure,
             typename ContinuedFuture =
                 detail::ContinueFuture::ForSignature<OnSuccess && (const T&)>>
-  ContinuedFuture Then(OnSuccess on_success, OnFailure on_failure) const {
+  ContinuedFuture Then(
+      OnSuccess on_success, OnFailure on_failure,
+      typename std::enable_if<!detail::has_no_args<OnSuccess>::value>::type* =
+          NULLPTR) const {
     static_assert(
         std::is_same<detail::ContinueFuture::ForSignature<OnFailure && (const Status&)>,
                      ContinuedFuture>::value,
         "OnSuccess and OnFailure must continue with the same future type");
+    using OnSuccessArg =
+        typename std::decay<internal::call_traits::argument_type<0, OnSuccess>>::type;
+    static_assert(
+        !std::is_same<OnSuccessArg, typename EnsureResult<OnSuccessArg>::type>::value,
+        "OnSuccess' argument should not be a Result");
 
     auto next = ContinuedFuture::Make();
 
@@ -486,11 +597,66 @@ class ARROW_MUST_USE_TYPE Future {
     return next;
   }
 
+  /// \brief Overload for callbacks which ignore the value
+  template <
+      typename OnSuccess, typename OnFailure,
+      typename ContinuedFuture = detail::ContinueFuture::ForSignature<OnSuccess && ()>>
+  ContinuedFuture Then(
+      OnSuccess on_success, OnFailure on_failure,
+      typename std::enable_if<detail::has_no_args<OnSuccess>::value>::type* =
+          NULLPTR) const {
+    static_assert(
+        std::is_same<detail::ContinueFuture::ForSignature<OnFailure && (const Status&)>,
+                     ContinuedFuture>::value,
+        "OnSuccess and OnFailure must continue with the same future type");
+
+    auto next = ContinuedFuture::Make();
+
+    struct Callback {
+      void operator()(const Result<T>& result) && {
+        detail::ContinueFuture continue_future;
+        if (ARROW_PREDICT_TRUE(result.ok())) {
+          // move on_failure to a(n immediately destroyed) temporary to free its resources
+          ARROW_UNUSED(OnFailure(std::move(on_failure)));
+          continue_future(std::move(next), std::move(on_success));
+        } else {
+          ARROW_UNUSED(OnSuccess(std::move(on_success)));
+          continue_future(std::move(next), std::move(on_failure), result.status());
+        }
+      }
+
+      OnSuccess on_success;
+      OnFailure on_failure;
+      ContinuedFuture next;
+    };
+
+    AddCallback(Callback{std::forward<OnSuccess>(on_success),
+                         std::forward<OnFailure>(on_failure), next});
+
+    return next;
+  }
+
   /// \brief Overload without OnFailure. Failures will be passed through unchanged.
   template <typename OnSuccess,
             typename ContinuedFuture =
-                detail::ContinueFuture::ForSignature<OnSuccess && (const T&)>>
-  ContinuedFuture Then(OnSuccess&& on_success) const {
+                detail::ContinueFuture::ForSignature<OnSuccess && (const T&)>,
+            typename E = ValueType>
+  typename std::enable_if<!detail::has_no_args<OnSuccess>::value, ContinuedFuture>::type
+  Then(OnSuccess&& on_success) const {
+    return Then(std::forward<OnSuccess>(on_success), [](const Status& s) {
+      return Result<typename ContinuedFuture::ValueType>(s);
+    });
+  }
+
+  /// \brief Statusy overload without OnFailure
+  template <
+      typename OnSuccess,
+      typename ContinuedFuture = detail::ContinueFuture::ForSignature<OnSuccess && ()>,
+      typename E = ValueType>
+  typename std::enable_if<detail::has_no_args<OnSuccess>::value, ContinuedFuture>::type
+  Then(OnSuccess&& on_success) const {
+    static_assert(std::is_same<internal::Empty, ValueType>::value,
+                  "Then callback OnSuccess must receive const T&");
     return Then(std::forward<OnSuccess>(on_success), [](const Status& s) {
       return Result<typename ContinuedFuture::ValueType>(s);
     });
@@ -519,16 +685,16 @@ class ARROW_MUST_USE_TYPE Future {
       : Future(Result<ValueType>(std::move(s))) {}
 
  protected:
-  template <typename OnComplete>
-  struct Callback {
-    void operator()() && {
-      auto self = weak_self.get();
-      std::move(on_complete)(*self.GetResult());
+  void InitializeFromResult(Result<ValueType> res) {
+    if (ARROW_PREDICT_TRUE(res.ok())) {
+      impl_ = FutureImpl::MakeFinished(FutureState::SUCCESS);
+    } else {
+      impl_ = FutureImpl::MakeFinished(FutureState::FAILURE);
     }
+    SetResult(std::move(res));
+  }
 
-    WeakFuture<T> weak_self;
-    OnComplete on_complete;
-  };
+  void Initialize() { impl_ = FutureImpl::Make(); }
 
   Result<ValueType>* GetResult() const {
     return static_cast<Result<ValueType>*>(impl_->result_.get());
@@ -573,6 +739,17 @@ class ARROW_MUST_USE_TYPE Future {
   FRIEND_TEST(FutureRefTest, HeadRemoved);
 };
 
+template <typename T>
+typename Future<T>::SyncType FutureToSync(const Future<T>& fut) {
+  return fut.result();
+}
+
+template <>
+inline typename Future<internal::Empty>::SyncType FutureToSync<internal::Empty>(
+    const Future<internal::Empty>& fut) {
+  return fut.status();
+}
+
 template <typename T>
 class WeakFuture {
  public:
@@ -691,26 +868,26 @@ struct Continue {
   }
 };
 
-template <typename T = detail::Empty>
+template <typename T = internal::Empty>
 util::optional<T> Break(T break_value = {}) {
   return util::optional<T>{std::move(break_value)};
 }
 
-template <typename T = detail::Empty>
+template <typename T = internal::Empty>
 using ControlFlow = util::optional<T>;
 
 /// \brief Loop through an asynchronous sequence
 ///
-/// \param[in] iterate A generator of Future<ControlFlow<BreakValue>>. On completion of
-/// each yielded future the resulting ControlFlow will be examined. A Break will terminate
-/// the loop, while a Continue will re-invoke `iterate`. \return A future which will
-/// complete when a Future returned by iterate completes with a Break
+/// \param[in] iterate A generator of Future<ControlFlow<BreakValue>>. On completion
+/// of each yielded future the resulting ControlFlow will be examined. A Break will
+/// terminate the loop, while a Continue will re-invoke `iterate`.
+///
+/// \return A future which will complete when a Future returned by iterate completes with
+/// a Break
 template <typename Iterate,
           typename Control = typename detail::result_of_t<Iterate()>::ValueType,
           typename BreakValueType = typename Control::value_type>
 Future<BreakValueType> Loop(Iterate iterate) {
-  auto break_fut = Future<BreakValueType>::Make();
-
   struct Callback {
     bool CheckForTermination(const Result<Control>& control_res) {
       if (!control_res.ok()) {
@@ -753,6 +930,7 @@ Future<BreakValueType> Loop(Iterate iterate) {
     Future<BreakValueType> break_fut;
   };
 
+  auto break_fut = Future<BreakValueType>::Make();
   auto control_fut = iterate();
   control_fut.AddCallback(Callback{std::move(iterate), break_fut});
 
diff --git a/cpp/src/arrow/util/future_test.cc b/cpp/src/arrow/util/future_test.cc
index a505df5ef04..8c1e72a48bd 100644
--- a/cpp/src/arrow/util/future_test.cc
+++ b/cpp/src/arrow/util/future_test.cc
@@ -261,21 +261,17 @@ TEST(FutureSyncTest, Empty) {
     // MakeFinished()
     auto fut = Future<>::MakeFinished();
     AssertSuccessful(fut);
-    auto res = fut.result();
-    ASSERT_OK(res);
-    res = std::move(fut.result());
-    ASSERT_OK(res);
   }
   {
     // MarkFinished(Status)
     auto fut = Future<>::Make();
     AssertNotFinished(fut);
-    fut.MarkFinished(Status::OK());
+    fut.MarkFinished();
     AssertSuccessful(fut);
   }
   {
     // MakeFinished(Status)
-    auto fut = Future<>::MakeFinished(Status::OK());
+    auto fut = Future<>::MakeFinished();
     AssertSuccessful(fut);
     fut = Future<>::MakeFinished(Status::IOError("xxx"));
     AssertFailed(fut);
@@ -352,8 +348,7 @@ TEST(FutureRefTest, ChainRemoved) {
   std::weak_ptr<FutureImpl> ref2;
   {
     auto fut = Future<>::Make();
-    auto fut2 =
-        fut.Then([](const Result<detail::Empty>& status) { return Status::OK(); });
+    auto fut2 = fut.Then([]() { return Status::OK(); });
     ref = fut.impl_;
     ref2 = fut2.impl_;
   }
@@ -362,7 +357,7 @@ TEST(FutureRefTest, ChainRemoved) {
 
   {
     auto fut = Future<>::Make();
-    auto fut2 = fut.Then([](const Result<detail::Empty>&) { return Future<>::Make(); });
+    auto fut2 = fut.Then([]() { return Future<>::Make(); });
     ref = fut.impl_;
     ref2 = fut2.impl_;
   }
@@ -377,7 +372,7 @@ TEST(FutureRefTest, TailRemoved) {
   bool side_effect_run = false;
   {
     ref = std::make_shared<Future<>>(Future<>::Make());
-    auto fut2 = ref->Then([&side_effect_run](const Result<detail::Empty>& status) {
+    auto fut2 = ref->Then([&side_effect_run]() {
       side_effect_run = true;
       return Status::OK();
     });
@@ -401,13 +396,13 @@ TEST(FutureRefTest, HeadRemoved) {
   {
     auto fut = std::make_shared<Future<>>(Future<>::Make());
     ref = fut->impl_;
-    ref2 = std::make_shared<Future<>>(fut->Then([](...) {}));
+    ref2 = std::make_shared<Future<>>(fut->Then([]() {}));
   }
   ASSERT_TRUE(ref.expired());
 
   {
     auto fut = Future<>::Make();
-    ref2 = std::make_shared<Future<>>(fut.Then([&](...) {
+    ref2 = std::make_shared<Future<>>(fut.Then([&]() {
       auto intermediate = Future<>::Make();
       ref = intermediate.impl_;
       return intermediate;
@@ -434,7 +429,8 @@ TEST(FutureStressTest, Callback) {
       auto test_thread = std::this_thread::get_id();
       while (!finished.load()) {
         fut.AddCallback([&test_thread, &count_finished_immediately,
-                         &count_finished_deferred](const Result<detail::Empty>& result) {
+                         &count_finished_deferred](const Status& status) {
+          ARROW_EXPECT_OK(status);
           if (std::this_thread::get_id() == test_thread) {
             count_finished_immediately++;
           } else {
@@ -483,14 +479,15 @@ TEST(FutureStressTest, TryAddCallback) {
 
     std::thread callback_adder([&] {
       callback_adder_thread_id = std::this_thread::get_id();
-      std::function<void(const Result<detail::Empty>&)> callback =
-          [&callback_adder_thread_id](const Result<detail::Empty>&) {
+      std::function<void(const Status&)> callback =
+          [&callback_adder_thread_id](const Status& st) {
+            ARROW_EXPECT_OK(st);
             if (std::this_thread::get_id() == callback_adder_thread_id) {
               FAIL() << "TryAddCallback allowed a callback to be run synchronously";
             }
           };
-      std::function<std::function<void(const Result<detail::Empty>&)>()>
-          callback_factory = [&callback]() { return callback; };
+      std::function<std::function<void(const Status&)>()> callback_factory =
+          [&callback]() { return callback; };
       while (true) {
         auto callback_added = fut.TryAddCallback(callback_factory);
         if (callback_added) {
@@ -541,7 +538,7 @@ TEST(FutureCompletionTest, Void) {
   {
     // Propagate failure by returning it from on_failure
     auto fut = Future<int>::Make();
-    auto fut2 = fut.Then([](...) {}, [](const Status& s) { return s; });
+    auto fut2 = fut.Then([](const int&) {}, [](const Status& s) { return s; });
     fut.MarkFinished(Status::IOError("xxx"));
     AssertFailed(fut2);
     ASSERT_TRUE(fut2.status().IsIOError());
@@ -549,7 +546,7 @@ TEST(FutureCompletionTest, Void) {
   {
     // From void
     auto fut = Future<>::Make();
-    auto fut2 = fut.Then([](const Result<detail::Empty>&) {});
+    auto fut2 = fut.Then([]() {});
     fut.MarkFinished();
     AssertSuccessful(fut2);
   }
@@ -557,9 +554,9 @@ TEST(FutureCompletionTest, Void) {
     // Propagate failure by not having on_failure
     auto fut = Future<>::Make();
     auto cb_was_run = false;
-    auto fut2 = fut.Then([&cb_was_run](const Result<detail::Empty>& res) {
+    auto fut2 = fut.Then([&cb_was_run]() {
       cb_was_run = true;
-      return res;
+      return Status::OK();
     });
     fut.MarkFinished(Status::IOError("xxx"));
     AssertFailed(fut2);
@@ -569,7 +566,7 @@ TEST(FutureCompletionTest, Void) {
     // Swallow failure by catching in on_failure
     auto fut = Future<>::Make();
     Status status_seen = Status::OK();
-    auto fut2 = fut.Then([](...) {},
+    auto fut2 = fut.Then([]() {},
                          [&status_seen](const Status& s) {
                            status_seen = s;
                            return Status::OK();
@@ -626,7 +623,7 @@ TEST(FutureCompletionTest, NonVoid) {
   {
     // From void
     auto fut = Future<>::Make();
-    auto fut2 = fut.Then([](...) { return 42; });
+    auto fut2 = fut.Then([]() { return 42; });
     fut.MarkFinished();
     AssertSuccessful(fut2);
     auto result = *fut2.result();
@@ -702,7 +699,7 @@ TEST(FutureCompletionTest, FutureNonVoid) {
     // From void
     auto fut = Future<>::Make();
     auto innerFut = Future<std::string>::Make();
-    auto fut2 = fut.Then([&innerFut](...) { return innerFut; });
+    auto fut2 = fut.Then([&innerFut]() { return innerFut; });
     fut.MarkFinished();
     AssertNotFinished(fut2);
     innerFut.MarkFinished("hello");
@@ -716,7 +713,7 @@ TEST(FutureCompletionTest, FutureNonVoid) {
     auto innerFut = Future<std::string>::Make();
     auto was_cb_run = false;
     auto fut2 = fut.Then(
-        [&innerFut, &was_cb_run](...) {
+        [&innerFut, &was_cb_run]() {
           was_cb_run = true;
           return Result<Future<std::string>>(innerFut);
         },
@@ -775,7 +772,7 @@ TEST(FutureCompletionTest, Status) {
   {
     // From void
     auto fut = Future<>::Make();
-    auto fut2 = fut.Then([](const Result<detail::Empty>& res) { return Status::OK(); });
+    auto fut2 = fut.Then([]() { return Status::OK(); });
     fut.MarkFinished();
     AssertSuccessful(fut2);
   }
@@ -784,7 +781,7 @@ TEST(FutureCompletionTest, Status) {
     auto fut = Future<>::Make();
     auto was_cb_run = false;
     auto fut2 = fut.Then(
-        [&was_cb_run](const Result<detail::Empty>& res) {
+        [&was_cb_run]() {
           was_cb_run = true;
           return Status::OK();
         },
@@ -846,7 +843,7 @@ TEST(FutureCompletionTest, Result) {
   {
     // From void
     auto fut = Future<>::Make();
-    auto fut2 = fut.Then([](...) { return Result<int>(42); });
+    auto fut2 = fut.Then([]() { return Result<int>(42); });
     fut.MarkFinished();
     AssertSuccessful(fut2);
     auto result = *fut2.result();
@@ -857,7 +854,7 @@ TEST(FutureCompletionTest, Result) {
     auto fut = Future<>::Make();
     auto was_cb_run = false;
     auto fut2 = fut.Then(
-        [&was_cb_run](...) {
+        [&was_cb_run]() {
           was_cb_run = true;
           return Result<int>(42);
         },
@@ -938,7 +935,7 @@ TEST(FutureCompletionTest, FutureVoid) {
     // From void
     auto fut = Future<>::Make();
     auto innerFut = Future<>::Make();
-    auto fut2 = fut.Then([&innerFut](...) { return innerFut; });
+    auto fut2 = fut.Then([&innerFut]() { return innerFut; });
     fut.MarkFinished();
     AssertNotFinished(fut2);
     innerFut.MarkFinished();
@@ -948,7 +945,7 @@ TEST(FutureCompletionTest, FutureVoid) {
     // Propagate failure by returning failure
     auto fut = Future<>::Make();
     auto innerFut = Future<>::Make();
-    auto fut2 = fut.Then([&innerFut](...) { return innerFut; },
+    auto fut2 = fut.Then([&innerFut]() { return innerFut; },
                          [](const Status& s) { return Future<>::MakeFinished(s); });
     fut.MarkFinished(Status::IOError("xxx"));
     AssertFailed(fut2);
@@ -1080,7 +1077,7 @@ TEST(FutureLoopTest, Sync) {
 
 TEST(FutureLoopTest, EmptyBreakValue) {
   Future<> none_fut =
-      Loop([&] { return Future<>::MakeFinished().Then([&](...) { return Break(); }); });
+      Loop([&] { return Future<>::MakeFinished().Then([&]() { return Break(); }); });
   AssertSuccessful(none_fut);
 }
 
@@ -1145,7 +1142,7 @@ TEST(FutureLoopTest, AllowsBreakFutToBeDiscarded) {
     }
     return Future<ControlFlow<int>>::MakeFinished(Break(-1));
   };
-  auto loop_fut = Loop(loop_body).Then([](...) { return Status::OK(); });
+  auto loop_fut = Loop(loop_body).Then([](const int&) { return Status::OK(); });
   ASSERT_TRUE(loop_fut.Wait(0.1));
 }
 
@@ -1175,7 +1172,7 @@ class MoveTrackingCallable {
     return *this;
   }
 
-  Status operator()(...) {
+  Status operator()() {
     // std::cout << "TRIGGER" << std::endl;
     if (valid_) {
       return Status::OK();
@@ -1197,7 +1194,7 @@ TEST(FutureCompletionTest, ReuseCallback) {
     continuation = fut.Then(callback);
   }
 
-  fut.MarkFinished(Status::OK());
+  fut.MarkFinished();
 
   ASSERT_TRUE(continuation.is_finished());
   if (continuation.is_finished()) {
@@ -1596,7 +1593,5 @@ TEST(FnOnceTest, MoveOnlyDataType) {
   ASSERT_EQ(i0.moves, 0);
   ASSERT_EQ(i1.moves, 0);
 }
-
 }  // namespace internal
-
 }  // namespace arrow
diff --git a/cpp/src/arrow/util/thread_pool.cc b/cpp/src/arrow/util/thread_pool.cc
index 6465ebbc6fc..672839b67d5 100644
--- a/cpp/src/arrow/util/thread_pool.cc
+++ b/cpp/src/arrow/util/thread_pool.cc
@@ -424,11 +424,6 @@ ThreadPool* GetCpuThreadPool() {
   return singleton.get();
 }
 
-Status RunSynchronouslyVoid(FnOnce<Future<arrow::detail::Empty>(Executor*)> get_future,
-                            bool use_threads) {
-  return RunSynchronously(std::move(get_future), use_threads).status();
-}
-
 }  // namespace internal
 
 int GetCpuThreadPoolCapacity() { return internal::GetCpuThreadPool()->GetCapacity(); }
diff --git a/cpp/src/arrow/util/thread_pool.h b/cpp/src/arrow/util/thread_pool.h
index fc7dc85b15e..8626132a348 100644
--- a/cpp/src/arrow/util/thread_pool.h
+++ b/cpp/src/arrow/util/thread_pool.h
@@ -102,10 +102,10 @@ class ARROW_EXPORT Executor {
   // The continuations of that future should run on the CPU thread pool keeping
   // CPU heavy work off the I/O thread pool.  So the I/O task should transfer
   // the future to the CPU executor before returning.
-  template <typename T>
+  template <typename T, typename FT = Future<T>, typename FTSync = typename FT::SyncType>
   Future<T> Transfer(Future<T> future) {
     auto transferred = Future<T>::Make();
-    auto callback = [this, transferred](const Result<T>& result) mutable {
+    auto callback = [this, transferred](const FTSync& result) mutable {
       auto spawn_status =
           Spawn([transferred, result]() mutable { transferred.MarkFinished(result); });
       if (!spawn_status.ok()) {
@@ -197,7 +197,7 @@ class ARROW_EXPORT Executor {
 /// asynchronous continuation.
 class ARROW_EXPORT SerialExecutor : public Executor {
  public:
-  template <typename T = ::arrow::detail::Empty>
+  template <typename T = ::arrow::internal::Empty>
   using TopLevelTask = internal::FnOnce<Future<T>(Executor*)>;
 
   ~SerialExecutor();
@@ -214,9 +214,11 @@ class ARROW_EXPORT SerialExecutor : public Executor {
   /// RunSynchronously/RunSerially which delegates the responsiblity onto a Future
   /// producer's existing responsibility to always mark a future finished (which can
   /// someday be aided by ARROW-12207).
-  template <typename T>
-  static Result<T> RunInSerialExecutor(TopLevelTask<T> initial_task) {
-    return SerialExecutor().Run<T>(std::move(initial_task));
+  template <typename T = internal::Empty, typename FT = Future<T>,
+            typename FTSync = typename FT::SyncType>
+  static FTSync RunInSerialExecutor(TopLevelTask<T> initial_task) {
+    Future<T> fut = SerialExecutor().Run<T>(std::move(initial_task));
+    return FutureToSync(fut);
   }
 
  private:
@@ -226,15 +228,15 @@ class ARROW_EXPORT SerialExecutor : public Executor {
   struct State;
   std::shared_ptr<State> state_;
 
-  template <typename T>
-  Result<T> Run(TopLevelTask<T> initial_task) {
+  template <typename T, typename FTSync = typename Future<T>::SyncType>
+  Future<T> Run(TopLevelTask<T> initial_task) {
     auto final_fut = std::move(initial_task)(this);
     if (final_fut.is_finished()) {
-      return final_fut.result();
+      return final_fut;
     }
-    final_fut.AddCallback([this](const Result<T>&) { MarkFinished(); });
+    final_fut.AddCallback([this](const FTSync&) { MarkFinished(); });
     RunLoop();
-    return final_fut.result();
+    return final_fut;
   }
   void RunLoop();
   void MarkFinished();
@@ -328,16 +330,16 @@ ARROW_EXPORT ThreadPool* GetCpuThreadPool();
 /// `get_future` is called (from this thread) with the chosen executor and must
 /// return a future that will eventually finish. This function returns once the
 /// future has finished.
-template <typename T>
-Result<T> RunSynchronously(FnOnce<Future<T>(Executor*)> get_future, bool use_threads) {
+template <typename Fut, typename ValueType = typename Fut::ValueType>
+typename Fut::SyncType RunSynchronously(FnOnce<Fut(Executor*)> get_future,
+                                        bool use_threads) {
   if (use_threads) {
-    return std::move(get_future)(GetCpuThreadPool()).result();
+    auto fut = std::move(get_future)(GetCpuThreadPool());
+    return FutureToSync(fut);
   } else {
-    return SerialExecutor::RunInSerialExecutor<T>(std::move(get_future));
+    return SerialExecutor::RunInSerialExecutor<ValueType>(std::move(get_future));
   }
 }
 
-ARROW_EXPORT Status RunSynchronouslyVoid(
-    FnOnce<Future<arrow::detail::Empty>(Executor*)> get_future, bool use_threads);
 }  // namespace internal
 }  // namespace arrow
diff --git a/cpp/src/arrow/util/thread_pool_benchmark.cc b/cpp/src/arrow/util/thread_pool_benchmark.cc
index b02ca9bb54b..7c342c47fb1 100644
--- a/cpp/src/arrow/util/thread_pool_benchmark.cc
+++ b/cpp/src/arrow/util/thread_pool_benchmark.cc
@@ -110,8 +110,8 @@ static void RunInSerialExecutor(benchmark::State& state) {  // NOLINT non-const
   Workload workload(workload_size);
 
   for (auto _ : state) {
-    ABORT_NOT_OK(SerialExecutor::RunInSerialExecutor<arrow::detail::Empty>(
-        [&](internal::Executor* executor) {
+    ABORT_NOT_OK(
+        SerialExecutor::RunInSerialExecutor<Future<>>([&](internal::Executor* executor) {
           return DeferNotOk(executor->Submit(std::ref(workload)));
         }));
   }
@@ -136,7 +136,7 @@ static void ThreadPoolSubmit(benchmark::State& state) {  // NOLINT non-const ref
 
     for (int32_t i = 0; i < nspawns; ++i) {
       // Pass the task by reference to avoid copying it around
-      (void)DeferNotOk(pool->Submit(std::ref(workload))).Then([&](...) {
+      (void)DeferNotOk(pool->Submit(std::ref(workload))).Then([&]() {
         n_finished.fetch_add(1);
       });
     }
diff --git a/cpp/src/arrow/util/thread_pool_test.cc b/cpp/src/arrow/util/thread_pool_test.cc
index 9926ac1a7a4..bac6baf839f 100644
--- a/cpp/src/arrow/util/thread_pool_test.cc
+++ b/cpp/src/arrow/util/thread_pool_test.cc
@@ -133,7 +133,7 @@ class TestRunSynchronously : public testing::TestWithParam<bool> {
   }
 
   Status RunVoid(FnOnce<Future<>(Executor*)> top_level_task) {
-    return RunSynchronouslyVoid(std::move(top_level_task), UseThreads());
+    return RunSynchronously(std::move(top_level_task), UseThreads());
   }
 
   void TestContinueAfterExternal(bool transfer_to_main_thread) {
@@ -141,7 +141,7 @@ class TestRunSynchronously : public testing::TestWithParam<bool> {
     EXPECT_OK_AND_ASSIGN(auto external_pool, ThreadPool::Make(1));
     auto top_level_task = [&](Executor* executor) {
       struct Callback {
-        Status operator()(...) {
+        Status operator()() {
           *continuation_ran = true;
           return Status::OK();
         }
@@ -166,7 +166,7 @@ TEST_P(TestRunSynchronously, SimpleRun) {
   auto task = [&](Executor* executor) {
     EXPECT_NE(executor, nullptr);
     task_ran = true;
-    return Future<>::MakeFinished(Status::OK());
+    return Future<>::MakeFinished();
   };
   ASSERT_OK(RunVoid(std::move(task)));
   EXPECT_TRUE(task_ran);
@@ -189,11 +189,7 @@ TEST_P(TestRunSynchronously, SpawnMoreNested) {
   auto top_level_task = [&](Executor* executor) -> Future<> {
     auto fut_a = DeferNotOk(executor->Submit([&] { nested_ran++; }));
     auto fut_b = DeferNotOk(executor->Submit([&] { nested_ran++; }));
-    return AllComplete({fut_a, fut_b})
-        .Then([&](const Result<arrow::detail::Empty>& result) {
-          nested_ran++;
-          return result;
-        });
+    return AllComplete({fut_a, fut_b}).Then([&]() { nested_ran++; });
   };
   ASSERT_OK(RunVoid(std::move(top_level_task)));
   EXPECT_EQ(nested_ran, 3);
diff --git a/cpp/src/arrow/util/type_fwd.h b/cpp/src/arrow/util/type_fwd.h
index f5d01518862..ca107c2c69d 100644
--- a/cpp/src/arrow/util/type_fwd.h
+++ b/cpp/src/arrow/util/type_fwd.h
@@ -19,11 +19,11 @@
 
 namespace arrow {
 
-namespace detail {
+namespace internal {
 struct Empty;
-}  // namespace detail
+}  // namespace internal
 
-template <typename T = detail::Empty>
+template <typename T = internal::Empty>
 class WeakFuture;
 class FutureWaiter;
 

From b38ab81cb96e393a026d05a22e5a2f62ff6c23d7 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Mon, 17 May 2021 10:54:55 -0400
Subject: [PATCH 259/719] ARROW-12806: [Python]
 test_write_to_dataset_filesystem missing a dataset mark

Closes #10346 from jorisvandenbossche/ARROW-12806

Authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 python/pyarrow/tests/parquet/test_dataset.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/pyarrow/tests/parquet/test_dataset.py b/python/pyarrow/tests/parquet/test_dataset.py
index 70ea37b5af0..8cff6954cf2 100644
--- a/python/pyarrow/tests/parquet/test_dataset.py
+++ b/python/pyarrow/tests/parquet/test_dataset.py
@@ -1404,6 +1404,7 @@ def partition_filename_callback(keys):
     assert sorted(expected_basenames) == sorted(output_basenames)
 
 
+@pytest.mark.dataset
 @pytest.mark.pandas
 def test_write_to_dataset_filesystem(tempdir):
     df = pd.DataFrame({'A': [1, 2, 3]})

From 1dcbfa39d7f0c3643bd1a4b06b5a308a147b1b46 Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Mon, 17 May 2021 10:09:08 -0500
Subject: [PATCH 260/719] ARROW-12785: [CI] the r-devdocs build errors when
 brew installing gcc

Closes #10328 from jonkeane/ARROW-12785-devdocs-gcc

Authored-by: Jonathan Keane <jkeane@gmail.com>
Signed-off-by: Jonathan Keane <jkeane@gmail.com>
---
 dev/tasks/r/github.devdocs.yml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/dev/tasks/r/github.devdocs.yml b/dev/tasks/r/github.devdocs.yml
index 1224a2555c8..e4d6bfb6953 100644
--- a/dev/tasks/r/github.devdocs.yml
+++ b/dev/tasks/r/github.devdocs.yml
@@ -57,6 +57,8 @@ jobs:
           pkgs <- installed.packages()[, "Package"]
           sessioninfo::session_info(pkgs, include_base = TRUE)
         shell: Rscript {0}
+      - name: Remove system gfortran so that brew can install gcc successfully
+        run: rm -f /usr/local/bin/gfortran
       - name: Write the install script
         env:
           RUN_DEVDOCS: TRUE
@@ -65,7 +67,7 @@ jobs:
           DEVDOCS_SYSTEM_INSTALL: {{ "${{contains(matrix.system-install, 'true')}}" }}
           DEVDOCS_PRIOR_SYSTEM_INSTALL: {{ "${{contains(matrix.system-install, 'old')}}" }}
         run: |
-          # This isn't actually rendering the docs, but will save arrow/r/vignettes/script.sh 
+          # This isn't actually rendering the docs, but will save arrow/r/vignettes/script.sh
           # which can be sourced to install arrow.
           rmarkdown::render("arrow/r/vignettes/developing.Rmd")
         shell: Rscript {0}

From 9ca0c43dd77b953134cf6c182f0c40651c2a5b79 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Mon, 17 May 2021 13:14:00 -0400
Subject: [PATCH 261/719] ARROW-12807: [C++] Fix build errors in IPC reader

ARROW-12004 and ARROW-11772 merge cleanly but conflict with each other.

Closes #10347 from lidavidm/arrow-12807

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/src/arrow/ipc/reader.cc | 29 ++++++++++++-----------------
 1 file changed, 12 insertions(+), 17 deletions(-)

diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc
index 7c3115b7c3f..7c26bce913d 100644
--- a/cpp/src/arrow/ipc/reader.cc
+++ b/cpp/src/arrow/ipc/reader.cc
@@ -1134,15 +1134,14 @@ class RecordBatchFileReaderImpl : public RecordBatchFileReader {
     footer_offset_ = footer_offset;
     auto cpu_executor = ::arrow::internal::GetCpuThreadPool();
     auto self = std::dynamic_pointer_cast<RecordBatchFileReaderImpl>(shared_from_this());
-    return ReadFooterAsync(cpu_executor)
-        .Then([self, options](const detail::Empty&) -> Status {
-          // Get the schema and record any observed dictionaries
-          RETURN_NOT_OK(UnpackSchemaMessage(
-              self->footer_->schema(), options, &self->dictionary_memo_, &self->schema_,
-              &self->out_schema_, &self->field_inclusion_mask_, &self->swap_endian_));
-          ++self->stats_.num_messages;
-          return Status::OK();
-        });
+    return ReadFooterAsync(cpu_executor).Then([self, options]() -> Status {
+      // Get the schema and record any observed dictionaries
+      RETURN_NOT_OK(UnpackSchemaMessage(
+          self->footer_->schema(), options, &self->dictionary_memo_, &self->schema_,
+          &self->out_schema_, &self->field_inclusion_mask_, &self->swap_endian_));
+      ++self->stats_.num_messages;
+      return Status::OK();
+    });
   }
 
   std::shared_ptr<Schema> schema() const override { return out_schema_; }
@@ -1348,16 +1347,14 @@ Future<std::shared_ptr<RecordBatchFileReader>> RecordBatchFileReader::OpenAsync(
     const IpcReadOptions& options) {
   auto result = std::make_shared<RecordBatchFileReaderImpl>();
   return result->OpenAsync(file, footer_offset, options)
-      .Then(
-          [=](...) -> Result<std::shared_ptr<RecordBatchFileReader>> { return result; });
+      .Then([=]() -> Result<std::shared_ptr<RecordBatchFileReader>> { return result; });
 }
 
 Future<std::shared_ptr<RecordBatchFileReader>> RecordBatchFileReader::OpenAsync(
     io::RandomAccessFile* file, int64_t footer_offset, const IpcReadOptions& options) {
   auto result = std::make_shared<RecordBatchFileReaderImpl>();
   return result->OpenAsync(file, footer_offset, options)
-      .Then(
-          [=](...) -> Result<std::shared_ptr<RecordBatchFileReader>> { return result; });
+      .Then([=]() -> Result<std::shared_ptr<RecordBatchFileReader>> { return result; });
 }
 
 Future<IpcFileRecordBatchGenerator::Item> IpcFileRecordBatchGenerator::operator()() {
@@ -1385,8 +1382,7 @@ Future<IpcFileRecordBatchGenerator::Item> IpcFileRecordBatchGenerator::operator(
   }
   auto block = FileBlockFromFlatbuffer(state->footer_->recordBatches()->Get(index_++));
   auto read_message = ReadBlock(block);
-  auto read_messages = read_dictionaries_.Then(
-      [read_message](const detail::Empty&) { return read_message; });
+  auto read_messages = read_dictionaries_.Then([read_message]() { return read_message; });
   // Force transfer. This may be wasteful in some cases, but ensures we get off the
   // I/O threads as soon as possible, and ensures we don't decode record batches
   // synchronously in the case that the message read has already finished.
@@ -1410,8 +1406,7 @@ Future<std::shared_ptr<Message>> IpcFileRecordBatchGenerator::ReadBlock(
     io::ReadRange range{block.offset, block.metadata_length + block.body_length};
     auto pool = state_->options_.memory_pool;
     return cached_source->WaitFor({range}).Then(
-        [cached_source, pool,
-         range](const detail::Empty&) -> Result<std::shared_ptr<Message>> {
+        [cached_source, pool, range]() -> Result<std::shared_ptr<Message>> {
           ARROW_ASSIGN_OR_RAISE(auto buffer, cached_source->Read(range));
           io::BufferReader stream(std::move(buffer));
           return ReadMessage(&stream, pool);

From 868814164119431a082249c32ce7958374698e01 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Mon, 17 May 2021 20:10:39 +0200
Subject: [PATCH 262/719] ARROW-12619: [Python] pyarrow sdist should not
 require git
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is a fallback_version configuration option for setuptools_scm which we don't use: https://github.com/pypa/setuptools_scm#configuration-parameters

Although this setting seems to have issues according to https://github.com/pypa/setuptools_scm/issues/549
We already have a workaround in setup.py for the functionality of the fallback_version option, but it is disabled for the case of sdist: https://github.com/apache/arrow/blob/master/python/setup.py#L529

Closes #10342 from kszucs/ARROW-12619

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 ci/scripts/python_sdist_test.sh | 8 +++++++-
 docker-compose.yml              | 7 ++++---
 python/setup.py                 | 8 ++------
 3 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/ci/scripts/python_sdist_test.sh b/ci/scripts/python_sdist_test.sh
index 1388ca09e43..3dd7d7ddd5b 100755
--- a/ci/scripts/python_sdist_test.sh
+++ b/ci/scripts/python_sdist_test.sh
@@ -42,10 +42,16 @@ export PYARROW_WITH_DATASET=${ARROW_DATASET:-OFF}
 # unset ARROW_HOME
 # apt purge -y pkg-config
 
+# ARROW-12619
+if command -v git &> /dev/null; then
+  echo "Git exists, remove it from PATH before executing this script."
+  exit 1
+fi
+
 if [ -n "${PYARROW_VERSION:-}" ]; then
   sdist="${arrow_dir}/python/dist/pyarrow-${PYARROW_VERSION}.tar.gz"
 else
-  sdist=$(ls "${arrow_dir}/python/dist/pyarrow-*.tar.gz" | sort -r | head -n1)
+  sdist=$(ls ${arrow_dir}/python/dist/pyarrow-*.tar.gz | sort -r | head -n1)
 fi
 ${PYTHON:-python} -m pip install ${sdist}
 
diff --git a/docker-compose.yml b/docker-compose.yml
index 53500b5f2cc..215aa2c6b7a 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -713,6 +713,7 @@ services:
     volumes: *ubuntu-volumes
     command: >
       /bin/bash -c "
+        apt remove -y git &&
         /arrow/ci/scripts/cpp_build.sh /arrow /build &&
         /arrow/ci/scripts/python_sdist_test.sh /arrow"
 
@@ -814,9 +815,9 @@ services:
         source: .
         target: "C:/arrow"
     command: arrow\\ci\\scripts\\python_wheel_windows_test.bat
-  
+
   java-bundled-jars:
-    # Docker image 
+    # Docker image
     image: ${REPO}:${ARCH}-java-bundled-jars-vcpkg-${VCPKG}
     build:
       args:
@@ -831,7 +832,7 @@ services:
     volumes:
       - .:/arrow:delegated
       - ${DOCKER_VOLUME_PREFIX}python-wheel-manylinux2014-ccache:/ccache:delegated
-    command: 
+    command:
       [/arrow/ci/scripts/java_bundled_jars_manylinux_build.sh /arrow /build /arrow/dist]
 
   ##############################  Integration #################################
diff --git a/python/setup.py b/python/setup.py
index fac8e0b32e3..80b6d70dd08 100755
--- a/python/setup.py
+++ b/python/setup.py
@@ -525,12 +525,8 @@ def _move_shared_libs_unix(build_prefix, build_lib, lib_name):
 default_version = '5.0.0-SNAPSHOT'
 if (not os.path.exists('../.git') and
         not os.environ.get('SETUPTOOLS_SCM_PRETEND_VERSION')):
-    if os.path.exists('PKG-INFO'):
-        # We're probably in a Python sdist, setuptools_scm will handle fine
-        pass
-    else:
-        os.environ['SETUPTOOLS_SCM_PRETEND_VERSION'] = \
-            default_version.replace('-SNAPSHOT', 'a0')
+    os.environ['SETUPTOOLS_SCM_PRETEND_VERSION'] = \
+        default_version.replace('-SNAPSHOT', 'a0')
 
 
 # See https://github.com/pypa/setuptools_scm#configuration-parameters

From 9d39b10f51e494d8ba46f17090dbe81bf87c2fa0 Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Mon, 17 May 2021 15:25:08 -0700
Subject: [PATCH 263/719] ARROW-12689: [R] Implement ArrowArrayStream C
 interface

Includes Scanner$ToRecordBatchReader() binding (added to C++ in #10268), and also fills out the other import/export methods in the C bridge (Type, Field).

Closes #10307 from nealrichardson/r-c-stream

Authored-by: Neal Richardson <neal.p.richardson@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 r/NEWS.md                       |   6 +
 r/R/arrow-package.R             |   3 +-
 r/R/arrowExports.R              |  68 ++++++---
 r/R/dataset-scan.R              |   1 +
 r/R/python.R                    |  65 ++++++++
 r/R/record-batch-reader.R       |  13 +-
 r/src/arrowExports.cpp          | 254 ++++++++++++++++++++++++--------
 r/src/dataset.cpp               |   6 +
 r/src/py-to-r.cpp               |  48 +++++-
 r/src/recordbatchreader.cpp     |  40 ++---
 r/tests/testthat/test-dataset.R |  15 ++
 r/tests/testthat/test-python.R  |  47 +++++-
 12 files changed, 440 insertions(+), 126 deletions(-)

diff --git a/r/NEWS.md b/r/NEWS.md
index 8e5d00ed623..d416aa34dd3 100644
--- a/r/NEWS.md
+++ b/r/NEWS.md
@@ -19,6 +19,12 @@
 
 # arrow 4.0.0.9000
 
+* `write_csv_arrow()` to write Arrow data to CSV
+* Bindings and support for more Arrow C++ Compute functions: `strsplit()` and `str_split()`, `na.omit()` et al., `any()`/`all()`,
+* `arrow_info()` now includes details on the C++ build, such as compiler version
+* `dplyr` queries on `Table` and `RecordBatch` now use the same expression internals as `Dataset` (via `InMemoryDataset`). Among other (mostly internal) benefits that come with this, the print method for `arrow_dplyr_query` now includes the expression and the resulting type of columns derived by `mutate()`.
+* Added bindings for the remainder of C data interface: Type, Field, and RecordBatchReader (from the experimental C stream interface). These also have `reticulate::py_to_r()` and `r_to_py()` methods. Along with the addition of the `Scanner$ToRecordBatchReader()` method, you can now build up a Dataset query in R and pass the resulting stream of batches to another tool in process.
+
 # arrow 4.0.0.1
 
  * The mimalloc memory allocator is the default memory allocator when using a static source build of the package on Linux. This is because it has better behavior under valgrind than jemalloc does. A full-featured build (installed with `LIBARROW_MINIMAL=false`) includes both jemalloc and mimalloc, and it has still has jemalloc as default, though this is configurable at runtime with the `ARROW_DEFAULT_MEMORY_POOL` environment variable.
diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R
index 9e8d629e08a..e557f869325 100644
--- a/r/R/arrow-package.R
+++ b/r/R/arrow-package.R
@@ -42,7 +42,8 @@
   }
   s3_register("dplyr::tbl_vars", "arrow_dplyr_query")
 
-  for (cl in c("Array", "RecordBatch", "ChunkedArray", "Table", "Schema")) {
+  for (cl in c("Array", "RecordBatch", "ChunkedArray", "Table", "Schema",
+               "Field", "DataType", "RecordBatchReader")) {
     s3_register("reticulate::py_to_r", paste0("pyarrow.lib.", cl))
     s3_register("reticulate::r_to_py", cl)
   }
diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R
index c026c72899f..8477f949f1b 100644
--- a/r/R/arrowExports.R
+++ b/r/R/arrowExports.R
@@ -528,6 +528,10 @@ dataset___Scanner__ScanBatches <- function(scanner){
     .Call(`_arrow_dataset___Scanner__ScanBatches`, scanner)
 }
 
+dataset___Scanner__ToRecordBatchReader <- function(scanner){
+    .Call(`_arrow_dataset___Scanner__ToRecordBatchReader`, scanner)
+}
+
 dataset___Scanner__head <- function(scanner, n){
     .Call(`_arrow_dataset___Scanner__head`, scanner, n)
 }
@@ -1288,6 +1292,30 @@ parquet___arrow___FileReader__GetSchema <- function(reader){
     .Call(`_arrow_parquet___arrow___FileReader__GetSchema`, reader)
 }
 
+allocate_arrow_schema <- function(){
+    .Call(`_arrow_allocate_arrow_schema`)
+}
+
+delete_arrow_schema <- function(ptr){
+    invisible(.Call(`_arrow_delete_arrow_schema`, ptr))
+}
+
+allocate_arrow_array <- function(){
+    .Call(`_arrow_allocate_arrow_array`)
+}
+
+delete_arrow_array <- function(ptr){
+    invisible(.Call(`_arrow_delete_arrow_array`, ptr))
+}
+
+allocate_arrow_array_stream <- function(){
+    .Call(`_arrow_allocate_arrow_array_stream`)
+}
+
+delete_arrow_array_stream <- function(ptr){
+    invisible(.Call(`_arrow_delete_arrow_array_stream`, ptr))
+}
+
 ImportArray <- function(array, schema){
     .Call(`_arrow_ImportArray`, array, schema)
 }
@@ -1300,26 +1328,26 @@ ImportSchema <- function(schema){
     .Call(`_arrow_ImportSchema`, schema)
 }
 
-allocate_arrow_schema <- function(){
-    .Call(`_arrow_allocate_arrow_schema`)
-}
-
-delete_arrow_schema <- function(ptr){
-    invisible(.Call(`_arrow_delete_arrow_schema`, ptr))
+ImportField <- function(field){
+    .Call(`_arrow_ImportField`, field)
 }
 
-allocate_arrow_array <- function(){
-    .Call(`_arrow_allocate_arrow_array`)
+ImportType <- function(type){
+    .Call(`_arrow_ImportType`, type)
 }
 
-delete_arrow_array <- function(ptr){
-    invisible(.Call(`_arrow_delete_arrow_array`, ptr))
+ImportRecordBatchReader <- function(stream){
+    .Call(`_arrow_ImportRecordBatchReader`, stream)
 }
 
 ExportType <- function(type, ptr){
     invisible(.Call(`_arrow_ExportType`, type, ptr))
 }
 
+ExportField <- function(field, ptr){
+    invisible(.Call(`_arrow_ExportField`, field, ptr))
+}
+
 ExportSchema <- function(schema, ptr){
     invisible(.Call(`_arrow_ExportSchema`, schema, ptr))
 }
@@ -1332,6 +1360,10 @@ ExportRecordBatch <- function(batch, array_ptr, schema_ptr){
     invisible(.Call(`_arrow_ExportRecordBatch`, batch, array_ptr, schema_ptr))
 }
 
+ExportRecordBatchReader <- function(reader, stream_ptr){
+    invisible(.Call(`_arrow_ExportRecordBatchReader`, reader, stream_ptr))
+}
+
 vec_to_arrow <- function(x, s_type){
     .Call(`_arrow_vec_to_arrow`, x, s_type)
 }
@@ -1428,12 +1460,16 @@ RecordBatchReader__ReadNext <- function(reader){
     .Call(`_arrow_RecordBatchReader__ReadNext`, reader)
 }
 
-ipc___RecordBatchStreamReader__Open <- function(stream){
-    .Call(`_arrow_ipc___RecordBatchStreamReader__Open`, stream)
+RecordBatchReader__batches <- function(reader){
+    .Call(`_arrow_RecordBatchReader__batches`, reader)
+}
+
+Table__from_RecordBatchReader <- function(reader){
+    .Call(`_arrow_Table__from_RecordBatchReader`, reader)
 }
 
-ipc___RecordBatchStreamReader__batches <- function(reader){
-    .Call(`_arrow_ipc___RecordBatchStreamReader__batches`, reader)
+ipc___RecordBatchStreamReader__Open <- function(stream){
+    .Call(`_arrow_ipc___RecordBatchStreamReader__Open`, stream)
 }
 
 ipc___RecordBatchFileReader__schema <- function(reader){
@@ -1452,10 +1488,6 @@ ipc___RecordBatchFileReader__Open <- function(file){
     .Call(`_arrow_ipc___RecordBatchFileReader__Open`, file)
 }
 
-Table__from_RecordBatchReader <- function(reader){
-    .Call(`_arrow_Table__from_RecordBatchReader`, reader)
-}
-
 Table__from_RecordBatchFileReader <- function(reader){
     .Call(`_arrow_Table__from_RecordBatchFileReader`, reader)
 }
diff --git a/r/R/dataset-scan.R b/r/R/dataset-scan.R
index a73bfb3dd74..43e72456dc0 100644
--- a/r/R/dataset-scan.R
+++ b/r/R/dataset-scan.R
@@ -57,6 +57,7 @@ Scanner <- R6Class("Scanner", inherit = ArrowObject,
   public = list(
     ToTable = function() dataset___Scanner__ToTable(self),
     ScanBatches = function() dataset___Scanner__ScanBatches(self),
+    ToRecordBatchReader = function() dataset___Scanner__ToRecordBatchReader(self),
     CountRows = function() dataset___Scanner__CountRows(self)
   ),
   active = list(
diff --git a/r/R/python.R b/r/R/python.R
index b200d939a96..52e4bcd7ac8 100644
--- a/r/R/python.R
+++ b/r/R/python.R
@@ -124,6 +124,71 @@ r_to_py.Schema <- function(x, convert = FALSE) {
   out
 }
 
+py_to_r.pyarrow.lib.Field <- function(x, ...) {
+  schema_ptr <- allocate_arrow_schema()
+  on.exit(delete_arrow_schema(schema_ptr))
+
+  x$`_export_to_c`(schema_ptr)
+  ImportField(schema_ptr)
+}
+
+r_to_py.Field <- function(x, convert = FALSE) {
+  schema_ptr <- allocate_arrow_schema()
+  on.exit(delete_arrow_schema(schema_ptr))
+
+  # Import with convert = FALSE so that `_import_from_c` returns a Python object
+  pa <- reticulate::import("pyarrow", convert = FALSE)
+  ExportField(x, schema_ptr)
+  out <- pa$Field$`_import_from_c`(schema_ptr)
+  # But set the convert attribute on the return object to the requested value
+  assign("convert", convert, out)
+  out
+}
+
+py_to_r.pyarrow.lib.DataType <- function(x, ...) {
+  schema_ptr <- allocate_arrow_schema()
+  on.exit(delete_arrow_schema(schema_ptr))
+
+  x$`_export_to_c`(schema_ptr)
+  ImportType(schema_ptr)
+}
+
+r_to_py.DataType <- function(x, convert = FALSE) {
+  schema_ptr <- allocate_arrow_schema()
+  on.exit(delete_arrow_schema(schema_ptr))
+
+  # Import with convert = FALSE so that `_import_from_c` returns a Python object
+  pa <- reticulate::import("pyarrow", convert = FALSE)
+  ExportType(x, schema_ptr)
+  out <- pa$DataType$`_import_from_c`(schema_ptr)
+  # But set the convert attribute on the return object to the requested value
+  assign("convert", convert, out)
+  out
+}
+
+py_to_r.pyarrow.lib.RecordBatchReader <- function(x, ...) {
+  stream_ptr <- allocate_arrow_array_stream()
+  on.exit(delete_arrow_array_stream(stream_ptr))
+
+  x$`_export_to_c`(stream_ptr)
+  ImportRecordBatchReader(stream_ptr)
+}
+
+r_to_py.RecordBatchReader <- function(x, convert = FALSE) {
+  stream_ptr <- allocate_arrow_array_stream()
+  on.exit(delete_arrow_array_stream(stream_ptr))
+
+  # Import with convert = FALSE so that `_import_from_c` returns a Python object
+  pa <- reticulate::import("pyarrow", convert = FALSE)
+  ExportRecordBatchReader(x, stream_ptr)
+  # TODO: handle subclasses of RecordBatchReader?
+  out <- pa$lib$RecordBatchReader$`_import_from_c`(stream_ptr)
+  # But set the convert attribute on the return object to the requested value
+  assign("convert", convert, out)
+  out
+}
+
+
 maybe_py_to_r <- function(x) {
   if (inherits(x, "python.builtin.object")) {
     # Depending on some auto-convert behavior, x may already be converted
diff --git a/r/R/record-batch-reader.R b/r/R/record-batch-reader.R
index 119ebd67fd9..b4daa8b24e3 100644
--- a/r/R/record-batch-reader.R
+++ b/r/R/record-batch-reader.R
@@ -94,9 +94,9 @@
 #' }
 RecordBatchReader <- R6Class("RecordBatchReader", inherit = ArrowObject,
   public = list(
-    read_next_batch = function() {
-      RecordBatchReader__ReadNext(self)
-    }
+    read_next_batch = function() RecordBatchReader__ReadNext(self),
+    batches = function() RecordBatchReader__batches(self),
+    read_table = function() Table__from_RecordBatchReader(self)
   ),
   active = list(
     schema = function() RecordBatchReader__schema(self)
@@ -107,12 +107,7 @@ RecordBatchReader <- R6Class("RecordBatchReader", inherit = ArrowObject,
 #' @usage NULL
 #' @format NULL
 #' @export
-RecordBatchStreamReader <- R6Class("RecordBatchStreamReader", inherit = RecordBatchReader,
-  public = list(
-    batches = function() ipc___RecordBatchStreamReader__batches(self),
-    read_table = function() Table__from_RecordBatchReader(self)
-  )
-)
+RecordBatchStreamReader <- R6Class("RecordBatchStreamReader", inherit = RecordBatchReader)
 RecordBatchStreamReader$create <- function(stream) {
   if (inherits(stream, c("raw", "Buffer"))) {
     # TODO: deprecate this because it doesn't close the connection to the Buffer
diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp
index 9d75b2da54e..1ccfd593d2c 100644
--- a/r/src/arrowExports.cpp
+++ b/r/src/arrowExports.cpp
@@ -2078,6 +2078,21 @@ extern "C" SEXP _arrow_dataset___Scanner__ScanBatches(SEXP scanner_sexp){
 }
 #endif
 
+// dataset.cpp
+#if defined(ARROW_R_WITH_DATASET)
+std::shared_ptr<arrow::RecordBatchReader> dataset___Scanner__ToRecordBatchReader(const std::shared_ptr<ds::Scanner>& scanner);
+extern "C" SEXP _arrow_dataset___Scanner__ToRecordBatchReader(SEXP scanner_sexp){
+BEGIN_CPP11
+	arrow::r::Input<const std::shared_ptr<ds::Scanner>&>::type scanner(scanner_sexp);
+	return cpp11::as_sexp(dataset___Scanner__ToRecordBatchReader(scanner));
+END_CPP11
+}
+#else
+extern "C" SEXP _arrow_dataset___Scanner__ToRecordBatchReader(SEXP scanner_sexp){
+	Rf_error("Cannot call dataset___Scanner__ToRecordBatchReader(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+}
+#endif
+
 // dataset.cpp
 #if defined(ARROW_R_WITH_DATASET)
 std::shared_ptr<arrow::Table> dataset___Scanner__head(const std::shared_ptr<ds::Scanner>& scanner, int n);
@@ -5051,6 +5066,96 @@ extern "C" SEXP _arrow_parquet___arrow___FileReader__GetSchema(SEXP reader_sexp)
 }
 #endif
 
+// py-to-r.cpp
+#if defined(ARROW_R_WITH_ARROW)
+arrow::r::Pointer<struct ArrowSchema> allocate_arrow_schema();
+extern "C" SEXP _arrow_allocate_arrow_schema(){
+BEGIN_CPP11
+	return cpp11::as_sexp(allocate_arrow_schema());
+END_CPP11
+}
+#else
+extern "C" SEXP _arrow_allocate_arrow_schema(){
+	Rf_error("Cannot call allocate_arrow_schema(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+}
+#endif
+
+// py-to-r.cpp
+#if defined(ARROW_R_WITH_ARROW)
+void delete_arrow_schema(arrow::r::Pointer<struct ArrowSchema> ptr);
+extern "C" SEXP _arrow_delete_arrow_schema(SEXP ptr_sexp){
+BEGIN_CPP11
+	arrow::r::Input<arrow::r::Pointer<struct ArrowSchema>>::type ptr(ptr_sexp);
+	delete_arrow_schema(ptr);
+	return R_NilValue;
+END_CPP11
+}
+#else
+extern "C" SEXP _arrow_delete_arrow_schema(SEXP ptr_sexp){
+	Rf_error("Cannot call delete_arrow_schema(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+}
+#endif
+
+// py-to-r.cpp
+#if defined(ARROW_R_WITH_ARROW)
+arrow::r::Pointer<struct ArrowArray> allocate_arrow_array();
+extern "C" SEXP _arrow_allocate_arrow_array(){
+BEGIN_CPP11
+	return cpp11::as_sexp(allocate_arrow_array());
+END_CPP11
+}
+#else
+extern "C" SEXP _arrow_allocate_arrow_array(){
+	Rf_error("Cannot call allocate_arrow_array(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+}
+#endif
+
+// py-to-r.cpp
+#if defined(ARROW_R_WITH_ARROW)
+void delete_arrow_array(arrow::r::Pointer<struct ArrowArray> ptr);
+extern "C" SEXP _arrow_delete_arrow_array(SEXP ptr_sexp){
+BEGIN_CPP11
+	arrow::r::Input<arrow::r::Pointer<struct ArrowArray>>::type ptr(ptr_sexp);
+	delete_arrow_array(ptr);
+	return R_NilValue;
+END_CPP11
+}
+#else
+extern "C" SEXP _arrow_delete_arrow_array(SEXP ptr_sexp){
+	Rf_error("Cannot call delete_arrow_array(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+}
+#endif
+
+// py-to-r.cpp
+#if defined(ARROW_R_WITH_ARROW)
+arrow::r::Pointer<struct ArrowArrayStream> allocate_arrow_array_stream();
+extern "C" SEXP _arrow_allocate_arrow_array_stream(){
+BEGIN_CPP11
+	return cpp11::as_sexp(allocate_arrow_array_stream());
+END_CPP11
+}
+#else
+extern "C" SEXP _arrow_allocate_arrow_array_stream(){
+	Rf_error("Cannot call allocate_arrow_array_stream(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+}
+#endif
+
+// py-to-r.cpp
+#if defined(ARROW_R_WITH_ARROW)
+void delete_arrow_array_stream(arrow::r::Pointer<struct ArrowArrayStream> ptr);
+extern "C" SEXP _arrow_delete_arrow_array_stream(SEXP ptr_sexp){
+BEGIN_CPP11
+	arrow::r::Input<arrow::r::Pointer<struct ArrowArrayStream>>::type ptr(ptr_sexp);
+	delete_arrow_array_stream(ptr);
+	return R_NilValue;
+END_CPP11
+}
+#else
+extern "C" SEXP _arrow_delete_arrow_array_stream(SEXP ptr_sexp){
+	Rf_error("Cannot call delete_arrow_array_stream(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+}
+#endif
+
 // py-to-r.cpp
 #if defined(ARROW_R_WITH_ARROW)
 std::shared_ptr<arrow::Array> ImportArray(arrow::r::Pointer<struct ArrowArray> array, arrow::r::Pointer<struct ArrowSchema> schema);
@@ -5100,78 +5205,80 @@ extern "C" SEXP _arrow_ImportSchema(SEXP schema_sexp){
 
 // py-to-r.cpp
 #if defined(ARROW_R_WITH_ARROW)
-arrow::r::Pointer<struct ArrowSchema> allocate_arrow_schema();
-extern "C" SEXP _arrow_allocate_arrow_schema(){
+std::shared_ptr<arrow::Field> ImportField(arrow::r::Pointer<struct ArrowSchema> field);
+extern "C" SEXP _arrow_ImportField(SEXP field_sexp){
 BEGIN_CPP11
-	return cpp11::as_sexp(allocate_arrow_schema());
+	arrow::r::Input<arrow::r::Pointer<struct ArrowSchema>>::type field(field_sexp);
+	return cpp11::as_sexp(ImportField(field));
 END_CPP11
 }
 #else
-extern "C" SEXP _arrow_allocate_arrow_schema(){
-	Rf_error("Cannot call allocate_arrow_schema(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+extern "C" SEXP _arrow_ImportField(SEXP field_sexp){
+	Rf_error("Cannot call ImportField(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
 }
 #endif
 
 // py-to-r.cpp
 #if defined(ARROW_R_WITH_ARROW)
-void delete_arrow_schema(arrow::r::Pointer<struct ArrowSchema> ptr);
-extern "C" SEXP _arrow_delete_arrow_schema(SEXP ptr_sexp){
+std::shared_ptr<arrow::DataType> ImportType(arrow::r::Pointer<struct ArrowSchema> type);
+extern "C" SEXP _arrow_ImportType(SEXP type_sexp){
 BEGIN_CPP11
-	arrow::r::Input<arrow::r::Pointer<struct ArrowSchema>>::type ptr(ptr_sexp);
-	delete_arrow_schema(ptr);
-	return R_NilValue;
+	arrow::r::Input<arrow::r::Pointer<struct ArrowSchema>>::type type(type_sexp);
+	return cpp11::as_sexp(ImportType(type));
 END_CPP11
 }
 #else
-extern "C" SEXP _arrow_delete_arrow_schema(SEXP ptr_sexp){
-	Rf_error("Cannot call delete_arrow_schema(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+extern "C" SEXP _arrow_ImportType(SEXP type_sexp){
+	Rf_error("Cannot call ImportType(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
 }
 #endif
 
 // py-to-r.cpp
 #if defined(ARROW_R_WITH_ARROW)
-arrow::r::Pointer<struct ArrowArray> allocate_arrow_array();
-extern "C" SEXP _arrow_allocate_arrow_array(){
+std::shared_ptr<arrow::RecordBatchReader> ImportRecordBatchReader(arrow::r::Pointer<struct ArrowArrayStream> stream);
+extern "C" SEXP _arrow_ImportRecordBatchReader(SEXP stream_sexp){
 BEGIN_CPP11
-	return cpp11::as_sexp(allocate_arrow_array());
+	arrow::r::Input<arrow::r::Pointer<struct ArrowArrayStream>>::type stream(stream_sexp);
+	return cpp11::as_sexp(ImportRecordBatchReader(stream));
 END_CPP11
 }
 #else
-extern "C" SEXP _arrow_allocate_arrow_array(){
-	Rf_error("Cannot call allocate_arrow_array(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+extern "C" SEXP _arrow_ImportRecordBatchReader(SEXP stream_sexp){
+	Rf_error("Cannot call ImportRecordBatchReader(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
 }
 #endif
 
 // py-to-r.cpp
 #if defined(ARROW_R_WITH_ARROW)
-void delete_arrow_array(arrow::r::Pointer<struct ArrowArray> ptr);
-extern "C" SEXP _arrow_delete_arrow_array(SEXP ptr_sexp){
+void ExportType(const std::shared_ptr<arrow::DataType>& type, arrow::r::Pointer<struct ArrowSchema> ptr);
+extern "C" SEXP _arrow_ExportType(SEXP type_sexp, SEXP ptr_sexp){
 BEGIN_CPP11
-	arrow::r::Input<arrow::r::Pointer<struct ArrowArray>>::type ptr(ptr_sexp);
-	delete_arrow_array(ptr);
+	arrow::r::Input<const std::shared_ptr<arrow::DataType>&>::type type(type_sexp);
+	arrow::r::Input<arrow::r::Pointer<struct ArrowSchema>>::type ptr(ptr_sexp);
+	ExportType(type, ptr);
 	return R_NilValue;
 END_CPP11
 }
 #else
-extern "C" SEXP _arrow_delete_arrow_array(SEXP ptr_sexp){
-	Rf_error("Cannot call delete_arrow_array(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+extern "C" SEXP _arrow_ExportType(SEXP type_sexp, SEXP ptr_sexp){
+	Rf_error("Cannot call ExportType(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
 }
 #endif
 
 // py-to-r.cpp
 #if defined(ARROW_R_WITH_ARROW)
-void ExportType(const std::shared_ptr<arrow::DataType>& type, arrow::r::Pointer<struct ArrowSchema> ptr);
-extern "C" SEXP _arrow_ExportType(SEXP type_sexp, SEXP ptr_sexp){
+void ExportField(const std::shared_ptr<arrow::Field>& field, arrow::r::Pointer<struct ArrowSchema> ptr);
+extern "C" SEXP _arrow_ExportField(SEXP field_sexp, SEXP ptr_sexp){
 BEGIN_CPP11
-	arrow::r::Input<const std::shared_ptr<arrow::DataType>&>::type type(type_sexp);
+	arrow::r::Input<const std::shared_ptr<arrow::Field>&>::type field(field_sexp);
 	arrow::r::Input<arrow::r::Pointer<struct ArrowSchema>>::type ptr(ptr_sexp);
-	ExportType(type, ptr);
+	ExportField(field, ptr);
 	return R_NilValue;
 END_CPP11
 }
 #else
-extern "C" SEXP _arrow_ExportType(SEXP type_sexp, SEXP ptr_sexp){
-	Rf_error("Cannot call ExportType(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+extern "C" SEXP _arrow_ExportField(SEXP field_sexp, SEXP ptr_sexp){
+	Rf_error("Cannot call ExportField(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
 }
 #endif
 
@@ -5228,6 +5335,23 @@ extern "C" SEXP _arrow_ExportRecordBatch(SEXP batch_sexp, SEXP array_ptr_sexp, S
 }
 #endif
 
+// py-to-r.cpp
+#if defined(ARROW_R_WITH_ARROW)
+void ExportRecordBatchReader(const std::shared_ptr<arrow::RecordBatchReader>& reader, arrow::r::Pointer<struct ArrowArrayStream> stream_ptr);
+extern "C" SEXP _arrow_ExportRecordBatchReader(SEXP reader_sexp, SEXP stream_ptr_sexp){
+BEGIN_CPP11
+	arrow::r::Input<const std::shared_ptr<arrow::RecordBatchReader>&>::type reader(reader_sexp);
+	arrow::r::Input<arrow::r::Pointer<struct ArrowArrayStream>>::type stream_ptr(stream_ptr_sexp);
+	ExportRecordBatchReader(reader, stream_ptr);
+	return R_NilValue;
+END_CPP11
+}
+#else
+extern "C" SEXP _arrow_ExportRecordBatchReader(SEXP reader_sexp, SEXP stream_ptr_sexp){
+	Rf_error("Cannot call ExportRecordBatchReader(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+}
+#endif
+
 // r_to_arrow.cpp
 #if defined(ARROW_R_WITH_ARROW)
 SEXP vec_to_arrow(SEXP x, SEXP s_type);
@@ -5613,31 +5737,46 @@ extern "C" SEXP _arrow_RecordBatchReader__ReadNext(SEXP reader_sexp){
 
 // recordbatchreader.cpp
 #if defined(ARROW_R_WITH_ARROW)
-std::shared_ptr<arrow::ipc::RecordBatchStreamReader> ipc___RecordBatchStreamReader__Open(const std::shared_ptr<arrow::io::InputStream>& stream);
-extern "C" SEXP _arrow_ipc___RecordBatchStreamReader__Open(SEXP stream_sexp){
+cpp11::list RecordBatchReader__batches(const std::shared_ptr<arrow::RecordBatchReader>& reader);
+extern "C" SEXP _arrow_RecordBatchReader__batches(SEXP reader_sexp){
 BEGIN_CPP11
-	arrow::r::Input<const std::shared_ptr<arrow::io::InputStream>&>::type stream(stream_sexp);
-	return cpp11::as_sexp(ipc___RecordBatchStreamReader__Open(stream));
+	arrow::r::Input<const std::shared_ptr<arrow::RecordBatchReader>&>::type reader(reader_sexp);
+	return cpp11::as_sexp(RecordBatchReader__batches(reader));
 END_CPP11
 }
 #else
-extern "C" SEXP _arrow_ipc___RecordBatchStreamReader__Open(SEXP stream_sexp){
-	Rf_error("Cannot call ipc___RecordBatchStreamReader__Open(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+extern "C" SEXP _arrow_RecordBatchReader__batches(SEXP reader_sexp){
+	Rf_error("Cannot call RecordBatchReader__batches(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+}
+#endif
+
+// recordbatchreader.cpp
+#if defined(ARROW_R_WITH_ARROW)
+std::shared_ptr<arrow::Table> Table__from_RecordBatchReader(const std::shared_ptr<arrow::RecordBatchReader>& reader);
+extern "C" SEXP _arrow_Table__from_RecordBatchReader(SEXP reader_sexp){
+BEGIN_CPP11
+	arrow::r::Input<const std::shared_ptr<arrow::RecordBatchReader>&>::type reader(reader_sexp);
+	return cpp11::as_sexp(Table__from_RecordBatchReader(reader));
+END_CPP11
+}
+#else
+extern "C" SEXP _arrow_Table__from_RecordBatchReader(SEXP reader_sexp){
+	Rf_error("Cannot call Table__from_RecordBatchReader(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
 }
 #endif
 
 // recordbatchreader.cpp
 #if defined(ARROW_R_WITH_ARROW)
-cpp11::list ipc___RecordBatchStreamReader__batches(const std::shared_ptr<arrow::ipc::RecordBatchStreamReader>& reader);
-extern "C" SEXP _arrow_ipc___RecordBatchStreamReader__batches(SEXP reader_sexp){
+std::shared_ptr<arrow::ipc::RecordBatchStreamReader> ipc___RecordBatchStreamReader__Open(const std::shared_ptr<arrow::io::InputStream>& stream);
+extern "C" SEXP _arrow_ipc___RecordBatchStreamReader__Open(SEXP stream_sexp){
 BEGIN_CPP11
-	arrow::r::Input<const std::shared_ptr<arrow::ipc::RecordBatchStreamReader>&>::type reader(reader_sexp);
-	return cpp11::as_sexp(ipc___RecordBatchStreamReader__batches(reader));
+	arrow::r::Input<const std::shared_ptr<arrow::io::InputStream>&>::type stream(stream_sexp);
+	return cpp11::as_sexp(ipc___RecordBatchStreamReader__Open(stream));
 END_CPP11
 }
 #else
-extern "C" SEXP _arrow_ipc___RecordBatchStreamReader__batches(SEXP reader_sexp){
-	Rf_error("Cannot call ipc___RecordBatchStreamReader__batches(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+extern "C" SEXP _arrow_ipc___RecordBatchStreamReader__Open(SEXP stream_sexp){
+	Rf_error("Cannot call ipc___RecordBatchStreamReader__Open(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
 }
 #endif
 
@@ -5702,21 +5841,6 @@ extern "C" SEXP _arrow_ipc___RecordBatchFileReader__Open(SEXP file_sexp){
 }
 #endif
 
-// recordbatchreader.cpp
-#if defined(ARROW_R_WITH_ARROW)
-std::shared_ptr<arrow::Table> Table__from_RecordBatchReader(const std::shared_ptr<arrow::RecordBatchReader>& reader);
-extern "C" SEXP _arrow_Table__from_RecordBatchReader(SEXP reader_sexp){
-BEGIN_CPP11
-	arrow::r::Input<const std::shared_ptr<arrow::RecordBatchReader>&>::type reader(reader_sexp);
-	return cpp11::as_sexp(Table__from_RecordBatchReader(reader));
-END_CPP11
-}
-#else
-extern "C" SEXP _arrow_Table__from_RecordBatchReader(SEXP reader_sexp){
-	Rf_error("Cannot call Table__from_RecordBatchReader(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
-}
-#endif
-
 // recordbatchreader.cpp
 #if defined(ARROW_R_WITH_ARROW)
 std::shared_ptr<arrow::Table> Table__from_RecordBatchFileReader(const std::shared_ptr<arrow::ipc::RecordBatchFileReader>& reader);
@@ -6832,6 +6956,7 @@ static const R_CallMethodDef CallEntries[] = {
 		{ "_arrow_dataset___ScannerBuilder__Finish", (DL_FUNC) &_arrow_dataset___ScannerBuilder__Finish, 1}, 
 		{ "_arrow_dataset___Scanner__ToTable", (DL_FUNC) &_arrow_dataset___Scanner__ToTable, 1}, 
 		{ "_arrow_dataset___Scanner__ScanBatches", (DL_FUNC) &_arrow_dataset___Scanner__ScanBatches, 1}, 
+		{ "_arrow_dataset___Scanner__ToRecordBatchReader", (DL_FUNC) &_arrow_dataset___Scanner__ToRecordBatchReader, 1}, 
 		{ "_arrow_dataset___Scanner__head", (DL_FUNC) &_arrow_dataset___Scanner__head, 2}, 
 		{ "_arrow_dataset___Scanner__schema", (DL_FUNC) &_arrow_dataset___Scanner__schema, 1}, 
 		{ "_arrow_dataset___ScanTask__get_batches", (DL_FUNC) &_arrow_dataset___ScanTask__get_batches, 1}, 
@@ -7022,17 +7147,24 @@ static const R_CallMethodDef CallEntries[] = {
 		{ "_arrow_parquet___arrow___FileWriter__Close", (DL_FUNC) &_arrow_parquet___arrow___FileWriter__Close, 1}, 
 		{ "_arrow_parquet___arrow___WriteTable", (DL_FUNC) &_arrow_parquet___arrow___WriteTable, 4}, 
 		{ "_arrow_parquet___arrow___FileReader__GetSchema", (DL_FUNC) &_arrow_parquet___arrow___FileReader__GetSchema, 1}, 
-		{ "_arrow_ImportArray", (DL_FUNC) &_arrow_ImportArray, 2}, 
-		{ "_arrow_ImportRecordBatch", (DL_FUNC) &_arrow_ImportRecordBatch, 2}, 
-		{ "_arrow_ImportSchema", (DL_FUNC) &_arrow_ImportSchema, 1}, 
 		{ "_arrow_allocate_arrow_schema", (DL_FUNC) &_arrow_allocate_arrow_schema, 0}, 
 		{ "_arrow_delete_arrow_schema", (DL_FUNC) &_arrow_delete_arrow_schema, 1}, 
 		{ "_arrow_allocate_arrow_array", (DL_FUNC) &_arrow_allocate_arrow_array, 0}, 
 		{ "_arrow_delete_arrow_array", (DL_FUNC) &_arrow_delete_arrow_array, 1}, 
+		{ "_arrow_allocate_arrow_array_stream", (DL_FUNC) &_arrow_allocate_arrow_array_stream, 0}, 
+		{ "_arrow_delete_arrow_array_stream", (DL_FUNC) &_arrow_delete_arrow_array_stream, 1}, 
+		{ "_arrow_ImportArray", (DL_FUNC) &_arrow_ImportArray, 2}, 
+		{ "_arrow_ImportRecordBatch", (DL_FUNC) &_arrow_ImportRecordBatch, 2}, 
+		{ "_arrow_ImportSchema", (DL_FUNC) &_arrow_ImportSchema, 1}, 
+		{ "_arrow_ImportField", (DL_FUNC) &_arrow_ImportField, 1}, 
+		{ "_arrow_ImportType", (DL_FUNC) &_arrow_ImportType, 1}, 
+		{ "_arrow_ImportRecordBatchReader", (DL_FUNC) &_arrow_ImportRecordBatchReader, 1}, 
 		{ "_arrow_ExportType", (DL_FUNC) &_arrow_ExportType, 2}, 
+		{ "_arrow_ExportField", (DL_FUNC) &_arrow_ExportField, 2}, 
 		{ "_arrow_ExportSchema", (DL_FUNC) &_arrow_ExportSchema, 2}, 
 		{ "_arrow_ExportArray", (DL_FUNC) &_arrow_ExportArray, 3}, 
 		{ "_arrow_ExportRecordBatch", (DL_FUNC) &_arrow_ExportRecordBatch, 3}, 
+		{ "_arrow_ExportRecordBatchReader", (DL_FUNC) &_arrow_ExportRecordBatchReader, 2}, 
 		{ "_arrow_vec_to_arrow", (DL_FUNC) &_arrow_vec_to_arrow, 2}, 
 		{ "_arrow_DictionaryArray__FromArrays", (DL_FUNC) &_arrow_DictionaryArray__FromArrays, 3}, 
 		{ "_arrow_RecordBatch__num_columns", (DL_FUNC) &_arrow_RecordBatch__num_columns, 1}, 
@@ -7057,13 +7189,13 @@ static const R_CallMethodDef CallEntries[] = {
 		{ "_arrow_RecordBatch__from_arrays", (DL_FUNC) &_arrow_RecordBatch__from_arrays, 2}, 
 		{ "_arrow_RecordBatchReader__schema", (DL_FUNC) &_arrow_RecordBatchReader__schema, 1}, 
 		{ "_arrow_RecordBatchReader__ReadNext", (DL_FUNC) &_arrow_RecordBatchReader__ReadNext, 1}, 
+		{ "_arrow_RecordBatchReader__batches", (DL_FUNC) &_arrow_RecordBatchReader__batches, 1}, 
+		{ "_arrow_Table__from_RecordBatchReader", (DL_FUNC) &_arrow_Table__from_RecordBatchReader, 1}, 
 		{ "_arrow_ipc___RecordBatchStreamReader__Open", (DL_FUNC) &_arrow_ipc___RecordBatchStreamReader__Open, 1}, 
-		{ "_arrow_ipc___RecordBatchStreamReader__batches", (DL_FUNC) &_arrow_ipc___RecordBatchStreamReader__batches, 1}, 
 		{ "_arrow_ipc___RecordBatchFileReader__schema", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__schema, 1}, 
 		{ "_arrow_ipc___RecordBatchFileReader__num_record_batches", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__num_record_batches, 1}, 
 		{ "_arrow_ipc___RecordBatchFileReader__ReadRecordBatch", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__ReadRecordBatch, 2}, 
 		{ "_arrow_ipc___RecordBatchFileReader__Open", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__Open, 1}, 
-		{ "_arrow_Table__from_RecordBatchReader", (DL_FUNC) &_arrow_Table__from_RecordBatchReader, 1}, 
 		{ "_arrow_Table__from_RecordBatchFileReader", (DL_FUNC) &_arrow_Table__from_RecordBatchFileReader, 1}, 
 		{ "_arrow_ipc___RecordBatchFileReader__batches", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__batches, 1}, 
 		{ "_arrow_ipc___RecordBatchWriter__WriteRecordBatch", (DL_FUNC) &_arrow_ipc___RecordBatchWriter__WriteRecordBatch, 2}, 
diff --git a/r/src/dataset.cpp b/r/src/dataset.cpp
index 205423318c0..c419c69ffce 100644
--- a/r/src/dataset.cpp
+++ b/r/src/dataset.cpp
@@ -435,6 +435,12 @@ cpp11::list dataset___Scanner__ScanBatches(const std::shared_ptr<ds::Scanner>& s
   return arrow::r::to_r_list(batches);
 }
 
+// [[dataset::export]]
+std::shared_ptr<arrow::RecordBatchReader> dataset___Scanner__ToRecordBatchReader(
+    const std::shared_ptr<ds::Scanner>& scanner) {
+  return ValueOrStop(scanner->ToRecordBatchReader());
+}
+
 // [[dataset::export]]
 std::shared_ptr<arrow::Table> dataset___Scanner__head(
     const std::shared_ptr<ds::Scanner>& scanner, int n) {
diff --git a/r/src/py-to-r.cpp b/r/src/py-to-r.cpp
index a571cfaab9a..80cd65c5171 100644
--- a/r/src/py-to-r.cpp
+++ b/r/src/py-to-r.cpp
@@ -21,6 +21,26 @@
 
 #include <arrow/c/bridge.h>
 
+// [[arrow::export]]
+arrow::r::Pointer<struct ArrowSchema> allocate_arrow_schema() { return {}; }
+
+// [[arrow::export]]
+void delete_arrow_schema(arrow::r::Pointer<struct ArrowSchema> ptr) { ptr.finalize(); }
+
+// [[arrow::export]]
+arrow::r::Pointer<struct ArrowArray> allocate_arrow_array() { return {}; }
+
+// [[arrow::export]]
+void delete_arrow_array(arrow::r::Pointer<struct ArrowArray> ptr) { ptr.finalize(); }
+
+// [[arrow::export]]
+arrow::r::Pointer<struct ArrowArrayStream> allocate_arrow_array_stream() { return {}; }
+
+// [[arrow::export]]
+void delete_arrow_array_stream(arrow::r::Pointer<struct ArrowArrayStream> ptr) {
+  ptr.finalize();
+}
+
 // [[arrow::export]]
 std::shared_ptr<arrow::Array> ImportArray(arrow::r::Pointer<struct ArrowArray> array,
                                           arrow::r::Pointer<struct ArrowSchema> schema) {
@@ -41,16 +61,20 @@ std::shared_ptr<arrow::Schema> ImportSchema(
 }
 
 // [[arrow::export]]
-arrow::r::Pointer<struct ArrowSchema> allocate_arrow_schema() { return {}; }
-
-// [[arrow::export]]
-void delete_arrow_schema(arrow::r::Pointer<struct ArrowSchema> ptr) { ptr.finalize(); }
+std::shared_ptr<arrow::Field> ImportField(arrow::r::Pointer<struct ArrowSchema> field) {
+  return ValueOrStop(arrow::ImportField(field));
+}
 
 // [[arrow::export]]
-arrow::r::Pointer<struct ArrowArray> allocate_arrow_array() { return {}; }
+std::shared_ptr<arrow::DataType> ImportType(arrow::r::Pointer<struct ArrowSchema> type) {
+  return ValueOrStop(arrow::ImportType(type));
+}
 
 // [[arrow::export]]
-void delete_arrow_array(arrow::r::Pointer<struct ArrowArray> ptr) { ptr.finalize(); }
+std::shared_ptr<arrow::RecordBatchReader> ImportRecordBatchReader(
+    arrow::r::Pointer<struct ArrowArrayStream> stream) {
+  return ValueOrStop(arrow::ImportRecordBatchReader(stream));
+}
 
 // [[arrow::export]]
 void ExportType(const std::shared_ptr<arrow::DataType>& type,
@@ -58,6 +82,12 @@ void ExportType(const std::shared_ptr<arrow::DataType>& type,
   StopIfNotOk(arrow::ExportType(*type, ptr));
 }
 
+// [[arrow::export]]
+void ExportField(const std::shared_ptr<arrow::Field>& field,
+                 arrow::r::Pointer<struct ArrowSchema> ptr) {
+  StopIfNotOk(arrow::ExportField(*field, ptr));
+}
+
 // [[arrow::export]]
 void ExportSchema(const std::shared_ptr<arrow::Schema>& schema,
                   arrow::r::Pointer<struct ArrowSchema> ptr) {
@@ -78,4 +108,10 @@ void ExportRecordBatch(const std::shared_ptr<arrow::RecordBatch>& batch,
   StopIfNotOk(arrow::ExportRecordBatch(*batch, array_ptr, schema_ptr));
 }
 
+// [[arrow::export]]
+void ExportRecordBatchReader(const std::shared_ptr<arrow::RecordBatchReader>& reader,
+                             arrow::r::Pointer<struct ArrowArrayStream> stream_ptr) {
+  StopIfNotOk(arrow::ExportRecordBatchReader(reader, stream_ptr));
+}
+
 #endif
diff --git a/r/src/recordbatchreader.cpp b/r/src/recordbatchreader.cpp
index e2819daf89b..14af503b4a1 100644
--- a/r/src/recordbatchreader.cpp
+++ b/r/src/recordbatchreader.cpp
@@ -35,6 +35,22 @@ std::shared_ptr<arrow::RecordBatch> RecordBatchReader__ReadNext(
   return batch;
 }
 
+// [[arrow::export]]
+cpp11::list RecordBatchReader__batches(
+    const std::shared_ptr<arrow::RecordBatchReader>& reader) {
+  std::vector<std::shared_ptr<arrow::RecordBatch>> res;
+  StopIfNotOk(reader->ReadAll(&res));
+  return arrow::r::to_r_list(res);
+}
+
+// [[arrow::export]]
+std::shared_ptr<arrow::Table> Table__from_RecordBatchReader(
+    const std::shared_ptr<arrow::RecordBatchReader>& reader) {
+  std::shared_ptr<arrow::Table> table = nullptr;
+  StopIfNotOk(reader->ReadAll(&table));
+  return table;
+}
+
 // -------- RecordBatchStreamReader
 
 // [[arrow::export]]
@@ -45,22 +61,6 @@ std::shared_ptr<arrow::ipc::RecordBatchStreamReader> ipc___RecordBatchStreamRead
   return ValueOrStop(arrow::ipc::RecordBatchStreamReader::Open(stream, options));
 }
 
-// [[arrow::export]]
-cpp11::list ipc___RecordBatchStreamReader__batches(
-    const std::shared_ptr<arrow::ipc::RecordBatchStreamReader>& reader) {
-  std::vector<std::shared_ptr<arrow::RecordBatch>> res;
-
-  while (true) {
-    std::shared_ptr<arrow::RecordBatch> batch;
-    StopIfNotOk(reader->ReadNext(&batch));
-    if (!batch) break;
-
-    res.push_back(batch);
-  }
-
-  return arrow::r::to_r_list(res);
-}
-
 // -------- RecordBatchFileReader
 
 // [[arrow::export]]
@@ -92,14 +92,6 @@ std::shared_ptr<arrow::ipc::RecordBatchFileReader> ipc___RecordBatchFileReader__
   return ValueOrStop(arrow::ipc::RecordBatchFileReader::Open(file, options));
 }
 
-// [[arrow::export]]
-std::shared_ptr<arrow::Table> Table__from_RecordBatchReader(
-    const std::shared_ptr<arrow::RecordBatchReader>& reader) {
-  std::shared_ptr<arrow::Table> table = nullptr;
-  StopIfNotOk(reader->ReadAll(&table));
-  return table;
-}
-
 // [[arrow::export]]
 std::shared_ptr<arrow::Table> Table__from_RecordBatchFileReader(
     const std::shared_ptr<arrow::ipc::RecordBatchFileReader>& reader) {
diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R
index 334ff6d06f7..ce9e5e84402 100644
--- a/r/tests/testthat/test-dataset.R
+++ b/r/tests/testthat/test-dataset.R
@@ -1036,6 +1036,21 @@ test_that("Scanner$ScanBatches", {
   expect_equivalent(as.data.frame(table), rbind(df1, df2))
 })
 
+test_that("Scanner$ToRecordBatchReader()", {
+  ds <- open_dataset(dataset_dir, partitioning = "part")
+  scan <- ds %>%
+    filter(part == 1) %>%
+    select(int, lgl) %>%
+    filter(int > 6) %>%
+    Scanner$create()
+  reader <- scan$ToRecordBatchReader()
+  expect_r6_class(reader, "RecordBatchReader")
+  expect_identical(
+    as.data.frame(reader$read_table()),
+    df1[df1$int > 6, c("int", "lgl")]
+  )
+})
+
 expect_scan_result <- function(ds, schm) {
   sb <- ds$NewScan()
   expect_r6_class(sb, "ScannerBuilder")
diff --git a/r/tests/testthat/test-python.R b/r/tests/testthat/test-python.R
index 885274846e1..c3a9e269ad6 100644
--- a/r/tests/testthat/test-python.R
+++ b/r/tests/testthat/test-python.R
@@ -31,15 +31,15 @@ test_that("install_pyarrow", {
   reticulate::use_virtualenv("arrow-test")
 })
 
+skip_if_no_pyarrow()
+
 test_that("Array from Python", {
-  skip_if_no_pyarrow()
   pa <- reticulate::import("pyarrow")
   py <- pa$array(c(1, 2, 3))
   expect_equal(py, Array$create(c(1, 2, 3)))
 })
 
 test_that("Array to Python", {
-  skip_if_no_pyarrow()
   pa <- reticulate::import("pyarrow", convert = FALSE)
   r <- Array$create(c(1, 2, 3))
   py <- pa$concat_arrays(list(r))
@@ -48,7 +48,6 @@ test_that("Array to Python", {
 })
 
 test_that("RecordBatch to/from Python", {
-  skip_if_no_pyarrow()
   pa <- reticulate::import("pyarrow", convert = FALSE)
   batch <- record_batch(col1 = c(1, 2, 3), col2 = letters[1:3])
   py <- reticulate::r_to_py(batch)
@@ -57,7 +56,6 @@ test_that("RecordBatch to/from Python", {
 })
 
 test_that("Table and ChunkedArray from Python", {
-  skip_if_no_pyarrow()
   pa <- reticulate::import("pyarrow", convert = FALSE)
   batch <- record_batch(col1 = c(1, 2, 3), col2 = letters[1:3])
   tab <- Table$create(batch, batch)
@@ -70,7 +68,6 @@ test_that("Table and ChunkedArray from Python", {
 })
 
 test_that("Table and ChunkedArray to Python", {
-  skip_if_no_pyarrow()
   batch <- record_batch(col1 = c(1, 2, 3), col2 = letters[1:3])
   tab <- Table$create(batch, batch)
 
@@ -84,7 +81,6 @@ test_that("Table and ChunkedArray to Python", {
 })
 
 test_that("RecordBatch with metadata roundtrip", {
-  skip_if_no_pyarrow()
   batch <- RecordBatch$create(example_with_times)
   pybatch <- reticulate::r_to_py(batch)
   expect_s3_class(pybatch, "pyarrow.lib.RecordBatch")
@@ -93,10 +89,47 @@ test_that("RecordBatch with metadata roundtrip", {
 })
 
 test_that("Table with metadata roundtrip", {
-  skip_if_no_pyarrow()
   tab <- Table$create(example_with_times)
   pytab <- reticulate::r_to_py(tab)
   expect_s3_class(pytab, "pyarrow.lib.Table")
   expect_equal(reticulate::py_to_r(pytab), tab)
   expect_identical(as.data.frame(reticulate::py_to_r(pytab)), example_with_times)
 })
+
+test_that("DataType roundtrip", {
+  r <- timestamp("ms", timezone = "Asia/Pyongyang")
+  py <- reticulate::r_to_py(r)
+  expect_s3_class(py, "pyarrow.lib.DataType")
+  expect_equal(reticulate::py_to_r(py), r)
+})
+
+test_that("Field roundtrip", {
+  skip("TODO in pyarrow: 'pyarrow.lib.Field' has no attribute '_import_from_c'")
+  r <- field("x", time32("s"))
+  py <- reticulate::r_to_py(r)
+  expect_s3_class(py, "pyarrow.lib.Field")
+  expect_equal(reticulate::py_to_r(py), r)
+})
+
+test_that("RecordBatchReader to python", {
+  library(dplyr)
+
+  tab <- Table$create(example_data)
+  scan <- tab %>%
+    select(int, lgl) %>%
+    filter(int > 6) %>%
+    Scanner$create()
+  reader <- scan$ToRecordBatchReader()
+  pyreader <- reticulate::r_to_py(reader)
+  expect_s3_class(pyreader, "pyarrow.lib.RecordBatchReader")
+  pytab <- pyreader$read_all()
+  expect_s3_class(pytab, "pyarrow.lib.Table")
+  back_to_r <- reticulate::py_to_r(pytab)
+  expect_r6_class(back_to_r, "Table")
+  expect_identical(
+    as.data.frame(back_to_r),
+    example_data %>%
+      select(int, lgl) %>%
+      filter(int > 6)
+  )
+})

From aa37d197a63a7efbc0660f9cea2f75cc08c30587 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Tue, 18 May 2021 08:12:35 -0400
Subject: [PATCH 264/719] ARROW-12810: [Python] Stop AWS SDK from looking for
 metadata service

This significantly speeds up Python tests.

I opted to put it in Python instead of in the shell script so that interactive local development also benefits.

Closes #10348 from lidavidm/arrow-12810

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 python/pyarrow/tests/conftest.py | 10 ++++++++++
 python/pyarrow/tests/test_fs.py  |  5 +----
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/python/pyarrow/tests/conftest.py b/python/pyarrow/tests/conftest.py
index c6e49668b95..3de07c4305f 100644
--- a/python/pyarrow/tests/conftest.py
+++ b/python/pyarrow/tests/conftest.py
@@ -231,6 +231,16 @@ def base_datadir():
     return pathlib.Path(__file__).parent / 'data'
 
 
+@pytest.fixture(autouse=True)
+def disable_aws_metadata(monkeypatch):
+    """Stop the AWS SDK from trying to contact the EC2 metadata server.
+
+    Otherwise, this causes a 5 second delay in tests that exercise the
+    S3 filesystem.
+    """
+    monkeypatch.setenv("AWS_EC2_METADATA_DISABLED", "true")
+
+
 # TODO(kszucs): move the following fixtures to test_fs.py once the previous
 # parquet dataset implementation and hdfs implementation are removed.
 
diff --git a/python/pyarrow/tests/test_fs.py b/python/pyarrow/tests/test_fs.py
index 1af6967595b..acdff253917 100644
--- a/python/pyarrow/tests/test_fs.py
+++ b/python/pyarrow/tests/test_fs.py
@@ -1020,12 +1020,9 @@ def test_mockfs_mtime_roundtrip(mockfs):
 
 
 @pytest.mark.s3
-def test_s3_options(monkeypatch):
+def test_s3_options():
     from pyarrow.fs import S3FileSystem
 
-    # Avoid wait for unavailable metadata server in ARN role example below
-    monkeypatch.setenv("AWS_EC2_METADATA_DISABLED", "true")
-
     fs = S3FileSystem(access_key='access', secret_key='secret',
                       session_token='token', region='us-east-2',
                       scheme='https', endpoint_override='localhost:8999')

From 3a85ab482c3181f207c68c52a66f240da49e8995 Mon Sep 17 00:00:00 2001
From: Eduardo Ponce <edponce00@gmail.com>
Date: Thu, 20 May 2021 01:46:11 +0000
Subject: [PATCH 265/719] ARROW-12685: [C++][Compute] Add unary absolute value
 kernel

This PR adds the arithmetic absolute value kernels to the compute layer.

Closes #10274 from edponce/ARROW-12685-Compute-Add-unary-absolute-value-kernel

Authored-by: Eduardo Ponce <edponce00@gmail.com>
Signed-off-by: Yibo Cai <yibo.cai@arm.com>
---
 cpp/src/arrow/compute/api_scalar.cc           |   1 +
 cpp/src/arrow/compute/api_scalar.h            |  12 ++
 .../compute/kernels/scalar_arithmetic.cc      |  78 +++++++++++++
 .../compute/kernels/scalar_arithmetic_test.cc | 108 +++++++++++++++++-
 cpp/src/arrow/util/int_util_internal.h        |   2 +-
 docs/source/cpp/compute.rst                   |   4 +
 docs/source/python/api/compute.rst            |   2 +
 7 files changed, 204 insertions(+), 3 deletions(-)

diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc
index c7c049af980..9f4ad42fecb 100644
--- a/cpp/src/arrow/compute/api_scalar.cc
+++ b/cpp/src/arrow/compute/api_scalar.cc
@@ -47,6 +47,7 @@ namespace compute {
     return CallFunction(func_name, {arg}, ctx);                                        \
   }
 
+SCALAR_ARITHMETIC_UNARY(AbsoluteValue, "abs", "abs_checked")
 SCALAR_ARITHMETIC_UNARY(Negate, "negate", "negate_checked")
 
 #define SCALAR_ARITHMETIC_BINARY(NAME, REGISTRY_NAME, REGISTRY_CHECKED_NAME)           \
diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h
index 3e390df47e7..3a007e06567 100644
--- a/cpp/src/arrow/compute/api_scalar.h
+++ b/cpp/src/arrow/compute/api_scalar.h
@@ -160,6 +160,18 @@ struct ARROW_EXPORT ProjectOptions : public FunctionOptions {
 
 /// @}
 
+/// \brief Get the absolute value of a value. Array values can be of arbitrary
+/// length. If argument is null the result will be null.
+///
+/// \param[in] arg the value transformed
+/// \param[in] options arithmetic options (overflow handling), optional
+/// \param[in] ctx the function execution context, optional
+/// \return the elementwise absolute value
+ARROW_EXPORT
+Result<Datum> AbsoluteValue(const Datum& arg,
+                            ArithmeticOptions options = ArithmeticOptions(),
+                            ExecContext* ctx = NULLPTR);
+
 /// \brief Add two values together. Array values must be the same length. If
 /// either addend is null the result will be null.
 ///
diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
index f6f7555ab61..743d2e3fc0e 100644
--- a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
@@ -16,6 +16,7 @@
 // under the License.
 
 #include <cmath>
+#include <limits>
 
 #include "arrow/compute/kernels/common.h"
 #include "arrow/type_traits.h"
@@ -66,6 +67,47 @@ constexpr Unsigned to_unsigned(T signed_) {
   return static_cast<Unsigned>(signed_);
 }
 
+struct AbsoluteValue {
+  template <typename T, typename Arg>
+  static constexpr enable_if_floating_point<T> Call(KernelContext*, T arg, Status*) {
+    return std::fabs(arg);
+  }
+
+  template <typename T, typename Arg>
+  static constexpr enable_if_unsigned_integer<T> Call(KernelContext*, T arg, Status*) {
+    return arg;
+  }
+
+  template <typename T, typename Arg>
+  static constexpr enable_if_signed_integer<T> Call(KernelContext*, T arg, Status* st) {
+    return (arg < 0) ? arrow::internal::SafeSignedNegate(arg) : arg;
+  }
+};
+
+struct AbsoluteValueChecked {
+  template <typename T, typename Arg>
+  static enable_if_signed_integer<T> Call(KernelContext*, Arg arg, Status* st) {
+    static_assert(std::is_same<T, Arg>::value, "");
+    if (arg == std::numeric_limits<Arg>::min()) {
+      *st = Status::Invalid("overflow");
+      return arg;
+    }
+    return std::abs(arg);
+  }
+
+  template <typename T, typename Arg>
+  static enable_if_unsigned_integer<T> Call(KernelContext* ctx, Arg arg, Status* st) {
+    static_assert(std::is_same<T, Arg>::value, "");
+    return arg;
+  }
+
+  template <typename T, typename Arg>
+  static constexpr enable_if_floating_point<T> Call(KernelContext*, Arg arg, Status* st) {
+    static_assert(std::is_same<T, Arg>::value, "");
+    return std::fabs(arg);
+  }
+};
+
 struct Add {
   template <typename T>
   static constexpr enable_if_floating_point<T> Call(KernelContext*, T left, T right,
@@ -446,6 +488,19 @@ std::shared_ptr<ScalarFunction> MakeUnaryArithmeticFunction(std::string name,
   return func;
 }
 
+// Like MakeUnaryArithmeticFunction, but for arithmetic ops that need to run
+// only on non-null output.
+template <typename Op>
+std::shared_ptr<ScalarFunction> MakeUnaryArithmeticFunctionNotNull(
+    std::string name, const FunctionDoc* doc) {
+  auto func = std::make_shared<ArithmeticFunction>(name, Arity::Unary(), doc);
+  for (const auto& ty : NumericTypes()) {
+    auto exec = ArithmeticExecFromOp<ScalarUnaryNotNull, Op>(ty);
+    DCHECK_OK(func->AddKernel({ty}, ty, exec));
+  }
+  return func;
+}
+
 // Like MakeUnaryArithmeticFunction, but for signed arithmetic ops that need to run
 // only on non-null output.
 template <typename Op>
@@ -461,6 +516,19 @@ std::shared_ptr<ScalarFunction> MakeUnarySignedArithmeticFunctionNotNull(
   return func;
 }
 
+const FunctionDoc absolute_value_doc{
+    "Calculate the absolute value of the argument element-wise",
+    ("Results will wrap around on integer overflow.\n"
+     "Use function \"abs_checked\" if you want overflow\n"
+     "to return an error."),
+    {"x"}};
+
+const FunctionDoc absolute_value_checked_doc{
+    "Calculate the absolute value of the argument element-wise",
+    ("This function returns an error on overflow.  For a variant that\n"
+     "doesn't fail on overflow, use function \"abs\"."),
+    {"x"}};
+
 const FunctionDoc add_doc{"Add the arguments element-wise",
                           ("Results will wrap around on integer overflow.\n"
                            "Use function \"add_checked\" if you want overflow\n"
@@ -537,6 +605,16 @@ const FunctionDoc pow_checked_doc{
 }  // namespace
 
 void RegisterScalarArithmetic(FunctionRegistry* registry) {
+  // ----------------------------------------------------------------------
+  auto absolute_value =
+      MakeUnaryArithmeticFunction<AbsoluteValue>("abs", &absolute_value_doc);
+  DCHECK_OK(registry->AddFunction(std::move(absolute_value)));
+
+  // ----------------------------------------------------------------------
+  auto absolute_value_checked = MakeUnaryArithmeticFunctionNotNull<AbsoluteValueChecked>(
+      "abs_checked", &absolute_value_checked_doc);
+  DCHECK_OK(registry->AddFunction(std::move(absolute_value_checked)));
+
   // ----------------------------------------------------------------------
   auto add = MakeArithmeticFunction<Add>("add", &add_doc);
   DCHECK_OK(registry->AddFunction(std::move(add)));
diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
index fafba4b331b..ff66fcf1d12 100644
--- a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
@@ -933,7 +933,7 @@ TEST(TestBinaryArithmetic, AddWithImplicitCastsUint64EdgeCase) {
 }
 
 TEST(TestUnaryArithmetic, DispatchBest) {
-  for (std::string name : {"negate"}) {
+  for (std::string name : {"negate", "abs", "abs_checked"}) {
     for (const auto& ty : {int8(), int16(), int32(), int64(), uint8(), uint16(), uint32(),
                            uint64(), float32(), float64()}) {
       CheckDispatchBest(name, {ty}, {ty});
@@ -948,7 +948,7 @@ TEST(TestUnaryArithmetic, DispatchBest) {
     }
   }
 
-  for (std::string name : {"negate", "negate_checked"}) {
+  for (std::string name : {"negate", "negate_checked", "abs", "abs_checked"}) {
     CheckDispatchFails(name, {null()});
   }
 }
@@ -1057,5 +1057,109 @@ TYPED_TEST(TestUnaryArithmeticFloating, Negate) {
   }
 }
 
+TYPED_TEST(TestUnaryArithmeticSigned, AbsoluteValue) {
+  using CType = typename TestFixture::CType;
+
+  auto min = std::numeric_limits<CType>::min();
+  auto max = std::numeric_limits<CType>::max();
+
+  for (auto check_overflow : {false, true}) {
+    this->SetOverflowCheck(check_overflow);
+    // Empty array
+    this->AssertUnaryOp(AbsoluteValue, "[]", "[]");
+    // Scalar/arrays with nulls
+    this->AssertUnaryOp(AbsoluteValue, "[null]", "[null]");
+    this->AssertUnaryOp(AbsoluteValue, "[1, null, -10]", "[1, null, 10]");
+    this->AssertUnaryOp(AbsoluteValue, this->MakeNullScalar(), this->MakeNullScalar());
+    // Scalar/arrays with zeros
+    this->AssertUnaryOp(AbsoluteValue, "[0, -0]", "[0, 0]");
+    this->AssertUnaryOp(AbsoluteValue, -0, 0);
+    this->AssertUnaryOp(AbsoluteValue, 0, 0);
+    // Ordinary scalar/arrays (positive inputs)
+    this->AssertUnaryOp(AbsoluteValue, "[1, 10, 127]", "[1, 10, 127]");
+    this->AssertUnaryOp(AbsoluteValue, 1, 1);
+    this->AssertUnaryOp(AbsoluteValue, this->MakeScalar(1), this->MakeScalar(1));
+    // Ordinary scalar/arrays (negative inputs)
+    this->AssertUnaryOp(AbsoluteValue, "[-1, -10, -127]", "[1, 10, 127]");
+    this->AssertUnaryOp(AbsoluteValue, -1, 1);
+    this->AssertUnaryOp(AbsoluteValue, MakeArray(-1), "[1]");
+    // Min/max
+    this->AssertUnaryOp(AbsoluteValue, max, max);
+    if (check_overflow) {
+      this->AssertUnaryOpRaises(AbsoluteValue, MakeArray(min), "overflow");
+    } else {
+      this->AssertUnaryOp(AbsoluteValue, min, min);
+    }
+  }
+
+  // Overflow should not be checked on underlying value slots when output would be null
+  this->SetOverflowCheck(true);
+  auto arg = ArrayFromJSON(this->type_singleton(), MakeArray(-1, max, min));
+  arg = TweakValidityBit(arg, 1, false);
+  arg = TweakValidityBit(arg, 2, false);
+  this->AssertUnaryOp(AbsoluteValue, arg, "[1, null, null]");
+}
+
+TYPED_TEST(TestUnaryArithmeticUnsigned, AbsoluteValue) {
+  using CType = typename TestFixture::CType;
+
+  auto min = std::numeric_limits<CType>::min();
+  auto max = std::numeric_limits<CType>::max();
+
+  for (auto check_overflow : {false, true}) {
+    this->SetOverflowCheck(check_overflow);
+    // Empty arrays
+    this->AssertUnaryOp(AbsoluteValue, "[]", "[]");
+    // Array with nulls
+    this->AssertUnaryOp(AbsoluteValue, "[null]", "[null]");
+    this->AssertUnaryOp(AbsoluteValue, this->MakeNullScalar(), this->MakeNullScalar());
+    // Ordinary arrays
+    this->AssertUnaryOp(AbsoluteValue, "[0, 1, 10, 127]", "[0, 1, 10, 127]");
+    // Min/max
+    this->AssertUnaryOp(AbsoluteValue, min, min);
+    this->AssertUnaryOp(AbsoluteValue, max, max);
+  }
+}
+
+TYPED_TEST(TestUnaryArithmeticFloating, AbsoluteValue) {
+  using CType = typename TestFixture::CType;
+
+  auto min = std::numeric_limits<CType>::lowest();
+  auto max = std::numeric_limits<CType>::max();
+
+  for (auto check_overflow : {false, true}) {
+    this->SetOverflowCheck(check_overflow);
+    // Empty array
+    this->AssertUnaryOp(AbsoluteValue, "[]", "[]");
+    // Scalar/arrays with nulls
+    this->AssertUnaryOp(AbsoluteValue, "[null]", "[null]");
+    this->AssertUnaryOp(AbsoluteValue, "[1.3, null, -10.80]", "[1.3, null, 10.80]");
+    this->AssertUnaryOp(AbsoluteValue, this->MakeNullScalar(), this->MakeNullScalar());
+    // Scalars/arrays with zeros
+    this->AssertUnaryOp(AbsoluteValue, "[0.0, -0.0]", "[0.0, 0.0]");
+    this->AssertUnaryOp(AbsoluteValue, -0.0F, 0.0F);
+    this->AssertUnaryOp(AbsoluteValue, 0.0F, 0.0F);
+    // Ordinary scalars/arrays (positive inputs)
+    this->AssertUnaryOp(AbsoluteValue, "[1.3, 10.80, 12748.001]",
+                        "[1.3, 10.80, 12748.001]");
+    this->AssertUnaryOp(AbsoluteValue, 1.3F, 1.3F);
+    this->AssertUnaryOp(AbsoluteValue, this->MakeScalar(1.3F), this->MakeScalar(1.3F));
+    // Ordinary scalars/arrays (negative inputs)
+    this->AssertUnaryOp(AbsoluteValue, "[-1.3, -10.80, -12748.001]",
+                        "[1.3, 10.80, 12748.001]");
+    this->AssertUnaryOp(AbsoluteValue, -1.3F, 1.3F);
+    this->AssertUnaryOp(AbsoluteValue, MakeArray(-1.3F), "[1.3]");
+    // Arrays with infinites
+    this->AssertUnaryOp(AbsoluteValue, "[Inf, -Inf]", "[Inf, Inf]");
+    // Arrays with NaNs
+    this->SetNansEqual(true);
+    this->AssertUnaryOp(AbsoluteValue, "[NaN]", "[NaN]");
+    this->AssertUnaryOp(AbsoluteValue, "[-NaN]", "[NaN]");
+    // Min/max
+    this->AssertUnaryOp(AbsoluteValue, min, max);
+    this->AssertUnaryOp(AbsoluteValue, max, max);
+  }
+}
+
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/util/int_util_internal.h b/cpp/src/arrow/util/int_util_internal.h
index 162f1d92a2b..4136706629f 100644
--- a/cpp/src/arrow/util/int_util_internal.h
+++ b/cpp/src/arrow/util/int_util_internal.h
@@ -63,7 +63,7 @@ OPS_WITH_OVERFLOW(DivideWithOverflow, div)
 #undef OP_WITH_OVERFLOW
 #undef OPS_WITH_OVERFLOW
 
-// Define functions NegateWithOverflow with the signature `bool(T u, T* out)`
+// Define function NegateWithOverflow with the signature `bool(T u, T* out)`
 // where T is a signed integer type.  On overflow, these functions return true.
 // Otherwise, false is returned and `out` is updated with the result of the
 // operation.
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index 592dc4ec1b0..d34eeee526f 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -260,6 +260,10 @@ an ``Invalid`` :class:`Status` when overflow is detected.
 +--------------------------+------------+--------------------+---------------------+
 | Function name            | Arity      | Input types        | Output type         |
 +==========================+============+====================+=====================+
+| abs                      | Unary      | Numeric            | Numeric             |
++--------------------------+------------+--------------------+---------------------+
+| abs_checked              | Unary      | Numeric            | Numeric             |
++--------------------------+------------+--------------------+---------------------+
 | add                      | Binary     | Numeric            | Numeric             |
 +--------------------------+------------+--------------------+---------------------+
 | add_checked              | Binary     | Numeric            | Numeric             |
diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst
index da16ccdfa29..56ccb4ae1ef 100644
--- a/docs/source/python/api/compute.rst
+++ b/docs/source/python/api/compute.rst
@@ -45,6 +45,8 @@ throws an ``ArrowInvalid`` exception when overflow is detected.
 .. autosummary::
    :toctree: ../generated/
 
+   abs
+   abs_checked
    add
    add_checked
    divide

From b5f958ea300e3dedcdb3d734b55da6dcdda14fb3 Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Thu, 20 May 2021 11:14:41 +0900
Subject: [PATCH 266/719] ARROW-12831: [CI][macOS] Remove needless Homebrew
 workaround

This was introduced by #9119 (ARROW-1152) but this is no longer
needed.

Closes #10359 from kou/ci-macos-remove-needless-homebrew-workaround

Authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 .github/workflows/cpp.yml    | 1 -
 .github/workflows/python.yml | 1 -
 .github/workflows/ruby.yml   | 1 -
 3 files changed, 3 deletions(-)

diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml
index 62c8e5f8af2..2e34c5c6df9 100644
--- a/.github/workflows/cpp.yml
+++ b/.github/workflows/cpp.yml
@@ -222,7 +222,6 @@ jobs:
         run: |
           rm -f /usr/local/bin/2to3
           brew update --preinstall
-          brew unlink gcc@8 gcc@9
           brew bundle --file=cpp/Brewfile
       - name: Build
         shell: bash
diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index 02451bdcd01..2a7d28dfa16 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -140,7 +140,6 @@ jobs:
         run: |
           rm -f /usr/local/bin/2to3
           brew update --preinstall
-          brew unlink gcc@8 gcc@9
           brew bundle --file=cpp/Brewfile
           brew install coreutils
           python3 -mpip install \
diff --git a/.github/workflows/ruby.yml b/.github/workflows/ruby.yml
index 380df30cf37..91470e30fd4 100644
--- a/.github/workflows/ruby.yml
+++ b/.github/workflows/ruby.yml
@@ -132,7 +132,6 @@ jobs:
         run: |
           rm -f /usr/local/bin/2to3
           brew update --preinstall
-          brew unlink gcc@8 gcc@9
           brew bundle --file=cpp/Brewfile
           brew bundle --file=c_glib/Brewfile
       - name: Install Ruby Dependencies

From 25dab98270b19cb1ec8af814b609cae9b4e96c72 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Pedro?= <joaop@simbioseventures.com>
Date: Thu, 20 May 2021 10:28:24 +0530
Subject: [PATCH 267/719] ARROW-12443: [C++][Gandiva] Implement castVARCHAR
 function for varbinary input
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implement castVARCHAR function for varbinary input

Closes #10095 from jpedroantunes/feature/add-cast-varchar-varbinary and squashes the following commits:

bab024601 <João Pedro> Improve castvarchar for binary macro name
395595393 <João Pedro> Remove unused empty line on string ops
830aabe90 <João Pedro> Fix linter errors on string ops files
ef918af60 <João Pedro> Fix linter errors on string ops files
82fa58fe8 <João Pedro> Fix wrong function name defined on native function
a129dff02 <João Pedro> Add implementation for castvarchar for binary values, using the same one as the function for varchar inputs
72ec38b02 <João Pedro> Add base logic for the castvarchar for binary inputs

Authored-by: João Pedro <joaop@simbioseventures.com>
Signed-off-by: Praveen <praveen@dremio.com>
---
 cpp/src/gandiva/function_registry_string.cc   |   4 +
 cpp/src/gandiva/precompiled/string_ops.cc     | 173 +++++++++---------
 .../gandiva/precompiled/string_ops_test.cc    | 133 +++++++++++++-
 cpp/src/gandiva/precompiled/types.h           |   4 +
 4 files changed, 227 insertions(+), 87 deletions(-)

diff --git a/cpp/src/gandiva/function_registry_string.cc b/cpp/src/gandiva/function_registry_string.cc
index d1f97cdb3e8..1c4c9e8571f 100644
--- a/cpp/src/gandiva/function_registry_string.cc
+++ b/cpp/src/gandiva/function_registry_string.cc
@@ -92,6 +92,10 @@ std::vector<NativeFunction> GetStringFunctionRegistry() {
                      kResultNullIfNull, "castVARCHAR_utf8_int64",
                      NativeFunction::kNeedsContext),
 
+      NativeFunction("castVARCHAR", {}, DataTypeVector{binary(), int64()}, utf8(),
+                     kResultNullIfNull, "castVARCHAR_binary_int64",
+                     NativeFunction::kNeedsContext),
+
       NativeFunction("castVARCHAR", {}, DataTypeVector{int32(), int64()}, utf8(),
                      kResultNullIfNull, "gdv_fn_castVARCHAR_int32_int64",
                      NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors),
diff --git a/cpp/src/gandiva/precompiled/string_ops.cc b/cpp/src/gandiva/precompiled/string_ops.cc
index fa9164bd139..64dbb4a61be 100644
--- a/cpp/src/gandiva/precompiled/string_ops.cc
+++ b/cpp/src/gandiva/precompiled/string_ops.cc
@@ -534,88 +534,96 @@ const char* castVARCHAR_bool_int64(gdv_int64 context, gdv_boolean value,
 }
 
 // Truncates the string to given length
-FORCE_INLINE
-const char* castVARCHAR_utf8_int64(gdv_int64 context, const char* data,
-                                   gdv_int32 data_len, int64_t out_len,
-                                   int32_t* out_length) {
-  int32_t len = static_cast<int32_t>(out_len);
-
-  if (len < 0) {
-    gdv_fn_context_set_error_msg(context, "Output buffer length can't be negative");
-    *out_length = 0;
-    return "";
-  }
-
-  if (len >= data_len || len == 0) {
-    *out_length = data_len;
-    return data;
-  }
-
-  int32_t remaining = len;
-  int32_t index = 0;
-  bool is_multibyte = false;
-  do {
-    // In utf8, MSB of a single byte unicode char is always 0,
-    // whereas for a multibyte character the MSB of each byte is 1.
-    // So for a single byte char, a bitwise-and with x80 (10000000) will be 0
-    // and it won't be 0 for bytes of a multibyte char
-    char* data_ptr = const_cast<char*>(data);
-
-    // we advance byte by byte till the 8 byte boundary then advance 8 bytes at a time
-    auto num_bytes = reinterpret_cast<uintptr_t>(data_ptr) & 0x07;
-    num_bytes = (8 - num_bytes) & 0x07;
-    while (num_bytes > 0) {
-      uint8_t* ptr = reinterpret_cast<uint8_t*>(data_ptr + index);
-      if ((*ptr & 0x80) != 0) {
-        is_multibyte = true;
-        break;
-      }
-      index++;
-      remaining--;
-      num_bytes--;
-    }
-    if (is_multibyte) break;
-    while (remaining >= 8) {
-      uint64_t* ptr = reinterpret_cast<uint64_t*>(data_ptr + index);
-      if ((*ptr & 0x8080808080808080) != 0) {
-        is_multibyte = true;
-        break;
-      }
-      index += 8;
-      remaining -= 8;
-    }
-    if (is_multibyte) break;
-    if (remaining >= 4) {
-      uint32_t* ptr = reinterpret_cast<uint32_t*>(data_ptr + index);
-      if ((*ptr & 0x80808080) != 0) break;
-      index += 4;
-      remaining -= 4;
-    }
-    while (remaining > 0) {
-      uint8_t* ptr = reinterpret_cast<uint8_t*>(data_ptr + index);
-      if ((*ptr & 0x80) != 0) {
-        is_multibyte = true;
-        break;
-      }
-      index++;
-      remaining--;
-    }
-    if (is_multibyte) break;
-    // reached here; all are single byte characters
-    *out_length = len;
-    return data;
-  } while (false);
-
-  // detected multibyte utf8 characters; slow path
-  int32_t byte_pos = utf8_byte_pos(context, data + index, data_len - index, len - index);
-  if (byte_pos < 0) {
-    *out_length = 0;
-    return "";
-  }
-
-  *out_length = index + byte_pos;
-  return data;
-}
+#define CAST_VARCHAR_FROM_VARLEN_TYPE(TYPE)                                            \
+  FORCE_INLINE                                                                         \
+  const char* castVARCHAR_##TYPE##_int64(gdv_int64 context, const char* data,          \
+                                         gdv_int32 data_len, int64_t out_len,          \
+                                         int32_t* out_length) {                        \
+    int32_t len = static_cast<int32_t>(out_len);                                       \
+                                                                                       \
+    if (len < 0) {                                                                     \
+      gdv_fn_context_set_error_msg(context, "Output buffer length can't be negative"); \
+      *out_length = 0;                                                                 \
+      return "";                                                                       \
+    }                                                                                  \
+                                                                                       \
+    if (len >= data_len || len == 0) {                                                 \
+      *out_length = data_len;                                                          \
+      return data;                                                                     \
+    }                                                                                  \
+                                                                                       \
+    int32_t remaining = len;                                                           \
+    int32_t index = 0;                                                                 \
+    bool is_multibyte = false;                                                         \
+    do {                                                                               \
+      /* In utf8, MSB of a single byte unicode char is always 0,                       \
+       * whereas for a multibyte character the MSB of each byte is 1.                  \
+       * So for a single byte char, a bitwise-and with x80 (10000000) will be 0        \
+       * and it won't be 0 for bytes of a multibyte char.                              \
+       */                                                                              \
+      char* data_ptr = const_cast<char*>(data);                                        \
+                                                                                       \
+      /* advance byte by byte till the 8-byte boundary then advance 8 bytes */         \
+      auto num_bytes = reinterpret_cast<uintptr_t>(data_ptr) & 0x07;                   \
+      num_bytes = (8 - num_bytes) & 0x07;                                              \
+      while (num_bytes > 0) {                                                          \
+        uint8_t* ptr = reinterpret_cast<uint8_t*>(data_ptr + index);                   \
+        if ((*ptr & 0x80) != 0) {                                                      \
+          is_multibyte = true;                                                         \
+          break;                                                                       \
+        }                                                                              \
+        index++;                                                                       \
+        remaining--;                                                                   \
+        num_bytes--;                                                                   \
+      }                                                                                \
+      if (is_multibyte) break;                                                         \
+      while (remaining >= 8) {                                                         \
+        uint64_t* ptr = reinterpret_cast<uint64_t*>(data_ptr + index);                 \
+        if ((*ptr & 0x8080808080808080) != 0) {                                        \
+          is_multibyte = true;                                                         \
+          break;                                                                       \
+        }                                                                              \
+        index += 8;                                                                    \
+        remaining -= 8;                                                                \
+      }                                                                                \
+      if (is_multibyte) break;                                                         \
+      if (remaining >= 4) {                                                            \
+        uint32_t* ptr = reinterpret_cast<uint32_t*>(data_ptr + index);                 \
+        if ((*ptr & 0x80808080) != 0) break;                                           \
+        index += 4;                                                                    \
+        remaining -= 4;                                                                \
+      }                                                                                \
+      while (remaining > 0) {                                                          \
+        uint8_t* ptr = reinterpret_cast<uint8_t*>(data_ptr + index);                   \
+        if ((*ptr & 0x80) != 0) {                                                      \
+          is_multibyte = true;                                                         \
+          break;                                                                       \
+        }                                                                              \
+        index++;                                                                       \
+        remaining--;                                                                   \
+      }                                                                                \
+      if (is_multibyte) break;                                                         \
+      /* reached here; all are single byte characters */                               \
+      *out_length = len;                                                               \
+      return data;                                                                     \
+    } while (false);                                                                   \
+                                                                                       \
+    /* detected multibyte utf8 characters; slow path */                                \
+    int32_t byte_pos =                                                                 \
+        utf8_byte_pos(context, data + index, data_len - index, len - index);           \
+    if (byte_pos < 0) {                                                                \
+      *out_length = 0;                                                                 \
+      return "";                                                                       \
+    }                                                                                  \
+                                                                                       \
+    *out_length = index + byte_pos;                                                    \
+    return data;                                                                       \
+  }
+
+CAST_VARCHAR_FROM_VARLEN_TYPE(utf8)
+CAST_VARCHAR_FROM_VARLEN_TYPE(binary)
+
+#undef CAST_VARCHAR_FROM_VARLEN_TYPE
 
 #define IS_NULL(NAME, TYPE)                                                \
   FORCE_INLINE                                                             \
@@ -1519,5 +1527,4 @@ const char* binary_string(gdv_int64 context, const char* text, gdv_int32 text_le
   *out_len = j;
   return ret;
 }
-
 }  // extern "C"
diff --git a/cpp/src/gandiva/precompiled/string_ops_test.cc b/cpp/src/gandiva/precompiled/string_ops_test.cc
index 9326aac1e0f..b3d51e59446 100644
--- a/cpp/src/gandiva/precompiled/string_ops_test.cc
+++ b/cpp/src/gandiva/precompiled/string_ops_test.cc
@@ -200,12 +200,136 @@ TEST(TestStringOps, TestCastBoolToVarchar) {
   ctx.Reset();
 }
 
-TEST(TestStringOps, TestCastVarhcar) {
+TEST(TestStringOps, TestCastVarchar) {
   gandiva::ExecutionContext ctx;
   uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
   gdv_int32 out_len = 0;
 
-  const char* out_str = castVARCHAR_utf8_int64(ctx_ptr, "asdf", 4, 1, &out_len);
+  // BINARY TESTS
+  const char* out_str = castVARCHAR_binary_int64(ctx_ptr, "asdf", 4, 1, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "a");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = castVARCHAR_binary_int64(ctx_ptr, "asdf", 4, 6, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "asdf");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = castVARCHAR_binary_int64(ctx_ptr, "asdf", 4, 3, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "asd");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = castVARCHAR_binary_int64(ctx_ptr, "asdf", 4, 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "asdf");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = castVARCHAR_binary_int64(ctx_ptr, "asdf", 4, 5, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "asdf");
+  EXPECT_FALSE(ctx.has_error());
+
+  // do not truncate if output length is 0
+  out_str = castVARCHAR_binary_int64(ctx_ptr, "asdf", 4, 0, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "asdf");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = castVARCHAR_binary_int64(ctx_ptr, "", 0, 3, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = castVARCHAR_binary_int64(ctx_ptr, "çåå†", 9, 3, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "çåå");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = castVARCHAR_binary_int64(ctx_ptr, "çåå†", 9, 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "çåå†");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = castVARCHAR_binary_int64(ctx_ptr, "çåå†", 9, 5, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "çåå†");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = castVARCHAR_binary_int64(ctx_ptr, "çåå†", 9, 10, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "çåå†");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = castVARCHAR_binary_int64(ctx_ptr, "çåå†", 9, 6, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "çåå†");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = castVARCHAR_binary_int64(ctx_ptr, "abc", 3, -1, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+  EXPECT_THAT(ctx.get_error(),
+              ::testing::HasSubstr("Output buffer length can't be negative"));
+  ctx.Reset();
+
+  std::string z("aa\xc3");
+  out_str = castVARCHAR_binary_int64(ctx_ptr, z.data(), static_cast<int>(z.length()), 2,
+                                     &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "aa");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = castVARCHAR_binary_int64(ctx_ptr, "1234567812341234", 16, 16, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "1234567812341234");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = castVARCHAR_binary_int64(ctx_ptr, "1234567812341234", 16, 15, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "123456781234123");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = castVARCHAR_binary_int64(ctx_ptr, "1234567812341234", 16, 12, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "123456781234");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = castVARCHAR_binary_int64(ctx_ptr, "1234567812341234", 16, 8, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "12345678");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = castVARCHAR_binary_int64(ctx_ptr, "1234567812341234", 16, 7, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "1234567");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = castVARCHAR_binary_int64(ctx_ptr, "1234567812341234", 16, 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "1234");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = castVARCHAR_binary_int64(ctx_ptr, "1234567812341234", 16, 3, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "123");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = castVARCHAR_binary_int64(ctx_ptr, "1234567812çåå†123456", 25, 16, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "1234567812çåå†12");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = castVARCHAR_binary_int64(ctx_ptr, "123456781234çåå†1234", 25, 15, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "123456781234çåå");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = castVARCHAR_binary_int64(ctx_ptr, "12çåå†34567812123456", 25, 16, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "12çåå†3456781212");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = castVARCHAR_binary_int64(ctx_ptr, "çåå†1234567812123456", 25, 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "çåå†");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = castVARCHAR_binary_int64(ctx_ptr, "çåå†1234567812123456", 25, 3, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "çåå");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = castVARCHAR_binary_int64(ctx_ptr, "123456781234çåå†", 21, 40, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "123456781234çåå†");
+  EXPECT_FALSE(ctx.has_error());
+
+  std::string f("123456781234çåå\xc3");
+  out_str = castVARCHAR_binary_int64(ctx_ptr, f.data(), static_cast<int32_t>(f.length()),
+                                     16, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+  EXPECT_THAT(ctx.get_error(),
+              ::testing::HasSubstr(
+                  "unexpected byte \\c3 encountered while decoding utf8 string"));
+  ctx.Reset();
+
+  // UTF8 TESTS
+  out_str = castVARCHAR_utf8_int64(ctx_ptr, "asdf", 4, 1, &out_len);
   EXPECT_EQ(std::string(out_str, out_len), "a");
   EXPECT_FALSE(ctx.has_error());
 
@@ -255,6 +379,7 @@ TEST(TestStringOps, TestCastVarhcar) {
   EXPECT_FALSE(ctx.has_error());
 
   out_str = castVARCHAR_utf8_int64(ctx_ptr, "abc", 3, -1, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
   EXPECT_THAT(ctx.get_error(),
               ::testing::HasSubstr("Output buffer length can't be negative"));
   ctx.Reset();
@@ -317,8 +442,8 @@ TEST(TestStringOps, TestCastVarhcar) {
   EXPECT_EQ(std::string(out_str, out_len), "123456781234çåå†");
   EXPECT_FALSE(ctx.has_error());
 
-  std::string f("123456781234çåå\xc3");
-  out_str = castVARCHAR_utf8_int64(ctx_ptr, f.data(), static_cast<int32_t>(f.length()),
+  std::string y("123456781234çåå\xc3");
+  out_str = castVARCHAR_utf8_int64(ctx_ptr, y.data(), static_cast<int32_t>(y.length()),
                                    16, &out_len);
   EXPECT_EQ(std::string(out_str, out_len), "");
   EXPECT_THAT(ctx.get_error(),
diff --git a/cpp/src/gandiva/precompiled/types.h b/cpp/src/gandiva/precompiled/types.h
index 1b0f96e0ab7..736406fe143 100644
--- a/cpp/src/gandiva/precompiled/types.h
+++ b/cpp/src/gandiva/precompiled/types.h
@@ -369,6 +369,10 @@ const char* concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
     gdv_int32 in8_len, const char* in9, gdv_int32 in9_len, const char* in10,
     gdv_int32 in10_len, gdv_int32* out_len);
 
+const char* castVARCHAR_binary_int64(gdv_int64 context, const char* data,
+                                     gdv_int32 data_len, int64_t out_len,
+                                     int32_t* out_length);
+
 const char* castVARCHAR_utf8_int64(gdv_int64 context, const char* data,
                                    gdv_int32 data_len, int64_t out_len,
                                    int32_t* out_length);

From 52d180d20b07277e733fcb400895b67facc1c670 Mon Sep 17 00:00:00 2001
From: Anthony Louis <anthony@simbioseventures.com>
Date: Thu, 20 May 2021 10:30:52 +0530
Subject: [PATCH 268/719] ARROW-12166: [C++][Gandiva] Implements
 CONVERT_TO(value, type) function

Implements the CONVERT_TO function inside the Gandiva, which receives a value for a defined type and returns its bytes representation.

The behavior is based on Apache Drill implementation: https://drill.apache.org/docs/data-type-conversion/#convert_to-and-convert_from

Closes #9861 from anthonylouisbsb/feature/add-convert-to-function and squashes the following commits:

22d80a3a4 <Anthony Louis> Use capital letters in name
90e28236d <Anthony Louis> Change functions names and tests
3a88586ea <Anthony Louis> Change functions name
f3e06d236 <Anthony Louis> Fix macros for bigendian tests
c817a0501 <Anthony Louis> Fix function comment
80539816c <Anthony Louis> Add CONVERT_TO functions for big endian formats
ad991a66f <Anthony Louis> Add tests for the convert_to functions
3b1bccabc <Anthony Louis> Add convert_toBOOLEAN_BYTE function
36b2458e4 <Anthony Louis> Add convert_toBIGINT function
a17edda5e <Anthony Louis> Add convert_toTIME_EPOCH function
448ffcf06 <Anthony Louis> Add convert_toINT
c7d0010c6 <Anthony Louis> Add convert_toDOUBLE function

Authored-by: Anthony Louis <anthony@simbioseventures.com>
Signed-off-by: Praveen <praveen@dremio.com>
---
 cpp/src/gandiva/function_registry_string.cc   |  61 +++++
 cpp/src/gandiva/precompiled/string_ops.cc     | 217 +++++++++++++++++-
 .../gandiva/precompiled/string_ops_test.cc    |  80 +++++++
 cpp/src/gandiva/precompiled/types.h           |  35 +++
 4 files changed, 389 insertions(+), 4 deletions(-)

diff --git a/cpp/src/gandiva/function_registry_string.cc b/cpp/src/gandiva/function_registry_string.cc
index 1c4c9e8571f..e50069e738b 100644
--- a/cpp/src/gandiva/function_registry_string.cc
+++ b/cpp/src/gandiva/function_registry_string.cc
@@ -229,6 +229,67 @@ std::vector<NativeFunction> GetStringFunctionRegistry() {
                      "convert_replace_invalid_fromUTF8_binary",
                      NativeFunction::kNeedsContext),
 
+      NativeFunction("convert_toDOUBLE", {}, DataTypeVector{float64()}, binary(),
+                     kResultNullIfNull, "convert_toDOUBLE",
+                     NativeFunction::kNeedsContext),
+
+      NativeFunction("convert_toDOUBLE_be", {}, DataTypeVector{float64()}, binary(),
+                     kResultNullIfNull, "convert_toDOUBLE_be",
+                     NativeFunction::kNeedsContext),
+
+      NativeFunction("convert_toFLOAT", {}, DataTypeVector{float32()}, binary(),
+                     kResultNullIfNull, "convert_toFLOAT", NativeFunction::kNeedsContext),
+
+      NativeFunction("convert_toFLOAT_be", {}, DataTypeVector{float32()}, binary(),
+                     kResultNullIfNull, "convert_toFLOAT_be",
+                     NativeFunction::kNeedsContext),
+
+      NativeFunction("convert_toINT", {}, DataTypeVector{int32()}, binary(),
+                     kResultNullIfNull, "convert_toINT", NativeFunction::kNeedsContext),
+
+      NativeFunction("convert_toINT_be", {}, DataTypeVector{int32()}, binary(),
+                     kResultNullIfNull, "convert_toINT_be",
+                     NativeFunction::kNeedsContext),
+
+      NativeFunction("convert_toBIGINT", {}, DataTypeVector{int64()}, binary(),
+                     kResultNullIfNull, "convert_toBIGINT",
+                     NativeFunction::kNeedsContext),
+
+      NativeFunction("convert_toBIGINT_be", {}, DataTypeVector{int64()}, binary(),
+                     kResultNullIfNull, "convert_toBIGINT_be",
+                     NativeFunction::kNeedsContext),
+
+      NativeFunction("convert_toBOOLEAN_BYTE", {}, DataTypeVector{boolean()}, binary(),
+                     kResultNullIfNull, "convert_toBOOLEAN",
+                     NativeFunction::kNeedsContext),
+
+      NativeFunction("convert_toTIME_EPOCH", {}, DataTypeVector{time32()}, binary(),
+                     kResultNullIfNull, "convert_toTIME_EPOCH",
+                     NativeFunction::kNeedsContext),
+
+      NativeFunction("convert_toTIME_EPOCH_be", {}, DataTypeVector{time32()}, binary(),
+                     kResultNullIfNull, "convert_toTIME_EPOCH_be",
+                     NativeFunction::kNeedsContext),
+
+      NativeFunction("convert_toTIMESTAMP_EPOCH", {}, DataTypeVector{timestamp()},
+                     binary(), kResultNullIfNull, "convert_toTIMESTAMP_EPOCH",
+                     NativeFunction::kNeedsContext),
+
+      NativeFunction("convert_toTIMESTAMP_EPOCH_be", {}, DataTypeVector{timestamp()},
+                     binary(), kResultNullIfNull, "convert_toTIMESTAMP_EPOCH_be",
+                     NativeFunction::kNeedsContext),
+
+      NativeFunction("convert_toDATE_EPOCH", {}, DataTypeVector{date64()}, binary(),
+                     kResultNullIfNull, "convert_toDATE_EPOCH",
+                     NativeFunction::kNeedsContext),
+
+      NativeFunction("convert_toDATE_EPOCH_be", {}, DataTypeVector{date64()}, binary(),
+                     kResultNullIfNull, "convert_toDATE_EPOCH_be",
+                     NativeFunction::kNeedsContext),
+
+      NativeFunction("convert_toUTF8", {}, DataTypeVector{utf8()}, binary(),
+                     kResultNullIfNull, "convert_toUTF8", NativeFunction::kNeedsContext),
+
       NativeFunction("locate", {"position"}, DataTypeVector{utf8(), utf8(), int32()},
                      int32(), kResultNullIfNull, "locate_utf8_utf8_int32",
                      NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors),
diff --git a/cpp/src/gandiva/precompiled/string_ops.cc b/cpp/src/gandiva/precompiled/string_ops.cc
index 64dbb4a61be..b35062dadc3 100644
--- a/cpp/src/gandiva/precompiled/string_ops.cc
+++ b/cpp/src/gandiva/precompiled/string_ops.cc
@@ -19,10 +19,11 @@
 #include "arrow/util/value_parsing.h"
 extern "C" {
 
-#include <limits.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
+#include <algorithm>
+#include <climits>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
 
 #include "./types.h"
 
@@ -1318,6 +1319,214 @@ const char* convert_replace_invalid_fromUTF8_binary(int64_t context, const char*
   return ret;
 }
 
+// The function reverse a char array in-place
+static inline void reverse_char_buf(char* buf, int32_t len) {
+  char temp;
+
+  for (int32_t i = 0; i < len / 2; i++) {
+    int32_t pos_swp = len - (1 + i);
+    temp = buf[pos_swp];
+    buf[pos_swp] = buf[i];
+    buf[i] = temp;
+  }
+}
+
+// Converts a double variable to binary
+FORCE_INLINE
+const char* convert_toDOUBLE(int64_t context, double value, int32_t* out_len) {
+  *out_len = sizeof(value);
+  char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
+
+  if (ret == nullptr) {
+    gdv_fn_context_set_error_msg(context,
+                                 "Could not allocate memory for the output string");
+
+    *out_len = 0;
+    return "";
+  }
+
+  memcpy(ret, &value, *out_len);
+
+  return ret;
+}
+
+FORCE_INLINE
+const char* convert_toDOUBLE_be(int64_t context, double value, int32_t* out_len) {
+  // The function behaves like convert_toDOUBLE, but always return the result
+  // in big endian format
+  char* ret = const_cast<char*>(convert_toDOUBLE(context, value, out_len));
+
+#if ARROW_LITTLE_ENDIAN
+  reverse_char_buf(ret, *out_len);
+#endif
+
+  return ret;
+}
+
+// Converts a float variable to binary
+FORCE_INLINE
+const char* convert_toFLOAT(int64_t context, float value, int32_t* out_len) {
+  *out_len = sizeof(value);
+  char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
+
+  if (ret == nullptr) {
+    gdv_fn_context_set_error_msg(context,
+                                 "Could not allocate memory for the output string");
+
+    *out_len = 0;
+    return "";
+  }
+
+  memcpy(ret, &value, *out_len);
+
+  return ret;
+}
+
+FORCE_INLINE
+const char* convert_toFLOAT_be(int64_t context, float value, int32_t* out_len) {
+  // The function behaves like convert_toFLOAT, but always return the result
+  // in big endian format
+  char* ret = const_cast<char*>(convert_toFLOAT(context, value, out_len));
+
+#if ARROW_LITTLE_ENDIAN
+  reverse_char_buf(ret, *out_len);
+#endif
+
+  return ret;
+}
+
+// Converts a bigint(int with 64 bits) variable to binary
+FORCE_INLINE
+const char* convert_toBIGINT(int64_t context, int64_t value, int32_t* out_len) {
+  *out_len = sizeof(value);
+  char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
+
+  if (ret == nullptr) {
+    gdv_fn_context_set_error_msg(context,
+                                 "Could not allocate memory for the output string");
+
+    *out_len = 0;
+    return "";
+  }
+
+  memcpy(ret, &value, *out_len);
+
+  return ret;
+}
+
+FORCE_INLINE
+const char* convert_toBIGINT_be(int64_t context, int64_t value, int32_t* out_len) {
+  // The function behaves like convert_toBIGINT, but always return the result
+  // in big endian format
+  char* ret = const_cast<char*>(convert_toBIGINT(context, value, out_len));
+
+#if ARROW_LITTLE_ENDIAN
+  reverse_char_buf(ret, *out_len);
+#endif
+
+  return ret;
+}
+
+// Converts an integer(with 32 bits) variable to binary
+FORCE_INLINE
+const char* convert_toINT(int64_t context, int32_t value, int32_t* out_len) {
+  *out_len = sizeof(value);
+  char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
+
+  if (ret == nullptr) {
+    gdv_fn_context_set_error_msg(context,
+                                 "Could not allocate memory for the output string");
+
+    *out_len = 0;
+    return "";
+  }
+
+  memcpy(ret, &value, *out_len);
+
+  return ret;
+}
+
+FORCE_INLINE
+const char* convert_toINT_be(int64_t context, int32_t value, int32_t* out_len) {
+  // The function behaves like convert_toINT, but always return the result
+  // in big endian format
+  char* ret = const_cast<char*>(convert_toINT(context, value, out_len));
+
+#if ARROW_LITTLE_ENDIAN
+  reverse_char_buf(ret, *out_len);
+#endif
+
+  return ret;
+}
+
+// Converts a boolean variable to binary
+FORCE_INLINE
+const char* convert_toBOOLEAN(int64_t context, bool value, int32_t* out_len) {
+  *out_len = sizeof(value);
+  char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
+
+  if (ret == nullptr) {
+    gdv_fn_context_set_error_msg(context,
+                                 "Could not allocate memory for the output string");
+
+    *out_len = 0;
+    return "";
+  }
+
+  memcpy(ret, &value, *out_len);
+
+  return ret;
+}
+
+// Converts a time variable to binary
+FORCE_INLINE
+const char* convert_toTIME_EPOCH(int64_t context, int32_t value, int32_t* out_len) {
+  return convert_toINT(context, value, out_len);
+}
+
+FORCE_INLINE
+const char* convert_toTIME_EPOCH_be(int64_t context, int32_t value, int32_t* out_len) {
+  // The function behaves as convert_toTIME_EPOCH, but
+  // returns the bytes in big endian format
+  return convert_toINT_be(context, value, out_len);
+}
+
+// Converts a timestamp variable to binary
+FORCE_INLINE
+const char* convert_toTIMESTAMP_EPOCH(int64_t context, int64_t timestamp,
+                                      int32_t* out_len) {
+  return convert_toBIGINT(context, timestamp, out_len);
+}
+
+FORCE_INLINE
+const char* convert_toTIMESTAMP_EPOCH_be(int64_t context, int64_t timestamp,
+                                         int32_t* out_len) {
+  // The function behaves as convert_toTIMESTAMP_EPOCH, but
+  // returns the bytes in big endian format
+  return convert_toBIGINT_be(context, timestamp, out_len);
+}
+
+// Converts a date variable to binary
+FORCE_INLINE
+const char* convert_toDATE_EPOCH(int64_t context, int64_t date, int32_t* out_len) {
+  return convert_toBIGINT(context, date, out_len);
+}
+
+FORCE_INLINE
+const char* convert_toDATE_EPOCH_be(int64_t context, int64_t date, int32_t* out_len) {
+  // The function behaves as convert_toDATE_EPOCH, but
+  // returns the bytes in big endian format
+  return convert_toBIGINT_be(context, date, out_len);
+}
+
+// Converts a string variable to binary
+FORCE_INLINE
+const char* convert_toUTF8(int64_t context, const char* value, int32_t value_len,
+                           int32_t* out_len) {
+  *out_len = value_len;
+  return value;
+}
+
 // Search for a string within another string
 FORCE_INLINE
 gdv_int32 locate_utf8_utf8(gdv_int64 context, const char* sub_str, gdv_int32 sub_str_len,
diff --git a/cpp/src/gandiva/precompiled/string_ops_test.cc b/cpp/src/gandiva/precompiled/string_ops_test.cc
index b3d51e59446..fd36665065b 100644
--- a/cpp/src/gandiva/precompiled/string_ops_test.cc
+++ b/cpp/src/gandiva/precompiled/string_ops_test.cc
@@ -18,6 +18,8 @@
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
 
+#include <limits>
+
 #include "gandiva/execution_context.h"
 #include "gandiva/precompiled/types.h"
 
@@ -1213,4 +1215,82 @@ TEST(TestStringOps, TestSplitPart) {
   EXPECT_EQ(std::string(out_str, out_len), "ååçåå");
 }
 
+TEST(TestStringOps, TestConvertTo) {
+  gandiva::ExecutionContext ctx;
+  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
+  gdv_int32 out_len = 0;
+  const char* out_str;
+
+  const int32_t ALL_BYTES_MATCH = 0;
+
+  int32_t integer_value = std::numeric_limits<int32_t>::max();
+  out_str = convert_toINT(ctx_ptr, integer_value, &out_len);
+  EXPECT_EQ(out_len, sizeof(integer_value));
+  EXPECT_EQ(ALL_BYTES_MATCH, memcmp(out_str, &integer_value, out_len));
+
+  int64_t big_integer_value = std::numeric_limits<int64_t>::max();
+  out_str = convert_toBIGINT(ctx_ptr, big_integer_value, &out_len);
+  EXPECT_EQ(out_len, sizeof(big_integer_value));
+  EXPECT_EQ(ALL_BYTES_MATCH, memcmp(out_str, &big_integer_value, out_len));
+
+  float float_value = std::numeric_limits<float>::max();
+  out_str = convert_toFLOAT(ctx_ptr, float_value, &out_len);
+  EXPECT_EQ(out_len, sizeof(float_value));
+  EXPECT_EQ(ALL_BYTES_MATCH, memcmp(out_str, &float_value, out_len));
+
+  double double_value = std::numeric_limits<double>::max();
+  out_str = convert_toDOUBLE(ctx_ptr, double_value, &out_len);
+  EXPECT_EQ(out_len, sizeof(double_value));
+  EXPECT_EQ(ALL_BYTES_MATCH, memcmp(out_str, &double_value, out_len));
+
+  const char* test_string = "test string";
+  int32_t str_len = 11;
+  out_str = convert_toUTF8(ctx_ptr, test_string, str_len, &out_len);
+  EXPECT_EQ(out_len, str_len);
+  EXPECT_EQ(ALL_BYTES_MATCH, memcmp(out_str, test_string, out_len));
+}
+
+TEST(TestStringOps, TestConvertToBigEndian) {
+  gandiva::ExecutionContext ctx;
+  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
+  gdv_int32 out_len = 0;
+  gdv_int32 out_len_big_endian = 0;
+  const char* out_str;
+  const char* out_str_big_endian;
+
+  int64_t big_integer_value = std::numeric_limits<int64_t>::max();
+  out_str = convert_toBIGINT(ctx_ptr, big_integer_value, &out_len);
+  out_str_big_endian =
+      convert_toBIGINT_be(ctx_ptr, big_integer_value, &out_len_big_endian);
+  EXPECT_EQ(out_len_big_endian, sizeof(big_integer_value));
+  EXPECT_EQ(out_len_big_endian, out_len);
+
+#if ARROW_LITTLE_ENDIAN
+  // Checks that bytes are in reverse order
+  for (auto i = 0; i < out_len; i++) {
+    EXPECT_EQ(out_str[i], out_str_big_endian[out_len - (i + 1)]);
+  }
+#else
+  for (auto i = 0; i < out_len; i++) {
+    EXPECT_EQ(out_str[i], out_str_big_endian[i]);
+  }
+#endif
+
+  double double_value = std::numeric_limits<double>::max();
+  out_str = convert_toDOUBLE(ctx_ptr, double_value, &out_len);
+  out_str_big_endian = convert_toDOUBLE_be(ctx_ptr, double_value, &out_len_big_endian);
+  EXPECT_EQ(out_len_big_endian, sizeof(double_value));
+  EXPECT_EQ(out_len_big_endian, out_len);
+
+#if ARROW_LITTLE_ENDIAN
+  // Checks that bytes are in reverse order
+  for (auto i = 0; i < out_len; i++) {
+    EXPECT_EQ(out_str[i], out_str_big_endian[out_len - (i + 1)]);
+  }
+#else
+  for (auto i = 0; i < out_len; i++) {
+    EXPECT_EQ(out_str[i], out_str_big_endian[i]);
+  }
+#endif
+}
 }  // namespace gandiva
diff --git a/cpp/src/gandiva/precompiled/types.h b/cpp/src/gandiva/precompiled/types.h
index 736406fe143..170cf92aa2d 100644
--- a/cpp/src/gandiva/precompiled/types.h
+++ b/cpp/src/gandiva/precompiled/types.h
@@ -18,6 +18,7 @@
 #pragma once
 
 #include <cstdint>
+
 #include "gandiva/gdv_function_stubs.h"
 
 // Use the same names as in arrow data types. Makes it easy to write pre-processor macros.
@@ -428,6 +429,40 @@ const char* convert_replace_invalid_fromUTF8_binary(int64_t context, const char*
                                                     int32_t char_to_replace_len,
                                                     int32_t* out_len);
 
+const char* convert_toDOUBLE(int64_t context, double value, int32_t* out_len);
+
+const char* convert_toDOUBLE_be(int64_t context, double value, int32_t* out_len);
+
+const char* convert_toFLOAT(int64_t context, float value, int32_t* out_len);
+
+const char* convert_toFLOAT_be(int64_t context, float value, int32_t* out_len);
+
+const char* convert_toBIGINT(int64_t context, int64_t value, int32_t* out_len);
+
+const char* convert_toBIGINT_be(int64_t context, int64_t value, int32_t* out_len);
+
+const char* convert_toINT(int64_t context, int32_t value, int32_t* out_len);
+
+const char* convert_toINT_be(int64_t context, int32_t value, int32_t* out_len);
+
+const char* convert_toBOOLEAN(int64_t context, bool value, int32_t* out_len);
+
+const char* convert_toTIME_EPOCH(int64_t context, int32_t value, int32_t* out_len);
+
+const char* convert_toTIME_EPOCH_be(int64_t context, int32_t value, int32_t* out_len);
+
+const char* convert_toTIMESTAMP_EPOCH(int64_t context, int64_t timestamp,
+                                      int32_t* out_len);
+const char* convert_toTIMESTAMP_EPOCH_be(int64_t context, int64_t timestamp,
+                                         int32_t* out_len);
+
+const char* convert_toDATE_EPOCH(int64_t context, int64_t date, int32_t* out_len);
+
+const char* convert_toDATE_EPOCH_be(int64_t context, int64_t date, int32_t* out_len);
+
+const char* convert_toUTF8(int64_t context, const char* value, int32_t value_len,
+                           int32_t* out_len);
+
 const char* split_part(gdv_int64 context, const char* text, gdv_int32 text_len,
                        const char* splitter, gdv_int32 split_len, gdv_int32 index,
                        gdv_int32* out_len);

From 23b911679e6ae6f00a50398b1586b27cf4e008df Mon Sep 17 00:00:00 2001
From: Anthony Louis <anthony@simbioseventures.com>
Date: Thu, 20 May 2021 10:35:59 +0530
Subject: [PATCH 269/719] ARROW-12621: [C++][Gandiva] Add alias to sha1 and
 sha256 functions

The names of the sha functions are **hashSHA1** and **hashSHA256**. The objective is to make the functions also being available through the **sha** and **sha1** names for SHA1 algorithms and **sha2** and **sha256** for SHA256 algorithms.

Closes #10218 from anthonylouisbsb/feature/add-alias-sha-functions and squashes the following commits:

da4f9350e <Anthony Louis> Remove SHA2 alias
559d6caaa <Anthony Louis> Add tests to check if alias are working
a101dd888 <Anthony Louis> Add alias to sha hash functions

Authored-by: Anthony Louis <anthony@simbioseventures.com>
Signed-off-by: Praveen <praveen@dremio.com>
---
 cpp/src/gandiva/function_registry_common.h |  12 +-
 cpp/src/gandiva/tests/hash_test.cc         | 188 ++++++++++++++++++++-
 2 files changed, 192 insertions(+), 8 deletions(-)

diff --git a/cpp/src/gandiva/function_registry_common.h b/cpp/src/gandiva/function_registry_common.h
index d1555fba3ce..580b2f68d28 100644
--- a/cpp/src/gandiva/function_registry_common.h
+++ b/cpp/src/gandiva/function_registry_common.h
@@ -213,9 +213,9 @@ typedef std::unordered_map<const FunctionSignature*, const NativeFunction*, KeyH
 // - can return errors
 //
 // The function name includes the base name & input type name. gdv_fn_sha1_float64
-#define HASH_SHA1_NULL_NEVER(NAME, ALIASES, TYPE)                                 \
-  NativeFunction(#NAME, std::vector<std::string> ALIASES, DataTypeVector{TYPE()}, \
-                 utf8(), kResultNullNever, ARROW_STRINGIFY(gdv_fn_sha1_##TYPE),   \
+#define HASH_SHA1_NULL_NEVER(NAME, ALIASES, TYPE)                        \
+  NativeFunction(#NAME, {"sha", "sha1"}, DataTypeVector{TYPE()}, utf8(), \
+                 kResultNullNever, ARROW_STRINGIFY(gdv_fn_sha1_##TYPE),  \
                  NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors)
 
 // HashSHA256 functions that :
@@ -223,9 +223,9 @@ typedef std::unordered_map<const FunctionSignature*, const NativeFunction*, KeyH
 // - can return errors
 //
 // The function name includes the base name & input type name. gdv_fn_sha256_float64
-#define HASH_SHA256_NULL_NEVER(NAME, ALIASES, TYPE)                               \
-  NativeFunction(#NAME, std::vector<std::string> ALIASES, DataTypeVector{TYPE()}, \
-                 utf8(), kResultNullNever, ARROW_STRINGIFY(gdv_fn_sha256_##TYPE), \
+#define HASH_SHA256_NULL_NEVER(NAME, ALIASES, TYPE)                                   \
+  NativeFunction(#NAME, {"sha256"}, DataTypeVector{TYPE()}, utf8(), kResultNullNever, \
+                 ARROW_STRINGIFY(gdv_fn_sha256_##TYPE),                               \
                  NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors)
 
 // Iterate the inner macro over all numeric types
diff --git a/cpp/src/gandiva/tests/hash_test.cc b/cpp/src/gandiva/tests/hash_test.cc
index 9f4fff8c25b..40ebc50a271 100644
--- a/cpp/src/gandiva/tests/hash_test.cc
+++ b/cpp/src/gandiva/tests/hash_test.cc
@@ -15,12 +15,12 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include <gtest/gtest.h>
+
 #include <sstream>
 
-#include <gtest/gtest.h>
 #include "arrow/memory_pool.h"
 #include "arrow/status.h"
-
 #include "gandiva/projector.h"
 #include "gandiva/tests/test_util.h"
 #include "gandiva/tree_expr_builder.h"
@@ -428,4 +428,188 @@ TEST_F(TestHash, TestSha1Varlen) {
     EXPECT_NE(value_at_position, response->GetScalar(i - 1).ValueOrDie()->ToString());
   }
 }
+
+TEST_F(TestHash, TestSha1FunctionsAlias) {
+  // schema for input fields
+  auto field_a = field("a", utf8());
+  auto field_b = field("c", int64());
+  auto field_c = field("e", float64());
+  auto schema = arrow::schema({field_a, field_b, field_c});
+
+  // output fields
+  auto res_0 = field("res0", utf8());
+  auto res_0_sha1 = field("res0sha1", utf8());
+  auto res_0_sha = field("res0sha", utf8());
+
+  auto res_1 = field("res1", utf8());
+  auto res_1_sha1 = field("res1sha1", utf8());
+  auto res_1_sha = field("res1sha", utf8());
+
+  auto res_2 = field("res2", utf8());
+  auto res_2_sha1 = field("res2_sha1", utf8());
+  auto res_2_sha = field("res2_sha", utf8());
+
+  // build expressions.
+  // hashSHA1(a)
+  auto node_a = TreeExprBuilder::MakeField(field_a);
+  auto hashSha1 = TreeExprBuilder::MakeFunction("hashSHA1", {node_a}, utf8());
+  auto expr_0 = TreeExprBuilder::MakeExpression(hashSha1, res_0);
+  auto sha1 = TreeExprBuilder::MakeFunction("sha1", {node_a}, utf8());
+  auto expr_0_sha1 = TreeExprBuilder::MakeExpression(sha1, res_0_sha1);
+  auto sha = TreeExprBuilder::MakeFunction("sha", {node_a}, utf8());
+  auto expr_0_sha = TreeExprBuilder::MakeExpression(sha, res_0_sha);
+
+  auto node_b = TreeExprBuilder::MakeField(field_b);
+  auto hashSha1_1 = TreeExprBuilder::MakeFunction("hashSHA1", {node_b}, utf8());
+  auto expr_1 = TreeExprBuilder::MakeExpression(hashSha1_1, res_1);
+  auto sha1_1 = TreeExprBuilder::MakeFunction("sha1", {node_b}, utf8());
+  auto expr_1_sha1 = TreeExprBuilder::MakeExpression(sha1_1, res_1_sha1);
+  auto sha_1 = TreeExprBuilder::MakeFunction("sha", {node_b}, utf8());
+  auto expr_1_sha = TreeExprBuilder::MakeExpression(sha_1, res_1_sha);
+
+  auto node_c = TreeExprBuilder::MakeField(field_c);
+  auto hashSha1_2 = TreeExprBuilder::MakeFunction("hashSHA1", {node_c}, utf8());
+  auto expr_2 = TreeExprBuilder::MakeExpression(hashSha1_2, res_2);
+  auto sha1_2 = TreeExprBuilder::MakeFunction("sha1", {node_c}, utf8());
+  auto expr_2_sha1 = TreeExprBuilder::MakeExpression(sha1_2, res_2_sha1);
+  auto sha_2 = TreeExprBuilder::MakeFunction("sha", {node_c}, utf8());
+  auto expr_2_sha = TreeExprBuilder::MakeExpression(sha_2, res_2_sha);
+
+  // Build a projector for the expressions.
+  std::shared_ptr<Projector> projector;
+  auto status = Projector::Make(schema,
+                                {expr_0, expr_0_sha, expr_0_sha1, expr_1, expr_1_sha,
+                                 expr_1_sha1, expr_2, expr_2_sha, expr_2_sha1},
+                                TestConfiguration(), &projector);
+  ASSERT_OK(status) << status.message();
+
+  // Create a row-batch with some sample data
+  int32_t num_records = 3;
+
+  std::string first_string =
+      "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn\nY [ˈʏpsilɔn], "
+      "Yen [jɛn], Yoga [ˈjoːgɑ]";
+  std::string second_string =
+      "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeın\nY [ˈʏpsilɔn], "
+      "Yen [jɛn], Yoga [ˈjoːgɑ] コンニチハ";
+
+  auto array_utf8 =
+      MakeArrowArrayUtf8({"", first_string, second_string}, {false, true, true});
+
+  auto validity_array = {false, true, true};
+
+  auto array_int64 = MakeArrowArrayInt64({1, 0, 32423}, validity_array);
+
+  auto array_float64 = MakeArrowArrayFloat64({1.0, 0.0, 324893.3849}, validity_array);
+
+  // prepare input record batch
+  auto in_batch = arrow::RecordBatch::Make(schema, num_records,
+                                           {array_utf8, array_int64, array_float64});
+
+  // Evaluate expression
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  ASSERT_OK(status);
+
+  // Checks that the response for the hashSHA1, sha and sha1 are equals for the first
+  // field of utf8 type
+  EXPECT_ARROW_ARRAY_EQUALS(outputs.at(0), outputs.at(1));  // hashSha1 and sha
+  EXPECT_ARROW_ARRAY_EQUALS(outputs.at(1), outputs.at(2));  // sha and sha1
+
+  // Checks that the response for the hashSHA1, sha and sha1 are equals for the second
+  // field of int64 type
+  EXPECT_ARROW_ARRAY_EQUALS(outputs.at(3), outputs.at(4));  // hashSha1 and sha
+  EXPECT_ARROW_ARRAY_EQUALS(outputs.at(4), outputs.at(5));  // sha and sha1
+
+  // Checks that the response for the hashSHA1, sha and sha1 are equals for the first
+  // field of float64 type
+  EXPECT_ARROW_ARRAY_EQUALS(outputs.at(6), outputs.at(7));  // hashSha1 and sha responses
+  EXPECT_ARROW_ARRAY_EQUALS(outputs.at(7), outputs.at(8));  // sha and sha1 responses
+}
+
+TEST_F(TestHash, TestSha256FunctionsAlias) {
+  // schema for input fields
+  auto field_a = field("a", utf8());
+  auto field_b = field("c", int64());
+  auto field_c = field("e", float64());
+  auto schema = arrow::schema({field_a, field_b, field_c});
+
+  // output fields
+  auto res_0 = field("res0", utf8());
+  auto res_0_sha256 = field("res0sha256", utf8());
+
+  auto res_1 = field("res1", utf8());
+  auto res_1_sha256 = field("res1sha256", utf8());
+
+  auto res_2 = field("res2", utf8());
+  auto res_2_sha256 = field("res2_sha256", utf8());
+
+  // build expressions.
+  // hashSHA1(a)
+  auto node_a = TreeExprBuilder::MakeField(field_a);
+  auto hashSha2 = TreeExprBuilder::MakeFunction("hashSHA256", {node_a}, utf8());
+  auto expr_0 = TreeExprBuilder::MakeExpression(hashSha2, res_0);
+  auto sha256 = TreeExprBuilder::MakeFunction("sha256", {node_a}, utf8());
+  auto expr_0_sha256 = TreeExprBuilder::MakeExpression(sha256, res_0_sha256);
+
+  auto node_b = TreeExprBuilder::MakeField(field_b);
+  auto hashSha2_1 = TreeExprBuilder::MakeFunction("hashSHA256", {node_b}, utf8());
+  auto expr_1 = TreeExprBuilder::MakeExpression(hashSha2_1, res_1);
+  auto sha256_1 = TreeExprBuilder::MakeFunction("sha256", {node_b}, utf8());
+  auto expr_1_sha256 = TreeExprBuilder::MakeExpression(sha256_1, res_1_sha256);
+
+  auto node_c = TreeExprBuilder::MakeField(field_c);
+  auto hashSha2_2 = TreeExprBuilder::MakeFunction("hashSHA256", {node_c}, utf8());
+  auto expr_2 = TreeExprBuilder::MakeExpression(hashSha2_2, res_2);
+  auto sha256_2 = TreeExprBuilder::MakeFunction("sha256", {node_c}, utf8());
+  auto expr_2_sha256 = TreeExprBuilder::MakeExpression(sha256_2, res_2_sha256);
+
+  // Build a projector for the expressions.
+  std::shared_ptr<Projector> projector;
+  auto status = Projector::Make(
+      schema, {expr_0, expr_0_sha256, expr_1, expr_1_sha256, expr_2, expr_2_sha256},
+      TestConfiguration(), &projector);
+  ASSERT_OK(status) << status.message();
+
+  // Create a row-batch with some sample data
+  int32_t num_records = 3;
+
+  std::string first_string =
+      "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn\nY [ˈʏpsilɔn], "
+      "Yen [jɛn], Yoga [ˈjoːgɑ]";
+  std::string second_string =
+      "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeın\nY [ˈʏpsilɔn], "
+      "Yen [jɛn], Yoga [ˈjoːgɑ] コンニチハ";
+
+  auto array_utf8 =
+      MakeArrowArrayUtf8({"", first_string, second_string}, {false, true, true});
+
+  auto validity_array = {false, true, true};
+
+  auto array_int64 = MakeArrowArrayInt64({1, 0, 32423}, validity_array);
+
+  auto array_float64 = MakeArrowArrayFloat64({1.0, 0.0, 324893.3849}, validity_array);
+
+  // prepare input record batch
+  auto in_batch = arrow::RecordBatch::Make(schema, num_records,
+                                           {array_utf8, array_int64, array_float64});
+
+  // Evaluate expression
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  ASSERT_OK(status);
+
+  // Checks that the response for the hashSHA2, sha256 and sha2 are equals for the first
+  // field of utf8 type
+  EXPECT_ARROW_ARRAY_EQUALS(outputs.at(0), outputs.at(1));  // hashSha2 and sha256
+
+  // Checks that the response for the hashSHA2, sha256 and sha2 are equals for the second
+  // field of int64 type
+  EXPECT_ARROW_ARRAY_EQUALS(outputs.at(2), outputs.at(3));  // hashSha2 and sha256
+
+  // Checks that the response for the hashSHA2, sha256 and sha2 are equals for the first
+  // field of float64 type
+  EXPECT_ARROW_ARRAY_EQUALS(outputs.at(4),
+                            outputs.at(5));  // hashSha2 and sha256 responses
+}
 }  // namespace gandiva

From 8f5bda4f785b0a73e5ef8d6786bea9867dee03d8 Mon Sep 17 00:00:00 2001
From: frank400 <j.victorhuguenin2018@gmail.com>
Date: Thu, 20 May 2021 10:44:17 +0530
Subject: [PATCH 270/719] ARROW-11986: [C++][Gandiva] Implement IN expressions
 for doubles and floats

Add functions to process IN expressions for Arrows fields with double and float types.

Closes #9724 from jvictorhuguenin/feature/add-float-double-decimal-in-expr and squashes the following commits:

05c283d62 <frank400> Fix Expression validation error message
182b340a8 <frank400> Fix checkstyle
9088b09c4 <frank400> Fix checkstyle
69745f61a <frank400> Fix checkstyle
27a8adf84 <frank400> Add comments to java tests to make it clearer
24d32cfe2 <frank400> Fix InNode constructor parameters
ece7702f6 <frank400> Fix InNode constructor parameters
50cfa1426 <frank400> Fix rebase errors
1730fd50e <frank400> Fix problems with backward compatibility
4be099baa <frank400> Fix lint problem after declaring float values explicitly
6f2686791 <frank400> Fix test typing for float numbers
7b41e9767 <frank400> Add test cases for -0.0, +inf and -inf and NaN
d3af44db2 <frank400> Fix Lint problem within TestInFloat
9adbd5692 <frank400> Fix array float tiping for build propose
e744d5d3e <frank400> Add JNI functions and tests
cb887ab69 <frank400> Fix Lint problems
af2d70998 <frank400> Fix Lint problems
a2b904b57 <frank400> Change the float_t and double_t to float and double
570006d44 <frank400> Fix CI problem for truncating doubles to floats
1a206c93e <frank400> Remove unnecessary call
8cc34739e <frank400> Fix build problems with mingw
6be7a60ae <frank400> Fix lint problems
64905ddc4 <frank400> Fix CI problems
efa4d1e2f <frank400> test the implemented in expressions
94adfb8d3 <frank400> implements in expressions for floats and doubles
94f111708 <frank400> fix wrong typed double_t and float_t at stub functions
54baa9cc6 <frank400> Fix problems with backward compatibility
36ed0b708 <frank400> Fix lint problem after declaring float values explicitly
6b47c75d2 <frank400> Fix test typing for float numbers
9f57c9549 <frank400> Add test cases for -0.0, +inf and -inf and NaN
c7901663f <frank400> Fix Lint problem within TestInFloat
074013cc3 <frank400> Fix jni register for double expressions
9c1cea8df <frank400> Fix array float tiping for build propose
2b464cae9 <frank400> Add JNI functions and tests
7e226726c <frank400> Fix Lint problems
195a129ba <frank400> Fix Lint problems
b610afec6 <frank400> Change the float_t and double_t to float and double
f8d7b6e8c <frank400> Fix CI problem for truncating doubles to floats
8b448cfc3 <frank400> Remove unnecessary call
89d822548 <frank400> Fix build problems with mingw
a25558e5f <frank400> Fix lint problems
f51ac3d01 <frank400> Fix CI problems
f28ed9b56 <frank400> uncomment the implemented expressions
3312ace71 <frank400> test the implemented in expressions
8fbe192ed <frank400> implements in expressions for floats and doubles

Authored-by: frank400 <j.victorhuguenin2018@gmail.com>
Signed-off-by: Praveen <praveen@dremio.com>
---
 cpp/src/gandiva/dex.h                         | 18 ++++
 cpp/src/gandiva/dex_visitor.h                 |  5 ++
 cpp/src/gandiva/expr_decomposer.cc            |  2 +
 cpp/src/gandiva/expr_decomposer.h             |  3 +
 cpp/src/gandiva/expr_validator.cc             | 17 +++-
 cpp/src/gandiva/expr_validator.h              |  2 +
 cpp/src/gandiva/gdv_function_stubs.cc         | 31 +++++++
 cpp/src/gandiva/jni/jni_common.cc             | 16 ++++
 cpp/src/gandiva/llvm_generator.cc             |  7 ++
 cpp/src/gandiva/llvm_generator.h              |  2 +
 cpp/src/gandiva/node_visitor.h                |  3 +
 cpp/src/gandiva/proto/Types.proto             | 10 +++
 cpp/src/gandiva/tests/in_expr_test.cc         | 82 +++++++++++++++++++
 cpp/src/gandiva/tree_expr_builder.cc          |  2 +
 cpp/src/gandiva/tree_expr_builder.h           |  9 ++
 .../arrow/gandiva/expression/InNode.java      | 39 +++++++--
 .../arrow/gandiva/expression/TreeBuilder.java | 10 +++
 .../gandiva/evaluator/ProjectorTest.java      | 57 ++++++++++++-
 18 files changed, 303 insertions(+), 12 deletions(-)

diff --git a/cpp/src/gandiva/dex.h b/cpp/src/gandiva/dex.h
index 3920f82f1d7..d1115c0516a 100644
--- a/cpp/src/gandiva/dex.h
+++ b/cpp/src/gandiva/dex.h
@@ -353,6 +353,24 @@ class InExprDex<int64_t> : public InExprDexBase<int64_t> {
   }
 };
 
+template <>
+class InExprDex<float> : public InExprDexBase<float> {
+ public:
+  InExprDex(const ValueValidityPairVector& args, const std::unordered_set<float>& values)
+      : InExprDexBase(args, values) {
+    runtime_function_ = "gdv_fn_in_expr_lookup_float";
+  }
+};
+
+template <>
+class InExprDex<double> : public InExprDexBase<double> {
+ public:
+  InExprDex(const ValueValidityPairVector& args, const std::unordered_set<double>& values)
+      : InExprDexBase(args, values) {
+    runtime_function_ = "gdv_fn_in_expr_lookup_double";
+  }
+};
+
 template <>
 class InExprDex<gandiva::DecimalScalar128>
     : public InExprDexBase<gandiva::DecimalScalar128> {
diff --git a/cpp/src/gandiva/dex_visitor.h b/cpp/src/gandiva/dex_visitor.h
index ba5de970dda..5d160bb22ca 100644
--- a/cpp/src/gandiva/dex_visitor.h
+++ b/cpp/src/gandiva/dex_visitor.h
@@ -17,6 +17,7 @@
 
 #pragma once
 
+#include <cmath>
 #include <string>
 
 #include "arrow/util/logging.h"
@@ -61,6 +62,8 @@ class GANDIVA_EXPORT DexVisitor {
   virtual void Visit(const BooleanOrDex& dex) = 0;
   virtual void Visit(const InExprDexBase<int32_t>& dex) = 0;
   virtual void Visit(const InExprDexBase<int64_t>& dex) = 0;
+  virtual void Visit(const InExprDexBase<float>& dex) = 0;
+  virtual void Visit(const InExprDexBase<double>& dex) = 0;
   virtual void Visit(const InExprDexBase<gandiva::DecimalScalar128>& dex) = 0;
   virtual void Visit(const InExprDexBase<std::string>& dex) = 0;
 };
@@ -85,6 +88,8 @@ class GANDIVA_EXPORT DexDefaultVisitor : public DexVisitor {
   VISIT_DCHECK(BooleanOrDex)
   VISIT_DCHECK(InExprDexBase<int32_t>)
   VISIT_DCHECK(InExprDexBase<int64_t>)
+  VISIT_DCHECK(InExprDexBase<float>)
+  VISIT_DCHECK(InExprDexBase<double>)
   VISIT_DCHECK(InExprDexBase<gandiva::DecimalScalar128>)
   VISIT_DCHECK(InExprDexBase<std::string>)
 };
diff --git a/cpp/src/gandiva/expr_decomposer.cc b/cpp/src/gandiva/expr_decomposer.cc
index 07252b42fd2..1c09d28f5e0 100644
--- a/cpp/src/gandiva/expr_decomposer.cc
+++ b/cpp/src/gandiva/expr_decomposer.cc
@@ -209,6 +209,8 @@ Status ExprDecomposer::Visit(const InExpressionNode<gandiva::DecimalScalar128>&
 
 MAKE_VISIT_IN(int32_t);
 MAKE_VISIT_IN(int64_t);
+MAKE_VISIT_IN(float);
+MAKE_VISIT_IN(double);
 MAKE_VISIT_IN(std::string);
 
 Status ExprDecomposer::Visit(const LiteralNode& node) {
diff --git a/cpp/src/gandiva/expr_decomposer.h b/cpp/src/gandiva/expr_decomposer.h
index 3e8e67de255..f68b8a8fc02 100644
--- a/cpp/src/gandiva/expr_decomposer.h
+++ b/cpp/src/gandiva/expr_decomposer.h
@@ -17,6 +17,7 @@
 
 #pragma once
 
+#include <cmath>
 #include <memory>
 #include <stack>
 #include <string>
@@ -66,6 +67,8 @@ class GANDIVA_EXPORT ExprDecomposer : public NodeVisitor {
   Status Visit(const BooleanNode& node) override;
   Status Visit(const InExpressionNode<int32_t>& node) override;
   Status Visit(const InExpressionNode<int64_t>& node) override;
+  Status Visit(const InExpressionNode<float>& node) override;
+  Status Visit(const InExpressionNode<double>& node) override;
   Status Visit(const InExpressionNode<gandiva::DecimalScalar128>& node) override;
   Status Visit(const InExpressionNode<std::string>& node) override;
 
diff --git a/cpp/src/gandiva/expr_validator.cc b/cpp/src/gandiva/expr_validator.cc
index fd46c2894b9..c3c784c9511 100644
--- a/cpp/src/gandiva/expr_validator.cc
+++ b/cpp/src/gandiva/expr_validator.cc
@@ -156,6 +156,14 @@ Status ExprValidator::Visit(const InExpressionNode<int64_t>& node) {
   return ValidateInExpression(node.values().size(), node.eval_expr()->return_type(),
                               arrow::int64());
 }
+Status ExprValidator::Visit(const InExpressionNode<float>& node) {
+  return ValidateInExpression(node.values().size(), node.eval_expr()->return_type(),
+                              arrow::float32());
+}
+Status ExprValidator::Visit(const InExpressionNode<double>& node) {
+  return ValidateInExpression(node.values().size(), node.eval_expr()->return_type(),
+                              arrow::float64());
+}
 
 Status ExprValidator::Visit(const InExpressionNode<gandiva::DecimalScalar128>& node) {
   return ValidateInExpression(node.values().size(), node.eval_expr()->return_type(),
@@ -173,10 +181,11 @@ Status ExprValidator::ValidateInExpression(size_t number_of_values,
   ARROW_RETURN_IF(number_of_values == 0,
                   Status::ExpressionValidationError(
                       "IN Expression needs a non-empty constant list to match."));
-  ARROW_RETURN_IF(!in_expr_return_type->Equals(type_of_values),
-                  Status::ExpressionValidationError(
-                      "Evaluation expression for IN clause returns ", in_expr_return_type,
-                      " values are of type", type_of_values));
+  ARROW_RETURN_IF(
+      !in_expr_return_type->Equals(type_of_values),
+      Status::ExpressionValidationError(
+          "Evaluation expression for IN clause returns ", in_expr_return_type->ToString(),
+          " values are of type", type_of_values->ToString()));
 
   return Status::OK();
 }
diff --git a/cpp/src/gandiva/expr_validator.h b/cpp/src/gandiva/expr_validator.h
index e25afe5e7e8..daaf50897fc 100644
--- a/cpp/src/gandiva/expr_validator.h
+++ b/cpp/src/gandiva/expr_validator.h
@@ -60,6 +60,8 @@ class ExprValidator : public NodeVisitor {
   Status Visit(const BooleanNode& node) override;
   Status Visit(const InExpressionNode<int32_t>& node) override;
   Status Visit(const InExpressionNode<int64_t>& node) override;
+  Status Visit(const InExpressionNode<float>& node) override;
+  Status Visit(const InExpressionNode<double>& node) override;
   Status Visit(const InExpressionNode<gandiva::DecimalScalar128>& node) override;
   Status Visit(const InExpressionNode<std::string>& node) override;
   Status ValidateInExpression(size_t number_of_values, DataTypePtr in_expr_return_type,
diff --git a/cpp/src/gandiva/gdv_function_stubs.cc b/cpp/src/gandiva/gdv_function_stubs.cc
index 832eebcaa1a..acf3f56ccef 100644
--- a/cpp/src/gandiva/gdv_function_stubs.cc
+++ b/cpp/src/gandiva/gdv_function_stubs.cc
@@ -102,6 +102,22 @@ bool gdv_fn_in_expr_lookup_decimal(int64_t ptr, int64_t value_high, int64_t valu
   return holder->HasValue(value);
 }
 
+bool gdv_fn_in_expr_lookup_float(int64_t ptr, float value, bool in_validity) {
+  if (!in_validity) {
+    return false;
+  }
+  gandiva::InHolder<float>* holder = reinterpret_cast<gandiva::InHolder<float>*>(ptr);
+  return holder->HasValue(value);
+}
+
+bool gdv_fn_in_expr_lookup_double(int64_t ptr, double value, bool in_validity) {
+  if (!in_validity) {
+    return false;
+  }
+  gandiva::InHolder<double>* holder = reinterpret_cast<gandiva::InHolder<double>*>(ptr);
+  return holder->HasValue(value);
+}
+
 bool gdv_fn_in_expr_lookup_utf8(int64_t ptr, const char* data, int data_len,
                                 bool in_validity) {
   if (!in_validity) {
@@ -504,7 +520,22 @@ void ExportedStubFunctions::AddMappings(Engine* engine) const {
   engine->AddGlobalMappingForFunc("gdv_fn_in_expr_lookup_utf8",
                                   types->i1_type() /*return_type*/, args,
                                   reinterpret_cast<void*>(gdv_fn_in_expr_lookup_utf8));
+  // gdv_fn_in_expr_lookup_float
+  args = {types->i64_type(),    // int64_t in holder ptr
+          types->float_type(),  // float value
+          types->i1_type()};    // bool in_validity
+
+  engine->AddGlobalMappingForFunc("gdv_fn_in_expr_lookup_float",
+                                  types->i1_type() /*return_type*/, args,
+                                  reinterpret_cast<void*>(gdv_fn_in_expr_lookup_float));
+  // gdv_fn_in_expr_lookup_double
+  args = {types->i64_type(),     // int64_t in holder ptr
+          types->double_type(),  // double value
+          types->i1_type()};     // bool in_validity
 
+  engine->AddGlobalMappingForFunc("gdv_fn_in_expr_lookup_double",
+                                  types->i1_type() /*return_type*/, args,
+                                  reinterpret_cast<void*>(gdv_fn_in_expr_lookup_double));
   // gdv_fn_populate_varlen_vector
   args = {types->i64_type(),      // int64_t execution_context
           types->i8_ptr_type(),   // int8_t* data ptr
diff --git a/cpp/src/gandiva/jni/jni_common.cc b/cpp/src/gandiva/jni/jni_common.cc
index 04953305432..5a4cbb03188 100644
--- a/cpp/src/gandiva/jni/jni_common.cc
+++ b/cpp/src/gandiva/jni/jni_common.cc
@@ -380,6 +380,22 @@ NodePtr ProtoTypeToInNode(const types::InNode& node) {
     return TreeExprBuilder::MakeInExpressionDecimal(field, decimal_values);
   }
 
+  if (node.has_floatvalues()) {
+    std::unordered_set<float> float_values;
+    for (int i = 0; i < node.floatvalues().floatvalues_size(); i++) {
+      float_values.insert(node.floatvalues().floatvalues(i).value());
+    }
+    return TreeExprBuilder::MakeInExpressionFloat(field, float_values);
+  }
+
+  if (node.has_doublevalues()) {
+    std::unordered_set<double> double_values;
+    for (int i = 0; i < node.doublevalues().doublevalues_size(); i++) {
+      double_values.insert(node.doublevalues().doublevalues(i).value());
+    }
+    return TreeExprBuilder::MakeInExpressionDouble(field, double_values);
+  }
+
   if (node.has_stringvalues()) {
     std::unordered_set<std::string> stringvalues;
     for (int i = 0; i < node.stringvalues().stringvalues_size(); i++) {
diff --git a/cpp/src/gandiva/llvm_generator.cc b/cpp/src/gandiva/llvm_generator.cc
index 1a80f1e7586..77feb99eb29 100644
--- a/cpp/src/gandiva/llvm_generator.cc
+++ b/cpp/src/gandiva/llvm_generator.cc
@@ -1084,6 +1084,13 @@ void LLVMGenerator::Visitor::Visit(const InExprDexBase<int64_t>& dex) {
   VisitInExpression<int64_t>(dex);
 }
 
+void LLVMGenerator::Visitor::Visit(const InExprDexBase<float>& dex) {
+  VisitInExpression<float>(dex);
+}
+void LLVMGenerator::Visitor::Visit(const InExprDexBase<double>& dex) {
+  VisitInExpression<double>(dex);
+}
+
 void LLVMGenerator::Visitor::Visit(const InExprDexBase<gandiva::DecimalScalar128>& dex) {
   VisitInExpression<gandiva::DecimalScalar128>(dex);
 }
diff --git a/cpp/src/gandiva/llvm_generator.h b/cpp/src/gandiva/llvm_generator.h
index 8ff9711c0f9..ff6d846024c 100644
--- a/cpp/src/gandiva/llvm_generator.h
+++ b/cpp/src/gandiva/llvm_generator.h
@@ -108,6 +108,8 @@ class GANDIVA_EXPORT LLVMGenerator {
     void Visit(const BooleanOrDex& dex) override;
     void Visit(const InExprDexBase<int32_t>& dex) override;
     void Visit(const InExprDexBase<int64_t>& dex) override;
+    void Visit(const InExprDexBase<float>& dex) override;
+    void Visit(const InExprDexBase<double>& dex) override;
     void Visit(const InExprDexBase<gandiva::DecimalScalar128>& dex) override;
     void Visit(const InExprDexBase<std::string>& dex) override;
     template <typename Type>
diff --git a/cpp/src/gandiva/node_visitor.h b/cpp/src/gandiva/node_visitor.h
index b118e496383..8f233f5b77c 100644
--- a/cpp/src/gandiva/node_visitor.h
+++ b/cpp/src/gandiva/node_visitor.h
@@ -17,6 +17,7 @@
 
 #pragma once
 
+#include <cmath>
 #include <string>
 
 #include "arrow/status.h"
@@ -46,6 +47,8 @@ class GANDIVA_EXPORT NodeVisitor {
   virtual Status Visit(const BooleanNode& node) = 0;
   virtual Status Visit(const InExpressionNode<int32_t>& node) = 0;
   virtual Status Visit(const InExpressionNode<int64_t>& node) = 0;
+  virtual Status Visit(const InExpressionNode<float>& node) = 0;
+  virtual Status Visit(const InExpressionNode<double>& node) = 0;
   virtual Status Visit(const InExpressionNode<gandiva::DecimalScalar128>& node) = 0;
   virtual Status Visit(const InExpressionNode<std::string>& node) = 0;
 };
diff --git a/cpp/src/gandiva/proto/Types.proto b/cpp/src/gandiva/proto/Types.proto
index 7c0c49f2d85..eb0d996b92e 100644
--- a/cpp/src/gandiva/proto/Types.proto
+++ b/cpp/src/gandiva/proto/Types.proto
@@ -222,6 +222,8 @@ message InNode {
   optional StringConstants stringValues = 4;
   optional BinaryConstants binaryValues = 5;
   optional DecimalConstants decimalValues = 6;
+  optional FloatConstants floatValues = 7;
+  optional DoubleConstants doubleValues = 8;
 }
 
 message IntConstants {
@@ -236,6 +238,14 @@ message DecimalConstants {
   repeated DecimalNode decimalValues = 1;
 }
 
+message FloatConstants {
+  repeated FloatNode floatValues = 1;
+}
+
+message DoubleConstants {
+  repeated DoubleNode doubleValues = 1;
+}
+
 message StringConstants {
   repeated StringNode stringValues = 1;
 }
diff --git a/cpp/src/gandiva/tests/in_expr_test.cc b/cpp/src/gandiva/tests/in_expr_test.cc
index 6a31b1cf4ef..fc1a8a71b9c 100644
--- a/cpp/src/gandiva/tests/in_expr_test.cc
+++ b/cpp/src/gandiva/tests/in_expr_test.cc
@@ -16,6 +16,7 @@
 // under the License.
 
 #include <gtest/gtest.h>
+#include <cmath>
 
 #include "arrow/memory_pool.h"
 #include "gandiva/filter.h"
@@ -26,6 +27,7 @@ namespace gandiva {
 
 using arrow::boolean;
 using arrow::float32;
+using arrow::float64;
 using arrow::int32;
 
 class TestIn : public ::testing::Test {
@@ -91,6 +93,86 @@ TEST_F(TestIn, TestInSimple) {
   EXPECT_ARROW_ARRAY_EQUALS(exp, selection_vector->ToArray());
 }
 
+TEST_F(TestIn, TestInFloat) {
+  // schema for input fields
+  auto field0 = field("f0", float32());
+  auto schema = arrow::schema({field0});
+
+  // Build In f0 + f1 in (6, 11)
+  auto node_f0 = TreeExprBuilder::MakeField(field0);
+
+  std::unordered_set<float> in_constants({6.5f, 12.0f, 11.5f});
+  auto in_expr = TreeExprBuilder::MakeInExpressionFloat(node_f0, in_constants);
+  auto condition = TreeExprBuilder::MakeCondition(in_expr);
+
+  std::shared_ptr<Filter> filter;
+  auto status = Filter::Make(schema, condition, TestConfiguration(), &filter);
+  EXPECT_TRUE(status.ok());
+
+  // Create a row-batch with some sample data
+  int num_records = 5;
+  auto array0 =
+      MakeArrowArrayFloat32({6.5f, 11.5f, 4, 3.15f, 6}, {true, true, false, true, true});
+  // expected output (indices for which condition matches)
+  auto exp = MakeArrowArrayUint16({0, 1});
+
+  // prepare input record batch
+  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0});
+
+  std::shared_ptr<SelectionVector> selection_vector;
+  status = SelectionVector::MakeInt16(num_records, pool_, &selection_vector);
+  EXPECT_TRUE(status.ok());
+
+  // Evaluate expression
+  status = filter->Evaluate(*in_batch, selection_vector);
+  EXPECT_TRUE(status.ok());
+
+  // Validate results
+  EXPECT_ARROW_ARRAY_EQUALS(exp, selection_vector->ToArray());
+}
+
+TEST_F(TestIn, TestInDouble) {
+  // schema for input fields
+  auto field0 = field("double0", float64());
+  auto field1 = field("double1", float64());
+  auto schema = arrow::schema({field0, field1});
+
+  auto node_f0 = TreeExprBuilder::MakeField(field0);
+  auto node_f1 = TreeExprBuilder::MakeField(field1);
+  auto sum_func =
+      TreeExprBuilder::MakeFunction("add", {node_f0, node_f1}, arrow::float64());
+  std::unordered_set<double> in_constants({3.14159265359, 15.5555555});
+  auto in_expr = TreeExprBuilder::MakeInExpressionDouble(sum_func, in_constants);
+  auto condition = TreeExprBuilder::MakeCondition(in_expr);
+
+  std::shared_ptr<Filter> filter;
+  auto status = Filter::Make(schema, condition, TestConfiguration(), &filter);
+  EXPECT_TRUE(status.ok());
+
+  // Create a row-batch with some sample data
+  int num_records = 5;
+  auto array0 = MakeArrowArrayFloat64({1, 2, 3, 4, 11}, {true, true, true, false, false});
+  auto array1 = MakeArrowArrayFloat64({5, 9, 0.14159265359, 17, 4.5555555},
+                                      {true, true, true, true, true});
+
+  // expected output (indices for which condition matches)
+  auto exp = MakeArrowArrayUint16({2});
+
+  // prepare input record batch
+  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});
+
+  std::shared_ptr<SelectionVector> selection_vector;
+  status = SelectionVector::MakeInt16(num_records, pool_, &selection_vector);
+  EXPECT_TRUE(status.ok());
+
+  // Evaluate expression
+  status = filter->Evaluate(*in_batch, selection_vector);
+  EXPECT_TRUE(status.ok());
+
+  // Validate results
+  EXPECT_ARROW_ARRAY_EQUALS(exp, selection_vector->ToArray());
+}
+
 TEST_F(TestIn, TestInDecimal) {
   int32_t precision = 38;
   int32_t scale = 5;
diff --git a/cpp/src/gandiva/tree_expr_builder.cc b/cpp/src/gandiva/tree_expr_builder.cc
index b27b92010e8..de8e3445a12 100644
--- a/cpp/src/gandiva/tree_expr_builder.cc
+++ b/cpp/src/gandiva/tree_expr_builder.cc
@@ -215,6 +215,8 @@ MAKE_IN(Date64, int64_t);
 MAKE_IN(TimeStamp, int64_t);
 MAKE_IN(Time32, int32_t);
 MAKE_IN(Time64, int64_t);
+MAKE_IN(Float, float);
+MAKE_IN(Double, double);
 MAKE_IN(String, std::string);
 MAKE_IN(Binary, std::string);
 
diff --git a/cpp/src/gandiva/tree_expr_builder.h b/cpp/src/gandiva/tree_expr_builder.h
index 9c24fb9d616..94a4a179340 100644
--- a/cpp/src/gandiva/tree_expr_builder.h
+++ b/cpp/src/gandiva/tree_expr_builder.h
@@ -17,6 +17,7 @@
 
 #pragma once
 
+#include <cmath>
 #include <memory>
 #include <string>
 #include <unordered_set>
@@ -106,6 +107,14 @@ class GANDIVA_EXPORT TreeExprBuilder {
   static NodePtr MakeInExpressionBinary(NodePtr node,
                                         const std::unordered_set<std::string>& constants);
 
+  /// \brief creates an in expression for float
+  static NodePtr MakeInExpressionFloat(NodePtr node,
+                                       const std::unordered_set<float>& constants);
+
+  /// \brief creates an in expression for double
+  static NodePtr MakeInExpressionDouble(NodePtr node,
+                                        const std::unordered_set<double>& constants);
+
   /// \brief Date as s/millis since epoch.
   static NodePtr MakeInExpressionDate32(NodePtr node,
                                         const std::unordered_set<int32_t>& constants);
diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/InNode.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/InNode.java
index 08ef7f01bcd..fef8e311e6c 100644
--- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/InNode.java
+++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/InNode.java
@@ -34,6 +34,8 @@ public class InNode implements TreeNode {
 
   private final Set<Integer> intValues;
   private final Set<Long> longValues;
+  private final Set<Float> floatValues;
+  private final Set<Double> doubleValues;
   private final Set<BigDecimal> decimalValues;
   private final Set<String> stringValues;
   private final Set<byte[]> binaryValues;
@@ -43,7 +45,8 @@ public class InNode implements TreeNode {
   private final Integer scale;
 
   private InNode(Set<Integer> values, Set<Long> longValues, Set<String> stringValues, Set<byte[]>
-          binaryValues, Set<BigDecimal> decimalValues, Integer precision, Integer scale, TreeNode node) {
+          binaryValues, Set<BigDecimal> decimalValues, Integer precision, Integer scale,
+                 Set<Float> floatValues, Set<Double> doubleValues, TreeNode node) {
     this.intValues = values;
     this.longValues = longValues;
     this.decimalValues = decimalValues;
@@ -51,33 +54,47 @@ private InNode(Set<Integer> values, Set<Long> longValues, Set<String> stringValu
     this.scale = scale;
     this.stringValues = stringValues;
     this.binaryValues = binaryValues;
+    this.floatValues = floatValues;
+    this.doubleValues = doubleValues;
     this.input = node;
   }
 
   public static InNode makeIntInExpr(TreeNode node, Set<Integer> intValues) {
     return new InNode(intValues,
-            null, null, null, null, null, null, node);
+            null, null, null, null, null, null, null,
+            null, node);
   }
 
   public static InNode makeLongInExpr(TreeNode node, Set<Long> longValues) {
     return new InNode(null, longValues,
-            null, null, null, null, null, node);
+            null, null, null, null, null, null,
+            null, node);
+  }
+
+  public static InNode makeFloatInExpr(TreeNode node, Set<Float> floatValues) {
+    return new InNode(null, null, null, null, null, null,
+            null, floatValues, null, node);
+  }
+
+  public static InNode makeDoubleInExpr(TreeNode node, Set<Double> doubleValues) {
+    return new InNode(null, null, null, null, null,
+            null, null, null, doubleValues, node);
   }
 
   public static InNode makeDecimalInExpr(TreeNode node, Set<BigDecimal> decimalValues,
                                          Integer precision, Integer scale) {
     return new InNode(null, null, null, null,
-            decimalValues, precision, scale, node);
+            decimalValues, precision, scale, null, null, node);
   }
 
   public static InNode makeStringInExpr(TreeNode node, Set<String> stringValues) {
     return new InNode(null, null, stringValues, null,
-            null, null, null, node);
+            null, null, null, null, null, node);
   }
 
   public static InNode makeBinaryInExpr(TreeNode node, Set<byte[]> binaryValues) {
     return new InNode(null, null, null, binaryValues,
-            null, null, null, node);
+            null, null, null, null, null, node);
   }
 
   @Override
@@ -96,6 +113,16 @@ public GandivaTypes.TreeNode toProtobuf() throws GandivaException {
       longValues.stream().forEach(val -> longConstants.addLongValues(GandivaTypes.LongNode.newBuilder()
               .setValue(val).build()));
       inNode.setLongValues(longConstants.build());
+    } else if (floatValues != null) {
+      GandivaTypes.FloatConstants.Builder floatConstants = GandivaTypes.FloatConstants.newBuilder();
+      floatValues.stream().forEach(val -> floatConstants.addFloatValues(GandivaTypes.FloatNode.newBuilder()
+              .setValue(val).build()));
+      inNode.setFloatValues(floatConstants.build());
+    } else if (doubleValues != null) {
+      GandivaTypes.DoubleConstants.Builder doubleConstants = GandivaTypes.DoubleConstants.newBuilder();
+      doubleValues.stream().forEach(val -> doubleConstants.addDoubleValues(GandivaTypes.DoubleNode.newBuilder()
+              .setValue(val).build()));
+      inNode.setDoubleValues(doubleConstants.build());
     } else if (decimalValues != null) {
       GandivaTypes.DecimalConstants.Builder decimalConstants = GandivaTypes.DecimalConstants.newBuilder();
       decimalValues.stream().forEach(val -> decimalConstants.addDecimalValues(GandivaTypes.DecimalNode.newBuilder()
diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/TreeBuilder.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/TreeBuilder.java
index 067715c0ae1..8656e886aae 100644
--- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/TreeBuilder.java
+++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/TreeBuilder.java
@@ -208,6 +208,16 @@ public static TreeNode makeInExpressionDecimal(TreeNode resultNode,
     return InNode.makeDecimalInExpr(resultNode, decimalValues, precision, scale);
   }
 
+  public static TreeNode makeInExpressionFloat(TreeNode resultNode,
+                                                Set<Float> floatValues) {
+    return InNode.makeFloatInExpr(resultNode, floatValues);
+  }
+
+  public static TreeNode makeInExpressionDouble(TreeNode resultNode,
+                                                Set<Double> doubleValues) {
+    return InNode.makeDoubleInExpr(resultNode, doubleValues);
+  }
+
   public static TreeNode makeInExpressionString(TreeNode resultNode,
                                                 Set<String> stringValues) {
     return InNode.makeStringInExpr(resultNode, stringValues);
diff --git a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java
index 606c1a922e5..e51f4586124 100644
--- a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java
+++ b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java
@@ -1220,10 +1220,10 @@ public void testInExpr() throws GandivaException, Exception {
     output.add(bitVector);
     eval.evaluate(batch, output);
 
-    for (int i = 1; i < 5; i++) {
+    for (int i = 0; i < 4; i++) {
       assertTrue(bitVector.getObject(i).booleanValue());
     }
-    for (int i = 5; i < 16; i++) {
+    for (int i = 4; i < 16; i++) {
       assertFalse(bitVector.getObject(i).booleanValue());
     }
 
@@ -1252,7 +1252,9 @@ public void testInExprDecimal() throws GandivaException, Exception {
     Schema schema = new Schema(Lists.newArrayList(c1));
     Projector eval = Projector.make(schema, Lists.newArrayList(expr));
 
+    // Create a row-batch with some sample data to look for
     int numRows = 16;
+    // Only the first 8 values will be valid.
     byte[] validity = new byte[]{(byte) 255, 0};
     String[] c1Values =
             new String[]{"1", "2", "3", "4", "-0.0", "6", "7", "8", "9", "10", "11", "12", "13", "14",
@@ -1276,6 +1278,57 @@ public void testInExprDecimal() throws GandivaException, Exception {
     output.add(bitVector);
     eval.evaluate(batch, output);
 
+    // The first four values in the vector must match the expression, but not the other ones.
+    for (int i = 0; i < 4; i++) {
+      assertTrue(bitVector.getObject(i).booleanValue());
+    }
+    for (int i = 4; i < 16; i++) {
+      assertFalse(bitVector.getObject(i).booleanValue());
+    }
+
+    releaseRecordBatch(batch);
+    releaseValueVectors(output);
+    eval.close();
+  }
+
+  @Test
+  public void testInExprDouble() throws GandivaException, Exception {
+    Field c1 = Field.nullable("c1", float64);
+
+    TreeNode inExpr =
+            TreeBuilder.makeInExpressionDouble(TreeBuilder.makeField(c1),
+                    Sets.newHashSet(1.0, -0.0, 3.0, 4.0, Double.NaN,
+                            Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY));
+    ExpressionTree expr = TreeBuilder.makeExpression(inExpr, Field.nullable("result", boolType));
+    Schema schema = new Schema(Lists.newArrayList(c1));
+    Projector eval = Projector.make(schema, Lists.newArrayList(expr));
+
+    // Create a row-batch with some sample data to look for
+    int numRows = 16;
+    // Only the first 8 values will be valid.
+    byte[] validity = new byte[]{(byte) 255, 0};
+    double[] c1Values = new double[]{1, -0.0, Double.NEGATIVE_INFINITY , Double.POSITIVE_INFINITY, Double.NaN,
+        6, 7, 8, 9, 10, 11, 12, 13, 14, 4 , 3};
+
+    ArrowBuf c1Validity = buf(validity);
+    ArrowBuf c1Data = doubleBuf(c1Values);
+    ArrowBuf c2Validity = buf(validity);
+
+    ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0);
+    ArrowRecordBatch batch =
+            new ArrowRecordBatch(
+                    numRows,
+                    Lists.newArrayList(fieldNode, fieldNode),
+                    Lists.newArrayList(c1Validity, c1Data, c2Validity));
+
+    BitVector bitVector = new BitVector(EMPTY_SCHEMA_PATH, allocator);
+    bitVector.allocateNew(numRows);
+
+    List<ValueVector> output = new ArrayList<ValueVector>();
+    output.add(bitVector);
+    eval.evaluate(batch, output);
+
+    // The first five values in the vector must match the expression, but not the other ones.
     for (int i = 1; i < 5; i++) {
       assertTrue(bitVector.getObject(i).booleanValue());
     }

From 7206605b3b160116e21fe6f6c0248767deaebec3 Mon Sep 17 00:00:00 2001
From: Matthew Topol <mtopol@factset.com>
Date: Wed, 19 May 2021 22:50:32 -0700
Subject: [PATCH 271/719] ARROW-12424: [Go][Parquet] Adding Schema Package for
 Go Parquet

Following up from #9817  this is the next chunk of code for the Go Parquet port consisting of the Schema package, implementing the Converted and Logical types along with handling schema creation, manipulation, and printing.

Closes #10071 from zeroshade/arrow-12424

Authored-by: Matthew Topol <mtopol@factset.com>
Signed-off-by: Micah Kornfield <emkornfield@gmail.com>
---
 go/parquet/internal/debug/log_off.go      |   24 +
 go/parquet/internal/debug/log_on.go       |   32 +
 go/parquet/schema/column.go               |  108 ++
 go/parquet/schema/converted_types.go      |  191 ++++
 go/parquet/schema/converted_types_test.go |   50 +
 go/parquet/schema/helpers.go              |  109 ++
 go/parquet/schema/helpers_test.go         |  103 ++
 go/parquet/schema/logical_types.go        | 1097 +++++++++++++++++++++
 go/parquet/schema/logical_types_test.go   |  559 +++++++++++
 go/parquet/schema/node.go                 |  627 ++++++++++++
 go/parquet/schema/reflection.go           |  827 ++++++++++++++++
 go/parquet/schema/reflection_test.go      |  403 ++++++++
 go/parquet/schema/schema.go               |  328 ++++++
 go/parquet/schema/schema_element_test.go  |  514 ++++++++++
 go/parquet/schema/schema_flatten_test.go  |  157 +++
 go/parquet/schema/schema_test.go          |  666 +++++++++++++
 16 files changed, 5795 insertions(+)
 create mode 100644 go/parquet/internal/debug/log_off.go
 create mode 100644 go/parquet/internal/debug/log_on.go
 create mode 100644 go/parquet/schema/column.go
 create mode 100644 go/parquet/schema/converted_types.go
 create mode 100644 go/parquet/schema/converted_types_test.go
 create mode 100644 go/parquet/schema/helpers.go
 create mode 100644 go/parquet/schema/helpers_test.go
 create mode 100644 go/parquet/schema/logical_types.go
 create mode 100644 go/parquet/schema/logical_types_test.go
 create mode 100644 go/parquet/schema/node.go
 create mode 100644 go/parquet/schema/reflection.go
 create mode 100644 go/parquet/schema/reflection_test.go
 create mode 100644 go/parquet/schema/schema.go
 create mode 100644 go/parquet/schema/schema_element_test.go
 create mode 100644 go/parquet/schema/schema_flatten_test.go
 create mode 100644 go/parquet/schema/schema_test.go

diff --git a/go/parquet/internal/debug/log_off.go b/go/parquet/internal/debug/log_off.go
new file mode 100644
index 00000000000..23dcccd810c
--- /dev/null
+++ b/go/parquet/internal/debug/log_off.go
@@ -0,0 +1,24 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build !debug
+
+package debug
+
+// use build tags in order to control the existence of this log function vs it getting
+// optimized away as a noop without the debug build tag.
+
+func Log(interface{}) {}
diff --git a/go/parquet/internal/debug/log_on.go b/go/parquet/internal/debug/log_on.go
new file mode 100644
index 00000000000..8d6106099f6
--- /dev/null
+++ b/go/parquet/internal/debug/log_on.go
@@ -0,0 +1,32 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build debug
+
+package debug
+
+import (
+	"log"
+	"os"
+)
+
+var (
+	debug = log.New(os.Stderr, "[D] ", log.LstdFlags)
+)
+
+func Log(msg interface{}) {
+	debug.Println(msg)
+}
diff --git a/go/parquet/schema/column.go b/go/parquet/schema/column.go
new file mode 100644
index 00000000000..c33ddf0d8a6
--- /dev/null
+++ b/go/parquet/schema/column.go
@@ -0,0 +1,108 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package schema
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/apache/arrow/go/parquet"
+	format "github.com/apache/arrow/go/parquet/internal/gen-go/parquet"
+)
+
+// Column encapsulates the information necessary to interpret primitive
+// column data in the context of a particular schema. We have to examine
+// the node structure of a column's path to the root in the schema tree
+// to be able to reassemble the nested structure from the repetition and
+// definition levels.
+type Column struct {
+	pnode *PrimitiveNode
+	// the maximum definition level in this column
+	// if this is > 0 then either this column or a parent column must be optional.
+	maxDefLvl int16
+	// the maximum repetition level in this column
+	// if this is > 0, then either this column or a parent column must be repeated.
+	// when the repetition level in the column data equals this value, it indicates
+	// additional elements in the innermost list.
+	maxRepLvl int16
+}
+
+// NewColumn returns a new column object for the given node with the provided
+// maximum definition and repetition levels.
+func NewColumn(n *PrimitiveNode, maxDefinitionLvl, maxRepetitionLvl int16) *Column {
+	return &Column{n, maxDefinitionLvl, maxRepetitionLvl}
+}
+
+// Name is the column's name
+func (c *Column) Name() string { return c.pnode.Name() }
+
+// ColumnPath returns the full path to this column from the root of the schema
+func (c *Column) ColumnPath() parquet.ColumnPath { return c.pnode.columnPath() }
+
+// Path is equivalent to ColumnPath().String() returning the dot-string version of the path
+func (c *Column) Path() string { return c.pnode.Path() }
+
+// TypeLength is -1 if not a FixedLenByteArray, otherwise it is the length of elements in the column
+func (c *Column) TypeLength() int { return c.pnode.TypeLength() }
+
+func (c *Column) MaxDefinitionLevel() int16        { return c.maxDefLvl }
+func (c *Column) MaxRepetitionLevel() int16        { return c.maxRepLvl }
+func (c *Column) PhysicalType() parquet.Type       { return c.pnode.PhysicalType() }
+func (c *Column) ConvertedType() ConvertedType     { return c.pnode.convertedType }
+func (c *Column) LogicalType() LogicalType         { return c.pnode.logicalType }
+func (c *Column) ColumnOrder() parquet.ColumnOrder { return c.pnode.ColumnOrder }
+func (c *Column) String() string {
+	var bld strings.Builder
+	bld.WriteString("column descriptor = {\n")
+	fmt.Fprintf(&bld, "  name: %s,\n", c.Name())
+	fmt.Fprintf(&bld, "  path: %s,\n", c.Path())
+	fmt.Fprintf(&bld, "  physical_type: %s,\n", c.PhysicalType())
+	fmt.Fprintf(&bld, "  converted_type: %s,\n", c.ConvertedType())
+	fmt.Fprintf(&bld, "  logical_type: %s,\n", c.LogicalType())
+	fmt.Fprintf(&bld, "  max_definition_level: %d,\n", c.MaxDefinitionLevel())
+	fmt.Fprintf(&bld, "  max_repetition_level: %d,\n", c.MaxRepetitionLevel())
+	if c.PhysicalType() == parquet.Types.FixedLenByteArray {
+		fmt.Fprintf(&bld, "  length: %d,\n", c.TypeLength())
+	}
+	if c.ConvertedType() == ConvertedTypes.Decimal {
+		fmt.Fprintf(&bld, "  precision: %d,\n  scale: %d,\n", c.pnode.decimalMetaData.Precision, c.pnode.decimalMetaData.Scale)
+	}
+	bld.WriteString("}")
+	return bld.String()
+}
+
+// Equals will return true if the rhs Column has the same Max Repetition and Definition levels
+// along with having the same node definition.
+func (c *Column) Equals(rhs *Column) bool {
+	return c.pnode.Equals(rhs.pnode) &&
+		c.MaxRepetitionLevel() == rhs.MaxRepetitionLevel() &&
+		c.MaxDefinitionLevel() == rhs.MaxDefinitionLevel()
+}
+
+// SchemaNode returns the underlying Node in the schema tree for this column.
+func (c *Column) SchemaNode() Node {
+	return c.pnode
+}
+
+// SortOrder returns the sort order of this column's statistics based on the
+// Logical and Converted types.
+func (c *Column) SortOrder() SortOrder {
+	if c.LogicalType() != nil {
+		return GetLogicalSortOrder(c.LogicalType(), format.Type(c.pnode.PhysicalType()))
+	}
+	return GetSortOrder(c.ConvertedType(), format.Type(c.pnode.PhysicalType()))
+}
diff --git a/go/parquet/schema/converted_types.go b/go/parquet/schema/converted_types.go
new file mode 100644
index 00000000000..b5ceff31257
--- /dev/null
+++ b/go/parquet/schema/converted_types.go
@@ -0,0 +1,191 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package schema
+
+import (
+	format "github.com/apache/arrow/go/parquet/internal/gen-go/parquet"
+)
+
+// ConvertedType corresponds to the ConvertedType in the parquet.Thrift,
+// with added values of None and NA for handling when these values are not
+// set in the metadata
+type ConvertedType format.ConvertedType
+
+var (
+	// ConvertedTypes is a struct containing the constants for the types
+	// to make it easy to reference them while making it clear what they are
+	ConvertedTypes = struct {
+		None            ConvertedType
+		UTF8            ConvertedType
+		Map             ConvertedType
+		MapKeyValue     ConvertedType
+		List            ConvertedType
+		Enum            ConvertedType
+		Decimal         ConvertedType
+		Date            ConvertedType
+		TimeMillis      ConvertedType
+		TimeMicros      ConvertedType
+		TimestampMillis ConvertedType
+		TimestampMicros ConvertedType
+		Uint8           ConvertedType
+		Uint16          ConvertedType
+		Uint32          ConvertedType
+		Uint64          ConvertedType
+		Int8            ConvertedType
+		Int16           ConvertedType
+		Int32           ConvertedType
+		Int64           ConvertedType
+		JSON            ConvertedType
+		BSON            ConvertedType
+		Interval        ConvertedType
+		NA              ConvertedType
+	}{
+		None:            -1, // thrift enum starts at 0, so we know this will not be used
+		UTF8:            ConvertedType(format.ConvertedType_UTF8),
+		Map:             ConvertedType(format.ConvertedType_MAP),
+		MapKeyValue:     ConvertedType(format.ConvertedType_MAP_KEY_VALUE),
+		List:            ConvertedType(format.ConvertedType_LIST),
+		Enum:            ConvertedType(format.ConvertedType_ENUM),
+		Decimal:         ConvertedType(format.ConvertedType_DECIMAL),
+		Date:            ConvertedType(format.ConvertedType_DATE),
+		TimeMillis:      ConvertedType(format.ConvertedType_TIME_MILLIS),
+		TimeMicros:      ConvertedType(format.ConvertedType_TIME_MICROS),
+		TimestampMillis: ConvertedType(format.ConvertedType_TIMESTAMP_MILLIS),
+		TimestampMicros: ConvertedType(format.ConvertedType_TIMESTAMP_MICROS),
+		Uint8:           ConvertedType(format.ConvertedType_UINT_8),
+		Uint16:          ConvertedType(format.ConvertedType_UINT_16),
+		Uint32:          ConvertedType(format.ConvertedType_UINT_32),
+		Uint64:          ConvertedType(format.ConvertedType_UINT_64),
+		Int8:            ConvertedType(format.ConvertedType_INT_8),
+		Int16:           ConvertedType(format.ConvertedType_INT_16),
+		Int32:           ConvertedType(format.ConvertedType_INT_32),
+		Int64:           ConvertedType(format.ConvertedType_INT_64),
+		JSON:            ConvertedType(format.ConvertedType_JSON),
+		BSON:            ConvertedType(format.ConvertedType_BSON),
+		Interval:        ConvertedType(format.ConvertedType_INTERVAL),
+		NA:              24, // should always be the last values after Interval
+	}
+)
+
+func (p ConvertedType) String() string {
+	switch p {
+	case ConvertedTypes.None:
+		return "NONE"
+	case ConvertedTypes.NA:
+		return "UNKNOWN"
+	default:
+		return format.ConvertedType(p).String()
+	}
+}
+
+// ToLogicalType returns the correct LogicalType for the given ConvertedType, using the decimal
+// metadata provided to define the precision/scale if necessary
+func (p ConvertedType) ToLogicalType(convertedDecimal DecimalMetadata) LogicalType {
+	switch p {
+	case ConvertedTypes.UTF8:
+		return StringLogicalType{}
+	case ConvertedTypes.Map, ConvertedTypes.MapKeyValue:
+		return MapLogicalType{}
+	case ConvertedTypes.List:
+		return ListLogicalType{}
+	case ConvertedTypes.Enum:
+		return EnumLogicalType{}
+	case ConvertedTypes.Decimal:
+		return NewDecimalLogicalType(convertedDecimal.Precision, convertedDecimal.Scale)
+	case ConvertedTypes.Date:
+		return DateLogicalType{}
+	case ConvertedTypes.TimeMillis:
+		return NewTimeLogicalType(true /* adjustedToUTC */, TimeUnitMillis)
+	case ConvertedTypes.TimeMicros:
+		return NewTimeLogicalType(true /* adjustedToUTC */, TimeUnitMicros)
+	case ConvertedTypes.TimestampMillis:
+		t := NewTimestampLogicalType(true /* adjustedToUTC */, TimeUnitMillis)
+		t.(*TimestampLogicalType).fromConverted = true
+		return t
+	case ConvertedTypes.TimestampMicros:
+		t := NewTimestampLogicalType(true /* adjustedToUTC */, TimeUnitMicros)
+		t.(*TimestampLogicalType).fromConverted = true
+		return t
+	case ConvertedTypes.Interval:
+		return IntervalLogicalType{}
+	case ConvertedTypes.Int8:
+		return NewIntLogicalType(8 /* bitWidth */, true /* signed */)
+	case ConvertedTypes.Int16:
+		return NewIntLogicalType(16 /* bitWidth */, true /* signed */)
+	case ConvertedTypes.Int32:
+		return NewIntLogicalType(32 /* bitWidth */, true /* signed */)
+	case ConvertedTypes.Int64:
+		return NewIntLogicalType(64 /* bitWidth */, true /* signed */)
+	case ConvertedTypes.Uint8:
+		return NewIntLogicalType(8 /* bitWidth */, false /* signed */)
+	case ConvertedTypes.Uint16:
+		return NewIntLogicalType(16 /* bitWidth */, false /* signed */)
+	case ConvertedTypes.Uint32:
+		return NewIntLogicalType(32 /* bitWidth */, false /* signed */)
+	case ConvertedTypes.Uint64:
+		return NewIntLogicalType(64 /* bitWidth */, false /* signed */)
+	case ConvertedTypes.JSON:
+		return JSONLogicalType{}
+	case ConvertedTypes.BSON:
+		return BSONLogicalType{}
+	case ConvertedTypes.None:
+		return NoLogicalType{}
+	case ConvertedTypes.NA:
+		fallthrough
+	default:
+		return UnknownLogicalType{}
+	}
+}
+
+// GetSortOrder defaults to the sort order based on the physical type if convert
+// is ConvertedTypes.None, otherwise determines the sort order by the converted type.
+func GetSortOrder(convert ConvertedType, primitive format.Type) SortOrder {
+	if convert == ConvertedTypes.None {
+		return DefaultSortOrder(primitive)
+	}
+	switch convert {
+	case ConvertedTypes.Int8,
+		ConvertedTypes.Int16,
+		ConvertedTypes.Int32,
+		ConvertedTypes.Int64,
+		ConvertedTypes.Date,
+		ConvertedTypes.TimeMicros,
+		ConvertedTypes.TimeMillis,
+		ConvertedTypes.TimestampMicros,
+		ConvertedTypes.TimestampMillis,
+		ConvertedTypes.Decimal:
+		return SortSIGNED
+	case ConvertedTypes.Uint8,
+		ConvertedTypes.Uint16,
+		ConvertedTypes.Uint32,
+		ConvertedTypes.Uint64,
+		ConvertedTypes.Enum,
+		ConvertedTypes.UTF8,
+		ConvertedTypes.BSON,
+		ConvertedTypes.JSON:
+		return SortUNSIGNED
+	case ConvertedTypes.List,
+		ConvertedTypes.Map,
+		ConvertedTypes.MapKeyValue,
+		ConvertedTypes.Interval,
+		ConvertedTypes.None,
+		ConvertedTypes.NA:
+		return SortUNKNOWN
+	default:
+		return SortUNKNOWN
+	}
+}
diff --git a/go/parquet/schema/converted_types_test.go b/go/parquet/schema/converted_types_test.go
new file mode 100644
index 00000000000..86e0cb023e3
--- /dev/null
+++ b/go/parquet/schema/converted_types_test.go
@@ -0,0 +1,50 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package schema_test
+
+import (
+	"testing"
+
+	"github.com/apache/arrow/go/parquet/schema"
+	"github.com/stretchr/testify/assert"
+)
+
+func TestConvertedTypesToString(t *testing.T) {
+	assert.Equal(t, "NONE", schema.ConvertedTypes.None.String())
+	assert.Equal(t, "UTF8", schema.ConvertedTypes.UTF8.String())
+	assert.Equal(t, "MAP", schema.ConvertedTypes.Map.String())
+	assert.Equal(t, "MAP_KEY_VALUE", schema.ConvertedTypes.MapKeyValue.String())
+	assert.Equal(t, "LIST", schema.ConvertedTypes.List.String())
+	assert.Equal(t, "ENUM", schema.ConvertedTypes.Enum.String())
+	assert.Equal(t, "DECIMAL", schema.ConvertedTypes.Decimal.String())
+	assert.Equal(t, "DATE", schema.ConvertedTypes.Date.String())
+	assert.Equal(t, "TIME_MILLIS", schema.ConvertedTypes.TimeMillis.String())
+	assert.Equal(t, "TIME_MICROS", schema.ConvertedTypes.TimeMicros.String())
+	assert.Equal(t, "TIMESTAMP_MILLIS", schema.ConvertedTypes.TimestampMillis.String())
+	assert.Equal(t, "TIMESTAMP_MICROS", schema.ConvertedTypes.TimestampMicros.String())
+	assert.Equal(t, "UINT_8", schema.ConvertedTypes.Uint8.String())
+	assert.Equal(t, "UINT_16", schema.ConvertedTypes.Uint16.String())
+	assert.Equal(t, "UINT_32", schema.ConvertedTypes.Uint32.String())
+	assert.Equal(t, "UINT_64", schema.ConvertedTypes.Uint64.String())
+	assert.Equal(t, "INT_8", schema.ConvertedTypes.Int8.String())
+	assert.Equal(t, "INT_16", schema.ConvertedTypes.Int16.String())
+	assert.Equal(t, "INT_32", schema.ConvertedTypes.Int32.String())
+	assert.Equal(t, "INT_64", schema.ConvertedTypes.Int64.String())
+	assert.Equal(t, "JSON", schema.ConvertedTypes.JSON.String())
+	assert.Equal(t, "BSON", schema.ConvertedTypes.BSON.String())
+	assert.Equal(t, "INTERVAL", schema.ConvertedTypes.Interval.String())
+}
diff --git a/go/parquet/schema/helpers.go b/go/parquet/schema/helpers.go
new file mode 100644
index 00000000000..70df2a3d0b0
--- /dev/null
+++ b/go/parquet/schema/helpers.go
@@ -0,0 +1,109 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package schema
+
+import (
+	"github.com/apache/arrow/go/parquet"
+	"golang.org/x/xerrors"
+)
+
+// ListOf is a convenience helper function to create a properly structured
+// list structure according to the Parquet Spec.
+//
+// <list-repetition> group <name> (LIST) {
+//   repeated group list {
+//     <element-repetition> <element-type> element;
+//   }
+// }
+//
+// <list-repetition> can only be optional or required. panics if repeated.
+// <element-repetition> can only be optional or required. panics if repeated.
+func ListOf(n Node, rep parquet.Repetition, fieldID int32) (*GroupNode, error) {
+	if rep == parquet.Repetitions.Repeated || n.RepetitionType() == parquet.Repetitions.Repeated {
+		return nil, xerrors.New("parquet: listof repetition and element repetition must not be repeated.")
+	}
+	listName := n.Name()
+
+	switch n := n.(type) {
+	case *PrimitiveNode:
+		n.name = "element"
+	case *GroupNode:
+		n.name = "element"
+	}
+
+	list, err := NewGroupNode("list" /* name */, parquet.Repetitions.Repeated, FieldList{n}, -1 /* fieldID */)
+	if err != nil {
+		return nil, err
+	}
+	return NewGroupNodeLogical(listName, rep, FieldList{list}, ListLogicalType{}, fieldID)
+}
+
+// MapOf is a convenience helper function to create a properly structured
+// parquet map node setup according to the Parquet Spec.
+//
+// <map-repetition> group <name> (MAP) {
+// 	 repeated group key_value {
+// 	   required <key-type> key;
+//     <value-repetition> <value-type> value;
+//   }
+// }
+//
+// key node will be renamed to "key", value node if not nil will be renamed to "value"
+//
+// <map-repetition> must be only optional or required. panics if repeated is passed.
+//
+// the key node *must* be required repetition. panics if optional or repeated
+//
+// value node can be nil (omitted) or have a repetition of required or optional *only*.
+// panics if value node is not nil and has a repetition of repeated.
+func MapOf(name string, key Node, value Node, mapRep parquet.Repetition, fieldID int32) (*GroupNode, error) {
+	if mapRep == parquet.Repetitions.Repeated {
+		return nil, xerrors.New("parquet: map repetition cannot be Repeated")
+	}
+	if key.RepetitionType() != parquet.Repetitions.Required {
+		return nil, xerrors.New("parquet: map key repetition must be Required")
+	}
+	if value != nil {
+		if value.RepetitionType() == parquet.Repetitions.Repeated {
+			return nil, xerrors.New("parquet: map value cannot have repetition Repeated")
+		}
+		switch value := value.(type) {
+		case *PrimitiveNode:
+			value.name = "value"
+		case *GroupNode:
+			value.name = "value"
+		}
+	}
+
+	switch key := key.(type) {
+	case *PrimitiveNode:
+		key.name = "key"
+	case *GroupNode:
+		key.name = "key"
+	}
+
+	keyval := FieldList{key}
+	if value != nil {
+		keyval = append(keyval, value)
+	}
+
+	kvNode, err := NewGroupNode("key_value" /* name */, parquet.Repetitions.Repeated, keyval, -1 /* fieldID */)
+	if err != nil {
+		return nil, err
+	}
+	return NewGroupNodeLogical(name, mapRep, FieldList{kvNode}, MapLogicalType{}, fieldID)
+}
diff --git a/go/parquet/schema/helpers_test.go b/go/parquet/schema/helpers_test.go
new file mode 100644
index 00000000000..0c1a6ab3560
--- /dev/null
+++ b/go/parquet/schema/helpers_test.go
@@ -0,0 +1,103 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package schema_test
+
+import (
+	"bytes"
+	"strings"
+	"testing"
+
+	"github.com/apache/arrow/go/parquet"
+	"github.com/apache/arrow/go/parquet/schema"
+	"github.com/stretchr/testify/assert"
+)
+
+func TestListOf(t *testing.T) {
+	n := schema.NewByteArrayNode("str", parquet.Repetitions.Required, 1)
+	list, err := schema.ListOf(n, parquet.Repetitions.Optional, 2)
+
+	assert.NoError(t, err)
+	assert.Equal(t, "str", list.Name())
+	assert.Equal(t, parquet.Repetitions.Optional, list.RepetitionType())
+	assert.Equal(t, 1, list.NumFields())
+	assert.EqualValues(t, 2, list.FieldID())
+	assert.IsType(t, &schema.GroupNode{}, list.Field(0))
+	assert.Equal(t, "list", list.Field(0).Name())
+	assert.Equal(t, 1, list.Field(0).(*schema.GroupNode).NumFields())
+	assert.Same(t, n, list.Field(0).(*schema.GroupNode).Field(0))
+	assert.Equal(t, "element", list.Field(0).(*schema.GroupNode).Field(0).Name())
+}
+
+func TestListOfNested(t *testing.T) {
+	n, err := schema.ListOf(schema.NewInt32Node("arrays", parquet.Repetitions.Required, -1), parquet.Repetitions.Required, -1)
+	assert.NoError(t, err)
+	final, err := schema.ListOf(n, parquet.Repetitions.Required, -1)
+	assert.NoError(t, err)
+
+	var buf bytes.Buffer
+	schema.PrintSchema(final, &buf, 4)
+	assert.Equal(t,
+		`required group field_id=-1 arrays (List) {
+    repeated group field_id=-1 list {
+        required group field_id=-1 element (List) {
+            repeated group field_id=-1 list {
+                required int32 field_id=-1 element;
+            }
+        }
+    }
+}`, strings.TrimSpace(buf.String()))
+}
+
+func TestMapOfNestedTypes(t *testing.T) {
+	n, err := schema.NewGroupNode("student", parquet.Repetitions.Required, schema.FieldList{
+		schema.NewByteArrayNode("name", parquet.Repetitions.Required, -1),
+		schema.NewInt32Node("age", parquet.Repetitions.Optional, -1),
+	}, -1)
+	assert.NoError(t, err)
+
+	grp, err := schema.NewGroupNode("classes", parquet.Repetitions.Optional, schema.FieldList{
+		schema.NewInt32Node("a", parquet.Repetitions.Repeated, -1),
+		schema.NewFloat32Node("b", parquet.Repetitions.Repeated, -1),
+	}, -1)
+	assert.NoError(t, err)
+
+	classes, err := schema.ListOf(grp, parquet.Repetitions.Optional, -1)
+	assert.NoError(t, err)
+
+	m, err := schema.MapOf("studentmap", n, classes, parquet.Repetitions.Required, 1)
+	assert.NoError(t, err)
+
+	var buf bytes.Buffer
+	schema.PrintSchema(m, &buf, 4)
+	assert.Equal(t,
+		`required group field_id=1 studentmap (Map) {
+    repeated group field_id=-1 key_value {
+        required group field_id=-1 key {
+            required byte_array field_id=-1 name;
+            optional int32 field_id=-1 age;
+        }
+        optional group field_id=-1 value (List) {
+            repeated group field_id=-1 list {
+                optional group field_id=-1 element {
+                    repeated int32 field_id=-1 a;
+                    repeated float field_id=-1 b;
+                }
+            }
+        }
+    }
+}`, strings.TrimSpace(buf.String()))
+}
diff --git a/go/parquet/schema/logical_types.go b/go/parquet/schema/logical_types.go
new file mode 100644
index 00000000000..b425c895d84
--- /dev/null
+++ b/go/parquet/schema/logical_types.go
@@ -0,0 +1,1097 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package schema
+
+import (
+	"encoding/json"
+	"fmt"
+	"math"
+
+	"github.com/apache/arrow/go/parquet"
+	"github.com/apache/arrow/go/parquet/internal/debug"
+	format "github.com/apache/arrow/go/parquet/internal/gen-go/parquet"
+)
+
+// DecimalMetadata is a struct for managing scale and precision information between
+// converted and logical types.
+type DecimalMetadata struct {
+	IsSet     bool
+	Scale     int32
+	Precision int32
+}
+
+func getLogicalType(l *format.LogicalType) LogicalType {
+	switch {
+	case l.IsSetSTRING():
+		return StringLogicalType{}
+	case l.IsSetMAP():
+		return MapLogicalType{}
+	case l.IsSetLIST():
+		return ListLogicalType{}
+	case l.IsSetENUM():
+		return EnumLogicalType{}
+	case l.IsSetDECIMAL():
+		return &DecimalLogicalType{typ: l.DECIMAL}
+	case l.IsSetDATE():
+		return DateLogicalType{}
+	case l.IsSetTIME():
+		if timeUnitFromThrift(l.TIME.Unit) == TimeUnitUnknown {
+			panic("parquet: TimeUnit must be one of MILLIS, MICROS, or NANOS for Time logical type")
+		}
+		return &TimeLogicalType{typ: l.TIME}
+	case l.IsSetTIMESTAMP():
+		if timeUnitFromThrift(l.TIMESTAMP.Unit) == TimeUnitUnknown {
+			panic("parquet: TimeUnit must be one of MILLIS, MICROS, or NANOS for Timestamp logical type")
+		}
+		return &TimestampLogicalType{typ: l.TIMESTAMP}
+	case l.IsSetINTEGER():
+		return &IntLogicalType{typ: l.INTEGER}
+	case l.IsSetUNKNOWN():
+		return NullLogicalType{}
+	case l.IsSetJSON():
+		return JSONLogicalType{}
+	case l.IsSetBSON():
+		return BSONLogicalType{}
+	case l.IsSetUUID():
+		return UUIDLogicalType{}
+	case l == nil:
+		return NoLogicalType{}
+	default:
+		panic("invalid logical type")
+	}
+}
+
+// TimeUnitType is an enum for denoting whether a time based logical type
+// is using milliseconds, microseconds or nanoseconds.
+type TimeUnitType int
+
+// Constants for the TimeUnitType
+const (
+	TimeUnitMillis TimeUnitType = iota
+	TimeUnitMicros
+	TimeUnitNanos
+	TimeUnitUnknown
+)
+
+// LogicalType is the descriptor that defines the usage of a physical primitive
+// type in the schema, such as an Interval, Date, etc.
+type LogicalType interface {
+	// Returns true if a nested type like List or Map
+	IsNested() bool
+	// Returns true if this type can be serialized, ie: not Unknown/NoType/Interval
+	IsSerialized() bool
+	// Returns true if not NoLogicalType
+	IsValid() bool
+	// Returns true if it is NoType
+	IsNone() bool
+	// returns a string representation of the Logical Type
+	String() string
+	toThrift() *format.LogicalType
+	// Return the equivalent ConvertedType for legacy Parquet systems
+	ToConvertedType() (ConvertedType, DecimalMetadata)
+	// Returns true if the specified ConvertedType is compatible with this
+	// logical type
+	IsCompatible(ConvertedType, DecimalMetadata) bool
+	// Returns true if this logical type can be used with the provided physical type
+	IsApplicable(t parquet.Type, tlen int32) bool
+	// Returns true if the logical types are the same
+	Equals(LogicalType) bool
+	// Returns the default stat sort order for this logical type
+	SortOrder() SortOrder
+}
+
+// TemporalLogicalType is a smaller interface for Time based logical types
+// like Time / Timestamp
+type TemporalLogicalType interface {
+	LogicalType
+	IsAdjustedToUTC() bool
+	TimeUnit() TimeUnitType
+}
+
+// SortOrder mirrors the parquet.thrift sort order type
+type SortOrder int8
+
+// Constants for the Stat sort order definitions
+const (
+	SortSIGNED SortOrder = iota
+	SortUNSIGNED
+	SortUNKNOWN
+)
+
+// DefaultSortOrder returns the default stat sort order for the given physical type
+func DefaultSortOrder(primitive format.Type) SortOrder {
+	switch primitive {
+	case format.Type_BOOLEAN, format.Type_INT32, format.Type_INT64, format.Type_FLOAT, format.Type_DOUBLE:
+		return SortSIGNED
+	case format.Type_BYTE_ARRAY, format.Type_FIXED_LEN_BYTE_ARRAY:
+		return SortUNSIGNED
+	case format.Type_INT96:
+		fallthrough
+	default:
+		return SortUNKNOWN
+	}
+}
+
+// GetLogicalSortOrder returns the default sort order for this logical type
+// or falls back to the default sort order for the physical type if not valid
+func GetLogicalSortOrder(logical LogicalType, primitive format.Type) SortOrder {
+	switch {
+	case logical == nil || !logical.IsValid():
+		return SortUNKNOWN
+	case logical.Equals(NoLogicalType{}):
+		return DefaultSortOrder(primitive)
+	default:
+		return logical.SortOrder()
+	}
+}
+
+type baseLogicalType struct{}
+
+func (baseLogicalType) IsSerialized() bool {
+	return true
+}
+
+func (baseLogicalType) IsValid() bool {
+	return true
+}
+
+func (baseLogicalType) IsNested() bool {
+	return false
+}
+
+func (baseLogicalType) IsNone() bool { return false }
+
+// StringLogicalType is a UTF8 string, only usable with ByteArray and FixedLenByteArray
+type StringLogicalType struct{ baseLogicalType }
+
+func (StringLogicalType) SortOrder() SortOrder {
+	return SortUNSIGNED
+}
+
+func (StringLogicalType) MarshalJSON() ([]byte, error) {
+	return json.Marshal(map[string]string{"Type": StringLogicalType{}.String()})
+}
+
+func (StringLogicalType) String() string {
+	return "String"
+}
+
+func (StringLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
+	return ConvertedTypes.UTF8, DecimalMetadata{}
+}
+
+func (StringLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool {
+	return t == ConvertedTypes.UTF8 && !dec.IsSet
+}
+
+func (StringLogicalType) IsApplicable(t parquet.Type, _ int32) bool {
+	return t == parquet.Types.ByteArray
+}
+
+func (StringLogicalType) toThrift() *format.LogicalType {
+	return &format.LogicalType{STRING: format.NewStringType()}
+}
+
+func (StringLogicalType) Equals(rhs LogicalType) bool {
+	_, ok := rhs.(StringLogicalType)
+	return ok
+}
+
+// MapLogicalType represents a mapped type
+type MapLogicalType struct{ baseLogicalType }
+
+func (MapLogicalType) SortOrder() SortOrder {
+	return SortUNKNOWN
+}
+
+func (MapLogicalType) MarshalJSON() ([]byte, error) {
+	return json.Marshal(map[string]string{"Type": MapLogicalType{}.String()})
+}
+
+func (MapLogicalType) String() string {
+	return "Map"
+}
+
+func (MapLogicalType) IsNested() bool {
+	return true
+}
+
+func (MapLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
+	return ConvertedTypes.Map, DecimalMetadata{}
+}
+
+func (MapLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool {
+	return (t == ConvertedTypes.Map || t == ConvertedTypes.MapKeyValue) && !dec.IsSet
+}
+
+func (MapLogicalType) IsApplicable(parquet.Type, int32) bool {
+	return false
+}
+
+func (MapLogicalType) toThrift() *format.LogicalType {
+	return &format.LogicalType{MAP: format.NewMapType()}
+}
+
+func (MapLogicalType) Equals(rhs LogicalType) bool {
+	_, ok := rhs.(MapLogicalType)
+	return ok
+}
+
+func NewListLogicalType() LogicalType {
+	return ListLogicalType{}
+}
+
+// ListLogicalType is used for columns which are themselves nested lists
+type ListLogicalType struct{ baseLogicalType }
+
+func (ListLogicalType) SortOrder() SortOrder {
+	return SortUNKNOWN
+}
+
+func (ListLogicalType) MarshalJSON() ([]byte, error) {
+	return json.Marshal(map[string]string{"Type": ListLogicalType{}.String()})
+}
+
+func (ListLogicalType) String() string {
+	return "List"
+}
+
+func (ListLogicalType) IsNested() bool {
+	return true
+}
+
+func (ListLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
+	return ConvertedTypes.List, DecimalMetadata{}
+}
+
+func (ListLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool {
+	return t == ConvertedTypes.List && !dec.IsSet
+}
+
+func (ListLogicalType) IsApplicable(parquet.Type, int32) bool {
+	return false
+}
+
+func (ListLogicalType) toThrift() *format.LogicalType {
+	return &format.LogicalType{LIST: format.NewListType()}
+}
+
+func (ListLogicalType) Equals(rhs LogicalType) bool {
+	_, ok := rhs.(ListLogicalType)
+	return ok
+}
+
+// EnumLogicalType is for representing an enum, which should be a byte array type
+type EnumLogicalType struct{ baseLogicalType }
+
+func (EnumLogicalType) SortOrder() SortOrder {
+	return SortUNSIGNED
+}
+
+func (EnumLogicalType) MarshalJSON() ([]byte, error) {
+	return json.Marshal(map[string]string{"Type": EnumLogicalType{}.String()})
+}
+
+func (EnumLogicalType) String() string {
+	return "Enum"
+}
+
+func (EnumLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
+	return ConvertedTypes.Enum, DecimalMetadata{}
+}
+
+func (EnumLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool {
+	return t == ConvertedTypes.Enum && !dec.IsSet
+}
+
+func (EnumLogicalType) IsApplicable(t parquet.Type, _ int32) bool {
+	return t == parquet.Types.ByteArray
+}
+
+func (EnumLogicalType) toThrift() *format.LogicalType {
+	return &format.LogicalType{ENUM: format.NewEnumType()}
+}
+
+func (EnumLogicalType) Equals(rhs LogicalType) bool {
+	_, ok := rhs.(EnumLogicalType)
+	return ok
+}
+
+// NewDecimalLogicalType returns a Decimal logical type with the given
+// precision and scale.
+//
+// Panics if precision < 1 or scale is not in the range (0, precision)
+func NewDecimalLogicalType(precision int32, scale int32) LogicalType {
+	if precision < 1 {
+		panic("parquet: precision must be greater than or equal to 1 for decimal logical type")
+	}
+	if scale < 0 || scale > precision {
+		panic("parquet: scale must be a non-negative integer that does not exceed precision for decimal logical type")
+	}
+	return &DecimalLogicalType{typ: &format.DecimalType{Precision: precision, Scale: scale}}
+}
+
+// DecimalLogicalType is used to represent a decimal value of a given
+// precision and scale
+type DecimalLogicalType struct {
+	baseLogicalType
+	typ *format.DecimalType
+}
+
+func (t DecimalLogicalType) Precision() int32 {
+	return t.typ.Precision
+}
+
+func (t DecimalLogicalType) Scale() int32 {
+	return t.typ.Scale
+}
+
+func (DecimalLogicalType) SortOrder() SortOrder {
+	return SortSIGNED
+}
+
+func (t DecimalLogicalType) MarshalJSON() ([]byte, error) {
+	return json.Marshal(map[string]interface{}{"Type": "Decimal", "precision": t.typ.Precision, "scale": t.typ.Scale})
+}
+
+func (t DecimalLogicalType) String() string {
+	return fmt.Sprintf("Decimal(precision=%d, scale=%d)", t.typ.Precision, t.typ.Scale)
+}
+
+func (t DecimalLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
+	return ConvertedTypes.Decimal, DecimalMetadata{IsSet: true, Scale: t.typ.GetScale(), Precision: t.typ.GetPrecision()}
+}
+
+func (t DecimalLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
+	return c == ConvertedTypes.Decimal &&
+		dec.IsSet && dec.Scale == t.typ.Scale && dec.Precision == t.typ.Precision
+}
+
+func (t DecimalLogicalType) IsApplicable(typ parquet.Type, tlen int32) bool {
+	switch typ {
+	case parquet.Types.Int32:
+		return 1 <= t.typ.Precision && t.typ.Precision <= 9
+	case parquet.Types.Int64:
+		if t.typ.Precision < 10 {
+			debug.Log("int64 used for decimal logical, precision is small enough to use int32")
+		}
+		return 1 <= t.typ.Precision && t.typ.Precision <= 18
+	case parquet.Types.FixedLenByteArray:
+		return t.typ.Precision <= int32(math.Floor(math.Log10(math.Pow(2.0, (8.0*float64(tlen)-1.0)))))
+	case parquet.Types.ByteArray:
+		return true
+	}
+	return false
+}
+
+func (t DecimalLogicalType) toThrift() *format.LogicalType {
+	return &format.LogicalType{DECIMAL: t.typ}
+}
+
+func (t DecimalLogicalType) Equals(rhs LogicalType) bool {
+	other, ok := rhs.(*DecimalLogicalType)
+	if !ok {
+		return false
+	}
+	return t.typ.Precision == other.typ.Precision && t.typ.Scale == other.typ.Scale
+}
+
+// DateLogicalType is an int32 representing the number of days since the Unix Epoch
+// 1 January 1970
+type DateLogicalType struct{ baseLogicalType }
+
+func (DateLogicalType) SortOrder() SortOrder {
+	return SortSIGNED
+}
+
+func (DateLogicalType) MarshalJSON() ([]byte, error) {
+	return json.Marshal(map[string]string{"Type": DateLogicalType{}.String()})
+}
+
+func (DateLogicalType) String() string {
+	return "Date"
+}
+
+func (DateLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
+	return ConvertedTypes.Date, DecimalMetadata{}
+}
+
+func (DateLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool {
+	return t == ConvertedTypes.Date && !dec.IsSet
+}
+
+func (DateLogicalType) IsApplicable(t parquet.Type, _ int32) bool {
+	return t == parquet.Types.Int32
+}
+
+func (DateLogicalType) toThrift() *format.LogicalType {
+	return &format.LogicalType{DATE: format.NewDateType()}
+}
+
+func (DateLogicalType) Equals(rhs LogicalType) bool {
+	_, ok := rhs.(DateLogicalType)
+	return ok
+}
+
+func timeUnitFromThrift(unit *format.TimeUnit) TimeUnitType {
+	switch {
+	case unit == nil:
+		return TimeUnitUnknown
+	case unit.IsSetMILLIS():
+		return TimeUnitMillis
+	case unit.IsSetMICROS():
+		return TimeUnitMicros
+	case unit.IsSetNANOS():
+		return TimeUnitNanos
+	default:
+		return TimeUnitUnknown
+	}
+}
+
+func timeUnitToString(unit *format.TimeUnit) string {
+	switch {
+	case unit == nil:
+		return "unknown"
+	case unit.IsSetMILLIS():
+		return "milliseconds"
+	case unit.IsSetMICROS():
+		return "microseconds"
+	case unit.IsSetNANOS():
+		return "nanoseconds"
+	default:
+		return "unknown"
+	}
+}
+
+func timeUnitFromString(v string) TimeUnitType {
+	switch v {
+	case "millis":
+		return TimeUnitMillis
+	case "micros":
+		return TimeUnitMicros
+	case "nanos":
+		return TimeUnitNanos
+	default:
+		return TimeUnitUnknown
+	}
+}
+
+func createTimeUnit(unit TimeUnitType) *format.TimeUnit {
+	tunit := format.NewTimeUnit()
+	switch unit {
+	case TimeUnitMicros:
+		tunit.MICROS = format.NewMicroSeconds()
+	case TimeUnitMillis:
+		tunit.MILLIS = format.NewMilliSeconds()
+	case TimeUnitNanos:
+		tunit.NANOS = format.NewNanoSeconds()
+	default:
+		panic("parquet: time unit must be one of MILLIS, MICROS, or NANOS for Time logical type")
+	}
+	return tunit
+}
+
+// NewTimeLogicalType returns a time type of the given unit.
+func NewTimeLogicalType(isAdjustedToUTC bool, unit TimeUnitType) LogicalType {
+	return &TimeLogicalType{typ: &format.TimeType{
+		IsAdjustedToUTC: isAdjustedToUTC,
+		Unit:            createTimeUnit(unit),
+	}}
+}
+
+// TimeLogicalType is a time type without a date and must be an
+// int32 for milliseconds, or an int64 for micro or nano seconds.
+type TimeLogicalType struct {
+	baseLogicalType
+	typ *format.TimeType
+}
+
+func (t TimeLogicalType) IsAdjustedToUTC() bool {
+	return t.typ.IsAdjustedToUTC
+}
+
+func (t TimeLogicalType) TimeUnit() TimeUnitType {
+	return timeUnitFromThrift(t.typ.Unit)
+}
+
+func (TimeLogicalType) SortOrder() SortOrder {
+	return SortSIGNED
+}
+
+func (t TimeLogicalType) MarshalJSON() ([]byte, error) {
+	return json.Marshal(map[string]interface{}{
+		"Type": "Time", "isAdjustedToUTC": t.typ.IsAdjustedToUTC, "timeUnit": timeUnitToString(t.typ.GetUnit())})
+}
+
+func (t TimeLogicalType) String() string {
+	return fmt.Sprintf("Time(isAdjustedToUTC=%t, timeUnit=%s)", t.typ.GetIsAdjustedToUTC(), timeUnitToString(t.typ.GetUnit()))
+}
+
+func (t TimeLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
+	unit := timeUnitFromThrift(t.typ.Unit)
+	if t.typ.IsAdjustedToUTC {
+		switch unit {
+		case TimeUnitMillis:
+			return ConvertedTypes.TimeMillis, DecimalMetadata{}
+		case TimeUnitMicros:
+			return ConvertedTypes.TimeMicros, DecimalMetadata{}
+		}
+	}
+	return ConvertedTypes.None, DecimalMetadata{}
+}
+
+func (t TimeLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
+	if dec.IsSet {
+		return false
+	}
+	unit := timeUnitFromThrift(t.typ.Unit)
+	if t.typ.IsAdjustedToUTC {
+		switch unit {
+		case TimeUnitMillis:
+			return c == ConvertedTypes.TimeMillis
+		case TimeUnitMicros:
+			return c == ConvertedTypes.TimeMicros
+		}
+	}
+
+	return c == ConvertedTypes.None || c == ConvertedTypes.NA
+}
+
+func (t TimeLogicalType) IsApplicable(typ parquet.Type, _ int32) bool {
+	return (typ == parquet.Types.Int32 && t.typ.GetUnit().IsSetMILLIS()) ||
+		(typ == parquet.Types.Int64 &&
+			(t.typ.GetUnit().IsSetMICROS() || t.typ.GetUnit().IsSetNANOS()))
+}
+
+func (t TimeLogicalType) toThrift() *format.LogicalType {
+	return &format.LogicalType{TIME: t.typ}
+}
+
+func (t TimeLogicalType) Equals(rhs LogicalType) bool {
+	other, ok := rhs.(*TimeLogicalType)
+	if !ok {
+		return false
+	}
+	return t.typ.IsAdjustedToUTC == other.typ.IsAdjustedToUTC &&
+		timeUnitFromThrift(t.typ.Unit) == timeUnitFromThrift(other.typ.Unit)
+}
+
+// NewTimestampLogicalType returns a logical timestamp type with "forceConverted"
+// set to false
+func NewTimestampLogicalType(isAdjustedToUTC bool, unit TimeUnitType) LogicalType {
+	return &TimestampLogicalType{
+		typ: &format.TimestampType{
+			IsAdjustedToUTC: isAdjustedToUTC,
+			Unit:            createTimeUnit(unit),
+		},
+		forceConverted: false,
+		fromConverted:  false,
+	}
+}
+
+// NewTimestampLogicalTypeForce returns a timestamp logical type with
+// "forceConverted" set to true
+func NewTimestampLogicalTypeForce(isAdjustedToUTC bool, unit TimeUnitType) LogicalType {
+	return &TimestampLogicalType{
+		typ: &format.TimestampType{
+			IsAdjustedToUTC: isAdjustedToUTC,
+			Unit:            createTimeUnit(unit),
+		},
+		forceConverted: true,
+		fromConverted:  false,
+	}
+}
+
+// TimestampLogicalType represents an int64 number that can be decoded
+// into a year, month, day, hour, minute, second, and subsecond
+type TimestampLogicalType struct {
+	baseLogicalType
+	typ *format.TimestampType
+	// forceConverted denotes whether or not the resulting serialized
+	// type when writing to parquet will be written as the legacy
+	// ConvertedType TIMESTAMP_MICROS/TIMESTAMP_MILLIS (true)
+	// or if it will write the proper current Logical Types (false, default)
+	forceConverted bool
+	// fromConverted denotes if the timestamp type was created by
+	// translating a legacy converted type of TIMESTAMP_MILLIS or
+	// TIMESTAMP_MICROS rather than by using the current logical
+	// types. Default is false.
+	fromConverted bool
+}
+
+func (t TimestampLogicalType) IsFromConvertedType() bool {
+	return t.fromConverted
+}
+
+func (t TimestampLogicalType) IsAdjustedToUTC() bool {
+	return t.typ.IsAdjustedToUTC
+}
+
+func (t TimestampLogicalType) TimeUnit() TimeUnitType {
+	return timeUnitFromThrift(t.typ.Unit)
+}
+
+func (TimestampLogicalType) SortOrder() SortOrder {
+	return SortSIGNED
+}
+
+func (t TimestampLogicalType) MarshalJSON() ([]byte, error) {
+	return json.Marshal(map[string]interface{}{
+		"Type":                     "Timestamp",
+		"isAdjustedToUTC":          t.typ.IsAdjustedToUTC,
+		"timeUnit":                 timeUnitToString(t.typ.GetUnit()),
+		"is_from_converted_type":   t.fromConverted,
+		"force_set_converted_type": t.forceConverted,
+	})
+}
+
+func (t TimestampLogicalType) IsSerialized() bool {
+	return !t.fromConverted
+}
+
+func (t TimestampLogicalType) String() string {
+	return fmt.Sprintf("Timestamp(isAdjustedToUTC=%t, timeUnit=%s, is_from_converted_type=%t, force_set_converted_type=%t)",
+		t.typ.GetIsAdjustedToUTC(), timeUnitToString(t.typ.GetUnit()), t.fromConverted, t.forceConverted)
+}
+
+func (t TimestampLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
+	unit := timeUnitFromThrift(t.typ.Unit)
+	if t.typ.IsAdjustedToUTC || t.forceConverted {
+		switch unit {
+		case TimeUnitMillis:
+			return ConvertedTypes.TimestampMillis, DecimalMetadata{}
+		case TimeUnitMicros:
+			return ConvertedTypes.TimestampMicros, DecimalMetadata{}
+		}
+	}
+	return ConvertedTypes.None, DecimalMetadata{}
+}
+
+func (t TimestampLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
+	if dec.IsSet {
+		return false
+	}
+
+	switch timeUnitFromThrift(t.typ.Unit) {
+	case TimeUnitMillis:
+		if t.typ.GetIsAdjustedToUTC() || t.forceConverted {
+			return c == ConvertedTypes.TimestampMillis
+		}
+	case TimeUnitMicros:
+		if t.typ.GetIsAdjustedToUTC() || t.forceConverted {
+			return c == ConvertedTypes.TimestampMicros
+		}
+	}
+
+	return c == ConvertedTypes.None || c == ConvertedTypes.NA
+}
+
+func (TimestampLogicalType) IsApplicable(t parquet.Type, _ int32) bool {
+	return t == parquet.Types.Int64
+}
+
+func (t TimestampLogicalType) toThrift() *format.LogicalType {
+	return &format.LogicalType{TIMESTAMP: t.typ}
+}
+
+func (t TimestampLogicalType) Equals(rhs LogicalType) bool {
+	other, ok := rhs.(*TimestampLogicalType)
+	if !ok {
+		return false
+	}
+	return t.typ.IsAdjustedToUTC == other.typ.IsAdjustedToUTC &&
+		timeUnitFromThrift(t.typ.Unit) == timeUnitFromThrift(other.typ.Unit)
+}
+
+// NewIntLogicalType creates an integer logical type of the desired bitwidth
+// and whether it is signed or not.
+//
+// Bit width must be exactly 8, 16, 32 or 64 for an integer logical type
+func NewIntLogicalType(bitWidth int8, signed bool) LogicalType {
+	switch bitWidth {
+	case 8, 16, 32, 64:
+	default:
+		panic("parquet: bit width must be exactly 8, 16, 32, or 64 for Int logical type")
+	}
+	return &IntLogicalType{
+		typ: &format.IntType{
+			BitWidth: bitWidth,
+			IsSigned: signed,
+		},
+	}
+}
+
+// IntLogicalType represents an integer type of a specific bit width and
+// is either signed or unsigned.
+type IntLogicalType struct {
+	baseLogicalType
+	typ *format.IntType
+}
+
+func (t IntLogicalType) BitWidth() int8 {
+	return t.typ.BitWidth
+}
+
+func (t IntLogicalType) IsSigned() bool {
+	return t.typ.IsSigned
+}
+
+func (t IntLogicalType) SortOrder() SortOrder {
+	if t.typ.IsSigned {
+		return SortSIGNED
+	}
+	return SortUNSIGNED
+}
+
+func (t IntLogicalType) MarshalJSON() ([]byte, error) {
+	return json.Marshal(map[string]interface{}{
+		"Type": "Int", "bitWidth": t.typ.BitWidth, "isSigned": t.typ.IsSigned,
+	})
+}
+
+func (t IntLogicalType) String() string {
+	return fmt.Sprintf("Int(bitWidth=%d, isSigned=%t)", t.typ.GetBitWidth(), t.typ.GetIsSigned())
+}
+
+func (t IntLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
+	var d DecimalMetadata
+	if t.typ.IsSigned {
+		switch t.typ.BitWidth {
+		case 8:
+			return ConvertedTypes.Int8, d
+		case 16:
+			return ConvertedTypes.Int16, d
+		case 32:
+			return ConvertedTypes.Int32, d
+		case 64:
+			return ConvertedTypes.Int64, d
+		}
+	} else {
+		switch t.typ.BitWidth {
+		case 8:
+			return ConvertedTypes.Uint8, d
+		case 16:
+			return ConvertedTypes.Uint16, d
+		case 32:
+			return ConvertedTypes.Uint32, d
+		case 64:
+			return ConvertedTypes.Uint64, d
+		}
+	}
+	return ConvertedTypes.None, d
+}
+
+func (t IntLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
+	if dec.IsSet {
+		return false
+	}
+	v, _ := t.ToConvertedType()
+	return c == v
+}
+
+func (t IntLogicalType) IsApplicable(typ parquet.Type, _ int32) bool {
+	return (typ == parquet.Types.Int32 && t.typ.GetBitWidth() <= 32) ||
+		(typ == parquet.Types.Int64 && t.typ.GetBitWidth() == 64)
+}
+
+func (t IntLogicalType) toThrift() *format.LogicalType {
+	return &format.LogicalType{INTEGER: t.typ}
+}
+
+func (t IntLogicalType) Equals(rhs LogicalType) bool {
+	other, ok := rhs.(*IntLogicalType)
+	if !ok {
+		return false
+	}
+
+	return t.typ.GetIsSigned() == other.typ.GetIsSigned() &&
+		t.typ.GetBitWidth() == other.typ.GetBitWidth()
+}
+
+// UnknownLogicalType is a type that is essentially a placeholder for when
+// we don't know the type.
+type UnknownLogicalType struct{ baseLogicalType }
+
+func (UnknownLogicalType) SortOrder() SortOrder {
+	return SortUNKNOWN
+}
+
+func (UnknownLogicalType) MarshalJSON() ([]byte, error) {
+	return json.Marshal(map[string]string{"Type": UnknownLogicalType{}.String()})
+}
+
+func (UnknownLogicalType) IsValid() bool { return false }
+
+func (UnknownLogicalType) IsSerialized() bool { return false }
+
+func (UnknownLogicalType) String() string {
+	return "Unknown"
+}
+
+func (UnknownLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
+	return ConvertedTypes.NA, DecimalMetadata{}
+}
+
+func (UnknownLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
+	return c == ConvertedTypes.NA && !dec.IsSet
+}
+
+func (UnknownLogicalType) IsApplicable(parquet.Type, int32) bool { return true }
+
+func (UnknownLogicalType) toThrift() *format.LogicalType {
+	return &format.LogicalType{UNKNOWN: format.NewNullType()}
+}
+
+func (UnknownLogicalType) Equals(rhs LogicalType) bool {
+	_, ok := rhs.(UnknownLogicalType)
+	return ok
+}
+
+// JSONLogicalType represents a byte array column which is to be interpreted
+// as a JSON string.
+type JSONLogicalType struct{ baseLogicalType }
+
+func (JSONLogicalType) SortOrder() SortOrder {
+	return SortUNSIGNED
+}
+
+func (JSONLogicalType) MarshalJSON() ([]byte, error) {
+	return json.Marshal(map[string]string{"Type": JSONLogicalType{}.String()})
+}
+
+func (JSONLogicalType) String() string {
+	return "JSON"
+}
+
+func (JSONLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
+	return ConvertedTypes.JSON, DecimalMetadata{}
+}
+
+func (JSONLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
+	return c == ConvertedTypes.JSON && !dec.IsSet
+}
+
+func (JSONLogicalType) IsApplicable(t parquet.Type, _ int32) bool {
+	return t == parquet.Types.ByteArray
+}
+
+func (JSONLogicalType) toThrift() *format.LogicalType {
+	return &format.LogicalType{JSON: format.NewJsonType()}
+}
+
+func (JSONLogicalType) Equals(rhs LogicalType) bool {
+	_, ok := rhs.(JSONLogicalType)
+	return ok
+}
+
+// BSONLogicalType represents a binary JSON string in the byte array
+type BSONLogicalType struct{ baseLogicalType }
+
+func (BSONLogicalType) SortOrder() SortOrder {
+	return SortUNSIGNED
+}
+
+func (BSONLogicalType) MarshalJSON() ([]byte, error) {
+	return json.Marshal(map[string]string{"Type": BSONLogicalType{}.String()})
+}
+
+func (BSONLogicalType) String() string {
+	return "BSON"
+}
+
+func (BSONLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
+	return ConvertedTypes.BSON, DecimalMetadata{}
+}
+
+func (BSONLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
+	return c == ConvertedTypes.BSON && !dec.IsSet
+}
+
+func (BSONLogicalType) IsApplicable(t parquet.Type, _ int32) bool {
+	return t == parquet.Types.ByteArray
+}
+
+func (BSONLogicalType) toThrift() *format.LogicalType {
+	return &format.LogicalType{BSON: format.NewBsonType()}
+}
+
+func (BSONLogicalType) Equals(rhs LogicalType) bool {
+	_, ok := rhs.(BSONLogicalType)
+	return ok
+}
+
+// UUIDLogicalType can only be used with a FixedLength byte array column
+// that is exactly 16 bytes long
+type UUIDLogicalType struct{ baseLogicalType }
+
+func (UUIDLogicalType) SortOrder() SortOrder {
+	return SortUNSIGNED
+}
+
+func (UUIDLogicalType) MarshalJSON() ([]byte, error) {
+	return json.Marshal(map[string]string{"Type": UUIDLogicalType{}.String()})
+}
+
+func (UUIDLogicalType) String() string {
+	return "UUID"
+}
+
+func (UUIDLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
+	return ConvertedTypes.None, DecimalMetadata{}
+}
+
+func (UUIDLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
+	if dec.IsSet {
+		return false
+	}
+	switch c {
+	case ConvertedTypes.None, ConvertedTypes.NA:
+		return true
+	}
+	return false
+}
+
+func (UUIDLogicalType) IsApplicable(t parquet.Type, tlen int32) bool {
+	return t == parquet.Types.FixedLenByteArray && tlen == 16
+}
+
+func (UUIDLogicalType) toThrift() *format.LogicalType {
+	return &format.LogicalType{UUID: format.NewUUIDType()}
+}
+
+func (UUIDLogicalType) Equals(rhs LogicalType) bool {
+	_, ok := rhs.(UUIDLogicalType)
+	return ok
+}
+
+// IntervalLogicalType is not yet in the thrift spec, but represents
+// an interval time and needs to be a fixed length byte array of 12 bytes
+type IntervalLogicalType struct{ baseLogicalType }
+
+func (IntervalLogicalType) SortOrder() SortOrder {
+	return SortUNKNOWN
+}
+
+func (IntervalLogicalType) MarshalJSON() ([]byte, error) {
+	return json.Marshal(map[string]string{"Type": IntervalLogicalType{}.String()})
+}
+
+func (IntervalLogicalType) String() string {
+	return "Interval"
+}
+
+func (IntervalLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
+	return ConvertedTypes.Interval, DecimalMetadata{}
+}
+
+func (IntervalLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
+	return c == ConvertedTypes.Interval && !dec.IsSet
+}
+
+func (IntervalLogicalType) IsApplicable(t parquet.Type, tlen int32) bool {
+	return t == parquet.Types.FixedLenByteArray && tlen == 12
+}
+
+func (IntervalLogicalType) toThrift() *format.LogicalType {
+	panic("no parquet IntervalLogicalType yet implemented")
+}
+
+func (IntervalLogicalType) Equals(rhs LogicalType) bool {
+	_, ok := rhs.(IntervalLogicalType)
+	return ok
+}
+
+type NullLogicalType struct{ baseLogicalType }
+
+func (NullLogicalType) SortOrder() SortOrder {
+	return SortUNKNOWN
+}
+
+func (NullLogicalType) MarshalJSON() ([]byte, error) {
+	return json.Marshal(map[string]string{"Type": NullLogicalType{}.String()})
+}
+
+func (NullLogicalType) String() string {
+	return "Null"
+}
+
+func (NullLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
+	return ConvertedTypes.None, DecimalMetadata{}
+}
+
+func (NullLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
+	if dec.IsSet {
+		return false
+	}
+	switch c {
+	case ConvertedTypes.None, ConvertedTypes.NA:
+		return true
+	}
+	return false
+}
+
+func (NullLogicalType) IsApplicable(parquet.Type, int32) bool {
+	return true
+}
+
+func (NullLogicalType) toThrift() *format.LogicalType {
+	return &format.LogicalType{UNKNOWN: format.NewNullType()}
+}
+
+func (NullLogicalType) Equals(rhs LogicalType) bool {
+	_, ok := rhs.(NullLogicalType)
+	return ok
+}
+
+type NoLogicalType struct{ baseLogicalType }
+
+func (NoLogicalType) SortOrder() SortOrder {
+	return SortUNKNOWN
+}
+
+func (NoLogicalType) MarshalJSON() ([]byte, error) {
+	return json.Marshal(map[string]string{"Type": NoLogicalType{}.String()})
+}
+
+func (NoLogicalType) IsSerialized() bool { return false }
+
+func (NoLogicalType) String() string {
+	return "None"
+}
+
+func (NoLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
+	return ConvertedTypes.None, DecimalMetadata{}
+}
+
+func (NoLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
+	return c == ConvertedTypes.None && !dec.IsSet
+}
+
+func (NoLogicalType) IsApplicable(parquet.Type, int32) bool {
+	return true
+}
+
+func (NoLogicalType) toThrift() *format.LogicalType {
+	panic("cannot convert NoLogicalType to thrift")
+}
+
+func (NoLogicalType) Equals(rhs LogicalType) bool {
+	_, ok := rhs.(NoLogicalType)
+	return ok
+}
+
+func (NoLogicalType) IsNone() bool { return true }
diff --git a/go/parquet/schema/logical_types_test.go b/go/parquet/schema/logical_types_test.go
new file mode 100644
index 00000000000..cc2b23301df
--- /dev/null
+++ b/go/parquet/schema/logical_types_test.go
@@ -0,0 +1,559 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package schema_test
+
+import (
+	"encoding/json"
+	"testing"
+
+	"github.com/apache/arrow/go/parquet"
+	"github.com/apache/arrow/go/parquet/schema"
+	"github.com/stretchr/testify/assert"
+)
+
+func TestConvertedLogicalEquivalences(t *testing.T) {
+	tests := []struct {
+		name      string
+		converted schema.ConvertedType
+		logical   schema.LogicalType
+		expected  schema.LogicalType
+	}{
+		{"utf8", schema.ConvertedTypes.UTF8, schema.StringLogicalType{}, schema.StringLogicalType{}},
+		{"map", schema.ConvertedTypes.Map, schema.MapLogicalType{}, schema.MapLogicalType{}},
+		{"mapkeyval", schema.ConvertedTypes.MapKeyValue, schema.MapLogicalType{}, schema.MapLogicalType{}},
+		{"list", schema.ConvertedTypes.List, schema.NewListLogicalType(), schema.NewListLogicalType()},
+		{"enum", schema.ConvertedTypes.Enum, schema.EnumLogicalType{}, schema.EnumLogicalType{}},
+		{"date", schema.ConvertedTypes.Date, schema.DateLogicalType{}, schema.DateLogicalType{}},
+		{"timemilli", schema.ConvertedTypes.TimeMillis, schema.NewTimeLogicalType(true /* adjustedToUTC */, schema.TimeUnitMillis), &schema.TimeLogicalType{}},
+		{"timemicro", schema.ConvertedTypes.TimeMicros, schema.NewTimeLogicalType(true /* adjustedToUTC */, schema.TimeUnitMicros), &schema.TimeLogicalType{}},
+		{"timestampmilli", schema.ConvertedTypes.TimestampMillis, schema.NewTimestampLogicalType(true /* adjustedToUTC */, schema.TimeUnitMillis), &schema.TimestampLogicalType{}},
+		{"timestampmicro", schema.ConvertedTypes.TimestampMicros, schema.NewTimestampLogicalType(true /* adjustedToUTC */, schema.TimeUnitMicros), &schema.TimestampLogicalType{}},
+		{"uint8", schema.ConvertedTypes.Uint8, schema.NewIntLogicalType(8 /* bitWidth */, false /* signed */), &schema.IntLogicalType{}},
+		{"uint16", schema.ConvertedTypes.Uint16, schema.NewIntLogicalType(16 /* bitWidth */, false /* signed */), &schema.IntLogicalType{}},
+		{"uint32", schema.ConvertedTypes.Uint32, schema.NewIntLogicalType(32 /* bitWidth */, false /* signed */), &schema.IntLogicalType{}},
+		{"uint64", schema.ConvertedTypes.Uint64, schema.NewIntLogicalType(64 /* bitWidth */, false /* signed */), &schema.IntLogicalType{}},
+		{"int8", schema.ConvertedTypes.Int8, schema.NewIntLogicalType(8 /* bitWidth */, true /* signed */), &schema.IntLogicalType{}},
+		{"int16", schema.ConvertedTypes.Int16, schema.NewIntLogicalType(16 /* bitWidth */, true /* signed */), &schema.IntLogicalType{}},
+		{"int32", schema.ConvertedTypes.Int32, schema.NewIntLogicalType(32 /* bitWidth */, true /* signed */), &schema.IntLogicalType{}},
+		{"int64", schema.ConvertedTypes.Int64, schema.NewIntLogicalType(64 /* bitWidth */, true /* signed */), &schema.IntLogicalType{}},
+		{"json", schema.ConvertedTypes.JSON, schema.JSONLogicalType{}, schema.JSONLogicalType{}},
+		{"bson", schema.ConvertedTypes.BSON, schema.BSONLogicalType{}, schema.BSONLogicalType{}},
+		{"interval", schema.ConvertedTypes.Interval, schema.IntervalLogicalType{}, schema.IntervalLogicalType{}},
+		{"none", schema.ConvertedTypes.None, schema.NoLogicalType{}, schema.NoLogicalType{}},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			fromConverted := tt.converted.ToLogicalType(schema.DecimalMetadata{})
+			assert.IsType(t, tt.logical, fromConverted)
+			assert.True(t, fromConverted.Equals(tt.logical))
+			assert.IsType(t, tt.expected, fromConverted)
+			assert.IsType(t, tt.expected, tt.logical)
+		})
+	}
+
+	t.Run("decimal", func(t *testing.T) {
+		decimalMeta := schema.DecimalMetadata{IsSet: true, Precision: 10, Scale: 4}
+		fromConverted := schema.ConvertedTypes.Decimal.ToLogicalType(decimalMeta)
+		fromMake := schema.NewDecimalLogicalType(10, 4)
+		assert.IsType(t, fromMake, fromConverted)
+		assert.True(t, fromConverted.Equals(fromMake))
+		assert.IsType(t, &schema.DecimalLogicalType{}, fromConverted)
+		assert.IsType(t, &schema.DecimalLogicalType{}, fromMake)
+		assert.True(t, schema.NewDecimalLogicalType(16, 0).Equals(schema.NewDecimalLogicalType(16, 0)))
+	})
+}
+
+func TestConvertedTypeCompatibility(t *testing.T) {
+	tests := []struct {
+		name            string
+		logical         schema.LogicalType
+		expectConverted schema.ConvertedType
+	}{
+		{"utf8", schema.StringLogicalType{}, schema.ConvertedTypes.UTF8},
+		{"map", schema.MapLogicalType{}, schema.ConvertedTypes.Map},
+		{"list", schema.NewListLogicalType(), schema.ConvertedTypes.List},
+		{"enum", schema.EnumLogicalType{}, schema.ConvertedTypes.Enum},
+		{"date", schema.DateLogicalType{}, schema.ConvertedTypes.Date},
+		{"time_milli", schema.NewTimeLogicalType(true /* adjutedToUTC */, schema.TimeUnitMillis), schema.ConvertedTypes.TimeMillis},
+		{"time_micro", schema.NewTimeLogicalType(true /* adjutedToUTC */, schema.TimeUnitMicros), schema.ConvertedTypes.TimeMicros},
+		{"timestamp_milli", schema.NewTimestampLogicalType(true /* adjutedToUTC */, schema.TimeUnitMillis), schema.ConvertedTypes.TimestampMillis},
+		{"timestamp_micro", schema.NewTimestampLogicalType(true /* adjutedToUTC */, schema.TimeUnitMicros), schema.ConvertedTypes.TimestampMicros},
+		{"uint8", schema.NewIntLogicalType(8 /* bitWidth */, false /* signed */), schema.ConvertedTypes.Uint8},
+		{"uint16", schema.NewIntLogicalType(16 /* bitWidth */, false /* signed */), schema.ConvertedTypes.Uint16},
+		{"uint32", schema.NewIntLogicalType(32 /* bitWidth */, false /* signed */), schema.ConvertedTypes.Uint32},
+		{"uint64", schema.NewIntLogicalType(64 /* bitWidth */, false /* signed */), schema.ConvertedTypes.Uint64},
+		{"int8", schema.NewIntLogicalType(8 /* bitWidth */, true /* signed */), schema.ConvertedTypes.Int8},
+		{"int16", schema.NewIntLogicalType(16 /* bitWidth */, true /* signed */), schema.ConvertedTypes.Int16},
+		{"int32", schema.NewIntLogicalType(32 /* bitWidth */, true /* signed */), schema.ConvertedTypes.Int32},
+		{"int64", schema.NewIntLogicalType(64 /* bitWidth */, true /* signed */), schema.ConvertedTypes.Int64},
+		{"json", schema.JSONLogicalType{}, schema.ConvertedTypes.JSON},
+		{"bson", schema.BSONLogicalType{}, schema.ConvertedTypes.BSON},
+		{"interval", schema.IntervalLogicalType{}, schema.ConvertedTypes.Interval},
+		{"none", schema.NoLogicalType{}, schema.ConvertedTypes.None},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			assert.True(t, tt.logical.IsValid())
+			converted, decimalMeta := tt.logical.ToConvertedType()
+			assert.Equal(t, tt.expectConverted, converted)
+			assert.False(t, decimalMeta.IsSet)
+			assert.True(t, tt.logical.IsCompatible(converted, decimalMeta))
+			assert.False(t, tt.logical.IsCompatible(converted, schema.DecimalMetadata{IsSet: true, Precision: 1, Scale: 1}))
+			reconstruct := converted.ToLogicalType(decimalMeta)
+			assert.True(t, reconstruct.IsValid())
+			assert.True(t, reconstruct.Equals(tt.logical))
+		})
+	}
+
+	var (
+		orig          schema.LogicalType
+		converted     schema.ConvertedType
+		convertedMeta schema.DecimalMetadata
+	)
+
+	orig = schema.NewDecimalLogicalType(6 /* precision */, 2 /* scale */)
+	converted, convertedMeta = orig.ToConvertedType()
+	assert.True(t, orig.IsValid())
+	assert.Equal(t, schema.ConvertedTypes.Decimal, converted)
+	assert.True(t, convertedMeta.IsSet)
+	assert.EqualValues(t, 6, convertedMeta.Precision)
+	assert.EqualValues(t, 2, convertedMeta.Scale)
+	assert.True(t, orig.IsCompatible(converted, convertedMeta))
+	reconstruct := converted.ToLogicalType(convertedMeta)
+	assert.True(t, reconstruct.IsValid())
+	assert.True(t, reconstruct.Equals(orig))
+
+	orig = schema.UnknownLogicalType{}
+	converted, convertedMeta = orig.ToConvertedType()
+	assert.False(t, orig.IsValid())
+	assert.Equal(t, schema.ConvertedTypes.NA, converted)
+	assert.False(t, convertedMeta.IsSet)
+	assert.True(t, orig.IsCompatible(converted, convertedMeta))
+	reconstruct = converted.ToLogicalType(convertedMeta)
+	assert.False(t, reconstruct.IsValid())
+	assert.True(t, reconstruct.Equals(orig))
+}
+
+func TestNewTypeIncompatibility(t *testing.T) {
+	tests := []struct {
+		name     string
+		logical  schema.LogicalType
+		expected schema.LogicalType
+	}{
+		{"uuid", schema.UUIDLogicalType{}, schema.UUIDLogicalType{}},
+		{"null", schema.NullLogicalType{}, schema.NullLogicalType{}},
+		{"not-utc-time_milli", schema.NewTimeLogicalType(false /* adjutedToUTC */, schema.TimeUnitMillis), &schema.TimeLogicalType{}},
+		{"not-utc-time-micro", schema.NewTimeLogicalType(false /* adjutedToUTC */, schema.TimeUnitMicros), &schema.TimeLogicalType{}},
+		{"not-utc-time-nano", schema.NewTimeLogicalType(false /* adjutedToUTC */, schema.TimeUnitNanos), &schema.TimeLogicalType{}},
+		{"utc-time-nano", schema.NewTimeLogicalType(true /* adjutedToUTC */, schema.TimeUnitNanos), &schema.TimeLogicalType{}},
+		{"not-utc-timestamp-nano", schema.NewTimestampLogicalType(false /* adjutedToUTC */, schema.TimeUnitNanos), &schema.TimestampLogicalType{}},
+		{"utc-timestamp-nano", schema.NewTimestampLogicalType(true /* adjutedToUTC */, schema.TimeUnitNanos), &schema.TimestampLogicalType{}},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			assert.IsType(t, tt.expected, tt.logical)
+			assert.True(t, tt.logical.IsValid())
+			converted, meta := tt.logical.ToConvertedType()
+			assert.Equal(t, schema.ConvertedTypes.None, converted)
+			assert.False(t, meta.IsSet)
+		})
+	}
+}
+
+func TestFactoryPanic(t *testing.T) {
+	tests := []struct {
+		name string
+		f    func()
+	}{
+		{"invalid TimeUnit", func() { schema.NewTimeLogicalType(true /* adjutedToUTC */, schema.TimeUnitUnknown) }},
+		{"invalid timestamp unit", func() { schema.NewTimestampLogicalType(true /* adjutedToUTC */, schema.TimeUnitUnknown) }},
+		{"negative bitwidth", func() { schema.NewIntLogicalType(-1 /* bitWidth */, false /* signed */) }},
+		{"zero bitwidth", func() { schema.NewIntLogicalType(0 /* bitWidth */, false /* signed */) }},
+		{"bitwidth one", func() { schema.NewIntLogicalType(1 /* bitWidth */, false /* signed */) }},
+		{"invalid bitwidth", func() { schema.NewIntLogicalType(65 /* bitWidth */, false /* signed */) }},
+		{"negative precision", func() { schema.NewDecimalLogicalType(-1 /* precision */, 0 /* scale */) }},
+		{"zero precision", func() { schema.NewDecimalLogicalType(0 /* precision */, 0 /* scale */) }},
+		{"negative scale", func() { schema.NewDecimalLogicalType(10 /* precision */, -1 /* scale */) }},
+		{"invalid scale", func() { schema.NewDecimalLogicalType(10 /* precision */, 11 /* scale */) }},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			assert.Panics(t, tt.f)
+		})
+	}
+}
+
+func TestLogicalTypeProperties(t *testing.T) {
+	tests := []struct {
+		name       string
+		logical    schema.LogicalType
+		nested     bool
+		serialized bool
+		valid      bool
+	}{
+		{"string", schema.StringLogicalType{}, false, true, true},
+		{"map", schema.MapLogicalType{}, true, true, true},
+		{"list", schema.NewListLogicalType(), true, true, true},
+		{"enum", schema.EnumLogicalType{}, false, true, true},
+		{"decimal", schema.NewDecimalLogicalType(16 /* precision */, 6 /* scale */), false, true, true},
+		{"date", schema.DateLogicalType{}, false, true, true},
+		{"time", schema.NewTimeLogicalType(true /* adjustedToUTC */, schema.TimeUnitMicros), false, true, true},
+		{"timestamp", schema.NewTimestampLogicalType(true /* adjustedToUTC */, schema.TimeUnitMicros), false, true, true},
+		{"interval", schema.IntervalLogicalType{}, false, true, true},
+		{"uint8", schema.NewIntLogicalType(8 /* bitWidth */, false /* signed */), false, true, true},
+		{"int64", schema.NewIntLogicalType(64 /* bitWidth */, true /* signed */), false, true, true},
+		{"null", schema.NullLogicalType{}, false, true, true},
+		{"json", schema.JSONLogicalType{}, false, true, true},
+		{"bson", schema.BSONLogicalType{}, false, true, true},
+		{"uuid", schema.UUIDLogicalType{}, false, true, true},
+		{"nological", schema.NoLogicalType{}, false, false, true},
+		{"unknown", schema.UnknownLogicalType{}, false, false, false},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			assert.True(t, tt.nested == tt.logical.IsNested())
+			assert.True(t, tt.serialized == tt.logical.IsSerialized())
+			assert.True(t, tt.valid == tt.logical.IsValid())
+		})
+	}
+}
+
+var physicalTypeList = []parquet.Type{
+	parquet.Types.Boolean,
+	parquet.Types.Int32,
+	parquet.Types.Int64,
+	parquet.Types.Int96,
+	parquet.Types.Float,
+	parquet.Types.Double,
+	parquet.Types.ByteArray,
+	parquet.Types.FixedLenByteArray,
+}
+
+func TestLogicalSingleTypeApplicability(t *testing.T) {
+	tests := []struct {
+		name       string
+		logical    schema.LogicalType
+		applicable parquet.Type
+	}{
+		{"string", schema.StringLogicalType{}, parquet.Types.ByteArray},
+		{"enum", schema.EnumLogicalType{}, parquet.Types.ByteArray},
+		{"date", schema.DateLogicalType{}, parquet.Types.Int32},
+		{"timemilli", schema.NewTimeLogicalType(true /* adjustedToUTC */, schema.TimeUnitMillis), parquet.Types.Int32},
+		{"timemicro", schema.NewTimeLogicalType(true /* adjustedToUTC */, schema.TimeUnitMicros), parquet.Types.Int64},
+		{"timenano", schema.NewTimeLogicalType(true /* adjustedToUTC */, schema.TimeUnitNanos), parquet.Types.Int64},
+		{"timestampmilli", schema.NewTimestampLogicalType(true /* adjustedToUTC */, schema.TimeUnitMillis), parquet.Types.Int64},
+		{"timestampmicro", schema.NewTimestampLogicalType(true /* adjustedToUTC */, schema.TimeUnitMicros), parquet.Types.Int64},
+		{"timestampnanos", schema.NewTimestampLogicalType(true /* adjustedToUTC */, schema.TimeUnitNanos), parquet.Types.Int64},
+		{"uint8", schema.NewIntLogicalType(8 /* bitWidth */, false /* signed */), parquet.Types.Int32},
+		{"uint16", schema.NewIntLogicalType(16 /* bitWidth */, false /* signed */), parquet.Types.Int32},
+		{"uint32", schema.NewIntLogicalType(32 /* bitWidth */, false /* signed */), parquet.Types.Int32},
+		{"uint64", schema.NewIntLogicalType(64 /* bitWidth */, false /* signed */), parquet.Types.Int64},
+		{"int8", schema.NewIntLogicalType(8 /* bitWidth */, true /* signed */), parquet.Types.Int32},
+		{"int16", schema.NewIntLogicalType(16 /* bitWidth */, true /* signed */), parquet.Types.Int32},
+		{"int32", schema.NewIntLogicalType(32 /* bitWidth */, true /* signed */), parquet.Types.Int32},
+		{"int64", schema.NewIntLogicalType(64 /* bitWidth */, true /* signed */), parquet.Types.Int64},
+		{"json", schema.JSONLogicalType{}, parquet.Types.ByteArray},
+		{"bson", schema.BSONLogicalType{}, parquet.Types.ByteArray},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			for _, typ := range physicalTypeList {
+				if typ == tt.applicable {
+					assert.True(t, tt.logical.IsApplicable(typ, -1))
+				} else {
+					assert.False(t, tt.logical.IsApplicable(typ, -1))
+				}
+			}
+		})
+	}
+}
+
+func TestLogicalNoTypeApplicability(t *testing.T) {
+	tests := []struct {
+		name    string
+		logical schema.LogicalType
+	}{
+		{"map", schema.MapLogicalType{}},
+		{"list", schema.NewListLogicalType()},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			for _, typ := range physicalTypeList {
+				assert.False(t, tt.logical.IsApplicable(typ, -1))
+			}
+		})
+	}
+}
+
+func TestLogicalUniversalTypeApplicability(t *testing.T) {
+	tests := []struct {
+		name    string
+		logical schema.LogicalType
+	}{
+		{"null", schema.NullLogicalType{}},
+		{"none", schema.NoLogicalType{}},
+		{"unknown", schema.UnknownLogicalType{}},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			for _, typ := range physicalTypeList {
+				assert.True(t, tt.logical.IsApplicable(typ, -1))
+			}
+		})
+	}
+}
+
+func TestLogicalInapplicableTypes(t *testing.T) {
+	tests := []struct {
+		name string
+		typ  parquet.Type
+		len  int32
+	}{
+		{"fixed 8", parquet.Types.FixedLenByteArray, 8},
+		{"fixed 20", parquet.Types.FixedLenByteArray, 20},
+		{"bool", parquet.Types.Boolean, -1},
+		{"int32", parquet.Types.Int32, -1},
+		{"int64", parquet.Types.Int64, -1},
+		{"int96", parquet.Types.Int96, -1},
+		{"float", parquet.Types.Float, -1},
+		{"double", parquet.Types.Double, -1},
+		{"bytearray", parquet.Types.ByteArray, -1},
+	}
+
+	var logical schema.LogicalType
+
+	logical = schema.IntervalLogicalType{}
+	assert.True(t, logical.IsApplicable(parquet.Types.FixedLenByteArray, 12))
+	for _, tt := range tests {
+		t.Run("interval "+tt.name, func(t *testing.T) {
+			assert.False(t, logical.IsApplicable(tt.typ, tt.len))
+		})
+	}
+
+	logical = schema.UUIDLogicalType{}
+	assert.True(t, logical.IsApplicable(parquet.Types.FixedLenByteArray, 16))
+	for _, tt := range tests {
+		t.Run("uuid "+tt.name, func(t *testing.T) {
+			assert.False(t, logical.IsApplicable(tt.typ, tt.len))
+		})
+	}
+}
+
+func TestDecimalLogicalTypeApplicability(t *testing.T) {
+	const scale = 0
+	var logical schema.LogicalType
+	for prec := int32(1); prec <= 9; prec++ {
+		logical = schema.NewDecimalLogicalType(prec, scale)
+		assert.Truef(t, logical.IsApplicable(parquet.Types.Int32, -1), "prec: %d", prec)
+	}
+
+	logical = schema.NewDecimalLogicalType(10 /* precision */, scale)
+	assert.False(t, logical.IsApplicable(parquet.Types.Int32, -1))
+
+	for prec := int32(1); prec <= 18; prec++ {
+		logical = schema.NewDecimalLogicalType(prec, scale)
+		assert.Truef(t, logical.IsApplicable(parquet.Types.Int64, -1), "prec: %d", prec)
+	}
+
+	logical = schema.NewDecimalLogicalType(19, scale)
+	assert.False(t, logical.IsApplicable(parquet.Types.Int64, 0))
+
+	for prec := int32(1); prec <= 36; prec++ {
+		logical = schema.NewDecimalLogicalType(prec, scale)
+		assert.Truef(t, logical.IsApplicable(parquet.Types.ByteArray, 0), "prec: %d", prec)
+	}
+
+	tests := []struct {
+		physicalLen    int32
+		precisionLimit int32
+	}{
+		{1, 2}, {2, 4}, {3, 6}, {4, 9}, {8, 18}, {10, 23}, {16, 38}, {20, 47}, {32, 76},
+	}
+	for _, tt := range tests {
+		var prec int32
+		for prec = 1; prec <= tt.precisionLimit; prec++ {
+			logical = schema.NewDecimalLogicalType(prec, 0)
+			assert.Truef(t, logical.IsApplicable(parquet.Types.FixedLenByteArray, tt.physicalLen), "prec: %d, len: %d", prec, tt.physicalLen)
+		}
+		logical = schema.NewDecimalLogicalType(prec, 0)
+		assert.Falsef(t, logical.IsApplicable(parquet.Types.FixedLenByteArray, tt.physicalLen), "prec: %d, len: %d", prec, tt.physicalLen)
+	}
+
+	assert.False(t, schema.NewDecimalLogicalType(16, 6).IsApplicable(parquet.Types.Boolean, 0))
+	assert.False(t, schema.NewDecimalLogicalType(16, 6).IsApplicable(parquet.Types.Float, 0))
+	assert.False(t, schema.NewDecimalLogicalType(16, 6).IsApplicable(parquet.Types.Double, 0))
+}
+
+func TestLogicalTypeRepresentation(t *testing.T) {
+	tests := []struct {
+		name     string
+		logical  schema.LogicalType
+		expected string
+		expjson  string
+	}{
+		{"unknown", schema.UnknownLogicalType{}, "Unknown", `{"Type": "Unknown"}`},
+		{"string", schema.StringLogicalType{}, "String", `{"Type": "String"}`},
+		{"map", schema.MapLogicalType{}, "Map", `{"Type": "Map"}`},
+		{"list", schema.NewListLogicalType(), "List", `{"Type": "List"}`},
+		{"enum", schema.EnumLogicalType{}, "Enum", `{"Type": "Enum"}`},
+		{"decimal 10 4", schema.NewDecimalLogicalType(10 /* precision */, 4 /* scale */), "Decimal(precision=10, scale=4)", `{"Type": "Decimal", "precision": 10, "scale": 4}`},
+		{"decimal 10 0", schema.NewDecimalLogicalType(10 /* precision */, 0 /* scale */), "Decimal(precision=10, scale=0)", `{"Type": "Decimal", "precision": 10, "scale": 0}`},
+		{"date", schema.DateLogicalType{}, "Date", `{"Type": "Date"}`},
+		{"time milli", schema.NewTimeLogicalType(true /* adjustedToUTC */, schema.TimeUnitMillis), "Time(isAdjustedToUTC=true, timeUnit=milliseconds)", `{"Type": "Time", "isAdjustedToUTC": true, "timeUnit": "milliseconds"}`},
+		{"time micro", schema.NewTimeLogicalType(true /* adjustedToUTC */, schema.TimeUnitMicros), "Time(isAdjustedToUTC=true, timeUnit=microseconds)", `{"Type": "Time", "isAdjustedToUTC": true, "timeUnit": "microseconds"}`},
+		{"time nano", schema.NewTimeLogicalType(true /* adjustedToUTC */, schema.TimeUnitNanos), "Time(isAdjustedToUTC=true, timeUnit=nanoseconds)", `{"Type": "Time", "isAdjustedToUTC": true, "timeUnit": "nanoseconds"}`},
+		{"time notutc milli", schema.NewTimeLogicalType(false /* adjustedToUTC */, schema.TimeUnitMillis), "Time(isAdjustedToUTC=false, timeUnit=milliseconds)", `{"Type": "Time", "isAdjustedToUTC": false, "timeUnit": "milliseconds"}`},
+		{"time notutc micro", schema.NewTimeLogicalType(false /* adjustedToUTC */, schema.TimeUnitMicros), "Time(isAdjustedToUTC=false, timeUnit=microseconds)", `{"Type": "Time", "isAdjustedToUTC": false, "timeUnit": "microseconds"}`},
+		{"time notutc nano", schema.NewTimeLogicalType(false /* adjustedToUTC */, schema.TimeUnitNanos), "Time(isAdjustedToUTC=false, timeUnit=nanoseconds)", `{"Type": "Time", "isAdjustedToUTC": false, "timeUnit": "nanoseconds"}`},
+		{"timestamp milli", schema.NewTimestampLogicalType(true /* adjustedToUTC */, schema.TimeUnitMillis), "Timestamp(isAdjustedToUTC=true, timeUnit=milliseconds, is_from_converted_type=false, force_set_converted_type=false)", `{"Type": "Timestamp", "isAdjustedToUTC": true, "timeUnit": "milliseconds", "is_from_converted_type": false, "force_set_converted_type": false}`},
+		{"timestamp micro", schema.NewTimestampLogicalType(true /* adjustedToUTC */, schema.TimeUnitMicros), "Timestamp(isAdjustedToUTC=true, timeUnit=microseconds, is_from_converted_type=false, force_set_converted_type=false)", `{"Type": "Timestamp", "isAdjustedToUTC": true, "timeUnit": "microseconds", "is_from_converted_type": false, "force_set_converted_type": false}`},
+		{"timestamp nano", schema.NewTimestampLogicalType(true /* adjustedToUTC */, schema.TimeUnitNanos), "Timestamp(isAdjustedToUTC=true, timeUnit=nanoseconds, is_from_converted_type=false, force_set_converted_type=false)", `{"Type": "Timestamp", "isAdjustedToUTC": true, "timeUnit": "nanoseconds", "is_from_converted_type": false, "force_set_converted_type": false}`},
+		{"timestamp notutc milli", schema.NewTimestampLogicalType(false /* adjustedToUTC */, schema.TimeUnitMillis), "Timestamp(isAdjustedToUTC=false, timeUnit=milliseconds, is_from_converted_type=false, force_set_converted_type=false)", `{"Type": "Timestamp", "isAdjustedToUTC": false, "timeUnit": "milliseconds", "is_from_converted_type": false, "force_set_converted_type": false}`},
+		{"timestamp notutc micro", schema.NewTimestampLogicalType(false /* adjustedToUTC */, schema.TimeUnitMicros), "Timestamp(isAdjustedToUTC=false, timeUnit=microseconds, is_from_converted_type=false, force_set_converted_type=false)", `{"Type": "Timestamp", "isAdjustedToUTC": false, "timeUnit": "microseconds", "is_from_converted_type": false, "force_set_converted_type": false}`},
+		{"timestamp notutc nano", schema.NewTimestampLogicalType(false /* adjustedToUTC */, schema.TimeUnitNanos), "Timestamp(isAdjustedToUTC=false, timeUnit=nanoseconds, is_from_converted_type=false, force_set_converted_type=false)", `{"Type": "Timestamp", "isAdjustedToUTC": false, "timeUnit": "nanoseconds", "is_from_converted_type": false, "force_set_converted_type": false}`},
+		{"interval", schema.IntervalLogicalType{}, "Interval", `{"Type": "Interval"}`},
+		{"uint8", schema.NewIntLogicalType(8 /* bitWidth */, false /* signed */), "Int(bitWidth=8, isSigned=false)", `{"Type": "Int", "bitWidth": 8, "isSigned": false}`},
+		{"uint16", schema.NewIntLogicalType(16 /* bitWidth */, false /* signed */), "Int(bitWidth=16, isSigned=false)", `{"Type": "Int", "bitWidth": 16, "isSigned": false}`},
+		{"uint32", schema.NewIntLogicalType(32 /* bitWidth */, false /* signed */), "Int(bitWidth=32, isSigned=false)", `{"Type": "Int", "bitWidth": 32, "isSigned": false}`},
+		{"uint64", schema.NewIntLogicalType(64 /* bitWidth */, false /* signed */), "Int(bitWidth=64, isSigned=false)", `{"Type": "Int", "bitWidth": 64, "isSigned": false}`},
+		{"int8", schema.NewIntLogicalType(8 /* bitWidth */, true /* signed */), "Int(bitWidth=8, isSigned=true)", `{"Type": "Int", "bitWidth": 8, "isSigned": true}`},
+		{"int16", schema.NewIntLogicalType(16 /* bitWidth */, true /* signed */), "Int(bitWidth=16, isSigned=true)", `{"Type": "Int", "bitWidth": 16, "isSigned": true}`},
+		{"int32", schema.NewIntLogicalType(32 /* bitWidth */, true /* signed */), "Int(bitWidth=32, isSigned=true)", `{"Type": "Int", "bitWidth": 32, "isSigned": true}`},
+		{"int64", schema.NewIntLogicalType(64 /* bitWidth */, true /* signed */), "Int(bitWidth=64, isSigned=true)", `{"Type": "Int", "bitWidth": 64, "isSigned": true}`},
+		{"null", schema.NullLogicalType{}, "Null", `{"Type": "Null"}`},
+		{"json", schema.JSONLogicalType{}, "JSON", `{"Type": "JSON"}`},
+		{"bson", schema.BSONLogicalType{}, "BSON", `{"Type": "BSON"}`},
+		{"uuid", schema.UUIDLogicalType{}, "UUID", `{"Type": "UUID"}`},
+		{"none", schema.NoLogicalType{}, "None", `{"Type": "None"}`},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			assert.Equal(t, tt.expected, tt.logical.String())
+			out, err := json.Marshal(tt.logical)
+			assert.NoError(t, err)
+			assert.JSONEq(t, tt.expjson, string(out))
+		})
+	}
+}
+
+func TestLogicalTypeSortOrder(t *testing.T) {
+	tests := []struct {
+		name    string
+		logical schema.LogicalType
+		order   schema.SortOrder
+	}{
+		{"unknown", schema.UnknownLogicalType{}, schema.SortUNKNOWN},
+		{"string", schema.StringLogicalType{}, schema.SortUNSIGNED},
+		{"map", schema.MapLogicalType{}, schema.SortUNKNOWN},
+		{"list", schema.NewListLogicalType(), schema.SortUNKNOWN},
+		{"enum", schema.EnumLogicalType{}, schema.SortUNSIGNED},
+		{"decimal", schema.NewDecimalLogicalType(8 /* precision */, 2 /* scale */), schema.SortSIGNED},
+		{"date", schema.DateLogicalType{}, schema.SortSIGNED},
+		{"time utc milli", schema.NewTimeLogicalType(true /* adjustedToUTC */, schema.TimeUnitMillis), schema.SortSIGNED},
+		{"time utc micros", schema.NewTimeLogicalType(true /* adjustedToUTC */, schema.TimeUnitMicros), schema.SortSIGNED},
+		{"time utc nanos", schema.NewTimeLogicalType(true /* adjustedToUTC */, schema.TimeUnitNanos), schema.SortSIGNED},
+		{"time not utc milli", schema.NewTimeLogicalType(false /* adjustedToUTC */, schema.TimeUnitMillis), schema.SortSIGNED},
+		{"time not utc micros", schema.NewTimeLogicalType(false /* adjustedToUTC */, schema.TimeUnitMicros), schema.SortSIGNED},
+		{"time not utc nanos", schema.NewTimeLogicalType(false /* adjustedToUTC */, schema.TimeUnitNanos), schema.SortSIGNED},
+		{"interval", schema.IntervalLogicalType{}, schema.SortUNKNOWN},
+		{"uint8", schema.NewIntLogicalType(8 /* bitWidth */, false /* signed */), schema.SortUNSIGNED},
+		{"uint16", schema.NewIntLogicalType(16 /* bitWidth */, false /* signed */), schema.SortUNSIGNED},
+		{"uint32", schema.NewIntLogicalType(32 /* bitWidth */, false /* signed */), schema.SortUNSIGNED},
+		{"uint64", schema.NewIntLogicalType(64 /* bitWidth */, false /* signed */), schema.SortUNSIGNED},
+		{"int8", schema.NewIntLogicalType(8 /* bitWidth */, true /* signed */), schema.SortSIGNED},
+		{"int16", schema.NewIntLogicalType(16 /* bitWidth */, true /* signed */), schema.SortSIGNED},
+		{"int32", schema.NewIntLogicalType(32 /* bitWidth */, true /* signed */), schema.SortSIGNED},
+		{"int64", schema.NewIntLogicalType(64 /* bitWidth */, true /* signed */), schema.SortSIGNED},
+		{"null", schema.NullLogicalType{}, schema.SortUNKNOWN},
+		{"json", schema.JSONLogicalType{}, schema.SortUNSIGNED},
+		{"bson", schema.BSONLogicalType{}, schema.SortUNSIGNED},
+		{"uuid", schema.UUIDLogicalType{}, schema.SortUNSIGNED},
+		{"none", schema.NoLogicalType{}, schema.SortUNKNOWN},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			assert.Equal(t, tt.order, tt.logical.SortOrder())
+		})
+	}
+}
+
+func TestNodeFactoryEquivalences(t *testing.T) {
+	tests := []struct {
+		name        string
+		logical     schema.LogicalType
+		converted   schema.ConvertedType
+		typ         parquet.Type
+		physicalLen int
+		precision   int
+		scale       int
+	}{
+		{"string", schema.StringLogicalType{}, schema.ConvertedTypes.UTF8, parquet.Types.ByteArray, -1, -1, -1},
+		{"enum", schema.EnumLogicalType{}, schema.ConvertedTypes.Enum, parquet.Types.ByteArray, -1, -1, -1},
+		{"decimal", schema.NewDecimalLogicalType(16 /* precision */, 6 /* scale */), schema.ConvertedTypes.Decimal, parquet.Types.Int64, -1, 16, 6},
+		{"date", schema.DateLogicalType{}, schema.ConvertedTypes.Date, parquet.Types.Int32, -1, -1, -1},
+		{"time millis", schema.NewTimeLogicalType(true /* adjustedToUTC */, schema.TimeUnitMillis), schema.ConvertedTypes.TimeMillis, parquet.Types.Int32, -1, -1, -1},
+		{"time micros", schema.NewTimeLogicalType(true /* adjustedToUTC */, schema.TimeUnitMicros), schema.ConvertedTypes.TimeMicros, parquet.Types.Int64, -1, -1, -1},
+		{"timestamp millis", schema.NewTimestampLogicalType(true /* adjustedToUTC */, schema.TimeUnitMillis), schema.ConvertedTypes.TimestampMillis, parquet.Types.Int64, -1, -1, -1},
+		{"timestamp micros", schema.NewTimestampLogicalType(true /* adjustedToUTC */, schema.TimeUnitMicros), schema.ConvertedTypes.TimestampMicros, parquet.Types.Int64, -1, -1, -1},
+		{"interval", schema.IntervalLogicalType{}, schema.ConvertedTypes.Interval, parquet.Types.FixedLenByteArray, 12, -1, -1},
+		{"uint8", schema.NewIntLogicalType(8 /* bitWidth */, false /* signed */), schema.ConvertedTypes.Uint8, parquet.Types.Int32, -1, -1, -1},
+		{"int8", schema.NewIntLogicalType(8 /* bitWidth */, true /* signed */), schema.ConvertedTypes.Int8, parquet.Types.Int32, -1, -1, -1},
+		{"uint16", schema.NewIntLogicalType(16 /* bitWidth */, false /* signed */), schema.ConvertedTypes.Uint16, parquet.Types.Int32, -1, -1, -1},
+		{"int16", schema.NewIntLogicalType(16 /* bitWidth */, true /* signed */), schema.ConvertedTypes.Int16, parquet.Types.Int32, -1, -1, -1},
+		{"uint32", schema.NewIntLogicalType(32 /* bitWidth */, false /* signed */), schema.ConvertedTypes.Uint32, parquet.Types.Int32, -1, -1, -1},
+		{"int32", schema.NewIntLogicalType(32 /* bitWidth */, true /* signed */), schema.ConvertedTypes.Int32, parquet.Types.Int32, -1, -1, -1},
+		{"uint64", schema.NewIntLogicalType(64 /* bitWidth */, false /* signed */), schema.ConvertedTypes.Uint64, parquet.Types.Int64, -1, -1, -1},
+		{"int64", schema.NewIntLogicalType(64 /* bitWidth */, true /* signed */), schema.ConvertedTypes.Int64, parquet.Types.Int64, -1, -1, -1},
+		{"json", schema.JSONLogicalType{}, schema.ConvertedTypes.JSON, parquet.Types.ByteArray, -1, -1, -1},
+		{"bson", schema.BSONLogicalType{}, schema.ConvertedTypes.BSON, parquet.Types.ByteArray, -1, -1, -1},
+		{"none", schema.NoLogicalType{}, schema.ConvertedTypes.None, parquet.Types.Int64, -1, -1, -1},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			name := "something"
+			repetition := parquet.Repetitions.Required
+
+			fromConverted := schema.MustPrimitive(schema.NewPrimitiveNodeConverted(name, repetition, tt.typ, tt.converted, tt.physicalLen, tt.precision, tt.scale, -1 /* fieldID */))
+			fromLogical := schema.MustPrimitive(schema.NewPrimitiveNodeLogical(name, repetition, tt.logical, tt.typ, tt.physicalLen, -1 /* fieldID */))
+			assert.True(t, fromConverted.Equals(fromLogical))
+		})
+	}
+
+	rep := parquet.Repetitions.Optional
+	fromConverted, err := schema.NewGroupNodeConverted("map" /* name */, rep, []schema.Node{}, schema.ConvertedTypes.Map, -1 /* fieldID */)
+	assert.NoError(t, err)
+
+	fromLogical, err := schema.NewGroupNodeLogical("map" /* name */, rep, []schema.Node{}, schema.MapLogicalType{}, -1 /* fieldID */)
+	assert.NoError(t, err)
+	assert.True(t, fromConverted.Equals(fromLogical))
+
+	fromConverted, err = schema.NewGroupNodeConverted("list" /* name */, rep, []schema.Node{}, schema.ConvertedTypes.List, -1 /* fieldID */)
+	assert.NoError(t, err)
+
+	fromLogical, err = schema.NewGroupNodeLogical("list" /* name */, rep, []schema.Node{}, schema.NewListLogicalType(), -1 /* fieldID */)
+	assert.NoError(t, err)
+	assert.True(t, fromConverted.Equals(fromLogical))
+}
diff --git a/go/parquet/schema/node.go b/go/parquet/schema/node.go
new file mode 100644
index 00000000000..03884426490
--- /dev/null
+++ b/go/parquet/schema/node.go
@@ -0,0 +1,627 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package schema
+
+import (
+	"github.com/apache/arrow/go/parquet"
+	format "github.com/apache/arrow/go/parquet/internal/gen-go/parquet"
+	"github.com/apache/thrift/lib/go/thrift"
+	"golang.org/x/xerrors"
+)
+
+// NodeType describes whether the Node is a Primitive or Group node
+type NodeType int
+
+// the available constants for NodeType
+const (
+	Primitive NodeType = iota
+	Group
+)
+
+// Node is the interface for both Group and Primitive Nodes.
+// A logical schema type has a name, repetition level, and optionally
+// a logical type (converted type is the deprecated version of the logical
+// type concept, which is maintained for forward compatibility)
+type Node interface {
+	Name() string
+	Type() NodeType
+	RepetitionType() parquet.Repetition
+	ConvertedType() ConvertedType
+	LogicalType() LogicalType
+	FieldID() int32
+	Parent() Node
+	SetParent(Node)
+	Path() string
+	Equals(Node) bool
+	Visit(v Visitor)
+	toThrift() *format.SchemaElement
+}
+
+// Visitor is an interface for creating functionality to walk the schema tree.
+//
+// A visitor can be passed to the Visit function of a Node in order to walk
+// the tree. VisitPre is called the first time a node is encountered. If
+// it is a group node, the return is checked and if it is false, the children
+// will be skipped.
+//
+// VisitPost is called after visiting any children
+type Visitor interface {
+	VisitPre(Node) bool
+	VisitPost(Node)
+}
+
+// ColumnPathFromNode walks the parents of the given node to construct it's
+// column path
+func ColumnPathFromNode(n Node) parquet.ColumnPath {
+	if n == nil {
+		return nil
+	}
+
+	c := make([]string, 0)
+
+	// build the path in reverse order as we traverse nodes to the top
+	cursor := n
+	for cursor.Parent() != nil {
+		c = append(c, cursor.Name())
+		cursor = cursor.Parent()
+	}
+
+	// reverse the order of the list in place so that our result
+	// is in the proper, correct order.
+	for i := len(c)/2 - 1; i >= 0; i-- {
+		opp := len(c) - 1 - i
+		c[i], c[opp] = c[opp], c[i]
+	}
+
+	return c
+}
+
+// node is the base embedded struct for both group and primitive nodes
+type node struct {
+	typ    NodeType
+	parent Node
+
+	name          string
+	repetition    parquet.Repetition
+	fieldID       int32
+	logicalType   LogicalType
+	convertedType ConvertedType
+	colPath       parquet.ColumnPath
+}
+
+func (n *node) toThrift() *format.SchemaElement    { return nil }
+func (n *node) Name() string                       { return n.name }
+func (n *node) Type() NodeType                     { return n.typ }
+func (n *node) RepetitionType() parquet.Repetition { return n.repetition }
+func (n *node) ConvertedType() ConvertedType       { return n.convertedType }
+func (n *node) LogicalType() LogicalType           { return n.logicalType }
+func (n *node) FieldID() int32                     { return n.fieldID }
+func (n *node) Parent() Node                       { return n.parent }
+func (n *node) SetParent(p Node)                   { n.parent = p }
+func (n *node) Path() string {
+	return n.columnPath().String()
+}
+func (n *node) columnPath() parquet.ColumnPath {
+	if n.colPath == nil {
+		n.colPath = ColumnPathFromNode(n)
+	}
+	return n.colPath
+}
+
+func (n *node) Equals(rhs Node) bool {
+	return n.typ == rhs.Type() &&
+		n.Name() == rhs.Name() &&
+		n.RepetitionType() == rhs.RepetitionType() &&
+		n.ConvertedType() == rhs.ConvertedType() &&
+		n.FieldID() == rhs.FieldID() &&
+		n.LogicalType().Equals(rhs.LogicalType())
+}
+
+func (n *node) Visit(v Visitor) {}
+
+// A PrimitiveNode is a type that is one of the primitive Parquet storage types. In addition to
+// the other type metadata (name, repetition level, logical type), also has the
+// physical storage type and their type-specific metadata (byte width, decimal
+// parameters)
+type PrimitiveNode struct {
+	node
+
+	ColumnOrder     parquet.ColumnOrder
+	physicalType    parquet.Type
+	typeLen         int
+	decimalMetaData DecimalMetadata
+}
+
+// NewPrimitiveNodeLogical constructs a Primtive node using the provided logical type for a given
+// physical type and typelength.
+func NewPrimitiveNodeLogical(name string, repetition parquet.Repetition, logicalType LogicalType, physicalType parquet.Type, typeLen int, id int32) (*PrimitiveNode, error) {
+	n := &PrimitiveNode{
+		node:         node{typ: Primitive, name: name, repetition: repetition, logicalType: logicalType, fieldID: id},
+		physicalType: physicalType,
+		typeLen:      typeLen,
+	}
+
+	if logicalType != nil {
+		if !logicalType.IsNested() {
+			if logicalType.IsApplicable(physicalType, int32(typeLen)) {
+				n.convertedType, n.decimalMetaData = n.logicalType.ToConvertedType()
+			} else {
+				return nil, xerrors.Errorf("%s cannot be applied to primitive type %s", logicalType, physicalType)
+			}
+		} else {
+			return nil, xerrors.Errorf("nested logical type %s can not be applied to a non-group node", logicalType)
+		}
+	} else {
+		n.logicalType = NoLogicalType{}
+		n.convertedType, n.decimalMetaData = n.logicalType.ToConvertedType()
+	}
+
+	if !(n.logicalType != nil && !n.logicalType.IsNested() && n.logicalType.IsCompatible(n.convertedType, n.decimalMetaData)) {
+		return nil, xerrors.Errorf("invalid logical type %s", n.logicalType)
+	}
+
+	if n.physicalType == parquet.Types.FixedLenByteArray && n.typeLen <= 0 {
+		return nil, xerrors.New("invalid fixed length byte array length")
+	}
+	return n, nil
+}
+
+// NewPrimitiveNodeConverted constructs a primitive node from the given physical type and converted type,
+// determining the logical type from the converted type.
+func NewPrimitiveNodeConverted(name string, repetition parquet.Repetition, typ parquet.Type, converted ConvertedType, typeLen, precision, scale int, id int32) (*PrimitiveNode, error) {
+	n := &PrimitiveNode{
+		node:         node{typ: Primitive, name: name, repetition: repetition, convertedType: converted, fieldID: id},
+		physicalType: typ,
+		typeLen:      -1,
+	}
+
+	switch converted {
+	case ConvertedTypes.None:
+	case ConvertedTypes.UTF8, ConvertedTypes.JSON, ConvertedTypes.BSON:
+		if typ != parquet.Types.ByteArray {
+			return nil, xerrors.Errorf("parquet: %s can only annotate BYTE_LEN fields", typ)
+		}
+	case ConvertedTypes.Decimal:
+		switch typ {
+		case parquet.Types.Int32, parquet.Types.Int64, parquet.Types.ByteArray, parquet.Types.FixedLenByteArray:
+		default:
+			return nil, xerrors.New("parquet: DECIMAL can only annotate INT32, INT64, BYTE_ARRAY and FIXED")
+		}
+
+		switch {
+		case precision <= 0:
+			return nil, xerrors.Errorf("parquet: invalid decimal precision: %d, must be between 1 and 38 inclusive", precision)
+		case scale < 0:
+			return nil, xerrors.Errorf("parquet: invalid decimal scale: %d, must be a number between 0 and precision inclusive", scale)
+		case scale > precision:
+			return nil, xerrors.Errorf("parquet: invalid decimal scale %d, cannot be greater than precision: %d", scale, precision)
+		}
+		n.decimalMetaData.IsSet = true
+		n.decimalMetaData.Precision = int32(precision)
+		n.decimalMetaData.Scale = int32(scale)
+	case ConvertedTypes.Date,
+		ConvertedTypes.TimeMillis,
+		ConvertedTypes.Int8,
+		ConvertedTypes.Int16,
+		ConvertedTypes.Int32,
+		ConvertedTypes.Uint8,
+		ConvertedTypes.Uint16,
+		ConvertedTypes.Uint32:
+		if typ != parquet.Types.Int32 {
+			return nil, xerrors.Errorf("parquet: %s can only annotate INT32", converted)
+		}
+	case ConvertedTypes.TimeMicros,
+		ConvertedTypes.TimestampMicros,
+		ConvertedTypes.TimestampMillis,
+		ConvertedTypes.Int64,
+		ConvertedTypes.Uint64:
+		if typ != parquet.Types.Int64 {
+			return nil, xerrors.Errorf("parquet: %s can only annotate INT64", converted)
+		}
+	case ConvertedTypes.Interval:
+		if typ != parquet.Types.FixedLenByteArray || typeLen != 12 {
+			return nil, xerrors.New("parquet: INTERVAL can only annotate FIXED_LEN_BYTE_ARRAY(12)")
+		}
+	case ConvertedTypes.Enum:
+		if typ != parquet.Types.ByteArray {
+			return nil, xerrors.New("parquet: ENUM can only annotate BYTE_ARRAY fields")
+		}
+	case ConvertedTypes.NA:
+	default:
+		return nil, xerrors.Errorf("parquet: %s cannot be applied to a primitive type", converted.String())
+	}
+
+	n.logicalType = n.convertedType.ToLogicalType(n.decimalMetaData)
+	if !(n.logicalType != nil && !n.logicalType.IsNested() && n.logicalType.IsCompatible(n.convertedType, n.decimalMetaData)) {
+		return nil, xerrors.Errorf("invalid logical type %s", n.logicalType)
+	}
+
+	if n.physicalType == parquet.Types.FixedLenByteArray {
+		if typeLen <= 0 {
+			return nil, xerrors.New("invalid fixed len byte array length")
+		}
+		n.typeLen = typeLen
+	}
+
+	return n, nil
+}
+
+func PrimitiveNodeFromThrift(elem *format.SchemaElement) (*PrimitiveNode, error) {
+	fieldID := int32(-1)
+	if elem.IsSetFieldID() {
+		fieldID = elem.GetFieldID()
+	}
+
+	if elem.IsSetLogicalType() {
+		return NewPrimitiveNodeLogical(elem.GetName(), parquet.Repetition(elem.GetRepetitionType()),
+			getLogicalType(elem.GetLogicalType()), parquet.Type(elem.GetType()), int(elem.GetTypeLength()),
+			fieldID)
+	} else if elem.IsSetConvertedType() {
+		return NewPrimitiveNodeConverted(elem.GetName(), parquet.Repetition(elem.GetRepetitionType()),
+			parquet.Type(elem.GetType()), ConvertedType(elem.GetConvertedType()),
+			int(elem.GetTypeLength()), int(elem.GetPrecision()), int(elem.GetScale()), fieldID)
+	}
+	return NewPrimitiveNodeLogical(elem.GetName(), parquet.Repetition(elem.GetRepetitionType()), NoLogicalType{}, parquet.Type(elem.GetType()), int(elem.GetTypeLength()), fieldID)
+}
+
+// NewPrimitiveNode constructs a primitive node with the ConvertedType of None and no logical type.
+//
+// Use NewPrimitiveNodeLogical and NewPrimitiveNodeConverted to specify the logical or converted type.
+func NewPrimitiveNode(name string, repetition parquet.Repetition, typ parquet.Type, fieldID, typeLength int32) (*PrimitiveNode, error) {
+	return NewPrimitiveNodeLogical(name, repetition, nil, typ, int(typeLength), fieldID)
+}
+
+// Equals returns true if both nodes are primitive nodes with the same physical
+// and converted/logical types.
+func (p *PrimitiveNode) Equals(rhs Node) bool {
+	if !p.node.Equals(rhs) {
+		return false
+	}
+
+	other := rhs.(*PrimitiveNode)
+	if p == other {
+		return true
+	}
+
+	if p.PhysicalType() != other.PhysicalType() {
+		return false
+	}
+
+	equal := true
+	if p.ConvertedType() == ConvertedTypes.Decimal {
+		equal = equal &&
+			(p.decimalMetaData.Precision == other.decimalMetaData.Precision &&
+				p.decimalMetaData.Scale == other.decimalMetaData.Scale)
+	}
+	if p.PhysicalType() == parquet.Types.FixedLenByteArray {
+		equal = equal && p.TypeLength() == other.TypeLength()
+	}
+	return equal
+}
+
+// PhysicalType returns the proper Physical parquet.Type primitive that is used
+// to store the values in this column.
+func (p *PrimitiveNode) PhysicalType() parquet.Type { return p.physicalType }
+
+// SetTypeLength will change the type length of the node, has no effect if the
+// physical type is not FixedLength Byte Array
+func (p *PrimitiveNode) SetTypeLength(length int) {
+	if p.PhysicalType() == parquet.Types.FixedLenByteArray {
+		p.typeLen = length
+	}
+}
+
+// TypeLength will be -1 if not a FixedLenByteArray column, otherwise will be the
+// length of the FixedLen Byte Array
+func (p *PrimitiveNode) TypeLength() int { return p.typeLen }
+
+// DecimalMetadata returns the current metadata for the node. If not a decimal
+// typed column, the return should have IsSet == false.
+func (p *PrimitiveNode) DecimalMetadata() DecimalMetadata { return p.decimalMetaData }
+
+// Visit is for implementing a Visitor pattern handler to walk a schema's tree. One
+// example is the Schema Printer which walks the tree to print out the schema in order.
+func (p *PrimitiveNode) Visit(v Visitor) {
+	v.VisitPre(p)
+	v.VisitPost(p)
+}
+
+func (p *PrimitiveNode) toThrift() *format.SchemaElement {
+	elem := &format.SchemaElement{
+		Name:           p.Name(),
+		RepetitionType: format.FieldRepetitionTypePtr(format.FieldRepetitionType(p.RepetitionType())),
+		Type:           format.TypePtr(format.Type(p.PhysicalType())),
+	}
+	if p.ConvertedType() != ConvertedTypes.None {
+		elem.ConvertedType = format.ConvertedTypePtr(format.ConvertedType(p.ConvertedType()))
+	}
+	if p.FieldID() >= 0 {
+		elem.FieldID = thrift.Int32Ptr(p.FieldID())
+	}
+	if p.logicalType != nil && p.logicalType.IsSerialized() && !p.logicalType.Equals(IntervalLogicalType{}) {
+		elem.LogicalType = p.logicalType.toThrift()
+	}
+	if p.physicalType == parquet.Types.FixedLenByteArray {
+		elem.TypeLength = thrift.Int32Ptr(int32(p.typeLen))
+	}
+	if p.decimalMetaData.IsSet {
+		elem.Precision = &p.decimalMetaData.Precision
+		elem.Scale = &p.decimalMetaData.Scale
+	}
+	return elem
+}
+
+// FieldList is an alias for a slice of Nodes
+type FieldList []Node
+
+// Len is equivalent to len(fieldlist)
+func (f FieldList) Len() int { return len(f) }
+
+// GroupNode is for mananging nested nodes like List, Map, etc.
+type GroupNode struct {
+	node
+	fields    FieldList
+	nameToIdx strIntMultimap
+}
+
+// NewGroupNodeConverted constructs a group node with the provided fields and converted type,
+// determining the logical type from that converted type.
+func NewGroupNodeConverted(name string, repetition parquet.Repetition, fields FieldList, converted ConvertedType, id int32) (n *GroupNode, err error) {
+	n = &GroupNode{
+		node:   node{typ: Group, name: name, repetition: repetition, convertedType: converted, fieldID: id},
+		fields: fields,
+	}
+	n.logicalType = n.convertedType.ToLogicalType(DecimalMetadata{})
+	if !(n.logicalType != nil && (n.logicalType.IsNested() || n.logicalType.IsNone()) && n.logicalType.IsCompatible(n.convertedType, DecimalMetadata{})) {
+		err = xerrors.Errorf("invalid logical type %s", n.logicalType.String())
+		return
+	}
+
+	n.nameToIdx = make(strIntMultimap)
+	for idx, f := range n.fields {
+		f.SetParent(n)
+		n.nameToIdx.Add(f.Name(), idx)
+	}
+	return
+}
+
+// NewGroupNodeLogical constructs a group node with the provided fields and logical type,
+// determining the converted type from the provided logical type.
+func NewGroupNodeLogical(name string, repetition parquet.Repetition, fields FieldList, logical LogicalType, id int32) (n *GroupNode, err error) {
+	n = &GroupNode{
+		node:   node{typ: Group, name: name, repetition: repetition, logicalType: logical, fieldID: id},
+		fields: fields,
+	}
+
+	if logical != nil {
+		if logical.IsNested() {
+			n.convertedType, _ = logical.ToConvertedType()
+		} else {
+			err = xerrors.Errorf("logical type %s cannot be applied to group node", logical)
+			return
+		}
+	} else {
+		n.logicalType = NoLogicalType{}
+		n.convertedType, _ = n.logicalType.ToConvertedType()
+	}
+
+	if !(n.logicalType != nil && (n.logicalType.IsNested() || n.logicalType.IsNone()) && n.logicalType.IsCompatible(n.convertedType, DecimalMetadata{})) {
+		err = xerrors.Errorf("invalid logical type %s", n.logicalType)
+		return
+	}
+
+	n.nameToIdx = make(strIntMultimap)
+	for idx, f := range n.fields {
+		f.SetParent(n)
+		n.nameToIdx.Add(f.Name(), idx)
+	}
+	return
+}
+
+// NewGroupNode constructs a new group node with the provided fields,
+// but with converted type None and No Logical Type
+func NewGroupNode(name string, repetition parquet.Repetition, fields FieldList, fieldID int32) (*GroupNode, error) {
+	return NewGroupNodeConverted(name, repetition, fields, ConvertedTypes.None, fieldID)
+}
+
+// Must is a convenience function for the NewNode functions that return a Node
+// and an error, panic'ing if err != nil or returning the node
+func Must(n Node, err error) Node {
+	if err != nil {
+		panic(err)
+	}
+	return n
+}
+
+// MustGroup is like Must, except it casts the node to a *GroupNode, which will panic
+// if it is a primitive node.
+func MustGroup(n Node, err error) *GroupNode {
+	if err != nil {
+		panic(err)
+	}
+	return n.(*GroupNode)
+}
+
+// MustPrimitive is like Must except it casts the node to *PrimitiveNode which will panic
+// if it is a group node.
+func MustPrimitive(n Node, err error) *PrimitiveNode {
+	if err != nil {
+		panic(err)
+	}
+	return n.(*PrimitiveNode)
+}
+
+func GroupNodeFromThrift(elem *format.SchemaElement, fields FieldList) (*GroupNode, error) {
+	id := int32(-1)
+	if elem.IsSetFieldID() {
+		id = elem.GetFieldID()
+	}
+
+	if elem.IsSetLogicalType() {
+		return NewGroupNodeLogical(elem.GetName(), parquet.Repetition(elem.GetRepetitionType()), fields, getLogicalType(elem.GetLogicalType()), id)
+	}
+
+	converted := ConvertedTypes.None
+	if elem.IsSetConvertedType() {
+		converted = ConvertedType(elem.GetConvertedType())
+	}
+	return NewGroupNodeConverted(elem.GetName(), parquet.Repetition(elem.GetRepetitionType()), fields, converted, id)
+}
+
+func (g *GroupNode) toThrift() *format.SchemaElement {
+	elem := &format.SchemaElement{
+		Name:           g.name,
+		NumChildren:    thrift.Int32Ptr(int32(len(g.fields))),
+		RepetitionType: format.FieldRepetitionTypePtr(format.FieldRepetitionType(g.RepetitionType())),
+	}
+	if g.convertedType != ConvertedTypes.None {
+		elem.ConvertedType = format.ConvertedTypePtr(format.ConvertedType(g.convertedType))
+	}
+	if g.fieldID >= 0 {
+		elem.FieldID = &g.fieldID
+	}
+	if g.logicalType != nil && g.logicalType.IsSerialized() {
+		elem.LogicalType = g.logicalType.toThrift()
+	}
+	return elem
+}
+
+// Equals will compare this node to the provided node and only return true if
+// this node and all of it's children are the same as the passed in node and its
+// children.
+func (g *GroupNode) Equals(rhs Node) bool {
+	if !g.node.Equals(rhs) {
+		return false
+	}
+
+	other := rhs.(*GroupNode)
+	if g == other {
+		return true
+	}
+	if len(g.fields) != len(other.fields) {
+		return false
+	}
+
+	for idx, field := range g.fields {
+		if !field.Equals(other.fields[idx]) {
+			return false
+		}
+	}
+	return true
+}
+
+// NumFields returns the number of direct child fields for this group node
+func (g *GroupNode) NumFields() int {
+	return len(g.fields)
+}
+
+// Field returns the node in the field list which is of the provided (0-based) index
+func (g *GroupNode) Field(i int) Node {
+	return g.fields[i]
+}
+
+// FieldIndexByName provides the index for the field of the given name. Returns
+// -1 if not found.
+//
+// If there are more than one field of this name, it returns the index for the first one.
+func (g *GroupNode) FieldIndexByName(name string) int {
+	if idx, ok := g.nameToIdx[name]; ok {
+		return idx[0]
+	}
+	return -1
+}
+
+// FieldIndexByField looks up the index child of this node. Returns -1
+// if n isn't a child of this group
+func (g *GroupNode) FieldIndexByField(n Node) int {
+	if search, ok := g.nameToIdx[n.Name()]; ok {
+		for _, idx := range search {
+			if n == g.fields[idx] {
+				return idx
+			}
+		}
+	}
+	return -1
+}
+
+// Visit is for implementing a Visitor pattern handler to walk a schema's tree. One
+// example is the Schema Printer which walks the tree to print out the schema in order.
+func (g *GroupNode) Visit(v Visitor) {
+	if v.VisitPre(g) {
+		for _, field := range g.fields {
+			field.Visit(v)
+		}
+	}
+	v.VisitPost(g)
+}
+
+// HasRepeatedFields returns true if any of the children of this node have
+// Repeated as its repetition type.
+//
+// This is recursive and will check the children of any group nodes that are children.
+func (g *GroupNode) HasRepeatedFields() bool {
+	for _, field := range g.fields {
+		if field.RepetitionType() == parquet.Repetitions.Repeated {
+			return true
+		}
+		if field.Type() == Group {
+			return field.(*GroupNode).HasRepeatedFields()
+		}
+	}
+	return false
+}
+
+// NewInt32Node is a convenience factory for constructing an Int32 Primitive Node
+func NewInt32Node(name string, rep parquet.Repetition, fieldID int32) *PrimitiveNode {
+	return MustPrimitive(NewPrimitiveNode(name, rep, parquet.Types.Int32, fieldID, -1))
+}
+
+// NewInt64Node is a convenience factory for constructing an Int64 Primitive Node
+func NewInt64Node(name string, rep parquet.Repetition, fieldID int32) *PrimitiveNode {
+	return MustPrimitive(NewPrimitiveNode(name, rep, parquet.Types.Int64, fieldID, -1))
+}
+
+// NewInt96Node is a convenience factory for constructing an Int96 Primitive Node
+func NewInt96Node(name string, rep parquet.Repetition, fieldID int32) *PrimitiveNode {
+	return MustPrimitive(NewPrimitiveNode(name, rep, parquet.Types.Int96, fieldID, -1))
+}
+
+// NewFloat32Node is a convenience factory for constructing an Float Primitive Node
+func NewFloat32Node(name string, rep parquet.Repetition, fieldID int32) *PrimitiveNode {
+	return MustPrimitive(NewPrimitiveNode(name, rep, parquet.Types.Float, fieldID, -1))
+}
+
+// NewFloat64Node is a convenience factory for constructing an Double Primitive Node
+func NewFloat64Node(name string, rep parquet.Repetition, fieldID int32) *PrimitiveNode {
+	return MustPrimitive(NewPrimitiveNode(name, rep, parquet.Types.Double, fieldID, -1))
+}
+
+// NewBooleanNode is a convenience factory for constructing an Boolean Primitive Node
+func NewBooleanNode(name string, rep parquet.Repetition, fieldID int32) *PrimitiveNode {
+	return MustPrimitive(NewPrimitiveNode(name, rep, parquet.Types.Boolean, fieldID, -1))
+}
+
+// NewByteArrayNode is a convenience factory for constructing an Byte Array Primitive Node
+func NewByteArrayNode(name string, rep parquet.Repetition, fieldID int32) *PrimitiveNode {
+	return MustPrimitive(NewPrimitiveNode(name, rep, parquet.Types.ByteArray, fieldID, -1))
+}
+
+// NewFixedLenByteArrayNode is a convenience factory for constructing an Fixed Length
+// Byte Array Primitive Node of the given length
+func NewFixedLenByteArrayNode(name string, rep parquet.Repetition, length int32, fieldID int32) *PrimitiveNode {
+	return MustPrimitive(NewPrimitiveNode(name, rep, parquet.Types.FixedLenByteArray, fieldID, length))
+}
diff --git a/go/parquet/schema/reflection.go b/go/parquet/schema/reflection.go
new file mode 100644
index 00000000000..8da05fb540f
--- /dev/null
+++ b/go/parquet/schema/reflection.go
@@ -0,0 +1,827 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package schema
+
+import (
+	"reflect"
+	"strconv"
+	"strings"
+
+	"github.com/apache/arrow/go/parquet"
+	format "github.com/apache/arrow/go/parquet/internal/gen-go/parquet"
+	"golang.org/x/xerrors"
+)
+
+type taggedInfo struct {
+	Name string
+
+	Type      parquet.Type
+	KeyType   parquet.Type
+	ValueType parquet.Type
+
+	Length      int32
+	KeyLength   int32
+	ValueLength int32
+
+	Scale      int32
+	KeyScale   int32
+	ValueScale int32
+
+	Precision      int32
+	KeyPrecision   int32
+	ValuePrecision int32
+
+	FieldID      int32
+	KeyFieldID   int32
+	ValueFieldID int32
+
+	RepetitionType  parquet.Repetition
+	ValueRepetition parquet.Repetition
+
+	Converted      ConvertedType
+	KeyConverted   ConvertedType
+	ValueConverted ConvertedType
+
+	LogicalFields      map[string]string
+	KeyLogicalFields   map[string]string
+	ValueLogicalFields map[string]string
+
+	LogicalType      LogicalType
+	KeyLogicalType   LogicalType
+	ValueLogicalType LogicalType
+}
+
+func (t *taggedInfo) CopyForKey() (ret taggedInfo) {
+	ret = *t
+	ret.Type = t.KeyType
+	ret.Length = t.KeyLength
+	ret.Scale = t.KeyScale
+	ret.Precision = t.KeyPrecision
+	ret.FieldID = t.KeyFieldID
+	ret.RepetitionType = parquet.Repetitions.Required
+	ret.Converted = t.KeyConverted
+	ret.LogicalType = t.KeyLogicalType
+	return
+}
+
+func (t *taggedInfo) CopyForValue() (ret taggedInfo) {
+	ret = *t
+	ret.Type = t.ValueType
+	ret.Length = t.ValueLength
+	ret.Scale = t.ValueScale
+	ret.Precision = t.ValuePrecision
+	ret.FieldID = t.ValueFieldID
+	ret.RepetitionType = t.ValueRepetition
+	ret.Converted = t.ValueConverted
+	ret.LogicalType = t.ValueLogicalType
+	return
+}
+
+func (t *taggedInfo) UpdateLogicalTypes() {
+	processLogicalType := func(fields map[string]string, precision, scale int32) LogicalType {
+		t, ok := fields["type"]
+		if !ok {
+			return NoLogicalType{}
+		}
+
+		switch strings.ToLower(t) {
+		case "string":
+			return StringLogicalType{}
+		case "map":
+			return MapLogicalType{}
+		case "list":
+			return ListLogicalType{}
+		case "enum":
+			return EnumLogicalType{}
+		case "decimal":
+			if v, ok := fields["precision"]; ok {
+				precision = int32FromType(v)
+			}
+			if v, ok := fields["scale"]; ok {
+				scale = int32FromType(v)
+			}
+			return NewDecimalLogicalType(precision, scale)
+		case "date":
+			return DateLogicalType{}
+		case "time":
+			unit, ok := fields["unit"]
+			if !ok {
+				panic("must specify unit for time logical type")
+			}
+			adjustedToUtc, ok := fields["isadjustedutc"]
+			if !ok {
+				adjustedToUtc = "true"
+			}
+			return NewTimeLogicalType(boolFromStr(adjustedToUtc), timeUnitFromString(strings.ToLower(unit)))
+		case "timestamp":
+			unit, ok := fields["unit"]
+			if !ok {
+				panic("must specify unit for time logical type")
+			}
+			adjustedToUtc, ok := fields["isadjustedutc"]
+			if !ok {
+				adjustedToUtc = "true"
+			}
+			return NewTimestampLogicalType(boolFromStr(adjustedToUtc), timeUnitFromString(unit))
+		case "integer":
+			width, ok := fields["bitwidth"]
+			if !ok {
+				panic("must specify bitwidth if explicitly setting integer logical type")
+			}
+			signed, ok := fields["signed"]
+			if !ok {
+				signed = "true"
+			}
+
+			return NewIntLogicalType(int8(int32FromType(width)), boolFromStr(signed))
+		case "null":
+			return NullLogicalType{}
+		case "json":
+			return JSONLogicalType{}
+		case "bson":
+			return BSONLogicalType{}
+		case "uuid":
+			return UUIDLogicalType{}
+		default:
+			panic(xerrors.Errorf("invalid logical type specified: %s", t))
+		}
+	}
+
+	t.LogicalType = processLogicalType(t.LogicalFields, t.Precision, t.Scale)
+	t.KeyLogicalType = processLogicalType(t.KeyLogicalFields, t.KeyPrecision, t.KeyScale)
+	t.ValueLogicalType = processLogicalType(t.ValueLogicalFields, t.ValuePrecision, t.ValueScale)
+}
+
+func newTaggedInfo() taggedInfo {
+	return taggedInfo{
+		Type:               parquet.Types.Undefined,
+		KeyType:            parquet.Types.Undefined,
+		ValueType:          parquet.Types.Undefined,
+		RepetitionType:     parquet.Repetitions.Undefined,
+		ValueRepetition:    parquet.Repetitions.Undefined,
+		Converted:          ConvertedTypes.NA,
+		KeyConverted:       ConvertedTypes.NA,
+		ValueConverted:     ConvertedTypes.NA,
+		FieldID:            -1,
+		KeyFieldID:         -1,
+		ValueFieldID:       -1,
+		LogicalFields:      make(map[string]string),
+		KeyLogicalFields:   make(map[string]string),
+		ValueLogicalFields: make(map[string]string),
+		LogicalType:        NoLogicalType{},
+		KeyLogicalType:     NoLogicalType{},
+		ValueLogicalType:   NoLogicalType{},
+	}
+}
+
+var int32FromType = func(v string) int32 {
+	val, err := strconv.Atoi(v)
+	if err != nil {
+		panic(err)
+	}
+	return int32(val)
+}
+
+var boolFromStr = func(v string) bool {
+	val, err := strconv.ParseBool(v)
+	if err != nil {
+		panic(err)
+	}
+	return val
+}
+
+func infoFromTags(f reflect.StructTag) *taggedInfo {
+	typeFromStr := func(v string) parquet.Type {
+		t, err := format.TypeFromString(strings.ToUpper(v))
+		if err != nil {
+			panic(xerrors.Errorf("invalid type specified: %s", v))
+		}
+		return parquet.Type(t)
+	}
+
+	repFromStr := func(v string) parquet.Repetition {
+		r, err := format.FieldRepetitionTypeFromString(strings.ToUpper(v))
+		if err != nil {
+			panic(err)
+		}
+		return parquet.Repetition(r)
+	}
+
+	convertedFromStr := func(v string) ConvertedType {
+		c, err := format.ConvertedTypeFromString(strings.ToUpper(v))
+		if err != nil {
+			panic(err)
+		}
+		return ConvertedType(c)
+	}
+
+	if ptags, ok := f.Lookup("parquet"); ok {
+		info := newTaggedInfo()
+		for _, tag := range strings.Split(strings.Replace(ptags, "\t", "", -1), ",") {
+			tag = strings.TrimSpace(tag)
+			kv := strings.SplitN(tag, "=", 2)
+			key := strings.TrimSpace(strings.ToLower(kv[0]))
+			value := strings.TrimSpace(kv[1])
+
+			switch key {
+			case "name":
+				info.Name = value
+			case "type":
+				info.Type = typeFromStr(value)
+			case "keytype":
+				info.KeyType = typeFromStr(value)
+			case "valuetype":
+				info.ValueType = typeFromStr(value)
+			case "length":
+				info.Length = int32FromType(value)
+			case "keylength":
+				info.KeyLength = int32FromType(value)
+			case "valuelength":
+				info.ValueLength = int32FromType(value)
+			case "scale":
+				info.Scale = int32FromType(value)
+			case "keyscale":
+				info.KeyScale = int32FromType(value)
+			case "valuescale":
+				info.ValueScale = int32FromType(value)
+			case "precision":
+				info.Precision = int32FromType(value)
+			case "keyprecision":
+				info.KeyPrecision = int32FromType(value)
+			case "valueprecision":
+				info.ValuePrecision = int32FromType(value)
+			case "fieldid":
+				info.FieldID = int32FromType(value)
+			case "keyfieldid":
+				info.KeyFieldID = int32FromType(value)
+			case "valuefieldid":
+				info.ValueFieldID = int32FromType(value)
+			case "repetition":
+				info.RepetitionType = repFromStr(value)
+			case "valuerepetition":
+				info.ValueRepetition = repFromStr(value)
+			case "converted":
+				info.Converted = convertedFromStr(value)
+			case "keyconverted":
+				info.KeyConverted = convertedFromStr(value)
+			case "valueconverted":
+				info.ValueConverted = convertedFromStr(value)
+			case "logical":
+				info.LogicalFields["type"] = value
+			case "keylogical":
+				info.KeyLogicalFields["type"] = value
+			case "valuelogical":
+				info.ValueLogicalFields["type"] = value
+			default:
+				switch {
+				case strings.HasPrefix(key, "logical."):
+					info.LogicalFields[strings.TrimPrefix(key, "logical.")] = value
+				case strings.HasPrefix(key, "keylogical."):
+					info.KeyLogicalFields[strings.TrimPrefix(key, "keylogical.")] = value
+				case strings.HasPrefix(key, "valuelogical."):
+					info.ValueLogicalFields[strings.TrimPrefix(key, "valuelogical.")] = value
+				}
+			}
+		}
+		info.UpdateLogicalTypes()
+		return &info
+	}
+	return nil
+}
+
+// typeToNode recurseively converts a physical type and the tag info into parquet Nodes
+//
+// to avoid having to propagate errors up potentially high numbers of recursive calls
+// we use panics and then recover in the public function NewSchemaFromStruct so that a
+// failure very far down the stack quickly unwinds.
+func typeToNode(name string, typ reflect.Type, repType parquet.Repetition, info *taggedInfo) Node {
+	// set up our default values for everything
+	var (
+		converted             = ConvertedTypes.None
+		logical   LogicalType = NoLogicalType{}
+		fieldID               = int32(-1)
+		physical              = parquet.Types.Undefined
+		typeLen               = 0
+		precision             = 0
+		scale                 = 0
+	)
+	if info != nil { // we have struct tag info to process
+		fieldID = info.FieldID
+		if info.Converted != ConvertedTypes.NA {
+			converted = info.Converted
+		}
+		logical = info.LogicalType
+		physical = info.Type
+		typeLen = int(info.Length)
+		precision = int(info.Precision)
+		scale = int(info.Scale)
+
+		if info.Name != "" {
+			name = info.Name
+		}
+		if info.RepetitionType != parquet.Repetitions.Undefined {
+			repType = info.RepetitionType
+		}
+	}
+
+	// simplify the logic by switching based on the reflection Kind
+	switch typ.Kind() {
+	case reflect.Map:
+		// a map must have a logical type of MAP or have no tag for logical type in which case
+		// we assume MAP logical type.
+		if !logical.IsNone() && !logical.Equals(MapLogicalType{}) {
+			panic("cannot set logical type to something other than map for a map")
+		}
+
+		infoCopy := newTaggedInfo()
+		if info != nil { // populate any value specific tags to propagate for the value type
+			infoCopy = info.CopyForValue()
+		}
+
+		// create the node for the value type of the map
+		value := typeToNode("value", typ.Elem(), parquet.Repetitions.Required, &infoCopy)
+		if info != nil { // change our copy to now use the key specific tags if they exist
+			infoCopy = info.CopyForKey()
+		}
+
+		// create the node for the key type of the map
+		key := typeToNode("key", typ.Key(), parquet.Repetitions.Required, &infoCopy)
+		if key.RepetitionType() != parquet.Repetitions.Required { // key cannot be optional
+			panic("key type of map must be Required")
+		}
+		return Must(MapOf(name, key, value, repType, fieldID))
+	case reflect.Struct:
+		// structs are Group nodes
+		fields := make(FieldList, 0)
+		for i := 0; i < typ.NumField(); i++ {
+			f := typ.Field(i)
+
+			fields = append(fields, typeToNode(f.Name, f.Type, parquet.Repetitions.Required, infoFromTags(f.Tag)))
+		}
+		// group nodes don't have a physical type
+		if physical != parquet.Types.Undefined {
+			panic("cannot specify custom type on struct")
+		}
+		// group nodes don't have converted or logical types
+		if converted != ConvertedTypes.None {
+			panic("cannot specify converted types for a struct")
+		}
+		if !logical.IsNone() {
+			panic("cannot specify logicaltype for a struct")
+		}
+		return Must(NewGroupNode(name, repType, fields, fieldID))
+	case reflect.Ptr: // if we encounter a pointer create a node for the type it points to, but mark it as optional
+		return typeToNode(name, typ.Elem(), parquet.Repetitions.Optional, info)
+	case reflect.Array:
+		// arrays are repeated or fixed size
+		if typ == reflect.TypeOf(parquet.Int96{}) {
+			return NewInt96Node(name, repType, fieldID)
+		}
+
+		if typ.Elem() == reflect.TypeOf(byte(0)) { // something like [12]byte translates to FixedLenByteArray with length 12
+			if physical == parquet.Types.Undefined {
+				physical = parquet.Types.FixedLenByteArray
+			}
+			if typeLen == 0 { // if there was no type length specified in the tag, use the length of the type.
+				typeLen = typ.Len()
+			}
+			if !logical.IsNone() {
+				return MustPrimitive(NewPrimitiveNodeLogical(name, repType, logical, physical, typeLen, fieldID))
+			}
+			return MustPrimitive(NewPrimitiveNodeConverted(name, repType, physical, converted, typeLen, precision, scale, fieldID))
+		}
+		fallthrough // if it's not a fixed len byte array type, then just treat it like a slice
+	case reflect.Slice:
+		// for slices, we default to treating them as lists unless the repetition type is set to REPEATED or they are
+		// a bytearray/fixedlenbytearray
+		switch {
+		case repType == parquet.Repetitions.Repeated:
+			return typeToNode(name, typ.Elem(), parquet.Repetitions.Repeated, info)
+		case physical == parquet.Types.FixedLenByteArray || physical == parquet.Types.ByteArray:
+			if typ.Elem() != reflect.TypeOf(byte(0)) {
+				panic("slice with physical type ByteArray or FixedLenByteArray must be []byte")
+			}
+			fallthrough
+		case typ.Elem() == reflect.TypeOf(byte(0)):
+			if physical == parquet.Types.Undefined {
+				physical = parquet.Types.ByteArray
+			}
+			if !logical.IsNone() {
+				return MustPrimitive(NewPrimitiveNodeLogical(name, repType, logical, physical, typeLen, fieldID))
+			}
+			return MustPrimitive(NewPrimitiveNodeConverted(name, repType, physical, converted, typeLen, precision, scale, fieldID))
+		default:
+			var elemInfo *taggedInfo
+			if info != nil {
+				elemInfo = &taggedInfo{}
+				*elemInfo = info.CopyForValue()
+			}
+
+			if !logical.IsNone() && !logical.Equals(ListLogicalType{}) {
+				panic("slice must either be repeated or a List type")
+			}
+			if converted != ConvertedTypes.None && converted != ConvertedTypes.List {
+				panic("slice must either be repeated or a List type")
+			}
+			return Must(ListOf(typeToNode(name, typ.Elem(), parquet.Repetitions.Required, elemInfo), repType, fieldID))
+		}
+	case reflect.String:
+		// strings are byte arrays or fixedlen byte array
+		t := parquet.Types.ByteArray
+		switch physical {
+		case parquet.Types.Undefined, parquet.Types.ByteArray:
+		case parquet.Types.FixedLenByteArray:
+			t = parquet.Types.FixedLenByteArray
+		default:
+			panic("string fields should be of type bytearray or fixedlenbytearray only")
+		}
+
+		if !logical.IsNone() {
+			return MustPrimitive(NewPrimitiveNodeLogical(name, repType, logical, t, typeLen, fieldID))
+		}
+
+		return MustPrimitive(NewPrimitiveNodeConverted(name, repType, t, converted, typeLen, precision, scale, fieldID))
+	case reflect.Int, reflect.Int32, reflect.Int8, reflect.Int16, reflect.Int64:
+		// handle integer types, default to setting the corresponding logical type
+		ptyp := parquet.Types.Int32
+		if typ.Bits() == 64 {
+			ptyp = parquet.Types.Int64
+		}
+
+		if physical != parquet.Types.Undefined {
+			ptyp = physical
+		}
+
+		if !logical.IsNone() {
+			return MustPrimitive(NewPrimitiveNodeLogical(name, repType, logical, ptyp, typeLen, fieldID))
+		}
+
+		bitwidth := int8(typ.Bits())
+		if physical != parquet.Types.Undefined {
+			if ptyp == parquet.Types.Int32 {
+				bitwidth = 32
+			} else if ptyp == parquet.Types.Int64 {
+				bitwidth = 64
+			}
+		}
+
+		if converted != ConvertedTypes.None {
+			return MustPrimitive(NewPrimitiveNodeConverted(name, repType, ptyp, converted, 0, precision, scale, fieldID))
+		}
+
+		return MustPrimitive(NewPrimitiveNodeLogical(name, repType, NewIntLogicalType(bitwidth, true), ptyp, 0, fieldID))
+	case reflect.Uint, reflect.Uint32, reflect.Uint8, reflect.Uint16, reflect.Uint64:
+		// handle unsigned integer types and default to the corresponding logical type for it.
+		ptyp := parquet.Types.Int32
+		if typ.Bits() == 64 {
+			ptyp = parquet.Types.Int64
+		}
+
+		if physical != parquet.Types.Undefined {
+			ptyp = physical
+		}
+
+		if !logical.IsNone() {
+			return MustPrimitive(NewPrimitiveNodeLogical(name, repType, logical, ptyp, typeLen, fieldID))
+		}
+
+		bitwidth := int8(typ.Bits())
+		if physical != parquet.Types.Undefined {
+			if ptyp == parquet.Types.Int32 {
+				bitwidth = 32
+			} else if ptyp == parquet.Types.Int64 {
+				bitwidth = 64
+			}
+		}
+
+		if converted != ConvertedTypes.None {
+			return MustPrimitive(NewPrimitiveNodeConverted(name, repType, ptyp, converted, 0, precision, scale, fieldID))
+		}
+
+		return MustPrimitive(NewPrimitiveNodeLogical(name, repType, NewIntLogicalType(bitwidth, false), ptyp, 0, fieldID))
+	case reflect.Bool:
+		if !logical.IsNone() {
+			return MustPrimitive(NewPrimitiveNodeLogical(name, repType, logical, parquet.Types.Boolean, typeLen, fieldID))
+		}
+		return MustPrimitive(NewPrimitiveNodeConverted(name, repType, parquet.Types.Boolean, converted, typeLen, precision, scale, fieldID))
+	case reflect.Float32:
+		if !logical.IsNone() {
+			return MustPrimitive(NewPrimitiveNodeLogical(name, repType, logical, parquet.Types.Float, typeLen, fieldID))
+		}
+		return MustPrimitive(NewPrimitiveNodeConverted(name, repType, parquet.Types.Float, converted, typeLen, precision, scale, fieldID))
+	case reflect.Float64:
+		if !logical.IsNone() {
+			return MustPrimitive(NewPrimitiveNodeLogical(name, repType, logical, parquet.Types.Double, typeLen, fieldID))
+		}
+		return MustPrimitive(NewPrimitiveNodeConverted(name, repType, parquet.Types.Double, converted, typeLen, precision, scale, fieldID))
+	}
+	return nil
+}
+
+// NewSchemaFromStruct generates a schema from an object type via reflection of
+// the type and reading struct tags for "parquet".
+//
+// Rules
+//
+// Everything defaults to Required repetition, unless otherwise specified.
+// Pointer types become Optional repetition.
+// Arrays and Slices become logical List types unless using the tag `repetition=repeated`.
+//
+// A length specified byte field (like [5]byte) becomes a fixed_len_byte_array of that length
+// unless otherwise specified by tags.
+//
+// string and []byte both become ByteArray unless otherwise specified.
+//
+// Integer types will default to having a logical type of the appropriate bit width
+// and signedness rather than having no logical type, ie: an int8 will become an int32
+// node with logical type Int(bitWidth=8, signed=true).
+//
+// Structs will become group nodes with the fields of the struct as the fields of the group,
+// recursively creating the nodes.
+//
+// maps will become appropriate Map structures in the schema of the defined key and values.
+//
+// Available Tags
+//
+// name: by default the node will have the same name as the field, this tag let's you specify a name
+//
+// type: Specify the physical type instead of using the field type
+//
+// length: specify the type length of the node, only relevant for fixed_len_byte_array
+//
+// scale: specify the scale for a decimal field
+//
+// precision: specify the precision for a decimal field
+//
+// fieldid: specify the field ID for that node, defaults to -1 which means it is not set in the parquet file.
+//
+// repetition: specify the repetition as something other than what is determined by the type
+//
+// converted: specify the Converted Type of the field
+//
+// logical: specify the logical type of the field, if using decimal then the scale and precision
+// will be determined by the precision and scale fields, or by the logical.precision / logical.scale fields
+// with the logical. prefixed versions taking precedence. For Time or Timestamp logical types,
+// use logical.unit=<millis|micros|nanos> and logical.isadjustedutc=<true|false> to set those. Unit is required
+// isadjustedutc defaults to true. For Integer logical type, use logical.bitwidth and logical.signed to specify
+// those values, with bitwidth being required, and signed defaulting to true.
+//
+// All tags other than name can use a prefix of "key<tagname>=<value>" to refer to the type of the key for a map
+// and "value<tagname>=<value>" to refer to the value type of a map or the element of a list (such as the type of a slice)
+func NewSchemaFromStruct(obj interface{}) (sc *Schema, err error) {
+	ot := reflect.TypeOf(obj)
+	if ot.Kind() == reflect.Ptr {
+		ot = ot.Elem()
+	}
+
+	// typeToNode uses panics to fail fast / fail early instead of propagating
+	// errors up recursive stacks. so we recover here and return it as an error
+	defer func() {
+		if r := recover(); r != nil {
+			sc = nil
+			switch x := r.(type) {
+			case string:
+				err = xerrors.New(x)
+			case error:
+				err = x
+			default:
+				err = xerrors.New("unknown panic")
+			}
+		}
+	}()
+
+	root := typeToNode(ot.Name(), ot, parquet.Repetitions.Repeated, nil)
+	return NewSchema(root.(*GroupNode)), nil
+}
+
+var parquetTypeToReflect = map[parquet.Type]reflect.Type{
+	parquet.Types.Boolean:           reflect.TypeOf(true),
+	parquet.Types.Int32:             reflect.TypeOf(int32(0)),
+	parquet.Types.Int64:             reflect.TypeOf(int64(0)),
+	parquet.Types.Float:             reflect.TypeOf(float32(0)),
+	parquet.Types.Double:            reflect.TypeOf(float64(0)),
+	parquet.Types.Int96:             reflect.TypeOf(parquet.Int96{}),
+	parquet.Types.ByteArray:         reflect.TypeOf(parquet.ByteArray{}),
+	parquet.Types.FixedLenByteArray: reflect.TypeOf(parquet.FixedLenByteArray{}),
+}
+
+func typeFromNode(n Node) reflect.Type {
+	switch n.Type() {
+	case Primitive:
+		typ := parquetTypeToReflect[n.(*PrimitiveNode).PhysicalType()]
+		// if a bytearray field is annoted as a String logical type or a UTF8 converted type
+		// then use a string instead of parquet.ByteArray / parquet.FixedLenByteArray which are []byte
+		if n.LogicalType().Equals(StringLogicalType{}) || n.ConvertedType() == ConvertedTypes.UTF8 {
+			typ = reflect.TypeOf(string(""))
+		}
+
+		if n.RepetitionType() == parquet.Repetitions.Optional {
+			typ = reflect.PtrTo(typ)
+		} else if n.RepetitionType() == parquet.Repetitions.Repeated {
+			typ = reflect.SliceOf(typ)
+		}
+
+		return typ
+	case Group:
+		gnode := n.(*GroupNode)
+		switch gnode.ConvertedType() {
+		case ConvertedTypes.List:
+			// According to the Parquet Spec, a list should always be a 3-level structure
+			//
+			//	<list-repetition> group <name> (LIST) {
+			//		repeated group list {
+			//			<element-repetition> <element-type> element;
+			//		}
+			//	}
+			//
+			// Outer-most level must be a group annotated with LIST containing a single field named "list".
+			// this level must be only optional (if the list is nullable) or required
+			// Middle level, named list, must be repeated group with a single field named "element"
+			// "element" field is the lists element type and repetition, which should be only required or optional
+
+			if gnode.fields.Len() != 1 {
+				panic("invalid list node, should have exactly 1 child.")
+			}
+
+			if gnode.fields[0].RepetitionType() != parquet.Repetitions.Repeated {
+				panic("invalid list node, child should be repeated")
+			}
+
+			// it is required that the repeated group of elements is named "list" and it's element
+			// field is named "element", however existing data may not use this so readers shouldn't
+			// enforce them as errors
+			//
+			// Rules for backward compatibility from the parquet spec:
+			//
+			// 1) if the repeated field is not a group, then it's type is the element type and elements
+			//    must be required.
+			// 2) if the repeated field is a group with multiple fields, then its type is the element type
+			//    and elements must be required.
+			// 3) if the repeated field is a group with one field AND is named either "array" or uses the
+			//    LIST-annotated group's name with "_tuple" suffix, then the repeated type is the element
+			//    type and the elements must be required.
+			// 4) otherwise, the repeated field's type is the element type with the repeated field's repetition
+
+			elemMustBeRequired := false
+			addSlice := false
+			var elemType reflect.Type
+			elemNode := gnode.fields[0]
+			switch {
+			case elemNode.Type() == Primitive,
+				elemNode.(*GroupNode).fields.Len() > 1,
+				elemNode.(*GroupNode).fields.Len() == 1 && (elemNode.Name() == "array" || elemNode.Name() == gnode.Name()+"_tuple"):
+				elemMustBeRequired = true
+				elemType = typeFromNode(elemNode)
+			default:
+				addSlice = true
+				elemType = typeFromNode(elemNode.(*GroupNode).fields[0])
+			}
+
+			if elemMustBeRequired && elemType.Kind() == reflect.Ptr {
+				elemType = elemType.Elem()
+			}
+			if addSlice {
+				elemType = reflect.SliceOf(elemType)
+			}
+			if gnode.RepetitionType() == parquet.Repetitions.Optional {
+				elemType = reflect.PtrTo(elemType)
+			}
+			return elemType
+		case ConvertedTypes.Map, ConvertedTypes.MapKeyValue:
+			// According to the Parquet Spec, the outer-most level should be
+			// a group containing a single field named "key_value" with repetition
+			// either optional or required for whether or not the map is nullable.
+			//
+			// The key_value middle level *must* be a repeated group with a "key" field
+			// and *optionally* a "value" field
+			//
+			// the "key" field *must* be required and must always exist
+			//
+			// the "value" field can be required or optional or omitted.
+			//
+			// 	<map-repetition> group <name> (MAP) {
+			//		repeated group key_value {
+			//			required <key-type> key;
+			//			<value-repetition> <value-type> value;
+			//		}
+			//	}
+
+			if gnode.fields.Len() != 1 {
+				panic("invalid map node, should have exactly 1 child")
+			}
+
+			if gnode.fields[0].Type() != Group {
+				panic("invalid map node, child should be a group node")
+			}
+
+			// that said, this may not be used in existing data and should not be
+			// enforced as errors when reading.
+			//
+			// some data may also incorrectly use MAP_KEY_VALUE instead of MAP
+			//
+			// so any group with MAP_KEY_VALUE that is not contained inside of a "MAP"
+			// group, should be considered equivalent to being a MAP group itself.
+			//
+			// in addition, the fields may not be called "key" and "value" in existing
+			// data, and as such should not be enforced as errors when reading.
+
+			keyval := gnode.fields[0].(*GroupNode)
+
+			keyIndex := keyval.FieldIndexByName("key")
+			if keyIndex == -1 {
+				keyIndex = 0 // use first child if there is no child named "key"
+			}
+
+			keyType := typeFromNode(keyval.fields[keyIndex])
+			if keyType.Kind() == reflect.Ptr {
+				keyType = keyType.Elem()
+			}
+			// can't use a []byte as a key for a map, so use string
+			if keyType == reflect.TypeOf(parquet.ByteArray{}) || keyType == reflect.TypeOf(parquet.FixedLenByteArray{}) {
+				keyType = reflect.TypeOf(string(""))
+			}
+
+			// if the value node is omitted, then consider this a "set" and make it a
+			// map[key-type]bool
+			valType := reflect.TypeOf(true)
+			if keyval.fields.Len() > 1 {
+				valIndex := keyval.FieldIndexByName("value")
+				if valIndex == -1 {
+					valIndex = 1 // use second child if there is no child named "value"
+				}
+
+				valType = typeFromNode(keyval.fields[valIndex])
+			}
+
+			mapType := reflect.MapOf(keyType, valType)
+			if gnode.RepetitionType() == parquet.Repetitions.Optional {
+				mapType = reflect.PtrTo(mapType)
+			}
+			return mapType
+		default:
+			fields := []reflect.StructField{}
+			for _, f := range gnode.fields {
+				fields = append(fields, reflect.StructField{
+					Name:    f.Name(),
+					Type:    typeFromNode(f),
+					PkgPath: "parquet",
+				})
+			}
+
+			structType := reflect.StructOf(fields)
+			if gnode.RepetitionType() == parquet.Repetitions.Repeated {
+				return reflect.SliceOf(structType)
+			}
+			if gnode.RepetitionType() == parquet.Repetitions.Optional {
+				return reflect.PtrTo(structType)
+			}
+			return structType
+		}
+	}
+	panic("what happened?")
+}
+
+// NewStructFromSchema generates a struct type as a reflect.Type from the schema
+// by using the appropriate physical types and making things either pointers or slices
+// based on whether they are repeated/optional/required. It does not use the logical
+// or converted types to change the physical storage so that it is more efficient to use
+// the resulting type for reading without having to do conversions.
+//
+// It will use maps for map types and slices for list types, but otherwise ignores the
+// converted and logical types of the nodes. Group nodes that are not List or Map will
+// be nested structs.
+func NewStructFromSchema(sc *Schema) (t reflect.Type, err error) {
+	defer func() {
+		if r := recover(); r != nil {
+			t = nil
+			switch x := r.(type) {
+			case string:
+				err = xerrors.New(x)
+			case error:
+				err = x
+			default:
+				err = xerrors.New("unknown panic")
+			}
+		}
+	}()
+
+	t = typeFromNode(sc.root)
+	if t.Kind() == reflect.Slice || t.Kind() == reflect.Ptr {
+		return t.Elem(), nil
+	}
+	return
+}
diff --git a/go/parquet/schema/reflection_test.go b/go/parquet/schema/reflection_test.go
new file mode 100644
index 00000000000..ba092159693
--- /dev/null
+++ b/go/parquet/schema/reflection_test.go
@@ -0,0 +1,403 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package schema_test
+
+import (
+	"log"
+	"os"
+	"reflect"
+	"testing"
+
+	"github.com/apache/arrow/go/parquet"
+	"github.com/apache/arrow/go/parquet/schema"
+	"github.com/stretchr/testify/assert"
+)
+
+func ExampleNewSchemaFromStruct_primitives() {
+	type Schema struct {
+		Bool              bool
+		Int8              int8
+		Uint16            uint16
+		Int32             int32
+		Int64             int64
+		Int96             parquet.Int96
+		Float             float32
+		Double            float64
+		ByteArray         string
+		FixedLenByteArray [10]byte
+	}
+
+	sc, err := schema.NewSchemaFromStruct(Schema{})
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	schema.PrintSchema(sc.Root(), os.Stdout, 2)
+
+	// Output:
+	// repeated group field_id=-1 Schema {
+	//   required boolean field_id=-1 Bool;
+	//   required int32 field_id=-1 Int8 (Int(bitWidth=8, isSigned=true));
+	//   required int32 field_id=-1 Uint16 (Int(bitWidth=16, isSigned=false));
+	//   required int32 field_id=-1 Int32 (Int(bitWidth=32, isSigned=true));
+	//   required int64 field_id=-1 Int64 (Int(bitWidth=64, isSigned=true));
+	//   required int96 field_id=-1 Int96;
+	//   required float field_id=-1 Float;
+	//   required double field_id=-1 Double;
+	//   required byte_array field_id=-1 ByteArray;
+	//   required fixed_len_byte_array field_id=-1 FixedLenByteArray;
+	// }
+}
+
+func ExampleNewSchemaFromStruct_convertedtypes() {
+	type ConvertedSchema struct {
+		Utf8           string        `parquet:"name=utf8, converted=UTF8"`
+		Uint32         uint32        `parquet:"converted=INT_32"`
+		Date           int32         `parquet:"name=date, converted=date"`
+		TimeMilli      int32         `parquet:"name=timemilli, converted=TIME_MILLIS"`
+		TimeMicro      int64         `parquet:"name=timemicro, converted=time_micros"`
+		TimeStampMilli int64         `parquet:"converted=timestamp_millis"`
+		TimeStampMicro int64         `parquet:"converted=timestamp_micros"`
+		Interval       parquet.Int96 `parquet:"converted=INTERVAL"`
+		Decimal1       int32         `parquet:"converted=decimal, scale=2, precision=9"`
+		Decimal2       int64         `parquet:"converted=decimal, scale=2, precision=18"`
+		Decimal3       [12]byte      `parquet:"converted=decimal, scale=2, precision=10"`
+		Decimal4       string        `parquet:"converted=decimal, scale=2, precision=20"`
+	}
+
+	sc, err := schema.NewSchemaFromStruct(&ConvertedSchema{})
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	schema.PrintSchema(sc.Root(), os.Stdout, 2)
+
+	// Output:
+	// repeated group field_id=-1 ConvertedSchema {
+	//   required byte_array field_id=-1 utf8 (String);
+	//   required int32 field_id=-1 Uint32 (Int(bitWidth=32, isSigned=true));
+	//   required int32 field_id=-1 date (Date);
+	//   required int32 field_id=-1 timemilli (Time(isAdjustedToUTC=true, timeUnit=milliseconds));
+	//   required int64 field_id=-1 timemicro (Time(isAdjustedToUTC=true, timeUnit=microseconds));
+	//   required int64 field_id=-1 TimeStampMilli (Timestamp(isAdjustedToUTC=true, timeUnit=milliseconds, is_from_converted_type=true, force_set_converted_type=false));
+	//   required int64 field_id=-1 TimeStampMicro (Timestamp(isAdjustedToUTC=true, timeUnit=microseconds, is_from_converted_type=true, force_set_converted_type=false));
+	//   required int96 field_id=-1 Interval;
+	//   required int32 field_id=-1 Decimal1 (Decimal(precision=9, scale=2));
+	//   required int64 field_id=-1 Decimal2 (Decimal(precision=18, scale=2));
+	//   required fixed_len_byte_array field_id=-1 Decimal3 (Decimal(precision=10, scale=2));
+	//   required byte_array field_id=-1 Decimal4 (Decimal(precision=20, scale=2));
+	// }
+}
+
+func ExampleNewSchemaFromStruct_repetition() {
+	type RepetitionSchema struct {
+		List     []int64 `parquet:"fieldid=1"`
+		Repeated []int64 `parquet:"repetition=repeated, fieldid=2"`
+		Optional *int64  `parquet:"fieldid=3"`
+		Required *int64  `parquet:"repetition=REQUIRED, fieldid=4"`
+		Opt      int64   `parquet:"repetition=OPTIONAL, fieldid=5"`
+	}
+
+	sc, err := schema.NewSchemaFromStruct(RepetitionSchema{})
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	schema.PrintSchema(sc.Root(), os.Stdout, 2)
+
+	// Output:
+	// repeated group field_id=-1 RepetitionSchema {
+	//   required group field_id=1 List (List) {
+	//     repeated group field_id=-1 list {
+	//       required int64 field_id=-1 element (Int(bitWidth=64, isSigned=true));
+	//     }
+	//   }
+	//   repeated int64 field_id=2 Repeated (Int(bitWidth=64, isSigned=true));
+	//   optional int64 field_id=3 Optional (Int(bitWidth=64, isSigned=true));
+	//   required int64 field_id=4 Required (Int(bitWidth=64, isSigned=true));
+	//   optional int64 field_id=5 Opt (Int(bitWidth=64, isSigned=true));
+	// }
+}
+
+func ExampleNewSchemaFromStruct_logicaltypes() {
+	type LogicalTypes struct {
+		String                []byte   `parquet:"logical=String"`
+		Enum                  string   `parquet:"logical=enum"`
+		Date                  int32    `parquet:"logical=date"`
+		Decimal1              int32    `parquet:"logical=decimal, precision=9, scale=2"`
+		Decimal2              int32    `parquet:"logical=decimal, logical.precision=9, scale=2"`
+		Decimal3              int32    `parquet:"logical=decimal, precision=5, logical.precision=9, scale=1, logical.scale=3"`
+		TimeMilliUTC          int32    `parquet:"logical=TIME, logical.unit=millis"`
+		TimeMilli             int32    `parquet:"logical=Time, logical.unit=millis, logical.isadjustedutc=false"`
+		TimeMicros            int64    `parquet:"logical=time, logical.unit=micros, logical.isadjustedutc=false"`
+		TimeMicrosUTC         int64    `parquet:"logical=time, logical.unit=micros, logical.isadjustedutc=true"`
+		TimeNanos             int64    `parquet:"logical=time, logical.unit=nanos"`
+		TimestampMilli        int64    `parquet:"logical=timestamp, logical.unit=millis"`
+		TimestampMicrosNotUTC int64    `parquet:"logical=timestamp, logical.unit=micros, logical.isadjustedutc=false"`
+		TimestampNanos        int64    `parquet:"logical=timestamp, logical.unit=nanos"`
+		JSON                  string   `parquet:"logical=json"`
+		BSON                  []byte   `parquet:"logical=BSON"`
+		UUID                  [16]byte `parquet:"logical=uuid"`
+	}
+
+	sc, err := schema.NewSchemaFromStruct(LogicalTypes{})
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	schema.PrintSchema(sc.Root(), os.Stdout, 2)
+
+	// Output:
+	// repeated group field_id=-1 LogicalTypes {
+	//   required byte_array field_id=-1 String (String);
+	//   required byte_array field_id=-1 Enum (Enum);
+	//   required int32 field_id=-1 Date (Date);
+	//   required int32 field_id=-1 Decimal1 (Decimal(precision=9, scale=2));
+	//   required int32 field_id=-1 Decimal2 (Decimal(precision=9, scale=2));
+	//   required int32 field_id=-1 Decimal3 (Decimal(precision=9, scale=3));
+	//   required int32 field_id=-1 TimeMilliUTC (Time(isAdjustedToUTC=true, timeUnit=milliseconds));
+	//   required int32 field_id=-1 TimeMilli (Time(isAdjustedToUTC=false, timeUnit=milliseconds));
+	//   required int64 field_id=-1 TimeMicros (Time(isAdjustedToUTC=false, timeUnit=microseconds));
+	//   required int64 field_id=-1 TimeMicrosUTC (Time(isAdjustedToUTC=true, timeUnit=microseconds));
+	//   required int64 field_id=-1 TimeNanos (Time(isAdjustedToUTC=true, timeUnit=nanoseconds));
+	//   required int64 field_id=-1 TimestampMilli (Timestamp(isAdjustedToUTC=true, timeUnit=milliseconds, is_from_converted_type=false, force_set_converted_type=false));
+	//   required int64 field_id=-1 TimestampMicrosNotUTC (Timestamp(isAdjustedToUTC=false, timeUnit=microseconds, is_from_converted_type=false, force_set_converted_type=false));
+	//   required int64 field_id=-1 TimestampNanos (Timestamp(isAdjustedToUTC=true, timeUnit=nanoseconds, is_from_converted_type=false, force_set_converted_type=false));
+	//   required byte_array field_id=-1 JSON (JSON);
+	//   required byte_array field_id=-1 BSON (BSON);
+	//   required fixed_len_byte_array field_id=-1 UUID (UUID);
+	// }
+}
+
+func ExampleNewSchemaFromStruct_physicaltype() {
+	type ChangeTypes struct {
+		Int32        int64  `parquet:"type=int32"`
+		FixedLen     string `parquet:"type=fixed_len_byte_array, length=10"`
+		SliceAsFixed []byte `parquet:"type=fixed_len_byte_array, length=12"`
+		Int          int    `parquet:"type=int32"`
+	}
+
+	sc, err := schema.NewSchemaFromStruct(ChangeTypes{})
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	schema.PrintSchema(sc.Root(), os.Stdout, 2)
+
+	// Output:
+	// repeated group field_id=-1 ChangeTypes {
+	//   required int32 field_id=-1 Int32 (Int(bitWidth=32, isSigned=true));
+	//   required fixed_len_byte_array field_id=-1 FixedLen;
+	//   required fixed_len_byte_array field_id=-1 SliceAsFixed;
+	//   required int32 field_id=-1 Int (Int(bitWidth=32, isSigned=true));
+	// }
+}
+
+func ExampleNewSchemaFromStruct_nestedtypes() {
+	type Other struct {
+		OptionalMap *map[string]*string `parquet:"valuerepetition=required, keylogical=String, valueconverted=BSON"`
+	}
+
+	type MyMap map[int32]string
+
+	type Nested struct {
+		SimpleMap     map[int32]string
+		FixedLenMap   map[string][]byte `parquet:"keytype=fixed_len_byte_array, keyfieldid=10, valuefieldid=11, keylength=10"`
+		DecimalMap    map[int32]string  `parquet:"logical=map, keyconverted=DECIMAL, keyscale=3, keyprecision=7, valuetype=fixed_len_byte_array, valuelength=4, valuelogical=decimal, valuelogical.precision=9, valuescale=2"`
+		OtherList     []*Other
+		OtherRepeated []Other  `parquet:"repetition=repeated"`
+		DateArray     [5]int32 `parquet:"valuelogical=date, logical=list"`
+		DateMap       MyMap    `parquet:"keylogical=TIME, keylogical.unit=MILLIS, keylogical.isadjustedutc=false, valuelogical=enum"`
+	}
+
+	sc, err := schema.NewSchemaFromStruct(Nested{})
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	schema.PrintSchema(sc.Root(), os.Stdout, 2)
+
+	// Output:
+	// repeated group field_id=-1 Nested {
+	//   required group field_id=-1 SimpleMap (Map) {
+	//     repeated group field_id=-1 key_value {
+	//       required int32 field_id=-1 key (Int(bitWidth=32, isSigned=true));
+	//       required byte_array field_id=-1 value;
+	//     }
+	//   }
+	//   required group field_id=-1 FixedLenMap (Map) {
+	//     repeated group field_id=-1 key_value {
+	//       required fixed_len_byte_array field_id=10 key;
+	//       required byte_array field_id=11 value;
+	//     }
+	//   }
+	//   required group field_id=-1 DecimalMap (Map) {
+	//     repeated group field_id=-1 key_value {
+	//       required int32 field_id=-1 key (Decimal(precision=7, scale=3));
+	//       required fixed_len_byte_array field_id=-1 value (Decimal(precision=9, scale=2));
+	//     }
+	//   }
+	//   required group field_id=-1 OtherList (List) {
+	//     repeated group field_id=-1 list {
+	//       optional group field_id=-1 element {
+	//         optional group field_id=-1 OptionalMap (Map) {
+	//           repeated group field_id=-1 key_value {
+	//             required byte_array field_id=-1 key (String);
+	//             required byte_array field_id=-1 value (BSON);
+	//           }
+	//         }
+	//       }
+	//     }
+	//   }
+	//   repeated group field_id=-1 OtherRepeated {
+	//     optional group field_id=-1 OptionalMap (Map) {
+	//       repeated group field_id=-1 key_value {
+	//         required byte_array field_id=-1 key (String);
+	//         required byte_array field_id=-1 value (BSON);
+	//       }
+	//     }
+	//   }
+	//   required group field_id=-1 DateArray (List) {
+	//     repeated group field_id=-1 list {
+	//       required int32 field_id=-1 element (Date);
+	//     }
+	//   }
+	//   required group field_id=-1 DateMap (Map) {
+	//     repeated group field_id=-1 key_value {
+	//       required int32 field_id=-1 key (Time(isAdjustedToUTC=false, timeUnit=milliseconds));
+	//       required byte_array field_id=-1 value (Enum);
+	//     }
+	//   }
+	// }
+}
+
+func TestStructFromSchema(t *testing.T) {
+	root, err := schema.NewGroupNode("schema", parquet.Repetitions.Repeated, schema.FieldList{
+		schema.NewBooleanNode("bool", parquet.Repetitions.Required, -1),
+		schema.NewInt32Node("int32", parquet.Repetitions.Optional, -1),
+		schema.NewInt64Node("int64", parquet.Repetitions.Repeated, -1),
+		schema.NewInt96Node("int96", parquet.Repetitions.Required, -1),
+		schema.NewFloat32Node("float", parquet.Repetitions.Required, -1),
+		schema.NewByteArrayNode("bytearray", parquet.Repetitions.Required, -1),
+		schema.NewFixedLenByteArrayNode("fixedLen", parquet.Repetitions.Required, 10, -1),
+	}, -1)
+	assert.NoError(t, err)
+
+	sc := schema.NewSchema(root)
+
+	typ, err := schema.NewStructFromSchema(sc)
+	assert.NoError(t, err)
+
+	assert.Equal(t, reflect.Struct, typ.Kind())
+	assert.Equal(t, "struct { bool bool; int32 *int32; int64 []int64; int96 parquet.Int96; float float32; bytearray parquet.ByteArray; fixedLen parquet.FixedLenByteArray }",
+		typ.String())
+}
+
+func TestStructFromSchemaWithNesting(t *testing.T) {
+	type Other struct {
+		List *[]*float32
+	}
+
+	type Nested struct {
+		Nest         []int32
+		OptionalNest []*int64
+		Mapped       map[string]float32
+		Other        []Other
+		Other2       Other
+	}
+
+	sc, err := schema.NewSchemaFromStruct(Nested{})
+	assert.NoError(t, err)
+
+	typ, err := schema.NewStructFromSchema(sc)
+	assert.NoError(t, err)
+	assert.Equal(t, "struct { Nest []int32; OptionalNest []*int64; Mapped map[string]float32; Other []struct { List *[]*float32 }; Other2 struct { List *[]*float32 } }",
+		typ.String())
+}
+
+func TestStructFromSchemaBackwardsCompatList(t *testing.T) {
+	tests := []struct {
+		name     string
+		n        schema.Node
+		expected string
+	}{
+		{"proper list", schema.MustGroup(schema.NewGroupNodeLogical("my_list", parquet.Repetitions.Required,
+			schema.FieldList{
+				schema.MustGroup(schema.NewGroupNode("list", parquet.Repetitions.Repeated, schema.FieldList{schema.NewBooleanNode("element", parquet.Repetitions.Optional, -1)}, -1)),
+			}, schema.NewListLogicalType(), -1)), "struct { my_list []*bool }"},
+		{"backward nullable list nonnull ints", schema.MustGroup(schema.NewGroupNodeLogical("my_list", parquet.Repetitions.Optional, schema.FieldList{
+			schema.NewInt32Node("element", parquet.Repetitions.Repeated, -1),
+		}, schema.NewListLogicalType(), -1)), "struct { my_list *[]int32 }"},
+		{"backward nullable list tuple string int", schema.MustGroup(schema.NewGroupNodeLogical("my_list", parquet.Repetitions.Optional, schema.FieldList{
+			schema.MustGroup(schema.NewGroupNode("element", parquet.Repetitions.Repeated, schema.FieldList{
+				schema.MustPrimitive(schema.NewPrimitiveNodeLogical("str", parquet.Repetitions.Required, schema.StringLogicalType{}, parquet.Types.ByteArray, 0, -1)),
+				schema.NewInt32Node("num", parquet.Repetitions.Required, -1),
+			}, -1)),
+		}, schema.NewListLogicalType(), -1)), "struct { my_list *[]struct { str string; num int32 } }"},
+		{"list tuple string", schema.MustGroup(schema.NewGroupNodeLogical("my_list", parquet.Repetitions.Required, schema.FieldList{
+			schema.MustGroup(schema.NewGroupNode("array", parquet.Repetitions.Repeated, schema.FieldList{
+				schema.NewByteArrayNode("str", parquet.Repetitions.Required, -1),
+			}, -1)),
+		}, schema.NewListLogicalType(), -1)), "struct { my_list []struct { str parquet.ByteArray } }"},
+		{"list tuple string my_list_tuple", schema.MustGroup(schema.NewGroupNodeLogical("my_list", parquet.Repetitions.Optional, schema.FieldList{
+			schema.MustGroup(schema.NewGroupNode("my_list_tuple", parquet.Repetitions.Repeated, schema.FieldList{
+				schema.MustPrimitive(schema.NewPrimitiveNodeLogical("str", parquet.Repetitions.Required, schema.StringLogicalType{}, parquet.Types.ByteArray, 0, -1)),
+			}, -1)),
+		}, schema.NewListLogicalType(), -1)), "struct { my_list *[]struct { str string } }"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			typ, err := schema.NewStructFromSchema(schema.NewSchema(schema.MustGroup(schema.NewGroupNode("schema", parquet.Repetitions.Repeated, schema.FieldList{tt.n}, -1))))
+			assert.NoError(t, err)
+			assert.Equal(t, tt.expected, typ.String())
+		})
+	}
+}
+
+func TestStructFromSchemaMaps(t *testing.T) {
+	tests := []struct {
+		name     string
+		n        schema.Node
+		expected string
+	}{
+		{"map string int", schema.MustGroup(schema.NewGroupNodeLogical("my_map", parquet.Repetitions.Required, schema.FieldList{
+			schema.MustGroup(schema.NewGroupNode("key_value", parquet.Repetitions.Repeated, schema.FieldList{
+				schema.MustPrimitive(schema.NewPrimitiveNodeLogical("key", parquet.Repetitions.Required, schema.StringLogicalType{}, parquet.Types.ByteArray, 0, -1)),
+				schema.NewInt32Node("value", parquet.Repetitions.Optional, -1),
+			}, -1)),
+		}, schema.MapLogicalType{}, -1)), "struct { my_map map[string]*int32 }"},
+		{"nullable map string, int, required values", schema.MustGroup(schema.NewGroupNodeLogical("my_map", parquet.Repetitions.Optional, schema.FieldList{
+			schema.MustGroup(schema.NewGroupNode("map", parquet.Repetitions.Repeated, schema.FieldList{
+				schema.NewByteArrayNode("str", parquet.Repetitions.Required, -1),
+				schema.NewInt32Node("num", parquet.Repetitions.Required, -1),
+			}, -1)),
+		}, schema.MapLogicalType{}, -1)), "struct { my_map *map[string]int32 }"},
+		{"map_key_value with missing value", schema.MustGroup(schema.NewGroupNodeConverted("my_map", parquet.Repetitions.Optional, schema.FieldList{
+			schema.MustGroup(schema.NewGroupNode("map", parquet.Repetitions.Repeated, schema.FieldList{
+				schema.NewByteArrayNode("key", parquet.Repetitions.Required, -1),
+			}, -1)),
+		}, schema.ConvertedTypes.MapKeyValue, -1)), "struct { my_map *map[string]bool }"},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			typ, err := schema.NewStructFromSchema(schema.NewSchema(schema.MustGroup(schema.NewGroupNode("schema", parquet.Repetitions.Repeated, schema.FieldList{tt.n}, -1))))
+			assert.NoError(t, err)
+			assert.Equal(t, tt.expected, typ.String())
+		})
+	}
+}
diff --git a/go/parquet/schema/schema.go b/go/parquet/schema/schema.go
new file mode 100644
index 00000000000..9402edc6f1f
--- /dev/null
+++ b/go/parquet/schema/schema.go
@@ -0,0 +1,328 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package schema provides types and functions for manipulating and building parquet
+// file schemas.
+//
+// Some of the utilities provided include building a schema using Struct Tags
+// on a struct type, getting Column Paths from a node, and dealing with the
+// converted and logical types for Parquet.
+//
+// Logical types specify ways to interpret the primitive types allowing the
+// number of primitive types to be smaller and reuse efficient encodings.
+// For instance a "string" is just a ByteArray column with a UTF-8 annotation
+// or "String Logical Type".
+//
+// For more information about Logical and Converted Types, check:
+// https://github.com/apache/parquet-format/blob/master/LogicalTypes.md
+package schema
+
+import (
+	"fmt"
+	"io"
+	"strings"
+
+	"github.com/apache/arrow/go/parquet"
+	format "github.com/apache/arrow/go/parquet/internal/gen-go/parquet"
+	"golang.org/x/xerrors"
+)
+
+// Schema is the container for the converted Parquet schema with a computed
+// information from the schema analysis needed for file reading
+//
+// * Column index to Node
+//
+// * Max repetition / definition levels for each primitive node
+//
+// The ColumnDescriptor objects produced by this class can be used to assist in
+// the reconstruction of fully materialized data structures from the
+// repetition-definition level encoding of nested data
+type Schema struct {
+	root Node
+
+	leaves      []*Column
+	nodeToLeaf  map[*PrimitiveNode]int
+	leafToBase  map[int]Node
+	leafToIndex strIntMultimap
+}
+
+// FromParquet converts a slice of thrift Schema Elements to the correct node type
+func FromParquet(elems []*format.SchemaElement) (Node, error) {
+	if len(elems) == 0 {
+		return nil, xerrors.New("parquet: empty schema (no root)")
+	}
+
+	if elems[0].GetNumChildren() == 0 {
+		if len(elems) > 1 {
+			return nil, xerrors.New("parquet: schema had multiple nodes but root had no children")
+		}
+		// parquet file with no columns
+		return GroupNodeFromThrift(elems[0], []Node{})
+	}
+
+	// We don't check that the root node is repeated since this is not
+	// consistently set by implementations
+	var (
+		pos      = 0
+		nextNode func() (Node, error)
+	)
+
+	nextNode = func() (Node, error) {
+		if pos == len(elems) {
+			return nil, xerrors.New("parquet: malformed schema: not enough elements")
+		}
+
+		elem := elems[pos]
+		pos++
+
+		if elem.GetNumChildren() == 0 {
+			return PrimitiveNodeFromThrift(elem)
+		}
+
+		fields := make([]Node, 0, elem.GetNumChildren())
+		for i := 0; i < int(elem.GetNumChildren()); i++ {
+			n, err := nextNode()
+			if err != nil {
+				return nil, err
+			}
+			fields = append(fields, n)
+		}
+
+		return GroupNodeFromThrift(elem, fields)
+	}
+
+	return nextNode()
+}
+
+// Root returns the group node that is the root of this schema
+func (s *Schema) Root() *GroupNode {
+	return s.root.(*GroupNode)
+}
+
+// NumColumns returns the number of leaf nodes that are the actual primitive
+// columns in this schema.
+func (s *Schema) NumColumns() int {
+	return len(s.leaves)
+}
+
+// Equals returns true as long as the leaf columns are equal, doesn't take
+// into account the groups and only checks whether the schemas are compatible
+// at the physical storage level.
+func (s *Schema) Equals(rhs *Schema) bool {
+	if s.NumColumns() != rhs.NumColumns() {
+		return false
+	}
+
+	for idx, c := range s.leaves {
+		if !c.Equals(rhs.Column(idx)) {
+			return false
+		}
+	}
+	return true
+}
+
+func (s *Schema) buildTree(n Node, maxDefLvl, maxRepLvl int16, base Node) {
+	switch n.RepetitionType() {
+	case parquet.Repetitions.Repeated:
+		maxRepLvl++
+		fallthrough
+	case parquet.Repetitions.Optional:
+		maxDefLvl++
+	}
+
+	switch n := n.(type) {
+	case *GroupNode:
+		for _, f := range n.fields {
+			s.buildTree(f, maxDefLvl, maxRepLvl, base)
+		}
+	case *PrimitiveNode:
+		s.nodeToLeaf[n] = len(s.leaves)
+		s.leaves = append(s.leaves, NewColumn(n, maxDefLvl, maxRepLvl))
+		s.leafToBase[len(s.leaves)-1] = base
+		s.leafToIndex.Add(n.Path(), len(s.leaves)-1)
+	}
+}
+
+// Column returns the (0-indexed) column of the provided index.
+func (s *Schema) Column(i int) *Column {
+	return s.leaves[i]
+}
+
+// ColumnIndexByName looks up the column by it's full dot separated
+// node path. If there are multiple columns that match, it returns the first one.
+//
+// Returns -1 if not found.
+func (s *Schema) ColumnIndexByName(nodePath string) int {
+	if search, ok := s.leafToIndex[nodePath]; ok {
+		return search[0]
+	}
+	return -1
+}
+
+// ColumnIndexByNode returns the index of the column represented by this node.
+//
+// Returns -1 if not found.
+func (s *Schema) ColumnIndexByNode(n Node) int {
+	if search, ok := s.leafToIndex[n.Path()]; ok {
+		for _, idx := range search {
+			if n == s.Column(idx).SchemaNode() {
+				return idx
+			}
+		}
+	}
+	return -1
+}
+
+// ColumnRoot returns the root node of a given column if it is under a
+// nested group node, providing that root group node.
+func (s *Schema) ColumnRoot(i int) Node {
+	return s.leafToBase[i]
+}
+
+// HasRepeatedFields returns true if any node in the schema has a repeated field type.
+func (s *Schema) HasRepeatedFields() bool {
+	return s.root.(*GroupNode).HasRepeatedFields()
+}
+
+// UpdateColumnOrders must get a slice that is the same length as the number of leaf columns
+// and is used to update the schema metadata Column Orders. len(orders) must equal s.NumColumns()
+func (s *Schema) UpdateColumnOrders(orders []parquet.ColumnOrder) error {
+	if len(orders) != s.NumColumns() {
+		return xerrors.New("parquet: malformed schema: not enough ColumnOrder values")
+	}
+
+	visitor := schemaColumnOrderUpdater{orders, 0}
+	s.root.Visit(&visitor)
+	return nil
+}
+
+// NewSchema constructs a new Schema object from a root group node.
+//
+// Any fields with a field-id of -1 will be given an appropriate field number based on their order.
+func NewSchema(root *GroupNode) *Schema {
+	s := &Schema{
+		root,
+		make([]*Column, 0),
+		make(map[*PrimitiveNode]int),
+		make(map[int]Node),
+		make(strIntMultimap),
+	}
+
+	for _, f := range root.fields {
+		s.buildTree(f, 0, 0, f)
+	}
+	return s
+}
+
+type schemaColumnOrderUpdater struct {
+	colOrders []parquet.ColumnOrder
+	leafCount int
+}
+
+func (s *schemaColumnOrderUpdater) VisitPre(n Node) bool {
+	if n.Type() == Primitive {
+		leaf := n.(*PrimitiveNode)
+		leaf.ColumnOrder = s.colOrders[s.leafCount]
+		s.leafCount++
+	}
+	return true
+}
+
+func (s *schemaColumnOrderUpdater) VisitPost(Node) {}
+
+type toThriftVisitor struct {
+	elements []*format.SchemaElement
+}
+
+func (t *toThriftVisitor) VisitPre(n Node) bool {
+	t.elements = append(t.elements, n.toThrift())
+	return true
+}
+
+func (t *toThriftVisitor) VisitPost(Node) {}
+
+// ToThrift converts a GroupNode to a slice of SchemaElements which is used
+// for thrift serialization.
+func ToThrift(schema *GroupNode) []*format.SchemaElement {
+	t := &toThriftVisitor{make([]*format.SchemaElement, 0)}
+	schema.Visit(t)
+	return t.elements
+}
+
+type schemaPrinter struct {
+	w           io.Writer
+	indent      int
+	indentWidth int
+}
+
+func (s *schemaPrinter) VisitPre(n Node) bool {
+	fmt.Fprint(s.w, strings.Repeat(" ", s.indent))
+	if n.Type() == Group {
+		g := n.(*GroupNode)
+		fmt.Fprintf(s.w, "%s group field_id=%d %s", g.RepetitionType(), g.FieldID(), g.Name())
+		_, invalid := g.logicalType.(UnknownLogicalType)
+		_, none := g.logicalType.(NoLogicalType)
+
+		if g.logicalType != nil && !invalid && !none {
+			fmt.Fprintf(s.w, " (%s)", g.logicalType)
+		} else if g.convertedType != ConvertedTypes.None {
+			fmt.Fprintf(s.w, " (%s)", g.convertedType)
+		}
+
+		fmt.Fprintln(s.w, " {")
+		s.indent += s.indentWidth
+	} else {
+		p := n.(*PrimitiveNode)
+		fmt.Fprintf(s.w, "%s %s field_id=%d %s", p.RepetitionType(), strings.ToLower(p.PhysicalType().String()), p.FieldID(), p.Name())
+		_, invalid := p.logicalType.(UnknownLogicalType)
+		_, none := p.logicalType.(NoLogicalType)
+
+		if p.logicalType != nil && !invalid && !none {
+			fmt.Fprintf(s.w, " (%s)", p.logicalType)
+		} else if p.convertedType == ConvertedTypes.Decimal {
+			fmt.Fprintf(s.w, " (%s(%d,%d))", p.convertedType, p.DecimalMetadata().Precision, p.DecimalMetadata().Scale)
+		} else if p.convertedType != ConvertedTypes.None {
+			fmt.Fprintf(s.w, " (%s)", p.convertedType)
+		}
+		fmt.Fprintln(s.w, ";")
+	}
+	return true
+}
+
+func (s *schemaPrinter) VisitPost(n Node) {
+	if n.Type() == Group {
+		s.indent -= s.indentWidth
+		fmt.Fprint(s.w, strings.Repeat(" ", s.indent))
+		fmt.Fprintln(s.w, "}")
+	}
+}
+
+// PrintSchema writes a string representation of the tree to w using the indent
+// width provided.
+func PrintSchema(n Node, w io.Writer, indentWidth int) {
+	n.Visit(&schemaPrinter{w, 0, indentWidth})
+}
+
+type strIntMultimap map[string][]int
+
+func (f strIntMultimap) Add(key string, val int) bool {
+	if _, ok := f[key]; !ok {
+		f[key] = []int{val}
+		return false
+	}
+	f[key] = append(f[key], val)
+	return true
+}
diff --git a/go/parquet/schema/schema_element_test.go b/go/parquet/schema/schema_element_test.go
new file mode 100644
index 00000000000..7a43d243215
--- /dev/null
+++ b/go/parquet/schema/schema_element_test.go
@@ -0,0 +1,514 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package schema
+
+import (
+	"testing"
+
+	"github.com/apache/arrow/go/parquet"
+	format "github.com/apache/arrow/go/parquet/internal/gen-go/parquet"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/suite"
+)
+
+type schemaElementConstruction struct {
+	node            Node
+	element         *format.SchemaElement
+	name            string
+	expectConverted bool
+	converted       ConvertedType
+	expectLogical   bool
+	checkLogical    func(*format.SchemaElement) bool
+}
+
+type decimalSchemaElementConstruction struct {
+	schemaElementConstruction
+	precision int
+	scale     int
+}
+
+type temporalSchemaElementConstruction struct {
+	schemaElementConstruction
+	adjusted bool
+	unit     TimeUnitType
+	getUnit  func(*format.SchemaElement) *format.TimeUnit
+}
+
+type intSchemaElementConstruction struct {
+	schemaElementConstruction
+	width  int8
+	signed bool
+}
+
+type legacySchemaElementConstructArgs struct {
+	name            string
+	physical        parquet.Type
+	len             int
+	expectConverted bool
+	converted       ConvertedType
+	expectLogical   bool
+	checkLogical    func(*format.SchemaElement) bool
+}
+
+type schemaElementConstructArgs struct {
+	name            string
+	logical         LogicalType
+	physical        parquet.Type
+	len             int
+	expectConverted bool
+	converted       ConvertedType
+	expectLogical   bool
+	checkLogical    func(*format.SchemaElement) bool
+}
+type SchemaElementConstructionSuite struct {
+	suite.Suite
+}
+
+func (s *SchemaElementConstructionSuite) reconstruct(c schemaElementConstructArgs) *schemaElementConstruction {
+	ret := &schemaElementConstruction{
+		node:            MustPrimitive(NewPrimitiveNodeLogical(c.name, parquet.Repetitions.Required, c.logical, c.physical, c.len, -1)),
+		name:            c.name,
+		expectConverted: c.expectConverted,
+		converted:       c.converted,
+		expectLogical:   c.expectLogical,
+		checkLogical:    c.checkLogical,
+	}
+	ret.element = ret.node.toThrift()
+	return ret
+}
+
+func (s *SchemaElementConstructionSuite) legacyReconstruct(c legacySchemaElementConstructArgs) *schemaElementConstruction {
+	ret := &schemaElementConstruction{
+		node:            MustPrimitive(NewPrimitiveNodeConverted(c.name, parquet.Repetitions.Required, c.physical, c.converted, c.len, 0, 0, -1)),
+		name:            c.name,
+		expectConverted: c.expectConverted,
+		converted:       c.converted,
+		expectLogical:   c.expectLogical,
+		checkLogical:    c.checkLogical,
+	}
+	ret.element = ret.node.toThrift()
+	return ret
+}
+
+func (s *SchemaElementConstructionSuite) inspect(c *schemaElementConstruction) {
+	if c.expectConverted {
+		s.True(c.element.IsSetConvertedType())
+		s.Equal(c.converted, ConvertedType(*c.element.ConvertedType))
+	} else {
+		s.False(c.element.IsSetConvertedType())
+	}
+	if c.expectLogical {
+		s.True(c.element.IsSetLogicalType())
+		s.True(c.checkLogical(c.element))
+	} else {
+		s.False(c.element.IsSetLogicalType())
+	}
+}
+
+func (s *SchemaElementConstructionSuite) TestSimple() {
+	checkNone := func(*format.SchemaElement) bool { return true }
+
+	tests := []struct {
+		name   string
+		args   *schemaElementConstructArgs
+		legacy *legacySchemaElementConstructArgs
+	}{
+		{"string", &schemaElementConstructArgs{
+			"string", StringLogicalType{}, parquet.Types.ByteArray, -1, true, ConvertedTypes.UTF8, true,
+			func(e *format.SchemaElement) bool { return e.LogicalType.IsSetSTRING() },
+		}, nil},
+		{"enum", &schemaElementConstructArgs{
+			"enum", EnumLogicalType{}, parquet.Types.ByteArray, -1, true, ConvertedTypes.Enum, true,
+			func(e *format.SchemaElement) bool { return e.LogicalType.IsSetENUM() },
+		}, nil},
+		{"date", &schemaElementConstructArgs{
+			"date", DateLogicalType{}, parquet.Types.Int32, -1, true, ConvertedTypes.Date, true,
+			func(e *format.SchemaElement) bool { return e.LogicalType.IsSetDATE() },
+		}, nil},
+		{"interval", &schemaElementConstructArgs{
+			"interval", IntervalLogicalType{}, parquet.Types.FixedLenByteArray, 12, true, ConvertedTypes.Interval, false,
+			checkNone,
+		}, nil},
+		{"null", &schemaElementConstructArgs{
+			"null", NullLogicalType{}, parquet.Types.Double, -1, false, ConvertedTypes.NA, true,
+			func(e *format.SchemaElement) bool { return e.LogicalType.IsSetUNKNOWN() },
+		}, nil},
+		{"json", &schemaElementConstructArgs{
+			"json", JSONLogicalType{}, parquet.Types.ByteArray, -1, true, ConvertedTypes.JSON, true,
+			func(e *format.SchemaElement) bool { return e.LogicalType.IsSetJSON() },
+		}, nil},
+		{"bson", &schemaElementConstructArgs{
+			"bson", BSONLogicalType{}, parquet.Types.ByteArray, -1, true, ConvertedTypes.BSON, true,
+			func(e *format.SchemaElement) bool { return e.LogicalType.IsSetBSON() },
+		}, nil},
+		{"uuid", &schemaElementConstructArgs{
+			"uuid", UUIDLogicalType{}, parquet.Types.FixedLenByteArray, 16, false, ConvertedTypes.NA, true,
+			func(e *format.SchemaElement) bool { return e.LogicalType.IsSetUUID() },
+		}, nil},
+		{"none", &schemaElementConstructArgs{
+			"none", NoLogicalType{}, parquet.Types.Int64, -1, false, ConvertedTypes.NA, false,
+			checkNone,
+		}, nil},
+		{"unknown", &schemaElementConstructArgs{
+			"unknown", UnknownLogicalType{}, parquet.Types.Int64, -1, true, ConvertedTypes.NA, false,
+			checkNone,
+		}, nil},
+		{"timestamp_ms", nil, &legacySchemaElementConstructArgs{
+			"timestamp_ms", parquet.Types.Int64, -1, true, ConvertedTypes.TimestampMillis, false, checkNone}},
+		{"timestamp_us", nil, &legacySchemaElementConstructArgs{
+			"timestamp_us", parquet.Types.Int64, -1, true, ConvertedTypes.TimestampMicros, false, checkNone}},
+	}
+	for _, tt := range tests {
+		s.Run(tt.name, func() {
+			var sc *schemaElementConstruction
+			if tt.args != nil {
+				sc = s.reconstruct(*tt.args)
+			} else {
+				sc = s.legacyReconstruct(*tt.legacy)
+			}
+			s.Equal(tt.name, sc.element.Name)
+			s.inspect(sc)
+		})
+	}
+}
+
+func (s *SchemaElementConstructionSuite) reconstructDecimal(c schemaElementConstructArgs) *decimalSchemaElementConstruction {
+	ret := s.reconstruct(c)
+	dec := c.logical.(*DecimalLogicalType)
+	return &decimalSchemaElementConstruction{*ret, int(dec.Precision()), int(dec.Scale())}
+}
+
+func (s *SchemaElementConstructionSuite) inspectDecimal(d *decimalSchemaElementConstruction) {
+	s.inspect(&d.schemaElementConstruction)
+	s.EqualValues(d.precision, d.element.GetPrecision())
+	s.EqualValues(d.scale, d.element.GetScale())
+	s.EqualValues(d.precision, d.element.LogicalType.DECIMAL.Precision)
+	s.EqualValues(d.scale, d.element.LogicalType.DECIMAL.Scale)
+}
+
+func (s *SchemaElementConstructionSuite) TestDecimal() {
+	checkDecimal := func(p *format.SchemaElement) bool { return p.LogicalType.IsSetDECIMAL() }
+
+	tests := []schemaElementConstructArgs{
+		{
+			name: "decimal16_6", logical: NewDecimalLogicalType(16 /* precision */, 6 /* scale */),
+			physical: parquet.Types.Int64, len: -1, expectConverted: true, converted: ConvertedTypes.Decimal,
+			expectLogical: true, checkLogical: checkDecimal,
+		},
+		{
+			name: "decimal1_0", logical: NewDecimalLogicalType(1 /* precision */, 0 /* scale */),
+			physical: parquet.Types.Int32, len: -1, expectConverted: true, converted: ConvertedTypes.Decimal,
+			expectLogical: true, checkLogical: checkDecimal,
+		},
+		{
+			name: "decimal10", logical: NewDecimalLogicalType(10 /* precision */, 0 /* scale */),
+			physical: parquet.Types.Int64, len: -1, expectConverted: true, converted: ConvertedTypes.Decimal,
+			expectLogical: true, checkLogical: checkDecimal,
+		},
+		{
+			name: "decimal11_11", logical: NewDecimalLogicalType(11 /* precision */, 11 /* scale */),
+			physical: parquet.Types.Int64, len: -1, expectConverted: true, converted: ConvertedTypes.Decimal,
+			expectLogical: true, checkLogical: checkDecimal,
+		},
+	}
+	for _, tt := range tests {
+		s.Run(tt.name, func() {
+			d := s.reconstructDecimal(tt)
+			s.Equal(tt.name, d.element.Name)
+			s.inspectDecimal(d)
+		})
+	}
+}
+
+func (s *SchemaElementConstructionSuite) reconstructTemporal(c schemaElementConstructArgs, getUnit func(*format.SchemaElement) *format.TimeUnit) *temporalSchemaElementConstruction {
+	base := s.reconstruct(c)
+	t := c.logical.(TemporalLogicalType)
+	return &temporalSchemaElementConstruction{
+		*base,
+		t.IsAdjustedToUTC(),
+		t.TimeUnit(),
+		getUnit,
+	}
+}
+
+func (s *SchemaElementConstructionSuite) inspectTemporal(t *temporalSchemaElementConstruction) {
+	s.inspect(&t.schemaElementConstruction)
+	switch t.unit {
+	case TimeUnitMillis:
+		s.True(t.getUnit(t.element).IsSetMILLIS())
+	case TimeUnitMicros:
+		s.True(t.getUnit(t.element).IsSetMICROS())
+	case TimeUnitNanos:
+		s.True(t.getUnit(t.element).IsSetNANOS())
+	case TimeUnitUnknown:
+		fallthrough
+	default:
+		s.Fail("invalid time unit in test case")
+	}
+}
+
+func (s *SchemaElementConstructionSuite) TestTemporal() {
+	checkTime := func(p *format.SchemaElement) bool {
+		return p.LogicalType.IsSetTIME()
+	}
+	checkTimestamp := func(p *format.SchemaElement) bool {
+		return p.LogicalType.IsSetTIMESTAMP()
+	}
+
+	getTimeUnit := func(p *format.SchemaElement) *format.TimeUnit {
+		return p.LogicalType.TIME.Unit
+	}
+	getTimestampUnit := func(p *format.SchemaElement) *format.TimeUnit {
+		return p.LogicalType.TIMESTAMP.Unit
+	}
+
+	timeTests := []schemaElementConstructArgs{
+		{
+			name: "time_T_ms", logical: NewTimeLogicalType(true, TimeUnitMillis), physical: parquet.Types.Int32, len: -1,
+			expectConverted: true, converted: ConvertedTypes.TimeMillis, expectLogical: true, checkLogical: checkTime,
+		},
+		{
+			name: "time_F_ms", logical: NewTimeLogicalType(false, TimeUnitMillis), physical: parquet.Types.Int32, len: -1,
+			expectConverted: false, converted: ConvertedTypes.NA, expectLogical: true, checkLogical: checkTime,
+		},
+		{
+			name: "time_T_us", logical: NewTimeLogicalType(true, TimeUnitMicros), physical: parquet.Types.Int64, len: -1,
+			expectConverted: true, converted: ConvertedTypes.TimeMicros, expectLogical: true, checkLogical: checkTime,
+		},
+		{
+			name: "time_F_us", logical: NewTimeLogicalType(false, TimeUnitMicros), physical: parquet.Types.Int64, len: -1,
+			expectConverted: false, converted: ConvertedTypes.NA, expectLogical: true, checkLogical: checkTime,
+		},
+		{
+			name: "time_T_ns", logical: NewTimeLogicalType(true, TimeUnitNanos), physical: parquet.Types.Int64, len: -1,
+			expectConverted: false, converted: ConvertedTypes.NA, expectLogical: true, checkLogical: checkTime,
+		},
+		{
+			name: "time_F_ns", logical: NewTimeLogicalType(false, TimeUnitNanos), physical: parquet.Types.Int64, len: -1,
+			expectConverted: false, converted: ConvertedTypes.NA, expectLogical: true, checkLogical: checkTime,
+		},
+	}
+	timeStampTests := []schemaElementConstructArgs{
+		{
+			name: "timestamp_T_ms", logical: NewTimestampLogicalType(true, TimeUnitMillis), physical: parquet.Types.Int64, len: -1,
+			expectConverted: true, converted: ConvertedTypes.TimestampMillis, expectLogical: true, checkLogical: checkTimestamp,
+		},
+		{
+			name: "timestamp_F_ms", logical: NewTimestampLogicalType(false, TimeUnitMillis), physical: parquet.Types.Int64, len: -1,
+			expectConverted: false, converted: ConvertedTypes.NA, expectLogical: true, checkLogical: checkTimestamp,
+		},
+		{
+			name: "timestamp_F_ms_force", logical: NewTimestampLogicalTypeForce(false, TimeUnitMillis), physical: parquet.Types.Int64, len: -1,
+			expectConverted: true, converted: ConvertedTypes.TimestampMillis, expectLogical: true, checkLogical: checkTimestamp,
+		},
+		{
+			name: "timestamp_T_us", logical: NewTimestampLogicalType(true, TimeUnitMicros), physical: parquet.Types.Int64, len: -1,
+			expectConverted: true, converted: ConvertedTypes.TimestampMicros, expectLogical: true, checkLogical: checkTimestamp,
+		},
+		{
+			name: "timestamp_F_us", logical: NewTimestampLogicalType(false, TimeUnitMicros), physical: parquet.Types.Int64, len: -1,
+			expectConverted: false, converted: ConvertedTypes.NA, expectLogical: true, checkLogical: checkTimestamp,
+		},
+		{
+			name: "timestamp_F_us_force", logical: NewTimestampLogicalTypeForce(false, TimeUnitMicros), physical: parquet.Types.Int64, len: -1,
+			expectConverted: true, converted: ConvertedTypes.TimestampMicros, expectLogical: true, checkLogical: checkTimestamp,
+		},
+		{
+			name: "timestamp_T_ns", logical: NewTimestampLogicalType(true, TimeUnitNanos), physical: parquet.Types.Int64, len: -1,
+			expectConverted: false, converted: ConvertedTypes.NA, expectLogical: true, checkLogical: checkTimestamp,
+		},
+		{
+			name: "timestamp_F_ns", logical: NewTimestampLogicalType(false, TimeUnitNanos), physical: parquet.Types.Int64, len: -1,
+			expectConverted: false, converted: ConvertedTypes.NA, expectLogical: true, checkLogical: checkTimestamp,
+		},
+	}
+
+	for _, tt := range timeTests {
+		s.Run(tt.name, func() {
+			t := s.reconstructTemporal(tt, getTimeUnit)
+			s.Equal(t.adjusted, t.element.LogicalType.TIME.IsAdjustedToUTC)
+			s.inspectTemporal(t)
+		})
+	}
+	for _, tt := range timeStampTests {
+		s.Run(tt.name, func() {
+			t := s.reconstructTemporal(tt, getTimestampUnit)
+			s.Equal(t.adjusted, t.element.LogicalType.TIMESTAMP.IsAdjustedToUTC)
+			s.inspectTemporal(t)
+		})
+	}
+}
+
+func (s *SchemaElementConstructionSuite) reconstructInteger(c schemaElementConstructArgs) *intSchemaElementConstruction {
+	base := s.reconstruct(c)
+	l := c.logical.(*IntLogicalType)
+	return &intSchemaElementConstruction{
+		*base,
+		l.BitWidth(),
+		l.IsSigned(),
+	}
+}
+
+func (s *SchemaElementConstructionSuite) inspectInt(i *intSchemaElementConstruction) {
+	s.inspect(&i.schemaElementConstruction)
+	s.Equal(i.width, i.element.LogicalType.INTEGER.BitWidth)
+	s.Equal(i.signed, i.element.LogicalType.INTEGER.IsSigned)
+}
+
+func (s *SchemaElementConstructionSuite) TestIntegerCases() {
+	checkInt := func(p *format.SchemaElement) bool { return p.LogicalType.IsSetINTEGER() }
+
+	tests := []schemaElementConstructArgs{
+		{
+			name: "uint8", logical: NewIntLogicalType(8, false), physical: parquet.Types.Int32, len: -1,
+			expectConverted: true, converted: ConvertedTypes.Uint8, expectLogical: true, checkLogical: checkInt,
+		},
+		{
+			name: "uint16", logical: NewIntLogicalType(16, false), physical: parquet.Types.Int32, len: -1,
+			expectConverted: true, converted: ConvertedTypes.Uint16, expectLogical: true, checkLogical: checkInt,
+		},
+		{
+			name: "uint32", logical: NewIntLogicalType(32, false), physical: parquet.Types.Int32, len: -1,
+			expectConverted: true, converted: ConvertedTypes.Uint32, expectLogical: true, checkLogical: checkInt,
+		},
+		{
+			name: "uint64", logical: NewIntLogicalType(64, false), physical: parquet.Types.Int64, len: -1,
+			expectConverted: true, converted: ConvertedTypes.Uint64, expectLogical: true, checkLogical: checkInt,
+		},
+		{
+			name: "int8", logical: NewIntLogicalType(8, true), physical: parquet.Types.Int32, len: -1,
+			expectConverted: true, converted: ConvertedTypes.Int8, expectLogical: true, checkLogical: checkInt,
+		},
+		{
+			name: "int16", logical: NewIntLogicalType(16, true), physical: parquet.Types.Int32, len: -1,
+			expectConverted: true, converted: ConvertedTypes.Int16, expectLogical: true, checkLogical: checkInt,
+		},
+		{
+			name: "int32", logical: NewIntLogicalType(32, true), physical: parquet.Types.Int32, len: -1,
+			expectConverted: true, converted: ConvertedTypes.Int32, expectLogical: true, checkLogical: checkInt,
+		},
+		{
+			name: "int64", logical: NewIntLogicalType(64, true), physical: parquet.Types.Int64, len: -1,
+			expectConverted: true, converted: ConvertedTypes.Int64, expectLogical: true, checkLogical: checkInt,
+		},
+	}
+	for _, tt := range tests {
+		s.Run(tt.name, func() {
+			t := s.reconstructInteger(tt)
+			s.inspectInt(t)
+		})
+	}
+}
+
+func TestSchemaElementNestedSerialization(t *testing.T) {
+	// confirm that the intermediate thrift objects created during node serialization
+	// contain correct ConvertedType and ConvertedType information
+
+	strNode := MustPrimitive(NewPrimitiveNodeLogical("string" /*name */, parquet.Repetitions.Required, StringLogicalType{}, parquet.Types.ByteArray, -1 /* type len */, -1 /* fieldID */))
+	dateNode := MustPrimitive(NewPrimitiveNodeLogical("date" /*name */, parquet.Repetitions.Required, DateLogicalType{}, parquet.Types.Int32, -1 /* type len */, -1 /* fieldID */))
+	jsonNode := MustPrimitive(NewPrimitiveNodeLogical("json" /*name */, parquet.Repetitions.Required, JSONLogicalType{}, parquet.Types.ByteArray, -1 /* type len */, -1 /* fieldID */))
+	uuidNode := MustPrimitive(NewPrimitiveNodeLogical("uuid" /*name */, parquet.Repetitions.Required, UUIDLogicalType{}, parquet.Types.FixedLenByteArray, 16 /* type len */, - /* fieldID */ 1))
+	timestampNode := MustPrimitive(NewPrimitiveNodeLogical("timestamp" /*name */, parquet.Repetitions.Required, NewTimestampLogicalType(false /* adjustedToUTC */, TimeUnitNanos), parquet.Types.Int64, -1 /* type len */, -1 /* fieldID */))
+	intNode := MustPrimitive(NewPrimitiveNodeLogical("int" /*name */, parquet.Repetitions.Required, NewIntLogicalType(64 /* bitWidth */, false /* signed */), parquet.Types.Int64, -1 /* type len */, -1 /* fieldID */))
+	decimalNode := MustPrimitive(NewPrimitiveNodeLogical("decimal" /*name */, parquet.Repetitions.Required, NewDecimalLogicalType(16 /* precision */, 6 /* scale */), parquet.Types.Int64, -1 /* type len */, -1 /* fieldID */))
+	listNode := MustGroup(NewGroupNodeLogical("list" /*name */, parquet.Repetitions.Repeated, []Node{strNode, dateNode, jsonNode, uuidNode, timestampNode, intNode, decimalNode}, NewListLogicalType(), -1 /* fieldID */))
+
+	listElems := ToThrift(listNode)
+	assert.Equal(t, "list", listElems[0].Name)
+	assert.True(t, listElems[0].IsSetConvertedType())
+	assert.True(t, listElems[0].IsSetLogicalType())
+	assert.Equal(t, format.ConvertedType(ConvertedTypes.List), listElems[0].GetConvertedType())
+	assert.True(t, listElems[0].LogicalType.IsSetLIST())
+	assert.True(t, listElems[1].LogicalType.IsSetSTRING())
+	assert.True(t, listElems[2].LogicalType.IsSetDATE())
+	assert.True(t, listElems[3].LogicalType.IsSetJSON())
+	assert.True(t, listElems[4].LogicalType.IsSetUUID())
+	assert.True(t, listElems[5].LogicalType.IsSetTIMESTAMP())
+	assert.True(t, listElems[6].LogicalType.IsSetINTEGER())
+	assert.True(t, listElems[7].LogicalType.IsSetDECIMAL())
+
+	mapNode := MustGroup(NewGroupNodeLogical("map" /* name */, parquet.Repetitions.Required, []Node{}, MapLogicalType{}, -1 /* fieldID */))
+	mapElems := ToThrift(mapNode)
+	assert.Equal(t, "map", mapElems[0].Name)
+	assert.True(t, mapElems[0].IsSetConvertedType())
+	assert.True(t, mapElems[0].IsSetLogicalType())
+	assert.Equal(t, format.ConvertedType(ConvertedTypes.Map), mapElems[0].GetConvertedType())
+	assert.True(t, mapElems[0].LogicalType.IsSetMAP())
+}
+
+func TestLogicalTypeSerializationRoundTrip(t *testing.T) {
+	tests := []struct {
+		name     string
+		logical  LogicalType
+		physical parquet.Type
+		len      int
+	}{
+		{"string", StringLogicalType{}, parquet.Types.ByteArray, -1},
+		{"enum", EnumLogicalType{}, parquet.Types.ByteArray, -1},
+		{"decimal", NewDecimalLogicalType(16, 6), parquet.Types.Int64, -1},
+		{"date", DateLogicalType{}, parquet.Types.Int32, -1},
+		{"time_T_ms", NewTimeLogicalType(true, TimeUnitMillis), parquet.Types.Int32, -1},
+		{"time_T_us", NewTimeLogicalType(true, TimeUnitMicros), parquet.Types.Int64, -1},
+		{"time_T_ns", NewTimeLogicalType(true, TimeUnitNanos), parquet.Types.Int64, -1},
+		{"time_F_ms", NewTimeLogicalType(false, TimeUnitMillis), parquet.Types.Int32, -1},
+		{"time_F_us", NewTimeLogicalType(false, TimeUnitMicros), parquet.Types.Int64, -1},
+		{"time_F_ns", NewTimeLogicalType(false, TimeUnitNanos), parquet.Types.Int64, -1},
+		{"timestamp_T_ms", NewTimestampLogicalType(true, TimeUnitMillis), parquet.Types.Int64, -1},
+		{"timestamp_T_us", NewTimestampLogicalType(true, TimeUnitMicros), parquet.Types.Int64, -1},
+		{"timestamp_T_ns", NewTimestampLogicalType(true, TimeUnitNanos), parquet.Types.Int64, -1},
+		{"timestamp_F_ms", NewTimestampLogicalType(false, TimeUnitMillis), parquet.Types.Int64, -1},
+		{"timestamp_F_us", NewTimestampLogicalType(false, TimeUnitMicros), parquet.Types.Int64, -1},
+		{"timestamp_F_ns", NewTimestampLogicalType(false, TimeUnitNanos), parquet.Types.Int64, -1},
+		{"interval", IntervalLogicalType{}, parquet.Types.FixedLenByteArray, 12},
+		{"uint8", NewIntLogicalType(8, false), parquet.Types.Int32, -1},
+		{"uint16", NewIntLogicalType(16, false), parquet.Types.Int32, -1},
+		{"uint32", NewIntLogicalType(32, false), parquet.Types.Int32, -1},
+		{"uint64", NewIntLogicalType(64, false), parquet.Types.Int64, -1},
+		{"int8", NewIntLogicalType(8, true), parquet.Types.Int32, -1},
+		{"int16", NewIntLogicalType(16, true), parquet.Types.Int32, -1},
+		{"int32", NewIntLogicalType(32, true), parquet.Types.Int32, -1},
+		{"int64", NewIntLogicalType(64, true), parquet.Types.Int64, -1},
+		{"null", NullLogicalType{}, parquet.Types.Boolean, -1},
+		{"json", JSONLogicalType{}, parquet.Types.ByteArray, -1},
+		{"bson", BSONLogicalType{}, parquet.Types.ByteArray, -1},
+		{"uuid", UUIDLogicalType{}, parquet.Types.FixedLenByteArray, 16},
+		{"none", NoLogicalType{}, parquet.Types.Boolean, -1},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			n := MustPrimitive(NewPrimitiveNodeLogical("something" /* name */, parquet.Repetitions.Required, tt.logical, tt.physical, tt.len, -1 /* fieldID */))
+			elem := n.toThrift()
+			recover := MustPrimitive(PrimitiveNodeFromThrift(elem))
+			assert.True(t, n.Equals(recover))
+		})
+	}
+
+	n := MustGroup(NewGroupNodeLogical("map" /* name */, parquet.Repetitions.Required, []Node{}, MapLogicalType{}, -1 /* fieldID */))
+	elem := n.toThrift()
+	recover := MustGroup(GroupNodeFromThrift(elem, []Node{}))
+	assert.True(t, recover.Equals(n))
+
+	n = MustGroup(NewGroupNodeLogical("list" /* name */, parquet.Repetitions.Required, []Node{}, ListLogicalType{}, -1 /* fieldID */))
+	elem = n.toThrift()
+	recover = MustGroup(GroupNodeFromThrift(elem, []Node{}))
+	assert.True(t, recover.Equals(n))
+}
+
+func TestSchemaElementConstruction(t *testing.T) {
+	suite.Run(t, new(SchemaElementConstructionSuite))
+}
diff --git a/go/parquet/schema/schema_flatten_test.go b/go/parquet/schema/schema_flatten_test.go
new file mode 100644
index 00000000000..cbe76df718c
--- /dev/null
+++ b/go/parquet/schema/schema_flatten_test.go
@@ -0,0 +1,157 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package schema
+
+import (
+	"testing"
+
+	"github.com/apache/arrow/go/parquet"
+	format "github.com/apache/arrow/go/parquet/internal/gen-go/parquet"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/suite"
+)
+
+func NewPrimitive(name string, repetition format.FieldRepetitionType, typ format.Type, fieldID int32) *format.SchemaElement {
+	ret := &format.SchemaElement{
+		Name:           name,
+		RepetitionType: format.FieldRepetitionTypePtr(repetition),
+		Type:           format.TypePtr(typ),
+	}
+	if fieldID >= 0 {
+		ret.FieldID = &fieldID
+	}
+	return ret
+}
+
+func NewGroup(name string, repetition format.FieldRepetitionType, numChildren, fieldID int32) *format.SchemaElement {
+	ret := &format.SchemaElement{
+		Name:           name,
+		RepetitionType: format.FieldRepetitionTypePtr(repetition),
+		NumChildren:    &numChildren,
+	}
+	if fieldID >= 0 {
+		ret.FieldID = &fieldID
+	}
+	return ret
+}
+
+type SchemaFlattenSuite struct {
+	suite.Suite
+
+	name string
+}
+
+func (s *SchemaFlattenSuite) SetupSuite() {
+	s.name = "parquet_schema"
+}
+
+func (s *SchemaFlattenSuite) TestDecimalMetadata() {
+	group := MustGroup(NewGroupNodeConverted("group" /* name */, parquet.Repetitions.Repeated, FieldList{
+		MustPrimitive(NewPrimitiveNodeConverted("decimal" /* name */, parquet.Repetitions.Required, parquet.Types.Int64,
+			ConvertedTypes.Decimal, 0 /* type len */, 8 /* precision */, 4 /* scale */, -1 /* fieldID */)),
+	}, ConvertedTypes.List, -1 /* fieldID */))
+	elements := ToThrift(group)
+
+	s.Len(elements, 2)
+	s.Equal("decimal", elements[1].GetName())
+	s.True(elements[1].IsSetPrecision())
+	s.True(elements[1].IsSetScale())
+
+	group = MustGroup(NewGroupNodeLogical("group" /* name */, parquet.Repetitions.Repeated, FieldList{
+		MustPrimitive(NewPrimitiveNodeLogical("decimal" /* name */, parquet.Repetitions.Required, NewDecimalLogicalType(10 /* precision */, 5 /* scale */),
+			parquet.Types.Int64, 0 /* type len */, -1 /* fieldID */)),
+	}, NewListLogicalType(), -1 /* fieldID */))
+	elements = ToThrift(group)
+	s.Equal("decimal", elements[1].Name)
+	s.True(elements[1].IsSetPrecision())
+	s.True(elements[1].IsSetScale())
+
+	group = MustGroup(NewGroupNodeConverted("group" /* name */, parquet.Repetitions.Repeated, FieldList{
+		NewInt64Node("int64" /* name */, parquet.Repetitions.Required, -1 /* fieldID */)}, ConvertedTypes.List, -1 /* fieldID */))
+	elements = ToThrift(group)
+	s.Equal("int64", elements[1].Name)
+	s.False(elements[0].IsSetPrecision())
+	s.False(elements[1].IsSetPrecision())
+	s.False(elements[0].IsSetScale())
+	s.False(elements[1].IsSetScale())
+}
+
+func (s *SchemaFlattenSuite) TestNestedExample() {
+	elements := make([]*format.SchemaElement, 0)
+	elements = append(elements,
+		NewGroup(s.name, format.FieldRepetitionType_REPEATED, 2 /* numChildren */, 0 /* fieldID */),
+		NewPrimitive("a" /* name */, format.FieldRepetitionType_REQUIRED, format.Type_INT32, 1 /* fieldID */),
+		NewGroup("bag" /* name */, format.FieldRepetitionType_OPTIONAL, 1 /* numChildren */, 2 /* fieldID */))
+
+	elt := NewGroup("b" /* name */, format.FieldRepetitionType_REPEATED, 1 /* numChildren */, 3 /* fieldID */)
+	elt.ConvertedType = format.ConvertedTypePtr(format.ConvertedType_LIST)
+	elt.LogicalType = &format.LogicalType{LIST: format.NewListType()}
+	elements = append(elements, elt, NewPrimitive("item" /* name */, format.FieldRepetitionType_OPTIONAL, format.Type_INT64, 4 /* fieldID */))
+
+	fields := FieldList{NewInt32Node("a" /* name */, parquet.Repetitions.Required, 1 /* fieldID */)}
+	list := MustGroup(NewGroupNodeConverted("b" /* name */, parquet.Repetitions.Repeated, FieldList{
+		NewInt64Node("item" /* name */, parquet.Repetitions.Optional, 4 /* fieldID */)}, ConvertedTypes.List, 3 /* fieldID */))
+	fields = append(fields, MustGroup(NewGroupNode("bag" /* name */, parquet.Repetitions.Optional, FieldList{list}, 2 /* fieldID */)))
+
+	sc := MustGroup(NewGroupNode(s.name, parquet.Repetitions.Repeated, fields, 0 /* fieldID */))
+
+	flattened := ToThrift(sc)
+	s.Len(flattened, len(elements))
+	for idx, elem := range flattened {
+		s.Equal(elements[idx], elem)
+	}
+}
+
+func TestSchemaFlatten(t *testing.T) {
+	suite.Run(t, new(SchemaFlattenSuite))
+}
+
+func TestInvalidConvertedTypeInDeserialize(t *testing.T) {
+	n := MustPrimitive(NewPrimitiveNodeLogical("string" /* name */, parquet.Repetitions.Required, StringLogicalType{},
+		parquet.Types.ByteArray, -1 /* type len */, -1 /* fieldID */))
+	assert.True(t, n.LogicalType().Equals(StringLogicalType{}))
+	assert.True(t, n.LogicalType().IsValid())
+	assert.True(t, n.LogicalType().IsSerialized())
+	intermediary := n.toThrift()
+	// corrupt it
+	intermediary.LogicalType.STRING = nil
+	assert.Panics(t, func() {
+		PrimitiveNodeFromThrift(intermediary)
+	})
+}
+
+func TestInvalidTimeUnitInTimeLogical(t *testing.T) {
+	n := MustPrimitive(NewPrimitiveNodeLogical("time" /* name */, parquet.Repetitions.Required,
+		NewTimeLogicalType(true /* adjustedToUTC */, TimeUnitNanos), parquet.Types.Int64, -1 /* type len */, -1 /* fieldID */))
+	intermediary := n.toThrift()
+	// corrupt it
+	intermediary.LogicalType.TIME.Unit.NANOS = nil
+	assert.Panics(t, func() {
+		PrimitiveNodeFromThrift(intermediary)
+	})
+}
+
+func TestInvalidTimeUnitInTimestampLogical(t *testing.T) {
+	n := MustPrimitive(NewPrimitiveNodeLogical("time" /* name */, parquet.Repetitions.Required,
+		NewTimestampLogicalType(true /* adjustedToUTC */, TimeUnitNanos), parquet.Types.Int64, -1 /* type len */, -1 /* fieldID */))
+	intermediary := n.toThrift()
+	// corrupt it
+	intermediary.LogicalType.TIMESTAMP.Unit.NANOS = nil
+	assert.Panics(t, func() {
+		PrimitiveNodeFromThrift(intermediary)
+	})
+}
diff --git a/go/parquet/schema/schema_test.go b/go/parquet/schema/schema_test.go
new file mode 100644
index 00000000000..b2ce2291612
--- /dev/null
+++ b/go/parquet/schema/schema_test.go
@@ -0,0 +1,666 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package schema_test
+
+import (
+	"os"
+	"testing"
+
+	"github.com/apache/arrow/go/parquet"
+	format "github.com/apache/arrow/go/parquet/internal/gen-go/parquet"
+	"github.com/apache/arrow/go/parquet/schema"
+	"github.com/apache/thrift/lib/go/thrift"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/suite"
+)
+
+func TestColumnPath(t *testing.T) {
+	p := parquet.ColumnPath([]string{"toplevel", "leaf"})
+	assert.Equal(t, "toplevel.leaf", p.String())
+
+	p2 := parquet.ColumnPathFromString("toplevel.leaf")
+	assert.Equal(t, "toplevel.leaf", p2.String())
+
+	extend := p2.Extend("anotherlevel")
+	assert.Equal(t, "toplevel.leaf.anotherlevel", extend.String())
+}
+
+func NewPrimitive(name string, repetition format.FieldRepetitionType, typ format.Type, fieldID int32) *format.SchemaElement {
+	ret := &format.SchemaElement{
+		Name:           name,
+		RepetitionType: format.FieldRepetitionTypePtr(repetition),
+		Type:           format.TypePtr(typ),
+	}
+	if fieldID >= 0 {
+		ret.FieldID = &fieldID
+	}
+	return ret
+}
+
+func NewGroup(name string, repetition format.FieldRepetitionType, numChildren, fieldID int32) *format.SchemaElement {
+	ret := &format.SchemaElement{
+		Name:           name,
+		RepetitionType: format.FieldRepetitionTypePtr(repetition),
+		NumChildren:    &numChildren,
+	}
+	if fieldID >= 0 {
+		ret.FieldID = &fieldID
+	}
+	return ret
+}
+
+func TestSchemaNodes(t *testing.T) {
+	suite.Run(t, new(PrimitiveNodeTestSuite))
+	suite.Run(t, new(GroupNodeTestSuite))
+	suite.Run(t, new(SchemaConverterSuite))
+}
+
+type PrimitiveNodeTestSuite struct {
+	suite.Suite
+
+	name    string
+	fieldID int32
+	node    schema.Node
+}
+
+func (p *PrimitiveNodeTestSuite) SetupTest() {
+	p.name = "name"
+	p.fieldID = 5
+}
+
+func (p *PrimitiveNodeTestSuite) convert(elt *format.SchemaElement) {
+	p.node = schema.MustPrimitive(schema.PrimitiveNodeFromThrift(elt))
+	p.IsType(&schema.PrimitiveNode{}, p.node)
+}
+
+func (p *PrimitiveNodeTestSuite) TestAttrs() {
+	node1 := schema.NewInt32Node("foo" /* name */, parquet.Repetitions.Repeated, -1 /* fieldID */)
+	node2 := schema.MustPrimitive(schema.NewPrimitiveNodeConverted("bar" /* name */, parquet.Repetitions.Optional, parquet.Types.ByteArray,
+		schema.ConvertedTypes.UTF8, 0 /* type len */, 0 /* precision */, 0 /* scale */, -1 /* fieldID */))
+
+	p.Equal("foo", node1.Name())
+	p.Equal(schema.Primitive, node1.Type())
+	p.Equal(schema.Primitive, node2.Type())
+
+	p.Equal(parquet.Repetitions.Repeated, node1.RepetitionType())
+	p.Equal(parquet.Repetitions.Optional, node2.RepetitionType())
+
+	p.Equal(parquet.Types.Int32, node1.PhysicalType())
+	p.Equal(parquet.Types.ByteArray, node2.PhysicalType())
+
+	p.Equal(schema.ConvertedTypes.None, node1.ConvertedType())
+	p.Equal(schema.ConvertedTypes.UTF8, node2.ConvertedType())
+}
+
+func (p *PrimitiveNodeTestSuite) TestFromParquet() {
+	p.Run("Optional Int32", func() {
+		elt := NewPrimitive(p.name, format.FieldRepetitionType_OPTIONAL, format.Type_INT32, p.fieldID)
+		p.convert(elt)
+
+		p.Equal(p.name, p.node.Name())
+		p.Equal(p.fieldID, p.node.FieldID())
+		p.Equal(parquet.Repetitions.Optional, p.node.RepetitionType())
+		p.Equal(parquet.Types.Int32, p.node.(*schema.PrimitiveNode).PhysicalType())
+		p.Equal(schema.ConvertedTypes.None, p.node.ConvertedType())
+	})
+
+	p.Run("LogicalType", func() {
+		elt := NewPrimitive(p.name, format.FieldRepetitionType_REQUIRED, format.Type_BYTE_ARRAY, p.fieldID)
+		elt.ConvertedType = format.ConvertedTypePtr(format.ConvertedType_UTF8)
+		p.convert(elt)
+
+		p.Equal(parquet.Repetitions.Required, p.node.RepetitionType())
+		p.Equal(parquet.Types.ByteArray, p.node.(*schema.PrimitiveNode).PhysicalType())
+		p.Equal(schema.ConvertedTypes.UTF8, p.node.ConvertedType())
+	})
+
+	p.Run("FixedLenByteArray", func() {
+		elt := NewPrimitive(p.name, format.FieldRepetitionType_OPTIONAL, format.Type_FIXED_LEN_BYTE_ARRAY, p.fieldID)
+		elt.TypeLength = thrift.Int32Ptr(16)
+		p.convert(elt)
+
+		p.Equal(p.name, p.node.Name())
+		p.Equal(p.fieldID, p.node.FieldID())
+		p.Equal(parquet.Repetitions.Optional, p.node.RepetitionType())
+		p.Equal(parquet.Types.FixedLenByteArray, p.node.(*schema.PrimitiveNode).PhysicalType())
+		p.Equal(16, p.node.(*schema.PrimitiveNode).TypeLength())
+	})
+
+	p.Run("convertedtype::decimal", func() {
+		elt := NewPrimitive(p.name, format.FieldRepetitionType_OPTIONAL, format.Type_FIXED_LEN_BYTE_ARRAY, p.fieldID)
+		elt.ConvertedType = format.ConvertedTypePtr(format.ConvertedType_DECIMAL)
+		elt.TypeLength = thrift.Int32Ptr(6)
+		elt.Scale = thrift.Int32Ptr(2)
+		elt.Precision = thrift.Int32Ptr(12)
+
+		p.convert(elt)
+		p.Equal(parquet.Types.FixedLenByteArray, p.node.(*schema.PrimitiveNode).PhysicalType())
+		p.Equal(schema.ConvertedTypes.Decimal, p.node.ConvertedType())
+		p.Equal(6, p.node.(*schema.PrimitiveNode).TypeLength())
+		p.EqualValues(2, p.node.(*schema.PrimitiveNode).DecimalMetadata().Scale)
+		p.EqualValues(12, p.node.(*schema.PrimitiveNode).DecimalMetadata().Precision)
+	})
+}
+
+func (p *PrimitiveNodeTestSuite) TestEquals() {
+	const fieldID = -1
+	node1 := schema.NewInt32Node("foo" /* name */, parquet.Repetitions.Required, fieldID)
+	node2 := schema.NewInt64Node("foo" /* name */, parquet.Repetitions.Required, fieldID)
+	node3 := schema.NewInt32Node("bar" /* name */, parquet.Repetitions.Required, fieldID)
+	node4 := schema.NewInt32Node("foo" /* name */, parquet.Repetitions.Optional, fieldID)
+	node5 := schema.NewInt32Node("foo" /* name */, parquet.Repetitions.Required, fieldID)
+
+	p.True(node1.Equals(node1))
+	p.False(node1.Equals(node2))
+	p.False(node1.Equals(node3))
+	p.False(node1.Equals(node4))
+	p.True(node1.Equals(node5))
+
+	flba1 := schema.MustPrimitive(schema.NewPrimitiveNodeConverted("foo" /* name */, parquet.Repetitions.Required, parquet.Types.FixedLenByteArray,
+		schema.ConvertedTypes.Decimal, 12 /* type len */, 4 /* precision */, 2 /* scale */, fieldID))
+	flba2 := schema.MustPrimitive(schema.NewPrimitiveNodeConverted("foo" /* name */, parquet.Repetitions.Required, parquet.Types.FixedLenByteArray,
+		schema.ConvertedTypes.Decimal, 1 /* type len */, 4 /* precision */, 2 /* scale */, fieldID))
+	flba2.SetTypeLength(12)
+
+	flba3 := schema.MustPrimitive(schema.NewPrimitiveNodeConverted("foo" /* name */, parquet.Repetitions.Required, parquet.Types.FixedLenByteArray,
+		schema.ConvertedTypes.Decimal, 1 /* type len */, 4 /* precision */, 2 /* scale */, fieldID))
+	flba3.SetTypeLength(16)
+
+	flba4 := schema.MustPrimitive(schema.NewPrimitiveNodeConverted("foo" /* name */, parquet.Repetitions.Required, parquet.Types.FixedLenByteArray,
+		schema.ConvertedTypes.Decimal, 12 /* type len */, 4 /* precision */, 0 /* scale */, fieldID))
+	flba5 := schema.MustPrimitive(schema.NewPrimitiveNodeConverted("foo" /* name */, parquet.Repetitions.Required, parquet.Types.FixedLenByteArray,
+		schema.ConvertedTypes.None, 12 /* type len */, 4 /* precision */, 0 /* scale */, fieldID))
+
+	p.True(flba1.Equals(flba2))
+	p.False(flba1.Equals(flba3))
+	p.False(flba1.Equals(flba4))
+	p.False(flba1.Equals(flba5))
+}
+
+func (p *PrimitiveNodeTestSuite) TestPhysicalLogicalMapping() {
+	tests := []struct {
+		typ       parquet.Type
+		cnv       schema.ConvertedType
+		typLen    int
+		precision int
+		scale     int
+		shouldErr bool
+	}{
+		{parquet.Types.Int32, schema.ConvertedTypes.Int32, 0 /* type len */, 0 /* precision */, 0 /* scale */, false},
+		{parquet.Types.ByteArray, schema.ConvertedTypes.JSON, 0 /* type len */, 0 /* precision */, 0 /* scale */, false},
+		{parquet.Types.Int32, schema.ConvertedTypes.JSON, 0 /* type len */, 0 /* precision */, 0 /* scale */, true},
+		{parquet.Types.Int64, schema.ConvertedTypes.TimestampMillis, 0 /* type len */, 0 /* precision */, 0 /* scale */, false},
+		{parquet.Types.Int32, schema.ConvertedTypes.Int64, 0 /* type len */, 0 /* precision */, 0 /* scale */, true},
+		{parquet.Types.ByteArray, schema.ConvertedTypes.Int8, 0 /* type len */, 0 /* precision */, 0 /* scale */, true},
+		{parquet.Types.ByteArray, schema.ConvertedTypes.Interval, 0 /* type len */, 0 /* precision */, 0 /* scale */, true},
+		{parquet.Types.FixedLenByteArray, schema.ConvertedTypes.Enum, 0 /* type len */, 0 /* precision */, 0 /* scale */, true},
+		{parquet.Types.ByteArray, schema.ConvertedTypes.Enum, 0 /* type len */, 0 /* precision */, 0 /* scale */, false},
+		{parquet.Types.FixedLenByteArray, schema.ConvertedTypes.Decimal, 0 /* type len */, 2 /* precision */, 4 /* scale */, true},
+		{parquet.Types.Float, schema.ConvertedTypes.Decimal, 0 /* type len */, 2 /* precision */, 4 /* scale */, true},
+		{parquet.Types.FixedLenByteArray, schema.ConvertedTypes.Decimal, 0 /* type len */, 4 /* precision */, 0 /* scale */, true},
+		{parquet.Types.FixedLenByteArray, schema.ConvertedTypes.Decimal, 10 /* type len */, 4 /* precision */, -1 /* scale */, true},
+		{parquet.Types.FixedLenByteArray, schema.ConvertedTypes.Decimal, 10 /* type len */, 2 /* precision */, 4 /* scale */, true},
+		{parquet.Types.FixedLenByteArray, schema.ConvertedTypes.Decimal, 10 /* type len */, 6 /* precision */, 4 /* scale */, false},
+		{parquet.Types.FixedLenByteArray, schema.ConvertedTypes.Interval, 12 /* type len */, 0 /* precision */, 0 /* scale */, false},
+		{parquet.Types.FixedLenByteArray, schema.ConvertedTypes.Interval, 10 /* type len */, 0 /* precision */, 0 /* scale */, true},
+	}
+	for _, tt := range tests {
+		p.Run(tt.typ.String(), func() {
+			_, err := schema.NewPrimitiveNodeConverted("foo" /* name */, parquet.Repetitions.Required, tt.typ, tt.cnv, tt.typLen, tt.precision, tt.scale, -1 /* fieldID */)
+			if tt.shouldErr {
+				p.Error(err)
+			} else {
+				p.NoError(err)
+			}
+		})
+	}
+}
+
+type GroupNodeTestSuite struct {
+	suite.Suite
+}
+
+func (g *GroupNodeTestSuite) fields1() []schema.Node {
+	return schema.FieldList{
+		schema.NewInt32Node("one" /* name */, parquet.Repetitions.Required, -1 /* fieldID */),
+		schema.NewInt64Node("two" /* name */, parquet.Repetitions.Optional, -1 /* fieldID */),
+		schema.NewFloat64Node("three" /* name */, parquet.Repetitions.Optional, -1 /* fieldID */),
+	}
+}
+
+func (g *GroupNodeTestSuite) fields2() []schema.Node {
+	return schema.FieldList{
+		schema.NewInt32Node("duplicate" /* name */, parquet.Repetitions.Required, -1 /* fieldID */),
+		schema.NewInt64Node("unique" /* name */, parquet.Repetitions.Optional, -1 /* fieldID */),
+		schema.NewFloat64Node("duplicate" /* name */, parquet.Repetitions.Optional, -1 /* fieldID */),
+	}
+}
+
+func (g *GroupNodeTestSuite) TestAttrs() {
+	fields := g.fields1()
+
+	node1 := schema.MustGroup(schema.NewGroupNode("foo" /* name */, parquet.Repetitions.Repeated, fields, -1 /* fieldID */))
+	node2 := schema.MustGroup(schema.NewGroupNodeConverted("bar" /* name */, parquet.Repetitions.Optional, fields, schema.ConvertedTypes.List, -1 /* fieldID */))
+
+	g.Equal("foo", node1.Name())
+	g.Equal(schema.Group, node1.Type())
+	g.Equal(len(fields), node1.NumFields())
+	g.Equal(parquet.Repetitions.Repeated, node1.RepetitionType())
+	g.Equal(parquet.Repetitions.Optional, node2.RepetitionType())
+
+	g.Equal(schema.ConvertedTypes.None, node1.ConvertedType())
+	g.Equal(schema.ConvertedTypes.List, node2.ConvertedType())
+}
+
+func (g *GroupNodeTestSuite) TestEquals() {
+	f1 := g.fields1()
+	f2 := g.fields1()
+
+	group1 := schema.Must(schema.NewGroupNode("group" /* name */, parquet.Repetitions.Repeated, f1, -1 /* fieldID */))
+	group2 := schema.Must(schema.NewGroupNode("group" /* name */, parquet.Repetitions.Repeated, f2, -1 /* fieldID */))
+	group3 := schema.Must(schema.NewGroupNode("group2" /* name */, parquet.Repetitions.Repeated, f2, -1 /* fieldID */))
+
+	f2 = append(f2, schema.NewFloat32Node("four" /* name */, parquet.Repetitions.Optional, -1 /* fieldID */))
+	group4 := schema.Must(schema.NewGroupNode("group" /* name */, parquet.Repetitions.Repeated, f2, -1 /* fieldID */))
+	group5 := schema.Must(schema.NewGroupNode("group" /* name */, parquet.Repetitions.Repeated, g.fields1(), -1 /* fieldID */))
+
+	g.True(group1.Equals(group1))
+	g.True(group1.Equals(group2))
+	g.False(group1.Equals(group3))
+	g.False(group1.Equals(group4))
+	g.False(group5.Equals(group4))
+}
+
+func (g *GroupNodeTestSuite) TestFieldIndex() {
+	fields := g.fields1()
+	group := schema.MustGroup(schema.NewGroupNode("group" /* name */, parquet.Repetitions.Required, fields, -1 /* fieldID */))
+	for idx, field := range fields {
+		f := group.Field(idx)
+		g.Same(field, f)
+		g.Equal(idx, group.FieldIndexByField(f))
+		g.Equal(idx, group.FieldIndexByName(field.Name()))
+	}
+
+	// Non field nodes
+	nonFieldAlien := schema.NewInt32Node("alien" /* name */, parquet.Repetitions.Required, -1 /* fieldID */)
+	nonFieldFamiliar := schema.NewInt32Node("one" /* name */, parquet.Repetitions.Repeated, -1 /* fieldID */)
+	g.Less(group.FieldIndexByField(nonFieldAlien), 0)
+	g.Less(group.FieldIndexByField(nonFieldFamiliar), 0)
+}
+
+func (g *GroupNodeTestSuite) TestFieldIndexDuplicateName() {
+	fields := g.fields2()
+	group := schema.MustGroup(schema.NewGroupNode("group" /* name */, parquet.Repetitions.Required, fields, -1 /* fieldID */))
+	for idx, field := range fields {
+		f := group.Field(idx)
+		g.Same(f, field)
+		g.Equal(idx, group.FieldIndexByField(f))
+	}
+}
+
+type SchemaConverterSuite struct {
+	suite.Suite
+
+	name string
+	node schema.Node
+}
+
+func (s *SchemaConverterSuite) SetupSuite() {
+	s.name = "parquet_schema"
+}
+
+func (s *SchemaConverterSuite) convert(elems []*format.SchemaElement) {
+	s.node = schema.Must(schema.FromParquet(elems))
+	s.Equal(schema.Group, s.node.Type())
+}
+
+func (s *SchemaConverterSuite) checkParentConsistency(groupRoot *schema.GroupNode) bool {
+	// each node should have the group as parent
+	for i := 0; i < groupRoot.NumFields(); i++ {
+		field := groupRoot.Field(i)
+		if field.Parent() != groupRoot {
+			return false
+		}
+		if field.Type() == schema.Group {
+			if !s.checkParentConsistency(field.(*schema.GroupNode)) {
+				return false
+			}
+		}
+	}
+	return true
+}
+
+func (s *SchemaConverterSuite) TestNestedExample() {
+	elements := make([]*format.SchemaElement, 0)
+	elements = append(elements,
+		NewGroup(s.name, format.FieldRepetitionType_REPEATED, 2 /* numChildren */, 0 /* fieldID */),
+		NewPrimitive("a" /* name */, format.FieldRepetitionType_REQUIRED, format.Type_INT32, 1 /* fieldID */),
+		NewGroup("bag" /* name */, format.FieldRepetitionType_OPTIONAL, 1 /* numChildren */, 2 /* fieldID */))
+	elt := NewGroup("b" /* name */, format.FieldRepetitionType_REPEATED, 1 /* numChildren */, 3 /* fieldID */)
+	elt.ConvertedType = format.ConvertedTypePtr(format.ConvertedType_LIST)
+	elements = append(elements, elt, NewPrimitive("item" /* name */, format.FieldRepetitionType_OPTIONAL, format.Type_INT64, 4 /* fieldID */))
+
+	s.convert(elements)
+
+	// construct the expected schema
+	fields := make([]schema.Node, 0)
+	fields = append(fields, schema.NewInt32Node("a" /* name */, parquet.Repetitions.Required, 1 /* fieldID */))
+
+	// 3-level list encoding
+	item := schema.NewInt64Node("item" /* name */, parquet.Repetitions.Optional, 4 /* fieldID */)
+	list := schema.MustGroup(schema.NewGroupNodeConverted("b" /* name */, parquet.Repetitions.Repeated, schema.FieldList{item}, schema.ConvertedTypes.List, 3 /* fieldID */))
+	bag := schema.MustGroup(schema.NewGroupNode("bag" /* name */, parquet.Repetitions.Optional, schema.FieldList{list}, 2 /* fieldID */))
+	fields = append(fields, bag)
+
+	sc := schema.MustGroup(schema.NewGroupNode(s.name, parquet.Repetitions.Repeated, fields, 0 /* fieldID */))
+	s.True(sc.Equals(s.node))
+	s.Nil(s.node.Parent())
+	s.True(s.checkParentConsistency(s.node.(*schema.GroupNode)))
+}
+
+func (s *SchemaConverterSuite) TestZeroColumns() {
+	elements := []*format.SchemaElement{NewGroup("schema" /* name */, format.FieldRepetitionType_REPEATED, 0 /* numChildren */, 0 /* fieldID */)}
+	s.NotPanics(func() { s.convert(elements) })
+}
+
+func (s *SchemaConverterSuite) TestInvalidRoot() {
+	// According to the Parquet spec, the first element in the list<SchemaElement>
+	// is a group whose children (and their descendants) contain all of the rest of
+	// the flattened schema elments. If the first element is not a group, it is malformed
+	elements := []*format.SchemaElement{NewPrimitive("not-a-group" /* name */, format.FieldRepetitionType_REQUIRED,
+		format.Type_INT32, 0 /* fieldID */), format.NewSchemaElement()}
+	s.Panics(func() { s.convert(elements) })
+
+	// While the parquet spec indicates that the root group should have REPEATED
+	// repetition type, some implementations may return REQUIRED or OPTIONAL
+	// groups as the first element. These tests check that this is okay as a
+	// practicality matter
+	elements = []*format.SchemaElement{
+		NewGroup("not-repeated" /* name */, format.FieldRepetitionType_REQUIRED, 1 /* numChildren */, 0 /* fieldID */),
+		NewPrimitive("a" /* name */, format.FieldRepetitionType_REQUIRED, format.Type_INT32, 1 /* fieldID */)}
+	s.NotPanics(func() { s.convert(elements) })
+
+	elements[0] = NewGroup("not-repeated" /* name */, format.FieldRepetitionType_OPTIONAL, 1 /* numChildren */, 0 /* fieldID */)
+	s.NotPanics(func() { s.convert(elements) })
+}
+
+func (s *SchemaConverterSuite) TestNotEnoughChildren() {
+	s.Panics(func() {
+		s.convert([]*format.SchemaElement{NewGroup(s.name, format.FieldRepetitionType_REPEATED, 2 /* numChildren */, 0 /* fieldID */)})
+	})
+}
+
+func TestColumnDesc(t *testing.T) {
+	n := schema.MustPrimitive(schema.NewPrimitiveNodeConverted("name" /* name */, parquet.Repetitions.Optional, parquet.Types.ByteArray,
+		schema.ConvertedTypes.UTF8, 0 /* type len */, 0 /* precision */, 0 /* scale */, -1 /* fieldID */))
+	descr := schema.NewColumn(n, 4, 1)
+
+	assert.Equal(t, "name", descr.Name())
+	assert.EqualValues(t, 4, descr.MaxDefinitionLevel())
+	assert.EqualValues(t, 1, descr.MaxRepetitionLevel())
+	assert.Equal(t, parquet.Types.ByteArray, descr.PhysicalType())
+	assert.Equal(t, -1, descr.TypeLength())
+
+	expectedDesc := `column descriptor = {
+  name: name,
+  path: ,
+  physical_type: BYTE_ARRAY,
+  converted_type: UTF8,
+  logical_type: String,
+  max_definition_level: 4,
+  max_repetition_level: 1,
+}`
+	assert.Equal(t, expectedDesc, descr.String())
+
+	n = schema.MustPrimitive(schema.NewPrimitiveNodeConverted("name" /* name */, parquet.Repetitions.Optional, parquet.Types.FixedLenByteArray, schema.ConvertedTypes.Decimal, 12 /* type len */, 10 /* precision */, 4 /* scale */, -1 /* fieldID */))
+	descr2 := schema.NewColumn(n, 4, 1)
+
+	assert.Equal(t, parquet.Types.FixedLenByteArray, descr2.PhysicalType())
+	assert.Equal(t, 12, descr2.TypeLength())
+
+	expectedDesc = `column descriptor = {
+  name: name,
+  path: ,
+  physical_type: FIXED_LEN_BYTE_ARRAY,
+  converted_type: DECIMAL,
+  logical_type: Decimal(precision=10, scale=4),
+  max_definition_level: 4,
+  max_repetition_level: 1,
+  length: 12,
+  precision: 10,
+  scale: 4,
+}`
+	assert.Equal(t, expectedDesc, descr2.String())
+}
+
+func TestSchemaDescriptor(t *testing.T) {
+	t.Run("Equals", func(t *testing.T) {
+		inta := schema.NewInt32Node("a" /* name */, parquet.Repetitions.Required, -1 /* fieldID */)
+		intb := schema.NewInt64Node("b" /* name */, parquet.Repetitions.Optional, -1 /* fieldID */)
+		intb2 := schema.NewInt64Node("b2" /* name */, parquet.Repetitions.Optional, -1 /* fieldID */)
+		intc := schema.NewByteArrayNode("c" /* name */, parquet.Repetitions.Repeated, -1 /* fieldID */)
+
+		item1 := schema.NewInt64Node("item1" /* name */, parquet.Repetitions.Required, -1 /* fieldID */)
+		item2 := schema.NewBooleanNode("item2" /* name */, parquet.Repetitions.Optional, -1 /* fieldID */)
+		item3 := schema.NewInt32Node("item3" /* name */, parquet.Repetitions.Repeated, -1 /* fieldID */)
+		list := schema.MustGroup(schema.NewGroupNodeConverted("records" /* name */, parquet.Repetitions.Repeated, schema.FieldList{item1, item2, item3}, schema.ConvertedTypes.List, -1 /* fieldID */))
+
+		bag := schema.MustGroup(schema.NewGroupNode("bag" /* name */, parquet.Repetitions.Optional, schema.FieldList{list}, -1 /* fieldID */))
+		bag2 := schema.MustGroup(schema.NewGroupNode("bag" /* name */, parquet.Repetitions.Required, schema.FieldList{list}, -1 /* fieldID */))
+
+		descr1 := schema.NewSchema(schema.MustGroup(schema.NewGroupNode("schema" /* name */, parquet.Repetitions.Repeated, schema.FieldList{inta, intb, intc, bag}, -1 /* fieldID */)))
+		assert.True(t, descr1.Equals(descr1))
+
+		descr2 := schema.NewSchema(schema.MustGroup(schema.NewGroupNode("schema" /* name */, parquet.Repetitions.Repeated, schema.FieldList{inta, intb, intc, bag2}, -1 /* fieldID */)))
+		assert.False(t, descr1.Equals(descr2))
+
+		descr3 := schema.NewSchema(schema.MustGroup(schema.NewGroupNode("schema" /* name */, parquet.Repetitions.Repeated, schema.FieldList{inta, intb2, intc, bag}, -1 /* fieldID */)))
+		assert.False(t, descr1.Equals(descr3))
+
+		descr4 := schema.NewSchema(schema.MustGroup(schema.NewGroupNode("SCHEMA" /* name */, parquet.Repetitions.Repeated, schema.FieldList{inta, intb, intc, bag}, -1 /* fieldID */)))
+		assert.True(t, descr1.Equals(descr4))
+
+		descr5 := schema.NewSchema(schema.MustGroup(schema.NewGroupNode("schema" /* name */, parquet.Repetitions.Repeated, schema.FieldList{inta, intb, intc, bag, intb2}, -1 /* fieldID */)))
+		assert.False(t, descr1.Equals(descr5))
+
+		col1 := schema.NewColumn(inta, 5 /* maxDefLvl */, 1 /* maxRepLvl */)
+		col2 := schema.NewColumn(inta, 6 /* maxDefLvl */, 1 /* maxRepLvl */)
+		col3 := schema.NewColumn(inta, 5 /* maxDefLvl */, 2 /* maxRepLvl */)
+
+		assert.True(t, col1.Equals(col1))
+		assert.False(t, col1.Equals(col2))
+		assert.False(t, col2.Equals(col3))
+	})
+
+	t.Run("BuildTree", func(t *testing.T) {
+		inta := schema.NewInt32Node("a" /* name */, parquet.Repetitions.Required, -1 /* fieldID */)
+		fields := schema.FieldList{inta}
+		fields = append(fields,
+			schema.NewInt64Node("b" /* name */, parquet.Repetitions.Optional, -1 /* fieldID */),
+			schema.NewByteArrayNode("c" /* name */, parquet.Repetitions.Repeated, -1 /* fieldID */))
+
+		item1 := schema.NewInt64Node("item1" /* name */, parquet.Repetitions.Required, -1 /* fieldID */)
+		item2 := schema.NewBooleanNode("item2" /* name */, parquet.Repetitions.Optional, -1 /* fieldID */)
+		item3 := schema.NewInt32Node("item3" /* name */, parquet.Repetitions.Repeated, -1 /* fieldID */)
+		list := schema.MustGroup(schema.NewGroupNodeConverted("records" /* name */, parquet.Repetitions.Repeated, schema.FieldList{item1, item2, item3}, schema.ConvertedTypes.List, -1 /* fieldID */))
+		bag := schema.MustGroup(schema.NewGroupNode("bag" /* name */, parquet.Repetitions.Optional, schema.FieldList{list}, -1 /* fieldID */))
+		fields = append(fields, bag)
+
+		sc := schema.MustGroup(schema.NewGroupNode("schema" /* name */, parquet.Repetitions.Repeated, fields, -1 /* fieldID */))
+		descr := schema.NewSchema(sc)
+
+		const nleaves = 6
+		assert.Equal(t, nleaves, descr.NumColumns())
+
+		//                             mdef mrep
+		// required int32 a            0    0
+		// optional int64 b            1    0
+		// repeated byte_array c       1    1
+		// optional group bag          1    0
+		//   repeated group records    2    1
+		//     required int64 item1    2    1
+		//     optional boolean item2  3    1
+		//     repeated int32 item3    3    2
+		var (
+			exMaxDefLevels = [...]int16{0, 1, 1, 2, 3, 3}
+			exMaxRepLevels = [...]int16{0, 0, 1, 1, 1, 2}
+		)
+
+		for i := 0; i < nleaves; i++ {
+			col := descr.Column(i)
+			assert.Equal(t, exMaxDefLevels[i], col.MaxDefinitionLevel())
+			assert.Equal(t, exMaxRepLevels[i], col.MaxRepetitionLevel())
+		}
+
+		assert.Equal(t, "a", descr.Column(0).Path())
+		assert.Equal(t, "b", descr.Column(1).Path())
+		assert.Equal(t, "c", descr.Column(2).Path())
+		assert.Equal(t, "bag.records.item1", descr.Column(3).Path())
+		assert.Equal(t, "bag.records.item2", descr.Column(4).Path())
+		assert.Equal(t, "bag.records.item3", descr.Column(5).Path())
+
+		for i := 0; i < nleaves; i++ {
+			col := descr.Column(i)
+			assert.Equal(t, i, descr.ColumnIndexByNode(col.SchemaNode()))
+		}
+
+		nonColumnAlien := schema.NewInt32Node("alien" /* name */, parquet.Repetitions.Required, -1 /* fieldID */)
+		nonColumnFamiliar := schema.NewInt32Node("a" /* name */, parquet.Repetitions.Repeated, -1 /* fieldID */)
+		assert.Less(t, descr.ColumnIndexByNode(nonColumnAlien), 0)
+		assert.Less(t, descr.ColumnIndexByNode(nonColumnFamiliar), 0)
+
+		assert.Same(t, inta, descr.ColumnRoot(0))
+		assert.Same(t, bag, descr.ColumnRoot(3))
+		assert.Same(t, bag, descr.ColumnRoot(4))
+		assert.Same(t, bag, descr.ColumnRoot(5))
+
+		assert.Same(t, sc, descr.Root())
+	})
+
+	t.Run("HasRepeatedFields", func(t *testing.T) {
+		inta := schema.NewInt32Node("a" /* name */, parquet.Repetitions.Required, -1 /* fieldID */)
+		fields := schema.FieldList{inta}
+		fields = append(fields,
+			schema.NewInt64Node("b" /* name */, parquet.Repetitions.Optional, -1 /* fieldID */),
+			schema.NewByteArrayNode("c" /* name */, parquet.Repetitions.Repeated, -1 /* fieldID */))
+
+		sc := schema.MustGroup(schema.NewGroupNode("schema" /* name */, parquet.Repetitions.Repeated, fields, -1 /* fieldID */))
+		descr := schema.NewSchema(sc)
+		assert.True(t, descr.HasRepeatedFields())
+
+		item1 := schema.NewInt64Node("item1" /* name */, parquet.Repetitions.Required, -1 /* fieldID */)
+		item2 := schema.NewBooleanNode("item2" /* name */, parquet.Repetitions.Optional, -1 /* fieldID */)
+		item3 := schema.NewInt32Node("item3" /* name */, parquet.Repetitions.Repeated, -1 /* fieldID */)
+		list := schema.MustGroup(schema.NewGroupNodeConverted("records" /* name */, parquet.Repetitions.Repeated, schema.FieldList{item1, item2, item3}, schema.ConvertedTypes.List, -1 /* fieldID */))
+		bag := schema.MustGroup(schema.NewGroupNode("bag" /* name */, parquet.Repetitions.Optional, schema.FieldList{list}, -1 /* fieldID */))
+		fields = append(fields, bag)
+
+		sc = schema.MustGroup(schema.NewGroupNode("schema" /* name */, parquet.Repetitions.Repeated, fields, -1 /* fieldID */))
+		descr = schema.NewSchema(sc)
+		assert.True(t, descr.HasRepeatedFields())
+
+		itemKey := schema.NewInt64Node("key" /* name */, parquet.Repetitions.Required, -1 /* fieldID */)
+		itemValue := schema.NewBooleanNode("value" /* name */, parquet.Repetitions.Optional, -1 /* fieldID */)
+		sc = schema.MustGroup(schema.NewGroupNode("schema" /* name */, parquet.Repetitions.Repeated, append(fields, schema.FieldList{
+			schema.MustGroup(schema.NewGroupNode("my_map" /* name */, parquet.Repetitions.Optional, schema.FieldList{
+				schema.MustGroup(schema.NewGroupNodeConverted("map" /* name */, parquet.Repetitions.Repeated, schema.FieldList{itemKey, itemValue}, schema.ConvertedTypes.Map, -1 /* fieldID */)),
+			}, -1 /* fieldID */)),
+		}...), -1 /* fieldID */))
+		descr = schema.NewSchema(sc)
+		assert.True(t, descr.HasRepeatedFields())
+	})
+}
+
+func ExamplePrintSchema() {
+	fields := schema.FieldList{schema.NewInt32Node("a" /* name */, parquet.Repetitions.Required, 1 /* fieldID */)}
+	item1 := schema.NewInt64Node("item1" /* name */, parquet.Repetitions.Optional, 4 /* fieldID */)
+	item2 := schema.NewBooleanNode("item2" /* name */, parquet.Repetitions.Required, 5 /* fieldID */)
+	list := schema.MustGroup(schema.NewGroupNodeConverted("b" /* name */, parquet.Repetitions.Repeated, schema.FieldList{item1, item2}, schema.ConvertedTypes.List, 3 /* fieldID */))
+	bag := schema.MustGroup(schema.NewGroupNode("bag" /* name */, parquet.Repetitions.Optional, schema.FieldList{list}, 2 /* fieldID */))
+	fields = append(fields, bag)
+
+	fields = append(fields,
+		schema.MustPrimitive(schema.NewPrimitiveNodeConverted("c" /* name */, parquet.Repetitions.Required, parquet.Types.Int32, schema.ConvertedTypes.Decimal, 0 /* type len */, 3 /* precision */, 2 /* scale */, 6 /* fieldID */)),
+		schema.MustPrimitive(schema.NewPrimitiveNodeLogical("d" /* name */, parquet.Repetitions.Required, schema.NewDecimalLogicalType(10 /* precision */, 5 /* scale */), parquet.Types.Int64, -1 /* type len */, 7 /* fieldID */)))
+
+	sc := schema.MustGroup(schema.NewGroupNode("schema" /* name */, parquet.Repetitions.Repeated, fields, 0 /* fieldID */))
+	schema.PrintSchema(sc, os.Stdout, 2)
+
+	// Output:
+	// repeated group field_id=0 schema {
+	//   required int32 field_id=1 a;
+	//   optional group field_id=2 bag {
+	//     repeated group field_id=3 b (List) {
+	//       optional int64 field_id=4 item1;
+	//       required boolean field_id=5 item2;
+	//     }
+	//   }
+	//   required int32 field_id=6 c (Decimal(precision=3, scale=2));
+	//   required int64 field_id=7 d (Decimal(precision=10, scale=5));
+	// }
+}
+
+func TestPanicSchemaNodeCreation(t *testing.T) {
+	assert.Panics(t, func() {
+		schema.MustPrimitive(schema.NewPrimitiveNodeLogical("map" /* name */, parquet.Repetitions.Required, schema.MapLogicalType{}, parquet.Types.Int64, -1 /* type len */, -1 /* fieldID */))
+	}, "nested logical type on non-group node")
+
+	assert.Panics(t, func() {
+		schema.MustPrimitive(schema.NewPrimitiveNodeLogical("string" /* name */, parquet.Repetitions.Required, schema.StringLogicalType{}, parquet.Types.Boolean, -1 /* type len */, -1 /* fieldID */))
+	}, "incompatible primitive type")
+
+	assert.Panics(t, func() {
+		schema.MustPrimitive(schema.NewPrimitiveNodeLogical("interval" /* name */, parquet.Repetitions.Required, schema.IntervalLogicalType{}, parquet.Types.FixedLenByteArray, 11 /* type len */, -1 /* fieldID */))
+	}, "incompatible primitive length")
+
+	assert.Panics(t, func() {
+		schema.MustPrimitive(schema.NewPrimitiveNodeLogical("decimal" /* name */, parquet.Repetitions.Required, schema.NewDecimalLogicalType(16, 6), parquet.Types.Int32, -1 /* type len */, -1 /* fieldID */))
+	}, "primitive too small for given precision")
+
+	assert.Panics(t, func() {
+		schema.MustPrimitive(schema.NewPrimitiveNodeLogical("uuid" /* name */, parquet.Repetitions.Required, schema.UUIDLogicalType{}, parquet.Types.FixedLenByteArray, 64 /* type len */, -1 /* fieldID */))
+	}, "incompatible primitive length")
+
+	assert.Panics(t, func() {
+		schema.MustPrimitive(schema.NewPrimitiveNodeLogical("negative_len" /* name */, parquet.Repetitions.Required, schema.NoLogicalType{}, parquet.Types.FixedLenByteArray, -16 /* type len */, -1 /* fieldID */))
+	}, "non-positive length for fixed length binary")
+
+	assert.Panics(t, func() {
+		schema.MustPrimitive(schema.NewPrimitiveNodeLogical("zero_len" /* name */, parquet.Repetitions.Required, schema.NoLogicalType{}, parquet.Types.FixedLenByteArray, 0 /* type len */, -1 /* fieldID */))
+	}, "non-positive length for fixed length binary")
+
+	assert.Panics(t, func() {
+		schema.MustGroup(schema.NewGroupNodeLogical("list" /* name */, parquet.Repetitions.Repeated, schema.FieldList{}, schema.JSONLogicalType{}, -1 /* fieldID */))
+	}, "non-nested logical type on group node")
+}
+
+func TestNullLogicalConvertsToNone(t *testing.T) {
+	var (
+		empty schema.LogicalType
+		n     schema.Node
+	)
+	assert.NotPanics(t, func() {
+		n = schema.MustPrimitive(schema.NewPrimitiveNodeLogical("value" /* name */, parquet.Repetitions.Required, empty, parquet.Types.Double, -1 /* type len */, -1 /* fieldID */))
+	})
+	assert.True(t, n.LogicalType().IsNone())
+	assert.Equal(t, schema.ConvertedTypes.None, n.ConvertedType())
+	assert.NotPanics(t, func() {
+		n = schema.MustGroup(schema.NewGroupNodeLogical("items" /* name */, parquet.Repetitions.Repeated, schema.FieldList{}, empty, -1 /* fieldID */))
+	})
+	assert.True(t, n.LogicalType().IsNone())
+	assert.Equal(t, schema.ConvertedTypes.None, n.ConvertedType())
+}

From fa47050e497c946800d324f222e32f814fc87785 Mon Sep 17 00:00:00 2001
From: liyafan82 <fan_li_ya@foxmail.com>
Date: Wed, 19 May 2021 22:52:12 -0700
Subject: [PATCH 272/719] ARROW-12310: [Java] ValueVector#getObject should
 support covariance for complex types

Currently, the `ValueVector#getObject` API supports covariance for primitive types.
For example, `IntVector#getObject` returns `Integer` while `BitVector#getObject` returns `Boolean`.

For complex types, we should also support covariance. For example, `ListVector#getObject` should return a List

This will help reduce unnecessary casts, and enforce type safety.

Closes #9964 from liyafan82/fly_0408_cv

Authored-by: liyafan82 <fan_li_ya@foxmail.com>
Signed-off-by: Micah Kornfield <emkornfield@gmail.com>
---
 .../java/org/apache/arrow/AvroTestBase.java   |  5 +-
 .../TestHashTableDictionaryEncoder.java       |  2 +-
 .../vector/complex/FixedSizeListVector.java   |  2 +-
 .../arrow/vector/complex/LargeListVector.java |  2 +-
 .../arrow/vector/complex/ListVector.java      |  2 +-
 .../complex/NonNullableStructVector.java      |  2 +-
 .../arrow/vector/complex/StructVector.java    |  3 +-
 .../arrow/vector/TestDictionaryVector.java    | 47 ++++++++--------
 .../arrow/vector/TestFixedSizeListVector.java | 31 ++++++-----
 .../arrow/vector/TestLargeListVector.java     | 54 +++++++++----------
 .../apache/arrow/vector/TestListVector.java   | 48 ++++++++---------
 .../apache/arrow/vector/TestMapVector.java    | 28 +++++-----
 .../apache/arrow/vector/ipc/BaseFileTest.java |  3 +-
 13 files changed, 113 insertions(+), 116 deletions(-)

diff --git a/java/adapter/avro/src/test/java/org/apache/arrow/AvroTestBase.java b/java/adapter/avro/src/test/java/org/apache/arrow/AvroTestBase.java
index f24f0f1a07d..a00cd7704d4 100644
--- a/java/adapter/avro/src/test/java/org/apache/arrow/AvroTestBase.java
+++ b/java/adapter/avro/src/test/java/org/apache/arrow/AvroTestBase.java
@@ -36,7 +36,6 @@
 import org.apache.arrow.vector.VectorSchemaRoot;
 import org.apache.arrow.vector.complex.ListVector;
 import org.apache.arrow.vector.complex.StructVector;
-import org.apache.arrow.vector.util.JsonStringArrayList;
 import org.apache.arrow.vector.util.Text;
 import org.apache.avro.Schema;
 import org.apache.avro.generic.GenericDatumWriter;
@@ -88,7 +87,7 @@ protected VectorSchemaRoot writeAndRead(Schema schema, List data) throws Excepti
   protected void checkArrayResult(List<List<?>> expected, ListVector vector) {
     assertEquals(expected.size(), vector.getValueCount());
     for (int i = 0; i < expected.size(); i++) {
-      checkArrayElement(expected.get(i), (JsonStringArrayList) vector.getObject(i));
+      checkArrayElement(expected.get(i), vector.getObject(i));
     }
   }
 
@@ -177,7 +176,7 @@ protected void checkArrayResult(List<List<?>> expected, List<ListVector> vectors
     int index = 0;
     for (ListVector vector : vectors) {
       for (int i = 0; i < vector.getValueCount(); i++) {
-        checkArrayElement(expected.get(index++), (JsonStringArrayList) vector.getObject(i));
+        checkArrayElement(expected.get(index++), vector.getObject(i));
       }
     }
   }
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableDictionaryEncoder.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableDictionaryEncoder.java
index 56fdfe96993..dd22ac96fac 100644
--- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableDictionaryEncoder.java
+++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableDictionaryEncoder.java
@@ -250,7 +250,7 @@ public void testEncodeStrings() {
 
         assertEquals(vector.getValueCount(), decoded.getValueCount());
         for (int i = 0; i < 5; i++) {
-          assertEquals(vector.getObject(i), ((VarCharVector) decoded).getObject(i));
+          assertEquals(vector.getObject(i), decoded.getObject(i));
         }
       }
     }
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java
index 67673051a89..c22cba43c56 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java
@@ -445,7 +445,7 @@ public ArrowBuf getOffsetBuffer() {
   }
 
   @Override
-  public Object getObject(int index) {
+  public List<?> getObject(int index) {
     if (isSet(index) == 0) {
       return null;
     }
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java
index ef70a012ce9..31e30cc44d4 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java
@@ -822,7 +822,7 @@ protected void invalidateReader() {
    * @return Object at given position
    */
   @Override
-  public Object getObject(int index) {
+  public List<?> getObject(int index) {
     if (isSet(index) == 0) {
       return null;
     }
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java
index cd77b94e701..7e969263cb9 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java
@@ -699,7 +699,7 @@ protected void invalidateReader() {
    * @return Object at given position
    */
   @Override
-  public Object getObject(int index) {
+  public List<?> getObject(int index) {
     if (isSet(index) == 0) {
       return null;
     }
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java
index 436b4d170c3..4da2668121a 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java
@@ -315,7 +315,7 @@ public int getValueCapacity() {
   }
 
   @Override
-  public Object getObject(int index) {
+  public Map<String, ?> getObject(int index) {
     Map<String, Object> vv = new JsonStringHashMap<>();
     for (String child : getChildFieldNames()) {
       ValueVector v = getChild(child);
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java
index 60ac2432a6c..18d8eec615d 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java
@@ -23,6 +23,7 @@
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
+import java.util.Map;
 
 import org.apache.arrow.memory.ArrowBuf;
 import org.apache.arrow.memory.BufferAllocator;
@@ -507,7 +508,7 @@ public ArrowBuf getOffsetBuffer() {
   }
 
   @Override
-  public Object getObject(int index) {
+  public Map<String, ?> getObject(int index) {
     if (isSet(index) == 0) {
       return null;
     } else {
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java
index e8fc444d14a..165cb7bad3e 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java
@@ -29,6 +29,7 @@
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.Iterator;
+import java.util.List;
 import java.util.Map;
 import java.util.function.ToIntBiFunction;
 
@@ -51,8 +52,6 @@
 import org.apache.arrow.vector.types.pojo.ArrowType;
 import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
 import org.apache.arrow.vector.types.pojo.FieldType;
-import org.apache.arrow.vector.util.JsonStringArrayList;
-import org.apache.arrow.vector.util.JsonStringHashMap;
 import org.apache.arrow.vector.util.Text;
 import org.junit.After;
 import org.junit.Before;
@@ -654,17 +653,17 @@ public void testEncodeListSubField() {
         assertEquals(ListVector.class, encoded.getClass());
 
         assertEquals(6, encoded.getValueCount());
-        int[] realValue1 = convertListToIntArray((JsonStringArrayList) encoded.getObject(0));
+        int[] realValue1 = convertListToIntArray(encoded.getObject(0));
         assertTrue(Arrays.equals(new int[] {0, 1}, realValue1));
-        int[] realValue2 = convertListToIntArray((JsonStringArrayList) encoded.getObject(1));
+        int[] realValue2 = convertListToIntArray(encoded.getObject(1));
         assertTrue(Arrays.equals(new int[] {0, 1}, realValue2));
-        int[] realValue3 = convertListToIntArray((JsonStringArrayList) encoded.getObject(2));
+        int[] realValue3 = convertListToIntArray(encoded.getObject(2));
         assertTrue(Arrays.equals(new int[] {0, 1}, realValue3));
-        int[] realValue4 = convertListToIntArray((JsonStringArrayList) encoded.getObject(3));
+        int[] realValue4 = convertListToIntArray(encoded.getObject(3));
         assertTrue(Arrays.equals(new int[] {2, 3, 4}, realValue4));
-        int[] realValue5 = convertListToIntArray((JsonStringArrayList) encoded.getObject(4));
+        int[] realValue5 = convertListToIntArray(encoded.getObject(4));
         assertTrue(Arrays.equals(new int[] {2, 3, 4}, realValue5));
-        int[] realValue6 = convertListToIntArray((JsonStringArrayList) encoded.getObject(5));
+        int[] realValue6 = convertListToIntArray(encoded.getObject(5));
         assertTrue(Arrays.equals(new int[] {0, 1}, realValue6));
 
         // now run through the decoder and verify we get the original back
@@ -732,13 +731,13 @@ public void testEncodeFixedSizeListSubField() {
         assertEquals(FixedSizeListVector.class, encoded.getClass());
 
         assertEquals(4, encoded.getValueCount());
-        int[] realValue1 = convertListToIntArray((JsonStringArrayList) encoded.getObject(0));
+        int[] realValue1 = convertListToIntArray(encoded.getObject(0));
         assertTrue(Arrays.equals(new int[] {0, 1}, realValue1));
-        int[] realValue2 = convertListToIntArray((JsonStringArrayList) encoded.getObject(1));
+        int[] realValue2 = convertListToIntArray(encoded.getObject(1));
         assertTrue(Arrays.equals(new int[] {0, 1}, realValue2));
-        int[] realValue3 = convertListToIntArray((JsonStringArrayList) encoded.getObject(2));
+        int[] realValue3 = convertListToIntArray(encoded.getObject(2));
         assertTrue(Arrays.equals(new int[] {2, 3}, realValue3));
-        int[] realValue4 = convertListToIntArray((JsonStringArrayList) encoded.getObject(3));
+        int[] realValue4 = convertListToIntArray(encoded.getObject(3));
         assertTrue(Arrays.equals(new int[] {0, 1}, realValue4));
 
         // now run through the decoder and verify we get the original back
@@ -799,15 +798,15 @@ public void testEncodeStructSubField() {
         assertEquals(StructVector.class, encoded.getClass());
 
         assertEquals(5, encoded.getValueCount());
-        Object[] realValue1 = convertMapValuesToArray((JsonStringHashMap) encoded.getObject(0));
+        Object[] realValue1 = convertMapValuesToArray(encoded.getObject(0));
         assertTrue(Arrays.equals(new Object[] {0, 1}, realValue1));
-        Object[] realValue2 = convertMapValuesToArray((JsonStringHashMap) encoded.getObject(1));
+        Object[] realValue2 = convertMapValuesToArray(encoded.getObject(1));
         assertTrue(Arrays.equals(new Object[] {1, 2}, realValue2));
-        Object[] realValue3 = convertMapValuesToArray((JsonStringHashMap) encoded.getObject(2));
+        Object[] realValue3 = convertMapValuesToArray(encoded.getObject(2));
         assertTrue(Arrays.equals(new Object[] {2, 0}, realValue3));
-        Object[] realValue4 = convertMapValuesToArray((JsonStringHashMap) encoded.getObject(3));
+        Object[] realValue4 = convertMapValuesToArray(encoded.getObject(3));
         assertTrue(Arrays.equals(new Object[] {0, 0}, realValue4));
-        Object[] realValue5 = convertMapValuesToArray((JsonStringHashMap) encoded.getObject(4));
+        Object[] realValue5 = convertMapValuesToArray(encoded.getObject(4));
         assertTrue(Arrays.equals(new Object[] {3, 0}, realValue5));
 
         // now run through the decoder and verify we get the original back
@@ -856,15 +855,15 @@ public void testEncodeStructSubFieldWithCertainColumns() {
         assertEquals(StructVector.class, encoded.getClass());
 
         assertEquals(5, encoded.getValueCount());
-        Object[] realValue1 = convertMapValuesToArray((JsonStringHashMap) encoded.getObject(0));
+        Object[] realValue1 = convertMapValuesToArray(encoded.getObject(0));
         assertTrue(Arrays.equals(new Object[] {0, new Text("baz")}, realValue1));
-        Object[] realValue2 = convertMapValuesToArray((JsonStringHashMap) encoded.getObject(1));
+        Object[] realValue2 = convertMapValuesToArray(encoded.getObject(1));
         assertTrue(Arrays.equals(new Object[] {1, new Text("bar")}, realValue2));
-        Object[] realValue3 = convertMapValuesToArray((JsonStringHashMap) encoded.getObject(2));
+        Object[] realValue3 = convertMapValuesToArray(encoded.getObject(2));
         assertTrue(Arrays.equals(new Object[] {2, new Text("foo")}, realValue3));
-        Object[] realValue4 = convertMapValuesToArray((JsonStringHashMap) encoded.getObject(3));
+        Object[] realValue4 = convertMapValuesToArray(encoded.getObject(3));
         assertTrue(Arrays.equals(new Object[] {0, new Text("foo")}, realValue4));
-        Object[] realValue5 = convertMapValuesToArray((JsonStringHashMap) encoded.getObject(4));
+        Object[] realValue5 = convertMapValuesToArray(encoded.getObject(4));
         assertTrue(Arrays.equals(new Object[] {3, new Text("foo")}, realValue5));
 
         // now run through the decoder and verify we get the original back
@@ -982,7 +981,7 @@ public void testDictionaryUIntOverflow() {
     }
   }
 
-  private int[] convertListToIntArray(JsonStringArrayList list) {
+  private int[] convertListToIntArray(List list) {
     int[] values = new int[list.size()];
     for (int i = 0; i < list.size(); i++) {
       values[i] = (int) list.get(i);
@@ -990,7 +989,7 @@ private int[] convertListToIntArray(JsonStringArrayList list) {
     return values;
   }
 
-  private Object[] convertMapValuesToArray(JsonStringHashMap map) {
+  private Object[] convertMapValuesToArray(Map map) {
     Object[] values = new Object[map.size()];
     Iterator valueIterator = map.values().iterator();
     for (int i = 0; i < map.size(); i++) {
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java
index c8bb37132ca..365789e04c8 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java
@@ -37,7 +37,6 @@
 import org.apache.arrow.vector.types.Types.MinorType;
 import org.apache.arrow.vector.types.pojo.ArrowType;
 import org.apache.arrow.vector.types.pojo.FieldType;
-import org.apache.arrow.vector.util.JsonStringArrayList;
 import org.apache.arrow.vector.util.TransferPair;
 import org.junit.After;
 import org.junit.Assert;
@@ -293,11 +292,11 @@ public void testUnionFixedSizeListWriter() throws Exception {
 
       assertEquals(3, vector1.getValueCount());
 
-      int[] realValue1 = convertListToIntArray((JsonStringArrayList) vector1.getObject(0));
+      int[] realValue1 = convertListToIntArray(vector1.getObject(0));
       assertTrue(Arrays.equals(values1, realValue1));
-      int[] realValue2 = convertListToIntArray((JsonStringArrayList) vector1.getObject(1));
+      int[] realValue2 = convertListToIntArray(vector1.getObject(1));
       assertTrue(Arrays.equals(values2, realValue2));
-      int[] realValue3 = convertListToIntArray((JsonStringArrayList) vector1.getObject(2));
+      int[] realValue3 = convertListToIntArray(vector1.getObject(2));
       assertTrue(Arrays.equals(values3, realValue3));
     }
   }
@@ -366,9 +365,9 @@ public void testWriteIllegalData() throws Exception {
       writer1.setValueCount(3);
 
       assertEquals(3, vector1.getValueCount());
-      int[] realValue1 = convertListToIntArray((JsonStringArrayList) vector1.getObject(0));
+      int[] realValue1 = convertListToIntArray(vector1.getObject(0));
       assertTrue(Arrays.equals(values1, realValue1));
-      int[] realValue2 = convertListToIntArray((JsonStringArrayList) vector1.getObject(1));
+      int[] realValue2 = convertListToIntArray(vector1.getObject(1));
       assertTrue(Arrays.equals(values2, realValue2));
     }
   }
@@ -395,9 +394,9 @@ public void testSplitAndTransfer() throws Exception {
       FixedSizeListVector targetVector = (FixedSizeListVector) transferPair.getTo();
 
       assertEquals(2, targetVector.getValueCount());
-      int[] realValue1 = convertListToIntArray((JsonStringArrayList) targetVector.getObject(0));
+      int[] realValue1 = convertListToIntArray(targetVector.getObject(0));
       assertTrue(Arrays.equals(values1, realValue1));
-      int[] realValue2 = convertListToIntArray((JsonStringArrayList) targetVector.getObject(1));
+      int[] realValue2 = convertListToIntArray(targetVector.getObject(1));
       assertTrue(Arrays.equals(values2, realValue2));
 
       targetVector.clear();
@@ -425,12 +424,12 @@ public void testZeroWidthVector() {
 
       assertEquals(4, vector1.getValueCount());
 
-      int[] realValue1 = convertListToIntArray((JsonStringArrayList) vector1.getObject(0));
+      int[] realValue1 = convertListToIntArray(vector1.getObject(0));
       assertArrayEquals(values1, realValue1);
-      int[] realValue2 = convertListToIntArray((JsonStringArrayList) vector1.getObject(1));
+      int[] realValue2 = convertListToIntArray(vector1.getObject(1));
       assertArrayEquals(values2, realValue2);
       assertNull(vector1.getObject(2));
-      int[] realValue4 = convertListToIntArray((JsonStringArrayList) vector1.getObject(3));
+      int[] realValue4 = convertListToIntArray(vector1.getObject(3));
       assertArrayEquals(values4, realValue4);
     }
   }
@@ -456,18 +455,18 @@ public void testVectorWithNulls() {
 
       assertEquals(4, vector1.getValueCount());
 
-      List realValue1 = (JsonStringArrayList) vector1.getObject(0);
+      List realValue1 = vector1.getObject(0);
       assertEquals(values1, realValue1);
-      List realValue2 = (JsonStringArrayList) vector1.getObject(1);
+      List realValue2 = vector1.getObject(1);
       assertEquals(values2, realValue2);
-      List realValue3 = (JsonStringArrayList) vector1.getObject(2);
+      List realValue3 = vector1.getObject(2);
       assertEquals(values3, realValue3);
-      List realValue4 = (JsonStringArrayList) vector1.getObject(3);
+      List realValue4 = vector1.getObject(3);
       assertEquals(values4, realValue4);
     }
   }
 
-  private int[] convertListToIntArray(JsonStringArrayList list) {
+  private int[] convertListToIntArray(List list) {
     int[] values = new int[list.size()];
     for (int i = 0; i < list.size(); i++) {
       values[i] = (int) list.get(i);
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java
index fc2a78597f3..c1d60da4d59 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java
@@ -101,9 +101,9 @@ public void testCopyFrom() throws Exception {
       Object result = outVector.getObject(0);
       ArrayList<Long> resultSet = (ArrayList<Long>) result;
       assertEquals(3, resultSet.size());
-      assertEquals(new Long(1), (Long) resultSet.get(0));
-      assertEquals(new Long(2), (Long) resultSet.get(1));
-      assertEquals(new Long(3), (Long) resultSet.get(2));
+      assertEquals(new Long(1), resultSet.get(0));
+      assertEquals(new Long(2), resultSet.get(1));
+      assertEquals(new Long(3), resultSet.get(2));
 
       /* index 1 */
       result = outVector.getObject(1);
@@ -220,37 +220,37 @@ public void testSetLastSetUsage() throws Exception {
       offset = (int) offsetBuffer.getLong(index * LargeListVector.OFFSET_WIDTH);
       assertEquals(Integer.toString(0), Integer.toString(offset));
 
-      Object actual = dataVector.getObject(offset);
-      assertEquals(new Long(10), (Long) actual);
+      Long actual = dataVector.getObject(offset);
+      assertEquals(new Long(10), actual);
       offset++;
       actual = dataVector.getObject(offset);
-      assertEquals(new Long(11), (Long) actual);
+      assertEquals(new Long(11), actual);
       offset++;
       actual = dataVector.getObject(offset);
-      assertEquals(new Long(12), (Long) actual);
+      assertEquals(new Long(12), actual);
 
       index++;
       offset = (int) offsetBuffer.getLong(index * LargeListVector.OFFSET_WIDTH);
       assertEquals(Integer.toString(3), Integer.toString(offset));
 
       actual = dataVector.getObject(offset);
-      assertEquals(new Long(13), (Long) actual);
+      assertEquals(new Long(13), actual);
       offset++;
       actual = dataVector.getObject(offset);
-      assertEquals(new Long(14), (Long) actual);
+      assertEquals(new Long(14), actual);
 
       index++;
       offset = (int) offsetBuffer.getLong(index * LargeListVector.OFFSET_WIDTH);
       assertEquals(Integer.toString(5), Integer.toString(offset));
 
       actual = dataVector.getObject(offset);
-      assertEquals(new Long(15), (Long) actual);
+      assertEquals(new Long(15), actual);
       offset++;
       actual = dataVector.getObject(offset);
-      assertEquals(new Long(16), (Long) actual);
+      assertEquals(new Long(16), actual);
       offset++;
       actual = dataVector.getObject(offset);
-      assertEquals(new Long(17), (Long) actual);
+      assertEquals(new Long(17), actual);
 
       index++;
       offset = (int) offsetBuffer.getLong(index * LargeListVector.OFFSET_WIDTH);
@@ -323,7 +323,7 @@ public void testSplitAndTransfer() throws Exception {
 
       int index = 0;
       int offset = 0;
-      Object actual = null;
+      Long actual = null;
 
       /* index 0 */
       assertFalse(listVector.isNull(index));
@@ -331,13 +331,13 @@ public void testSplitAndTransfer() throws Exception {
       assertEquals(Integer.toString(0), Integer.toString(offset));
 
       actual = dataVector.getObject(offset);
-      assertEquals(new Long(10), (Long) actual);
+      assertEquals(new Long(10), actual);
       offset++;
       actual = dataVector.getObject(offset);
-      assertEquals(new Long(11), (Long) actual);
+      assertEquals(new Long(11), actual);
       offset++;
       actual = dataVector.getObject(offset);
-      assertEquals(new Long(12), (Long) actual);
+      assertEquals(new Long(12), actual);
 
       /* index 1 */
       index++;
@@ -346,10 +346,10 @@ public void testSplitAndTransfer() throws Exception {
       assertEquals(Integer.toString(3), Integer.toString(offset));
 
       actual = dataVector.getObject(offset);
-      assertEquals(new Long(13), (Long) actual);
+      assertEquals(new Long(13), actual);
       offset++;
       actual = dataVector.getObject(offset);
-      assertEquals(new Long(14), (Long) actual);
+      assertEquals(new Long(14), actual);
 
       /* index 2 */
       index++;
@@ -358,16 +358,16 @@ public void testSplitAndTransfer() throws Exception {
       assertEquals(Integer.toString(5), Integer.toString(offset));
 
       actual = dataVector.getObject(offset);
-      assertEquals(new Long(15), (Long) actual);
+      assertEquals(new Long(15), actual);
       offset++;
       actual = dataVector.getObject(offset);
-      assertEquals(new Long(16), (Long) actual);
+      assertEquals(new Long(16), actual);
       offset++;
       actual = dataVector.getObject(offset);
-      assertEquals(new Long(17), (Long) actual);
+      assertEquals(new Long(17), actual);
       offset++;
       actual = dataVector.getObject(offset);
-      assertEquals(new Long(18), (Long) actual);
+      assertEquals(new Long(18), actual);
 
       /* index 3 */
       index++;
@@ -376,7 +376,7 @@ public void testSplitAndTransfer() throws Exception {
       assertEquals(Integer.toString(9), Integer.toString(offset));
 
       actual = dataVector.getObject(offset);
-      assertEquals(new Long(19), (Long) actual);
+      assertEquals(new Long(19), actual);
 
       /* index 4 */
       index++;
@@ -385,16 +385,16 @@ public void testSplitAndTransfer() throws Exception {
       assertEquals(Integer.toString(10), Integer.toString(offset));
 
       actual = dataVector.getObject(offset);
-      assertEquals(new Long(20), (Long) actual);
+      assertEquals(new Long(20), actual);
       offset++;
       actual = dataVector.getObject(offset);
-      assertEquals(new Long(21), (Long) actual);
+      assertEquals(new Long(21), actual);
       offset++;
       actual = dataVector.getObject(offset);
-      assertEquals(new Long(22), (Long) actual);
+      assertEquals(new Long(22), actual);
       offset++;
       actual = dataVector.getObject(offset);
-      assertEquals(new Long(23), (Long) actual);
+      assertEquals(new Long(23), actual);
 
       /* index 5 */
       index++;
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java
index b684efd86c4..ffeedf04d03 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java
@@ -219,37 +219,37 @@ public void testSetLastSetUsage() throws Exception {
       offset = offsetBuffer.getInt(index * ListVector.OFFSET_WIDTH);
       assertEquals(Integer.toString(0), Integer.toString(offset));
 
-      Object actual = dataVector.getObject(offset);
-      assertEquals(new Long(10), (Long) actual);
+      Long actual = dataVector.getObject(offset);
+      assertEquals(new Long(10), actual);
       offset++;
       actual = dataVector.getObject(offset);
-      assertEquals(new Long(11), (Long) actual);
+      assertEquals(new Long(11), actual);
       offset++;
       actual = dataVector.getObject(offset);
-      assertEquals(new Long(12), (Long) actual);
+      assertEquals(new Long(12), actual);
 
       index++;
       offset = offsetBuffer.getInt(index * ListVector.OFFSET_WIDTH);
       assertEquals(Integer.toString(3), Integer.toString(offset));
 
       actual = dataVector.getObject(offset);
-      assertEquals(new Long(13), (Long) actual);
+      assertEquals(new Long(13), actual);
       offset++;
       actual = dataVector.getObject(offset);
-      assertEquals(new Long(14), (Long) actual);
+      assertEquals(new Long(14), actual);
 
       index++;
       offset = offsetBuffer.getInt(index * ListVector.OFFSET_WIDTH);
       assertEquals(Integer.toString(5), Integer.toString(offset));
 
       actual = dataVector.getObject(offset);
-      assertEquals(new Long(15), (Long) actual);
+      assertEquals(new Long(15), actual);
       offset++;
       actual = dataVector.getObject(offset);
-      assertEquals(new Long(16), (Long) actual);
+      assertEquals(new Long(16), actual);
       offset++;
       actual = dataVector.getObject(offset);
-      assertEquals(new Long(17), (Long) actual);
+      assertEquals(new Long(17), actual);
 
       index++;
       offset = offsetBuffer.getInt(index * ListVector.OFFSET_WIDTH);
@@ -322,7 +322,7 @@ public void testSplitAndTransfer() throws Exception {
 
       int index = 0;
       int offset = 0;
-      Object actual = null;
+      Long actual = null;
 
       /* index 0 */
       assertFalse(listVector.isNull(index));
@@ -330,13 +330,13 @@ public void testSplitAndTransfer() throws Exception {
       assertEquals(Integer.toString(0), Integer.toString(offset));
 
       actual = dataVector.getObject(offset);
-      assertEquals(new Long(10), (Long) actual);
+      assertEquals(new Long(10), actual);
       offset++;
       actual = dataVector.getObject(offset);
-      assertEquals(new Long(11), (Long) actual);
+      assertEquals(new Long(11), actual);
       offset++;
       actual = dataVector.getObject(offset);
-      assertEquals(new Long(12), (Long) actual);
+      assertEquals(new Long(12), actual);
 
       /* index 1 */
       index++;
@@ -345,10 +345,10 @@ public void testSplitAndTransfer() throws Exception {
       assertEquals(Integer.toString(3), Integer.toString(offset));
 
       actual = dataVector.getObject(offset);
-      assertEquals(new Long(13), (Long) actual);
+      assertEquals(new Long(13), actual);
       offset++;
       actual = dataVector.getObject(offset);
-      assertEquals(new Long(14), (Long) actual);
+      assertEquals(new Long(14), actual);
 
       /* index 2 */
       index++;
@@ -357,16 +357,16 @@ public void testSplitAndTransfer() throws Exception {
       assertEquals(Integer.toString(5), Integer.toString(offset));
 
       actual = dataVector.getObject(offset);
-      assertEquals(new Long(15), (Long) actual);
+      assertEquals(new Long(15), actual);
       offset++;
       actual = dataVector.getObject(offset);
-      assertEquals(new Long(16), (Long) actual);
+      assertEquals(new Long(16), actual);
       offset++;
       actual = dataVector.getObject(offset);
-      assertEquals(new Long(17), (Long) actual);
+      assertEquals(new Long(17), actual);
       offset++;
       actual = dataVector.getObject(offset);
-      assertEquals(new Long(18), (Long) actual);
+      assertEquals(new Long(18), actual);
 
       /* index 3 */
       index++;
@@ -375,7 +375,7 @@ public void testSplitAndTransfer() throws Exception {
       assertEquals(Integer.toString(9), Integer.toString(offset));
 
       actual = dataVector.getObject(offset);
-      assertEquals(new Long(19), (Long) actual);
+      assertEquals(new Long(19), actual);
 
       /* index 4 */
       index++;
@@ -384,16 +384,16 @@ public void testSplitAndTransfer() throws Exception {
       assertEquals(Integer.toString(10), Integer.toString(offset));
 
       actual = dataVector.getObject(offset);
-      assertEquals(new Long(20), (Long) actual);
+      assertEquals(new Long(20), actual);
       offset++;
       actual = dataVector.getObject(offset);
-      assertEquals(new Long(21), (Long) actual);
+      assertEquals(new Long(21), actual);
       offset++;
       actual = dataVector.getObject(offset);
-      assertEquals(new Long(22), (Long) actual);
+      assertEquals(new Long(22), actual);
       offset++;
       actual = dataVector.getObject(offset);
-      assertEquals(new Long(23), (Long) actual);
+      assertEquals(new Long(23), actual);
 
       /* index 5 */
       index++;
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java
index 6d699a456bd..9637021dbda 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java
@@ -342,15 +342,15 @@ public void testSplitAndTransfer() throws Exception {
       offset = offsetBuffer.getInt(index * MapVector.OFFSET_WIDTH);
       assertEquals(Integer.toString(0), Integer.toString(offset));
 
-      result = (Map<?, ?>) dataVector.getObject(offset);
+      result = dataVector.getObject(offset);
       assertEquals(10L, getResultKey(result));
       assertEquals(1.0, getResultValue(result));
       offset++;
-      result = (Map<?, ?>) dataVector.getObject(offset);
+      result = dataVector.getObject(offset);
       assertEquals(11L, getResultKey(result));
       assertEquals(1.1, getResultValue(result));
       offset++;
-      result = (Map<?, ?>) dataVector.getObject(offset);
+      result = dataVector.getObject(offset);
       assertEquals(12L, getResultKey(result));
       assertEquals(1.2, getResultValue(result));
 
@@ -360,11 +360,11 @@ public void testSplitAndTransfer() throws Exception {
       offset = offsetBuffer.getInt(index * MapVector.OFFSET_WIDTH);
       assertEquals(Integer.toString(3), Integer.toString(offset));
 
-      result = (Map<?, ?>) dataVector.getObject(offset);
+      result = dataVector.getObject(offset);
       assertEquals(13L, getResultKey(result));
       assertEquals(1.3, getResultValue(result));
       offset++;
-      result = (Map<?, ?>) dataVector.getObject(offset);
+      result = dataVector.getObject(offset);
       assertEquals(14L, getResultKey(result));
       assertEquals(1.4, getResultValue(result));
 
@@ -374,19 +374,19 @@ public void testSplitAndTransfer() throws Exception {
       offset = offsetBuffer.getInt(index * MapVector.OFFSET_WIDTH);
       assertEquals(Integer.toString(5), Integer.toString(offset));
 
-      result = (Map<?, ?>) dataVector.getObject(offset);
+      result = dataVector.getObject(offset);
       assertEquals(15L, getResultKey(result));
       assertEquals(1.5, getResultValue(result));
       offset++;
-      result = (Map<?, ?>) dataVector.getObject(offset);
+      result = dataVector.getObject(offset);
       assertEquals(16L, getResultKey(result));
       assertEquals(1.6, getResultValue(result));
       offset++;
-      result = (Map<?, ?>) dataVector.getObject(offset);
+      result = dataVector.getObject(offset);
       assertEquals(17L, getResultKey(result));
       assertEquals(1.7, getResultValue(result));
       offset++;
-      result = (Map<?, ?>) dataVector.getObject(offset);
+      result = dataVector.getObject(offset);
       assertEquals(18L, getResultKey(result));
       assertEquals(1.8, getResultValue(result));
 
@@ -396,7 +396,7 @@ public void testSplitAndTransfer() throws Exception {
       offset = offsetBuffer.getInt(index * MapVector.OFFSET_WIDTH);
       assertEquals(Integer.toString(9), Integer.toString(offset));
 
-      result = (Map<?, ?>) dataVector.getObject(offset);
+      result = dataVector.getObject(offset);
       assertEquals(19L, getResultKey(result));
       assertEquals(1.9, getResultValue(result));
 
@@ -406,19 +406,19 @@ public void testSplitAndTransfer() throws Exception {
       offset = offsetBuffer.getInt(index * MapVector.OFFSET_WIDTH);
       assertEquals(Integer.toString(10), Integer.toString(offset));
 
-      result = (Map<?, ?>) dataVector.getObject(offset);
+      result = dataVector.getObject(offset);
       assertEquals(20L, getResultKey(result));
       assertEquals(2.0, getResultValue(result));
       offset++;
-      result = (Map<?, ?>) dataVector.getObject(offset);
+      result = dataVector.getObject(offset);
       assertEquals(21L, getResultKey(result));
       assertEquals(2.1, getResultValue(result));
       offset++;
-      result = (Map<?, ?>) dataVector.getObject(offset);
+      result = dataVector.getObject(offset);
       assertEquals(22L, getResultKey(result));
       assertEquals(2.2, getResultValue(result));
       offset++;
-      result = (Map<?, ?>) dataVector.getObject(offset);
+      result = dataVector.getObject(offset);
       assertEquals(23L, getResultKey(result));
       assertEquals(2.3, getResultValue(result));
 
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/BaseFileTest.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/BaseFileTest.java
index f1fcb830267..383331b691c 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/BaseFileTest.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/ipc/BaseFileTest.java
@@ -648,8 +648,7 @@ protected void validateVarBinary(int count, VectorSchemaRoot root) {
     int numVarBinaryValues = 0;
     for (int i = 0; i < count; i++) {
       expectedArray[i] = (byte) i;
-      Object obj = listVector.getObject(i);
-      List<?> objList = (List) obj;
+      List<?> objList = listVector.getObject(i);
       if (i % 3 == 0) {
         Assert.assertTrue(objList.isEmpty());
       } else {

From 8b0e04961790003abb32ef03e19a4604060e88ec Mon Sep 17 00:00:00 2001
From: Yibo Cai <yibo.cai@arm.com>
Date: Thu, 20 May 2021 16:20:02 +0800
Subject: [PATCH 273/719] MINOR: [C++] Fix decimal doc typo (#10363)

---
 cpp/src/arrow/type_fwd.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cpp/src/arrow/type_fwd.h b/cpp/src/arrow/type_fwd.h
index 1c953583c3b..7e564106bbe 100644
--- a/cpp/src/arrow/type_fwd.h
+++ b/cpp/src/arrow/type_fwd.h
@@ -449,8 +449,8 @@ std::shared_ptr<DataType> fixed_size_binary(int32_t byte_width);
 
 /// \brief Create a DecimalType instance depending on the precision
 ///
-/// If the precision is greater than 38, a Decimal128Type is returned,
-/// otherwise a Decimal256Type.
+/// If the precision is greater than 38, a Decimal256Type is returned,
+/// otherwise a Decimal128Type.
 ARROW_EXPORT
 std::shared_ptr<DataType> decimal(int32_t precision, int32_t scale);
 

From d07f30ada67780251e3f053aaf4dd32eb6450cec Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Thu, 20 May 2021 10:44:09 -0400
Subject: [PATCH 274/719] ARROW-12715: [C++][Python] Add SQL LIKE match kernel

Implements a simple SQL LIKE pattern match kernel by translating it to a regex (or substring) match as appropriate.

Closes #10356 from lidavidm/arrow-12715

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 .../arrow/compute/kernels/scalar_string.cc    | 99 +++++++++++++++++++
 .../compute/kernels/scalar_string_test.cc     | 55 +++++++++++
 docs/source/cpp/compute.rst                   | 25 +++--
 docs/source/python/api/compute.rst            |  1 +
 python/pyarrow/compute.py                     | 23 +++++
 python/pyarrow/tests/test_compute.py          |  7 ++
 6 files changed, 202 insertions(+), 8 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/scalar_string.cc b/cpp/src/arrow/compute/kernels/scalar_string.cc
index 65196b2a491..1475379391e 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string.cc
@@ -494,6 +494,95 @@ const FunctionDoc match_substring_regex_doc(
      "position.\n"
      "Null inputs emit null.  The pattern must be given in MatchSubstringOptions."),
     {"strings"}, "MatchSubstringOptions");
+
+// SQL LIKE match
+
+/// Convert a SQL-style LIKE pattern (using '%' and '_') into a regex pattern
+std::string MakeLikeRegex(const MatchSubstringOptions& options) {
+  // Allow . to match \n
+  std::string like_pattern = "(?s:^";
+  like_pattern.reserve(options.pattern.size() + 7);
+  bool escaped = false;
+  for (const char c : options.pattern) {
+    if (!escaped && c == '%') {
+      like_pattern.append(".*");
+    } else if (!escaped && c == '_') {
+      like_pattern.append(".");
+    } else if (!escaped && c == '\\') {
+      escaped = true;
+    } else {
+      switch (c) {
+        case '.':
+        case '?':
+        case '+':
+        case '*':
+        case '^':
+        case '$':
+        case '\\':
+        case '[':
+        case '{':
+        case '(':
+        case ')':
+        case '|': {
+          like_pattern.push_back('\\');
+          like_pattern.push_back(c);
+          escaped = false;
+          break;
+        }
+        default: {
+          like_pattern.push_back(c);
+          escaped = false;
+          break;
+        }
+      }
+    }
+  }
+  like_pattern.append("$)");
+  return like_pattern;
+}
+
+// A LIKE pattern matching this regex can be translated into a substring search.
+static RE2 kLikePatternIsSubstringMatch("%+([^%_]*)%+");
+
+// Evaluate a SQL-like LIKE pattern by translating it to a regexp or
+// substring search as appropriate. See what Apache Impala does:
+// https://github.com/apache/impala/blob/9c38568657d62b6f6d7b10aa1c721ba843374dd8/be/src/exprs/like-predicate.cc
+// Note that Impala optimizes more cases (e.g. prefix match) but we
+// don't have kernels for those.
+template <typename StringType>
+struct MatchLike {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    auto original_options = MatchSubstringState::Get(ctx);
+    auto original_state = ctx->state();
+
+    Status status;
+    std::string pattern;
+    if (re2::RE2::FullMatch(original_options.pattern, kLikePatternIsSubstringMatch,
+                            &pattern)) {
+      MatchSubstringOptions converted_options{pattern};
+      MatchSubstringState converted_state(converted_options);
+      ctx->SetState(&converted_state);
+      status = MatchSubstring<StringType, PlainSubstringMatcher>::Exec(ctx, batch, out);
+    } else {
+      MatchSubstringOptions converted_options{MakeLikeRegex(original_options)};
+      MatchSubstringState converted_state(converted_options);
+      ctx->SetState(&converted_state);
+      status = MatchSubstring<StringType, RegexSubstringMatcher>::Exec(ctx, batch, out);
+    }
+    ctx->SetState(original_state);
+    return status;
+  }
+};
+
+const FunctionDoc match_like_doc(
+    "Match strings against SQL-style LIKE pattern",
+    ("For each string in `strings`, emit true iff it fully matches a given pattern "
+     "at any position. That is, '%' will match any number of characters, '_' will "
+     "match exactly one character, and any other character matches itself. To "
+     "match a literal '%', '_', or '\\', precede the character with a backslash.\n"
+     "Null inputs emit null.  The pattern must be given in MatchSubstringOptions."),
+    {"strings"}, "MatchSubstringOptions");
+
 #endif
 
 void AddMatchSubstring(FunctionRegistry* registry) {
@@ -518,6 +607,16 @@ void AddMatchSubstring(FunctionRegistry* registry) {
         func->AddKernel({large_utf8()}, boolean(), exec_64, MatchSubstringState::Init));
     DCHECK_OK(registry->AddFunction(std::move(func)));
   }
+  {
+    auto func =
+        std::make_shared<ScalarFunction>("match_like", Arity::Unary(), &match_like_doc);
+    auto exec_32 = MatchLike<StringType>::Exec;
+    auto exec_64 = MatchLike<LargeStringType>::Exec;
+    DCHECK_OK(func->AddKernel({utf8()}, boolean(), exec_32, MatchSubstringState::Init));
+    DCHECK_OK(
+        func->AddKernel({large_utf8()}, boolean(), exec_64, MatchSubstringState::Init));
+    DCHECK_OK(registry->AddFunction(std::move(func)));
+  }
 #endif
 }
 
diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
index a59634b7be8..c20af503ca9 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
@@ -388,6 +388,61 @@ TYPED_TEST(TestStringKernels, MatchSubstringRegexInvalid) {
       Invalid, ::testing::HasSubstr("Invalid regular expression: missing ]"),
       CallFunction("match_substring_regex", {input}, &options));
 }
+
+TYPED_TEST(TestStringKernels, MatchLike) {
+  auto inputs = R"(["foo", "bar", "foobar", "barfoo", "o", "\nfoo", "foo\n", null])";
+
+  MatchSubstringOptions prefix_match{"foo%"};
+  this->CheckUnary("match_like", "[]", boolean(), "[]", &prefix_match);
+  this->CheckUnary("match_like", inputs, boolean(),
+                   "[true, false, true, false, false, false, true, null]", &prefix_match);
+
+  MatchSubstringOptions suffix_match{"%foo"};
+  this->CheckUnary("match_like", inputs, boolean(),
+                   "[true, false, false, true, false, true, false, null]", &suffix_match);
+
+  MatchSubstringOptions substring_match{"%foo%"};
+  this->CheckUnary("match_like", inputs, boolean(),
+                   "[true, false, true, true, false, true, true, null]",
+                   &substring_match);
+
+  MatchSubstringOptions trivial_match{"%%"};
+  this->CheckUnary("match_like", inputs, boolean(),
+                   "[true, true, true, true, true, true, true, null]", &trivial_match);
+
+  MatchSubstringOptions regex_match{"foo%bar"};
+  this->CheckUnary("match_like", inputs, boolean(),
+                   "[false, false, true, false, false, false, false, null]",
+                   &regex_match);
+}
+
+TYPED_TEST(TestStringKernels, MatchLikeEscaping) {
+  auto inputs = R"(["%%foo", "_bar", "({", "\\baz"])";
+
+  MatchSubstringOptions escape_percent{"\\%%"};
+  this->CheckUnary("match_like", inputs, boolean(), "[true, false, false, false]",
+                   &escape_percent);
+
+  MatchSubstringOptions escape_underscore{"\\____"};
+  this->CheckUnary("match_like", inputs, boolean(), "[false, true, false, false]",
+                   &escape_underscore);
+
+  MatchSubstringOptions escape_regex{"(%"};
+  this->CheckUnary("match_like", inputs, boolean(), "[false, false, true, false]",
+                   &escape_regex);
+
+  MatchSubstringOptions escape_escape{"\\\\%"};
+  this->CheckUnary("match_like", inputs, boolean(), "[false, false, false, true]",
+                   &escape_escape);
+
+  MatchSubstringOptions special_chars{"!@#$^&*()[]{}.?"};
+  this->CheckUnary("match_like", R"(["!@#$^&*()[]{}.?"])", boolean(), "[true]",
+                   &special_chars);
+
+  MatchSubstringOptions escape_sequences{"\n\t%"};
+  this->CheckUnary("match_like", R"(["\n\tfoo\t", "\n\t", "\n"])", boolean(),
+                   "[true, true, false]", &escape_sequences);
+}
 #endif
 
 TYPED_TEST(TestStringKernels, SplitBasics) {
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index d34eeee526f..f18ed4ea0eb 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -533,28 +533,37 @@ Containment tests
 +---------------------------+------------+------------------------------------+---------------+----------------------------------------+
 | Function name             | Arity      | Input types                        | Output type   | Options class                          |
 +===========================+============+====================================+===============+========================================+
-| match_substring           | Unary      | String-like                        | Boolean (1)   | :struct:`MatchSubstringOptions`        |
+| match_like                | Unary      | String-like                        | Boolean (1)   | :struct:`MatchSubstringOptions`        |
 +---------------------------+------------+------------------------------------+---------------+----------------------------------------+
-| match_substring_regex     | Unary      | String-like                        | Boolean (2)   | :struct:`MatchSubstringOptions`        |
+| match_substring           | Unary      | String-like                        | Boolean (2)   | :struct:`MatchSubstringOptions`        |
 +---------------------------+------------+------------------------------------+---------------+----------------------------------------+
-| index_in                  | Unary      | Boolean, Null, Numeric, Temporal,  | Int32 (3)     | :struct:`SetLookupOptions`             |
+| match_substring_regex     | Unary      | String-like                        | Boolean (3)   | :struct:`MatchSubstringOptions`        |
++---------------------------+------------+------------------------------------+---------------+----------------------------------------+
+| index_in                  | Unary      | Boolean, Null, Numeric, Temporal,  | Int32 (4)     | :struct:`SetLookupOptions`             |
 |                           |            | Binary- and String-like            |               |                                        |
 +---------------------------+------------+------------------------------------+---------------+----------------------------------------+
-| is_in                     | Unary      | Boolean, Null, Numeric, Temporal,  | Boolean (4)   | :struct:`SetLookupOptions`             |
+| is_in                     | Unary      | Boolean, Null, Numeric, Temporal,  | Boolean (5)   | :struct:`SetLookupOptions`             |
 |                           |            | Binary- and String-like            |               |                                        |
 +---------------------------+------------+------------------------------------+---------------+----------------------------------------+
 
-* \(1) Output is true iff :member:`MatchSubstringOptions::pattern`
-  is a substring of the corresponding input element.
+* \(1) Output is true iff the SQL-style LIKE pattern
+  :member:`MatchSubstringOptions::pattern` fully matches the
+  corresponding input element. That is, ``%`` will match any number of
+  characters, ``_`` will match exactly one character, and any other
+  character matches itself. To match a literal percent sign or
+  underscore, precede the character with a backslash.
 
 * \(2) Output is true iff :member:`MatchSubstringOptions::pattern`
+  is a substring of the corresponding input element.
+
+* \(3) Output is true iff :member:`MatchSubstringOptions::pattern`
   matches the corresponding input element at any position.
 
-* \(3) Output is the index of the corresponding input element in
+* \(4) Output is the index of the corresponding input element in
   :member:`SetLookupOptions::value_set`, if found there.  Otherwise,
   output is null.
 
-* \(4) Output is true iff the corresponding input element is equal to one
+* \(5) Output is true iff the corresponding input element is equal to one
   of the elements in :member:`SetLookupOptions::value_set`.
 
 
diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst
index 56ccb4ae1ef..2e841c54886 100644
--- a/docs/source/python/api/compute.rst
+++ b/docs/source/python/api/compute.rst
@@ -158,6 +158,7 @@ Containment tests
 
    index_in
    is_in
+   match_like
    match_substring
    match_substring_regex
 
diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py
index ec38710b023..18d7fee8df0 100644
--- a/python/pyarrow/compute.py
+++ b/python/pyarrow/compute.py
@@ -289,6 +289,29 @@ def cast(arr, target_type, safe=True):
     return call_function("cast", [arr], options)
 
 
+def match_like(array, pattern):
+    """
+    Test if the SQL-style LIKE pattern *pattern* matches a value of a
+    string array.
+
+    Parameters
+    ----------
+    array : pyarrow.Array or pyarrow.ChunkedArray
+    pattern : str
+        SQL-style LIKE pattern. '%' will match any number of
+        characters, '_' will match exactly one character, and all
+        other characters match themselves. To match a literal percent
+        sign or underscore, precede the character with a backslash.
+
+    Returns
+    -------
+    result : pyarrow.Array or pyarrow.ChunkedArray
+
+    """
+    return call_function("match_like", [array],
+                         MatchSubstringOptions(pattern))
+
+
 def match_substring(array, pattern):
     """
     Test if substring *pattern* is contained within a value of a string array.
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index 8e045fb4f2d..fc87b2b4a19 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -272,6 +272,13 @@ def test_variance():
     assert pc.variance(data, ddof=1).as_py() == 6.0
 
 
+def test_match_like():
+    arr = pa.array(["ab", "ba%", "ba", "ca%d", None])
+    result = pc.match_like(arr, r"_a\%%")
+    expected = pa.array([False, True, False, True, None])
+    assert expected.equals(result)
+
+
 def test_match_substring():
     arr = pa.array(["ab", "abc", "ba", None])
     result = pc.match_substring(arr, "ab")

From c697a41ab9c11511113e7387fe4710df920c36ed Mon Sep 17 00:00:00 2001
From: Michal Nowakiewicz <michal@ursacomputing.com>
Date: Thu, 20 May 2021 17:42:32 -0400
Subject: [PATCH 275/719] ARROW-12010: [C++][Compute] Improve performance of
 the hash table used in GroupIdentifier

This is the draft version of the code implementing functionality for mapping arbitrary set of input columns considered a key in grouping operation into a vector containing integer group identifiers (same combinations of input key columns get same ids).

I will continue working on it and updating it with:
- integration with initial hash group by implementation in Arrow project, once it is finished and merged into master
- unit tests
- documentation

At this point group ids, row ids, offsets, hash values are 32-bit. The overflow checks are missing in current version and still need to be fixed.

The entry point for id mapping is GroupBy class. It uses three main modules: storage defined in groupby_storage* files, hash defined in groupby_hash* files and hash table defined in groupby_map* files. Key values stored with the hash table are row oriented. Storage part of the code defines functions converting from column oriented storage to row oriented storage and back. It also implements comparison and appending keys to the incremental store.

I plan to add design doc in a form of a readme file later on.

The individual modules and functions present here have been tested with unit tests and are passing them but unit tests are not included in this change yet.

Closes #9768 from michalursa/ARROW-12010-GroupIdentifier

Lead-authored-by: "Michal Nowakiewicz <michal@ursacomputing.com>"
Co-authored-by: michalursa <michal@ursacomputing.com>
Co-authored-by: Benjamin Kietzman <bengilgit@gmail.com>
Signed-off-by: Benjamin Kietzman <bengilgit@gmail.com>
---
 cpp/src/arrow/CMakeLists.txt                  |   64 +-
 .../arrow/compute/exec/doc/img/key_map_1.jpg  |  Bin 0 -> 53790 bytes
 .../arrow/compute/exec/doc/img/key_map_10.jpg |  Bin 0 -> 69625 bytes
 .../arrow/compute/exec/doc/img/key_map_11.jpg |  Bin 0 -> 60687 bytes
 .../arrow/compute/exec/doc/img/key_map_2.jpg  |  Bin 0 -> 43971 bytes
 .../arrow/compute/exec/doc/img/key_map_3.jpg  |  Bin 0 -> 59985 bytes
 .../arrow/compute/exec/doc/img/key_map_4.jpg  |  Bin 0 -> 56289 bytes
 .../arrow/compute/exec/doc/img/key_map_5.jpg  |  Bin 0 -> 61950 bytes
 .../arrow/compute/exec/doc/img/key_map_6.jpg  |  Bin 0 -> 43687 bytes
 .../arrow/compute/exec/doc/img/key_map_7.jpg  |  Bin 0 -> 43687 bytes
 .../arrow/compute/exec/doc/img/key_map_8.jpg  |  Bin 0 -> 48054 bytes
 .../arrow/compute/exec/doc/img/key_map_9.jpg  |  Bin 0 -> 52894 bytes
 cpp/src/arrow/compute/exec/doc/key_map.md     |  223 +++
 cpp/src/arrow/compute/exec/key_compare.cc     |  267 +++
 cpp/src/arrow/compute/exec/key_compare.h      |  101 +
 .../arrow/compute/exec/key_compare_avx2.cc    |  188 ++
 cpp/src/arrow/compute/exec/key_encode.cc      | 1625 +++++++++++++++++
 cpp/src/arrow/compute/exec/key_encode.h       |  627 +++++++
 cpp/src/arrow/compute/exec/key_encode_avx2.cc |  545 ++++++
 cpp/src/arrow/compute/exec/key_hash.cc        |  238 +++
 cpp/src/arrow/compute/exec/key_hash.h         |   94 +
 cpp/src/arrow/compute/exec/key_hash_avx2.cc   |  248 +++
 cpp/src/arrow/compute/exec/key_map.cc         |  603 ++++++
 cpp/src/arrow/compute/exec/key_map.h          |  172 ++
 cpp/src/arrow/compute/exec/key_map_avx2.cc    |  407 +++++
 cpp/src/arrow/compute/exec/util.cc            |  234 +++
 cpp/src/arrow/compute/exec/util.h             |  173 ++
 cpp/src/arrow/compute/exec/util_avx2.cc       |  217 +++
 .../arrow/compute/kernels/hash_aggregate.cc   |  303 ++-
 .../compute/kernels/hash_aggregate_test.cc    |   48 +-
 cpp/src/arrow/dataset/partition_test.cc       |   25 +-
 31 files changed, 6358 insertions(+), 44 deletions(-)
 create mode 100644 cpp/src/arrow/compute/exec/doc/img/key_map_1.jpg
 create mode 100644 cpp/src/arrow/compute/exec/doc/img/key_map_10.jpg
 create mode 100644 cpp/src/arrow/compute/exec/doc/img/key_map_11.jpg
 create mode 100644 cpp/src/arrow/compute/exec/doc/img/key_map_2.jpg
 create mode 100644 cpp/src/arrow/compute/exec/doc/img/key_map_3.jpg
 create mode 100644 cpp/src/arrow/compute/exec/doc/img/key_map_4.jpg
 create mode 100644 cpp/src/arrow/compute/exec/doc/img/key_map_5.jpg
 create mode 100644 cpp/src/arrow/compute/exec/doc/img/key_map_6.jpg
 create mode 100644 cpp/src/arrow/compute/exec/doc/img/key_map_7.jpg
 create mode 100644 cpp/src/arrow/compute/exec/doc/img/key_map_8.jpg
 create mode 100644 cpp/src/arrow/compute/exec/doc/img/key_map_9.jpg
 create mode 100644 cpp/src/arrow/compute/exec/doc/key_map.md
 create mode 100644 cpp/src/arrow/compute/exec/key_compare.cc
 create mode 100644 cpp/src/arrow/compute/exec/key_compare.h
 create mode 100644 cpp/src/arrow/compute/exec/key_compare_avx2.cc
 create mode 100644 cpp/src/arrow/compute/exec/key_encode.cc
 create mode 100644 cpp/src/arrow/compute/exec/key_encode.h
 create mode 100644 cpp/src/arrow/compute/exec/key_encode_avx2.cc
 create mode 100644 cpp/src/arrow/compute/exec/key_hash.cc
 create mode 100644 cpp/src/arrow/compute/exec/key_hash.h
 create mode 100644 cpp/src/arrow/compute/exec/key_hash_avx2.cc
 create mode 100644 cpp/src/arrow/compute/exec/key_map.cc
 create mode 100644 cpp/src/arrow/compute/exec/key_map.h
 create mode 100644 cpp/src/arrow/compute/exec/key_map_avx2.cc
 create mode 100644 cpp/src/arrow/compute/exec/util.cc
 create mode 100644 cpp/src/arrow/compute/exec/util.h
 create mode 100644 cpp/src/arrow/compute/exec/util_avx2.cc

diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index bee14ae4ce3..1d832cc25a2 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -119,6 +119,22 @@ function(ADD_ARROW_BENCHMARK REL_TEST_NAME)
                 ${ARG_UNPARSED_ARGUMENTS})
 endfunction()
 
+macro(append_avx2_src SRC)
+  if(ARROW_HAVE_RUNTIME_AVX2)
+    list(APPEND ARROW_SRCS ${SRC})
+    set_source_files_properties(${SRC} PROPERTIES SKIP_PRECOMPILE_HEADERS ON)
+    set_source_files_properties(${SRC} PROPERTIES COMPILE_FLAGS ${ARROW_AVX2_FLAG})
+  endif()
+endmacro()
+
+macro(append_avx512_src SRC)
+  if(ARROW_HAVE_RUNTIME_AVX512)
+    list(APPEND ARROW_SRCS ${SRC})
+    set_source_files_properties(${SRC} PROPERTIES SKIP_PRECOMPILE_HEADERS ON)
+    set_source_files_properties(${SRC} PROPERTIES COMPILE_FLAGS ${ARROW_AVX512_FLAG})
+  endif()
+endmacro()
+
 set(ARROW_SRCS
     array/array_base.cc
     array/array_binary.cc
@@ -215,19 +231,9 @@ set(ARROW_SRCS
     vendored/double-conversion/diy-fp.cc
     vendored/double-conversion/strtod.cc)
 
-if(ARROW_HAVE_RUNTIME_AVX2)
-  list(APPEND ARROW_SRCS util/bpacking_avx2.cc)
-  set_source_files_properties(util/bpacking_avx2.cc PROPERTIES SKIP_PRECOMPILE_HEADERS ON)
-  set_source_files_properties(util/bpacking_avx2.cc PROPERTIES COMPILE_FLAGS
-                              ${ARROW_AVX2_FLAG})
-endif()
-if(ARROW_HAVE_RUNTIME_AVX512)
-  list(APPEND ARROW_SRCS util/bpacking_avx512.cc)
-  set_source_files_properties(util/bpacking_avx512.cc PROPERTIES SKIP_PRECOMPILE_HEADERS
-                              ON)
-  set_source_files_properties(util/bpacking_avx512.cc PROPERTIES COMPILE_FLAGS
-                              ${ARROW_AVX512_FLAG})
-endif()
+append_avx2_src(util/bpacking_avx2.cc)
+append_avx512_src(util/bpacking_avx512.cc)
+
 if(ARROW_HAVE_NEON)
   list(APPEND ARROW_SRCS util/bpacking_neon.cc)
 endif()
@@ -397,23 +403,21 @@ if(ARROW_COMPUTE)
               compute/kernels/vector_hash.cc
               compute/kernels/vector_nested.cc
               compute/kernels/vector_selection.cc
-              compute/kernels/vector_sort.cc)
-
-  if(ARROW_HAVE_RUNTIME_AVX2)
-    list(APPEND ARROW_SRCS compute/kernels/aggregate_basic_avx2.cc)
-    set_source_files_properties(compute/kernels/aggregate_basic_avx2.cc PROPERTIES
-                                SKIP_PRECOMPILE_HEADERS ON)
-    set_source_files_properties(compute/kernels/aggregate_basic_avx2.cc PROPERTIES
-                                COMPILE_FLAGS ${ARROW_AVX2_FLAG})
-  endif()
-
-  if(ARROW_HAVE_RUNTIME_AVX512)
-    list(APPEND ARROW_SRCS compute/kernels/aggregate_basic_avx512.cc)
-    set_source_files_properties(compute/kernels/aggregate_basic_avx512.cc PROPERTIES
-                                SKIP_PRECOMPILE_HEADERS ON)
-    set_source_files_properties(compute/kernels/aggregate_basic_avx512.cc PROPERTIES
-                                COMPILE_FLAGS ${ARROW_AVX512_FLAG})
-  endif()
+              compute/kernels/vector_sort.cc
+              compute/exec/key_hash.cc
+              compute/exec/key_map.cc
+              compute/exec/key_compare.cc
+              compute/exec/key_encode.cc
+              compute/exec/util.cc)
+
+  append_avx2_src(compute/kernels/aggregate_basic_avx2.cc)
+  append_avx512_src(compute/kernels/aggregate_basic_avx512.cc)
+
+  append_avx2_src(compute/exec/key_hash_avx2.cc)
+  append_avx2_src(compute/exec/key_map_avx2.cc)
+  append_avx2_src(compute/exec/key_compare_avx2.cc)
+  append_avx2_src(compute/exec/key_encode_avx2.cc)
+  append_avx2_src(compute/exec/util_avx2.cc)
 
   list(APPEND ARROW_TESTING_SRCS compute/exec/test_util.cc)
 endif()
diff --git a/cpp/src/arrow/compute/exec/doc/img/key_map_1.jpg b/cpp/src/arrow/compute/exec/doc/img/key_map_1.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..814ad8a69f60bf71d873a34fbc0e3854b7317d0c
GIT binary patch
literal 53790
zcmeFa2Urx@wl-YkEEyy=0tzZYa;8BfizrHvD3U=yk~Faqm7D|t1*K7PmaKq)WKckY
zAVHuT0m<D+Xrbx;%Q<sq=G-&)p5r{<z5jQgk7}yeRkilswcc8__S$PF!Y9HUz@Vq4
zs|64d5dk;AA3#6~U(kfPI0Ary0dNKY07`%ypaF=%&?)fbCJ_@r0)~meA3!AW+w~|S
z$zQ@OAWeCs3qbk{4M5qp!VOS>*J#0W698b;;$8tml;D1}9U*Xpz!3sR2pl1Bguwrk
z2$;Hf*n9f>@LhKH^!4P^w0H6K^yZT|Cw5Lu;@mk22{8#-DS2t&-=mT|Cn=$LPFhh?
zmJbZb$SX?7004Os$=|IKM(!li-@@mTAN~?1Id={Kl9PWAXA}Ky-TZF3P%->?Jx7+y
z|C2&`#PksYM+h7taD>1S0>2^fAFK^IMQK^!&#r+H$cadPwJFF!j|nG023`YwCybAX
zxPA*iB@+IX2K2KK6aC$L!k9};{P!^1<6kqs@wfOtd^G?O;S*sB&;TgO$tlRmC@Cl?
zsHiBZX<6uKX=rFUn3(BVcsL=vJe=Ize1c*^eEcE;+}y%)r$x?5NJ~pYgcMZdC6&db
zq$PhOLPSMHMN30_oR03e<O%K*l0Ur>S^-8%U>dZbh)w{+j6@`iM1&3i0zM@f=w15Z
zVET0<A|@dvBd4IGqNV`}Y8gO|Nl1uENyx}ZLAMc62zVbLWh7%dA)!vre8rZ6--|`^
z?!)Jl0_Q8=vKkGc1*L9!hf`6ru^;E)Jb6k;`1BcR8Cf}b1x1Yunp)aAx_Vcy8Jn1z
zT{pM0cW`uac5(Ib^}FK_3kbY-|3O4#RCG*o%A?e@^v6#!UgYHF<rfqd6<1Z))YjF%
zdfm|0-qG3B-P7AQJTm%mZ2Z&2Boa0MWnppY>+%X_>)ZCu?jH90{tv!>@cH-cZ;bs1
zz8FEih)GFFNGX2sMMUfm?j($)WG5uZnbfaP*m^PZOWvhqIsfo^<y$HNDI+xNP46LU
zHbH6RNz4z{e&OujV=VkHarPTyzw<Q-(2@{=FOP%~fC78_!oZ6#>i_eHT<`M|JZjmr
z2RfTV)!`;BA%mQi9WJAB``2b28wf!5eZ<~6v@MLj+6w;Ryy3wv0;xSw-tzT?(BXk0
zI-6u|+-9301%Ft8y|U|8u}`T>076G_!oY#d_#vqv0cdf-vxH=<;=`HXvo!>O8c^-`
zBLM1D(9KEs12)66Tu}=t?<MqKN&rl<Me94+G=6#u1#Sm0{-+&fED0ULf=QQ;wQO<%
z{=a9M^Lw6`iT;@BAM*tMnCTz$1R#H4EAe-()F^(>AzyrwuO&k6wTsaOZw>x9#zgf^
zHTJ@6;ut{q$KiApyu|Z)Hl_6DcXD$AFwQ~%K4@P5?e>|x^!<a){Y?T;brP^B_J;R3
zPT}7yf<!slG><M5X#y-i4y;D-?^KiU*FyMrYHVMBi~XM@e(dVyx{uw_g0HIeQ??MI
z&jyoo`sC;DsNyB5mz0-y&e9~ZHyKKgp-v~u8Gm|SrQg*$51kfyLq&Ex%qN_vAWya;
zMCJ=;XJhv*%!!NV&X;Px&o1gE05C}3OU;((TkE0iZgd~+^}SO_dGC!Kn0xQ@G|mnh
zsgO<MrH@(|<@aIZOC8v{Vzz#a*xrd+&%RD6v32(mV5~2`vp2glO91Tkfck4+2tZPQ
z{b5i)0eG&h#x9&q3uU%&9XHw4&o%t=zMd@C8%DLDqd)-evaa2}ecQ{dtOyp<Ci~=8
zu7z2ua^rQt-*$_xagWJzq4DX%Q9nL*zf3D5{St~eOWiYCjd5Zok_QpG?X)2}wsCQR
zAFJT3Wo4<20W&wuh67H<A7gioytS16xp9j7P)$~v%J225CeEs`nD*Wi#0h0j83K+|
z2KuiA(lr2QatXi|GboMThKFfkqC;wAEWni?hpYAkz+_fKjlpGhi?)^kl%EDafSBsv
z?*A)x|1Kc^X$D5;rsF8QVu0uFhtJ7M&t;lX#j|<~DbLv*)QV@q34m%60hm)K04QhR
zn<oKS8iwE~p*RCxcLIRh*8p*&AOO+X<SPC9%5J;Dm%hav<`j?;0RKtEBKQslbD>xU
zI5q=No+1F>w<8GvF+Blb*8VS27JX{ez0}lKJlxyjS~sV9S#d7#GFfqf4^Qixa1`Zy
zioHi%`9Ub=YF6Yy4bc1#kCD^gLI4mta5NtQz(@lD^#tJCGCiIYf<KigO91d8CkR06
zJp$12frPFf`^4^m1$s^h>bx8WuiiBLBX__*viD!4n^EK^x@OeR;#KO}zK~QYIR6QJ
z+R4A^MB*G*6Bq6+T}5qfrT`=9NN2h*uaq5D+6;UuBQQHvIh}n!NmkB|bUyow&!!ra
zZs~GT`{KJo^V|RdAH8gQCVGV((oj~B@!dOH8r<WTyS@>CVw2l6hbn=I?=O>zmb$$2
z<mr8LZ*8l{H}gFgB`NJvfe8sF7e6)p(r`C5pO^|dF8*1*-S4Zn!X9r8pi$<M&R*g~
zw#_sxnMTEZH-Y>ZY(J(&jiDdkFnjpz6Wn*70O)1>gGMK2Cjf~(Ez_axAK{$;c#DME
zU9rDngJSnt$0Y}g@UZSWxh^8o3QO_xhStx{WWt~t86QPw7O#sYZR)%)3G+Vw^lp~k
zQpE@h5$nv|f~jc=&a%;C^+cI>hP@Y}skXNCbyLPU)=d^ZNSl?Ht}8F!2np+^OySpz
zei)(eltZe_i2bHUpRO|`5J8nOcfU@P=>9}cT#oPV*0?-xt)4i|*Lo?k`j*D&6I#Xz
zY5))?E4$V1s4$>Xt;WR}de<qR#_QJbUEp?hlFuvJrNmuL*AJx3w1XaALxZ-5BY0TR
zR}wKR7ygFVTJ<SwyJc0Vd4XF_H$tE3w2K>>m*d5Ta#G&8EPW%6aPG|Qe8kl1LF8TV
zC6pd|S{m5JSjSZ==P%=G1wGid7`&dQw&7ac6$JR~GJLI14GT3|IB#6fbD`DPlXRax
zUi7_gev_DJfQs~3M3~I4T9%R<o`2qJ2=a2rrmNsL#S5zT5_4Bs(?we+KU%#0<1brJ
zWxIG16gh$3CiHv%Jt+e(VTC)ex0hR)7v_=z=idqNv@;sFAStWe^=5YUpF~GRIFFKP
zD?LllzM-F>!g6deOsE0<3UMXtyI7@kl}L-SdYFCJ`@m!5AzmgjP<oA`nUH+lFS=|p
zI(>H^_lWq5zdmV*11^&r!H!L^nyx3`kgB#-OtX5`D-`EGx3rNEnK3@RExbK-5c}YU
z#uu$@J7*^m`0kbME30apJ6DZT=PT3u9(9V%riPIwsP&LC&JKt1EBdZoduFnhsz70{
z4F95NVW=vqg_^M_@?g9fKiX4BDfkNNr@f~MIJ#cO2Cpy^0A~s9+sdsY4P9&3?xxa(
zzty(1r}Y_r!eY%9q5a5^{qdY<fEdq{x@dX@g#li}EpkBEa+#N{Ho@ZTkVrqvE2`b}
z9x+|YY&mWn`FW+gU&bDErbkMzoA;i&uW_60I?CrP+3_SPk0px5M@h%LzS)87HksGn
zUobiQ>dE7<Y&pSXZNpZ)@uj6sPKS~+%DFwm$~UFkAxRQ;Z<p_ThALhr?@{3RVFwX|
znlSMb3{SB{@-b+q=<6ShqjFDeG$c}5!PCBa<fdrA=?okB0EEQ$pt>4o%cl&(LlTje
z%{Bt?0P@EZ;2+NZ;pE@Gb#>HN^dvvKOTPKZ4#-Nb*)>V~u-j_cbPfG(_v^>X^`nou
zBfEN@5P%q)jeBY=rz%IvJv}@e+2cl>j@KpK>WO1gwQgy&dB6YB=ee^%VK-bvZg~2+
z$qf2kuZa_B{Hp<rZQe&jBKwZ%GSlN42j>;?A37wJGTA=4vo7eqUg@=6;A-v>U()g<
z$kR_SOE(NUkxP1l_Bcs>{M&?(+bY7uXL-a8UXI$njuxWw<SM;V_9*t7!aIq!xvX>N
z$Rz_8AMcB$G)f%4mh|p(|H79H*u}TN3cErE%K%S0t!dg@uR(==9g5AcSr>-B_8|aa
zRd7)8!(G%5-yv{hDPYl5EZ4AorI)9y<+0d;{lwyAZw>KEZb}$5cJwk?ihB`9nqlq4
zf;!0|W8*wNBj{RP^0f3w`)!PpmKLY<8A<^aDrq8*UyZC^EUmvEie2Gb$I{XGn_;3K
zg<HPJfhMH&2LkZb=)>RD|Bv^dS4;r9bKyJRDDd%mK%=oJ0r+wvV8`%~{EvnFFD3mi
z2V(<6y!tR6^8PYeLmoRWVxwhs5^_jw4lMaSR@U$=wp<yz@c2%wL;XeeiLl_eDfK6+
z&S`6;T$XM=7vO=a|JKg$C?Kg`Pby2)eO-0aFnugcXbWe%pus=TS(gf%u%*~|?=!pD
zbN}|8E!b*&#j&>m?m73HnPMYeWoax{Rj<1|OIHfDTtC}7=bKGu{!20T3ky&G53tbP
zl4+50t<g%ks+*pNsE7JKK<{k@zQBCgyo}i(YQ9iVLaoK#Tgpq=;46*1V3=1DaXoMQ
z>sFFlT|Z<9?3w{<;xpdvSkL|E3!6eu@8Ro@f5BLhC)c0OF)Kr!`fAa2O2k`#ovSMJ
z^{+N9t2v%ZO!SUs{rl?zNfd%4we5ED^2^I=44Ls4b&7V++B{}P)YOqn5P*w^EBZPG
z*PSwuc3Me^X>uau<cdkqRKViT>Nmw12;99q9P}|*GkCNo*<L<PxmNI$%UM_A5fD5~
z00JHWhPwxe`=m1jVD>g>iS((?f}*LGIeV}H!7HdyOl`owG6xZW5YT+;7Cs2+J|y;p
zZ@z+K7XXMS2uQZU=HVV_CUNJi??Jbz-VlIhBmtOz1cWp%;qS_|>`g;)-ih_op!t4k
z4}Sm}RS96N;{fr891FY(XAHQ}u}Ys+(wrGRAbmHGpVg<rr^9r<Tqe7hH_gF1P#N@E
z9axJD^Fn-gT05f;Q4o3^Vev$aSaRKkRh@%J^)eY?{jb>96aP;489rVE-DjyGy7T*I
zQsml#S%Mq}za;=UMqy$f_2J(stwXMYwXAF!-`}bmztlPYR`IwB-b?ariTuVw<WZza
z08TVk?C*k%olgrBNV#5@u&aMZx31&u$)`da^EVsO-SvIygX#}c`sq+~9I~4q*Vw%`
zI9{tiOqLqYSI5Nz@~eSHC6%LVVOo=423k%p-5aF1_x);$>4@M{wT$?ji)PvJPDWFa
zSK^ighQq{uv9^sxP4UX}1fb$EblVj);fg?G_XN#gG5qUKD*$rqoc!hL8LN|BMaooO
z5n0nZg>jv-5xpD(EM8B8bNF*2HHJEdXX;*zypt?Sc1%@uZ}wCMD41Fd)xSEED9*QY
zD$645TgJrL`}`@hvnR_f*MOqw+ut{Q!X?Zh=)|ve=4FrNBv1By6S@$Rt3%p2plf~Z
zn^!pfdqq^3+<a5*6Af)2nxZc1R|>tYhp^(2)j;>6D>7f42i9T=l%lOB=F7>)mI_#i
zo{z-<XK3E}_<1fwetJ9fvGOVFjrPR0u(MSw{Jw^@ZzD{E4v+74hs^A#)HWqGRp|>S
z>rx69yK~d}_GZ)k{`!6ovHru0)c%JTDYelg$9j0!-}(N2G?hdc(WKssrH50tIG*Ba
z?5Q>@1M$UIuaedJ-ajGYj&M`}{6F7YlbHTO>{daeuCmjK`m9$qD0y+NxGK#cGcW<z
z8ecVX<7KZITVVQZAfKf6;dJ~<mUA~cjy>VqS-%?tM6|b`bt+N6Z7<H+AUhU7qns>p
z(EqS&C`kbd+wRm9zvqtY$4u*VK28=S32%3MDL<f>KA&%v0W62Lc8UV5+<Od@Z6lTj
z+!A7vTjz{9<hc<(Y8(}1%g|d7T53gZGj;{<?AQdGJg8l$MX}euj_|u0J|usG9;wFs
zyE^|p^hd@1-pw9i^;%T$U97YA#)1R>bO;?_h98K<2U)@AK%Ec|nHU1APHq)AN;oE)
z<UkSneH|=b?$YC>;Z7i?D&;L`uqM+LM!$vyYh<<Kcv2{K=nw(H?Sk%+R`9do1fbP9
z1R!hN2Fn0y4(;7w__xVbu<`}LIRp5?#NTqd(lCPWOFY-m@JQdsJ!&D{yF2=F@SE?O
z$<8E>#TEph=lr)7#6I1>R)qf(n&59A>fg%AfBahCtR5o2f3&zscZj4=F2LXWQPKZ<
z&G+}vnfw9*z_$#iUlb<(Y!r*v@VD7nr^i1K1+0EobosrUdJhCOeN~Jj)U&C@3(P{Q
zPV6dqzQP6AeGI8I<qsETza=5)_7urNw11iO(Vr<Z>wONPsx;xc?{TVD-6RSdx8bS4
zwV<-=yfU(~3f^j9aRWaOUrMhvaA6&vTzuV-aC~B3r%m&O*8>rs<dVaU$LnhB&VP7*
zN(dfm%Ymnu7y20F`ww>I?~nZ=?8IY~o7Yo^L2s!o#6bQZqJuy@7~HoPP`QHb-Ld{C
z^EY45@89hIbclj5K|o4;arX7Sttc+;;Ui{i?|IWf%+AwY9A<l4Tte)eIG_xL-L|!J
zbMWQ6>EPtzp(3=|*doN|Vy_}(Dr0cY;I@W?vx`okx5L%IOV{iI-Ru<Xg`lc*)XFeL
znEP#a2VYx0nENdcA4Ql7|1YU4g5e*R#raj0z3m+pjV@^YN&%j!@c;TO{{H@A{!(I|
z-cI5Y3JMD1=Oo1?B}G9BQJ(-0Ut5@{hmXLo30!dSvGaDh?d#&{!S^FU+nb(#zAF5F
zelGTkj<$|6H|_0ZL~q(k*^5d@*xQQQ+Dl1@+Dl2<%gIQ{OFBy2<p28vy5IgS`#*&O
z%nxKqQN!E87VQ3T&C~Oi>c7?FL0SAiM*BS-hYPm84yr#IBt*|ii%QB~`_U!hM<c}_
zBK?g<S^Nice_&SiZ&yJ4z|e1KgV6K+4{S%7{1nJXa2>(*QwaPN@sV^L!Sz!J{1ow#
zbREI<QwaPN@sV^L!Sz!J{1ow#bREI<QwaPN@sV^L!Sz!J{1ow#bREI<QwaPN@sV^L
z!Sz!J{1ow#bREI<QwaPN@sV^L!Sz!J{1ow#bREI<QwaPN@sV^L!Sz!J{1ow#bREI<
zQwaPN@sV^L!Sz!J{1ow#bREI<QwaPN@sV^L!Sz!J{1ow#bREI<QwaPN@sV^L!Sz!J
z{1ow#bREI<QwaPN@sV`>kHhu*hz$o1aF~WaI3DAdxra!IiGTco8!7llM)_kFA2MnR
z3UW$XYFb(vY8o24V@wQm#~6>%&@iwuFfy~Sva-_Bvma+;InKnw%JR$HLnL4vQZgzs
zGAb528akGr-hP>Th**jEF$od>(cD8o?f?3@hYqj4hyh0@AMHmN{J9K}bi$oCslPVM
zV%Xn?Fn1_wAu62rLM0y?7PzJr`FP5HrZDyU%vEI4ex_cacq|#;2PMr^z$(o#Nr8io
zI}?}iCAh_Bm`uE$P%Gkqdjq<|JLdM$w0<}Y9A3zwG?#eD*aE<j;&$LL%t8VXaS1Cx
z04{j8La_Ruz!BfYJAuL^eNbddGbI5S;zCp4nRcN#*F>!SW;k?P3r9o%?!QR{$7xoW
zz6ev!IZE+=7RMz+_F!hLKY9?={g%$KjTb<7Kkz%<bluNQbTKx^;`)2^GZIrf4%kCo
zqn=0?`M!IfOoz@U%%m>BIa<EF2&N(cl>_qx;L5t=8{?o5VIt=j;Lvw)$Jk6NwKa&x
zH)*16E?|y5ELfdXnT^Y5UY^V=ww&s(SO56gUu(tqWnWp(N^NdSRpQ%I1EI&Tq;cR#
z{z|*=;PgHOK%pSmkpLt|qlNI@N{NTswHZ6c>)YA%;Ji=&BxV03U~jBrfUSR)vVRh=
zl8;}r>4lF{{Pk=7zYWJ{%2sGY<ZL@M7D6fmrG$~R^RkZSV;nw~)@O*tyE{p>`O;a%
zIiNI8>htsD&Yz*TwYgBP6lj`?`!mrY`wPV+NnfZGy!Qf@nE=>5>&E9y_6HX4?wh9m
zscimC27f8xzZB`;EgAn?6aVoLe<cGgt_XTPV4?XG?jm}r4MIK(E9eYNb}<*t8WLRK
z)OK{Fj*7~zZj=DdgzL&IS<@btDW5<gI9!_9n}>r<=RP}`h&5cZ7W?2KJvypK`|?4q
zB^$}JNQ%k4!(wH~bDSF5CUTocqx0Pyg24bq!-b%At>ElqH+_&{;js1iuGk%DkD>ga
z>y{x0>>bLp{h7sE7tDNHC@p68US!nTjY^D7gt?hV&AYv%PB$N!r>)^r2#UU&>;ZG2
zqIx;H@s>v-qYvXBC5jV5hFDlOB?R2sey8i{q;D2iRpa&i4&}YHTc=yOQ|{W(;<T`l
zm{Sm1+<ENjY;2BEBI+7m<$yQ3IOKW%w7K%`dT~daMlThMRBH%v@m_1A4+fmRt%W1_
z1j>fZne%Wry#Qu0H`>s-<uXo3$+@#$lguadJjl1sTWXKfSej^eHe#nISpUWHWNnR)
zraRn?XZp(Yg`u7z{*w5W(A67pMPInb%7RbNuj`|DbL`OXEI-;}pP(R-{;Cu&lYP+d
zjJ9OVYMm|*TUvG`q+BY^lwCimZqoPcQ_;>K6LzX=mNfMEiYgt(G%i#KtM%^6n|h3K
z_k-Q9%|fpe_?Ag6^tIliZZH|2PdLz+Upy72icGC54>G-rGe3M8d;)D~IwpeCM-Fm{
zW+hb~7MRze7v4{L4ivt_KgzLP)U<}jR~on2Avix_Q?xN2sV?&n7HqhIz!l>V_HGk8
z_i|@_LA`hGw+Fvn%#H9(+I6(*ua{xRu^8en)}zvGcPXnvC?;?A?qwhQhNSN<J3CMQ
zJjAU4=GMi@QS-o23YOWv$9c<#_)~}HnXoUs*}nb0Ev(Haoe>mx-qlb^3|BvIbguf+
zXPG7~U1EE2n%Oi~lua!~%#Es1Jj9s^Tiw}U)@%`-i!KPewU}u>#wMed=W(KaN`vuM
zBe~T<>$L1sT<1K56BcQ5m_-2G49Ql~twlLy?>glg<e6i4H@M=hhdjo8N=HQEA34Tf
z*Jw7jLFfU864yxp4O(6Vf&f^Wo(2nP$7qbH_M4B=LRtD2Dq5TRspb3<2-TUZEZN8A
zy|dhw64_wz4gye(h{rRyT@F^qXvYr5G#Pjz5ezQPe9dxf<64Uxi1l-t@pWa)osITt
zr>&3ajYqi<02XE4InWdfgggkA-^VdwyHN$vBAeyP;@sLM2ZE-iOz&8W9)_0=1KB!;
zeI#~I8Hn}`G=nn_<lU?lM)1<JUvspusr2n`{l}+E5ln*mu_dCKCp9N%@xIB@o?Mk@
z)^7Kvw#rwOjQXr6NPM_V^7U6kVG9vAyLCksoG0<qk{1Ce+^x1Qe$$+~VDn$w7k@F@
z57T76St!`(1D?gDL1yF4$&TBulW+2)-FIOZ<HqI07dnoqejUV^*mTmzXK`Rn3t|2S
zADtQ-$AdL|MWxHQbnE5h(k~K;tulREc=4sWzVFpeCu|nSVtsIBB$NSrYE%2A)m77b
z-3=^tCX#xUqVHF4o6E=6Q*wk^b>DrOb@tKW)2^a9=;Xc52JNl~wN2PYRFL$jG(GcF
z`T@^CMltQAdrS*sNS2zeO$-0C%?#i`k%J^90XE2^u#{+1xLs5-$8!~>9TO@VY=X_(
z?NOCzMj$H~^pX9Xkcg^IPY1Uzf)0xa-=UhVo&k$nF+D}~9&Xj>*^Y`BUXJ?1EM-p&
z)sE=d1J|HLa#)2<1HZJKRNuRTS!v<mnmfs)?}%DoPl1{8-rxw}Sjz6+sW^pwexL<&
zLPDcTgH?ju%CM20>|^Tzp5Qc0G|N|AP5V8LpS@%k6}ln%{qkm9(kCJEmju93oYl>S
zIU6(Hq13Opvf4hwH=Y;Ue#c1vvFP@@ix#<+&kyng1TwC!aM1a{8F3USI=A_m64c;5
zyvhgP&FBW}c(**a8Ey<&&h&(#^H#4+i9&pu*`?swCz8jVz!`0}5fKESMmroY?4rv5
zW~6*m`gN0R3r1<Aw(;~WPv52-d8u~(h|7I>sSz4CzIFO%F=I|;&O?qZ2kyrEir2&&
z4WFG-N*$Q6eW;NW_btMNyjkgE@RcH1-R!)7cr!mtaj><8&El<Su|5hy=}L=URlH^G
z+GY2s@FA-`9fvl@nfGUzk@0o4g)q5YJFH<IcCaH9@}Wt04v7k}xl%LA;rS(jOW#ne
zYb@9!oNvVbemki#5M}^`Y-$G6V}iP}&S9DDFtf<r%z4qE>op_6G!(JYJa$HJTxvy6
z&!BV!>*bXEb;7y?ttK4EBIZ+K;cVr~4rnKBtP?WUlQUQoV_kp|xaLwjwaQgkE<<^<
zzDBB_{BbgizC&teWuI2r^ks_AHKx6wCQyAnUjm5>%)h@c*YUWS^RsjK76)Fimt`+4
zF!_C<U5u?rvb2#_XI-B+C%<V#MM+v1kLHhQQ2w@4Jo+;`s^Sb5oJ95{^PJ&ff<I(G
z(M@KhWNKNB<_{77p1{BQ|CchLpvNR4DI)Oluv0d)*cW2~rEwUmw_Z+C>OL-BTpls7
zlh#I>DqPd|l5|v^kb@KK`t_JocS~ReD4_(rpyqQcvFG_N&8I)!OdFU%a-Ex&_R@r8
zp7Az*`jR4vEQCrE2rwRRrKh$I4y?qsh?u&e8dU_pVuVvQU!S*Os$%VZ98kq~<I^Oz
zRGcidx0w`8(GHKAp&o+Q@C_^bso)xNXIwdu?B~9Va*I||KFmLV+JstnAb?`JFq@uy
zti^#x;6Pjbqp}o=0I<TCEr+-D;m2h2vaQt0LhQ)dgg2O+?c*oBJ8fKFirn@JJKM76
zGj{-|!#cjx`H0g*uKTo)*9SgJkJ+9Y<t%#L(A${olq{>OjF&6Xu30<Y&ALs;LbNzb
z8A`PbrSeqPEbJ_CojHktMCHfKn~c{k&!=syzc3V&Q8nj3!yk8sKTHx<291YMZ^Pqf
zX?F%mu;Fe!((aBY!x#0bScRy0EN_XF67?l?HM$Cu^n?lp10&6Xu!8nx;c75VWeDmG
zF_oyOUn0GwT5_+u@Og|*il(5){cM5L?X;-ZGKVqBOq1dr&m@<wV`Q^c`KBh@Q8bO~
z>W*2+IVbxkmferwGYrOBt!(Ftvl&8TKo@`<I8$2HSTquT+}WCMwCO6U;yC0Ol>i!_
ziQ*c~8rPZ5vc|S`aq1F#Xa;0xQQxK%taM*mOPtHx1bUH<4P$d=O&ZV`5x=0I+Stn@
zepF5YaUXOFiRXG+$xZs6%C79`36u1y9uFPZw`nH;T&}8;qfNRfdLCHYR<G|+88#Uu
zlsxkR`eYYo7a>1?L5ww%CRt5zyv*pw+-N^$T)Lvhd-YG;Ax$Ooa=1r^ccHrq)9@9Q
z((&R7&pbf%uSfZj6SsKh;C-;9pcB$~Ip)|D&Ld!6J$^&>Qn5K|(ophbaE^nV9p8=l
zSEaLYvn0VPn6_@sduExJM`T3ljbcrP{mS?qV)na`^fS?)&2A)cT-SS;AV(%JBAiW6
zjWJ!T&R?|_AIIr}x||E!jXd0px(}baf_2<KZaZql!1rue$L3x7%?GpWX`g+vYTt$q
zLul8Vqz>d<W{FiHSa!3~?6DIDN_q{IakW{Af<EoSO---Fs9C*7BJi>rogogIfDeQo
z$6gRD1y1?dB$TUVH@s~@DZT2aa!2bfk-+PD5ub6Hr52VJ$7k#)n|P5XCnpqy3C7$d
zJNBZtc9437c*ZT(y|aT^Fbbp3St;moH9W|VcSI9_wY0ZP+-p~*i0ovVT?r%raejl$
z6LfOd#+wY`yCuEucN(HtQ;lz`zZn?43{Fc(cH??zH!sf!juELs4m}R*6~_rkK_iuQ
z#?f?!I?*Vd_)?D(pFRXQvvdo3&~tBBJO`ZuWU;&F(a*Z?DF?k+nH-N&RxgGvelmB<
zIyG9J<F4gwtdk&fmb06QPlUMi25rWg>KPm@ngDbT(zz&JD)K2Zv-(;uVY6&Lkm}7h
zDt+*X%fuPs0<2F{CtVKrE{F>>W+D25poTkrmO>83?HIDp!Xm9P6>;LFDF*I79X=i3
z)Ez6DEY2r&b9?3Eq{wWZgn>dh1Wr$W<@Y_nu*>tiFoGDq=JSdGj0Rn=Ft(o6+p738
zlj;#r^M{y!Oy$1{|M%ta*HZp#vHmL#|9J1eH}xMm{do?pn0inb(crX)UyLYQAm0dd
zD&D$EbH945l)vf0N$Ng9D*l(Wte-r(Zg=>X^T5H0(8GBs-VOo;04eV9!|zHmCS37v
zIkCc2tYiKiw^$*`$FKi?rucoY3sE`y_F$6%#uj84F#`-|SB}}gY6RznzkjD6TJu6N
z<jc)j8ur0x!Gw^4#O>f6;szK2a1aj{>43LAMH4}GMa;aW@Bu-PzAzP!{}nv{TknPF
zPrXsku>QM2Sc|z3$60gqQoAQ5*59n|^OqHKlLsztUTh0zrtB}6Se7u{4rI2%Y3xOk
zkW^d@^4$eOeTiiLs}ky^vOB2L!1<UNw+m|q*|45Em`j-~ZAg)w63U7by63B+iFHaz
z$nJ1eK9%5EjkKA7&PA%DeOjSd{Rudx7}$w9`|tSvv*}G=H2srn_Rm@qfSthqt5Pg^
zgeh3_M~5DC@RDVXAnU`V3q|~%h>5L3kv!Fg>%s|$faCRTo!?Np0tlD+Lx_r6Hhp$s
zg<#^Y5tjeGHtzHwz(@dYc1H5FETvcCiVgp%UF3f+Hh=p5Z2qilN?i)&In2VdVIkM+
z$C?z-Q*Ej|W1{r|ifO6O&CRDZU+dLPFtVnf>EPzQKMrR&M7FlBj_`g~34=TX4E{o`
z<3FpFP%vXibcSWbS%T$Wjn*2zv|h&ZrLF3NizgtaQu1ayKHwSjy~GbI%Lkz2BO_DW
z<>DxIoWLylQLr3-@<4kx5hut5zZn4WUq?*%AK(UeRr_~ay4~-@s?q-i&+{*C+j1ck
z`NLdSYhkRXSCK2?#+wnGy69LsI3+K%UP!+s`Z<m-4P|p+XI1M3VQ4|}g8DHB*|Ix6
z39W%6`BDy;dk$J|;or5)F2XB!J!|nw&LlO7JKNj4ho`qY3qJg3Nc^ko6pWa5yiNt!
zaJObN3f<1o`vSFRL`=xd;RE4qLitW{m|e5g52`}2(A7=^9nSd0^;-vyd3jb7?Tbw8
z!AT8bI$tH*nqgQu0zgNBZbTN`Q)U%vo28o3$X>3?xaum<btzvnMmny4;V>>q$7o7p
zZ&i&e-G(Pr0JhqZB~yd7nzLf>;H0=$@nvXpQ)b=6Lf22}>qB!l3r!utuC~62>pHX@
zaW|MwTZE3+Ll|n{5xWSQCMn)})F9WV&#vc3ESqHOGF0Bwgma}gJY@YGV|b8Vc)ya?
zI~%YaYGTF&&6!5y`6pq_ZTsx;%1Ws9J4+oVa^?{)efNgTH;jX^ONqUnvM2b{jSv!a
z_f`~UsqybZ<qi?<p~tc5ZzY$Tr7?05zVRB`<J<k`mrI$i#q55a=|NvI#?>c$Nh8)g
zk+gF)23Q~fuHtd~E%8uCXd<2xQxUn2lbJ`*x};s5R=0(D-l`CFdZL_J62K|{smS^B
zbslx29HERo1SNh*GFW%4$<$|VosGv3x%XgwDZ+-iH}HxK4faubea+?jO}b&6-hDI<
zm*vSK6LETnPw)abJG5cfVUT&v&^Ce-Hl<rN<QqaxvzWo7Iy`w*_&{@@(<JqCu21cq
za`u!g6^Qy_mLGitgst{In&(YKCOW5!)2*}I`8Ffv@D3%CQPa<b!K%?APQ+V?G?iYB
zX$4Zt+K*VPKtf|nkXVOTW8=Dx!)eHH&E2WTH~3TsGo3P@+9ki5modGDUB^guz&S$s
zu@tS=<U<wR_c(jYWuEz5PadO5tTESCJRi-?B$uJDlb671Y#jYPOjU1|7_aO)TRj`0
zN@wtk>uJ*S!KQE9jG%!_F8*U}O;gN;ts7f#?<ocam*d|!E*IR<8jK9q#fYmPmVxUg
z3}V%rgz{j`Zx=q81nI9|Ti%+y<ojw@k3Mb2-i+>HJ8_Qlsv1N+_&9bp7iW(3%!3vC
z*&MT+R^Igwy!70QBdu?1#wi9ybBxQboQOowU8v04`V1C1Z^#gAIu=ZVE@|hKeG`B@
zIIF+zV}c<sIQi_^>(}HuqAwUb1$xhRl^(og)i`CS=H1ML@5Ix1Mwp*^h|zAJWKX~u
z(0$x33J4LcOeSk=dOB?xK=g9PdeN9GLPtF#+%Q5_0n3iL*D-Sz>wr`d=*&8f(R!9Y
zVdc#1YBN#oGGcb&gBrR~Ag9Cjgx-yF!msv#p0O5p1M_j@V5E$dQmdpDc6hT}2)Ubw
zyk2|Ku+la3+5qwNtuvSfTlXpL-cy0)^lz|`j)qEf|6|Osa~3bgGG=k3-C(o{dv(b{
zOPA+LruP{+VTojNuZ-^_x`%1X?w+|Y=#|lO-^LbJ%V1-hs8E53NhjI5p0N?N8+V^*
z$j4u#mL%5+_Of1|x0z@`CU(HtI#F<DESL7UvN}WDshl9sMZFhNTegjMU>5>b@a3pL
z=Qbmo#bJ@{E%dn6uc1Qi&#fmWSD2(5@;WdR-MgtX5?rai#0=*b6w7f52W@cPV4s{|
zvsyH?13|WIF4I-^;<H<W=6J-8iwx^&kDik?4|$J2?qkzDk>bq++ePhcPNa7!Mx@M_
zz#<TfZV)yRWd~R5d7486QI**7btOxhfOEAgP=;m}Y-|U#MlMX*C2x?d4)Oe}`2a$t
z<G%7kLvPWJW0gsAL<^N3K}0iQ@{n+7tz1I0WW@uVW<FM_J+LC&Zz?rHrv}0pTV|&b
zU%!^I#Z29$pvJ?vcMAFTi;<g6ZDh<&zkgT7oH!lU=_+Vd-4|aP@v!LN<yQ>ZDDeS!
zgYS4zTB&J~Y4o)F5R(t}-=;Z`z#epF$)Ydi72;&j@bD(P=PRIQukYBTbIrQYP|B$4
zc-DnQxJ&<Ts>7os5;>J3Kk*40bF_H0DqnjZjv4EOxis4`**{a63rp-6niAZ<YWdPB
zVp*`>DNXR<(}J{#v)!S9(r2IDH{-!q3vrSocvh_HRHv!6Dtdc=Auglqg$Syep~+%w
z-uV_#zH)~+xV5CG*9_{E$R2!o9O_0NFROZ{e)#6_TK14#+Q7J3)>kw3+GL%yPs)t<
z7RepcH`Z?uN0L@;XHgy^b~Y(-W?0kGM?+dqkckwtUnir7-T7bMTleWFeNtZ+e?Hf~
z`$TJ({Dy%GRt~uthvPc93VJ#vT}<R=s4;nUnF1^4K?}b>ZY-^z;?aj&hjX`58sW#e
zeOf6RySFf1UMUHZK^UWJ&gZ(n^VXjHrK0QwWsnIGFaW=`pb&ligOH8qC@Oc~#>DSi
zuQ{!7WpeYRri}0@*R}b>kdP1pVE(1a4@Ff2X9|W^$LhykPBa-8tV%;&5V>XSJGG2k
zfVOpJn^iA^rf$gtJ#>IP#HM2}o9gd<9)2_~{LFqf8hf{7nbR{ym1}Cn+-7RikDfu_
zEpHk=nA$VEx!}@roc~oUe#+F{5}wecE}MuljNJ8d@p5Hcnf#JwG_CU>KBhb~1p2f9
zYgR+U7II494d7RaZ*FcT0A;E#etW<o=YO~T_dn7RCDZjujX<CIz|on@64)7fs>~d*
zrd_%snWs8~3l;(4GRfyvuhAk?{k<0ir`lxYYR|qZ6%i9FfQwNw<~<|;4aJg%b7a9v
z1=w#LpoJF~?1U-MPhH2K!d+^KnKw*vZAuEsUk6)6o%e2=2w$fh&@$J0J3vYC`Aney
zzHfnPPev|;xwKgn8-%<FVcD^Wck5^fZa#@QR=;~zlT#(Wx|{xCewW<|+L4Qe)^-wV
znGt3wmIWQ2Q3ysnvoqL4KoP!O!Lt%7h-eFCs>WPOtZ=Qgc!SS+KO8VNqVtgdBK1^t
zi6l8uk3wrU{p+G+C+F5dm9wZa_-^Ugsl&ox6(8~%>6Y+@o3qP-HOUv^>iyBnQm%56
zQV*s}h(+{3aXg;<W^>o+W3VkMG2$Gyq|2wDHxK3Zq_)KBl)W*&x9Ul?o9j7p)XV!$
zq653Gz`8F1fP6KtNbrMGT39L1@C}#UXl|_AIypgmRXUta(w$O2;F1~z^mJ*wk0lz~
z*1~8$yTUlR%sCrht0P^THPf4mxY}rD(W}cT?Wn{b-Rjg9XD1*4m9Po@V6FObv&p6a
z>tk%@C(eSqn9@4K{;DDT^o`+L>!R0YW<>07S{^T_G?~9MtPT`4y3IkD%<z+-{iS@i
z7`thV8M}w<r@4%A)kRUx9oRGL#sZu=H>35K1z3%qkV4;U0l<Mkz^(a0mB2lf>UGQM
zS%xO41BST!t&a`p_A&^ulP&SQ`BDi|)wRsk+a&g}z2_e3hEfA#vZ5A+Eq74D#40|t
z7K6*8$FO%LP8p&cb`hv`?Y9|1_i@mCn4`P4W(?b#4BdJgU2gAp+z-1bp0GbYpN0hZ
zn4C%0biU1&D{LB8JZL(N+3e)3Uu#6a*`bfB&oGR}85dgfyDHwfzn|Y&xP064VzIls
z?Sqj4=lfjwu)gBDHMX}r>}v;@vt9cI9X7G5OhYn>EU=18X#8ELkOFvH`%8V%^Rri<
zT1s9xxOu<kQe{ps@{&@RF4%s*IGEn<t`g0``W|bFl8eN-m%>u?hs5L8XCpn~)jRT?
zO;L4wyyp4~n-0`VZqNJJNc@Flwj$T&=Qr7~_fU|C?J`dkG`cj{-}BZuuPmCqgGZLe
z@5vno59zuXQX5f6zSG(c5{H<l%nMnN*O-mS13}oq{J;|oi3$46*c%krCbM$`TdMml
z+t(S3-`(dd>e7S)?%%c5yx`ncJ{>;6N3zXp$m2YQSFA<fPtTNDEmJfwqH&C_JlW)}
z&7Ih?k<4k<rHK1DR1w<YSCNSq>Gy%GyBx?of7=q|$(V<Xx)A`iz2AbP7+~<9kJ(qD
z7b>_qf;)63+Wv#9(A!$Jd3-%sea_4`2o5!97|V^cfQ$R5I7%OMYY&12S#I8orNdJH
z@_f~~V~9gL?6~N>8&BveSR>BVezhe%W#y_{Tk}o!`<Rnq(}jz0_I>?HS?I)|>x$im
z`T%^t9i2_}Lv8k8{G&ybxsV~IHXl#2?K`HriSCScFfj=~%)QgPZ7lOm?(UBGB=dYh
z_^sv>U-2U7@%A@Trgc^t=h0;|;ghlYuJgX+TVLNlFjq6q?6V}ZooNokcFxsdQ_zSm
zYerrC`$>FaKyGP={fR90D2X{V&0@1F_Vj^cunN`&4R3E|z&<SQo%7^@8D6X(mVwf(
zm?Xy*35H8BB!6avh9*<)ok$d*CECfRw?fZ$-$f$A&Hdw=B__Jj<JUyS(#K4X4U>46
z_mMRextwGo6N{0YviLOC@&Gh&HX}o2(Sy-4{X!k+UOQwV=aRWatjqd_G<HntM1cE(
z`kCNjGlzg~)^Gmt%_m@q#-lAxphKTISO|I_$CJBi-Zc53G^wdSx83K~kW0OPC9X@B
z$84)oIy;$?xU%^yELIQOKR*~ZBZn3&DaArBVP#6bnj@k_u4O8{Tly?*GNelU9@ACD
zW-nUZPpu;w1MD*m?v7zk2NcUZ#y%*}YMaq7TznN{=Dwse?i<T8csxP-3IPxqdh&KI
z4r5`>d041=99HnQ`4r~TGt{duL%4G<3_SE>xiVeWu9a)(dkVUpi2#!BcoPYzxV^<)
zM;<`2rie=8s%1=+du>0**vZ60lh3DI$f0vO1uJCUh?-G;2m}6;XQG%wGp}=F?%4--
zreLoJ=+*1ORP)g`>k8<1D2mwU=z#~Q)u6+hb2_&^mMun{-4wy2rwjydDm{DEyY-|p
zZd*p9%cgdz&5&qyKjC3H?2V|W^aQEgmc^&fmQ$a4)Rcs><g<h`AGTNREL$_924i?>
zv8&KE&}#~fFcRI2@`W<HWOUACk{(OO!VPQgb`D$7((|pWs>KgjWjYU>eHZMB3Eb?2
z91E3N7N3uoLw1$OjJ<XO7fMZRS7i@3FyA*};!8$;w$@esz+s>48+|U$B5?jTaNvZp
zSyw@~&#moNVk<g#8|>55+VEp1W85NM-*4{6P=0J0e7tS>)P5ewD492$tUx-oA71zU
z?9<6lvsu*{By#Th^mIrv75R4Zmu@c6hScx(464HnUD42qjlQXC*zagDOc@Sr+{IBn
zm`l?<`66YVO-D!2&WnzJt2;a-;M-TfW&E*VBka80JTyKXPAyhz$YSj#I~g(<xwgq{
z{_+X)+9qjr+q#4?uX;4q+l%h2Ve~_E5R<C0()Eh%gF??+8*}<mV(xiNn$ou<%S+|Y
zsP_nz*;H^ICMk<#(H%Z*qRVY-Si6IskkX`&cF(D6iRU$a$Yhpt$85Lc0cF7hYD$W`
zYc;?XO>phb|715kIq$X}Iu2@yNbUM>+f!w(I)3$AdjYa`hE2Y~1b}{D&s+>X*&Vp*
zmW-)QXz@eQ6Zhv#2XMNV*9uNf24n`WN)^oTpMu67o?PdGtS!|7eiC~CSir7Ax^fHW
zf7LBTf2D7HPMNDohHQ#|;p}v~gPjwzrKOc+k3h`v4{T0%l+P5BCc^-yT5wIMW@Ri`
z_K|5|MKPwR_=>wV@18k!ZR{h<N@Wa;29(^mUhBKZvo?)Aym&vpaVRkCHmsr|$5LW*
zL;L%7Hgzcf2($SIKRQKiiM-EEX=^KaF%d?&^DPv4!Bc4QxdZIMt~2)Z3+z*yul~&>
zqh%gWBfF$79yJpu!{xtx#_`@12}`&%s<OJG_@hPhOz^RkZdHj7!A@ZPn8lTs29uTO
zsfsvO<lxTQ;WK&BIJZ8FLtWcosD$J#nL0(S{a1K3TWmg-9;N!A$W0LuCDQ_#GNg!V
zGA#VAwY8pm7s+KLL;0{H{@_)rntDQRiuJrE5Zqr9+BQS=QI!>^gl7GS&y!6>@?2XP
ze!sx!Zab|bmi*;m-{6uCgnM1SBKPqF>&OxLJ<wBCW(Y=Io2|sByP*+n&FsTXh9xe6
zdk=PcaT@use0gM0UUQnujA^nguVqvla_C{?8DQ0D8Cu=^d`@XIN>%P&XDA;gvO}El
zE90_m;WU?7-rbz*vRwAmNnLrL%}yLAB2sYl2D__+EvEJD>+HtprMoz2-fBb6s(EJ?
z!qYvdP=fUHw4iHM|DeBn|F#N+Odr;1&K+@o+a@~gPDcN7BW9o|sQ9xhEqi}$08+=;
zZje-~GTU<Bv}i~TqHG!3N0noNlTLrDdTvztL^+oI!jHaEvql#3Vh`3+MAUOKPHV}r
zMVm=(-<<X_zHt~EtR!|9qoMl|cLik-Xalh{n_1zk{Q`cqa%{y}*DCv>iivw-ki=`3
zsmW-z#)qXZQw-k5ziY^PT|v(Pn@3%$K&=P1Yqt#5SPZVMTCk3oZ1JT`lSPw%k0}m&
zf1a*@Qp%zsac%0kE0j{11pUS4^V{bzy;Ys3rz!gb>u3`m_IF7L@TZ;a4^m+vnnO_I
zl;D`N^QKX%;%>4jX3HkKPVHIDl?~C!HdP{x8x_yEHZr5_48_NUb3ErOU{}`@FYXll
zy}jEcw58+|46fBQx})FYdnh{~^y+ov>)Q`Cm`*$S+qF_>Kh2WQrayU@({%O)cB;dM
zY}=4wnKSfF&Sz|&t?IL4^Su71DPJC|abJ%-3)<r8gc5W817RG~t);omFrC3e8a60m
zzv5d)2}U1WMOoPsO}){#$Tg*lv-J`*J*QJ8SRAZUicC!pr?*Tr-(~%&9^l_cI%i4o
zJUA(AC0gSuIx<XI0%@h^$!2B#F5t!2u`wTIA+?X6x~v6_u2r3wyd;^u%et*MU!NCX
z^JfjKfY!q8z#wqp8SZ3(CfBtVzsy=y6Y9o9^}z)jesi1`+YAjh<v`C(Uj<yiQrgQW
z538*kp%%02Wl1U6KpL+6uH*p{?ZwHrB*;Ktj=gm+oNMca4E4EUmSb(r-lI@gPv+(N
z499dYG~!BZ2VO9fm*oSzb{#)bs21HElvyr2p~<v1fLaUgw0d6kqP+INdIl!c*1|N~
z*-VzMiGknsU3I@XuTl4rZE<G5%kfOg)O6HnDs6cwrwm{DIiCz*-}O~uJc4!(`worh
zRHb<RCdJ^Mb!Bqr22yRE!7IL%x3+C%wr25RK7Kfh3#;OiKUfG#ZCv`EVyX8{F19Vl
z3}H*w?^zeZwYp4Q*LTVb=2|+VRHb2VMi|zJXD1UPWRx03&ur*|{&JV8^LW$A`MF!#
zeo4Yh59kmXVM-wo!$kd;bv03&3>}r<9|LtCX_6^4x}ekzl|oMz-EE|fPb;FDZ!{l1
zR~AP?ID)TX>;q5L7Q1GIzAnI0w~bA1zdNifq`Y6^=0=m0^fD-!rxz$r8+>=KAGEcd
z;_>{U!%%WO4>sj(y)|h=byE|@c<oN$%QtlmHQ~cLWf*1GCx^ICunE`{2kBy%YInXa
zr*3^<iMvw7M)z38tmNFb0yj6Lt@#pWuPgpXH<a4lmf}^#2lLL>^k~1l`{pu9E(LEs
z?wl=17QPYYBe+hN4Fs|Y7KgWztZEJ_EIzX)Dh?g8J~@()u8-iu`kmbD6b)=MFvbaI
zbhxpl+m|yBHZ^VS=3fs79yTwX)Vi}+T>*8oMb1WK$&c4=Qd+vGo<M7rION0bfvv%l
zG?G)I9Z9!dCm>CkG#4zX{8_Y4STabcL3FNIyHo^Q#~NUJkrk1g9#2kHl&qNetW4g2
z_--LT^=5qSsq}>}?@oUz>AIt^KES?Cg>vVmnT^akHYy@ss}O8~(s92T(S12cOWh;U
zcdOFy$-E&SdE#;RR$eJJ-XYE4oO${w4bwWbc&u^|5)o<IFx$?<^4UTt*7Qm==c3t@
z9I3Z1^}cm&UX{5s$A}z^h`MFai{ob4>mdgaO}`>5!;q3?_DR+1-6xY;&z4>u_z&oQ
zk1-_wU?^`lk_UtoXlnB+9(#<7{4Vo1FU=pD;90w|tL@+1cm=WW0Jb7t1aC^YZjfkG
z!nN76F7+P*&OoE@U8<TNW(qR3wKVLY6VMrLx`LjWH~OghHmCp_Zc!#Si-&t$N{2IR
zVAVxCT6Vj_l#ZL#3*EcBgVT0RdjeTqUS8k7`t?8hI{xK(xg9Dp{0251J=>|u`1*Bn
z->l8>Mop2}O`u6%S)9>tkb`SWYMOc(>^ago3wBRL>q28qz#d+rYfV~dvew7zUzvmi
z4t(c4ZMU=(PZH3k+3H}TmiLlIxVK_73hXjDNL6RJ+tP9D4Sk0VgV%cJ9(KG8>}q64
z0BFaN;`=n<`gR21m|PV6yJ{lVv4sl$!KRx4V7;0&2te=7D$8Yx6avsLgVo1}<Dp<E
z9tFW0f^Di5Z8qEN7;wpNZcheqW%ZE2PO#$%_;~%HOyK&OEno`{dL9NYl$Y`q90cQr
zPCq2W!3jVlR|gb_u&xAqEI@HmHuqb;iqC_~oXgLg0GDmuhaxj@C&48w;SkW4?!wR$
z0HN{H|F<5t6u}<at4f3Me<1UWdh<GF-+U#_$E<G_jac<To;)8H{JQ*0G}dmbCoVGQ
zG-=H4iZIC+1Ref;GtFU!@>TRzU9{8gtGZX1(sk%=M4m{`hO$a2-8(9NL1o%1KA{}u
z1l8Jq(Us@VyOR7Toqvc@Q9T~z!Qv$vcAy8U%KT!IUB%-o;qw@N(1!I#e>1{X9vllD
zROR>}VzjB5v3DNPac1}hX;odyU_;MMIcDWEnWjQZ1M-3)B@!p(TUDaJDt$Avra*^2
zoKvB7-e=2_E|c*P6EhkTbkvz~$#9X~5ONk5knQ|#DsKsp0kG~)1QoWs9Z@3`5qcKp
z*3JY&4UD&sXK{^6D{*~t-<V^X)>ihH#Rmyq^4DBwsEK-e$yC16t0BGBkURJi8qwXt
zimlMA8)>qbZ=r38tbx#V2XQZ5s*9nDJ1a65m2q7t{r%0{7hySGN+qks!X)4JtPWF}
zMP`DP&^(7p;q)<z-BF&N%BIDl@7)el-GU1G3+f*&yoFa=)STwbU#nuHSd`ggzuzR9
z4`XlJ*N&7{@&lX4N_0xsZ*WJwh$qh8`TE9ZvVmHd1bF}ngEKbC&CwIXTJVgIcRab5
z5e&8BTy9yc7GFs$%jTy$E=WlQoH9cuvt8DU3zs~0w)TTaD8G1@CpXR+lYZYFr&QpK
zr@#-OcT>DdEjY}c*uQ>teS<{$ih}cVvR;X>cM<LtHpZjuX)kBgZoZ0kjE?G4+@Qz+
zz*cf>>=lfE7*3&lWm4anGp(q=VrF{TP(WwbQ_yvh?IcU`le2r;XWYIR@`Uz){o>*)
z@Y2B--A}lyiuR-AWgsM<dtnKc>NnD6?I#;|1ulN*DQls)72C4}y{#Pv6L`A4TO>>}
z4~erO*-2y!_Nc+8l;HS0qXX}hA%on{EU8DD)kl20@FwQEU|UmiC-+0>)1LRGI;UUR
zb2E29NZnPr@f{G3tDT&35lyVcs~IYX>z&OH1tVrgqs*?W3l2+svp>%$7e;hbHsQ&^
znOp8lcV}K1M%j=Db71t%&>K0Z*<(}K@DOsRS0iI<joP$v*OKi9x)-J8gX0QSoJj^-
z>=6Jg(h!}MgQ4iItP0k`ywfOL@oFJ&n-HLKxv8T3>bu9HcJRlFC<v|JCQs-HFU457
zC@zr-=G5L$)mhjrNr4}FbFP)))cULljetj&YOtStg;%`S35E3$XvO#KB4?7h3hXto
zKcpZ1grOdmxP_i?<YrxvJC_MY&vndkY}Mf4#y7{BD20%I`uFenPmsK)reUqHYp(W;
zLDcR=*uB(L<-uOfw;T+Rw$wf>fm6nC8LxFIUEbnp63#joAhYe>?rywyLtz<<!CxY~
z36p!<pnbR0LqytwwYn~A(QV?=?!(2b>oo5K6TX0hnglCV!)^<cc&mDQ9}5_)TsHcH
zU+$Mu(<Jmw$tNsar*U*g>6M7|s{ABsabrcD`*K%ywNm9A82}pR700nVFR=b7lQOi1
z;m6XN3O1O(5yrQCZ1011kkg$UwU5vKzxK{Ms;O-6`$5zUA|TRhkghaoq7aIOCZkvZ
zsUwIqLv#=c8WM_hktU#^pddwQB7sm7By>bNNDT=J(j=jT03kkyS@)Ueol$3o`@Z*C
zcfEK1IBUVl*~wWuCwu?)@AvtBw}{&gQO}Dvh5`)4=WUJaROrTd5<nbml+dE*AaoOH
zX5E6`{E+_Z`IXhQS-+Gy^!>rfB-&ZJ^g#O?A}BU(jb9P1zw9unfu}SRx1&Er(OJKM
z*zQ1>ng-%In6+<^O$91jjm&vjmh5<Y8{sFtf5SljpTWcbi@&!ttp37O0xhlqX=FOz
zV<#q$PC)xc<Y1fjB+Z7do?~z3<xPpK!CLaDn<{qG-J3t$l9MiS4VQkrm<<2s5@R9l
zD+1^%FzRp3>TeY0|8AO9`M;_rrf`MhUlHG!hoRVdkkB+Ps1C`=uZT03R1K_ln0v%*
zR?a;XxjI*A`bOtcQBdIFYB238#d5ZnJRfLn8JN*~8B<QpB)r8A=C`%z0GOVB9)m0k
z-*3C581dX+J$db2RrYV0^bYVQl&&O6TRYdmV*eGzp8J*56||{!J=diIH2U}Z(QO5N
zJ}<Lg<=uvi_g=q%i0G$f2wNsyMR7J3Wcxc;FNDW<6v8H75I1VpVPVY0GvIy=%M^wQ
zeR&LWS35HyG!kme8JpuUR7FcnJ<)A`^M&L-t8*GxV}nj+IBt6{y_v;zSvO%E!qM;e
zjaQ=i2C%_-%5e>AG?S0_2Htf&m)xJ>%q8L*CpqW6-LnmJJAv<`11H{%D-^<8WiSW{
zftqtZ%k_IaVhq>B(AvB}GrIT`VfMStLI|viE`sm)xs5y&&lE}}g^-l0Q4)GKaPFG2
zjE(PL3wwO@a4Vwdp_Esdyk)Enl+%@U004pM0qZA`hv?@EO`p-T>t0O_l8^H>(rzBY
z3J9nfOPTPEN5~JP$?jLjO2bx*Vl9$rVRTR8u5Pk5od7I1?u!4&+qXTmRjJY?kvFQo
zm7{Xo=jH}rWtd`!9H9_t>96Ve>t_we=6B8TuSEw-QJfp{eYYp74UQhoky+vnPCSy}
z94466WZ{r<%lI@Z7TAAzbK~jn9KU}=`*SV7(j0*{b^n>qfq2*&r0~@<db{MSA^R!U
zCI@eA2(<eCCx7pbws4+n|F?ut_7}e)5Ao**-ghh4ABNv|;K@hrX6&QFqdt!>j(2LT
zdS+lJR~7=o(t$0|o6e%gE>iOgXU(b6XEz>RrF<arDUo_Ke~1ou>Ea5JT0b|#v9G#u
z=q@QJ@}7K*vXQpsCmBt;kk}p61(wy7Y-V@rUMjc&J}^W^->Y@wUrFWYlc=byxSMBf
z^lZ|Gp!y*8VfXD;ewv{8&sY2h3T(y`GqdU4B!@Y3Dvv29{v6$#%wbk*(Wk}6{@fZ<
znq<-Jqo{zLG**s<+e2Db%`8W5_t)Ckd5rsn;3Rb!k$REO?WfqCi}xQGhAKPw<Sh6%
zRzt*a+=C1_-SQ><z~tE!6D%(A-Fa{L9-ru@ijeCdsOZCEObwRBm2i|S!kiXFU!<sU
zg$Uoh-8LUQU#JPQW;Tr6y{#F4$&}S*SXAY~LZwmQ-s2ph*F}6+qu;C)(amUXy*L2`
zaeOECIU?W`E%qhyU=gJxVKf&FEpCm4;Z&vPl&J^aLFkQdcLh1*sveL!XuGWu3esg+
z`vO{?J}Qh!p}j!DhPB7L{PoV$kCWmvPy^O^KC#2Y_EP||k1^x+%{d@M+>pJ_bdtSL
zDcDge$QXDdu^W+4jY$>jN~=!}NxPvRSS7V?FjJ9OB5E;r5Q8OZ+^|nLV{=8V<-W=G
z!cmBi=GqYxqR*he-ch<2Rs9W~A3yf=xHaWskM?L9m|&J`S~4Re+ebe7(i$5t`fzYU
zQ%c5EC|;CL5?7OD6bWg0DlZnS=f6vSa~JgQ9N$pP=Kp>QaAn;zc1euA<G0@xRQUD7
z-TP7h_p)4nUBLHazncKi?E9bny?ffidFbz^vR~v?zLwvNp{#-T-OBaX^7wqjgVI`X
zCrapKr78ic9PL40;vJyQ=SrS5quh_*9l$qBF56>zlWn_ik|jG~jI43=or(Sh#A}h<
zH1_EZ!04uNHZfjWaAtz{r!oe>#p;PMpglN66d|p+J>(`kZ)NAz>W*ycE=*2<V*c~R
z&#wie>e_c@^)|0A-e&`e$LFAWM&-KU8g0tsB)trQaE9q2=>DgGRD5V7`%C~2az)D)
z_Bujl;7R@a)riXD&W3g#wCXB~c`!-_ABkjFjwF8pZJX*FtZxW~y;N$eUcUGCq8zrI
z`^5nvJ1Y(MqqaXyYGzVhYEi0`$vHMsTEwU@r7rbwaS3I4GGv*9vw<dZr))cxyIrge
zpW4jxq|?_kU-IDjgm<d8o|zP$MhH)p8~|RR6uR;;0NXH6P$A8M2{%R?k96m!2RiL)
zJuE4kX!>zc@P+(S`Qg*kprt1^w8vc<#KH3fG&o;lU|H<AMNu4c^5Xp;k2{Y3*loxj
zh*xilF?ruIXNN24VF=KRzzxwH?)H+ex}O(yG-UfYXWBI%`U0BMadl`}w?%S<eSW<|
zl=7^2(Xy#xe)b9V>LQ8gKR<hS5Atju^!OcLa`jnHCx_Wwn-M*jZdJB2kZ2isX40$W
z9P>(FwXm#8h6Gcu0cuasBhQBN6C*RN4P87y9}Ib2`M`ySGpRi+iyv2ju0j7mZzX3(
zN_A@HpTE$(+@@b+!C%^+<uDgza6-n$#>)0?aL{nfXOkTlN>R4r@Hz+&5=P~r#nCk>
zi*-2ma*AeK!W=PTJ{{9*xRKxcq&hElC|1KgB|1%>Lqw&;LXxe`$%?^Bk0Rkg2%dT%
z{dTW07cu!GJ@^G;$d>YwXdwI%E<Pc-?@>y#TI24R*MS$VaL3%R!-pi*SHK(Qg&5cA
znNJC0Xf{{WZZi%wwn9>6S%`}6XV1Z_;g68*BZeIb`w}jua2bfX%*N)9w8E(lgcY9Z
zdTa>8FW2gldUI7KCiUW=ar`^p5$%BYEcWa60_);*9ZSkQLCn~J%8@cPDoMp*?~rHK
zg}B;WJ%<H<f*YCa-EsMSpu)%>ms7trX0+3XfQn==Q)=q0TdEReG?x72(Ei}sOEVVS
z_Q&Fao%e2!wc5FBR?F$IqfW=pH6Y&Xk1w1}qa1%qKQabk$jsvDy+4ntZKUaOAw25r
zcNAghB4Z+DnsZlAX@9;!x}(2jl4M?0zL!zt6E}Exb$kU^252lizKRo!A+s42lQ@0S
zDZplE8HjW232TO*(Fb}d^Rp`yY|UC5u7uPcGRjT)Tm-9Z?!n~m>gppawOlN13d<|E
zkQiH%_1EPF20M4b(0R_CVEc!(saX(X+I>sV-&O#@&1=6gZ2y@*!WNsv@8!A2`aU%X
zuNgds22Y8zb~Hpka~nU|Z9yh~RN8BFC2HbPR=g+qR^WpAHHQSR;2MzS@&C~v@&%7`
z?fk?%(hZySZsqIAds-2(3`;e-;N}RUCY~|4F1T}Wz}DYn%wmq0WxG<v@t_!RwgYl}
zs4x2uamn<uGo1@oDt^^K-dk6P>@SmEltHxsm_jyaDnvU!jW#`~wCI+4d*i81F~$+D
zqEjkhWH+K#He+wIZj7)rq+-LE?4MHib!lweF|MTEtM${lP-2$iTlz`6vf$Im$x@+<
z<q(2_c)x8@RAI@w1E4b#6DPKJdzQS|*GFs}`7jWrN9|l>=uho;EeFP7v6%-B1IA<V
zF8W~PHY4;Wt0Dlv=tV#70#faANbWD7bRZ)LJgjJXFX;ATzZx<9wMOcTZ)lau@LP4<
zIs@*!dEQd2dVkM%VT$iT7F#&a^7Y)lY3ZfVg8PR3(V1<XhP*7~G)!plO>+@M{8(u?
zSy#%uALCIdbZT;HEi>KD<{u_sW4V_TY+`#oPU`>KeFP}FUd5@Ad5nP;Mt^e#|2qrv
z4_CbZzj8lZzzA&U3)#s4vV;HQ`lol9s?XpdjA6s|4!DhK1`%E(+{aI0?hkTk)!wf#
z%vz59u-HD3Ak8DTZU%-N$SL>&BKJ7<lfgAsd&*51m#_Oby{!u%&Hc1HQ9Ay!SL#GH
z>i4V4|J=mK?-Ry<@;~1<Mgi-;d-E4lB0xcJVH{aydU=e<K3&0lA&>3W$h`zF$dZVO
zJrN~U3Qm7f_^68O1*#)W49eq7<uazagy6)clB-`&3kw8$F7fwR+gOS`)k-qDa-eHl
z-srh81<oaw<zPNCQ-n@|s#DBPQ<EBW4l{+PgOj_~T1bNio)-rUuoso2(+x5^jch#b
z2=`O)b7GYFhEXA^Z25C~s*oa5lht;{>;_D?x*<)t;fjh<RgR2#g4yoE!(@M5qX<f*
z{*cXB=0r}P6Pf>s2)nOxMlyN-W$Y6vyDr7^P{$);#*AVk7jU3@Jq+PNdrH4dPTNUO
zsn_<27>_rm)P`%8D=T`!vWw1@b0}5C=Dif$c66W#h@b!tHoYq84H`2^Pze)P4kc&{
zMCQ*gM+WRGw!kI}7FHFn6JQ##VV`e}%e7o>Q19sF5<>N-u|dIw)7nPm<fW3T?kjtU
z&2T4gFSye_&}=G#j7ax1=4|cH1l>8s2NbD+3bosO2n6g>5?zl{De0E3Dixp=DW130
zd@DnPhZ*HhARE&^jLQjb?{yGH^`hDJAk(c`GpYOcIQS(h3|tO;H!(gNW1{suN%rsn
ztf%m;FzoTRJ7>T}XD?{@S+`mjm*TrWKdUaT3{aLRFMVLj+xe=c;?>v-u*Cfm)c2RO
z-w3e&UP}LBJpuZlr;1R*Ni*btI_Eib@gaQ1sGEYS&<`%b<lPA9w$;L9+%D8W8W&1{
z0in#_)dP?`gh-=3Pq)%ilsVyc!t?ZLQ;B;gq_HQ=iO&Odo1B^mAU}`ULtlYcAf)j)
zJj`CI7mz%yjBC{q+1g3^U^nf)!b8tIq*c`24h|1#Si?qU(W+6Q7SOy?^3s8zJb1*Y
zWEV~yu{f?3#b7%yqsP^a<8WW&&nC0;$-Y}yT{*>vG50Tq-pC7n#C~n@SRA_}j!t^b
z`2)NqtVg~!=ZwxS5FJ~e3MmFwG`{P6Hp*#&Aw#yEPaLg)Xb{$QHO7g#Jp%RAa{v0N
zWBaIFb0PGF@f6ZTWW}*EOBgot@z|1x$I`Yx!LTqa3;0Mqd4&3S9sv0QQzcUf$c*jl
zV4TYG5V(D9kG2_UL$3=gV0!Z)Gzhc0P3y=lSjOQ(B`Ust2G5H;LT`VLzpp_I70&Co
z{+Px%pud}L&!*`yWn~g*7V7OQy>8$fXWAuQs|uTmA}j?NcaL|?D&<S+)Tt_>u7Z?G
zj(rH|phnjg<e5(RrdYmh(BcGERN-isN(I>Y@yVLX2J9lUogym`c!@VL!W?sHpG0g_
zoknD<INS1M`?LOR`Vp$xcvSQ~<{>+Vd_^zxtZ&|s+ln5fxayR9?G^Lls=e*(nEDI8
zJEi4zsA3EBji=&_;$WjMhfb3!i-2(jsMv1>Gw$7FuYNV>*|g31&GA)sxX_{i-9Y4G
zs8As%BY&jALzbd>r;2`5-rH{dDq?~6fr5ODbVSWT%gdEQ1nyCPt$rZ4RrTa+*NxnS
ze?t5Jb&bUkGMh;;d>BnXp9+#<)<<&cpxafPRxPlhXcv4i4%`P@F62ZWqC|&!N5V!=
zXJ)!uBx1Qe&*hh%Fm!Gy0)wAvuzZRfnld|6r;oUPP<yEnYyJ~>^8BwRUVo<L{%2m_
zvTxUx8gL{87L0dzty(rE;zbRyflO&;S8`W7nsiFkb_&JabvZ_|`myCUHst_`Xtik)
z0x4<~cBC0R9nF8-+l%C9Edrr$#C2T>5<}MB6M%&CQ6~xb(B!20==WYUU&^}x$E4ee
z{S~~-eHICOWD+o`etpNe%N~&~43_Kb-FPFx+U3nPm{PX}n|PEtjFjtfr0R#uHU?`z
z=I)>|(|aW0sS`6%rDso6s+UzlR4rYv<>clAGV@e6N?{$A3J-AZL^_PwX%AVIb<C9*
zmSb^6k0PGaRwOlLjw|Huj51NUCpv+_Dbmdw^h5#JQJrc-uosMP<iCXzucC}Td-i^m
z0QUhUl8=m+V;2V`y((0cdY>EZ`nkM8A*u(r4d8=)?lN@&Wx?%>rBBF2F}gED_bJ^|
z=l!I%H|ETbmoK?&Khf0;#X0fG@a_-^A2AS*&qZ}78>OtDM8L<Evr_aja@_mW9EPWh
zn!~QyG$*@0yKXt*yO&Kw$SxP#SCJO@1;j5w?<D1$)maGNOw<<a`8<osEte&3WW(53
z9Xz4~ip2U1GE#XDq1;ym{uQK+zaH{`hXXCjL$NT7C`5lK8<0Y!8#(n>d1WDw^;E$M
z5EtQ3Iex}QdvWi1+xFp+Q;5l>qX*P}XgXZbZ%{-J!wXe;Q8NH#5tj1#Nd_IVu0t3+
z)lhWJ+0i*}g5*-RyYs}y^t%Gm4XnySrPUz6AO9~1Q^F@=C`z&iX*Z_N=}}P{H@GwN
zrCZ+$R{MNYmu^b4aCvaJp>)>Cjn0<jRh~wN*W&nb4}kuG@Q9e$m`8dc?sFK|to&>*
zm3PKU20L)5U?Z$sLrnqxW=L}3NE7(=bQ;Do{N_}Z5-Chi%=8Y`%>GsUMf5J&BH{YX
z0foDZ^D5Bl7l}XlOT){X&ps*7n`s|>kMJR7-`AYGQ!5~B9){$8IpCHm0f@n9fp1G&
zZ%B(MJC$9mi}khKW>?Q0X1RU0B`Y;@3{^x{iRw$+>y5t4K~*{3TcrZawEc8&hMzL(
znp9*a{m?SbDkDyA@sUtM_ztbzL4k;%v8Zf9+WVmK1BeVGN`GL9otH7bx(wST9Wyi-
z%g!z@oou9cE$o;q1*B-y*)0J%Iztpl??$O1JV*^GS*E$uYYC6nN}oypyy})|l>sx&
zT=i9Jxpjd0<o+G6;PqpO6yv}Z(qvExJNw*e2Yz~-h}E;A9`Vi?n~V`?-uVeccN0rS
zgUEI8y*~Zf_%W_MQ%s&7lmwl2$UJQHh^x2QX|1vMHqw`s6pgc!AWFkc^T{PtR?_|I
z@xno;nzi0~d@M<2H{Hj=szh+Y0HAucvIMtnAF2Epa0T|hvHgBFFAITApmp@lRk4%X
z#Q}fE-p^P{qoI~XGr70N(WpF8*7Yp~h+~vqJ-+6q736>Eg8z*+<dKp-J$?sNW2c?F
z<$0p}njtMZZzJngY_{*L-pFDc$}#>8hUcB&(v<By;{ZCUB6K!86UpKb8sk=Eqgxu4
zi^$aUQ1|f8+IZz#Y<)r$HxWAF9J?oAV#kh%{vzTfY`yv5IzjclWEv-9f48zrRkE@a
zvloBYkmDuINi4^gbClOy5DEb0+R&R~s;gWJ?kDg6iYR?O@BBqjx6+-gw+~$l!1HXz
z1oT`X2JH$!4M*i@a<MC@fsG_I{w?6aJxlap-0A?k{R8BkiZuV1mUuS{*IOyu{y~{n
z0Z2?nbl0L}>HBjX4E=QT@%JVcrMhnpMI9Rp>Cp6Eh0S$-L{~EoTvsy(^#XZMHwatK
zuA6C)E1WkdGj(!9_pH`$0c$!($747A@9=c^OuVY%8rzVnxh>eaqy%-db|1F2%}h6W
z8(Y<_(FGem-C=J8i(DwUUIyAl)+c2$1j~rd<E}RA#R#yS(Z;nLfKz!yF=wpP312`z
zz~Em%F~1mz$pPmdQY?Y<bzeZkkp<`lJ~@U4G2j7Xh8(6pQyAp^u#u_jD$m;ITDW=b
z6ABlDMg=x@NCv=v$xc8~tTP5#og)M3P)#_~@F|y7j*8OdD*yT$d)s)pA9xRQX?_IC
z9^H6=8;+Q<y@$SS&ORNcf2fo<jnWM;&R$<Y`}~Zl9VIMlg7U1rQZ-QScYy37;Uk37
zP2|LXQq4h+MQqa0_s(enGP_A!5eodGQ>6ytFuV)2Y+8g(1>Yv=?L!JtT<Vpa2o=$Q
z8V62hTM|kgH@A5`C#t4;&NNyF^00%np!Re*&v_#UC*-XsGtPXoavbRNb3jJ-EK>Cg
z=vmSikhC&TeE{8l;1SN&?)hi~-JWS?f0|e#K#*ENngK<GaKC;47R&^CA&YL=S>=2s
zYNP-DMR-$|D{D`Ih<jlrOS^ev^ul!oo;Ns-E+~i7jmzop^PgZ=)a_e*Fwq(dP4<_h
zePo^tgv*34u?E<hGc>ho3)jT1>CZnx)02%hUf_7C07af3>R@WjYz5$vYQ7KSv>dug
z{(5_uu%g_0ar;N?hZ<oYhNfjL*P8q&fMnOyKjpGWugYc$=aRr1yl|kO#~e4mmrE3H
z6kr<OzPnbxS}#-7X(~~|Wzb*OIs(YIy3YqemM%hp=0Uk=rgHS}PyBLO$VM)1_ih(5
zHt39xOF!%ZS6vyOXj+NO8~$jf8JE8eSVAR2F`C)?1tg_gY#@#`M4W1zE1^iX*T;SW
zlkve<aEUw0OqJ&QY;4tEUoMEj99TWOpbiHG-8=X~-_@M+ll~rW0B*9!0{*Nr9hzJM
zCl@YF^HWEyhjTi-U5=d%uwY5By0z+MApUW4P;oHnMgsId@mv>^cqa!DXk({5&O{a%
zC7~5g(ckdZInV~ndp91S*baG7d}^d!n8A^d_6^I`HWh}N&l|{^6bg8XYV8|=sqzf?
zrSaO@+B)+;fL7cJIQ1v_!ru^=%|~h)jvki6p7i5onY=w&lg5I`CEI1!DY?}42{%S6
z&StzS1^^Xi2aIege#Xhd@#WBS_(RZe>|>rw4uVNcWv!a5&q$XWX$-lNh~?z3(<}z!
zK<(=RW^=J)u;Ux+y06NQ8Z4X6#AYU&t^Uo5C*Mf4{&BfwOZWdZM78YxcYpr(?j66B
zum0}OKP>M3??0cDj4hZ=eq!|lO7R}e{WNp>z7C`)g$vkN%7Nit4i`eT0-C8=q~Vwq
z#yk+ml61PF?@Fjm4B}jURf=l%sJ0%mFlj?ZNkD(j30Rh~{vLvIi(HpSZmLs>{%A^4
z(Gp~-HM!?MoDp#S!&?2fZI2ZGqHN#0<gZHp&VVmLe%=fZ`yj!lXOGpFLq)j?$vPMQ
zr9=DXF!8<GIO|_eBbIq@#4~@m2?0ZYseJ;Wr7#c}r{j>+!r8`k{i#t2ic<A_ZQ&1>
z?1(dJHg41b`p<2W<&WL^Ut@eghM{4{MZ0!*LYv6C1_fE0rSz2RL@;#wd*({;EaD5O
z$$oXbT!TgKTh;VMJSVdq&5q>C8=A2frNc-N&KdrpswZYAJMxu0C5JO@u7yRkCOC3(
z9rM74p(tZNc~3=qrq*!1^OyZ+Y%BjwEi1Q@=G-&_wFKxjzz7uEgTZ4*r^O`_`_0M#
zBT$9dOkztn>Y(8bfsN8lBhdbS;&OpmqrF6mF;$MRei&0$^20LhXS4mD(#IO=El=Un
z-koPCTJMO|i{~D#D7#Pyjt8`;=C?tKTOf^p!%vr`g$d(gv*;P5YFuXK25#jHb$e-r
zT_a=FHcFXkkwOhr2*b~n-0B5>Sam$EOh8NP!k@(dOgz89)LE%6%<BEiDQx#@3pzN`
zpva{Qz^=eHkeQj8`L*t8`CsqvU(0j;H>Y|;Ma02_hLfNuYU|?DvPL}49nZ%nRW=t3
z`;BKz($QV#I=z^EI{oJ%*TLq@B8L0PKPJ%r=z%RTYv>=`C=7$Lb4J4_r?A#NF*ERw
zpBJt*q#-`7G9gP3Ft&i|`~GrYrvZ24Fbl94%|i>veS2HQKNSrB(VhJL+CTp-{+sn3
I>z9H50`1O7cmMzZ

literal 0
HcmV?d00001

diff --git a/cpp/src/arrow/compute/exec/doc/img/key_map_10.jpg b/cpp/src/arrow/compute/exec/doc/img/key_map_10.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..7a75c96dfc59c0877532eaffd6d47976e3ff59ac
GIT binary patch
literal 69625
zcmeFa2UHZ>(l6da&Kb!WMaenCfGAM}l#GB#mYjz;hyqFuii*OZAc&+x5=4?jQIRMJ
zC_|JCLmrr6=HHxi@44Z--+PC*{_m~z)}xsocK1~8-u2tnRkf?TYvaG;rvN&AZ9Q!O
z0)YTG!9M^$EsDMjb9Vs%Lqk9q001(81fT>6z|uKz;}(PgAOy=0@DG4U{ki`>MEdtK
z6G&4Y?+y_CP6Lp&Epq~-;65sF+Z+JswK<o;5*fIjE~f~bB5;bpDFUYmoFef5Bmx%h
z-cCLt0lda;K7l^Gmz~@Lef)W)B*i7gr6eV#q{OA<WfWzBzeXi3DJ`WeDXT0k&nqpX
zEG4BZEe!x9X@r0GO6WP$i2f`~W~BdKCX`VCfQ*cPl=C3}@NWL$xscNxch9Nk^8cie
zo-%!kz$pT!2%I8tiol-;{Ij<qtqeLGGSa|*+6Nv$0wMgHPeB4kOxOWpa32^up??VB
z__O>Ra{g~LV4Q^j^7rToeE|W%-^&~Ze>?fjKaM|+)c_Fucl-pP1(1=DkdhFSk&=><
zlao<UF;P=dQc|%oFw!z{vGZ_qv2$|r3W^Ky@{0;^a-LVXASx*(D=W(*q@<=Ottu`f
zD}7uEgq)n5ijs<znwnLbkCRXO#0$O^peF-4t0#o;0R;3ALV5_k1K<H$Neo7pjsr}8
zTOb64M8qVdWaJc-AVDo1$T1-y0TCfFF%cLxf<%J914Q)1417|WB#b5wr2KwN(s$Bd
zkO^F@{J?x|03|4M%Rh#k;tUHb8~fRFLgz0C%gV_sC@LvyUAnBTqpPQX-PFw7!qUpx
z(aG7x)y@5OKwwaC2rM-0?!DN!`1=nMGBO`N%6k0dY4*$ff>(t_uZ!PQRoB$Mt*d|6
z(AM73+135A=hNWO@VAlC?_=ZBGqZE^zZMpkmN&PycXs#o(Fcdee1V?gA8h@}*}w5c
z5AsDoL_|nLddwGuAOu_q>4}K>q(~SvO-LR582P2|kTG3Me^L2?TtMa;iusoR0L2+W
z+3B;J$E^L%*<WKU=6{K^KN<T6U*iB3Aq4C^LV5rS>>r*7UPe<Kf3b-i_^HgQm_D}<
zV@WAvW6<;R=GXGzockCj@$|v;m1kNs%6Byw5FA$eEUUrWKjas2nl(}uY*Y}>o!!=w
zeRaXAw=ry=u59WN{E*>K{n_Og=ZT0KlZB{1D*aGG9`U;Hei!rGPk9Wb(xs9{q@>7%
zfjoi);QW91x=488nl#x5=EUO5c{XY_7MmUA0uyB8V`K$XRKcGicz}Sg2{3zrmGIV@
z=%hh^yEx4;*9{4s*4<g@*3U4gpuWNS^6}?$-|s)-R<IYPq9YTCmTrR08Iug33EnGs
z!l(OLxOiOoPJa}eoEpGn47vOtzN{L`<DE;(CFttupR~@hiYE7?LNBjqu$4?D^9d#o
z*s{^RmWsWqv_qH@kn(haiHB(ED&KZv&zCD-*l(8WIlimX-xbOT!}OEt{fF8K&t-m!
z)04_cu;J0~HoR_}A?L^Z8x;h|Dt=gcZdC=NV~huKgIk`N9Ob`;_o>+T76-fxZU3*k
zUoU8W)e^6KeOZvq;jMaLEU8%cr(QfzHH;irj>m4-V#iYgIj8?#$!pxeFmxfO;@5CM
z-U96!mliH`9OtF8m+}G+sII}~^K(d*<e$Cn3knbLQuQlNP@JP?Q(YIA>cb$)+`kD0
z`DfOfTygl59U=w9<~;N$z-Qhv7`eGbi%G<mDBrvCVRWG>{(7xE9=PrHHuYy)sq{p=
z5{2e$_05}eJo>X8%5BtxOQog@d}0M(ylOvcjfM#4`%bODt`amG37TS_t2TN%H%3>R
z89<ztM_e7PpFAeO3QfL^jyl8`4~=_W{GoY1(~Us>N)q97dEL46(OPQW{_0RXz+(+R
zir_h7+VxLM39!L97Z%h77p{%=2zI@nl*9~Zo6>);?9#b-`z|ce&QNLnS8W1NdY#tw
zf~tW0s`pnKskP^soob|c=-sBU<!ddD7*zvVB?s?SeKscUfEbDle`QEtS<sH`ox}n>
z@X;R+;F`+tz|I8^|F<cf-8Wx8)%>3UbJwgPt6Qh(eJt~Q3~4%ZAJLo4zWE!w-AOlW
z<w_-OLSJttcg@auHk43Lz~<{!k4W|JJ<udgRzQlL>m56r&3liN$s&bcy(ln#=+2K#
zTQ6$zha1q@6(c-QaTgD~%q=%2(SBdrob;Q)d`ExX6Q@axE=6pdVwWCs?<B0&d%Dr8
zW<}2-j@UP&KEj~E?Blf-$pg|trI+nGmkU-_-rQyhPW0SN40fh5njG&mVx3LzzL9$M
zQMKNK|H5%yN>}>#^M%GH8pKRMPhBX3=hw9&*4<7&+G2FPVO_0)@EW@&?Nk5$x{o)y
zI7ARn<7|+J(K@ar=ohQva%2wfBcZ0<92(!vwr+iXJvXa@Gp5lMDAQA~oOzsjvn~+H
zrZQ8L=|2lq5LN@asr#-5XmI7Ux>ltD2C}Y%oVg;;>)xAq(P7j!$Vkul=OWjK4R1UZ
z$>6Brz3UaGxvbtPfd?2(T!x6?u0L)j(H|j%@Bsa)u#(jo$HwmpBpBwAc_q?ShB@x1
znC3@96D+vM4LFLz9(OLq{EV}~=UR+80+$+&Ys3SepSNs-*V9-D+|&MGzX=btUVx*s
zl`Q=$D^#x~>A45b4wO1WF=E=(uX&Tz=WgDF=NSy)(D{#IpYr!ns3?#9jPq%{ob(Qg
zUQ=01!~^R4EokE7*6D~)N_)|NY3Rk=FS?9T1=t*5TOkHRzHG}PQMG@dD|xJzA(>LT
zf)3wI(+@1jA{+wzoL$Pe9u{i)EHodX(glAjaZO<*72>L2(coHsZAOttOb9>*v!W?t
zKGM6A@y0OuTKFuDM++U-M3A$9jkFBF17dyfLzi9uKXkr`w$9%3mGpWoHUYoB$*7;C
zqgZ0AKQ(ef=so&%sgKxM`tn(`s0nT|5IR~P-hRTC8XfGREgnE->g?U+&a%+K0i?Tm
zWLHb^fICLnebnfj0;gwN%9u+1Yfm{?^9R~PxxODf!HlVG(V}?3qZ3486uv(<a8$v9
z#r$562I*d-{q|Z29*8pJ8Q+BW)gq4$WFgiI*LCWv9lS#BJ*hnE0hOXRw9E=Onuv=m
z#{(NZd4w1{iG#iOc%bE6>Kd$NVSEFJ%?6;gGk5^u(Sq4>`zKn8z3kqmt_nXgt6Aa7
zq74>8zaM@n9WhE9X!Fj0F#yCO+}=S1VMlpX7*C^3Ih`wbAkDc26B8{qa4hiMn1OAq
z9WD-WykVljMh@zpux`?$$a0#2)ai}U!zh=Auz#^@6z0&)&DZeJKHQLg?~vEDvUBq-
zttVZul`iNbz(yXGTPdWpwt@$i^FELZJ{iqz4F45PTip(7<;>BdW_S+4B%LO`5ce?9
zRd-L;@&U~IGp?uTD3|A84)ORcSOs6*y%;LJKKsWLOofw-BZ3QRP4FXAAe3$!Q?pRP
zeSixLrNaY#g0mAy=+|iJ7ncpn9Ii>`_tnmwli3g7KMd|`3X%m``GX%jzL>2AT{q9O
z860sLl2tn2v^O(%x;*F+c9Hz<XH$MnF5cFwzx47VKSYa}YdEdLb+qU<@Y*}Vw3_Wp
zK%o}F{k!fn++p&xhhIB<VH>ud{>abyTEdsvk*dO0LZ9BeBUGBmJ&Iq@AO{s`cg?f?
z$owzrQLC;|q(!V<QfmochpgeN6A?Px@>Ef%!^rallPjqQ3oJ)iDAtgp2owQ7Q104s
zOc1tr&Oh|(I9LT-d>XDw(sEtZ;hrh@3I3`&3(7-09(a&P#y&l+=)&VFRZYNYkmgS#
z>9<o<+b+I_bmh>ce%ip;AGq!Yi8!E2W~E~zHQNqdT1tpIJ{!;PX9NEEblwPDXg3~!
z)1r+@Ohu7yBjXy0RW`0(rB|7V1)onj0=Rc;wq}4MI(KkuUmK%#FQZfC#sqHbTs!}R
zv`Q=!<GP57L1M?3kqwm(1c~nqhyD5dmoj7SNw%uRDdv%%s~vvPU(1@Gu~wvT`DH|0
zTH(ABR}<_=<ifK~xVr*t{S7VQ?t4av1r^F9PWcbHB!t{gW}=nXgbj)VOI(|@4KnV2
zG4ImcZzbGeGA90X?9W}W1o7<xZM!`bV<@6>C7Z9%=GP@wO~jKhmuRtr0h%hIqn-2H
zj$BSxm9cHnN@6-1P4B<?xWsKuo);It^@OKJ{VG!MW}*DuCn+=mADMo0J8UrEP=CSr
z+L=Tk@&eJ$woFonF7!R7c?VeGV#oEhS1uKqCVl0GiU+0T<Vo;`(a=Z@2W{v!aE%R0
zl+9&N^wq<-0zw7u{#Gpbjw3EOv-?Uvv3sN1n|BO3`sdO$DRIyE4;<Ne<;hiVI;A|v
zSeO)&iv@n<ZLCjTxZM6*+FLkd&sr~VW7`pwf(P9gttFit9_)@0W-;<Sq4MC$=Y~-$
z`On*z=qcw7z@eH7fnO#PRhF#Wj!~yQZzS4;3uwcfUsZceFn@gA{x0mqOc4F<#4=4I
zdsTV0w-RXf7lp!jz{LiezfLm@@wtr$0;5d5?AKF2MN^j?4IG%%!W*E%fa9C>)Oq`+
zMKx;ZA0)u<G|9&##*p7><SX`d<~7))=5DMIlKPB``3<oh$ZRZ)re570>i-vy)7jpp
zsVko;xVoukLgBM4OvnXxgR@w0A%jL~x0FMq<jLyGWLHn&DTb0<gi%+tXk^0*E}R|@
zK<rWZYoqqt-@)ra0DB(51dU?%p!=T=kIw(ShzDW;J+A=OI^(Ww21@QvGFyn-!{3qH
z-|zt6(hk-Q*qR{S?>mq{8=JH&l%%%8V*w5h#AM5yS`;Fdg98`#M+tDglvJnu;p`K(
zHSL*b*Y$m_sGJt|<v&LbzYiZSNMPf@E4m^G5A<ihQ`FNnv*MIa(H;jf;TW+4la`|_
z>}vpt`y6HJg9n^jgaOM9h7nj{`dMLGKI6=fYVp>f8YgEi!ft|wx77s?Pyo<r{%WwR
z6%c4DJTR?;6*Na|cWr~5KLgpt1NRb!aIi`ElngwU2bFFO@8jshlK7zVD1uo^!b^}B
z(>}Al%ObHlo1EsLa}_2e>`ST^F#cZoUsbaK2t_e;<tKdST)BHQCer9&CYs@RSVGb=
znR>Lb=sx_rtUz3baBAzgo4$Kk?<LIF`nBDS)Z5Fi|2}pSURE%4F6#AvT`@1d#r8Id
zrg0>&nGYj)Mk3r*)+>Opm%pfQE|%h|-y+9O9t8^asm&(!?4YR_zYYf^ozEdx1ts$G
zmYE)f4WS~JA>gt!r1o>Zv!ha$LHIs49`i~1>Q%)BwJMQk9)Xic*&HKmt<Pp25k?sv
zW{){U&gzT}VFp1d7{LSVfcjU&<YOGJ4*kueB$1MzxcB>mrv}1}b7u(4Nka1|^34W*
z3iuUYf|&48`)g9@5N&rGL+Mwpi5KTfmxz}@sC>!ZFT*z~!hn{&K-^Bt{%%WGYIV{6
zD^RDF;beix9B7lo(UH$){_4`v5`1F?o($Ml+GahzsY2h3j7yV;hAs_9LF4t87yk|p
ze|=rTGHHx6Ek}}c?yen1zh?M~%P%pUr8Da-MNcQrAX35v;0=y-P>|I_vu5qbos^M@
zApu3X3Js>mI4ytpXagQtG`vw#+qHKz9BE`l3p!95<PEd9(96dP8QjtamG=A}CIa9n
zhb}y{-$&trO}QL+L5}t>ws{VAUJY__1;X#xcX?E>8k_SAl`WX-pI&G@$n`8ortN8_
zm_V&-COOVosus&@aaD}jUV0~<2ap7Z5oQCEBvSy9$xZl>79QvrT(1Wu(gcnKt#d~|
z<iBf#T@`Xqq&q!$%}q}r4T#8mUvc#(c*VR1O5?JY8jas>+V;%p0NJpFa|88{G-wo@
z9tDinH30z`=&mENb$W+8zv5;tF^D418@slqc^&A|(~!PaS`De8rQ^CC{@t2v#>1T=
zBIDp$oNmt)0Q;)6qQUX-k8v#VS<qV<M(nP)95Px+S}AZcFGs4mK@=+X;}^7daVYE}
zFxfK6lfyBJY2VJH(NpE3(({a)LGD{ZE&&&KX`dt=tt-h@zHk@G*A%_z_S5zbgic(l
zdl*p$c=*_F(0<Jf!vnCWK;ZX{<dvi8gMQ*8`Ac!s7d6>+I|U41!EPA<<V#7pIq!DM
z%9MLnZ*EQg(mPTJw0A)ATyy(Z<*WJAVZ@|>&)hATPmS_|5YG)x9O>Y@9$?$%&VH&-
z7~nmy2o;u3)daO_O&IN`<oy09Q()BWClBF9co=)|@vnfS8tJ_D?<M()JnZXna8u)q
z=iUKJv|}(PBI)_;mz|{*oS&`jzc_h>XXBJ+r9LGrhLunyER^l&t&3fA2&LHIh^Av8
zE!s3DT4>)6Zc6eGYYhv=2^*nxetpCPhN%@;lp1v74L};da1apA<B6NvUcs8e+n`XM
zrOKU!ebRFHBzT2~r94*4&=MMV^mQ87c~pni!|m5z?`T09VWtivBi(?GrX00qEu8GF
zt6vfOpkK=lND1v{;trs*+Jd+(I8q1;IzIF1Q0MpNp1*|$wEN+zF-^_j0EFTnI_Gz-
zOYzDXdPrrHaqVKjBcZq9#>97(Q>QXpQ0&*GW3+n|@xaZ)nP|p7`*l4$5W#c9Sm>Hg
zkB}mcP=hUdWF;{?D1nWwamsK0d1`u#a1Tao6noeQy5PM4k8?veHmtAvNJcg?YXuhx
z>f>hW<Gr9`ZPl`IS7*!{ybvxwL+sI(pC_<YH4xmFzeRrcacs_0z!b<@8C9iG>;5ON
z@(*jP>5K=sjpyisQ^-02qvHWHk$)t*WpaN9x{#gm$B;y(3_2%Q98o_b@$3_!D$fi~
zqWhKGzgkRY{jWdOJ6WWuiVRm(91CH2lq5vZP9gY#8**0jj-F89b38Dlz1vp?a9_j&
zVMZAA<h$KGjl8=JSD{P!pu&<W8<y2zqv61;!tY^*u=j8~i2cnbsTB=K-9@sW640Fi
za6|Ks(WYMCku0cc2|wOIWM73Z<%cbdr1HU=gm^%TfE5iUQReGb!w-WxbcBLuw{i<m
zaN3ByGW~zCFa*U`hC<&#H%uoDoAdN4Y+n`v%@M+4dJZbkg9laczD7hg9!Lg+{=Rcy
z0qiUFttbyr+jeJN+(cCOw1-u^*e^O5g!k3_LChV9*j^L*)iSGde3TwoRX@Q$>iV;)
zeqt_l9~rA}Ofjz_N{cfEBZ|;rwCxqs)(`*T03Dais<WY$?N2RKwEd@WHDLQ#u)CmJ
zWwHP&Shef8Lnerw*VQ&8JSUVWoWv$D+A#*6CWLxDWIZ?ul?tM~7EL1dE9vF}0vpsK
zph2G#pcg8(w=7>2D%#Ryr$L%q%LvQYwb3hY{vJ2Uajz~ELL8$0C17d00%{G|qxEGS
zeEXZQ=Ubhm{A%Oq=6KNTBM(E&x4NoyR6Ua*=2ViG0hO<bxUiyQcl9Nxev9!yBn8k{
znburgjGa$oeTzd$@<=FhSsaAymi?=JCwMc~fpCnMDhy7-<{$cLFdmqIBOzH(nlE#k
z!nb>&8|fOXVUO>v?yB^-W`zzoC*`-@k<T#63=w%gof_dQ%#liL*%u-@^Xr$Ce8k6L
zZL?-~)}ZVCU+_R-KmzbOipO0<Fnlh(J))eAe~!At>mcfe)6oy}$5{S;F7<Gi#(MX&
z?^5Ras?<Jhu1?&k^M(dDHMtQKVR^HCbxd>p8t3aoy=gzQ?+EK)tFEIFo<3vlD9i63
zZbvt?-67PA>E`OmBgekR0}I)3B!8+f^xzj_|06guW&Pw~{o?yY9ufDgR?_*^_RJkM
zDErW#L-*gqs=~=HZI3cU4aqpPA<?Nv+0p05z(ASW6llu9kVrzl^8$ANE)I*p2}e^u
zMr=>gVz%JuH}ZDyJ{WWZ)Jik2At`mwW{$9&b=WQS_@g&bpZg9rNRJMnSTle_0=m%1
zvp)ygSyNBYWy?juk(qd)(-=~<jSK$(Y7rwmz^wgGN0<zmj9P!Y5+~jL=^%^$>4A~I
zE_7kM_D?5DWD;q!w1<lvj!~==KBES?1soY_aaTBsP1UAMTbmn6X)Jz@_NOgmH6~a(
ztSu~bI`eg|3&Pi5EF2KW#(DSd-PPM`;2A$n6F(I2ETT}Nmj}njmY8dBe;>>aa$09R
z6O#!?79M++>cfa1&u~WyXp_AH+#GU$siiHNbziR;n&ks3S=EVt%yrz~NsJ+X(xlc3
z?VkhDGsH0gk;Tw=3P*=NpcAfl)YGyt1Uk>gL(kP~-=u|iZe>iRK5qD}^)RI+_V%C3
z>-z92Nxth`XdyqBpg9Exv8CkH?vmBGPwZOF=i2%Nch=SO)6oZ2OpI%v7B=%Jt<96N
zoichB8ozE92wOtl%8Hu7R$|urn%w&###Zp|hpG$noC~rLwd3IoeG4-2Xaf$$bc_G+
z%L%1FLnp$DzUqFlm&oSek=PHb*u}0~{X@xu@gR;w=XwgZdE1ului8d)w`o^Dwk$Lu
zaYQZXe#p^N7VJLgj<-GrO%t@5AMD+IIGO<u%z|+mC;RQm4KN08k9Bt(dij7rv`&#r
zMg_5=K`U>9IPma87WCp|BNJ2?dh0<lv1cu_8bDPNUBpBRbg3Gi%yTO>b@GQ_>VY?M
zR)frwU-8Rh&cGuKJXvMcJtLy$pbz`j`&b|HI9^%!g23Sc1$4jYEYH?p6mo0qeUvPK
zxRpv2TE_{hco$H`lYEjvO#TQ!9C-dbHlu-O(k6OD)8iyA5RW~nRAlA4{bwkK7C#0)
z`7E{Q78vNOEFs|?AnxGgbIV!W(Z@>y=HM$KB`zres6t`B4vrqqfxNezUERIagf`x{
z2=Tf*sR>!g8A=-ZYB{^P>xTI|Uk|%#>KNwXsN^IBRi~y<g(<_le7&3l9e810p56h<
zFg5<)YgY!#$Gau?)m8nST$Ha}(*7F-xTVJbx3+|Ygoua8i2L}vN=PXwDM?65OGrzL
zffQl^q27TGFfs1{fxlI7$vME$-`zLR-N&2vxI%|pK0$$M{6RtPPRcG0E^@b=oaDrA
zImkGPNl7_5h&edPNQpVg$T%s;NhwOZNZsQ9djq|E|2+MFgaSMt$da;_zq14Q$d;*(
zkEi-ydP+-G;@=PZM?KD$90Hxyk8h%iNy>^z%bQC7?Z&FOjJ(1>9ry>Gs>Csa|HQER
zAA3Ly!P1|IgYfhIA6QOLaw3pV;W~xuL<pRS_*A-1;W`llCn7$Tu2Z;9gusc2Po?V=
zt`i|}BH~l&I)&>*2%L!cRJu;#IuQaVB0iO_Q@Boqz=?=YrRx-~6CrRS;#285h3iBJ
zoQU{Tx=!Ib5dtS7K9#OhxK4z?iHJ|7>lCgNA#ft%Q|UT|>qH2gi1<{xPT@Kc0w*Fq
zm9A5`PK3aTh)<>K6s{8?a3bPU={kk$L<pRS_*A-1;W`llCn7$Tu2Z;9gusc2Po?Ys
zI9&gDYs1+ad{rX^d?(}goN0sv1jiq6Ap$?dWXIXZh$%=(Nyw-usHi9@C@HCF80e^J
z=xHb^>CVv6GcqwVGgHyBu%2OJWnf}v`aNeFA$SZCF*z|YITJM{HPeZg-*cuBctKhS
zA^fK~(}3Fl>vN_ZU4NMXzGL;5)?ZO)Js#(>p+jbJ>^i?m1RjYmFOvijYZYAiq^lOn
z1Q{~1v%;uG=)XfhK(S1NFXaXGUL4zvw7FQ*-1Ib9yU;b(3e9=GN7WA<SfOx)0F!DZ
z+N6$Ie1OyAggy55EDpcy%jY`DC)q6ry{}JDoQ`2~N#DDqe(742xu7xWP@qK?Ok(dL
z8a}(G&@q~AfB$k7+6Ag$8E;WtKU6f-pRe55|3il}A=WHqCXeeL!EM_3mRg?n)CBc&
zcQyNiCQ{dIN?aOiYKLn|n$#+IjZ2-Qe_V=jhCG}npIUg)3xs`R-lA=VPJC`tE@~H}
zu)jYk8aLV*fV3U6|8e=%Rnam*h3$3^u8zfA!}qf8^c^Yhs7ie~gjPb*-#Sb84oeV*
zM7SakjEQzE60C6FTgZJH^=KZDCT8iCOV}9JwRov7$&QuZ5%_2@l4Mi!l*o(0Uz=3b
z48e^X;i1B@qaPciCg05{*wcR(H3};D?mlWUYEp2Y%ZXl-QRduW!Eda#tqkPG8fpm%
z3yqAk$&51!v4Kbin2%|^MgQ*kOCxn)#)Reh>*-zEnnav25Yn<h!dHM8({XZ;{|&1j
zob8OBIz4*YQSe>=#-oc#Zw=o^lg_4m8^7ko<<|>DIAUaQolqjI9!41}KO%<*;<D8y
z-$zcbh&-Dpc{0b?*h?Xs6iOuT59?N{;6QuK8tP1^t`=eB#`}&QPbG5X7+J?{bZ7sH
zlW3V{ckm0E7uCm@lY1*X=}6I_;D`~~>2~|TxxH}(qiUnS%3;J2?#SAuruLd}`*GXQ
z7_55+-tF+_C%SHy=1wtIsZJA?eIqQgtD2%$Sb1rQQ%c~?hH#R?nIO+I%!3j1==1<+
zS4G^R(A{P*<3QUEs=71yvGGbam7_T-r@mHDcYolem-q*Mr-uaoq)J4QsDTnH`gEk)
zQn2{yY&L*B9EgAl!clsCEY`2?D9B8BRLk=Jcq*8Qv0m_?Ps*&d;AD)hS0rd2+&q_A
z3`?ArFL|}J{VdWNb17?ZGL$u@x(8x<E7HFmVPfl)KuO2q_f_3<wn$(p2G0J}RJuZ|
zA7f0qpk28s)0WGBbg#6Tcq!X#7;$Dovl6v{<r69T%(Y|v(1=}j^}T$lhj0zf@8**Q
z-t7I2=yB0Q_?kOL!!VM=K<PJB<1L)-eMYa!hq|V-q`%C*lH)jcn0G#Pl{}|%n#)~S
zS<!j;tj>!K2VY=<Y`3g(+&Xk!MZOx>^Vw5Kv>pXw`-_c(JgVxik+KRPw3h5I)5N4Z
znz0fRj_3UE5wgDtF1fuDEMTwAhXIz~MK2T`WFtX{RxtTigQP=`FRwFKSBCS*R?@rn
zK9s!AV4`azCG-4wB$vG?%wjKclcpLaG`oLi_rAr&_nU|WRfC|yFN1>7A3|j~+$TOe
zemJlpr6MKr)8J4RTJtH^Z9X#+p@;f>1BJK~VHV`$T!79vTp!!}?5k0WF{_fw7;`?W
zaqGqfXX-koqGe=l)|>Isq-SkN%H0iiw8w|>%q8o|SB17VWrIJo(;Gi~IraWBnlF75
zRz6!VzG@MTHrflyG=7V+h$}dHULLqXKW1w!bYE3-hL)~J{sE`tBX@Bz29aMGuD23;
zIJ93_Mk9Sk>bi~YM)7X)BsGSF`=A_e)S@4U%6^_Ux{si#eK<IkA+I5ZGzkddAa@jC
zVC1|Lec;y#gqQ7iXwt3`&xjZ@>TaHUH)E+v>-whi#oL0ck?URi0j0P39#b-Zo%)dT
z@j!#f$vh;!<+cwKGQzcAo!x_nd{>RqX1Hvw<xy9yW7yl^`+f^w`2s5SR4fz)cc^1T
z9yOpUIyV5U-bh4<!*o41d`1Kw+h7;Zcy^esz1YL#=NA`J@%P4T5hK&md2gPhIKXQf
z-Pfufp)P>FJj>B{2K{zTXwxEDj$@5Cw5skShm%FhiVsCbWmn=>-cp7huz2pM#>Izb
znOk|NnR6w(qRmh}^cJGad|&>+?PXfG+z<Ki#xb#hhdDo_fV2Xt(ZOJr{@cOw`&rdz
zJ8L^UM6YXqcr*|#+?tXXLmlM<bKNs*Mu(%8XQzLTwyJD!z`lN)ud3X<4vU>e?H-kU
znh+OH_hHa&)mE9dOqnSnUFeNUfEis%ZC7U**n!=OBdftEdNGLHY!kHXKTpQ?G$zTf
zJ~O2t*NxCT;rP|`|BY+*Knb0052?Ubr*Z<|n)SZA##IRqKr4Q^N7U~0g8SSrD}DT`
zP0gWW7q(DbWTHW3nJ!bS@x$L&8DU?h{+F!!U$g!NyRQoJXt_Zf<K~e_%LaSaeyG##
z!wP$ij#^jY_<VG>rlyWjt$@XbWsRj$*N^Lqm5NkG=xg}&OSz3hemki8%!-Bp9=Q7G
z?o`XOneMLDXup{iPp3f8OMvSxE!lS4<uUG*D~<X+D)s+bAzGSbk9JEA$K6lupv98+
z!w=kbu(I0V>+F|wW=L^Fi%@J$S-Bd^7L(s%OIq<^`>N4f7m0tT$j7k@*n!z31vUx6
zSG)sxaMtFd+=&MiiaYinO?KnIt5r+=t$k2BC`w2Z56r*fK@G6)AJOyN=Rq^(;eoBe
zxAn)=-7j$$Q%lu9tUU!YA5zm-zz68jcksX+Wm@p9@bjQ?yFc2pcps|)zGj|7iU+bD
z)gB*_a@@j5hdo-)FDGCpe}(q9aF<(np#72ua_3SMFMR6iT`+&6zDHF5IL-)bWzR!H
z`+2VnsJpVmUKke6{$q9k&Zn>J#mlT_!O86&>Ck-F4B?oL>RYo4ot2<oQpTmCJZ6Z0
zWxg5Ligfi`Sako@E=-!d_qLGL*^-lvtdGA2OiA^7NBsi`f1YY_==3ujlP`4c`d~Qi
z!FEEEQ0irDEY!PS`Ag@2W%(ie-`hKW+%BDC#MOVpP)^9O{FnX!^H=;km?9p>C*cf}
zm^%-4EBDxwmVfV&)PIu<n;XZ(zn^P_=jPu3Lfc6He-a+47Eoptd59Zf<eV28`(<+5
zaTjyKHhcEUCXXfI{R7)~imLRvEemz8Bue!U`QG#uk6_g|U!``WLSR0f4Nf6+OAUL^
zyxxp=2fwKnz1~rk=UWi{;f51|c0f;gOk@6@5gy32LaX3`Ti(fdpg?l8gks%>>(;<!
zrq=T>!AxI&alJq-Cij|fp@kR^^jM-nSKTqF0+)YyIfS_#ErHYT?xw|Xl)-m-21V*C
z<m);@ch%*Y7oas3o$fetEc>QVQkkgZu=vj63|+ggetWZ32D3Za96y`FTje*oj~2fz
zAb3<Q!lw|ihba}Lu`FG|zMt8b*_AGCh~1Sg`^@jjf3dRq?c{X$h{=!4ubHOkrKz3%
zM!j?IrV$D3#B0Ok#qMF}()Nrzh?3q6S@$i}KRI`VRAggbMAF0EB;t`2L2flIOt9<;
z!{NC)1?Vk$J=q>QMTQ>E+g-9k$-P?((PPY;b&0jKF^6JhS#=m0bi!8qO!EA0^-?Ye
z>c^-ywdR-mf+_1g&6Y1Mv0b-5J!##ka7OiYKq(_Ak6tv(^4!m^t&6P)T3W3Y6EXef
zM0ff1cOAF%P+z_NMrqN+nt<&&n%(y?o`XI4$b2+nT5q&n!lz>VM*ECQ(13F+cDPxD
zQTGEI?4wx#+N3PP1XU4VQTy3Y%PMAY;o=bVZSDEF9*i8~tLiVnDX9xf1KX)ch>)tA
zM#ke9&<oa8^{CNkiy8I0*JB*-U1HQkt~931i(<b4PXt5*J^~^%8f8A5Sd9Vov#|EP
z$0HF6;YY;<w6WAaG_M#W>*}3tzSM{teT`nX{iORNBOq3`DSKVqG*apnZm5M)E?J_(
zx~wR<rl+Qb=1EJf1lOb@uXL7fYkHb!>{Ba^q{<&;v#)N$IA$yEsPkJbk1EqX5cAF-
z$ox8dZC<c8Yh2a!OZr(;r;8B2gf%{+UOW0tEyHS5%Ln^0q-E}h_`9lN#fuGb)l|M&
zHa324i<v#FY+HjC$<KI$ehz0^+SfczXjIw6w6-vnsx!MA%pm9{&@Mmh(A8SWu6kaB
zdu;YbR*dSNac7@4{-)cdzOMV~;}ie<#R>x4fISU3{_J@P3+Xfuiz^nEO>4cTOSJs%
z2dXCFrIX1v;~AXPN9M)WIYN6H_ptT!O_dpzHUk5rWD9$G@6ugZt%{gPdox>gjG^-R
zSs$6T3XT#R4U?yAtge10zfG}YGJ`jsDfKCRg<C(pd?_pAyS~}lF0STb$;8Uh?K3l~
zLwnBPwdainRo1OsFgHwrpT#SSY}}WYEiWVmboKfmd?!VC=S}fCt?wIk-nwD&8ZeVC
z|GEgZolQu}*IzR{a(qu8#pn)X*?wXrj1Dasey8F$G5B(i5+Sh6&3BYnic@rl)4->1
z!Y&sy_q&W*TmzE=-qv_=D~I0=+fu`zCo!s4GNiM1VX7jo^71tDJ~wo5z0v0887}h`
zqpB<wH)pTTE6*-HAZ2Au?@~^??C9|X)X`h0=D}-PQyeJ$)MwAW`*m5U>1`nNQ4-7%
z*B{%V5M65G60D1G>h*KOwTYUj1+9JtFIV%YkJ1c;o>&!l+^G{yzbV4lNT0o^r^miH
zu!~G>aZ8Qop`5AB?3ffEs_AT~W>2nk31K-i|BbNA#IpX!N-NRx%rwXwoZ!)`W>&P0
zMeX1&oYl%|@`e_3*N7fj99y(hj}(PD)tqDRh4wQa2pUPJ^v7<uus~f)-l;y#9h%zl
z|8Rl#fMLBxTO}2k`UpKk#Gt2aM~P0KmWWY5j~<=wdjKXZ-D{aK=)E>F=%L4L7-E>s
znnC83!tqu3%|NIlAFHHL=99TA`k*7bat(ce!sw!Ca%K%$n8q$omtEE5ie0ySR;NUu
z=-73@`p7YsIGXhC$h%aosNr1Nfo2kP$&66ku8lpNR{mwdTXa=?7|V;-$ggudiQ4!e
zO}gbaH|~sH2Q%`uBgeu!A9IVq3DEvS+QX|x4_*!BH@+RJaDFHo74n3lOHkvJpsG0S
zPr*?ywO1{P>O8n1b-sZ{$6~B@_DVaYLrA&J;>cdM*II}9HnXMRo%^}Evnp1VLpO<L
zO2!H-gmuOc)L1`<O@R;WLicMh_i`t3!1`thFHI2xv4wFvb(%sD^AOc+ZqrI4T4a`s
zr3*>XC{^ml{*b*qW5l;hD7IbOnwQ~{cfdPK`sDhV_ZqKA1|@Y!D%}OnFda*^`vr@-
z(Zg?AGa7XwQDBCoEf0HA_^RPR%T65o?(Hqa7!+sdF|~2@pg{qb)^1m3SG-sj1A4m-
zmygrT|HaktcF;*Ijjt)X?arO|gdA}kTt_m?@FFG?OSZI5M=u}Q>a&d(1iSZ3D}gP<
zUj0D}d*Dc=QLRg|3)zh3uV|!XD|KH4AhXozk6>vuEJmwLha|`~@qe{hyV`D8pFPfM
zCl_(gXOi%OX`6osW9EDs<cCO)Iy-K_`e#e)Mr_^q=-t}2kgnWwm31k7Mm6$<hK8{P
z{(3MchEn5;tsCu+ZrX6@=+^}&iv>o~pM7I1l(8xL0%tEadiOgdEo@&_`}Lb#dy0};
zNvYViI}=<@Y9+rU5|A}#_wVCKu%1J+gF$@&Y`e``F?$)*28^N?VTqcujG7_>#q??V
z&*uW!<i5dp_8uy**ilq&3f+w`m~K+&<TlHCFeB!4v42@X8!F|Pe$70K<&hQ>!+CK@
z--2}0nN{)YKh4!jev|kl?N^ijQJ|1SQL#_evES6!`hyp3CY6*RTVK{2c~_;cQYpG$
z!$-kPU)0U2v=Fq=%$drW@G{2^1+5wL$~?C)PnB}<^(Q}>W4``SIS){6KB9O7o3bdx
zs-wqo=Qc|xX*OBz)xrtU#h!SermSYocIuIYnAW|JIGA8y@Ws!~&_~hEyo2+%{3@4B
ztlNnOdpUJ8^))jt8?${>B2K-YMAIDeDlanNvg@4gwF;k#-GWzkNc$DjI>KJ4i~WK8
zMl8?Xg(c{r4s)1UXvOMhJaFzODj(OW2IltCovF}&gXmDGH@VDAl%U~|{C$?A@*^jw
zYk!*Jl5wIU@#h<qu#xG<j=FRtB8x^vgbABQ<CqzXgeX?5Sj}*Rh(l07q+jXMUd9cZ
zriN0JrZ4)&2fZ;))bj+8@f%X#NtTC4o0XSGl)n|l*VxBR3Tz5JfLGrft`%Vws)-Ym
z)%Q*72@ovdxT4D|d}h(;qKJX}i&y2TwyC0@m`BuoOUcbRQ(#;qSHaNZCY%Y&y!msz
zY372e+@{Yp)Z~L1l5L^Eikgobzp8H~l#9-X26)qaSrh(Lkdeorw{4VUU(>zU*N%v_
zlS8el^ADNe_{W#n%kO$O+vRQPaQz6dtSVXc?_*DXe_!R%1MBsTW_4SSY>AXcu{SV#
z8x(1BX@pErnbnxR9^tbmXXNdbCp|*B$h9e02`F-!i}(#`AMP3fFiKr?z?2$Go3*N-
z+o4&8Wm`4Zmx0$G#C@eL=bzol(CZAO)8s0>z;vE3ndNpVTzp_rYUt3S&yM&g?_58Q
z9?LQKSZ@T!7ooAq6YIGv;OHnA6`uDtvG;v_ea<)Lkh-%eR!;&(nxND6o$q-POm2h2
z#IcY`?$I5CO7~K?u#BQ{&+p~)y!q0)DX**q&4oYt%jjDy3k3#mYNy@2lt{;QMvj71
zNs(~!IbaM-Q7CPUB*YLr3Xg=BofY0Y!~@p#5xO4!b*o^^_*^Y21lRGC1C$Gg3iFB!
zN1!d98)>G&1K*$J!<VS!mcK3MCVU4yVG<@2Fe~GKvuX)c?JxVvM3dIdxPO_uAkwox
z`_^NAV|=BnVIu}776Ga6-)YjaZFpuk%x8P1Yw0E?p!Qw~`+F54GPb+b^7{AAY6?7Q
z)WPW3uGm4)6VomCCyC#o-qub-shUf|3zxD{IYE@<d5>wmg44A>&UNxLIC5Maq&ZKx
z9IIp;kSi>oR>J)rez<@K$__^R{G$fKYLtuOBj`~Zt^Vk~8>#JYmA}5=TCd46KlCd7
z{FW(We&9>B()DP@Uxb2Xu+c7w@x*KBOjN4QrdUO6P#+j9VL>+)EGjfmg$Vn2JJ)5g
zosHEMjt|ZJOktVt8v>^H!!%?y(tbeJ<klk@%0VgRX<b2W7~3-V&|Ce~DeW${{$bU&
zm>D1Rfa;pIt-s_aZC%F?<+peyWNZ`L%z5Z*jbcZ$bv~>tIgHJO*1F~HQM$jPZx5A=
zDV)EzuC16>Pen7Rlt=q|S*-W{gjyI}|6rjLQA?TtpLT?ew&g<5emu$ZC{JJcieDa^
zmOoW`DEg{K2pONIX3Y{O-L|A7FZbx;VQmZ##h6EM4ArF4*pH(fZ}f}rQkE$g4--E}
zD2(09xH0kMGwEAB+K+mZUo^%oWFfm@J50$JBfs?(g|SDghokJVk5W^3_7tqaSU9x6
z!tVw&1{9Cd2wyx<*)WO+Opf^9Cv1B}=nxwvfg;lJ<-_J)YET7?r6bHN4eHrEv@VuM
z)~fJS(6gk^=gHl^x>pgY)O8)&=<6s)?Iu)T>JF6LjXiWA!mJ2(8@3i?H6J%ZHuaQS
zNvr9F10TOU)kW+NU-gmJoo+l(%CmUELZtuEQvYmT_tu+he&XWYAB;idE%s>F*l3bb
z7`p*aRg3BH^MNX2I>ySeKfe_3J#^02k}a*OjKtZfhuZ1r?~2*r7>!aakW{K-K`4wB
z>M>TpdM)mK#qEap%sma!Vd+jcvmQjRKf!O5_r7X}MQZDL>nY7X-|qGS)60hR*#-0|
z%^{ZI%H<)$-mV?o0!O(K*Jo&nm+LB~OOEtyWoEvX&G@XU*z8rl`zAYPI~r8_X|w7h
zJ@#X(!*jZ@bX#OABJR+%g)OrqTcWLx4rZh~B-`U)>(OQX^T+<+4ZXTRV;j8?>H9Il
znS5*?Sr<i?p*J%0T9uTRh7S6pZW2aJQ90lok`_4kUi4(l`x#m5L#;)mFaGnz?cw{E
zy=$yicr25Nu|O9%^Fk`=in=sfB-|@+>6(Y~qqnaLmqi>PW*N)pCHOKfU+fT(G>anV
z68Y2^z>^r^ylK61ZPVwTx{y7L9ivy3**-25TY(3vu}^bs)=ZYQMebKhJTWG5rwR{<
z@mN7b#SaH9C6IpJkGei8^5Jk{q*(9e%xVP3bVV#U0Y@VT3f@L%ja6yDjrB)~>6Q0P
zqrI8HEXwt-KA%Ym@v1r}Xli8@|CMCQ&+c)#TkZw){A{mJ7aSG0!VLx$;l#_?>IkoL
zL6ga4i#q+-x2+lK{F>~5o>F=b7j>jz<;69!7w`rq#}}-JHxXZk5@k5ET-5XgGeU(B
zc17EAQ3J^z(H_^(gvIXmsKQ3u86P5aYG;`3<Ms&&s@sP~@p2c+g4C3MNr;UYRvJ=X
zjZMtw^(G_D$>$!$h+)-I+Xi8M_NlEQ&BV2%6})HqSFD@9z9>d&QF%U)>w6~qf+L{*
z^7Xbfaz&D+g?CE4xurC;qw38^Zo93Ni1j46_^xzjy0`_E*DRvDj4A4&DZ@YelkGEF
zHji4I@W6pw#od;@bBoABE$Xn^P^15#y-jz_KY*U3<p1FQBVnhyOoYm4gw<xrR7z8!
z75PpJ_3~ltNU^VTa>V`lp<l5yBR9VqR8@SUft+hxnYH);MkiakklaDkB04sS_DooX
zM2*28H`);H&Gv7-@2`7SFm!H9QXW_gj#nheMzUBA$$maEAMC#N%^9{T<}GBJvG$Ep
zx}#wJzL}^7*Bc)?k8Gc~K%1Wy7EN_Sf$tTqXMSyMT~1*YFt*AWyARPILO5vf2)P~d
zBsEiGT_UyGl+|-)bFQ1CT#}t<Bq(&sH634nH*l-cVIB;<m)|x~pv_lBG3U~_FwJu(
zB52dG_3#^V&ns!Xw{DG;=e``%(E9b&{t4?|uf41|J&Mo=JXzYN-nWX9el=I4<53pV
zKHZeP9M?AGV+&Uf6UH_whZ&~x>%%Xa=2W#U_i;OsvR!yg+7yyYi@F*+#Y1HY3vX{Q
zEPU|tdzV06W##?LOj?4d{ANADUBQg2p?S0n=yxc4OLY3?g26ZI@14_1s+480U_4;F
zZ7h4uwh^MCeY1Ps;+94?tB|TiZv*Wry!1PaM-wHAsBVc}Z!~d1Esff#f0NTK99Vdp
z939?LEx6)8sLXp#EP?R(0zr$mKH>8n^_^sf${seT{m{~y1`lggKayCkg$upkAp_1h
zKv3EUD`SGA647<-%3qYlUpd^5-l{m45tR1O*UCv_<!+se$L;QEi%uUR7;+D`N%P_+
zl0Ha+*3BY{d-G~OO%aoS>nIPH)7gL|vhZfJY4q%ZCt-@miV6GcR@Md8uz~pshtwh_
zU>=j7bS-Q!nh78w76T%BLlqJuC^rLUEFPg|?Kin9?$_QAid5d$YZ+t~1Y<5&Yto*q
zOT2i<lzoOO*pVt;b>uPj+@z9fY7Do6KppmqEG^F3qHu*>K2@O$PPx2&JL+naN8a+t
zNP{=&#@eg;A?wg;JaA>45}ZTe?e0XyPfl~1jLi2kk7RdU`w*yX^a}1iJOzq!$b)Zv
zO+_0<fOMCj-8^W?e&~U#{lzGzD5j<)y*(54a4--s+HJq1h4sS&cd5Zxc2*#~&11IW
zkPinY#r_Zn2BJSJA2OoyQTg9H)G|(TK+&pk_<4jHc+>OW>w`|qZH^v+Q>}2qr`o}|
z_IyqxIobq$ZBL5UnQ@`z-y5uhJ=d%Z#@cyiQ?aaWJlN`r7f=c)g<6u<*rkIF9HTuK
ze1LYd8%G7=W;J4ws4fZ1Y{jvRsA`o(uy1zeW)@f(zP9@jS)t{}ME4>?PqHE1u~ze}
znVw>6MIoGG+i2W;W_)xn1pP|x=tWayYTJ0ek=L&SZCGbPY?|V{9vj=*NOnHmo{6gC
z6c$O}%!56PGK$^hiHVYWW;`IP!X-UpU~8c1maXeGsBqrq8d0&ZuP!O;l!YxgQA1Hm
z%VIow77viHgTZ^zl||Nr<g1!f{>tod^@qw*j(dJ5!SvPYEr}CCB=DC3jp`(=da#ZB
zceL~W4dKy#L@ZCNgd+&ZNt%8vKM##^!Mg$f-ubTRyE8=zSfS}7Bb#9y1$voF<w{+~
z*3xW4|1ak_IivuQp>DOL*)4k0jH^mB%@466>FBc1@+CM`ghS}Giv3$yAyTLDncf{!
zg=etelwu`eN@=_cBZx{1mMUASjj@fk!=gJl2v!dDtIlgr%vA?9@~mibA~aFxk*axR
zH^28av)?{xo2L1FkXvNB`^zMC{%Too+yCU=8|_2TO{kAB3EC{#3#mhSj>zZoX4-u_
zGrnMaua~+ZN8UlSjMaJaGf(#N`?wiEE^!u~%&m%cEyTK@Xp_Bnc-U`;8AXdXI=oyV
z)Z5~1sw2>Kj#(d8xiG<N)S%%M^|UvvZw-Rd>9A*N)SIHEhuL@fqAjdZ!7u!ZHRXCA
zywgjZXuK#78Qu4)7nz!Sag$6@?&D<C7jD7LF2tsm`6e>4W&+6tYi~E4Ht@(1xe61N
z^<b!CwwwvI(_`%w(ETzP(Lv1Nr@_f`Z?rRPf30&W*$&wuFMmeCo+7`+OIeLqUMy<#
z_Sw2?XIaI>*__PyOo_*}sUyo+DH2#98nav?9|X{ZGhDPYqwN(OZrp;uC1M!cFHEah
zcN<r_M@_i?>XcEXsj7G^z8Lmf(1%5jN$?^tZ&Id-=a1(4rw|V*c`3Y2JxUtwP)^;A
zJ*#!`TO6Y9$l^P0sPL#Fq&{;Mwvmi3UxFEn#DA+z7UxNtWE)%<vbObJ(EL_jCZ~!A
zUe_he8d^<vAIZ9}^&S1TUp^gB<68EgpHB%*U6TkpLVypBTyL;}`LH9~8fp-(4M{zm
zPih{nI^)i&5;S!)FMaz3h9aE1UbteQ*sBOV1}<~Z`SEe#*I0KwQ}v{+nCQ&UI^b>(
zCp)m?Jg+H0%3ZVUc~khjF~MvS6-Cg092qiy#F`I9XqH4sY(9#Q6~YK5h}VRFtVxXd
zKxC0jd7ne*&I5|)*()VU=-xb|nZA4Kiv4S~ZaW;w1{a&F63w!;*sWCGuEzx5)HJ&|
z@nDrj7BVLBlW;sM1Dwa~{PE0X|J_<j%c41>(tCCq)m&T%j{i5`u9y!AwnR(#-ZHq)
zmZD70+wJQ{82GZzWJ4!QO$v?q)%Y4_G%NaU8@vf{O$szLs?5p=Y`Dp7=rkF+W^7yi
z!qROuK2$~pE;eu&vizQOw9eY@xy3_?cI1O<cq#flKkZ=%>L-|ie$l2FK5dEBOq<3@
zuee3>_3wZ;_xj7mqt)!5<u?9mEW$BMBwI2Hb<?uF-k5XGW^lwE4vnXxui=C;e`U^w
zbdcL%Cbi?9!&_xr&l7%YY=wR7<!IQVt&p47nu>daT!|8o&`9IX`?0iyz|8bJqB?m{
zFPt);MxBk&c#NAp4~D6m`I#Wg@6_ND+X;k>yL-QyBhZh)2&)k~WO`)y?52H(nvZdP
zj%eV|bF@r9I>qxt&s{PuHs^q-Rv!(U1uL4JD~1W}T_28kHH`Jx)apJ=#xC<<By=|8
zUU?K*-;JSXHT?BaM@yidHtF7#HGsRW#AC=BdjmC@Ap4}8w`oAmVBs6PwB66V`k}Oo
zG>Q!5W0kpAC`Z}2s&YQj*7?~>j>AZ;hN2W;6>XDT=%=%ADz~?}+ylGU32&Qth@I{1
z{aVD7WN((Rb;<UvFE+iZ;+l47c{`4|DA^(vZKtlry0%A4yNL%Lz5(_QMLM9<*62}O
z;^R~}im)9IVBh59m@AUui|=6jcmPb=jR!vc-$J<UGcG@JxV;yN&i8rsRJ%wbcK6cT
zEoZvAY_GHQYyKKq?^zvNS8nuSMeNz}z<~ee1`dG1@xUXq%o*_h65nQDTu>^F2b?`3
zG%|mAFTj2OZQ;<x)ZRGD9H)ybg(N_}g0LpneZ@T57c6;w*48LmS~N$Ob=TcOWas8e
zw}RhK8y#iv;k-quf){C$&N_p2=as&4q(A5O(E7g?)4wbrGwSk-hdK|`snPp-qq|x!
z7A+=uhQ)%Yje)2h&%G_d>YKaYW1ry&Z$SSm{Gghz&B`ptLeZo%$wnPu2YA7!pXT;m
z9TjsL+L5+5eBW4C(>O7|v^d?|r#j)=x<L`<WK25NmeuIo=s3%BcYlPiYy43~LL*nC
zA6GUki$kvZ{ZLVQ+a(EpbE!t}4xt>@R7dH!B=IGdF#*A@g0!n7x=MMZCYQnd{{PK-
zUA1OzjdORlVgzv+I5(g@U~v3zNv|gC`s+NRe7D2M3vS2%U%iZ0P3)r6f@Aa}WUHPs
zudUUH!L5=lBR@Zx8!CI${em8OqherUq$z-Fvp3Y}e#_jlI`?sSaK`>fynp4}B4Ni`
zYwBLX=9Ebdyp3)8D@*D@)F`wA7qrO1<O>jv*ORr(Ek%H{OWbmehwi+$snEzH8dp0%
zu6BI>zgD==PMCD}LtFauq@<{K*!qH3KeuK>jC~uohd7Tk_h%D;9#rke7F1Vyz}|LA
z=!kF6-d_AC1tpD$pHDbKT!Vl3JvT2LShC7xZoq`Z$tI@5k6xdvjx(wYRW}-!jy}k>
zBk3LcSx3m$dE1!4i~Aw?srjpgM^pB7E)}obpw+bZce%o^;wJh_gT&5&#?(M4@qOU8
z)p5&I%pw<@eSaC1Isxxs1fN>-8L9p3C9D=aYcL|qu~^fS)-b7R-TADP2=M2OuJ7J7
zChfPAN4<KS+{jl33o%L3b9ggm<R~qrI3~O*kXm8#aV?<@^g|L`II!eszZsDOcAGW(
z_Au`kegeDhXKPcmg??no{S36n)rR`~9ww+p`QSQwe-HBJ(VG1a!UO;5(PhF<s+aMA
zkO^uZ*U<+~^sp2hZ&zVwG9z)IoEi^CBw*l75C%2MyI}yz=I7N&=-$iCgLwUh;inOl
zFMbf$W&pMs`_^N?wxCh&MWjgNhsyn`d4*tmRE9lc`n2thqP>ubs&h4-z1@c_jCcT?
zywl^zh-byfdV%i~Za7yBNP|vOp*FNY1J<wrwV$_(`BABM=-RTN!79*U9o-dip#+?D
zSJCddfg2p&<ypC~DGyKlqXyFZwU8wAbqjFXE8ix?tbTCDh@>&cz8UbQ3(H|HbR@U-
zefLdp)Z&<+xC+d2Qu%4HYMoa3e+)|>tl)U=AOe+md%mUGa3Skl^$&05MA4)C2UU=t
zxYtyvGO9T^6+p3@kke!!`0ky%_ZCRGlmkBqZOuf3BXvqx1K6SM%4+=sa$aUG{V#@_
zCrjLE$6d}Bt>P?lFz=qJv@9g-Y*V|Z(%&XzLluJYN#5x&Kd~1_cz~^{8__X_lh{>-
zm#N+U|JZx)fTp@_Uo?n<iV_PTNQnvpN>w_91XKu!6dNE_q<4`PAP^J*sSy-Vx>OYc
zBE5GIk={E20V#om5&|i{<+t~{XYX%+yPW&Z+xOjf&tHUOuDQmVbIiHM9OL&JqbIna
zZU&cfVHN)qG#=t8Y)-k}v8mmy2a&P?c82cBW47!c{;VmWCcX=UOj0;)bGmBN_MGu`
z3SG(?0}m^vny(;%(28#_RPFsS+or^NYEbZUOIheH)uxjVecF~>KS-pP>|m$={6zoV
z-@j-4MqbdzfD@t>9JFB!U{D9ow9IWZ?~dD!Yv;dA(Qxz<<d#!=6aO0aZC2XKidmT+
zG5?7%^6`~cqyfG-mmGADma^k0ad^@{wAH}3`zh0_$I%xy3%ZM}%J9`?0+&^lrBom~
zE>oQHZR$Rpcr#N95Mu4frviIzk(ByRP(P>RVAA?NdOxod{hJdvEeK5#wEN}61^HtC
z5tZOZ|H&T4WQ*v_M(+x)gd*x=WiMT5<Y&73?C|wx$3YLSUg-u`Fh2{ZOMNYxR5{hi
z3%nN#L*cW4x1oeNvJ|lFyc^BgWiEM|nztmHQw7Y}M%E{*9lR!9RFrxt+dNY-=0%m=
zZ2+aY&?2Z0@{t2Lonkb0B_XkPGC*V)pP5Qh_cnY~7ss#Tdq@w&TcpCbx+ps<&jm$?
zZc#s+)i-EE3^2=o!DF@(_XOb>vrB|RyfrbiX&82fsM{isbt=)qDrxldu?6X1);=dC
z%(#i#1$SVM2EQGBEH5)Ui3%X3wdj+@3hj*w!}A{!nP0ewVz(&*8g8EKC3NS4?9;qo
z-}iOI-J-74;1>g7;ZrMz_XqOk>x0mxq+)PT-DNUn4ibV=b+yeT_b;groC{YHdOMaI
z&f+XC5N)35Y0HpO@8$jMB7YpICj`zTQR6z#0@Qa}r5_|h0(U6J?1S+$543#JE6Q!h
zsxILdVpv4{zY9oC4X`n+vfVZbjiz5l+TySub?0~;h8we;%Se6^Qzn)2_d?j2G!2tc
z@3VH&S&dt6ag%Zg=gpHk2(~Y3zM)xHr%n_ER73cxb+dh2eGpBxV54CwfLc={%0wo@
zvh!7shrG?0WZx>mUlsILmDIQ1T8Kq>AJw?MfJlC4xzd@E2~E~x4hIg6LR#}Zl+#cE
zp`1NF<$K6yr?7`|7r6`frUPz7R&j6871ESzy$f=w*7xwE_TWih3%^&r$F_x@F|64E
z^)<a^L6g3yOfKcXj@i`1%th(EU4e3ZX~J!~53eHszuV~drk%>~pCfznfmdYOE^-=q
zjy2$JstR=0O@+(spIdhN=D|Sms1m45mI=!Y-clw@f;1GUN{a1!#)D{&H$VUEWP?23
zC~)10p<;Va{i+|st48Pvuq8mnV66PaG(WJw*|El?dyt@_90mH{qY+XF-}B3_GL~oZ
z%db)b_*D#g*D{7|Ube;S3Sw;l$I6qB!R4nEorOyMMwOC4F3Pamk*N(<1l@y-qa%Uz
zsYKYTIf*e$i;=+#8MQtSZv5RFC4(}06&&%K4kRtnQK7(?B6$~0^P8akl)7c0eq3HB
z{CB&YZ~W|+!wLs&;lY2`7JWbVWojh)F_R`_N#-Id?kDKbk<ixhkRZaVn*_v*TtfYe
z9Wc*Cvpt&UTg+RI{&kJvhw^qcE!tljA{iUg!TFljcVp4b+qnf^kvr}3g<*6N6uOR#
zrCeGb|8`)$Y5K|l)imSB^!YyA+l(~rI_;ZNLQkXJi$cF;Ej`gtZuJFFfD~&aV)l7_
zqIP*i&DFX~7}EjKUs%-&n00pYX8Y2&zDy?5PnwHNP}VHI*9_aiR}9xJlJ}221WH~K
z3nhV@$+Z-QD5jPY%nb-V5i)S4HLr4SgVvHC){{xo1`%#BBiLp{Cn_(l8<9e9+Jy;J
z5OP~tTsg?mX!g}abPKOpo3oD9X7gUaO=`^d7q8C6oJ|&%=M`pL=Iu(&<&|IEq2DW5
zV~ZI9a3tT%OLs9cg$Gb7WOl!dQ3s=`mO7z|PzqS9;v_7+%R&)x`qZ_x>94KHc$G+<
z$Cu4d2lO!-5+r~v<tTZ#$tW!ds-2F!wq;z=pYLh--48F>c$qswsYFlCMg7+6JDBnd
zfv^Oho5WOKtL>h59}L#R_n4bz&Y+y#C_1RCgkj9PnMve@UEH{N`|<8V4m?II)%D`n
zb)O5z7f$FHyPsh4SqXs9M~s$QwEUS{65&l)+uLO40tZIjVj^pjKl{R<%GbV#hzJ2b
zE4_s$I&)vHnyP{P#njF%YpER48-5h)e3>MDm%rxf<&2kdrz%*;L;Kg>RYmRsF9A7%
z?pkL7GM99oY>%sq)jdMhBSdBSMu+-K^4X-kZ(I9XfH3^d6*cnm{-)~XD>DI&(`SGv
zHT}S0lv{>2ZsXLnV^vP3r^S_nZMN01w<5iiDhe}DrThnN%b~p|SlxPn+QxfI^iyDV
z6gw@qPJ$f2gi8ynLEzx0ChR(GKOi0Yl|CGAt*v?5+rp!8&B~(spolnlX8tw7wxOQ;
z2bO?-Fhqs*&P<~;hoRPm_M}Y&)JR&cY}cCKo7w(I-MO3JT`Yql&r+P<sjpQ21g$=~
zbjzVDwo+LNzpMfQ8<XW)Ibba0z1bAYS^QGu+75)r3>k)X4M$k#EqlwI_@b6I2b{%j
zs+nK8?$mrCY9f%mo^q9J-m(vA(+VXFg|;|@XZL^f3+%&N97Eh$-z~m-ZkUz7_3`o%
z##3Hfjq>ayaD2^UO<q<V`&$ma;UJ?;%&CBHa8}Q$fTfmrGoEt7NW@~y66_*j(F3pY
zttCEI)8evq$HMcZAaADHM=#$is|S=YEY!0ACWA5gl`%mmV*Pv_ku`$Hn>1eICVKte
zr<k{=a4{-|OZw{3H*S}&%a0ddF8na^iu_Cl1H36YZ$Fg?LaEi$unFVhz5bzqyK;BP
zw-tosB?UR>1q@Bgq}sb?#zGi>=oOB+E|uT;JniT}iI>xB2RT1Fa`)>?pMp9FgMHag
zkky1IHtq97ha(@&Yi|=%?S}g1UEe(#^Le%Ha6{WM^G%OBz)@Z#)tt6(iXBPUV^&}e
zEa^lStyK+U#+<WCF#*-68UFQ2V=2_eRvGVamo@(LChZ6{ko!v{%dK(>Unka%7C)|K
zxVu{3wF$$w7Rz=21Swfeh}S%fJa*z=Xyf7WUn~~0b~)vWajl(LN`8D_{D?%-u5-29
z6I-S4do!$Ko}7)GY=~K9*`75E7tf60_)Vj>r~-4PtFYOLV)zCp^AiE5D&jNAE1UIA
zn6R`{B@hm(Z)j6JST|~|WQNCls>{tMpBD9kBJ;^uOU-Tum43-T*CBw2AX<5xpg!t{
zoe$`RFhOmz9Zsv_Of4I#hTnH8;SaOpwR}FZ%AGrwwe<`v&9Hq7#xUR<@P%X52yRs#
z+poNj-$DV?=k(k3d36?Y@-rU!7v=#7(>9wSKh8A7{{*o~1aNfN6QZBQ7GP<gu^$nS
z*4ZUedpeFjQi>}J^FDNLt{QS0<x7;!B%U@Sgf{CQ94EIeHJ1(Vo~<Q)$o~nlnP?aI
zVI4Al>ip|Y5w)`G4Qj_K1Kd>j-n!0;T|FY2X`JHdSy~fc#5^ML=Jn$-(PO{ndFGVt
z10*n{Ml<;L0xzJ0Q!&qgiS0(03PGYVpFqn8dPosu<uEc<D$&V(q3Ka#Ty;jZIF
zZngG0&>8!=8xl)fNn!b!t)7RPrYFulZ^oovn^gB&XbD$2x(CssiW0r*=P}Hx`Q%Ul
zVj{ap95?nCG%i$}Qwlhy{`lmLCvpeVZAPC|M;s77P#%Ojlih_wvrZzgjz;f8H4QWC
zob`8L13Y2zahX!+eCKsli%PlUS<ak8K@EzY?}`ts8j0Dvkc#pX7q&PIB$5}=(=gsT
zug>?32Ok>rD53$#)p4rA5KI~wyQDqn84<-g(i|m2m8w+S-qjBmd(PqXF+MxxS={w_
za7DBTq-7dL4$GqtK#Hg1<{<~$XK|kBk3A>*J#p_RCM8HF+35*Rx9K}9=OLG_>D-$=
z3)=UBfR@Xi^|I93`NvM-Ew8S6w+mA~n*$s+rIdB{32}K~Xs-j>I<fXn-1D;&KC3j!
z*tG7^{asrx8>P{Whrg6Hgp-pvGtp_1)?8nVCFNR)kIzjQ^(|~}_v_Eyr}vhBMXWc2
z4ruH}t2rTokbL-BogmT=V8}xm&pca)YB@gH7sE$?lHp95y)GI*@Q7`5@t(pL@#jno
zVMd_sYI6qr$6^Tyo>(oCqiso8eR*LPx4`rf^bF+6<`Qd5)w;>aa4j^$a{OG6^>8UY
z!uevp-%jV^9>i;I?9B!`<r6-^OtP5~!Z}JcCEiOtPD^ru@gnV(>mb9Z>mNjxjY1<X
z3eFjh6H#w-?T%b}ZGZiQ#(4u55dY{pz~+aut<(t;OADXTd5y@0Is}`HahGfdnLgKi
z6KUj@*>w9Cr}v`QHsYX7p{`hvp9$ZA0Wo^}XJA=bm&FeMk7d=h1?-A{T2=@Dc@ys8
zV%o=HuA1_QZ)#|9)ew`&S<LM_z5g3;p6x;OiPWQnfCP%{%3X4*zZ+G?!Y@<xJ$~5L
z25#YO*4zES=<Z^Dd_OMDomEo(kiVQ6BgJ!N-yjs&7T_lx64z5`#Tlb3i5G`RNy+{n
zr)2yD<HhS+!aGN5LH?TE4GL=tt7f_s;cmd?AHX)@a$5!yB0a$!%&a6BGIc)g2z|3k
zF=q4wQ?2toagBLp$A+2*@6G7nq#UOO%AzOq$&TKh7isHnc6*Jc?9bf2VxKtJz06j0
z6Tc`ecHwHZk|1qP2DFPyKDLux2q!-vCfy)6XTm<DkFUNoYU3C6bY@*|KR@zV*d|K<
z_S4}D^D!$T^;``-Oo1FVN~D)$pE)pu95Fl99h%EcaRnn6NA!mv-O=z1ePRW-vzkAN
z%iRoeScqO{3*SBK(rd>!*E7CEx=(ZsuRCW~CrY+-<ypn9B;E_S+GiCn-@n4G^=2`I
zb}K)<gHu<G82hE+;Y-IKkSKG;?TyGpoj6<3UVHEOv(riWG9zQBY+oBqie~VCk@wHm
z?72lW%lw}16bz!ct<nXbo{gULY`0KhU-PI|Ry@g@oh2Ky<kg96aZEEgN}L%>!)?5%
z<11{vKeGK3wDuE(FNCb@52uA?zUnD{n|VcZlcNof-7MquK2ZBP=A~pKS2`H*eAsWW
zAOT!rXcL$X32C;Uds`z`VCry&6y<mP9Yt!ihw-32cf#ZE!j0VF@s|TXZ2?y874lQ!
zSD<?FnP^uiuRBg83V|yHn5a}GTqmX4rmiO5O`s6Ea8a`PE%dcYKOf^2VKDsBo?{Or
zCVO0d9&!`3XP`^=nV&g8wI(agY3Uv=E6SZ9A4i?fPcrVbpv64yAJ<&c5Qu?}j&!(D
zYKkTRM%DKxC`Mvi<{Yo$t_oSrjZi+<H*Aq`mslPwQ-OubcsiA<<|M2=TS)bc=UF`S
zTz2t%^3!nIq0JFYLN5O}M$BhkIogh)a~CmCD)z_35$4y(2)a%t4xpQ)UIe^Qc-C68
zn#>mSO-~3Iy>^QzK~J#rT-j!ri$QnvleYlJJ}Y|JtK96VnT3yd4v^;q)1BgI@1grt
zS~k+utb{AY)iX^@@&bA)EzK|TvxOw4B6uxeYR<Km7Lz$oQR?m?l~hn%xq>3lJIZMP
z*h%z<^y4cc%=lCxUJw6s=$7g(g`w=Z-N?qDAikLu1iX$i23fJA*oL*6LNp*m@hu%s
zdt~jrB)YwPKc;7+j)d4x1Wjjr3K*Mn8$t9lQ?+UaQjE+bkl!FgJUz>uvQZmD&%u--
zlRf*k0ZZW2hV+ttLd3KbGu03Mfrn1nu=+0%VQ-*m-=MAjc>q7TkS~m|t4$OOjezQU
z=h>CVI=(cf#bt#HKTi;zkwQ??RU$lsET7dZek3DEh@l!Ge?ne!|JU+qNFNHEPxdny
z(pSkYD7>Z9CL{kS>uvO76_EF0-D`?LSm3!I`zm*H_VRP}6I>_!8W3akgG&lY8@6eJ
zm{Md??6@qZJK5TKyd=xNWLBD)#!b_4e_KaFFJPn@Hd_I<LsV7y+O9L<=u(sK{kST_
z)^vDuchSjGXGKZ=E&bl73UAZ(st=Ew#F~IOstr*miOjPSt-9QfW`_t)0WiSgzc8?9
zjNCz%tkx>7Oiwv~YP%iAAH%OcS$VlJq>0sWii6U6xk6DVHjkFFbi&iejkHeqHBklW
zUAyG=SJ7dlp3}4IecO?|l3ylD?REF<8|>io6Ep)oTM8AJ2JqX`!ZFJhb1szgEK6ae
zm!d&97F#3nanxa8?Bn(0**k;8JrQTFW=1ySLCwH1l}R3hVtS9FYcy417EV^Dp9`)V
z^3Vz``ZN-M&lR5}b58YfvzO(Yf}zJ#4_J7XWXAl9Td1}g{UCbR)t8?3Zlr0Tq<!(R
zCPkkazepeXS)mD(&c_D3{hLzuQRmI^J}}44E8lsH8NPpKSn{V&>-d6D)!2Y8`XzD-
zxXtwDh+gs3+-pKx5w7rr$+>8e&LkG(Xs76<_z>9`*)jw}NBe;)W31fVZmVuo+Tk6%
z9}W*3sTFWq)yq4=3&XQD@edY1r3%EbUwNTdcGd0tsRMeKPx%)%?bnBuf{!oDg-jrZ
zP&8KV;`cJ!pCXb5&OXbLa*(~&OgHk!CHwF?-o;M~)0KrZ;Y6`59h%wQKWp{JgXjXv
z9%KbTnJUq+0T5*7oOh#N6nA5cv38+&Uaq;Zf0ZiF$u|<`<5D_6hgi6a+P~?j@H{nX
z#RSv&QD)?xS&38R>c)6FLobzu)cL)bJ`ld|f0J$qeLZ^qC&=J{h{i;kW30hKqYU2M
zi3r1YW&WUqBwrLA&(DhJ!%W()jcuv|Xvwk3!<^5(jxU9U2ky&aFl#f?Aw$;u_!^r7
zu&!_Ch)e$K%<G7U;8!J(Kwx1~842dIDg)Fj&V}T>hJf?+gD%_bL+8t+8Xu2*gjSBm
zF`rg%b(@SX1-DB)<&z|bVaYXSR7fuh0Ps|YnKwgDN;^MNzTkJr?%70AM!sOIhWw;~
zZv_Zcy3;;Uqo?33r1@+AF!n0E1aW451KyY0PhOJpU&HLcUX-T_Vba#3F6xr(;=+cq
ze}XW*i1lABULy8?Z6LXcpoB(Wop<SECDj2LXSLEO{q80f$6B@hsar80YjUUFa8Mm>
ze%pjP`lbK0&8!b)%l~<!diihLhW-n7!hbpVY>-bYZkaE@WPyRyD7lIyHD<&&zgDxx
z((a5OKTR8%hR1wFuD8#M?X(4FEcX<&dg3T~hcj#KzxzM%tUk^ADGa9;dx4_z`t$l_
z)hL7j<EpsVERWk$x_@3v9hV&eWil*4&Yt5O(JOZhyW6CHno6)IY2eSAX86yPm{??O
z1#GroP|ptVpPqPC!bje2ci5~SgUJB&TO+dK<EzuGeWNt(*9vgVPCEGBVb<Xl#P!4{
zf$1PgX3$Zie=YsSpCjOZ9NE^K*9=LbXriR*-{7CEUL!JLPNM@BHKixwBGDz5MHLT~
zUX0f|1VZ&sp>ZFezDf`#q&om%4p>c_Lo03z+SmvbS+2XA`JJJrNMPV=w+Ku01V_&w
zY@kZ0`@mB=n-#G~hnalmwhGIuc4c`8PF<I0dlnsh@qfO7M!Eze-@C&CJjcMf0>sH^
z;om#aL7fCFu8z=+ai^twx+->a!KV9s%<aED{0;gr<uz+5kG++uu-pK+2)`1rBNH`O
zvP>cXt4mcT`Ko_)$7Vp^Yvym81X-7Ed<AE>$eqA5LeajD;Z2c$zUZbf!xQ9EE+bXA
zV?H$*OvuXkf6zjtsovQSd}77CxGm2b$BijMeq1%`J>M^W{rP4gYmwDU+4WKXP@!~E
zp-16CTIKc;>H~_5V<K-EmLtgRgTp@WYoq*r&i;<?L;1Zr-&pVKmCYfvvq%7&Y63V{
zrQO_|`NhSxot>eDCH}_6&6xoXovA|$X`j*5#Gb}_%4um=+WC+`<^OC(8dj+}?`ATL
zAL&g|F0|<{<@S5CQPQP9$S9lU&3EhWIjAh`;TNQ}_uZG;kL6}c#5S5zB;aSHxzkv)
z#EUfNOfO7rNIQlIT$UfJGk)Cu@mr?7{Pg@HY_u<ADsn4kWTYNvCPe5_D<@ay%eBH!
zv?M-<*u91FTT0F_r9CRJ)XLJWauJub+ERQmE`89V9Q35_47n2b%K?l&A!BTM%Xm~M
zsN|f`AU>z)l&FDkr{;wtaV*yk7QMRsEO}Peaq!F>I7IFtIrJ9f$BeT=btF1tH%Wm~
zNrxUPU~K<pKnfuqIR?A}o*Sq?RQ;hgocBe!{^*^|bP&S;?|a5N^A~2EHjPA%H}Jl?
zNp$}_<67<FN`MIvZ0Gj(c^7aVX_3U-Vg&fi)hPh<UMmv(c{l09o;%gbd1tYLH!}yc
zf9z4zpZ)|YEs%q8Ax@B!_CYjmd^=5C64p8GlDp7Dj42;UJTrGDgzFPZEeE{qS`OK+
zXrM^=RHY@r+tg>w9G{LWJ_M(?3IF-ef6FzJ2*+6<ZC$C6zBb1TCr15?;Pa!NNkM6%
z4|$gYkp~SJOFSANs*$&&rpnWBcY9|~Fn{8t$)~d}H3vxcEfTupps5#%2Klu4#O@dJ
z_D}XWm%4jc${#r?bnz3rW}BmC&HaoE-&aT={}(IfGX3CfJ-4Or$vzJNTOrMZPwGDP
zh=&A}9Y@KorT1@@ta?+tUwuysd~~<ohG}Zbi_|UR9B}s~NWM*Fi7)LY=$k9M-HS@a
z+Ca!7#e^=8AQ~Hw&z=yUK6)W{iaD}(`f}gNC!T&FSc|7b;xhBTu>97h4is?Vb?R1u
zxv(;OIx3)fF2-bEe_`|2-$ATbUMjTb_6JIFw;Ej#*C~9+JQ|(7WqR%>NR62<K(jqA
zZO?vJbzWL7yyS*Sidh4=h*xW|N|heY3Z=fnh`=`o)`Qv{wf{oEGYbkJ#z-qxx8?2h
zV{JtJ7ZI=WMj1%_A5bG2@IKY{v{<(VTfmm<MRpyZ!=5}NWblb`4RL3$;3YaE2m=?G
z|Jp+>p`;TN2mV67;J*-?@yb6Q&+Ev%8)_XxYaq(W!8*q*$dB~-o(}m<fF2_T`vJ8n
z8Tk(Z-~EN$|7!+TX7(qjT%eAfh+dn?|Do6(Cgy<^(y#8F-rQGsSzDJPSY*pn?oioz
zLBvY_kaCwlNByNQARfze?ik%{Bp5#mh1D0I2|LsJ%mR1)!w*{nis1DKUA&)!e1XE$
z!bY0{5H`6hH}w+)i(9Dw>)pFnD<yeJeq$v2@5UYf6F4{qtl52vP+L8r)0|m{$+&S`
z3G(4{s1T7}x`OTf!*_>#;S0#78{Y*W1aTJuoWN`KlV^|vK=!GQ7T{>!stp0cI6Fq}
z{_}sF>yNMEds_8IBfSzpHwy2x3(#f)z&kzlJbKLOWR4$ZWTEB}7C%~6vHkES%ZyM5
zMg7s;MW=?b>VjWb#|4Xjq0Jv7_GjVcW`)^EG|7_4@u6UOW<pUc`a_1PxqW5(ICj9`
zv;Cu0sz!dT-$=bd5Pg#GTw0TPaX@K-vAt%YseM&<us>_U1y`-|2*YAGM&W}mzaMI+
z++)~bd?+FFYknY~dm6t1aQL^sQOgU7L32mQp0i3**Ffb)3*Dk6rt-&>N$<{Cmp?Tq
z6qVy+9W=0PdIgY=S*T~zSOd7oh$gqMwiwlZG3nCRk*+7M#W1bTSc5#a{|+nt@3GZo
zsvukAHN}5U|0D+>Q*Hi~{^>*cb-JHH{GZZyz?lHg)2-j~cYqe?!br!vn7Qfff<^(o
z@fl>mfKFIeXcr76H4S?#z<gAk4c{<TAGDd+!m(esL8g6nCz%H8w_OY9n<6eQ*_x<D
zwYb++qr*8a*U37*%Ee19_g!<XnMLh9u!iY3?_T3zhPN)c?P|M2{0h6Jk|n?N6dVWH
zV(SX4>2RlHT7t#>&j72PM!&eAI44nqP$FKNSfSL<lRA3Fyx5I}gz%7=IkUS37bq?6
z_3ZB&X9JrR#+&(J6`4Ve3?c5NA?p4BN@CSD4={i*6GCw7(fdxxX(ecMn2%0H#Ky^P
zs$SV&$lcLFLF|$fUa!8WBl2vt+C4^|lkE2T1PCQOOHi>$##_XsndmY3{(TwP#QwhM
zMm&6nk4dHEUzJ)7mxrF<v{f>+vK?V#Nc{&Qu>1qj^+TDTF&<lTJQle>7Y#nh#MM2_
z#kJG=Zymln-2NuV>z#|XVC@SJ$18xvtso+CD%Ns!(-P{x-}fJ*c00O)OTZ!^IhHwn
zDi-lXYB~@)bF;7Rf<u*O_Ies|qD8Q6UKjCHY@3(&p2pXsRe45IF9^usFqGwPNPb~3
zUD%ecRUc;ZOC$QE0rjHMMJshReoa$Bb#uA73t>G0J~46sfpf-dGwA@sG+}Vk!cP!`
zOGP_ZtKIk1Bci~-KLG|FG<&8A2%Jg%1YMkIDG2EN^zldHdAJ>bsN^uo%`!(ud-npy
z<GZvyar(tyx@>Tx@5nEu_`IO}OUdf0sjnI@0-6;KXqLL>Y%HK}?SQ_evoZi|pX=~V
zQ|is7wd$h<VHu}Mm<^)!S6{W=7fb#uICMDIPf+Z}oWU<8^w2)>OA8~!@Am)F>ec!V
z=qW=$AHD<nVD;a^{%1z^37QQCK^cxfA#RSgM1xkCAmC*=v+tFh$Px3|-{>3n>b36p
z_cz3pvoy=y_OjG2K2;#q{Kp&`riy<eGJVLJ8bj*L?GLnidnra5sSV{l59cl$D7eF@
z#22#{tbt$j@nDb(lIFcW+aBeC*P43`p2J=vsSP;M`IWcJk2q|7&bUE}KCnDzJN`6%
zGP0F@)Y^S}j@H+}<F#zE5mm!)F<;2M8o)dF2vIcTv{3xk7%KYWF0AErBTuATcpJc<
z@T=!Wv5?*8#8lsI?4XY?R|PlA-4!S+`qDyKhyr=P{;%5Wb4^STt~{T$F0}JF*1nBq
zJH`|1FHaNcLR%xvMo16Is>eQd2n4Y9^~Hr8zVP*gDjUP~IsZ52-2S<1+_&OS<WAHO
zP*btDZUmSU_N=$6$u;Y+g(ovuQ*y_gvy?hHN<hlcOnKhkYg1+li*4<*IyLDWnRWY<
zX_khn0LRsnRGue7?TI$J`?8<W>&*T?L2Y)785xy5WL@c=J<>3c{`j}=fWy|-E#T1V
zZ2J>*Fkyyj6R4;tlKeN{Vt#iSW9GlTD`mG1{)Jt@+|%<X&;9;6><!uzEz`)PVTK>>
z?`Do?GBeY3VKwe5XJI^5XX^O+*Ex168rqI*&vyUpYZ!Z4>}^^xFONMtUjlw%<gw3@
z!Y^C&UTQ+UZG(Q2GFr2FFJS9op!3uS=e&I03?bD%Z3_>a0%l_=0LZ_W8KLH$_Pa0p
z0BZ8$TfkHt@1Y-&s3&)UJUk2(e*Y!pcUS!{=*qJPx!SyitZuFf33mNBX(x9!KJCL8
z;v?cxG>hCT?JBiY+*8gl7MC8|d&(c`e<$y-9FVCI40Mp#9hPVAn8+ICo#pV!;cOwv
zwB^Ku30RUxY?qWqIM+#IufA?VXu325iBT9XhX%MJ-oNLH6aq=};w_VjBZNY+KP3hV
zI1Vs>ezV@B8CFXE!BS*T!LG7oEE|k$9NRr&&+@Hzyrp0J-2JK269*ndOM+Ik?FN9H
zyHq7lAh&rZHekl=C}C26z>yD3j>$5f?2iA`^ByU?IjtawT8nxxc%X&A=H6JCUh`ls
zLYE0?U<jlwSUe8DSsYJa>b%yopdLJ7H!pP5ajDY~jq{_AR^=x|ERl6_Jd(}U4l~l;
zXBP&|EGJXDzN_d<f-i5Ll2msbMB5)?CV7%GLg~S#cQ=yfX{T-{f>x>AbDeM=B;q<z
zFwutIH7x49!{Q24j`g{F&SO%ENAe5aqz+5$X%67-LmA|+&baoLppVZWawliZ$25v2
zH5M<bmp8y1&ZNrkiU&NpHygeO@NE?f8_dVbam=l}r`rRBhEO-s2pBaV3(tHl*_01s
z(=sK;t=Tgnf?`Eqq2*E6pYqLV3Vy1$E2|^6H>Qo>mq~Fqt_)Qr$7El(IJ%N>vi+F0
zV0@0)JJR06Z|m<>kCGoV1`aapUmiH|?dlO;^&{=NBdKv0mh5^=3|h^3p+a*#$Ei0%
zlp-X;%oNGiId-z7;g@7^>i{BuIpJy6l$+<7G@cRH3&EH8l`Jif=L*491HREEONhA*
zfZ@HOZ+2-AYI$^`w0PC1G#$XeXk^(<*<v7~@9$ck9{d1Z0u-TCe~u`ouX!=XG6cq_
z{_ubYbSc+-LEk`0-Irn@H(wZ1cbc4%iMB%4H~X50ZTCp)lqHsCn6$-$R6Grs9UsQT
z1a<64Gc3%Ufcl(i>-4{l+X!@3|KN%`+t~sWDtb7qsGW2`D9RQhnxOMG>h_T<3DcBo
z06aB!bYj^{c33|~l%tH8f8|E)NjA_&<-h|qLiY)S^`A*ev`U=jdN$ei;dMtCM^(%^
ze?Q*0cHwojR!b4cOzX)lYdPshvfSp=>2ZP~3jCh2a(Z9CJ~_<)4D1Wqln9$7j{?jh
zI34;BE=2YJt|<%c*|&iQO?IuC)Gv`oZWER02upxd@)JZRgaU?j`^<hv{V;j}zK))7
zSoi_Thz|$d{6%v6j}Ia1?oT7PyVW)1+ug$VHkxKiLc^)f%57L2kS)R9w>|{F<2@i$
zR6ZhwZ3w#(oa3h}EX=h>;*xs^aKf-p8`?DXyiX}?Oe80>D!OY`!#&o&>?w5nrYb7{
zXG4STy5q$9sE4#H*j3|cdq6j(Q{Xx=&SIrU1S-NM#1}Q9N-HBT#XHIFuu5LTB|mX8
z3Fu9gBXob5?M&k&^K0~#n6t0wSDA1OSB)jzD~iR1x7Y?}L>)O^?$xfuunt6?TnST6
z(@1Zv3ga34#rWY%hc;e)r)3I*-xf34tXI3Wxf#i9-Gi4DM(&76a6wqeik&U-FqZp{
zv`4%5C?^IVafG?m9ggRi;6KokXQ9X0rs*VJ7DLfQMsLnHoE^@dec@4ffIPj)3%^xM
zA3U{kQWNSbaH<QW${9sEaYX{O=?-LLr?JnTpauX0;INCtp`MTcUc;H?0Ps+ubmv>~
z(wt|`s_9T{{v)PNJeShf0#g+stRIjW#yS^ugJ6T1^S|`AWa>`0(igd7S3^O;7Ne1~
zlwPI<wycLSRDc%wA2ao9?iK^ikC6lBo2rV4)={lX!SDJ@5km{5$%u=kZwFjjx`W_S
zoMXM<2Xg4c?Ybr-Ei7Fk2toliZ?%pOcta;$@89kMcF^0O9z6HMztXANI&+Xpf5Z{$
zN|TwQLP7rb+C|c~`fJMn;2i1^33gpJGyWSgi5=sR-9Jf6jH5%SB7Fg8r^uh3d3qMf
zNhWFLgU4HvMkjB+h$_d#fubHOSq>&n5YvM3C|8`RWAk{B<Upw(*ES_C5^u@V_hy2*
z{HyU6wyS<gI*EDU2LOX(#z96$8eyjgq0-F{i2CL4kPbI0`)Z`}9mPCz40QsZzkS5-
ze)aq7W8dEUsF@rcCo-=!&-Sn(jpt;8P%@_-=do%s&iWdKM>T=0J!bb_R^0ydt~@Mo
zId17(GUfE;@`DEyw=*9t$@dHgLiz8}Vz;%rbOsEHh=tKm_}q<wx&DpPDUk;!r&fB~
zjK5NhrJX)2GCvkL%EQ#)#9)?0Xvbkf15T2Kv<c-QPU8a_pVbdvjLVK!_Yz-UZ;7`7
zA~L(mq3lC!%<y>C#@yesZ5~p-t6es<=8|84dw?&nw95%sjq|X+{MG-3K!D8BOfl0t
zSRO^1da<@DF1NCh5dBK}b7iMeX|h__u0Y~;*)mX?Md)`#AG-qyt$@oQyyJx;68`YH
zHG2NLjb`AM?yl4>9AD}#1WM*p@}@a#v`HLW9xrdnsZ7$V)hg}B$KjpD3!KZ&CC=ta
z-B^LR%>*k<ETru%_W&-U!+c3Vf_lXfI@lFW-T`?$*rmbuw{2$tP|WIx@?ZWg0sIaj
zJYBBe+QOn|H+O+^z8>@SH1xnvP@y_tO6t;U-Dm##_uId}9^|3$|8oDATQOfo&spIW
z+6Alv5xiiYk@jgAEv?3L{zGnQT0<&d5wW6vOAIFbV?T5>#d0emztHv65x(1yFSo-E
zZgR8lFjLR@+PArs=Co^%<Coj%>Y4Nt%97u?Q>-E_Q=a_ksiVocxWX3BIziOkD#EyN
zqvgE+f${#C7Jj|Wr2%m~cIgI8L%W~D-heeymJ+^6uD7W_HEB;jY6{00gw%+9_+d1h
z^ueKOuqw+tVgJoDHL1fnqlPZ$vuL;uPR<Q;ZCyv4qbuhF@*`6N!?OzuSqTKd)$O<<
zYQ*`L{rjZAWy$U!E?%D;WcNrD8cbDkFCd5N4}0MGrZPhO5{7n|@7rJ0n7_kuC%`sF
zT~Y;p1pq_Nq8JG?=H^FGW~qK}WbV!j#%|oIzQZqPFPOsSuvWj>W%anF&?(V)BPf9X
z6%Ny2dPskC#q8LSv{tR2^lZjOY+kbzKWmnPe8FlHkg%-O(atH8DE=XD<?*QT<mBrS
ziFe))Zr4<1VY>F9O|hTMhH6kOIfc~PRl_gy6fZJEblT=fF5pX*q6!Ve1e=B0v_%^0
z&i3y(8uy%C<rq$k8_p6#7iT|uI)3kPn?~C<>U_ufAj<{DA3)L$hESYAGyE9xCRbPg
zvh3{%<u9BMc<;G5*}ij1bDGQS2g=&msv7s@G`=^)?sQI1iPaS$GkhkWZNe_@y$ye+
zxuULZhi+odiPfwc4pVk7FiWFhRd|nXa{*_GG+(KWee}*MfbDt?)&~Ahar_BlPN=8)
zV#q4nGkbf(^#mV3;MMz{@m)_nz(5(vg#85N{sfs-Zv3vn-}Dfu$U~v0(f$p0Xdrff
zkO)O7ymuU?^I%%#&cf0@%&6sl(~Q>FmR%^5M$&$ucP=uahnL9q^?$ua^=&HCS72w#
z6Z8oVlMa01C7%a#`tY7&Hy0OjYR)>31)q=BJSvqi4zt*O@PlzRc~R28_R4ro_20fz
z-fxvyrv6HLe>;JGB>|^hN7b~)`OCIh&^q3gMPIs0TZrL0tUMg3?DFR{K*zfL^r{E8
z{U}K+Trygt=VA>-Ej`sMWA2ByaMc0H+)T<qJw|)gZIIF#43*hC-FOwqx3Pm))OOd?
zzT6~3<cNb7Pu6MNh`uxx&BkHV#&$s4|N259^s!I7!(Jw?1Wl-fJJ!E|N!3iaWl$B4
z0MiZT2i;K`d!$)<seRX~S&bDzc&3yb=oi}3!z0mL7z(>gv@eq*+9}>2{z&>Vfm(Pb
z^fEmlFy-|I19=$Ei<D9s$%t5s%Cj%4N*Eo#?-gbfWvcgSHZ6&(U4Sp{D}A6lk0M1i
zv~?0D#OHsbRDp{+W5KKD{43^1QS5{S5{k@1WNz^49Y?8E3{n-2=TtiK%aAHYv=2;9
z^qh0*<ZiM4W_$iV$Ta;?AnY?Sv|XQU3rNk5%xz2Msp*q;=GQMD$RFo^;bML}#QtH3
zNH*8)Q(Qug21*H?meEgZuR5!9o2H^V&b*ns$l74t{Wx!B(u0R+o($)tN@bE0aA5p-
zSZ<E-w5A0yNH>1`fgVR-P2#<)Z2U@E>YJCKR+<}TK}7WIEx={X?dgs)KY#?!^I<ZO
ze!BfxWcPgY2$p(Qb=1bCd@cUwQ(r3|Y3S3&XM33^Q!T8&z{5$qOYc80J(e1&MYgZS
z%tf7Uohcp(+CVAu6*0x}oEmLst$iZyg0r)UJ%f1dcFrL3y+vdo*q7wl&7^+(k8=wF
z1lHp&p;S_#=<m;mGAqu192mKb(ybP1o3WwyPV;{X(fzb1;G^{Y4Hxi8XWpFVMu&DR
zZH$PKA62D=?OKs1)@H`JTf9zmkRSpWA=E2I!JnYHtpk>>ggq#2f1&=E-T|Ok!|!uo
zFl>dk_8ea|6QSkRFk5FLLtKwD;-YRESFajAibtO_G0qc!IeZq8ays-@hiT0{$RsVY
zP82x-*x*2}TCy|s=6GG@EFPh%=>0;VEEIowY-UR3;GJ`-T#xfEa~zdp^?m6$4!ekA
zuOgOXN_07wVOQ!1IrW*NJ=PAS-d5=+qWQtki|Ve6pP1#7Dj}%w3(k`)KMz5V%%y<u
z69uzBIMxXditdP+x5TzQ^T{ety>#J5m^%NBP1!@dj6CkKD$vdbMc1%QGLD#JMA!{4
zq^jjN<NPnLH;pfjkFz*m^AQs_8?)96Up@8d&btT3OXoY<!DKO_@!cdKe(!;GokP4>
zt5Ne~$tlN4aQ_Iia=Ex!ZqsHstM;`*{?5`9$rW|S0cSc>ZWs#TJ@=)?6a~N^*nXU_
zuWF5}{iu@o!cf3)DXIbNw7OXQ@Z^_v8NM(I%poctsjka`QXNEDWFw81U8Vi6&UH^S
zNnK#m6pDk{kFjI4bD?Fr(hPW$-PN5gwkPsJZad09{<fmeC+dS-a_Iejmwf0&H%r^O
zq-x%4VuEwaO<L>|_PZjCD!L3U!4JZgA#eQj7u=^63DjwE!uq>+#tv0p0KQ{+Jms}f
zS?2l3v63q1!AD4w6Zc<vEh^v+zGm1_EJQu=k-?RpF#uALrWO^N*QI#pQN;&ca)zvu
zbb<@Sq439RJx|QoxxbfsPYM2LuDR8H^v%~dwasqUvs=GRC9=VDq;8W}T8Y&rd8AFU
z0E<%)7cf6gc9}gQ8eH>)U2Ay`E;^uH{!N7v$8T2_IMD&xmI#HMg`OmOH5u0*B9|kG
z()tUqe(8xXi;n&cF^eB#ExWn7i&9D_B~)n+rSVCQ7LIHjq)!vgoO>2wAhmr4eU)(#
zuQ0<e!(N23sgAK1dXiR53(jfJNwK`0reA+W>a?4;EVRQg)<+4U!zAD8+NZ)beV(wM
zGFQuZr%%T5er<oLh`rM~-sI)y<_l~wtcT?<x45mEx#1vVx;VK&>V3yKWtK+SvjJEe
z?zLDE4dQrw_fm+UJCDwoK$Y+#AQh_Puy8snSf9*3E9TdnD%R@>Ird=MjxJg)H$Ig$
z@z&M0#qyiW6HcGovjviRCxg1&1J2W8fbtqSGinjH3D2MK@^yGjDBx-<pSjr`zQ7!>
zaDM^AuOwi@i~K(F>9z_NjFXTWHCxC!YNyDW<eX(UeDbiElhgdk)>Etv;7!dQqv3yW
zta0>!FO~Y({;hiouz<To0Lm#9-~x%Ecz{@|occU{(jxs>PlUnQZvnEw52F=IT1(1B
zi>t5kGd)1<B2Jc(#?N!JGK+laC{VB+8ba)|N2HbD&$~kC%I$nM2N6p@K{-AXKSA0`
z2I<s0dr<8o5W6oWQN==3za><fH?45P&q?M>F$WOx$nEOkOVf1#b#0Ye{{$t7aI_9j
zW$biNB<H`KyPg6>4<Eyq)0Yt`c@or0!!q$Q{FwsXu~0sX13Vi*jTit=sLgz{Pt@??
zrsBh`_S}Jq>_ZX9_z<`FklXuy9_hz6l=RK_o)kHmI*&`3q!qJ5n+__>(Ayd5iL9@E
z#y<m)hq#DGt1f-M-)V2$7Ehe-{s|(<L$-T8u%}u>!2CI2sIR|rm7d|J#_t}DOBmhU
z@|&RU|KmlShZV3Vtm*5(`t@=rtl&QyfYj3GuJ3L#cL1Mqg0jJ~pDPbEwI<;W)#N@y
zCRq=@k&J{3?fT1qdHL4^dz<=zr>M~!#hP|2I(5=Boz=Y=D5T<pn-c%d!u!Db<Vo+P
zDlWX&F%!-fIDcxL!BMHGc}}d9dp@XnE#5T=8(Hu2@|BbP*5OU}diq=1%)XuC9i-PP
zeww#`iQ-Ml=(Il_(H8oZq9V*nd$V`lAOEJI=daZTWDBUlc2SXh)kLW)PYlqP<axmT
zz1<W1+mrBl<=1a6JQ}T<C^4&q+wk~2KkQyli)N-t^NQRkvIzRJ9P~?nU0!pDO8W3h
z`pg6QWC5g14gLXo;cIJ=XSrSNlYno4fG3vF6do`j-|Z!b&YvK0eX$5^wc6?{l12L)
zl~5(XB|M8BQ0*_#;^2fvzkw2$w@J&YkD8fd2AEToVX0HXj7r@@33QWu<1@xy5hFrt
z%^1n2tlq$K{nL>DA1tVW67g)Cn0dqW`C_7Ed4CCalJd>At$U%K-{NJZ47(W3#Bbkl
z=1f8by)4R0bq`<b{EHzrDQ`WGTOhm66_T~o-$O-cZ{eh7yN40)tWLZwiuU4c-e@PR
z10dPF;?3^>;BKW*g!XFQoNQefl*=hLJH-<#-yy3Pi@u6h+|dQhSjZnDW$rODf*#)K
z`}?W?{|IDpV^m<FY?rUbAc}fuu>Bdf0Cw8p4#E~ow)F5d(Q%`_%VKWDwwz0@h`##r
zE7Q#mK23(+^5Q**CmO)+iw$3Zokz(KHd?T-=?U|qtD5#gDYj@=r3uq9t!L0qzZYL(
zh1%5bhp3+W4&Y%f64i?5?3g$9ighj?RHYZumni6I(wGSb=<2|#{?5#io>|&$yf{j8
zPWMz>uV@FXh9E78s0>YDi)l-Gb28zoi__IpUA=cF6~4<V2+4EGx4-`ZzNXtB$Pq4o
zUv%osM!*9)A{QQfJXq}2(`S<meUFW~pu4lSd^v$fFL}jvSZux*An5AU`~>aPpEbWY
zRQ5?}Pw6p&@tJ%NY;XP@S2|Olw$fV-^V){)X$B$CraJS|MtQdQf@Pe7Sw*+5G81SO
zlSJ!RguDi9>@@H5MR5z24fq{i%#(oEHJ9UEB6jOVe}ec2(wcybmrj?Ql8<rBvPL``
z?O3X3l;4rAzWM#r-n~;up<FZc{!A3;A^;tV*`yHA?_pFoolR3?N5=YB<p&?j^G9xN
zq$-|FbXa&}nF{)$!Y{PAa=4AtL#!R7$K)L<9=s7+QXLFn^iCVKo~zwNrIlDSY{@=!
zni_GD?-o%nu917}3RTKB^)I9#x=X`#OZQ-g6<--E5Sw@#N~15x&jTy!+_>74qe|kz
zyxHN|-d<KYq9!goyR$TKj>3xg<{Rzn>ygR**mrCBrGF^@qB25RSyAu6ZLO#ZWuNUY
zu;!R8zRc9r6ydf>zM6e2h{cyZ%<Udso!6k(cjjHdX8u>fpT48o-!J9n-n>%ITnKy1
zJo*G?Ip{txkPW)XRxuGYl6EU-O#yOcMPYflKk|P@7=tTBIsN_`IAN*vY)*B~%Gu#4
zi=0W*8T%^xmU8`NE}siR@Rc$4xIId$wI}EA?pRqL20K?lNC-q-$Br#080}0aSjbQn
zO_QBA`~)<tG~O`0Vtjc@H{rDp2uNn|9_~&31K1kkDJSTm<<m&7Lh;|9ss2gK?T=O>
zTW=uGQ%p8c+mbj1R38ehaVK_~45N&3^~cBwy5+qdZ=@SPE|Ye>WnW>t(+8^Fp7_jk
z+=Ze0NLa1Nd-JD|<MkzCGK_>>04p-i5#z(LYpY%+b)E09FsAsjq(*~umqatxl~~=H
zxIBo>(%Jyqkr;KO&1V`cDhDUOc8;0j84RY+Y!?u9?m-q*c*Ns?O4g~td9ojJs8B*d
z%7+qdhT4)o=Zv&Yi_%xV>CRb?)gRke$u&SiU_6P7&iuHMVClE(5B=;HKJ|&E`&@7f
ziF$kDR2I((WC1B0wABe28^P(kbyO}*YnEUho0wF$NEGpB8kUJU+^B!xN=)?NuX@SC
z2{$pf&>^tn{#PZhJ&=8BXQ8K~bGma}VJX{}myMwT0Gb0CyJ?QP18XwiP`*?3MFSaj
zqF5IXr3V*EqmujtZhpCT$wfnt0Xmo%+JFv8tS7ln(D{)1yOgVt;!#?zTG^|$7?+ua
z)=JjaNiAoSW63)OE8!p<0%#oZ-pCviTE~8d<mSUMa$T+LR3qQv(Wv6f6#@_4!nH+a
zC7P94^quc&lYMZeN_XuN8U5@tCe+~Z&5nXQDia#Bk+UP-YM{qP|H`1ecS75m2P<0V
z0|0_!H4s}@H?9yn>^8UI@D2V$hKJkKl?>kXsqpy-t*sI=OfM^6p>vXDw!btUu^K=0
zE?8ATURT0yDfM+#eFHq?Y+I6r#e0+jA!;!t_Xgp?gsGiZA$z`4>lnX&WbmDqqN5<8
z)Y2dK4~8x(OiKEB&!)nSKzk4z$}8Y9*%hb!gmQfV@46!aHWDRdzjix3AIs)dewgi0
z<Y|m3D3kY)M<=fKypDRD6?iP58K4CM&O0D#0@%U(^&~|<eb;7x^9Mxc<{p-nQ~*R7
z^Nez{v~m#r<or*NCh~qZZ4@NbZM*=o%=YkLNUr1VXZvoiuR=>$gA)N16<}UNebIS<
zk)2!0@(*tl#rEG<|Iw&DBDZx@n(Iyvl}a44KixS;QI;7{!)TQ0DlbNP8#fNn5Q!(Q
zrpijk4Gn`aqR{If$s#62gt%5(Znz6xAXKwrfJY$4^mANb5%h3N(%EaiH>?2uMUM#G
zU*vBRq4~y8G6(7xeucY;6j3Tq5ADcrybKk-yuV1A&G0XNW=>sJ)XswQ)>wQ*d}tPY
z=ZUN3=r@%yKGn?QZIp))L4RJfXZOeXZlCZ5A#QF#K83;L$!-4p_lnC#AWgnjryD(U
zJ1^;Or`#C(W}*|`2M{9KKIS7~cC!WsBy)CjuPx%J^o&S~pb4!SvyCC|0TI5DUAP}T
zNsn1m%^ht`H1M}YFjC~c%J&1OLJSrBSB-81eRvOhJcbveGNS~|$v}x)p!O2bE^UVw
z?Y>*TTnh4dZ6NngTS5NGeg6L`Oq|&*)Shbs=V~O+KK;Tcdgolhes?r8$@R5c7H~3w
zkRi(u;ubx#a6Mp`CY$4rX}#)BE6#jHd-HJ5<AXcx-871pH3#0e+y$o1R0X0;m3?(T
z33$t7jYz0xV$0$jMn)p~{eq6}#d<(|%K3r{Z~j=BX+}Rw%t@J*0FO7%`<Xg)iyfF|
zF;$;O#48jCo;;`Z!Q152m(FK>7RF-qPafD#bkP20UjDT3fZ27cJ8%j07&rISi<ezA
zuXt^4g3*#5>8y{!m+Nsl<cO~zWbUo4<2haJHxeKYJ^PdXc{F|aLfSMy5#YV#0s}kZ
z&-%cG2Q1udixN%GsxvKNW-RgiyM_QR=Rpr}juU2llTlTT2}<1B3qXXG{3x<6qIR7{
z9QSPB2oYDmuu91UvZLh0aijQ$$JMNevZ+NZ+L57%nB>WMn%#vZ8W6ZjHJkz*5x;fx
zhJIo-LW&Ejohxl_x%o^yMiA#2h?=COFT(e&qh_}I<7Z|`wU9UBQ0<&b%E=p7erzJj
zR-rfDpG;&h=3hUCUV6W*!h>ZWn4m&N;!JIb>WQ1&7h?8&r6VGeiI?s^(Tpfhc<Hwn
zlr2QPG;`8N1<babYEd#)@gsH~4~SxnDMf#5rxsJjfaeR!2DwWc=(-X7%<Tp`i0Vl!
z5z3MMKl&x*paB$!J*29g`neko_*d?jpm;{oIyV8a)gr_&i+26;js<Ykl_yuum{0MP
z)1=c`?*Nt=A0(-@9$cgL&UmHVm%T05Qu(zJ_H~HtXCF(e)71x#wgd-h_^E&1Ke}B;
zMqo?A0T`e1BaS34Rt&#FW&3`wTF*XUCP0g->4^sMbih{#&LzwMYnQ!iPM6onE|sUY
zsJzd$l*Lr=M(j|bP<X%Avl)AZ=VHyLIVR`%eL4gfrvt7ae}d@j_03bpM)xS{nV*d9
zy)Jc5&uHB6>OI#kCXoM;xef_R>?0e@_7RqFq0ga$0$U+LnPbb3RH<e+j&PuP<Gzt?
zacsThjDePE2_o-E6iU;&#;{~~+0ioB<xYV!{w2=zsm0<~tCu2IMS^XZ2kvqkF!S@;
zF%13#u+|?MjzIgzRrh!DP!7cViRx<K6`5k71t&na*F7()R&zl!lWilH$kBdJIp?GO
z5$vtYRa4()I8nE=p$=tUi5zj>Y<aFBw(;|N26qKvj}IQcwzM$m>#|hJ15)fg;Nv*L
zsf^6*G^=P_{Z)(IHNMs@Xe1n3x*Ew>1Ei;B+WVhu5UzI7{5|JGZXx(QaNwQGeJ{Wr
zl<d|6P19YaRsbHgBPN3HaM~O{%+U^E`R`tu@9`nVudC<&-_%9`Z|EGZex*Ivp_7oZ
zL<$Y|a4f`w@vgA1T<8+OH8<|9p4;_j8YIxx0c0O#`0zb{Z?y2{pa<u#g7KTLILEK_
zxSDB;H<lc8+tA4fJZ-m~wK+1tTLJuhd^F3F$JZ;pEno89$sY)|6=8-tqv#;E)H{~k
z>I(SPj@&+=jqKBC<<(}i6aIRmk@?!f++N-JW%pIsQSYMiAu(qgy9L9Ax@Qu~H($D+
zzWRm9XS0lOq^HUtG+->?azB)dqCjpp7^xAilkLT7Cpnzujy?44xX%Fq*Q9X6#%<#J
zUPU_C<sP?6`?mDHnt~qUrqRD#=Nc&S_CMNg`9NL0Er~&`XVrSE64XBvx}x)3%Brje
z;$Gp;bh?K|%T6ymDw{amnUa5BSEjFh9;+M(F#m?{0a;bh4zoWJf=&=O(I48=dZ<AB
z77K7NJ}eK*EfZ~95}tH9?cb^re-lqyK>(F(_Cu4`6!BKb1C*@Posl)>gYUcD$-T^q
zb&3Ns*>#`6Bv(?Q;>B574@5!|@Zc}e0k7~d;9I&zUeyhV-~R$1s~@ic?zam?)il%V
z)5yEPPD5|?Ff$@<MzbQ=d>q@q$}0e#g#YgG-!zW5HRn?lDmeC=itW?JzZ&1DW$aiB
zy@D?6f!lmZ!c>~V<7-fvK{2h@?ujqnKdK_{gici?s_-O)p!_Y}aCoutRsCV!ZQ)Y)
z_c_5HVV+0Slo|}Z!jhEgzQKK^cIC0`#Nkw$*v?CIF}l>JPl9<Jx#tr~L&Jf33FWl#
zYMfH=V2wz|6s?+K-d+66;youctz=OK=9=)d*vq|~$CfwGpwkR?yYb6hpu*5U?CVUs
zk>7UtuqwTaKD2bB=KpH%I>VY=wsjCGN)<tx5ZVS5kq&~8fNVfT1REWKg7koZNHHM@
zQlyF?NC%N7(j*W%B27_xlOQFD7+N3^j6p)YALpEV?%D2hpS|z7_s@Nv^DoIv=9_QL
z%vv*R-t{hU6JhZ)_$zQ7(bw^Po~rN^plnII<$ns$bpLMd=W+NymK|jX`wowsj3LXx
z##;gQo*_Q3Ba>L4dhwL9Wx;bS6V=qHj!fpd-sdmVxU&5bbJ|JmQGXkrYSG;nGrgu{
zaCa(Q*UkfR(^(^NJ%j6Y(TmZZyPVMymEUqF!~{ZXC5;cThU-1{zj}BwJ;hoz0-!am
z@&LGY<Nimi1AFzE<43nVBxs#@2kZIiw_<ZK#kPuZnm*T+KYE^Yd3eWUGoo<)M(Svb
z#!k^uKYQbhVyq!08?cm)c5jh>1s=Snt{tm{q|>?QQf78DRNn~BX;saJEfFDGAoM@6
z^8))LZv`Z26H}LWQ5gby1`;2S5|TnbeMi9-dq5U{%F@04Q?>qX9sbc$DD<Mdl38i-
zeJBw9Tv4#0Et<R$x7awguAXbWj*{g2BDy3SWk?MQ9@T)(1y;LO^X%5?9un;KoqpE?
zIhxu{&Exdi-r9<>Z-3aw_2ONbUF!v#v&S#oyLvR?2tN~PwLg-CO-~71VxZAK0|EZb
zcc#UPXMIS@YWI|7C}}b`tX+6aq6vde+9zY^SH7*P{YX|jF`;;0jqrS%wUd>ZFAjNQ
zel5=};cGcP-Yvphl><Cq%k>}|Ubc&Wq<q*@Eb_R2fT`)YqAYC$U$MXTp3(xDk)n%<
zMIqHM+pm}&h0oj&D!%zGa}(R*Us#)x<1f$rIOg^DZm`QdLC@2VtFBWnlO=ZZLv%N(
z;lm9h87EoZHQ_6%kH8$OyvS4(@Z4_Aq5XL-N2G^th#qd3x&DAZCH#qD&CQD!L#q|<
zlpO>)J^^^CbnCGN@=T%53CMr9|NlWg=`*w|*DM=&2#Wp-Gc$Uv?Jk<qBtFfvxzriX
zv)R$Z*;f|r%eU($L%izsE%t5o34Oo%%b5ze!*f<vHfHgz71VJMzE$DbN^?xi76*ke
z>wGQAOeM$x88;}<rYb45)JKKCA>2A1$q~i$xM?r}A~-3r1Au9SSFgmv^A6~~Ibo95
zttkg@=;Avj&7>UT)Sr{HLc6#esB~}%1M_7}ZoVi?^AtCE=o4fNL=-M~&*qRwIhNJw
zEmv7UN*DZ+i$VT1G?;bdS~lIS@}guzu*{}Jt@vHu54jD%3p`8M|9Wp__!|Pthv%g)
zC9nQe^z=gN`-tw4HNzWsX1rCTfYrl(hqL_c(6z;?2_}7xv7^q*CjV~#|CM|K@X!Xf
zh%K+7@X(}eifC`v7n;<PLCMPZC#Et-hJNYKwp)&{s{o%QuB}{_sz9$F*Z1C4`LE#y
zm>1y=!E$Cawz65e5xJ-Df$m|hMYL_4&7jnye(n4<{Iap}rtss7Bda+GG}8v5p!gZA
zStsxG*W%yfjQ${-;Xa_8Tj|0i);B?1cGT?g3qS0V?X?eWMu0mm?VVp)VdBtja~Xke
z>E^a;Q&G&=*3s2oz$Z?L?-sc&F4{b)zO<#M8tgjOgZAI9_6vJi(K=?*bf)jXK{a$;
zJ^?t;&@R*`e?NrKN0A!x0jT(GrpXcee7dY{-!6N*#1Bvh76ruK?ggc#*Zu&VQHq6b
zQ2$o%(*Jcm7$jJbFcjZvu%nN-LXH}WQYs#$pY|v+yXhhLAsv6DDGB#%GiMeK?^Rbo
z=N!*Tn2N`cLMQ<?w4eo!Tn>GVxD-Vzsa}bTBYwx7s|FKi+DO30-+I=ccm9#k)7r%7
z1!2c<$m-K}MS!I#hSo;zu%lJtzybh_=Jl2GrFQnzULDAbW_WJG!+X@&e)}G=k0~z?
zNOHE=g$cYYm1>3^VT|QzjxhM>{1h%s^tzx0#kCH%%CTL1B$2D%F>2EibW$~8CprE7
zz63;XIHdA7=XxOZnGE9tz%`~0i<?szE}gnPADz{nk6a8!bwN&y!U<zv?_R3&-t_c$
zc$%zS$7z_G!;}5!4n&)Q#m*&DycoQ{j^k+8xHF%1mU(7HeXTDk4btfDyM0T)1b6+=
zewKvd%qFlD$P9!m$DJM`@f=*5f?>hfIb@pZ50JZ%ZZdj3(fltuF?R+Cfw?q*C&_{j
z_u;0-Nvv(Z)WKvki3N?C{WuZSiHCF6H~4GTz3@<0#_XIH<=blq4m0w4D!d%<3|*yI
zlaU><iLGAF%7=upG~3JxOtxQ3|3_&)KCN)g$<pxkcG)EuMm$d!?(ByT>Fb*LRNGXC
zuB*v&A1rcrIc$6~ODSbg^~C}17~lQFY||orap>HAn-}UPH0;bYH?HQdH7I`^jQw^6
z?)nAK6)_Mc2NGwonbfE-=UK+sCKU&9Mdy1QHunqg1A<~N9ZwH(^#<`P@ZANOArg1o
zf|Gkg^IFIdB~F^-#svyH1?}sC?XrE#y-_o}ep<Tuoocx&=w-zrbj|au>b~SSFt-v!
zGMJ+mdo@hC8N${<(R@&@nd#<kbN$NE5eIDL$COrsJSokZ8|_ZV{gs)%GP3bzS~d-1
z>n8$5?aDNSSZ326_E%vncq>1fL~d4zL9X|`=Lwy#Ckd~@^7Di#R>`f`c1#g06qd?r
zDza^=tD~VR#jDsRcJAZu{f{ziq<fd_AAjiM5_<IkUqORX2Wgqrh+i8<H#BmU&H(w#
zjYl_e8L{fwaJ8F-`TZQecQx>>o7+x0q{~&G%0%_mnZ64f`I{2-oXVi&h*e_p_C_gq
zx1|`sk-r1{uZ4o$;BPzTKR}<FL96mr=8VIN&W!soUer7Z3dFZB{V%F1yy}4a9$B7a
zqVZG&CtH)%VZ3Mu$O;dMOX{3vRDIg^;kOvtn<TfsZ>BLznbklT$Im^Yps*M`DxR)D
zPTFxC4ilYS!S-g{%D7~`vT1vczlO6C8=_)=b~M$~CwjQ`nA-}F(jbdm5V)sI)h7Qa
zx#mAlx_N2<f;>yNrkuF4BDxhga=Q;x+u5xqywUyg?#8wI&WubemnN**WMie~m|HK;
z;aM&lH`OKEtFY-)@W~z@jl;y$F}`&y=o2#c!HPn?<5e(GpW`2HeAvQibqlB@Anqm6
zQG)WVD_MByJ7=C;P2E_1g<jBEq=DetoZDkR*0;FhxSQ8&F~{D@P^)|a_iQbR7)h5V
zcNwAtPnIw^DM;djPi>7aMK=ifBviB0QBW62TJf9tDUct`0xnv&>mj0%a{lEyTbbSx
za}V8lN3VzA8=rTAV#FwO26CA$7%r5fY0lkc$$q-Yr;s20G>2P4%VoBzmFsrXAaVft
zau*LbK+WhFkHmGvgifZGFOMuYi*^Q-Ri?!+#$!A_y7jXdzhXMU;U@5PaHJ0-o#mSr
z1FG^H(*>d%7;PZL5JiD{oA#Js3OgF?I*(_KK`!_T4(Sf@#G!>X#0-sW#+>`y<gk2t
z73nH5=Ced*iGxjkJDRj)+)%uF2OmO);+S6qGs3MxxjjQsaVK2b>&6S}E~jU3-*_Zv
zL*VR^tm%w>wF)!d_FC7;J0RxCEMx(2v<!Ji_UfSW(wH{A!F74{gzEbc@S|_|lEUg>
z>b#w~u!o~^W1aW~!4st}9v&?mUW;{dsZC*2fA&{XWbmteaRUk^>{#$dudQ(<;Ll_K
z#1Kn19Nx-Q^e#+Xzn<+Ml6v-iclS=bc+g2@{zy_DU!#NJ>t>B9YNRIvnZ^g`QFU4H
zU!5>?(YlsB#EIQI*m?rsEaeg0eF{0S3KbIV6m$er&d|)Q$nDL|Yl7z2sf8!v;U^4i
zRo|t(uV~46%vlieD#<Bq!0{L*ikPfxMA#7U$cl-l8{q7c**yT98rD0#`A&+PCVo*m
z#Nks=>bpCxs0EOdlhw%FdDb@;JlpDp{Cs{uO!6wv8V+yHO)FaHZNtqASkKJT!20H#
zD=G8Kl`^)u*&NrUjT$VR9`0sG1l?<OlVta?>Y0BT)Xfv?C=iL;h+{}DLuAQZ?N2j2
zDm-n~2JkoU`TV*HYEgUAMV3;F9Va>FOigC98uY2Fd(N5w4oUjwaQ@r;=(PVj!&=2M
zCP|ci;QK*ZP!7vZ`dyE^T%WTAmad$bvvkQ)a0*s!JtjqB$YZZ<IL<l>?>GfRyXUlN
zT7l1KO7>&AT6p5PV(Dn0=yq%7k<W6QH<D&s%%mj}+!7z1Th$j|G(Eew9{<I~o~Sw4
z3EXC6eO5Fd9RD<oeo{L6UIv*b&^Ue}z;_8T8Dhm;I{hX1y2En}bM>O*&A<g9s|zik
zAQzidRAls=7NN*tY8O3TDV4`gO6)CgI<&q&Mv<C0TO<9gDjCR-dvAxP)&*SA)!2jS
zMxeluAD~l3P@+IT>UA+$Yxh7N7)aE<R|F=PBe;OwlUvLW5ZQ|D2k4N#m?LUXerUVL
zKmT`SCiLSlX4G&d0Q9E|bp8N!Td-N>Z^o%H{On7AE&{ab!Hie)Te5ozR#|SLDEv$l
zL^?j<2Pntt*zd}I?$`gk%g2TJYn&tJ+4g_G1LV;;RpqrOg^r~QWZ=f=ARs;baao$#
zLUL|QBm$z6172%H*3uyz@8Uo+<lLokU4<eCl)ISS(938TiJ2{hnmbayiD>1@u50o2
z#@7ch{2Wm@Xf%ZNuQgw5<97d6_o)HfUvD+2dzJUroz>4zmS?U2<S7h{LDiMtmHhxk
znJ)N#KfIS{H=+?rivz7}GW?8Dg9>TCD}v%K(?l8fhj;*<hAJM0RJaZRn#;Ka48h{h
zMai_Q?LaVPp&9@Up(#ZE0L2z?S3`;XZghN;`R~erl)t|f*1ynYN46c$V5N(+M6!|v
z`EwDzO{w$PU8Hxz_w^##-uh-f*XgI%`)hN4O$e6K%*bO2P5ybtSVe_RgE<+zN}Z-V
zP~mtmGcCZPy`d)m+UVAk`;<3}_xJ0Ru20CQ^xNsa@Q&Gds1GTyj{<$A3hkNQV}ImE
z^zZ})@=wStQ2R=bX$cI5=(X)efgo`PW0P6$LleUE<e9cEb4Ru*{+-tARx`XeU#{1e
z-_8hrs~H{2o%%~p7pgqimUxNxX}DUGjoin8k+HaSX>B9D;I9LISvR*Tfx`_I?xABK
z9$9=o!!7ocTjMx00KP%r<IVT5Y*B~zmyWwdW&s_!^n2E_-*^?xEzWAOog(CgK=P-y
z{q*^D<wAA~aSkG@JBB;|Y?AKHJKL9tKU3?Ur~L+W1@$9^Edl{0^ISv4j@ZxAj#JmM
zT~}Y$#iB8XOeKxve}KLW6#8SB8b8nbQk-^~91rYxA`HhELi7U@8Ut>|@jW6^vxcmN
zxVMUQ+;dy)8C{o!?VHj{r^1w-n02#2-wI9779C`Bg#3Y@(Gr|^vSf@dS?yr==0;OQ
zuZL7hXa)O+2<RcZ$3wyE+CHs<cp;}mVq>a#qctv(<1{AymAr-OIXz|V0-h%**ZCRq
z^W*K@bd5$zA|_7wtErbFWTQ93R?XzuyYjR9)LosFJKDB6vwyw&K(ssz8aDjB^fio|
zF(NV)U5~utK;enu&gQp8U~TpCqGTH6S0zs03;qCB$*imYaFV@RA$kW!OWfH@LmL)}
z>x)N2#T}Qij;a35Ns+oobZ)D>+n<b59Fyu+5mC=J42dwTDOvC;rA?EEdK_hHcF_f6
zbdBfs=kTIP(H-esZnNf?)RHqYJJ5GKuU}L>lO;4d;lm;C$W{;Xd-4Rw0<V;;=$P6U
z$PQh_@Y?r{BPbDfh{7wOQ+??jyby)p%>|_g9qyH)c&@ozu~bi;-o%CCmkscfX2*Ly
z1PU7!1@pcP2()0HJUS_2)XU0(^doQdOwOGmyQWqGUWF`Ot#eVeQ|j9%e5G&72n4$@
zr7fNlFUk+4wOnkcXm$Bc2leFFMbHGhu^Gw28Zd4TLFHvZK97tB%%g!%FeI8aO8Tr1
z6LL-1M)Pq2Pm`_~-qcrkkNkt>Nl!_oGYefJhyJLys`VQ#%G;7B9C5OpjjR--d+Jf<
zn8_F%{Wa~YAHS*iMjzbwFG1yd5GA&ZSyTMG`2+yaJ*Hu5Dkj3kVfxUh%;SrLWahdC
zlVq~@$n)B(@!GQ^I#$NbsHB4l!7*L5zFm+Tbm#81En)|tHEZ$KC2dwCQ^sSzU|!BJ
zN&@s~=@$sOE47^5c{X2(XkeE<CEirPePu_h54xpH11!{jHDG$f!PD*9fYOZFQ<{?b
zKR~HoFP?^LL)Yey0;GFgduES!2yjK9QLZ#1BO1J?hoeypv#OVIpie-=6e8dU$dG5x
zlo|rsTy|tPn~?uq2mySK0JQ(MW^=zhb1GwiGPKJH0RtA86KF8ogzB4wTDrbxr`X+q
z;}CWg@T-{ri#@9s^_bHjLL`RIbqX=MvSp~!yJ~cN%fYD4%|kP%^ibwQUkbQIyHsd1
zm&tRfLE!i;v5TkU&)&5AO#Vm_4S&P_uylrX_AeP*g(`pd-u0C@(b}@$D@M(N0G#!y
zm%NK0!z6{8)B6vX`Xt+NrnkTy^=|}J^z-WkrdhDb{VX1U3jMcouAjW5e>DGR`2fhT
z-^zK)#t7t(`g!Qwe-^aEP5ytiXRSfhQVOgG8a>Y*2<3HG{y^(ppLCytAMER@mU6U>
z%udTjc4s>)zi54)+TsKbCN8Z|z*qv~$U%fub0f`R_vxH=eOtul#prk9_MfF((vNlG
zih?g29Srsm5!lcd$2MVAmHmc7Z4HKl{=gIWuuS(*%7&>uEU{f-i2-WGG>oscngq(M
z43-*_dy6Y1Km<-8GgTC1AP%%<%kT3X`gUlu=&kvBxC#J?Rsj4<PsqL$+u#AwX3RZ}
zVUt?ccTyP)5#LvY-5bHrnO32Z8c$+APez36nO*<&PN06(d`S2wLW#$naAj7O5ExyZ
z7~Vl7z@Xobuay1(!ONkzGKd(rsO9*jmWm~vh#IQI%qOrX=&Mi8<*77Omjzu5jIwVI
zM}@+rs4ClNsZ@qY1};e-ICB@G=5_>n*%*ceJl0QJix?u=IO_%4m^oYH&4=JO)%>Wg
zs|Z;~7s`4f2nb1kkO2G6%g?m3WtEzYt;6xKFr<USeqn57l*Ru5A;qE67lBu+q^SVJ
zv{t~@l%|6L_Iyjs1!HhcgcC(?cp^REWK^-yZff+bw<X~cBt%c;B&mSgWcowi)0S(C
z28_T>YW|Kf;&2Q;C|0A6lHVDx@j12BCfhX86?+PA?b?Q1J(lWqI%G|lO<wwpv6$B|
zK$qnX@I8M^owlVLlXE9X&AwT4igzYHCEiW9*k)dlXC9WXhK?~H2+c{wdb@0%*gS!j
zhS7(nD0PvE7Y|&cYQ!&tzd2DA!|%Gr!;(9=eAOKctueHC-057oi!P?5Gm1VFl_b;r
zv~v;<@5?pd)v)1u`{FixcMBmxCPlfn<`2XVImn-t+6kG0|0=pu{RHNNtjjXpz>4Cc
zv$<YO<?|sV-B?DWF^}MPCXP6LEI%{KHND?icrbLB?dc_Z@k$C2*AQ7|Sh|wWOFve!
za@d5w7VEHjr}q4+Jge+g4^_hm&Ah~vgi%khSi6{q$QD)%*!1%poc6lacusva*9WU`
zUuJ<dwtWE!Y%i&C3q>>vdCu(O#syd`L;!jDJ-FTm1-A<scEVx$I{cl8+DC6>meLV4
z_zOEvj3)Syu0jyJSR$R_u2EsbH^AYf`wY6?xn>8V4uC>g1c=lQaG1!PdvkQHFGcWm
zkj}-NHyPObwzCy>*bJ!rBLhixb9lE~y-J6>dGw(rqGc-H<i6E;KKAyxz?n;|jMdMA
zGx^@1!}Yie9{O<m_33^=LaV>v8x{V9fX0BJd@z^p>EMP?Ot<;L<)CW*0LA*x7&!ln
zm!J`V{U_GR;IhsqXYB&+y4OMZSaE@ostz=?zI8|bCSzDR1ne;@lCSj+V3a-GKuEyX
zSG)&YFUeq@t!$*U=_KSAn%hpOovDq8!tD9E!Ow*EiRVqJcW%0Dsx_WGy<nM*xJA1w
z-WF2Wk&IAop3PJUC~2tnIq7u#RwsVGiCrS{p-LQR-Mxz0jP{jGu!dj#fG{A}e+oSl
z(j0HMYB|brb~AC56tI}EaI=Qn<2$O)JjS*mref6+%8O76Dq0qYQIiT;>wKtGyTQYQ
z4>do06KCC<U8nmMhO!AjFz6NBY9bOMNxQUl+Q#z0SIJ83Zq_l;b?JVX=Oa~pnZrr)
zt-A&7Ld)O11;0TL(SLzE-_nm>f%{BIr{&$Vc?QaNa%UcPui~(zZIWm7TQTSfps_1v
z%}sII1<@O}4bkT=TX0@@K6>+UC(BtO?+CT5Y+Kq4d8aE(Lgz>|VTjYic1eY3+BT_(
zZxUq?*QdJ>(?W#2ai*COd$m6yoR~i&7<cB5r;U10LTOi}o#DjS3UDk$uw`T`S!u__
zBGLwP_pT!G0P(D-_uXV>KVi}(tv*1*ly*&td`B3Pi<a)Zd~tZi(5`+*{j%0aleG2;
zfRW{_8~}qVn2HITV(<bE|1hpu4{?quAG!v9YFx2OJ@?EUN=l&l0sX_%Mg;khD*KeK
zuuEJE#}JkSmy}=9i3*^AUt3OU?PvEXnIv7-KDQ*0rGJ3R+UiCfm~02rFVh?$U<ZWt
z7(%*&7J(mikX@OwyoU-i$QoI+*coWi>Dep^bA50R<Qfhs5o1x5+8voH4f})b_GJOK
zvH?YY(38J&H@y-olh*S9c(-01<z^8z4oKSjd{FQdKnN=&o`$wyw$*UuG<koDjSiq+
zSwxKh0HV(@D@bVTnM#GTbkG^CfU&|oARZQKKY~7-b7v2s=<_O|&k+qj>NlEe1k!Ta
zDspEAafa?e6@?~hQ(6i|o0f-1N#H7vaSP@<5dldb3VNRrf!#npc@Oq?P!Pn)G|Zjm
z4Zwg%e+4|2mXVSNBSmr^6{IG$e~b&{2hA%6pCW2A(!62kfp6I+qls<H?ggxCfBBEN
tZC+6WR0k*+`}==%>lX+)#P55ZSc9F{<PKh)1UmQMfjR&4chryR{{X#8Vk!Ut

literal 0
HcmV?d00001

diff --git a/cpp/src/arrow/compute/exec/doc/img/key_map_11.jpg b/cpp/src/arrow/compute/exec/doc/img/key_map_11.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..59bcc167ed2ea383b2cb0693c0e88d3849752454
GIT binary patch
literal 60687
zcmeFa2V4|Ow=de{93`hg0m(VX!Hs}`fPjb+MKVZ|C?G=+P$aVfMPXDBR0Jf0Bqa@q
ziex3jkmC$-fMMn~?)`=1x6eJF?|1Kc@4m}GLw8SgRjq%mTD8`y>Q(q}_$h#1Uq?>|
zARr(Bu7f`Sep(<+JJ`(`01OQQ5dZ+l0aAbpAOz3Of)_Uk7y%;ioB;d*2&Dgh{~m$N
zk8>t4O-Y;^K>Q;OK;E*<1(1RFsKM(i0KlNbwG5t-gU89|1c4I-P7pXj-~@pa1pco?
zz{1VT(L2bG_mZo(zc;V8qnp3?EnaCU2`LF_DJf}b326mcB{|^VOC=*EBdsDOry`@k
zD<i8SEv+Ia1puTeM1Rvt7`Rf1|9&o&miptINM0TQ($fBM{*>S!+RZ;S7Yh30<~h+^
z{;y=x6QWNLI6>e9ffEEy5coR+|Ez7usDKIwsBQe!J@5fi0-`^43R2Kx!U2$g_dwqX
z!|w!~e?NahAoOP%(9c3h@Hg)XLp~wl-_B3X|5^C;Kb}AK)c^$eZ}?F_3m_*YB_kyv
zCnF=HpdhEDW}=~{qM~MJJVnRE%>m)z=HTMu<(ClTJtM})#U-RDEG8u_CnpCHR907#
zQIn9BlR3_WfP#X8nu?l@hK5b%G}mdFU;g0V0}SMVGpIokoCXLP2#6R6@NEDDtR)HP
zT{?C!{rMpvBqAmuB_pSxqyiIE(SsBd5fKs-k&qCBZX<#_;P(JA0}114>2su~Ozg?d
z_%g{vramL*`>pH)vuQ7iU-rhWC<;mzRyKAHfwO`_!Xk3=3W`d~Dq82Yb#yQ2>6=|P
zzhZIqnx%uIle3Gfo4cQXKwwbt?U3lZF|l#??#HL4|DKVV_3%;l^St~Q1%)qPy)Lh)
ztg5c5t$Wwf+ScBI=<NE~*FP{gH2m%R$n*?yc5Z%QacLR7xwXBsyNB67IHv2E&cA*B
zAnYf)7(luRiHV7b$&TqFAPfQzA_iiT)6%4j=S;}#eNUZ{i6m$GE%jO12MRt}Qxx-!
zTfLMl{BqL*=ws4;Q1<T;7WMy#vcD7d54uJGY9a!#@rW1zD6rS%96uq;-<R*IQt_5(
zOWx|x$`B7cjVV3C*`c43734Xb4YVDtR+%wZI=YiQXT+MBKV{uwT!6-fCVz<#sktY2
z`9^8cH&I%0MIrJdw`NqTD{Py+zZoSA?TDbmT&RQ|P%lklnBYiVEWah>h}3@rOKFUL
zN`zw+r^7|71mQ>oF}-+zf+Y?QoXb(g1BB^VR{#&(eV_%_<4X=6h}6T$!CJJR;eoF%
zMtI<TJ|5rzpwnl*;(<lkLfDikED`bx#i~=|fln?*$kKyfeDME``chrV&Pc@JfwGCw
zvDJ=^v*vf|mY(paa8ziLFcD~7!2`*c@jyxRK1YWegAw}xkCNc<3&7YO4?t=lJ71vl
zJ9(N+a><y@bX-#jblY$601$dg7!Q1^LbD`?@F^T*RNOaJ)N1C8ULPS1%_&CMs+ivk
zQ007RYZdykP@8!E?Bo*%C4{x3e~<MgBJqGws&072=or~!`*eF#a+4bVS>L@ZY_!Eo
z;CX~><pu^fhzC~IVB6=-^S}xMcVkQu5u}DLqbmo7!&$S-3O(~@-a(}7qg~i`prXD5
zHzLkHFe}ex7l@iWd!a+`-bN3APFQ%kl<mmT9@Ew$v`+T$veFgP2JYHr=&0S72(eaQ
z=J$T5;>hPXxgQ2FivvuTh_Bdfa=pU?rLN7}!lG9qD1``Pi}3)@D?DIBwkiL9h5nP8
znM05_k^a{XwGV8n_xKN}{mC{$(9zkziWn5Ri^SC-uoIQ8gt4jMW2Ug-K?E)-r1`k|
zCYo&0!HU{<r3`{vTe;{SU0qL7BQfha7rE1J`#`%QZjFRE%HXcBSg!rV`@<b%O@Wch
zS7P)xD};Q*M|eX`%(H|AfRCB~x-XV$-#i;co_b9c2d#n~9NHZ*FuQ&w&<6N~oJ2wx
zqOZbGaf@vlD?#4UjEbL%w?ybK^MrcZeT<OMeSS@Qw?I2ZFOWRfSZY1&@X!(2FUYi1
z8A_E-P+P4Rb`M+Inh+sVSZMUJIRmgxV!Bs%E^)1{ZlnLeY!PghGv#<7Wp4N=R2vUG
z)3dAusbZ4%#_Sj$EJ9ODM3#r<H`;u9c5h!%b)hVurYuJa7LhLdeoayj0sI3Y=tEyT
zV2#9iTgnl1otvrmR2mymPh3sp9@r0V3pH|b=&<I&Wt>J2KJ4%x*Gi9Uv%}tsV8xw!
zV0=Kj4EY_rN(o;6lt%jarH@bW13Zv)37AkVKbKyxtggAl>X*E~WB~U4_QE5g0J@Ko
zIAu*5JDzK*Dv63F9~JuBY<%dj+k5%xh-zsem;L}Q7V{-mQ%%Tb|G-xwJTR?P2}%gB
zXZs*!2$8w0`=&U>G#UxaYmL@)`UOyg^cedZo*hqlTs^z#;mHx}DX~Wqwe#Q=FWtV|
zh7oQ!9T#cQyf*;F-YWXund@(ln0a|qUfoo=a&+zUt>T!?B&Z)7*!8L|PN-Cv^Dn)N
zC94GKc~-=d(+-NKnSNa4ySO=4JirACq`S*Kx9wKPwRj1CP#JoO5Sp}_mzYyIuInCU
z%m71w48jVsz_)1zphC@<IY26Sv-4vwNaomUHZ-f&6Ii#g!}%t)@>tf4ZSWCche+0Q
zc)(ML01u#^Si-(4Ucx@d1IP^L?sn;Anlm2??9$mtD&}ZG-mTn~>#KkJxV+BY&<&ZG
z4^j!1`1$y8qC%KQ7aq_onmk;F;FQf*XN^?^de^g8EE}AwvuUc))cO|iY~%H^;^oXY
ztENE~l)}_KL*!QS&b&gtX(6;#_jeyc7aCxD-=0#uIs0JEg7NnKitcmuJ{{qH*dl{3
z50th}X*`YrZU=04X1_przJL7aYFJ^4bxQbMj@Nq(X%EF)VLo4paQ&=Bu*tIzvlwF2
z1?s#s%of#O5(wrhBoNeMm&2!VsNGi@e<X=G9qZ3HcB+;%g<+E?+<doS;$kvk%$V_G
z_GvoJWn*`sZ1-WK6>eX?ar)@DFXAfe!X*u{X%3Z}<RnavzB}00Q>&fXUz_9;We-)@
zkOOH*$JFSvYi(=3PvAGXj)YwXw|{SppRut&Lwxt@rx^3mT<Ni`{*mf=n@@GuJ}q1L
z=oL?PCRr+`>c2IMKCFa%;&>(A=6n_?!vhf<3z_riZ?KUYJKl%012<-%>R+rlFT2?9
zXbi+EEC-MDIoY&5{07i(A^HjqFWMI3_T>l&FGk^9_NyPJ5WU?YIcjVd*zP>I^YQGw
z^Zf_{Izh6L2(dc_lAsK4GQ1fKGb^v~Sd#F%LM#MVdt}mq-C8pzb33J;cz#f>vhT&!
zDrjZggV~^zI`0&M=#2!NoV5ps;rM_7OXWSZW4X@4Jhw8Ua0~HuEnuwU(-`V#fHkCQ
zHFJ}K{v2bb?W1u)=H4Nhu@O}gW`Q4q?VtY;7(P|F**B{D@LOgY^ZPDAfw(3;!QmA%
z+|2r9N?Hq&m?1?Q(Assou3Yu|T9358o^>cs(da;^>5JvKtLyY1gcrH6Z~T%tpB)73
zKBc;R2^4G0rg!wejPtwX7-#KeXaqu{X1|ia);J%(c|gVDJK{Flos3#WpxE}99u$ll
z3WttX`9_#CYB0q4K8fz(6HQ%qCG6Str%p)UsHN8Mm1O}jia46=H2?j4A~S=zi4Nh*
z_k<DL+MxIc%lhdLv+31|5uIETvBF1Zh&EuA>UbarxSDi6TblnSyRnx3Vj3TR{F?-+
zkFC7PAwco&wd?ak))KdFq|CvH24N;^mtdYFI%~|WxBX5Nq{41@A&I<h`HhxV0aq0j
zY4bWOZW!}^nr4N>UckQMme6y_Tsj@2W1JbyH!Sv@4NC%;Z|2pC&z;{phvG1ZJ1Z6{
z32HF}0K~F_1<rBr6XLTF>EH;9JOV00OQtTHrtV5OcRbGgXb$SMtJC-AGKQ3T4OGkC
zUs6d6rOX3`BPl30&m}@Z#mEfnN`PLmy!;`-GUstj$6Cd!4!?x0RT!Tsjec8oZ|F@c
zrS}<k8jle3LI6ijBs=R+8xPL?Fcc3|Hb30S#|`wO;Wd`qJqL43!^7Wjmw`glx11HB
zHx+h{meIqLN152<7m$%a;>g|6o9Pj1gFA@QVt~l>t-7RVl5d~+MnVuCaDz#&=pDT(
zL_)o*5T#F=9_?&Dr9Mn%!?^d&8-yl?8pXSQ8P=)0boqeCT@qsyCZvLSa1m76em_>)
z?7$nsctDHg5vbJ(<K*C|Cm1tathnr-#KESOQKd)JmM%ErQcS-SdN{`Zx3$u5^w4@K
zhkaUCA_ZfvMd_@%vVm0!wFcAi=M>NpHvZUyq|m`q%te3pjH{SJ!~5i94DVtOobKZ^
zd#}Ia1_6}3%l>_C_{~{owlh|5yD`G5x>F+F7z3=p+->M+h+S5~&8>d6?RLPWIboaJ
z(W$>`O-%&;(AqkZerLq5HXI_keWxZtO<FE`ZDj7jn9vuF0BO2CAK&4EaPKb<q;0rK
zAHP}3Zs{EHJgWM3FJhjZXszcW@`deF`@i!hiH!m7oCSR610L8uWj(f6y<6VS)#vGE
zMDHmG%`$3SB;1L^-Z93_z!Zup|IpBcp3+GU;DK4Eq2`rE*y79O7(9RtEj$VYxj{!9
zxb}J%)U-;#<0YsTvqVUYzz|1;M`60)ARoUb0FFu{(homD9T8vXCVFZh3&}=-g=HCr
zU3p&)6JRhr;KbB3NkuOx;zTqr`gO}f{Lv%1qFc_!;=MPhUz98JJ;A{wS4=)~jT!MN
z`BAQE#EHkxK12V$D{?`SRlk`vk@Ky^=66z=$H6^Keh21{p9MR{t@f~KB0o_pi{|=-
zgUze1N^$mU^Q_acLO3$I-)$q{*FB?{^g&zi+@G+R4>Zp*-v+e{2*}^^A66C%FZbmd
z=wz@M3D9qBTHU(7z~?=jE87nE4?)m7(8K%KkxC&@p`kqfqdZE(cd>CG;898Cir(B|
zpkg;+86pF6OJpB4AFD(AppXYUQi!=khzD9!pToX#8iOndYRZ)7rax1=a46*T*lk~4
z*nUdF!_!MZ?0OK)I}520guGC%Y&-*Pw!cwpI@S^$Ik3GbRC`soukv2(LjUR|79w#<
z>NHKFLQ@}MSh-xL0IcH8eQw>9C*i-Zw>v6$lIiCVHo?7s(@p2egZJ2wBia6{OzZ}n
zg35w>nJa0^?>o5yxTdx@^xWQFBXY~WE>^z8GAg|n^X{#B@O$<}b=AFBtOk7$RWY&0
z+`g2MC8Jfbd)o%C;$enG2ybeiXVujL4w`w&kNe)KXi+9{-t%J4rpWtUl=(&iCvgB7
z_adRb<~e>sT9v~~guTb+frTlk`I#>q`M{2dhjW6Xoe=lr+BTW<tNl}CJB)~w*tJq)
z*@%cCjjOIirEft~0@RtujOPcR{UMIP95V7?h=~e3V28j1+PXh<M-pfEcl;ccZ@Xlf
zUGvi9MMyP$WTQfOq__Dmky_`HmuHUKhjzaq)uGFnpe(PR@@<vAbS+NocJZNycD0Ms
zji4Cf>wrRn51Bqt2J!(SUh(>;JfFqZC@2T;rJpZD&7|nI>B14mz|6T^MZ=B|oS=M=
zO;t@}zgqEcunOIIpU(HYyga%$*LzSIG$RpG9f{N5OT&f-Cd`QIx%4#5NY0UmG6|W7
zF1YV8b%qH))ofjTR~JqDf>>6k-pctaDVbJ8duWE8K2Sblbl<n&>ghp8DNC^~?$(~f
zNhu3~TH@%A&?w!IB;>i?Guc+Ir4H5aCaxLHB|f)Hd232`j)#eEJxwEmTh^WB(i>px
z{3{`iYh-h$&k5#60KyigWlm!f)1wVKoWUP%5CA+2#2mo6*KV_nlEwFk`1gzYm3r)K
zMcT}CRag^igl^RB5l0`y192uKmGy54_*<q3Cg<d%;iYq$r@p}upAcju^t39QlSqp_
zBGA-@Z)yXaS?r%V<4TRUj%WbRxMI#&-`wT}FbhS=V(|7p9yk*rEhaHj(KOhH{Pf%K
zi!5pB&P^mcS4i}}IUZQ^klbIy1A9wgr--2Y=d0v@zW8tbim%u46z0L@+C;l9k5GRL
zM-d8A>&VnmzO7;#UBhUz8hUH5Zxm4B4<Ah5K$)E(iGZ{2)X`bM@SP!5<$YoQN9EU~
zSWBO>5xrHr)I`(k%N81#a$Yx+yX)Jt2O`PW^`27Y{Z_`ZLM4{;nxFkPTlzd#wNYrc
zq?{;?=I7mqkJ?&k6;tQ{p15A!IZaAAaWyx$f_p%fCPow1z^1s+*sOBDLG{K^*(gM{
zQ<v84^9|0~@6pTB1D8n5QO@zXUcBit3Cc2Mf+=_IXv$Z3xM!xyx$04`&Zd94nttSq
zxkSAE7YfQx`pS<>82&qcT6)pd-`__?Qqs#$!rsyQhLePYx2I&Vy^o}{gp?$p1`YPH
zckpoX=e^<N;^w6;xKZCM$m`~)E@&ZdC}rrQ<>cyiA>@{mS%}ePhY$}3Wk*4%1`VZJ
zuu8C}kEfHrJ#Vn*O)o!{VD&RUa#sP*k8ew!(NMeP=&WLTUgysg;FbEBKkE_{6eJNO
zE8%_1MN(Q>Sy@s_Mp8yb984kZciYR~K3Lq#kMGY6&O7-z+;a2rck}k*J<ibnhIfF!
z`k8<LH%Ap`duRC@j*jx;H|%8{#igYk?ZxdKWu?U(Wn~=|<)xKmoTYD^`CA1&eg0nj
zUqS(v2a=?sb<4>foJ_v#?R`_@-<m&Gll=Lyf8^tI-rnCy<Ji3`E+r=}qi|W~PnWNR
zyu9>3J@`jDHOXTF|A}CYKkk4Sf@gn69E6|uzu|LI$S;9>0@n##zl6XqB|hP<6S#f}
zfnQ2|!d)kD{SpGdl=y_ZPT=|_1b!*;33r{q^-Bo+QsNWtI)Uq#5cs9UC){-c*DoRP
zONmdo>jbV}Lg1GYpK#X+T)%|CFC{+Vt`oR^34vcqe8OEPaQzYjzm)idyH4QxB?Nvc
z@d<aG!1YTA{8Hi*?mB_%mk{`+#3$T!0@p7g@JoqLxa$P2UqaxQ5}$C_30%K~z%L~}
z;jR<7ehGnJN_@gyCvg1|0>6~_guDLR;rhp08%|!}s~SPzI~hNsxeyT&9{++LV(^!Q
z{5UQO2_+dBDLFMIH8m9_6%`FFBRvf*11%L5Jqta<DJEuSW@<WCHWnr}MkZ#aAJJTh
zz-Nd_C`d>sm}sbIn11>5Bbp0=DbYnDf-@)4T!5<o<D<D8nLUpOUyuC%-ElL{eOoYh
z09Y2Cqw1jo-(L$w!)88Z7v95OPhD!_tuCvm>1(ihy)^Vz>*ztjMf20uwMlA^1T*e;
z=-u4=Y=;M8Wb<KP5oj=K1TAeN9_Sh%*dbxw1YfBY&f&wwLhnH^J^>pzqdXkb7cdNn
z7Nm_1tE94r2LvyX4KP`BgHad?{F4vZaJ;bT55vv-Ds{%pVe-}eY5}NLqbjs<a$mnC
zns`XD7uIs8qy|p8q!QCo5dKXcw>xnv<HNr1lT#^l<%^|4gE$VicOB6Mu2~%Lc+*kC
zj~@gI<EyN!3i)^+xg<>)LWyC;4iovqP25Wp0=0eX5>>{G=MUW9nB0kLx6DXWpw)fy
z)>`NxdgxNywz?q@lEXZ2fg>s#P_y%%xmE>X(0ysOCQ588Fd)8)Gl^{a>|zzTf;D-v
zqt@q!BQ5J!vr)W%V?a$?;+e*3EA-wD#hY;Z=<41Dg_WeIuc;l9DhZY<W5AaRJ-W9c
zaqoiS1P9i|cA$)3Mxx}*?$o!9<ZPDsrE_$viixME+!&3JA}`H^dC<jH2Tq7!_@;`y
z8G>8H>DqF|f&(D3?b#V)<;2xDNiuK0a6ClJ5z|%@>{T(Nd+|V2C|0EdI(-_wt4j>F
zrFp>84j4isE(EIrzP<baF^Ms^fh7WW61Btp!1mg~1b{8Vn%s<f4aRxlfKE>|(>5>O
zK=0w0E$on*5C;=GE<DiM7|sp*+K$}7ij*{WBx8a~TT2hv8t#BiZ$6CqO7?BQ0s%#B
z*u~jxO9vl<Zvq#UqVsUvE2tG*tRw_BZ61J=BiNN^M)zP<ho^^+sBJaCs=Z#px}MbR
z1hZe^vr`E_I1Lsj+J$CKzOW4jhbdS*JQ)5mSvk2{Kiv~+$BHAu1nHm!6D*9-F83x4
z2b%h4MDKI)O1*UbcGQ2LGj&N5@~S&?0os?@qztlO7*;vu6d1^WE*Wugp<PkS4Z&9o
z`H7dG(v`g|O@jT$&Y}A=FB7f0iz4Aabq=sc{@lrcLymaN!cW};_8<FLght2nAJzMh
z{r03{{}3oAi2Y3f|22=LKK4@C457m{hJhlUb`Rs<0Lol#<P5<q3>+0YdxjcTIv@o8
zrQb()<Bs6-sM333S0_g`1VgDX^5P*g%fb#<*{d@gq9_n^nviFDaJ%kjh5Z&)DqxI=
z<2VsN9Xc=a<p{RyVw-16u-;%uwUS{xaFIhhV5c+K1E;^yfd{!Bj9vC2-@*=zfB*(%
z@tIJ<18H!S8Lp!qek8R#I5td!Ox_10n2<nMZ`R>~e%fM9o~1!ewIuk$7Bq)laI+mU
z<N6c`t3|NQnhYy1sJlLoAll-}H73g5hwWAh;#h-LKf#v0e}~OhSPVdBpgCd9hnBD|
zucJrJt3FwHKrcH&<0-^H)c<*8Upyf6{J%I}dV3ddkLs1IFC+~O5z82;ePQ4;XLr64
z0uTs(GaVh%4e+XHm(e4R=TT-DjI?<gOU4!Dm+TH?#uqugzUFRtOKjuxI}f+)iGWyh
z*jY^i>1q|J9=<X2*MLyoa$wS<&W-mg>+U6RFfH6w!6Idx>bB*C3H587^@F{<oBsB>
zx`)3-h?{86CdtSq<=;^unuH*)Nd+U$J+@BQeqh~SI5fkr0q34QJLkI>nc%pPq7t;z
z4tSWD^AEl`J;>Y7gX>e*aUF5>%)V5v$0+B%DRQdrce$4@-U2eb>%v1S8JT&zF%|W(
z3#;XiuiZ7`LWo3M<Lr%k#|RyHN+o9kMv(aF0LSmO_heW($ZEbc0xPgd*QzjP@ol_4
z%<s?#*3}aJ(Qa2s54+|{v>JHWw%3L8|GjuG8HxQY@Gm+2BWHwP<k&2Acz}h$;LU(g
zr46yt>rGWRTEA3h*ic+?^2rbQV5%OU4OD4gjx(=Jzm#Rf>Z6fIt;au9BvP6Bij7)x
zmQ)#WSWpbcd*fFAqFzpF?A3&M{dPIiY*~?o!B$IP=N(^<Yk83#V|8E|Q3@v+L?6tI
z8@;klFy)ef9w`r?V_W3VDE26rT*>x=zq0mpEtV)j#_GxT-}JgQuHN{P$TI%bOZQAm
z14~3`khhEY?DLU934Zc^{Y!5Xncj-cigGu=Gwv<ACR>y}z6>``+8y5^PjyNA>|`e9
zIvMa?w$A*mZ)y^q9D!H72-Vte!xw85S+k_Rzo&ea9Yp@@K7o#L5q)zaV64r>-RAps
zjf^({snO^P1`<3DCiv6Q<Z|>1I@=ijiKqs9Rt66&$y)UPVDz3tQ&?B%(L?B}@1Km`
z!u`sP%-!-Y*<6@L{Pa`0Y^*#U5C}rTjuHc#53cu#mMVW39VT{RN3d}xZ_58m$Gxt6
zoes_ArwM@u<AL4RO(=_YZfMI_tT?)((38)GZ`^W8RC465eyp<i)r%|_x*Rf32al%c
zDGj1Z5pYJ#7Gi`nwIECa?H$(u6{>w~G1y>USp3|o<*lmDX;D6JKOvI@f5E=w<aZsV
zuY>!i7b3ynEVrjVE4Ir_%I}sIs<emT&R^(uqkcmkuNTxg8u#h^c|t1ZhMFnwqy_`e
zphKv&AyQq-#+%K};)+W3gO_6z-Qo>?J=~kk63NYuVx!e>Q&|(EA~c%#N>m{W{o5t1
z2*{=xjuQ6mJE#g5a9h^H8w?-~lT!Q+(Q6pW=>}gA9_UozQDPSKz<%&0(KxiNR~M80
zhYg4a7_cjIU?`w|oI%j)XFT9=xB1}W;0_rkdBIK+Hw53bYIgQM<PyLGea=tmUXtkx
zasSksKesM;O(${mS{4sbx1;^(4wwSG+VDX4lO}i;(pGV5GTx3wg8>W07!(J0bhpX|
zvtCoNjnjY^wENT?m|CCF_K^MJDE)bhnNj5V^=i^73dnC^jwsKbHt)N(3aA%PzFJLq
zyc1Y|M7+m;51Ho5BNvEJ)shm7ey_%XHiS+=+9jFXn(43t$Y%Or=Y<cp^z~)UX)D@E
zu$mQP=cl9mUAz>O8<#$rKP<Cl26!WUCMYmvufjxoagwER{@r)%+HD0f{h?16hicN;
z*0ewE4t){G8DQD$Q<_a#n_WZr+2(np^AHdUoS-X?6;l!hedRWSdvxdJ(z~8mPr>TV
z+42WHxfJ%9*Mf*x7Iby@tLeM14o{ZHR^6V)1FW3J^lrNvg0OpQXrk7vScU8f3PNoz
zpE1dgpYDo%JoM4L{#{j5H4{P{=7AQS(Mg}#h~Km2QFzDl_>RMiMO${a31Q3K)7<TC
z{U2X->t<YSNqpArCHRt5HzHg(EEtT2g%w3JwzlG!noyaI@{KK+D}yhR&1?D#e5;Xb
zX%|l4HfPg!5F#ZV+tn~f#UVcr$AzB>GsM_T1@udX3wYoG#+~{_<e;9Fx=NQ?y1=K`
z{8t`}+~qf^HX8Q)_}VZ8eiui9bwMpeW1*-a2*=_TTavHNH8sYJOI^3JR}bzqMCMg(
zzGu)r+qOBOiI`isE^fZj>`0@JY|T~99=_JUoy=n!JipPA%Z?_gJ!`;j<FGmX5a}m;
zX-Oo;NmRn;!c)2zUT+rO^}1As=6R~6uvu<SDA*)?B;BSpA{S3h8`serO-EaF7)MQz
zq9kJ*7*KK3UlP6VpbM;4L=T~iTGTG)WenD0S(e3X9}`}`orHY)mYP38_ObSm$6-cT
zjROz3a5xn{$o_B_CR_C+s6)0RFjhM5{d5q+6Y5QkYZyh8;%*#9-s6>e&ScBD%eBE0
zjnOy34=yKPPB$IM=bxUq7}HYv@!m6&-;%Qg5`n-8v7^iYaoBErKkkebCih@er#e%>
zlOe!HN7|R2V4Yl4XtUeU+xfh!TEqF#!SZ~6cm?c!dRqv*gDtPuznIM}T)nyf+fv|X
zDjU!0N>N|mOx40WZL-va^w)iDd*>9a+`X>*M&vV|TTtVnBMz6JhY*J|PhzDTFsbru
zbH&TSkK2cHPgfrbdylosDJ+&XT@KrisQ}0C?d6}98}OQTrb+pz7CJi(b&@>f;>QJ6
z?JD}(apA@wn?CUJtfM4Gn5rdk$lZj-!XD+n@NR%X=H32tZ&9bU3)@lxQX{%1EQlE;
zxq669egJlRsADtcVr8f#l?_I;Rn~*GG|wg<Y1UkseCb`<`sIhvEGi*J5;IQH&HG=h
z;7v8mUF>@uD&c|<I9XxXa3>Gv(W7uVS2%G=6K&T_Gev21fOl1UP?1&pt%JlD8J{*A
zc2OOd`Cnw}v^I@5*%OePy93eYn8AFkI_5h%YvzGf*PR!mVX9xNR6_&KD%LXU1<dLg
zs~Th_>M8N@N7`g8w3jA8D4kJ(b?CFBEh=%bYTB<YUH68<AyM<3jVvAoqg3qtf!7AF
zu6k$k<?8rGE5<_|J&ajAU<|7z-lwZj$@kSx7h-Jt*sQDCA6r(|JL1-v8ZMIE$@d+4
zd>iF3gJ>5`CR>7@3fQ2miNPwrvRqF8{1oB4``OFG;w-_LPD0Jti>j|5$(U%4u%!?%
zL0H3O!VI$z92>0$bQHSN8`R4+mfErK68^QIlF#lf%HN%@nX3A`&~dO>bW2>7SRKv^
zPT#%vU<!=aq>H@mEf-+N9bDRqWAEQKrXCyXNEFyEm|}bJP&(m^?CZ+Nvc!4=1*U`N
z*i)D~6waH=6-E=LJ^dI;g@IprH~6f1=yaXp0rjw>+=Tvl1;Z`IMfP<2fUfK0`R|UB
zZ#J0B(DBeQxjWeQVzs8#MKz;vZq(Un2JYpjf;ZQAW%cGhvg>v0ZV}X^(A`EQSYpFj
z`yJ5M2cDDnF4`^i<h7wj9-ULZnlV1=x^E#+?$hS*bl-0`3S)t)nNE%f3!LEXwZ}xF
zS!ZZ=ruys~s^S+eH#9!-rD-hklw?xaHzgP0Gbh{NI8dEt$(lJjHNBo(5WHYoF|20j
z#yC3eo=LJhNvvvWamD=fM`TE3E2Cd*=@U7-NLVFZ98My*XogrNx#cL-X2@Ck+cF1g
z!0EB!TmJnknKSB9LhonS*aOF|)y@*`B{n<4Il^>Mu$GNV%$F4dv~HY#==lMc<jYM|
zFYQ{&BHl2XpSLNuDKctqKC6BDElnT?EK#h16TN~6v8jm}+U?Oqd1d&6t(IStJviWO
zb+3)O{ZRqMT+Zw8>!0TCm)=Cx4k2Q<JJ5pP>})rg+==w`NNS!iu@hUB2d;}pjLp<~
zo|?Y$>_vF5ttci^ef&)(M^m(Yeydl)6<_iCPko9a09%Gs2wIaz05!VXG_X^e459Zg
z*7^Wrkf<~{N@RKb0#4Og?v~7NB&Z0`_lhN%L@O&(>Q51w;3n+o0umisb|-`y(J_av
zmR=Rl3gFX^gRf9I=j^aKq|HpneU@&`DcUn5OGxop@+6xcN}QeqIjX(-jQ5(|sp998
z>nUuzqdL7U46wMlz|VemmAWmBT1dfGv*{d(<)M#jRW=G3gWwAfJR4_aYn8~-%2L$9
zs%Be#^87wN5WORqJg0qT?sw%LZEjV9Nciv9Q83ECR_UN^FqM7yF6in@+dGtkeM3s(
z(&g_+<M{T6KY?L(1+iJp-=lE|cG&i#mqe?lM>10aU;L|nd90D7^!0H7`=zaRn-<}U
zc;MSGIJOngoT;bwnkrIKW4@#Z&Dtq}{`~bjoFX-neM6In<nXF{$3ahvEoJg_Nfuh)
zj?t#4M!$I=yn~nidHooFt{L%Vx~RiFr>Kaa>zKWW=V-Fk1vFv5E^1@7_xaMhi5fRi
zD_hIp4<%0Z7k+D~?(J3+$cVDPd-IZ#SybRusPP(^yFA@}JIXheiGf=!cJyBJ1FXLd
z;kuXyGY2DS+N;`(KH4~GMi8sEKE1ACswLMG1!bsO3z1~Pw9arR4&SRR5-oz!m1I1>
z9jIcxg(zDeXa8R4Q%GPPA%*qR<S9fABeK0gy^CORLb#97bu~bU=$_Z9n{OF5D~(iT
z-a0M&MfqI||GV+p{lj>LO}n&q+r>cX{GpVV*4=kl9#fqk4kBXwEPP{2mEgTNU&WLo
z4VI@$>nD{nLcJer$OOCphc!)yhJ0zVJ1U0G)`WkW+*LBb1NoorF2U~M0zh~Anj5H(
z*6%ul)^&^B(OGxkAmZP%WC=t&r@poZndFi$j9th5ZlXzfHMdZ00WEhoy=y*Rh`@m5
zx?UF%&=<&8#s)Pu9_ZBuc=F0t4qy@3m+!ZCj&$gjw3z@YngN%Ua|`SX+<!4V9{U6S
z7Y~t(=eXSzBEJUp?7Vm$!Rp(oJnIjIW8hy>!Wxx8(_O`Z%Yipsxd(I&NFayRP%sI2
zr>Ti1`A4?v2?-26&%Wp`!(@%djBfem8aMC1$G#GJO2oGI8#a$9at)-$WLXuoZh4Lt
z{}V63|Mva_y4$A(mnON}Z}EVtRa!l|8FYl$9Z9({f0GCz<g~}3zUb&E0bD!S^3Y3U
zJ$6+?@Tr_YC|NM9Etha8oNpy)k5y)IgeUcmcr-LKe(U0Tg-*SR{`7spy(hGOwuXmt
z6TuQ&Glh{G{*dT!e$=C(o9mL>%?$21@!WZe(+n~*N)s-6T$$#Z-<*OfsBf0qqq>)y
zD@jTtaA#f3EHPGBzgFewe^`@XT}!d~c2u}pBkg4q=O_NgDF4n7+w7@pIN&H3>pczQ
z4z{H_g6YDid!kEAcho64J#NPqfU#_o0_ew@4;sr}wB-o}DGUfU0(aOEszw$GGDm&N
z?|EALtAk58f9ekP1yEJOa38+`7qSA~>4Raazwij*&c=gw*bgVtU)oxwFMqTh=n(>8
z9Y2(XeVaa(g}@z66%);+fAwno!x!;ie#a4EqRsRkbR20OKg9jB<HKXs>t`*o(wzh1
zP%{Bd-je$_oFTthfxoo;n;!nKOy^-E={SSQRWK^9Ljq`+4uBQ0ryZz_t^x!5|K!*C
zaUGkP@|Em!_|Lo;lD>4Xd*o{k``NP>2xRak(Ewq|D7#$|RA$-KVWJiygr&-5#;VxD
zBs;jH=C(VB>}~1$*xLNd)`&{<a6rgp1k4E%2cgDZbD1$DM3~kLuC~6B8TZJ_5>m|L
zb5?e8^3F&i;P&wtX8!FZaGf>+1_$H(N^(6VQFwCK>nXi7JX|Kcss)3@>BBx4A96~-
zwp)uV{JznS!F!7T$)Wf|CJzv-!)ILcK=BHItp*$mQFJnvNrR;pE%-h-ZGN+Nw5=ir
zZu~B7$RFyWmS0w04)HlxTG0%Sq?lnGB?BJdvwIIc<T#55dPyUckWa_2yR_p}x%}-o
zSJZBWz^feO;NKLX+e(-%JV2G>0J2+=x!w`kX5l4L-CdfVBU%kL(B-;t6^u0e8v51?
zw&;sqz_F@;@m>Y4G%^vi1iu!yFc@xN_x3>Aap$YV77JAM!8zU6=Fe9v`gwNM?T5Qw
zn)O38`h&%2pVN>+e<w@nM&>w>sZGIN1b(*1tdgSbT77hMcd^X}9?d1)0(EL|yZ4t!
zjsK0(Q;(uSuZLE0G89!e1l<>@Yi8Ddy%e$+Exs1!fvk+XG?-ug&J5{k)6kjOvF=%?
z2$lRy$(0nmSF&E)!yPdpit!@v#7%<UCc~g@g1Vz<|I4S4blfSoshgCCr2M{+9a!A#
z{i%$qa;x;N9W04B;0U`u2F6xKB73kB0m(T1yhG9jTpWZ4HtqKYwrTFstH~u8|9sHD
zG4w;`Zj(HEMC+w{pOuX;y1Hb-(ZC@{VeBzHYd(fnS1oi{?=Q<9f+W57X2vQKJHouX
zK@Fz<VD-<^0Y^OWUzN@d`MG5uqc2;QWR(WXObmB4sa^=6u{7{_p`kE1`sw~nuldn*
zzfJ7dXPpblA?qs%=x;PL&9PGn$!q(t?O~WE=U+GAzm#G1M+2IE{XdX@9_KCY$zTmJ
zp)U-OlHA;?R|ZSOz3siZmXV(m{ky*p2HhzXJ?y-pY|SJ6HT(O^Z9?6S(!2Sfi|y2C
z^O4N5xj#-^58nE0g9at4Y3((H4akLWLOqs^Z(UG~PSWJ51vT3kR?rc2Koh(I&S#Jw
zFEkijbO#otuklEuo#9h<)RQak@-B$o%-iC&H{9&>i%F8l2?mP1R;{2$XYV#<sJh>#
zd|{Z{1@_4YC_HdP+5k?B#1Ywo-E$c_t&M@<qHR^c$?nKf*k*vg1~octLL}+PTTS&1
z#{J-nd6I#lt<wF|sWz6%_Ng-g9}pkgqKfOeEc6V%_pd@dOajzc5tGz%M`Z1A8c#8-
z9`bRhjYHe1tCOeE>(yb47WxAcgifWL-W#4Xs#!FfNF{!r?@h#uREZ;#NfrG#-J?5t
z>w2Up-%g?>6Ra73@Yd9KE?Bz-h){vJWh1-K>tLcJEG_A6D7<js@}ZET2hBiEdffsX
z^&ATYgM62A!=~RKubxQ8n1q8-zn=~#HKV3HCb2nJH2A}=S7Mxp8ErA4)&rF*RFZ4r
z1}1N-jVrENc{@(>)xUBk;TJfwbyY01nmQ)b0>&c8X?K6bJ0i{ML1Xx)u(P-~?IJJ3
z>uUb+$ZtxgHFgOEiQLW<8aqdiTwZsfi?oHT991A&lYE-#1IFNA_Rv_-S=eaY^iJH}
zaK1274(GC&+fKE^3kf};GdcRZ7rtMFsL2@K`b=~^$&4(rhk0)(1T;|+TuO~#Ibl}d
z1Q3Nl)7kJI;wR}ddLIOvKYNLYn#D)^%DqtjXjvZk)$2AMh;K$j5bE+M4Z+|eWrny9
z;C#_D1Okq+UV&gUEWs8QD&^+Mlle*Nn*1z}77O_9{=5Bp3Ct`$D$E{PPIo#Bq0&Kn
z`#m1eU(0U&skhr?8r-_A(H!xV8otU2Ki~=js~hvB3}n~KKMB3!5FsMI|0wwU!bKjK
z_w8(uT8&_a5pJ~4iu64F3p+;YZ>NeUj*Z^(3b)H<QszQ$*o^z;suDC<pvD+dkD4WM
z*&B;Qeut!sF9kbzRCzbq!~EV{UwA^dUGn{B(dt+un7)Dm&B)H5VEdObYxsGDlJUPg
zDy08q;cuWn4HY}z|12Nq`ZtRIxdAN{XI@s>NZO*D8cP$y0#lYbDl>=m!#s;>9x;9!
z|6Fr+VQ&AKR%#E=&iF7p@-1gOjOGJ7WE3=>mP&9DivLLq{=_9$s{d%f7f-2^|BV*>
z{}ux&>h`WXVIHIQ^s_o_Ra63ti>xgtS|*eS5sx{RvMOG$9~xQgrL^6uRdaYIUtzLH
zoSbM!aRG@FtbrXk4ZL2$TBbwev?e&Gu0+(17h_h`p|jz`1G%#)-eg=6>b`p5;vUIX
zaHj<h^e~Q#2HgE3y{MOul8sP_pC}31V>Wijqo+wY%=YoL;gffXlTl$>#Y<AHoL#Tn
zOqW*ZGj)YdGxP*-CG$yZn_s?lEy6mDBvj)|PL*1Q=^eJ7p9PmLR;ZZ{)Dk=pb{HRd
za0OWrVrKVlGkv8uF)=9q?>@Lx?GgK6b@s*63%xBy*yo+*gVls>(X~iE^%}p8e)SMP
zY`dh3@!L?#oE}p2UMmgs9NMMT$^gn3JU=mf^>|HLXIN-4T!p!>iYnrfN5|;lXmHY8
zYgp|U3bJbNNEJe4U=8vOxX2_G4AXBKns%;%>lScu8OGGbqlQt?g`myHU`r3Kl<HWE
z<O0ZMqR3q1g{2*4jW0AKtpS3L>$ewvrT|<6X9w=ZwkVG*#VT>^fotH*lkY=Vi#0f^
zx@{L##&_Wkjse`dre#%H9`-k{^@k<$-}{0G_~W|cK85M#MDwbLZX_NUW+k}vCS??H
zlFKXkTP6CFYqWa9M09#b>n7(RJ8xe(89tk(yqX@bEL<$65t7*m&T6>CWn~Uw1DOK&
zW=I<m_IN;YqPCP1Q-wM-M!Brf94dTvf|X9lO^EyZf=+vrLHZlxNtbM_FEGKc(>2~P
zAGca;^=Xw9j8c`_z*zLyRVreikgfKE#zh!NjzISFVx^Y~ss67Gd#>E6tXy{AJuoZk
z)GYul3#oruDxhU>!IfyfnFiKngP8<#uqnkol#5?kNkI)`GbR(kxkdRAu){`QC~5j*
zox#sDaYc3iVmSVV9|fG;jJgZ!{Eni+b=+^>K37`)L$e>nQPL!13^WMRe8}$4&n7_!
zcq$2^8leQ(R|EGG2*wD$#*!s9IOpKPmq1<z^o3zP<Iri{{lh}65K#hp7@G*E!2_$;
ze<;2?-;y#Am@T4qC@mHDYx#Z9PYywgHSd<R-;z>sbWCpCMu66b!5?`+k2BJwu=Y-B
zp1p|y7fCn}|4`d|ut)tk=8nI}<|*xa{}<2xkyZ1;Ivn+s_<7dx6HGApJ6KDEQV;hr
zk(0O%iep{V=H8MuELlp;3^i3Yl4tjOdXzLcH3cfb?yDAjL<Kq*4ks`Ag2wZYtnz;3
z17vhyR&Y8!u^{qNpv&o4RR;I>0xN6n{g*ibt7Q6?tbfn#PxAgZKxMKB-L!0C1f7t9
z8{l%1cNu>cm8^wC>b97@hm7v=iIH<H9^6|=4(WrpUEIFE^R<KySrZ}AB#0FmR#SFd
z%nIT;kJ>M<ys$xx{!&(Xe~Z_T{<mcco5P5i`cIx+YA|wYO+NM>FF%CFfKN#3eln`m
zPmiyCFI}y03f6j9O0sM#-e6334I`>OC|DB;rT(VJseQAH<@;GtmNVe~B+SWt41CgK
zZFCm-<G8O_1jeKm798$?6&A3sulS-N0_)sId4sN<<RfYK!NsvPUi$nfB8IQX<l&uC
zCCr;v+^N#&uoCFB*ppj#{M9c9^6V_Jjp+TTmS{rs=#zzFakJXh5#1!*R~}llZkzhq
z!`7j2_OhYvqHg(ggC><|E8`c-?tLV=zsc4y$z4q3sPMW%ORFVm{3clO%77XSRCUeW
zZZsup*EGf-Ol6pfrJa9JE7U<c=aAr=+OJfyp;`g@*08PpJh{Iss{3NzLi>`emKEm8
zNjFAw$*O&iL}!{YLb}-BOTU9m1`Yt*sv(D$fHAN48#U}W+bXmjmPrzd0u>yW7E%RF
z)SdB`C#?zDA1m}}Saq-?kMq`Xb|2%dv2C=E)XA^I#Cree4d^R~lD2>5<cCSJ2)j*%
zFwo~*7G-xNKRJ9u#R7u-M8||x5(|7eJjXwk0Bb7;XWZ+VJKry*rfKZ;bAekxNdD-W
zcmV1T>REt*Zxg7)r&dfiA0>c{0}lK3(5*dVj0NXk<rJYx(3ctW_uNduoj{KJ6l1*I
z@iQiW$^fJqXejWxw89SAz}ftMo1;L!7rr>P_h{qRD1>`rxTz620&5_7R8(f2K>LWD
zZ{v=1JHYg3X357nT_Oa_FWu+V-~@U3!?CuuQt@YveVhI@qnAW)xrJT-k=Y+45(vHc
z^BKqwIY9fr1TWpEpl0fh0sFoEABNbE!bH^Rk`o~m#sJKFEa5G$U*0lbiTa8@j6t?f
za9B$)hFx&D5NR!$q{a=KndAy)oX&7U4JG^e7TLt!WUZs$56t6kdE^LlNu;|(6LfXt
zR`3Ual$Ti~{K-}{++C@2@d6vyA04e@xU3zM*AjG4)7C?@5{Pcw4bgX}@;C5%<e>Qw
zf;weWTIywV%!R_7+(zok_BbZTkmOIW7-R7m=i7*O<9&z*D2~4k`~G1japvR`GNowV
zP~q}Dz*Q|km><Gg0#F_q;Buw(B;%5}*Ga7tji<w}bg?a5r+&}eac=F4pI&IG>6<?~
z7#$kgCVt$(ayiO|Dj#&qXI*Uayf9+;Vp+e>v+j3rJ3^D#cx`FIeI1(gD5=qD<Aj*@
zy_kuKzCEo0HH!jFrZ)q73lqzRn-A)tl7?gY$eOj&`CEh+cq#W#v(l8erW-Y`PaxHU
zxeiUagc0L>Nw>&%=d(o8=S8#iuF>8kWmDH<TbE91!UNKfSdWF$5l$S~Fsr@bSm6%`
zSC?$_dcoWBFk0a}aQ*#_-IyM%<i>Y6M2bf;g=G!gVY3G@RPknw^wDMSo>#kl;2`{}
zXeh^4M*aHpc;RE;<quET-~L~|HS5WX%zZe83f9OF5CCrL-h(<f6JaXaIhyZzpK81E
zuAo@VZ&D$BrqRME$<35}%EHytD=#@HgW4dc?zE0Evl)&Z`X0Kqh4jWM@$eqo(Yqp+
z0J;YQdRPE(2<b@#P0-+IP)P(g(uy@k(P6QeQP6lNy9eD@X-ta$%cQRx5vy^i`I+U9
z%XXNH&UIxE$1Fb!Cd<zU^Gi24A<u6Aek1L|BQ{l=I33<Y8v(#);tWRjX>jg}e(t6>
z{kgLbqlBOpSi_g!@*>WWIz>%i@k59U-3VPc`(i+YWzjM#p%`l{_HKFk%Ax)_?-@>v
zSW<6fQ`cFmKyceM9lwDsn_^zTpY18F1Y+7Nt^m`5Y(sUIH03F*?QUVBa2}9{e$}5B
z6qb@kFF90cgAR$iA#|}NRR$0Au;)~lg?c|9J)1Gq*<m`>b^mNmCQ)uIZ!Ln@!Y?A!
zq|!xcK0=8`a%wWJ5=!HqEnYQzU!ZazVr#x0tyAdd$D`WrC*Ep6R>BwXMKlhRddPuQ
z!BkCiv$l?)&|E_y>RZb!7N)_myEAA3rxC=OY`0j<g;|MJYTqP}@%SRH4C3u2gCB)x
zoCh5nwph*%jZx%9*f)f_AXa5-Y8Z<P?g4cg<&~8YA);AHOu)j2(tRu%j$`rzIgGXe
z1FB5WTv^b1P6|2LjR*D#nx`^BbA05&cuAOJ54%`Gn6}2{43P`ls@LMarcUI-rm~tz
zd#oloAcv0xOH0A!ODWCM!*J5(g{=-4#-vC0zbdxh3P<6MvdK>zS?IWkPU}Rpna$~$
z<|k`aJ-kD~y|Quu))TA`aU(_{WC8<j)HK$0Ld$<cyYv-e8TY2f{Iq3kgFidp%S<{q
zqr7)o<Z?==)3vzXdmDDPCYlgISZh@ibKp=z5IFE^_h612vlLaq#xa~!4qJAuD}D14
zQKjJ)B$xrofU!3_T9rmOSMsED`3A$SF~YB`5Pq*#WHGS^WT@q$H-2(i3|Rz#|L>3Z
zpu#sR-4j-}cS^CRqNfc9-+jzpNSsT$sB$4!sIFCbfsc&S*n7e5s<jqp3qz`<j8CTp
zZ}cehG@|F|kp$I;`SCE`NB14g-5@P*d~W4oDzUi{u)Nu=IpB=)i!o)B_n-ri_%p^p
zE~VjJX%hGJ4YX`<_SEk;-rRk)s$U_={V-`q602%4Z2>w6|D<eqVAt=(tecwYo3EYq
zdBNXg%5${{RiemoxdmT(b!FEzE{A1Lq6}d%`br=g!;nomYdnw`th+`47S)ATM<l3;
zgfn7>Z7RQd<vbfYGr05p;fVdU%z*G~(ahg25t=^=9=V#_m-BRoJzu2obu`B0v&77t
zF1XxgwFs5oniY89-PVjT#uC9q-w4072`-uUWLZKdXooeQajG@1rSEL>DSWjJ?t@5i
z2jyX1+dtRPjLsRz^m2}zM^l$G39>th(f0Y&jV48pG`$-TTPt0it9n-xKR+^k(X>@Y
zRQF73)Ehcth7Qp#tEt4j2u15K(c#e+*ZdkRpCVd*K1MK%Up0Ef5x#$YIgkJ0<O{C8
zWyHtS*&>j-W{kpnSOKOx0oBC+B)M4fKlb=xF`S0(bp5?I8UjnF!l3m*SCNB)kJmW3
z`%gvA*}cANh<I*v`c|;H+Hi5JO!wQOEb98`ka#VP3&-*+j0051df35vQeAZjYNaRI
zZfhS2#R@Hymb!wHhVy?WI+hl26rre_JB{dPc%UvV<X{sNXH+=u+b9nlOw9#kSgvhY
zYpDhetY;wsd;u*lx2f@YgvM__mn;82mw-*p;6qwal)+@QOYue`-O;D?zJDj%6LNmJ
zbqX#VYJRz`ge(Ta9{fC58Dj$JJ`~-4mgBAFDv)?TNgK`GF)N^-mVZm;%}3ww$v21V
zgS(Njk<&JA+f=86W=dIzLB|96Zba1lx|DBKK^Md5=^jRFHP_Hn)`|~G13v%OX%}`C
zy`F)3+>%C}NT)ZLIWix>w(k|7O!-!AGqZ@D3}<zL1_-uGfH@{d=CTY;w(fG&wArx)
z*Epl68;RPoSQXvi@0<iORUP^Avlw;a2Id?3pqR7G$vdTqBg}2MGlN)JRNoGNUGv-M
zw(^OIYa*VPBRW6UrP;LKzx458I?d)Y@3JsXwAT9x=H6jljK!2qXxkP02QN|UvAaV-
zw7!pM)_P+puI|XNU#<2_w`v}Nv#VWfySAvof?|FXN2O5~BvBU7Uu1P|wxe+A){zQj
zSl_y_n6WU(;htflH;i|Olb8bb!_535G4pS+HiuMUq$rx?(FPM_DP!=hPT$=`i;i+j
z#M;({DVO*?ZH!>C$0vTYOB)_w_Mi;k$$TIeBK~^2Q|sn)Bu|ejBxD@r{k}PFf_Q*r
z_x?1E$mnHuLGw%k-!Mc%W?j1f8{;BnIX{EiTX{$dsMAJd$o5-M%@B4$!w_37?av<T
z4oLKk`s&zp-S+=F`lLu|teoEb-iEO^MzADr=FH@c>H#c+YuoR&Lk%8xifm|As@@O^
zZPF>Ria+r&43F{T;-WqhRuG<g$ycfm(#cf@RrZHr_!FOFuNc~0#-tY;Oy@aqj!|*m
zKiyPYKk;EWG)uaTN#~rz`qkQfP4?Xw&{uR5E9&BVRENxMc#bIm_jJVrUz#dL$lRsW
zh32z-=5)J|$HQAGI7y@m+4~{jZ(>++6i48CJaFsvN;+0c>tMLaV5MqwfWuL7iJ>2p
zDYIz3gYOf_oSi|h{w63gW1p$H<O**;<o5k6emds(EtvntgIrUo^RspUhaS7Gxhl?D
zW|k}IZV*ylUH}M|dpm~vRB`nDyWrz#>Jr<~s{;+W%z`=S$h4b&DcyKLVxPM3BeC|n
zf?}^beLhM*{ll62d})fbNeKj3OjV*(_YQ8r{mUxF%41x6s5zf;BbwP~8dPS6R(4(L
zGdGOlJ$mj|$Qs-(Y$@i?Wp(DSM7_p#bZ+)Qt{PpV%gRU~8ee>!9WZ7Y)1a6+EF9$K
z%h*NTtK^?b{;~Y+RFrb(&c>BRAb&(@CNmx=erfsPj{9znl&-J0N~LML2&t}Vp~%OO
zC?&q;e)-w7F#39wut#%DIIlNZ><!)6Qu^^lGae(B$&kkCkol4GCSB59eT_*8)C*i+
z$x<w_A)o?(Wkwu26aGN3^$_87U;bL*p;6qewyE1eALw<iq8R6{g9DUT)rvwze(x(i
zwXZr_s31C9rGiPz54Sx8sy+#;H+<~vVv1+yzMy=?Y=Ged_m(ZVDKn8*7~|S)R7D$O
zu&j73=GKg4)uOC|G2y6>$IXLLJvQ!ZkQYEGGc$4swA)5oWCfDjY1kVm{8BkQG+-l@
zs<CQ>HP0?-=XU#Xou7&sPZ)kPGOp|Y8hxR>qm$cFhvb^E*0y@GmL|*p^gX763+<Qb
zeLwqY$Y64&CF8ENz?d<Gx~N&*yyYyk)xtM#`!wHgm&nYtZ=b?_9kQ&WngU;i#ME~b
zCkd4I@q8RSBCl&yK!Hw|%B1mN(ww7bXn5NsdZPei{9d8>{o_luwZjs=8^kxh)8m0r
z7w)5bWN$9CYrbF5*!heBw<M0$;JNmohbcHq54Bc`-5!*@QBAgJ6KhOX_-v*@u;YP~
zOR+-ajPX635+-$KCbB2Gfm6h7jb5GOoXcz%vE!+@x;pZ4MaIjl!HT;#G7lCfmQ`%i
zQ4{U^DnWN6Ja0@JL6KWm1Gesat&p~mBc33Yf#CtDrzEL$@B<t}51E^-5GHZPE+t2?
zHCygAv)#J>*4V>mSCj2zKaG$z5iIQVP~v(gImXRQ2C(X=xR!pv%Ans{a3DaDIHyYu
zeVdo=o$mKJtpQfP(ytn1C`cTRdx<I^<E>NPdy_~mk><%csye7_r(f2ska|XAQ;+8(
zWar`mR5A&!TVaB&clcsGy6|3NEK+o-iQE5cF%`U_ZA4^Dm1j-7Ex|8MZ@#{n1WSh^
zi`a<4#ep)j+f1$c<<N7hVU1EhUYWW*=d<;n==4GP1B_4*C+X@dB<ouW0r}u^xPS4f
z|0hfG{&%LMf9XZu6XKX!m{oCOwMR5b<bSaDo?%Tj?b>h<5S0=|dJ71Gbft*2L`0eh
z5tQCkRGRb_AP|az(u;tCQlv{$DM6%nq<85MdQYeUlK3sWpS{25x$k@L@7eGB9^bF`
z;DB(zT5D!qGjq+%b)DxqcW$Lu@G35oH;5HF`~p22Z!hD#E+3ZqVuW65WX$x8xg%`}
zz4_wgRgIF%XpJ+A7Zc&pU&YI9Dqq&**-=hrb;@|4FNpN^I2_N#UI*n1lLu3{#{7(G
z@hVq4+s&x46wnQaw;qiIo39+ymY6NH6lR+AcwvjYXp#~l9I}lR<T{(mhCYdR?q|mK
z-;xdAM6}3ogAPK4BIEK*?5=wp?8`V!8=BoNQF3*_s4qtN3$4z~zKK-gIInadllxoN
zA+t^~&pPdlxae$Ud93g&eFUFwtZI9OM9LZ73dLqGj#Tn&tZ9@l7HxteZ|m-!f~4?z
zrcYdxg)M)tN4IZ1$9aE6E3e-sAAANxHr7y4x`<oW>wbdP5i^<*4bsC^JMQVGUuGDq
z&()~Hu@jkQ4O?sGXgg2Fc^f_nkuP!~2Ik6w%W7ZNefB@DPO*m>@g!YNI7?;_k?7>)
z9Vj0(RTb+P;DVRa5GOI&70(!HCi<S8mGP!o*VcZ1P#hCCRG1>JPus@L)vSWpK)2>0
zA$%SvhTZE}sqsLu?=*`$w<-kuRi>J+E*+!KGT#++$N?aMy4yFFf*+)uy{F)!S|2~?
zuY5}43~xH8^{KAkQ&T(NcHO~^RZ5pBDSQs#8$zyS;B%UdVIfsLdp5aKt>o>?jE0+*
z^~V_s_o$3iyv`ESd50^os6YkFIi?Ni^4KxoyaS`Hq)ATH*ez0Be{yjI^oq`W{VbTw
zMCOv_Ycc8Z0?R5}QA^b$g^abETxGnB_k65UoK0IJpaOW!*{<Rbm{bO95?6Rse3?AH
zaN7sO5eV;vWI1<WCDzOj2_`t^7Ou<6!^0!@%q|W2-ZWthR<*F<i#|?TQMh=rBmq0z
zC2?RQqcY8ALaNav@X|K8?oQ02EVMuaT1wW5YE(9{ZI3#Gmv1IX$B|z&woz>usy_3r
z#t!igeh^*m^8}|IyEO-Y-7UIa#XCdodhoP5Y0j*Y@F;4%F9U4zgmLZB&E;!T@5F}g
zox`@WkV}y*K(HB(Wn0NDunSd>Fx->&B8~<sWfu4HA76S?Y|a<oQ|-n@i<g;!hj^Ys
zfe~sN{aavK6m1gwx3jC}mhVhc7T%_MXU&O6azU^~rDK$Dt8fNkr>KC5%CoW0_Y=-F
zBHKJ>JiUir8<)4P(fkNlNU`I<m_Swq%OVJcHIOltn0_f1H8yXtIk9?!Mt;G5SOUyt
z7n#QHcMU%|yLHY^E6qrBDYVKke-YInKl`(02=T6EKwN#JcOIiLC9U|HPai_yNj+a9
zkQdh?H;Ppa`~}((L+CCOzp9Wnx-<M6YxE~Axp9jX-M-*@C}l5{Yk4gb@2XbPSBh;>
zrp;86`L??0n&>+DGuAuKjE*V`<XZEG`vk5-II5QPxt(11{$H<S>RY~&l}3^EUm$Y-
z+j;K)hkNcknWVbA#CTc2yp(L#J2=tjSa#JwZuq60;z1*Ih)tROxo&<hH$m1g4KFK}
zrS<vP{<?4DL43A;?v7S{dzy-_@UlU-wKjKJxoyS@bY*$821DOxN;3|6R(0;c8}<43
zk)N*5(BE7+LZy7LYb;abKiGF45|`crkHLMh!<z=*hD+HEJK8_CHI*g~TT2+zYuMPg
zI0x}rZV7{Y3K>&Bt}%I&l5O&AMgUlZ`@Ep5y}QHgT!PGhxYAYOTYt<Og2f-Ioa~};
zInBkVk+Lxto4G{$N(C2wf$lr{`f}XYY;lT7m{<yGD6%I10=Ps~yLRS%e<=4k1;~sz
zD<aR(*^8J+jJH)JbnK(Ws-f1y@Q2p)NL!plAZnHlOZ(kt!fwRG7qu`f<#>$GnDSRN
zUK|t7HD|Rn;)!NN{5(~3W<gz1X?M#)5kKDOM~7$T8B`F(F>B)0T;*8}lm^9k<5a6G
z+aV8a7}5o*^Hnw0zDIjc<L^Zk(<3eCwpbCxyo+U|eo}iGWc|?N?CFL5n9KwI${q>=
zFV>*(U8W-~;=W36PPE@Prsc<trNqJZE9~>ob&WJI1)5lqsy5Jp2%ANR@;(x(t0aEN
zH^zI|Pn@MBrHS6QR$ua-p+8l!S9~`}7vDSw`2BKV*ooI2xpmaY-c4lNYgF}HbF#O*
z<sHsy(3`ZEpBbrcEPGE*{rcCK_u(Ajo^uKYv0YdQ>HOhBE#9)QV}?D!m9O$ngP5CU
zVvqcK*p*jb>O~!Tfnm|cO%D!`Tnq6Ghw>m=o7TOqe_kb5FSFS9Fci6*SWGW@9M|m5
zMTJ+KL4`<4;;kCw#6si)%@@0WNTrzWiHpg6J9w<+j3c;IV{eC9;owS+RG*?Vni9U2
zb%$IN2z2mBFYj@7ipjm9!}qSsyF1mFF3XXztE^#ST$3eMFympzB{WX}$A$2Rk#yFl
zhN-gH)f99G_<Yl>6y7@z`*Q}YXvG8uyx(@NTLg3hEI1Tkz~;G>`kL3t<{5A?XSPB<
z9_wM2*zZ>JqLs&fyNw$KFd#L-L%63b5boYM6G8S1v}l6O>N_?KBaVMiBMU(mR<<b3
zbvX}`yZ)bk<VbJT#Thg5+0DpGc!-pbB0HSs$<?vj7kyHSmSjB@CtnNXN%zip-=k8T
zNj<d;0lT^PBV{v2m&Xv6Ji-0k$rF2?vK`+x*EJM1ibBZyLybYeTI=r#GJiwc{R>h1
zfBTv{w<hKkh*!u4@4KMdq^t-Se@+z{*b>A4@^1o!Pzm5kpjE-}$<xj$at5q(@Nn}p
zcpqoZ+E)ZK0KFORlwcr}EB3UFkKzS#(>LDrCTFpE8OL`$MQBip?2KozpktH)+X&Zr
zyw9BYMPc9Zjt@7ihx0d$iD@pi8)_UM5M8|DEvXD-ixUX>I{kIYh4HMGYB#mqn3@hY
z8DhyB8qYME2oseWV#;2A7phS|&H@Roj43MXp&J)FlxlS`+FGBxr*U__!=d`7gevu>
zkvy%*Tu);y?Mk`KQEH@=u!)%}L&cXJ=`#nDs&g(<cUVe-b<d2^sVNx(u!x64w`#)`
z?B>K-uX_di-dh>2`L0!yQog)K>P+967GP{s#(r(xHyd#z7|lSaZ;`+?V`HR%tWn{~
zF~+%#VIP-%+Vs2SqaBnIsualy_vl(l%=1axQFvtn`5c5H-*>!qAUMui@nE`?W^IcN
zR&K3>%WtMyL{Z^Sk5+!n>i+Jgc05`Z!M@B#K2^=$n6#gYiMZ5b&@W!Nrzmv9SW#Y8
z7Olb2*2wcF@@I?e-fkg$r$3tOPAy2gLKENGnsN>2sZe*6vu~EW$E6i`lM^)>-M*<&
zgsua2WL2!#33UbWSuJ)Y36~w4{cFv9RePS~uBjgUmEEbNPco2gLH|`j;NwcqVKYdK
zDoa!bN4(O$chZBB)i~(nbRD3z>Dlf4%^dzNptd!V3P@u0MiP_81tZ9UaQIT>+pQKv
zF@FXAWN3dF1i&pcd+jxlsSch5Eukwfhr?V(+ovBJ-C}CdG$|EL(NG8~Ia#1^0Sgoo
zq*NwF5a*KXj-3v{Fw2A%{&@H8?DL2*jWH>QPHo|{KopA_<FbVdu0UQ$s{oSojt!o-
zC);W#zYTf>b8eTfu~J1zw?3fa#em1pjm!q8=Ph%=`e?W=e}4T`d^W$LxZtVk!%y3H
zHXuKhL?j%VtmV~-b~#_M(~&h-4V!&1ju&!XREfK!V|uF~K9#G8HVWSiUGf{;yX6;t
zfkq6dQdO`hf21OSgHSeuG$u(WMC2q`+4)%CI*gq5nwRbP?wa^mV^yPwX=lb1%2}PP
zgMV76g(Z<xB{g!!>IC}4-tHWa^Ybu#nQte*Js)W}A6U2bZ#f%-NL0h-U=^r?s#%c&
zZ+ux^?RDvhT_K&hTIcr0j;M?G>pmTAQ2e&!nw&j>E&jcXe&V&svCfa`*)JP2L%nDU
z&Qe^MmLjuFNKQrdXC!Im9elw<aA{|Ab2A=yE6KJ(l(+DY{|gr1|G)u(X^D9X$L&xi
z8{vH0Bdy4MOlx_yoLHfAc9()?#;R+9s-36CsdqfDUs1&deghWO8r|$o7Uvn}BmKDT
zX5MrC0a_km9QEseQv9msTuePMXZ}DDui7y}doyRq2FgHA(|?k9i^AWRQzO!#&cm=X
zYXC;sYzW{(01Mf7{jg<Ad;qY~p!$u=#`wt)6hrE#DXUP$He3M1)8^BPAEdUr6af6b
zI5tJ`TW?JE?N`qUKMZ*JD9}lMq{1MX)L(~cltt>zk)CP+Xkzt+G_d}VAKOaOQpj!>
zn=icitX+;XO@g1ukm#mK)S;a-V_g!;VyeUn+K}~cio>~!?p*1)v2yea<WI<xfG>oC
zmK+IGmbr+U>827}=%uK8=rR2>RhqdbqX6N|H>taPV+5ZeyXuuWK%lkEMpQb|H|HWk
z&2IUQ2BR*lUNj^$QrTy8+mCQ6Jz#0&w99~qU2BnLaPRi&F$OInVb~pQ?D|nwb4m`z
zX!@RNs3RJY{Gdj!eZJ*ucL(*8d6x1Sno~lYytCQK8F-)jeH(jlh7K!KDO|cIOj<Mf
z#rTx`J25UpAeDM|(yjm|`DEMInlCP{J&jJsk98%afvPce%<l@RXQ!qYV7t7?*E=?M
z{ZMPq%<r2KQ>qcE#xQ9zTmG&Hfx6q5JxI_^Nb=%8`<!6#$z-AiwN%(-aLVY*iZEBr
z&@j3az&`KFbj5lJ0L#b3f76^=s2O=q<>r0T<zyWkgZImt{?95Hvs+U=3oPvG!|rH@
z+Gf_C`(18EfCdoxLd%WTOa8K5u)1M6tFUaGh;hiPtU-avw^lf=bm^$hs~%$ti<8cS
zj7s6o!|!H+u9V~%Vl^c8TWs&>A7Xo1+eL7G=>!i$69MjKH0a?4Ga1vR_j_rCE`Xqt
zMh92N+hi}3xCVE9eD60)j81P{>~h6)=`PM*KZsn^3`w8?a6mG~dU&gB$QcM7$_Aft
z4C_9=nM|TDG}!R!_8^v;zT1yf?c<l_FwoZ!5QtMowhF{;-Q-9p#E#z`QGjJz57_z%
zYHBIOQIz`$13l3Kwlj<o4iNdd8y{t1n`n;r%t0i~X;xGYDQH|=`Se9)AHry>ul`e(
z%cb7wqh+g+^2Odg>A`=Px*e8qh5SP;mNPOPbBv<z%EgB&-kP>Cyo3`=ceL6Lr`@-c
z8%JAU!k6+E<KS57$ak2PDov(zjG+73gJ-M+wOlRB9U9K^C|R_rJLNq~4)>z)e-c_E
ze+PKa#xD8BlGQ{e@6^{;zDjI@HIo>7-9rO`e-SOrb&Cp4a)_GUfUBX06z+?DAoVGX
zKt+bc94<N{HlI{@J?tS$x)*)^!_cwXaDIPTm4C;K_uU<X$DVzgB}%F$-^blCwwZr>
zxddmP=jd|y3zUMsV0!jhYi5D>v4QdQwf6br-s$Ta^C{ob=q&zCO9I#j|8;jT^Ef`i
zUad)ImFf6SJ@adKY9H8}hmIvi!;_W2KnzxjaOu7$sT~a9M3y-YVAP8!P+R68%2D)c
zd5D+Ao066CQ5RMv4gG)8{#Jer=>0^)^}!AU$6pC)(Bi_NUb2ZCCBAlWn?8?EQbsxE
zA>!`UhPaG>=T%L-sNj75^3x{Ut8{YC7ARoox3dz~=72lZ0LUpBYz%jM^q`Xp0HH4G
z6GKSM0K6t8^Ypb}AP50aphPu-31UFHR0|G7p4X-aQ{YJOf_4`ONjy=Nz<wwBO2Y`?
z3=#`!){}5#i^4-JZZ3xRQgx(UAx1&jSJ;+<Hqz09y0q{IvHkO9i*pNM?0FTt^6A=s
zWqs&%7`7HBot??DQ);ohMO5+jjEo}a{l>w<euLS*(m0Py@1}uMYiEI~p?b1&05A=Z
zVEyMxEpzlGfew%VsS2iBp)kvy;3L&<Gso8UdkjJMhxkgriF#EWK5kicP^4SFpIYdv
zmNbWSLtzR|0JD$&pi^-@o<7MQ5eUOc(U#qPsDau&hEyzoN~f+}8o2)pRCjblajn$?
z)epR2&-B0i1NKk#+uNnH1}t=#Noqn-7eV<D)M^O>53b9#<T=~8<s&_%Y;w#^uiOs`
zT2z8S-xa3=31@rkHP2^_rfv2)B>sqcPE^Rq;RA0fLPFGw>3z=cfKL$1vM{^C;XE)<
zTtqd9!zb-J?atj;_1asy(xHls0`ivM@XS3&binKtWbg}wInlo~B~Swm$LabDbQ-vz
zX{3yB6W=Eu<X-r!rL}Qj&B>bWmYw&)pr;_Hyv*Z0&#S9Vu~*A0>|cC}5}|EW=(iX<
zeRl6%4!Sodx#9X)b9NAa?S4GfMk4f1^Q-Ig&5Lk`=ktNoPXr;-j_WHMmSW!3Jv{$f
zYVk>_oO8u^0Z4r+9X?}rGQKe3tLR>rhZz)SqWesge#P^ETfC!wM-;O(04+O!QwOB@
zgNs1(T!&r1F?d|06Aa#v^KT4Zau#K;PM*%5g3|BYIr&AW>uifkKT#N4!i5pM7c1$(
z0Dv3#j%V@yxfVjg{4RZX?H6d6R69s=e}{>9<_N|EX&IgJ0SEW6WJH*XrA?`Zq}w^_
z^Hk=B@5$O{?(bhMZ#o7*{Ruq;`n8!yEzjM7#&`Z#gVO$|L3^j^{4T*FFXJfVr`{3c
zKUo_zv~03$sx+A-u`HRzg1*;i#Bt{eAyxu?<QRn<(<Y6>^04Sg*i16eBvKGYq}=DS
zufvoz=%S8C2zFf;<IDY9n>X*HgBDHHda`}(Djo5cqv|3$=~4@03zS+u5g^A)Ii|kC
zz>JD&BLaYfe}I2i$w85g0Rswj&H?SK-rgfY;SL{6)GVzBKcJy1K$93D&j7S7kALnP
zfBKW%GWgXrQ=uQ{DwOSuFYmn}G3jdUu_ozK(S0Aiq*)$%t*g49$mRUMiD^K@CYvnr
zC79ONNb$K3;G0}Dk)&~xLBB>DFS;+xNqwJ!6Bg1gM7(BONT#X#kl(V3*^je5fRv`X
zHUof*VT)6+WzHhh%+N0oJ_%F9jjER+Q6sFHrs)bcM5N=<Bho8w$8}?I*Q^sYYrm2C
znS<ZiGeh6)6<0~H#L7ZC_~xHDdHcmLS}?=r5hu7ZOS4we+cMk!@CBjQG8w{S8u_N!
zY#{4uf}}AU{X8PUZu1NDI>SlLVs?-8U>fh<Q&)S48-ZbMe}M>EzD@z2R|G&GYKT65
z&#Fe|sG1??3F2Q!z2(?YyCWMzURAc}@`|KYBZX3Q<i!gj>&-WpYP3(QAE7L@c$3El
ztBVj(1i7^tD*}Ek6)%k<@kjls1RrvwS1hgff&g|9XU$|hR2$;3hbn8i>m9y9FLVaf
z{`zS}Fq4ZC<?FKnmDQFn>w0cdjeuOh$i>8xoS%OEIj*ygoTz$PC=K4T8A7*S_`EFL
zzHqI?<{<PhHY)0?ok@g{8+fd7dh&#(;*)AA+(}$ZhX!cm17LHsH8V|&Fn}CA9?I`~
z<HjlPCNQ10G?G)u^@F+W#j5tmw`~Abw=`q%cdr7t6M~ixFq|F3m{4?kVA`J@S*vUj
zEhTv-hQZ}8!>{K{tuVh6NxwtG4B@d~QV^3+6~s>*z?jP*<c2Fr$gegdPR0{m&IQes
z^;Us`^H+_(ZoTExO`rX27!%&V?=~E+n5AjH6j4l|Uz#!G1`ywC;gW%{GK*Ts<Ux@t
zYE-kXwnThG@))|BLTPrVd*?n*s@y==)VwtL<FWkHn{~4R+(&_G-pBE3ojzA(g;U;v
zXgt?lz;_OJ_5t=mEFcmod5oyMi<*&qpqImVy3g&Cw(d~s^IYm)GI(~eW0qXi8}9%W
zhWr0JD%|8vC>!`8OAR&Z=K_rMRyy<xz(h_Nve*aUy)j`kXksKVg{%Y_=5zKOZrmiZ
zuOhwINil(3yB~QjZg(wcckO88Eop#@dl)H>m1*`)2~r%&PN%sLT=`XNVBDm%FRQaJ
z9FlZ{&47xEQgW;YHY5$FK){soIaC28Zc}J}&KesuV`k;~-a+;EA$O*)Q3sY1*FId1
zU!WI0739@2S-}lS2WkQ+D0op3pz({-rD#HORI|k~!=59NH=&wMahyT@`(^Sa3qab!
zuVVMmcdZ*`<9H|o39a21T%A5r69xI6CBXbi&tentD6;=vfAO!91HV-T>;Tv&;EiHR
zW|)Z)7g5VW7EgZ@=!*#qyd>3P|I9h^rh&M)03dh0RBH=rFi-U-Jy?a&=5Pecxi|;C
zzYq$83<BVMNYv^T0!rs5$Dc_rO>RZ<t(pLs#Go-n2GvB;4k4u{*7?;`89bKX>D_3r
zljT(6{L%~(W$=ULX5fe_Ab=tBXO>DWWDvmBX90B3{MjD%S`11+`4{M3#|+HE{0jLO
zPC*+gKX1-&H>h}XdCPBPoz#Q!KOZM~6X<XQ*kn@UZ{}P6S{ae7op6ohL!?E9q(#@V
zK`$bY;rn~NRKkPHW6|U(vI>6EdsC=NFz)~;^1X|;8u$K+c?#Q|t3xRcJTjseFfh&<
zs#O$&yJqm`lD<ynF`0dpmUL)_m!q?7c(-SjliwyuL{^xxrD>Y|QRF_afC({aaj?qS
z=91wGQxXLmAOu>tEB9e*j8jCP>dZd3Z;DZv$-0b}!8OigZ5e~!_rE~yl^1$<d8yD@
z!c6v~G@cL3UAtvj=gQoS#ZBH$d3?QTU?UuEe?xd>!hL?;&)*=KaN<yQP8^Eo9}eY^
zwB~0KmE~32Y>95QIK6$xs@W67MJ1p-<Rm*6*E*-I8#%)8p!ky_w>~^fbnkmH-Bj~z
ze!iWkSF~@WNn1yQ1@F9*?72oE_4(6n^XG<l$H2V2;inY8YP88xVKY)U(Lfqu_|syT
zNnD5Ux>subbclCi;2w<8)^nImCImBF#FGL6X7^`FEd#cUHp*|)Y559c@!`Hr^Y4>(
z$FxzElRe7Q#g32EV3Uq*07~y{vpW?lG~@&F9`Gi|Gb8`;CWGKZqHqW4$`i!#%&<!i
ziT=Akm^?0hHH}X7q@7k3Aa74e4g>|tTk^_MoJXBIoN!qzOfS7(-pY>CtUvI(-Y;cm
zXOtFD;65p%7gNL<c<6DU^JDeu*@^l)QMX1nG?)X$#^<EB!nUk}O$^;`NJU7ThbNPJ
zZEL!%U-u`+4AOW6e}^iywZY(|GS1Uf<O2|*?M^}e7mvr@baf96-VuPvL(I}ig%f|9
zegM@2T<MkAys6-fwG%BN3ANWL&}qNS2mLm@$)jJOFvz7?8`E9JjktOscqLo6w9_+d
zY89q%(FYU7g%HrSc_eo|=yDe!q2%JCVn^eG$qn*AM!xs4UK95M=~z6xR!X^4RZo*~
z-k5(M6pcEkReQxOgtZ&kAq*E6Rk01dPl(^LY*2V~44VSh1tzO5qt^Rl%4^rz=79`+
zx_2UYA1?MFVx8(p^2goFenZiV0#C-b=a1XXGK;Qn0G+Bgvg&<Q?>XfJQ${{-%V}sz
z714U>xbV;q8ZR90E<CmVH*^dRRBC_KH0I0rTyc=sWl79GTB(8q*FJ|UR{vA?NfI)s
zc>nZAe55AE;)SDvO}2IUew;G3;o6u(A+L5PS>)KN?E6z`gywqz?`cvp3Z49f@H!X>
zJ>uQWG5BSnl%RrA&f1(gTKRPTg>vgB+ccLXxb&}{A#6!O#9XdFQYyANsV{0T&t)oN
zz|rqLZeW7sR^2W?p}1QR9eQd5^w8yj;E#>=7MM;7z6*JHtc+?$5tCr+4sj>q!8I?&
zI(_opCzLjNa$rR7Kjy4Be_|3;pB0CU2)-Ya{Hz60cckzB-iz&Se`q2!_xP!gui)U&
znszZO*JprtaiSgbw~DO)&fjs6vHK)d@xbYQSvauLrhQ!C30o>NAZEelALB)VB^EG&
z9X%WtYIP+6ib1J<y<E^iC@Yc&2YywWE4nmT*w125`pGihvIFP1q;Ye;C0MI#J-aFd
zlh|ko1re^7;S*;!VD(=R`HknPH*B|Iv?j(MoDuXQE~?)*KD1xv1dHgwS&B7@dB}TH
zw0QD4QB#{r^Di7sp(TcPw|x^3=C?KU0^BA%BxgNo-untKp{VU3Ul|Mc<MWi~Q0RKA
zOQtK8Vr2yuJvKH%#0ldcKWQ?-@!M4FF0yIh@<*0@*2f-VecLnct#Iwra?V{|BOAb5
z@z3E5NX?4Ca8h3=R-egz)aDRUOdbH>y*S)S5bqt-m<dU%6;n(M^+uCoI&MO4p};dR
zK!TA3sY6L>{Er}Zzx9N!kjCWb-eID`ASJ=^1uMfz<%#Z?(Ts{OH3828pB~F2i^9)+
z-oI2+^!Ey|gJi1^Y=;FU7KZ6VfPvV3zBox*jUdrek2>Omk^}qJXB_}I+#r|k%{?k0
z&fX|O0QT}9m1kfC{^zy$A>d{hCBP7lJ_*I2O$Kf@PgenX`4baJG~&Fo%mHL8zd)CU
zfIHdJNFAut?8Ob@ry$1(1;D-Dz6@l51OCes{;qhPn_Lz^Y@h^dL3$1L2X1-?puFZD
zWbB*ZuU199FsGo(A%y8$5NxZVp`ab|V^IQ&L+zQWGpJ4D-xsF-rIg?GME&JWb^cn4
z-YUaAs<-0nXzU{|2=GjK=h+i0-9bI=38EdDg#>_!9lwi~3-x^H6+)00IWeC3iScYt
zjCb<LvXhE4pHw{Qq&|8pyE-1cov_)28frkV=Ms>Ey?FA7m%r@>f8j^LS%Lj+wPN_k
zu+Iujq>j`R+rj?Bb^?DX#UQ6o<tG>5nEzbX15~3Z@}$vJP8#hWhCh7Ua~1M+y8h(~
zHmoj)wD6B+pRSJrYEJPVH3yz~zXtPFq0SFzIvFy6jT2HAqToDx(#}WwYhzU?0O5Yo
za~9Gp>e^V@@-EW|!N1tB$^_`(6?_aG&_Nqfy`zXuk5Im_UGQb6+wi*Z>XBr-V$Xfe
zc*>X^QrNKwUQM}dcz%R*fp86{Ymy$PrA1XgJZvDt=O9X5u#hp~r$WzqL4$AOjz>JU
zWQHAKb!=ybht2emHzYzV6T}%1&l=haGHHaqDRTrZ&a`wEASRq+wtliwaYV*bE<}`P
z*okIMTrf4!u6Yo1H=r+(Z$k*d%ZK%~*5LlC&<!4qnkk&P)&Xfik!dl6az266j-Ibu
zih0$Ob2rst@2^JIU#!K}wBGA;Bf?aMq{UD1C-B+v&VI`%j~q3LEhdYYb#J>d%?^T-
z^B3A(M09sFvsw3VxB=dP?sKI4P)&-7-7?))(gUF=c0!g~Lj6@G`*}<IovUA$U12Aq
z5Ew`?|2zu+)R7FdtZ?)Vb1lqJwxmYGfvt>*K2y_`xG?z_=Q2w@eH3E7V1o8PDPGp`
zSIWz;`Q928C`*xl3)gt`u+3N1MWaPAchOQ|6!o+=h<X6%wp0!Z(ocVZTJqrQ->{D=
z0XzZ^z&`lhWesw4?yT!809vDVGqpr|aiHmEe_xvmFyoBz;=e$*!|G6YEh7}})9%rg
z@8rL`G7ajaCj;GGVkohg=u%9LpT#EDP1C`wVWS^WXAzQ(Q_LxI&D)v%%asq9?1jQS
zcnn)Y)q5;0fNl#2UhTj(Gyr=3*8W3)RNHbQ8YugAJ?e9at+5iI0QtX*KIY;*zJqFO
z#+r~?0a*ENeSu!&5@G)n02a+5gj-P7uDhh390Jg6xQXhq1vTLcQ@-WmE$)?JCx;>u
zz6nv5rRt((m{vzAw!$S6X1K`KLJxZ|;kBH8jCk?;IQ=Y)BgmEw`Zn)++Gm8vlXv!>
zS7I{s-wV3wnpj@=&eH89JmDujWo?XVsJ$?OPt+fD)1qT{lvvzRPf~a26&5DDsH*HJ
za?1ZqxARnuQe0ytWz@c31`bkZ!6FKUbh$;2qzK_Or$<#1@3UJNXJ%$T+P89vHVL{3
z@hS*>$VXe-*%$(AjfOhBe?k_s>LPKb_y#enc7DQ72ncrN<fB$N%2q^FF`3~@yy%g&
z6mYq3mlyiaQ&t){9TtLxboT510{xzs{zsSsa)R1UG2rU$;D@5qB{G1@Ft9b1{I3r;
z02IdE{(E$&BtZp34L-NbmQeQTuKjM8+?dJtym-wwp;BV&j7rbybe%E;7^Z3PNsY}Z
zZ2j&;3CGeE@2od+FH4)*6mc3X?vGjhwCaj!47lRk{dz67mHhZ3TN~xx!r))qQ}(tu
zqip6<{ZS>w4XLkR<CqPGtKPa@krt<!D1eN{*&J9{`71lJweV^18d;vYH~Kgt+BKSN
z4_N0zxvkBm2CFo3R+uB)J!i)O(rfjd>w&ko9Tdvqte=5yjIFwS<Jc<-tzk*V2)sg4
z`{E*78%DkWf@PsV78w6{KtF*pasBk_*wB-T3!jBL^H>kndHLwiFfJbfNi)aM;yG|5
zk$<TLNZCax2w;Bhoe?iYs`kRYtXv&!M}(#5Z@;=#5fKv;pf?#`56HDEo|cT;3cyFE
z-VZQcdf~^j3{|MaQI>}xM&hj5!dB#hm(5{93l}(}9dy3GW?nRn@;>}N3lAX)BJbnj
zvr%D!xW~tLaoV4;t2xS*kz}(!*HfM+tA_m8K@Bb-Cyx&zX}z9>;q02&3|dlO{Q}YM
z+5uqTaeO|;eNZL^T2VSpnQkX+pWv}x*Sh~LTrB<l#=ReP^uY2aZKEMpyN|eT1qrEz
zKVK?yC4q4JVL0XbN`ufUd6$tS{yf&CG0&GD#R5XZGeAFFa=3?^3%}d!J2%B@a0g3A
zJpIsdjhpP-&!tpdm$RVAOfU~IQo*VasZ4wag&?Au<X%*km!KY+ukQP1%|8;(?Vh5d
z-ji)l13&xcqt7f79i|f<CtDq}sJC;e>&QWLjRSkI4-EInDg-A+>o_)6RnN4p+Qkjj
zFyZYQLrmpP$J@>NDjUPawhwNH3VRtfptn?+PiqyZKQ3^gVjR{XTt>av<EmXE-%g{u
z#3|Y02gaIWf3E5vZ;d>DQw1*mk!06(nyPKv#N^G+X$a+G*1E~4Do7FG!9dl;S)#BG
zE}Yq%T_(LLTy*|By8J2UlZpVg8v`7{E|Njr>O}*^=GG7~6TS;Q9UB9Oy{AM1O&Nsw
zG3Q>5YAdl)xb|+V=>*hsgfay6E~t_7QsIg$O!0QJg{Y=6tJmVs8QmwPEzjE~B+{F7
zEwn7EM2tx&w4s?gweN6lw5Vr&?XA+Wn$y@EZ^BLE9UM?!ilmx%Hh#VwyHufeA1(WA
z=K|ib>UuAg&flr==8w9;xNIY=3cE0==cW+mv|<p&Ez$t(LRP1;57Ub#k`1>2Plm1x
z4HL(RKOj_(4yoZrS`y#7qiEMW<TewKHWI{q?MjW0s>gzd)M8F`OWZTz0Z})>XyEh*
z+r1nh2*X&1^utHM8pH$rWN;s{DkrC`y|tP*s>IG3#Pjk?KX)xH@AZuUdF~oC*GR@;
zV0Gdz{aEp}J~$I|ocs2aja8F3?a>(OnR&gJ!qv8fn9Ms5t;gK1)JNXp2xI{w#Poi>
z=7M<nEI^@xV`%SeSrp^)fzuRTq%*%}mG<7@^{XHEVjg{`S8a`ZSr44P^k{(Z@uzF6
zlt|$@$55_oA(%3BLzhWYDn0)fY~st^!##Vo+0BUkYPAQ0OZt-{{RO&SSN+5FZbft&
zN<O+NT?P-IX354&rrCV8<>JQHzRJQU=iq_Uo*=K>;GfNH+00sx@|MbDZk76f)Bl-n
zlui-O#6Y&^*jyZyavm?r{~G+-QCu~>P~(!#*t2&zJ-(i<pUHU)XZ7VDNW^x6SUKiH
zm;Jbqw|jwe3S3sFxT@^dSRBQk^TnjaUMi^<RuOts2NTYqnR{>0=64w{_zCa0Uxo)~
zPa^ej!)Vz4vsx$g*qi-lh$hriM?sHqn*2b<o!EK<J37}cq&O}ylm`nBs9{5B8pkzw
z`pMygFyDO&J*F6AG2Pw$m`$%?&`f~v#<_wY9<D9Uz`;9G2F<YZVk7~iB+e?Vh8f|C
z;i|?k4=aEH;0x`lac=OO?!X?kY_!yAJr$!%ZzYPmb8OnEh9j0XtT1pUf)ie)2#?a)
zu3=M533J4lT-kqPH|fA0XU*Eywr$RioMVv$dG}IHyfJv~*I_O-Oya|XO>pJ;d0haj
zGo*>_Wp@yb{}pt<t|jHiBH6=XUNJad$A*O>B4f_#HR&?)G5+1dquOR)>RZ^N9AZdj
zBX+#8*8W8ghtQ6xndW@Q;x((gO7Z8C5fg`L;HpoN-wF-!o@&)Y((-L?#Opu!E53cN
zaEfVU3+!BP^#@VJ+F>m4#t@d8OKQhA5U2-3d#S-GmvB!i7gAqyx($|B#>(HI&Rn5b
zy$u&qsi4o~c^fV9v83j*Oz3kG7ikzqUFP@-%8ff6H(PtAv2aB$Aq|VNX2~h7^xAm(
zMx|W+U143@z40ecX>y{;4z$DaP1cg!Q!=dNCXCE)qR*>`2;sg4W^xfYmZnODXTOyK
z;Z3pPV@0G?FH`^#ce`(I>+CA?QB8L3GoJ&)*nYzV0qgQ~Wq|~Cs)MJM=q@vbR8HyI
z(OOe{&+Hc4+QHP7u-n)@beMv;dmh8F*1RZ7Sps?41Bc_sEudDDXgRW*5c57$nR-Qm
zrZGrEE%hj(M=#k3_jm=T@d;2f6Lp(&OS7*VV~IDHyHd3H-?Baw)cz8`rm5G~sB7bJ
zUm_2f$|v=wZeK$(0n5NYkU38rCbt1y3fx9Fj4L$)<W$Q}ph=}Cv8|@CkcOgeSJhgR
zn!PTa`eLRe0jFlPJg(!q%tzMCg)csX3(mk<U{<@ZLCq2bd2GR0sqpDy=N!8(S0BYZ
z^3LPSai7`>en9no8|h>xL#r{0KJ`c98wQxI3rN-4(lRc3>}15jOG;vYrk^Htkf*{K
zby`6;l^cg4KjheO8?JTU(5PeP38X5HF4>N%8Hf;D6u&C1)`=tWTR0}+o&ZFo{X_pV
z`+n=!dnw)nr_rRThn*9-<HP`dgPmB)-K}FG*mcAHH6HGL8Bo;Vks)Hq9-h7mM|#Iq
zjC>&VPj5TG7*Xr@hiZ2tjX_5j|A>`~V@c$PNq7E7Q5x8Z`1+p(hkcb!+&!>eEz2P!
zY>g(9S^<G3Kzx0btUnYu4CcQ}VUV-8q%fD`T7%rDX{WSAn39^q$B_L>QCInLbwaHo
z>gd%56!V*^#E&73AII^UszW4hd}7lS754q`K@$AsR{zgi0%1g**axpPBw8j49FA3)
zdkuQ0GRh!JyRF+8e+4^ZVmwk9BMND)d`g_kR0}JcADLPlc4j!T-*s+^Q7jE0yU+TX
zgX4YdOsoVOoXW4q^(TJqE-o=b!9Jg$ierj%6wQtt%c?FbEDP*RyW37%$!){w6uuOD
z14I+FeuFD@NtWdHq?fAO?wDl-w<0_X!NV0)fbDLId=DB-AvaF#-dW>>K30ogO!i$O
z>i9`UzAH*KBqk69i}7k0{h+)F^Ddh?`=@&=OY*vAa`VkJ&Q-LfjMrFn-o!Q*X`k)+
z8U;Ww7~ED`e}VELKl~-#6tB-|_7*&Mbf=CmUmY8Ll&t&x%-0D<vuvYN5$VaSa|`V=
zE_NS)V@fI;XNnue!FY(~_iuwDQkt%goXw;6d7p+M#}S*(X4&ep92`r~#VGirfWMC(
zafMU>VLZ8qkskxeEqQOO5OM+W^J-t$qbMvS9+`jq5aH6%1Yv<$DN4-phXUaU=B}mo
zr>>VXo02UP>6`S1uo;W`Q~_PSdVjYd$R%G^v}}Zm_Y&kYf8xc>O6Uy*)hrm4opTk}
zA0j^uypFx?H)ZLSvZ`e|tl*q$4Hf9$eO)D4WHD+{U`PC(-N-N;eaA2`w)pK+r8@b}
zryvK@hjU-Yz&c5$;5(qD-Bb>1<L<KD<#M^jRG$KyO{N`qc~zvw#%sNYTvt7Lgz7&x
z)@BQ#81SB2xWwQ+Eh+=F$)tc@S?rJPF>QToK2<ec#lg>WNnd1wOT#|cP8%lQ`I=~e
zRt@D;+UT9Q9dQu<gx4i#g1pP@-L2M_EQ(aD9CT#+Y2Xi17C(tRTx|VFWgMg#DvE^#
z7tK`n5rp%?R07yeLA%BHBQJ>lXbpH7({^~DJHX7b$#E17fC3qy;s|OCQ=#a#E$O0_
z^+WVirUNgYnVotHbleq*Y%oo$hbm~9#6-9kg2_%Tv>n#3Q41+_m)#}8bZqL`LhBl2
ztyIRY20Dw_y6#ZW+<2t#CxX0F2Argk-|Q#jW0|j%wt{908z29C|H?~hMyfOg&)qaS
zH4J>6L=wfnbet<bhi}x!QiZ>a+P7~>&u(^fb@Z_<;=7$P<@UrwRn~yzQLK#RsV}?J
zqvDPGP{v*<8>AYBR(nYkXI~p3wr4jwm#}ni@Nt~q)5BArE|%Iy7?9m8euCZnoJ0!A
zTD_q9Nb}gRNMGP#lW?xQ>ojO5a$X<hU#0vBk=(%MkYBXS%*8P1pqFHSX%izmHEqWT
zs^rWOO2;pQ6?#v@U)3<!ka+R=Gh1AyQ>My!h{q%MO3KS_WuK+QH1LVXPsYp_UA60;
zCq^sWm<F#S<)0SW$vNa?;X}8Y#^A4>Pw`a)l*c3^cB>zH8JC{jiY#D-^JXi~Ixty>
zHD5urQfBZ*k!nYVUv2Wns_MB}sS4?7-${|UA&?Rh)AthO1#g7;Th>Fb4=OlgAWXI?
zFkIJOs7;XJ(Br^K(Sl*P<oZ5Vi7nH%oBUcmU#=`C-dARy5i6F6^~8W1^X)bDqBL<H
z#l+&v%dxQ1pKlPxXJ&jW9)z{s{I<keoXO^9#B|-c3K!@FNF|~f;!H&}t0y0|wJa+5
zJbCZAag)W53hR)HJ+7AnzubM}*1zk+;<o1YkZc<hVQCuE;*s0VaWU0+dzyjNc@T$~
zYvK~Xsx>*DN6OaV)ZVzaNmbZ8*l(HN*cVw&ya0=V)(7I=N2f)rQ+S|g{6tL%cSoSy
zHoZ36Mv${>#p&NWt~e?toO!mxOunGYPhNy5oX64S0a4n8?$9Z&3N3m?S#bjk$(-n&
z(xSrW7q>R`ZZ1T1>;&B8a-W?Z4QVu|AA~Y!<yfrdLmKOIeFGdED7p|M(JWbBqrr=h
z&5{>{cevi>AC0W<q#;7g@fmrOQx;e8A?YS~lrB0V{`OTh2Ao3}D0<%X8{ZNIx%tPE
z40T)}O|laA%!i5_mt9kJ@N6x(rSJkM7Xo62Q_bbK7gzb5s#|ID1T3t=V9!gTtGdXe
zecDUGGYu?-_d1qVnwfN*C6(nOzU$+}vSzyYcT~StR!2_3uJ@yaZQFQN+9MvR(8xZI
z9lcR3iyxmcaU5gc2MDkq!8B`FV?6z;7TkQqL1aEiyE5oGGt<6}@D-B}BiBs6fo74f
zB_z#DYq_LGtK6|Wje<E7b1lZ4!{2Dv`-Q^PXVqEtRoV}uQp1-5KWKR;X)q)kUn(eU
z8*`D2OG!1iKJ{e&07CTkH7HM!eK#yif2$V3Lq#>6vUmD>fgv8HUO8AMf9xx%T^8Qq
z?BylVNTYsQQyzV;CUZB9de<PFKt{~2Qan~!HNOtPS&dmU`;m%1wC{~yjOrlyUOw5;
zm5>oZ5(->6kc~m+<s&hYaw5x5%<njwM&-6){6+gr;ejMU8>kG<B7Bz?DxbSa5cFkA
z^}XESA0-l<xwi13DW+Vno!d30TexVasw&YwZ!qx&FJ})-`wiLS&|(gZUc^wns##so
z$OvIEMl2%{`XFCWCpLF<W_Km-CX@B2Y1>y5Qc&B{v?r|<4&=+SL_xOA%TL2QDPn2c
zSs!v7J>>8!C9@5kQP99CDhS{t8&X&X3#Qm1^p~5iv*jhuA3xVz^o<`Zro_<8aS&RV
zIa_=x>N&MhkR6g1NJ9nKh6CSJul~I1dq~`B8$$hE8VTf@at&`k3ws5BDiEq5nTd#4
zH%b{Q(`4Da%NMPowdx#8c4IAISup9uBcH_wH7AL`c2_*rsx#u|Hu}id`HU5smFf!1
z`4{J*j{=3WclR^MSz5W?fUHC$pboC)j>np)_6mX?u>OSW11I9i+Vu-}0T74(!?1-v
zpZ8gl_FpAw_CPBt<XQhJi1r`CX#XppC+LX3Mq7_8PXc)F0QF7yp*X69iWn(z62QAh
z^;5Y_lzT(-CZ)3ldzmH?Jkt{;Eh_A}{~;v^<!raG<e8MPEnzwB#5gv{{JGiPPUK=s
zY`xIi`ABWOH{s{ioK!%_`xrP`Bc}GJ+KhPcu02Yr|4CIy<CZ<(*geCvaU9u$>SJy#
zu(yYuM&<q*PdMDAmy+gIYH0D6$Le@~RIm$@5BGG$KXcfs*R4=rzUv%jNY~nL=y|Tc
zvM_YPO(^r_{dQfuRakIpN%ecknPSpK#EwP@4*bIH!yWZqRx!S*?Uni$37OH#Y9eHv
zWC2v?Gst(?R*<mctAuOV;$VVCx($gbSRO(@?i*zw!wh`MeD=Q?u@pb1e~<O^_s<f*
z?pD#SeG659e4a{TrY*3+T_0%N_pG6Pd3i@_&7RIs=gIMTAy~h@R=d;24kYmzFQzJJ
z7XG3X3_*bRg?o{DWu@|?NUi-a*~?cFv+l<7(Lao6>-=;lg`HiU8&i8r+TsO?VYtlk
z?5d}?0)VyEOFm{=;ZjLxlX@(p=guAf<lMP(rB&BZ91MuSec1}BQp3~+Ch3KQly2em
zr)Yd174Np^`1TRA$>-ljdOBPr90yRUQ(OQT{;82Eoc$!0G1A}^1@`WRp9oGruCX;{
zW9byX_N>)d#ipm0;hdknKDTnb1Ra(KUNdWr@MFOX&osBL<1jtJd9>(I>0*YJO8b^C
zHqf=G3pNKj^5*Ss^NTajiTjJ~`g-Hxp?guUYfI?Dtd6A#s(4>)&+X5B#RQc{Qa+pf
zE)HTwAh5GDPt0`Wy0)BwCbo9p0`G{eHTsG>9W~1?54q6j&|5fsm~EoF_FCm@TQfK9
z4*7!6CKVMB-M)Z#t%vg9yJq2`H4-BPvY8E(k(s9fI<>`|2ZtPHiwc2uNrj!eoeyB|
zo|rg&s^bf*IBz0b`~YhZ@)3{2HO?Lc*N9Kp)X07Qj5o0?7#%JBICpo*{j3<9@Q0gX
zr+zduDL6pHwr{fMFGsQY{eYb%@gTC7ngEQky`#H)i$S&ZQpy!aqnn_KX7)2n+#ndD
zuorL&P4EEdDSV-2rU@dsvHU2|hO)L~A=pSXyM!j)=y3v*nu^e;GrDAB-?~qO6ioJ+
zu~QK<MEKnB>)Ex$ioOB`AI0j=u^#Wzsh?WY$iGc!vVU907k!Z;RydNF8Ccx16*d1M
z4A_A%JxAa51K_2sB#Y?cJSYo+8|ztL4ZAl`#hf;_%*2eQc6D{MHsrm-tA6k>ZinG^
zPhPvi08$y7*pMPPT-J5}K_BLO?T8c{ujaUD6?6B<S>l?%zZ03n4;R^aR{&6Km5XG?
zq8qkCU|+W)39xKpNUL#TsDiY89^#dj;P*k^s&LUC+6!No=RSn1n|;b44?VKn@934<
zy5RQ%!o)pTz&-dls8R0a`qc^5Ga4VuW7SQ;TfDgs)qpkea2__NVQe^PwvcH}J=9mG
z(NjOMQm;gVf840hdEbLdR^GX<WN?^-&Rii~>f56g&?}DwaxgUk8%2c<=UEnC)~iJ{
zIw{ixt<r*!k^*%-6o@!X;yQX(wp88%uM!m`?XwoaHA!=QM7z)E`{Ab-r}bkx;$<ll
zqg_oCn}zeI18S&<`AFUwDo4PRXb%%)Y0Ih$+tERqLHB~EHwrJ#9G*LFlQ%e9>%{S_
zW*iG&KgJ%<cBXJ8HcoLw-E{;vx;tjoj`2HwRdM<;@aZse0wOB+3*<%?U;@K_LSDXn
z5H6P8Tj;=#3vA`86npTaGO!gK9d_wj3x%ZvsGifBd>%$c5FPPjB)H-Ad^7L@bug;*
z@86Q)m1TWMBhk*9OQlyOrQm1F!r>wajoaa!bwRP7G``gZ)|H;GOTR}*8hkDravOiM
zy;M^3@>!_f%xqshh0g_ZPwe#mL%anxr%AI2Z`t5vXm{b(@cx18#Dtk*fvA69{gV9Q
zm76U;_rsn0tMA+ua=MigqyV-B4oaJmRDOi3-B0Q4$Qc{AtMu_o1AV^IxAC>{a~(um
zcrld>51pQQ51~^q!1Pesm<#l4f3vxq+JXYDy>zG5SbE#aJf0qZyQEdn-H6dFk%KIh
zc%~OgosM^GGUpj~>rxci6F+aN=ALJI`tb;Trslb8Q~|I0R4JX<AnRdRU{NWA#?d9t
z<jdRN8eZBc#~toeXO-1GZZO<1R;F<{E5ds3yas@;xJHa4vEwxxQKc3e7xPw9AOp{(
zg#o$Dr?f*=SBg34M@`@&wl~`h-m;!qHc2UiZ1Bv8(;!ueU!bR#p~8{!)%?B}o9H=(
ztS)h?^p43j3Z0_Jz53+m=>3K5GZY53Hnpb-?3o1kAc<#m`RW*=<3X8;{D!VTjPc^-
zpA2%rKby|<I`Dch&|s>1<`UPZ9dRXdiH(zX=KUMes58sP0FT3qBTj>}!BpEmiTQPO
zh^mF)E4PPryMQJH876FtngB`Oe~VSjOj?L)M8IYtkpmjoh_*n?fKJWj%U?^aD0z%L
znz}Q{^bL4f8JUBT5Lax^eifcB!Gf9~jkS0GQfM?`Gj<xi|Gn_el%nFiMOlGUE4$F0
z^TA^4hQ8~Vno0nbZY3UEiE8xKoF&%2km{QiRT;C}kkhmA8{*-4@m0O^i4EBU`JV}t
zs^C+(c&q&fc%vDj<DFr*p0iDr!-+RiIvxPQ%x~2Z3D+YGdJ<Hl(+u3VI~W?>(!j4a
z2m(2T%h>ATCJTDJni~G~EW5>dT76iAImO7Wwe}kPN9wm(rk&(B0x+si$6F@CBWnSL
z>)~OfFD|4x7BATd<3!4hn22o_?v<6`m|rS{sUOrkJh{(y_~>r_mzbYly0v`oz@ETg
z2xDe3JTqY`gEeFB;=<jj>nPhcNBQ;dgaG~-c8|_^@}$Jp)UW7<p4U*`Djd}-0x#uM
z9`Z}8kCegvvBLdc2I?1~DjF5%($v}gsqTQb#m6o-S2~8mo<d%xNMh&F1CWiabF1DQ
zm2q2DHf);|7E<#G(apTU^Gl)@W&+tTHeAoE)N{oOo;d2+$Rv@C0wmcidwNkjjr{$d
z>HvlBXL!TiA}{sd@oIVeip(4c5pu@?5gZFxAxQJTT(w#Pka)%X3VaL}-I&T)x^cay
z$>SilHG2wWZPb;snrQ5Xh0k};?In$=&$C8)-HZ6i)2-snyRQQX&Jqzne}N{IMt*?~
zuluvB!2Y6G3!9B20KgI$6xb;=24KJ>?X=S;9KwHz4(`_eRnqoTQQk2Z;rejtc%d8t
zQFGL?$&aomtHe!J^GT}yP|nCOpN)SLtweF&n!Fc4_n+i|^$iGusbEwC1R&YARpJB(
zs^)IItGb^TwlOMjdZ*`DwR;@d>Dr%Yh?kk&aBUoW7UAlOQC}~ec>A;jkchQ+<@eEa
zWo+oW4qM5W2HEO7P*A}}-NY#l$1Bd0xC6DWG0CGoBa(H#sDT1$Iw%_omLiSsH65(l
znkmmZyv#%Vm~PtcC@RWwEX`yh@ze&kZ2<hb*r#Ym{0XxOV48;JTVm151IS0t7{t>N
z3!P+-V=sad3SiaAjz8N;c5AiIy*Bm{3!qsPsTvK8vzjwW+#mKBtQSeWIQ|Up(6jSR
zaoQbZJ<8`zEd#HdA~Ggs5ZJMsuV**ZZlf!4eC0_A3M&o-_$M>dBG^)VXCkc#D*$D4
zC`C0SVGjsA)7S{=RC-RC?j1m94*O;mWj{Frd48-^F;41v7y16)=`6;?0wg<j|J&?_
zr0TbCI6e<!hr1G^_>BAI-{KUrzJ<;{ONQ(YC1v558x+Z#4vZfh#Nh+aN;Mg7km@n^
zs?8jy&Pa+dpDRg`wNbt}od@g|_5MBM7!pK#cc)oQ5h<+hBZJQ-Yo<`=ExIgs=sZTN
z7ALQb+tPTXQ9sZFg9*I7NNM*^rp9p0*=ZfsNXHE@y{$}cTFPAwsrAnsyVodNZe04<
zN20}h-p1}{d^DLv#F}Vna=D@X1ct<l3?Mr{7Bz$a!7v03e*6j`MYIq;r1M*N)t5c5
z>5`YcFev;TcUbx6T2#4j+JrsexH?<4uM9(Jf%AQ^`|mPLC-AZIqB8uf&j~tt<EHH?
z{S(bjC0bEOwp};b-spiZAbEQg;O<#)FW4*YxVtKm)9YOPgU@y9xPv6>o$_Vp65d=>
zJW5Kuhc`*LHJ3o+6uz$I=b3B>QUb!zGhgLZPapHWvmYl>p0F}O#vecZ{QoviZWRn0
zrGy<C2G~`wR|38&Y_heb3L3dn9|OIv@eO>fbM0+nW|;{E)v7#Q28cHV`Hu)6oj%dc
zDh<)>tSC8?+Ka1+2EtK?3$B+NHBX~fk_#bNV3Q9FB~JqHt)-?At(__yjBr^lb==dv
zbsXC30F3C8MavupvOJZiYmn0f#Ug~y-M%6Rqy}d~avV*$-<83Qq9;8z4v#C0vRjGL
z&+?G}-gvS;1<+#tIOaWwByP3d?E_~10k=3jaJ+M5@&6jF#4bN~{12ne{x;g>|1_G{
zNb}aup4pyOT5VdjCS63jW8r4aC`EY+f34xhywHQgtEGiBvQ#9aOS?z&<O7x0%)k3A
z9nsYqn;xU)vTJ+*hhk3<9i!Uciux=E#@{KbZHdBr^fXK#(czDQMQ(mE4nUqrZUD-J
zNq_u2+#+$7V9}(Y8YnP)Kdlk0foD7?hys+Ot*Bb;CxG6_1|&hC)A*H)lOM$?K))M@
zQKOdS=e7t!|F?kuYa*a8{oh{ncLOc$fGiNfx_K=LIDt6>pp_gRU!$294~OL3=qfoV
z1mJrV<9O5R|7WJ}<@tZl0%oLbHvGRC{^)(+2b~%k9rI&x(A5%c(5ayt*PSip0iPN=
zC!PIg0Q;rITXhXNAKQHN2d^4qxn=pF_<EP&ZJD5p4xC#`j*H|<Hy$|gqR`uw!DQ8(
zRL4J>WqlPgp!;%#nJ+bg#<Ba_MYaSm<uY%Tc8>ubk(#l9<(92x22eiZi%72A6rdaf
zJ3~T%LxQ6u8}M{Z;9-LxHKE*dcv7A`>RsH&sw=`U1)?yQeTB#t;03!Pd=31E8WspS
zFwFb11w76O8f%1O56}g?{Z&yKjKJdx!NxI7V6fr=xfpr|KG2@bUf{vkAjd6rd}R!D
zy&CYq5)qJF9J>tovp{-#p37X4xH`iyz~e=s-=YSA6}D~}2fl!gK6C(H`3KU%_}1;>
zoX4|(ikF$D@q_@!L4&5UuQ&^I@C=~GfDUDdYX&)((p8Qq`c*;3gN|=S@-edSM=+cH
G|4jgz)nCy7

literal 0
HcmV?d00001

diff --git a/cpp/src/arrow/compute/exec/doc/img/key_map_2.jpg b/cpp/src/arrow/compute/exec/doc/img/key_map_2.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..4484c57a81d1860a2befcbcd87024ae69c58710d
GIT binary patch
literal 43971
zcmeHw2|QHo`}djb>md6!MDZlq_hpojBwC4(kS$xbp)n&uWC=x!P>2>JTZFN1JsDe+
zov~!g3}G;4-lJ#zJx{;?`*=UU_5B-j#yMxMbIyH#@9VzqYq_s`C?6>!0NXKrLwx`O
zfdCi5KY%hS{6r7od>H_Yje-3D0MG-p028na+}j5pT!L@_RNy`Y`~x6zzn;Gdk^i>O
z3DV?6I0Mw*XaM@!89{&!JjV<kp927PeZd)Uj~?81UONcvAh3hL4gxy}>>%*J5&<)3
zcY6<iZxK@`4<8Q^J$q*#kE<ebvIk`k%E`*g$sLqaR!~(0{u-6Mth}7MtfIQSvWUEb
zx~!_YtQr8&-lqCRD`6MBP5tY>Y(o6EeJT|N07yvqZ9g6In|AY?=EA_X)jT_z%l}Fy
z-Kq2r0y_xoAh3hL4g$X-@Oy1TUR_>ZT}~1B-Z}6BS_sw8It4B0G2sVjz;mGQg#8Xg
z;Me^Wh~&>Spr2(I<QMM=d*-fPzw9$F|NQWaKV84&s{tUCkCc8u7oex5rK6>xr=z1|
zV4!DY=44@JVq)gw*bU_r;)e+f@e2xyh#!;?*>gZlP*76khXb;5ii(Ob2{kQMdCh|g
zit<}GfiN&IFf%dpvas;ViwcU$Z~H~52H5EVYfyuNhyuIVAyn)TN<9DrOGyKIm$n>C
zKffTmsHkaZ>F604nLvVKHn7H2RJ*9DXlSTGw-F=={2riYr{NHlJ4Cztq%GYZFHZUE
z@sH`n4j0sLo$A1gD_pu7!obMQ!^_9Ncb|mh5Bn9BlvPyK)OC;O=^Gq1Ja+ob*>h&*
z=Pm5)9WFaMIlFlK`1<)H0s=#CgoQ`kjEqW1ymL1x`QH5pPckyIo@VDfd;YqxsJNu`
zO<8$uU427iQ*%peXIJ-!p5Bk2`bNia;}erppQmRCi%VaZS5}E@>s$5Os^>4SpDOl;
zda;A`+C@!GMNPL=FUT%`aHC?UrV*8+<v4Vb&em)99{KC^oQLBd7t}C_DV)M{UAo%A
z$Stlox|gt3wQn{1YZVLmf2!H975lATeE>5R1Z+Gib^s2nu1f+>f*F7Ju`B%9GYa5%
zn6bZNu^e+JN@t+jUM63=`s==DR*2M|TGa=OLo;FrPPOv1NI2;;g==v>dKZoN?g{3D
zNBi8Ao=E=jO?*bo#A4^*J9(LNf*12VRi@lcEJ|(1gj7zy-H^_lTMDrDq?Ir|Eka8j
z&308Tv{{Xh6Y@+C;53E&j~~?X7B$j8nn8ILj?f!?Nymv1s!7i^D%IQ8?&jrY+<Sgk
zpb?=ejtX7mgq(3ISL)Vi2z%Q&bA}^@OCp{;t%=JHEcD3wXn~!oapt@_8tibv&WomC
zI)N4y+8Hd}p+mz7oc!GfbJ8JB8Bsp^tb(sOtAl7PFJ@ty0*pBUgT53%)0P5YvME4h
zhHo#45J4uVk@p3&^x7;n!Zy~?#0M%?;05S^&jsnlJ*=V;@a1j_Kv0ATm0#%ZYXHIp
z^{>+{nApaB>3*1pnTm$5e%<;Sa4)9-sj?JcxQhbRca+bP5p@&*Z9)Nd>(I{Tui>DZ
zn{bk`z`yyDu8}(?9)hj(93mH?SEAc@S^eN@c=yl&yNkd=gVFEW><>-&yF;^rbH>`A
z1i2g{578A=r_&eT>h1C|+W6e9(Qj!e|H5bQnG{&-^Ubt+o39+56aa4{K>>u*spiOg
zC_s8@G6e`(gA<owjs%`f>aQ3QPYVTT$^kauH!+_jvM9hvl?hqP;1fxY0vytkp#U_C
zXu=GjjUAI&XP8D4WfCbs+{qO*JG$M(hyv6g(@25AyHC9B)-U$Xwt45OlI$V)8L!h3
zH0di6GM`qq-qY|u^#)plDD=2#4`#L&y~+I{@|mRZ{`tfShwIH^IU2@*k!RE%YG&jt
z1$fLDVQQMxY!*6IE3d^I#R3@1OCmhwl50pYx_*z{uK**8<FZ0cNmJ{L!2J$X2U`=h
zGWwXF-iCdKAbmTI-~v46bfy3dR=uroTqA6g)uMZ`YvV~UR2p9X!)x7g3G-%y{fDa*
zidIw?bQlC_j66I?z6yB7g6w0tn7!t_*`Y(1ls3p2$A87+j!!ymZPdl+_=Gd){phW;
zKmbT^($IZ=-zn2u!OC-Vy(EK)1s#r?y@J6`zdmRGKD~_#c7mLCaScwpY`_Oge*21j
zj75ODwt$+p(KE<7zPh#kZG_;68x<!+V(ZphcmDQhuJ(@{eZDW`#P69VT*FK5s;YFA
zUXAe8F!CK#TuEr)PmC@UrAm92=Z`I;08hMRHaA~W0NfoQU={xLgag>eOKeD2<Fu#9
zkw0QqSKd*8SW{|m>kEgNjt#Tq@k+MP_grcEOnqO;<T!z=DldOU2BOizSAV<kQ)Yac
zzP@KGD&3SeBF&ou9GSG)*Z^<XB)Rkj{W%4^6Z#5<H~|=DIHRxhddPTGNDDuLltf8(
z+EPEfrx<o~@ts|`b#Xh|&e`TGOCeYiCNd}RH%EVS@W1^lE*~(2RV_pVD4(Z3sd<Bg
z-AM4->Can3TU&RLmdZIq=Gzh$O*Pp8G0vm|%Y8GtG9ylqg6U+o=~sP^4o^<t?!&P6
zgZ)fLOA>+|ug4l_EuHtb(5_nlT6%8%w$Z-(nQZr2(cRAhZJT6f!Bv>7YI8b+fH8qe
z>jl~=NA7~2c$x~7_Y|L8=`*Y~IW+owMVFFV?n0;5@zOrVHKS6;gtxD&WwENwe7wul
zUOIe>(<q6PSxs)w^<UL6dkuzPwK&C9HyV|g;D>B#Pw|s;?lX%V4e3*pX>7h@aBxk&
z(@ylJ#x+7{%?c;vlC(^H`01&c*+M^^<MuHFREuc;&}jRm>xSN$#K?x{HP7}RI9HJI
z<*NU&N=uEa-U}(~fx+jCx|W;sMFa9f>-YM_8%>RC?#FqUYOHGZY<@@5oUD;d9a6i)
z_FD2H^v!!f`7?Lnqca@GuK+^C%#91$xfB4YLNtY}ya}=-6Fu-5^PcOaoA}{T9lE~J
zWsi+fOl@3o-OTW69Xbqvxkh}zEQC`4M(tkc3-mAx$j3o>uyYikN`NH!kGR{1suDln
z^9+)HuNV=kbaZitp|}(Oi2ME{N@+x6-<5m!I&*=f$cif1c2R(2`mVqIV9Zzzyf47|
z=}Fs&Dvb&HDzDvU*SVZOXl36!q>?e~&MuU*>@(m@_Gb_Yv%=hD4vn2nu^;#2YvCj)
zKkfOX+@U8L)3R22H)vS0iH~<mjU-8>ah7mF{b2KgTlUfocC6I?3BZxQnmxh%swbVu
z88rTZ#jS~wgvMzV#iIgpug=A%Ap6aOGaN6ZRnS=g(>Z@E{U0g=9@^>dZcfm2YD@Et
z3Nf2DNvGw_aHfvoPo{$0<ZJXMO+V=aIO**mvSt&4c2D67Ivjsey<n72V2oPsvy+dH
zr@D-cyZ1p`dyh*F2kkuEWDvHVGI9rHWdKb$!qe8y)xk&Pl7pkOyOzYlTa1K=v%Qvt
znUb-rv8S$slk?HQs}83FPn@v}bhT5nmw;=tFlr*y5pJGt4nDRb2)8Tl-s%XgJ>TxG
z4(@NAmf54NdDZ^1`l%!OKU085T6=yji@(4BL4SpV9#<V@<kZyEWMt)K<mIJ73Tf{E
zcOP4Xw7a+1&o?;Y;B9x++0)0_!(C+ShPIbHe0{X``1(5At6#RgtaQoVUP=0rt%AL@
zoSeO_w5`2@oV2}yg1w58oT~g~xl4O~DWIF@uaEyO6yWo~Dyi#Ub+84U>t{SXu4w-y
zf4Zj3A20juJ`P80eH^s6%w1_&MQM5EGobzZ%>+KEq9FUb3xA^1l-a7_?<%bQ(-{y$
zaPL>dLHI@f1+SflYzyQaxOU*$76RKM-r=qtxVDABwupDQYX`1vA+Rmt9q!tJYg-6x
zi+G2-cHr6;0^1_q;jSIHwuQj9h<CVa2d-@)ur1;p?%IKCTL^56c!#@o;Mx`f+aliK
zt{u3xg}}CmcerZ@u5BT(E#e*S+JS3Z2yBaZhr4#*+7<%aBHrPy9k{lIz_y5YxN8Tl
zZ6UBN;vMeVfood`Y>Rk@yLRB(76RKM-r=qtxVDABwupDQYX`1vA+Rmt9q#&<!}VL%
zhJ!nps^JgjWPHnRr`ok^>kYoB!8Z;4R-QNwBOM(rJu@RSGZP~d6ALQ`8w)EtD-#nN
zHyitIPA)DkW+)FYHzzL#Cl}|p>~<>f8fqE_8X5*p7A6+XZNI){w?ht6$x}i0>}0nC
z#sBr$?VG2cM1iU8|CCK%<4oH01HfXx)|9Hop|4n%$%EPw2>0GGnz-vzA0C$y9G0UW
z+|7Hq=T$&%M@iwc#(~xTM;}`nA?d7nQMK(z$1KEyCCid75AGQ&ZqoFFkeBY-SkFDN
zmu}pKYz`EZ=o_@D<;E!z!r&%NLf%n8Qq--swKH+|<Zz)~Jz?<=VgAC8HqgtTcUeeg
z+J)^4=8lT7KVZ@u-yLTcl>lb+Z)NrW7jF7(U9oSno>nO<dq<<87QmWoQ@KH-p<-I3
zwbBQa1T?JtT!i#^Dy;L$+?9iK7L7F0LLX*Z1l|Vpo8LJXovzxVRHyfN)~0CcRt3x0
zr`(7eByX9zYcRHH^@xP`!pYAb=FWHy+Im{695zxs^-3$oJB__c!GdH<eEV`5&OuVc
zy+DVdc)F1>@Y;2Qu>3EV2!aGSkAFfFcTWj(Srp(o>aUHO-ndnhy5|)kBE0C4qe)R&
zQPJBlhw!1r54lgnj#jhm4sJ#?%1Tm!sq}U>>@HQZUE2Fy`G+IER4BX{BzD^QW)3C2
zuV~5sXs|y6Hx+Fp_}KmeEx@Lh2XDggSLos?z{&)sLven0{gApP7Sea&Ji^uB5jXY2
z`L4)=nF`IfYagFI9iS$jLp7P7e~v50QgHl>mRf$irY|lLAF(q20cn{_w5jJoLCf(%
zVK;_EIu)Js6WVa|y$5cIgfY!uq!-n-t*E$uTZh(To@YZ(re5X~tjPi!*7kM;%jLxX
z<$jh+pl^qAdF&%kUnxCAq`pRcM;o8vxl?H9B?1MQmrNBPhN)cF6dTFCHU^J+DQ#EP
zJ@0dRm)_F`k$p1i0pmxYBGj1y0|u)3cdPahB^qqls#wV#!))^;iQMTPvL=Biye;PO
z1@l&!iK16!g*OM4ss?Ao`xb439nQAoTa^%W$?t87RvRiE8_HQLeMe1%DZu3!e(TB%
zXC8|L@t&2Z47n0)V&!07nuFh6yUt)O;o+5fyaH0VyGryz!;AAZPiG{KRNzS^M`qe*
zu7A+38Q<kC=XjiXB=pVgGBHk6cfzbce}1T@;Yb_@Nr7OUk#6H+v1gXU@(#xQ@xa&*
zZj(WYA`X|MNAGy2N=*%(9`%pYG#-VsBP~numveW+8)8(NdQhxMz=OLz$qu|XCU;#;
zW__E*E{AP>)1s<9AGC4Ensm9MtR2ZRGU&2U+jOYgzNfg-^=L(x-pi-VmKOb)9gkBl
zU-bu+pX7+f0QThfm?F30v|9M}RL%-x*G3A!hBSO^X>H=+RD11~>&1Jn8!h{M7a!vw
z_a<8EZQuI}@7sKJ+PFv4JXMJ3IyNjoMAxb$Os}t0KH2*$d}vJE?1Ac&1cwLSyjQgA
zM`BgX-op#yYB#T~7~sL|+{g!)<HlHw^a+=foim9Pz~BL&V?|+nq;hFNUD4}Ijf$r+
z2Jc`xCybqt_66;V!#6SP#fP4d&JYg~WNnVETlEHWubLJRF5B%I8h>)c?N-ZTMC6YS
zCbatQI&{O<LIj%pC`Bz5^H;au+2-^&`MSRs5aS&@n=G=(3f1k(axtU9AHy}Q!Wl>~
ziMWp4jd5B1C3!hq27ML9OY%GuEUAxC7ZPZtD#2vat#sM{vhDVwoRJ<NXLnYON(gOT
zL|+?B>#4E(K@cbK2%!Y~gd4~$owSZVag>CHoa)eFUz18%Z3~1RNAEWg_5M&WoeIYi
z&)j|JIwq~XbfL0iv@F*6SR1|jLFwKF{e+?yeDKrEnAP`y&Ela898CkMXzb#=L4rK1
zrPy$Onb7`Yl8_jVQtQAKq0ftEYJ$REaj2qoNl3`v3+(aaz|)`q3x|Y13}bA)iAcVa
zxrnK;-blltQfqOmgMG)L%iQN~9|l+DfRw4A<8W!`9yrZEPH?_p!0BT3sT`m+RGPrN
zCdsQ*W0+hLpIOGB)R1}yN5i3L&E!m&r@P!TCuFAkB)I&{zi9Kq@R8hn@x3Ffu@yEB
z)G8`fuy&7SbI~`{P2#sLE1u_uuy&eKg|KQwP4wy#2L{l!wM2BPLfo#QRH!Zm5KX<0
zoj8s9@T1v^C9&S%<j9~s1sIS1Fg?rL>`p?<^yUHq{v<Js00sE`7(qn==rtC|7dpWI
z3!r3Km$U{bKrQPA1xQw!d25g*V2}j_nC;N-;IqRIJN@PR@&QpAg%T#Y5FEpmf;iBT
z)^tUorwSjm8r?G4HFTikz)jQFyI->L(l(2;-&R#pzBOVJJp}8RQ+x-95qD`ANs5((
z!AGeoN7)bOP=MRmbgsE;J91}7F7J#(JLB#@Rxd&DG)UXw9BUWCwPgq?RSJ0SXFL%;
z*WO#rovtx^_3*j}Q>z^Z+O?#%U-?Vl$2QMEv&sl9)wTe>X=}?3a|i`sE+oXottBd@
zZHhjqG&I;14Aa|MneeBL?k=haw&)7ygU=oBL4OT*SyuWC$GR+H`|8NXjyQ?U$Ol;R
z?f2GCE9uteby)v7uBU&NOY3j$OVx^Tg{~>3MW?OobA_+#Dg{3H3Y#hpaGVF%KHOJG
zNF>+4cccJEFXx}O4kfLm)1A}<7f}4RiLT7*2=>-5P}dO@M)PlJih6tP&BEMNyj8i+
z@@B9M2bQV+ymzAxGA_%9y%3o`LgvK@dG$Z=zu!MBNJz^%IF!N<1aaP9$<XcwicUH9
zMQ@ny>M%@mQJB|Z(%To5gDnQ6{~y0rdgziEd#C+<)1b(z(C1yp{$N}DJ6&dKX^}$=
z5o3;ZHr$BAlVe6kXf|h_z~iOtfc4%~o|~_yqJ!69$3BZ)UOQAnl514Ql`1KzTpSA)
z`aOFFqXum_EA;TXD`teI7)`Oja`erjk8=q<Ge=L^vL`L_o-a||Uw`1Gj!5+N+Xh^V
z3&ENa8%UA?9u{6<l|}NxAF;TMdD=IV^vploGV>WH;Xsby!K7+gW(bgatruJ@vgRaa
zyrF6@1sG7E04My4XV9+$p+tw_P)7_&G*A{?BJ+ftDP1eLgyK_hS;WUSU#*Z}p9*H9
z0E<uKh^Ntb)BN>=RiN0frbn-xwn+y~k1gv1g$50hhu|`cVdxHDJRKPTjhxm8@KIy%
ziH9+t%ZZ>31Bg<931d{SoA4d*4OydrcVYA(GjdS?z20gV5dx(E`<*C2m<qT6BO|z;
z-|PsK1Vm7PyGrYjqQH|yfu?j*6oB*thA3&a$zubcN2uYXXBlM9VG^el`Br`~9KQ=j
zKGXY|wqshqV_FB6wo{FNU~Aw>=>8iZ;Q8$=8J#g1oxyO~DvbfuAiQw6HpqXBE>c?=
zp}%0z$<3YGf4ocDdb;8h?)u>u&N&r|&kYAu^V^}pRR_CKA|zI#|JWm2T;jQ2O^s}q
z<QR@Cx{5s=%@b#WI?7^c5D_FUc0Y3MFvl8}6so>N0pv7JTIX&^pj&cDpg~-qpicoh
zd2^^fe0~D1#P%P$t(_(_-c&)=QUH#K&5-pJ^p7Wcz%?pA^;!~Z=;k)?wOUdQD8|s^
zCQQ?=u0m6x3;XVhY>1=G6ZAf?TZPOs*PP+T0$x^hBrYEEahoxlP4G@%r(BP3KG4~6
z1Xf(90DDsJw$Y+-ox+=eDeZLNqMZ!TE$4tB8Abs{xvo)wcTtn%y~tC<LHxofHmc$r
z(gbu(oAPtt^?e?cNM_skSOJa%+==goH}m@a-5Nc;QbxY!>rM4k46h`KquOk^0w}=!
z(`P}n)tsoZwMB`;?Zl>PNM!{V$^ZlJtBW4lrSl$!zfJN0*DA%1QUEf$8HV&GFT&}Z
zD%D1I>`^)~iDy%(Mc%VI!oCElvE;^u2sCTO>9N8wG~hyYxHGR<*GUt0jVEoqwle@M
zaB;u;{F}kh=qb7^pX_?Lz&g6tZUjKj9m9wXr_I|A101wLFOoO2!1XklaRjrpwYy3-
zn_@|oLbu&VApPkrz_|Ug=2B0v8U-L8MS)_ZeG05GsHMLcCSNd9`Lzhhvs*=oSPyP5
zdXam2E)5?s)G)OsTa;=II>-I`-?}S&D?^}+cJU+yC@K(Jxk`M2CAB`tC&$0j5mF;S
z8_5xk+5pS^$csg-HFA%jBUm(0blr%+p*LC4pdiCXRf5v4!AIk8K{aep^Qw*-pO%T{
zaz>g3u#_KZ!&#+*)FAqfrE#V0CQ|*>CIW{>{@5gQf{v<MQdwKjcae9)08olvI=d!J
zq5!U}MBui<`~}RsbQ;kS76mrtkI$Y{fN8<v&vT*v0P7&_FYrUKplsG9V8{`0P?!?+
zz)CKTm0^A<g!foJSO^lln>H-V$|VhSMBUI{k%6~_WiGCQ&nX$~_e%LzjE;cie3i|t
zNLXQVgEe~c%m?(>Sa3-}>`j!)S8X{w;Nd}I6Tg66d4*F2i)KI)hl9QKvo{eO8g86j
zpa2J@9`|v@0_1620;IWRCSWR8aJ`-G5%L%@i|}@IxGtZQ_>!1b&Ht@cO#7g8e`pky
zi#WBKX{#T$&`-nPBLc7uj{N4;sSR`gYU=B0E)x{M`3eQ7Qzf09B0CtHfy&K9zJa~K
z8^G_g{LlEOv!clyuo$o}sgd9?8=4^;1u)@iLx3woCxe}b%1OCAnIv8Xsaz^{Q!rY5
zSkv2ncf;x{ci7VA^8j7#`D}p@oS0hvk=dGYf4a&5Ow+;|yH5`sLCmi^ZVoWO`%8et
zSWFDy19HG0`$fhtPe#JQ1~~|Vo6Oa|G<(RjI$Yom{%jdtU$8b<=#c9(!<-9ndmG_H
zV<sF#?Tw>$wo`r6RlIKt$}Y;xm&_*_AGSX%-hcb#>#|1mINi-hzL;n>6c)oQ)>&aT
zlA~CUuSzN@7(!?-I-i2RHkgPMw>ru7@nHLpT!*gU>A1kLy+%)fEQ=ds5;h?jwXDMX
zl+zQf<2Dq7(T=dN04lPcmQ)HexYli^*7~wqI&0c5<tA;Fd;eCsInjS&sB+;Trzx=O
zE&RFXo&26?`W*1BGfZJdLFT3ax@mRb(7q=hz2@~@&Y9|o4RWGN{Z1GAo&ZX-P+LxC
zz4qOL>;2XfhPHYFSa4T(y+zPu{^;h3I+Jc2Lri&b5>wD&)%qAWG0`)3L`CFI>0XRa
zu~UTH8xkk>VU|GN85u1cdC`IO0)4r0$Gkw69Bm@)vj(s^c&lmH1qyJ*O!PTYiBP;+
zBjrJFi9GS9kZ{Md4%29N!XUxw^_R<wDuhbZi3;D`IDUjoJz1<A@6#s}dl^p()FY_`
zR+xUu9PXFRYsag!Y36>^lGmSG(&7J~c<Q&d+2Sb!?IH;F&utX<=e9A^#eVCCTHk9U
z_@COw3aY&&&IMQh+`A>e(>6Yu>mAr!z<c=bcZNR~e9DHS>IbAd(H}qCpe>~>fu~JZ
zl-@skMme-*wIG0eTv{R-vtd>8r9bAQ>X!X?3<(OiY|=h$fR0@MlROr}Lqr_=%Fun5
zaC{s5cjK6A$0yn^fKWN}ZB~7g(^xGhs<iJEd%G<4N8$Q!><jV8-v(p<Z-a4~1(v{_
z7Vv_IKK-g|nWxj^ORhu6uxYHEnB~#tvwP`}GM~BqQQVRDJ#bkCRN?YTZD=S_^$KzS
zSMLFIvt4Hj2=5z^&l!?~6=pPf0u6fM{anp1X)H*$ApEiZTJGW9c?U=ig0c&QdtLG4
z$yeUquZSUe*d*o)x!N_RG7;c6zf=e*-@@ImfD6Z?S%&gCm7P^qev~TGWLb)n@`E0a
zf2-L}i}@#V)PGP{2<WZPkFpV{5+bP+Oro%nBuEYm)3NdoEa~{5Icszxli|byx!3J&
z3C@pIAw~FZXBr#H1GLH*;(|dttYAMW4uvj`KmtekR`P6ClCUI&xZeijuYQN#2GhBR
zic`fWb}wNq0MK0eQy2axi`Rc@U$FU2qp<o(qnPR`B<W=jxsYMqbBddKUmtF~IDfIU
zTP*r$Pp3SW;b9IcE^%cBzD5NdzL+tSm=_d4A+3(gDxl3+39bXa$on-L#zt}%MAtfu
zFt5ka48<>FR~oMJXV=*X*dRGd@L{F(v$x=U2Fx0y%ZlRxd;1SgdWfL~ddFv*EItyl
zLr5lL8u@g_Pl%Hq(56hAI{z09EDww~3mNC3R7hOwU}%@PJ5~{r|DPjX|CycPZ`J$~
z0z6m=^4QtT;&BluWq;NxKVde`gTgy{WJ{Z~ITK>iS#^3d3o|}YwI8(g<e!2~lzFxH
zgNrKYtpc=vH+cYh2lPnf3@=_9=AZyy_$WZOhK0u*7!=(%iq9uEXh~53L(%Ll7YC33
zoE$;YtEIC^f{J|DuW}Z);zhJ17?^IRqId?TtF-YksgK}cMP<^tUn=<gH4BdP#B-X(
z`yMg7s2ozmxjU6k2QvFj^)DqojBBvj;QD4~yp<uI#_)hTZ6-njG%8%N<O}B{V&Ru5
zKokm&H@m4RlY=~tzoE=Ol;-txz|eK%TGm~$2g)(6+#hZ-Qweg=W9@W=5`%Ed@V124
zCN;yi#;3^K1|dP{aeYvIpJ;&Lvr#e>pyMdsgv{2PMtYOZs<M(q0TR84ASjo9Mkcd#
z2!5J08$)QC8zz`Ekr@eeVaw3SRJQOs%dRhq{gKj0sL$(6Z~cz4!+iyiV7rh*d~>c>
zNJ7M_6X;QUghVg-$X3x!X^R6u>j8uhG^NN5Kk3VP@5r1B4WJeIgD?ZA?D2ywlFSu}
zi~EEbON%3MjX{}aV7oO%S;9-R2b@beEQrF>4o4?rGOmUdU!nq<s6!M!w$s@UBQ{Nz
zGG8X*{m0foTP!SoB+X`xJs-ZM+x;+n3)H?(Y3>F&LJ5>l2PLUuzBNB;KLz;svk5aT
zy&Bg|0n~9wuMHK_X#%JfR~#A*(j9ZWA^NK4>qd<7n2`<D;DKj?9YyY@B`<X@@{Zir
zzsBDWr$fmkrV-BzGb~ep_;1DMgydR#Y#5<i-jG03w%{jw))oAnh%gNTnjB>ig2sn`
zgZ~l&x8gy}1}y>_L}=}LL_L;X*EV;0mEWyCsfw=r{3*2zr-8?A=E2o+H6qa~ijYq#
zTP-myCH^ODPlBE;|1k=%$r(otieMJdkbV~h;Ly$<YbvlP%*uWj%({VepwTy8crz@B
zdQSm3N$8P$HiV5S@%||N6{SjTobj84-s)AO{RWC|0(YcefYCWsarV^466~K@2XHk@
zL88DaLyv=Xd{Jk!j(U~X^u4wOOZBO4EG<Uyp5p8R$Td;CzDz=f7Q@x6hoX~8y{z<E
zeeD*LuTwGGKNDf|)S<J}11-YZM??(hr4y@x6Lqc8i({ZC5W~}C6ZspHa6(3g@h45z
z6wo)}#-N>W;#~1(3Q%Z3dKgiQ+2H(|zbPJG@sP~Og2#<1M{I}^SC*^^+2KlV`{1t_
zat~-qdD&^+?{>TE_v3nW>%FU)lL~jKJp+UzH&jW|#Ecvi7S8m=XBdXi$DT;5?YofF
zvue%!Hr4!z>(MBw=JF>}uLKTVjAFo2r31@-QWAfzIB2o_p$@@HIWN=x)S<uMay?I8
zqeoAWY(S?Z$jc<rzst%NQQ#bwKQh*z`!$OTR1DUol=wY1f&cMAI{Xv2*b}@SeFTl%
zy0w#u*Hb;cm%LjRuz}5g?OFc~c>KjOst)&Al8u|j@;9TtIb*C7zR8+Al_pprx&e$;
zOVtbWbZhTJ92Q{lFnQDEE?$h+>THdFFQ7lc<Wf%yiDjU6EXXbVvt-iY?>=Yp9qIIE
zhymA==iz@8)u0#pk8%?fJb$<E{LLT&N@A|R$_m}W`tYZ&f04o8af^xLG+r@K!6Ozk
zB&ak;rY5Hnhf|m1|03W0cOddto6QST^f$wK<=!g7dfL<lkL?ZNmX0F}4U66sfJv-I
z8?z|3B@h3-vg9xJ9i<%2X!P-(zslrE2lNda{V#IRqM;M|Cujp|u<x-F7_C{c?{NBm
zm~j2efc)hF;HRNm=GUP+$b2h6Y@-CK3|_=6+yc!knXkKQFHv7h!(vMbOD@Libsj4)
z2E`8V%#T!Vb|Gh?aiN`6w_^sbFl$$Me}*4Va{*F+)oy=)>RwOOfk~ljPI8ul%Ign0
z2Mjx_0sA!K{l4C8a_a#M{v8FNyLvW`B-*EJDOPz$bA9Oy`ie|QdHy`thAOdc^vf-@
zW423qt;Ivvhx4@neo#-9^^5{AbZ#K{=$4zL6~(ogLIFI`)I=Kt{Fg{Vel@qca=ar;
zSWHf_8CwkpJ<j-jR??OI*OSBvI<O~bJU@Iy2~YQbjkmSa4TGMk|5oC)O^KQXqT~gy
zLTIe@uG={hm&^|(t?+dQW4o{093<In=D^)ef**`43u_VddL!l??j3nhkt*2r^p&T{
z(D^4tzM9@e4wz3^PRND<IDIvPOl5P=hJG2_zZ=&W>kEURx?#*n1CAVL+tT9ejyHtq
z?lVmr9SBg{_+0fVZw&?A3Xt-l$IFS-Di?i-`e?0v!P;9fi{K9MZeU3BJ0VoqcVe^u
z@{8I(_Mt3@ROCMdU%^uU64UM4is^#p(tanZZp=sye9;cQo8FcJ$abaR-~@%Dud^S$
zt@Ae|ay&lykBn+2jtufnRT)Ps5j^S_K1r>N5Z{jknbeRKiFIHQ=N!H#Ya;0NkCtm1
zq_R_IHO9Kw!-crs*mhQZB`S>c$Ax3)Nh~89&v0+c@#F}*Ok+*ayW-cbA;vCSRdZx!
z2oKmrfuWtaW6E*IEF>Y|=g!WyKwu25p@JQ|RQn=!{3<v}{r5>(kXyXgYZO3&YB?K>
z>lc~s`YFKwmv=&*xvUGP<vW+Eqq6PXaz0*oY&h>ecK*`cqczMg>1|^h^G~tyvgsd6
zhelQ%MAsZ7(dD^-0_P4%vPa1zbVxodx@47K@HNvR3GWG77VbdQ^j#fnt7|X?2#?%+
zhPJy;liw&qli!d})jxu+wHh5J5@Y3t4(F&tun+lnW7`6IKNsrrE@9PbChumjq)eqk
z6(OTG5VV`k4NQ@}XTzw;?tX)pZv=|8PKM>Kr;g}1O*IKNUhwX#e1{Ofc)7}k2!=$c
zaYPM5tKss|ub1&AQRc~3u63#<mo{|`>4h((j3gGxO-%~9HdSbhdi3*vb2|>+%`~_E
zguI5Q?ltYpnAy!)Ss0Td=`8f{y`HJ}oBQ!#1th7yvXjqV+U-RpsxKc@_ELr&(XWfY
z5qROj&@;!Zr_a!=$@cDh8QtBpf(mqb+s@yc0{!gisvplY8EjOm@m^GenJlX1$HA|_
zL)U#V_4#2iw!CF4yE7jqh{>ZbGz2u>)?O)UK0dS862VDBdpPRH`Lfrt^fpPs+ASh>
zPhF{MtCTyCUUpvB(p{AMa<eK2dUhMR8=iR+8P$4Y{lzY*ke-LKa?01HJgE_xP<~j$
zEc6Bi;6mDr%G~7Ur^&lDHq0?JJtX{4FJzMCNW(RS)>3cy0?#!m$;D%CfU<cC0DWWD
z_&^HLP$5}<gRb7_q7r;=j=VbeRF^jyHaH|_DJ^nVJPcq?7+@q`vWe??MRF=5wky7F
zPYY%v-f`|HUwPR3$n70&<C9gjU6oWOK_a$F5xJXanU1t37{pyc8rvS|vriSl2RbFK
z^z@XhCN!{Hs&%bCJ^H|5(#&(iuy%?QvLbo~P5)97>F^jdavdaYB#|LwM+&S?er*^#
z>C$%T{L$%<&t~~9Yr`f4x`w#8p##J;l~B!Vbak6Nwd&(Tf?WrC^-C*NR+9)v7T@_T
z(b}>H9Dd$V5X{$-FGzg1p*ssJ9G+Jat{k)4(`q9yOghjxCmz)?8MLR9uzW4-+O5de
z@Td;H*FWl`98M;9>%h{<wwGkjJk#`_CQbLp4x~VrddfT^)K6eCh$_d52`8$m_V-rY
zRSC@W>!@MQL)lF<-h<E$UYIE7IN=J$KO-^1d)rYWvwfGyJ=!$*;No&bctj5IoK)mo
z0R=cPetT%&By6ZJ<D)6vC+&+h5+Ulb*<bTj39Iu$_^wg+O4rB4(k6ADh~-;hW=2x8
z#fH05fFBhWf6!vG)1uKBfkzocS8-1V79xgE5ThiVuM@cM+8>zz;@3JGcI!u^TVvOf
z;B6CN1eyHuJr4BNn9Zw5)M6p#B%@BHr^Yg8%6szcN|gJem5rEJOfx(-?49^5?E(Oe
zJ1sSekkv;ZO1gp_<(l#$KJk|OTXjUj1=&N?gj|9K9M{d_$SwZroS;j=q%I8KmaAR-
zq~EFCqVAeMPoflWMAGQ(#8)qG&X`HK57^@*rfk3vZ;?tAQX?0s`valLxI?Lu)$(Zv
zNnjMtLIK>L#flH|&u~2SLp;3%y~l-P3t^DR1%lc;-A<k0lo3z8x8BS19O;a|9OIFH
z!I~J=+Q-63W>Gj5z&qA6W5*i>j%v=)1vuC+*V0NNA+c##sNy94U2Q6F?+7tSId_B!
zE;Rd~Auo+(d?GKZi7U7K1Rops`TO@n*|>CS#@AT#BP$&788z^t@W;=Foy+s1+)yH)
zm6u$^Ob*d>pL(P$`a|iZ*1V2myf_W3*nx`|slFPu?G2|jE1RQbz+zlZiV2R}3HrcC
zZN!k?#LAbTr-fT74LV5j=pqYQS{{OQD#BTuZ1A^Tc}}!c8G491iOa>*Qy+bGZ2SxD
zD-!$0F{B=`2|qj{9G5x#k?7ipW@~)uE5jC}f;G}`lh-%Q4DUMyB%GeOsJj1cT&qqx
zv=~`i=_#)>71!Gn^1f{VR$HY<tRz^DVdpUTE~~WqRPj!dMPE|k)6YXZ1f|9Yr)GBh
zm>*wF@@sw}k-&LpG2DK-umDR`+0b`QK>FgdLqV^YdUuu7XKnEif@4)T7$Ax~80W?5
zwqL&4Z7lYvpjcB;xaUWWn`59jTDp1ijikqyFBCwl?D?&{^U5##E>;B381eKO--}0%
zvd%v_e7~*pP)#GNTwfJ+7g+`A7qOyWy7@Q{C2FqjJm(Ipl}fK!aosRqFfM$#3Y$H3
z?yMh+aJ%+dk+cVuj&<sEH$fj)$W27+XX104IU-3QdqDfq3NPjw(*>(1I`)kd3?j}d
zR4o@7syQLcUbjP_`1NIMXF-Jn&W&euHT(<cYh-sZbynBPPLg>0Mtl#CZ;X7<r#mPm
z9K5)<9?gf;z^8@hSIF(l$eiX0%!#leaK~hzT?ePnX&b$MCsc5X)t>#?<#UV-YsScV
zg7+9GNI5W<(xN%yBFlLiwD$Ea8Ck*4zAoWbkCrQ*FL27w{UB!kQT0;T8E(@P@^U(`
z$NR^Fy>18EVX9nW$Yl;$L3f6R`ue21r`?!LBo7_Pt!=yyS4f~|RD*{h=e@>cB4GmB
zDo9;C_C|#}h83wY5-EK}c(VU!3FdA0LUt(+%Vf~w^QU+};7p%W$zj_x5GM6#0U{1(
z5`kiUnms02B(odgisK=PuD@J&i}tpBmt(1Sm4mEwhXW%*0n8qi-NAc;<Ck=nR5zey
zSe>=RI@*;gH%siTgY#mXV1fvRvE-;C4|bAC@+dWT46`*?@)>;j`^U2x%`wr*X5`cN
zK4r;E9k{F&r8?j`4`sA5d5-Wfs3?0=P*MJ-C<^?k#@;3A%YJ$T_)<iR__Htc=`7Fx
z4O2nYpwZUK4@beQPXS_8-;l=JIOu2pzM^xB2^{A_1}7Nn{r1YDia&@)e;feZaqHIz
z);1gF=$4lxyRB(#=fR0pd@s7s>e{}Iy#v|Hx%KPi-{0uX6o>Z)%tqVgFVC7h3Es1K
z)^i}CVtK|Z))k9VK#IY5+(-huXR?hzpSni_zegmMH0I$-cyTh~2C2^J;?co2RaG3<
z=X!T)eiP{v@(RPLPQYcx%+%n^Ri}zK<49glh}RmmIXkN4E|?<>8v2qR=eU%J9f~!2
zlk~+g;8J0k{20(t(s^FIj@E(Ey!XR!mfT~O?vy~^bSQK3iW&ZWQ}n*7A@nHM!%-qJ
zunHEdP<-V`*xefqg{$<(?bN1}-dT`aQ2TonVa!B@MjOs?f=QhT^n%5E+8E)RNxjL_
zF0DQ7LskstM*O^8qGE?P4vD9_PIqv8!HTaZV%J7cXWnXI7t^7sNZkz;1m-BgEP7cc
ze3+xRdfg*B4^yNRq>a-xK2n3|IB?3!Lc<t#SM2#g{{x{SY;?y;N3oyab>*;IVRNOW
z{Md+x)g1^{E7&R80Ez1oN#`C|#*7rg@0|@k+1#uNXf1e@WjdX$H0TQbid~!55x$0p
zk5*;jBSwerkAksB_V;7=6rc3;`7VEW6F#OPJNzd6Faw9C^9^}hLq>Upgli?8b3=#c
zT^X)kHE(2)=qB`Jv+qxdOHLs)DXs(I0*JZ!de8J&MZx{i&}K7iVA>qmr<f7h4eN1r
zCXw+7m7@f=*_j8i$wBG+vR1D3#&B3ZG?Q;C7!VOxZ)W1~vN_Tb>#ZXQuK~T1<?tF<
zxCJ!CM@DrzP;;NCQdgY3<7H>@`^`9?1O>Hfsso68_Rtkw!sYnKL|o0fL<4sx3Pvn7
z#dN#R;o46bTfmDGE=S5daLY0jcXzrK21g3AFYjr57OcANWsszhx`~E?`TOO)D`!8D
zr0np$C-EL}rw<V@H;@X@8#ZQT9SswyxlbFPpZj4RIU({wU&u<FTIf^r6(|pJ0v}~_
z1I_}*Y+`g9c~GjvP=ZlopmwesY{k~l;Q`ZISP`}P;xqpJ?tD_ktV4bYs5RNp#uEyu
zMFFmD`k)@9@qR;vh^ZT9uL#H|ij5uD=I+vT^JZCj=IWK6H9uc5&2tCqcC~ANb}JQ4
zg4$V2@q?-_Jy^+&(j(|_H0xIxC}M2Nm|#}7scTx<uBoS;l872xG_-KNc8?iTc{uUP
zUMtp<=57r^dK1xh@n6Rh-%+WGo6^015HT0(i!*^Do|^~|*Eh}+o)-JtqMh@n{iGbY
zZz(0-m$qp8SQPB$cWL5&^ut(lPXn_NG&q*AOIHMC5i^?7K-S(rt^B(Gyi>XG1D|Vr
z!lqo4<<Z6xeS-T#9>2PYaxe|ZckTlx+o+oy2u!Ps0@m8e&3KhLe|UA_n7{e>>x6FN
z^_QNl!k-7e$nh!EXn5L`80Nj+7|DB%De}#45E5%J%C>x;+4OjppSjQJk%w*MWYTWI
zp-cD7HLFvkcJT-=&ytUMH(8_CDS)+l$3&iN0B@6oB4l!ySDTL*+GP0H24T|JJ48pI
zy@hbkor)Fnb@X`|CM@vQ>hoURl+ZV9IwDbvLUt=M6@=_CvP5<y$wO#0x=Q5Wz2v^i
z*ys|!m4frf_eSn1S-M{;$5PgJ*n}?Hds!`7@M_>xPhyL;Do(8dK)|D_0OFJWG5?!L
zZ+zDn4JKq6#-U5CKL0f1NsLA3<WaMC;bV$RPn@#M+q#zIU!85nDcyRBY9!>>rZFR2
z>rjesJ4q%_5IKl;#EX-!V%LoZP47p)#?m`|2xa)>_kx+`WScxHaj|B`B;Vp(v}{c}
zG=+42Lj$STeGq9$8%FS-vxX5)?SAH5j*S2(mKwi|V{DLn?f=GRFU-l~homFA&AiR|
z_C(haE%+!pxI%4INb*OY_q2^H%AfKCeBuO8-Z*DB<CR*MSe^H<g^w;JHUbRcKfk+`
z6b?Hf3Hkaj$TPkQ<E?O0se|7@sStvSEByV1VeJdWPPr4};<+?$Te)t|9lN;c`rMGO
z;0-X?^mTQt0<-c8j253KHi6ON%Y|SZUBN8pd3c?_kmTQFN@%|LnGTFq{X+LF{}B0_
zS<-j2V*ua3GcNtT@#;V6yztqdhFnm?`*q0eO$SxU7w_QTOOOH7e|5@Arn6d(Wnb?a
z^9Lti^CHxZ8oO`=*0_L7rM{*~UTVCizD}7y)IyuQLhO?=7|{Tw;$BS*1)WuV)8O;~
z=@o-)QlsmKoIX{d955SaRA_6SqaG9J6+1MHUYv~Ex&Vy!9|qGU*!+h{W%+0Ov?Rgz
z4($#OJN)qfWxvRA?K5k7q@dgFaiGEt6cnbFr6mdG(gHO+1q|b7_u9#9zViDb!m>55
z)%6qTpJgOePX=pBRjVgAn%uNzz^_KWL<WsW&MQ5nml->UmnQVRZp`CMn@`Q~Q)kul
zf#GAfaxtESfr(Qh3z#);QxtYXQL@5eB;hbPtcFiY*|(W|%nPn6q0*ewE0c{|Ull73
zoCH&)gu$sXe_C5Y5nZbRPL_`10TZi2tH2pb?*sFG%B<QU-$7=FXa4`}9(=fkFgOoc
z48c>6+WmpN2eDW2(KX|;&ISIa>ecu*(bRoWx&kG6Gx^1B4En&xT`j5p$CpQB#V~_S
z>C4YNwSKt8n5PvRq}Hg=02ux*xAm74?0?$-qjjDlt67=pP;qcNUzEv8Qs<X~i9W=;
z!wHS&=2za)Ej_mJb8>gbb`JNaUC5W*d~#r_5TA5+?1f}z(nf9Iht#4MjPAGlU#zKI
z*BMWA)oN~f20OMax6e{YorAs+(gqid0up|w9RAlWY!&qbRInX#GV%oeM&3fcmO8Gp
zBD-<8*s&H9VJ-Ovzi^Y}X!~?#<*A&nM~PwBQ^K7M-Nu)%+^J<bxYmwp&4v(jH;#gl
zHx{xw!f#-2Y6iZ<jaFmRWu0eWg+U3M$byeIWC1Qm!=I=nsvf4N>%1lEU2_3d83e~3
zyXNTJWD~e#b3<FbyTUn_WK`O1ANiymX}WHeob=Gk{lJyU3n`=~pH9JO-=-@)!f_jB
zB&}YQ%n)K%Zv#JPX~Czychm(luL=jPjFsw*o%tQ_pKdV_;qa2jtBO;7tV_BO?cEh+
z)!#%H+IX(wEiSfn3=MVmTaUdzW-L={EmRci8e?Nt-u1NmU@{Fib@(8!!}*j2xBhTW
znzdW|@~bI;kPD2R3?&*gJg87EDG$$msvDqwzUuSUI*mJ1!TPKWt>dAq#(;k=f5}w{
zPB6A9C)2$4s|2jKEc3EgyL(gm!Ucz^hWGsM#wH)(Cy#!K#h^2dbM^K#J@oPre7Aeo
z+GwdP4i*FWeLaJ6d>_j&bLOhog@H7V3OF?Gr6$+D^jl-D^YX@*n1E>WzNS6;i`+nZ
zU2}%O>i`k9%yuw2|A@W&8*5q<O_a~``8SSKPxYa&IF!@h*meKT?DuUwTYn?FSXwNo
zO9v*6ywsy9fsfE77}R-0qzcE?r|m|G8(u?J37r=n*G$&Q?Cs5)xN+`T{B6%n;O3ig
zUpOQBGNKALP39^2;Cs|}!&s&I72@ERR)mV4-}C;=pn3}MLhR}dsYhj8jn0^YsW_4X
zE}svPUk4XflHPrqptmtJd=x2@ZEYBW_NPzs^=EGc7=!0cRk^;V-S7!A(Z#ntw99op
zg@`yEB9g5BGKtWy)wLR*ag+L#iB&q(1z5s<Y8sK;Ohsyq8M8_U&RQ3CFSqzgsk!&A
z_LNN0i-do=mpI9bD?M2;a$AkBMQgM4deK`Gff;M57;*ke@*}sbAC1Th%^~L}qG{&*
z2U-mm;-@X2F*X_sIgi`3>j(!CcxuTCABwHS&fz_x+<c^Nsb+_a)tPstuGbRw(x2@U
zPm<s`(z5u<Yw>LetNsBbxPBnC78apy6E<2IbfmAcp}zNOdh~n6r4}!R{jBWBmNcdg
zeL9zUQbEe<I-`CZqhA3X9qjab0EIwaB*3DO3IymFS5pI&jbx3N#;l|)63rUO(4{<*
zf^GJ^O<{3Ek)}q7;a&Svyt^RBCoAbJ2;+f{^C}~0{7uhl#+v6*EG`zWx({5EYs#5R
z8ZwdFec+_M$n7$5N6@Lzi*Yd7ZHUHcpc#<XJrz=;COm@!)-!?P4}43~JB-xB-fT$M
zmpPrS-oVJ7x|(igIGXm!yAH(;x*3%oDF-AP;)icI;BWA2)zTTd4dU#?;|2?a=^M4d
zH6yiAukem>%O0N$tF+*aGCX<&E|NA`KB`B^fGmDeGI(%r+zN_WA1*_G{yH+D|CMsb
zs4JXkF<jDAixqgEiwHgD!G@F#ZIC!o+|4Hv8Po81L%A+(KozbTu=X%znd@^JzP>z!
zsEm#<MOqOAVf1?s#Ed%4OTTaJD_n3@!qnWhOqu@_(~q+HIQAC9je0-qLd+d85ki<X
z@CY-f)hRPQG0yHYuk&&XdKysB-1Ab9BY4f$KlH9(tiBV~xUpGd>JNCm!8r=Rqv=m@
z9W|?`Uyc-pax3n35vE6wyKX)<a^kJGb2=6xC?&;VG;#@Z)XiFrOPJ?;20BH(WaQ8n
z=!u{glR~;H(mgVoQaL1e+!(nMS{rlN8M(mHV(X^WStI_`tR$AK(@FpON7|7eA&oQZ
z-bS6dK33|TwXN_yZ9RKKc+cSDb0B7TDo%*|f8*emkQ4>DO1eY$gz0Ek70vuKGbJ<Y
zNie^utVk^M<k1e=Hbc@HX}#-nI_><bqC71YBj|}%_vHjWSYc9~yT2cD`A501V3be)
E2U89toB#j-

literal 0
HcmV?d00001

diff --git a/cpp/src/arrow/compute/exec/doc/img/key_map_3.jpg b/cpp/src/arrow/compute/exec/doc/img/key_map_3.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..afd33aba2e01bc15d7c0e6fa02f09b71d8610bd4
GIT binary patch
literal 59985
zcmeFa2|Sc<+c$pMWsQhzBTJNh&pJxBB++7vNp@ng4`E2M6T&YFSyEXh*-A3mL$WWC
zWvJ}T*cyzP|E1q@-}kTH|8slZ=l{O{=ku6c)8%wr=XD<6^Ei*=IFIwZj*B!zng9--
z)zQ}h$jHcmYv4bCgyBD|jc|4V07FCI1ONcE05w1lkb`H(z+bMDu>us}IT`p5Ad~s4
zJd#ZI$2l8FQxf3}Q2w9+XzS;A02;820sQ?60I=xr%z<aL;IaSNM_?a;eFXLq*hgR=
zf&Y;Rm^!=L!Tr4WFF3)y;r!Zm&faiOei>;=X-OGrX&D(w8AZ91^1#1GB`Yl}qarP@
zBCE(RE2koTQbkT40I2U#{OOgj@Z6*P>s&f1@y9uZvLXN^CH><(o9rLn%|ASs0|$4z
zXWw)AA1S2!Oz$JGkH9_x`v~kK@D~FA>}|-Z$bvs7fnSw@7f_Q?{NYnjgAo%hfC?-F
zV<#;4$+-VIe@G_!2Mri!At(DYdcu-NPX6aPlgl4BzxMm}yRjO8j5I_V0ki;GYHAv4
zDq0#Enga)D=@{4;8R+R5j<6nLX5;08@bPl-@bC*sitr1F3-a)YDjgS>mXVj2hlnVv
zpOjUTl#`d;t%U5rfddTm44jONoU%uGj>_)&LwX0W&;n+l2Ss)iAZH<?U?C$l0uZp3
zRA6*zH^B7Ahm4$pl8Ty!_5d9{NKkPQ<d}kjoRWfyiV_SPkp+Q&2Pj#nSdYq_qCRA7
zLnGk9CL5agm{#y~X&t*sH&#gQy65c!bcZ=Ok8lYe6A?XrLS8{p>7=rX)){RbT|ND?
z7cX7DVrph?VQXja;OOM+;^pn*>xb|U2)h%0HzG1BIw|>nN^06~57KjT^YWh*Jbm{3
zWm$PeW!0<K)%6XHP0cOuTibei`}zk4hdvEsCa1o9{Wd)_JBM3YU0dJyj^Etc<!hJE
zKR>@S_BXy*K)%Q+DJdvvcKISB_X7_K7D}q4GSsZ6jA?8<4hhJH(z2aSd|X<0Kv2#E
z%YNOno9?iXJVqF|%i0gl{yoNS|ED<ni?M(3H4HFNkb#{?!2&>m?^~ikP6*vEzsMgo
z7uq^Q6V=<q3m_swtG%J;A}o4Kj<INa$2Ly*)Tv90lJBqq#+p?B!53K=QoOzJQ99k#
z=b|<@15y{rS~zFNtIoSTs*$rCHK(nCGF(eZ`Eo8n;<vz^*L5P0^N&O+p8wS>3b{*$
z<_QD?YlXIJ%e+hheFM&cRi=!dn4F`5FJ$Vbp*<mz-T$jbT)MJyr1<*%SPsp`AZ@@v
z8(_OYrVU&Z*Es5Q%ruj~(S|NG3%vi7LjIHdIc=qpt8z#5&mFw^iYM4Vo1Q}`#ys-m
zQ6}gK-aJbkMDK@tx+zzuJX**@WZmHF&z@*!86IrDlp6GIVk$&=z1R{4T`1itfIrmj
zEWhRua_lb-$e%n`|9p6Ez_)(#^gDKmDvH6aV1=Zu%2%<`-tnamc%HrI*%^E5MD7}A
zV1A1GRNgWbd*qnx0iJlhjbcmPOar~luasUX)Y`mcOQPg1qQF!4e1w~Ds?aJg+slIQ
z3HgA7VwDH)uw`Y2vO~&+S2RwP43!?e$qA2VFQB6y2_-^b!YysyjU3s0tueIYM85Ip
zISFvOdiAuMLwNSc8^lEN5LcNk3E<y`Z+$_(zd#keD-WVBRU88YZ3Bn1oKrHW{K(3$
zt#;O)cy2u}={Z2p`MJi9PxX5?quA{;l|v^#yn=pjzqCeXy>%wK?fPQqi_qmJMb+|^
z!S7yckd;<+eF$?UM4k*TlfYBkP`4m6@Mxo)1Y}+$0pm|d03DEeJn7n5z6eT|+Zs7p
ze|9AqY*F}i^Z}u(Q#bjU`+DRDsb~#M8FZ;0++;?}1LEJwG;irj#GidRh5eScDE>;e
zPRQlF5-TkvF`FLj<a0^d(XvZ7bGt53!kp47@?Mbue`IrdvK8Z)Tj`C5CC%0}k6Y(8
zS#>pL)vDrhJ!L6u6W4hz-avkVxRHRXX`6IFsW!k)|Jm(cj?Rko90yPEu@LU*4icb|
z1UB1%DBqVpb9hxEs<mywej<P;vz&~*ab>fzsn{Mcm@B|rcJ&X9+Q(TjUA}uNn*qvF
z=AWv^8Sc3qa<Y^BRfmB|?d=Vx$u|#-HQzBICN5A`H|O4hdsT`(OYwCi>wN18Wv?+T
zb_$?Q(RlFes-=NkqM%^V^8J7=N+eB<ezEJuR!wzDzK4&H5o@lhHz)k;{NlRUbU&q`
zpNx@wEL>gIiA<z#IqedQ#OFG-^{a4oCA!sw7!5fbU)W^(Eh{Or>dMffq|=d~4@*v|
zZIfy8W;JwpUFxr)t{VK_+v#m#G$yy&az3RjZ%!$BRJPqmPBiu*hq|XIS-N|EkOIM@
z)lcp?4gc#k6ekYV$b8s&Os@aN!urT7H_XE#k3t5~so@ay)LxUfa4P5T4s$T|#R-R?
z7NJM4i9QNXauHiTeQ`cA-p#!=I8Em6<)tOz-weFO!Y?;p#SPRjpTWmWZWjkMj|BxU
z>DfJE>3UH|xi+iGw^KB3H4>?o@!+C|aziHKsc;|MRQI-xk91-_f92}?O;F0@*5BHi
zRATu`gBTZDeJIb{-h-ad<U}dsZKmL?^O+|rO8eF`3NuV~CAeSI@q6?<^gA@PMM1N@
zOag+^rIEEH;QOcIO#zz^trCK)F2=cOCv3fLPxm{2?-H(Ps=2VNN2uJ?%53r&D|7Z5
z>Q-v^WPmKAcQ%~rh})n{IFx^h%90bGEfpW+pg-zM_blWp?&0?KB_)~2<>@n$Y;NzE
zg2d$0)Z@4VtB!>2d?x|59h}f{<o3eA!$i4n_8p4DVli&3)V}7H&XR~a!NDGsxre*1
zePUk_#XT{a*VRMsaUz63vmiU-=~wEu#M5#+G{oD-88WO%KmvN(k*Lr>aG(uTe|$in
z<w5OrmC$mB5RQq5H=9Vn0XE;BBm9<;jqb>(;6Sd0MdTM*=zJMcPV;~l2{@Wa0$>M7
zz+<|V<yaPr4jkP_)m979^ABJ9Ecmr141}mAgd<lB5ADplBDKJKwUB@k-OXqqMxO-S
zdkE7ZUdHgLME4V1Cw0@W=qCl%=jLY0Wz~nM54RKbHy)4x>q-)^En#R6>X^du?Cb;H
z%<F@YCDb}9BT$_kweciPG^Mr=-fy8%ce0MKstGxZ2+Ob9?jSs`Apt97;Kd~^n+kwE
zB7&&@1PzW(5P+I?jQo{S!kM0q^5Ubn9GxkU^tOrTRiiB-XIzv_ecQP$E&Vq|JVSzB
zZA^)13@u+?TxZlV=!-X0Po-|2ZdSDO{<0eO*;N1I4a@SVplYF2Au}P-2hCF}6PHd;
zF3oWtL0wys<WlxK)>A_R@bKyAFfpH-e2F|#FeExQW<4_6e9!#Bq)qFQsVwWPa07dZ
zg*!XX<&9s+rcins*zsh(WBynnCqW<c#tfeSGOY8VIO5!)(Kk%xn-9iTbP^TyjOir<
zcx$3t10513WQEXuss7gu7~3co?tKAtZZenXFzKjFRhYkcRsFDIBeIgp^WcJ5V)Fc8
ztXFUP;6>R54$}eLuyFp<bY=HP4mTi)em)?VBE7s#H$IVor|_+L5#rtc4vCME3%2-s
zq3WR`6jFrnn*nbqo_;*VOM~3dKAJRN^kn%q^U_XmMFYW^+K6GMj(So^d888?4LH8L
zXZv#Cd)(_J9`~s-QxBJh?Q1*<>mB2tyU4G;AP&(8LY&T%fJM3$WZd%}#6Vw8D&*R!
zlp{oCQ3^EP!Z%RyGCZZqpl)J-7}+KT5n>+YBTj}0^q4W8m(`mY-6?!m%dfLIwdkC}
z8RmIYbAyd+IrSVA1N3qT3Ji?Ll7KfC&1Fe|{TM@h*jqq9`(;##?7P6i@p+g$!uL2$
zRC$l>HL5nRgaBa66=p9m!od7tbmw`^0($65gP?}T@<Jb3+9WG;gPZ>;s#vMe@odxy
zyjmQj(NfXJ9wvESuf@gVv&qmFydz<IezW^q%g#Ds{>ikmL~JPuaB22KZ51ppRVWsU
zJFJYeN^Mnvza-;3INn`awIyy5z5PL|Af(E`H~Y88%$oWK>|oW8NWj=r_pb@tXD60n
znB_%tsn~7;rS%QuIP7DBB6JxwZSna-clXy=b49yT+k?cT^H&z)-G<`tnnTwVOPt-k
zb#2@)v71~o4^evtWHl&UZMkv&{DZ7*yR0d_Yqpi@-u1zyBKTgrs`C3}$C7xMLLX~^
zqmp)U0n?e1cc;_pMRw=>`IW`8tc8!r`6Of<z-LwIwez5J$I}S+MBmk3%spAx^oFej
z9T(uK<;r!#P|RgZT#sfopJgOI-3FqUOmG5NMko@W*b+a9;QQPhuPvCD+c~J~haQk8
z6G?L`m)dY_Q(-<XLzr1veD|)GiFrrX2$v1$x~n+f2@){EJq>QnZ(425KSc{^?m9;E
zwI}45n9Br90o=N!%^Inq_U4A|w)9kQ8+Z|=jKzIX@0w?G4EkYPy{tXW$42?$BgYEK
zDKrmVhiuU(?QFwvxd%x=moj>Jm;}VBmYUNnoqFG!0bTDvHiv!XtG)%<c0f<**+e2|
zc*egkBAa7Dt)%&l1fX4s@=Hy26m7in8;}i<I#%l;32^*TWu9~+bd3qVy@8(6IZ6Vu
zy^!M-B!J8B`b)B}WX+kN>IW+-KFxXhO`>sW8|u`ep6yIFc{CvU*VpklahDPEKBdR<
zz?-g8tM+@cI=3z!nMgTep6l~*fbzE7tR^#4MDFpr77HUqHOGLdlOJz&QH0VwrcV7|
zxY&RB-XHqpRSnO66?xdkOHJKOv2}s+0^YriCZ&H`Lfq|0`vx>z5OKXOyD~<Os!IGK
zgRH)?5UzDHL@=U0^&sV?)GsIWzma|Ofd?OVz!t|Y%lIGrB;#PunDE;@Pj%Y5mcA~k
z9o2JT=i+*(??u$9j;K?gSp3}){^1S(_(GCCkuWkdPTt-(Rivccy(Dey;MeUXZQ*WG
z2%DQyGLq6#fEpBW)5g}--kblry`!_cy2$byv<Sbmow|srf}ynGO)YyTXT1PV`-=hR
zFWCmT+A7<LKs6ZY)DS8Nx0`PE-ZuOQw;S$WDhPFfAGNE1=exyH0vc+bb`B~gXLSCc
z0Dn^#_@ga;etwdEa*}XQM=2R)Wo0R8St(gr36Mg<%irDG1|i|@CHO}LXY9RfJ)LiQ
zJHy@icPq5H4)^g^7x3|Mwo`GiaZtE!XQv=>-A2w%LPo~UM#9EUPDa8`PR>qALFS~a
zgUoe-KO5+F^RL_g5(@BsAWJG*p7u82g2p8{{D#KAwT7W4_4mvEQIGu@8*h7!-4KI>
zw7i6@;-%e~!*19?Qdv>%pDz5JPEBf;!GB^{<M$E}L-6b`#6kG^{||ijZ?Y$l_u<-y
zYflL5iFjYS_TkzS0(&Cfm#%%d_JqKmi1(#yAFe$iuqWbu>Dq^DPYCRZcwf5q;o1`d
zdm`SKu6?-ngutGN_oZteu00{JC*pnS+J|dT2<(Y?U%K|;+7kkMBHov-eYp07z@CWr
zrE4FqJt43s;(h7bhigv=?1^|^y7uAP69Ri8-j}X@xb}p=o{0CQYagyXA+RUned*eV
zYflL5iFjYS_TkzS0(&Cfm#%%d_JqKmi1(%I|2SO#cxuDm9eh;74}2!$M@9h(a`N3D
z@IwjyP|@x_n@>eYLqko=K*zv9Pe)JB$i#Y(k%@(gp8nwBgDi*G*xA_`m^nBPvvIPr
zv9tZiC_n*TLrHajis}FxBRwPAo<Bb_3XpkHN>h*t>}M1JD*oGN6xg|#v-|k?{_)=&
zgH=>CgjY=vdwc3!1&(pC*Cn7(ui8EFiCHHSiMU|@;R-fauPysar8E@~Ai@5naQl3q
zEbOi+<e&rYs$%V-s#+_Rxvj}DyQH=+ZGtQA0(PtX*LwH`w69WAV)PU`Mu;P@10|9r
zZrm1q*ZU^Vr1MI;(QQik&_0UPmyj4{5pLs6Sk);Gd3`dQIXI{08gT$ZH;H$1#gQY~
z^CQ9?`;et$b8|i3N!+7f1EVHaJ|v#x|8nvI8BGd*KZ>5%YfXXk7%=&`dL`mQ&kd$x
zuY8!3-$2!({X_alH@i@~Ou4WyuGg)Xw?7;XQTL7B%-V3>DV2xDj57t2AtzN+djicT
zpOq9(yi7P-b9cZX^72=w$LV^N`rG&Nz$X)B#@JG+bjybWPj|rsy^s^_x4vaGR&oVe
zJQ{s?^K4*-vsI6xGx=}dSrhIsz6uv&xIhEb7TD<?kG23rbHFDG{|6polkj0zqrSjH
z+m=qX8vSbaD1sW+XF4oC1`<_nrNaGwaapj&d6+v){7|IH8K?I_VIDE=2vnt0HgiV;
z3*tQFP#`@%Yf?gXx``-RiBpOrC_l1ntoA<KUKlC4c-=MTmf7_LmX!L#GI8J3KcSuk
zUd)ppugVHl5OnHfM~7Bi=8!mSqNL>ej&U^p2wD-9`*gQr-m4;E1jyv5k!knR@`Aul
zK7XyEjD=d6)oI33jdQ0?B}SRB0e)XXAN4PDna0VZhbcB*;*pa`vW!Ps4Ye%Y3U&iN
z7)OP-<w;gMkNC?N1t^3fyVYXf9=w)$UTlOY87TS^UjfHB!BOtw5fKFa0(=rPzodO?
zkKru0Bi`&wF#9-alxJxQRSX0p0%dyaaejnDEt=)sN2?Piq4fs$2S`BKQnCGQOE;ai
zN&R$crkEtGd{i9?NDUJy4x-uSZW4(NRGsXM`M%L(2@BNg&oC*!J3%I{a*6(nGxi~m
zV{2UgRKj@r8Z>U49#t!aIB&!_jBhc;#%K9<9)9W_iIKA&u9nrO-n6wnJc018n-hD3
zB}Uj{f@lo8%iHbBdcEhJ78c#6J`Ux?kqa;+-DD$cUDV`ATS_Phu-JVx_J8P6>xC0p
z)SslmrNzsmDGAbO+`yxW!tnBe5(x#fYc;DiDZjP2I)KkUr@Y!s_Bu}g(6l=RySzw-
zg<r)bG%_EtwjwaT8muvvf#c8A@*|kdVsxwH0#nQeh`O)ijbue2NT#CD(*^C(z_O22
zRN@S}V*T6e=dd|0({Kq579{+t#khDPy}jjkf1xDSlPR{G*K{;F3oZN>gNi=Q*7D(u
zqKSuH#@O+2)K$w<j+4q2V*Xz%IV7LKg-3G&yP6le<33$~qIu-fpvbe}59otxu9L2X
z(8JckGyNL$3Dhnc$MwUSw@M$DN3?$y;$IqrU1N+5MEe%vQ4O%eMEO7nmITxlBQfPf
z4tmK9w@Jw1_UGmu*9HcXwZ!zD5?`n4X1wRVJY|^i{$vy2i=-fmAXb|o6(TJ#rulR=
z9KuVxx)<|O)<yb#D&2t7OcYma=|k1?`rB_RA#cpl6&#IHEC|TPZ#!8n5#Ems8q4Mm
zOmf~0vq4ni#`Gi>&$x-n`MW%OJ${Q^lQGmvw;_QZ-%^)VVcImRvs_>H83Ap!J=Z5r
z(1!cD+lDU|*;XbU5s(ySF!!XTy#AcnWzB;RLG#oyBbMvNSV_Pgv?2C`MPEQuk>AEf
zufL!1aGon|`k5>Dej5!rX>#y(o23v%20nRP-`o{ONtAJ}Rl&wrV2oH0r`rzr#L*b_
z_Y0>Edosvbz;po7A+^OTCgEljP0lk&HiDo3_Dz>yCO5vXgd;R}B-~RhO%iT!fBP-y
zMYD#ZqiE?3sPxF2$0#-YBdqSEY4b7-!5`<-atEe`V~(j&cg8F;;jheXeIAWL2+2Nh
z7cy7N^XJkzs&wzMbR(bGfYBX-4Tc1a@C`Oqj<IK5!QQRF(8HNO&kYQ|INs}i^mXQS
z`?yb5DR&~t;*8z`L2PJmN7P#hh2WVj-vTfX((Z%b|2qeJ++-Gu76!^Vw>8>l(&7BX
z!q8t<D@;(x>Emf~O%b|)L``maQ&sG8^9GcaK!HJXID1?@^~819t@4>G`|$G*iAJn#
zq4lX`w_m-djGay%crC>m=+jlhn~$G2!)ZJ!MtGgUrZh?$J$K={MJshC=7_o5*(3+e
zl_I+do6u}#qnyGBJ~JUVH9x_Jj>#>6Y3O+Hu=Rn8{-xHQ8WSQ*c^?TVEeZ5sN@a?o
z8vm_SwtS>2+?-ZRvQtXUU7l=Bn5}<1m+$`E{~(L}N-8>w-8<_}s)_^=<rd__S6X5=
zb5P=T?#oUe(RhKnQBA8xcd_L7<`C846M1P6$0}z!xfJebe&Ga~5OrMWoNK4NRJ;mb
z0dvEQj~K6GgGy&yQ(bSlgl}u{@|5)HRfFkfS%DMJJ{p-?0Bdt%^i9EF1{Fq3hiRkA
z*R=qLxF?pae*Qj+r?pt)K795HVZ*8<TN_WnhTk;Q%Hpns=U5?Zn`#ebHQsDh4E3Ls
zkhZj(o0Mmjt)J8p{U&`?sMh?1Tzntd(tXoTKK`BZ8jkEauHMS1$wDy4jg#NvlR2XN
zi=My^zIu$M-SZwet86Z|lgItJe!FScz4v9OlLU(*+c;%;o>BVH^n^fmrT?l?^HMMa
ze$DYnmjYRq(VDYZWf6+i$+z}cSA~SYrul*oy}yLxrRzoA?_*;rm~<I6A;2d*1A%!b
zww5FC8g@FGpbw2(xDlv&e04n|N<x9#HKH=SytzX1x^$7(%bdJhDapQmtS$wM>O5nT
zc;{^k%elef6oLvzw7j|Z3{$SGE(P~3?I9K(>WkCLGWSeRy1hwZ41!`{LukzS?Pax-
z;z$gCO?RNOys3&&go1?o<g`Iq_4%~pH<qhDP?p`+r)zu(82SYB?>r<hPR5B%GCg`W
zyL#0EvEo!P&}XIh#8hu8Uhy=ysA=12nHf=vQc!o_b*njwxfR5%r$jy61359in3go!
zKBr_TCs+F2LFDr~Dq8K>ur~~=C!uL&dmRtGsDB4c<CB2axC!Q$fp^X?!(8!7^|KQw
zhS@D4mAeJcwd^6OY3XlAtgQ6NKF$gUo2`asdRSk48r%Y9TCUOeK%Q_to-^uVYgvs=
zcj=XkQeRs4k{hNT()Tev5qw(b>>*9QQtXoIP%Q*+uOLAJhQ*#(#AL}W`{xLcX9ayK
zxqI&>l?1?M3?^{<Z*=$*E2bDv4^E|pt4Xk&${)wyGVPbtI%d`m_j%I9#9h_y10||j
zZS8bh8<$|vE^w;kZnYyN#HwLk5^%`6<RoV{)qiFtjrt7=?+}_FXCNGUQc@rpIhd*5
z<z{-RN>#W_%J;d5jv9G*6*&T83^N*}!i7yNhu112u1hf?&_+Q<dhr|Fpwp@AiFHpE
zFqfUtvVC}v*+Y|$!UN;A4JGy<nQBxq5~kK^4Mc_Rij@BHI8%$)eJmp7caP`P#4TA3
zAE(#59C@O^la&m`j`zY?!lGG&8v{^LsP&GECVfQ;-ZeGE;kVKHcN8zS9~tGzb%ndb
zPJH}LM;t92lnjdI#!YNh!}!55e4_jMG7CO>uFDxcOcd%b=##X0`I&p6`-uKZuEg!r
zaUX7)C3iTE2DcI^GwOmVvHdUSxz1@E@Em5`NPFJl=6{2i?tRXs8!6txW=gQ;5M^e(
zXjA`U)&g=$-%a4IY5deV=iDp}gGL2<&q=Le+3dR-;o-LPtARRr5u6c5eLN~KFj&wz
zSbU&4{8s+h?BUc!8xP^~p8Wop)6uhBoDX>P%<kJH87ZuFM%5@iK@2u*rG^y^H9VUc
zj;l$nN=>8iG(1!?ptW!*Q<pz_1q~3}Y8miBlZH~TyQ_`qJKtg`Q@6x}`3DMW6ZE6H
z1oLzBJdTKxyKmR&bZJh8sEuzt#V!suqCX8bDv<z2HCVPK0a{jG`)1HPNTWk>UY{X+
zsd!+WbLU}}r2X~MQgJ;YFPIKN40kyzzLpcA)tK&+F*_Vwn)_)^aln(OoW+1#FEM%U
zaR@=I&1ZS21+yGk;db2-dC=S`U=-7*0H=2z?*G80^H^EgFZGkzai*K*I$e!+Vb(Z%
zSH`%9hX=Za4tF(|HnF*|gGuWy&;VpZ9-4=`wZY_%KMwzv#UqovTJ^}X<;EQ^{pwr1
z7tdjv3;Z2@ST=LgRrPrxb4N}QL@Bywo)mnxehZB$Rspl(?B+=P?{%nUnZfaGF#4+R
zYDWsep$gv}swivYXUuXNPr8`xu<p5A<3l6>r!Y4aLIp^$?Iy_lPaf>wwxl9`=o5xH
znump|JoWQFAz-I#tIDCC%1_?(_c-z`RCXoc0hBJR8e06XxAKQBf+-!3RpAndjUBEe
zK+hg!h7#E+LVg(_%8-De2gS%)hH|p`U3JQ?d#P_x-b!PRxV5CpEoSHJ@aomgC@v-g
zIVIro+zM<sr&F^Z>WGd!oOvSVN^(Pzvq>>HP<b@mzT6}MY>5Ryzm0h{AMBzUkARia
zG{9p5ng$+BEOR;Y@%LSe{kV~8%Kzc|d%o)hRJpAbAFMT&)dmk?O*bC)tS^5`tB0{`
z2+vhZ-D!TF<lp0ecWnH_p;oTEvmZY!)Jyx`)}N2Yb>4v;l0su1*K#_^;z!;g`HGg-
zc23<0G|C-np{~jvk*I!eJ=N+Lcm3X-J707R*r(J?(*r5`<xw>FiyK-v$JlRo+usdb
z;abm`Gu<}%*l;DuYs2}NCwa3u>m?n2o0njW=>@@N`x2gB7tcJ|8O_BJxrSnMR^iQC
zVjaLsWgwz{TjUHmXC%lunDTzC0Iu+va1@`CPq4<rv+*e1Hx)HieG(?Rt~wH2h6!Uj
za-D6Pu=-kgeDZT=B#WBW_C-V%W}$Kmb;lBOsoMJ4v)PkLJtV+VujI=`o|u&nxRG*~
z4-cQ&Q`eU7JR(@;AbKYvZu>~kT&n0V$K_dSx{EuQvsj-@5lRs2P6?$l;*Y&RbC(c^
zf3~e1cnC{;HVNiRqn9er4Xm_KOs%ZWm{Ym4s%J7h%3@F3D?RdV>S*L-Z3b3+Oz0KX
zIz}qy+KURD!Kdco`&_Pamb#voILz8v&)$7cnR&ZLr0FmTz?P%kr5K(3kZ-$cdGQ`K
zdR<R9?fNCZ-a^nb5wFU8n4jt4+Oi3r>9hcjJN?&<T`*zdC-|F(j=c%J(W_8KVow%*
zjp@`Oj3GzM@ZtPP-W7o%%P}$4YKCeN=OgDmITFLttF})&Z+$DqN=-^c++{Vzu@8hI
z4?j%(5|xfA9=&js7b5JNb(IdUWYAxZ;sE1E1{kwoMSOs{ExJdrN9{NUO{23#eDw76
zOoL}kKf+p3m-VdiDZ}YUgpi<?DWhRH2HMb}Gj$#ZNhp`#B92Wjcz*h9^}L;@&epRi
z3jrkQ=+8#5JG5^^<L!$OD^SWI{Pd|B+|>sx&c1^)D|&TGDd%`NXtmsUCI&+gBAPu*
zHJ7&?tUK-5zH^vDucuzowARy!<BlX|sd9Z0>{^gVebU{KQdE2~%re<Y_kn9v`b=`2
z;5icbdJN92XW7gJgj3rW1BJ2irHYS=0?%NH8A!$tmC2{P0-oEXDqg+^W@A(falAZw
zj2ginffEDmM`r^Gh|gly;2s?xqQ+^z?PyFx`9xF=v$&mpdi9WeeBF8fl@5ebJtPJ~
zQ$zJMxKG6|UO}DXT%V+xW1(WM{nHv(jr{A~0^-Ix({i`ZMSU&sBLO$Az#1&uQQ<0s
z3zyIbmoIA7z!+Rp8HV#RbyLJk&0<{roHgbbhC-QD??(gM7me__7{-h5E+Qre8&v{{
zELC@1qZGV<<F<<ysCST>$>(I5OF=Ug+xzEhKWp;hc3!EMq^jfB{hBhylicMtt$;O+
zRZ~S45G-&U3k{4lQ5X~tzGS2c_llvk>9Kz8{H0Jc56*4Ff>bt-n6_=LDaOo{AS|-E
zGQH`^WB;6voG~qb*SwbMB|R=0Q$5p~aQ$I*y=(>;MQ{(2K|=%)S}sM0Sk)R-c$9F$
ze0a1_|Age_2OJ49r)W~%e=l*I0Mi_DyG=5O1)vgw`CS4;!i(g0ye{?f;qtcCUpB#S
zNz|NqAOs(i*6vhGu8*Cpb-^3q_-ATQ;*aDw2diRDTe6rwe=g3vlQ=sIpM*1xa4TJT
zL@2N~n~H}@TFsPrwSIJ^uNzd<fMCb#;OvN^W;D|pEQ)wM$YByA=j!Ss-1NnlLy0$$
z)uH9L&w?rs!R(k03B!Jv=EvC1#xbQXf@R)(05k&QFJ&<{6O0K_Wi@V%@tA-c4~Ci|
z9bs`?el5#)f_ZWI_XnQ1&g)rh4Gd@rm|OkU|20gA{Ol3(6;;!Znf!zDuxLmHO`HNc
zqRO{=>481AZTlRK8kO^*>{F-j3Qc*HY|D!LT#T6G^RVa*<{@NLx>7Wlp%HdhOSf-f
z9lFp#-e6d>E!56mSsgzhCCkxrVm9TyTHNO@nwt3AsfoceQ#>YJ|53Do+M%Mv3~|fo
zb)SOjNRX0Im6p(;pGjm+Bt>jlCa^UqTtM)~!=J${&CLm?vBmvg8)Fz0H?~I06U0i7
zUe*jO6=DgsvVM)(QED8vA4n;h-Keg8;QK^*0sdJ3@dLSsveNvqTZ8X28SzpR;5@h@
zHUFtoCN=h31lY@WR(0K2db~4S;fuV%+U5-L-Pz))QROEbcU<ohxqxlfxfObD<&vf8
z_?|J*4@rh{cg*@c4Gz8I`<ABPFTUP!qTRT63DYMob<`l{)*ENaL__YU_TE02lu$~R
zIvRyZK8>Bn8eI7CI%_Bv6+!TN>}oBNaH_Aryilw!bwb{E1x|J9{jgU$HY=P290Wth
z2X<0xSqY~H6_^?4V9bka9dymx4Zg#H&-~q<e7I++<;6Q<xZS!cbiSLjp3rnD)}0^X
zGJ?<2udVLSuId*r$kO-VC3svFRL^B+qRfVj67DFeM;7PLh^>xnl;DmPPbS=fR!H1U
z;S0lj=^|+7Y5VxN<|RNchGNR$<7Z!z+cV8j-ceebI#BNZ8SRUA=rntQ-ASpedS>a$
zRqnC)y0+}4qDR|UAxk2mHhaqJ0`;QSMr7Ut3{&xmYcDC@(>z8+YA_P}t(EbKlW|<C
zS=@+2=W$fcCF1L#>KMpUOnWvvFnL91#XT=W>EiPfm4|sz@3~zqF-LGDAR<^JxSy+r
zt0ZAkEQ3BnDaIhZb6efr9r<$Nmf?5CoAshoTBR#@WIjZjasxD<`YNzXQQKyS7%hAj
zc3S5Rz7YF;(vKj_Rb!!p0s~|RKFPM{wu+yBkFyf~P$-ZWdFXqn{zpwnokl5cYRb15
z1C7^^>++GJolWOxQ1>%^8Dm(}vm}yXcK5>pUTYf_S|BQoA@t>Ds|B#_9=)lnjqkw0
z^l{U5VAfx@QM@OdKJZ-PgcZHfixf$D9Ys&8(^S2B+6``+CFk@G2t7463+Bb6aZ}Am
zMuHcP+vqJet}+c{Woml6S2OM0gD(TwFJC?2_UX(-J6<one@-y4&HQB%8xWMD?FC~b
zfNNfYnDM)6miF9D&Ly*{)envJq~9l>c}XSMN6p^TMUy;cgLZL~&WxI9M-Oa`kbo$Z
zDe8Cz<MzeVMrW{gS;Z?z&=+mki!bUqrh!kyAn&ROr4Pm^j6j%Yoln$l6mF&pJ9FCG
zQq|%#!ZO94q0*5z=7Y#=@Lsoy5VfnAfkMYg3XP~K=F$eAg4sc&W6J3<p+=E3fih8{
zXJZ@;DhKJ&oM@L0$hn>L8W>ItlUn*1Up=KQ-T1tzAmr?zj81nuSG7vzrvr3nGb0kM
zBXEKnA$TwB?Qd=4&DJtK1XVx86N6ip?cs;chD^LTl{%7H9D8f99>S)Ee2OTX&|Or1
zGRHhI&N@D%{rK&8BQxcQtfecfJnlr3r|g{LUHK!l3(rfGs1%kp*g>C?c!)Td#pA9a
zFs}l?tH9_ouJ@`-0dnwaevzNW4d3XOEpYJ&PJ_7ZeoNKb8y=dx(=A5B^Yt+5HDm-=
z(%i7dvnTj=i>j9q7dRRgX0JDRe}CGZD8PT>B)`KM)MZ>jd^xx-pgKUXy(RJ>&)LFm
zxM-R4vo&Sp`$nISmv4%#ZnRu@fvu{`B11fPu%f`cAq2U>wbHTNw=At#bzZO7(+#cI
zI68XtayR&)z(l1^oeZA?jdwDLN1cZ0>F3`%Hl!sE1wD-5ki^PFN~$cWYLb9}5VbS|
z{0`2w;R=j7&}M28QlC0TiOXNle6%4Uwg?@z5QJSbFJIC7CKmcSQFu72yz<nwnjPz!
zrwZ7tgj!jI=~PP{G>nfDn_1@i9RJ2*V2#2n*Zc$OR=%#I{6*__587`>X+!(cCN@xS
zII=ruHR-_!8qIx#AMt$JBxENllZyoOEC#%_uE}jcuXv*ml7NMC*g;~NYyaHZAM?cD
z=ZmJv!~y7vE0S}UVgS8%*J3?-1v%V8G=xuq6{Qan@3n&=q1^zT@e8KBgS5)Lvdm8v
zgxSav^6-wjUDn?W<@C#X^Iiw7Dil}i*t<UnWT@)vOTg?Xtoin28C9hj+R0QT>lCw?
zczPDxmb18jfjT%V&wN%<=99Vk$fse^qi%~iW>dzWsd!9g1FTXLTqG7Gu9bzTN=J{$
zaJh>lhmN7THQ91|7jtXU7q7*+J6w2q?pef574o3x6JO{o{%iL*6vZIvXBDd(w~Dx|
z?yEd(xRF*mlJjv2+Bji1@Rp72w7Oys$7@7!j_zx>UN=pKI}CvhXtvG;OXji>%bjQn
zszR}mW?Sy-_S~i}8}DXZ&qf0)M&U>@@9_wgv^dG-Z{;`!FqLlfvYN4*M91wyGx=H8
z-!7HO2>Gj4#l05EMj7<U-{Q4CnY7YF-*i_yRmOw!o_JU;!*{-l$Jzj5FeI3M-a@yj
z133wat+@<|R%l)Ln2@XMRuGiLD`&VmY<pyLBrv{SeI*b*HyCQ&64(Czqptj%C<Wv^
zwl)DwBCk}bVwm#iYxl8Rg}3bV?Uh@{ChT7Zxzsx=iaAO=9~vpHX-d}%Oj-FN?%9;~
zxbv-(vF4F#ov4DD!(#SN%to9yT;ErfhYQ4c4SwbEGV1ltU=+5GZ~i#eYUi8MGbXIL
zaNgjcO46<VEm1O>QwD#<B>s#|{PaH=J>i2_{dVkzAmshvgWygi?UDvPUc|TuOcE{u
zGwWM_t83M#K${~I^`!chzo)L}!JO%x=yerQiYOei!BFff79F{^%?LH>^<gwMsxITY
z<NK7Cq4cG@I3Tj@-4VE%OKbz<0&pS(FNXmgSg-6k62K6sx|8Y77@sV!lKC_>Hc87_
z?8rH1)*2@zQ6>|Gj|YTyytIM9?kUtKjn(E%Zagjz6?v^39yz!X5S};LpLbXa&R#9*
z^L{+7s<n>N;caid;ux68_|BDPDfZ5Sj!B4!+m~TkefDR8nb%nl-=u9}FcN%L5+4n0
z(l=(kENDbVrn`q@pgf3{2J55PfrO`Zw>YA{b$``3mM-dk*e;@D^_<81k<)73p(zlr
z^nP%~OBkQg!gmxu3U`~_W>l?*v5C7ieQ<R=+9$vLULv<DGI#T_P<Z31M@KH*YdUq+
z9eT-F6C!FA{1JL6(A)<*20Mrkt4lweJYncLXkOJ-Bb;Nz{uUE;R!&YW*0`qfTI5rY
zn%f;+K9yNzR*R^5cziG;akNU7pxsyTW=8S~HN9OGcJpkkQsBt>^SW0DyDnXziJs3Q
zTdK%NxGkyR>P-T|Z~5>xl7QFH*;_a4D(hEQ!qC2|*K2R$DX`;{RgI8v2umO|8!>XK
zo4`KF%#Od?wzw?++Fe0HMX@$hIlN5#dEDFc6IU6-V;Dc7YF&imY(KMK_VnZN%Vt1R
zBTS93QekUSjLtIUDp%rcs2@4=M0klyH<xCMyr#>kbUyAx(MFGKHqjeRu8Y|k2g7uS
ztc$HLQc!H0%!wnz>Qcvx*TG!*hpdbC63KlY5TE5*Xx{F6%a!qK`9paQmI$4#!kaE%
z*~sGHm`4J=`o#9Ib7hsi9wdOz<6RW(v@-3<d6PLJZ%@rBwgb;*Qv;$;6YX66++W_Y
z`Ap3N3FylSkz4kDqi)hvv{Zcb&C5jJikj}C#Kb;3K55m<?*-y%ZR3T^Cbj7zL#TEG
zdtH}6ZE2WwcdZzD`Ay%V_Zw9=o#m*d$DypwFOX3!BDiAXE|c9?t{7|b8DqvbRbY|G
zjbjKf%4AS6w;r&Pz(fLCdkA1s?ozo1*p?N_wD4bje`}*{Gmxa10EVba<t#v>al~~%
ztd#_~LN*o9v0!Dcuq`bKAfV~5$k`=^I)hYBV4`%b`Mi-07<hz9Az+i*^~7GT@fgV?
zhcMHTJ<L?I0lCU7m5;CYoGtK}vg@%L*DM}gWDjUdS=(ZKn#(y=0O<+g{oh*i{QG(-
zdpn|G(l3n2TjYr-YzlzfWj(jxW_#gopETcH9SdO22_*_Hx4gs)k^t?5cND~LF%Y7v
zE<quo4zeYl36OyL98g5!yJ!l*MF23)Ym3z-#)yPKu}@1$z*IT1!HmE{0&-d>m<cDu
zk@!U*;4S+XFi0~1p)6Y=*kZ{xKn;-jK6!0t(HOn+SUk`y$t+N+5+12iGu0lw(IwsO
zCPxA!+@(TR+N3V>7z)J#bTvtUd^s#OaUNC<i4Eq-(XP_!E?+!lcW$#-zQ<_#@#3AB
z1;ZA>k0ij^wJ&WccD)H9ltsRdqM6w;iV9Rlqz^W}Rau!f>63y-R$3uOX`2=c?=dIE
zmTDg5iHs|*JQr%|yN+Je#jZ4+(-0rD!TUXzYS(x)b&Tt3>5GYQf1|V$(;+xjL7yx9
zw>8y+qgN8}uFSWgYvyWX;E>i%AOXC&M6Q;Jgq_1i01{&iN=b_oMgPnXC`*4pnfwld
zzXZw<ZWxLw3Z^9i-F8?SBKu(|!8b(RI(j8*x5hIe8Zvmwq?<GZp$!<p_YIN`G15@0
z(i{I6^Y)ejjxody?c4(wtd7yh$Q5$LWixIa=U7UZH~0?%wpVl`YYg)628L<I0~MC)
zkn;)4qp;6H2MhW?hd@<;!JL{rV*fa0uCV14Bt%P!62kVQp+<OD$iFg~&1{-e^Ao2o
z6)lVU*l+9UQuO%SQjqadzAIT1o%oDa!OREJ$vm!eD=OJb?V5+dIig(x1{F`_oMGY@
zY)bE)?X8&`ufAj1UfLHg>z@8m5;^^X*W9&lN;UUl5@OD^!7Z3^j9{G5XffWH;bB`m
ze?{M>V0J@J9F0vFZV~9<jJ_K9-8!H}wJq0u#>V*7nIj9%&?Bf?Zta~!oWp>dz_(B2
zx|8#+$Ax-)gek}!ycr~fh-()Ql69so<;s&S-ZxDyShT_FU)Bn|j-`9Sc_Ps0x-@cm
zf_Mpy9e`3IXI>&Gz_<PE4ggI)l(zrRqhQc{VmxZ>AU-k)eP=fint5{5uiJ-HvVOX+
zc`+K8o5_P!gG#q`VRPqMMYFmf);^T5>`nsk>FdD{P4CaXf)8gjHm(^_JHv`Ma!Ekr
zU?CH=D4ElLxZ&;<Gkc&v-fhC_tN}AC;S9DQVmViBc&@s?q9|+*#jgymTcH2Bt4P2f
zQVEXjzY8R=9w65ClT?Bt{;S@9fCl9>h+~R5<kOoQIEmy#3qYC(wrJ5{N@2PY0Xd5|
zf@_p|-OPtIt$tFO*}Bfh@P3KST2EPwO}I`7?rm}a7{VLlf}v5Fa`?wN1<6(Q=_|AM
z<j;OeWpO*;djpqMPX81SzG~_6oM1MJ8hPhCnrcK7$J{W&9p`JP(y|S`+WqCVu~+f%
z13q~90VcLFp_Qv=^DB$8oE{x8d~X^68P3IAuDRx_q0~bV4#;ILK$O_zjtVk8w|Twt
zc><S0g_!7~DKWM_kFJQQ14peCB!mXCCjUd=VZUbiCxLhR6^Lr8BtXOf$4qSOWJJ#T
zzO_cdq?{n-sCsa$J2UgRxZz-Rg6FHjSjr;i4V>lErffU!w--G+ao-jlE~g5#DH{tw
zrcgO+%q-5<qP<gn@^0rlO<yEOE)_ZI=RkE$kdFXYW4&CL(Ankme{9Q~R<obt6$C>D
z5zFhNzH4T)sbStp><-r(7tN-$sq*|BE??68xHR`zpWhVJj^)Tm4N*krh6g_X*5R;R
zyhwu)#>zOZ<n=demqF<aWdku?`n)S84JSV&UrS8Py6+7+tZ@t>Xt0w*P|w4EZz4jl
z&B>UzJgyt5wfudHi-%XXCY{|d57Y)snrSDhXTKZPS6!$8-zq}vh0v;<+Eyl9?Dbxb
z9;5A3pzoxcs~i|e%@in0H_yMNe3@0=T;I=upNc0*Gn@GwjEUfWjTRp<!NZ6AqJ@TR
zKgcDrpyS5ZE)YN!dq?;?36ON=*m!{#cuqr*Gy*_Z>hY6CVy&Ch&z1?ifOfojc}p~S
z*IKcHF^@EPWW5sZ4KW%5+NnF>Ta&(aK>j|$e;(Xjzo<yTVjiOuS7R6FJOz(p^vjds
z3x0r67oSNSWx}<p8mY}iyo$bL+f_;#=J=CA_?6TwNU2d_zz_%x*iu2oC>xQx!5=EM
z^j$9>Zl?oSr9-GLUnS4Br6gx&uO9)IN(b*uZ7YQlh4uM<%ROj@Z#k#ZRcYL9;+=kA
z$d=-J&-<1#rWDtI>&7&9iAuH!k-=mwJCT0lgx5dA{VOT|NlSz<<8_Ei$Zjprm>o>}
z->8SBMj7{t1azFn9@updSn<duho@tRsY#;-_!^pXMg<<Drp`^~RfNzuK3QF9y`qsN
z*4D{Mlse|j1L)~YbATG|Z+0~|d7}u=KN>8Jr%28WG`5|M2RHdO0RavX#b)>zozi~!
zH?MpcRaR)1BuW;C<h{<!9pR;&E28D!;zGkt5JYxzYMBrm&DN^eMi_e_J4blWH(p1b
zd-$XemW@@xHm@t<VqdAc3#6OmE*1AMzj^uA5DDtWlMtN08fZ3tcME=PF@FW^p0x14
zr;_%f*Y|7u-D~^RApP%vW5(okOhswY?(#r>{VY)k2c-M;Tb+r?o5e10aNOel>MsW_
zibZ@KKk>%?!n1xXw660&pz5ebO0M{D`IJ(?Kz8%I$s6}?t`B5aSUqoAI*EkPuuoTl
zGd2y%I-Hm>E(-r?LSJ&JJj3m%#7N_SD!<;7%qu=S%cgRY%T6ZM$lpC;GXl9f8YB8+
zDnK^Z{#}<de-BRZ&pyY`V!1cB`u|pq=BGaTQ(lW+g+Kxi^{5$oZxQc4!$5dPo^3Nn
zdC%NFPdnZIioU7o4a=(bu~Rg0@$3W4KRF-2;N1_S6kmW3pLk<+cB^w+&g`yv`QrH|
z&X>bub;s{cD%gG>{Ge-xoHSt$STdzL#o4TC8iLoIYI%pEbr}<TQw4URxoIO8dZX~n
zm~bh*`?q0Q^NZinKC#@L+nx$Of7wI;`MLW)F^BoT1@>O%S^u?dR56OKRY}00!=x(m
zE0RrWnR(yd&7fz$HS*fdEIU+h`^}d`wo;6a8R5BWDi;3kjp*`YoK*ak@#b1Jn`*<e
zMB((Y#>~J%PFUno6OR<S<toV!hF-xkcpO%$*&;2xq$F4Y!FsZs7q)F>13P4v^2(<&
zlkLjRGPZo4|9hP4Gf-Q-zzdy(a_)Lbe_EW&ag-j<MeCmebUwRN7Ub^4{f8^`W8&To
z&e>tRbM{?d==V80+1#4xePn~_kBNKBpA&Zowx0xmlb}-k-zP!;r8Qn<P5`%=8BHFN
zXfDicPA{iybuMw9UKmcv-EeZhAH`BOQ>Qm^`(zOu??M8=>8RO1SOo_NdUwtZ{cFxm
z4!c7X9ndK)!9$t`VQ`#e$k8Nq{fq5Z7WHmK?SU@xq6g7tU$oybv)rKaMj6AZrM+4#
zu-u&q5b`ArVP~Q=EY6xDer`~|x`*#GBE7|~ReWG2w`v&~@@@h-=una!49*KZNPuwi
zJLn(NdBBi^8Qd`QA!`Eb5WY3;GEOiIcr`%=I{#VaEp=aZ^3<H4wSvCCSb-tNkb0QD
z632Tl{EI<{%~x%tlB;ady;R;yDxl|Y02^+Yh<*yez9Nvzv}lNycFu$Gq`zV(;9D<+
z6aI=JfHM{5+pvGe;!j88Z-9UTg8Czr^Jo0w4@aYr2u@78ZT}2HB>W8T{~xL>()hiV
z%)f_9ejdGlHu7)5mtUFjVjR=7F+K&y7|c6c9XAm7c*wlz89u|fH{Z!UQ`u@@xb2AP
zx7hbK5AqUey|p#-&(#gqU|}HyRqVEY%z5lsc!ASlF%lpyzwHx#VEsC^`P)vBxBig3
z!K#6D*zu?uo1%i|Eb^<JlLmN{{UJnZ%fTYwdzQ3an>DB<0}}97aL@z_cAMD|e9Hu+
zrD-?x^IPW?FhJl6&c6TS?#L7TBcSy64*luk9|5Ibbm-6H<xgY!|3Q^6{t&xA$H~uA
z2X4%|In+feGI~O`Dojn0R**lT%JnnXv#I5<sQk}kh!#voTy4bEd$y0)D<F;)gQvgN
zwj#p5-kfkV&zO$AcWU?ucW4*dIU%fz*iBUKhfG%8HZ^<&hC&u6-@sYTBkfXEJ8bf_
z;k%=(^e{yV>rtYNbuy^_%(1#oj5sj3*{2k>;6KIr%>r1gp`TJ^jtpk#4m8Z)yxVZO
zEPjLi0fVQg4`XTHO3~{&@TG{=N4xw`P`sEUW*!nT0ERT9@OMX=EU20`9tSx;GwbBR
z>XpF^W#WWm@7u@a$uRItYWQ~qkjFj$$EHdM3*iz8xSwi?{3L9&qplE;4sNA3$6X|v
zn7fexyA<f!TK}BucWpo-a2ZEWJWt;U-8eBr0=7(_4|cP!q8p_)PfCG{ltYDJuxQr0
zP?I8P>gxA6XdmJhu^Gh-mJNTbKD^!hdYKFP6#IQA&Kg1jdMh3QK^F~($P{6zP2wf8
z6-XUyQ!p6R>Ms~0h{-Khk36!dkYD2P%*1z!kS!x#UvIK}tQ^rmBmucV@ffUBr_g%d
zn*FCDO$ZqCs5^!J(j9n<1b{m$cciLN&Cxq)prQ^#w&vi!6ER@hVQ^U7j|DBVEwD9|
zV6-&7c&!KhXf!x-aN3W+3MIN!^`0gA`D_VlQvLgUn|FZ`c{BV)5@55Pup>8jIcFR(
z;0;~5qQM3N6g))&&TXXtK{Dq_0JJUR$9IT^79JeILhyIU4YC(t6dy5PRw6fnA|#?g
z`#D72KmzoqN<N0j*HR-}?QqLGA>Pm((SFxa*ZRMgI8jW&@Fxlg8RHS)im)^2DEFwg
z5uT&^e^vV9u|sCCy?7xcp&$Mvg|IZgrh`^RPb89n)yuW%rmJsI-CTg!G%^yla?|=K
zxXNIv3s(8|N^K<S5%DOpwGMojaFZ2?-`RBwfP0ziV+8SROMTWh^JnC2mPs+#=Vf3|
zC6!_ehz;K%TaJBCDQ2Vn!0sO2{rb21ygRr9ja$b-iOrEDV6FGbV3}zw@$W?!XqvOI
zmyI@+(4e#p^@38H=a;g18TK6gi_#x)X$aUZtR4y2d_Ha{wmCXD6Bq1<tP>;w-wK1|
zQPX~~9C^U>Lm9Hw9ACKGm+&1q=k>Mzz;-1<N>(&_0|H{(tC*$9b@>r9AtfOkx&Z+L
z1-%;i77BkW(1e`3i0)i@(hGiNGv6c9fdayfh3tgffUae#EX>FL)cM&=F&iKY38*M=
zd$2r+_>)J9Z_{fWe^D5sArFf6b7bpVJd)U)0pAg<LA5>Kx%`X5ACG74fP#4&{A+6s
zK7!aBldvOHjw*AVlEMqa2pOG`$OR{miynis+lnD{o2l?kF(^B70SY$eiJL}kI_iM~
z*qdcIQ)0u$_?AQO48_^M^>3lh-wJj+d=)*cN;I6FjM$0Z$|5EiT1Q#?ZR;XiYX}!W
zk%ECu>d}8n(YgCT_YB%$9KVInA_2B%kXz2ZuAd5<k$*3GN-<@$M=2kK;f3IY40t4Z
z;l}ITQE_71y;casLI>GeiH{*Rx1)9hU!p4KhjR%E-~8Fpk%ZgPqQ$W#;KpwLf8JZK
z>;@s8Pp*S)h#+FXg*tB)Fb0T0Wvyjxsz9lbLysn<b|kvyXPB`l$iZUc-2PmwnT5q!
zi+7(lJL{n<rW#71e138T@njTuqcWui3Y@Ul6mlmn6}^=(%l>JwObhR{B8Fty1owno
z`DO~<`rbe9d*`VH6_tqo1Zw+GjW!9$ujU1H9ComC%ZTs@avQyH9>+mUo$*<t!J8&T
zUU}L**NqpKkPw!*<{mhWfozIpaf5o{2DlNXmpT#Du$e4if4;_{LH=Pox@*x3^Pe6Q
z<bSXqo)awwzD=IZSnr>oPG+(M#W6V#Yq}FPCII3%%Jdf0AFk6oBE<6v|AfjG1wpK(
zQS6e_x)b5>;nbwm7fNcm8_wX}{E9#XrYAdh^nB=c7dbx_5cS8(!2ZsPfvz9z6YHP(
zGvx!tBs4l}C8-_0Ed)*}3Z9@Y!m7pEHBKPg3?$GCNd?5r^tXn=Guc!>>!K}zT0cCv
z15ku&60*(ijocCIcLLxwu9>)ktpsM|P(KDV3##UWr^gY4>QhGRe0-oF7J`a)%eO3>
z8QgE)xssQ^tD*l(b@Mb5FRDaHmr6h`fhse9V;$cOdkJ>m1^A{s3~a$?C@4{j^SIx4
zgG04Qz`tr@!V)O3ApRX!wwz%fm(XXNfPbHL6|naqbl_kHCkju3&X!yLU0Y`}KRG@=
zy?N0Y{JKE%9{I1v*QYs<9qc`&3FtO3gq&6L`P4<7_O5eNJX0OC*h~>1KU~GPiV>zi
zmjQvGyi<W5M)EBz2z#93_?E6eEambRs91k4zO=i+>A#IlR+0KtMbxMj0cE-yLQMA_
z-f+QX{Zs_tdx><Q3?KUr%H1}X8+t8%1%0A$-SiiwPj|(iVhu_>0LqAh<!nAuwR(pI
z^>O)Mtn`1OcK-i|Iew1Bo<(meWgXl_5){$A)RYakKzq<L1EnO{AIH2C#{t>WcZNB1
zk-=m;K(*;OiQ0)+M6Y!e4EA1L6(3)@5_|%@ICunfv4(>H=PQ5rx}1LHhy84c3cJO?
zMJcCcurbk)4Y4`omc9bu{}tRq!OO7#Rt|w30(GbpsD}+z+-@l#I)70J1cIvWXW9OL
z7x{UR*;tTSHyp=KTpDak*B<6J*qDXY>)ccvXF(2q@&RG-y}U7cOCA|<sHGV99<-RB
zn{I$6isD~Pl+`bk@&7_YBiS=6AmQ`|`UY3OO@9pjXLBI!44R`O|6(-yka{H_-=;$w
zqNWCjLO(17`wMWmGW>HcmapDF+rnSkM;5}BfWBb1UGwCXk)@2+C7zvb2FC}t1ahq|
z&(!g%;~nIel0Z19nqLl*0OK9dVs1E*049+HOMKE{&^&**QU~rrkp>6MUp6D92l-`&
z0II6%ev#<4DlJqtw7wSki^7^fU7Y#6G5$h%`TdIjr@b$ahqB${9#JaE5-oPwDxt+v
z$dnS228HaEN{C6wHjGvwrU*sZCE2peHd&{VWSvl$8L~5D88@?dujxGJoafLv=Q;2D
zobx%K_dNeh+>Lwgxvt;-{r-N}Cs!Zc@iq{HPg#IZYpzdNd^jpNIyWd@fS^ZKIxunY
z&FImiFMmbA;jBXo!Utud`>6ULenA-8zQB5zN26BcQHQzgH0au3S8o9k4k?2(qehEn
zbynmPpJ6}oL^*e>C^>!&38R|8#@d?$8#_DUa9I2d=z2;R)MuqmFjNrPoT=9r2qcpI
zJP;2G9xU7PL<;7J+5l|#dr}__HjG|0euXc?-(7L(Gba%HmA(iIbg)0MZ~96Vc!*Lo
zGo}Ym0`0@*!&Bpr`o(YY(9}>Dm<^5<jug=V5!=z&oTQgulDkJXc7Cd9{Q{Rg_UL-s
z#(kmpMCE6K!@tW0{*QXK5Vr1hZ<`ekb0=eU;SNfx=sb2^TS({0J%%JDaZ5<RN&Qn^
z#@Sc3FA&Q6qKXQ6-d%Za<q<l6-$eL%#zF(meG~oOZ*mHK#4Yq^TR<Fd+5)5{*EBcz
zC)r!zniY9jrcM<lYZyI{h1v<+IRnRhI=2h9c;PvaF;&*fZxl2Sy8<NLAB6x)lniGm
zj}(n}kl@QNwX;#u>4K-3hEpKmqAK^GM??n$OwC7gS6nogJ;34nfCR_%?^vZSKYN=X
zce83OjwXQe65F6Awuf>ricl{9&A!gE=+-VPg!)Ib?vH98lT||wPjY^=(zs86ACUfj
z`~dFIOZHif{}q3F?$wP4*L`o6<Q~BU(EW@!l!2z+L#-og<EXsQcPvj`uM$Nz#^r@-
zI<eBS`obEY%hpGo%OC48Q4iM`T>n^_`$_fS<%7+KuCUJ;FxQ!bpk)Tc&_asv*@)Jv
zrrZbE{wVY`QcIGx#UxOfP>>(*hRJ`ozz$^5O<A&2!hBk|xad@MDUNxfx!#V(12#Ag
zk;uJkS#e-lu>gdhEB&soj8ET$DC-$0N165hK(6#<6L~aQ5mGWE4ow;(g478(t&w!Y
z+{?M!R$NNt34`~)N|;6lfM6=`1@PW_Q}WZVu%_shSL?5TNYB}lJ~Yx634ITgNXF`^
zz}e`$K#%&LFTsSY2v6hcW8<UzQftv9QIl$^XrfnT(Yqq<8J+bh`Lov10an=o$oLn5
za2s_0ql9UfDv%%L<pZa#H=a3B21PR0Z3ADVhp0poRs;dyhpL|3%NR1zd#>u>Y5_B)
zu!>{x8_BMfx%&KHWn<2}dAIEOR%w5CU?<eiG%atj{-8eWVr;i*G+KQgs7+TwZJ)Ss
zJ#%{{g%ETKUv$JX(@-XCu(CZ*xj5|V$Im!^-D@{^lcMgq+xCz!-EoH@dt&a^EAfhL
zuCsp6cYRPW&^L|z=n}AV>#+<c&ZeQ@8x8NiKFAO;Lycvm7x?oyCs&9QkwKHFODV$v
ziqPGx(Oxd{spqM2&rjEs9r{Z7D*WC?d)=hM`ifV#ZwlBWpXfJkeAAboTNhOdi4$`-
zjrK~Gc4!KxIdwm3RXKEO_^U1VcHZ@j7l+vwZqKLl!+Nw`Vc*03H1-UP`)|@gA!(XW
zV?9$CiXyn!H~2=)mLVGqwWdr93o4%XD(s`kM0iVtdhumiG;PJr6I7^2uhrR$eR*Y4
zCXn)B9K=7yobsulsL=w+GwY`K&?Bxvc8Ae-QNSVo?N!?n%BFgQemfWxSBKfsy-BxC
zv1s!hOvUS(X+a!nT#AwlYN0=(5|8$xg>|4cg(z2x1>SYt63j`XIXGUvQ)LOp?_JS7
z<}28nj5CNtI8GYXif~Wotj=rj7ISC}OUBy9XdL$?#SW=yd5|&msOB(W<x~MXcjPvR
zo=Th7f=9B3`hZ8jl*>$6)Vbj~-og|?h?P*J6$DIN55PSx^0WKSGMt^CWZzA$l4+Y|
zxZ@HE)b4A+b3;)&mq<yQlZf2PI<Gzzt42qI7UjnA^LGrnbk_1bmU&uGI+yEE!N@EW
zU;owdx_84T1h=CQ_Fhb#uWONX53#(9|Gi2gPXDudarJ!EO>q@nb*tEEwY*6W^I&JO
zZH3Sbd0vNt2^pd|)Si_nh@BZ1vc)Pqcw|+K|8`w+J(qya)lJiv@I`J3`QmrxdeEFu
z3~~D?Q>r72u3u)iVOUGLGe_~*=ACu?8gDLO;EhszHJK7AaaKH$245F%fdPU23e9z3
z0UVLRo9%d2dTdS`GizEW`EmNau>tY<25$Gyo7DP-l|Lo#yY6OpGN8!K%Y1)C0V~F=
zyI!`k#<9HaF|q{Ps_86Oi@Z~%O1Zk%XXc8!%w&d$zgOWN&6{ZcQo1t^#BHpQS2Ri?
zzyLzlVR-{Kq0}HQn)X+^5#=&A)4Q);KHVFmgKf9;SGBSje6~x63zZiY&dK%g@MU4`
z@!ZZUpX^WLL%{ZW{>$5I{v@Cc%~!;8r;iQgK+Kj8GiktWzIyz|smVMYS$ME%8hvec
zZ^(D!jxV!|@`G4jZ^bF<Rmzqce>($;?){<+&t_grZ@ao1i^bV)o||9H8<!e{MI7W)
zKk@SIF@e>)53VZnH>Z}i6&|cOl41+FP6S9eE`Ag`6m!RnAmno9qx<SQMc8TAk;>3L
z>yx(f#Wp|pRHh=6rV7nJikAR0Ti0WETT7~p+?jESfZo{UDDK=oxAu~w=gxt?lJPv3
z?!5GG_KHM0d9P(c(SW;?C9=9n#7RB;xns8X_nMk{nk>~yKefcqFRW$Z{)aEp4!8B&
zOOEXLx~Lekphoh#ISId%unrSq%SAzkXf-GGb{FR+jtSK*hDE1bk{;QRS?q8<{}v(o
z@bOh7!MR6cxD#pex5I9Q?8ub(EjdLM1bEErhY0X$xG2={>A?5O77we;o-;9%$|J&C
zYtaRzivBnMqTjxv3IQhGN@kS4YEd<+BUJ59(Wvj0egi$q#$6(Y$yv4A5<lp<+8hfw
z34i<Usnj;fyjz*m_|ev(E1%rM8P3nXPA9A=U5$N!Urtz%^KE8?2kc&gwFepHn*XS>
zuL1-9c<wWom#%VmLm5l3ox4FTIsZh$qOa4O3N1Ns^NQM)^Ki(*^2jcc?I+2B4cAKK
zB~UBlM=2{JhQ0*b1zX@*fh?3Z-@;BGr*kgB&I$7{<ig9<VXY8wRx*(|ER_ua1hXlo
zxt_V}S6)lx0j_aHb`a3`GXMs*7)RX;HT6&3VRAzA=ZRtG9L?i*k_|f_rAijXBMWUc
zO<w6dd35XI>p?2Ucfs{Y@o~{>j)g{?!|1<3W9**y{cYhM_7e!o?ih<%%8MQ7uK;DV
z3ppTLPxH-&anFZ2fZt{82?$2eBz(3x!z|RFv>dZJ!!}#5!CEYrAFsdnl7;;w`1pB9
z@+wd(o$;t@AEZIRRHM74v?+>3jF?wYNiF-SmPFOIkBT`QUa%L#5AQ6&EK?K@FJulz
z3!lZHty@6{QPOV!IDWE5{(LJE)<T$C?{ZYbQenkX8gaG>OwG_U3c<@u_5t{Ep(1H5
zgFc3T{~!*mSQ;Y+^kAwH@Z)%zuD5-i5bldg-xkMM|EjQy3NPG!q>qm^3Ae1*y}1jW
zC3TQkWqHUDbYK1kAzNNz@>f}2i2A%HYPonVm$(0tMM{M@DJj)e>lv6TElAv=Qwv8#
z1Ar31I~Rcuy&T2F51zC{^zIVCurq!4G&?O^ZP9@R0Ap1RYHS0&D4<04Iyh#8rFmBo
zC5FzQ3-BF)3M*TqM`Y6c;{am13@EcRT#lyAvZXCoJkylrS6HgJG6l#zmaD-?!4C8W
z>_C6X1y401-Oe`j7|h`LofA+~BhGg0IwlaOTI_3N12)JqJdbL(6$(Aw=BnJJ-18BH
zGJ38q1CJL%?j0cKmGqr|USZzoG9RvfIU)qc00p}h022`bNjv<4M3w!5MEz1yg9Zej
z5^Xl}2zVpST3q!vci&CzjNll{*V*y$&^=OYTXKnS<~ChJ1SUswHRWi19|!#qS=fwx
ztjs?<@~mT7!UL^uPK$mc(UGz;_x5}@kI*CNOn*e}k9SUln!pH+K4BEvk0>x*p~zfq
z8uzx!uNvMn2OHTPqsC?ta=1p4KgC7L=Q^8&dAqV1Dl{Xv(4U9Dqr#2iu7d@AngHfv
zD{U_n{wDXKlP-7O(@`-ct?%9v%ti#W9p@&j^ggV=-?dx*-5m1f5{!>-2L_DDLp~@i
z>?XR!CE2_*E4x~$;cjKUVE6r16ZzIB^Pa!wbDxRWYj?3lSLWLn_a6JfsR!u1s!-?t
zy&0%|dF_dmsh~-T_Sw~C6mG^xEfl}h{X1T28*X1;6&fX~t1_E|@pSi3?XtGDP>u6a
zUMJn@Iy%7~wK`>YYR0bX`__rDTK(FuP8vjYJTcZ0yeN_r3Gg7@Un6HqVVWwO4=SW`
zyaNUwn3ZKepzh0`<Ju)4H*IgLvMh68<rCfzhFH*<LQa_;JI07+YIw&Hu(jx}p1LZI
zf>!!Eg1CTco*(Jvt0U6p#Rk%?ck~3N%#ek82Ztp0muy6g?qjsz(*p`k@CV<_B>YWj
z^P~{S0vf!3RREn6Z<R0jB|pRlOI|tsCaBjWz&iKBsr?J<cVuT>DAv-%yaj73N7l84
zxYE4E49cyFt6B^Dwu0KVRVYX6WNK97r?flCdZc(AzB{!7rz34R8{l2db!cJ5LNt51
zJ|Q89xuH$!O{>Z|vgTrV&g%uUi9y#JdXh$7nfJTiOCDv5T?`KiKQ^yI4ZDh(r)cOw
zDHH_tbQO7SDCl?;h0{#WBl}cuE0vw^&e!85`|Ai~sVgYlmyc)HRBy}mZJlj&*<x7O
zmX&1Xr=e%kyVjnTA+*EPcpSDW+t~C&?)8JuG16r0&ACc7YBXgiRB^XvC6~!(j~)Yq
zzP(mW(OXI#%`X%a0!Q8xE3cCE#-O3O<Pc|_bFPyF9f8w)A4cHQ9s$6i<Z0B|Em|ev
zv*;(tSK)kMMnq;!48M=r;$)O(+IzWqGw;~#x+Nc--xsbdOaD393$TX}AO;*G@$jHT
zAvu!xD$C)3wU7@HKH)dD743Xvw{KL%b-fnd4f4?@jq<q?&+;Qn90eDzzv<rJ8Da@i
z0r!736}ToYR<$kR0elVJs(;wQ$~oJtnsn<j!NgNmFVr_|H2qMX4MVWJ<Ln1@Csdn%
zI{hd#LPXpvLkH72-TEulYRW2XB|1HYGiiSJCy^4t(L1s^u4@#YoRoI(G)#oKofVUY
zbSX#c5_of%s<^azds5iOg{$;K1mV#8tG*7uvvJTL`c{ZI5S41ycfa^$bFb8xrM#9-
zJHA^bZy=!%f5&qwM7+Q&lnzfEF=--lTN$1yX_JjOkZQpdw6|c(+O@U(Uc7n>`dd|k
zXb1>iUnSY5wWZG+Q5)f%Osfr0$nTu=_=3!yc-B+9x~59ZM_c@9`#JuAfXcCN!!L+W
zcbREjfP$0hCXBRM8V`qa!bV(e^J+ssq-TzulVwxUU{gnhNme70!|c1^-RDMIr0ut=
z%uWaxrJTE7`g3LWwQE4Y>M2$P_=5^znVN2HFTJw1dQd58l#Ni;d5F=G7O$yj(+7q4
zlwX0@RUoxrPoap{J7nt;biXGAq8-rT@tUO&H@OWb#$H0k-q8{m9m}wV;%Jt_k2^ZI
zn3d>VKs>!3Vl*W8{rwJe=YZ&!-HBqScBJLFyZ<Em&)va@MGrh5gHsPe1xqj!QSD0G
zzs%~>lKM&bUr|6zr>ExkFyHS&kBnrt<0nI4mMfzFzgyQ`&av_UahiKCAc+_grf5PH
z0AWsdR);OV_>l~pu=~xCnnAw<JsJT(>MxwY_qT?bRd!a$ue=th<pq4+GMbejO$nf3
zYrxv;0QM5tli8|Xtk4h4=C=Ty%QK(9``wpOPkyJG_HP7F8h%DK`hO)L{eKvrvyjT)
z1_CW38;y;;TFyucRL)#ZYj9q6AyDf{Iyzy{ko9s{T#|(h{+h&q-?joj`**{<=|91=
z@eOuAfxkgw`+Maaf<x#+FTn7s=b;JC!5PxAmHZV`)Nr7ZJ`hM0FI0i2N{uw)jAxNA
z{#}&29PCOg8wE`AO4PeR7jo0|?GpjTpWhSY{jD6R6#9YqLr9{Y<&NZ%y;dy#Rn4zX
zXxM1}Q#Y~zz=?n4UbI_V|F1&7-xuS59MA-xJ~fv>6QNry!7?@`v4*bFfPZ+e(TN^+
zr#LWE)$jUN{$)NiK*?9|fPK7h!iTJCu_bp)9$X?f2N&^d&?kTMKu;)asl|6c(;gp1
zDn%G(?Q@ZYx$3%&Sq%}h!_fn0DN8V>FvA;+0*r#)@LWk7wgpTNE)yK{E=C+pz~ZqB
z>nofagwmK9l-m|~1=KcDmvYyvsOpD<n43k9o+iD)M(FfVZEBsGd8F-!61f4Wx!m~y
zvVfx#XZo!QG=|1kbbB$>vVn3ewm4~XxFlH@V>iA?#rW+2eVtO~97oqAA(fjVox`Pc
zuWk&m_Dtl-VzIz-l$e{-V$d5PC>~T7k#kebqiNy-<F65PDJ`u-HK0Ehi}%qC!&1f?
zu2sbEU>`y*uX+%xw$<i%dl9gAmBII0<<0D6;`9rPN`siZ{^yJJ9wz4xx?H<<WSfY&
z0VC}t`g0U%2}YOnOknB&8t6&~TXh36&H4s86Lmv9{Snnwhl*dfD?i`Wy$(%S-I;)T
z*c{Lrmf>9LVv%%E6E!zVBBn*JLG2~=`z^uh*>c74;~s2-@SmXk7tmJ~42-}q)PqoY
z4Rb5m4pVf<7qAM@4j^o~v&ACF$Ds6*edUDUasK`7`0vgE@B4&H#G!%!Er4)o*Hf6r
zV1kvgZ8n(C^BnXv-|qtzH*!oB7`|}^T`Pxj1;aPgSl>)$3JZ0>{M~`pgLnLua{U%g
zJ_S0Yq4Iu9`e8A3$cHqs!6^knV{OlV)6$t^c31d$zFDd6{hn?~Tec~EOzp&tpH@~)
zJJ1BDvrDkgd88@2+T#2YOwv5eyY>vWLu=2N&|vOuO+VQ<t=liUboY`~^Y<&OsZHyZ
z2Q-4UBMdwM*wMo^w|`O_OgCxY=hmMK@I(T4mUz1*5sce>rBo*#w3tAb6w{(@j(OGW
z?j=W;6v9m=yQow7rsJ%iHEN>0XzgrsqCX0XMDVvi9;|>wngRhR%~qV|a6)Tm*SQ4f
zI11%=7j#XbbEbWS(Ior>f{&&V=cm49Y`l8Z{vd1eR4sbFqdAj<!l-0~M>;yY@z5m}
zYp*_%)VcJbyTAX|z5_WiGy_1gO_Ij}nx^&`oE{3?+e(TaYJ!|;QBKC!VK$F0Cfr8(
zxtxFLsBT)w4+NYmTWdiwo{=VOg1^>XpnctYVXOmJZ?Jm<WRH6b7Bz(37M}ZMnEgBY
z=bYT0X<VJ1aKOglMY&t_)uajgGbQ-*mC=1n-sczkuF4~?-z_n?`pPHDTZG<0=K3L5
zA}A5c!nS_!;Y#QwSi?*94?A+rK=aHg=-U$P7?Sl}4EoTNS8zgP6=?GoW?2^g`98W+
zpw^pZ6iMfPaYzn*2W3N62$0|rMQKu~_f(A28IRF5+g^S3NmeRc$2plDw{_-pwto!E
z{xo9mp->1kRI&)RM$(AWVMRH#XcS8>+KnDQOPL4u3@Fya%!Z2d;8dBut)5}m@zwE%
zA`KIcv|UYk1_@hiixrMrkhZ~I`l^~mo<6RHe#d@fCDr0|hSRnKWOEGoA{SI&qm7-<
z5Z`ocA@bBR5KLZ-xPPcYhr^rlNG5NX{k*a2bHB9<hB>D$#ly=Pqp-zr7D2-e{o|P|
zC={?Icl~pD7zO`9L)G{bYI+4{e?qVvISdyre1;r^$;K8I5&Rs%s7J*jS?FGZ{l=+i
zygcolw6$2kiCHXU87*3|mo$$40x%i|AmKg*z`{?Fdt@V+M&$x&qzuP%k+)!q!c6*X
z?oqAKkhW*2|Lg3^51YR!<Y#<0?RTFsf2%+Pw&$xG*&R3Fa2wc{$|V@U@)LP>Bv)E7
zw`q{P7<^ckDl7T3<m@LR23s6j#vBR6_CK%Up?JNGf<UEf9lo|%{EEG)5V}j3E*YDR
zi``B;{4ij&i-J*6$9DmcFZvW}KZ(e)zdw9hpvJD{VQggra55MMmzdTPtQMV%o)+|W
zsqf7I(#VNhd_qQGY@1CxB+Cn9fXleJ@aRRH!$v)t;&JI)$EvG|fojV`-oI6OsU?_;
z(=&6+STO5#O)5%|G?3p~<T&a2o#pq=GTIH(ZEM6Gi{3NFDVUh$HtGf$hsgsq;Xhlb
zv*#iS7;%wRfY8Qrd<UeEbYIG|jR*m@nUdwKGdH^pgig?=)>jh?cp*16(MW&$)NaeK
zN5M8|z3!3LGq$*1TzWv_LAZPxv$Y*Qe1fIMyk*Txu<f5YFVR%NxFUQVi!TelLfff0
z+a_CfvUy#M7+)OD@Rj#bTxVam<!Em=x}G~*izQHN#$|GJrz9R-^I6#6TgmbfLC6i>
zG;u0|<+lH!M8SWNsDPhK)TT14OQ%0(z8(6W8)!uZR;ZNJKZLp8%vcYU4OksONSQ-I
z7pK5vkmGb<vF=oY>q03j1&n>FZ)K-Nl^W>iVfgi@S6A9=QxXCz{F6rULbLP1JakFR
z3L3%L`3&ct?ox4OWP8Zs8I)PxdEh2A{hJx5e;A)(_JZYo8Dm|4_AtM4efOHYUZHDh
zD9jg{n@ISA4o^MT2^sLL;ucyL9ba_SMrbug)jiaEazGOdp<M=Jn6LsV@mU!Vyn)@b
zaae*q`qyM?Weq=D=`SkZrG*b-cHxC7DWHod1|>#m2ugzxIK0q{^d!44-P$P5oE;I4
zE~&>1o1In-#)$?t!Q3C4p{enSUoop8dN_`y7ef@9k}`VvQ(j@uUqK4WWrKDS9qCd^
z;c<U9OdXYlz>x`C`ct^I8o0k$Sz~&SZ110$_QM0{w5H?r1dw`r=6fbEAMZTaX5cHB
zYJk30GO^%5R0_j#dW7**2dtt{3tm}`k!>51V8u@5B8vr!-)MXN)zh*6;^~Bervnb!
zmbJ2B|4IxMfB|TX6nvlurNiQq;F*uQ%zHe^^Yl#xH2Z$LYEkFI6x_`p!lX{0f7Bz+
z;jAW{1cZtEiHkK)tXlsgI}jPvjJKeN*+`@%n8!SLh5_}t;ZGvLsy?0>jZX_Phm$LS
zPHwpz3SvCg!8PRGu|E1;UAkh^7AA;NK%8F5iqi|JuP5!thrZ0Nmj~KQxN!wLs|3+N
zPvazO!Ooz6e25*(U4CP^%a`EoVxRnf>)kQV8wvmJn|J<%AzWq9#*}ybO!p$=?)TLO
z9I}yZthH=NIE1H5s1v%B=Y20e#1)MDy|HrafsZfd58=(Jk>rQg=!-y0;M7wr;Q5{4
zAl}JT5n~dzpmOL!bVUMF=yZ+U>{^zk6J~MUr&o)UTjN2Pm8c80ll&D50_!8sZXdd`
zo?PJ$hLvXR4v;_@x2Kc)Fp_zl6-QQ2JZS3Y_E`9w^C-QiU}Ce6c;&52-PdrJ5${7r
ztVowbOM%~klP0mV2v4xb#inD%&kph<?HjXCxf-T=&6+3tA_BkK9S}csC4;aT%L<#z
zfF+YOtLhZ}=>Lo+c2ma(7`$b>kAZArl^9%1miO{wc`va}(b}%&ykUXgc6Z~6^Is4B
zn{O`Ey^=ii-WOf(qRs;*zfpuCBrX#i3xYh`HUvPqJDk=Uvhh<VBeqigF<$a68V-yE
zGwa1sW@`YyDUPa(nE^g=tPW1UX=`&M&186PM_=UW&YjNhf$*nk0;75u!wM?iP<WPL
z62?Feb`q2pIer*VZqQ`6u<Ety0qoje^T3`cgWfBgrUu2NrWS;^0!2INYNU5ro(a51
z{LiZafg71gvXMfRT$ZpHMEh}!-M+T~e-ko-XlU9Z$K3ft6DwGnSn0GsXuIJ!P~WEa
zh2hK)@Sw|gN1R-)Y`3_D4kDg=KU97fzPt7frXa!o^Nw8j%FPGQ&s6|SOcEN`2O&MD
znA$1bwWWRssfSkmw%Z@d(g^yu-`z_8XSJ6FB?Oe6euUCjD<*~JhUPrtl(@%)Q1f==
zz_Fl<ZQn*Bj*2zsO%A%GUj?T%jW#CO*mWanPLwj=QJCoQ=iWcGu`#Fpm5o@M@U)X}
zy@C&%uAF`cJIn{PiLS<?!49m4uiSLAH8sqYg%*99szMEYe|bEyhIWdE4A#_nS&sxK
zK03G!SUY>Q)l6;pkShkI9%t)7G`-^tnM-?TwC<}LzB3RsJN&{UBGPq9!G+?%@x0{m
z=wW4sFCG~2lb5Tlo^b5b-3o?ROI`1yekk9&V6>{`pAYc;n_<$R^!Fe8y=DF-7;Mb8
zBP}_QKg4p`54;zm|J2dJmNI4J*lx7S*gyv+BT%^P2Z?%xKaCM(*X>Pgdzr|Q9328X
zvGdBZAC7?ggT>n=E9u^H%Ds7peivq-;|5<x_k`CWII4-;Cd`@?b8{oQj3_dNUhX;^
zSn`2K(ja2RJE^aSffC>kyygHhq>rb$4zaP`+1b}442dYmoNk#r&l@>zp0{bRT=qEh
zWq-;`iNfo8<&TSXC7o$U&qAjp?J37UA%eQ=q%PvkPfVnIox$wug?N-WU+S;_kT{(p
zj45Qzegy{NJo%U#&@<5e-^1kp`D{~sX3L-6`_E>Z`hro{gMXQt!~V;x$d!}i{<z^^
zX3eaa5BtX&{vkGh9V`5Yru-94fwKlF2g6YI<aN^hJdpl47Mi$GwTaRkr0ID9UnN9Y
z6GIeM_(D4s=_A^HZqXGm%EuFjG;i(E@xg`%a6*3dF=1I<Js=j{OImwaHGW6S<D>qY
zc*;Zr>soGHN!t*9K>n>V+lW-4d<-S14xI0+adv=W9x@BxApP`9cS92Q%X5b|JSZZ)
zreCBWL!;XWl}}4rt9B`TrCLr5>_y%-EDY3cb>cg|@NGz@9{VsmiU;0YdUQKJ-CMlB
z?@pEIZ7^{ztsz_e`;>M2x;O1e;c`PaS(|Cr#JOhJ#vSZim_m(zn(4$ti)GD^USVEs
zv#=g+;ADE%5awkQYFA85bM6=h6{w#K16nbSrqUK1LGZ^N|IRS$Kh_aUL;nXOy#J7%
ze>4jHe^`2C;b6@4@dI`bY*$?#-&9w867hPKb>wUooELSf0;+dqia_U(@xm>Gwh-Z*
zcb3O6&&JMty(HJ16St(M=actt7>BKAj_JTILH4!2UbibWUR<%%$?qERb;&ZjQA}Zc
zA%?Bmjyd;=lVIslX!Os{EBjCVKWD-)2(gtJawOdlEs4~S`Lf<e_Nkg;=JHe@MVkaP
z1-a>X>BK<DRLo?!J1z$#9{;H)`|tT2LEBSm@j)dVi!(RhkNO&|4v~+B{3+~5pd<n}
zypFl8irMygDyp}8tkHvi_4S~sbC~CAhg!qbaIqWnNQb;?iq%7!iK6SdoWr!68?T)U
z?>~4qRIuA??N@?6TuGU&y+to^SKu0$2+LLO$gwh@Q19k?x7FDS=CyMyP6f4g+MH@S
zf6b+nVAHP=GT~pN2H$z0r^#bv-u!_Z>&yv309y!XX?jcbQ!|oBs&;f-F6vNuZ+l3+
zruBngW9aiIGi$>+ZmdtL-u-q5I9YQxWGCklQ%|ljf5lef<RnqU;H=Smg}N+3d*k<)
z?cWNpxrU@Ak8Rf4-jbzavu{$Ep}N?sk!e-##Us+9eoxm-MDUWtE%n=`b4^aT+E|;(
zjEvqR>5j$5h4X6WCcE8^Zr$c^8SZ(QW2QX>){&f;S9RMYucW;A(dFXOuuCC22RCG~
zarhs}V2!jHr0b9-xN54T$kgDBzEDF%of^NT<29dCeOIES97T_=mN+knk@oL~jqXT2
z{pQGvz3bOW?|Kkl%HAFeiwJ4>j6$S(=X`K(5;rPopRkX2c8G61Jeq%OPzG~t_`~QC
z(fcZ|55WGrlm6eMz+ht1j=(&b#TBFZrb1x!en>Knt_L~OI7hI89(KWMWCKEvaHUCe
z&E?ki5&kY6cd15>uAonj7mX$P*Bvowym>dLyunUbk+}^bQKO--<Zd0=WRy(*LYBhR
z8vAhP7p3?1OK?tRp3HhH!n@JXR|!zu&mUBV2bm>sr$gyP4eZ^<U5);Pd&M^mzs#WU
zcWhSAM6BD&P04uwwSNzH$Si`F*~r{NxluyV8nf_00%nc)6{?WV<dd7mE~G*W4kQ<B
z?YY)T(VNG>>C~k9F^U}3v9n5OtPE2xYEKl6xnH4g56;!l$M4P}1*ev4S$?%x;FHqk
zd#h!3akYVagCT+=w;tUsUt6U_nY>9kL952^8H>Febhl8$O2e#ev#`OvKouq=W?^|e
zIJifmdu@+RZxC(IsClmxIcy$NB|T=xtDL*mMrJZN&92+3XS>bz$M?PyNH+8Q4Z?$$
zrM}CAyIU|N7!O?)W9qx%L0Zzzz5Hag(46m8ofZ4fJG#hTEZtgWWfi>1zwE=oOKi8I
zFO<6<7T2O{LqkNw9e5$G{+n#;*1~2!=tW<R`laTO!QyCt;aXZ!{7%J7SH-wKZkF*+
zEK$6X$v)qAch)L_2LeY?Aw8X&ZjAQwiFC&&@)~-4JBi6NJrfx(cvv)WRmH1jsyW#+
zCym1AdchgX12rCNQ#J_OcudT&DEq;tqx=m+k<%YGB6hW8dgagv=C@oyBQg6iay=F6
zLkw2Vbh}cPlN_Qb7j^*iQNAPU8kgAFTL??o0=mXGrXB$}aGZ2QN~9r_^ve7AfVESE
zH?oF$!wmDH-o;V#b*g#hZbRwjoq?0*w+BkEKHP7{b2n#B3H0<9&cDUp=d8D+-j%yw
zC636^ib;>NU!7sG8`X1#{$<+({ydF*)F&<TBlW($Ni>PCI3zKQvlhNd^9TVE#FQ?N
zpD`vkJx?nP6Sm%~X59T%@9qRCMtWl5R<z8G@0IK)A@;;!J3h^AL_?W@HW!pU;SE^+
z)H=36D~-cv24)`ft{%$qN!*azooP*(4+_|<GF02+e`3r?DC%W<oOW@@n91!^Go0O)
zpSE3(Koqc@xplqnW<6>m9fDBDpre$wT3=*?-6rR27T%Ut)u~YW$<R|2)yE^kg_)Ah
z7cE={)<25oi>R^>8@-XYSWA+t!9rHHTW+7i*tXM^GQN!Rj{3UhrM}Y~bAF<~H!rL@
z{MbN7SoP`hylN3eAa8L~yxzHX6x;(9B_Z$Ibz-71H5h;Df!+3nZ5MYp3+zpV@7F-?
zu3s?Sx;2q!Yd&T5P_128TmMFk7bVn`+z^v}<+HVoN>ywOQ?eX0zE{&M7G>0<_v+Dp
z)Tfs6)oZyT-xEM5_I<6L&|R_;H4_{cPR!l%ZJ4>KMPZIxkyh7z#_~qq?)}|O($O)-
zMHPO@vLg4UJGCROnGfbB7AAaIf}JHZ(r>zYG>3GjYV9W5JX_Rynr)C~sa_nj{dlOO
zvamKbdb_r?@Yy++4zwNv<3T&XR7AT#jW!eoT;0Sj(fziS?VAN2YPuP2jTOD0xpkdZ
zi&!gXbV_342E@I^`C!d06tDiiz_@zL_@cyKh@W=m4YT#Em~(K1)p<Sz5q*q;>?P~f
z!iT?ARyoyBV(NK9&4X$B7T(+aDjm$po#m;Pj+={BKR0<gJ1S&n#h@K@Vc@)sr=Rt#
zHe6%N>7AmU>HOY?TK%$zex_4F)2E;29LI$a0L@#Ld8PSr=k_36ULPlz9e8`bsOn@f
zDHdbE2)jCtQX`FRtMDXRh1u1|roAYUt%%U&o=am?xJ^Xw3*p`QNNp`y(6l%ZmbihZ
z%Zm8eRt9>MISjZr0(5=i*0vK{_#s{5=J4H`N6sIJI(jne^)WHg>{~F@Uh93~yO2Gc
zM?2{ViXGLjP4i$jIB|6b3TK=?qi14PVmJQ&W7hrtOug+Et0FcEBL#=WI3UiK!{|-+
zlqJ{$6!N9_O#NoxtFc;f(#4yps?^u-MyI2~1*^R-8rfj3!ge*|!~^X~W@-K3PQV46
zdN*~T<oy*h(t8~pw_SRkc%@Ae@4Q3K&5ZUoKVB$M)9Y*nlu1B~ppHWalTk-HDs(cf
zz2%tv&FjaypQ+w9DkuvdsOyY)9BdzPP~<p!;klPyFtMVvG`dd*`ihHP|GlvOiOVEy
zJKifXV!J5E7JVgSKXo2pek`v27#67DJBUc{vaRH*L30rxIUm613a;8iQOpCFfS~nA
zqhN2gY<%3rxXb*z=;Gq&QPmuaFRyNW9EgmxRvSl<<HdPwP$!^Z2Il4P4d!0Ft4xdX
zw+s6PsJ?o!`bIt{*#h+s)wfp}vk!XDv<7PO5CHO8p-+~h8n(%j!ABL1G0Yppd2=fE
zBQ+c;qe`i_U)m|ps}_H*5pwK3U_n)7wC-8|GQ|bZ_iDh~ZQ-p<c`I-9u^6ISc!g5t
z@O-|?d>wpka~e2#c+^e8&z+i(-;;FfeA&LB-8lmHV8n7R^cS3AXmz|4IU66Q@}|%#
z4CNhHd+9<+$W*Nw6ID>yIm7Yssn2~z|5e2^{^NLk0;a3)G*ZyoI~3Q)Ta<8r3z$$5
zF`^nhT`l(9!5O)yu1*KWmWFbd)OZYov4f-Zruv&3keiQ^a@iew)uE>(rF9T!QK&TV
zcdyZclgCvTk0*c35fa@;Tq__og^O;mudp9H*kk9X^i6QaHj=h|R2U_h>YYNoS4r$!
z?QLLG-0qRhW0YT{A8QccqSMqW5pkYxz~0d{tXy#Rna?y{PEI89@W$l74vPBoz^Fg|
P{bNo3(=x!m)b)P=(OM2p

literal 0
HcmV?d00001

diff --git a/cpp/src/arrow/compute/exec/doc/img/key_map_4.jpg b/cpp/src/arrow/compute/exec/doc/img/key_map_4.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..f026aebe9a26ae173301ba880df4e5e71897060d
GIT binary patch
literal 56289
zcmeFa2UrwKw=Ueo0VD{L(|{<Tl5-vu0TBg3B}d6w@(>3>KqL&HfPw-Ff(ekEL2{Ce
zgdyh~hA<2;40qhyy}xtMxA%8G|L5HE{P(#u^gK1ytE+0gy}EkUs_J)6hEJw|vo}@L
zQ~*3YJirY12b@e_dZ7%pvj%|Mw}Hz50FVI0z!?A^m*U2Kd4NX)5a7~yxIX|-_|N<&
zcp|^0X>l^S;dTJwZ!!Q${R%4p!sU_UzUu%0wF>JBE=7XdPq$M9P7ydo;1q#V1Wpn7
ze-Z&*J7-H5Z+DJ6wk{qn9Lkn<9xiSi!a@Q<0>VN<!omW=5~5OKz`rLYA|xU#BP1px
zBEcadDkCI~O9KFLEWsaM2{mgh;h$-t__*KF1foI!5Fh`KbUNNYyqkY`E~ICFT%J?U
z<^M?|JyrS?fl~xd5jaKQ6oEex_>bO(h>VDcjHndwS9!P-i17%1`V_>thzT=5gv-Ol
zPN<*bvHY2Sg~$6-1{Y_+$NM9CLY;|^|3{i(?Puj?zn}jjRs-Oj44;ewN&pElF^HIm
z1Ox(+l9G^-(^8P1IYZ7wbB>CZjTy|&#>~pfaY2BG<2)Z1D=V+$B|afxF)=YPkF>m$
zh@60^n8=Sp@JLBX$<L58Qcy68aI$iW{PK446`&@;X|_jz#|hw5;}KBfoiqYqTq}ui
z(WM^&rk@)gJ^>*SF^Ghe><mtz_$*Fi0s?$O0wN+pT-XRN0CyZ9q$Z-_6uwG)PRks0
z-j!D5QQR96u50;qblSaJ7epVp1(TA|GcYnSU*zWDy>wYjTtZSxT1M%*vWn^rwVQYE
z-Ph68(>JiNw6eCbwX=8k@bvPA`uK)C4h;)`67e+t+4F?Nq!%xf-)3aK%gTQL;bTEz
zQE^FWS$Rc$Lt|5OOKV$uU;n`O!J*+1#PrPU-2B4g(((#&duMkKwU0hH{Gk`lQ~X`6
zKQ;SLdQs!_!Y3poAO!u;3lHBLw-Znk5^)L>(_Ga8nY*4lFY<_l_FCMV{5n!DQSB|d
z2X4J&^cTdYFCu@a_M2w^o?^lOpEUbZv47}=0LTgOa2Jn&8h`-%hrGbsAhI7Hq8px(
z>iRBR&udn1Xxlxkt)CBS`EWo19!<WXuWqm7Fc)M+Arrz5WhtLcl};xt87ivmSS$!b
z5trRzx^tu4lZ*4YO<c@Ok?kJ^{#NQ=e-IGd+e&=>+0=M^yn(P#(}k6~DE3N`EK}FL
z)W@60MAo&B^cUiFzq7R&aag(}E$X&5L}$;hl<lRTIfRAkG`;bv_Q{O79$U%LL1$dH
zYD@>v-oaD;%MTGX8pdROV{I{_moKIO*<`ta9eVlI>gQjKwiTN-S0FL~y&gg}+E_hH
zl2<P3uzv}U?tAn75UM+Nw)i9$yc6dQ8P)J}8QCQ+E%wLz4(tM2ibS+{L}Sr(HS+=C
zZc*#w--DL#_WVulHmiwl_A<n8tVr?IBYn=_dPQCkE1i2+t9S3Vf?ZaOe%fs{1w9=f
zrgm`QyqieFf?D}{<wOuiA4c|bhgoTFCx6abrI*+<VctrnK(jy;>D}&kG7w5=9RC#Y
zbVugLQRd~6D7<(LH}~aaV5LenyEDt~{GcV-DpxA&7mj@C#nryz_xQ!7hezFqYmjj8
zcBkzAA(ri9=kHMIR(Fq-b$MrxJ7sTD?mhV34F2hY5r{V4?RJ{Z(eSgrB<EEi=O`0K
zc0;`V_RMV^^|Fi@D88ydb43id@XgAcZLh@HV+#Sdz~w`L^6)9Zn@%=3va^A9*=u0(
zhnH1@%-8qiSAN#6Q`<7)GZMYLKM=Pc{!Tv~|A+iPf5^Juvm9{Z^oqzRYk$R75-)@y
zF=7TZ*CRDI#!rAL)>_PM@F;!Sc;yN3RT_p)1|TK;kV@|pAjs_mK#IW}4PaiICjc;i
z0?gRr@jIvQY?AQ2yq7jE1X=0NV@*2&bnS~#Ww5Hr{kju?3{V)N+E2lvA=9deY3vSc
z5`^Iej*K7^E=N-*Kq%ND{HXH;@CR?DlO>1l3XddxYoSbAnSBtrh5FRf$<}y<Pauzl
z{L`hW9)hL6(B>Z+w!tCJwOtsWS(Ya)hegZldz;D0zh42)ik7(6Ym(ViuL6+EckMEq
z2H%H-8s-PdkT<b_zr_b;_7I2C{$_`N^T8Ogz^1CbJpm~6l;df*&#Ug2ukNltO<@2}
zM&dM^RU{UG)G{1z!J4CtV1a@^6pIEJ%di;J9}qALyB?!!H$$*uc&w|C=OXPYyVK{=
zs^nL?UY`J9MCoxr9ya1>kl634@DPaMF(A11APHbw_yG1E5bzdq_dpBv<XCfF5%ljI
zz~lc@8Kz5>>q7^Va5;;Ij4g?;<qOkV#EgxVtUzh595F)e@8F91!SI2nK$N|NwW?;3
z>htSv>f}xjuD#k_w0oC|9UKlsd1xD-bN1=@a({O03pg6MF(j1rtyO3)$i0QlCkruQ
zIPs~t>eKm*=hk=Q;uOz#!ntBIeIoW_K$CP5f1%zbmW{M`fID+@RfY0#l*m^DmT0VH
z`U&unfRShPf;QFJ(Uy!+dj^vB!g%~QAl5W(d0xDsRF2?pBeEHcxv>?oBI9@f<=jGs
zDedz5c@D$i=IaqBfH(HTIHs6l$=g-&%x83%JVz@zKTpzG_mHuY@8F2ft6-KoFQYa)
zKGy@a^J-CfUEAlw<O{#RChy@nQ!y-WI?xtG+^M+4c<X~3!`$7m`Ji{~e1*)<OWa!c
zoD)~2RFrC2GAZJQsRZ+?0sVrZ^}P9_Z30H;kTv(?`DWEQH4KHr9U^3pmg1S8UFHM6
zvu*EJ_<9>V7vEoCP1{o&>e(4Bk+;t+&ftERC0}$2|Au9BU7X~lk@GbqY%ybjVtC!z
z!571H%Jg%{qtqFbx#pvaGGnSi$->x8$1V7j690z+zZcA0d~RY`{d36k!godvRA?W;
zwU%3nj%vxoC*rIuZ6er*&syqju~IT{O-3ExHt7`;?{~`5pDhvOTNmKG_OZN$0CE$k
zl`p!&c!{1-#_TP=>bFb%;}ksi+$`d4H}xcjb^S7eXkTZ<mz`-{h6e)Tb*-K5>=%%9
z6A>S;)AJ%)*4#Q|W4%?b-pRpk7$yS(7&UL}WXJjm>6zquhYtf%*Cc#wE^8J_vUN$b
z*{5N+hp5|RBdO&JY#i|K7gi_n0a5^EZ7MH8Zc&+f<&9jf=C(UBQz3Qi8BY^xmJja`
zix0kb@Ht=Ic(uK`H|kk*PE=GNeu~q(5d%FX3)(X%DDiMF)+qy-PCT)MG6-%KIM370
z81;$n(r(Mmmh!E~x4p?rhNx&SU2tn-H>&E2_X$dz3`xXpm-;^lx>(dh(Z`2~eO{P5
z!kMtY%aQkuaS>F$x^%e(bG||mI%4K|FGdQ*_xbEc`b!C2q(l*I>4faGefaBV)xH39
za*OrU@E7D;le}~8O|n?(Xu)qgx3r5I0V}&#<#q1+g=w=c-NSbXK4Tyb7M_H$^wbU2
z2~3eRn^E}}4#@+#aZe5O5^p-uPuD>BQG&}RlF7{4i2eXJpX@Okx4Fmm@%9d^oB9L@
zI05$F6Yw;qIX_czbs|qS^MI5)eqjl7Me9lnBe}*5$4GL6Vs5<wtdcrj2q1ef&2n&G
zV5v&l^`m;0@Bjx)+>%2qI=JIe_pMp!RfD626CjlfdvpS<-XYYbxb%eJEP=1(!3!*>
z&S%w^A6DE-1>!R=2hgfS!#fc{Vwah6_!s%CLs<(LyXL#@?SYjZDW28LT;A7eU`0J_
z{qjMtLEu_Q)T8yUGbsWQSsL#A&-pZbM!R`<aLz=2bDT<k^BXM{x_f4L8tKG7cE`Qy
z^V$2#0J{OqL+f9rw0{UIQvF1TjF;d%2NCwIB7x+Ycoym7fq(#oG`Mf-A3gdBpf$Of
zy|sZAMr8HZ&K~KU0C~kHK(Fwf|Ee#4D}NZ4?%i7U+Lt2Z*dP#Q+EA=M2kX~|ZGHsg
z;D-)^$9t-AC%|p16QJfjunk%MT)Y3BUG-Qz3454MvKZ-T79JbB-0~7WLuaFL0>IY|
zu{A>{!1X3Y=D&5Cemh`RJa@<5AXu&HoJd^PG&V=ZHvq!EdjhOqf#F(TV+&yWO7~oj
z7T|xIfnU9m?aZ%ko<bQ;B%-jDV=Tf$>gvwjGo*b`dB1?x={|ux`VNd4sQzo$?Pv2q
z7h&-A$TezgzWd`AOrZvPakV?!KJ0g7!89OG?Wn1O`Fkfo(Urh9<&q2^8}QlC7!Q-S
zsxKUVR-$_rip*kv=N<l5g`2_CQdIRcjl`KeiV%35jGrgww$3OwWRU7;A&4UH7>Rq{
z_u?*A4P-MJkZ3yr<Uz+fDnnQ_E(_LOI<H81yB?|}<0O(Un{@ofn9p<@T!p*5dXQad
zTm0icA8tf{kz!4K+b-@`S1wxZ{wP>Md36Q$hR)RY1b_y5;29X->>@sM@tggvU^!<j
z*H_wEm@R{A#baKnHrK%*s!{_a_89{Y!6IvN8qU+fodGNv9&-bq4|a5q^_f=pFvp)V
zBC$&QX?v?6Y&bt)`lqS>m4i<3Y+zRB`$i;ppl9<Gq^uf3eR$uLTkv2ZZND|0(q5BH
z)X3;&VdGo1s(I5ZQWrZc7i+mQBI_bw-fl#;8Z)`hZ=K=(b^;gx`u@DlN7O#}i0hmk
zDwGpv{cBDDL4)PQiHnLg2CvB7KnEKzQAMg4sl*xjON_@WZVli@`742k%O2IYjBn&z
z@TUHJ(L%e6ERiFWHE!qCY!(q<|B3icgA^AR7hPmdXJ8V-pPnI@r!2{V^NA3GSZ{83
zDzFGoT?<Lq^*YB`vA5;oM;{WQVNW|!;L(^3;gL;W;hMjLXE8-*krPXM<N|1(E?#s8
zAYD0jeF|*+!y)@msX0&l$x+EV9f1pX9=7~&6>E2!bib7yeZcj2IPZ_VGms@E`qFGD
zW62(*0wO`+)w=q0ajtl1t}NKC&_CXam1sv|?YY48qxds|AoyUA<Sy|*%&Wk$&sLO*
zBBdA1U<D*h-A031me>{8q^YUPJ3tMTf_n1gKrv3^?g9i#8CJpj)#a=)jfnOjTlsp@
zGpWO=z-vKbm)@PNC|BmIW{gn=uA?O4jdH#v_7ts<=o<*>5iI!**r@j-lw*fS8MK-7
z>dOhq@uhat&v^XZ4d9j>Wd3QwnG(-)wEQ8-aIN=%4J5|C`uFjH0-sMrJgDon2IPsZ
z5(6fD6S6u;bxdgh)c)%mmh-(zxIrxkaO>0STptAk&s|7A9q4&Y4i33wK<*u`(^%`B
z7yJa9v<{-W6kv!w67x}CncN>qC*_DDjSU{5eC;;bD9gMwa{|O3FCP2ioYFTPiVREt
z(x0Apew!L#swG?}`tfFlf(hubNRIv3_gK~5WjMF+1Yqym!!D;29<fb8W(S?q#@`*$
z4Z>y-CS^xiLFd-IO?TD>k9X8jSMm-}N!WvQl6m#WJ5hsZ>o_{X&lcLFVZ>{!y6RR=
zeA>5N-P(uFQJ3+IE3?ts<Vs6#s4e#ZhX4BJ5>;<Og%wBFA`i<OHXz7NDlH7p=#t;y
z>)(O4UX?x)V>|)UUhEuIp8!ph{i_D|{>Hig*^S^m%WhtOzj{IQJiJ5zh`2{JraT5u
zSw(y0P;qM>93*X<Zlx2nJ;MfCVUjSs+m^V(U&1c_7~sggge)$Yw!!M*e+rVXKVUd(
zcup)j@&#JtviG6;ZjSF?4#xg*kF@nKhViTF(Wo&CSKSSJK?8H8a+y91lwUakM)_fj
z3*e)*JNU=Bqu4-BTsPTrux5K?T#CUc;cNiDGj<0*pkN3aXpTG7B>0EC-wE&t^PWIb
zzoOKZ|G6e9_lgi6-cEO^W{%_EneWd>1hMr?_&SBn#tG0evkP~@d<p-670jDP<V=h=
znA`4)&Z)fUBKD1i<N%i4Gu1K^>+2&n*3mC5t)3*@I!nsa&v&U*L7<=iZS-iTi?G&n
zKAQ}N<^ag|(&be}3I)+VqQ=;;gqI3hPuj-D{4*xw3}hrSJso(;ZJF5$JLsENx228J
zuDSLFQJ6_4IF)|uMp&ExI@6k_1AaQue;Jhc*=$_Uz)CMAbU-{HA3tmThM)p${;@Ft
zvR$p)?Doy~cDcCss-%>@S4>-AklhZPivU_v)cT7$^exLuuCj(Z#>=fWMG8stu(c9@
zY|B_`6ET!CH<<gV64-}!W()q%EF8kwg6<)D?=PNJ@RFk-#6kdwnOfHdm>5~vK?>hL
zm|G*9@<!Hkdbm;U6>g1%G-?-O=CDP0hWb8opEPBp=oG$#1=PJ~(^n0}{tIJ#`EDKC
zGx+5!xFpx>17TWW!@$16YzfG|Kb?Hs{kD^#x}UP}TtUf2qYrgmB)P6t<QpBFiNht^
z3!+TJZ{n>uU0Vq{TbK833A`JUuO|znqWW&YE3{Xrp;|%0ugG^-d~%)wUElY8Bu3@F
z9lwKok9%cP<NLJtUv>z*>Ex9pbentDi3~Wmrx;<$5iIw@$MkAk&pzSz%>u(&_L~-r
z%Zg`Ewxkytt9JTVu{=8&Ydr*UG{|nvY&2Cnu3tz*x5U3_meE4#QRzKPbIC)ebp?qQ
zzu>r$W7rV{E>;cUR_yYlo+w`Wt4HdOye4=Pvt@+zIlK$!)Xp?R&9s%66fp1s-yVWY
zB;ETj#TCJqHWUU(pGplaaNw)ewU{41?Jp^Nx-rP|g2i9-d~5VdrX{#N=;GTT;oY|T
zp7IYb=$}9Ere(u+gN*j;*ksB2j;=QQNBH_uA=3%ErB8snf^+i6$F*9jsqWY;(^fyf
zBWzd@RS~OAYf?6D;L0qwa{|a+#Dv$@mag#PhkfFz@@mgFqS}4lb{`bA&z|rJ7vB2I
zi>)hyr3llHyd5Ch%##SsKxCN5n+k`y#*2RK{($;QRJyej5Zp!LG9Tuk*ulE;RAZ2k
zI;16AA>A+JlZ^vibV79|fuoR0z6$BmM<K{n#dBJUOb9#ofKMBCHy9j>qyW4*<^2E^
z_o14APb$CrcB10dS!aw_q7=TfOO~6s5657~S2cb=^iM%5yaJw=EZhsm4NP}~6%y2s
zMEeeRu-G2F<5$Od8?~Dikiv!a_eR*XX+_Ymv!A`_2{3sDOB?WXrzBhO_z2fmlC1BB
z5GK(am$+YSOtp*>Dwm}beYA8I=T@4-4lrfw7?bBfK<EjuxIOy;;x`EYqR^)>f-_s$
zYF@m+#-oTMBb*QR%6V^nH|zKj5?*3-JYGJ&`Rdpfa1y(*)Hil(rGOMaf0iqITKDe)
zq>z)5lWE~4TMv(iGJ=B6?gHkPE)T2(EL@xfq2><-g$0BJ0XYctp}B>Fl?TTID;qm!
zd7jP6S{@ENOL-n$@!LYTA1Ybd+THMVv%2f6anHin!9v=S2ckehCI^*)Iz4o<@-XLs
zIypMK%RuGN|5myTF8w20@VtVYo29jk_H~t?61Z>j=YO`v+uK{fTU5Zs%|=jIT3T9A
zNJLOXgdZot@9yL5VGiYYcIWz8z;!Ek3pcxm9(FFy96t&)f8gTjA%EV})6P=H+T2?F
zfu*H5{{wSTOMYQtOLKm6OHpBdOHolvNpWE*5o_TG=l^J+)5AZj|4S%v_2E>KQF5~~
z$1N$j=i=h1@NcamkrVvSr~RWGtLx?-Rti5RdH99I_(de{;bwh)oA?nBk(T}sC;l!c
zC-_6b|DdqK?>RUOaj8EM$HC9>Kd_xv@=G9}!gUJQFCp+t!l%}C3fC_o@Jqs{)^!Tk
zFCp+t!l%}C3fC_o@Jqs{)^!TkFCp+t!l%}C3fC_o@Jqs{)^!TkFCp+t!l%}C3fC_o
z@Jqs{)^!TkFCp+t!l%}C3fC_o@Jqs{)^!TkFCp+t!l%}C3fC_o@Jqs{)^!TkFCp+t
z!l%}C3fC_o@Jqs{)^!TkFCp+t!l%}C3fC_o@Jqs{)^!TkFCp+t!l%}C3fC_o@Jqs{
z)^!TkFCp+t!l%~tKMvPF-rBHo#=WZHje95KH%=!4eEc8(xQ!6^Pek&A9f^nx1R^FO
zCnG06Lw4p21trZ{3QB6qGiT1ypQS!WOGigXPQ}1TPs>O{OGo<~rxOA096};eA|g^+
ziZc|nzr6j%>4bj~pOOIY{3)jsQ2c*Ar_=G>w@-0|PXBWMihHR&^34|Lk-zecU!;$~
zdXcfrtX3k)iz}^P8vE$9$1Fp?wzY`i)hfHy0O&^!vnN1U4J-0>bx}#8!NfDC&&n>M
zm*S-sLaoRe*~;<0Jala_XhpLipSH|q*Vj^_7jJz@(<m4;6Ma`an)i~PEjCycw1Jw9
zLj%4PbZ5)D0<~S2yB%j&qXIo5A5yur;+TqpxZjq6ADsXUYYNi-mwWuZvxdj)P|Q`@
z>U2466qgATW@Sqj<L~U|u3PdAUx`sD0uFh9ymk9OyzjsfHy$0y`)iDeK&BbL%fk9I
z%MV?XcYVJhCBty^2#iqgdp*Stuwpyk*PlytAG%oXaK@3zY-lsNeO9>_ELum4haCE9
z8iIQlPh4<hXnGu#a&)cx?Xo00ZT~_^e}CPf|Jy<0!l=yiI`hGo0O<p`zYX0&+8LK3
zCqtC_@yjX}pKNGaQ$<!j#c@+<Fb48o)q*lNR_q-DH9dDNR(avdy3WCi_lU2qhv%mA
zNS0HnwsN0Mcb+G(%dpdNwL^JLShUR>HU>-eyXRZi1CqiQ_ISh>lEc@cx4L%fQfaq3
zOXB=8MxeLaJy@!FbgD{f`u)Ehq~348d7;5_UjJ#T)YKhN+XQ*xkQhrI!r^P8WFiFL
zadvAV$<y4$Z$`=8{LQ$L&x#cTMQE`o@RD?2quD5y$P0q+Q3-hr1Isa`qv6xpPZVg-
z&y0$y3JPTeBZzaRH>u^cK5q=+U$az;RoVD_UvI<g+X&Mkm*Vj=EIri8UkL>vYu=pU
zha3s&$8T@UEEh9M;uxPYnJg{%QZij8%0(;)0_haJ8#YI5ruQ36Y4leS_ha7qd>p=p
zst9N<&M~o9)eJ@3u7@_}auQwpa2b!oEEzy^HJE~SA*3fjoy2;L(6IW)t4Idbh+;ND
zBV(0mla7w>OxnB|g2fMpDepRyddoLM?mjT<#Nb}h?%vCj!QB$>XdL?qnG=?d4Wxh%
zH5e~SD>0&q%MumZd7NtdE!=w&wcO+@cIQc9n>7c1OCB39{>}d*_L^yp$V>(dE(`NW
zwame~i!awKaCl)ofWu2nB~krc&%SlD$)ZrlKdZm=dDOMp&GiPnGFZqpRpt)C(%Ywi
zWjx7Op7JJ=qsj*<4=Xd8A5wvqCwFtf>SbX&voY090*`D<nQg*74xFm4qFhlC^eZd7
znvNHny)G4$x&_S&WX2Ey-bGqCW`MugIi;F}Eyp>HETZ-nF*v@M<<3+g>0DX0EzIB~
z1Lk%vn~ro&UXi>&yS$LJAi-7F_p-NCpfqO^OwzW$Zu<dV9)D5hU)A&P<k2mHi(t{k
zkg!Pr%L)CY44oMib0XV5>|D6LRM2upB)+;?*;gLAxE@;|xt4)^c%8A;U+_H<nTUp&
zE4)PtV@m*;c3F4dnw$~e5BHaMnyK`I)2<}G@2xK>Y$5u#@+d2YbcB$mV7ef`wbAVG
z{w5{nVNaFlblMYkE@U`dEb=wORpiBq*ZgB%d5&T|sV1X56g#&?lA7m0fm=WxH@Y`-
zbQwl<0$8p(T)7`63t@r!PKZaVoPVr0#;KL+_2|)Ztf+}P_sakan;9zlF+G!&RK~5m
zby8T4Ln5kVQ!Y;b^Hrjr2s|6T3kjB-SEUH2*3+qS{2@p8{AJPM5^pllnvIcuGf<DC
zl)b)OzFhuXZK(v2P4eE1kI&AV#Y$SvN=fZ>k4&QGw!krR1Y0OD9kjX0-#yO&QTOl*
zqCL?Xs|;#A7b=ul?&xE+Z*>ERGh#gf=&HcT5LA2ro++_da>8c_Me&OZL)wFS>l-hO
z6m=M1Kj%x;d+<${gXwFR8%P%RE|Lm;(d7|WaiLHLYam;h-b_@OzDADyw<$r+2x~sy
zIaE5;XY9qmeh6hC^=j<W1UHgw)V@y%l~qtwR92nHC<>Rc3YOf?=ti~`ouwpgGC)Gs
zskV;l+0XULslP|ZgZjklAqDUHXW|cUEAg62APqmY3AUeMd-_mjrbd3KxH9i?TG3GR
zOUzwl;^I^vG<c<3yKggBOkMqjcqQ+8qPU(-A`jn|kmdHHE>lj8*;h+qR3vD@sg}BG
z-8)E0cn!<9iriwKbm$3ir;k5px{qQYG+DOeZZ*xs98K?)%UdDb3V6Wbxmn%yylFua
zY_~yaqYJUUPU?BTfNN~rBw4`^^T&(cUyOmGKF5i<GCXs9BpOPP2psK)`YUCl3zZ88
z#Xgt>$e=joXsT{AdB{@^;)N5u0ixVZQ%%H>El=b;k9Y!mDEtJT&5U1BAN25X!oa3G
z&#e(O@+L_XjNL*;*CEC@l+MS$(}h=>zEPk+2FA^9QrclTdt9c!i#e%K%je87jfk*I
z3MkIT6ycRC)84@cca_Tv-a(S5(a9;ByStRu$J$m{4l=Oov23#YAw5zm*JEV4Vn%l`
z_Xol4I8jX`ISeNXb9b$*f3{R%DM61llNF@#+wnN^bB<BxOg$3Dh~W+PD&yWN|BHQ)
zC5a7ATviQM>oZ4p&b-!OQ~fsN>_KrJv8mB=7zy1F=MKibSzWSYBzT;k+lyW9zNO2M
zaRQjZ>KBl@Be=c<eY5t9M_MV4`{6!Hu*e=_If@;6i5fd{LZwq8p8g=L_;1=@@{(8T
zD-llxi%4e=#Rm=^R-+yr3Hxge6m@h&L+%uB&F>EG>$p|jRX$(eq|c^c#W6J(_*xP4
zp^(U|5z~@Bek{5I(=t+Rnhe)Xqw$xvv&cf{>1<gSS_II)G5>6Lt8z9`4pX4=^>fdc
z2h2PNhP8?gtyt#3fqkK2jAVEdmIw(BD+1Hy<fddmKiw@Wjla@S`j~MLnyAFb{UB%l
zB_4l5SKkeGE98~gl5!lUUwDl`1=`bTCi)sOP(UE!edos&`rHTRuJ-JRp>G@)s~$ae
z&AWfGO{y)O>P-zJ1~lL=ie%oO0Y&-SxzFY?s%OD042>nyaL@7Rv=<j`XTsz#T`TWi
zzkIQ#^_4Nt_(*CtHC(C%eC$&H!PdLfAS}ho-4Dk=Y24`?-)SeCQAK%=J|bD<PC@+D
z%QxgB=-w%wZF7u=zgBWjdtsf73ck@!Ln1X^ezkiDvbd_ln@(a;N`3j`tMjvD=>V_<
zdM6`=sd+8)P<UDU2j!-gBKfn6ifryiVb9V#U69_5$s^ynk_UH``qnb~8s>8lXQHnn
z9+Xe1p+b#jv3lMuY#|gKF)blRCh*K<A&fOrBbzc~!J$-!IL^sC+u~Tc%A5JwRRP$j
zPc8#?xCi8fGk(0k*a`GYa{VZ8yZS2RWOgD`nq(|xypwWAZJCujCo<CZP}@fK>V$eY
zmVoIpmm??RSC2ypJv-f)HLAs1nkH#%!_{M04CSfRtKs)Z!^-S7#0hQHE#kl<-}BQB
zTt?-Byzc#XKG|>_m&Kq^Pg7QGS1pa4{2Tp}j2!!9qWO2P-+s9(mUrfI`h_4;9@baB
zL+|feUOk^6PtpYV`+d^)S4EF!V=iqeeApV**)rRzDa>4DzcA#<l5+Qfaty1oW$(@I
zO9}Pc_ay|}NT<x88B^JfEU?FEv^8zhexKIwV})S#<`&sXVu7BhG`8{2Z)W&Ly=f!9
z<P5po8X8@t93*5vSdw1Fa>topijqukW8LB<kZq93a7boPd*6|5WOr_SG;=e1;_*2|
zA+PmQ#i({RHN>bI%cD9j!qvO<La*h8HUGC8nYeabdim+hE^LhoZ8UND2!f;WB-KSx
zK0_T(uGBQ4qE**YV2B-s(KM%;?+8&RBNS`$%x-|8TFR{CQ;$}UO+n+59<j)jYE{VU
z`drySB1t~Bq2b{q3K1l0|2H1mS*&N<vTyhkOZ11CD`!#yYG^~0`?<fAZGWAF?o38z
zj!!zej<9-7rp@Gj!QRJ-!k$?C5IstXo}f)r?&thcx`Q_YeKwhrJv!>)rcb^<0-sK&
zEcx&1pMI2+{Ef~{15_=O+)>Gn8@;ulM_ZD+L!EQp^eeDKpEYZ>hb9Nm&)$kS&oS$8
zrm~gs8Mc*cqP!M|A^!>RmTq<vL$$Sk0`NNwWWXQHLV}~nye46TL2`Lm2FXa+QYaKh
z%SV8bKLI)nAk*&9QtT65c<mOXVG>iEcW-p_u(xz_FP-WNjwrQrX7Z5JL;*+2dIC_v
zwg=(=T*r5bBW#h={;@4V`w8%$O3V1?3ioc~n8b2ea1OTtu>H?Xw*8ArtM9thuB!Z|
zL6&9|mLGsMMIpCNfYRUFl)H>osCJ2B&I;vuykS;+n?SwR=^lyuxo(McimJ}qg%OlF
zTo6iZ@j5+tB3g)3iYFd)cXcmMu*xZLb~f=1TUT^tePSwGu>+goL*t}EW21mvirQua
zDZaD|{wc(eL1ti$4`U(UJuKHvW$#sMzauv(jpJZZ9_{ZOy>v8Q;=J00_%8)Qm{z+2
z_iGRKI~lu8-1n<-V~-kJd_M?dw>yeTv;pxt4^h=E*<}mq_1fL>Y7g9ZN~zc>iTp2)
zr#JiVq*ZTX19ntIhh80T!gg@ncB$RXqGQJ)-?mWM4yZiQ&YfhcUE^xG@q)DKQR|JW
z^|$}U@%O^1Rg0TA8mBb>UYw&3@tefp81#4wkx}~)vmH744~XMW-*)^LLXH_+G$kFg
zH@!wG^JH!Ri{tj>HhI!V6FBOXNihp~pY92qfhruNldk>W1a0DE^SN<>)C)c0SCl-A
zZt&nP*Apo%?hy@j)JQE;hf|Z!g;XhIU^p_RF_1I7ZpgPQ!gczXo3uz){wCMQ<(Vt5
zZ={4c4u@Ukj7y@{=#F&V&gWG9v@I`k0|YT$)?+`{xBEU&6rHA3GSFDj)DkY{;FlwG
zUAMI~X7Qda_xM)@3-yO(E$k{j<6l9BQ<E&XyXCAH%Wa2iRfg*_%WC@g1tKD^b67Hf
z)|I-1xF9PlD~_{@<9VDtPDyo;BCj`D27#NJ$iz_YD%5IhWPlBSb=3}U7ha-lrD6m2
z1wosIN%e0J=^qd3|5H2pv-HEg-8`WIG0n3lK>be5pnwz30VGSKG(hMX1-z#!m^kxt
zVp%Q~>uCLLa~<XAou`rVxce6CZXR#N*5_V~{{ZGrUxTNEWSHabaE9Ttny)RMo?OC_
z6=Oah2_Cf!YeFOewoqCcVxQ#=NE?Rt0{Kq=80;Au=NJ$^O7-=FZ^Ot)3FLP7!E;!g
zJA0MGCN*^ylU5(B*ex&LasUa%k$v4@TFfpx0eH63V@10)U9p<eI6VH%5-o`dUuUGk
z@Ik9j>$YCWD5Cpk+Rmq1%y4f_JWWbm%kGF7Hny*|v2vV>UznqQ6cpJu-n{HoE*l&=
zm(N8Ib-07`lplMZ;X#kjRFbThtei#LD5p9EWh_K&?A?!$wBey(ZOKOV5NV#}HAQ4h
zqh!&NjoG-i7EvEx`M%mNr;JN$ZS7+lyJ=2<Wv@_^y#Hg@@(&v!Pw?gh_lwUUN+0=O
zay`;epGn$rsj_mJc5twa&gq@$&%u88($?Z+iC2p%B}r$c4c1yU>6bh#JeI+-@68RF
zZoyB0hHE%fu){dz^RU#BkkWM=Zy;>AX2=_L`vlk>{dVNsjgtF@N~gFj`_+W8_XH?V
zsGIKOZaD&XVZR-TKX*M2>Fj<zXBM~_sIgu)o@(1)MKm1^KH^$qXYF%@<zqJ2x(oGH
z!NYQ62NNowjs3Ze(ddzdQrmQL{?iiw|B9pJ*FdJ;t{9IKfWnR*Hr#wS>x>`oEWLuu
z;U*8@6KIXQ)LlNET4<N-5wy!p(i4n)rme8;Asak=w>&4cYxt^T_r;C(meKd`@LFB|
zJiIBylk4!Q&Kc+~%}d9ug6m6}SN%<pVxlb%<%DX$eFpWqI@`%h<M!9)@Szi2hSw&(
z72ze#2p-75!)sC8(4-SUz3B3u?>3wo);8de^P%!Z8*2}Z-{^ko`Ky7)k32$MhBFRK
zUW9P_568s-mI;EVF$kG$J$5+btTz>S53lsN6ts_{cC4|*j)oi+z$V4R$0!%0Y-)?y
z7(YOSR?3&VcnRE(oT)Z&thN5Ve@Ht}#=N{RMR^-=hB?(8kKkx+Ath7TwY2@t2XtI{
zCfJFViTEJNKk~q%4`E)KM+;w;>M*iB#EYmg@c1+ALB=1&4zl2vb$5&f`><MvgmxM0
z#$+PLzztG$IEP~FXS2lqsHXu(#1B&#`1tM#K@Q+_0{o}y&0_+tt_DN8-@N^c>bt?+
zY+c0AY!SMtEkC$YJV`o5-g+6NlIM6w^=F$tE6Oi_tgBpH?#H>I6qA1*9r&%qpJhyR
z#qjz@jdJ>26t5i(e1H8G*79MNbr46=_&1|438m0(XIwXvToVUbC;em>yF7$k9jsx<
zFRvVU09kS$?eO;cqRfF~tbVdJR^X>DIlD?De7}`x4yI5%d${<-(=Je<?alDA7sRT#
z$1yjvvb@&MrYy!Ur8vpa;mpB0Xe5J!{&uaS%fYfDTN7QE{v@=*<x6DrIpxP~4?8^P
zS4APui1BnaceNJO_6xxs(HRTUZ@Z3~`U4&t*(URGtA`op3F^PZybvO*fW3u_XjZ?#
z8aaW5I|0W@`#}RkgPi&K>I~ysC4)0F_cdj#2|qieepZxXZ_Y}dk+n6kn=^!R-MtY1
z&FizR=#H?w4C~>8ntR6uf>o~E&$jf}#n@O^Yqtvc6gr$MG7Clz(yS}CZ{^IXcrE%o
zlr|2c7%j_~fk#3Za5Dik7)f-}JE)*`#b6cB$5ndYmT+h19CJPM=y+(=OB$sMlU30}
z3EGU9uTghEgM*TThp(fH*E4&^42-T9D$8lGu`%K^ip){BE>2{PhuxPV_l=6evKJ$h
z>R^!&>WwPUmgcl~u-GJp9byMc)?ixuy*RwAs<gD9(6x?K@4<~^^$6A*xBRwh^jTiS
zs^oq+GV;H=#U75MkflP0*9WqHS2z!Kx!W(61iwA7!BH^4qD{tU6LHJ=TG9j?Qxd*J
z1C1gsNGc%kpr|x{#>XO<q^EWBiX&ONWj;@o*|{`(zKu5nIS(od69jUsuU@!Y@16NX
zk+VVIDj{{3-%y8%#J~w)Yb^RTP@>=8=M5CxsK8pgHSuwuxh2%unV&eRWaLX8Tl2z;
zG6?T2qRXQ%C>@fIW3US7%9+xzD$q29$gYOYg%T>O+_#3&`eb6#DHaph{xKGz@;=nk
zl)to)`1WXmKC&W6p=n@GosE8A+@7HXML(J#xBBuLx7t_2XOG&ZY`AmQ9)K9TRXEo7
zrP=bk55#x&*RReBQlm|$Ss4&%AytAOxX_Ur#brg+Rmq36x;3RYvhJN#uDN6QkO`0M
z!@;2rQC?GR4D4K$`^*sxSW3nF(t`nie9et<kW5OV`d~!Tr_U99Jx1{2wQo<oIU8iz
z73+Y*yEngo_FN^&!-c^Dt~Hxi$VTexTz9@P{XEtOlzp?*R5<YKejpcavg9t>hSoqQ
z+04SLRM7Q9dwdfE;$6q*2gKw@A$ds&U$5@4jro0ly&su%L{io(rgtleywt>`Q1n2^
zj1Jd{E3;ds`3Ymn5DFH_z|yvETuW{Cn;-pf$>9nH*AMr%J@glv0Qc$C^H^^uE*aaa
z%mqOwEF2Uk+KaVux6JmLa|#g|*G#4p*OWhX3MNnzH*4-*UAUofHEN9j?7#5QUp(A6
zX{I~Ulg)8d{{TUsKr7=EiYfI%UN*LOT6BiaHpaV<edWRAo4(iuD3J`9*QyPZN9W|!
zPk?s_I^Sv^$%qT1M|b_@FR=Tw&D>|)2poOG8mNf9ztY>b;5D1S7Ql<Q`=g)H#>FG{
zQ13DQOQc_=blTEMZL@q{rCz#U>N?4k4$``9jVN#4G`}5V{7pBgomf$N73b^R!@Ky?
zv!?Zgqjw+%i@BM$IoX~pPv3C*or@7&e`Q_4W`ULK9xU(JMDLXEZ4Z-2p&*O6(KznG
zW8RHu(L>_~+4^F6tOi%#p*OCWe`sK<GBGlUdCgVO<VyCeYmnj``QCkLmM`+6C<pY*
z&zO5#Hcvd|9j~H3QPHd%-n!yI|J7;T*+l9Qle8=Y(}ugLxc^18an9;wzDphKi2}iW
z+yFdjs*==2taS&zE+scYerOqvj#vJAf9=TEhU+_IDs_TWlL13CP`tFBSrmQ`sZ$u3
zSSY|Qd~sHxPhLe)VRgL~lm(qdrJ}XAMjMk%z(@$Z$oQOo$vq#(+>L9kZ-(5EOIJZW
zgB7rF2%Q`c(nd8+Z_ByXVn$wo6x+$)8CBV91qdsl=^)pxM4&^bCL@zuA_X@5ugq)`
zFUy=E!BiSsG`M)n#0E($>ydt1yi5(;RD_)dl*1S5ALRAZy)e%;>U*IKy-dW$`~
zL-~))5g~ReiMPI7Zt*dXy=Ugs2F&?iLE2+iXIq+T$yX(HbW)kN5>qlwuex$j`<U9P
zC#I@dS~vI3eYh}%b3C9RR3h2}2{DCV-<phE+uT#jurn*QoqIBV`509ykgh>%_Kez@
z(oeSoRs;^)tEES$pep*SAU5vrS16`&gPU7}iGERqbu8WdjW-z<oVFqu2sUj9q?<fz
z>1wV(8D^U+w^XmA^*@ZP9xkNKQb2TtmxZGd0x1BjnCYvLp<rS?2fWl>i1DHuAYa=A
zD~8m=380BbSBI*cvX>Ewsn;elBABn}aB*IEaVdforSh;Pic0fFkLM>rnx#YMyeuPC
zzpRzBs-8}+?CbH(D@=Mc#0uIu-Ey;Rt5e628JmzAIxLR&ok4*fy?n709kT<C$lbN$
zd*_g2Vi=xk=-2u%r`kXw^l>#q$JoPjTub1`wM-bct;H$L`f*4jbD^G5UWiy^TfM=U
zMq*;8{nJtgk@Hfej|<9ntS>9vO`Mt{$U>?%$)>=f5V|2nqfhwOimI7SY0$}smV)z!
zRx_ocH$ddupx9@<{KIBRj9XM}xXIjWRnnPb{sv_|{+c#?%nIT5sSKf?-+>*zTdH*=
zjb+ShJE;tg4#cI3E`uo(7dlWC=mT;$!-fy`zVwg$GTI(8B~0lFJg~;Q(N4M(OR~CY
zDzTghWAL|mv$RyP7(4-qI00zpYPJp4s&wLthU~6KDfHtQ#@~6osPzmUH8MZoR|G2>
z1`Zp5!vh6XGoa6!nBAZ%FcQ1N+%!+N%v!seszkd!-54SAsLJy?%@Y8>u}fn&f|%D|
zbSt~j1pm9ov5OMYCaS2D#;`SC%*~*ykFT9lWk~x4*T<<~ysvruni0(4didzgZQPF<
z#pJwQLR8gKedbRx)ideWjBzO$my7$<+C6uW;(fVtzJxmQ0@+RqROjgW)_z3Mk(wih
zYsLjZ)^dOAP>uA7rfu8&@kep<*n&3N0`i#A{4NsEkcrfBwgMwY8cB<wdV;aAGXFSA
z<nTgA`d!R7Av^=`XeM()K=p+xNQx<^n;m1lrP;v7pz*y*ZRU!+OpCTzBH=l{q|7Kd
z=ULI4LRZ@@fvb7RfpnPrTcEGQSaGPsOu}9+d`3NwZl&TgN+!ypd&uJ`5!c20dB}}(
zHNB`8STN4?FVE$g3eA~ZI{`lX;~}Y<5?rWkwV*UdH;M<u6}8#8?j$0)pGv=TB+*|d
zc|wl})HRApCs=Y}5A5^OP3EcZbeUc#%wCmIjgWJ<nyx$wIVW`EHILof%cBOD@FD`y
zAW#;iYDOo{!H_6`W=YGb*)T)3<T~BFV-F-)k^)Dpnmf_Wx?U<%vQG~{vsu;CwNcDA
zp?L}{^;p`X?+Su;iM{BBc9QSnvLQAy)oV-y-?oQeG(I*fGJ6%|E|HwdyaihuN2*Vm
zHX3v38LQWG%U0i9Y|xpSTp1J@=lkMtgAub9ZkKlP>A@}O_V>Wyr|F@V?r_|l<awo<
zryhIp=?S16JZ98paY1C%=zws2C-G7OTSgc09-*7iZ5`ZF8?Y9#CS`_#tI>rN2@mzu
z_i!>?TP=_txql&So&l|`XBqLeZSG53v%^;fBL7<h#hM6k!=ZRy3`P_=tC^xwRhH6(
z@PZT7e3)M9X-2llZJlFM8@4bfn`=oVa8Zcc(%~sP{sgn_Kr_gAznngzPmorrc-n+e
zr23fILvY}gPL=*1w`>5>3z;1_c2FrS(nRk;41Q2@|Mh$uaoawO+Qc~EJv2-+Mt4wB
z2LauBrNCC4T<>E6c8EuA<P!C%jNejc^-^wOw0^uVz9JGZ*{GXjikq*5&nE2cbvCC?
z(55sbepyCstEDtnsi%ZhK$D7`SQD=)jmLv;0xJ?Rv))Hv-TU+As0Or(yjf}|2vT3e
zI0A7?p$J-P?KB@NaHESe(7aGpgD*YBd6dx9p+@TQ6)my+vM{E8^Xqj1&B}z9u4Qv)
zue>mQWFmtcioht%f}i+ihrb=Hoo24u8Vt|2JtM0>a<Thx_2q?xt2T}*mDhrvW`B?d
zU-u?{l+fH?%G452dkZ&r{jvx?(%k~4$Us}oK#1%Sw5j4--61(uOrJ&^&Rg{tv)En}
zy2NLuc8;%u>xz?^(yLvj7?%+)V`O-^zt2Z#S|jQMlBdZaG38U!M?3l$$D$8KV=N@I
zy0V$~IU6MIeTgeGDc&%pQ{c9(1^I(Mpc@*Ap}Wyys*A-RUtVlz&(Fn^3+fZ>ah4MQ
zE`#~pq}B`x@@)Mo=l(r#Gfyg=Ds{7l!QT`ei+G8wji{2%91(aFkcqIxGQ=+-#>d7R
zEhHJ>yo7|t!_lflZ4<8dF+4|N4iHM08fs`r19MnP_9}uh+-@31cTGJftJs3oJ{NQ8
zc-L*=x|=iw2hY%e6IKlBRAkI@cfjNXP=f0Z&TXO6*gskE(R6xUNRaqAHLa(_MGU!g
zN17>j&s!9q_EX72$|;hzS@;LW7Q)#jyam%DNPV@^tYAaj^!Vk4!NE0qgy(YN4yQi<
zrp2qM_fi)vxa2%dHK3zuxV2O?RRpt}O!g-Ld2t>^O9r%WTKvjzl``QvcS^uKiDg;o
zcI^<c-uEaC6_TzEhe_32o+hMS5Q0Zv<vbLgXD?;MYkUXTuP!p~F5+f8x^WJW{(B4^
z8axdnvfoFplp=1qweSxpe{-i#RlCX()J<6~uZD5x|KAL5?3J7EEAq6RWL`Ys<=2xF
z&6*n7Y?@4mK@On$k_ugnKpwsERX@?yc&N9^)0OXW{$=ZltM`PYIJVb1A7+{X)Kt=6
zCl91{MCTV5n%&{{bz5=0s?9F6v`03Wi>6!AwzUXDR6xqwq}>3vvyk;5)dsO|Q<|hm
zjj`Fv&3EMqud<j<Q6BJDnvR%HIpWWxj@IbXLm45$c*xf1rO%djEGV(h&?Yjc5}@0r
z!B5y7nKcTTzB_G|Cd)4!Nn}_bOLW_#6D5|vE6P#FUd{41MD9%n@5-JDU~iXKZ!#F4
zu}kKUis@iY;`=;5f2B2ycQ#(1WG>SYbb+9C85dLQ4iO*Mo-{#++AuVB(_aooBt3a^
z&amFwdDOvHEsUg&z?RGqaQ2qA2T->2G&U=$Ag&myRqcDO^;~s~T@vxM<g?lH-HY0H
z2rfE(>Us6bzuG4+wALDiP-->+H*Yf9no@g83}EPH@|af*R6~pHT2BDQn&NUxY<KJ|
zS-~~xw}i)YR!sb+9-Zy%wW0nmw?<K(um~`r0w4NHv#;kyusED;H96}gG@o&UUq8=4
z-EfJ@=o68rG!qjWX)DO@NC91IfV=0q;X2G&@7(>&GodkvA&p?`c7sMo4zKIGL?baW
zJ6%_vp+6uy4ABlp*0${6(*7#tnH<S`Tk?%1Sf(C}7)_B~(?-Je1(6Hp4`@QT$_S)Z
z6hDH$z`_(RE=l6L_dEsO<=YlB>f?b#eyy{k#2Ir2s(2NV5Fg9!5F2=PkGlbwZcmfi
zU;a2>%#_s-&CtkxdF%a_+@bQ`X82m@KuLV{uG<^xkP4+2UlBpCB-YGt1GeZ=)U&P0
z7zOIK&n=KBIj~<gG+U!*PhMbfg3;6A%3$Rqoe9f06k}Q^-2IA>?c7B*ariV0@#%iQ
zf{cGaJ~9xVqX#`#Cs9&*$E1y@G{K78t3X{0UT~K$Y?fsPPyZ<|c|L2%Lh2Q?@vJEe
z+H^)WX0)!N<fZGZ3me)R)zI*)sp61LrruC5uvdmt!LT4RUgTK<V2R3|)a&)vdV}#e
zezOjnHXMi6H?%O*zdBSJZ)Uhr<KVy$=65ABCXRuR?kc0STudgx_bP&;+e))k^h>_P
zAEu}np_iQk2h}^7H%*F5kHlBb`$SbvC2L;3ohH$Cjnxln@~yzxq8a4udo)yEOMr9q
zDL^wrk7m#|Z!tPsnjr&S6TDlCaAq|z!y9t(G*^^_t?lTIgBVBc?yn+hmLk07{GPQf
zrrP2g;3j=}SHZ}Iz~bPBhvqB7hq_lks`MEo+)?YcZ=~;M#V*%g2CRKRUpRt%U8AVd
z*Ow6~rFoxNOF$C}*#pP+E(M*qA678k&R8pibu(bz;|n$w?!5`To7RF{Q_WULO%NQh
z#)XLlaX*cyU%MEI8_+R;m<X=)2f<$KEe;&+ee)XpqW^7MiR@b6SxehyrY@4XJvnyL
zHuh>^o1aA<@lEbDodB1CgUKdHp>DLCvdxSsMGF#suXGFaBr9#b;{HSu+xV`|KKb>*
z9+Ngs;YOu`_qJo{Tp)s1c~MD&GWQ3-<YJTLtA`AaH;QgJ6{nrE#nKIUN?+`c72$sE
z!e3lf3rYM&9{+`!ul$a|t?d5#K~HIFwX}EQj`KPbU%mv4mO3#l=5eK&cGofjJ2|S;
z0jb%u-9+dil`1FZedU6r+lfs48ZJfU8QT1>)l{y<^SKe5c_logb(yV<X-6>5+*qH9
z=G3{-d-n3%xL|-yCau3Om&FJ5@ku6`XbaClvgS`1)5&&}*uME)jJ7li31gZw3yE3e
zl9pd?e{&DOk1``-Bv@YV2~;X0y!{i+fJB!AT&yPnxF6p&VHd9&CIKDwcWljUc7K(}
zh+A<6bP@Y3MC80JkIqOHOO#;rup-Y{jdgzoU(jUB`Qv1;RSFhbJFj^5?PvRs8TRk*
zlaYP}0(#`-ag(mh_g6?43DYWs+y~;Ka3R-M?k@v7Br!->JN(ZkKOAYYVGSPS%^8WA
z%@?Yyn2W*eF81fRfk^@x8@BT5<Ci=A5zTopKi7{dh{C3M@x8Up8htNkr%(I$bzKo3
zGkEC6kAEuiM_Z`1wExKBEKY%sLlg$^a(=`u4+o=dmR5R46Fe`wPZa&_dT+dV{EkN}
z5WXD~$VbD`<=P^#is+Un9;WD5P`4qC{9XYb0_`3C#fq2nrPl&&Gu3LLs?sHJh&S9U
z&YO3UKyE1BSZ|p=t%y6%lOaMhZ-t$qAZDi_&4w@%DzNd5A<*T1$Jm%^soX52L6I5Z
z4fhxR46_UfY4p9lk7{{+{1W9bJa+_3NRw@?J6`z4HtJr=+M5mCZ(wdgsSeAjW=_H!
zeI`fJtgB-*OAqvSs-nOXj&<XuvaX%Zv6Rr&HApLlmxC2KGz^PF-yx=dJv&M603HFG
zA=#e=9Oo~>tOJr!(h8IFHeK67FrPw@*VuZCnB9c@R&)Mw9Eys1S#ole-5X^yl4tR3
z9joKN$pzX^5%PBG8gn<yBuJGxqS}U#k$~hpQQ)~iTD<N3_3_zS3I|w$YQ)`JNF6v|
z^5jU;&19BgOT&_<*s+^EJ4bcYOabLwE*dS|E<98FO;o{$Ng>#at0vNg$kY!C5g!fg
zWp-Bb?Qe2CuU;ejR?kJ-6Vy!3OHO-qd=6<b9T<sB!~L4x)F$<rkPQ1^PPdX=E-&i#
zcz}|Uc#f^s#36P|bFFi_8w9Pqg=Fh0p3$V6TpHXqSo$z?`*z;>K8skVSjBfLOwS2L
zgl#Sh={*Gw@E2ylkzk4%JQD|Y;XzLd-8<;~06Q1)+}ej16MKwI@%S9UJRMs2nQtN@
zF4Py0lE{MT<(w4KW^2#c%Pc;TbNU1|z576pZUz>Xdd2X)$C8s%#tx=~u*_8c?L!lN
zqnl)L`-*wlo~9$Jb%)svkznE+Y_+*f_G%wGzC|m4fLwAgsavd{i7P>ufWnm<?`~8~
zru8Ri2ughmmH){hsYHPfnG`mQ#Lq*`$3%R3!oR0RsD7s7@{2ZI4<p&@cS?Nbtb=5K
z41TCFqBAa}XqlCUCPZM3=jb(r)09=1@%^`oHqaR<*>GJ&k?@CRBsl@HEj5gT5ZpZ8
zj6C<zk;QNGcx!F}we3IV@vuAAl<VcAG4_+;{?b`c)rLc_@Qe{v^LKWGy(0+2>r!Ei
z1AMM<t2;oqLT0>L-~XU}g=V978f02fYW*1HcPrpgeArr@ogZu13D6Gu!-5qyC><oH
zv!7zF%;ZI~bC+#ldRCJ7aZC91#UokXFBX-TN>&tHk9*$A7q9G|*eyF?=7d{YWfWiM
zIxw&54C~H$0M)!F>i+<#^2(!@KYa@YcRl{tv~NTzP%M7dV&f6Rb3HT51I#WhUdl}I
zk?Feh)&xSSg3(`a=E}g`o|~mIayM~HTA#J!odAwQA2Y8>T(Ep*qWjM?SpY9?{O9=O
z0)f=*fOWDttH+d?e+FE$<8a{l9YO~ZldJ3SMXXp3qvNq~|I+|kJd#qp7mF8maKGo&
zVwz36$+JLF7Lv9Nd99`lsGOIh8R0NP4!)udY3pWxzVcgthR6HvZ4k{<1Kxi>k@)v-
zz2JR*Sd%d4t?Nq&D^kgEa(Qp0JoBN7-{K-RwP%6Jv2T0J;AS31AYOi^;D5FE-ce0$
z?fPgC1eGQry#%Ey0!r^uQIL&@sEE|4fRqT49%?|OcTm{Wpopk6rI*mVNJo%R0)!$Z
zkWfM(#k=-*?svu=cYOQY@BDt>KKI;l&mS3UjJal3X5Km1TJL<{=XqWL9PyfyFj?9g
zV&kX7W<QEfh4VO1f~%y9-&lPJw>3TW!<Antx{aSy?uSU9hY+Gg6%cgAnsDkFhN%(b
zDiZiNwY-kCD<^L#jQt3f#RPH|f~3wgnm*nuaM{#*1$uQCuNyU*Q1toe&M~up++8Mr
z>n7OFkT+@J;r?qNOW-e4b@?az8yA?1&!AKTzf}h2Uip2~iY5DS8K#L2*`FaBL3I+E
zLo8uKzd+WyTJcGzejd$0_;pN*XEgOh8;4c0t$Dd4!7h=jcVUqjPNsJgE$T#jk<9^+
zBO;D|yI^6P7LzCS3p9#9)ntb*=!EY0fH{$yw}^`L3}^H8y?+Sa{YQY_|HU>2k^Tl9
zdq0VIT+42`^mo-p%63wQ6}L6?#~MlKc;0&-R@g(U={qh(P0`afH;L)Yit>&LW*M|+
zg%uilH9$#aB6twpO#cPy<PP#|{!&*n&eNVu_MCvRM&r5)=5z;I8;q>caX@f^#nHc|
z=?dQ;Y2kG~=#-vctPc#x#rn@7F;6PN7~$R8p(*z|Ag~tob>}E(`sWx`{evaa;c)K!
z_i}pIs{b#LOYAqKGkg#Wacsc#)4_86YrF8$by-FhQ^MREM_Sq{qUb>jg+F(d_FnEX
z?M;hz5G{QpNs$~>u2?kCoB-(vh{zzB<}T)1{Q_;;`JqU^mjKlPsrX%zb(v1f!VbC_
zrFJy;HuTc}jPYM*M=XHRb)Xe=3Dvx<v2(8DzibIq3<TEA(0O38cXtl$pr$CqpbJ%<
z;J6Y_)bF%<eGn_$sceJrGpaZrFdVnMQ?6>Hb0`t1fqOXX&UN}zJT13v*~|8Ux<{y2
zTOY{2*=t&`1bkwRFPZ8>eEOjwUddJ6PGqEXWo*)yY@fJoj#w5C>&RR(!(OUo?Y<&Q
z{>Qf+Fz5f3_Z?`G(h^PgSw0|%iT^Fi)-{+<|67#phjov%8#htGoqg<U(>v#-iW7mK
zOlX+BwV0DVHb+m<#$D=QLOgwV)HMz3%J1RYEL2mG^jp<y_Ug>q`udbD8H49jX?U(4
z;R~pJ&+r~`#*q(YN|TG;>fpz-Z(Jv^F4&gA)5rq1wH5p;+Sar>1^@OxJe$uk7>kn+
zk<s@ciDLLl-AK6<>H^D=+FYLdl<6G464{LTkI#ZOfBye|0Eyb61{_}T_AI)HGQW0D
zmwsYx*IOz15&%x8{5CuN@D~V&#aor9>bYNT!3)qYuKEef=>gNif2{A_uuq&L%0KyQ
zP0!{VQ%jG_;7e<jNNBd3M*uWl2Zn>h&WV2}q(>}lv)U>~d1r3z&j$7T2YoT}`(!AB
zbc%TC&NW~6m_kfazG&@nAG9U&3WlmyOC!N^5MJ#<FQAt+PhcYOY1ZZAW!c(XKX?a~
zlsC+~sj6f@M*^vPU!tjDLN)leg>qF}*qsaR@U7=(dMQ`T?;QKo9eVCU1n5cQg}?bD
zfA}PS)yDzlSEl_G&9bN(nc_xlF+<07T27XuwI8!jMg@iXPG9v2c+{|ax!Z1W`PiPu
z5FtKr5A}REmH>&)$T}9}HZJeBy&Bi$Ne*bYrZDn7P9JbDk9Y^f`P<hd=HwKK{-2PM
zp>iUkGd@%#4Vl2g;wOp({{lgA0B`2Mw)K(wjDf#snC^2e8nWn`YfTJ*neITH36XHc
zIK>3%!hDZRBljBn+3mz9CP~{~fuBm=3fI;23oT;K9n)x&oadALv1!WaEMK=LAlOI3
zp<Flhcs8tN=CmeNT$0Np@ha>JJH3ZjIIB(kKgsuE<I|^(ms#jIh_H`?u%HbgY$#p4
zkFebtkaM)(^!;5{Qj}`(Oz``~IrSTPnE4N<M%x|ZOF|hX-PPGg(5V5#&*+xrvgGC#
zCvql1&3n;E$HLDUKhurj`*;@m{--^Jz-3ob&=PbC<ntGzQ~y9Ta((N%C+<|fqdJe`
z3cB%5y?ek=KN3>0;Xy{4q8qaNnV{Cy-*>J6#MP+Hty-pXWX&pB4lC8&(HN>ptuJRI
zCuq4sc+iH0eMJMurkw+f6K)G<E-IJouDd>N|9s~OMXRFjuqW?_L@a3F*@dMXHk>ZY
zq)dDu4e{o4aq4sB_&u;q-sSE6#mdMCI8dj&Jn2XgfJH8y^*Deu0kG+b=MaD3fEf}O
z;%%Xlnjt@1>;bG~M9&QU0Q(DM*-Bdl4ltcicE0v9qt-#?;b2chPs?Pi@TqzkMvbAX
z`+YdEUm!a<4QHSP{4NH76uhM+(Xa7?y(m~D&57{<g`}12Fubt_Q0g!oJ&pu9JYf?&
zv>gm^K65zdfCH(r7c}xeWB-4}?xPrwItRRFFUyzZu(c%pxrU*am9BW)fUo8@df=qD
zsG70y#a;FeyLFWFt9YN38+9@=(h{R}oMc0MW~h>#I<S}slVj?=0Xh8QeQReAY1SGN
zMP$Z>lp^v$cNW_2e40fK>h@^W@`zKGk;9k}XSJsGyC=VWt>voO+AL4KInU^aQ>VT%
z4TF$0XOQc<C6k}Z;f4Md-S9(M6tENas`~{R540>dx5NGh`XeIySCadM0Oyaoi?@@(
z6;22*8b&+jjr9);|2BF&chXG0H~T*2nv<@X-q)KwVop}1j~U*M&p8lnB4Tn~K|XIc
zl7ns%Ilesq7NqX~?&n0h+{z2R?`)W2o7~J^ZJFwR`y7VBInZV2Ki)+J8)Ho(OKic0
z9srm+wyiR}?ghj2uoreTL(nGCzXz18+14kj$qfA0XR?xn0`WYS-F$nGyI)VNE1jfi
z0QF<vraV*wE#<YnqG<`)h<Tpq`5<xlZ-~*V+OG_5Gj8Cnp;_u}mVMq@f*RH+d(k3M
zae*j4UcJ3Jdtq?<1|J<ta%?aPyvW{2rcTz?KO<rI$uRcF<wphvW61$r;VI<U88SF9
z-!|wramM#3qH8<WZ4ZAz`={0EWXx2iV%~%l90$K#?(lR|>IPj1G)V?R-BLXBrnE#q
z@KF*>b-j3c&CZ;dP5(`%6-5!*g6xY;z`k45j)wC6Niz7)lEnYnc4ngk!@q$o=b_vx
z!FEH7sObGTP4D8LiaNT5n8KGCTN#&TtcN}}D0hC*`ys_IdD7sf+Ll_4#=%r&t;Clh
zjUF65&>|8seX}ZVu{_2{b=*FkzN$Z5`hKi__i^I+ec!|f^Fd`}H(4~lnItnSG4z#A
zM*ohZDBc1}N;AmxfB1?VyG$<vI{}54Cl6+yL%t8!+MTmc+H6v$>oB1d3eNum-Re2Y
z2}aROp;#Y)3uR%gZRo>7=-F8v+bM9~%BKs^ZH@paZ+wQ`*^A1BE>1MYf!6VNkhR}|
zOVaa0RzL1GlNj{O5!g=PC+*G=$~zO}S|?7<&$5^`439ksHty1@`sk?BZJ1E{*fGKQ
z^MxXhHjL5KXwzQOtK`er=Qxo>TwdJ)RoPCJSkM-tK$M9&myYcS;UmW^2wsfmCPO>R
zYDTAR;ZNMp{@!-}Y;uHn(gM>RmYkqx#Eh<lyO0;V?2N{Q<~RO}FoeulS#^vw_pHC4
zwS=^!quOAsWS>;U$cr2v+o43g^Fc<VKCHh$61ND3k=_G(0z=biixMTnkEWk>;#|!^
z4_{GV7=fgsPWJVP!YCT#{&v(!G*8jbNiF%orUfj2OHA5tfgdHtN*6seD7@%t!J7Q+
z7Z#uLGzGGNbi7jBqD*|Xe~{EX_RJh2antr8Ijz&eRPspVsj8{?wMTAif=urxlQcDf
zvhj{cS4CcOdjG=3<}-xITpb{wbFu7Q7<^wXX|wjyd6zIbu5TCePj^UF7>vY^dAB4&
z1yvhSLO)NF@3j#&@=oqx@1flutZw?XE%J1yk3@$ZFIXS^!IY@E*wc=kTI~{JuO^>q
zNEpmQE}V#ZkaWjcDmk-xv?TfWIPr@Cd`t<AU(#%`Q6YjGLZ%6Nmp6EZU?hluk?RpM
zLo&Ws;)}7@3bh`LwwsQc2-S#bap4q1H0&-EqQZmD7n%Y&H4dXvLxnFP?ZPK{`V~@-
z$EOKxpHfTsHi+_en6xC+F4o4Q%@>md$@-mQA|uaqOcox#p<Mm)T^ga_kf%gAbI;xK
z+@lI+`}Di0=Svm|)@0)z=qb8H&~XAOfg|lBBEx_XK`X`Q&W_!HdvcZXj!5tH-4!^`
zW-oF6g=V=Y8A3Sd)PlsI9dV`t!U8(_i!N>%6y;m#M)(r>N7v3qI6h~su`{rbSZ+A2
z0or=7(Rs^*mhm3h6PmU>?)YyjH>vU$=;H=us}8*W)4H7Nau3Na6M#q^GtDr2^$Yag
zo_1Srh+b5_-7t?VF3l}y2q)VgdaWG-sEs>FHl#u@<N~OR;uLLe<fd$VrvaH`O3KNx
zV?fE!b%9sJ60(faiR{0yK?kI16w$#r_6wAofu`J+Yg#wD21vtCU@mqCUu$A-F)H2A
z@hg#ZynAc2(O}Lq>T6qQ2V=4!3(L0mc06u|^UY5^mW|PwpsU39F|^nI?4`Mpnf_uQ
zeZ;sI-Us$#1cXJpPS-x@HsYQ!1>!sRaI%1CP7Fbg-lLsFGpx?qR~_;=n#`v95BmTP
z59^v0DcniApd4Xg9&TIpB}&W@^A>G!$uLj7&rP*m0Wq_Y!vQNHK;x_6VhHzsyJ5Kf
zxJu*r?zt?*vQtX?7I#loB%cEEi&xJuG~H0rWXef2{7^Iu5RhXZ|9(tYISIOKtUkMu
zcqm$ogJ8}L;ndrw-NYt4T159tq1_=DzGoN{wq6FQzB_SH$>*19TB&^T@^!(?LEKJ~
zO(<steV7AelQ!(&Wkd>9PHElZ2v$NuM=Pj`MeAA?k>1(5_U2kE#c-!H>RG>siBoLW
zLv`p@)DN~5)At{_x8ej2`AKkET~!;w^jb6d#hgq_ed5kaza{a-&(xaNj>>jllY3O#
zWzC{7AWQuV+jly}WEt_C8cs})a2mZslWV`F!h5^5kVnncyMA5Ui9o1q7iUoTZbPW1
z{u2V;&WFiuiF!2JIgwsbU%yjkKEB+c6fP@y{AOIhV$Vy!Ck<9o1cKz8iu=JVT~_7s
zK_-Rr<00VDJtHaf?*`HC3h9@8-wEfq`8`G};b*Ua4a{sz+o5rEG4jwty$_00ceJo+
zyoNtLAfkBM)^hgxmGqNup9_i_v%S!MAy@OKpr!xp`{<AU%qB?}R|U3nytgONirp<!
zErc2!(`6@$J%#fc<u?8hAS)Li3J&!rV4b`5;7yGQueVa|;WCWP9(KoxtX*01<y}ME
z&b24)or<T)eum?P=v3QEQy3}1?2^9v<<8z7Uk!DUwE8Qc@*7S-ev7^J`?KF^+4wr^
zmol5w5Q-orhqm?VPfsuu?9K0=@rnc`3#5(bwcNAp=nT+*u4*j}c}j8TTR2I9fSbER
z0PbcX<DZ=X-60+%sU$@<^R^HbqZO8_M@y(*REP!I2|E{IJ0B#eg9Tx84crB<Wz|++
zKl-tev%7Z`J{=1L7f7QM7Xi_uqHaE4tHPWNKR%KL*W1CDznkFecziT&UuBOE5t%S3
zEp~-4(Z#7+^%;u~-r(4^yVTw6i!tj5r>mTTI4{guyO}K+l=3)UmEasFj5BN=LA#J_
zFd^OacMVA?ZXaDee@+Z|u_ZSRtgI#!n9HSN8@<|br>yfOeY$FudvE%H5?Wty%q%%j
z>jeAS7<D)1v0R8mXp9{T`Ko^zV&zEwMzf^tbvz8o=APc{vs2jq;>4*K?oTbZbi#9p
z<3V?cb4~fwD@0SOWU~TsD5`4dg*KLr9M?WvNP6w&@cF%h!s2zO$16);d@xsA&u4*-
zNGANZ>&6$x-&Is+nAIe_c|mA>f77ge*}Qjf0&{Znd9bpD$F4`HHVD*nTVwNb&wEp=
z)u78bdAa)pQs(oyb7MZ54rAw9V8#C2s#)BzCSQBduVUi30q*B>mPGHU=)K5!*P2@)
z1Ed-Gb{<j)BsiW1P=<d4jl8Y=(~~TJ{;i*ef+R*6!a-TaeBzr$K^-@ouyA(ZKH;5O
zRuCq*YY7pvExI+aCO?e^z<SpcM|FzG#nF0_BS%P>rPC(X?a=)W7<l{-xuaz$wsI9@
z9<ge##ckg87B_SQ!Ere;r{T$fQd2GFby?Pm@g2x=+zt8UAwQ#FS;jH63m|3E8p1#B
zLvvr<Y_NT4_iXg|#ZCv%r-JmSk{3QORbB#d!&dR?^L}y7=MWW8k+zaqDi?jkUI$|u
z`JxXQm<&oY<IbpZ`K)<tcGfFNw})|<t|dOMV?;n2X0KF?qa9kD<M-_0h3ovES7}pt
zapb2{`}eODNerI?eSEjJvsR-vu?w|vz1wl>`hlOZP`@_Aw?v8iS9(7Fh}E2D2vr0S
z{7To_3`WvY=9&XWsLCJ6lcX6UXS9#4(z+;rn;w77Se15MvD4{(pJ0i89~A|I3CBc%
z#b+X+Kel^@_qw9x+~@N=A{YfXVnIe0iAAW@4QMWK=SLR0QBJRenXn7xac{^8rIxN)
zCXpHHQDqX<uNAhWrvfIKA?Gy4O`G~ZKl=Kd^DAu6&Y8LlN=2{6I=@^>of@($n4AmM
zlb_H#L(Wp<3JMs>AKmuRRG@RYU^q~LB)^^VtyIh*o5xK9(Ksc?t1>BhZ|ieC4?6NL
zk$ngcC;(%H{Ca_Dw2JXPvfXPT4z>||PamBqaMxNB&&T3_HlrV&Q}9N%fYBGW9Z`E~
z3b(Vd4-m2QUcEnOIJY{Phsp&ZAe({3hyx3NI56Fvp;c4(n6FmHPV9To)k1b<v*xiE
z`9|*Y?Qb3^u7E6gV<R*K%Y)uud~BrSaKv`+=l_FX-QN;||7;(IRBlku-RE*`bO{*u
zI#>X%(y>t2RhFASW!tkTzoe}Cy$e(Q(dKa|A58p_XJMoJ>YRgyUELBVz+GC@nG>%k
z*L#T$rl3w?oPFJj*1KM8qO$_atEX|~zC>1;Um$xpO(mag_v`$Q?jbd#X?IdwY+THO
zC1raEx4)fN4<Z2@KTf&u-h)-*(ZP^SXy6@x8!^KcNYZ3;z!D>%O2e2+{z4ctCU+4i
z=7f-H`EOcwh07=NM%w$MSBwl;k15@H#C=uLyREIvy8Rc3f&VvRWZI(QX+(z|v2-Ge
zE!lYlW_rT-?73%7diR~5rn%qOiHH^noGdoGBBt$6QTmw{q&KFc=u);}<uO{lrepZ{
zlQmj;aSp$nrQdPgY&`hw6~^=P`}0pkok;N1jXRSLBUI!fg#95RS`HQv{K5+Dr*!y4
zQ!2WjwTfe%86Q<H!HYk0hZ!0{Red~<WpmL3kG{R+qGem+>zl01zq5GbRB>8gtEj2w
zs_KXT+g}CglyOR6Q9T=2T~7x5)#c~@VpKZB?r<ZS(^WSA_;T}|o043~*1DQY-ZJ&;
zu#aTp{Mk05aHr5_#^t8m?I@LO4SeUP!6f#rPg~3S+>ETzt-2f?FP?jR-Ny+I3G{(W
zy<1LfUMwKzE^3pv{MMDpFvCb#P`sqs-1>?-2GqThYPPn+#t?pmKSUYXcb&`w%wKK~
zy#O;1@QHL}@j3{=Wu?XtWd7Z&Gso}cAAkGjiR6EF75+#47*f@~_u;U*avdR(&Ut5{
zaTZoh-}L+<IHH_f9$%_!twr<9s$!x&04)xCHA8>M&UWm<dCz-^9EQ8)s1O;%Gb^g6
zn>Vb+E_fmg=ExV@Cz)~i2HSzj#jk#wH~i{qZC**CQ<*P42hBDGHvx%!NS^HU<n%Cj
z_i(Y&?eX`qO}k)x+$NTUlI}|j30cj^wC?W-p5C2y<NLh6MpD!xb&=o{(=>;^7Km5>
zx6)-?GyV`?6RJ|PPugRl(h&u4DEta?Xf<}AI?BTIi~m85VkHt2lr3#?NXfHvge!_3
zz=zzbtU8K|exNDS{8KG>J(DzPDs4znsWu~#QoCA%0ge8BNC|%*q@3D>sI6Yt>fDcJ
zAjaUb^DI++b<5xXH5%<V+oD@D0^=w{J=xuU1_kD<>QL=g_U?fW`s<vX_7*m8+L_V#
z@1s)tKZjW|{m{po!jB4kLQ$s`^;I}vQT59!4|a%HUn*((Mu}EKaNJPSm*8yr*wT~|
zd}wB8SI!@?-53(+md`H#_fh>fG~LJ}{_RJhflG{QgBny18mx$H=uyx|l<PEWW5W`X
z6{A$r{5?lQwv6qAi)p#$mpkO*Qg6y=k0Uuu+2MZ}k!S8lU+p*R6yEHv6=o(*#e<^$
zRvF(vCb^OGg?c|Ar?VREP1!JZLEqpAFPyP~@qN)bsarYc8pmFnKB_0gAG;jKID`T9
z^rtd>{0E^Qbn!n3^?~I7;s885+=`$WJ<)KZynACKTF2-Yh-Vth`LM@lp<G<FhF~)%
zpg<j301#_EwPXOUp}Jk#p7(to%mvqhfP3VxVW#iY#dY07c(xnUG~awvnkxKGsCo@0
z+e!v@InvKw0MF;2__O4aIzvusK{?A9YRwkfZhOa=$Y?v(@@26WhU8dN{snTq@EMkH
zkoD=1MM5C%>x|~m(bb`6J2}KeP4G7l@*WjdLN=$B<>^h#1>OTkVLOg2D;so9x|_^>
zaOX*<pP^h{qxx^RGGPV%$-oX2y!ZD2sv;0T<?m#DO_b>|jTAcmUMizw*A4+Fi=8_W
z(&P2`G+bX|?xw}$=1zb}z~beLUXv|{ROf%=A^r*H`Omn}|Cx3*(+2;ZmcpQAqHy<v
z3`kW40&Gm67kMqpm9ts9s#`_mW&wNRhM8G;w3lg}zqA^~@=6CMDN>N_1Ni5Zh7C?*
z1r4Kf!?|ojCgT?By#1dLa*M*O%^D^is86YGVnP#H{X#Nt&e!=cqOdm+TMPr3Qc^zV
zbk&IJ$H>ppJ60wNfgTKT+i^*%r>FBcu<Gu=K=ZRH8J0JKKg>CQZhO07_n>supI=?b
zwhQgyZ#n1A^q~|!w!C|78~1em2n=r#t$W9X7r1x0dj+hsc6n}tKkL?=pP0@|>v?`t
zW$smt(GpNO?fB_o)n<>K!!`ie&9ND4!V9nN8RJzkah;?X4^iaoT)bL2Umu_c*4YHx
zrdLk@sTZKvk#PTE%+M|XDTQ8KJzG!rX4vgc5_?l>y_VYb2Ih<Zc!+h3-My8^C-%V4
zfBF?`5N>%l7ETu(pVh`=3e%HxwD{^$*XR7E@MS7}Z&Gwhww2-L4;gy;S6(TmRQd$-
zETgs^NB|DVDy?7UMJIp{oEw1evj%e*yRlmNP3UY(pZD&Ych)J{;um`&8?JE})D9%1
z!hPgFVO`nZu$u5#{SG?@mwGq4T6<ccnOwJP#$G><-}T+5vjd8(qHpD?_wmhC-^EZp
zL`5vc3_bEV{Kf>j)Ov77!Cj>>4zajXiJm!WW`!;4Ux~nW4~)C~ljb`TFsyoPx;h|}
zvCD%3kYfzHgR!+3+*)jwbTy1txJr9nu2MhG@|808bf1|G7}Co?Ad%*KLr5nevKs#0
zm@lGqx~6C~I9+Z)3GvY1a(31Ij{I&?#^CuDvY*8;-EvUhi~4hC=%6AHh}V{00c*Qp
z;o~J$uxJ%|52^2L@8>4ml$@o^xDLNHKl#)KtcFZ=Vx`Yr!tFR55NzAH*F>0%n`~<k
zb{6s}94OsNSy_KRE4B4*CxvM%aw^{QioP*h+_~2mPTWEjidtHnt>qf?1Lq=o+RKl-
zWGUj$6MH%X{9@dyjjB6`3&vIMCm$Z!e&b7(RmeSN%#^-aqV?K?swpnI+R+kmWY}RX
zJpLtp*KE96Fk-N<#5GMguK)1!8J!C74F>c37i2*fLND8TmW@s7<yHL~TJAq`H~x3e
zF{Fu{?PFZ`n4Ul<;l;GmNY$}39nVp0rPR|q7kkw@6iTIzj+G8wOuk-dpl2rc3lz}g
zieuIbaDrs0E^+1~-XLMcZtlT%0q2o?wXmvUlsZ!Y&Ap~xpP8n^o3#0fgG2vPZ;!ZG
z-wkCiubq+l`a|Fubx&O#A}^ecN)HA9r<zY1v8GI*wHFjJ7p~JjG;6PCN$2k%Vi!e^
zO?C=-=SRM9#y>?!--dkeV;YgwlnLTqVgo4XU!Jkqrz8asurC8@BYYGwaX7S)K{gGw
z7<eKG;#Ot?+_3L?U@8;4kEjI)UIOIR1D*Xd3|r%W;ZyYQPh8cZtJ}XoljW=p1okRy
z&HK#7=>(sFuVg-JIb9Cn1AVdn`LQ9%3qMk<@m#VqYLU@m56?rr(zXtCyX879ObGFi
znyP@%#&ledI+8Pw%e92*L6)~wYR!(?&eQZ~jE!N>W<&@K>q77_?^zj&aBuwtm?ASs
zfna=!`wkB4XIHmh(Fz0)gJvm^aCh0UAlCP5kRNk520#A-0r<Ho`dF!*Y0JRiHYBJB
zFEG@<vOPr=_ON9?!!YF1e0<CswFy7B`VhH^(gwJkE)(T4*p*d3RP#Mjywmco-zJ^C
zuE2z#ko^wu%l-Ma0ErNXL#z|!cD29WFOa%6nzn<gvWF;Uw>S~py1Z5Hu;{)g^P7w!
zzo6rC9|OFjpzTUpkrf(+%+8$)IW)ywMr90OE6Hk$3F@s4?rF4yC7TUS0F@Vv!fSnB
zqEKhHb`QRWjKou$-D#+{THfK2D<Y)qd0TZOdiRckPZYgxw|R(^>$($6clYzBEJK{T
zL!jy*L-W(ID%fLd4W=aZ6t5BpfZ!t`t2Qjsws*Z6hp^`<2Fg2!jU*)VjD`LUkQe6v
zBgRyBoYz%1PBJ__<GRBxH+VX@$np<U9st5xN)P__pQcG3weE7g&BxatoDLZQ3+c$a
zy`6bDv@mVL%!xOiT4|^o$FD@LAeK^BocyKEOJ;DfcShVdXCdFiz8jp=^|DhZ40X|E
zze8LU<C{e%_QtD+_u@=WxiN2Vt|xi9si=#&C~??6Sk!FoYv0|8Pi++&`Ug6ke<qRh
zzuL|yDQK5j&XWkR#Z%~*YkexkaY@l>K@g>(V&%_~)3=h$SVd0NVXQ3QWybmPt!rnI
z`p#(a>T<G3)CLfO^}A9!D2{4jh2(RiyHHb*Fr}Q=#-@+wgcU7yuJegH!>wY8v`Hq^
zd<Q6SmiKE%F`_NvV@Ai3l6y@`(awHmwqvWLK@2@wy&J3Gp#9qtH6ih)4_x3<wGtU=
zU@|ePLI;E|p4Bl$k3dG4$<xc<qFiiHDPMY7%x`#pUZ1=4*wv9hkIycB=R$3I`gH2~
zNv^lZx9r5){!EV1dWeL5s;G50oBXY2@JO0ZRRScV<lD3Y!#Nc|@S<7m(CaM5LT2fi
zC&PC$n-Z<scmb=-AV`7qhKmpq(2}XKId<I{^)o!kZD&Qls?4<a$5PY-VmRwP#i%`p
zSvLt&K$J$UKlEk-5A8HOq1mR__jTW)ut=!SeG55`QXDhidno76z6Oi|s(<3CTLvga
zNajc!D;!jxjF(H>QPVv89sQ_%ynlfvsBV38>@eWlK*c<_#PwgG>%usAhLUKZXNW<M
zxTyJmTdR%r<)Yb8rXBm`;$)Z)uCB!y6(0bPc4wPoJMEm66e;ne@QLN4EV*hnX3wY9
z7w@DcOn=t-ieDH<L$xp^80I*krm9k}TFXq*ZFPT-e6{dzfEDY3DOrBzwSqe4jTD{^
zawQKHBNErS-ne?LF?ZamZiM}Q(H-}N)%U|@^h2%Sl>TRT9)4zAmJAK7{)=~1f54hf
zU~f$(Vm9)m;FxuaLELX}vOb_C!d%<f_&wgzu$dCn$8?t1_HdxzUPhZ@k9zDC@ATgA
zkh;<PY@RrSt<IgHJFF49v8@E@?nC>G2dPK*UJD#%ipS-JKvj`Vtt@3H3^vp9W?2q#
zsKKxkv#DOwfJ&y0*Wn*L0(r5Cp*egncxXB&*i}#8>8Ym%izp=o+<Q>YPpd|Ilu<Ql
z*=XZMPWE+W7ptz)iQiLf+Y)*!T-8P(X7OiyS$<^2Ep8{Oma>vk$?OZ$q6Rlr;HOPW
z6K)Uio|-Lfipu8Od=X-SF_;knv{Kw2+_B8D7i1C%5uxjN&ySH7DywSE;yI!87O)ld
zp!R$71y^hZGA=~bh#1xRfPR*MpZMbO-lYcS+VCtH1HGMRK5qBm=|!$?e1C*2Bx>bI
zy%oZ}kL3y(&zg(!2oxvmkU5Pe^2b7;3feJS3QUO_L$fzTJ$8m!nxrQ@Jx_E8dh0?;
z&*A50iOGlqjUh0!W1?AdT<<K^hG@}_6q$8a2S9f%74(bi4G%u(sEVSq2AadzMin!j
zR4Q~Qb8#o0#8@-xDnFsUSI`p-Vk7sIkzqY~XNifWZ>)c?t9R(>j$~nNhRYNjt)D!V
z;=x~rr>fui<cpW9+sfx`RKHNkvkUSiQ5lAAqXWSKUy0Dzsuo7FlNC{{(t~6RHmt~_
zlW<?1cJgDD<(kj%X<YdMa@Yf9k;mHf^oR!$^s_;FLK0LbVqx>gPgje`pA<z9wr{@N
zlsay*r0}_XDkxK}rc3HKp48vm1&^vxiAZPDDdb{L*ZF42jVy^Fmqk`L^$xWbC$0NJ
zK9x4p*@4mDp6wW2xK4RLBWV25#!WmlxNl*=wWmu<A-800G;<GcFrnS=Vx<&$^WB}F
z&s#4YK74+q`?-JV+5gKrnq;8p)>0p~M2To^5hk*Jq-jSTU9=I&Yi<gu{^`wN=X}p)
z&a<dS>!7D4ME7L50NHRsTV9=%OvW#=g1OY*V%}lotLce9z*CdYA0D$b!0q%iO?0X;
zTZ)L_dGV<)uHniIC(y<VVoCG8z*{b{;eEs>JLARa@Hf_oIOBSAuIVaYsnYl1B8Q$g
zWbKfNCv`tC&iq*%;a}PAfL$LV<M#1(n(zfIj|s(g1`!+lz!`lOKJUCK@15})T@QaZ
zF1nh_qj52%%e^Gfe<ajYb<<N}o|!=tCsEXdu*G(8a>BqhXPPg3uTIc9xCduYNOM}q
zd@*n-96zf1eN2wA`b7GPtKdNIP#OkwaHfuPI3THao?a!@8WKJpV#u;z=8IiGzLXE?
zUq(Q0#BHVzIl?PVqy5$Jxx+>MJ8#%pKj4^-M&N{&cD@$J9kk-xE|RfRMeS5kr%tx-
z;Lm%@hj$_$-S>DA?0hV$YbY{6riU{UbMPYQij1v{MO0t9vBlKIXNiIi37@Yxq;Q2v
zeA+8lBeZNbs@8T6ogg1{LAcRoIMivx!`ZGPwf8ljTayPETYuWu#$FoOWmZzNj8(jH
zLhs8z^ZT`ckQOVn;?RzN<OxNpa>{yhY*Eal(aHP#4)ufnr3)HudQ}XozR1ekVK9m=
zB7EW8IwspWtKHQ|l_=|Of%oGcP?%BBd+_Rmz!GP7F`|RyEHs&@G9*B@?!jz_Q4L{l
z%cIK5RX&u;SZ~PAe6~nLyfZu3Yc!Y7RhOuImp<&*e1?W<=2*|utR{%Xe1b&b7b<GL
zSUZ=tPtVxA<K)&zsF9YjuVNE3^s<diSinXeMJ!Z}dY7}#%($Gc$$3;+Q|tRVq0gcg
z6MFbK?vh-qrmlZJr_$&j84dQI&5UAVBRb}bCR+5d&M|Vi36Xo&P=3F3FSO)k`nAfl
zGhf%v<+de;JQ8qrK~{u>1L^5OC>VBB)amBb)$qi*A-(XE;qiK7jm*rmVe+^e(HG+#
zX7rxSt08z(f{8qBA)w(PR*yw&R2FmihYc%gGsjrTretM9?BLW~P_N@1-m{kttgf5A
zcDZ(GRdsDr%K}J!aJmZVp?n+{bd~7FPb<dcpBg`ORNKxSZjWLrV18W9ARu{%g?#}v
z6}bZDLD=6VI(2UIk`GJ?CGUfIjK4PzoVjx=cBEuX<#d6~&Dbven}Ib~`vxsyA@+e_
zE-P}!)iFJw1W1S8F`~nPI<W6k8FO{REIy;<cc>&+eeT|lbbkFJ)#efNxu*Bn1ynd5
z^ZhKLIe~vl-fj^46vch0hEM+6BAx*jF<Y<;2|$5R&%#Jhx%Q)rP=}g8x!|lI{l3~~
zTM>HD*C(DHHx!VW2$d$tMB=GD^EA$miArNHw|5Wqi*Bg=nA>23iLGEjL;XJ(`dWGG
zG8m3H%??Gt6ZO-`2>R)2isI{qU_QkPYu(q)1A1qdrfdAJp%lNqR6bS^&$Dr(1+W(2
zer9|nG{*r0%=-!q@br$`wDud*jOA;BVP@SY_wzZz2e~;|JTGsAhaUmrCg3K5$t3<V
zaZ8tP@MyAoVmrTUQBXQ~V)AnKF_LFT&sSgcm(N2F!?x`bL-@&h6T<f3h`omqxqn;w
z3&({0*u;2+pG%zm!OO^WSzra-GH-GabrI>znLs}?f(CO&DX6Fo_cm3>JbAG*2$M$K
zRWS!$^o(oH6nwRt6Uwk%^<S`gR)RM6*kSFyFFwFPBF@sP6{;<L6xDn9A)Ie(YR2wC
z>c(*6?_3<y;vX-kAHVGx^bs*{QPX$e5x*KAFc~?uA;up?nnaH!=8Pf+aSMAq35F}I
z$75ZU!uiD+SNdh$HO2HD*y2M2OV13c9Ao*|gIhDqVqhG{5%=<0MsDq15f8`-(R<~e
ze_Ho81Q4k??4X~LCn3r9$Y?o#Ieq8$HtCeh^>^0!+G1D9p^{0@<syl-VbtRU91gm<
z;6W(<ZB!*8@b0){wa1$aZ(4>!IwVXamv1{=cZ>+{tJ~J~^nzL<iaG)y2`ZL*Zhoe<
z-&GnT@5z_WJh_>h(ouGw@cipLr#jw#GUvQ$M^Z$?cT({J__%Y3@c4GK(bnFyz`U<b
zc%^4pL@$eTtA>cj6)?*~vPDP0YMW{3=*H3Qw2#+A)Sy55r3>87^tw4TerN5x^$F=a
zB*e|h@?`Zj?|9GyGThB;u_ZA?azq6lg%r@aRDtWnKw@n7X8rVcO2$rG%{81!1f`Uu
z=$$z)UmbH7w%Oz~kL;i!+bR%-cL|Y6{tF)!@(w5pf!Cdn_cz|$iO^wD*fJ8yUGkQ$
z2(qLoQDuqU)v#h1onctj?e?rhq;_%ti6zgJc}e%1w8c=+sY4J$=;c=C)iyAXOS1~u
zc)kf~Sl#?(bRSsP>l2fAQkL>Gvd+j>Xr~U&KjN3%+DNk<cu<=tr!gv<@+9WU_3I4g
z9<3nXo?G|loS8W9CH0K{5po&;g{^y#kIYs=uk3430rfs<+rIBn$XAPK_yROW$1sp`
zgX%uKpUY<)3^sM++t8noig`6VZSwTNO><)8%`)+vusk7uU4Q`ZX|c>i1Gdn7vj_^E
z|DZb#db&%KAe&)WPv{(Ftn`{<YDy5;H&2y(#t`aYpYsL8_~XN0$Yc4ti{bDlHnfOE
zbwOQBwl0m-IgUPGxF+;W=p~9H?QUA9pOx2uiV3aYqVL4!hEimd^uo6*XP?yX0yTE3
z;_5hNjuz3rJzaUS8KlM*LZ_9tQ(HRyc<p#uHP^9iN>IkGA(H_VZTOnE<M@*2P1eDA
z&TB&OSPKwLO9MRBZ1xKja~(_k1?mAJTeuCHA#^5q3pp7OZbJNRp54iik)r+;LX^#>
zL&&sWAZjr3!v-a(Y3K-e62*d6nX?yPw+am=vXJ1fkc(#OR3woCRDwDB1u~mF00*M>
z=kmvb?$$9{j<nJTc9N*SNdwqfIt2L(1ct6_(Q(^e^v218x1pMC@5zPFnJWvzXxYnP
z@+%Xd12yCqh_2THAW(YdR%w18RIq2tKoj22A(cS|fX+}%3l6vzz>?^hJW6b-qs;sQ
zmF@rhna}aurTM%KL&?v1<kLe#x)cwx2e?=Z(6@YF6zOqL{|jWh&aU1i7pj^o!nuhB
zpoE=o;Q)aG2V@0^`}BhHExiNCMpJ;6EMv}pKSO1zV0k`;(aH-_081fz&)F#w=`DYl
z$sSS%hCntV{3qL5PLi?u<XK|eT_Q&{{398DKUi>YI?z(B=lG28NzsiH>h&)*9}37_
z)>zHSKssf<kbSCQt|=qV&~p!f;S-|ThS$JVQ9Y>B(;FWHRAcsTDySssXj|G?ui?D2
zS!TFzsJ7UoEvMzUonqMfdua4ub2Pfbdc$LvdUwPvM9w-$_tjtntvgPk2@Dh!@*+K4
zO=Rg`-FAPi(Q_;BJG^^NtCpY;t*TwBesBDVx*DmSUHW_3)7gu00^0oK`^^vc%$bH+
zjBj`aKsx1F?TAlX7P48%TRsEK+Lo$v_t`=Q6?n5mdjt+&pTy}rfOf&-?L^6%ZvoK=
zik*?v{^*Y%E*Q6W8Gad)Y$qSSiM(X_dxZT@P~W*w?O3^R@;zcfM~i@)&%FXxim2hu
zdW!VNf(<@TOX$O|_q9qs_cDUM>dUbev(4v-_`Ujf(stjOl-7QZ%(YQ;?BH$>KeYPw
zY<?Y_{Gb6DHvfBj?<ZmNx|UZggEL=FW0DP?H%2ic^%})z@`3cq(MEfj(|8FIwlQF^
zVO5Xha6xghcH~cEOtp<Xq~CTgE;D%~4|2~j{ZQI`=w??BI*66afr`1{<E=KpJ8bj`
zfL<iOeU!9?{0K1Fet~ZP0&TSXcoXDbRC5_tg6vhMSktf6wyZ<y=|waoO&hqIeu18Z
zYH9t?RsK4=sIO3(ZmxdlMp_X$UkW+Bl%N$h>#l3bf9qTP=rCOBgV!_oTAuO94zsKw
zU@4JV&}};KtfyISEGco1WJXUYQKT9}s&~At2gT{DM<@UVwzq{0rg!;6+rXz0!ky46
zneMecc7P~$i}(_jFWuyUNbRm2t`5ukYJVnHNbs7;MN>hB&<G_4K!?u%UkS8%Un>Fr
zntRh<AfGcuPcaCRwrGK$S_oF8bDP+A)OZBxHRVw#%p_yE?7GMTcP6nQ1R}XUAu~Dh
ztlAQ@>S~A*ISdDm%Ht!~&%@w?u!eVbY{c@2`BCcCV1z>pMT$C6(dVKw#V%{Ih0?re
z)~Z?lp;<QM+Yx6MC)g!KEe%SPiA0;dTN`@sa+DFZoj!}l`OU1tBnncM>U<pA24_NV
z$RVd>+AalYH4@@xVEks-jR$n`an-ouFA8svfo#NY6$9sOz*%R6uSJ}?1j^@B0sJ;=
z7-Tw-6!G1lv~)=7^N7?<neR_inbp-QwdJn&z|IB#sH0()EkMZuka**jkg1l+&-HD!
z#)>>f@Ep;}a}!g~bst`T+H;$A@|`91_()Ytf>1{10K{Tqj|RP86*+cy>`?=BlR`YD
zbZz(goz&j4*2kf%g*N0`g2i=mSK(T+t6xBR+D8|l08k+;OTM+Xd2IERMbK^YjN_KC
zisj^(#lx?NS<gG}AmNZ0qbG;eX>CGs>lbp)H}I>&xSsjwJ@Clu#Kj&Nptl$JTWS3B
z01kNedtLz`5Mw*(WSwgPB_8jsZ*Fq1{Zd?KZhqb7Wdg66pW2zHP2B%Z024g7WfMTO
zljnTly-j`=gN8z4N`YEeON4VSzIj5HM!#QEWlmqjXOQoCSs4`7V`&&tX2`Z^U~r&C
zjI}fUOcZIn;k)BF8Y^N$)r-YUo7|$~Oq1@W6sF(49F_i%SF>-Y5Ephu40BJ?D{S)E
zi%f|ej=D2PvJ4SQh8m*JWzVN-wr+%(z6}-l-|lcOd)X#kJkaULi|%1}o{-Uf^q`6I
zTaYh-s>fS3sye_>rP?roUp#SCS~cNzQOt#|9;@%&*wSE*jBKibH^w{3@ZwOcI?$5!
zmMTbh3&*+^dud)qfnI`MMv4BZlG-mbmT4C~H7MPhXP-GNowSH|lVqIwS9+7@D{h`<
zn|GLZSj@GY0}8~TP2bM0`K8Y3yLo1dndlt_o^KrtG0OK&;cWP<!r<d5{P`;M;G@+I
zyLTZ^(3Nz-^7b!ZF2nL5(RoF5K8^+e7T#RWex8<1)tv4`o(8aLUv@14&gv)7zV56>
z0YV1!A~?YxN>zTJG>mBMX$?TO+kZffe=}hu`HcY{nFS6S%|#2c3;<qM!damGYtsQl
zgKhgmi`M{WSr_9=#3r=?kEoYfII1(CP<;%*;z;rLbktHKcaL5TZCnxih<5Kb2&v)&
zTLD$2?o<|WQE!vO1or3?F)e$tCSbywZ4HS<R8rXH?KjV`1=cCRHyT9~6t+C<%mIC7
z^#{5k3S^2{o^OufhT~1W6C1>MLt=IhvHZOXKv)>A+6DT{%xJD*<0m8aOAt>cNk1CL
zkd?RQl<UdC?5*QQ;#cj(3YDttUiMuKYeIT$c{F30sK#y3ggl6WsBrIyu!wzu%R9eM
zM;4pw;za3!(hbPzAX`8{@LIdP!Xhm4^XHuDF3D7bIIqVpa4pbIV^FGs(W(TXQ#YBr
zvwS}-J~<G%{}UEk+z4Zc-{J6Iv-5ExD4Nz1%1{eyXmgx#KdW_&f@2nv+m=>=&(`3c
zm5X`tKIDGF2!bixq=C#;v@Qe&V3;j@2uz?Y{~&ECL>gUP*SyPNh;a(TrQXdwlP*T7
z?`0_{!R7t{`DpeQDl_D1ieA;#t6t4YYGW0B0_8M9F185W!swWsra)p8HoMkWW!-Ny
zc1t>L&hXKd4mC5I*{w_$n}Bq+-34Os;F~8#2xRTn*-G^M%iYp*4xtf41cU6)tlRk<
zM0ZB+%ZFZ*g~#0M@17BHAksss+nigN<yD~kR){n@H;Nw%?H*GhU||PJQ?ve|@w#u|
zXOB6!aT$a@;z=_jTg)dUy<Bm5Otgsi5tNQ}RsbF(LHb_a3J>&?eH)+fna(>iT@L_a
zCA6}dI@=~wK@A!tUHSHIv(2Yek5o=Ly~%$toFLuNam3?ta&cQ2Yh{Y6?n5pH_H8Nn
zOI_B6WHZlUoe;J5t61ZI-q2UXVka3ad?y$Mmqd0)BXYbNK^#SzysP}sV=6R#eZoqb
z-{O$4DeUbb7M;{NnO@63_qdmGi=gBA+r77`9aulj3^Vds`x4`YHASig&M4b3<}KKX
zC^h?glg%=FZ!MxTzA5_!wi_xVE*ANeQO8s3ML@6%-~43>aE5Mx1v8epREB8x8USeV
zBAQq4e33*$rVma}%{{;7Karwz<=QG)!sN&CgVBH9(pN;MZ(6_o3?k;lqxy>^QtXTg
zRpdd=B$SZbewaI*E#2+h!PPA8h|W^SW!tW^cKVD=9K7|@7jhOtZIjrj?4Jqf1W7wN
zVtW3rz6!t7JtFTwZ@r~*c;vGyeW;XD>Ra90x4r57RH;!to+;NmYZ*r?AN2~KE0n{l
r@pia7dH&Zfx^}(yJ$2nhC@9~-puSJ=UtAFMFXp!XGu$K2UsL}FD^xvs

literal 0
HcmV?d00001

diff --git a/cpp/src/arrow/compute/exec/doc/img/key_map_5.jpg b/cpp/src/arrow/compute/exec/doc/img/key_map_5.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..8e1981b6571f9e58e02ec1615096ec64329eacde
GIT binary patch
literal 61950
zcmeFa2UHZzw=de{43aa00wOtQm;sSw03-(m$w_jK10qpi2m*>k0a1}4SwM1B$x(97
zVTR;Ln0e#(Pv7@{=dE=-=dN?#U6&qern=c(Rr^=Ft9I?`+J&A(F926=t7@nMSXfxV
zBg_XtFW$gb@pG^RfP41<UH||H0epZ2z{bR`V@@owr~n*H91HUSutffOJsL~&k2np6
zrZma{!2N>;AZ*)W0SGYHNHOOI06?kAvW1BeV#4KdiNGZSmk3-UaEZVr0{>ScVCdjt
z?dt2vu5It?<;t#N?cn9=!7d^!C@d%<EG!}-C?Y8)Ee`zWs6>TDMP!A=Wkn^~Ma5)=
zWn@Lg0RTS{=O0=LB}*diKjXs5Nq@v~#3TS9Ir(4lT&#a-H~-RHh_3vqo=eT;|4Jdf
zWcm_;O9U<vxJ2L*fqx?KXKh1NR#a40TnhM)u3>J#$HMu$PJxfHm@ol&m}?l@3FUJv
z=6}XBv2OgG24iQz#`=f#gt7n|`yX*S{l91a=x_J`vef`s=t=Z6pbQY=;}hWH5fTs(
z5D^g)lhTlpl8}%xQc+XTurh(zSeaN@*f|Bc*{|_)u&~^a;^7w-5f>K+amy%3i^>a%
ziHrV91dE7>h?InsfsBkn^eW3$(ceDMEdV7U5Q@>Du&x5wlvp^FSm+J_gefH+#=7*&
z!1VV63mXR)51)XLh?oRJP<;i%F%AwkE)E_ZF2-zx6^!{mfJ=!-byY+OpZdN9!8Lap
z(XgaELXMjsTWNKM;GANX9#4pf>F604nYgZV-{9dDmync_mXTGyrJ|~)u5nvePv5}M
z$k@cn+Q!z--oeq+%iG7-&p#miX+&gHbWCh=%JbB;^cOEPUgsASzA1Y9?tMjNRdr2m
z-KYAt_Kwc3?w;N+!y}_(;}etLrxur%S60{7H#WBr2Zu+;CqIxsPk-_Ci_d=?e`D<5
z_@cz{g^i1ggG=y>FDz_dOu(VU#k(qkPo;F9z`~vSnrIjy&CR5|kF7);VmfeIOOGL9
zI!^ILF2pa^{^0CC$Jmqq5@-Kp>|cCM0i-xsn99SU1R%hV(;L9+P~yLRVSU&N7i?Ag
z)M<bkE$<&|h?s<uK4w8FX8@3@k|=%~r7d_p>|1P*;fZJRwT34*Wg`+<wDDs9l)}H#
z`e%%&r8k!t>MVwHa-$I?%E|rM4uuz<)57HY{Y_H4me;gmclwpdqM2+{$W4N3Kvs^#
zD6_sL+hUu8>|4kOe+kP_(L4lQx{NvRdO&@onm(s6XM*%*s{GN4Zi+$mr;M(vxF2di
z5?T5v#nB4%a4QhVyC-G84&^74oe~a;ko<`TviY^`ChV6X_Z8{O@vz1>)yV2_p=BT*
z@T3Wl%v|@zIY;z1{#_4J;p0=7dy?)TmI^?l{U3ed($n*}R{K=;paBnYMq4z1wdeP?
zGhb7AF7bTjCV1%r+8ENDm`mdIG*na*$R)0DG>Dc??*k`ZeU7?KR3xzAJyY`oFL;?3
z$`P)}c;9$KyRS1v@$9_IH}V!<RB=-HQNJ`!dGX0OlWz{$Sj9U(%PSmeSJ~M&>IewR
z0)S=}Kt-X^P-G_+hrByacZ)(hxKq(O``eoBVF?;oe*?Bg1Ae}&{;bjq2lm)g74?xi
z#7T*2%6Am;Xn_0wuP-G}Hw%5EiEkOoPwy^+3(m+3(12K&B3^ZQxMzcZN|SsqVVC<L
zg`%h%<lXfmOcgq$tv{1smPP~Wi9t~pLt{;i<bK>qcP?^BLobTGd%304Q`ugaWd@5W
zl8w2zJU;qfR9Q<;T`Jib`LjY1;zd0I84c^{tr)9*6{#edIf$Q<g{ZmdAL_F#XLKQ%
zGB}7|_%yxD_if=+SvqTrsV$+Qm#sLs2qI#U<M>e^wQ=}o$rY;I>9xEm<k5^wCvYod
z!cml{K92k;7WWas(Ya%Xn3}9>e1%we7a{m5TVyEb>(CprSu$}$6h}Iq{Evd$v;-9z
zl8_0WuUejnk}Kun{ZsD$DXTZID%6uyht*ptv21On0rG-zvz)izXO<A9mO^fzFWNub
zI39&6c&ILpQYX0Gz)|IU!Of{6?xN~J8`eccaEu#5YlQPoa;>M5VAU+!^%EMf8}vQZ
z#@h?WF4pjStxKvtau<A1q=KoZBUUxm;I~*nv$f8EGd0_L%1nxGzMLLygP(dy0+r<~
zLTSA@-R|8h_CNyM{~2wEEs>EM(U*@$2Kz_S;4#Ng;$rv5)2N}xH)pU0Gx@`Zn)9|G
z!iro^yz5{APY9)LT;2zUE6<vC-iX!6Hzopj9uf7scQ;Y2*kjRc&3=i<5c~AfkW_}(
zt$m9E|8))i)M|-a&}*so9l5dv_3jDQx7f4)fy^*wY?X9njB{~9Yihsu#4YTyr?)PH
z*j<9%jdhS_T~5*hCc|(QB9FYgd_Pu<DR9CL+0W|sw-u>MQ*oJM85SEf1eLowD)0zR
z19Q?d(Ey=>>efamF{WGe!Vcw|?Y(NhyU1nzJhKVBL|(MrgEO;^i=)#{>Vx3FUrHN%
z5K{01e1!Gwp6Aj)ZNF6wY-e+wuu1>tr;9mNG+>dE)K1;H-~igS;(`gaH|K=rQmzS#
z?N)|}%+p|%a5{+F%Fu|i<;Ff4%a&mq*Xh^BPL@LO9E0=o&M5+bY861cF41_tn_YwD
zVaU$=>{2ZD1vpsyJNT=<i}*_2*wPqT_0mko`?vbbUHCgs#QK#Jp`?!v6&WcOrnwuE
zSK5O=eJd!nn10DQ*A1HMZE>mVE_CGQD|GO2e-`*U?QY(~?;6CKBbie2BF7tYhl<S4
z|JufAkFTdqD2}=a%yf$zR5ZE(az;Gt3y6vF#(fkO#TDqF%QEV`6%BN4SCXhoP<zOT
zNEF^H)nP89LdG)ar<mqkH=20CzVQGD4e(!(-9rOJ1ksQUDR(pw!j1-(y+@93+ewXY
zT^U#+3+$`uuavo1Y(218UU5CO(}lJvEm543%|Pd0K#;Q_-8UeUw`Ri<_iTUY9ITJK
zs2ja_a_uSQ7c?*;yCL=#4GdcDqb}Nil_jl&q02x`pLh7VOOMeO=rIh%P49m&og)a+
zM+0xXCA~qXqz}=+d>s@yYqzuHu**p5;dn1tT+OIq@?$>5Z2M>$0m}G{_~SD)@GnY_
zeuEVCpl>2jG@$gX`RoS;8eo1uaDoPQ)d@~M&kOhV_dpwSjt1uPv!cZKm(TzzoF4jP
z4}_9Z6+#0WBEA@=7coqGd6uu)u$n!*J=b$L<Ma4&lKYEA>E7=1!yg}DyYZf=a9_xe
zH42oJ(OAii1`e6AWQ&WBrO-jg^}iv#MY3yW$`srx+t+S+U0S$3y!$LQef+vsO_J4_
z)dMt;J2r6fGaL=X4?`b2<s30#p7&;9D54(xQTSw-;*Sgr|CYghQ>Tobn#7)|+5R=f
z?jOS59H>u2$1zbgNm3_WnA?<!bIuM4FkBX(CMO^p5}p_a9{pk<*wSLRz{iZ`Q(gVg
z@SS&4{=Vg+qHno#RDh7wl812LSUbIk!xk#qZLz>N1nB%3Q|*O+AEsnX2P9bnE}W@N
zW3oiD?<EOb;bqGBWpr*!vjq{^SgsZ<O3$$5)dVJ9>W#gw{Q1(@il~l-MrJxe)pJ6A
zt>MFX^x`#(Yp+E3ihDSJzBIJww({vkc^e;j`}Ee=wQS_Z=flrsj@QRi;=8gE^4r6i
zN@bSlwvSCVI$xYQJ$PC8IPsQElYU!L%a1;HL$jaXl^59N5oe$aPuI0-Qdwx~+dDok
z*FDa0eZG{9jG%!rqgPbQXyED{*y%DoDuPQe-|8X{yYnO_x7}ry+L`-_$kAGzW6<LQ
z@Rw0z<8GJI{wt5fjv*$Ga}H>0(LgB`rqb_^TV2l<0@K01`5nSuk`u1W>oEmC*@TNb
z@h9&+>MVGixl4iuDuxD$Pk)?;Q(Smytv}fHm_Wwyx+s&?MXWyBnX_`a3cKx$2F}A>
znzu0m$zklx@y@c?-?u^S^p;#rsaA5p`Whs#ZXv*Xy-)zC)HXr`-4V?f9_FiRO618{
zoQT^-;cRXM${A_T!e;zTQHq#E9y0$N9}B2L;rezjDybMNVcMDTwOBIp6OF~g#Vh50
zwa#+|kK#_2Dt68#@@89xgCiy$SE#|N)I*jl*7tp<GwMb&!St3~nxe=gF+aJR?vWLi
zSOoa~vf)YRW>x#hTV7f}k6h2t05=+-)!*I1bmH8Tx~Tg_t*4aiudZ`xs_m~VqXD)e
zXdB0C=s-Sn{wD<aUg5L(n#B&!35Y)8C>j)GaKK^3Xc~jneQ~UdX-X$gA3zuMn$Pv`
zyp!DfemhgA;wJRbqzhTiGW#P?^&K?Osq<$lK?h$Duc)*0<LiOUNqAo_nr_scJ!rnJ
zqanMV{o*{(cO2DGfd-0T=fvq~Aoi`}-j0vbMT`v^&;lW8F%K;5Jz26aw^h3tEa2k(
zgHZRJr4t&D+zQse(5RhorC-SZtynB?`6D$JFJ!I1^Ly5>OFO;PLiOE;w(;eofq|+V
zB;_wCbT&Kpo-WeY{o7^rIqIQ@j7(!+%3JZ5G}SJS?|fG-|HLtAj#Tl2{ya%R1K6Bs
zU<uR6JvCHA133;5_%$sQFU*F#b!T}JvPe6GQMIPD&IvJ;u?6Nt8*lRr#@fO{@j5c0
z%cFred?@VL+R(sX%c55uOs_gz<ut4KUE@Y|*xL7#clf)xnRdRcH%_#7*!mUXov%7C
z*QAly-@V^1)`jKqo&C6Xt%~nfNuI?oWfWg41`Vtjk3zS@bzgrREqNN_8f<^f!8++y
zTHH6OPnM#bj4P+wIOkb*DCiCZPJ{-Ky9T3z-U4D;87E7qf!+vT$v&{E^eK@&PFl{A
zLL=RXrZ?fs`w#P{-^d)^?5J3U2F@B;Nw1Zb#lLn~jyf}$lusc9I}2*leyg~J1-2y^
zpoX;%hcXMrH}(0tJ9)oVCLOv954%X`BO+ekOS(?(9;%?us{WP(8R2qraQF1N`%iT?
zedp^|t8obh>WYick2gZ9u*N?GG`zJL<tHxhxz_s=4fIqsX;z{E$O#CEi+SpNLV3Fj
z?@$>}w|X-TSdp@;BV5RyCdp{G9M)ww=SVp9jziZ`?PvhVK?4bcSS#C*LyQ!nflw{X
zV`zWior6iT91XN!YJa&3@RuDpM+1}41!^>an3Df9G=SAMm_0|Sp(b2=bSMApmhSC1
zeol+%=VC|cX9YR!6SSluy#D!_-EXJ;6Q?!2hMHdS<_G}2Ynm^m2Rz8dVu2$1gkjUp
z@H16BHNPc|gqMO{zRBu{9I;&cNxQt=!3q6;D;w}{h5KvSfWH)u?XQqfa2sQ!@QmUd
z8;X+C_ZHyO8x#LpOjOM*5xaIh<~#qSFsZQn%YZ?=5{|t&G%zvK++St?hdztL-GK%U
z__rX(t;Hsx#0h>^KSktabEaKC>%A7KdyF5-lSQD$&8_i~o7uxT<&ETW(Om!>P9qpm
zz?p5iX~28dzDKt~zTiRbKEauJHi)g2=a5eQVc(9b=eoxCd}h*wC|+|=AVDU}0al?b
zdBSw9g8{c&YhS&GSdNyNJvI_EUT$H<695#O5f?({#}3;GB}l!aKL#RLq$g<;MrSIG
zIty3iJIF!<j!%h;@nhahKX`DDw!m?Ml3!F1tR>(XK{7~MNd>IS4`RntYf$-z->sl>
zrE)owwR4jpklxNd4*AE(6-Q3}thV|1htOdO6L2FJI~K_QFfFk5EYM}Ir;G&qYg~ft
zVfXsoGcq)Afmev>r$x{}1OT#V#`Kj{%|F&4@cbPqG;l<hjymr~(E!1b7^ULLFh+BE
zfq9r=<?|S&$He^X5QNmmI$_4>DkPXzFvJQC5QE!)X(<lS{cq4kMS@9JOjjfzbnz2*
ze&gRVyICXE?3|9eUg2^u!rcp#{tB%;-v8;l+NiZOJwUdVV)VJ=Q23+NebMO1HEi9g
zCt`6bqd0Mgu|Tm@2j{KlDxN{ZWiP8_P2Xp|Qsb7VG78FpGRip>4GQj6U(n6#JkzO*
zv6&%tB{h9sOv}FD=0QP_G*1u<4F9zle`Yxs=g&O<mE};1Kk<D3uPg(9=2`8}ECc_=
z%s5A!?-l5(j);F75vwc*lKpoL|0cxP%pWcig6zPiR*Xj{Yj1v4>>sszBbSgK<dU8p
zRE2Whzkq;;UkoJ}(_~t4YGeD~M+5Ew>qP->SgHUJsEs}Jm!U5fi}CDrX!8Z0#@{Yq
znR{gXz#3b=W*epzkns<B!aPCj=E#fZho};TVAJVSG;qBBr_BFRj0B<2LRx=HYc|uk
zKbXUH^%(R`cQsA=s{flLt5^yCy>;ST4-JI~zzRmUVW)jYHQ!}D*>7a`h~!6;%S%Q^
z)@Gb|!ecoe7y0NtD}S(Ss%FThHq^kpKa_4nyY{n#0fDWEB=Zebe+~`>RlPXa?drdm
zU2uMvsNUruML}!~4KS6w-41X`P4f4p@o=8GHk)qIFZ@~44a%`$kCm2vb-1j=vM%yK
zA$fO_VVB#-2kyU4a8RUGx>+;sGvV<d4DajfTdf*14_xlYSzBx4$NzQwN7<d(n|ZMZ
zxoSLB6eFI+YQCEuUBcL|8d^z_?yEE~yMAy?1MDN!y~5cLZ>P(<+$yjG>~c@mH~X{%
z-`SC^7UqAE-!AXEf+ZKq2q^?4*NsMY1vu-o_avUk$QK2{OEMp!Bs`?3Zk5tL7bYMT
zRvY!n#Tw#_&ri}%3M2g>N~jQTn0~J!V|Zz-B>&2cs#WTJpFJI6RW*eT9GOP$0`S53
zp7D=;D?X~}9@_zhFR(Y##Xj{C@z39w`b|l4!N}bYX}MuY!d#+Veu15AgD@jTqe`2Y
zZq5a}+^^JO+XjD^H=+*<b&2xtzbST+WHqO`>kEksfKj_HE>{J3BXFpML4v!VfQ98i
z6!bRm1VXYY!tlj3(Tb;X-6}quSoTyurs@vkE_-#dl>MXbH2L-xl1j#bdj+w;&|hs0
z*q<tDrSLr&?KnX%V;EUmHh8lpqN=997o;uW`=q#AtO<ieaUYW+Qh^W}EZh*Pp{2y0
zM0HMXuEuq4IiJ&0YGmf|S`|pMHhGIA-9WTOIL-*w^w}PZbNB;SSZ<(EoECS+W|hS&
znO@eP#ljusN-R7L){eBkBHTC$8Y_bY=<k?!07W+Jy}aCHg@jx@1ud*yEo}s?T%Cpd
zEZl@d1cikFd5E8zg_V<y7rUj6or8-4_kKe&H@kzi0=J>WJ>h$9$~N{6>H!`$x&fMc
zRsl{{GS=J>Fd4DDpRAv=o3o9V1-qZ~V;4_ZKZR?5q%Mnz|GF%64J_|rZ7ZvDOZD#*
zm@|cIe=m!#udkr5n4qhNosfu(jEs=5sF0|r0ER-q)8EC*!cV}(ljH9R+_Les@^EnT
za&UEF|COMHrK`7>!ZmMi2Wwed3tI_GYikJsOA9e;0TB^v3jqsjF%bc4F)?c?2@z>g
zTM^4^|0tld+ds4a523*1hha%p*~7*H^YVh8tLtO%f9ib&d7*#5?O*BG+_Lbp0sop#
z5D*p@5S7%!%rE>g#egx@%lzraztPDH{bKM>41@o61%n|b_D{qy@U#C99G98=7RZ-y
zUBdNS2>cfDrF31w^;-!17V)KYUBdNS2>cfDrF31w^;-!17V)KYUBdNS2>cfDrF31w
z^;-!17V)KYUBdNS2>cfDrF31w^;-!17V)KYUBdNS2>cfDrF31w^;-!17V)KYUBdNS
z2>cfDrF31w^;-!17V)KYUBdNS2>cfDrF31w^;-!17V)KYUBdNS2>cfDrF31w^;-!1
z7V)KYUBdNS2>cfDrF31w^;-!17V)KYUBdNS2>cfDrF8v|!}YIK8#XSOr5e7NH5q?+
z55mF5{`H4Ba4~;)gundo;1LrL;1iM(lai7UlaP>+Q(Yk=rz9sKxk7h^lA4B=mX?%)
zo`H^rfr^Hf<`3^dIGB5I@rdy7h-k=2$Y_51_``b;Rx-{F9IR`X-h+VZ|Kq&}UFg34
z<^AWs9c}|AJW{st?a~9{K$VV5HyU^!7p6#}B2hpx{#@84a}aw-1+yOQ|Cqo92+)Eu
zKUvHMB%py*QKw1gT>qk^M-lXa8az|9fyr80n~Kyr^du9o690!kcCRi>e~sa96!aE~
zmVN5{vE=-!^#mfHVlgo3nSDZ_2Ja|eY0x8t9g}Nhquc_Y?It9;?3G?4Y-Y+N^_lch
z{rO=FLCN0xsmOPB1yc%R$1`+3y+7X6rCO<+ew3zN-G=LSn5<F6qImpX>BdLx^Ls(~
zRI7J~a?PvBFI*_hyGk79E2Y|*NqY9V;AZI`wkR{-<dx-(xlGLO(yMC7qJe7UAdVo!
z5fVZ94Q}GMtv(@WC1*CR_+l%C=SXBKh8YrpSt?IPW*5{@<%;chW@^4@4Ie$Z+LZJ7
z03y~I5d1mQC$XE9vYS*SzL>=+;{Fc7?Q}b(Z19^<`C=4ZQ9{`(NhrR&3gPZWaj}Ci
zN;-B=2f}NBDC+#0@Ca$1+aZf_@zMD$>L8&@DW~hR?$;?l=@XjAU#$<38Nk8ECD-`)
zFF~?!-m-!@nMSgoWbCl<6ckl+T+;afe4_1c{@d`blr+e_DN9s=VX<?8jQ~nLmx33w
z05}Q3JG!>LaM6kJtMb-7n!gYdl`#}jR|sOMz61G562y(zOW!h>`hpAr+0;MJ`SMw>
zRF#5ba3rX-WtX9wwVC`m)@lxX6hu*?gqmy~3lgZZfyH={TnNyk2;8x+vMomcC~a?p
zP@jVk*dVDk6z@=w2I%3GY984jX7&4H_lJznioPihyen1ioW~C$TK*}&PlXg}KRxdE
z5R?!QC0f;;jI!ngx%ceY<CGny_=pk|DFk53EtBJFsb#TkhFR%Kx?Q+w1*Su2J!@pV
z?t!0Pde0L8k56O=5i6vL+;t{d21U@LxaO|HClX2sW~;_O6b!q-#<!-w#pRs&nl*<3
z$Aw8o%kn1#;hJR}<_Qxan8nRW(-`L|G3VlSU$^M!+KtPlw_0~ez7%78f_3jd90(8L
z@H1Vyp6wbWv?L(ZA^F~?F~fPr^Qr4z&9G1TUJ<S@4c5@my+Z;TK74ML<?vla^8bDV
z@xP*x`S&zbor_a)T5Ln<5;JxD!_VFO`38cV5e+xtO=TBh7OeZRji%Ms<`jMb5}z9M
z6__lI)E-Bn0yH;@ayb!-Y)`E9SKu<e9AUAG=bnh-HZ!czYr|#7^bHg1TB2kGx1ZdZ
zRrN+4Om{H0t%Q9JHB&V*6aqa7xf*mk@!rx!AnHmyYZMzBJihhJv<xKBRiGyeZFu~V
z5)CLkF!Wx4MTRgTU5mcjEoRXTs73l73X;`Z-gzn4D;_1lmwB*vhYhoelaj3jVYh@G
zR=6qa+AhG;O&_o~dq<D2d$u}K>l*cjiGzFCi!+idA$?5@qi;-`8D|{Lsk{MSe1uKQ
zry7IfI}&=;%BbNs=DE-MdXc>{M=K*6*wrO*l9Gs4S5miDs+|OebW7e7fLvJ4y;Czc
zydOgB{jXM7*Z2t!ccJX3`RHuPa-`D&h|;fCkE-Tnp2GxF>X(*=0+DuRIXt#HGqFy<
z^=I)!_UvbSs$z<65UkKosI7U5q{~cxyiwqy8eY1r2A_{9S*aU2zTh*2=_ddjhxdi(
zq(^pkzP9iV3+@Q|_Z;MeLfJ~etmoM9tPF?RIax$=Y@>0R!<1k`ts_h(7PYzJz@4}x
zZ5b<U9L<Rh)}6%J#o;Y`fYN&=TEFUl(sR_$w+5jK8Ez85xc2Z|Qj72g;q)$oVokUs
zZ4r2pIc#ODV}+|_-{Rd}y*M7c$YD-eCUC!tP44vRA|*(7Nr(Z}H&1a4qnYZZD|62?
zYMv@+rca*dGr2YI=sQK|PsHEjp|qXS2LuYekoC;G;Gt9O#(_k42Bjm47sg`>?9HiB
zTtN>Id(|wG2Yr%Zf=MN_n;-2BzA{B~L=vv(C9kfmq>-(OC<48-yO2t7-cq080wl~;
z-YZ$?S<$e->S){-D%I3P*8W^lb-i~M2@FY!Y*}}c?!3ie_3Y^>8d9YEsm!^{4C9Kz
zuY)oKQNYa;%BZ!+ywp{z!YQ|X_{Rg{%=aF&8R)H7loQ~{boxHmCP<PB3*kc2_-E50
zBAVkq(4!tTNyhPaP%Of<!=%bsS0jBCrn2nLlpL}_b+H3Wbg*|p(w{9k=uH+>hgYqp
z<dD6gb`@VNeRf`!nb^!GxNAQ!R`$g$zejZsH}^x5dS2@2F)n2f_z2hztlP`e$OAvt
zx#8vAYM?t9uEQUefDfgNJ&-F46-FA^H6a~J`rxOVIsPLSaSt|<KW?#gMjN%WmADLk
zlKz^MIHZiI)I|nHgL#m?D9U1$HVPUCQHi<exdR#f<Pp>~yP_NJ2lqq@tJm)*w+!kF
zpVO!5cKbWX5?R!RwRc~mOtpFBrbNJ=!0Vl4T(h3-cj}GG$pXN!zq|?j{~#nMy|~f)
zrZLQ)9q!*Py?l;PBzM4$%TDb@{#c5N3Sr$Wf3R)%xbmU8#mDmMh*DRf)z*vID#}QI
zbCyM=mfpvJ`ck#|Gn4KP*4yh!NJFxk0=WA4k2#^{_?+7x-P|_m#&vx~v)?h}>Ja+V
zhN^8e;=rBM;F|g1YVjXt90=!`5dHZWiF9hS+o9`UbwkTLvv1wUy&IRPaXnNfVkqQV
zklFW(N^fuYKwF%scnR1`c6A9l68QX!2{r`}{`yMDiKuWeb1RhMh$1RLdiY$bU~1z;
zF0PEV!s1Qh#U`g!nm=>HNB-H~l8xmX9gB)x(x7!TP@<!a>%cbLm{LVKnxh4EO#XL0
zGBFc+`gS%0Fj)s*GuiqvM=;@h4i+Duu&&7Br#6b~wLN?>sako1{l(WS%&aln59u6A
z`ezUWr8jUE%OWt2V&t_7&mY32qUD9)vC9h%l?@J!4<*v0uLX1HN8kE-5*Wx}>(;vR
z=u3anjviz0(g&pJNiZ@U{-X^<(r9e4h^*G4_harraWuSY7d2O5Hs3r=?d!U$YxkAv
zseQr2Ri&TdlyRV%Y|-;~+WspPE}wRwvbcUOqgV%35YqENMEH{3vvcT@Iaygd@>MIn
z8Tnw9=^0Y6bTu&MR`Tovc~V4IUMj7=aRLc-0$zaYPEirEti2gHXI>m&L>jmfZD%u&
zID4tqB%jRfJSh3xc%Nu!(N?=uZ&71Lq%UAUb%G_B;%BFX1xM+)LV|3tShhXRk`|3U
zgi-zu;_yB~D;7oa5INu81Zw!c5YI+Iy*ZvHnMY(^+e@09!u8cnuYO5*l4Su_-bnl2
zv3@=3L)md3(YeI?Z%_LQd)-;Tnr3ycigq8=s8X<1SFfDx9-r)M0qfU8Syg_y$NXEs
zF`{E(@!XDmwsY1_B)k|&jPW~nynOB%n;i=vc6Aqmn#dY+jFA?_WlyS!3Pk%}_Y&nO
zHzmQYn`-N|?P5B+N7gbjHYt0r=ID{L#M>uxOo*;@?y&Rw9zG_@zQSqUIvl8=z4F((
z33pvSe!<s(0U5`1jIGNY*-p%zY!pN1&bhPp^{7`)9~=zt)R@ZkJWZ0`68%8fBh7w{
zi!H2}O9m5B;||;J<SlneT`DU;jx5}c5%+u>qzvb@%ev+eH0yd->y87)ncExHe3bE*
zyOW!oFZ9YZ;93dd5Rkq5wk&=gU13qvil}O#2@lPZ`icQozPG%%3l0yiFvh#e#>N}|
zuva+S!#Nm_9KU>Q{%;BruN+sNggh@;`eYuRL+J&LJS7~hHpoy-v#G8uuZr&ThPq*#
zmJ)KXG}NvDT(NKY0U^Kk&M8Q^yn^?K?fUqpl!7UksOi@t5(XRw;Fw<%z)5xw4FK`M
z+EJVrxi5N`_?-3$f@Hk$6>!B&w*scvl+6--O%>J78+Z;GsMnsaapWyhMAmo5HEK95
zLt;oM$DEC{@{wkYxM>+e4=WnTxf?jbg=RGkgz?v)4`M+A&sgX<%6^_ht}4=2WwA{t
zkL>P<pE(7xg6FtmnN5wJy5D*)Onb%cJ%OUEey46`w!`Q@gzo7w4N&!T>D8Lx4o-GA
zL7_*89I|o|6p3h&S1U7dsHK-TH5OwOEQLY%h1o@K&v>a=b>9ZI<yVi-KjnX8S?11k
zNBNa;fT+OienajHn0BJK;Ct;C2^fp!<LBY$_rKp_vqJ+u=IqBSzDXhEeq|jmTp5Ej
z$4ir%P_Gt^(v_>abN#-~QoOALX}$oCAe6}VoABUgX4_N!Ij@axaL#NQ-k<Ig+3Hs;
zB|tF+F7B?MhaL+kcgdWUEebFl8bzalX(ywCxZpcj`zB!4i)<7t(%@9#W_#=Dovkmv
zP{(<tldo&w5>F9-qqFpZ&jEBJ$GH@IuJI<QJlGEODVx;jm2B=hS?5@aA8COd&!O}6
z%FAWX=#9>V+CIz9kDT4<bf5&8^G-d2kWRcWmo8ukdC_$P=L;HGyn&qHo<aI9=pR!d
zx8PDM^-7=;5d1}uMbyhCMn6aWJ^phWzS_k#2Lx>Vv)&WAGxFzSkLHDOeOJ}Ru~v%_
zhTR7In)z_64qa&N>#eDWUr=D@ZvsEDgWo|H^S$KACHBeuCfaWWuQkCtR|bpDMobGn
zV(po;9dGfsbFh~k6ihecF#Gk-WujO&aN^;xZcx;mLgN`Is?p}P<Bx@1CuURR)G9OM
z6nt+ke&-7XV;5hQL#h$Z=wX7`PeTZ17#PJyhYYtB2)e)cnY+WM)hF89l%Z6cypcdR
zK@k$+4ubawn8kHz5%|68WWz~$-<iKvqL7i}B==FGJ^}JX0^>h)8~li20CYbV2WKaE
zyO$)$0)9K!FGvQ#p{WTO^4-WtQ%KD)y6%ZOuD{@&dv>Pmr+M%8{A~oqbP!(Y14ugE
ztEIOpFSF1<16kKvZ5?><%LPRt5RgjK`CJ$mUuLt5$3XEy3>l}5^Y%5yS<#<@b?ieo
zjQ3nfiGNf)nEcxudrlV{{^(VBS=_9i=A4ET>q9*YtEL$B*-8H17;SRK^c8U7y(C{j
z>2rIWeiZZhgYeFbh=Hi;VgmR?yw^s%nY|%Z3}a#<c*q6g=xc5khOM%~Qv6;vl`XOY
z-;&_|1J@2%#Hn3JyzH0|SKWyB9Re|jfbonaxmig|VKaL6V-Oz)c0w~>J`elh95(Xh
z`^~0nQ7vfTidAhT!m2n+PWR)@jF4s~(#qE4pm(XNX0r*ZjEVJpw7{_=Ptq684Tb<0
zhJi(`|2-O1#vaFm?3dW7uk8CC`Av77s)aU+ymhMY8>%j^RNVPeIH$_VtHu<1r{SZX
zp0@GJ=UB-%uwfmpF-&&tFmkAaBayqi_#szeWR@N1rT5#1;GhOSYGhWq|5={0$W{I5
z)ex3*iP18NeWYfiFW2Y&xV5K+h4BsaRfEnCR;H(OmJ*kLq%)ANDq`K&=M{>~CKx?e
zTjD^_cb#;BsAk%RHcrLkKgmO^CocwDTk8`6+e9{U4&9F)s7jW{CH;gufd|lmpEQ6P
zTYF~qi|*rhH-0$?!UQK_m0oEY4fc2sC61|&@)pYi^ey#dUDaidY4xh<&fn5~J)Xu<
zxzQ_3W&-(~DWOD5p^|C(j~CWi)!ZKl!N6ij-<BWnjUAWc0qPm<m(GiQ<T&0?(UkZ#
zVRst;<-X?yR%c^9B@?g^$tzw6fA~OaoN(=g_S=cF32E>H9UfEZ0Xw%5N?004BwN^m
zRwS4O=~UF`w0I%G-!#OX<L$9`_+akdoKwuq%j=0h{23FZ`)}Yx!<3P)5Yj27j)UhC
zg9TX46+w_|54S3KHn<wSMOx>t(b)DAYYKZ#AVG`t5g{zS?%?qJR;OTZGyn?79N_A^
z_&#8~S7@7|JF?$kb<l0jVk_#uaKf3%lpM-Lv)8*moR$^~@YWQ*GqIlv?z_17X5eG+
zQT%X5bCDxf0`KK_7yuA)9$|l&pg^<@XpV+Z_zjO34OI(KI!G}o%t&Qu#$h!!MC7@1
zy2XEeNt{NE)$g8QkCOxE-n|EJUh<7DsiGi9X1Pk3)@3XW&#iq~8#GhRr8?Oax37Fl
zN^T|4rKPC8972j3F(5hvmvvCYGF?O_=9`Wl4OBhmTYFKq{)AyU2Fot7z$rq}Xd%-u
zEQk@A%Z@C9!<6CBj?YrdK4Bcaz|iq$VyrU1+uuw?tM+}bm<ze!-sRM{TBF6QkE4i6
zf3}cAc#ezs5$*g2Gel)>2TLG)Pi6<hR-b8QtS#-ynZ6uSntJ=)To|7Hh(q2QXM1~K
zs(Ep*5Z>LD%@0R4uoYoEDt~T%8CcZk;7&G5C5l?Kt?}#VVU2FRL7<z76A$}7vUJ?s
ztrd>37d>S**uyyBtVu&L<R*(p?k*;1yw(!N{_<f3pHWAa+FZtQj(sFVI>-<C>{qKR
z3T;zfEQUl(pxBxws^EQ2fp2?-lS%C73%(^qS@tG2*@~rfSD-q<qJxdzXF}%^51DnI
z-4AANKlOpVeGSVPKOj*zFnU1Q&Ii4^smM~hd-d#&U)kb(+_}hNGqIoM(UiuL0>`~^
z`TK88AD7og?yX68NS?v=3wgWw@kbeZd$F6cU~&!Q!iu|_=CY`1g9Aut5Vrc<SBz^@
z@=BpO2Kt_Ffo=e|^akI?+iBlVEu1I0M86teC&9(oybt!0X4ghs#Ai+VCv-<jwYwDx
zXGr*U%6=F#Txs7<+BFf^CP;*|6c6CO%UOiQlr~<+7(Lq{Aw~CxZ1%%kcGzYDO*NPj
zJmTC{a=IOYK<=!tP61LRn`PUL<DNHCCamptPXyb_615-#ZEA6k=(f%3w?DaOH7$e2
z^D@536*~y*QF2GB+(7h*ekgj&eoQbpi1WH3(r+nQhE9D&yJt(4f$b^V@}^EP{$@^x
zWuwdf!d?lyOG82^LN>K2$)(@SoWSvMw`v`KfO4o=Z|~-+t)Qj_S1tLo#r@7*vCjI;
zQ}R3;_eCv6)2nIQ?e(RdbyI}+_C~C=S5`u~;*$EAo!kHg@>oZ8&P~m~G}pL~voj4v
z5IBo_Fa$@tr+xj5kT`iSzgXwhgr#9Gxz}}1RIcWyNw)KqGEhEL5mgZt92z`cuLk_N
zF^)JvPGEY3jvr2UI*I3p#>b~@jY)({6N~1Jgdl$3q_CIQ7)Zsg0bywnQ;jrLc5BKl
z5ft>a>k~BEocstDW5)POZCppY<eHYNw-a}|r;S^b$cz&EDyMRf`+&9w5kWdqe|h(w
z_niS8zp;E+*OcBhcW+WNJ*Kgj&M|@52Q=#>{EBNb<RTyQZGmH1p7*T@G6ak(x^Mp%
z%t!pYsqp{%ynvfwDJ^4%)*~Qr5Xagz5hIUmo5y8T@E`%ikFFY&^rwe&I73_(i%vc)
zj#4-`PV-M+uc_~L|G3kaGpA}W-?{w>b=@!F>!ND9JEyk!^s{}MS<{b(wZmUUgEOV@
zI6ryYx3WYJFIF&WOO-g(2REfVk(Crsni{t<wc}n<rR0cLrPU+H4r{>)I|WE?6l!sE
z<czN6{C%cGhdNe+ld^P)h!rkoW`_`4a=yr}c3sH9GgT_~+dCDi!mImgvMJdadr`6P
z&~vXfm4m^-m_ZO*E*Eb<21NJaoY~tZZ*!cUjSG=oP<VuS)+XzW3-7mP*vXr=P#Bxz
z!~uN~GG=5*Wq<D_5?8wOo1<3|QqP=GfqZ_{w(0rSk4iGRlP4Eu?!08q^mn{T){OD1
zNqn>)b@Ia(<oL=2L0+)5tUpm!?ow9Jh3_VuBy{!agKOl#mQ&3w-kII~!N5W6Znicp
zoxqWezju&4MBSgozvaK{o`^$v0jEybhO5$$yA!O-LUD7rNPlnEq8Ryy`SueBah+Da
zflLp+ZdOqFHZPEQLg7R8l6h!=U5FUD-0@&RqAXHyH{S~NEnS0HYE%;AM{s1h_Tc-M
z_oaIptQwmG3j{y6_BnTP00ivCdhOk%#%2_~c?{<o=t(vtm0-?FGKLL8B*6tq%tz6W
zp5#crgLg0Cr(W6SKDldHhw(}~bx#b`!UP;`>;{$7`n(sqmruYJ&d|~g3Vp{BS4`c)
za6C|iG<-RU8iA1Rxms3yM%)E6k8(MGoJ78!3Z`3}J}YKjy7etMRASR@rqi9pL*-g8
zOSp&Z75%8U3Y~z$gkN`)0|(4*do*Qq{ax<u!kYE9qf_r9`~fA*%-NrY=$vutbsP|r
zl_vIcp}{6Dhx^9cbo=Ym>f9Bh&YZ$#M&I!1ocvSBSVG+s-BgwFal<EdME5!Yzpiuy
z#(tpuUOE5#?kCJVmU3N^#Ym7Z%{2zr7omw;L_Z#7B><`Jq)`Ar|9$4~x58clEXxXz
z0SR5BVdv^T$+foA@u|}<TauAWQ(IU0b-LO~<BB46M1@slHUbQvb$dh4MEhtAD`y;`
zgfE|{1bTDa^IXhK#J<D9I$-&^;BgfY-_e;CjWg?cN87ZIwXImSHQhx?H&IE!^AAU|
zf0_;aPE&yxuM>I(A}d1lr#ko+&cA^z?XtPn*ay^nyU2DU<Y5Pe5YaqO25)J}Jt^Pc
zOy==!*&g*-uiaK;v1`kTPC8eCPn!+co`S1w<Tj_8QpBQ?+YaPiV_<H|e8o^I_4uI(
zpS;iIPMs)zQ_$0F{%1=iu|ETN)OQ-KTp75`I;dB-@B*vfal5m;VA5>Tvk#7)v#DdV
zWO=vF{)FQ0mB3ga`+v~Ri2H$p75ViZoFe=fO6l0YS>%v5;0Z}<$Vye4RXW!U;7u@&
z#^D^_$??K$mZB<#@-K^Ob`fGlhCpE;qDH)BXHz|`D4m#UElT@S?rghHFZNLVqFl}U
z1xn&n;K_E2*E)&|p%ibx_{f_W=eO{_wdy6#IOIUbgqr>YaSej>8+NyS(OKdxju^Ew
zy!wya!*oiS-pdMv7=sU4(7+iHGQpL`Pps>Qfoy;N2S1EyYSzTh^vrR$)OGi<l~H#J
zPf~`Seo9Eki=VNte6bSwD3<JCjaG}U)AbXYa9E^Ekd3D=d_#5<_U;+;4bYj!p}Y78
z=0|Ps$&wnVmgB5LDN4d|Mj)vYwxo)!KO>cn%Tncy&(!R*Ws&|i?n@w&ty8sVadXfn
zm~BiRLigs{E3HwcEoz!<FDvTSC=CLYBoA_#F77BaV82g`q+goYl`8fdxmB0aI!Am*
z?!{uwsDY;w(`0@(sqhY?Sej@$r?XIaOCJ!N_<^}-Sy}jgiWwKYb~!!1(R9g~sl8*K
z^!i{`u*a7>eUJ_&$%>4=#7Wwk2aWEzVJ}TrS5u?eMc9`|g1ncV#ahfL#~PL2e;J%~
z_{dmzqaiDLlS$Cuhm*I=$D3s>RHpeEh`LmsoBUNC(r1cHtLt>rdU&Tah7O_H_|@N|
zWz~7f{AGfY<$d}!cM}k42;=v2iJ|o{!8c>aeK*Z(98YJr!rkA_UZH)Hm;kg8M}zfL
z^t5T+ji3ZUWbaH%^OntNJ7SibDI=0sY4(^NzU>XyiaY91OkQz26o-Wphiwk!3qA1c
z#{;AV$)Th_#J7H#&UuJB0me5~v9ZQ4coQ_Z!}grgkueC0TS(8v!*@=)^_<E4bTx?`
zwslQel@)~7v`vhSV5x@5?mdhW>*JU%<y;L>*ty<O{ZOo35NFzte0-tGqX?dLTmSI`
zN!ga1PmDhs4``obU#xfgVhq(&uI*%G=ia`gkv!Gm($zx(nJl8cKKhNr6#BIqnyN%*
z4j+S95umtGXf#-Uw3;x>!wW*vST7K}{)D6p4LGK=z%qhMQeJd39Lp&Pl1UEFjSn-F
zl$0z}DwE!Lo0JRiz7LB4utJ5Ui+Kx;PPGZdFq3g?G)ULSCn$ROT<Rh+=fdv9`}Iv5
zh3`F#R@nu~S~^aJBIhmZ9P{T#MJcf=k7i|#a&f0eSS;fUE654KqdP}$KIEmdONwOc
zh5yvX3%uYR(fgR(e=^1VRRhcL$&!TMBwMF?EA5&ts;`ND`1@WEEw~Uqt!+EU3C|k%
zUh>RW$hvjcn)_^kM0iIfpeXaR+;Jf(=LVERkqv?|xT<xOMLwQ~r`Qjj-yAeVS((v>
z<&9obntP~_)z~Ii-H>LfdKE`580Xj6US0vge2b|z^4KZZvOVzD!E~~T48dHi3Wt}6
zkZOF3jr<k9T>Bq91T4p${@pMzsnPaSyI+ptC>Pu}zU_TkwD?A=*n3!-T#p8k_mu_b
zAW>M|hMPUk(70j;)@DYqzi(I*pP5h$bsL*pc~pCon0;@J<N^K2@l&Nk4Lc6N7wb@-
z;;g5One?gpTxJCUCwbG;M<_ku^dIA2%r9l+9xq9;*QeTKo?#h_q}x3Sxy3vT?BT5p
zJC2<a6COz;@IK5&M^UROCte5zSY#>d3eu{0rsPeO`&9^ny}<+9VXqtYut55X2!uoU
zshcIv43*MVb{~xCWqs2a%nNUNxvWL*C`(ssMP692bw#42uVBlVGd_|KGO#6!nL1}h
zNEYR8O_jC7Xy@+)mu*~8lZecBn=Tlus<AA8Aky6@O1bh)%}_t+MyehVuu3!LHmlIR
zs+o(+O%tiemxBEK!OSmk(frx*L-6uW?TM_yYF}duw$81vFT$k6$qYD=xGY{}X^m26
z+J4PTYVDB75CG1)`=%CA)B&a|A3H9w-MljG=R;%>U+}q34-hZym#?GoczB1bfB-mC
zy!!#m`~{l>?w5{GqHW~D0;;+Hqr5=joIRMiynM8}7<R>6iMZf2YsFoSYa~>|BkBF?
zlz}h#06AXJJ9yE_Sk2H8jE#a-Z7JSaUT$gE?5p<p2YCqRxQW7ZU(G~;j-+d4U$5Xs
zkhZXtvuG21XS35J8n}xYnQR0@Z^N~if^2Hw`yC~krW9AUOjd;b$XzjIHNQ8UyfwF}
zGW^<%bMvs$<K61IgK{%O(CGyda%N++WsU`AL*{g5n0@wM+NT!!UQ!jzr;@Z+eJ3Kq
zHpA3N>1W9p^L3q@q2!nsGv4f%STt3q%`M$nt9^ObLrpYNO-ERX0JEu4>-KQttA*gv
zY`oFxvWS4V3Nb%#y8W9Ow){Vs#0!4na3_!Nl)20zSnm2K*K0jR1B~Yih*$SU*M{g$
zznp0_oY55)Y}2_q1X@Xsxm9$`#O3qxkrrbUa%ZfA6>g%)WC=P+yxaY5uQpd>ul&qV
zXub7$cK3==Y4h$N#an_hJ{MaC4pt;aj8OI7DkS)fW@_3kPsshzz&1)k0HC>#*{b1x
zAc$%;3~Wvm25BtL6Z>s8$>vNcw>L3DuIvOU(G|XCrDCoVDBU?img>mKHArQpiB=Sr
zz36$Ub*jN3BXN|z%ZeaD(U!H&Q#neGL|0G0lKD_&bW+K9Ob~$@U})h^10^cJc_Dbn
zhBg!dJS1xIvy>Ksy?p3H-bl&eqVwJJ#N2r3>U=4H>5M48H!tqTdNqU2^)Oa~5+)GQ
zoZr$(=K}njulvmB0)rZqcA8z>i*E-mTy0eNoj+gI2zA7RmVVWY<2E#Ujt(xLEdeh(
zzxph1Zfa^v@}$-hTGK1~BF$Fc61W>_!w~X`p$y|=Kceil-Sq*?ZU)FZjydP?cg<rn
zxEae4AdWT69vr%Q-L9uT>qVQ2AmJ@&<wOUB#?Q_OAzA!-+5}nK3850#qMoyNn^#@F
zHzL6KL_#g)s&$gZ({#R8fe04sW2W@bChiS$2B<w_5M)sRbRW|XTiJN#Z<~yL6LUZV
z4z=Y8V;}Z*l|R&Z6p_-VQ}l;y{s>2y-kNf^S$ceiG}P-}Jqd6<7f_%MS*=6}Lr(Y{
zqbABuAqraSyNV#eb6O<Hk^seW(Nj!?nMW96_80Mz)2mPEG=RbrJ3WiGN#W093HcW?
zZeek`3afu*12!~)ANL$7kftE!5--I$yGVJ0m96yNb1B)zSd)I7U02YlGS9$(!bcP~
zU%^<DBvMy##AV;LzqrWM)Ktr3*wI8_H$zmJG^Gb;fQrY0MMrXGC4!rUsq9PvX^RHh
z=gIh8s_V|1skVpjlsWOXuig3j=FmF&G+LRT1l2*L$d<&EYBflqgxNhK3o}3dYc!3;
zSo1{d_A2UH(5>MnCZuh!o&I3~@g`Yy)_ebSsao=B%i1tVf-ysc{(g?4`-u9qUMBU9
z+?in<l5HME&)xBCfb9X~<zf!`%marI`UthXwJcBCy*$*AAZKlS^LoaY_LxdVCJ<Q&
z^Hy#Bu-|@nvt2!+V6ej2sAbOPO_=j_qMFbnf5Iuwx`iG?`>9x(fDi6J{fI-%{LTZ0
zm_#eB<E)oiF?+vAZ~?TYI-@1x&fql^YK>2h&nUxxkJ~M&lj+H~o$gQoH#q>yz$ps3
zXjkM2A!u}P_+USk9$ob+o^6l0k6RU(d)PKumn^Ni`YqxP9<VOmEsiySq&|6rG)KsF
z7)%jF1!>l1l+4_7cX0Kz@w6A>&?XA>$q+Xl#U~I+TjtK?bZ^FoIuzqx<QOgybdrU%
ztKTo#3b31!q&>WAc-Ttn-Vs{aqa)Fl%kQSi*V06Ku6-p)7pazqge;l8LNq8Qt%Ta2
z&P4zdja;?@j3&Mf(L(99*w?9eKOPc4nN=ld|Jn)HdK!Xx$F4(i1lmTnn8So@ICI%h
zJm@Uy^K|!RE%~;QyQ}xO?1i6}FlDwV*75QBt~vaN4$^8_i_Q=F5T=VUEt<`L`#iE%
zRTsHM#0Yh(G?llsChvIk9Oqp>R#*4VCL(|$AqPK5V41|@oj<f_`fD($TQ7^-!$uy?
zA)yyoHU=Izq)+~fCc?^)PO#))qXWi7D9I$4PnUlkwzvyX9p$*YSJUuhm@5Mx*IKmJ
z>;?|()sk!He(cnG$7kimH~EWw9Ex!8JMc41#Z6gHByUfQBlu=GzEpC5U!G}`;4Ss6
zOOy-^u3Uh2yn@8#5S{ZZ7c*o`&6|x_G@i*xo<zJ<MSg`+s?L1ckNuh70%2|xIOEyM
zVUd?veuV~hN<5Z3r_y7O<QO?TPG_s(8hLb~6!VZ*P1lOE3f<htKNp`!AffNh`7d68
zS^P{*!gjp_S+7er)FfIgq}{Z!pY*X2b><jl0#0I`-aW#1AGklBr2=>~fsxSsASL)r
zJ@^w=@@T~!vJTB&h0HBa7PxwyeSs>*tVYODPTPGhh3G0<9H6=bKkJqvl58d=1H(Sx
zQeOXT`(kD>WNW57^)T@fRqG%wm+sh#0pG#0+h)cRnEzY~LK!4lZaNROMuuzFJycT}
zOO;|~?)jN;v$-RRKl!>FqT$`y4B)fL9uf+B+X=2KdkV%Vj1egduM|uImESmito*)J
zZzb(p-W2oA_X_n4-@B>#jzS~1wg!{GOkFvVwZ|Sb!_1gR>D5LWU2~_ayJH%e`L-m}
z;Ls1Bt9j`TdIElyznh}K){Ch7ZdP(HaZznzf(Gl=Vpc$VrwLtFiGv}Zf8mzF(CpNV
zT2_V1wJ24_V^T6l!>8v02+b^b!dWK@dvtEWbYrR~n2JPBrqm{oWOAh;QapjnmXEsd
zh%YQY?dyE(5hcP?`7tyX`6?;rC#^SR^&|ws1o(D6lR2h%3YM&{eq#FS-O-)N*G~w)
z_VS67WAlb;aArOtJv3)U3Y};*!9hEp;ANO`0BIF9#`@euJ92H@rs7lVtzd`MNU`7|
zGkh6W;TyLI_GlCv6noC_wVp9?SY42Gv}e07I&0h2o}cML7ai^GO44Iux-W?9i!k;+
z!TEvV(CbL2<w7KEffdxTe0xlszPVPTaQOI{T5UshinL!r{HWyWBUQ!LDZ(E4bN>a`
zn3Qulq(0(VOF(V=00ShpdVmU<az@-g-##I#R#GxH`jw?vv?wiBDyWZxB1xI&k)Gj1
z_)=O_ucR3lH>&fVE`aC<4{Y&37oF@s$y6dF$)RidV`78Qk!m2Q7v8-{5F12+@N8p}
z7|(GR&oFG6JeLzJq`B)HGS50{wX-cLQV00Nf4@Q0P21w<!^&-Rkr6^@<X77YqV&>`
z2Q_Pq%IoJ&&3$(Q(={f1S4zG?MVDrOea6a8#fDjRVRCu*4f5S<DUcOHn4t^0NIM2$
z1NR*jfM~0n3tmB}(zlZqA8LETPEVw1uvXB($MH|gS^ABhu4PV4KVr@)9sFHE5%S7Q
zpSjPZ^qXP5eFHD4)tDRYSFvv+-aW3>#s^1?i#I&<ywfmJmV2j|uP%dVcZJc|>XH8Q
zgoP)5r8<z<dd|mMljnCeG5h#jD}6Uzbl#v_fUM4o7?=#Y*UBo-^PUZ}Wy`K2gL!30
zp)Ys4v=^Dq;$Ah3Xm((+xOWxO_+qL#opc{HI_OUgI^8xaG$UF7u{y|V?b0sovIp0H
z0nr&doEAQnjm&Yk|3P2gt?+rA&VCeuSN(Dz2-H0n?y~t8BildThxzaPV&6COM^r8w
zWWm)`hJ^Ny9-5n{8;wqAR8<VAG-r-5T+iVg5Lc(+YRufj?B)5u-|k%UIA$G1s|Abh
zWt`2&Y>!wM?@TP=$k!$%lI7?%Tf_&tbKhJX)8)Gwp@E?rh#%|un2F*p7}9wgbjrmB
zK0kt@jK*{HI-ndJ+v1R{{|x!i@m;k8;nOjSb+zG9!><!LwWaZIW;bglASCC(G^g?1
z-vta&Tm;|Jzxqoe-nt*$TIa@z#q7~X_Sq7nC`afnFNv!9+Fvy9aopLe-gi+28Ra+2
zg413%VRWs?xsPbz_5J1j0W~zR8V`a`kN-dHy?0oX%hop>M8Sd}BGQQpf=ZL#6O;{z
zC<q7$2&jlOA@m|83JOSzpny`PqX-dcB9I_eP>dAm5J;3FEun-!Li`5zIs3TxKF@RZ
zdC&X2*Z0TshnFFlx$l{^)~s3Ux7N&pubkBgS)T^g!O9AMeRJopZ|;Qu{?}ZFz^yjM
zeLWOb6Z##JtPo<#Nson9YOi<oGI}$GU?(c6Hdj9*Cp5d=Z7w-jUX}Y`7U>a;Nt!n<
z<z1z>qaSD>Xjf-$^;M61%QPYRhXl@h&&xJ>3>nf25Bi@y663Zm&<gUEWX*;JmeZL(
zqvz`#`hSqVLG>dF4`1cX{Z>i&a&<LTeK~g~I0UICyGoy(%=#UvySB&x?Ac#(+whsF
z?~wQWuxC*WU8rR?eR{(hYWFb~^jk0d!%UEWWZ&~^@Px>|BBYuU_NEj&@<4z^!!1z}
zSr?3K&&q6Bzi(c<f`tUNVF}e*PdK{%r0l)D%7TX~!!9W}iGUcY>Tex8M&izcvT58*
zG2fT~yRLsX_rjoLDb}Bp7HBihCG*kDubDpOQeB`&({(+@0^1WnEmevgoraL-e2qLU
z&nBHw*SAb<%@afOcn=q$4*<@^_aUdFsh|oWvjPXh${0*k=mn5DZt@y~#R*EUc=jdb
zT21Z<<&8uOd?L3pYvTw821>PcG6P;cLN@8S!dG;HL%u`qq_Td8NFDo<a%B?boxl0C
zPQQqXm@qHii~&*VN`0x7^Kv&GJKoeik-GL!s8ONuwC28k%W?dk8OBahDKI6(pye}G
zX6feU3EoTE{3i~HuBuqArx4cK;;gO7JdN-5Vf(0KgYXFnOZxp%1q;`L7(-0bwOFy7
z2Bj|?DJQb+?Q8u$BXm)e=)R-_Sz!!VaQr13CuVW5+wAsC_sLDP2~X9;x#!fGGWgX&
zWwsm%;v${x6MWrSkFMZX4tQ(PhNtp9MJ{#6>-(`gt!b8~HYBYZa`Y$?@T9IG+$N_T
z6ght54&eUTKU2cX;H9@-&79}(`nj)W7XG5~c`H+eSKv6arH`LvGi8?rDKqI7_gw5M
zeLp4?IrS-Lu?C0i?JM`|Q{r6T5LoYtI+|W!VXBVw-(Hg3u<n`ufIW+@p!-@&WV{e{
zW|ij_>oaiBp|I@5mvx)R-qKSq^$=fgKj$_M5YT1UWRblS4`(u5LOm5C_xI2x+Tas8
zinxs<nLjh?7gy|xj>d0{IuZD(CSh)`3!kai;&oM%5YPTX(@htL^Cg*hqu~BnpnCq$
z;TVvWsD2<T0e_bj6G~L@J4!muILn2MpgP6*GolJnSdzRJPyzW+QWyW?Ca2MFB-6}c
zzaC^5^6Zr(f}8lKUj-;p{YFM%KaDu(7r}ScImEZ{isX{3!d!;rR@COJviPu#_MGYr
z#;doQPun)tWfxOdKMg|q+rLBh!&F_pgU?dW7a<#S$d!3ruKs?FeC|HFL1kho0b>^(
z6c0ke_rLQ`vME#UtaYi*MbGaob$dsZAa>W61p>DB4D?K$p1MD8U4*L)s0PHZ0Wx9w
zcbP!_ae{)=stNO|lw8>7E%EsMT({_t{~i77bD(mXg|J@7p*m{@zlaTxzvDm2pKk5%
z-sKejM7Sr*#dRWO`}ONcHS(WcE)ojf2qimcgn;PI%&#kt5~Z5(kSwB|+D|E`o+{Q-
z8vb@rEh&En+DxG=DOMvUc>Eu~Wu4mS$2=7&39cavQ0~4iC+po-3^AI}i&JQ=_*{5K
zBy&o~zcx@(`c$-xj4>h&@`GM*K(DI7%`&1bb|*a)hR$Ul1jYAI+0cx0#ipnhiT&SB
zx#My`<V+0p5#s~oq$7rLUmS&9oQwgB<pg}<71+IU^ofhK^&GH`b>z+-KtAD`2NEp@
zi=j``XQzy)H;X{|y~F?-gW-gwzXE&03HXNSmN;NjL7h>_qK!`Iq&B7EJLK_(%yRBB
zI^$1IMg8#^vkJ&3td)iIwk~9blFnXKil}hp>ZZJ%I<m<DY*W8HY{RcD>i}U*Vx@GT
zejYKbTyOnBkQMTSd+-I)Od2JbDpYz*f^unfm}e*e=9cGPL#$35n2OD;LAwFV+K+-x
z)Ucslva$;(Z?H(DE09al`>gU>k2yX(gZIPPdljfu;S=h%U0?4g9l)p0o6q9rQT(e^
zJtBEEgx3$ZxjkejeP;7o68b~<P!Gd?x~?nBWgQ1-AoDvKY*F@i<Fvb!OMhoOl(j4}
z{b6ev{^;|2y8WrokGXA@z$am`xJo_>Ivpez{((RL4JDs!3H)*vUFjlnMSn}1AQkKw
zmRq|<5_EGvSnc>u9<VW747q_?KJ+WQ<YB!xFpikb*DLc%%LWy1KyJYFf};9EN2lA<
za}JMnL~B)g4Pj^UR8^odwmKl6H=^VIBTmx8wgJ^yv6x{7;Uc5#Z2#n`d0=PAuH10K
zRR&LOy^rVT_x*{bGB1o7*Ja#{))Q84gn$+OYoFhZ@lSoAmVlh0eOb#;UB{tVn#D&X
zX%I}UOO(8}N$*0gJLvs!-m9tbzp?D!qyLs{h42~Ja*kt%-W!n}Q5xzKu2~RjZBsS>
zYI9X`%A4#SD|~1CkZWSe7uyb^>~MSd%h=oO^0sp&nFA%cDxJ2{)34k0ZDgHtqF;sm
zh4>^#D%PE2otZ8ngo`~RoVCRAS)xfPcB)vAjmQjPyW+QKJ4FK(FyBc`=xxS>)p2ek
zKLCMIVHRHksA)#Xm^R|7j47cUN7VbIMC0g)!!#`wo|Nbf&4u1sOjI|ns8XVTiIAwy
zXkhzti`>&s=G(AIj#_VBeK`tR<|1CwpZT+Kr5Al`dZ5~B^mGPisB>K4)<(iQC~~NO
zcEj3uaDk$N7ec=4B&{&D_d5XVVn@oTD|$#i^M!|eEE`1j&TzdKOcfk)%bn=ojLZWS
z%^g;KiPha@kz`R<6Gq3Ljgh|(EI+6EQ}g-1zt`z0yLuZh08er!#zi9f9iZ#yFs04o
zM98rfX4KXA2DZ=}uNT3adq}g4rX}RetBqQYU=J3fuMDJG>p{vZwG+tQ(Eba<bMO;e
zFGz1d8*jvhvSQB6>vj(eFDLSrj`>666K30t?3x|jlB1>&)_ecnIK|_wYDfGFRp#{T
z8|>*TUrZ9J(a)eMtV;@y(D{`@nTyx<9zeDbsU|Ee^Imgaa;jaGlCjr)`Jizj&TnV%
zXsjq|3e|`)=t6!e4;BMOa_8}kCzU5QK3KHhI2e#zm#}(?W9|B;_JM0C!|;&@tdq{x
zCt|;F>wN2Mh1JXez+2k2^Nn7%<H$MVXcHw+{1mkgoU1Ow3AA@7<^&scc6^+e#eJFM
zu{(5W+vnOF^&;UPPX?v4WvX3gYM5VDoPu)0zXVf3eN}s#Hpb&hmle+ol}XXa-B+V!
z^$JfdoMB#85WJJPa|)^qwXHv?I{8fDe)$4#?t%I{A8+|g72%ddmUA+iY^T!c7jSfO
zJWE3x?8IwU4^eyC9&?<hZT7?0>6sx*MnbUg>fZho`USvW+IatFoIiBq+UMe4iOI6X
zwN}!=Wm;F(t+NvEYbAet;x^a^X%~gCdKp6Kor*Tr*(=+H*^O@c;wB6yisrW3YI$#}
zhqoNKtjGgfaU4sGmS#<(TZl8b(Baye!4@9ur$iW9R@FD7S7I}oH`YGt6AM52ZKN(E
zqVY?xU_RjYz9ND(U{!hRyx9tG%~~#O<<Ui(sDJYEOG=)ZY9udME_$Zz)~$%#@<rW{
z8O=AskJp6sdOZ_{MKro}vx|Oxg0Y8&5hZ;RaS!O+51$W%(xlC4rsFi>tT^uv#h-lJ
zMTvZKD)YA?rQsCN!j4qUXnlrU>3r4?r|Z^fG+F8h`$>2LE@r|TA0pO?^y9r2kJ41k
z!$Euud<EcG6shn!yMDteJV|W)B=6m8@|BNSRy7>OnYR-)oyRsfY{T5XLo|^01durB
zgdxMeXhCJW<7`%Uv}zdf=Z6^UG){4lcOePSsG!5ub(A}jU{>T2f--8YjWC;5`;zDx
zOA+*IfLL&Xn1n7W;yZ)_&H4^GuXYbSrrFhbcBjm&*O$#^G5m5H)#TTgsztw5x=|re
zO(!!B{QVn$p)J?!w-M#U-23C-AE5*Ya|?2>0i+=y%s#w_aSA|q(eo6_0)&s38FJ-{
zef;L^eJVIJB%;@Zq;cTDd610LwMc?u#p+$2)f%r*0@n{({YtkCohxbbR(ppT0U(>{
zZ(lu76=&{cTui&&aE&wSW8W2lUUGUm8&~XVVbHYZtzAD2SbaS{_w_huRw?rAvB^Dg
zzk7-f&z=OT1Cl_e16@-Vy$HnTR({BvnoS7;d@r&+fGP_X5%(11@#<NcZ%~ND+$-Fg
z!#7+G2<`3wNk10P{qOySBPdYC&|&=2ZYtQ8>=S?vggvv94L0kO2B){wHaBBihrUVZ
z4tlTzr;qa~t?k~WLczIg_*3xTArCAXQ8Ymm<H_n-QqX%9MFG_k>FjvIP{0Qh9XW)q
z$w_S)k5e-VskGQc=}XQ>N2njZE)fpl?b{V)_S?{<J_nmr$&I+Yfu$h|T=rXk^Y^+u
zLU49JoUD@PH*#I#fDR|>gCPrBSPh<sn$kQbSJgB1($ZZT-_-LBZ{O|fUv3~G+BDbw
zgqbk$={WDi9v0EqJGM;)bam2X(soblnSFQTXJl9(&mUek_K0nXzw{I?ed~(wPL`*2
zRO<<AsySbUwO<_DJu_upSrPYylX<}}yv}Zpl&!n=ZPeRMYT#a&VA`IH1jNr?D(efj
zJXno9I+pfq#{A9AYZHVp>p=VRgrxcVL#=j|?+QgE9>^~x*e+H(U#-4MJYi@w$~#4Z
z7nWQZ3E*hzw;IJ1izRUkUy+^jP%2u&Ewt-2%AWhGZ_9CR_bAkdEhM!5JH!glVR-TD
zqI?#oiQ@jRdO}3>I)A7Tav;=bQ_0Ta8tiVY_iaPXPEW=k_bV#f>k}odT31KB;1}R*
zeUHFlZKd`i^x1=qN*0f&Ut(humD$^bD{2HE_#aY{u0vn;-RRG-q)YV9hbr0z%+y)<
zOJ^zU**|S9_)F}OUq)*`w3&&6WMIxba(`onUMrlFE=7paF!jzKcKQ$}mYkpw<9%QG
z<&9;FBH}_S%2`>UD0#-zq&!8tE2)~uEBb(Cr^dtBP?kF#;R7pZ4QfBB+Jst`Gec1l
zmZ&2&C-;j5N*}UF%HQFaMEIqlT?(JNqKFHrIppZn@Q#<L3NLZxPxSuk`RU`u?t*+C
zzdy}apKNid_7hBSc8wx0;9687;LyOg<bC`4@W?Sy;;`m(ncr=@Z;JmLxx^)Pcv~MF
ziam~<_5T06Os0KXMLwS1Xpxu;%xH^RsEiSEY)NqSU$U8blbPgN&?+r_>}Ykex+<UT
zK*kFP+U003xf*8paqb10U$TIouw34k&8~PP^Tw^dy=}MYn!Y8&-(ERi*sNj}Zew;q
zNiyahT^2d;@nX{WN<&H4npRu?EPl0wwDQ%^c3G9oEn+NvQ5*PK?Ki0Bf$|>4Zpyou
ztv*ISR6wjt;&)-^)ct{ubUTYF=UxU{aIJ6&7Q@V#g+9q5oIgEvhO|(Oba1au@vBP2
zahG?LNq#80B%&Vte3y_B^KeNZv<V3fKohEx2vYl}%qmFL(Qa2OADl@&+EO|H$Q!Yr
z6X9biusiXDVLZ8fn+?oJlBW4J36t*#^0qlghDHuYU%H_+J>A5XC;T+*l+fElr1Rbk
z4yx+}8gdjzVSmUSo3cB>K>UelX;;Cbv{bkJ$vA&Z@Aea{H`(>kQpM%n*1XX2%>2NT
zU?!?}5q`828RS1fJ3jTM=0UsY$4}4X<b@4nMH2&6`DScmF8XM47?D24*yqidKXWJ;
zXiTP1$Sd&*yk^1;S3<dOJ-cZzvW$$*0$aWS(AlMb+wxV=DpcT`icG!5()btPD=ozq
zxzWZ{VD-q;^@}dg2n&5i%$4*lE%bF;5~>2tk3LD-v)vJqbV}sec#tnTt)IvmMGr{T
zi>NH_dO9{Xn;KG;lYFpx2$sF$#mmui_M60mJG-r3oDK>+6M}0zxeb$6_o}-0#Z*OY
zn0lJ^U0DORcS1tES09D<?mHv$T2y#fsWfgJaYxceee(pI-MTV^bl=fZG+EE#UQ9!=
zh^o(}!6&)98+0!v@u}wZJ*|>wu-e)vWfA7Zf`&_U`%ja($DY`Ghj{ehC?5OG;=Ww7
z{NN^;u;7Ke`!%gP-`p{RE}u`;)(h9~dePlxbh}p6cx7bHa%Af0ll$%0ybSf*Mf);L
z22yIK;0NGeLaB<s*@COPfo~8EU4jSm|3tle!DCnH_5pJsp(4;JH=NpzhDTQH-R%?m
z&XFvJx?wTDUrSbTMfS@pIg;}AVxd?j^X*C!Q>ZAY0POHcfQ2@BG@~yof2JS9MP{kY
zo1t7gb+lH`5yMeYtkmwIROR<PciUqJNPpj()_aB2FaalDi8BimVc!H^_ZdwjA7^eQ
zj#{3Ap8dLl3(I8fR&1{fLa+^#X3J#N45*7QestV@uj=WeFRw2sb0v=ime@-i=}|>S
zntM^@2^92%x|F@e=N0*hVId=ixK(WbvbM*KOfgT<yqEj$ZZ90&E8w;#8Hct|9>+!s
z^Hcrzj)V$zU+dB7GEz-?GWwZ`Ggp4U-_vW{y4%9ExVta5k0v%oAA80*iRZ=wBU&C1
z&f!-h`kxw_7N>v#aN({85+v7=D0i|A6P~a!M7!oXHgnFfh+!f$l67ra=928oZl7p~
zXbD8zWgMaQv}LMLElWl&&!jp=)@c{Ir1bH-y?mqrIUT|2b&t|{Z>Y@)<;)g-hz0#r
zF>a3$dAri5Z{`W3O*?v(Jzr!sNt&?6wY_;2221Lk2APBc6WIMi$n&41+!W(_)JApf
z)^6B8={S7t*`#3k4rXt`DkH@_+j|Ax@N(L$r*gBw-XOa-<fC6BqlHHi(qRQ$a|eyS
z=D9W=NpX9=t3e}Kv}`pxw_w_jUt{QQpZR-)bhZ~l`pK>L$A39Lg}><p-(7FAb`(td
zjNbjwo~&8M!S9fP&oaQlQ3DQsRR1ZOAreis24uU4-MG;^o5c{Z6@wG+PZFw60fQ;=
zP5P12`HqVY<!$y^E+icEn^jaDnNo%*{&vvcmM=uZ**tkIv57fc(2Gu%*q+vzFeLe^
z%CEo1Pve1yUWaF|J$+-P6nT+~oS65viM)jEb}Q^XGH~$P3%Pxhn(u4(zFc~`qF8yn
zsM_1gP5jv>`-RvO^MVjPr2D8Ed&YFGr|L(~L@gY%=)+dEm3Ev;&a3*t$Kax)odnon
z<hw2sp3$@nz`4Q;rFd$bCv+e7ixU-i*jIrEw$f8%fuQSFr!;ZQKFh;wKVYn9n>!Ms
zC$h>+607O4kl&_d(i~UQ>fLtPcScWx{Arx{KywL><DN9n+PP|A6rY0Khn$eEO>QKN
zoW=;rzP=;S{>z9!r^xe%jYw|sW{&WUlsMS20HYQ^j7)8}<ceNgUU#y1f_r0!9UH@C
zmDlb~P`^Rpp5e#C$;xFj&)-JhKJ^aAs`-wu&G1oi1jvobyS~p$En!pQ14BJnq?d8h
ziQhe=fLgWOXkv9aEi!e`*7QWu(1nB}SJ-pWnzj7})slu@MpY_P<V}~Oysf*&<|0z0
zK0PwlxqhlS7$h14ryaW)3Aoy0m~De4^ZzpvA!H(;{igcCJHg?fDVuXGHp7pK-aE#(
z!9Gu9!h0(@1Bvt3@D{z23w3fE0o^51i$^0^SyaBR6U=uhr-@WpI^3K`d(8zN8b>`M
z7PYOf&Rk|ZBLN(HXtI{Q44F4<RHXA#vgOkC!ykfD`~<FkJvHs>VruL@Sutc}`m}A7
z*K9}y63-ZY<0GWrrAzQmCyL(5WYHIo^__W`>8axoRDKfV)9l0jGh3b@^UuDWdxhPy
z@N@re;lH@`S7MB;opx0qXcmS!fE@UOT){oJh=|9Z-H@R)kl@k5&J*};1`~GdBh<_(
zJ@T<$=@&N_+@3zt@|m4he)c+9R3(teJDR><O9j2nF7i8s8-S|u0gwHF{a<h&V6nSt
zf?0gp96GHpx@%#kN{xMJv$Kx`UL7~KKu$oYXaI_OH{+>_;Etnpdp-Ig_*VZoYT9=Q
zODc%XNxF_$CUY=Cb6W!aq-N)`#!Yfc!D%aIvK=D|e;-S724^(3f|OdJ5&$(5AS*NW
zD!>_+dKlerL5fea5yf4G5rS<Puk-%lt)Xcg`oI!-|82Gnk?%--I00>$x3?takAVE1
z(`7SiX=5KnKUZjdi1;`uD6PnDIP+*Zo@q*jH6R%N`=MU7)RSvG5l7RxfzZGnn9)6v
zhJBZ>12Fcx%mnNvCyom_-mm>@T=F*E0zlEq1O%Pm2}v(2K~|8E%d5P|8wBjsFn#w$
zGyES%-vy55akL;eL_z`n^0ojF^*ujwC;+~`5l&O)`3`~M$30-+&{;y^7A^%s<^Y%S
zW3KKlr|XkY1WMTps>uR0eC+V0m5lR?j{9AP4&Bgp^k(Pbj}-Um2yBCZm}DIc`eRtC
zTf>@0(qF#FeR7pschn5bhecwr<%Ij}r@uCz;$QhAo^ovr)6Up&;>6;gndk)Q!n#El
zETBuz54ynTC82csFW(_I2;U)q4s4w~3<Kpx1dV+ZXs|RB^oy3@>O#6EwagXRNZVcG
zNQmZ6=j?B$W0!Njy@3K-H)QnXPI$#hL)u-9grx|;q!HGbI$&r1q75=OsIp)kg5V%P
za|WJsOVl*_TS4{0oB9e*oi;8T>bh(}KRTuk*m9VNr1J(Ke*_jAZHU;7Bf`pn=8=Hr
z>&};j0nROiX)RpJDB*5F{^@tUKR+bM1^Gc3ba;~WClt#TPShqtn?sM1*mK=TrmeeJ
z?v%WLW+V9~IgF3!rnJ$!y|ut=cp+CsbZ>UN7r5+#><=O>(@_Dym1co~weU6ns8z<|
z2h)6$cCWuYpv_A7r1(HD!?ZHzHJWPh4+5S1l|UfIcDJ?-x$=t!*eJp|IKXo1Mga^v
ze5;Q-A(D>5vtZ|3UjrdKSYn%P%S4uFv6V*%&_xoF;bSK&0_u#^wb`nN_<{=6qzf>Y
zgM1G%g_WN%05LD@kC+#-(Tx}}L(*=YVi5RQt2cEd-nPm(o0A8I6u!C{k91vz)5S2r
zY0ZHWQU73tD=};1Fhk`xR@fq6rJUYOk0(S=fZ%~58MnFbJ46sU-qZvXS4A#^%T5ri
ze-3gRgG~dz6)x3$)H0Xnf4~AvXcsyjIUPv_V#Ff+gN#s8Ds_qxg^Y)ju7byb5cFID
z+)?Cp&TAy~d@#@yzrfp|)Sn2{-v(dNrhEQ_Kn^D<Uh)_T<hyl}Bct^_YE#zb^Nb=9
zrj$mIxvy~^kk+`8dYs5RPGVF`=lhhqSnYI-s~DcWAOd(i=Fsc7{Z57KM=e&M%kKX1
z;poARw{u_q>d-F+9rxk*_r@+8bX=%~8xXGXk5tzCGb@6QqXyqC94m$XnZ)ov^I`Cu
zW_rBDpP|HxP+1U4+0QuCK3>&>*%MI3lrw8oG!jy5@+rl~(WtJ9N$H!O6lPLB{7!|k
zDnt-V)ny#Jo<%i<ft&Z<X8zECOJ%REKJ$B63#vqOi>_Eeb>tbGSg^ya=mwhs&S(No
zCo3QumC4$SC|op_<oIK2s9P)|z#kDVS>Zci(T;Qx<N^3}EVUTO!Yv>Rl3SVyL5SSs
z%;H9l<FvOlHhilC(yWA@J6-Un$Dy_chV0@aTQJyGz(&)(5ihc?{jC!FDC7X{55H}x
zaeNEMhR9EofaOLpE^L$_zN%oO?3NVh01tdg2Ozm~uS7LN<Z^#BMljg2UlF}?beJwk
zKGBlr;$;iABn7PxarVCM+w3o>s!wTuWMTGBXYTqjI8ngHCx0JJU-w%JAEMK@IAMPD
ziOYW*Km2EM{Bit!GR#qxHRmwJ*6Vs*=l)C#C@F3WA<vN~py_;IpD4>;Wz$<qT6I~&
z9@{vc^~Yax0ZXExzvD=VHYYu2qr}zvFH#6yM6es=c;3e_jrjqw8Z{w)hFCiTiYMAy
zGQ6!waiiF`xcx7zbPJEp5zd?b!uGCD*?e3~k@+FUM`QeovTB1S&lbPsjOfSz!ChBL
zZhu3i_@b_f=S%s26A3P$X8(JcuwNC_TY4fXh4}ib*!^LSe-e|dUQcKT98AJ^#S}gv
zKn0@si(`u&RsPejX3ihQ&^bhplb*f$*M$^n+>AUHVqU!z8G-#$7yJYN1*ZQG{I?#H
zL@vBdXC?1nCMq|%nO>wKN&*mipH{8YXrtOD*zPZJB5u+fqgvKM?U&0E39^kuK2-vy
zU2jM9c$j8rINkM|+=h4lDP337k;c;(XS~`iQ902P-Rj%^RnRX1+^-=e5_Ex;6DZr5
zF(Ilq!HCt#>O%@Ao{GMp=KlC_%&m&I%2#W~?KPIqlc!o1If9kRGT{}PE(VXV<Qq?i
zZahAG`%T5$Jr9~gx^4%e**4KjSRuH>*cmA434oUJIt^%c+ps{_JB-UdB&wG6L&<Yv
zE4$l1ANKcGxVffqB&jtJngqN3vNLlsE)yJWX1bcD*(r?=?kRdIDeqX5>sSMBzq@n;
z=dFwLHp=<YE(L)B_XQo~D<A19AL*I?xt;rnf3EXtp|Mu$JgmYER`KoFCzk*IlX8dr
zro?^nV#{~P*4)IyeWY9r?*5pY`{S2QlM?6lVFaK)SA<<(BV5)^DVQChYN79+Lf6v^
zAYfX!a{pW{O&oy*o$ow$oQiNOn#mgCc@r6Rw5rGPBdo<sq2#o^jB$F?0kS<K%~Zpi
z><<+AUb?~fJeO7K1cGi^q?A@5$;roEu=8?+$kQRUWpfQr!k##eW-Ny!p_|Za|G9GT
zW`2#2=)kcagd>+sw$!$k#9M}Xw@8Sgl`N`l)+-V(lx%qxG6f}n>t$NKFjX(zGkYf5
zg6~V{#SMsi&#Z3GtYOY=fr^c;iVZ;4-w47~Yg8g&RH7FB*Bjw)fjc5cF)MCsKWMy`
zi>OzNzM_n%7YLc@y};~8>#80DxzT_7!ern7m8QIIeC?&;ZCIW<brQZgll{}`Gk1|K
z4*7J-UfmK<|Euu<yf}@1kFn27S@Xyw>I_YK38Wa^?Lcgbxj@!rqM$r=nxv`b(7m5l
z?@OCrAlp9m89k}Zvg#bha~MU6W&S8p9p}+JMQWABZ}fw^lqyLQWE{nonGYtPCIsvV
z;m$?7i#b1J^)LtU@&4s41bOa{D&o3t!!B;V#C~}b`W{bF-C7PM2DB^4cE8OOqssvw
z?Hh3PH<i$;-N{=6z5~u+Kf-UE{p?8TCShYrE5RO#Q6y#ElhJSh8xtZb&A6`HXVTbP
z(rwqNm&$^|H6le%ISN0Qe}aBB9G$m2!7)T5xFoX9)e|Xihg&+IeTNK!i<@4;#F1U4
zbU=-U7(2#^+py=O^SX;J>lO?FW`h4F|6(%-eJ6Mqp$KGEG*C0XLv(A&%-|r46q5QJ
zvQg|`DNtYh)sA|T<34`H6}t~kI?@jyJ&=9#hwjYZO2&HKDd?m>y%Xe$K<_u~M>^o6
zAjtksH1IjKpl0rrT>r}8=g6Bf_p>hnA6zC_3wXhWF?Kzf&TW(iYASa5rZDWIC#uqF
z3gtG!IKZ?D#;qJ=(Sauoz`0I;VZ3L+nbFEhHT3EW_$t=s&)*?wwq#%)ZXfy%Azx%#
zHT<<XmNa0;rEJgO3nH-gj#x9TPeyFM#E7BkLu<{<!(VGE;q67g4IT>!CYA`<OrZyR
z_we{-(an?@0^oqM_*Qoo7I_*CL4L~dd@abhb_iqsgJfy72)f*N2&x>!W0kNtzvHa)
z_s9x~@QD;U5~#anS5&EY&)Cm|-$QWR1bApO^U7=B#{;v*?q)Zjz?I<hGaz>zOC+wm
zsf5!MAt9!<LvcW+(R3w_X8eZYB6O-0*5jJBxf_sF=6*dEP?VK1CWUPM`RRY)%YV$)
zMAxr~q0LCT3}Vp3F^hU`@;jv4{3&Aq%0+IQt0hCA8`LfNDURx|T)dxEfowAh>sN;F
zs52PfE7Hh6_okkbTZ-FtxB2SvO27REiO92%js8}v$fy_eQ{!hU43O%EZ~CrBT0gvk
z5c}Y~mMC$3b7Um{`4=QoXtn|FItT2f)y))?I78w%iW&GFIxcJ9A-sbDVQbB>9Vt4m
zmwbk8Zs@O8;8AMtX4ax&P{2Rc4>E6~sm|MS(wqY9TGn8PsG3B(T><9Htw>S?HLg-%
zRQ$XzkBq>nmpAvmof^4z8<uU3(?TYx1BonxyDr{;eg^t#4K>V);87|BE^q#SOvL|Y
zZTx;=0pY6tAzWtv<el*UpDgG9nIS*Zo4-R=PiMTx?>oSho)+_CrTPE5(%@i$clA?j
zsX$HMIfy5ofdhrqathGMhI;26jvj3F>zX(JO+`StytKBKabO#dd3^|M6}qJcU^f{b
zU>ZAa726DLlmE0ddW`v7#;;myYvGFnSCY4$46$cqZfQTGExm<Zab>{7`5M5AjNJQ{
zG7FCV{Y`b1B7vYhCDUZ6>u-oWN*JsyoFB2ZQWx;A7Mp<zMI6Kue`^gqn7v?jj5vP+
zD`WeQ(IUZ$H~bFC1vA*16qarV2{HR^#j`dqZYbiTpj1v*a6h{7pSv@y{%x{A!9lsO
z)BT}`kmEXA+7;xW{l51F*3Z`UMO&N14>j}qmjc54zWe`<`FCt$TGdxvDxAOD#Na8M
ztCPpk#Y~Yi5~e61tZ>25_t<?*-y!I&z4bL}pDO^&oH^p{c9cqk#QKy5extj-gTAiw
zdN!F{yP_IRUFO<H^f!+&S8u+Af2j?9pG8sa+%gO7%-DgNpAYp92DSmPH?-5Sz!F%!
zF#p%ip_~~#?ktcG2`;8xStj>TY=klL0Fc#B#S{(CseW_)iX3KU7*x;IfH+C|mWi?T
z;Jp{yZu#MFSsi0i+Opcf6>R6X?7IA%+gylxZ!Bk>XCYmd#1U0zQ!I%qaIy+%&G$;4
zd7~SDY)E&v<{8u22l6vELf%=NFILYRG`v~Oyf6Gvsl?MkL8;8OOQCB2nRlo28zm$Z
zZibperr2(+JmpA*u`z~)WvKO~0oRC(0}*Jq@vMCxu2hnx<4Uc+0T*HWge_$!kMHyJ
zB^!+K!|<xtc45$plopu-p`%#tV2Kw2ju&5(3Z4j>zrQN9A|}N$Dk5DiY$EOG8kT*V
zDI@sv<=YbP=zKIj`gueQl%o>OLFbPgM@1=Qn5)utz<H#W7}%2__ZJ#1)vv}D!gQve
z?1l{CgG5GeGPvm1NXqlYWXI-AK~kM^C>QlkeiOo#0Q{6wxt)8X&U^{-6Vu<B0XdPI
zbciCMwLc)7v73v4+}@cxYb#6LOORm=R{LCunsapvGXH!SBs+Z=>ENrFG)rQA*LL)!
z)<nF{wP|Dpq6J`>)Z>OV<9ejAXWkR)LcBu-iHbW#!a4>^rK(-`K47ciWn#a)>!eC&
z@C-V%`8z~_Zbrhi8DXfJLjBDp$`;m}7HwA5u_AS~{f8|t=XvD5GncLGS0B`0x+CJ9
zx`8tVr&+b%B1sp}R4X;supF{XSg_v2EP}%hFI}>ueCDk9o&2O+Z;VM8)4fygkae=r
zkZ*N+XwH@axN&F;NA#`<)fZ1m2eXj{?fLcJ%0~q*T}ya!eL%^5`QnxEHNBE60q-e{
z7@bd{Tm|>Xb^C*D$gF4i>yuGIC2tF?t#w|O?YA|oW#(!umGc}(+B-31F-N1F3WkAE
z^^AXvBrO~CDai1;yoo5jO5C+dy%N=?VT?|cTCVG8<UaNKIji^UM|_$_%|?T0!j7p1
zY?v_bXK+cYBO&?8MCUdMDOc@~GtL`d<E9>Qr*>~Ta7L}2KrpMvYA>?60jmN`pd^Se
z6@gx;Ga{{Brh1k*uemwG(q3FU^Hqc|aqP-A+(EDHw;`Sh4^F`)7=3nJ8hRk3bN4RN
z>GON^Khw>Zrj=)*_f~4NcVeF;&Tx79aXx=`Vs5_iIoD7vjw+6BuoDj6(-rKMDN*s7
z)E2qwc1n!27Pg>x=NSgIJ23Oc4bzt?qrxDRg40-Qab)!1cZEvhL;JJXV3NVgq<U0T
ziH1b3&G5*nA{S%J{HAwDO(9ar=01<_h16=>gUSl96k4c_av0-824W&K_i6AiRL|}H
zZ>qx`KZ`u5Pm^1C9X+W+d6lC?S(gn{N@l4AD`Rj-GvXoXv>U5dT61b-i*vS(TBCY4
zvt`3{oaC#Q<^-ss_7y+(P^$^7qg`02VjoN)*p~X9oYu7AAC8Lknf5srEG9#!UwUD#
zpeuhi^6E(0AzoGS(9^NZ<5-d*{H|5}Gn!sNW{BQJqkREJ<9JGBgobRu-HP)HYZh$X
zB0krSID7YAyS{RO2_O0aS&448u?JW}+hFLS>06J4_mbARY${vkg!S%8&3Dgyur(=l
za6Wx<B>my;g^%7gQ+KiB*%r6*+roX(`-6}4)g32QxALbttMSB^D)NeP8HjL4;ar|7
z>>lP_I2yqtB9i*W2QgSp2)5(LI>Yyl1>8`D2?g7PX+z^gJ|BV(Hd~KGmAhrhowjw@
zdmFMR)7#b=T#6w}p3D-zqai@x7+h@pB*eciaEUxwNQjB4wJzdp-#<Lq<Pj&>%s~*)
zpG(7B;K<f+p!v-jkgZ!o_fi8%sj~zDs%D?2?U_}Yt3ctkJnZ1jQ7ePnuMuyU)<bqb
zTon%sJ~MWbRDgQXp8t)Xt)Ft=Ys0pf<Du`(4@^#94L=ly4|ii>SQq@DKL~ViVU7k2
zQD!NjpXdmsoC5*gt)blHon7wp?A_0OcjiBOv2<wH^7d8{h=-P4IdGYn!9|Mb1{7<6
zeH;H^Ki#Zgu)?A`!dpo{;feiNAVP{yWcw;-c&iih=O51Z@71`7j4;;9_W)B1b+)P8
zRrn}1DDdDj;mlOs+M{z-9|x2>e1@mDpOBNg`0;34o+yR8zx65cYl9%W>>Y;ZW&h^A
zQ2pLIW761^aWAGRsBm^^WG0#BO&Jc)$2WhPsydt;C=Vd4;#a6*WM<Ry(;ffajsgGz
zCvb1hqV9jS<@~+DWuCU#`!5(f=)b(KPCX+Y)07&MSEkf@TI%}YJ3>3ZX*QH@yvD~4
z?Olw<?4#m;={d5zDrKOJc~uz`V6v=_ICA961C~qibr)Eh@ntgezDMX%RC!YMsV+3;
zNWkKSS8r<j7Whdm>?ju*vF3hxxf*fT!$V9e*BKius~k~w)#lIgsI#p@JOHJ+`=+`%
zL%AUUd@8go@4Z?X-Ke!IU_#fVpFQm|_hlvHmpl2;JE<ZxPP*P^Y6U(VzmLv0(f5!b
z^r-)!Eu1v)Ia}mH&*YOqOTl+XM)^e@%#0+ji9a5wfOCS47|st4cZj#d*%?sf3+Okx
z>pVlmp5=Yx5f*ckRY*&VOqet%`lV~VE7Hq@<HU33ZOsslLMfy>2m3c$4XSeCr5X9o
z2}HD#yg@EC-ud%(W#a{w?lV;nClN~zCf>0P%sjM-Y(xyHam3*HF=7Ewy)IE3`G)cq
zM^Cik$LUgOyT!(Y`SEQ1iy0%_f>$OnH#SRPoWpdn&AY+k@>O@n!Ojz`UROarn1;wR
zI9VCLHm_a~f&b{A^r+&4m;T6vQHP3f7YOu`sg8l9Wr};Z;&~7GfWlV){nSE>*PhE7
z{d!*Rno&_YNpmfm)7wyO@cmAy@cC>Khi;t_r&3vw_A>!KpUn%}SF#UZ!@E?=mZcN#
zsrNgpqSdI?G*4Gs4HCSy*lt^}WPc@|DZstWxC>Di<oc|MXRx$yw6~qR{OZ?uC5MCz
z!B^Sr9dZ&qp{l_=<j6SqP4w+C&2EK!prG#&dQHvhT%&r2pJY3r)*Ql>#i^&zJ4uTT
z$e}&sg<%Z2U|v$>?G+SP0KC=WR@`I?@%ZYy5=;wsudQj@(b^mOqC1PLA&@VL9}2%h
z>PZMnbJ{q#$)Ch=hW{uop6*GYS$8?G=e}quv`&hcptmMdh+hTe?#-tA&eYDr2Z2|-
z7KM>=e$xa%@YZq?(Xx9X6x<9I8{_TfQ>c>IH)Ce*Gye6F{R6L4ESOWhSck<Z3i1_*
zpxChjdx(`TOBUO^P2y2MUDfy59;s`ic}au(9h+jDPuCy4-u~rz?3D~m7T^JOGn4L1
z(w!yj=)>mQ1c*1Pfspmp0I_8&8!H<Py*ho7bJ#;;mZ39KR16`Ji2}plQlfQtY3-^W
zi^Hr`T-fHe1>{K`fTV>?4=)<%K*atJ8Wg-D&l&;PiwW4uAxYF(nsn?qve8kQ8M`5I
z4O1)xK%{?Sh5me(;eVO>@RND_13ZJzd2gAwR+#61<*8?!ou2M&4!8ibeLQZzl(!Um
zp@uk%n=%1KRRUEPQBP-9=+0~ix+vT7>_<5KtEsUTC-1FHrjNStwq^SCrD(RjuUGgW
ze&ZqF@<XWuOu|h)aGA5(yF`TsV2!|9R$UYd0N0@<O=y}n!H)Lie6A2WD<ihhdc(oe
z;?Q`~p?EC~pOzbR8S1GCXc&XXh@C2as)9s$5PzK}_4JVX17`K|{U4eka?R-wd7F#_
zOnfMBu7@~VqnVlWOC#fnH4AX*$l0wy&B5x+a0*{->KMGZ1K)lJK{7{C?<z18P!afH
zVI(DmF1r6a<XWKKSDGWmjPV^JznN7Yf@bWfs#Vz}Q+?<Xanw66oYfqoVS?5!YL?i#
zUX4>yrB6KAVND(iwd+>)vyZQ3N+Nq)=B}F52^*e7Le%}9?gGb%es^ue)&;yj|2zJo
zHbOoc>^9Jp#kQ+M58t~7wyIs}t+>GYIM`$}6(c~u*;nCa+6Lq7q3Z|chE<*S$GWgw
z`LKH%Gh5NbE6w6=``eARrY>m6F<vvs6^P~umt)&x#0#JD&h~6{eMyJFaaiEpJ_j^E
za>+0A=~O8)Q|(6j^NXwV(INk+T=cK%$eAdgwbBLm37r54){+g@0Y?-3foP$hM*>uo
z0x-zLd73nGU`m;hSQ&Cr{lZ_WPWR)fPdSp1H#yd&v9D%kQG6c)_`X9@%+b%ew;)<O
zvCvU{^h~BS74V6J>ayHYt)?84=b1ax658X{+7akiu%L40=rw(z6Wxq{JeQu)U=V?c
z$Ov@<`%fDHDC_@gboGA*j`<%2g#UNQ|1V(o|A@+(Q*E|Kvh>KxO~c139)UR2>Y!vn
zjgRC_ZlB?q<_;}&eW!zx>7KGe4VpJiBs}U!9N03&C6;=cO|X5M2}r4+_|J6_V_bEx
z_C;-~RS!n2)0`Vy?<&6=^So0ntdz@6E%Dt%W5gvuWMDCBd%Qd)<}x@)=mOa&|F44%
zdk7w2XQ69BDdd~Tgx|o@e<1PltSJ3fy$zc}BfI+ZcEfuJuKTt^BEaCHq+Lc%gRp?@
zZ(#vA*DYkz$p9LBb=7%2WT65In!6C;uhyE`ku!wh^&7vz>X$+RMyAiq;0tdX1vW3#
zd<zxMgI4i0ceB#30q{Q}5Hd0pzV1%=tqTaSuo673VMzcXl7r6>`k|Sg{|n6h4q5wO
z0Q7%B+aFQDzYdN+NL{!7Q|NIkI6(K@QVXmM%#XMw+?4_)f%w?lOytB5DD;P|IF98H
zK265A$qaE(f>0LVUN)o1gK9?6;x#nCdIhNib5-&k*NkMukty3ta8U93kjcVo&%ajS
zGVf;QBVg(95Ib+V@ib>~xQTRU_i~woVrHo`#F4(;3d1vZ-?w#GW?%Ad<4VAD59sDC
z8TD-R%Mj0NH!Fo6B1x(p#by-RxSD8}w7*n-cte5K&iyKM!QJGu92^_J+Kbkp`K~@?
z{l8(U|C_eH7fh)qSUZTovF&}%NT8ZqLAmJAm!QV=K0Tv4opbiUcmm<J2F5~=h4C&)
z)@(l9cZhYRSEl->8+{u3q>Uv_L-n028GZ(XZW)xcuWD{9!e>#0!-bY*GfbK~zGZiM
zx2Y`l68mmLtH)k*4DBXZ!*7G*YNdyg##3e3UF7sc-71sry|{PwfiAnJ?VZbxttgH$
zYB_LF<5o1~WR>!tJEc@9mGNBUP<W|!WWk-q@^-rh(A<#CRHPg3Krr9tBL>tpQ;cdu
zIPW+yn*Pdp1C7eM7~_#t`sG5qOLM!Vi>3Woi|$v6c0#>PuuC`ct<fQ${-eUYhzM<y
zWA2u2XP>+e9Nq(gX|a3uC=xyB%$o(iGK0`lo2d&7f$6H$^Iy8svUruF4mrH6;=#Je
z>fe;>JfhM{egEtNWc%0R5o?MmnS-u4V)eGnkLr=Nr!9QgoOaIO@Z(82jHTQ7dhir4
zEA|@G6J`2=915{%jiJ$v*+We;qsrL(xjB*>?X(5&X-9o{cEiG3ua4+9&Z-^$KvMwR
zTm9_nL`YD>7$4PhoQ98PoFpedN@YCB9~f=3=b!`?AE-$VycpHwcJn%N&-Hkw^=f_U
zQJNuDo#N4pyy2!k!MImMz4dkBW(nhPh8OR6d}B~#>S;=$90Fz^d)tGTP0+n`w#5#_
zve1K|PGO^66mr*!0P?<lBZDL6?NC*?e^Faur%yseOS1_`AN?>WQfAh0#Tp_wHUE&N
zJB#FoU&Zmc#7!Xf2P=>b;|K!h<!DCrH%tX%<RX{cFPwPKr=H@`dS)hU*M`J+*1Q=9
zK*#P6Ro@Hdn3$;qxQhGE@y{9_2qnwP$+u}iJ_G0G*kNuBSGYc9BRHvFNhnJEW)AW?
zp3hlWrdy$mFmG*FRo_)7bJf!wU2y5eow^!#{hbRn-`LL>VwHGGsk><MR5ZnGGdmOC
zkg8u#l_124am*icsE^C<s#LKbo;sGWrq*7OJow;T1|-gliK5_16g<Yu;KbJ(RFCDx
zCbq@0xeEiIes(hdF9BKq66yM%YM_!8#b5O>pPD;S#~+`vnH^LV5N7QrM`Q%M>!u(j
z&Qn1$5^~sOE>|Gn8>CHBZ(9viI3W6m+A2BHo!cw&!_s@5ps#d7VR*2)!}G1wHtmP;
z;v)6F<nNG&BcGV3|KR%l><0e)`3v3oDnS6ZFM!|XAhEs2&G(sDA90MQvz&pe)47b~
z*)SNYVqNnddJ~oAw{uz-dFpaX{0d(sYJV`S4rLnOs3~rfNU?SdDOfj53t>&nrA9gT
z-Mpn`WNdTuiZ3KyeN|=k*FrEqzx1zuCWIbx+Az!qUh|q-d_F~B6mFM1b*_3it7=3@
znjsdcS;ki0p0(j%^C-lcPJw24X6=i(9c)Y*rrF~d-2fB}vzs$IM#tV{bYSt5$Z`q-
zq(L-P^*cf_aQ3Ww5dHHGqg(DU$3w<~Y#5|f?<yn2dCYQTmns99p|1nc{ZuS$W)%7;
z)6k1UyC35MB5`rTM4Zt%^1A6k9^Uu%n`2oCAK@HUsnL*YVm<Snf4i^sU*;bDmHZ^o
zhWR16Ae$fn-xqLDXg~gAPif_|!+l={1lr5_L%u{0!u^$xyF7v7+ircc$P9V2(Jw_b
z;Mg}75Z=%|T`g>w=31h!R4q)xxMfj72q%bCY`5*_m+;!m&%M74ztlS|zp)zvN`C&X
z9?gH1kH{`gU_s@5vpq&dco%)MyIS9~y6*1tKBR``_O^SG;N(5L@4dzoa1O+5Q13Gq
zcXhp=kG#`~wFSBtTcz<GGA~0tbCl7%gnCcG%@uDl9BJ?yG&}kpH$P7rku*cSupOK-
z`ain8*%Q7`F;4s(S+6O^P0v7iEfwVTRrW|TwGFtlfR%JE#GB2u1-0R36MMoUyuFoU
zoYnnvd%$wDsS`$&8sdB~NjO^=UE1!T*c`22W?V4#QOq=cm(za1bWc^GSu~2CIe8oU
zQ!$VK5&-)T+OP@{>sj3Ffk}cE=_S5I>Gxl+PaecOV@wP1r@j<1j_~`Y2|wWKwXo(D
z(h`Z^G}Lqj=uqyAT1qrbVC0)x$^gP=Gt@dm4(}d1?BP>OO%<dx%i+Y3Gf95NtBB=+
zP#>nFf8kbw9iWY%QS)AV{T>Zt5@Fc5yK{3go3P8x=lyL@iol(Gd`A!T@~>%<7WTW7
z#+N+9sZ7I_UbY(K14A<RVNffbe7z#Q;@MZ0=RJ=k-n{5sU4H)&G0bXvq_`kRhnxjI
zo_Nhu)~=r_hx{un=%?SA+NnJk$a}M*H1@FHM@%{QTlGGe^}4zD%U~RAN#|x(ZQ7&C
zbErjPy{`lgHX7oZOwNytLTH>CclNPHiJ!0R%S*FH1=uO!XfQIjC*5vF#$^CHigWpP
z#BD$%n{RUmME^hF;$M*rR?dHCQCJ9&nJ;zw00T*-s@JC+8Mw7c*vO3whD3}z`!7|r
z^`C6f0C{-@p9dQA12odFFd%;OTKoSZb^l*c^RP-mV0)X&XpaV1<R&cJ6ULHn?>^2~
z6}w7VEyxQqyc0=lKuu|J<fMM&;#=_Se)wUXGV#Vyg#gx_*n+3QzpUvh=>P9-NTtgE
zrZW5Vt9k;}oE+2Bz2infMRk3hLs#^LW}F~7AVV8xhp+;*y{37Qbvw6;A{~G6Hc-Q4
z7t4Iz6d$Y?{H^RAYx3!@NXTDl!zw_;ah}fsN9|VgtS;WVdka&0^ilu*<6<qkX+bJa
z3{{RkJ>%us{KdZ%178YC{aWW-6cB-H7Tyv0OZ9TaY>CSm-x?*BrJJo;fk&#0MmsyK
z3iQ^<V{ekCvfMEhzm|dkHAUn<8mhJ~JR#f`&3SNAG=8N5Vtl5v!Vz6vqA0rGhYdcq
z9jOKhInZ~dF7x>IaKAC{*en<D_vi?b<&lyTlT+S}q2rh<4Vw2`otRck51QRLp?c_g
z&-CB-M&@{Of8^wr|G`+&G|_}fiCj*kxCRP7+fsX4hmGmKD9lJ}9u}|kExVmZj%V@i
z-8}Ds<k2`<8gPdWEvCBn81?$1*=EJf!rCtOUVmAY6_PZXCebNWH+Qo7W)tVqJ5}xn
zZ@xoXzqZ1J>2Mr;Srg9&vd6BtKvu6cAXlHOmfBaxeMtz@jke5)f-v<y9N@S$K^HF$
zHpKSrmpKq%)ObtjVMTIHw}z>2kx?yGEBd_Jj<#p`Ry9F#63sBc?lL)LZk8;5nzZ5u
zs{wX8WoT$jFd|d;PFpf2c713UQS`=bd2r9QO|S^L|Ne`Ly&SFPYpHQ7CEPKZCCy6x
z-7;*+YaG1MeuDI}TEXb`LIQlg^VtlXEA-@u^O$6H8ENW{q>^){(8DHWld9@lH~Ee5
zm*-hV_j|>Jv5ZG%6nY|>(J`O~P!~-Doa%ERebd2E?;=)yDf{F$H|kD@AI&3GHuvwU
z5b$L4?6?;%XY~F-U^f+okdT?54z7mzp&Y*6v$Ye3YH6)bF~<@?6;*g6bSOrfgl@#L
zyJYRhh_(>if|_9tpYv`xMz}xC;530NiB{ehO(Nc33#Au<+}HX*tP>*gIO#3yNOerb
z#CYNMg0jib`7}SAq_i@h@-~d^C@n%ZQ{pQw#$rs6)PEm#z-0}&&o?V9Qx@1~Vgd#n
z{Ot}1&+M_&tx6%Vi5tWZ=4OwM4=H5|`0WTpMJb{WQgIW|CbXbkJvy>ZaRPf^`JoN9
zt&t&U7PiF9brc(QJ~q08DOGQzS=f-_tp#<pV)WW^Xy}b&xo6)9SAk4uQrvJz0i)4X
zAkFo)SAMRHo22V8i$hnd<~5ezzK{vCt3=(a3nAH0fb$irn`ieP0l9oU$wMcG^DAaK
zKi`P?V14PmjD{meMx8I3i)t>3IU0PE46Dzf8}@~sbWyTpt?e%CzgZz6;U;A6%$-{y
zuf_8r>(-NP_dJ+xzSOXAqa6Z~KlUt|1pQEXUB3vAt^t&FGec@JVWRIGA5G%!>1!(6
z7jGV0Xo|fZwAQkC-yFJ&aS()QQ@Sc!k|PT`ixiAsvbaT^&B-!XfzbNH2ZD{M^(4&H
zM0_oo=K`tk*#uhXo8KGFv4?riCWD<%ZAP<|#x8ft2a?xs>(aU|9o+7jV4Fg_6tL$4
zHJ_#!e1`N9wP)npMnsH<WVT(sLWjmXqTSlyHdILfs9&lSU`I)>po)(#?yyBVQy&Es
z>-YQCA3f}wrc?dOTk2EIc8=rphL`u;^OxjUoS5eWmM$eK(L&9R>g>K}3&$a5{<WTC
z<sfq~ILW}OZ?jzf4ynLS97>^ab$rBCAst)<=r~GfCk2CT0im!oaA-R#Qe>F7)Q+Ge
zIj7KhO=c%P;QhsJ8QAB-!I@XqqNH=5-ZLdx5u9?NwW$~3vMLWqL0)juE11YRco$}$
zO><kZi|wJZm`t_x^WxP;XCbRuvVnZ<D}p^@HSuH0iMqS^kKgb+1YtEXx}rIIj}%`|
z=DO5p0xWLj)1Q3e?pC&-?_aO08Ks{%Ogkd2Y4ilF{yxWf)rk=AR=aX^bH<(~aAadL
zcVc$f)iuD2KaihQ*lI(5%1JynAr<nGR6Xwtsw~@zz51j8=d&JjQSPoVm!wqh8@4-B
zCucpPaqakk_fqSem&9aN)OSd=Qehq)IDejn0k6i|nliV)94rX%Gh!P(J2X1;(itA&
zT_ND3-yywIAWYaVqXoMkBS{q?>dh^a)+SP#o*kqSII&&(+fG%N`__`^$qyU*xh{)+
za%+}tJ8@8@9WH%p%7CC*_X1m;y)?2uq?wY;9T*%C!o=L4&MqJSh-n%eVnDh`aWrCg
z(^a2?TUa0R+^<~Rx1M62Fq6smCd^0Y41FN6Pc+dYUVN0clnt`?S0^{KF@l9n+o{oG
z(mj!3>HAaLp3th?hBaQk9=SfUdp*d1fwwgPodT1x3-E5T6{Q*%kJ3-|h3*b$W9)nE
zCaK|iG^xHdGH?&?;0o<t8cNpE{6eAAP=MP2_ay94U!64(eizAJ=NU@D2$T9F7@}k6
z{fW>$R8R9g@7T^IGY%`sNgQKwhI;P{R9^=bz)D<c#=#n7Xc*n1bo$n?9mq{rcB7=x
zcD-<SE1l>_j;*gKdy#$lPQ-A<GLGdXW>;t*sK&bX78S{m>0dpj5c3^!z}DKS*gd)D
zg`QcejI5izWF0vj=11<UGn%;cikd_&Z{DE9kku`zZ;J*4gcr|d*}VGnqNKvtGS58z
zQzyhuF67(w-P{d3UNw+ys^`MV{S9i!IW#Hk<MQQL8$QWkD8;8bRWH)=q~lyE=dP%6
zuKJg`wd`_CD|*d1evWOyN@Szx{T=5`X*_3ZdRx->M$|0jIB$0e{7M<WB8;hA7Qe&L
z3^wdm<Oq9%y<ew2VaMYH23>Fo{<1<yb$+3uAjt9ca&`(1S)s#gQ+`nAgvY5b(Q8IS
zy2KVz@OJXZ7-Sh0o1&jl(c~_#{G?jEgO5hjc7u*FW4X!xky4>-GaGAfpCBEbF9#c^
zSiXv}h;Ny6o$9$XWy0V1^3XA6@lK}dqT`?TF>R;|xcjwwu52K)TJwvpYP~QrWEH%X
z)wcYs=<mDyfBHZ0fb&s5jQIbrx^oR@x^Lk4q|B{QN^4D9q7?IVQ{rYUPvlfBbax`U
z$@!5}*w)OUgVI>z8p&)9h3;bLcIYO<WR6WKi(#jv(q>EyTiySz>v?g%cwRi$^X_@~
zd-;F$`~9!)_4$539zTBKPc)3sqoYynZPA6pvzyL<XM?7sewwS#q!(PGTPlxmo1B3O
z_xqd+iq=gv;pYP$*0iP9*r-SH9(t`!2>9npinR6^_n^1v(zC21`9QZ72cmR`sgUGb
zPd6_HUWeQbn*#gHts~($2~(}Cfu$1tvkC}>#&90Aj=Cq6;7l)4%JRd4K%hj^Ma{|P
zK<yd9CY$UKlQ}R@NdvpT^YTBh%=hXq7Ic;&>UT-rEYpHZ2SbU&bO;Xs*6dXtL#>W5
zOiiQ{{CQ(zQ>Jkxr6g{OS?-jSf$fEsY?xh7<OOQ062(!Lnn&iS&p(s{lBOVTRGe<Z
z!X5VR=j`rX#hGa2WzMd)2qk$nArg=25|C`SC*QIBLik1Kr3<XbT#^sIsji+Z(#l)n
zIXE}D(8`4A&BoIa<9>4%Vs3idK_VBePlOWeDKTxvF!d(n71&i<KbC%P1j;hbrZZY=
z{D@I}X)q~o{yqFkn+d<Ke-)3g<QA8Y;(j|*z{woi=!k2V*nK(-C(APz%Z5}IawhwJ
zWf$|d6&9%W=ocnNzzao1Rc0=y$b+{ngFJR)#VG=o`4-tCGazORCh8;njJZ`UmfB(W
zb?keBW3+@$Jndhf-F>{BS<pljk;1Z;6U1+qGEp{S%#H2O1zkJBgq5UJ(N8yDv%Blh
z`HhDs->u7LV4BZ0D-5Y2I!W1;Hz(2qg2lQ~FWL?$0a%m+nT-<7>SANW$KUgaR&rLR
z;l<XpuHG2&=HOlRX`z%QD{vl*HUu6UVKqS5+)c{;FYvO=Y9Ja?nIs7!She@NKa9zY
z9)<Vwy|(um_H2u8!Jy<(>xBhUK!J!LY*VI+)u}$$+*xz3Q;k`TpQ7!%63>+>wvJf$
zbD`|%m>p4F-wf~Uza0oeVVL2p%?%9F_UwXm*}|i*d`q1^K5(zNhD$dLty7pyRva$%
z2l*lO4f(_#^bUe%<u{<)rj4An>3Q)gm(%eb6y}9{fKANPBKC<k-pkQ&;OjWlN_HY#
z2*>71<Yw%cEy_~lgSY6qE02qhoojy=#W3G<_h=<O@UoW1%uJv)79rtvNli{FEgmb2
zx||H>lE`zOA=g&dI}g(*XFS6iuafPdgr!v%?;CupOForr;qJ^U>Iaumf#7z`jzOWm
zowD~zA^+GR$pKtZJfW1q`s2_mwa{TM5Q2^8312dp1Zcrx5|#4;npm{A0sAE&v@OLZ
zC(-zc#hNu~)`TG#KBHIzgeWiSKC5U#$x||yS9m7E*{t(n*{Bc+S$C9w!8KU~`-t4X
zyAaZ!>k(bv><c|@nwDGTkfCbN!H;B;S|Jrm1>1H^@5yBzruElzL^jTn)(#%m>EDdJ
z6|d0~xy%&uzPOcQZ6y%0&FajBcwHyQ&`t&0$M*ItV;~3SZhP?a9<|KOAhlYT!tn$x
zlE-4iTGjHF#Z(|Jy>oDDwoNt6gD!hjK^}14Rde)A^QV9q31$q^Mc)Ys*m3es3&$w&
z%|{hug3n{ru8=<jdVcO%f{15hu@~Bcx9G&b8;>oy3_PFF{mDSz#Z6gDahpC64$-bN
z&pmNw8z>oY6T+!NYZWwg6w>icM>!}T<DDEa;CZE^Qp*Pq_>a%zTL*dUuZ{cj!^8wF
zR%?LS2U)8rv(y{;%st+av7%T#I&Vh}{WCm&4^?gpZ0RJ!lXB?lsH1fR6LHG^ZaQMP
zLAq7upf>Yq@_Ou5b>j3{n%3#BJMeJl9;x3c(BD!Nkne7cRa_d5>$q>><XS|S&VcBo
zJ13uGR;#)JKcW@X;*6f`o#lN+5gQ!0Xax9qTT^4}kG9LQ=bfibJ}vv^8?3)XGKvMm
zzk?VO0D3YU;rHD#^{lJ<`1{X}SM?5@$Td)(z$ed;zWLM38GRK13KZ_8!F9S$Lj){v
z>u*pdw%X)k+B)0@bF^gBIreV>y;atOL{QQU<Kcf1pK53iS_NASiQ<N0!6F~;!@7J$
z*ve~9dXNO9pyPc%mS{V@G%V~ap^z<eFuyJ9pl)t>k(818JWs&<j8#YC_pWJeEDJFg
zUCK_MfW5!=(rtLY5^1`%cblD1&k<l7x`7Y2sBWTebf*S;zdCghzi1R%Y`S}ewlK?s
zs-8Kzp{d7RK`uc^r$zl~uEblN4H*b=af|EKWb(H4tgJB3KXnDU8HL1Se12?90ubB(
z7;OG0m7OuP0vUog`iH$9NM-K1SEGgaFJQ*f!*{^{B0PcXnX*L#Jlgrx?v#PmR9){E
zw6WXWj(asjn5FuwU)2~yHO3WmCjZT`hyF-PtwTBCF0N;Vcxvi;#;6I{NQ1+gR=#<2
za$REMeA!P1Pa^HjN%{e7((r@pEMI!@iUeI?Ry95UsH#Mp<eE9zp;}f&qU*|*NmaLG
zzwAt0kw6iFzB8u*j_<k~SoWSKR0z=1uk-Et8WZSNG9s%Qh|>?bPBu&CpcMxlT8L2b
h4@fM>1eQm;CSBlseH8w0zx3Zklsf<CXY}{czW|3~@^1hD

literal 0
HcmV?d00001

diff --git a/cpp/src/arrow/compute/exec/doc/img/key_map_6.jpg b/cpp/src/arrow/compute/exec/doc/img/key_map_6.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..e976a4614595b564348bd1103f9b6bea5d025644
GIT binary patch
literal 43687
zcmeEv2Urx#wr&qYl$<kzfTEI<1c5=3BqD+e2m&G*1VqvhhoI!3AfSi@0TBVoISfb=
zm5k&Jl93tW0K@QF&)(;pea^n;UhjABdEfgU4LwvhtE+nb_4le(tEy{`hK{BHnk%Xr
zssIE60q%l7z|r(+78Qi86#!^y0p|b!AO(m4a)1C#odyr?LFfP|n1+Bq07UGs=R+al
zKhq4L%=-{qfbgdbK>A^s10Vs<QGmyK06?qCu?(h2!ToqUM&KBMV+4*7I7Z+Yf&VWN
z(6@E8aPn~BzHZ~>>cp*LVe9JjkXuYtSX5X{R8&k%SnRxnj3n@1lM)vd7n2i}loLPC
zEiNG^Dk~>>9sr1+K!5j2XgQt`{xvNc`}AiTDkcj6v9W(kr$PSa-TcjSA*1<GJ;$EQ
z|Cb~^R{9u$V+4*7I7Z+YfxjZ~_uhuMoVd80lr->X=fDeyA<*A^3Sux~!Uhn5=fKzr
z?K24bU(?BuGr!4zaTWr|@6i+5Oag-6(+u{%6@K?G*Z+vs0Fa}hqftN^ASEUyAtoXv
zAt50nBRxUEKuJMPPQgk?PtCx|2IJyn<KW=t6XxgU5#;6II3s;lP*hA(QWC~5s~{sT
zFDxM`{v#6z85tP`IRy(PC5!k;j+5fQyd5<Fw4}f@(1U`U1PEv$P+G`QGXMkck_e10
z{RlAqwm}G>gha$7q+}<^L4h(FP-7^RfDlSVL<okBAim)L0YX|Lx|3oTiRpFBNO+tX
z!~>qDlk#3FYGl;yLGwx6dl*P|f{B@hmF?7N{xfIKNlKlUmXVcHzO16Ermk`2#?4!L
z`UbZR%`GggtZi)VTwLAUJrJH=L605>hlGZO$HqO2Pe^?JBI$KTX4aeRw>j^MOG?Yi
zD=Mq1KQuMBw6=Y0|J2*p|8-z+Xn16LW_Iq|{KDeWGG=3QYkOxGySM*CuOE8;zWqh9
zf6$8-)Qf<S5K2h$LoWz{2e?CN35iaM5z}4NAu)5N=MfJeWw`V-y{M6lS3(!fc<*7)
z2_`<t=~I{=s{PdLzouB=e@nB!D)u+MMgR&Z1gtzLEdU30_s;;Y{ZIVq2UI{8LzM9H
zL3w4;@?Ps)X<5vdCo#HmXLj$A)ZM}lV4%1jthF7|-Fr(P_e}KA267K`S?pTEeqH=P
zoM4EZ)W;k2DGgUL-)rA(re8AZhlF{|7}>m^D=sRp?qBDXiDj20RQ2T?dURveNbxId
zry$gC7sq?$#=+}DX}m!jUO|a|2pikGymsI5US;3<kflQbjR5?8D`D;t(4dhY4xC9N
z_zyp-Ohdiu(=!m_`zz}4`nosR8bn!Qja5axw$sQ>sCpe;QPf69fK8_9NZgs&jmzyK
zC%5@x)t8o*T3i_*oGrD|dK(v==`<-K%6bM*hNLro<B~{ikvlb?I_*Stb&YciRp~L;
z#*8`Xm`2qm+cvGlm6|oA{KmF><RXKPpdG|u5US8M<7@W~xY=L&S<A2^--(A#)srNH
zmnt&*EpHKH>hwxYtdnHTlKg0$qEfJ~sAW{JPFEEN%uNzfp#r$1NVNAl47vb=>Qn=~
zyd=F<+9hg-OUU)jBOnTZS;ZXm9s!dJM*y2*2W&U%2tak=B_U3#CBeoH=18$8qv}KA
z71!I>zuUjTw-+A)DOyJWk{xFQ{Nd;i2L#D%mN{RYXfon&#y~q9C2A(#BQ1uJU8lj!
zqwtcz-yi&a8ltV$Cu9-+aB(go?5368BFAjqoj+v!hca!c;w($?OV!9ORq*D4oGjaf
zVw)XtxeFJJS`A(p#g%%0?OJ^)vNt!mi$S&eQ~xcUvs`VN=Vj3uy4=LVqqohq1$i#C
zF5+{=AwN&6JkicsepBN6tI)+D1(JN@YP4Q}uj?SQ`)OM;LZ|qTcU&8~7$Bx5=<)s~
z#?C1Bfn6GlPz!f4t<|wPUloHm?HI=dYyTF>xO+D>o=5IEs&{&_GC;Bp<1CK=`ImUS
zG`5c_EL$?-WW|?oQa#~e)|H?n0J)=;h}&>Fz)sHIOubr$9|=~7DIBe#ov$!%Utd>Z
zMi$>kEyN)wTR}x_dR(`kW-?*j0SS`($*%1kz=ea;pYfj3hd3upPN~fRc`i0%j933E
zLG$c61sj6wNYos&!x3O=i%Y8XEPKmcXTukLkqw@(i77L(!nt*gb2<-(1L5n<?&o_i
zzq>kf$<OA+1F@JJ?Rz0mg=KF}LxH7{Q^5v!-H%<7GK7Ju)Nj|7*sEc<kUo5!#Sze*
zkx%NQZLv!cCz#M<+3fP^Cf`e#xj6~%{B{d)b%ML}!1escL^`s&o&yFm_a5uBLJ!(f
zf~4#<`+2{*4vD;uR`0bOqq`Uk(^*ks#BG(K`oHsij6P%T-j+sTNA5&F^s!*4Za}#5
z0>3~4kum`(9~bFtKA?xfxElJz^6mTfO5=(rKGxseWRl;QJoA}A+MGA3zgNm&+xvWF
zG=I$f;PZNxj%w+;7df_x{m*6X+m1MNG_`IZ-$>fBnG3HAe}(c<X;PIg%4=k|li6Bq
z@Ds)7&+#HGt#pa0sLy#aK$23V%acYXY=qdfOT})&Y(ol3;!w~6<qlW(P4)<xd5a-q
zX@@a(Sm+d<0rHgVU6R$~Eb5Za^0A|72D2knObX2#N>flhlF<8><(onVV;zMu6_GI#
z9B%D^xq`-a+6fdo!C=quez2^6{gbqV?9Y%uCO!_viH}vR2Mt^EFK4}n@5`U^7OEji
zUQ-Wag+u@{Y2?gu%S#(g4)Wrk`4ic-1<fwC)s+)ml>JsG5uh`Pt<WqEJgaM2xyeWC
zAItOj)b09vbC3_xg!BG%gUz`f@1&mTIXrq0b+R%d@TN3*4WG-%JG&&`F+cepHx}cL
z<QzU}vYKyH?^-2d(w$oX8_k&u22!j-mf<(UW#tpAWQO=QnqPV?e1F<0mU2!*{-Q<k
z2%oCv<LZ~~w|R^|<_CH@zIqE8@q53!yXzih=Il5y>QNgRnVF)*Zm<~%OJ@`~0zSIK
z_IKirfJ;|^EQgs0)+_<>x;zUjS*vqQ7c1YZQgZ|iGK{V>4&DCvF_-V)erBBz%M=gb
z9EZEiMY0eX>2J_e_OUEZE{~eNvEqSnbhf*5u&$cpD|<D~BOvBb?QjbuCJ~^6NyG)N
zp;p%5ar%^sk;M}VAI9XpQ#(cgnK(10N&mIwGXzOky#X2Xz_XbKw6_^aDIfFEK@_M|
zUDoyUT%Q#se=M%?&*Ig?6Y-_C#VN6m{2lFcB@mZJrcVdkzet);TIK;L->uiR`gYBo
zfnK3JbuzY9S)k^2Gv=hs-1!NV*6ZxP)wpX#;l1Ytp9Vq{yypGS?UA`5b4J@=#Wt~>
zS0FXJ-HT8po7GWR{l3KWDH>omW)QT-6Oha%0etA}S36x7uE*+C#ajwu&T{7l>);n#
zfu+H!(`AxT=M+v)81yRhYMzWAwb6--_J2Ua0J$xwuhZw~FgzkIVtCnY`F-s78+&ix
ztxR8Vb|LDiNglgoVd)(3CDKqi#%Fd2qC|^&q^HsQZT-df>qkHy;$|Sh8r139s}(=e
zB=XB)$+9Y~Q|T^Ps%MZGu4ht0<X3n+g4Oc}?cb6F$Jny$(`!Y1C>*_`>FnSsf1{nP
zxl{CEZd3->5d2OO@Cd=Hg?M9>5Nj2CUTJvrTY|g$Ig+a-$l2T_ZA@0=pdV#TAZ%jn
zb%t6j@va_AR~BGc-QAiYC{6zbBO5K1BGpJ;R1uIeDLsF#mW#>g@^eGeE_Q9P6x~|}
z@{y$OZTACZI7Jl}AtN4-XM4Y2PN<NHWW2z2%4+)3;rBY=-N^gw2NQBVl9f93oe_My
z)D@A>?gqhwQa|_$k6|BPh*)}0)L)cW%vuhJ@`;7+S|4`HY|m$wqPNT9AHRAguCe1S
z+uWVA;3%=NP&wC4G%CQds&_ZiphM!zm+^;fGW%4ikJb2bA!bKFfh~NCFOA}b52vj&
zVfp6Rmz~af)5L}kjzSgib*}n;?WsD&snQsswv?TXM!rD3roB*_qC0Dy4DR?#{vu0q
zd|o#P>GxVStZ(U^WMm-fl;PZyABavo0$}?w2TMml+6f?iR-9R*!ixJ$^(Bo(8L3ac
z??>@|JV!u%3jnh(KLS!iQR_u&BWtLI8Ign4>qH%FJG~0d#^23u9!Lhcr~{pg*h5O!
zBVeT30|$PB&HlF^EhoIpb$#y$C<9yP&v;3s-O?oJ8rXxad?L<<;2B%I|5qoLiBTep
z7?O@Vu)1}9<R9dLKVE?U+J{=e58{si5o|Y+khE^0dN!r^{i@1mk)JZSs~T7S<>j_i
zx!DfzrJXwX0_bW)0XKWyE)e$Q>8$6+CrpUm{FD~-qg7uszJN!rA=fv4dN6;~CM(xY
zkIpvkoXMyJ$6d0UDH#*~5-MN6CWc)_+YNXa-WvW=r1>Cy?KxG>a&~Y2ICiv2Pdduh
zEzd|rw2k5#mDk-_60{_Qs&2-;%pia3QAJI4>PZXlM+UEt0NYL%?1>cPOQZNxR^A7)
z!c6zSU_MrMCE&KFEbyg}?8Dq_WY=NAA>I^|aiD(<td9fJ0~~5riDc*ft0MrugWBCg
zw&o~inH`pqAC`~WR6H>1xMTg$dB9(&yx<K_f`e+$O}Eo-4TrXcE7dB^eim%`kvYkZ
znvcx(f|Tg~fU}(zm%%~}D&Z`X`fwjr2sbXH4zQ;oZDh^d=sSqmyAJu5z016j@mc%3
z$TuaDgw!$9PccI9N>n_+Wr%b}o|7-d&){zYp0<ZAxVQh4mpo~rU15V^Nz;ga=Bo4a
zKGL8+Z$$0qefU%Vyb<eVuku%!s&AG;UYu-x(E4)j$^N-$jWjCF<`l{rY}nmGyV--&
z?HuxAm!V_y%WJzgX>q)8ldpqIY(l{ySw>6cOzn(i)%R7P+lran9e~Nt;&dNF)1XUA
z<SGEeb;y7CaeZvuXxON9SAtLM%;yWq5G5A18(*cm;?yEm6GDP~A0|IN0<QL<`loIk
zN?a`I<FoCT-;IWQy2+07`YD7IHl$Hr`O{v`AG$xH!aiyXW9^pdqhRO2_~a;GKoKE-
z&ty99L2leixREK<UIn>l{vGy%EZJMUG%fgL<PETaX#hn3RbS+{PRf`5?G**F1SS!M
zvDD?YyT%*v=^^hF0huMkH+7r#x-T{9`>vHe37o|zeUycNe@~KQ2TYci2X;<e@HXHL
zByH`FK6T=q0>F9B@L%?Pew21%aOClum-ih-_|@;hn%i==pjA7J5j*+jCNu4@tAz@V
zr%fl-;lrlW`>CJ&CAT`5K5OV5$Xj&q@3+rg$b51Hl(wZV88QGmN<{zigH#>LH+5oR
z4nr~WEZ<>zh-o$F+-@<QP=*2;`7NC*qAiC3A{7&IFMQ6_PC_vqIbs{~Ddl4wgWmYR
ztm>Z}xXl(5>X4Uxz(;m!?Z9``-Z^2vXfb1~n@B8uV&$^N%-R0YZ8nD*TI$16$6{X@
zQ}cmp(pmL;1~V$^-DbB`0WSaR(8YhzLktI0@4mYodV*MkKkME~^|$#Xvl#83h5oEW
z`0}_8J#2!1MTtzfAV)bpYq<An#SqaN&vWN4D|xoyzSi?}OI>c>l%1(O8sFHxpJR9x
zB_2U9?C+!z9|6rPrYk6ozOKGCI_kNgO{t=6TRp>@T(^?RS>#LHtQGUL`MQZd{U=m}
zzQkOMaV|JaXr5qkt!pVTi#fRf{r+aU$7OJ!^eNMs0m<uc)6NBSLzJlA$-Pgieid05
zW-{R(l_2munlChE*&FP|!h_#@ebYV7HYUK8&T5ls!JSugjWCE4O7+0Ys%-HOwEwgI
zzpM!Pu<=M!j;0r~SJ-L(dDKB+R$<1dzf4}qOik|jgyluPfl)`kjf^|;m(D&ytZdRz
zIj<<unj$iE9%;G0dpaUjzX+UI_##%~KY3nF`^qT#+t;=6ns!fts&iS&m+4tKW<l4M
zucTiDHLdXSnw7cPVe)C6iU9r>L0y#!3c1<;vhIF9Z{JFdPw%StyFGiJdwa}8swLO4
z9nv6}>E2{jfV3P6vq)Th>h!ciY_xwt7Pt4KZp@cPcMrokN3Zx!v{kHVm-Vf#O0(P0
z2{ky5t|`$XObzi9;X9yvIo$TJd$cQcdm3TIHCjy4Tk)U6C62q8saTd(mM6W-#C&2#
z-SMt`M#0tS$oh8(snng%b<9n<-ZQF?w3t6j46AOOTz~}5KKR(ko9xw1u*7w;Pin<;
zq4&FD{}X5K%XcrfItM~J)Y2!Ht6R1ToYULQGDAPA*7|4*EZ{vO*X~&7w9keEi~0F!
z#pTf>0}R2WgLZ0ck8g4LpGf!*Vmbe5b)Pr`JsJjQhKViOxVk=&6A^KA5jL}Mx@ReD
z?&KhXFnb^(CM+rf$iop2%*^dAUAgaBTH87*@UPdP__=K@6!`U}v_!QYC|lats(U@O
zyy122rn#5BxvT|0T#@pGJVFlP@W8>+)r=eAaNp5I4xzyFGj};K{o}L<kD~lT3oAL@
z%c{RgfJX{Ezuk+6hlj9-gs{^?YY{P7Sy>TLaS?HGAy7id#naK%3?by`!uwkWmn~h)
zAKE@}wRLjj{*j^CJtsF;1s*pyTMIcWGb^ck78X)M_sk?Lgv7)w%!JG=B*cU)BqS`P
zrNm^!t;Fu}{C<ND5B^&GpF#na2dX5e{Ls=29MZn&<aA&0zch+1FY=F<{VgBM%Vw^Y
zia&zPLZXsF;^%LIQRkn5XJIi}(Z9R!FLLrCKNS2sg%$sD2E-6d{S|Q#e(rz6c3j9W
zfqV?tF<ifdz%L0Ov+EeHUqaxQgpb*E4A(Cq@JqtS>^g?)mk{_R;bV3k!}UuD{F3l7
zyN==dB?Nv+_?TVCaQzYjza)Iju4A}<34vb{K4#Z3T)%|CF9{#B>lm(ILg1H#kJ)t$
z*DoRPOTx$OI)>|)5cnnGV|E?G^-Bo+lJGIRj^X+x1b#{Qm|e$k{SpGdBz(-SW4L|^
zfnO3nX4f%Xzl6Xq2_LiT7_MJJ;FpAt*>w!pFCp+t!pH3Tx5M?fw>B&t!B;grz;`l!
zt|<d0Ao%eMZiL`35$TWBS%^-MkPwqnoS>i}KS54TNkvCPNkvOVPENx_Lrc%V$jC@R
z&CJ5Yz(U8s$nbMb87O!SArTo75g7v|IVHm{Z$H<Rft)8`hC+Cb*OURu{-0k{=J3Yr
zaPa-M|CQZdOphzI)1Oo8o=dCP{(w`cyv7@>zDn(YRK$U?gl2K%XBSUT@fM16awW1;
zkabhxt^-@d%bD%|a#+c3lfXJ`B-rL<E8EH|kxP2I7sYB~#1{x3hBCz5P=eiF-zM}n
zhkts~9^A<7<uXvO6En5Dc5Bv0dqyO>o)ZmwE<WNBBrlw36I4iVOv{a{xfykE8fW6~
zxieV?kBnVC0ysu)W%gK)@Q7ip{1tP@e8u*h_j_~X7hsQlk|x*>-}^Bi0htB}RHG?d
z=_1}%&@ErARO@1!W!Ur=qq@m~YIulg`DBPxDw7d{e}?PvJ0EV$VdE%O%*>NqCodd@
z;e^QdH-t~pzG*lDr~MV5zRtK!ex`wSKNX!d;o<Eon?}-Skc^4+D|<OZO=nxr*yAJ>
zbq_yywRbf`d~CwxYLW7Z=)6t@;63=c1V3VO8e8Wk6ta4V{&di`lb&=M6I{KZ*&YAv
z$yLYMFI9~kuZej3yGaJT4=-{Jb!}vdn-!{aC2tozGsh|%+(Jy!V<Tn@LVQT(Dav0>
zb&+R{aIX>$URH+-f93USM=_^y`BtoH&+^s4+s?<^O5WZHjr3zpqkgvwyXtqxuk@WZ
zxNMH;>55?Rt@YH$N5B#jmk3(FHD3;%q{(T2)q(!H^YA@{3)8yC_WPLW!L^F%4(pbL
z2{LR*x?L)%Jo15iQwBEk+~@=f%2W|?kQ2anMpoCzpDc|!)8_5KrB9;7e%ZLQ%Py(t
zG@}0<D*ln;1!RZm@cS{rlO=jJmKoT2{QigC>wQ+Jk^_a8UrHYFu>^^XKU5`N%k2^r
z7-uctY*pwK*zmxUQ%-Xl&5*PeJW}N97CiM;D(c3ygx=l(NJ6Yy<q2SmW5e5=p8>L-
zN{WX~_)WBao-u9lefVj&AxCa}5lP{ypkiH8&U*T+6u&C*?N3vC7H3Q@LS51hT(0%5
zZjG`CuCAFCmsVV4&>L9fjg|y}bmngW1waC2oPifrtQKV8Kc$l1_fa5;_ngtpuP~f=
z_u};YTtNIKB_6(mccL=vp<^8;8@Z+T{An}vSj?WTtMdYPE!<~PRRD=kcY?-TyQJE1
zH?hi7;7k3AwAJXrC|7cA8k^1c@-LrnV9t|n@;Yy-PSFQnBJDC<T{;omY6G3=pl+EE
zK753`ory4NtNywkQBSnommeK9b9hNxe7&hpn$~avj=ALgI1V`NysHEqz{CB9Qz?;F
zhjwjOFs&KhmL55_C8opdM$#{Y3FzbtKkpicvW%B`etYbd+N=$+LDJ*cbFuo(31{Cf
zCY6<p7#md{EH)M4nFxvXmPrX8LST<4%b4P3V;FH4(b^3MbCnoaR4ucc%k%}Z@>J>T
zWggL*WGt}itKvipuUj)y-uT0n@w7O2?O|P1Vlygy|6BZefUa9jH=bqDE4<74zW7yL
zyK9jYCGm|LAFtO25-(ibnSYfvyi_iFdw8VD($+EXOn!d18ix%4xg_!lU;(Iz1)AaJ
z+QZhWhw*soA*|oz=Q<4Igz$F$Emf_KM(DZB&2RiCG9ssnSj|ng{_n!S#$rLI%0bgu
zMIpzwpvZ$DSH`AJjhlX`=PS}uyZkjjrFVI}ZBmsp23^Pvt~sT|`-Bv&J!IOlKao1!
z#e(C;+*+{%O^xY0O4wSl^QrllJe~>kg$iV$jBT%lJ=3V)+GHAnZ?wM#-&+TFfd%Tw
zN!EHE8#wE_JnuC7B>7UcLt@C*p`M0R9oszK)2gbfF2wHn`>7M*LJS+!6Y@JSwZV2%
z7Wmv!t;Qh!at8@7pM`JDZ5hR}>yRUW+<P|UkRp}(2%sjJr14xi0!|vjr=5J9j)3&I
z1{hXt61g#uGb{gutd*>RwV}<)Otohbeh#Smc`5RcA_X`CJ}4d>0STqZ4-?qQBS4wo
zAC8WxKLWnx-xyIK`Sw3|L7902oY7AjyU=FBgYL#iHyxbrl^lm(KhQuw-rPgkC2WYk
zM3$*`qs;l;ulY2HiglX&xY;^130||J*|~EPt6B@+m|K&r9*}t2y~XK!2HI;V5|tG_
z$%P;kxjR8z-!*##&_|c1Xt%EK!~9@Jz%-@r5%4)_iwapg{QaS4M(T~dZj#hJI6G?5
z7mGXs9!c#U0m3#Le$vS91N0FH2C4lz{?1^U%Q8|<b{qZ*@H_(IFg5s)$&e!et89wg
zn2+I~^*E$PftaI%f56n?MWUF!`5$C$|GW=phN-!ZeOZ7|LZGJi%M&wgM+!!%gG7+t
zJ5>d*X;<-sdzXA4y&A3B1R7ekOE$rh!9_%<=kR?diiAZlbO_eto8SW%@BBI1zbLKp
z8araJhFS2xv_m;%zlw)rWL7lhaE@S|?@k{9c$&dUTvdV6!a+yM0)Nu~gciRBP*uKk
z%OralbyyMzFi8Xs$j@5bcYh~B0_yDL_@*vYt#J7WIAdDGG+y%x3+i0_yUs^IdnW>1
zl|&k+gXf&Cf?*EwcNURsKo3|!Zd~$cQj$081e4Gh=fjdeQ@9_GTv)>Dbmia{p<TGt
zsL3rw9312~8_4fONzZ?%b^Gog&~WU7{+2vgFm@c|{(939fIC@$+H+C+hgz?Lg52r(
z^Q@&hZ0m)w<8OVg4&u1wO{a()MAZ7qbP)2a-UoedQmdB~LiG189fp>#jjgv5i~A;m
zSq&y)k@#yaZF_sDnHcms<wtOhsBxzXWo$Qb3|RDAuzHmgq!CP54)2(?B<(-t9Eg<m
zhiG5z1tl)zKh}T7KPqGr@Z;te`th!i1^hogAoBkJmek@8T>gWs-<xY=LR#09XB_LM
zAu~rn$nuEE(`-A`eMO48!}XE*`cSo!1JiG0J9Hk73tym6R>dx`=#|e~&?A6VVis*s
zE)SdTdQ#K`1vzb=zBbtFFL@QoemlC03juXHFd;^bN%Zj^n8q9d`uWyNatkdnH2K55
z_53)Ae!sSd10HZIwC~t!)_#H>#c_-H2doeCAgml7aB;uhNe(|AoG-S;^FRT|Bw;IL
zlZhy3>0J*sY;Ky5Vh3Kw^TuCDb^m%!ntsb^wd+6}VY*|4=0&(P#ZH)Nx~?duzcrPe
z^boy8R9m|i<#(OnEz1d6IKtm^x3%NO&W9kq!ahIPB!WbzFbR69v$k5<Hs5YKbg1W!
z$yfp>)ur8?f>XUH>#k9O>)4Lzm{7ly*vC`kb*7rytJ{?h^4A8+V?PC3s<$NHO@JhR
zdwM6wlB()-J$8XFR-;pcUv@##gD-MHai|@v*DJuLcQCdmdw9%PV|Ee{6jSX}|2%bJ
zrvaKmT$BPRlx!U62jjco<teBGjZY)U5Ji40^)<{nN0I46(}u)C<9O4VNaX$aXLsq`
z>2A6a)K@#DuKP3*-i?e_rRF4I++<$UI3UAn&W^cYV=f9z#e|*Da?Hc2WS@=ccp<A@
zO4xY+E`)n;P@sK;!QX2!X4d`;ZySpG<2Dy@J@+Ae#`(^`%bE44k&xyN<OgUA9?<LE
z^1X12IjAuaRVRJJq?vcf4f#xDD8g~P6L;5~KkYEp6%}eI{XvVMYbH9!Iv3G<X4pyi
zzJ?{Q^AP7|7|c;x&Yx`6D=7uE+HuJkyq^;`19ze6A$)e%XssJu{V@Y%__Dm`6b-f3
z1W`Q+*X)5++@Es%2<YlV|7MP*P_6S_IQ@EHAt(m=!+6&wY8@u{xV*vl7*@1qVcG(-
zF)8WUZP|VYjm%n3j^Xcu)e{eHiavcxz$ECWtJHA1{^gHH=0|e?Y<{c2j$}YV7%)e`
zy`L?It<S&X!NI}f4zv(hx@P1D*!1MHqQ;Kz6;ITZyWO_ZD48N=ZOtp%QXu3*(g6h}
zSa5}w*?j(gHeT!Jy^~XH{_^;DIAN+vjsV7Wf7z+ohM%nk96i~93Rh&pw#-t8F`3fh
zxTFdg4GnKEGZjp|EGT|@Kd-3z?N`t~(rCD}DG4lcfmTNiZ1&Qq;h=4%-b?+v*7lc-
zN$%tcdD~h0uCNplbV#I|2=xkO))JduzP)Gq;^xvMW?M0xzKuuI!@0(3=vgoY$|q2U
zxJDQ({SM6k2$);&<3SFkP8S>s4W5AxKV}eNxK8l9zXUG3ch3s!du3IB^|d!W<#QKg
z)*Fn-VOSC~K4Il1<XS=%GIWw=Yckk`4L{g#URf=EN5$IS-0J0b5#dvJi<($u*q^5?
zq$$S7Ole0@&cx7}BJ7RO^Xb#nbcRN!c6a>JFjs_yG#$iS`0n4FE<6!?;yY{C18Sw$
zf#>I>DIhX!{DcolbmcZ16kGyKq-Ur{u1sF3#|Z9l(F{s;QDCjUtx4s(e`)$Q(#}GB
zx2PnH&!B>GHMf<Tr7?}#;&ttA%Op$wmam#}ue`$RNyKN<fi*UKmv*1YnVpgMV`G|p
zd2d5C-xcr`nJpO}0hD-N10T)q_adunj-@L#7jh&I4#oE_jomgr$^QBYf$D>cgmk2L
zFV6eRRoDE*ql!6Tss)?JEW;L^j|x<FyA7~C;Ym99sv+nI;p6QYAX_SoEx0?_{7f7t
z+H=rNT|{KU==uHorQ>o^vGe=OR%8^kXYZ2NaLoZw4vYbe94`xx!kNFtN;h-SeU+!2
z6}Z5%t<qa2YxYVwZ@hf3y9-(^s>goYC7zEu$zS1GI1A_F5YA#}f0t<OH%ORy_y#Nt
zUb-9P$Bt7&BmJ|hjkR0q`TD2(`m{=)Fcsf+DYxyqpdSe+S@UxPU{BcOb!{T(@U2Nq
z9Rb4&nq|IT3u|)YICP_Vdc*9R?p`$g7s~|;`*vFv2O{^tFDhQ$C!f(KyY!o<^JY)u
zy}BVLrl7mDCyN424#=UdL!p6{eu2+FTn2t=FyjMyoa_(wj00Ye|962PrwU4tclh{`
zSD~a!c`4GxRB;7W_F=dpTuO^%dN54>_EZ76@txZ%il_U&m8!q?Mq;s|%6z#(AJjI9
z-IT<$ZR~*k6X9r6c6@(V7PjJp2@I_k=tdp7YjV1qBuvnv&xyNKuUIeuTpCr~f`sQX
zx3&C%z6v^V$3J97I-<;p#9onMpq6}A;e4-xN~y2ur?@qy0{#U4K!X>tDikM@xkweC
zK~@;Q;eQzCm!S1K48wCA0lnjnbxIG@_V+o$e|W5*6V(CQAAIu>K!sd#tWe@gS8+x%
zD+<`cNqo56#&yX*?<UN%vkOUIi?XPyuYM7zOnAkB;CovY0JO>Lk$gA;qP1s{EX#_N
z7?MY2+EB!&&Q=8-*ZgY+)|$D}4)v}Q4c{Xfg9q+>(}2#WQQvqS6y3%dI+eV@%CY%T
z-g&TqaoVtAP2sI{w2%JuB)N|OMnCEvdVH#H-A(J9Jzx33#X#A}yB+cl@pNzH-8v<d
zxK>mU*3AVYng(e7$l9~h3iQ({B{S=E_9GWoc(0Y#l?SYuYXy8AQ6-PJW#C|&#M)wR
zv?+>X_>w$A>uHzq#Dz2(=j;+k>Y~ORc!~p<K3ZChDO)Qj-caJo`7?6C6G%2#5B<vi
z1W12{OW`wkVIRF-KPs&JN_8f4;=0h6^XCm6U$M2j__|3D29rK3BpQ!-2tc9{dDNg?
z<4A4Ai}m}k&rp-ur4VD{UqyPoUNTHB>`c^i9Q9eW4j5+<pv<fK6id>f#Fec=xsmP{
zE>W2huIiQ26x3xR7U-OsDzA2M_x9p0*V+C&ACehkrv^X9x`ZYpj9cPxZG<h)-IC{1
zwwL?(EO6g*0?PK++JSwl@EOn(Gw8vy_?-P(>-~`Zdhcpp%b3_mrHH#J0qN{YLi`Ix
zUk_^rA9ieZHqHv`mW3_RE=y{0d=#db>e`@fD|xNwGdX5A{%f$pTWDFqnI1O*P1BK=
ztpl;+ZJgiQf@&4JjabF_-c=;^xT|m!-g*Ckj6EdZtH!5pvyFs<O9uYR&lh(Ky_Vfl
zfEADq5Ihy*%0-2qldi~sZ}c)%R))qniO)czXM>brenv|><rRbaPYp-GxDun|>1a`|
z>yMsMH4uzGbgs0pm}Sj{Eo3>TGW!YGniAu9M&@&!JpDHAw3u1GS?b9hn;8Ez%@cS^
z5aQv2y9`>nDd#Sn2zCxHjfu{fnxsK!-b71B=B=84Wxmq?E!efX#C>!@S)OXK(ZIp7
z^X3ypJ78PxjLfR@P&8Sm^V2T*pf*k~j0t0|U*AX&Vxh&46J7BE#fTUXp`4&p+`ZZR
zrdZRdl!f<sD@9-Hu+F)lwmY+ukng2wPd=$pWS@H=F4NdHW&Z(x%CFb3EwI*dTAOS!
zm0~-U84+jFGkKfMEo-fm+Mew`)%#x2JGts<Qv*jplXiHDEG99Gudh}G9ixwy@(<Lw
zakKvGs^hFJ<1^B7Wr6cE?6ON6Nx%s9+w9tnY10pI25g0DH`sgTMfK!yeHW6-77e>4
zhMI5k?LUQyzC<G{TflZ0>nA;g@8J?3z`0nn8EGP?VyLp#UUNivzg;hDP7+ITuB5A4
zus^$Qs?|Z^L0kLqb**p!pZ=V<D0={BTLrwcp8N<1bjL~6VtEfP`t_I)CtgKW6o=1u
z8lU$L4OK1GjOc$9=oL)e$l<rP2we{sh`?51j5M$prisC(NO+qhcAlMFNLowfM^mK#
zTT?{a?+=)#2tcQiZTuKiq#KMF8}z|2JD}Fm+MMfcwO+QMMAp}r<&$!(Qy!`(n(T!i
zW(I6tC&}qih87mimR}R4a&F)1nq74^QN!@=B%__0kf9S)XpUX`x7dp2>SRM7FNfUs
z_Xn=VC%NDR8UPyB2upn+3E2K5(4olZqle)SbQ|@iWS@2lt97s!D!w`r93>Uo!d_Cm
zr(*Sr-JO9;X0P8`w#9UKcedBZueWRkD}Oc*VI>69aS*@pwv;Zj<YaR$$5ZuBjZ_fs
zCsfm;l91&;GZ%I89NX;LZk^6}FTB(|*FV|ie}g$`=%kVenoORp;G1)88)PXEz+w(A
zBRr-U6xm}8!oA1!o7s}(9HL*^$4IWn^Lmad@Wgn1=c7nefg#v{?JEK;^+JB#BZ**-
zJFHsTA9rDjQ?1PlpOw{-E9m8WSXLO#GEXaZ+Czm;f(p`}4|{|vC4|$~8s{P|+(y4C
zn=)lxDu22xl(CGznQBw!DPJT{*Zd{+%-x$)=!;uv)CTXV8;~0=C|dZcHhK{B_u?n0
zZo*8JpZ=NO_!bP!8SV+plHguRhap$5cB8f`3=8t@=L#ZW8?F<lj({SGOF!B>JAe5L
zJK@c8Z67Y=ywGcfu^Hx#hTS5Xdc}Lsxq+&|;vqS;UH+_t;$$<djO3B)LID3Gg!T;w
zEjD<ay<nj#l&Vskq;YDFj?$|uy!LD+Vzy~F>2!@bS60^e`}X`dodb;&)Pyw3z{O?K
zk1Z~K>{x!#w;>z0*W34drFdv1!h`=lvp?gVGfQ4}z4N=piFfLND966P5=keL$c+dC
zYw?x-V2BEJEU2ZN&W)(5Wf#O0tL14>ykt$f=icc+6MuD<JGc=_REQNZ!ahY$&Y;32
z3z+yHNs_PK(1~66kjvI?&wJ@h344xl?Z*+M;9iOmcT;p#!@i^GV=nqQ4fI4(wWO#a
z_MyR>-p4hy+BTTTllr>s+1E8Fakujnym#EN$QcE@-5a&tW17DFFTI_de4UBPMlqWm
z1nRs`qh$p%Q}$*7E;n@IoeB(dn>Mw*z#HK>;v#hKCnIv=>v{~#mum%{ZaPpiUxz;$
zyoDBwC<r%YbIc$0OUCn5l!qmzB?+kOIyKWkx?~wg5rfm(kMZ0_OGS43w&QQa&Py{%
z(~W4`$DjDjQCM7^bDiP+10}8?Ha|+VNd)NCHlb}&V0|-vt{*ilg9Vd|WCg!?S}6?0
zYU^su`=8f|IG1X+55Tibv0$X))aZ=bVT1|zWcB9X%khi%n4-odI)%Y@gb>Ih;7VCI
zkhoKF*cR$XFmdPnxZLE-sgPt32hw#y%;mYA=2)Jfq3Zpwu+V+um<YIo2xM`?sLh)I
z%Wo)GmcdVHFW;@kRw)B9ZrYuT8e`jqb0C<Nu<_Hz%1wU!y;B3Onp<Sf)(@x#8i~}o
z1M{*8#h={tzeuRhJ54>@)}Tnzi&HkwvXxfG(~%>KmJ=%sUwv&^`@-%X8r2{BBAy_Y
zAlF}_p-U37p}#u&HaoH&zUW6dD6j3Ahm%7)jIW}og@w5hgUxhC?cb_AJN<fg_=MOf
zfGOs^z^Mc6HHB$oc4droql2<$@BKBAZKrTUt&YQ%(vy}omL;{f#4Fj8AG2TN=RUg@
zzWuo%z>ly7#P*S^DteXHHia&QJ&I1i)8cWSMVNaAZw9fvTl)o6gskiY#yUV!gi|xU
z{r!+|+WzS*u7d1@r-L7>FS}|G_lhe$tzO6n5d3SS)0|Y)!@DgL+(Bx6eRry=dxT&&
zja=#5309c0%r^<X50M=tQxr;VecayO?YbtnyJTP@k3Js)29%jixG++I%Sj4-Eqvu2
zyl>{CZzf1Q<Blp|lP1BcIHqLIG64EN18+`kN5Bq0*5wG$bgSMz@u%)9hxMBEK@1I!
zsJ7Cf+6~pLNiFkzgm!{#Cairej8BE6$Zh)_oOY`Q#^g6*0_~Zrh~6PWUn_O9SG~qI
z;F%UJ_22<PO~`sXd!^KOKN+-e07l?Lbx1HyJF78oF>#S@rgMrjKk&qpi$u4tky>)+
z0D+|M)jq&VV*Gumv#`N!S9%>oaC}VQx}IM}9Lp}`MvOi`zedZRb4ubI5g(C0U>%TR
z1D#79p1i5e;)Cd^C3=s2cVK{Zm|BV0tLJKWjbSi3nJ}TlDt^_7nfN1J*v53c<_1I1
znkl`Heh$v&tGo)@dRme!`*wyxOa#uH<3vJ|dfcIJS#bf~#K$~MqK}bc?SgGwb?}CD
zsCF7wSp}?QFH+Y@vO5>u(p#JapOi&DiB#3;2}%)X=UXQX8Bdx*P6y-3eP7R*O<$;-
zt9<p|Qgmhinb9|^vCd0(s5C9``~&{-86fl&Zr&mzebdNCiX4by?T}dwtcSC${ac|(
z!-$tQj%F9B6VHw``)9kmv>6Uyjou($PN%NDQjC~2rEkKBG@G18zepKuMw&%`TGyzk
zNTzi-RV!?+ZagQ*2hngYUC8&^jX+t#9#6={C>h%~#TlOu$`+2#*5u0dq<CT&Ny~+u
z*KOjJy{Dm|b>Bz$5R6^;uvDX}(a7Mm-U6q1-FeKLgqfLJBqu3~YZS-8F#(xQDse~}
zwem;V(LQ;RwADZBBzgsVQ8i^RB>g{3CDd@AE|)kKlYt;N!ra;^LeDl1>i6Lk5@uw1
zW17W{G(ck!Ch*|?v*)uid;XIjS>6x%u=REY_z8gBzQ#!opBt90BLhVRdn3jpLrXi-
zJ%v_*-VfnEyhFT533-Rn=(4THZ4HwIV+iC2xs&L%T0^Htj@;GVuLDv}7sfJ<L`LVa
zU>e7+t>T8?+c&2cYkw$UM!eJ+l!8;#R<LC*x#SqC<QO-Zzr9k~G-jCjnpLBrbM8XT
zyuac@wwP$e^FH^n(DzFdyfF>+0t1TdV|Mx;n5C!21zo4Ao?h0aAt*Vo#-DAfor7n^
z^3O0oE)%FpWyXP#X9JBBp+d+)v%W_nPc(zW!W?Br5ObJYpqt&1O64bFm!y|5Q$Wpr
zFfKfve}BHbzN$p*vib#*r#wEjR0{r!=R`tMOZb~oADf)kra_ort}gtVARW<_s9#rQ
zoIg01WNq5U09n2>w@~05X}(b)0vbzP@+27D20fDn&@*vcnsNZB;{VKxlj$1i+T}Wl
zCHH(V>bimqP2h4KFXgSJZ=sJHrBe^5zPGY+(~oz_T~Y9`w3Z~J-HCIReCn0W@<3#=
zRNm%A4!fx-A`j}xSm_7$V>st2W9W?jV>fC_Y^r9^?H-ZtQ!TF=E*KFqatEF$PNw;E
zQ5Ql;dne`*+!_X8VVW4q==jWWYOJ~tzU`gKxSfYSbA_nG2#>Vx6|0*k)K9zzx}fsi
zB<=8$`W)&Ayn;;%A%-N}aXTi2i?RxpI6XJ$T&pp|D)h}@()G(w<pIq}`k3d!m{(<M
zu+qBc*flh)g)K1<&!hGR;jJ2^t7U%8UP{wJPtz%BrgL6b7ooqa&HnrjXI}I7kBqDI
zI4ZQ{2=yaZvuW(zzS>jr(rg^=#qHz<?R4>%q9a*wnZdsk=Ljg)|A1ma#OV%f!zoI}
z?Ben}v+Rj8+=vp=&n1UcmPPAObfjc;FL2EJ%R^^En#ZkY1x-uVgXPao*}5gZyjFq@
z$w%B9*cVWyOg<A~6Rc7A241Sp0&S;pWu47De2%+4BSMVtNu`yio=GLup3+3JTens-
z#Vc$vf8uASzkVy0$?|jMZTAQIB2N%@`}!(ar)d-w_Ol+w<W*Li!wjYF#7OV*<`?&I
z%KG(FjI80@#vQv#?Xt2mnHc6KCrWHViwO%xIQNavK+SZHRpOQUO=G@t+napPN19GP
z11@%H8v;wgtc4^Wn-^?5A0oin6>Ks5_!Ah6>DrwawJ=AaLy0+C|5*&3pK3jNe5q~5
zekvy7iQNP(*yVcXBRAkmNqyt1gBy&>Xmmx$%x;s9{KCX2U-2`NduOB1pQ=0@Ug(C7
z!|hGS5D`;C-@c?`L@KahW(%?o>Begd;nMt(ejArrSB-t`LYjSr+geh~6aBY5g=Xsl
zeCYEz4SV8(%TjE3qwa73@23woRKHVtXGhDErTJVzze%4)(c3<-H>65?dAhWc#NK~c
zkMM5pSyqZnMqciqJ0ZWyJ!Hw7OnFPptSXwRVkURZe7Y^@eL*(XTd4V=8Z6YJJnr+u
zO**});j?0`&YKX_6EWjLtW;Ji0Zt!`Vg=R9VBdUND$6%F-7KvsE9zTy%DCG>xHIR6
zRV!4*TOmd(IPI6nV*=`lDt~%7%G0BW(n-=Wu>Dhk%4?bN36!yN8i>sXRlXYDE4ecm
z2DbpMfH3BK#(O(IR!>JMjot;_(g@v};I`}W&LHzNE{n$ny!z<j(E<3(Nx?Wki#3cU
zG488Xg9Z5L=lMui_P<}B?c=F2P0@7W3C-jtvbrCweXTzA22KL4-7G>5nlwcb>~s8i
z&|cDjQQx~w*`%i2iIntoW6q_R_L*GVmBW{c95|D&lKE;aCS20H)y6f3!yOL3E>`zK
zpSV-=T;4-)ctg5xno`59v_s%0YOkhnqg%t=%yI>tm{WxdydTyW6|_rsqN2}0`|H1J
zQq`IRRLL>dX0tp)LdMh%u3@*B9;gN3WQJrrd%W}AZgeVH;=0Nec)ow1)GDd0s(Nl@
z*u?5NZ|?upUsFus0l@!WU~?BwmNC<UEyC>XL}4G9$1;5t$>%TT{ifq2xceBfsBWO+
zvdoSK_StueO^dNXQ$pjP@As|Yc?>+;+zBo;Ix}3;hHdozg>y0gMb4%E<eWPCefo3(
zy{+DU>tnwF{^Cj!+r}|c(5GgfrvRNUnv0AdvC2EnXzGA16wT0*p;kBO<pfTydS$H*
zWV<<gyAY4R=5^zHW2uzs|89x`EdkbC{DI8!*;$d8eOQ207yX1ioT`@H!&{{3b3`bY
z6oxQi!_(2kCAHKyj9fJckRig~tyJte(8g-3cH^|rrq_E*BQ7YY>0z@n$4re!8gisQ
z3%!~N;`4XB$KasH&uiN%Fz8rMgr!?=!qfJ!Hx;V9L0myNN~-67i)AW`Jq0$Ckt~=&
zzVCR!xdrc*6vFOWH{p=eHFv9n7L%TN)u$M$ner^r_VBT*w-czYq?AJDQDuWMZcfBF
z*2-e^*M<s=TKM{!k4VN<vSttFhoO)BXTMp5UYSk%WSz7H51ypnN`0I{fL?!8s(x9{
zB*G@4keB);;L}npYsFW>H5WJW2IJ&kO3i7!$Sk*$Tpc;W^g5|L#o}qdvTOKqv6%t!
zhZ=yLH`Q=*KUVwUHRPe`V}R=w8o&h;c_P7C6m#=9umg1u83Ny`Fe<qI&+VZrq}6W#
zS&5-;F!~%knr(!;T(}v%md-hqawa)tDE9JoZ)Q`&3=B-O2xGmLHKo9%iCDjDO1Pq?
zS86exZ=FOst4{(5RQlnb)&!X>%*GHb!su!R-Xffu_wTTEhWP~u3x0=;3~}cW(%gDB
z6|8&W2`P8@oX^Pv87xV|zG>)v*Zk9q$PX8tb`*4XR&G>R)>Q>L_Rv<x-VjgIRl4<n
z7{Lm<92$qOwq<HfF_UWO^Qakb(Da}Jzl$tjy%m}T!)DG3Wzx#pzCiU-h8IOE<c6?5
zd{B#-Wbxx(VQUNCPG!M<o<_z<g5%+K^{0C2hD>ZyZY9LOF<REqAc_=s>Uwd~_=`K`
zMj<DomhYxB`a;7X7Yv;cE)OSU^LUgUyr3LCqH)rml}Gn!akZ$`o(7@V-MtG<dJ9oL
z<mo<_d(3;~mG4SXp1)A(6g$}wD_YXTogn>5lrIEQrQp>~^2wAg4|fA2z0-(wfj{z5
zd+Q_d)kz-e>9G^BcAEWVX5@JDRXQW}E4jE&*Qg!G1vIA}U(7fKBg<X)MWTGPshd%>
zh?gI%yiI8jgD(iwot~I@pi-KEu8KP8@uctx(W%zHl(MxWfB|U@3qi4#<qDa%j4&i6
z<XAfH6dE_Z9XI^=R3h>0bIWVu$cr~{|1AeR2o1v}uQzLhV>D7f#%R8IyjZ+Im6~Gz
zPyYbCr5JRS$p2~l@78v{Pdp|Ff9+*6YUi|#>Aq3^bs{;#D}S4;b#DD1M#LDeyvKo)
zu430Py!e&^aJ*wluSf}YSw{u@7Lnif|9bBd931)aeK5c6e>Ph1XT1z?C<@eu==c4<
z`amr+=z!lk^4re%Z66p12b6btWzoox>z2|F2CIj-z32nB+e`%hIsk@&Cd07GwQ}O{
zm0hDWstjdp`m{C!qR9wdA2&k@*Tpb}z8y(PwHNhtsCZD*3J}hwAmlkLtlu613Sy?L
zeqT+CwZrrA+$|W?lveK&j7~^(O<9$N#$IQ3!8kkEorNN%$v>0klf){l7`aB7<JSYD
zUN2LyZ%oP#Dm!FbT}sM&wWY#yaOoTqZDLb2C&F!v>SCuf4YkAT7?W%#erTjO+b!J&
z4j<G+W5cezb3l6<VMcUnYDLx78!bt9jC_guljiWw?_WgI01dt=Kq6LSO1~Axg_=rt
zCw~Xd3aZD_XSZW6%m}AfPq1!PCV8`8QkxtTJ#EPmmHqh+T}SH~=(2VMifV8M)dZ)-
zDr((3xEN$u;w|0px)y8@kJnW?SxkD$;4Ww05cd<cX#5u>u`6o$2$*52nQ}Pw1#N3W
zieA=q?b&m1kubH763p^;eArBpBG%!hzq*@&Q|_*(w{b@?l)V)4ZcQ<g`t~jI$~$HH
zN7i=)^U|h@{j&i`tCs0M4XOp~Ib*Mry#1YTKmPuAIrmSIP=VTi44OWJtoDPf5kg3}
z6bvsk<IiKWTxMaE*glPJwiVTqUjE{*K5*gJ)WZuk@fK>)8<bTC<^-A2V<hr3b@NZ&
zBM)66o6I-~krwLxv-NZ!=se&Q#e{xjleo|IBjEL`X%QTcFlx`i^x7KCG@o593v6b<
z=s-J086WQP2!#%<1}E5+!O&k)8Q_cA2yjH7G@<}+q~BMgmi0d)t@&f{xMC(Tu=aL7
z7;~9%dSqGmHShi^fAnl4v#M0|h~^94bHsV1_Z;UsH3-j5QE(iJm3rxvdR-@pLVh5^
zWZ=X31K{XKbw6eT!q5ReHlSW%Q`)$bBFH@#lh6Nl*}KCy8f=C6Y`%GmGfo42yWdUg
z{AbOi(`c#5*doKvZi4qS4opu*m}pkUUOe#}N|>6q6LaN!%oPArfK{rOMD|=p^W$j}
zQMjrMJdH>cY)eO;40MT3t%DOK?T6j}&9Zsy@IV;Ts*hV9;ypY}kz6i1=gqLK%jF5;
znhiaH+m2aL&ye<WV)pGGbm}K>0C(fIxO$dUdzO^Az-EDzxix10ObQ{$J34TlXQJdk
zRhYN%{~c+mAZxVp%y>+6Sqx(vhQHBT=d0wXkg(0mhC#n8E%rkFMa3~|KEXns!ZRT0
z?o!bC1&t`20e_*bsA&Rh&k>+P-;j#EWO4+66H@=7-U=-L{j!Dk@uWURMVNu2nMyn#
zR&ZJF%}dLy4tfOJdb>fsWp%=Nox_4BEd{5>G`JsMg#>DMd;jGLMu1Z$k!)(bGa><C
z(^^EgTVM`OVosUt4|?LP0ahVND>4yAQ%m%i`>i6hR;KNZThfk0MsHL`ufL)kcKARH
z-{U9ubyW&QiSy~jDfn7L0$kT5E>-odT7IUlSMjdNL&<bb_&c{ar?6w82M>m@5m(}F
zV5g^YJGdt1&+=<XPM=K@Z73Gce|CSlQHlG?qd-qi`*q)prLCE$AldauD}Q;zmFdY=
zfdfy6?~2?A;+ct?8(HDn1SZARw1Ih4McvWk)sgx{Xz!hi7<hxRu#pJVK9#amDmlpc
zK?*^A?EEY%w}!;ZsUZTyohh$}F58Md+Zi_C5EUEvSdq_aBSY&6`KuHO>)Jt%KIz(9
zYdL;|m$b~wiYgSJGeBlj|H-DhVP@N4bZ~1YU<}dLq9~6p9TBR{Vd%=j$<<dPJ|_!^
zvV9z>Y8X>z<+kR7uEANsuQFnvf(4HnVOml4uMLYvohTmFwe&?=o(#L_$^P_1Bt_&C
zS=jj%0Zi`BWbgOVk~f-YwW<0ncI69p2}{zUWuL8B&hgyOe=U9l&=Li3M=h(TQO8NG
zV+GWjHS-Newb6?48Hg0`FkKe2dUSk8a&HMux(?4*M{==y05n#8yT#8ROVWfa<qxfe
zW!ahVVc_7XCxv~YD4FmU!iV3}OK@!S+l%15fFTNN@<?zxORB)(vm;<C$&caiC0^7X
ze?Ap#E*~YgJk)PGiwN?21r_<S!Qo)rD8yn3+;waadVF9ge4RcO%dFD^Al7l*rfl%V
z*9dy>`O%!j-vQ0VKbk_R)tbLOe}9x7NNj=yE1p3Y#K>#Tpct`<S1?rRdA8Ml&vP<t
z9xfF&GG<o9<_Me`Z*;e+b8Z3%3OF^k6*790dIUrjs4KAD5OxWXFMuO<DP>Fd-D4I`
zxgI5|f5$sYznAew@})W~LEG?t-=1PfHu6Tydu_)H`khE7lmjJo6W5u<hE%Gph=K?h
zz4j+pAxL9j^8)+2W0g7$7kli*hLau^fcN*R86kL+oYq8k?&QdlmqVtNX2i;H0*jdr
z?%L|5chPfC_Xj`iXRn#gI*mYetGnUW)KsT4?#6mDBkAN-$Qv_8wn<hOmAtdL(i{jQ
z1F2?gkj3x_MVaM<`qPJ*wO3O3Z7hr;;DF8joqek0vDuEsaTlJsjpnIX@lW>4<1n?p
zEd}Apt9|$LOAi%$2t|iSE*GiLK|Z`Y15p)gU`eAU!ipTQV!`P;e5M?TKHXYuD&}HH
z*GwvnSya{BT4~UCqqnDuDumki{O{{`PZ;3#5BlOSEo^3`5er_aY-3r%Bd<Js-xF^5
z$G5zGNAQ33@AF<pZb;lx3i3n~CL~6ZI%I;YX0TQxcX+W{=pF0n`nnhQ!WfXH*T_~L
zb0j~Y1RUib8$h<Zw!jGn(+AfO&>f%NyHyyg!i?*+CtVMj8vT0S+EKNU7JjHLzeYU!
zn9$!qKXlx$JCy-t7ZK7|54BPI;71|!O$(#ka^8Y32=0BY^ge40#|2y!ALsM-ZR!Bi
zV*VgSisd+Ll65^MvHAZLcV1CVZf_b7#UKcTj?}1tRO!8hVxfoz1cU<!NH0>QN=T3@
z9Hf^cEubJpF)Bom-b0h#BvL{V>4~%m0p>gNpEL8npEb_KUi<R9_||^byWi*e?N{UD
zyf+D*AfjR&Q}X<+kn0)fUW|+1UmAY&4(C8&UZM6&=&gvWKz!EL4na?TAlpMc$djR%
z=jMxQUmIw;#u;zbIy+${$P~dxj9*BJg3*$#Wgwx@j>%l+0;tTgv`Yc$a3yu61a**~
zG40@B>AQ@f;khszOsQzo+baV9y4|g#x|XV1MpZkP{V!_hAA0Eb?61=A6v*YX_qHxZ
zLQQY`UG~jms2g0ukE3;WC~t5u4dkRC9T}q3oK^k}jv@E$#f&uy72yt=?*7!1ue@Ss
zA~-lL;Ggf<7}3l~aXG1K8EuyHOd<9Y2F^f@0O}aGL*LkNw>XHeO8}zx477l#>`<f{
zrQA5FDV}JA2QBzMJr&GYTupY;kaqo$%EWE75#P`_IC%c}S%9DA<w!6l3NS=9=J`Y8
z`ny{IMa@}6C2rxr)C@b`O6i!fIlBHxXZKm}uND3z>Y}O$CgKhO9ec__wjS$fW*y#a
z)P7~1r0?a#a`OlHyViqeG}4{bPQu^oU6RCvgjeSxuY9VG8=X_VHsTPaM9**fZsMi@
zNB|@lo%q7iy>(6JVkt1T@$cb`zjKMd?Uda+a*Y%)t%-HMkgSDLkJ>ejzk|(PUw8Me
za(G^vyXZl!&nqVK*M|iVo$wweThrp$*$c{n-DsgZ9c=!)&Nu^EJ*DK$bK7&(61Oww
z=RD$7m{no6Zm9tm91=lCkVUsHG(Cc!_-*D4l(c`W4e?-g=F7u%fA0={nN;&Ug`i`&
zw=gxZK{8u{Zv`6LGb;9{L<6DYs}fa91;}p#_9oBvm6Z!v|1h{>xv6`P`DPG`5#?B=
za?0k82qDEkKrbG4Ub>F+e{&0*^SZF%5kM19l+y{z(H>G6svWuY3CQa&=?H2QxeqMJ
zC?fTY7aB5cSP_~{gl<7O)_+F4F>7*FbKI?!pkJ?u9TH~$5>A)i6gD*V>xmhlYXXBF
z?o^<zYyZ|z`KPh+f4(c_c{$M@Ias18&gA_CR3wtm+ZV@s@-D#>=iMmJcF28KE;CN#
z+_H~8gRH!~&N3+E<8ADx=cOOUTBi22(=g`%RdWp0CrCGO#{}YScAyA@hLhQ{Xieq9
zps&KtiUacRU(0rW_{Op7A)q<34^M%9qQ{rrL>+5W#Ok1=6Tc!d`hL9H$qdcuv$t|X
zbAs>6ICs#rK<^E7I#kb}i;7k>*z}QR-k7}~Akxx|SZ>@+``oLhO3)Q`w+0cbv=7|L
z8OO#CI^H6OkJi`bxaX?z2w7qkzBH9)`{dKg-8KA4rgu?NbT8*VJca^&84A^@9?{e&
zo%vC(l^Yg3smv-R+u7t-Sp*?17rw@3bK0U<X;W>=%a(YGz&Ku91GF=QKYthgUkK#y
z5wU-S^JOkdba-!P-G9G#3+D-XY*_R3m-GJHt;e0#sJOtqCb(q_c61MKFp$GNRV^jk
z{)${M9cv-bE3}WHCS#A?-H{hbH=1e5AtetGR^(iyS+P4aBbTV^e0WfwP9*VLIy2RU
z2BA#AG;l_7A^az<A?_R-hGuEr9qy^+LV||BZtV%Eea7WQ*MGm+yOPi*?2<W5Rf(K9
zRX!<FlB}XR@FOmID$|Rmg^+|gO&WQ%Feb>s7`l4@b?~r3&!ufGbQ@}Vr~^qux<X#+
z&dpi{!qth4Fcb$ln$dem)Vl;4Fspm*`DY*a0F7hQhW^Oj3FLO|m?B=%TN_@Z%cvZj
zCS`gf(yuZRWSZm$miSq23jkoyuVca6Ay1BW(YtGK3kF$rv-01lX&ytmiEjBSEa!Xp
ze`*1D^GK1uN&jG+rdyMijo;rqyOV)=BpCiWj3P$0hf93}f_*;CieG{Vx<et2S|OOw
z49n_~KpS)UI}zdXU2I~&FzbBwDF!vw1>Rq-8wWAu(qnU5SOJkw55`V>=quryC@5Zi
zS(xW|Cg_#JKqz5;rT5Ziw*g%(!tbO^lj<1eA!|#nwlXcZW2r~}>Coanu0-b)WA)I^
zMwG_CJxWJPvI7r<(}Bdh*P05sa&dP7-o6&6A7AqowOBGvQ2XWMvgZC)y&Jq;*-G>p
zZ$5|YDP$xoRKgcM3Xi+(S+@HlYP*rbnX{g3>B{e)=bad_X0<zPwTeV?Zrd3V<x-pI
zeET9*0ppz(!AcZu9|;X_Jr%4O4*c;8b@{Q>PD+bZ{52t9Z1r+s2Mu)tN<6fOPC%ZH
zW1;xTy<zJAtP_4$9p_Zd97C17q&ZBj8EXT?c!Wu&y@5hWJ=5kUhC^Dg!u+XzzzyCi
z<K8@GE?C&1^H}OaaW(4P(-OU&V66*2nE5#P=WYPbvs=yS2bI!~7Ha$=H$}ppDJ3z4
z%hFCA2s@GFa^Mm|*RoqRWQz~7n9UgeFtmLqPqlm4g~IDEbr+K)JCR`Wb>fvJ6SRI)
zdKX1^QPjCQv$%VY@1-#-Hhw3e1CldUbMj)l#gj+#F{%RL)j<3P<H;~i^A3Ctbt_={
zntkPMaFoHc2z2A3<p_h(a&**_G~D~Ot*2zur{f^)oGAeX064=Tf=Mc!-YNuSwRZa#
znY$B|K*n2rQzogxQkoOHDn5bG_2{KlJDue{OlHkIS=_QyJ9;LoIqz7H0?;p#9LXO9
zP`XUlUhhWACjDWs9c+vnxf9ViFTbURU&~1TYs~_CN4np3Y;|m3QZhmxUUw@w{B~Wo
zmFunb<fQX1LP@MT+z2`n_jU4%g(hfMvp|DNJ-Et8x=MOEfiN$KBw14B6t7FRBCjB4
zOg-mB>xn9D5Qrhk7bjPcqhQqoPACoPn6-0rqmAfPQ<Y3@rSV#_+$7F(6c6v+Mqk{3
z36RZ7r^8g58HnEmb;fBOXyNio9*mAk_~puiF0*r1QTBkj;!QRrskExh$g~$wA)vm5
z<63MTadTLTo8_GmPBW6ts;B&doJchIzDWE4RmMWg^xz>*@bX6zW{ZANgp6Kn2;TKp
zyqNZic#wUXif7SWlt+f?3xUcd?alCiba?pkbNzU)%Nvs{Hs$#;Ig&XNqT8j;FE3aJ
zD?^`R4Zva`iy2w<dn5m2Ui@QX{9o>z`~#c;Kyc}3{JUta-B-+uQzhg?PV>2wie_E}
z_h=wrgy&Kh*T*?eZb*P*xL63Rm%|~B%AwBHthaq3d4UH?2b8F@DufoC3?IUL3cG>f
z3$1=om%4VC8>0ceQ|r>En=G#@Ugxx8QpDK2Qyg!+|Amh_iox5AA|OYGkQAFsq71je
zh7@h-Oi-qYIw~ICdMLP;WR(13Z);Eo_xf%`xC!-$5&lVqbV+InGvwjW6i30JcFaO@
z0K|VZ5@PQkJ><JPkKrWdY#9>Ex}epVt#bs_43q$l4Xd=ZpToTl#dKI3Gp?E%S~1~S
zp!<2j8Ob87s<hckhrVN_d?DULI-z_a`3IMRyVZ|@Vcd#&ZRH-5hqw7t6H>Kv<hRn~
zVH?Q<>a5dI60;B8ky4{UZ~B|mJMMjK7?<L_vz4zRE<D+p4Kk=Sw9WKWI2p4^e4Goi
z7JCWI@BV%3N>*Z8)DuCusWTA9o_fui`OTsXS<?5<eXorR<$FzZ?Wwqj5h4x0n725~
zo$q%;R1}=uUScEmMf)Bn9UYM}a7~CIfZ9vnFg~f?gq@%J!GgE;<P{&^QYvkNxwJ8C
zn)S`db>2(1IvMf=9g0|3nDv#>eBWr@gI+(f`~s66(Z{9Cr2Ij=)qtw#TG*i{2lSEG
z)P0Jzo;8e1FXufP?WGZ_$MIkH<Z5+uincd;XTD5qU0A|DJ%UU&(&6m*NazmxlpK3D
zga|=R)4Rm$h|wB`DYcrz&&=Y7e5F#(_3_LK1xYG&_gS&F@vQ0a>jQnI<e90qmN+!a
zmC3CjI3Hj~KoB0vz;FK&FgkDJzN~-cfq_v*z_8SMRo|yhI&9FK=t&uYB>EI7@7c`!
zKKoFIn4IF5-7H@FdBAe!ucz~y2bEy^ei5on)?MY{lBsJCi5ZK{8N~RmPrS$+w=(j^
zA}ZmX?|#MU7~^oLYx~N5QT|Upb{g*OF*hCT^d85q-X;vS0U>`BHAQ48CC=_#Ln4In
zx_c0WCiGA9be`rme<7~)_7#ZKg_TL2(%=DIksJ`s;2Fs6g(e;0B!p0kNq9xZz9iS)
zt4?@?_2u&9CV5zY`Ps@?!SCy5&K5urey;E|GJu}2_rPo+t^&BB=usR<$_-u^8}htK
zRQH7kW|oP<A1|=pw5WkuQQyjmqN2#z9b!CC94+p59uqzR-gZ120yd_pu*e5xTd&WW
ztMCexes_(^-FtKNs#M$8kJctn%~_-1JoC&C$t^_fZYbx&4`gls#V?YbcCw9AkM{X|
z*{o(<>4johjRWfZ{JD@0>dE6&8!gpHH1PmO_?kn=S~)ob#l71RMh}hqV5EgTqF%z7
z#&|gW{)}A+={CfecQ3T2gU@U0(VwD=-f0uii3qdBrmNckBy>KfH79N0o}?O<FMM7}
zxnftP)hH>w!(&wft|$4StqtUy&!PdunKl%uh=-eRaqad*Gt7)ahY4DG^bBMeZylP+
z$fMwKN%ILGj<T^Q9===X)sBQB`|D+*TRb2a5szvoWwx;xj%jJM|3h`rG)B+MpyE~s
z)X3609A%mkZ2j9$?nP39`f8bvw*Bs8GcyocOA3W{+Od-{MEFulbPZqwvUnEqO>i5J
zE1oEG7l!A#IbG@IG*3N$e8Z*5mGiJvU(xHTyjC@EB1+ikFYBA=rL{halEYm}ipCjb
zr1gl{RMYevp(Iusy+}@NNLun!00ael!Gu4IQ4AMZaJR><=CBQb{qYR+_bsGueZR<~
zX=(ODUyo`NWn!Qm3%Uw>@G5p0q|SVPX`T*a2m9>jSoQht*w)%w3jDOTsNGbD>E{$9
zsP)mRKo|~LmNt(;jJ4>d`>E21Dw^uhA<jSwxl2fS#N9C%6GfV&@;oStxBupc^yRJw
zk8EQ{v9?V+y=Pj}md<UTmYLN+seX60{F5b@WbhCIyt8E{WeI4Ku<E{CR~Ky;JJ`sb
zN-%eoY<-S>dH+ryKbVhoFyOeSom~fNg6yB?_C>-FEX4U_3`KR0JL0}_=%(O;tqW%#
zJ&m`0R$sOqY|X6DYhprg?=7Gd?gub|;Z4N?=}8R%`F2_?_EY@!O<pmJn+G5#VioUy
zzhm>eE^*@G?nMwZP<QWqW$)0&x<z05*y2<g`sR+4^m9zdMGtRig(Vcn&Y~h6POU}*
z(>@kM=#qB`El^v4KI|n(ezD##{urJoBREt0!Z*y9hvBs$pKm@cAX0zl2|c^UUl9$$
zNVj|X0xbm!oBe}4INFHK{iFRK>ce!3<Xdg4Ns;GiPZ8k}j}7LpZXQ74j$unYhwoGl
zvdQ8Ygbz6!=&PTBVku$e2$c!01Id-in!4Fi5gO#yCqteYLt4swyX1<+Ne;?8g+T&O
z_Bx0g4$^S%`B^f#HS{7<E~9xAWj0<IoeMe+PdP}wWtl{iL^Jf-;Co$50-6T_A^P5k
zu<0%o8*9{d1)I#?ZFVkU%u1HM-7}Y%2*(BjPxv%yEHcRucH*lVy~BJf$@O#H7qMP=
zB|PonWy^s;D)y+3l+3_2JFUeXvS=-!!O!M-vGxgtQe#*Mt7p!sf%=1q4JMejw35yZ
zBO3nL-usR*89$S9L91sVmw7xRavaL8sp~};qViInqZ)GnPjww8e!%(rVWQc^*FIaZ
zln-{$z6X)^6T7P=!Tdfr8Ucts23qD>RZK=169==rO4qo!j_mThE+$^kP|X%|G#!=@
z13^md|GsmYP)lstG6em9^lbk1O&VU=&WyHhc>pM7V*8ld;iq*y0H{vRB}vQ86acx3
Qnt#j8{BO!l)Y;en0E*WRl>h($

literal 0
HcmV?d00001

diff --git a/cpp/src/arrow/compute/exec/doc/img/key_map_7.jpg b/cpp/src/arrow/compute/exec/doc/img/key_map_7.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..7552d5af6afc064d6fb9b2c4a332072217b2d2fb
GIT binary patch
literal 43687
zcmeFa2Ut_v)-Jpd0TmJH3Mw{4g@AOW#fGSC1XKt`h$0;!Dj)$u0u~SuQQ5#2#2~##
zN{CVtrK*TXZ$c=7v_L{h0%>=#&))ld-}e09+5XRS&OP^D0xMZb#+qx5caAydnD3Zt
zgxAd*1U8>Nb><Yn$Hxa;2LAxwkm6b+q`M0Mn3@6y0RUJB2m->u8gOqP_{Nb>9N-7{
z`M^JbPwkJ-@AIkux-SXJ6o$J4Yk!pi)>Th#1BAfOHiGZ106^^2wn=br9k{JNRuNc5
zU=@K?1XdAPMd1HQ1YqvoP6&UWo#t)`U&Kx$CwE`O&7Eq8R1T@A9Xh0@rlO{;aaa@h
zYf|cm)YWtkY3izL?^M^&RXe1srUn3lkNAK0O2oE3TKmWTp(l@j-RD1a1OT2q`TKqf
z-`~BPzk4q0H?LIBs^{|mq>-*Fy^6pp0;>qDBCv|U9|-(cZ$n)dbT|%a0{`eU@Bl$R
z{@;8GL69+#1q8s)K<-5BXFj<<_Mh?X|4jzuS=R9V&Yp;+uUYf^z7*`YlVASZ@he;n
zz{l(6^#Lb<b%KIIf&%M=goM_wU$<eSq{v2L;f-6xB}65+%gXQAF1u~p&b=y%J9jJZ
z*|u%J&H?2^YMPpw@`^_c4y)^{XlSah6vDTD{rZi<8@GsvY*F8}ZI}8FKX`8dv2{Qa
z=t1%A0@jG}@r&{CY5{rhDg{7xX@y|=?ZLN(f31L^(7N>-gh2uPW>8~({xxg)1q9ZD
zv=Lu0_&u;zOh9~>+A%>1iz`CAZ%C?#K7O%o&+($SQs-N!do>(y-d?{!T4u{uS%rOy
z`wtw{)Y8^Dd_?!eNuyK7r_Y?daM98VW^H48)ydh#)y>_*$Jg(cKQbWj&fR-q;rAmV
zpTzzg7oYIU)5O%Y^o-1`?3~=<l2Uvb;nnN%>YCcR`i6J!KeV>BcXW1j_w)`8k9-{+
z`!+r?Nt>HrSfn$UOUo;It?2pt<8O-nNiQ)_uQhAe@~;(I(Ti`5Ke+LWtrgg%CMbT)
zLg>m3iQVd<>m-jqeo^#x{T_|;R4K=sEgPiwY7QyTR#f{{v%jX;?f+kz{h`?3_38yS
z^7Da}$1esz0mkxvAT?yeUwX=qHikd!oSN3YH3#=t>-sKSe&^yX{r$_I1%jU+eCDk_
z5&v{N)WOj(Ycw;7ym_J~c=Uk`RkV`+)5(y)X>U@fCSO6;q*Y=@N_ojIXSN<Th<w~G
z)*W-U30o_E=~7i@lnWyB!SETA=`f9PyVvxsDS}Ie2Qj}4xZjXp8r$VB&*(Is%Phm^
zx?h^I4U|JOtghL^`hO0oS(M}}Bix#GQ|UxDy|(9i4-pWBo|`=r!*b2&+i$-|JQHrr
zRI4Y?ga{vE)bHmTFpdoWsOnn=o&We|5JR@%{%kzFK0nJO_*V0@=^ihWpS`F@7W&zW
zoWcG4wk&geTY>D@jPpZ<nIM+v1?#D7&*fjpSF&ncz0uQzkNz(g*HuBq!BXLoFP7@+
z6a09<PahF7Q}J9&tuSJVeUvcET^VuzL)`%ZC&sC5T7XT5R+RldO^x_~bUnx0YUO7{
zQzM%tkk1zo4+k2|9&lRpwmPwo4~l*YSu=2;bg9Reb4=)BjIRp%={xqjymAMES5E1p
zdryR4kvjW+s<1bpeF=KFe~p2OL*%DHo4ykY6xPNl<c&uL*3Tt_Jzl}3E{W7FG#(1v
z*Y!(G&XN(ZtAe)UxlX^09sG^qyiJjf5Sy=@D~Q*moNKehvy9S{y7X*L*X;4%s3!}2
zdQIbR^jC<FaYM}PIsCZBD{{|x^VGDuJJBpMeJOe%VLf-adQOxzwP(qk-_E*Y0heUm
z)@pKvQJ8Q2>e%GFqkE`L-gaMV{Jj&T=ki~^Q!fnP841Kk0!oLFQ7887v1xn9190g)
zKo)pUQ+$BZb53;#@5&c<hkO+iczwRE?Ac}h=lF1H+qA=WYWco`VH2h3Q?#bqikZ)G
z&D3v44omX&@PKNgRLo~rGam3}r6de}AeILlGf`!)Goz)59%063+6l(i=q8ujPpT_a
zy$c&MzEO7J&f6_-4Z{<sQ9AqLQka2_rorB=0XrWkeQ3+HuXtX{m+jZTbMvM=$MuKh
zC)PJlo%O5Q`C$9G+B@yRFQD}y`q+Bii{&v9rB!mqokRE*g}B1Py$^?ORrATk79QET
z*`V2D(QPq$$Kei{i^bI;Ld(Z7(SRy_=J7L(NB6g_4ZRX<54~iw6P0C821d+ZKG`bc
znRf23*wSaA1nY?|9jHXyjNf?MiB>Vx?JlK=`6p?y?=L+vdx^8+_O)Wh4><FH(DhtR
z;EsLkrW?84&$h`g+#OlW)vfH6#$E25n~e7u8H==hW!iFh{LQ!QlubfIwJ<34(bNtF
z+9v!O{W*PUI_~-_OOg4*YpLr$8N6P=nSNLB7H2;@ana){5^T?8TJR^VVXVrFED!Cg
zmZi~&f@ivXXdkxiIJS_o5qFPH8+VX%$0y$$us^=Yo{+YS+qW}N#pl^@g-_wchV@=n
zwS4VKcUS5dPN`Dl-j0VeH)BT}f@o?4cZo|(R||p+qiK8olt^`S&y})sq5`M4Li-gB
z_VZ^`v2@pPl-kvd#=_o9O>2=)T#^lM>Bn}KovTRS=xSNC^>3APRG33Z7vllD{#NJt
z!jKcDt@_3aRd^1L?)TN@a-KRR%sgQB&6Q6EZu}S);tUT+$w=U?Rb;kzF5inAk-}FJ
zl7Du(pRw^MTLiGdNqL%{<Mx??Lj_AA=24734LeZurdpL@eAA5|StFrdaw`ABp)C&*
zvvSAFbeqcS0>t)dlphd_tn`}9KOz{i$2iI?vd#2$mHP~KMn47dXpQ<gL5;|Ts<hAO
zi-6aTJ9BAn3-WP*Imv?Fq#DWt<W2jyzr2UhV~#DE^OXjQeu!R?<TG~T4(N+A>Dx)Y
zT0y@aA!nSz&h9l38a*m`O)?mFeh|~}jso^KX?cAs-vBFLooX3}9t6_T_H!HXgTc*b
zt5ymO5dllIG`6weL<GIz0q6Uxve(d81hxM-XZGYleF}8tYW5IVqDN)Qz;7)6e(Xj-
zBv_3H7~*)q3kcwTNw3aJ`SA<?iwYMpV*H^F2hvRgGMTWGcl2hmV#eGf^w6gs#k5KC
z=`ml3<lRp};FZtpI?=ZAsJuhgT=pPMq&@9M*r8o&5uxv{60KrFyQ5#%0q!2lzOc+s
zt{4c#`~1Z#i*Mzl{d&vrH?C$csTs?i5I;UQ^Xc>xQ><!R0RY3+owAoVuWT#$DcW)D
zz}<q<JuyV$*BeWnahWe}Xyb#NmDg{(ez5eKoWk3g;Qeb%cmR{#$^)h*NDDv3#X;|X
zBec7UuF*>O(LW3)#XUHJ_{nG496%Xsb<u<C?%t@vkEBG)89GJUZ5-SbqS_R@kjs^(
zX^j<WmD^X}SAiI|rteYN1PjGhG0Hy+&fF+0TENRMi{)=YU8M(I@Xx8u@$RI4$vY8S
z<fNRU9o%6oKae6gx#J+IB&e9~D0wUjqjqLq&Lj4nK{1kE&?G%GIoFY1UQg-XazAU!
zf+n9s&{&(7Uq#(fisaxugY|Q1xl|_heYuI;c!|G4ja|JXtFZBQ;KG}+kb{?)xicTO
zA6NQ?(mAtd!y7(5>krnVxzXug1Le<-mlhm1fSoNP#@u_#FUD#6f)?5xqTWg$?mE-D
z6K~C)daa+~u7SV)@GEWl7q&+}T8(9N-!4ce_+$^H`)+V+x}Q+nEhE5+2i#4wh@aE5
zZjm;B;Jo96c)r}0H~O$Md0)q2Pp7_WV?c|`pMvQI4@RyVc6zu=v*P`Gf$G7a`NRVM
z;W94?7+6sopG;(PJwEguFV1n<!35C0e8D&b4;WIg+fr9yyX0cCY14wkoxZ)_sDk;E
z(5X@$u)HL{%>P-?vf;k><qnsacgFb0Z_}@Y822hewT-X&#B}C0jzk@iGiB|1Q1JB)
zztTNU&*3jZuZKfKHz5<}ElFX**%snK{!iDPa<KgQvP8}G9=1~Y9qx$<9^mjdJN777
zzW=V~V9nYfSR~WcG|_B*cdy6iJA1c^y~I>Lo4#!PQ%?PUe#=t(BcV;XH`N}$ZP>zo
zx!+#)t*y57h}yF|vG4m|NleZAGzS7Wu#XxjcyRsN-1(U!ieI|*E`H;u)dg<vOmGhH
zk*cK%#p`=rd)~0e@Rj}pM#{R87R_<5vD<nVtuv24**`w$ip%*rZ`C=V@G77Kh<jt@
z{dLpNd?4;zs3vK78NrcI(Do8N_k7GG!YeNQ5voB^=z&DcrH!twA$!a=xm)x%_-!X7
zC}Vun;sDpfZw}W7T}(vZ%ba+k93TI7>xIS(uaff`K4^Ttet5^v01sG-kzb~R_L>c=
zDL*SH@Ax3+@>#;3kT)B5)c9QX-I}r%Au<7}@Jrd1;4bh`m#;y88(%a2Qprog%Tp%T
zhpfNO+$4VrIoS7Bsy(q(A_bZLYQu*>u~FmWNQ}wRUrNuL&saI05jyh8><9=9gbmH;
z{a9k;Ms(b4`AaN;FNEI!*x0?DAY0+(DUj~9z0y(BRmC#UX;G6e+4U+S+s*3nmwlt=
z+fVP6(tmLKh|t=Y`gFq`t!GE3E%-`zzNweoZ*CaER|jZt#i~>f8dWIm-xK)m?Zb2D
z&fN@H%vtmke&MixeCK7b7YrVVbH-@i{PJe|2rFyvjc0?)59TAIHBFD~d==_tZtSx)
z(wVaR**C)~Gfh4(uVIs?DpEkArRA-e<KOtH@7a@OY1|6XufRP92Mhy1J6Ph}>+&aK
zcFyijTdu}7Hxdpn_SFZDMBW_ly}3KJr&h{o{-VH;U3;K8zj7%=@7ybiKD)J#4jAo?
z#h+Ol`aERNcOlOl^DY=V)xiT66jDS2=j?EsJbm2b-8~Y@65?|!PRHC-6TZF0Uy{##
zq_aSC{43U|^404>T|48)Sc~WFPi&{U5>2!+pZd7pV()(yrFZ1(BhgGZKFwKC58dFH
zC!alJF6iGr;}Un%rdT8N^CsO5Glt#d`H>(VfHno>uOXIbRXl*SFQ!a~%^ctX<k+K9
zDx-;ZNirsKO)o#$^MIRy0p6shdv>g=nC9qN&XOh&Ttxa-&)~8X|4T`h^O@WZ9r-Jj
zbL^0(b7uBEdGJ5V4iWvMaN|BXE((E}eV?+SEY?)2ZmKGJOyQX>`s+e>6uZ3x_lMw7
zq2DFbDAC{`qeEhR{VZUw>t0YgzUxUnRaa%xE-lZ$f5k_y=K)wbGHcGGFhjFT*ZHGZ
z_khe3ZA(Fk_DBH#$G2|(K#jZr&9*ac@2>{j&l(`VHqd^_O$`w_VPDpI&6!9Fn-q2n
zb1=R;WMuJK-!&7NxZbsOch&3L`?^~TDg+d}87U%v$L?J5{JxI6V*_q!)?3%dX@8Bs
z&@g&bPSTvu2>73#Y;>{rA-+p>>7EJ$DK!V&y>$!??22&}jv*RfKdb7WkP{VqhjqHa
z19tY^L{(zS`Y&-$^p<Hx?`}BWcBO4$%J+8XuDeE(e1Va`{**QU!_&dc7dbudR%1bZ
zckk(W>lZEX=RnT_4f9$BnENLa4WE(cI@*IXEw~?^@7eK9vD>e}UgLC8>rOSBqkF#i
z>pijgikQ{dVa<v?DwF*`o(H^&q`eW6UV9&AN2-5p05$;M$o3T-&uWI4YacIoPK0ib
zVHm^MhZ%XcnD7FMw*ci!OmH@)xt1Nr1Frj8ETo7;{`2Xl8eUZF7~<x&YnNj!+Cg-7
zd=|UQc-*|suyUE-a)iqHl(qW{GIG@Wy1&AMBhoEc)ch`nw}GU&(9!$Zp%%xp2Z2KZ
zkpHn_R;o=v#LT+5^{Q2Q(k8aCxJdJ^1<P%TjdOm<K}qq!N@mcx4;tLZI~m^h4WDuL
z^She0U0bNQf^hbJ)bQ+4TD)QUqT-j1+<#Ip`%~7<sO-YuJ~N|m`o#n9fW4P<)7oQO
zUqbPV+kHkq56wYD?N5!a+X$UV++oKV37tp;sxsZA6Mds^%ztr!lFOgXJSOd^>9#8P
zr|R%o!_d1$u6NoRy>3^P3G42TRArD;mN#NOfNMLpdA@F)hrg1&SFOz)>?`Z#@6N?Y
zh|X%8Wh$Zn2_Yc%)WSqVz+C-|KA*K|!FJD+iI`3H$6FKB8uVLYcbvHL@bDMik!|Mu
zn)m&ZtQBSBa|yBUKd8SEyE^i6J4QCgHE7sg#pYqI*P@)(@ptqhsia#+g$aiZ0>7H`
zQ&Nbule>rKD3FD*y^w-u6;XqO;g5nIY*tDwEh&3RunRgZz6F-L^G(Y7@qc2L8%>cD
z<%v4sh2Q+*XLgV6@6OY_{9sP+R!#wBsHE)Sz<}1nE$s()KLCWT92IXlx;0TctX!oi
zyUEL^xPIv_>1toK=_kg8=lnMwKTOkV`TS4XwGnW$<d_=~8ZhuRss81LM^Z!PM;iGT
z^Lc={@vYwHS7j8U%Y`+>yz+FO&Pz9J*It~K4bi}<pv;0Y$;Zp)IP;^$VsfP?4)0u!
z4KdE@%xZJ&5Q->hRDN;O_U81LT`~6>`iF{cADKETXG?z?knQ01#1=rm&o<UiFH_s{
z4uKOa|L2Cc(G+&7EY=F&<#6k^$rdNr5k6au_ae`0w&oNRUal&8IM(TuxE^-%jGVJ|
z;GJJFkrwypdam|*-x06UsQsv-Dm8kJosiOG?&0Pf!??D8j>+0JhfaBlno#i%BMhb!
zJxwKbAf)M}`HhHAXH}^)%{y;2oCUHnFEQhTj~>f$JJj1ZI5wtK_b3SUN(h@_=p|!c
z*(Gk{hotys?}eR)T;+W2{wA*B3%`71`#68bU*)a-fgC@t2V4cGHty!@dtFym)!Rqq
ziW9=oS>-CiOBH$Lx~iJWAyq&hioAa1s;9H>PDf`~cW(p5*$R^4PIo5*MVOZ9A=B$8
zoZZ|{2i|nP5P0t5)j-dyN1PO)h9VpEk-A8)>t4>jS9T)3u6g_DA`N!`TDmT{zw)u_
zZbSW>PA<CVPoDZs0(@t%`?qWH_xD%v*HA&+bX8S5a^#5WA$3)CbqFW{@d@zuy@G^z
z`|SCxfRoNXS8uvs_jN~j?_4SHiX+0$*I>7wpSzQ;%M}+bM<*vOh~pIvCy1Jw(-p`S
zCk-`-lZJ+qj+WYCbr&_q-M?R;*Y!V6{|})6&j+fcd*Y_^6)=2%5rMd7_?IH~`l^3E
z?C<3`pS<GhY`8+GLk?*|)U_|F|3<Q_96F-;Uk?16oWAOcg8xNf!@qq3VhHa2fj9{N
z&VR#W^&~$8@+w@baQzShKP0?rU8`{Y5CT6WylP#maQzShKP0?rU8`{Y5CT6WylP#m
zaQzShKP0?rU8`{Y5CT6WylP#maQzShKP0?rU8`{Y5CT6WylP#maQzShKP0?rU8`{Y
z5CT6WylP#maQzShKP0?rU8`{Y5CT6WylP#maQzShKP0?rU8`{Y5CT6WylP#maQzSh
zKP0?rU8`{Y5CT6WylP#maQzShKP0?rU8`{Y5CT6WylP#maQzShKP0?rUH^8t{(jen
zvp0CFhCg^u#;^Hv_}8pi`3E0s!GD2uE175nHV6p`uG_d_<3`~P!onh(#5ap<65Av!
zyjgm)n1rO1l+;F1nJv<iTf`-$B!A7H!w(*_R$#q=z<Nm$VG+q6e*Bt0hws!HS$@9V
ztNC*P{QvR!b2t}LSMFZ?Pup@F;C2&bC6~T5ZLccb-+D&50B;GK)zCs=^U-YsM>sF^
zjs1r(J*1!T<*oS{b6Tnq<<q8|*lRVjkVH&ldm5KH!G;h+3~}vvIfEUa>?)?RuN64j
zw|=>FIy(~D>w??YMY@I!CCbL7wi_xV&+LU)Ny_G5DCeNcJm!Ptx$75)pb`!>d8n&2
zXoR80)c9!QR99=?1P#j3>#6<lSe3Qa8Rtml0n=nEw%Q`y)~rT#efL2cQAE1Oj^>q=
z7;*>&Wx1k+X_|RbF?HDWO9c$85zEN_Bx-_J;0!j~X0r1}f6t|wAkTv{k;4N<z6gem
z=N+2N^fxN5?(r5sOxS=#y9Mdn$Vq|=4<ME~d)6<?cZ=3Mm#9pFF_ng!H?y~N{W9B^
zZeP`zq{jo=f)4aN?&4tMLktU3iAF6L4}>ggJxwb79>$F$-DZrhN}67FtxaC1H=clF
zAMhmwcA7DWW|e45`;Nt*CF=awm~M<xKCd$B=k=lSsPz6^SVHHrSALg{hB3lxgXR+x
zB`-@$*2gBe;*@+T;~~b)Ost;?rz6@R-lcg5P|+sC-W3uUr@lk1Mi=bd;(qX&buq$-
z6w$mz$f;)IlK7eBNjW9!1Yd%T_`%&V^rw06@w0u3gNU#UH@s-n4)HTGvqpk>k~1VY
zB(aig%mZ!*Bh6~NUQUv?u+4_e9xN{(xw*W!P_~Q$Z)rbAzoDLP!mYRQ?5>2ebl8g2
zpWUTnsg2dQHdG?+7?oME?OeuEHp+`~d&q0prU&b;(p;<2p1~cjIvfcuY=u6DimXZL
zK>RcRTd+d|y&o!4UlAVL%W*nw2iZofBWxv_)2Z$7A;R57NyHMf1=2&BEqH26t1_nZ
zQ@`Dv-Pgh0z$~eS3Tln%(I;dOewm-ZJK_gE+;RfbC%KM-N@wF9`@(yZ>alB3MpuU5
zy-=gl77D_JW%!h)czn~c!W0kKV6QrWkLAq9&dSHZ{DPg=((K>Fp4i<e8TH!U?q>04
zfyje#ql?5OPpzhpEq#5*qxU}f&}V6hp4WiAUv8inj9_~uYW>kNoTP<DyMlus;Cs$9
zlL~9eJ=z})=X$c(RcE2I53$vK<NT8x6N{3T>Cp;edDw?eg?>m~ZRGtlPNv74R}lR@
zQ*J277II>^;qDV7R?D&6Iz3!hqYF-_g$E2`9VBR4)4GIus01^1#E|&#60s^6Lc~gd
z23Pg$ryOw}P!w-VR~6+p@qqPg1zDqXwmLQWHcoD^#Vd{^-Saw|U#Nx$l=;6QNt`XR
zX4_D)iDW--E$f0F()wFng%iv7o)@@o?~nVaQ&wC!UI_ULJH-S1;$K0<(4R0H*fwI}
zFs9d#fsq^+t!Z)#!J2Zhszg|DLfA)!^xoq!w~5L$(YMh74>{#Vef{nwB#usT^3&~@
zo+UT_$w{TJ_A>NLTGkt^m^BhiP}Ab28D!T(Y|ccT`Fw{M5eOz<N-$v*IPrj>udIeq
z%YL}cF@4zey`SF|*X^ExS$)Yb<#UY82na@G_BQ$WXx^fKPCK)=xU9nxUiq<O4qe1D
zLw=**rEMqQ;{h%l_0FlY#R>1)iTkFCbm*ofqLpRtJV5n)3~MBdTDA-<lg_taTL?Ph
zJzMyq7$4zpa^uGCRIsJECsrmoaEj4_Ob@Flb7s-d-pA+#bC05PQ;Tywc})u#refD!
zwrS9&0$NNhcQ3OUmzJQ4gPS0-YC*G`b(J`Hk~HlcUlj!+U4p5`tUvYG9lEhn@p4{0
zQ`62FQCH^e0lWn}D@rm|wYD|i3M!3K93%AGMO!J|LR8!1r^?YUT=Q%i{GEM7DEWEi
zN>Kq)Eu?<uLeW}y3AUOfeey~NR}N`r0oDyRTD(7ryWA6plWWIZhHgThoAZJX;-d6V
zP_H^%C3!!-#jw3L%L7cOvs{efbhIt7Tpqb3W5mg+bf+rTTJ4bHq%GK)?&zc=N+z)4
zzB^}A9Z1xSR?d_SbYRJlwq)rK>ohl$`m$bAUAgFVMO+6#2?AKBu8s(+Bf{#4usS0A
zCyWRyiA(;xDMiwB>Jb3*pv#?F!^70EP+iVA-qc~Bzl#dy!(oonPS6)<<a*H^wkT~H
zH(50TCFVMO=5pm{_Mzr0LAX=JKi}?znf4{1n;(n$gIR9y3G|>hfd=t+2AgEe+Df;u
zA+7@H?mMD3oQ!gv;{mxGawGUs0)LL7n(}}iC6i9#PGrK!KsmiUUwlgE{SfkMoQoxg
zl{cV0pyPKD%(<t-5JCR}rgvG%?jmU}s2GLXm4=+JAuJ2s*X#?18xf=4zb-0|@jd@y
z+6eWON-_u2{5X;y*g~sKF&3{nh?|+kE>UlF>*i+@&VYG%QZC{+U>=)zIHQ?S#z}S%
zVR^FEqJ|Tu+eU%_ZI49f*r`DKkFM9s7-hN65nx%NMAIZvIpwH)JXsQoFx$?)OdHLr
zLC8<EG85iPqxZ%OH;LA4q|1~qcpf+narW(h?LWOFCWB$IY%s2sLTNp2h8tUaIeN!p
zsynaSo_aj~GW~QJMs*q2&nBs|w3h04z}gDVlji$c;Hc|{kh7_0*v5Ti?ATuX7}Js9
zKabdo8Te8J=2=W)g^|qNexLO4OVklJw^RlU^>O%XWxa^FRI<_6FWvpd<p<2^yNN$B
z;n^Obn}eu_$Peu!Wq<J?uS3~r7!4*nE-@O@!8Aml55l-3m_}9BH&!3|6HbtW=WeN^
znb6&7RUvX(rt0q(t5n7`%It#rEY)i#!Sj>%HIe&orHD2nBIOBL54nesb8SO}n1u<H
z$zT*z+(ck3d?+kPxIa+q=XB}fa_=t{bkzMQ5{HvdDjDE!nJ$$VbSp<Er!)+C9)|2W
zy<mNQuN^dj1DoN(6F7<NVbFKB=e~EXl?9WKmFh(2j|3H*9Se%t)N`6N*;uhectD!<
zwg$aBQ%(&$TNAJ_l3dyNB8JK9+y?Zi6mICSh&CD4f=He7zUH0QFPR-A2H%HLZP&td
z6Go(CsIl;NZ!N@KL*=Kb)N@5B)@UmNGcKjX18$H;6FBo63dt3A2i?m9HaaphSend%
zHzJ9dPo0(r5`4DyL8>UZPaXX*gNRs{*_jG+p;1DeXkS|cW{ZA+JGHN2*_Wt5HNr+H
z+@3!{tueSaQziT1ys(YEj<-F_@T3u$%k6~s%<_%XZAI^B+C$HMzDmVElYX2YsC+;i
zf^FpHF)*r)+$Qu5%v%*6;8Zd!VvFgOtM9x~)tTXA&=7tt9lq$?&+e;-GYHBU>OSre
zu3Z+`p9-cT0=HTk;;trQ2l5sVI7$;tLgbATS^{*mbu4BgFB;s(#Tv>?%95t-Xi>(@
zGq_0|SwgkE^iM5FMQdX4+=LEkVFQ>H=oNXck-NmDlS}g6lDsfsYN+#^L=@t(VAF-4
zih~g*uOCq22k0F?73q?wI`2#T8TQ%ybA7|%l!+12gB|*%Os^4+E?k_sySets+kh9$
zE6=ML4$;mB1o8`$2r3-x?%kk&h4ECKW`ryRL4@8Ws$jF+)R?xPgip5`NJ~aYH;?iF
zOBN{sd~qQiCJiQFifw3u1yG8}i09#fsq?R&kJOZ%S6wci;d+gzvijnoObN{Ou1OI<
z(gI8)1a6K<Lba?;jJ_Tw1$oDm(6L}QUYV36m(4IbxT6;K9xJ>k0;fZmb!qIqU5j$j
zc0;03``jue^5KR{zDeX|fwMf|3u4Kc2cSUT8qw1+NG>DQY#|;%HyP@@<af!*v@s-P
ze5t?HhzRn?v`&UI<-Yb*unODxmc0H|9py7P)r8ce%)DcTrH9e*&k%k&mzr-3^&ODm
z*!W|#X$c%kqda>(>6PcAAecXBCFx3Ph?>6LkceEv^X;V1`o}W+CYK=#4yH}f?Lnz=
zqK-;mo9``$;aJja#}=-*=W=v?0X4iXR?}g9rz#dzZaT|ZlYyDhVHbmwVN4ZadXkFh
zgAYA`OOjl$a@~9-7u%GvcC9m4^_&S+_zTMCaBPveYyF%q4}e|9e8CDt&W;GhKn&%O
z;Dk4!mbk8jw)drj8+wcD%c`z4o3qC}l`<)fB-eNME5KZ6y^Sn`L4zHQ$EYq*Srfe*
z(b6TiTQy^h?(H@zS;BOnXGfPXJ-AP*Oa=0EJnC4U8S4bH_mfU}d!g$qD&^-WZhi$R
z3E9Y{kH&D<lD}vz-#IEEY4Pi}<vD~J7Cq9ui9EhzXPehzu=KL(zOnjAgO;=5MN)IF
z)tnPBP83?r4V(jNaZZg`F;5J?PgGqyCDTl_-^bvFo+mNnxHcFvbfAy!ht#3qsP(Fm
z@Dj7=X`m<9J>INf!e1@9xNo30CteZ86<pHx=1$>(peCtr@RHfu1BS{dQL6j{MPYh@
zrxCvD^+i^m-pNc09uPc(Gx~2svFJzgZRlF`c29Xh6n}8=puF&aL%*A}MDvXsX>aD<
zE7|2$Xq2NqF4m%aXs&OXZ!<c}_ritQ8pBu_d)>W((n&9ROY<sNJfIu|3r-|67XuMB
z-6$iSY1|-_t*DSeijb?e621zV6;mcTN6o_YSeo3A*HH)X7zt*k8HIemH*+eAa&D-S
zyenzeNCGo%#)i+B3;DZJRfq14CRa7Em7v1aP7(4Sl8)kyP04Q>OJ-qD!HkYnkkz=;
zS<(#o^f=qY&?&#qTWh_A3@2``4253^mgE}?6q;_Prq%RbvJJ+8^E^>Agq(I)`!&a^
zgfXug-{RR7mME{5Yl!~P+e0(LMj6U5{oks3dAbyt3<XsQb~!ih(a=Cm;GYt_xbm+V
zc~g=5`M=7C5Y?HcGz|xY01`JO6o%SLTO-n_^7KZEUG6yx9i6yx$osj`GAL)EV?mU?
zUp`EJ!IZ54jxS1Z?(Xs|YW*YbPSb$WMl(uAXaUT8PJ*(-io??3P)h4)Ru8#*Frq4X
zZp)Kz>6vY<zO3VQBo_|0e`%cmDjmUjMiigImNeIhn3yx+gH6U3Rn)SC8$SMiLsK@+
zuO3r&yZXCC`DhTD%Bw`VTM>f<j<_lxm~c}A>{aTsIq{^gJ%*ckKsOdln>-<DIVs5p
zc6Zc9i~wo;F;WyX#Uxd(mN8qM{1&mi*MT3L>$^1afM{5@#nBybaA+b>?YI##A>c7j
z!3H{V5mN(UOY?wKqd_Qp-!&4mBk<l26e+Kb1}Tt|glc;qdI0lEj)%N3du#Tj4pF*w
zM#X!k(!f)hAa0QU)68^>x67?10_+73`11+ni@VrKiq-Ngi3KcYcVyM^5Pj13n)84#
zJ0^q&oQ!&dVgA&Eq2(@a*v}8DHWbel#<c9F%5x>O;cSl>rZed_dEs=W5Sae7DFMu2
zj5AODffC1>dm89BnI<?cNNx%H8JG-V?jUAo^;I*zT<Y&<27a<B+#6*5w7)0y%g1T-
zVxatYP+PDdnD@n~U7+^_J;MW9_vf|$D=ze}s&06V^9<vvdbb>8ZD*;33++T1rZ$&5
zU%hN!*VIoA&mT=3HpWLJ_a{O=erlNgKca?6D>YPzZia5uf}8*=NNf8kMs@FsK3(Aj
z&hf*>rN8Mz)}CB|erFMBbXy(}>%=?(dirQE?|8ZeEZbZNw3Y8n^Un>-;e`<pfcd`4
zzi(Jz%lzv_qIy_|xu4+cK_0b-Jmj5~gHok>aX8=ZEmTA$Rggq&8ntR;JSFrhH0_=y
z#|+@W3;%O#fI8!rw9W29>53E3Wh1G;d;f~-`~lUQuqBx3QEI{^&U5q@58Lo+9w6wU
z8$ElkbVVhcsBI&pHpcxom55#)<Cs5VZ9I+V0sG!l!@0E_MILZE6=$9zs`?K!|5Fc8
zbvzOP=ZN3e`gffQ*f9TciC=kuvvfZT!rtz}B2&Z0$txgN1sG2!ffrdq_2Jg`%Yhg9
z0tW+!W~B&b+prBvit69suq(9elDn=5hwV+OZn3L~>2TwpC({ox-#gQchYX))*iOL8
zB#>6s&8MFa4`#*bL5NK>9X8l;%b#UTF5$TAXH)9ZxJ}D`!?>nga=-n19>9m$1Jb9k
zGXXjgL>0C*tuQ=`NjgtOM+{lw@lmX;j`Q9)g{UU~xKZOJi|rR&X{ZIh8A(_K`v{XG
zhg7T)N50=1Wm(zbt$!pSr|%8c$!=T$8QZi3QqW_RB}}*vIBym=)OK({X)vRLsmr}%
zpkqy)3XY<XUFN-d?WgGBQQSaIvP}=sv?7+7&EMmOdq$K&&S02!wpLL?&G)<Dk|?c!
z^yZ-aOP91NNj*0Un-8$NA8QAY)R)ZWReI~f>z*5Q8g3Y1Un`^eEy$<^EN2kzRc1p=
zpe3nKc|Z&K>%4!dNkvmXBQeA<3m3x!u8HhM+<{9}O3@ll+>0Bmu-Ca1sm$f>gD3d2
z`2#WJ+~uyb?Wvub6_DeiOh$RA_ii$$({wS4CNVS+K{KnCA<ELEO6(_KNw${8-8?`k
zP4N@fXUrC$<9a-~rm;h!oQP#J?qef|7Y=ZivI@B;YDnhykAkvI!TIGw9<YtL8|ibF
zHdZtUa==@ar&G9ZBFRmar03-30B3i{;FgmG;!7q3|K_9eXDr}H*t<A6X^1fHhNR&&
zX3~V<LVF}gCR>+;UNAE~wcbHt9*yY_PIGOX^ZL?<KD)48b;K_`TAA&qdZ*Ohq;?*8
z-v1@1IZle07MSyzLK$`K?UVD+fPN@Ax+B2|)yL9cA7?%uHj9AEY4U*kSXb`n&ojQH
z;_<+rh+b8<o1MRLlDC$5Ba(FpG(&Kt%*6_E_pvQ#iWFG>_;cNt1Zw@y1Do!^+A1{=
zz64X3g1{E<S8-@qa%C2aw7am5y9fm9fuxC00=E%9cMZD^Gi}z2S%^HixB+Zne?slw
z1JO9@3=MvXi5tRfbB7DrV1{<K;J1{*nWz2M#j*?!_-FMFko}h#1dtrs*I!W<JPFXt
zmB#esnx#<NIT?oAnY#>kd15yEHcNL;!eq;^!veSMqO~qZp<qS&Vn(AU=eSdzY!bB5
zL6Mr(Acm|Nn#mf*NTE9tv7!1Gq|>tNIB_@f+y^Xkjr<N8aif$SJW;t=mD$7)=<)qH
zO-_=&>u}6{_!^YdFmx+uWG>8jZk=;w_=h<HYPQ%OTT`X2-Atcq9bfTW{@h~+V7}||
z4BGe6Fb$Z8a54QO8OV8Ss(iTq#`s(2n9D|pu>p89;#*~LX_4W0s#OFkqp#vyifDQ^
z?iqJ8x4nf>gAp*L@hL{x5xZ0>_h9mE-sEIurme+^FOi@##FmoLCG8n6l7ca<jh_Cp
zjk}iZ+=bfe<0C@8o50#_5RiJYl-m;5s`^DEm<JrRx!~VO@MmQ6Pi#;cCHHjI^{Pe~
z?o>#|Qbce})&+b?Py`#AXZ_Y$s#^Wpydw`VipV&L7&+yPJIK__Q?12`PRTY$@c<9e
z%}l7Zy}t#X+Yn{nR&ah`e_`_OC54E-nS<s+0}C*DYQot&#?&2H0m6gX%3uVmE2>ua
zUOP;@UrCP#G+IBlEDI*bvf{e`ldN*mFD6p9p^hwqWSnLm^V?oVN+YX0dcLP4g$G0g
zenFU(2FPQYk+B%EX+@bTh+_9RE&(mUtg-rxaaW)UM$=TynfMfy7{43CTm1y_uM&=t
zA}voRP&S{8e2$pCFg-i38b^HH+^aK^5bY};)mvCJ1UWSd4lt>`)r`7czj6e(h2@kp
zR>uRp=_HV*3^f<(F1={w|H#AY79+m3pedu)MESz;zLq%RNQMbG75LssRr<~(17=?6
zL~-@lH<=MbO-c=k4Pg9n(5a;O4r`4PMthDMgfSS6%ZOkYiI<v;s`Qf$fM5k9)HiB4
zJu{jp$2^F*zh2+ApZ?l!NRH$9Nx;-VYl{Lh;n#779*tFfPw??tItYD;#Vdqisfr^i
z6@!gQzj(9vl6u6Zn#41*=no1Xqlc1am(G7kBHCAws*cK^wt%Z}KM_@0z<z*Fc^YIq
zy)X^E$puMY%!W9&^}_oL#bRBPWR{gp5AMOpYCr;DZ~JY~3P_A~CkoCoM~1&$uAVB7
z#?kd{XOcbQX-2*iGVhhok#C$2F=V8^0_P0n7iFO#3rt9*Yw*YvW}wnxSG!8;tM~~U
zGS{{?vY=#!jr~$s4bCY3ffEV)b4NMfe;gsh_6K=ziJC-Mj8+UE)91~5S7#ymS}U&C
zD4Wg=YE&|jn2A(yOZZ@4K@0?z&qgja4~bGt<xo1*i{#DXNPGq5=2W%La^15HiGd!6
z&eBwq=DAZ8IYW4?_gLQ?EM)>Cm#^>cI+BCho;vBEm{c8G6+VN?E?AW7Fl|O4lngt&
z`z)SvEj@sRa;0zbJ>d;lu}V9d*@$5rP1eN3>%^(bR&G=J#FM46<!cvu0=VbPnv;os
zU&ysAIvf8Mb&Z-(i{tkoZlY%0BkCa=ENRVAs+G}(y)#|sSO%qHC<Y8B=jFd-p$Z0D
zC3Fjb-~^+Kt@)!bTPcvN%6RVZfI_6*>o~D*&SpdwhGg|0)cFNM!?_0Ff{Po-NxJaU
zLGb$TxO&=~IlMM9*%!Z;e7y)9Hj_TE3g&`mXU&CDTi_~Sd|sx~ho%IQX+^YyXEKl;
zBQ?FZa4AUi75p^C`Bn0H8)G&jZuh(mCOj~U3X4V^pw&7YVg}_fv2}2~OV*&7WL@^8
z8pipON3VaJ$7B1mk1?r@GKu7aS&bEADWabvm71U~&0Cpi!!mEM0?3&9ThJb&0;$Lb
zv1_iffUGy2;7Xk}jV|GoFO&nITk1q!(!$=s()X?Sq$2-cKIv=nHv8>5Z+l0FCp_~x
z&+FYWC0p18yQ_|ltO5pcwtv=MN|J9WjU|hm>}r6Qki%p<KPbsh{}OvTey<lRBUB7U
zu3*E1xB`~x9o}4FwgYV@D|o2M2ll!0A_Ot*t-0OMuAd-g$9Zn&Z^SBOSv*eqx<p(=
zHnSx<M=pVUIgY7%zW8-8^n6=B%GmYRqap&_uwVYFPXCv8AQ6OqPV7$+jh7pmjdWp3
zWibWeqTt+lz0ku&y*!F-K#F?bc#+BVwhBECSxdqrmj^V%86H*NLfQ5Rs+<+m<+<Fe
zR|(Z%!<K#={9Inl-Sfs6v4KVO+{Xj7bU3q&smbO2{NEtzpSl{bTlIx=nHq0(v+kC1
zWQEf^akx_d)+eWBAvw-PkM+p|6vT{5?-)3LzngT7$(N0ih^Y>wbVaf&qBr#}U4ygK
z?QEQb32-nxaxrHV+RtJU?eU8ll6=M|TN6%xUf`(jBTGFKv&-WF#W}^V;nC#H>C6k%
ztlH<w_pT%ct28%-dP6%j3t3`v2-dl{FR&(3*|#wERhn@1jxAK<FwLZjC{qM1Ld)Q|
zLcdfg$SEr(aX)<{leka8PF=nTossQ9E)K7Lx~PPtWWhyk`%;Z73@@G;JqeOJhvVe9
zLYIC@q^Gg;P=QQr-lW53n$CTFtr5gak8)>nm~W0BgkWyBB;Na>2-TwJ6|quJNn=Z%
zZJ0m%BLK=&vAS9A68hsZqPj6=132?oZ(Q5&!8+ern2DdCMrk1o+U!k~i(?Wf>yo4u
z$Xo)amoDF<XwqdU$2?)qgyGyI+$n=S+{UW~H7+#kDQ&N+;aA2iBBq(u!DTeD{boVu
zGMv4QO2lQLTgat?dBK_ez3z#4`hB|2&Dsg6*1F8vm{BP08YVuTTq9eK?h8z~Z)aM`
ztwDsxmj_OwF9auN*`FQi1(_j#beB~Rw?8QOZ*d)v7W?nSQsoEJAPY`1(Ew%Ev8cKZ
znN+(&mPY8-vR9w20e=`QzAqcXj_`7}Cb}h;)xK+*Lp9B#@Drr}UI@re8SaZ6a*Zy8
zx&-SCIXB#MTKSosVQ+kJjwkGb?GoKkmhgO{FeBc^xRHL>G)jDy2iS2b*O12b@|&S9
z*q+&1qEH)JZu0pqn%Nz1nRB02G^_Tx7Ii|txgv3PAIyjmq-LWf0*<4Fd+C-U`ZLM7
zA7<8~f02?4^2=5kG?`lRoGgtLiGo1lD!hy_2jphnuDRA*q{xyQg|Bs_;zGD`1I(ZX
zQb|k{%5oS=u|IF)4ISdQBjb%A?FBN+Cv!OUXE|#?Qp}8gn5r5<+`$%TS-?oznPfR|
zGAD=o&b-Dp2djLt=@~3_$HX{NKxk)pfJl(@KbQ&3AJC#0({C{sX;4^u<*s4GW~665
z`ErMEBWdHr`AIwP6vCVWKF)C|^;Vz69-a5|I!$w3ncqz(0A`}uz31AmI8PVI_Y!tU
z#d5`CBFa1H83p#Hp5MBe9s@y{<?%Mo@~XXJ+R7#*4uXaMjFhdZ2N#ra`IvWx%Gv5=
zG>{?<ncZo3*!3m&TXHm&gt4U@0}>TKtTF}?E6?+|9l<_E0E*y~=Yal9E)}f~4mw6f
zL{vgEkSZoDj@c#JzQ-y*k2Hr4o9usLc`B|y$%nE8Lk!@QnOg>An3y5^tEXvQ(~HBT
z&CENjpbN=(0jendwuvO+e5sv+?-KFUS=K-#Fk&b^)eE~{Dduh%HDqt{Hu2g|oiw-J
zvRF=%vK?lE%#^OsUEv+R<8sH&d-ldmQ)pAeXj~5RMD4&%YF9MzZSF(@dTVP&ds+W6
z#8(r~gLU&KDP{XkA!5f6NuxN^?}`B_VVWjZSl{agOJjm0tfH@^$X1Y-@XMo^*p?@D
zH5XAkDjBAJ%EccH;ibE;StnqY$qdf|CM<xaH0b}<!2eC6F~|?&@y)=1kyKPhUY*x4
z1TspdN8MV2QJ!BQE|R%;Xdf;QRN~Q&Ir;n8QpHeq&|pls;XZ0XLYOk%gd{$hK$K57
z`>NeBUbNp@`KL(6`6ZGL?I2}I-lqe82W*-u1tYUzfid^>{YK2TBaPwAP=n5bVR?~>
zgqCC1upR?>-Yq8lq(-4Qi8Rpuyn<tm03>4C$7>iT!dAfWGf8Ic5Ij_1Qe#}Y_g;U+
zZVzoS=rW|%lznFO8k{K*6t`-u|MwbeH^wPh^%4^?BywWNp(@2Ri*U!41$jsiNvgY=
z-&4kj9Fm^_*(~TRj78R=byb3~4|%Fj^)fMzWsLqTTR>0pchB_b>fO`b-zbp|Gm%ab
zhyKh1>Wa9wciy0-Iu=R2Bg2RXi%kb-`pjBN3-oG;J6^pCZ9OpT6qJ`4#UfWZ*@I4t
zeqR5|ubKSkHD;4)EFa2dc=nF*pgpI*`L<9fJ})4p>qU$7h#3uXyQ0ziV@#Q%*Gz#f
zn_Lz3t)ED2NDVPGW!h1rhOyx*TpPxYCiGBMt7;EdY-(Lrdtc3CTRRVI71lXd9hFVf
zQ$m;}eZ#3T_sHBs2~h_aH3J5mXDGX2oESL8pM27Ig9ogW9pL01@Xl}}wJMm+sKdrV
z|8f{p)DVg?r7{LpV$ztYz4p2ciVjS=*YqPdXyVq*Pt5{pChQ;?1dHU)^y~uz(MO;?
zI$TrN!Ec<1avkpD6Am~B3l`|pz0YTF1LG??Aeq*>-?L+UWmOCRpnM2)?id%OVY>;L
zV028`GHXaR^0@@<Q&oXm@21!8IUcr}M9alW$b2miVCnLJO$%Jr_!2|#x7FSYF%e)i
zU;t!u(~DqnsOy|qmoC&#=$$CF4$_bajg9!GSAENlOk%;mxPmc9J7xmNdrs&oH9}oM
zZs`<F=iV%A7#5a7<r1#=&@GX6Un?z7a%P&J-eQ9hTe+*?ihx@c^3tf&RBWh<aw<*r
z{sKwVU~F~|>m1FC-JhgG>E|w+3mF;RaFAzq3__)mw;`e@Xkjt=9>sa;eG`nR>(hA*
zBE_0n5->a^AHH14<u5HI(az@vKOA6iE`a4jX<5xB!GH@2&Kwz%4_!9Rd@-DS0-Pgz
z#5vw>?ML-RV2I<`ezVGEFl0AfIQeN#iKQ3xi(GIO$PJhhTt+>Xa6_Wg2|=Kr=ZHGW
ztN=aOYUdGSr+7JIFM2%m3l9iLV4mtQ;sFhL6S=?w{7)3%3UT^<?CbXxN59t5cN|0D
zy?;P^ng93j_V+{Oq||rr9q5_;zI<Wj^{>s(7dQN|1ZHI!%=fJ3f09Z+)EGX<(Jh#J
zn_d$)G1n3}k@R0fqfWE8p$n(jU_AM5Rm@6g6b1s7#@(tn7{d`G0hl4xt6-El?`Ilz
zdMnEiNj>T(f0w%hKWBLxbM@#loU=%`it=Y~Al?Rh3qNWCTVj@uq<U31XR!~qcyAM}
z$lL0-7i=8f4n~E|-<8@io+1h^`wlzD1fv%x5pU#~CLlvf%Y_cb64!y0zZ_MFD^7>9
zJ*phQnuuZk`C`An&ealEaa^^Y|NGjE$%tYDjvvKOCDWUu*(VTbZxtI6-e}LLOzyOW
zYSSJ}1>As>H$tL>yehRnLB}vHjx_Y{gEMor+=9H|6)pwfhzoW@nIVRUz(qSxCd;93
zctE5DID;FDp=NI1H{9meiOoc6oO#u*`5NX?d`aUCOJ~*!UQS+?09SLpJ&d^mXI|<p
zh072X+^vd{XB9uiy=VldzaAx<^t&Qph)0#|3N*M@()rwM3rUds4!*upfZAnA)lMGl
zQ%aVwiF)7hoEgw%8>2_~a*nL`67e)(Np6OMvu*WFG7$Qyv~7Wf%+H57p59d!s%^B*
zqr%eCxZfhCHAH{v&)sJBKj<+R1NZZP_|EXZ?=e@pclCwjeGMmCDVqJ-t#ZP`hdYIX
zOswM%dyQCb$c`i(%*7cMoc;5n?tk@rr7=vDd>J}sh%99?XcpG1K&~T2xPIkUfBqsD
zB=~-5R)y5@CXdVvJJc8WS?IGZ<GyCI@Hio{44-2=p=+OxzQ*Wk%Qvr0V?OO0wOzV!
zk&~<M$2#r-Gu+O3R(Uuf(XrxK08O>_rKYVBfx&H*5Mii@ve#anDQ0(EJ;8Z|GNq+i
zm(pMqh2g_&drkEvy2F~5o<S=2*_V+(4l4@`u5-G^Y?sqaS_?D{bcIC>Kgr1{xpnS`
zP)4j%BVaSRxttDqis(8flqJh_9Rh<q&>=0FcUmw_WBA%H?kL#so-zH6(6CNr3c-S{
zhjyMQC*J>UsqODeZvTC+uSr{(Q%9NpLwyB7wY^Qx!_({vT`Eb*ju9AXt}5M)KOMK5
zIm$9c3e$tyhA>i-uR3stUw+QNK#`4O-ITf#igsxfsYvpeZ-hme(0IUb1I28QI}cc+
z8BRsqwr9U4?qjBY%#MRrW!Vs#R58h1xIKOn%1?4atYu3Qv9)}KZ2OU^=0sLKdUq_v
z_Q6Lqw+pdo!u^1G6Xh79UxbpQHPg$lC_b1~;R>>qp0SBrT3x^vd*?zF_c>S_3C@Ta
z)h0$<0yA%%d>1QNxs?j7E<$47?$};R<tBzm+c_<LyKcBptkb*)Q(D{Q|F_k=zpv{3
zx4ypLd(p~8&oU5?#5TZt8^MlejW?t;X+ni<JmNWDVn)S;kFcz=ZZ<KXzN8ywj3l!6
zOx)Kgs_`&`;#kcXIhUhTgAIT3tbt0d(Yf32VASI$SI4U1KF#7Dt`5<u3}Cqz;)kPf
zeC&<Xd6Ye^5(BC0e^#Mss)TFu3t-rB+F4}(>mE^W!RZl-t4>qEMU&iA#bP4SkWluN
zsa&HP6LaQjOuR6B$#a4TuWHYnO(foBVyGiE^1>)BchH#Ek6WSTTMCS4D9Wkb$;C~a
zv5{GwCVNZZ*v110aU<E>lX-Yc;}3ZkTK+fTY^gNGvRC=!S@AlJP#P+G#x<@i--8tJ
zG;b6kf$mQHRtEtCX3&zDu&##kbnyV00nK({qoF3XS5vC&N*$IO=8eC(&}XPPQ3VMd
zB;E7YKQ;mvvvHIC2|7o+#Z=ceoOWlVqYWo#zzGpo+PFcqcZ4A?GVP5PL8(3a%IvyS
zCf_G}_avq<8OJWRYF_T5%qw9;{IDXJtJ}33Y9BG@hX<<sXm^IyQ7z;tF}|#c9_Y;v
zjhsto*U9VPKmxR_ly8K@j{*5gK5lc_eA7_Bp&bv{JRs5=9^v;C-H}6v(lkp5+}jnf
z-m1v|ZFi~9;~rplREkhVYcPF=Iy4bt%vvm@>h)5gN)oH+BuO)xO928Qs6|>Dy_OnN
z-5-FGPwgE5*Q7kXuHA=()+o`V?xD$Un2I&va(MrbGGl!X{&XC;il%JMaEqU454a@d
z<TrV@r<mdFju#X>nvzK1Fu2Q!3~VSkL!AvFx%LZ`HVe3kxfkd(`DdH5`uggd2cF+r
zN_ugal?H_XNK@=|BTa_>q&ytPNz+;^p$<NF#xvnVo|zA;RcDg=?&%fEL8}W0P(M-_
zv5-o-=Nq$jT)SL5G{bhh+$b^Nx^;JT(Fxh*gSfNZe_Q(gU+0kjw*B{AmB%uXaHGl&
ze8JhSGp&?8^R7f<vSXNneC;Rl!LDYKH1rxrzJ_=(;SE-#)Pxj;5pRCax%c9gHeDok
ze!w1+7YZZMHeM-PEDZYV0~~<MMR)Ab<-E{R!fxVza@c0lq-LK|$xIlwmWAx@V}TA%
zL>MbUh`UCOVT1OVL5p(hiCdYNVKb&F$}XFX`iYsePZlo4yuTE~FFJ&e((`J^Y$87m
z^v3zOF65btp1WORRuWeKp;s$XI0)IfKh=_zn@jQw&O7zr=IZ}?ob#)EYHeX**U&wA
z9#(<&V0<7Us_$L|bT(6yFURPro8j&@eVa79$#Rtp&%B9}nK_*AQYX^6x6jTK_kpZl
zOq+mldk}P7NdS$L=lXQid#MaGX)nMQ<eh-je=@*&j`pCTY->)=S1;(`sYhH9@Q#cG
zom#S_O`LcZqjXApDPwk?)HqcZVvseGK+zgh?U^;Z$A;yOSXB<!94|5}^?*^{S%wKL
zB`JeTcJi7b4*Q8B%x^6FL$US#2^fB~)F(82qq_#C?pRJ84=}p`E|e)IFl5G99~#+~
zlbF9a5?6JG95(w)!|SgRw%_ld`j@|2?JF2IRXqB-t-x>l`Of-p7D?0e>t8+<;oB>=
zmYpnMBzb6fIiPLF3=gARsIws7MQyXD5h60@O8X6=@rS8<U&j%@g7;07Ho-TdKS2e-
zg&=U$={AxZE?WJPoqO4QM#G&-rHwdAtCl>W$P3)&FV5Tpc4LTGk5@u{^4l~KV$7)_
z%zaIH0cJd0h#5Jzxh$~VJ)RIe4z4m?o`KJzI_5TkF^#nu)Q3d@930KKVF1F685RU1
ze>ZLfjDmCrISAyd-r*Svl6)UfO3bU^D0|0{zqNAvklvAN`p}Fek_#cxKduXfQ({x*
zgBgJ|c<)uZ6qQ6#7*SH)(vJkA$6IM$_Y!+-$nF}rIPtuStaptSvtV%8uag)D(lIBP
zQd9;#n<+!HJ5J>mj5LZy{I6oJ{i&%dkH=*+wpOV;q`t^hiwq!SrW)uXB-@pTiU?RC
zKnwvD71)?n3<(l))vcgJV0>T}s66fP<_Z|n5=cf8(ejX$N75vOkYH5=5`q{mB)RPA
zj<d7>K;7T&4`=S&J7>P<`~AEQ(8kWWF*`>IR|m6`9I7QIztS#qJv`tEiKvT(68cy2
z&M4Ld=?d2s;+6RpdZe>bd<Gm3PQRm4v*qB$#6^v!qib25X{+fBG`Z*$kGGb>N;J?<
zswyb1-468cgAa%A6004ArJhwbjX}GoSu+p!i7XTLbI=TpbhGnd+9X4@PIgjX&P#;+
zhePK3xwxU&$=|Lj(D-Er-^lb3y@Km>*Sz%ys-q7SWz^R$MW;O>osy_%A+|cyLCF>#
z_DZSm(I$^B++V>277fAu5`*7(jG@M&ZWycmUuy9qJ@=Yi1!+2o{o6|eU=xFfo03I$
z3k@MZs)6KK@mV2`4TS(dZhFPEirOrApz%?cZvFLW>;sy)sD5AC*~VG@)wTDwviYYX
zyG0wxmK(HfAg}ix?|=+WtYCN?rau2n<VbTss6MUXy0^h6KDL6^i5~SE(|YwSl714H
zlQB-@0=CrLg0Lpa9aB3(D%0!3H|2Y(r?&_SfPJiH>g)D_8_bJ!?YowSZCbrL;qhlL
zj>~S#dQD$5$e~M{&#qdCI_`3r>(Ab!kIv;W*JH^AuMf|OtBIv`Z|&zPjV(I@n)9uP
zu7j+d;a}LSVD=(t#;L~KaiU`HeNsd@xs95v+)Ldb^TN>B$jmsVL3!srJ^SY2F%++Z
zlPc_2Azl{10(O_!Y68xi?4_<d(80NyJi20*=$ND4NRZDC`q;ESit)aAsP7D|DD7<t
z02TSTO^JxhoRnsIxlHeY8CBn!>;BdhcQnrN(oX9viY`xaKj9o{Ltf1ByUV_5y{vrP
zJY`BK&A8J)A1Ag3OL%GLXx*3hb$m#${qZg3!?-ZX-OcuYt;co)$Ij{ASNqltqz88n
zCwAN-#+I>N34kPEF*Blh`f$&P&)Lv-!%08idmh%8l1TX{^_Kqh(aKSU0pxHFVN5tB
zX@aQ$7vbQfsGS&yn_(J4b4k|pH^mM#Uws_Jv+;w0zD;gc^pwwq0mupbl-vzSwKTzF
z1Qy_&*<{%s)wplvg?D3~lINmglp*G?Tft!9);D%9wU^u5S>>&TJ!|C72^7uACvan?
z<OyEo<Gcuxxz1kC2}Yi)j}6q=Y2y{hxsjPdh;qNSdm(KFgVi3F+^IFRzM~teB1%uS
z&^2Bw7$-`@8`2ltXWVV3BW?P)lcvzT1-v~{1qlFw@xpl9n{=$GXj2TN+1rv?D{`B^
z&r$$DXU8G<$>?4^eqMnCJ^2sXdBAfvgANW?G2X*Y6k^Q9pcug0u)s|+KM=p-MDlge
zW9=%$?>KTN8>;7g8wZx*f?CId(@wo<9n0qRhN!zXpSTi547W&fe*S%&L_eqH(KZ11
z&iEe1$h|K<8inz=(1;zem3h*cP3==&ZE$ZXN=E%Hcpa`g95*5PUMuxqFR%2yF66t!
zWbj)X8xn7ejcrfnq52SQ3x%b%yr=C)aJr@q!%xkOZL0hH0!B@j+#6p^#SI9<GEaop
zi@rM%iTy>gB_PFwjX@&jgi-~FgPiMh&Z=fNNFRpPQfaDR7`vy=J4LzW-kllZTP(%o
z)CTgnvkqNvyR$(0beg!Ob0i>6FZm(`b)he>Db#O$^W&o-_3062@QwJGpzOS%Hah%w
zK%d*1D)VSchJI*pk)(4Pp`okIWbogG<Ur7V25zT7Nq{a|19iQm;jLK9UnkF?`|=iR
zoAlyaiT4O4o4GM$>a#vdNp^m7QEI!~_n}hluE`lE1ktLRFx3CdG9MRg*!?ESg<7+<
z-85CMK2&DI%RKCJI7tp&PjlqhL6vZ4=d6!GBvW%~OYGFelBt*Z)HsSkzyWqD^H>?9
z0ChvcOweD1NV>ijF5VkBY<wgJ$JMBN3DBz4MlVjirQO;z{}Xm};X}mNDe53_c?eFj
z=+6D}HL+!1G8w`F-i7S<nl1neWssxT9jTsF%<3i>*GL<z`|DEtU9N5x&^D!RiSCo#
z<VN^v?sMm7l`fV=dz;F(7A^x0Jc-gh$W|0UYEs9)<-6`Oxr#0+fKdtF=@B?&ggih#
z^L*?Irn7%myKcelziH!-$xArv${!hL$|p#wYLLWHiaiBv0wtO;GjQP<UDU!bQ;=a&
z=2kGIFvUrRi;-B~@A0>_0l{-wI$uzS4oL6x(@R~vLlPA$MPRK!ZaA_A1>`eYP}N!I
z#?ljrHki1P<Bi;4PB2{9&lS?~9qoA@$wE;c%`N8GxTH%dG(bWtbAg`8k};I9F5<~F
zzr&O(wPBBsxKRtC^Czypfe&X+1NHf7t({{x^vE52==M}|2|1U$10lzcHj5AJo%T-n
z<eb|Qi_^jw(|R+Be>Mdr*v%ag>9eN88oEfKEoCUXv0CF5>>%<TMR%zLuf=w@f1s<a
zc+)HXB<@pW{YA#cDZ`K|A-Hb5Rn~tq-%|5KjV1%#vizq5^5j#=<dCJf_{${UX6P)e
z!kc@e9#}Wd1uCS4Yc!lzwpxbk;P;{n5yDh(ZbrY7Skh<=>uTg=z4c2Zyn-DTXQCSO
o3ETbgKH+B~sjz!L<?Och(dVP%{lB94h_L@7#QX#LB56hWZ=fcjR{#J2

literal 0
HcmV?d00001

diff --git a/cpp/src/arrow/compute/exec/doc/img/key_map_8.jpg b/cpp/src/arrow/compute/exec/doc/img/key_map_8.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..242f1305328bfb035526e9f8308f588e2efc8ecb
GIT binary patch
literal 48054
zcmeFac|4Tg`#*k<v2T+Q*+vMFo$NDIk|f%!At5CD&M->WWGzZUQ7T)qFB$vLV#$`>
z7?OR)IvC@7d%f51`~CX7Kc9Yod>)VA<Ll<k<IK6Q`<!z<@9SLWI@f)lYxaKZO#p{3
zoYOl8Kp+s{Cin;J;Y5vf{GDzCfPn!Z0RR9!KnpMdRA5dN{BjF&7=VI#2>1s;<o;eB
z1(E+F&jHdDMmho1f6xH*jdOee9azQ;em4dHwsU-QV2&P42gd;d2M8P>aDc!80tX2E
zKZ$^eldGMZpO>JKqno#zppKoBx0|P+oUF90w4AK0oSd}WDFqcp;D1IXFDoynCab6>
ze@akZK}}9pO+gg^XcM4+vr5?b5~%;4mrY9iBM+5T0Dz>Vf8?_u|FCZUVYx6I+P^&q
zmdpQ>LVCdT0RjgI93XIjzySh(N8q2W4SBVFgF_Mcvoi1kS_t&7HU%x{G2sDdz%tNx
z!uA-#`}cf0MEtKbpr3^b@;C1ZTMiY~-|`&q{@VD>zg)lXs{tT;KlX-!GXOm;EgdZl
zJslk#0|PxHGY1PZ6BF~%!|X5)ejd00KMx<Dpop}XpwLNSK0a~f6DMWm6crWWVyc=d
z@*2_#it_uFKo}Spn3<TlSy;H`kMSLo|LtS19$=#fqCpD^atxqigFx9Jd(8kG>?IB8
zUD|gr{q=xQL8)nI>F604nLvWFLm<acC>1r7hK3q+8$p7>^8hs)&EaEm+O+JKZRmtN
zIOOjozN8mEUEIKVrJEq4aLe;P1LF}c?xQ@%Ma9HVNGK|uQdUt_J9Ad&obGwO3s<jQ
zH#RXfGq<&~zwO}YbjQov$Jft4ATaDfctm7WbWBq6<0mPpPoJf|%FcQHCim^T_a942
z%gQS%tEwBDnp;}2ZS9}G_w@Gl5BwM$!cF3*rhm@-nw=xAtgfwZY?8LN_xalA^KZvr
z82cB#*g(FhsHvgUbo+cksQkbLWuvA!CP#Z%`!b!42fL8`J$jDQi7$&A7=#tB5IApn
zb~7FkQN$f5?z8p>Xa6(C?*D({?C*^IgRdcg843Y!9+VA00-M|7z^hQk|H%h5eSmE3
zkbNg5O@cGov<|xPK~<t4>72f8sBG|@9f4dS0l(h6RQLuNaLbEXf8l*Ceok*Qiw@3m
z=Fq5ZaIk~;uXa@*^7W4_*PgdPn>JSC*-PC67=B8PNT8~E1np#wo7&k2tW(DTx1V|V
z;F(m`RhDx~zKb5Mi2E^VX0p?<2Q*RUCbl#Bf+%b*>TiSU_keLOPgnA))}hZsnUNUO
zs@^*%+g*pJNVF@mmVOT~t;-@e6&ha1sgsSg?sw>_Rrh14uHY>!6uHvK*@3uD5!I|=
ztzFAvk;6SSqjg<cUYgRhvPglynt;_`wLV>4Vy&Rt`zA(b>WueKN9!M9t1^9t6M;sw
zuCIwq7rXEF>=-MT_3{`Bnk6UM;Q?iXTvZk<{E1^rw)Gu)(SD_)70N>W-@baB)09Wt
zModwtLg9M={iif97mr{UF^8Dh)vd$YWF76@BEPIQn*mGvGQp=l)RtBC9qk<6cUkFi
zpCA*V*S?owiOq&y+1Afpc$DRy;V;izYATeMeYhN(@niWlSYdfaK!A)0dim2xIOYYQ
zBlRZlRXozL^7-X5E4z|PRC~PXl*Sa==OgUxdv?-N<MQ>2t~ICZRFkUsuR2dT&!-+e
zIob{%<(u-I*Wz);l|6Os_2HdwHyjhS<q4sG3WT7qp7Hluz=u40J7)G7u{6tEoVl4;
z9siPUEL7ZzG}O~n&W?!={g^Sr(kg2yN|&uWPS>>p6L`>RPqP_Skd3LWOX=_MfzcFX
zFRH@?Yr~SxFH}Ou3jnh{;DLo9ANKfy+a92Kcu8x#<E3-KnWmMZ;h)OY7Kbo*B&f@$
z)>Z*r>TVW<zhPdB<8JMhI_bLVV#4BN3(NBdwKXSFjgd_}84B1j4c7XNUa&^1_6UcJ
zZQM?6g<)n_s5m6ai6<_t&Nn{z<nw@2eo0($+Q0b7ZNV_%p;GTpm!z)revTrZkG*1Q
z$L*i)bCu#A*hMoW4&>Kp27bBRC!@F{=JdLxK27J!EnQ?o%^Tc^Zx(Y9oYmT3(w8Vu
zo}yr_?Yt2rQo#~+@1$wcXops(O{RHIciEs1)3b#l@8K7SZQy8jf~)S;?j-G%4;ypp
zF~G52CElp7(fn^M7qSiy*55+Dwj~uct@>khd0a^4rl{V8$0xZ%18!`zwUHg6RzJoD
zhjm=sXkv|;UkX1`2HvXxSNL_Wa^<;A523nKJ1mN!+g;Re@K<hX>&wHa5BJ@}xiP~T
zut6K#1w78}Lr!mkhvShFdq8z|C?fY0;9@za_M$AOWXt!_^t-$ERqyKh-t7UNovU|%
znKDDBJ>af?w!whgvC<?LO^Yv4OGDqtCxb7n#ut@&BkzC+S2=8z0cU8l-+f&zN_EtD
zcgn{##0Xt*Z6M#xXP0$ZKXc(7|5tnE!bzhI;SnRMUn%VVXx*~!rSn1J1uk7e;!??T
zhVR6Z?(`1D<#O~Bw|pZzJgdHVgiGy;Q@NGSp+`!{Lsh$CP4Gd)@jj(rFH4-~S0J}X
zcU~{$%_g^#HN~M>%H~gx4uah{<NWrH-zj&)=^>ftJazITcDF<2P=>hd$)?=G*PPic
zPVS;Bl)D$`2o*abc;TUnme!p2oVFzZGob9D#U`Eou57BDVwu~{gYX<#I*Ht{?oXL6
zW6E=KnU4F4mEgGP=Pk?uNyb@1$ilA*U)XTIr1_fkdAQ=jOrG+}HpINbJTBMeFecS;
zFp>SJ+vH75f2in=aqjLH<hS>a58aWylWNlDqC0dMie0sp%zQo{7T*{uy_<KHGM)Le
z&+SrmP>tNlnD@9*?i2WFPt5mF$$9hYUmN`^+~y7$Kh?c;R77fQk)iG66Vb1+Il0er
zhOs<U=K6NL#uw9*ki#>o%U#={4C7ZHF8m_EPMLWMKYS_T)A>ey`#Ou&mn-p2+p*ih
ztv<;N{;z+~pR9cVtX+)_T+Jwj=w9rziEQdmtr-9H##8SRR-(Ctb2}E^*8iks_jN&f
zYVydr6yi(2gbIl6RV|jtt0k2+&ZdQ&xkh^0zvME%2(RY2Z82l_0ITIa;H4BG^(NOI
z6}vIOAXIb(HorFIb<xtnz(KqsA%%VSIRrEP1~R<fzyT~;P_&h;*T43>>CInU`rdEW
zE$YL}vCW5{O^3I2L0!u%cvVv*A3o5%dlT+;FY{uC_=mo`WwLvKUHW+=N!bQ!+74Ej
zZCGb<7QJnC#Me6tBj*n*RRYE95}~knB8(q=B<1WkI!na{i7&RY=(%ou2#T68Gs|(O
zzs&0`axX(=N}x<?h8*Iw2docb)-1ZH8s3?WjI_J4G-?zl<!ATu!3*hy32*pq4NYFK
ze5bv~oCMS1H>{Ia3ycBE_=-m?a=KOgr?lFdUQ*lfbd>eFob)bGl6Ej1GG)K1NytkE
z8DA2o8tuq-a+u!g5toN2K$6fB_Jw&4K^`JUR=E7X&~CBHv%*xc=73X*ijw-^YxhK~
zGrPK6y%F<+<f|6eL!*uM5%zX;bQ*_(8)mgQhb?=Ixo_`W;eL~V<$WunKs$6!cjv~n
zv|Mta?jB%$2>q)!iw^B%wp%&K`+y7jJ_xVUSxB$4nN!_&^6t}ume0cUV>cXLL~q99
zv|uH5<-I~qQaNL9hk5TarbF+;is^`XYBJ=o=af3JJ)pZti$mk8VoLXHs!+drR5s?}
zHx_{n9aYQuU)sW%7yHXbe^f9=MVL)Zsze%ZB0~L;15}~nqR*Amm*1W}TakocWazeo
zW?7$B58PZho0d|-nHwqN<9OzR)I<Q4L{iIRcq+eFVyv$sq{Do6bEqOhvA@#${;qOu
zeW6K^WOgjNz60^Z3Ju*NS|97RkF}Hr9&yp1e!?fKamX{+B10T{x~U0m*FRts6cL>G
zQDVC8w#&0y3Z?L5l2%7>o?fd@RKv3C9<Y75iw;r8nri>*)7<U8Y;j{`ov_jSg3i<O
zrhY1G1=b74cXYnK(h=}ku>!c37ULAL_!8tJ;6@*2irr@qxbe+l&YUB)`k|$+qK#dg
zlK+%J{nIZ`$groLns2Ed2(VXG-0$yPY}ZVuE+0%-_7yt@8U>t-8MKf&2$zOBuD@&Q
z+1Iu)J2{<VsO{9~DqnK@=rA+}NDDgBd#o;8gh9xvMPBYGZ}Jy+Sg~Y$Yl&{y(@Q;N
z&GIE@rWNH|s!z@MPAY7cw4!V7X|Wa)^2?RmJxk7r(yO0$7v>!-&!Xc!w*Uk|EGjaI
zTe*9{z-{oxxoL58S+NFHz6l)<@X$z7({PGDr19ozXW=D<*-Z1KnZ`}7id$wyhecK%
zNK5tDc#k}atK=TMM<Hgh1dCl#yOegab|Xz?ko;=*I)liGZ3noPq}-ZAZ@yxC_6u7{
z+YLYFs?W%j=6F;qQbCJjyy32ks|}ZIaiyP!NWyfM1NZJ==a-86b?N<Y;$P{#w(Jxo
zy|549bsw$y=`yk!-)2Nz(Xanb@BNRG#}>~yqCOmzto*F7-N#Az+L@>Q&d@u<+w+;Z
zqG93+AUIP9w5@4$(fsnh%r)5inc02Rz^lZKx<&!^#c$Tw$B?uZPpK0GS$?N=)I2gG
z=Tl|1(Cii7Xy%-|UnHNV2!}0j36H(Q+-C^CcE{_mlt}5ZM8*&HLkxJ|po{=jMM&{W
z$HnX>&j=kigq!j$hdZeEwRXuJBNU~AS9?I4DrSBXz00!(d3l49Nn7SLY0==vr|iph
zG>NIc!7kJ{I=OPqhSWt<gJg3W*iNtZK77ITR>!&{zg9!TR#XFd?oINMGQ_7KbO5#S
z5ivQPJB0#zQONllaa{N6kE+Z$LdyM82wJvJHmcjUXbd+WnNq%&?g1H1lr1Dqi*Dmd
zE_nmK*}exbz8&ZvA1ZGDiMmgFW%!apl_S;oKb=LE79eLn>;ZL#BqJJw3)@4MrzRDX
zy-Mu>j@9k!MT?S#TMNkLWG$xM*PyP`jNDy8H^%?1l)?eI@_l<yv{p(K+F9k9n^>&7
zrsTGBp@$L#+nL$}!U6b@apvj|^tc#V;&tD8$*7rp`x8CWVn%;nZnV7kf7uf%{&PJ}
zyPIxHX$85~7CNto&aJrkgmd1zns7S)4p8=A^+kQ&=KaWdyDc92$ESMkpV7-0!>{I{
z|K0_0;GZzMuEaF&B$ar@pxKb==80b-R}2lc9I2ciIUAIPzp1Q^;ZDtK^Swt(i@`RV
zBXS`btrXs;VVvV^C*{>*K^6W_T_|wO+5>J+-B&hHV2;abYlgSQ0A2s>8mObUEKgGo
zA92pfb(*fdEZpyls9Hkyhowuvk|Pv;g|IvDaKWy0psk|Ep6|zSMaNk9)W&=`S#Q=N
z?Od|$%UR=X*B7oI%!jkicL!_k8opcB;&RNQiUGv`!$($4ZTW`^=Z96_LGD!RvzXn=
z=Bn|x8sL{e3CMicv-urV0*Xj)bm_<*P%>8Py(rMB&)Rj&Dd(rzl*{+?=pozoZSO3m
zH6w@)!0{hG&bg~*F^dgM)g@+iWm&XdxnB4*(KQnDjcgYv9r7yYV;7q=%~GyvugrF|
z$PE#z?pz%Ka^@9B=<Wa9|NXPfpL8wDi<1|+u~O5ku%z!Jv|j7&5Z#(X61f|iONGkw
z$>I-IH<)fCtTg#6W!5HwjA*AW>r(4;oG)$-yYng*&;)Y3{+sUW0M~kzBsHbSJ8NIu
zI_>*bqol&#NY!>hHUEuTYt_n|UD5h=Pe@flAZvHLSMnOT(5Vhdb$H!)tK^<7_4M;R
z)%DvN$xC-e|072G2tqBtD>@`TxqePxuAcM3P~TYIi7@R@jWdzGV%*`u2|fwCvUR&}
zsrn+qX$m@rT}6^sjam^s$m7~`hrG4d=Es%tv}ibh%m4KU{Z~$XIf$-E^7mq;;J~@Y
zX{iNoivsOIWv)kw!d8JtYzN{RL;2s&uQ=Tj<GH>#VvsCsL1PQ=^GYUN(Vw*NlcddL
zuBeE88A~rWJ!Kd0fRK5nvkZ`zVhuj6?NQW`N~fwX{!wajJM3%ZXyqDJ#r5kP5^t|V
z0SLHM`tQV%)vH<Uu7Hq#zTGF$4hCf_^K;|#agb}Cs!StsWaFgwufmNuuiXSxs6vl?
zEV?VxT3~~IJrZzRm2qUaCXu7Nm^T_qYK71dvRG68d$D0PIU7OmV}Qc?%yf!onY$%B
z#ZMXJEr(+f>(R&iF-{`i&00JZe+XTBrZ2iUL!-*iE*4jLS&QwGPtr==d~5SkPoX7e
z&o_kWm8>UQr;_gt)4eN9=~thRXQnUoV;YotX85h`7OU#|>^I<!e+X}l8>^GMb^4f&
z)8R|{4x9G?*Pzw3?*A|}|H{FPlG1mwl9yk>h`xy2SUZPT`S-Sh&FgV=pVg?aaZhsc
z3tx7JNiyGO)y|^H|8|GPcx8ymMP%!?S)W4{D<neh3)|c!S-E*F{zKe=&+GrbJ-cFE
z4H@RPkLTvDCq|rs{)oYT>Uv`-!7IhM?wu*}D90i?)eR~AT^D{@&4>zZV`b^+Z2Q)k
zo%fa3@zKY(Mm}`7L!>HVp`Fv_JXWdB**}iINRw}ThEzxq$<Py7(aU0b{2#Q$f7Bs#
zdxPM6Wpck9y}jMlWMo{uq;2fnZrMxQy1B^s+qlcfNz2Lr8c2V48(U|4Z^2vk4o<F`
zV#_rcF+nFgO)(QC16c$2Gxm;7=L0?MuLfSaW*g{it7<2PM6fVw_^bK5xVzYU+X(u*
z+;#O*^Vbymqjohgzh5jPgwXJ`yRCNR?76>EfZsHQ{@NEmKR;<d1!*@=2N^k4RaF^T
zc^P?mDUd?SE5Oy;#$U?SOZcx9oVE9|^>lLgc5-tS+^^8)mYa{arjU=1lbzaao7+ma
z?Cg}JZrLc<Ny*9C*+|*gDac9LDJa+}E6J(I-<G>2^tTSWxc|NRKZOEp4`fO0jHkT~
z_(1%co7-K)|L7UGhRnZS_K$k(&)Rs~Blca?QnHFt@~5uJ|J7|REvq2^PZ$1$PD5s&
z!GB^H@s|=1LooMu#6kE4{}+yfMt%$A1Go<0`Yi;0i}*mg4&eGN1b&P7K)Md#`Yi;0
zi}*mg4&eGN1b&P7K)Md#`Yi;0i}*mg4&eGN1b&P7K)Md#`Yi;0i}*mg4&eGN1b&P7
zK)Md#`Yi;0i}*mg4&eGN1b&P7K)Md#`Yi;0i}*mg4&eGN1b&P7K)Md#`Yi;0i}*mg
z4&eGN1b&P7K)Md#`Yi;0i}*mg4&eGN1b&P7K)Md#`Yi;0i}*mg4&eGN1b&P7K)Md#
z`Yi;0i}*mg{@3C9$F2=~S8%I_AGjyukBBr-Dysc|@Sq0&Y3TQ3yU;Mw(b3W~Gcq$X
zF)}f+upU0d!pg?V#B}J$AvSgnPEJl{7#H^u4(`JooE(2dq=AChP}4Bb&@gbYFtKp_
z_VGtV8pt^+D=0+hAR-M=_W%2cG`m+{#em^x{+=$Srvc_Clws!J4z--N^(<M}NJXIZ
zGPoJ(vR)6k(N#wjH?IRoaXb?!rDf^*8G0mUe@o>FHBU#7enitTi{@3baDQk+HD4E1
zw+^`bPDkvL0Z9KmvVa39y<!*G#jSh>Y(jj03uw-4`#!z<`YW)Wi}B`A+FxE^6Z?B7
zIbNuO?Ot&eC^e$$)=Qk%x;e@UHt`DF3HB;ER}`c-XzGsvyDG2g$jtll8`#ADM!zfG
zQ((J#m4V&+UDYjsgn6riP5hm`e`Zx2$Pp25SK%maSA*RkYO%hl5i^!t$=Izd;@-_d
z-hnftpfOE`8AHo00;l>RmRyZIY5~;|;BHn~I>(N-j|I=@>lr<gPGyRG*nLVlPWkS^
zZ@m#pygrtgHL~a*!bQ5UFg&Ou;59vd<JhPtRwMi-<S0v6DG-?2j2w2D@M|1ym(tKC
z%AASYK6T>w>j&S^^0Jg8aWtL<*wZxbnS>R$2V4>#e^`*P*UCG|A?76W#aGaQ<Jwr9
zRUct|14VKp#C$tO@Vi%vggxe7U0BFwFIRJY`e-TmN7`~L{>5ix<{nTOitr@lk~!2j
z&_vg)G=EZ<%d0!g?k;3)XOB0F+qG?PRgq&wFBM;yV{a5!V0xCNiWc4&5G~C=f5ZxH
z=f!ac#k3J4o0eJp_kc@5aCVZLKJj?69dEGtXUWGJ9!)1U7c-n^G=nyHk+8x-r9EJ9
zr&TSoF*xA)Hy6&Fpifl0+mp_^Z5Qzn{7z^RM;BfHQMz2kBTT*btd+=IyLoO(a6VWB
zoU2v^!&-OZ=5zlu%iq`gxWD$Zv!~2wiYI-B8B5?02`b#!LSRi1a$IRpt=RhU=3<LK
zb`i2;M6#XWGv+~U41|ai%Reywsyo!(^Q~8jZYxy%oUhK4yKU!ytFeitTgM|8utb@S
z<fyqlpp-7AaP1N8jB^tK(`a&g?6df4rt~(Bbv{LPN)n)NyUU*i?X5dT8Ad|uj*RI!
zO~RrB`TV+z^`uUUI&URL5=z}ApSY?Wl6<fSL?)vItFEUsj<>44E=S{61{w!k4hd9A
zup${ZiXy;W!ZS_i2gXjYuv(UvBvckQ)4`tmMRAXJvzyma^FW9)-_m9B9<b{TSxpT`
zPP`m1*#jQ>Q1@V)d0p6zY{T3pnJgYDyY>Cl(Qc=#2Q%c%^{Y)k`b5bLa6#M4yT#1)
zUr$lx>bv3IG-9k9i-7w(W4FD2kTRPBt)ovq7aX4JzkK5*KJKU?=I*Ms>iqbJK~k`f
zM-ioSOs*HfKL2&h*|9))^cUCi0J}Njv;#K`K0=-FCKU0ZO;a`NgH;aWR?!4z(IVh2
zX#yon`h;sC`4IJ~ha2G`sfBM_Z-022UU6gVY>H7}u?fT4z4}&7ce<BZ3?~ZP`(nts
z7+1+wq$6x7vI!1Me(0GguOaF_DhoByAw<MwxW0Ug{0bRT_;@n!_GwwFbh}5n?I)49
zk<sYVbX}9(ERE}aIgZNeR`77j;hIw$mvz&({08X#ZWMPOGX>6_xw!}kye7gn21)#c
z@)do<Ln!?&eBGlMJtrFl>#woM@a%gx@;tik-4G*dJjc;|&M!Ygk?WHJ;f1Hv9jS2)
zU2iw?Aeoe*ly@@`ykw!0^C<oa7g*CYt;UYJc=KZ1^EbjlnZBYm#L5}Z_?bOmJ`0v!
zdtwiuVcpFnTf7y`KnM;yriJnGq7RvmIMsv{$xE;rZsz?m5Hl_wIO(#yw)4e#=R`8f
z#Q0-=aa-Mce1yQ{M-3aoPLma~Cj;9|;r&k5vm2xxH~tti4dZkF9K7R@w$*918`0S`
zpZ5s85kic6NyxOiy=tN3B(SW738_r$USU_M_oqyVgeyYUBeZ9~(cE7bH2H?6*#qtj
zf78W6agi;{tTIhSF{Ywphj!LJ`J7WwdeA&wJ@Z-QV_08IrC|;$t=xR5CJUfv;Cm_G
zWLk5_!%A*PE2I~}O`_wB$kZ$wkI`_+r>d5Dpy<8o!=g>O$=dxQ(cVIyEtCT<;)2k#
z*^wjb@8(RZVHP2id7p%}tA+?<8N!pnijOfGGhgZ|&s0yx1u)iA$@9`V0_!qS=<?3`
z<?xCmbN{eOt4RK}yI1t}8PX_rHe@M2#%YVV2aGr*Zfcxf`nvsx3S!(lmrN45j8Haw
zmagyu7{cdM9uBjxMHt6%rk$BshB}78#pPxO2hWM!^&XYu9oZ>X8h?aUe~d=%0hR!2
z8fQon_gmNlCP%duiA37rv%TCC!=(HDc!S~ffJ7rt^e5}nPHJUwRkm*E+%XI5!Hdr)
ztIM9)$LXw_@X2m+CgNd{V?0Dk`UXObFqS&0FWPo$yX>)vxIaGP>hiG0KnECb<x~jV
z@`f`wxHn^cU=OxemD7eK&u<3O?5sEU$0s~%g<>}$A}6w7`SP$!jwiX^QPnw<WOj_l
z?jTg)%m^8Oj}U&;m4;J<$nn?n@8zE5i;77NATA$&ERcV_F<8)DF8&7Omm}2$=Z`+O
zFejFWV~FuX(2bV_Y`sdp;?`mz`7+++CTBP5Sa$C7w|8KxfN$)=Ly5tJSRnVPu+xe@
zY%qS(dQpt6Zx3*UaazvuuyhPW;?NHYzkPaqc?My*-pD_2(yPrFU`zO+u0{6_b!N9b
zgbpQDo7B?`KfI>0oP8_F-1OqWsxS=aIAJ0&;w@;xum`|IN!p%yQsJ0bAN{*~0Cu`4
zyl_ZR)AJf%2<M_T%O2p+2`W7L{O@o&f8H~ME$w(j$7eZp+c&dOV>6@Cg=AONk?bun
zXv{C@j0_{X;S9!M4>&T%kQ*O$YDhE-OFQ0tq=LtFJnw5vSn}@NHoIP5zRwrlpk2hu
z5aqMoIA>pe-bqiH_m#NF%KUh46oN1izcy1$L`JM(4x63QuIS5uRdoi-C!ZNI__NMy
z0Ao`^*q*~%?`GE8zD0=*Jge>RHrK7`k{K@)Ek!-(kQG2yG8{7if^8B4y|izBI?uUk
z-GCk{U%gDMC)p5#njRA7CsP<I9w2sQEHB4?vyoGEty+mbnYDhakbM?<FE(%jc1G*)
zRm<c&&Mm``5tG=EE_fL%0s(2U$?bIUn~djm1_~eISok$Y&lsVfLQ5U&%#6Q2xJjeZ
z&TE)t8uAr=cn@$=i7VRC@_)a4zRT6O7Rf|D?(4GfDqeMEx$w|Zm2Jl94?t{0!w{gU
zKM7wfA*kbArcyElNvSWf!Mq$v-y3gZo^hj@bg8P0@h_<=4M_pYjZ9C>?n;qu-&;6P
zkWwfq-)_i!QWl<2bcf`&gFq>hbZ#e*)a!@fk)N3ZHheEf5A!QuJhzEhFz~~BbWFoL
z!5|k}a8>`S6h{6XB&ov4Km(;+-mOHda*85YR@vjbYh`zhFLT+XEA3K&ZK3J#`0QrN
zz{XWkQelC;nQdb5bu6=Hi=BTHZZo15ovRriO;&zQN^1YeQdeNl`B^{(_jqTcQ)xBh
z(+*2!<bVm!9<Z>B=Rbo()PDO}9`jvu>|V&Ohf?;Gs7EMqw_cCY5jxKt)|19WGpALQ
zMq#4#Riwqo(KW#yj`3MXT626bs6Dnh8#otx+nFKyi%;icJ$t%5PWR07e9X23Kp+p0
zl(z7~wCHot#$>JSM(FNalpKLok_8dujN$MfHl^{lrgxnEImZ)5uIJBI7y4XoCk&iu
zJwJp>YTB?RrS9l8`=E!QEhd5I!o13GBx{^xiJLt~Kxb;Od!)}7z7@_;YP(acU648J
zLwkTr=Dl$yH<3~zUGpba^<O`7Qc~EJ$Zbs|a~00d75wy^jp(NKx<fbKf3b1H^IsW8
z61SXj&gD_C8&Bxq;Xx)%=)qMSrcqKcvSEz2i;OU`?JW5id+En)^)2P9&0EeG-Z(#T
z3Yt<4Y<+3WBp59Z?g1^;_nq*QEalN}<r*r8GzG2CE9O>(vcLc{fprhQ`#AG^wvO!q
zhPCCGkI$4_e4Hh|@I9O>RIyo-6mEsXYL)Q2staHow@)(7DFz8H>(1hZMno)g?s$1X
zQTqDwkGo_<b>#p!yqCG>;0)_O-IPj=6PMPQHs_-Y;KvkV>Ef5D&R^Ano7xGacbUpu
zp+OB0tTzg3dr|HPZj=>alB`C0ecuHK!{`rOvsEy?c*&v=^-5n{(=_kFw$_BLKqa)R
zi~8|^-CRq^)$s~WJiL@{@eD3L)Wg(ib=Z=W-8xh+`HFN#*HGE07yc7iJUV#fddV?v
z$R+u6JyR+05_mIa=#p?ArAt7f=DQ;Aiy~O#9>8GvJV-pd164EsK)uj(CLtdXKK8*k
z6n2FaxpSAyO0ozzo*RWY{>x+hT#HEQhi64U&%qnJ>kF?}{?aOBrwAAj?oS(AFrm5U
zQb35~zgRO_mTCn~y;UJg4%P8eIx|@)Y-TwKR>G-B+)9{0@QcT{e6a>SY@a%7*eyvT
zURAoQVC=Ilx(+LjJ>Y(1NGC<Am)#J4$feXkJB|etCFQC(iZOY{Nhh|`w<emLXF8v9
zYI8eyx@`Axd!1ySnX&1V^>vRXn1`8wEGZaQ1fT7+okX+DhKPK;8|Id0rsEkIr(%!T
zE%}C;DGF(Fz^(VSVs?=JSR;~QkwQShtI~MCENjdS1II0f^qU%J_rs^tfbAy1wV$yY
z%fDlKC*5#>=3Wdmg8uqr>9xQ7`db3mZ33l0*-9pvqh``CrtVP4S3dz!f@_&qmUGHb
zOA<D~xk6q4)b!2zyc0|L^oS>GV$I0t40h7hEEeZhgiw!$QqHpnRLj?@OxbcIz6x88
z2ecU<(-k?fR&*cf8b1_-gDvV1iu6e)6NYfJJ0T|qG<4q#4Zkg^VbiU42_G^%c>?M_
z!Iw@DhsJ^+Un8F-&a^B)vO*GFKjHFVheoVil1+(=8*8->7MWNcb8k_RU)PrE0OHi4
z#p1h{9$i|<__W%ofzFWZKssRV)$#47Q;s-N=m{go?k?DlYvSLU$w>1)6HVTIqgli9
zUWMwDr2G2*B9@j;kPjbbDxgRT!?{Il-!RX7SgW5+vM@-oapV74hUQF{Ibm$W9dK0p
z*4r6ZuW+&AEY|yp)$a1nUto*YgtML@awGt@!*dc%S`;HI#W%pVPiDgQ0K(hiJpf+{
zEDympf(h6~rir6#utR4z1}?K9p63tqrv}=8p2BKeqF24zp%B)!9Ul>*O6kvJuZ8W%
z`PVcKmDilQM{L6N*W~^fpYsjU>N`0FnxdShHc8i@feGi%xA8W{SLW>IBj~|Me@X2V
zkR^@FwuyT{qVsAAh0)EAu!+}gc8eIWq9#3=$jci)BuVnSQ5R-i<}c-v;-7opIl|<a
z@VQmf621%$?-l?wff^)2g#4HBbge~jQiT<vKXa1zVb{lb0nNIx)WpLtZ9ay<MVm@1
zQlheGfP^E*;qQp=Mw18X0%qb@TTtygnxvf#x7`;ZM`i^W2;(iwkzfGA5`)#zis+Mv
z^}j?wzh-Fv-2Le~%0Y9vF=nlZ<P5(LUo%61fn9NzZ9yBv5!&Zi1P0rTC?*}-19Xyc
zdjN&=2$Jl3Z^snH=9<Y(2GC26e}D>^D}B^+FWJfzeFu~k;bdZbJ%Ikvy<dwH5|<xB
zkM6c3p~0YBI`Mk|e->Rx7rJCQ0_kWCbY}VA7b~xhie4MN{bQk~N7_VGb<$n?bB0H{
zKx8jUf!MiGN_sJY9(>p`cBHRtuoM;tXDu5=wC({{ONMCQIUKWf^MBvOoRr1edtd&X
z))+@{i`b$KK|7M1VhF%ICLi*}lOI~Y$Ru11!bD<t5W=Wa)daETA{t98(mm^@>ghF}
z-Mj|pyCp3e>^J1A7<iP^TAof3vfqe9ttkvwiR@T3H$n+qptn?SMu1VyK*0}B{fpp7
zJ^4fMPx1eg;2Zvv;QurnT*i)<pcm!|73T?t5fm8|a_4xOTgGCo-pwSd8?pj+D=_6{
z1>av;5^)?RuhC7WWN?~$)lS$0z~E^Q;S_Z(fjpf6lYf_D*ih5NLvIpx0=fry{RLFm
z^&Ho9=CqZj9TPGeUuJ9w0Hw6<*P*uuu-FpLP#%gk?Ez$o{c!=PZ-nh;qU3f?lW2pX
z&1{-wy=M*I%%8&Ak3I9OlIvu@&w6b&c47^YjmwNfm*qAuLn&XaqzUB(h2OGJ?j)bW
z*$w6G7a3hj`l?L4K`qAIc29t@9JVBRT0-{H38XXZJ~MLaqdyQ7&tW=+o-!wc%ZB^U
zN6DI`ZXz$PY~UW^xWB~QW-}v&e?TT{V9|=n!an1~L>lf%+xhO={*zd-aDF*0(0U<U
z$c)NRCp8fBuoS6*TBTf4#N^t^YHYk)#p|iFZ%yxJVcS0}N=?5KhiHk=MC;C>?)cfQ
zFJ}bc$wX`Ukj@aq?sIT@wGOY=y^m5R27y96c(Mnz=`yKLowR}rT@_0Rzyv(6(1BcY
zfAvX{!h@ZN+&y9ljup~1(>;JV1gx7&e#ZQ}42Sf?sV#C_k$}J+u%h@k(sQ%*?M0Hv
zvd9Lu0X#gEZui^%fQkS2u>~0~g)i*j;)l5E6@Cc_MlTk=5(-u}D_56Z`|_kc&OiN-
zXKuO!-OF`2Gudmm03qfN-?&Be3omo=b%Ej|n+p64i!r?m8~5IuwV4i&NqlG$c}u%4
z7@L@&91i!;14rl~n?2xoay@j!{}onv4-gudaUgSyE#sD3<D<q|3TqjNk_{#%b%G{@
zrK(_B%pHRH@JXT}^#F6>>2r^M7T;FY5TA_!K-=UG-9O$6`>XCRKsGCLy|nMw2nzlr
zBXs<9lRYiMAkxWAicO0VB6#uw@+$snjaU(CwG%!Qvz5FNMBH2$-%%q=eb->eVIGdd
zDDAhO&(ydbHWf)tWu|+k5%z88>u9w&V|F;_tTmw@NsavV5d|(e8((4ffVeKY_uW?4
z|6PQk8y`qQqX<yfZ-P4ZZ=~OqFVPwVnC)xfI1H$ukbhNBe>SvOcR_EX!#HzDR~;l%
zi%4zg;={&1Tm7wg(C^KS!i!$b6Jbv@@G6?Hw<HIA=Asi(y&hbaay_h-%5yGpg!3Wz
zovVW%QF0whS*~=PZXF|Dk2rc<OZgqOUF^MD@tqh$aEyk*woYaM0{E6As_?BN7<J^t
zn@uz<L=kn4<l-}77;VK^{fS700d3u1N;@X^`v)Cj{6#D6Jr66oDLs^H$LU@)qQ#I8
z=T3^nQDjkfi2EL_WfmvOvC2<O(#81U#n}Khi6gw?aw~V$^#dGZ03!y-t;^e)JTC?{
zgJv>7wWVS5HM3+q9s|bL92~&8CQf9skgZ7Q4SmIN&0+tf29eszn;Pv?0&Zis7rV8t
z^NAN-_!E^Ms%adv@+VUS9F+7Uv!J@Cqx}4<zB)gua#U$La-)bOq>2P@0p0GS{V|aI
zCqv6`k!}Y;vfT(LFmKaE2?aU7jmk9WcZvJ*Nq*YXHbab>@7>JNhF`(DO-Q~yz}YZ*
zjBfX7ttHOI!t%Km2}hW>>{)PISJ%wClj3>g)MiY+Y`X;H(|KVLI~JVo$d&ye%8hQf
zBLJHM;>B%XV9c^D(B7m`t|0{xvnQ3CoQtSX7YNt;mZLby>V?+YMMJem%D-q_ZBS~G
zOi0WQ$Ty2KWK_Eqs<a18JVCN)_z}bKCh=s8B7bK?B1_9JGoN^YaX0<8Xrm8<+VROY
zLl$4o8c~IwXF43;OX^)}pC8v)#coa3+3jYKgLjlE9T+C>cxsZ{<R++zKbpPSS{iOn
zVK_eB1o>$wxEivm>>5M$7N-;VCkh{&VNIU=nH2AfdW9jV?KSs+ssDR6Hb#SbShxt(
zQLvq~eYFq&_t6E#I3fo(o8YA~p$=Ud+U&!YXU}fCmfZEVoK2NX<|C>IP8+Ipz|PDe
zj*@0}%u$yJ$c2{4Wu`kr3qy2RSfu$<rY-v2;3D)|NkVZ&?RA?7JtLMm?W4kiZj6Y@
zWN?rQHc|A&Lg^}rI&r!bouZCT(uq&wP2-Q?Y<0Yr{_K+`_X~?qT1B`~&ux~FzKODO
zv&jsG!lO4HJmUR!>#L{#<sJ@5(H=nkjuef@#KL)M(UYS=P3Gt*SDDja^8Gh<0<F$o
z2#@&8jC#)ceau`al!Hpk^l?aTYp$=M)z(Y~vXgUFY+)0>Jc!xwAz9@3i#4IS>fpOc
z-;_Ij$3#Bp$w+g<<OArQZ$7K>HsaJ}l>GiD>K=CeRc+qCi!dE%YUos>YzYuBn)v^%
zglu;6DD-iLAo}1r+dt;$oIe{`{5#r|&N}YhmnbOW-Xwj-(43H1kI!@4kINUUZakfd
zZlIG0Xmt;SUv8wdVUE`7PTCWdo8Zwh+@#!z6Sk$jOI*_iSNd9Ain6(n9u`#&l%vM`
zUmpI`8Q`lm7XpEj1kH=ZOoHtsEVAx639-_7wWPOJVKV5slyCpFxGwCBdF|IEHlFB)
zt@KU6I}293s&8e4*`LzV$#$fWzp2#af1lL;D}%8NCAo7Nb&}X$<MPViZPFx+teroy
zw4%5uX;oHLnR+MHr8p=2Ib%K+8p^P!C2&ti6!gDJ0UXb;=H_v(Cfx0vS;)v~EMAK{
zLycG-PMB#7%#23qzaRQlE&7ZtV{Ut~F-O14SM_)l0O%hEs1snI&J$Zqp3EGIpIid{
z()2S;HP4Y@{ijweS|)2|1{F7Lz)<H2uP-cry~Cd@VV3Iqd2}})W2e77RMUc>4!cT>
z-F9o*74prz-X%5JG*;d}D5TfHOEF7dJLjo7v5ui%7bVG@dPI2Me8^&Oc(`7wR;FO6
zKEZr0K_W@e+vtd7^R?r6>l7`XYxS$bQP3(<0Ri2J<gJyi<0IBZ!=n_fOLcuVRC5x!
zzG|KRApRL0wXX8B>IHPefFy&nc&`6K5@-0pMMg>_E-0vT!Bk-K1Y3o0er$Lc6{N$<
zs<e{NTdYi7v!;v2BG0d~B1I`p-%BX*r*6|b;rj|1x>oohjCxsTD6+I-;i?BRs_qc!
z^Umb*y;tZ{%MH~8g)#bu_LWqmT)WsNFLw&OCgjPs;*SaT*Z~L;Iq5+#A9Nhjj2=Ka
z<k4D6o{cbgP7Lh<f3afa^O)wBx{G}F*Uy|MW&>WJ3L992<x+Yw8Sj#vZC}j^z*mn4
znJp}Lm*=->p(RddU3~sgIaHPU5PgdbHo};g+W^_bOn%H<3N<8$Durd7KgzFL$@zL>
zoW-1cl#uah63OLE9KGYmB$_wL68LM&c=p5LvmP^thMTJxv(xRLmXwqP%tRqhkgV|v
zD1hh?R(7`Eo2iq>GKb5F7n_uJq=fgKE+pap3vSk|F1iwj+^n@J*qeZvcbkf?+mvhQ
z=vSdE<JO-afeCzMIl4}MyPy72SlcwYif#!eTs(wD$W%^<BUd4PTT^)9x0_U-5a7v8
z@?&s$w4rhXJsW0{1vBh4)ShjFUtI0Hy5zWD%O1e41Mt>ip6n;^5o^u-*5&>5m%@O^
zmY;h-%+AEqV{s^Jk9s5YPiWq1d#mo{C7Qrs@a*!IIB54QxL580J+wty0+|^ZVLb_m
zXEPbOBi;M;P()(@<zTeLeiF}u`VFjPg9-E&>=&%U9>(c5!|tol#FItY@wwIu{5Qhd
zvD6NPBEur^(Gq9~(t&1e92b21X5TMp&$a_dNdEy^uF#>^jtt-LGJMkUUu*qGQ(zsH
zQz$Kxa42Z!HrgQ6d?*clZ5rwB=g^F6+hFVCD@NcN?+4Cf4|v036{?AR;|@~RKLpfm
zHKx*&Nq<1`-?8v*58&KvkUhgks>6qZrrB3IlbPh$xa^Vtq76+=x?I6vg-%Vws(mc7
z$p#$2{(at0m$hh@i>A+k34BTp7F_=c=i-Zxf>XOQ!ATdzu$M4D)T-!`<LEnaR+(0y
zLz;2{)&Se#lSZyJ<aN>Iu62NPY&YrdI;1Bjp&9p~KsI;(OQrwR5?Dn5X0nbXN^hIV
zHR8?z-wK^0uXrT(2HpG9`e%MiHs>d=!bxW1P+3EA9G5D13(%%TIz_6aRPe^CPdI=#
z7P%ci&jDGSUt7GCV>N!k9Jn|TMj5g$ZAO30Y|um9PD81+W{YOK*x2WOJMl;?@kh=5
zro^c1$M+#q(%MIh{M-sq{Ger|L|A%<U*-$xusU4PBcmk6dzt8%d}T)wCURNmi_0M=
z#`_#D8CHfjgT<kX)^u|w)iV*L!6sLFEOdL9@4l?8?z#BN`s3$c`Q2PYH8IH*zTKOF
zjPY0jn(n$|B-feK11=$bHXip+Rp}k`IhJ&O>dH|O<LmbJg71v9CoAxCx>_o&DtiDD
zcgc0E6f$K+M{F&b$UJOLqQeeY-@#-`W|3Yz776?gn6?OVU#ro%Vn$0HQy|2A@*%u{
z*?TtQ)d!TvgbVK^SHx1OrtiR_F&7{^G%XU|8=+^Lc;3$YW`2GSrv$x*N%T0`lgQPu
zc&iCl6jcEFJ5CaR7E~kYtr?EcuebH07QMAvY7?Y+uEwhHzM^^BoF(vrs6Kg*aK8Av
zVi0U`58%0jOV#VbC*LOD)L2Epp1282%N*mgzeek)V~dLiJ*1@5-!_W!t@i*o(tzpE
zc3~xPalAtPSU3aX8e7E8#ITW)Bfhu69|nnlHr(DZ>6h69D#*@xPh3oH_-`x^zeqU!
zAX9?AN%nlhlG#HA1%Y*cd~Q}2<4|}kPei7dlMmxJXGIr|4{a}>bj8Xi1Wp~6Tef~Y
zUUICG#I<AHzN2wNPja?eH-1TJmsM5%qHjUg+<QH-!e4JbBA$%}4b1aB1=xF(R?)47
z4>H)mxZ93~53CO@Dyo~-+P))YxFD8IZ)|;UtftM|DH(=W#ZyQekOfNSRb8v})p+dp
z%FIV&Dw(+4hD<*H!tNvbpSaG|V(y>XDLUz)kcDUo?eJ=Kp5AqCsK7w#xX6m63WrHF
z+nj*ZbEEG_oOs3_V3IM(e~WkK{LG_?#32#J9@>Yu9@PYwXu+K)h*&F-U<UkQ4y3d~
z3u*`NHnay^Ah{Lz_cYZB5~f>nA9`2aw4~E6uad_^XII(s=M5!<zWC10_ptQoMv@=&
zD%qEmI2qtArHtDQFaI^phSGUm_Vc9%J413)KYfm&XR}0v3${Hnmg`sql@bs%jLH_f
zm!!HeubW#{zd1`0UM-41<K+7b(6`(M=lW{(-};AOn}^~Ob4ZcR#+y<s-^ya!B^3g8
zZRO=fLR@vJV^=LV!`4}ZCn^?QaF~cX*uZib3$%Gmpz2d?myC~78As33O>x4ANOHSy
zZhD6AG)q|7B{d_uK7ovNP$^-ivi5yG#f|tq;3ChbK+iRRC9fB{+=^s2c}lhdJ&9qU
z>jujdzbkuZ4Kn+`d>7s<QHQeXO$AL&gw&dJjw$YbP%Mhr9*-rf7LhzB2U>>gx>%rE
z<yEV8L`6U5D}#%?H>Yz(;uw?JPU|UM-Ka>Od@){+K4j_dx?@N%c#VlrVzIEQ))ScA
zwURs|SY9r{>Dgjw?&(P<&guyu9&Idl&bDU{%*PPb)j_L8a5s~}Kj;s;_I)kb#HhQJ
z7+iEGX7y9Llw-~}H~JfTr^p;8E}=<=nkOiYLuE@<J7OqR@CS*imW!nG?E<8wMWk2Y
zrTVK10;cJ!8C&a$kY&z4LHtjr-`RCVpV>|oa<F7HX%rt#;XxfGVq+HB>}tBboxhNT
z-Ktlx>}t>jQ|1|s+jZ9cwY)(DUL3l~@S$&r7*XKm<X{<#Fy(Oc@_NA1vHoLq-f{k8
z4E(RO3S=G2#fK|~6(JjSJx{n*UBd=G36Z}7Qx#h3>w5rn%cL{o6o_(W#CGQ+4b@3F
zY{bmJ#2|&H%bVA%dBs0PJz~)1_`62CXYwCUP-Calm%cgr<0nQ3T4WBl6p_#OFDVvL
z4>?9-8hv^CzNH_jsNbqk<}#73wQ_%SN+NyH=s~DT8(kF`ala${i}y^r$Na2ydWbAB
z;-$}y@WVwD%YnjL0CD1s(<^5aug@}twW=H8H%i~4wzv?1dF-E3-S{PKn-Hf+7P%--
zl36Z_iin9~tq970?`&>W@^;R8aFfTp`KSArkdOdvwguA;TyhH#a^L4DU(CN*37=Hm
z)f^J!GHTtuNMSLn@P9WVdgdJGdonyo+a~P98Z0}gN)+(%oI^tGey@HFq!P}J`%K4)
zdei;*aJ0k!8wyk4e`<3=8C1RznMW4LhLf6ST1xKl=#r1KkI$`+oXJ0fn&l(b9#%hl
z&-`Zh-OCkj<&Mxr+m*#m-Z;7{e0*<+H=1>63^Vh~d1m=2WnkN|>4jHo2rrQ?QrZP&
z_T~%YF9f`-k{ub1uRJc5(Th{4HfR@0xM;zkp7#~)=yqt#zXHD*hSHqmhhYZ-T~fo~
z#?{>+0;3}Vt=m6;s&tEFh=m_DIz7R4la*M?(j*9X$V^SHe?1g{&lMw6XzLhB-IGOB
zBOzGd)DX7*S_2mx5pTik;-d8E^x9lGvn!ix!EJ}z&u)k#rSVNvDg~X9L)Htg-}%pL
zl@rFJ$Y*>eWy+g34t>j;&o5G|^kF^jar7wP(w4g(C~Wo9o~lP%gLfsrB4p6*+WQWD
z+i)!9sZ!wD66^S~t%|Fu9dqpJFSWyxUqk+d0$2kdaK6hfx)gxL&)zU>UFI_+^h|Wd
zX_z=ZJU;d?+se9uH+UdL<yjZ}R=tBNUTK9=HZeRNI?e<R8;cSmGHM-m<l9>(IN`A&
zI<M1%wU}i!$E%fae(^AkQ<zTixzW18ELd5TWUnn|i~OqrAD1mu7fj4u49+2|_SZV$
zFo*no=Q>-e?fUWBU5nDjW+*-WCYSP*?ikq9x@jMYi}tLO+LOpf8Fa<{%>t^VyrOv?
zw=l(TqMWG>bpn+bCf1iHpQi;J|0pTdZq&dvRo_&Mivti5aOF$rPVlGoDA#HX`S#sp
znXM<h;Xn2OBU0F8@a<r~bKgy_)qbe1tJQx#tN1G?u|TNG`}S3j4??mX3yW8jp*JEI
z3j8LQA7D5Y<5@|YSF3ANCkvw}Cql<n?aTV`i8-~!I!GVmqc3l&(|-eklSkt4Z~x66
zpYIuX+8e&*@kF>B9Z6{Uv&F#wf;KAN@|}5vg^m!hb4WF0q@SDh>YCE(Civ*jc~=bA
zQH4jckiwc$Pt~35u@Co#!-hr}GMkAbhf_N5xIY^eok~0}D)rz+-%n(za+I5f1#t&N
zp2!-BpZ|2~OGA8~r6UIBx}<>*_VVI$OfqC|jD35|o#_Le{}3%M0anuarG4Vcq6V=i
znSzE|)1VxQCih&Cwl(?~`gtqSasr81@Tj~zt@UM~j|HtVeT8v}LdnHK4tS~u?jE1<
zOw*#w#+BY$pDYW1x8{&z)kLo0TS4@<`_}tSUffWNJX{$Y8|R0r6iEYiZhBVn#o`f^
zCx0>%_|ds-7N;wpl&3uxu*T28`4joC(=!l=&h70U$ko6T*9yh(esN+XCxQc3K(bUP
zX<4VcNM_<u-TW86i@#vSHx<%}X<KM>g3EHfTR7;yMo0~kGMfV<AKva&j3-?X!}QHm
zMw&)!eOJZ%i}*%Ul{UQkk?lHuw~KCwq~S4;i`i-;&(=g}b&4e}cOWx4S2rIa@3KhZ
z>MHO^R%Z%dWf{@;nIcrHOR5kxsA0h0#8Dn}?uV+Avhe+!W}_s7>$?T7N$-3J@MdMi
zQA|%Qn+sip%(|uT?Cqe3)BXj_nA_zNRMRolr3;?fKeaB2Lw_}NUUE`qsVP~RNZ-I0
z1RMz?j2n=e2r%4e1STlIfosF+MWCzVqbrk*x04s%XO$+N=X)owaC39(c_KD3<Z~uW
zs;ubYm^3LIx9ir7txwY^%KDX39UmnllifEH^`xrKR`JnJ*-heEU`jH5MI1U`SHGdj
z@^1ofm2pjyk8kxpHc1QW)H##(Z!-tDzWm>q8c+f%^F8C_sSxiMvM}m3%JtRdM;PZ_
z@D7yW4O#a`!;uWB@6-3Z`DuryK2t>Frel4J?wqFbJActr`n&Cr-Uy2!abQw6`c2nb
zXXF^ya5m~Jk@e|7+JmkOqSi1|p+pZ)v+LTy%}org%uj7XA883}j>Jt&bXB>guf}Nx
zOM)*sV8?{~LFaP@lG?58(N&|E*2z6U%hGs>H#+=_d2Rmj5n`gS>m|h1<gmCqNM_P7
z&h-WH3-0y{0#hqRP;GSS<GTW<z>e&9XonFeXZB}JQd?Wo)VjsC>Jf$<kZp!uJ5R}u
zkmGwm`z6xR{VAc7lD*v$C^6yPDQK(dKQTk3<D9Vu-=vz#MKSMNsWW@P<=tDMnhin0
zC!Dw{zNdup;N6Ro(N3_TGcDi(5`0<G?Y;n~;fLIN8ZMsw${#yjFJc_WiVFvsY_yzk
zn(8L~SL})evM6OM@njCKjer4!I+^WbB_X)BLO_*Jp3C|!N7uFCc4|UWYiuBY&UwvA
z4#@f)fvug9(2&OkaS)YnUTD_kvPIqc5P>S9RYn>J?}vNq`YKaqhk0Lxebez#5e{+J
zxkWZ0WucCgtDTFt*~Q>BEj(w<MLh40#?oduG^}LBd!{DD$-jrHzxn_nDuXLK!yn$k
z0!=&U*0j3m@80L925Y!s8l7R&7T7BNHr=DwAuqZunDllt)|{7F=4DC!)}#w4gGrdQ
zfr*9s%LF~PQO2Q(rHVM(pMsGRA)fjvwlxGS4oyDxkHuZSOYq+oca8tFxKmdUShM#(
zDiGEms*&1&Q0YgpPwHC(r0wNJlg*=+bj=Gdh58eyT2BCtuZP5sC)_;${<~mfTk-by
z36HLfzI*t1s$b)W$jV+Xoy#$IK2$dF-pEO)PG}eLJi(kKDe>`R!nL=8#2fJY8Xoy1
zvF4OI2yuOUsA%T%F99)oh9jk+N94}aa!Pk!OVPe3ly^RiNvIVv?<s4fibYgz6y2{o
z9@5P-9t}RWvWC^cT9oog7Fq#HszzEG>ll|K<W)hW-{(rWOhGVS6jeS{<r;Fi6FI~0
zyy`q*SdZi*nQYuIA>|Qubtuim_!!FZQEg|t9+AnRua4$lALt4l=8-oS;ajWVfNYF6
zcTzVIC2^h6WHAq!LEM1i@C(&9!^zw!^3Oxpk1pu0A;G6Gja)?M$@mB0E1t4M!+SkV
zX>O4|ET>%1Ep*q)D@%KieXz4T;_-5|;u0|%E@vb-F~hk&KuQybPG!beLqo(+#-xXN
zD5I#NNR+|Evj^5NZztDVN$Sm4tAAQloD=2>eR?`o;7aKEc}s^Vq<tkB{dT@i1I^~Y
z+#n!Llz!o=#KNUJfRCY064xj#SLb-CBMkR{Cp~pjDVS3SK)oQWPW+$tzB8(+ZEZI+
zqkx1aJwyQkr6|1<5NXmBwjecb0cp}ingoLMUKJIjC?X(2M0zolgkqs9NDo0lnk0Z>
zAtC!N&%Ni4^PRoF@qPE+ajt*Pk2$hN#!BWIbFDe&`##U}J`0sW=cD&)^2Z+^Jwd@H
z@vM*exWFjRAM9*%9o^Upgzbjz@tw=F4wYYb3$mj_X>3Kv)I41U#8KHOZom;g<^C}v
zeF1pd5YblE)|k4%U)Rub`cS`Xx`C1|Z{Cz@%8!&32n_X}3n-@iMIf>Er|1r{8YLMf
ziAY9qfFd-l<ftKzmS;071FAC*2(@=-pcO;ckdODUbk2@c6fatwyta^-g61d^8AEGT
zc-8sPr8UbMdY(N#8Y_OhE*)0b8k#P!mvRRRRmawJfN&T>0(}|a2Pe=FK@LSiyMqY}
zlKHI|fGM?5R_Y5CDx6Sq&bo5#E04gr&fe$hZw4YfdYloU4z}a^yr37?*Ysy$rOxDr
zjVE76od`?9l{cmoV2{dOe<!QO_WzhD8_^RP!ov@(;W0<GWuAI-zR-+M$oBFGJnSA&
zll5Hl8<btC4O)I-p~DT8*t*<ZJlWSt=cUNyd;M4{@T53T_nkI*PQMig@4xUU2G(%<
zfEQx-0R10R>hCLmk%U8nkv!TwFakCa)F~GdIy@4c#{6zA75JH7yYkkL>4QftVweUJ
z<A+f_*c4qZAefBr(S=%Zc;Vn5lqVbhfbhbtr$}QL6S}gq?0XXjXSI6M&^>ywv5)K~
zP$13<%ZWOLW(RMfD@neZqA6*06v*wekwwAk!@r5;$uy6|&RB%?_ocox*Joqmjt<cL
z=$|oV+<PWfRE|854$gwtX4ZhKZ~63`ca%^)MJ~O{TQeE|K>o8LjY?7IO4qd)G~<b8
ziNt^pC6-c(FXq;jL-UYnsz_Gkg77#^hxA(JVk7yTkE%7auE9M!<!$Ddiq$(eZ@Ya!
z@+c!yQT5e*aqT))Xt~UVxy*7PX2`LH!tt0S<1<|l+?1vJ?g~-LtvXExao24*7G-xy
z4=T6KlNKHLoo+m__RnDIMES%&5Bd(n2bwwA9LbFn0gAgrxetoqK^84z`Mxa*$*!lG
zVV8;s7=vvT?F5DoCk+@cOyFU>09;$+84!AL?D_=lVz8M{s>M0sLq_@@LI|x}n?efP
z#~3*{0#VLCF@9ZBpp1Nm6xxGL%I_e2E!0*cp;+FY03u3<v~7$-k_?;%Lf&41HN;%I
zd>x$a0Wg)X`Mc@5iedge4%b_Jh=T1F3(htsi5o7$W8c}-{dX+Ra}Kh<G8J+<uRq1w
z@wT&3!Q~4$v^FH{<R-~txoe(t6_3wriqfF)6WEY~Ghx?<E+$)#&YcqL>6j9U3r9B_
zQK5cIbXm6u)nbrBX5JmonvELT9S^A%lLgtSZpUY>o#m{K*J|aWmA1BB77QV9^`VLR
zKOi5+=Kh&`_)A2;5_m{_5Oor*K|#iSJGrl|xl|BTt~`CymAls_K(tgRiJrLB0i2`D
zp)Zj$@z?~>Q)JDsAPe%h$Ik|^K*?&}1YdrfDtlVYpt5er%AB(6(rIQ95fZNTZ?pzF
zn_@%sAb{Ef1m|~5GFgD8Me=8UUyIfmsgX?ZtQir|c|UREh4GIok|(W+1c*8`f5$hX
z7bQSbAw{Y=HjpLePLXTMO1q`VvxNtD9TJi&s`!F%_Q#}>6MD4V`(++oooEJE6W%xb
ziokx)9H;-$wS0I@K`G4uh(D7;b1k2mFYS5e^_<)A)tsP%{q-Dy*mwLCehE&7n$$(;
zx^romtKn$7BiY}eIhLfB5bUU9XOXey*m|qwe73F6H9twW%buy})B1~W;o@rPl0ydK
zL+nAOY>w~5WR%U{_8bvBZL$^<n7@tuCk)~*m>-UEl1I$%z&-LZfzH!>f1SLR?>qQt
z%=b;2nV?1I?KG<R4@5qQD6;_#6yt6r2TmZYymMt?Kn>rTP@vjDihb_r<lxQuNl}qa
zeRyxWJZvQMK?P?(P|A-rkiTHqvg$c=2g8GtL!bXhmpZ?)(7_GJUxMLhMZ-z?TJ`6I
zmV6qmJ0oB4a95q(y2}(Djy*ro?nTMPN1Yy1d53}wko;L!^Uou_$#caO_IS(_@8byp
z?l~r-Br%TR%XC|o+LYbX(oCe{V^KpGZhu`4AkkJ-q@j&61j23hz~CZY^>*BaygZ62
zpB#Pov-rC&-D)qjLe(g);*+5GEe#<cg{x0kqhfG%4PIRC#dPjjm2nnz>*-_3HyDcV
z=kKygLsojDaG+GM<}dNhzd+mIR{z?)KQpC+kuFKI2jZ(}R%zvl+Q!6(=TWd7uV)Ei
z-xk5W-{%z@yECKf;VS|kjRiHZH-M^A5CE`SpI(#`)8VQg;`PDty{>l_GJEg{A1qg3
zbYlDN`VHMf<Zy(;POCK2dz2o<E)jD*x70Jl8}0ADpy2A^U1!vcGqcFX=391a4|ACI
zLPzU$Av7(-k*Fhpco+R#mu_yEW7n;U%ZXFk4N9uU)`RvL>Tgu$!7|G~Cq#e2P}xwF
zy3W!r06`?okr^{;Snz=AvZYt!=iZZP&xkrk5nKnb0Sq~w<^Yr~AQDmR+BU>n<fVdz
zSp$m#6h;4|T#cN{;TwX2ERQ_V8)NOBeeZuetgg`$8T^x_+mj;Q@0?GbKZX-qo#z#k
zS(xVull+|@KP%+;;AsYh_ghQe#z;~RQIfN)?`{dV=7c>7HtB~%tZHs%A_y*8WMfdW
zTke={U^wy}Al;Ejfyv2b^hjSa>6<%k)*^5eWq3&s0ixdcv%r2g<`oQ&W%v3KRZ2n$
zd~K!Y*35a{&cHHce9)I%x!RQVGTr9$en}BZ<rCU`m@XGJ+9b+NrdE?1k%_l;i%D!c
ze#->S#H4DhpYQmb-oBh`bYV{8*%wila*K{eIvYYnn%-L~GW1RCAyC!fLxhgKQ}aw9
z(+XmsA3z$-=r3rrZ*0>mW+tz3ck3b6q95unLX+s@^Yw-0>viOOe?h0O{By_eZ*Wab
zPfwrV?2}nOs@yClK}_2{X$REVBCq#rE0L@YV>5TKJkw#D_7p|Ap55e9=Qhgdl=K_P
zDS@VAS2QanTD$~jU)vEhMGHy3e47_O?(9d}nYX9mgWN+o^8`!9S)7Fvpu;IZ%siEm
z($tAe<8zTbD5PPe;%G7zsGft@pa@Ehk2D=NH=JNC>24cYb%LXv<YEFrgj6xiGp$+9
z=zuRrmwA`;!gsl?2qh-qs%R!ot|;Mle7us0M-;!c?I8cQIoY^6F>t@nPW2``4|6Mg
zTV3Zcvz{R&^5#GWZK=g7S(G2tilk}tS9}V~eM9ftqV0L)`%6!)?~UKO%X#hAX}B9R
zuRA;J-6fg|aPKktDw!{R5y3X|njZhW*S+E46SwOZ*V8V%WWHM2->J>DfjSd4ig@MG
zvpJQy$nq_d1WO78f{5DJ=G;bhE}x3Hz0d#NRM+~2K^5l~-{n08L5&iQcU1L*D&@_L
z9~0^A2=~Zv(p@rojio6~w|AtgYFbo0yI+_)z<}`(^J6sOGE24&(_$YjW1gvFJ*fC^
zscI`W77WyAr^v=#sxEwFzf``|(rNZM{re9Gl_YASJc90QtEoc*(>U0ug%qQ7;($L9
zS4TSP(06WR1MP9D_~M5e>$y4j6h}=e&iI6#?AubQoX%inh82)7$AYw7!ZYAZ=AI0W
zY3&b)fgDK4!b|Bw1;mbnW1FDtXTic9%;7zmK1l!lTu?|I(TM?U>@k!n<U%Em4SWZ*
z3mHM@9Y@hZqIJ%tY=Pt<m_z>o*|BSff{JOC|MWlqjOAMW`IlI*K4jm7Ch<W#kbFA=
zmEywI64TdXGvoXYmgEcfC22}la^{?q=suNWG8Wi5v#OaT&9s7|DBFSE;%(6Upd9%%
zQMY49aq|EDg8$ot@Xs;NPu&za&i^=e+t?ITeP*|XK_3EI-T;&l+aAcGFYENoyaW<P
z#^=PJcxNAx5HlBhD3Puvauh^B7^8LX{fuV9F*FsmF3b;^xqsiX|N0sG_bvPPEc^E?
z`@dTlw_$)cDi*P0hXxCL<Nv-l<PtkS)11jtbl%3lt@yhCCoBG+vD(XjUK*EFGC|q~
zh7(6$1x}WSS@pRHQFI@}Qfg}4>qi<QB3i6Zl&to<xut)KpZVIB)N5}XsE4?S^)HiC
zxgwgVZBy}g!xBcVq`WW}4Nh%8)>kk4k7ww^yRd342l+UzxmmV?;&>O<=4Qm!loiZD
zY5(T6f!pr94230MHA53__1~Hmw`v%zpAI;$8gfzgEa8Ymydi`k$BOB-=LqO_l3oOc
zRZP6M*{v68ONE9IHN>*A>%WhbF-7%liQ?KHeb&}|gzf$0{ahQG;)P5lbM#VDN|&x#
zK{!5>r5XG7)|{kzu}NUDp9gu{_9OatQa@U-tR8mwqtMklqc0$!+ofot8F%@b$$oiR
zm|aT84PRGM!t3w_*km7GhQj06gb~~BxnDC3gkEs{ScHx`NW7SxTVygg14ktk*IE|y
zr<8!zej6c+q)mbed3biUHf~J$y+~YL9imOIyp+>2>MC!p9wHkjjmn%C(LKGQd-y&9
z)aE)xiQjuVoYA85v7VoL$mnuAXUpkq#oj9~1q7t>4;~$4W85=2=x-71zBHAmzPNGu
zm>C~Uyu2uN5pKNedNJe)RV3$1t!{^8s#jno9&_?u`~8>35@%<Xuf9z_VI!8%`<sHH
z1jnl8c$`mtmO8w7v59Fk8lJhGuRss7d=f1T9=W}1(GZ01As4Ji^tg<knwSa<A#FA@
zZ~RvO)4}FfwYz@MtgsTs0&$M$w9Nj%xn=}7Md9r*GCy9*k<yeTcgx3CJ>k%`34TkB
zNp`dC{#o^lx3}*Mpf@;5DT~zhOQQGSyPe2*?mJ)%b7aIg`Mal8SQfhRg0aycIY2+a
zqy%;WnBBh$AQy}p5OKFsBZiWY+~q93Xu*Ygp4rV_{lWz6S!3q=d(od|IABF9U?qzc
zp?K>0s*Wo)VdB08xpzUX3Di!o&9vIbBSJSOPKOR2kgJ<?SYvEozati}FB7dMchBcq
zlCQ@fy*suYUisd<scB&LS>dbqrV_;8^(CT}TpmOxh`6TRqE#@7cSmuKw8NfEc}^Y5
z^?0&ko4prc|EV+EP-yv81%&6rkNoNXUFrJwiq~K9nG?;I8!)IeplB<=gw=g`AOp&D
zQxT0J$sq7zMnokF8BX*RCx@L@hQa!Ugq6mEzl^*OD7nFun`I*6q0F$upVTm*g?CUH
zudG5%t=#r~vX?tIH~13I`UfPuFo{(uqhTwfeBUl?o*Mf>z8*H&flscqm#OE+aBCYd
zbv>El!bcWf@o&w7&+<bSPro(g@hpBNl@g`8W;-W!@KoCbC&gtVRSRGq_ftGop)c<S
z>?+C!wq$6C%O8!jD_!ApfTzFCEA~1Xf3Mw)UA2*Ncrl&-$@2;b!!Sc9sHBIIF;pe;
zax~3E8?1=8+$WV2CJ%d1tde=P^e5Q77*5J%ahqIfPRlwBW%?S=Ec)R4vht7r=;HLB
zxjX%9zc*jZ?JQw=S{eYilws?|sSM)~fZv)kMY7iYvFCUxdUs+w$yHBd)#S}$w1&=|
z6+PjWou&5j#ior@MA+oMX;U-BukOpLovUV4YI*sTiZJyvOAL!vfrM*dH=~}!_4afA
zQ#_>d4bwo?hh*3}6e}S2=;=JK_9f5$jPfckY8umpxr{|Gy4)k7oy{2JD_9JjeKi-z
zx!gu6C&-L%`?tH3jQ1^Queb)gZ=P^bKdh9-XvqQz_YSiGOT}W+=46}O03LEpU?yO^
z^TwDa;qU@l(p(_WV8l$j6B_x&{dg?Oh;|Z?B4Y2*MVz5sqN4S~vg+YN%(HU%oo&f`
zueNZCY8p}^w>oQ9Ri+=wkK_bqM%Dj-cvE^0^RE7M1^dM<D@#Kge7@`72!<^`B3@+L
zg`vDN7X9AW_G%o!zPOyBOjB#fXF4BV9c|F~22ytdbLdt~T)i}IqH<1G_tKX$vqvX_
zh*w3+A$wx9V?fWoy+RA{rqY5uw(x#gfu=zonE!Ek-T28WziTb?w{*?r!n7yRnpR1u
z#=EzNl55aVvKGD$ZBLmA&sg#~rRq6lc1tcaG^-u4U?j1ncBoUOU3i~=4p=1Q6{&%>
zHOAo5X3)dgRNz%2_JF2SCfY6R$NXNaJw+|x!e>9^IkXlKw1ACw5p5*z#?^y<Tb_9<
zTMnNH;uf<<bj&pRWC^)_lCAlWY7f80Kx5`hFH|Wd=hN*xFP?=!0-z-VRm#6=Yx&21
zt^GvOw)I#9pHEDTkE_0#ahmD#@n==ev5>fP?3nR;LJ<W1hG_See)^!cAu~nj`*Ix8
z1Hq0ZSoqD-Qiu3l+6)$EPi5MMQ}(Iwb4Hfo;#vWEx-mc*#jzU+Ma!rqp*={#2o6tj
zju7cPeXmQ4>yU%d9}ss2*CF`Yk`<^?2TOK+DrlMUt-eED^9glFv5T2pj^928A8w0G
z7<<All5C$4b|O}OY#cO)O1U9MF&EVWURTecQ9weRKuv>Zw^#nv!E^SLu-Ouuna{fn
zrpFJ|_#$G|H|dxkI}D2*j2QP~1{$MTY9Z9(zBNUKIEx__`WXa2M=5kkn$Alw8ihO?
z>Fghpn&dsE`>FgN2j^EETVG@xz0@qKPihDK2atXKOf(kqn{k>iiOpH)W5$U)4;p&L
zGC~ZvX<KTRyrS*IAU*g_nkJ(n@<iWl@|pDcMp!JJ*XI&oY|_*Y#8OTN*iLj63G3_3
z(cA<-xyyYBFvUFJIQNpc_d+wXFLR1={`PO7^AI}k1t`s2%1~75<6|9?{?_CvxUg1N
z7fYAe>$U&hcCIHXTuvWAa$~#-D!yk<3A;F};f3QG=h<jyZmwMCjht_Cs+`x_?QWde
zIeo<8NAZ8T<xl7ta47U@MqNG$`W^oCJp2XC`g%;)?JT4a#K8hDICRX)c3f6jOVB@9
zX4+SGkPBy4P-&~a$W(d#8boxdJ-EXc7Go!RcC=l=ouZ`w;roE)mJFMjvpjJ&+2dZf
zn18CYA|Y741m@TO<ass0@j){0_rb%9l`HcdU!2Op8qai52B2&S(~VfvffCY1(f#$+
zMuKzN!7~+vOu|K|uc4$@&P(F$VsrXk$N^q`NTrY@7i38uPY-_XJwa`CDe%O(?KaiD
zINRMZ=W&fOZLu1?PjRIVj)0t|DVmMEXwnMQQO>X!-C5XUGPr)*uf0_xgZF6k&6`J<
z(!hE$9_o3DRqw3#G)UgnCnoa~NT-;oNZ5{ssNA7WDtCtHlHs75LZ5_tSIOaE<WyTP
zc$(<r;F72Tx`@SvH_jz(pqO4j^eEqOhVc3-KJr8Kx3A4mL0fB+jVKtsH%f$zNQS1<
zTr20%zX1g&*y*PzVu_xb3%<uC%3d*=vZ`_AFhGbAOPNpYJwsHf4v|k)`D`h&XQ!qK
z;u*>=DbGtFP(3J91FGbZ^zofOtVa3I?C@7S7eD)qkJ++>UU2X^n7<9V5pB{jC)leQ
zyY4fD=BJE-rQh5tncQA3JQR3FM@3l3f^V?2>=!wz7pos68_&q#^vvjU@akd2nW#C0
zknVAypic<4P9Z-V-|8b2Nv95e4Bi$d<mEvVuY6&OHs)B%?bqK{KL(=4byn+dz{D)J
zv-Yc~aDzQ|iFK-|;WR@a=B$7$f%{lcVQu9JXytVe$P&^E^{J`5qd_fWM<=vriu(Fk
z!hLUFP?mMWk54N+^TFV&vt9V#XD#20uzbFy*#Y|lat#xMWZRTI(~cx>nmHTwdM-EO
z+7+Fxcq_Vc^hVnhvpPj$0lTIVJ@)1%v#RiG501EIUr>BGX#C9btj2-N;!sjC+3ZX#
zT|9T+pljd-T7BYojOyIB2`<OYZs={#$!kUmkHn>Fk5N^48MZ_52=1^tsxok<dX^Rp
z#70sM2?R^8?+DgU3(8|pre~j5I2hhgu5r3%dz_OYDF2DJIAx5Av-Rg23MC^Joc1m)
zY{**y6%ALy1ezbDIg~O+>9yguUS;WH%iYr5E(+W_ai>$uL?>`#g8cP1A`#<tMigyw
zkb&`RL#7VB@OF^_)JRGk+0~ve^qt}SVhX(8jF6Vlv}iT%ub=O;>*<aH#$>xL6cbIo
zl|M#X$E)N@z*hZ{R>#D?A8*QTv|}^=SHQIqw#dx-mW+G-45%^~Yj`cRC1+sz!MC)n
zt~KtP>F?PF;*~pB^@nVVy(fNs99FS9)svBYZ8vl`kE;6iZGum-tup(2E`5nRIRt6=
z8_4B10;fMeVg+RAtSdFM-U!ffgQh)JzU7B@B_&<G*}j>fFeWG{>Nea&W4;KTyw4*4
z4L5w*{J~UCn$q*!M~p_laqC1_-&)me2W8+wWhGx5i-d>kXT`de;OX)0Aumr|3wa_|
zA+p;ES=7$F-3vUV2pa*WL>-Twx#Kw|JJs@Bvbgh6ZZ$nI#zOK&%b{+D@OK=AdQhuX
zEwBoKtbFxH#6(sYj2c%!5Y-%`ogiVE){0>b8@7Ya2FA|U&AdkGyjLYy$y&_!^lif&
z??E+=gBSE=3ieKPz35Qjog#)c&|rZS4GQW#asp0ahQ5rqTlFF8>%JDrQ=}4UN<;)F
zut!s2w;~Cri(WY&qI)CiTK5zCyE7JFa(cv}f^dorPz5>weosuK9gSQ_m%2!qt6HoO
zTGkucdCYs$sWV9;{G^w&HLsyN$Kbq-Dr7Ml=3e7Zvchi&lbWI%=8uFW%x`GwR(p@v
zRWRNBw%}|E^^x&)zrii2FEWnk?nqO`@W=-r$oq<4B=@D%9|6sch8s|_Gx|&NfZMkC
z$&<;w2RWxSK23hMD+!$On1^e*MWjH!`~kUM9IJ|e=yH3`K}Q>$4X%vsd<=y5mWj0&
zJbc$$GYqkKf1yVG&F6{ALr?*YAu*giP`PjIJ<p2q_KyPv$L=HROXTJB!f|KY(Cl*4
zu3Nw9?;1HxKl-88tnytf>Uj2}zw({>|C@!oJ)E9EY}~dRny>F$Uw8~UTy%6}c-M5r
z3(nu~oa&?AtA6t0#{F0Ija%XV?HSZnBr$l0y*TulvE>r(7-&t>6lO`cipB|1mL9T!
zNk%N%*f(nQ*wB5RZ{|5LE46-+AE4%&jUt!E5%`J2M1&DjhTh!|lFlUFyZ5E{&^=dD
zB*&VyY(NYcprX5H8`EM;i46JFVkY4EyS}2M!)S|VO%Kc0gd$&mZ(ByT=BZ@sCc@Gp
zE_ZN+g_=`BVk<mb2hwq6W!{`R4FiRv{wbc5CerxfW%AmGi_kb4X0jq`6jgWc9<J}h
z{mbKiPQL7htt+o&+nUv_-3)cVL9Jz$u^f>ZsHxM4m{A~S%j!;ui)3@Lm6rRKpGwlA
zS>`&^P2r=wipn`GH@Fu)F84%6kT<D*KqMJ!-SVMbr_{NpV#wp5$R8W5RWO}Y9qO@8
zHr1{f?i>4JQF7NhYU2vBZU{dQ*p9m}=n5yJ&$tG{XZQk#6k7*#lG{HQglD^P^Z8D9
zfk0tB!*cPfj|5HQWTa<A<hd&wn{w@u;h0->is^YdlMe0NYFCRKYFz>tg~*s_-D4BC
zKujClyoOR~h=|7>=^HP;uSFo)fP{_p!P>H+kkO{><F=<H0{3J&MpfSpdH?7jNv-3_
z^Q5LEgd;^V{^VS_@+qHCZK3#&CAd3>e}{Jyh~}{s+=n#Ob*fN(#Z2v76mfj3YG$?P
z=^H!euw^Q7qxM$iz823Lx6gg0q<5ToYh>&I0qN<<Wjgn3M`|nZrYfv(r~b8tuu)@M
z?Z}rLM<2n=q80z8vnW-=OrJJik++9fB5iqVdjqqKzxvz*g2+vH#1UnUdJOAIa=E>k
zmeYIsjXH}UrGU`^*%zZ`CpzD0iN=Zut=44}Mg;ai(02QNVU7+H9p2bUGwV_1)U|iM
zYJ}>*3Y5dNcDADdwMtUdr{ct+Pha*40kE=2Ai73>IhVF3buL@dF;=)r;Pci)gF_0B
z80|9jKZ``9j5|**kC98D|I^6(r<wP!X?Efip9S1~&>IPxL=$}RY(vfARnDL-gfH_Z
z(WGVZCTQJy$vIg3rh*=*&^9K%E?)<2nflI=C5tP)g$DBSGP35p9nIg1+GV9xK#mUT
zIR!ALO$4oM_V3mps4p`~(SXd088l*FIBaU-2Dv8`a_?FEO}7lr`7`-?!l8^X@fdwk
z`~8#j$vODJ)1A;!>^-y!1!2kF+LZ9kOnej7A#q~QtzW}Mki&rSa0ujwd<t}6tZ06D
zOBqu<hI|{%dP)&$lVZE)2#x*f2?0A@^yry|5$*7BOfT+uE}y0o$2-63Mze>tad%)I
z*N;bds7ww7idb+fXTQj)%{UnXY1?qEXXIFnml7pVYxoz-*PDn#Q#R*^Z_Nv_?NuwM
z8_8K7mi_kLbKgp#Kk=rb=6!3uqcH*cB`}b^kGYCgrHF@V^~?*>JV!IRXcszsjilXX
zm_zPN-uq~}cRr1f`I|`+2{-rL;c}X$?<2lODrL&ivA5Qc+2ok`WeCep2mu0Nf`ojP
z9|o=2f7-ME(#9NwsS;<V60hTK0t5T(G<5(JX5;6OIivgdBKc8lNo#@4aM`|geoQZM
z{ZWm;Blhs;w&sB?(U3@SLr5T98JE~a=Nng<zMJPLCqL-(#x*_k!kQ4Dbo8QZ$U-<G
z0_U^1365Nsoi4M_%Y+Y`!Sn1hHD(5tURII<?5|?Ea-mZ22-f{pb6T_DGL2eeMwma+
z&b63qUL;Poy@S;XaQtm_<C~?9;%X!dBoM_+R|n2e<A(iJIuctK*M2ys6YGi=e5D!{
z-ZeN?+Gm?;IR%Y2qo#^ULp_=CXMrGdz?s~*Oghw16W%ZX#7lSJ*(+weRg;K9P|RRT
zNO3{!u0Io6ja-h`5>7=P33D_eS3lYcuXFes!sTEn@qj7(%833A7C~xFp<S_4fcjFj
zE-PTR&qKQc20uy$px(j&*!pH+W8C66Al_YsXg-T~d)swasrOt7oN&G_G%GE3_pYby
YIjd)yffEuS5%#NZ_`kA$=pU2+1I|2Un*aa+

literal 0
HcmV?d00001

diff --git a/cpp/src/arrow/compute/exec/doc/img/key_map_9.jpg b/cpp/src/arrow/compute/exec/doc/img/key_map_9.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..4c064595c9a533156d26a2baf4f25fa2562bba4f
GIT binary patch
literal 52894
zcmeFa2V4|Qwm00vkfTUOl7NCDIj0#E1r(8_L;(@W5+sg<83d7>6%>&{0Rc&p5fCH?
zK|qiw86?9DaexWm`0TTJ@7{Ia``+LE?n@8VRCk}Q?)uj`Ri{o>pCXJ9rhyaJG_Gm@
zAP@++L;L}R86mDKFh@H8(9r?T0{}n?kOMRT2{Cq-cyJeV5&#q9AmR@IN&e}4I7sTZ
zI5Ux^D9jNc{f!2oY+B(0D2V51iN{6&z^K8sLX1%o!|`^Ez%c^H2pl7DjKDDh|4$-d
z?C5&W&DWDp&%w>hjql1mM=v)IK1m5N2{B0t2}wyYNjYf+8Q@=|l9G^;RFaTUl9J<-
zl2(!=#-#v&JRbZ<t%Q*)p7c*~iNu88;$R6G07y*yb36m|XYJ<CnhVv5pT%>mx%@xL
zq{o>)M&KBMV+4*7I7Z-42>hkCA*DoAIHaV3e{_y`133u%yG}t)w3u)LWW;ks+X>@S
z(5XMg(?G(%(-7?}B%nW9PZ+aENd6dSar^z@cm8nyPg@NDB8(8m0d;_qoScH3jFN(a
zf{KcgnwFW4mWGCw<0KOUGdCwM4>u<l7oVV*5Fh^q0WL0K`EwT}BxPh|c!d;I6{J+e
zq-CUjCIX_OqN1gtWv8QKmpaXLTI!cK!Y6={5{M;gP@vNQ2_p#12qLrqyu`O8BU+b!
z8km0HKqO#NGI9z^Dry=cLB$DTj=^9OQZN}ADbZ{M3M5_!NEyjao|e2w&ZKWm!T*3+
z>QTaLN`XryjVuQJC_(AF9>G-9tZeKYoM+Ao37<PJBP%Dbps1vN`HF_7*41l<H*Xmk
zo0yu}+_SZ_cW`v_^z!!cg+25Oc^vvAEIcAIG3n{E<do+xQr~1|WxvhI&3ji`R$ftA
zRsEr+skx=Kt-YhO>+8Vax1r&Y(XpA?x%q`3i%ZKZ=&kLY-MxLx!Qsz*{mkbdw?Aa;
zZ}P=R%ohnMDVUVvXTCrrzQh1#Bqcj7Nq+L8K85uICVr_$l+2eBUY9gd2}m2DSnhiC
zQ?m-n%$!00%-U}```0oS{BPy#PZ|4jzQzDrFo;-qU`7B6>>mmPZ-S_Q?)c2djXyZs
zeh4jM+q^7D7B=XexAezrzY+Z79f*pWN(k`K@q8j#xMGyrqEt}2oX76(z^BYOvbcCx
z5iS`7<9QNvfdJS#5&&1WDeKB36PG5$Ip0P0FKhPMVu>~7oH5^xef1|yK5UE5TUVNN
zs<f%GpI7Gj80*(Z{*4*XS0nqUJC@B^>E&U)P`hn@+onpY4pj!5+ulKehK+vO4%zE-
ztX<(x3<?=3Z<6zQ=!Xe#&1C~jj_Ie<PVNV3*Pib@cZ+8G4h#DikBqck(qJh`4-3w{
z!(Po`^W4^n>@{NpNueJ%ZK~<BP!D<l?{YoR6@dAl?pVU|8w)O~)*Y&_CViq}V=d)9
z_flvxb4yBbI@WVgkJOkmt7K(rbm-7xVW6n_Ldku`7ItKv>7ym-PqMD#>RJ_}6^=!M
z>2~4j)DoaOH)zPXKQkukW^m_%%_0He43htB*NQ^R)4g+|$nM#RL~gW(YffLkM*u$M
zJN==U|8Xf23;J|TZDncLmzwHG%CeX(1$|`RJLlqML6Wi1@`qwTqF&B|puCk%;Q=S$
zh80w@O5!2_&T*ZIc8{-_kbVEq3w@Lw*1~x+uX^et$LD}jEtYzt^Eadi`1(lNi<Y+<
zAIcaOL^%|ECFM&bdD6i#e`k;te|BJ#)vBx(Dc0$xEp><Vjur*(b*$$7NT4J`%>@g!
z=3sX_ZQ3v?Ahdh7+~F@+vFLoDVN1V-Wfp6xE|+0fe{#ix3b;$AB5p~5Vxe)Gb|Dl{
za<Omudil*M?yvFG;-i9bJtUXpS?^xHlY9F$*^{+Bg_pNK4lNAae}eqc{!m6=*(J<z
zV(}6ei{coG^!&YP`*dkFrm^p^NEbGvYHfG%`3rk`H++Z{TC0jTMUnsfJ)_vyxYv6z
z=ic~SmY!Wvc(O2N1J`CuBcaIASK}~nGCOkgyPfKH<=k`OWK);X{d37O457B|9X+QB
zfNzB{egb_iL4AZWz9~k8hQq2;P{RXoo4i*Rli#)eBj`-+Oa1g=ty23zzS^2feVI<z
z9}1mdpV@cr?FpTFN{g|6$~2BHqGRWZEN7}pjN!GOpCHq|88kQlz?Ed8hSx)kOP#e7
zC(XHDWgzr4mrXrj(%Q)@91}$xuLav^LZTx;iyX@$`#ZhYfRB(cH<`0gIWHd1o~PFF
zw@)-D^ld<28Cp>~h6|W<FC~0l)Og*-YUz%R&5&eQ@z*2W!*jl`lrMN<b7qD4gHON<
zDaoV_#rC&4w$5hU>94XU3KMI3@;WMAw(TRbK^;4v?4Y55cMw2q{JfaU|4Now#AS7q
zZ?umWue*2-^u92JzIeBX=kF^Pq;e@(9%xYGuhD?M&Y)9L>LWIppV+hij`u%Uc$&wk
zxXs{5$8L8Dwr;fKX0*M&mGVQeqtSZ2I)nb*U&>0)V1-v{f{V+Hf9(OeO=WcSoWl1O
z)H_w(91fLR-u*?zS=-)G!wFiS$}4TSM0N#!c<w3?fO~pmpIz9r%YIDVs0dFlc%Lh@
z@*v5eCuj7w*y!5&f|X{t1kQmZ1uI~7i2&r4v9=F=8{<Muk=}H)E*Nn=I{Q}qt`NWI
zMU92MlYA7sn<Bh^KyenPso8LNLxv*O+{JQCT+ZJY2;~4yG(Vx>i}qO!Y4y7KCz$SD
zI*aV_sjcR1c&29b2~Y-&_vw+QUq)^pcoTp?0<fFAB}f2}pNo$+2|%G5MS>6(Z-JOX
z;G9$<po?3p_#*<ast20CN&uiKR{K)~z^tUgWL)=Kp{%K^7+vX=Gj&_BW_;bDCTaJ?
zZibB9WBQaq=M~)%iJtndB++3Te?oKN#xFnO^#>-k_cfNcnfAhO22TgmMp7;>=>V?%
zYGk%6HPL;Y+@Zk*snILf$T_r9>f-PnJ1e^24**BynEi;ib8h0sbZsMZ*)<dL<V#;k
zx*sfMfcij>jIsYwhT`~j53n@^pq>C64gwEPB6=5war*>dD6=C8j}U9vY`|9lHcms-
zHD6+UlC<~QsvSs2Pqo;|``n&;J)*Ezi2PR);Q3cTugy{Z{MM{ffYm3I<N3ZIHBe!)
zhgiub0L2jvJ2rY`k-xRJ3<Bgql3D!=rHR@xthcIs4pP4nfI%&I$Oh9&g8`lSK!w6)
zOG3E|*>^#z_4%Xzq=A*3G{(mEp&)T$Q+TU^02ngh0vZWGra_QgZU(j4+cz~XIWHE<
z2b3wNLm@jwe!}>i_oREdJFjF5G86q%w%s<g{3HDBxy~O7bAfhMugZXEky{cLh~Wa>
zL#_(dKeTAJmiTRA4Q{pFtplBZ4c*Ij>ia`uXRvIwMOjJ!il_)cMkG-6w|OG@II)}k
z_^x(s_-g}qn)e)#kij9ZbUwP)Cn<SW{;iZTh|RofV2oW5mn^q+n?`8an07hEP6y60
z$8^y8ju2{nZ*uG61t}25jqU)f#?)NwSlU%`DulGaUEo8^>$`z~yENve*+gwiRK6M7
z+m1#8jPtT;<-jWPJZ5lLljTo18+s-tL?G8zpj!3Z4sgqyC>Ih0V7Qz2P*xc1^x{HE
zBh6=#$1%)h(!`NULpAopn+8t4h;d*28j|b_ui>;02tYt=H<sri7#xy1o<SP{oxek5
zsA3c7j-rzvaQ;+&zQ2f1`j2~&XI3`cRdc}-&hk|k;LUF)t}-|lao;gMJa<3<WSN(C
zXSnK(a%>*tNGe=Cd%j<B&s&fo(?tgOY%u_9*N;!jVs*cq-rXZ0|3jsrt1|ZwZCajJ
z@d2qRVwjWw^j!hWi+n^c(fFp4Gsy%y-Y(Enc}{K&R=(J-exL`=9&$Myv5+9nDr3;y
z*5wMUz}IbHNmj`^VEr)lBkREy|GT!O&%;OJrEYx5;F>Gu76zsBI8&aE^v|;&^o~0d
za5VM4TN{k5eWq;8hmufOCL%SKOaQjKh~fggB>=My0DiEF0Hhl?V94YN0F&lkb^~_n
zBT;`i*MLa{g17YsvP)lxyA%mlj$F?deO!?3KhL~u(zl}L=)3B6SC(T3S5S88{$kQv
z4r!iYmBQR9DP6v3+CDNU;5>ZN!DDnqPb<;%3#zGduS{YKbXsu?_KDB;6UUF5-gRxn
z!Gb>l2r@*VWdMW|@!fhwrm!S02|z@yU48J<1^tMS_6PU7OPkPX98Va!$>Gb(%WB-*
zcW(1yv}9S!ET%@2<M4?Sq+8L9?7RBkYMDaus0_M8VK7m?*~J=GhZu15W*t1zec>G+
z(n9*86dN#+K%^Tt3=XJ#&TE8v!`tP|m$!~0=No2D$GY)U7Uwy!={x9YG}u=7%ilRG
zQvTrpZduK~t4i(xT4HpTOKM7NNr^d?Yin&QN2wCB@1*?7>V~9>y#Dux9s+Kr_p`^0
zgFK(LDe}Go9^l#NGGR!Q57i&Pow`;dS@eo_$a5OVRC(IVCEP0r9;!9jjWV?{J*nKL
z0eTvB?gIrLtVaJ&3j<mqN&pt0ZQ?;X1fZoKm*++RREiNOYB&7ZAa0&)H^I&m=FjXE
ztfa;+lTN1>WO$L6%ltK~xoH!C_%E3|yoV4!prR1bL8C`xdq?OFlgZG&S*hvc@-oxY
zW3(Psh8-)?oqS2i+kN0Vm0{6hrv~RMu?YU}HJbaLHJ0!!o4D7T7NW#@giO>1ua-g-
zz;z*fu=B~Ut47)~+RhTF#dGJ5S^_?5JQNa5T5}-LrLXwxY?e?LBTy}2{^BIGHtloo
zAOQ#~^m+;yscvE)&mWE=mi97eYO?3IZ;K`6NQ(}><1}rPbm8dj!J`|gRUa;iT<ZEl
z2l*nReQ%UjyF%kC)Ie7-eSSBCYU!^f*tu)HVQz?bpJFS6Iwg+QFSP8-3*GZqIyr#E
zS04}IuT~%~?Js<&Lf9j&c3ww;c7=i%W_E#H90UFePCgp(rnNn%OGLRv=e*@|8=1KL
zrb@ZMg69@$asr>BM=H-|HrRYWneoDzy^bQ-7UrOAbw5`5j#2&Nizs}mx-{6CmBt&&
zx2ndr^izGw^T*eG#@8Zoth=7qh}d6A!MW?=NcG5S7EhNto=Kk-`LI6kXsfZ+bmEit
zY2Jsr`*FHvfY;%@Vb-mSV)?NCj%+TM$M+aRuq-L>Ml2jFpKOfZsy7+B<@}5Ux_f!1
z<CKDca`lc32tKU2#qqZ4o4EA(T>BQCYT9@YLuB2<eCS<b#g_4L5r<|V0H-p*YsOYP
zY<0xOlc<``c?CwU;)C~*`z=tcYY>OrOWBr_>VJ4D`)^Mr02fwd`qQOP>lopDcWD+t
zNIf}s_){SHP{gcbOFeHg%pF|9cydHAdI35nYxCEM1Am!%$*yDi7q807>f~+1>f{mO
zr(z?wY)O^Z!8D11y%*KEAt`S)-<nI2OZ5u4rTZRzZSaBid4YjjE5t6)U)9gfq_DG%
zH_uN@hJpdt8x**0H97>?@vEHW3cB8)?r9snG?u3+wVq^vF@)yjMDyzlVZ4}i;Qqz8
zI&N-Uj80WiTuzMSBR2qc-bNjymVYAc*sHk{*_xl(fOy1*mCUUS^7_avjgroMC;F(-
zXnUwt60$8jN++Gt=838i+CY{%^S%n`69Gu}0ti5f1)|S#8;=Pi05Sc0BE^R^lZdGk
z1OQzFKw60!+eQE0qgh_Oyyi$z!;<(zVzZ-80NR#723;3rJaiDA3eg)k6Kuc=0?_jz
zed)WI*@zDQ&-iMhC<&8H*o+c@cPaS#vjm_&MvYaNk1<$^4zL!jm<)PgL-pY+ngH<R
z;Y+9RALeJ0aIu+H*^VPCa;Z;wyt7vMckU?ILZTVY2gb>OoPJk}+pE9XeN5I?WxtU4
zdR%mXwwY=Zf5fm6`>G>{PKjHopG3OzLY2G9fR@LWd}>+}HcU-{88q;-Hx44|nbB-k
zyQ>uVFj3vV?w;SQF~$a8XjlzKEb#}44yhs!^AM}EZaDTq(A`MjeCS?`eIVb9XRkl-
z`urf5?0`<I=k6);41z2&hwl$d=s4%*;%Oeuf8gB|7J#A{F@`B-UhK7BD`<SUADnm!
zC!@kY1!86`&O&#G8K`zrDdGsgC6aj9OD#Bm?RH$2q)OP9o&ykQH+yS5XIb9!!_oi)
zzvQmy7f7UI&b5z&L@nLfl0hin%xpox&Cac^8Gk@gok8dDI+ik7*v4c|KlCO$5O|*E
zuT_Yjfc+KM5w(K9>WXK7ZJ1}YB>=aXh=t4yaxFC&I%ML{l2Cea&%Mono+HX@d{d2n
zYnS0*KLk%y*zsrM#EDfc5(eEQY7WF^_us#?_kWaNX6je`^yL^@%6u6QNhYSInvyxU
z3gG^4g`D!h*%y~D+I@~GD_)a_^HLUzJtXX?>A&pF|I4BHV}eDamxhK|QHo@lF?tCD
ze0PI{ez$S`hsKZJFMtW7#MyC@OAcOM?n>g~uAXAn_uTH<irKiih{LSi#U;fg!~qp3
z%-!0?+188iuC2YJtE$juZG#Y><2_X&V_6*u9d~tG2S+VG4_iaO8#itIoNW~E2|*!r
z)G9C~n2WoMt(P?)%;mnTrxHw+|F_hYi1DAN#rYvB9{22&3@&T@PC-0U<^TO%e0_bz
ze5J+QJnY3K6%`f5C8Wfqq(q4nqMi?3y{uuPuATzFCve%;)5gQm-OJI<mG5VQ)_2{!
zy;S+Vy&dl<*;(7k-o1BER`jm5^gU5Y$$QqK*7u|(Mej*V-;<Y>RFJZhyvzT`7j$v|
z)8qdk6vXEdv!tZ%VQWns*1zfIb|3ODjpVC{|LtvmPRI7LwU;gAr=eX`LPk_d?xxi5
z#&<CZY5Bjn@egz=;y*L^7a4~9;S3Q&V(d?d6XECkZ`h6>@=G8e!*vYTFCp+t#K+uq
z4A(Cq@JqzU+;t4sFCp+t#K+uq4A(Cq@JqzU+;t4sFCp+t#K+uq4A(Cq@JqzU+;t4s
zFCp+t#K+uq4A(Cq@JqzU+;t4sFCp+t#K+uq4A(Cq@JqzU+;t4sFCp+t#K+uq4A(Cq
z@JqzU+;t4sFCp+t#K+uq4A(Cq@JqzU+;t4sFCp+t#K+uq4A(Cq@JqzU+;t4sFCp+t
z#K+uq4A(Cq@JqzU+;t4sFCp+t#K+wA-wxNGzuK^MC4Q>mOZ+C|H@`Yy5|W>P#Eq2r
zM@IS6Lk$@<1qC@JEj2AI4K)o79sS7@bo7k$G&CnzPcSkuv#_wxGO)3;GP9p#W?}x#
zuMU`a4=EWH85tEb9St4xFK@s3)dBgDyaR*ykNxTZ75~5YtAjIq^YdH(|3x7SGRX6!
zE@dwjmmvzZBLE>a!?8tP<~W$iTC9kO@bisC^-^h(pv#gQd~9wRWZ|UXuNA6(%^H<#
zZ2Of%tUoS?+SR_WxwJ8^@1bjSmzh2*9?0w2I-l5`vD4;%$eRs!#X@n<>ILy5X1j4M
zy<<>|h5;|6gvHV%$v~DrY~4nXMXc`mRrWdDgXyy3YezUee*WMV{>K(yOr~u<D6T24
zs`0X|AW#OI5tZ^~;RsvtQx=YWUHug+ogU0VS{<zk6bclZUd2ARX9VDWpz<E_1DYJk
z7L2_(y>4~UL}$0Cg7f^7r`5OAtEIWx4!Y<=hCX^p-kzRbRH5}=-Nkz9P_ro;bFeo(
zNb}<I^V#gopwH}94w@6H+E(?u`D{ymJps)$m(%ppiUBh>6Tr&3AG#xyyxNjVo8VSm
z^Egd}L@J;{r-ixEK>oyvq-FDwJpQXC6D|kNGY-pdDtoUtc?JIpLiPpe4xBxAGth&b
zi!oR^2(-$qPjoo+KQdfZWA=`58fZS$*~rPet?8IEs(z-YXE2P4Su6WR=MJ@$^Rs@H
zhcg5qe3H9tNJT#11Qw$9LGSLz(pPp*F2O|7P--0_!;0)ZFa|8ksaa+`7#AC0OqcaU
z<dvVZ3nPd0tHk<cV|1jVF}hd{_X5s+5tTA66civzQgG5_Spt{UCbyA(Me6&>k721>
zyl=?>KT9Rvi?IrjSMW5Ox5@^?bm&vFd(Tjln^y*MN(L)>{7oIno`k&~$GzRMjR5PM
z{d&ttrfcDOT%81+Q-CriG&4XC)xx<EcA@Rtd@_~G#+xly7yDR^Sd+8iY3+ip(FF|N
zb9}*SN@fZhQjjd;GiZTXp9EBcR$1NP?8lGQ4RujB-Vb<v_LS{e7z}#$L{`~j{MM8Z
z^}Fb+VA3teBORC=6znv4=A(t-br*5HIIWsK*8)@TTlGn9&zvfoFKlTEYFzqYL{F(e
zmJ1%b`OLCzcOu%uFDcqy73a76qfULgcP(V5AsA848-eG?Slz1c-yr}e#zpHUSt}`1
zVJ{!poC5->K~>Hw83BdmK2!Ad;*JPLw+dA;WjgcbdcFaHC;lH#-L7QacciUtF$^`b
zbr^ZwEFeKDmBwhn5Vfm30dDe}%*h&S_RJd{>3QJjBGi?5ExbFt#WQ$9*HQ=YV^dvr
zUfdi(?8VjN@*rRnOs`Q{>A>zIgtzTOx16vOZJlRrXg<CuTTn4oTqxB50fAuR$H6~8
zCjB241l+UcR1Cg)&G%9CAgbz<P)GyJY32rwtZ#Qwp~?AlEjn3Lv`D9|g;<G8c?S;!
zyyp~p{O~ck=1ya(&(=ez?iZcDDmtn;851vsQ?PC<V%qzHkr!{*9nEQnFqK!C$Cx=o
zE0)m-+p68PXJSBWw==9VQ@1uTLddq~a4C2<p6jZRcp=PHwy%P6eDn1qwTO{>I85X{
z<Sa${An4<qg+Xr<w36b{u7$^jGM#_{$~7-*d0*axG{{nP{b>)+p2B2b{+*J}5w#we
z*y~&<R-}OmQ`Y30s68jDJzKcG*gW;V5>s)NA<0F?Uz^Ndk<{4o-s9Zy6w>wExR0||
zR7-G~0rMMZ`ly`MF@L3wq10FJZeKR_EMIrmP*do&@(bYM{3?pP4lAz}bP=WXxlp-8
z_wMP)XT~-?7T`qnv`r2(2#pBxrsLIpxfg);$;;Q3`8*qFH{N;}Tho<j<~%IN)P(+$
zwH|ssv+J@Y$#YHRM`Gv}Ki#MRTXa-g@}zKC-_C?`Ydx!yX1ME}#O)hOtp)sYvD5AC
zDKEoa7UE08J3C!IM;`GXHP-WfyWp)X(nF(DTlS5&yP>kWJZ7%;zOd-|ga|6?jAYVR
z`RO4vZqqpO-sYU<y+bi1d9eN){O8Ca*@}_T@9BD{eA199?29?qfP?nwk5ds?tE1bn
zOpBr2R}7{u(v3qkwj-$C=X-J~Cnr@pS@+LmJ$<8nTKOh_M<)18+`#aLKuNVvM9!_4
zh)CHBUnJmq6oSZ$J^HArP#G)oW#_j`oN~xFjw{`(_HV~Eohj`U1^I<<r_q0NS~YrK
z@}Z=%c2Lws=wp2I{j#p`fevQLn??dc9QQr9qL1__x~5Q98NishPtdX(B{s8x0)~B?
z3uv8g^w>&eFX<ZrXQzxccS-~O_klfu4SH>I1c2&ddKh-wkm&5|ryIKec>zqS=AS;l
zKgZI()Ib{`JMB<B-mJv^B;1?kgLtu7*Th}vUIEsJw}Yr`5+PwMql0Y8C6=b&&A-&&
zlNJzot->Ne3cm?z#vob_b)znr?Oz#O+jYK`YVMQT<^9Y^Yy5t=)|^M>MF1V=H$b5h
z8(0!zMGG6)<Hh70Nn@(?&<$%JX9aN2yoJWZ>$z8%>?Uv9FGSap1<;0yb*#{{8m1W!
z@Rp^th;w1}FfOcBXz-^LUWQ%F=9$tI-D>-@7kMa<6$z#+<L>&&U-~Yd4h8%|Pb-kl
z$<Fhhf;wA;BFpLvZL=}^E$y0vo1*w{ZjLN5t)H*5Ufb-}FMSk0p>_1O&W7kzB{(BM
z8J*KY$It|)K=npqEj)9rF1m;=6d_ARVCh#hUfNxcW|muH=4jVf<2AvQwWdOYu*!MP
zWB6>htfj;II-=ipKi`j)rQq1?RAtL6ehfLeeAtU+Hr&e?fS+A4l25fK`kV;chobo_
zt52C5)ESwA6QSYnHQ`#fF306d7WY9JVHE1<-Y4D+4463GFr)rmh1AI(Z8N$l7w*&F
zT%=3C+OeR^hc>fhL|H~m^U^PmF@%+m2MTBoyg4y8YL=ljdvOG`I(V3PwNo*J?s2Hi
zj0P|bNB}!?6_EeIYHgNEv_ZcbBaIg~xdJ#=DG-`+jg#fe+PZ*(E3sPJUOR85ag?*H
z#(T;X!`6)CZkT?JWONO<nz39j_*I2$hFe<QT-BgoEJHg#&umTWq*@oZp~pO+W@BZF
zK{P1|JubMC9F}FWRC#CKe0iwTjTg5SM17FCM`JVFi?M(*PVI-armHr{CGJkyxKz@>
z{pU_HTsy1@VlN5Z@EqKZ#@{^r={Mqkb|4i3QfDFZu%!T5jN%-UC2ZtSEaD-87v~kr
zdXLxO6GL0A-+?`zTJ_y+`L!O;8Ev+6d6;qY0ToHR=HPqjGwgA#{PA!Z&^J7h`yl`>
z0ng?bO|2uBz>~vYb%AF+b@>sbp}w&<+kZjttJh~2CY`c@f<AReM_TtQ>|E#h#xmdl
zb;G}@G(4VWGTnpo4Qy_17Km;ZZiugOoZK2acPp#XUT^t_u;AN;(@!=s`p9EgQoLtH
zXy~+c2a1XdUA*bkYR1eyOBGbnhFi2arqzEVP4gltkMddaw+rIFBK=_bldQ&^MQQ2a
zn#}6UadZDw0w6Iq)+u3XH}X1-e>X5QDK<Dr#v!X<!mx#vlhYiMnQJ<d{jJ3(xyM_n
zb+K3Au0qm-gI94NjjO6;fMdT&s(6fy_{k|JBQuSv9H$h&`{}oKSdI0&_li4c&ga`w
zjeS1>S>DOH4QtjYL$f4@Jjge>SZJm3b!@rx_M7itBB|`Owm&?zE>&9#6b9Qxo|im*
zUgG9XC%znXhPmm;GmYVf-!~Bp*Dr@#D__S}s5S~FY(4}lg0F(5!t_=YFX@MP#!~KP
z&{2!Xs}nmyvw?C`2K}4s-a1k<>dn6PQlFWl66HnmPCwDqZyzdm06DzfP(jCeYRpZ6
z(HRu_*T1(ss5<z@QL)M{8!nUcvC`riEOk~yT9`@YT36A^i_TjYG7>(07Y1uYX6HH$
z)m%y2PubTqHT_h(Q2inP3YRnPhaDeiw+L#6>6!TmooR?ram!xuu@verSEk87h>h~)
zZbOsmh-efPXl${)Zc|PLHXVx3qo&qbW_sBip!D7`Vj}rbR;QeFv(4K&@?@VbJ9|Fh
zzkM1iaJoEE&Qj~XPw}mPP154iNF40OvgQ;xT4BYp4UV~WvU4$3O>kb6cqD|+DU~IQ
zZEO^^<}-eu|GDyIC_+ZVl*7i3vAsH8X)zMmvleg#9wm(*;NTUa0Waqo#!^h7tWf(U
zl`}KFj7~hv=%;K|!*Y9j$@!uq=0>A#5er6|Rc@1=T}5*1_KIe!b=F6B%rNlP)-s1F
z`T*-WUV6;6hlLd%5t>iFxvbAWw!0T`HeFF@tIhZ6rHp722Qbl5?AaF|%R1(;N5jFG
zfxR}=;*$zqs^u}ECw1C$h1#vDLVNfKx0zLi`J-F!?~!giWOH_jhp+=A@mhC@O)CZZ
zs1?Bob%={f=lq5hpA{9k)IW8D)64I%+=HT*_5Ivm?PBdbwxwS%(@hJ5D&@P>AkVhB
zFKqzx)6<;3FGjf1r^3t*!uDQb8uJ50aj)x18|~>U@=cxkSKs%{tka%LGI(~U>clzv
zuIEO9t*@Tx***`VyMu+}-U^|G*4oi|JKC!LGy8D?r)_!1%RlPa#BQ}_-|6d0yu78c
z%pZ1ssf9bs-2uFs9bY-nCx1PdVHGo2F$<T%Wfo#Z<}OYn88GTC2*v<Y7i*qugyYP7
zP<FJKiGYr5OaGhN$9GFYsvm8fr-I&RAeC>LfVL&?9rkQHDHUS{_V8SlCD5_;cO+eV
z<m3YPJM=pzyMkyp8cBov{k3idZT8W^Wr7VpC45?c1_|0Jjv)Y!3}cA|Ko9o{FJ@xu
z2p7QEqXmc^-lDh3ei2pM_#W9V{@dd{q@|jI59uH0FK9KA)IUb;w-qwABA6-=q)SGr
zu_X`Qv_UH>US2jHD9BMM%<S$>Mmo|O$hu_2jQar$m$$2~mb*Wg;+;bh0F%saRm9_+
zxpXAH=0F=yE&2}jqkX10$ZIOh@^n3FF=DruV{AG8yW=){YT%x6!TS%q56!gZI#jr*
z$Iw;nt5clt4mb}Q8tQ7++d8RQRnjjLLVs)BTcpk8o=8Z1ACpShKnLV0Xy@(hr|C!I
z%U0*$15resa(LbPNCv|OaVady%Y{|$Y7qdV`x#bclDpl}(}B(<xX0LA1(?mf>loi+
zSkld!A@BTg)AS#Qd+LH^64fs*+Ce@sd@>f8Mq-K))0qgy3dC+D9*lB*HM78-=s#+<
z_MYf=>FzpY5VY&suYHB+&H3L5F<lAkRxB91so7V<>$XQ)<5<-}74g?&n!ayzRX4-e
za%#7KaOzZ>=<_E_CCn*jeb?#$d0IJ15=C2f8}b_EJ4X=$mkzjrZU_ocpJ5vau$7HQ
zj#>&lS#(KD-qSaacs2u-JahZ004+&CmxlwmS7s`j3G0L@IXZ_y>J6k^9uoLgcgs58
zaarhj@^JE@7pnW+BJbIy&)_9BNJU8wzK@qPz!nv96EijQ(v6~dTH9o!LKNDR8dK3R
z=uIa#fea}0IXg=GRq*;!x;EMOjHJ$@oJK^cPWvNow_wAu)#bc6@Z@UBVa2yu=9dN^
z2Jbw(mtg$k7H%!OGN~yxYltjvtp{z>)IhUq6=KPTc6)Lk&pt_$uN-Kw@pWdmP+9kb
zucJ${EaZALX>7i<1m6jtc@jXrVI~kZKIa=-s3MO_45Ya{V2%2MoYm2_MAJPTj(&r)
zSQnkGiZzM#(dhnSGRFt9G(<PgS>~XMXT{ghbI2Goi=5D1?vrjTn*Q_&LSuJp`8x!q
z%z^GMp>A6n&kxt12i*M#U)_VFeVcl@#$ZX)ldm&cf51H2oBd)#W{XuCRgCTwzUul+
zDOP%3TH~TY0kq~}u1v#dSj%v|1jb^Pfdlq-xJ@3;{c%`)_u0o#?2A-9n9JR)WxU1x
z68$%0Rf@c6@F1KVQ`W|#fV$EUUT5R>X1S+Oeq5$UUeo^NTICISb*_OF5t&v>JBM#<
ziNI-J9wvi9^I3}ZytfWa+=udy>cqSUqtYPR4@DYJRrf+H>O5U2URMj{vQH5u`@{+X
zAoc1&G_@Or7@%%dEF+utHogp-*`~elDSKLVGp2Lvi?)k`(4$y;%l9cWz3Z<sU~eB(
zT|qB~nKz3=ASS!Yrj|cOl)cEcIA=7u%GG=`7}5xUonjWop4icxYKakydIQ^>rHH5#
z2uy+8_8s~vx)36<oNV+l+NKs{T)z6Ej_3-G>;&c^nmb)%V6y_sh_TY_{2piXc6o;L
zvSQnCZR$?2QYg!6P0m_E(PV%)ChpA$Ch|*rD`#)ZIjsLoq3K;a9XU@GL-o2liE6P|
zTqS%{mjdLEL_24!PQrW3mTO|(4?s8_Yo1lidk7p<ZB2{|>YPt=>5Ny2Nw39_1gRos
zp>c7|#dMwd1fWT^2=nrO9@|KOqU~O7X7!wJ$_KdqRO-R-o<1&Dwl~~tGjhkpvk3xu
z_5&ST=w_d1<<NLxr`9&0)mi0by+$-i)}5Jfj^aX}*orO~#Y}7%YKYrMl~plQ$Jw-I
z*p0Nf*85wtYkVvP2RLr9kF1z;x#mm7OyITlOG6DaT;DdVCC69tovO}$>m_S(X=WS*
zHwvW1iu{-wsr|H!+MLZ<r^$GmpXaP=Kg$j69;|ryxXRd|{uOPaw!ouKd!N$}LWye(
zkkprqS6uBB;$H2-vtfllVi0KM*@g&LahJtP^pkuSgbS&#`lk}pRxrixnfQcVW>9+o
z;~;c~8TSSv8K4?dmMJm{YtRZy<U8`~5NFg>dppY(4=BhptT^dLAz9&q<M=)vPK@=m
zFjn?{YHwK4uH$ZuD^-r%S&mtqRnBYa4*Y8MmKQ(gvyNq7w*q<XqjTEhqIsFg>7#sQ
zb3)4<ebTg9f;+^r)F<e=OEX-<Iw5VfI)*c<yZh^>GXgZUmo$($n7iATl3S}D#tBwQ
zTq~lS2Kev5tBJiAW%VNsY`|bWJJt|=S!DW@JjV24o~cvzx>&Y__gYY@I!VN%3Ux5w
zsk*OUbSJn~Pp-~Y#lgh_)Jjl@X2#br_i>C<b4%)E$t;bd8#V-1Ftiun%$(rrlDqcB
zoP9~!%m`WrFLXiW%n^W4L`A{;M@(GaYRVp)iF;*Kpd%M!{x0;~r5lqOUIn=Xz<!Dc
ztB8g~%ua<V4Tp`JK-k`++CFB(9=V?EA^>Y4CvLY3LY1^$X5E&G?|RVqBS_*bu2{v^
zW=46F_O^qWAd<ZhYwt3B(_}vBh~IQEir8O{K8T_U@qE6|N?YsesR#4n4NrzvS%rw!
zA^bC(>3D->)otD_$$6Lwt_&O5_Rl1mQkR~9*m|%_q3VV{wy%X0Tfv}u-`rHp9pIuC
zuzH45?ZujQ&AP#d>@5;z;o20=ZUSCHqP2??!;woWr%++DKCh21IS?J*Vgrj{&YuU$
zPZiSBF*T#UKYYNE{X|{<g?x8+n-nD<alMB`Dn6btbka2b5&ETW-~9l6PZ5VyjXa)`
z2X9@iN~aF4jl?dSaHAHQnL9||ZFF=%MV9dtrI=pysX5Dhl)rk}0G93{3V+nN>3uiK
z(Poy;Wq&R4M20{C^>w>B_OrKXAEoZcjV(?SfH=4UjJ~;<hj!R2hP6$5{-tS{F&l~M
zrEd>evol2a+ty=_I81_oJJK(aV5~Gcq{-JPRX5sv=3_(1g|&bv5eBm5l;p&V{7)Hk
zx9z+bA5TiKsVc6<(;MpHkO5b-u^t%jS#gFQ3M{|=O2atMnSu1{4>=$E-TIa~@#!OW
z#v_W4dscJB3VQwKT^9BgcMqr)(Z!_7;M0Gp^^V!sRferDTe0K&>uE{{>!qDi)5}m(
zvt-%BBX{C1qxYL$erS9BASmFob5=x<hK9grN0z5~P$}BTBc&JfvE(MS^KoVX=8Y)t
zY;V{)l;~UW6l}XWbvUYm0MO~=H_r<1IKzs>3x_r)wOj-fWzH9LJvl9MZaPfm#|H*(
zm{mK`G=2ec&S6ptwb-i5jQM)FX;^{6J$aGaX+5J`^4_-esftuPbELrO3EK&Q6E`zY
zitKP$U$LmwQ0xV#Z6TCsg*_yPlQz|O`LPrj&)GonS*uThx(29?*EY_X0sI!Mc>mmM
zfr$r}9S`YsmC;;vJfBim@oXcjaC%%ZL<-l?%-P>xXN^ljyu}2ca^E}UQoj4-W}?oh
z@nkji&Be$=@lO3T`U9NjH5&@WA3+dDT$TVD5{llQ4vXHgB3qf1`sziw+wyH#^P=Eh
ze=l8~!&3GW^V9Wuz7j_4LOF3*JB<IFRZKktE+fF(e!9o`Di1%ZC+ta`{Ogqy7yObl
z-e2G9bVre&<ufy2IYoPmBOG{S<yUY%lPg^^ZTp(FQQO1zVkprj>Wn|n8-yq~Zb7i!
z#_+a4L{aT=7s_AUsYCUoPU_nFI^G}Jbdj-s{L;clsWT>Mp67;U>hcs>{b`tA^EFI~
z&||Tp-3Mr8J@mwIa7OCc`Y4^+3OQ@G#jVL@MgxOKpUy-EUz@Gp+)D9z*z4+NPXNAY
zKCLaAIv{piXQ#%*EioxangLXpl1v8#y=;t7v(!>0%JKcmp53_d;4VXX+tymuHvI_e
z8Fn$SG*gv8<OYNqD?Nm;(~a7PQ}*rjXc}QOXJJUAflWdC>Xgfa-;CE!^ok_@P;81|
z89|?2XzxiqbpCuQ%Cnm2<r-?J6FVXBr9apYX|=to#>04Y6Q*vC_Q^(FEO$`~ydr}P
zKXlCw-1C~)^LJR^Xzrejln>I)r`~pZOl+dD<{9__wlSXU?5k`|4U|qbIgxd;Fs&`2
z+QCHr)3??{yK9o6tEGp=BN`GT3)L!jX5-tK-L<Hd>|=$&jSbdu6svF$?kSuX=HKFH
z+ZI<aGIk?HPTnzs|LhMVi{=R}fstYEBeD{-Jyr}EzIoI<+siZc@~YovN#!_MgDN_L
zBHJI?*2@|9#EKm2Ih+0LS-B4D7GAYos5_R;n`>Y6(Sb1%{-0P~0RM%5b2x_b;p~g~
znDJ6|gnxrBzT)Z?K!fjAd&ZIypbB|fZ!#yCX1J6=ImYrJgy_;Adfqo#I=y^_=!4v3
zB3)N;elaz8Tf1UVrMSG23x6|827Z@(ls_#$I+mShwoqS<cB@^b3W7h#xSXFxdv6|x
z=*a2+gAGW!izjC)z5Gy^q%ih{bM{!fi#@4NC3^9mMDFF=hoDa@2$jKAO2DH%+M9R_
zEvzLdWZ*S(E%~f}sy8#})TP3tDfV7Ows^8+behi|k%Z2qDNny?4;-z(5c6rD8`^uY
zdgG^u(Er15%}l+okK!{FrN3@4(_rU}q&E4WquVw%`C=wYj*CffpRxshjyVKf{8r|b
z)cECr`#Y;LC&tXwxfnGlsKe!Qd4ovmEpa&jdU)O9NCw7+bE&AzL(Gm79b=~>Y^umd
z!+A=j#TTuN*O`Ixe``os>jp>$D3A8ZgFG|a*9+4rN#EFTD0IotNHooHPk56nSSG0e
z09S>w1FB{j#?AFG&x$88hI=qn0FtS~|Dn#u;YhL1<}goP*M$wgZcU4XIrmN@8PM+K
z6cYB)iD&v3?d%S2f(8>|EYnjb8eBJnt>|{aZzo|Z?WRLC2aee=Z^3V37Kis+s2j~y
ztiy{qr}o6q;;pNp^|*|BA!i=Bs}ckN6^`+<|LUcH7!lz(*{!+Xsov{%$S^b2Xax!o
zC}D-uqTn$zxjv*eIi{M-iGbvG2l*mojku=4^y1zix*_r<4+-uigx}erS@Je4;!fY)
zl>TRL?GAMF+PU*q(VlxHLtL^|pLA&)7)xekt=Kg^KKVa4WZ6}VKG)H^*uJ10=LZ~E
zU|o)^-MX*}n0rh|F-6}uAfw~giMmkFk<l^~y?dkz^R=KEJ`!*1GfzHz*lBfZHYVc`
z?*-b*!OF?E#4*HS@*9XvR=Cd@_yOvB^4Qp;?;)qoI$1#R)H<lUr*hY@w|r+A!u+C8
z7qI3tNBPFR0Z)xD#`zTImE7{C6B{t9NE|wxRQ~L!0JB$Hn$mLsKSi;%^#a(N(efd6
z_8FLGl^c@FsZR|%qE<QzOySX>qMqm5v8o-#6vQ@qO$a40t5WEhhn4GJ7cZ-`bm$oK
zdVQoQxArnz1v^@F0#0llPr^^-rp)jXW6aBP{aZD>G5m42%lN!vOoJR!GP)LmmORyX
zO*h=G-$T(s>a`GKOnX5XsYbXkmd%P|nUMe#poN-_lDt~z3+|t3nNSqsqu$jGApjNa
zO~tW^xTGeyXnBgMRk8(x9-V#e<+;ssRZm4uDnIMkmjSJL-Gtkjkl8}z7!G#xesQ8q
z{2~STY~6M%Z@FeO19&xQsa^n+FhfUSQaQS~eE9Z*i^~Jn1!D`e@{`;7<Q&|Ts!QO0
z;o@@y;7KXwGy%90_lXyC(}d`hdki{LP)~{Iw?R?hSy-W1pCDDU$SowsSv;6`#{>%^
zmiHB+dui@WJm=@>FdQosK+Ndd5P)_E@VDImJqebn=neN{oQ+&WheUN;&dL>av0tdA
zZ@M@gGg7Sb4^Y#<6~HN7plBEtKx{aa5dFrNgAVkM<l*C1><|g~$Rx|KRdcF%WS#lL
zOk!tJoS9#!GI(<Ei%{xvxMxLzdh&h7@)K-=oVYyGQw!e#p&})4yyWIo1>70YsEn0I
zY47>Fv0^^AAerkx3{kQ%mwtS0Q&iEmssDWA>hrUpqy_0aii$^XPB%^<D~$IFFg%4w
z%<XiLP-TGOUEReWeG~=#mWD(n(H5nSkKzr2qH=$GM?|w}zzG8IrhS?Ld;S7~=mg%c
zf@J}U8nPmVE|p-w1b{k<7q6^|l}%}6IJ^KSx^r)OLzIr#K8k$(|4j*MADIL&+5rJ~
zQ6QlpipZZ(_=&|8j$%TzO9pp7*9K}|uMZV-aT66ZJ)O=5O1$hXC{-GNk?YRS(SpBg
zBeU+_y2}1VIgJ`%xbdP`6oWp}fI;{CQFVUEh>fh|y@PH}qp+NeRN|sF&(kirUuNjm
zmZD7)7OM^KD01G5#pR0PX<vynrE?FQYD2QBT$XdNWUf%KpAYeL_dI&(eSXwOK-rvv
zdM#ZMWut>JL=Cs6hQ-#)U>#7rkr2i1O*e!nzQLYZiTvxWwL-p!6iM%sjqbjVm`Jk?
z0UR8mB#_hip_F)MB+pr7Q##0z)kS{BC&8Ehg&~LWH!(Ka4Yl@^cyh#JY|g4K9Y*sC
z3Lawn#=+0xZ9?X#=KQ#csalEpN$;`T3_!Z`)BG?J(QF*R&;dnNV?ph@n0NPy;3@xo
z^)NVe#^}2=viP6}QkV00m;ZrG0lQ8jU5LM_j^h5VCdyzidh=hL;H@l&Z)FMbez9$J
z#sAuyil6b{Ufs03g?_nbim^tod`Ggw48H6eUVQEy<$a`zrByUYY7d*9UKh-3=iA94
z0DmiZynicmYP_njjsQno*p4~!q$#XP{HIM?=G(T|PBDjNyK~_(Y7uWTnMRX_@VS`O
z^d`~MxWN{Q5&jW(q>l|4s$+WVaMHo-f}ioYOlJiv$<_~tiOVZ11zpvq%%P-d8uL}w
zOT4UjL3lSWw+e^%j1?#LD#o{PS)Nvqu_5w)qE<E#nk;V`A`p#yp*0u#I1)QvgE1!d
zzek4mLQ(du;!mtNRCwp6$d~=nTzst=!V8c$E6U;zjA<4wAeX_P6K~jn%7XQ5WI#tY
zS02h^Ws!{#X>`~p@GwNU1Ye3mP$T}=*D3!us!?82^GE2oPo(Kc^dFk*UCJL~@4j1q
z;EmY*B241+dsTE|yK&j_G-?<bF~x-#MF&<w8c=adbI^qLt;N)RYSiWv<d$K*5@KY=
zg#hH@<J5R!dJDM@`>z1RU2h?A2F<^E7_SI7uI6Kmd%b!EAfFDtxRb$<47I6!O*Bbr
zwiSnrB^*KWvoVQtEq-3EQ)u+t$!7^1SC3}pW}eg2K5t6A5~!23_3Arg!sd?b45B<`
z?I<_kT1_9;cRJ8aoEmF_`m)%+`>^zMb#$<8Sq-)He7){b_DF0NJA?{8#EFRA{ecpX
zD<6e$VNEKa%a+{VAUA98+w_(uDceN^36h-&Q&sB(_P+!=EDptL>Jvv?yyjHgb*J5-
zoWcO{RG`C~Ao`ydiL*EU=^=<gJ^hMDLuu?iTne1NOqvx!-`^>?3$>0a^w!zeAzndX
zQ>PlIj+nd<IQ}O;m8QR(&v1_3-#de(j&1KbE*m*R>!<k`d+x1xWW5L`(+k3jn!lWu
z#`V4X;wioxCM9j_1r!q=FRjYs))}o);Ks^fXS*>15G9+}P=-~Usa1cmtKG(TUQCJa
zJI6S?ew)Wl9;-bo+$*<wUfn#bIwtfXZv;dVR^aw+SEn|!<neU&v*P{JSE%VB+ro<=
zcaZO%11@!7dSwNE9)06YfCbv7rTZ+JA|mH@nIus7v0k&2^bWx6J*Ip<_vKz-FZQ(G
z==Wy4h&W3uJ8>~Cgr35ozZW*tKfFrs(?Tr$L)n?BvQ2D+(iAQhex3jf;9slpr2MPg
z^Vh7FI**sIIA!%1FNahP_;~W*5dFw?5oy&{^F?{5SPGX@VcQS2+l=(>R+GS38HgY^
zPnnX$br&zHaZRm38LM!KaMmI@`zxWb3LS3G?T3pV?#gbc91f?zKPwq7O_k|3y%e%Y
zY@KnD@8*ehiGPn>9d!8TrT!mWRcYSK?!`*;woW2`=n|Ky+lh4AcbEfH5LfA0#0Ta#
zA)KrZ<e*UvdqTufl*22s{!Kf)ixq&<A6cSKc_s(@bTbyYr(lj)fo2X@4~GTF5Qs}J
zp$G8oG;TP}9Y0{_kFxyt>IJb&uC%*M94&m;YVIxw-G9_rtb%DrF%f4!@u)yjjibG^
zHd~;wg*%F|Tb+jo@z)xp<lNmM25KD&EK4`_C_1w172%x$*TwKESjj2{aUO;Uua4nd
zf$WtW%QZ@hd6JL&#G1ZcoD2&?P3=tuSJVdHn4lkjY1i#5ryFaN&M&X=X|b4s>!f1Q
zPC_ty&~qR<AcLNQTY+b)Saur9fw&i_+{VL?;D*uM{D5&r-(X?j4VTGoF?Jj7stZxJ
zQ-9RSvEix4gKbQC_iUP}mYd=FowR-{B`iDc?Ji9NM@|!w-BT}H(qNE0mP1;xGCgp~
zt%E&-MCsbR@WhJ*;1TKJ8Fq!aBE?N&SHx<0D2Vls(Ei1LQnwoCXSgH+Fh7K+CpKI!
ziHi3!v+Otd3E3`uCjr=UMo=KubozgmA16`Q_>&fwbw5;)HqBiaAWncd^c!YFTg?uK
z)ze_1->^2bpIhJe%XC~XgBr)rEkv;Ky<ndv+l3k^z3jP02cZ(GNfmK+NXSZde0i5Y
z^t(yYCxGOts>oa%$V~Uh3+qsY33=BjgcaTdPpQCJ2kZ>;idPaU1qZX#hlIP<qj?UN
zk6QV-HP`x4$QG*;SZ`?iW)v&0Ejl)bF{ByJ^>WdQ&M{2c_$^s?(85$murW>Y#a*pv
zTHm(?=L>R<8oYd1&HyYk(7zE#%XLepBM1TLNGc@ausB<cgRU@xmc9OoxrAjq4$G`;
zlB9`Cz`Bw0%KmsK>Xq-cv8mc3DMbLLlw$(9{5TH~du`x0tCPghj)pJ?U&MV_w{r^P
zUgGeL@SgKkCEt<Z84Q_N`AnfFSo!Q_Ml?%IxzFdF7#1>zuZz*%bG&)b#pSF2bV?JC
z^^pMq=q$ln5Z94{8WVu8=kh1nG6;Zbej@?+zbIzQn_7x<DmY<~LnaFAA$gd3>r~AN
zgw;YSaP6E=Qs0At*TgkM^|zuE-6?LL(4E)HAbk@ZJE3|5n#~O8K@1a`z3Jkf!&QmR
zi^|+w!zU{mc%LN|f&mjt9_gU_>g~!<gXrW7cgR|w0*3*;0vDH8uj~)rvdy+)W|(jL
zNh}MdS&BEDRpP~t4h)EgrMBFovQcbLe@E%HV<ez(wm!wGPU694nsFTb>lBcI*_>9e
zd0&ffMi)iT>7yp=LY$ZPBQJt|=ov@#pt<0q{2+!qhSl!UZ5|BMnu}Lpi)*#$uXA&V
zptDI9Wt^IEDp2<^%?PisQ=CJ)r)Y29!zKdUene}?{G7Kx_uqYKArLrlyLgQ_GvjO(
z$`{`<!Ak(NUKi!nFBci_=@Nh>bBr<pxa&$3%`Cm2{lwp=H5l-ORZs-MdIAW*ktw{7
zGpA{jVVQ>srTWfkiUsSa>{!n=P*Qh!1h+7sS+0NX`_(y>>D;@3%IaP)nl<Oh0Bh_v
zy4q+wU<Pw)<*&s=%%0L{tgM`PI=~M{1d2fr8?m*O9U-d>G#D#wlzo_256zX*D4nlf
z<FIv6QPH-+CxUvpAtL~V%5WOKrvcb$7~P&}O@HL#epvb8fru05<c9~zNW($m(y<oq
z^IN`Lp`C)lV8KM)!a3rSkq&Pn0g)zz{nWup?}j5T>@5P2gS+%`tWIc;xZ-Sr5OHC~
zlHOLUL&Tsk`0EmpSop6(tTKvd&<P2mQlYQgX)i(?4u&Z7qIM_7w=>Az#9e}H?nkg6
zK1dY?R$BvVh(b!Wh-EfV$CtK$f>2PZo;=)}6Fjmb4#rX+My-CWx56bD<4?v#Bg=DQ
z#(y~Xav+*i1XWGFMoyl_z)|pW(C&ZsZNY!5TxFtq=ZJt{rFV*~j4`igbcr7W>7aaa
zipES}DF&5^D_@!xuvO^_=(=chy6)%15spT#;V7&cuA&Z-y59R3!s1YcV6XE+3M?N|
z^jO~;9e<hsRV7Pjl!J6jF3p|~yoC4TsSn~J8eKjCff0)p4|Q8M$MR?78_NXYqUaxS
zIhGpkV*ygQcWa1oO5cWM%Uir_<7oT21pnd~1k*;ji<s3@jL6J<p{-MX^wSR?eq3$j
zJ@-ZPwnVW7#{5VUdjTU=BL6smA!|7$A?|>(4idP^GynZ+jbYNb4{_ns&lKy$1(*S$
z8N{b49?T>YK3~6~N{6S(dNkRw)aU3a^Cm_M>fFHkE(%J8mBaYopOO}<$hB}X7G}8q
z5I7BK53$md@0NIYxQ$)n6?1=(UOfkW1_XS9(BcQi#>oev4i>!4{vYHHc|-2Jf<JnD
zr+IZKZK`y*b-c)}!EuUB>p`+yjp(cD-by9oftQZ39p3m1hUd_kr}*>Dt;OrIvpct4
zdxoAl%0sYTIf$kRTvqv=Gw*TPcfJW&o}_bbl#4{}qj`O2$(@&{wwKkKD%AiS#WpdC
zdr5osD5MyYCcxJzGl+9vrxp=VwUI}(5JCho;&I`~vYcp{k<3}`Udr#|tLR6#tr8a7
z+n&7_t#m`Go*kGg5)E0q89U%~B@ftcD*&@<o*MU^u4>8ex6XmhwqO5N%2+<}(CS9D
zsnY#%sa(5W-gD3x|EC^VeGzwU6Xih98jrgCgQVLVuFc{315TmDTVG1M%!yMi^9V$d
z-P)y*N!EJA4lz&oz)%zK+_ZtP;kKW-AEt+4M2p`>QN>A#KdEqhd9&~32Yu$Oa`PHC
z+G`;R!NTAbS9ZT(1@4!FAzD*%v*w$Do0n7}&$fnAjx1quGjc@JKfEuU7D2`_&pI?I
z+plc_gKu1nHSL_Z|Npi3<?&Gcecyv>RG5@C%T$E2W#0`+QfZJPVw5x_TauA6BZ{mO
z6~Anwgd&ErCxfvhB72sMG0L79>xnU*<NE!6_j1*BKlgQC_wzjW{d(>{yoNbv&K&3a
zIiKbIdA~o$p<`jnM~9Si^NV%6UDvGljPky!*A=l>!>4-ci{8hmzKZP#*P$26mO!v?
zn782onlvz8i`b*M(*Zz_N=MV+0*!fh9fr`z3aRXwFn({^Iqx~!Uig>U)VQDrs>njp
z-gQ#7$SJ1FvM;mXp6eb7Cp~!C<h8qV4lOR@^}CjME^vkosX%oK!j=PoEKg?62oVIn
zd0t9aW~T@wd5FG0b<avXH<<H%TxPXVyFFy_<Ub`fzqfJtf9a(AY^XQ;<kw`uNSn`+
z06m$O85n8hHW6LWiyeZU++Ooh<(pl1lB;D#Lj8!dzdA=F>mpF{6e|D(=L0Pb!iUh$
z+<in{v0u|FrLXC|E!qM3Zg!ncAzvZiTLm9gi#2_P4Fpo$T$tn0&-lUTz!Q%!*l~Ol
zo;L8x@0xGZzeYfk+NEgkiOW;KEbM*`Qfowul4i2^n0BkARPV#D9;r7Zxj6=Mz9*d7
z$vG07^|in+4kJ3*mGfgIgTrM<e=`EL!1hJ{;d1|DewyVw7t<4b%Br+uaV>AH`IykH
zBK3un#{2n=uX-6kG!`Nnlm}fXJ2NKjZR>bV&HyRQJ@pCe1k+CSy!Zo@{TGW@g7_w^
zRVt!Nd-t2a=Xhdd;4!+=<ju+2{%ISD?unzChMtZNLd%?rex_%-JG3p3E5uBnE;w4m
zx(Tc><L>~M7`c75IC#>!ZKUDBlMHiJ$po`Y{0qKuHVHM%2l80}d`$*qP4q?(s`p=m
zGYWBv!v{VFj4B%Y2}bX!*W@4Jma=Zm{lFoG@O5xo-O1%HB8I$f32kXv(`VtCQh~$L
z>(7<cC&eD3X+$0ehbGwT*0E+F{v`e?IqtTh7uI0SV%mC0nN`%w(((uvqZeH^ku%JU
zAE+h{cCo-Z=H(ff?2lx~kIUXsh~kJ^|3IuS1C0CtDZ`Xqb{+_lj-OK5C#tY~7N4)(
z&i}w*`ZMUjJ`HmMD_I`{X}q3FIVG2qDjhq2Q+}1|)1uC!r3BkrXsf@?t7KyBSc-|9
zGi-;=5x5bCv>ljSI}Gwp1pGy?C|c}MBnQ@}&~aH(u@ho031<Ha1&{CrF(KQ;N^P~Z
zGhnxdUY7h81AF(yVFLGfegneV^VoiE*&DT6t6esdWt?~Iqin#{xA8mSeq)69=lPP>
zeB6gc8N?{oL1wlGii2hQDL?&?=TKRHiS;FKnW>Hty+kLzt`7;i;~ZoL5*~}^8v}MS
z#%aB2qxpvP#$9%&7u-H2s%AcMeKN1f4C!y4Yw|UMJO7#H{Eb(RZ}1XjRtmfUgb0>E
zh#>hL?KW&-pbEYWJ_-Gy3-A_9nQ$a9WjbqHrEJU&vgF#l_cpT#c}n$OeDfJ#as%RW
zuZ*3!Y7i8Q37{Z<9HL678o?RAIrwYC=anbcLFrVcl@pv%xk5LqCx;vIGeek3cW(AD
zw-Ma))?=##gOwl1-jkjOJO0;VzhY~ikDy6{UHCd>CXpQl3jv#q;ow89zcxT_pA-B>
zW<;~lU}qp~1i{)*!vF9!V@bh9n`B2}Ax!X8HRd)TiW6L&FWPiS$v06^@I6-nWg7gx
zJ(Zx(n`t_#sPutNJx!<;?$g?B;kR4Ye(>2;?RV*e_bqdGZk6Rqbpv##my?+S?Ex<w
zS$d3b4}hRc!L+qzjomq!=H7f+(kEaUX@tva4G>(w2Bg-4<li2RD#c3jF_zN4<-K&*
z^-q83k@GoIuUS2+?^teJ>gkH>i>K2g;901m@j3)Qb`Jn{M|m;1ihL5dt%|A>x6HkS
zrS_d{uJ4=`araRR<r`HUHt<~2WuXB*CJf|$v*b1)d@S@RV%Hb_)wnFbr;Y_#0%q`u
zWjSwFew2XEeB&8mZ0sF^9Ygs(P6m6dOG)`A;XZEr@siX*r^yu@2RZzEqvf{C=|Vj#
znLME!BKz_V$%jG=+Py{eLym0Q2ARHd3S6Ah1v;Xh6mGbiiD8mLW)Nk?K?XMcIbq5&
z^E`}K;RfKt$ET|fUH68d-L?Oo#W}D65#4}HWB{-Y2)757ebEw{W#!}n{y}cUugV#M
zt(>~O;L0Zn{QDRDA3LV)q}geH3R*7$)mGKV=Eui?g|;OYx&gVnX*^2c<-Y4eKQhKL
zW=JAt%r-6W9VDRgTUa)bX<pe`60F;IuFG5M$k2u1nl}_xNzK=qCVaPT-|uCIy=2!^
zE*i=*TKfjdW<dOMSm8D`LMEX6Aw^NmH{e?94QGYpz>jh0%PqGClpA1?Z?M9Qh<Zi8
zCfwEz8dE6A6~FBXJk-0uOFH?R^v#m8b3I}0`xVX|eW9=`mCyG)odPdLHhObUufn2~
z4B>8)d-@cz->7_X6Slr~RW!*xK7X|PN|dT!eB^IoF$G4J9J6Er?1g*)PdiQ?_1<PT
z2oP*bx>78Q-oz7<@Ff9{toZ^|Ic+8LddV6HXT+dz{cTcH5CP&gyr;{vSXm4cPPY%T
z`<C>fqv^qw)T8BV&rKgZ9}&kPoF=QG>zVAG0F+WK&d4qsL5R8C=1tY)C@7ZC>i!xf
zy`$PbxXMJ~j%`}!QMKV}hxG(J8ovv4ou$GAjXCb=<b$E^y}UObPT>SA`R!T)rM!zZ
zW#)A}$4msKo@PB%xp%4R#48BG%8djA90HldQCkXpJBG~OgW~v%_jX&p{qhZRo0G+u
zDD47I5ZJ};!&rYFhO~A1$Y+DAhoU3a1>hVT5E?JIu_X$!m@EGgPZI{(!G<!l1`n7x
zfhfx;0Wf8&fsbc4X299I`pB&E4A{wMf6F5Nj$^9d0`81x8WI^{P7525#~@;sxRzak
z>PN6GyhrBNBP$W?coR#OIP>0kJu-~Fjp_7MoF6q^DHxaIC-M4GtaSR9gU(T{^>PXr
zg%ew@q%Yup1qwX}L7=6Y436r_>8=uN>>3iYa1rRIzd)d25lNtiNcUdl7l2wXRL*7E
znIal#-aePZTVk6ac&Aa!XAwT|W;v=HI7D;cm8G0Fxd=QN%?r!MxE!-8Z`P?%d#_e+
z_1(IjnE*A34_LNaPp7RDSy1)8_Xa)EHTt~7&M~#_%N)!mlW}&5`ZJF8a^n7oz{q`)
zv0&V-NtUG`xhIB&8DC@UuSRA$<d<Z<+7}XkhpM08ApJ1GTspzLAUUPb_sl;Fets@i
z<-dSv<zE%GAqGlObhawy!vc+32NwrO$hhsKMa3O!Hf3>>r;c@+c+Kzg5jD5Axv~u^
zI_6JG!P9V9^mYDMz%F@M68n%tSGzT0Oh%0Wx}eWCwoPP4bu|cXe)0w_;v^M6-$npX
zsJ-|JEikei^d|p68^JGuQu39o@6cOonfk=KYAI&UtQIq~&XBLqzw0HyH{pJ3&+rd8
zBL*e=<ZEMlS-^69<1M<jcDu<%ay0omEC$YaU`}hn^JBEk%Mwgn0!Q`mJQqE?zNc*u
z7f?kt?|Dygr?5g^A8K~B=2#wYP{N5~$f5|QY}J@EXt=_3(KZjJ$ufLBuF8DvB&OM4
z#h~}jNbCWBt<~9zL-N{`K+V?U`kg9?6CvUIXvVAab9L3SeXqPcSr%GhSl0l*J<pRP
zB8w%=tjj69S($sT-Md_?J-9c++N>q&DuG56#pu@6QEPRn;Vdr2RJQ}=jOExDN;5|e
zJ`Z)YJ|XK8XKy#OR4d`ewP<pzBfm=TdH>|C%cuh%1VYu+^A(<+yA|6u_J&z|xjr@F
zhPlQqvhLOO%4#KdO+(qZV3V3M&!+>9NY~rd5>=W5FH57b&EiyVo82)IQ@KWLaavvQ
zlRtt0oW=bP@y)DCib|X%T!i?3{9A$kSAIV@P(Te{h1};~Gd{G+)&TQ^7%W<xc;Aq#
zY5hKUSC^60I&gAWi<FVFySV)xIDLj@tnmHfxAJSjs)#M9i$mrbttTcuyrT1<<qs0~
z|L$vjIBM{<)?{=FB9hGMrSI)>4H|_NC%Rv7xmeN=hsTQ=-p?QBynWqAw=V_E0^rqP
z6uS%{>rsLw^3F-Ot0GQxp5jp*JZ^G^(m%EXgax}e_4+=Y%3ya8g|KP`6YK*OKroHE
z)8m$!-J^<6UevQef${NAW;gw}{+u*S=f1umd+$dEm7+W#yc`U8(B^hv;O7|7=qjCz
z+GXcA6SltBtTPTq#+m}*wqp+~PlN}3g7y^KhGT^5QG0L)-2H|p5&ZhQNgg_-?~2|s
zcv)JvR71AMVH|Av2=7M-zwP>PGPR2>G>k<9j~PvMPRf!R42g*4Wdb@kMV;X~Hf)?O
z_@Ifr<`{5KF?NJ!Ru+^U)I}|z{@Q0|{vv>a*%wLU7k(J=lM4<pIQ-xEm<6cOv<*n3
zcQ~F0i=uPUksMPdlvRUo`O65vMEUXp>%JsP8Nd9-^UE8t(p0pzfgD%n`b+%SXSNau
zcN0PJ#-u>c+|Ez<^qvtgh$(+uQ^h)gL<N&2Kd|Rw-S<<z%?2|mFyS8mJ>mXM{r0aq
zrqCJE0<IOsEka0P@0t}Kuf{cSx8aCw@5};zd<+bj0l!v;%!pvu&u-$Qqa>>gE^vcb
z>Ac^azIOsdr*t|Z3xXxA2~xaA;h}6!fH3;x+3Vs?7Q#Ja;s{E$NifEzR^nYxo5(%Y
zp;H1;Fxd}ryWSdKTP5SZy1d<*5?i0%KgH49lyjNS7bS{S?#3#EY@L8*`wFIbB^I|W
z+Bi`!spkr^u;6*=iTekYj&5ro^nJMU_UelXga8&fg4}^M>@2pp<pJXvvTv4oj;l!^
z^199C>V8IrU-|H=_j}RS2dZ0*j~ErUJb)ETb-x<44mMD_B+P7-4zir+;$Cr9+jlT@
zJTAppM7%GaWxu9;_qz;`M{^%dvSEw{;`WymJp#dtmlXw4@w-w#a(X4mXgeQoOGpJ~
z(+cA8qr5V8o!@$?qi?V$dSh9=FLJCxYh&5XB615nh)ZjdH9mJmxTR-u;T#1D`7A5i
zM7}eF$dal=P}Tr_lVDm<U-_C3%wx;~o6mDqROH}bZgT}iEWb%gfnzagruYK+D(bF|
z=%tZ$c!3(B$n?`KF7gJ1Wcc(g>su9h`2q@PgomvWP|!(ee5k{W?`{LXnB=7$5G{Oa
z2e=E^4*tUSJ3hCNEcg4ek=sBhpO-qW?zI8Y0gJND{hCLv5bN2Gti_$jIQ7ChLky3v
zN4b<Tg~8551a%h{sUUP~S(ld>0hZHv)%wID%9dLzS|T?%5=ZS^Er-}s+DDy*+j}{F
zi!)a+lJ+Yv)#1|j(F^r^KighU-tG_ELX4ok85-7pcnY<dyq8yoXcim4W_NJbpGu=1
zdQ_jZUnSujdS^_v5+F57h{Y<y>%4h}F`?i3Oo*nRlImA%OXHgn5m|1{rQz-pn4qrL
zGOF9$e9Nyd2)uxP7Fx}Ew2~xb<v92;i1TH_7wNP6ppQSK58j}px}@^hKE&FdLhfu)
zZfM_*<oyW#LysCpxr>+iDifcGDC#5M=p=7d)W1UOdC=5(7KQjcIp9>10l1iV`D@?A
z-=D|VorA?FF9Ok1iB|7QCXyZQB8Q2KU02^933zZ%Ri_|l<V=<;BBshTz&i&pv0%6+
z-N~O_vS9>oKn4{gdd4vJW=9%d$|j%(V&3*@9{sp*&k`1}p4g%_)53`GX@Nl}zWAmF
zM=6CBO0bC+SHr<|0t^3sS3F$=2m5Y-g+G@(zTLa1{=`z7sz>+VTuV*S6PL;*xea&)
zETIYPmfsL>;C06bz=ZZL;)FgQho<E?SH`5H>`$!@k7Wn2X1@f0DB0y(1h8hevNd0m
zE2Z|MN0|B?BOgH|dhC%p$J7!9>jc!&sG%sn5`rj@JvVdvc#7nB&H7B6&u+Os9i<r~
zaAiqxM2EgtReG7jQ^{##d%e=oJCT0=e)~?0#2PEiG=)GUJm_=bl?K$=TIN;CXilWU
zw^Bfj=CGN9OBq+iJndB$EF5?>ar;Fr|Dna^!SDr$F0mgC7UQh5;NTnjT!V-5Ys?0u
ze%F0<3nA|mHvc=h@s={tNwsi=anT?5>k{KVrU(i=E-|;VU+q7!?*XJy&*oq2S#0mJ
z+sX6N)A(f_0Q76f)q6oD805^_2IRn*GSEvBqsZXYr5Bc4P$gQM7{?rlaS%X^quSkd
zY~EZyVJN+cjI66pTz^Rhy|Oa?Gw~H3^vMfW(*SHtjzcc%S%`LfEMneFA1wBr#Wo=8
zvI(y+T6ISTu!bcaB_|$Qrho5$-p%TP&I<2NTX>7Fnw6D+XhZl8lN-}<e3%)0_qGkl
z2E=?UAI}9u>Yt<W!q3GBdgNV6IHm&=54oP83f-S*>MB)Wyix<z$)2)B@%nSCdtRn}
zuQKpeZOWS79ouQt;8VK76WFe8ZpWg0y&-mWu}4Fm5V`kW+r7-zWhaTU#~D5Tr*6qD
zynadbxK4-mVKc>iOK#K``(i=^fC__^@ELB@j>eDPdDjkaQ(|8)$tPb&8(s;!hmeyF
zJy*FUf~Ix><MpG7b-=T<&%9e&4A-2XX@7Cg3^eJGKH+gxLbdZY{476BED*;9B=$DS
z&7_<xO8D0G-Y6Tc2>*CaD#LBSR4QBIoyU@x+r@y{c3H?3aC}C)J&UrQU3F9+!7yuR
zRb6oGw{RCXZ-J}$4DqK6yp_DWMOMOh_4b$QoFA4Pplgc-otTx;wy7V&%Ln7A4fq{6
zaSXj4DN$7VDPP}!<{?%FF%cK)F!uL(fl)&$SjWsCQ>n{~sUQ`Q_b-jujw^>L>%(Ph
zu6Xa!2uVPNCF@HHuet1dT(WiThU3BXx!=C5XIx;y+vE9kAePXHXs^iy=vD9K%3IBB
z1;6RI;M~WhIcSWRVN-y&kLTwm<wsNBG)45pL~BLihZ<GI+(=1YhKDd~Y@xLFFgkOm
zdFJ}#X9VlG#$66hLZD$3R`ndsiwVwag&TgIH%WvV`Wg~x%DXVaCnD<h#>HMj!ztcs
z4Q1O!HD^_w+-JAg6CcD(T^Y<Z+wZTGCq2_bv4nyRf5F5-ib?}By3UW{r%B-OymWKY
z#58YG&isk_+3;jRs`_ce%Qo3Y55_ar6Qlp)bfgsY;h9T~zRPlzd^MnvE{(U~wp_Dv
zyapH52+EUyo@QK$x8LJIYoV67Pam)^tT^C(SR+0Lcl;YSC;gIw&3HH2_3;KI<`RG?
zUt-`M1d1H2ZsM2f({9dyS^n=s&A%0rG`><xKvWbwG#KG7-zuWAQ?p4{B&GFS#MoyX
zm=yeew{>&aE<cAeWAMUe>*lLucKj))`FGGCvrfE!wAjI7y-3e?v&HT)YzyYn1}`|e
z|6m9Rf>0V_EC+iN+@)@C17fu<`K}RUvApCO4QM)g2Fj8V>Ng}ZkrRs$+z0afChCNr
zIkU;Eh&#NrmqCWGz&S*W@tSmTMtTx+XYGy2WXTkdEc=##k-W0Uqv5-vuDJF*5*i4T
zJ{)RZBOxEKCQL`J!`Phv1X^)SEv7DO8<w|EI-OmQX6CRXHXt1MKY#~B5pJ`<+<=TI
z|Bh7}Nvf^wr<yV{X`ngm@W|a$hm!|IQAp$J3Y~DV{@LB9qPLsVE_}V3eqc!ZabIX=
zKo7`axSK~fsHxd=cp^tiq_z!dawmq_#1+LbfQ_K)J*8+)@r-$TBaX9|rIzD<epNf{
z97D1Dqc4w&!nIERJjlhn))$VB+=+4C(cWRm2kHm|T=7benJH#HH-Y`=H=i_M+!nq9
zK{oNbkuuNvwjzxrZFi<a8X&<!rEW^R@9&(aJ4d(Kv2SEP8xJFRr7n`-Kz^V%6K-7r
zSkOMb@;`8p>EH7@L_vMI>4`voOmy^P8FlOJ1M?>^=g{+rd$#bsAgntVS}n@VCpEm=
zS(|8IZ!H$9X3D_Ew3J>rkRbFM)f+z5=D0&O?13?)FBhXnL)5oJCL*6t7%Bt!OIHf!
zUz0+WpL~sP8&5RTZFZM-32m!?<9sC<GD&s?2~F_3$NWLo+d2r;eb~>6GkiRyK|{gV
zqF6`3dj!FUL0SM2BQF=fKrKo;XS3ciECx?AjdwT3eD(L=8ug+X!ZQvzzp|{{NL+=p
zwt}(icNCDTXM!ZhZUDMAASN4-5b_@wH2gCegb45q;XffG+h<joKUoF-GRu7f@)Sfv
zP=A0;AqGxS^Mn~xEMgG>?EC`?wff0oAWOI9Km24iX1_W+mM?Ji-}lw}sAc1-2%1(M
zD%?r1&&swACkq<gS;|VEotrbcyu{o~AxrjdpbR=)Mqm4JS8Nol7jgA&qs`H%RNicj
z@drjyY|iq=+dKY#i;DnL&Mj<Bj0`%ig^nZmXT~Wm#+uc3IhtN451Bvjj-@C+t9k_m
zyFs6h^qcXt>lzic0=x%~YiVv%^?_>BC!G*X>v`&^W+r&@d|O?xT$qAy{YH*j7B_f0
z5F!rIS%&rns{CmH_p2Y0s=z5`OXIHmN|x8BoAPJtKTWW?F{j_-=0(M<+=Zp2Zm1Tv
znQ(>KvLqP9QFO@7W1TnQ!YoALYumsJwza~o^Y6#~TBOph6ep<EqAy=4RJDNIq`oGx
zQebrlbcs+mHV7Ji-NiP8p9Eb83B1pF*h*M!20TJvHXD14VY7NGEPK%XeCHS7z-%@q
zBhuygjmpZ<db$6X7G@nQOh1Dk!!h8r5syy%BGZT;*MgtFj^GA`M}KjIQHLisAg($>
zh<=X|=6lO(o{rDowQJS{E+!W-NHya$G6k_6W2|%r>;`eO<I!2cc`cu>&L21t=D9UQ
zL4f!UBxq|gsrM1rsA1q0!9iIXZ;2L+@W(U*$-XAGM%h6uaZ!#knfN?LefGm!=liuf
zq5>QfhH^ax8xU_&q^;BTPhRM-5!k)CV>qea{M1|^aa9#Qqd5<I$C;Kjgbr{CjiI_u
z-d0SE%;~pt@9nU&yKt<~!G7>~Q3KCOh+U6S-3tf=Y6O`CRYEW}d;-CP70U!1DBz5o
z<6&7x3+)F63k2k(6M_=#5)}d!>Jx7Gb^Sby?`I;K;P;-N2y5Wg>!!|M#|x8Zki7_y
zUist25(_#nxU|xsyoC^n=c#g}aRsd>>l<edyUQ$mSilktg`->hD*`;JX%(W4s=9q4
z@P3m~eiuSyx@gmSWEq@QoWaK#H3x##583<i9TtpsHh&WMZYEn4F)F++K;;29iTZ+Y
z;7JH$E_R2*$Xr~FsH`y=B)yge6nHdA3ihd|q?CR7T9s6%+t77!`j)P}5II_BLAr!%
z%PXRCN{GYyB_q)|To}`A21O?dVbJG4%^AvJZs}Q^>GU|14uhVNwu#@NV(qWWK@q<K
z&vGxDNnvh%^L7+{Z%}(}cxP&yu7CFi#HpBrj^ky@-t)K6+^NjxWT%mq!5<v@v2eM{
z#pU?6u?<K77ZGbCd4x&0JNxQe=TggC`MFZ5V>iqDX3t(cG!@Os*}DCECS*xUaLRsV
zMq?B%s9IV^hjDo*WO9_iAm(};6FaAl-8t(<EJ8$8slG}^#OVt^&kX)r#E>own_Tf%
zF@hh$gw-a~vGUBu{84jR4-XjMl*)vz>sXyfp4{-T#oP@_M3OtVh}!Jw(1vLAQp4L|
z<UEO>e|Aj-Gu<#O+Ec7YjWU#UFgV#6WNr_Ko{>0~d+v*Q_j}$@-v>sEBu~)egCW;s
z1$6iko2?(P=?Dss{>tnB?C?3Jnx2kJ0_~Hiee9-{2Usb<fl)?D9=et;$`p#oxrP((
zF3^b^SpC+kqx4FG{57RTO`oaK2-fU<VSC31e)9MiuSPIrM-2%K(g`5YkB;E*z=?L<
z%nKGb&IN>~u9k<3Bx<;G_k<`NJ*4o|$EfkhR!GoXyqL@m9!|s6f>T#g<EIl&Na?3{
zf1aVoX1UGw!ZlhDNs>ENAiGc0%0hV3eNhMuVg!^YZ)s)rW4$}R7yC9;$G-q{JznG|
ztwpLQ$CtY<+CC1MjVhrl-zAGzUD`27YMg~0Ez!5&BaB4cZH<`QA73IAe?wJEN}gxz
zh_*UMKJ(-hbyRSWwU05}B{2{-`Xt}xu={NnmydTC5c2}tm21DzoXn4C=7E&Yzrauw
z#{IaQ+5rTaRdEcI0CZLmp;%eq$%q8MdK$C=yq#wh9uJ>C$1q*cf2}hF`sWLZ(lI_4
zJ||x1d3XlEGg?OHD0mLQukN;YJz^^|dqUT<G|5wu8tOHGj^a*KN6Y8EP&jw|1O%-O
z!k4}O$pQGkbb2t|LcT@-bED`D$g=D-93*?2{FH3eF~=$JXyR^t*r@XMYp7jS!a`{z
z_rAU7S`%^Gt1cuS8g)x;JoBpNh^_WPJG8juBaSzJD<%I6>-99AqQY{V1ql+jifX4p
z&i4)aYJytBPb_l;E!(VqE)F9N2Tm(iNZ`j%9n(WUvk4%#wfvgL<Ln;JsCCnaI@({d
zCN@P8$Z<2noCOf-LbRVX-+*u;(iG+PpODSzQs<b*&(M4jXmA*ZlO7=Y@?}ZKQglVt
zg^`zbmkt&`j=p#ygT76aYZp0)<1;g6-9EpZfiyB|yP?uP=J6ikP6)AL`7%5+Co2go
zDYh<SX8I*2pq_s`_}<i=$Co(_FW#|Ho#*3dG}3qi&I9k5$J#g|Ylqc~Bn5%DG}6#?
zwZtQOs}$F4;uyO14*P6;s?e12d1u&7kfWludCHjvfzWR?<6%vfZW&y}vp;_Rk93w0
zY2vHIwoTDTH~XgF6kZpE`TluV*h2T;`=QKqifo|(Bz5)SY^qhNr`|kFr%i2=&={X(
zM-McSdEZEvI>Ify*1yO)V&E}`+KaViB4_<Z9nXRi?srFn_tXd_Xb>aW(#MXxg8Y~%
zo;PIO-OZEtK#Nm*pp^qX+SJg_kLws#?Pl|`#KHN%o=<mS$W8GD&YhXsFti)UnQeiz
z#IM}Az3b!}eg;Kzl&ZtO2IsB^OPwj}y7ZRuE$u~Hp;DJ({c=aM!pEt$8T0l6gFFYC
zcO97*E66yWMniB<HfK$0x;WcxKrX24OiVaF)%tNkNLGidkTY#fmH;%6!>Wml@Cg9E
z{FN7SyL8xZI1#*ne?88hw0)h3A2aU-U5_8rmQ7sRv2u+;>|bFYOY>$oAq4Rtjm0tp
zb}Dm`Ww9wU0<CRx4Xliyn@hL32AdirVp|+&fFN;rb8Qfa)_>&9&DH*Qe$zkiI;oP*
zcjEhY5SZE&d;HGO34yY#_tuJtaINof3d^9!$^Rc@q6_+!JC7{yqyMmtVt+!tIiJGJ
zHIT30dnQLK3@7bo()NkvkWE;OzFI4Jn!>g}YTj%nn+Gwv+Ftwsw&jXd^JR1ChwbHy
ztCrAKA`ki6$5VB~hf4rYCiP5Mt)ZSYC1={LX-%x&$QfFeR&LIB+~o$}h1)^{<TK7*
zQaa-jDuYsCVeN&6fOwxGQ;Oz&4;)J7M5o}6bSV1I%8XpfC_i(q(CFr+1*w=+)sYw`
zO50?F5Q~3(Icy}b0H@AUyn6-XV)Mk-v2*lGs(t&1)(|xJ5CZ~PFoHXDI9V_pmY)K_
z%>MKi%Icd7Kq6H>kQVENczvz3fE?`f<$aspYc;va)a0dyLyHnna@RgIi?04}57;sL
zF&$;Vw|xt9kg)_#4VHuG<ChlLg0XaE(Fp{Mr4dX59-b@hDE5$6_6-V<JJIc;4Xuz8
z;VKx}cA_EByK!#dP2C0rx+J2nammx4PQ2fjyEP(gaqEKDSHF&|ef&JZs_kv6M$Yng
z8QwEpqu#Mj`-+&<*`0x&`4<PsDbF!b<c~#9S%hkFO@fk4<Bg^pW5g<Lh&bytW_)?E
zLdA6bDPv(|+ibqpY8t9R4iUmvXdj<`BY^YB89TEjbXb+>Gpr$_kw~NRxhJj~M(5NQ
z4qg~2=#DjRmg<Q-7%d4dXo6pnfCrDcz_s0SpAH<?JrEZHGRQukeZc|yay>Ovr~QWB
zFG!poxxbXbfavs};Euo_AkXIOe~NbGUmoyp{T>)GdkSojH8DW}!8!_TkY$rFPGPKN
z67<)dyKczTpH$YL#EVXJCkaLu`(!{9a!p*3yaew6ha3FKp4(*5ZT=kP;^cm>?kT?9
z+zxO|KuZ(_IKCqGZjMljz)L2}6Zg2qN>+H6%_OrQ%iB+#ZzbP+KDnUT6)LzQw@;#i
zq=@2Wcf#LHav61d8X95@;g8%0sv>iymzgn_ghsFrN6>Sj^^2U<FET=EIG=`$Y#SHg
zSQx*v0r64}A3o5#hr*UiUZX`s<EAHYnx&qG7X7vT@?3kAZcFqV+zVQI*KK>A*)EVL
zt@O0#QH-Dgw@xjzw~*z}n4`e=6bE!-FVNQY>XU=;Eu}eLZ(bPOAL8SlF_S96@HWlJ
zyN(1!YQVza{QBn@2@@6Z3fH}2*>P=w2WMquWf8L+MsVbkC{%uZhG~ketUkP+%N9wf
zB;CdCXC|GFCKLm>_2M-1NB#lkCdVA$HK`A4azG?zD<=aPk>%hW2@Y#5t9zSqOFw=8
zC)XfH>Wl^7I-SZHVwmx)ie8H^Ce)Gc$2Sg30QYEL%*Wsg{8Rjq6-$v(<*WS3i|68{
zE#DhkYhBSTldO`A=-U**LG>BXm+Dv0q_1J?Zjfh5-dJ&*dg>Y>qa6V2MN!Eg?5WtM
zc~QhYU$!pLJc0;Ud4nlyv=@$$IF8LfWfdnXw%#r4%i0$Ff~#=d{cX7LxTVAQu@E0!
z=1|Zw={GO3H(@)FJ?f}OL*50$Y?$?yjNqLX3B$GAvx?odTU-$JTzpGTs2%Jsq8J5V
zXFBRAJK`N7Lov_6cxw#|=Bzk<EsWRH)i*gDSla*zf;J)s<Z0tIj)+-M!K_%2q!&}0
z9qRh7T=tpSmP^lNwSSb(jwk5$MO%z|6Hp~$>P`|};{ZS7yKzMC5+)JJe~mxBQw*0_
zo1|ZD$5nAkQoggAkNqB;#ZbL-3O=ZNHD0D{v;y`7oDE?jJE(E!3S0JqIBtfxdR6FM
z+Bq9YZf<5s?hykwu8+8#>~=|RfH!-{0dTQUele_~wK|;fjc(N~HmClum8#N$j)Rum
zKBH=kP~i&KC;Ac3Qv2S?jkuKRtQcnhej6PBnIh$Q_E&nIO;!Cr5Z-_F*}vmGu2#e^
z#a$T0uiXYXeEAQRk?l`P%9XvK$G9#wif-aWL^mwgi-QgOL$l%|d^vHrM;sH~)@T+;
z3%CdN{IwA@qn)NQ_%;||MO+ADgA;}R;~TYF8{*Vr_@DwH{RTYn@y(-JauR=YX0@z$
zb<Jmg+}kc@CfmM;H%nw}SCIAizdQ*^D>|}YZ-iRET{`I^$Vz#e@`TT6O2SYtD8ew?
zF0_JW5!iyS)yIjrpLVESCB;@th<}GOqA}^;cyqb~^JU~Ghc+Ooa7HEq_5KsI6~TBf
zsRN?%V;9%6z!{B)=c5ZOymLpE+ZYv+P5B4d?f7DgYQyF=n=??%ch>ftlVYFK)~6>I
z+DW6^*^jc`5b-W-(49e9a^wM5_WAX!85h)C4rPs#GQ<hUJY<XJ(8DuoJWwSvW!hdU
z*_g|XHSCiPf{W~6GLy<Kk-MA`VuvQ#Lw?+3*Ou-bXT_m}s}Eyp#pe2{Kzzm<?~A5;
zdC*-NpD|+KGYf<n%ce;u0gq?xJ~=++J@F-?TzHHIMF%=bxNcisAPjaUnoh7E{k~`r
zGY=2J*eV}~V~LDL&gZ^zHkP`O%sWw!u(<+0VGk<W5W1)TxDw;w>2qM34Upt(ht$|U
z__~<CYykCF4-uQ&+JAr`>JEtgzuE}W{(d29>pK@u%^5{x-9WvLNx{2P!#v`UVa2}f
z@cFN>aljIswep8C{%Mq(ufOP_=7}FN9DYLDu}&ngtaPdSz&ed(J3oeIo2-D7^3*|v
z=6~G${yTk=;~U~{aO0o<0C^nI8E7K+wRHmP$0?_Y*Xtd%I#N6Qx}qS-e~5jzT&g%>
z023FteS!&F_GO^&vn=vF7RZs;H5BdbzALAgGQIM$O??lTi0_!?Eot@r&TqPR+{@?L
zJKZSAJQ8rFZpD8xYa#-^`o<vdryEaE0_54DhWg-;j>u6M`wYnetO7tQ52!^g@-xTT
z;>cb2d4D*grFzE(B!fUeFh(&TDElz4cOnA@$_K+88Mf<Nuz_G|v8Nu+N;6}r&V`aE
zr0DG|?eu-%Z8)`5DaX}}pe11iHz3AvNj$Bd3EhCmT7k&!+Y!<#Hvke^`DE`Se(1jQ
z`k!<B|Lr=?ZRI<jNw2}rJp0|OI`rjf2FkUDqMiDPXM@4fW}Tk<i(Z%VwH^pRxP9;U
z%XTyyo&@vn79M|$2K=i*hTB(dA6cv^1A9-kOy9rCyp^h~EK5$No?@1kxO$EwrB10S
z9QpFxz0&N~egU4MC6e9$So{Cnk^jX!o456P0vqmO@7YUy(Ly&cLB5R~DKCD@Tz;pn
u)>xC*bU<U$_WYL6SKRv^m<Dmy3&YjK@tAMV|H0Xne^#9T7he$@{r?LXh-xYT

literal 0
HcmV?d00001

diff --git a/cpp/src/arrow/compute/exec/doc/key_map.md b/cpp/src/arrow/compute/exec/doc/key_map.md
new file mode 100644
index 00000000000..fdedc88c4d4
--- /dev/null
+++ b/cpp/src/arrow/compute/exec/doc/key_map.md
@@ -0,0 +1,223 @@
+<!---
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+# Swiss Table
+
+A specialized hash table implementation used to dynamically map combinations of key field values to a dense set of integer ids. Ids can later be used in place of keys to identify groups of rows with equal keys.
+
+## Introduction
+
+Hash group-by in Arrow uses a variant of a hash table based on a data structure called Swiss table. Swiss table uses linear probing. There is an array of slots and the information related to inserted keys is stored in these slots. A hash function determines the slot where the search for a matching key will start during hash table lookup. Then the slots are visited sequentially, wrapping around the end of an array, until either a match or an empty slot is found, the latter case meaning that there is no match.  Swiss table organizes the slots in blocks of 8 and has a design that enables data level parallelism at the block level. More precisely, it allows for visiting all slots within a block at once during lookups, by simply using 64-bit arithmetic. SIMD instructions can further enhance this data level parallelism allowing to process multiple blocks related to multiple input keys together using SIMD vectors of 64-bit elements. Occupied slots within a block are always clustered together. The name Swiss table comes from likening resulting sequences of empty slots to holes in a one dimensional cheese.
+
+## Interface
+
+Hash table used in query processing for implementing join and group-by operators does not need to provide all of the operations that a general purpose hash table would. Simplified requirements can help achieve a simpler and more efficient design. For instance we do not need to be able to remove previously inserted keys. It’s an append-only data structure: new keys can be added but old keys are never erased. Also, only a single copy of each key can be inserted - it is like `std::map` in that sense and not `std::multimap`.
+
+Our Swiss table is fully vectorized. That means that all methods work on vectors of input keys processing them in batches. Specialized SIMD implementations of processing functions are almost always provided for performance critical operations. All callback interfaces used from the core hash table code are also designed to work on batches of inputs instead of individual keys. The batch size can be almost arbitrary and is selected by the client of the hash table. Batch size should be the smallest number of input items, big enough so that the benefits of vectorization and SIMD can be fully experienced. Keeping it small means less memory used for temporary arrays storing intermediate results of computation (vector equivalent of some temporary variables kept on the stack). That in turn means smaller space in CPU caches, which also means less impact on other memory access intensive operations. We pick 1024 as the default size of the batch. We will call it a **mini-batch** to distinguish it from potentially other forms of batches used at higher levels in the code, e.g. when scheduling work for worker threads or relational operators inside an analytic query.
+
+The main functionality provided by Swiss table is mapping of arbitrarily complex keys to unique integer ids. Let us call it **lookup-or-insert**. Given a sequence of key values, return a corresponding sequence of integer ids, such that all keys that are equal receive the same id and for K distinct keys the integer ids will be assigned from the set of numbers 0 to (K-1). If we find a matching key in a hash table for a given input, we return the **key id** assigned when the key was first inserted into a hash table. If we fail to find an already inserted match, we assign the first unused integer as a key id and add a new entry to a hash table. Due to vectorized processing, which may result in out-of-order processing of individual inputs, it is not guaranteed that if there are two new key values in the same input batch and one of them appears earlier in the input sequence, then it will receive a smaller key id. Additional mapping functionality can be built on top of basic mapping to integer key id, for instance if we want to assign and perhaps keep updating some values to all unique keys, we can keep these values in a resizable vector indexed by obtained key id.
+
+The implementation of Swiss table does not need to have any information related to the domain of the keys. It does not use their logical data type or information about their physical representation and does not even use pointers to keys. All access to keys is delegated to a separate class or classes that provide callback functions for three operations:
+-   computing hashes of keys;
+-   checking equality for given pairs of keys;
+-   appending a given sequence of keys to a stack maintained outside of Swiss table object, so that they can be referenced later on by key ids (key ids will be equal to their positions in the stack).
+
+
+When passing arguments to callback functions the keys are referenced using integer ids. For the left side - that is the keys present in the input mini-batch - ordinal positions within that mini-batch are used. For the right side - that is the keys inserted into the hash table - these are identified by key ids assigned to them and stored inside Swiss table when they were first encountered and processed.
+
+Diagram with logical view of information passing in callbacks:
+
+![alt text](img/key_map_1.jpg)
+
+Hash table values for inserted keys are also stored inside Swiss table. Because of that, hash table logic does not need to ever re-evaluate the hash, and there is actually no need for a hash function callback. It is enough that the caller provides hash values for all entries in the batch when calling lookup-or-insert.
+
+## Basic architecture and organization of data
+The hash table is an array of **slots**. Slots are grouped in groups of 8 called **blocks**. The number of blocks is a power of 2. The empty hash table starts with a single block, with all slots empty. Then, as the keys are getting inserted and the amount of empty slots is shrinking, at some point resizing of the hash table is triggered. The data stored in slots is moved to a new hash table that has the double of the number of blocks.
+
+The diagram below shows the basic organization of data in our implementation of Swiss table:
+
+![alt text](img/key_map_2.jpg)
+
+N is the log of the number of blocks, 2<sup>n+3</sup> is  the number of slots and also the maximum number of inserted keys and hence (N + 3) is the number of bits required to store a key id. We will refer to N as the **size of the hash table**.
+
+Index of a block within an array will be called **block id**, and similarly index of a slot will be **slot id**. Sometimes we will focus on a single block and refer to slots that belong to it by using a **local slot id**, which is an index from 0 to 7.
+
+Every slot can either be **empty** or store data related to a single inserted key. There are three pieces of information stored inside a slot:
+- status byte,
+- key id,
+- key hash.
+
+Status byte, as the name suggests, stores 8 bits. The highest bit indicates if the slot is empty (the highest bit is set) or corresponds to one of inserted keys (the highest bit is zero). The remaining 7 bits contain 7 bits of key hash that we call a **stamp**. The stamp is used to eliminate some false positives when searching for a matching key for a given input. Slot also stores **key id**, which is a non-negative integer smaller than the number of inserted keys, that is used as a reference to the actual inserted key. The last piece of information related to an inserted key is its **hash** value. We store hashes for all keys, so that they never need to be re-computed. That greatly simplifies some operations, like resizing of a hash table, that may not even need to look at the keys at all. For an empty slot, the status byte is 0x80, key id is zero and the hash is not used and can be set to any number.
+
+A single block contains 8 slots and can be viewed as a micro-stack of up to 8 inserted keys. When the first key is inserted into an empty block, it will occupy a slot with local id 0. The second inserted key will go into slot number 1 and so on. We use N highest bits of hash to get an index of a **start block**, when searching for a match or an empty slot to insert a previously not seen key when that is the case. If the start block contains any empty slots, then the search for either a match or place to insert a key will end at that block. We will call such a block an **open block**. A block that is not open is a full block. In the case of full block, the input key related search may continue in the next block module the number of blocks. If the key is not inserted into its start block, we will refer to it as an **overflow** entry, other entries being **non-overflow**. Overflow entries are slower to process, since they require visiting more than one block, so we want to keep their percentage low. This is done by choosing the right **load factor** (percentage of occupied slots in the hash table) at which the hash table gets resized and the number of blocks gets doubled. By tuning this value we can control the probability of encountering an overflow entry.
+
+The most interesting part of each block is the set of status bytes of its slots, which is simply a single 64-bit word. The implementation of efficient searches across these bytes during lookups require using either leading zero count or trailing zero count intrinsic. Since there are cases when only the first one is available, in order to take advantage of it, we order the bytes in the 64-bit status word so that the first slot within a block uses the highest byte and the last one uses the lowest byte (slots are in reversed bytes order). The diagram below shows how the information about slots is stored within a 64-bit status word:
+
+![alt text](img/key_map_3.jpg)
+
+Each status byte has a 7-bit fragment of hash value - a **stamp** - and an empty slot bit. Empty slots have status byte equal to 0x80 - the highest bit is set to 1 to indicate an empty slot and the lowest bits, which are used by a stamp, are set to zero.
+
+The diagram below shows which bits of hash value are used by hash table:
+
+![alt text](img/key_map_4.jpg)
+
+If a hash table has 2<sup>N</sup> blocks, then we use N highest bits of a hash to select a start block when searching for a match. The next 7 bits are used as a stamp. Using the highest bits to pick a start block means that a range of hash values can be easily mapped to a range of block ids of start blocks for hashes in that range. This is useful when resizing a hash table or merging two hash tables together.
+
+### Interleaving status bytes and key ids
+
+Status bytes and key ids for all slots are stored in a single array of bytes. They are first grouped by 8 into blocks, then each block of status bytes is interleaved with a corresponding block of key ids. Finally key ids are represented using the smallest possible number of bits and bit-packed (bits representing each next key id start right after the last bit of the previous key id). Note that regardless of the chosen number of bits, a block of bit-packed key ids (that is 8 of them) will start and end on the byte boundary.
+
+The diagram below shows the organization of bytes and bits of a single block in interleaved array:
+![alt text](img/key_map_5.jpg)
+
+From the size of the hash table we can derive the number K of bits needed in the worst case to encode any key id. K is equal to the number of bits needed to represent slot id (number of keys is not greater than the number of slots and any key id is strictly less than the number of keys), which for a hash table of size N (N blocks) equals (N+3). To simplify bit packing and unpacking and avoid handling of special cases, we will round up K to full bytes for K > 24 bits.
+
+Status bytes are stored in a single 64-bit word in reverse byte order (the last byte corresponds to the slot with local id 0). On the other hand key ids are stored in the normal order (the order of slot ids).
+
+Since both status byte and key id for a given slot are stored in the same array close to each other, we can expect that most of the lookups will read only one CPU cache-line from memory inside Swiss table code (then at least another one outside Swiss table to access the bytes of the key for the purpose of comparison). Even if we hit an overflow entry, it is still likely to reside on the same cache-line as the start block data. Hash values, which are stored separately from status byte and key id, are only used when resizing and do not impact the lookups outside these events.
+
+> Improvement to consider:
+> In addition to the Swiss table data, we need to store an array of inserted keys, one for each key id. If keys are of fixed length, then the address of the bytes of the key can be calculated by multiplying key id by the common length of the key. If keys are of varying length, then there will be an additional array with an offset of each key within the array of concatenated bytes of keys. That means that any key comparison during lookup will involve 3 arrays: one to get key id, one to get key offset and final one with bytes of the key. This could be reduced to 2 array lookups if we stored key offset instead of key id interleaved with slot status bytes. Offset indexed by key id and stored in its own array becomes offset indexed by slot id and stored interleaved with slot status bytes. At the same time key id indexed by slot id and interleaved with slot status bytes before becomes key id referenced using offset and stored with key bytes. There may be a slight increase in the total size of memory needed by the hash table, equal to the difference in the number of bits used to store offset and those used to store key id, multiplied by the number of slots, but that should be a small fraction of the total size.
+
+### 32-bit hash vs 64-bit hash
+
+Currently we use 32-bit hash values in Swiss table code and 32-bit integers as key ids. For the robust implementation, sooner or later we will need to support 64-bit hash and 64-bit key ids. When we use 32-bit hash, it means that we run out of hash bits when hash table size N is greater than 25 (25 bits of hash needed to select a block and 7 bits needed to generate a stamp byte reach 32 total bits). When the number of inserted keys exceeds the maximal number of keys stored in a hash table of size 25 (which is at least 2<sup>24</sup>), the chance of false positives during lookups will start quickly growing. 32-bit hash should not be used with more than about 16 million inserted keys.
+
+### Low memory footprint and low chance of hash collisions
+
+Swiss table is a good choice of a hash table for modern hardware, because it combines lookups that can take advantage of special CPU instructions with space efficiency and low chance of hash collisions.
+
+Space efficiency is important for performance, because the cost of random array accesses, often dominating the lookup cost for larger hash tables, increases with the size of the arrays. This happens due to limited space of CPU caches. Let us look at what is the amortized additional storage cost for a key in a hash table apart from the essential cost of storing data of all those keys. Furthermore, we can skip the storage of hash values, since these are only used during infrequent hash table resize operations (should not have a big impact on CPU cache usage in normal cases).
+
+Half full hash table of size N will use 2 status bytes per inserted key (because for every filled slot there is one empty slot) and 2\*(N+3) bits for key id (again, one for the occupied slot and one for the empty). For N = 16 for instance this is slightly under 7 bytes per inserted key.
+
+Swiss table also has a low probability of false positives leading to wasted key comparisons. Here is some rationale behind why this should be the case. Hash table of size N can contain up to 2<sup>N+3</sup> keys. Search for a match involves (N + 7) hash bits: N to select a start block and 7 to use as a stamp. There are always at least 16 times more combinations of used hash bits than there are keys in the hash table (32 times more if the hash table is half full). These numbers mean that the probability of false positives resulting from a search for a matching slot should be low. That corresponds to an expected number of comparisons per lookup being close to 1 for keys already present and 0 for new keys.
+
+## Lookup
+
+Lookup-or-insert operation, given a hash of a key, finds a list of candidate slots with corresponding keys that are likely to be equal to the input key. The list may be empty, which means that the key does not exist yet in the hash table. If it is not empty, then the callback function for key comparison is called for each next candidate to verify that there is indeed a match. False positives get rejected and we end up either finding an actual match or an empty slot, which means that the key is new to the hash table. New keys get assigned next available integers as key ids, and are appended to the set of keys stored in the hash table. As a result of inserting new keys to the hash table, the density of occupied slots may reach an upper limit, at which point the hash table will be resized and will afterwards have twice as many slots. That is in summary lookup-or-insert functionality, but the actual implementation is a bit more involved, because of vectorization of the processing and various optimizations for common cases.
+
+### Search within a single block
+
+There are three possible cases that can occur when searching for a match for a given key (that is, for a given stamp of a key) within a single block, illustrated below.
+
+ 1. There is a matching stamp in the block of status bytes:
+
+![alt text](img/key_map_6.jpg)
+
+ 2. There is no matching stamp in the block, but there is an empty slot in the block: 
+
+![alt text](img/key_map_7.jpg)
+
+ 3. There is no matching stamp in the block and the block is full (there are no empty slots left): 
+
+![alt text](img/key_map_8.jpg)
+
+64-bit arithmetic can be used to search for a matching slot within the entire single block at once, without iterating over all slots in it. Following is an example of a sequence of steps to find the first status byte for a given stamp, returning the first empty slot on miss if the block is not full or 8 (one past maximum local slot id) otherwise.
+
+Following is a sketch of the possible steps to execute when searching for the matching stamp in a single block. 
+
+*Example will use input stamp 0x5E and a 64-bit status bytes word with one empty slot:  
+0x 4B17 5E3A 5E2B 1180*.
+
+1. [1 instruction] Replicate stamp to all bytes by multiplying it by 0x 0101 0101 0101 0101.  
+
+	*We obtain: 0x 5E5E 5E5E 5E5E 5E5E.*
+
+2. [1 instruction] XOR replicated stamp with status bytes word. Bytes corresponding to a matching stamp will be 0, bytes corresponding to empty slots will have a value between 128 and 255, bytes corresponding to non-matching non-empty slots will have a value between 1 and 127.
+
+	*We obtain: 0x 1549 0064 0075 4FDE.*
+
+3. [2 instructions] In the next step we want to have information about a match in the highest bit of each byte. We can ignore here empty slot bytes, because they will be taken care of at a later step. Set the highest bit in each byte (OR with 0x 8080 8080 8080 8080) and then subtract 1 from each byte (subtract 0x 0101 0101 0101 0101 from 64-bit word). Now if a byte corresponds to a non-empty slot then the highest bit 0 indicates a match and 1 indicates a miss.
+
+	*We obtain: 0x 95C9 80E4 80F5 CFDE, 
+	then 0x 94C8 7FE3 7FF4 CEDD.*
+
+4. [3 instructions] In the next step we want to obtain in each byte one of two values: 0x80 if it is either an empty slot or a match, 0x00 otherwise. We do it in three steps: NOT the result of the previous step to change the meaning of the highest bit; OR with the original status word to set highest bit in a byte to 1 for empty slots; mask out everything other than the highest bits in all bytes (AND with 0x 8080 8080 8080 8080).
+
+	*We obtain: 6B37 801C 800B 3122,  
+	then 6B37 DE3E DE2B 31A2,  
+	finally 0x0000 8000 8000 0080.*
+
+5. [2 instructions] Finally, use leading zero bits count and divide it by 8 to find an index of the last byte that corresponds either to a match or an empty slot. If the leading zero count intrinsic returns 64 for a 64-bit input zero, then after dividing by 8 we will also get the desired answer in case of a full block without any matches.
+
+	*We obtain: 16,  
+	then 2 (index of the first slot within the block that matches the stamp).*
+
+If SIMD instructions with 64-bit lanes are available, multiple single block searches for different keys can be executed together. For instance AVX2 instruction set allows to process quadruplets of 64-bit values in a single instruction, four searches at once.
+
+### Complete search potentially across multiple blocks
+
+Full implementation of a search for a matching key may involve visiting multiple blocks beginning with the start block selected based on the hash of the key. We move to the next block modulo the number of blocks, whenever we do not find a match in the current block and the current block is full. The search may also involve visiting one or more slots in each block. Visiting in this case means calling a comparison callback to verify the match whenever a slot with a matching stamp is encountered. Eventually the search stops when either:  
+- the matching key is found in one of the slots matching the stamp, or
+
+- an empty slot is reached. This is illustrated in the diagram below:
+![alt text](img/key_map_9.jpg)
+
+
+### Optimistic processing with two passes
+
+Hash table lookups may have high cost in the pessimistic case, when we encounter cases of hash collisions and full blocks that lead to visiting further blocks. In the majority of cases we can expect an optimistic situation - the start block is not full, so we will only visit this one block, and all stamps in the block are different, so we will need at most one comparison to find a match. We can expect about 90% of the key lookups for an existing key to go through the optimistic path of processing. For that reason it pays off to optimize especially for this 90% of inputs.
+
+Lookups in Swiss table are split into two passes over an input batch of keys. The **first pass:  fast-path lookup** , is a highly optimized, vectorized, SIMD-friendly, branch-free code that fully handles optimistic cases. The **second pass: slow-path lookup** , is normally executed only for the selection of inputs that have not been finished in the first pass, although it can also be called directly on all of the inputs, skipping fast-path lookup. It handles all special cases and inserts but in order to be robust it is not as efficient as fast-path. Slow-path lookup does not need to repeat the work done in fast-path lookup - it can use the state reached at the end of fast-path lookup as a starting point.
+
+Fast-path lookup implements search only for the first stamp match and only within the start block. It only makes sense when we already have at least one key inserted into the hash table, since it does not handle inserts. It takes a vector of key hashes as an input and based on it outputs three pieces of information for each key:
+
+- Key id corresponding to the slot in which a matching stamp was found. Any valid key id if a matching stamp was not found.
+- A flag indicating if a match was found or not.  
+- Slot id of a slot from which slow-path should pick up the search if the first match was either not found or it turns out to be false positive after evaluating key comparison.
+
+> Improvement to consider: 
+> precomputing 1st pass lookup results.
+> 
+> If the hash table is small, the number of inserted keys is small, we could further simplify and speed-up the first pass by storing in a lookup table pre-computed results for all combinations of hash bits. Let us consider the case of Swiss table of size 5 that has 256 slots and up to 128 inserted keys. Only 12 bits of hash are used by lookup in that case: 5 to select a block, 7 to create a stamp. For all 2<sup>12</sup> combinations of those bits we could keep the result of first pass lookup in an array. Key id and a match indicating flag can use one byte: 7 bits for key id and 1 bit for the flag. Note that slot id is only needed if we go into 2nd pass lookup, so it can be stored separately and likely only accessed by a small subset of keys. Fast-path lookup becomes almost a single fetch of result from a 4KB array. Lookup arrays used to implement this need to be kept in sync with the main copy of data about slots, which requires extra care during inserts. Since the number of entries in lookup arrays is much higher than the number of slots, this technique only makes sense for small hash tables.
+
+### Dense comparisons
+
+If there is at least one key inserted into a hash table, then every slot contains a key id value that corresponds to some actual key that can be used in comparison. That is because empty slots are initialized with 0 as their key id. After the fast-path lookup we get a match-found flag for each input. If it is set, then we need to run a comparison of the input key with the key in the hash table identified by key id returned by fast-path code. The comparison will verify that there is a true match between the keys. We only need to do this for a subset of inputs that have a match candidate, but since we have key id values corresponding to some real key for all inputs, we may as well execute comparisons on all inputs unconditionally. If the majority (e.g. more than 80%) of the keys have a match candidate, the cost of evaluating comparison for the remaining fraction of keys but without filtering may actually be cheaper than the cost of running evaluation only for required keys while referencing filter information. This can be seen as a variant of general preconditioning techniques used to avoid diverging conditional branches in the code. It may be used, based on some heuristic, to verify matches reported by fast-path lookups and is referred to as **dense comparisons**.
+
+## Resizing
+
+New hash table is initialized as empty and has only a single block with a space for only a few key entries. Doubling of the hash table size becomes necessary as more keys get inserted. It is invoked during the 2nd pass of the lookups, which also handles inserts. It happens immediately after the number of inserted keys reaches a specific upper limit decided based on a current size of the hash table. There may still be unprocessed entries from the input mini-batch after resizing, so the 2nd pass of the lookup is restarted right after, with the bigger hash table and the remaining subset of unprocessed entries.
+
+Current policy, that should work reasonably well, is to resize a small hash table (up to 8KB) when it is 50% full. Larger hash tables are resized when 75% full. We want to keep size in memory as small as possible, while maintaining a low probability of blocks becoming full.
+  
+When discussing resizing we will be talking about **resize source** and **resize target** tables. The diagram below shows how the same hash bits are interpreted differently by the source and the target.
+
+![alt text](img/key_map_10.jpg)
+
+For a given hash, if a start block id was L in the source table, it will be either (2\*L+0) or (2\*L+1) in the target table. Based on that we can expect data access locality when migrating the data between the tables.
+
+Resizing is cheap also thanks to the fact that hash values for keys in the hash table are kept together with other slot data and do not need to be recomputed. That means that resizing procedure does not ever need to access the actual bytes of the key.
+
+### 1st pass
+
+Based on the hash value for a given slot we can tell whether this slot contains an overflow or non-overflow entry. In the first pass we go over all source slots in sequence, filter out overflow entries and move to the target table all other entries. Non-overflow entries from a block L will be distributed between blocks (2\*L+0) and (2\*L+1) of the target table. None of these target blocks can overflow, since they will be accommodating at most 8 input entries during this pass.
+
+For every non-overflow entry, the highest bit of a stamp in the source slot decides whether it will go to the left or to the right target block. It is further possible to avoid any conditional branches in this partitioning code, so that the result is friendly to the CPU execution pipeline.
+
+![alt text](img/key_map_11.jpg)
+
+
+### 2nd pass
+
+In the second pass of resizing, we scan all source slots again, this time focusing only on the overflow entries that were all skipped in the 1st pass. We simply reinsert them in the target table using generic insertion code with one exception. Since we know that all the source keys are different, there is no need to search for a matching stamp or run key comparisons (or look at the key values). We just need to find the first open block beginning with the start block in the target table and use its first empty slot as the insert destination.
+
+We expect overflow entries to be rare and therefore the relative cost of that pass should stay low.
+
diff --git a/cpp/src/arrow/compute/exec/key_compare.cc b/cpp/src/arrow/compute/exec/key_compare.cc
new file mode 100644
index 00000000000..f8d74859b01
--- /dev/null
+++ b/cpp/src/arrow/compute/exec/key_compare.cc
@@ -0,0 +1,267 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/compute/exec/key_compare.h"
+
+#include <algorithm>
+#include <cstdint>
+
+#include "arrow/compute/exec/util.h"
+
+namespace arrow {
+namespace compute {
+
+void KeyCompare::CompareRows(uint32_t num_rows_to_compare,
+                             const uint16_t* sel_left_maybe_null,
+                             const uint32_t* left_to_right_map,
+                             KeyEncoder::KeyEncoderContext* ctx, uint32_t* out_num_rows,
+                             uint16_t* out_sel_left_maybe_same,
+                             const KeyEncoder::KeyRowArray& rows_left,
+                             const KeyEncoder::KeyRowArray& rows_right) {
+  ARROW_DCHECK(rows_left.metadata().is_compatible(rows_right.metadata()));
+
+  if (num_rows_to_compare == 0) {
+    *out_num_rows = 0;
+    return;
+  }
+
+  // Allocate temporary byte and bit vectors
+  auto bytevector_holder =
+      util::TempVectorHolder<uint8_t>(ctx->stack, num_rows_to_compare);
+  auto bitvector_holder =
+      util::TempVectorHolder<uint8_t>(ctx->stack, num_rows_to_compare);
+
+  uint8_t* match_bytevector = bytevector_holder.mutable_data();
+  uint8_t* match_bitvector = bitvector_holder.mutable_data();
+
+  // All comparison functions called here will update match byte vector
+  // (AND it with comparison result) instead of overwriting it.
+  memset(match_bytevector, 0xff, num_rows_to_compare);
+
+  if (rows_left.metadata().is_fixed_length) {
+    CompareFixedLength(num_rows_to_compare, sel_left_maybe_null, left_to_right_map,
+                       match_bytevector, ctx, rows_left.metadata().fixed_length,
+                       rows_left.data(1), rows_right.data(1));
+  } else {
+    CompareVaryingLength(num_rows_to_compare, sel_left_maybe_null, left_to_right_map,
+                         match_bytevector, ctx, rows_left.data(2), rows_right.data(2),
+                         rows_left.offsets(), rows_right.offsets());
+  }
+
+  // CompareFixedLength can be used to compare nulls as well
+  bool nulls_present = rows_left.has_any_nulls(ctx) || rows_right.has_any_nulls(ctx);
+  if (nulls_present) {
+    CompareFixedLength(num_rows_to_compare, sel_left_maybe_null, left_to_right_map,
+                       match_bytevector, ctx,
+                       rows_left.metadata().null_masks_bytes_per_row,
+                       rows_left.null_masks(), rows_right.null_masks());
+  }
+
+  util::BitUtil::bytes_to_bits(ctx->hardware_flags, num_rows_to_compare, match_bytevector,
+                               match_bitvector);
+  if (sel_left_maybe_null) {
+    int out_num_rows_int;
+    util::BitUtil::bits_filter_indexes(0, ctx->hardware_flags, num_rows_to_compare,
+                                       match_bitvector, sel_left_maybe_null,
+                                       &out_num_rows_int, out_sel_left_maybe_same);
+    *out_num_rows = out_num_rows_int;
+  } else {
+    int out_num_rows_int;
+    util::BitUtil::bits_to_indexes(0, ctx->hardware_flags, num_rows_to_compare,
+                                   match_bitvector, &out_num_rows_int,
+                                   out_sel_left_maybe_same);
+    *out_num_rows = out_num_rows_int;
+  }
+}
+
+void KeyCompare::CompareFixedLength(uint32_t num_rows_to_compare,
+                                    const uint16_t* sel_left_maybe_null,
+                                    const uint32_t* left_to_right_map,
+                                    uint8_t* match_bytevector,
+                                    KeyEncoder::KeyEncoderContext* ctx,
+                                    uint32_t fixed_length, const uint8_t* rows_left,
+                                    const uint8_t* rows_right) {
+  bool use_selection = (sel_left_maybe_null != nullptr);
+
+  uint32_t num_rows_already_processed = 0;
+
+#if defined(ARROW_HAVE_AVX2)
+  if (ctx->has_avx2() && !use_selection) {
+    // Choose between up-to-8B length, up-to-16B length and any size versions
+    if (fixed_length <= 8) {
+      num_rows_already_processed = CompareFixedLength_UpTo8B_avx2(
+          num_rows_to_compare, left_to_right_map, match_bytevector, fixed_length,
+          rows_left, rows_right);
+    } else if (fixed_length <= 16) {
+      num_rows_already_processed = CompareFixedLength_UpTo16B_avx2(
+          num_rows_to_compare, left_to_right_map, match_bytevector, fixed_length,
+          rows_left, rows_right);
+    } else {
+      num_rows_already_processed =
+          CompareFixedLength_avx2(num_rows_to_compare, left_to_right_map,
+                                  match_bytevector, fixed_length, rows_left, rows_right);
+    }
+  }
+#endif
+
+  typedef void (*CompareFixedLengthImp_t)(uint32_t, uint32_t, const uint16_t*,
+                                          const uint32_t*, uint8_t*, uint32_t,
+                                          const uint8_t*, const uint8_t*);
+  static const CompareFixedLengthImp_t CompareFixedLengthImp_fn[] = {
+      CompareFixedLengthImp<false, 1>, CompareFixedLengthImp<false, 2>,
+      CompareFixedLengthImp<false, 0>, CompareFixedLengthImp<true, 1>,
+      CompareFixedLengthImp<true, 2>,  CompareFixedLengthImp<true, 0>};
+  int dispatch_const = (use_selection ? 3 : 0) +
+                       ((fixed_length <= 8) ? 0 : ((fixed_length <= 16) ? 1 : 2));
+  CompareFixedLengthImp_fn[dispatch_const](
+      num_rows_already_processed, num_rows_to_compare, sel_left_maybe_null,
+      left_to_right_map, match_bytevector, fixed_length, rows_left, rows_right);
+}
+
+template <bool use_selection, int num_64bit_words>
+void KeyCompare::CompareFixedLengthImp(uint32_t num_rows_already_processed,
+                                       uint32_t num_rows,
+                                       const uint16_t* sel_left_maybe_null,
+                                       const uint32_t* left_to_right_map,
+                                       uint8_t* match_bytevector, uint32_t length,
+                                       const uint8_t* rows_left,
+                                       const uint8_t* rows_right) {
+  // Key length (for encoded key) has to be non-zero
+  ARROW_DCHECK(length > 0);
+
+  // Non-zero length guarantees no underflow
+  int32_t num_loops_less_one = (static_cast<int32_t>(length) + 7) / 8 - 1;
+
+  // Length remaining in last loop can only be zero for input length equal to zero
+  uint32_t length_remaining_last_loop = length - num_loops_less_one * 8;
+  uint64_t tail_mask = (~0ULL) >> (8 * (8 - length_remaining_last_loop));
+
+  for (uint32_t id_input = num_rows_already_processed; id_input < num_rows; ++id_input) {
+    uint32_t irow_left = use_selection ? sel_left_maybe_null[id_input] : id_input;
+    uint32_t irow_right = left_to_right_map[irow_left];
+    uint32_t begin_left = length * irow_left;
+    uint32_t begin_right = length * irow_right;
+    const uint64_t* key_left_ptr =
+        reinterpret_cast<const uint64_t*>(rows_left + begin_left);
+    const uint64_t* key_right_ptr =
+        reinterpret_cast<const uint64_t*>(rows_right + begin_right);
+    uint64_t result_or = 0ULL;
+    int32_t istripe = 0;
+
+    // Specializations for keys up to 8 bytes and between 9 and 16 bytes to
+    // avoid internal loop over words in the value for short ones.
+    //
+    // Template argument 0 means arbitrarily many 64-bit words,
+    // 1 means up to 1 and 2 means up to 2.
+    //
+    if (num_64bit_words == 0) {
+      for (; istripe < num_loops_less_one; ++istripe) {
+        uint64_t key_left = key_left_ptr[istripe];
+        uint64_t key_right = key_right_ptr[istripe];
+        result_or |= (key_left ^ key_right);
+      }
+    } else if (num_64bit_words == 2) {
+      uint64_t key_left = key_left_ptr[istripe];
+      uint64_t key_right = key_right_ptr[istripe];
+      result_or |= (key_left ^ key_right);
+      ++istripe;
+    }
+
+    uint64_t key_left = key_left_ptr[istripe];
+    uint64_t key_right = key_right_ptr[istripe];
+    result_or |= (tail_mask & (key_left ^ key_right));
+
+    int result = (result_or == 0 ? 0xff : 0);
+    match_bytevector[id_input] &= result;
+  }
+}
+
+void KeyCompare::CompareVaryingLength(uint32_t num_rows_to_compare,
+                                      const uint16_t* sel_left_maybe_null,
+                                      const uint32_t* left_to_right_map,
+                                      uint8_t* match_bytevector,
+                                      KeyEncoder::KeyEncoderContext* ctx,
+                                      const uint8_t* rows_left, const uint8_t* rows_right,
+                                      const uint32_t* offsets_left,
+                                      const uint32_t* offsets_right) {
+  bool use_selection = (sel_left_maybe_null != nullptr);
+
+#if defined(ARROW_HAVE_AVX2)
+  if (ctx->has_avx2() && !use_selection) {
+    CompareVaryingLength_avx2(num_rows_to_compare, left_to_right_map, match_bytevector,
+                              rows_left, rows_right, offsets_left, offsets_right);
+  } else {
+#endif
+    if (use_selection) {
+      CompareVaryingLengthImp<true>(num_rows_to_compare, sel_left_maybe_null,
+                                    left_to_right_map, match_bytevector, rows_left,
+                                    rows_right, offsets_left, offsets_right);
+    } else {
+      CompareVaryingLengthImp<false>(num_rows_to_compare, sel_left_maybe_null,
+                                     left_to_right_map, match_bytevector, rows_left,
+                                     rows_right, offsets_left, offsets_right);
+    }
+#if defined(ARROW_HAVE_AVX2)
+  }
+#endif
+}
+
+template <bool use_selection>
+void KeyCompare::CompareVaryingLengthImp(
+    uint32_t num_rows, const uint16_t* sel_left_maybe_null,
+    const uint32_t* left_to_right_map, uint8_t* match_bytevector,
+    const uint8_t* rows_left, const uint8_t* rows_right, const uint32_t* offsets_left,
+    const uint32_t* offsets_right) {
+  static const uint64_t tail_masks[] = {
+      0x0000000000000000ULL, 0x00000000000000ffULL, 0x000000000000ffffULL,
+      0x0000000000ffffffULL, 0x00000000ffffffffULL, 0x000000ffffffffffULL,
+      0x0000ffffffffffffULL, 0x00ffffffffffffffULL, 0xffffffffffffffffULL};
+  for (uint32_t i = 0; i < num_rows; ++i) {
+    uint32_t irow_left = use_selection ? sel_left_maybe_null[i] : i;
+    uint32_t irow_right = left_to_right_map[irow_left];
+    uint32_t begin_left = offsets_left[irow_left];
+    uint32_t begin_right = offsets_right[irow_right];
+    uint32_t length_left = offsets_left[irow_left + 1] - begin_left;
+    uint32_t length_right = offsets_right[irow_right + 1] - begin_right;
+    uint32_t length = std::min(length_left, length_right);
+    const uint64_t* key_left_ptr =
+        reinterpret_cast<const uint64_t*>(rows_left + begin_left);
+    const uint64_t* key_right_ptr =
+        reinterpret_cast<const uint64_t*>(rows_right + begin_right);
+    uint64_t result_or = 0;
+    int32_t istripe;
+    // length can be zero
+    for (istripe = 0; istripe < (static_cast<int32_t>(length) + 7) / 8 - 1; ++istripe) {
+      uint64_t key_left = key_left_ptr[istripe];
+      uint64_t key_right = key_right_ptr[istripe];
+      result_or |= (key_left ^ key_right);
+    }
+
+    uint32_t length_remaining = length - static_cast<uint32_t>(istripe) * 8;
+    uint64_t tail_mask = tail_masks[length_remaining];
+
+    uint64_t key_left = key_left_ptr[istripe];
+    uint64_t key_right = key_right_ptr[istripe];
+    result_or |= (tail_mask & (key_left ^ key_right));
+
+    int result = (result_or == 0 ? 0xff : 0);
+    match_bytevector[i] &= result;
+  }
+}
+
+}  // namespace compute
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/exec/key_compare.h b/cpp/src/arrow/compute/exec/key_compare.h
new file mode 100644
index 00000000000..1dffabb884b
--- /dev/null
+++ b/cpp/src/arrow/compute/exec/key_compare.h
@@ -0,0 +1,101 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+
+#include "arrow/compute/exec/key_encode.h"
+#include "arrow/compute/exec/util.h"
+#include "arrow/memory_pool.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+
+namespace arrow {
+namespace compute {
+
+class KeyCompare {
+ public:
+  // Returns a single 16-bit selection vector of rows that failed comparison.
+  // If there is input selection on the left, the resulting selection is a filtered image
+  // of input selection.
+  static void CompareRows(uint32_t num_rows_to_compare,
+                          const uint16_t* sel_left_maybe_null,
+                          const uint32_t* left_to_right_map,
+                          KeyEncoder::KeyEncoderContext* ctx, uint32_t* out_num_rows,
+                          uint16_t* out_sel_left_maybe_same,
+                          const KeyEncoder::KeyRowArray& rows_left,
+                          const KeyEncoder::KeyRowArray& rows_right);
+
+ private:
+  static void CompareFixedLength(uint32_t num_rows_to_compare,
+                                 const uint16_t* sel_left_maybe_null,
+                                 const uint32_t* left_to_right_map,
+                                 uint8_t* match_bytevector,
+                                 KeyEncoder::KeyEncoderContext* ctx,
+                                 uint32_t fixed_length, const uint8_t* rows_left,
+                                 const uint8_t* rows_right);
+  static void CompareVaryingLength(uint32_t num_rows_to_compare,
+                                   const uint16_t* sel_left_maybe_null,
+                                   const uint32_t* left_to_right_map,
+                                   uint8_t* match_bytevector,
+                                   KeyEncoder::KeyEncoderContext* ctx,
+                                   const uint8_t* rows_left, const uint8_t* rows_right,
+                                   const uint32_t* offsets_left,
+                                   const uint32_t* offsets_right);
+
+  // Second template argument is 0, 1 or 2.
+  // 0 means arbitrarily many 64-bit words, 1 means up to 1 and 2 means up to 2.
+  template <bool use_selection, int num_64bit_words>
+  static void CompareFixedLengthImp(uint32_t num_rows_already_processed,
+                                    uint32_t num_rows,
+                                    const uint16_t* sel_left_maybe_null,
+                                    const uint32_t* left_to_right_map,
+                                    uint8_t* match_bytevector, uint32_t length,
+                                    const uint8_t* rows_left, const uint8_t* rows_right);
+  template <bool use_selection>
+  static void CompareVaryingLengthImp(uint32_t num_rows,
+                                      const uint16_t* sel_left_maybe_null,
+                                      const uint32_t* left_to_right_map,
+                                      uint8_t* match_bytevector, const uint8_t* rows_left,
+                                      const uint8_t* rows_right,
+                                      const uint32_t* offsets_left,
+                                      const uint32_t* offsets_right);
+
+#if defined(ARROW_HAVE_AVX2)
+
+  static uint32_t CompareFixedLength_UpTo8B_avx2(
+      uint32_t num_rows, const uint32_t* left_to_right_map, uint8_t* match_bytevector,
+      uint32_t length, const uint8_t* rows_left, const uint8_t* rows_right);
+  static uint32_t CompareFixedLength_UpTo16B_avx2(
+      uint32_t num_rows, const uint32_t* left_to_right_map, uint8_t* match_bytevector,
+      uint32_t length, const uint8_t* rows_left, const uint8_t* rows_right);
+  static uint32_t CompareFixedLength_avx2(uint32_t num_rows,
+                                          const uint32_t* left_to_right_map,
+                                          uint8_t* match_bytevector, uint32_t length,
+                                          const uint8_t* rows_left,
+                                          const uint8_t* rows_right);
+  static void CompareVaryingLength_avx2(
+      uint32_t num_rows, const uint32_t* left_to_right_map, uint8_t* match_bytevector,
+      const uint8_t* rows_left, const uint8_t* rows_right, const uint32_t* offsets_left,
+      const uint32_t* offsets_right);
+
+#endif
+};
+
+}  // namespace compute
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/exec/key_compare_avx2.cc b/cpp/src/arrow/compute/exec/key_compare_avx2.cc
new file mode 100644
index 00000000000..6abdf6c3c3a
--- /dev/null
+++ b/cpp/src/arrow/compute/exec/key_compare_avx2.cc
@@ -0,0 +1,188 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <immintrin.h>
+
+#include "arrow/compute/exec/key_compare.h"
+#include "arrow/util/bit_util.h"
+
+namespace arrow {
+namespace compute {
+
+#if defined(ARROW_HAVE_AVX2)
+
+uint32_t KeyCompare::CompareFixedLength_UpTo8B_avx2(
+    uint32_t num_rows, const uint32_t* left_to_right_map, uint8_t* match_bytevector,
+    uint32_t length, const uint8_t* rows_left, const uint8_t* rows_right) {
+  ARROW_DCHECK(length <= 8);
+  __m256i offset_left = _mm256_setr_epi64x(0, length, length * 2, length * 3);
+  __m256i offset_left_incr = _mm256_set1_epi64x(length * 4);
+  __m256i mask = _mm256_set1_epi64x(~0ULL >> (8 * (8 - length)));
+
+  constexpr uint32_t unroll = 4;
+  for (uint32_t i = 0; i < num_rows / unroll; ++i) {
+    auto key_left = _mm256_i64gather_epi64(
+        reinterpret_cast<arrow::util::int64_for_gather_t*>(rows_left), offset_left, 1);
+    offset_left = _mm256_add_epi64(offset_left, offset_left_incr);
+    __m128i offset_right =
+        _mm_loadu_si128(reinterpret_cast<const __m128i*>(left_to_right_map) + i);
+    offset_right = _mm_mullo_epi32(offset_right, _mm_set1_epi32(length));
+
+    auto key_right = _mm256_i32gather_epi64(
+        reinterpret_cast<arrow::util::int64_for_gather_t*>(rows_right), offset_right, 1);
+    uint32_t cmp = _mm256_movemask_epi8(_mm256_cmpeq_epi64(
+        _mm256_and_si256(key_left, mask), _mm256_and_si256(key_right, mask)));
+    reinterpret_cast<uint32_t*>(match_bytevector)[i] &= cmp;
+  }
+
+  uint32_t num_rows_processed = num_rows - (num_rows % unroll);
+  return num_rows_processed;
+}
+
+uint32_t KeyCompare::CompareFixedLength_UpTo16B_avx2(
+    uint32_t num_rows, const uint32_t* left_to_right_map, uint8_t* match_bytevector,
+    uint32_t length, const uint8_t* rows_left, const uint8_t* rows_right) {
+  ARROW_DCHECK(length <= 16);
+
+  constexpr uint64_t kByteSequence0To7 = 0x0706050403020100ULL;
+  constexpr uint64_t kByteSequence8To15 = 0x0f0e0d0c0b0a0908ULL;
+
+  __m256i mask =
+      _mm256_cmpgt_epi8(_mm256_set1_epi8(length),
+                        _mm256_setr_epi64x(kByteSequence0To7, kByteSequence8To15,
+                                           kByteSequence0To7, kByteSequence8To15));
+  const uint8_t* key_left_ptr = rows_left;
+
+  constexpr uint32_t unroll = 2;
+  for (uint32_t i = 0; i < num_rows / unroll; ++i) {
+    auto key_left = _mm256_inserti128_si256(
+        _mm256_castsi128_si256(
+            _mm_loadu_si128(reinterpret_cast<const __m128i*>(key_left_ptr))),
+        _mm_loadu_si128(reinterpret_cast<const __m128i*>(key_left_ptr + length)), 1);
+    key_left_ptr += length * 2;
+    auto key_right = _mm256_inserti128_si256(
+        _mm256_castsi128_si256(_mm_loadu_si128(reinterpret_cast<const __m128i*>(
+            rows_right + length * left_to_right_map[2 * i]))),
+        _mm_loadu_si128(reinterpret_cast<const __m128i*>(
+            rows_right + length * left_to_right_map[2 * i + 1])),
+        1);
+    __m256i cmp = _mm256_cmpeq_epi64(_mm256_and_si256(key_left, mask),
+                                     _mm256_and_si256(key_right, mask));
+    cmp = _mm256_and_si256(cmp, _mm256_shuffle_epi32(cmp, 0xee));  // 0b11101110
+    cmp = _mm256_permute4x64_epi64(cmp, 0x08);                     // 0b00001000
+    reinterpret_cast<uint16_t*>(match_bytevector)[i] &=
+        (_mm256_movemask_epi8(cmp) & 0xffff);
+  }
+
+  uint32_t num_rows_processed = num_rows - (num_rows % unroll);
+  return num_rows_processed;
+}
+
+uint32_t KeyCompare::CompareFixedLength_avx2(uint32_t num_rows,
+                                             const uint32_t* left_to_right_map,
+                                             uint8_t* match_bytevector, uint32_t length,
+                                             const uint8_t* rows_left,
+                                             const uint8_t* rows_right) {
+  ARROW_DCHECK(length > 0);
+
+  constexpr uint64_t kByteSequence0To7 = 0x0706050403020100ULL;
+  constexpr uint64_t kByteSequence8To15 = 0x0f0e0d0c0b0a0908ULL;
+  constexpr uint64_t kByteSequence16To23 = 0x1716151413121110ULL;
+  constexpr uint64_t kByteSequence24To31 = 0x1f1e1d1c1b1a1918ULL;
+
+  // Non-zero length guarantees no underflow
+  int32_t num_loops_less_one = (static_cast<int32_t>(length) + 31) / 32 - 1;
+
+  __m256i tail_mask =
+      _mm256_cmpgt_epi8(_mm256_set1_epi8(length - num_loops_less_one * 32),
+                        _mm256_setr_epi64x(kByteSequence0To7, kByteSequence8To15,
+                                           kByteSequence16To23, kByteSequence24To31));
+
+  for (uint32_t irow_left = 0; irow_left < num_rows; ++irow_left) {
+    uint32_t irow_right = left_to_right_map[irow_left];
+    uint32_t begin_left = length * irow_left;
+    uint32_t begin_right = length * irow_right;
+    const __m256i* key_left_ptr =
+        reinterpret_cast<const __m256i*>(rows_left + begin_left);
+    const __m256i* key_right_ptr =
+        reinterpret_cast<const __m256i*>(rows_right + begin_right);
+    __m256i result_or = _mm256_setzero_si256();
+    int32_t i;
+    // length cannot be zero
+    for (i = 0; i < num_loops_less_one; ++i) {
+      __m256i key_left = _mm256_loadu_si256(key_left_ptr + i);
+      __m256i key_right = _mm256_loadu_si256(key_right_ptr + i);
+      result_or = _mm256_or_si256(result_or, _mm256_xor_si256(key_left, key_right));
+    }
+
+    __m256i key_left = _mm256_loadu_si256(key_left_ptr + i);
+    __m256i key_right = _mm256_loadu_si256(key_right_ptr + i);
+    result_or = _mm256_or_si256(
+        result_or, _mm256_and_si256(tail_mask, _mm256_xor_si256(key_left, key_right)));
+    int result = _mm256_testz_si256(result_or, result_or) * 0xff;
+    match_bytevector[irow_left] &= result;
+  }
+
+  uint32_t num_rows_processed = num_rows;
+  return num_rows_processed;
+}
+
+void KeyCompare::CompareVaryingLength_avx2(
+    uint32_t num_rows, const uint32_t* left_to_right_map, uint8_t* match_bytevector,
+    const uint8_t* rows_left, const uint8_t* rows_right, const uint32_t* offsets_left,
+    const uint32_t* offsets_right) {
+  for (uint32_t irow_left = 0; irow_left < num_rows; ++irow_left) {
+    uint32_t irow_right = left_to_right_map[irow_left];
+    uint32_t begin_left = offsets_left[irow_left];
+    uint32_t begin_right = offsets_right[irow_right];
+    uint32_t length_left = offsets_left[irow_left + 1] - begin_left;
+    uint32_t length_right = offsets_right[irow_right + 1] - begin_right;
+    uint32_t length = std::min(length_left, length_right);
+    auto key_left_ptr = reinterpret_cast<const __m256i*>(rows_left + begin_left);
+    auto key_right_ptr = reinterpret_cast<const __m256i*>(rows_right + begin_right);
+    __m256i result_or = _mm256_setzero_si256();
+    int32_t i;
+    // length can be zero
+    for (i = 0; i < (static_cast<int32_t>(length) + 31) / 32 - 1; ++i) {
+      __m256i key_left = _mm256_loadu_si256(key_left_ptr + i);
+      __m256i key_right = _mm256_loadu_si256(key_right_ptr + i);
+      result_or = _mm256_or_si256(result_or, _mm256_xor_si256(key_left, key_right));
+    }
+
+    constexpr uint64_t kByteSequence0To7 = 0x0706050403020100ULL;
+    constexpr uint64_t kByteSequence8To15 = 0x0f0e0d0c0b0a0908ULL;
+    constexpr uint64_t kByteSequence16To23 = 0x1716151413121110ULL;
+    constexpr uint64_t kByteSequence24To31 = 0x1f1e1d1c1b1a1918ULL;
+
+    __m256i tail_mask =
+        _mm256_cmpgt_epi8(_mm256_set1_epi8(length - i * 32),
+                          _mm256_setr_epi64x(kByteSequence0To7, kByteSequence8To15,
+                                             kByteSequence16To23, kByteSequence24To31));
+
+    __m256i key_left = _mm256_loadu_si256(key_left_ptr + i);
+    __m256i key_right = _mm256_loadu_si256(key_right_ptr + i);
+    result_or = _mm256_or_si256(
+        result_or, _mm256_and_si256(tail_mask, _mm256_xor_si256(key_left, key_right)));
+    int result = _mm256_testz_si256(result_or, result_or) * 0xff;
+    match_bytevector[irow_left] &= result;
+  }
+}
+
+#endif
+
+}  // namespace compute
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/exec/key_encode.cc b/cpp/src/arrow/compute/exec/key_encode.cc
new file mode 100644
index 00000000000..0c5f27c51c1
--- /dev/null
+++ b/cpp/src/arrow/compute/exec/key_encode.cc
@@ -0,0 +1,1625 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/compute/exec/key_encode.h"
+
+#include <memory.h>
+
+#include <algorithm>
+
+#include "arrow/compute/exec/util.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/ubsan.h"
+
+namespace arrow {
+namespace compute {
+
+KeyEncoder::KeyRowArray::KeyRowArray()
+    : pool_(nullptr), rows_capacity_(0), bytes_capacity_(0) {}
+
+Status KeyEncoder::KeyRowArray::Init(MemoryPool* pool, const KeyRowMetadata& metadata) {
+  pool_ = pool;
+  metadata_ = metadata;
+
+  ARROW_DCHECK(!null_masks_ && !offsets_ && !rows_);
+
+  constexpr int64_t rows_capacity = 8;
+  constexpr int64_t bytes_capacity = 1024;
+
+  // Null masks
+  ARROW_ASSIGN_OR_RAISE(auto null_masks,
+                        AllocateResizableBuffer(size_null_masks(rows_capacity), pool_));
+  null_masks_ = std::move(null_masks);
+  memset(null_masks_->mutable_data(), 0, size_null_masks(rows_capacity));
+
+  // Offsets and rows
+  if (!metadata.is_fixed_length) {
+    ARROW_ASSIGN_OR_RAISE(auto offsets,
+                          AllocateResizableBuffer(size_offsets(rows_capacity), pool_));
+    offsets_ = std::move(offsets);
+    memset(offsets_->mutable_data(), 0, size_offsets(rows_capacity));
+    reinterpret_cast<uint32_t*>(offsets_->mutable_data())[0] = 0;
+
+    ARROW_ASSIGN_OR_RAISE(
+        auto rows,
+        AllocateResizableBuffer(size_rows_varying_length(bytes_capacity), pool_));
+    rows_ = std::move(rows);
+    memset(rows_->mutable_data(), 0, size_rows_varying_length(bytes_capacity));
+    bytes_capacity_ = size_rows_varying_length(bytes_capacity) - padding_for_vectors;
+  } else {
+    ARROW_ASSIGN_OR_RAISE(
+        auto rows, AllocateResizableBuffer(size_rows_fixed_length(rows_capacity), pool_));
+    rows_ = std::move(rows);
+    memset(rows_->mutable_data(), 0, size_rows_fixed_length(rows_capacity));
+    bytes_capacity_ = size_rows_fixed_length(rows_capacity) - padding_for_vectors;
+  }
+
+  update_buffer_pointers();
+
+  rows_capacity_ = rows_capacity;
+
+  num_rows_ = 0;
+  num_rows_for_has_any_nulls_ = 0;
+  has_any_nulls_ = false;
+
+  return Status::OK();
+}
+
+void KeyEncoder::KeyRowArray::Clean() {
+  num_rows_ = 0;
+  num_rows_for_has_any_nulls_ = 0;
+  has_any_nulls_ = false;
+
+  if (!metadata_.is_fixed_length) {
+    reinterpret_cast<uint32_t*>(offsets_->mutable_data())[0] = 0;
+  }
+}
+
+int64_t KeyEncoder::KeyRowArray::size_null_masks(int64_t num_rows) {
+  return num_rows * metadata_.null_masks_bytes_per_row + padding_for_vectors;
+}
+
+int64_t KeyEncoder::KeyRowArray::size_offsets(int64_t num_rows) {
+  return (num_rows + 1) * sizeof(uint32_t) + padding_for_vectors;
+}
+
+int64_t KeyEncoder::KeyRowArray::size_rows_fixed_length(int64_t num_rows) {
+  return num_rows * metadata_.fixed_length + padding_for_vectors;
+}
+
+int64_t KeyEncoder::KeyRowArray::size_rows_varying_length(int64_t num_bytes) {
+  return num_bytes + padding_for_vectors;
+}
+
+void KeyEncoder::KeyRowArray::update_buffer_pointers() {
+  buffers_[0] = mutable_buffers_[0] = null_masks_->mutable_data();
+  if (metadata_.is_fixed_length) {
+    buffers_[1] = mutable_buffers_[1] = rows_->mutable_data();
+    buffers_[2] = mutable_buffers_[2] = nullptr;
+  } else {
+    buffers_[1] = mutable_buffers_[1] = offsets_->mutable_data();
+    buffers_[2] = mutable_buffers_[2] = rows_->mutable_data();
+  }
+}
+
+Status KeyEncoder::KeyRowArray::ResizeFixedLengthBuffers(int64_t num_extra_rows) {
+  if (rows_capacity_ >= num_rows_ + num_extra_rows) {
+    return Status::OK();
+  }
+
+  int64_t rows_capacity_new = std::max(static_cast<int64_t>(1), 2 * rows_capacity_);
+  while (rows_capacity_new < num_rows_ + num_extra_rows) {
+    rows_capacity_new *= 2;
+  }
+
+  // Null masks
+  RETURN_NOT_OK(null_masks_->Resize(size_null_masks(rows_capacity_new), false));
+  memset(null_masks_->mutable_data() + size_null_masks(rows_capacity_), 0,
+         size_null_masks(rows_capacity_new) - size_null_masks(rows_capacity_));
+
+  // Either offsets or rows
+  if (!metadata_.is_fixed_length) {
+    RETURN_NOT_OK(offsets_->Resize(size_offsets(rows_capacity_new), false));
+    memset(offsets_->mutable_data() + size_offsets(rows_capacity_), 0,
+           size_offsets(rows_capacity_new) - size_offsets(rows_capacity_));
+  } else {
+    RETURN_NOT_OK(rows_->Resize(size_rows_fixed_length(rows_capacity_new), false));
+    memset(rows_->mutable_data() + size_rows_fixed_length(rows_capacity_), 0,
+           size_rows_fixed_length(rows_capacity_new) -
+               size_rows_fixed_length(rows_capacity_));
+    bytes_capacity_ = size_rows_fixed_length(rows_capacity_new) - padding_for_vectors;
+  }
+
+  update_buffer_pointers();
+
+  rows_capacity_ = rows_capacity_new;
+
+  return Status::OK();
+}
+
+Status KeyEncoder::KeyRowArray::ResizeOptionalVaryingLengthBuffer(
+    int64_t num_extra_bytes) {
+  int64_t num_bytes = offsets()[num_rows_];
+  if (bytes_capacity_ >= num_bytes + num_extra_bytes || metadata_.is_fixed_length) {
+    return Status::OK();
+  }
+
+  int64_t bytes_capacity_new = std::max(static_cast<int64_t>(1), 2 * bytes_capacity_);
+  while (bytes_capacity_new < num_bytes + num_extra_bytes) {
+    bytes_capacity_new *= 2;
+  }
+
+  RETURN_NOT_OK(rows_->Resize(size_rows_varying_length(bytes_capacity_new), false));
+  memset(rows_->mutable_data() + size_rows_varying_length(bytes_capacity_), 0,
+         size_rows_varying_length(bytes_capacity_new) -
+             size_rows_varying_length(bytes_capacity_));
+
+  update_buffer_pointers();
+
+  bytes_capacity_ = bytes_capacity_new;
+
+  return Status::OK();
+}
+
+Status KeyEncoder::KeyRowArray::AppendSelectionFrom(const KeyRowArray& from,
+                                                    uint32_t num_rows_to_append,
+                                                    const uint16_t* source_row_ids) {
+  ARROW_DCHECK(metadata_.is_compatible(from.metadata()));
+
+  RETURN_NOT_OK(ResizeFixedLengthBuffers(num_rows_to_append));
+
+  if (!metadata_.is_fixed_length) {
+    // Varying-length rows
+    const uint32_t* from_offsets =
+        reinterpret_cast<const uint32_t*>(from.offsets_->data());
+    uint32_t* to_offsets = reinterpret_cast<uint32_t*>(offsets_->mutable_data());
+    uint32_t total_length = to_offsets[num_rows_];
+    uint32_t total_length_to_append = 0;
+    for (uint32_t i = 0; i < num_rows_to_append; ++i) {
+      uint16_t row_id = source_row_ids[i];
+      uint32_t length = from_offsets[row_id + 1] - from_offsets[row_id];
+      total_length_to_append += length;
+      to_offsets[num_rows_ + i + 1] = total_length + total_length_to_append;
+    }
+
+    RETURN_NOT_OK(ResizeOptionalVaryingLengthBuffer(total_length_to_append));
+
+    const uint8_t* src = from.rows_->data();
+    uint8_t* dst = rows_->mutable_data() + total_length;
+    for (uint32_t i = 0; i < num_rows_to_append; ++i) {
+      uint16_t row_id = source_row_ids[i];
+      uint32_t length = from_offsets[row_id + 1] - from_offsets[row_id];
+      const uint64_t* src64 =
+          reinterpret_cast<const uint64_t*>(src + from_offsets[row_id]);
+      uint64_t* dst64 = reinterpret_cast<uint64_t*>(dst);
+      for (uint32_t j = 0; j < (length + 7) / 8; ++j) {
+        dst64[j] = src64[j];
+      }
+      dst += length;
+    }
+  } else {
+    // Fixed-length rows
+    const uint8_t* src = from.rows_->data();
+    uint8_t* dst = rows_->mutable_data() + num_rows_ * metadata_.fixed_length;
+    for (uint32_t i = 0; i < num_rows_to_append; ++i) {
+      uint16_t row_id = source_row_ids[i];
+      uint32_t length = metadata_.fixed_length;
+      const uint64_t* src64 = reinterpret_cast<const uint64_t*>(src + length * row_id);
+      uint64_t* dst64 = reinterpret_cast<uint64_t*>(dst);
+      for (uint32_t j = 0; j < (length + 7) / 8; ++j) {
+        dst64[j] = src64[j];
+      }
+      dst += length;
+    }
+  }
+
+  // Null masks
+  uint32_t byte_length = metadata_.null_masks_bytes_per_row;
+  uint64_t dst_byte_offset = num_rows_ * byte_length;
+  const uint8_t* src_base = from.null_masks_->data();
+  uint8_t* dst_base = null_masks_->mutable_data();
+  for (uint32_t i = 0; i < num_rows_to_append; ++i) {
+    uint32_t row_id = source_row_ids[i];
+    int64_t src_byte_offset = row_id * byte_length;
+    const uint8_t* src = src_base + src_byte_offset;
+    uint8_t* dst = dst_base + dst_byte_offset;
+    for (uint32_t ibyte = 0; ibyte < byte_length; ++ibyte) {
+      dst[ibyte] = src[ibyte];
+    }
+    dst_byte_offset += byte_length;
+  }
+
+  num_rows_ += num_rows_to_append;
+
+  return Status::OK();
+}
+
+Status KeyEncoder::KeyRowArray::AppendEmpty(uint32_t num_rows_to_append,
+                                            uint32_t num_extra_bytes_to_append) {
+  RETURN_NOT_OK(ResizeFixedLengthBuffers(num_rows_to_append));
+  RETURN_NOT_OK(ResizeOptionalVaryingLengthBuffer(num_extra_bytes_to_append));
+  num_rows_ += num_rows_to_append;
+  if (metadata_.row_alignment > 1 || metadata_.string_alignment > 1) {
+    memset(rows_->mutable_data(), 0, bytes_capacity_);
+  }
+  return Status::OK();
+}
+
+bool KeyEncoder::KeyRowArray::has_any_nulls(const KeyEncoderContext* ctx) const {
+  if (has_any_nulls_) {
+    return true;
+  }
+  if (num_rows_for_has_any_nulls_ < num_rows_) {
+    auto size_per_row = metadata().null_masks_bytes_per_row;
+    has_any_nulls_ = !util::BitUtil::are_all_bytes_zero(
+        ctx->hardware_flags, null_masks() + size_per_row * num_rows_for_has_any_nulls_,
+        static_cast<uint32_t>(size_per_row * (num_rows_ - num_rows_for_has_any_nulls_)));
+    num_rows_for_has_any_nulls_ = num_rows_;
+  }
+  return has_any_nulls_;
+}
+
+KeyEncoder::KeyColumnArray::KeyColumnArray(const KeyColumnMetadata& metadata,
+                                           const KeyColumnArray& left,
+                                           const KeyColumnArray& right,
+                                           int buffer_id_to_replace) {
+  metadata_ = metadata;
+  length_ = left.length();
+  for (int i = 0; i < max_buffers_; ++i) {
+    buffers_[i] = left.buffers_[i];
+    mutable_buffers_[i] = left.mutable_buffers_[i];
+  }
+  buffers_[buffer_id_to_replace] = right.buffers_[buffer_id_to_replace];
+  mutable_buffers_[buffer_id_to_replace] = right.mutable_buffers_[buffer_id_to_replace];
+}
+
+KeyEncoder::KeyColumnArray::KeyColumnArray(const KeyColumnMetadata& metadata,
+                                           int64_t length, const uint8_t* buffer0,
+                                           const uint8_t* buffer1,
+                                           const uint8_t* buffer2) {
+  metadata_ = metadata;
+  length_ = length;
+  buffers_[0] = buffer0;
+  buffers_[1] = buffer1;
+  buffers_[2] = buffer2;
+  mutable_buffers_[0] = mutable_buffers_[1] = mutable_buffers_[2] = nullptr;
+}
+
+KeyEncoder::KeyColumnArray::KeyColumnArray(const KeyColumnMetadata& metadata,
+                                           int64_t length, uint8_t* buffer0,
+                                           uint8_t* buffer1, uint8_t* buffer2) {
+  metadata_ = metadata;
+  length_ = length;
+  buffers_[0] = mutable_buffers_[0] = buffer0;
+  buffers_[1] = mutable_buffers_[1] = buffer1;
+  buffers_[2] = mutable_buffers_[2] = buffer2;
+}
+
+KeyEncoder::KeyColumnArray::KeyColumnArray(const KeyColumnArray& from, int64_t start,
+                                           int64_t length) {
+  ARROW_DCHECK((start % 8) == 0);
+  metadata_ = from.metadata_;
+  length_ = length;
+  uint32_t fixed_size =
+      !metadata_.is_fixed_length ? sizeof(uint32_t) : metadata_.fixed_length;
+
+  buffers_[0] = from.buffers_[0] ? from.buffers_[0] + start / 8 : nullptr;
+  mutable_buffers_[0] =
+      from.mutable_buffers_[0] ? from.mutable_buffers_[0] + start / 8 : nullptr;
+
+  if (fixed_size == 0) {
+    buffers_[1] = from.buffers_[1] ? from.buffers_[1] + start / 8 : nullptr;
+    mutable_buffers_[1] =
+        from.mutable_buffers_[1] ? from.mutable_buffers_[1] + start / 8 : nullptr;
+  } else {
+    buffers_[1] = from.buffers_[1] ? from.buffers_[1] + start * fixed_size : nullptr;
+    mutable_buffers_[1] = from.mutable_buffers_[1]
+                              ? from.mutable_buffers_[1] + start * fixed_size
+                              : nullptr;
+  }
+
+  buffers_[2] = from.buffers_[2];
+  mutable_buffers_[2] = from.mutable_buffers_[2];
+}
+
+KeyEncoder::KeyColumnArray KeyEncoder::TransformBoolean::ArrayReplace(
+    const KeyColumnArray& column, const KeyColumnArray& temp) {
+  // Make sure that the temp buffer is large enough
+  ARROW_DCHECK(temp.length() >= column.length() && temp.metadata().is_fixed_length &&
+               temp.metadata().fixed_length >= sizeof(uint8_t));
+  KeyColumnMetadata metadata;
+  metadata.is_fixed_length = true;
+  metadata.fixed_length = sizeof(uint8_t);
+  constexpr int buffer_index = 1;
+  KeyColumnArray result = KeyColumnArray(metadata, column, temp, buffer_index);
+  return result;
+}
+
+void KeyEncoder::TransformBoolean::PreEncode(const KeyColumnArray& input,
+                                             KeyColumnArray* output,
+                                             KeyEncoderContext* ctx) {
+  // Make sure that metadata and lengths are compatible.
+  ARROW_DCHECK(output->metadata().is_fixed_length == input.metadata().is_fixed_length);
+  ARROW_DCHECK(output->metadata().fixed_length == 1 &&
+               input.metadata().fixed_length == 0);
+  ARROW_DCHECK(output->length() == input.length());
+  constexpr int buffer_index = 1;
+  ARROW_DCHECK(input.data(buffer_index) != nullptr);
+  ARROW_DCHECK(output->mutable_data(buffer_index) != nullptr);
+  util::BitUtil::bits_to_bytes(ctx->hardware_flags, static_cast<int>(input.length()),
+                               input.data(buffer_index),
+                               output->mutable_data(buffer_index));
+}
+
+void KeyEncoder::TransformBoolean::PostDecode(const KeyColumnArray& input,
+                                              KeyColumnArray* output,
+                                              KeyEncoderContext* ctx) {
+  // Make sure that metadata and lengths are compatible.
+  ARROW_DCHECK(output->metadata().is_fixed_length == input.metadata().is_fixed_length);
+  ARROW_DCHECK(output->metadata().fixed_length == 0 &&
+               input.metadata().fixed_length == 1);
+  ARROW_DCHECK(output->length() == input.length());
+  constexpr int buffer_index = 1;
+  ARROW_DCHECK(input.data(buffer_index) != nullptr);
+  ARROW_DCHECK(output->mutable_data(buffer_index) != nullptr);
+
+  util::BitUtil::bytes_to_bits(ctx->hardware_flags, static_cast<int>(input.length()),
+                               input.data(buffer_index),
+                               output->mutable_data(buffer_index));
+}
+
+bool KeyEncoder::EncoderInteger::IsBoolean(const KeyColumnMetadata& metadata) {
+  return metadata.is_fixed_length && metadata.fixed_length == 0;
+}
+
+bool KeyEncoder::EncoderInteger::UsesTransform(const KeyColumnArray& column) {
+  return IsBoolean(column.metadata());
+}
+
+KeyEncoder::KeyColumnArray KeyEncoder::EncoderInteger::ArrayReplace(
+    const KeyColumnArray& column, const KeyColumnArray& temp) {
+  if (IsBoolean(column.metadata())) {
+    return TransformBoolean::ArrayReplace(column, temp);
+  }
+  return column;
+}
+
+void KeyEncoder::EncoderInteger::PreEncode(const KeyColumnArray& input,
+                                           KeyColumnArray* output,
+                                           KeyEncoderContext* ctx) {
+  if (IsBoolean(input.metadata())) {
+    TransformBoolean::PreEncode(input, output, ctx);
+  }
+}
+
+void KeyEncoder::EncoderInteger::PostDecode(const KeyColumnArray& input,
+                                            KeyColumnArray* output,
+                                            KeyEncoderContext* ctx) {
+  if (IsBoolean(output->metadata())) {
+    TransformBoolean::PostDecode(input, output, ctx);
+  }
+}
+
+void KeyEncoder::EncoderInteger::Encode(uint32_t offset_within_row, KeyRowArray* rows,
+                                        const KeyColumnArray& col, KeyEncoderContext* ctx,
+                                        KeyColumnArray* temp) {
+  KeyColumnArray col_prep;
+  if (UsesTransform(col)) {
+    col_prep = ArrayReplace(col, *temp);
+    PreEncode(col, &col_prep, ctx);
+  } else {
+    col_prep = col;
+  }
+
+  uint32_t num_rows = static_cast<uint32_t>(col.length());
+
+  // When we have a single fixed length column we can just do memcpy
+  if (rows->metadata().is_fixed_length &&
+      rows->metadata().fixed_length == col.metadata().fixed_length) {
+    ARROW_DCHECK(offset_within_row == 0);
+    uint32_t row_size = col.metadata().fixed_length;
+    memcpy(rows->mutable_data(1), col.data(1), num_rows * row_size);
+  } else if (rows->metadata().is_fixed_length) {
+    uint32_t row_size = rows->metadata().fixed_length;
+    uint8_t* row_base = rows->mutable_data(1) + offset_within_row;
+    const uint8_t* col_base = col_prep.data(1);
+    switch (col_prep.metadata().fixed_length) {
+      case 1:
+        for (uint32_t i = 0; i < num_rows; ++i) {
+          row_base[i * row_size] = col_base[i];
+        }
+        break;
+      case 2:
+        for (uint32_t i = 0; i < num_rows; ++i) {
+          *reinterpret_cast<uint16_t*>(row_base + i * row_size) =
+              reinterpret_cast<const uint16_t*>(col_base)[i];
+        }
+        break;
+      case 4:
+        for (uint32_t i = 0; i < num_rows; ++i) {
+          *reinterpret_cast<uint32_t*>(row_base + i * row_size) =
+              reinterpret_cast<const uint32_t*>(col_base)[i];
+        }
+        break;
+      case 8:
+        for (uint32_t i = 0; i < num_rows; ++i) {
+          *reinterpret_cast<uint64_t*>(row_base + i * row_size) =
+              reinterpret_cast<const uint64_t*>(col_base)[i];
+        }
+        break;
+      default:
+        ARROW_DCHECK(false);
+    }
+  } else {
+    const uint32_t* row_offsets = rows->offsets();
+    uint8_t* row_base = rows->mutable_data(2) + offset_within_row;
+    const uint8_t* col_base = col_prep.data(1);
+    switch (col_prep.metadata().fixed_length) {
+      case 1:
+        for (uint32_t i = 0; i < num_rows; ++i) {
+          row_base[row_offsets[i]] = col_base[i];
+        }
+        break;
+      case 2:
+        for (uint32_t i = 0; i < num_rows; ++i) {
+          *reinterpret_cast<uint16_t*>(row_base + row_offsets[i]) =
+              reinterpret_cast<const uint16_t*>(col_base)[i];
+        }
+        break;
+      case 4:
+        for (uint32_t i = 0; i < num_rows; ++i) {
+          *reinterpret_cast<uint32_t*>(row_base + row_offsets[i]) =
+              reinterpret_cast<const uint32_t*>(col_base)[i];
+        }
+        break;
+      case 8:
+        for (uint32_t i = 0; i < num_rows; ++i) {
+          *reinterpret_cast<uint64_t*>(row_base + row_offsets[i]) =
+              reinterpret_cast<const uint64_t*>(col_base)[i];
+        }
+        break;
+      default:
+        ARROW_DCHECK(false);
+    }
+  }
+}
+
+void KeyEncoder::EncoderInteger::Decode(uint32_t start_row, uint32_t num_rows,
+                                        uint32_t offset_within_row,
+                                        const KeyRowArray& rows, KeyColumnArray* col,
+                                        KeyEncoderContext* ctx, KeyColumnArray* temp) {
+  KeyColumnArray col_prep;
+  if (UsesTransform(*col)) {
+    col_prep = ArrayReplace(*col, *temp);
+  } else {
+    col_prep = *col;
+  }
+
+  // When we have a single fixed length column we can just do memcpy
+  if (rows.metadata().is_fixed_length &&
+      col_prep.metadata().fixed_length == rows.metadata().fixed_length) {
+    ARROW_DCHECK(offset_within_row == 0);
+    uint32_t row_size = rows.metadata().fixed_length;
+    memcpy(col_prep.mutable_data(1), rows.data(1) + start_row * row_size,
+           num_rows * row_size);
+  } else if (rows.metadata().is_fixed_length) {
+    uint32_t row_size = rows.metadata().fixed_length;
+    const uint8_t* row_base = rows.data(1) + start_row * row_size;
+    row_base += offset_within_row;
+    uint8_t* col_base = col_prep.mutable_data(1);
+    switch (col_prep.metadata().fixed_length) {
+      case 1:
+        for (uint32_t i = 0; i < num_rows; ++i) {
+          col_base[i] = row_base[i * row_size];
+        }
+        break;
+      case 2:
+        for (uint32_t i = 0; i < num_rows; ++i) {
+          reinterpret_cast<uint16_t*>(col_base)[i] =
+              *reinterpret_cast<const uint16_t*>(row_base + i * row_size);
+        }
+        break;
+      case 4:
+        for (uint32_t i = 0; i < num_rows; ++i) {
+          reinterpret_cast<uint32_t*>(col_base)[i] =
+              *reinterpret_cast<const uint32_t*>(row_base + i * row_size);
+        }
+        break;
+      case 8:
+        for (uint32_t i = 0; i < num_rows; ++i) {
+          reinterpret_cast<uint64_t*>(col_base)[i] =
+              *reinterpret_cast<const uint64_t*>(row_base + i * row_size);
+        }
+        break;
+      default:
+        ARROW_DCHECK(false);
+    }
+  } else {
+    const uint32_t* row_offsets = rows.offsets() + start_row;
+    const uint8_t* row_base = rows.data(2);
+    row_base += offset_within_row;
+    uint8_t* col_base = col_prep.mutable_data(1);
+    switch (col_prep.metadata().fixed_length) {
+      case 1:
+        for (uint32_t i = 0; i < num_rows; ++i) {
+          col_base[i] = row_base[row_offsets[i]];
+        }
+        break;
+      case 2:
+        for (uint32_t i = 0; i < num_rows; ++i) {
+          reinterpret_cast<uint16_t*>(col_base)[i] =
+              *reinterpret_cast<const uint16_t*>(row_base + row_offsets[i]);
+        }
+        break;
+      case 4:
+        for (uint32_t i = 0; i < num_rows; ++i) {
+          reinterpret_cast<uint32_t*>(col_base)[i] =
+              *reinterpret_cast<const uint32_t*>(row_base + row_offsets[i]);
+        }
+        break;
+      case 8:
+        for (uint32_t i = 0; i < num_rows; ++i) {
+          reinterpret_cast<uint64_t*>(col_base)[i] =
+              *reinterpret_cast<const uint64_t*>(row_base + row_offsets[i]);
+        }
+        break;
+      default:
+        ARROW_DCHECK(false);
+    }
+  }
+
+  if (UsesTransform(*col)) {
+    PostDecode(col_prep, col, ctx);
+  }
+}
+
+bool KeyEncoder::EncoderBinary::IsInteger(const KeyColumnMetadata& metadata) {
+  bool is_fixed_length = metadata.is_fixed_length;
+  auto size = metadata.fixed_length;
+  return is_fixed_length &&
+         (size == 0 || size == 1 || size == 2 || size == 4 || size == 8);
+}
+
+void KeyEncoder::EncoderBinary::Encode(uint32_t offset_within_row, KeyRowArray* rows,
+                                       const KeyColumnArray& col, KeyEncoderContext* ctx,
+                                       KeyColumnArray* temp) {
+  if (IsInteger(col.metadata())) {
+    EncoderInteger::Encode(offset_within_row, rows, col, ctx, temp);
+  } else {
+    KeyColumnArray col_prep;
+    if (EncoderInteger::UsesTransform(col)) {
+      col_prep = EncoderInteger::ArrayReplace(col, *temp);
+      EncoderInteger::PreEncode(col, &col_prep, ctx);
+    } else {
+      col_prep = col;
+    }
+
+    bool is_row_fixed_length = rows->metadata().is_fixed_length;
+
+#if defined(ARROW_HAVE_AVX2)
+    if (ctx->has_avx2()) {
+      EncodeHelper_avx2(is_row_fixed_length, offset_within_row, rows, col);
+    } else {
+#endif
+      if (is_row_fixed_length) {
+        EncodeImp<true>(offset_within_row, rows, col);
+      } else {
+        EncodeImp<false>(offset_within_row, rows, col);
+      }
+#if defined(ARROW_HAVE_AVX2)
+    }
+#endif
+  }
+
+  ARROW_DCHECK(temp->metadata().is_fixed_length);
+  ARROW_DCHECK(temp->length() * temp->metadata().fixed_length >=
+               col.length() * static_cast<int64_t>(sizeof(uint16_t)));
+
+  KeyColumnArray temp16bit(KeyColumnMetadata(true, sizeof(uint16_t)), col.length(),
+                           nullptr, temp->mutable_data(1), nullptr);
+  ColumnMemsetNulls(offset_within_row, rows, col, ctx, &temp16bit, 0xae);
+}
+
+void KeyEncoder::EncoderBinary::Decode(uint32_t start_row, uint32_t num_rows,
+                                       uint32_t offset_within_row,
+                                       const KeyRowArray& rows, KeyColumnArray* col,
+                                       KeyEncoderContext* ctx, KeyColumnArray* temp) {
+  if (IsInteger(col->metadata())) {
+    EncoderInteger::Decode(start_row, num_rows, offset_within_row, rows, col, ctx, temp);
+  } else {
+    KeyColumnArray col_prep;
+    if (EncoderInteger::UsesTransform(*col)) {
+      col_prep = EncoderInteger::ArrayReplace(*col, *temp);
+    } else {
+      col_prep = *col;
+    }
+
+    bool is_row_fixed_length = rows.metadata().is_fixed_length;
+
+#if defined(ARROW_HAVE_AVX2)
+    if (ctx->has_avx2()) {
+      DecodeHelper_avx2(is_row_fixed_length, start_row, num_rows, offset_within_row, rows,
+                        col);
+    } else {
+#endif
+      if (is_row_fixed_length) {
+        DecodeImp<true>(start_row, num_rows, offset_within_row, rows, col);
+      } else {
+        DecodeImp<false>(start_row, num_rows, offset_within_row, rows, col);
+      }
+#if defined(ARROW_HAVE_AVX2)
+    }
+#endif
+
+    if (EncoderInteger::UsesTransform(*col)) {
+      EncoderInteger::PostDecode(col_prep, col, ctx);
+    }
+  }
+}
+
+template <bool is_row_fixed_length>
+void KeyEncoder::EncoderBinary::EncodeImp(uint32_t offset_within_row, KeyRowArray* rows,
+                                          const KeyColumnArray& col) {
+  EncodeDecodeHelper<is_row_fixed_length, true>(
+      0, static_cast<uint32_t>(col.length()), offset_within_row, rows, rows, &col,
+      nullptr, [](uint8_t* dst, const uint8_t* src, int64_t length) {
+        uint64_t* dst64 = reinterpret_cast<uint64_t*>(dst);
+        const uint64_t* src64 = reinterpret_cast<const uint64_t*>(src);
+        uint32_t istripe;
+        for (istripe = 0; istripe < length / 8; ++istripe) {
+          dst64[istripe] = util::SafeLoad(src64 + istripe);
+        }
+        if ((length % 8) > 0) {
+          uint64_t mask_last = ~0ULL >> (8 * (8 * (istripe + 1) - length));
+          dst64[istripe] = (dst64[istripe] & ~mask_last) |
+                           (util::SafeLoad(src64 + istripe) & mask_last);
+        }
+      });
+}
+
+template <bool is_row_fixed_length>
+void KeyEncoder::EncoderBinary::DecodeImp(uint32_t start_row, uint32_t num_rows,
+                                          uint32_t offset_within_row,
+                                          const KeyRowArray& rows, KeyColumnArray* col) {
+  EncodeDecodeHelper<is_row_fixed_length, false>(
+      start_row, num_rows, offset_within_row, &rows, nullptr, col, col,
+      [](uint8_t* dst, const uint8_t* src, int64_t length) {
+        for (uint32_t istripe = 0; istripe < (length + 7) / 8; ++istripe) {
+          uint64_t* dst64 = reinterpret_cast<uint64_t*>(dst);
+          const uint64_t* src64 = reinterpret_cast<const uint64_t*>(src);
+          util::SafeStore(dst64 + istripe, src64[istripe]);
+        }
+      });
+}
+
+void KeyEncoder::EncoderBinary::ColumnMemsetNulls(
+    uint32_t offset_within_row, KeyRowArray* rows, const KeyColumnArray& col,
+    KeyEncoderContext* ctx, KeyColumnArray* temp_vector_16bit, uint8_t byte_value) {
+  typedef void (*ColumnMemsetNullsImp_t)(uint32_t, KeyRowArray*, const KeyColumnArray&,
+                                         KeyEncoderContext*, KeyColumnArray*, uint8_t);
+  static const ColumnMemsetNullsImp_t ColumnMemsetNullsImp_fn[] = {
+      ColumnMemsetNullsImp<false, 1>,  ColumnMemsetNullsImp<false, 2>,
+      ColumnMemsetNullsImp<false, 4>,  ColumnMemsetNullsImp<false, 8>,
+      ColumnMemsetNullsImp<false, 16>, ColumnMemsetNullsImp<true, 1>,
+      ColumnMemsetNullsImp<true, 2>,   ColumnMemsetNullsImp<true, 4>,
+      ColumnMemsetNullsImp<true, 8>,   ColumnMemsetNullsImp<true, 16>};
+  uint32_t col_width = col.metadata().fixed_length;
+  int dispatch_const =
+      (rows->metadata().is_fixed_length ? 5 : 0) +
+      (col_width == 1 ? 0
+                      : col_width == 2 ? 1 : col_width == 4 ? 2 : col_width == 8 ? 3 : 4);
+  ColumnMemsetNullsImp_fn[dispatch_const](offset_within_row, rows, col, ctx,
+                                          temp_vector_16bit, byte_value);
+}
+
+template <bool is_row_fixed_length, uint32_t col_width>
+void KeyEncoder::EncoderBinary::ColumnMemsetNullsImp(
+    uint32_t offset_within_row, KeyRowArray* rows, const KeyColumnArray& col,
+    KeyEncoderContext* ctx, KeyColumnArray* temp_vector_16bit, uint8_t byte_value) {
+  // Nothing to do when there are no nulls
+  if (!col.data(0)) {
+    return;
+  }
+
+  uint32_t num_rows = static_cast<uint32_t>(col.length());
+
+  // Temp vector needs space for the required number of rows
+  ARROW_DCHECK(temp_vector_16bit->length() >= num_rows);
+  ARROW_DCHECK(temp_vector_16bit->metadata().is_fixed_length &&
+               temp_vector_16bit->metadata().fixed_length == sizeof(uint16_t));
+  uint16_t* temp_vector = reinterpret_cast<uint16_t*>(temp_vector_16bit->mutable_data(1));
+
+  // Bit vector to index vector of null positions
+  int num_selected;
+  util::BitUtil::bits_to_indexes(0, ctx->hardware_flags, static_cast<int>(col.length()),
+                                 col.data(0), &num_selected, temp_vector);
+
+  for (int i = 0; i < num_selected; ++i) {
+    uint32_t row_id = temp_vector[i];
+
+    // Target binary field pointer
+    uint8_t* dst;
+    if (is_row_fixed_length) {
+      dst = rows->mutable_data(1) + rows->metadata().fixed_length * row_id;
+    } else {
+      dst = rows->mutable_data(2) + rows->offsets()[row_id];
+    }
+    dst += offset_within_row;
+
+    if (col_width == 1) {
+      *dst = byte_value;
+    } else if (col_width == 2) {
+      *reinterpret_cast<uint16_t*>(dst) =
+          (static_cast<uint16_t>(byte_value) * static_cast<uint16_t>(0x0101));
+    } else if (col_width == 4) {
+      *reinterpret_cast<uint32_t*>(dst) =
+          (static_cast<uint32_t>(byte_value) * static_cast<uint32_t>(0x01010101));
+    } else if (col_width == 8) {
+      *reinterpret_cast<uint64_t*>(dst) =
+          (static_cast<uint64_t>(byte_value) * 0x0101010101010101ULL);
+    } else {
+      uint64_t value = (static_cast<uint64_t>(byte_value) * 0x0101010101010101ULL);
+      uint32_t col_width_actual = col.metadata().fixed_length;
+      uint32_t j;
+      for (j = 0; j < col_width_actual / 8; ++j) {
+        reinterpret_cast<uint64_t*>(dst)[j] = value;
+      }
+      int tail = col_width_actual % 8;
+      if (tail) {
+        uint64_t mask = ~0ULL >> (8 * (8 - tail));
+        reinterpret_cast<uint64_t*>(dst)[j] =
+            (reinterpret_cast<const uint64_t*>(dst)[j] & ~mask) | (value & mask);
+      }
+    }
+  }
+}
+
+void KeyEncoder::EncoderBinaryPair::Encode(uint32_t offset_within_row, KeyRowArray* rows,
+                                           const KeyColumnArray& col1,
+                                           const KeyColumnArray& col2,
+                                           KeyEncoderContext* ctx, KeyColumnArray* temp1,
+                                           KeyColumnArray* temp2) {
+  ARROW_DCHECK(CanProcessPair(col1.metadata(), col2.metadata()));
+
+  KeyColumnArray col_prep[2];
+  if (EncoderInteger::UsesTransform(col1)) {
+    col_prep[0] = EncoderInteger::ArrayReplace(col1, *temp1);
+    EncoderInteger::PreEncode(col1, &(col_prep[0]), ctx);
+  } else {
+    col_prep[0] = col1;
+  }
+  if (EncoderInteger::UsesTransform(col2)) {
+    col_prep[1] = EncoderInteger::ArrayReplace(col2, *temp2);
+    EncoderInteger::PreEncode(col2, &(col_prep[1]), ctx);
+  } else {
+    col_prep[1] = col2;
+  }
+
+  uint32_t col_width1 = col_prep[0].metadata().fixed_length;
+  uint32_t col_width2 = col_prep[1].metadata().fixed_length;
+  int log_col_width1 =
+      col_width1 == 8 ? 3 : col_width1 == 4 ? 2 : col_width1 == 2 ? 1 : 0;
+  int log_col_width2 =
+      col_width2 == 8 ? 3 : col_width2 == 4 ? 2 : col_width2 == 2 ? 1 : 0;
+
+  bool is_row_fixed_length = rows->metadata().is_fixed_length;
+
+  uint32_t num_rows = static_cast<uint32_t>(col1.length());
+  uint32_t num_processed = 0;
+#if defined(ARROW_HAVE_AVX2)
+  if (ctx->has_avx2() && col_width1 == col_width2) {
+    num_processed = EncodeHelper_avx2(is_row_fixed_length, col_width1, offset_within_row,
+                                      rows, col_prep[0], col_prep[1]);
+  }
+#endif
+  if (num_processed < num_rows) {
+    using EncodeImp_t = void (*)(uint32_t, uint32_t, KeyRowArray*, const KeyColumnArray&,
+                                 const KeyColumnArray&);
+    static const EncodeImp_t EncodeImp_fn[] = {
+        EncodeImp<false, uint8_t, uint8_t>,   EncodeImp<false, uint16_t, uint8_t>,
+        EncodeImp<false, uint32_t, uint8_t>,  EncodeImp<false, uint64_t, uint8_t>,
+        EncodeImp<false, uint8_t, uint16_t>,  EncodeImp<false, uint16_t, uint16_t>,
+        EncodeImp<false, uint32_t, uint16_t>, EncodeImp<false, uint64_t, uint16_t>,
+        EncodeImp<false, uint8_t, uint32_t>,  EncodeImp<false, uint16_t, uint32_t>,
+        EncodeImp<false, uint32_t, uint32_t>, EncodeImp<false, uint64_t, uint32_t>,
+        EncodeImp<false, uint8_t, uint64_t>,  EncodeImp<false, uint16_t, uint64_t>,
+        EncodeImp<false, uint32_t, uint64_t>, EncodeImp<false, uint64_t, uint64_t>,
+        EncodeImp<true, uint8_t, uint8_t>,    EncodeImp<true, uint16_t, uint8_t>,
+        EncodeImp<true, uint32_t, uint8_t>,   EncodeImp<true, uint64_t, uint8_t>,
+        EncodeImp<true, uint8_t, uint16_t>,   EncodeImp<true, uint16_t, uint16_t>,
+        EncodeImp<true, uint32_t, uint16_t>,  EncodeImp<true, uint64_t, uint16_t>,
+        EncodeImp<true, uint8_t, uint32_t>,   EncodeImp<true, uint16_t, uint32_t>,
+        EncodeImp<true, uint32_t, uint32_t>,  EncodeImp<true, uint64_t, uint32_t>,
+        EncodeImp<true, uint8_t, uint64_t>,   EncodeImp<true, uint16_t, uint64_t>,
+        EncodeImp<true, uint32_t, uint64_t>,  EncodeImp<true, uint64_t, uint64_t>};
+    int dispatch_const = (log_col_width2 << 2) | log_col_width1;
+    dispatch_const += (is_row_fixed_length ? 16 : 0);
+    EncodeImp_fn[dispatch_const](num_processed, offset_within_row, rows, col_prep[0],
+                                 col_prep[1]);
+  }
+}
+
+template <bool is_row_fixed_length, typename col1_type, typename col2_type>
+void KeyEncoder::EncoderBinaryPair::EncodeImp(uint32_t num_rows_to_skip,
+                                              uint32_t offset_within_row,
+                                              KeyRowArray* rows,
+                                              const KeyColumnArray& col1,
+                                              const KeyColumnArray& col2) {
+  const uint8_t* src_A = col1.data(1);
+  const uint8_t* src_B = col2.data(1);
+
+  uint32_t num_rows = static_cast<uint32_t>(col1.length());
+
+  uint32_t fixed_length = rows->metadata().fixed_length;
+  const uint32_t* offsets;
+  uint8_t* dst_base;
+  if (is_row_fixed_length) {
+    dst_base = rows->mutable_data(1) + offset_within_row;
+    offsets = nullptr;
+  } else {
+    dst_base = rows->mutable_data(2) + offset_within_row;
+    offsets = rows->offsets();
+  }
+
+  using col1_type_const = typename std::add_const<col1_type>::type;
+  using col2_type_const = typename std::add_const<col2_type>::type;
+
+  if (is_row_fixed_length) {
+    uint8_t* dst = dst_base + num_rows_to_skip * fixed_length;
+    for (uint32_t i = num_rows_to_skip; i < num_rows; ++i) {
+      *reinterpret_cast<col1_type*>(dst) = reinterpret_cast<col1_type_const*>(src_A)[i];
+      *reinterpret_cast<col2_type*>(dst + sizeof(col1_type)) =
+          reinterpret_cast<col2_type_const*>(src_B)[i];
+      dst += fixed_length;
+    }
+  } else {
+    for (uint32_t i = num_rows_to_skip; i < num_rows; ++i) {
+      uint8_t* dst = dst_base + offsets[i];
+      *reinterpret_cast<col1_type*>(dst) = reinterpret_cast<col1_type_const*>(src_A)[i];
+      *reinterpret_cast<col2_type*>(dst + sizeof(col1_type)) =
+          reinterpret_cast<col2_type_const*>(src_B)[i];
+    }
+  }
+}
+
+void KeyEncoder::EncoderBinaryPair::Decode(uint32_t start_row, uint32_t num_rows,
+                                           uint32_t offset_within_row,
+                                           const KeyRowArray& rows, KeyColumnArray* col1,
+                                           KeyColumnArray* col2, KeyEncoderContext* ctx,
+                                           KeyColumnArray* temp1, KeyColumnArray* temp2) {
+  ARROW_DCHECK(CanProcessPair(col1->metadata(), col2->metadata()));
+
+  KeyColumnArray col_prep[2];
+  if (EncoderInteger::UsesTransform(*col1)) {
+    col_prep[0] = EncoderInteger::ArrayReplace(*col1, *temp1);
+  } else {
+    col_prep[0] = *col1;
+  }
+  if (EncoderInteger::UsesTransform(*col2)) {
+    col_prep[1] = EncoderInteger::ArrayReplace(*col2, *temp2);
+  } else {
+    col_prep[1] = *col2;
+  }
+
+  uint32_t col_width1 = col_prep[0].metadata().fixed_length;
+  uint32_t col_width2 = col_prep[1].metadata().fixed_length;
+  int log_col_width1 =
+      col_width1 == 8 ? 3 : col_width1 == 4 ? 2 : col_width1 == 2 ? 1 : 0;
+  int log_col_width2 =
+      col_width2 == 8 ? 3 : col_width2 == 4 ? 2 : col_width2 == 2 ? 1 : 0;
+
+  bool is_row_fixed_length = rows.metadata().is_fixed_length;
+
+  uint32_t num_processed = 0;
+#if defined(ARROW_HAVE_AVX2)
+  if (ctx->has_avx2() && col_width1 == col_width2) {
+    num_processed =
+        DecodeHelper_avx2(is_row_fixed_length, col_width1, start_row, num_rows,
+                          offset_within_row, rows, &col_prep[0], &col_prep[1]);
+  }
+#endif
+  if (num_processed < num_rows) {
+    typedef void (*DecodeImp_t)(uint32_t, uint32_t, uint32_t, uint32_t,
+                                const KeyRowArray&, KeyColumnArray*, KeyColumnArray*);
+    static const DecodeImp_t DecodeImp_fn[] = {
+        DecodeImp<false, uint8_t, uint8_t>,   DecodeImp<false, uint16_t, uint8_t>,
+        DecodeImp<false, uint32_t, uint8_t>,  DecodeImp<false, uint64_t, uint8_t>,
+        DecodeImp<false, uint8_t, uint16_t>,  DecodeImp<false, uint16_t, uint16_t>,
+        DecodeImp<false, uint32_t, uint16_t>, DecodeImp<false, uint64_t, uint16_t>,
+        DecodeImp<false, uint8_t, uint32_t>,  DecodeImp<false, uint16_t, uint32_t>,
+        DecodeImp<false, uint32_t, uint32_t>, DecodeImp<false, uint64_t, uint32_t>,
+        DecodeImp<false, uint8_t, uint64_t>,  DecodeImp<false, uint16_t, uint64_t>,
+        DecodeImp<false, uint32_t, uint64_t>, DecodeImp<false, uint64_t, uint64_t>,
+        DecodeImp<true, uint8_t, uint8_t>,    DecodeImp<true, uint16_t, uint8_t>,
+        DecodeImp<true, uint32_t, uint8_t>,   DecodeImp<true, uint64_t, uint8_t>,
+        DecodeImp<true, uint8_t, uint16_t>,   DecodeImp<true, uint16_t, uint16_t>,
+        DecodeImp<true, uint32_t, uint16_t>,  DecodeImp<true, uint64_t, uint16_t>,
+        DecodeImp<true, uint8_t, uint32_t>,   DecodeImp<true, uint16_t, uint32_t>,
+        DecodeImp<true, uint32_t, uint32_t>,  DecodeImp<true, uint64_t, uint32_t>,
+        DecodeImp<true, uint8_t, uint64_t>,   DecodeImp<true, uint16_t, uint64_t>,
+        DecodeImp<true, uint32_t, uint64_t>,  DecodeImp<true, uint64_t, uint64_t>};
+    int dispatch_const =
+        (log_col_width2 << 2) | log_col_width1 | (is_row_fixed_length ? 16 : 0);
+    DecodeImp_fn[dispatch_const](num_processed, start_row, num_rows, offset_within_row,
+                                 rows, &(col_prep[0]), &(col_prep[1]));
+  }
+
+  if (EncoderInteger::UsesTransform(*col1)) {
+    EncoderInteger::PostDecode(col_prep[0], col1, ctx);
+  }
+  if (EncoderInteger::UsesTransform(*col2)) {
+    EncoderInteger::PostDecode(col_prep[1], col2, ctx);
+  }
+}
+
+template <bool is_row_fixed_length, typename col1_type, typename col2_type>
+void KeyEncoder::EncoderBinaryPair::DecodeImp(uint32_t num_rows_to_skip,
+                                              uint32_t start_row, uint32_t num_rows,
+                                              uint32_t offset_within_row,
+                                              const KeyRowArray& rows,
+                                              KeyColumnArray* col1,
+                                              KeyColumnArray* col2) {
+  ARROW_DCHECK(rows.length() >= start_row + num_rows);
+  ARROW_DCHECK(col1->length() == num_rows && col2->length() == num_rows);
+
+  uint8_t* dst_A = col1->mutable_data(1);
+  uint8_t* dst_B = col2->mutable_data(1);
+
+  uint32_t fixed_length = rows.metadata().fixed_length;
+  const uint32_t* offsets;
+  const uint8_t* src_base;
+  if (is_row_fixed_length) {
+    src_base = rows.data(1) + fixed_length * start_row + offset_within_row;
+    offsets = nullptr;
+  } else {
+    src_base = rows.data(2) + offset_within_row;
+    offsets = rows.offsets() + start_row;
+  }
+
+  using col1_type_const = typename std::add_const<col1_type>::type;
+  using col2_type_const = typename std::add_const<col2_type>::type;
+
+  if (is_row_fixed_length) {
+    const uint8_t* src = src_base + num_rows_to_skip * fixed_length;
+    for (uint32_t i = num_rows_to_skip; i < num_rows; ++i) {
+      reinterpret_cast<col1_type*>(dst_A)[i] = *reinterpret_cast<col1_type_const*>(src);
+      reinterpret_cast<col2_type*>(dst_B)[i] =
+          *reinterpret_cast<col2_type_const*>(src + sizeof(col1_type));
+      src += fixed_length;
+    }
+  } else {
+    for (uint32_t i = num_rows_to_skip; i < num_rows; ++i) {
+      const uint8_t* src = src_base + offsets[i];
+      reinterpret_cast<col1_type*>(dst_A)[i] = *reinterpret_cast<col1_type_const*>(src);
+      reinterpret_cast<col2_type*>(dst_B)[i] =
+          *reinterpret_cast<col2_type_const*>(src + sizeof(col1_type));
+    }
+  }
+}
+
+void KeyEncoder::EncoderOffsets::Encode(KeyRowArray* rows,
+                                        const std::vector<KeyColumnArray>& varbinary_cols,
+                                        KeyEncoderContext* ctx) {
+  ARROW_DCHECK(!varbinary_cols.empty());
+
+  // Rows and columns must all be varying-length
+  ARROW_DCHECK(!rows->metadata().is_fixed_length);
+  for (size_t col = 0; col < varbinary_cols.size(); ++col) {
+    ARROW_DCHECK(!varbinary_cols[col].metadata().is_fixed_length);
+  }
+
+  uint32_t num_rows = static_cast<uint32_t>(varbinary_cols[0].length());
+
+  // The space in columns must be exactly equal to a space for offsets in rows
+  ARROW_DCHECK(rows->length() == num_rows);
+  for (size_t col = 0; col < varbinary_cols.size(); ++col) {
+    ARROW_DCHECK(varbinary_cols[col].length() == num_rows);
+  }
+
+  uint32_t num_processed = 0;
+#if defined(ARROW_HAVE_AVX2)
+  if (ctx->has_avx2()) {
+    // Create a temp vector sized based on the number of columns
+    auto temp_buffer_holder = util::TempVectorHolder<uint32_t>(
+        ctx->stack, static_cast<uint32_t>(varbinary_cols.size()) * 8);
+    auto temp_buffer_32B_per_col = KeyColumnArray(
+        KeyColumnMetadata(true, sizeof(uint32_t)), varbinary_cols.size() * 8, nullptr,
+        reinterpret_cast<uint8_t*>(temp_buffer_holder.mutable_data()), nullptr);
+
+    num_processed = EncodeImp_avx2(rows, varbinary_cols, &temp_buffer_32B_per_col);
+  }
+#endif
+  if (num_processed < num_rows) {
+    EncodeImp(num_processed, rows, varbinary_cols);
+  }
+}
+
+void KeyEncoder::EncoderOffsets::EncodeImp(
+    uint32_t num_rows_already_processed, KeyRowArray* rows,
+    const std::vector<KeyColumnArray>& varbinary_cols) {
+  ARROW_DCHECK(varbinary_cols.size() > 0);
+
+  int row_alignment = rows->metadata().row_alignment;
+  int string_alignment = rows->metadata().string_alignment;
+
+  uint32_t* row_offsets = rows->mutable_offsets();
+  uint8_t* row_values = rows->mutable_data(2);
+  uint32_t num_rows = static_cast<uint32_t>(varbinary_cols[0].length());
+
+  if (num_rows_already_processed == 0) {
+    row_offsets[0] = 0;
+  }
+
+  uint32_t row_offset = row_offsets[num_rows_already_processed];
+  for (uint32_t i = num_rows_already_processed; i < num_rows; ++i) {
+    uint32_t* varbinary_end =
+        rows->metadata().varbinary_end_array(row_values + row_offset);
+
+    // Zero out lengths for nulls.
+    // Add lengths of all columns to get row size.
+    // Store varbinary field ends while summing their lengths.
+
+    uint32_t offset_within_row = rows->metadata().fixed_length;
+
+    for (size_t col = 0; col < varbinary_cols.size(); ++col) {
+      const uint32_t* col_offsets = varbinary_cols[col].offsets();
+      uint32_t col_length = col_offsets[i + 1] - col_offsets[i];
+
+      const uint8_t* non_nulls = varbinary_cols[col].data(0);
+      if (non_nulls && BitUtil::GetBit(non_nulls, i) == 0) {
+        col_length = 0;
+      }
+
+      offset_within_row +=
+          KeyRowMetadata::padding_for_alignment(offset_within_row, string_alignment);
+      offset_within_row += col_length;
+
+      varbinary_end[col] = offset_within_row;
+    }
+
+    offset_within_row +=
+        KeyRowMetadata::padding_for_alignment(offset_within_row, row_alignment);
+    row_offset += offset_within_row;
+    row_offsets[i + 1] = row_offset;
+  }
+}
+
+void KeyEncoder::EncoderOffsets::Decode(
+    uint32_t start_row, uint32_t num_rows, const KeyRowArray& rows,
+    std::vector<KeyColumnArray>* varbinary_cols,
+    const std::vector<uint32_t>& varbinary_cols_base_offset, KeyEncoderContext* ctx) {
+  ARROW_DCHECK(!varbinary_cols->empty());
+  ARROW_DCHECK(varbinary_cols->size() == varbinary_cols_base_offset.size());
+
+  // Rows and columns must all be varying-length
+  ARROW_DCHECK(!rows.metadata().is_fixed_length);
+  for (size_t col = 0; col < varbinary_cols->size(); ++col) {
+    ARROW_DCHECK(!(*varbinary_cols)[col].metadata().is_fixed_length);
+  }
+
+  // The space in columns must be exactly equal to a subset of rows selected
+  ARROW_DCHECK(rows.length() >= start_row + num_rows);
+  for (size_t col = 0; col < varbinary_cols->size(); ++col) {
+    ARROW_DCHECK((*varbinary_cols)[col].length() == num_rows);
+  }
+
+  // Offsets of varbinary columns data within each encoded row are stored
+  // in the same encoded row as an array of 32-bit integers.
+  // This array follows immediately the data of fixed-length columns.
+  // There is one element for each varying-length column.
+  // The Nth element is the sum of all the lengths of varbinary columns data in
+  // that row, up to and including Nth varbinary column.
+
+  const uint32_t* row_offsets = rows.offsets() + start_row;
+
+  // Set the base offset for each column
+  for (size_t col = 0; col < varbinary_cols->size(); ++col) {
+    uint32_t* col_offsets = (*varbinary_cols)[col].mutable_offsets();
+    col_offsets[0] = varbinary_cols_base_offset[col];
+  }
+
+  int string_alignment = rows.metadata().string_alignment;
+
+  for (uint32_t i = 0; i < num_rows; ++i) {
+    // Find the beginning of cumulative lengths array for next row
+    const uint8_t* row = rows.data(2) + row_offsets[i];
+    const uint32_t* varbinary_ends = rows.metadata().varbinary_end_array(row);
+
+    // Update the offset of each column
+    uint32_t offset_within_row = rows.metadata().fixed_length;
+    for (size_t col = 0; col < varbinary_cols->size(); ++col) {
+      offset_within_row +=
+          KeyRowMetadata::padding_for_alignment(offset_within_row, string_alignment);
+      uint32_t length = varbinary_ends[col] - offset_within_row;
+      offset_within_row = varbinary_ends[col];
+      uint32_t* col_offsets = (*varbinary_cols)[col].mutable_offsets();
+      col_offsets[i + 1] = col_offsets[i] + length;
+    }
+  }
+}
+
+void KeyEncoder::EncoderVarBinary::Encode(uint32_t varbinary_col_id, KeyRowArray* rows,
+                                          const KeyColumnArray& col,
+                                          KeyEncoderContext* ctx) {
+#if defined(ARROW_HAVE_AVX2)
+  if (ctx->has_avx2()) {
+    EncodeHelper_avx2(varbinary_col_id, rows, col);
+  } else {
+#endif
+    if (varbinary_col_id == 0) {
+      EncodeImp<true>(varbinary_col_id, rows, col);
+    } else {
+      EncodeImp<false>(varbinary_col_id, rows, col);
+    }
+#if defined(ARROW_HAVE_AVX2)
+  }
+#endif
+}
+
+void KeyEncoder::EncoderVarBinary::Decode(uint32_t start_row, uint32_t num_rows,
+                                          uint32_t varbinary_col_id,
+                                          const KeyRowArray& rows, KeyColumnArray* col,
+                                          KeyEncoderContext* ctx) {
+  // Output column varbinary buffer needs an extra 32B
+  // at the end in avx2 version and 8B otherwise.
+#if defined(ARROW_HAVE_AVX2)
+  if (ctx->has_avx2()) {
+    DecodeHelper_avx2(start_row, num_rows, varbinary_col_id, rows, col);
+  } else {
+#endif
+    if (varbinary_col_id == 0) {
+      DecodeImp<true>(start_row, num_rows, varbinary_col_id, rows, col);
+    } else {
+      DecodeImp<false>(start_row, num_rows, varbinary_col_id, rows, col);
+    }
+#if defined(ARROW_HAVE_AVX2)
+  }
+#endif
+}
+
+template <bool first_varbinary_col>
+void KeyEncoder::EncoderVarBinary::EncodeImp(uint32_t varbinary_col_id, KeyRowArray* rows,
+                                             const KeyColumnArray& col) {
+  EncodeDecodeHelper<first_varbinary_col, true>(
+      0, static_cast<uint32_t>(col.length()), varbinary_col_id, rows, rows, &col, nullptr,
+      [](uint8_t* dst, const uint8_t* src, int64_t length) {
+        uint64_t* dst64 = reinterpret_cast<uint64_t*>(dst);
+        const uint64_t* src64 = reinterpret_cast<const uint64_t*>(src);
+        uint32_t istripe;
+        for (istripe = 0; istripe < length / 8; ++istripe) {
+          dst64[istripe] = util::SafeLoad(src64 + istripe);
+        }
+        if ((length % 8) > 0) {
+          uint64_t mask_last = ~0ULL >> (8 * (8 * (istripe + 1) - length));
+          dst64[istripe] = (dst64[istripe] & ~mask_last) |
+                           (util::SafeLoad(src64 + istripe) & mask_last);
+        }
+      });
+}
+
+template <bool first_varbinary_col>
+void KeyEncoder::EncoderVarBinary::DecodeImp(uint32_t start_row, uint32_t num_rows,
+                                             uint32_t varbinary_col_id,
+                                             const KeyRowArray& rows,
+                                             KeyColumnArray* col) {
+  EncodeDecodeHelper<first_varbinary_col, false>(
+      start_row, num_rows, varbinary_col_id, &rows, nullptr, col, col,
+      [](uint8_t* dst, const uint8_t* src, int64_t length) {
+        for (uint32_t istripe = 0; istripe < (length + 7) / 8; ++istripe) {
+          uint64_t* dst64 = reinterpret_cast<uint64_t*>(dst);
+          const uint64_t* src64 = reinterpret_cast<const uint64_t*>(src);
+          util::SafeStore(dst64 + istripe, src64[istripe]);
+        }
+      });
+}
+
+void KeyEncoder::EncoderNulls::Encode(KeyRowArray* rows,
+                                      const std::vector<KeyColumnArray>& cols,
+                                      KeyEncoderContext* ctx,
+                                      KeyColumnArray* temp_vector_16bit) {
+  ARROW_DCHECK(cols.size() > 0);
+  uint32_t num_rows = static_cast<uint32_t>(rows->length());
+
+  // All input columns should have the same number of rows.
+  // They may or may not have non-nulls bit-vectors allocated.
+  for (size_t col = 0; col < cols.size(); ++col) {
+    ARROW_DCHECK(cols[col].length() == num_rows);
+  }
+
+  // Temp vector needs space for the required number of rows
+  ARROW_DCHECK(temp_vector_16bit->length() >= num_rows);
+  ARROW_DCHECK(temp_vector_16bit->metadata().is_fixed_length &&
+               temp_vector_16bit->metadata().fixed_length == sizeof(uint16_t));
+
+  uint8_t* null_masks = rows->null_masks();
+  uint32_t null_masks_bytes_per_row = rows->metadata().null_masks_bytes_per_row;
+  memset(null_masks, 0, null_masks_bytes_per_row * num_rows);
+  for (size_t col = 0; col < cols.size(); ++col) {
+    const uint8_t* non_nulls = cols[col].data(0);
+    if (!non_nulls) {
+      continue;
+    }
+    int num_selected;
+    util::BitUtil::bits_to_indexes(
+        0, ctx->hardware_flags, num_rows, non_nulls, &num_selected,
+        reinterpret_cast<uint16_t*>(temp_vector_16bit->mutable_data(1)));
+    for (int i = 0; i < num_selected; ++i) {
+      uint16_t row_id = reinterpret_cast<const uint16_t*>(temp_vector_16bit->data(1))[i];
+      int64_t null_masks_bit_id = row_id * null_masks_bytes_per_row * 8 + col;
+      BitUtil::SetBit(null_masks, null_masks_bit_id);
+    }
+  }
+}
+
+void KeyEncoder::EncoderNulls::Decode(uint32_t start_row, uint32_t num_rows,
+                                      const KeyRowArray& rows,
+                                      std::vector<KeyColumnArray>* cols) {
+  // Every output column needs to have a space for exactly the required number
+  // of rows. It also needs to have non-nulls bit-vector allocated and mutable.
+  ARROW_DCHECK(cols->size() > 0);
+  for (size_t col = 0; col < cols->size(); ++col) {
+    ARROW_DCHECK((*cols)[col].length() == num_rows);
+    ARROW_DCHECK((*cols)[col].mutable_data(0));
+  }
+
+  const uint8_t* null_masks = rows.null_masks();
+  uint32_t null_masks_bytes_per_row = rows.metadata().null_masks_bytes_per_row;
+  for (size_t col = 0; col < cols->size(); ++col) {
+    uint8_t* non_nulls = (*cols)[col].mutable_data(0);
+    memset(non_nulls, 0xff, BitUtil::BytesForBits(num_rows));
+    for (uint32_t row = 0; row < num_rows; ++row) {
+      uint32_t null_masks_bit_id =
+          (start_row + row) * null_masks_bytes_per_row * 8 + static_cast<uint32_t>(col);
+      bool is_set = BitUtil::GetBit(null_masks, null_masks_bit_id);
+      if (is_set) {
+        BitUtil::ClearBit(non_nulls, row);
+      }
+    }
+  }
+}
+
+uint32_t KeyEncoder::KeyRowMetadata::num_varbinary_cols() const {
+  uint32_t result = 0;
+  for (size_t i = 0; i < column_metadatas.size(); ++i) {
+    if (!column_metadatas[i].is_fixed_length) {
+      ++result;
+    }
+  }
+  return result;
+}
+
+bool KeyEncoder::KeyRowMetadata::is_compatible(const KeyRowMetadata& other) const {
+  if (other.num_cols() != num_cols()) {
+    return false;
+  }
+  if (row_alignment != other.row_alignment ||
+      string_alignment != other.string_alignment) {
+    return false;
+  }
+  for (size_t i = 0; i < column_metadatas.size(); ++i) {
+    if (column_metadatas[i].is_fixed_length !=
+        other.column_metadatas[i].is_fixed_length) {
+      return false;
+    }
+    if (column_metadatas[i].fixed_length != other.column_metadatas[i].fixed_length) {
+      return false;
+    }
+  }
+  return true;
+}
+
+void KeyEncoder::KeyRowMetadata::FromColumnMetadataVector(
+    const std::vector<KeyColumnMetadata>& cols, int in_row_alignment,
+    int in_string_alignment) {
+  column_metadatas.resize(cols.size());
+  for (size_t i = 0; i < cols.size(); ++i) {
+    column_metadatas[i] = cols[i];
+  }
+
+  uint32_t num_cols = static_cast<uint32_t>(cols.size());
+
+  // Sort columns.
+  // Columns are sorted based on the size in bytes of their fixed-length part.
+  // For the varying-length column, the fixed-length part is the 32-bit field storing
+  // cumulative length of varying-length fields.
+  // The rules are:
+  // a) Boolean column, marked with fixed-length 0, is considered to have fixed-length
+  // part of 1 byte. b) Columns with fixed-length part being power of 2 or multiple of row
+  // alignment precede other columns. They are sorted among themselves based on size of
+  // fixed-length part. c) Fixed-length columns precede varying-length columns when both
+  // have the same size fixed-length part.
+  column_order.resize(num_cols);
+  for (uint32_t i = 0; i < num_cols; ++i) {
+    column_order[i] = i;
+  }
+  std::sort(
+      column_order.begin(), column_order.end(), [&cols](uint32_t left, uint32_t right) {
+        bool is_left_pow2 =
+            !cols[left].is_fixed_length || ARROW_POPCOUNT64(cols[left].fixed_length) <= 1;
+        bool is_right_pow2 = !cols[right].is_fixed_length ||
+                             ARROW_POPCOUNT64(cols[right].fixed_length) <= 1;
+        bool is_left_fixedlen = cols[left].is_fixed_length;
+        bool is_right_fixedlen = cols[right].is_fixed_length;
+        uint32_t width_left =
+            cols[left].is_fixed_length ? cols[left].fixed_length : sizeof(uint32_t);
+        uint32_t width_right =
+            cols[right].is_fixed_length ? cols[right].fixed_length : sizeof(uint32_t);
+        if (is_left_pow2 != is_right_pow2) {
+          return is_left_pow2;
+        }
+        if (!is_left_pow2) {
+          return left < right;
+        }
+        if (width_left != width_right) {
+          return width_left > width_right;
+        }
+        if (is_left_fixedlen != is_right_fixedlen) {
+          return is_left_fixedlen;
+        }
+        return left < right;
+      });
+
+  row_alignment = in_row_alignment;
+  string_alignment = in_string_alignment;
+  varbinary_end_array_offset = 0;
+
+  column_offsets.resize(num_cols);
+  uint32_t num_varbinary_cols = 0;
+  uint32_t offset_within_row = 0;
+  for (uint32_t i = 0; i < num_cols; ++i) {
+    const KeyColumnMetadata& col = cols[column_order[i]];
+    offset_within_row +=
+        KeyRowMetadata::padding_for_alignment(offset_within_row, string_alignment, col);
+    column_offsets[i] = offset_within_row;
+    if (!col.is_fixed_length) {
+      if (num_varbinary_cols == 0) {
+        varbinary_end_array_offset = offset_within_row;
+      }
+      ARROW_DCHECK(column_offsets[i] - varbinary_end_array_offset ==
+                   num_varbinary_cols * sizeof(uint32_t));
+      ++num_varbinary_cols;
+      offset_within_row += sizeof(uint32_t);
+    } else {
+      // Boolean column is a bit-vector, which is indicated by
+      // setting fixed length in column metadata to zero.
+      // It will be stored as a byte in output row.
+      if (col.fixed_length == 0) {
+        offset_within_row += 1;
+      } else {
+        offset_within_row += col.fixed_length;
+      }
+    }
+  }
+
+  is_fixed_length = (num_varbinary_cols == 0);
+  fixed_length =
+      offset_within_row +
+      KeyRowMetadata::padding_for_alignment(
+          offset_within_row, num_varbinary_cols == 0 ? row_alignment : string_alignment);
+
+  // We set the number of bytes per row storing null masks of individual key columns
+  // to be a power of two. This is not required. It could be also set to the minimal
+  // number of bytes required for a given number of bits (one bit per column).
+  null_masks_bytes_per_row = 1;
+  while (static_cast<uint32_t>(null_masks_bytes_per_row * 8) < num_cols) {
+    null_masks_bytes_per_row *= 2;
+  }
+}
+
+void KeyEncoder::Init(const std::vector<KeyColumnMetadata>& cols, KeyEncoderContext* ctx,
+                      int row_alignment, int string_alignment) {
+  ctx_ = ctx;
+  row_metadata_.FromColumnMetadataVector(cols, row_alignment, string_alignment);
+  uint32_t num_cols = row_metadata_.num_cols();
+  uint32_t num_varbinary_cols = row_metadata_.num_varbinary_cols();
+  batch_all_cols_.resize(num_cols);
+  batch_varbinary_cols_.resize(num_varbinary_cols);
+  batch_varbinary_cols_base_offsets_.resize(num_varbinary_cols);
+}
+
+void KeyEncoder::PrepareKeyColumnArrays(int64_t start_row, int64_t num_rows,
+                                        const std::vector<KeyColumnArray>& cols_in) {
+  uint32_t num_cols = static_cast<uint32_t>(cols_in.size());
+  ARROW_DCHECK(batch_all_cols_.size() == num_cols);
+
+  uint32_t num_varbinary_visited = 0;
+  for (uint32_t i = 0; i < num_cols; ++i) {
+    const KeyColumnArray& col = cols_in[row_metadata_.column_order[i]];
+    KeyColumnArray col_window(col, start_row, num_rows);
+    batch_all_cols_[i] = col_window;
+    if (!col.metadata().is_fixed_length) {
+      ARROW_DCHECK(num_varbinary_visited < batch_varbinary_cols_.size());
+      // If start row is zero, then base offset of varbinary column is also zero.
+      if (start_row == 0) {
+        batch_varbinary_cols_base_offsets_[num_varbinary_visited] = 0;
+      } else {
+        batch_varbinary_cols_base_offsets_[num_varbinary_visited] =
+            col.offsets()[start_row];
+      }
+      batch_varbinary_cols_[num_varbinary_visited++] = col_window;
+    }
+  }
+}
+
+Status KeyEncoder::PrepareOutputForEncode(int64_t start_row, int64_t num_rows,
+                                          KeyRowArray* rows,
+                                          const std::vector<KeyColumnArray>& all_cols) {
+  int64_t num_bytes_required = 0;
+
+  int64_t fixed_part = row_metadata_.fixed_length * num_rows;
+  int64_t var_part = 0;
+  for (size_t i = 0; i < all_cols.size(); ++i) {
+    const KeyColumnArray& col = all_cols[i];
+    if (!col.metadata().is_fixed_length) {
+      ARROW_DCHECK(col.length() >= start_row + num_rows);
+      const uint32_t* offsets = col.offsets();
+      var_part += offsets[start_row + num_rows] - offsets[start_row];
+      // Include maximum padding that can be added to align the start of varbinary fields.
+      var_part += num_rows * row_metadata_.string_alignment;
+    }
+  }
+  // Include maximum padding that can be added to align the start of the rows.
+  if (!row_metadata_.is_fixed_length) {
+    fixed_part += row_metadata_.row_alignment * num_rows;
+  }
+  num_bytes_required = fixed_part + var_part;
+
+  rows->Clean();
+  RETURN_NOT_OK(rows->AppendEmpty(static_cast<uint32_t>(num_rows),
+                                  static_cast<uint32_t>(num_bytes_required)));
+
+  return Status::OK();
+}
+
+void KeyEncoder::Encode(int64_t start_row, int64_t num_rows, KeyRowArray* rows,
+                        const std::vector<KeyColumnArray>& cols) {
+  // Prepare column array vectors
+  PrepareKeyColumnArrays(start_row, num_rows, cols);
+
+  // Create two temp vectors with 16-bit elements
+  auto temp_buffer_holder_A =
+      util::TempVectorHolder<uint16_t>(ctx_->stack, static_cast<uint32_t>(num_rows));
+  auto temp_buffer_A = KeyColumnArray(
+      KeyColumnMetadata(true, sizeof(uint16_t)), num_rows, nullptr,
+      reinterpret_cast<uint8_t*>(temp_buffer_holder_A.mutable_data()), nullptr);
+  auto temp_buffer_holder_B =
+      util::TempVectorHolder<uint16_t>(ctx_->stack, static_cast<uint32_t>(num_rows));
+  auto temp_buffer_B = KeyColumnArray(
+      KeyColumnMetadata(true, sizeof(uint16_t)), num_rows, nullptr,
+      reinterpret_cast<uint8_t*>(temp_buffer_holder_B.mutable_data()), nullptr);
+
+  bool is_row_fixed_length = row_metadata_.is_fixed_length;
+  if (!is_row_fixed_length) {
+    // This call will generate and fill in data for both:
+    // - offsets to the entire encoded arrays
+    // - offsets for individual varbinary fields within each row
+    EncoderOffsets::Encode(rows, batch_varbinary_cols_, ctx_);
+
+    uint32_t num_varbinary_cols = static_cast<uint32_t>(batch_varbinary_cols_.size());
+    for (uint32_t i = 0; i < num_varbinary_cols; ++i) {
+      // Memcpy varbinary fields into precomputed in the previous step
+      // positions in the output row buffer.
+      EncoderVarBinary::Encode(i, rows, batch_varbinary_cols_[i], ctx_);
+    }
+  }
+
+  // Process fixed length columns
+  uint32_t num_cols = static_cast<uint32_t>(batch_all_cols_.size());
+  for (uint32_t i = 0; i < num_cols;) {
+    if (!batch_all_cols_[i].metadata().is_fixed_length) {
+      i += 1;
+      continue;
+    }
+    bool can_process_pair =
+        (i + 1 < num_cols) && batch_all_cols_[i + 1].metadata().is_fixed_length &&
+        EncoderBinaryPair::CanProcessPair(batch_all_cols_[i].metadata(),
+                                          batch_all_cols_[i + 1].metadata());
+    if (!can_process_pair) {
+      EncoderBinary::Encode(row_metadata_.column_offsets[i], rows, batch_all_cols_[i],
+                            ctx_, &temp_buffer_A);
+      i += 1;
+    } else {
+      EncoderBinaryPair::Encode(row_metadata_.column_offsets[i], rows, batch_all_cols_[i],
+                                batch_all_cols_[i + 1], ctx_, &temp_buffer_A,
+                                &temp_buffer_B);
+      i += 2;
+    }
+  }
+
+  // Process nulls
+  EncoderNulls::Encode(rows, batch_all_cols_, ctx_, &temp_buffer_A);
+}
+
+void KeyEncoder::DecodeFixedLengthBuffers(int64_t start_row_input,
+                                          int64_t start_row_output, int64_t num_rows,
+                                          const KeyRowArray& rows,
+                                          std::vector<KeyColumnArray>* cols) {
+  // Prepare column array vectors
+  PrepareKeyColumnArrays(start_row_output, num_rows, *cols);
+
+  // Create two temp vectors with 16-bit elements
+  auto temp_buffer_holder_A =
+      util::TempVectorHolder<uint16_t>(ctx_->stack, static_cast<uint32_t>(num_rows));
+  auto temp_buffer_A = KeyColumnArray(
+      KeyColumnMetadata(true, sizeof(uint16_t)), num_rows, nullptr,
+      reinterpret_cast<uint8_t*>(temp_buffer_holder_A.mutable_data()), nullptr);
+  auto temp_buffer_holder_B =
+      util::TempVectorHolder<uint16_t>(ctx_->stack, static_cast<uint32_t>(num_rows));
+  auto temp_buffer_B = KeyColumnArray(
+      KeyColumnMetadata(true, sizeof(uint16_t)), num_rows, nullptr,
+      reinterpret_cast<uint8_t*>(temp_buffer_holder_B.mutable_data()), nullptr);
+
+  bool is_row_fixed_length = row_metadata_.is_fixed_length;
+  if (!is_row_fixed_length) {
+    EncoderOffsets::Decode(static_cast<uint32_t>(start_row_input),
+                           static_cast<uint32_t>(num_rows), rows, &batch_varbinary_cols_,
+                           batch_varbinary_cols_base_offsets_, ctx_);
+  }
+
+  // Process fixed length columns
+  uint32_t num_cols = static_cast<uint32_t>(batch_all_cols_.size());
+  for (uint32_t i = 0; i < num_cols;) {
+    if (!batch_all_cols_[i].metadata().is_fixed_length) {
+      i += 1;
+      continue;
+    }
+    bool can_process_pair =
+        (i + 1 < num_cols) && batch_all_cols_[i + 1].metadata().is_fixed_length &&
+        EncoderBinaryPair::CanProcessPair(batch_all_cols_[i].metadata(),
+                                          batch_all_cols_[i + 1].metadata());
+    if (!can_process_pair) {
+      EncoderBinary::Decode(static_cast<uint32_t>(start_row_input),
+                            static_cast<uint32_t>(num_rows),
+                            row_metadata_.column_offsets[i], rows, &batch_all_cols_[i],
+                            ctx_, &temp_buffer_A);
+      i += 1;
+    } else {
+      EncoderBinaryPair::Decode(
+          static_cast<uint32_t>(start_row_input), static_cast<uint32_t>(num_rows),
+          row_metadata_.column_offsets[i], rows, &batch_all_cols_[i],
+          &batch_all_cols_[i + 1], ctx_, &temp_buffer_A, &temp_buffer_B);
+      i += 2;
+    }
+  }
+
+  // Process nulls
+  EncoderNulls::Decode(static_cast<uint32_t>(start_row_input),
+                       static_cast<uint32_t>(num_rows), rows, &batch_all_cols_);
+}
+
+void KeyEncoder::DecodeVaryingLengthBuffers(int64_t start_row_input,
+                                            int64_t start_row_output, int64_t num_rows,
+                                            const KeyRowArray& rows,
+                                            std::vector<KeyColumnArray>* cols) {
+  // Prepare column array vectors
+  PrepareKeyColumnArrays(start_row_output, num_rows, *cols);
+
+  bool is_row_fixed_length = row_metadata_.is_fixed_length;
+  if (!is_row_fixed_length) {
+    uint32_t num_varbinary_cols = static_cast<uint32_t>(batch_varbinary_cols_.size());
+    for (uint32_t i = 0; i < num_varbinary_cols; ++i) {
+      // Memcpy varbinary fields into precomputed in the previous step
+      // positions in the output row buffer.
+      EncoderVarBinary::Decode(static_cast<uint32_t>(start_row_input),
+                               static_cast<uint32_t>(num_rows), i, rows,
+                               &batch_varbinary_cols_[i], ctx_);
+    }
+  }
+}
+
+}  // namespace compute
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/exec/key_encode.h b/cpp/src/arrow/compute/exec/key_encode.h
new file mode 100644
index 00000000000..3f5ef365a08
--- /dev/null
+++ b/cpp/src/arrow/compute/exec/key_encode.h
@@ -0,0 +1,627 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+#include "arrow/compute/exec/util.h"
+#include "arrow/memory_pool.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/util/bit_util.h"
+
+namespace arrow {
+namespace compute {
+
+class KeyColumnMetadata;
+
+/// Converts between key representation as a collection of arrays for
+/// individual columns and another representation as a single array of rows
+/// combining data from all columns into one value.
+/// This conversion is reversible.
+/// Row-oriented storage is beneficial when there is a need for random access
+/// of individual rows and at the same time all included columns are likely to
+/// be accessed together, as in the case of hash table key.
+class KeyEncoder {
+ public:
+  struct KeyEncoderContext {
+    bool has_avx2() const {
+      return (hardware_flags & arrow::internal::CpuInfo::AVX2) > 0;
+    }
+    int64_t hardware_flags;
+    util::TempVectorStack* stack;
+  };
+
+  /// Description of a storage format of a single key column as needed
+  /// for the purpose of row encoding.
+  struct KeyColumnMetadata {
+    KeyColumnMetadata() = default;
+    KeyColumnMetadata(bool is_fixed_length_in, uint32_t fixed_length_in)
+        : is_fixed_length(is_fixed_length_in), fixed_length(fixed_length_in) {}
+    /// Is column storing a varying-length binary, using offsets array
+    /// to find a beginning of a value, or is it a fixed-length binary.
+    bool is_fixed_length;
+    /// For a fixed-length binary column: number of bytes per value.
+    /// Zero has a special meaning, indicating a bit vector with one bit per value.
+    /// For a varying-length binary column: number of bytes per offset.
+    uint32_t fixed_length;
+  };
+
+  /// Description of a storage format for rows produced by encoder.
+  struct KeyRowMetadata {
+    /// Is row a varying-length binary, using offsets array to find a beginning of a row,
+    /// or is it a fixed-length binary.
+    bool is_fixed_length;
+
+    /// For a fixed-length binary row, common size of rows in bytes,
+    /// rounded up to the multiple of alignment.
+    ///
+    /// For a varying-length binary, size of all encoded fixed-length key columns,
+    /// including lengths of varying-length columns, rounded up to the multiple of string
+    /// alignment.
+    uint32_t fixed_length;
+
+    /// Offset within a row to the array of 32-bit offsets within a row of
+    /// ends of varbinary fields.
+    /// Used only when the row is not fixed-length, zero for fixed-length row.
+    /// There are N elements for N varbinary fields.
+    /// Each element is the offset within a row of the first byte after
+    /// the corresponding varbinary field bytes in that row.
+    /// If varbinary fields begin at aligned addresses, than the end of the previous
+    /// varbinary field needs to be rounded up according to the specified alignment
+    /// to obtain the beginning of the next varbinary field.
+    /// The first varbinary field starts at offset specified by fixed_length,
+    /// which should already be aligned.
+    uint32_t varbinary_end_array_offset;
+
+    /// Fixed number of bytes per row that are used to encode null masks.
+    /// Null masks indicate for a single row which of its key columns are null.
+    /// Nth bit in the sequence of bytes assigned to a row represents null
+    /// information for Nth field according to the order in which they are encoded.
+    int null_masks_bytes_per_row;
+
+    /// Power of 2. Every row will start at the offset aligned to that number of bytes.
+    int row_alignment;
+
+    /// Power of 2. Must be no greater than row alignment.
+    /// Every non-power-of-2 binary field and every varbinary field bytes
+    /// will start aligned to that number of bytes.
+    int string_alignment;
+
+    /// Metadata of encoded columns in their original order.
+    std::vector<KeyColumnMetadata> column_metadatas;
+
+    /// Order in which fields are encoded.
+    std::vector<uint32_t> column_order;
+
+    /// Offsets within a row to fields in their encoding order.
+    std::vector<uint32_t> column_offsets;
+
+    /// Rounding up offset to the nearest multiple of alignment value.
+    /// Alignment must be a power of 2.
+    static inline uint32_t padding_for_alignment(uint32_t offset,
+                                                 int required_alignment) {
+      ARROW_DCHECK(ARROW_POPCOUNT64(required_alignment) == 1);
+      return static_cast<uint32_t>((-static_cast<int32_t>(offset)) &
+                                   (required_alignment - 1));
+    }
+
+    /// Rounding up offset to the beginning of next column,
+    /// chosing required alignment based on the data type of that column.
+    static inline uint32_t padding_for_alignment(uint32_t offset, int string_alignment,
+                                                 const KeyColumnMetadata& col_metadata) {
+      if (!col_metadata.is_fixed_length ||
+          ARROW_POPCOUNT64(col_metadata.fixed_length) <= 1) {
+        return 0;
+      } else {
+        return padding_for_alignment(offset, string_alignment);
+      }
+    }
+
+    /// Returns an array of offsets within a row of ends of varbinary fields.
+    inline const uint32_t* varbinary_end_array(const uint8_t* row) const {
+      ARROW_DCHECK(!is_fixed_length);
+      return reinterpret_cast<const uint32_t*>(row + varbinary_end_array_offset);
+    }
+    inline uint32_t* varbinary_end_array(uint8_t* row) const {
+      ARROW_DCHECK(!is_fixed_length);
+      return reinterpret_cast<uint32_t*>(row + varbinary_end_array_offset);
+    }
+
+    /// Returns the offset within the row and length of the first varbinary field.
+    inline void first_varbinary_offset_and_length(const uint8_t* row, uint32_t* offset,
+                                                  uint32_t* length) const {
+      ARROW_DCHECK(!is_fixed_length);
+      *offset = fixed_length;
+      *length = varbinary_end_array(row)[0] - fixed_length;
+    }
+
+    /// Returns the offset within the row and length of the second and further varbinary
+    /// fields.
+    inline void nth_varbinary_offset_and_length(const uint8_t* row, int varbinary_id,
+                                                uint32_t* out_offset,
+                                                uint32_t* out_length) const {
+      ARROW_DCHECK(!is_fixed_length);
+      ARROW_DCHECK(varbinary_id > 0);
+      const uint32_t* varbinary_end = varbinary_end_array(row);
+      uint32_t offset = varbinary_end[varbinary_id - 1];
+      offset += padding_for_alignment(offset, string_alignment);
+      *out_offset = offset;
+      *out_length = varbinary_end[varbinary_id] - offset;
+    }
+
+    uint32_t encoded_field_order(uint32_t icol) const { return column_order[icol]; }
+
+    uint32_t encoded_field_offset(uint32_t icol) const { return column_offsets[icol]; }
+
+    uint32_t num_cols() const { return static_cast<uint32_t>(column_metadatas.size()); }
+
+    uint32_t num_varbinary_cols() const;
+
+    void FromColumnMetadataVector(const std::vector<KeyColumnMetadata>& cols,
+                                  int in_row_alignment, int in_string_alignment);
+
+    bool is_compatible(const KeyRowMetadata& other) const;
+  };
+
+  class KeyRowArray {
+   public:
+    KeyRowArray();
+    Status Init(MemoryPool* pool, const KeyRowMetadata& metadata);
+    void Clean();
+    Status AppendEmpty(uint32_t num_rows_to_append, uint32_t num_extra_bytes_to_append);
+    Status AppendSelectionFrom(const KeyRowArray& from, uint32_t num_rows_to_append,
+                               const uint16_t* source_row_ids);
+    const KeyRowMetadata& metadata() const { return metadata_; }
+    int64_t length() const { return num_rows_; }
+    const uint8_t* data(int i) const {
+      ARROW_DCHECK(i >= 0 && i <= max_buffers_);
+      return buffers_[i];
+    }
+    uint8_t* mutable_data(int i) {
+      ARROW_DCHECK(i >= 0 && i <= max_buffers_);
+      return mutable_buffers_[i];
+    }
+    const uint32_t* offsets() const { return reinterpret_cast<const uint32_t*>(data(1)); }
+    uint32_t* mutable_offsets() { return reinterpret_cast<uint32_t*>(mutable_data(1)); }
+    const uint8_t* null_masks() const { return null_masks_->data(); }
+    uint8_t* null_masks() { return null_masks_->mutable_data(); }
+
+    bool has_any_nulls(const KeyEncoderContext* ctx) const;
+
+   private:
+    Status ResizeFixedLengthBuffers(int64_t num_extra_rows);
+    Status ResizeOptionalVaryingLengthBuffer(int64_t num_extra_bytes);
+
+    int64_t size_null_masks(int64_t num_rows);
+    int64_t size_offsets(int64_t num_rows);
+    int64_t size_rows_fixed_length(int64_t num_rows);
+    int64_t size_rows_varying_length(int64_t num_bytes);
+    void update_buffer_pointers();
+
+    static constexpr int64_t padding_for_vectors = 64;
+    MemoryPool* pool_;
+    KeyRowMetadata metadata_;
+    /// Buffers can only expand during lifetime and never shrink.
+    std::unique_ptr<ResizableBuffer> null_masks_;
+    std::unique_ptr<ResizableBuffer> offsets_;
+    std::unique_ptr<ResizableBuffer> rows_;
+    static constexpr int max_buffers_ = 3;
+    const uint8_t* buffers_[max_buffers_];
+    uint8_t* mutable_buffers_[max_buffers_];
+    int64_t num_rows_;
+    int64_t rows_capacity_;
+    int64_t bytes_capacity_;
+
+    // Mutable to allow lazy evaluation
+    mutable int64_t num_rows_for_has_any_nulls_;
+    mutable bool has_any_nulls_;
+  };
+
+  /// A lightweight description of an array representing one of key columns.
+  class KeyColumnArray {
+   public:
+    KeyColumnArray() = default;
+    /// Create as a mix of buffers according to the mask from two descriptions
+    /// (Nth bit is set to 0 if Nth buffer from the first input
+    /// should be used and is set to 1 otherwise).
+    /// Metadata is inherited from the first input.
+    KeyColumnArray(const KeyColumnMetadata& metadata, const KeyColumnArray& left,
+                   const KeyColumnArray& right, int buffer_id_to_replace);
+    /// Create for reading
+    KeyColumnArray(const KeyColumnMetadata& metadata, int64_t length,
+                   const uint8_t* buffer0, const uint8_t* buffer1,
+                   const uint8_t* buffer2);
+    /// Create for writing
+    KeyColumnArray(const KeyColumnMetadata& metadata, int64_t length, uint8_t* buffer0,
+                   uint8_t* buffer1, uint8_t* buffer2);
+    /// Create as a window view of original description that is offset
+    /// by a given number of rows.
+    /// The number of rows used in offset must be divisible by 8
+    /// in order to not split bit vectors within a single byte.
+    KeyColumnArray(const KeyColumnArray& from, int64_t start, int64_t length);
+    uint8_t* mutable_data(int i) {
+      ARROW_DCHECK(i >= 0 && i <= max_buffers_);
+      return mutable_buffers_[i];
+    }
+    const uint8_t* data(int i) const {
+      ARROW_DCHECK(i >= 0 && i <= max_buffers_);
+      return buffers_[i];
+    }
+    uint32_t* mutable_offsets() { return reinterpret_cast<uint32_t*>(mutable_data(1)); }
+    const uint32_t* offsets() const { return reinterpret_cast<const uint32_t*>(data(1)); }
+    const KeyColumnMetadata& metadata() const { return metadata_; }
+    int64_t length() const { return length_; }
+
+   private:
+    static constexpr int max_buffers_ = 3;
+    const uint8_t* buffers_[max_buffers_];
+    uint8_t* mutable_buffers_[max_buffers_];
+    KeyColumnMetadata metadata_;
+    int64_t length_;
+  };
+
+  void Init(const std::vector<KeyColumnMetadata>& cols, KeyEncoderContext* ctx,
+            int row_alignment, int string_alignment);
+
+  const KeyRowMetadata& row_metadata() { return row_metadata_; }
+
+  /// Find out the required sizes of all buffers output buffers for encoding
+  /// (including varying-length buffers).
+  /// Use that information to resize provided row array so that it can fit
+  /// encoded data.
+  Status PrepareOutputForEncode(int64_t start_input_row, int64_t num_input_rows,
+                                KeyRowArray* rows,
+                                const std::vector<KeyColumnArray>& all_cols);
+
+  /// Encode a window of column oriented data into the entire output
+  /// row oriented storage.
+  /// The output buffers for encoding need to be correctly sized before
+  /// starting encoding.
+  void Encode(int64_t start_input_row, int64_t num_input_rows, KeyRowArray* rows,
+              const std::vector<KeyColumnArray>& cols);
+
+  /// Decode a window of row oriented data into a corresponding
+  /// window of column oriented storage.
+  /// The output buffers need to be correctly allocated and sized before
+  /// calling each method.
+  /// For that reason decoding is split into two functions.
+  /// The output of the first one, that processes everything except for
+  /// varying length buffers, can be used to find out required varying
+  /// length buffers sizes.
+  void DecodeFixedLengthBuffers(int64_t start_row_input, int64_t start_row_output,
+                                int64_t num_rows, const KeyRowArray& rows,
+                                std::vector<KeyColumnArray>* cols);
+
+  void DecodeVaryingLengthBuffers(int64_t start_row_input, int64_t start_row_output,
+                                  int64_t num_rows, const KeyRowArray& rows,
+                                  std::vector<KeyColumnArray>* cols);
+
+ private:
+  /// Prepare column array vectors.
+  /// Output column arrays represent a range of input column arrays
+  /// specified by starting row and number of rows.
+  /// Three vectors are generated:
+  /// - all columns
+  /// - fixed-length columns only
+  /// - varying-length columns only
+  void PrepareKeyColumnArrays(int64_t start_row, int64_t num_rows,
+                              const std::vector<KeyColumnArray>& cols_in);
+
+  class TransformBoolean {
+   public:
+    static KeyColumnArray ArrayReplace(const KeyColumnArray& column,
+                                       const KeyColumnArray& temp);
+    static void PreEncode(const KeyColumnArray& input, KeyColumnArray* output,
+                          KeyEncoderContext* ctx);
+    static void PostDecode(const KeyColumnArray& input, KeyColumnArray* output,
+                           KeyEncoderContext* ctx);
+  };
+
+  class EncoderInteger {
+   public:
+    static void Encode(uint32_t offset_within_row, KeyRowArray* rows,
+                       const KeyColumnArray& col, KeyEncoderContext* ctx,
+                       KeyColumnArray* temp);
+    static void Decode(uint32_t start_row, uint32_t num_rows, uint32_t offset_within_row,
+                       const KeyRowArray& rows, KeyColumnArray* col,
+                       KeyEncoderContext* ctx, KeyColumnArray* temp);
+    static bool UsesTransform(const KeyColumnArray& column);
+    static KeyColumnArray ArrayReplace(const KeyColumnArray& column,
+                                       const KeyColumnArray& temp);
+    static void PreEncode(const KeyColumnArray& input, KeyColumnArray* output,
+                          KeyEncoderContext* ctx);
+    static void PostDecode(const KeyColumnArray& input, KeyColumnArray* output,
+                           KeyEncoderContext* ctx);
+
+   private:
+    static bool IsBoolean(const KeyColumnMetadata& metadata);
+  };
+
+  class EncoderBinary {
+   public:
+    static void Encode(uint32_t offset_within_row, KeyRowArray* rows,
+                       const KeyColumnArray& col, KeyEncoderContext* ctx,
+                       KeyColumnArray* temp);
+    static void Decode(uint32_t start_row, uint32_t num_rows, uint32_t offset_within_row,
+                       const KeyRowArray& rows, KeyColumnArray* col,
+                       KeyEncoderContext* ctx, KeyColumnArray* temp);
+    static bool IsInteger(const KeyColumnMetadata& metadata);
+
+   private:
+    template <bool is_row_fixed_length, bool is_encoding, class COPY_FN>
+    static inline void EncodeDecodeHelper(uint32_t start_row, uint32_t num_rows,
+                                          uint32_t offset_within_row,
+                                          const KeyRowArray* rows_const,
+                                          KeyRowArray* rows_mutable_maybe_null,
+                                          const KeyColumnArray* col_const,
+                                          KeyColumnArray* col_mutable_maybe_null,
+                                          COPY_FN copy_fn);
+    template <bool is_row_fixed_length>
+    static void EncodeImp(uint32_t offset_within_row, KeyRowArray* rows,
+                          const KeyColumnArray& col);
+    template <bool is_row_fixed_length>
+    static void DecodeImp(uint32_t start_row, uint32_t num_rows,
+                          uint32_t offset_within_row, const KeyRowArray& rows,
+                          KeyColumnArray* col);
+#if defined(ARROW_HAVE_AVX2)
+    static void EncodeHelper_avx2(bool is_row_fixed_length, uint32_t offset_within_row,
+                                  KeyRowArray* rows, const KeyColumnArray& col);
+    static void DecodeHelper_avx2(bool is_row_fixed_length, uint32_t start_row,
+                                  uint32_t num_rows, uint32_t offset_within_row,
+                                  const KeyRowArray& rows, KeyColumnArray* col);
+    template <bool is_row_fixed_length>
+    static void EncodeImp_avx2(uint32_t offset_within_row, KeyRowArray* rows,
+                               const KeyColumnArray& col);
+    template <bool is_row_fixed_length>
+    static void DecodeImp_avx2(uint32_t start_row, uint32_t num_rows,
+                               uint32_t offset_within_row, const KeyRowArray& rows,
+                               KeyColumnArray* col);
+#endif
+    static void ColumnMemsetNulls(uint32_t offset_within_row, KeyRowArray* rows,
+                                  const KeyColumnArray& col, KeyEncoderContext* ctx,
+                                  KeyColumnArray* temp_vector_16bit, uint8_t byte_value);
+    template <bool is_row_fixed_length, uint32_t col_width>
+    static void ColumnMemsetNullsImp(uint32_t offset_within_row, KeyRowArray* rows,
+                                     const KeyColumnArray& col, KeyEncoderContext* ctx,
+                                     KeyColumnArray* temp_vector_16bit,
+                                     uint8_t byte_value);
+  };
+
+  class EncoderBinaryPair {
+   public:
+    static bool CanProcessPair(const KeyColumnMetadata& col1,
+                               const KeyColumnMetadata& col2) {
+      return EncoderBinary::IsInteger(col1) && EncoderBinary::IsInteger(col2);
+    }
+    static void Encode(uint32_t offset_within_row, KeyRowArray* rows,
+                       const KeyColumnArray& col1, const KeyColumnArray& col2,
+                       KeyEncoderContext* ctx, KeyColumnArray* temp1,
+                       KeyColumnArray* temp2);
+    static void Decode(uint32_t start_row, uint32_t num_rows, uint32_t offset_within_row,
+                       const KeyRowArray& rows, KeyColumnArray* col1,
+                       KeyColumnArray* col2, KeyEncoderContext* ctx,
+                       KeyColumnArray* temp1, KeyColumnArray* temp2);
+
+   private:
+    template <bool is_row_fixed_length, typename col1_type, typename col2_type>
+    static void EncodeImp(uint32_t num_rows_to_skip, uint32_t offset_within_row,
+                          KeyRowArray* rows, const KeyColumnArray& col1,
+                          const KeyColumnArray& col2);
+    template <bool is_row_fixed_length, typename col1_type, typename col2_type>
+    static void DecodeImp(uint32_t num_rows_to_skip, uint32_t start_row,
+                          uint32_t num_rows, uint32_t offset_within_row,
+                          const KeyRowArray& rows, KeyColumnArray* col1,
+                          KeyColumnArray* col2);
+#if defined(ARROW_HAVE_AVX2)
+    static uint32_t EncodeHelper_avx2(bool is_row_fixed_length, uint32_t col_width,
+                                      uint32_t offset_within_row, KeyRowArray* rows,
+                                      const KeyColumnArray& col1,
+                                      const KeyColumnArray& col2);
+    static uint32_t DecodeHelper_avx2(bool is_row_fixed_length, uint32_t col_width,
+                                      uint32_t start_row, uint32_t num_rows,
+                                      uint32_t offset_within_row, const KeyRowArray& rows,
+                                      KeyColumnArray* col1, KeyColumnArray* col2);
+    template <bool is_row_fixed_length, uint32_t col_width>
+    static uint32_t EncodeImp_avx2(uint32_t offset_within_row, KeyRowArray* rows,
+                                   const KeyColumnArray& col1,
+                                   const KeyColumnArray& col2);
+    template <bool is_row_fixed_length, uint32_t col_width>
+    static uint32_t DecodeImp_avx2(uint32_t start_row, uint32_t num_rows,
+                                   uint32_t offset_within_row, const KeyRowArray& rows,
+                                   KeyColumnArray* col1, KeyColumnArray* col2);
+#endif
+  };
+
+  class EncoderOffsets {
+   public:
+    // In order not to repeat work twice,
+    // encoding combines in a single pass computing of:
+    // a) row offsets for varying-length rows
+    // b) within each new row, the cumulative length array
+    // of varying-length values within a row.
+    static void Encode(KeyRowArray* rows,
+                       const std::vector<KeyColumnArray>& varbinary_cols,
+                       KeyEncoderContext* ctx);
+    static void Decode(uint32_t start_row, uint32_t num_rows, const KeyRowArray& rows,
+                       std::vector<KeyColumnArray>* varbinary_cols,
+                       const std::vector<uint32_t>& varbinary_cols_base_offset,
+                       KeyEncoderContext* ctx);
+
+   private:
+    static void EncodeImp(uint32_t num_rows_already_processed, KeyRowArray* rows,
+                          const std::vector<KeyColumnArray>& varbinary_cols);
+#if defined(ARROW_HAVE_AVX2)
+    static uint32_t EncodeImp_avx2(KeyRowArray* rows,
+                                   const std::vector<KeyColumnArray>& varbinary_cols,
+                                   KeyColumnArray* temp_buffer_32B_per_col);
+#endif
+  };
+
+  class EncoderVarBinary {
+   public:
+    static void Encode(uint32_t varbinary_col_id, KeyRowArray* rows,
+                       const KeyColumnArray& col, KeyEncoderContext* ctx);
+    static void Decode(uint32_t start_row, uint32_t num_rows, uint32_t varbinary_col_id,
+                       const KeyRowArray& rows, KeyColumnArray* col,
+                       KeyEncoderContext* ctx);
+
+   private:
+    template <bool first_varbinary_col, bool is_encoding, class COPY_FN>
+    static inline void EncodeDecodeHelper(uint32_t start_row, uint32_t num_rows,
+                                          uint32_t varbinary_col_id,
+                                          const KeyRowArray* rows_const,
+                                          KeyRowArray* rows_mutable_maybe_null,
+                                          const KeyColumnArray* col_const,
+                                          KeyColumnArray* col_mutable_maybe_null,
+                                          COPY_FN copy_fn);
+    template <bool first_varbinary_col>
+    static void EncodeImp(uint32_t varbinary_col_id, KeyRowArray* rows,
+                          const KeyColumnArray& col);
+    template <bool first_varbinary_col>
+    static void DecodeImp(uint32_t start_row, uint32_t num_rows,
+                          uint32_t varbinary_col_id, const KeyRowArray& rows,
+                          KeyColumnArray* col);
+#if defined(ARROW_HAVE_AVX2)
+    static void EncodeHelper_avx2(uint32_t varbinary_col_id, KeyRowArray* rows,
+                                  const KeyColumnArray& col);
+    static void DecodeHelper_avx2(uint32_t start_row, uint32_t num_rows,
+                                  uint32_t varbinary_col_id, const KeyRowArray& rows,
+                                  KeyColumnArray* col);
+    template <bool first_varbinary_col>
+    static void EncodeImp_avx2(uint32_t varbinary_col_id, KeyRowArray* rows,
+                               const KeyColumnArray& col);
+    template <bool first_varbinary_col>
+    static void DecodeImp_avx2(uint32_t start_row, uint32_t num_rows,
+                               uint32_t varbinary_col_id, const KeyRowArray& rows,
+                               KeyColumnArray* col);
+#endif
+  };
+
+  class EncoderNulls {
+   public:
+    static void Encode(KeyRowArray* rows, const std::vector<KeyColumnArray>& cols,
+                       KeyEncoderContext* ctx, KeyColumnArray* temp_vector_16bit);
+    static void Decode(uint32_t start_row, uint32_t num_rows, const KeyRowArray& rows,
+                       std::vector<KeyColumnArray>* cols);
+  };
+
+  KeyEncoderContext* ctx_;
+
+  // Data initialized once, based on data types of key columns
+  KeyRowMetadata row_metadata_;
+
+  // Data initialized for each input batch.
+  // All elements are ordered according to the order of encoded fields in a row.
+  std::vector<KeyColumnArray> batch_all_cols_;
+  std::vector<KeyColumnArray> batch_varbinary_cols_;
+  std::vector<uint32_t> batch_varbinary_cols_base_offsets_;
+};
+
+template <bool is_row_fixed_length, bool is_encoding, class COPY_FN>
+inline void KeyEncoder::EncoderBinary::EncodeDecodeHelper(
+    uint32_t start_row, uint32_t num_rows, uint32_t offset_within_row,
+    const KeyRowArray* rows_const, KeyRowArray* rows_mutable_maybe_null,
+    const KeyColumnArray* col_const, KeyColumnArray* col_mutable_maybe_null,
+    COPY_FN copy_fn) {
+  ARROW_DCHECK(col_const && col_const->metadata().is_fixed_length);
+  uint32_t col_width = col_const->metadata().fixed_length;
+
+  if (is_row_fixed_length) {
+    uint32_t row_width = rows_const->metadata().fixed_length;
+    for (uint32_t i = 0; i < num_rows; ++i) {
+      const uint8_t* src;
+      uint8_t* dst;
+      if (is_encoding) {
+        src = col_const->data(1) + col_width * i;
+        dst = rows_mutable_maybe_null->mutable_data(1) + row_width * (start_row + i) +
+              offset_within_row;
+      } else {
+        src = rows_const->data(1) + row_width * (start_row + i) + offset_within_row;
+        dst = col_mutable_maybe_null->mutable_data(1) + col_width * i;
+      }
+      copy_fn(dst, src, col_width);
+    }
+  } else {
+    const uint32_t* row_offsets = rows_const->offsets();
+    for (uint32_t i = 0; i < num_rows; ++i) {
+      const uint8_t* src;
+      uint8_t* dst;
+      if (is_encoding) {
+        src = col_const->data(1) + col_width * i;
+        dst = rows_mutable_maybe_null->mutable_data(2) + row_offsets[start_row + i] +
+              offset_within_row;
+      } else {
+        src = rows_const->data(2) + row_offsets[start_row + i] + offset_within_row;
+        dst = col_mutable_maybe_null->mutable_data(1) + col_width * i;
+      }
+      copy_fn(dst, src, col_width);
+    }
+  }
+}
+
+template <bool first_varbinary_col, bool is_encoding, class COPY_FN>
+inline void KeyEncoder::EncoderVarBinary::EncodeDecodeHelper(
+    uint32_t start_row, uint32_t num_rows, uint32_t varbinary_col_id,
+    const KeyRowArray* rows_const, KeyRowArray* rows_mutable_maybe_null,
+    const KeyColumnArray* col_const, KeyColumnArray* col_mutable_maybe_null,
+    COPY_FN copy_fn) {
+  // Column and rows need to be varying length
+  ARROW_DCHECK(!rows_const->metadata().is_fixed_length &&
+               !col_const->metadata().is_fixed_length);
+
+  const uint32_t* row_offsets_for_batch = rows_const->offsets() + start_row;
+  const uint32_t* col_offsets = col_const->offsets();
+
+  uint32_t col_offset_next = col_offsets[0];
+  for (uint32_t i = 0; i < num_rows; ++i) {
+    uint32_t col_offset = col_offset_next;
+    col_offset_next = col_offsets[i + 1];
+
+    uint32_t row_offset = row_offsets_for_batch[i];
+    const uint8_t* row = rows_const->data(2) + row_offset;
+
+    uint32_t offset_within_row;
+    uint32_t length;
+    if (first_varbinary_col) {
+      rows_const->metadata().first_varbinary_offset_and_length(row, &offset_within_row,
+                                                               &length);
+    } else {
+      rows_const->metadata().nth_varbinary_offset_and_length(row, varbinary_col_id,
+                                                             &offset_within_row, &length);
+    }
+
+    row_offset += offset_within_row;
+
+    const uint8_t* src;
+    uint8_t* dst;
+    if (is_encoding) {
+      src = col_const->data(2) + col_offset;
+      dst = rows_mutable_maybe_null->mutable_data(2) + row_offset;
+    } else {
+      src = rows_const->data(2) + row_offset;
+      dst = col_mutable_maybe_null->mutable_data(2) + col_offset;
+    }
+    copy_fn(dst, src, length);
+  }
+}
+
+}  // namespace compute
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/exec/key_encode_avx2.cc b/cpp/src/arrow/compute/exec/key_encode_avx2.cc
new file mode 100644
index 00000000000..d875412cf88
--- /dev/null
+++ b/cpp/src/arrow/compute/exec/key_encode_avx2.cc
@@ -0,0 +1,545 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <immintrin.h>
+
+#include "arrow/compute/exec/key_encode.h"
+
+namespace arrow {
+namespace compute {
+
+#if defined(ARROW_HAVE_AVX2)
+
+inline __m256i set_first_n_bytes_avx2(int n) {
+  constexpr uint64_t kByteSequence0To7 = 0x0706050403020100ULL;
+  constexpr uint64_t kByteSequence8To15 = 0x0f0e0d0c0b0a0908ULL;
+  constexpr uint64_t kByteSequence16To23 = 0x1716151413121110ULL;
+  constexpr uint64_t kByteSequence24To31 = 0x1f1e1d1c1b1a1918ULL;
+
+  return _mm256_cmpgt_epi8(_mm256_set1_epi8(n),
+                           _mm256_setr_epi64x(kByteSequence0To7, kByteSequence8To15,
+                                              kByteSequence16To23, kByteSequence24To31));
+}
+
+inline __m256i inclusive_prefix_sum_32bit_avx2(__m256i x) {
+  x = _mm256_add_epi32(
+      x, _mm256_permutevar8x32_epi32(
+             _mm256_andnot_si256(_mm256_setr_epi32(0, 0, 0, 0, 0, 0, 0, 0xffffffff), x),
+             _mm256_setr_epi32(7, 0, 1, 2, 3, 4, 5, 6)));
+  x = _mm256_add_epi32(
+      x, _mm256_permute4x64_epi64(
+             _mm256_andnot_si256(
+                 _mm256_setr_epi32(0, 0, 0, 0, 0, 0, 0xffffffff, 0xffffffff), x),
+             0x93));  // 0b10010011
+  x = _mm256_add_epi32(
+      x, _mm256_permute4x64_epi64(
+             _mm256_andnot_si256(
+                 _mm256_setr_epi32(0, 0, 0, 0, 0, 0, 0xffffffff, 0xffffffff), x),
+             0x4f));  // 0b01001111
+  return x;
+}
+
+void KeyEncoder::EncoderBinary::EncodeHelper_avx2(bool is_row_fixed_length,
+                                                  uint32_t offset_within_row,
+                                                  KeyRowArray* rows,
+                                                  const KeyColumnArray& col) {
+  if (is_row_fixed_length) {
+    EncodeImp_avx2<true>(offset_within_row, rows, col);
+  } else {
+    EncodeImp_avx2<false>(offset_within_row, rows, col);
+  }
+}
+
+template <bool is_row_fixed_length>
+void KeyEncoder::EncoderBinary::EncodeImp_avx2(uint32_t offset_within_row,
+                                               KeyRowArray* rows,
+                                               const KeyColumnArray& col) {
+  EncodeDecodeHelper<is_row_fixed_length, true>(
+      0, static_cast<uint32_t>(col.length()), offset_within_row, rows, rows, &col,
+      nullptr, [](uint8_t* dst, const uint8_t* src, int64_t length) {
+        __m256i* dst256 = reinterpret_cast<__m256i*>(dst);
+        const __m256i* src256 = reinterpret_cast<const __m256i*>(src);
+        uint32_t istripe;
+        for (istripe = 0; istripe < length / 32; ++istripe) {
+          _mm256_storeu_si256(dst256 + istripe, _mm256_loadu_si256(src256 + istripe));
+        }
+        if ((length % 32) > 0) {
+          __m256i mask = set_first_n_bytes_avx2(length % 32);
+          _mm256_storeu_si256(
+              dst256 + istripe,
+              _mm256_blendv_epi8(_mm256_loadu_si256(dst256 + istripe),
+                                 _mm256_loadu_si256(src256 + istripe), mask));
+        }
+      });
+}
+
+void KeyEncoder::EncoderBinary::DecodeHelper_avx2(bool is_row_fixed_length,
+                                                  uint32_t start_row, uint32_t num_rows,
+                                                  uint32_t offset_within_row,
+                                                  const KeyRowArray& rows,
+                                                  KeyColumnArray* col) {
+  if (is_row_fixed_length) {
+    DecodeImp_avx2<true>(start_row, num_rows, offset_within_row, rows, col);
+  } else {
+    DecodeImp_avx2<false>(start_row, num_rows, offset_within_row, rows, col);
+  }
+}
+
+template <bool is_row_fixed_length>
+void KeyEncoder::EncoderBinary::DecodeImp_avx2(uint32_t start_row, uint32_t num_rows,
+                                               uint32_t offset_within_row,
+                                               const KeyRowArray& rows,
+                                               KeyColumnArray* col) {
+  EncodeDecodeHelper<is_row_fixed_length, false>(
+      start_row, num_rows, offset_within_row, &rows, nullptr, col, col,
+      [](uint8_t* dst, const uint8_t* src, int64_t length) {
+        for (uint32_t istripe = 0; istripe < (length + 31) / 32; ++istripe) {
+          __m256i* dst256 = reinterpret_cast<__m256i*>(dst);
+          const __m256i* src256 = reinterpret_cast<const __m256i*>(src);
+          _mm256_storeu_si256(dst256 + istripe, _mm256_loadu_si256(src256 + istripe));
+        }
+      });
+}
+
+uint32_t KeyEncoder::EncoderBinaryPair::EncodeHelper_avx2(
+    bool is_row_fixed_length, uint32_t col_width, uint32_t offset_within_row,
+    KeyRowArray* rows, const KeyColumnArray& col1, const KeyColumnArray& col2) {
+  using EncodeImp_avx2_t =
+      uint32_t (*)(uint32_t, KeyRowArray*, const KeyColumnArray&, const KeyColumnArray&);
+  static const EncodeImp_avx2_t EncodeImp_avx2_fn[] = {
+      EncodeImp_avx2<false, 1>, EncodeImp_avx2<false, 2>, EncodeImp_avx2<false, 4>,
+      EncodeImp_avx2<false, 8>, EncodeImp_avx2<true, 1>,  EncodeImp_avx2<true, 2>,
+      EncodeImp_avx2<true, 4>,  EncodeImp_avx2<true, 8>,
+  };
+  int log_col_width = col_width == 8 ? 3 : col_width == 4 ? 2 : col_width == 2 ? 1 : 0;
+  int dispatch_const = (is_row_fixed_length ? 4 : 0) + log_col_width;
+  return EncodeImp_avx2_fn[dispatch_const](offset_within_row, rows, col1, col2);
+}
+
+template <bool is_row_fixed_length, uint32_t col_width>
+uint32_t KeyEncoder::EncoderBinaryPair::EncodeImp_avx2(uint32_t offset_within_row,
+                                                       KeyRowArray* rows,
+                                                       const KeyColumnArray& col1,
+                                                       const KeyColumnArray& col2) {
+  uint32_t num_rows = static_cast<uint32_t>(col1.length());
+  ARROW_DCHECK(col_width == 1 || col_width == 2 || col_width == 4 || col_width == 8);
+
+  const uint8_t* col_vals_A = col1.data(1);
+  const uint8_t* col_vals_B = col2.data(1);
+  uint8_t* row_vals = is_row_fixed_length ? rows->mutable_data(1) : rows->mutable_data(2);
+
+  constexpr int unroll = 32 / col_width;
+
+  uint32_t num_processed = num_rows / unroll * unroll;
+
+  for (uint32_t i = 0; i < num_rows / unroll; ++i) {
+    __m256i col_A = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(col_vals_A) + i);
+    __m256i col_B = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(col_vals_B) + i);
+    __m256i r0, r1;
+    if (col_width == 1) {
+      // results in 16-bit outputs in the order: 0..7, 16..23
+      r0 = _mm256_unpacklo_epi8(col_A, col_B);
+      // results in 16-bit outputs in the order: 8..15, 24..31
+      r1 = _mm256_unpackhi_epi8(col_A, col_B);
+    } else if (col_width == 2) {
+      // results in 32-bit outputs in the order: 0..3, 8..11
+      r0 = _mm256_unpacklo_epi16(col_A, col_B);
+      // results in 32-bit outputs in the order: 4..7, 12..15
+      r1 = _mm256_unpackhi_epi16(col_A, col_B);
+    } else if (col_width == 4) {
+      // results in 64-bit outputs in the order: 0..1, 4..5
+      r0 = _mm256_unpacklo_epi32(col_A, col_B);
+      // results in 64-bit outputs in the order: 2..3, 6..7
+      r1 = _mm256_unpackhi_epi32(col_A, col_B);
+    } else if (col_width == 8) {
+      // results in 128-bit outputs in the order: 0, 2
+      r0 = _mm256_unpacklo_epi64(col_A, col_B);
+      // results in 128-bit outputs in the order: 1, 3
+      r1 = _mm256_unpackhi_epi64(col_A, col_B);
+    }
+    col_A = _mm256_permute2x128_si256(r0, r1, 0x20);
+    col_B = _mm256_permute2x128_si256(r0, r1, 0x31);
+    if (col_width == 8) {
+      __m128i *dst0, *dst1, *dst2, *dst3;
+      if (is_row_fixed_length) {
+        uint32_t fixed_length = rows->metadata().fixed_length;
+        uint8_t* dst = row_vals + offset_within_row + fixed_length * i * unroll;
+        dst0 = reinterpret_cast<__m128i*>(dst);
+        dst1 = reinterpret_cast<__m128i*>(dst + fixed_length);
+        dst2 = reinterpret_cast<__m128i*>(dst + fixed_length * 2);
+        dst3 = reinterpret_cast<__m128i*>(dst + fixed_length * 3);
+      } else {
+        const uint32_t* row_offsets = rows->offsets() + i * unroll;
+        uint8_t* dst = row_vals + offset_within_row;
+        dst0 = reinterpret_cast<__m128i*>(dst + row_offsets[0]);
+        dst1 = reinterpret_cast<__m128i*>(dst + row_offsets[1]);
+        dst2 = reinterpret_cast<__m128i*>(dst + row_offsets[2]);
+        dst3 = reinterpret_cast<__m128i*>(dst + row_offsets[3]);
+      }
+      _mm_storeu_si128(dst0, _mm256_castsi256_si128(r0));
+      _mm_storeu_si128(dst1, _mm256_castsi256_si128(r1));
+      _mm_storeu_si128(dst2, _mm256_extracti128_si256(r0, 1));
+      _mm_storeu_si128(dst3, _mm256_extracti128_si256(r1, 1));
+
+    } else {
+      uint8_t buffer[64];
+      _mm256_storeu_si256(reinterpret_cast<__m256i*>(buffer), col_A);
+      _mm256_storeu_si256(reinterpret_cast<__m256i*>(buffer) + 1, col_B);
+
+      if (is_row_fixed_length) {
+        uint32_t fixed_length = rows->metadata().fixed_length;
+        uint8_t* dst = row_vals + offset_within_row + fixed_length * i * unroll;
+        for (int j = 0; j < unroll; ++j) {
+          if (col_width == 1) {
+            *reinterpret_cast<uint16_t*>(dst + fixed_length * j) =
+                reinterpret_cast<const uint16_t*>(buffer)[j];
+          } else if (col_width == 2) {
+            *reinterpret_cast<uint32_t*>(dst + fixed_length * j) =
+                reinterpret_cast<const uint32_t*>(buffer)[j];
+          } else if (col_width == 4) {
+            *reinterpret_cast<uint64_t*>(dst + fixed_length * j) =
+                reinterpret_cast<const uint64_t*>(buffer)[j];
+          }
+        }
+      } else {
+        const uint32_t* row_offsets = rows->offsets() + i * unroll;
+        uint8_t* dst = row_vals + offset_within_row;
+        for (int j = 0; j < unroll; ++j) {
+          if (col_width == 1) {
+            *reinterpret_cast<uint16_t*>(dst + row_offsets[j]) =
+                reinterpret_cast<const uint16_t*>(buffer)[j];
+          } else if (col_width == 2) {
+            *reinterpret_cast<uint32_t*>(dst + row_offsets[j]) =
+                reinterpret_cast<const uint32_t*>(buffer)[j];
+          } else if (col_width == 4) {
+            *reinterpret_cast<uint64_t*>(dst + row_offsets[j]) =
+                reinterpret_cast<const uint64_t*>(buffer)[j];
+          }
+        }
+      }
+    }
+  }
+
+  return num_processed;
+}
+
+uint32_t KeyEncoder::EncoderBinaryPair::DecodeHelper_avx2(
+    bool is_row_fixed_length, uint32_t col_width, uint32_t start_row, uint32_t num_rows,
+    uint32_t offset_within_row, const KeyRowArray& rows, KeyColumnArray* col1,
+    KeyColumnArray* col2) {
+  using DecodeImp_avx2_t =
+      uint32_t (*)(uint32_t start_row, uint32_t num_rows, uint32_t offset_within_row,
+                   const KeyRowArray& rows, KeyColumnArray* col1, KeyColumnArray* col2);
+  static const DecodeImp_avx2_t DecodeImp_avx2_fn[] = {
+      DecodeImp_avx2<false, 1>, DecodeImp_avx2<false, 2>, DecodeImp_avx2<false, 4>,
+      DecodeImp_avx2<false, 8>, DecodeImp_avx2<true, 1>,  DecodeImp_avx2<true, 2>,
+      DecodeImp_avx2<true, 4>,  DecodeImp_avx2<true, 8>};
+  int log_col_width = col_width == 8 ? 3 : col_width == 4 ? 2 : col_width == 2 ? 1 : 0;
+  int dispatch_const = log_col_width | (is_row_fixed_length ? 4 : 0);
+  return DecodeImp_avx2_fn[dispatch_const](start_row, num_rows, offset_within_row, rows,
+                                           col1, col2);
+}
+
+template <bool is_row_fixed_length, uint32_t col_width>
+uint32_t KeyEncoder::EncoderBinaryPair::DecodeImp_avx2(
+    uint32_t start_row, uint32_t num_rows, uint32_t offset_within_row,
+    const KeyRowArray& rows, KeyColumnArray* col1, KeyColumnArray* col2) {
+  ARROW_DCHECK(col_width == 1 || col_width == 2 || col_width == 4 || col_width == 8);
+
+  uint8_t* col_vals_A = col1->mutable_data(1);
+  uint8_t* col_vals_B = col2->mutable_data(1);
+
+  uint32_t fixed_length = rows.metadata().fixed_length;
+  const uint32_t* offsets;
+  const uint8_t* src_base;
+  if (is_row_fixed_length) {
+    src_base = rows.data(1) + fixed_length * start_row + offset_within_row;
+    offsets = nullptr;
+  } else {
+    src_base = rows.data(2) + offset_within_row;
+    offsets = rows.offsets() + start_row;
+  }
+
+  constexpr int unroll = 32 / col_width;
+
+  uint32_t num_processed = num_rows / unroll * unroll;
+
+  if (col_width == 8) {
+    for (uint32_t i = 0; i < num_rows / unroll; ++i) {
+      const __m128i *src0, *src1, *src2, *src3;
+      if (is_row_fixed_length) {
+        const uint8_t* src = src_base + (i * unroll) * fixed_length;
+        src0 = reinterpret_cast<const __m128i*>(src);
+        src1 = reinterpret_cast<const __m128i*>(src + fixed_length);
+        src2 = reinterpret_cast<const __m128i*>(src + fixed_length * 2);
+        src3 = reinterpret_cast<const __m128i*>(src + fixed_length * 3);
+      } else {
+        const uint32_t* row_offsets = offsets + i * unroll;
+        const uint8_t* src = src_base;
+        src0 = reinterpret_cast<const __m128i*>(src + row_offsets[0]);
+        src1 = reinterpret_cast<const __m128i*>(src + row_offsets[1]);
+        src2 = reinterpret_cast<const __m128i*>(src + row_offsets[2]);
+        src3 = reinterpret_cast<const __m128i*>(src + row_offsets[3]);
+      }
+
+      __m256i r0 = _mm256_inserti128_si256(_mm256_castsi128_si256(_mm_loadu_si128(src0)),
+                                           _mm_loadu_si128(src1), 1);
+      __m256i r1 = _mm256_inserti128_si256(_mm256_castsi128_si256(_mm_loadu_si128(src2)),
+                                           _mm_loadu_si128(src3), 1);
+
+      r0 = _mm256_permute4x64_epi64(r0, 0xd8);  // 0b11011000
+      r1 = _mm256_permute4x64_epi64(r1, 0xd8);
+
+      // First 128-bit lanes from both inputs
+      __m256i c1 = _mm256_permute2x128_si256(r0, r1, 0x20);
+      // Second 128-bit lanes from both inputs
+      __m256i c2 = _mm256_permute2x128_si256(r0, r1, 0x31);
+      _mm256_storeu_si256(reinterpret_cast<__m256i*>(col_vals_A) + i, c1);
+      _mm256_storeu_si256(reinterpret_cast<__m256i*>(col_vals_B) + i, c2);
+    }
+  } else {
+    uint8_t buffer[64];
+    for (uint32_t i = 0; i < num_rows / unroll; ++i) {
+      if (is_row_fixed_length) {
+        const uint8_t* src = src_base + (i * unroll) * fixed_length;
+        for (int j = 0; j < unroll; ++j) {
+          if (col_width == 1) {
+            reinterpret_cast<uint16_t*>(buffer)[j] =
+                *reinterpret_cast<const uint16_t*>(src + fixed_length * j);
+          } else if (col_width == 2) {
+            reinterpret_cast<uint32_t*>(buffer)[j] =
+                *reinterpret_cast<const uint32_t*>(src + fixed_length * j);
+          } else if (col_width == 4) {
+            reinterpret_cast<uint64_t*>(buffer)[j] =
+                *reinterpret_cast<const uint64_t*>(src + fixed_length * j);
+          }
+        }
+      } else {
+        const uint32_t* row_offsets = offsets + i * unroll;
+        const uint8_t* src = src_base;
+        for (int j = 0; j < unroll; ++j) {
+          if (col_width == 1) {
+            reinterpret_cast<uint16_t*>(buffer)[j] =
+                *reinterpret_cast<const uint16_t*>(src + row_offsets[j]);
+          } else if (col_width == 2) {
+            reinterpret_cast<uint32_t*>(buffer)[j] =
+                *reinterpret_cast<const uint32_t*>(src + row_offsets[j]);
+          } else if (col_width == 4) {
+            reinterpret_cast<uint64_t*>(buffer)[j] =
+                *reinterpret_cast<const uint64_t*>(src + row_offsets[j]);
+          }
+        }
+      }
+
+      __m256i r0 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(buffer));
+      __m256i r1 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(buffer) + 1);
+
+      constexpr uint64_t kByteSequence_0_2_4_6_8_10_12_14 = 0x0e0c0a0806040200ULL;
+      constexpr uint64_t kByteSequence_1_3_5_7_9_11_13_15 = 0x0f0d0b0907050301ULL;
+      constexpr uint64_t kByteSequence_0_1_4_5_8_9_12_13 = 0x0d0c090805040100ULL;
+      constexpr uint64_t kByteSequence_2_3_6_7_10_11_14_15 = 0x0f0e0b0a07060302ULL;
+
+      if (col_width == 1) {
+        // Collect every second byte next to each other
+        const __m256i shuffle_const = _mm256_setr_epi64x(
+            kByteSequence_0_2_4_6_8_10_12_14, kByteSequence_1_3_5_7_9_11_13_15,
+            kByteSequence_0_2_4_6_8_10_12_14, kByteSequence_1_3_5_7_9_11_13_15);
+        r0 = _mm256_shuffle_epi8(r0, shuffle_const);
+        r1 = _mm256_shuffle_epi8(r1, shuffle_const);
+        // 0b11011000 swapping second and third 64-bit lane
+        r0 = _mm256_permute4x64_epi64(r0, 0xd8);
+        r1 = _mm256_permute4x64_epi64(r1, 0xd8);
+      } else if (col_width == 2) {
+        // Collect every second 16-bit word next to each other
+        const __m256i shuffle_const = _mm256_setr_epi64x(
+            kByteSequence_0_1_4_5_8_9_12_13, kByteSequence_2_3_6_7_10_11_14_15,
+            kByteSequence_0_1_4_5_8_9_12_13, kByteSequence_2_3_6_7_10_11_14_15);
+        r0 = _mm256_shuffle_epi8(r0, shuffle_const);
+        r1 = _mm256_shuffle_epi8(r1, shuffle_const);
+        // 0b11011000 swapping second and third 64-bit lane
+        r0 = _mm256_permute4x64_epi64(r0, 0xd8);
+        r1 = _mm256_permute4x64_epi64(r1, 0xd8);
+      } else if (col_width == 4) {
+        // Collect every second 32-bit word next to each other
+        const __m256i permute_const = _mm256_setr_epi32(0, 2, 4, 6, 1, 3, 5, 7);
+        r0 = _mm256_permutevar8x32_epi32(r0, permute_const);
+        r1 = _mm256_permutevar8x32_epi32(r1, permute_const);
+      }
+
+      // First 128-bit lanes from both inputs
+      __m256i c1 = _mm256_permute2x128_si256(r0, r1, 0x20);
+      // Second 128-bit lanes from both inputs
+      __m256i c2 = _mm256_permute2x128_si256(r0, r1, 0x31);
+      _mm256_storeu_si256(reinterpret_cast<__m256i*>(col_vals_A) + i, c1);
+      _mm256_storeu_si256(reinterpret_cast<__m256i*>(col_vals_B) + i, c2);
+    }
+  }
+
+  return num_processed;
+}
+
+uint32_t KeyEncoder::EncoderOffsets::EncodeImp_avx2(
+    KeyRowArray* rows, const std::vector<KeyColumnArray>& varbinary_cols,
+    KeyColumnArray* temp_buffer_32B_per_col) {
+  ARROW_DCHECK(temp_buffer_32B_per_col->metadata().is_fixed_length &&
+               temp_buffer_32B_per_col->metadata().fixed_length ==
+                   static_cast<uint32_t>(sizeof(uint32_t)) &&
+               temp_buffer_32B_per_col->length() >=
+                   static_cast<int64_t>(varbinary_cols.size()) * 8);
+  ARROW_DCHECK(varbinary_cols.size() > 0);
+
+  int row_alignment = rows->metadata().row_alignment;
+  int string_alignment = rows->metadata().string_alignment;
+
+  uint32_t* row_offsets = rows->mutable_offsets();
+  uint8_t* row_values = rows->mutable_data(2);
+  uint32_t num_rows = static_cast<uint32_t>(varbinary_cols[0].length());
+
+  constexpr int unroll = 8;
+  uint32_t num_processed = num_rows / unroll * unroll;
+  uint32_t* temp_varbinary_ends =
+      reinterpret_cast<uint32_t*>(temp_buffer_32B_per_col->mutable_data(1));
+
+  row_offsets[0] = 0;
+
+  __m256i row_offset = _mm256_setzero_si256();
+  for (uint32_t i = 0; i < num_rows / unroll; ++i) {
+    // Zero out lengths for nulls.
+    // Add lengths of all columns to get row size.
+    // Store in temp buffer varbinary field ends while summing their lengths.
+
+    __m256i offset_within_row = _mm256_set1_epi32(rows->metadata().fixed_length);
+
+    for (size_t col = 0; col < varbinary_cols.size(); ++col) {
+      const uint32_t* col_offsets = varbinary_cols[col].offsets();
+      __m256i col_length = _mm256_sub_epi32(
+          _mm256_loadu_si256(reinterpret_cast<const __m256i*>(col_offsets + 1) + i),
+          _mm256_loadu_si256(reinterpret_cast<const __m256i*>(col_offsets + 0) + i));
+
+      const uint8_t* non_nulls = varbinary_cols[col].data(0);
+      if (non_nulls && non_nulls[i] != 0xff) {
+        // Zero out lengths for values that are not null
+        const __m256i individual_bits =
+            _mm256_setr_epi32(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
+        __m256i null_mask = _mm256_cmpeq_epi32(
+            _mm256_setzero_si256(),
+            _mm256_and_si256(_mm256_set1_epi32(non_nulls[i]), individual_bits));
+        col_length = _mm256_andnot_si256(null_mask, col_length);
+      }
+
+      __m256i padding =
+          _mm256_and_si256(_mm256_sub_epi32(_mm256_setzero_si256(), offset_within_row),
+                           _mm256_set1_epi32(string_alignment - 1));
+      offset_within_row = _mm256_add_epi32(offset_within_row, padding);
+      offset_within_row = _mm256_add_epi32(offset_within_row, col_length);
+
+      _mm256_storeu_si256(reinterpret_cast<__m256i*>(temp_varbinary_ends) + col,
+                          offset_within_row);
+    }
+
+    __m256i padding =
+        _mm256_and_si256(_mm256_sub_epi32(_mm256_setzero_si256(), offset_within_row),
+                         _mm256_set1_epi32(row_alignment - 1));
+    offset_within_row = _mm256_add_epi32(offset_within_row, padding);
+
+    // Inclusive prefix sum of 32-bit elements
+    __m256i row_offset_delta = inclusive_prefix_sum_32bit_avx2(offset_within_row);
+    row_offset = _mm256_add_epi32(
+        _mm256_permutevar8x32_epi32(row_offset, _mm256_set1_epi32(7)), row_offset_delta);
+
+    _mm256_storeu_si256(reinterpret_cast<__m256i*>(row_offsets + 1) + i, row_offset);
+
+    // Output varbinary ends for all fields in each row
+    for (size_t col = 0; col < varbinary_cols.size(); ++col) {
+      for (uint32_t row = 0; row < unroll; ++row) {
+        uint32_t* dst = rows->metadata().varbinary_end_array(
+                            row_values + row_offsets[i * unroll + row]) +
+                        col;
+        const uint32_t* src = temp_varbinary_ends + (col * unroll + row);
+        *dst = *src;
+      }
+    }
+  }
+
+  return num_processed;
+}
+
+void KeyEncoder::EncoderVarBinary::EncodeHelper_avx2(uint32_t varbinary_col_id,
+                                                     KeyRowArray* rows,
+                                                     const KeyColumnArray& col) {
+  if (varbinary_col_id == 0) {
+    EncodeImp_avx2<true>(varbinary_col_id, rows, col);
+  } else {
+    EncodeImp_avx2<false>(varbinary_col_id, rows, col);
+  }
+}
+
+template <bool first_varbinary_col>
+void KeyEncoder::EncoderVarBinary::EncodeImp_avx2(uint32_t varbinary_col_id,
+                                                  KeyRowArray* rows,
+                                                  const KeyColumnArray& col) {
+  EncodeDecodeHelper<first_varbinary_col, true>(
+      0, static_cast<uint32_t>(col.length()), varbinary_col_id, rows, rows, &col, nullptr,
+      [](uint8_t* dst, const uint8_t* src, int64_t length) {
+        __m256i* dst256 = reinterpret_cast<__m256i*>(dst);
+        const __m256i* src256 = reinterpret_cast<const __m256i*>(src);
+        uint32_t istripe;
+        for (istripe = 0; istripe < length / 32; ++istripe) {
+          _mm256_storeu_si256(dst256 + istripe, _mm256_loadu_si256(src256 + istripe));
+        }
+        if ((length % 32) > 0) {
+          __m256i mask = set_first_n_bytes_avx2(length % 32);
+          _mm256_storeu_si256(
+              dst256 + istripe,
+              _mm256_blendv_epi8(_mm256_loadu_si256(dst256 + istripe),
+                                 _mm256_loadu_si256(src256 + istripe), mask));
+        }
+      });
+}
+
+void KeyEncoder::EncoderVarBinary::DecodeHelper_avx2(uint32_t start_row,
+                                                     uint32_t num_rows,
+                                                     uint32_t varbinary_col_id,
+                                                     const KeyRowArray& rows,
+                                                     KeyColumnArray* col) {
+  if (varbinary_col_id == 0) {
+    DecodeImp_avx2<true>(start_row, num_rows, varbinary_col_id, rows, col);
+  } else {
+    DecodeImp_avx2<false>(start_row, num_rows, varbinary_col_id, rows, col);
+  }
+}
+
+template <bool first_varbinary_col>
+void KeyEncoder::EncoderVarBinary::DecodeImp_avx2(uint32_t start_row, uint32_t num_rows,
+                                                  uint32_t varbinary_col_id,
+                                                  const KeyRowArray& rows,
+                                                  KeyColumnArray* col) {
+  EncodeDecodeHelper<first_varbinary_col, false>(
+      start_row, num_rows, varbinary_col_id, &rows, nullptr, col, col,
+      [](uint8_t* dst, const uint8_t* src, int64_t length) {
+        for (uint32_t istripe = 0; istripe < (length + 31) / 32; ++istripe) {
+          __m256i* dst256 = reinterpret_cast<__m256i*>(dst);
+          const __m256i* src256 = reinterpret_cast<const __m256i*>(src);
+          _mm256_storeu_si256(dst256 + istripe, _mm256_loadu_si256(src256 + istripe));
+        }
+      });
+}
+
+#endif
+
+}  // namespace compute
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/exec/key_hash.cc b/cpp/src/arrow/compute/exec/key_hash.cc
new file mode 100644
index 00000000000..081411e708e
--- /dev/null
+++ b/cpp/src/arrow/compute/exec/key_hash.cc
@@ -0,0 +1,238 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/compute/exec/key_hash.h"
+
+#include <memory.h>
+
+#include <algorithm>
+#include <cstdint>
+
+#include "arrow/compute/exec/util.h"
+
+namespace arrow {
+namespace compute {
+
+inline uint32_t Hashing::avalanche_helper(uint32_t acc) {
+  acc ^= (acc >> 15);
+  acc *= PRIME32_2;
+  acc ^= (acc >> 13);
+  acc *= PRIME32_3;
+  acc ^= (acc >> 16);
+  return acc;
+}
+
+void Hashing::avalanche(int64_t hardware_flags, uint32_t num_keys, uint32_t* hashes) {
+  uint32_t processed = 0;
+#if defined(ARROW_HAVE_AVX2)
+  if (hardware_flags & arrow::internal::CpuInfo::AVX2) {
+    int tail = num_keys % 8;
+    avalanche_avx2(num_keys - tail, hashes);
+    processed = num_keys - tail;
+  }
+#endif
+  for (uint32_t i = processed; i < num_keys; ++i) {
+    hashes[i] = avalanche_helper(hashes[i]);
+  }
+}
+
+inline uint32_t Hashing::combine_accumulators(const uint32_t acc1, const uint32_t acc2,
+                                              const uint32_t acc3, const uint32_t acc4) {
+  return ROTL(acc1, 1) + ROTL(acc2, 7) + ROTL(acc3, 12) + ROTL(acc4, 18);
+}
+
+inline void Hashing::helper_8B(uint32_t key_length, uint32_t num_keys,
+                               const uint8_t* keys, uint32_t* hashes) {
+  ARROW_DCHECK(key_length <= 8);
+  uint64_t mask = ~0ULL >> (8 * (8 - key_length));
+  constexpr uint64_t multiplier = 14029467366897019727ULL;
+  uint32_t offset = 0;
+  for (uint32_t ikey = 0; ikey < num_keys; ++ikey) {
+    uint64_t x = *reinterpret_cast<const uint64_t*>(keys + offset);
+    x &= mask;
+    hashes[ikey] = static_cast<uint32_t>(BYTESWAP(x * multiplier));
+    offset += key_length;
+  }
+}
+
+inline void Hashing::helper_stripe(uint32_t offset, uint64_t mask_hi, const uint8_t* keys,
+                                   uint32_t& acc1, uint32_t& acc2, uint32_t& acc3,
+                                   uint32_t& acc4) {
+  uint64_t v1 = reinterpret_cast<const uint64_t*>(keys + offset)[0];
+  // We do not need to mask v1, because we will not process a stripe
+  // unless at least 9 bytes of it are part of the key.
+  uint64_t v2 = reinterpret_cast<const uint64_t*>(keys + offset)[1];
+  v2 &= mask_hi;
+  uint32_t x1 = static_cast<uint32_t>(v1);
+  uint32_t x2 = static_cast<uint32_t>(v1 >> 32);
+  uint32_t x3 = static_cast<uint32_t>(v2);
+  uint32_t x4 = static_cast<uint32_t>(v2 >> 32);
+  acc1 += x1 * PRIME32_2;
+  acc1 = ROTL(acc1, 13) * PRIME32_1;
+  acc2 += x2 * PRIME32_2;
+  acc2 = ROTL(acc2, 13) * PRIME32_1;
+  acc3 += x3 * PRIME32_2;
+  acc3 = ROTL(acc3, 13) * PRIME32_1;
+  acc4 += x4 * PRIME32_2;
+  acc4 = ROTL(acc4, 13) * PRIME32_1;
+}
+
+void Hashing::helper_stripes(int64_t hardware_flags, uint32_t num_keys,
+                             uint32_t key_length, const uint8_t* keys, uint32_t* hash) {
+  uint32_t processed = 0;
+#if defined(ARROW_HAVE_AVX2)
+  if (hardware_flags & arrow::internal::CpuInfo::AVX2) {
+    int tail = num_keys % 2;
+    helper_stripes_avx2(num_keys - tail, key_length, keys, hash);
+    processed = num_keys - tail;
+  }
+#endif
+
+  // If length modulo stripe length is less than or equal 8, round down to the nearest 16B
+  // boundary (8B ending will be processed in a separate function), otherwise round up.
+  const uint32_t num_stripes = (key_length + 7) / 16;
+  uint64_t mask_hi =
+      ~0ULL >>
+      (8 * ((num_stripes * 16 > key_length) ? num_stripes * 16 - key_length : 0));
+
+  for (uint32_t i = processed; i < num_keys; ++i) {
+    uint32_t acc1, acc2, acc3, acc4;
+    acc1 = static_cast<uint32_t>(
+        (static_cast<uint64_t>(PRIME32_1) + static_cast<uint64_t>(PRIME32_2)) &
+        0xffffffff);
+    acc2 = PRIME32_2;
+    acc3 = 0;
+    acc4 = static_cast<uint32_t>(-static_cast<int32_t>(PRIME32_1));
+    uint32_t offset = i * key_length;
+    for (uint32_t stripe = 0; stripe < num_stripes - 1; ++stripe) {
+      helper_stripe(offset, ~0ULL, keys, acc1, acc2, acc3, acc4);
+      offset += 16;
+    }
+    helper_stripe(offset, mask_hi, keys, acc1, acc2, acc3, acc4);
+    hash[i] = combine_accumulators(acc1, acc2, acc3, acc4);
+  }
+}
+
+inline uint32_t Hashing::helper_tail(uint32_t offset, uint64_t mask, const uint8_t* keys,
+                                     uint32_t acc) {
+  uint64_t v = reinterpret_cast<const uint64_t*>(keys + offset)[0];
+  v &= mask;
+  uint32_t x1 = static_cast<uint32_t>(v);
+  uint32_t x2 = static_cast<uint32_t>(v >> 32);
+  acc += x1 * PRIME32_3;
+  acc = ROTL(acc, 17) * PRIME32_4;
+  acc += x2 * PRIME32_3;
+  acc = ROTL(acc, 17) * PRIME32_4;
+  return acc;
+}
+
+void Hashing::helper_tails(int64_t hardware_flags, uint32_t num_keys, uint32_t key_length,
+                           const uint8_t* keys, uint32_t* hash) {
+  uint32_t processed = 0;
+#if defined(ARROW_HAVE_AVX2)
+  if (hardware_flags & arrow::internal::CpuInfo::AVX2) {
+    int tail = num_keys % 8;
+    helper_tails_avx2(num_keys - tail, key_length, keys, hash);
+    processed = num_keys - tail;
+  }
+#endif
+  uint64_t mask = ~0ULL >> (8 * (((key_length % 8) == 0) ? 0 : 8 - (key_length % 8)));
+  uint32_t offset = key_length / 16 * 16;
+  offset += processed * key_length;
+  for (uint32_t i = processed; i < num_keys; ++i) {
+    hash[i] = helper_tail(offset, mask, keys, hash[i]);
+    offset += key_length;
+  }
+}
+
+void Hashing::hash_fixed(int64_t hardware_flags, uint32_t num_keys, uint32_t length_key,
+                         const uint8_t* keys, uint32_t* hashes) {
+  ARROW_DCHECK(length_key > 0);
+
+  if (length_key <= 8) {
+    helper_8B(length_key, num_keys, keys, hashes);
+    return;
+  }
+  helper_stripes(hardware_flags, num_keys, length_key, keys, hashes);
+  if ((length_key % 16) > 0 && (length_key % 16) <= 8) {
+    helper_tails(hardware_flags, num_keys, length_key, keys, hashes);
+  }
+  avalanche(hardware_flags, num_keys, hashes);
+}
+
+void Hashing::hash_varlen_helper(uint32_t length, const uint8_t* key, uint32_t* acc) {
+  for (uint32_t i = 0; i < length / 16; ++i) {
+    for (int j = 0; j < 4; ++j) {
+      uint32_t lane = reinterpret_cast<const uint32_t*>(key)[i * 4 + j];
+      acc[j] += (lane * PRIME32_2);
+      acc[j] = ROTL(acc[j], 13);
+      acc[j] *= PRIME32_1;
+    }
+  }
+
+  int tail = length % 16;
+  if (tail) {
+    uint64_t last_stripe[2];
+    const uint64_t* last_stripe_base =
+        reinterpret_cast<const uint64_t*>(key + length - (length % 16));
+    last_stripe[0] = last_stripe_base[0];
+    uint64_t mask = ~0ULL >> (8 * ((length + 7) / 8 * 8 - length));
+    if (tail <= 8) {
+      last_stripe[1] = 0;
+      last_stripe[0] &= mask;
+    } else {
+      last_stripe[1] = last_stripe_base[1];
+      last_stripe[1] &= mask;
+    }
+    for (int j = 0; j < 4; ++j) {
+      uint32_t lane = reinterpret_cast<const uint32_t*>(last_stripe)[j];
+      acc[j] += (lane * PRIME32_2);
+      acc[j] = ROTL(acc[j], 13);
+      acc[j] *= PRIME32_1;
+    }
+  }
+}
+
+void Hashing::hash_varlen(int64_t hardware_flags, uint32_t num_rows,
+                          const uint32_t* offsets, const uint8_t* concatenated_keys,
+                          uint32_t* temp_buffer,  // Needs to hold 4 x 32-bit per row
+                          uint32_t* hashes) {
+#if defined(ARROW_HAVE_AVX2)
+  if (hardware_flags & arrow::internal::CpuInfo::AVX2) {
+    hash_varlen_avx2(num_rows, offsets, concatenated_keys, temp_buffer, hashes);
+  } else {
+#endif
+    for (uint32_t i = 0; i < num_rows; ++i) {
+      uint32_t acc[4];
+      acc[0] = static_cast<uint32_t>(
+          (static_cast<uint64_t>(PRIME32_1) + static_cast<uint64_t>(PRIME32_2)) &
+          0xffffffff);
+      acc[1] = PRIME32_2;
+      acc[2] = 0;
+      acc[3] = static_cast<uint32_t>(-static_cast<int32_t>(PRIME32_1));
+      uint32_t length = offsets[i + 1] - offsets[i];
+      hash_varlen_helper(length, concatenated_keys + offsets[i], acc);
+      hashes[i] = combine_accumulators(acc[0], acc[1], acc[2], acc[3]);
+    }
+    avalanche(hardware_flags, num_rows, hashes);
+#if defined(ARROW_HAVE_AVX2)
+  }
+#endif
+}
+
+}  // namespace compute
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/exec/key_hash.h b/cpp/src/arrow/compute/exec/key_hash.h
new file mode 100644
index 00000000000..7f8ab5185cc
--- /dev/null
+++ b/cpp/src/arrow/compute/exec/key_hash.h
@@ -0,0 +1,94 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#if defined(ARROW_HAVE_AVX2)
+#include <immintrin.h>
+#endif
+
+#include <cstdint>
+
+#include "arrow/compute/exec/util.h"
+
+namespace arrow {
+namespace compute {
+
+// Implementations are based on xxh3 32-bit algorithm description from:
+// https://github.com/Cyan4973/xxHash/blob/dev/doc/xxhash_spec.md
+//
+class Hashing {
+ public:
+  static void hash_fixed(int64_t hardware_flags, uint32_t num_keys, uint32_t length_key,
+                         const uint8_t* keys, uint32_t* hashes);
+
+  static void hash_varlen(int64_t hardware_flags, uint32_t num_rows,
+                          const uint32_t* offsets, const uint8_t* concatenated_keys,
+                          uint32_t* temp_buffer,  // Needs to hold 4 x 32-bit per row
+                          uint32_t* hashes);
+
+ private:
+  static const uint32_t PRIME32_1 = 0x9E3779B1;  // 0b10011110001101110111100110110001
+  static const uint32_t PRIME32_2 = 0x85EBCA77;  // 0b10000101111010111100101001110111
+  static const uint32_t PRIME32_3 = 0xC2B2AE3D;  // 0b11000010101100101010111000111101
+  static const uint32_t PRIME32_4 = 0x27D4EB2F;  // 0b00100111110101001110101100101111
+  static const uint32_t PRIME32_5 = 0x165667B1;  // 0b00010110010101100110011110110001
+
+  // Avalanche
+  static inline uint32_t avalanche_helper(uint32_t acc);
+#if defined(ARROW_HAVE_AVX2)
+  static void avalanche_avx2(uint32_t num_keys, uint32_t* hashes);
+#endif
+  static void avalanche(int64_t hardware_flags, uint32_t num_keys, uint32_t* hashes);
+
+  // Accumulator combine
+  static inline uint32_t combine_accumulators(const uint32_t acc1, const uint32_t acc2,
+                                              const uint32_t acc3, const uint32_t acc4);
+#if defined(ARROW_HAVE_AVX2)
+  static inline uint64_t combine_accumulators_avx2(__m256i acc);
+#endif
+
+  // Helpers
+  static inline void helper_8B(uint32_t key_length, uint32_t num_keys,
+                               const uint8_t* keys, uint32_t* hashes);
+  static inline void helper_stripe(uint32_t offset, uint64_t mask_hi, const uint8_t* keys,
+                                   uint32_t& acc1, uint32_t& acc2, uint32_t& acc3,
+                                   uint32_t& acc4);
+  static inline uint32_t helper_tail(uint32_t offset, uint64_t mask, const uint8_t* keys,
+                                     uint32_t acc);
+#if defined(ARROW_HAVE_AVX2)
+  static void helper_stripes_avx2(uint32_t num_keys, uint32_t key_length,
+                                  const uint8_t* keys, uint32_t* hash);
+  static void helper_tails_avx2(uint32_t num_keys, uint32_t key_length,
+                                const uint8_t* keys, uint32_t* hash);
+#endif
+  static void helper_stripes(int64_t hardware_flags, uint32_t num_keys,
+                             uint32_t key_length, const uint8_t* keys, uint32_t* hash);
+  static void helper_tails(int64_t hardware_flags, uint32_t num_keys, uint32_t key_length,
+                           const uint8_t* keys, uint32_t* hash);
+
+  static void hash_varlen_helper(uint32_t length, const uint8_t* key, uint32_t* acc);
+#if defined(ARROW_HAVE_AVX2)
+  static void hash_varlen_avx2(uint32_t num_rows, const uint32_t* offsets,
+                               const uint8_t* concatenated_keys,
+                               uint32_t* temp_buffer,  // Needs to hold 4 x 32-bit per row
+                               uint32_t* hashes);
+#endif
+};
+
+}  // namespace compute
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/exec/key_hash_avx2.cc b/cpp/src/arrow/compute/exec/key_hash_avx2.cc
new file mode 100644
index 00000000000..b58db015088
--- /dev/null
+++ b/cpp/src/arrow/compute/exec/key_hash_avx2.cc
@@ -0,0 +1,248 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <immintrin.h>
+
+#include "arrow/compute/exec/key_hash.h"
+
+namespace arrow {
+namespace compute {
+
+#if defined(ARROW_HAVE_AVX2)
+
+void Hashing::avalanche_avx2(uint32_t num_keys, uint32_t* hashes) {
+  constexpr int unroll = 8;
+  ARROW_DCHECK(num_keys % unroll == 0);
+  for (uint32_t i = 0; i < num_keys / unroll; ++i) {
+    __m256i hash = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(hashes) + i);
+    hash = _mm256_xor_si256(hash, _mm256_srli_epi32(hash, 15));
+    hash = _mm256_mullo_epi32(hash, _mm256_set1_epi32(PRIME32_2));
+    hash = _mm256_xor_si256(hash, _mm256_srli_epi32(hash, 13));
+    hash = _mm256_mullo_epi32(hash, _mm256_set1_epi32(PRIME32_3));
+    hash = _mm256_xor_si256(hash, _mm256_srli_epi32(hash, 16));
+    _mm256_storeu_si256((reinterpret_cast<__m256i*>(hashes)) + i, hash);
+  }
+}
+
+inline uint64_t Hashing::combine_accumulators_avx2(__m256i acc) {
+  acc = _mm256_or_si256(
+      _mm256_sllv_epi32(acc, _mm256_setr_epi32(1, 7, 12, 18, 1, 7, 12, 18)),
+      _mm256_srlv_epi32(acc, _mm256_setr_epi32(32 - 1, 32 - 7, 32 - 12, 32 - 18, 32 - 1,
+                                               32 - 7, 32 - 12, 32 - 18)));
+  acc = _mm256_add_epi32(acc, _mm256_shuffle_epi32(acc, 0xee));  // 0b11101110
+  acc = _mm256_add_epi32(acc, _mm256_srli_epi64(acc, 32));
+  acc = _mm256_permutevar8x32_epi32(acc, _mm256_setr_epi32(0, 4, 0, 0, 0, 0, 0, 0));
+  uint64_t result = _mm256_extract_epi64(acc, 0);
+  return result;
+}
+
+void Hashing::helper_stripes_avx2(uint32_t num_keys, uint32_t key_length,
+                                  const uint8_t* keys, uint32_t* hash) {
+  constexpr int unroll = 2;
+  ARROW_DCHECK(num_keys % unroll == 0);
+
+  constexpr uint64_t kByteSequence0To7 = 0x0706050403020100ULL;
+  constexpr uint64_t kByteSequence8To15 = 0x0f0e0d0c0b0a0908ULL;
+
+  const __m256i mask_last_stripe =
+      (key_length % 16) <= 8
+          ? _mm256_set1_epi8(static_cast<char>(0xffU))
+          : _mm256_cmpgt_epi8(_mm256_set1_epi8(key_length % 16),
+                              _mm256_setr_epi64x(kByteSequence0To7, kByteSequence8To15,
+                                                 kByteSequence0To7, kByteSequence8To15));
+
+  // If length modulo stripe length is less than or equal 8, round down to the nearest 16B
+  // boundary (8B ending will be processed in a separate function), otherwise round up.
+  const uint32_t num_stripes = (key_length + 7) / 16;
+  for (uint32_t i = 0; i < num_keys / unroll; ++i) {
+    __m256i acc = _mm256_setr_epi32(
+        static_cast<uint32_t>((static_cast<uint64_t>(PRIME32_1) + PRIME32_2) &
+                              0xffffffff),
+        PRIME32_2, 0, static_cast<uint32_t>(-static_cast<int32_t>(PRIME32_1)),
+        static_cast<uint32_t>((static_cast<uint64_t>(PRIME32_1) + PRIME32_2) &
+                              0xffffffff),
+        PRIME32_2, 0, static_cast<uint32_t>(-static_cast<int32_t>(PRIME32_1)));
+    auto key0 = reinterpret_cast<const __m128i*>(keys + key_length * 2 * i);
+    auto key1 = reinterpret_cast<const __m128i*>(keys + key_length * 2 * i + key_length);
+    for (uint32_t stripe = 0; stripe < num_stripes - 1; ++stripe) {
+      auto key_stripe =
+          _mm256_inserti128_si256(_mm256_castsi128_si256(_mm_loadu_si128(key0 + stripe)),
+                                  _mm_loadu_si128(key1 + stripe), 1);
+      acc = _mm256_add_epi32(
+          acc, _mm256_mullo_epi32(key_stripe, _mm256_set1_epi32(PRIME32_2)));
+      acc = _mm256_or_si256(_mm256_slli_epi32(acc, 13), _mm256_srli_epi32(acc, 32 - 13));
+      acc = _mm256_mullo_epi32(acc, _mm256_set1_epi32(PRIME32_1));
+    }
+    auto key_stripe = _mm256_inserti128_si256(
+        _mm256_castsi128_si256(_mm_loadu_si128(key0 + num_stripes - 1)),
+        _mm_loadu_si128(key1 + num_stripes - 1), 1);
+    key_stripe = _mm256_and_si256(key_stripe, mask_last_stripe);
+    acc = _mm256_add_epi32(acc,
+                           _mm256_mullo_epi32(key_stripe, _mm256_set1_epi32(PRIME32_2)));
+    acc = _mm256_or_si256(_mm256_slli_epi32(acc, 13), _mm256_srli_epi32(acc, 32 - 13));
+    acc = _mm256_mullo_epi32(acc, _mm256_set1_epi32(PRIME32_1));
+    uint64_t result = combine_accumulators_avx2(acc);
+    reinterpret_cast<uint64_t*>(hash)[i] = result;
+  }
+}
+
+void Hashing::helper_tails_avx2(uint32_t num_keys, uint32_t key_length,
+                                const uint8_t* keys, uint32_t* hash) {
+  constexpr int unroll = 8;
+  ARROW_DCHECK(num_keys % unroll == 0);
+  auto keys_i64 = reinterpret_cast<arrow::util::int64_for_gather_t*>(keys);
+
+  // Process between 1 and 8 last bytes of each key, starting from 16B boundary.
+  // The caller needs to make sure that there are no more than 8 bytes to process after
+  // that 16B boundary.
+  uint32_t first_offset = key_length - (key_length % 16);
+  __m256i mask = _mm256_set1_epi64x((~0ULL) >> (8 * (8 - (key_length % 16))));
+  __m256i offset =
+      _mm256_setr_epi32(0, key_length, key_length * 2, key_length * 3, key_length * 4,
+                        key_length * 5, key_length * 6, key_length * 7);
+  offset = _mm256_add_epi32(offset, _mm256_set1_epi32(first_offset));
+  __m256i offset_incr = _mm256_set1_epi32(key_length * 8);
+
+  for (uint32_t i = 0; i < num_keys / unroll; ++i) {
+    auto v1 = _mm256_i32gather_epi64(keys_i64, _mm256_castsi256_si128(offset), 1);
+    auto v2 = _mm256_i32gather_epi64(keys_i64, _mm256_extracti128_si256(offset, 1), 1);
+    v1 = _mm256_and_si256(v1, mask);
+    v2 = _mm256_and_si256(v2, mask);
+    v1 = _mm256_permutevar8x32_epi32(v1, _mm256_setr_epi32(0, 2, 4, 6, 1, 3, 5, 7));
+    v2 = _mm256_permutevar8x32_epi32(v2, _mm256_setr_epi32(0, 2, 4, 6, 1, 3, 5, 7));
+    auto x1 = _mm256_permute2x128_si256(v1, v2, 0x20);
+    auto x2 = _mm256_permute2x128_si256(v1, v2, 0x31);
+    __m256i acc = _mm256_loadu_si256((reinterpret_cast<const __m256i*>(hash)) + i);
+
+    acc = _mm256_add_epi32(acc, _mm256_mullo_epi32(x1, _mm256_set1_epi32(PRIME32_3)));
+    acc = _mm256_or_si256(_mm256_slli_epi32(acc, 17), _mm256_srli_epi32(acc, 32 - 17));
+    acc = _mm256_mullo_epi32(acc, _mm256_set1_epi32(PRIME32_4));
+
+    acc = _mm256_add_epi32(acc, _mm256_mullo_epi32(x2, _mm256_set1_epi32(PRIME32_3)));
+    acc = _mm256_or_si256(_mm256_slli_epi32(acc, 17), _mm256_srli_epi32(acc, 32 - 17));
+    acc = _mm256_mullo_epi32(acc, _mm256_set1_epi32(PRIME32_4));
+
+    _mm256_storeu_si256((reinterpret_cast<__m256i*>(hash)) + i, acc);
+
+    offset = _mm256_add_epi32(offset, offset_incr);
+  }
+}
+
+void Hashing::hash_varlen_avx2(uint32_t num_rows, const uint32_t* offsets,
+                               const uint8_t* concatenated_keys,
+                               uint32_t* temp_buffer,  // Needs to hold 4 x 32-bit per row
+                               uint32_t* hashes) {
+  constexpr uint64_t kByteSequence0To7 = 0x0706050403020100ULL;
+  constexpr uint64_t kByteSequence8To15 = 0x0f0e0d0c0b0a0908ULL;
+
+  const __m128i sequence = _mm_set_epi64x(kByteSequence8To15, kByteSequence0To7);
+  const __m128i acc_init = _mm_setr_epi32(
+      static_cast<uint32_t>((static_cast<uint64_t>(PRIME32_1) + PRIME32_2) & 0xffffffff),
+      PRIME32_2, 0, static_cast<uint32_t>(-static_cast<int32_t>(PRIME32_1)));
+
+  // Variable length keys are always processed as a sequence of 16B stripes,
+  // with the last stripe, if extending past the end of the key, having extra bytes set to
+  // 0 on the fly.
+  for (uint32_t ikey = 0; ikey < num_rows; ++ikey) {
+    uint32_t begin = offsets[ikey];
+    uint32_t end = offsets[ikey + 1];
+    uint32_t length = end - begin;
+    const uint8_t* base = concatenated_keys + begin;
+
+    __m128i acc = acc_init;
+
+    uint32_t i;
+    for (i = 0; i < (length - 1) / 16; ++i) {
+      __m128i key_stripe = _mm_loadu_si128(reinterpret_cast<const __m128i*>(base) + i);
+      acc = _mm_add_epi32(acc, _mm_mullo_epi32(key_stripe, _mm_set1_epi32(PRIME32_2)));
+      acc = _mm_or_si128(_mm_slli_epi32(acc, 13), _mm_srli_epi32(acc, 32 - 13));
+      acc = _mm_mullo_epi32(acc, _mm_set1_epi32(PRIME32_1));
+    }
+    __m128i key_stripe = _mm_loadu_si128(reinterpret_cast<const __m128i*>(base) + i);
+    __m128i mask = _mm_cmpgt_epi8(_mm_set1_epi8(((length - 1) % 16) + 1), sequence);
+    key_stripe = _mm_and_si128(key_stripe, mask);
+    acc = _mm_add_epi32(acc, _mm_mullo_epi32(key_stripe, _mm_set1_epi32(PRIME32_2)));
+    acc = _mm_or_si128(_mm_slli_epi32(acc, 13), _mm_srli_epi32(acc, 32 - 13));
+    acc = _mm_mullo_epi32(acc, _mm_set1_epi32(PRIME32_1));
+
+    _mm_storeu_si128(reinterpret_cast<__m128i*>(temp_buffer) + ikey, acc);
+  }
+
+  // Combine accumulators and perform avalanche
+  constexpr int unroll = 8;
+  for (uint32_t i = 0; i < num_rows / unroll; ++i) {
+    __m256i accA =
+        _mm256_loadu_si256(reinterpret_cast<const __m256i*>(temp_buffer) + 4 * i + 0);
+    __m256i accB =
+        _mm256_loadu_si256(reinterpret_cast<const __m256i*>(temp_buffer) + 4 * i + 1);
+    __m256i accC =
+        _mm256_loadu_si256(reinterpret_cast<const __m256i*>(temp_buffer) + 4 * i + 2);
+    __m256i accD =
+        _mm256_loadu_si256(reinterpret_cast<const __m256i*>(temp_buffer) + 4 * i + 3);
+    // Transpose 2x 4x4 32-bit matrices
+    __m256i r0 = _mm256_unpacklo_epi32(accA, accB);
+    __m256i r1 = _mm256_unpackhi_epi32(accA, accB);
+    __m256i r2 = _mm256_unpacklo_epi32(accC, accD);
+    __m256i r3 = _mm256_unpackhi_epi32(accC, accD);
+    accA = _mm256_unpacklo_epi64(r0, r2);
+    accB = _mm256_unpackhi_epi64(r0, r2);
+    accC = _mm256_unpacklo_epi64(r1, r3);
+    accD = _mm256_unpackhi_epi64(r1, r3);
+    // _rotl(accA, 1)
+    // _rotl(accB, 7)
+    // _rotl(accC, 12)
+    // _rotl(accD, 18)
+    accA = _mm256_or_si256(_mm256_slli_epi32(accA, 1), _mm256_srli_epi32(accA, 32 - 1));
+    accB = _mm256_or_si256(_mm256_slli_epi32(accB, 7), _mm256_srli_epi32(accB, 32 - 7));
+    accC = _mm256_or_si256(_mm256_slli_epi32(accC, 12), _mm256_srli_epi32(accC, 32 - 12));
+    accD = _mm256_or_si256(_mm256_slli_epi32(accD, 18), _mm256_srli_epi32(accD, 32 - 18));
+    accA = _mm256_add_epi32(_mm256_add_epi32(accA, accB), _mm256_add_epi32(accC, accD));
+    // avalanche
+    __m256i hash = accA;
+    hash = _mm256_xor_si256(hash, _mm256_srli_epi32(hash, 15));
+    hash = _mm256_mullo_epi32(hash, _mm256_set1_epi32(PRIME32_2));
+    hash = _mm256_xor_si256(hash, _mm256_srli_epi32(hash, 13));
+    hash = _mm256_mullo_epi32(hash, _mm256_set1_epi32(PRIME32_3));
+    hash = _mm256_xor_si256(hash, _mm256_srli_epi32(hash, 16));
+    // Store.
+    // At this point, because of way 2x 4x4 transposition was done, output hashes are in
+    // order: 0, 2, 4, 6, 1, 3, 5, 7. Bring back the original order.
+    _mm256_storeu_si256(
+        reinterpret_cast<__m256i*>(hashes) + i,
+        _mm256_permutevar8x32_epi32(hash, _mm256_setr_epi32(0, 4, 1, 5, 2, 6, 3, 7)));
+  }
+  // Process the tail of up to 7 hashes
+  for (uint32_t i = num_rows - num_rows % unroll; i < num_rows; ++i) {
+    uint32_t* temp_buffer_base = temp_buffer + i * 4;
+    uint32_t acc = ROTL(temp_buffer_base[0], 1) + ROTL(temp_buffer_base[1], 7) +
+                   ROTL(temp_buffer_base[2], 12) + ROTL(temp_buffer_base[3], 18);
+
+    // avalanche
+    acc ^= (acc >> 15);
+    acc *= PRIME32_2;
+    acc ^= (acc >> 13);
+    acc *= PRIME32_3;
+    acc ^= (acc >> 16);
+
+    hashes[i] = acc;
+  }
+}
+
+#endif
+
+}  // namespace compute
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/exec/key_map.cc b/cpp/src/arrow/compute/exec/key_map.cc
new file mode 100644
index 00000000000..c48487793e0
--- /dev/null
+++ b/cpp/src/arrow/compute/exec/key_map.cc
@@ -0,0 +1,603 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/compute/exec/key_map.h"
+
+#include <memory.h>
+
+#include <algorithm>
+#include <cstdint>
+
+#include "arrow/util/bit_util.h"
+#include "arrow/util/bitmap_ops.h"
+
+namespace arrow {
+
+using BitUtil::CountLeadingZeros;
+
+namespace compute {
+
+constexpr uint64_t kHighBitOfEachByte = 0x8080808080808080ULL;
+
+// Search status bytes inside a block of 8 slots (64-bit word).
+// Try to find a slot that contains a 7-bit stamp matching the one provided.
+// There are three possible outcomes:
+// 1. A matching slot is found.
+// -> Return its index between 0 and 7 and set match found flag.
+// 2. A matching slot is not found and there is an empty slot in the block.
+// -> Return the index of the first empty slot and clear match found flag.
+// 3. A matching slot is not found and there are no empty slots in the block.
+// -> Return 8 as the output slot index and clear match found flag.
+//
+// Optionally an index of the first slot to start the search from can be specified.
+// In this case slots before it will be ignored.
+//
+template <bool use_start_slot>
+inline void SwissTable::search_block(uint64_t block, int stamp, int start_slot,
+                                     int* out_slot, int* out_match_found) {
+  // Filled slot bytes have the highest bit set to 0 and empty slots are equal to 0x80.
+  uint64_t block_high_bits = block & kHighBitOfEachByte;
+
+  // Replicate 7-bit stamp to all non-empty slots, leaving zeroes for empty slots.
+  uint64_t stamp_pattern = stamp * ((block_high_bits ^ kHighBitOfEachByte) >> 7);
+
+  // If we xor this pattern with block status bytes we get in individual bytes:
+  // a) 0x00, for filled slots matching the stamp,
+  // b) 0x00 < x < 0x80, for filled slots not matching the stamp,
+  // c) 0x80, for empty slots.
+  uint64_t block_xor_pattern = block ^ stamp_pattern;
+
+  // If we then add 0x7f to every byte, we get:
+  // a) 0x7F
+  // b) 0x80 <= x < 0xFF
+  // c) 0xFF
+  uint64_t match_base = block_xor_pattern + ~kHighBitOfEachByte;
+
+  // The highest bit now tells us if we have a match (0) or not (1).
+  // We will negate the bits so that match is represented by a set bit.
+  uint64_t matches = ~match_base;
+
+  // Clear 7 non-relevant bits in each byte.
+  // Also clear bytes that correspond to slots that we were supposed to
+  // skip due to provided start slot index.
+  // Note: the highest byte corresponds to the first slot.
+  if (use_start_slot) {
+    matches &= kHighBitOfEachByte >> (8 * start_slot);
+  } else {
+    matches &= kHighBitOfEachByte;
+  }
+
+  // We get 0 if there are no matches
+  *out_match_found = (matches == 0 ? 0 : 1);
+
+  // Now if we or with the highest bits of the block and scan zero bits in reverse,
+  // we get 8x slot index that we were looking for.
+  // This formula works in all three cases a), b) and c).
+  *out_slot = static_cast<int>(CountLeadingZeros(matches | block_high_bits) >> 3);
+}
+
+// This call follows the call to search_block.
+// The input slot index is the output returned by it, which is a value from 0 to 8,
+// with 8 indicating that both: no match was found and there were no empty slots.
+//
+// If the slot corresponds to a non-empty slot return a group id associated with it.
+// Otherwise return any group id from any of the slots or
+// zero, which is the default value stored in empty slots.
+//
+inline uint64_t SwissTable::extract_group_id(const uint8_t* block_ptr, int slot,
+                                             uint64_t group_id_mask) {
+  // Input slot can be equal to 8, in which case we need to output any valid group id
+  // value, so we take the one from slot 0 in the block.
+  int clamped_slot = slot & 7;
+
+  // Group id values for all 8 slots in the block are bit-packed and follow the status
+  // bytes. We assume here that the number of bits is rounded up to 8, 16, 32 or 64. In
+  // that case we can extract group id using aligned 64-bit word access.
+  int num_groupid_bits = static_cast<int>(ARROW_POPCOUNT64(group_id_mask));
+  ARROW_DCHECK(num_groupid_bits == 8 || num_groupid_bits == 16 ||
+               num_groupid_bits == 32 || num_groupid_bits == 64);
+
+  int bit_offset = clamped_slot * num_groupid_bits;
+  const uint64_t* group_id_bytes =
+      reinterpret_cast<const uint64_t*>(block_ptr) + 1 + (bit_offset >> 6);
+  uint64_t group_id = (*group_id_bytes >> (bit_offset & 63)) & group_id_mask;
+
+  return group_id;
+}
+
+// Return global slot id (the index including the information about the block)
+// where the search should continue if the first comparison fails.
+// This function always follows search_block and receives the slot id returned by it.
+//
+inline uint64_t SwissTable::next_slot_to_visit(uint64_t block_index, int slot,
+                                               int match_found) {
+  // The result should be taken modulo the number of all slots in all blocks,
+  // but here we allow it to take a value one above the last slot index.
+  // Modulo operation is postponed to later.
+  return block_index * 8 + slot + match_found;
+}
+
+// Implements first (fast-path, optimistic) lookup.
+// Searches for a match only within the start block and
+// trying only the first slot with a matching stamp.
+//
+// Comparison callback needed for match verification is done outside of this function.
+// Match bit vector filled by it only indicates finding a matching stamp in a slot.
+//
+template <bool use_selection>
+void SwissTable::lookup_1(const uint16_t* selection, const int num_keys,
+                          const uint32_t* hashes, uint8_t* out_match_bitvector,
+                          uint32_t* out_groupids, uint32_t* out_slot_ids) {
+  // Clear the output bit vector
+  memset(out_match_bitvector, 0, (num_keys + 7) / 8);
+
+  // Based on the size of the table, prepare bit number constants.
+  uint32_t stamp_mask = (1 << bits_stamp_) - 1;
+  int num_groupid_bits = num_groupid_bits_from_log_blocks(log_blocks_);
+  uint32_t groupid_mask = (1 << num_groupid_bits) - 1;
+
+  for (int i = 0; i < num_keys; ++i) {
+    int id;
+    if (use_selection) {
+      id = selection[i];
+    } else {
+      id = i;
+    }
+
+    // Extract from hash: block index and stamp
+    //
+    uint32_t hash = hashes[id];
+    uint32_t iblock = hash >> (bits_hash_ - bits_stamp_ - log_blocks_);
+    uint32_t stamp = iblock & stamp_mask;
+    iblock >>= bits_stamp_;
+
+    uint32_t num_block_bytes = num_groupid_bits + 8;
+    const uint8_t* blockbase = reinterpret_cast<const uint8_t*>(blocks_) +
+                               static_cast<uint64_t>(iblock) * num_block_bytes;
+    uint64_t block = *reinterpret_cast<const uint64_t*>(blockbase);
+
+    // Call helper functions to obtain the output triplet:
+    // - match (of a stamp) found flag
+    // - group id for key comparison
+    // - slot to resume search from in case of no match or false positive
+    int match_found;
+    int islot_in_block;
+    search_block<false>(block, stamp, 0, &islot_in_block, &match_found);
+    uint64_t groupid = extract_group_id(blockbase, islot_in_block, groupid_mask);
+    ARROW_DCHECK(groupid < num_inserted_ || num_inserted_ == 0);
+    uint64_t islot = next_slot_to_visit(iblock, islot_in_block, match_found);
+
+    out_match_bitvector[id / 8] |= match_found << (id & 7);
+    out_groupids[id] = static_cast<uint32_t>(groupid);
+    out_slot_ids[id] = static_cast<uint32_t>(islot);
+  }
+}
+
+// How many groups we can keep in the hash table without the need for resizing.
+// When we reach this limit, we need to break processing of any further rows and resize.
+//
+uint64_t SwissTable::num_groups_for_resize() const {
+  // Resize small hash tables when 50% full (up to 12KB).
+  // Resize large hash tables when 75% full.
+  constexpr int log_blocks_small_ = 9;
+  uint64_t num_slots = 1ULL << (log_blocks_ + 3);
+  if (log_blocks_ <= log_blocks_small_) {
+    return num_slots / 2;
+  } else {
+    return num_slots * 3 / 4;
+  }
+}
+
+uint64_t SwissTable::wrap_global_slot_id(uint64_t global_slot_id) {
+  uint64_t global_slot_id_mask = (1 << (log_blocks_ + 3)) - 1;
+  return global_slot_id & global_slot_id_mask;
+}
+
+// Run a single round of slot search - comparison / insert - filter unprocessed.
+// Update selection vector to reflect which items have been processed.
+// Ids in selection vector do not have to be sorted.
+//
+Status SwissTable::lookup_2(const uint32_t* hashes, uint32_t* inout_num_selected,
+                            uint16_t* inout_selection, bool* out_need_resize,
+                            uint32_t* out_group_ids, uint32_t* inout_next_slot_ids) {
+  auto num_groups_limit = num_groups_for_resize();
+  ARROW_DCHECK(num_inserted_ < num_groups_limit);
+
+  // Temporary arrays are of limited size.
+  // The input needs to be split into smaller portions if it exceeds that limit.
+  //
+  ARROW_DCHECK(*inout_num_selected <= static_cast<uint32_t>(1 << log_minibatch_));
+
+  // We will split input row ids into three categories:
+  // - needing to visit next block [0]
+  // - needing comparison [1]
+  // - inserted [2]
+  //
+  auto ids_inserted_buf =
+      util::TempVectorHolder<uint16_t>(temp_stack_, *inout_num_selected);
+  auto ids_for_comparison_buf =
+      util::TempVectorHolder<uint16_t>(temp_stack_, *inout_num_selected);
+  constexpr int category_nomatch = 0;
+  constexpr int category_cmp = 1;
+  constexpr int category_inserted = 2;
+  int num_ids[3];
+  num_ids[0] = num_ids[1] = num_ids[2] = 0;
+  uint16_t* ids[3]{inout_selection, ids_for_comparison_buf.mutable_data(),
+                   ids_inserted_buf.mutable_data()};
+  auto push_id = [&num_ids, &ids](int category, int id) {
+    ids[category][num_ids[category]++] = static_cast<uint16_t>(id);
+  };
+
+  uint64_t num_groupid_bits = num_groupid_bits_from_log_blocks(log_blocks_);
+  uint64_t groupid_mask = (1ULL << num_groupid_bits) - 1;
+  constexpr uint64_t stamp_mask = 0x7f;
+  uint64_t num_block_bytes = (8 + num_groupid_bits);
+
+  uint32_t num_processed;
+  for (num_processed = 0;
+       // Second condition in for loop:
+       // We need to break processing and have the caller of this function
+       // resize hash table if we reach the limit of the number of groups present.
+       num_processed < *inout_num_selected &&
+       num_inserted_ + num_ids[category_inserted] < num_groups_limit;
+       ++num_processed) {
+    // row id in original batch
+    int id = inout_selection[num_processed];
+
+    uint64_t slot_id = wrap_global_slot_id(inout_next_slot_ids[id]);
+    uint64_t block_id = slot_id >> 3;
+    uint32_t hash = hashes[id];
+    uint8_t* blockbase = blocks_ + num_block_bytes * block_id;
+    uint64_t block = *reinterpret_cast<uint64_t*>(blockbase);
+    uint64_t stamp = (hash >> (bits_hash_ - log_blocks_ - bits_stamp_)) & stamp_mask;
+    int start_slot = (slot_id & 7);
+
+    bool isempty = (blockbase[7 - start_slot] == 0x80);
+    if (isempty) {
+      // If we reach the empty slot we insert key for new group
+
+      blockbase[7 - start_slot] = static_cast<uint8_t>(stamp);
+      uint32_t group_id = num_inserted_ + num_ids[category_inserted];
+      int groupid_bit_offset = static_cast<int>(start_slot * num_groupid_bits);
+
+      // We assume here that the number of bits is rounded up to 8, 16, 32 or 64.
+      // In that case we can insert group id value using aligned 64-bit word access.
+      ARROW_DCHECK(num_groupid_bits == 8 || num_groupid_bits == 16 ||
+                   num_groupid_bits == 32 || num_groupid_bits == 64);
+      reinterpret_cast<uint64_t*>(blockbase + 8)[groupid_bit_offset >> 6] |=
+          (static_cast<uint64_t>(group_id) << (groupid_bit_offset & 63));
+
+      hashes_[slot_id] = hash;
+      out_group_ids[id] = group_id;
+      push_id(category_inserted, id);
+    } else {
+      // We search for a slot with a matching stamp within a single block.
+      // We append row id to the appropriate sequence of ids based on
+      // whether the match has been found or not.
+
+      int new_match_found;
+      int new_slot;
+      search_block<true>(block, static_cast<int>(stamp), start_slot, &new_slot,
+                         &new_match_found);
+      auto new_groupid =
+          static_cast<uint32_t>(extract_group_id(blockbase, new_slot, groupid_mask));
+      ARROW_DCHECK(new_groupid < num_inserted_ + num_ids[category_inserted]);
+      new_slot =
+          static_cast<int>(next_slot_to_visit(block_id, new_slot, new_match_found));
+      inout_next_slot_ids[id] = new_slot;
+      out_group_ids[id] = new_groupid;
+      push_id(new_match_found, id);
+    }
+  }
+
+  // Copy keys for newly inserted rows using callback
+  RETURN_NOT_OK(append_impl_(num_ids[category_inserted], ids[category_inserted]));
+  num_inserted_ += num_ids[category_inserted];
+
+  // Evaluate comparisons and append ids of rows that failed it to the non-match set.
+  uint32_t num_not_equal;
+  equal_impl_(num_ids[category_cmp], ids[category_cmp], out_group_ids, &num_not_equal,
+              ids[category_nomatch] + num_ids[category_nomatch]);
+  num_ids[category_nomatch] += num_not_equal;
+
+  // Append ids of any unprocessed entries if we aborted processing due to the need
+  // to resize.
+  if (num_processed < *inout_num_selected) {
+    memmove(ids[category_nomatch] + num_ids[category_nomatch],
+            inout_selection + num_processed,
+            sizeof(uint16_t) * (*inout_num_selected - num_processed));
+    num_ids[category_nomatch] += (*inout_num_selected - num_processed);
+  }
+
+  *out_need_resize = (num_inserted_ == num_groups_limit);
+  *inout_num_selected = num_ids[category_nomatch];
+  return Status::OK();
+}
+
+// Use hashes and callbacks to find group ids for already existing keys and
+// to insert and report newly assigned group ids for new keys.
+//
+Status SwissTable::map(const int num_keys, const uint32_t* hashes,
+                       uint32_t* out_groupids) {
+  // Temporary buffers have limited size.
+  // Caller is responsible for splitting larger input arrays into smaller chunks.
+  ARROW_DCHECK(num_keys <= (1 << log_minibatch_));
+
+  // Allocate temporary buffers with a lifetime of this function
+  auto match_bitvector_buf = util::TempVectorHolder<uint8_t>(temp_stack_, num_keys);
+  uint8_t* match_bitvector = match_bitvector_buf.mutable_data();
+  auto slot_ids_buf = util::TempVectorHolder<uint32_t>(temp_stack_, num_keys);
+  uint32_t* slot_ids = slot_ids_buf.mutable_data();
+  auto ids_buf = util::TempVectorHolder<uint16_t>(temp_stack_, num_keys);
+  uint16_t* ids = ids_buf.mutable_data();
+  uint32_t num_ids;
+
+  // First-pass processing.
+  // Optimistically use simplified lookup involving only a start block to find
+  // a single group id candidate for every input.
+#if defined(ARROW_HAVE_AVX2)
+  if (hardware_flags_ & arrow::internal::CpuInfo::AVX2) {
+    if (log_blocks_ <= 4) {
+      int tail = num_keys % 32;
+      int delta = num_keys - tail;
+      lookup_1_avx2_x32(num_keys - tail, hashes, match_bitvector, out_groupids, slot_ids);
+      lookup_1_avx2_x8(tail, hashes + delta, match_bitvector + delta / 8,
+                       out_groupids + delta, slot_ids + delta);
+    } else {
+      lookup_1_avx2_x8(num_keys, hashes, match_bitvector, out_groupids, slot_ids);
+    }
+  } else {
+#endif
+    lookup_1<false>(nullptr, num_keys, hashes, match_bitvector, out_groupids, slot_ids);
+#if defined(ARROW_HAVE_AVX2)
+  }
+#endif
+
+  int64_t num_matches =
+      arrow::internal::CountSetBits(match_bitvector, /*offset=*/0, num_keys);
+
+  // After the first-pass processing count rows with matches (based on stamp comparison)
+  // and decide based on their percentage whether to call dense or sparse comparison
+  // function. Dense comparison means evaluating it for all inputs, even if the matching
+  // stamp was not found. It may be cheaper to evaluate comparison for all inputs if the
+  // extra cost of filtering is higher than the wasted processing of rows with no match.
+  //
+  // Dense comparison can only be used if there is at least one inserted key,
+  // because otherwise there is no key to compare to.
+  //
+  if (num_inserted_ > 0 && num_matches > 0 && num_matches > 3 * num_keys / 4) {
+    // Dense comparisons
+    equal_impl_(num_keys, nullptr, out_groupids, &num_ids, ids);
+  } else {
+    // Sparse comparisons that involve filtering the input set of keys
+    auto ids_cmp_buf = util::TempVectorHolder<uint16_t>(temp_stack_, num_keys);
+    uint16_t* ids_cmp = ids_cmp_buf.mutable_data();
+    int num_ids_result;
+    util::BitUtil::bits_split_indexes(hardware_flags_, num_keys, match_bitvector,
+                                      &num_ids_result, ids, ids_cmp);
+    num_ids = num_ids_result;
+    uint32_t num_not_equal;
+    equal_impl_(num_keys - num_ids, ids_cmp, out_groupids, &num_not_equal, ids + num_ids);
+    num_ids += num_not_equal;
+  }
+
+  do {
+    // A single round of slow-pass (robust) lookup or insert.
+    // A single round ends with either a single comparison verifying the match candidate
+    // or inserting a new key. A single round of slow-pass may return early if we reach
+    // the limit of the number of groups due to inserts of new keys. In that case we need
+    // to resize and recalculating starting global slot ids for new bigger hash table.
+    bool out_of_capacity;
+    RETURN_NOT_OK(
+        lookup_2(hashes, &num_ids, ids, &out_of_capacity, out_groupids, slot_ids));
+    if (out_of_capacity) {
+      RETURN_NOT_OK(grow_double());
+      // Reset start slot ids for still unprocessed input keys.
+      //
+      for (uint32_t i = 0; i < num_ids; ++i) {
+        // First slot in the new starting block
+        slot_ids[ids[i]] = (hashes[ids[i]] >> (bits_hash_ - log_blocks_)) * 8;
+      }
+    }
+  } while (num_ids > 0);
+
+  return Status::OK();
+}
+
+Status SwissTable::grow_double() {
+  // Before and after metadata
+  int num_group_id_bits_before = num_groupid_bits_from_log_blocks(log_blocks_);
+  int num_group_id_bits_after = num_groupid_bits_from_log_blocks(log_blocks_ + 1);
+  uint64_t group_id_mask_before = ~0ULL >> (64 - num_group_id_bits_before);
+  int log_blocks_before = log_blocks_;
+  int log_blocks_after = log_blocks_ + 1;
+  uint64_t block_size_before = (8 + num_group_id_bits_before);
+  uint64_t block_size_after = (8 + num_group_id_bits_after);
+  uint64_t block_size_total_before = (block_size_before << log_blocks_before) + padding_;
+  uint64_t block_size_total_after = (block_size_after << log_blocks_after) + padding_;
+  uint64_t hashes_size_total_before =
+      (bits_hash_ / 8 * (1 << (log_blocks_before + 3))) + padding_;
+  uint64_t hashes_size_total_after =
+      (bits_hash_ / 8 * (1 << (log_blocks_after + 3))) + padding_;
+  constexpr uint32_t stamp_mask = (1 << bits_stamp_) - 1;
+
+  // Allocate new buffers
+  uint8_t* blocks_new;
+  RETURN_NOT_OK(pool_->Allocate(block_size_total_after, &blocks_new));
+  memset(blocks_new, 0, block_size_total_after);
+  uint8_t* hashes_new_8B;
+  uint32_t* hashes_new;
+  RETURN_NOT_OK(pool_->Allocate(hashes_size_total_after, &hashes_new_8B));
+  hashes_new = reinterpret_cast<uint32_t*>(hashes_new_8B);
+
+  // First pass over all old blocks.
+  // Reinsert entries that were not in the overflow block
+  // (block other than selected by hash bits corresponding to the entry).
+  for (int i = 0; i < (1 << log_blocks_); ++i) {
+    // How many full slots in this block
+    uint8_t* block_base = blocks_ + i * block_size_before;
+    uint8_t* double_block_base_new = blocks_new + 2 * i * block_size_after;
+    uint64_t block = *reinterpret_cast<const uint64_t*>(block_base);
+
+    auto full_slots =
+        static_cast<int>(CountLeadingZeros(block & kHighBitOfEachByte) >> 3);
+    int full_slots_new[2];
+    full_slots_new[0] = full_slots_new[1] = 0;
+    *reinterpret_cast<uint64_t*>(double_block_base_new) = kHighBitOfEachByte;
+    *reinterpret_cast<uint64_t*>(double_block_base_new + block_size_after) =
+        kHighBitOfEachByte;
+
+    for (int j = 0; j < full_slots; ++j) {
+      uint64_t slot_id = i * 8 + j;
+      uint32_t hash = hashes_[slot_id];
+      uint64_t block_id_new = hash >> (bits_hash_ - log_blocks_after);
+      bool is_overflow_entry = ((block_id_new >> 1) != static_cast<uint64_t>(i));
+      if (is_overflow_entry) {
+        continue;
+      }
+
+      int ihalf = block_id_new & 1;
+      uint8_t stamp_new =
+          hash >> ((bits_hash_ - log_blocks_after - bits_stamp_)) & stamp_mask;
+      uint64_t group_id_bit_offs = j * num_group_id_bits_before;
+      uint64_t group_id = (*reinterpret_cast<const uint64_t*>(block_base + 8 +
+                                                              (group_id_bit_offs >> 3)) >>
+                           (group_id_bit_offs & 7)) &
+                          group_id_mask_before;
+
+      uint64_t slot_id_new = i * 16 + ihalf * 8 + full_slots_new[ihalf];
+      hashes_new[slot_id_new] = hash;
+      uint8_t* block_base_new = double_block_base_new + ihalf * block_size_after;
+      block_base_new[7 - full_slots_new[ihalf]] = stamp_new;
+      int group_id_bit_offs_new = full_slots_new[ihalf] * num_group_id_bits_after;
+      *reinterpret_cast<uint64_t*>(block_base_new + 8 + (group_id_bit_offs_new >> 3)) |=
+          (group_id << (group_id_bit_offs_new & 7));
+      full_slots_new[ihalf]++;
+    }
+  }
+
+  // Second pass over all old blocks.
+  // Reinsert entries that were in an overflow block.
+  for (int i = 0; i < (1 << log_blocks_); ++i) {
+    // How many full slots in this block
+    uint8_t* block_base = blocks_ + i * block_size_before;
+    uint64_t block = *reinterpret_cast<const uint64_t*>(block_base);
+    int full_slots = static_cast<int>(CountLeadingZeros(block & kHighBitOfEachByte) >> 3);
+
+    for (int j = 0; j < full_slots; ++j) {
+      uint64_t slot_id = i * 8 + j;
+      uint32_t hash = hashes_[slot_id];
+      uint64_t block_id_new = hash >> (bits_hash_ - log_blocks_after);
+      bool is_overflow_entry = ((block_id_new >> 1) != static_cast<uint64_t>(i));
+      if (!is_overflow_entry) {
+        continue;
+      }
+
+      uint64_t group_id_bit_offs = j * num_group_id_bits_before;
+      uint64_t group_id = (*reinterpret_cast<const uint64_t*>(block_base + 8 +
+                                                              (group_id_bit_offs >> 3)) >>
+                           (group_id_bit_offs & 7)) &
+                          group_id_mask_before;
+      uint8_t stamp_new =
+          hash >> ((bits_hash_ - log_blocks_after - bits_stamp_)) & stamp_mask;
+
+      uint8_t* block_base_new = blocks_new + block_id_new * block_size_after;
+      uint64_t block_new = *reinterpret_cast<const uint64_t*>(block_base_new);
+      int full_slots_new =
+          static_cast<int>(CountLeadingZeros(block_new & kHighBitOfEachByte) >> 3);
+      while (full_slots_new == 8) {
+        block_id_new = (block_id_new + 1) & ((1 << log_blocks_after) - 1);
+        block_base_new = blocks_new + block_id_new * block_size_after;
+        block_new = *reinterpret_cast<const uint64_t*>(block_base_new);
+        full_slots_new =
+            static_cast<int>(CountLeadingZeros(block_new & kHighBitOfEachByte) >> 3);
+      }
+
+      hashes_new[block_id_new * 8 + full_slots_new] = hash;
+      block_base_new[7 - full_slots_new] = stamp_new;
+      int group_id_bit_offs_new = full_slots_new * num_group_id_bits_after;
+      *reinterpret_cast<uint64_t*>(block_base_new + 8 + (group_id_bit_offs_new >> 3)) |=
+          (group_id << (group_id_bit_offs_new & 7));
+    }
+  }
+
+  pool_->Free(blocks_, block_size_total_before);
+  pool_->Free(reinterpret_cast<uint8_t*>(hashes_), hashes_size_total_before);
+  log_blocks_ = log_blocks_after;
+  blocks_ = blocks_new;
+  hashes_ = hashes_new;
+
+  return Status::OK();
+}
+
+Status SwissTable::init(int64_t hardware_flags, MemoryPool* pool,
+                        util::TempVectorStack* temp_stack, int log_minibatch,
+                        EqualImpl equal_impl, AppendImpl append_impl) {
+  hardware_flags_ = hardware_flags;
+  pool_ = pool;
+  temp_stack_ = temp_stack;
+  log_minibatch_ = log_minibatch;
+  equal_impl_ = equal_impl;
+  append_impl_ = append_impl;
+
+  log_blocks_ = 0;
+  int num_groupid_bits = num_groupid_bits_from_log_blocks(log_blocks_);
+  num_inserted_ = 0;
+
+  const uint64_t block_bytes = 8 + num_groupid_bits;
+  const uint64_t slot_bytes = (block_bytes << log_blocks_) + padding_;
+  RETURN_NOT_OK(pool_->Allocate(slot_bytes, &blocks_));
+
+  // Make sure group ids are initially set to zero for all slots.
+  memset(blocks_, 0, slot_bytes);
+
+  // Initialize all status bytes to represent an empty slot.
+  for (uint64_t i = 0; i < (static_cast<uint64_t>(1) << log_blocks_); ++i) {
+    *reinterpret_cast<uint64_t*>(blocks_ + i * block_bytes) = kHighBitOfEachByte;
+  }
+
+  uint64_t num_slots = 1ULL << (log_blocks_ + 3);
+  const uint64_t hash_size = sizeof(uint32_t);
+  const uint64_t hash_bytes = hash_size * num_slots + padding_;
+  uint8_t* hashes8;
+  RETURN_NOT_OK(pool_->Allocate(hash_bytes, &hashes8));
+  hashes_ = reinterpret_cast<uint32_t*>(hashes8);
+
+  return Status::OK();
+}
+
+void SwissTable::cleanup() {
+  if (blocks_) {
+    int num_groupid_bits = num_groupid_bits_from_log_blocks(log_blocks_);
+    const uint64_t block_bytes = 8 + num_groupid_bits;
+    const uint64_t slot_bytes = (block_bytes << log_blocks_) + padding_;
+    pool_->Free(blocks_, slot_bytes);
+    blocks_ = nullptr;
+  }
+  if (hashes_) {
+    uint64_t num_slots = 1ULL << (log_blocks_ + 3);
+    const uint64_t hash_size = sizeof(uint32_t);
+    const uint64_t hash_bytes = hash_size * num_slots + padding_;
+    pool_->Free(reinterpret_cast<uint8_t*>(hashes_), hash_bytes);
+    hashes_ = nullptr;
+  }
+  log_blocks_ = 0;
+  num_inserted_ = 0;
+}
+
+}  // namespace compute
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/exec/key_map.h b/cpp/src/arrow/compute/exec/key_map.h
new file mode 100644
index 00000000000..8c472736ec4
--- /dev/null
+++ b/cpp/src/arrow/compute/exec/key_map.h
@@ -0,0 +1,172 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <functional>
+
+#include "arrow/compute/exec/util.h"
+#include "arrow/memory_pool.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+
+namespace arrow {
+namespace compute {
+
+class SwissTable {
+ public:
+  SwissTable() = default;
+  ~SwissTable() { cleanup(); }
+
+  using EqualImpl =
+      std::function<void(int num_keys, const uint16_t* selection /* may be null */,
+                         const uint32_t* group_ids, uint32_t* out_num_keys_mismatch,
+                         uint16_t* out_selection_mismatch)>;
+  using AppendImpl = std::function<Status(int num_keys, const uint16_t* selection)>;
+
+  Status init(int64_t hardware_flags, MemoryPool* pool, util::TempVectorStack* temp_stack,
+              int log_minibatch, EqualImpl equal_impl, AppendImpl append_impl);
+  void cleanup();
+
+  Status map(const int ckeys, const uint32_t* hashes, uint32_t* outgroupids);
+
+ private:
+  // Lookup helpers
+
+  /// \brief Scan bytes in block in reverse and stop as soon
+  /// as a position of interest is found.
+  ///
+  /// Positions of interest:
+  /// a) slot with a matching stamp is encountered,
+  /// b) first empty slot is encountered,
+  /// c) we reach the end of the block.
+  ///
+  /// \param[in] block 8 byte block of hash table
+  /// \param[in] stamp 7 bits of hash used as a stamp
+  /// \param[in] start_slot Index of the first slot in the block to start search from.  We
+  ///            assume that this index always points to a non-empty slot, equivalently
+  ///            that it comes before any empty slots.  (Used only by one template
+  ///            variant.)
+  /// \param[out] out_slot index corresponding to the discovered position of interest (8
+  ///            represents end of block).
+  /// \param[out] out_match_found an integer flag (0 or 1) indicating if we found a
+  ///            matching stamp.
+  template <bool use_start_slot>
+  inline void search_block(uint64_t block, int stamp, int start_slot, int* out_slot,
+                           int* out_match_found);
+
+  /// \brief Extract group id for a given slot in a given block.
+  ///
+  /// Group ids follow in memory after 64-bit block data.
+  /// Maximum number of groups inserted is equal to the number
+  /// of all slots in all blocks, which is 8 * the number of blocks.
+  /// Group ids are bit packed using that maximum to determine the necessary number of
+  /// bits.
+  inline uint64_t extract_group_id(const uint8_t* block_ptr, int slot,
+                                   uint64_t group_id_mask);
+
+  inline uint64_t next_slot_to_visit(uint64_t block_index, int slot, int match_found);
+
+  inline void insert(uint8_t* block_base, uint64_t slot_id, uint32_t hash, uint8_t stamp,
+                     uint32_t group_id);
+
+  inline uint64_t num_groups_for_resize() const;
+
+  inline uint64_t wrap_global_slot_id(uint64_t global_slot_id);
+
+  // First hash table access
+  // Find first match in the start block if exists.
+  // Possible cases:
+  // 1. Stamp match in a block
+  // 2. No stamp match in a block, no empty buckets in a block
+  // 3. No stamp match in a block, empty buckets in a block
+  //
+  template <bool use_selection>
+  void lookup_1(const uint16_t* selection, const int num_keys, const uint32_t* hashes,
+                uint8_t* out_match_bitvector, uint32_t* out_group_ids,
+                uint32_t* out_slot_ids);
+#if defined(ARROW_HAVE_AVX2)
+  void lookup_1_avx2_x8(const int num_hashes, const uint32_t* hashes,
+                        uint8_t* out_match_bitvector, uint32_t* out_group_ids,
+                        uint32_t* out_next_slot_ids);
+  void lookup_1_avx2_x32(const int num_hashes, const uint32_t* hashes,
+                         uint8_t* out_match_bitvector, uint32_t* out_group_ids,
+                         uint32_t* out_next_slot_ids);
+#endif
+
+  // Completing hash table lookup post first access
+  Status lookup_2(const uint32_t* hashes, uint32_t* inout_num_selected,
+                  uint16_t* inout_selection, bool* out_need_resize,
+                  uint32_t* out_group_ids, uint32_t* out_next_slot_ids);
+
+  // Resize small hash tables when 50% full (up to 8KB).
+  // Resize large hash tables when 75% full.
+  Status grow_double();
+
+  static int num_groupid_bits_from_log_blocks(int log_blocks) {
+    int required_bits = log_blocks + 3;
+    return required_bits <= 8 ? 8
+                              : required_bits <= 16 ? 16 : required_bits <= 32 ? 32 : 64;
+  }
+
+  // Use 32-bit hash for now
+  static constexpr int bits_hash_ = 32;
+
+  // Number of hash bits stored in slots in a block.
+  // The highest bits of hash determine block id.
+  // The next set of highest bits is a "stamp" stored in a slot in a block.
+  static constexpr int bits_stamp_ = 7;
+
+  // Padding bytes added at the end of buffers for ease of SIMD access
+  static constexpr int padding_ = 64;
+
+  int log_minibatch_;
+  // Base 2 log of the number of blocks
+  int log_blocks_ = 0;
+  // Number of keys inserted into hash table
+  uint32_t num_inserted_ = 0;
+
+  // Data for blocks.
+  // Each block has 8 status bytes for 8 slots, followed by 8 bit packed group ids for
+  // these slots. In 8B status word, the order of bytes is reversed. Group ids are in
+  // normal order. There is 64B padding at the end.
+  //
+  // 0 byte - 7 bucket | 1. byte - 6 bucket | ...
+  // ---------------------------------------------------
+  // |     Empty bit*   |    Empty bit       |
+  // ---------------------------------------------------
+  // |   7-bit hash    |    7-bit hash      |
+  // ---------------------------------------------------
+  // * Empty bucket has value 0x80. Non-empty bucket has highest bit set to 0.
+  //
+  uint8_t* blocks_;
+
+  // Array of hashes of values inserted into slots.
+  // Undefined if the corresponding slot is empty.
+  // There is 64B padding at the end.
+  uint32_t* hashes_;
+
+  int64_t hardware_flags_;
+  MemoryPool* pool_;
+  util::TempVectorStack* temp_stack_;
+
+  EqualImpl equal_impl_;
+  AppendImpl append_impl_;
+};
+
+}  // namespace compute
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/exec/key_map_avx2.cc b/cpp/src/arrow/compute/exec/key_map_avx2.cc
new file mode 100644
index 00000000000..a2efb4d1bb9
--- /dev/null
+++ b/cpp/src/arrow/compute/exec/key_map_avx2.cc
@@ -0,0 +1,407 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <immintrin.h>
+
+#include "arrow/compute/exec/key_map.h"
+
+namespace arrow {
+namespace compute {
+
+#if defined(ARROW_HAVE_AVX2)
+
+// Why it is OK to round up number of rows internally:
+// All of the buffers: hashes, out_match_bitvector, out_group_ids, out_next_slot_ids
+// are temporary buffers of group id mapping.
+// Temporary buffers are buffers that live only within the boundaries of a single
+// minibatch. Temporary buffers add 64B at the end, so that SIMD code does not have to
+// worry about reading and writing outside of the end of the buffer up to 64B. If the
+// hashes array contains garbage after the last element, it cannot cause computation to
+// fail, since any random data is a valid hash for the purpose of lookup.
+//
+// This is more or less translation of equivalent scalar code, adjusted for a different
+// instruction set (e.g. missing leading zero count instruction).
+//
+void SwissTable::lookup_1_avx2_x8(const int num_hashes, const uint32_t* hashes,
+                                  uint8_t* out_match_bitvector, uint32_t* out_group_ids,
+                                  uint32_t* out_next_slot_ids) {
+  // Number of inputs processed together in a loop
+  constexpr int unroll = 8;
+
+  const int num_group_id_bits = num_groupid_bits_from_log_blocks(log_blocks_);
+  uint32_t group_id_mask = ~static_cast<uint32_t>(0) >> (32 - num_group_id_bits);
+  const __m256i* vhash_ptr = reinterpret_cast<const __m256i*>(hashes);
+  const __m256i vstamp_mask = _mm256_set1_epi32((1 << bits_stamp_) - 1);
+
+  // TODO: explain why it is ok to process hashes outside of buffer boundaries
+  for (int i = 0; i < ((num_hashes + unroll - 1) / unroll); ++i) {
+    constexpr uint64_t kEachByteIs8 = 0x0808080808080808ULL;
+    constexpr uint64_t kByteSequenceOfPowersOf2 = 0x8040201008040201ULL;
+
+    // Calculate block index and hash stamp for a byte in a block
+    //
+    __m256i vhash = _mm256_loadu_si256(vhash_ptr + i);
+    __m256i vblock_id = _mm256_srlv_epi32(
+        vhash, _mm256_set1_epi32(bits_hash_ - bits_stamp_ - log_blocks_));
+    __m256i vstamp = _mm256_and_si256(vblock_id, vstamp_mask);
+    vblock_id = _mm256_srli_epi32(vblock_id, bits_stamp_);
+
+    // We now split inputs and process 4 at a time,
+    // in order to process 64-bit blocks
+    //
+    __m256i vblock_offset =
+        _mm256_mullo_epi32(vblock_id, _mm256_set1_epi32(num_group_id_bits + 8));
+    __m256i voffset_A = _mm256_and_si256(vblock_offset, _mm256_set1_epi64x(0xffffffff));
+    __m256i vstamp_A = _mm256_and_si256(vstamp, _mm256_set1_epi64x(0xffffffff));
+    __m256i voffset_B = _mm256_srli_epi64(vblock_offset, 32);
+    __m256i vstamp_B = _mm256_srli_epi64(vstamp, 32);
+
+    auto blocks_i64 = reinterpret_cast<arrow::util::int64_for_gather_t*>(blocks_);
+    auto vblock_A = _mm256_i64gather_epi64(blocks_i64, voffset_A, 1);
+    auto vblock_B = _mm256_i64gather_epi64(blocks_i64, voffset_B, 1);
+    __m256i vblock_highbits_A =
+        _mm256_cmpeq_epi8(vblock_A, _mm256_set1_epi8(static_cast<unsigned char>(0x80)));
+    __m256i vblock_highbits_B =
+        _mm256_cmpeq_epi8(vblock_B, _mm256_set1_epi8(static_cast<unsigned char>(0x80)));
+    __m256i vbyte_repeat_pattern =
+        _mm256_setr_epi64x(0ULL, kEachByteIs8, 0ULL, kEachByteIs8);
+    vstamp_A = _mm256_shuffle_epi8(
+        vstamp_A, _mm256_or_si256(vbyte_repeat_pattern, vblock_highbits_A));
+    vstamp_B = _mm256_shuffle_epi8(
+        vstamp_B, _mm256_or_si256(vbyte_repeat_pattern, vblock_highbits_B));
+    __m256i vmatches_A = _mm256_cmpeq_epi8(vblock_A, vstamp_A);
+    __m256i vmatches_B = _mm256_cmpeq_epi8(vblock_B, vstamp_B);
+    __m256i vmatch_found = _mm256_andnot_si256(
+        _mm256_blend_epi32(_mm256_cmpeq_epi64(vmatches_A, _mm256_setzero_si256()),
+                           _mm256_cmpeq_epi64(vmatches_B, _mm256_setzero_si256()),
+                           0xaa),  // 0b10101010
+        _mm256_set1_epi8(static_cast<unsigned char>(0xff)));
+    vmatches_A =
+        _mm256_sad_epu8(_mm256_and_si256(_mm256_or_si256(vmatches_A, vblock_highbits_A),
+                                         _mm256_set1_epi64x(kByteSequenceOfPowersOf2)),
+                        _mm256_setzero_si256());
+    vmatches_B =
+        _mm256_sad_epu8(_mm256_and_si256(_mm256_or_si256(vmatches_B, vblock_highbits_B),
+                                         _mm256_set1_epi64x(kByteSequenceOfPowersOf2)),
+                        _mm256_setzero_si256());
+    __m256i vmatches = _mm256_or_si256(vmatches_A, _mm256_slli_epi64(vmatches_B, 32));
+
+    // We are now back to processing 8 at a time.
+    // Each lane contains 8-bit bit vector marking slots that are matches.
+    // We need to find leading zeroes count for all slots.
+    //
+    // Emulating lzcnt in lowest bytes of 32-bit elements
+    __m256i vgt = _mm256_cmpgt_epi32(_mm256_set1_epi32(16), vmatches);
+    __m256i vnext_slot_id =
+        _mm256_blendv_epi8(_mm256_srli_epi32(vmatches, 4),
+                           _mm256_and_si256(vmatches, _mm256_set1_epi32(0x0f)), vgt);
+    vnext_slot_id = _mm256_shuffle_epi8(
+        _mm256_setr_epi8(4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 4, 3, 2, 2, 1, 1,
+                         1, 1, 0, 0, 0, 0, 0, 0, 0, 0),
+        vnext_slot_id);
+    vnext_slot_id =
+        _mm256_add_epi32(_mm256_and_si256(vnext_slot_id, _mm256_set1_epi32(0xff)),
+                         _mm256_and_si256(vgt, _mm256_set1_epi32(4)));
+
+    // Lookup group ids
+    //
+    __m256i vgroupid_bit_offset =
+        _mm256_mullo_epi32(_mm256_and_si256(vnext_slot_id, _mm256_set1_epi32(7)),
+                           _mm256_set1_epi32(num_group_id_bits));
+
+    // This only works for up to 25 bits per group id, since it uses 32-bit gather
+    // TODO: make sure this will never get called when there are more than 2^25 groups.
+    __m256i vgroupid =
+        _mm256_add_epi32(_mm256_srli_epi32(vgroupid_bit_offset, 3),
+                         _mm256_add_epi32(vblock_offset, _mm256_set1_epi32(8)));
+    vgroupid = _mm256_i32gather_epi32(reinterpret_cast<const int*>(blocks_), vgroupid, 1);
+    vgroupid = _mm256_srlv_epi32(
+        vgroupid, _mm256_and_si256(vgroupid_bit_offset, _mm256_set1_epi32(7)));
+    vgroupid = _mm256_and_si256(vgroupid, _mm256_set1_epi32(group_id_mask));
+
+    // Convert slot id relative to the block to slot id relative to the beginnning of the
+    // table
+    //
+    vnext_slot_id = _mm256_add_epi32(
+        _mm256_add_epi32(vnext_slot_id,
+                         _mm256_and_si256(vmatch_found, _mm256_set1_epi32(1))),
+        _mm256_slli_epi32(vblock_id, 3));
+
+    // Convert match found vector from 32-bit elements to bit vector
+    out_match_bitvector[i] = _pext_u32(_mm256_movemask_epi8(vmatch_found),
+                                       0x11111111);  // 0b00010001 repeated 4x
+    _mm256_storeu_si256(reinterpret_cast<__m256i*>(out_group_ids) + i, vgroupid);
+    _mm256_storeu_si256(reinterpret_cast<__m256i*>(out_next_slot_ids) + i, vnext_slot_id);
+  }
+}
+
+// Take a set of 16 64-bit elements,
+// Output one AVX2 register per byte (0 to 7), containing a sequence of 16 bytes,
+// one from each input 64-bit word, all from the same position in 64-bit word.
+// 16 bytes are replicated in lower and upper half of each output register.
+//
+inline void split_bytes_avx2(__m256i word0, __m256i word1, __m256i word2, __m256i word3,
+                             __m256i& byte0, __m256i& byte1, __m256i& byte2,
+                             __m256i& byte3, __m256i& byte4, __m256i& byte5,
+                             __m256i& byte6, __m256i& byte7) {
+  __m256i word01lo = _mm256_unpacklo_epi8(
+      word0, word1);  // {a0, e0, a1, e1, ... a7, e7, c0, g0, c1, g1, ... c7, g7}
+  __m256i word23lo = _mm256_unpacklo_epi8(
+      word2, word3);  // {i0, m0, i1, m1, ... i7, m7, k0, o0, k1, o1, ... k7, o7}
+  __m256i word01hi = _mm256_unpackhi_epi8(
+      word0, word1);  // {b0, f0, b1, f1, ... b7, f1, d0, h0, d1, h1, ... d7, h7}
+  __m256i word23hi = _mm256_unpackhi_epi8(
+      word2, word3);  // {j0, n0, j1, n1, ... j7, n7, l0, p0, l1, p1, ... l7, p7}
+
+  __m256i a =
+      _mm256_unpacklo_epi16(word01lo, word01hi);  // {a0, e0, b0, f0, ... a3, e3, b3, f3,
+                                                  // c0, g0, d0, h0, ... c3, g3, d3, h3}
+  __m256i b =
+      _mm256_unpacklo_epi16(word23lo, word23hi);  // {i0, m0, j0, n0, ... i3, m3, j3, n3,
+                                                  // k0, o0, l0, p0, ... k3, o3, l3, p3}
+  __m256i c =
+      _mm256_unpackhi_epi16(word01lo, word01hi);  // {a4, e4, b4, f4, ... a7, e7, b7, f7,
+                                                  // c4, g4, d4, h4, ... c7, g7, d7, h7}
+  __m256i d =
+      _mm256_unpackhi_epi16(word23lo, word23hi);  // {i4, m4, j4, n4, ... i7, m7, j7, n7,
+                                                  // k4, o4, l4, p4, ... k7, o7, l7, p7}
+
+  __m256i byte01 = _mm256_unpacklo_epi32(
+      a, b);  // {a0, e0, b0, f0, i0, m0, j0, n0, a1, e1, b1, f1, i1, m1, j1, n1, c0, g0,
+              // d0, h0, k0, o0, l0, p0, ...}
+  __m256i shuffle_const =
+      _mm256_setr_epi8(0, 2, 8, 10, 1, 3, 9, 11, 4, 6, 12, 14, 5, 7, 13, 15, 0, 2, 8, 10,
+                       1, 3, 9, 11, 4, 6, 12, 14, 5, 7, 13, 15);
+  byte01 = _mm256_permute4x64_epi64(
+      byte01, 0xd8);  // 11011000 b - swapping middle two 64-bit elements
+  byte01 = _mm256_shuffle_epi8(byte01, shuffle_const);
+  __m256i byte23 = _mm256_unpackhi_epi32(a, b);
+  byte23 = _mm256_permute4x64_epi64(byte23, 0xd8);
+  byte23 = _mm256_shuffle_epi8(byte23, shuffle_const);
+  __m256i byte45 = _mm256_unpacklo_epi32(c, d);
+  byte45 = _mm256_permute4x64_epi64(byte45, 0xd8);
+  byte45 = _mm256_shuffle_epi8(byte45, shuffle_const);
+  __m256i byte67 = _mm256_unpackhi_epi32(c, d);
+  byte67 = _mm256_permute4x64_epi64(byte67, 0xd8);
+  byte67 = _mm256_shuffle_epi8(byte67, shuffle_const);
+
+  byte0 = _mm256_permute4x64_epi64(byte01, 0x44);  // 01000100 b
+  byte1 = _mm256_permute4x64_epi64(byte01, 0xee);  // 11101110 b
+  byte2 = _mm256_permute4x64_epi64(byte23, 0x44);  // 01000100 b
+  byte3 = _mm256_permute4x64_epi64(byte23, 0xee);  // 11101110 b
+  byte4 = _mm256_permute4x64_epi64(byte45, 0x44);  // 01000100 b
+  byte5 = _mm256_permute4x64_epi64(byte45, 0xee);  // 11101110 b
+  byte6 = _mm256_permute4x64_epi64(byte67, 0x44);  // 01000100 b
+  byte7 = _mm256_permute4x64_epi64(byte67, 0xee);  // 11101110 b
+}
+
+// This one can only process a multiple of 32 values.
+// The caller needs to process the remaining tail, if the input is not divisible by 32,
+// using a different method.
+// TODO: Explain the idea behind storing arrays in SIMD registers.
+// Explain why it is faster with SIMD than using memory loads.
+void SwissTable::lookup_1_avx2_x32(const int num_hashes, const uint32_t* hashes,
+                                   uint8_t* out_match_bitvector, uint32_t* out_group_ids,
+                                   uint32_t* out_next_slot_ids) {
+  constexpr int unroll = 32;
+
+  // There is a limit on the number of input blocks,
+  // because we want to store all their data in a set of AVX2 registers.
+  ARROW_DCHECK(log_blocks_ <= 4);
+
+  // Remember that block bytes and group id bytes are in opposite orders in memory of hash
+  // table. We put them in the same order.
+  __m256i vblock_byte0, vblock_byte1, vblock_byte2, vblock_byte3, vblock_byte4,
+      vblock_byte5, vblock_byte6, vblock_byte7;
+  __m256i vgroupid_byte0, vgroupid_byte1, vgroupid_byte2, vgroupid_byte3, vgroupid_byte4,
+      vgroupid_byte5, vgroupid_byte6, vgroupid_byte7;
+  // What we output if there is no match in the block
+  __m256i vslot_empty_or_end;
+
+  constexpr uint32_t k4ByteSequence_0_4_8_12 = 0x0c080400;
+  constexpr uint32_t k4ByteSequence_1_5_9_13 = 0x0d090501;
+  constexpr uint32_t k4ByteSequence_2_6_10_14 = 0x0e0a0602;
+  constexpr uint32_t k4ByteSequence_3_7_11_15 = 0x0f0b0703;
+  constexpr uint64_t kEachByteIs1 = 0x0101010101010101ULL;
+  constexpr uint64_t kByteSequence7DownTo0 = 0x0001020304050607ULL;
+  constexpr uint64_t kByteSequence15DownTo8 = 0x08090A0B0C0D0E0FULL;
+
+  // Bit unpack group ids into 1B.
+  // Assemble the sequence of block bytes.
+  uint64_t block_bytes[16];
+  uint64_t groupid_bytes[16];
+  const int num_groupid_bits = num_groupid_bits_from_log_blocks(log_blocks_);
+  uint64_t bit_unpack_mask = ((1 << num_groupid_bits) - 1) * kEachByteIs1;
+  for (int i = 0; i < (1 << log_blocks_); ++i) {
+    uint64_t in_groupids =
+        *reinterpret_cast<const uint64_t*>(blocks_ + (8 + num_groupid_bits) * i + 8);
+    uint64_t in_blockbytes =
+        *reinterpret_cast<const uint64_t*>(blocks_ + (8 + num_groupid_bits) * i);
+    groupid_bytes[i] = _pdep_u64(in_groupids, bit_unpack_mask);
+    block_bytes[i] = in_blockbytes;
+  }
+
+  // Split a sequence of 64-bit words into SIMD vectors holding individual bytes
+  __m256i vblock_words0 =
+      _mm256_loadu_si256(reinterpret_cast<const __m256i*>(block_bytes) + 0);
+  __m256i vblock_words1 =
+      _mm256_loadu_si256(reinterpret_cast<const __m256i*>(block_bytes) + 1);
+  __m256i vblock_words2 =
+      _mm256_loadu_si256(reinterpret_cast<const __m256i*>(block_bytes) + 2);
+  __m256i vblock_words3 =
+      _mm256_loadu_si256(reinterpret_cast<const __m256i*>(block_bytes) + 3);
+  // Reverse the bytes in blocks
+  __m256i vshuffle_const =
+      _mm256_setr_epi64x(kByteSequence7DownTo0, kByteSequence15DownTo8,
+                         kByteSequence7DownTo0, kByteSequence15DownTo8);
+  vblock_words0 = _mm256_shuffle_epi8(vblock_words0, vshuffle_const);
+  vblock_words1 = _mm256_shuffle_epi8(vblock_words1, vshuffle_const);
+  vblock_words2 = _mm256_shuffle_epi8(vblock_words2, vshuffle_const);
+  vblock_words3 = _mm256_shuffle_epi8(vblock_words3, vshuffle_const);
+  split_bytes_avx2(vblock_words0, vblock_words1, vblock_words2, vblock_words3,
+                   vblock_byte0, vblock_byte1, vblock_byte2, vblock_byte3, vblock_byte4,
+                   vblock_byte5, vblock_byte6, vblock_byte7);
+  split_bytes_avx2(
+      _mm256_loadu_si256(reinterpret_cast<const __m256i*>(groupid_bytes) + 0),
+      _mm256_loadu_si256(reinterpret_cast<const __m256i*>(groupid_bytes) + 1),
+      _mm256_loadu_si256(reinterpret_cast<const __m256i*>(groupid_bytes) + 2),
+      _mm256_loadu_si256(reinterpret_cast<const __m256i*>(groupid_bytes) + 3),
+      vgroupid_byte0, vgroupid_byte1, vgroupid_byte2, vgroupid_byte3, vgroupid_byte4,
+      vgroupid_byte5, vgroupid_byte6, vgroupid_byte7);
+
+  // Calculate the slot to output when there is no match in a block.
+  // It will be the index of the first empty slot or 8 (the number of slots in block)
+  // if there are no empty slots.
+  vslot_empty_or_end = _mm256_set1_epi8(8);
+  {
+    __m256i vis_empty;
+#define CMP(VBLOCKBYTE, BYTENUM)                                                         \
+  vis_empty =                                                                            \
+      _mm256_cmpeq_epi8(VBLOCKBYTE, _mm256_set1_epi8(static_cast<unsigned char>(0x80))); \
+  vslot_empty_or_end =                                                                   \
+      _mm256_blendv_epi8(vslot_empty_or_end, _mm256_set1_epi8(BYTENUM), vis_empty);
+    CMP(vblock_byte7, 7);
+    CMP(vblock_byte6, 6);
+    CMP(vblock_byte5, 5);
+    CMP(vblock_byte4, 4);
+    CMP(vblock_byte3, 3);
+    CMP(vblock_byte2, 2);
+    CMP(vblock_byte1, 1);
+    CMP(vblock_byte0, 0);
+#undef CMP
+  }
+
+  const int block_id_mask = (1 << log_blocks_) - 1;
+
+  for (int i = 0; i < num_hashes / unroll; ++i) {
+    __m256i vhash0 =
+        _mm256_loadu_si256(reinterpret_cast<const __m256i*>(hashes) + 4 * i + 0);
+    __m256i vhash1 =
+        _mm256_loadu_si256(reinterpret_cast<const __m256i*>(hashes) + 4 * i + 1);
+    __m256i vhash2 =
+        _mm256_loadu_si256(reinterpret_cast<const __m256i*>(hashes) + 4 * i + 2);
+    __m256i vhash3 =
+        _mm256_loadu_si256(reinterpret_cast<const __m256i*>(hashes) + 4 * i + 3);
+
+    // We will get input in byte lanes in the order: [0, 8, 16, 24, 1, 9, 17, 25, 2, 10,
+    // 18, 26, ...]
+    vhash0 = _mm256_or_si256(_mm256_srli_epi32(vhash0, 16),
+                             _mm256_and_si256(vhash2, _mm256_set1_epi32(0xffff0000)));
+    vhash1 = _mm256_or_si256(_mm256_srli_epi32(vhash1, 16),
+                             _mm256_and_si256(vhash3, _mm256_set1_epi32(0xffff0000)));
+    __m256i vstamp_A = _mm256_and_si256(
+        _mm256_srlv_epi32(vhash0, _mm256_set1_epi32(16 - log_blocks_ - 7)),
+        _mm256_set1_epi16(0x7f));
+    __m256i vstamp_B = _mm256_and_si256(
+        _mm256_srlv_epi32(vhash1, _mm256_set1_epi32(16 - log_blocks_ - 7)),
+        _mm256_set1_epi16(0x7f));
+    __m256i vstamp = _mm256_or_si256(vstamp_A, _mm256_slli_epi16(vstamp_B, 8));
+    __m256i vblock_id_A =
+        _mm256_and_si256(_mm256_srlv_epi32(vhash0, _mm256_set1_epi32(16 - log_blocks_)),
+                         _mm256_set1_epi16(block_id_mask));
+    __m256i vblock_id_B =
+        _mm256_and_si256(_mm256_srlv_epi32(vhash1, _mm256_set1_epi32(16 - log_blocks_)),
+                         _mm256_set1_epi16(block_id_mask));
+    __m256i vblock_id = _mm256_or_si256(vblock_id_A, _mm256_slli_epi16(vblock_id_B, 8));
+
+    // Visit all block bytes in reverse order (overwriting data on multiple matches)
+    __m256i vmatch_found = _mm256_setzero_si256();
+    __m256i vslot_id = _mm256_shuffle_epi8(vslot_empty_or_end, vblock_id);
+    __m256i vgroup_id = _mm256_setzero_si256();
+#define CMP(VBLOCK_BYTE, VGROUPID_BYTE, BYTENUM)                                         \
+  {                                                                                      \
+    __m256i vcmp =                                                                       \
+        _mm256_cmpeq_epi8(_mm256_shuffle_epi8(VBLOCK_BYTE, vblock_id), vstamp);          \
+    vmatch_found = _mm256_or_si256(vmatch_found, vcmp);                                  \
+    vgroup_id = _mm256_blendv_epi8(vgroup_id,                                            \
+                                   _mm256_shuffle_epi8(VGROUPID_BYTE, vblock_id), vcmp); \
+    vslot_id = _mm256_blendv_epi8(vslot_id, _mm256_set1_epi8(BYTENUM + 1), vcmp);        \
+  }
+    CMP(vblock_byte7, vgroupid_byte7, 7);
+    CMP(vblock_byte6, vgroupid_byte6, 6);
+    CMP(vblock_byte5, vgroupid_byte5, 5);
+    CMP(vblock_byte4, vgroupid_byte4, 4);
+    CMP(vblock_byte3, vgroupid_byte3, 3);
+    CMP(vblock_byte2, vgroupid_byte2, 2);
+    CMP(vblock_byte1, vgroupid_byte1, 1);
+    CMP(vblock_byte0, vgroupid_byte0, 0);
+#undef CMP
+
+    vslot_id = _mm256_add_epi8(vslot_id, _mm256_slli_epi32(vblock_id, 3));
+    // So far the output is in the order: [0, 8, 16, 24, 1, 9, 17, 25, 2, 10, 18, 26, ...]
+    vmatch_found = _mm256_shuffle_epi8(
+        vmatch_found,
+        _mm256_setr_epi32(k4ByteSequence_0_4_8_12, k4ByteSequence_1_5_9_13,
+                          k4ByteSequence_2_6_10_14, k4ByteSequence_3_7_11_15,
+                          k4ByteSequence_0_4_8_12, k4ByteSequence_1_5_9_13,
+                          k4ByteSequence_2_6_10_14, k4ByteSequence_3_7_11_15));
+    // Now it is: [0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27, | 4, 5, 6, 7,
+    // 12, 13, 14, 15, ...]
+    vmatch_found = _mm256_permutevar8x32_epi32(vmatch_found,
+                                               _mm256_setr_epi32(0, 4, 1, 5, 2, 6, 3, 7));
+
+    reinterpret_cast<uint32_t*>(out_match_bitvector)[i] =
+        _mm256_movemask_epi8(vmatch_found);
+    _mm256_storeu_si256(reinterpret_cast<__m256i*>(out_group_ids) + 4 * i + 0,
+                        _mm256_and_si256(vgroup_id, _mm256_set1_epi32(0xff)));
+    _mm256_storeu_si256(
+        reinterpret_cast<__m256i*>(out_group_ids) + 4 * i + 1,
+        _mm256_and_si256(_mm256_srli_epi32(vgroup_id, 8), _mm256_set1_epi32(0xff)));
+    _mm256_storeu_si256(
+        reinterpret_cast<__m256i*>(out_group_ids) + 4 * i + 2,
+        _mm256_and_si256(_mm256_srli_epi32(vgroup_id, 16), _mm256_set1_epi32(0xff)));
+    _mm256_storeu_si256(
+        reinterpret_cast<__m256i*>(out_group_ids) + 4 * i + 3,
+        _mm256_and_si256(_mm256_srli_epi32(vgroup_id, 24), _mm256_set1_epi32(0xff)));
+    _mm256_storeu_si256(reinterpret_cast<__m256i*>(out_next_slot_ids) + 4 * i + 0,
+                        _mm256_and_si256(vslot_id, _mm256_set1_epi32(0xff)));
+    _mm256_storeu_si256(
+        reinterpret_cast<__m256i*>(out_next_slot_ids) + 4 * i + 1,
+        _mm256_and_si256(_mm256_srli_epi32(vslot_id, 8), _mm256_set1_epi32(0xff)));
+    _mm256_storeu_si256(
+        reinterpret_cast<__m256i*>(out_next_slot_ids) + 4 * i + 2,
+        _mm256_and_si256(_mm256_srli_epi32(vslot_id, 16), _mm256_set1_epi32(0xff)));
+    _mm256_storeu_si256(
+        reinterpret_cast<__m256i*>(out_next_slot_ids) + 4 * i + 3,
+        _mm256_and_si256(_mm256_srli_epi32(vslot_id, 24), _mm256_set1_epi32(0xff)));
+  }
+}
+
+#endif
+
+}  // namespace compute
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/exec/util.cc b/cpp/src/arrow/compute/exec/util.cc
new file mode 100644
index 00000000000..5f1c0776c56
--- /dev/null
+++ b/cpp/src/arrow/compute/exec/util.cc
@@ -0,0 +1,234 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/compute/exec/util.h"
+
+#include "arrow/util/bit_util.h"
+#include "arrow/util/bitmap_ops.h"
+
+namespace arrow {
+
+using BitUtil::CountTrailingZeros;
+
+namespace util {
+
+inline void BitUtil::bits_to_indexes_helper(uint64_t word, uint16_t base_index,
+                                            int* num_indexes, uint16_t* indexes) {
+  int n = *num_indexes;
+  while (word) {
+    indexes[n++] = base_index + static_cast<uint16_t>(CountTrailingZeros(word));
+    word &= word - 1;
+  }
+  *num_indexes = n;
+}
+
+inline void BitUtil::bits_filter_indexes_helper(uint64_t word,
+                                                const uint16_t* input_indexes,
+                                                int* num_indexes, uint16_t* indexes) {
+  int n = *num_indexes;
+  while (word) {
+    indexes[n++] = input_indexes[CountTrailingZeros(word)];
+    word &= word - 1;
+  }
+  *num_indexes = n;
+}
+
+template <int bit_to_search, bool filter_input_indexes>
+void BitUtil::bits_to_indexes_internal(int64_t hardware_flags, const int num_bits,
+                                       const uint8_t* bits, const uint16_t* input_indexes,
+                                       int* num_indexes, uint16_t* indexes) {
+  // 64 bits at a time
+  constexpr int unroll = 64;
+  int tail = num_bits % unroll;
+#if defined(ARROW_HAVE_AVX2)
+  if (hardware_flags & arrow::internal::CpuInfo::AVX2) {
+    if (filter_input_indexes) {
+      bits_filter_indexes_avx2(bit_to_search, num_bits - tail, bits, input_indexes,
+                               num_indexes, indexes);
+    } else {
+      bits_to_indexes_avx2(bit_to_search, num_bits - tail, bits, num_indexes, indexes);
+    }
+  } else {
+#endif
+    *num_indexes = 0;
+    for (int i = 0; i < num_bits / unroll; ++i) {
+      uint64_t word = reinterpret_cast<const uint64_t*>(bits)[i];
+      if (bit_to_search == 0) {
+        word = ~word;
+      }
+      if (filter_input_indexes) {
+        bits_filter_indexes_helper(word, input_indexes + i * 64, num_indexes, indexes);
+      } else {
+        bits_to_indexes_helper(word, i * 64, num_indexes, indexes);
+      }
+    }
+#if defined(ARROW_HAVE_AVX2)
+  }
+#endif
+  // Optionally process the last partial word with masking out bits outside range
+  if (tail) {
+    uint64_t word = reinterpret_cast<const uint64_t*>(bits)[num_bits / unroll];
+    if (bit_to_search == 0) {
+      word = ~word;
+    }
+    word &= ~0ULL >> (64 - tail);
+    if (filter_input_indexes) {
+      bits_filter_indexes_helper(word, input_indexes + num_bits - tail, num_indexes,
+                                 indexes);
+    } else {
+      bits_to_indexes_helper(word, num_bits - tail, num_indexes, indexes);
+    }
+  }
+}
+
+void BitUtil::bits_to_indexes(int bit_to_search, int64_t hardware_flags,
+                              const int num_bits, const uint8_t* bits, int* num_indexes,
+                              uint16_t* indexes) {
+  if (bit_to_search == 0) {
+    bits_to_indexes_internal<0, false>(hardware_flags, num_bits, bits, nullptr,
+                                       num_indexes, indexes);
+  } else {
+    ARROW_DCHECK(bit_to_search == 1);
+    bits_to_indexes_internal<1, false>(hardware_flags, num_bits, bits, nullptr,
+                                       num_indexes, indexes);
+  }
+}
+
+void BitUtil::bits_filter_indexes(int bit_to_search, int64_t hardware_flags,
+                                  const int num_bits, const uint8_t* bits,
+                                  const uint16_t* input_indexes, int* num_indexes,
+                                  uint16_t* indexes) {
+  if (bit_to_search == 0) {
+    bits_to_indexes_internal<0, true>(hardware_flags, num_bits, bits, input_indexes,
+                                      num_indexes, indexes);
+  } else {
+    ARROW_DCHECK(bit_to_search == 1);
+    bits_to_indexes_internal<1, true>(hardware_flags, num_bits, bits, input_indexes,
+                                      num_indexes, indexes);
+  }
+}
+
+void BitUtil::bits_split_indexes(int64_t hardware_flags, const int num_bits,
+                                 const uint8_t* bits, int* num_indexes_bit0,
+                                 uint16_t* indexes_bit0, uint16_t* indexes_bit1) {
+  bits_to_indexes(0, hardware_flags, num_bits, bits, num_indexes_bit0, indexes_bit0);
+  int num_indexes_bit1;
+  bits_to_indexes(1, hardware_flags, num_bits, bits, &num_indexes_bit1, indexes_bit1);
+}
+
+void BitUtil::bits_to_bytes_internal(const int num_bits, const uint8_t* bits,
+                                     uint8_t* bytes) {
+  constexpr int unroll = 8;
+  // Processing 8 bits at a time
+  for (int i = 0; i < (num_bits + unroll - 1) / unroll; ++i) {
+    uint8_t bits_next = bits[i];
+    // Clear the lowest bit and then make 8 copies of remaining 7 bits, each 7 bits apart
+    // from the previous.
+    uint64_t unpacked = static_cast<uint64_t>(bits_next & 0xfe) *
+                        ((1ULL << 7) | (1ULL << 14) | (1ULL << 21) | (1ULL << 28) |
+                         (1ULL << 35) | (1ULL << 42) | (1ULL << 49));
+    unpacked |= (bits_next & 1);
+    unpacked &= 0x0101010101010101ULL;
+    unpacked *= 255;
+    reinterpret_cast<uint64_t*>(bytes)[i] = unpacked;
+  }
+}
+
+void BitUtil::bytes_to_bits_internal(const int num_bits, const uint8_t* bytes,
+                                     uint8_t* bits) {
+  constexpr int unroll = 8;
+  // Process 8 bits at a time
+  for (int i = 0; i < (num_bits + unroll - 1) / unroll; ++i) {
+    uint64_t bytes_next = reinterpret_cast<const uint64_t*>(bytes)[i];
+    bytes_next &= 0x0101010101010101ULL;
+    bytes_next |= (bytes_next >> 7);  // Pairs of adjacent output bits in individual bytes
+    bytes_next |= (bytes_next >> 14);  // 4 adjacent output bits in individual bytes
+    bytes_next |= (bytes_next >> 28);  // All 8 output bits in the lowest byte
+    bits[i] = static_cast<uint8_t>(bytes_next & 0xff);
+  }
+}
+
+void BitUtil::bits_to_bytes(int64_t hardware_flags, const int num_bits,
+                            const uint8_t* bits, uint8_t* bytes) {
+  int num_processed = 0;
+#if defined(ARROW_HAVE_AVX2)
+  if (hardware_flags & arrow::internal::CpuInfo::AVX2) {
+    // The function call below processes whole 32 bit chunks together.
+    num_processed = num_bits - (num_bits % 32);
+    bits_to_bytes_avx2(num_processed, bits, bytes);
+  }
+#endif
+  // Processing 8 bits at a time
+  constexpr int unroll = 8;
+  for (int i = num_processed / unroll; i < (num_bits + unroll - 1) / unroll; ++i) {
+    uint8_t bits_next = bits[i];
+    // Clear the lowest bit and then make 8 copies of remaining 7 bits, each 7 bits apart
+    // from the previous.
+    uint64_t unpacked = static_cast<uint64_t>(bits_next & 0xfe) *
+                        ((1ULL << 7) | (1ULL << 14) | (1ULL << 21) | (1ULL << 28) |
+                         (1ULL << 35) | (1ULL << 42) | (1ULL << 49));
+    unpacked |= (bits_next & 1);
+    unpacked &= 0x0101010101010101ULL;
+    unpacked *= 255;
+    reinterpret_cast<uint64_t*>(bytes)[i] = unpacked;
+  }
+}
+
+void BitUtil::bytes_to_bits(int64_t hardware_flags, const int num_bits,
+                            const uint8_t* bytes, uint8_t* bits) {
+  int num_processed = 0;
+#if defined(ARROW_HAVE_AVX2)
+  if (hardware_flags & arrow::internal::CpuInfo::AVX2) {
+    // The function call below processes whole 32 bit chunks together.
+    num_processed = num_bits - (num_bits % 32);
+    bytes_to_bits_avx2(num_processed, bytes, bits);
+  }
+#endif
+  // Process 8 bits at a time
+  constexpr int unroll = 8;
+  for (int i = num_processed / unroll; i < (num_bits + unroll - 1) / unroll; ++i) {
+    uint64_t bytes_next = reinterpret_cast<const uint64_t*>(bytes)[i];
+    bytes_next &= 0x0101010101010101ULL;
+    bytes_next |= (bytes_next >> 7);  // Pairs of adjacent output bits in individual bytes
+    bytes_next |= (bytes_next >> 14);  // 4 adjacent output bits in individual bytes
+    bytes_next |= (bytes_next >> 28);  // All 8 output bits in the lowest byte
+    bits[i] = static_cast<uint8_t>(bytes_next & 0xff);
+  }
+}
+
+bool BitUtil::are_all_bytes_zero(int64_t hardware_flags, const uint8_t* bytes,
+                                 uint32_t num_bytes) {
+#if defined(ARROW_HAVE_AVX2)
+  if (hardware_flags & arrow::internal::CpuInfo::AVX2) {
+    return are_all_bytes_zero_avx2(bytes, num_bytes);
+  }
+#endif
+  uint64_t result_or = 0;
+  uint32_t i;
+  for (i = 0; i < num_bytes / 8; ++i) {
+    uint64_t x = reinterpret_cast<const uint64_t*>(bytes)[i];
+    result_or |= x;
+  }
+  if (num_bytes % 8 > 0) {
+    uint64_t tail = 0;
+    result_or |= memcmp(bytes + i * 8, &tail, num_bytes % 8);
+  }
+  return result_or == 0;
+}
+
+}  // namespace util
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/exec/util.h b/cpp/src/arrow/compute/exec/util.h
new file mode 100644
index 00000000000..d345bd3af0b
--- /dev/null
+++ b/cpp/src/arrow/compute/exec/util.h
@@ -0,0 +1,173 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <vector>
+
+#include "arrow/buffer.h"
+#include "arrow/memory_pool.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/util/cpu_info.h"
+#include "arrow/util/logging.h"
+
+#if defined(__clang__) || defined(__GNUC__)
+#define BYTESWAP(x) __builtin_bswap64(x)
+#define ROTL(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
+#elif defined(_MSC_VER)
+#include <intrin.h>
+#define BYTESWAP(x) _byteswap_uint64(x)
+#define ROTL(x, n) _rotl((x), (n))
+#endif
+
+namespace arrow {
+namespace util {
+
+// Some platforms typedef int64_t as long int instead of long long int,
+// which breaks the _mm256_i64gather_epi64 and _mm256_i32gather_epi64 intrinsics
+// which need long long.
+// We use the cast to the type below in these intrinsics to make the code
+// compile in all cases.
+//
+using int64_for_gather_t = const long long int;  // NOLINT runtime-int
+
+/// Storage used to allocate temporary vectors of a batch size.
+/// Temporary vectors should resemble allocating temporary variables on the stack
+/// but in the context of vectorized processing where we need to store a vector of
+/// temporaries instead of a single value.
+class TempVectorStack {
+  template <typename>
+  friend class TempVectorHolder;
+
+ public:
+  Status Init(MemoryPool* pool, int64_t size) {
+    num_vectors_ = 0;
+    top_ = 0;
+    buffer_size_ = size;
+    ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateResizableBuffer(size, pool));
+    buffer_ = std::move(buffer);
+    return Status::OK();
+  }
+
+ private:
+  void alloc(uint32_t num_bytes, uint8_t** data, int* id) {
+    int64_t old_top = top_;
+    top_ += num_bytes + padding;
+    // Stack overflow check
+    ARROW_DCHECK(top_ <= buffer_size_);
+    *data = buffer_->mutable_data() + old_top;
+    *id = num_vectors_++;
+  }
+  void release(int id, uint32_t num_bytes) {
+    ARROW_DCHECK(num_vectors_ == id + 1);
+    int64_t size = num_bytes + padding;
+    ARROW_DCHECK(top_ >= size);
+    top_ -= size;
+    --num_vectors_;
+  }
+  static constexpr int64_t padding = 64;
+  int num_vectors_;
+  int64_t top_;
+  std::unique_ptr<Buffer> buffer_;
+  int64_t buffer_size_;
+};
+
+template <typename T>
+class TempVectorHolder {
+  friend class TempVectorStack;
+
+ public:
+  ~TempVectorHolder() { stack_->release(id_, num_elements_ * sizeof(T)); }
+  T* mutable_data() { return reinterpret_cast<T*>(data_); }
+  TempVectorHolder(TempVectorStack* stack, uint32_t num_elements) {
+    stack_ = stack;
+    num_elements_ = num_elements;
+    stack_->alloc(num_elements * sizeof(T), &data_, &id_);
+  }
+
+ private:
+  TempVectorStack* stack_;
+  uint8_t* data_;
+  int id_;
+  uint32_t num_elements_;
+};
+
+class BitUtil {
+ public:
+  static void bits_to_indexes(int bit_to_search, int64_t hardware_flags,
+                              const int num_bits, const uint8_t* bits, int* num_indexes,
+                              uint16_t* indexes);
+
+  static void bits_filter_indexes(int bit_to_search, int64_t hardware_flags,
+                                  const int num_bits, const uint8_t* bits,
+                                  const uint16_t* input_indexes, int* num_indexes,
+                                  uint16_t* indexes);
+
+  // Input and output indexes may be pointing to the same data (in-place filtering).
+  static void bits_split_indexes(int64_t hardware_flags, const int num_bits,
+                                 const uint8_t* bits, int* num_indexes_bit0,
+                                 uint16_t* indexes_bit0, uint16_t* indexes_bit1);
+
+  // Bit 1 is replaced with byte 0xFF.
+  static void bits_to_bytes(int64_t hardware_flags, const int num_bits,
+                            const uint8_t* bits, uint8_t* bytes);
+  // Return highest bit of each byte.
+  static void bytes_to_bits(int64_t hardware_flags, const int num_bits,
+                            const uint8_t* bytes, uint8_t* bits);
+
+  static bool are_all_bytes_zero(int64_t hardware_flags, const uint8_t* bytes,
+                                 uint32_t num_bytes);
+
+ private:
+  inline static void bits_to_indexes_helper(uint64_t word, uint16_t base_index,
+                                            int* num_indexes, uint16_t* indexes);
+  inline static void bits_filter_indexes_helper(uint64_t word,
+                                                const uint16_t* input_indexes,
+                                                int* num_indexes, uint16_t* indexes);
+  template <int bit_to_search, bool filter_input_indexes>
+  static void bits_to_indexes_internal(int64_t hardware_flags, const int num_bits,
+                                       const uint8_t* bits, const uint16_t* input_indexes,
+                                       int* num_indexes, uint16_t* indexes);
+  static void bits_to_bytes_internal(const int num_bits, const uint8_t* bits,
+                                     uint8_t* bytes);
+  static void bytes_to_bits_internal(const int num_bits, const uint8_t* bytes,
+                                     uint8_t* bits);
+
+#if defined(ARROW_HAVE_AVX2)
+  static void bits_to_indexes_avx2(int bit_to_search, const int num_bits,
+                                   const uint8_t* bits, int* num_indexes,
+                                   uint16_t* indexes);
+  static void bits_filter_indexes_avx2(int bit_to_search, const int num_bits,
+                                       const uint8_t* bits, const uint16_t* input_indexes,
+                                       int* num_indexes, uint16_t* indexes);
+  template <int bit_to_search>
+  static void bits_to_indexes_imp_avx2(const int num_bits, const uint8_t* bits,
+                                       int* num_indexes, uint16_t* indexes);
+  template <int bit_to_search>
+  static void bits_filter_indexes_imp_avx2(const int num_bits, const uint8_t* bits,
+                                           const uint16_t* input_indexes,
+                                           int* num_indexes, uint16_t* indexes);
+  static void bits_to_bytes_avx2(const int num_bits, const uint8_t* bits, uint8_t* bytes);
+  static void bytes_to_bits_avx2(const int num_bits, const uint8_t* bytes, uint8_t* bits);
+  static bool are_all_bytes_zero_avx2(const uint8_t* bytes, uint32_t num_bytes);
+#endif
+};
+
+}  // namespace util
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/exec/util_avx2.cc b/cpp/src/arrow/compute/exec/util_avx2.cc
new file mode 100644
index 00000000000..8cf0104db46
--- /dev/null
+++ b/cpp/src/arrow/compute/exec/util_avx2.cc
@@ -0,0 +1,217 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <immintrin.h>
+
+#include "arrow/compute/exec/util.h"
+#include "arrow/util/bit_util.h"
+
+namespace arrow {
+namespace util {
+
+#if defined(ARROW_HAVE_AVX2)
+
+void BitUtil::bits_to_indexes_avx2(int bit_to_search, const int num_bits,
+                                   const uint8_t* bits, int* num_indexes,
+                                   uint16_t* indexes) {
+  if (bit_to_search == 0) {
+    bits_to_indexes_imp_avx2<0>(num_bits, bits, num_indexes, indexes);
+  } else {
+    ARROW_DCHECK(bit_to_search == 1);
+    bits_to_indexes_imp_avx2<1>(num_bits, bits, num_indexes, indexes);
+  }
+}
+
+template <int bit_to_search>
+void BitUtil::bits_to_indexes_imp_avx2(const int num_bits, const uint8_t* bits,
+                                       int* num_indexes, uint16_t* indexes) {
+  // 64 bits at a time
+  constexpr int unroll = 64;
+
+  // The caller takes care of processing the remaining bits at the end outside of the
+  // multiples of 64
+  ARROW_DCHECK(num_bits % unroll == 0);
+
+  constexpr uint64_t kEachByteIs1 = 0X0101010101010101ULL;
+  constexpr uint64_t kEachByteIs8 = 0x0808080808080808ULL;
+  constexpr uint64_t kByteSequence0To7 = 0x0706050403020100ULL;
+
+  uint8_t byte_indexes[64];
+  const uint64_t incr = kEachByteIs8;
+  const uint64_t mask = kByteSequence0To7;
+  *num_indexes = 0;
+  for (int i = 0; i < num_bits / unroll; ++i) {
+    uint64_t word = reinterpret_cast<const uint64_t*>(bits)[i];
+    if (bit_to_search == 0) {
+      word = ~word;
+    }
+    uint64_t base = 0;
+    int num_indexes_loop = 0;
+    while (word) {
+      uint64_t byte_indexes_next =
+          _pext_u64(mask, _pdep_u64(word, kEachByteIs1) * 0xff) + base;
+      *reinterpret_cast<uint64_t*>(byte_indexes + num_indexes_loop) = byte_indexes_next;
+      base += incr;
+      num_indexes_loop += static_cast<int>(arrow::BitUtil::PopCount(word & 0xff));
+      word >>= 8;
+    }
+    // Unpack indexes to 16-bits and either add the base of i * 64 or shuffle input
+    // indexes
+    for (int j = 0; j < (num_indexes_loop + 15) / 16; ++j) {
+      __m256i output = _mm256_cvtepi8_epi16(
+          _mm_loadu_si128(reinterpret_cast<const __m128i*>(byte_indexes) + j));
+      output = _mm256_add_epi16(output, _mm256_set1_epi16(i * 64));
+      _mm256_storeu_si256(((__m256i*)(indexes + *num_indexes)) + j, output);
+    }
+    *num_indexes += num_indexes_loop;
+  }
+}
+
+void BitUtil::bits_filter_indexes_avx2(int bit_to_search, const int num_bits,
+                                       const uint8_t* bits, const uint16_t* input_indexes,
+                                       int* num_indexes, uint16_t* indexes) {
+  if (bit_to_search == 0) {
+    bits_filter_indexes_imp_avx2<0>(num_bits, bits, input_indexes, num_indexes, indexes);
+  } else {
+    bits_filter_indexes_imp_avx2<1>(num_bits, bits, input_indexes, num_indexes, indexes);
+  }
+}
+
+template <int bit_to_search>
+void BitUtil::bits_filter_indexes_imp_avx2(const int num_bits, const uint8_t* bits,
+                                           const uint16_t* input_indexes,
+                                           int* out_num_indexes, uint16_t* indexes) {
+  // 64 bits at a time
+  constexpr int unroll = 64;
+
+  // The caller takes care of processing the remaining bits at the end outside of the
+  // multiples of 64
+  ARROW_DCHECK(num_bits % unroll == 0);
+
+  constexpr uint64_t kRepeatedBitPattern0001 = 0x1111111111111111ULL;
+  constexpr uint64_t k4BitSequence0To15 = 0xfedcba9876543210ULL;
+  constexpr uint64_t kByteSequence_0_0_1_1_2_2_3_3 = 0x0303020201010000ULL;
+  constexpr uint64_t kByteSequence_4_4_5_5_6_6_7_7 = 0x0707060605050404ULL;
+  constexpr uint64_t kByteSequence_0_2_4_6_8_10_12_14 = 0x0e0c0a0806040200ULL;
+  constexpr uint64_t kByteSequence_1_3_5_7_9_11_13_15 = 0x0f0d0b0907050301ULL;
+  constexpr uint64_t kByteSequence_0_8_1_9_2_10_3_11 = 0x0b030a0209010800ULL;
+  constexpr uint64_t kByteSequence_4_12_5_13_6_14_7_15 = 0x0f070e060d050c04ULL;
+
+  const uint64_t mask = k4BitSequence0To15;
+  int num_indexes = 0;
+  for (int i = 0; i < num_bits / unroll; ++i) {
+    uint64_t word = reinterpret_cast<const uint64_t*>(bits)[i];
+    if (bit_to_search == 0) {
+      word = ~word;
+    }
+
+    int loop_id = 0;
+    while (word) {
+      uint64_t indexes_4bit =
+          _pext_u64(mask, _pdep_u64(word, kRepeatedBitPattern0001) * 0xf);
+      // Unpack 4 bit indexes to 8 bits
+      __m256i indexes_8bit = _mm256_set1_epi64x(indexes_4bit);
+      indexes_8bit = _mm256_shuffle_epi8(
+          indexes_8bit,
+          _mm256_setr_epi64x(kByteSequence_0_0_1_1_2_2_3_3, kByteSequence_4_4_5_5_6_6_7_7,
+                             kByteSequence_0_0_1_1_2_2_3_3,
+                             kByteSequence_4_4_5_5_6_6_7_7));
+      indexes_8bit = _mm256_blendv_epi8(
+          _mm256_and_si256(indexes_8bit, _mm256_set1_epi8(0x0f)),
+          _mm256_and_si256(_mm256_srli_epi32(indexes_8bit, 4), _mm256_set1_epi8(0x0f)),
+          _mm256_set1_epi16(static_cast<uint16_t>(0xff00)));
+      __m256i input =
+          _mm256_loadu_si256(((const __m256i*)input_indexes) + 4 * i + loop_id);
+      // Shuffle bytes to get low bytes in the first 128-bit lane and high bytes in the
+      // second
+      input = _mm256_shuffle_epi8(
+          input, _mm256_setr_epi64x(
+                     kByteSequence_0_2_4_6_8_10_12_14, kByteSequence_1_3_5_7_9_11_13_15,
+                     kByteSequence_0_2_4_6_8_10_12_14, kByteSequence_1_3_5_7_9_11_13_15));
+      input = _mm256_permute4x64_epi64(input, 0xd8);  // 0b11011000
+      // Apply permutation
+      __m256i output = _mm256_shuffle_epi8(input, indexes_8bit);
+      // Move low and high bytes across 128-bit lanes to assemble back 16-bit indexes.
+      // (This is the reverse of the byte permutation we did on the input)
+      output = _mm256_permute4x64_epi64(output,
+                                        0xd8);  // The reverse of swapping 2nd and 3rd
+                                                // 64-bit element is the same permutation
+      output = _mm256_shuffle_epi8(output,
+                                   _mm256_setr_epi64x(kByteSequence_0_8_1_9_2_10_3_11,
+                                                      kByteSequence_4_12_5_13_6_14_7_15,
+                                                      kByteSequence_0_8_1_9_2_10_3_11,
+                                                      kByteSequence_4_12_5_13_6_14_7_15));
+      _mm256_storeu_si256((__m256i*)(indexes + num_indexes), output);
+      num_indexes += static_cast<int>(arrow::BitUtil::PopCount(word & 0xffff));
+      word >>= 16;
+      ++loop_id;
+    }
+  }
+
+  *out_num_indexes = num_indexes;
+}
+
+void BitUtil::bits_to_bytes_avx2(const int num_bits, const uint8_t* bits,
+                                 uint8_t* bytes) {
+  constexpr int unroll = 32;
+
+  constexpr uint64_t kEachByteIs1 = 0x0101010101010101ULL;
+  constexpr uint64_t kEachByteIs2 = 0x0202020202020202ULL;
+  constexpr uint64_t kEachByteIs3 = 0x0303030303030303ULL;
+  constexpr uint64_t kByteSequencePowersOf2 = 0x8040201008040201ULL;
+
+  // Processing 32 bits at a time
+  for (int i = 0; i < num_bits / unroll; ++i) {
+    __m256i unpacked = _mm256_set1_epi32(reinterpret_cast<const uint32_t*>(bits)[i]);
+    unpacked = _mm256_shuffle_epi8(
+        unpacked, _mm256_setr_epi64x(0ULL, kEachByteIs1, kEachByteIs2, kEachByteIs3));
+    __m256i bits_in_bytes = _mm256_set1_epi64x(kByteSequencePowersOf2);
+    unpacked =
+        _mm256_cmpeq_epi8(bits_in_bytes, _mm256_and_si256(unpacked, bits_in_bytes));
+    _mm256_storeu_si256(reinterpret_cast<__m256i*>(bytes) + i, unpacked);
+  }
+}
+
+void BitUtil::bytes_to_bits_avx2(const int num_bits, const uint8_t* bytes,
+                                 uint8_t* bits) {
+  constexpr int unroll = 32;
+  // Processing 32 bits at a time
+  for (int i = 0; i < num_bits / unroll; ++i) {
+    reinterpret_cast<uint32_t*>(bits)[i] = _mm256_movemask_epi8(
+        _mm256_loadu_si256(reinterpret_cast<const __m256i*>(bytes) + i));
+  }
+}
+
+bool BitUtil::are_all_bytes_zero_avx2(const uint8_t* bytes, uint32_t num_bytes) {
+  __m256i result_or = _mm256_setzero_si256();
+  uint32_t i;
+  for (i = 0; i < num_bytes / 32; ++i) {
+    __m256i x = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(bytes) + i);
+    result_or = _mm256_or_si256(result_or, x);
+  }
+  uint32_t result_or32 = _mm256_movemask_epi8(result_or);
+  if (num_bytes % 32 > 0) {
+    uint64_t tail[4] = {0, 0, 0, 0};
+    result_or32 |= memcmp(bytes + i * 32, tail, num_bytes % 32);
+  }
+  return result_or32 == 0;
+}
+
+#endif  // ARROW_HAVE_AVX2
+
+}  // namespace util
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate.cc b/cpp/src/arrow/compute/kernels/hash_aggregate.cc
index ae7bf9324db..0e5c8ace53f 100644
--- a/cpp/src/arrow/compute/kernels/hash_aggregate.cc
+++ b/cpp/src/arrow/compute/kernels/hash_aggregate.cc
@@ -15,8 +15,6 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "arrow/compute/api_aggregate.h"
-
 #include <functional>
 #include <memory>
 #include <string>
@@ -24,7 +22,13 @@
 #include <vector>
 
 #include "arrow/buffer_builder.h"
+#include "arrow/compute/api_aggregate.h"
 #include "arrow/compute/api_vector.h"
+#include "arrow/compute/exec/key_compare.h"
+#include "arrow/compute/exec/key_encode.h"
+#include "arrow/compute/exec/key_hash.h"
+#include "arrow/compute/exec/key_map.h"
+#include "arrow/compute/exec/util.h"
 #include "arrow/compute/exec_internal.h"
 #include "arrow/compute/kernel.h"
 #include "arrow/compute/kernels/aggregate_internal.h"
@@ -33,6 +37,7 @@
 #include "arrow/util/bitmap_ops.h"
 #include "arrow/util/bitmap_writer.h"
 #include "arrow/util/checked_cast.h"
+#include "arrow/util/cpu_info.h"
 #include "arrow/util/make_unique.h"
 #include "arrow/visitor_inline.h"
 
@@ -436,6 +441,297 @@ struct GrouperImpl : Grouper {
   std::vector<std::unique_ptr<KeyEncoder>> encoders_;
 };
 
+struct GrouperFastImpl : Grouper {
+  static bool CanUse(const std::vector<ValueDescr>& keys) {
+#if ARROW_LITTLE_ENDIAN
+    for (size_t i = 0; i < keys.size(); ++i) {
+      const auto& key = keys[i].type;
+      if (is_large_binary_like(key->id())) {
+        return false;
+      }
+    }
+    return true;
+#else
+    return false;
+#endif
+  }
+
+  static Result<std::unique_ptr<GrouperFastImpl>> Make(
+      const std::vector<ValueDescr>& keys, ExecContext* ctx) {
+    auto impl = ::arrow::internal::make_unique<GrouperFastImpl>();
+    impl->ctx_ = ctx;
+
+    RETURN_NOT_OK(impl->temp_stack_.Init(ctx->memory_pool(), 64 * minibatch_size_max_));
+    impl->encode_ctx_.hardware_flags =
+        arrow::internal::CpuInfo::GetInstance()->hardware_flags();
+    impl->encode_ctx_.stack = &impl->temp_stack_;
+
+    auto num_columns = keys.size();
+    impl->col_metadata_.resize(num_columns);
+    impl->key_types_.resize(num_columns);
+    impl->dictionaries_.resize(num_columns);
+    for (size_t icol = 0; icol < num_columns; ++icol) {
+      const auto& key = keys[icol].type;
+      if (key->id() == Type::DICTIONARY) {
+        auto bit_width = checked_cast<const FixedWidthType&>(*key).bit_width();
+        ARROW_DCHECK(bit_width % 8 == 0);
+        impl->col_metadata_[icol] =
+            arrow::compute::KeyEncoder::KeyColumnMetadata(true, bit_width / 8);
+      } else if (key->id() == Type::BOOL) {
+        impl->col_metadata_[icol] =
+            arrow::compute::KeyEncoder::KeyColumnMetadata(true, 0);
+      } else if (is_fixed_width(key->id())) {
+        impl->col_metadata_[icol] = arrow::compute::KeyEncoder::KeyColumnMetadata(
+            true, checked_cast<const FixedWidthType&>(*key).bit_width() / 8);
+      } else if (is_binary_like(key->id())) {
+        impl->col_metadata_[icol] =
+            arrow::compute::KeyEncoder::KeyColumnMetadata(false, sizeof(uint32_t));
+      } else {
+        return Status::NotImplemented("Keys of type ", *key);
+      }
+      impl->key_types_[icol] = key;
+    }
+
+    impl->encoder_.Init(impl->col_metadata_, &impl->encode_ctx_,
+                        /* row_alignment = */ sizeof(uint64_t),
+                        /* string_alignment = */ sizeof(uint64_t));
+    RETURN_NOT_OK(impl->rows_.Init(ctx->memory_pool(), impl->encoder_.row_metadata()));
+    RETURN_NOT_OK(
+        impl->rows_minibatch_.Init(ctx->memory_pool(), impl->encoder_.row_metadata()));
+    impl->minibatch_size_ = impl->minibatch_size_min_;
+    GrouperFastImpl* impl_ptr = impl.get();
+    auto equal_func = [impl_ptr](
+                          int num_keys_to_compare, const uint16_t* selection_may_be_null,
+                          const uint32_t* group_ids, uint32_t* out_num_keys_mismatch,
+                          uint16_t* out_selection_mismatch) {
+      arrow::compute::KeyCompare::CompareRows(
+          num_keys_to_compare, selection_may_be_null, group_ids, &impl_ptr->encode_ctx_,
+          out_num_keys_mismatch, out_selection_mismatch, impl_ptr->rows_minibatch_,
+          impl_ptr->rows_);
+    };
+    auto append_func = [impl_ptr](int num_keys, const uint16_t* selection) {
+      return impl_ptr->rows_.AppendSelectionFrom(impl_ptr->rows_minibatch_, num_keys,
+                                                 selection);
+    };
+    RETURN_NOT_OK(impl->map_.init(impl->encode_ctx_.hardware_flags, ctx->memory_pool(),
+                                  impl->encode_ctx_.stack, impl->log_minibatch_max_,
+                                  equal_func, append_func));
+    impl->cols_.resize(num_columns);
+    constexpr int padding_for_SIMD = 32;
+    impl->minibatch_hashes_.resize(impl->minibatch_size_max_ +
+                                   padding_for_SIMD / sizeof(uint32_t));
+
+    return std::move(impl);
+  }
+
+  ~GrouperFastImpl() { map_.cleanup(); }
+
+  Result<Datum> Consume(const ExecBatch& batch) override {
+    int64_t num_rows = batch.length;
+    int num_columns = batch.num_values();
+
+    // Process dictionaries
+    for (int icol = 0; icol < num_columns; ++icol) {
+      if (key_types_[icol]->id() == Type::DICTIONARY) {
+        auto data = batch[icol].array();
+        auto dict = MakeArray(data->dictionary);
+        if (dictionaries_[icol]) {
+          if (!dictionaries_[icol]->Equals(dict)) {
+            // TODO(bkietz) unify if necessary. For now, just error if any batch's
+            // dictionary differs from the first we saw for this key
+            return Status::NotImplemented("Unifying differing dictionaries");
+          }
+        } else {
+          dictionaries_[icol] = std::move(dict);
+        }
+      }
+    }
+
+    std::shared_ptr<arrow::Buffer> group_ids;
+    ARROW_ASSIGN_OR_RAISE(
+        group_ids, AllocateBuffer(sizeof(uint32_t) * num_rows, ctx_->memory_pool()));
+
+    for (int icol = 0; icol < num_columns; ++icol) {
+      const uint8_t* non_nulls = nullptr;
+      if (batch[icol].array()->buffers[0] != NULLPTR) {
+        non_nulls = batch[icol].array()->buffers[0]->data();
+      }
+      const uint8_t* fixedlen = batch[icol].array()->buffers[1]->data();
+      const uint8_t* varlen = nullptr;
+      if (!col_metadata_[icol].is_fixed_length) {
+        varlen = batch[icol].array()->buffers[2]->data();
+      }
+
+      cols_[icol] = arrow::compute::KeyEncoder::KeyColumnArray(
+          col_metadata_[icol], num_rows, non_nulls, fixedlen, varlen);
+    }
+
+    // Split into smaller mini-batches
+    //
+    for (uint32_t start_row = 0; start_row < num_rows;) {
+      uint32_t batch_size_next = std::min(static_cast<uint32_t>(minibatch_size_),
+                                          static_cast<uint32_t>(num_rows) - start_row);
+
+      // Encode
+      rows_minibatch_.Clean();
+      RETURN_NOT_OK(encoder_.PrepareOutputForEncode(start_row, batch_size_next,
+                                                    &rows_minibatch_, cols_));
+      encoder_.Encode(start_row, batch_size_next, &rows_minibatch_, cols_);
+
+      // Compute hash
+      if (encoder_.row_metadata().is_fixed_length) {
+        Hashing::hash_fixed(encode_ctx_.hardware_flags, batch_size_next,
+                            encoder_.row_metadata().fixed_length, rows_minibatch_.data(1),
+                            minibatch_hashes_.data());
+      } else {
+        auto hash_temp_buf =
+            util::TempVectorHolder<uint32_t>(&temp_stack_, 4 * batch_size_next);
+        Hashing::hash_varlen(encode_ctx_.hardware_flags, batch_size_next,
+                             rows_minibatch_.offsets(), rows_minibatch_.data(2),
+                             hash_temp_buf.mutable_data(), minibatch_hashes_.data());
+      }
+
+      // Map
+      RETURN_NOT_OK(
+          map_.map(batch_size_next, minibatch_hashes_.data(),
+                   reinterpret_cast<uint32_t*>(group_ids->mutable_data()) + start_row));
+
+      start_row += batch_size_next;
+
+      if (minibatch_size_ * 2 <= minibatch_size_max_) {
+        minibatch_size_ *= 2;
+      }
+    }
+
+    return Datum(UInt32Array(batch.length, std::move(group_ids)));
+  }
+
+  uint32_t num_groups() const override { return static_cast<uint32_t>(rows_.length()); }
+
+  Result<ExecBatch> GetUniques() override {
+    auto num_columns = static_cast<uint32_t>(col_metadata_.size());
+    int64_t num_groups = rows_.length();
+
+    std::vector<std::shared_ptr<Buffer>> non_null_bufs(num_columns);
+    std::vector<std::shared_ptr<Buffer>> fixedlen_bufs(num_columns);
+    std::vector<std::shared_ptr<Buffer>> varlen_bufs(num_columns);
+
+    constexpr int padding_bits = 64;
+    constexpr int padding_for_SIMD = 32;
+    for (size_t i = 0; i < num_columns; ++i) {
+      ARROW_ASSIGN_OR_RAISE(non_null_bufs[i], AllocateBitmap(num_groups + padding_bits,
+                                                             ctx_->memory_pool()));
+      if (col_metadata_[i].is_fixed_length) {
+        if (col_metadata_[i].fixed_length == 0) {
+          ARROW_ASSIGN_OR_RAISE(
+              fixedlen_bufs[i],
+              AllocateBitmap(num_groups + padding_bits, ctx_->memory_pool()));
+        } else {
+          ARROW_ASSIGN_OR_RAISE(
+              fixedlen_bufs[i],
+              AllocateBuffer(
+                  num_groups * col_metadata_[i].fixed_length + padding_for_SIMD,
+                  ctx_->memory_pool()));
+        }
+      } else {
+        ARROW_ASSIGN_OR_RAISE(
+            fixedlen_bufs[i],
+            AllocateBuffer((num_groups + 1) * sizeof(uint32_t) + padding_for_SIMD,
+                           ctx_->memory_pool()));
+      }
+      cols_[i] = arrow::compute::KeyEncoder::KeyColumnArray(
+          col_metadata_[i], num_groups, non_null_bufs[i]->mutable_data(),
+          fixedlen_bufs[i]->mutable_data(), nullptr);
+    }
+
+    for (int64_t start_row = 0; start_row < num_groups;) {
+      int64_t batch_size_next =
+          std::min(num_groups - start_row, static_cast<int64_t>(minibatch_size_max_));
+      encoder_.DecodeFixedLengthBuffers(start_row, start_row, batch_size_next, rows_,
+                                        &cols_);
+      start_row += batch_size_next;
+    }
+
+    if (!rows_.metadata().is_fixed_length) {
+      for (size_t i = 0; i < num_columns; ++i) {
+        if (!col_metadata_[i].is_fixed_length) {
+          auto varlen_size =
+              reinterpret_cast<const uint32_t*>(fixedlen_bufs[i]->data())[num_groups];
+          ARROW_ASSIGN_OR_RAISE(
+              varlen_bufs[i],
+              AllocateBuffer(varlen_size + padding_for_SIMD, ctx_->memory_pool()));
+          cols_[i] = arrow::compute::KeyEncoder::KeyColumnArray(
+              col_metadata_[i], num_groups, non_null_bufs[i]->mutable_data(),
+              fixedlen_bufs[i]->mutable_data(), varlen_bufs[i]->mutable_data());
+        }
+      }
+
+      for (int64_t start_row = 0; start_row < num_groups;) {
+        int64_t batch_size_next =
+            std::min(num_groups - start_row, static_cast<int64_t>(minibatch_size_max_));
+        encoder_.DecodeVaryingLengthBuffers(start_row, start_row, batch_size_next, rows_,
+                                            &cols_);
+        start_row += batch_size_next;
+      }
+    }
+
+    ExecBatch out({}, num_groups);
+    out.values.resize(num_columns);
+    for (size_t i = 0; i < num_columns; ++i) {
+      auto valid_count = arrow::internal::CountSetBits(
+          non_null_bufs[i]->data(), /*offset=*/0, static_cast<int64_t>(num_groups));
+      int null_count = static_cast<int>(num_groups) - static_cast<int>(valid_count);
+
+      if (col_metadata_[i].is_fixed_length) {
+        out.values[i] = ArrayData::Make(
+            key_types_[i], num_groups,
+            {std::move(non_null_bufs[i]), std::move(fixedlen_bufs[i])}, null_count);
+      } else {
+        out.values[i] =
+            ArrayData::Make(key_types_[i], num_groups,
+                            {std::move(non_null_bufs[i]), std::move(fixedlen_bufs[i]),
+                             std::move(varlen_bufs[i])},
+                            null_count);
+      }
+    }
+
+    // Process dictionaries
+    for (size_t icol = 0; icol < num_columns; ++icol) {
+      if (key_types_[icol]->id() == Type::DICTIONARY) {
+        if (dictionaries_[icol]) {
+          out.values[icol].array()->dictionary = dictionaries_[icol]->data();
+        } else {
+          ARROW_ASSIGN_OR_RAISE(auto dict, MakeArrayOfNull(key_types_[icol], 0));
+          out.values[icol].array()->dictionary = dict->data();
+        }
+      }
+    }
+
+    return out;
+  }
+
+  static constexpr int log_minibatch_max_ = 10;
+  static constexpr int minibatch_size_max_ = 1 << log_minibatch_max_;
+  static constexpr int minibatch_size_min_ = 128;
+  int minibatch_size_;
+
+  ExecContext* ctx_;
+  arrow::util::TempVectorStack temp_stack_;
+  arrow::compute::KeyEncoder::KeyEncoderContext encode_ctx_;
+
+  std::vector<std::shared_ptr<arrow::DataType>> key_types_;
+  std::vector<arrow::compute::KeyEncoder::KeyColumnMetadata> col_metadata_;
+  std::vector<arrow::compute::KeyEncoder::KeyColumnArray> cols_;
+  std::vector<uint32_t> minibatch_hashes_;
+
+  std::vector<std::shared_ptr<Array>> dictionaries_;
+
+  arrow::compute::KeyEncoder::KeyRowArray rows_;
+  arrow::compute::KeyEncoder::KeyRowArray rows_minibatch_;
+  arrow::compute::KeyEncoder encoder_;
+  arrow::compute::SwissTable map_;
+};
+
 /// C++ abstract base class for the HashAggregateKernel interface.
 /// Implementations should be default constructible and perform initialization in
 /// Init().
@@ -884,6 +1180,9 @@ Result<FieldVector> ResolveKernels(
 
 Result<std::unique_ptr<Grouper>> Grouper::Make(const std::vector<ValueDescr>& descrs,
                                                ExecContext* ctx) {
+  if (GrouperFastImpl::CanUse(descrs)) {
+    return GrouperFastImpl::Make(descrs, ctx);
+  }
   return GrouperImpl::Make(descrs, ctx);
 }
 
diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc b/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc
index 507f1716110..a0d2fd208a9 100644
--- a/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc
+++ b/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include <gtest/gtest.h>
+
 #include <algorithm>
 #include <limits>
 #include <memory>
@@ -22,8 +24,6 @@
 #include <unordered_map>
 #include <utility>
 
-#include <gtest/gtest.h>
-
 #include "arrow/array.h"
 #include "arrow/chunked_array.h"
 #include "arrow/compute/api_aggregate.h"
@@ -182,10 +182,52 @@ struct TestGrouper {
     ExpectConsume(*ExecBatch::Make(key_batch), expected);
   }
 
+  void AssertEquivalentIds(const Datum& expected, const Datum& actual) {
+    auto left = expected.make_array();
+    auto right = actual.make_array();
+    ASSERT_EQ(left->length(), right->length()) << "#ids unequal";
+    int64_t num_ids = left->length();
+    auto left_data = left->data();
+    auto right_data = right->data();
+    const uint32_t* left_ids =
+        reinterpret_cast<const uint32_t*>(left_data->buffers[1]->data());
+    const uint32_t* right_ids =
+        reinterpret_cast<const uint32_t*>(right_data->buffers[1]->data());
+    uint32_t max_left_id = 0;
+    uint32_t max_right_id = 0;
+    for (int64_t i = 0; i < num_ids; ++i) {
+      if (left_ids[i] > max_left_id) {
+        max_left_id = left_ids[i];
+      }
+      if (right_ids[i] > max_right_id) {
+        max_right_id = right_ids[i];
+      }
+    }
+    std::vector<bool> right_to_left_present(max_right_id + 1, false);
+    std::vector<bool> left_to_right_present(max_left_id + 1, false);
+    std::vector<uint32_t> right_to_left(max_right_id + 1);
+    std::vector<uint32_t> left_to_right(max_left_id + 1);
+    for (int64_t i = 0; i < num_ids; ++i) {
+      uint32_t left_id = left_ids[i];
+      uint32_t right_id = right_ids[i];
+      if (!left_to_right_present[left_id]) {
+        left_to_right[left_id] = right_id;
+        left_to_right_present[left_id] = true;
+      }
+      if (!right_to_left_present[right_id]) {
+        right_to_left[right_id] = left_id;
+        right_to_left_present[right_id] = true;
+      }
+      ASSERT_EQ(left_id, right_to_left[right_id]);
+      ASSERT_EQ(right_id, left_to_right[left_id]);
+    }
+  }
+
   void ExpectConsume(const ExecBatch& key_batch, Datum expected) {
     Datum ids;
     ConsumeAndValidate(key_batch, &ids);
-    AssertDatumsEqual(expected, ids, /*verbose=*/true);
+    AssertEquivalentIds(expected, ids);
+    // AssertDatumsEqual(expected, ids, /*verbose=*/true);
   }
 
   void ConsumeAndValidate(const ExecBatch& key_batch, Datum* ids = nullptr) {
diff --git a/cpp/src/arrow/dataset/partition_test.cc b/cpp/src/arrow/dataset/partition_test.cc
index 1c776f18329..7a7ffcff229 100644
--- a/cpp/src/arrow/dataset/partition_test.cc
+++ b/cpp/src/arrow/dataset/partition_test.cc
@@ -85,16 +85,23 @@ class TestPartitioning : public ::testing::Test {
                        const std::vector<compute::Expression>& expected_expressions) {
     ASSERT_OK_AND_ASSIGN(auto partition_results, partitioning->Partition(full_batch));
     std::shared_ptr<RecordBatch> rest = full_batch;
+
     ASSERT_EQ(partition_results.batches.size(), expected_batches.size());
-    auto max_index = std::min(partition_results.batches.size(), expected_batches.size());
-    for (std::size_t partition_index = 0; partition_index < max_index;
-         partition_index++) {
-      std::shared_ptr<RecordBatch> actual_batch =
-          partition_results.batches[partition_index];
-      AssertBatchesEqual(*expected_batches[partition_index], *actual_batch);
-      compute::Expression actual_expression =
-          partition_results.expressions[partition_index];
-      ASSERT_EQ(expected_expressions[partition_index], actual_expression);
+
+    for (size_t i = 0; i < partition_results.batches.size(); i++) {
+      std::shared_ptr<RecordBatch> actual_batch = partition_results.batches[i];
+      compute::Expression actual_expression = partition_results.expressions[i];
+
+      auto expected_expression = std::find(expected_expressions.begin(),
+                                           expected_expressions.end(), actual_expression);
+      ASSERT_NE(expected_expression, expected_expressions.end())
+          << "Unexpected partition expr " << actual_expression.ToString();
+
+      auto expected_batch =
+          expected_batches[expected_expression - expected_expressions.begin()];
+
+      SCOPED_TRACE("Batch for " + expected_expression->ToString());
+      AssertBatchesEqual(*expected_batch, *actual_batch);
     }
   }
 

From 0a824324eb87612b8e6baadddee3bec0cada2d0e Mon Sep 17 00:00:00 2001
From: Rok <rok@mihevc.org>
Date: Fri, 21 May 2021 10:01:26 +0900
Subject: [PATCH 276/719] ARROW-9054: [C++] Add ScalarAggregateOptions

[ARROW-9054](https://issues.apache.org/jira/projects/ARROW/issues/ARROW-9054).

This adds `ScalarAggregateOptions` to control null behavior of `mean` and `sum` kernels.

Closes #9758 from rok/ARROW-9054

Lead-authored-by: Rok <rok@mihevc.org>
Co-authored-by: Sutou Kouhei <kou@clear-code.com>
Co-authored-by: Rok Mihevc <rok@mihevc.org>
Co-authored-by: Neal Richardson <neal.p.richardson@gmail.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 c_glib/arrow-glib/compute.cpp                 | 182 +++++-----
 c_glib/arrow-glib/compute.h                   |  30 +-
 c_glib/arrow-glib/compute.hpp                 |  10 +-
 c_glib/arrow-glib/version.h.in                |  23 ++
 c_glib/doc/arrow-glib/arrow-glib-docs.xml     |   4 +
 c_glib/test/test-count.rb                     |  21 +-
 c_glib/test/test-scalar-aggregate-options.rb  |  48 +++
 cpp/src/arrow/compute/api_aggregate.cc        |  16 +-
 cpp/src/arrow/compute/api_aggregate.h         |  63 ++--
 .../arrow/compute/kernels/aggregate_basic.cc  |  91 +++--
 .../compute/kernels/aggregate_basic_avx2.cc   |  22 +-
 .../compute/kernels/aggregate_basic_avx512.cc |  22 +-
 .../kernels/aggregate_basic_internal.h        |  32 +-
 .../arrow/compute/kernels/aggregate_test.cc   | 312 ++++++++++++++----
 .../arrow/compute/kernels/hash_aggregate.cc   |  30 +-
 .../compute/kernels/hash_aggregate_test.cc    |  21 +-
 docs/source/cpp/compute.rst                   |  14 +-
 python/pyarrow/_compute.pyx                   |  51 +--
 python/pyarrow/compute.py                     |   5 +-
 python/pyarrow/includes/libarrow.pxd          |  28 +-
 python/pyarrow/tests/test_compute.py          |  36 +-
 r/R/compute.R                                 |  29 +-
 r/src/compute.cpp                             |   9 +-
 r/tests/testthat/test-compute-aggregate.R     |   1 -
 24 files changed, 686 insertions(+), 414 deletions(-)
 create mode 100644 c_glib/test/test-scalar-aggregate-options.rb

diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp
index 275e406be79..d284a430b81 100644
--- a/c_glib/arrow-glib/compute.cpp
+++ b/c_glib/arrow-glib/compute.cpp
@@ -130,8 +130,9 @@ G_BEGIN_DECLS
  * #GArrowCastOptions is a class to customize the `cast` function and
  * garrow_array_cast().
  *
- * #GArrowCountOptions is a class to customize the `count` function and
- * garrow_array_count().
+ * #GArrowScalarAggregateOptions is a class to customize the scalar
+ * aggregate functions such as `count` function and convenient
+ * functions of them such as garrow_array_count().
  *
  * #GArrowFilterOptions is a class to customize the `filter` function and
  * garrow_array_filter() family.
@@ -636,60 +637,65 @@ garrow_cast_options_new(void)
 }
 
 
-typedef struct GArrowCountOptionsPrivate_ {
-  arrow::compute::CountOptions options;
-} GArrowCountOptionsPrivate;
+typedef struct GArrowScalarAggregateOptionsPrivate_ {
+  arrow::compute::ScalarAggregateOptions options;
+} GArrowScalarAggregateOptionsPrivate;
 
 enum {
-  PROP_MODE = 1,
+  PROP_SKIP_NULLS = 1,
+  PROP_MIN_COUNT,
 };
 
 static arrow::compute::FunctionOptions *
-garrow_count_options_get_raw_function_options(GArrowFunctionOptions *options)
+garrow_scalar_aggregate_options_get_raw_function_options(
+  GArrowFunctionOptions *options)
 {
-  return garrow_count_options_get_raw(GARROW_COUNT_OPTIONS(options));
+  return garrow_scalar_aggregate_options_get_raw(
+    GARROW_SCALAR_AGGREGATE_OPTIONS(options));
 }
 
 static void
-garrow_count_options_function_options_interface_init(
+garrow_scalar_aggregate_options_function_options_interface_init(
   GArrowFunctionOptionsInterface *iface)
 {
-  iface->get_raw = garrow_count_options_get_raw_function_options;
+  iface->get_raw = garrow_scalar_aggregate_options_get_raw_function_options;
 }
 
-G_DEFINE_TYPE_WITH_CODE(GArrowCountOptions,
-                        garrow_count_options,
+G_DEFINE_TYPE_WITH_CODE(GArrowScalarAggregateOptions,
+                        garrow_scalar_aggregate_options,
                         G_TYPE_OBJECT,
-                        G_ADD_PRIVATE(GArrowCountOptions)
+                        G_ADD_PRIVATE(GArrowScalarAggregateOptions)
                         G_IMPLEMENT_INTERFACE(
                           GARROW_TYPE_FUNCTION_OPTIONS,
-                          garrow_count_options_function_options_interface_init))
+                          garrow_scalar_aggregate_options_function_options_interface_init))
 
-#define GARROW_COUNT_OPTIONS_GET_PRIVATE(object)        \
-  static_cast<GArrowCountOptionsPrivate *>(             \
-    garrow_count_options_get_instance_private(          \
-      GARROW_COUNT_OPTIONS(object)))
+#define GARROW_SCALAR_AGGREGATE_OPTIONS_GET_PRIVATE(object)        \
+  static_cast<GArrowScalarAggregateOptionsPrivate *>(              \
+    garrow_scalar_aggregate_options_get_instance_private(          \
+      GARROW_SCALAR_AGGREGATE_OPTIONS(object)))
 
 static void
-garrow_count_options_finalize(GObject *object)
+garrow_scalar_aggregate_options_finalize(GObject *object)
 {
-  auto priv = GARROW_COUNT_OPTIONS_GET_PRIVATE(object);
-  priv->options.~CountOptions();
-  G_OBJECT_CLASS(garrow_count_options_parent_class)->finalize(object);
+  auto priv = GARROW_SCALAR_AGGREGATE_OPTIONS_GET_PRIVATE(object);
+  priv->options.~ScalarAggregateOptions();
+  G_OBJECT_CLASS(garrow_scalar_aggregate_options_parent_class)->finalize(object);
 }
 
 static void
-garrow_count_options_set_property(GObject *object,
-                                  guint prop_id,
-                                  const GValue *value,
-                                  GParamSpec *pspec)
+garrow_scalar_aggregate_options_set_property(GObject *object,
+                                             guint prop_id,
+                                             const GValue *value,
+                                             GParamSpec *pspec)
 {
-  auto priv = GARROW_COUNT_OPTIONS_GET_PRIVATE(object);
+  auto priv = GARROW_SCALAR_AGGREGATE_OPTIONS_GET_PRIVATE(object);
 
   switch (prop_id) {
-  case PROP_MODE:
-    priv->options.count_mode =
-      static_cast<arrow::compute::CountOptions::Mode>(g_value_get_enum(value));
+  case PROP_SKIP_NULLS:
+    priv->options.skip_nulls = g_value_get_boolean(value);
+    break;
+  case PROP_MIN_COUNT:
+    priv->options.min_count = g_value_get_uint(value);
     break;
   default:
     G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
@@ -698,16 +704,19 @@ garrow_count_options_set_property(GObject *object,
 }
 
 static void
-garrow_count_options_get_property(GObject *object,
-                                 guint prop_id,
-                                 GValue *value,
-                                 GParamSpec *pspec)
+garrow_scalar_aggregate_options_get_property(GObject *object,
+                                             guint prop_id,
+                                             GValue *value,
+                                             GParamSpec *pspec)
 {
-  auto priv = GARROW_COUNT_OPTIONS_GET_PRIVATE(object);
+  auto priv = GARROW_SCALAR_AGGREGATE_OPTIONS_GET_PRIVATE(object);
 
   switch (prop_id) {
-  case PROP_MODE:
-    g_value_set_enum(value, priv->options.count_mode);
+  case PROP_SKIP_NULLS:
+    g_value_set_boolean(value, priv->options.skip_nulls);
+    break;
+  case PROP_MIN_COUNT:
+    g_value_set_uint(value, priv->options.min_count);
     break;
   default:
     G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
@@ -716,51 +725,69 @@ garrow_count_options_get_property(GObject *object,
 }
 
 static void
-garrow_count_options_init(GArrowCountOptions *object)
+garrow_scalar_aggregate_options_init(GArrowScalarAggregateOptions *object)
 {
-  auto priv = GARROW_COUNT_OPTIONS_GET_PRIVATE(object);
-  new(&priv->options) arrow::compute::CountOptions(
-    arrow::compute::CountOptions::COUNT_NON_NULL);
+  auto priv = GARROW_SCALAR_AGGREGATE_OPTIONS_GET_PRIVATE(object);
+  new(&priv->options) arrow::compute::ScalarAggregateOptions();
 }
 
 static void
-garrow_count_options_class_init(GArrowCountOptionsClass *klass)
+garrow_scalar_aggregate_options_class_init(
+  GArrowScalarAggregateOptionsClass *klass)
 {
   auto gobject_class = G_OBJECT_CLASS(klass);
 
-  gobject_class->finalize     = garrow_count_options_finalize;
-  gobject_class->set_property = garrow_count_options_set_property;
-  gobject_class->get_property = garrow_count_options_get_property;
+  gobject_class->finalize     = garrow_scalar_aggregate_options_finalize;
+  gobject_class->set_property = garrow_scalar_aggregate_options_set_property;
+  gobject_class->get_property = garrow_scalar_aggregate_options_get_property;
+
+  auto options = arrow::compute::ScalarAggregateOptions::Defaults();
 
   GParamSpec *spec;
   /**
-   * GArrowCountOptions:mode:
+   * GArrowScalarAggregateOptions:skip-nulls:
    *
-   * How to count values.
+   * Whether NULLs are skipped or not.
    *
-   * Since: 0.13.0
+   * Since: 5.0.0
    */
-  spec = g_param_spec_enum("mode",
-                           "Mode",
-                           "How to count values",
-                           GARROW_TYPE_COUNT_MODE,
-                           GARROW_COUNT_ALL,
+  spec = g_param_spec_boolean("skip-nulls",
+                              "Skip NULLs",
+                              "Whether NULLs are skipped or not",
+                              options.skip_nulls,
+                              static_cast<GParamFlags>(G_PARAM_READWRITE));
+  g_object_class_install_property(gobject_class, PROP_SKIP_NULLS, spec);
+
+  /**
+   * GArrowScalarAggregateOptions:min-count:
+   *
+   * The minimum required number of values.
+   *
+   * Since: 5.0.0
+   */
+  spec = g_param_spec_uint("min-count",
+                           "Min count",
+                           "The minimum required number of values",
+                           0,
+                           G_MAXUINT,
+                           options.min_count,
                            static_cast<GParamFlags>(G_PARAM_READWRITE));
-  g_object_class_install_property(gobject_class, PROP_MODE, spec);
+  g_object_class_install_property(gobject_class, PROP_MIN_COUNT, spec);
 }
 
 /**
- * garrow_count_options_new:
+ * garrow_scalar_aggregate_options_new:
  *
- * Returns: A newly created #GArrowCountOptions.
+ * Returns: A newly created #GArrowScalarAggregateOptions.
  *
- * Since: 0.13.0
+ * Since: 5.0.0
  */
-GArrowCountOptions *
-garrow_count_options_new(void)
+GArrowScalarAggregateOptions *
+garrow_scalar_aggregate_options_new(void)
 {
-  auto count_options = g_object_new(GARROW_TYPE_COUNT_OPTIONS, NULL);
-  return GARROW_COUNT_OPTIONS(count_options);
+  auto scalar_aggregate_options =
+    g_object_new(GARROW_TYPE_SCALAR_AGGREGATE_OPTIONS, NULL);
+  return GARROW_SCALAR_AGGREGATE_OPTIONS(scalar_aggregate_options);
 }
 
 
@@ -863,14 +890,14 @@ garrow_filter_options_class_init(GArrowFilterOptionsClass *klass)
 
   GParamSpec *spec;
   /**
-   * GArrowFilterOptions:null_selection_behavior:
+   * GArrowFilterOptions:null-selection-behavior:
    *
    * How to handle filtered values.
    *
    * Since: 0.17.0
    */
-  spec = g_param_spec_enum("null_selection_behavior",
-                           "Null selection behavior",
+  spec = g_param_spec_enum("null-selection-behavior",
+                           "NULL selection behavior",
                            "How to handle filtered values",
                            GARROW_TYPE_FILTER_NULL_SELECTION_BEHAVIOR,
                            static_cast<GArrowFilterNullSelectionBehavior>(
@@ -1682,7 +1709,7 @@ garrow_array_dictionary_encode(GArrowArray *array,
 /**
  * garrow_array_count:
  * @array: A #GArrowArray.
- * @options: (nullable): A #GArrowCountOptions.
+ * @options: (nullable): A #GArrowScalarAggregateOptions.
  * @error: (nullable): Return location for a #GError or %NULL.
  *
  * Returns: The number of target values on success. If an error is occurred,
@@ -1692,14 +1719,14 @@ garrow_array_dictionary_encode(GArrowArray *array,
  */
 gint64
 garrow_array_count(GArrowArray *array,
-                   GArrowCountOptions *options,
+                   GArrowScalarAggregateOptions *options,
                    GError **error)
 {
   auto arrow_array = garrow_array_get_raw(array);
   auto arrow_array_raw = arrow_array.get();
   arrow::Result<arrow::Datum> arrow_counted_datum;
   if (options) {
-    auto arrow_options = garrow_count_options_get_raw(options);
+    auto arrow_options = garrow_scalar_aggregate_options_get_raw(options);
     arrow_counted_datum =
       arrow::compute::Count(*arrow_array_raw, *arrow_options);
   } else {
@@ -3059,20 +3086,23 @@ garrow_cast_options_get_raw(GArrowCastOptions *cast_options)
   return &(priv->options);
 }
 
-GArrowCountOptions *
-garrow_count_options_new_raw(arrow::compute::CountOptions *arrow_count_options)
+GArrowScalarAggregateOptions *
+garrow_scalar_aggregate_options_new_raw(
+  arrow::compute::ScalarAggregateOptions *arrow_scalar_aggregate_options)
 {
-  auto count_options =
-    g_object_new(GARROW_TYPE_COUNT_OPTIONS,
-                 "mode", arrow_count_options->count_mode,
+  auto scalar_aggregate_options =
+    g_object_new(GARROW_TYPE_SCALAR_AGGREGATE_OPTIONS,
+                 "skip-nulls", arrow_scalar_aggregate_options->skip_nulls,
+                 "min-count", arrow_scalar_aggregate_options->min_count,
                  NULL);
-  return GARROW_COUNT_OPTIONS(count_options);
+  return GARROW_SCALAR_AGGREGATE_OPTIONS(scalar_aggregate_options);
 }
 
-arrow::compute::CountOptions *
-garrow_count_options_get_raw(GArrowCountOptions *count_options)
+arrow::compute::ScalarAggregateOptions *
+garrow_scalar_aggregate_options_get_raw(
+  GArrowScalarAggregateOptions *scalar_aggregate_options)
 {
-  auto priv = GARROW_COUNT_OPTIONS_GET_PRIVATE(count_options);
+  auto priv = GARROW_SCALAR_AGGREGATE_OPTIONS_GET_PRIVATE(scalar_aggregate_options);
   return &(priv->options);
 }
 
diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h
index 63ba6e0eae5..a9e57945ba5 100644
--- a/c_glib/arrow-glib/compute.h
+++ b/c_glib/arrow-glib/compute.h
@@ -83,32 +83,20 @@ struct _GArrowCastOptionsClass
 GArrowCastOptions *garrow_cast_options_new(void);
 
 
-/**
- * GArrowCountMode:
- * @GARROW_COUNT_ALL: Count all non-null values.
- * @GARROW_COUNT_NULL: Count all null values.
- *
- * They are corresponding to `arrow::compute::CountOptions::Mode` values.
- */
-typedef enum {
-  GARROW_COUNT_ALL,
-  GARROW_COUNT_NULL,
-} GArrowCountMode;
-
-#define GARROW_TYPE_COUNT_OPTIONS (garrow_count_options_get_type())
-G_DECLARE_DERIVABLE_TYPE(GArrowCountOptions,
-                         garrow_count_options,
+#define GARROW_TYPE_SCALAR_AGGREGATE_OPTIONS (garrow_scalar_aggregate_options_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowScalarAggregateOptions,
+                         garrow_scalar_aggregate_options,
                          GARROW,
-                         COUNT_OPTIONS,
+                         SCALAR_AGGREGATE_OPTIONS,
                          GObject)
-struct _GArrowCountOptionsClass
+struct _GArrowScalarAggregateOptionsClass
 {
   GObjectClass parent_class;
 };
 
-GARROW_AVAILABLE_IN_0_13
-GArrowCountOptions *
-garrow_count_options_new(void);
+GARROW_AVAILABLE_IN_5_0
+GArrowScalarAggregateOptions *
+garrow_scalar_aggregate_options_new(void);
 
 
 /**
@@ -290,7 +278,7 @@ GArrowDictionaryArray *garrow_array_dictionary_encode(GArrowArray *array,
                                                       GError **error);
 GARROW_AVAILABLE_IN_0_13
 gint64 garrow_array_count(GArrowArray *array,
-                          GArrowCountOptions *options,
+                          GArrowScalarAggregateOptions *options,
                           GError **error);
 GARROW_AVAILABLE_IN_0_13
 GArrowStructArray *garrow_array_count_values(GArrowArray *array,
diff --git a/c_glib/arrow-glib/compute.hpp b/c_glib/arrow-glib/compute.hpp
index 1bc6fefdd40..289bcbe31af 100644
--- a/c_glib/arrow-glib/compute.hpp
+++ b/c_glib/arrow-glib/compute.hpp
@@ -46,10 +46,12 @@ garrow_function_get_raw(GArrowFunction *function);
 GArrowCastOptions *garrow_cast_options_new_raw(arrow::compute::CastOptions *arrow_cast_options);
 arrow::compute::CastOptions *garrow_cast_options_get_raw(GArrowCastOptions *cast_options);
 
-GArrowCountOptions *
-garrow_count_options_new_raw(arrow::compute::CountOptions *arrow_count_options);
-arrow::compute::CountOptions *
-garrow_count_options_get_raw(GArrowCountOptions *count_options);
+GArrowScalarAggregateOptions *
+garrow_scalar_aggregate_options_new_raw(
+  arrow::compute::ScalarAggregateOptions *arrow_scalar_aggregate_options);
+arrow::compute::ScalarAggregateOptions *
+garrow_scalar_aggregate_options_get_raw(
+  GArrowScalarAggregateOptions *scalar_aggregate_options);
 
 arrow::compute::FilterOptions *
 garrow_filter_options_get_raw(GArrowFilterOptions *filter_options);
diff --git a/c_glib/arrow-glib/version.h.in b/c_glib/arrow-glib/version.h.in
index 5a74566fd4a..3dc2f702c5e 100644
--- a/c_glib/arrow-glib/version.h.in
+++ b/c_glib/arrow-glib/version.h.in
@@ -110,6 +110,15 @@
 #  define GARROW_UNAVAILABLE(major, minor) G_UNAVAILABLE(major, minor)
 #endif
 
+/**
+ * GARROW_VERSION_5_0:
+ *
+ * You can use this macro value for compile time API version check.
+ *
+ * Since: 5.0.0
+ */
+#define GARROW_VERSION_5_0 G_ENCODE_VERSION(5, 0)
+
 /**
  * GARROW_VERSION_4_0:
  *
@@ -256,6 +265,20 @@
 
 #define GARROW_AVAILABLE_IN_ALL
 
+#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_5_0
+#  define GARROW_DEPRECATED_IN_5_0                GARROW_DEPRECATED
+#  define GARROW_DEPRECATED_IN_5_0_FOR(function)  GARROW_DEPRECATED_FOR(function)
+#else
+#  define GARROW_DEPRECATED_IN_5_0
+#  define GARROW_DEPRECATED_IN_5_0_FOR(function)
+#endif
+
+#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_5_0
+#  define GARROW_AVAILABLE_IN_5_0 GARROW_UNAVAILABLE(5, 0)
+#else
+#  define GARROW_AVAILABLE_IN_5_0
+#endif
+
 #if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_4_0
 #  define GARROW_DEPRECATED_IN_4_0                GARROW_DEPRECATED
 #  define GARROW_DEPRECATED_IN_4_0_FOR(function)  GARROW_DEPRECATED_FOR(function)
diff --git a/c_glib/doc/arrow-glib/arrow-glib-docs.xml b/c_glib/doc/arrow-glib/arrow-glib-docs.xml
index 9198b6a13a6..80af9506ecd 100644
--- a/c_glib/doc/arrow-glib/arrow-glib-docs.xml
+++ b/c_glib/doc/arrow-glib/arrow-glib-docs.xml
@@ -179,6 +179,10 @@
     <title>Index of deprecated API</title>
     <xi:include href="xml/api-index-deprecated.xml"><xi:fallback /></xi:include>
   </index>
+  <index id="api-index-5-0-0" role="5.0.0">
+    <title>Index of new symbols in 5.0.0</title>
+    <xi:include href="xml/api-index-5.0.0.xml"><xi:fallback /></xi:include>
+  </index>
   <index id="api-index-4-0-0" role="4.0.0">
     <title>Index of new symbols in 4.0.0</title>
     <xi:include href="xml/api-index-4.0.0.xml"><xi:fallback /></xi:include>
diff --git a/c_glib/test/test-count.rb b/c_glib/test/test-count.rb
index 36390f880aa..39b6f06c4e6 100644
--- a/c_glib/test/test-count.rb
+++ b/c_glib/test/test-count.rb
@@ -19,27 +19,14 @@ class TestCount < Test::Unit::TestCase
   include Helper::Buildable
   include Helper::Omittable
 
-  sub_test_case("CountOptions") do
-    def test_default_mode
-      assert_equal(Arrow::CountMode::ALL,
-                   Arrow::CountOptions.new.mode)
-    end
-  end
-
-  sub_test_case("mode") do
+  sub_test_case("skip_nulls") do
     def test_default
       assert_equal(2, build_int32_array([1, nil, 3]).count)
     end
 
-    def test_all
-      options = Arrow::CountOptions.new
-      options.mode = :all
-      assert_equal(2, build_int32_array([1, nil, 3]).count(options))
-    end
-
-    def test_null
-      options = Arrow::CountOptions.new
-      options.mode = :null
+    def test_false
+      options = Arrow::ScalarAggregateOptions.new
+      options.skip_nulls = false
       assert_equal(1, build_int32_array([1, nil, 3]).count(options))
     end
   end
diff --git a/c_glib/test/test-scalar-aggregate-options.rb b/c_glib/test/test-scalar-aggregate-options.rb
new file mode 100644
index 00000000000..a794b53243a
--- /dev/null
+++ b/c_glib/test/test-scalar-aggregate-options.rb
@@ -0,0 +1,48 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestScalarAggregateOptions < Test::Unit::TestCase
+  def setup
+    @options = Arrow::ScalarAggregateOptions.new
+  end
+
+  sub_test_case("skip_nulls") do
+    def test_default
+      assert do
+        @options.skip_nulls?
+      end
+    end
+
+    def test_accessor
+      @options.skip_nulls = false
+      assert do
+        not @options.skip_nulls?
+      end
+    end
+  end
+
+  sub_test_case("min_count") do
+    def test_default
+      assert_equal(1, @options.min_count)
+    end
+
+    def test_accessor
+      @options.min_count = 0
+      assert_equal(0, @options.min_count)
+    end
+  end
+end
diff --git a/cpp/src/arrow/compute/api_aggregate.cc b/cpp/src/arrow/compute/api_aggregate.cc
index 5afa1048960..dca54a0faba 100644
--- a/cpp/src/arrow/compute/api_aggregate.cc
+++ b/cpp/src/arrow/compute/api_aggregate.cc
@@ -25,19 +25,23 @@ namespace compute {
 // ----------------------------------------------------------------------
 // Scalar aggregates
 
-Result<Datum> Count(const Datum& value, CountOptions options, ExecContext* ctx) {
+Result<Datum> Count(const Datum& value, ScalarAggregateOptions options,
+                    ExecContext* ctx) {
   return CallFunction("count", {value}, &options, ctx);
 }
 
-Result<Datum> Mean(const Datum& value, ExecContext* ctx) {
-  return CallFunction("mean", {value}, ctx);
+Result<Datum> Mean(const Datum& value, const ScalarAggregateOptions& options,
+                   ExecContext* ctx) {
+  return CallFunction("mean", {value}, &options, ctx);
 }
 
-Result<Datum> Sum(const Datum& value, ExecContext* ctx) {
-  return CallFunction("sum", {value}, ctx);
+Result<Datum> Sum(const Datum& value, const ScalarAggregateOptions& options,
+                  ExecContext* ctx) {
+  return CallFunction("sum", {value}, &options, ctx);
 }
 
-Result<Datum> MinMax(const Datum& value, const MinMaxOptions& options, ExecContext* ctx) {
+Result<Datum> MinMax(const Datum& value, const ScalarAggregateOptions& options,
+                     ExecContext* ctx) {
   return CallFunction("min_max", {value}, &options, ctx);
 }
 
diff --git a/cpp/src/arrow/compute/api_aggregate.h b/cpp/src/arrow/compute/api_aggregate.h
index ca118ec5678..a7ceb2ac2fd 100644
--- a/cpp/src/arrow/compute/api_aggregate.h
+++ b/cpp/src/arrow/compute/api_aggregate.h
@@ -40,40 +40,17 @@ class ExecContext;
 /// \addtogroup compute-concrete-options
 /// @{
 
-/// \brief Control Count kernel behavior
-///
-/// By default, all non-null values are counted.
-struct ARROW_EXPORT CountOptions : public FunctionOptions {
-  enum Mode {
-    /// Count all non-null values.
-    COUNT_NON_NULL = 0,
-    /// Count all null values.
-    COUNT_NULL,
-  };
-
-  explicit CountOptions(enum Mode count_mode = COUNT_NON_NULL) : count_mode(count_mode) {}
-
-  static CountOptions Defaults() { return CountOptions(COUNT_NON_NULL); }
-
-  enum Mode count_mode;
-};
-
-/// \brief Control MinMax kernel behavior
+/// \brief Control general scalar aggregate kernel behavior
 ///
 /// By default, null values are ignored
-struct ARROW_EXPORT MinMaxOptions : public FunctionOptions {
-  enum Mode {
-    /// Skip null values
-    SKIP = 0,
-    /// Any nulls will result in null output
-    EMIT_NULL
-  };
+struct ARROW_EXPORT ScalarAggregateOptions : public FunctionOptions {
+  explicit ScalarAggregateOptions(bool skip_nulls = true, uint32_t min_count = 1)
+      : skip_nulls(skip_nulls), min_count(min_count) {}
 
-  explicit MinMaxOptions(enum Mode null_handling = SKIP) : null_handling(null_handling) {}
+  static ScalarAggregateOptions Defaults() { return ScalarAggregateOptions{}; }
 
-  static MinMaxOptions Defaults() { return MinMaxOptions{}; }
-
-  enum Mode null_handling;
+  bool skip_nulls;
+  uint32_t min_count;
 };
 
 /// \brief Control Mode kernel behavior
@@ -153,7 +130,7 @@ struct ARROW_EXPORT TDigestOptions : public FunctionOptions {
 
 /// \brief Count non-null (or null) values in an array.
 ///
-/// \param[in] options counting options, see CountOptions for more information
+/// \param[in] options counting options, see ScalarAggregateOptions for more information
 /// \param[in] datum to count
 /// \param[in] ctx the function execution context, optional
 /// \return out resulting datum
@@ -161,30 +138,39 @@ struct ARROW_EXPORT TDigestOptions : public FunctionOptions {
 /// \since 1.0.0
 /// \note API not yet finalized
 ARROW_EXPORT
-Result<Datum> Count(const Datum& datum, CountOptions options = CountOptions::Defaults(),
+Result<Datum> Count(const Datum& datum,
+                    ScalarAggregateOptions options = ScalarAggregateOptions::Defaults(),
                     ExecContext* ctx = NULLPTR);
 
 /// \brief Compute the mean of a numeric array.
 ///
 /// \param[in] value datum to compute the mean, expecting Array
+/// \param[in] options see ScalarAggregateOptions for more information
 /// \param[in] ctx the function execution context, optional
 /// \return datum of the computed mean as a DoubleScalar
 ///
 /// \since 1.0.0
 /// \note API not yet finalized
 ARROW_EXPORT
-Result<Datum> Mean(const Datum& value, ExecContext* ctx = NULLPTR);
+Result<Datum> Mean(
+    const Datum& value,
+    const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults(),
+    ExecContext* ctx = NULLPTR);
 
 /// \brief Sum values of a numeric array.
 ///
 /// \param[in] value datum to sum, expecting Array or ChunkedArray
+/// \param[in] options see ScalarAggregateOptions for more information
 /// \param[in] ctx the function execution context, optional
 /// \return datum of the computed sum as a Scalar
 ///
 /// \since 1.0.0
 /// \note API not yet finalized
 ARROW_EXPORT
-Result<Datum> Sum(const Datum& value, ExecContext* ctx = NULLPTR);
+Result<Datum> Sum(
+    const Datum& value,
+    const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults(),
+    ExecContext* ctx = NULLPTR);
 
 /// \brief Calculate the min / max of a numeric array
 ///
@@ -192,16 +178,17 @@ Result<Datum> Sum(const Datum& value, ExecContext* ctx = NULLPTR);
 /// struct<min: T, max: T>, where T is the input type
 ///
 /// \param[in] value input datum, expecting Array or ChunkedArray
-/// \param[in] options see MinMaxOptions for more information
+/// \param[in] options see ScalarAggregateOptions for more information
 /// \param[in] ctx the function execution context, optional
 /// \return resulting datum as a struct<min: T, max: T> scalar
 ///
 /// \since 1.0.0
 /// \note API not yet finalized
 ARROW_EXPORT
-Result<Datum> MinMax(const Datum& value,
-                     const MinMaxOptions& options = MinMaxOptions::Defaults(),
-                     ExecContext* ctx = NULLPTR);
+Result<Datum> MinMax(
+    const Datum& value,
+    const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults(),
+    ExecContext* ctx = NULLPTR);
 
 /// \brief Test whether any element in a boolean array evaluates to true.
 ///
diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic.cc b/cpp/src/arrow/compute/kernels/aggregate_basic.cc
index e4eec50c66d..446c1b9fc62 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_basic.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_basic.cc
@@ -56,7 +56,7 @@ namespace aggregate {
 // Count implementation
 
 struct CountImpl : public ScalarAggregator {
-  explicit CountImpl(CountOptions options) : options(std::move(options)) {}
+  explicit CountImpl(ScalarAggregateOptions options) : options(std::move(options)) {}
 
   Status Consume(KernelContext*, const ExecBatch& batch) override {
     const ArrayData& input = *batch[0].array();
@@ -75,20 +75,15 @@ struct CountImpl : public ScalarAggregator {
 
   Status Finalize(KernelContext* ctx, Datum* out) override {
     const auto& state = checked_cast<const CountImpl&>(*ctx->state());
-    switch (state.options.count_mode) {
-      case CountOptions::COUNT_NON_NULL:
-        *out = Datum(state.non_nulls);
-        break;
-      case CountOptions::COUNT_NULL:
-        *out = Datum(state.nulls);
-        break;
-      default:
-        return Status::Invalid("Unknown CountOptions encountered");
+    if (state.options.skip_nulls) {
+      *out = Datum(state.non_nulls);
+    } else {
+      *out = Datum(state.nulls);
     }
     return Status::OK();
   }
 
-  CountOptions options;
+  ScalarAggregateOptions options;
   int64_t non_nulls = 0;
   int64_t nulls = 0;
 };
@@ -96,27 +91,39 @@ struct CountImpl : public ScalarAggregator {
 Result<std::unique_ptr<KernelState>> CountInit(KernelContext*,
                                                const KernelInitArgs& args) {
   return ::arrow::internal::make_unique<CountImpl>(
-      static_cast<const CountOptions&>(*args.options));
+      static_cast<const ScalarAggregateOptions&>(*args.options));
 }
 
 // ----------------------------------------------------------------------
 // Sum implementation
 
 template <typename ArrowType>
-struct SumImplDefault : public SumImpl<ArrowType, SimdLevel::NONE> {};
+struct SumImplDefault : public SumImpl<ArrowType, SimdLevel::NONE> {
+  explicit SumImplDefault(const ScalarAggregateOptions& options_) {
+    this->options = options_;
+  }
+};
 
 template <typename ArrowType>
-struct MeanImplDefault : public MeanImpl<ArrowType, SimdLevel::NONE> {};
+struct MeanImplDefault : public MeanImpl<ArrowType, SimdLevel::NONE> {
+  explicit MeanImplDefault(const ScalarAggregateOptions& options_) {
+    this->options = options_;
+  }
+};
 
 Result<std::unique_ptr<KernelState>> SumInit(KernelContext* ctx,
                                              const KernelInitArgs& args) {
-  SumLikeInit<SumImplDefault> visitor(ctx, *args.inputs[0].type);
+  SumLikeInit<SumImplDefault> visitor(
+      ctx, *args.inputs[0].type,
+      static_cast<const ScalarAggregateOptions&>(*args.options));
   return visitor.Create();
 }
 
 Result<std::unique_ptr<KernelState>> MeanInit(KernelContext* ctx,
                                               const KernelInitArgs& args) {
-  SumLikeInit<MeanImplDefault> visitor(ctx, *args.inputs[0].type);
+  SumLikeInit<MeanImplDefault> visitor(
+      ctx, *args.inputs[0].type,
+      static_cast<const ScalarAggregateOptions&>(*args.options));
   return visitor.Create();
 }
 
@@ -127,7 +134,7 @@ Result<std::unique_ptr<KernelState>> MinMaxInit(KernelContext* ctx,
                                                 const KernelInitArgs& args) {
   MinMaxInitState<SimdLevel::NONE> visitor(
       ctx, *args.inputs[0].type, args.kernel->signature->out_type().type(),
-      static_cast<const MinMaxOptions&>(*args.options));
+      static_cast<const ScalarAggregateOptions&>(*args.options));
   return visitor.Create();
 }
 
@@ -247,23 +254,33 @@ namespace {
 
 const FunctionDoc count_doc{"Count the number of null / non-null values",
                             ("By default, non-null values are counted.\n"
-                             "This can be changed through CountOptions."),
+                             "This can be changed through ScalarAggregateOptions."),
                             {"array"},
-                            "CountOptions"};
-
-const FunctionDoc sum_doc{
-    "Sum values of a numeric array", ("Null values are ignored."), {"array"}};
-
-const FunctionDoc mean_doc{"Compute the mean of a numeric array",
-                           ("Null values are ignored. The result is always computed\n"
-                            "as a double, regardless of the input types"),
-                           {"array"}};
+                            "ScalarAggregateOptions"};
+
+const FunctionDoc sum_doc{"Sum values of a numeric array",
+                          ("Null values are ignored. Minimum count of non-NA\n"
+                           "values can be set and NAN is returned if too "
+                           "few are present.\n"
+                           "This can be changed through ScalarAggregateOptions."),
+                          {"array"},
+                          "ScalarAggregateOptions"};
+
+const FunctionDoc mean_doc{
+    "Compute the mean of a numeric array",
+    ("Null values are ignored by default. Minimum count of non-NA\n"
+     "values can be set and NAN is returned if too few are \n"
+     "present. The result is always computed as a double, \n"
+     "regardless of the input types.\n"
+     "This can be changed through ScalarAggregateOptions."),
+    {"array"},
+    "ScalarAggregateOptions"};
 
 const FunctionDoc min_max_doc{"Compute the minimum and maximum values of a numeric array",
                               ("Null values are ignored by default.\n"
-                               "This can be changed through MinMaxOptions."),
+                               "This can be changed through ScalarAggregateOptions."),
                               {"array"},
-                              "MinMaxOptions"};
+                              "ScalarAggregateOptions"};
 
 const FunctionDoc any_doc{"Test whether any element in a boolean array evaluates to true",
                           ("Null values are ignored."),
@@ -276,9 +293,10 @@ const FunctionDoc all_doc{"Test whether all elements in a boolean array evaluate
 }  // namespace
 
 void RegisterScalarAggregateBasic(FunctionRegistry* registry) {
-  static auto default_count_options = CountOptions::Defaults();
+  static auto default_scalar_aggregate_options = ScalarAggregateOptions::Defaults();
+
   auto func = std::make_shared<ScalarAggregateFunction>(
-      "count", Arity::Unary(), &count_doc, &default_count_options);
+      "count", Arity::Unary(), &count_doc, &default_scalar_aggregate_options);
 
   // Takes any array input, outputs int64 scalar
   InputType any_array(ValueDescr::ARRAY);
@@ -286,7 +304,8 @@ void RegisterScalarAggregateBasic(FunctionRegistry* registry) {
                aggregate::CountInit, func.get());
   DCHECK_OK(registry->AddFunction(std::move(func)));
 
-  func = std::make_shared<ScalarAggregateFunction>("sum", Arity::Unary(), &sum_doc);
+  func = std::make_shared<ScalarAggregateFunction>("sum", Arity::Unary(), &sum_doc,
+                                                   &default_scalar_aggregate_options);
   aggregate::AddBasicAggKernels(aggregate::SumInit, {boolean()}, int64(), func.get());
   aggregate::AddBasicAggKernels(aggregate::SumInit, SignedIntTypes(), int64(),
                                 func.get());
@@ -310,7 +329,8 @@ void RegisterScalarAggregateBasic(FunctionRegistry* registry) {
 #endif
   DCHECK_OK(registry->AddFunction(std::move(func)));
 
-  func = std::make_shared<ScalarAggregateFunction>("mean", Arity::Unary(), &mean_doc);
+  func = std::make_shared<ScalarAggregateFunction>("mean", Arity::Unary(), &mean_doc,
+                                                   &default_scalar_aggregate_options);
   aggregate::AddBasicAggKernels(aggregate::MeanInit, {boolean()}, float64(), func.get());
   aggregate::AddBasicAggKernels(aggregate::MeanInit, NumericTypes(), float64(),
                                 func.get());
@@ -327,9 +347,8 @@ void RegisterScalarAggregateBasic(FunctionRegistry* registry) {
 #endif
   DCHECK_OK(registry->AddFunction(std::move(func)));
 
-  static auto default_minmax_options = MinMaxOptions::Defaults();
-  func = std::make_shared<ScalarAggregateFunction>("min_max", Arity::Unary(),
-                                                   &min_max_doc, &default_minmax_options);
+  func = std::make_shared<ScalarAggregateFunction>(
+      "min_max", Arity::Unary(), &min_max_doc, &default_scalar_aggregate_options);
   aggregate::AddMinMaxKernels(aggregate::MinMaxInit, {boolean()}, func.get());
   aggregate::AddMinMaxKernels(aggregate::MinMaxInit, NumericTypes(), func.get());
   // Add the SIMD variants for min max
diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic_avx2.cc b/cpp/src/arrow/compute/kernels/aggregate_basic_avx2.cc
index a70363aab9b..8d3e5a0409d 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_basic_avx2.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_basic_avx2.cc
@@ -25,20 +25,32 @@ namespace aggregate {
 // Sum implementation
 
 template <typename ArrowType>
-struct SumImplAvx2 : public SumImpl<ArrowType, SimdLevel::AVX2> {};
+struct SumImplAvx2 : public SumImpl<ArrowType, SimdLevel::AVX2> {
+  explicit SumImplAvx2(const ScalarAggregateOptions& options_) {
+    this->options = options_;
+  }
+};
 
 template <typename ArrowType>
-struct MeanImplAvx2 : public MeanImpl<ArrowType, SimdLevel::AVX2> {};
+struct MeanImplAvx2 : public MeanImpl<ArrowType, SimdLevel::AVX2> {
+  explicit MeanImplAvx2(const ScalarAggregateOptions& options_) {
+    this->options = options_;
+  }
+};
 
 Result<std::unique_ptr<KernelState>> SumInitAvx2(KernelContext* ctx,
                                                  const KernelInitArgs& args) {
-  SumLikeInit<SumImplAvx2> visitor(ctx, *args.inputs[0].type);
+  SumLikeInit<SumImplAvx2> visitor(
+      ctx, *args.inputs[0].type,
+      static_cast<const ScalarAggregateOptions&>(*args.options));
   return visitor.Create();
 }
 
 Result<std::unique_ptr<KernelState>> MeanInitAvx2(KernelContext* ctx,
                                                   const KernelInitArgs& args) {
-  SumLikeInit<MeanImplAvx2> visitor(ctx, *args.inputs[0].type);
+  SumLikeInit<MeanImplAvx2> visitor(
+      ctx, *args.inputs[0].type,
+      static_cast<const ScalarAggregateOptions&>(*args.options));
   return visitor.Create();
 }
 
@@ -49,7 +61,7 @@ Result<std::unique_ptr<KernelState>> MinMaxInitAvx2(KernelContext* ctx,
                                                     const KernelInitArgs& args) {
   MinMaxInitState<SimdLevel::AVX2> visitor(
       ctx, *args.inputs[0].type, args.kernel->signature->out_type().type(),
-      static_cast<const MinMaxOptions&>(*args.options));
+      static_cast<const ScalarAggregateOptions&>(*args.options));
   return visitor.Create();
 }
 
diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic_avx512.cc b/cpp/src/arrow/compute/kernels/aggregate_basic_avx512.cc
index 1ecbd7041e6..4f8ad74a086 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_basic_avx512.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_basic_avx512.cc
@@ -25,20 +25,32 @@ namespace aggregate {
 // Sum implementation
 
 template <typename ArrowType>
-struct SumImplAvx512 : public SumImpl<ArrowType, SimdLevel::AVX512> {};
+struct SumImplAvx512 : public SumImpl<ArrowType, SimdLevel::AVX512> {
+  explicit SumImplAvx512(const ScalarAggregateOptions& options_) {
+    this->options = options_;
+  }
+};
 
 template <typename ArrowType>
-struct MeanImplAvx512 : public MeanImpl<ArrowType, SimdLevel::AVX512> {};
+struct MeanImplAvx512 : public MeanImpl<ArrowType, SimdLevel::AVX512> {
+  explicit MeanImplAvx512(const ScalarAggregateOptions& options_) {
+    this->options = options_;
+  }
+};
 
 Result<std::unique_ptr<KernelState>> SumInitAvx512(KernelContext* ctx,
                                                    const KernelInitArgs& args) {
-  SumLikeInit<SumImplAvx512> visitor(ctx, *args.inputs[0].type);
+  SumLikeInit<SumImplAvx512> visitor(
+      ctx, *args.inputs[0].type,
+      static_cast<const ScalarAggregateOptions&>(*args.options));
   return visitor.Create();
 }
 
 Result<std::unique_ptr<KernelState>> MeanInitAvx512(KernelContext* ctx,
                                                     const KernelInitArgs& args) {
-  SumLikeInit<MeanImplAvx512> visitor(ctx, *args.inputs[0].type);
+  SumLikeInit<MeanImplAvx512> visitor(
+      ctx, *args.inputs[0].type,
+      static_cast<const ScalarAggregateOptions&>(*args.options));
   return visitor.Create();
 }
 
@@ -49,7 +61,7 @@ Result<std::unique_ptr<KernelState>> MinMaxInitAvx512(KernelContext* ctx,
                                                       const KernelInitArgs& args) {
   MinMaxInitState<SimdLevel::AVX512> visitor(
       ctx, *args.inputs[0].type, args.kernel->signature->out_type().type(),
-      static_cast<const MinMaxOptions&>(*args.options));
+      static_cast<const ScalarAggregateOptions&>(*args.options));
   return visitor.Create();
 }
 
diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h b/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h
index f8db180b1e3..86e321ba522 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h
+++ b/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h
@@ -78,7 +78,7 @@ struct SumImpl : public ScalarAggregator {
   }
 
   Status Finalize(KernelContext*, Datum* out) override {
-    if (this->count == 0) {
+    if (this->count < options.min_count) {
       out->value = std::make_shared<OutputType>();
     } else {
       out->value = MakeScalar(this->sum);
@@ -88,12 +88,13 @@ struct SumImpl : public ScalarAggregator {
 
   size_t count = 0;
   typename SumType::c_type sum = 0;
+  ScalarAggregateOptions options;
 };
 
 template <typename ArrowType, SimdLevel::type SimdLevel>
 struct MeanImpl : public SumImpl<ArrowType, SimdLevel> {
   Status Finalize(KernelContext*, Datum* out) override {
-    if (this->count == 0) {
+    if (this->count < options.min_count) {
       out->value = std::make_shared<DoubleScalar>();
     } else {
       const double mean = static_cast<double>(this->sum) / this->count;
@@ -101,6 +102,7 @@ struct MeanImpl : public SumImpl<ArrowType, SimdLevel> {
     }
     return Status::OK();
   }
+  ScalarAggregateOptions options;
 };
 
 template <template <typename> class KernelClass>
@@ -108,8 +110,11 @@ struct SumLikeInit {
   std::unique_ptr<KernelState> state;
   KernelContext* ctx;
   const DataType& type;
+  const ScalarAggregateOptions& options;
 
-  SumLikeInit(KernelContext* ctx, const DataType& type) : ctx(ctx), type(type) {}
+  SumLikeInit(KernelContext* ctx, const DataType& type,
+              const ScalarAggregateOptions& options)
+      : ctx(ctx), type(type), options(options) {}
 
   Status Visit(const DataType&) { return Status::NotImplemented("No sum implemented"); }
 
@@ -118,13 +123,13 @@ struct SumLikeInit {
   }
 
   Status Visit(const BooleanType&) {
-    state.reset(new KernelClass<BooleanType>());
+    state.reset(new KernelClass<BooleanType>(options));
     return Status::OK();
   }
 
   template <typename Type>
   enable_if_number<Type, Status> Visit(const Type&) {
-    state.reset(new KernelClass<Type>());
+    state.reset(new KernelClass<Type>(options));
     return Status::OK();
   }
 
@@ -218,7 +223,8 @@ struct MinMaxImpl : public ScalarAggregator {
   using ThisType = MinMaxImpl<ArrowType, SimdLevel>;
   using StateType = MinMaxState<ArrowType, SimdLevel>;
 
-  MinMaxImpl(const std::shared_ptr<DataType>& out_type, const MinMaxOptions& options)
+  MinMaxImpl(const std::shared_ptr<DataType>& out_type,
+             const ScalarAggregateOptions& options)
       : out_type(out_type), options(options) {}
 
   Status Consume(KernelContext*, const ExecBatch& batch) override {
@@ -230,7 +236,7 @@ struct MinMaxImpl : public ScalarAggregator {
     local.has_nulls = null_count > 0;
     local.has_values = (arr.length() - null_count) > 0;
 
-    if (local.has_nulls && options.null_handling == MinMaxOptions::EMIT_NULL) {
+    if (local.has_nulls && !options.skip_nulls) {
       this->state = local;
       return Status::OK();
     }
@@ -256,8 +262,7 @@ struct MinMaxImpl : public ScalarAggregator {
     using ScalarType = typename TypeTraits<ArrowType>::ScalarType;
 
     std::vector<std::shared_ptr<Scalar>> values;
-    if (!state.has_values ||
-        (state.has_nulls && options.null_handling == MinMaxOptions::EMIT_NULL)) {
+    if (!state.has_values || (state.has_nulls && !options.skip_nulls)) {
       // (null, null)
       values = {std::make_shared<ScalarType>(), std::make_shared<ScalarType>()};
     } else {
@@ -269,7 +274,7 @@ struct MinMaxImpl : public ScalarAggregator {
   }
 
   std::shared_ptr<DataType> out_type;
-  MinMaxOptions options;
+  ScalarAggregateOptions options;
   MinMaxState<ArrowType, SimdLevel> state;
 
  private:
@@ -348,7 +353,7 @@ struct BooleanMinMaxImpl : public MinMaxImpl<BooleanType, SimdLevel> {
 
     local.has_nulls = null_count > 0;
     local.has_values = valid_count > 0;
-    if (local.has_nulls && options.null_handling == MinMaxOptions::EMIT_NULL) {
+    if (local.has_nulls && !options.skip_nulls) {
       this->state = local;
       return Status::OK();
     }
@@ -369,10 +374,11 @@ struct MinMaxInitState {
   KernelContext* ctx;
   const DataType& in_type;
   const std::shared_ptr<DataType>& out_type;
-  const MinMaxOptions& options;
+  const ScalarAggregateOptions& options;
 
   MinMaxInitState(KernelContext* ctx, const DataType& in_type,
-                  const std::shared_ptr<DataType>& out_type, const MinMaxOptions& options)
+                  const std::shared_ptr<DataType>& out_type,
+                  const ScalarAggregateOptions& options)
       : ctx(ctx), in_type(in_type), out_type(out_type), options(options) {}
 
   Status Visit(const DataType&) {
diff --git a/cpp/src/arrow/compute/kernels/aggregate_test.cc b/cpp/src/arrow/compute/kernels/aggregate_test.cc
index 455a3edaaf8..f1a2863e97d 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_test.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_test.cc
@@ -104,71 +104,129 @@ static Datum NaiveSum(const Array& array) {
 }
 
 template <typename ArrowType>
-void ValidateSum(const Array& input, Datum expected) {
+void ValidateSum(
+    const Array& input, Datum expected,
+    const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults()) {
   using OutputType = typename FindAccumulatorType<ArrowType>::Type;
 
-  ASSERT_OK_AND_ASSIGN(Datum result, Sum(input));
+  ASSERT_OK_AND_ASSIGN(Datum result, Sum(input, options));
   DatumEqual<OutputType>::EnsureEqual(result, expected);
 }
 
 template <typename ArrowType>
-void ValidateSum(const std::shared_ptr<ChunkedArray>& input, Datum expected) {
+void ValidateSum(const std::shared_ptr<ChunkedArray>& input, Datum expected,
+                 const ScalarAggregateOptions& options) {
   using OutputType = typename FindAccumulatorType<ArrowType>::Type;
 
-  ASSERT_OK_AND_ASSIGN(Datum result, Sum(input));
+  ASSERT_OK_AND_ASSIGN(Datum result, Sum(input, options));
   DatumEqual<OutputType>::EnsureEqual(result, expected);
 }
 
 template <typename ArrowType>
-void ValidateSum(const char* json, Datum expected) {
+void ValidateSum(
+    const char* json, Datum expected,
+    const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults()) {
   auto array = ArrayFromJSON(TypeTraits<ArrowType>::type_singleton(), json);
-  ValidateSum<ArrowType>(*array, expected);
+  ValidateSum<ArrowType>(*array, expected, options);
 }
 
 template <typename ArrowType>
-void ValidateSum(const std::vector<std::string>& json, Datum expected) {
+void ValidateSum(
+    const std::vector<std::string>& json, Datum expected,
+    const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults()) {
   auto array = ChunkedArrayFromJSON(TypeTraits<ArrowType>::type_singleton(), json);
-  ValidateSum<ArrowType>(array, expected);
+  ValidateSum<ArrowType>(array, expected, options);
 }
 
 template <typename ArrowType>
-void ValidateSum(const Array& array) {
-  ValidateSum<ArrowType>(array, NaiveSum<ArrowType>(array));
+void ValidateSum(const Array& array, const ScalarAggregateOptions& options =
+                                         ScalarAggregateOptions::Defaults()) {
+  ValidateSum<ArrowType>(array, NaiveSum<ArrowType>(array), options);
 }
 
-using UnaryOp = Result<Datum>(const Datum&, ExecContext*);
+using UnaryOp = Result<Datum>(const Datum&, const ScalarAggregateOptions&, ExecContext*);
 
-template <UnaryOp& Op, typename ScalarType>
+template <UnaryOp& Op, typename ScalarAggregateOptions, typename ScalarType>
 void ValidateBooleanAgg(const std::string& json,
-                        const std::shared_ptr<ScalarType>& expected) {
+                        const std::shared_ptr<ScalarType>& expected,
+                        const ScalarAggregateOptions& options) {
   auto array = ArrayFromJSON(boolean(), json);
-  auto exp = Datum(expected);
-  ASSERT_OK_AND_ASSIGN(Datum result, Op(array, nullptr));
-  ASSERT_TRUE(result.Equals(exp));
+  ASSERT_OK_AND_ASSIGN(Datum result, Op(array, options, nullptr));
+
+  const auto& exp = Datum(expected);
+  const auto& res = checked_pointer_cast<ScalarType>(result.scalar());
+  if (!(std::isnan((double)res->value) && std::isnan((double)expected->value))) {
+    ASSERT_TRUE(result.Equals(exp));
+  }
 }
 
 TEST(TestBooleanAggregation, Sum) {
-  ValidateBooleanAgg<Sum>("[]", std::make_shared<UInt64Scalar>());
-  ValidateBooleanAgg<Sum>("[null]", std::make_shared<UInt64Scalar>());
-  ValidateBooleanAgg<Sum>("[null, false]", std::make_shared<UInt64Scalar>(0));
-  ValidateBooleanAgg<Sum>("[true]", std::make_shared<UInt64Scalar>(1));
-  ValidateBooleanAgg<Sum>("[true, false, true]", std::make_shared<UInt64Scalar>(2));
+  const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults();
+  ValidateBooleanAgg<Sum>("[]", std::make_shared<UInt64Scalar>(), options);
+  ValidateBooleanAgg<Sum>("[null]", std::make_shared<UInt64Scalar>(), options);
+  ValidateBooleanAgg<Sum>("[null, false]", std::make_shared<UInt64Scalar>(0), options);
+  ValidateBooleanAgg<Sum>("[true]", std::make_shared<UInt64Scalar>(1), options);
+  ValidateBooleanAgg<Sum>("[true, false, true]", std::make_shared<UInt64Scalar>(2),
+                          options);
   ValidateBooleanAgg<Sum>("[true, false, true, true, null]",
-                          std::make_shared<UInt64Scalar>(3));
+                          std::make_shared<UInt64Scalar>(3), options);
+
+  const ScalarAggregateOptions& options_min_count_zero =
+      ScalarAggregateOptions(/*skip_nulls=*/true, /*min_count=*/0);
+  ValidateBooleanAgg<Sum>("[]", std::make_shared<UInt64Scalar>(0),
+                          options_min_count_zero);
+  ValidateBooleanAgg<Sum>("[null]", std::make_shared<UInt64Scalar>(0),
+                          options_min_count_zero);
+
+  const char* json = "[true, null, false, null]";
+  ValidateBooleanAgg<Sum>(json, std::make_shared<UInt64Scalar>(1),
+                          ScalarAggregateOptions(/*skip_nulls=*/true, /*min_count=*/1));
+  ValidateBooleanAgg<Sum>(json, std::make_shared<UInt64Scalar>(1),
+                          ScalarAggregateOptions(/*skip_nulls=*/true, /*min_count=*/2));
+  ValidateBooleanAgg<Sum>(json, std::make_shared<UInt64Scalar>(),
+                          ScalarAggregateOptions(/*skip_nulls=*/true, /*min_count=*/3));
+  ValidateBooleanAgg<Sum>(json, std::make_shared<UInt64Scalar>(1),
+                          ScalarAggregateOptions(/*skip_nulls=*/false, /*min_count=*/1));
+  ValidateBooleanAgg<Sum>(json, std::make_shared<UInt64Scalar>(1),
+                          ScalarAggregateOptions(/*skip_nulls=*/false, /*min_count=*/2));
+  ValidateBooleanAgg<Sum>(json, std::make_shared<UInt64Scalar>(),
+                          ScalarAggregateOptions(/*skip_nulls=*/false, /*min_count=*/3));
 }
 
 TEST(TestBooleanAggregation, Mean) {
-  ValidateBooleanAgg<Mean>("[]", std::make_shared<DoubleScalar>());
-  ValidateBooleanAgg<Mean>("[null]", std::make_shared<DoubleScalar>());
-  ValidateBooleanAgg<Mean>("[null, false]", std::make_shared<DoubleScalar>(0));
-  ValidateBooleanAgg<Mean>("[true]", std::make_shared<DoubleScalar>(1));
+  const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults();
+  ValidateBooleanAgg<Mean>("[]", std::make_shared<DoubleScalar>(), options);
+  ValidateBooleanAgg<Mean>("[null]", std::make_shared<DoubleScalar>(), options);
+  ValidateBooleanAgg<Mean>("[null, false]", std::make_shared<DoubleScalar>(0), options);
+  ValidateBooleanAgg<Mean>("[true]", std::make_shared<DoubleScalar>(1), options);
   ValidateBooleanAgg<Mean>("[true, false, true, false]",
-                           std::make_shared<DoubleScalar>(0.5));
-  ValidateBooleanAgg<Mean>("[true, null]", std::make_shared<DoubleScalar>(1));
+                           std::make_shared<DoubleScalar>(0.5), options);
+  ValidateBooleanAgg<Mean>("[true, null]", std::make_shared<DoubleScalar>(1), options);
   ValidateBooleanAgg<Mean>("[true, null, false, true, true]",
-                           std::make_shared<DoubleScalar>(0.75));
+                           std::make_shared<DoubleScalar>(0.75), options);
   ValidateBooleanAgg<Mean>("[true, null, false, false, false]",
-                           std::make_shared<DoubleScalar>(0.25));
+                           std::make_shared<DoubleScalar>(0.25), options);
+
+  const ScalarAggregateOptions& options_min_count_zero =
+      ScalarAggregateOptions(/*skip_nulls=*/true, /*min_count=*/0);
+  ValidateBooleanAgg<Mean>("[]", std::make_shared<DoubleScalar>(NAN),
+                           options_min_count_zero);
+  ValidateBooleanAgg<Mean>("[null]", std::make_shared<DoubleScalar>(NAN),
+                           options_min_count_zero);
+
+  const char* json = "[true, null, false, null]";
+  ValidateBooleanAgg<Mean>(json, std::make_shared<DoubleScalar>(0.5),
+                           ScalarAggregateOptions(/*skip_nulls=*/true, /*min_count=*/1));
+  ValidateBooleanAgg<Mean>(json, std::make_shared<DoubleScalar>(0.5),
+                           ScalarAggregateOptions(/*skip_nulls=*/true, /*min_count=*/2));
+  ValidateBooleanAgg<Mean>(json, std::make_shared<DoubleScalar>(),
+                           ScalarAggregateOptions(/*skip_nulls=*/true, /*min_count=*/3));
+  ValidateBooleanAgg<Mean>(json, std::make_shared<DoubleScalar>(0.5),
+                           ScalarAggregateOptions(/*skip_nulls=*/false, /*min_count=*/1));
+  ValidateBooleanAgg<Mean>(json, std::make_shared<DoubleScalar>(0.5),
+                           ScalarAggregateOptions(/*skip_nulls=*/false, /*min_count=*/2));
+  ValidateBooleanAgg<Mean>(json, std::make_shared<DoubleScalar>(),
+                           ScalarAggregateOptions(/*skip_nulls=*/false, /*min_count=*/3));
 }
 
 template <typename ArrowType>
@@ -199,13 +257,56 @@ TYPED_TEST(TestNumericSumKernel, SimpleSum) {
   ValidateSum<TypeParam>(chunks,
                          Datum(std::make_shared<ScalarType>(static_cast<T>(5 * 6 / 2))));
 
+  const ScalarAggregateOptions& options =
+      ScalarAggregateOptions(/*skip_nulls=*/true, /*min_count=*/0);
+
+  ValidateSum<TypeParam>("[]", Datum(std::make_shared<ScalarType>(static_cast<T>(0))),
+                         options);
+
+  ValidateSum<TypeParam>("[null]", Datum(std::make_shared<ScalarType>(static_cast<T>(0))),
+                         options);
+
   chunks = {};
-  ValidateSum<TypeParam>(chunks,
-                         Datum(std::make_shared<ScalarType>()));  // null
+  ValidateSum<TypeParam>(chunks, Datum(std::make_shared<ScalarType>(static_cast<T>(0))),
+                         options);
 
   const T expected_result = static_cast<T>(14);
   ValidateSum<TypeParam>("[1, null, 3, null, 3, null, 7]",
-                         Datum(std::make_shared<ScalarType>(expected_result)));
+                         Datum(std::make_shared<ScalarType>(expected_result)), options);
+}
+
+TYPED_TEST_SUITE(TestNumericSumKernel, NumericArrowTypes);
+TYPED_TEST(TestNumericSumKernel, ScalarAggregateOptions) {
+  using SumType = typename FindAccumulatorType<TypeParam>::Type;
+  using ScalarType = typename TypeTraits<SumType>::ScalarType;
+  using T = typename TypeParam::c_type;
+
+  const T expected_result = static_cast<T>(14);
+  auto null_result = Datum(std::make_shared<ScalarType>());
+  auto zero_result = Datum(std::make_shared<ScalarType>(static_cast<T>(0)));
+  auto result = Datum(std::make_shared<ScalarType>(expected_result));
+  const char* json = "[1, null, 3, null, 3, null, 7]";
+
+  ValidateSum<TypeParam>("[]", zero_result,
+                         ScalarAggregateOptions(/*skip_nulls=*/true, /*min_count=*/0));
+  ValidateSum<TypeParam>("[null]", zero_result,
+                         ScalarAggregateOptions(/*skip_nulls=*/true, /*min_count=*/0));
+  ValidateSum<TypeParam>(json, result,
+                         ScalarAggregateOptions(/*skip_nulls=*/true, /*min_count=*/3));
+  ValidateSum<TypeParam>(json, result,
+                         ScalarAggregateOptions(/*skip_nulls=*/true, /*min_count=*/4));
+  ValidateSum<TypeParam>(json, null_result,
+                         ScalarAggregateOptions(/*skip_nulls=*/true, /*min_count=*/5));
+  ValidateSum<TypeParam>("[]", null_result,
+                         ScalarAggregateOptions(/*skip_nulls=*/true, /*min_count=*/1));
+  ValidateSum<TypeParam>("[null]", null_result,
+                         ScalarAggregateOptions(/*skip_nulls=*/true, /*min_count=*/1));
+  ValidateSum<TypeParam>(json, result,
+                         ScalarAggregateOptions(/*skip_nulls=*/false, /*min_count=*/3));
+  ValidateSum<TypeParam>(json, result,
+                         ScalarAggregateOptions(/*skip_nulls=*/false, /*min_count=*/4));
+  ValidateSum<TypeParam>(json, null_result,
+                         ScalarAggregateOptions(/*skip_nulls=*/false, /*min_count=*/5));
 }
 
 template <typename ArrowType>
@@ -214,13 +315,15 @@ class TestRandomNumericSumKernel : public ::testing::Test {};
 TYPED_TEST_SUITE(TestRandomNumericSumKernel, NumericArrowTypes);
 TYPED_TEST(TestRandomNumericSumKernel, RandomArraySum) {
   auto rand = random::RandomArrayGenerator(0x5487655);
+  const ScalarAggregateOptions& options =
+      ScalarAggregateOptions(/*skip_nulls=*/true, /*min_count=*/1);
   // Test size up to 1<<13 (8192).
   for (size_t i = 3; i < 14; i += 2) {
     for (auto null_probability : {0.0, 0.001, 0.1, 0.5, 0.999, 1.0}) {
       for (auto length_adjust : {-2, -1, 0, 1, 2}) {
         int64_t length = (1UL << i) + length_adjust;
         auto array = rand.Numeric<TypeParam>(length, 0, 100, null_probability);
-        ValidateSum<TypeParam>(*array);
+        ValidateSum<TypeParam>(*array, options);
       }
     }
   }
@@ -240,12 +343,14 @@ TYPED_TEST(TestRandomNumericSumKernel, RandomArraySumOverflow) {
   int64_t length = 1024;
 
   auto rand = random::RandomArrayGenerator(0x5487655);
+  const ScalarAggregateOptions& options =
+      ScalarAggregateOptions(/*skip_nulls=*/true, /*min_count=*/1);
   for (auto null_probability : {0.0, 0.1, 0.5, 1.0}) {
     // Test overflow on the original type
     auto array = rand.Numeric<TypeParam>(length, max - 200, max - 100, null_probability);
-    ValidateSum<TypeParam>(*array);
+    ValidateSum<TypeParam>(*array, options);
     array = rand.Numeric<TypeParam>(length, min + 100, min + 200, null_probability);
-    ValidateSum<TypeParam>(*array);
+    ValidateSum<TypeParam>(*array, options);
   }
 }
 
@@ -309,8 +414,8 @@ static CountPair NaiveCount(const Array& array) {
 }
 
 void ValidateCount(const Array& input, CountPair expected) {
-  CountOptions all = CountOptions(CountOptions::COUNT_NON_NULL);
-  CountOptions nulls = CountOptions(CountOptions::COUNT_NULL);
+  ScalarAggregateOptions all = ScalarAggregateOptions(/*skip_nulls=*/true);
+  ScalarAggregateOptions nulls = ScalarAggregateOptions(/*skip_nulls=*/false);
 
   ASSERT_OK_AND_ASSIGN(Datum result, Count(input, all));
   AssertDatumsEqual(result, Datum(expected.first));
@@ -374,22 +479,31 @@ static Datum NaiveMean(const Array& array) {
 }
 
 template <typename ArrowType>
-void ValidateMean(const Array& input, Datum expected) {
+void ValidateMean(const Array& input, Datum expected,
+                  const ScalarAggregateOptions& options) {
   using OutputType = typename FindAccumulatorType<DoubleType>::Type;
 
-  ASSERT_OK_AND_ASSIGN(Datum result, Mean(input));
-  DatumEqual<OutputType>::EnsureEqual(result, expected);
+  ASSERT_OK_AND_ASSIGN(Datum result, Mean(input, options, nullptr));
+  using ScalarType = typename TypeTraits<OutputType>::ScalarType;
+  const auto& res = checked_pointer_cast<ScalarType>(result.scalar());
+  const auto& exp = checked_pointer_cast<ScalarType>(expected.scalar());
+  if (!(std::isnan(res->value) && std::isnan(exp->value))) {
+    DatumEqual<OutputType>::EnsureEqual(result, expected);
+  }
 }
 
 template <typename ArrowType>
-void ValidateMean(const char* json, Datum expected) {
+void ValidateMean(
+    const char* json, Datum expected,
+    const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults()) {
   auto array = ArrayFromJSON(TypeTraits<ArrowType>::type_singleton(), json);
-  ValidateMean<ArrowType>(*array, expected);
+  ValidateMean<ArrowType>(*array, expected, options);
 }
 
 template <typename ArrowType>
-void ValidateMean(const Array& array) {
-  ValidateMean<ArrowType>(array, NaiveMean<ArrowType>(array));
+void ValidateMean(const Array& array, const ScalarAggregateOptions& options =
+                                          ScalarAggregateOptions::Defaults()) {
+  ValidateMean<ArrowType>(array, NaiveMean<ArrowType>(array), options);
 }
 
 template <typename ArrowType>
@@ -399,6 +513,13 @@ TYPED_TEST_SUITE(TestMeanKernelNumeric, NumericArrowTypes);
 TYPED_TEST(TestMeanKernelNumeric, SimpleMean) {
   using ScalarType = typename TypeTraits<DoubleType>::ScalarType;
 
+  const ScalarAggregateOptions& options =
+      ScalarAggregateOptions(/*skip_nulls=*/true, /*min_count=*/0);
+
+  ValidateMean<TypeParam>("[]", Datum(std::make_shared<ScalarType>(NAN)), options);
+
+  ValidateMean<TypeParam>("[null]", Datum(std::make_shared<ScalarType>(NAN)), options);
+
   ValidateMean<TypeParam>("[]", Datum(std::make_shared<ScalarType>()));
 
   ValidateMean<TypeParam>("[null]", Datum(std::make_shared<ScalarType>()));
@@ -415,19 +536,64 @@ TYPED_TEST(TestMeanKernelNumeric, SimpleMean) {
                           Datum(std::make_shared<ScalarType>(1.0)));
 }
 
+TYPED_TEST_SUITE(TestMeanKernelNumeric, NumericArrowTypes);
+TYPED_TEST(TestMeanKernelNumeric, ScalarAggregateOptions) {
+  using ScalarType = typename TypeTraits<DoubleType>::ScalarType;
+  auto expected_result = Datum(std::make_shared<ScalarType>(2));
+  auto null_result = Datum(std::make_shared<ScalarType>());
+  auto nan_result = Datum(std::make_shared<ScalarType>(NAN));
+  const char* json = "[1, null, 2, 2, null, 7]";
+
+  ValidateMean<TypeParam>("[]", nan_result,
+                          ScalarAggregateOptions(/*skip_nulls=*/true, /*min_count=*/0));
+  ValidateMean<TypeParam>("[null]", nan_result,
+                          ScalarAggregateOptions(/*skip_nulls=*/true, /*min_count=*/0));
+  ValidateMean<TypeParam>("[]", null_result,
+                          ScalarAggregateOptions(/*skip_nulls=*/true, /*min_count=*/1));
+  ValidateMean<TypeParam>("[null]", null_result,
+                          ScalarAggregateOptions(/*skip_nulls=*/true, /*min_count=*/1));
+  ValidateMean<TypeParam>(json, expected_result,
+                          ScalarAggregateOptions(/*skip_nulls=*/true, /*min_count=*/0));
+  ValidateMean<TypeParam>(json, expected_result,
+                          ScalarAggregateOptions(/*skip_nulls=*/true, /*min_count=*/3));
+  ValidateMean<TypeParam>(json, expected_result,
+                          ScalarAggregateOptions(/*skip_nulls=*/true, /*min_count=*/4));
+  ValidateMean<TypeParam>(json, null_result,
+                          ScalarAggregateOptions(/*skip_nulls=*/true, /*min_count=*/5));
+
+  ValidateMean<TypeParam>("[]", nan_result,
+                          ScalarAggregateOptions(/*skip_nulls=*/false, /*min_count=*/0));
+  ValidateMean<TypeParam>("[null]", nan_result,
+                          ScalarAggregateOptions(/*skip_nulls=*/false, /*min_count=*/0));
+  ValidateMean<TypeParam>("[]", null_result,
+                          ScalarAggregateOptions(/*skip_nulls=*/false, /*min_count=*/1));
+  ValidateMean<TypeParam>("[null]", null_result,
+                          ScalarAggregateOptions(/*skip_nulls=*/false, /*min_count=*/1));
+  ValidateMean<TypeParam>(json, expected_result,
+                          ScalarAggregateOptions(/*skip_nulls=*/false, /*min_count=*/0));
+  ValidateMean<TypeParam>(json, expected_result,
+                          ScalarAggregateOptions(/*skip_nulls=*/false, /*min_count=*/3));
+  ValidateMean<TypeParam>(json, expected_result,
+                          ScalarAggregateOptions(/*skip_nulls=*/false, /*min_count=*/4));
+  ValidateMean<TypeParam>(json, null_result,
+                          ScalarAggregateOptions(/*skip_nulls=*/false, /*min_count=*/15));
+}
+
 template <typename ArrowType>
 class TestRandomNumericMeanKernel : public ::testing::Test {};
 
 TYPED_TEST_SUITE(TestRandomNumericMeanKernel, NumericArrowTypes);
 TYPED_TEST(TestRandomNumericMeanKernel, RandomArrayMean) {
   auto rand = random::RandomArrayGenerator(0x8afc055);
+  const ScalarAggregateOptions& options =
+      ScalarAggregateOptions(/*skip_nulls=*/true, /*min_count=*/1);
   // Test size up to 1<<13 (8192).
   for (size_t i = 3; i < 14; i += 2) {
     for (auto null_probability : {0.0, 0.001, 0.1, 0.5, 0.999, 1.0}) {
       for (auto length_adjust : {-2, -1, 0, 1, 2}) {
         int64_t length = (1UL << i) + length_adjust;
         auto array = rand.Numeric<TypeParam>(length, 0, 100, null_probability);
-        ValidateMean<TypeParam>(*array);
+        ValidateMean<TypeParam>(*array, options);
       }
     }
   }
@@ -447,12 +613,14 @@ TYPED_TEST(TestRandomNumericMeanKernel, RandomArrayMeanOverflow) {
   int64_t length = 1024;
 
   auto rand = random::RandomArrayGenerator(0x8afc055);
+  const ScalarAggregateOptions& options =
+      ScalarAggregateOptions(/*skip_nulls=*/true, /*min_count=*/1);
   for (auto null_probability : {0.0, 0.1, 0.5, 1.0}) {
     // Test overflow on the original type
     auto array = rand.Numeric<TypeParam>(length, max - 200, max - 100, null_probability);
-    ValidateMean<TypeParam>(*array);
+    ValidateMean<TypeParam>(*array, options);
     array = rand.Numeric<TypeParam>(length, min + 100, min + 200, null_probability);
-    ValidateMean<TypeParam>(*array);
+    ValidateMean<TypeParam>(*array, options);
   }
 }
 
@@ -469,7 +637,7 @@ class TestPrimitiveMinMaxKernel : public ::testing::Test {
 
  public:
   void AssertMinMaxIs(const Datum& array, c_type expected_min, c_type expected_max,
-                      const MinMaxOptions& options) {
+                      const ScalarAggregateOptions& options) {
     ASSERT_OK_AND_ASSIGN(Datum out, MinMax(array, options));
     const StructScalar& value = out.scalar_as<StructScalar>();
 
@@ -481,31 +649,32 @@ class TestPrimitiveMinMaxKernel : public ::testing::Test {
   }
 
   void AssertMinMaxIs(const std::string& json, c_type expected_min, c_type expected_max,
-                      const MinMaxOptions& options) {
+                      const ScalarAggregateOptions& options) {
     auto array = ArrayFromJSON(type_singleton(), json);
     AssertMinMaxIs(array, expected_min, expected_max, options);
   }
 
   void AssertMinMaxIs(const std::vector<std::string>& json, c_type expected_min,
-                      c_type expected_max, const MinMaxOptions& options) {
+                      c_type expected_max, const ScalarAggregateOptions& options) {
     auto array = ChunkedArrayFromJSON(type_singleton(), json);
     AssertMinMaxIs(array, expected_min, expected_max, options);
   }
 
-  void AssertMinMaxIsNull(const Datum& array, const MinMaxOptions& options) {
+  void AssertMinMaxIsNull(const Datum& array, const ScalarAggregateOptions& options) {
     ASSERT_OK_AND_ASSIGN(Datum out, MinMax(array, options));
     for (const auto& val : out.scalar_as<StructScalar>().value) {
       ASSERT_FALSE(val->is_valid);
     }
   }
 
-  void AssertMinMaxIsNull(const std::string& json, const MinMaxOptions& options) {
+  void AssertMinMaxIsNull(const std::string& json,
+                          const ScalarAggregateOptions& options) {
     auto array = ArrayFromJSON(type_singleton(), json);
     AssertMinMaxIsNull(array, options);
   }
 
   void AssertMinMaxIsNull(const std::vector<std::string>& json,
-                          const MinMaxOptions& options) {
+                          const ScalarAggregateOptions& options) {
     auto array = ChunkedArrayFromJSON(type_singleton(), json);
     AssertMinMaxIsNull(array, options);
   }
@@ -522,13 +691,14 @@ class TestFloatingMinMaxKernel : public TestPrimitiveMinMaxKernel<ArrowType> {};
 class TestBooleanMinMaxKernel : public TestPrimitiveMinMaxKernel<BooleanType> {};
 
 TEST_F(TestBooleanMinMaxKernel, Basics) {
-  MinMaxOptions options;
+  ScalarAggregateOptions options;
   std::vector<std::string> chunked_input0 = {"[]", "[]"};
   std::vector<std::string> chunked_input1 = {"[true, true, null]", "[true, null]"};
   std::vector<std::string> chunked_input2 = {"[false, false, false]", "[false]"};
   std::vector<std::string> chunked_input3 = {"[true, null]", "[null, false]"};
 
   // SKIP nulls by default
+  options = ScalarAggregateOptions(/*skip_nulls=*/true, /*min_count=*/1);
   this->AssertMinMaxIsNull("[]", options);
   this->AssertMinMaxIsNull("[null, null, null]", options);
   this->AssertMinMaxIs("[false, false, false]", false, false, options);
@@ -541,7 +711,7 @@ TEST_F(TestBooleanMinMaxKernel, Basics) {
   this->AssertMinMaxIs(chunked_input2, false, false, options);
   this->AssertMinMaxIs(chunked_input3, false, true, options);
 
-  options = MinMaxOptions(MinMaxOptions::EMIT_NULL);
+  options = ScalarAggregateOptions(/*skip_nulls=*/false, /*min_count=*/1);
   this->AssertMinMaxIsNull("[]", options);
   this->AssertMinMaxIsNull("[null, null, null]", options);
   this->AssertMinMaxIsNull("[false, null, false]", options);
@@ -553,11 +723,15 @@ TEST_F(TestBooleanMinMaxKernel, Basics) {
   this->AssertMinMaxIsNull(chunked_input1, options);
   this->AssertMinMaxIs(chunked_input2, false, false, options);
   this->AssertMinMaxIsNull(chunked_input3, options);
+
+  options = ScalarAggregateOptions(/*skip_nulls=*/true, /*min_count=*/0);
+  this->AssertMinMaxIsNull("[]", options);
+  this->AssertMinMaxIsNull("[null]", options);
 }
 
 TYPED_TEST_SUITE(TestIntegerMinMaxKernel, IntegralArrowTypes);
 TYPED_TEST(TestIntegerMinMaxKernel, Basics) {
-  MinMaxOptions options;
+  ScalarAggregateOptions options;
   std::vector<std::string> chunked_input1 = {"[5, 1, 2, 3, 4]", "[9, 1, null, 3, 4]"};
   std::vector<std::string> chunked_input2 = {"[5, null, 2, 3, 4]", "[9, 1, 2, 3, 4]"};
   std::vector<std::string> chunked_input3 = {"[5, 1, 2, 3, null]", "[9, 1, null, 3, 4]"};
@@ -571,7 +745,7 @@ TYPED_TEST(TestIntegerMinMaxKernel, Basics) {
   this->AssertMinMaxIs(chunked_input2, 1, 9, options);
   this->AssertMinMaxIs(chunked_input3, 1, 9, options);
 
-  options = MinMaxOptions(MinMaxOptions::EMIT_NULL);
+  options = ScalarAggregateOptions(/*skip_nulls=*/false);
   this->AssertMinMaxIs("[5, 1, 2, 3, 4]", 1, 5, options);
   // output null
   this->AssertMinMaxIsNull("[5, null, 2, 3, 4]", options);
@@ -583,7 +757,7 @@ TYPED_TEST(TestIntegerMinMaxKernel, Basics) {
 
 TYPED_TEST_SUITE(TestFloatingMinMaxKernel, RealArrowTypes);
 TYPED_TEST(TestFloatingMinMaxKernel, Floats) {
-  MinMaxOptions options;
+  ScalarAggregateOptions options;
   std::vector<std::string> chunked_input1 = {"[5, 1, 2, 3, 4]", "[9, 1, null, 3, 4]"};
   std::vector<std::string> chunked_input2 = {"[5, null, 2, 3, 4]", "[9, 1, 2, 3, 4]"};
   std::vector<std::string> chunked_input3 = {"[5, 1, 2, 3, null]", "[9, 1, null, 3, 4]"};
@@ -598,7 +772,7 @@ TYPED_TEST(TestFloatingMinMaxKernel, Floats) {
   this->AssertMinMaxIs(chunked_input2, 1, 9, options);
   this->AssertMinMaxIs(chunked_input3, 1, 9, options);
 
-  options = MinMaxOptions(MinMaxOptions::EMIT_NULL);
+  options = ScalarAggregateOptions(/*skip_nulls=*/false);
   this->AssertMinMaxIs("[5, 1, 2, 3, 4]", 1, 5, options);
   this->AssertMinMaxIs("[5, -Inf, 2, 3, 4]", -INFINITY, 5, options);
   // output null
@@ -609,6 +783,14 @@ TYPED_TEST(TestFloatingMinMaxKernel, Floats) {
   this->AssertMinMaxIsNull(chunked_input1, options);
   this->AssertMinMaxIsNull(chunked_input2, options);
   this->AssertMinMaxIsNull(chunked_input3, options);
+
+  options = ScalarAggregateOptions(/*skip_nulls=*/true, /*min_count=*/0);
+  this->AssertMinMaxIsNull("[]", options);
+  this->AssertMinMaxIsNull("[null]", options);
+
+  options = ScalarAggregateOptions(/*skip_nulls=*/false, /*min_count=*/1);
+  this->AssertMinMaxIsNull("[]", options);
+  this->AssertMinMaxIsNull("[null]", options);
 }
 
 TYPED_TEST(TestFloatingMinMaxKernel, DefaultOptions) {
@@ -616,7 +798,7 @@ TYPED_TEST(TestFloatingMinMaxKernel, DefaultOptions) {
 
   ASSERT_OK_AND_ASSIGN(auto no_options_provided, CallFunction("min_max", {values}));
 
-  auto default_options = MinMaxOptions::Defaults();
+  auto default_options = ScalarAggregateOptions::Defaults();
   ASSERT_OK_AND_ASSIGN(auto explicit_defaults,
                        CallFunction("min_max", {values}, &default_options));
 
@@ -711,11 +893,11 @@ static enable_if_floating_point<ArrowType, MinMaxResult<ArrowType>> NaiveMinMax(
 }
 
 template <typename ArrowType>
-void ValidateMinMax(const Array& array) {
+void ValidateMinMax(const Array& array, const ScalarAggregateOptions& options) {
   using Traits = TypeTraits<ArrowType>;
   using ScalarType = typename Traits::ScalarType;
 
-  ASSERT_OK_AND_ASSIGN(Datum out, MinMax(array));
+  ASSERT_OK_AND_ASSIGN(Datum out, MinMax(array, options));
   const StructScalar& value = out.scalar_as<StructScalar>();
 
   auto expected = NaiveMinMax<ArrowType>(array);
@@ -739,6 +921,8 @@ class TestRandomNumericMinMaxKernel : public ::testing::Test {};
 TYPED_TEST_SUITE(TestRandomNumericMinMaxKernel, NumericArrowTypes);
 TYPED_TEST(TestRandomNumericMinMaxKernel, RandomArrayMinMax) {
   auto rand = random::RandomArrayGenerator(0x8afc055);
+  const ScalarAggregateOptions& options =
+      ScalarAggregateOptions(/*skip_nulls=*/true, /*min_count=*/1);
   // Test size up to 1<<11 (2048).
   for (size_t i = 3; i < 12; i += 2) {
     for (auto null_probability : {0.0, 0.01, 0.1, 0.5, 0.99, 1.0}) {
@@ -746,7 +930,7 @@ TYPED_TEST(TestRandomNumericMinMaxKernel, RandomArrayMinMax) {
       auto array = rand.Numeric<TypeParam>(base_length, 0, 100, null_probability);
       for (auto length_adjust : {-2, -1, 0, 1, 2}) {
         int64_t length = (1UL << i) + length_adjust;
-        ValidateMinMax<TypeParam>(*array->Slice(0, length));
+        ValidateMinMax<TypeParam>(*array->Slice(0, length), options);
       }
     }
   }
diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate.cc b/cpp/src/arrow/compute/kernels/hash_aggregate.cc
index 0e5c8ace53f..0b6e9e9b2ea 100644
--- a/cpp/src/arrow/compute/kernels/hash_aggregate.cc
+++ b/cpp/src/arrow/compute/kernels/hash_aggregate.cc
@@ -762,7 +762,7 @@ struct GroupedAggregator : KernelState {
 struct GroupedCountImpl : public GroupedAggregator {
   Status Init(ExecContext* ctx, const FunctionOptions* options,
               const std::shared_ptr<DataType>&) override {
-    options_ = checked_cast<const CountOptions&>(*options);
+    options_ = checked_cast<const ScalarAggregateOptions&>(*options);
     counts_ = BufferBuilder(ctx->memory_pool());
     return Status::OK();
   }
@@ -778,7 +778,7 @@ struct GroupedCountImpl : public GroupedAggregator {
 
     const auto& input = batch[0].array();
 
-    if (options_.count_mode == CountOptions::COUNT_NULL) {
+    if (!options_.skip_nulls) {
       if (input->GetNullCount() != 0) {
         for (int64_t i = 0, input_i = input->offset; i < input->length; ++i, ++input_i) {
           auto g = group_ids[i];
@@ -808,7 +808,7 @@ struct GroupedCountImpl : public GroupedAggregator {
   std::shared_ptr<DataType> out_type() const override { return int64(); }
 
   int64_t num_groups_ = 0;
-  CountOptions options_;
+  ScalarAggregateOptions options_;
   BufferBuilder counts_;
 };
 
@@ -995,7 +995,7 @@ struct GroupedMinMaxImpl : public GroupedAggregator {
 
   Status Init(ExecContext* ctx, const FunctionOptions* options,
               const std::shared_ptr<DataType>& input_type) override {
-    options_ = *checked_cast<const MinMaxOptions*>(options);
+    options_ = *checked_cast<const ScalarAggregateOptions*>(options);
     type_ = input_type;
 
     mins_ = BufferBuilder(ctx->memory_pool());
@@ -1035,7 +1035,7 @@ struct GroupedMinMaxImpl : public GroupedAggregator {
     // aggregation for group is valid if there was at least one value in that group
     ARROW_ASSIGN_OR_RAISE(auto null_bitmap, has_values_.Finish());
 
-    if (options_.null_handling == MinMaxOptions::EMIT_NULL) {
+    if (!options_.skip_nulls) {
       // ... and there were no nulls in that group
       ARROW_ASSIGN_OR_RAISE(auto has_nulls, has_nulls_.Finish());
       arrow::internal::BitmapAndNot(null_bitmap->data(), 0, has_nulls->data(), 0,
@@ -1060,7 +1060,7 @@ struct GroupedMinMaxImpl : public GroupedAggregator {
   std::shared_ptr<DataType> type_;
   ConsumeImpl consume_impl_;
   ResizeImpl resize_min_impl_, resize_max_impl_, resize_bitmap_impl_;
-  MinMaxOptions options_;
+  ScalarAggregateOptions options_;
 };
 
 template <typename Impl>
@@ -1319,9 +1319,9 @@ Result<std::shared_ptr<ListArray>> Grouper::MakeGroupings(const UInt32Array& ids
 namespace {
 const FunctionDoc hash_count_doc{"Count the number of null / non-null values",
                                  ("By default, non-null values are counted.\n"
-                                  "This can be changed through CountOptions."),
+                                  "This can be changed through ScalarAggregateOptions."),
                                  {"array", "group_id_array", "group_count"},
-                                 "CountOptions"};
+                                 "ScalarAggregateOptions"};
 
 const FunctionDoc hash_sum_doc{"Sum values of a numeric array",
                                ("Null values are ignored."),
@@ -1330,16 +1330,17 @@ const FunctionDoc hash_sum_doc{"Sum values of a numeric array",
 const FunctionDoc hash_min_max_doc{
     "Compute the minimum and maximum values of a numeric array",
     ("Null values are ignored by default.\n"
-     "This can be changed through MinMaxOptions."),
+     "This can be changed through ScalarAggregateOptions."),
     {"array", "group_id_array", "group_count"},
-    "MinMaxOptions"};
+    "ScalarAggregateOptions"};
 }  // namespace
 
 void RegisterHashAggregateBasic(FunctionRegistry* registry) {
   {
-    static auto default_count_options = CountOptions::Defaults();
+    static auto default_scalar_aggregate_options = ScalarAggregateOptions::Defaults();
     auto func = std::make_shared<HashAggregateFunction>(
-        "hash_count", Arity::Ternary(), &hash_count_doc, &default_count_options);
+        "hash_count", Arity::Ternary(), &hash_count_doc,
+        &default_scalar_aggregate_options);
     DCHECK_OK(func->AddKernel(MakeKernel<GroupedCountImpl>(ValueDescr::ARRAY)));
     DCHECK_OK(registry->AddFunction(std::move(func)));
   }
@@ -1352,9 +1353,10 @@ void RegisterHashAggregateBasic(FunctionRegistry* registry) {
   }
 
   {
-    static auto default_minmax_options = MinMaxOptions::Defaults();
+    static auto default_scalar_aggregate_options = ScalarAggregateOptions::Defaults();
     auto func = std::make_shared<HashAggregateFunction>(
-        "hash_min_max", Arity::Ternary(), &hash_min_max_doc, &default_minmax_options);
+        "hash_min_max", Arity::Ternary(), &hash_min_max_doc,
+        &default_scalar_aggregate_options);
     DCHECK_OK(func->AddKernel(MakeKernel<GroupedMinMaxImpl>(ValueDescr::ARRAY)));
     DCHECK_OK(registry->AddFunction(std::move(func)));
   }
diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc b/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc
index a0d2fd208a9..8e3278b12be 100644
--- a/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc
+++ b/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc
@@ -609,7 +609,7 @@ TEST(GroupBy, CountAndSum) {
     [null,  3]
   ])");
 
-  CountOptions count_options;
+  ScalarAggregateOptions count_options;
   ASSERT_OK_AND_ASSIGN(
       Datum aggregated_and_grouped,
       internal::GroupBy(
@@ -701,18 +701,16 @@ TEST(GroupBy, ConcreteCaseWithValidateGroupBy) {
     [null,  "gama"]
   ])");
 
-  CountOptions count_non_null{CountOptions::COUNT_NON_NULL},
-      count_null{CountOptions::COUNT_NULL};
-
-  MinMaxOptions emit_null{MinMaxOptions::EMIT_NULL};
+  ScalarAggregateOptions keepna{false, 1};
+  ScalarAggregateOptions skipna{true, 1};
 
   using internal::Aggregate;
   for (auto agg : {
            Aggregate{"hash_sum", nullptr},
-           Aggregate{"hash_count", &count_non_null},
-           Aggregate{"hash_count", &count_null},
+           Aggregate{"hash_count", &skipna},
+           Aggregate{"hash_count", &keepna},
            Aggregate{"hash_min_max", nullptr},
-           Aggregate{"hash_min_max", &emit_null},
+           Aggregate{"hash_min_max", &keepna},
        }) {
     SCOPED_TRACE(agg.function);
     ValidateGroupBy({agg}, {batch->GetColumnByName("argument")},
@@ -729,13 +727,12 @@ TEST(GroupBy, CountNull) {
     [3.0, "gama"]
   ])");
 
-  CountOptions count_non_null{CountOptions::COUNT_NON_NULL},
-      count_null{CountOptions::COUNT_NULL};
+  ScalarAggregateOptions keepna{false}, skipna{true};
 
   using internal::Aggregate;
   for (auto agg : {
-           Aggregate{"hash_count", &count_non_null},
-           Aggregate{"hash_count", &count_null},
+           Aggregate{"hash_count", &keepna},
+           Aggregate{"hash_count", &skipna},
        }) {
     SCOPED_TRACE(agg.function);
     ValidateGroupBy({agg}, {batch->GetColumnByName("argument")},
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index f18ed4ea0eb..b20af43ac37 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -82,15 +82,15 @@ Many compute functions are also available directly as concrete APIs, here
 Some functions accept or require an options structure that determines the
 exact semantics of the function::
 
-   MinMaxOptions min_max_options;
-   min_max_options.null_handling = MinMaxOptions::EMIT_NULL;
+   ScalarAggregateOptions scalar_aggregate_options;
+   scalar_aggregate_options.skip_nulls = false;
 
    std::shared_ptr<arrow::Array> array = ...;
    arrow::Datum min_max;
 
    ARROW_ASSIGN_OR_RAISE(min_max,
                          arrow::compute::CallFunction("min_max", {array},
-                                                      &min_max_options));
+                                                      &scalar_aggregate_options));
 
    // Unpack struct scalar result (a two-field {"min", "max"} scalar)
    std::shared_ptr<arrow::Scalar> min_value, max_value;
@@ -190,11 +190,11 @@ Aggregations
 +--------------------------+------------+--------------------+-----------------------+--------------------------------------------+
 | any                      | Unary      | Boolean            | Scalar Boolean        |                                            |
 +--------------------------+------------+--------------------+-----------------------+--------------------------------------------+
-| count                    | Unary      | Any                | Scalar Int64          | :struct:`CountOptions`                     |
+| count                    | Unary      | Any                | Scalar Int64          | :struct:`ScalarAggregateOptions`           |
 +--------------------------+------------+--------------------+-----------------------+--------------------------------------------+
-| mean                     | Unary      | Numeric            | Scalar Float64        |                                            |
+| mean                     | Unary      | Numeric            | Scalar Float64        | :struct:`ScalarAggregateOptions`           |
 +--------------------------+------------+--------------------+-----------------------+--------------------------------------------+
-| min_max                  | Unary      | Numeric            | Scalar Struct  (1)    | :struct:`MinMaxOptions`                    |
+| min_max                  | Unary      | Numeric            | Scalar Struct  (1)    | :struct:`ScalarAggregateOptions`           |
 +--------------------------+------------+--------------------+-----------------------+--------------------------------------------+
 | mode                     | Unary      | Numeric            | Struct  (2)           | :struct:`ModeOptions`                      |
 +--------------------------+------------+--------------------+-----------------------+--------------------------------------------+
@@ -202,7 +202,7 @@ Aggregations
 +--------------------------+------------+--------------------+-----------------------+--------------------------------------------+
 | stddev                   | Unary      | Numeric            | Scalar Float64        | :struct:`VarianceOptions`                  |
 +--------------------------+------------+--------------------+-----------------------+--------------------------------------------+
-| sum                      | Unary      | Numeric            | Scalar Numeric (4)    |                                            |
+| sum                      | Unary      | Numeric            | Scalar Numeric (4)    | :struct:`ScalarAggregateOptions`           |
 +--------------------------+------------+--------------------+-----------------------+--------------------------------------------+
 | tdigest                  | Unary      | Numeric            | Scalar Float64        | :struct:`TDigestOptions`                   |
 +--------------------------+------------+--------------------+-----------------------+--------------------------------------------+
diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx
index 3af485343f2..aea72c457e0 100644
--- a/python/pyarrow/_compute.pyx
+++ b/python/pyarrow/_compute.pyx
@@ -824,54 +824,21 @@ class ProjectOptions(_ProjectOptions):
         self._set_options(field_names)
 
 
-cdef class _MinMaxOptions(FunctionOptions):
+cdef class _ScalarAggregateOptions(FunctionOptions):
     cdef:
-        unique_ptr[CMinMaxOptions] min_max_options
+        unique_ptr[CScalarAggregateOptions] scalar_aggregate_options
 
     cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.min_max_options.get()
-
-    def _set_options(self, null_handling):
-        if null_handling == 'skip':
-            self.min_max_options.reset(
-                new CMinMaxOptions(CMinMaxMode_SKIP))
-        elif null_handling == 'emit_null':
-            self.min_max_options.reset(
-                new CMinMaxOptions(CMinMaxMode_EMIT_NULL))
-        else:
-            raise ValueError(
-                '{!r} is not a valid null_handling'
-                .format(null_handling))
-
-
-class MinMaxOptions(_MinMaxOptions):
-    def __init__(self, null_handling='skip'):
-        self._set_options(null_handling)
-
+        return self.scalar_aggregate_options.get()
 
-cdef class _CountOptions(FunctionOptions):
-    cdef:
-        unique_ptr[CCountOptions] count_options
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.count_options.get()
-
-    def _set_options(self, count_mode):
-        if count_mode == 'count_null':
-            self.count_options.reset(
-                new CCountOptions(CCountMode_COUNT_NULL))
-        elif count_mode == 'count_non_null':
-            self.count_options.reset(
-                new CCountOptions(CCountMode_COUNT_NON_NULL))
-        else:
-            raise ValueError(
-                '{!r} is not a valid count_mode'
-                .format(count_mode))
+    def _set_options(self, skip_nulls, min_count):
+        self.scalar_aggregate_options.reset(
+            new CScalarAggregateOptions(skip_nulls, min_count))
 
 
-class CountOptions(_CountOptions):
-    def __init__(self, count_mode='count_non_null'):
-        self._set_options(count_mode)
+class ScalarAggregateOptions(_ScalarAggregateOptions):
+    def __init__(self, skip_nulls=True, min_count=1):
+        self._set_options(skip_nulls, min_count)
 
 
 cdef class _ModeOptions(FunctionOptions):
diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py
index 18d7fee8df0..cb6ba475b5f 100644
--- a/python/pyarrow/compute.py
+++ b/python/pyarrow/compute.py
@@ -31,13 +31,12 @@
     # Option classes
     ArraySortOptions,
     CastOptions,
-    CountOptions,
     DictionaryEncodeOptions,
     ExtractRegexOptions,
     FilterOptions,
     MatchSubstringOptions,
-    MinMaxOptions,
     ModeOptions,
+    ScalarAggregateOptions,
     SplitOptions,
     SplitPatternOptions,
     PartitionNthOptions,
@@ -128,7 +127,7 @@ def _decorate_compute_function(wrapper, exposed_name, func, option_class):
             options : pyarrow.compute.{0}, optional
                 Parameters altering compute function semantics
             **kwargs : optional
-                Parameters for {0} constructor.  Either `options`
+                Parameters for {0} constructor. Either `options`
                 or `**kwargs` can be passed, but not both at the same time.
             """.format(option_class.__name__))
 
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 4ac18d1e905..9b05359bdf4 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -1872,35 +1872,17 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil:
         CVarianceOptions(int ddof)
         int ddof
 
-    enum CMinMaxMode \
-            "arrow::compute::MinMaxOptions::Mode":
-        CMinMaxMode_SKIP \
-            "arrow::compute::MinMaxOptions::SKIP"
-        CMinMaxMode_EMIT_NULL \
-            "arrow::compute::MinMaxOptions::EMIT_NULL"
-
-    cdef cppclass CMinMaxOptions \
-            "arrow::compute::MinMaxOptions"(CFunctionOptions):
-        CMinMaxOptions(CMinMaxMode null_handling)
-        CMinMaxMode null_handling
+    cdef cppclass CScalarAggregateOptions \
+            "arrow::compute::ScalarAggregateOptions"(CFunctionOptions):
+        CScalarAggregateOptions(c_bool skip_nulls, int64_t min_count)
+        c_bool skip_nulls
+        int64_t min_count
 
     cdef cppclass CModeOptions \
             "arrow::compute::ModeOptions"(CFunctionOptions):
         CModeOptions(int64_t n)
         int64_t n
 
-    enum CCountMode \
-            "arrow::compute::CountOptions::Mode":
-        CCountMode_COUNT_NON_NULL \
-            "arrow::compute::CountOptions::COUNT_NON_NULL"
-        CCountMode_COUNT_NULL \
-            "arrow::compute::CountOptions::COUNT_NULL"
-
-    cdef cppclass CCountOptions \
-            "arrow::compute::CountOptions"(CFunctionOptions):
-        CCountOptions(CCountMode count_mode)
-        CCountMode count_mode
-
     cdef cppclass CPartitionNthOptions \
             "arrow::compute::PartitionNthOptions"(CFunctionOptions):
         CPartitionNthOptions(int64_t pivot)
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index fc87b2b4a19..db4c6ba1fe5 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -206,6 +206,14 @@ def test_sum_array(arrow_type):
     assert arr.sum().as_py() == 10
     assert pc.sum(arr).as_py() == 10
 
+    arr = pa.array([1, 2, 3, 4, None], type=arrow_type)
+    assert arr.sum().as_py() == 10
+    assert pc.sum(arr).as_py() == 10
+
+    arr = pa.array([None], type=arrow_type)
+    assert arr.sum().as_py() is None  # noqa: E711
+    assert pc.sum(arr).as_py() is None  # noqa: E711
+
     arr = pa.array([], type=arrow_type)
     assert arr.sum().as_py() is None  # noqa: E711
 
@@ -361,24 +369,24 @@ def test_min_max():
     data = [4, 5, 6, None, 1]
     s = pc.min_max(data)
     assert s.as_py() == {'min': 1, 'max': 6}
-    s = pc.min_max(data, options=pc.MinMaxOptions())
+    s = pc.min_max(data, options=pc.ScalarAggregateOptions())
     assert s.as_py() == {'min': 1, 'max': 6}
-    s = pc.min_max(data, options=pc.MinMaxOptions(null_handling='skip'))
+    s = pc.min_max(data, options=pc.ScalarAggregateOptions(skip_nulls=True))
     assert s.as_py() == {'min': 1, 'max': 6}
-    s = pc.min_max(data, options=pc.MinMaxOptions(null_handling='emit_null'))
+    s = pc.min_max(data, options=pc.ScalarAggregateOptions(skip_nulls=False))
     assert s.as_py() == {'min': None, 'max': None}
 
     # Options as dict of kwargs
-    s = pc.min_max(data, options={'null_handling': 'emit_null'})
+    s = pc.min_max(data, options={'skip_nulls': False})
     assert s.as_py() == {'min': None, 'max': None}
     # Options as named functions arguments
-    s = pc.min_max(data, null_handling='emit_null')
+    s = pc.min_max(data, skip_nulls=False)
     assert s.as_py() == {'min': None, 'max': None}
 
     # Both options and named arguments
     with pytest.raises(TypeError):
-        s = pc.min_max(data, options=pc.MinMaxOptions(),
-                       null_handling='emit_null')
+        s = pc.min_max(
+            data, options=pc.ScalarAggregateOptions(), skip_nulls=False)
 
     # Wrong options type
     options = pc.TakeOptions()
@@ -434,7 +442,7 @@ def test_generated_docstrings():
         Compute the minimum and maximum values of a numeric array.
 
         Null values are ignored by default.
-        This can be changed through MinMaxOptions.
+        This can be changed through ScalarAggregateOptions.
 
         Parameters
         ----------
@@ -442,10 +450,10 @@ def test_generated_docstrings():
             Argument to compute function
         memory_pool : pyarrow.MemoryPool, optional
             If not passed, will allocate memory from the default memory pool.
-        options : pyarrow.compute.MinMaxOptions, optional
+        options : pyarrow.compute.ScalarAggregateOptions, optional
             Parameters altering compute function semantics
         **kwargs : optional
-            Parameters for MinMaxOptions constructor.  Either `options`
+            Parameters for ScalarAggregateOptions constructor. Either `options`
             or `**kwargs` can be passed, but not both at the same time.
         """)
     assert pc.add.__doc__ == textwrap.dedent("""\
@@ -1099,11 +1107,11 @@ def test_strptime():
 def test_count():
     arr = pa.array([1, 2, 3, None, None])
     assert pc.count(arr).as_py() == 3
-    assert pc.count(arr, count_mode='count_non_null').as_py() == 3
-    assert pc.count(arr, count_mode='count_null').as_py() == 2
+    assert pc.count(arr, skip_nulls=True).as_py() == 3
+    assert pc.count(arr, skip_nulls=False).as_py() == 2
 
-    with pytest.raises(ValueError, match="'zzz' is not a valid count_mode"):
-        pc.count(arr, count_mode='zzz')
+    with pytest.raises(TypeError, match="an integer is required"):
+        pc.count(arr, min_count='zzz')
 
 
 def test_partition_nth():
diff --git a/r/R/compute.R b/r/R/compute.R
index c3783ba3295..43c3285481c 100644
--- a/r/R/compute.R
+++ b/r/R/compute.R
@@ -95,10 +95,14 @@ list_compute_functions <- function(pattern = NULL, ...) {
 }
 
 #' @export
-sum.ArrowDatum <- function(..., na.rm = FALSE) scalar_aggregate("sum", ..., na.rm = na.rm)
+sum.ArrowDatum <- function(..., na.rm = FALSE) {
+  scalar_aggregate("sum", ..., na.rm = na.rm)
+}
 
 #' @export
-mean.ArrowDatum <- function(..., na.rm = FALSE) scalar_aggregate("mean", ..., na.rm = na.rm)
+mean.ArrowDatum <- function(..., na.rm = FALSE) {
+  scalar_aggregate("mean", ..., na.rm = na.rm)
+}
 
 #' @export
 min.ArrowDatum <- function(..., na.rm = FALSE) {
@@ -110,15 +114,22 @@ max.ArrowDatum <- function(..., na.rm = FALSE) {
   scalar_aggregate("min_max", ..., na.rm = na.rm)$GetFieldByName("max")
 }
 
-scalar_aggregate <- function(FUN, ..., na.rm = FALSE) {
+scalar_aggregate <- function(FUN, ..., na.rm = FALSE, na.min_count = 0) {
   a <- collect_arrays_from_dots(list(...))
-  if (!na.rm && a$null_count > 0 && (FUN %in% c("mean", "sum"))) {
-    # Arrow sum/mean function always drops NAs so handle that here
-    # https://issues.apache.org/jira/browse/ARROW-9054
-    return(Scalar$create(NA_real_))
+  if (!na.rm) {
+    # When not removing null values, we require all values to be not null and 
+    # return null otherwise. We do that by setting minimum count of non-null 
+    # option values to the full array length.
+    na.min_count <- length(a)
+  }
+  if (FUN == "min_max" && na.rm && a$null_count == length(a)) {
+    Array$create(data.frame(min = Inf, max = -Inf))
+    # If na.rm == TRUE and all values in array are NA, R returns
+    # Inf/-Inf, which are type double. Since Arrow is type-stable
+    # and does not do that, we handle this special case here.
+  } else {
+    call_function(FUN, a, options = list(na.rm = na.rm, na.min_count = na.min_count))
   }
-
-  call_function(FUN, a, options = list(na.rm = na.rm))
 }
 
 collect_arrays_from_dots <- function(dots) {
diff --git a/r/src/compute.cpp b/r/src/compute.cpp
index 0ffe53578c4..b8a5b89d931 100644
--- a/r/src/compute.cpp
+++ b/r/src/compute.cpp
@@ -171,11 +171,12 @@ std::shared_ptr<arrow::compute::FunctionOptions> make_compute_options(
     return out;
   }
 
-  if (func_name == "min_max") {
-    using Options = arrow::compute::MinMaxOptions;
+  if (func_name == "min_max" || func_name == "sum" || func_name == "mean" ||
+      func_name == "count") {
+    using Options = arrow::compute::ScalarAggregateOptions;
     auto out = std::make_shared<Options>(Options::Defaults());
-    out->null_handling =
-        cpp11::as_cpp<bool>(options["na.rm"]) ? Options::SKIP : Options::EMIT_NULL;
+    out->min_count = cpp11::as_cpp<int>(options["na.min_count"]);
+    out->skip_nulls = cpp11::as_cpp<bool>(options["na.rm"]);
     return out;
   }
 
diff --git a/r/tests/testthat/test-compute-aggregate.R b/r/tests/testthat/test-compute-aggregate.R
index 95f958cf9f3..1e9d21b8248 100644
--- a/r/tests/testthat/test-compute-aggregate.R
+++ b/r/tests/testthat/test-compute-aggregate.R
@@ -205,7 +205,6 @@ test_that("max.ChunkedArray", {
 })
 
 test_that("Edge cases", {
-  skip("ARROW-9054")
   a <- Array$create(NA)
   for (type in c(int32(), float64(), bool())) {
     expect_equal(as.vector(sum(a$cast(type), na.rm = TRUE)), sum(NA, na.rm = TRUE))

From 71808f1a64789eb047ffa79be12a2984a9883334 Mon Sep 17 00:00:00 2001
From: Menno Siekerman <69851993+mennosiek@users.noreply.github.com>
Date: Fri, 21 May 2021 10:46:26 +0900
Subject: [PATCH 277/719] ARROW-12836: [C++] Add support for newer IBM i

Newer versions of IBM i have CMAKE_SYSTEM_PROCESSOR set to powerpc or powerpc64. Hence in the current release ARROW_CPU_FLAG  defaults to "x86" which makes the install fail.

Closes #10365 from mennosiek/master

Lead-authored-by: Menno Siekerman <69851993+mennosiek@users.noreply.github.com>
Co-authored-by: Sutou Kouhei <kou@cozmixng.org>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 cpp/cmake_modules/SetupCxxFlags.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake
index 6e259559e42..e1b3c1b95ad 100644
--- a/cpp/cmake_modules/SetupCxxFlags.cmake
+++ b/cpp/cmake_modules/SetupCxxFlags.cmake
@@ -28,7 +28,7 @@ if(NOT DEFINED ARROW_CPU_FLAG)
     set(ARROW_CPU_FLAG "armv8")
   elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "armv7")
     set(ARROW_CPU_FLAG "armv7")
-  elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc")
+  elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "powerpc|ppc")
     set(ARROW_CPU_FLAG "ppc")
   elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "s390x")
     set(ARROW_CPU_FLAG "s390x")

From 7707d333fb09fd0019a9bf03047147e05e463766 Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Fri, 21 May 2021 12:24:56 +0900
Subject: [PATCH 278/719] ARROW-12829: [GLib][Ruby] Add support for Apache
 Arrow Flight

Closes #10355 from kou/glib-flight

Authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 .github/workflows/ruby.yml                    |   9 +-
 .travis.yml                                   |   1 -
 c_glib/arrow-dataset-glib/file-format.cpp     |   2 +-
 c_glib/arrow-flight-glib/arrow-flight-glib.h  |  24 ++
 .../arrow-flight-glib/arrow-flight-glib.hpp   |  24 ++
 c_glib/arrow-flight-glib/client.cpp           | 275 +++++++++++++++++
 c_glib/arrow-flight-glib/client.h             |  77 +++++
 c_glib/arrow-flight-glib/client.hpp           |  33 ++
 c_glib/arrow-flight-glib/common.cpp           | 161 ++++++++++
 c_glib/arrow-flight-glib/common.h             |  57 ++++
 c_glib/arrow-flight-glib/common.hpp           |  28 ++
 c_glib/arrow-flight-glib/meson.build          |  82 +++++
 c_glib/arrow-flight-glib/server.cpp           | 282 ++++++++++++++++++
 c_glib/arrow-flight-glib/server.h             |  72 +++++
 c_glib/arrow-flight-glib/server.hpp           |  31 ++
 .../arrow-flight-glib-docs.xml                |  63 ++++
 c_glib/doc/arrow-flight-glib/entities.xml.in  |  24 ++
 c_glib/doc/arrow-flight-glib/meson.build      |  83 ++++++
 c_glib/meson.build                            |  10 +
 c_glib/test/flight/test-client.rb             |  41 +++
 c_glib/test/flight/test-location.rb           |  40 +++
 c_glib/test/flight/test-server-options.rb     |  28 ++
 c_glib/test/helper/flight-server.rb           |  22 ++
 c_glib/test/run-test.rb                       |   8 +
 c_glib/test/run-test.sh                       |   2 +-
 ci/docker/ubuntu-18.04-cpp.dockerfile         |   1 +
 ci/docker/ubuntu-20.04-cpp.dockerfile         |   3 +-
 ci/docker/ubuntu-20.10-cpp.dockerfile         |   3 +-
 cpp/Brewfile                                  |   3 -
 ...nd-c-aresAlt.cmake => Findc-aresAlt.cmake} |   0
 dev/release/01-prepare-test.rb                |  14 +
 dev/release/rat_exclude_files.txt             |   6 +
 dev/release/verify-apt.sh                     |   3 +-
 dev/release/verify-release-candidate.sh       |   5 +-
 dev/release/verify-yum.sh                     |   3 +-
 .../linux-packages/apache-arrow/Rakefile      |  11 +
 .../apache-arrow/apt/ubuntu-focal/Dockerfile  |   1 +
 .../apache-arrow/apt/ubuntu-groovy/Dockerfile |   1 +
 .../apt/ubuntu-hirsute/Dockerfile             |   1 +
 .../apache-arrow/debian/control.in            |  53 ++++
 .../debian/libarrow-flight-glib-dev.install   |   4 +
 .../debian/libarrow-flight-glib-doc.doc-base  |   9 +
 .../debian/libarrow-flight-glib-doc.install   |   1 +
 .../debian/libarrow-flight-glib-doc.links     |   3 +
 .../debian/libarrow-flight-glib500.install    |   1 +
 .../apache-arrow/yum/arrow.spec.in            |  69 ++++-
 .../apache-arrow/yum/centos-8/Dockerfile      |   1 +
 dev/tasks/tasks.yml                           |   9 +
 ruby/red-arrow-cuda/.gitignore                |   1 +
 ruby/red-arrow-dataset/.gitignore             |   1 +
 ruby/red-arrow-flight/.gitignore              |  18 ++
 ruby/red-arrow-flight/Gemfile                 |  24 ++
 ruby/red-arrow-flight/LICENSE.txt             | 202 +++++++++++++
 ruby/red-arrow-flight/NOTICE.txt              |   2 +
 ruby/red-arrow-flight/README.md               |  50 ++++
 ruby/red-arrow-flight/Rakefile                |  41 +++
 .../dependency-check/Rakefile                 |  47 +++
 ruby/red-arrow-flight/lib/arrow-flight.rb     |  29 ++
 .../lib/arrow-flight/call-options.rb          |  35 +++
 .../lib/arrow-flight/client-options.rb        |  35 +++
 .../lib/arrow-flight/loader.rb                |  38 +++
 .../lib/arrow-flight/location.rb              |  31 ++
 .../lib/arrow-flight/server-options.rb        |  41 +++
 .../lib/arrow-flight/version.rb               |  26 ++
 .../red-arrow-flight/red-arrow-flight.gemspec |  52 ++++
 ruby/red-arrow-flight/test/helper.rb          |  22 ++
 ruby/red-arrow-flight/test/helper/server.rb   |  22 ++
 ruby/red-arrow-flight/test/run-test.rb        |  50 ++++
 ruby/red-arrow-flight/test/test-client.rb     |  35 +++
 ruby/red-arrow-flight/test/test-location.rb   |  26 ++
 ruby/red-arrow/.gitignore                     |   1 +
 ruby/red-gandiva/.gitignore                   |   1 +
 ruby/red-parquet/.gitignore                   |   1 +
 ruby/red-plasma/.gitignore                    |   1 +
 74 files changed, 2497 insertions(+), 19 deletions(-)
 create mode 100644 c_glib/arrow-flight-glib/arrow-flight-glib.h
 create mode 100644 c_glib/arrow-flight-glib/arrow-flight-glib.hpp
 create mode 100644 c_glib/arrow-flight-glib/client.cpp
 create mode 100644 c_glib/arrow-flight-glib/client.h
 create mode 100644 c_glib/arrow-flight-glib/client.hpp
 create mode 100644 c_glib/arrow-flight-glib/common.cpp
 create mode 100644 c_glib/arrow-flight-glib/common.h
 create mode 100644 c_glib/arrow-flight-glib/common.hpp
 create mode 100644 c_glib/arrow-flight-glib/meson.build
 create mode 100644 c_glib/arrow-flight-glib/server.cpp
 create mode 100644 c_glib/arrow-flight-glib/server.h
 create mode 100644 c_glib/arrow-flight-glib/server.hpp
 create mode 100644 c_glib/doc/arrow-flight-glib/arrow-flight-glib-docs.xml
 create mode 100644 c_glib/doc/arrow-flight-glib/entities.xml.in
 create mode 100644 c_glib/doc/arrow-flight-glib/meson.build
 create mode 100644 c_glib/test/flight/test-client.rb
 create mode 100644 c_glib/test/flight/test-location.rb
 create mode 100644 c_glib/test/flight/test-server-options.rb
 create mode 100644 c_glib/test/helper/flight-server.rb
 rename cpp/cmake_modules/{Find-c-aresAlt.cmake => Findc-aresAlt.cmake} (100%)
 create mode 100644 dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-dev.install
 create mode 100644 dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.doc-base
 create mode 100644 dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.install
 create mode 100644 dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.links
 create mode 100644 dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib500.install
 create mode 100644 ruby/red-arrow-flight/.gitignore
 create mode 100644 ruby/red-arrow-flight/Gemfile
 create mode 100644 ruby/red-arrow-flight/LICENSE.txt
 create mode 100644 ruby/red-arrow-flight/NOTICE.txt
 create mode 100644 ruby/red-arrow-flight/README.md
 create mode 100644 ruby/red-arrow-flight/Rakefile
 create mode 100644 ruby/red-arrow-flight/dependency-check/Rakefile
 create mode 100644 ruby/red-arrow-flight/lib/arrow-flight.rb
 create mode 100644 ruby/red-arrow-flight/lib/arrow-flight/call-options.rb
 create mode 100644 ruby/red-arrow-flight/lib/arrow-flight/client-options.rb
 create mode 100644 ruby/red-arrow-flight/lib/arrow-flight/loader.rb
 create mode 100644 ruby/red-arrow-flight/lib/arrow-flight/location.rb
 create mode 100644 ruby/red-arrow-flight/lib/arrow-flight/server-options.rb
 create mode 100644 ruby/red-arrow-flight/lib/arrow-flight/version.rb
 create mode 100644 ruby/red-arrow-flight/red-arrow-flight.gemspec
 create mode 100644 ruby/red-arrow-flight/test/helper.rb
 create mode 100644 ruby/red-arrow-flight/test/helper/server.rb
 create mode 100755 ruby/red-arrow-flight/test/run-test.rb
 create mode 100644 ruby/red-arrow-flight/test/test-client.rb
 create mode 100644 ruby/red-arrow-flight/test/test-location.rb

diff --git a/.github/workflows/ruby.yml b/.github/workflows/ruby.yml
index 91470e30fd4..20318169740 100644
--- a/.github/workflows/ruby.yml
+++ b/.github/workflows/ruby.yml
@@ -59,7 +59,6 @@ jobs:
       fail-fast: false
       matrix:
         ubuntu:
-          - 18.04
           - 20.04
     env:
       UBUNTU: ${{ matrix.ubuntu }}
@@ -90,7 +89,11 @@ jobs:
         run: |
           sudo sysctl -w kernel.core_pattern="core.%e.%p"
           ulimit -c unlimited
-          archery docker run ubuntu-ruby
+          archery docker run \
+            -e ARROW_FLIGHT=ON \
+            -e Protobuf_SOURCE=BUNDLED \
+            -e gRPC_SOURCE=BUNDLED \
+            ubuntu-ruby
       - name: Docker Push
         if: success() && github.event_name == 'push' && github.repository == 'apache/arrow'
         continue-on-error: true
@@ -106,6 +109,7 @@ jobs:
       fail-fast: false
     env:
       ARROW_BUILD_TESTS: OFF
+      ARROW_FLIGHT: ON
       ARROW_GANDIVA: ON
       ARROW_GLIB_DEVELOPMENT_MODE: true
       ARROW_GLIB_GTK_DOC: true
@@ -185,7 +189,6 @@ jobs:
       ARROW_BUILD_STATIC: OFF
       ARROW_BUILD_TESTS: OFF
       ARROW_BUILD_TYPE: release
-      ARROW_DATASET: ON
       ARROW_FLIGHT: ON
       ARROW_GANDIVA: ON
       ARROW_HDFS: OFF
diff --git a/.travis.yml b/.travis.yml
index 26b4d78fd2d..fd87cf352e9 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -97,7 +97,6 @@ jobs:
           -e PARQUET_BUILD_EXAMPLES=OFF
           -e PARQUET_BUILD_EXECUTABLES=OFF
           -e Protobuf_SOURCE=BUNDLED
-          -e cares_SOURCE=BUNDLED
           -e gRPC_SOURCE=BUNDLED
           "
         UBUNTU: "20.04"
diff --git a/c_glib/arrow-dataset-glib/file-format.cpp b/c_glib/arrow-dataset-glib/file-format.cpp
index 7f10c9debbe..89d56058928 100644
--- a/c_glib/arrow-dataset-glib/file-format.cpp
+++ b/c_glib/arrow-dataset-glib/file-format.cpp
@@ -37,7 +37,7 @@ G_BEGIN_DECLS
  *
  * #GADParquetFileFormat is a class for Parquet file format.
  *
- * * Since: 3.0.0
+ * Since: 3.0.0
  */
 
 typedef struct GADFileFormatPrivate_ {
diff --git a/c_glib/arrow-flight-glib/arrow-flight-glib.h b/c_glib/arrow-flight-glib/arrow-flight-glib.h
new file mode 100644
index 00000000000..6fc8f43d840
--- /dev/null
+++ b/c_glib/arrow-flight-glib/arrow-flight-glib.h
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <arrow-flight-glib/client.h>
+#include <arrow-flight-glib/common.h>
+#include <arrow-flight-glib/server.h>
diff --git a/c_glib/arrow-flight-glib/arrow-flight-glib.hpp b/c_glib/arrow-flight-glib/arrow-flight-glib.hpp
new file mode 100644
index 00000000000..11e1fe94d52
--- /dev/null
+++ b/c_glib/arrow-flight-glib/arrow-flight-glib.hpp
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <arrow-flight-glib/client.hpp>
+#include <arrow-flight-glib/common.hpp>
+#include <arrow-flight-glib/server.hpp>
diff --git a/c_glib/arrow-flight-glib/client.cpp b/c_glib/arrow-flight-glib/client.cpp
new file mode 100644
index 00000000000..468993d798c
--- /dev/null
+++ b/c_glib/arrow-flight-glib/client.cpp
@@ -0,0 +1,275 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <arrow-glib/error.hpp>
+
+#include <arrow-flight-glib/client.hpp>
+#include <arrow-flight-glib/common.hpp>
+
+G_BEGIN_DECLS
+
+/**
+ * SECTION: client
+ * @section_id: client
+ * @title: Client related classes
+ * @include: arrow-flight-glib/arrow-flight-glib.h
+ *
+ * #GAFlightCallOptions is a class for options of each call.
+ *
+ * #GAFlightClientOptions is a class for options of each client.
+ *
+ * #GAFlightClient is a class for Apache Arrow Flight client.
+ *
+ * Since: 5.0.0
+ */
+
+typedef struct GAFlightCallOptionsPrivate_ {
+  arrow::flight::FlightCallOptions options;
+} GAFlightCallOptionsPrivate;
+
+G_DEFINE_TYPE_WITH_PRIVATE(GAFlightCallOptions,
+                           gaflight_call_options,
+                           G_TYPE_OBJECT)
+
+#define GAFLIGHT_CALL_OPTIONS_GET_PRIVATE(obj)        \
+  static_cast<GAFlightCallOptionsPrivate *>(          \
+    gaflight_call_options_get_instance_private(       \
+      GAFLIGHT_CALL_OPTIONS(obj)))
+
+static void
+gaflight_call_options_finalize(GObject *object)
+{
+  auto priv = GAFLIGHT_CALL_OPTIONS_GET_PRIVATE(object);
+
+  priv->options.~FlightCallOptions();
+
+  G_OBJECT_CLASS(gaflight_call_options_parent_class)->finalize(object);
+}
+
+static void
+gaflight_call_options_init(GAFlightCallOptions *object)
+{
+  auto priv = GAFLIGHT_CALL_OPTIONS_GET_PRIVATE(object);
+  new(&priv->options) arrow::flight::FlightCallOptions;
+}
+
+static void
+gaflight_call_options_class_init(GAFlightCallOptionsClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->finalize = gaflight_call_options_finalize;
+}
+
+/**
+ * gaflight_call_options_new:
+ *
+ * Returns: The newly created options for a call.
+ *
+ * Since: 5.0.0
+ */
+GAFlightCallOptions *
+gaflight_call_options_new(void)
+{
+  return static_cast<GAFlightCallOptions *>(
+    g_object_new(GAFLIGHT_TYPE_CALL_OPTIONS, NULL));
+}
+
+
+typedef struct GAFlightClientOptionsPrivate_ {
+  arrow::flight::FlightClientOptions options;
+} GAFlightClientOptionsPrivate;
+
+G_DEFINE_TYPE_WITH_PRIVATE(GAFlightClientOptions,
+                           gaflight_client_options,
+                           G_TYPE_OBJECT)
+
+#define GAFLIGHT_CLIENT_OPTIONS_GET_PRIVATE(obj)        \
+  static_cast<GAFlightClientOptionsPrivate *>(          \
+    gaflight_client_options_get_instance_private(       \
+      GAFLIGHT_CLIENT_OPTIONS(obj)))
+
+static void
+gaflight_client_options_finalize(GObject *object)
+{
+  auto priv = GAFLIGHT_CLIENT_OPTIONS_GET_PRIVATE(object);
+
+  priv->options.~FlightClientOptions();
+
+  G_OBJECT_CLASS(gaflight_client_options_parent_class)->finalize(object);
+}
+
+static void
+gaflight_client_options_init(GAFlightClientOptions *object)
+{
+  auto priv = GAFLIGHT_CLIENT_OPTIONS_GET_PRIVATE(object);
+  new(&(priv->options)) arrow::flight::FlightClientOptions;
+  priv->options = arrow::flight::FlightClientOptions::Defaults();
+}
+
+static void
+gaflight_client_options_class_init(GAFlightClientOptionsClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->finalize = gaflight_client_options_finalize;
+}
+
+/**
+ * gaflight_client_options_new:
+ *
+ * Returns: The newly created options for a client.
+ *
+ * Since: 5.0.0
+ */
+GAFlightClientOptions *
+gaflight_client_options_new(void)
+{
+  return static_cast<GAFlightClientOptions *>(
+    g_object_new(GAFLIGHT_TYPE_CLIENT_OPTIONS, NULL));
+}
+
+
+typedef struct GAFlightClientPrivate_ {
+  arrow::flight::FlightClient *client;
+} GAFlightClientPrivate;
+
+enum {
+  PROP_CLIENT = 1,
+};
+
+G_DEFINE_TYPE_WITH_PRIVATE(GAFlightClient,
+                           gaflight_client,
+                           G_TYPE_OBJECT)
+
+#define GAFLIGHT_CLIENT_GET_PRIVATE(obj)         \
+  static_cast<GAFlightClientPrivate *>(          \
+    gaflight_client_get_instance_private(        \
+      GAFLIGHT_CLIENT(obj)))
+
+static void
+gaflight_client_finalize(GObject *object)
+{
+  auto priv = GAFLIGHT_CLIENT_GET_PRIVATE(object);
+
+  delete priv->client;
+
+  G_OBJECT_CLASS(gaflight_client_parent_class)->finalize(object);
+}
+
+static void
+gaflight_client_set_property(GObject *object,
+                             guint prop_id,
+                             const GValue *value,
+                             GParamSpec *pspec)
+{
+  auto priv = GAFLIGHT_CLIENT_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_CLIENT:
+    priv->client =
+      static_cast<arrow::flight::FlightClient *>(g_value_get_pointer(value));
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+gaflight_client_init(GAFlightClient *object)
+{
+}
+
+static void
+gaflight_client_class_init(GAFlightClientClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->finalize = gaflight_client_finalize;
+  gobject_class->set_property = gaflight_client_set_property;
+
+  GParamSpec *spec;
+  spec = g_param_spec_pointer("client",
+                              "Client",
+                              "The raw arrow::flight::FlightClient *",
+                              static_cast<GParamFlags>(G_PARAM_WRITABLE |
+                                                       G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_CLIENT, spec);
+}
+
+/**
+ * gaflight_client_new:
+ * @location: A #GAFlightLocation to be connected.
+ * @options: (nullable): A #GAFlightClientOptions.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (nullable): The newly created client, %NULL on error.
+ *
+ * Since: 5.0.0
+ */
+GAFlightClient *
+gaflight_client_new(GAFlightLocation *location,
+                    GAFlightClientOptions *options,
+                    GError **error)
+{
+  const auto flight_location = gaflight_location_get_raw(location);
+  std::unique_ptr<arrow::flight::FlightClient> flight_client;
+  arrow::Status status;
+  if (options) {
+    const auto flight_options = gaflight_client_options_get_raw(options);
+    status = arrow::flight::FlightClient::Connect(*flight_location,
+                                                  *flight_options,
+                                                  &flight_client);
+  } else {
+    status = arrow::flight::FlightClient::Connect(*flight_location,
+                                                  &flight_client);
+  }
+  if (garrow::check(error, status, "[flight-client][new]")) {
+    return gaflight_client_new_raw(flight_client.release());
+  } else {
+    return NULL;
+  }
+}
+
+
+G_END_DECLS
+
+
+arrow::flight::FlightClientOptions *
+gaflight_client_options_get_raw(GAFlightClientOptions *options)
+{
+  auto priv = GAFLIGHT_CLIENT_OPTIONS_GET_PRIVATE(options);
+  return &(priv->options);
+}
+
+arrow::flight::FlightClient *
+gaflight_client_get_raw(GAFlightClient *client)
+{
+  auto priv = GAFLIGHT_CLIENT_GET_PRIVATE(client);
+  return priv->client;
+}
+
+GAFlightClient *
+gaflight_client_new_raw(arrow::flight::FlightClient *flight_client)
+{
+  return GAFLIGHT_CLIENT(g_object_new(GAFLIGHT_TYPE_CLIENT,
+                                      "client", flight_client,
+                                      NULL));
+}
diff --git a/c_glib/arrow-flight-glib/client.h b/c_glib/arrow-flight-glib/client.h
new file mode 100644
index 00000000000..92b43f7522a
--- /dev/null
+++ b/c_glib/arrow-flight-glib/client.h
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <arrow-flight-glib/common.h>
+
+G_BEGIN_DECLS
+
+
+#define GAFLIGHT_TYPE_CALL_OPTIONS (gaflight_call_options_get_type())
+G_DECLARE_DERIVABLE_TYPE(GAFlightCallOptions,
+                         gaflight_call_options,
+                         GAFLIGHT,
+                         CALL_OPTIONS,
+                         GObject)
+struct _GAFlightCallOptionsClass
+{
+  GObjectClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GAFlightCallOptions *
+gaflight_call_options_new(void);
+
+
+#define GAFLIGHT_TYPE_CLIENT_OPTIONS (gaflight_client_options_get_type())
+G_DECLARE_DERIVABLE_TYPE(GAFlightClientOptions,
+                         gaflight_client_options,
+                         GAFLIGHT,
+                         CLIENT_OPTIONS,
+                         GObject)
+struct _GAFlightClientOptionsClass
+{
+  GObjectClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GAFlightClientOptions *
+gaflight_client_options_new(void);
+
+
+#define GAFLIGHT_TYPE_CLIENT (gaflight_client_get_type())
+G_DECLARE_DERIVABLE_TYPE(GAFlightClient,
+                         gaflight_client,
+                         GAFLIGHT,
+                         CLIENT,
+                         GObject)
+struct _GAFlightClientClass
+{
+  GObjectClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GAFlightClient *
+gaflight_client_new(GAFlightLocation *location,
+                    GAFlightClientOptions *options,
+                    GError **error);
+
+
+G_END_DECLS
diff --git a/c_glib/arrow-flight-glib/client.hpp b/c_glib/arrow-flight-glib/client.hpp
new file mode 100644
index 00000000000..32d3f77cdb3
--- /dev/null
+++ b/c_glib/arrow-flight-glib/client.hpp
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <arrow/flight/api.h>
+
+#include <arrow-flight-glib/client.h>
+
+
+arrow::flight::FlightClientOptions *
+gaflight_client_options_get_raw(GAFlightClientOptions *options);
+
+arrow::flight::FlightClient *
+gaflight_client_get_raw(GAFlightClient *client);
+GAFlightClient *
+gaflight_client_new_raw(arrow::flight::FlightClient *flight_client);
diff --git a/c_glib/arrow-flight-glib/common.cpp b/c_glib/arrow-flight-glib/common.cpp
new file mode 100644
index 00000000000..4c14027167f
--- /dev/null
+++ b/c_glib/arrow-flight-glib/common.cpp
@@ -0,0 +1,161 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <arrow-glib/error.hpp>
+
+#include <arrow-flight-glib/common.hpp>
+
+G_BEGIN_DECLS
+
+/**
+ * SECTION: common
+ * @section_id: common
+ * @title: Classes both for client and server
+ * @include: arrow-flight-glib/arrow-flight-glib.h
+ *
+ * #GAFlightLocation is a class for location.
+ *
+ * Since: 5.0.0
+ */
+
+typedef struct GAFlightLocationPrivate_ {
+  arrow::flight::Location location;
+} GAFlightLocationPrivate;
+
+G_DEFINE_TYPE_WITH_PRIVATE(GAFlightLocation,
+                           gaflight_location,
+                           G_TYPE_OBJECT)
+
+#define GAFLIGHT_LOCATION_GET_PRIVATE(obj)            \
+  static_cast<GAFlightLocationPrivate *>(             \
+    gaflight_location_get_instance_private(           \
+      GAFLIGHT_LOCATION(obj)))
+
+static void
+gaflight_location_finalize(GObject *object)
+{
+  auto priv = GAFLIGHT_LOCATION_GET_PRIVATE(object);
+
+  priv->location.~Location();
+
+  G_OBJECT_CLASS(gaflight_location_parent_class)->finalize(object);
+}
+
+static void
+gaflight_location_init(GAFlightLocation *object)
+{
+  auto priv = GAFLIGHT_LOCATION_GET_PRIVATE(object);
+  new(&priv->location) arrow::flight::Location;
+}
+
+static void
+gaflight_location_class_init(GAFlightLocationClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->finalize = gaflight_location_finalize;
+}
+
+/**
+ * gaflight_location_new:
+ * @uri: An URI to specify location.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (nullable): The newly created location, %NULL on error.
+ *
+ * Since: 5.0.0
+ */
+GAFlightLocation *
+gaflight_location_new(const gchar *uri,
+                      GError **error)
+{
+  auto location = GAFLIGHT_LOCATION(g_object_new(GAFLIGHT_TYPE_LOCATION, NULL));
+  auto flight_location = gaflight_location_get_raw(location);
+  if (garrow::check(error,
+                    arrow::flight::Location::Parse(uri, flight_location),
+                    "[flight-location][new]")) {
+    return location;
+  } else {
+    g_object_unref(location);
+    return NULL;
+  }
+}
+
+/**
+ * gaflight_location_to_string:
+ * @location: A #GAFlightLocation.
+ *
+ * Returns: A representation of this URI as a string.
+ *
+ *   It should be freed with g_free() when no longer needed.
+ *
+ * Since: 5.0.0
+ */
+gchar *
+gaflight_location_to_string(GAFlightLocation *location)
+{
+  const auto flight_location = gaflight_location_get_raw(location);
+  return g_strdup(flight_location->ToString().c_str());
+}
+
+/**
+ * gaflight_location_get_scheme:
+ * @location: A #GAFlightLocation.
+ *
+ * Returns: The scheme of this URI.
+ *
+ *   It should be freed with g_free() when no longer needed.
+ *
+ * Since: 5.0.0
+ */
+gchar *
+gaflight_location_get_scheme(GAFlightLocation *location)
+{
+  const auto flight_location = gaflight_location_get_raw(location);
+  return g_strdup(flight_location->scheme().c_str());
+}
+
+/**
+ * gaflight_location_equal:
+ * @location: A #GAFlightLocation.
+ * @other_location: A #GAFlightLocation to be compared.
+ *
+ * Returns: %TRUE if both of them represents the same URI, %FALSE otherwise.
+ *
+ * Since: 5.0.0
+ */
+gboolean
+gaflight_location_equal(GAFlightLocation *location,
+                        GAFlightLocation *other_location)
+{
+  const auto flight_location = gaflight_location_get_raw(location);
+  const auto flight_other_location = gaflight_location_get_raw(other_location);
+  return flight_location->Equals(*flight_other_location);
+}
+
+
+G_END_DECLS
+
+
+arrow::flight::Location *
+gaflight_location_get_raw(GAFlightLocation *location)
+{
+  auto priv = GAFLIGHT_LOCATION_GET_PRIVATE(location);
+  return &(priv->location);
+}
diff --git a/c_glib/arrow-flight-glib/common.h b/c_glib/arrow-flight-glib/common.h
new file mode 100644
index 00000000000..f5a641b1e05
--- /dev/null
+++ b/c_glib/arrow-flight-glib/common.h
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <arrow-glib/arrow-glib.h>
+
+G_BEGIN_DECLS
+
+
+#define GAFLIGHT_TYPE_LOCATION (gaflight_location_get_type())
+G_DECLARE_DERIVABLE_TYPE(GAFlightLocation,
+                         gaflight_location,
+                         GAFLIGHT,
+                         LOCATION,
+                         GObject)
+struct _GAFlightLocationClass
+{
+  GObjectClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GAFlightLocation *
+gaflight_location_new(const gchar *uri,
+                      GError **error);
+
+GARROW_AVAILABLE_IN_5_0
+gchar *
+gaflight_location_to_string(GAFlightLocation *location);
+
+GARROW_AVAILABLE_IN_5_0
+gchar *
+gaflight_location_get_scheme(GAFlightLocation *location);
+
+GARROW_AVAILABLE_IN_5_0
+gboolean
+gaflight_location_equal(GAFlightLocation *location,
+                        GAFlightLocation *other_location);
+
+
+G_END_DECLS
diff --git a/c_glib/arrow-flight-glib/common.hpp b/c_glib/arrow-flight-glib/common.hpp
new file mode 100644
index 00000000000..2ea06d9f39f
--- /dev/null
+++ b/c_glib/arrow-flight-glib/common.hpp
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <arrow/flight/api.h>
+
+#include <arrow-flight-glib/common.h>
+
+
+arrow::flight::Location *
+gaflight_location_get_raw(GAFlightLocation *location);
diff --git a/c_glib/arrow-flight-glib/meson.build b/c_glib/arrow-flight-glib/meson.build
new file mode 100644
index 00000000000..c17415fee3d
--- /dev/null
+++ b/c_glib/arrow-flight-glib/meson.build
@@ -0,0 +1,82 @@
+# -*- indent-tabs-mode: nil -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+sources = files(
+  'client.cpp',
+  'common.cpp',
+  'server.cpp',
+)
+
+c_headers = files(
+  'arrow-flight-glib.h',
+  'client.h',
+  'common.h',
+  'server.h',
+)
+
+cpp_headers = files(
+  'arrow-flight-glib.hpp',
+  'client.hpp',
+  'common.hpp',
+  'server.hpp',
+)
+
+headers = c_headers + cpp_headers
+install_headers(headers, subdir: 'arrow-flight-glib')
+
+dependencies = [
+  arrow_flight,
+  arrow_glib,
+]
+libarrow_flight_glib = library('arrow-flight-glib',
+                               sources: sources,
+                               install: true,
+                               dependencies: dependencies,
+                               include_directories: base_include_directories,
+                               soversion: so_version,
+                               version: library_version)
+arrow_flight_glib = declare_dependency(link_with: libarrow_flight_glib,
+                                       include_directories: base_include_directories,
+                                       dependencies: dependencies)
+
+pkgconfig.generate(libarrow_flight_glib,
+                   filebase: 'arrow-flight-glib',
+                   name: 'Apache Arrow Flight GLib',
+                   description: 'C API for Apache Arrow Flight based on GLib',
+                   version: version,
+                   requires: ['arrow-glib', 'arrow-flight'])
+
+if have_gi
+  gnome.generate_gir(libarrow_flight_glib,
+                     dependencies: declare_dependency(sources: arrow_glib_gir),
+                     sources: sources + c_headers,
+                     namespace: 'ArrowFlight',
+                     nsversion: api_version,
+                     identifier_prefix: 'GAFlight',
+                     symbol_prefix: 'gaflight',
+                     export_packages: 'arrow-flight-glib',
+                     includes: [
+                       'Arrow-1.0',
+                     ],
+                     install: true,
+                     extra_args: [
+                       '--warn-all',
+                       '--include-uninstalled=./arrow-glib/Arrow-1.0.gir',
+                     ])
+endif
diff --git a/c_glib/arrow-flight-glib/server.cpp b/c_glib/arrow-flight-glib/server.cpp
new file mode 100644
index 00000000000..1d2e3a5b10a
--- /dev/null
+++ b/c_glib/arrow-flight-glib/server.cpp
@@ -0,0 +1,282 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <arrow-glib/error.hpp>
+
+#include <arrow-flight-glib/common.hpp>
+#include <arrow-flight-glib/server.hpp>
+
+G_BEGIN_DECLS
+
+/**
+ * SECTION: server
+ * @section_id: server
+ * @title: Server related classes
+ * @include: arrow-flight-glib/arrow-flight-glib.h
+ *
+ * #GAFlightServerOptions is a class for options of each server.
+ *
+ * #GAFlightServer is a class to develop an Apache Arrow Flight server.
+ *
+ * Since: 5.0.0
+ */
+
+
+typedef struct GAFlightServerOptionsPrivate_ {
+  arrow::flight::FlightServerOptions options;
+  GAFlightLocation *location;
+} GAFlightServerOptionsPrivate;
+
+enum {
+  PROP_LOCATION = 1,
+};
+
+G_DEFINE_TYPE_WITH_PRIVATE(GAFlightServerOptions,
+                           gaflight_server_options,
+                           G_TYPE_OBJECT)
+
+#define GAFLIGHT_SERVER_OPTIONS_GET_PRIVATE(obj)        \
+  static_cast<GAFlightServerOptionsPrivate *>(          \
+    gaflight_server_options_get_instance_private(       \
+      GAFLIGHT_SERVER_OPTIONS(obj)))
+
+static void
+gaflight_server_options_dispose(GObject *object)
+{
+  auto priv = GAFLIGHT_SERVER_OPTIONS_GET_PRIVATE(object);
+
+  if (priv->location) {
+    g_object_unref(priv->location);
+    priv->location = NULL;
+  }
+
+  G_OBJECT_CLASS(gaflight_server_options_parent_class)->dispose(object);
+}
+
+static void
+gaflight_server_options_finalize(GObject *object)
+{
+  auto priv = GAFLIGHT_SERVER_OPTIONS_GET_PRIVATE(object);
+
+  priv->options.~FlightServerOptions();
+
+  G_OBJECT_CLASS(gaflight_server_options_parent_class)->finalize(object);
+}
+
+static void
+gaflight_server_options_set_property(GObject *object,
+                                     guint prop_id,
+                                     const GValue *value,
+                                     GParamSpec *pspec)
+{
+  auto priv = GAFLIGHT_SERVER_OPTIONS_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_LOCATION:
+    {
+      priv->location = GAFLIGHT_LOCATION(g_value_dup_object(value));
+      auto flight_location = gaflight_location_get_raw(priv->location);
+      new(&(priv->options)) arrow::flight::FlightServerOptions(*flight_location);
+    }
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+gaflight_server_options_get_property(GObject *object,
+                                     guint prop_id,
+                                     GValue *value,
+                                     GParamSpec *pspec)
+{
+  auto priv = GAFLIGHT_SERVER_OPTIONS_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_LOCATION:
+    g_value_set_object(value, priv->location);
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+gaflight_server_options_init(GAFlightServerOptions *object)
+{
+}
+
+static void
+gaflight_server_options_class_init(GAFlightServerOptionsClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->dispose = gaflight_server_options_dispose;
+  gobject_class->finalize = gaflight_server_options_finalize;
+  gobject_class->set_property = gaflight_server_options_set_property;
+  gobject_class->get_property = gaflight_server_options_get_property;
+
+  GParamSpec *spec;
+  spec = g_param_spec_object("location",
+                             "Location",
+                             "The location to be listened",
+                             GAFLIGHT_TYPE_LOCATION,
+                             static_cast<GParamFlags>(G_PARAM_READWRITE |
+                                                      G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_LOCATION, spec);
+}
+
+/**
+ * gaflight_server_options_new:
+ * @location: A #GAFlightLocation to be listened.
+ *
+ * Returns: The newly created options for a server.
+ *
+ * Since: 5.0.0
+ */
+GAFlightServerOptions *
+gaflight_server_options_new(GAFlightLocation *location)
+{
+  return static_cast<GAFlightServerOptions *>(
+    g_object_new(GAFLIGHT_TYPE_SERVER_OPTIONS,
+                 "location", location,
+                 NULL));
+}
+
+
+typedef struct GAFlightServerPrivate_ {
+  arrow::flight::FlightServerBase server;
+} GAFlightServerPrivate;
+
+enum {
+  PROP_SERVER = 1,
+};
+
+G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GAFlightServer,
+                                    gaflight_server,
+                                    G_TYPE_OBJECT)
+
+#define GAFLIGHT_SERVER_GET_PRIVATE(obj)         \
+  static_cast<GAFlightServerPrivate *>(          \
+    gaflight_server_get_instance_private(        \
+      GAFLIGHT_SERVER(obj)))
+
+static void
+gaflight_server_finalize(GObject *object)
+{
+  auto priv = GAFLIGHT_SERVER_GET_PRIVATE(object);
+
+  priv->server.~FlightServerBase();
+
+  G_OBJECT_CLASS(gaflight_server_parent_class)->finalize(object);
+}
+
+static void
+gaflight_server_init(GAFlightServer *object)
+{
+  auto priv = GAFLIGHT_SERVER_GET_PRIVATE(object);
+  new(&(priv->server)) arrow::flight::FlightServerBase;
+}
+
+static void
+gaflight_server_class_init(GAFlightServerClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->finalize = gaflight_server_finalize;
+}
+
+/**
+ * gaflight_server_listen:
+ * @server: A #GAFlightServer.
+ * @options: A #GAFlightServerOptions.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: %TRUE on success, %FALSE on error.
+ *
+ * Since: 5.0.0
+ */
+gboolean
+gaflight_server_listen(GAFlightServer *server,
+                       GAFlightServerOptions *options,
+                       GError **error)
+{
+  auto flight_server = gaflight_server_get_raw(server);
+  const auto flight_options = gaflight_server_options_get_raw(options);
+  return garrow::check(error,
+                       flight_server->Init(*flight_options),
+                       "[flight-server][listen]");
+}
+
+/**
+ * gaflight_server_new:
+ * @server: A #GAFlightServer.
+ *
+ * Returns: The port number listening.
+ *
+ * Since: 5.0.0
+ */
+gint
+gaflight_server_get_port(GAFlightServer *server)
+{
+  const auto flight_server = gaflight_server_get_raw(server);
+  return flight_server->port();
+}
+
+/**
+ * gaflight_server_shutdown:
+ * @server: A #GAFlightServer.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Shuts down the serve. This function can be called from signal
+ * handler or another thread while gaflight_server_serve() blocks.
+ *
+ * Returns: %TRUE on success, %FALSE on error.
+ *
+ * Since: 5.0.0
+ */
+gboolean
+gaflight_server_shutdown(GAFlightServer *server,
+                         GError **error)
+{
+  auto flight_server = gaflight_server_get_raw(server);
+  return garrow::check(error,
+                       flight_server->Shutdown(),
+                       "[flight-server][shutdown]");
+}
+
+
+G_END_DECLS
+
+
+arrow::flight::FlightServerOptions *
+gaflight_server_options_get_raw(GAFlightServerOptions *options)
+{
+  auto priv = GAFLIGHT_SERVER_OPTIONS_GET_PRIVATE(options);
+  return &(priv->options);
+}
+
+arrow::flight::FlightServerBase *
+gaflight_server_get_raw(GAFlightServer *server)
+{
+  auto priv = GAFLIGHT_SERVER_GET_PRIVATE(server);
+  return &(priv->server);
+}
diff --git a/c_glib/arrow-flight-glib/server.h b/c_glib/arrow-flight-glib/server.h
new file mode 100644
index 00000000000..ef477b4b1d9
--- /dev/null
+++ b/c_glib/arrow-flight-glib/server.h
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <arrow-flight-glib/common.h>
+
+G_BEGIN_DECLS
+
+
+#define GAFLIGHT_TYPE_SERVER_OPTIONS (gaflight_server_options_get_type())
+G_DECLARE_DERIVABLE_TYPE(GAFlightServerOptions,
+                         gaflight_server_options,
+                         GAFLIGHT,
+                         SERVER_OPTIONS,
+                         GObject)
+struct _GAFlightServerOptionsClass
+{
+  GObjectClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GAFlightServerOptions *
+gaflight_server_options_new(GAFlightLocation *location);
+
+
+#define GAFLIGHT_TYPE_SERVER (gaflight_server_get_type())
+G_DECLARE_DERIVABLE_TYPE(GAFlightServer,
+                         gaflight_server,
+                         GAFLIGHT,
+                         SERVER,
+                         GObject)
+struct _GAFlightServerClass
+{
+  GObjectClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+gboolean
+gaflight_server_listen(GAFlightServer *server,
+                       GAFlightServerOptions *options,
+                       GError **error);
+GARROW_AVAILABLE_IN_5_0
+gint
+gaflight_server_get_port(GAFlightServer *server);
+GARROW_AVAILABLE_IN_5_0
+gboolean
+gaflight_server_shutdown(GAFlightServer *server,
+                         GError **error);
+GARROW_AVAILABLE_IN_5_0
+gboolean
+gaflight_server_wait(GAFlightServer *server,
+                     GError **error);
+
+
+G_END_DECLS
diff --git a/c_glib/arrow-flight-glib/server.hpp b/c_glib/arrow-flight-glib/server.hpp
new file mode 100644
index 00000000000..0103d14996d
--- /dev/null
+++ b/c_glib/arrow-flight-glib/server.hpp
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <arrow/flight/api.h>
+
+#include <arrow-flight-glib/server.h>
+
+
+arrow::flight::FlightServerOptions *
+gaflight_server_options_get_raw(GAFlightServerOptions *options);
+
+arrow::flight::FlightServerBase *
+gaflight_server_get_raw(GAFlightServer *server);
diff --git a/c_glib/doc/arrow-flight-glib/arrow-flight-glib-docs.xml b/c_glib/doc/arrow-flight-glib/arrow-flight-glib-docs.xml
new file mode 100644
index 00000000000..d1850185692
--- /dev/null
+++ b/c_glib/doc/arrow-flight-glib/arrow-flight-glib-docs.xml
@@ -0,0 +1,63 @@
+<?xml version="1.0"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.3//EN"
+               "http://www.oasis-open.org/docbook/xml/4.3/docbookx.dtd"
+[
+  <!ENTITY % local.common.attrib "xmlns:xi  CDATA  #FIXED 'http://www.w3.org/2003/XInclude'">
+  <!ENTITY % gtkdocentities SYSTEM "entities.xml">
+  %gtkdocentities;
+]>
+<book id="index" xmlns:xi="http://www.w3.org/2003/XInclude">
+  <bookinfo>
+    <title>&package_name; Reference Manual</title>
+    <releaseinfo>
+      for &package_string;.
+      <!--
+      The latest version of this documentation can be found on-line at
+      <ulink role="online-location" url="http://[SERVER]/&package_name;/">http://[SERVER]/&package_name;/</ulink>.
+      -->
+    </releaseinfo>
+  </bookinfo>
+
+  <part id="rpc">
+    <title>RPC</title>
+    <xi:include href="xml/common.xml"/>
+    <xi:include href="xml/client.xml"/>
+    <xi:include href="xml/server.xml"/>
+  </part>
+
+  <chapter id="object-tree">
+    <title>Object Hierarchy</title>
+    <xi:include href="xml/tree_index.sgml"/>
+  </chapter>
+  <index id="api-index-full">
+    <title>API Index</title>
+    <xi:include href="xml/api-index-full.xml"><xi:fallback /></xi:include>
+  </index>
+  <index id="deprecated-api-index" role="deprecated">
+    <title>Index of deprecated API</title>
+    <xi:include href="xml/api-index-deprecated.xml"><xi:fallback /></xi:include>
+  </index>
+  <index id="api-index-5-0-0" role="5.0.0">
+    <title>Index of new symbols in 5.0.0</title>
+    <xi:include href="xml/api-index-5.0.0.xml"><xi:fallback /></xi:include>
+  </index>
+  <xi:include href="xml/annotation-glossary.xml"><xi:fallback /></xi:include>
+</book>
diff --git a/c_glib/doc/arrow-flight-glib/entities.xml.in b/c_glib/doc/arrow-flight-glib/entities.xml.in
new file mode 100644
index 00000000000..aa5addb4e84
--- /dev/null
+++ b/c_glib/doc/arrow-flight-glib/entities.xml.in
@@ -0,0 +1,24 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+<!ENTITY package "@PACKAGE@">
+<!ENTITY package_bugreport "@PACKAGE_BUGREPORT@">
+<!ENTITY package_name "@PACKAGE_NAME@">
+<!ENTITY package_string "@PACKAGE_STRING@">
+<!ENTITY package_url "@PACKAGE_URL@">
+<!ENTITY package_version "@PACKAGE_VERSION@">
diff --git a/c_glib/doc/arrow-flight-glib/meson.build b/c_glib/doc/arrow-flight-glib/meson.build
new file mode 100644
index 00000000000..7ae38e4f5e4
--- /dev/null
+++ b/c_glib/doc/arrow-flight-glib/meson.build
@@ -0,0 +1,83 @@
+# -*- indent-tabs-mode: nil -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+package_id = 'arrow-flight-glib'
+package_name = 'Apache Arrow Flight GLib'
+entities_conf = configuration_data()
+entities_conf.set('PACKAGE', package_id)
+entities_conf.set('PACKAGE_BUGREPORT',
+                  'https://issues.apache.org/jira/browse/ARROW')
+entities_conf.set('PACKAGE_NAME', package_name)
+entities_conf.set('PACKAGE_STRING',
+                  ' '.join([package_id, version]))
+entities_conf.set('PACKAGE_URL', 'https://arrow.apache.org/')
+entities_conf.set('PACKAGE_VERSION', version)
+configure_file(input: 'entities.xml.in',
+               output: 'entities.xml',
+               configuration: entities_conf)
+
+private_headers = [
+]
+
+content_files = [
+]
+
+html_images = [
+]
+
+glib_prefix = dependency('glib-2.0').get_pkgconfig_variable('prefix')
+glib_doc_path = join_paths(glib_prefix, 'share', 'gtk-doc', 'html')
+arrow_glib_doc_path = join_paths(data_dir,
+                                 'gtk-doc',
+                                 'html',
+                                 'arrow-glib')
+doc_path = join_paths(data_dir, 'gtk-doc', 'html', package_id)
+
+source_directories = [
+  join_paths(meson.source_root(), package_id),
+  join_paths(meson.build_root(), package_id),
+]
+dependencies = [
+  arrow_glib,
+  arrow_flight_glib,
+]
+ignore_headers = []
+gnome.gtkdoc(package_id,
+             main_xml: package_id + '-docs.xml',
+             src_dir: source_directories,
+             dependencies: dependencies,
+             ignore_headers: ignore_headers,
+             gobject_typesfile: package_id + '.types',
+             scan_args: [
+               '--rebuild-types',
+               '--deprecated-guards=GARROW_DISABLE_DEPRECATED',
+             ],
+             mkdb_args: [
+               '--output-format=xml',
+               '--name-space=gad',
+               '--source-suffixes=c,cpp,h',
+             ],
+             fixxref_args: [
+               '--html-dir=' + doc_path,
+               '--extra-dir=' + join_paths(glib_doc_path, 'glib'),
+               '--extra-dir=' + join_paths(glib_doc_path, 'gobject'),
+               '--extra-dir=' + arrow_glib_doc_path,
+             ],
+             html_assets: html_images,
+             install: true)
diff --git a/c_glib/meson.build b/c_glib/meson.build
index bd1f3a2eca7..7c453af9e33 100644
--- a/c_glib/meson.build
+++ b/c_glib/meson.build
@@ -75,6 +75,7 @@ if arrow_cpp_build_lib_dir == ''
   have_arrow_orc = dependency('arrow-orc', required: false).found()
   arrow_cuda = dependency('arrow-cuda', required: false)
   arrow_dataset = dependency('arrow-dataset', required: false)
+  arrow_flight = dependency('arrow-flight', required: false)
   gandiva = dependency('gandiva', required: false)
   parquet = dependency('parquet', required: false)
   plasma = dependency('plasma', required: false)
@@ -105,6 +106,9 @@ main(void)
   arrow_dataset = cpp_compiler.find_library('arrow_dataset',
                                             dirs: [arrow_cpp_build_lib_dir],
                                             required: false)
+  arrow_flight = cpp_compiler.find_library('arrow_flight',
+                                           dirs: [arrow_cpp_build_lib_dir],
+                                           required: false)
   gandiva = cpp_compiler.find_library('gandiva',
                                       dirs: [arrow_cpp_build_lib_dir],
                                       required: false)
@@ -137,6 +141,9 @@ endif
 if arrow_dataset.found()
   subdir('arrow-dataset-glib')
 endif
+if arrow_flight.found()
+  subdir('arrow-flight-glib')
+endif
 if gandiva.found()
   subdir('gandiva-glib')
 endif
@@ -153,6 +160,9 @@ if get_option('gtk_doc')
   if arrow_dataset.found()
     subdir('doc/arrow-dataset-glib')
   endif
+  if arrow_flight.found()
+    subdir('doc/arrow-flight-glib')
+  endif
   if gandiva.found()
     subdir('doc/gandiva-glib')
   endif
diff --git a/c_glib/test/flight/test-client.rb b/c_glib/test/flight/test-client.rb
new file mode 100644
index 00000000000..7f076f64c9d
--- /dev/null
+++ b/c_glib/test/flight/test-client.rb
@@ -0,0 +1,41 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestFlightClient < Test::Unit::TestCase
+  def setup
+    @server = nil
+    omit("Arrow Flight is required") unless defined?(ArrowFlight)
+    @server = Helper::FlightServer.new
+    host = "127.0.0.1"
+    location = ArrowFlight::Location.new("grpc://#{host}:0")
+    options = ArrowFlight::ServerOptions.new(location)
+    @server.listen(options)
+    @location = ArrowFlight::Location.new("grpc://#{host}:#{@server.port}")
+  end
+
+  def shutdown
+    return if @server.nil?
+    @server.shutdown
+  end
+
+  def test_connect
+    # TODO: Add tests that use other methods and remove this.
+    assert_nothing_raised do
+      ArrowFlight::Client.new(@location)
+    end
+  end
+end
diff --git a/c_glib/test/flight/test-location.rb b/c_glib/test/flight/test-location.rb
new file mode 100644
index 00000000000..5b167932218
--- /dev/null
+++ b/c_glib/test/flight/test-location.rb
@@ -0,0 +1,40 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestFlightLocation < Test::Unit::TestCase
+  def setup
+    omit("Arrow Flight is required") unless defined?(ArrowFlight)
+  end
+
+  def test_to_s
+    location = ArrowFlight::Location.new("grpc://127.0.0.1:2929")
+    assert_equal("grpc://127.0.0.1:2929", location.to_s)
+  end
+
+  def test_scheme
+    location = ArrowFlight::Location.new("grpc://127.0.0.1:2929")
+    assert_equal("grpc", location.scheme)
+  end
+
+  def test_equal
+    location1 = ArrowFlight::Location.new("grpc://127.0.0.1:2929")
+    location2 = ArrowFlight::Location.new("grpc://127.0.0.1:2929")
+    assert do
+      location1 == location2
+    end
+  end
+end
diff --git a/c_glib/test/flight/test-server-options.rb b/c_glib/test/flight/test-server-options.rb
new file mode 100644
index 00000000000..93a90297ea2
--- /dev/null
+++ b/c_glib/test/flight/test-server-options.rb
@@ -0,0 +1,28 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestFlightServerOptions < Test::Unit::TestCase
+  def setup
+    omit("Arrow Flight is required") unless defined?(ArrowFlight)
+  end
+
+  def test_location
+    location = ArrowFlight::Location.new("grpc://127.0.0.1:0")
+    options = ArrowFlight::ServerOptions.new(location)
+    assert_equal(location, options.location)
+  end
+end
diff --git a/c_glib/test/helper/flight-server.rb b/c_glib/test/helper/flight-server.rb
new file mode 100644
index 00000000000..e1bafb10846
--- /dev/null
+++ b/c_glib/test/helper/flight-server.rb
@@ -0,0 +1,22 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Helper
+  class FlightServer < ArrowFlight::Server
+    type_register
+  end
+end
diff --git a/c_glib/test/run-test.rb b/c_glib/test/run-test.rb
index 7911cf44b6e..9dff2fe5b06 100755
--- a/c_glib/test/run-test.rb
+++ b/c_glib/test/run-test.rb
@@ -51,6 +51,11 @@ def initialize(data)
 rescue GObjectIntrospection::RepositoryError::TypelibNotFound
 end
 
+begin
+  ArrowFlight = GI.load("ArrowFlight")
+rescue GObjectIntrospection::RepositoryError::TypelibNotFound
+end
+
 begin
   Gandiva = GI.load("Gandiva")
 rescue GObjectIntrospection::RepositoryError::TypelibNotFound
@@ -76,5 +81,8 @@ def initialize(data)
 require_relative "helper/fixture"
 require_relative "helper/omittable"
 require_relative "helper/plasma-store"
+if defined?(ArrowFlight)
+  require_relative "helper/flight-server"
+end
 
 exit(Test::Unit::AutoRunner.run(true, test_dir.to_s))
diff --git a/c_glib/test/run-test.sh b/c_glib/test/run-test.sh
index 2120aa9f8f0..7e0901df5b5 100755
--- a/c_glib/test/run-test.sh
+++ b/c_glib/test/run-test.sh
@@ -20,7 +20,7 @@
 test_dir="$(cd $(dirname $0); pwd)"
 build_dir="$(cd .; pwd)"
 
-modules="arrow-glib arrow-cuda-glib arrow-dataset-glib gandiva-glib parquet-glib plasma-glib"
+modules="arrow-glib arrow-cuda-glib arrow-dataset-glib arrow-flight-glib gandiva-glib parquet-glib plasma-glib"
 
 for module in ${modules}; do
   module_build_dir="${build_dir}/${module}"
diff --git a/ci/docker/ubuntu-18.04-cpp.dockerfile b/ci/docker/ubuntu-18.04-cpp.dockerfile
index 4b855b52610..9445475ab5f 100644
--- a/ci/docker/ubuntu-18.04-cpp.dockerfile
+++ b/ci/docker/ubuntu-18.04-cpp.dockerfile
@@ -69,6 +69,7 @@ RUN apt-get update -y -q && \
         libboost-system-dev \
         libbrotli-dev \
         libbz2-dev \
+        libc-ares-dev \
         libcurl4-openssl-dev \
         libgflags-dev \
         libgoogle-glog-dev \
diff --git a/ci/docker/ubuntu-20.04-cpp.dockerfile b/ci/docker/ubuntu-20.04-cpp.dockerfile
index 3a37ace1381..022fc3ee22c 100644
--- a/ci/docker/ubuntu-20.04-cpp.dockerfile
+++ b/ci/docker/ubuntu-20.04-cpp.dockerfile
@@ -71,8 +71,9 @@ RUN apt-get update -y -q && \
         libboost-system-dev \
         libbrotli-dev \
         libbz2-dev \
-        libgflags-dev \
+        libc-ares-dev \
         libcurl4-openssl-dev \
+        libgflags-dev \
         libgoogle-glog-dev \
         liblz4-dev \
         libprotobuf-dev \
diff --git a/ci/docker/ubuntu-20.10-cpp.dockerfile b/ci/docker/ubuntu-20.10-cpp.dockerfile
index 80eb072e7ed..6cefecfd678 100644
--- a/ci/docker/ubuntu-20.10-cpp.dockerfile
+++ b/ci/docker/ubuntu-20.10-cpp.dockerfile
@@ -71,8 +71,9 @@ RUN apt-get update -y -q && \
         libboost-system-dev \
         libbrotli-dev \
         libbz2-dev \
-        libgflags-dev \
+        libc-ares-dev \
         libcurl4-openssl-dev \
+        libgflags-dev \
         libgoogle-glog-dev \
         libgrpc++-dev \
         liblz4-dev \
diff --git a/cpp/Brewfile b/cpp/Brewfile
index 7de6c7deabe..78ee5e64c8f 100644
--- a/cpp/Brewfile
+++ b/cpp/Brewfile
@@ -35,9 +35,6 @@ brew "openssl@1.1"
 brew "protobuf"
 brew "python"
 brew "rapidjson"
-# grpc bundles re2 and causes a conflict when Homebrew tries to install it,
-# so temporarily skip installing re2. See ARROW-9972.
-# brew "re2"
 brew "snappy"
 brew "thrift"
 brew "wget"
diff --git a/cpp/cmake_modules/Find-c-aresAlt.cmake b/cpp/cmake_modules/Findc-aresAlt.cmake
similarity index 100%
rename from cpp/cmake_modules/Find-c-aresAlt.cmake
rename to cpp/cmake_modules/Findc-aresAlt.cmake
diff --git a/dev/release/01-prepare-test.rb b/dev/release/01-prepare-test.rb
index 3cc5418df0f..dd908ad8d77 100644
--- a/dev/release/01-prepare-test.rb
+++ b/dev/release/01-prepare-test.rb
@@ -296,6 +296,13 @@ def test_version_pre_tag
                         "+  VERSION = \"#{@release_version}\""],
                      ],
                    },
+                   {
+                     path: "ruby/red-arrow-flight/lib/arrow-flight/version.rb",
+                     hunks: [
+                       ["-  VERSION = \"#{@snapshot_version}\"",
+                        "+  VERSION = \"#{@release_version}\""],
+                     ],
+                   },
                    {
                      path: "ruby/red-arrow/lib/arrow/version.rb",
                      hunks: [
@@ -492,6 +499,13 @@ def test_version_post_tag
                         "+  VERSION = \"#{@next_snapshot_version}\""],
                      ],
                    },
+                   {
+                     path: "ruby/red-arrow-flight/lib/arrow-flight/version.rb",
+                     hunks: [
+                       ["-  VERSION = \"#{@release_version}\"",
+                        "+  VERSION = \"#{@next_snapshot_version}\""],
+                     ],
+                   },
                    {
                      path: "ruby/red-arrow/lib/arrow/version.rb",
                      hunks: [
diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt
index 13e431ceb8d..f8ec55f621e 100644
--- a/dev/release/rat_exclude_files.txt
+++ b/dev/release/rat_exclude_files.txt
@@ -60,6 +60,7 @@ dev/tasks/linux-packages/apache-arrow/debian/control.in
 dev/tasks/linux-packages/apache-arrow/debian/gir1.2-arrow-1.0.install
 dev/tasks/linux-packages/apache-arrow/debian/gir1.2-arrow-cuda-1.0.install
 dev/tasks/linux-packages/apache-arrow/debian/gir1.2-arrow-dataset-1.0.install
+dev/tasks/linux-packages/apache-arrow/debian/gir1.2-arrow-flight-1.0.install
 dev/tasks/linux-packages/apache-arrow/debian/gir1.2-gandiva-1.0.install
 dev/tasks/linux-packages/apache-arrow/debian/gir1.2-parquet-1.0.install
 dev/tasks/linux-packages/apache-arrow/debian/gir1.2-plasma-1.0.install
@@ -81,6 +82,11 @@ dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-doc.links
 dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib500.install
 dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset500.install
 dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-dev.install
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-dev.install
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.doc-base
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.install
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.links
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib500.install
 dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight500.install
 dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-dev.install
 dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight-dev.install
diff --git a/dev/release/verify-apt.sh b/dev/release/verify-apt.sh
index e7b87a3a4da..2656dd25813 100755
--- a/dev/release/verify-apt.sh
+++ b/dev/release/verify-apt.sh
@@ -132,7 +132,8 @@ apt install -y -V libarrow-glib-dev=${package_version}
 apt install -y -V libarrow-glib-doc=${package_version}
 
 if [ "${have_flight}" = "yes" ]; then
-  apt install -y -V libarrow-flight-dev=${package_version}
+  apt install -y -V libarrow-flight-glib-dev=${package_version}
+  apt install -y -V libarrow-flight-glib-doc=${package_version}
 fi
 
 apt install -y -V libarrow-python-dev=${package_version}
diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh
index d03341ce44d..23239040f0a 100755
--- a/dev/release/verify-release-candidate.sh
+++ b/dev/release/verify-release-candidate.sh
@@ -427,10 +427,13 @@ test_js() {
 test_ruby() {
   pushd ruby
 
-  local modules="red-arrow red-plasma red-parquet"
+  local modules="red-arrow red-arrow-dataset red-plasma red-parquet"
   if [ "${ARROW_CUDA}" = "ON" ]; then
     modules="${modules} red-arrow-cuda"
   fi
+  if [ "${ARROW_FLIGHT}" = "ON" ]; then
+    modules="${modules} red-arrow-flight"
+  fi
   if [ "${ARROW_GANDIVA}" = "ON" ]; then
     modules="${modules} red-gandiva"
   fi
diff --git a/dev/release/verify-yum.sh b/dev/release/verify-yum.sh
index 71d28f8644f..14318678015 100755
--- a/dev/release/verify-yum.sh
+++ b/dev/release/verify-yum.sh
@@ -154,7 +154,8 @@ else
 fi
 
 if [ "${have_flight}" = "yes" ]; then
-  ${install_command} --enablerepo=epel arrow-flight-devel-${package_version}
+  ${install_command} --enablerepo=epel arrow-flight-glib-devel-${package_version}
+  ${install_command} --enablerepo=epel arrow-flight-glib-doc-${package_version}
 fi
 
 if [ "${have_gandiva}" = "yes" ]; then
diff --git a/dev/tasks/linux-packages/apache-arrow/Rakefile b/dev/tasks/linux-packages/apache-arrow/Rakefile
index 13d8fc81e90..8197130b403 100644
--- a/dev/tasks/linux-packages/apache-arrow/Rakefile
+++ b/dev/tasks/linux-packages/apache-arrow/Rakefile
@@ -106,10 +106,21 @@ class ApacheArrowPackageTask < PackageTask
     control.gsub(/@USE_SYSTEM_GRPC@/, use_system_grpc)
   end
 
+  def apt_prepare_debian_control_c_ares(control, target)
+    case target
+    when /\Aubuntu-bionic/
+      use_system_c_ares = "#"
+    else
+      use_system_c_ares = ""
+    end
+    control.gsub(/@USE_SYSTEM_C_ARES@/, use_system_c_ares)
+  end
+
   def apt_prepare_debian_control(control_in, target)
     control = control_in.dup
     control = apt_prepare_debian_control_cuda_architecture(control, target)
     control = apt_prepare_debian_control_grpc(control, target)
+    control = apt_prepare_debian_control_c_ares(control, target)
     control
   end
 end
diff --git a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal/Dockerfile b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal/Dockerfile
index c8f1794afe0..112cc1846bc 100644
--- a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal/Dockerfile
+++ b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal/Dockerfile
@@ -43,6 +43,7 @@ RUN \
     libboost-system-dev \
     libbrotli-dev \
     libbz2-dev \
+    libc-ares-dev \
     libcurl4-openssl-dev \
     libgirepository1.0-dev \
     libglib2.0-doc \
diff --git a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-groovy/Dockerfile b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-groovy/Dockerfile
index 8d36a5f80ae..4f57a47e24c 100644
--- a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-groovy/Dockerfile
+++ b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-groovy/Dockerfile
@@ -43,6 +43,7 @@ RUN \
     libboost-system-dev \
     libbrotli-dev \
     libbz2-dev \
+    libc-ares-dev \
     libcurl4-openssl-dev \
     libgirepository1.0-dev \
     libglib2.0-doc \
diff --git a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-hirsute/Dockerfile b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-hirsute/Dockerfile
index 0b8f2499b5b..7e26d3eb2b0 100644
--- a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-hirsute/Dockerfile
+++ b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-hirsute/Dockerfile
@@ -43,6 +43,7 @@ RUN \
     libboost-system-dev \
     libbrotli-dev \
     libbz2-dev \
+    libc-ares-dev \
     libcurl4-openssl-dev \
     libgirepository1.0-dev \
     libglib2.0-doc \
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/control.in b/dev/tasks/linux-packages/apache-arrow/debian/control.in
index b846abd6f06..e50eeaff581 100644
--- a/dev/tasks/linux-packages/apache-arrow/debian/control.in
+++ b/dev/tasks/linux-packages/apache-arrow/debian/control.in
@@ -12,6 +12,7 @@ Build-Depends:
   libboost-system-dev,
   libbrotli-dev,
   libbz2-dev,
+@USE_SYSTEM_C_ARES@  libc-ares-dev,
   libcurl4-openssl-dev,
   libgirepository1.0-dev,
   libgoogle-glog-dev,
@@ -127,6 +128,7 @@ Depends:
   libarrow500 (= ${binary:Version}),
   libbrotli-dev,
   libbz2-dev,
+@USE_SYSTEM_C_ARES@  libc-ares-dev,
 @USE_SYSTEM_GRPC@  libgrpc++-dev,
   liblz4-dev,
   libre2-dev,
@@ -429,6 +431,57 @@ Description: Apache Arrow is a data processing library for analysis
  .
  This package provides documentations for dataset module.
 
+Package: libarrow-flight-glib500
+Section: libs
+Architecture: any
+Multi-Arch: same
+Pre-Depends: ${misc:Pre-Depends}
+Depends:
+  ${misc:Depends},
+  ${shlibs:Depends},
+  libarrow-glib500 (= ${binary:Version}),
+  libarrow-flight500 (= ${binary:Version})
+Description: Apache Arrow is a data processing library for analysis
+ .
+ This package provides GLib based library files for Apache Arrow Flight.
+
+Package: gir1.2-arrow-flight-1.0
+Section: introspection
+Architecture: any
+Multi-Arch: same
+Depends:
+  ${gir:Depends},
+  ${misc:Depends}
+Description: Apache Arrow is a data processing library for analysis
+ .
+ This package provides GObject Introspection typelib files for Apache Arrow
+ Flight.
+
+Package: libarrow-flight-glib-dev
+Section: libdevel
+Architecture: any
+Multi-Arch: same
+Depends:
+  ${misc:Depends},
+  libarrow-flight-dev (= ${binary:Version}),
+  libarrow-glib-dev (= ${binary:Version}),
+  libarrow-flight-glib500 (= ${binary:Version}),
+  gir1.2-arrow-flight-1.0 (= ${binary:Version})
+Description: Apache Arrow is a data processing library for analysis
+ .
+ This package provides GLib based header files for Apache Arrow Flight.
+
+Package: libarrow-flight-glib-doc
+Section: doc
+Architecture: any
+Multi-Arch: foreign
+Depends:
+  ${misc:Depends}
+Recommends: libarrow-glib-doc
+Description: Apache Arrow is a data processing library for analysis
+ .
+ This package provides documentations for Apache Arrow Flight.
+
 Package: libgandiva-glib500
 Section: libs
 Architecture: any
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-dev.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-dev.install
new file mode 100644
index 00000000000..8a8dee3ac5a
--- /dev/null
+++ b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-dev.install
@@ -0,0 +1,4 @@
+usr/include/arrow-flight-glib/
+usr/lib/*/libarrow-flight-glib.so
+usr/lib/*/pkgconfig/arrow-flight-glib.pc
+usr/share/gir-1.0/ArrowFlight-1.0.gir
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.doc-base b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.doc-base
new file mode 100644
index 00000000000..94b17c11b9d
--- /dev/null
+++ b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.doc-base
@@ -0,0 +1,9 @@
+Document: arrow-flight-glib
+Title: Apache Arrow Flight GLib Reference Manual
+Author: The Apache Software Foundation
+Abstract: Apache Arrow Flight GLib provides a general-purpose client-server framework to simplify high performance transport of large datasets over network interfaces.
+Section: Programming
+
+Format: HTML
+Index: /usr/share/gtk-doc/html/arrow-flight-glib/index.html
+Files: /usr/share/gtk-doc/html/arrow-flight-glib/*.html
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.install
new file mode 100644
index 00000000000..3c95f17ed77
--- /dev/null
+++ b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.install
@@ -0,0 +1 @@
+usr/share/gtk-doc/html/arrow-flight-glib
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.links b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.links
new file mode 100644
index 00000000000..d55c89a1b08
--- /dev/null
+++ b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.links
@@ -0,0 +1,3 @@
+usr/share/gtk-doc/html/arrow-flight-glib usr/share/doc/libarrow-flight-glib-doc/arrow-flight-glib 
+usr/share/doc/libglib2.0-doc/glib usr/share/doc/libarrow-flight-glib-doc/glib
+usr/share/doc/libglib2.0-doc/gobject usr/share/doc/libarrow-flight-glib-doc/gobject
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib500.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib500.install
new file mode 100644
index 00000000000..a6156ed94c9
--- /dev/null
+++ b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib500.install
@@ -0,0 +1 @@
+usr/lib/*/libarrow-flight-glib.so.*
diff --git a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in
index 4d23c55ca4d..eb322582ba1 100644
--- a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in
+++ b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in
@@ -65,6 +65,9 @@ BuildRequires:	boost%{boost_version}-devel
 %endif
 BuildRequires:	brotli-devel
 BuildRequires:	bzip2-devel
+%if %{use_flight}
+BuildRequires:	c-ares-devel
+%endif
 BuildRequires:	cmake%{cmake_version}
 %if %{use_s3}
 BuildRequires:	curl-devel
@@ -218,6 +221,9 @@ License:	Apache-2.0
 Requires:	%{name}-libs = %{version}-%{release}
 Requires:	brotli-devel
 Requires:	bzip2-devel
+%if %{use_flight}
+Requires:	c-ares-devel
+%endif
 Requires:	libzstd-devel
 Requires:	lz4-devel
 Requires:	openssl-devel
@@ -252,6 +258,9 @@ Libraries and header files for Apache Arrow C++.
 %{_libdir}/cmake/arrow/FindBrotli.cmake
 %{_libdir}/cmake/arrow/FindLz4.cmake
 %{_libdir}/cmake/arrow/FindSnappy.cmake
+%if %{use_flight}
+%{_libdir}/cmake/arrow/Findc-aresAlt.cmake
+%endif
 %if %{have_re2}
 %{_libdir}/cmake/arrow/Findre2Alt.cmake
 %endif
@@ -307,6 +316,9 @@ Libraries and header files for Apache Arrow dataset.
 Summary:	C++ library for fast data transport.
 License:	Apache-2.0
 Requires:	%{name}-libs = %{version}-%{release}
+%if %{use_flight}
+Requires:	c-ares
+%endif
 Requires:	openssl
 
 %description flight-libs
@@ -578,13 +590,13 @@ Documentation for Apache Arrow GLib.
 %{_datadir}/gtk-doc/html/arrow-glib/
 
 %package dataset-glib-libs
-Summary:	Runtime libraries for Apache Arrow dataset GLib
+Summary:	Runtime libraries for Apache Arrow Dataset GLib
 License:	Apache-2.0
 Requires:	%{name}-dataset-libs = %{version}-%{release}
 Requires:	%{name}-glib-libs = %{version}-%{release}
 
 %description dataset-glib-libs
-This package contains the libraries for Apache Arrow dataset GLib.
+This package contains the libraries for Apache Arrow Dataset GLib.
 
 %files dataset-glib-libs
 %defattr(-,root,root,-)
@@ -593,13 +605,13 @@ This package contains the libraries for Apache Arrow dataset GLib.
 %{_datadir}/gir-1.0/ArrowDataset-1.0.gir
 
 %package dataset-glib-devel
-Summary:	Libraries and header files for Apache Arrow dataset GLib
+Summary:	Libraries and header files for Apache Arrow Dataset GLib
 License:	Apache-2.0
 Requires:	%{name}-dataset-devel = %{version}-%{release}
 Requires:	%{name}-glib-devel = %{version}-%{release}
 
 %description dataset-glib-devel
-Libraries and header files for Apache Arrow dataset GLib.
+Libraries and header files for Apache Arrow Dataset GLib.
 
 %files dataset-glib-devel
 %defattr(-,root,root,-)
@@ -611,7 +623,7 @@ Libraries and header files for Apache Arrow dataset GLib.
 %{_libdir}/girepository-1.0/ArrowDataset-1.0.typelib
 
 %package dataset-glib-doc
-Summary:	Documentation for Apache Arrow dataset GLib
+Summary:	Documentation for Apache Arrow Dataset GLib
 License:	Apache-2.0
 
 %description dataset-glib-doc
@@ -622,6 +634,53 @@ Documentation for Apache Arrow dataset GLib.
 %doc README.md LICENSE.txt NOTICE.txt
 %{_datadir}/gtk-doc/html/arrow-dataset-glib/
 
+%if %{use_flight}
+%package flight-glib-libs
+Summary:	Runtime libraries for Apache Arrow Flight GLib
+License:	Apache-2.0
+Requires:	%{name}-flight-libs = %{version}-%{release}
+Requires:	%{name}-glib-libs = %{version}-%{release}
+
+%description flight-glib-libs
+This package contains the libraries for Apache Arrow Flight GLib.
+
+%files flight-glib-libs
+%defattr(-,root,root,-)
+%doc README.md LICENSE.txt NOTICE.txt
+%{_libdir}/libarrow-flight-glib.so.*
+%{_datadir}/gir-1.0/ArrowFlight-1.0.gir
+
+%package flight-glib-devel
+Summary:	Libraries and header files for Apache Arrow Flight GLib
+License:	Apache-2.0
+Requires:	%{name}-flight-devel = %{version}-%{release}
+Requires:	%{name}-glib-devel = %{version}-%{release}
+
+%description flight-glib-devel
+Libraries and header files for Apache Arrow Flight GLib.
+
+%files flight-glib-devel
+%defattr(-,root,root,-)
+%doc README.md LICENSE.txt NOTICE.txt
+%{_includedir}/arrow-flight-glib/
+%{_libdir}/libarrow-flight-glib.a
+%{_libdir}/libarrow-flight-glib.so
+%{_libdir}/pkgconfig/arrow-flight-glib.pc
+%{_libdir}/girepository-1.0/ArrowFlight-1.0.typelib
+
+%package flight-glib-doc
+Summary:	Documentation for Apache Arrow Flight GLib
+License:	Apache-2.0
+
+%description flight-glib-doc
+Documentation for Apache Arrow Flight GLib.
+
+%files flight-glib-doc
+%defattr(-,root,root,-)
+%doc README.md LICENSE.txt NOTICE.txt
+%{_datadir}/gtk-doc/html/arrow-flight-glib/
+%endif
+
 %if %{use_gandiva}
 %package -n gandiva-glib-libs
 Summary:	Runtime libraries for Gandiva GLib
diff --git a/dev/tasks/linux-packages/apache-arrow/yum/centos-8/Dockerfile b/dev/tasks/linux-packages/apache-arrow/yum/centos-8/Dockerfile
index 5ced36cab0d..7a7865ae404 100644
--- a/dev/tasks/linux-packages/apache-arrow/yum/centos-8/Dockerfile
+++ b/dev/tasks/linux-packages/apache-arrow/yum/centos-8/Dockerfile
@@ -28,6 +28,7 @@ RUN \
     boost-devel \
     brotli-devel \
     bzip2-devel \
+    c-ares-devel \
     ccache \
     clang \
     cmake \
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index b2e4d7a0545..e60cffffde3 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -429,6 +429,7 @@ tasks:
     {% endif %}
       - gir1.2-arrow-1.0_{no_rc_version}-1_[a-z0-9]+.deb
       - gir1.2-arrow-dataset-1.0_{no_rc_version}-1_[a-z0-9]+.deb
+      - gir1.2-arrow-flight-1.0_{no_rc_version}-1_[a-z0-9]+.deb
       - gir1.2-gandiva-1.0_{no_rc_version}-1_[a-z0-9]+.deb
       - gir1.2-parquet-1.0_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-dataset-dev_{no_rc_version}-1_[a-z0-9]+.deb
@@ -440,6 +441,10 @@ tasks:
       - libarrow-dataset500_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-flight-dev_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-flight-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-flight-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-flight-glib500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libarrow-flight-glib500_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-flight500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
       - libarrow-flight500_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
@@ -535,6 +540,10 @@ tasks:
       - arrow-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
       {% if target == "centos-8" %}
       - arrow-flight-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      - arrow-flight-glib-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      - arrow-flight-glib-doc-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      - arrow-flight-glib-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
+      - arrow-flight-glib-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
       - arrow-flight-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
       - arrow-flight-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm
       {% endif %}
diff --git a/ruby/red-arrow-cuda/.gitignore b/ruby/red-arrow-cuda/.gitignore
index 779545d9026..afd93a1686b 100644
--- a/ruby/red-arrow-cuda/.gitignore
+++ b/ruby/red-arrow-cuda/.gitignore
@@ -15,4 +15,5 @@
 # specific language governing permissions and limitations
 # under the License.
 
+/Gemfile.lock
 /pkg/
diff --git a/ruby/red-arrow-dataset/.gitignore b/ruby/red-arrow-dataset/.gitignore
index 779545d9026..afd93a1686b 100644
--- a/ruby/red-arrow-dataset/.gitignore
+++ b/ruby/red-arrow-dataset/.gitignore
@@ -15,4 +15,5 @@
 # specific language governing permissions and limitations
 # under the License.
 
+/Gemfile.lock
 /pkg/
diff --git a/ruby/red-arrow-flight/.gitignore b/ruby/red-arrow-flight/.gitignore
new file mode 100644
index 00000000000..779545d9026
--- /dev/null
+++ b/ruby/red-arrow-flight/.gitignore
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+/pkg/
diff --git a/ruby/red-arrow-flight/Gemfile b/ruby/red-arrow-flight/Gemfile
new file mode 100644
index 00000000000..7c4cefcf39d
--- /dev/null
+++ b/ruby/red-arrow-flight/Gemfile
@@ -0,0 +1,24 @@
+# -*- ruby -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+source "https://rubygems.org/"
+
+gemspec
+
+gem "red-arrow", path: "../red-arrow"
diff --git a/ruby/red-arrow-flight/LICENSE.txt b/ruby/red-arrow-flight/LICENSE.txt
new file mode 100644
index 00000000000..d6456956733
--- /dev/null
+++ b/ruby/red-arrow-flight/LICENSE.txt
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/ruby/red-arrow-flight/NOTICE.txt b/ruby/red-arrow-flight/NOTICE.txt
new file mode 100644
index 00000000000..e08aeda8a41
--- /dev/null
+++ b/ruby/red-arrow-flight/NOTICE.txt
@@ -0,0 +1,2 @@
+Apache Arrow
+Copyright 2016 The Apache Software Foundation
diff --git a/ruby/red-arrow-flight/README.md b/ruby/red-arrow-flight/README.md
new file mode 100644
index 00000000000..e81f50f9a4f
--- /dev/null
+++ b/ruby/red-arrow-flight/README.md
@@ -0,0 +1,50 @@
+<!---
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+# Red Arrow Flight - Apache Arrow Flight Ruby
+
+Red Arrow Flight is the Ruby bindings of Apache Arrow Flight. Red Arrow Flight is based on GObject Introspection.
+
+[Apache Arrow Flight](https://arrow.apache.org/) is one of Apache Arrow components to read and write semantic flights stored in different locations and formats.
+
+[GObject Introspection](https://wiki.gnome.org/action/show/Projects/GObjectIntrospection) is a middleware for language bindings of C library. GObject Introspection can generate language bindings automatically at runtime.
+
+Red Arrow Flight uses [Apache Arrow Flight GLib](https://github.com/apache/arrow/tree/master/c_glib) and [gobject-introspection gem](https://rubygems.org/gems/gobject-introspection) to generate Ruby bindings of Apache Arrow Flight.
+
+Apache Arrow Flight GLib is a C wrapper for [Apache Arrow Flight C++](https://github.com/apache/arrow/tree/master/cpp). GObject Introspection can't use Apache Arrow Flight C++ directly. Apache Arrow Flight GLib is a bridge between Apache Arrow Flight C++ and GObject Introspection.
+
+gobject-introspection gem is a Ruby bindings of GObject Introspection. Red Arrow Flight uses GObject Introspection via gobject-introspection gem.
+
+## Install
+
+Install Apache Arrow Flight GLib before install Red Arrow Flight. Install Apache Arrow GLib before install Red Arrow. See [Apache Arrow install document](https://arrow.apache.org/install/) for details.
+
+Install Red Arrow Flight after you install Apache Arrow Flight GLib:
+
+```console
+$ gem install red-arrow-flight
+```
+
+## Usage
+
+```ruby
+require "arrow-flight"
+
+# TODO
+```
diff --git a/ruby/red-arrow-flight/Rakefile b/ruby/red-arrow-flight/Rakefile
new file mode 100644
index 00000000000..2bbe6e7619c
--- /dev/null
+++ b/ruby/red-arrow-flight/Rakefile
@@ -0,0 +1,41 @@
+# -*- ruby -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+require "rubygems"
+require "bundler/gem_helper"
+
+base_dir = File.join(File.dirname(__FILE__))
+
+helper = Bundler::GemHelper.new(base_dir)
+helper.install
+
+release_task = Rake::Task["release"]
+release_task.prerequisites.replace(["build", "release:rubygem_push"])
+
+desc "Run tests"
+task :test do
+  cd(base_dir) do
+    cd("dependency-check") do
+      ruby("-S", "rake")
+    end
+    ruby("test/run-test.rb")
+  end
+end
+
+task default: :test
diff --git a/ruby/red-arrow-flight/dependency-check/Rakefile b/ruby/red-arrow-flight/dependency-check/Rakefile
new file mode 100644
index 00000000000..6aca19609e4
--- /dev/null
+++ b/ruby/red-arrow-flight/dependency-check/Rakefile
@@ -0,0 +1,47 @@
+# -*- ruby -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+require "pkg-config"
+require "native-package-installer"
+require_relative "../lib/arrow-flight/version"
+
+case RUBY_PLATFORM
+when /mingw|mswin/
+  task :default => "nothing"
+else
+  task :default => "dependency:check"
+end
+
+task :nothing do
+end
+
+namespace :dependency do
+  desc "Check dependency"
+  task :check do
+    unless PKGConfig.check_version?("arrow-flight-glib",
+                                    ArrowFlight::Version::MAJOR,
+                                    ArrowFlight::Version::MINOR,
+                                    ArrowFlight::Version::MICRO)
+      unless NativePackageInstaller.install(:debian => "libarrow-flight-glib-dev",
+                                            :redhat => "arrow-flight-glib-devel")
+        exit(false)
+      end
+    end
+  end
+end
diff --git a/ruby/red-arrow-flight/lib/arrow-flight.rb b/ruby/red-arrow-flight/lib/arrow-flight.rb
new file mode 100644
index 00000000000..2070f354a68
--- /dev/null
+++ b/ruby/red-arrow-flight/lib/arrow-flight.rb
@@ -0,0 +1,29 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+require "arrow"
+
+require "arrow-flight/version"
+
+require "arrow-flight/loader"
+
+module ArrowFlight
+  class Error < StandardError
+  end
+
+  Loader.load
+end
diff --git a/ruby/red-arrow-flight/lib/arrow-flight/call-options.rb b/ruby/red-arrow-flight/lib/arrow-flight/call-options.rb
new file mode 100644
index 00000000000..2030b2d3306
--- /dev/null
+++ b/ruby/red-arrow-flight/lib/arrow-flight/call-options.rb
@@ -0,0 +1,35 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module ArrowFlight
+  class CallOptions
+    class << self
+      def try_convert(value)
+        case value
+        when Hash
+          options = new
+          value.each do |name, value|
+            options.__send__("#{name}=", value)
+          end
+          options
+        else
+          nil
+        end
+      end
+    end
+  end
+end
diff --git a/ruby/red-arrow-flight/lib/arrow-flight/client-options.rb b/ruby/red-arrow-flight/lib/arrow-flight/client-options.rb
new file mode 100644
index 00000000000..2294b2133bb
--- /dev/null
+++ b/ruby/red-arrow-flight/lib/arrow-flight/client-options.rb
@@ -0,0 +1,35 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module ArrowFlight
+  class ClientOptions
+    class << self
+      def try_convert(value)
+        case value
+        when Hash
+          options = new
+          value.each do |name, value|
+            options.__send__("#{name}=", value)
+          end
+          options
+        else
+          nil
+        end
+      end
+    end
+  end
+end
diff --git a/ruby/red-arrow-flight/lib/arrow-flight/loader.rb b/ruby/red-arrow-flight/lib/arrow-flight/loader.rb
new file mode 100644
index 00000000000..c89ff994784
--- /dev/null
+++ b/ruby/red-arrow-flight/lib/arrow-flight/loader.rb
@@ -0,0 +1,38 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module ArrowFlight
+  class Loader < GObjectIntrospection::Loader
+    class << self
+      def load
+        super("ArrowFlight", ArrowFlight)
+      end
+    end
+
+    private
+    def post_load(repository, namespace)
+      require_libraries
+    end
+
+    def require_libraries
+      require "arrow-flight/call-options"
+      require "arrow-flight/client-options"
+      require "arrow-flight/location"
+      require "arrow-flight/server-options"
+    end
+  end
+end
diff --git a/ruby/red-arrow-flight/lib/arrow-flight/location.rb b/ruby/red-arrow-flight/lib/arrow-flight/location.rb
new file mode 100644
index 00000000000..d49178d04a3
--- /dev/null
+++ b/ruby/red-arrow-flight/lib/arrow-flight/location.rb
@@ -0,0 +1,31 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module ArrowFlight
+  class Location
+    class << self
+      def try_convert(value)
+        case value
+        when String
+          new(value)
+        else
+          nil
+        end
+      end
+    end
+  end
+end
diff --git a/ruby/red-arrow-flight/lib/arrow-flight/server-options.rb b/ruby/red-arrow-flight/lib/arrow-flight/server-options.rb
new file mode 100644
index 00000000000..f28aed87ec3
--- /dev/null
+++ b/ruby/red-arrow-flight/lib/arrow-flight/server-options.rb
@@ -0,0 +1,41 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module ArrowFlight
+  class ServerOptions
+    class << self
+      def try_convert(value)
+        case value
+        when Location
+          new(value)
+        when Hash
+          return nil unless value.key?(:location)
+          options = new(value[:location])
+          value.each do |name, value|
+            next if name == :location
+            options.__send__("#{name}=", value)
+          end
+          options
+        else
+          value = Location.try_convert(value)
+          return nil if value.nil?
+          try_convert(value)
+        end
+      end
+    end
+  end
+end
diff --git a/ruby/red-arrow-flight/lib/arrow-flight/version.rb b/ruby/red-arrow-flight/lib/arrow-flight/version.rb
new file mode 100644
index 00000000000..c18cde8aee4
--- /dev/null
+++ b/ruby/red-arrow-flight/lib/arrow-flight/version.rb
@@ -0,0 +1,26 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module ArrowFlight
+  VERSION = "5.0.0-SNAPSHOT"
+
+  module Version
+    numbers, TAG = VERSION.split("-")
+    MAJOR, MINOR, MICRO = numbers.split(".").collect(&:to_i)
+    STRING = VERSION
+  end
+end
diff --git a/ruby/red-arrow-flight/red-arrow-flight.gemspec b/ruby/red-arrow-flight/red-arrow-flight.gemspec
new file mode 100644
index 00000000000..efe868ca741
--- /dev/null
+++ b/ruby/red-arrow-flight/red-arrow-flight.gemspec
@@ -0,0 +1,52 @@
+# -*- ruby -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+require_relative "lib/arrow-flight/version"
+
+Gem::Specification.new do |spec|
+  spec.name = "red-arrow-flight"
+  version_components = [
+    ArrowFlight::Version::MAJOR.to_s,
+    ArrowFlight::Version::MINOR.to_s,
+    ArrowFlight::Version::MICRO.to_s,
+    ArrowFlight::Version::TAG,
+  ]
+  spec.version = version_components.compact.join(".")
+  spec.homepage = "https://arrow.apache.org/"
+  spec.authors = ["Apache Arrow Developers"]
+  spec.email = ["dev@arrow.apache.org"]
+
+  spec.summary = "Red Arrow Flight is the Ruby bindings of Apache Arrow Flight"
+  spec.description =
+    "Apache Arrow Flight is a general-purpose client-server framework to " +
+    "simplify high performance transport of large datasets over " +
+    "network interfaces."
+  spec.license = "Apache-2.0"
+  spec.files = ["README.md", "Rakefile", "Gemfile", "#{spec.name}.gemspec"]
+  spec.files += ["LICENSE.txt", "NOTICE.txt"]
+  spec.files += Dir.glob("lib/**/*.rb")
+  spec.test_files += Dir.glob("test/**/*")
+  spec.extensions = ["dependency-check/Rakefile"]
+
+  spec.add_runtime_dependency("red-arrow", "= #{spec.version}")
+
+  spec.add_development_dependency("bundler")
+  spec.add_development_dependency("rake")
+  spec.add_development_dependency("test-unit")
+end
diff --git a/ruby/red-arrow-flight/test/helper.rb b/ruby/red-arrow-flight/test/helper.rb
new file mode 100644
index 00000000000..cddfdea5fb3
--- /dev/null
+++ b/ruby/red-arrow-flight/test/helper.rb
@@ -0,0 +1,22 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+require "arrow-flight"
+
+require "test-unit"
+
+require_relative "helper/server"
diff --git a/ruby/red-arrow-flight/test/helper/server.rb b/ruby/red-arrow-flight/test/helper/server.rb
new file mode 100644
index 00000000000..461fc92d12c
--- /dev/null
+++ b/ruby/red-arrow-flight/test/helper/server.rb
@@ -0,0 +1,22 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Helper
+  class Server < ArrowFlight::Server
+    type_register
+  end
+end
diff --git a/ruby/red-arrow-flight/test/run-test.rb b/ruby/red-arrow-flight/test/run-test.rb
new file mode 100755
index 00000000000..48d2c49e1d4
--- /dev/null
+++ b/ruby/red-arrow-flight/test/run-test.rb
@@ -0,0 +1,50 @@
+#!/usr/bin/env ruby
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+$VERBOSE = true
+
+require "pathname"
+
+(ENV["ARROW_DLL_PATH"] || "").split(File::PATH_SEPARATOR).each do |path|
+  RubyInstaller::Runtime.add_dll_directory(path)
+end
+
+base_dir = Pathname.new(__dir__).parent.expand_path
+arrow_base_dir = base_dir.parent + "red-arrow"
+
+lib_dir = base_dir + "lib"
+test_dir = base_dir + "test"
+
+arrow_lib_dir = arrow_base_dir + "lib"
+arrow_ext_dir = arrow_base_dir + "ext" + "arrow"
+
+build_dir = ENV["BUILD_DIR"]
+if build_dir
+  arrow_build_dir = Pathname.new(build_dir) + "red-arrow"
+else
+  arrow_build_dir = arrow_ext_dir
+end
+
+$LOAD_PATH.unshift(arrow_build_dir.to_s)
+$LOAD_PATH.unshift(arrow_lib_dir.to_s)
+$LOAD_PATH.unshift(lib_dir.to_s)
+
+require_relative "helper"
+
+exit(Test::Unit::AutoRunner.run(true, test_dir.to_s))
diff --git a/ruby/red-arrow-flight/test/test-client.rb b/ruby/red-arrow-flight/test/test-client.rb
new file mode 100644
index 00000000000..0514c46a3c1
--- /dev/null
+++ b/ruby/red-arrow-flight/test/test-client.rb
@@ -0,0 +1,35 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestClient < Test::Unit::TestCase
+  def setup
+    @server = Helper::Server.new
+    @server.listen("grpc://127.0.0.1:0")
+    @location = "grpc://127.0.0.1:#{@server.port}"
+  end
+
+  def shutdown
+    @server.shutdow
+  end
+
+  def test_connect
+    # TODO: Add tests that use other methods and remove this.
+    assert_nothing_raised do
+      ArrowFlight::Client.new(@location)
+    end
+  end
+end
diff --git a/ruby/red-arrow-flight/test/test-location.rb b/ruby/red-arrow-flight/test/test-location.rb
new file mode 100644
index 00000000000..5edd5594f8a
--- /dev/null
+++ b/ruby/red-arrow-flight/test/test-location.rb
@@ -0,0 +1,26 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestLocation < Test::Unit::TestCase
+  sub_test_case(".try_convert") do
+    def test_string
+      location = ArrowFlight::Location.try_convert("grpc://127.0.0.1:2929")
+      assert_equal("grpc://127.0.0.1:2929",
+                   location.to_s)
+    end
+  end
+end
diff --git a/ruby/red-arrow/.gitignore b/ruby/red-arrow/.gitignore
index e41483f2899..3330f865731 100644
--- a/ruby/red-arrow/.gitignore
+++ b/ruby/red-arrow/.gitignore
@@ -16,6 +16,7 @@
 # under the License.
 
 /.yardoc/
+/Gemfile.lock
 /doc/reference/
 /ext/arrow/Makefile
 /ext/arrow/mkmf.log
diff --git a/ruby/red-gandiva/.gitignore b/ruby/red-gandiva/.gitignore
index 779545d9026..afd93a1686b 100644
--- a/ruby/red-gandiva/.gitignore
+++ b/ruby/red-gandiva/.gitignore
@@ -15,4 +15,5 @@
 # specific language governing permissions and limitations
 # under the License.
 
+/Gemfile.lock
 /pkg/
diff --git a/ruby/red-parquet/.gitignore b/ruby/red-parquet/.gitignore
index 779545d9026..afd93a1686b 100644
--- a/ruby/red-parquet/.gitignore
+++ b/ruby/red-parquet/.gitignore
@@ -15,4 +15,5 @@
 # specific language governing permissions and limitations
 # under the License.
 
+/Gemfile.lock
 /pkg/
diff --git a/ruby/red-plasma/.gitignore b/ruby/red-plasma/.gitignore
index 779545d9026..afd93a1686b 100644
--- a/ruby/red-plasma/.gitignore
+++ b/ruby/red-plasma/.gitignore
@@ -15,4 +15,5 @@
 # specific language governing permissions and limitations
 # under the License.
 
+/Gemfile.lock
 /pkg/

From 1174c6e6e35febc3a0e15114e6eee92591ddd612 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Fri, 21 May 2021 12:54:55 +0200
Subject: [PATCH 279/719] ARROW-12764: [CI] Support wildcard expansion when
 uploading crossbow artifacts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #10322 from kszucs/ARROW-12764

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 dev/archery/archery/crossbow/cli.py |  3 +--
 dev/tasks/macros.jinja              | 12 ++++++------
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/dev/archery/archery/crossbow/cli.py b/dev/archery/archery/crossbow/cli.py
index 15dbb5d4e62..99a3369a9a0 100644
--- a/dev/archery/archery/crossbow/cli.py
+++ b/dev/archery/archery/crossbow/cli.py
@@ -340,11 +340,10 @@ def asset_callback(task_name, task, asset):
 
 
 @crossbow.command()
+@click.argument('patterns', nargs=-1, required=True)
 @click.option('--sha', required=True, help='Target committish')
 @click.option('--tag', required=True, help='Target tag')
 @click.option('--method', default='curl', help='Use cURL to upload')
-@click.option('--pattern', '-p', 'patterns', required=True, multiple=True,
-              help='File pattern to upload as assets')
 @click.pass_obj
 def upload_artifacts(obj, tag, sha, patterns, method):
     queue = obj['queue']
diff --git a/dev/tasks/macros.jinja b/dev/tasks/macros.jinja
index 38bbea4fc60..5f1056ca6a0 100644
--- a/dev/tasks/macros.jinja
+++ b/dev/tasks/macros.jinja
@@ -80,10 +80,10 @@ on:
       --sha {{ task.branch }} \
       --tag {{ task.tag }} \
     {% if pattern is string %}
-      --pattern "{{ pattern }}"
+      "{{ pattern }}"
     {% elif pattern is iterable %}
       {% for p in pattern %}
-      --pattern "{{ p }}" {{ "\\" if not loop.last else "" }}
+      "{{ p }}" {{ "\\" if not loop.last else "" }}
       {% endfor %}
     {% endif %}
     env:
@@ -126,10 +126,10 @@ on:
         --sha {{ task.branch }} \
         --tag {{ task.tag }} \
       {% if pattern is string %}
-        --pattern "{{ pattern }}"
+        "{{ pattern }}"
       {% elif pattern is iterable %}
         {% for p in pattern %}
-        --pattern "{{ p }}" {{ "" if not loop.last else "" }}
+        "{{ p }}" {{ "\\" if not loop.last else "" }}
         {% endfor %}
       {% endif %}
     env:
@@ -179,10 +179,10 @@ on:
       --sha {{ task.branch }} \
       --tag {{ task.tag }} \
     {% if pattern is string %}
-      --pattern "{{ pattern }}"
+      "{{ pattern }}"
     {% elif pattern is iterable %}
       {% for p in pattern %}
-      --pattern "{{ p }}" {{ "\\" if not loop.last else "" }}
+      "{{ p }}" {{ "\\" if not loop.last else "" }}
       {% endfor %}
     {% endif %}
 {% endmacro %}

From e86beb86c1734b84257316e37415468461cd88c9 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Fri, 21 May 2021 08:52:47 -0400
Subject: [PATCH 280/719] ARROW-12608: [C++][Python][R] Add split_pattern_regex
 kernel

This adds a split_pattern_regex kernel using RE2.

Caveats:
- RE2 requires us to wrap the user's regex in a capture group in order to actually get the matched delimiter.
- Reverse splitting is not implemented - there's not a good way to do this with RE2.
- In R, strsplit behaves differently - trailing empty splits are no longer dropped:

```
> df <- tibble(x = c("foo bar"))
> (df %>% mutate(x = strsplit(x, "bar")) %>% collect())$x
[[1]]
[1] "foo "

> (record_batch(df) %>% mutate(x = strsplit(x, "bar")) %>% collect())$x
<list<character>[1]>
[[1]]
[1] "foo " ""
```

So the behavior here does not exactly match R. Though this was already the case:

```
> (df %>% mutate(x = strsplit(x, "bar", fixed = TRUE)) %>% collect())$x
[[1]]
[1] "foo "

> (record_batch(df) %>% mutate(x = strsplit(x, "bar", fixed = TRUE)) %>% collect())$x
<list<character>[1]>
[[1]]
[1] "foo " ""
```

Closes #10354 from lidavidm/arrow-12608

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 .../arrow/compute/kernels/scalar_string.cc    | 88 +++++++++++++++++--
 .../compute/kernels/scalar_string_test.cc     | 28 ++++++
 docs/source/cpp/compute.rst                   | 13 ++-
 docs/source/python/api/compute.rst            | 11 +++
 python/pyarrow/tests/test_compute.py          | 16 ++++
 r/R/dplyr-functions.R                         | 20 +----
 r/src/compute.cpp                             |  2 +-
 .../testthat/test-dplyr-string-functions.R    | 28 +++---
 8 files changed, 164 insertions(+), 42 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/scalar_string.cc b/cpp/src/arrow/compute/kernels/scalar_string.cc
index 1475379391e..3915951969f 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string.cc
@@ -1103,17 +1103,17 @@ struct SplitBaseTransform {
     return Status::OK();
   }
 
-  static Status CheckOptions(const Options& options) { return Status::OK(); }
-
   static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     Options options = State::Get(ctx);
     Derived splitter(options);  // we make an instance to reuse the parts vectors
+    RETURN_NOT_OK(splitter.CheckOptions());
     return splitter.Split(ctx, batch, out);
   }
 
+  Status CheckOptions() { return Status::OK(); }
+
   Status Split(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     EnsureLookupTablesFilled();  // only needed for unicode
-    RETURN_NOT_OK(Derived::CheckOptions(options));
 
     if (batch[0].kind() == Datum::ARRAY) {
       const ArrayData& input = *batch[0].array();
@@ -1179,8 +1179,8 @@ struct SplitPatternTransform : SplitBaseTransform<Type, ListType, SplitPatternOp
   using string_offset_type = typename Type::offset_type;
   using Base::Base;
 
-  static Status CheckOptions(const SplitPatternOptions& options) {
-    if (options.pattern.length() == 0) {
+  Status CheckOptions() {
+    if (Base::options.pattern.length() == 0) {
       return Status::Invalid("Empty separator");
     }
     return Status::OK();
@@ -1399,12 +1399,90 @@ void AddSplitWhitespaceUTF8(FunctionRegistry* registry) {
 }
 #endif
 
+#ifdef ARROW_WITH_RE2
+template <typename Type, typename ListType>
+struct SplitRegexTransform : SplitBaseTransform<Type, ListType, SplitPatternOptions,
+                                                SplitRegexTransform<Type, ListType>> {
+  using Base = SplitBaseTransform<Type, ListType, SplitPatternOptions,
+                                  SplitRegexTransform<Type, ListType>>;
+  using ArrayType = typename TypeTraits<Type>::ArrayType;
+  using string_offset_type = typename Type::offset_type;
+  using ScalarType = typename TypeTraits<Type>::ScalarType;
+
+  const RE2 regex_split;
+
+  explicit SplitRegexTransform(SplitPatternOptions options)
+      : Base(options), regex_split(MakePattern(options)) {}
+
+  static std::string MakePattern(const SplitPatternOptions& options) {
+    // RE2 does *not* give you the full match! Must wrap the regex in a capture group
+    // There is FindAndConsume, but it would give only the end of the separator
+    std::string pattern = "(";
+    pattern.reserve(options.pattern.size() + 2);
+    pattern += options.pattern;
+    pattern += ')';
+    return pattern;
+  }
+
+  Status CheckOptions() {
+    if (Base::options.reverse) {
+      return Status::NotImplemented("Cannot split in reverse with regex");
+    }
+    return RegexStatus(regex_split);
+  }
+
+  bool Find(const uint8_t* begin, const uint8_t* end, const uint8_t** separator_begin,
+            const uint8_t** separator_end, const SplitOptions& options) {
+    re2::StringPiece piece(reinterpret_cast<const char*>(begin),
+                           std::distance(begin, end));
+    // "StringPiece is mutated to point to matched piece"
+    re2::StringPiece result;
+    if (!re2::RE2::PartialMatch(piece, regex_split, &result)) {
+      return false;
+    }
+    *separator_begin = reinterpret_cast<const uint8_t*>(result.data());
+    *separator_end = reinterpret_cast<const uint8_t*>(result.data() + result.size());
+    return true;
+  }
+  bool FindReverse(const uint8_t* begin, const uint8_t* end,
+                   const uint8_t** separator_begin, const uint8_t** separator_end,
+                   const SplitOptions& options) {
+    // Not easily supportable, unfortunately
+    return false;
+  }
+};
+
+const FunctionDoc split_pattern_regex_doc(
+    "Split string according to regex pattern",
+    ("Split each string according to the regex `pattern` defined in\n"
+     "SplitPatternOptions.  The output for each string input is a list\n"
+     "of strings.\n"
+     "\n"
+     "The maximum number of splits and direction of splitting\n"
+     "(forward, reverse) can optionally be defined in SplitPatternOptions."),
+    {"strings"}, "SplitPatternOptions");
+
+void AddSplitRegex(FunctionRegistry* registry) {
+  auto func = std::make_shared<ScalarFunction>("split_pattern_regex", Arity::Unary(),
+                                               &split_pattern_regex_doc);
+  using t32 = SplitRegexTransform<StringType, ListType>;
+  using t64 = SplitRegexTransform<LargeStringType, ListType>;
+  DCHECK_OK(func->AddKernel({utf8()}, {list(utf8())}, t32::Exec, t32::State::Init));
+  DCHECK_OK(
+      func->AddKernel({large_utf8()}, {list(large_utf8())}, t64::Exec, t64::State::Init));
+  DCHECK_OK(registry->AddFunction(std::move(func)));
+}
+#endif
+
 void AddSplit(FunctionRegistry* registry) {
   AddSplitPattern(registry);
   AddSplitWhitespaceAscii(registry);
 #ifdef ARROW_WITH_UTF8PROC
   AddSplitWhitespaceUTF8(registry);
 #endif
+#ifdef ARROW_WITH_RE2
+  AddSplitRegex(registry);
+#endif
 }
 
 // ----------------------------------------------------------------------
diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
index c20af503ca9..dfcfaf969c7 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
@@ -527,6 +527,34 @@ TYPED_TEST(TestStringKernels, SplitWhitespaceUTF8Reverse) {
                    &options_max);
 }
 
+#ifdef ARROW_WITH_RE2
+TYPED_TEST(TestStringKernels, SplitRegex) {
+  SplitPatternOptions options{"a+|b"};
+
+  this->CheckUnary(
+      "split_pattern_regex", R"(["aaaab", "foob", "foo bar", "foo", "AaaaBaaaC", null])",
+      list(this->type()),
+      R"([["", "", ""], ["foo", ""], ["foo ", "", "r"], ["foo"], ["A", "B", "C"], null])",
+      &options);
+
+  options.max_splits = 1;
+  this->CheckUnary(
+      "split_pattern_regex", R"(["aaaab", "foob", "foo bar", "foo", "AaaaBaaaC", null])",
+      list(this->type()),
+      R"([["", "b"], ["foo", ""], ["foo ", "ar"], ["foo"], ["A", "BaaaC"], null])",
+      &options);
+}
+
+TYPED_TEST(TestStringKernels, SplitRegexReverse) {
+  SplitPatternOptions options{"a+|b", /*max_splits=*/1, /*reverse=*/true};
+  Datum input = ArrayFromJSON(this->type(), R"(["a"])");
+
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      NotImplemented, ::testing::HasSubstr("Cannot split in reverse with regex"),
+      CallFunction("split_pattern_regex", {input}, &options));
+}
+#endif
+
 TYPED_TEST(TestStringKernels, ReplaceSubstring) {
   ReplaceSubstringOptions options{"foo", "bazz"};
   this->CheckUnary("replace_substring", R"(["foo", "this foo that foo", null])",
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index b20af43ac37..667d9ab2a71 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -581,18 +581,23 @@ when a positive ``max_splits`` is given.
 +==========================+============+=========================+===================+==================================+=========+
 | split_pattern            | Unary      | String-like             | List-like         | :struct:`SplitPatternOptions`    | \(1)    |
 +--------------------------+------------+-------------------------+-------------------+----------------------------------+---------+
-| utf8_split_whitespace    | Unary      | String-like             | List-like         | :struct:`SplitOptions`           | \(2)    |
+| split_pattern_regex      | Unary      | String-like             | List-like         | :struct:`SplitPatternOptions`    | \(2)    |
 +--------------------------+------------+-------------------------+-------------------+----------------------------------+---------+
-| ascii_split_whitespace   | Unary      | String-like             | List-like         | :struct:`SplitOptions`           | \(3)    |
+| utf8_split_whitespace    | Unary      | String-like             | List-like         | :struct:`SplitOptions`           | \(3)    |
++--------------------------+------------+-------------------------+-------------------+----------------------------------+---------+
+| ascii_split_whitespace   | Unary      | String-like             | List-like         | :struct:`SplitOptions`           | \(4)    |
 +--------------------------+------------+-------------------------+-------------------+----------------------------------+---------+
 
 * \(1) The string is split when an exact pattern is found (the pattern itself
   is not included in the output).
 
-* \(2) A non-zero length sequence of Unicode defined whitespace codepoints
+* \(2) The string is split when a regex match is found (the matched
+  substring itself is not included in the output).
+
+* \(3) A non-zero length sequence of Unicode defined whitespace codepoints
   is seen as separator.
 
-* \(3) A non-zero length sequence of ASCII defined whitespace bytes
+* \(4) A non-zero length sequence of ASCII defined whitespace bytes
   (``'\t'``, ``'\n'``, ``'\v'``, ``'\f'``, ``'\r'``  and ``' '``) is seen
   as separator.
 
diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst
index 2e841c54886..ead1f2408c4 100644
--- a/docs/source/python/api/compute.rst
+++ b/docs/source/python/api/compute.rst
@@ -139,6 +139,17 @@ a byte-by-byte basis.
 
    string_is_ascii
 
+String Splitting
+----------------
+
+.. autosummary::
+   :toctree: ../generated/
+
+   split_pattern
+   split_pattern_regex
+   ascii_split_whitespace
+   utf8_split_whitespace
+
 String Transforms
 -----------------
 
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index db4c6ba1fe5..b014dcc0c8a 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -364,6 +364,22 @@ def test_split_whitespace_ascii():
     assert expected.equals(result)
 
 
+def test_split_pattern_regex():
+    arr = pa.array(["-foo---bar--", "---foo---b"])
+    result = pc.split_pattern_regex(arr, pattern="-+")
+    expected = pa.array([["", "foo", "bar", ""], ["", "foo", "b"]])
+    assert expected.equals(result)
+
+    result = pc.split_pattern_regex(arr, pattern="-+", max_splits=1)
+    expected = pa.array([["", "foo---bar--"], ["", "foo---b"]])
+    assert expected.equals(result)
+
+    with pytest.raises(NotImplementedError,
+                       match="Cannot split in reverse with regex"):
+        result = pc.split_pattern_regex(
+            arr, pattern="---", max_splits=1, reverse=True)
+
+
 def test_min_max():
     # An example generated function wrapper with possible options
     data = [4, 5, 6, None, 1]
diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R
index bee06a7cb6a..e3ff5cecebd 100644
--- a/r/R/dplyr-functions.R
+++ b/r/R/dplyr-functions.R
@@ -201,17 +201,7 @@ nse_funcs$strsplit <- function(x,
                                useBytes = FALSE) {
   assert_that(is.string(split))
 
-  # The Arrow C++ library does not support splitting a string by a regular
-  # expression pattern (ARROW-12608) but the default behavior of
-  # base::strsplit() is to interpret the split pattern as a regex
-  # (fixed = FALSE). R users commonly pass non-regex split patterns to
-  # strsplit() without bothering to set fixed = TRUE. It would be annoying if
-  # that didn't work here. So: if fixed = FALSE, let's check the split pattern
-  # to see if it is a regex (if it contains any regex metacharacters). If not,
-  # then allow to proceed.
-  if (!fixed && contains_regex(split)) {
-    arrow_not_supported("Regular expression matching in strsplit()")
-  }
+  arrow_fun <- ifelse(fixed, "split_pattern", "split_pattern_regex")
   # warn when the user specifies both fixed = TRUE and perl = TRUE, for
   # consistency with the behavior of base::strsplit()
   if (fixed && perl) {
@@ -221,7 +211,7 @@ nse_funcs$strsplit <- function(x,
   # regardless of the value of perl, for consistency with the behavior of
   # base::strsplit()
   Expression$create(
-    "split_pattern",
+    arrow_fun,
     x,
     options = list(pattern = split, reverse = FALSE, max_splits = -1L)
   )
@@ -229,9 +219,7 @@ nse_funcs$strsplit <- function(x,
 
 nse_funcs$str_split <- function(string, pattern, n = Inf, simplify = FALSE) {
   opts <- get_stringr_pattern_options(enexpr(pattern))
-  if (!opts$fixed && contains_regex(opts$pattern)) {
-    arrow_not_supported("Regular expression matching in str_split()")
-  }
+  arrow_fun <- ifelse(opts$fixed, "split_pattern", "split_pattern_regex")
   if (opts$ignore_case) {
     arrow_not_supported("Case-insensitive string splitting")
   }
@@ -249,7 +237,7 @@ nse_funcs$str_split <- function(string, pattern, n = Inf, simplify = FALSE) {
   # str_split() controls the maximum number of pieces to return. So we must
   # subtract 1 from n to get max_splits.
   Expression$create(
-    "split_pattern",
+    arrow_fun,
     string,
     options = list(
       pattern =
diff --git a/r/src/compute.cpp b/r/src/compute.cpp
index b8a5b89d931..90c7b4129c7 100644
--- a/r/src/compute.cpp
+++ b/r/src/compute.cpp
@@ -234,7 +234,7 @@ std::shared_ptr<arrow::compute::FunctionOptions> make_compute_options(
                                      max_replacements);
   }
 
-  if (func_name == "split_pattern") {
+  if (func_name == "split_pattern" || func_name == "split_pattern_regex") {
     using Options = arrow::compute::SplitPatternOptions;
     int64_t max_splits = -1;
     if (!Rf_isNull(options["max_splits"])) {
diff --git a/r/tests/testthat/test-dplyr-string-functions.R b/r/tests/testthat/test-dplyr-string-functions.R
index fb5e6752709..bb4794ef4c5 100644
--- a/r/tests/testthat/test-dplyr-string-functions.R
+++ b/r/tests/testthat/test-dplyr-string-functions.R
@@ -271,6 +271,12 @@ test_that("strsplit and str_split", {
       collect(),
     df
   )
+  expect_dplyr_equal(
+    input %>%
+      mutate(x = strsplit(x, " +and +")) %>%
+      collect(),
+    df
+  )
   expect_dplyr_equal(
     input %>%
       mutate(x = str_split(x, "and")) %>%
@@ -295,7 +301,12 @@ test_that("strsplit and str_split", {
       collect(),
     df
   )
-
+  expect_dplyr_equal(
+    input %>%
+      mutate(x = str_split(x, "Foo|bar", n = 2)) %>%
+      collect(),
+    df
+  )
 })
 
 test_that("arrow_*_split_whitespace functions", {
@@ -352,21 +363,6 @@ test_that("errors and warnings in string splitting", {
   # so here we can just call the functions directly
 
   x <- Expression$field_ref("x")
-  expect_error(
-    nse_funcs$strsplit(x, "and.*", fixed = FALSE),
-    'Regular expression matching in strsplit() not supported by Arrow',
-    fixed = TRUE
-  )
-  expect_error(
-    nse_funcs$str_split(x, "and.?"),
-    'Regular expression matching in str_split() not supported by Arrow',
-    fixed = TRUE
-  )
-  expect_error(
-    nse_funcs$str_split(x, regex("and.*")),
-    'Regular expression matching in str_split() not supported by Arrow',
-    fixed = TRUE
-  )
   expect_error(
     nse_funcs$str_split(x, fixed("and", ignore_case = TRUE)),
     "Case-insensitive string splitting not supported by Arrow"

From 52e6d6984908e235fb1ffb1e11f177de38ce3500 Mon Sep 17 00:00:00 2001
From: niranda perera <niranda.perera@gmail.com>
Date: Fri, 21 May 2021 09:24:09 -0400
Subject: [PATCH 281/719] ARROW-12713  [C++] String reverse kernel

This PR adds ascii and utf8 reverse kernels.

Closes #10317 from nirandaperera/ARROW-12713

Authored-by: niranda perera <niranda.perera@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 .../arrow/compute/kernels/scalar_string.cc    | 67 ++++++++++++++++++-
 .../compute/kernels/scalar_string_test.cc     | 25 +++++++
 cpp/src/arrow/util/utf8.cc                    |  2 +
 cpp/src/arrow/util/utf8.h                     | 14 ++++
 docs/source/cpp/compute.rst                   | 33 ++++++---
 docs/source/python/api/compute.rst            |  2 +
 6 files changed, 129 insertions(+), 14 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/scalar_string.cc b/cpp/src/arrow/compute/kernels/scalar_string.cc
index 3915951969f..23a94d9eb92 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string.cc
@@ -142,6 +142,11 @@ struct StringTransform {
   static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     return Derived().Execute(ctx, batch, out);
   }
+
+  static Status InvalidStatus() {
+    return Status::Invalid("Invalid UTF8 sequence in input");
+  }
+
   Status Execute(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     if (batch[0].kind() == Datum::ARRAY) {
       const ArrayData& input = *batch[0].array();
@@ -173,7 +178,7 @@ struct StringTransform {
         if (ARROW_PREDICT_FALSE(!static_cast<Derived&>(*this).Transform(
                 input_string, input_string_ncodeunits, output_str + output_ncodeunits,
                 &encoded_nbytes))) {
-          return Status::Invalid("Invalid UTF8 sequence in input");
+          return Derived::InvalidStatus();
         }
         output_ncodeunits += encoded_nbytes;
         output_string_offsets[i + 1] = output_ncodeunits;
@@ -199,7 +204,7 @@ struct StringTransform {
         if (ARROW_PREDICT_FALSE(!static_cast<Derived&>(*this).Transform(
                 input.value->data(), data_nbytes, value_buffer->mutable_data(),
                 &encoded_nbytes))) {
-          return Status::Invalid("Invalid UTF8 sequence in input");
+          return Derived::InvalidStatus();
         }
         RETURN_NOT_OK(value_buffer->Resize(encoded_nbytes, /*shrink_to_fit=*/true));
       }
@@ -266,6 +271,45 @@ void EnsureLookupTablesFilled() {}
 
 #endif  // ARROW_WITH_UTF8PROC
 
+template <typename Type>
+struct AsciiReverse : StringTransform<Type, AsciiReverse<Type>> {
+  using Base = StringTransform<Type, AsciiReverse<Type>>;
+  using offset_type = typename Base::offset_type;
+
+  bool Transform(const uint8_t* input, offset_type input_string_ncodeunits,
+                 uint8_t* output, offset_type* output_written) {
+    uint8_t utf8_char_found = 0;
+    for (offset_type i = 0; i < input_string_ncodeunits; i++) {
+      // if a utf8 char is found, report to utf8_char_found
+      utf8_char_found |= input[i] & 0x80;
+      output[input_string_ncodeunits - i - 1] = input[i];
+    }
+    *output_written = input_string_ncodeunits;
+    return utf8_char_found == 0;
+  }
+
+  static Status InvalidStatus() { return Status::Invalid("Non-ASCII sequence in input"); }
+};
+
+template <typename Type>
+struct Utf8Reverse : StringTransform<Type, Utf8Reverse<Type>> {
+  using Base = StringTransform<Type, Utf8Reverse<Type>>;
+  using offset_type = typename Base::offset_type;
+
+  bool Transform(const uint8_t* input, offset_type input_string_ncodeunits,
+                 uint8_t* output, offset_type* output_written) {
+    offset_type i = 0;
+    while (i < input_string_ncodeunits) {
+      uint8_t offset = util::ValidUtf8CodepointByteSize(input + i);
+      offset_type stride = std::min(i + offset, input_string_ncodeunits);
+      std::copy(input + i, input + stride, output + input_string_ncodeunits - stride);
+      i += offset;
+    }
+    *output_written = input_string_ncodeunits;
+    return true;
+  }
+};
+
 using TransformFunc = std::function<void(const uint8_t*, int64_t, uint8_t*)>;
 
 // Transform a buffer of offsets to one which begins with 0 and has same
@@ -2482,7 +2526,7 @@ const FunctionDoc ascii_upper_doc(
 const FunctionDoc ascii_lower_doc(
     "Transform ASCII input to lowercase",
     ("For each string in `strings`, return a lowercase version.\n\n"
-     "This function assumes the input is fully ASCII.  It it may contain\n"
+     "This function assumes the input is fully ASCII.  If it may contain\n"
      "non-ASCII characters, use \"utf8_lower\" instead."),
     {"strings"});
 
@@ -2494,6 +2538,21 @@ const FunctionDoc utf8_lower_doc(
     "Transform input to lowercase",
     ("For each string in `strings`, return a lowercase version."), {"strings"});
 
+const FunctionDoc ascii_reverse_doc(
+    "Reverse ASCII input",
+    ("For each ASCII string in `strings`, return a reversed version.\n\n"
+     "This function assumes the input is fully ASCII.  If it may contain\n"
+     "non-ASCII characters, use \"utf8_reverse\" instead."),
+    {"strings"});
+
+const FunctionDoc utf8_reverse_doc(
+    "Reverse utf8 input",
+    ("For each utf8 string in `strings`, return a reversed version.\n\n"
+     "This function operates on codepoints/UTF-8 code units, not grapheme\n"
+     "clusters. Hence, it will not correctly reverse grapheme clusters\n"
+     "composed of multiple codepoints."),
+    {"strings"});
+
 }  // namespace
 
 void RegisterScalarStringAscii(FunctionRegistry* registry) {
@@ -2509,6 +2568,8 @@ void RegisterScalarStringAscii(FunctionRegistry* registry) {
                                                    &ascii_ltrim_whitespace_doc);
   MakeUnaryStringBatchKernel<AsciiRTrimWhitespace>("ascii_rtrim_whitespace", registry,
                                                    &ascii_rtrim_whitespace_doc);
+  MakeUnaryStringBatchKernel<AsciiReverse>("ascii_reverse", registry, &ascii_reverse_doc);
+  MakeUnaryStringBatchKernel<Utf8Reverse>("utf8_reverse", registry, &utf8_reverse_doc);
   MakeUnaryStringBatchKernelWithState<AsciiTrim>("ascii_trim", registry,
                                                  &ascii_lower_doc);
   MakeUnaryStringBatchKernelWithState<AsciiLTrim>("ascii_ltrim", registry,
diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
index dfcfaf969c7..5ec7f579fff 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
@@ -91,6 +91,31 @@ TYPED_TEST(TestStringKernels, AsciiLower) {
                    "[\"aaazzæÆ&\", null, \"\", \"bbb\"]");
 }
 
+TYPED_TEST(TestStringKernels, AsciiReverse) {
+  this->CheckUnary("ascii_reverse", "[]", this->type(), "[]");
+  this->CheckUnary("ascii_reverse", R"(["abcd", null, "", "bbb"])", this->type(),
+                   R"(["dcba", null, "", "bbb"])");
+
+  Datum invalid_input = ArrayFromJSON(this->type(), R"(["aAazZæÆ&", null, "", "bbb"])");
+  EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
+                                  testing::HasSubstr("Non-ASCII sequence in input"),
+                                  CallFunction("ascii_reverse", {invalid_input}));
+}
+
+TYPED_TEST(TestStringKernels, Utf8Reverse) {
+  this->CheckUnary("utf8_reverse", "[]", this->type(), "[]");
+  this->CheckUnary("utf8_reverse", R"(["abcd", null, "", "bbb"])", this->type(),
+                   R"(["dcba", null, "", "bbb"])");
+  this->CheckUnary("utf8_reverse", R"(["aAazZæÆ&", null, "", "bbb", "ɑɽⱤæÆ"])",
+                   this->type(), R"(["&ÆæZzaAa", null, "", "bbb", "ÆæⱤɽɑ"])");
+
+  // inputs with malformed utf8 chars would produce garbage output, but the end result
+  // would produce arrays with same lengths. Hence checking offset buffer equality
+  auto malformed_input = ArrayFromJSON(this->type(), "[\"ɑ\xFFɑa\", \"ɽ\xe1\xbdɽa\"]");
+  const Result<Datum>& res = CallFunction("utf8_reverse", {malformed_input});
+  ASSERT_TRUE(res->array()->buffers[1]->Equals(*malformed_input->data()->buffers[1]));
+}
+
 TEST(TestStringKernels, LARGE_MEMORY_TEST(Utf8Upper32bitGrowth)) {
   // 0x7fff * 0xffff is the max a 32 bit string array can hold
   // since the utf8_upper kernel can grow it by 3/2, the max we should accept is is
diff --git a/cpp/src/arrow/util/utf8.cc b/cpp/src/arrow/util/utf8.cc
index 478d8ade95f..11394d2e64c 100644
--- a/cpp/src/arrow/util/utf8.cc
+++ b/cpp/src/arrow/util/utf8.cc
@@ -64,6 +64,8 @@ const uint8_t utf8_small_table[] = { // NOLINT
 
 uint16_t utf8_large_table[9 * 256] = {0xffff};
 
+const uint8_t utf8_byte_size_table[16] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4};
+
 static void InitializeLargeTable() {
   for (uint32_t state = 0; state < 9; ++state) {
     for (uint32_t byte = 0; byte < 256; ++byte) {
diff --git a/cpp/src/arrow/util/utf8.h b/cpp/src/arrow/util/utf8.h
index 2d94ca4986e..310d6913403 100644
--- a/cpp/src/arrow/util/utf8.h
+++ b/cpp/src/arrow/util/utf8.h
@@ -65,6 +65,8 @@ static constexpr uint8_t kUTF8DecodeReject = 12;
 // In this table states are multiples of 256.
 ARROW_EXPORT extern uint16_t utf8_large_table[9 * 256];
 
+ARROW_EXPORT extern const uint8_t utf8_byte_size_table[16];
+
 // Success / reject states when looked up in the large table
 static constexpr uint16_t kUTF8ValidateAccept = 0;
 static constexpr uint16_t kUTF8ValidateReject = 256;
@@ -293,6 +295,18 @@ Result<const uint8_t*> SkipUTF8BOM(const uint8_t* data, int64_t size);
 
 static constexpr uint32_t kMaxUnicodeCodepoint = 0x110000;
 
+// size of a valid UTF8 can be determined by looking at leading 4 bits of BYTE1
+// utf8_byte_size_table[0..7] --> pure ascii chars --> 1B length
+// utf8_byte_size_table[8..11] --> internal bytes --> 1B length
+// utf8_byte_size_table[12,13] --> 2B long UTF8 chars
+// utf8_byte_size_table[14] --> 3B long UTF8 chars
+// utf8_byte_size_table[15] --> 4B long UTF8 chars
+// NOTE: Results for invalid/ malformed utf-8 sequences are undefined.
+// ex: \xFF... returns 4B
+static inline uint8_t ValidUtf8CodepointByteSize(const uint8_t* codeunit) {
+  return internal::utf8_byte_size_table[*codeunit >> 4];
+}
+
 static inline bool Utf8IsContinuation(const uint8_t codeunit) {
   return (codeunit & 0xC0) == 0x80;  // upper two bits should be 10
 }
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index 667d9ab2a71..79140257a9b 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -439,47 +439,58 @@ String transforms
 +==========================+============+=========================+=====================+=========+=======================================+
 | ascii_lower              | Unary      | String-like             | String-like         | \(1)    |                                       |
 +--------------------------+------------+-------------------------+---------------------+---------+---------------------------------------+
+| ascii_reverse            | Unary      | String-like             | String-like         | \(2)    |                                       |
++--------------------------+------------+-------------------------+---------------------+---------+---------------------------------------+
 | ascii_upper              | Unary      | String-like             | String-like         | \(1)    |                                       |
 +--------------------------+------------+-------------------------+---------------------+---------+---------------------------------------+
-| binary_length            | Unary      | Binary- or String-like  | Int32 or Int64      | \(2)    |                                       |
+| binary_length            | Unary      | Binary- or String-like  | Int32 or Int64      | \(3)    |                                       |
++--------------------------+------------+-------------------------+---------------------+---------+---------------------------------------+
+| replace_substring        | Unary      | String-like             | String-like         | \(4)    | :struct:`ReplaceSubstringOptions`     |
 +--------------------------+------------+-------------------------+---------------------+---------+---------------------------------------+
-| replace_substring        | Unary      | String-like             | String-like         | \(3)    | :struct:`ReplaceSubstringOptions`     |
+| replace_substring_regex  | Unary      | String-like             | String-like         | \(5)    | :struct:`ReplaceSubstringOptions`     |
 +--------------------------+------------+-------------------------+---------------------+---------+---------------------------------------+
-| replace_substring_regex  | Unary      | String-like             | String-like         | \(4)    | :struct:`ReplaceSubstringOptions`     |
+| utf8_length              | Unary      | String-like             | Int32 or Int64      | \(6)    |                                       |
 +--------------------------+------------+-------------------------+---------------------+---------+---------------------------------------+
-| utf8_length              | Unary      | String-like             | Int32 or Int64      | \(5)    |                                       |
+| utf8_lower               | Unary      | String-like             | String-like         | \(7)    |                                       |
 +--------------------------+------------+-------------------------+---------------------+---------+---------------------------------------+
-| utf8_lower               | Unary      | String-like             | String-like         | \(6)    |                                       |
+| utf8_reverse             | Unary      | String-like             | String-like         | \(8)    |                                       |
 +--------------------------+------------+-------------------------+---------------------+---------+---------------------------------------+
-| utf8_upper               | Unary      | String-like             | String-like         | \(6)    |                                       |
+| utf8_upper               | Unary      | String-like             | String-like         | \(7)    |                                       |
 +--------------------------+------------+-------------------------+---------------------+---------+---------------------------------------+
 
 
 * \(1) Each ASCII character in the input is converted to lowercase or
   uppercase.  Non-ASCII characters are left untouched.
 
-* \(2) Output is the physical length in bytes of each input element.  Output
+* \(2) ASCII input is reversed to the output. If non-ASCII characters 
+  are present, ``Invalid`` :class:`Status` will be returned.
+
+* \(3) Output is the physical length in bytes of each input element.  Output
   type is Int32 for Binary / String, Int64 for LargeBinary / LargeString.
 
-* \(3) Replace non-overlapping substrings that match to
+* \(4) Replace non-overlapping substrings that match to
   :member:`ReplaceSubstringOptions::pattern` by
   :member:`ReplaceSubstringOptions::replacement`. If
   :member:`ReplaceSubstringOptions::max_replacements` != -1, it determines the
   maximum number of replacements made, counting from the left.
 
-* \(4) Replace non-overlapping substrings that match to the regular expression
+* \(5) Replace non-overlapping substrings that match to the regular expression
   :member:`ReplaceSubstringOptions::pattern` by
   :member:`ReplaceSubstringOptions::replacement`, using the Google RE2 library. If
   :member:`ReplaceSubstringOptions::max_replacements` != -1, it determines the
   maximum number of replacements made, counting from the left. Note that if the
   pattern contains groups, backreferencing can be used.
 
-* \(5) Output is the number of characters (not bytes) of each input element.
+* \(6) Output is the number of characters (not bytes) of each input element.
   Output type is Int32 for String, Int64 for LargeString. 
 
-* \(6) Each UTF8-encoded character in the input is converted to lowercase or
+* \(7) Each UTF8-encoded character in the input is converted to lowercase or
   uppercase.
 
+* \(8) Each UTF8-encoded code unit is written in reverse order to the output.
+  If the input is not valid UTF8, then the output is undefined (but the size of output
+  buffers will be preserved).
+
 
 String trimming
 ~~~~~~~~~~~~~~~
diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst
index ead1f2408c4..d206cbc9e50 100644
--- a/docs/source/python/api/compute.rst
+++ b/docs/source/python/api/compute.rst
@@ -157,8 +157,10 @@ String Transforms
    :toctree: ../generated/
 
    ascii_lower
+   ascii_reverse
    ascii_upper
    utf8_lower
+   utf8_reverse
    utf8_upper
 
 Containment tests

From 78c88a9f517b540ea010a6bd6a8c8f6d91769559 Mon Sep 17 00:00:00 2001
From: Weston Pace <weston.pace@gmail.com>
Date: Fri, 21 May 2021 11:32:58 -0400
Subject: [PATCH 282/719] ARROW-12468: [Python][R] Expose
 ScannerBuilder::UseAsync to Python & R

This exposes (in python & R) the `UseAsync` method added to `ScannerBuilder` in ARROW-12289.

Closes #10118 from westonpace/feature/ARROW-12468--c-expose-useasync-to-python-r

Authored-by: Weston Pace <weston.pace@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 python/pyarrow/_dataset.pyx                  |  25 +-
 python/pyarrow/dataset.py                    |   8 +-
 python/pyarrow/includes/libarrow_dataset.pxd |   1 +
 python/pyarrow/tests/test_dataset.py         | 386 +++++++++++--------
 r/R/arrowExports.R                           |   4 +
 r/R/dataset-scan.R                           |  16 +
 r/man/Scanner.Rd                             |   3 +
 r/src/arrowExports.cpp                       |  18 +
 r/src/dataset.cpp                            |   6 +
 r/tests/testthat/test-dataset.R              |   7 +
 10 files changed, 315 insertions(+), 159 deletions(-)

diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx
index 3e89ce14045..356bf8ce9c7 100644
--- a/python/pyarrow/_dataset.pyx
+++ b/python/pyarrow/_dataset.pyx
@@ -393,6 +393,11 @@ cdef class Dataset(_Weakrefable):
         use_threads : bool, default True
             If enabled, then maximum parallelism will be used determined by
             the number of available CPU cores.
+        use_async : bool, default False
+            If enabled, an async scanner will be used that should offer
+            better performance with high-latency/highly-parallel filesystems
+            (e.g. S3)
+
         memory_pool : MemoryPool, default None
             For memory allocations, if required. If not specified, uses the
             default pool.
@@ -2645,7 +2650,7 @@ _DEFAULT_BATCH_SIZE = 2**20
 cdef void _populate_builder(const shared_ptr[CScannerBuilder]& ptr,
                             object columns=None, Expression filter=None,
                             int batch_size=_DEFAULT_BATCH_SIZE,
-                            bint use_threads=True,
+                            bint use_threads=True, bint use_async=False,
                             MemoryPool memory_pool=None,
                             FragmentScanOptions fragment_scan_options=None)\
         except *:
@@ -2681,6 +2686,7 @@ cdef void _populate_builder(const shared_ptr[CScannerBuilder]& ptr,
 
     check_status(builder.BatchSize(batch_size))
     check_status(builder.UseThreads(use_threads))
+    check_status(builder.UseAsync(use_async))
     if memory_pool:
         check_status(builder.Pool(maybe_unbox_memory_pool(memory_pool)))
     if fragment_scan_options:
@@ -2721,6 +2727,10 @@ cdef class Scanner(_Weakrefable):
     use_threads : bool, default True
         If enabled, then maximum parallelism will be used determined by
         the number of available CPU cores.
+    use_async : bool, default False
+        If enabled, an async scanner will be used that should offer
+        better performance with high-latency/highly-parallel filesystems
+        (e.g. S3)
     memory_pool : MemoryPool, default None
         For memory allocations, if required. If not specified, uses the
         default pool.
@@ -2748,7 +2758,8 @@ cdef class Scanner(_Weakrefable):
 
     @staticmethod
     def from_dataset(Dataset dataset not None,
-                     bint use_threads=True, MemoryPool memory_pool=None,
+                     bint use_threads=True, bint use_async=False,
+                     MemoryPool memory_pool=None,
                      object columns=None, Expression filter=None,
                      int batch_size=_DEFAULT_BATCH_SIZE,
                      FragmentScanOptions fragment_scan_options=None):
@@ -2760,7 +2771,7 @@ cdef class Scanner(_Weakrefable):
         builder = make_shared[CScannerBuilder](dataset.unwrap(), options)
         _populate_builder(builder, columns=columns, filter=filter,
                           batch_size=batch_size, use_threads=use_threads,
-                          memory_pool=memory_pool,
+                          use_async=use_async, memory_pool=memory_pool,
                           fragment_scan_options=fragment_scan_options)
 
         scanner = GetResultValue(builder.get().Finish())
@@ -2768,7 +2779,8 @@ cdef class Scanner(_Weakrefable):
 
     @staticmethod
     def from_fragment(Fragment fragment not None, Schema schema=None,
-                      bint use_threads=True, MemoryPool memory_pool=None,
+                      bint use_threads=True, bint use_async=False,
+                      MemoryPool memory_pool=None,
                       object columns=None, Expression filter=None,
                       int batch_size=_DEFAULT_BATCH_SIZE,
                       FragmentScanOptions fragment_scan_options=None):
@@ -2783,7 +2795,7 @@ cdef class Scanner(_Weakrefable):
                                                fragment.unwrap(), options)
         _populate_builder(builder, columns=columns, filter=filter,
                           batch_size=batch_size, use_threads=use_threads,
-                          memory_pool=memory_pool,
+                          use_async=use_async, memory_pool=memory_pool,
                           fragment_scan_options=fragment_scan_options)
 
         scanner = GetResultValue(builder.get().Finish())
@@ -2791,6 +2803,7 @@ cdef class Scanner(_Weakrefable):
 
     @staticmethod
     def from_batches(source, Schema schema=None, bint use_threads=True,
+                     bint use_async=False,
                      MemoryPool memory_pool=None, object columns=None,
                      Expression filter=None,
                      int batch_size=_DEFAULT_BATCH_SIZE,
@@ -2824,7 +2837,7 @@ cdef class Scanner(_Weakrefable):
         builder = CScannerBuilder.FromRecordBatchReader(reader.reader)
         _populate_builder(builder, columns=columns, filter=filter,
                           batch_size=batch_size, use_threads=use_threads,
-                          memory_pool=memory_pool,
+                          use_async=use_async, memory_pool=memory_pool,
                           fragment_scan_options=fragment_scan_options)
         scanner = GetResultValue(builder.get().Finish())
         return Scanner.wrap(scanner)
diff --git a/python/pyarrow/dataset.py b/python/pyarrow/dataset.py
index e80de1688e7..b93f492dd38 100644
--- a/python/pyarrow/dataset.py
+++ b/python/pyarrow/dataset.py
@@ -690,7 +690,7 @@ def _ensure_write_partitioning(scheme):
 def write_dataset(data, base_dir, basename_template=None, format=None,
                   partitioning=None, schema=None,
                   filesystem=None, file_options=None, use_threads=True,
-                  max_partitions=None):
+                  use_async=False, max_partitions=None):
     """
     Write a dataset to a given format and partitioning.
 
@@ -725,6 +725,10 @@ def write_dataset(data, base_dir, basename_template=None, format=None,
     use_threads : bool, default True
         Write files in parallel. If enabled, then maximum parallelism will be
         used determined by the number of available CPU cores.
+    use_async : bool, default False
+        If enabled, an async scanner will be used that should offer
+        better performance with high-latency/highly-parallel filesystems
+        (e.g. S3)
     max_partitions : int, default 1024
         Maximum number of partitions any batch may be written into.
     """
@@ -770,7 +774,7 @@ def write_dataset(data, base_dir, basename_template=None, format=None,
     filesystem, base_dir = _resolve_filesystem_and_path(base_dir, filesystem)
 
     if isinstance(data, Dataset):
-        scanner = data.scanner(use_threads=use_threads)
+        scanner = data.scanner(use_threads=use_threads, use_async=use_async)
     else:
         # scanner was passed directly by the user, in which case a schema
         # cannot be passed
diff --git a/python/pyarrow/includes/libarrow_dataset.pxd b/python/pyarrow/includes/libarrow_dataset.pxd
index c3cfe01538d..96c9648f920 100644
--- a/python/pyarrow/includes/libarrow_dataset.pxd
+++ b/python/pyarrow/includes/libarrow_dataset.pxd
@@ -131,6 +131,7 @@ cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil:
         CStatus Project(vector[CExpression]& exprs, vector[c_string]& columns)
         CStatus Filter(CExpression filter)
         CStatus UseThreads(c_bool use_threads)
+        CStatus UseAsync(c_bool use_async)
         CStatus Pool(CMemoryPool* pool)
         CStatus BatchSize(int64_t batch_size)
         CStatus FragmentScanOptions(
diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py
index 2d83ee1f437..982bbe31f74 100644
--- a/python/pyarrow/tests/test_dataset.py
+++ b/python/pyarrow/tests/test_dataset.py
@@ -149,7 +149,7 @@ def multisourcefs(request):
     df = _generate_data(1000)
     mockfs = fs._MockFileSystem()
 
-    # simply split the dataframe into three chunks to construct a data source
+    # simply split the dataframe into four chunks to construct a data source
     # from each chunk into its own directory
     df_a, df_b, df_c, df_d = np.array_split(df, 4)
 
@@ -206,6 +206,63 @@ def dataset(mockfs):
     return factory.finish()
 
 
+@pytest.fixture(params=[
+    (True, True),
+    (True, False),
+    (False, True),
+    (False, False)
+], ids=['threaded-async', 'threaded-sync', 'serial-async', 'serial-sync'])
+def dataset_reader(request):
+    '''
+    Fixture which allows dataset scanning operations to be
+    run with/without threads and with/without async
+    '''
+    use_threads, use_async = request.param
+
+    class reader:
+
+        def __init__(self):
+            self.use_threads = use_threads
+            self.use_async = use_async
+
+        def _patch_kwargs(self, kwargs):
+            if 'use_threads' in kwargs:
+                raise Exception(
+                    ('Invalid use of dataset_reader, do not specify'
+                     ' use_threads'))
+            if 'use_async' in kwargs:
+                raise Exception(
+                    'Invalid use of dataset_reader, do not specify use_async')
+            kwargs['use_threads'] = use_threads
+            kwargs['use_async'] = use_async
+
+        def to_table(self, dataset, **kwargs):
+            self._patch_kwargs(kwargs)
+            return dataset.to_table(**kwargs)
+
+        def to_batches(self, dataset, **kwargs):
+            self._patch_kwargs(kwargs)
+            return dataset.to_batches(**kwargs)
+
+        def scanner(self, dataset, **kwargs):
+            self._patch_kwargs(kwargs)
+            return dataset.scanner(**kwargs)
+
+        def head(self, dataset, num_rows, **kwargs):
+            self._patch_kwargs(kwargs)
+            return dataset.head(num_rows, **kwargs)
+
+        def take(self, dataset, indices, **kwargs):
+            self._patch_kwargs(kwargs)
+            return dataset.take(indices, **kwargs)
+
+        def count_rows(self, dataset, **kwargs):
+            self._patch_kwargs(kwargs)
+            return dataset.count_rows(**kwargs)
+
+    return reader()
+
+
 def test_filesystem_dataset(mockfs):
     schema = pa.schema([
         pa.field('const', pa.int64())
@@ -277,7 +334,7 @@ def test_filesystem_dataset(mockfs):
         ds.FileSystemDataset.from_paths(fragments, format=file_format)
 
 
-def test_filesystem_dataset_no_filesystem_interaction():
+def test_filesystem_dataset_no_filesystem_interaction(dataset_reader):
     # ARROW-8283
     schema = pa.schema([
         pa.field('f1', pa.int64())
@@ -296,10 +353,10 @@ def test_filesystem_dataset_no_filesystem_interaction():
 
     # scanning does raise
     with pytest.raises(FileNotFoundError):
-        dataset.to_table()
+        dataset_reader.to_table(dataset)
 
 
-def test_dataset(dataset):
+def test_dataset(dataset, dataset_reader):
     assert isinstance(dataset, ds.Dataset)
     assert isinstance(dataset.schema, pa.Schema)
 
@@ -307,16 +364,16 @@ def test_dataset(dataset):
     expected_i64 = pa.array([0, 1, 2, 3, 4], type=pa.int64())
     expected_f64 = pa.array([0, 1, 2, 3, 4], type=pa.float64())
 
-    for batch in dataset.to_batches():
+    for batch in dataset_reader.to_batches(dataset):
         assert isinstance(batch, pa.RecordBatch)
         assert batch.column(0).equals(expected_i64)
         assert batch.column(1).equals(expected_f64)
 
-    for batch in dataset.scanner().scan_batches():
+    for batch in dataset_reader.scanner(dataset).scan_batches():
         assert isinstance(batch, ds.TaggedRecordBatch)
         assert isinstance(batch.fragment, ds.Fragment)
 
-    table = dataset.to_table()
+    table = dataset_reader.to_table(dataset)
     assert isinstance(table, pa.Table)
     assert len(table) == 10
 
@@ -330,15 +387,16 @@ def test_dataset(dataset):
     assert sorted(result['key']) == ['xxx', 'yyy']
 
 
-def test_scanner(dataset):
-    scanner = dataset.scanner(memory_pool=pa.default_memory_pool())
+def test_scanner(dataset, dataset_reader):
+    scanner = dataset_reader.scanner(
+        dataset, memory_pool=pa.default_memory_pool())
     assert isinstance(scanner, ds.Scanner)
 
     with pytest.raises(pa.ArrowInvalid):
-        dataset.scanner(columns=['unknown'])
+        dataset_reader.scanner(dataset, columns=['unknown'])
 
-    scanner = dataset.scanner(columns=['i64'],
-                              memory_pool=pa.default_memory_pool())
+    scanner = dataset_reader.scanner(dataset, columns=['i64'],
+                                     memory_pool=pa.default_memory_pool())
     assert scanner.dataset_schema == dataset.schema
     assert scanner.projected_schema == pa.schema([("i64", pa.int64())])
 
@@ -359,18 +417,18 @@ def test_scanner(dataset):
     assert table.num_rows == scanner.count_rows()
 
 
-def test_head(dataset):
-    result = dataset.head(0)
+def test_head(dataset, dataset_reader):
+    result = dataset_reader.head(dataset, 0)
     assert result == pa.Table.from_batches([], schema=dataset.schema)
 
-    result = dataset.head(1, columns=['i64']).to_pydict()
+    result = dataset_reader.head(dataset, 1, columns=['i64']).to_pydict()
     assert result == {'i64': [0]}
 
-    result = dataset.head(2, columns=['i64'],
-                          filter=ds.field('i64') > 1).to_pydict()
+    result = dataset_reader.head(dataset, 2, columns=['i64'],
+                                 filter=ds.field('i64') > 1).to_pydict()
     assert result == {'i64': [2, 3]}
 
-    result = dataset.head(1024, columns=['i64']).to_pydict()
+    result = dataset_reader.head(dataset, 1024, columns=['i64']).to_pydict()
     assert result == {'i64': list(range(5)) * 2}
 
     fragment = next(dataset.get_fragments())
@@ -381,30 +439,34 @@ def test_head(dataset):
     assert result == {'i64': list(range(5))}
 
 
-def test_take(dataset):
+def test_take(dataset, dataset_reader):
     fragment = next(dataset.get_fragments())
     indices = pa.array([1, 3])
-    assert fragment.take(indices) == fragment.to_table().take(indices)
+    assert dataset_reader.take(
+        fragment, indices) == dataset_reader.to_table(fragment).take(indices)
     with pytest.raises(IndexError):
-        fragment.take(pa.array([5]))
+        dataset_reader.take(fragment, pa.array([5]))
 
     indices = pa.array([1, 7])
-    assert dataset.take(indices) == dataset.to_table().take(indices)
+    assert dataset_reader.take(
+        dataset, indices) == dataset_reader.to_table(dataset).take(indices)
     with pytest.raises(IndexError):
-        dataset.take(pa.array([10]))
+        dataset_reader.take(dataset, pa.array([10]))
 
 
-def test_count_rows(dataset):
+def test_count_rows(dataset, dataset_reader):
     fragment = next(dataset.get_fragments())
-    assert fragment.count_rows() == 5
-    assert fragment.count_rows(filter=ds.field("i64") == 4) == 1
+    assert dataset_reader.count_rows(fragment) == 5
+    assert dataset_reader.count_rows(
+        fragment, filter=ds.field("i64") == 4) == 1
 
-    assert dataset.count_rows() == 10
+    assert dataset_reader.count_rows(dataset) == 10
     # Filter on partition key
-    assert dataset.count_rows(filter=ds.field("group") == 1) == 5
+    assert dataset_reader.count_rows(
+        dataset, filter=ds.field("group") == 1) == 5
     # Filter on data
-    assert dataset.count_rows(filter=ds.field("i64") >= 3) == 4
-    assert dataset.count_rows(filter=ds.field("i64") < 0) == 0
+    assert dataset_reader.count_rows(dataset, filter=ds.field("i64") >= 3) == 4
+    assert dataset_reader.count_rows(dataset, filter=ds.field("i64") < 0) == 0
 
 
 def test_abstract_classes():
@@ -731,7 +793,7 @@ def test_make_fragment(multisourcefs):
         assert row_group_fragment.row_groups == [0]
 
 
-def test_make_csv_fragment_from_buffer():
+def test_make_csv_fragment_from_buffer(dataset_reader):
     content = textwrap.dedent("""
         alpha,num,animal
         a,12,dog
@@ -747,14 +809,14 @@ def test_make_csv_fragment_from_buffer():
                          [12, 11, 10],
                          ['dog', 'cat', 'rabbit']],
                         names=['alpha', 'num', 'animal'])
-    assert fragment.to_table().equals(expected)
+    assert dataset_reader.to_table(fragment).equals(expected)
 
     pickled = pickle.loads(pickle.dumps(fragment))
-    assert pickled.to_table().equals(fragment.to_table())
+    assert dataset_reader.to_table(pickled).equals(fragment.to_table())
 
 
 @pytest.mark.parquet
-def test_make_parquet_fragment_from_buffer():
+def test_make_parquet_fragment_from_buffer(dataset_reader):
     import pyarrow.parquet as pq
 
     arrays = [
@@ -787,10 +849,10 @@ def test_make_parquet_fragment_from_buffer():
         buffer = out.getvalue()
 
         fragment = format_.make_fragment(buffer)
-        assert fragment.to_table().equals(table)
+        assert dataset_reader.to_table(fragment).equals(table)
 
         pickled = pickle.loads(pickle.dumps(fragment))
-        assert pickled.to_table().equals(table)
+        assert dataset_reader.to_table(pickled).equals(table)
 
 
 def _create_dataset_for_fragments(tempdir, chunk_size=None, filesystem=None):
@@ -815,7 +877,7 @@ def _create_dataset_for_fragments(tempdir, chunk_size=None, filesystem=None):
 
 @pytest.mark.pandas
 @pytest.mark.parquet
-def test_fragments(tempdir):
+def test_fragments(tempdir, dataset_reader):
     table, dataset = _create_dataset_for_fragments(tempdir)
 
     # list fragments
@@ -830,19 +892,20 @@ def test_fragments(tempdir):
     assert f.partition_expression.equals(ds.field('part') == 'a')
 
     # By default, the partition column is not part of the schema.
-    result = f.to_table()
+    result = dataset_reader.to_table(f)
     assert result.column_names == physical_names
     assert result.equals(table.remove_column(2).slice(0, 4))
 
     # scanning fragment includes partition columns when given the proper
     # schema.
-    result = f.to_table(schema=dataset.schema)
+    result = dataset_reader.to_table(f, schema=dataset.schema)
     assert result.column_names == ['f1', 'f2', 'part']
     assert result.equals(table.slice(0, 4))
     assert f.physical_schema == result.schema.remove(2)
 
     # scanning fragments follow filter predicate
-    result = f.to_table(schema=dataset.schema, filter=ds.field('f1') < 2)
+    result = dataset_reader.to_table(
+        f, schema=dataset.schema, filter=ds.field('f1') < 2)
     assert result.column_names == ['f1', 'f2', 'part']
 
 
@@ -864,7 +927,7 @@ def test_fragments_implicit_cast(tempdir):
 
 @pytest.mark.pandas
 @pytest.mark.parquet
-def test_fragments_reconstruct(tempdir):
+def test_fragments_reconstruct(tempdir, dataset_reader):
     table, dataset = _create_dataset_for_fragments(tempdir)
 
     def assert_yields_projected(fragment, row_slice,
@@ -882,13 +945,15 @@ def assert_yields_projected(fragment, row_slice,
 
     # test pickle roundtrip
     pickled_fragment = pickle.loads(pickle.dumps(fragment))
-    assert pickled_fragment.to_table() == fragment.to_table()
+    assert dataset_reader.to_table(
+        pickled_fragment) == dataset_reader.to_table(fragment)
 
     # manually re-construct a fragment, with explicit schema
     new_fragment = parquet_format.make_fragment(
         fragment.path, fragment.filesystem,
         partition_expression=fragment.partition_expression)
-    assert new_fragment.to_table().equals(fragment.to_table())
+    assert dataset_reader.to_table(new_fragment).equals(
+        dataset_reader.to_table(fragment))
     assert_yields_projected(new_fragment, (0, 4))
 
     # filter / column projection, inspected schema
@@ -919,12 +984,12 @@ def assert_yields_projected(fragment, row_slice,
         new_fragment = parquet_format.make_fragment(
             fragment.path, fragment.filesystem,
             partition_expression=fragment.partition_expression)
-        new_fragment.to_table(filter=ds.field('part') == 'a')
+        dataset_reader.to_table(new_fragment, filter=ds.field('part') == 'a')
 
 
 @pytest.mark.pandas
 @pytest.mark.parquet
-def test_fragments_parquet_row_groups(tempdir):
+def test_fragments_parquet_row_groups(tempdir, dataset_reader):
     table, dataset = _create_dataset_for_fragments(tempdir, chunk_size=2)
 
     fragment = list(dataset.get_fragments())[0]
@@ -932,7 +997,8 @@ def test_fragments_parquet_row_groups(tempdir):
     # list and scan row group fragments
     row_group_fragments = list(fragment.split_by_row_group())
     assert len(row_group_fragments) == fragment.num_row_groups == 2
-    result = row_group_fragments[0].to_table(schema=dataset.schema)
+    result = dataset_reader.to_table(
+        row_group_fragments[0], schema=dataset.schema)
     assert result.column_names == ['f1', 'f2', 'part']
     assert len(result) == 2
     assert result.equals(table.slice(0, 2))
@@ -947,7 +1013,8 @@ def test_fragments_parquet_row_groups(tempdir):
     fragment = list(dataset.get_fragments(filter=ds.field('f1') < 1))[0]
     row_group_fragments = list(fragment.split_by_row_group(ds.field('f1') < 1))
     assert len(row_group_fragments) == 1
-    result = row_group_fragments[0].to_table(filter=ds.field('f1') < 1)
+    result = dataset_reader.to_table(
+        row_group_fragments[0], filter=ds.field('f1') < 1)
     assert len(result) == 1
 
 
@@ -973,7 +1040,7 @@ def test_fragments_parquet_num_row_groups(tempdir):
 
 @pytest.mark.pandas
 @pytest.mark.parquet
-def test_fragments_parquet_row_groups_dictionary(tempdir):
+def test_fragments_parquet_row_groups_dictionary(tempdir, dataset_reader):
     import pandas as pd
 
     df = pd.DataFrame(dict(col1=['a', 'b'], col2=[1, 2]))
@@ -984,7 +1051,7 @@ def test_fragments_parquet_row_groups_dictionary(tempdir):
 
     import pyarrow.dataset as ds
     dataset = ds.dataset(tempdir / 'test_filter_dictionary.parquet')
-    result = dataset.to_table(filter=ds.field("col1") == "a")
+    result = dataset_reader.to_table(dataset, filter=ds.field("col1") == "a")
 
     assert (df.iloc[0] == result.to_pandas()).all().all()
 
@@ -1182,7 +1249,7 @@ def test_fragments_parquet_row_groups_predicate(tempdir):
 
 @pytest.mark.pandas
 @pytest.mark.parquet
-def test_fragments_parquet_row_groups_reconstruct(tempdir):
+def test_fragments_parquet_row_groups_reconstruct(tempdir, dataset_reader):
     table, dataset = _create_dataset_for_fragments(tempdir, chunk_size=2)
 
     fragment = list(dataset.get_fragments())[0]
@@ -1191,23 +1258,25 @@ def test_fragments_parquet_row_groups_reconstruct(tempdir):
 
     # test pickle roundtrip
     pickled_fragment = pickle.loads(pickle.dumps(fragment))
-    assert pickled_fragment.to_table() == fragment.to_table()
+    assert dataset_reader.to_table(
+        pickled_fragment) == dataset_reader.to_table(fragment)
 
     # manually re-construct row group fragments
     new_fragment = parquet_format.make_fragment(
         fragment.path, fragment.filesystem,
         partition_expression=fragment.partition_expression,
         row_groups=[0])
-    result = new_fragment.to_table()
-    assert result.equals(row_group_fragments[0].to_table())
+    result = dataset_reader.to_table(new_fragment)
+    assert result.equals(dataset_reader.to_table(row_group_fragments[0]))
 
     # manually re-construct a row group fragment with filter/column projection
     new_fragment = parquet_format.make_fragment(
         fragment.path, fragment.filesystem,
         partition_expression=fragment.partition_expression,
         row_groups={1})
-    result = new_fragment.to_table(schema=table.schema, columns=['f1', 'part'],
-                                   filter=ds.field('f1') < 3, )
+    result = dataset_reader.to_table(
+        new_fragment, schema=table.schema, columns=['f1', 'part'],
+        filter=ds.field('f1') < 3, )
     assert result.column_names == ['f1', 'part']
     assert len(result) == 1
 
@@ -1217,12 +1286,13 @@ def test_fragments_parquet_row_groups_reconstruct(tempdir):
         partition_expression=fragment.partition_expression,
         row_groups={2})
     with pytest.raises(IndexError, match="references row group 2"):
-        new_fragment.to_table()
+        dataset_reader.to_table(new_fragment)
 
 
 @pytest.mark.pandas
 @pytest.mark.parquet
-def test_fragments_parquet_subset_ids(tempdir, open_logging_fs):
+def test_fragments_parquet_subset_ids(tempdir, open_logging_fs,
+                                      dataset_reader):
     fs, assert_opens = open_logging_fs
     table, dataset = _create_dataset_for_fragments(tempdir, chunk_size=1,
                                                    filesystem=fs)
@@ -1236,21 +1306,22 @@ def test_fragments_parquet_subset_ids(tempdir, open_logging_fs):
         assert subfrag.row_groups[0].statistics is not None
 
     # check correct scan result of subset
-    result = subfrag.to_table()
+    result = dataset_reader.to_table(subfrag)
     assert result.to_pydict() == {"f1": [0, 3], "f2": [1, 1]}
 
     # empty list of ids
     subfrag = fragment.subset(row_group_ids=[])
     assert subfrag.num_row_groups == 0
     assert subfrag.row_groups == []
-    result = subfrag.to_table(schema=dataset.schema)
+    result = dataset_reader.to_table(subfrag, schema=dataset.schema)
     assert result.num_rows == 0
     assert result.equals(table[:0])
 
 
 @pytest.mark.pandas
 @pytest.mark.parquet
-def test_fragments_parquet_subset_filter(tempdir, open_logging_fs):
+def test_fragments_parquet_subset_filter(tempdir, open_logging_fs,
+                                         dataset_reader):
     fs, assert_opens = open_logging_fs
     table, dataset = _create_dataset_for_fragments(tempdir, chunk_size=1,
                                                    filesystem=fs)
@@ -1264,14 +1335,14 @@ def test_fragments_parquet_subset_filter(tempdir, open_logging_fs):
         assert subfrag.row_groups[0].statistics is not None
 
     # check correct scan result of subset
-    result = subfrag.to_table()
+    result = dataset_reader.to_table(subfrag)
     assert result.to_pydict() == {"f1": [1, 2, 3], "f2": [1, 1, 1]}
 
     # filter that results in empty selection
     subfrag = fragment.subset(ds.field("f1") > 5)
     assert subfrag.num_row_groups == 0
     assert subfrag.row_groups == []
-    result = subfrag.to_table(schema=dataset.schema)
+    result = dataset_reader.to_table(subfrag, schema=dataset.schema)
     assert result.num_rows == 0
     assert result.equals(table[:0])
 
@@ -1507,14 +1578,14 @@ def _create_directory_of_files(base_dir):
     return (table1, table2), (path1, path2)
 
 
-def _check_dataset(dataset, table):
+def _check_dataset(dataset, table, dataset_reader):
     # also test that pickle roundtrip keeps the functionality
     for d in [dataset, pickle.loads(pickle.dumps(dataset))]:
         assert dataset.schema.equals(table.schema)
-        assert dataset.to_table().equals(table)
+        assert dataset_reader.to_table(dataset).equals(table)
 
 
-def _check_dataset_from_path(path, table, **kwargs):
+def _check_dataset_from_path(path, table, dataset_reader, **kwargs):
     # pathlib object
     assert isinstance(path, pathlib.Path)
 
@@ -1522,39 +1593,39 @@ def _check_dataset_from_path(path, table, **kwargs):
     for p in [path, str(path), [path], [str(path)]]:
         dataset = ds.dataset(path, **kwargs)
         assert isinstance(dataset, ds.FileSystemDataset)
-        _check_dataset(dataset, table)
+        _check_dataset(dataset, table, dataset_reader)
 
     # relative string path
     with change_cwd(path.parent):
         dataset = ds.dataset(path.name, **kwargs)
         assert isinstance(dataset, ds.FileSystemDataset)
-        _check_dataset(dataset, table)
+        _check_dataset(dataset, table, dataset_reader)
 
 
 @pytest.mark.parquet
-def test_open_dataset_single_file(tempdir):
+def test_open_dataset_single_file(tempdir, dataset_reader):
     table, path = _create_single_file(tempdir)
-    _check_dataset_from_path(path, table)
+    _check_dataset_from_path(path, table, dataset_reader)
 
 
 @pytest.mark.parquet
-def test_deterministic_row_order(tempdir):
+def test_deterministic_row_order(tempdir, dataset_reader):
     # ARROW-8447 Ensure that dataset.to_table (and Scanner::ToTable) returns a
     # deterministic row ordering. This is achieved by constructing a single
     # parquet file with one row per RowGroup.
     table, path = _create_single_file(tempdir, row_group_size=1)
-    _check_dataset_from_path(path, table)
+    _check_dataset_from_path(path, table, dataset_reader)
 
 
 @pytest.mark.parquet
-def test_open_dataset_directory(tempdir):
+def test_open_dataset_directory(tempdir, dataset_reader):
     tables, _ = _create_directory_of_files(tempdir)
     table = pa.concat_tables(tables)
-    _check_dataset_from_path(tempdir, table)
+    _check_dataset_from_path(tempdir, table, dataset_reader)
 
 
 @pytest.mark.parquet
-def test_open_dataset_list_of_files(tempdir):
+def test_open_dataset_list_of_files(tempdir, dataset_reader):
     tables, (path1, path2) = _create_directory_of_files(tempdir)
     table = pa.concat_tables(tables)
 
@@ -1568,7 +1639,7 @@ def test_open_dataset_list_of_files(tempdir):
 
     for dataset in datasets:
         assert dataset.schema.equals(table.schema)
-        result = dataset.to_table()
+        result = dataset_reader.to_table(dataset)
         assert result.equals(table)
 
 
@@ -1592,7 +1663,7 @@ def test_open_dataset_filesystem_fspath(tempdir):
         ds.dataset(fspath, filesystem=fs._MockFileSystem())
 
 
-def test_construct_from_single_file(tempdir):
+def test_construct_from_single_file(tempdir, dataset_reader):
     directory = tempdir / 'single-file'
     directory.mkdir()
     table, path = _create_single_file(directory)
@@ -1607,10 +1678,11 @@ def test_construct_from_single_file(tempdir):
     # pickle roundtrip
     d4 = pickle.loads(pickle.dumps(d1))
 
-    assert d1.to_table() == d2.to_table() == d3.to_table() == d4.to_table()
+    assert dataset_reader.to_table(d1) == dataset_reader.to_table(
+        d2) == dataset_reader.to_table(d3) == dataset_reader.to_table(d4)
 
 
-def test_construct_from_single_directory(tempdir):
+def test_construct_from_single_directory(tempdir, dataset_reader):
     directory = tempdir / 'single-directory'
     directory.mkdir()
     tables, paths = _create_directory_of_files(directory)
@@ -1618,18 +1690,18 @@ def test_construct_from_single_directory(tempdir):
     d1 = ds.dataset(directory)
     d2 = ds.dataset(directory, filesystem=fs.LocalFileSystem())
     d3 = ds.dataset(directory.name, filesystem=_filesystem_uri(tempdir))
-    t1 = d1.to_table()
-    t2 = d2.to_table()
-    t3 = d3.to_table()
+    t1 = dataset_reader.to_table(d1)
+    t2 = dataset_reader.to_table(d2)
+    t3 = dataset_reader.to_table(d3)
     assert t1 == t2 == t3
 
     # test pickle roundtrip
     for d in [d1, d2, d3]:
         restored = pickle.loads(pickle.dumps(d))
-        assert restored.to_table() == t1
+        assert dataset_reader.to_table(restored) == t1
 
 
-def test_construct_from_list_of_files(tempdir):
+def test_construct_from_list_of_files(tempdir, dataset_reader):
     # instantiate from a list of files
     directory = tempdir / 'list-of-files'
     directory.mkdir()
@@ -1638,15 +1710,15 @@ def test_construct_from_list_of_files(tempdir):
     relative_paths = [p.relative_to(tempdir) for p in paths]
     with change_cwd(tempdir):
         d1 = ds.dataset(relative_paths)
-        t1 = d1.to_table()
+        t1 = dataset_reader.to_table(d1)
         assert len(t1) == sum(map(len, tables))
 
     d2 = ds.dataset(relative_paths, filesystem=_filesystem_uri(tempdir))
-    t2 = d2.to_table()
+    t2 = dataset_reader.to_table(d2)
     d3 = ds.dataset(paths)
-    t3 = d3.to_table()
+    t3 = dataset_reader.to_table(d3)
     d4 = ds.dataset(paths, filesystem=fs.LocalFileSystem())
-    t4 = d4.to_table()
+    t4 = dataset_reader.to_table(d4)
 
     assert t1 == t2 == t3 == t4
 
@@ -1764,7 +1836,7 @@ def test_construct_from_invalid_sources_raise(multisourcefs):
         ds.InMemoryDataset([batch1, 0])
 
 
-def test_construct_in_memory():
+def test_construct_in_memory(dataset_reader):
     batch = pa.RecordBatch.from_arrays([pa.array(range(10))], names=["a"])
     table = pa.Table.from_batches([batch])
 
@@ -1772,13 +1844,16 @@ def test_construct_in_memory():
 
     for source in (batch, table, [batch], [table]):
         dataset = ds.dataset(source)
-        assert dataset.to_table() == table
+        assert dataset_reader.to_table(dataset) == table
         assert len(list(dataset.get_fragments())) == 1
         assert next(dataset.get_fragments()).to_table() == table
         assert pa.Table.from_batches(list(dataset.to_batches())) == table
 
 
-def test_scan_iterator():
+@pytest.mark.parametrize('use_threads,use_async',
+                         [(False, False), (False, True),
+                          (True, False), (True, True)])
+def test_scan_iterator(use_threads, use_async):
     batch = pa.RecordBatch.from_arrays([pa.array(range(10))], names=["a"])
     table = pa.Table.from_batches([batch])
     # When constructed from readers/iterators, should be one-shot
@@ -1789,14 +1864,16 @@ def test_scan_iterator():
             (lambda: (batch for _ in range(1)), batch.schema),
     ):
         # Scanning the fragment consumes the underlying iterator
-        scanner = ds.Scanner.from_batches(factory(), schema=schema)
+        scanner = ds.Scanner.from_batches(
+            factory(), schema=schema, use_threads=use_threads,
+            use_async=use_async)
         assert scanner.to_table() == table
         with pytest.raises(pa.ArrowInvalid, match=match):
             scanner.to_table()
 
 
 @pytest.mark.parquet
-def test_open_dataset_partitioned_directory(tempdir):
+def test_open_dataset_partitioned_directory(tempdir, dataset_reader):
     import pyarrow.parquet as pq
     table = pa.table({'a': range(9), 'b': [0.] * 4 + [1.] * 5})
 
@@ -1810,7 +1887,7 @@ def test_open_dataset_partitioned_directory(tempdir):
 
     # no partitioning specified, just read all individual files
     full_table = pa.concat_tables([table] * 3)
-    _check_dataset_from_path(path, full_table)
+    _check_dataset_from_path(path, full_table, dataset_reader)
 
     # specify partition scheme with discovery
     dataset = ds.dataset(
@@ -1874,7 +1951,7 @@ def test_open_dataset_unsupported_format(tempdir):
 
 
 @pytest.mark.parquet
-def test_open_union_dataset(tempdir):
+def test_open_union_dataset(tempdir, dataset_reader):
     _, path = _create_single_file(tempdir)
     dataset = ds.dataset(path)
 
@@ -1882,7 +1959,7 @@ def test_open_union_dataset(tempdir):
     assert isinstance(union, ds.UnionDataset)
 
     pickled = pickle.loads(pickle.dumps(union))
-    assert pickled.to_table() == union.to_table()
+    assert dataset_reader.to_table(pickled) == dataset_reader.to_table(union)
 
 
 def test_open_union_dataset_with_additional_kwargs(multisourcefs):
@@ -2027,17 +2104,17 @@ def s3_example_simple(s3_connection, s3_server):
 
 @pytest.mark.parquet
 @pytest.mark.s3
-def test_open_dataset_from_uri_s3(s3_example_simple):
+def test_open_dataset_from_uri_s3(s3_example_simple, dataset_reader):
     # open dataset from non-localfs string path
     table, path, fs, uri, _, _, _, _ = s3_example_simple
 
     # full string URI
     dataset = ds.dataset(uri, format="parquet")
-    assert dataset.to_table().equals(table)
+    assert dataset_reader.to_table(dataset).equals(table)
 
     # passing filesystem object
     dataset = ds.dataset(path, format="parquet", filesystem=fs)
-    assert dataset.to_table().equals(table)
+    assert dataset_reader.to_table(dataset).equals(table)
 
 
 @pytest.mark.parquet
@@ -2146,7 +2223,7 @@ def test_open_dataset_from_fsspec(tempdir):
 
 
 @pytest.mark.pandas
-def test_filter_timestamp(tempdir):
+def test_filter_timestamp(tempdir, dataset_reader):
     # ARROW-11379
     path = tempdir / "test_partition_timestamps"
 
@@ -2164,24 +2241,24 @@ def test_filter_timestamp(tempdir):
     dataset = ds.dataset(path, format="feather", partitioning=part)
 
     condition = ds.field("dates") > pd.Timestamp("2012-01-01")
-    table = dataset.to_table(filter=condition)
+    table = dataset_reader.to_table(dataset, filter=condition)
     assert table.column('id').to_pylist() == [1, 3, 5, 7, 9]
 
     import datetime
     condition = ds.field("dates") > datetime.datetime(2012, 1, 1)
-    table = dataset.to_table(filter=condition)
+    table = dataset_reader.to_table(dataset, filter=condition)
     assert table.column('id').to_pylist() == [1, 3, 5, 7, 9]
 
 
 @pytest.mark.parquet
-def test_filter_implicit_cast(tempdir):
+def test_filter_implicit_cast(tempdir, dataset_reader):
     # ARROW-7652
     table = pa.table({'a': pa.array([0, 1, 2, 3, 4, 5], type=pa.int8())})
     _, path = _create_single_file(tempdir, table)
     dataset = ds.dataset(str(path))
 
     filter_ = ds.field('a') > 2
-    assert len(dataset.to_table(filter=filter_)) == 3
+    assert len(dataset_reader.to_table(dataset, filter=filter_)) == 3
 
 
 def test_dataset_union(multisourcefs):
@@ -2304,7 +2381,7 @@ def test_union_dataset_filesystem_datasets(multisourcefs):
 
 
 @pytest.mark.parquet
-def test_specified_schema(tempdir):
+def test_specified_schema(tempdir, dataset_reader):
     import pyarrow.parquet as pq
 
     table = pa.table({'a': [1, 2, 3], 'b': [.1, .2, .3]})
@@ -2316,7 +2393,7 @@ def _check_dataset(schema, expected, expected_schema=None):
             assert dataset.schema.equals(expected_schema)
         else:
             assert dataset.schema.equals(schema)
-        result = dataset.to_table()
+        result = dataset_reader.to_table(dataset)
         assert result.equals(expected)
 
     # no schema specified
@@ -2360,10 +2437,10 @@ def _check_dataset(schema, expected, expected_schema=None):
     assert dataset.schema.equals(schema)
     with pytest.raises(NotImplementedError,
                        match='Unsupported cast from int64 to list'):
-        dataset.to_table()
+        dataset_reader.to_table(dataset)
 
 
-def test_ipc_format(tempdir):
+def test_ipc_format(tempdir, dataset_reader):
     table = pa.table({'a': pa.array([1, 2, 3], type="int8"),
                       'b': pa.array([.1, .2, .3], type="float64")})
 
@@ -2374,17 +2451,17 @@ def test_ipc_format(tempdir):
         writer.close()
 
     dataset = ds.dataset(path, format=ds.IpcFileFormat())
-    result = dataset.to_table()
+    result = dataset_reader.to_table(dataset)
     assert result.equals(table)
 
     for format_str in ["ipc", "arrow"]:
         dataset = ds.dataset(path, format=format_str)
-        result = dataset.to_table()
+        result = dataset_reader.to_table(dataset)
         assert result.equals(table)
 
 
 @pytest.mark.pandas
-def test_csv_format(tempdir):
+def test_csv_format(tempdir, dataset_reader):
     table = pa.table({'a': pa.array([1, 2, 3], type="int64"),
                       'b': pa.array([.1, .2, .3], type="float64")})
 
@@ -2392,11 +2469,11 @@ def test_csv_format(tempdir):
     table.to_pandas().to_csv(path, index=False)
 
     dataset = ds.dataset(path, format=ds.CsvFileFormat())
-    result = dataset.to_table()
+    result = dataset_reader.to_table(dataset)
     assert result.equals(table)
 
     dataset = ds.dataset(path, format='csv')
-    result = dataset.to_table()
+    result = dataset_reader.to_table(dataset)
     assert result.equals(table)
 
 
@@ -2407,7 +2484,7 @@ def test_csv_format(tempdir):
     "lz4",
     "zstd",
 ])
-def test_csv_format_compressed(tempdir, compression):
+def test_csv_format_compressed(tempdir, compression, dataset_reader):
     if not pyarrow.Codec.is_available(compression):
         pytest.skip("{} support is not built".format(compression))
     table = pa.table({'a': pa.array([1, 2, 3], type="int64"),
@@ -2423,32 +2500,32 @@ def test_csv_format_compressed(tempdir, compression):
         sink.write(csv_str.encode('utf-8'))
 
     dataset = ds.dataset(path, format=ds.CsvFileFormat())
-    result = dataset.to_table()
+    result = dataset_reader.to_table(dataset)
     assert result.equals(table)
 
 
-def test_csv_format_options(tempdir):
+def test_csv_format_options(tempdir, dataset_reader):
     path = str(tempdir / 'test.csv')
     with open(path, 'w') as sink:
         sink.write('skipped\ncol0\nfoo\nbar\n')
     dataset = ds.dataset(path, format='csv')
-    result = dataset.to_table()
+    result = dataset_reader.to_table(dataset)
     assert result.equals(
         pa.table({'skipped': pa.array(['col0', 'foo', 'bar'])}))
 
     dataset = ds.dataset(path, format=ds.CsvFileFormat(
         read_options=pa.csv.ReadOptions(skip_rows=1)))
-    result = dataset.to_table()
+    result = dataset_reader.to_table(dataset)
     assert result.equals(pa.table({'col0': pa.array(['foo', 'bar'])}))
 
     dataset = ds.dataset(path, format=ds.CsvFileFormat(
         read_options=pa.csv.ReadOptions(column_names=['foo'])))
-    result = dataset.to_table()
+    result = dataset_reader.to_table(dataset)
     assert result.equals(
         pa.table({'foo': pa.array(['skipped', 'col0', 'foo', 'bar'])}))
 
 
-def test_csv_fragment_options(tempdir):
+def test_csv_fragment_options(tempdir, dataset_reader):
     path = str(tempdir / 'test.csv')
     with open(path, 'w') as sink:
         sink.write('col0\nfoo\nspam\nMYNULL\n')
@@ -2458,21 +2535,21 @@ def test_csv_fragment_options(tempdir):
     options = ds.CsvFragmentScanOptions(
         convert_options=convert_options,
         read_options=pa.csv.ReadOptions(block_size=2**16))
-    result = dataset.to_table(fragment_scan_options=options)
+    result = dataset_reader.to_table(dataset, fragment_scan_options=options)
     assert result.equals(pa.table({'col0': pa.array(['foo', 'spam', None])}))
 
     csv_format = ds.CsvFileFormat(convert_options=convert_options)
     dataset = ds.dataset(path, format=csv_format)
-    result = dataset.to_table()
+    result = dataset_reader.to_table(dataset)
     assert result.equals(pa.table({'col0': pa.array(['foo', 'spam', None])}))
 
     options = ds.CsvFragmentScanOptions()
-    result = dataset.to_table(fragment_scan_options=options)
+    result = dataset_reader.to_table(dataset, fragment_scan_options=options)
     assert result.equals(
         pa.table({'col0': pa.array(['foo', 'spam', 'MYNULL'])}))
 
 
-def test_feather_format(tempdir):
+def test_feather_format(tempdir, dataset_reader):
     from pyarrow.feather import write_feather
 
     table = pa.table({'a': pa.array([1, 2, 3], type="int8"),
@@ -2483,23 +2560,23 @@ def test_feather_format(tempdir):
     write_feather(table, str(basedir / "data.feather"))
 
     dataset = ds.dataset(basedir, format=ds.IpcFileFormat())
-    result = dataset.to_table()
+    result = dataset_reader.to_table(dataset)
     assert result.equals(table)
 
     dataset = ds.dataset(basedir, format="feather")
-    result = dataset.to_table()
+    result = dataset_reader.to_table(dataset)
     assert result.equals(table)
 
     # ARROW-8641 - column selection order
-    result = dataset.to_table(columns=["b", "a"])
+    result = dataset_reader.to_table(dataset, columns=["b", "a"])
     assert result.column_names == ["b", "a"]
-    result = dataset.to_table(columns=["a", "a"])
+    result = dataset_reader.to_table(dataset, columns=["a", "a"])
     assert result.column_names == ["a", "a"]
 
     # error with Feather v1 files
     write_feather(table, str(basedir / "data1.feather"), version=1)
     with pytest.raises(ValueError):
-        ds.dataset(basedir, format="feather").to_table()
+        dataset_reader.to_table(ds.dataset(basedir, format="feather"))
 
 
 def _create_parquet_dataset_simple(root_path):
@@ -2659,15 +2736,15 @@ def test_parquet_dataset_lazy_filtering(tempdir, open_logging_fs):
 
 @pytest.mark.parquet
 @pytest.mark.pandas
-def test_dataset_schema_metadata(tempdir):
+def test_dataset_schema_metadata(tempdir, dataset_reader):
     # ARROW-8802
     df = pd.DataFrame({'a': [1, 2, 3]})
     path = tempdir / "test.parquet"
     df.to_parquet(path)
     dataset = ds.dataset(path)
 
-    schema = dataset.to_table().schema
-    projected_schema = dataset.to_table(columns=["a"]).schema
+    schema = dataset_reader.to_table(dataset).schema
+    projected_schema = dataset_reader.to_table(dataset, columns=["a"]).schema
 
     # ensure the pandas metadata is included in the schema
     assert b"pandas" in schema.metadata
@@ -2676,7 +2753,7 @@ def test_dataset_schema_metadata(tempdir):
 
 
 @pytest.mark.parquet
-def test_filter_mismatching_schema(tempdir):
+def test_filter_mismatching_schema(tempdir, dataset_reader):
     # ARROW-9146
     import pyarrow.parquet as pq
 
@@ -2690,17 +2767,18 @@ def test_filter_mismatching_schema(tempdir):
 
     # filtering on a column with such type mismatch should implicitly
     # cast the column
-    filtered = dataset.to_table(filter=ds.field("col") > 2)
+    filtered = dataset_reader.to_table(dataset, filter=ds.field("col") > 2)
     assert filtered["col"].equals(table["col"].cast('int64').slice(2))
 
     fragment = list(dataset.get_fragments())[0]
-    filtered = fragment.to_table(filter=ds.field("col") > 2, schema=schema)
+    filtered = dataset_reader.to_table(
+        fragment, filter=ds.field("col") > 2, schema=schema)
     assert filtered["col"].equals(table["col"].cast('int64').slice(2))
 
 
 @pytest.mark.parquet
 @pytest.mark.pandas
-def test_dataset_project_only_partition_columns(tempdir):
+def test_dataset_project_only_partition_columns(tempdir, dataset_reader):
     # ARROW-8729
     import pyarrow.parquet as pq
 
@@ -2710,15 +2788,15 @@ def test_dataset_project_only_partition_columns(tempdir):
     pq.write_to_dataset(table, path, partition_cols=['part'])
     dataset = ds.dataset(path, partitioning='hive')
 
-    all_cols = dataset.to_table(use_threads=False)
-    part_only = dataset.to_table(columns=['part'], use_threads=False)
+    all_cols = dataset_reader.to_table(dataset)
+    part_only = dataset_reader.to_table(dataset, columns=['part'])
 
     assert all_cols.column('part').equals(part_only.column('part'))
 
 
 @pytest.mark.parquet
 @pytest.mark.pandas
-def test_dataset_project_null_column(tempdir):
+def test_dataset_project_null_column(tempdir, dataset_reader):
     import pandas as pd
     df = pd.DataFrame({"col": np.array([None, None, None], dtype='object')})
 
@@ -2728,17 +2806,17 @@ def test_dataset_project_null_column(tempdir):
     dataset = ds.dataset(f, format="parquet",
                          schema=pa.schema([("col", pa.int64())]))
     expected = pa.table({'col': pa.array([None, None, None], pa.int64())})
-    assert dataset.to_table().equals(expected)
+    assert dataset_reader.to_table(dataset).equals(expected)
 
 
-def test_dataset_project_columns(tempdir):
+def test_dataset_project_columns(tempdir, dataset_reader):
     # basic column re-projection with expressions
     from pyarrow import feather
     table = pa.table({"A": [1, 2, 3], "B": [1., 2., 3.], "C": ["a", "b", "c"]})
     feather.write_feather(table, tempdir / "data.feather")
 
     dataset = ds.dataset(tempdir / "data.feather", format="feather")
-    result = dataset.to_table(columns={
+    result = dataset_reader.to_table(dataset, columns={
         'A_renamed': ds.field('A'),
         'B_as_int': ds.field('B').cast("int32", safe=False),
         'C_is_a': ds.field('C') == 'a'
@@ -2752,7 +2830,7 @@ def test_dataset_project_columns(tempdir):
 
     # raise proper error when not passing an expression
     with pytest.raises(TypeError, match="Expected an Expression"):
-        dataset.to_table(columns={"A": "A"})
+        dataset_reader.to_table(dataset, columns={"A": "A"})
 
 
 @pytest.mark.parquet
@@ -3032,7 +3110,12 @@ def test_write_iterable(tempdir):
     assert result.equals(table)
 
 
-def test_write_scanner(tempdir):
+def test_write_scanner(tempdir, dataset_reader):
+    if dataset_reader.use_async:
+        pytest.skip(
+            ('ARROW-12803: Write dataset with scanner does not'
+             ' support async scan'))
+
     table = pa.table([
         pa.array(range(20)), pa.array(np.random.randn(20)),
         pa.array(np.repeat(['a', 'b'], 10))
@@ -3040,21 +3123,22 @@ def test_write_scanner(tempdir):
     dataset = ds.dataset(table)
 
     base_dir = tempdir / 'dataset_from_scanner'
-    ds.write_dataset(dataset.scanner(), base_dir, format="feather")
-    result = ds.dataset(base_dir, format="ipc").to_table()
+    ds.write_dataset(dataset_reader.scanner(
+        dataset), base_dir, format="feather")
+    result = dataset_reader.to_table(ds.dataset(base_dir, format="ipc"))
     assert result.equals(table)
 
     # scanner with different projected_schema
     base_dir = tempdir / 'dataset_from_scanner2'
-    ds.write_dataset(dataset.scanner(columns=["f1"]),
+    ds.write_dataset(dataset_reader.scanner(dataset, columns=["f1"]),
                      base_dir, format="feather")
-    result = ds.dataset(base_dir, format="ipc").to_table()
+    result = dataset_reader.to_table(ds.dataset(base_dir, format="ipc"))
     assert result.equals(table.select(["f1"]))
 
     # schema not allowed when writing a scanner
     with pytest.raises(ValueError, match="Cannot specify a schema"):
-        ds.write_dataset(dataset.scanner(), base_dir, schema=table.schema,
-                         format="feather")
+        ds.write_dataset(dataset_reader.scanner(dataset), base_dir,
+                         schema=table.schema, format="feather")
 
 
 def test_write_table_partitioned_dict(tempdir):
@@ -3202,7 +3286,7 @@ def test_write_dataset_s3(s3_example_simple):
 
 
 @pytest.mark.parquet
-def test_dataset_null_to_dictionary_cast(tempdir):
+def test_dataset_null_to_dictionary_cast(tempdir, dataset_reader):
     # ARROW-12420
     import pyarrow.parquet as pq
 
@@ -3218,5 +3302,5 @@ def test_dataset_null_to_dictionary_cast(tempdir):
         format=ds.ParquetFileFormat(),
         filesystem=fs.LocalFileSystem(),
     )
-    table = fsds.to_table()
+    table = dataset_reader.to_table(fsds)
     assert table.schema == schema
diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R
index 8477f949f1b..28cafcead3b 100644
--- a/r/R/arrowExports.R
+++ b/r/R/arrowExports.R
@@ -504,6 +504,10 @@ dataset___ScannerBuilder__UseThreads <- function(sb, threads){
     invisible(.Call(`_arrow_dataset___ScannerBuilder__UseThreads`, sb, threads))
 }
 
+dataset___ScannerBuilder__UseAsync <- function(sb, use_async){
+    invisible(.Call(`_arrow_dataset___ScannerBuilder__UseAsync`, sb, use_async))
+}
+
 dataset___ScannerBuilder__BatchSize <- function(sb, batch_size){
     invisible(.Call(`_arrow_dataset___ScannerBuilder__BatchSize`, sb, batch_size))
 }
diff --git a/r/R/dataset-scan.R b/r/R/dataset-scan.R
index 43e72456dc0..4fc73485e3a 100644
--- a/r/R/dataset-scan.R
+++ b/r/R/dataset-scan.R
@@ -32,6 +32,8 @@
 #' * `filter`: A `Expression` to filter the scanned rows by, or `TRUE` (default)
 #'    to keep all rows.
 #' * `use_threads`: logical: should scanning use multithreading? Default `TRUE`
+#' * `use_async`: logical: should the async scanner (performs better on
+#'    high-latency/highly parallel filesystems like S3) be used? Default `FALSE`
 #' * `...`: Additional arguments, currently ignored
 #' @section Methods:
 #' `ScannerBuilder` has the following methods:
@@ -42,6 +44,7 @@
 #' - `$UseThreads(threads)`: logical: should the scan use multithreading?
 #' The method's default input is `TRUE`, but you must call the method to enable
 #' multithreading because the scanner default is `FALSE`.
+#' - `$UseAsync(use_async)`: logical: should the async scanner be used?
 #' - `$BatchSize(batch_size)`: integer: Maximum row count of scanned record
 #' batches, default is 32K. If scanned record batches are overflowing memory
 #' then this method can be called to reduce their size.
@@ -68,9 +71,14 @@ Scanner$create <- function(dataset,
                            projection = NULL,
                            filter = TRUE,
                            use_threads = option_use_threads(),
+                           use_async = NULL,
                            batch_size = NULL,
                            fragment_scan_options = NULL,
                            ...) {
+  if (is.null(use_async)) {
+    use_async = getOption("arrow.use_async", FALSE)
+  }
+
   if (inherits(dataset, "arrow_dplyr_query")) {
     if (inherits(dataset$.data, "ArrowTabular")) {
       # To handle mutate() on Table/RecordBatch, we need to collect(as_data_frame=FALSE) now
@@ -81,6 +89,7 @@ Scanner$create <- function(dataset,
       c(dataset$selected_columns, dataset$temp_columns),
       dataset$filtered_rows,
       use_threads,
+      use_async,
       batch_size,
       fragment_scan_options,
       ...
@@ -95,6 +104,9 @@ Scanner$create <- function(dataset,
   if (use_threads) {
     scanner_builder$UseThreads()
   }
+  if (use_async) {
+    scanner_builder$UseAsync()
+  }
   if (!is.null(projection)) {
     scanner_builder$Project(projection)
   }
@@ -181,6 +193,10 @@ ScannerBuilder <- R6Class("ScannerBuilder", inherit = ArrowObject,
       dataset___ScannerBuilder__UseThreads(self, threads)
       self
     },
+    UseAsync = function(use_async = TRUE) {
+      dataset___ScannerBuilder__UseAsync(self, use_async)
+      self
+    },
     BatchSize = function(batch_size) {
       dataset___ScannerBuilder__BatchSize(self, batch_size)
       self
diff --git a/r/man/Scanner.Rd b/r/man/Scanner.Rd
index 36b6e7f21f7..76946c69b9b 100644
--- a/r/man/Scanner.Rd
+++ b/r/man/Scanner.Rd
@@ -20,6 +20,8 @@ It takes the following arguments:
 \item \code{filter}: A \code{Expression} to filter the scanned rows by, or \code{TRUE} (default)
 to keep all rows.
 \item \code{use_threads}: logical: should scanning use multithreading? Default \code{TRUE}
+\item \code{use_async}: logical: should the async scanner (performs better on
+high-latency/highly parallel filesystems like S3) be used? Default \code{FALSE}
 \item \code{...}: Additional arguments, currently ignored
 }
 }
@@ -34,6 +36,7 @@ by \code{cols}, a character vector of column names
 \item \verb{$UseThreads(threads)}: logical: should the scan use multithreading?
 The method's default input is \code{TRUE}, but you must call the method to enable
 multithreading because the scanner default is \code{FALSE}.
+\item \verb{$UseAsync(use_async)}: logical: should the async scanner be used?
 \item \verb{$BatchSize(batch_size)}: integer: Maximum row count of scanned record
 batches, default is 32K. If scanned record batches are overflowing memory
 then this method can be called to reduce their size.
diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp
index 1ccfd593d2c..20fd92f7fae 100644
--- a/r/src/arrowExports.cpp
+++ b/r/src/arrowExports.cpp
@@ -1984,6 +1984,23 @@ extern "C" SEXP _arrow_dataset___ScannerBuilder__UseThreads(SEXP sb_sexp, SEXP t
 }
 #endif
 
+// dataset.cpp
+#if defined(ARROW_R_WITH_DATASET)
+void dataset___ScannerBuilder__UseAsync(const std::shared_ptr<ds::ScannerBuilder>& sb, bool use_async);
+extern "C" SEXP _arrow_dataset___ScannerBuilder__UseAsync(SEXP sb_sexp, SEXP use_async_sexp){
+BEGIN_CPP11
+	arrow::r::Input<const std::shared_ptr<ds::ScannerBuilder>&>::type sb(sb_sexp);
+	arrow::r::Input<bool>::type use_async(use_async_sexp);
+	dataset___ScannerBuilder__UseAsync(sb, use_async);
+	return R_NilValue;
+END_CPP11
+}
+#else
+extern "C" SEXP _arrow_dataset___ScannerBuilder__UseAsync(SEXP sb_sexp, SEXP use_async_sexp){
+	Rf_error("Cannot call dataset___ScannerBuilder__UseAsync(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+}
+#endif
+
 // dataset.cpp
 #if defined(ARROW_R_WITH_DATASET)
 void dataset___ScannerBuilder__BatchSize(const std::shared_ptr<ds::ScannerBuilder>& sb, int64_t batch_size);
@@ -6950,6 +6967,7 @@ static const R_CallMethodDef CallEntries[] = {
 		{ "_arrow_dataset___ScannerBuilder__ProjectExprs", (DL_FUNC) &_arrow_dataset___ScannerBuilder__ProjectExprs, 3}, 
 		{ "_arrow_dataset___ScannerBuilder__Filter", (DL_FUNC) &_arrow_dataset___ScannerBuilder__Filter, 2}, 
 		{ "_arrow_dataset___ScannerBuilder__UseThreads", (DL_FUNC) &_arrow_dataset___ScannerBuilder__UseThreads, 2}, 
+		{ "_arrow_dataset___ScannerBuilder__UseAsync", (DL_FUNC) &_arrow_dataset___ScannerBuilder__UseAsync, 2}, 
 		{ "_arrow_dataset___ScannerBuilder__BatchSize", (DL_FUNC) &_arrow_dataset___ScannerBuilder__BatchSize, 2}, 
 		{ "_arrow_dataset___ScannerBuilder__FragmentScanOptions", (DL_FUNC) &_arrow_dataset___ScannerBuilder__FragmentScanOptions, 2}, 
 		{ "_arrow_dataset___ScannerBuilder__schema", (DL_FUNC) &_arrow_dataset___ScannerBuilder__schema, 1}, 
diff --git a/r/src/dataset.cpp b/r/src/dataset.cpp
index c419c69ffce..c5ecc84dbaa 100644
--- a/r/src/dataset.cpp
+++ b/r/src/dataset.cpp
@@ -393,6 +393,12 @@ void dataset___ScannerBuilder__UseThreads(const std::shared_ptr<ds::ScannerBuild
   StopIfNotOk(sb->UseThreads(threads));
 }
 
+// [[dataset::export]]
+void dataset___ScannerBuilder__UseAsync(const std::shared_ptr<ds::ScannerBuilder>& sb,
+                                        bool use_async) {
+  StopIfNotOk(sb->UseAsync(use_async));
+}
+
 // [[dataset::export]]
 void dataset___ScannerBuilder__BatchSize(const std::shared_ptr<ds::ScannerBuilder>& sb,
                                          int64_t batch_size) {
diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R
index ce9e5e84402..920a99cd6b6 100644
--- a/r/tests/testthat/test-dataset.R
+++ b/r/tests/testthat/test-dataset.R
@@ -1034,6 +1034,13 @@ test_that("Scanner$ScanBatches", {
   batches <- ds$NewScan()$Finish()$ScanBatches()
   table <- Table$create(!!!batches)
   expect_equivalent(as.data.frame(table), rbind(df1, df2))
+
+  # use_async will always use the thread pool (even if it only uses
+  # one thread) and RTools 3.5 on Windows doesn't support this
+  skip_on_os("windows")
+  batches <- ds$NewScan()$UseAsync(TRUE)$Finish()$ScanBatches()
+  table <- Table$create(!!!batches)
+  expect_equivalent(as.data.frame(table), rbind(df1, df2))
 })
 
 test_that("Scanner$ToRecordBatchReader()", {

From 022dd636150066e18b3d9a449a59aa873ec92e96 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Sat, 22 May 2021 15:19:39 +0800
Subject: [PATCH 283/719] ARROW-12717: [C++][Python] Add find_substring kernel

This adds a very simple lfind kernel. If the substring is not found, -1 is reported. Nulls are propagated. Regexes are not supported, nor is rfind.

Closes #10353 from lidavidm/arrow-12717

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Yibo Cai <yibo.cai@arm.com>
---
 .../arrow/compute/kernels/scalar_string.cc    | 53 +++++++++++++++++-
 .../compute/kernels/scalar_string_test.cc     | 38 +++++++++++++
 docs/source/cpp/compute.rst                   | 55 +++++++++++--------
 docs/source/python/api/compute.rst            |  1 +
 python/pyarrow/compute.py                     | 19 +++++++
 python/pyarrow/tests/test_compute.py          | 22 ++++++++
 6 files changed, 161 insertions(+), 27 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/scalar_string.cc b/cpp/src/arrow/compute/kernels/scalar_string.cc
index 23a94d9eb92..4d83e1ec24e 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string.cc
@@ -488,21 +488,25 @@ struct PlainSubstringMatcher {
     }
   }
 
-  bool Match(util::string_view current) const {
+  int64_t Find(util::string_view current) const {
     // Phase 2: Find the prefix in the data
     const auto pattern_length = options_.pattern.size();
     int64_t pattern_pos = 0;
+    int64_t pos = 0;
     for (const auto c : current) {
       while ((pattern_pos >= 0) && (options_.pattern[pattern_pos] != c)) {
         pattern_pos = prefix_table[pattern_pos];
       }
       pattern_pos++;
       if (static_cast<size_t>(pattern_pos) == pattern_length) {
-        return true;
+        return pos + 1 - pattern_length;
       }
+      pos++;
     }
-    return false;
+    return -1;
   }
+
+  bool Match(util::string_view current) const { return Find(current) >= 0; }
 };
 
 const FunctionDoc match_substring_doc(
@@ -664,6 +668,48 @@ void AddMatchSubstring(FunctionRegistry* registry) {
 #endif
 }
 
+// Substring find - lfind/index/etc.
+
+struct FindSubstring {
+  const PlainSubstringMatcher matcher_;
+
+  explicit FindSubstring(PlainSubstringMatcher matcher) : matcher_(std::move(matcher)) {}
+
+  template <typename OutValue, typename... Ignored>
+  OutValue Call(KernelContext*, util::string_view val, Status*) const {
+    return static_cast<OutValue>(matcher_.Find(val));
+  }
+};
+
+template <typename InputType>
+Status FindSubstringExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  using offset_type = typename TypeTraits<InputType>::OffsetType;
+  applicator::ScalarUnaryNotNullStateful<offset_type, InputType, FindSubstring> kernel{
+      FindSubstring(PlainSubstringMatcher(MatchSubstringState::Get(ctx)))};
+  return kernel.Exec(ctx, batch, out);
+}
+
+const FunctionDoc find_substring_doc(
+    "Find first occurrence of substring",
+    ("For each string in `strings`, emit the index of the first occurrence of the given "
+     "pattern, or -1 if not found.\n"
+     "Null inputs emit null. The pattern must be given in MatchSubstringOptions."),
+    {"strings"}, "MatchSubstringOptions");
+
+void AddFindSubstring(FunctionRegistry* registry) {
+  auto func = std::make_shared<ScalarFunction>("find_substring", Arity::Unary(),
+                                               &find_substring_doc);
+  DCHECK_OK(func->AddKernel({binary()}, int32(), FindSubstringExec<BinaryType>,
+                            MatchSubstringState::Init));
+  DCHECK_OK(func->AddKernel({utf8()}, int32(), FindSubstringExec<StringType>,
+                            MatchSubstringState::Init));
+  DCHECK_OK(func->AddKernel({large_binary()}, int64(), FindSubstringExec<LargeBinaryType>,
+                            MatchSubstringState::Init));
+  DCHECK_OK(func->AddKernel({large_utf8()}, int64(), FindSubstringExec<LargeStringType>,
+                            MatchSubstringState::Init));
+  DCHECK_OK(registry->AddFunction(std::move(func)));
+}
+
 // IsAlpha/Digit etc
 
 #ifdef ARROW_WITH_UTF8PROC
@@ -2626,6 +2672,7 @@ void RegisterScalarStringAscii(FunctionRegistry* registry) {
   AddBinaryLength(registry);
   AddUtf8Length(registry);
   AddMatchSubstring(registry);
+  AddFindSubstring(registry);
   MakeUnaryStringBatchKernelWithState<ReplaceSubStringPlain>(
       "replace_substring", registry, &replace_substring_doc,
       MemAllocation::NO_PREALLOCATE);
diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
index 5ec7f579fff..7f2126828ce 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
@@ -74,6 +74,25 @@ TYPED_TEST(TestBinaryKernels, BinaryLength) {
                    this->offset_type(), "[3, null, 10, 0, 1]");
 }
 
+TYPED_TEST(TestBinaryKernels, FindSubstring) {
+  MatchSubstringOptions options{"ab"};
+  this->CheckUnary("find_substring", "[]", this->offset_type(), "[]", &options);
+  this->CheckUnary("find_substring", R"(["abc", "acb", "cab", null, "bac"])",
+                   this->offset_type(), "[0, -1, 1, null, -1]", &options);
+
+  MatchSubstringOptions options_repeated{"abab"};
+  this->CheckUnary("find_substring", R"(["abab", "ab", "cababc", null, "bac"])",
+                   this->offset_type(), "[0, -1, 1, null, -1]", &options_repeated);
+
+  MatchSubstringOptions options_double_char{"aab"};
+  this->CheckUnary("find_substring", R"(["aacb", "aab", "ab", "aaab"])",
+                   this->offset_type(), "[-1, 0, -1, 1]", &options_double_char);
+
+  MatchSubstringOptions options_double_char_2{"bbcaa"};
+  this->CheckUnary("find_substring", R"(["abcbaabbbcaabccabaab"])", this->offset_type(),
+                   "[7]", &options_double_char_2);
+}
+
 template <typename TestType>
 class TestStringKernels : public BaseTestStringKernels<TestType> {};
 
@@ -470,6 +489,25 @@ TYPED_TEST(TestStringKernels, MatchLikeEscaping) {
 }
 #endif
 
+TYPED_TEST(TestStringKernels, FindSubstring) {
+  MatchSubstringOptions options{"ab"};
+  this->CheckUnary("find_substring", "[]", this->offset_type(), "[]", &options);
+  this->CheckUnary("find_substring", R"(["abc", "acb", "cab", null, "bac"])",
+                   this->offset_type(), "[0, -1, 1, null, -1]", &options);
+
+  MatchSubstringOptions options_repeated{"abab"};
+  this->CheckUnary("find_substring", R"(["abab", "ab", "cababc", null, "bac"])",
+                   this->offset_type(), "[0, -1, 1, null, -1]", &options_repeated);
+
+  MatchSubstringOptions options_double_char{"aab"};
+  this->CheckUnary("find_substring", R"(["aacb", "aab", "ab", "aaab"])",
+                   this->offset_type(), "[-1, 0, -1, 1]", &options_double_char);
+
+  MatchSubstringOptions options_double_char_2{"bbcaa"};
+  this->CheckUnary("find_substring", R"(["abcbaabbbcaabccabaab"])", this->offset_type(),
+                   "[7]", &options_double_char_2);
+}
+
 TYPED_TEST(TestStringKernels, SplitBasics) {
   SplitPatternOptions options{" "};
   // basics
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index 79140257a9b..ca68a31cc21 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -462,7 +462,7 @@ String transforms
 * \(1) Each ASCII character in the input is converted to lowercase or
   uppercase.  Non-ASCII characters are left untouched.
 
-* \(2) ASCII input is reversed to the output. If non-ASCII characters 
+* \(2) ASCII input is reversed to the output. If non-ASCII characters
   are present, ``Invalid`` :class:`Status` will be returned.
 
 * \(3) Output is the physical length in bytes of each input element.  Output
@@ -482,7 +482,7 @@ String transforms
   pattern contains groups, backreferencing can be used.
 
 * \(6) Output is the number of characters (not bytes) of each input element.
-  Output type is Int32 for String, Int64 for LargeString. 
+  Output type is Int32 for String, Int64 for LargeString.
 
 * \(7) Each UTF8-encoded character in the input is converted to lowercase or
   uppercase.
@@ -541,40 +541,48 @@ These functions trim off characters on both sides (trim), or the left (ltrim) or
 Containment tests
 ~~~~~~~~~~~~~~~~~
 
-+---------------------------+------------+------------------------------------+---------------+----------------------------------------+
-| Function name             | Arity      | Input types                        | Output type   | Options class                          |
-+===========================+============+====================================+===============+========================================+
-| match_like                | Unary      | String-like                        | Boolean (1)   | :struct:`MatchSubstringOptions`        |
-+---------------------------+------------+------------------------------------+---------------+----------------------------------------+
-| match_substring           | Unary      | String-like                        | Boolean (2)   | :struct:`MatchSubstringOptions`        |
-+---------------------------+------------+------------------------------------+---------------+----------------------------------------+
-| match_substring_regex     | Unary      | String-like                        | Boolean (3)   | :struct:`MatchSubstringOptions`        |
-+---------------------------+------------+------------------------------------+---------------+----------------------------------------+
-| index_in                  | Unary      | Boolean, Null, Numeric, Temporal,  | Int32 (4)     | :struct:`SetLookupOptions`             |
-|                           |            | Binary- and String-like            |               |                                        |
-+---------------------------+------------+------------------------------------+---------------+----------------------------------------+
-| is_in                     | Unary      | Boolean, Null, Numeric, Temporal,  | Boolean (5)   | :struct:`SetLookupOptions`             |
-|                           |            | Binary- and String-like            |               |                                        |
-+---------------------------+------------+------------------------------------+---------------+----------------------------------------+
-
-* \(1) Output is true iff the SQL-style LIKE pattern
++---------------------------+------------+------------------------------------+--------------------+----------------------------------------+
+| Function name             | Arity      | Input types                        | Output type        | Options class                          |
++===========================+============+====================================+====================+========================================+
+| find_substring            | Unary      | String-like                        | Int32 or Int64 (1) | :struct:`MatchSubstringOptions`        |
++---------------------------+------------+------------------------------------+--------------------+----------------------------------------+
+| match_like                | Unary      | String-like                        | Boolean (2)        | :struct:`MatchSubstringOptions`        |
++---------------------------+------------+------------------------------------+--------------------+----------------------------------------+
+| match_substring           | Unary      | String-like                        | Boolean (3)        | :struct:`MatchSubstringOptions`        |
++---------------------------+------------+------------------------------------+--------------------+----------------------------------------+
+| match_substring_regex     | Unary      | String-like                        | Boolean (4)        | :struct:`MatchSubstringOptions`        |
++---------------------------+------------+------------------------------------+--------------------+----------------------------------------+
+| index_in                  | Unary      | Boolean, Null, Numeric, Temporal,  | Int32 (5)          | :struct:`SetLookupOptions`             |
+|                           |            | Binary- and String-like            |                    |                                        |
++---------------------------+------------+------------------------------------+--------------------+----------------------------------------+
+| is_in                     | Unary      | Boolean, Null, Numeric, Temporal,  | Boolean (6)        | :struct:`SetLookupOptions`             |
+|                           |            | Binary- and String-like            |                    |                                        |
++---------------------------+------------+------------------------------------+--------------------+----------------------------------------+
+
+
+* \(1) Output is the index of the first occurrence of
+  :member:`MatchSubstringOptions::pattern` in the corresponding input
+  string, otherwise -1. Output type is Int32 for Binary/String, Int64
+  for LargeBinary/LargeString.
+
+* \(2) Output is true iff the SQL-style LIKE pattern
   :member:`MatchSubstringOptions::pattern` fully matches the
   corresponding input element. That is, ``%`` will match any number of
   characters, ``_`` will match exactly one character, and any other
   character matches itself. To match a literal percent sign or
   underscore, precede the character with a backslash.
 
-* \(2) Output is true iff :member:`MatchSubstringOptions::pattern`
+* \(3) Output is true iff :member:`MatchSubstringOptions::pattern`
   is a substring of the corresponding input element.
 
-* \(3) Output is true iff :member:`MatchSubstringOptions::pattern`
+* \(4) Output is true iff :member:`MatchSubstringOptions::pattern`
   matches the corresponding input element at any position.
 
-* \(4) Output is the index of the corresponding input element in
+* \(5) Output is the index of the corresponding input element in
   :member:`SetLookupOptions::value_set`, if found there.  Otherwise,
   output is null.
 
-* \(5) Output is true iff the corresponding input element is equal to one
+* \(6) Output is true iff the corresponding input element is equal to one
   of the elements in :member:`SetLookupOptions::value_set`.
 
 
@@ -878,4 +886,3 @@ Structural transforms
 * \(2) For each value in the list child array, the index at which it is found
   in the list array is appended to the output.  Nulls in the parent list array
   are discarded.
-
diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst
index d206cbc9e50..61482f49f19 100644
--- a/docs/source/python/api/compute.rst
+++ b/docs/source/python/api/compute.rst
@@ -169,6 +169,7 @@ Containment tests
 .. autosummary::
    :toctree: ../generated/
 
+   find_substring
    index_in
    is_in
    match_like
diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py
index cb6ba475b5f..6bb0efb5963 100644
--- a/python/pyarrow/compute.py
+++ b/python/pyarrow/compute.py
@@ -288,6 +288,25 @@ def cast(arr, target_type, safe=True):
     return call_function("cast", [arr], options)
 
 
+def find_substring(array, pattern):
+    """
+    Find the index of the first occurrence of substring *pattern* in each
+    value of a string array.
+
+    Parameters
+    ----------
+    array : pyarrow.Array or pyarrow.ChunkedArray
+    pattern : str
+        pattern to search for exact matches
+
+    Returns
+    -------
+    result : pyarrow.Array or pyarrow.ChunkedArray
+    """
+    return call_function("find_substring", [array],
+                         MatchSubstringOptions(pattern))
+
+
 def match_like(array, pattern):
     """
     Test if the SQL-style LIKE pattern *pattern* matches a value of a
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index b014dcc0c8a..c62ff72acd5 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -280,6 +280,28 @@ def test_variance():
     assert pc.variance(data, ddof=1).as_py() == 6.0
 
 
+def test_find_substring():
+    arr = pa.array(["ab", "cab", "ba", None])
+    result = pc.find_substring(arr, "ab")
+    expected = pa.array([0, 1, -1, None], type=pa.int32())
+    assert expected.equals(result)
+
+    arr = pa.array(["ab", "cab", "ba", None], type=pa.large_string())
+    result = pc.find_substring(arr, "ab")
+    expected = pa.array([0, 1, -1, None], type=pa.int64())
+    assert expected.equals(result)
+
+    arr = pa.array([b"ab", b"cab", b"ba", None])
+    result = pc.find_substring(arr, b"ab")
+    expected = pa.array([0, 1, -1, None], type=pa.int32())
+    assert expected.equals(result)
+
+    arr = pa.array([b"ab", b"cab", b"ba", None], type=pa.large_binary())
+    result = pc.find_substring(arr, b"ab")
+    expected = pa.array([0, 1, -1, None], type=pa.int64())
+    assert expected.equals(result)
+
+
 def test_match_like():
     arr = pa.array(["ab", "ba%", "ba", "ca%d", None])
     result = pc.match_like(arr, r"_a\%%")

From e7e3784d8aa6b27fb199a5a74805e8e923714fb7 Mon Sep 17 00:00:00 2001
From: Dominik Moritz <domoritz@gmail.com>
Date: Sat, 22 May 2021 17:24:30 +0900
Subject: [PATCH 284/719] ARROW-12800: [JS] Remove text encoder and decoder
 polyfills

This drops IE support as well.

Closes #10340 from domoritz/remove-text-encoding-utf-8

Authored-by: Dominik Moritz <domoritz@gmail.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 js/gulp/closure-task.js       |  2 --
 js/package.json               |  2 --
 js/src/util/utf8.ts           | 11 +++--------
 js/test/generate-test-data.ts |  3 +--
 js/yarn.lock                  | 10 ----------
 5 files changed, 4 insertions(+), 24 deletions(-)

diff --git a/js/gulp/closure-task.js b/js/gulp/closure-task.js
index 4ab9e7416fd..46e87e9f99e 100644
--- a/js/gulp/closure-task.js
+++ b/js/gulp/closure-task.js
@@ -72,8 +72,6 @@ const closureTask = ((cache) => memoizeTask(cache, async function closure(target
                 /* external libs first */
                 `node_modules/flatbuffers/package.json`,
                 `node_modules/flatbuffers/js/flatbuffers.mjs`,
-                `node_modules/text-encoding-utf-8/package.json`,
-                `node_modules/text-encoding-utf-8/src/encoding.js`,
                 `${src}/**/*.js` /* <-- then source globs */
             ], { base: `./` }),
             sourcemaps.init(),
diff --git a/js/package.json b/js/package.json
index 58721f6d6bd..1774f0076ba 100644
--- a/js/package.json
+++ b/js/package.json
@@ -55,13 +55,11 @@
   "dependencies": {
     "@types/flatbuffers": "^1.10.0",
     "@types/node": "^15.0.2",
-    "@types/text-encoding-utf-8": "^1.0.1",
     "command-line-args": "5.1.1",
     "command-line-usage": "6.1.1",
     "flatbuffers": "1.12.0",
     "json-bignum": "^0.0.3",
     "pad-left": "^2.1.0",
-    "text-encoding-utf-8": "^1.0.2",
     "tslib": "^2.2.0"
   },
   "devDependencies": {
diff --git a/js/src/util/utf8.ts b/js/src/util/utf8.ts
index 1eee9314ccd..b6f8fcdb824 100644
--- a/js/src/util/utf8.ts
+++ b/js/src/util/utf8.ts
@@ -15,15 +15,10 @@
 // specific language governing permissions and limitations
 // under the License.
 
-import {
-    TextDecoder as TextDecoderPolyfill,
-    TextEncoder as TextEncoderPolyfill,
-} from 'text-encoding-utf-8';
-
-const decoder = new (typeof TextDecoder !== 'undefined' ? TextDecoder : TextDecoderPolyfill)('utf-8');
+const decoder = new TextDecoder('utf-8');
 /** @ignore */
-export const decodeUtf8 = (buffer?: ArrayBuffer | ArrayBufferView) => decoder.decode(buffer);
+export const decodeUtf8 = (buffer?: BufferSource) => decoder.decode(buffer);
 
-const encoder = new (typeof TextEncoder !== 'undefined' ? TextEncoder : TextEncoderPolyfill)();
+const encoder = new TextEncoder();
 /** @ignore */
 export const encodeUtf8 = (value?: string) => encoder.encode(value);
diff --git a/js/test/generate-test-data.ts b/js/test/generate-test-data.ts
index d12bb88a098..4572be61379 100644
--- a/js/test/generate-test-data.ts
+++ b/js/test/generate-test-data.ts
@@ -16,7 +16,6 @@
 // under the License.
 
 const randomatic = require('randomatic');
-import { TextEncoder } from 'text-encoding-utf-8';
 import { VectorType as V } from '../src/interfaces';
 
 import {
@@ -589,7 +588,7 @@ const memoize = (fn: () => any) => ((x?: any) => () => x || (x = fn()))();
 
 const encodeUtf8 = ((encoder) =>
     encoder.encode.bind(encoder) as (input?: string, options?: { stream?: boolean }) => Uint8Array
-)(new TextEncoder('utf-8'));
+)(new TextEncoder());
 
 function fillRandom<T extends TypedArrayConstructor>(ArrayType: T, length: number) {
     const BPE = ArrayType.BYTES_PER_ELEMENT;
diff --git a/js/yarn.lock b/js/yarn.lock
index 6399e592561..66c01691edb 100644
--- a/js/yarn.lock
+++ b/js/yarn.lock
@@ -1683,11 +1683,6 @@
   resolved "https://registry.yarnpkg.com/@types/stack-utils/-/stack-utils-2.0.0.tgz#7036640b4e21cc2f259ae826ce843d277dad8cff"
   integrity sha512-RJJrrySY7A8havqpGObOB4W92QXKJo63/jFLLgpvOtsGUqbQZ9Sbgl35KMm1DjC6j7AvmmU2bIno+3IyEaemaw==
 
-"@types/text-encoding-utf-8@^1.0.1":
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/@types/text-encoding-utf-8/-/text-encoding-utf-8-1.0.1.tgz#908d884af1114e5d8df47597b1e04f833383d23d"
-  integrity sha512-GpIEYaS+yNfYqpowLLziiY42pyaL+lThd/wMh6tTubaKuG4IRkXqqyxK7Nddn3BvpUg2+go3Gv/jbXvAFMRjiQ==
-
 "@types/yargs-parser@*":
   version "20.2.0"
   resolved "https://registry.yarnpkg.com/@types/yargs-parser/-/yargs-parser-20.2.0.tgz#dd3e6699ba3237f0348cd085e4698780204842f9"
@@ -9228,11 +9223,6 @@ test-exclude@^6.0.0:
     glob "^7.1.4"
     minimatch "^3.0.4"
 
-text-encoding-utf-8@^1.0.2:
-  version "1.0.2"
-  resolved "https://registry.yarnpkg.com/text-encoding-utf-8/-/text-encoding-utf-8-1.0.2.tgz#585b62197b0ae437e3c7b5d0af27ac1021e10d13"
-  integrity sha512-8bw4MY9WjdsD2aMtO0OzOCY3pXGYNx2d2FfHRVUKkiCPDWjKuOlhLVASS+pD7VkLTVjW268LYJHwsnPFlBpbAg==
-
 text-extensions@^1.0.0:
   version "1.9.0"
   resolved "https://registry.yarnpkg.com/text-extensions/-/text-extensions-1.9.0.tgz#1853e45fee39c945ce6f6c36b2d659b5aabc2a26"

From 618b2863235ca4f9d0006e38581044298808e93a Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Sat, 22 May 2021 21:39:04 +0800
Subject: [PATCH 285/719] ARROW-2665: [C++][Python] Add index() kernel

Add a simple index() kernel.

Note that the Python start/end options are handled entirely in Python, not in the kernel itself.

Short-circuiting is somewhat implemented: the kernel executor will still loop through all batches but the kernel will stop looking at data once it finds an item.

Closes #10358 from lidavidm/arrow-2665

Lead-authored-by: David Li <li.davidm96@gmail.com>
Co-authored-by: Yibo Cai <yibo.cai@arm.com>
Signed-off-by: Yibo Cai <yibo.cai@arm.com>
---
 cpp/src/arrow/compute/api_aggregate.cc        |   4 +
 cpp/src/arrow/compute/api_aggregate.h         |  19 +++
 cpp/src/arrow/compute/kernel.h                |   2 +-
 .../arrow/compute/kernels/aggregate_basic.cc  | 141 ++++++++++++++++
 .../arrow/compute/kernels/aggregate_test.cc   | 156 ++++++++++++++++++
 .../arrow/compute/kernels/codegen_internal.h  |   2 +-
 docs/source/cpp/compute.rst                   |   2 +
 docs/source/python/api/compute.rst            |   1 +
 python/pyarrow/_compute.pyx                   |  26 +++
 python/pyarrow/array.pxi                      |   8 +
 python/pyarrow/compute.py                     |  35 ++++
 python/pyarrow/includes/libarrow.pxd          |   4 +
 python/pyarrow/table.pxi                      |   8 +
 python/pyarrow/tests/test_compute.py          |  14 ++
 14 files changed, 420 insertions(+), 2 deletions(-)

diff --git a/cpp/src/arrow/compute/api_aggregate.cc b/cpp/src/arrow/compute/api_aggregate.cc
index dca54a0faba..967c8179da7 100644
--- a/cpp/src/arrow/compute/api_aggregate.cc
+++ b/cpp/src/arrow/compute/api_aggregate.cc
@@ -77,5 +77,9 @@ Result<Datum> TDigest(const Datum& value, const TDigestOptions& options,
   return CallFunction("tdigest", {value}, &options, ctx);
 }
 
+Result<Datum> Index(const Datum& value, IndexOptions options, ExecContext* ctx) {
+  return CallFunction("index", {value}, &options, ctx);
+}
+
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/compute/api_aggregate.h b/cpp/src/arrow/compute/api_aggregate.h
index a7ceb2ac2fd..d781bbb6205 100644
--- a/cpp/src/arrow/compute/api_aggregate.h
+++ b/cpp/src/arrow/compute/api_aggregate.h
@@ -126,6 +126,13 @@ struct ARROW_EXPORT TDigestOptions : public FunctionOptions {
   uint32_t buffer_size;
 };
 
+/// \brief Control Index kernel behavior
+struct ARROW_EXPORT IndexOptions : public FunctionOptions {
+  explicit IndexOptions(std::shared_ptr<Scalar> value) : value{std::move(value)} {}
+
+  std::shared_ptr<Scalar> value;
+};
+
 /// @}
 
 /// \brief Count non-null (or null) values in an array.
@@ -293,6 +300,18 @@ Result<Datum> TDigest(const Datum& value,
                       const TDigestOptions& options = TDigestOptions::Defaults(),
                       ExecContext* ctx = NULLPTR);
 
+/// \brief Find the first index of a value in an array.
+///
+/// \param[in] value The array to search.
+/// \param[in] options The array to search for. See IndexOoptions.
+/// \param[in] ctx the function execution context, optional
+/// \return out a Scalar containing the index (or -1 if not found).
+///
+/// \since 5.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Index(const Datum& value, IndexOptions options, ExecContext* ctx = NULLPTR);
+
 namespace internal {
 
 /// Internal use only: streaming group identifier.
diff --git a/cpp/src/arrow/compute/kernel.h b/cpp/src/arrow/compute/kernel.h
index 0fecea080d8..0d5fa147727 100644
--- a/cpp/src/arrow/compute/kernel.h
+++ b/cpp/src/arrow/compute/kernel.h
@@ -52,7 +52,7 @@ struct ARROW_EXPORT KernelState {
 /// \brief Context/state for the execution of a particular kernel.
 class ARROW_EXPORT KernelContext {
  public:
-  explicit KernelContext(ExecContext* exec_ctx) : exec_ctx_(exec_ctx) {}
+  explicit KernelContext(ExecContext* exec_ctx) : exec_ctx_(exec_ctx), state_() {}
 
   /// \brief Allocate buffer from the context's memory pool. The contents are
   /// not initialized.
diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic.cc b/cpp/src/arrow/compute/kernels/aggregate_basic.cc
index 446c1b9fc62..1ea63cdc4a0 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_basic.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_basic.cc
@@ -225,6 +225,131 @@ Result<std::unique_ptr<KernelState>> AllInit(KernelContext*, const KernelInitArg
   return ::arrow::internal::make_unique<BooleanAllImpl>();
 }
 
+// ----------------------------------------------------------------------
+// Index implementation
+
+template <typename ArgType>
+struct IndexImpl : public ScalarAggregator {
+  using ArgValue = typename internal::GetViewType<ArgType>::T;
+
+  explicit IndexImpl(IndexOptions options, KernelState* raw_state)
+      : options(std::move(options)), seen(0), index(-1) {
+    if (auto state = static_cast<IndexImpl<ArgType>*>(raw_state)) {
+      seen = state->seen;
+      index = state->index;
+    }
+  }
+
+  Status Consume(KernelContext* ctx, const ExecBatch& batch) override {
+    // short-circuit
+    if (index >= 0 || !options.value->is_valid) {
+      return Status::OK();
+    }
+
+    auto input = batch[0].array();
+    seen = input->length;
+    const ArgValue desired = internal::UnboxScalar<ArgType>::Unbox(*options.value);
+    int64_t i = 0;
+
+    ARROW_UNUSED(internal::VisitArrayValuesInline<ArgType>(
+        *input,
+        [&](ArgValue v) -> Status {
+          if (v == desired) {
+            index = i;
+            return Status::Cancelled("Found");
+          } else {
+            ++i;
+            return Status::OK();
+          }
+        },
+        [&]() -> Status {
+          ++i;
+          return Status::OK();
+        }));
+
+    return Status::OK();
+  }
+
+  Status MergeFrom(KernelContext*, KernelState&& src) override {
+    const auto& other = checked_cast<const IndexImpl&>(src);
+    if (index < 0 && other.index >= 0) {
+      index = seen + other.index;
+    }
+    seen += other.seen;
+    return Status::OK();
+  }
+
+  Status Finalize(KernelContext*, Datum* out) override {
+    out->value = std::make_shared<Int64Scalar>(index >= 0 ? index : -1);
+    return Status::OK();
+  }
+
+  const IndexOptions options;
+  int64_t seen = 0;
+  int64_t index = -1;
+};
+
+struct IndexInit {
+  std::unique_ptr<KernelState> state;
+  KernelContext* ctx;
+  const IndexOptions& options;
+  const DataType& type;
+
+  IndexInit(KernelContext* ctx, const IndexOptions& options, const DataType& type)
+      : ctx(ctx), options(options), type(type) {}
+
+  Status Visit(const DataType& type) {
+    return Status::NotImplemented("Index kernel not implemented for ", type.ToString());
+  }
+
+  Status Visit(const BooleanType&) {
+    state.reset(new IndexImpl<BooleanType>(options, ctx->state()));
+    return Status::OK();
+  }
+
+  template <typename Type>
+  enable_if_number<Type, Status> Visit(const Type&) {
+    state.reset(new IndexImpl<Type>(options, ctx->state()));
+    return Status::OK();
+  }
+
+  template <typename Type>
+  enable_if_base_binary<Type, Status> Visit(const Type&) {
+    state.reset(new IndexImpl<Type>(options, ctx->state()));
+    return Status::OK();
+  }
+
+  template <typename Type>
+  enable_if_date<Type, Status> Visit(const Type&) {
+    state.reset(new IndexImpl<Type>(options, ctx->state()));
+    return Status::OK();
+  }
+
+  template <typename Type>
+  enable_if_time<Type, Status> Visit(const Type&) {
+    state.reset(new IndexImpl<Type>(options, ctx->state()));
+    return Status::OK();
+  }
+
+  template <typename Type>
+  enable_if_timestamp<Type, Status> Visit(const Type&) {
+    state.reset(new IndexImpl<Type>(options, ctx->state()));
+    return Status::OK();
+  }
+
+  Result<std::unique_ptr<KernelState>> Create() {
+    RETURN_NOT_OK(VisitTypeInline(type, this));
+    return std::move(state);
+  }
+
+  static Result<std::unique_ptr<KernelState>> Init(KernelContext* ctx,
+                                                   const KernelInitArgs& args) {
+    IndexInit visitor(ctx, static_cast<const IndexOptions&>(*args.options),
+                      *args.inputs[0].type);
+    return visitor.Create();
+  }
+};
+
 void AddBasicAggKernels(KernelInit init,
                         const std::vector<std::shared_ptr<DataType>>& types,
                         std::shared_ptr<DataType> out_ty, ScalarAggregateFunction* func,
@@ -290,6 +415,12 @@ const FunctionDoc all_doc{"Test whether all elements in a boolean array evaluate
                           ("Null values are ignored."),
                           {"array"}};
 
+const FunctionDoc index_doc{"Find the index of the first occurrence of a given value",
+                            ("The result is always computed as an int64_t, regardless\n"
+                             "of the offset type of the input array."),
+                            {"array"},
+                            "IndexOptions"};
+
 }  // namespace
 
 void RegisterScalarAggregateBasic(FunctionRegistry* registry) {
@@ -374,6 +505,16 @@ void RegisterScalarAggregateBasic(FunctionRegistry* registry) {
   func = std::make_shared<ScalarAggregateFunction>("all", Arity::Unary(), &all_doc);
   aggregate::AddBasicAggKernels(aggregate::AllInit, {boolean()}, boolean(), func.get());
   DCHECK_OK(registry->AddFunction(std::move(func)));
+
+  // index
+  func = std::make_shared<ScalarAggregateFunction>("index", Arity::Unary(), &index_doc);
+  aggregate::AddBasicAggKernels(aggregate::IndexInit::Init, BaseBinaryTypes(), int64(),
+                                func.get());
+  aggregate::AddBasicAggKernels(aggregate::IndexInit::Init, PrimitiveTypes(), int64(),
+                                func.get());
+  aggregate::AddBasicAggKernels(aggregate::IndexInit::Init, TemporalTypes(), int64(),
+                                func.get());
+  DCHECK_OK(registry->AddFunction(std::move(func)));
 }
 
 }  // namespace internal
diff --git a/cpp/src/arrow/compute/kernels/aggregate_test.cc b/cpp/src/arrow/compute/kernels/aggregate_test.cc
index f1a2863e97d..476caab03d5 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_test.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_test.cc
@@ -1044,6 +1044,162 @@ TEST_F(TestAllKernel, Basics) {
   this->AssertAllIs(chunked_input5, false);
 }
 
+//
+// Index
+//
+
+template <typename ArrowType>
+class TestIndexKernel : public ::testing::Test {
+ public:
+  using ScalarType = typename TypeTraits<ArrowType>::ScalarType;
+  void AssertIndexIs(const Datum& array, const std::shared_ptr<ScalarType>& value,
+                     int64_t expected) {
+    IndexOptions options(value);
+    ASSERT_OK_AND_ASSIGN(Datum out, Index(array, options));
+    const Int64Scalar& out_index = out.scalar_as<Int64Scalar>();
+    ASSERT_EQ(out_index.value, expected);
+  }
+
+  void AssertIndexIs(const std::string& json, const std::shared_ptr<ScalarType>& value,
+                     int64_t expected) {
+    SCOPED_TRACE("Value: " + value->ToString());
+    SCOPED_TRACE("Input: " + json);
+    auto array = ArrayFromJSON(type_singleton(), json);
+    AssertIndexIs(array, value, expected);
+  }
+
+  void AssertIndexIs(const std::vector<std::string>& json,
+                     const std::shared_ptr<ScalarType>& value, int64_t expected) {
+    SCOPED_TRACE("Value: " + value->ToString());
+    auto array = ChunkedArrayFromJSON(type_singleton(), json);
+    SCOPED_TRACE("Input: " + array->ToString());
+    AssertIndexIs(array, value, expected);
+  }
+
+  std::shared_ptr<DataType> type_singleton() { return std::make_shared<ArrowType>(); }
+};
+
+template <typename ArrowType>
+class TestNumericIndexKernel : public TestIndexKernel<ArrowType> {
+ public:
+  using CType = typename TypeTraits<ArrowType>::CType;
+};
+TYPED_TEST_SUITE(TestNumericIndexKernel, NumericArrowTypes);
+TYPED_TEST(TestNumericIndexKernel, Basics) {
+  std::vector<std::string> chunked_input0 = {"[]", "[0]"};
+  std::vector<std::string> chunked_input1 = {"[1, 0, null]", "[0, 0]"};
+  std::vector<std::string> chunked_input2 = {"[1, 1, 1]", "[1, 0]", "[0, 1]"};
+  std::vector<std::string> chunked_input3 = {"[1, 1, 1]", "[1, 1]"};
+  std::vector<std::string> chunked_input4 = {"[1, 1, 1]", "[1, 1]", "[0]"};
+
+  auto value = std::make_shared<typename TestFixture::ScalarType>(
+      static_cast<typename TestFixture::CType>(0));
+  auto null_value = std::make_shared<typename TestFixture::ScalarType>(
+      static_cast<typename TestFixture::CType>(0));
+  null_value->is_valid = false;
+
+  this->AssertIndexIs("[]", value, -1);
+  this->AssertIndexIs("[0]", value, 0);
+  this->AssertIndexIs("[1, 2, 3, 4]", value, -1);
+  this->AssertIndexIs("[1, 2, 3, 4, 0]", value, 4);
+  this->AssertIndexIs("[null, null, null]", value, -1);
+  this->AssertIndexIs("[null, null, null]", null_value, -1);
+  this->AssertIndexIs("[0, null, null]", null_value, -1);
+  this->AssertIndexIs(chunked_input0, value, 0);
+  this->AssertIndexIs(chunked_input1, value, 1);
+  this->AssertIndexIs(chunked_input2, value, 4);
+  this->AssertIndexIs(chunked_input3, value, -1);
+  this->AssertIndexIs(chunked_input4, value, 5);
+}
+TYPED_TEST(TestNumericIndexKernel, Random) {
+  constexpr auto kChunks = 4;
+  auto rand = random::RandomArrayGenerator(0x5487655);
+  auto value = std::make_shared<typename TestFixture::ScalarType>(
+      static_cast<typename TestFixture::CType>(0));
+
+  // Test chunked array sizes from 32 to 2048
+  for (size_t i = 3; i <= 9; i += 2) {
+    const int64_t chunk_length = static_cast<int64_t>(1) << i;
+    ArrayVector chunks;
+    for (int i = 0; i < kChunks; i++) {
+      chunks.push_back(
+          rand.ArrayOf(this->type_singleton(), chunk_length, /*null_probability=*/0.1));
+    }
+    ChunkedArray chunked_array(std::move(chunks));
+
+    int64_t expected = -1;
+    int64_t index = 0;
+    for (auto chunk : chunked_array.chunks()) {
+      auto typed_chunk = arrow::internal::checked_pointer_cast<
+          typename TypeTraits<TypeParam>::ArrayType>(chunk);
+      for (auto value : *typed_chunk) {
+        if (value.has_value() &&
+            value.value() == static_cast<typename TestFixture::CType>(0)) {
+          expected = index;
+          break;
+        }
+        index++;
+      }
+      if (expected >= 0) break;
+    }
+
+    this->AssertIndexIs(Datum(chunked_array), value, expected);
+  }
+}
+
+template <typename ArrowType>
+class TestDateTimeIndexKernel : public TestIndexKernel<ArrowType> {};
+TYPED_TEST_SUITE(TestDateTimeIndexKernel, TemporalArrowTypes);
+TYPED_TEST(TestDateTimeIndexKernel, Basics) {
+  auto type = this->type_singleton();
+  auto value = std::make_shared<typename TestFixture::ScalarType>(42, type);
+  auto null_value = std::make_shared<typename TestFixture::ScalarType>(42, type);
+  null_value->is_valid = false;
+
+  this->AssertIndexIs("[]", value, -1);
+  this->AssertIndexIs("[42]", value, 0);
+  this->AssertIndexIs("[84, 84, 84, 84]", value, -1);
+  this->AssertIndexIs("[84, 84, 84, 84, 42]", value, 4);
+  this->AssertIndexIs("[null, null, null]", value, -1);
+  this->AssertIndexIs("[null, null, null]", null_value, -1);
+  this->AssertIndexIs("[42, null, null]", null_value, -1);
+}
+
+template <typename ArrowType>
+class TestBooleanIndexKernel : public TestIndexKernel<ArrowType> {};
+TYPED_TEST_SUITE(TestBooleanIndexKernel, ::testing::Types<BooleanType>);
+TYPED_TEST(TestBooleanIndexKernel, Basics) {
+  auto value = std::make_shared<typename TestFixture::ScalarType>(true);
+  auto null_value = std::make_shared<typename TestFixture::ScalarType>(true);
+  null_value->is_valid = false;
+
+  this->AssertIndexIs("[]", value, -1);
+  this->AssertIndexIs("[true]", value, 0);
+  this->AssertIndexIs("[false, false, false, false]", value, -1);
+  this->AssertIndexIs("[false, false, false, false, true]", value, 4);
+  this->AssertIndexIs("[null, null, null]", value, -1);
+  this->AssertIndexIs("[null, null, null]", null_value, -1);
+  this->AssertIndexIs("[true, null, null]", null_value, -1);
+}
+
+template <typename ArrowType>
+class TestStringIndexKernel : public TestIndexKernel<ArrowType> {};
+TYPED_TEST_SUITE(TestStringIndexKernel, BinaryTypes);
+TYPED_TEST(TestStringIndexKernel, Basics) {
+  auto buffer = Buffer::FromString("foo");
+  auto value = std::make_shared<typename TestFixture::ScalarType>(buffer);
+  auto null_value = std::make_shared<typename TestFixture::ScalarType>(buffer);
+  null_value->is_valid = false;
+
+  this->AssertIndexIs(R"([])", value, -1);
+  this->AssertIndexIs(R"(["foo"])", value, 0);
+  this->AssertIndexIs(R"(["bar", "bar", "bar", "bar"])", value, -1);
+  this->AssertIndexIs(R"(["bar", "bar", "bar", "bar", "foo"])", value, 4);
+  this->AssertIndexIs(R"([null, null, null])", value, -1);
+  this->AssertIndexIs(R"([null, null, null])", null_value, -1);
+  this->AssertIndexIs(R"(["foo", null, null])", null_value, -1);
+}
+
 //
 // Mode
 //
diff --git a/cpp/src/arrow/compute/kernels/codegen_internal.h b/cpp/src/arrow/compute/kernels/codegen_internal.h
index 7b394565f7d..e31771a89ca 100644
--- a/cpp/src/arrow/compute/kernels/codegen_internal.h
+++ b/cpp/src/arrow/compute/kernels/codegen_internal.h
@@ -349,7 +349,7 @@ template <typename T, typename VisitFunc, typename NullFunc>
 static typename arrow::internal::call_traits::enable_if_return<VisitFunc, Status>::type
 VisitArrayValuesInline(const ArrayData& arr, VisitFunc&& valid_func,
                        NullFunc&& null_func) {
-  VisitArrayDataInline<T>(
+  return VisitArrayDataInline<T>(
       arr,
       [&](typename GetViewType<T>::PhysicalType v) {
         return valid_func(GetViewType<T>::LogicalValue(std::move(v)));
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index ca68a31cc21..caf75a0b5ef 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -192,6 +192,8 @@ Aggregations
 +--------------------------+------------+--------------------+-----------------------+--------------------------------------------+
 | count                    | Unary      | Any                | Scalar Int64          | :struct:`ScalarAggregateOptions`           |
 +--------------------------+------------+--------------------+-----------------------+--------------------------------------------+
+| index                    | Unary      | Any                | Scalar Int64          | :struct:`IndexOptions`                     |
++--------------------------+------------+--------------------+-----------------------+--------------------------------------------+
 | mean                     | Unary      | Numeric            | Scalar Float64        | :struct:`ScalarAggregateOptions`           |
 +--------------------------+------------+--------------------+-----------------------+--------------------------------------------+
 | min_max                  | Unary      | Numeric            | Scalar Struct  (1)    | :struct:`ScalarAggregateOptions`           |
diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst
index 61482f49f19..eacb061dcbc 100644
--- a/docs/source/python/api/compute.rst
+++ b/docs/source/python/api/compute.rst
@@ -28,6 +28,7 @@ Aggregations
    :toctree: ../generated/
 
    count
+   index
    mean
    min_max
    mode
diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx
index aea72c457e0..debea53b17b 100644
--- a/python/pyarrow/_compute.pyx
+++ b/python/pyarrow/_compute.pyx
@@ -841,6 +841,32 @@ class ScalarAggregateOptions(_ScalarAggregateOptions):
         self._set_options(skip_nulls, min_count)
 
 
+cdef class _IndexOptions(FunctionOptions):
+    cdef:
+        unique_ptr[CIndexOptions] index_options
+
+    cdef const CFunctionOptions* get_options(self) except NULL:
+        return self.index_options.get()
+
+    def _set_options(self, Scalar scalar):
+        self.index_options.reset(
+            new CIndexOptions(pyarrow_unwrap_scalar(scalar)))
+
+
+class IndexOptions(_IndexOptions):
+    """
+    Options for the index kernel.
+
+    Parameters
+    ----------
+    value : Scalar
+        The value to search for.
+    """
+
+    def __init__(self, value):
+        self._set_options(value)
+
+
 cdef class _ModeOptions(FunctionOptions):
     cdef:
         unique_ptr[CModeOptions] mode_options
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 3da5033ac47..278b29000f6 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -1122,6 +1122,14 @@ cdef class Array(_PandasConvertible):
         """
         return _pc().filter(self, mask, null_selection_behavior)
 
+    def index(self, value, start=None, end=None, *, memory_pool=None):
+        """
+        Find the first index of a value.
+
+        See pyarrow.compute.index for full usage.
+        """
+        return _pc().index(self, value, start, end, memory_pool=memory_pool)
+
     def _to_pandas(self, options, **kwargs):
         return _array_like_to_pandas(self, options)
 
diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py
index 6bb0efb5963..9430dd4faf2 100644
--- a/python/pyarrow/compute.py
+++ b/python/pyarrow/compute.py
@@ -34,6 +34,7 @@
     DictionaryEncodeOptions,
     ExtractRegexOptions,
     FilterOptions,
+    IndexOptions,
     MatchSubstringOptions,
     ModeOptions,
     ScalarAggregateOptions,
@@ -454,6 +455,40 @@ def filter(data, mask, null_selection_behavior='drop'):
     return call_function('filter', [data, mask], options)
 
 
+def index(data, value, start=None, end=None, *, memory_pool=None):
+    """
+    Find the index of the first occurrence of a given value.
+
+    Parameters
+    ----------
+    data : Array or ChunkedArray
+    value : Scalar-like object
+    start : int, optional
+    end : int, optional
+
+    Returns
+    -------
+    index : the index, or -1 if not found
+    """
+    if start is not None:
+        if end is not None:
+            data = data.slice(start, end - start)
+        else:
+            data = data.slice(start)
+    elif end is not None:
+        data = data.slice(0, end)
+
+    if not isinstance(value, pa.Scalar):
+        value = pa.scalar(value, type=data.type)
+    elif data.type != value.type:
+        value = pa.scalar(value.as_py(), type=data.type)
+    options = IndexOptions(value=value)
+    result = call_function('index', [data], options, memory_pool)
+    if start is not None and result.as_py() >= 0:
+        result = pa.scalar(result.as_py() + start, type=pa.int64())
+    return result
+
+
 def take(data, indices, *, boundscheck=True, memory_pool=None):
     """
     Select values (or records) from array- or table-like data given integer
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 9b05359bdf4..a8306b47798 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -1883,6 +1883,10 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil:
         CModeOptions(int64_t n)
         int64_t n
 
+    cdef cppclass CIndexOptions \
+            "arrow::compute::IndexOptions"(CFunctionOptions):
+        CIndexOptions(shared_ptr[CScalar] value)
+
     cdef cppclass CPartitionNthOptions \
             "arrow::compute::PartitionNthOptions"(CFunctionOptions):
         CPartitionNthOptions(int64_t pivot)
diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index 662c0e39fd9..f9dcb2aa60b 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -382,6 +382,14 @@ cdef class ChunkedArray(_PandasConvertible):
         """
         return _pc().filter(self, mask, null_selection_behavior)
 
+    def index(self, value, start=None, end=None, *, memory_pool=None):
+        """
+        Find the first index of a value.
+
+        See pyarrow.compute.index for full usage.
+        """
+        return _pc().index(self, value, start, end, memory_pool=memory_pool)
+
     def take(self, object indices):
         """
         Select values from a chunked array. See pyarrow.compute.take for full
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index c62ff72acd5..26d52eff08b 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -1152,6 +1152,20 @@ def test_count():
         pc.count(arr, min_count='zzz')
 
 
+def test_index():
+    arr = pa.array([0, 1, None, 3, 4], type=pa.int64())
+    assert pc.index(arr, pa.scalar(0)).as_py() == 0
+    assert pc.index(arr, pa.scalar(2, type=pa.int8())).as_py() == -1
+    assert pc.index(arr, 4).as_py() == 4
+    assert arr.index(3, start=2).as_py() == 3
+    assert arr.index(None).as_py() == -1
+
+    arr = pa.chunked_array([[1, 2], [1, 3]], type=pa.int64())
+    assert arr.index(1).as_py() == 0
+    assert arr.index(1, start=2).as_py() == 2
+    assert arr.index(1, start=1, end=2).as_py() == -1
+
+
 def test_partition_nth():
     data = list(range(100, 140))
     random.shuffle(data)

From 488fcd7abfbeaa7581a5778610a048e67fe8fbe1 Mon Sep 17 00:00:00 2001
From: Dominik Moritz <domoritz@gmail.com>
Date: Sat, 22 May 2021 11:03:02 -0700
Subject: [PATCH 286/719] ARROW-12832: [JS] Write benchmarks in TypeScript

Closes #10361 from domoritz/ts-perf

Authored-by: Dominik Moritz <domoritz@gmail.com>
Signed-off-by: Brian Hulette <hulettbh@gmail.com>
---
 js/DEVELOP.md                    |   4 +-
 js/package.json                  |   2 +-
 js/perf/{config.js => config.ts} |  45 +++++--------
 js/perf/{index.js => index.ts}   | 108 +++++++++++++++++--------------
 js/test/generate-test-data.ts    |   4 +-
 js/tsconfig.json                 |   5 +-
 6 files changed, 83 insertions(+), 85 deletions(-)
 rename js/perf/{config.js => config.ts} (50%)
 rename js/perf/{index.js => index.ts} (66%)

diff --git a/js/DEVELOP.md b/js/DEVELOP.md
index 88ec899e5a2..d97d3c65ddc 100644
--- a/js/DEVELOP.md
+++ b/js/DEVELOP.md
@@ -72,7 +72,9 @@ Uses [lerna](https://github.com/lerna/lerna) to publish each build target to npm
 
 # Running the Performance Benchmarks
 
-First, compile the bundles with `yarn build` and generate perf data with `yarn create:perfdata`. Then you can run the benchmarks with `yarn perf`. You can change the target you want to test by changing the imports in `perf/index.js`. To print the results to stderr as JSON, add the `--json` flag (e.g. `yarn perf --json 2> perf.json`).
+First, generate perf data with `yarn create:perfdata`. Then you can run the benchmarks with `yarn perf`. To print the results to stderr as JSON, add the `--json` flag (e.g. `yarn perf --json 2> perf.json`).
+
+You can change the target you want to test by changing the imports in `perf/index.ts`. Note that you need to compile the bundles with `yarn build` before you can import them.
 
 # Updating the Arrow format flatbuffers generated code
 
diff --git a/js/package.json b/js/package.json
index 1774f0076ba..bea0ec5a8d6 100644
--- a/js/package.json
+++ b/js/package.json
@@ -11,7 +11,7 @@
     "build": "cross-env NODE_NO_WARNINGS=1 gulp build",
     "clean": "cross-env NODE_NO_WARNINGS=1 gulp clean",
     "debug": "cross-env NODE_NO_WARNINGS=1 gulp debug",
-    "perf": "node ./perf/index.js",
+    "perf": "ts-node-transpile-only ./perf/index.ts",
     "test:integration": "node ./bin/integration.js --mode validate",
     "create:perfdata": "python ./test/data/tables/generate.py ./test/data/tables/tracks.arrow",
     "release": "./npm-release.sh",
diff --git a/js/perf/config.js b/js/perf/config.ts
similarity index 50%
rename from js/perf/config.js
rename to js/perf/config.ts
index f733c67f933..c40254f9f8c 100644
--- a/js/perf/config.js
+++ b/js/perf/config.ts
@@ -15,34 +15,23 @@
 // specific language governing permissions and limitations
 // under the License.
 
-const fs = require('fs');
-const path = require('path');
-const glob = require('glob');
+import {readFileSync} from 'fs';
+import {resolve, parse} from 'path';
+import {sync} from 'glob';
 
-const config = [];
-const filenames = glob.sync(path.resolve(__dirname, `../test/data/tables/`, `*.arrow`));
+const filenames = sync(resolve(__dirname, `../test/data/tables/`, `*.arrow`));
 
-const countBys = {
-    tracks: ['origin', 'destination']
-}
-const counts = {
-    tracks: [
-        {column: 'lat',    test: 'gt', value: 0        },
-        {column: 'lng',    test: 'gt', value: 0        },
-        {column: 'origin', test: 'eq', value: 'Seattle'},
-    ]
-}
+export default filenames.map(filename => {
+    const { name } = parse(filename);
+    return {
+        name,
+        buffers: [readFileSync(filename)],
+        countBys: ['origin', 'destination'],
+        counts: [
+            {column: 'lat',    test: 'gt' as 'gt' | 'eq', value: 0        },
+            {column: 'lng',    test: 'gt' as 'gt' | 'eq', value: 0        },
+            {column: 'origin', test: 'eq' as 'gt' | 'eq', value: 'Seattle'},
+        ],
+    };
+});
 
-for (const filename of filenames) {
-    const { name } = path.parse(filename);
-    if (name in counts) {
-        config.push({
-            name,
-            buffers: [fs.readFileSync(filename)],
-            countBys: countBys[name],
-            counts: counts[name],
-        });
-    }
-}
-
-module.exports = config;
diff --git a/js/perf/index.js b/js/perf/index.ts
similarity index 66%
rename from js/perf/index.js
rename to js/perf/index.ts
index 57f170ed4e9..7c73033a663 100644
--- a/js/perf/index.js
+++ b/js/perf/index.ts
@@ -15,27 +15,35 @@
 // specific language governing permissions and limitations
 // under the License.
 
-// Use the ES5 UMD target as perf baseline
-// const { predicate, Table, RecordBatchReader } = require('../targets/es5/umd');
-// const { predicate, Table, RecordBatchReader } = require('../targets/es5/cjs');
-// const { predicate, Table, RecordBatchReader } = require('../targets/es2015/umd');
-const { predicate, Table, DataFrame, RecordBatchReader } = require('../targets/es2015/cjs');
-const kleur = require('kleur');
-const b = require('benny');
+// Alternatively, use bundles for performance tests
+// import * as Arrow from '../targets/es5/umd';
+// import * as Arrow from '../targets/es5/cjs';
+// import * as Arrow from '../targets/es2015/umd';
+// import * as Arrow from '../targets/es2015/cjs';
+
+import * as Arrow from '../src/Arrow';
+
+import config from './config';
+import b from 'benny';
+import { CaseResult, Summary } from 'benny/lib/internal/common-types';
+import kleur from 'kleur';
+
+const { predicate, Table, DataFrame, RecordBatchReader } = Arrow;
 const { col } = predicate;
 
+
 const args = process.argv.slice(2);
 const json = args[0] === '--json';
 
 const formatter = new Intl.NumberFormat();
-function formatNumber(number, precision) {
+function formatNumber(number: number, precision = 0) {
     const rounded = number > precision * 10 ? Math.round(number) : parseFloat((number).toPrecision(precision));
-    return formatter.format(rounded)
+    return formatter.format(rounded);
 }
 
-const results = []
+const results: CaseResult[] = [];
 
-function cycle(result, _summary) {
+function cycle(result: CaseResult, _summary: Summary) {
     const duration = result.details.median * 1000;
     if (json) {
         results.push(result);
@@ -45,7 +53,7 @@ function cycle(result, _summary) {
     );
 }
 
-for (const { name, buffers } of require('./config')) {
+for (const { name, buffers } of config) {
     b.suite(
         `Parse "${name}"`,
 
@@ -54,31 +62,31 @@ for (const { name, buffers } of require('./config')) {
         }),
 
         b.add(`readBatches`, () => {
-            for (recordBatch of RecordBatchReader.from(buffers)) {}
+            for (const _recordBatch of RecordBatchReader.from(buffers)) {}
         }),
 
         b.cycle(cycle)
     );
 
-    const table = Table.from(buffers)
+    const table = Table.from(buffers);
     const schema = table.schema;
 
     const suites = [{
             name: `Get "${name}" values by index`,
-            fn(vector) {
+            fn(vector: Arrow.Column<any>) {
                 for (let i = -1, n = vector.length; ++i < n;) {
-                    value = vector.get(i);
+                    vector.get(i);
                 }
             }
         }, {
             name: `Iterate "${name}" vectors`,
-            fn(vector) { for (value of vector) {} }
+            fn(vector: Arrow.Column<any>) { for (const _value of vector) {} }
         }, {
             name: `Slice toArray "${name}" vectors`,
-            fn(vector) { xs = vector.slice().toArray(); }
+            fn(vector: Arrow.Column<any>) { vector.slice().toArray(); }
         }, {
             name: `Slice "${name}" vectors`,
-            fn(vector) { xs = vector.slice(); }
+            fn(vector: Arrow.Column<any>) { vector.slice(); }
         }];
 
     for (const {name, fn} of suites) {
@@ -86,10 +94,10 @@ for (const { name, buffers } of require('./config')) {
             name,
 
             ...schema.fields.map((f, i) => {
-                const vector = table.getColumnAt(i);
+                const vector = table.getColumnAt(i)!;
                 return b.add(`name: '${f.name}', length: ${formatNumber(vector.length)}, type: ${vector.type}`, () => {
-                    fn(vector)
-                })
+                    fn(vector);
+                });
             }),
 
             b.cycle(cycle)
@@ -98,14 +106,14 @@ for (const { name, buffers } of require('./config')) {
 }
 
 
-for (const { name, buffers, countBys, counts } of require('./config')) {
+for (const { name, buffers, countBys, counts } of config) {
     const df = DataFrame.from(buffers);
 
     b.suite(
         `DataFrame Iterate "${name}"`,
 
         b.add(`length: ${formatNumber(df.length)}`, () => {
-            for (value of df) {}
+            for (const _value of df) {}
         }),
 
         b.cycle(cycle)
@@ -114,8 +122,8 @@ for (const { name, buffers, countBys, counts } of require('./config')) {
     b.suite(
         `DataFrame Count By "${name}"`,
 
-        ...countBys.map((column) => b.add(
-            `name: '${column}', length: ${formatNumber(df.length)}, type: ${df.schema.fields.find((c)=> c.name === column).type}`,
+        ...countBys.map((column: string) => b.add(
+            `name: '${column}', length: ${formatNumber(df.length)}, type: ${df.schema.fields.find((c)=> c.name === column)!.type}`,
             () => df.countBy(column)
         )),
 
@@ -125,10 +133,10 @@ for (const { name, buffers, countBys, counts } of require('./config')) {
     b.suite(
         `DataFrame Filter-Scan Count "${name}"`,
 
-        ...counts.map(({ column, test, value }) => b.add(
-            `name: '${column}', length: ${formatNumber(df.length)}, type: ${df.schema.fields.find((c)=> c.name === column).type}, test: ${test}, value: ${value}`,
+        ...counts.map(({ column, test, value }: {column: string; test: 'gt' | 'eq'; value: number | string}) => b.add(
+            `name: '${column}', length: ${formatNumber(df.length)}, type: ${df.schema.fields.find((c)=> c.name === column)!.type}, test: ${test}, value: ${value}`,
             () => {
-                let filteredDf;
+                let filteredDf: Arrow.FilteredDataFrame;
                 if (test == 'gt') {
                     filteredDf = df.filter(col(column).gt(value));
                 } else if (test == 'eq') {
@@ -147,10 +155,10 @@ for (const { name, buffers, countBys, counts } of require('./config')) {
     b.suite(
         `DataFrame Filter-Iterate "${name}"`,
 
-        ...counts.map(({ column, test, value }) => b.add(
-            `name: '${column}', length: ${formatNumber(df.length)}, type: ${df.schema.fields.find((c)=> c.name === column).type}, test: ${test}, value: ${value}`,
+        ...counts.map(({ column, test, value }: {column: string; test: 'gt' | 'eq'; value: number | string}) => b.add(
+            `name: '${column}', length: ${formatNumber(df.length)}, type: ${df.schema.fields.find((c)=> c.name === column)!.type}, test: ${test}, value: ${value}`,
             () => {
-                let filteredDf;
+                let filteredDf: Arrow.FilteredDataFrame;
                 if (test == 'gt') {
                     filteredDf = df.filter(col(column).gt(value));
                 } else if (test == 'eq') {
@@ -160,8 +168,8 @@ for (const { name, buffers, countBys, counts } of require('./config')) {
                 }
 
                 return () => {
-                    for (value of filteredDf) {}
-                }
+                    for (const _value of filteredDf) {}
+                };
             }
         )),
 
@@ -171,43 +179,43 @@ for (const { name, buffers, countBys, counts } of require('./config')) {
     b.suite(
         `DataFrame Direct Count "${name}"`,
 
-        ...counts.map(({ column, test, value }) => b.add(
-            `name: '${column}', length: ${formatNumber(df.length)}, type: ${df.schema.fields.find((c)=> c.name === column).type}, test: ${test}, value: ${value}`,
+        ...counts.map(({ column, test, value }: {column: string; test: 'gt' | 'eq'; value: number | string}) => b.add(
+            `name: '${column}', length: ${formatNumber(df.length)}, type: ${df.schema.fields.find((c)=> c.name === column)!.type}, test: ${test}, value: ${value}`,
             () => {
-                let colidx = df.schema.fields.findIndex((c)=> c.name === column);
+                const colidx = df.schema.fields.findIndex((c)=> c.name === column);
 
                 if (test == 'gt') {
                     return () => {
-                        sum = 0;
-                        let batches = df.chunks;
-                        let numBatches = batches.length;
+                        let sum = 0;
+                        const batches = df.chunks;
+                        const numBatches = batches.length;
                         for (let batchIndex = -1; ++batchIndex < numBatches;) {
                             // load batches
                             const batch = batches[batchIndex];
-                            const vector = batch.getChildAt(colidx);
+                            const vector = batch.getChildAt(colidx)!;
                             // yield all indices
                             for (let index = -1, length = batch.length; ++index < length;) {
-                                sum += (vector.get(index) >= value);
+                                sum += (vector.get(index) >= value) ? 1 : 0;
                             }
                         }
                         return sum;
-                    }
+                    };
                 } else if (test == 'eq') {
                     return () => {
-                        sum = 0;
-                        let batches = df.chunks;
-                        let numBatches = batches.length;
+                        let sum = 0;
+                        const batches = df.chunks;
+                        const numBatches = batches.length;
                         for (let batchIndex = -1; ++batchIndex < numBatches;) {
                             // load batches
                             const batch = batches[batchIndex];
-                            const vector = batch.getChildAt(colidx);
+                            const vector = batch.getChildAt(colidx)!;
                             // yield all indices
                             for (let index = -1, length = batch.length; ++index < length;) {
-                                sum += (vector.get(index) === value);
+                                sum += (vector.get(index) === value) ?  1 : 0;
                             }
                         }
                         return sum;
-                    }
+                    };
                 } else {
                     throw new Error(`Unrecognized test "${test}"`);
                 }
@@ -218,7 +226,7 @@ for (const { name, buffers, countBys, counts } of require('./config')) {
 
         b.complete(() => {
             // last benchmark finished
-            json && process.stderr.write(JSON.stringify(results, null, 2))
+            json && process.stderr.write(JSON.stringify(results, null, 2));
         })
     );
 }
diff --git a/js/test/generate-test-data.ts b/js/test/generate-test-data.ts
index 4572be61379..3b83bd149f2 100644
--- a/js/test/generate-test-data.ts
+++ b/js/test/generate-test-data.ts
@@ -580,9 +580,7 @@ type TypedArrayConstructor =
 
 const rand = Math.random.bind(Math);
 const randomBytes = (length: number) => fillRandom(Uint8Array, length);
-const randomString = ((opts) =>
-    (length: number) => randomatic('?', length, opts)
-)({ chars: `abcdefghijklmnopqrstuvwxyz0123456789_` });
+const randomString = (length: number) => randomatic('?', length, { chars: `abcdefghijklmnopqrstuvwxyz0123456789_` });
 
 const memoize = (fn: () => any) => ((x?: any) => () => x || (x = fn()))();
 
diff --git a/js/tsconfig.json b/js/tsconfig.json
index 20163756487..d61218686d6 100644
--- a/js/tsconfig.json
+++ b/js/tsconfig.json
@@ -7,7 +7,8 @@
   "compilerOptions": {
     "target": "ESNEXT",
     "module": "commonjs",
-    "noEmit": true
+    "noEmit": true,
+    "esModuleInterop": true
   },
-  "include": ["src/**/*.ts", "test/**/*.ts"]
+  "include": ["src/**/*.ts", "test/**/*.ts", "perf/**/*.ts"]
 }

From 7c944e8e066405364c15c6c6de39d696b2ebc413 Mon Sep 17 00:00:00 2001
From: P42 <72252241+p42-ai[bot]@users.noreply.github.com>
Date: Sat, 22 May 2021 11:17:45 -0700
Subject: [PATCH 287/719] ARROW-12798: [JS] Use == null Comparison

The `== null` check is a concise expression to identify nullish values (`null` and `undefined`).

This refactoring replaces the following combinations of longer strict equality checks with the shorter `null` comparison:

* `a === null || a === undefined` becomes `a == null`
* `b !== null && b !== undefined` becomes `b != null`
* `x.f(1, 2) === null || x.f(1, 2) === undefined` becomes `x.f(1, 2) == null`

Learn More: [Equality comparisons and sameness (MDN)](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Equality_comparisons_and_sameness), [Equality table](https://dorey.github.io/JavaScript-Equality-Table/)

When two similar-looking function calls have a side effect, this refactoring can change the behavior of the code.

For example, the refactoring changes:

```javascript
let a = f(1) === null || f(1) === undefined;
```

into

```javascript
let a = f(1) == null;
```

If `f(1)` has a side effect, it would have been called once or twice before the refactoring, and once after the refactoring.
This means that the side effect would have been called a different number of times, potentially changing the behavior.

Closes #10338 from domoritz/p42/eq_eq_null/1621098523557

Authored-by: P42 <72252241+p42-ai[bot]@users.noreply.github.com>
Signed-off-by: Brian Hulette <hulettbh@gmail.com>
---
 js/src/ipc/writer.ts   | 2 +-
 js/src/vector/index.ts | 2 +-
 js/src/vector/row.ts   | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/js/src/ipc/writer.ts b/js/src/ipc/writer.ts
index 83703c15526..12aa83355f0 100644
--- a/js/src/ipc/writer.ts
+++ b/js/src/ipc/writer.ts
@@ -163,7 +163,7 @@ export class RecordBatchWriter<T extends { [key: string]: DataType } = any> exte
 
         if (!this._sink) {
             throw new Error(`RecordBatchWriter is closed`);
-        } else if (payload === null || payload === undefined) {
+        } else if (payload == null) {
             return this.finish() && undefined;
         } else if (payload instanceof Table && !(schema = payload.schema)) {
             return this.finish() && undefined;
diff --git a/js/src/vector/index.ts b/js/src/vector/index.ts
index af001931995..30f5e3cfa8a 100644
--- a/js/src/vector/index.ts
+++ b/js/src/vector/index.ts
@@ -191,7 +191,7 @@ function wrapNullableGet<T extends DataType, V extends Vector<T>, F extends (i:
 /** @ignore */
 function wrapNullableSet<T extends DataType, V extends BaseVector<T>, F extends (i: number, a: any) => void>(fn: F): (...args: Parameters<F>) => void {
     return function(this: V, i: number, a: any) {
-        if (setBool(this.nullBitmap, this.offset + i, !(a === null || a === undefined))) {
+        if (setBool(this.nullBitmap, this.offset + i, !((a == null)))) {
             fn.call(this, i, a);
         }
     };
diff --git a/js/src/vector/row.ts b/js/src/vector/row.ts
index a759a8096e8..23d1b5440f8 100644
--- a/js/src/vector/row.ts
+++ b/js/src/vector/row.ts
@@ -54,7 +54,7 @@ abstract class Row<K extends PropertyKey = any, V = any> implements Map<K, V> {
 
     public get(key: K) {
         let val = undefined;
-        if (key !== null && key !== undefined) {
+        if (key != null) {
             const ktoi = this[kKeyToIdx] || (this[kKeyToIdx] = new Map());
             let idx = ktoi.get(key);
             if (idx !== undefined) {
@@ -70,7 +70,7 @@ abstract class Row<K extends PropertyKey = any, V = any> implements Map<K, V> {
     }
 
     public set(key: K, val: V) {
-        if (key !== null && key !== undefined) {
+        if (key != null) {
             const ktoi = this[kKeyToIdx] || (this[kKeyToIdx] = new Map());
             let idx = ktoi.get(key);
             if (idx === undefined) {

From 478203453127b648ce717396fe4fb03165287878 Mon Sep 17 00:00:00 2001
From: Dominik Moritz <domoritz@gmail.com>
Date: Sun, 23 May 2021 05:09:43 +0900
Subject: [PATCH 288/719] ARROW-12808: [JS] Document browser support

Closes #10360 from domoritz/drop-ie

Authored-by: Dominik Moritz <domoritz@gmail.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 js/README.md | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/js/README.md b/js/README.md
index b51a7896874..201a128b06c 100644
--- a/js/README.md
+++ b/js/README.md
@@ -200,6 +200,12 @@ The JS community is a diverse group with a varied list of target environments an
 
 If you think we missed a compilation target and it's a blocker for adoption, please open an issue.
 
+### Supported Browsers and Platforms
+
+The bundles we compile support moderns browser released in the last 5 years. This includes supported versions of
+Firefox, Chrome, Edge, and Safari. We do not actively support Internet Explorer.
+Apache Arrow also works on [maintained versions of Node](https://nodejs.org/en/about/releases/).
+
 # People
 
 Full list of broader Apache Arrow [committers](https://arrow.apache.org/committers/).

From 5ca16287a389afceabdd4b487d2e43e62745abcc Mon Sep 17 00:00:00 2001
From: Dominik Moritz <domoritz@gmail.com>
Date: Mon, 24 May 2021 05:47:20 +0900
Subject: [PATCH 289/719] ARROW-12833: [JS] Construct perf data in JS

Closes #10362 from domoritz/perf-data

Authored-by: Dominik Moritz <domoritz@gmail.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 js/DEVELOP.md                   |  2 +-
 js/package.json                 |  1 -
 js/perf/config.ts               | 63 ++++++++++++++++++++++++++-------
 js/perf/index.ts                | 21 +++++------
 js/src/io/interfaces.ts         |  2 +-
 js/test/data/tables/generate.py | 50 --------------------------
 6 files changed, 64 insertions(+), 75 deletions(-)
 delete mode 100644 js/test/data/tables/generate.py

diff --git a/js/DEVELOP.md b/js/DEVELOP.md
index d97d3c65ddc..cba4faf3aa5 100644
--- a/js/DEVELOP.md
+++ b/js/DEVELOP.md
@@ -72,7 +72,7 @@ Uses [lerna](https://github.com/lerna/lerna) to publish each build target to npm
 
 # Running the Performance Benchmarks
 
-First, generate perf data with `yarn create:perfdata`. Then you can run the benchmarks with `yarn perf`. To print the results to stderr as JSON, add the `--json` flag (e.g. `yarn perf --json 2> perf.json`).
+You can run the benchmarks with `yarn perf`. To print the results to stderr as JSON, add the `--json` flag (e.g. `yarn perf --json 2> perf.json`).
 
 You can change the target you want to test by changing the imports in `perf/index.ts`. Note that you need to compile the bundles with `yarn build` before you can import them.
 
diff --git a/js/package.json b/js/package.json
index bea0ec5a8d6..da8f65a55f8 100644
--- a/js/package.json
+++ b/js/package.json
@@ -13,7 +13,6 @@
     "debug": "cross-env NODE_NO_WARNINGS=1 gulp debug",
     "perf": "ts-node-transpile-only ./perf/index.ts",
     "test:integration": "node ./bin/integration.js --mode validate",
-    "create:perfdata": "python ./test/data/tables/generate.py ./test/data/tables/tracks.arrow",
     "release": "./npm-release.sh",
     "clean:all": "run-p clean clean:testdata",
     "clean:testdata": "gulp clean:testdata",
diff --git a/js/perf/config.ts b/js/perf/config.ts
index c40254f9f8c..f9915c440c0 100644
--- a/js/perf/config.ts
+++ b/js/perf/config.ts
@@ -15,23 +15,62 @@
 // specific language governing permissions and limitations
 // under the License.
 
-import {readFileSync} from 'fs';
-import {resolve, parse} from 'path';
-import {sync} from 'glob';
+import * as Arrow from '../src/Arrow.dom';
 
-const filenames = sync(resolve(__dirname, `../test/data/tables/`, `*.arrow`));
+// from https://stackoverflow.com/a/19303725/214950
+let seed = 1;
+function random() {
+    const x = Math.sin(seed++) * 10000;
+    return x - Math.floor(x);
+}
 
-export default filenames.map(filename => {
-    const { name } = parse(filename);
-    return {
-        name,
-        buffers: [readFileSync(filename)],
+console.time('Prepare Data');
+
+const LENGTH = 100000;
+const NUM_BATCHES = 10;
+
+const values = Arrow.Utf8Vector.from(['Charlottesville', 'New York', 'San Francisco', 'Seattle', 'Terre Haute', 'Washington, DC']);
+
+const batches = Array.from({length: NUM_BATCHES}).map(() => {
+    const lat = Float32Array.from(
+        { length: LENGTH },
+        () => ((random() - 0.5) * 2 * 90));
+    const lng = Float32Array.from(
+        { length: LENGTH },
+        () => ((random() - 0.5) * 2 * 90));
+
+    const origin = Uint8Array.from(
+        { length: LENGTH },
+        () => (random() * 6));
+    const destination = Uint8Array.from(
+        { length: LENGTH },
+        () => (random() * 6));
+
+    const originType = new Arrow.Dictionary(values.type, new Arrow.Int8, 0, false);
+    const destinationType = new Arrow.Dictionary(values.type, new Arrow.Int8, 0, false);
+
+    return Arrow.RecordBatch.new({
+        'lat': Arrow.Float32Vector.from(lat),
+        'lng': Arrow.Float32Vector.from(lng),
+        'origin': Arrow.Vector.new(Arrow.Data.Dictionary(originType, 0, origin.length, 0, null, origin, values)),
+        'destination': Arrow.Vector.new(Arrow.Data.Dictionary(destinationType, 0, destination.length, 0, null, destination, values)),
+    });
+});
+
+const tracks = new Arrow.Table(batches[0].schema, batches);
+
+console.timeEnd('Prepare Data');
+
+export default [
+    {
+        name: 'tracks',
+        df: tracks,
+        ipc: tracks.serialize(),
         countBys: ['origin', 'destination'],
         counts: [
             {column: 'lat',    test: 'gt' as 'gt' | 'eq', value: 0        },
             {column: 'lng',    test: 'gt' as 'gt' | 'eq', value: 0        },
             {column: 'origin', test: 'eq' as 'gt' | 'eq', value: 'Seattle'},
         ],
-    };
-});
-
+    }
+];
diff --git a/js/perf/index.ts b/js/perf/index.ts
index 7c73033a663..506f16b60f4 100644
--- a/js/perf/index.ts
+++ b/js/perf/index.ts
@@ -28,7 +28,7 @@ import b from 'benny';
 import { CaseResult, Summary } from 'benny/lib/internal/common-types';
 import kleur from 'kleur';
 
-const { predicate, Table, DataFrame, RecordBatchReader } = Arrow;
+const { predicate, Table, RecordBatchReader } = Arrow;
 const { col } = predicate;
 
 
@@ -53,23 +53,26 @@ function cycle(result: CaseResult, _summary: Summary) {
     );
 }
 
-for (const { name, buffers } of config) {
+for (const { name, ipc, df } of config) {
     b.suite(
         `Parse "${name}"`,
 
         b.add(`Table.from`, () => {
-            Table.from(buffers);
+            Table.from(ipc);
         }),
 
         b.add(`readBatches`, () => {
-            for (const _recordBatch of RecordBatchReader.from(buffers)) {}
+            for (const _recordBatch of RecordBatchReader.from(ipc)) {}
+        }),
+
+        b.add(`serialize`, () => {
+            df.serialize();
         }),
 
         b.cycle(cycle)
     );
 
-    const table = Table.from(buffers);
-    const schema = table.schema;
+    const schema = df.schema;
 
     const suites = [{
             name: `Get "${name}" values by index`,
@@ -94,7 +97,7 @@ for (const { name, buffers } of config) {
             name,
 
             ...schema.fields.map((f, i) => {
-                const vector = table.getColumnAt(i)!;
+                const vector = df.getColumnAt(i)!;
                 return b.add(`name: '${f.name}', length: ${formatNumber(vector.length)}, type: ${vector.type}`, () => {
                     fn(vector);
                 });
@@ -106,9 +109,7 @@ for (const { name, buffers } of config) {
 }
 
 
-for (const { name, buffers, countBys, counts } of config) {
-    const df = DataFrame.from(buffers);
-
+for (const { name, df, countBys, counts } of config) {
     b.suite(
         `DataFrame Iterate "${name}"`,
 
diff --git a/js/src/io/interfaces.ts b/js/src/io/interfaces.ts
index febd0ea997e..4b5641ff13a 100644
--- a/js/src/io/interfaces.ts
+++ b/js/src/io/interfaces.ts
@@ -174,6 +174,6 @@ export class AsyncQueue<TReadable = Uint8Array, TWritable = TReadable> extends R
         if (this._closedPromiseResolve) {
             return true;
         }
-        throw new Error(`${this} is closed`);
+        throw new Error(`AsyncQueue is closed`);
     }
 }
diff --git a/js/test/data/tables/generate.py b/js/test/data/tables/generate.py
deleted file mode 100644
index 36def01620d..00000000000
--- a/js/test/data/tables/generate.py
+++ /dev/null
@@ -1,50 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import pyarrow as pa
-import random
-import numpy as np
-import pandas as pd
-
-
-cities = [u'Charlottesville', u'New York', u'San Francisco', u'Seattle', u'Terre Haute', u'Washington, DC']
-
-def generate_batch(batch_len):
-    return pa.RecordBatch.from_arrays([
-        pa.Array.from_pandas(pd.Series(np.random.uniform(-90,90,batch_len), dtype="float32")),
-        pa.Array.from_pandas(pd.Series(np.random.uniform(-180,180,batch_len), dtype="float32")),
-        pa.Array.from_pandas(pd.Categorical((random.choice(cities) for i in range(batch_len)), cities)),
-        pa.Array.from_pandas(pd.Categorical((random.choice(cities) for i in range(batch_len)), cities))
-    ], ['lat', 'lng', 'origin', 'destination'])
-
-def write_record_batches(filename, batch_len, num_batches):
-    with pa.ipc.RecordBatchStreamWriter(filename, generate_batch(1).schema) as writer:
-        for _ in range(num_batches):
-            writer.write_batch(generate_batch(batch_len))
-
-if __name__ == "__main__":
-    import argparse
-
-    parser = argparse.ArgumentParser()
-    parser.add_argument('filename', help='number of batches')
-    parser.add_argument('-n', '--num-batches', help='number of batches', type=int, default=10)
-    parser.add_argument('-b', '--batch-size', help='size of each batch', type=int, default=100000)
-
-    args = parser.parse_args()
-
-    print("Writing {} {}-element batches to '{}'".format(args.num_batches, args.batch_size, args.filename))
-    write_record_batches(args.filename, args.batch_size, args.num_batches)

From ae9c05d93520bf85612e730cdf9103e1c8aaa15b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Mon, 24 May 2021 13:34:52 +0200
Subject: [PATCH 290/719] ARROW-12812: [Packaging][Java] Improve JNI jars build
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #10329 from kszucs/improve-jars-build

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 ...ile => java-jni-manylinux-201x.dockerfile} |   6 +-
 .../java_bundled_jars_check_dependencies.sh   |  52 -------
 ci/scripts/java_bundled_jars_macos_build.sh   |  92 -------------
 .../java_bundled_jars_manylinux_build.sh      | 124 -----------------
 ...d_jars_java_build.sh => java_jni_build.sh} |  43 ++++--
 ci/scripts/java_jni_macos_build.sh            | 114 +++++++++++++++
 ci/scripts/java_jni_manylinux_build.sh        | 130 ++++++++++++++++++
 dev/archery/archery/cli.py                    |  28 ++++
 dev/archery/archery/linking.py                |  75 ++++++++++
 dev/tasks/{jars => java-jars}/README.md       |   0
 dev/tasks/{jars => java-jars}/github.yml      |  69 ++++------
 dev/tasks/tasks.yml                           |   9 +-
 docker-compose.yml                            |  14 +-
 13 files changed, 417 insertions(+), 339 deletions(-)
 rename ci/docker/{java-bundled-jars.dockerfile => java-jni-manylinux-201x.dockerfile} (94%)
 delete mode 100755 ci/scripts/java_bundled_jars_check_dependencies.sh
 delete mode 100755 ci/scripts/java_bundled_jars_macos_build.sh
 delete mode 100755 ci/scripts/java_bundled_jars_manylinux_build.sh
 rename ci/scripts/{java_bundled_jars_java_build.sh => java_jni_build.sh} (57%)
 create mode 100755 ci/scripts/java_jni_macos_build.sh
 create mode 100755 ci/scripts/java_jni_manylinux_build.sh
 create mode 100644 dev/archery/archery/linking.py
 rename dev/tasks/{jars => java-jars}/README.md (100%)
 rename dev/tasks/{jars => java-jars}/github.yml (55%)

diff --git a/ci/docker/java-bundled-jars.dockerfile b/ci/docker/java-jni-manylinux-201x.dockerfile
similarity index 94%
rename from ci/docker/java-bundled-jars.dockerfile
rename to ci/docker/java-jni-manylinux-201x.dockerfile
index 96274b26dd9..021dab686f3 100644
--- a/ci/docker/java-bundled-jars.dockerfile
+++ b/ci/docker/java-jni-manylinux-201x.dockerfile
@@ -14,6 +14,7 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
+
 ARG base
 FROM ${base}
 
@@ -30,8 +31,7 @@ RUN vcpkg install --clean-after-build \
         boost-variant \
         boost-multiprecision
 
-# Install dependencies
+# Install Java
 ARG java=1.8.0
 RUN yum install -y java-$java-openjdk-devel && yum clean all
-
-ENV JAVA_HOME=/usr/lib/jvm/java-$java-openjdk/
\ No newline at end of file
+ENV JAVA_HOME=/usr/lib/jvm/java-$java-openjdk/
diff --git a/ci/scripts/java_bundled_jars_check_dependencies.sh b/ci/scripts/java_bundled_jars_check_dependencies.sh
deleted file mode 100755
index b13d57036fd..00000000000
--- a/ci/scripts/java_bundled_jars_check_dependencies.sh
+++ /dev/null
@@ -1,52 +0,0 @@
-#!/bin/bash
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-set -e
-
-function check_dynamic_dependencies(){
-  local so_dep=$1
-  local library=$2
-  shift 2
-  local whitelist=("$@")
-
-  # print the shared library dependencies
-  $so_dep "$library" | tee dependencies_temp_file.txt 
-
-  # exit if any shared library not in whitelisted set is found
-  echo "Checking shared dependencies"
-  awk '{print $1}' dependencies_temp_file.txt | \
-  while read -r line
-  do
-    found=false
-  
-    for item in "${whitelist[@]}"
-    do
-    if [[ "$line" == *"$item"* ]] ; then
-      found=true
-    fi
-  done
-
-  if [[ "$found" == false ]] ; then
-    echo "Unexpected shared dependency found in $library : $line"
-    exit 1
-  fi
-  done
-
-  rm dependencies_temp_file.txt
-}
\ No newline at end of file
diff --git a/ci/scripts/java_bundled_jars_macos_build.sh b/ci/scripts/java_bundled_jars_macos_build.sh
deleted file mode 100755
index c050c087b85..00000000000
--- a/ci/scripts/java_bundled_jars_macos_build.sh
+++ /dev/null
@@ -1,92 +0,0 @@
-#!/bin/bash
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-set -ex
-
-arrow_dir=${1}
-build_dir=${2}
-# The directory where the final binaries will be stored when scripts finish
-distribution_dir=${3}
-source_dir=${arrow_dir}/cpp
-
-export ARROW_TEST_DATA="${arrow_dir}/testing/data"
-export PARQUET_TEST_DATA="${source_dir}/submodules/parquet-testing/data"
-export AWS_EC2_METADATA_DISABLED=TRUE
-
-# Builds arrow + gandiva and tests the same.
-mkdir -p "${build_dir}"
-pushd "${build_dir}"
-  CMAKE_FLAGS="-DCMAKE_BUILD_TYPE=Release \
-        -DARROW_GANDIVA=ON \
-        -DARROW_GANDIVA_JAVA=ON \
-        -DARROW_GANDIVA_STATIC_LIBSTDCPP=ON \
-        -DARROW_ORC=ON \
-        -DARROW_JNI=ON \
-        -DARROW_PLASMA=ON \
-        -DARROW_PLASMA_JAVA_CLIENT=ON \
-        -DARROW_BUILD_TESTS=ON \
-        -DARROW_BUILD_UTILITIES=OFF \
-        -DPARQUET_REQUIRE_ENCRYPTION=OFF \
-        -DARROW_PARQUET=ON \
-        -DPARQUET_BUILD_EXAMPLES=OFF \
-        -DPARQUET_BUILD_EXECUTABLES=OFF \
-        -DARROW_FILESYSTEM=ON \
-        -DARROW_DATASET=ON \
-        -DARROW_BOOST_USE_SHARED=OFF \
-        -DARROW_PROTOBUF_USE_SHARED=OFF \
-        -DARROW_GFLAGS_USE_SHARED=OFF \
-        -DARROW_OPENSSL_USE_SHARED=OFF \
-        -DARROW_BROTLI_USE_SHARED=OFF \
-        -DARROW_BZ2_USE_SHARED=OFF \
-        -DARROW_GRPC_USE_SHARED=OFF \
-        -DARROW_LZ4_USE_SHARED=OFF \
-        -DARROW_SNAPPY_USE_SHARED=OFF \
-        -DARROW_THRIFT_USE_SHARED=OFF \
-        -DARROW_UTF8PROC_USE_SHARED=OFF \
-        -DARROW_ZSTD_USE_SHARED=OFF \
-        -DCMAKE_INSTALL_PREFIX=${build_dir} \
-        -DCMAKE_INSTALL_LIBDIR=lib"
-
-  cmake $CMAKE_FLAGS $source_dir
-  make -j4
-  make install
-  ctest
-
-  # Copy all generated libraries to the distribution folder
-  mkdir -p "${distribution_dir}"
-  cp -L ${build_dir}/lib/libgandiva_jni.dylib ${distribution_dir}
-  cp -L ${build_dir}/lib/libarrow_dataset_jni.dylib ${distribution_dir}
-  cp -L ${build_dir}/lib/libarrow_orc_jni.dylib ${distribution_dir}
-popd
-
-#Check if any libraries contains an unwhitelisted shared dependency
-source $arrow_dir/ci/scripts/java_bundled_jars_check_dependencies.sh
-SO_DEP="otool -L"
-
-GANDIVA_LIB=$distribution_dir/libgandiva_jni.dylib
-DATASET_LIB=$distribution_dir/libarrow_dataset_jni.dylib
-ORC_LIB=$distribution_dir/libarrow_orc_jni.dylib
-LIBRARIES=($GANDIVA_LIB $ORC_LIB $DATASET_LIB)
-
-WHITELIST=(libgandiva_jni libarrow_orc_jni libarrow_dataset_jni libz libncurses libSystem libc++)
-
-for library in "${LIBRARIES[@]}"
-do
-  check_dynamic_dependencies $SO_DEP $library "${WHITELIST[@]}"  
-done
\ No newline at end of file
diff --git a/ci/scripts/java_bundled_jars_manylinux_build.sh b/ci/scripts/java_bundled_jars_manylinux_build.sh
deleted file mode 100755
index 934c221f11b..00000000000
--- a/ci/scripts/java_bundled_jars_manylinux_build.sh
+++ /dev/null
@@ -1,124 +0,0 @@
-#!/bin/bash
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# Quit on failure
-set -e
-
-arrow_dir=${1}
-build_dir=${2}
-# The directory where the final binaries will be stored when scripts finish
-distribution_dir=${3}
-source_dir=${arrow_dir}/cpp
-
-echo "=== (${PYTHON_VERSION}) Clear output directories and leftovers ==="
-# Clear output directories and leftovers
-rm -rf ${build_dir}
-
-echo "=== (${PYTHON_VERSION}) Building Arrow C++ libraries ==="
-: ${ARROW_DATASET:=ON}
-: ${ARROW_GANDIVA:=ON}
-: ${ARROW_GANDIVA_JAVA:=ON}
-: ${ARROW_FILESYSTEM:=ON}
-: ${ARROW_JEMALLOC:=ON}
-: ${ARROW_RPATH_ORIGIN:=ON}
-: ${ARROW_ORC:=ON}
-: ${ARROW_PARQUET:=ON}
-: ${ARROW_PLASMA:=ON}
-: ${ARROW_PLASMA_JAVA_CLIENT:=ON}
-: ${ARROW_PYTHON:=OFF}
-: ${ARROW_JNI:=ON}
-: ${ARROW_BUILD_TESTS:=ON}
-: ${CMAKE_BUILD_TYPE:=Release}
-: ${CMAKE_UNITY_BUILD:=ON}
-: ${CMAKE_GENERATOR:=Ninja}
-: ${VCPKG_FEATURE_FLAGS:=-manifests}
-: ${VCPKG_TARGET_TRIPLET:=${VCPKG_DEFAULT_TRIPLET:-x64-linux-static-${CMAKE_BUILD_TYPE}}}
-: ${PYTHON_VERSION:=3.7}
-: ${GANDIVA_CXX_FLAGS:=-isystem;/opt/rh/devtoolset-9/root/usr/include/c++/9;-isystem;/opt/rh/devtoolset-9/root/usr/include/c++/9/x86_64-redhat-linux;-isystem;-lpthread}
-
-mkdir -p "${build_dir}"
-pushd "${build_dir}"
-  export ARROW_TEST_DATA="${arrow_dir}/testing/data"
-  export PARQUET_TEST_DATA="${source_dir}/submodules/parquet-testing/data"
-  export AWS_EC2_METADATA_DISABLED=TRUE
-
-  cmake -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
-      -DARROW_DEPENDENCY_SOURCE="VCPKG" \
-      -DCMAKE_INSTALL_PREFIX=${build_dir} \
-      -DCMAKE_INSTALL_LIBDIR=lib \
-      -DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS} \
-      -DARROW_BUILD_SHARED=ON \
-      -DARROW_BOOST_USE_SHARED=OFF \
-      -DARROW_PROTOBUF_USE_SHARED=OFF \
-      -DARROW_OPENSSL_USE_SHARED=OFF \
-      -DARROW_BROTLI_USE_SHARED=OFF \
-      -DARROW_BZ2_USE_SHARED=OFF \
-      -DARROW_GRPC_USE_SHARED=OFF \
-      -DARROW_LZ4_USE_SHARED=OFF \
-      -DARROW_SNAPPY_USE_SHARED=OFF \
-      -DARROW_THRIFT_USE_SHARED=OFF \
-      -DARROW_UTF8PROC_USE_SHARED=OFF \
-      -DARROW_ZSTD_USE_SHARED=OFF \
-      -DARROW_GANDIVA_PC_CXX_FLAGS=${GANDIVA_CXX_FLAGS} \
-      -DARROW_JEMALLOC=${ARROW_JEMALLOC} \
-      -DARROW_RPATH_ORIGIN=${ARROW_RPATH_ORIGIN} \
-      -DARROW_PYTHON=${ARROW_PYTHON} \
-      -DARROW_PARQUET=${ARROW_PARQUET} \
-      -DARROW_DATASET=${ARROW_DATASET} \
-      -DARROW_FILESYSTEM=${ARROW_FILESYSTEM} \
-      -DPARQUET_REQUIRE_ENCRYPTION=OFF \
-      -DPARQUET_BUILD_EXAMPLES=OFF \
-      -DPARQUET_BUILD_EXECUTABLES=OFF \
-      -DPythonInterp_FIND_VERSION=ON \
-      -DPythonInterp_FIND_VERSION_MAJOR=3 \
-      -DARROW_GANDIVA=${ARROW_GANDIVA} \
-      -DARROW_GANDIVA_JAVA=${ARROW_GANDIVA_JAVA} \
-      -DARROW_ORC=${ARROW_ORC} \
-      -DARROW_JNI=${ARROW_JNI} \
-      -DARROW_PLASMA=${ARROW_PLASMA} \
-      -DARROW_PLASMA_JAVA_CLIENT=${ARROW_PLASMA_JAVA_CLIENT} \
-      -DARROW_BUILD_UTILITIES=OFF \
-      -DVCPKG_MANIFEST_MODE=OFF \
-      -DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET} \
-      -GNinja ${source_dir}
-  ninja install
-  CTEST_OUTPUT_ON_FAILURE=1 ninja test
-popd
-
-echo "=== (${PYTHON_VERSION}) Copying libraries to the distribution folder ==="
-mkdir -p "${distribution_dir}"
-cp -L  ${build_dir}/lib/libgandiva_jni.so ${distribution_dir}
-cp -L  ${build_dir}/lib/libarrow_dataset_jni.so ${distribution_dir}
-cp -L  ${build_dir}/lib/libarrow_orc_jni.so ${distribution_dir}
-
-echo "=== (${PYTHON_VERSION}) Checking shared dependencies for libraries ==="
-source $arrow_dir/ci/scripts/java_bundled_jars_check_dependencies.sh
-SO_DEP=ldd
-
-GANDIVA_LIB=$distribution_dir/libgandiva_jni.so
-DATASET_LIB=$distribution_dir/libarrow_dataset_jni.so
-ORC_LIB=$distribution_dir/libarrow_orc_jni.so
-LIBRARIES=($GANDIVA_LIB $ORC_LIB $DATASET_LIB)
-
-WHITELIST=(linux-vdso libz librt libdl libpthread libstdc++ libm libgcc_s libc ld-linux-x86-64)
-
-for library in "${LIBRARIES[@]}"
-do
-  check_dynamic_dependencies $SO_DEP $library "${WHITELIST[@]}"  
-done
\ No newline at end of file
diff --git a/ci/scripts/java_bundled_jars_java_build.sh b/ci/scripts/java_jni_build.sh
similarity index 57%
rename from ci/scripts/java_bundled_jars_java_build.sh
rename to ci/scripts/java_jni_build.sh
index 8ab502e30c8..638d9d11d48 100755
--- a/ci/scripts/java_bundled_jars_java_build.sh
+++ b/ci/scripts/java_jni_build.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-
+#
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
@@ -21,21 +21,38 @@ set -e
 
 arrow_dir=${1}
 cpp_build_dir=${2}
-copy_jar_to_distribution_folder=${3:-true}
 java_dir=${arrow_dir}/java
 
 export ARROW_TEST_DATA=${arrow_dir}/testing/data
 
 pushd $java_dir
-  # build the entire project
-  mvn clean install -DskipTests -P arrow-jni -Darrow.cpp.build.dir=$cpp_build_dir
-  # test jars that have cpp dependencies
-  mvn test -P arrow-jni -pl adapter/orc,gandiva,dataset -Dgandiva.cpp.build.dir=$cpp_build_dir
-
-  if [[ $copy_jar_to_distribution_folder ]] ; then
-    # copy the jars that has cpp dependencies to distribution folder
-    find gandiva/target/ -name "*.jar" -not -name "*tests*" -exec cp  {} $cpp_build_dir \;
-    find adapter/orc/target/ -name "*.jar" -not -name "*tests*" -exec cp  {} $cpp_build_dir \;
-    find dataset/target/ -name "*.jar" -not -name "*tests*" -exec cp  {} $cpp_build_dir \;
-  fi
+
+# build the entire project
+mvn clean install -P arrow-jni -Darrow.cpp.build.dir=$cpp_build_dir
+
+MODULES=(
+  adapter/avro
+  adapter/jdbc
+  adapter/orc
+  algorithm
+  compression
+  dataset
+  flight/flight-core
+  flight/flight-grpc
+  format
+  gandiva
+  memory/memory-core
+  memory/memory-netty
+  memory/memory-unsafe
+  performance
+  plasma
+  tools
+  vector
+)
+
+# copy all jars to distribution folder, excluding the unit tests
+for module in "${MODULES[@]}"; do
+  find $module/target/ -name "*.jar" -not -name "*tests*" -not -name "*benchmarks*" -exec cp  {} $cpp_build_dir \;
+done
+
 popd
diff --git a/ci/scripts/java_jni_macos_build.sh b/ci/scripts/java_jni_macos_build.sh
new file mode 100755
index 00000000000..5c11ee97584
--- /dev/null
+++ b/ci/scripts/java_jni_macos_build.sh
@@ -0,0 +1,114 @@
+#!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+arrow_dir=${1}
+build_dir=${2}
+# The directory where the final binaries will be stored when scripts finish
+dist_dir=${3}
+
+echo "=== Clear output directories and leftovers ==="
+# Clear output directories and leftovers
+rm -rf ${build_dir}
+
+echo "=== Building Arrow C++ libraries ==="
+: ${ARROW_BUILD_TESTS:=ON}
+: ${ARROW_DATASET:=ON}
+: ${ARROW_FILESYSTEM:=ON}
+: ${ARROW_GANDIVA_JAVA:=ON}
+: ${ARROW_GANDIVA:=ON}
+: ${ARROW_ORC:=ON}
+: ${ARROW_PARQUET:=ON}
+: ${ARROW_PLASMA_JAVA_CLIENT:=ON}
+: ${ARROW_PLASMA:=ON}
+: ${ARROW_PYTHON:=OFF}
+: ${CMAKE_BUILD_TYPE:=Release}
+: ${CMAKE_UNITY_BUILD:=ON}
+
+export ARROW_TEST_DATA="${arrow_dir}/testing/data"
+export PARQUET_TEST_DATA="${arrow_dir}/cpp/submodules/parquet-testing/data"
+export AWS_EC2_METADATA_DISABLED=TRUE
+
+mkdir -p "${build_dir}"
+pushd "${build_dir}"
+
+cmake \
+  -DARROW_BOOST_USE_SHARED=OFF \
+  -DARROW_BROTLI_USE_SHARED=OFF \
+  -DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS} \
+  -DARROW_BUILD_UTILITIES=OFF \
+  -DARROW_BZ2_USE_SHARED=OFF \
+  -DARROW_DATASET=${ARROW_DATASET} \
+  -DARROW_FILESYSTEM=${ARROW_FILESYSTEM} \
+  -DARROW_GANDIVA_JAVA=${ARROW_GANDIVA_JAVA} \
+  -DARROW_GANDIVA_STATIC_LIBSTDCPP=ON \
+  -DARROW_GANDIVA=${ARROW_GANDIVA} \
+  -DARROW_GFLAGS_USE_SHARED=OFF \
+  -DARROW_GRPC_USE_SHARED=OFF \
+  -DARROW_JNI=ON \
+  -DARROW_LZ4_USE_SHARED=OFF \
+  -DARROW_OPENSSL_USE_SHARED=OFF \
+  -DARROW_ORC=${ARROW_ORC} \
+  -DARROW_PARQUET=${ARROW_PARQUET} \
+  -DARROW_PLASMA_JAVA_CLIENT=${ARROW_PLASMA_JAVA_CLIENT} \
+  -DARROW_PLASMA=${ARROW_PLASMA} \
+  -DARROW_PROTOBUF_USE_SHARED=OFF \
+  -DARROW_PYTHON=${ARROW_PYTHON} \
+  -DARROW_SNAPPY_USE_SHARED=OFF \
+  -DARROW_THRIFT_USE_SHARED=OFF \
+  -DARROW_UTF8PROC_USE_SHARED=OFF \
+  -DARROW_ZSTD_USE_SHARED=OFF \
+  -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
+  -DCMAKE_INSTALL_LIBDIR=lib \
+  -DCMAKE_INSTALL_PREFIX=${build_dir} \
+  -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD} \
+  -DPARQUET_BUILD_EXAMPLES=OFF \
+  -DPARQUET_BUILD_EXECUTABLES=OFF \
+  -DPARQUET_REQUIRE_ENCRYPTION=OFF \
+  ${arrow_dir}/cpp
+cmake --build . --target install
+
+if [ "${ARROW_BUILD_TESTS}" == "ON" ]; then
+  ctest
+fi
+
+popd
+
+echo "=== Copying libraries to the distribution folder ==="
+mkdir -p "${dist_dir}"
+cp -L ${build_dir}/lib/libgandiva_jni.dylib ${dist_dir}
+cp -L ${build_dir}/lib/libarrow_dataset_jni.dylib ${dist_dir}
+cp -L ${build_dir}/lib/libarrow_orc_jni.dylib ${dist_dir}
+
+echo "=== Checking shared dependencies for libraries ==="
+
+pushd ${dist_dir}
+archery linking check-dependencies \
+  --allow libarrow_dataset_jni \
+  --allow libarrow_orc_jni \
+  --allow libc++ \
+  --allow libgandiva_jni \
+  --allow libncurses \
+  --allow libSystem \
+  --allow libz \
+  libgandiva_jni.dylib \
+  libarrow_dataset_jni.dylib \
+  libarrow_orc_jni.dylib
+popd
diff --git a/ci/scripts/java_jni_manylinux_build.sh b/ci/scripts/java_jni_manylinux_build.sh
new file mode 100755
index 00000000000..4d01c1c30d1
--- /dev/null
+++ b/ci/scripts/java_jni_manylinux_build.sh
@@ -0,0 +1,130 @@
+#!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+arrow_dir=${1}
+build_dir=${2}
+# The directory where the final binaries will be stored when scripts finish
+dist_dir=${3}
+
+echo "=== Clear output directories and leftovers ==="
+# Clear output directories and leftovers
+rm -rf ${build_dir}
+
+echo "=== Building Arrow C++ libraries ==="
+: ${ARROW_DATASET:=ON}
+: ${ARROW_GANDIVA:=ON}
+: ${ARROW_GANDIVA_JAVA:=ON}
+: ${ARROW_FILESYSTEM:=ON}
+: ${ARROW_JEMALLOC:=ON}
+: ${ARROW_RPATH_ORIGIN:=ON}
+: ${ARROW_ORC:=ON}
+: ${ARROW_PARQUET:=ON}
+: ${ARROW_PLASMA:=ON}
+: ${ARROW_PLASMA_JAVA_CLIENT:=ON}
+: ${ARROW_PYTHON:=OFF}
+: ${ARROW_BUILD_TESTS:=ON}
+: ${CMAKE_BUILD_TYPE:=Release}
+: ${CMAKE_UNITY_BUILD:=ON}
+: ${VCPKG_FEATURE_FLAGS:=-manifests}
+: ${VCPKG_TARGET_TRIPLET:=${VCPKG_DEFAULT_TRIPLET:-x64-linux-static-${CMAKE_BUILD_TYPE}}}
+: ${GANDIVA_CXX_FLAGS:=-isystem;/opt/rh/devtoolset-9/root/usr/include/c++/9;-isystem;/opt/rh/devtoolset-9/root/usr/include/c++/9/x86_64-redhat-linux;-isystem;-lpthread}
+
+export ARROW_TEST_DATA="${arrow_dir}/testing/data"
+export PARQUET_TEST_DATA="${arrow_dir}/cpp/submodules/parquet-testing/data"
+export AWS_EC2_METADATA_DISABLED=TRUE
+
+mkdir -p "${build_dir}"
+pushd "${build_dir}"
+
+cmake \
+  -DARROW_BOOST_USE_SHARED=OFF \
+  -DARROW_BROTLI_USE_SHARED=OFF \
+  -DARROW_BUILD_SHARED=ON \
+  -DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS} \
+  -DARROW_BUILD_UTILITIES=OFF \
+  -DARROW_BZ2_USE_SHARED=OFF \
+  -DARROW_DATASET=${ARROW_DATASET} \
+  -DARROW_DEPENDENCY_SOURCE="VCPKG" \
+  -DARROW_FILESYSTEM=${ARROW_FILESYSTEM} \
+  -DARROW_GANDIVA_JAVA=${ARROW_GANDIVA_JAVA} \
+  -DARROW_GANDIVA_PC_CXX_FLAGS=${GANDIVA_CXX_FLAGS} \
+  -DARROW_GANDIVA=${ARROW_GANDIVA} \
+  -DARROW_GRPC_USE_SHARED=OFF \
+  -DARROW_JEMALLOC=${ARROW_JEMALLOC} \
+  -DARROW_JNI=ON \
+  -DARROW_LZ4_USE_SHARED=OFF \
+  -DARROW_OPENSSL_USE_SHARED=OFF \
+  -DARROW_ORC=${ARROW_ORC} \
+  -DARROW_PARQUET=${ARROW_PARQUET} \
+  -DARROW_PLASMA_JAVA_CLIENT=${ARROW_PLASMA_JAVA_CLIENT} \
+  -DARROW_PLASMA=${ARROW_PLASMA} \
+  -DARROW_PROTOBUF_USE_SHARED=OFF \
+  -DARROW_PYTHON=${ARROW_PYTHON} \
+  -DARROW_RPATH_ORIGIN=${ARROW_RPATH_ORIGIN} \
+  -DARROW_SNAPPY_USE_SHARED=OFF \
+  -DARROW_THRIFT_USE_SHARED=OFF \
+  -DARROW_UTF8PROC_USE_SHARED=OFF \
+  -DARROW_ZSTD_USE_SHARED=OFF \
+  -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
+  -DCMAKE_INSTALL_LIBDIR=lib \
+  -DCMAKE_INSTALL_PREFIX=${build_dir} \
+  -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD} \
+  -DPARQUET_BUILD_EXAMPLES=OFF \
+  -DPARQUET_BUILD_EXECUTABLES=OFF \
+  -DPARQUET_REQUIRE_ENCRYPTION=OFF \
+  -DPythonInterp_FIND_VERSION_MAJOR=3 \
+  -DPythonInterp_FIND_VERSION=ON \
+  -DVCPKG_MANIFEST_MODE=OFF \
+  -DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET} \
+  -GNinja \
+  ${arrow_dir}/cpp
+ninja install
+
+if [ $ARROW_BUILD_TESTS = "ON" ]; then
+  CTEST_OUTPUT_ON_FAILURE=1 ninja test
+fi
+
+popd
+
+echo "=== Copying libraries to the distribution folder ==="
+mkdir -p "${dist_dir}"
+cp -L ${build_dir}/lib/libgandiva_jni.so ${dist_dir}
+cp -L ${build_dir}/lib/libarrow_dataset_jni.so ${dist_dir}
+cp -L ${build_dir}/lib/libarrow_orc_jni.so ${dist_dir}
+
+echo "=== Checking shared dependencies for libraries ==="
+
+pushd ${dist_dir}
+archery linking check-dependencies \
+  --allow ld-linux-x86-64 \
+  --allow libc \
+  --allow libdl \
+  --allow libgcc_s \
+  --allow libm \
+  --allow libpthread \
+  --allow librt \
+  --allow libstdc++ \
+  --allow libz \
+  --allow linux-vdso \
+  libgandiva_jni.so \
+  libarrow_dataset_jni.so \
+  libarrow_orc_jni.so
+popd
diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py
index 1e70ee29128..c35b0864900 100644
--- a/dev/archery/archery/cli.py
+++ b/dev/archery/archery/cli.py
@@ -1152,6 +1152,34 @@ def release_cherry_pick(obj, version, dry_run, recreate):
         click.echo('git cherry-pick {}'.format(commit.hexsha))
 
 
+@archery.group("linking")
+@click.pass_obj
+def linking(obj):
+    """
+    Quick and dirty utilities for checking library linkage.
+    """
+    pass
+
+
+@linking.command("check-dependencies")
+@click.argument("paths", nargs=-1)
+@click.option("--allow", "-a", "allowed", multiple=True,
+              help="Name of the allowed libraries")
+@click.option("--disallow", "-d", "disallowed", multiple=True,
+              help="Name of the disallowed libraries")
+@click.pass_obj
+def linking_check_dependencies(obj, allowed, disallowed, paths):
+    from .linking import check_dynamic_library_dependencies, DependencyError
+
+    allowed, disallowed = set(allowed), set(disallowed)
+    try:
+        for path in map(pathlib.Path, paths):
+            check_dynamic_library_dependencies(path, allowed=allowed,
+                                               disallowed=disallowed)
+    except DependencyError as e:
+        raise click.ClickException(str(e))
+
+
 try:
     from .crossbow.cli import crossbow  # noqa
 except ImportError as exc:
diff --git a/dev/archery/archery/linking.py b/dev/archery/archery/linking.py
new file mode 100644
index 00000000000..c2e6f1772fa
--- /dev/null
+++ b/dev/archery/archery/linking.py
@@ -0,0 +1,75 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import platform
+import subprocess
+
+from .utils.command import Command
+
+
+_ldd = Command("ldd")
+_otool = Command("otool")
+
+
+class DependencyError(Exception):
+    pass
+
+
+class DynamicLibrary:
+
+    def __init__(self, path):
+        self.path = path
+
+    def list_dependencies(self):
+        """
+        List the full name of the library dependencies.
+        """
+        system = platform.system()
+        if system == "Linux":
+            result = _ldd.run(self.path, stdout=subprocess.PIPE)
+            lines = result.stdout.splitlines()
+            return [ll.split(None, 1)[0].decode() for ll in lines]
+        elif system == "Darwin":
+            result = _otool.run("-L", self.path, stdout=subprocess.PIPE)
+            lines = result.stdout.splitlines()
+            return [dl.split(None, 1)[0].decode() for dl in lines]
+        else:
+            raise ValueError(f"{platform} is not supported")
+
+    def list_dependency_names(self):
+        """
+        List the truncated names of the dynamic library dependencies.
+        """
+        names = []
+        for dependency in self.list_dependencies():
+            *_, library = dependency.rsplit("/", 1)
+            name, *_ = library.split(".", 1)
+            names.append(name)
+        return names
+
+
+def check_dynamic_library_dependencies(path, allowed, disallowed):
+    dylib = DynamicLibrary(path)
+    for dep in dylib.list_dependency_names():
+        if allowed and dep not in allowed:
+            raise DependencyError(
+                f"Unexpected shared dependency found in {dylib.path}: `{dep}`"
+            )
+        if disallowed and dep in disallowed:
+            raise DependencyError(
+                f"Disallowed shared dependency found in {dylib.path}: `{dep}`"
+            )
diff --git a/dev/tasks/jars/README.md b/dev/tasks/java-jars/README.md
similarity index 100%
rename from dev/tasks/jars/README.md
rename to dev/tasks/java-jars/README.md
diff --git a/dev/tasks/jars/github.yml b/dev/tasks/java-jars/github.yml
similarity index 55%
rename from dev/tasks/jars/github.yml
rename to dev/tasks/java-jars/github.yml
index 773c4fcda36..117aede6089 100644
--- a/dev/tasks/jars/github.yml
+++ b/dev/tasks/java-jars/github.yml
@@ -20,57 +20,44 @@
 {{ macros.github_header() }}
 
 jobs:
+
   build-cpp-ubuntu:
     name: Build C++ Libs Ubuntu
     runs-on: ubuntu-18.04
     steps:
-      - name: Checkout Arrow
-        run: |
-          git clone --no-checkout {{ arrow.remote }} arrow
-          git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
-          if [ $CROSSBOW_USE_COMMIT_ID = true ]; then git -C arrow checkout {{ arrow.head }}; else git -C arrow checkout FETCH_HEAD; fi
-          git -C arrow submodule update --init --recursive
-      - name: Setup Python
-        uses: actions/setup-python@v1
-        with:
-          python-version: 3.8
-      - name: Setup Archery
-        run: pip install -e arrow/dev/archery[docker]
+      {{ macros.github_checkout_arrow()|indent }}
+      {{ macros.github_install_archery()|indent }}
       - name: Build C++ Libs
-        run: |
-          python3 -VV
-          cd arrow
-          mkdir -p dist
-          export CC="gcc-4.9" CXX="g++-4.9"
-          ulimit -c unlimited -S
-          set -e
-          archery docker build java-bundled-jars
-          archery docker run java-bundled-jars
+        run: archery docker run java-jni-manylinux-2014
       - name: Compress into single artifact
-        run: tar -cvzf arrow-shared-libs-linux.tar.gz arrow/dist/
+        run: tar -cvzf arrow-shared-libs-linux.tar.gz arrow/java/dist/
       - name: Upload Artifacts
         uses: actions/upload-artifact@v2
         with:
           name: ubuntu-shared-lib
           path: arrow-shared-libs-linux.tar.gz
+    {% if arrow.branch == 'master' %}
+      {{ macros.github_login_dockerhub()|indent }}
+      - name: Push Docker Image
+        shell: bash
+        run: archery docker push java-jni-manylinux-2014
+    {% endif %}
+
   build-cpp-macos:
     name: Build C++ Libs MacOS
     runs-on: macos-latest
+    env:
+      MACOSX_DEPLOYMENT_TARGET: "10.11"
     steps:
-      - name: Checkout Arrow
-        run: |
-          git clone --no-checkout {{ arrow.remote }} arrow
-          git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
-          if [ $CROSSBOW_USE_COMMIT_ID = true ]; then git -C arrow checkout {{ arrow.head }}; else git -C arrow checkout FETCH_HEAD; fi
-          git -C arrow submodule update --init --recursive
+      {{ macros.github_checkout_arrow()|indent }}
+      {{ macros.github_install_archery()|indent }}
       - name: Build C++ Libs
         run: |
           set -e
-          arrow/ci/scripts/java_bundled_jars_macos_build.sh $GITHUB_WORKSPACE/arrow \
+          arrow/ci/scripts/java_jni_macos_build.sh \
+            $GITHUB_WORKSPACE/arrow \
             $GITHUB_WORKSPACE/arrow/cpp-build \
             $GITHUB_WORKSPACE/arrow/dist
-        env:
-          MACOSX_DEPLOYMENT_TARGET: "10.11"
       - name: Compress into single artifact
         run: tar -cvzf arrow-shared-libs-macos.tar.gz arrow/dist/
       - name: Upload Artifacts
@@ -78,18 +65,13 @@ jobs:
         with:
           name: macos-shared-lib
           path: arrow-shared-libs-macos.tar.gz
-  package-jar:
+
+  package-jars:
     name: Build Jar Files
     runs-on: macos-latest
     needs: [build-cpp-macos, build-cpp-ubuntu]
     steps:
-      - name: Checkout Arrow
-        run: |
-          git clone --no-checkout {{ arrow.remote }} arrow
-          git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
-          if [ $CROSSBOW_USE_COMMIT_ID = true ]; then git -C arrow checkout {{ arrow.head }}; else git -C arrow checkout FETCH_HEAD; fi
-          git -C arrow submodule update --init --recursive
-          mkdir -p arrow/dist
+      {{ macros.github_checkout_arrow()|indent }}
       - name: Download Linux C++ Libraries
         uses: actions/download-artifact@v2
         with:
@@ -99,14 +81,13 @@ jobs:
         with:
           name: macos-shared-lib
       - name: Descompress artifacts
-        run: | 
+        run: |
           tar -xvzf arrow-shared-libs-macos.tar.gz
           tar -xvzf arrow-shared-libs-linux.tar.gz
       - name: Build Jar
         run: |
           set -e
-          arrow/ci/scripts/java_bundled_jars_java_build.sh $GITHUB_WORKSPACE/arrow \
+          arrow/ci/scripts/java_jni_build.sh \
+            $GITHUB_WORKSPACE/arrow \
             $GITHUB_WORKSPACE/arrow/dist \
-            true
-          
-      {{ macros.github_upload_releases("arrow/dist/*.jar")|indent }}
\ No newline at end of file
+      {{ macros.github_upload_releases("arrow/dist/*.jar")|indent }}
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index e60cffffde3..b08b9888963 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -52,7 +52,7 @@ groups:
     - centos-*
     - conda-*
     - debian-*
-    - jars
+    - java-jars
     - nuget
     - python-sdist
     - ubuntu-*
@@ -125,7 +125,7 @@ groups:
     - ubuntu-*
     - centos-*
     - conda-*
-    - jars
+    - java-jars
     # List the homebrews explicitly because we don't care about running homebrew-cpp-autobrew
     - homebrew-cpp
     - homebrew-r-autobrew
@@ -630,11 +630,12 @@ tasks:
 
   ############################## Arrow JAR's ##################################
 
-  jars:
+  java-jars:
     # Build jar's that contains cpp libraries dependencies
     ci: github
-    template: jars/github.yml
+    template: java-jars/github.yml
     artifacts:
+      #TODO(kszucs): need to list the rest of the jars here
       - arrow-gandiva-{no_rc_version}-SNAPSHOT.jar
       - arrow-orc-{no_rc_version}-SNAPSHOT.jar
       - arrow-dataset-{no_rc_version}-SNAPSHOT.jar
diff --git a/docker-compose.yml b/docker-compose.yml
index 215aa2c6b7a..20a743f8c31 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -124,7 +124,7 @@ x-hierarchy:
   - postgres
   - python-wheel-manylinux-2010
   - python-wheel-manylinux-2014:
-    - java-bundled-jars
+    - java-jni-manylinux-2014
   - python-wheel-manylinux-test-imports
   - python-wheel-manylinux-test-unittests
   - python-wheel-windows-vs2017
@@ -816,24 +816,24 @@ services:
         target: "C:/arrow"
     command: arrow\\ci\\scripts\\python_wheel_windows_test.bat
 
-  java-bundled-jars:
-    # Docker image
-    image: ${REPO}:${ARCH}-java-bundled-jars-vcpkg-${VCPKG}
+  java-jni-manylinux-2014:
+    image: ${REPO}:${ARCH}-java-jni-manylinux-2014-vcpkg-${VCPKG}
     build:
       args:
         base: ${REPO}:${ARCH}-python-${PYTHON}-wheel-manylinux-2014-vcpkg-${VCPKG}
         java: 1.8.0
       context: .
-      dockerfile: ci/docker/java-bundled-jars.dockerfile
+      dockerfile: ci/docker/java-jni-manylinux-201x.dockerfile
       cache_from:
-        - ${REPO}:${ARCH}-java-bundled-jars-vcpkg-${VCPKG}
+        - ${REPO}:${ARCH}-java-jni-manylinux-2014-vcpkg-${VCPKG}
     environment:
       <<: *ccache
     volumes:
       - .:/arrow:delegated
       - ${DOCKER_VOLUME_PREFIX}python-wheel-manylinux2014-ccache:/ccache:delegated
     command:
-      [/arrow/ci/scripts/java_bundled_jars_manylinux_build.sh /arrow /build /arrow/dist]
+      ["pip install -e /arrow/dev/archery &&
+        /arrow/ci/scripts/java_jni_manylinux_build.sh /arrow /build /arrow/java/dist"]
 
   ##############################  Integration #################################
 

From 8892eaa523f8532e9d0ad1e8bd31b36d6451dd2b Mon Sep 17 00:00:00 2001
From: crystrix <chenxi.li@live.com>
Date: Mon, 24 May 2021 14:38:22 +0200
Subject: [PATCH 291/719] ARROW-12771: [C++][Compute] Fix MaybeReserve
 parameter in the Consume function of GroupedCountImpl
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The parameter of `MaybeReserve` in `GroupedCountImpl`'s `Consume` function is incorrect.
`counts_.length()` means the buffer's byte size, it should be replaced with `num_groups_`.  Otherwise, subsequent chunked arrays with more groups are skipped as `counts_.length()` is actually `num_groups_*sizeof(int64_t)`

Closes #10309 from Crystrix/arrow-12771

Authored-by: crystrix <chenxi.li@live.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 .../arrow/compute/kernels/hash_aggregate.cc   |  2 +-
 .../compute/kernels/hash_aggregate_test.cc    | 51 +++++++++++++++++++
 2 files changed, 52 insertions(+), 1 deletion(-)

diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate.cc b/cpp/src/arrow/compute/kernels/hash_aggregate.cc
index 0b6e9e9b2ea..586a7087dc3 100644
--- a/cpp/src/arrow/compute/kernels/hash_aggregate.cc
+++ b/cpp/src/arrow/compute/kernels/hash_aggregate.cc
@@ -768,7 +768,7 @@ struct GroupedCountImpl : public GroupedAggregator {
   }
 
   Status Consume(const ExecBatch& batch) override {
-    RETURN_NOT_OK(MaybeReserve(counts_.length(), batch, [&](int64_t added_groups) {
+    RETURN_NOT_OK(MaybeReserve(num_groups_, batch, [&](int64_t added_groups) {
       num_groups_ += added_groups;
       return counts_.Append(added_groups * sizeof(int64_t), 0);
     }));
diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc b/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc
index 8e3278b12be..08cf09680ba 100644
--- a/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc
+++ b/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc
@@ -34,6 +34,7 @@
 #include "arrow/compute/kernels/codegen_internal.h"
 #include "arrow/compute/kernels/test_util.h"
 #include "arrow/compute/registry.h"
+#include "arrow/table.h"
 #include "arrow/testing/generator.h"
 #include "arrow/testing/gtest_common.h"
 #include "arrow/testing/gtest_util.h"
@@ -761,5 +762,55 @@ TEST(GroupBy, RandomArraySum) {
   }
 }
 
+TEST(GroupBy, WithChunkedArray) {
+  auto table =
+      TableFromJSON(schema({field("argument", float64()), field("key", int64())}),
+                    {R"([{"argument": 1.0,   "key": 1},
+                         {"argument": null,  "key": 1}
+                        ])",
+                     R"([{"argument": 0.0,   "key": 2},
+                         {"argument": null,  "key": 3},
+                         {"argument": 4.0,   "key": null},
+                         {"argument": 3.25,  "key": 1},
+                         {"argument": 0.125, "key": 2},
+                         {"argument": -0.25, "key": 2},
+                         {"argument": 0.75,  "key": null},
+                         {"argument": null,  "key": 3}
+                        ])"});
+  CountOptions count_options;
+  ASSERT_OK_AND_ASSIGN(Datum aggregated_and_grouped,
+                       internal::GroupBy(
+                           {
+                               table->GetColumnByName("argument"),
+                               table->GetColumnByName("argument"),
+                               table->GetColumnByName("argument"),
+                           },
+                           {
+                               table->GetColumnByName("key"),
+                           },
+                           {
+                               {"hash_count", &count_options},
+                               {"hash_sum", nullptr},
+                               {"hash_min_max", nullptr},
+                           }));
+
+  AssertDatumsEqual(ArrayFromJSON(struct_({
+                                      field("hash_count", int64()),
+                                      field("hash_sum", float64()),
+                                      field("hash_min_max", struct_({
+                                                                field("min", float64()),
+                                                                field("max", float64()),
+                                                            })),
+                                      field("key_0", int64()),
+                                  }),
+                                  R"([
+    [2, 4.25,   {"min": 1.0,   "max": 3.25},  1],
+    [3, -0.125, {"min": -0.25, "max": 0.125}, 2],
+    [0, null,   {"min": null,  "max": null},  3],
+    [2, 4.75,   {"min": 0.75,  "max": 4.0},   null]
+  ])"),
+                    aggregated_and_grouped,
+                    /*verbose=*/true);
+}
 }  // namespace compute
 }  // namespace arrow

From e6432d37a9954d21e45dab50a25c22f66fe38348 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Mon, 24 May 2021 22:11:59 +0800
Subject: [PATCH 292/719] ARROW-12857: [C++] Fix build of hash_aggregate_test

Closes #10384 from lidavidm/arrow-12857

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Yibo Cai <yibo.cai@arm.com>
---
 cpp/src/arrow/compute/kernels/hash_aggregate_test.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc b/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc
index 08cf09680ba..86ed04e5ad3 100644
--- a/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc
+++ b/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc
@@ -777,7 +777,7 @@ TEST(GroupBy, WithChunkedArray) {
                          {"argument": 0.75,  "key": null},
                          {"argument": null,  "key": 3}
                         ])"});
-  CountOptions count_options;
+  ScalarAggregateOptions count_options;
   ASSERT_OK_AND_ASSIGN(Datum aggregated_and_grouped,
                        internal::GroupBy(
                            {

From b4f2c1c72f745acf12e8a6d2d031750745ab2de2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Tue, 25 May 2021 05:02:09 +0900
Subject: [PATCH 293/719] ARROW-12854: [Dev][Release] Windows wheel
 verification script fails to download artifacts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

See the passing build at https://github.com/apache/arrow/pull/10374#issuecomment-846935656

Closes #10380 from kszucs/ARROW-12854

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 .../verify-release-candidate-wheels.bat       | 30 +++++++++----------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/dev/release/verify-release-candidate-wheels.bat b/dev/release/verify-release-candidate-wheels.bat
index 21a0e3a0bc3..5bcefe80d60 100644
--- a/dev/release/verify-release-candidate-wheels.bat
+++ b/dev/release/verify-release-candidate-wheels.bat
@@ -45,17 +45,24 @@ pushd arrow
 git submodule update --init
 popd
 
+set ARROW_VERSION=%1
+set RC_NUMBER=%2
+
+python arrow\dev\release\download_rc_binaries.py %ARROW_VERSION% %RC_NUMBER% ^
+    --package_type python ^
+    --regex=".*win_amd64.*" || EXIT /B 1
+
 call deactivate
 
 set ARROW_TEST_DATA=%cd%\arrow\testing\data
 
-CALL :verify_wheel 3.6 %1 %2 m
+CALL :verify_wheel 3.6 m
 if errorlevel 1 GOTO error
 
-CALL :verify_wheel 3.7 %1 %2 m
+CALL :verify_wheel 3.7 m
 if errorlevel 1 GOTO error
 
-CALL :verify_wheel 3.8 %1 %2
+CALL :verify_wheel 3.8
 if errorlevel 1 GOTO error
 
 :done
@@ -73,9 +80,7 @@ EXIT /B 1
 :verify_wheel
 
 set PY_VERSION=%1
-set ARROW_VERSION=%2
-set RC_NUMBER=%3
-set ABI_TAG=%4
+set ABI_TAG=%2
 set PY_VERSION_NO_PERIOD=%PY_VERSION:.=%
 
 set CONDA_ENV_PATH=%_VERIFICATION_DIR%\_verify-wheel-%PY_VERSION%
@@ -86,20 +91,15 @@ call activate %CONDA_ENV_PATH%
 
 set WHEEL_FILENAME=pyarrow-%ARROW_VERSION%-cp%PY_VERSION_NO_PERIOD%-cp%PY_VERSION_NO_PERIOD%%ABI_TAG%-win_amd64.whl
 
-@rem Requires GNU Wget for Windows
-wget --no-check-certificate -O %WHEEL_FILENAME% https://apache.jfrog.io/artifactory/arrow/download_file?file_path=python-rc%%2F%ARROW_VERSION%-rc%RC_NUMBER%%%2F%WHEEL_FILENAME% || EXIT /B 1
-
-pip install %WHEEL_FILENAME% || EXIT /B 1
-
-pip install -r arrow/python/requirements-test.txt || EXIT /B 1
-
-py.test %CONDA_ENV_PATH%\Lib\site-packages\pyarrow --pdb -v || EXIT /B 1
-
+pip install python-rc\%ARROW_VERSION%-rc%RC_NUMBER%\%WHEEL_FILENAME% || EXIT /B 1
 python -c "import pyarrow" || EXIT /B 1
 python -c "import pyarrow.parquet" || EXIT /B 1
 python -c "import pyarrow.flight" || EXIT /B 1
 python -c "import pyarrow.dataset" || EXIT /B 1
 
+pip install -r arrow\python\requirements-test.txt || EXIT /B 1
+pytest %CONDA_ENV_PATH%\Lib\site-packages\pyarrow --pdb -v || EXIT /B 1
+
 :done
 
 call deactivate

From 233a76cb3392483424ba54b06d7e7e1fd1780582 Mon Sep 17 00:00:00 2001
From: niranda perera <niranda.perera@gmail.com>
Date: Tue, 25 May 2021 10:54:29 +0900
Subject: [PATCH 294/719] ARROW-12864: [C++] Remove needless out argument from
 arrow::internal::InvertBitmap

We should have removed the out argument in b218a7fdae0792e185579d8cd20748ed0752b9ff.

Closes #10391 from nirandaperera/ARROW-12864

Authored-by: niranda perera <niranda.perera@gmail.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 cpp/src/arrow/util/bitmap_ops.cc | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/cpp/src/arrow/util/bitmap_ops.cc b/cpp/src/arrow/util/bitmap_ops.cc
index 1f9cf19bbd0..32da60aafd9 100644
--- a/cpp/src/arrow/util/bitmap_ops.cc
+++ b/cpp/src/arrow/util/bitmap_ops.cc
@@ -390,8 +390,7 @@ Result<std::shared_ptr<Buffer>> CopyBitmap(MemoryPool* pool, const uint8_t* data
 }
 
 Result<std::shared_ptr<Buffer>> InvertBitmap(MemoryPool* pool, const uint8_t* data,
-                                             int64_t offset, int64_t length,
-                                             std::shared_ptr<Buffer>* out) {
+                                             int64_t offset, int64_t length) {
   return TransferBitmap<TransferMode::Invert>(pool, data, offset, length);
 }
 

From 5380a4dab1a7847fa0d4e8787ef664e9c5b70d2b Mon Sep 17 00:00:00 2001
From: frank400 <j.victorhuguenin2018@gmail.com>
Date: Tue, 25 May 2021 12:14:02 +0900
Subject: [PATCH 295/719] ARROW-12838: [Java][Gandiva] Fix JNI CI test

Fix checkstyle error in JNI CI build

Closes #10367 from jvictorhuguenin/fix-ci-checkstyle-error

Authored-by: frank400 <j.victorhuguenin2018@gmail.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 .../arrow/gandiva/expression/InNode.java      | 28 ++++++++++
 .../gandiva/evaluator/ProjectorTest.java      | 55 +++++++++----------
 2 files changed, 54 insertions(+), 29 deletions(-)

diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/InNode.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/InNode.java
index fef8e311e6c..0f8de962869 100644
--- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/InNode.java
+++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/InNode.java
@@ -59,23 +59,51 @@ private InNode(Set<Integer> values, Set<Long> longValues, Set<String> stringValu
     this.input = node;
   }
 
+  /**
+   * Makes an IN node for int values.
+   *
+   * @param node      Node with the 'IN' clause.
+   * @param intValues Int values to build the IN node.
+   * @retur InNode referring to tree node.
+   */
   public static InNode makeIntInExpr(TreeNode node, Set<Integer> intValues) {
     return new InNode(intValues,
             null, null, null, null, null, null, null,
             null, node);
   }
 
+  /**
+   * Makes an IN node for long values.
+   *
+   * @param node      Node with the 'IN' clause.
+   * @param longValues Long values to build the IN node.
+   * @retur InNode referring to tree node.
+   */
   public static InNode makeLongInExpr(TreeNode node, Set<Long> longValues) {
     return new InNode(null, longValues,
             null, null, null, null, null, null,
             null, node);
   }
 
+  /**
+   * Makes an IN node for float values.
+   *
+   * @param node      Node with the 'IN' clause.
+   * @param floatValues Float values to build the IN node.
+   * @retur InNode referring to tree node.
+   */
   public static InNode makeFloatInExpr(TreeNode node, Set<Float> floatValues) {
     return new InNode(null, null, null, null, null, null,
             null, floatValues, null, node);
   }
 
+  /**
+   * Makes an IN node for double values.
+   *
+   * @param node      Node with the 'IN' clause.
+   * @param doubleValues Double values to build the IN node.
+   * @retur InNode referring to tree node.
+   */
   public static InNode makeDoubleInExpr(TreeNode node, Set<Double> doubleValues) {
     return new InNode(null, null, null, null, null,
             null, null, null, doubleValues, node);
diff --git a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java
index e51f4586124..9844d026ae9 100644
--- a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java
+++ b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java
@@ -1220,10 +1220,10 @@ public void testInExpr() throws GandivaException, Exception {
     output.add(bitVector);
     eval.evaluate(batch, output);
 
-    for (int i = 0; i < 4; i++) {
+    for (int i = 0; i < 5; i++) {
       assertTrue(bitVector.getObject(i).booleanValue());
     }
-    for (int i = 4; i < 16; i++) {
+    for (int i = 5; i < 16; i++) {
       assertFalse(bitVector.getObject(i).booleanValue());
     }
 
@@ -1245,31 +1245,29 @@ public void testInExprDecimal() throws GandivaException, Exception {
     decimalSet.add(new BigDecimal(Long.MAX_VALUE));
     decimalSet.add(new BigDecimal(Long.MIN_VALUE));
     TreeNode inExpr =
-            TreeBuilder.makeInExpressionDecimal(TreeBuilder.makeField(c1),
-                    decimalSet, precision, scale);
+        TreeBuilder.makeInExpressionDecimal(TreeBuilder.makeField(c1),
+            decimalSet, precision, scale);
     ExpressionTree expr = TreeBuilder.makeExpression(inExpr,
-            Field.nullable("result", boolType));
+        Field.nullable("result", boolType));
     Schema schema = new Schema(Lists.newArrayList(c1));
     Projector eval = Projector.make(schema, Lists.newArrayList(expr));
 
-    // Create a row-batch with some sample data to look for
     int numRows = 16;
-    // Only the first 8 values will be valid.
     byte[] validity = new byte[]{(byte) 255, 0};
     String[] c1Values =
-            new String[]{"1", "2", "3", "4", "-0.0", "6", "7", "8", "9", "10", "11", "12", "13", "14",
-                    String.valueOf(Long.MAX_VALUE),
-                    String.valueOf(Long.MIN_VALUE)};
+        new String[]{"1", "2", "3", "4", "-0.0", "6", "7", "8", "9", "10", "11", "12", "13", "14",
+            String.valueOf(Long.MAX_VALUE),
+            String.valueOf(Long.MIN_VALUE)};
 
     DecimalVector c1Data = decimalVector(c1Values, precision, scale);
     ArrowBuf c1Validity = buf(validity);
 
     ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0);
     ArrowRecordBatch batch =
-            new ArrowRecordBatch(
-                    numRows,
-                    Lists.newArrayList(fieldNode, fieldNode),
-                    Lists.newArrayList(c1Validity, c1Data.getDataBuffer(), c1Data.getValidityBuffer()));
+        new ArrowRecordBatch(
+            numRows,
+            Lists.newArrayList(fieldNode, fieldNode),
+            Lists.newArrayList(c1Validity, c1Data.getDataBuffer(), c1Data.getValidityBuffer()));
 
     BitVector bitVector = new BitVector(EMPTY_SCHEMA_PATH, allocator);
     bitVector.allocateNew(numRows);
@@ -1278,11 +1276,10 @@ public void testInExprDecimal() throws GandivaException, Exception {
     output.add(bitVector);
     eval.evaluate(batch, output);
 
-    // The first four values in the vector must match the expression, but not the other ones.
-    for (int i = 0; i < 4; i++) {
+    for (int i = 0; i < 5; i++) {
       assertTrue(bitVector.getObject(i).booleanValue());
     }
-    for (int i = 4; i < 16; i++) {
+    for (int i = 5; i < 16; i++) {
       assertFalse(bitVector.getObject(i).booleanValue());
     }
 
@@ -1296,9 +1293,9 @@ public void testInExprDouble() throws GandivaException, Exception {
     Field c1 = Field.nullable("c1", float64);
 
     TreeNode inExpr =
-            TreeBuilder.makeInExpressionDouble(TreeBuilder.makeField(c1),
-                    Sets.newHashSet(1.0, -0.0, 3.0, 4.0, Double.NaN,
-                            Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY));
+        TreeBuilder.makeInExpressionDouble(TreeBuilder.makeField(c1),
+            Sets.newHashSet(1.0, -0.0, 3.0, 4.0, Double.NaN,
+                Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY));
     ExpressionTree expr = TreeBuilder.makeExpression(inExpr, Field.nullable("result", boolType));
     Schema schema = new Schema(Lists.newArrayList(c1));
     Projector eval = Projector.make(schema, Lists.newArrayList(expr));
@@ -1307,8 +1304,8 @@ public void testInExprDouble() throws GandivaException, Exception {
     int numRows = 16;
     // Only the first 8 values will be valid.
     byte[] validity = new byte[]{(byte) 255, 0};
-    double[] c1Values = new double[]{1, -0.0, Double.NEGATIVE_INFINITY , Double.POSITIVE_INFINITY, Double.NaN,
-        6, 7, 8, 9, 10, 11, 12, 13, 14, 4 , 3};
+    double[] c1Values = new double[]{1, -0.0, Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, Double.NaN,
+        6, 7, 8, 9, 10, 11, 12, 13, 14, 4, 3};
 
     ArrowBuf c1Validity = buf(validity);
     ArrowBuf c1Data = doubleBuf(c1Values);
@@ -1316,10 +1313,10 @@ public void testInExprDouble() throws GandivaException, Exception {
 
     ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0);
     ArrowRecordBatch batch =
-            new ArrowRecordBatch(
-                    numRows,
-                    Lists.newArrayList(fieldNode, fieldNode),
-                    Lists.newArrayList(c1Validity, c1Data, c2Validity));
+        new ArrowRecordBatch(
+            numRows,
+            Lists.newArrayList(fieldNode, fieldNode),
+            Lists.newArrayList(c1Validity, c1Data, c2Validity));
 
     BitVector bitVector = new BitVector(EMPTY_SCHEMA_PATH, allocator);
     bitVector.allocateNew(numRows);
@@ -1328,11 +1325,11 @@ public void testInExprDouble() throws GandivaException, Exception {
     output.add(bitVector);
     eval.evaluate(batch, output);
 
-    // The first five values in the vector must match the expression, but not the other ones.
-    for (int i = 1; i < 5; i++) {
+    // The first four values in the vector must match the expression, but not the other ones.
+    for (int i = 0; i < 4; i++) {
       assertTrue(bitVector.getObject(i).booleanValue());
     }
-    for (int i = 5; i < 16; i++) {
+    for (int i = 4; i < 16; i++) {
       assertFalse(bitVector.getObject(i).booleanValue());
     }
 

From 0dc9bc21c968e38ee2fa47b41213d33c2d2a7d1f Mon Sep 17 00:00:00 2001
From: Anthony Louis <anthony@simbioseventures.com>
Date: Tue, 25 May 2021 09:35:00 +0530
Subject: [PATCH 296/719] ARROW-11565: [C++][Gandiva] Modify upper()/lower() to
 work with UTF8 and add INIT_CAP function

It finishes the implementation that started in the https://github.com/apache/arrow/pull/9450 pull request

Closes #10040 from anthonylouisbsb/feature/fix-upper-lower-for-utf8 and squashes the following commits:

52c11f8d6 <Anthony Louis> Add missing ;
2aaa7d891 <Anthony Louis> Remove utf8proc libs from wheel scripts
253426c04 <Anthony Louis> Add changes for initcap function
5fa60c1eb <Anthony Louis> Change the name of the UTF8PROC dir to include
3c2b25ee1 <Anthony Louis> Remove unnecessary lib references
a8f5e1fc9 <Anthony Louis> Remove gandiva aliases
5f5ec7f53 <Anthony Louis> Add missing function in global engine mapping
1f06fa758 <Anthony Louis> Fix cmake formatting
39dd7712e <Anthony Louis> Fix way library is imported
2d6157bc6 <Anthony Louis> Apply formatter changes
5cb1f8092 <Anthony Louis> Move function to a stub
1d3b7c48b <Sagnik Chakraborty> ARROW-11565:  Modify upper()/lower() logic to make them work for utf8 strings

Lead-authored-by: Anthony Louis <anthony@simbioseventures.com>
Co-authored-by: Sagnik Chakraborty <sagnikc@dremio.com>
Signed-off-by: Praveen <praveen@dremio.com>
---
 cpp/cmake_modules/DefineOptions.cmake         |   2 +-
 cpp/cmake_modules/ThirdpartyToolchain.cmake   |   4 +-
 cpp/src/gandiva/CMakeLists.txt                |   4 +-
 cpp/src/gandiva/function_registry_string.cc   |   8 +-
 cpp/src/gandiva/gdv_function_stubs.cc         | 318 ++++++++++++++++++
 cpp/src/gandiva/gdv_function_stubs.h          |  21 ++
 cpp/src/gandiva/gdv_function_stubs_test.cc    | 194 +++++++++++
 cpp/src/gandiva/precompiled/CMakeLists.txt    |   1 +
 cpp/src/gandiva/precompiled/string_ops.cc     |  61 +---
 .../gandiva/precompiled/string_ops_test.cc    |  22 --
 cpp/src/gandiva/precompiled/types.h           |   3 -
 .../gandiva/evaluator/ProjectorTest.java      |  72 +++-
 12 files changed, 618 insertions(+), 92 deletions(-)

diff --git a/cpp/cmake_modules/DefineOptions.cmake b/cpp/cmake_modules/DefineOptions.cmake
index 0e92811da8c..033076ebdb0 100644
--- a/cpp/cmake_modules/DefineOptions.cmake
+++ b/cpp/cmake_modules/DefineOptions.cmake
@@ -365,7 +365,7 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
 
   define_option(
     ARROW_WITH_UTF8PROC
-    "Build with support for Unicode properties using the utf8proc library;(only used if ARROW_COMPUTE is ON)"
+    "Build with support for Unicode properties using the utf8proc library;(only used if ARROW_COMPUTE is ON or ARROW_GANDIVA is ON)"
     ON)
   define_option(
     ARROW_WITH_RE2
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index 01e818b5375..18941df2260 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -287,10 +287,10 @@ if(ARROW_S3)
   set(ARROW_WITH_ZLIB ON)
 endif()
 
-if(NOT ARROW_COMPUTE)
-  # utf8proc is only potentially used in kernels for now
+if((NOT ARROW_COMPUTE) AND (NOT ARROW_GANDIVA))
   set(ARROW_WITH_UTF8PROC OFF)
 endif()
+
 if((NOT ARROW_COMPUTE) AND (NOT ARROW_GANDIVA) AND (NOT ARROW_WITH_GRPC))
   set(ARROW_WITH_RE2 OFF)
 endif()
diff --git a/cpp/src/gandiva/CMakeLists.txt b/cpp/src/gandiva/CMakeLists.txt
index fcdaf97d526..44b6fab14c3 100644
--- a/cpp/src/gandiva/CMakeLists.txt
+++ b/cpp/src/gandiva/CMakeLists.txt
@@ -138,6 +138,7 @@ add_arrow_lib(gandiva
               EXTRA_INCLUDES
               $<TARGET_PROPERTY:LLVM::LLVM_INTERFACE,INTERFACE_INCLUDE_DIRECTORIES>
               ${GANDIVA_OPENSSL_INCLUDE_DIR}
+              ${UTF8PROC_INCLUDE_DIR}
               SHARED_LINK_FLAGS
               ${GANDIVA_SHARED_LINK_FLAGS}
               SHARED_LINK_LIBS
@@ -239,7 +240,8 @@ add_gandiva_test(internals-test
                  EXTRA_INCLUDES
                  $<TARGET_PROPERTY:LLVM::LLVM_INTERFACE,INTERFACE_INCLUDE_DIRECTORIES>
                  ${GANDIVA_INTERNALS_TEST_ARGUMENTS}
-                 ${GANDIVA_OPENSSL_INCLUDE_DIR})
+                 ${GANDIVA_OPENSSL_INCLUDE_DIR}
+                 ${UTF8PROC_INCLUDE_DIR})
 
 if(ARROW_GANDIVA_JAVA)
   add_subdirectory(jni)
diff --git a/cpp/src/gandiva/function_registry_string.cc b/cpp/src/gandiva/function_registry_string.cc
index e50069e738b..cbc70066306 100644
--- a/cpp/src/gandiva/function_registry_string.cc
+++ b/cpp/src/gandiva/function_registry_string.cc
@@ -63,10 +63,14 @@ std::vector<NativeFunction> GetStringFunctionRegistry() {
       UNARY_SAFE_NULL_NEVER_BOOL_FN(isnotnull, {}),
 
       NativeFunction("upper", {}, DataTypeVector{utf8()}, utf8(), kResultNullIfNull,
-                     "upper_utf8", NativeFunction::kNeedsContext),
+                     "gdv_fn_upper_utf8", NativeFunction::kNeedsContext),
 
       NativeFunction("lower", {}, DataTypeVector{utf8()}, utf8(), kResultNullIfNull,
-                     "lower_utf8", NativeFunction::kNeedsContext),
+                     "gdv_fn_lower_utf8", NativeFunction::kNeedsContext),
+
+      NativeFunction("initcap", {}, DataTypeVector{utf8()}, utf8(), kResultNullIfNull,
+                     "gdv_fn_initcap_utf8",
+                     NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors),
 
       NativeFunction("castINT", {}, DataTypeVector{utf8()}, int32(), kResultNullIfNull,
                      "gdv_fn_castINT_utf8",
diff --git a/cpp/src/gandiva/gdv_function_stubs.cc b/cpp/src/gandiva/gdv_function_stubs.cc
index acf3f56ccef..a890775edad 100644
--- a/cpp/src/gandiva/gdv_function_stubs.cc
+++ b/cpp/src/gandiva/gdv_function_stubs.cc
@@ -17,10 +17,13 @@
 
 #include "gandiva/gdv_function_stubs.h"
 
+#include <utf8proc.h>
+
 #include <string>
 #include <vector>
 
 #include "arrow/util/formatting.h"
+#include "arrow/util/utf8.h"
 #include "arrow/util/value_parsing.h"
 #include "gandiva/engine.h"
 #include "gandiva/exported_funcs.h"
@@ -402,6 +405,286 @@ GDV_FN_CAST_VARCHAR_REAL(float64, DoubleType)
 
 #undef GDV_FN_CAST_VARCHAR_INTEGER
 #undef GDV_FN_CAST_VARCHAR_REAL
+
+GANDIVA_EXPORT
+int32_t gdv_fn_utf8_char_length(char c) {
+  if ((signed char)c >= 0) {  // 1-byte char (0x00 ~ 0x7F)
+    return 1;
+  } else if ((c & 0xE0) == 0xC0) {  // 2-byte char
+    return 2;
+  } else if ((c & 0xF0) == 0xE0) {  // 3-byte char
+    return 3;
+  } else if ((c & 0xF8) == 0xF0) {  // 4-byte char
+    return 4;
+  }
+  // invalid char
+  return 0;
+}
+
+GANDIVA_EXPORT
+void gdv_fn_set_error_for_invalid_utf8(int64_t execution_context, char val) {
+  char const* fmt = "unexpected byte \\%02hhx encountered while decoding utf8 string";
+  int size = static_cast<int>(strlen(fmt)) + 64;
+  char* error = reinterpret_cast<char*>(malloc(size));
+  snprintf(error, size, fmt, (unsigned char)val);
+  gdv_fn_context_set_error_msg(execution_context, error);
+  free(error);
+}
+
+// Convert an utf8 string to its corresponding uppercase string
+GANDIVA_EXPORT
+const char* gdv_fn_upper_utf8(int64_t context, const char* data, int32_t data_len,
+                              int32_t* out_len) {
+  if (data_len == 0) {
+    *out_len = 0;
+    return "";
+  }
+
+  // If it is a single-byte character (ASCII), corresponding uppercase is always 1-byte
+  // long; if it is >= 2 bytes long, uppercase can be at most 4 bytes long, so length of
+  // the output can be at most twice the length of the input
+  char* out = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, 2 * data_len));
+  if (out == nullptr) {
+    gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
+    *out_len = 0;
+    return "";
+  }
+
+  int32_t char_len, out_char_len, out_idx = 0;
+  uint32_t char_codepoint;
+
+  for (int32_t i = 0; i < data_len; i += char_len) {
+    char_len = gdv_fn_utf8_char_length(data[i]);
+    // For single byte characters:
+    // If it is a lowercase ASCII character, set the output to its corresponding uppercase
+    // character; else, set the output to the read character
+    if (char_len == 1) {
+      char cur = data[i];
+      // 'A' - 'Z' : 0x41 - 0x5a
+      // 'a' - 'z' : 0x61 - 0x7a
+      if (cur >= 0x61 && cur <= 0x7a) {
+        out[out_idx++] = static_cast<char>(cur - 0x20);
+      } else {
+        out[out_idx++] = cur;
+      }
+      continue;
+    }
+
+    // Control reaches here when we encounter a multibyte character
+    const auto* in_char = (const uint8_t*)(data + i);
+
+    // Decode the multibyte character
+    bool is_valid_utf8_char =
+        arrow::util::UTF8Decode((const uint8_t**)&in_char, &char_codepoint);
+
+    // If it is an invalid utf8 character, UTF8Decode evaluates to false
+    if (!is_valid_utf8_char) {
+      gdv_fn_set_error_for_invalid_utf8(context, data[i]);
+      *out_len = 0;
+      return "";
+    }
+
+    // Convert the encoded codepoint to its uppercase codepoint
+    int32_t upper_codepoint = utf8proc_toupper(char_codepoint);
+
+    // UTF8Encode advances the pointer by the number of bytes present in the uppercase
+    // character
+    auto* out_char = (uint8_t*)(out + out_idx);
+    uint8_t* out_char_start = out_char;
+
+    // Encode the uppercase character
+    out_char = arrow::util::UTF8Encode(out_char, upper_codepoint);
+
+    out_char_len = static_cast<int32_t>(out_char - out_char_start);
+    out_idx += out_char_len;
+  }
+
+  *out_len = out_idx;
+  return out;
+}
+
+// Convert an utf8 string to its corresponding lowercase string
+GANDIVA_EXPORT
+const char* gdv_fn_lower_utf8(int64_t context, const char* data, int32_t data_len,
+                              int32_t* out_len) {
+  if (data_len == 0) {
+    *out_len = 0;
+    return "";
+  }
+
+  // If it is a single-byte character (ASCII), corresponding lowercase is always 1-byte
+  // long; if it is >= 2 bytes long, lowercase can be at most 4 bytes long, so length of
+  // the output can be at most twice the length of the input
+  char* out = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, 2 * data_len));
+  if (out == nullptr) {
+    gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
+    *out_len = 0;
+    return "";
+  }
+
+  int32_t char_len, out_char_len, out_idx = 0;
+  uint32_t char_codepoint;
+
+  for (int32_t i = 0; i < data_len; i += char_len) {
+    char_len = gdv_fn_utf8_char_length(data[i]);
+    // For single byte characters:
+    // If it is an uppercase ASCII character, set the output to its corresponding
+    // lowercase character; else, set the output to the read character
+    if (char_len == 1) {
+      char cur = data[i];
+      // 'A' - 'Z' : 0x41 - 0x5a
+      // 'a' - 'z' : 0x61 - 0x7a
+      if (cur >= 0x41 && cur <= 0x5a) {
+        out[out_idx++] = static_cast<char>(cur + 0x20);
+      } else {
+        out[out_idx++] = cur;
+      }
+      continue;
+    }
+
+    // Control reaches here when we encounter a multibyte character
+    const auto* in_char = (const uint8_t*)(data + i);
+
+    // Decode the multibyte character
+    bool is_valid_utf8_char =
+        arrow::util::UTF8Decode((const uint8_t**)&in_char, &char_codepoint);
+
+    // If it is an invalid utf8 character, UTF8Decode evaluates to false
+    if (!is_valid_utf8_char) {
+      gdv_fn_set_error_for_invalid_utf8(context, data[i]);
+      *out_len = 0;
+      return "";
+    }
+
+    // Convert the encoded codepoint to its lowercase codepoint
+    int32_t lower_codepoint = utf8proc_tolower(char_codepoint);
+
+    // UTF8Encode advances the pointer by the number of bytes present in the lowercase
+    // character
+    auto* out_char = (uint8_t*)(out + out_idx);
+    uint8_t* out_char_start = out_char;
+
+    // Encode the lowercase character
+    out_char = arrow::util::UTF8Encode(out_char, lower_codepoint);
+
+    out_char_len = static_cast<int32_t>(out_char - out_char_start);
+    out_idx += out_char_len;
+  }
+
+  *out_len = out_idx;
+  return out;
+}
+
+// Checks if the character is a whitespace by its code point. To check the list
+// of the existent whitespaces characters in UTF8, take a look at this link
+// https://en.wikipedia.org/wiki/Whitespace_character#Unicode
+//
+// The Unicode characters also are divided between categories. This link
+// https://en.wikipedia.org/wiki/Unicode_character_property#General_Category shows
+// more information about characters categories.
+GANDIVA_EXPORT
+bool gdv_fn_is_codepoint_for_space(uint32_t val) {
+  auto category = utf8proc_category(val);
+
+  return category == utf8proc_category_t::UTF8PROC_CATEGORY_ZS ||
+         category == utf8proc_category_t::UTF8PROC_CATEGORY_ZL ||
+         category == utf8proc_category_t::UTF8PROC_CATEGORY_ZP;
+}
+
+// For a given text, initialize the first letter of each word, e.g:
+//     - "it is a text str" -> "It Is A Text Str"
+GANDIVA_EXPORT
+const char* gdv_fn_initcap_utf8(int64_t context, const char* data, int32_t data_len,
+                                int32_t* out_len) {
+  if (data_len == 0) {
+    *out_len = data_len;
+    return "";
+  }
+
+  // If it is a single-byte character (ASCII), corresponding uppercase is always 1-byte
+  // long; if it is >= 2 bytes long, uppercase can be at most 4 bytes long, so length of
+  // the output can be at most twice the length of the input
+  char* out = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, 2 * data_len));
+  if (out == nullptr) {
+    gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
+    *out_len = 0;
+    return "";
+  }
+
+  int32_t char_len = 0;
+  int32_t out_char_len = 0;
+  int32_t out_idx = 0;
+  uint32_t char_codepoint;
+  bool last_char_was_space = true;
+
+  for (int32_t i = 0; i < data_len; i += char_len) {
+    char_len = gdv_fn_utf8_char_length(data[i]);
+    // For single byte characters:
+    // If it is a lowercase ASCII character, set the output to its corresponding uppercase
+    // character; else, set the output to the read character
+    if (char_len == 1) {
+      char cur = data[i];
+
+      if (cur >= 0x61 && cur <= 0x7a && last_char_was_space) {
+        // 'A' - 'Z' : 0x41 - 0x5a
+        // 'a' - 'z' : 0x61 - 0x7a
+        out[out_idx++] = static_cast<char>(cur - 0x20);
+        last_char_was_space = false;
+      } else {
+        // Check if the ASCII character is one of these:
+        // - space : 0x20
+        // - character tabulation : 0x9
+        // - line feed : 0xA
+        // - line tabulation : 0xB
+        // - form feed : 0xC
+        // - carriage return : 0xD
+        last_char_was_space = cur <= 0x20;
+        out[out_idx++] = cur;
+      }
+      continue;
+    }
+
+    // Control reaches here when we encounter a multibyte character
+    const auto* in_char = (const uint8_t*)(data + i);
+
+    // Decode the multibyte character
+    bool is_valid_utf8_char =
+        arrow::util::UTF8Decode((const uint8_t**)&in_char, &char_codepoint);
+
+    // If it is an invalid utf8 character, UTF8Decode evaluates to false
+    if (!is_valid_utf8_char) {
+      gdv_fn_set_error_for_invalid_utf8(context, data[i]);
+      *out_len = 0;
+      return "";
+    }
+
+    bool is_char_space = gdv_fn_is_codepoint_for_space(char_codepoint);
+
+    int32_t formatted_codepoint;
+    if (last_char_was_space && !is_char_space) {
+      // Convert the encoded codepoint to its uppercase codepoint
+      formatted_codepoint = utf8proc_toupper(char_codepoint);
+    } else {
+      // Leave the codepoint as is
+      formatted_codepoint = char_codepoint;
+    }
+
+    // UTF8Encode advances the pointer by the number of bytes present in the character
+    auto* out_char = (uint8_t*)(out + out_idx);
+    uint8_t* out_char_start = out_char;
+
+    // Encode the uppercase character
+    out_char = arrow::util::UTF8Encode(out_char, formatted_codepoint);
+
+    out_char_len = static_cast<int32_t>(out_char - out_char_start);
+    out_idx += out_char_len;
+
+    last_char_was_space = is_char_space;
+  }
+
+  *out_len = out_idx;
+  return out;
+}
 }
 
 namespace gandiva {
@@ -1031,5 +1314,40 @@ void ExportedStubFunctions::AddMappings(Engine* engine) const {
   engine->AddGlobalMappingForFunc("gdv_fn_sha256_decimal128",
                                   types->i8_ptr_type() /*return_type*/, args,
                                   reinterpret_cast<void*>(gdv_fn_sha256_decimal128));
+
+  // gdv_fn_upper_utf8
+  args = {
+      types->i64_type(),      // context
+      types->i8_ptr_type(),   // data
+      types->i32_type(),      // data_len
+      types->i32_ptr_type(),  // out_len
+  };
+
+  engine->AddGlobalMappingForFunc("gdv_fn_upper_utf8",
+                                  types->i8_ptr_type() /*return_type*/, args,
+                                  reinterpret_cast<void*>(gdv_fn_upper_utf8));
+  // gdv_fn_lower_utf8
+  args = {
+      types->i64_type(),      // context
+      types->i8_ptr_type(),   // data
+      types->i32_type(),      // data_len
+      types->i32_ptr_type(),  // out_len
+  };
+
+  engine->AddGlobalMappingForFunc("gdv_fn_lower_utf8",
+                                  types->i8_ptr_type() /*return_type*/, args,
+                                  reinterpret_cast<void*>(gdv_fn_lower_utf8));
+
+  // gdv_fn_initcap_utf8
+  args = {
+      types->i64_type(),     // context
+      types->i8_ptr_type(),  // const char*
+      types->i32_type(),     // value_length
+      types->i32_ptr_type()  // out_length
+  };
+
+  engine->AddGlobalMappingForFunc("gdv_fn_initcap_utf8",
+                                  types->i8_ptr_type() /*return_type*/, args,
+                                  reinterpret_cast<void*>(gdv_fn_initcap_utf8));
 }
 }  // namespace gandiva
diff --git a/cpp/src/gandiva/gdv_function_stubs.h b/cpp/src/gandiva/gdv_function_stubs.h
index 0a6cd70ca7c..847772b17a4 100644
--- a/cpp/src/gandiva/gdv_function_stubs.h
+++ b/cpp/src/gandiva/gdv_function_stubs.h
@@ -108,4 +108,25 @@ const char* gdv_fn_castVARCHAR_float32_int64(int64_t context, float value, int64
 GANDIVA_EXPORT
 const char* gdv_fn_castVARCHAR_float64_int64(int64_t context, double value, int64_t len,
                                              int32_t* out_len);
+
+GANDIVA_EXPORT
+int32_t gdv_fn_utf8_char_length(char c);
+
+GANDIVA_EXPORT
+void gdv_fn_set_error_for_invalid_utf8(int64_t execution_context, char val);
+
+GANDIVA_EXPORT
+const char* gdv_fn_upper_utf8(int64_t context, const char* data, int32_t data_len,
+                              int32_t* out_len);
+
+GANDIVA_EXPORT
+const char* gdv_fn_lower_utf8(int64_t context, const char* data, int32_t data_len,
+                              int32_t* out_len);
+
+GANDIVA_EXPORT
+bool gdv_fn_is_codepoint_for_space(uint32_t val);
+
+GANDIVA_EXPORT
+const char* gdv_fn_initcap_utf8(int64_t context, const char* data, int32_t data_len,
+                                int32_t* out_len);
 }
diff --git a/cpp/src/gandiva/gdv_function_stubs_test.cc b/cpp/src/gandiva/gdv_function_stubs_test.cc
index 8f44ce27982..6cfff5b891f 100644
--- a/cpp/src/gandiva/gdv_function_stubs_test.cc
+++ b/cpp/src/gandiva/gdv_function_stubs_test.cc
@@ -290,4 +290,198 @@ TEST(TestGdvFnStubs, TestCastVARCHARFromDouble) {
   EXPECT_FALSE(ctx.has_error());
 }
 
+TEST(TestGdvFnStubs, TestUpper) {
+  gandiva::ExecutionContext ctx;
+  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
+  gdv_int32 out_len = 0;
+
+  const char* out_str = gdv_fn_upper_utf8(ctx_ptr, "AbcDEfGh", 8, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "ABCDEFGH");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_upper_utf8(ctx_ptr, "asdfj", 5, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "ASDFJ");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_upper_utf8(ctx_ptr, "s;dcGS,jO!l", 11, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "S;DCGS,JO!L");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_upper_utf8(ctx_ptr, "münchen", 8, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "MÜNCHEN");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_upper_utf8(ctx_ptr, "CITROËN", 8, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "CITROËN");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_upper_utf8(ctx_ptr, "âBćDëFGH", 11, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "ÂBĆDËFGH");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_upper_utf8(ctx_ptr, "øhpqRšvñ", 11, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "ØHPQRŠVÑ");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_upper_utf8(ctx_ptr, "Möbelträgerfüße", 19, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "MÖBELTRÄGERFÜẞE");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_upper_utf8(ctx_ptr, "{õhp,PQŚv}ń+", 15, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "{ÕHP,PQŚV}Ń+");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_upper_utf8(ctx_ptr, "", 0, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+  EXPECT_FALSE(ctx.has_error());
+
+  std::string d("AbOJjÜoß\xc3");
+  out_str = gdv_fn_upper_utf8(ctx_ptr, d.data(), static_cast<int>(d.length()), &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+  EXPECT_THAT(ctx.get_error(),
+              ::testing::HasSubstr(
+                  "unexpected byte \\c3 encountered while decoding utf8 string"));
+  ctx.Reset();
+
+  std::string e(
+      "åbÑg\xe0\xa0"
+      "åBUå");
+  out_str = gdv_fn_upper_utf8(ctx_ptr, e.data(), static_cast<int>(e.length()), &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+  EXPECT_THAT(ctx.get_error(),
+              ::testing::HasSubstr(
+                  "unexpected byte \\e0 encountered while decoding utf8 string"));
+  ctx.Reset();
+}
+
+TEST(TestGdvFnStubs, TestLower) {
+  gandiva::ExecutionContext ctx;
+  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
+  gdv_int32 out_len = 0;
+
+  const char* out_str = gdv_fn_lower_utf8(ctx_ptr, "AbcDEfGh", 8, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "abcdefgh");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_lower_utf8(ctx_ptr, "asdfj", 5, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "asdfj");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_lower_utf8(ctx_ptr, "S;DCgs,Jo!L", 11, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "s;dcgs,jo!l");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_lower_utf8(ctx_ptr, "MÜNCHEN", 8, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "münchen");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_lower_utf8(ctx_ptr, "citroën", 8, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "citroën");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_lower_utf8(ctx_ptr, "ÂbĆDËFgh", 11, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "âbćdëfgh");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_lower_utf8(ctx_ptr, "ØHPQrŠvÑ", 11, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "øhpqršvñ");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_lower_utf8(ctx_ptr, "MÖBELTRÄGERFÜẞE", 20, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "möbelträgerfüße");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_lower_utf8(ctx_ptr, "{ÕHP,pqśv}Ń+", 15, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "{õhp,pqśv}ń+");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_lower_utf8(ctx_ptr, "", 0, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+  EXPECT_FALSE(ctx.has_error());
+
+  std::string d("AbOJjÜoß\xc3");
+  out_str = gdv_fn_lower_utf8(ctx_ptr, d.data(), static_cast<int>(d.length()), &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+  EXPECT_THAT(ctx.get_error(),
+              ::testing::HasSubstr(
+                  "unexpected byte \\c3 encountered while decoding utf8 string"));
+  ctx.Reset();
+
+  std::string e(
+      "åbÑg\xe0\xa0"
+      "åBUå");
+  out_str = gdv_fn_lower_utf8(ctx_ptr, e.data(), static_cast<int>(e.length()), &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+  EXPECT_THAT(ctx.get_error(),
+              ::testing::HasSubstr(
+                  "unexpected byte \\e0 encountered while decoding utf8 string"));
+  ctx.Reset();
+}
+
+TEST(TestGdvFnStubs, TestInitCap) {
+  gandiva::ExecutionContext ctx;
+  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
+  gdv_int32 out_len = 0;
+
+  const char* out_str = gdv_fn_initcap_utf8(ctx_ptr, "test string", 11, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "Test String");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_initcap_utf8(ctx_ptr, "asdfj\nhlqf", 10, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "Asdfj\nHlqf");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_initcap_utf8(ctx_ptr, "s;DCgs,Jo!L", 11, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "S;DCgs,Jo!L");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_initcap_utf8(ctx_ptr, " mÜNCHEN", 9, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), " MÜNCHEN");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_initcap_utf8(ctx_ptr, "citroën CaR", 12, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "Citroën CaR");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_initcap_utf8(ctx_ptr, "ÂbĆDËFgh\néll", 16, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "ÂbĆDËFgh\nÉll");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_initcap_utf8(ctx_ptr, "  øhpqršvñ  \n\n", 17, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "  Øhpqršvñ  \n\n");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str =
+      gdv_fn_initcap_utf8(ctx_ptr, "möbelträgerfüße   \nmöbelträgerfüße", 42, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "Möbelträgerfüße   \nMöbelträgerfüße");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_initcap_utf8(ctx_ptr, "{ÕHP,pqśv}Ń+", 15, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "{ÕHP,pqśv}Ń+");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_initcap_utf8(ctx_ptr, "", 0, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+  EXPECT_FALSE(ctx.has_error());
+
+  std::string d("AbOJjÜoß\xc3");
+  out_str =
+      gdv_fn_initcap_utf8(ctx_ptr, d.data(), static_cast<int>(d.length()), &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+  EXPECT_THAT(ctx.get_error(),
+              ::testing::HasSubstr(
+                  "unexpected byte \\c3 encountered while decoding utf8 string"));
+  ctx.Reset();
+
+  std::string e(
+      "åbÑg\xe0\xa0"
+      "åBUå");
+  out_str =
+      gdv_fn_initcap_utf8(ctx_ptr, e.data(), static_cast<int>(e.length()), &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+  EXPECT_THAT(ctx.get_error(),
+              ::testing::HasSubstr(
+                  "unexpected byte \\e0 encountered while decoding utf8 string"));
+  ctx.Reset();
+}
 }  // namespace gandiva
diff --git a/cpp/src/gandiva/precompiled/CMakeLists.txt b/cpp/src/gandiva/precompiled/CMakeLists.txt
index 7343bc052c4..176b0473855 100644
--- a/cpp/src/gandiva/precompiled/CMakeLists.txt
+++ b/cpp/src/gandiva/precompiled/CMakeLists.txt
@@ -77,6 +77,7 @@ foreach(SRC_FILE ${PRECOMPILED_SRCS})
            ${ARROW_GANDIVA_PC_CXX_FLAGS}
            -I${CMAKE_SOURCE_DIR}/src
            -I${ARROW_BINARY_DIR}/src)
+
   if(NOT ARROW_USE_NATIVE_INT128)
     list(APPEND PRECOMPILE_COMMAND -I${Boost_INCLUDE_DIR})
   endif()
diff --git a/cpp/src/gandiva/precompiled/string_ops.cc b/cpp/src/gandiva/precompiled/string_ops.cc
index b35062dadc3..ac50633f3c2 100644
--- a/cpp/src/gandiva/precompiled/string_ops.cc
+++ b/cpp/src/gandiva/precompiled/string_ops.cc
@@ -17,6 +17,7 @@
 
 // String functions
 #include "arrow/util/value_parsing.h"
+
 extern "C" {
 
 #include <algorithm>
@@ -221,66 +222,6 @@ UTF8_LENGTH(char_length, utf8)
 UTF8_LENGTH(length, utf8)
 UTF8_LENGTH(lengthUtf8, binary)
 
-// Convert a utf8 sequence to upper case.
-// TODO : This handles only ascii characters.
-FORCE_INLINE
-const char* upper_utf8(gdv_int64 context, const char* data, gdv_int32 data_len,
-                       int32_t* out_len) {
-  if (data_len == 0) {
-    *out_len = 0;
-    return "";
-  }
-
-  char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, data_len));
-  if (ret == nullptr) {
-    gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
-    *out_len = 0;
-    return "";
-  }
-  for (gdv_int32 i = 0; i < data_len; ++i) {
-    char cur = data[i];
-
-    // 'A- - 'Z' : 0x41 - 0x5a
-    // 'a' - 'z' : 0x61 - 0x7a
-    if (cur >= 0x61 && cur <= 0x7a) {
-      cur = static_cast<char>(cur - 0x20);
-    }
-    ret[i] = cur;
-  }
-  *out_len = data_len;
-  return ret;
-}
-
-// Convert a utf8 sequence to lower case.
-// TODO : This handles only ascii characters.
-FORCE_INLINE
-const char* lower_utf8(gdv_int64 context, const char* data, gdv_int32 data_len,
-                       int32_t* out_len) {
-  if (data_len == 0) {
-    *out_len = 0;
-    return "";
-  }
-
-  char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, data_len));
-  if (ret == nullptr) {
-    gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
-    *out_len = 0;
-    return "";
-  }
-  for (gdv_int32 i = 0; i < data_len; ++i) {
-    char cur = data[i];
-
-    // 'A' - 'Z' : 0x41 - 0x5a
-    // 'a' - 'z' : 0x61 - 0x7a
-    if (cur >= 0x41 && cur <= 0x5a) {
-      cur = static_cast<char>(cur + 0x20);
-    }
-    ret[i] = cur;
-  }
-  *out_len = data_len;
-  return ret;
-}
-
 // Reverse a utf8 sequence
 FORCE_INLINE
 const char* reverse_utf8(gdv_int64 context, const char* data, gdv_int32 data_len,
diff --git a/cpp/src/gandiva/precompiled/string_ops_test.cc b/cpp/src/gandiva/precompiled/string_ops_test.cc
index fd36665065b..ae3c0f2e28c 100644
--- a/cpp/src/gandiva/precompiled/string_ops_test.cc
+++ b/cpp/src/gandiva/precompiled/string_ops_test.cc
@@ -695,28 +695,6 @@ TEST(TestStringOps, TestConcat) {
   EXPECT_FALSE(ctx.has_error());
 }
 
-TEST(TestStringOps, TestLower) {
-  gandiva::ExecutionContext ctx;
-  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
-  gdv_int32 out_len = 0;
-
-  const char* out_str = lower_utf8(ctx_ptr, "AsDfJ", 5, &out_len);
-  EXPECT_EQ(std::string(out_str, out_len), "asdfj");
-  EXPECT_FALSE(ctx.has_error());
-
-  out_str = lower_utf8(ctx_ptr, "asdfj", 5, &out_len);
-  EXPECT_EQ(std::string(out_str, out_len), "asdfj");
-  EXPECT_FALSE(ctx.has_error());
-
-  out_str = lower_utf8(ctx_ptr, "Ç††AbD", 11, &out_len);
-  EXPECT_EQ(std::string(out_str, out_len), "Ç††abd");
-  EXPECT_FALSE(ctx.has_error());
-
-  out_str = lower_utf8(ctx_ptr, "", 0, &out_len);
-  EXPECT_EQ(std::string(out_str, out_len), "");
-  EXPECT_FALSE(ctx.has_error());
-}
-
 TEST(TestStringOps, TestReverse) {
   gandiva::ExecutionContext ctx;
   uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
diff --git a/cpp/src/gandiva/precompiled/types.h b/cpp/src/gandiva/precompiled/types.h
index 170cf92aa2d..b8c7aa9147e 100644
--- a/cpp/src/gandiva/precompiled/types.h
+++ b/cpp/src/gandiva/precompiled/types.h
@@ -378,9 +378,6 @@ const char* castVARCHAR_utf8_int64(gdv_int64 context, const char* data,
                                    gdv_int32 data_len, int64_t out_len,
                                    int32_t* out_length);
 
-const char* lower_utf8(gdv_int64 context, const char* data, gdv_int32 data_len,
-                       int32_t* out_length);
-
 const char* reverse_utf8(gdv_int64 context, const char* data, gdv_int32 data_len,
                          int32_t* out_len);
 
diff --git a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java
index 9844d026ae9..80d4281f4c2 100644
--- a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java
+++ b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java
@@ -73,7 +73,13 @@ public class ProjectorTest extends BaseEvaluatorTest {
 
   List<ArrowBuf> varBufs(String[] strings, Charset charset) {
     ArrowBuf offsetsBuffer = allocator.buffer((strings.length + 1) * 4);
-    ArrowBuf dataBuffer = allocator.buffer(strings.length * 8);
+    
+    long dataBufferSize = 0L;
+    for (String string : strings) {
+      dataBufferSize += string.getBytes(charset).length;
+    }
+
+    ArrowBuf dataBuffer = allocator.buffer(dataBufferSize);
 
     int startOffset = 0;
     for (int i = 0; i < strings.length; i++) {
@@ -2284,4 +2290,68 @@ public void testCastVarcharFromFloat() throws Exception {
     releaseValueVectors(output);
   }
 
+  @Test
+  public void testInitCap() throws Exception {
+
+    Field x = Field.nullable("x", new ArrowType.Utf8());
+
+    Field retType = Field.nullable("c", new ArrowType.Utf8());
+
+    TreeNode cond =
+            TreeBuilder.makeFunction(
+                    "initcap",
+                    Lists.newArrayList(TreeBuilder.makeField(x)),
+                    new ArrowType.Utf8());
+    ExpressionTree expr = TreeBuilder.makeExpression(cond, retType);
+    Schema schema = new Schema(Lists.newArrayList(x));
+    Projector eval = Projector.make(schema, Lists.newArrayList(expr));
+
+    int numRows = 5;
+    byte[] validity = new byte[]{(byte) 15, 0};
+    String[] valuesX = new String[]{
+        "  øhpqršvñ  \n\n",
+        "möbelträgerfüße   \nmöbelträgerfüße",
+        "ÂbĆDËFgh\néll",
+        "citroën CaR",
+        "kjk"
+    };
+
+    String[] expected = new String[]{
+        "  Øhpqršvñ  \n\n",
+        "Möbelträgerfüße   \nMöbelträgerfüße",
+        "ÂbĆDËFgh\nÉll",
+        "Citroën CaR",
+        null
+    };
+
+    ArrowBuf validityX = buf(validity);
+    List<ArrowBuf> dataBufsX = stringBufs(valuesX);
+
+    ArrowRecordBatch batch =
+            new ArrowRecordBatch(
+                    numRows,
+                    Lists.newArrayList(new ArrowFieldNode(numRows, 0)),
+                    Lists.newArrayList(validityX, dataBufsX.get(0), dataBufsX.get(1)));
+
+    // allocate data for output vector.
+    VarCharVector outVector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator);
+    outVector.allocateNew(numRows * 100, numRows);
+
+    // evaluate expression
+    List<ValueVector> output = new ArrayList<>();
+    output.add(outVector);
+    eval.evaluate(batch, output);
+    eval.close();
+
+    // match expected output.
+    for (int i = 0; i < numRows - 1; i++) {
+      assertFalse("Expect none value equals null", outVector.isNull(i));
+      assertEquals(expected[i], new String(outVector.get(i)));
+    }
+
+    assertTrue("Last value must be null", outVector.isNull(numRows - 1));
+
+    releaseRecordBatch(batch);
+    releaseValueVectors(output);
+  }
 }

From 5790a80396fc39232cd66669d09c9cbd594ba6cb Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Tue, 25 May 2021 10:11:11 -0400
Subject: [PATCH 297/719] ARROW-12729: [R] Fix length method for Table,
 RecordBatch

Closes #10388 from thisisnic/ARROW-12729_length

Authored-by: Nic Crane <thisisnic@gmail.com>
Signed-off-by: Ian Cook <ianmcook@gmail.com>
---
 r/NAMESPACE                         |  1 +
 r/R/arrow-tabular.R                 |  3 +++
 r/tests/testthat/test-RecordBatch.R | 14 ++++++++++++++
 r/tests/testthat/test-Table.R       | 14 ++++++++++++++
 4 files changed, 32 insertions(+)

diff --git a/r/NAMESPACE b/r/NAMESPACE
index f89d2effea7..82837cdd24c 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -53,6 +53,7 @@ S3method(is.nan,ArrowDatum)
 S3method(is_in,ArrowDatum)
 S3method(is_in,default)
 S3method(length,ArrowDatum)
+S3method(length,ArrowTabular)
 S3method(length,Scalar)
 S3method(length,Schema)
 S3method(match_arrow,ArrowDatum)
diff --git a/r/R/arrow-tabular.R b/r/R/arrow-tabular.R
index 2bd0a99534f..f5535f9ac20 100644
--- a/r/R/arrow-tabular.R
+++ b/r/R/arrow-tabular.R
@@ -238,3 +238,6 @@ ToString_tabular <- function(x, ...) {
   dims <- sprintf("%s rows x %s columns", nrow(x), ncol(x))
   paste(c(dims, sch), collapse = "\n")
 }
+
+#' @export
+length.ArrowTabular <- function(x) x$num_columns
diff --git a/r/tests/testthat/test-RecordBatch.R b/r/tests/testthat/test-RecordBatch.R
index eef79100950..beb1306ab4f 100644
--- a/r/tests/testthat/test-RecordBatch.R
+++ b/r/tests/testthat/test-RecordBatch.R
@@ -517,3 +517,17 @@ test_that("ARROW-11769 - grouping preserved in record batch creation", {
   )
 
 })
+
+test_that("ARROW-12729 - length returns number of columns in RecordBatch", {
+  
+  tbl <- tibble::tibble(
+    int = 1:10,
+    fct = factor(rep(c("A", "B"), 5)),
+    fct2 = factor(rep(c("C", "D"), each = 5)),
+  )
+  
+  rb <- record_batch(!!!tbl)
+  
+  expect_identical(length(rb), 3L)
+  
+})
\ No newline at end of file
diff --git a/r/tests/testthat/test-Table.R b/r/tests/testthat/test-Table.R
index ba41c2be705..1f9628859d0 100644
--- a/r/tests/testthat/test-Table.R
+++ b/r/tests/testthat/test-Table.R
@@ -493,3 +493,17 @@ test_that("ARROW-11769 - grouping preserved in table creation", {
   )
 
 })
+
+test_that("ARROW-12729 - length returns number of columns in Table", {
+
+  tbl <- tibble::tibble(
+    int = 1:10,
+    fct = factor(rep(c("A", "B"), 5)),
+    fct2 = factor(rep(c("C", "D"), each = 5)),
+  )
+  
+  tab <- Table$create(!!!tbl)
+  
+  expect_identical(length(tab), 3L)
+  
+})

From eb20a3dbc7732f612e5ce54be5f4291440829350 Mon Sep 17 00:00:00 2001
From: Jeroen Ooms <jeroenooms@gmail.com>
Date: Tue, 25 May 2021 14:55:51 -0700
Subject: [PATCH 298/719] ARROW-11926: [R] Add ucrt64 binaries and fix CI

The R project is experimenting with a new ucrt toolchain. This prepares us by adding ucrt64 binaries to the windows bundle.

Closes #10372 from jeroen/test-ucrt

Lead-authored-by: Jeroen Ooms <jeroenooms@gmail.com>
Co-authored-by: Neal Richardson <neal.p.richardson@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 ci/scripts/r_windows_build.sh | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/ci/scripts/r_windows_build.sh b/ci/scripts/r_windows_build.sh
index 9988dfb6494..20f824a9e01 100755
--- a/ci/scripts/r_windows_build.sh
+++ b/ci/scripts/r_windows_build.sh
@@ -26,11 +26,6 @@ export ARROW_HOME="$(cd "${ARROW_HOME}" && pwd)"
 if [ "$RTOOLS_VERSION" = "35" ]; then
   # Use rtools-backports if building with rtools35
   curl https://raw.githubusercontent.com/r-windows/rtools-backports/master/pacman.conf > /etc/pacman.conf
-  # Update keys: https://www.msys2.org/news/#2020-06-29-new-packagers
-  msys2_repo_base_url=https://repo.msys2.org/msys
-  curl -OSsL "${msys2_repo_base_url}/x86_64/msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz"
-  pacman -U --noconfirm msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz && rm msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz
-  pacman --noconfirm -Scc
   pacman --noconfirm -Syy
   # lib-4.9.3 is for libraries compiled with gcc 4.9 (Rtools 3.5)
   RWINLIB_LIB_DIR="lib-4.9.3"
@@ -43,6 +38,7 @@ else
 
   pacman --noconfirm -Syy
   RWINLIB_LIB_DIR="lib"
+  export MINGW_ARCH="mingw32 mingw64 ucrt64"
 fi
 
 cp $ARROW_HOME/ci/scripts/PKGBUILD .
@@ -64,7 +60,7 @@ MSYS_LIB_DIR="/c/rtools40"
 ls $MSYS_LIB_DIR/mingw64/lib/
 ls $MSYS_LIB_DIR/mingw32/lib/
 
-# Untar the two builds we made
+# Untar the three builds we made
 ls *.xz | xargs -n 1 tar -xJf
 mkdir -p $DST_DIR
 # Grab the headers from one, either one is fine
@@ -94,6 +90,14 @@ cp $MSYS_LIB_DIR/mingw32/lib/lib{thrift,snappy}.a $DST_DIR/${RWINLIB_LIB_DIR}/i3
 cp $MSYS_LIB_DIR/mingw64/lib/lib{zstd,lz4,crypto,utf8proc,re2,aws*}.a $DST_DIR/lib/x64
 cp $MSYS_LIB_DIR/mingw32/lib/lib{zstd,lz4,crypto,utf8proc,re2,aws*}.a $DST_DIR/lib/i386
 
+# Do the same also for ucrt64
+if [ "$RTOOLS_VERSION" != "35" ]; then
+ls $MSYS_LIB_DIR/ucrt64/lib/
+mkdir -p $DST_DIR/lib/x64-ucrt
+mv ucrt64/lib/*.a $DST_DIR/${RWINLIB_LIB_DIR}/x64-ucrt
+cp $MSYS_LIB_DIR/ucrt64/lib/lib{zstd,lz4,crypto,utf8proc,re2,aws*}.a $DST_DIR/lib/x64-ucrt
+fi
+
 # Create build artifact
 zip -r ${DST_DIR}.zip $DST_DIR
 

From 16199b717ba133adf6c63911cb33b8bc98d3362e Mon Sep 17 00:00:00 2001
From: Jorge Leitao <jorgecarleitao@gmail.com>
Date: Wed, 26 May 2021 04:10:06 +0000
Subject: [PATCH 299/719] ARROW-12643: [Governance] Added experimental repos
 guidelines.

Closes #10239 from jorgecarleitao/experimental

Lead-authored-by: Jorge Leitao <jorgecarleitao@gmail.com>
Co-authored-by: Jorge C. Leitao <jorgecarleitao@gmail.com>
Signed-off-by: Jorge C. Leitao <jorgecarleitao@gmail.com>
---
 docs/source/developers/contributing.rst       |  2 +
 docs/source/developers/experimental_repos.rst | 65 +++++++++++++++++++
 2 files changed, 67 insertions(+)
 create mode 100644 docs/source/developers/experimental_repos.rst

diff --git a/docs/source/developers/contributing.rst b/docs/source/developers/contributing.rst
index e75d2c6336f..9b81a6ff190 100644
--- a/docs/source/developers/contributing.rst
+++ b/docs/source/developers/contributing.rst
@@ -311,6 +311,8 @@ In addition, the GitHub PR "suggestion" feature can also add commits to
 your branch, so it is possible that your local copy of your branch is missing
 some additions.
 
+.. include:: experimental_repos.rst
+
 Guidance for specific features
 ==============================
 
diff --git a/docs/source/developers/experimental_repos.rst b/docs/source/developers/experimental_repos.rst
new file mode 100644
index 00000000000..6f800b5b9ca
--- /dev/null
+++ b/docs/source/developers/experimental_repos.rst
@@ -0,0 +1,65 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements.  See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership.  The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License.  You may obtain a copy of the License at
+
+..   http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied.  See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+Experimental repositories
+=========================
+
+Apache Arrow has an explicit policy over developing experimental repositories
+in the context of
+`rules for revolutionaries <https://grep.codeconsult.ch/2020/04/07/rules-for-revolutionaries-2000-edition/>`_.
+
+The main motivation for this policy is to offer a lightweight mechanism to
+conduct experimental work, with the necessary creative freedom, within the ASF
+and the Apache Arrow governance model. This policy allows committers to work on
+new repositories, as they offer many important tools to manage it (e.g. github
+issues, “watch”, “github stars” to measure overall interest).
+
+Process
+-------
+
+* A committer *may* initiate experimental work by creating a separate git
+  repository within the Apache Arrow (e.g. via `selfserve <https://selfserve.apache.org/>`_)
+  and announcing it on the mailing list, together with its goals, and a link to the
+  newly created repository.
+* The committer *must* initiate an email thread with the sole purpose of
+  presenting updates to the community about the status of the repo.
+* There *must not* be official releases from the repository.
+* Any decision to make the experimental repo official in any way, whether by merging or migrating, *must* be discussed and voted on in the mailing list.
+* The committer is responsible for managing issues, documentation, CI of the repository,
+  including licensing checks.
+* The committer decides when the repository is archived.
+
+Repository management
+---------------------
+
+* The repository *must* be under `apache/`
+* The repository’s name *must* be prefixed by `arrow-experimental-`
+* The committer has full permissions over the repository (within possible in ASF)
+* Push / merge permissions *must only* be granted to Apache Arrow committers
+
+Development process
+-------------------
+
+* The repository must follow the ASF requirements about 3rd party code.
+* The committer decides how to manage issues, PRs, etc.
+
+Divergences
+-----------
+
+* If any of the “must” above fails to materialize and no correction measure
+  is taken by the committer upon request, the PMC *should* take ownership
+  and decide what to do.

From 943d2bd06a5a7c87d8a05c96bc02d2e7fc8a60b9 Mon Sep 17 00:00:00 2001
From: Yibo Cai <yibo.cai@arm.com>
Date: Wed, 26 May 2021 15:35:46 +0900
Subject: [PATCH 300/719] ARROW-12865: [C++][FlightRPC] Link gRPC with RE2

This fixes undefined symbol error if build flight with vendored re2.

Closes #10403 from cyb70289/grpc-re2

Authored-by: Yibo Cai <yibo.cai@arm.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 cpp/cmake_modules/ThirdpartyToolchain.cmake | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index 18941df2260..ff55936c228 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -2548,6 +2548,7 @@ macro(build_grpc)
       gRPC::upb
       gRPC::address_sorting
       ${ABSL_LIBRARIES}
+      re2::re2
       c-ares::cares
       ZLIB::ZLIB
       Threads::Threads)

From 861b5dad14e609f042d7f32ba3926e91e232ba92 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Wed, 26 May 2021 16:51:30 +0200
Subject: [PATCH 301/719] ARROW-12760: [C++][Python][R] Allow setting I/O
 thread pool size

This adds functions to change the pool size at runtime, but doesn't adjust the default size of 8.

Closes #10316 from lidavidm/arrow-12760

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/io/interfaces.cc       |  7 +++++
 cpp/src/arrow/io/memory_test.cc      | 10 +++++++
 cpp/src/arrow/io/type_fwd.h          | 18 +++++++++++++
 python/pyarrow/__init__.py           |  3 ++-
 python/pyarrow/includes/libarrow.pxd |  2 ++
 python/pyarrow/io.pxi                | 40 ++++++++++++++++++++++++++++
 python/pyarrow/lib.pyx               | 10 +++++++
 python/pyarrow/tests/test_misc.py    | 10 +++++++
 r/NAMESPACE                          |  2 ++
 r/R/arrowExports.R                   |  8 ++++++
 r/R/config.R                         | 14 ++++++++++
 r/_pkgdown.yml                       |  1 +
 r/man/io_thread_count.Rd             | 17 ++++++++++++
 r/src/arrowExports.cpp               | 32 ++++++++++++++++++++++
 r/src/threadpool.cpp                 |  8 ++++++
 r/tests/testthat/test-thread-pool.R  |  8 ++++++
 16 files changed, 189 insertions(+), 1 deletion(-)
 create mode 100644 r/man/io_thread_count.Rd

diff --git a/cpp/src/arrow/io/interfaces.cc b/cpp/src/arrow/io/interfaces.cc
index 670fab415d7..7193a56e8d1 100644
--- a/cpp/src/arrow/io/interfaces.cc
+++ b/cpp/src/arrow/io/interfaces.cc
@@ -29,6 +29,7 @@
 
 #include "arrow/buffer.h"
 #include "arrow/io/concurrency.h"
+#include "arrow/io/type_fwd.h"
 #include "arrow/io/util_internal.h"
 #include "arrow/result.h"
 #include "arrow/status.h"
@@ -53,6 +54,12 @@ IOContext::IOContext(MemoryPool* pool, StopToken stop_token)
 
 const IOContext& default_io_context() { return g_default_io_context; }
 
+int GetIOThreadPoolCapacity() { return internal::GetIOThreadPool()->GetCapacity(); }
+
+Status SetIOThreadPoolCapacity(int threads) {
+  return internal::GetIOThreadPool()->SetCapacity(threads);
+}
+
 FileInterface::~FileInterface() = default;
 
 Status FileInterface::Abort() { return Close(); }
diff --git a/cpp/src/arrow/io/memory_test.cc b/cpp/src/arrow/io/memory_test.cc
index be3c3ab6605..3365674af84 100644
--- a/cpp/src/arrow/io/memory_test.cc
+++ b/cpp/src/arrow/io/memory_test.cc
@@ -861,5 +861,15 @@ TEST(CacheOptions, Basics) {
   check(CacheOptions::MakeFromNetworkMetrics(5, 500, .75, 5), 2.5, 5);
 }
 
+TEST(IOThreadPool, Capacity) {
+  // Simple sanity check
+  auto pool = internal::GetIOThreadPool();
+  int capacity = pool->GetCapacity();
+  ASSERT_GT(capacity, 0);
+  ASSERT_EQ(GetIOThreadPoolCapacity(), capacity);
+  ASSERT_OK(SetIOThreadPoolCapacity(capacity + 1));
+  ASSERT_EQ(GetIOThreadPoolCapacity(), capacity + 1);
+}
+
 }  // namespace io
 }  // namespace arrow
diff --git a/cpp/src/arrow/io/type_fwd.h b/cpp/src/arrow/io/type_fwd.h
index d8208d39d60..a2fd33bf360 100644
--- a/cpp/src/arrow/io/type_fwd.h
+++ b/cpp/src/arrow/io/type_fwd.h
@@ -17,6 +17,7 @@
 
 #pragma once
 
+#include "arrow/type_fwd.h"
 #include "arrow/util/visibility.h"
 
 namespace arrow {
@@ -33,6 +34,23 @@ struct CacheOptions;
 ARROW_EXPORT
 const IOContext& default_io_context();
 
+/// \brief Get the capacity of the global I/O thread pool
+///
+/// Return the number of worker threads in the thread pool to which
+/// Arrow dispatches various I/O-bound tasks.  This is an ideal number,
+/// not necessarily the exact number of threads at a given point in time.
+///
+/// You can change this number using SetIOThreadPoolCapacity().
+ARROW_EXPORT int GetIOThreadPoolCapacity();
+
+/// \brief Set the capacity of the global I/O thread pool
+///
+/// Set the number of worker threads in the thread pool to which
+/// Arrow dispatches various I/O-bound tasks.
+///
+/// The current number is returned by GetIOThreadPoolCapacity().
+ARROW_EXPORT Status SetIOThreadPoolCapacity(int threads);
+
 class FileInterface;
 class Seekable;
 class Writable;
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index 1488f5c42e8..91bffeb6ad4 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -67,7 +67,8 @@ def parse_git(root, **kwargs):
 from pyarrow.lib import (BuildInfo, RuntimeInfo, VersionInfo,
                          cpp_build_info, cpp_version, cpp_version_info,
                          runtime_info, cpu_count, set_cpu_count,
-                         enable_signal_handlers)
+                         enable_signal_handlers,
+                         io_thread_count, set_io_thread_count)
 
 
 def show_versions():
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index a8306b47798..3912dac9fbf 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -1163,6 +1163,8 @@ cdef extern from "arrow/io/api.h" namespace "arrow::io" nogil:
         CIOContext(CMemoryPool*, CStopToken)
 
     CIOContext c_default_io_context "arrow::io::default_io_context"()
+    int GetIOThreadPoolCapacity()
+    CStatus SetIOThreadPoolCapacity(int threads)
 
     cdef cppclass FileStatistics:
         int64_t size
diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi
index 9c501adcc2b..63ce5860845 100644
--- a/python/pyarrow/io.pxi
+++ b/python/pyarrow/io.pxi
@@ -42,6 +42,46 @@ cdef extern from "Python.h":
         char *v, Py_ssize_t len) except NULL
 
 
+def io_thread_count():
+    """
+    Return the number of threads to use for I/O operations.
+
+    Many operations, such as scanning a dataset, will implicitly make
+    use of this pool. The number of threads is set to a fixed value at
+    startup. It can be modified at runtime by calling
+    :func:`set_io_thread_count()`.
+
+    See Also
+    --------
+    set_io_thread_count : Modify the size of this pool.
+    cpu_count : The analogous function for the CPU thread pool.
+    """
+    return GetIOThreadPoolCapacity()
+
+
+def set_io_thread_count(int count):
+    """
+    Set the number of threads to use for I/O operations.
+
+    Many operations, such as scanning a dataset, will implicitly make
+    use of this pool.
+
+    Parameters
+    ----------
+    count : int
+        The max number of threads that may be used for I/O.
+        Must be positive.
+
+    See Also
+    --------
+    io_thread_count : Get the size of this pool.
+    set_cpu_count : The analogous function for the CPU thread pool.
+    """
+    if count < 1:
+        raise ValueError("IO thread count must be strictly positive")
+    check_status(SetIOThreadPoolCapacity(count))
+
+
 cdef class NativeFile(_Weakrefable):
     """
     The base class for all Arrow streams.
diff --git a/python/pyarrow/lib.pyx b/python/pyarrow/lib.pyx
index 191250b3d5b..fdf16961de6 100644
--- a/python/pyarrow/lib.pyx
+++ b/python/pyarrow/lib.pyx
@@ -48,6 +48,11 @@ def cpu_count():
     If neither is present, it will default to the number of hardware threads
     on the system. It can be modified at runtime by calling
     :func:`set_cpu_count()`.
+
+    See Also
+    --------
+    set_cpu_count : Modify the size of this pool.
+    io_thread_count : The analogous function for the I/O thread pool.
     """
     return GetCpuThreadPoolCapacity()
 
@@ -55,6 +60,11 @@ def cpu_count():
 def set_cpu_count(int count):
     """
     Set the number of threads to use in parallel operations.
+
+    See Also
+    --------
+    cpu_count : Get the size of this pool.
+    set_io_thread_count : The analogous function for the I/O thread pool.
     """
     if count < 1:
         raise ValueError("CPU count must be strictly positive")
diff --git a/python/pyarrow/tests/test_misc.py b/python/pyarrow/tests/test_misc.py
index 74764779361..012f15e16be 100644
--- a/python/pyarrow/tests/test_misc.py
+++ b/python/pyarrow/tests/test_misc.py
@@ -45,6 +45,16 @@ def test_cpu_count():
         pa.set_cpu_count(n)
 
 
+def test_io_thread_count():
+    n = pa.io_thread_count()
+    assert n > 0
+    try:
+        pa.set_io_thread_count(n + 5)
+        assert pa.io_thread_count() == n + 5
+    finally:
+        pa.set_io_thread_count(n)
+
+
 def test_build_info():
     assert isinstance(pa.cpp_build_info, pa.BuildInfo)
     assert isinstance(pa.cpp_version_info, pa.VersionInfo)
diff --git a/r/NAMESPACE b/r/NAMESPACE
index 82837cdd24c..fef22413d56 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -229,6 +229,7 @@ export(int16)
 export(int32)
 export(int64)
 export(int8)
+export(io_thread_count)
 export(is_in)
 export(large_binary)
 export(large_list_of)
@@ -261,6 +262,7 @@ export(record_batch)
 export(s3_bucket)
 export(schema)
 export(set_cpu_count)
+export(set_io_thread_count)
 export(starts_with)
 export(string)
 export(struct)
diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R
index 28cafcead3b..038467fcad0 100644
--- a/r/R/arrowExports.R
+++ b/r/R/arrowExports.R
@@ -1720,6 +1720,14 @@ SetCpuThreadPoolCapacity <- function(threads){
     invisible(.Call(`_arrow_SetCpuThreadPoolCapacity`, threads))
 }
 
+GetIOThreadPoolCapacity <- function(){
+    .Call(`_arrow_GetIOThreadPoolCapacity`)
+}
+
+SetIOThreadPoolCapacity <- function(threads){
+    invisible(.Call(`_arrow_SetIOThreadPoolCapacity`, threads))
+}
+
 Array__infer_type <- function(x){
     .Call(`_arrow_Array__infer_type`, x)
 }
diff --git a/r/R/config.R b/r/R/config.R
index 301d0fad547..af07ad9a9d2 100644
--- a/r/R/config.R
+++ b/r/R/config.R
@@ -28,3 +28,17 @@ cpu_count <- function() {
 set_cpu_count <- function(num_threads) {
   SetCpuThreadPoolCapacity(as.integer(num_threads))
 }
+
+#' Manage the global I/O thread pool in libarrow
+#'
+#' @export
+io_thread_count <- function() {
+  GetIOThreadPoolCapacity()
+}
+
+#' @rdname io_thread_count
+#' @param num_threads integer: New number of threads for thread pool
+#' @export
+set_io_thread_count <- function(num_threads) {
+  SetIOThreadPoolCapacity(as.integer(num_threads))
+}
diff --git a/r/_pkgdown.yml b/r/_pkgdown.yml
index b2266cde758..5bd8418db7b 100644
--- a/r/_pkgdown.yml
+++ b/r/_pkgdown.yml
@@ -168,6 +168,7 @@ reference:
     contents:
       - arrow_info
       - cpu_count
+      - io_thread_count
       - arrow_available
       - install_arrow
       - install_pyarrow
diff --git a/r/man/io_thread_count.Rd b/r/man/io_thread_count.Rd
new file mode 100644
index 00000000000..b1dfa0ba780
--- /dev/null
+++ b/r/man/io_thread_count.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/config.R
+\name{io_thread_count}
+\alias{io_thread_count}
+\alias{set_io_thread_count}
+\title{Manage the global I/O thread pool in libarrow}
+\usage{
+io_thread_count()
+
+set_io_thread_count(num_threads)
+}
+\arguments{
+\item{num_threads}{integer: New number of threads for thread pool}
+}
+\description{
+Manage the global I/O thread pool in libarrow
+}
diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp
index 20fd92f7fae..0a9b8394e4b 100644
--- a/r/src/arrowExports.cpp
+++ b/r/src/arrowExports.cpp
@@ -6759,6 +6759,36 @@ extern "C" SEXP _arrow_SetCpuThreadPoolCapacity(SEXP threads_sexp){
 }
 #endif
 
+// threadpool.cpp
+#if defined(ARROW_R_WITH_ARROW)
+int GetIOThreadPoolCapacity();
+extern "C" SEXP _arrow_GetIOThreadPoolCapacity(){
+BEGIN_CPP11
+	return cpp11::as_sexp(GetIOThreadPoolCapacity());
+END_CPP11
+}
+#else
+extern "C" SEXP _arrow_GetIOThreadPoolCapacity(){
+	Rf_error("Cannot call GetIOThreadPoolCapacity(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+}
+#endif
+
+// threadpool.cpp
+#if defined(ARROW_R_WITH_ARROW)
+void SetIOThreadPoolCapacity(int threads);
+extern "C" SEXP _arrow_SetIOThreadPoolCapacity(SEXP threads_sexp){
+BEGIN_CPP11
+	arrow::r::Input<int>::type threads(threads_sexp);
+	SetIOThreadPoolCapacity(threads);
+	return R_NilValue;
+END_CPP11
+}
+#else
+extern "C" SEXP _arrow_SetIOThreadPoolCapacity(SEXP threads_sexp){
+	Rf_error("Cannot call SetIOThreadPoolCapacity(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+}
+#endif
+
 // type_infer.cpp
 #if defined(ARROW_R_WITH_ARROW)
 std::shared_ptr<arrow::DataType> Array__infer_type(SEXP x);
@@ -7271,6 +7301,8 @@ static const R_CallMethodDef CallEntries[] = {
 		{ "_arrow_Table__from_dots", (DL_FUNC) &_arrow_Table__from_dots, 2}, 
 		{ "_arrow_GetCpuThreadPoolCapacity", (DL_FUNC) &_arrow_GetCpuThreadPoolCapacity, 0}, 
 		{ "_arrow_SetCpuThreadPoolCapacity", (DL_FUNC) &_arrow_SetCpuThreadPoolCapacity, 1}, 
+		{ "_arrow_GetIOThreadPoolCapacity", (DL_FUNC) &_arrow_GetIOThreadPoolCapacity, 0}, 
+		{ "_arrow_SetIOThreadPoolCapacity", (DL_FUNC) &_arrow_SetIOThreadPoolCapacity, 1}, 
 		{ "_arrow_Array__infer_type", (DL_FUNC) &_arrow_Array__infer_type, 1}, 
 		{ "_arrow_Table__Reset", (DL_FUNC) &_arrow_Table__Reset, 1}, 
 		{ "_arrow_RecordBatch__Reset", (DL_FUNC) &_arrow_RecordBatch__Reset, 1}, 
diff --git a/r/src/threadpool.cpp b/r/src/threadpool.cpp
index 0a2013d0304..fb5005517a4 100644
--- a/r/src/threadpool.cpp
+++ b/r/src/threadpool.cpp
@@ -48,4 +48,12 @@ void SetCpuThreadPoolCapacity(int threads) {
   StopIfNotOk(arrow::SetCpuThreadPoolCapacity(threads));
 }
 
+// [[arrow::export]]
+int GetIOThreadPoolCapacity() { return arrow::GetCpuThreadPoolCapacity(); }
+
+// [[arrow::export]]
+void SetIOThreadPoolCapacity(int threads) {
+  StopIfNotOk(arrow::SetCpuThreadPoolCapacity(threads));
+}
+
 #endif
diff --git a/r/tests/testthat/test-thread-pool.R b/r/tests/testthat/test-thread-pool.R
index 6ac9eab6dc9..dab46269ca6 100644
--- a/r/tests/testthat/test-thread-pool.R
+++ b/r/tests/testthat/test-thread-pool.R
@@ -24,3 +24,11 @@ test_that("can set/get cpu thread pool capacity", {
   set_cpu_count(old)
   expect_equal(cpu_count(), old)
 })
+
+test_that("can set/get I/O thread pool capacity", {
+  old <- io_thread_count()
+  set_io_thread_count(19)
+  expect_equal(io_thread_count(), 19L)
+  set_io_thread_count(old)
+  expect_equal(io_thread_count(), old)
+})

From 176988893e182ac418072ef8cd9a4bc598784d97 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Thu, 27 May 2021 03:20:45 +0000
Subject: [PATCH 302/719] ARROW-12843: [C++][R] Implement is_inf kernel

Closes #10375 from lidavidm/arrow-12843

Lead-authored-by: David Li <li.davidm96@gmail.com>
Co-authored-by: Ian Cook <ianmcook@gmail.com>
Signed-off-by: Yibo Cai <yibo.cai@arm.com>
---
 .../arrow/compute/kernels/scalar_validity.cc  |  59 ++++++++--
 .../compute/kernels/scalar_validity_test.cc   | 108 +++++++++++++++++-
 docs/source/cpp/compute.rst                   |  29 +++--
 docs/source/python/api/compute.rst            |   3 +
 r/NAMESPACE                                   |   3 +-
 r/R/arrow-datum.R                             |  14 +++
 r/R/dplyr-functions.R                         |  12 ++
 r/R/expression.R                              |   3 +-
 r/R/scalar.R                                  |   3 -
 r/tests/testthat/test-chunked-array.R         |  16 +++
 r/tests/testthat/test-dplyr.R                 |  21 ++++
 r/tests/testthat/test-scalar.R                |   2 +-
 12 files changed, 245 insertions(+), 28 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/scalar_validity.cc b/cpp/src/arrow/compute/kernels/scalar_validity.cc
index ebb3dca0d1e..ead88abc0f2 100644
--- a/cpp/src/arrow/compute/kernels/scalar_validity.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_validity.cc
@@ -60,6 +60,20 @@ struct IsValidOperator {
   }
 };
 
+struct IsFiniteOperator {
+  template <typename OutType, typename InType>
+  static constexpr OutType Call(KernelContext*, const InType& value, Status*) {
+    return std::isfinite(value);
+  }
+};
+
+struct IsInfOperator {
+  template <typename OutType, typename InType>
+  static constexpr OutType Call(KernelContext*, const InType& value, Status*) {
+    return std::isinf(value);
+  }
+};
+
 struct IsNullOperator {
   static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) {
     checked_cast<BooleanScalar*>(out)->value = !in.is_valid;
@@ -103,19 +117,38 @@ void MakeFunction(std::string name, const FunctionDoc* doc,
   DCHECK_OK(registry->AddFunction(std::move(func)));
 }
 
-template <typename InType>
-void AddIsNanKernel(const std::shared_ptr<DataType>& ty, ScalarFunction* func) {
-  DCHECK_OK(
-      func->AddKernel({ty}, boolean(),
-                      applicator::ScalarUnary<BooleanType, InType, IsNanOperator>::Exec));
+template <typename InType, typename Op>
+void AddFloatValidityKernel(const std::shared_ptr<DataType>& ty, ScalarFunction* func) {
+  DCHECK_OK(func->AddKernel({ty}, boolean(),
+                            applicator::ScalarUnary<BooleanType, InType, Op>::Exec));
+}
+
+std::shared_ptr<ScalarFunction> MakeIsFiniteFunction(std::string name,
+                                                     const FunctionDoc* doc) {
+  auto func = std::make_shared<ScalarFunction>(name, Arity::Unary(), doc);
+
+  AddFloatValidityKernel<FloatType, IsFiniteOperator>(float32(), func.get());
+  AddFloatValidityKernel<DoubleType, IsFiniteOperator>(float64(), func.get());
+
+  return func;
+}
+
+std::shared_ptr<ScalarFunction> MakeIsInfFunction(std::string name,
+                                                  const FunctionDoc* doc) {
+  auto func = std::make_shared<ScalarFunction>(name, Arity::Unary(), doc);
+
+  AddFloatValidityKernel<FloatType, IsInfOperator>(float32(), func.get());
+  AddFloatValidityKernel<DoubleType, IsInfOperator>(float64(), func.get());
+
+  return func;
 }
 
 std::shared_ptr<ScalarFunction> MakeIsNanFunction(std::string name,
                                                   const FunctionDoc* doc) {
   auto func = std::make_shared<ScalarFunction>(name, Arity::Unary(), doc);
 
-  AddIsNanKernel<FloatType>(float32(), func.get());
-  AddIsNanKernel<DoubleType>(float64(), func.get());
+  AddFloatValidityKernel<FloatType, IsNanOperator>(float32(), func.get());
+  AddFloatValidityKernel<DoubleType, IsNanOperator>(float64(), func.get());
 
   return func;
 }
@@ -159,6 +192,16 @@ const FunctionDoc is_valid_doc(
     "Return true if non-null",
     ("For each input value, emit true iff the value is valid (non-null)."), {"values"});
 
+const FunctionDoc is_finite_doc(
+    "Return true if value is finite",
+    ("For each input value, emit true iff the value is finite (not NaN, inf, or -inf)."),
+    {"values"});
+
+const FunctionDoc is_inf_doc(
+    "Return true if infinity",
+    ("For each input value, emit true iff the value is infinite (inf or -inf)."),
+    {"values"});
+
 const FunctionDoc is_null_doc("Return true if null",
                               ("For each input value, emit true iff the value is null."),
                               {"values"});
@@ -177,6 +220,8 @@ void RegisterScalarValidity(FunctionRegistry* registry) {
                registry, MemAllocation::PREALLOCATE,
                /*can_write_into_slices=*/true);
 
+  DCHECK_OK(registry->AddFunction(MakeIsFiniteFunction("is_finite", &is_finite_doc)));
+  DCHECK_OK(registry->AddFunction(MakeIsInfFunction("is_inf", &is_inf_doc)));
   DCHECK_OK(registry->AddFunction(MakeIsNanFunction("is_nan", &is_nan_doc)));
 }
 
diff --git a/cpp/src/arrow/compute/kernels/scalar_validity_test.cc b/cpp/src/arrow/compute/kernels/scalar_validity_test.cc
index 54fa5967f7a..1a7a1cbda15 100644
--- a/cpp/src/arrow/compute/kernels/scalar_validity_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_validity_test.cc
@@ -88,15 +88,107 @@ TEST_F(TestBooleanValidityKernels, ScalarIsNull) {
   CheckScalarUnary("is_null", MakeNullScalar(float64()), MakeScalar(true));
 }
 
+TEST_F(TestFloatValidityKernels, FloatArrayIsFinite) {
+  // All Inf
+  CheckScalarUnary("is_finite", ArrayFromJSON(float32(), "[Inf, -Inf, Inf, -Inf, Inf]"),
+                   ArrayFromJSON(boolean(), "[false, false, false, false, false]"));
+  // No Inf
+  CheckScalarUnary("is_finite",
+                   ArrayFromJSON(float32(), "[0.0, 1.0, 2.0, 3.0, NaN, null]"),
+                   ArrayFromJSON(boolean(), "[true, true, true, true, false, null]"));
+  // Some Inf
+  CheckScalarUnary("is_finite",
+                   ArrayFromJSON(float32(), "[0.0, Inf, 2.0, -Inf, NaN, null]"),
+                   ArrayFromJSON(boolean(), "[true, false, true, false, false, null]"));
+}
+
+TEST_F(TestDoubleValidityKernels, DoubleArrayIsFinite) {
+  // All Inf
+  CheckScalarUnary("is_finite", ArrayFromJSON(float64(), "[Inf, -Inf, Inf, -Inf, Inf]"),
+                   ArrayFromJSON(boolean(), "[false, false, false, false, false]"));
+  // No Inf
+  CheckScalarUnary("is_finite",
+                   ArrayFromJSON(float64(), "[0.0, 1.0, 2.0, 3.0, NaN, null]"),
+                   ArrayFromJSON(boolean(), "[true, true, true, true, false, null]"));
+  // Some Inf
+  CheckScalarUnary("is_finite",
+                   ArrayFromJSON(float64(), "[0.0, Inf, 2.0, -Inf, NaN, null]"),
+                   ArrayFromJSON(boolean(), "[true, false, true, false, false, null]"));
+}
+
+TEST_F(TestFloatValidityKernels, FloatScalarIsFinite) {
+  CheckScalarUnary("is_finite", MakeNullScalar(float32()), MakeNullScalar(boolean()));
+  CheckScalarUnary("is_finite", MakeScalar(42.0f), MakeScalar(true));
+  CheckScalarUnary("is_finite", MakeScalar(std::nanf("")), MakeScalar(false));
+  CheckScalarUnary("is_finite", MakeScalar(std::numeric_limits<float>::infinity()),
+                   MakeScalar(false));
+  CheckScalarUnary("is_finite", MakeScalar(-std::numeric_limits<float>::infinity()),
+                   MakeScalar(false));
+}
+
+TEST_F(TestDoubleValidityKernels, DoubleScalarIsFinite) {
+  CheckScalarUnary("is_finite", MakeNullScalar(float64()), MakeNullScalar(boolean()));
+  CheckScalarUnary("is_finite", MakeScalar(42.0), MakeScalar(true));
+  CheckScalarUnary("is_finite", MakeScalar(std::nan("")), MakeScalar(false));
+  CheckScalarUnary("is_finite", MakeScalar(std::numeric_limits<double>::infinity()),
+                   MakeScalar(false));
+  CheckScalarUnary("is_finite", MakeScalar(-std::numeric_limits<double>::infinity()),
+                   MakeScalar(false));
+}
+
+TEST_F(TestFloatValidityKernels, FloatArrayIsInf) {
+  // All Inf
+  CheckScalarUnary("is_inf", ArrayFromJSON(float32(), "[Inf, -Inf, Inf, -Inf, Inf]"),
+                   ArrayFromJSON(boolean(), "[true, true, true, true, true]"));
+  // No Inf
+  CheckScalarUnary("is_inf", ArrayFromJSON(float32(), "[0.0, 1.0, 2.0, 3.0, NaN, null]"),
+                   ArrayFromJSON(boolean(), "[false, false, false, false, false, null]"));
+  // Some Infs
+  CheckScalarUnary("is_inf", ArrayFromJSON(float32(), "[0.0, Inf, 2.0, -Inf, NaN, null]"),
+                   ArrayFromJSON(boolean(), "[false, true, false, true, false, null]"));
+}
+
+TEST_F(TestDoubleValidityKernels, DoubleArrayIsInf) {
+  // All Inf
+  CheckScalarUnary("is_inf", ArrayFromJSON(float64(), "[Inf, -Inf, Inf, -Inf, Inf]"),
+                   ArrayFromJSON(boolean(), "[true, true, true, true, true]"));
+  // No Inf
+  CheckScalarUnary("is_inf", ArrayFromJSON(float64(), "[0.0, 1.0, 2.0, 3.0, NaN, null]"),
+                   ArrayFromJSON(boolean(), "[false, false, false, false, false, null]"));
+  // Some Infs
+  CheckScalarUnary("is_inf", ArrayFromJSON(float64(), "[0.0, Inf, 2.0, -Inf, NaN, null]"),
+                   ArrayFromJSON(boolean(), "[false, true, false, true, false, null]"));
+}
+
+TEST_F(TestFloatValidityKernels, FloatScalarIsInf) {
+  CheckScalarUnary("is_inf", MakeNullScalar(float32()), MakeNullScalar(boolean()));
+  CheckScalarUnary("is_inf", MakeScalar(42.0f), MakeScalar(false));
+  CheckScalarUnary("is_inf", MakeScalar(std::nanf("")), MakeScalar(false));
+  CheckScalarUnary("is_inf", MakeScalar(std::numeric_limits<float>::infinity()),
+                   MakeScalar(true));
+  CheckScalarUnary("is_inf", MakeScalar(-std::numeric_limits<float>::infinity()),
+                   MakeScalar(true));
+}
+
+TEST_F(TestDoubleValidityKernels, DoubleScalarIsInf) {
+  CheckScalarUnary("is_inf", MakeNullScalar(float64()), MakeNullScalar(boolean()));
+  CheckScalarUnary("is_inf", MakeScalar(42.0), MakeScalar(false));
+  CheckScalarUnary("is_inf", MakeScalar(std::nan("")), MakeScalar(false));
+  CheckScalarUnary("is_inf", MakeScalar(std::numeric_limits<double>::infinity()),
+                   MakeScalar(true));
+  CheckScalarUnary("is_inf", MakeScalar(-std::numeric_limits<double>::infinity()),
+                   MakeScalar(true));
+}
+
 TEST_F(TestFloatValidityKernels, FloatArrayIsNan) {
   // All NaN
   CheckScalarUnary("is_nan", ArrayFromJSON(float32(), "[NaN, NaN, NaN, NaN, NaN]"),
                    ArrayFromJSON(boolean(), "[true, true, true, true, true]"));
   // No NaN
-  CheckScalarUnary("is_nan", ArrayFromJSON(float32(), "[0.0, 1.0, 2.0, 3.0, 4.0, null]"),
+  CheckScalarUnary("is_nan", ArrayFromJSON(float32(), "[0.0, 1.0, 2.0, 3.0, Inf, null]"),
                    ArrayFromJSON(boolean(), "[false, false, false, false, false, null]"));
   // Some NaNs
-  CheckScalarUnary("is_nan", ArrayFromJSON(float32(), "[0.0, NaN, 2.0, NaN, 4.0, null]"),
+  CheckScalarUnary("is_nan", ArrayFromJSON(float32(), "[0.0, NaN, 2.0, NaN, Inf, null]"),
                    ArrayFromJSON(boolean(), "[false, true, false, true, false, null]"));
 }
 
@@ -105,10 +197,10 @@ TEST_F(TestDoubleValidityKernels, DoubleArrayIsNan) {
   CheckScalarUnary("is_nan", ArrayFromJSON(float64(), "[NaN, NaN, NaN, NaN, NaN]"),
                    ArrayFromJSON(boolean(), "[true, true, true, true, true]"));
   // No NaN
-  CheckScalarUnary("is_nan", ArrayFromJSON(float64(), "[0.0, 1.0, 2.0, 3.0, 4.0, null]"),
+  CheckScalarUnary("is_nan", ArrayFromJSON(float64(), "[0.0, 1.0, 2.0, 3.0, Inf, null]"),
                    ArrayFromJSON(boolean(), "[false, false, false, false, false, null]"));
   // Some NaNs
-  CheckScalarUnary("is_nan", ArrayFromJSON(float64(), "[0.0, NaN, 2.0, NaN, 4.0, null]"),
+  CheckScalarUnary("is_nan", ArrayFromJSON(float64(), "[0.0, NaN, 2.0, NaN, Inf, null]"),
                    ArrayFromJSON(boolean(), "[false, true, false, true, false, null]"));
 }
 
@@ -116,12 +208,20 @@ TEST_F(TestFloatValidityKernels, FloatScalarIsNan) {
   CheckScalarUnary("is_nan", MakeNullScalar(float32()), MakeNullScalar(boolean()));
   CheckScalarUnary("is_nan", MakeScalar(42.0f), MakeScalar(false));
   CheckScalarUnary("is_nan", MakeScalar(std::nanf("")), MakeScalar(true));
+  CheckScalarUnary("is_nan", MakeScalar(std::numeric_limits<float>::infinity()),
+                   MakeScalar(false));
+  CheckScalarUnary("is_nan", MakeScalar(-std::numeric_limits<float>::infinity()),
+                   MakeScalar(false));
 }
 
 TEST_F(TestDoubleValidityKernels, DoubleScalarIsNan) {
   CheckScalarUnary("is_nan", MakeNullScalar(float64()), MakeNullScalar(boolean()));
   CheckScalarUnary("is_nan", MakeScalar(42.0), MakeScalar(false));
   CheckScalarUnary("is_nan", MakeScalar(std::nan("")), MakeScalar(true));
+  CheckScalarUnary("is_nan", MakeScalar(std::numeric_limits<double>::infinity()),
+                   MakeScalar(false));
+  CheckScalarUnary("is_nan", MakeScalar(-std::numeric_limits<double>::infinity()),
+                   MakeScalar(false));
 }
 
 }  // namespace compute
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index caf75a0b5ef..3cf244ca5e8 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -648,31 +648,40 @@ Structural transforms
 +==========================+============+================================================+=====================+=========+
 | fill_null                | Binary     | Boolean, Null, Numeric, Temporal, String-like  | Input type          | \(1)    |
 +--------------------------+------------+------------------------------------------------+---------------------+---------+
-| is_nan                   | Unary      | Float, Double                                  | Boolean             | \(2)    |
+| is_finite                | Unary      | Float, Double                                  | Boolean             | \(2)    |
 +--------------------------+------------+------------------------------------------------+---------------------+---------+
-| is_null                  | Unary      | Any                                            | Boolean             | \(3)    |
+| is_inf                   | Unary      | Float, Double                                  | Boolean             | \(3)    |
 +--------------------------+------------+------------------------------------------------+---------------------+---------+
-| is_valid                 | Unary      | Any                                            | Boolean             | \(4)    |
+| is_nan                   | Unary      | Float, Double                                  | Boolean             | \(4)    |
 +--------------------------+------------+------------------------------------------------+---------------------+---------+
-| list_value_length        | Unary      | List-like                                      | Int32 or Int64      | \(5)    |
+| is_null                  | Unary      | Any                                            | Boolean             | \(5)    |
 +--------------------------+------------+------------------------------------------------+---------------------+---------+
-| project                  | Varargs    | Any                                            | Struct              | \(6)    |
+| is_valid                 | Unary      | Any                                            | Boolean             | \(6)    |
++--------------------------+------------+------------------------------------------------+---------------------+---------+
+| list_value_length        | Unary      | List-like                                      | Int32 or Int64      | \(7)    |
++--------------------------+------------+------------------------------------------------+---------------------+---------+
+| project                  | Varargs    | Any                                            | Struct              | \(8)    |
 +--------------------------+------------+------------------------------------------------+---------------------+---------+
 
 * \(1) First input must be an array, second input a scalar of the same type.
   Output is an array of the same type as the inputs, and with the same values
   as the first input, except for nulls replaced with the second input value.
 
-* \(2) Output is true iff the corresponding input element is NaN.
+* \(2) Output is true iff the corresponding input element is finite (not Infinity,
+  -Infinity, or NaN).
+
+* \(3) Output is true iff the corresponding input element is Infinity/-Infinity.
+
+* \(4) Output is true iff the corresponding input element is NaN.
 
-* \(3) Output is true iff the corresponding input element is null.
+* \(5) Output is true iff the corresponding input element is null.
 
-* \(4) Output is true iff the corresponding input element is non-null.
+* \(6) Output is true iff the corresponding input element is non-null.
 
-* \(5) Each output element is the length of the corresponding input element
+* \(7) Each output element is the length of the corresponding input element
   (null if input is null).  Output type is Int32 for List, Int64 for LargeList.
 
-* \(6) The output struct's field types are the types of its arguments. The
+* \(8) The output struct's field types are the types of its arguments. The
   field names are specified using an instance of :struct:`ProjectOptions`.
   The output shape will be scalar if all inputs are scalar, otherwise any
   scalars will be broadcast to arrays.
diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst
index eacb061dcbc..91eeeedbeaa 100644
--- a/docs/source/python/api/compute.rst
+++ b/docs/source/python/api/compute.rst
@@ -222,6 +222,9 @@ Structural Transforms
 
    binary_length
    fill_null
+   is_finite
+   is_inf
+   is_nan
    is_null
    is_valid
    list_value_length
diff --git a/r/NAMESPACE b/r/NAMESPACE
index fef22413d56..d38431d97e7 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -46,9 +46,10 @@ S3method(head,ArrowDatum)
 S3method(head,ArrowTabular)
 S3method(head,Dataset)
 S3method(head,arrow_dplyr_query)
+S3method(is.finite,ArrowDatum)
+S3method(is.infinite,ArrowDatum)
 S3method(is.na,ArrowDatum)
 S3method(is.na,Expression)
-S3method(is.na,Scalar)
 S3method(is.nan,ArrowDatum)
 S3method(is_in,ArrowDatum)
 S3method(is_in,default)
diff --git a/r/R/arrow-datum.R b/r/R/arrow-datum.R
index f7c1d4d4ed7..3be8d75af0b 100644
--- a/r/R/arrow-datum.R
+++ b/r/R/arrow-datum.R
@@ -32,6 +32,20 @@ ArrowDatum <- R6Class("ArrowDatum", inherit = ArrowObject,
 #' @export
 length.ArrowDatum <- function(x) x$length()
 
+#' @export
+is.finite.ArrowDatum <- function(x) {
+  is_fin <- call_function("is_finite", x)
+  # for compatibility with base::is.finite(), return FALSE for NA_real_
+  is_fin & !is.na(is_fin)
+}
+
+#' @export
+is.infinite.ArrowDatum <- function(x) {
+  is_inf <- call_function("is_inf", x)
+  # for compatibility with base::is.infinite(), return FALSE for NA_real_
+  is_inf & !is.na(is_inf)
+}
+
 #' @export
 is.na.ArrowDatum <- function(x) call_function("is_null", x)
 
diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R
index e3ff5cecebd..7e0eadfdcea 100644
--- a/r/R/dplyr-functions.R
+++ b/r/R/dplyr-functions.R
@@ -72,6 +72,18 @@ nse_funcs$between <- function(x, left, right) {
   x >= left & x <= right
 }
 
+nse_funcs$is.finite <- function(x) {
+  is_fin <- Expression$create("is_finite", x)
+  # for compatibility with base::is.finite(), return FALSE for NA_real_
+  is_fin & !nse_funcs$is.na(is_fin)
+}
+
+nse_funcs$is.infinite <- function(x) {
+  is_inf <- Expression$create("is_inf", x)
+  # for compatibility with base::is.infinite(), return FALSE for NA_real_
+  is_inf & !nse_funcs$is.na(is_inf)
+}
+
 # as.* type casting functions
 # as.factor() is mapped in expression.R
 nse_funcs$as.character <- function(x) {
diff --git a/r/R/expression.R b/r/R/expression.R
index 3b24b09bb8b..99d98b6af0a 100644
--- a/r/R/expression.R
+++ b/r/R/expression.R
@@ -22,8 +22,7 @@
   "as.factor" = "dictionary_encode",
   "is.na" = "is_null",
   "is.nan" = "is_nan",
-  # nchar is defined in dplyr.R because it is more complex
-  # "nchar" = "utf8_length",
+  # nchar is defined in dplyr-functions.R
   "tolower" = "utf8_lower",
   "toupper" = "utf8_upper",
   # stringr spellings of those
diff --git a/r/R/scalar.R b/r/R/scalar.R
index cbda5964a2c..54fe37f081e 100644
--- a/r/R/scalar.R
+++ b/r/R/scalar.R
@@ -72,8 +72,5 @@ StructScalar <- R6Class("StructScalar",
 #' @export
 length.Scalar <- function(x) 1L
 
-#' @export
-is.na.Scalar <- function(x) !x$is_valid
-
 #' @export
 sort.Scalar <- function(x, decreasing = FALSE, ...) x
diff --git a/r/tests/testthat/test-chunked-array.R b/r/tests/testthat/test-chunked-array.R
index f5b2dca2e44..f9b102c6819 100644
--- a/r/tests/testthat/test-chunked-array.R
+++ b/r/tests/testthat/test-chunked-array.R
@@ -108,6 +108,22 @@ test_that("ChunkedArray handles !!! splicing", {
   expect_equal(x$num_chunks, 3L)
 })
 
+test_that("ChunkedArray handles Inf", {
+  data <- list(c(Inf, 2:10), c(1:3, Inf, 5L), 1:10)
+  x <- chunked_array(!!!data)
+  expect_equal(x$type, float64())
+  expect_equal(x$num_chunks, 3L)
+  expect_equal(length(x), 25L)
+  expect_equal(as.vector(x), c(c(Inf, 2:10), c(1:3, Inf, 5), 1:10))
+
+  chunks <- x$chunks
+  expect_equal(as.vector(is.infinite(chunks[[2]])), is.infinite(data[[2]]))
+  expect_equal(
+    as.vector(is.infinite(x)),
+    c(is.infinite(data[[1]]), is.infinite(data[[2]]), is.infinite(data[[3]]))
+  )
+})
+
 test_that("ChunkedArray handles NA", {
   data <- list(1:10, c(NA, 2:10), c(1:3, NA, 5L))
   x <- chunked_array(!!!data)
diff --git a/r/tests/testthat/test-dplyr.R b/r/tests/testthat/test-dplyr.R
index 46d30e37823..bf5f06b038c 100644
--- a/r/tests/testthat/test-dplyr.R
+++ b/r/tests/testthat/test-dplyr.R
@@ -503,6 +503,27 @@ test_that("explicit type conversions with as.*()", {
   )
 })
 
+test_that("is.finite(), is.infinite(), is.nan()", {
+  df <- tibble(x =c(-4.94065645841246544e-324, 1.79769313486231570e+308, 0,
+                    NA_real_, NaN, Inf, -Inf))
+  expect_dplyr_equal(
+    input %>%
+      transmute(
+        is_fin = is.finite(x),
+        is_inf = is.infinite(x)
+      ) %>% collect(),
+    df
+  )
+  skip("is.nan() evaluates to NA on NA values (ARROW-12850)")
+  expect_dplyr_equal(
+    input %>%
+      transmute(
+        is_nan = is.nan(x)
+      ) %>% collect(),
+    df
+  )
+})
+
 test_that("as.factor()/dictionary_encode()", {
   skip("ARROW-12632: ExecuteScalarExpression cannot Execute non-scalar expression {x=dictionary_encode(x, {NON-REPRESENTABLE OPTIONS})}")
   df1 <- tibble(x = c("C", "D", "B", NA, "D", "B", "S", "A", "B", "Z", "B"))
diff --git a/r/tests/testthat/test-scalar.R b/r/tests/testthat/test-scalar.R
index 21b2836496e..d0b13423463 100644
--- a/r/tests/testthat/test-scalar.R
+++ b/r/tests/testthat/test-scalar.R
@@ -26,7 +26,7 @@ expect_scalar_roundtrip <- function(x, type) {
     # Should this be? Missing if all elements are missing?
     # expect_identical(is.na(s), all(is.na(x)))
   } else {
-    expect_identical(is.na(s), is.na(x))
+    expect_identical(as.vector(is.na(s)), is.na(x))
     # MakeArrayFromScalar not implemented for list types
     expect_equal(as.vector(s), x)
   }

From 2db58d2644eb925af4c0e9951fb29e28d5803d62 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Thu, 27 May 2021 15:49:44 +0200
Subject: [PATCH 303/719] ARROW-12887: [CI] AppVeyor SSL certificate issue

It seems git+https downloads can fail with a certificate error in some situations.

Closes #10407 from pitrou/appveyor-conda-dll

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 ci/appveyor-cpp-setup.bat | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ci/appveyor-cpp-setup.bat b/ci/appveyor-cpp-setup.bat
index 616232d202c..261b1183f5b 100644
--- a/ci/appveyor-cpp-setup.bat
+++ b/ci/appveyor-cpp-setup.bat
@@ -86,12 +86,12 @@ if defined need_vcvarsall (
 @rem
 @rem Use clcache for faster builds
 @rem
-pip install -q git+https://github.com/frerich/clcache.git
+pip install -q clcache-alt || exit /B
 @rem Limit cache size to 500 MB
 clcache -M 500000000
 clcache -c
 clcache -s
-powershell.exe -Command "Start-Process clcache-server"
+powershell.exe -Command "Start-Process clcache-server" || exit /B
 
 @rem
 @rem Download Minio somewhere on PATH, for unit tests

From 7634d0b0d70bcfad32b07418d49f3d24b76ab24d Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Thu, 27 May 2021 10:16:01 -0400
Subject: [PATCH 304/719] ARROW-11849: [R] Use roxygen @examplesIf

Swap out dontrun and donttest tags in examples and replace with examplesIf

Closes #10373 from thisisnic/ARROW-11849_examplesIf

Lead-authored-by: Nic Crane <thisisnic@gmail.com>
Co-authored-by: Ian Cook <ianmcook@gmail.com>
Signed-off-by: Ian Cook <ianmcook@gmail.com>
---
 r/DESCRIPTION              |  2 +-
 r/R/compute.R              |  4 +---
 r/R/csv.R                  |  8 ++------
 r/R/dataset-partition.R    |  4 +---
 r/R/feather.R              |  8 ++------
 r/R/field.R                |  4 +---
 r/R/filesystem.R           |  4 +---
 r/R/json.R                 |  4 +---
 r/R/parquet.R              | 12 +++---------
 r/R/record-batch-reader.R  |  4 +---
 r/R/record-batch-writer.R  |  4 +---
 r/R/record-batch.R         |  4 +---
 r/R/schema.R               |  8 ++------
 r/R/table.R                |  4 +---
 r/R/type.R                 |  4 +---
 r/man/Field.Rd             |  4 ++--
 r/man/ParquetFileReader.Rd |  4 ++--
 r/man/RecordBatch.Rd       |  4 ++--
 r/man/RecordBatchReader.Rd |  4 ++--
 r/man/RecordBatchWriter.Rd |  4 ++--
 r/man/Schema.Rd            |  4 ++--
 r/man/Table.Rd             |  4 ++--
 r/man/call_function.Rd     |  4 ++--
 r/man/copy_files.Rd        |  4 ++--
 r/man/data-type.Rd         |  4 ++--
 r/man/hive_partition.Rd    |  4 ++--
 r/man/read_delim_arrow.Rd  |  4 ++--
 r/man/read_feather.Rd      |  4 ++--
 r/man/read_json_arrow.Rd   |  4 ++--
 r/man/read_parquet.Rd      |  4 ++--
 r/man/unify_schemas.Rd     |  4 ++--
 r/man/write_csv_arrow.Rd   |  4 ++--
 r/man/write_feather.Rd     |  4 ++--
 r/man/write_parquet.Rd     |  4 ++--
 r/vignettes/developing.Rmd |  8 ++++++++
 35 files changed, 66 insertions(+), 96 deletions(-)

diff --git a/r/DESCRIPTION b/r/DESCRIPTION
index 82ca6fed617..451ac6c05f7 100644
--- a/r/DESCRIPTION
+++ b/r/DESCRIPTION
@@ -36,7 +36,7 @@ Imports:
     utils,
     vctrs
 Roxygen: list(markdown = TRUE, r6 = FALSE, load = "source")
-RoxygenNote: 7.1.1
+RoxygenNote: 7.1.1.9001
 VignetteBuilder: knitr
 Suggests:
     decor,
diff --git a/r/R/compute.R b/r/R/compute.R
index 43c3285481c..35dbd63b90f 100644
--- a/r/R/compute.R
+++ b/r/R/compute.R
@@ -31,15 +31,13 @@
 #' 0-based integers (consistent with C++).
 #' @return An `Array`, `ChunkedArray`, `Scalar`, `RecordBatch`, or `Table`, whatever the compute function results in.
 #' @seealso [Arrow C++ documentation](https://arrow.apache.org/docs/cpp/compute.html) for the functions and their respective options.
-#' @examples
-#' \donttest{
+#' @examplesIf arrow_available()
 #' a <- Array$create(c(1L, 2L, 3L, NA, 5L))
 #' s <- Scalar$create(4L)
 #' call_function("fill_null", a, s)
 #'
 #' a <- Array$create(rnorm(10000))
 #' call_function("quantile", a, options = list(q = seq(0, 1, 0.25)))
-#' }
 #' @export
 #' @include array.R
 #' @include chunked-array.R
diff --git a/r/R/csv.R b/r/R/csv.R
index 3357df52132..70435b7650a 100644
--- a/r/R/csv.R
+++ b/r/R/csv.R
@@ -129,8 +129,7 @@
 #'
 #' @return A `data.frame`, or a Table if `as_data_frame = FALSE`.
 #' @export
-#' @examples
-#' \donttest{
+#' @examplesIf arrow_available()
 #'   tf <- tempfile()
 #'   on.exit(unlink(tf))
 #'   write.csv(mtcars, file = tf)
@@ -138,7 +137,6 @@
 #'   dim(df)
 #'   # Can select columns
 #'   df <- read_csv_arrow(tf, col_select = starts_with("d"))
-#' }
 read_delim_arrow <- function(file,
                              delim = ",",
                              quote = '"',
@@ -615,12 +613,10 @@ readr_to_csv_convert_options <- function(na,
 #' @return The input `x`, invisibly. Note that if `sink` is an [OutputStream],
 #' the stream will be left open.
 #' @export
-#' @examples
-#' \donttest{
+#' @examplesIf arrow_available()
 #' tf <- tempfile()
 #' on.exit(unlink(tf))
 #' write_csv_arrow(mtcars, tf)
-#' }
 #' @include arrow-package.R
 write_csv_arrow <- function(x,
                             sink,
diff --git a/r/R/dataset-partition.R b/r/R/dataset-partition.R
index e40427a9f18..3c4f18a5692 100644
--- a/r/R/dataset-partition.R
+++ b/r/R/dataset-partition.R
@@ -88,10 +88,8 @@ HivePartitioning$create <- function(schm, null_fallback = NULL) {
 #' which is what Hive uses.
 #' @return A [HivePartitioning][Partitioning], or a `HivePartitioningFactory` if
 #' calling `hive_partition()` with no arguments.
-#' @examples
-#' \dontrun{
+#' @examplesIf arrow_with_dataset()
 #' hive_partition(year = int16(), month = int8())
-#' }
 #' @export
 hive_partition <- function(..., null_fallback = NULL) {
   schm <- schema(...)
diff --git a/r/R/feather.R b/r/R/feather.R
index a9781106b03..d0f4a7e6257 100644
--- a/r/R/feather.R
+++ b/r/R/feather.R
@@ -45,12 +45,10 @@
 #' @export
 #' @seealso [RecordBatchWriter] for lower-level access to writing Arrow IPC data.
 #' @seealso [Schema] for information about schemas and metadata handling.
-#' @examples
-#' \donttest{
+#' @examplesIf arrow_available()
 #' tf <- tempfile()
 #' on.exit(unlink(tf))
 #' write_feather(mtcars, tf)
-#' }
 #' @include arrow-package.R
 write_feather <- function(x,
                           sink,
@@ -132,8 +130,7 @@ write_feather <- function(x,
 #'
 #' @export
 #' @seealso [FeatherReader] and [RecordBatchReader] for lower-level access to reading Arrow IPC data.
-#' @examples
-#' \donttest{
+#' @examplesIf arrow_available()
 #' tf <- tempfile()
 #' on.exit(unlink(tf))
 #' write_feather(mtcars, tf)
@@ -141,7 +138,6 @@ write_feather <- function(x,
 #' dim(df)
 #' # Can select columns
 #' df <- read_feather(tf, col_select = starts_with("d"))
-#' }
 read_feather <- function(file, col_select = NULL, as_data_frame = TRUE, ...) {
   if (!inherits(file, "RandomAccessFile")) {
     file <- make_readable_file(file, ...)
diff --git a/r/R/field.R b/r/R/field.R
index 33549d344c5..e4fba2af0b8 100644
--- a/r/R/field.R
+++ b/r/R/field.R
@@ -64,10 +64,8 @@ Field$create <- function(name, type, metadata) {
 #' @param type logical type, instance of [DataType]
 #' @param metadata currently ignored
 #'
-#' @examples
-#' \donttest{
+#' @examplesIf arrow_available()
 #' field("x", int32())
-#' }
 #' @rdname Field
 #' @export
 field <- Field$create
diff --git a/r/R/filesystem.R b/r/R/filesystem.R
index 3a624fd89e1..a42cf92b628 100644
--- a/r/R/filesystem.R
+++ b/r/R/filesystem.R
@@ -476,15 +476,13 @@ SubTreeFileSystem$create <- function(base_path, base_fs = NULL) {
 #' copying but may help accommodate high latency FileSystems.
 #' @return Nothing: called for side effects in the file system
 #' @export
-#' @examples
-#' \dontrun{
+#' @examplesIf FALSE
 #' # Copy an S3 bucket's files to a local directory:
 #' copy_files("s3://your-bucket-name", "local-directory")
 #' # Using a FileSystem object
 #' copy_files(s3_bucket("your-bucket-name"), "local-directory")
 #' # Or go the other way, from local to S3
 #' copy_files("local-directory", s3_bucket("your-bucket-name"))
-#' }
 copy_files <- function(from, to, chunk_size = 1024L * 1024L) {
   from <- get_path_and_filesystem(from)
   to <- get_path_and_filesystem(to)
diff --git a/r/R/json.R b/r/R/json.R
index 89595a5b0ae..cc4866512cd 100644
--- a/r/R/json.R
+++ b/r/R/json.R
@@ -25,8 +25,7 @@
 #'
 #' @return A `data.frame`, or a Table if `as_data_frame = FALSE`.
 #' @export
-#' @examples
-#' \donttest{
+#' @examplesIf arrow_available()
 #'   tf <- tempfile()
 #'   on.exit(unlink(tf))
 #'   writeLines('
@@ -35,7 +34,6 @@
 #'     { "hello": 0.0, "world": true, "yo": null }
 #'   ', tf, useBytes=TRUE)
 #'   df <- read_json_arrow(tf)
-#' }
 read_json_arrow <- function(file,
                             col_select = NULL,
                             as_data_frame = TRUE,
diff --git a/r/R/parquet.R b/r/R/parquet.R
index 169d9f57f52..9baecb3fb60 100644
--- a/r/R/parquet.R
+++ b/r/R/parquet.R
@@ -26,14 +26,12 @@
 #'
 #' @return A [arrow::Table][Table], or a `data.frame` if `as_data_frame` is
 #' `TRUE` (the default).
-#' @examples
-#' \dontrun{
+#' @examplesIf arrow_with_parquet()
 #' tf <- tempfile()
 #' on.exit(unlink(tf))
 #' write_parquet(mtcars, tf)
 #' df <- read_parquet(tf, col_select = starts_with("d"))
 #' head(df)
-#' }
 #' @export
 read_parquet <- function(file,
                          col_select = NULL,
@@ -127,8 +125,7 @@ read_parquet <- function(file,
 #'
 #' @return the input `x` invisibly.
 #'
-#' @examples
-#' \dontrun{
+#' @examplesIf arrow_with_parquet()
 #' tf1 <- tempfile(fileext = ".parquet")
 #' write_parquet(data.frame(x = 1:5), tf1)
 #'
@@ -137,7 +134,6 @@ read_parquet <- function(file,
 #'   tf2 <- tempfile(fileext = ".gz.parquet")
 #'   write_parquet(data.frame(x = 1:5), tf2, compression = "gzip", compression_level = 5)
 #' }
-#' }
 #' @export
 write_parquet <- function(x,
                           sink,
@@ -454,8 +450,7 @@ ParquetFileWriter$create <- function(schema,
 #' - `$num_row_groups`: number of row groups.
 #'
 #' @export
-#' @examples
-#' \dontrun{
+#' @examplesIf arrow_with_parquet()
 #' f <- system.file("v0.7.1.parquet", package="arrow")
 #' pq <- ParquetFileReader$create(f)
 #' pq$GetSchema()
@@ -464,7 +459,6 @@ ParquetFileWriter$create <- function(schema,
 #'   tab <- pq$ReadTable()
 #'   tab$schema
 #' }
-#' }
 #' @include arrow-package.R
 ParquetFileReader <- R6Class("ParquetFileReader",
   inherit = ArrowObject,
diff --git a/r/R/record-batch-reader.R b/r/R/record-batch-reader.R
index b4daa8b24e3..e00d24d8c6b 100644
--- a/r/R/record-batch-reader.R
+++ b/r/R/record-batch-reader.R
@@ -56,8 +56,7 @@
 #' @rdname RecordBatchReader
 #' @name RecordBatchReader
 #' @include arrow-package.R
-#' @examples
-#' \donttest{
+#' @examplesIf arrow_available()
 #' tf <- tempfile()
 #' on.exit(unlink(tf))
 #'
@@ -91,7 +90,6 @@
 #' # Unlike the Writers, we don't have to close RecordBatchReaders,
 #' # but we do still need to close the file connection
 #' read_file_obj$close()
-#' }
 RecordBatchReader <- R6Class("RecordBatchReader", inherit = ArrowObject,
   public = list(
     read_next_batch = function() RecordBatchReader__ReadNext(self),
diff --git a/r/R/record-batch-writer.R b/r/R/record-batch-writer.R
index 60e87c951dd..64c1cf0cec8 100644
--- a/r/R/record-batch-writer.R
+++ b/r/R/record-batch-writer.R
@@ -59,8 +59,7 @@
 #' @rdname RecordBatchWriter
 #' @name RecordBatchWriter
 #' @include arrow-package.R
-#' @examples
-#' \donttest{
+#' @examplesIf arrow_available()
 #' tf <- tempfile()
 #' on.exit(unlink(tf))
 #'
@@ -94,7 +93,6 @@
 #' # Unlike the Writers, we don't have to close RecordBatchReaders,
 #' # but we do still need to close the file connection
 #' read_file_obj$close()
-#' }
 RecordBatchWriter <- R6Class("RecordBatchWriter", inherit = ArrowObject,
   public = list(
     write_batch = function(batch) ipc___RecordBatchWriter__WriteRecordBatch(self, batch),
diff --git a/r/R/record-batch.R b/r/R/record-batch.R
index 2ad3408d706..1e41d6533a8 100644
--- a/r/R/record-batch.R
+++ b/r/R/record-batch.R
@@ -187,8 +187,7 @@ RecordBatch$from_message <- function(obj, schema) {
 #' @param schema a [Schema], or `NULL` (the default) to infer the schema from
 #' the data in `...`. When providing an Arrow IPC buffer, `schema` is required.
 #' @rdname RecordBatch
-#' @examples
-#' \donttest{
+#' @examplesIf arrow_available()
 #' batch <- record_batch(name = rownames(mtcars), mtcars)
 #' dim(batch)
 #' dim(head(batch))
@@ -196,7 +195,6 @@ RecordBatch$from_message <- function(obj, schema) {
 #' batch$mpg
 #' batch[["cyl"]]
 #' as.data.frame(batch[4:8, c("gear", "hp", "wt")])
-#' }
 #' @export
 record_batch <- RecordBatch$create
 
diff --git a/r/R/schema.R b/r/R/schema.R
index ec3bc43cf21..d0491fdf6e3 100644
--- a/r/R/schema.R
+++ b/r/R/schema.R
@@ -77,14 +77,12 @@
 #'
 #' @rdname Schema
 #' @name Schema
-#' @examples
-#' \donttest{
+#' @examplesIf arrow_available()
 #' df <- data.frame(col1 = 2:4, col2 = c(0.1, 0.3, 0.5))
 #' tab1 <- Table$create(df)
 #' tab1$schema
 #' tab2 <- Table$create(df, schema = schema(col1 = int8(), col2 = float32()))
 #' tab2$schema
-#' }
 #' @export
 Schema <- R6Class("Schema",
   inherit = ArrowObject,
@@ -284,12 +282,10 @@ read_schema <- function(stream, ...) {
 #' @param schemas Alternatively, a list of schemas
 #' @return A `Schema` with the union of fields contained in the inputs
 #' @export
-#' @examples
-#' \dontrun{
+#' @examplesIf arrow_available()
 #' a <- schema(b = double(), c = bool())
 #' z <- schema(b = double(), k = utf8())
 #' unify_schemas(a, z)
-#' }
 unify_schemas <- function(..., schemas = list(...)) {
   arrow__UnifySchemas(schemas)
 }
diff --git a/r/R/table.R b/r/R/table.R
index 2c432ac8983..7645e2f552c 100644
--- a/r/R/table.R
+++ b/r/R/table.R
@@ -85,8 +85,7 @@
 #' - `$columns`: Returns a list of `ChunkedArray`s
 #' @rdname Table
 #' @name Table
-#' @examples
-#' \donttest{
+#' @examplesIf arrow_available()
 #' tab <- Table$create(name = rownames(mtcars), mtcars)
 #' dim(tab)
 #' dim(head(tab))
@@ -94,7 +93,6 @@
 #' tab$mpg
 #' tab[["cyl"]]
 #' as.data.frame(tab[4:8, c("gear", "hp", "wt")])
-#' }
 #' @export
 Table <- R6Class("Table", inherit = ArrowTabular,
   public = list(
diff --git a/r/R/type.R b/r/R/type.R
index ecb9b48a185..fd7470ce88e 100644
--- a/r/R/type.R
+++ b/r/R/type.R
@@ -186,13 +186,11 @@ NestedType <- R6Class("NestedType", inherit = DataType)
 #' @return An Arrow type object inheriting from DataType.
 #' @export
 #' @seealso [dictionary()] for creating a dictionary (factor-like) type.
-#' @examples
-#' \donttest{
+#' @examplesIf arrow_available()
 #' bool()
 #' struct(a = int32(), b = double())
 #' timestamp("ms", timezone = "CEST")
 #' time64("ns")
-#' }
 int8 <- function() Int8__initialize()
 
 #' @rdname data-type
diff --git a/r/man/Field.Rd b/r/man/Field.Rd
index d5f147c595f..77d31fa637a 100644
--- a/r/man/Field.Rd
+++ b/r/man/Field.Rd
@@ -29,7 +29,7 @@ field(name, type, metadata)
 }
 
 \examples{
-\donttest{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
 field("x", int32())
-}
+\dontshow{\}) # examplesIf}
 }
diff --git a/r/man/ParquetFileReader.Rd b/r/man/ParquetFileReader.Rd
index 9885802011d..39146919768 100644
--- a/r/man/ParquetFileReader.Rd
+++ b/r/man/ParquetFileReader.Rd
@@ -45,7 +45,7 @@ The optional \verb{column_indices=} argument is a 0-based integer vector indicat
 }
 
 \examples{
-\dontrun{
+\dontshow{if (arrow_with_parquet()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
 f <- system.file("v0.7.1.parquet", package="arrow")
 pq <- ParquetFileReader$create(f)
 pq$GetSchema()
@@ -54,5 +54,5 @@ if (codec_is_available("snappy")) {
   tab <- pq$ReadTable()
   tab$schema
 }
-}
+\dontshow{\}) # examplesIf}
 }
diff --git a/r/man/RecordBatch.Rd b/r/man/RecordBatch.Rd
index 184fea99c7f..ff08c215853 100644
--- a/r/man/RecordBatch.Rd
+++ b/r/man/RecordBatch.Rd
@@ -80,7 +80,7 @@ All list elements are coerced to string. See \code{schema()} for more informatio
 }
 
 \examples{
-\donttest{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
 batch <- record_batch(name = rownames(mtcars), mtcars)
 dim(batch)
 dim(head(batch))
@@ -88,5 +88,5 @@ names(batch)
 batch$mpg
 batch[["cyl"]]
 as.data.frame(batch[4:8, c("gear", "hp", "wt")])
-}
+\dontshow{\}) # examplesIf}
 }
diff --git a/r/man/RecordBatchReader.Rd b/r/man/RecordBatchReader.Rd
index 6b204b0aae2..d2e1a6919e6 100644
--- a/r/man/RecordBatchReader.Rd
+++ b/r/man/RecordBatchReader.Rd
@@ -44,7 +44,7 @@ are in the file.
 }
 
 \examples{
-\donttest{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
 tf <- tempfile()
 on.exit(unlink(tf))
 
@@ -78,7 +78,7 @@ all.equal(df, chickwts, check.attributes = FALSE)
 # Unlike the Writers, we don't have to close RecordBatchReaders,
 # but we do still need to close the file connection
 read_file_obj$close()
-}
+\dontshow{\}) # examplesIf}
 }
 \seealso{
 \code{\link[=read_ipc_stream]{read_ipc_stream()}} and \code{\link[=read_feather]{read_feather()}} provide a much simpler interface
diff --git a/r/man/RecordBatchWriter.Rd b/r/man/RecordBatchWriter.Rd
index 038653b9e24..219c150e6a4 100644
--- a/r/man/RecordBatchWriter.Rd
+++ b/r/man/RecordBatchWriter.Rd
@@ -46,7 +46,7 @@ to be closed separately.
 }
 
 \examples{
-\donttest{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
 tf <- tempfile()
 on.exit(unlink(tf))
 
@@ -80,7 +80,7 @@ all.equal(df, chickwts, check.attributes = FALSE)
 # Unlike the Writers, we don't have to close RecordBatchReaders,
 # but we do still need to close the file connection
 read_file_obj$close()
-}
+\dontshow{\}) # examplesIf}
 }
 \seealso{
 \code{\link[=write_ipc_stream]{write_ipc_stream()}} and \code{\link[=write_feather]{write_feather()}} provide a much simpler
diff --git a/r/man/Schema.Rd b/r/man/Schema.Rd
index c2fb2fac681..6e385bb804e 100644
--- a/r/man/Schema.Rd
+++ b/r/man/Schema.Rd
@@ -75,11 +75,11 @@ the metadata is dropped.
 }
 
 \examples{
-\donttest{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
 df <- data.frame(col1 = 2:4, col2 = c(0.1, 0.3, 0.5))
 tab1 <- Table$create(df)
 tab1$schema
 tab2 <- Table$create(df, schema = schema(col1 = int8(), col2 = float32()))
 tab2$schema
-}
+\dontshow{\}) # examplesIf}
 }
diff --git a/r/man/Table.Rd b/r/man/Table.Rd
index 98a5c354ced..2675943e572 100644
--- a/r/man/Table.Rd
+++ b/r/man/Table.Rd
@@ -80,7 +80,7 @@ All list elements are coerced to string. See \code{schema()} for more informatio
 }
 
 \examples{
-\donttest{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
 tab <- Table$create(name = rownames(mtcars), mtcars)
 dim(tab)
 dim(head(tab))
@@ -88,5 +88,5 @@ names(tab)
 tab$mpg
 tab[["cyl"]]
 as.data.frame(tab[4:8, c("gear", "hp", "wt")])
-}
+\dontshow{\}) # examplesIf}
 }
diff --git a/r/man/call_function.Rd b/r/man/call_function.Rd
index e89fd00576e..f63038442dc 100644
--- a/r/man/call_function.Rd
+++ b/r/man/call_function.Rd
@@ -36,14 +36,14 @@ When passing indices in \code{...}, \code{args}, or \code{options}, express them
 0-based integers (consistent with C++).
 }
 \examples{
-\donttest{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
 a <- Array$create(c(1L, 2L, 3L, NA, 5L))
 s <- Scalar$create(4L)
 call_function("fill_null", a, s)
 
 a <- Array$create(rnorm(10000))
 call_function("quantile", a, options = list(q = seq(0, 1, 0.25)))
-}
+\dontshow{\}) # examplesIf}
 }
 \seealso{
 \href{https://arrow.apache.org/docs/cpp/compute.html}{Arrow C++ documentation} for the functions and their respective options.
diff --git a/r/man/copy_files.Rd b/r/man/copy_files.Rd
index 65edf56cb48..1b83703f19f 100644
--- a/r/man/copy_files.Rd
+++ b/r/man/copy_files.Rd
@@ -24,12 +24,12 @@ Nothing: called for side effects in the file system
 Copy files between FileSystems
 }
 \examples{
-\dontrun{
+\dontshow{if (FALSE) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
 # Copy an S3 bucket's files to a local directory:
 copy_files("s3://your-bucket-name", "local-directory")
 # Using a FileSystem object
 copy_files(s3_bucket("your-bucket-name"), "local-directory")
 # Or go the other way, from local to S3
 copy_files("local-directory", s3_bucket("your-bucket-name"))
-}
+\dontshow{\}) # examplesIf}
 }
diff --git a/r/man/data-type.Rd b/r/man/data-type.Rd
index f113455a90d..a0631897573 100644
--- a/r/man/data-type.Rd
+++ b/r/man/data-type.Rd
@@ -151,12 +151,12 @@ types, this conversion can be disabled (so that \code{int64} always yields a
 \code{bit64::integer64} object) by setting \code{options(arrow.int64_downcast = FALSE)}.
 }
 \examples{
-\donttest{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
 bool()
 struct(a = int32(), b = double())
 timestamp("ms", timezone = "CEST")
 time64("ns")
-}
+\dontshow{\}) # examplesIf}
 }
 \seealso{
 \code{\link[=dictionary]{dictionary()}} for creating a dictionary (factor-like) type.
diff --git a/r/man/hive_partition.Rd b/r/man/hive_partition.Rd
index ab427f746b3..aeb9cd4b3d1 100644
--- a/r/man/hive_partition.Rd
+++ b/r/man/hive_partition.Rd
@@ -26,7 +26,7 @@ Because fields are named in the path segments, order of fields passed to
 \code{hive_partition()} does not matter.
 }
 \examples{
-\dontrun{
+\dontshow{if (arrow_with_dataset()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
 hive_partition(year = int16(), month = int8())
-}
+\dontshow{\}) # examplesIf}
 }
diff --git a/r/man/read_delim_arrow.Rd b/r/man/read_delim_arrow.Rd
index f676b9fc75d..71394e547c9 100644
--- a/r/man/read_delim_arrow.Rd
+++ b/r/man/read_delim_arrow.Rd
@@ -206,7 +206,7 @@ to idenfity column names, you'll need to add \code{skip = 1} to skip that row.
 }
 
 \examples{
-\donttest{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
   tf <- tempfile()
   on.exit(unlink(tf))
   write.csv(mtcars, file = tf)
@@ -214,5 +214,5 @@ to idenfity column names, you'll need to add \code{skip = 1} to skip that row.
   dim(df)
   # Can select columns
   df <- read_csv_arrow(tf, col_select = starts_with("d"))
-}
+\dontshow{\}) # examplesIf}
 }
diff --git a/r/man/read_feather.Rd b/r/man/read_feather.Rd
index fe3a7f1e23d..95f4d1d12c6 100644
--- a/r/man/read_feather.Rd
+++ b/r/man/read_feather.Rd
@@ -35,7 +35,7 @@ This function reads both the original, limited specification of the format
 and the version 2 specification, which is the Apache Arrow IPC file format.
 }
 \examples{
-\donttest{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
 tf <- tempfile()
 on.exit(unlink(tf))
 write_feather(mtcars, tf)
@@ -43,7 +43,7 @@ df <- read_feather(tf)
 dim(df)
 # Can select columns
 df <- read_feather(tf, col_select = starts_with("d"))
-}
+\dontshow{\}) # examplesIf}
 }
 \seealso{
 \link{FeatherReader} and \link{RecordBatchReader} for lower-level access to reading Arrow IPC data.
diff --git a/r/man/read_json_arrow.Rd b/r/man/read_json_arrow.Rd
index 83765b2c51a..4806b4ad1f0 100644
--- a/r/man/read_json_arrow.Rd
+++ b/r/man/read_json_arrow.Rd
@@ -39,7 +39,7 @@ A \code{data.frame}, or a Table if \code{as_data_frame = FALSE}.
 Using \link{JsonTableReader}
 }
 \examples{
-\donttest{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
   tf <- tempfile()
   on.exit(unlink(tf))
   writeLines('
@@ -48,5 +48,5 @@ Using \link{JsonTableReader}
     { "hello": 0.0, "world": true, "yo": null }
   ', tf, useBytes=TRUE)
   df <- read_json_arrow(tf)
-}
+\dontshow{\}) # examplesIf}
 }
diff --git a/r/man/read_parquet.Rd b/r/man/read_parquet.Rd
index 5d6e2e2d5b3..056e8644747 100644
--- a/r/man/read_parquet.Rd
+++ b/r/man/read_parquet.Rd
@@ -40,11 +40,11 @@ A \link[=Table]{arrow::Table}, or a \code{data.frame} if \code{as_data_frame} is
 This function enables you to read Parquet files into R.
 }
 \examples{
-\dontrun{
+\dontshow{if (arrow_with_parquet()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
 tf <- tempfile()
 on.exit(unlink(tf))
 write_parquet(mtcars, tf)
 df <- read_parquet(tf, col_select = starts_with("d"))
 head(df)
-}
+\dontshow{\}) # examplesIf}
 }
diff --git a/r/man/unify_schemas.Rd b/r/man/unify_schemas.Rd
index a6b7ec0f015..609581914ad 100644
--- a/r/man/unify_schemas.Rd
+++ b/r/man/unify_schemas.Rd
@@ -18,9 +18,9 @@ A \code{Schema} with the union of fields contained in the inputs
 Combine and harmonize schemas
 }
 \examples{
-\dontrun{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
 a <- schema(b = double(), c = bool())
 z <- schema(b = double(), k = utf8())
 unify_schemas(a, z)
-}
+\dontshow{\}) # examplesIf}
 }
diff --git a/r/man/write_csv_arrow.Rd b/r/man/write_csv_arrow.Rd
index dfed1613149..55a239ca998 100644
--- a/r/man/write_csv_arrow.Rd
+++ b/r/man/write_csv_arrow.Rd
@@ -24,9 +24,9 @@ the stream will be left open.
 Write CSV file to disk
 }
 \examples{
-\donttest{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
 tf <- tempfile()
 on.exit(unlink(tf))
 write_csv_arrow(mtcars, tf)
-}
+\dontshow{\}) # examplesIf}
 }
diff --git a/r/man/write_feather.Rd b/r/man/write_feather.Rd
index 691adbeef05..c6273b61be8 100644
--- a/r/man/write_feather.Rd
+++ b/r/man/write_feather.Rd
@@ -48,11 +48,11 @@ This function writes both the original, limited specification of the format
 and the version 2 specification, which is the Apache Arrow IPC file format.
 }
 \examples{
-\donttest{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
 tf <- tempfile()
 on.exit(unlink(tf))
 write_feather(mtcars, tf)
-}
+\dontshow{\}) # examplesIf}
 }
 \seealso{
 \link{RecordBatchWriter} for lower-level access to writing Arrow IPC data.
diff --git a/r/man/write_parquet.Rd b/r/man/write_parquet.Rd
index c89c709dfb0..d7147f7e8e6 100644
--- a/r/man/write_parquet.Rd
+++ b/r/man/write_parquet.Rd
@@ -95,7 +95,7 @@ disable compression, set \code{compression = "uncompressed"}.
 Note that "uncompressed" columns may still have dictionary encoding.
 }
 \examples{
-\dontrun{
+\dontshow{if (arrow_with_parquet()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
 tf1 <- tempfile(fileext = ".parquet")
 write_parquet(data.frame(x = 1:5), tf1)
 
@@ -104,5 +104,5 @@ if (codec_is_available("gzip")) {
   tf2 <- tempfile(fileext = ".gz.parquet")
   write_parquet(data.frame(x = 1:5), tf2, compression = "gzip", compression_level = 5)
 }
-}
+\dontshow{\}) # examplesIf}
 }
diff --git a/r/vignettes/developing.Rmd b/r/vignettes/developing.Rmd
index dcda13098ef..8de751b6947 100644
--- a/r/vignettes/developing.Rmd
+++ b/r/vignettes/developing.Rmd
@@ -282,6 +282,14 @@ cmake \
 </p>
 </details>  
 
+### Documentation
+
+The documentation for the R package uses features of `roxygen2` that haven't yet been released on CRAN, such as conditional inclusion of examples via the `@examplesIf` tag.  If you are making changes which require updating the documentation, please install the development version of `roxygen2` from GitHub. 
+
+```{r}
+remotes::install_github("r-lib/roxygen2")
+```
+
 ## Troubleshooting
 
 Note that after any change to the C++ library, you must reinstall it and

From 822a5a2a744fb5ba3bb21a76cea0b8fcc052cf41 Mon Sep 17 00:00:00 2001
From: Ying Zhou <yingzhou474@gmail.com>
Date: Thu, 27 May 2021 17:52:58 +0200
Subject: [PATCH 305/719] ARROW-9299: [C++][Python] Expose ORC metadata

Closes #10157 from mathyingzhou/ARROW-9299

Lead-authored-by: Ying Zhou <yingzhou474@gmail.com>
Co-authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/adapters/orc/adapter.cc      | 23 +++++++++++++---------
 cpp/src/arrow/adapters/orc/adapter.h       |  5 +++++
 cpp/src/arrow/adapters/orc/adapter_test.cc |  5 +++++
 python/pyarrow/_orc.pxd                    |  2 ++
 python/pyarrow/_orc.pyx                    | 20 +++++++++++++++++++
 python/pyarrow/orc.py                      |  5 +++++
 python/pyarrow/tests/test_orc.py           |  6 ++++--
 7 files changed, 55 insertions(+), 11 deletions(-)

diff --git a/cpp/src/arrow/adapters/orc/adapter.cc b/cpp/src/arrow/adapters/orc/adapter.cc
index 4548b9923a7..2f74b40e40d 100644
--- a/cpp/src/arrow/adapters/orc/adapter.cc
+++ b/cpp/src/arrow/adapters/orc/adapter.cc
@@ -229,6 +229,15 @@ class ORCFileReader::Impl {
     return GetArrowSchema(type, out);
   }
 
+  Result<std::shared_ptr<const KeyValueMetadata>> ReadMetadata() {
+    const std::list<std::string> keys = reader_->getMetadataKeys();
+    auto metadata = std::make_shared<KeyValueMetadata>();
+    for (const auto& key : keys) {
+      metadata->Append(key, reader_->getMetadataValue(key));
+    }
+    return std::const_pointer_cast<const KeyValueMetadata>(metadata);
+  }
+
   Status GetArrowSchema(const liborc::Type& type, std::shared_ptr<Schema>* out) {
     if (type.getKind() != liborc::STRUCT) {
       return Status::NotImplemented(
@@ -243,15 +252,7 @@ class ORCFileReader::Impl {
       std::string name = type.getFieldName(child);
       fields.push_back(field(name, elemtype));
     }
-    std::list<std::string> keys = reader_->getMetadataKeys();
-    std::shared_ptr<KeyValueMetadata> metadata;
-    if (!keys.empty()) {
-      metadata = std::make_shared<KeyValueMetadata>();
-      for (auto it = keys.begin(); it != keys.end(); ++it) {
-        metadata->Append(*it, reader_->getMetadataValue(*it));
-      }
-    }
-
+    ARROW_ASSIGN_OR_RAISE(auto metadata, ReadMetadata());
     *out = std::make_shared<Schema>(std::move(fields), std::move(metadata));
     return Status::OK();
   }
@@ -435,6 +436,10 @@ Status ORCFileReader::Open(const std::shared_ptr<io::RandomAccessFile>& file,
   return Status::OK();
 }
 
+Result<std::shared_ptr<const KeyValueMetadata>> ORCFileReader::ReadMetadata() {
+  return impl_->ReadMetadata();
+}
+
 Status ORCFileReader::ReadSchema(std::shared_ptr<Schema>* out) {
   return impl_->ReadSchema(out);
 }
diff --git a/cpp/src/arrow/adapters/orc/adapter.h b/cpp/src/arrow/adapters/orc/adapter.h
index 86dfd2c9241..012c1701980 100644
--- a/cpp/src/arrow/adapters/orc/adapter.h
+++ b/cpp/src/arrow/adapters/orc/adapter.h
@@ -48,6 +48,11 @@ class ARROW_EXPORT ORCFileReader {
   static Status Open(const std::shared_ptr<io::RandomAccessFile>& file, MemoryPool* pool,
                      std::unique_ptr<ORCFileReader>* reader);
 
+  /// \brief Return the metadata read from the ORC file
+  ///
+  /// \return A KeyValueMetadata object containing the ORC metadata
+  Result<std::shared_ptr<const KeyValueMetadata>> ReadMetadata();
+
   /// \brief Return the schema read from the ORC file
   ///
   /// \param[out] out the returned Schema object
diff --git a/cpp/src/arrow/adapters/orc/adapter_test.cc b/cpp/src/arrow/adapters/orc/adapter_test.cc
index 7bf0b61774d..9f7fb561362 100644
--- a/cpp/src/arrow/adapters/orc/adapter_test.cc
+++ b/cpp/src/arrow/adapters/orc/adapter_test.cc
@@ -36,6 +36,7 @@
 #include "arrow/testing/random.h"
 #include "arrow/type.h"
 #include "arrow/util/decimal.h"
+#include "arrow/util/key_value_metadata.h"
 
 namespace liborc = orc;
 
@@ -326,6 +327,10 @@ TEST(TestAdapterRead, ReadIntAndStringFileMultipleStripes) {
   ASSERT_TRUE(
       adapters::orc::ORCFileReader::Open(in_stream, default_memory_pool(), &reader).ok());
 
+  EXPECT_OK_AND_ASSIGN(auto metadata, reader->ReadMetadata());
+  auto expected_metadata = std::const_pointer_cast<const KeyValueMetadata>(
+      key_value_metadata(std::vector<std::string>(), std::vector<std::string>()));
+  ASSERT_TRUE(metadata->Equals(*expected_metadata));
   ASSERT_EQ(stripe_row_count * stripe_count, reader->NumberOfRows());
   ASSERT_EQ(stripe_count, reader->NumberOfStripes());
   accumulated = 0;
diff --git a/python/pyarrow/_orc.pxd b/python/pyarrow/_orc.pxd
index 51d0bbd73a3..fd72ac42930 100644
--- a/python/pyarrow/_orc.pxd
+++ b/python/pyarrow/_orc.pxd
@@ -39,6 +39,8 @@ cdef extern from "arrow/adapters/orc/adapter.h" \
                      CMemoryPool* pool,
                      unique_ptr[ORCFileReader]* reader)
 
+        CResult[shared_ptr[const CKeyValueMetadata]] ReadMetadata()
+
         CStatus ReadSchema(shared_ptr[CSchema]* out)
 
         CStatus ReadStripe(int64_t stripe, shared_ptr[CRecordBatch]* out)
diff --git a/python/pyarrow/_orc.pyx b/python/pyarrow/_orc.pyx
index e56a62d8def..d0457203446 100644
--- a/python/pyarrow/_orc.pyx
+++ b/python/pyarrow/_orc.pyx
@@ -27,11 +27,13 @@ from pyarrow.includes.libarrow cimport *
 from pyarrow.lib cimport (check_status, _Weakrefable,
                           MemoryPool, maybe_unbox_memory_pool,
                           Schema, pyarrow_wrap_schema,
+                          KeyValueMetadata,
                           pyarrow_wrap_batch,
                           RecordBatch,
                           Table,
                           pyarrow_wrap_table,
                           pyarrow_unwrap_schema,
+                          pyarrow_wrap_metadata,
                           pyarrow_unwrap_table,
                           get_reader,
                           get_writer)
@@ -57,6 +59,24 @@ cdef class ORCReader(_Weakrefable):
             check_status(ORCFileReader.Open(rd_handle, self.allocator,
                                             &self.reader))
 
+    def metadata(self):
+        """
+        The arrow metadata for this file.
+
+        Returns
+        -------
+        metadata : pyarrow.KeyValueMetadata
+        """
+        cdef:
+            shared_ptr[const CKeyValueMetadata] sp_arrow_metadata
+
+        with nogil:
+            sp_arrow_metadata = GetResultValue(
+                deref(self.reader).ReadMetadata()
+            )
+
+        return pyarrow_wrap_metadata(sp_arrow_metadata)
+
     def schema(self):
         """
         The arrow schema for this file.
diff --git a/python/pyarrow/orc.py b/python/pyarrow/orc.py
index 13af5a2376a..20aaa31fc20 100644
--- a/python/pyarrow/orc.py
+++ b/python/pyarrow/orc.py
@@ -75,6 +75,11 @@ def __init__(self, source):
         self.reader.open(source)
         self._column_index_lookup = _schema_to_indices(self.schema)
 
+    @property
+    def metadata(self):
+        """The file metadata, as an arrow KeyValueMetadata"""
+        return self.reader.metadata()
+
     @property
     def schema(self):
         """The file schema, as an arrow schema"""
diff --git a/python/pyarrow/tests/test_orc.py b/python/pyarrow/tests/test_orc.py
index e71c4529c04..c9add765552 100644
--- a/python/pyarrow/tests/test_orc.py
+++ b/python/pyarrow/tests/test_orc.py
@@ -178,7 +178,8 @@ def test_orcfile_readwrite():
     table = pa.table({"int64": a, "utf8": b})
     orc.write_table(table, buffer_output_stream)
     buffer_reader = pa.BufferReader(buffer_output_stream.getvalue())
-    output_table = orc.ORCFile(buffer_reader).read()
+    orc_file = orc.ORCFile(buffer_reader)
+    output_table = orc_file.read()
     assert table.equals(output_table)
 
     # deprecated keyword order
@@ -186,5 +187,6 @@ def test_orcfile_readwrite():
     with pytest.warns(FutureWarning):
         orc.write_table(buffer_output_stream, table)
     buffer_reader = pa.BufferReader(buffer_output_stream.getvalue())
-    output_table = orc.ORCFile(buffer_reader).read()
+    orc_file = orc.ORCFile(buffer_reader)
+    output_table = orc_file.read()
     assert table.equals(output_table)

From 26de76e27e2a67f44128ca5946f3e0737ce39de7 Mon Sep 17 00:00:00 2001
From: Nate Clark <nate@neworld.us>
Date: Thu, 27 May 2021 17:53:42 +0200
Subject: [PATCH 306/719] ARROW-12675: [C++] CSV parsing report row on which
 error occurred

For serial CSV readers track the absolute row number and report it in errors encountered during parsing or converting.

I did try to get row numbers for the parallel reader but the only way I thought that could work would be to add delimiter counting to the Chunker but that seemed to add more complexity than I wanted to.

Closes #10321 from n3world/ARROW-12675-report_rows

Authored-by: Nate Clark <nate@neworld.us>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/csv/parser.cc       | 58 +++++++++++++++++++------
 cpp/src/arrow/csv/parser.h        | 22 +++++++---
 cpp/src/arrow/csv/parser_test.cc  | 71 +++++++++++++++++++++++++++---
 cpp/src/arrow/csv/reader.cc       | 37 ++++++++++++----
 cpp/src/arrow/dataset/file_csv.cc |  2 +-
 python/pyarrow/tests/test_csv.py  | 72 +++++++++++++++++++++++++++++--
 6 files changed, 223 insertions(+), 39 deletions(-)

diff --git a/cpp/src/arrow/csv/parser.cc b/cpp/src/arrow/csv/parser.cc
index 07e561276fa..446f36a4ee5 100644
--- a/cpp/src/arrow/csv/parser.cc
+++ b/cpp/src/arrow/csv/parser.cc
@@ -35,14 +35,24 @@ using detail::ParsedValueDesc;
 
 namespace {
 
-Status ParseError(const char* message) {
-  return Status::Invalid("CSV parse error: ", message);
+template <typename... Args>
+Status ParseError(Args&&... args) {
+  return Status::Invalid("CSV parse error: ", std::forward<Args>(args)...);
 }
 
-Status MismatchingColumns(int32_t expected, int32_t actual) {
-  char s[50];
-  snprintf(s, sizeof(s), "Expected %d columns, got %d", expected, actual);
-  return ParseError(s);
+Status MismatchingColumns(int32_t expected, int32_t actual, int64_t row_num,
+                          util::string_view row) {
+  std::string ellipse;
+  if (row.length() > 100) {
+    row = row.substr(0, 96);
+    ellipse = " ...";
+  }
+  if (row_num < 0) {
+    return ParseError("Expected ", expected, " columns, got ", actual, ": ", row,
+                      ellipse);
+  }
+  return ParseError("Row #", row_num, ": Expected ", expected, " columns, got ", actual,
+                    ": ", row, ellipse);
 }
 
 inline bool IsControlChar(uint8_t c) { return c < ' '; }
@@ -173,17 +183,24 @@ class PresizedValueDescWriter : public ValueDescWriter<PresizedValueDescWriter>
 class BlockParserImpl {
  public:
   BlockParserImpl(MemoryPool* pool, ParseOptions options, int32_t num_cols,
-                  int32_t max_num_rows)
-      : pool_(pool), options_(options), max_num_rows_(max_num_rows), batch_(num_cols) {}
+                  int64_t first_row, int32_t max_num_rows)
+      : pool_(pool),
+        options_(options),
+        first_row_(first_row),
+        max_num_rows_(max_num_rows),
+        batch_(num_cols) {}
 
   const DataBatch& parsed_batch() const { return batch_; }
 
+  int64_t first_row_num() const { return first_row_; }
+
   template <typename SpecializedOptions, typename ValueDescWriter, typename DataWriter>
   Status ParseLine(ValueDescWriter* values_writer, DataWriter* parsed_writer,
                    const char* data, const char* data_end, bool is_final,
                    const char** out_data) {
     int32_t num_cols = 0;
     char c;
+    const auto start = data;
 
     DCHECK_GT(data_end, data);
 
@@ -299,7 +316,17 @@ class BlockParserImpl {
       if (batch_.num_cols_ == -1) {
         batch_.num_cols_ = num_cols;
       } else {
-        return MismatchingColumns(batch_.num_cols_, num_cols);
+        // Find the end of the line without newline or carriage return
+        auto end = data;
+        if (*(end - 1) == '\n') {
+          --end;
+        }
+        if (*(end - 1) == '\r') {
+          --end;
+        }
+        return MismatchingColumns(batch_.num_cols_, num_cols,
+                                  first_row_ < 0 ? -1 : first_row_ + batch_.num_rows_,
+                                  util::string_view(start, end - start));
       }
     }
     ++batch_.num_rows_;
@@ -481,6 +508,7 @@ class BlockParserImpl {
  protected:
   MemoryPool* pool_;
   const ParseOptions options_;
+  const int64_t first_row_;
   // The maximum number of rows to parse from a block
   int32_t max_num_rows_;
 
@@ -490,12 +518,14 @@ class BlockParserImpl {
   DataBatch batch_;
 };
 
-BlockParser::BlockParser(ParseOptions options, int32_t num_cols, int32_t max_num_rows)
-    : BlockParser(default_memory_pool(), options, num_cols, max_num_rows) {}
+BlockParser::BlockParser(ParseOptions options, int32_t num_cols, int64_t first_row,
+                         int32_t max_num_rows)
+    : BlockParser(default_memory_pool(), options, num_cols, first_row, max_num_rows) {}
 
 BlockParser::BlockParser(MemoryPool* pool, ParseOptions options, int32_t num_cols,
-                         int32_t max_num_rows)
-    : impl_(new BlockParserImpl(pool, std::move(options), num_cols, max_num_rows)) {}
+                         int64_t first_row, int32_t max_num_rows)
+    : impl_(new BlockParserImpl(pool, std::move(options), num_cols, first_row,
+                                max_num_rows)) {}
 
 BlockParser::~BlockParser() {}
 
@@ -519,6 +549,8 @@ Status BlockParser::ParseFinal(util::string_view data, uint32_t* out_size) {
 
 const DataBatch& BlockParser::parsed_batch() const { return impl_->parsed_batch(); }
 
+int64_t BlockParser::first_row_num() const { return impl_->first_row_num(); }
+
 int32_t SkipRows(const uint8_t* data, uint32_t size, int32_t num_rows,
                  const uint8_t** out_data) {
   const auto end = data + size;
diff --git a/cpp/src/arrow/csv/parser.h b/cpp/src/arrow/csv/parser.h
index 4fcc52fb3a6..ffc735c228f 100644
--- a/cpp/src/arrow/csv/parser.h
+++ b/cpp/src/arrow/csv/parser.h
@@ -63,19 +63,26 @@ class ARROW_EXPORT DataBatch {
   uint32_t num_bytes() const { return parsed_size_; }
 
   template <typename Visitor>
-  Status VisitColumn(int32_t col_index, Visitor&& visit) const {
+  Status VisitColumn(int32_t col_index, int64_t first_row, Visitor&& visit) const {
     using detail::ParsedValueDesc;
 
+    int64_t row = first_row;
     for (size_t buf_index = 0; buf_index < values_buffers_.size(); ++buf_index) {
       const auto& values_buffer = values_buffers_[buf_index];
       const auto values = reinterpret_cast<const ParsedValueDesc*>(values_buffer->data());
       const auto max_pos =
           static_cast<int32_t>(values_buffer->size() / sizeof(ParsedValueDesc)) - 1;
-      for (int32_t pos = col_index; pos < max_pos; pos += num_cols_) {
+      for (int32_t pos = col_index; pos < max_pos; pos += num_cols_, ++row) {
         auto start = values[pos].offset;
         auto stop = values[pos + 1].offset;
         auto quoted = values[pos + 1].quoted;
-        ARROW_RETURN_NOT_OK(visit(parsed_ + start, stop - start, quoted));
+        Status status = visit(parsed_ + start, stop - start, quoted);
+        if (ARROW_PREDICT_FALSE(!status.ok())) {
+          if (first_row >= 0) {
+            status = status.WithMessage("Row #", row, ": ", status.message());
+          }
+          ARROW_RETURN_NOT_OK(status);
+        }
       }
     }
     return Status::OK();
@@ -134,9 +141,9 @@ constexpr int32_t kMaxParserNumRows = 100000;
 class ARROW_EXPORT BlockParser {
  public:
   explicit BlockParser(ParseOptions options, int32_t num_cols = -1,
-                       int32_t max_num_rows = kMaxParserNumRows);
+                       int64_t first_row = -1, int32_t max_num_rows = kMaxParserNumRows);
   explicit BlockParser(MemoryPool* pool, ParseOptions options, int32_t num_cols = -1,
-                       int32_t max_num_rows = kMaxParserNumRows);
+                       int64_t first_row = -1, int32_t max_num_rows = kMaxParserNumRows);
   ~BlockParser();
 
   /// \brief Parse a block of data
@@ -167,6 +174,8 @@ class ARROW_EXPORT BlockParser {
   int32_t num_cols() const { return parsed_batch().num_cols(); }
   /// \brief Return the total size in bytes of parsed data
   uint32_t num_bytes() const { return parsed_batch().num_bytes(); }
+  /// \brief Return the row number of the first row in the block or -1 if unsupported
+  int64_t first_row_num() const;
 
   /// \brief Visit parsed values in a column
   ///
@@ -174,7 +183,8 @@ class ARROW_EXPORT BlockParser {
   /// Status(const uint8_t* data, uint32_t size, bool quoted)
   template <typename Visitor>
   Status VisitColumn(int32_t col_index, Visitor&& visit) const {
-    return parsed_batch().VisitColumn(col_index, std::forward<Visitor>(visit));
+    return parsed_batch().VisitColumn(col_index, first_row_num(),
+                                      std::forward<Visitor>(visit));
   }
 
   template <typename Visitor>
diff --git a/cpp/src/arrow/csv/parser_test.cc b/cpp/src/arrow/csv/parser_test.cc
index 6414b379804..67cf4226a7a 100644
--- a/cpp/src/arrow/csv/parser_test.cc
+++ b/cpp/src/arrow/csv/parser_test.cc
@@ -20,6 +20,7 @@
 #include <utility>
 #include <vector>
 
+#include <gmock/gmock.h>
 #include <gtest/gtest.h>
 
 #include "arrow/csv/options.h"
@@ -295,7 +296,7 @@ TEST(BlockParser, Newlines) {
 
 TEST(BlockParser, MaxNumRows) {
   auto csv = MakeCSVData({"a\n", "b\n", "c\n", "d\n"});
-  BlockParser parser(ParseOptions::Defaults(), -1, 3 /* max_num_rows */);
+  BlockParser parser(ParseOptions::Defaults(), -1, 0, 3 /* max_num_rows */);
 
   AssertParsePartial(parser, csv, 6);
   AssertColumnsEq(parser, {{"a", "b", "c"}});
@@ -536,22 +537,37 @@ TEST(BlockParser, QuotesSpecial) {
 TEST(BlockParser, MismatchingNumColumns) {
   uint32_t out_size;
   {
-    BlockParser parser(ParseOptions::Defaults());
+    BlockParser parser(ParseOptions::Defaults(), -1, 0 /* first_row */);
     auto csv = MakeCSVData({"a,b\nc\n"});
     Status st = Parse(parser, csv, &out_size);
-    ASSERT_RAISES(Invalid, st);
+    EXPECT_RAISES_WITH_MESSAGE_THAT(
+        Invalid,
+        testing::HasSubstr("CSV parse error: Row #1: Expected 2 columns, got 1: c"), st);
   }
   {
-    BlockParser parser(ParseOptions::Defaults(), 2 /* num_cols */);
+    BlockParser parser(ParseOptions::Defaults(), 2 /* num_cols */, 0 /* first_row */);
     auto csv = MakeCSVData({"a\n"});
     Status st = Parse(parser, csv, &out_size);
-    ASSERT_RAISES(Invalid, st);
+    EXPECT_RAISES_WITH_MESSAGE_THAT(
+        Invalid,
+        testing::HasSubstr("CSV parse error: Row #0: Expected 2 columns, got 1: a"), st);
   }
   {
-    BlockParser parser(ParseOptions::Defaults(), 2 /* num_cols */);
+    BlockParser parser(ParseOptions::Defaults(), 2 /* num_cols */, 50 /* first_row */);
     auto csv = MakeCSVData({"a,b,c\n"});
     Status st = Parse(parser, csv, &out_size);
-    ASSERT_RAISES(Invalid, st);
+    EXPECT_RAISES_WITH_MESSAGE_THAT(
+        Invalid,
+        testing::HasSubstr("CSV parse error: Row #50: Expected 2 columns, got 3: a,b,c"),
+        st);
+  }
+  // No row number
+  {
+    BlockParser parser(ParseOptions::Defaults(), 2 /* num_cols */, -1);
+    auto csv = MakeCSVData({"a\n"});
+    Status st = Parse(parser, csv, &out_size);
+    EXPECT_RAISES_WITH_MESSAGE_THAT(
+        Invalid, testing::HasSubstr("CSV parse error: Expected 2 columns, got 1: a"), st);
   }
 }
 
@@ -623,5 +639,46 @@ TEST(BlockParser, QuotedEscape) {
   }
 }
 
+TEST(BlockParser, RowNumberAppendedToError) {
+  auto options = ParseOptions::Defaults();
+  auto csv = "a,b,c\nd,e,f\ng,h,i\n";
+  {
+    BlockParser parser(options, -1, 0);
+    ASSERT_NO_FATAL_FAILURE(AssertParseOk(parser, csv));
+    int row = 0;
+    auto status = parser.VisitColumn(
+        0, [row](const uint8_t* data, uint32_t size, bool quoted) mutable -> Status {
+          return ++row == 2 ? Status::Invalid("Bad value") : Status::OK();
+        });
+    EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, testing::HasSubstr("Row #1: Bad value"),
+                                    status);
+  }
+
+  {
+    BlockParser parser(options, -1, 100);
+    ASSERT_NO_FATAL_FAILURE(AssertParseOk(parser, csv));
+    int row = 0;
+    auto status = parser.VisitColumn(
+        0, [row](const uint8_t* data, uint32_t size, bool quoted) mutable -> Status {
+          return ++row == 3 ? Status::Invalid("Bad value") : Status::OK();
+        });
+    EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, testing::HasSubstr("Row #102: Bad value"),
+                                    status);
+  }
+
+  // No first row specified should not append row information
+  {
+    BlockParser parser(options, -1, -1);
+    ASSERT_NO_FATAL_FAILURE(AssertParseOk(parser, csv));
+    int row = 0;
+    auto status = parser.VisitColumn(
+        0, [row](const uint8_t* data, uint32_t size, bool quoted) mutable -> Status {
+          return ++row == 3 ? Status::Invalid("Bad value") : Status::OK();
+        });
+    EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, testing::Not(testing::HasSubstr("Row")),
+                                    status);
+  }
+}
+
 }  // namespace csv
 }  // namespace arrow
diff --git a/cpp/src/arrow/csv/reader.cc b/cpp/src/arrow/csv/reader.cc
index f05f8cac9a9..7a12cfea943 100644
--- a/cpp/src/arrow/csv/reader.cc
+++ b/cpp/src/arrow/csv/reader.cc
@@ -319,11 +319,13 @@ class ReaderMixin {
  public:
   ReaderMixin(io::IOContext io_context, std::shared_ptr<io::InputStream> input,
               const ReadOptions& read_options, const ParseOptions& parse_options,
-              const ConvertOptions& convert_options)
+              const ConvertOptions& convert_options, bool count_rows)
       : io_context_(std::move(io_context)),
         read_options_(read_options),
         parse_options_(parse_options),
         convert_options_(convert_options),
+        count_rows_(count_rows),
+        num_rows_seen_(count_rows_ ? 1 : -1),
         input_(std::move(input)) {}
 
  protected:
@@ -344,11 +346,15 @@ class ReaderMixin {
             " rows from CSV file, "
             "either file is too short or header is larger than block size");
       }
+      if (count_rows_) {
+        num_rows_seen_ = num_skipped_rows;
+      }
     }
 
     if (read_options_.column_names.empty()) {
       // Parse one row (either to read column names or to know the number of columns)
-      BlockParser parser(io_context_.pool(), parse_options_, num_csv_cols_, 1);
+      BlockParser parser(io_context_.pool(), parse_options_, num_csv_cols_,
+                         num_rows_seen_, 1);
       uint32_t parsed_size = 0;
       RETURN_NOT_OK(parser.Parse(
           util::string_view(reinterpret_cast<const char*>(data), data_end - data),
@@ -374,6 +380,9 @@ class ReaderMixin {
         DCHECK_EQ(static_cast<size_t>(parser.num_cols()), column_names_.size());
         // Skip parsed header row
         data += parsed_size;
+        if (count_rows_) {
+          ++num_rows_seen_;
+        }
       }
     } else {
       column_names_ = read_options_.column_names;
@@ -466,8 +475,8 @@ class ReaderMixin {
                             const std::shared_ptr<Buffer>& block, int64_t block_index,
                             bool is_final) {
     static constexpr int32_t max_num_rows = std::numeric_limits<int32_t>::max();
-    auto parser = std::make_shared<BlockParser>(io_context_.pool(), parse_options_,
-                                                num_csv_cols_, max_num_rows);
+    auto parser = std::make_shared<BlockParser>(
+        io_context_.pool(), parse_options_, num_csv_cols_, num_rows_seen_, max_num_rows);
 
     std::shared_ptr<Buffer> straddling;
     std::vector<util::string_view> views;
@@ -490,6 +499,9 @@ class ReaderMixin {
     } else {
       RETURN_NOT_OK(parser->Parse(views, &parsed_size));
     }
+    if (count_rows_) {
+      num_rows_seen_ += parser->num_rows();
+    }
     return ParseResult{std::move(parser), static_cast<int64_t>(parsed_size)};
   }
 
@@ -500,6 +512,10 @@ class ReaderMixin {
 
   // Number of columns in the CSV file
   int32_t num_csv_cols_ = -1;
+  // Whether num_rows_seen_ tracks the number of rows seen in the CSV being parsed
+  bool count_rows_;
+  // Number of rows seen in the csv. Not used if count_rows is false
+  int64_t num_rows_seen_;
   // Column names in the CSV file
   std::vector<std::string> column_names_;
   ConversionSchema conversion_schema_;
@@ -588,9 +604,9 @@ class BaseStreamingReader : public ReaderMixin, public csv::StreamingReader {
   BaseStreamingReader(io::IOContext io_context, Executor* cpu_executor,
                       std::shared_ptr<io::InputStream> input,
                       const ReadOptions& read_options, const ParseOptions& parse_options,
-                      const ConvertOptions& convert_options)
+                      const ConvertOptions& convert_options, bool count_rows)
       : ReaderMixin(io_context, std::move(input), read_options, parse_options,
-                    convert_options),
+                    convert_options, count_rows),
         cpu_executor_(cpu_executor) {}
 
   virtual Future<std::shared_ptr<csv::StreamingReader>> Init() = 0;
@@ -889,8 +905,9 @@ class AsyncThreadedTableReader
                            const ReadOptions& read_options,
                            const ParseOptions& parse_options,
                            const ConvertOptions& convert_options, Executor* cpu_executor)
+      // Count rows is currently not supported during parallel read
       : BaseTableReader(std::move(io_context), input, read_options, parse_options,
-                        convert_options),
+                        convert_options, /*count_rows=*/false),
         cpu_executor_(cpu_executor) {}
 
   ~AsyncThreadedTableReader() override {
@@ -992,7 +1009,8 @@ Result<std::shared_ptr<TableReader>> MakeTableReader(
         io_context, input, read_options, parse_options, convert_options, cpu_executor);
   } else {
     reader = std::make_shared<SerialTableReader>(io_context, input, read_options,
-                                                 parse_options, convert_options);
+                                                 parse_options, convert_options,
+                                                 /*count_rows=*/true);
   }
   RETURN_NOT_OK(reader->Init());
   return reader;
@@ -1004,7 +1022,8 @@ Future<std::shared_ptr<StreamingReader>> MakeStreamingReader(
     const ParseOptions& parse_options, const ConvertOptions& convert_options) {
   std::shared_ptr<BaseStreamingReader> reader;
   reader = std::make_shared<SerialStreamingReader>(
-      io_context, cpu_executor, input, read_options, parse_options, convert_options);
+      io_context, cpu_executor, input, read_options, parse_options, convert_options,
+      /*count_rows=*/true);
   return reader->Init();
 }
 
diff --git a/cpp/src/arrow/dataset/file_csv.cc b/cpp/src/arrow/dataset/file_csv.cc
index 9c11afec264..5a16a52c544 100644
--- a/cpp/src/arrow/dataset/file_csv.cc
+++ b/cpp/src/arrow/dataset/file_csv.cc
@@ -51,7 +51,7 @@ Result<std::unordered_set<std::string>> GetColumnNames(
     const csv::ParseOptions& parse_options, util::string_view first_block,
     MemoryPool* pool) {
   uint32_t parsed_size = 0;
-  csv::BlockParser parser(pool, parse_options, /*num_cols=*/-1,
+  csv::BlockParser parser(pool, parse_options, /*num_cols=*/-1, /*first_row=*/1,
                           /*max_num_rows=*/1);
 
   RETURN_NOT_OK(parser.Parse(util::string_view{first_block}, &parsed_size));
diff --git a/python/pyarrow/tests/test_csv.py b/python/pyarrow/tests/test_csv.py
index fef1ac60f37..3fa9ae02e4d 100644
--- a/python/pyarrow/tests/test_csv.py
+++ b/python/pyarrow/tests/test_csv.py
@@ -52,11 +52,12 @@ def generate_col_names():
             yield first + second
 
 
-def make_random_csv(num_cols=2, num_rows=10, linesep='\r\n'):
+def make_random_csv(num_cols=2, num_rows=10, linesep='\r\n', write_names=True):
     arr = np.random.RandomState(42).randint(0, 1000, size=(num_cols, num_rows))
-    col_names = list(itertools.islice(generate_col_names(), num_cols))
     csv = io.StringIO()
-    csv.write(",".join(col_names))
+    col_names = list(itertools.islice(generate_col_names(), num_cols))
+    if write_names:
+        csv.write(",".join(col_names))
     csv.write(linesep)
     for row in arr.T:
         csv.write(",".join(map(str, row)))
@@ -974,6 +975,71 @@ def read_csv(self, *args, validate_full=True, **kwargs):
         table.validate(full=validate_full)
         return table
 
+    def test_row_numbers_in_errors(self):
+        """ Row numbers are only correctly counted in serial reads """
+        csv, _ = make_random_csv(4, 100, write_names=True)
+
+        read_options = ReadOptions()
+        read_options.block_size = len(csv) / 3
+        convert_options = ConvertOptions()
+        convert_options.column_types = {"a": pa.int32(), "d": pa.int32()}
+
+        # Test without skip_rows and column names in the csv
+        csv_bad_columns = csv + b"1,2\r\n"
+        with pytest.raises(pa.ArrowInvalid,
+                           match="Row #102: Expected 4 columns, got 2"):
+            self.read_bytes(csv_bad_columns, read_options=read_options,
+                            convert_options=convert_options)
+
+        csv_bad_type = csv + b"a,b,c,d\r\n"
+        message = ("In CSV column #0: Row #102: " +
+                   "CSV conversion error to int32: invalid value 'a'")
+        with pytest.raises(pa.ArrowInvalid, match=message):
+            self.read_bytes(csv_bad_type, read_options=read_options,
+                            convert_options=convert_options)
+
+        long_row = (b"this is a long row" * 15) + b",3\r\n"
+        csv_bad_columns_long = csv + long_row
+        message = ("Row #102: Expected 4 columns, got 2: " +
+                   long_row[0:96].decode("utf-8") + " ...")
+        with pytest.raises(pa.ArrowInvalid, match=message):
+            self.read_bytes(csv_bad_columns_long, read_options=read_options,
+                            convert_options=convert_options)
+
+        # Test without skip_rows and column names not in the csv
+        csv, _ = make_random_csv(4, 100, write_names=False)
+        read_options.column_names = ["a", "b", "c", "d"]
+        csv_bad_columns = csv + b"1,2\r\n"
+        with pytest.raises(pa.ArrowInvalid,
+                           match="Row #101: Expected 4 columns, got 2"):
+            self.read_bytes(csv_bad_columns, read_options=read_options,
+                            convert_options=convert_options)
+
+        csv_bad_columns_long = csv + long_row
+        message = ("Row #101: Expected 4 columns, got 2: " +
+                   long_row[0:96].decode("utf-8") + " ...")
+        with pytest.raises(pa.ArrowInvalid, match=message):
+            self.read_bytes(csv_bad_columns_long, read_options=read_options,
+                            convert_options=convert_options)
+
+        csv_bad_type = csv + b"a,b,c,d\r\n"
+        message = ("In CSV column #0: Row #101: " +
+                   "CSV conversion error to int32: invalid value 'a'")
+        with pytest.raises(pa.ArrowInvalid, match=message):
+            self.read_bytes(csv_bad_type, read_options=read_options,
+                            convert_options=convert_options)
+
+        # Test with skip_rows and column names not in the csv
+        read_options.skip_rows = 23
+        with pytest.raises(pa.ArrowInvalid,
+                           match="Row #101: Expected 4 columns, got 2"):
+            self.read_bytes(csv_bad_columns, read_options=read_options,
+                            convert_options=convert_options)
+
+        with pytest.raises(pa.ArrowInvalid, match=message):
+            self.read_bytes(csv_bad_type, read_options=read_options,
+                            convert_options=convert_options)
+
 
 class TestParallelCSVRead(BaseTestCSVRead, unittest.TestCase):
 

From d0de88d8384c7593fac1b1e82b276d4a0d364767 Mon Sep 17 00:00:00 2001
From: Weston Pace <weston.pace@gmail.com>
Date: Thu, 27 May 2021 17:55:20 +0200
Subject: [PATCH 307/719] PARQUET-1798: [C++] Review logic around automatic
 assignment of field_id's

Questions:

- This is my first PR in the parquet namespace, I'm not sure of all the special rules.
- The field ID generation doesn't happen on the `parquet::schema` -> `arrow::schema` phase but on the `parquet::format::schema` -> `parquet::schema` phase.  So in order to test I had to add `#include "generated/parquet_types.h"` to `arrow_schema_test.cc` and I wasn't sure if I was allowed to reference the `generated/*` files like that.
- This PR simply allows user specified field id's to be persisted.  Is that sufficient for PARQUET-1798 (the title is rather general) or should I open up a dedicated JIRA?

Closes #10289 from westonpace/feature/PARQUET-1798-field-id-assignment

Lead-authored-by: Weston Pace <weston.pace@gmail.com>
Co-authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/parquet/arrow/arrow_schema_test.cc    | 138 ++++++++++++++++++
 cpp/src/parquet/arrow/reader.h                |   3 +
 cpp/src/parquet/arrow/schema.cc               |  42 +++++-
 cpp/src/parquet/arrow/writer.h                |   4 +
 cpp/src/parquet/schema.cc                     |  15 +-
 cpp/src/parquet/schema.h                      |   6 +-
 cpp/src/parquet/schema_test.cc                |   8 +-
 python/pyarrow/tests/parquet/test_metadata.py |  60 +++++---
 python/pyarrow/tests/test_extension_type.py   |   5 +-
 9 files changed, 236 insertions(+), 45 deletions(-)

diff --git a/cpp/src/parquet/arrow/arrow_schema_test.cc b/cpp/src/parquet/arrow/arrow_schema_test.cc
index 4c5a24bcb9f..880eba7a1bc 100644
--- a/cpp/src/parquet/arrow/arrow_schema_test.cc
+++ b/cpp/src/parquet/arrow/arrow_schema_test.cc
@@ -26,7 +26,9 @@
 #include "parquet/arrow/schema.h"
 #include "parquet/file_reader.h"
 #include "parquet/schema.h"
+#include "parquet/schema_internal.h"
 #include "parquet/test_util.h"
+#include "parquet/thrift_internal.h"
 
 #include "arrow/array.h"
 #include "arrow/testing/gtest_util.h"
@@ -41,6 +43,7 @@ using ParquetType = parquet::Type;
 using parquet::ConvertedType;
 using parquet::LogicalType;
 using parquet::Repetition;
+using parquet::format::SchemaElement;
 using parquet::internal::LevelInfo;
 using parquet::schema::GroupNode;
 using parquet::schema::NodePtr;
@@ -1157,6 +1160,141 @@ TEST_F(TestConvertArrowSchema, ParquetFlatDecimals) {
   ASSERT_NO_FATAL_FAILURE(CheckFlatSchema(parquet_fields));
 }
 
+class TestConvertRoundTrip : public ::testing::Test {
+ public:
+  ::arrow::Status RoundTripSchema(
+      const std::vector<std::shared_ptr<Field>>& fields,
+      std::shared_ptr<::parquet::ArrowWriterProperties> arrow_properties =
+          ::parquet::default_arrow_writer_properties()) {
+    arrow_schema_ = ::arrow::schema(fields);
+    std::shared_ptr<::parquet::WriterProperties> properties =
+        ::parquet::default_writer_properties();
+    RETURN_NOT_OK(ToParquetSchema(arrow_schema_.get(), *properties.get(),
+                                  *arrow_properties, &parquet_schema_));
+    ::parquet::schema::ToParquet(parquet_schema_->group_node(), &parquet_format_schema_);
+    auto parquet_schema = ::parquet::schema::FromParquet(parquet_format_schema_);
+    return FromParquetSchema(parquet_schema.get(), &result_schema_);
+  }
+
+ protected:
+  std::shared_ptr<::arrow::Schema> arrow_schema_;
+  std::shared_ptr<SchemaDescriptor> parquet_schema_;
+  std::vector<SchemaElement> parquet_format_schema_;
+  std::shared_ptr<::arrow::Schema> result_schema_;
+};
+
+int GetFieldId(const ::arrow::Field& field) {
+  if (field.metadata() == nullptr) {
+    return -1;
+  }
+  auto maybe_field = field.metadata()->Get("PARQUET:field_id");
+  if (!maybe_field.ok()) {
+    return -1;
+  }
+  return std::stoi(maybe_field.ValueOrDie());
+}
+
+void GetFieldIdsDfs(const ::arrow::FieldVector& fields, std::vector<int>* field_ids) {
+  for (const auto& field : fields) {
+    field_ids->push_back(GetFieldId(*field));
+    GetFieldIdsDfs(field->type()->fields(), field_ids);
+  }
+}
+
+std::vector<int> GetFieldIdsDfs(const ::arrow::FieldVector& fields) {
+  std::vector<int> field_ids;
+  GetFieldIdsDfs(fields, &field_ids);
+  return field_ids;
+}
+
+std::vector<int> GetParquetFieldIdsHelper(const parquet::schema::Node* node) {
+  std::vector<int> field_ids;
+  field_ids.push_back(node->field_id());
+  if (node->is_group()) {
+    const GroupNode* group_node = static_cast<const GroupNode*>(node);
+    for (int i = 0; i < group_node->field_count(); i++) {
+      for (auto id : GetParquetFieldIdsHelper(group_node->field(i).get())) {
+        field_ids.push_back(id);
+      }
+    }
+  }
+  return field_ids;
+}
+
+std::vector<int> GetParquetFieldIds(std::shared_ptr<SchemaDescriptor> parquet_schema) {
+  return GetParquetFieldIdsHelper(
+      static_cast<const parquet::schema::Node*>(parquet_schema->group_node()));
+}
+
+std::vector<int> GetThriftFieldIds(
+    const std::vector<SchemaElement>& parquet_format_schema) {
+  std::vector<int> field_ids;
+  for (const auto& element : parquet_format_schema) {
+    field_ids.push_back(element.field_id);
+  }
+  return field_ids;
+}
+
+TEST_F(TestConvertRoundTrip, FieldIdMissingIfNotSpecified) {
+  std::vector<std::shared_ptr<Field>> arrow_fields;
+  arrow_fields.push_back(::arrow::field("simple", ::arrow::int32(), false));
+  /// { "nested": { "outer": { "inner" }, "sibling" } }
+  arrow_fields.push_back(::arrow::field(
+      "nested",
+      ::arrow::struct_({::arrow::field("outer", ::arrow::struct_({::arrow::field(
+                                                    "inner", ::arrow::utf8())})),
+                        ::arrow::field("sibling", ::arrow::date32())}),
+      false));
+
+  ASSERT_OK(RoundTripSchema(arrow_fields));
+  auto field_ids = GetFieldIdsDfs(result_schema_->fields());
+  for (int actual_id : field_ids) {
+    ASSERT_EQ(actual_id, -1);
+  }
+  auto parquet_field_ids = GetParquetFieldIds(parquet_schema_);
+  for (int actual_id : parquet_field_ids) {
+    ASSERT_EQ(actual_id, -1);
+  }
+  // In our unit test a "not set" thrift field has a value of 0
+  auto thrift_field_ids = GetThriftFieldIds(parquet_format_schema_);
+  for (int actual_id : thrift_field_ids) {
+    ASSERT_EQ(actual_id, 0);
+  }
+}
+
+std::shared_ptr<::arrow::KeyValueMetadata> FieldIdMetadata(int field_id) {
+  return ::arrow::key_value_metadata({"PARQUET:field_id"}, {std::to_string(field_id)});
+}
+
+TEST_F(TestConvertRoundTrip, FieldIdPreserveExisting) {
+  std::vector<std::shared_ptr<Field>> arrow_fields;
+  arrow_fields.push_back(
+      ::arrow::field("simple", ::arrow::int32(), /*nullable=*/true, FieldIdMetadata(2)));
+  /// { "nested": { "outer": { "inner" }, "sibling" }
+  arrow_fields.push_back(::arrow::field(
+      "nested",
+      ::arrow::struct_({::arrow::field("outer", ::arrow::struct_({::arrow::field(
+                                                    "inner", ::arrow::utf8())})),
+                        ::arrow::field("sibling", ::arrow::date32(), /*nullable=*/true,
+                                       FieldIdMetadata(17))}),
+      false));
+
+  ASSERT_OK(RoundTripSchema(arrow_fields));
+  auto field_ids = GetFieldIdsDfs(result_schema_->fields());
+  auto expected_field_ids = std::vector<int>{2, -1, -1, -1, 17};
+  ASSERT_EQ(field_ids, expected_field_ids);
+
+  // Parquet has a field id for the schema itself
+  expected_field_ids = std::vector<int>{-1, 2, -1, -1, -1, 17};
+  auto parquet_ids = GetParquetFieldIds(parquet_schema_);
+  ASSERT_EQ(parquet_ids, expected_field_ids);
+
+  // In our unit test a "not set" thrift field has a value of 0
+  expected_field_ids = std::vector<int>{0, 2, 0, 0, 0, 17};
+  auto thrift_field_ids = GetThriftFieldIds(parquet_format_schema_);
+  ASSERT_EQ(thrift_field_ids, expected_field_ids);
+}
+
 TEST(InvalidSchema, ParquetNegativeDecimalScale) {
   const auto& type = ::arrow::decimal(23, -2);
   const auto& field = ::arrow::field("f0", type);
diff --git a/cpp/src/parquet/arrow/reader.h b/cpp/src/parquet/arrow/reader.h
index 4e75b25a4ae..765e2f6d39a 100644
--- a/cpp/src/parquet/arrow/reader.h
+++ b/cpp/src/parquet/arrow/reader.h
@@ -65,6 +65,9 @@ class RowGroupReader;
 /// `FileReader::RowGroup(i)->Column(j)->Read` and receive an `arrow::Column`
 /// instance.
 ///
+/// The parquet format supports an optional integer field_id which can be assigned
+/// to a field.  Arrow will convert these field IDs to a metadata key named
+/// PARQUET:field_id on the appropriate field.
 // TODO(wesm): nested data does not always make sense with this user
 // interface unless you are only reading a single leaf node from a branch of
 // a table. For example:
diff --git a/cpp/src/parquet/arrow/schema.cc b/cpp/src/parquet/arrow/schema.cc
index 75813da0b50..7610ce17605 100644
--- a/cpp/src/parquet/arrow/schema.cc
+++ b/cpp/src/parquet/arrow/schema.cc
@@ -30,6 +30,7 @@
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/key_value_metadata.h"
 #include "arrow/util/logging.h"
+#include "arrow/util/value_parsing.h"
 
 #include "parquet/arrow/schema_internal.h"
 #include "parquet/exception.h"
@@ -231,6 +232,40 @@ static Status GetTimestampMetadata(const ::arrow::TimestampType& type,
   return Status::OK();
 }
 
+static constexpr char FIELD_ID_KEY[] = "PARQUET:field_id";
+
+std::shared_ptr<::arrow::KeyValueMetadata> FieldIdMetadata(int field_id) {
+  if (field_id >= 0) {
+    return ::arrow::key_value_metadata({FIELD_ID_KEY}, {std::to_string(field_id)});
+  } else {
+    return nullptr;
+  }
+}
+
+int FieldIdFromMetadata(
+    const std::shared_ptr<const ::arrow::KeyValueMetadata>& metadata) {
+  if (!metadata) {
+    return -1;
+  }
+  int key = metadata->FindKey(FIELD_ID_KEY);
+  if (key < 0) {
+    return -1;
+  }
+  std::string field_id_str = metadata->value(key);
+  int field_id;
+  if (::arrow::internal::ParseValue<::arrow::Int32Type>(
+          field_id_str.c_str(), field_id_str.length(), &field_id)) {
+    if (field_id < 0) {
+      // Thrift should convert any negative value to null but normalize to -1 here in case
+      // we later check this in logic.
+      return -1;
+    }
+    return field_id;
+  } else {
+    return -1;
+  }
+}
+
 Status FieldToNode(const std::string& name, const std::shared_ptr<Field>& field,
                    const WriterProperties& properties,
                    const ArrowWriterProperties& arrow_properties, NodePtr* out) {
@@ -387,8 +422,9 @@ Status FieldToNode(const std::string& name, const std::shared_ptr<Field>& field,
     }
   }
 
+  int field_id = FieldIdFromMetadata(field->metadata());
   PARQUET_CATCH_NOT_OK(*out = PrimitiveNode::Make(name, repetition, logical_type, type,
-                                                  length));
+                                                  length, field_id));
 
   return Status::OK();
 }
@@ -453,10 +489,6 @@ bool HasStructListName(const GroupNode& node) {
   return name == "array" || name.ends_with("_tuple");
 }
 
-std::shared_ptr<::arrow::KeyValueMetadata> FieldIdMetadata(int field_id) {
-  return ::arrow::key_value_metadata({"PARQUET:field_id"}, {std::to_string(field_id)});
-}
-
 Status GroupToStruct(const GroupNode& node, LevelInfo current_levels,
                      SchemaTreeContext* ctx, const SchemaField* parent,
                      SchemaField* out) {
diff --git a/cpp/src/parquet/arrow/writer.h b/cpp/src/parquet/arrow/writer.h
index 60e373c664c..f31f3d03def 100644
--- a/cpp/src/parquet/arrow/writer.h
+++ b/cpp/src/parquet/arrow/writer.h
@@ -43,6 +43,10 @@ namespace arrow {
 ///
 /// Start a new RowGroup or Chunk with NewRowGroup.
 /// Write column-by-column the whole column chunk.
+///
+/// If PARQUET:field_id is present as a metadata key on a field, and the corresponding
+/// value is a nonnegative integer, then it will be used as the field_id in the parquet
+/// file.
 class PARQUET_EXPORT FileWriter {
  public:
   static ::arrow::Status Make(MemoryPool* pool, std::unique_ptr<ParquetFileWriter> writer,
diff --git a/cpp/src/parquet/schema.cc b/cpp/src/parquet/schema.cc
index bfb295f0be3..cfa6bdb2912 100644
--- a/cpp/src/parquet/schema.cc
+++ b/cpp/src/parquet/schema.cc
@@ -406,10 +406,11 @@ void GroupNode::VisitConst(Node::ConstVisitor* visitor) const { visitor->Visit(t
 // Node construction from Parquet metadata
 
 std::unique_ptr<Node> GroupNode::FromParquet(const void* opaque_element,
-                                             NodeVector fields, int field_id) {
+                                             NodeVector fields) {
   const format::SchemaElement* element =
       static_cast<const format::SchemaElement*>(opaque_element);
 
+  int field_id = -1;
   if (element->__isset.field_id) {
     field_id = element->field_id;
   }
@@ -431,11 +432,11 @@ std::unique_ptr<Node> GroupNode::FromParquet(const void* opaque_element,
   return std::unique_ptr<Node>(group_node.release());
 }
 
-std::unique_ptr<Node> PrimitiveNode::FromParquet(const void* opaque_element,
-                                                 int field_id) {
+std::unique_ptr<Node> PrimitiveNode::FromParquet(const void* opaque_element) {
   const format::SchemaElement* element =
       static_cast<const format::SchemaElement*>(opaque_element);
 
+  int field_id = -1;
   if (element->__isset.field_id) {
     field_id = element->field_id;
   }
@@ -538,7 +539,7 @@ std::unique_ptr<Node> Unflatten(const format::SchemaElement* elements, int lengt
   if (elements[0].num_children == 0) {
     if (length == 1) {
       // Degenerate case of Parquet file with no columns
-      return GroupNode::FromParquet(elements, {}, /*field_id=*/0);
+      return GroupNode::FromParquet(elements, {});
     } else {
       throw ParquetException(
           "Parquet schema had multiple nodes but root had no children");
@@ -549,19 +550,17 @@ std::unique_ptr<Node> Unflatten(const format::SchemaElement* elements, int lengt
   // consistently set by implementations
 
   int pos = 0;
-  int current_id = 0;
 
   std::function<std::unique_ptr<Node>()> NextNode = [&]() {
     if (pos == length) {
       throw ParquetException("Malformed schema: not enough elements");
     }
     const SchemaElement& element = elements[pos++];
-    int field_id = current_id++;
     const void* opaque_element = static_cast<const void*>(&element);
 
     if (element.num_children == 0 && element.__isset.type) {
       // Leaf (primitive) node: always has a type
-      return PrimitiveNode::FromParquet(opaque_element, field_id);
+      return PrimitiveNode::FromParquet(opaque_element);
     } else {
       // Group node (may have 0 children, but cannot have a type)
       NodeVector fields;
@@ -569,7 +568,7 @@ std::unique_ptr<Node> Unflatten(const format::SchemaElement* elements, int lengt
         std::unique_ptr<Node> field = NextNode();
         fields.push_back(NodePtr(field.release()));
       }
-      return GroupNode::FromParquet(opaque_element, std::move(fields), field_id);
+      return GroupNode::FromParquet(opaque_element, std::move(fields));
     }
   };
   return NextNode();
diff --git a/cpp/src/parquet/schema.h b/cpp/src/parquet/schema.h
index bf119d6624e..7dcfa7d144e 100644
--- a/cpp/src/parquet/schema.h
+++ b/cpp/src/parquet/schema.h
@@ -200,8 +200,7 @@ typedef std::vector<NodePtr> NodeVector;
 // parameters)
 class PARQUET_EXPORT PrimitiveNode : public Node {
  public:
-  // The field_id here is the default to use if it is not set in the SchemaElement
-  static std::unique_ptr<Node> FromParquet(const void* opaque_element, int field_id = -1);
+  static std::unique_ptr<Node> FromParquet(const void* opaque_element);
 
   // A field_id -1 (or any negative value) will be serialized as null in Thrift
   static inline NodePtr Make(const std::string& name, Repetition::type repetition,
@@ -266,9 +265,8 @@ class PARQUET_EXPORT PrimitiveNode : public Node {
 
 class PARQUET_EXPORT GroupNode : public Node {
  public:
-  // The field_id here is the default to use if it is not set in the SchemaElement
   static std::unique_ptr<Node> FromParquet(const void* opaque_element,
-                                           NodeVector fields = {}, int field_id = -1);
+                                           NodeVector fields = {});
 
   // A field_id -1 (or any negative value) will be serialized as null in Thrift
   static inline NodePtr Make(const std::string& name, Repetition::type repetition,
diff --git a/cpp/src/parquet/schema_test.cc b/cpp/src/parquet/schema_test.cc
index 43760d34ab4..703bac81086 100644
--- a/cpp/src/parquet/schema_test.cc
+++ b/cpp/src/parquet/schema_test.cc
@@ -121,7 +121,7 @@ class TestPrimitiveNode : public ::testing::Test {
   }
 
   void Convert(const format::SchemaElement* element) {
-    node_ = PrimitiveNode::FromParquet(element, field_id_);
+    node_ = PrimitiveNode::FromParquet(element);
     ASSERT_TRUE(node_->is_primitive());
     prim_node_ = static_cast<const PrimitiveNode*>(node_.get());
   }
@@ -1728,7 +1728,7 @@ TEST(TestSchemaNodeCreation, FactoryExceptions) {
   node->ToParquet(&string_intermediary);
   // ... corrupt the Thrift intermediary ....
   string_intermediary.logicalType.__isset.STRING = false;
-  ASSERT_ANY_THROW(node = PrimitiveNode::FromParquet(&string_intermediary, 1));
+  ASSERT_ANY_THROW(node = PrimitiveNode::FromParquet(&string_intermediary));
 
   // Invalid TimeUnit in deserialized TimeLogicalType ...
   node = PrimitiveNode::Make("time", Repetition::REQUIRED,
@@ -1738,7 +1738,7 @@ TEST(TestSchemaNodeCreation, FactoryExceptions) {
   node->ToParquet(&time_intermediary);
   // ... corrupt the Thrift intermediary ....
   time_intermediary.logicalType.TIME.unit.__isset.NANOS = false;
-  ASSERT_ANY_THROW(PrimitiveNode::FromParquet(&time_intermediary, 1));
+  ASSERT_ANY_THROW(PrimitiveNode::FromParquet(&time_intermediary));
 
   // Invalid TimeUnit in deserialized TimestampLogicalType ...
   node = PrimitiveNode::Make(
@@ -1748,7 +1748,7 @@ TEST(TestSchemaNodeCreation, FactoryExceptions) {
   node->ToParquet(&timestamp_intermediary);
   // ... corrupt the Thrift intermediary ....
   timestamp_intermediary.logicalType.TIMESTAMP.unit.__isset.NANOS = false;
-  ASSERT_ANY_THROW(PrimitiveNode::FromParquet(&timestamp_intermediary, 1));
+  ASSERT_ANY_THROW(PrimitiveNode::FromParquet(&timestamp_intermediary));
 }
 
 struct SchemaElementConstructionArguments {
diff --git a/python/pyarrow/tests/parquet/test_metadata.py b/python/pyarrow/tests/parquet/test_metadata.py
index 5679b8a6eb2..4c310661fe9 100644
--- a/python/pyarrow/tests/parquet/test_metadata.py
+++ b/python/pyarrow/tests/parquet/test_metadata.py
@@ -291,10 +291,27 @@ def test_parquet_write_disable_statistics(tempdir):
 
 def test_field_id_metadata():
     # ARROW-7080
-    table = pa.table([pa.array([1], type='int32'),
-                      pa.array([[]], type=pa.list_(pa.int32())),
-                      pa.array([b'boo'], type='binary')],
-                     ['f0', 'f1', 'f2'])
+    field_id = b'PARQUET:field_id'
+    inner = pa.field('inner', pa.int32(), metadata={field_id: b'100'})
+    middle = pa.field('middle', pa.struct(
+        [inner]), metadata={field_id: b'101'})
+    fields = [
+        pa.field('basic', pa.int32(), metadata={
+                 b'other': b'abc', field_id: b'1'}),
+        pa.field(
+            'list',
+            pa.list_(pa.field('list-inner', pa.int32(),
+                              metadata={field_id: b'10'})),
+            metadata={field_id: b'11'}),
+        pa.field('struct', pa.struct([middle]), metadata={field_id: b'102'}),
+        pa.field('no-metadata', pa.int32()),
+        pa.field('non-integral-field-id', pa.int32(),
+                 metadata={field_id: b'xyz'}),
+        pa.field('negative-field-id', pa.int32(),
+                 metadata={field_id: b'-1000'})
+    ]
+    arrs = [[] for _ in fields]
+    table = pa.table(arrs, schema=pa.schema(fields))
 
     bio = pa.BufferOutputStream()
     pq.write_table(table, bio)
@@ -303,28 +320,29 @@ def test_field_id_metadata():
     pf = pq.ParquetFile(pa.BufferReader(contents))
     schema = pf.schema_arrow
 
-    # Expected Parquet schema for reference
-    #
-    # required group field_id=0 schema {
-    #   optional int32 field_id=1 f0;
-    #   optional group field_id=2 f1 (List) {
-    #     repeated group field_id=3 list {
-    #       optional int32 field_id=4 item;
-    #     }
-    #   }
-    #   optional binary field_id=5 f2;
-    # }
-
-    field_name = b'PARQUET:field_id'
-    assert schema[0].metadata[field_name] == b'1'
+    assert schema[0].metadata[field_id] == b'1'
+    assert schema[0].metadata[b'other'] == b'abc'
 
     list_field = schema[1]
-    assert list_field.metadata[field_name] == b'2'
+    assert list_field.metadata[field_id] == b'11'
 
     list_item_field = list_field.type.value_field
-    assert list_item_field.metadata[field_name] == b'4'
+    assert list_item_field.metadata[field_id] == b'10'
+
+    struct_field = schema[2]
+    assert struct_field.metadata[field_id] == b'102'
+
+    struct_middle_field = struct_field.type[0]
+    assert struct_middle_field.metadata[field_id] == b'101'
+
+    struct_inner_field = struct_middle_field.type[0]
+    assert struct_inner_field.metadata[field_id] == b'100'
 
-    assert schema[2].metadata[field_name] == b'5'
+    assert schema[3].metadata is None
+    # Invalid input is passed through (ok) but does not
+    # have field_id in parquet (not tested)
+    assert schema[4].metadata[field_id] == b'xyz'
+    assert schema[5].metadata[field_id] == b'-1000'
 
 
 @pytest.mark.pandas
diff --git a/python/pyarrow/tests/test_extension_type.py b/python/pyarrow/tests/test_extension_type.py
index 8e9cb1a9300..ba8366a43c6 100644
--- a/python/pyarrow/tests/test_extension_type.py
+++ b/python/pyarrow/tests/test_extension_type.py
@@ -525,7 +525,7 @@ def test_parquet_period(tmpdir, registered_period_type):
     # When reading in, properly create extension type if it is registered
     result = pq.read_table(filename)
     assert result.schema.field("ext").type == period_type
-    assert result.schema.field("ext").metadata == {b'PARQUET:field_id': b'1'}
+    assert result.schema.field("ext").metadata == {}
     # Get the exact array class defined by the registered type.
     result_array = result.column("ext").chunk(0)
     assert type(result_array) is period_class
@@ -537,8 +537,7 @@ def test_parquet_period(tmpdir, registered_period_type):
     # The extension metadata is present for roundtripping.
     assert result.schema.field("ext").metadata == {
         b'ARROW:extension:metadata': b'freq=D',
-        b'ARROW:extension:name': b'test.period',
-        b'PARQUET:field_id': b'1',
+        b'ARROW:extension:name': b'test.period'
     }
 
 

From 809606df7d6d7e8877ba70d5ee7b995b2211b05d Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Fri, 28 May 2021 05:28:44 +0900
Subject: [PATCH 308/719] ARROW-12898: [Release][C#] Fix package upload

* Download URL is wrong
* Downloaded packages aren't removed

Closes #10418 from kou/release-csharp

Authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 dev/release/post-06-csharp.sh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/dev/release/post-06-csharp.sh b/dev/release/post-06-csharp.sh
index 84e0eec7c0f..2e816e46e49 100755
--- a/dev/release/post-06-csharp.sh
+++ b/dev/release/post-06-csharp.sh
@@ -47,12 +47,13 @@ for base_name in ${base_names[@]}; do
       --fail \
       --location \
       --remote-name \
-      https://apache.jfrog.io/artifactory/nuget/${version}/${path}
+      https://apache.jfrog.io/artifactory/arrow/nuget/${version}/${path}
   done
   dotnet nuget push \
     ${base_name}.nupkg \
     -k ${NUGET_API_KEY} \
     -s https://api.nuget.org/v3/index.json
+  rm -f ${base_name}.{nupkg,snupkg}
 done
 
 echo "Success! The released NuGet package is available here:"

From fe2d940faa3231a2729218c74723f03b77a8ef05 Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Thu, 27 May 2021 15:41:10 -0500
Subject: [PATCH 309/719] ARROW-12758: [R] Add examples to more function
 documentation

Closes #10343 from thisisnic/ARROW-12758_examples

Lead-authored-by: Nic Crane <thisisnic@gmail.com>
Co-authored-by: Jonathan Keane <jkeane@gmail.com>
Signed-off-by: Jonathan Keane <jkeane@gmail.com>
---
 r/R/array-data.R                | 10 ++++-----
 r/R/array.R                     | 27 ++++++++++++++++++++---
 r/R/buffer.R                    | 14 ++++++++----
 r/R/chunked-array.R             | 27 +++++++++++++++++++----
 r/R/compute.R                   | 39 ++++++++++++++++++++++++++++-----
 r/R/ipc_stream.R                |  4 ++++
 r/R/type.R                      |  7 ++++++
 r/man/ArrayData.Rd              | 10 ++++-----
 r/man/ChunkedArray.Rd           | 28 +++++++++++++++++++----
 r/man/array.Rd                  | 28 ++++++++++++++++++++---
 r/man/buffer.Rd                 | 15 +++++++++----
 r/man/list_compute_functions.Rd |  5 +++++
 r/man/match_arrow.Rd            | 33 +++++++++++++++++++++++-----
 r/man/type.Rd                   |  8 +++++++
 r/man/value_counts.Rd           |  4 ++++
 r/man/write_to_raw.Rd           |  5 +++++
 16 files changed, 222 insertions(+), 42 deletions(-)

diff --git a/r/R/array-data.R b/r/R/array-data.R
index 08b09133361..99c24fdcf67 100644
--- a/r/R/array-data.R
+++ b/r/R/array-data.R
@@ -27,11 +27,11 @@
 #' ```
 #' data <- Array$create(x)$data()
 #'
-#' data$type()
-#' data$length()
-#' data$null_count()
-#' data$offset()
-#' data$buffers()
+#' data$type
+#' data$length
+#' data$null_count
+#' data$offset
+#' data$buffers
 #' ```
 #'
 #' @section Methods:
diff --git a/r/R/array.R b/r/R/array.R
index 1d63c5735a7..0f65743d44d 100644
--- a/r/R/array.R
+++ b/r/R/array.R
@@ -56,9 +56,9 @@
 #' - `$IsNull(i)`: Return true if value at index is null. Does not boundscheck
 #' - `$IsValid(i)`: Return true if value at index is valid. Does not boundscheck
 #' - `$length()`: Size in the number of elements this array contains
-#' - `$offset()`: A relative position into another array's data, to enable zero-copy slicing
-#' - `$null_count()`: The number of null entries in the array
-#' - `$type()`: logical type of data
+#' - `$offset`: A relative position into another array's data, to enable zero-copy slicing
+#' - `$null_count`: The number of null entries in the array
+#' - `$type`: logical type of data
 #' - `$type_id()`: type id
 #' - `$Equals(other)` : is this array equal to `other`
 #' - `$ApproxEquals(other)` :
@@ -84,6 +84,27 @@
 #'
 #' @rdname array
 #' @name array
+#' @examples
+#' my_array <- Array$create(1:10)
+#' my_array$type
+#' my_array$cast(int8())
+#' 
+#' # Check if value is null; zero-indexed
+#' na_array <- Array$create(c(1:5, NA))
+#' na_array$IsNull(0)
+#' na_array$IsNull(5)
+#' na_array$IsValid(5)
+#' na_array$null_count
+#' 
+#' # zero-copy slicing; the offset of the new Array will be the same as the index passed to $Slice
+#' new_array <- na_array$Slice(5)
+#' new_array$offset
+#' 
+#' # Compare 2 arrays
+#' na_array2 = na_array
+#' na_array2 == na_array # element-wise comparison
+#' na_array2$Equals(na_array) # overall comparison 
+#' 
 #' @export
 Array <- R6Class("Array",
   inherit = ArrowDatum,
diff --git a/r/R/buffer.R b/r/R/buffer.R
index db61ed36d78..78c6dc666b8 100644
--- a/r/R/buffer.R
+++ b/r/R/buffer.R
@@ -25,13 +25,19 @@
 #' `buffer()` lets you create an `arrow::Buffer` from an R object
 #' @section Methods:
 #'
-#' - `$is_mutable()` :
-#' - `$ZeroPadding()` :
-#' - `$size()` :
-#' - `$capacity()`:
+#' - `$is_mutable` : is this buffer mutable?
+#' - `$ZeroPadding()` : zero bytes in padding, i.e. bytes between size and capacity
+#' - `$size` : size in memory, in bytes
+#' - `$capacity`: possible capacity, in bytes 
 #'
 #' @rdname buffer
 #' @name buffer
+#' @examples
+#' my_buffer <- buffer(c(1, 2, 3, 4))
+#' my_buffer$is_mutable
+#' my_buffer$ZeroPadding()
+#' my_buffer$size
+#' my_buffer$capacity
 #' @export
 #' @include arrow-package.R
 #' @include enums.R
diff --git a/r/R/chunked-array.R b/r/R/chunked-array.R
index a7f9c8f790c..d03db3047fd 100644
--- a/r/R/chunked-array.R
+++ b/r/R/chunked-array.R
@@ -45,10 +45,10 @@
 #'    used to rearrange the `ChunkedArray` in ascending or descending order
 #' - `$cast(target_type, safe = TRUE, options = cast_options(safe))`: Alter the
 #'    data in the array to change its type.
-#' - `$null_count()`: The number of null entries in the array
-#' - `$chunks()`: return a list of `Array`s
-#' - `$num_chunks()`: integer number of chunks in the `ChunkedArray`
-#' - `$type()`: logical type of data
+#' - `$null_count`: The number of null entries in the array
+#' - `$chunks`: return a list of `Array`s
+#' - `$num_chunks`: integer number of chunks in the `ChunkedArray`
+#' - `$type`: logical type of data
 #' - `$View(type)`: Construct a zero-copy view of this `ChunkedArray` with the
 #'    given type.
 #' - `$Validate()`: Perform any validation checks to determine obvious inconsistencies
@@ -57,6 +57,25 @@
 #' @rdname ChunkedArray
 #' @name ChunkedArray
 #' @seealso [Array]
+#' @examples
+#' # Pass items into chunked_array as separate objects to create chunks
+#' class_scores <- chunked_array(c(87, 88, 89), c(94, 93, 92), c(71, 72, 73))
+#' class_scores$num_chunks
+#' 
+#' # When taking a Slice from a chunked_array, chunks are preserved
+#' class_scores$Slice(2, length = 5)
+#' 
+#' # You can combine Take and SortIndices to return a ChunkedArray with 1 chunk 
+#' # containing all values, ordered.
+#' class_scores$Take(class_scores$SortIndices(descending = TRUE))
+#' 
+#' # If you pass a list into chunked_array, you get a list of length 1
+#' list_scores <- chunked_array(list(c(9.9, 9.6, 9.5), c(8.2, 8.3, 8.4), c(10.0, 9.9, 9.8)))
+#' list_scores$num_chunks
+#' 
+#' # When constructing a ChunkedArray, the first chunk is used to infer type.
+#' doubles <- chunked_array(c(1, 2, 3), c(5L, 6L, 7L))
+#' doubles$type
 #' @export
 ChunkedArray <- R6Class("ChunkedArray", inherit = ArrowDatum,
   public = list(
diff --git a/r/R/compute.R b/r/R/compute.R
index 35dbd63b90f..20a236901e4 100644
--- a/r/R/compute.R
+++ b/r/R/compute.R
@@ -83,6 +83,10 @@ call_function <- function(function_name, ..., args = list(...), options = empty_
 #' @param pattern Optional regular expression to filter the function list
 #' @param ... Additional parameters passed to `grep()`
 #' @return A character vector of available Arrow C++ function names
+#' @examples
+#' list_compute_functions() 
+#' list_compute_functions(pattern = "^UTF8", ignore.case = TRUE)
+#' list_compute_functions(pattern = "^is", invert = TRUE)
 #' @export
 list_compute_functions <- function(pattern = NULL, ...) {
   funcs <- compute__GetFunctionNames()
@@ -231,13 +235,35 @@ all.ArrowDatum <- function(..., na.rm = FALSE){
 #' `base::match()` is not a generic, so we can't just define Arrow methods for
 #' it. This function exposes the analogous functions in the Arrow C++ library.
 #'
-#' @param x `Array` or `ChunkedArray`
-#' @param table `Array`, `ChunkedArray`, or R vector lookup table.
+#' @param x `Scalar`, `Array` or `ChunkedArray`
+#' @param table `Scalar`, Array`, `ChunkedArray`, or R vector lookup table.
 #' @param ... additional arguments, ignored
-#' @return `match_arrow()` returns an `int32`-type `Array` of the same length
-#' as `x` with the (0-based) indexes into `table`. `is_in()` returns a
-#' `boolean`-type `Array` of the same length as `x` with values indicating
+#' @return `match_arrow()` returns an `int32`-type Arrow object of the same length
+#' and type as `x` with the (0-based) indexes into `table`. `is_in()` returns a
+#' `boolean`-type Arrow object of the same length and type as `x` with values indicating
 #' per element of `x` it it is present in `table`.
+#' @examples
+#' # note that the returned value is 0-indexed
+#' cars_tbl <- Table$create(name = rownames(mtcars), mtcars)
+#' match_arrow(Scalar$create("Mazda RX4 Wag"), cars_tbl$name)
+#'
+#' is_in(Array$create("Mazda RX4 Wag"), cars_tbl$name)
+#' 
+#' # Although there are multiple matches, you are returned the index of the first 
+#' # match, as with the base R equivalent
+#' match(4, mtcars$cyl) # 1-indexed
+#' match_arrow(Scalar$create(4), cars_tbl$cyl) # 0-indexed
+#' 
+#' # If `x` contains multiple values, you are returned the indices of the first 
+#' # match for each value.
+#' match(c(4, 6, 8), mtcars$cyl)
+#' match_arrow(Array$create(c(4, 6, 8)), cars_tbl$cyl)
+#' 
+#' # Return type matches type of `x`
+#' is_in(c(4, 6, 8), mtcars$cyl) # returns vector
+#' is_in(Scalar$create(4), mtcars$cyl) # returns Scalar
+#' is_in(Array$create(c(4, 6, 8)), cars_tbl$cyl) # returns Array
+#' is_in(ChunkedArray$create(c(4, 6), 8), cars_tbl$cyl) # returns ChunkedArray
 #' @export
 match_arrow <- function(x, table, ...) UseMethod("match_arrow")
 
@@ -273,6 +299,9 @@ is_in.ArrowDatum <- function(x, table, ...) {
 #' @param x `Array` or `ChunkedArray`
 #' @return A `StructArray` containing "values" (same type as `x`) and "counts"
 #' `Int64`.
+#' @examples
+#' cyl_vals <- Array$create(mtcars$cyl)
+#' value_counts(cyl_vals)
 #' @export
 value_counts <- function(x) {
   call_function("value_counts", x)
diff --git a/r/R/ipc_stream.R b/r/R/ipc_stream.R
index 4f506f3332b..5ede18cd0e3 100644
--- a/r/R/ipc_stream.R
+++ b/r/R/ipc_stream.R
@@ -60,6 +60,10 @@ write_ipc_stream <- function(x, sink, ...) {
 #' @inheritParams write_feather
 #' @param format one of `c("stream", "file")`, indicating the IPC format to use
 #' @return A `raw` vector containing the bytes of the IPC serialized data.
+#' @examples
+#' # The default format is "stream"
+#' write_to_raw(mtcars)
+#' write_to_raw(mtcars, format = "file")
 #' @export
 write_to_raw <- function(x, format = c("stream", "file")) {
   sink <- BufferOutputStream$create()
diff --git a/r/R/type.R b/r/R/type.R
index fd7470ce88e..04b0a378e10 100644
--- a/r/R/type.R
+++ b/r/R/type.R
@@ -57,6 +57,13 @@ FLOAT_TYPES <- c("float16", "float32", "float64", "halffloat", "float", "double"
 #' @param x an R vector
 #'
 #' @return an arrow logical type
+#' @examples
+#' type(1:10)
+#' type(1L:10L)
+#' type(c(1, 1.5, 2))
+#' type(c("A", "B", "C"))
+#' type(mtcars)
+#' type(Sys.Date())
 #' @export
 type <- function(x) UseMethod("type")
 
diff --git a/r/man/ArrayData.Rd b/r/man/ArrayData.Rd
index 24530c42317..383ab317d1e 100644
--- a/r/man/ArrayData.Rd
+++ b/r/man/ArrayData.Rd
@@ -11,11 +11,11 @@ inside an \code{arrow::Array}.
 \section{Usage}{
 \preformatted{data <- Array$create(x)$data()
 
-data$type()
-data$length()
-data$null_count()
-data$offset()
-data$buffers()
+data$type
+data$length
+data$null_count
+data$offset
+data$buffers
 }
 }
 
diff --git a/r/man/ChunkedArray.Rd b/r/man/ChunkedArray.Rd
index 90dd2e39e40..b0058bbac8f 100644
--- a/r/man/ChunkedArray.Rd
+++ b/r/man/ChunkedArray.Rd
@@ -42,10 +42,10 @@ logical vector or Arrow boolean-type \verb{(Chunked)Array} \code{i} is \code{TRU
 used to rearrange the \code{ChunkedArray} in ascending or descending order
 \item \verb{$cast(target_type, safe = TRUE, options = cast_options(safe))}: Alter the
 data in the array to change its type.
-\item \verb{$null_count()}: The number of null entries in the array
-\item \verb{$chunks()}: return a list of \code{Array}s
-\item \verb{$num_chunks()}: integer number of chunks in the \code{ChunkedArray}
-\item \verb{$type()}: logical type of data
+\item \verb{$null_count}: The number of null entries in the array
+\item \verb{$chunks}: return a list of \code{Array}s
+\item \verb{$num_chunks}: integer number of chunks in the \code{ChunkedArray}
+\item \verb{$type}: logical type of data
 \item \verb{$View(type)}: Construct a zero-copy view of this \code{ChunkedArray} with the
 given type.
 \item \verb{$Validate()}: Perform any validation checks to determine obvious inconsistencies
@@ -53,6 +53,26 @@ within the array's internal data. This can be an expensive check, potentially \c
 }
 }
 
+\examples{
+# Pass items into chunked_array as separate objects to create chunks
+class_scores <- chunked_array(c(87, 88, 89), c(94, 93, 92), c(71, 72, 73))
+class_scores$num_chunks
+
+# When taking a Slice from a chunked_array, chunks are preserved
+class_scores$Slice(2,length = 5)
+
+# You can combine Take and SortIndices to return a ChunkedArray with 1 chunk 
+# containing all values, ordered.
+class_scores$Take(class_scores$SortIndices(descending = TRUE))
+
+# If you pass a list into chunked_array, you get a list of length 1
+list_scores <- chunked_array(list(c(9.9, 9.6, 9.5), c(8.2,8.3,8.4), c(10.0, 9.9, 9.8)))
+list_scores$num_chunks
+
+# When constructing a ChunkedArray, the first chunk is used to infer type.
+doubles <- chunked_array(c(1, 2, 3), c(5L, 6L, 7L))
+doubles$type
+}
 \seealso{
 \link{Array}
 }
diff --git a/r/man/array.Rd b/r/man/array.Rd
index f65afe9fbc3..34f106c0cfa 100644
--- a/r/man/array.Rd
+++ b/r/man/array.Rd
@@ -54,9 +54,9 @@ a == a
 \item \verb{$IsNull(i)}: Return true if value at index is null. Does not boundscheck
 \item \verb{$IsValid(i)}: Return true if value at index is valid. Does not boundscheck
 \item \verb{$length()}: Size in the number of elements this array contains
-\item \verb{$offset()}: A relative position into another array's data, to enable zero-copy slicing
-\item \verb{$null_count()}: The number of null entries in the array
-\item \verb{$type()}: logical type of data
+\item \verb{$offset}: A relative position into another array's data, to enable zero-copy slicing
+\item \verb{$null_count}: The number of null entries in the array
+\item \verb{$type}: logical type of data
 \item \verb{$type_id()}: type id
 \item \verb{$Equals(other)} : is this array equal to \code{other}
 \item \verb{$ApproxEquals(other)} :
@@ -82,3 +82,25 @@ within the array's internal data. This can be an expensive check, potentially \c
 }
 }
 
+\examples{
+my_array <- Array$create(1:10)
+my_array$type
+my_array$cast(int8())
+
+# Check if value is null; zero-indexed
+na_array <- Array$create(c(1:5, NA))
+na_array$IsNull(0)
+na_array$IsNull(5)
+na_array$IsValid(5)
+na_array$null_count
+
+# zero-copy slicing; the offset of the new Array will be the same as the index passed to $Slice
+new_array <- na_array$Slice(5)
+new_array$offset
+
+# Compare 2 arrays
+na_array2 = na_array
+na_array2 == na_array # element-wise comparison
+na_array2$Equals(na_array) # overall comparison 
+
+}
diff --git a/r/man/buffer.Rd b/r/man/buffer.Rd
index 4a479b7d650..08d66ece5dc 100644
--- a/r/man/buffer.Rd
+++ b/r/man/buffer.Rd
@@ -26,10 +26,17 @@ contiguous memory with a particular size.
 \section{Methods}{
 
 \itemize{
-\item \verb{$is_mutable()} :
-\item \verb{$ZeroPadding()} :
-\item \verb{$size()} :
-\item \verb{$capacity()}:
+\item \verb{$is_mutable} : is this buffer mutable?
+\item \verb{$ZeroPadding()} : zero bytes in padding, i.e. bytes between size and capacity
+\item \verb{$size} : size in memory, in bytes
+\item \verb{$capacity}: possible capacity, in bytes
 }
 }
 
+\examples{
+my_buffer <- buffer(c(1, 2, 3, 4))
+my_buffer$is_mutable
+my_buffer$ZeroPadding()
+my_buffer$size
+my_buffer$capacity
+}
diff --git a/r/man/list_compute_functions.Rd b/r/man/list_compute_functions.Rd
index ba17688d833..18c2aa35fab 100644
--- a/r/man/list_compute_functions.Rd
+++ b/r/man/list_compute_functions.Rd
@@ -37,3 +37,8 @@ The package includes Arrow methods for many base R functions that can
 be called directly on Arrow objects, as well as some tidyverse-flavored versions
 available inside \code{dplyr} verbs.
 }
+\examples{
+list_compute_functions() 
+list_compute_functions(pattern = "^UTF8", ignore.case = TRUE)
+list_compute_functions(pattern = "^is", invert = TRUE)
+}
diff --git a/r/man/match_arrow.Rd b/r/man/match_arrow.Rd
index 1460ba40926..9b863254d1a 100644
--- a/r/man/match_arrow.Rd
+++ b/r/man/match_arrow.Rd
@@ -10,19 +10,42 @@ match_arrow(x, table, ...)
 is_in(x, table, ...)
 }
 \arguments{
-\item{x}{\code{Array} or \code{ChunkedArray}}
+\item{x}{\code{Scalar}, \code{Array} or \code{ChunkedArray}}
 
-\item{table}{\code{Array}, \code{ChunkedArray}, or R vector lookup table.}
+\item{table}{\code{Scalar}, Array\verb{, }ChunkedArray`, or R vector lookup table.}
 
 \item{...}{additional arguments, ignored}
 }
 \value{
-\code{match_arrow()} returns an \code{int32}-type \code{Array} of the same length
-as \code{x} with the (0-based) indexes into \code{table}. \code{is_in()} returns a
-\code{boolean}-type \code{Array} of the same length as \code{x} with values indicating
+\code{match_arrow()} returns an \code{int32}-type Arrow object of the same length
+and type as \code{x} with the (0-based) indexes into \code{table}. \code{is_in()} returns a
+\code{boolean}-type Arrow object of the same length and type as \code{x} with values indicating
 per element of \code{x} it it is present in \code{table}.
 }
 \description{
 \code{base::match()} is not a generic, so we can't just define Arrow methods for
 it. This function exposes the analogous functions in the Arrow C++ library.
 }
+\examples{
+# note that the returned value is 0-indexed
+cars_tbl <- Table$create(name = rownames(mtcars), mtcars)
+match_arrow(Scalar$create("Mazda RX4 Wag"), cars_tbl$name)
+
+is_in(Array$create("Mazda RX4 Wag"), cars_tbl$name)
+
+# Although there are multiple matches, you are returned the index of the first 
+# match, as with the base R equivalent
+match(4, mtcars$cyl) # 1-indexed
+match_arrow(Scalar$create(4), cars_tbl$cyl) # 0-indexed
+
+# If `x` contains multiple values, you are returned the indices of the first 
+# match for each value.
+match(c(4, 6, 8), mtcars$cyl)
+match_arrow(Array$create(c(4, 6, 8)), cars_tbl$cyl)
+
+# Return type matches type of `x`
+is_in(c(4, 6, 8), mtcars$cyl) # returns vector
+is_in(Scalar$create(4), mtcars$cyl) # returns Scalar
+is_in(Array$create(c(4, 6, 8)), cars_tbl$cyl) # returns Array
+is_in(ChunkedArray$create(c(4, 6), 8), cars_tbl$cyl) # returns ChunkedArray
+}
diff --git a/r/man/type.Rd b/r/man/type.Rd
index 2f85e4a6ac6..7ef8ea60ec0 100644
--- a/r/man/type.Rd
+++ b/r/man/type.Rd
@@ -15,3 +15,11 @@ an arrow logical type
 \description{
 infer the arrow Array type from an R vector
 }
+\examples{
+type(1:10)
+type(1L:10L)
+type(c(1,1.5,2))
+type(c("A", "B", "C"))
+type(mtcars)
+type(Sys.Date())
+}
diff --git a/r/man/value_counts.Rd b/r/man/value_counts.Rd
index 139af8edc63..e8023c2fd3f 100644
--- a/r/man/value_counts.Rd
+++ b/r/man/value_counts.Rd
@@ -16,3 +16,7 @@ A \code{StructArray} containing "values" (same type as \code{x}) and "counts"
 \description{
 This function tabulates the values in the array and returns a table of counts.
 }
+\examples{
+cyl_vals <- Array$create(mtcars$cyl)
+value_counts(cyl_vals)
+}
diff --git a/r/man/write_to_raw.Rd b/r/man/write_to_raw.Rd
index 46af09a96e8..aa682c09a76 100644
--- a/r/man/write_to_raw.Rd
+++ b/r/man/write_to_raw.Rd
@@ -20,3 +20,8 @@ the data (\code{data.frame}, \code{RecordBatch}, or \code{Table}) they were give
 This function wraps those so that you can serialize data to a buffer and
 access that buffer as a \code{raw} vector in R.
 }
+\examples{
+# The default format is "stream"
+write_to_raw(mtcars)
+write_to_raw(mtcars, format = "file")
+}

From ffaa372b2e1c64ec6e5a7dc1f3c4856042ec53d8 Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Thu, 27 May 2021 16:18:41 -0500
Subject: [PATCH 310/719] ARROW-12569: [R] [CI]  Run revdep in CI

This runs reverse dependency checks using {revdepchecks}. The way that works is by installing a release version of arrow and the current development version (i.e. from the git checkout), and then runs checks on each of the reverse dependencies first with the release (called "old" in {revdepcheck}'s terms) and with the development version ("new" in {revdepcheck}'s terms). Then it compares the outputs and will only fail if there is a failure in the new check that is not in the old check.

I've customized the output a bit so that it prints any errors that come up in either (in the revdepcheck problems step) so we can more easily diagnose, but it will only fail if there are new errors.

One thing that I tried and was unable to do is to find a way to cache packages+info across runs. The github cache action will create a cache, but because of how they are run on crossbow (i.e. on different branches) the caches are never accessible in different runs. I've kept the cacheing step in for now, if we could find a way to (manually?) run this on the main branch like https://github.com/ursacomputing/crossbow/blob/master/.github/workflows/cache_vcpkg.yml before we use this heavily (i.e. likely only around a release) that would create a cache that could be used to speed up some of the jobs.

Closes #10345 from jonkeane/ARROW-12569-revdepcheck

Authored-by: Jonathan Keane <jkeane@gmail.com>
Signed-off-by: Jonathan Keane <jkeane@gmail.com>
---
 ci/scripts/r_revdepcheck.sh              | 87 ++++++++++++++++++++++++
 dev/tasks/r/github.linux.revdepcheck.yml | 77 +++++++++++++++++++++
 dev/tasks/tasks.yml                      |  4 ++
 docker-compose.yml                       | 25 +++++++
 4 files changed, 193 insertions(+)
 create mode 100755 ci/scripts/r_revdepcheck.sh
 create mode 100644 dev/tasks/r/github.linux.revdepcheck.yml

diff --git a/ci/scripts/r_revdepcheck.sh b/ci/scripts/r_revdepcheck.sh
new file mode 100755
index 00000000000..e2605911f93
--- /dev/null
+++ b/ci/scripts/r_revdepcheck.sh
@@ -0,0 +1,87 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+: ${R_BIN:=R}
+
+source_dir=${1}/r
+
+# cpp building dependencies
+apt install -y cmake
+
+# system dependencies needed for arrow's reverse dependencies
+apt install -y libxml2-dev \
+  libfontconfig1-dev \
+  libcairo2-dev \
+  libglpk-dev \
+  libmariadb-dev \
+  unixodbc-dev \
+  libpq-dev \
+  coinor-libsymphony-dev \
+  coinor-libcgl-dev \
+  coinor-symphony \
+  libzmq3-dev
+
+pushd ${source_dir}
+
+printenv
+
+: ${TEST_R_WITH_ARROW:=TRUE}
+export TEST_R_WITH_ARROW=$TEST_R_WITH_ARROW
+
+# By default, aws-sdk tries to contact a non-existing local ip host
+# to retrieve metadata. Disable this so that S3FileSystem tests run faster.
+export AWS_EC2_METADATA_DISABLED=TRUE
+
+# Set crancache dir so we can cache it
+export CRANCACHE_DIR="/arrow/.crancache"
+
+SCRIPT="
+    # We can't use RSPM binaries because we need source packages
+    options('repos' = c(CRAN = 'https://packagemanager.rstudio.com/all/latest'))
+    remotes::install_github('r-lib/revdepcheck')
+
+    # zoo is needed by RcisTarget tests, though only listed in enhances so not installed by revdepcheck
+    install.packages('zoo')
+
+    # actually run revdepcheck
+    revdepcheck::revdep_check(
+    quiet = FALSE,
+    timeout = as.difftime(120, units = 'mins'),
+    num_workers = 1,
+    env = c(
+        ARROW_R_DEV = '$ARROW_R_DEV',
+        LIBARROW_DOWNLOAD = TRUE,
+        LIBARROW_MINIMAL = FALSE,
+        revdepcheck::revdep_env_vars()
+    ))
+    revdepcheck::revdep_report(all = TRUE)
+
+    # Go through the summary and fail if any of the statuses include -
+    summary <- revdepcheck::revdep_summary()
+    failed <- lapply(summary, function(check) grepl('-', check[['status']]))
+
+    if (any(unlist(failed))) {
+      quit(status = 1)
+    }
+    "
+
+echo "$SCRIPT" | ${R_BIN} --no-save
+
+popd
diff --git a/dev/tasks/r/github.linux.revdepcheck.yml b/dev/tasks/r/github.linux.revdepcheck.yml
new file mode 100644
index 00000000000..80071171b75
--- /dev/null
+++ b/dev/tasks/r/github.linux.revdepcheck.yml
@@ -0,0 +1,77 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# NOTE: must set "Crossbow" as name to have the badge links working in the
+# github comment reports!
+name: Crossbow
+
+on:
+  push:
+    branches:
+      - "*-github-*"
+
+jobs:
+  r-versions:
+    name: "rstudio/r-base:latest-focal"
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+    env:
+      R_ORG: "rstudio"
+      R_IMAGE: "r-base"
+      R_TAG: "latest-focal"
+      ARROW_R_DEV: "TRUE"
+    steps:
+      - name: Checkout Arrow
+        run: |
+          git clone --no-checkout {{ arrow.remote }} arrow
+          git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
+          git -C arrow checkout FETCH_HEAD
+          git -C arrow submodule update --init --recursive
+      - name: Free Up Disk Space
+        shell: bash
+        run: arrow/ci/scripts/util_cleanup.sh
+      - name: Fetch Submodules and Tags
+        shell: bash
+        run: cd arrow && ci/scripts/util_checkout.sh
+      - name: Docker Pull
+        shell: bash
+        run: cd arrow && docker-compose pull --ignore-pull-failures r
+      - name: Docker Build
+        shell: bash
+        run: cd arrow && docker-compose build r-revdepcheck
+      - name: Docker Run
+        shell: bash
+        run: cd arrow && docker-compose run r-revdepcheck
+      - name: revdepcheck CRAN report
+        if: always()
+        shell: bash
+        run: cat arrow/r/revdep/cran.md
+      - name: revdepcheck failures
+        if: always()
+        shell: bash
+        run: cat arrow/r/revdep/failures.md
+      - name: revdepcheck problems
+        if: always()
+        shell: bash
+        run: cat arrow/r/revdep/problems.md
+      - name: Save the revdep output
+        if: always()
+        uses: actions/upload-artifact@v2
+        with:
+          name: revdepcheck-folder
+          path: arrow/r/revdep
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index b08b9888963..ca21496b913 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -929,6 +929,10 @@ tasks:
         UBUNTU: 18.04
       run: ubuntu-r-sanitizer
 
+  revdep-r-check:
+    ci: github
+    template: r/github.linux.revdepcheck.yml
+
   test-debian-10-go-1.15:
     ci: azure
     template: docker-tests/azure.linux.yml
diff --git a/docker-compose.yml b/docker-compose.yml
index 20a743f8c31..a0605c23881 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -119,6 +119,7 @@ x-hierarchy:
   - ubuntu-r-valgrind
   - python-sdist
   - r
+  - r-revdepcheck
   # helper services
   - impala
   - postgres
@@ -1096,6 +1097,30 @@ services:
       /bin/bash -c "
         /arrow/ci/scripts/r_valgrind.sh /arrow"
 
+  r-revdepcheck:
+    # Usage:
+    #   docker-compose build r-revdepcheck
+    #   docker-compose run r-revdepcheck
+    image: ${REPO}:r-rstudio-r-base-4.0-focal-revdepcheck
+    build:
+      context: .
+      dockerfile: ci/docker/linux-r.dockerfile
+      cache_from:
+        - ${REPO}:r-rstudio-r-base-4.0-focal-revdepcheck
+      args:
+        base: rstudio/r-base:4.0-focal
+        r_dev: ${ARROW_R_DEV}
+    shm_size: *shm-size
+    environment:
+      LIBARROW_DOWNLOAD: "true"
+      LIBARROW_MINIMAL: "false"
+      ARROW_SOURCE_HOME: "/arrow"
+      ARROW_R_DEV: "true"
+    volumes: *ubuntu-volumes
+    command: >
+      /bin/bash -c "/arrow/ci/scripts/r_revdepcheck.sh /arrow"
+
+
 
   ################################# Go ########################################
 

From c0e78393a4b0c42d63dc40cc24edad1d3a10db63 Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Thu, 27 May 2021 16:30:04 -0500
Subject: [PATCH 311/719] ARROW-12883: [R] [CI] version compatibility fails on
 R 4.1

Adjust the R version used to be able to install binary arrow packages from RSPM. Small adjustment to tests that doesn't require the order of attributes to be fixed (the order changed slightly in version 3.0.0)

Closes #10409 from jonkeane/ARROW-12883-version-compatibility

Authored-by: Jonathan Keane <jkeane@gmail.com>
Signed-off-by: Jonathan Keane <jkeane@gmail.com>
---
 .../r/github.linux.version.compatibility.yml  | 19 +++++++++++++------
 r/extra-tests/test-read-files.R               |  8 +++++---
 2 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/dev/tasks/r/github.linux.version.compatibility.yml b/dev/tasks/r/github.linux.version.compatibility.yml
index 2f64227eb8d..18537499d44 100644
--- a/dev/tasks/r/github.linux.version.compatibility.yml
+++ b/dev/tasks/r/github.linux.version.compatibility.yml
@@ -68,18 +68,23 @@ jobs:
           path: arrow/r/extra-tests/files
 
   read-files:
-    name: "Read files with Arrow {{ '${{ matrix.old_arrow_version }}' }}"
+    name: "Read files with Arrow {{ '${{ matrix.config.old_arrow_version }}' }}"
     needs: [write-files]
     runs-on: ubuntu-20.04
     strategy:
       fail-fast: false
       matrix:
-        old_arrow_version:
-          - "2.0.0"
-          - "1.0.1"
+        config:
+        # We use the R version that was released at the time of the arrow release in order
+        # to make sure we can download binaries from RSPM.
+        - { old_arrow_version: '4.0.0', r: '4.0' }
+        - { old_arrow_version: '3.0.0', r: '4.0' }
+        - { old_arrow_version: '2.0.0', r: '4.0' }
+        - { old_arrow_version: '1.0.1', r: '4.0' }
     env:
+      ARROW_R_DEV: "TRUE"
       RSPM: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"
-      OLD_ARROW_VERSION: {{ '${{ matrix.old_arrow_version }}' }}
+      OLD_ARROW_VERSION: {{ '${{ matrix.config.old_arrow_version }}' }}
     steps:
       - name: Checkout Arrow
         run: |
@@ -88,10 +93,12 @@ jobs:
           git -C arrow checkout FETCH_HEAD
           git -C arrow submodule update --init --recursive
       - uses: r-lib/actions/setup-r@v1
+        with:
+          r-version: {{ '${{ matrix.config.r }}' }}
       - name: Install old Arrow
         run: |
           install.packages(c("remotes", "testthat"))
-          remotes::install_version("arrow",  "{{ '${{ matrix.old_arrow_version }}' }}")
+          remotes::install_version("arrow",  "{{ '${{ matrix.config.old_arrow_version }}' }}")
         shell: Rscript {0}
       - name: Setup our testing directory, copy only the tests to it.
         run: |
diff --git a/r/extra-tests/test-read-files.R b/r/extra-tests/test-read-files.R
index 10e9f957920..a2453e2516e 100644
--- a/r/extra-tests/test-read-files.R
+++ b/r/extra-tests/test-read-files.R
@@ -35,7 +35,9 @@ test_that("Can see the metadata (parquet)", {
   df <- read_parquet(pq_file)
   expect_s3_class(df, "tbl")
 
-  expect_equal(
+  # expect_mapequal() instead of expect_equal() because there was an order change where
+  # `class` is located in version 3.0.0 and above.
+  expect_mapequal(
     attributes(df),
     list(
       names = letters[1:4],
@@ -78,7 +80,7 @@ for (comp in c("lz4", "uncompressed", "zstd")) {
     df <- read_feather(feather_file)
     expect_s3_class(df, "tbl")
 
-    expect_equal(
+    expect_mapequal(
       attributes(df),
       list(
         names = letters[1:4],
@@ -137,7 +139,7 @@ test_that("Can see the metadata (stream)", {
 
   expect_s3_class(df, "tbl")
 
-  expect_equal(
+  expect_mapequal(
     attributes(df),
     list(
       names = letters[1:4],

From aa8086093df0959309e5afb11a6d05ac1b50edf5 Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Thu, 27 May 2021 16:30:36 -0500
Subject: [PATCH 312/719] ARROW-12841: [R] Add examples to more function
 documentation - part 2

Closes #10368 from thisisnic/ARROW-12841_examples_part_2

Authored-by: Nic Crane <thisisnic@gmail.com>
Signed-off-by: Jonathan Keane <jkeane@gmail.com>
---
 r/R/compression.R           |  2 ++
 r/R/dataset-format.R        | 12 ++++++++++++
 r/R/dataset.R               | 24 ++++++++++++++++++++++++
 r/R/flight.R                |  2 ++
 r/R/ipc_stream.R            |  4 ++++
 r/R/scalar.R                | 24 ++++++++++++++++++++++++
 r/man/FileFormat.Rd         | 15 +++++++++++++++
 r/man/Scalar.Rd             | 27 +++++++++++++++++++++++++++
 r/man/codec_is_available.Rd |  3 +++
 r/man/load_flight_server.Rd |  3 +++
 r/man/open_dataset.Rd       | 27 +++++++++++++++++++++++++++
 r/man/write_ipc_stream.Rd   |  5 +++++
 12 files changed, 148 insertions(+)

diff --git a/r/R/compression.R b/r/R/compression.R
index ebd4c54cd82..bb051b8d535 100644
--- a/r/R/compression.R
+++ b/r/R/compression.R
@@ -63,6 +63,8 @@ Codec$create <- function(type = "gzip", compression_level = NA) {
 #' "zstd", "lz4", "lzo", or "bz2", case insensitive.
 #' @return Logical: is `type` available?
 #' @export
+#' @examples
+#' codec_is_available("gzip")
 codec_is_available <- function(type) {
   util___Codec__IsAvailable(compression_from_name(type))
 }
diff --git a/r/R/dataset-format.R b/r/R/dataset-format.R
index 854672b66a2..3259ff1077c 100644
--- a/r/R/dataset-format.R
+++ b/r/R/dataset-format.R
@@ -53,6 +53,18 @@
 #' It returns the appropriate subclass of `FileFormat` (e.g. `ParquetFileFormat`)
 #' @rdname FileFormat
 #' @name FileFormat
+#' @examplesIf arrow_with_dataset()
+#' ## Semi-colon delimited files
+#' # Set up directory for examples
+#' tf <- tempfile()
+#' dir.create(tf)
+#' on.exit(unlink(tf))
+#' write.table(mtcars, file.path(tf, "file1.txt"), sep = ";", row.names = FALSE)
+#' 
+#' # Create FileFormat object
+#' format <- FileFormat$create(format = "text", delimiter = ";")
+#' 
+#' open_dataset(tf, format = format)
 #' @export
 FileFormat <- R6Class("FileFormat", inherit = ArrowObject,
   active = list(
diff --git a/r/R/dataset.R b/r/R/dataset.R
index 745c39af068..8716ef0d5c5 100644
--- a/r/R/dataset.R
+++ b/r/R/dataset.R
@@ -69,6 +69,30 @@
 #' @export
 #' @seealso `vignette("dataset", package = "arrow")`
 #' @include arrow-package.R
+#' @examplesIf arrow_with_dataset()
+#' # Set up directory for examples
+#' tf <- tempfile()
+#' dir.create(tf)
+#' on.exit(unlink(tf))
+#' \dontrun{
+#' write_parquet(mtcars[1:10,], file.path(tf, "file1.parquet"))
+#' write_parquet(mtcars[11:20,], file.path(tf, "file2.parquet"))
+#' write_parquet(mtcars[21:32,], file.path(tf, "file3.parquet"))
+#' 
+#' # You can specify a directory containing the files for your dataset and
+#' # open_dataset will scan all files in your directory.
+#' open_dataset(tf)
+#' 
+#' # You can also supply a vector of paths
+#' open_dataset(c(file.path(tf, "file3.parquet"), file.path(tf, "file2.parquet")))
+#' }
+#' ## You must specify the file format if using a format other than parquet.
+#' write_csv_arrow(mtcars[1:10,], file.path(tf, "file1.csv"))
+#' write_csv_arrow(mtcars[11:20,], file.path(tf, "file2.csv"))
+#' # This line will results in errors when you try to work with the data
+#' \dontrun{open_dataset(c(file.path(tf, "file1.csv"), file.path(tf, "file2.csv")))}
+#' # This is the correct way to open a dataset containing CSVs
+#' open_dataset(c(file.path(tf, "file1.csv"), file.path(tf, "file2.csv")), format = "csv") 
 open_dataset <- function(sources,
                          schema = NULL,
                          partitioning = hive_partition(),
diff --git a/r/R/flight.R b/r/R/flight.R
index 486c59a9e12..b3b354e82f6 100644
--- a/r/R/flight.R
+++ b/r/R/flight.R
@@ -21,6 +21,8 @@
 #' @param path file system path where the Python module is found. Default is
 #' to look in the `inst/` directory for included modules.
 #' @export
+#' @examples
+#' \dontrun{load_flight_server("demo_flight_server")}
 load_flight_server <- function(name, path = system.file(package = "arrow")) {
   reticulate::import_from_path(name, path)
 }
diff --git a/r/R/ipc_stream.R b/r/R/ipc_stream.R
index 5ede18cd0e3..0934d0b0100 100644
--- a/r/R/ipc_stream.R
+++ b/r/R/ipc_stream.R
@@ -35,6 +35,10 @@
 #' serialize data to a buffer.
 #' [RecordBatchWriter] for a lower-level interface.
 #' @export
+#' @examples 
+#' tf <- tempfile()
+#' on.exit(unlink(tf))
+#' write_ipc_stream(mtcars, tf)
 write_ipc_stream <- function(x, sink, ...) {
   x_out <- x # So we can return the data we got
   if (is.data.frame(x)) {
diff --git a/r/R/scalar.R b/r/R/scalar.R
index 54fe37f081e..9865315ee56 100644
--- a/r/R/scalar.R
+++ b/r/R/scalar.R
@@ -23,9 +23,33 @@
 #' @docType class
 #'
 #' @description A `Scalar` holds a single value of an Arrow type.
+#' 
+#' @section Methods:
+#'   `$ToString()`: convert to a string
+#'   `$as_vector()`: convert to an R vector
+#'   `$as_array()`: convert to an Arrow `Array`
+#'   `$Equals(other)`: is this Scalar equal to `other`
+#'   `$ApproxEquals(other)`: is this Scalar approximately equal to `other`
+#'   `$is_valid`: is this Scalar valid
+#'   `$null_count`: number of invalid values - 1 or 0 
+#'   `$type`: Scalar type
 #'
 #' @name Scalar
 #' @rdname Scalar
+#' @examples 
+#' Scalar$create(pi)
+#' Scalar$create(404)
+#' # If you pass a vector into Scalar$create, you get a list containing your items
+#' Scalar$create(c(1, 2, 3))
+#' 
+#' # Comparisons
+#' my_scalar <- Scalar$create(99)
+#' my_scalar$ApproxEquals(Scalar$create(99.00001)) # FALSE
+#' my_scalar$ApproxEquals(Scalar$create(99.000009)) # TRUE
+#' my_scalar$Equals(Scalar$create(99.000009)) # FALSE
+#' my_scalar$Equals(Scalar$create(99L)) # FALSE (types don't match)
+#' 
+#' my_scalar$ToString()
 #' @export
 Scalar <- R6Class("Scalar",
   inherit = ArrowDatum,
diff --git a/r/man/FileFormat.Rd b/r/man/FileFormat.Rd
index b8d4dc01bad..5bc9475b408 100644
--- a/r/man/FileFormat.Rd
+++ b/r/man/FileFormat.Rd
@@ -51,3 +51,18 @@ From \link{CsvFragmentScanOptions} (these values can be overridden at scan time)
 It returns the appropriate subclass of \code{FileFormat} (e.g. \code{ParquetFileFormat})
 }
 
+\examples{
+\dontshow{if (arrow_with_dataset()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+## Semi-colon delimited files
+# Set up directory for examples
+tf <- tempfile()
+dir.create(tf)
+on.exit(unlink(tf))
+write.table(mtcars, file.path(tf, "file1.txt"), sep = ";", row.names = FALSE)
+
+# Create FileFormat object
+format <- FileFormat$create(format = "text", delimiter = ";")
+
+open_dataset(tf, format = format)
+\dontshow{\}) # examplesIf}
+}
diff --git a/r/man/Scalar.Rd b/r/man/Scalar.Rd
index 2ef5b02ccbe..1c115b7c199 100644
--- a/r/man/Scalar.Rd
+++ b/r/man/Scalar.Rd
@@ -7,3 +7,30 @@
 \description{
 A \code{Scalar} holds a single value of an Arrow type.
 }
+\section{Methods}{
+
+\verb{$ToString()}: convert to a string
+\verb{$as_vector()}: convert to an R vector
+\verb{$as_array()}: convert to an Arrow \code{Array}
+\verb{$Equals(other)}: is this Scalar equal to \code{other}
+\verb{$ApproxEquals(other)}: is this Scalar approximately equal to \code{other}
+\verb{$is_valid}: is this Scalar valid
+\verb{$null_count}: number of invalid values - 1 or 0
+\verb{$type}: Scalar type
+}
+
+\examples{
+Scalar$create(pi)
+Scalar$create(404)
+# If you pass a vector into Scalar$create, you get a list containing your items
+Scalar$create(c(1, 2, 3))
+
+# Comparisons
+my_scalar <- Scalar$create(99)
+my_scalar$ApproxEquals(Scalar$create(99.00001)) # FALSE
+my_scalar$ApproxEquals(Scalar$create(99.000009)) # TRUE
+my_scalar$Equals(Scalar$create(99.000009)) # FALSE
+my_scalar$Equals(Scalar$create(99L)) # FALSE (types don't match)
+
+my_scalar$ToString()
+}
diff --git a/r/man/codec_is_available.Rd b/r/man/codec_is_available.Rd
index 1b5e8278fa9..5cda813f416 100644
--- a/r/man/codec_is_available.Rd
+++ b/r/man/codec_is_available.Rd
@@ -18,3 +18,6 @@ Support for compression libraries depends on the build-time settings of
 the Arrow C++ library. This function lets you know which are available for
 use.
 }
+\examples{
+codec_is_available("gzip")
+}
diff --git a/r/man/load_flight_server.Rd b/r/man/load_flight_server.Rd
index 7e2000a9ca2..e521efa3328 100644
--- a/r/man/load_flight_server.Rd
+++ b/r/man/load_flight_server.Rd
@@ -15,3 +15,6 @@ to look in the \verb{inst/} directory for included modules.}
 \description{
 Load a Python Flight server
 }
+\examples{
+\dontrun{load_flight_server("demo_flight_server")}
+}
diff --git a/r/man/open_dataset.Rd b/r/man/open_dataset.Rd
index e3e36178627..7175bb132ea 100644
--- a/r/man/open_dataset.Rd
+++ b/r/man/open_dataset.Rd
@@ -74,6 +74,33 @@ can accelerate queries that only touch some partitions (files). Call
 \code{open_dataset()} to point to a directory of data files and return a
 \code{Dataset}, then use \code{dplyr} methods to query it.
 }
+\examples{
+\dontshow{if (arrow_with_dataset()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+# Set up directory for examples
+tf <- tempfile()
+dir.create(tf)
+on.exit(unlink(tf))
+\dontrun{
+write_parquet(mtcars[1:10,], file.path(tf, "file1.parquet"))
+write_parquet(mtcars[11:20,], file.path(tf, "file2.parquet"))
+write_parquet(mtcars[21:32,], file.path(tf, "file3.parquet"))
+
+# You can specify a directory containing the files for your dataset and
+# open_dataset will scan all files in your directory.
+open_dataset(tf)
+
+# You can also supply a vector of paths
+open_dataset(c(file.path(tf, "file3.parquet"), file.path(tf, "file2.parquet")))
+}
+## You must specify the file format if using a format other than parquet.
+write_csv_arrow(mtcars[1:10,], file.path(tf, "file1.csv"))
+write_csv_arrow(mtcars[11:20,], file.path(tf, "file2.csv"))
+# This line will results in errors when you try to work with the data
+\dontrun{open_dataset(c(file.path(tf, "file1.csv"), file.path(tf, "file2.csv")))}
+# This is the correct way to open a dataset containing CSVs
+open_dataset(c(file.path(tf, "file1.csv"), file.path(tf, "file2.csv")), format = "csv") 
+\dontshow{\}) # examplesIf}
+}
 \seealso{
 \code{vignette("dataset", package = "arrow")}
 }
diff --git a/r/man/write_ipc_stream.Rd b/r/man/write_ipc_stream.Rd
index 4f742ce9178..a504a31c304 100644
--- a/r/man/write_ipc_stream.Rd
+++ b/r/man/write_ipc_stream.Rd
@@ -31,6 +31,11 @@ with some nonstandard behavior, is deprecated. You should explicitly choose
 the function that will write the desired IPC format (stream or file) since
 either can be written to a file or \code{OutputStream}.
 }
+\examples{
+tf <- tempfile()
+on.exit(unlink(tf))
+write_ipc_stream(mtcars, tf)
+}
 \seealso{
 \code{\link[=write_feather]{write_feather()}} for writing IPC files. \code{\link[=write_to_raw]{write_to_raw()}} to
 serialize data to a buffer.

From bf0f6aafc81e9c0d86b015607b8a637f9c4136eb Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Thu, 27 May 2021 17:51:21 -0500
Subject: [PATCH 313/719] ARROW-12777: [R] Convert all inputs to Arrow objects
 in match_arrow and is_in

Closes #10383 from thisisnic/ARROW-12777_match_arrow_is_in

Authored-by: Nic Crane <thisisnic@gmail.com>
Signed-off-by: Jonathan Keane <jkeane@gmail.com>
---
 r/NAMESPACE                               |  4 ----
 r/NEWS.md                                 |  1 +
 r/R/compute.R                             | 25 ++++++++++-------------
 r/tests/testthat/test-compute-aggregate.R | 23 +++++++++++++++++++++
 4 files changed, 35 insertions(+), 18 deletions(-)

diff --git a/r/NAMESPACE b/r/NAMESPACE
index d38431d97e7..1510ad89d26 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -51,14 +51,10 @@ S3method(is.infinite,ArrowDatum)
 S3method(is.na,ArrowDatum)
 S3method(is.na,Expression)
 S3method(is.nan,ArrowDatum)
-S3method(is_in,ArrowDatum)
-S3method(is_in,default)
 S3method(length,ArrowDatum)
 S3method(length,ArrowTabular)
 S3method(length,Scalar)
 S3method(length,Schema)
-S3method(match_arrow,ArrowDatum)
-S3method(match_arrow,default)
 S3method(max,ArrowDatum)
 S3method(mean,ArrowDatum)
 S3method(median,ArrowDatum)
diff --git a/r/NEWS.md b/r/NEWS.md
index d416aa34dd3..931369aab71 100644
--- a/r/NEWS.md
+++ b/r/NEWS.md
@@ -24,6 +24,7 @@
 * `arrow_info()` now includes details on the C++ build, such as compiler version
 * `dplyr` queries on `Table` and `RecordBatch` now use the same expression internals as `Dataset` (via `InMemoryDataset`). Among other (mostly internal) benefits that come with this, the print method for `arrow_dplyr_query` now includes the expression and the resulting type of columns derived by `mutate()`.
 * Added bindings for the remainder of C data interface: Type, Field, and RecordBatchReader (from the experimental C stream interface). These also have `reticulate::py_to_r()` and `r_to_py()` methods. Along with the addition of the `Scanner$ToRecordBatchReader()` method, you can now build up a Dataset query in R and pass the resulting stream of batches to another tool in process.
+* `match_arrow` now converts `x` into an `Array` if it is not a `Scalar`, `Array` or `ChunkedArray` and no longer dispatches `base::match()`.
 
 # arrow 4.0.0.1
 
diff --git a/r/R/compute.R b/r/R/compute.R
index 20a236901e4..f9da04c9b03 100644
--- a/r/R/compute.R
+++ b/r/R/compute.R
@@ -265,13 +265,11 @@ all.ArrowDatum <- function(..., na.rm = FALSE){
 #' is_in(Array$create(c(4, 6, 8)), cars_tbl$cyl) # returns Array
 #' is_in(ChunkedArray$create(c(4, 6), 8), cars_tbl$cyl) # returns ChunkedArray
 #' @export
-match_arrow <- function(x, table, ...) UseMethod("match_arrow")
-
-#' @export
-match_arrow.default <- function(x, table, ...) match(x, table, ...)
-
-#' @export
-match_arrow.ArrowDatum <- function(x, table, ...) {
+match_arrow <- function(x, table, ...)  {
+  if (!inherits(x, "ArrowDatum")) {
+    x <- Array$create(x)
+  }
+  
   if (!inherits(table, c("Array", "ChunkedArray"))) {
     table <- Array$create(table)
   }
@@ -280,13 +278,12 @@ match_arrow.ArrowDatum <- function(x, table, ...) {
 
 #' @rdname match_arrow
 #' @export
-is_in <- function(x, table, ...) UseMethod("is_in")
-
-#' @export
-is_in.default <- function(x, table, ...) x %in% table
-
-#' @export
-is_in.ArrowDatum <- function(x, table, ...) {
+is_in <- function(x, table, ...) {
+  
+  if (!inherits(x, "ArrowDatum")) {
+    x <- Array$create(x)
+  }
+  
   if (!inherits(table, c("Array", "DictionaryArray", "ChunkedArray"))) {
     table <- Array$create(table)
   }
diff --git a/r/tests/testthat/test-compute-aggregate.R b/r/tests/testthat/test-compute-aggregate.R
index 1e9d21b8248..25bdddef689 100644
--- a/r/tests/testthat/test-compute-aggregate.R
+++ b/r/tests/testthat/test-compute-aggregate.R
@@ -342,6 +342,29 @@ test_that("match_arrow", {
 
   ca <- ChunkedArray$create(c(1, 4, 3, 1, 1, 3, 4))
   expect_equal(match_arrow(ca, tab), ChunkedArray$create(c(3L, 0L, 1L, 3L, 3L, 1L, 0L)))
+  
+  sc <- Scalar$create(3)
+  expect_equal(match_arrow(sc, tab), Scalar$create(1L))
+  
+  vec <-  c(1,2)
+  expect_equal(match_arrow(vec, tab), Array$create(c(3L, 2L)))
+  
+})
+
+test_that("is_in", {
+  a <- Array$create(c(9, 4, 3))
+  tab <- c(4, 3, 2, 1)
+  expect_equal(is_in(a, tab), Array$create(c(FALSE, TRUE, TRUE)))
+  
+  ca <- ChunkedArray$create(c(9, 4, 3))
+  expect_equal(is_in(ca, tab), ChunkedArray$create(c(FALSE, TRUE, TRUE)))
+  
+  sc <- Scalar$create(3)
+  expect_equal(is_in(sc, tab), Scalar$create(TRUE))
+  
+  vec <-  c(1,9)
+  expect_equal(is_in(vec, tab), Array$create(c(TRUE, FALSE)))
+  
 })
 
 test_that("value_counts", {

From de0bb963b0a4b8adfd957d51e0803582d7291347 Mon Sep 17 00:00:00 2001
From: Ray Bell <rayjohnbell0@gmail.com>
Date: Fri, 28 May 2021 09:26:50 -0400
Subject: [PATCH 314/719] ARROW-12900: [Python][Doc] Add missing numpy import

Closes #10419 from raybellwaves/docs-np-import

Authored-by: Ray Bell <rayjohnbell0@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 docs/source/python/dataset.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/source/python/dataset.rst b/docs/source/python/dataset.rst
index 3753a0bdec4..f63be7e3932 100644
--- a/docs/source/python/dataset.rst
+++ b/docs/source/python/dataset.rst
@@ -64,6 +64,7 @@ of a directory with two parquet files:
     import pathlib
     import pyarrow as pa
     import pyarrow.parquet as pq
+    import numpy as np
 
     base = pathlib.Path(tempfile.gettempdir())
     (base / "parquet_dataset").mkdir(exist_ok=True)

From 406af5e250de4a889b7c54893448ed8d4eb892f4 Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Fri, 28 May 2021 08:26:13 -0700
Subject: [PATCH 315/719] ARROW-12894: [R] Bump R version

Closes #10413 from jonkeane/ARROW-12894

Authored-by: Jonathan Keane <jkeane@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 ci/scripts/PKGBUILD                                  | 2 +-
 dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb | 2 +-
 r/DESCRIPTION                                        | 2 +-
 r/NEWS.md                                            | 6 +++++-
 4 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/ci/scripts/PKGBUILD b/ci/scripts/PKGBUILD
index 366a3c3cc17..f746c4a81a5 100644
--- a/ci/scripts/PKGBUILD
+++ b/ci/scripts/PKGBUILD
@@ -18,7 +18,7 @@
 _realname=arrow
 pkgbase=mingw-w64-${_realname}
 pkgname="${MINGW_PACKAGE_PREFIX}-${_realname}"
-pkgver=4.0.0.9000
+pkgver=4.0.1.9000
 pkgrel=8000
 pkgdesc="Apache Arrow is a cross-language development platform for in-memory data (mingw-w64)"
 arch=("any")
diff --git a/dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb b/dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb
index 2a8f73ea1ef..e17ff8cfc82 100644
--- a/dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb
+++ b/dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb
@@ -19,7 +19,7 @@
 class ApacheArrow < Formula
   desc "Columnar in-memory analytics layer designed to accelerate big data"
   homepage "https://arrow.apache.org/"
-  url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-4.0.0.9000/apache-arrow-4.0.0.9000.tar.gz"
+  url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-4.0.1.9000/apache-arrow-4.0.1.9000.tar.gz"
   sha256 "9948ddb6d4798b51552d0dca3252dd6e3a7d0f9702714fc6f5a1b59397ce1d28"
   head "https://github.com/apache/arrow.git"
 
diff --git a/r/DESCRIPTION b/r/DESCRIPTION
index 451ac6c05f7..a78acdd4a8f 100644
--- a/r/DESCRIPTION
+++ b/r/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: arrow
 Title: Integration to 'Apache' 'Arrow'
-Version: 4.0.0.9000
+Version: 4.0.1.9000
 Authors@R: c(
     person("Neal", "Richardson", email = "neal@ursalabs.org", role = c("aut", "cre")),
     person("Ian", "Cook", email = "ianmcook@gmail.com", role = c("aut")),
diff --git a/r/NEWS.md b/r/NEWS.md
index 931369aab71..4cea5edf6b3 100644
--- a/r/NEWS.md
+++ b/r/NEWS.md
@@ -17,7 +17,7 @@
   under the License.
 -->
 
-# arrow 4.0.0.9000
+# arrow 4.0.1.9000
 
 * `write_csv_arrow()` to write Arrow data to CSV
 * Bindings and support for more Arrow C++ Compute functions: `strsplit()` and `str_split()`, `na.omit()` et al., `any()`/`all()`,
@@ -26,6 +26,10 @@
 * Added bindings for the remainder of C data interface: Type, Field, and RecordBatchReader (from the experimental C stream interface). These also have `reticulate::py_to_r()` and `r_to_py()` methods. Along with the addition of the `Scanner$ToRecordBatchReader()` method, you can now build up a Dataset query in R and pass the resulting stream of batches to another tool in process.
 * `match_arrow` now converts `x` into an `Array` if it is not a `Scalar`, `Array` or `ChunkedArray` and no longer dispatches `base::match()`.
 
+# arrow 4.0.1
+
+* Resolved a few bugs in new string compute kernels (ARROW-12774, ARROW-12670)
+
 # arrow 4.0.0.1
 
  * The mimalloc memory allocator is the default memory allocator when using a static source build of the package on Linux. This is because it has better behavior under valgrind than jemalloc does. A full-featured build (installed with `LIBARROW_MINIMAL=false`) includes both jemalloc and mimalloc, and it has still has jemalloc as default, though this is configurable at runtime with the `ARROW_DEFAULT_MEMORY_POOL` environment variable.

From 1a2ab46a2b934649f8453fb66c4ef013c3d9b2c2 Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Sun, 30 May 2021 05:50:46 +0900
Subject: [PATCH 316/719] ARROW-12848: [Release] Fix URLs in vote mail template

Closes #10422 from kou/release-mail-template

Authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 dev/release/02-source-test.rb | 20 +++++++++++---------
 dev/release/02-source.sh      | 20 +++++++++++---------
 2 files changed, 22 insertions(+), 18 deletions(-)

diff --git a/dev/release/02-source-test.rb b/dev/release/02-source-test.rb
index 102ff706816..652d4c07fa2 100644
--- a/dev/release/02-source-test.rb
+++ b/dev/release/02-source-test.rb
@@ -120,11 +120,11 @@ def test_vote
 #{@current_commit} [2]
 
 The source release rc0 is hosted at [3].
-The binary artifacts are hosted at [4][5][6][7].
-The changelog is located at [8].
+The binary artifacts are hosted at [4][5][6][7][8][9].
+The changelog is located at [10].
 
 Please download, verify checksums and signatures, run the unit tests,
-and vote on the release. See [9] for how to validate a release candidate.
+and vote on the release. See [11] for how to validate a release candidate.
 
 The vote will be open for at least 72 hours.
 
@@ -135,12 +135,14 @@ def test_vote
 [1]: https://issues.apache.org/jira/issues/?jql=project%20%3D%20ARROW%20AND%20status%20in%20%28Resolved%2C%20Closed%29%20AND%20fixVersion%20%3D%20#{@release_version}
 [2]: https://github.com/apache/arrow/tree/#{@current_commit}
 [3]: https://dist.apache.org/repos/dist/dev/arrow/apache-arrow-#{@release_version}-rc0
-[4]: https://apache.jfrog.io/artifactory/arrow/centos-rc/#{@release_version}-rc0
-[5]: https://apache.jfrog.io/artifactory/arrow/debian-rc/#{@release_version}-rc0
-[6]: https://apache.jfrog.io/artifactory/arrow/python-rc/#{@release_version}-rc0
-[7]: https://apache.jfrog.io/artifactory/arrow/ubuntu-rc/#{@release_version}-rc0
-[8]: https://github.com/apache/arrow/blob/#{@current_commit}/CHANGELOG.md
-[9]: https://cwiki.apache.org/confluence/display/ARROW/How+to+Verify+Release+Candidates
+[4]: https://apache.jfrog.io/artifactory/arrow/amazon-linux-rc/
+[5]: https://apache.jfrog.io/artifactory/arrow/centos-rc/
+[6]: https://apache.jfrog.io/artifactory/arrow/debian-rc/
+[7]: https://apache.jfrog.io/artifactory/arrow/nuget-rc/#{@release_version}-rc0
+[8]: https://apache.jfrog.io/artifactory/arrow/python-rc/#{@release_version}-rc0
+[9]: https://apache.jfrog.io/artifactory/arrow/ubuntu-rc/
+[10]: https://github.com/apache/arrow/blob/#{@current_commit}/CHANGELOG.md
+[11]: https://cwiki.apache.org/confluence/display/ARROW/How+to+Verify+Release+Candidates
     VOTE
   end
 end
diff --git a/dev/release/02-source.sh b/dev/release/02-source.sh
index 9809f4f5e66..7e0c2451089 100755
--- a/dev/release/02-source.sh
+++ b/dev/release/02-source.sh
@@ -136,11 +136,11 @@ This release candidate is based on commit:
 ${release_hash} [2]
 
 The source release rc${rc} is hosted at [3].
-The binary artifacts are hosted at [4][5][6][7].
-The changelog is located at [8].
+The binary artifacts are hosted at [4][5][6][7][8][9].
+The changelog is located at [10].
 
 Please download, verify checksums and signatures, run the unit tests,
-and vote on the release. See [9] for how to validate a release candidate.
+and vote on the release. See [11] for how to validate a release candidate.
 
 The vote will be open for at least 72 hours.
 
@@ -151,12 +151,14 @@ The vote will be open for at least 72 hours.
 [1]: ${jira_url}/issues/?jql=${jql}
 [2]: https://github.com/apache/arrow/tree/${release_hash}
 [3]: ${rc_url}
-[4]: https://apache.jfrog.io/artifactory/arrow/centos-rc/${version}-rc${rc}
-[5]: https://apache.jfrog.io/artifactory/arrow/debian-rc/${version}-rc${rc}
-[6]: https://apache.jfrog.io/artifactory/arrow/python-rc/${version}-rc${rc}
-[7]: https://apache.jfrog.io/artifactory/arrow/ubuntu-rc/${version}-rc${rc}
-[8]: https://github.com/apache/arrow/blob/${release_hash}/CHANGELOG.md
-[9]: https://cwiki.apache.org/confluence/display/ARROW/How+to+Verify+Release+Candidates
+[4]: https://apache.jfrog.io/artifactory/arrow/amazon-linux-rc/
+[5]: https://apache.jfrog.io/artifactory/arrow/centos-rc/
+[6]: https://apache.jfrog.io/artifactory/arrow/debian-rc/
+[7]: https://apache.jfrog.io/artifactory/arrow/nuget-rc/${version}-rc${rc}
+[8]: https://apache.jfrog.io/artifactory/arrow/python-rc/${version}-rc${rc}
+[9]: https://apache.jfrog.io/artifactory/arrow/ubuntu-rc/
+[10]: https://github.com/apache/arrow/blob/${release_hash}/CHANGELOG.md
+[11]: https://cwiki.apache.org/confluence/display/ARROW/How+to+Verify+Release+Candidates
 MAIL
   echo "---------------------------------------------------------"
 fi

From ed7ae4df121ca6e01659f1eb913ee21a98f20279 Mon Sep 17 00:00:00 2001
From: Ian Cook <ianmcook@gmail.com>
Date: Sun, 30 May 2021 10:51:56 -0400
Subject: [PATCH 317/719] ARROW-12781: [R] Implement is.type() functions for
 dplyr

- implements base R-flavored `is.*()` functions and rlang `is_*()` functions for checking column types in dplyr
- implements a general `is()` function like `methods::is()` for the same
- adds a `$type_id()` method to `Expression` and to some other R6 classes that were missing it
- changes how `Expression$type()`works, allowing the schema to be assigned to `Expression$schema` before
- adds support for the `where()` select helper in `relocate()` (but not in other verbs; that's ARROW-12778)

Closes #10327 from ianmcook/ARROW-12781

Authored-by: Ian Cook <ianmcook@gmail.com>
Signed-off-by: Ian Cook <ianmcook@gmail.com>
---
 r/NAMESPACE                        |   1 +
 r/R/arrow-package.R                |   2 +-
 r/R/arrowExports.R                 |   4 +
 r/R/chunked-array.R                |   1 +
 r/R/dplyr-eval.R                   |   3 +
 r/R/dplyr-functions.R              |  75 ++++++++
 r/R/dplyr-select.R                 |  15 +-
 r/R/expression.R                   |  10 +-
 r/R/scalar.R                       |   1 +
 r/R/type.R                         |  49 +++++
 r/src/arrowExports.cpp             |  17 ++
 r/src/expression.cpp               |   7 +
 r/tests/testthat/test-dplyr.R      | 293 ++++++++++++++++++++++++++++-
 r/tests/testthat/test-expression.R |   2 +-
 r/tests/testthat/test-type.R       | 105 +++++++++++
 15 files changed, 568 insertions(+), 17 deletions(-)

diff --git a/r/NAMESPACE b/r/NAMESPACE
index 1510ad89d26..991e384723e 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -300,6 +300,7 @@ importFrom(rlang,"%||%")
 importFrom(rlang,.data)
 importFrom(rlang,abort)
 importFrom(rlang,as_label)
+importFrom(rlang,caller_env)
 importFrom(rlang,dots_n)
 importFrom(rlang,enexpr)
 importFrom(rlang,enexprs)
diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R
index e557f869325..c263d20f8df 100644
--- a/r/R/arrow-package.R
+++ b/r/R/arrow-package.R
@@ -19,7 +19,7 @@
 #' @importFrom R6 R6Class
 #' @importFrom purrr as_mapper map map2 map_chr map2_chr map_dfr map_int map_lgl keep
 #' @importFrom assertthat assert_that is.string
-#' @importFrom rlang list2 %||% is_false abort dots_n warn enquo quo_is_null enquos is_integerish quos eval_tidy new_data_mask syms env new_environment env_bind as_label set_names exec is_bare_character quo_get_expr quo_set_expr .data seq2 is_quosure enexpr enexprs expr
+#' @importFrom rlang list2 %||% is_false abort dots_n warn enquo quo_is_null enquos is_integerish quos eval_tidy new_data_mask syms env new_environment env_bind as_label set_names exec is_bare_character quo_get_expr quo_set_expr .data seq2 is_quosure enexpr enexprs expr caller_env
 #' @importFrom tidyselect vars_pull vars_rename vars_select eval_select
 #' @useDynLib arrow, .registration = TRUE
 #' @keywords internal
diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R
index 038467fcad0..55a28529f85 100644
--- a/r/R/arrowExports.R
+++ b/r/R/arrowExports.R
@@ -812,6 +812,10 @@ compute___expr__type <- function(x, schema){
     .Call(`_arrow_compute___expr__type`, x, schema)
 }
 
+compute___expr__type_id <- function(x, schema){
+    .Call(`_arrow_compute___expr__type_id`, x, schema)
+}
+
 ipc___WriteFeather__Table <- function(stream, table, version, chunk_size, compression, compression_level){
     invisible(.Call(`_arrow_ipc___WriteFeather__Table`, stream, table, version, chunk_size, compression, compression_level))
 }
diff --git a/r/R/chunked-array.R b/r/R/chunked-array.R
index d03db3047fd..61093e203e7 100644
--- a/r/R/chunked-array.R
+++ b/r/R/chunked-array.R
@@ -80,6 +80,7 @@
 ChunkedArray <- R6Class("ChunkedArray", inherit = ArrowDatum,
   public = list(
     length = function() ChunkedArray__length(self),
+    type_id = function() ChunkedArray__type(self)$id,
     chunk = function(i) Array$create(ChunkedArray__chunk(self, i)),
     as_vector = function() ChunkedArray__as_vector(self),
     Slice = function(offset, length = NULL){
diff --git a/r/R/dplyr-eval.R b/r/R/dplyr-eval.R
index 2d19bd4cb90..de68d2f2c4d 100644
--- a/r/R/dplyr-eval.R
+++ b/r/R/dplyr-eval.R
@@ -86,6 +86,9 @@ arrow_mask <- function(.data) {
     f_env[[f]] <- fail
   }
 
+  # Assign the schema to the expressions
+  map(.data$selected_columns, ~(.$schema <- .data$.data$schema))
+
   # Add the column references and make the mask
   out <- new_data_mask(
     new_environment(.data$selected_columns, parent = f_env),
diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R
index 7e0eadfdcea..fadd216a30c 100644
--- a/r/R/dplyr-functions.R
+++ b/r/R/dplyr-functions.R
@@ -57,6 +57,30 @@ nse_funcs$cast <- function(x, target_type, safe = TRUE, ...) {
   Expression$create("cast", x, options = opts)
 }
 
+nse_funcs$is <- function(object, class2) {
+  if (is.string(class2)) {
+    switch(class2,
+      # for R data types, pass off to is.*() functions
+      character = nse_funcs$is.character(object),
+      numeric = nse_funcs$is.numeric(object),
+      integer = nse_funcs$is.integer(object),
+      integer64 = nse_funcs$is.integer64(object),
+      logical = nse_funcs$is.logical(object),
+      factor = nse_funcs$is.factor(object),
+      list = nse_funcs$is.list(object),
+      # for Arrow data types, compare class2 with object$type()$ToString(),
+      # but first strip off any parameters to only compare the top-level data
+      # type,  and canonicalize class2
+      sub("^([^([<]+).*$", "\\1", object$type()$ToString()) ==
+        canonical_type_str(class2)
+    )
+  } else if (inherits(class2, "DataType")) {
+    object$type() == as_type(class2)
+  } else {
+    stop("Second argument to is() is not a string or DataType", call. = FALSE)
+  }
+}
+
 nse_funcs$dictionary_encode <- function(x,
                                         null_encoding_behavior = c("mask", "encode")) {
   behavior <- toupper(match.arg(null_encoding_behavior))
@@ -121,6 +145,57 @@ nse_funcs$as.numeric <- function(x) {
   Expression$create("cast", x, options = cast_options(to_type = float64()))
 }
 
+# is.* type functions
+nse_funcs$is.character <- function(x) {
+  x$type_id() %in% Type[c("STRING", "LARGE_STRING")]
+}
+nse_funcs$is.numeric <- function(x) {
+  x$type_id() %in% Type[c("UINT8", "INT8", "UINT16", "INT16", "UINT32", "INT32",
+                          "UINT64", "INT64", "HALF_FLOAT", "FLOAT", "DOUBLE",
+                          "DECIMAL", "DECIMAL256")]
+}
+nse_funcs$is.double <- function(x) {
+  x$type_id() == Type["DOUBLE"]
+}
+nse_funcs$is.integer <- function(x) {
+  x$type_id() %in% Type[c("UINT8", "INT8", "UINT16", "INT16", "UINT32", "INT32",
+                          "UINT64", "INT64")]
+}
+nse_funcs$is.integer64 <- function(x) {
+  x$type_id() == Type["INT64"]
+}
+nse_funcs$is.logical <- function(x) {
+  x$type_id() == Type["BOOL"]
+}
+nse_funcs$is.factor <- function(x) {
+  x$type_id() == Type["DICTIONARY"]
+}
+nse_funcs$is.list <- function(x) {
+  x$type_id() %in% Type[c("LIST", "FIXED_SIZE_LIST", "LARGE_LIST")]
+}
+
+# rlang::is_* type functions
+nse_funcs$is_character <- function(x, n = NULL) {
+  assert_that(is.null(n))
+  nse_funcs$is.character(x)
+}
+nse_funcs$is_double <- function(x, n = NULL, finite = NULL) {
+  assert_that(is.null(n) && is.null(finite))
+  nse_funcs$is.double(x)
+}
+nse_funcs$is_integer <- function(x, n = NULL) {
+  assert_that(is.null(n))
+  nse_funcs$is.integer(x)
+}
+nse_funcs$is_list <- function(x, n = NULL) {
+  assert_that(is.null(n))
+  nse_funcs$is.list(x)
+}
+nse_funcs$is_logical <- function(x, n = NULL) {
+  assert_that(is.null(n))
+  nse_funcs$is.logical(x)
+}
+
 # String functions
 nse_funcs$nchar <- function(x, type = "chars", allowNA = FALSE, keepNA = NA) {
   if (allowNA) {
diff --git a/r/R/dplyr-select.R b/r/R/dplyr-select.R
index 3730fe63fec..686965a4197 100644
--- a/r/R/dplyr-select.R
+++ b/r/R/dplyr-select.R
@@ -59,11 +59,16 @@ relocate.arrow_dplyr_query <- function(.data, ..., .before = NULL, .after = NULL
   # The code in this function is adapted from the code in dplyr::relocate.data.frame
   # at https://github.com/tidyverse/dplyr/blob/master/R/relocate.R
   # TODO: revisit this after https://github.com/tidyverse/dplyr/issues/5829
-  check_select_helpers(c(enexprs(...), enexpr(.before), enexpr(.after)))
 
   .data <- arrow_dplyr_query(.data)
 
-  to_move <- eval_select(expr(c(...)), .data$selected_columns)
+  # Assign the schema to the expressions
+  map(.data$selected_columns, ~(.$schema <- .data$.data$schema))
+
+  # Create a mask for evaluating expressions in tidyselect helpers
+  mask <- new_environment(.cache$functions, parent = caller_env())
+
+  to_move <- eval_select(substitute(c(...)), .data$selected_columns, mask)
 
   .before <- enquo(.before)
   .after <- enquo(.after)
@@ -73,12 +78,12 @@ relocate.arrow_dplyr_query <- function(.data, ..., .before = NULL, .after = NULL
   if (has_before && has_after) {
     abort("Must supply only one of `.before` and `.after`.")
   } else if (has_before) {
-    where <- min(unname(eval_select(.before, .data$selected_columns)))
+    where <- min(unname(eval_select(quo_get_expr(.before), .data$selected_columns, mask)))
     if (!where %in% to_move) {
       to_move <- c(to_move, where)
     }
   } else if (has_after) {
-    where <- max(unname(eval_select(.after, .data$selected_columns)))
+    where <- max(unname(eval_select(quo_get_expr(.after), .data$selected_columns, mask)))
     if (!where %in% to_move) {
       to_move <- c(where, to_move)
     }
@@ -117,4 +122,4 @@ check_select_helpers <- function(exprs) {
       call. = FALSE
     )
   }
-}
\ No newline at end of file
+}
diff --git a/r/R/expression.R b/r/R/expression.R
index 99d98b6af0a..417a12eeb81 100644
--- a/r/R/expression.R
+++ b/r/R/expression.R
@@ -75,7 +75,15 @@
 Expression <- R6Class("Expression", inherit = ArrowObject,
   public = list(
     ToString = function() compute___expr__ToString(self),
-    type = function(schema) compute___expr__type(self, schema),
+    schema = NULL,
+    type = function(schema = self$schema) {
+      assert_that(!is.null(schema))
+      compute___expr__type(self, schema)
+    },
+    type_id = function(schema = self$schema) {
+      assert_that(!is.null(schema))
+      compute___expr__type_id(self, schema)
+    },
     cast = function(to_type, safe = TRUE, ...) {
       opts <- list(
         to_type = to_type,
diff --git a/r/R/scalar.R b/r/R/scalar.R
index 9865315ee56..40e9c65ce71 100644
--- a/r/R/scalar.R
+++ b/r/R/scalar.R
@@ -56,6 +56,7 @@ Scalar <- R6Class("Scalar",
   # TODO: document the methods
   public = list(
     ToString = function() Scalar__ToString(self),
+    type_id = function() Scalar__type(self)$id,
     as_vector = function() Scalar__as_vector(self),
     as_array = function() MakeArrayFromScalar(self),
     Equals = function(other, ...) {
diff --git a/r/R/type.R b/r/R/type.R
index 04b0a378e10..a22323c4ba1 100644
--- a/r/R/type.R
+++ b/r/R/type.R
@@ -428,6 +428,55 @@ as_type <- function(type, name = "type") {
   type
 }
 
+canonical_type_str <- function(type_str) {
+  # canonicalizes data type strings, converting data type function names and
+  # aliases to match the strings returned by DataType$ToString()
+  assert_that(is.string(type_str))
+  if (grepl("[([<]", type_str)) {
+    stop("Cannot interpret string representations of data types that have parameters", call. = FALSE)
+  }
+  switch(type_str,
+    int8 = "int8",
+    int16 = "int16",
+    int32 = "int32",
+    int64 = "int64",
+    uint8 = "uint8",
+    uint16 = "uint16",
+    uint32 = "uint32",
+    uint64 = "uint64",
+    float16 = "halffloat",
+    halffloat = "halffloat",
+    float32 = "float",
+    float = "float",
+    float64 = "double",
+    double = "double",
+    boolean = "bool",
+    bool = "bool",
+    utf8 = "string",
+    large_utf8 = "large_string",
+    large_string = "large_string",
+    binary = "binary",
+    large_binary = "large_binary",
+    fixed_size_binary = "fixed_size_binary",
+    string = "string",
+    date32 = "date32",
+    date64 = "date64",
+    time32 = "time32",
+    time64 = "time64",
+    null = "null",
+    timestamp = "timestamp",
+    decimal = "decimal128",
+    struct = "struct",
+    list_of = "list",
+    list = "list",
+    large_list_of = "large_list",
+    large_list = "large_list",
+    fixed_size_list_of = "fixed_size_list",
+    fixed_size_list = "fixed_size_list",
+    stop("Unrecognized string representation of data type", call. = FALSE)
+  )
+}
+
 # vctrs support -----------------------------------------------------------
 str_dup <- function(x, times) {
   paste0(rep(x, times = times), collapse = "")
diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp
index 0a9b8394e4b..b7ca5e9414c 100644
--- a/r/src/arrowExports.cpp
+++ b/r/src/arrowExports.cpp
@@ -3145,6 +3145,22 @@ extern "C" SEXP _arrow_compute___expr__type(SEXP x_sexp, SEXP schema_sexp){
 }
 #endif
 
+// expression.cpp
+#if defined(ARROW_R_WITH_ARROW)
+arrow::Type::type compute___expr__type_id(const std::shared_ptr<compute::Expression>& x, const std::shared_ptr<arrow::Schema>& schema);
+extern "C" SEXP _arrow_compute___expr__type_id(SEXP x_sexp, SEXP schema_sexp){
+BEGIN_CPP11
+	arrow::r::Input<const std::shared_ptr<compute::Expression>&>::type x(x_sexp);
+	arrow::r::Input<const std::shared_ptr<arrow::Schema>&>::type schema(schema_sexp);
+	return cpp11::as_sexp(compute___expr__type_id(x, schema));
+END_CPP11
+}
+#else
+extern "C" SEXP _arrow_compute___expr__type_id(SEXP x_sexp, SEXP schema_sexp){
+	Rf_error("Cannot call compute___expr__type_id(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+}
+#endif
+
 // feather.cpp
 #if defined(ARROW_R_WITH_ARROW)
 void ipc___WriteFeather__Table(const std::shared_ptr<arrow::io::OutputStream>& stream, const std::shared_ptr<arrow::Table>& table, int version, int chunk_size, arrow::Compression::type compression, int compression_level);
@@ -7074,6 +7090,7 @@ static const R_CallMethodDef CallEntries[] = {
 		{ "_arrow_compute___expr__scalar", (DL_FUNC) &_arrow_compute___expr__scalar, 1}, 
 		{ "_arrow_compute___expr__ToString", (DL_FUNC) &_arrow_compute___expr__ToString, 1}, 
 		{ "_arrow_compute___expr__type", (DL_FUNC) &_arrow_compute___expr__type, 2}, 
+		{ "_arrow_compute___expr__type_id", (DL_FUNC) &_arrow_compute___expr__type_id, 2}, 
 		{ "_arrow_ipc___WriteFeather__Table", (DL_FUNC) &_arrow_ipc___WriteFeather__Table, 6}, 
 		{ "_arrow_ipc___feather___Reader__version", (DL_FUNC) &_arrow_ipc___feather___Reader__version, 1}, 
 		{ "_arrow_ipc___feather___Reader__Read", (DL_FUNC) &_arrow_ipc___feather___Reader__Read, 2}, 
diff --git a/r/src/expression.cpp b/r/src/expression.cpp
index d8745ade479..4b671cb99dd 100644
--- a/r/src/expression.cpp
+++ b/r/src/expression.cpp
@@ -76,4 +76,11 @@ std::shared_ptr<arrow::DataType> compute___expr__type(
   return bound.type();
 }
 
+// [[arrow::export]]
+arrow::Type::type compute___expr__type_id(const std::shared_ptr<compute::Expression>& x,
+                                          const std::shared_ptr<arrow::Schema>& schema) {
+  auto bound = ValueOrStop(x->Bind(*schema));
+  return bound.type()->id();
+}
+
 #endif
diff --git a/r/tests/testthat/test-dplyr.R b/r/tests/testthat/test-dplyr.R
index bf5f06b038c..378640e8308 100644
--- a/r/tests/testthat/test-dplyr.R
+++ b/r/tests/testthat/test-dplyr.R
@@ -343,21 +343,22 @@ test_that("relocate", {
 })
 
 test_that("relocate with selection helpers", {
+  df <- tibble(a = 1, b = 1, c = 1, d = "a", e = "a", f = "a")
   expect_dplyr_equal(
     input %>% relocate(any_of(c("a", "e", "i", "o", "u"))) %>% collect(),
     df
   )
-  expect_error(
-    df %>% Table$create() %>% relocate(where(is.character)),
-    "Unsupported selection helper"
+  expect_dplyr_equal(
+    input %>% relocate(where(is.character)) %>% collect(),
+    df
   )
-  expect_error(
-    df %>% Table$create() %>% relocate(a, b, c, .after = where(is.character)),
-    "Unsupported selection helper"
+  expect_dplyr_equal(
+    input %>% relocate(a, b, c, .after = where(is.character)) %>% collect(),
+    df
   )
-  expect_error(
-    df %>% Table$create() %>% relocate(d, e, f, .before = where(is.numeric)),
-    "Unsupported selection helper"
+  expect_dplyr_equal(
+    input %>% relocate(d, e, f, .before = where(is.numeric)) %>% collect(),
+    df
   )
 })
 
@@ -524,6 +525,280 @@ test_that("is.finite(), is.infinite(), is.nan()", {
   )
 })
 
+test_that("type checks with is() giving Arrow types", {
+  # with class2=DataType
+  expect_equal(
+    Table$create(
+        i32 = Array$create(1, int32()),
+        dec = Array$create(pi)$cast(decimal(3, 2)),
+        f64 = Array$create(1.1, float64()),
+        str = Array$create("a", arrow::string())
+      ) %>% transmute(
+        i32_is_i32 = is(i32, int32()),
+        i32_is_dec = is(i32, decimal(3, 2)),
+        i32_is_i64 = is(i32, float64()),
+        i32_is_str = is(i32, arrow::string()),
+        dec_is_i32 = is(dec, int32()),
+        dec_is_dec = is(dec, decimal(3, 2)),
+        dec_is_i64 = is(dec, float64()),
+        dec_is_str = is(dec, arrow::string()),
+        f64_is_i32 = is(f64, int32()),
+        f64_is_dec = is(f64, decimal(3, 2)),
+        f64_is_i64 = is(f64, float64()),
+        f64_is_str = is(f64, arrow::string()),
+        str_is_i32 = is(str, int32()),
+        str_is_dec = is(str, decimal(3, 2)),
+        str_is_i64 = is(str, float64()),
+        str_is_str = is(str, arrow::string())
+      ) %>%
+      collect() %>% t() %>% as.vector(),
+    c(TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE,
+      FALSE, FALSE, FALSE, FALSE, TRUE)
+  )
+  # with class2=string
+  expect_equal(
+    Table$create(
+        i32 = Array$create(1, int32()),
+        f64 = Array$create(1.1, float64()),
+        str = Array$create("a", arrow::string())
+      ) %>% transmute(
+        i32_is_i32 = is(i32, "int32"),
+        i32_is_i64 = is(i32, "double"),
+        i32_is_str = is(i32, "string"),
+        f64_is_i32 = is(f64, "int32"),
+        f64_is_i64 = is(f64, "double"),
+        f64_is_str = is(f64, "string"),
+        str_is_i32 = is(str, "int32"),
+        str_is_i64 = is(str, "double"),
+        str_is_str = is(str, "string")
+      ) %>%
+      collect() %>% t() %>% as.vector(),
+    c(TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE)
+  )
+  # with class2=string alias
+  expect_equal(
+    Table$create(
+        f16 = Array$create(NA_real_, halffloat()),
+        f32 = Array$create(1.1, float()),
+        f64 = Array$create(2.2, float64()),
+        lgl = Array$create(TRUE, bool()),
+        str = Array$create("a", arrow::string())
+      ) %>% transmute(
+        f16_is_f16 = is(f16, "float16"),
+        f16_is_f32 = is(f16, "float32"),
+        f16_is_f64 = is(f16, "float64"),
+        f16_is_lgl = is(f16, "boolean"),
+        f16_is_str = is(f16, "utf8"),
+        f32_is_f16 = is(f32, "float16"),
+        f32_is_f32 = is(f32, "float32"),
+        f32_is_f64 = is(f32, "float64"),
+        f32_is_lgl = is(f32, "boolean"),
+        f32_is_str = is(f32, "utf8"),
+        f64_is_f16 = is(f64, "float16"),
+        f64_is_f32 = is(f64, "float32"),
+        f64_is_f64 = is(f64, "float64"),
+        f64_is_lgl = is(f64, "boolean"),
+        f64_is_str = is(f64, "utf8"),
+        lgl_is_f16 = is(lgl, "float16"),
+        lgl_is_f32 = is(lgl, "float32"),
+        lgl_is_f64 = is(lgl, "float64"),
+        lgl_is_lgl = is(lgl, "boolean"),
+        lgl_is_str = is(lgl, "utf8"),
+        str_is_f16 = is(str, "float16"),
+        str_is_f32 = is(str, "float32"),
+        str_is_f64 = is(str, "float64"),
+        str_is_lgl = is(str, "boolean"),
+        str_is_str = is(str, "utf8")
+      ) %>%
+      collect() %>% t() %>% as.vector(),
+    c(TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE,
+      FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE,
+      FALSE, FALSE, TRUE)
+  )
+})
+
+test_that("type checks with is() giving R types", {
+  library(bit64)
+  expect_dplyr_equal(
+    input %>%
+      transmute(
+        chr_is_chr = is(chr, "character"),
+        chr_is_fct = is(chr, "factor"),
+        chr_is_int = is(chr, "integer"),
+        chr_is_i64 = is(chr, "integer64"),
+        chr_is_lst = is(chr, "list"),
+        chr_is_lgl = is(chr, "logical"),
+        chr_is_num = is(chr, "numeric"),
+        dbl_is_chr = is(dbl, "character"),
+        dbl_is_fct = is(dbl, "factor"),
+        dbl_is_int = is(dbl, "integer"),
+        dbl_is_i64 = is(dbl, "integer64"),
+        dbl_is_lst = is(dbl, "list"),
+        dbl_is_lgl = is(dbl, "logical"),
+        dbl_is_num = is(dbl, "numeric"),
+        fct_is_chr = is(fct, "character"),
+        fct_is_fct = is(fct, "factor"),
+        fct_is_int = is(fct, "integer"),
+        fct_is_i64 = is(fct, "integer64"),
+        fct_is_lst = is(fct, "list"),
+        fct_is_lgl = is(fct, "logical"),
+        fct_is_num = is(fct, "numeric"),
+        int_is_chr = is(int, "character"),
+        int_is_fct = is(int, "factor"),
+        int_is_int = is(int, "integer"),
+        int_is_i64 = is(int, "integer64"),
+        int_is_lst = is(int, "list"),
+        int_is_lgl = is(int, "logical"),
+        int_is_num = is(int, "numeric"),
+        lgl_is_chr = is(lgl, "character"),
+        lgl_is_fct = is(lgl, "factor"),
+        lgl_is_int = is(lgl, "integer"),
+        lgl_is_i64 = is(lgl, "integer64"),
+        lgl_is_lst = is(lgl, "list"),
+        lgl_is_lgl = is(lgl, "logical"),
+        lgl_is_num = is(lgl, "numeric")
+      ) %>%
+      collect(),
+    tbl
+  )
+  expect_dplyr_equal(
+    input %>%
+      transmute(
+        i64_is_chr = is(i64, "character"),
+        i64_is_fct = is(i64, "factor"),
+        # we want Arrow to return TRUE, but bit64 returns FALSE
+        #i64_is_int = is(i64, "integer"),
+        i64_is_i64 = is(i64, "integer64"),
+        i64_is_lst = is(i64, "list"),
+        i64_is_lgl = is(i64, "logical"),
+        # we want Arrow to return TRUE, but bit64 returns FALSE
+        #i64_is_num = is(i64, "numeric"),
+        lst_is_chr = is(lst, "character"),
+        lst_is_fct = is(lst, "factor"),
+        lst_is_int = is(lst, "integer"),
+        lst_is_i64 = is(lst, "integer64"),
+        lst_is_lst = is(lst, "list"),
+        lst_is_lgl = is(lst, "logical"),
+        lst_is_num = is(lst, "numeric")
+      ) %>%
+      collect(),
+    tibble(
+      i64 = as.integer64(1:3),
+      lst = list(c("a", "b"), c("d", "e"), c("f", "g"))
+    )
+  )
+})
+
+test_that("type checks with is.*()", {
+  library(bit64)
+  expect_dplyr_equal(
+    input %>%
+      transmute(
+        chr_is_chr = is.character(chr),
+        chr_is_dbl = is.double(chr),
+        chr_is_fct = is.factor(chr),
+        chr_is_int = is.integer(chr),
+        chr_is_i64 = is.integer64(chr),
+        chr_is_lst = is.list(chr),
+        chr_is_lgl = is.logical(chr),
+        chr_is_num = is.numeric(chr),
+        dbl_is_chr = is.character(dbl),
+        dbl_is_dbl = is.double(dbl),
+        dbl_is_fct = is.factor(dbl),
+        dbl_is_int = is.integer(dbl),
+        dbl_is_i64 = is.integer64(dbl),
+        dbl_is_lst = is.list(dbl),
+        dbl_is_lgl = is.logical(dbl),
+        dbl_is_num = is.numeric(dbl),
+        fct_is_chr = is.character(fct),
+        fct_is_dbl = is.double(fct),
+        fct_is_fct = is.factor(fct),
+        fct_is_int = is.integer(fct),
+        fct_is_i64 = is.integer64(fct),
+        fct_is_lst = is.list(fct),
+        fct_is_lgl = is.logical(fct),
+        fct_is_num = is.numeric(fct),
+        int_is_chr = is.character(int),
+        int_is_dbl = is.double(int),
+        int_is_fct = is.factor(int),
+        int_is_int = is.integer(int),
+        int_is_i64 = is.integer64(int),
+        int_is_lst = is.list(int),
+        int_is_lgl = is.logical(int),
+        int_is_num = is.numeric(int),
+        lgl_is_chr = is.character(lgl),
+        lgl_is_dbl = is.double(lgl),
+        lgl_is_fct = is.factor(lgl),
+        lgl_is_int = is.integer(lgl),
+        lgl_is_i64 = is.integer64(lgl),
+        lgl_is_lst = is.list(lgl),
+        lgl_is_lgl = is.logical(lgl),
+        lgl_is_num = is.numeric(lgl)
+      ) %>%
+      collect(),
+    tbl
+  )
+  expect_dplyr_equal(
+    input %>%
+      transmute(
+        i64_is_chr = is.character(i64),
+        # TODO: investigate why this is not matching when testthat runs it
+        #i64_is_dbl = is.double(i64),
+        i64_is_fct = is.factor(i64),
+        # we want Arrow to return TRUE, but bit64 returns FALSE
+        #i64_is_int = is.integer(i64),
+        i64_is_i64 = is.integer64(i64),
+        i64_is_lst = is.list(i64),
+        i64_is_lgl = is.logical(i64),
+        i64_is_num = is.numeric(i64),
+        lst_is_chr = is.character(lst),
+        lst_is_dbl = is.double(lst),
+        lst_is_fct = is.factor(lst),
+        lst_is_int = is.integer(lst),
+        lst_is_i64 = is.integer64(lst),
+        lst_is_lst = is.list(lst),
+        lst_is_lgl = is.logical(lst),
+        lst_is_num = is.numeric(lst)
+      ) %>%
+      collect(),
+    tibble(
+      i64 = as.integer64(1:3),
+      lst = list(c("a", "b"), c("d", "e"), c("f", "g"))
+    )
+  )
+})
+
+test_that("type checks with is_*()", {
+  library(rlang)
+  expect_dplyr_equal(
+    input %>%
+      transmute(
+        chr_is_chr = is_character(chr),
+        chr_is_dbl = is_double(chr),
+        chr_is_int = is_integer(chr),
+        chr_is_lst = is_list(chr),
+        chr_is_lgl = is_logical(chr),
+        dbl_is_chr = is_character(dbl),
+        dbl_is_dbl = is_double(dbl),
+        dbl_is_int = is_integer(dbl),
+        dbl_is_lst = is_list(dbl),
+        dbl_is_lgl = is_logical(dbl),
+        int_is_chr = is_character(int),
+        int_is_dbl = is_double(int),
+        int_is_int = is_integer(int),
+        int_is_lst = is_list(int),
+        int_is_lgl = is_logical(int),
+        lgl_is_chr = is_character(lgl),
+        lgl_is_dbl = is_double(lgl),
+        lgl_is_int = is_integer(lgl),
+        lgl_is_lst = is_list(lgl),
+        lgl_is_lgl = is_logical(lgl)
+      ) %>%
+      collect(),
+    tbl
+  )
+})
+
 test_that("as.factor()/dictionary_encode()", {
   skip("ARROW-12632: ExecuteScalarExpression cannot Execute non-scalar expression {x=dictionary_encode(x, {NON-REPRESENTABLE OPTIONS})}")
   df1 <- tibble(x = c("C", "D", "B", NA, "D", "B", "S", "A", "B", "Z", "B"))
diff --git a/r/tests/testthat/test-expression.R b/r/tests/testthat/test-expression.R
index d0459fde5b5..49babf30d5c 100644
--- a/r/tests/testthat/test-expression.R
+++ b/r/tests/testthat/test-expression.R
@@ -58,4 +58,4 @@ test_that("C++ expressions", {
   )
   # Interprets that as a list type
   expect_r6_class(f == c(1L, 2L), "Expression")
-})
\ No newline at end of file
+})
diff --git a/r/tests/testthat/test-type.R b/r/tests/testthat/test-type.R
index 56cef722556..a3118be2a2c 100644
--- a/r/tests/testthat/test-type.R
+++ b/r/tests/testthat/test-type.R
@@ -106,3 +106,108 @@ test_that("Masked data type functions still work", {
   rm(type)
 
 })
+
+test_that("Type strings are correctly canonicalized", {
+  # data types without arguments
+  expect_equal(canonical_type_str("int8"), int8()$ToString())
+  expect_equal(canonical_type_str("int16"), int16()$ToString())
+  expect_equal(canonical_type_str("int32"), int32()$ToString())
+  expect_equal(canonical_type_str("int64"), int64()$ToString())
+  expect_equal(canonical_type_str("uint8"), uint8()$ToString())
+  expect_equal(canonical_type_str("uint16"), uint16()$ToString())
+  expect_equal(canonical_type_str("uint32"), uint32()$ToString())
+  expect_equal(canonical_type_str("uint64"), uint64()$ToString())
+  expect_equal(canonical_type_str("float16"), float16()$ToString())
+  expect_equal(canonical_type_str("halffloat"), halffloat()$ToString())
+  expect_equal(canonical_type_str("float32"), float32()$ToString())
+  expect_equal(canonical_type_str("float"), float()$ToString())
+  expect_equal(canonical_type_str("float64"), float64()$ToString())
+  expect_equal(canonical_type_str("double"), float64()$ToString())
+  expect_equal(canonical_type_str("boolean"), boolean()$ToString())
+  expect_equal(canonical_type_str("bool"), bool()$ToString())
+  expect_equal(canonical_type_str("utf8"), utf8()$ToString())
+  expect_equal(canonical_type_str("large_utf8"), large_utf8()$ToString())
+  expect_equal(canonical_type_str("large_string"), large_utf8()$ToString())
+  expect_equal(canonical_type_str("binary"), binary()$ToString())
+  expect_equal(canonical_type_str("large_binary"), large_binary()$ToString())
+  expect_equal(canonical_type_str("string"), arrow::string()$ToString())
+  expect_equal(canonical_type_str("null"), null()$ToString())
+
+  # data types with arguments
+  expect_equal(
+    canonical_type_str("fixed_size_binary"),
+    sub("^([^([<]+).*$", "\\1", fixed_size_binary(42)$ToString())
+  )
+  expect_equal(
+    canonical_type_str("date32"),
+    sub("^([^([<]+).*$", "\\1", date32()$ToString())
+  )
+  expect_equal(
+    canonical_type_str("date64"),
+    sub("^([^([<]+).*$", "\\1", date64()$ToString())
+  )
+  expect_equal(
+    canonical_type_str("time32"),
+    sub("^([^([<]+).*$", "\\1", time32()$ToString())
+  )
+  expect_equal(
+    canonical_type_str("time64"),
+    sub("^([^([<]+).*$", "\\1", time64()$ToString())
+  )
+  expect_equal(
+    canonical_type_str("timestamp"),
+    sub("^([^([<]+).*$", "\\1", timestamp()$ToString())
+  )
+  expect_equal(
+    canonical_type_str("decimal"),
+    sub("^([^([<]+).*$", "\\1", decimal(3,2)$ToString())
+  )
+  expect_equal(
+    canonical_type_str("struct"),
+    sub("^([^([<]+).*$", "\\1", struct(foo = int32())$ToString())
+  )
+  expect_equal(
+    canonical_type_str("list_of"),
+    sub("^([^([<]+).*$", "\\1", list_of(int32())$ToString())
+  )
+  expect_equal(
+    canonical_type_str("list"),
+    sub("^([^([<]+).*$", "\\1", list_of(int32())$ToString())
+  )
+  expect_equal(
+    canonical_type_str("large_list_of"),
+    sub("^([^([<]+).*$", "\\1", large_list_of(int32())$ToString())
+  )
+  expect_equal(
+    canonical_type_str("large_list"),
+    sub("^([^([<]+).*$", "\\1", large_list_of(int32())$ToString())
+  )
+  expect_equal(
+    canonical_type_str("fixed_size_list_of"),
+    sub("^([^([<]+).*$", "\\1", fixed_size_list_of(int32(), 42)$ToString())
+  )
+  expect_equal(
+    canonical_type_str("fixed_size_list"),
+    sub("^([^([<]+).*$", "\\1", fixed_size_list_of(int32(), 42)$ToString())
+  )
+
+  # unsupported data types
+  expect_error(
+    canonical_type_str("decimal128(3, 2)"),
+    "parameters"
+  )
+  expect_error(
+    canonical_type_str("list<item: int32>"),
+    "parameters"
+  )
+  expect_error(
+    canonical_type_str("time32[s]"),
+    "parameters"
+  )
+
+  # unrecognized data types
+  expect_error(
+    canonical_type_str("foo"),
+    "Unrecognized"
+  )
+})

From 9b68458bbf0a63a0be867d15ee608ebac69704d8 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Mon, 31 May 2021 11:04:48 +0200
Subject: [PATCH 318/719] ARROW-12895: [CI] Use "concurrency" setting on Github
 Actions to cancel stale jobs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We're currently using a dedicated Github Actions to cancel previous jobs when a new job is queued. It seems this now can be done better using the "concurrency" setting (unfortunately in beta):
https://docs.github.com/en/actions/reference/workflow-syntax-for-github-actions#concurrency

Closes #10416 from pitrou/ARROW-12895-gha-concurrency

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 .github/workflows/archery.yml     |   4 +
 .github/workflows/cancel.yml      | 117 ------------------------------
 .github/workflows/cpp.yml         |   4 +
 .github/workflows/csharp.yml      |   4 +
 .github/workflows/dev.yml         |   4 +
 .github/workflows/dev_pr.yml      |   4 +
 .github/workflows/go.yml          |   4 +
 .github/workflows/integration.yml |   4 +
 .github/workflows/java.yml        |   4 +
 .github/workflows/java_jni.yml    |   4 +
 .github/workflows/js.yml          |   4 +
 .github/workflows/julia.yml       |   4 +
 .github/workflows/python.yml      |   4 +
 .github/workflows/r.yml           |   4 +
 .github/workflows/ruby.yml        |   4 +
 15 files changed, 56 insertions(+), 117 deletions(-)
 delete mode 100644 .github/workflows/cancel.yml

diff --git a/.github/workflows/archery.yml b/.github/workflows/archery.yml
index ac2799bee70..65ae59713a4 100644
--- a/.github/workflows/archery.yml
+++ b/.github/workflows/archery.yml
@@ -31,6 +31,10 @@ on:
       - 'dev/tasks/**'
       - 'docker-compose.yml'
 
+concurrency:
+  group: ${{ github.repository }}-${{ github.ref }}-${{ github.workflow }}
+  cancel-in-progress: true
+
 jobs:
 
   test:
diff --git a/.github/workflows/cancel.yml b/.github/workflows/cancel.yml
deleted file mode 100644
index da668bcdc3b..00000000000
--- a/.github/workflows/cancel.yml
+++ /dev/null
@@ -1,117 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-name: Cancel stale runs
-
-on:
-  workflow_run:
-    # The name of another workflow (whichever one) that always runs on PRs
-    workflows: ['Dev']
-    types: ['requested']
-
-jobs:
-  cancel-stale-workflow-runs:
-    name: "Cancel stale workflow runs"
-    runs-on: ubuntu-latest
-    timeout-minutes: 15
-    steps:
-      # Unfortunately, we need to define a separate cancellation step for
-      # each workflow where we want to cancel stale runs.
-      - uses: potiuk/cancel-workflow-runs@master
-        name: "Cancel stale C++ runs"
-        with:
-          cancelMode: allDuplicates
-          token: ${{ secrets.GITHUB_TOKEN }}
-          workflowFileName: cpp.yml
-          skipEventTypes: '["push", "schedule"]'
-      - uses: potiuk/cancel-workflow-runs@master
-        name: "Cancel stale C# runs"
-        with:
-          cancelMode: allDuplicates
-          token: ${{ secrets.GITHUB_TOKEN }}
-          workflowFileName: csharp.yml
-          skipEventTypes: '["push", "schedule"]'
-      - uses: potiuk/cancel-workflow-runs@master
-        name: "Cancel stale Dev runs"
-        with:
-          cancelMode: allDuplicates
-          token: ${{ secrets.GITHUB_TOKEN }}
-          workflowFileName: dev.yml
-          skipEventTypes: '["push", "schedule"]'
-      - uses: potiuk/cancel-workflow-runs@master
-        name: "Cancel stale Go runs"
-        with:
-          cancelMode: allDuplicates
-          token: ${{ secrets.GITHUB_TOKEN }}
-          workflowFileName: go.yml
-          skipEventTypes: '["push", "schedule"]'
-      - uses: potiuk/cancel-workflow-runs@master
-        name: "Cancel stale Integration runs"
-        with:
-          cancelMode: allDuplicates
-          token: ${{ secrets.GITHUB_TOKEN }}
-          workflowFileName: integration.yml
-          skipEventTypes: '["push", "schedule"]'
-      - uses: potiuk/cancel-workflow-runs@master
-        name: "Cancel stale Java JNI runs"
-        with:
-          cancelMode: allDuplicates
-          token: ${{ secrets.GITHUB_TOKEN }}
-          workflowFileName: java_jni.yml
-          skipEventTypes: '["push", "schedule"]'
-      - uses: potiuk/cancel-workflow-runs@master
-        name: "Cancel stale Java runs"
-        with:
-          cancelMode: allDuplicates
-          token: ${{ secrets.GITHUB_TOKEN }}
-          workflowFileName: java.yml
-          skipEventTypes: '["push", "schedule"]'
-      - uses: potiuk/cancel-workflow-runs@master
-        name: "Cancel stale JS runs"
-        with:
-          cancelMode: allDuplicates
-          token: ${{ secrets.GITHUB_TOKEN }}
-          workflowFileName: js.yml
-          skipEventTypes: '["push", "schedule"]'
-      - uses: potiuk/cancel-workflow-runs@master
-        name: "Cancel stale Julia runs"
-        with:
-          cancelMode: allDuplicates
-          token: ${{ secrets.GITHUB_TOKEN }}
-          workflowFileName: julia.yml
-          skipEventTypes: '["push", "schedule"]'
-      - uses: potiuk/cancel-workflow-runs@master
-        name: "Cancel stale Python runs"
-        with:
-          cancelMode: allDuplicates
-          token: ${{ secrets.GITHUB_TOKEN }}
-          workflowFileName: python.yml
-          skipEventTypes: '["push", "schedule"]'
-      - uses: potiuk/cancel-workflow-runs@master
-        name: "Cancel stale R runs"
-        with:
-          cancelMode: allDuplicates
-          token: ${{ secrets.GITHUB_TOKEN }}
-          workflowFileName: r.yml
-          skipEventTypes: '["push", "schedule"]'
-      - uses: potiuk/cancel-workflow-runs@master
-        name: "Cancel stale Ruby runs"
-        with:
-          cancelMode: allDuplicates
-          token: ${{ secrets.GITHUB_TOKEN }}
-          workflowFileName: ruby.yml
-          skipEventTypes: '["push", "schedule"]'
diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml
index 2e34c5c6df9..f9298174f08 100644
--- a/.github/workflows/cpp.yml
+++ b/.github/workflows/cpp.yml
@@ -37,6 +37,10 @@ on:
       - 'cpp/**'
       - 'format/Flight.proto'
 
+concurrency:
+  group: ${{ github.repository }}-${{ github.ref }}-${{ github.workflow }}
+  cancel-in-progress: true
+
 env:
   ARROW_ENABLE_TIMING_TESTS: OFF
   DOCKER_VOLUME_PREFIX: ".docker/"
diff --git a/.github/workflows/csharp.yml b/.github/workflows/csharp.yml
index 217985a9d10..b339b8f4655 100644
--- a/.github/workflows/csharp.yml
+++ b/.github/workflows/csharp.yml
@@ -29,6 +29,10 @@ on:
       - 'ci/scripts/csharp_*'
       - 'csharp/**'
 
+concurrency:
+  group: ${{ github.repository }}-${{ github.ref }}-${{ github.workflow }}
+  cancel-in-progress: true
+
 jobs:
 
   ubuntu:
diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml
index 4ae6f1dc9bd..03851a0cc88 100644
--- a/.github/workflows/dev.yml
+++ b/.github/workflows/dev.yml
@@ -22,6 +22,10 @@ on:
   push:
   pull_request:
 
+concurrency:
+  group: ${{ github.repository }}-${{ github.ref }}-${{ github.workflow }}
+  cancel-in-progress: true
+
 env:
   ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }}
   ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }}
diff --git a/.github/workflows/dev_pr.yml b/.github/workflows/dev_pr.yml
index 7b92b897051..e9ff7d9c635 100644
--- a/.github/workflows/dev_pr.yml
+++ b/.github/workflows/dev_pr.yml
@@ -28,6 +28,10 @@ on:
       - edited
       - synchronize
 
+concurrency:
+  group: ${{ github.repository }}-${{ github.ref }}-${{ github.workflow }}
+  cancel-in-progress: true
+
 jobs:
   process:
     name: Process
diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml
index f70821ffb7f..3c9100c20b7 100644
--- a/.github/workflows/go.yml
+++ b/.github/workflows/go.yml
@@ -32,6 +32,10 @@ on:
       - 'ci/scripts/go_*'
       - 'go/**'
 
+concurrency:
+  group: ${{ github.repository }}-${{ github.ref }}-${{ github.workflow }}
+  cancel-in-progress: true
+
 env:
   ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }}
   ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }}
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 59007e0ec8a..7a4deb8e3ea 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -41,6 +41,10 @@ on:
       - 'java/**'
       - 'format/**'
 
+concurrency:
+  group: ${{ github.repository }}-${{ github.ref }}-${{ github.workflow }}
+  cancel-in-progress: true
+
 env:
   DOCKER_VOLUME_PREFIX: ".docker/"
   ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }}
diff --git a/.github/workflows/java.yml b/.github/workflows/java.yml
index bc4ce64f5ae..72f4df7e36e 100644
--- a/.github/workflows/java.yml
+++ b/.github/workflows/java.yml
@@ -35,6 +35,10 @@ on:
       - 'format/Flight.proto'
       - 'java/**'
 
+concurrency:
+  group: ${{ github.repository }}-${{ github.ref }}-${{ github.workflow }}
+  cancel-in-progress: true
+
 env:
   DOCKER_VOLUME_PREFIX: ".docker/"
   ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }}
diff --git a/.github/workflows/java_jni.yml b/.github/workflows/java_jni.yml
index 7c747f4e4c8..7d7e26c75fe 100644
--- a/.github/workflows/java_jni.yml
+++ b/.github/workflows/java_jni.yml
@@ -35,6 +35,10 @@ on:
       - 'cpp/**'
       - 'java/**'
 
+concurrency:
+  group: ${{ github.repository }}-${{ github.ref }}-${{ github.workflow }}
+  cancel-in-progress: true
+
 env:
   DOCKER_VOLUME_PREFIX: ".docker/"
   ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }}
diff --git a/.github/workflows/js.yml b/.github/workflows/js.yml
index 304ccb0f3aa..95414909d39 100644
--- a/.github/workflows/js.yml
+++ b/.github/workflows/js.yml
@@ -31,6 +31,10 @@ on:
       - 'ci/scripts/js_*'
       - 'js/**'
 
+concurrency:
+  group: ${{ github.repository }}-${{ github.ref }}-${{ github.workflow }}
+  cancel-in-progress: true
+
 env:
   ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }}
   ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }}
diff --git a/.github/workflows/julia.yml b/.github/workflows/julia.yml
index 47de39ccfbd..226ec3e6ad0 100644
--- a/.github/workflows/julia.yml
+++ b/.github/workflows/julia.yml
@@ -26,6 +26,10 @@ on:
       - '.github/workflows/julia.yml'
       - 'julia/**'
 
+concurrency:
+  group: ${{ github.repository }}-${{ github.ref }}-${{ github.workflow }}
+  cancel-in-progress: true
+
 jobs:
   test:
     name: AMD64 ${{ matrix.os }} Julia ${{ matrix.version }}
diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index 2a7d28dfa16..981fd61c029 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -31,6 +31,10 @@ on:
       - 'cpp/**'
       - 'python/**'
 
+concurrency:
+  group: ${{ github.repository }}-${{ github.ref }}-${{ github.workflow }}
+  cancel-in-progress: true
+
 env:
   DOCKER_VOLUME_PREFIX: ".docker/"
   ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }}
diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml
index 4fde4fd39ad..dd7d98d0890 100644
--- a/.github/workflows/r.yml
+++ b/.github/workflows/r.yml
@@ -39,6 +39,10 @@ on:
       - "cpp/**"
       - "r/**"
 
+concurrency:
+  group: ${{ github.repository }}-${{ github.ref }}-${{ github.workflow }}
+  cancel-in-progress: true
+
 env:
   DOCKER_VOLUME_PREFIX: ".docker/"
   ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }}
diff --git a/.github/workflows/ruby.yml b/.github/workflows/ruby.yml
index 20318169740..ec3e9946c0c 100644
--- a/.github/workflows/ruby.yml
+++ b/.github/workflows/ruby.yml
@@ -43,6 +43,10 @@ on:
       - 'cpp/**'
       - 'ruby/**'
 
+concurrency:
+  group: ${{ github.repository }}-${{ github.ref }}-${{ github.workflow }}
+  cancel-in-progress: true
+
 env:
   DOCKER_VOLUME_PREFIX: ".docker/"
   ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }}

From 81039729bd0b575e5abc2fca4b61f1c909b0e786 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Tue, 1 Jun 2021 10:09:37 +0200
Subject: [PATCH 319/719] ARROW-11161: [C++][Python] Add stream metadata

Extend the InputStream API to allow reading metadata.

Extend the FileSystem API to allow setting metadata when creating an output stream or file.

Implement metadata reading and writing in the S3 filesystem.
A few metadata keys are supported such as "Content-Type" and "Expires".

Closes #10295 from pitrou/ARROW-11161-stream-metadata

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/filesystem/filesystem.cc      | 29 ++++--
 cpp/src/arrow/filesystem/filesystem.h       | 20 +++--
 cpp/src/arrow/filesystem/filesystem_test.cc | 15 ++++
 cpp/src/arrow/filesystem/hdfs.cc            |  4 +-
 cpp/src/arrow/filesystem/hdfs.h             |  6 +-
 cpp/src/arrow/filesystem/localfs.cc         |  4 +-
 cpp/src/arrow/filesystem/localfs.h          |  6 +-
 cpp/src/arrow/filesystem/mockfs.cc          | 35 +++++---
 cpp/src/arrow/filesystem/mockfs.h           |  6 +-
 cpp/src/arrow/filesystem/s3fs.cc            | 98 ++++++++++++++++++++-
 cpp/src/arrow/filesystem/s3fs.h             |  6 +-
 cpp/src/arrow/filesystem/s3fs_test.cc       | 31 ++++++-
 cpp/src/arrow/filesystem/test_util.cc       | 32 ++++++-
 cpp/src/arrow/filesystem/test_util.h        |  2 +
 cpp/src/arrow/io/buffered.cc                |  9 ++
 cpp/src/arrow/io/buffered.h                 |  3 +
 cpp/src/arrow/io/compressed.cc              |  9 ++
 cpp/src/arrow/io/compressed.h               |  3 +
 cpp/src/arrow/io/interfaces.cc              | 23 ++++-
 cpp/src/arrow/io/interfaces.h               | 21 +++--
 cpp/src/arrow/io/memory.cc                  |  4 +-
 cpp/src/arrow/io/memory_test.cc             |  8 ++
 cpp/src/arrow/io/transform.cc               | 13 +++
 cpp/src/arrow/io/transform.h                |  4 +
 cpp/src/arrow/python/filesystem.cc          |  8 +-
 cpp/src/arrow/python/filesystem.h           |  8 +-
 cpp/src/arrow/util/key_value_metadata.cc    |  5 ++
 cpp/src/arrow/util/key_value_metadata.h     |  3 +
 python/pyarrow/_fs.pyx                      | 52 ++++++++---
 python/pyarrow/fs.py                        |  6 +-
 python/pyarrow/includes/libarrow.pxd        |  2 +-
 python/pyarrow/includes/libarrow_fs.pxd     |  9 +-
 python/pyarrow/io.pxi                       | 18 ++++
 python/pyarrow/lib.pxd                      |  4 +-
 python/pyarrow/tests/test_fs.py             | 38 ++++++--
 35 files changed, 451 insertions(+), 93 deletions(-)

diff --git a/cpp/src/arrow/filesystem/filesystem.cc b/cpp/src/arrow/filesystem/filesystem.cc
index 98dc05731b9..6b94d6118c2 100644
--- a/cpp/src/arrow/filesystem/filesystem.cc
+++ b/cpp/src/arrow/filesystem/filesystem.cc
@@ -232,6 +232,16 @@ Future<std::shared_ptr<io::RandomAccessFile>> FileSystem::OpenInputFileAsync(
       [info](std::shared_ptr<FileSystem> self) { return self->OpenInputFile(info); });
 }
 
+Result<std::shared_ptr<io::OutputStream>> FileSystem::OpenOutputStream(
+    const std::string& path) {
+  return OpenOutputStream(path, std::shared_ptr<const KeyValueMetadata>{});
+}
+
+Result<std::shared_ptr<io::OutputStream>> FileSystem::OpenAppendStream(
+    const std::string& path) {
+  return OpenAppendStream(path, std::shared_ptr<const KeyValueMetadata>{});
+}
+
 //////////////////////////////////////////////////////////////////////////
 // SubTreeFileSystem implementation
 
@@ -447,17 +457,17 @@ Future<std::shared_ptr<io::RandomAccessFile>> SubTreeFileSystem::OpenInputFileAs
 }
 
 Result<std::shared_ptr<io::OutputStream>> SubTreeFileSystem::OpenOutputStream(
-    const std::string& path) {
+    const std::string& path, const std::shared_ptr<const KeyValueMetadata>& metadata) {
   auto s = path;
   RETURN_NOT_OK(PrependBaseNonEmpty(&s));
-  return base_fs_->OpenOutputStream(s);
+  return base_fs_->OpenOutputStream(s, metadata);
 }
 
 Result<std::shared_ptr<io::OutputStream>> SubTreeFileSystem::OpenAppendStream(
-    const std::string& path) {
+    const std::string& path, const std::shared_ptr<const KeyValueMetadata>& metadata) {
   auto s = path;
   RETURN_NOT_OK(PrependBaseNonEmpty(&s));
-  return base_fs_->OpenAppendStream(s);
+  return base_fs_->OpenAppendStream(s, metadata);
 }
 
 //////////////////////////////////////////////////////////////////////////
@@ -555,16 +565,16 @@ Result<std::shared_ptr<io::RandomAccessFile>> SlowFileSystem::OpenInputFile(
 }
 
 Result<std::shared_ptr<io::OutputStream>> SlowFileSystem::OpenOutputStream(
-    const std::string& path) {
+    const std::string& path, const std::shared_ptr<const KeyValueMetadata>& metadata) {
   latencies_->Sleep();
   // XXX Should we have a SlowOutputStream that waits on Flush() and Close()?
-  return base_fs_->OpenOutputStream(path);
+  return base_fs_->OpenOutputStream(path, metadata);
 }
 
 Result<std::shared_ptr<io::OutputStream>> SlowFileSystem::OpenAppendStream(
-    const std::string& path) {
+    const std::string& path, const std::shared_ptr<const KeyValueMetadata>& metadata) {
   latencies_->Sleep();
-  return base_fs_->OpenAppendStream(path);
+  return base_fs_->OpenAppendStream(path, metadata);
 }
 
 Status CopyFiles(const std::vector<FileLocator>& sources,
@@ -582,9 +592,10 @@ Status CopyFiles(const std::vector<FileLocator>& sources,
 
     ARROW_ASSIGN_OR_RAISE(auto source,
                           sources[i].filesystem->OpenInputStream(sources[i].path));
+    ARROW_ASSIGN_OR_RAISE(const auto metadata, source->ReadMetadata());
 
     ARROW_ASSIGN_OR_RAISE(auto destination, destinations[i].filesystem->OpenOutputStream(
-                                                destinations[i].path));
+                                                destinations[i].path, metadata));
     RETURN_NOT_OK(internal::CopyStream(source, destination, chunk_size, io_context));
     return destination->Close();
   };
diff --git a/cpp/src/arrow/filesystem/filesystem.h b/cpp/src/arrow/filesystem/filesystem.h
index 2fc58364407..c739471c725 100644
--- a/cpp/src/arrow/filesystem/filesystem.h
+++ b/cpp/src/arrow/filesystem/filesystem.h
@@ -283,13 +283,17 @@ class ARROW_EXPORT FileSystem : public std::enable_shared_from_this<FileSystem>
   ///
   /// If the target already exists, existing data is truncated.
   virtual Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(
-      const std::string& path) = 0;
+      const std::string& path,
+      const std::shared_ptr<const KeyValueMetadata>& metadata) = 0;
+  Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(const std::string& path);
 
   /// Open an output stream for appending.
   ///
   /// If the target doesn't exist, a new empty file is created.
   virtual Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(
-      const std::string& path) = 0;
+      const std::string& path,
+      const std::shared_ptr<const KeyValueMetadata>& metadata) = 0;
+  Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(const std::string& path);
 
  protected:
   explicit FileSystem(const io::IOContext& io_context = io::default_io_context())
@@ -364,9 +368,11 @@ class ARROW_EXPORT SubTreeFileSystem : public FileSystem {
       const FileInfo& info) override;
 
   Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(
-      const std::string& path) override;
+      const std::string& path,
+      const std::shared_ptr<const KeyValueMetadata>& metadata = {}) override;
   Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(
-      const std::string& path) override;
+      const std::string& path,
+      const std::shared_ptr<const KeyValueMetadata>& metadata = {}) override;
 
  protected:
   SubTreeFileSystem() {}
@@ -420,9 +426,11 @@ class ARROW_EXPORT SlowFileSystem : public FileSystem {
   Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
       const FileInfo& info) override;
   Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(
-      const std::string& path) override;
+      const std::string& path,
+      const std::shared_ptr<const KeyValueMetadata>& metadata = {}) override;
   Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(
-      const std::string& path) override;
+      const std::string& path,
+      const std::shared_ptr<const KeyValueMetadata>& metadata = {}) override;
 
  protected:
   std::shared_ptr<FileSystem> base_fs_;
diff --git a/cpp/src/arrow/filesystem/filesystem_test.cc b/cpp/src/arrow/filesystem/filesystem_test.cc
index 8df84ff91e6..44889356b1f 100644
--- a/cpp/src/arrow/filesystem/filesystem_test.cc
+++ b/cpp/src/arrow/filesystem/filesystem_test.cc
@@ -28,6 +28,7 @@
 #include "arrow/filesystem/test_util.h"
 #include "arrow/io/interfaces.h"
 #include "arrow/testing/gtest_util.h"
+#include "arrow/util/key_value_metadata.h"
 
 namespace arrow {
 namespace fs {
@@ -278,6 +279,8 @@ class TestMockFSGeneric : public ::testing::Test, public GenericFileSystemTest {
  protected:
   std::shared_ptr<FileSystem> GetEmptyFileSystem() override { return fs_; }
 
+  bool have_file_metadata() const override { return true; }
+
   TimePoint time_;
   std::shared_ptr<FileSystem> fs_;
 };
@@ -456,6 +459,18 @@ TEST_F(TestMockFS, OpenOutputStream) {
   ASSERT_OK(stream->Close());
   CheckDirs({});
   CheckFiles({{"ab", time_, ""}});
+
+  // With metadata
+  auto metadata = KeyValueMetadata::Make({"some key"}, {"some value"});
+  ASSERT_OK_AND_ASSIGN(stream, fs_->OpenOutputStream("cd", metadata));
+  ASSERT_OK(WriteString(stream.get(), "data"));
+  ASSERT_OK(stream->Close());
+  CheckFiles({{"ab", time_, ""}, {"cd", time_, "data"}});
+
+  ASSERT_OK_AND_ASSIGN(auto input, fs_->OpenInputStream("cd"));
+  ASSERT_OK_AND_ASSIGN(auto got_metadata, input->ReadMetadata());
+  ASSERT_NE(got_metadata, nullptr);
+  ASSERT_TRUE(got_metadata->Equals(*metadata));
 }
 
 TEST_F(TestMockFS, OpenAppendStream) {
diff --git a/cpp/src/arrow/filesystem/hdfs.cc b/cpp/src/arrow/filesystem/hdfs.cc
index 6ac81d01275..77433172f54 100644
--- a/cpp/src/arrow/filesystem/hdfs.cc
+++ b/cpp/src/arrow/filesystem/hdfs.cc
@@ -471,12 +471,12 @@ Result<std::shared_ptr<io::RandomAccessFile>> HadoopFileSystem::OpenInputFile(
 }
 
 Result<std::shared_ptr<io::OutputStream>> HadoopFileSystem::OpenOutputStream(
-    const std::string& path) {
+    const std::string& path, const std::shared_ptr<const KeyValueMetadata>& metadata) {
   return impl_->OpenOutputStream(path);
 }
 
 Result<std::shared_ptr<io::OutputStream>> HadoopFileSystem::OpenAppendStream(
-    const std::string& path) {
+    const std::string& path, const std::shared_ptr<const KeyValueMetadata>& metadata) {
   return impl_->OpenAppendStream(path);
 }
 
diff --git a/cpp/src/arrow/filesystem/hdfs.h b/cpp/src/arrow/filesystem/hdfs.h
index 72cb469b79d..bc72e1cdc92 100644
--- a/cpp/src/arrow/filesystem/hdfs.h
+++ b/cpp/src/arrow/filesystem/hdfs.h
@@ -92,9 +92,11 @@ class ARROW_EXPORT HadoopFileSystem : public FileSystem {
   Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
       const std::string& path) override;
   Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(
-      const std::string& path) override;
+      const std::string& path,
+      const std::shared_ptr<const KeyValueMetadata>& metadata = {}) override;
   Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(
-      const std::string& path) override;
+      const std::string& path,
+      const std::shared_ptr<const KeyValueMetadata>& metadata = {}) override;
 
   /// Create a HdfsFileSystem instance from the given options.
   static Result<std::shared_ptr<HadoopFileSystem>> Make(
diff --git a/cpp/src/arrow/filesystem/localfs.cc b/cpp/src/arrow/filesystem/localfs.cc
index 490bacea413..775fd746aa6 100644
--- a/cpp/src/arrow/filesystem/localfs.cc
+++ b/cpp/src/arrow/filesystem/localfs.cc
@@ -431,14 +431,14 @@ Result<std::shared_ptr<io::OutputStream>> OpenOutputStreamGeneric(const std::str
 }  // namespace
 
 Result<std::shared_ptr<io::OutputStream>> LocalFileSystem::OpenOutputStream(
-    const std::string& path) {
+    const std::string& path, const std::shared_ptr<const KeyValueMetadata>& metadata) {
   bool truncate = true;
   bool append = false;
   return OpenOutputStreamGeneric(path, truncate, append);
 }
 
 Result<std::shared_ptr<io::OutputStream>> LocalFileSystem::OpenAppendStream(
-    const std::string& path) {
+    const std::string& path, const std::shared_ptr<const KeyValueMetadata>& metadata) {
   bool truncate = false;
   bool append = true;
   return OpenOutputStreamGeneric(path, truncate, append);
diff --git a/cpp/src/arrow/filesystem/localfs.h b/cpp/src/arrow/filesystem/localfs.h
index d660dd36a5d..f8e77aee591 100644
--- a/cpp/src/arrow/filesystem/localfs.h
+++ b/cpp/src/arrow/filesystem/localfs.h
@@ -91,9 +91,11 @@ class ARROW_EXPORT LocalFileSystem : public FileSystem {
   Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
       const std::string& path) override;
   Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(
-      const std::string& path) override;
+      const std::string& path,
+      const std::shared_ptr<const KeyValueMetadata>& metadata = {}) override;
   Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(
-      const std::string& path) override;
+      const std::string& path,
+      const std::shared_ptr<const KeyValueMetadata>& metadata = {}) override;
 
  protected:
   LocalFileSystemOptions options_;
diff --git a/cpp/src/arrow/filesystem/mockfs.cc b/cpp/src/arrow/filesystem/mockfs.cc
index e1ac05ced54..f2d2f87263e 100644
--- a/cpp/src/arrow/filesystem/mockfs.cc
+++ b/cpp/src/arrow/filesystem/mockfs.cc
@@ -53,6 +53,7 @@ struct File {
   TimePoint mtime;
   std::string name;
   std::shared_ptr<Buffer> data;
+  std::shared_ptr<const KeyValueMetadata> metadata;
 
   File(TimePoint mtime, std::string name) : mtime(mtime), name(std::move(name)) {}
 
@@ -232,6 +233,19 @@ class MockFSOutputStream : public io::OutputStream {
   bool closed_;
 };
 
+class MockFSInputStream : public io::BufferReader {
+ public:
+  explicit MockFSInputStream(const File& file)
+      : io::BufferReader(file.data), metadata_(file.metadata) {}
+
+  Result<std::shared_ptr<const KeyValueMetadata>> ReadMetadata() override {
+    return metadata_;
+  }
+
+ protected:
+  std::shared_ptr<const KeyValueMetadata> metadata_;
+};
+
 }  // namespace
 
 std::ostream& operator<<(std::ostream& os, const MockDirInfo& di) {
@@ -358,8 +372,9 @@ class MockFileSystem::Impl {
     }
   }
 
-  Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(const std::string& path,
-                                                             bool append) {
+  Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(
+      const std::string& path, bool append,
+      const std::shared_ptr<const KeyValueMetadata>& metadata) {
     auto parts = SplitAbstractPath(path);
     RETURN_NOT_OK(ValidateAbstractPathParts(parts));
 
@@ -381,6 +396,7 @@ class MockFileSystem::Impl {
     } else {
       return NotAFile(path);
     }
+    file->metadata = metadata;
     auto ptr = std::make_shared<MockFSOutputStream>(file, pool);
     if (append && file->data) {
       RETURN_NOT_OK(ptr->Write(file->data->data(), file->data->size()));
@@ -399,12 +415,7 @@ class MockFileSystem::Impl {
     if (!entry->is_file()) {
       return NotAFile(path);
     }
-    const auto& file = entry->as_file();
-    if (file.data) {
-      return std::make_shared<io::BufferReader>(file.data);
-    } else {
-      return std::make_shared<io::BufferReader>("");
-    }
+    return std::make_shared<MockFSInputStream>(entry->as_file());
   }
 };
 
@@ -687,17 +698,17 @@ Result<std::shared_ptr<io::RandomAccessFile>> MockFileSystem::OpenInputFile(
 }
 
 Result<std::shared_ptr<io::OutputStream>> MockFileSystem::OpenOutputStream(
-    const std::string& path) {
+    const std::string& path, const std::shared_ptr<const KeyValueMetadata>& metadata) {
   auto guard = impl_->lock_guard();
 
-  return impl_->OpenOutputStream(path, false /* append */);
+  return impl_->OpenOutputStream(path, /*append=*/false, metadata);
 }
 
 Result<std::shared_ptr<io::OutputStream>> MockFileSystem::OpenAppendStream(
-    const std::string& path) {
+    const std::string& path, const std::shared_ptr<const KeyValueMetadata>& metadata) {
   auto guard = impl_->lock_guard();
 
-  return impl_->OpenOutputStream(path, true /* append */);
+  return impl_->OpenOutputStream(path, /*append=*/true, metadata);
 }
 
 std::vector<MockDirInfo> MockFileSystem::AllDirs() {
diff --git a/cpp/src/arrow/filesystem/mockfs.h b/cpp/src/arrow/filesystem/mockfs.h
index af0a327e263..378f30d295d 100644
--- a/cpp/src/arrow/filesystem/mockfs.h
+++ b/cpp/src/arrow/filesystem/mockfs.h
@@ -90,9 +90,11 @@ class ARROW_EXPORT MockFileSystem : public FileSystem {
   Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
       const std::string& path) override;
   Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(
-      const std::string& path) override;
+      const std::string& path,
+      const std::shared_ptr<const KeyValueMetadata>& metadata = {}) override;
   Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(
-      const std::string& path) override;
+      const std::string& path,
+      const std::shared_ptr<const KeyValueMetadata>& metadata = {}) override;
 
   // Contents-dumping helpers to ease testing.
   // Output is lexicographically-ordered by full path.
diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc
index c22571aaa56..fe9036823be 100644
--- a/cpp/src/arrow/filesystem/s3fs.cc
+++ b/cpp/src/arrow/filesystem/s3fs.cc
@@ -78,6 +78,7 @@
 #include "arrow/util/atomic_shared_ptr.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/future.h"
+#include "arrow/util/key_value_metadata.h"
 #include "arrow/util/logging.h"
 #include "arrow/util/optional.h"
 #include "arrow/util/task_group.h"
@@ -684,6 +685,80 @@ Result<S3Model::GetObjectResult> GetObjectRange(Aws::S3::S3Client* client,
   return OutcomeToResult(client->GetObject(req));
 }
 
+template <typename ObjectResult>
+std::shared_ptr<const KeyValueMetadata> GetObjectMetadata(const ObjectResult& result) {
+  auto md = std::make_shared<KeyValueMetadata>();
+
+  auto push = [&](std::string k, const Aws::String& v) {
+    if (!v.empty()) {
+      md->Append(std::move(k), FromAwsString(v).to_string());
+    }
+  };
+  auto push_datetime = [&](std::string k, const Aws::Utils::DateTime& v) {
+    if (v != Aws::Utils::DateTime(0.0)) {
+      push(std::move(k), v.ToGmtString(Aws::Utils::DateFormat::ISO_8601));
+    }
+  };
+
+  md->Append("Content-Length", std::to_string(result.GetContentLength()));
+  push("Cache-Control", result.GetCacheControl());
+  push("Content-Type", result.GetContentType());
+  push("Content-Language", result.GetContentLanguage());
+  push("ETag", result.GetETag());
+  push("VersionId", result.GetVersionId());
+  push_datetime("Last-Modified", result.GetLastModified());
+  push_datetime("Expires", result.GetExpires());
+  return md;
+}
+
+template <typename ObjectRequest>
+struct ObjectMetadataSetter {
+  using Setter = std::function<Status(const std::string& value, ObjectRequest* req)>;
+
+  static std::unordered_map<std::string, Setter> GetSetters() {
+    return {{"Cache-Control", StringSetter(&ObjectRequest::SetCacheControl)},
+            {"Content-Type", StringSetter(&ObjectRequest::SetContentType)},
+            {"Content-Language", StringSetter(&ObjectRequest::SetContentLanguage)},
+            {"Expires", DateTimeSetter(&ObjectRequest::SetExpires)}};
+  }
+
+ private:
+  static Setter StringSetter(void (ObjectRequest::*req_method)(Aws::String&&)) {
+    return [req_method](const std::string& v, ObjectRequest* req) {
+      (req->*req_method)(ToAwsString(v));
+      return Status::OK();
+    };
+  }
+
+  static Setter DateTimeSetter(
+      void (ObjectRequest::*req_method)(Aws::Utils::DateTime&&)) {
+    return [req_method](const std::string& v, ObjectRequest* req) {
+      (req->*req_method)(
+          Aws::Utils::DateTime(v.data(), Aws::Utils::DateFormat::ISO_8601));
+      return Status::OK();
+    };
+  }
+};
+
+template <typename ObjectRequest>
+Status SetObjectMetadata(const std::shared_ptr<const KeyValueMetadata>& metadata,
+                         ObjectRequest* req) {
+  static auto setters = ObjectMetadataSetter<ObjectRequest>::GetSetters();
+
+  if (metadata) {
+    const auto& keys = metadata->keys();
+    const auto& values = metadata->values();
+
+    for (size_t i = 0; i < keys.size(); ++i) {
+      auto it = setters.find(keys[i]);
+      if (it != setters.end()) {
+        RETURN_NOT_OK(it->second(values[i], req));
+      }
+    }
+  }
+  return Status::OK();
+}
+
 // A RandomAccessFile that reads from a S3 object
 class ObjectInputFile final : public io::RandomAccessFile {
  public:
@@ -720,6 +795,7 @@ class ObjectInputFile final : public io::RandomAccessFile {
     }
     content_length_ = outcome.GetResult().GetContentLength();
     DCHECK_GE(content_length_, 0);
+    metadata_ = GetObjectMetadata(outcome.GetResult());
     return Status::OK();
   }
 
@@ -742,6 +818,15 @@ class ObjectInputFile final : public io::RandomAccessFile {
 
   // RandomAccessFile APIs
 
+  Result<std::shared_ptr<const KeyValueMetadata>> ReadMetadata() override {
+    return metadata_;
+  }
+
+  Future<std::shared_ptr<const KeyValueMetadata>> ReadMetadataAsync(
+      const io::IOContext& io_context) override {
+    return metadata_;
+  }
+
   Status Close() override {
     client_ = nullptr;
     closed_ = true;
@@ -825,6 +910,7 @@ class ObjectInputFile final : public io::RandomAccessFile {
   bool closed_ = false;
   int64_t pos_ = 0;
   int64_t content_length_ = kNoSize;
+  std::shared_ptr<const KeyValueMetadata> metadata_;
 };
 
 // Minimum size for each part of a multipart upload, except for the last part.
@@ -841,10 +927,12 @@ class ObjectOutputStream final : public io::OutputStream {
  public:
   ObjectOutputStream(std::shared_ptr<Aws::S3::S3Client> client,
                      const io::IOContext& io_context, const S3Path& path,
-                     const S3Options& options)
+                     const S3Options& options,
+                     const std::shared_ptr<const KeyValueMetadata>& metadata)
       : client_(std::move(client)),
         io_context_(io_context),
         path_(path),
+        metadata_(metadata),
         background_writes_(options.background_writes) {}
 
   ~ObjectOutputStream() override {
@@ -858,6 +946,7 @@ class ObjectOutputStream final : public io::OutputStream {
     S3Model::CreateMultipartUploadRequest req;
     req.SetBucket(ToAwsString(path_.bucket));
     req.SetKey(ToAwsString(path_.key));
+    RETURN_NOT_OK(SetObjectMetadata(metadata_, &req));
 
     auto outcome = client_->CreateMultipartUpload(req);
     if (!outcome.IsSuccess()) {
@@ -1127,6 +1216,7 @@ class ObjectOutputStream final : public io::OutputStream {
   std::shared_ptr<Aws::S3::S3Client> client_;
   const io::IOContext io_context_;
   const S3Path path_;
+  const std::shared_ptr<const KeyValueMetadata> metadata_;
   const bool background_writes_;
 
   Aws::String upload_id_;
@@ -2106,18 +2196,18 @@ Result<std::shared_ptr<io::RandomAccessFile>> S3FileSystem::OpenInputFile(
 }
 
 Result<std::shared_ptr<io::OutputStream>> S3FileSystem::OpenOutputStream(
-    const std::string& s) {
+    const std::string& s, const std::shared_ptr<const KeyValueMetadata>& metadata) {
   ARROW_ASSIGN_OR_RAISE(auto path, S3Path::FromString(s));
   RETURN_NOT_OK(ValidateFilePath(path));
 
   auto ptr = std::make_shared<ObjectOutputStream>(impl_->client_, io_context(), path,
-                                                  impl_->options());
+                                                  impl_->options(), metadata);
   RETURN_NOT_OK(ptr->Init());
   return ptr;
 }
 
 Result<std::shared_ptr<io::OutputStream>> S3FileSystem::OpenAppendStream(
-    const std::string& path) {
+    const std::string& path, const std::shared_ptr<const KeyValueMetadata>& metadata) {
   // XXX Investigate UploadPartCopy? Does it work with source == destination?
   // https://docs.aws.amazon.com/AmazonS3/latest/API/mpUploadUploadPartCopy.html
   // (but would need to fall back to GET if the current data is < 5 MB)
diff --git a/cpp/src/arrow/filesystem/s3fs.h b/cpp/src/arrow/filesystem/s3fs.h
index 6e73ed436c5..4fb7b007dfd 100644
--- a/cpp/src/arrow/filesystem/s3fs.h
+++ b/cpp/src/arrow/filesystem/s3fs.h
@@ -214,10 +214,12 @@ class ARROW_EXPORT S3FileSystem : public FileSystem {
   /// It is recommended to enable background_writes unless you prefer
   /// implementing your own background execution strategy.
   Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(
-      const std::string& path) override;
+      const std::string& path,
+      const std::shared_ptr<const KeyValueMetadata>& metadata = {}) override;
 
   Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(
-      const std::string& path) override;
+      const std::string& path,
+      const std::shared_ptr<const KeyValueMetadata>& metadata = {}) override;
 
   /// Create a S3FileSystem instance from the given options.
   static Result<std::shared_ptr<S3FileSystem>> Make(
diff --git a/cpp/src/arrow/filesystem/s3fs_test.cc b/cpp/src/arrow/filesystem/s3fs_test.cc
index f5efcda5120..5ba8e237c7a 100644
--- a/cpp/src/arrow/filesystem/s3fs_test.cc
+++ b/cpp/src/arrow/filesystem/s3fs_test.cc
@@ -77,6 +77,7 @@
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/future.h"
 #include "arrow/util/io_util.h"
+#include "arrow/util/key_value_metadata.h"
 #include "arrow/util/logging.h"
 #include "arrow/util/macros.h"
 
@@ -397,6 +398,7 @@ class TestS3FS : public S3TestMixin {
       ASSERT_OK(OutcomeToStatus(client_->PutObject(req)));
       req.SetKey(ToAwsString("somefile"));
       req.SetBody(std::make_shared<std::stringstream>("some data"));
+      req.SetContentType("x-arrow/test");
       ASSERT_OK(OutcomeToStatus(client_->PutObject(req)));
     }
   }
@@ -451,6 +453,17 @@ class TestS3FS : public S3TestMixin {
     ASSERT_OK(stream->Close());
     AssertObjectContents(client_.get(), "bucket", "newfile4", expected);
 
+    // Create new file with metadata
+    auto metadata = KeyValueMetadata::Make({"Content-Type", "Expires"},
+                                           {"x-arrow/test6", "2016-02-05T20:08:35Z"});
+    ASSERT_OK_AND_ASSIGN(stream, fs_->OpenOutputStream("bucket/newfile5", metadata));
+    ASSERT_OK(stream->Close());
+    ASSERT_OK_AND_ASSIGN(auto input, fs_->OpenInputStream("bucket/newfile5"));
+    ASSERT_OK_AND_ASSIGN(auto got_metadata, input->ReadMetadata());
+    ASSERT_NE(got_metadata, nullptr);
+    ASSERT_THAT(got_metadata->sorted_pairs(),
+                testing::IsSupersetOf(metadata->sorted_pairs()));
+
     // Overwrite
     ASSERT_OK_AND_ASSIGN(stream, fs_->OpenOutputStream("bucket/newfile1"));
     ASSERT_OK(stream->Write("overwritten data"));
@@ -465,12 +478,12 @@ class TestS3FS : public S3TestMixin {
     // Open file and then lose filesystem reference
     ASSERT_EQ(fs_.use_count(), 1);  // needed for test to work
     std::weak_ptr<S3FileSystem> weak_fs(fs_);
-    ASSERT_OK_AND_ASSIGN(stream, fs_->OpenOutputStream("bucket/newfile5"));
+    ASSERT_OK_AND_ASSIGN(stream, fs_->OpenOutputStream("bucket/newfile6"));
     fs_.reset();
     ASSERT_OK(stream->Write("some other data"));
     ASSERT_OK(stream->Close());
     ASSERT_TRUE(weak_fs.expired());
-    AssertObjectContents(client_.get(), "bucket", "newfile5", "some other data");
+    AssertObjectContents(client_.get(), "bucket", "newfile6", "some other data");
   }
 
   void TestOpenOutputStreamAbort() {
@@ -839,6 +852,19 @@ TEST_F(TestS3FS, OpenInputStream) {
   ASSERT_TRUE(weak_fs.expired());
 }
 
+TEST_F(TestS3FS, OpenInputStreamMetadata) {
+  std::shared_ptr<io::InputStream> stream;
+  std::shared_ptr<const KeyValueMetadata> metadata;
+
+  ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream("bucket/somefile"));
+  ASSERT_FINISHES_OK_AND_ASSIGN(metadata, stream->ReadMetadataAsync());
+
+  std::vector<std::pair<std::string, std::string>> expected_kv{
+      {"Content-Length", "9"}, {"Content-Type", "x-arrow/test"}};
+  ASSERT_NE(metadata, nullptr);
+  ASSERT_THAT(metadata->sorted_pairs(), testing::IsSupersetOf(expected_kv));
+}
+
 TEST_F(TestS3FS, OpenInputFile) {
   std::shared_ptr<io::RandomAccessFile> file;
   std::shared_ptr<Buffer> buf;
@@ -959,6 +985,7 @@ class TestS3FSGeneric : public S3TestMixin, public GenericFileSystemTest {
     return false;
 #endif
   }
+  bool have_file_metadata() const override { return true; }
 
   S3Options options_;
   std::shared_ptr<S3FileSystem> s3fs_;
diff --git a/cpp/src/arrow/filesystem/test_util.cc b/cpp/src/arrow/filesystem/test_util.cc
index 2a40c041848..bbff33f4d32 100644
--- a/cpp/src/arrow/filesystem/test_util.cc
+++ b/cpp/src/arrow/filesystem/test_util.cc
@@ -17,6 +17,7 @@
 
 #include <algorithm>
 #include <chrono>
+#include <ostream>
 #include <string>
 #include <utility>
 #include <vector>
@@ -32,6 +33,7 @@
 #include "arrow/testing/gtest_util.h"
 #include "arrow/util/async_generator.h"
 #include "arrow/util/future.h"
+#include "arrow/util/key_value_metadata.h"
 #include "arrow/util/vector.h"
 
 using ::testing::ElementsAre;
@@ -410,9 +412,9 @@ void GenericFileSystemTest::TestMoveFile(FileSystem* fs) {
 
 void GenericFileSystemTest::TestMoveDir(FileSystem* fs) {
   if (!allow_move_dir()) {
-    // XXX skip
-    return;
+    GTEST_SKIP() << "Filesystem doesn't allow moving directories";
   }
+
   ASSERT_OK(fs->CreateDir("AB/CD"));
   ASSERT_OK(fs->CreateDir("EF"));
   CreateFile(fs, "AB/abc", "abc data");
@@ -851,6 +853,24 @@ void GenericFileSystemTest::TestOpenOutputStream(FileSystem* fs) {
 
   ASSERT_RAISES(Invalid, stream->Write("x"));  // Stream is closed
 
+  // Storing metadata along file
+  auto metadata = KeyValueMetadata::Make({"Content-Type", "Content-Language"},
+                                         {"x-arrow/filesystem-test", "fr_FR"});
+  ASSERT_OK_AND_ASSIGN(stream, fs->OpenOutputStream("jkl", metadata));
+  ASSERT_OK(stream->Write("data"));
+  ASSERT_OK(stream->Close());
+  ASSERT_OK_AND_ASSIGN(auto input, fs->OpenInputStream("jkl"));
+  ASSERT_OK_AND_ASSIGN(auto got_metadata, input->ReadMetadata());
+  if (have_file_metadata()) {
+    ASSERT_NE(got_metadata, nullptr);
+    ASSERT_GE(got_metadata->size(), 2);
+    ASSERT_OK_AND_EQ("x-arrow/filesystem-test", got_metadata->Get("Content-Type"));
+  } else {
+    if (got_metadata) {
+      ASSERT_EQ(got_metadata->size(), 0);
+    }
+  }
+
   if (!allow_write_file_over_dir()) {
     // Cannot turn dir into file
     ASSERT_RAISES(IOError, fs->OpenOutputStream("CD"));
@@ -860,9 +880,9 @@ void GenericFileSystemTest::TestOpenOutputStream(FileSystem* fs) {
 
 void GenericFileSystemTest::TestOpenAppendStream(FileSystem* fs) {
   if (!allow_append_to_file()) {
-    // XXX skip
-    return;
+    GTEST_SKIP() << "Filesystem doesn't allow file appends";
   }
+
   std::shared_ptr<io::OutputStream> stream;
 
   ASSERT_OK_AND_ASSIGN(stream, fs->OpenAppendStream("abc"));
@@ -893,6 +913,8 @@ void GenericFileSystemTest::TestOpenInputStream(FileSystem* fs) {
   std::shared_ptr<io::InputStream> stream;
   std::shared_ptr<Buffer> buffer;
   ASSERT_OK_AND_ASSIGN(stream, fs->OpenInputStream("AB/abc"));
+  ASSERT_OK_AND_ASSIGN(auto metadata, stream->ReadMetadata());
+  // XXX we cannot really test anything more about metadata...
   ASSERT_OK_AND_ASSIGN(buffer, stream->Read(4));
   AssertBufferEqual(*buffer, "some");
   ASSERT_OK_AND_ASSIGN(buffer, stream->Read(6));
@@ -946,7 +968,9 @@ void GenericFileSystemTest::TestOpenInputStreamAsync(FileSystem* fs) {
 
   std::shared_ptr<io::InputStream> stream;
   std::shared_ptr<Buffer> buffer;
+  std::shared_ptr<const KeyValueMetadata> metadata;
   ASSERT_FINISHES_OK_AND_ASSIGN(stream, fs->OpenInputStreamAsync("AB/abc"));
+  ASSERT_FINISHES_OK_AND_ASSIGN(metadata, stream->ReadMetadataAsync());
   ASSERT_OK_AND_ASSIGN(buffer, stream->Read(4));
   AssertBufferEqual(*buffer, "some");
   ASSERT_OK(stream->Close());
diff --git a/cpp/src/arrow/filesystem/test_util.h b/cpp/src/arrow/filesystem/test_util.h
index 79417561277..64577e1c60a 100644
--- a/cpp/src/arrow/filesystem/test_util.h
+++ b/cpp/src/arrow/filesystem/test_util.h
@@ -140,6 +140,8 @@ class ARROW_TESTING_EXPORT GenericFileSystemTest {
   virtual bool have_directory_mtimes() const { return true; }
   // - Whether some directory tree deletion tests may fail randomly
   virtual bool have_flaky_directory_tree_deletion() const { return false; }
+  // - Whether the filesystem stores some metadata alongside files
+  virtual bool have_file_metadata() const { return false; }
 
   void TestEmpty(FileSystem* fs);
   void TestNormalizePath(FileSystem* fs);
diff --git a/cpp/src/arrow/io/buffered.cc b/cpp/src/arrow/io/buffered.cc
index 16b969617e5..7804c130ca1 100644
--- a/cpp/src/arrow/io/buffered.cc
+++ b/cpp/src/arrow/io/buffered.cc
@@ -476,5 +476,14 @@ Result<std::shared_ptr<Buffer>> BufferedInputStream::DoRead(int64_t nbytes) {
   return impl_->Read(nbytes);
 }
 
+Result<std::shared_ptr<const KeyValueMetadata>> BufferedInputStream::ReadMetadata() {
+  return impl_->raw()->ReadMetadata();
+}
+
+Future<std::shared_ptr<const KeyValueMetadata>> BufferedInputStream::ReadMetadataAsync(
+    const IOContext& io_context) {
+  return impl_->raw()->ReadMetadataAsync(io_context);
+}
+
 }  // namespace io
 }  // namespace arrow
diff --git a/cpp/src/arrow/io/buffered.h b/cpp/src/arrow/io/buffered.h
index 56c8c390f79..8116613fa4e 100644
--- a/cpp/src/arrow/io/buffered.h
+++ b/cpp/src/arrow/io/buffered.h
@@ -132,6 +132,9 @@ class ARROW_EXPORT BufferedInputStream
   // InputStream APIs
 
   bool closed() const override;
+  Result<std::shared_ptr<const KeyValueMetadata>> ReadMetadata() override;
+  Future<std::shared_ptr<const KeyValueMetadata>> ReadMetadataAsync(
+      const IOContext& io_context) override;
 
  private:
   friend InputStreamConcurrencyWrapper<BufferedInputStream>;
diff --git a/cpp/src/arrow/io/compressed.cc b/cpp/src/arrow/io/compressed.cc
index 4993ae2dd37..72977f0f297 100644
--- a/cpp/src/arrow/io/compressed.cc
+++ b/cpp/src/arrow/io/compressed.cc
@@ -437,5 +437,14 @@ Result<std::shared_ptr<Buffer>> CompressedInputStream::DoRead(int64_t nbytes) {
 
 std::shared_ptr<InputStream> CompressedInputStream::raw() const { return impl_->raw(); }
 
+Result<std::shared_ptr<const KeyValueMetadata>> CompressedInputStream::ReadMetadata() {
+  return impl_->raw()->ReadMetadata();
+}
+
+Future<std::shared_ptr<const KeyValueMetadata>> CompressedInputStream::ReadMetadataAsync(
+    const IOContext& io_context) {
+  return impl_->raw()->ReadMetadataAsync(io_context);
+}
+
 }  // namespace io
 }  // namespace arrow
diff --git a/cpp/src/arrow/io/compressed.h b/cpp/src/arrow/io/compressed.h
index 677e45c6ff2..cd1a7f673ce 100644
--- a/cpp/src/arrow/io/compressed.h
+++ b/cpp/src/arrow/io/compressed.h
@@ -89,6 +89,9 @@ class ARROW_EXPORT CompressedInputStream
   // InputStream interface
 
   bool closed() const override;
+  Result<std::shared_ptr<const KeyValueMetadata>> ReadMetadata() override;
+  Future<std::shared_ptr<const KeyValueMetadata>> ReadMetadataAsync(
+      const IOContext& io_context) override;
 
   /// \brief Return the underlying raw input stream.
   std::shared_ptr<InputStream> raw() const;
diff --git a/cpp/src/arrow/io/interfaces.cc b/cpp/src/arrow/io/interfaces.cc
index 7193a56e8d1..954c0f37b2d 100644
--- a/cpp/src/arrow/io/interfaces.cc
+++ b/cpp/src/arrow/io/interfaces.cc
@@ -33,6 +33,7 @@
 #include "arrow/io/util_internal.h"
 #include "arrow/result.h"
 #include "arrow/status.h"
+#include "arrow/util/checked_cast.h"
 #include "arrow/util/future.h"
 #include "arrow/util/iterator.h"
 #include "arrow/util/logging.h"
@@ -41,6 +42,7 @@
 
 namespace arrow {
 
+using internal::checked_pointer_cast;
 using internal::Executor;
 using internal::TaskHints;
 using internal::ThreadPool;
@@ -105,6 +107,22 @@ Result<util::string_view> InputStream::Peek(int64_t ARROW_ARG_UNUSED(nbytes)) {
 
 bool InputStream::supports_zero_copy() const { return false; }
 
+Result<std::shared_ptr<const KeyValueMetadata>> InputStream::ReadMetadata() {
+  return std::shared_ptr<const KeyValueMetadata>{};
+}
+
+// Default ReadMetadataAsync() implementation: simply issue the read on the context's
+// executor
+Future<std::shared_ptr<const KeyValueMetadata>> InputStream::ReadMetadataAsync(
+    const IOContext& ctx) {
+  auto self = shared_from_this();
+  return DeferNotOk(internal::SubmitIO(ctx, [self] { return self->ReadMetadata(); }));
+}
+
+Future<std::shared_ptr<const KeyValueMetadata>> InputStream::ReadMetadataAsync() {
+  return ReadMetadataAsync(io_context());
+}
+
 Result<Iterator<std::shared_ptr<Buffer>>> MakeInputStreamIterator(
     std::shared_ptr<InputStream> stream, int64_t block_size) {
   if (stream->closed()) {
@@ -139,10 +157,7 @@ Result<std::shared_ptr<Buffer>> RandomAccessFile::ReadAt(int64_t position,
 Future<std::shared_ptr<Buffer>> RandomAccessFile::ReadAsync(const IOContext& ctx,
                                                             int64_t position,
                                                             int64_t nbytes) {
-  auto self = shared_from_this();
-  TaskHints hints;
-  hints.io_size = nbytes;
-  hints.external_id = ctx.external_id();
+  auto self = checked_pointer_cast<RandomAccessFile>(shared_from_this());
   return DeferNotOk(internal::SubmitIO(
       ctx, [self, position, nbytes] { return self->ReadAt(position, nbytes); }));
 }
diff --git a/cpp/src/arrow/io/interfaces.h b/cpp/src/arrow/io/interfaces.h
index 0afd2f236b8..e524afa99a3 100644
--- a/cpp/src/arrow/io/interfaces.h
+++ b/cpp/src/arrow/io/interfaces.h
@@ -202,7 +202,9 @@ class ARROW_EXPORT OutputStream : virtual public FileInterface, public Writable
   OutputStream() = default;
 };
 
-class ARROW_EXPORT InputStream : virtual public FileInterface, virtual public Readable {
+class ARROW_EXPORT InputStream : virtual public FileInterface,
+                                 virtual public Readable,
+                                 public std::enable_shared_from_this<InputStream> {
  public:
   /// \brief Advance or skip stream indicated number of bytes
   /// \param[in] nbytes the number to move forward
@@ -225,14 +227,23 @@ class ARROW_EXPORT InputStream : virtual public FileInterface, virtual public Re
   /// Zero copy reads imply the use of Buffer-returning Read() overloads.
   virtual bool supports_zero_copy() const;
 
+  /// \brief Read and return stream metadata
+  ///
+  /// If the stream implementation doesn't support metadata, empty metadata
+  /// is returned.  Note that it is allowed to return a null pointer rather
+  /// than an allocated empty metadata.
+  virtual Result<std::shared_ptr<const KeyValueMetadata>> ReadMetadata();
+
+  /// \brief Read stream metadata asynchronously
+  virtual Future<std::shared_ptr<const KeyValueMetadata>> ReadMetadataAsync(
+      const IOContext& io_context);
+  Future<std::shared_ptr<const KeyValueMetadata>> ReadMetadataAsync();
+
  protected:
   InputStream() = default;
 };
 
-class ARROW_EXPORT RandomAccessFile
-    : public std::enable_shared_from_this<RandomAccessFile>,
-      public InputStream,
-      public Seekable {
+class ARROW_EXPORT RandomAccessFile : public InputStream, public Seekable {
  public:
   /// Necessary because we hold a std::unique_ptr
   ~RandomAccessFile() override;
diff --git a/cpp/src/arrow/io/memory.cc b/cpp/src/arrow/io/memory.cc
index 7d111183635..6495242e63b 100644
--- a/cpp/src/arrow/io/memory.cc
+++ b/cpp/src/arrow/io/memory.cc
@@ -263,8 +263,8 @@ void FixedSizeBufferWriter::set_memcopy_threshold(int64_t threshold) {
 
 BufferReader::BufferReader(std::shared_ptr<Buffer> buffer)
     : buffer_(std::move(buffer)),
-      data_(buffer_->data()),
-      size_(buffer_->size()),
+      data_(buffer_ ? buffer_->data() : reinterpret_cast<const uint8_t*>("")),
+      size_(buffer_ ? buffer_->size() : 0),
       position_(0),
       is_open_(true) {}
 
diff --git a/cpp/src/arrow/io/memory_test.cc b/cpp/src/arrow/io/memory_test.cc
index 3365674af84..bd62761c739 100644
--- a/cpp/src/arrow/io/memory_test.cc
+++ b/cpp/src/arrow/io/memory_test.cc
@@ -179,6 +179,14 @@ TEST(TestBufferReader, FromStrings) {
   ASSERT_EQ(0, memcmp(piece->data(), data.data() + 2, 4));
 }
 
+TEST(TestBufferReader, FromNullBuffer) {
+  std::shared_ptr<Buffer> buf;
+  BufferReader reader(buf);
+  ASSERT_OK_AND_EQ(0, reader.GetSize());
+  ASSERT_OK_AND_ASSIGN(auto piece, reader.Read(10));
+  ASSERT_EQ(0, piece->size());
+}
+
 TEST(TestBufferReader, Seeking) {
   std::string data = "data123456";
 
diff --git a/cpp/src/arrow/io/transform.cc b/cpp/src/arrow/io/transform.cc
index a0b0b33d8dd..3fdf5a7a9ba 100644
--- a/cpp/src/arrow/io/transform.cc
+++ b/cpp/src/arrow/io/transform.cc
@@ -145,5 +145,18 @@ Result<int64_t> TransformInputStream::Tell() const {
   return impl_->pos_;
 }
 
+Result<std::shared_ptr<const KeyValueMetadata>> TransformInputStream::ReadMetadata() {
+  RETURN_NOT_OK(impl_->CheckClosed());
+
+  return impl_->wrapped_->ReadMetadata();
+}
+
+Future<std::shared_ptr<const KeyValueMetadata>> TransformInputStream::ReadMetadataAsync(
+    const IOContext& io_context) {
+  RETURN_NOT_OK(impl_->CheckClosed());
+
+  return impl_->wrapped_->ReadMetadataAsync(io_context);
+}
+
 }  // namespace io
 }  // namespace arrow
diff --git a/cpp/src/arrow/io/transform.h b/cpp/src/arrow/io/transform.h
index d983b7c25b3..c117f275929 100644
--- a/cpp/src/arrow/io/transform.h
+++ b/cpp/src/arrow/io/transform.h
@@ -45,6 +45,10 @@ class ARROW_EXPORT TransformInputStream : public InputStream {
   Result<int64_t> Read(int64_t nbytes, void* out) override;
   Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) override;
 
+  Result<std::shared_ptr<const KeyValueMetadata>> ReadMetadata() override;
+  Future<std::shared_ptr<const KeyValueMetadata>> ReadMetadataAsync(
+      const IOContext& io_context) override;
+
   Result<int64_t> Tell() const override;
 
  protected:
diff --git a/cpp/src/arrow/python/filesystem.cc b/cpp/src/arrow/python/filesystem.cc
index 8e8e8a6e899..8c12f05a0f3 100644
--- a/cpp/src/arrow/python/filesystem.cc
+++ b/cpp/src/arrow/python/filesystem.cc
@@ -170,10 +170,10 @@ Result<std::shared_ptr<io::RandomAccessFile>> PyFileSystem::OpenInputFile(
 }
 
 Result<std::shared_ptr<io::OutputStream>> PyFileSystem::OpenOutputStream(
-    const std::string& path) {
+    const std::string& path, const std::shared_ptr<const KeyValueMetadata>& metadata) {
   std::shared_ptr<io::OutputStream> stream;
   auto st = SafeCallIntoPython([&]() -> Status {
-    vtable_.open_output_stream(handler_.obj(), path, &stream);
+    vtable_.open_output_stream(handler_.obj(), path, metadata, &stream);
     return CheckPyError();
   });
   RETURN_NOT_OK(st);
@@ -181,10 +181,10 @@ Result<std::shared_ptr<io::OutputStream>> PyFileSystem::OpenOutputStream(
 }
 
 Result<std::shared_ptr<io::OutputStream>> PyFileSystem::OpenAppendStream(
-    const std::string& path) {
+    const std::string& path, const std::shared_ptr<const KeyValueMetadata>& metadata) {
   std::shared_ptr<io::OutputStream> stream;
   auto st = SafeCallIntoPython([&]() -> Status {
-    vtable_.open_append_stream(handler_.obj(), path, &stream);
+    vtable_.open_append_stream(handler_.obj(), path, metadata, &stream);
     return CheckPyError();
   });
   RETURN_NOT_OK(st);
diff --git a/cpp/src/arrow/python/filesystem.h b/cpp/src/arrow/python/filesystem.h
index f2d9c90c817..e1235f8de5f 100644
--- a/cpp/src/arrow/python/filesystem.h
+++ b/cpp/src/arrow/python/filesystem.h
@@ -60,9 +60,11 @@ class ARROW_PYTHON_EXPORT PyFileSystemVtable {
                      std::shared_ptr<io::RandomAccessFile>* out)>
       open_input_file;
   std::function<void(PyObject*, const std::string& path,
+                     const std::shared_ptr<const KeyValueMetadata>&,
                      std::shared_ptr<io::OutputStream>* out)>
       open_output_stream;
   std::function<void(PyObject*, const std::string& path,
+                     const std::shared_ptr<const KeyValueMetadata>&,
                      std::shared_ptr<io::OutputStream>* out)>
       open_append_stream;
 
@@ -104,9 +106,11 @@ class ARROW_PYTHON_EXPORT PyFileSystem : public arrow::fs::FileSystem {
   Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
       const std::string& path) override;
   Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(
-      const std::string& path) override;
+      const std::string& path,
+      const std::shared_ptr<const KeyValueMetadata>& metadata = {}) override;
   Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(
-      const std::string& path) override;
+      const std::string& path,
+      const std::shared_ptr<const KeyValueMetadata>& metadata = {}) override;
 
   Result<std::string> NormalizePath(std::string path) override;
 
diff --git a/cpp/src/arrow/util/key_value_metadata.cc b/cpp/src/arrow/util/key_value_metadata.cc
index 4c6af29dab7..ad3b686a9bd 100644
--- a/cpp/src/arrow/util/key_value_metadata.cc
+++ b/cpp/src/arrow/util/key_value_metadata.cc
@@ -70,6 +70,11 @@ KeyValueMetadata::KeyValueMetadata(std::vector<std::string> keys,
   ARROW_CHECK_EQ(keys.size(), values.size());
 }
 
+std::shared_ptr<KeyValueMetadata> KeyValueMetadata::Make(
+    std::vector<std::string> keys, std::vector<std::string> values) {
+  return std::make_shared<KeyValueMetadata>(std::move(keys), std::move(values));
+}
+
 void KeyValueMetadata::ToUnorderedMap(
     std::unordered_map<std::string, std::string>* out) const {
   DCHECK_NE(out, nullptr);
diff --git a/cpp/src/arrow/util/key_value_metadata.h b/cpp/src/arrow/util/key_value_metadata.h
index d4207a53dc4..d42ab78f667 100644
--- a/cpp/src/arrow/util/key_value_metadata.h
+++ b/cpp/src/arrow/util/key_value_metadata.h
@@ -39,6 +39,9 @@ class ARROW_EXPORT KeyValueMetadata {
   explicit KeyValueMetadata(const std::unordered_map<std::string, std::string>& map);
   virtual ~KeyValueMetadata() = default;
 
+  static std::shared_ptr<KeyValueMetadata> Make(std::vector<std::string> keys,
+                                                std::vector<std::string> values);
+
   void ToUnorderedMap(std::unordered_map<std::string, std::string>* out) const;
   void Append(const std::string& key, const std::string& value);
 
diff --git a/python/pyarrow/_fs.pyx b/python/pyarrow/_fs.pyx
index d881c749c71..42e2484e9cf 100644
--- a/python/pyarrow/_fs.pyx
+++ b/python/pyarrow/_fs.pyx
@@ -627,7 +627,8 @@ cdef class FileSystem(_Weakrefable):
             stream, path=path, compression=compression, buffer_size=buffer_size
         )
 
-    def open_output_stream(self, path, compression='detect', buffer_size=None):
+    def open_output_stream(self, path, compression='detect',
+                           buffer_size=None, metadata=None):
         """
         Open an output stream for sequential writing.
 
@@ -646,6 +647,11 @@ cdef class FileSystem(_Weakrefable):
         buffer_size: int optional, default None
             If None or 0, no buffering will happen. Otherwise the size of the
             temporary write buffer.
+        metadata: dict optional, default None
+            If not None, a mapping of string keys to string values.
+            Some filesystems support storing metadata along the file
+            (such as "Content-Type").
+            Unsupported metadata keys will be ignored.
 
         Returns
         -------
@@ -655,9 +661,14 @@ cdef class FileSystem(_Weakrefable):
             c_string pathstr = _path_as_bytes(path)
             NativeFile stream = NativeFile()
             shared_ptr[COutputStream] out_handle
+            shared_ptr[const CKeyValueMetadata] c_metadata
+
+        if metadata is not None:
+            c_metadata = pyarrow_unwrap_metadata(KeyValueMetadata(metadata))
 
         with nogil:
-            out_handle = GetResultValue(self.fs.OpenOutputStream(pathstr))
+            out_handle = GetResultValue(
+                self.fs.OpenOutputStream(pathstr, c_metadata))
 
         stream.set_output_stream(out_handle)
         stream.is_writable = True
@@ -666,7 +677,8 @@ cdef class FileSystem(_Weakrefable):
             stream, path=path, compression=compression, buffer_size=buffer_size
         )
 
-    def open_append_stream(self, path, compression='detect', buffer_size=None):
+    def open_append_stream(self, path, compression='detect',
+                           buffer_size=None, metadata=None):
         """
         Open an output stream for appending.
 
@@ -685,6 +697,11 @@ cdef class FileSystem(_Weakrefable):
         buffer_size: int optional, default None
             If None or 0, no buffering will happen. Otherwise the size of the
             temporary write buffer.
+        metadata: dict optional, default None
+            If not None, a mapping of string keys to string values.
+            Some filesystems support storing metadata along the file
+            (such as "Content-Type").
+            Unsupported metadata keys will be ignored.
 
         Returns
         -------
@@ -694,9 +711,14 @@ cdef class FileSystem(_Weakrefable):
             c_string pathstr = _path_as_bytes(path)
             NativeFile stream = NativeFile()
             shared_ptr[COutputStream] out_handle
+            shared_ptr[const CKeyValueMetadata] c_metadata
+
+        if metadata is not None:
+            c_metadata = pyarrow_unwrap_metadata(KeyValueMetadata(metadata))
 
         with nogil:
-            out_handle = GetResultValue(self.fs.OpenAppendStream(pathstr))
+            out_handle = GetResultValue(
+                self.fs.OpenAppendStream(pathstr, c_metadata))
 
         stream.set_output_stream(out_handle)
         stream.is_writable = True
@@ -970,13 +992,13 @@ class FileSystemHandler(ABC):
         """
 
     @abstractmethod
-    def open_output_stream(self, path):
+    def open_output_stream(self, path, metadata):
         """
         Implement PyFileSystem.open_output_stream(...).
         """
 
     @abstractmethod
-    def open_append_stream(self, path):
+    def open_append_stream(self, path, metadata):
         """
         Implement PyFileSystem.open_append_stream(...).
         """
@@ -1067,17 +1089,23 @@ cdef void _cb_open_input_file(handler, const c_string& path,
                         "a PyArrow file")
     out[0] = (<NativeFile> stream).get_random_access_file()
 
-cdef void _cb_open_output_stream(handler, const c_string& path,
-                                 shared_ptr[COutputStream]* out) except *:
-    stream = handler.open_output_stream(frombytes(path))
+cdef void _cb_open_output_stream(
+        handler, const c_string& path,
+        const shared_ptr[const CKeyValueMetadata]& metadata,
+        shared_ptr[COutputStream]* out) except *:
+    stream = handler.open_output_stream(
+        frombytes(path), pyarrow_wrap_metadata(metadata))
     if not isinstance(stream, NativeFile):
         raise TypeError("open_output_stream should have returned "
                         "a PyArrow file")
     out[0] = (<NativeFile> stream).get_output_stream()
 
-cdef void _cb_open_append_stream(handler, const c_string& path,
-                                 shared_ptr[COutputStream]* out) except *:
-    stream = handler.open_append_stream(frombytes(path))
+cdef void _cb_open_append_stream(
+        handler, const c_string& path,
+        const shared_ptr[const CKeyValueMetadata]& metadata,
+        shared_ptr[COutputStream]* out) except *:
+    stream = handler.open_append_stream(
+        frombytes(path), pyarrow_wrap_metadata(metadata))
     if not isinstance(stream, NativeFile):
         raise TypeError("open_append_stream should have returned "
                         "a PyArrow file")
diff --git a/python/pyarrow/fs.py b/python/pyarrow/fs.py
index 7e63c01a57e..fe505530751 100644
--- a/python/pyarrow/fs.py
+++ b/python/pyarrow/fs.py
@@ -299,6 +299,8 @@ def copy_file(self, src, dest):
         # instead of a file
         self.fs.copy(src, dest)
 
+    # TODO can we read/pass metadata (e.g. Content-Type) in the methods below?
+
     def open_input_stream(self, path):
         from pyarrow import PythonFile
 
@@ -315,12 +317,12 @@ def open_input_file(self, path):
 
         return PythonFile(self.fs.open(path, mode="rb"), mode="r")
 
-    def open_output_stream(self, path):
+    def open_output_stream(self, path, metadata):
         from pyarrow import PythonFile
 
         return PythonFile(self.fs.open(path, mode="wb"), mode="w")
 
-    def open_append_stream(self, path):
+    def open_append_stream(self, path, metadata):
         from pyarrow import PythonFile
 
         return PythonFile(self.fs.open(path, mode="ab"), mode="w")
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 3912dac9fbf..5afa806fa84 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -1196,7 +1196,7 @@ cdef extern from "arrow/io/api.h" namespace "arrow::io" nogil:
 
     cdef cppclass CInputStream" arrow::io::InputStream"(FileInterface,
                                                         Readable):
-        pass
+        CResult[shared_ptr[const CKeyValueMetadata]] ReadMetadata()
 
     cdef cppclass CRandomAccessFile" arrow::io::RandomAccessFile"(CInputStream,
                                                                   Seekable):
diff --git a/python/pyarrow/includes/libarrow_fs.pxd b/python/pyarrow/includes/libarrow_fs.pxd
index ee1b8a70aef..baa5ecad8db 100644
--- a/python/pyarrow/includes/libarrow_fs.pxd
+++ b/python/pyarrow/includes/libarrow_fs.pxd
@@ -73,9 +73,9 @@ cdef extern from "arrow/filesystem/api.h" namespace "arrow::fs" nogil:
         CResult[shared_ptr[CRandomAccessFile]] OpenInputFile(
             const c_string& path)
         CResult[shared_ptr[COutputStream]] OpenOutputStream(
-            const c_string& path)
+            const c_string& path, const shared_ptr[const CKeyValueMetadata]&)
         CResult[shared_ptr[COutputStream]] OpenAppendStream(
-            const c_string& path)
+            const c_string& path, const shared_ptr[const CKeyValueMetadata]&)
         c_bool Equals(const CFileSystem& other)
         c_bool Equals(shared_ptr[CFileSystem] other)
 
@@ -234,8 +234,9 @@ ctypedef void CallbackOpenInputStream(object, const c_string&,
                                       shared_ptr[CInputStream]*)
 ctypedef void CallbackOpenInputFile(object, const c_string&,
                                     shared_ptr[CRandomAccessFile]*)
-ctypedef void CallbackOpenOutputStream(object, const c_string&,
-                                       shared_ptr[COutputStream]*)
+ctypedef void CallbackOpenOutputStream(
+    object, const c_string&, const shared_ptr[const CKeyValueMetadata]&,
+    shared_ptr[COutputStream]*)
 ctypedef void CallbackNormalizePath(object, const c_string&, c_string*)
 
 cdef extern from "arrow/python/filesystem.h" namespace "arrow::py::fs" nogil:
diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi
index 63ce5860845..494566437e0 100644
--- a/python/pyarrow/io.pxi
+++ b/python/pyarrow/io.pxi
@@ -230,6 +230,24 @@ cdef class NativeFile(_Weakrefable):
 
         return size
 
+    def metadata(self):
+        """
+        Return file metadata
+        """
+        cdef:
+            shared_ptr[const CKeyValueMetadata] c_metadata
+
+        handle = self.get_input_stream()
+        with nogil:
+            c_metadata = GetResultValue(handle.get().ReadMetadata())
+
+        metadata = {}
+        if c_metadata.get() != nullptr:
+            for i in range(c_metadata.get().size()):
+                metadata[frombytes(c_metadata.get().key(i))] = \
+                    c_metadata.get().value(i)
+        return metadata
+
     def tell(self):
         """
         Return current stream position
diff --git a/python/pyarrow/lib.pxd b/python/pyarrow/lib.pxd
index 8880179e7c4..1959519c49d 100644
--- a/python/pyarrow/lib.pxd
+++ b/python/pyarrow/lib.pxd
@@ -528,8 +528,8 @@ cpdef DataType ensure_type(object type, bint allow_none=*)
 
 # Exceptions may be raised when converting dict values, so need to
 # check exception state on return
-cdef shared_ptr[CKeyValueMetadata] pyarrow_unwrap_metadata(object meta) \
-    except *
+cdef shared_ptr[const CKeyValueMetadata] pyarrow_unwrap_metadata(
+    object meta) except *
 cdef object pyarrow_wrap_metadata(
     const shared_ptr[const CKeyValueMetadata]& meta)
 
diff --git a/python/pyarrow/tests/test_fs.py b/python/pyarrow/tests/test_fs.py
index acdff253917..ae687a3dbcc 100644
--- a/python/pyarrow/tests/test_fs.py
+++ b/python/pyarrow/tests/test_fs.py
@@ -128,12 +128,12 @@ def open_input_file(self, path):
         data = "{0}:input_file".format(path).encode('utf8')
         return pa.BufferReader(data)
 
-    def open_output_stream(self, path):
+    def open_output_stream(self, path, metadata):
         if "notfound" in path:
             raise FileNotFoundError(path)
         return pa.BufferOutputStream()
 
-    def open_append_stream(self, path):
+    def open_append_stream(self, path, metadata):
         if "notfound" in path:
             raise FileNotFoundError(path)
         return pa.BufferOutputStream()
@@ -193,11 +193,11 @@ def open_input_stream(self, path):
     def open_input_file(self, path):
         return self._fs.open_input_file(path)
 
-    def open_output_stream(self, path):
-        return self._fs.open_output_stream(path)
+    def open_output_stream(self, path, metadata):
+        return self._fs.open_output_stream(path, metadata=metadata)
 
-    def open_append_stream(self, path):
-        return self._fs.open_append_stream(path)
+    def open_append_stream(self, path, metadata):
+        return self._fs.open_append_stream(path, metadata=metadata)
 
 
 @pytest.fixture
@@ -967,6 +967,25 @@ def test_open_append_stream(fs, pathfn, compression, buffer_size, compressor,
                                   buffer_size=buffer_size)
 
 
+def test_open_output_stream_metadata(fs, pathfn):
+    p = pathfn('open-output-stream-metadata')
+    metadata = {'Content-Type': 'x-pyarrow/test'}
+
+    data = b'some data'
+    with fs.open_output_stream(p, metadata=metadata) as f:
+        f.write(data)
+
+    with fs.open_input_stream(p) as f:
+        assert f.read() == data
+        got_metadata = f.metadata()
+
+    if fs.type_name == 's3' or 'mock' in fs.type_name:
+        for k, v in metadata.items():
+            assert got_metadata[k] == v.encode()
+    else:
+        assert got_metadata == {}
+
+
 def test_localfs_options():
     # LocalFileSystem instantiation
     LocalFileSystem(use_mmap=False)
@@ -1493,6 +1512,13 @@ def test_s3_real_aws():
     fs = S3FileSystem(anonymous=True, region='us-east-2')
     entries = fs.get_file_info(FileSelector('ursa-labs-taxi-data'))
     assert len(entries) > 0
+    with fs.open_input_stream('ursa-labs-taxi-data/2019/06/data.parquet') as f:
+        md = f.metadata()
+        assert 'Content-Type' in md
+        assert md['Last-Modified'] == b'2020-01-17T16:26:28Z'
+        # For some reason, the header value is quoted
+        # (both with AWS and Minio)
+        assert md['ETag'] == b'"f1efd5d76cb82861e1542117bfa52b90-8"'
 
 
 @pytest.mark.s3

From 105edc2970dc8233c0e419259ee267fb885fc0cb Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Tue, 1 Jun 2021 18:21:01 +0200
Subject: [PATCH 320/719] ARROW-12598: [C++][Dataset] Speed up CountRows for
 CSV

This does not implement a fast path for CSV. However, it does configure the CSV reader to not actually deserialize any data, resulting in a large gain. When scanning 85 million rows of the NYC Taxi dataset, scan time dropped from ~7 seconds to 2.

This also sneaks in an implementation of the fast path for InMemoryFragment.

Closes #10270 from lidavidm/arrow-12598

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/csv/reader.cc            | 74 ++++++++++++++++++++++++++
 cpp/src/arrow/csv/reader.h             |  8 +++
 cpp/src/arrow/csv/reader_test.cc       | 50 +++++++++++++++++
 cpp/src/arrow/dataset/dataset.cc       | 14 ++++-
 cpp/src/arrow/dataset/dataset.h        |  7 ++-
 cpp/src/arrow/dataset/file_base.cc     |  6 +--
 cpp/src/arrow/dataset/file_base.h      |  5 +-
 cpp/src/arrow/dataset/file_csv.cc      | 50 ++++++++++++-----
 cpp/src/arrow/dataset/file_csv.h       |  4 ++
 cpp/src/arrow/dataset/file_csv_test.cc | 55 +++++++++++--------
 cpp/src/arrow/dataset/file_ipc.cc      |  2 +-
 cpp/src/arrow/dataset/file_ipc.h       |  2 +-
 cpp/src/arrow/dataset/file_parquet.cc  |  2 +-
 cpp/src/arrow/dataset/file_parquet.h   |  2 +-
 cpp/src/arrow/dataset/scanner_test.cc  | 12 ++---
 r/tests/testthat/test-dataset.R        |  5 +-
 16 files changed, 246 insertions(+), 52 deletions(-)

diff --git a/cpp/src/arrow/csv/reader.cc b/cpp/src/arrow/csv/reader.cc
index 7a12cfea943..b80a8fffe80 100644
--- a/cpp/src/arrow/csv/reader.cc
+++ b/cpp/src/arrow/csv/reader.cc
@@ -1027,6 +1027,70 @@ Future<std::shared_ptr<StreamingReader>> MakeStreamingReader(
   return reader->Init();
 }
 
+/////////////////////////////////////////////////////////////////////////
+// Row count implementation
+
+class CSVRowCounter : public ReaderMixin,
+                      public std::enable_shared_from_this<CSVRowCounter> {
+ public:
+  CSVRowCounter(io::IOContext io_context, Executor* cpu_executor,
+                std::shared_ptr<io::InputStream> input, const ReadOptions& read_options,
+                const ParseOptions& parse_options)
+      : ReaderMixin(io_context, std::move(input), read_options, parse_options,
+                    ConvertOptions::Defaults(), /*count_rows=*/true),
+        cpu_executor_(cpu_executor),
+        row_count_(0) {}
+
+  Future<int64_t> Count() {
+    auto self = shared_from_this();
+    return Init(self).Then([self]() { return self->DoCount(self); });
+  }
+
+ private:
+  Future<> Init(const std::shared_ptr<CSVRowCounter>& self) {
+    ARROW_ASSIGN_OR_RAISE(auto istream_it,
+                          io::MakeInputStreamIterator(input_, read_options_.block_size));
+    // TODO Consider exposing readahead as a read option (ARROW-12090)
+    ARROW_ASSIGN_OR_RAISE(auto bg_it, MakeBackgroundGenerator(std::move(istream_it),
+                                                              io_context_.executor()));
+    auto transferred_it = MakeTransferredGenerator(bg_it, cpu_executor_);
+    auto buffer_generator = CSVBufferIterator::MakeAsync(std::move(transferred_it));
+
+    return buffer_generator().Then([self, buffer_generator](
+                                       std::shared_ptr<Buffer> first_buffer) {
+      if (!first_buffer) {
+        return Status::Invalid("Empty CSV file");
+      }
+      RETURN_NOT_OK(self->ProcessHeader(first_buffer, &first_buffer));
+      self->block_generator_ = SerialBlockReader::MakeAsyncIterator(
+          buffer_generator, MakeChunker(self->parse_options_), std::move(first_buffer));
+      return Status::OK();
+    });
+  }
+
+  Future<int64_t> DoCount(const std::shared_ptr<CSVRowCounter>& self) {
+    // We must return a value instead of Status/Future<> to work with MakeMappedGenerator,
+    // and we must use a type with a valid end value to work with IterationEnd.
+    std::function<Result<util::optional<int64_t>>(const CSVBlock&)> count_cb =
+        [self](const CSVBlock& maybe_block) -> Result<util::optional<int64_t>> {
+      ARROW_ASSIGN_OR_RAISE(
+          auto parser,
+          self->Parse(maybe_block.partial, maybe_block.completion, maybe_block.buffer,
+                      maybe_block.block_index, maybe_block.is_final));
+      RETURN_NOT_OK(maybe_block.consume_bytes(parser.parsed_bytes));
+      self->row_count_ += parser.parser->num_rows();
+      return parser.parser->num_rows();
+    };
+    auto count_gen = MakeMappedGenerator(block_generator_, std::move(count_cb));
+    return DiscardAllFromAsyncGenerator(count_gen).Then(
+        [self]() { return self->row_count_; });
+  }
+
+  Executor* cpu_executor_;
+  AsyncGenerator<CSVBlock> block_generator_;
+  int64_t row_count_;
+};
+
 }  // namespace
 
 /////////////////////////////////////////////////////////////////////////
@@ -1081,6 +1145,16 @@ Future<std::shared_ptr<StreamingReader>> StreamingReader::MakeAsync(
                              parse_options, convert_options);
 }
 
+Future<int64_t> CountRowsAsync(io::IOContext io_context,
+                               std::shared_ptr<io::InputStream> input,
+                               internal::Executor* cpu_executor,
+                               const ReadOptions& read_options,
+                               const ParseOptions& parse_options) {
+  auto counter = std::make_shared<CSVRowCounter>(
+      io_context, cpu_executor, std::move(input), read_options, parse_options);
+  return counter->Count();
+}
+
 }  // namespace csv
 
 }  // namespace arrow
diff --git a/cpp/src/arrow/csv/reader.h b/cpp/src/arrow/csv/reader.h
index 253911bb4b9..5314104f048 100644
--- a/cpp/src/arrow/csv/reader.h
+++ b/cpp/src/arrow/csv/reader.h
@@ -96,5 +96,13 @@ class ARROW_EXPORT StreamingReader : public RecordBatchReader {
       const ConvertOptions& convert_options);
 };
 
+/// \brief Count the logical rows of data in a CSV file (i.e. the
+/// number of rows you would get if you read the file into a table).
+ARROW_EXPORT
+Future<int64_t> CountRowsAsync(io::IOContext io_context,
+                               std::shared_ptr<io::InputStream> input,
+                               internal::Executor* cpu_executor, const ReadOptions&,
+                               const ParseOptions&);
+
 }  // namespace csv
 }  // namespace arrow
diff --git a/cpp/src/arrow/csv/reader_test.cc b/cpp/src/arrow/csv/reader_test.cc
index 4f6c175996c..4d4f04964bd 100644
--- a/cpp/src/arrow/csv/reader_test.cc
+++ b/cpp/src/arrow/csv/reader_test.cc
@@ -216,5 +216,55 @@ TEST(StreamingReaderTests, NestedParallelism) {
   TestNestedParallelism(thread_pool, table_factory);
 }
 
+TEST(CountRowsAsync, Basics) {
+  constexpr int NROWS = 4096;
+  ASSERT_OK_AND_ASSIGN(auto table_buffer, MakeSampleCsvBuffer(NROWS));
+  {
+    auto reader = std::make_shared<io::BufferReader>(table_buffer);
+    auto read_options = ReadOptions::Defaults();
+    auto parse_options = ParseOptions::Defaults();
+    ASSERT_FINISHES_OK_AND_EQ(
+        NROWS, CountRowsAsync(io::default_io_context(), reader,
+                              internal::GetCpuThreadPool(), read_options, parse_options));
+  }
+  {
+    auto reader = std::make_shared<io::BufferReader>(table_buffer);
+    auto read_options = ReadOptions::Defaults();
+    read_options.skip_rows = 20;
+    auto parse_options = ParseOptions::Defaults();
+    ASSERT_FINISHES_OK_AND_EQ(NROWS - 20, CountRowsAsync(io::default_io_context(), reader,
+                                                         internal::GetCpuThreadPool(),
+                                                         read_options, parse_options));
+  }
+  {
+    auto reader = std::make_shared<io::BufferReader>(table_buffer);
+    auto read_options = ReadOptions::Defaults();
+    read_options.autogenerate_column_names = true;
+    auto parse_options = ParseOptions::Defaults();
+    ASSERT_FINISHES_OK_AND_EQ(NROWS + 1, CountRowsAsync(io::default_io_context(), reader,
+                                                        internal::GetCpuThreadPool(),
+                                                        read_options, parse_options));
+  }
+  {
+    auto reader = std::make_shared<io::BufferReader>(table_buffer);
+    auto read_options = ReadOptions::Defaults();
+    read_options.block_size = 1024;
+    auto parse_options = ParseOptions::Defaults();
+    ASSERT_FINISHES_OK_AND_EQ(
+        NROWS, CountRowsAsync(io::default_io_context(), reader,
+                              internal::GetCpuThreadPool(), read_options, parse_options));
+  }
+}
+
+TEST(CountRowsAsync, Errors) {
+  ASSERT_OK_AND_ASSIGN(auto table_buffer, MakeSampleCsvBuffer(4096, /*valid=*/false));
+  auto reader = std::make_shared<io::BufferReader>(table_buffer);
+  auto read_options = ReadOptions::Defaults();
+  auto parse_options = ParseOptions::Defaults();
+  ASSERT_FINISHES_AND_RAISES(
+      Invalid, CountRowsAsync(io::default_io_context(), reader,
+                              internal::GetCpuThreadPool(), read_options, parse_options));
+}
+
 }  // namespace csv
 }  // namespace arrow
diff --git a/cpp/src/arrow/dataset/dataset.cc b/cpp/src/arrow/dataset/dataset.cc
index 4305420fd74..841b792ee34 100644
--- a/cpp/src/arrow/dataset/dataset.cc
+++ b/cpp/src/arrow/dataset/dataset.cc
@@ -53,7 +53,7 @@ Result<std::shared_ptr<Schema>> Fragment::ReadPhysicalSchema() {
 }
 
 Future<util::optional<int64_t>> Fragment::CountRows(compute::Expression,
-                                                    std::shared_ptr<ScanOptions>) {
+                                                    const std::shared_ptr<ScanOptions>&) {
   return Future<util::optional<int64_t>>::MakeFinished(util::nullopt);
 }
 
@@ -149,6 +149,18 @@ Result<RecordBatchGenerator> InMemoryFragment::ScanBatchesAsync(
                    options->batch_size);
 }
 
+Future<util::optional<int64_t>> InMemoryFragment::CountRows(
+    compute::Expression predicate, const std::shared_ptr<ScanOptions>& options) {
+  if (ExpressionHasFieldRefs(predicate)) {
+    return Future<util::optional<int64_t>>::MakeFinished(util::nullopt);
+  }
+  int64_t total = 0;
+  for (const auto& batch : record_batches_) {
+    total += batch->num_rows();
+  }
+  return Future<util::optional<int64_t>>::MakeFinished(total);
+}
+
 Dataset::Dataset(std::shared_ptr<Schema> schema, compute::Expression partition_expression)
     : schema_(std::move(schema)),
       partition_expression_(std::move(partition_expression)) {}
diff --git a/cpp/src/arrow/dataset/dataset.h b/cpp/src/arrow/dataset/dataset.h
index a58e1c425fe..d2cba730252 100644
--- a/cpp/src/arrow/dataset/dataset.h
+++ b/cpp/src/arrow/dataset/dataset.h
@@ -76,8 +76,8 @@ class ARROW_DS_EXPORT Fragment : public std::enable_shared_from_this<Fragment> {
   ///
   /// If this is not possible, resolve with an empty optional. The fragment can perform
   /// I/O (e.g. to read metadata) before it deciding whether it can satisfy the request.
-  virtual Future<util::optional<int64_t>> CountRows(compute::Expression predicate,
-                                                    std::shared_ptr<ScanOptions> options);
+  virtual Future<util::optional<int64_t>> CountRows(
+      compute::Expression predicate, const std::shared_ptr<ScanOptions>& options);
 
   virtual std::string type_name() const = 0;
   virtual std::string ToString() const { return type_name(); }
@@ -133,6 +133,9 @@ class ARROW_DS_EXPORT InMemoryFragment : public Fragment {
   Result<ScanTaskIterator> Scan(std::shared_ptr<ScanOptions> options) override;
   Result<RecordBatchGenerator> ScanBatchesAsync(
       const std::shared_ptr<ScanOptions>& options) override;
+  Future<util::optional<int64_t>> CountRows(
+      compute::Expression predicate,
+      const std::shared_ptr<ScanOptions>& options) override;
 
   std::string type_name() const override { return "in-memory"; }
 
diff --git a/cpp/src/arrow/dataset/file_base.cc b/cpp/src/arrow/dataset/file_base.cc
index 56b97414602..b1cbd63ec61 100644
--- a/cpp/src/arrow/dataset/file_base.cc
+++ b/cpp/src/arrow/dataset/file_base.cc
@@ -86,7 +86,7 @@ Result<std::shared_ptr<io::InputStream>> FileSource::OpenCompressed(
 
 Future<util::optional<int64_t>> FileFormat::CountRows(
     const std::shared_ptr<FileFragment>&, compute::Expression,
-    std::shared_ptr<ScanOptions>) {
+    const std::shared_ptr<ScanOptions>&) {
   return Future<util::optional<int64_t>>::MakeFinished(util::nullopt);
 }
 
@@ -176,14 +176,14 @@ Result<RecordBatchGenerator> FileFragment::ScanBatchesAsync(
 }
 
 Future<util::optional<int64_t>> FileFragment::CountRows(
-    compute::Expression predicate, std::shared_ptr<ScanOptions> options) {
+    compute::Expression predicate, const std::shared_ptr<ScanOptions>& options) {
   ARROW_ASSIGN_OR_RAISE(predicate, compute::SimplifyWithGuarantee(std::move(predicate),
                                                                   partition_expression_));
   if (!predicate.IsSatisfiable()) {
     return Future<util::optional<int64_t>>::MakeFinished(0);
   }
   auto self = internal::checked_pointer_cast<FileFragment>(shared_from_this());
-  return format()->CountRows(self, std::move(predicate), std::move(options));
+  return format()->CountRows(self, std::move(predicate), options);
 }
 
 struct FileSystemDataset::FragmentSubtrees {
diff --git a/cpp/src/arrow/dataset/file_base.h b/cpp/src/arrow/dataset/file_base.h
index 43085a2d8de..dd47b1226f4 100644
--- a/cpp/src/arrow/dataset/file_base.h
+++ b/cpp/src/arrow/dataset/file_base.h
@@ -157,7 +157,7 @@ class ARROW_DS_EXPORT FileFormat : public std::enable_shared_from_this<FileForma
       const std::shared_ptr<FileFragment>& file) const;
   virtual Future<util::optional<int64_t>> CountRows(
       const std::shared_ptr<FileFragment>& file, compute::Expression predicate,
-      std::shared_ptr<ScanOptions> options);
+      const std::shared_ptr<ScanOptions>& options);
 
   /// \brief Open a fragment
   virtual Result<std::shared_ptr<FileFragment>> MakeFragment(
@@ -188,7 +188,8 @@ class ARROW_DS_EXPORT FileFragment : public Fragment {
   Result<RecordBatchGenerator> ScanBatchesAsync(
       const std::shared_ptr<ScanOptions>& options) override;
   Future<util::optional<int64_t>> CountRows(
-      compute::Expression predicate, std::shared_ptr<ScanOptions> options) override;
+      compute::Expression predicate,
+      const std::shared_ptr<ScanOptions>& options) override;
 
   std::string type_name() const override { return format_->type_name(); }
   std::string ToString() const override { return source_.path(); };
diff --git a/cpp/src/arrow/dataset/file_csv.cc b/cpp/src/arrow/dataset/file_csv.cc
index 5a16a52c544..fd96fe8f50e 100644
--- a/cpp/src/arrow/dataset/file_csv.cc
+++ b/cpp/src/arrow/dataset/file_csv.cc
@@ -48,18 +48,29 @@ using internal::SerialExecutor;
 using RecordBatchGenerator = std::function<Future<std::shared_ptr<RecordBatch>>()>;
 
 Result<std::unordered_set<std::string>> GetColumnNames(
-    const csv::ParseOptions& parse_options, util::string_view first_block,
-    MemoryPool* pool) {
+    const csv::ReadOptions& read_options, const csv::ParseOptions& parse_options,
+    util::string_view first_block, MemoryPool* pool) {
+  if (!read_options.column_names.empty()) {
+    std::unordered_set<std::string> column_names;
+    for (const auto& s : read_options.column_names) {
+      if (!column_names.emplace(s).second) {
+        return Status::Invalid("CSV file contained multiple columns named ", s);
+      }
+    }
+    return column_names;
+  }
+
   uint32_t parsed_size = 0;
+  int32_t max_num_rows = read_options.skip_rows + 1;
   csv::BlockParser parser(pool, parse_options, /*num_cols=*/-1, /*first_row=*/1,
-                          /*max_num_rows=*/1);
+                          max_num_rows);
 
   RETURN_NOT_OK(parser.Parse(util::string_view{first_block}, &parsed_size));
 
-  if (parser.num_rows() != 1) {
-    return Status::Invalid(
-        "Could not read first row from CSV file, either "
-        "file is truncated or header is larger than block size");
+  if (parser.num_rows() != max_num_rows) {
+    return Status::Invalid("Could not read first ", max_num_rows,
+                           " rows from CSV file, either file is truncated or"
+                           " header is larger than block size");
   }
 
   if (parser.num_cols() == 0) {
@@ -83,15 +94,15 @@ Result<std::unordered_set<std::string>> GetColumnNames(
 static inline Result<csv::ConvertOptions> GetConvertOptions(
     const CsvFileFormat& format, const ScanOptions* scan_options,
     const util::string_view first_block) {
-  ARROW_ASSIGN_OR_RAISE(
-      auto column_names,
-      GetColumnNames(format.parse_options, first_block,
-                     scan_options ? scan_options->pool : default_memory_pool()));
-
   ARROW_ASSIGN_OR_RAISE(
       auto csv_scan_options,
       GetFragmentScanOptions<CsvFragmentScanOptions>(
           kCsvTypeName, scan_options, format.default_fragment_scan_options));
+  ARROW_ASSIGN_OR_RAISE(
+      auto column_names,
+      GetColumnNames(csv_scan_options->read_options, format.parse_options, first_block,
+                     scan_options ? scan_options->pool : default_memory_pool()));
+
   auto convert_options = csv_scan_options->convert_options;
 
   if (!scan_options) return convert_options;
@@ -257,5 +268,20 @@ Result<RecordBatchGenerator> CsvFileFormat::ScanBatchesAsync(
   return GeneratorFromReader(std::move(reader_fut));
 }
 
+Future<util::optional<int64_t>> CsvFileFormat::CountRows(
+    const std::shared_ptr<FileFragment>& file, compute::Expression predicate,
+    const std::shared_ptr<ScanOptions>& options) {
+  if (ExpressionHasFieldRefs(predicate)) {
+    return Future<util::optional<int64_t>>::MakeFinished(util::nullopt);
+  }
+  auto self = internal::checked_pointer_cast<CsvFileFormat>(shared_from_this());
+  ARROW_ASSIGN_OR_RAISE(auto input, file->source().OpenCompressed());
+  ARROW_ASSIGN_OR_RAISE(auto read_options, GetReadOptions(*self, options));
+  return csv::CountRowsAsync(options->io_context, std::move(input),
+                             internal::GetCpuThreadPool(), read_options,
+                             self->parse_options)
+      .Then([](int64_t count) { return util::make_optional<int64_t>(count); });
+}
+
 }  // namespace dataset
 }  // namespace arrow
diff --git a/cpp/src/arrow/dataset/file_csv.h b/cpp/src/arrow/dataset/file_csv.h
index 0b24e083cca..f6636285c92 100644
--- a/cpp/src/arrow/dataset/file_csv.h
+++ b/cpp/src/arrow/dataset/file_csv.h
@@ -61,6 +61,10 @@ class ARROW_DS_EXPORT CsvFileFormat : public FileFormat {
       const std::shared_ptr<ScanOptions>& scan_options,
       const std::shared_ptr<FileFragment>& file) const override;
 
+  Future<util::optional<int64_t>> CountRows(
+      const std::shared_ptr<FileFragment>& file, compute::Expression predicate,
+      const std::shared_ptr<ScanOptions>& options) override;
+
   Result<std::shared_ptr<FileWriter>> MakeWriter(
       std::shared_ptr<io::OutputStream> destination, std::shared_ptr<Schema> schema,
       std::shared_ptr<FileWriteOptions> options) const override {
diff --git a/cpp/src/arrow/dataset/file_csv_test.cc b/cpp/src/arrow/dataset/file_csv_test.cc
index 489fea4ca56..a0d0a75a20f 100644
--- a/cpp/src/arrow/dataset/file_csv_test.cc
+++ b/cpp/src/arrow/dataset/file_csv_test.cc
@@ -50,7 +50,11 @@ class CsvFormatHelper {
   }
 
   static std::shared_ptr<CsvFileFormat> MakeFormat() {
-    return std::make_shared<CsvFileFormat>();
+    auto format = std::make_shared<CsvFileFormat>();
+    // Required for CountRows (since the test generates data with nulls that get written
+    // as empty lines)
+    format->parse_options.ignore_empty_lines = false;
+    return format;
   }
 };
 
@@ -122,7 +126,7 @@ N/A
     row_count += batch->num_rows();
   }
 
-  ASSERT_EQ(row_count, 3);
+  ASSERT_EQ(row_count, 4);
 }
 
 TEST_P(TestCsvFileFormat, CustomConvertOptions) {
@@ -154,15 +158,15 @@ foo
 MYNULL
 N/A
 bar)");
-  SetSchema({field("str", utf8())});
-  auto defaults = std::make_shared<CsvFragmentScanOptions>();
-  defaults->read_options.skip_rows = 1;
-  format_->default_fragment_scan_options = defaults;
-  auto fragment = MakeFragment(*source);
-  ASSERT_OK_AND_ASSIGN(auto physical_schema, fragment->ReadPhysicalSchema());
-  AssertSchemaEqual(opts_->dataset_schema, physical_schema);
-
   {
+    SetSchema({field("str", utf8())});
+    auto defaults = std::make_shared<CsvFragmentScanOptions>();
+    defaults->read_options.skip_rows = 1;
+    format_->default_fragment_scan_options = defaults;
+    auto fragment = MakeFragment(*source);
+    ASSERT_OK_AND_ASSIGN(auto physical_schema, fragment->ReadPhysicalSchema());
+    AssertSchemaEqual(opts_->dataset_schema, physical_schema);
+
     int64_t rows = 0;
     for (auto maybe_batch : Batches(fragment.get())) {
       ASSERT_OK_AND_ASSIGN(auto batch, maybe_batch);
@@ -171,17 +175,33 @@ bar)");
     ASSERT_EQ(rows, 4);
   }
   {
+    SetSchema({field("header_skipped", utf8())});
     // These options completely override the default ones
     auto fragment_scan_options = std::make_shared<CsvFragmentScanOptions>();
     fragment_scan_options->read_options.block_size = 1 << 22;
     opts_->fragment_scan_options = fragment_scan_options;
     int64_t rows = 0;
+    auto fragment = MakeFragment(*source);
     for (auto maybe_batch : Batches(fragment.get())) {
       ASSERT_OK_AND_ASSIGN(auto batch, maybe_batch);
       rows += batch->GetColumnByName("header_skipped")->length();
     }
     ASSERT_EQ(rows, 5);
   }
+  {
+    SetSchema({field("custom_header", utf8())});
+    auto defaults = std::make_shared<CsvFragmentScanOptions>();
+    defaults->read_options.column_names = {"custom_header"};
+    format_->default_fragment_scan_options = defaults;
+    opts_->fragment_scan_options = nullptr;
+    int64_t rows = 0;
+    auto fragment = MakeFragment(*source);
+    for (auto maybe_batch : Batches(fragment.get())) {
+      ASSERT_OK_AND_ASSIGN(auto batch, maybe_batch);
+      rows += batch->GetColumnByName("custom_header")->length();
+    }
+    ASSERT_EQ(rows, 6);
+  }
 }
 
 TEST_P(TestCsvFileFormat, ScanRecordBatchReaderWithVirtualColumn) {
@@ -205,7 +225,7 @@ N/A
     row_count += batch->num_rows();
   }
 
-  ASSERT_EQ(row_count, 3);
+  ASSERT_EQ(row_count, 4);
 }
 
 TEST_P(TestCsvFileFormat, InspectFailureWithRelevantError) {
@@ -305,6 +325,8 @@ TEST_P(TestCsvFileFormat, WriteRecordBatchReader) {
   GTEST_SKIP() << "Write support not implemented for CSV";
 }
 
+TEST_P(TestCsvFileFormat, CountRows) { TestCountRows(); }
+
 INSTANTIATE_TEST_SUITE_P(TestUncompressedCsv, TestCsvFileFormat,
                          ::testing::Values(Compression::UNCOMPRESSED));
 #ifdef ARROW_WITH_BZ2
@@ -325,16 +347,7 @@ INSTANTIATE_TEST_SUITE_P(TestZSTDCsv, TestCsvFileFormat,
                          ::testing::Values(Compression::ZSTD));
 #endif
 
-class CsvWithNullsHelper : public CsvFormatHelper {
- public:
-  static std::shared_ptr<CsvFileFormat> MakeFormat() {
-    auto format = std::make_shared<CsvFileFormat>();
-    format->parse_options.ignore_empty_lines = false;
-    return format;
-  }
-};
-
-class TestCsvFileFormatScan : public FileFormatScanMixin<CsvWithNullsHelper> {};
+class TestCsvFileFormatScan : public FileFormatScanMixin<CsvFormatHelper> {};
 
 TEST_P(TestCsvFileFormatScan, ScanRecordBatchReader) { TestScan(); }
 TEST_P(TestCsvFileFormatScan, ScanRecordBatchReaderWithVirtualColumn) {
diff --git a/cpp/src/arrow/dataset/file_ipc.cc b/cpp/src/arrow/dataset/file_ipc.cc
index a8863ee0775..2032f03d28f 100644
--- a/cpp/src/arrow/dataset/file_ipc.cc
+++ b/cpp/src/arrow/dataset/file_ipc.cc
@@ -231,7 +231,7 @@ Result<RecordBatchGenerator> IpcFileFormat::ScanBatchesAsync(
 
 Future<util::optional<int64_t>> IpcFileFormat::CountRows(
     const std::shared_ptr<FileFragment>& file, compute::Expression predicate,
-    std::shared_ptr<ScanOptions> options) {
+    const std::shared_ptr<ScanOptions>& options) {
   if (ExpressionHasFieldRefs(predicate)) {
     return Future<util::optional<int64_t>>::MakeFinished(util::nullopt);
   }
diff --git a/cpp/src/arrow/dataset/file_ipc.h b/cpp/src/arrow/dataset/file_ipc.h
index 3888de027c5..deff26c6f95 100644
--- a/cpp/src/arrow/dataset/file_ipc.h
+++ b/cpp/src/arrow/dataset/file_ipc.h
@@ -63,7 +63,7 @@ class ARROW_DS_EXPORT IpcFileFormat : public FileFormat {
 
   Future<util::optional<int64_t>> CountRows(
       const std::shared_ptr<FileFragment>& file, compute::Expression predicate,
-      std::shared_ptr<ScanOptions> options) override;
+      const std::shared_ptr<ScanOptions>& options) override;
 
   Result<std::shared_ptr<FileWriter>> MakeWriter(
       std::shared_ptr<io::OutputStream> destination, std::shared_ptr<Schema> schema,
diff --git a/cpp/src/arrow/dataset/file_parquet.cc b/cpp/src/arrow/dataset/file_parquet.cc
index 356492cd164..86bea49c22e 100644
--- a/cpp/src/arrow/dataset/file_parquet.cc
+++ b/cpp/src/arrow/dataset/file_parquet.cc
@@ -392,7 +392,7 @@ Result<ScanTaskIterator> ParquetFileFormat::ScanFile(
 
 Future<util::optional<int64_t>> ParquetFileFormat::CountRows(
     const std::shared_ptr<FileFragment>& file, compute::Expression predicate,
-    std::shared_ptr<ScanOptions> options) {
+    const std::shared_ptr<ScanOptions>& options) {
   auto parquet_file = internal::checked_pointer_cast<ParquetFileFragment>(file);
   if (parquet_file->metadata()) {
     ARROW_ASSIGN_OR_RAISE(auto maybe_count,
diff --git a/cpp/src/arrow/dataset/file_parquet.h b/cpp/src/arrow/dataset/file_parquet.h
index f49637b13a1..f6505ed6dd2 100644
--- a/cpp/src/arrow/dataset/file_parquet.h
+++ b/cpp/src/arrow/dataset/file_parquet.h
@@ -101,7 +101,7 @@ class ARROW_DS_EXPORT ParquetFileFormat : public FileFormat {
 
   Future<util::optional<int64_t>> CountRows(
       const std::shared_ptr<FileFragment>& file, compute::Expression predicate,
-      std::shared_ptr<ScanOptions> options) override;
+      const std::shared_ptr<ScanOptions>& options) override;
 
   using FileFormat::MakeFragment;
 
diff --git a/cpp/src/arrow/dataset/scanner_test.cc b/cpp/src/arrow/dataset/scanner_test.cc
index 04e4de406c6..87fc2c902c3 100644
--- a/cpp/src/arrow/dataset/scanner_test.cc
+++ b/cpp/src/arrow/dataset/scanner_test.cc
@@ -347,8 +347,8 @@ class CountRowsOnlyFragment : public InMemoryFragment {
  public:
   using InMemoryFragment::InMemoryFragment;
 
-  Future<util::optional<int64_t>> CountRows(compute::Expression predicate,
-                                            std::shared_ptr<ScanOptions>) override {
+  Future<util::optional<int64_t>> CountRows(
+      compute::Expression predicate, const std::shared_ptr<ScanOptions>&) override {
     if (compute::FieldsInExpression(predicate).size() > 0) {
       return Future<util::optional<int64_t>>::MakeFinished(util::nullopt);
     }
@@ -371,8 +371,8 @@ class ScanOnlyFragment : public InMemoryFragment {
  public:
   using InMemoryFragment::InMemoryFragment;
 
-  Future<util::optional<int64_t>> CountRows(compute::Expression predicate,
-                                            std::shared_ptr<ScanOptions>) override {
+  Future<util::optional<int64_t>> CountRows(
+      compute::Expression predicate, const std::shared_ptr<ScanOptions>&) override {
     return Future<util::optional<int64_t>>::MakeFinished(util::nullopt);
   }
   Result<ScanTaskIterator> Scan(std::shared_ptr<ScanOptions> options) override {
@@ -410,8 +410,8 @@ class CountFailFragment : public InMemoryFragment {
       : InMemoryFragment(std::move(record_batches)),
         count(Future<util::optional<int64_t>>::Make()) {}
 
-  Future<util::optional<int64_t>> CountRows(compute::Expression,
-                                            std::shared_ptr<ScanOptions>) override {
+  Future<util::optional<int64_t>> CountRows(
+      compute::Expression, const std::shared_ptr<ScanOptions>&) override {
     return count;
   }
 
diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R
index 920a99cd6b6..be141c74659 100644
--- a/r/tests/testthat/test-dataset.R
+++ b/r/tests/testthat/test-dataset.R
@@ -352,7 +352,10 @@ test_that("CSV dataset", {
   expect_r6_class(ds$format, "CsvFileFormat")
   expect_r6_class(ds$filesystem, "LocalFileSystem")
   expect_identical(names(ds), c(names(df1), "part"))
-  expect_identical(dim(ds), c(20L, 7L))
+  if (getRversion() >= "4.0.0") {
+    # CountRows segfaults on RTools35/R 3.6, so don't test it there
+    expect_identical(dim(ds), c(20L, 7L))
+  }
   expect_equivalent(
     ds %>%
       select(string = chr, integer = int, part) %>%

From e5e1b14a08ebb414fcce58f265d53ec9f498f4a0 Mon Sep 17 00:00:00 2001
From: sjgupta2 <sjgupta2@illinois.edu>
Date: Tue, 1 Jun 2021 18:39:48 +0200
Subject: [PATCH 321/719] ARROW-10675 [C++][Python] Support AWS S3 Web identity
 credentials

Added basic support for explicitly specifying use of web identity credentials (it's already part of the default credentials provider chain) in the C++ API. Will add Python API in a separate PR.
Also refactored some existing code to make serialization and deserialization in Python more structured by adding an enum class `S3CredentialsKind` which uniquely identifies the credential type being used.
Also adds some missing error-checking and tests in the Python API.

Closes #10088 from sahil1105/sahil/s3-web-id-creds

Authored-by: sjgupta2 <sjgupta2@illinois.edu>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/filesystem/s3fs.cc        | 21 ++++++++++++++++++
 cpp/src/arrow/filesystem/s3fs.h         | 24 ++++++++++++++++++++
 python/pyarrow/_s3fs.pyx                | 29 +++++++++++++++----------
 python/pyarrow/includes/libarrow_fs.pxd |  9 ++++++++
 python/pyarrow/tests/test_fs.py         | 10 +++++++++
 5 files changed, 81 insertions(+), 12 deletions(-)

diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc
index fe9036823be..c24ebd79f9b 100644
--- a/cpp/src/arrow/filesystem/s3fs.cc
+++ b/cpp/src/arrow/filesystem/s3fs.cc
@@ -40,6 +40,7 @@
 #include <aws/core/Region.h>
 #include <aws/core/auth/AWSCredentials.h>
 #include <aws/core/auth/AWSCredentialsProviderChain.h>
+#include <aws/core/auth/STSCredentialsProvider.h>
 #include <aws/core/client/RetryStrategy.h>
 #include <aws/core/http/HttpResponse.h>
 #include <aws/core/utils/logging/ConsoleLogSystem.h>
@@ -205,10 +206,12 @@ bool S3ProxyOptions::Equals(const S3ProxyOptions& other) const {
 void S3Options::ConfigureDefaultCredentials() {
   credentials_provider =
       std::make_shared<Aws::Auth::DefaultAWSCredentialsProviderChain>();
+  credentials_kind = S3CredentialsKind::Default;
 }
 
 void S3Options::ConfigureAnonymousCredentials() {
   credentials_provider = std::make_shared<Aws::Auth::AnonymousAWSCredentialsProvider>();
+  credentials_kind = S3CredentialsKind::Anonymous;
 }
 
 void S3Options::ConfigureAccessKey(const std::string& access_key,
@@ -216,6 +219,7 @@ void S3Options::ConfigureAccessKey(const std::string& access_key,
                                    const std::string& session_token) {
   credentials_provider = std::make_shared<Aws::Auth::SimpleAWSCredentialsProvider>(
       ToAwsString(access_key), ToAwsString(secret_key), ToAwsString(session_token));
+  credentials_kind = S3CredentialsKind::Explicit;
 }
 
 void S3Options::ConfigureAssumeRoleCredentials(
@@ -225,6 +229,16 @@ void S3Options::ConfigureAssumeRoleCredentials(
   credentials_provider = std::make_shared<Aws::Auth::STSAssumeRoleCredentialsProvider>(
       ToAwsString(role_arn), ToAwsString(session_name), ToAwsString(external_id),
       load_frequency, stsClient);
+  credentials_kind = S3CredentialsKind::Role;
+}
+
+void S3Options::ConfigureAssumeRoleWithWebIdentityCredentials() {
+  // The AWS SDK uses environment variables AWS_DEFAULT_REGION,
+  // AWS_ROLE_ARN, AWS_WEB_IDENTITY_TOKEN_FILE and AWS_ROLE_SESSION_NAME
+  // to configure the required credentials
+  credentials_provider =
+      std::make_shared<Aws::Auth::STSAssumeRoleWebIdentityCredentialsProvider>();
+  credentials_kind = S3CredentialsKind::WebIdentity;
 }
 
 std::string S3Options::GetAccessKey() const {
@@ -276,6 +290,12 @@ S3Options S3Options::FromAssumeRole(
   return options;
 }
 
+S3Options S3Options::FromAssumeRoleWithWebIdentity() {
+  S3Options options;
+  options.ConfigureAssumeRoleWithWebIdentityCredentials();
+  return options;
+}
+
 Result<S3Options> S3Options::FromUri(const Uri& uri, std::string* out_path) {
   S3Options options;
 
@@ -344,6 +364,7 @@ Result<S3Options> S3Options::FromUri(const std::string& uri_string,
 bool S3Options::Equals(const S3Options& other) const {
   return (region == other.region && endpoint_override == other.endpoint_override &&
           scheme == other.scheme && background_writes == other.background_writes &&
+          credentials_kind == other.credentials_kind &&
           proxy_options.Equals(other.proxy_options) &&
           GetAccessKey() == other.GetAccessKey() &&
           GetSecretKey() == other.GetSecretKey() &&
diff --git a/cpp/src/arrow/filesystem/s3fs.h b/cpp/src/arrow/filesystem/s3fs.h
index 4fb7b007dfd..d04eaa8ba16 100644
--- a/cpp/src/arrow/filesystem/s3fs.h
+++ b/cpp/src/arrow/filesystem/s3fs.h
@@ -56,6 +56,19 @@ struct ARROW_EXPORT S3ProxyOptions {
   bool Equals(const S3ProxyOptions& other) const;
 };
 
+enum class S3CredentialsKind : int8_t {
+  /// Anonymous access (no credentials used)
+  Anonymous,
+  /// Use default AWS credentials, configured through environment variables
+  Default,
+  /// Use explicitly-provided access key pair
+  Explicit,
+  /// Assume role through a role ARN
+  Role,
+  /// Use web identity token to assume role, configured through environment variables
+  WebIdentity
+};
+
 /// Options for the S3FileSystem implementation.
 struct ARROW_EXPORT S3Options {
   /// AWS region to connect to.
@@ -88,6 +101,9 @@ struct ARROW_EXPORT S3Options {
   /// AWS credentials provider
   std::shared_ptr<Aws::Auth::AWSCredentialsProvider> credentials_provider;
 
+  /// Type of credentials being used. Set along with credentials_provider.
+  S3CredentialsKind credentials_kind = S3CredentialsKind::Default;
+
   /// Whether OutputStream writes will be issued in the background, without blocking.
   bool background_writes = true;
 
@@ -107,6 +123,9 @@ struct ARROW_EXPORT S3Options {
       const std::string& external_id = "", int load_frequency = 900,
       const std::shared_ptr<Aws::STS::STSClient>& stsClient = NULLPTR);
 
+  /// Configure with credentials from role assumed using a web identitiy token
+  void ConfigureAssumeRoleWithWebIdentityCredentials();
+
   std::string GetAccessKey() const;
   std::string GetSecretKey() const;
   std::string GetSessionToken() const;
@@ -138,6 +157,11 @@ struct ARROW_EXPORT S3Options {
       const std::string& external_id = "", int load_frequency = 900,
       const std::shared_ptr<Aws::STS::STSClient>& stsClient = NULLPTR);
 
+  /// \brief Initialize from an assumed role with web-identity.
+  /// Uses the AWS SDK which uses environment variables to
+  /// generate temporary credentials.
+  static S3Options FromAssumeRoleWithWebIdentity();
+
   static Result<S3Options> FromUri(const ::arrow::internal::Uri& uri,
                                    std::string* out_path = NULLPTR);
   static Result<S3Options> FromUri(const std::string& uri,
diff --git a/python/pyarrow/_s3fs.pyx b/python/pyarrow/_s3fs.pyx
index 1a907d02ca9..20c3e6478fa 100644
--- a/python/pyarrow/_s3fs.pyx
+++ b/python/pyarrow/_s3fs.pyx
@@ -113,7 +113,7 @@ cdef class S3FileSystem(FileSystem):
         CS3FileSystem* s3fs
 
     def __init__(self, *, access_key=None, secret_key=None, session_token=None,
-                 anonymous=False, region=None, scheme=None,
+                 bint anonymous=False, region=None, scheme=None,
                  endpoint_override=None, bint background_writes=True,
                  role_arn=None, session_name=None, external_id=None,
                  load_frequency=900, proxy_options=None):
@@ -161,8 +161,13 @@ cdef class S3FileSystem(FileSystem):
                 tobytes(session_token)
             )
         elif anonymous:
+            if role_arn:
+                raise ValueError(
+                    'Cannot provide role_arn with anonymous=True')
+
             options = CS3Options.Anonymous()
-        elif role_arn is not None:
+        elif role_arn:
+
             options = CS3Options.FromAssumeRole(
                 tobytes(role_arn),
                 tobytes(session_name),
@@ -216,28 +221,28 @@ cdef class S3FileSystem(FileSystem):
     def __reduce__(self):
         cdef CS3Options opts = self.s3fs.options()
 
-        role_arn = frombytes(opts.role_arn)
-
-        # if role_arn is set, we should not re-use temporary credentials
-        # but instead recreate a new assume role session
-        if role_arn:
-            access_key = None
-            secret_key = None
-            session_token = None
-        else:
+        # if creds were explicitly provided, then use them
+        # else obtain them as they were last time.
+        if opts.credentials_kind == CS3CredentialsKind_Explicit:
             access_key = frombytes(opts.GetAccessKey())
             secret_key = frombytes(opts.GetSecretKey())
             session_token = frombytes(opts.GetSessionToken())
+        else:
+            access_key = None
+            secret_key = None
+            session_token = None
 
         return (
             S3FileSystem._reconstruct, (dict(
                 access_key=access_key,
                 secret_key=secret_key,
                 session_token=session_token,
+                anonymous=(opts.credentials_kind ==
+                           CS3CredentialsKind_Anonymous),
                 region=frombytes(opts.region),
                 scheme=frombytes(opts.scheme),
                 endpoint_override=frombytes(opts.endpoint_override),
-                role_arn=role_arn,
+                role_arn=frombytes(opts.role_arn),
                 session_name=frombytes(opts.session_name),
                 external_id=frombytes(opts.external_id),
                 load_frequency=opts.load_frequency,
diff --git a/python/pyarrow/includes/libarrow_fs.pxd b/python/pyarrow/includes/libarrow_fs.pxd
index baa5ecad8db..33f61e7766e 100644
--- a/python/pyarrow/includes/libarrow_fs.pxd
+++ b/python/pyarrow/includes/libarrow_fs.pxd
@@ -137,6 +137,14 @@ cdef extern from "arrow/filesystem/api.h" namespace "arrow::fs" nogil:
         CResult[CS3ProxyOptions] FromUriString "FromUri"(
             const c_string& uri_string)
 
+    ctypedef enum CS3CredentialsKind "arrow::fs::S3CredentialsKind":
+        CS3CredentialsKind_Anonymous "arrow::fs::S3CredentialsKind::Anonymous"
+        CS3CredentialsKind_Default "arrow::fs::S3CredentialsKind::Default"
+        CS3CredentialsKind_Explicit "arrow::fs::S3CredentialsKind::Explicit"
+        CS3CredentialsKind_Role "arrow::fs::S3CredentialsKind::Role"
+        CS3CredentialsKind_WebIdentity \
+            "arrow::fs::S3CredentialsKind::WebIdentity"
+
     cdef cppclass CS3Options "arrow::fs::S3Options":
         c_string region
         c_string endpoint_override
@@ -147,6 +155,7 @@ cdef extern from "arrow/filesystem/api.h" namespace "arrow::fs" nogil:
         c_string external_id
         int load_frequency
         CS3ProxyOptions proxy_options
+        CS3CredentialsKind credentials_kind
         void ConfigureDefaultCredentials()
         void ConfigureAccessKey(const c_string& access_key,
                                 const c_string& secret_key,
diff --git a/python/pyarrow/tests/test_fs.py b/python/pyarrow/tests/test_fs.py
index ae687a3dbcc..f7baeb6c396 100644
--- a/python/pyarrow/tests/test_fs.py
+++ b/python/pyarrow/tests/test_fs.py
@@ -1054,6 +1054,10 @@ def test_s3_options():
     assert isinstance(fs, S3FileSystem)
     assert pickle.loads(pickle.dumps(fs)) == fs
 
+    fs = S3FileSystem(anonymous=True)
+    assert isinstance(fs, S3FileSystem)
+    assert pickle.loads(pickle.dumps(fs)) == fs
+
     with pytest.raises(ValueError):
         S3FileSystem(access_key='access')
     with pytest.raises(ValueError):
@@ -1066,6 +1070,12 @@ def test_s3_options():
         S3FileSystem(
             access_key='access', secret_key='secret', role_arn='arn'
         )
+    with pytest.raises(ValueError):
+        S3FileSystem(
+            access_key='access', secret_key='secret', anonymous=True
+        )
+    with pytest.raises(ValueError):
+        S3FileSystem(role_arn="arn", anonymous=True)
 
 
 @pytest.mark.s3

From b3e9da83f23a4aa66ffca2d95fd618aa0c9ba5a4 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Tue, 1 Jun 2021 19:33:38 +0200
Subject: [PATCH 322/719] ARROW-12835: [C++][Python][R] Implement
 case-insensitive match using RE2

This uses RE2 to implement a case-insensitive substring search.

Originally, I implemented this using utf8proc, but then found it was about an order of magnitude slower than RE2. (This isn't an apples-to-apples comparison; utf8proc does it more 'properly' and handles more Unicode corners.) So I switched to just doing it with RE2 instead, especially since the utf8proc approach was complicated. (You can still see it in the original commit here if you're curious.)

Closes #10369 from lidavidm/arrow-12835

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/compute/api_scalar.h            |   5 +-
 .../arrow/compute/kernels/scalar_string.cc    | 130 ++++++++++++------
 .../compute/kernels/scalar_string_test.cc     |  38 ++++-
 python/pyarrow/_compute.pyx                   |   8 +-
 python/pyarrow/compute.py                     |  18 ++-
 python/pyarrow/includes/libarrow.pxd          |   3 +-
 python/pyarrow/tests/test_compute.py          |  24 ++++
 r/R/dplyr-functions.R                         |   4 +-
 r/src/compute.cpp                             |   7 +-
 9 files changed, 177 insertions(+), 60 deletions(-)

diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h
index 3a007e06567..dce420b32b2 100644
--- a/cpp/src/arrow/compute/api_scalar.h
+++ b/cpp/src/arrow/compute/api_scalar.h
@@ -43,10 +43,13 @@ struct ArithmeticOptions : public FunctionOptions {
 };
 
 struct ARROW_EXPORT MatchSubstringOptions : public FunctionOptions {
-  explicit MatchSubstringOptions(std::string pattern) : pattern(std::move(pattern)) {}
+  explicit MatchSubstringOptions(std::string pattern, bool ignore_case = false)
+      : pattern(std::move(pattern)), ignore_case(ignore_case) {}
 
   /// The exact substring (or regex, depending on kernel) to look for inside input values.
   std::string pattern;
+  /// Whether to perform a case-insensitive match.
+  bool ignore_case = false;
 };
 
 struct ARROW_EXPORT SplitOptions : public FunctionOptions {
diff --git a/cpp/src/arrow/compute/kernels/scalar_string.cc b/cpp/src/arrow/compute/kernels/scalar_string.cc
index 4d83e1ec24e..d939d1c7722 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string.cc
@@ -433,33 +433,6 @@ void StringBoolTransform(KernelContext* ctx, const ExecBatch& batch,
 
 using MatchSubstringState = OptionsWrapper<MatchSubstringOptions>;
 
-template <typename Type, typename Matcher>
-struct MatchSubstring {
-  using offset_type = typename Type::offset_type;
-  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
-    // TODO Cache matcher across invocations (for regex compilation)
-    ARROW_ASSIGN_OR_RAISE(auto matcher, Matcher::Make(MatchSubstringState::Get(ctx)));
-    StringBoolTransform<Type>(
-        ctx, batch,
-        [&matcher](const void* raw_offsets, const uint8_t* data, int64_t length,
-                   int64_t output_offset, uint8_t* output) {
-          const offset_type* offsets = reinterpret_cast<const offset_type*>(raw_offsets);
-          FirstTimeBitmapWriter bitmap_writer(output, output_offset, length);
-          for (int64_t i = 0; i < length; ++i) {
-            const char* current_data = reinterpret_cast<const char*>(data + offsets[i]);
-            int64_t current_length = offsets[i + 1] - offsets[i];
-            if (matcher->Match(util::string_view(current_data, current_length))) {
-              bitmap_writer.Set();
-            }
-            bitmap_writer.Next();
-          }
-          bitmap_writer.Finish();
-        },
-        out);
-    return Status::OK();
-  }
-};
-
 // This is an implementation of the Knuth-Morris-Pratt algorithm
 struct PlainSubstringMatcher {
   const MatchSubstringOptions& options_;
@@ -467,6 +440,8 @@ struct PlainSubstringMatcher {
 
   static Result<std::unique_ptr<PlainSubstringMatcher>> Make(
       const MatchSubstringOptions& options) {
+    // Should be handled by partial template specialization below
+    DCHECK(!options.ignore_case);
     return ::arrow::internal::make_unique<PlainSubstringMatcher>(options);
   }
 
@@ -509,38 +484,109 @@ struct PlainSubstringMatcher {
   bool Match(util::string_view current) const { return Find(current) >= 0; }
 };
 
-const FunctionDoc match_substring_doc(
-    "Match strings against literal pattern",
-    ("For each string in `strings`, emit true iff it contains a given pattern.\n"
-     "Null inputs emit null.  The pattern must be given in MatchSubstringOptions."),
-    {"strings"}, "MatchSubstringOptions");
-
 #ifdef ARROW_WITH_RE2
 struct RegexSubstringMatcher {
   const MatchSubstringOptions& options_;
   const RE2 regex_match_;
 
   static Result<std::unique_ptr<RegexSubstringMatcher>> Make(
-      const MatchSubstringOptions& options) {
-    auto matcher = ::arrow::internal::make_unique<RegexSubstringMatcher>(options);
+      const MatchSubstringOptions& options, bool literal = false) {
+    auto matcher =
+        ::arrow::internal::make_unique<RegexSubstringMatcher>(options, literal);
     RETURN_NOT_OK(RegexStatus(matcher->regex_match_));
     return std::move(matcher);
   }
 
-  explicit RegexSubstringMatcher(const MatchSubstringOptions& options)
-      : options_(options), regex_match_(options_.pattern, RE2::Quiet) {}
+  explicit RegexSubstringMatcher(const MatchSubstringOptions& options,
+                                 bool literal = false)
+      : options_(options),
+        regex_match_(options_.pattern, MakeRE2Options(options, literal)) {}
 
   bool Match(util::string_view current) const {
     auto piece = re2::StringPiece(current.data(), current.length());
     return re2::RE2::PartialMatch(piece, regex_match_);
   }
+
+  static RE2::RE2::Options MakeRE2Options(const MatchSubstringOptions& options,
+                                          bool literal) {
+    RE2::RE2::Options re2_options(RE2::Quiet);
+    re2_options.set_case_sensitive(!options.ignore_case);
+    re2_options.set_literal(literal);
+    return re2_options;
+  }
+};
+#endif
+
+template <typename Type, typename Matcher>
+struct MatchSubstringImpl {
+  using offset_type = typename Type::offset_type;
+
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out,
+                     const Matcher* matcher) {
+    StringBoolTransform<Type>(
+        ctx, batch,
+        [&matcher](const void* raw_offsets, const uint8_t* data, int64_t length,
+                   int64_t output_offset, uint8_t* output) {
+          const offset_type* offsets = reinterpret_cast<const offset_type*>(raw_offsets);
+          FirstTimeBitmapWriter bitmap_writer(output, output_offset, length);
+          for (int64_t i = 0; i < length; ++i) {
+            const char* current_data = reinterpret_cast<const char*>(data + offsets[i]);
+            int64_t current_length = offsets[i + 1] - offsets[i];
+            if (matcher->Match(util::string_view(current_data, current_length))) {
+              bitmap_writer.Set();
+            }
+            bitmap_writer.Next();
+          }
+          bitmap_writer.Finish();
+        },
+        out);
+    return Status::OK();
+  }
 };
 
+template <typename Type, typename Matcher>
+struct MatchSubstring {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    // TODO Cache matcher across invocations (for regex compilation)
+    ARROW_ASSIGN_OR_RAISE(auto matcher, Matcher::Make(MatchSubstringState::Get(ctx)));
+    return MatchSubstringImpl<Type, Matcher>::Exec(ctx, batch, out, matcher.get());
+  }
+};
+
+template <typename Type>
+struct MatchSubstring<Type, PlainSubstringMatcher> {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    auto options = MatchSubstringState::Get(ctx);
+    if (options.ignore_case) {
+#ifdef ARROW_WITH_RE2
+      ARROW_ASSIGN_OR_RAISE(auto matcher,
+                            RegexSubstringMatcher::Make(options, /*literal=*/true));
+      return MatchSubstringImpl<Type, RegexSubstringMatcher>::Exec(ctx, batch, out,
+                                                                   matcher.get());
+#else
+      return Status::NotImplemented("ignore_case requires RE2");
+#endif
+    }
+    ARROW_ASSIGN_OR_RAISE(auto matcher, PlainSubstringMatcher::Make(options));
+    return MatchSubstringImpl<Type, PlainSubstringMatcher>::Exec(ctx, batch, out,
+                                                                 matcher.get());
+  }
+};
+
+const FunctionDoc match_substring_doc(
+    "Match strings against literal pattern",
+    ("For each string in `strings`, emit true iff it contains a given pattern.\n"
+     "Null inputs emit null.  The pattern must be given in MatchSubstringOptions. "
+     "If ignore_case is set, only simple case folding is performed."),
+    {"strings"}, "MatchSubstringOptions");
+
+#ifdef ARROW_WITH_RE2
 const FunctionDoc match_substring_regex_doc(
     "Match strings against regex pattern",
     ("For each string in `strings`, emit true iff it matches a given pattern at any "
      "position.\n"
-     "Null inputs emit null.  The pattern must be given in MatchSubstringOptions."),
+     "Null inputs emit null.  The pattern must be given in MatchSubstringOptions. "
+     "If ignore_case is set, only simple case folding is performed."),
     {"strings"}, "MatchSubstringOptions");
 
 // SQL LIKE match
@@ -605,14 +651,16 @@ struct MatchLike {
 
     Status status;
     std::string pattern;
-    if (re2::RE2::FullMatch(original_options.pattern, kLikePatternIsSubstringMatch,
+    if (!original_options.ignore_case &&
+        re2::RE2::FullMatch(original_options.pattern, kLikePatternIsSubstringMatch,
                             &pattern)) {
-      MatchSubstringOptions converted_options{pattern};
+      MatchSubstringOptions converted_options{pattern, original_options.ignore_case};
       MatchSubstringState converted_state(converted_options);
       ctx->SetState(&converted_state);
       status = MatchSubstring<StringType, PlainSubstringMatcher>::Exec(ctx, batch, out);
     } else {
-      MatchSubstringOptions converted_options{MakeLikeRegex(original_options)};
+      MatchSubstringOptions converted_options{MakeLikeRegex(original_options),
+                                              original_options.ignore_case};
       MatchSubstringState converted_state(converted_options);
       ctx->SetState(&converted_state);
       status = MatchSubstring<StringType, RegexSubstringMatcher>::Exec(ctx, batch, out);
diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
index 7f2126828ce..5c230c41cd9 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
@@ -377,8 +377,8 @@ TYPED_TEST(TestStringKernels, IsUpperAscii) {
 TYPED_TEST(TestStringKernels, MatchSubstring) {
   MatchSubstringOptions options{"ab"};
   this->CheckUnary("match_substring", "[]", boolean(), "[]", &options);
-  this->CheckUnary("match_substring", R"(["abc", "acb", "cab", null, "bac"])", boolean(),
-                   "[true, false, true, null, false]", &options);
+  this->CheckUnary("match_substring", R"(["abc", "acb", "cab", null, "bac", "AB"])",
+                   boolean(), "[true, false, true, null, false, false]", &options);
 
   MatchSubstringOptions options_repeated{"abab"};
   this->CheckUnary("match_substring", R"(["abab", "ab", "cababc", null, "bac"])",
@@ -393,12 +393,29 @@ TYPED_TEST(TestStringKernels, MatchSubstring) {
                    &options_double_char_2);
 }
 
+#ifdef ARROW_WITH_RE2
+TYPED_TEST(TestStringKernels, MatchSubstringIgnoreCase) {
+  MatchSubstringOptions options_insensitive{"aé(", /*ignore_case=*/true};
+  this->CheckUnary("match_substring", R"(["abc", "aEb", "baÉ(", "aé(", "ae(", "Aé("])",
+                   boolean(), "[false, false, true, true, false, true]",
+                   &options_insensitive);
+}
+#else
+TYPED_TEST(TestStringKernels, MatchSubstringIgnoreCase) {
+  Datum input = ArrayFromJSON(this->type(), R"(["a"])");
+  MatchSubstringOptions options{"a", /*ignore_case=*/true};
+  EXPECT_RAISES_WITH_MESSAGE_THAT(NotImplemented,
+                                  ::testing::HasSubstr("ignore_case requires RE2"),
+                                  CallFunction("match_substring", {input}, &options));
+}
+#endif
+
 #ifdef ARROW_WITH_RE2
 TYPED_TEST(TestStringKernels, MatchSubstringRegex) {
   MatchSubstringOptions options{"ab"};
   this->CheckUnary("match_substring_regex", "[]", boolean(), "[]", &options);
-  this->CheckUnary("match_substring_regex", R"(["abc", "acb", "cab", null, "bac"])",
-                   boolean(), "[true, false, true, null, false]", &options);
+  this->CheckUnary("match_substring_regex", R"(["abc", "acb", "cab", null, "bac", "AB"])",
+                   boolean(), "[true, false, true, null, false, false]", &options);
   MatchSubstringOptions options_repeated{"(ab){2}"};
   this->CheckUnary("match_substring_regex", R"(["abab", "ab", "cababc", null, "bac"])",
                    boolean(), "[true, false, true, null, false]", &options_repeated);
@@ -411,6 +428,10 @@ TYPED_TEST(TestStringKernels, MatchSubstringRegex) {
   MatchSubstringOptions options_plus{"a+b"};
   this->CheckUnary("match_substring_regex", R"(["aacb", "aab", "dab", "caaab", "b", ""])",
                    boolean(), "[false, true, true, true, false, false]", &options_plus);
+  MatchSubstringOptions options_insensitive{"ab|é", /*ignore_case=*/true};
+  this->CheckUnary("match_substring_regex", R"(["abc", "acb", "É", null, "bac", "AB"])",
+                   boolean(), "[true, false, true, null, false, true]",
+                   &options_insensitive);
 
   // Unicode character semantics
   // "\pL" means: unicode category "letter"
@@ -458,6 +479,15 @@ TYPED_TEST(TestStringKernels, MatchLike) {
   this->CheckUnary("match_like", inputs, boolean(),
                    "[false, false, true, false, false, false, false, null]",
                    &regex_match);
+
+  // ignore_case means this still gets mapped to a regex search
+  MatchSubstringOptions insensitive_substring{"%é%", /*ignore_case=*/true};
+  this->CheckUnary("match_like", R"(["é", "fooÉbar", "e"])", boolean(),
+                   "[true, true, false]", &insensitive_substring);
+
+  MatchSubstringOptions insensitive_regex{"_é%", /*ignore_case=*/true};
+  this->CheckUnary("match_like", R"(["éfoo", "aÉfoo", "e"])", boolean(),
+                   "[false, true, false]", &insensitive_regex);
 }
 
 TYPED_TEST(TestStringKernels, MatchLikeEscaping) {
diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx
index debea53b17b..1b62226b2b3 100644
--- a/python/pyarrow/_compute.pyx
+++ b/python/pyarrow/_compute.pyx
@@ -657,14 +657,14 @@ cdef class _MatchSubstringOptions(FunctionOptions):
     cdef const CFunctionOptions* get_options(self) except NULL:
         return self.match_substring_options.get()
 
-    def _set_options(self, pattern):
+    def _set_options(self, pattern, bint ignore_case):
         self.match_substring_options.reset(
-            new CMatchSubstringOptions(tobytes(pattern)))
+            new CMatchSubstringOptions(tobytes(pattern), ignore_case))
 
 
 class MatchSubstringOptions(_MatchSubstringOptions):
-    def __init__(self, pattern):
-        self._set_options(pattern)
+    def __init__(self, pattern, bint ignore_case=False):
+        self._set_options(pattern, ignore_case)
 
 
 cdef class _TrimOptions(FunctionOptions):
diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py
index 9430dd4faf2..c447aa95c5c 100644
--- a/python/pyarrow/compute.py
+++ b/python/pyarrow/compute.py
@@ -308,7 +308,7 @@ def find_substring(array, pattern):
                          MatchSubstringOptions(pattern))
 
 
-def match_like(array, pattern):
+def match_like(array, pattern, *, ignore_case=False):
     """
     Test if the SQL-style LIKE pattern *pattern* matches a value of a
     string array.
@@ -321,6 +321,8 @@ def match_like(array, pattern):
         characters, '_' will match exactly one character, and all
         other characters match themselves. To match a literal percent
         sign or underscore, precede the character with a backslash.
+    ignore_case : bool, default False
+        Ignore case while searching.
 
     Returns
     -------
@@ -328,10 +330,10 @@ def match_like(array, pattern):
 
     """
     return call_function("match_like", [array],
-                         MatchSubstringOptions(pattern))
+                         MatchSubstringOptions(pattern, ignore_case))
 
 
-def match_substring(array, pattern):
+def match_substring(array, pattern, *, ignore_case=False):
     """
     Test if substring *pattern* is contained within a value of a string array.
 
@@ -340,16 +342,18 @@ def match_substring(array, pattern):
     array : pyarrow.Array or pyarrow.ChunkedArray
     pattern : str
         pattern to search for exact matches
+    ignore_case : bool, default False
+        Ignore case while searching.
 
     Returns
     -------
     result : pyarrow.Array or pyarrow.ChunkedArray
     """
     return call_function("match_substring", [array],
-                         MatchSubstringOptions(pattern))
+                         MatchSubstringOptions(pattern, ignore_case))
 
 
-def match_substring_regex(array, pattern):
+def match_substring_regex(array, pattern, *, ignore_case=False):
     """
     Test if regex *pattern* matches at any position a value of a string array.
 
@@ -358,13 +362,15 @@ def match_substring_regex(array, pattern):
     array : pyarrow.Array or pyarrow.ChunkedArray
     pattern : str
         regex pattern to search
+    ignore_case : bool, default False
+        Ignore case while searching.
 
     Returns
     -------
     result : pyarrow.Array or pyarrow.ChunkedArray
     """
     return call_function("match_substring_regex", [array],
-                         MatchSubstringOptions(pattern))
+                         MatchSubstringOptions(pattern, ignore_case))
 
 
 def sum(array):
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 5afa806fa84..9184bd5bbfd 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -1783,8 +1783,9 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil:
 
     cdef cppclass CMatchSubstringOptions \
             "arrow::compute::MatchSubstringOptions"(CFunctionOptions):
-        CMatchSubstringOptions(c_string pattern)
+        CMatchSubstringOptions(c_string pattern, c_bool ignore_case)
         c_string pattern
+        c_bool ignore_case
 
     cdef cppclass CTrimOptions \
             "arrow::compute::TrimOptions"(CFunctionOptions):
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index 26d52eff08b..127b271dda5 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -308,6 +308,14 @@ def test_match_like():
     expected = pa.array([False, True, False, True, None])
     assert expected.equals(result)
 
+    arr = pa.array(["aB", "bA%", "ba", "ca%d", None])
+    result = pc.match_like(arr, r"_a\%%", ignore_case=True)
+    expected = pa.array([False, True, False, True, None])
+    assert expected.equals(result)
+    result = pc.match_like(arr, r"_a\%%", ignore_case=False)
+    expected = pa.array([False, False, False, True, None])
+    assert expected.equals(result)
+
 
 def test_match_substring():
     arr = pa.array(["ab", "abc", "ba", None])
@@ -315,6 +323,14 @@ def test_match_substring():
     expected = pa.array([True, True, False, None])
     assert expected.equals(result)
 
+    arr = pa.array(["áB", "Ábc", "ba", None])
+    result = pc.match_substring(arr, "áb", ignore_case=True)
+    expected = pa.array([True, True, False, None])
+    assert expected.equals(result)
+    result = pc.match_substring(arr, "áb", ignore_case=False)
+    expected = pa.array([False, False, False, None])
+    assert expected.equals(result)
+
 
 def test_match_substring_regex():
     arr = pa.array(["ab", "abc", "ba", "c", None])
@@ -322,6 +338,14 @@ def test_match_substring_regex():
     expected = pa.array([True, True, True, False, None])
     assert expected.equals(result)
 
+    arr = pa.array(["aB", "Abc", "BA", "c", None])
+    result = pc.match_substring_regex(arr, "^a?b", ignore_case=True)
+    expected = pa.array([True, True, True, False, None])
+    assert expected.equals(result)
+    result = pc.match_substring_regex(arr, "^a?b", ignore_case=False)
+    expected = pa.array([False, False, False, False, None])
+    assert expected.equals(result)
+
 
 def test_trim():
     # \u3000 is unicode whitespace
diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R
index fadd216a30c..e62f3e93007 100644
--- a/r/R/dplyr-functions.R
+++ b/r/R/dplyr-functions.R
@@ -226,11 +226,11 @@ nse_funcs$str_trim <- function(string, side = c("both", "left", "right")) {
 }
 
 nse_funcs$grepl <- function(pattern, x, ignore.case = FALSE, fixed = FALSE) {
-  arrow_fun <- ifelse(fixed && !ignore.case, "match_substring", "match_substring_regex")
+  arrow_fun <- ifelse(fixed, "match_substring", "match_substring_regex")
   Expression$create(
     arrow_fun,
     x,
-    options = list(pattern = format_string_pattern(pattern, ignore.case, fixed))
+    options = list(pattern = pattern, ignore_case = ignore.case)
   )
 }
 
diff --git a/r/src/compute.cpp b/r/src/compute.cpp
index 90c7b4129c7..26f0752d847 100644
--- a/r/src/compute.cpp
+++ b/r/src/compute.cpp
@@ -220,7 +220,12 @@ std::shared_ptr<arrow::compute::FunctionOptions> make_compute_options(
 
   if (func_name == "match_substring" || func_name == "match_substring_regex") {
     using Options = arrow::compute::MatchSubstringOptions;
-    return std::make_shared<Options>(cpp11::as_cpp<std::string>(options["pattern"]));
+    bool ignore_case = false;
+    if (!Rf_isNull(options["ignore_case"])) {
+      ignore_case = cpp11::as_cpp<bool>(options["ignore_case"]);
+    }
+    return std::make_shared<Options>(cpp11::as_cpp<std::string>(options["pattern"]),
+                                     ignore_case);
   }
 
   if (func_name == "replace_substring" || func_name == "replace_substring_regex") {

From 7c8d628d15615c24ab992f006b5cfa9e0b83f79d Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Tue, 1 Jun 2021 13:14:38 -0500
Subject: [PATCH 323/719] ARROW-12918: [C++] Fill out
 iterator_traits<ArrayIterator>

Closes #10428 from lidavidm/arrow-12918

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/src/arrow/stl_iterator.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/cpp/src/arrow/stl_iterator.h b/cpp/src/arrow/stl_iterator.h
index c8c2bb15e07..6225a89aae4 100644
--- a/cpp/src/arrow/stl_iterator.h
+++ b/cpp/src/arrow/stl_iterator.h
@@ -47,6 +47,8 @@ class ArrayIterator {
  public:
   using value_type = arrow::util::optional<typename ValueAccessor::ValueType>;
   using difference_type = int64_t;
+  using pointer = value_type*;
+  using reference = value_type&;
   using iterator_category = std::random_access_iterator_tag;
 
   // Some algorithms need to default-construct an iterator
@@ -136,6 +138,8 @@ struct iterator_traits<::arrow::stl::ArrayIterator<ArrayType>> {
   using IteratorType = ::arrow::stl::ArrayIterator<ArrayType>;
   using difference_type = typename IteratorType::difference_type;
   using value_type = typename IteratorType::value_type;
+  using pointer = typename IteratorType::pointer;
+  using reference = typename IteratorType::reference;
   using iterator_category = typename IteratorType::iterator_category;
 };
 

From 868b834e4b92ddb20b03f10b204fbd84d937d5d8 Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Tue, 1 Jun 2021 13:33:15 -0500
Subject: [PATCH 324/719] ARROW-12909: [R][Release] Build of ubuntu-docs is
 failing

This PR ensures that the same version of r-base and r-recommended is installed to prevent errors when they end up with different versions.

Closes #10426 from thisisnic/ARROW-12909-build

Authored-by: Nic Crane <thisisnic@gmail.com>
Signed-off-by: Jonathan Keane <jkeane@gmail.com>
---
 ci/docker/linux-apt-docs.dockerfile | 1 +
 ci/docker/linux-apt-r.dockerfile    | 1 +
 2 files changed, 2 insertions(+)

diff --git a/ci/docker/linux-apt-docs.dockerfile b/ci/docker/linux-apt-docs.dockerfile
index 20cb889f28d..e3b258987f6 100644
--- a/ci/docker/linux-apt-docs.dockerfile
+++ b/ci/docker/linux-apt-docs.dockerfile
@@ -51,6 +51,7 @@ RUN apt-get update -y && \
         nvidia-cuda-toolkit \
         openjdk-${jdk}-jdk-headless \
         pandoc \
+        r-recommended=${r}* \
         r-base=${r}* \
         rsync \
         ruby-dev \
diff --git a/ci/docker/linux-apt-r.dockerfile b/ci/docker/linux-apt-r.dockerfile
index f47044e334b..8924208b878 100644
--- a/ci/docker/linux-apt-r.dockerfile
+++ b/ci/docker/linux-apt-r.dockerfile
@@ -40,6 +40,7 @@ RUN apt-get update -y && \
     add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu '$(lsb_release -cs)'-cran'$(echo "${r}" | tr -d . | tr 6 5)'/' && \
     apt-get install -y \
         r-base=${r}* \
+        r-recommended=${r}* \
         # system libs needed by core R packages
         libxml2-dev \
         libgit2-dev \

From 14016e9a430fa4d3f599f7a98b19c501c76154b7 Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Tue, 1 Jun 2021 15:15:00 -0700
Subject: [PATCH 325/719] ARROW-12722: [R] Raise error when attemping to print
 table with duplicated naming

Closes #10381 from thisisnic/ARROW-12722-getfieldbyname

Lead-authored-by: Nic Crane <thisisnic@gmail.com>
Co-authored-by: Nic <thisisnic@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 r/R/dplyr.R                   | 12 ++++++++++++
 r/tests/testthat/test-dplyr.R | 12 +++++++++++-
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/r/R/dplyr.R b/r/R/dplyr.R
index 56be8cff1db..b77b0cf6575 100644
--- a/r/R/dplyr.R
+++ b/r/R/dplyr.R
@@ -31,6 +31,18 @@ arrow_dplyr_query <- function(.data) {
     return(.data)
   }
 
+  # Evaluating expressions on a dataset with duplicated fieldnames will error
+  dupes <- duplicated(names(.data))
+  if (any(dupes)) {
+    abort(c(
+      "Duplicated field names",
+      x = paste0(
+        "The following field names were found more than once in the data: ",
+        oxford_paste(names(.data)[dupes])
+      )
+    ))
+  }
+  
   structure(
     list(
       .data = if (inherits(.data, "Dataset")) {
diff --git a/r/tests/testthat/test-dplyr.R b/r/tests/testthat/test-dplyr.R
index 378640e8308..4fcb0e710a4 100644
--- a/r/tests/testthat/test-dplyr.R
+++ b/r/tests/testthat/test-dplyr.R
@@ -25,7 +25,7 @@ tbl <- example_data
 tbl$verses <- verses[[1]]
 # c(" a ", "  b  ", "   c   ", ...) increasing padding
 # nchar =   3  5  7  9 11 13 15 17 19 21
-tbl$padded_strings <- stringr::str_pad(letters[1:10], width = 2*(1:10)+1, side = "both")
+tbl$padded_strings <- stringr::str_pad(letters[1:10], width = 2*(1:10) + 1, side = "both")
 
 test_that("basic select/filter/collect", {
   batch <- record_batch(tbl)
@@ -67,6 +67,7 @@ chr: string
 See $.data for the source Arrow object',
   fixed = TRUE
   )
+  
 })
 
 test_that("summarize", {
@@ -889,3 +890,12 @@ test_that("bad explicit type conversions with as.*()", {
   )
 
 })
+
+test_that("No duplicate field names are allowed in an arrow_dplyr_query", {
+  expect_error(
+    Table$create(tbl, tbl) %>%
+      filter(int > 0),
+    regexp = 'The following field names were found more than once in the data: "int", "dbl", "dbl2", "lgl", "false", "chr", "fct", "verses", and "padded_strings"'
+  )
+})
+

From f3344dc64685b0499f3fc866ff13835c904ed711 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Wed, 2 Jun 2021 11:43:55 +0200
Subject: [PATCH 326/719] ARROW-12859: [C++] Add ScalarFromJSON for testing

```
ScalarFromJSON(int64(), "null") == Datum(Int64Scalar())
ScalarFromJSON(int64(), "5")    == Datum(Int64Scalar(5))
```

Closes #10386 from lidavidm/arrow-12859

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/chunked_array.cc        | 27 ++++++++++++++
 cpp/src/arrow/chunked_array.h         |  4 ++
 cpp/src/arrow/ipc/json_simple.cc      | 21 +++++++++++
 cpp/src/arrow/ipc/json_simple.h       |  4 ++
 cpp/src/arrow/ipc/json_simple_test.cc | 54 ++++++++++++++++++++++++++-
 cpp/src/arrow/testing/gtest_util.cc   | 49 ++++++++++++++++++++++++
 cpp/src/arrow/testing/gtest_util.h    | 10 +++++
 7 files changed, 168 insertions(+), 1 deletion(-)

diff --git a/cpp/src/arrow/chunked_array.cc b/cpp/src/arrow/chunked_array.cc
index b259b05d7cf..142bd0d8c89 100644
--- a/cpp/src/arrow/chunked_array.cc
+++ b/cpp/src/arrow/chunked_array.cc
@@ -118,6 +118,33 @@ bool ChunkedArray::Equals(const std::shared_ptr<ChunkedArray>& other) const {
   return Equals(*other.get());
 }
 
+bool ChunkedArray::ApproxEquals(const ChunkedArray& other,
+                                const EqualOptions& equal_options) const {
+  if (length_ != other.length()) {
+    return false;
+  }
+  if (null_count_ != other.null_count()) {
+    return false;
+  }
+  // We cannot toggle check_metadata here yet, so we don't check it
+  if (!type_->Equals(*other.type_, /*check_metadata=*/false)) {
+    return false;
+  }
+
+  // Check contents of the underlying arrays. This checks for equality of
+  // the underlying data independently of the chunk size.
+  return internal::ApplyBinaryChunked(
+             *this, other,
+             [&](const Array& left_piece, const Array& right_piece,
+                 int64_t ARROW_ARG_UNUSED(position)) {
+               if (!left_piece.ApproxEquals(right_piece, equal_options)) {
+                 return Status::Invalid("Unequal piece");
+               }
+               return Status::OK();
+             })
+      .ok();
+}
+
 std::shared_ptr<ChunkedArray> ChunkedArray::Slice(int64_t offset, int64_t length) const {
   ARROW_CHECK_LE(offset, length_) << "Slice offset greater than array length";
   bool offset_equals_length = offset == length_;
diff --git a/cpp/src/arrow/chunked_array.h b/cpp/src/arrow/chunked_array.h
index 5c0dda91850..2ace045c2bf 100644
--- a/cpp/src/arrow/chunked_array.h
+++ b/cpp/src/arrow/chunked_array.h
@@ -23,6 +23,7 @@
 #include <utility>
 #include <vector>
 
+#include "arrow/compare.h"
 #include "arrow/result.h"
 #include "arrow/status.h"
 #include "arrow/type_fwd.h"
@@ -136,6 +137,9 @@ class ARROW_EXPORT ChunkedArray {
   bool Equals(const ChunkedArray& other) const;
   /// \brief Determine if two chunked arrays are equal.
   bool Equals(const std::shared_ptr<ChunkedArray>& other) const;
+  /// \brief Determine if two chunked arrays approximately equal
+  bool ApproxEquals(const ChunkedArray& other,
+                    const EqualOptions& = EqualOptions::Defaults()) const;
 
   /// \return PrettyPrint representation suitable for debugging
   std::string ToString() const;
diff --git a/cpp/src/arrow/ipc/json_simple.cc b/cpp/src/arrow/ipc/json_simple.cc
index caf6fd06b9c..eb7a4f3a790 100644
--- a/cpp/src/arrow/ipc/json_simple.cc
+++ b/cpp/src/arrow/ipc/json_simple.cc
@@ -30,6 +30,7 @@
 #include "arrow/array/builder_time.h"
 #include "arrow/array/builder_union.h"
 #include "arrow/ipc/json_simple.h"
+#include "arrow/scalar.h"
 #include "arrow/type_traits.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/decimal.h"
@@ -911,6 +912,26 @@ Status DictArrayFromJSON(const std::shared_ptr<DataType>& type,
       .Value(out);
 }
 
+Status ScalarFromJSON(const std::shared_ptr<DataType>& type,
+                      util::string_view json_string, std::shared_ptr<Scalar>* out) {
+  std::shared_ptr<Converter> converter;
+  RETURN_NOT_OK(GetConverter(type, &converter));
+
+  rj::Document json_doc;
+  json_doc.Parse<kParseFlags>(json_string.data(), json_string.length());
+  if (json_doc.HasParseError()) {
+    return Status::Invalid("JSON parse error at offset ", json_doc.GetErrorOffset(), ": ",
+                           GetParseError_En(json_doc.GetParseError()));
+  }
+
+  std::shared_ptr<Array> array;
+  RETURN_NOT_OK(converter->AppendValue(json_doc));
+  RETURN_NOT_OK(converter->Finish(&array));
+  DCHECK_EQ(array->length(), 1);
+  ARROW_ASSIGN_OR_RAISE(*out, array->GetScalar(0));
+  return Status::OK();
+}
+
 }  // namespace json
 }  // namespace internal
 }  // namespace ipc
diff --git a/cpp/src/arrow/ipc/json_simple.h b/cpp/src/arrow/ipc/json_simple.h
index 8f6b57a4608..4dd3a664aa6 100644
--- a/cpp/src/arrow/ipc/json_simple.h
+++ b/cpp/src/arrow/ipc/json_simple.h
@@ -51,6 +51,10 @@ ARROW_EXPORT
 Status DictArrayFromJSON(const std::shared_ptr<DataType>&, util::string_view indices_json,
                          util::string_view dictionary_json, std::shared_ptr<Array>* out);
 
+ARROW_EXPORT
+Status ScalarFromJSON(const std::shared_ptr<DataType>&, util::string_view json,
+                      std::shared_ptr<Scalar>* out);
+
 }  // namespace json
 }  // namespace internal
 }  // namespace ipc
diff --git a/cpp/src/arrow/ipc/json_simple_test.cc b/cpp/src/arrow/ipc/json_simple_test.cc
index c5358ac89f1..481f38aab21 100644
--- a/cpp/src/arrow/ipc/json_simple_test.cc
+++ b/cpp/src/arrow/ipc/json_simple_test.cc
@@ -136,6 +136,21 @@ void AssertJSONDictArray(const std::shared_ptr<DataType>& index_type,
   AssertArraysEqual(*expected_values, *dict_array.dictionary());
 }
 
+template <typename T, typename C_TYPE = typename T::c_type>
+void AssertJSONScalar(const std::shared_ptr<DataType>& type, const std::string& json,
+                      const bool is_valid, const C_TYPE value) {
+  SCOPED_TRACE(json);
+  std::shared_ptr<Scalar> actual, expected;
+
+  ASSERT_OK(ScalarFromJSON(type, json, &actual));
+  if (is_valid) {
+    ASSERT_OK_AND_ASSIGN(expected, MakeScalar(type, value));
+  } else {
+    expected = MakeNullScalar(type);
+  }
+  AssertScalarsEqual(*expected, *actual, /*verbose=*/true);
+}
+
 TEST(TestHelper, JSONArray) {
   // Test the JSONArray helper func
   std::string s =
@@ -329,7 +344,6 @@ TEST(TestNull, Errors) {
 
 TEST(TestBoolean, Basics) {
   std::shared_ptr<DataType> type = boolean();
-  std::shared_ptr<Array> expected, actual;
 
   AssertJSONArray<BooleanType, bool>(type, "[]", {});
   AssertJSONArray<BooleanType, bool>(type, "[false, true, false]", {false, true, false});
@@ -1327,6 +1341,44 @@ TEST(TestDictArrayFromJSON, Errors) {
                                            &array));  // dict value isn't string
 }
 
+TEST(TestScalarFromJSON, Basics) {
+  // Sanity check for common types (not exhaustive)
+  std::shared_ptr<Scalar> scalar;
+  AssertJSONScalar<Int64Type>(int64(), "4", true, 4);
+  AssertJSONScalar<Int64Type>(int64(), "null", false, 0);
+  AssertJSONScalar<StringType, std::shared_ptr<Buffer>>(utf8(), R"("")", true,
+                                                        Buffer::FromString(""));
+  AssertJSONScalar<StringType, std::shared_ptr<Buffer>>(utf8(), R"("foo")", true,
+                                                        Buffer::FromString("foo"));
+  AssertJSONScalar<StringType, std::shared_ptr<Buffer>>(utf8(), R"(null)", false,
+                                                        Buffer::FromString(""));
+  AssertJSONScalar<NullType, std::nullptr_t>(null(), "null", false, nullptr);
+  AssertJSONScalar<BooleanType, bool>(boolean(), "true", true, true);
+  AssertJSONScalar<BooleanType, bool>(boolean(), "false", true, false);
+  AssertJSONScalar<BooleanType, bool>(boolean(), "null", false, false);
+  AssertJSONScalar<BooleanType, bool>(boolean(), "0", true, false);
+  AssertJSONScalar<BooleanType, bool>(boolean(), "1", true, true);
+  AssertJSONScalar<DoubleType, bool>(float64(), "1.0", true, 1.0);
+  AssertJSONScalar<DoubleType, bool>(float64(), "-0.0", true, -0.0);
+  ASSERT_OK(ScalarFromJSON(float64(), "NaN", &scalar));
+  ASSERT_TRUE(std::isnan(checked_cast<DoubleScalar&>(*scalar).value));
+  ASSERT_OK(ScalarFromJSON(float64(), "Inf", &scalar));
+  ASSERT_TRUE(std::isinf(checked_cast<DoubleScalar&>(*scalar).value));
+}
+
+TEST(TestScalarFromJSON, Errors) {
+  std::shared_ptr<Scalar> scalar;
+  ASSERT_RAISES(Invalid, ScalarFromJSON(int64(), "[0]", &scalar));
+  ASSERT_RAISES(Invalid, ScalarFromJSON(int64(), "[9223372036854775808]", &scalar));
+  ASSERT_RAISES(Invalid, ScalarFromJSON(int64(), "[-9223372036854775809]", &scalar));
+  ASSERT_RAISES(Invalid, ScalarFromJSON(uint64(), "[18446744073709551616]", &scalar));
+  ASSERT_RAISES(Invalid, ScalarFromJSON(uint64(), "[-1]", &scalar));
+  ASSERT_RAISES(Invalid, ScalarFromJSON(binary(), "0", &scalar));
+  ASSERT_RAISES(Invalid, ScalarFromJSON(binary(), "[]", &scalar));
+  ASSERT_RAISES(Invalid, ScalarFromJSON(boolean(), "0.0", &scalar));
+  ASSERT_RAISES(Invalid, ScalarFromJSON(boolean(), "\"true\"", &scalar));
+}
+
 }  // namespace json
 }  // namespace internal
 }  // namespace ipc
diff --git a/cpp/src/arrow/testing/gtest_util.cc b/cpp/src/arrow/testing/gtest_util.cc
index ba4fe1e1fe7..39bd665d5b6 100644
--- a/cpp/src/arrow/testing/gtest_util.cc
+++ b/cpp/src/arrow/testing/gtest_util.cc
@@ -222,6 +222,20 @@ void AssertChunkedEquivalent(const ChunkedArray& expected, const ChunkedArray& a
   }
 }
 
+void AssertChunkedApproxEquivalent(const ChunkedArray& expected,
+                                   const ChunkedArray& actual,
+                                   const EqualOptions& equal_options) {
+  if (!actual.ApproxEquals(expected, equal_options)) {
+    std::stringstream pp_expected;
+    std::stringstream pp_actual;
+    ::arrow::PrettyPrintOptions options(/*indent=*/2);
+    options.window = 50;
+    ARROW_EXPECT_OK(PrettyPrint(expected, options, &pp_expected));
+    ARROW_EXPECT_OK(PrettyPrint(actual, options, &pp_actual));
+    FAIL() << "Got: \n" << pp_actual.str() << "\nExpected: \n" << pp_expected.str();
+  }
+}
+
 void AssertBufferEqual(const Buffer& buffer, const std::vector<uint8_t>& expected) {
   ASSERT_EQ(static_cast<size_t>(buffer.size()), expected.size())
       << "Mismatching buffer size";
@@ -361,6 +375,34 @@ void AssertDatumsEqual(const Datum& expected, const Datum& actual, bool verbose)
   }
 }
 
+void AssertDatumsApproxEqual(const Datum& expected, const Datum& actual, bool verbose,
+                             const EqualOptions& options) {
+  ASSERT_EQ(expected.kind(), actual.kind())
+      << "expected:" << expected.ToString() << " got:" << actual.ToString();
+
+  switch (expected.kind()) {
+    case Datum::SCALAR:
+      AssertScalarsApproxEqual(*expected.scalar(), *actual.scalar(), verbose, options);
+      break;
+    case Datum::ARRAY: {
+      auto expected_array = expected.make_array();
+      auto actual_array = actual.make_array();
+      AssertArraysApproxEqual(*expected_array, *actual_array, verbose, options);
+      break;
+    }
+    case Datum::CHUNKED_ARRAY: {
+      auto expected_array = expected.chunked_array();
+      auto actual_array = actual.chunked_array();
+      AssertChunkedApproxEquivalent(*expected_array, *actual_array, options);
+      break;
+    }
+    default:
+      // TODO: Implement better print
+      ASSERT_TRUE(actual.Equals(expected));
+      break;
+  }
+}
+
 std::shared_ptr<Array> ArrayFromJSON(const std::shared_ptr<DataType>& type,
                                      util::string_view json) {
   std::shared_ptr<Array> out;
@@ -396,6 +438,13 @@ std::shared_ptr<RecordBatch> RecordBatchFromJSON(const std::shared_ptr<Schema>&
   return *RecordBatch::FromStructArray(struct_array);
 }
 
+std::shared_ptr<Scalar> ScalarFromJSON(const std::shared_ptr<DataType>& type,
+                                       util::string_view json) {
+  std::shared_ptr<Scalar> out;
+  ABORT_NOT_OK(ipc::internal::json::ScalarFromJSON(type, json, &out));
+  return out;
+}
+
 std::shared_ptr<Table> TableFromJSON(const std::shared_ptr<Schema>& schema,
                                      const std::vector<std::string>& json) {
   std::vector<std::shared_ptr<RecordBatch>> batches;
diff --git a/cpp/src/arrow/testing/gtest_util.h b/cpp/src/arrow/testing/gtest_util.h
index 757986e13ca..b8ea8e76298 100644
--- a/cpp/src/arrow/testing/gtest_util.h
+++ b/cpp/src/arrow/testing/gtest_util.h
@@ -199,6 +199,9 @@ ARROW_TESTING_EXPORT void AssertChunkedEqual(const ChunkedArray& actual,
 // Like ChunkedEqual, but permits different chunk layout
 ARROW_TESTING_EXPORT void AssertChunkedEquivalent(const ChunkedArray& expected,
                                                   const ChunkedArray& actual);
+ARROW_TESTING_EXPORT void AssertChunkedApproxEquivalent(
+    const ChunkedArray& expected, const ChunkedArray& actual,
+    const EqualOptions& equal_options = EqualOptions::Defaults());
 ARROW_TESTING_EXPORT void AssertBufferEqual(const Buffer& buffer,
                                             const std::vector<uint8_t>& expected);
 ARROW_TESTING_EXPORT void AssertBufferEqual(const Buffer& buffer,
@@ -246,6 +249,9 @@ ARROW_TESTING_EXPORT void AssertTablesEqual(const Table& expected, const Table&
 
 ARROW_TESTING_EXPORT void AssertDatumsEqual(const Datum& expected, const Datum& actual,
                                             bool verbose = false);
+ARROW_TESTING_EXPORT void AssertDatumsApproxEqual(
+    const Datum& expected, const Datum& actual, bool verbose = false,
+    const EqualOptions& options = EqualOptions::Defaults());
 
 template <typename C_TYPE>
 void AssertNumericDataEqual(const C_TYPE* raw_data,
@@ -301,6 +307,10 @@ ARROW_TESTING_EXPORT
 std::shared_ptr<ChunkedArray> ChunkedArrayFromJSON(const std::shared_ptr<DataType>&,
                                                    const std::vector<std::string>& json);
 
+ARROW_TESTING_EXPORT
+std::shared_ptr<Scalar> ScalarFromJSON(const std::shared_ptr<DataType>&,
+                                       util::string_view json);
+
 ARROW_TESTING_EXPORT
 std::shared_ptr<Table> TableFromJSON(const std::shared_ptr<Schema>&,
                                      const std::vector<std::string>& json);

From dc397b7af92b19532fc344796803f320a4bab9fd Mon Sep 17 00:00:00 2001
From: Yibo Cai <yibo.cai@arm.com>
Date: Wed, 2 Jun 2021 11:47:05 +0200
Subject: [PATCH 327/719] ARROW-12906: [C++][Python] Fix fill_null segfault

Closes #10434 from cyb70289/12906-fill-null-crash

Authored-by: Yibo Cai <yibo.cai@arm.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/compute/kernels/scalar_fill_null.cc | 2 +-
 python/pyarrow/tests/test_compute.py              | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/cpp/src/arrow/compute/kernels/scalar_fill_null.cc b/cpp/src/arrow/compute/kernels/scalar_fill_null.cc
index 85af0e5b1cd..cf22b0de3dc 100644
--- a/cpp/src/arrow/compute/kernels/scalar_fill_null.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_fill_null.cc
@@ -162,7 +162,6 @@ struct FillNullFunctor<Type, enable_if_t<is_base_binary_type<Type>::value>> {
     const ArrayData& input = *batch[0].array();
     const auto& fill_value_scalar =
         checked_cast<const BaseBinaryScalar&>(*batch[1].scalar());
-    util::string_view fill_value(*fill_value_scalar.value);
     ArrayData* output = out->mutable_array();
 
     // Ensure the kernel is configured properly to have no validity bitmap /
@@ -172,6 +171,7 @@ struct FillNullFunctor<Type, enable_if_t<is_base_binary_type<Type>::value>> {
     const int64_t null_count = input.GetNullCount();
 
     if (null_count > 0 && fill_value_scalar.is_valid) {
+      util::string_view fill_value(*fill_value_scalar.value);
       BuilderType builder(input.type, ctx->memory_pool());
       RETURN_NOT_OK(builder.ReserveData(input.buffers[2]->size() +
                                         fill_value.length() * null_count));
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index 127b271dda5..a9a2c0f347d 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -1078,6 +1078,11 @@ def test_fill_null():
     expected = pa.array([b'a', b'bb', b'ccc'], type=pa.large_binary())
     assert result.equals(expected)
 
+    arr = pa.array(['a', 'bb', None])
+    result = arr.fill_null(None)
+    expected = pa.array(['a', 'bb', None])
+    assert result.equals(expected)
+
 
 @pytest.mark.parametrize('arrow_type', numerical_arrow_types)
 def test_fill_null_array(arrow_type):

From dec44c6b06d5716abb1d296219bd1210657032bf Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Wed, 2 Jun 2021 14:52:55 +0200
Subject: [PATCH 328/719] ARROW-12935: [C++][CI] Fix compiler error on some
 clang versions

This fixes a regression after merging ARROW-12859.

Closes #10435 from pitrou/ARROW-12935-clang-compile-error

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/ipc/json_simple_test.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cpp/src/arrow/ipc/json_simple_test.cc b/cpp/src/arrow/ipc/json_simple_test.cc
index 481f38aab21..512905dde55 100644
--- a/cpp/src/arrow/ipc/json_simple_test.cc
+++ b/cpp/src/arrow/ipc/json_simple_test.cc
@@ -1358,8 +1358,8 @@ TEST(TestScalarFromJSON, Basics) {
   AssertJSONScalar<BooleanType, bool>(boolean(), "null", false, false);
   AssertJSONScalar<BooleanType, bool>(boolean(), "0", true, false);
   AssertJSONScalar<BooleanType, bool>(boolean(), "1", true, true);
-  AssertJSONScalar<DoubleType, bool>(float64(), "1.0", true, 1.0);
-  AssertJSONScalar<DoubleType, bool>(float64(), "-0.0", true, -0.0);
+  AssertJSONScalar<DoubleType>(float64(), "1.0", true, 1.0);
+  AssertJSONScalar<DoubleType>(float64(), "-0.0", true, -0.0);
   ASSERT_OK(ScalarFromJSON(float64(), "NaN", &scalar));
   ASSERT_TRUE(std::isnan(checked_cast<DoubleScalar&>(*scalar).value));
   ASSERT_OK(ScalarFromJSON(float64(), "Inf", &scalar));

From 8a200abab93de4136928e5643af13ae878249877 Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Wed, 2 Jun 2021 10:52:42 -0500
Subject: [PATCH 329/719] ARROW-12915: [Release] Build of ubuntu-docs is
 failing on thrift

Closes #10430 from jonkeane/ARROW-12915

Authored-by: Jonathan Keane <jkeane@gmail.com>
Signed-off-by: Jonathan Keane <jkeane@gmail.com>
---
 ci/docker/ubuntu-20.04-cpp.dockerfile | 2 ++
 dev/tasks/tasks.yml                   | 6 ++++++
 2 files changed, 8 insertions(+)

diff --git a/ci/docker/ubuntu-20.04-cpp.dockerfile b/ci/docker/ubuntu-20.04-cpp.dockerfile
index 022fc3ee22c..c75c013799a 100644
--- a/ci/docker/ubuntu-20.04-cpp.dockerfile
+++ b/ci/docker/ubuntu-20.04-cpp.dockerfile
@@ -129,8 +129,10 @@ ENV ARROW_BUILD_TESTS=ON \
     ARROW_WITH_ZSTD=ON \
     AWSSDK_SOURCE=BUNDLED \
     GTest_SOURCE=BUNDLED \
+    gRPC_SOURCE=BUNDLED \
     ORC_SOURCE=BUNDLED \
     PARQUET_BUILD_EXAMPLES=ON \
     PARQUET_BUILD_EXECUTABLES=ON \
     PATH=/usr/lib/ccache/:$PATH \
+    Protobuf_SOURCE=BUNDLED \
     PYTHON=python3
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index ca21496b913..23ff18d6dec 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -949,6 +949,12 @@ tasks:
         UBUNTU: "20.10"
       run: ubuntu-docs
 
+  test-ubuntu-default-docs:
+    ci: azure
+    template: docker-tests/azure.linux.yml
+    params:
+      run: ubuntu-docs
+
   ############################## vcpkg tests ##################################
 
   test-build-vcpkg-win:

From c5af5ddddff44db4a32bb24bf2f5863bc632d3c9 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Wed, 2 Jun 2021 11:09:50 -0500
Subject: [PATCH 330/719] ARROW-12719: [C++] Allow passing S3 canned ACL as
 output stream metadata

This allows users to specify access rights when creating a file with the S3 filesystem.

Canned ACL values are described in https://docs.aws.amazon.com/AmazonS3/latest/userguide/acl-overview.html#canned-acl

Closes #10439 from pitrou/ARROW-12719-s3-acl

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/src/arrow/filesystem/s3fs.cc      | 27 ++++++++++++++++++++++++++-
 cpp/src/arrow/filesystem/s3fs_test.cc | 10 ++++++++--
 2 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc
index c24ebd79f9b..e60cb119e29 100644
--- a/cpp/src/arrow/filesystem/s3fs.cc
+++ b/cpp/src/arrow/filesystem/s3fs.cc
@@ -62,6 +62,7 @@
 #include <aws/s3/model/HeadObjectRequest.h>
 #include <aws/s3/model/ListBucketsResult.h>
 #include <aws/s3/model/ListObjectsV2Request.h>
+#include <aws/s3/model/ObjectCannedACL.h>
 #include <aws/s3/model/PutObjectRequest.h>
 #include <aws/s3/model/UploadPartRequest.h>
 
@@ -729,6 +730,8 @@ std::shared_ptr<const KeyValueMetadata> GetObjectMetadata(const ObjectResult& re
   push("VersionId", result.GetVersionId());
   push_datetime("Last-Modified", result.GetLastModified());
   push_datetime("Expires", result.GetExpires());
+  // NOTE the "canned ACL" isn't available for reading (one can get an expanded
+  // ACL using a separate GetObjectAcl request)
   return md;
 }
 
@@ -737,7 +740,8 @@ struct ObjectMetadataSetter {
   using Setter = std::function<Status(const std::string& value, ObjectRequest* req)>;
 
   static std::unordered_map<std::string, Setter> GetSetters() {
-    return {{"Cache-Control", StringSetter(&ObjectRequest::SetCacheControl)},
+    return {{"ACL", CannedACLSetter()},
+            {"Cache-Control", StringSetter(&ObjectRequest::SetCacheControl)},
             {"Content-Type", StringSetter(&ObjectRequest::SetContentType)},
             {"Content-Language", StringSetter(&ObjectRequest::SetContentLanguage)},
             {"Expires", DateTimeSetter(&ObjectRequest::SetExpires)}};
@@ -759,6 +763,27 @@ struct ObjectMetadataSetter {
       return Status::OK();
     };
   }
+
+  static Setter CannedACLSetter() {
+    return [](const std::string& v, ObjectRequest* req) {
+      ARROW_ASSIGN_OR_RAISE(auto acl, ParseACL(v));
+      req->SetACL(acl);
+      return Status::OK();
+    };
+  }
+
+  static Result<S3Model::ObjectCannedACL> ParseACL(const std::string& v) {
+    if (v.empty()) {
+      return S3Model::ObjectCannedACL::NOT_SET;
+    }
+    auto acl = S3Model::ObjectCannedACLMapper::GetObjectCannedACLForName(ToAwsString(v));
+    if (acl == S3Model::ObjectCannedACL::NOT_SET) {
+      // XXX This actually never happens, as the AWS SDK dynamically
+      // expands the enum range using Aws::GetEnumOverflowContainer()
+      return Status::Invalid("Invalid S3 canned ACL: '", v, "'");
+    }
+    return acl;
+  }
 };
 
 template <typename ObjectRequest>
diff --git a/cpp/src/arrow/filesystem/s3fs_test.cc b/cpp/src/arrow/filesystem/s3fs_test.cc
index 5ba8e237c7a..4f83bdea2dd 100644
--- a/cpp/src/arrow/filesystem/s3fs_test.cc
+++ b/cpp/src/arrow/filesystem/s3fs_test.cc
@@ -464,6 +464,12 @@ class TestS3FS : public S3TestMixin {
     ASSERT_THAT(got_metadata->sorted_pairs(),
                 testing::IsSupersetOf(metadata->sorted_pairs()));
 
+    // Create new file with valid canned ACL
+    // XXX: no easy way of testing the ACL actually gets set
+    metadata = KeyValueMetadata::Make({"ACL"}, {"authenticated-read"});
+    ASSERT_OK_AND_ASSIGN(stream, fs_->OpenOutputStream("bucket/newfile6", metadata));
+    ASSERT_OK(stream->Close());
+
     // Overwrite
     ASSERT_OK_AND_ASSIGN(stream, fs_->OpenOutputStream("bucket/newfile1"));
     ASSERT_OK(stream->Write("overwritten data"));
@@ -478,12 +484,12 @@ class TestS3FS : public S3TestMixin {
     // Open file and then lose filesystem reference
     ASSERT_EQ(fs_.use_count(), 1);  // needed for test to work
     std::weak_ptr<S3FileSystem> weak_fs(fs_);
-    ASSERT_OK_AND_ASSIGN(stream, fs_->OpenOutputStream("bucket/newfile6"));
+    ASSERT_OK_AND_ASSIGN(stream, fs_->OpenOutputStream("bucket/newfile99"));
     fs_.reset();
     ASSERT_OK(stream->Write("some other data"));
     ASSERT_OK(stream->Close());
     ASSERT_TRUE(weak_fs.expired());
-    AssertObjectContents(client_.get(), "bucket", "newfile6", "some other data");
+    AssertObjectContents(client_.get(), "bucket", "newfile99", "some other data");
   }
 
   void TestOpenOutputStreamAbort() {

From 5e86300e15b1680604560f5075d22876ecbc7126 Mon Sep 17 00:00:00 2001
From: Romain Francois <romain@rstudio.com>
Date: Wed, 2 Jun 2021 11:45:26 -0700
Subject: [PATCH 331/719] ARROW-3316: [R] Multi-threaded conversion from R
 data.frame to Arrow table / record batch
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #9615 from romainfrancois/RConverter_Parallel

Lead-authored-by: Romain Francois <romain@rstudio.com>
Co-authored-by: Romain François <romain@rstudio.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 r/R/arrowExports.R           |   8 +-
 r/R/table.R                  |   8 +-
 r/src/arrowExports.cpp       |  35 +-
 r/src/arrow_types.h          |   8 +
 r/src/csv.cpp                |   1 -
 r/src/r_to_arrow.cpp         | 837 ++++++++++++++++++++++++++++-------
 r/src/table.cpp              |  64 ---
 r/src/type_infer.cpp         |   2 +-
 r/tests/testthat/test-type.R |   2 +-
 9 files changed, 701 insertions(+), 264 deletions(-)

diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R
index 55a28529f85..29aa1911a55 100644
--- a/r/R/arrowExports.R
+++ b/r/R/arrowExports.R
@@ -1372,6 +1372,10 @@ ExportRecordBatchReader <- function(reader, stream_ptr){
     invisible(.Call(`_arrow_ExportRecordBatchReader`, reader, stream_ptr))
 }
 
+Table__from_dots <- function(lst, schema_sxp, use_threads){
+    .Call(`_arrow_Table__from_dots`, lst, schema_sxp, use_threads)
+}
+
 vec_to_arrow <- function(x, s_type){
     .Call(`_arrow_vec_to_arrow`, x, s_type)
 }
@@ -1712,10 +1716,6 @@ Table__from_record_batches <- function(batches, schema_sxp){
     .Call(`_arrow_Table__from_record_batches`, batches, schema_sxp)
 }
 
-Table__from_dots <- function(lst, schema_sxp){
-    .Call(`_arrow_Table__from_dots`, lst, schema_sxp)
-}
-
 GetCpuThreadPoolCapacity <- function(){
     .Call(`_arrow_GetCpuThreadPoolCapacity`)
 }
diff --git a/r/R/table.R b/r/R/table.R
index 7645e2f552c..09be952af61 100644
--- a/r/R/table.R
+++ b/r/R/table.R
@@ -166,17 +166,17 @@ Table$create <- function(..., schema = NULL) {
     names(dots) <- rep_len("", length(dots))
   }
   stopifnot(length(dots) > 0)
-  
+
   # Preserve any grouping
   if (length(dots) == 1 && inherits(dots[[1]], "grouped_df")) {
-    out <- Table__from_dots(dots, schema)
+    out <- Table__from_dots(dots, schema, option_use_threads())
     return(dplyr::group_by(out, !!!dplyr::groups(dots[[1]])))
   }
-  
+
   if (all_record_batches(dots)) {
     Table__from_record_batches(dots, schema)
   } else {
-    Table__from_dots(dots, schema)
+    Table__from_dots(dots, schema, option_use_threads())
   }
 }
 
diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp
index b7ca5e9414c..483b1f42ca0 100644
--- a/r/src/arrowExports.cpp
+++ b/r/src/arrowExports.cpp
@@ -5385,6 +5385,23 @@ extern "C" SEXP _arrow_ExportRecordBatchReader(SEXP reader_sexp, SEXP stream_ptr
 }
 #endif
 
+// r_to_arrow.cpp
+#if defined(ARROW_R_WITH_ARROW)
+std::shared_ptr<arrow::Table> Table__from_dots(SEXP lst, SEXP schema_sxp, bool use_threads);
+extern "C" SEXP _arrow_Table__from_dots(SEXP lst_sexp, SEXP schema_sxp_sexp, SEXP use_threads_sexp){
+BEGIN_CPP11
+	arrow::r::Input<SEXP>::type lst(lst_sexp);
+	arrow::r::Input<SEXP>::type schema_sxp(schema_sxp_sexp);
+	arrow::r::Input<bool>::type use_threads(use_threads_sexp);
+	return cpp11::as_sexp(Table__from_dots(lst, schema_sxp, use_threads));
+END_CPP11
+}
+#else
+extern "C" SEXP _arrow_Table__from_dots(SEXP lst_sexp, SEXP schema_sxp_sexp, SEXP use_threads_sexp){
+	Rf_error("Cannot call Table__from_dots(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+}
+#endif
+
 // r_to_arrow.cpp
 #if defined(ARROW_R_WITH_ARROW)
 SEXP vec_to_arrow(SEXP x, SEXP s_type);
@@ -6729,22 +6746,6 @@ extern "C" SEXP _arrow_Table__from_record_batches(SEXP batches_sexp, SEXP schema
 }
 #endif
 
-// table.cpp
-#if defined(ARROW_R_WITH_ARROW)
-std::shared_ptr<arrow::Table> Table__from_dots(SEXP lst, SEXP schema_sxp);
-extern "C" SEXP _arrow_Table__from_dots(SEXP lst_sexp, SEXP schema_sxp_sexp){
-BEGIN_CPP11
-	arrow::r::Input<SEXP>::type lst(lst_sexp);
-	arrow::r::Input<SEXP>::type schema_sxp(schema_sxp_sexp);
-	return cpp11::as_sexp(Table__from_dots(lst, schema_sxp));
-END_CPP11
-}
-#else
-extern "C" SEXP _arrow_Table__from_dots(SEXP lst_sexp, SEXP schema_sxp_sexp){
-	Rf_error("Cannot call Table__from_dots(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
-}
-#endif
-
 // threadpool.cpp
 #if defined(ARROW_R_WITH_ARROW)
 int GetCpuThreadPoolCapacity();
@@ -7230,6 +7231,7 @@ static const R_CallMethodDef CallEntries[] = {
 		{ "_arrow_ExportArray", (DL_FUNC) &_arrow_ExportArray, 3}, 
 		{ "_arrow_ExportRecordBatch", (DL_FUNC) &_arrow_ExportRecordBatch, 3}, 
 		{ "_arrow_ExportRecordBatchReader", (DL_FUNC) &_arrow_ExportRecordBatchReader, 2}, 
+		{ "_arrow_Table__from_dots", (DL_FUNC) &_arrow_Table__from_dots, 3}, 
 		{ "_arrow_vec_to_arrow", (DL_FUNC) &_arrow_vec_to_arrow, 2}, 
 		{ "_arrow_DictionaryArray__FromArrays", (DL_FUNC) &_arrow_DictionaryArray__FromArrays, 3}, 
 		{ "_arrow_RecordBatch__num_columns", (DL_FUNC) &_arrow_RecordBatch__num_columns, 1}, 
@@ -7315,7 +7317,6 @@ static const R_CallMethodDef CallEntries[] = {
 		{ "_arrow_Table__SelectColumns", (DL_FUNC) &_arrow_Table__SelectColumns, 2}, 
 		{ "_arrow_all_record_batches", (DL_FUNC) &_arrow_all_record_batches, 1}, 
 		{ "_arrow_Table__from_record_batches", (DL_FUNC) &_arrow_Table__from_record_batches, 2}, 
-		{ "_arrow_Table__from_dots", (DL_FUNC) &_arrow_Table__from_dots, 2}, 
 		{ "_arrow_GetCpuThreadPoolCapacity", (DL_FUNC) &_arrow_GetCpuThreadPoolCapacity, 0}, 
 		{ "_arrow_SetCpuThreadPoolCapacity", (DL_FUNC) &_arrow_SetCpuThreadPoolCapacity, 1}, 
 		{ "_arrow_GetIOThreadPoolCapacity", (DL_FUNC) &_arrow_GetIOThreadPoolCapacity, 0}, 
diff --git a/r/src/arrow_types.h b/r/src/arrow_types.h
index 5aa26eebd71..ca4ca9519c3 100644
--- a/r/src/arrow_types.h
+++ b/r/src/arrow_types.h
@@ -149,6 +149,14 @@ void TraverseDots(cpp11::list dots, int num_fields, Lambda lambda) {
   }
 }
 
+inline cpp11::writable::list FlattenDots(cpp11::list dots, int num_fields) {
+  std::vector<SEXP> out(num_fields);
+  auto set = [&](int j, SEXP x, cpp11::r_string) { out[j] = x; };
+  TraverseDots(dots, num_fields, set);
+
+  return cpp11::writable::list(out.begin(), out.end());
+}
+
 arrow::Status InferSchemaFromDots(SEXP lst, SEXP schema_sxp, int num_fields,
                                   std::shared_ptr<arrow::Schema>& schema);
 
diff --git a/r/src/csv.cpp b/r/src/csv.cpp
index 3df5db87efa..a8d2256cfe3 100644
--- a/r/src/csv.cpp
+++ b/r/src/csv.cpp
@@ -22,7 +22,6 @@
 #include <arrow/csv/reader.h>
 #include <arrow/csv/writer.h>
 #include <arrow/memory_pool.h>
-
 #include <arrow/util/value_parsing.h>
 
 // [[arrow::export]]
diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp
index 0ab9718da26..d0f4f3a6def 100644
--- a/r/src/r_to_arrow.cpp
+++ b/r/src/r_to_arrow.cpp
@@ -25,10 +25,14 @@
 #include <arrow/array/builder_dict.h>
 #include <arrow/array/builder_nested.h>
 #include <arrow/array/builder_primitive.h>
+#include <arrow/table.h>
 #include <arrow/type_traits.h>
 #include <arrow/util/bitmap_writer.h>
 #include <arrow/util/checked_cast.h>
 #include <arrow/util/converter.h>
+#include <arrow/util/logging.h>
+#include <arrow/util/parallel.h>
+#include <arrow/util/task_group.h>
 
 namespace arrow {
 
@@ -46,6 +50,90 @@ using internal::MakeConverter;
 
 namespace r {
 
+class RTasks {
+ public:
+  using Task = internal::FnOnce<Status()>;
+
+  explicit RTasks(bool use_threads)
+      : use_threads_(use_threads),
+        stop_source_(),
+        parallel_tasks_(
+            use_threads ? arrow::internal::TaskGroup::MakeThreaded(
+                              arrow::internal::GetCpuThreadPool(), stop_source_.token())
+                        : nullptr) {}
+
+  // This Finish() method must never be called from a thread pool thread
+  // as this would deadlock.
+  //
+  // Usage is to :
+  // - create an RTasks instance on the main thread
+  // - add some tasks with .Append()
+  // - and then call .Finish() so that the parallel tasks are finished
+  Status Finish() {
+    Status status = Status::OK();
+
+    // run the delayed tasks now
+    for (auto& task : delayed_serial_tasks_) {
+      status &= std::move(task)();
+    }
+
+    // then wait for the parallel tasks to finish
+    if (use_threads_) {
+      status &= parallel_tasks_->Finish();
+    }
+
+    return status;
+  }
+
+  void Append(bool parallel, Task&& task) {
+    StoppingTask stopping_task(stop_source_, std::move(task));
+    if (parallel && use_threads_) {
+      parallel_tasks_->Append(std::move(stopping_task));
+    } else {
+      delayed_serial_tasks_.push_back(std::move(stopping_task));
+    }
+  }
+
+  void Reset() {
+    delayed_serial_tasks_.clear();
+
+    stop_source_.Reset();
+    if (use_threads_) {
+      parallel_tasks_ = arrow::internal::TaskGroup::MakeThreaded(
+          arrow::internal::GetCpuThreadPool(), stop_source_.token());
+    }
+  }
+
+  bool use_threads_;
+  StopSource stop_source_;
+  std::shared_ptr<arrow::internal::TaskGroup> parallel_tasks_;
+  std::vector<Task> delayed_serial_tasks_;
+
+ private:
+  class StoppingTask {
+   public:
+    StoppingTask(StopSource stop_source, Task&& task) : task_(std::move(task)) {}
+
+    Status operator()() {
+      Status status;
+      StopToken token = stop_source_.token();
+      if (token.IsStopRequested()) {
+        status &= token.Poll();
+      } else {
+        Status status = std::move(task_)();
+        if (!status.ok()) {
+          stop_source_.RequestStop();
+        }
+      }
+      return status;
+    }
+
+   private:
+    StopSource stop_source_;
+    Task task_;
+  };
+};
+
 struct RConversionOptions {
   RConversionOptions() = default;
 
@@ -168,46 +256,85 @@ bool is_NA<int64_t>(int64_t value) {
 }
 
 template <typename T>
-struct RVectorVisitor {
+class RVectorIterator {
+ public:
+  using value_type = T;
+  RVectorIterator(SEXP x, int64_t start)
+      : ptr_x_(reinterpret_cast<const T*>(DATAPTR_RO(x)) + start) {}
+
+  RVectorIterator& operator++() {
+    ++ptr_x_;
+    return *this;
+  }
+
+  const T operator*() const { return *ptr_x_; }
+
+ private:
+  const T* ptr_x_;
+};
+
+template <typename T>
+class RVectorIterator_ALTREP {
+ public:
+  using value_type = T;
   using data_type =
       typename std::conditional<std::is_same<T, int64_t>::value, double, T>::type;
   using r_vector_type = cpp11::r_vector<data_type>;
+  using r_vector_iterator = typename r_vector_type::const_iterator;
 
-  template <typename AppendNull, typename AppendValue>
-  static Status Visit(SEXP x, int64_t size, AppendNull&& append_null,
-                      AppendValue&& append_value) {
-    r_vector_type values(x);
-    auto it = values.begin();
-
-    for (R_xlen_t i = 0; i < size; i++, ++it) {
-      auto value = GetValue(*it);
+  RVectorIterator_ALTREP(SEXP x, int64_t start)
+      : vector_(x), it_(vector_.begin() + start) {}
 
-      if (is_NA<T>(value)) {
-        RETURN_NOT_OK(append_null());
-      } else {
-        RETURN_NOT_OK(append_value(value));
-      }
-    }
-
-    return Status::OK();
+  RVectorIterator_ALTREP& operator++() {
+    ++it_;
+    return *this;
   }
 
+  const T operator*() const { return GetValue(*it_); }
+
   static T GetValue(data_type x) { return x; }
+
+ private:
+  r_vector_type vector_;
+  r_vector_iterator it_;
 };
 
 template <>
-int64_t RVectorVisitor<int64_t>::GetValue(double x) {
+int64_t RVectorIterator_ALTREP<int64_t>::GetValue(double x) {
   int64_t value;
   memcpy(&value, &x, sizeof(int64_t));
   return value;
 }
 
+template <typename Iterator, typename AppendNull, typename AppendValue>
+Status VisitVector(Iterator it, int64_t n, AppendNull&& append_null,
+                   AppendValue&& append_value) {
+  for (R_xlen_t i = 0; i < n; i++, ++it) {
+    auto value = *it;
+
+    if (is_NA<typename Iterator::value_type>(value)) {
+      RETURN_NOT_OK(append_null());
+    } else {
+      RETURN_NOT_OK(append_value(value));
+    }
+  }
+
+  return Status::OK();
+}
+
 class RConverter : public Converter<SEXP, RConversionOptions> {
  public:
   virtual Status Append(SEXP) { return Status::NotImplemented("Append"); }
 
   virtual Status Extend(SEXP values, int64_t size) {
-    return Status::NotImplemented("ExtendMasked");
+    return Status::NotImplemented("Extend");
+  }
+
+  // by default, just delay the ->Extend(), i.e. not run in parallel
+  // implementations might redefine so that ->Extend() is run in parallel
+  virtual void DelayedExtend(SEXP values, int64_t size, RTasks& tasks) {
+    auto task = [this, values, size]() { return this->Extend(values, size); };
+    tasks.Append(false, task);
   }
 
   virtual Status ExtendMasked(SEXP values, SEXP mask, int64_t size) {
@@ -312,6 +439,7 @@ class RPrimitiveConverter<T, enable_if_null<T>>
   }
 };
 
+// TODO: extend this to BooleanType, but this needs some work in RConvert
 template <typename T>
 class RPrimitiveConverter<
     T, enable_if_t<is_integer_type<T>::value || is_floating_type<T>::value>>
@@ -321,13 +449,13 @@ class RPrimitiveConverter<
     auto rtype = GetVectorType(x);
     switch (rtype) {
       case UINT8:
-        return AppendRangeDispatch<unsigned char>(x, size);
+        return ExtendDispatch<unsigned char>(x, size);
       case INT32:
-        return AppendRangeDispatch<int>(x, size);
+        return ExtendDispatch<int>(x, size);
       case FLOAT64:
-        return AppendRangeDispatch<double>(x, size);
+        return ExtendDispatch<double>(x, size);
       case INT64:
-        return AppendRangeDispatch<int64_t>(x, size);
+        return ExtendDispatch<int64_t>(x, size);
 
       default:
         break;
@@ -336,83 +464,49 @@ class RPrimitiveConverter<
     return Status::Invalid("cannot convert");
   }
 
- private:
-  template <typename r_value_type>
-  Status AppendRangeLoopDifferentType(SEXP x, int64_t size) {
-    RETURN_NOT_OK(this->Reserve(size));
-
-    auto append_value = [this](r_value_type value) {
-      ARROW_ASSIGN_OR_RAISE(auto converted,
-                            RConvert::Convert(this->primitive_type_, value));
-      this->primitive_builder_->UnsafeAppend(converted);
-      return Status::OK();
-    };
-    auto append_null = [this]() {
-      this->primitive_builder_->UnsafeAppendNull();
-      return Status::OK();
-    };
-    return RVectorVisitor<r_value_type>::Visit(x, size, append_null, append_value);
+  void DelayedExtend(SEXP values, int64_t size, RTasks& tasks) override {
+    auto task = [this, values, size]() { return this->Extend(values, size); };
+    tasks.Append(!ALTREP(values), std::move(task));
   }
 
+ private:
   template <typename r_value_type>
-  Status AppendRangeSameTypeNotALTREP(SEXP x, int64_t size) {
-    auto p = reinterpret_cast<const r_value_type*>(DATAPTR_RO(x));
-    auto p_end = p + size;
-
-    auto first_na = std::find_if(p, p_end, is_NA<r_value_type>);
-
-    if (first_na == p_end) {
-      // no nulls, so we can use AppendValues() directly
-      return this->primitive_builder_->AppendValues(p, p_end);
-    }
-
-    // Append all values up until the first NULL
-    RETURN_NOT_OK(this->primitive_builder_->AppendValues(p, first_na));
-
-    // loop for the remaining
-    RETURN_NOT_OK(this->primitive_builder_->Reserve(p_end - first_na));
-    p = first_na;
-    for (; p < p_end; ++p) {
-      r_value_type value = *p;
-      if (is_NA<r_value_type>(value)) {
-        this->primitive_builder_->UnsafeAppendNull();
-      } else {
-        this->primitive_builder_->UnsafeAppend(value);
-      }
+  Status ExtendDispatch(SEXP x, int64_t size) {
+    if (ALTREP(x)) {
+      // `x` is an ALTREP R vector storing `r_value_type`
+      // and that type matches exactly the type of the array this is building
+      return Extend_impl(RVectorIterator_ALTREP<r_value_type>(x, 0), size);
+    } else {
+      // `x` is not an ALTREP vector so we have direct access to a range of values
+      return Extend_impl(RVectorIterator<r_value_type>(x, 0), size);
     }
-    return Status::OK();
   }
 
-  template <typename r_value_type>
-  Status AppendRangeSameTypeALTREP(SEXP x, int64_t size) {
-    // if it is altrep, then we use cpp11 looping
-    // without needing to convert
+  template <typename Iterator>
+  Status Extend_impl(Iterator it, int64_t size) {
+    using r_value_type = typename Iterator::value_type;
     RETURN_NOT_OK(this->primitive_builder_->Reserve(size));
-    typename RVectorVisitor<r_value_type>::r_vector_type vec(x);
-    auto it = vec.begin();
-    for (R_xlen_t i = 0; i < size; i++, ++it) {
-      r_value_type value = RVectorVisitor<r_value_type>::GetValue(*it);
-      if (is_NA<r_value_type>(value)) {
-        this->primitive_builder_->UnsafeAppendNull();
-      } else {
-        this->primitive_builder_->UnsafeAppend(value);
-      }
-    }
-    return Status::OK();
-  }
 
-  template <typename r_value_type>
-  Status AppendRangeDispatch(SEXP x, int64_t size) {
+    auto append_null = [this]() {
+      this->primitive_builder_->UnsafeAppendNull();
+      return Status::OK();
+    };
+
     if (std::is_same<typename T::c_type, r_value_type>::value) {
-      if (!ALTREP(x)) {
-        return AppendRangeSameTypeNotALTREP<r_value_type>(x, size);
-      } else {
-        return AppendRangeSameTypeALTREP<r_value_type>(x, size);
-      }
+      auto append_value = [this](r_value_type value) {
+        this->primitive_builder_->UnsafeAppend(value);
+        return Status::OK();
+      };
+      return VisitVector(it, size, append_null, append_value);
+    } else {
+      auto append_value = [this](r_value_type value) {
+        ARROW_ASSIGN_OR_RAISE(auto converted,
+                              RConvert::Convert(this->primitive_type_, value));
+        this->primitive_builder_->UnsafeAppend(converted);
+        return Status::OK();
+      };
+      return VisitVector(it, size, append_null, append_value);
     }
-
-    // here if underlying types differ so going
-    return AppendRangeLoopDifferentType<r_value_type>(x, size);
   }
 };
 
@@ -425,17 +519,33 @@ class RPrimitiveConverter<T, enable_if_t<is_boolean_type<T>::value>>
     if (rtype != BOOLEAN) {
       return Status::Invalid("Expecting a logical vector");
     }
+
+    if (ALTREP(x)) {
+      return Extend_impl(RVectorIterator_ALTREP<cpp11::r_bool>(x, 0), size);
+    } else {
+      return Extend_impl(RVectorIterator<cpp11::r_bool>(x, 0), size);
+    }
+  }
+
+  void DelayedExtend(SEXP values, int64_t size, RTasks& tasks) override {
+    auto task = [this, values, size]() { return this->Extend(values, size); };
+    tasks.Append(!ALTREP(values), std::move(task));
+  }
+
+ private:
+  template <typename Iterator>
+  Status Extend_impl(Iterator it, int64_t size) {
     RETURN_NOT_OK(this->Reserve(size));
 
-    auto append_value = [this](cpp11::r_bool value) {
-      this->primitive_builder_->UnsafeAppend(value == 1);
-      return Status::OK();
-    };
     auto append_null = [this]() {
       this->primitive_builder_->UnsafeAppendNull();
       return Status::OK();
     };
-    return RVectorVisitor<cpp11::r_bool>::Visit(x, size, append_null, append_value);
+    auto append_value = [this](cpp11::r_bool value) {
+      this->primitive_builder_->UnsafeAppend(value == 1);
+      return Status::OK();
+    };
+    return VisitVector(it, size, append_null, append_value);
   }
 };
 
@@ -444,17 +554,15 @@ class RPrimitiveConverter<T, enable_if_t<is_date_type<T>::value>>
     : public PrimitiveConverter<T, RConverter> {
  public:
   Status Extend(SEXP x, int64_t size) override {
-    RETURN_NOT_OK(this->Reserve(size));
-
     switch (GetVectorType(x)) {
       case DATE_INT:
-        return AppendRange_Date<int>(x, size);
+        return AppendRange_Date_dispatch<int>(x, size);
 
       case DATE_DBL:
-        return AppendRange_Date<double>(x, size);
+        return AppendRange_Date_dispatch<double>(x, size);
 
       case POSIXCT:
-        return AppendRange_Posixct(x, size);
+        return AppendRange_Posixct_dispatch(x, size);
 
       default:
         break;
@@ -463,9 +571,26 @@ class RPrimitiveConverter<T, enable_if_t<is_date_type<T>::value>>
     return Status::Invalid("cannot convert to date type ");
   }
 
+  void DelayedExtend(SEXP values, int64_t size, RTasks& tasks) override {
+    auto task = [this, values, size]() { return this->Extend(values, size); };
+    tasks.Append(!ALTREP(values), std::move(task));
+  }
+
  private:
   template <typename r_value_type>
-  Status AppendRange_Date(SEXP x, int64_t size) {
+  Status AppendRange_Date_dispatch(SEXP x, int64_t size) {
+    if (ALTREP(x)) {
+      return AppendRange_Date(RVectorIterator_ALTREP<r_value_type>(x, 0), size);
+    } else {
+      return AppendRange_Date(RVectorIterator<r_value_type>(x, 0), size);
+    }
+  }
+
+  template <typename Iterator>
+  Status AppendRange_Date(Iterator it, int64_t size) {
+    using r_value_type = typename Iterator::value_type;
+    RETURN_NOT_OK(this->Reserve(size));
+
     auto append_null = [this]() {
       this->primitive_builder_->UnsafeAppendNull();
       return Status::OK();
@@ -474,21 +599,31 @@ class RPrimitiveConverter<T, enable_if_t<is_date_type<T>::value>>
       this->primitive_builder_->UnsafeAppend(FromRDate(this->primitive_type_, value));
       return Status::OK();
     };
+    return VisitVector(it, size, append_null, append_value);
+  }
 
-    return RVectorVisitor<r_value_type>::Visit(x, size, append_null, append_value);
+  Status AppendRange_Posixct_dispatch(SEXP x, int64_t size) {
+    if (ALTREP(x)) {
+      return AppendRange_Posixct(RVectorIterator_ALTREP<double>(x, 0), size);
+    } else {
+      return AppendRange_Posixct(RVectorIterator<double>(x, 0), size);
+    }
   }
 
-  Status AppendRange_Posixct(SEXP x, int64_t size) {
+  template <typename Iterator>
+  Status AppendRange_Posixct(Iterator it, int64_t size) {
+    using r_value_type = typename Iterator::value_type;
+    RETURN_NOT_OK(this->Reserve(size));
+
     auto append_null = [this]() {
       this->primitive_builder_->UnsafeAppendNull();
       return Status::OK();
     };
-    auto append_value = [this](double value) {
+    auto append_value = [this](r_value_type value) {
       this->primitive_builder_->UnsafeAppend(FromPosixct(this->primitive_type_, value));
       return Status::OK();
     };
-
-    return RVectorVisitor<double>::Visit(x, size, append_null, append_value);
+    return VisitVector(it, size, append_null, append_value);
   }
 
   static int FromRDate(const Date32Type*, int from) { return from; }
@@ -553,16 +688,27 @@ class RPrimitiveConverter<T, enable_if_t<is_time_type<T>::value>>
     auto multiplier =
         get_TimeUnit_multiplier(this->primitive_type_->unit()) * difftime_multiplier;
 
+    auto append_null = [this]() {
+      this->primitive_builder_->UnsafeAppendNull();
+      return Status::OK();
+    };
     auto append_value = [this, multiplier](double value) {
       auto converted = static_cast<typename T::c_type>(value * multiplier);
       this->primitive_builder_->UnsafeAppend(converted);
       return Status::OK();
     };
-    auto append_null = [this]() {
-      this->primitive_builder_->UnsafeAppendNull();
-      return Status::OK();
-    };
-    return RVectorVisitor<double>::Visit(x, size, append_null, append_value);
+
+    if (ALTREP(x)) {
+      return VisitVector(RVectorIterator_ALTREP<double>(x, 0), size, append_null,
+                         append_value);
+    } else {
+      return VisitVector(RVectorIterator<double>(x, 0), size, append_null, append_value);
+    }
+  }
+
+  void DelayedExtend(SEXP values, int64_t size, RTasks& tasks) override {
+    auto task = [this, values, size]() { return this->Extend(values, size); };
+    tasks.Append(!ALTREP(values), std::move(task));
   }
 };
 
@@ -589,7 +735,18 @@ class RPrimitiveConverter<T, enable_if_t<is_timestamp_type<T>::value>>
       this->primitive_builder_->UnsafeAppendNull();
       return Status::OK();
     };
-    return RVectorVisitor<double>::Visit(x, size, append_null, append_value);
+
+    if (ALTREP(x)) {
+      return VisitVector(RVectorIterator_ALTREP<double>(x, 0), size, append_null,
+                         append_value);
+    } else {
+      return VisitVector(RVectorIterator<double>(x, 0), size, append_null, append_value);
+    }
+  }
+
+  void DelayedExtend(SEXP values, int64_t size, RTasks& tasks) override {
+    auto task = [this, values, size]() { return this->Extend(values, size); };
+    tasks.Append(!ALTREP(values), std::move(task));
   }
 };
 
@@ -633,17 +790,23 @@ class RPrimitiveConverter<T, enable_if_binary<T>>
     RETURN_NOT_OK(this->Reserve(size));
     RETURN_NOT_OK(check_binary(x, size));
 
+    auto append_null = [this]() {
+      this->primitive_builder_->UnsafeAppendNull();
+      return Status::OK();
+    };
+
     auto append_value = [this](SEXP raw) {
       R_xlen_t n = XLENGTH(raw);
       ARROW_RETURN_NOT_OK(this->primitive_builder_->ReserveData(n));
       this->primitive_builder_->UnsafeAppend(RAW_RO(raw), static_cast<OffsetType>(n));
       return Status::OK();
     };
-    auto append_null = [this]() {
-      this->primitive_builder_->UnsafeAppendNull();
-      return Status::OK();
-    };
-    return RVectorVisitor<SEXP>::Visit(x, size, append_null, append_value);
+    return VisitVector(RVectorIterator<SEXP>(x, 0), size, append_null, append_value);
+  }
+
+  void DelayedExtend(SEXP values, int64_t size, RTasks& tasks) override {
+    auto task = [this, values, size]() { return this->Extend(values, size); };
+    tasks.Append(!ALTREP(values), std::move(task));
   }
 };
 
@@ -655,6 +818,11 @@ class RPrimitiveConverter<T, enable_if_t<std::is_same<T, FixedSizeBinaryType>::v
     RETURN_NOT_OK(this->Reserve(size));
     RETURN_NOT_OK(check_binary(x, size));
 
+    auto append_null = [this]() {
+      this->primitive_builder_->UnsafeAppendNull();
+      return Status::OK();
+    };
+
     auto append_value = [this](SEXP raw) {
       R_xlen_t n = XLENGTH(raw);
 
@@ -665,11 +833,12 @@ class RPrimitiveConverter<T, enable_if_t<std::is_same<T, FixedSizeBinaryType>::v
       this->primitive_builder_->UnsafeAppend(RAW_RO(raw));
       return Status::OK();
     };
-    auto append_null = [this]() {
-      this->primitive_builder_->UnsafeAppendNull();
-      return Status::OK();
-    };
-    return RVectorVisitor<SEXP>::Visit(x, size, append_null, append_value);
+    return VisitVector(RVectorIterator<SEXP>(x, 0), size, append_null, append_value);
+  }
+
+  void DelayedExtend(SEXP values, int64_t size, RTasks& tasks) override {
+    auto task = [this, values, size]() { return this->Extend(values, size); };
+    tasks.Append(!ALTREP(values), std::move(task));
   }
 };
 
@@ -680,33 +849,41 @@ class RPrimitiveConverter<T, enable_if_string_like<T>>
   using OffsetType = typename T::offset_type;
 
   Status Extend(SEXP x, int64_t size) override {
-    int64_t start = 0;
     RVectorType rtype = GetVectorType(x);
     if (rtype != STRING) {
       return Status::Invalid("Expecting a character vector");
     }
+    return UnsafeAppendUtf8Strings(arrow::r::utf8_strings(x), size);
+  }
+
+  void DelayedExtend(SEXP values, int64_t size, RTasks& tasks) override {
+    auto task = [this, values, size]() { return this->Extend(values, size); };
+    // TODO: refine this., e.g. extract setup from Extend()
+    tasks.Append(false, std::move(task));
+  }
 
-    cpp11::strings s(arrow::r::utf8_strings(x));
+ private:
+  Status UnsafeAppendUtf8Strings(const cpp11::strings& s, int64_t size) {
     RETURN_NOT_OK(this->primitive_builder_->Reserve(s.size()));
-    auto it = s.begin() + start;
+    const SEXP* p_strings = reinterpret_cast<const SEXP*>(DATAPTR_RO(s));
 
     // we know all the R strings are utf8 already, so we can get
     // a definite size and then use UnsafeAppend*()
     int64_t total_length = 0;
-    for (R_xlen_t i = 0; i < size; i++, ++it) {
-      cpp11::r_string si = *it;
-      total_length += cpp11::is_na(si) ? 0 : si.size();
+    for (R_xlen_t i = 0; i < size; i++, ++p_strings) {
+      SEXP si = *p_strings;
+      total_length += si == NA_STRING ? 0 : LENGTH(si);
     }
     RETURN_NOT_OK(this->primitive_builder_->ReserveData(total_length));
 
     // append
-    it = s.begin() + start;
-    for (R_xlen_t i = 0; i < size; i++, ++it) {
-      cpp11::r_string si = *it;
+    p_strings = reinterpret_cast<const SEXP*>(DATAPTR_RO(s));
+    for (R_xlen_t i = 0; i < size; i++, ++p_strings) {
+      SEXP si = *p_strings;
       if (si == NA_STRING) {
         this->primitive_builder_->UnsafeAppendNull();
       } else {
-        this->primitive_builder_->UnsafeAppend(CHAR(si), si.size());
+        this->primitive_builder_->UnsafeAppend(CHAR(si), LENGTH(si));
       }
     }
 
@@ -746,25 +923,24 @@ class RDictionaryConverter<ValueType, enable_if_has_string_view<ValueType>>
   using BuilderType = DictionaryBuilder<ValueType>;
 
   Status Extend(SEXP x, int64_t size) override {
-    // first we need to handle the levels
-    cpp11::strings levels(Rf_getAttrib(x, R_LevelsSymbol));
-    auto memo_array = arrow::r::vec_to_arrow(levels, utf8(), false);
-    RETURN_NOT_OK(this->value_builder_->InsertMemoValues(*memo_array));
+    RETURN_NOT_OK(ExtendSetup(x, size));
+    return ExtendImpl(x, size, GetCharLevels(x));
+  }
 
-    // then we can proceed
-    RETURN_NOT_OK(this->Reserve(size));
+  void DelayedExtend(SEXP values, int64_t size, RTasks& tasks) override {
+    // the setup runs synchronously first
+    Status setup = ExtendSetup(values, size);
 
-    RVectorType rtype = GetVectorType(x);
-    if (rtype != FACTOR) {
-      return Status::Invalid("invalid R type to convert to dictionary");
-    }
+    if (!setup.ok()) {
+      // if that fails, propagate the error
+      tasks.Append(false, [setup]() { return setup; });
+    } else {
+      auto char_levels = GetCharLevels(values);
 
-    auto append_value = [this, levels](int value) {
-      SEXP s = STRING_ELT(levels, value - 1);
-      return this->value_builder_->Append(CHAR(s));
-    };
-    auto append_null = [this]() { return this->value_builder_->AppendNull(); };
-    return RVectorVisitor<int>::Visit(x, size, append_null, append_value);
+      tasks.Append(true, [this, values, size, char_levels]() {
+        return this->ExtendImpl(values, size, char_levels);
+      });
+    }
   }
 
   Result<std::shared_ptr<Array>> ToArray() override {
@@ -780,6 +956,44 @@ class RDictionaryConverter<ValueType, enable_if_has_string_view<ValueType>>
 
     return std::make_shared<DictionaryArray>(result->data());
   }
+
+ private:
+  std::vector<const char*> GetCharLevels(SEXP x) {
+    SEXP levels = Rf_getAttrib(x, R_LevelsSymbol);
+    R_xlen_t n_levels = XLENGTH(levels);
+    std::vector<const char*> char_levels(XLENGTH(levels));
+    const SEXP* p_levels = reinterpret_cast<const SEXP*>(DATAPTR_RO(levels));
+    for (R_xlen_t i = 0; i < n_levels; i++, ++p_levels) {
+      char_levels[i] = CHAR(*p_levels);
+    }
+
+    return char_levels;
+  }
+
+  Status ExtendSetup(SEXP x, int64_t size) {
+    RVectorType rtype = GetVectorType(x);
+    if (rtype != FACTOR) {
+      return Status::Invalid("invalid R type to convert to dictionary");
+    }
+
+    // first we need to handle the levels
+    SEXP levels = Rf_getAttrib(x, R_LevelsSymbol);
+    auto memo_array = arrow::r::vec_to_arrow(levels, utf8(), false);
+    RETURN_NOT_OK(this->value_builder_->InsertMemoValues(*memo_array));
+
+    // then we can proceed
+    return this->Reserve(size);
+  }
+
+  Status ExtendImpl(SEXP values, int64_t size,
+                    const std::vector<const char*>& char_levels) {
+    auto append_null = [this]() { return this->value_builder_->AppendNull(); };
+    auto append_value = [this, &char_levels](int value) {
+      return this->value_builder_->Append(char_levels[value - 1]);
+    };
+
+    return VisitVector(RVectorIterator<int>(values, 0), size, append_null, append_value);
+  }
 };
 
 template <typename T, typename Enable = void>
@@ -808,15 +1022,28 @@ class RListConverter : public ListConverter<T, RConverter, RConverterTrait> {
       return Status::Invalid("Cannot convert to list type");
     }
 
+    auto append_null = [this]() { return this->list_builder_->AppendNull(); };
+
     auto append_value = [this](SEXP value) {
+      // TODO: if we decide that this can be run concurrently
+      //       we'll have to do vec_size() upfront
       int n = vctrs::vec_size(value);
 
       RETURN_NOT_OK(this->list_builder_->ValidateOverflow(n));
       RETURN_NOT_OK(this->list_builder_->Append());
       return this->value_converter_.get()->Extend(value, n);
     };
-    auto append_null = [this]() { return this->list_builder_->AppendNull(); };
-    return RVectorVisitor<SEXP>::Visit(x, size, append_null, append_value);
+
+    return VisitVector(RVectorIterator<SEXP>(x, 0), size, append_null, append_value);
+  }
+
+  void DelayedExtend(SEXP values, int64_t size, RTasks& tasks) override {
+    // NOTE: because Extend::[]append_value() calls Extend() on the
+    // value converter, which might require a setup step, it feels
+    // complicated to run this task concurrently.
+    //
+    // TODO: perhaps allow running concurrently in some cases, e.g. list(int32(!altrep))
+    tasks.Append(false, [this, values, size]() { return this->Extend(values, size); });
   }
 };
 
@@ -830,6 +1057,45 @@ struct RConverterTrait<StructType> {
 class RStructConverter : public StructConverter<RConverter, RConverterTrait> {
  public:
   Status Extend(SEXP x, int64_t size) override {
+    RETURN_NOT_OK(ExtendSetup(x, size));
+
+    auto fields = this->struct_type_->fields();
+    R_xlen_t n_columns = XLENGTH(x);
+    for (R_xlen_t i = 0; i < n_columns; i++) {
+      auto status = children_[i]->Extend(VECTOR_ELT(x, i), size);
+      if (!status.ok()) {
+        return Status::Invalid("Problem with column ", (i + 1), " (", fields[i]->name(),
+                               "): ", status.ToString());
+      }
+    }
+
+    return Status::OK();
+  }
+
+  void DelayedExtend(SEXP values, int64_t size, RTasks& tasks) override {
+    // the setup runs synchronously first
+    Status setup = ExtendSetup(values, size);
+
+    if (!setup.ok()) {
+      // if that fails, propagate the error
+      tasks.Append(false, [setup]() { return setup; });
+    } else {
+      // otherwise deal with each column, maybe concurrently
+      auto fields = this->struct_type_->fields();
+      R_xlen_t n_columns = XLENGTH(values);
+
+      for (R_xlen_t i = 0; i < n_columns; i++) {
+        children_[i]->DelayedExtend(VECTOR_ELT(values, i), size, tasks);
+      }
+    }
+  }
+
+ protected:
+  Status Init(MemoryPool* pool) override {
+    return StructConverter<RConverter, RConverterTrait>::Init(pool);
+  }
+
+  Status ExtendSetup(SEXP x, int64_t size) {
     // check that x is compatible
     R_xlen_t n_columns = XLENGTH(x);
 
@@ -860,15 +1126,6 @@ class RStructConverter : public StructConverter<RConverter, RConverterTrait> {
       return Status::OK();
     }));
 
-    for (R_xlen_t i = 0; i < n_columns; i++) {
-      std::string name(x_names[i]);
-      if (name != fields[i]->name()) {
-        return Status::RError(
-            "Field name in position ", i, " (", fields[i]->name(),
-            ") does not match the name of the column of the data frame (", name, ")");
-      }
-    }
-
     for (R_xlen_t i = 0; i < n_columns; i++) {
       SEXP x_i = VECTOR_ELT(x, i);
       if (vctrs::vec_size(x_i) < size) {
@@ -882,21 +1139,8 @@ class RStructConverter : public StructConverter<RConverter, RConverterTrait> {
       RETURN_NOT_OK(struct_builder_->Append());
     }
 
-    for (R_xlen_t i = 0; i < n_columns; i++) {
-      auto status = children_[i]->Extend(VECTOR_ELT(x, i), size);
-      if (!status.ok()) {
-        return Status::Invalid("Problem with column ", (i + 1), " (", fields[i]->name(),
-                               "): ", status.ToString());
-      }
-    }
-
     return Status::OK();
   }
-
- protected:
-  Status Init(MemoryPool* pool) override {
-    return StructConverter<RConverter, RConverterTrait>::Init(pool);
-  }
 };
 
 template <>
@@ -992,6 +1236,37 @@ std::shared_ptr<arrow::Array> vec_to_arrow__reuse_memory(SEXP x) {
   cpp11::stop("Unreachable: you might need to fix can_reuse_memory()");
 }
 
+Status vector_to_Array(SEXP x, const std::shared_ptr<arrow::DataType>& type,
+                       bool type_inferred,
+                       std::shared_ptr<internal::TaskGroup>& task_group,
+                       std::shared_ptr<arrow::Array>& out) {
+  // short circuit if `x` is already an Array
+  if (Rf_inherits(x, "Array")) {
+    out = cpp11::as_cpp<std::shared_ptr<arrow::Array>>(x);
+    return Status::OK();
+  }
+
+  RConversionOptions options;
+  options.strict = !type_inferred;
+  options.type = type;
+  options.size = vctrs::vec_size(x);
+
+  // maybe short circuit when zero-copy is possible
+  if (can_reuse_memory(x, options.type)) {
+    out = vec_to_arrow__reuse_memory(x);
+    return Status::OK();
+  }
+
+  // otherwise go through the converter api
+  auto converter = ValueOrStop(MakeConverter<RConverter, RConverterTrait>(
+      options.type, options, gc_memory_pool()));
+
+  RETURN_NOT_OK(converter->Extend(x, options.size));
+  ARROW_ASSIGN_OR_RAISE(out, converter->ToArray());
+
+  return Status::OK();
+}
+
 std::shared_ptr<arrow::Array> vec_to_arrow(SEXP x,
                                            const std::shared_ptr<arrow::DataType>& type,
                                            bool type_inferred) {
@@ -1015,12 +1290,230 @@ std::shared_ptr<arrow::Array> vec_to_arrow(SEXP x,
       options.type, options, gc_memory_pool()));
 
   StopIfNotOk(converter->Extend(x, options.size));
+
   return ValueOrStop(converter->ToArray());
 }
 
+// TODO: most of this is very similar to MakeSimpleArray, just adapted to
+//       leverage concurrency. Maybe some refactoring needed.
+template <typename RVector, typename Type>
+bool vector_from_r_memory_impl(SEXP x, const std::shared_ptr<DataType>& type,
+                               std::vector<std::shared_ptr<arrow::ChunkedArray>>& columns,
+                               int j, RTasks& tasks) {
+  RVector vec(x);
+  using value_type = typename arrow::TypeTraits<Type>::ArrayType::value_type;
+  auto buffer = std::make_shared<RBuffer<RVector>>(vec);
+
+  tasks.Append(true, [buffer, x, &columns, j]() {
+    std::vector<std::shared_ptr<Buffer>> buffers{nullptr, buffer};
+
+    auto n = XLENGTH(x);
+    auto p_x_start = reinterpret_cast<const value_type*>(DATAPTR_RO(x));
+    auto p_x_end = p_x_start + n;
+
+    int null_count = 0;
+    auto first_na = std::find_if(p_x_start, p_x_end, is_NA<value_type>);
+
+    if (first_na < p_x_end) {
+      auto null_bitmap =
+          ValueOrStop(AllocateBuffer(BitUtil::BytesForBits(n), gc_memory_pool()));
+      internal::FirstTimeBitmapWriter bitmap_writer(null_bitmap->mutable_data(), 0, n);
+
+      // first loop to clear all the bits before the first NA
+      auto k = std::distance(p_x_start, first_na);
+      int i = 0;
+      for (; i < k; i++, bitmap_writer.Next()) {
+        bitmap_writer.Set();
+      }
+
+      auto p_vec = first_na;
+      // then finish
+      for (; i < n; i++, bitmap_writer.Next(), ++p_vec) {
+        if (is_NA<value_type>(*p_vec)) {
+          bitmap_writer.Clear();
+          null_count++;
+        } else {
+          bitmap_writer.Set();
+        }
+      }
+
+      bitmap_writer.Finish();
+      buffers[0] = std::move(null_bitmap);
+    }
+
+    auto data = ArrayData::Make(std::make_shared<Type>(), n, std::move(buffers),
+                                null_count, 0 /*offset*/);
+    auto array = std::make_shared<typename TypeTraits<Type>::ArrayType>(data);
+    columns[j] = std::make_shared<arrow::ChunkedArray>(array);
+
+    return Status::OK();
+  });
+
+  return true;
+}
+
+bool vector_from_r_memory(SEXP x, const std::shared_ptr<DataType>& type,
+                          std::vector<std::shared_ptr<arrow::ChunkedArray>>& columns,
+                          int j, RTasks& tasks) {
+  if (ALTREP(x)) return false;
+
+  switch (type->id()) {
+    case Type::INT32:
+      return TYPEOF(x) == INTSXP && !OBJECT(x) &&
+             vector_from_r_memory_impl<cpp11::integers, Int32Type>(x, type, columns, j,
+                                                                   tasks);
+
+    case Type::DOUBLE:
+      return TYPEOF(x) == REALSXP && !OBJECT(x) &&
+             vector_from_r_memory_impl<cpp11::doubles, DoubleType>(x, type, columns, j,
+                                                                   tasks);
+
+    case Type::UINT8:
+      return TYPEOF(x) == RAWSXP && !OBJECT(x) &&
+             vector_from_r_memory_impl<cpp11::raws, UInt8Type>(x, type, columns, j,
+                                                               tasks);
+
+    case Type::INT64:
+      return TYPEOF(x) == REALSXP && Rf_inherits(x, "integer64") &&
+             vector_from_r_memory_impl<cpp11::doubles, Int64Type>(x, type, columns, j,
+                                                                  tasks);
+    default:
+      break;
+  }
+
+  return false;
+}
+
 }  // namespace r
 }  // namespace arrow
 
+arrow::Status check_consistent_column_length(
+    const std::vector<std::shared_ptr<arrow::ChunkedArray>>& columns) {
+  if (columns.size()) {
+    int64_t num_rows = columns[0]->length();
+
+    for (const auto& column : columns) {
+      if (column->length() != num_rows) {
+        return arrow::Status::Invalid("All columns must have the same length");
+      }
+    }
+  }
+
+  return arrow::Status::OK();
+}
+
+// [[arrow::export]]
+std::shared_ptr<arrow::Table> Table__from_dots(SEXP lst, SEXP schema_sxp,
+                                               bool use_threads) {
+  bool infer_schema = !Rf_inherits(schema_sxp, "Schema");
+
+  int num_fields;
+  StopIfNotOk(arrow::r::count_fields(lst, &num_fields));
+
+  // schema + metadata
+  std::shared_ptr<arrow::Schema> schema;
+  StopIfNotOk(arrow::r::InferSchemaFromDots(lst, schema_sxp, num_fields, schema));
+  StopIfNotOk(arrow::r::AddMetadataFromDots(lst, num_fields, schema));
+
+  if (!infer_schema && schema->num_fields() != num_fields) {
+    cpp11::stop("incompatible. schema has %d fields, and %d columns are supplied",
+                schema->num_fields(), num_fields);
+  }
+
+  // table
+  std::vector<std::shared_ptr<arrow::ChunkedArray>> columns(num_fields);
+
+  if (!infer_schema) {
+    auto check_name = [&](int j, SEXP, cpp11::r_string name) {
+      std::string cpp_name(name);
+      if (schema->field(j)->name() != cpp_name) {
+        cpp11::stop("field at index %d has name '%s' != '%s'", j + 1,
+                    schema->field(j)->name().c_str(), cpp_name.c_str());
+      }
+    };
+    arrow::r::TraverseDots(lst, num_fields, check_name);
+  }
+
+  // must be careful to avoid R stop() until the tasks
+  // are finished, i.e. after tasks.Finish()
+  arrow::r::RTasks tasks(use_threads);
+
+  arrow::Status status = arrow::Status::OK();
+
+  auto flatten_lst = arrow::r::FlattenDots(lst, num_fields);
+  std::vector<std::unique_ptr<arrow::r::RConverter>> converters(num_fields);
+
+  // init converters
+  for (int j = 0; j < num_fields && status.ok(); j++) {
+    SEXP x = flatten_lst[j];
+
+    if (Rf_inherits(x, "ChunkedArray")) {
+      columns[j] = cpp11::as_cpp<std::shared_ptr<arrow::ChunkedArray>>(x);
+    } else if (Rf_inherits(x, "Array")) {
+      columns[j] = std::make_shared<arrow::ChunkedArray>(
+          cpp11::as_cpp<std::shared_ptr<arrow::Array>>(x));
+    } else {
+      arrow::r::RConversionOptions options;
+      options.strict = !infer_schema;
+      options.type = schema->field(j)->type();
+      options.size = vctrs::vec_size(x);
+
+      // first try to add a task to do a zero copy in parallel
+      if (arrow::r::vector_from_r_memory(x, options.type, columns, j, tasks)) {
+        continue;
+      }
+
+      // if unsuccessful: use RConverter api
+      auto converter_result =
+          arrow::MakeConverter<arrow::r::RConverter, arrow::r::RConverterTrait>(
+              options.type, options, gc_memory_pool());
+      if (!converter_result.ok()) {
+        status = converter_result.status();
+        break;
+      }
+      converters[j] = std::move(converter_result.ValueUnsafe());
+    }
+  }
+
+  // if the previous loop didn't break early, spawn
+  // tasks to Extend, maybe in parallel
+  if (status.ok()) {
+    for (int j = 0; j < num_fields; j++) {
+      auto& converter = converters[j];
+      if (converter != nullptr) {
+        converter->DelayedExtend(flatten_lst[j], converter->options().size, tasks);
+      }
+    }
+  }
+
+  // in any case, this needs to wait until all tasks are finished
+  status &= tasks.Finish();
+
+  // nothing is running in parallel here, so we have an opportunity to stop
+  StopIfNotOk(status);
+
+  // then finally convert to chunked arrays in parallel
+  tasks.Reset();
+
+  for (int j = 0; j < num_fields; j++) {
+    tasks.Append(true, [&columns, j, &converters]() {
+      auto& converter = converters[j];
+      if (converter != nullptr) {
+        ARROW_ASSIGN_OR_RAISE(auto array, converter->ToArray());
+        columns[j] = std::make_shared<arrow::ChunkedArray>(array);
+      }
+      return arrow::Status::OK();
+    });
+  }
+  status &= tasks.Finish();
+  StopIfNotOk(status);
+
+  status &= check_consistent_column_length(columns);
+  StopIfNotOk(status);
+
+  return arrow::Table::Make(schema, columns);
+}
+
 // [[arrow::export]]
 SEXP vec_to_arrow(SEXP x, SEXP s_type) {
   if (Rf_inherits(x, "Array")) return x;
diff --git a/r/src/table.cpp b/r/src/table.cpp
index 997d8f137cb..68adefcfd4a 100644
--- a/r/src/table.cpp
+++ b/r/src/table.cpp
@@ -150,21 +150,6 @@ std::shared_ptr<arrow::Table> Table__SelectColumns(
 namespace arrow {
 namespace r {
 
-arrow::Status check_consistent_column_length(
-    const std::vector<std::shared_ptr<arrow::ChunkedArray>>& columns) {
-  if (columns.size()) {
-    int64_t num_rows = columns[0]->length();
-
-    for (const auto& column : columns) {
-      if (column->length() != num_rows) {
-        return arrow::Status::Invalid("All columns must have the same length");
-      }
-    }
-  }
-
-  return arrow::Status::OK();
-}
-
 arrow::Status InferSchemaFromDots(SEXP lst, SEXP schema_sxp, int num_fields,
                                   std::shared_ptr<arrow::Schema>& schema) {
   // maybe a schema was given
@@ -269,33 +254,6 @@ arrow::Status AddMetadataFromDots(SEXP lst, int num_fields,
   return arrow::Status::OK();
 }
 
-arrow::Status CollectTableColumns(
-    SEXP lst, const std::shared_ptr<arrow::Schema>& schema, int num_fields, bool inferred,
-    std::vector<std::shared_ptr<arrow::ChunkedArray>>& columns) {
-  if (!inferred && schema->num_fields() != num_fields) {
-    cpp11::stop("incompatible. schema has %d fields, and %d columns are supplied",
-                schema->num_fields(), num_fields);
-  }
-  auto extract_one_column = [&columns, &schema, inferred](int j, SEXP x,
-                                                          std::string name) {
-    if (!inferred && schema->field(j)->name() != name) {
-      cpp11::stop("field at index %d has name '%s' != '%s'", j + 1,
-                  schema->field(j)->name().c_str(), name.c_str());
-    }
-    if (Rf_inherits(x, "ChunkedArray")) {
-      columns[j] = cpp11::as_cpp<std::shared_ptr<arrow::ChunkedArray>>(x);
-    } else if (Rf_inherits(x, "Array")) {
-      columns[j] = std::make_shared<arrow::ChunkedArray>(
-          cpp11::as_cpp<std::shared_ptr<arrow::Array>>(x));
-    } else {
-      auto array = arrow::r::vec_to_arrow(x, schema->field(j)->type(), inferred);
-      columns[j] = std::make_shared<arrow::ChunkedArray>(array);
-    }
-  };
-  arrow::r::TraverseDots(lst, num_fields, extract_one_column);
-  return arrow::Status::OK();
-}
-
 }  // namespace r
 }  // namespace arrow
 
@@ -325,26 +283,4 @@ std::shared_ptr<arrow::Table> Table__from_record_batches(
   return tab;
 }
 
-// [[arrow::export]]
-std::shared_ptr<arrow::Table> Table__from_dots(SEXP lst, SEXP schema_sxp) {
-  bool infer_schema = !Rf_inherits(schema_sxp, "Schema");
-
-  int num_fields;
-  StopIfNotOk(arrow::r::count_fields(lst, &num_fields));
-
-  // schema + metadata
-  std::shared_ptr<arrow::Schema> schema;
-  StopIfNotOk(arrow::r::InferSchemaFromDots(lst, schema_sxp, num_fields, schema));
-  StopIfNotOk(arrow::r::AddMetadataFromDots(lst, num_fields, schema));
-
-  // table
-  std::vector<std::shared_ptr<arrow::ChunkedArray>> columns(num_fields);
-  StopIfNotOk(
-      arrow::r::CollectTableColumns(lst, schema, num_fields, infer_schema, columns));
-
-  StopIfNotOk(arrow::r::check_consistent_column_length(columns));
-
-  return arrow::Table::Make(schema, columns);
-}
-
 #endif
diff --git a/r/src/type_infer.cpp b/r/src/type_infer.cpp
index 93e51be6462..022a29ea5b2 100644
--- a/r/src/type_infer.cpp
+++ b/r/src/type_infer.cpp
@@ -179,7 +179,7 @@ std::shared_ptr<arrow::DataType> InferArrowType(SEXP x) {
     case REALSXP:
       return InferArrowTypeFromVector<REALSXP>(x);
     case RAWSXP:
-      return int8();
+      return uint8();
     case STRSXP:
       return InferArrowTypeFromVector<STRSXP>(x);
     case VECSXP:
diff --git a/r/tests/testthat/test-type.R b/r/tests/testthat/test-type.R
index a3118be2a2c..1f13466c9dd 100644
--- a/r/tests/testthat/test-type.R
+++ b/r/tests/testthat/test-type.R
@@ -31,7 +31,7 @@ test_that("type() infers from R type", {
   expect_type_equal(type(1:10), int32())
   expect_type_equal(type(1), float64())
   expect_type_equal(type(TRUE), boolean())
-  expect_type_equal(type(raw()), int8())
+  expect_type_equal(type(raw()), uint8())
   expect_type_equal(type(""), utf8())
   expect_type_equal(
     type(example_data$fct),

From 5c821117a0928015a3ef6884ff165c153e2f2e13 Mon Sep 17 00:00:00 2001
From: Dominik Moritz <domoritz@gmail.com>
Date: Wed, 2 Jun 2021 18:49:52 -0700
Subject: [PATCH 332/719] MINOR: [JS] Update committers to JS library

Closes #10441 from domoritz/patch-9

Authored-by: Dominik Moritz <domoritz@gmail.com>
Signed-off-by: Dominik Moritz <domoritz@gmail.com>
---
 js/README.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/js/README.md b/js/README.md
index 201a128b06c..8da6f933649 100644
--- a/js/README.md
+++ b/js/README.md
@@ -210,8 +210,9 @@ Apache Arrow also works on [maintained versions of Node](https://nodejs.org/en/a
 
 Full list of broader Apache Arrow [committers](https://arrow.apache.org/committers/).
 
-* Brian Hulette,  _committer_
-* Paul Taylor, Graphistry, Inc.,  _committer_
+* Brian Hulette, _committer_
+* Paul Taylor, _committer_
+* Dominik Moritz, _committer_
 
 # Powered By Apache Arrow in JS
 

From b1eecc4afc65d6cb39fefef9a10d4bc1efadb29f Mon Sep 17 00:00:00 2001
From: Dominik Moritz <domoritz@gmail.com>
Date: Wed, 2 Jun 2021 18:52:38 -0700
Subject: [PATCH 333/719] ARROW-12703: [JS] Separate Table from DataFrame

Changes:

* **Breaking:** Tables are not DataFrames anymore. You can construct `DataFrame`s just like `Table`s.

Closes #10277 from domoritz/dataframe

Authored-by: Dominik Moritz <domoritz@gmail.com>
Signed-off-by: Dominik Moritz <domoritz@gmail.com>
---
 js/src/compute/dataframe.ts     |  15 +-
 js/src/table.ts                 |  32 ++--
 js/test/unit/dataframe-tests.ts | 291 ++++++++++++++++++++++++++++++++
 js/test/unit/table-tests.ts     | 242 +-------------------------
 4 files changed, 318 insertions(+), 262 deletions(-)
 create mode 100644 js/test/unit/dataframe-tests.ts

diff --git a/js/src/compute/dataframe.ts b/js/src/compute/dataframe.ts
index ecebce09394..e9df3719490 100644
--- a/js/src/compute/dataframe.ts
+++ b/js/src/compute/dataframe.ts
@@ -29,11 +29,16 @@ export type BindFunc = (batch: RecordBatch) => void;
 /** @ignore */
 export type NextFunc = (idx: number, batch: RecordBatch) => void;
 
-Table.prototype.countBy = function(this: Table, name: Col | string) { return new DataFrame(this.chunks).countBy(name); };
-Table.prototype.scan = function(this: Table, next: NextFunc, bind?: BindFunc) { return new DataFrame(this.chunks).scan(next, bind); };
-Table.prototype.scanReverse = function(this: Table, next: NextFunc, bind?: BindFunc) { return new DataFrame(this.chunks).scanReverse(next, bind); };
-Table.prototype.filter = function(this: Table, predicate: Predicate): FilteredDataFrame { return new DataFrame(this.chunks).filter(predicate); };
-
+/**
+ * `DataFrame` extends {@link Table} with support for predicate filtering.
+ *
+ * You can construct `DataFrames` like tables or convert a `Table` to a `DataFrame`
+ * with the constructor.
+ *
+ * ```ts
+ * const df = new DataFrame(table);
+ * ```
+ */
 export class DataFrame<T extends { [key: string]: DataType } = any> extends Table<T> {
     public filter(predicate: Predicate): FilteredDataFrame<T> {
         return new FilteredDataFrame<T>(this.chunks, predicate);
diff --git a/js/src/table.ts b/js/src/table.ts
index 0a8d4459e47..d5e121de78c 100644
--- a/js/src/table.ts
+++ b/js/src/table.ts
@@ -15,20 +15,19 @@
 // specific language governing permissions and limitations
 // under the License.
 
-import { Data } from './data';
 import { Column } from './column';
-import { Schema, Field } from './schema';
-import { RecordBatch, _InternalEmptyPlaceholderRecordBatch } from './recordbatch';
-import { DataFrame } from './compute/dataframe';
+import { Data } from './data';
+import { TypedArray, TypedArrayDataType } from './interfaces';
 import { RecordBatchReader } from './ipc/reader';
-import { DataType, RowLike, Struct } from './type';
-import { selectColumnArgs, selectArgs } from './util/args';
-import { Clonable, Sliceable, Applicative } from './vector';
-import { isPromise, isIterable, isAsyncIterable } from './util/compat';
 import { RecordBatchFileWriter, RecordBatchStreamWriter } from './ipc/writer';
+import { RecordBatch, _InternalEmptyPlaceholderRecordBatch } from './recordbatch';
+import { Field, Schema } from './schema';
+import { DataType, RowLike, Struct } from './type';
+import { selectArgs, selectColumnArgs } from './util/args';
+import { isAsyncIterable, isIterable, isPromise } from './util/compat';
 import { distributeColumnsIntoRecordBatches, distributeVectorsIntoRecordBatches } from './util/recordbatch';
-import { Vector, Chunked, StructVector, VectorBuilderOptions, VectorBuilderOptionsAsync } from './vector/index';
-import { TypedArray, TypedArrayDataType } from './interfaces';
+import { Applicative, Clonable, Sliceable } from './vector';
+import { Chunked, StructVector, Vector, VectorBuilderOptions, VectorBuilderOptionsAsync } from './vector/index';
 
 type VectorMap = { [key: string]: Vector | Exclude<TypedArray, Uint8ClampedArray> };
 type Fields<T extends { [key: string]: DataType }> = (keyof T)[] | Field<T[keyof T]>[];
@@ -43,17 +42,11 @@ export interface Table<T extends { [key: string]: DataType } = any> {
     slice(begin?: number, end?: number): Table<T>;
     concat(...others: Vector<Struct<T>>[]): Table<T>;
     clone(chunks?: RecordBatch<T>[], offsets?: Uint32Array): Table<T>;
-
-    scan(next: import('./compute/dataframe').NextFunc, bind?: import('./compute/dataframe').BindFunc): void;
-    scanReverse(next: import('./compute/dataframe').NextFunc, bind?: import('./compute/dataframe').BindFunc): void;
-    countBy(name: import('./compute/predicate').Col | string): import('./compute/dataframe').CountByResult;
-    filter(predicate: import('./compute/predicate').Predicate): import('./compute/dataframe').FilteredDataFrame<T>;
 }
 
 export class Table<T extends { [key: string]: DataType } = any>
     extends Chunked<Struct<T>>
-    implements DataFrame<T>,
-               Clonable<Table<T>>,
+    implements Clonable<Table<T>>,
                Sliceable<Table<T>>,
                Applicative<Struct<T>, Table<T>> {
 
@@ -173,6 +166,7 @@ export class Table<T extends { [key: string]: DataType } = any>
         return new Table(...distributeColumnsIntoRecordBatches(selectColumnArgs(cols)));
     }
 
+    constructor(table: Table<T>);
     constructor(batches: RecordBatch<T>[]);
     constructor(...batches: RecordBatch<T>[]);
     constructor(schema: Schema<T>, batches: RecordBatch<T>[]);
@@ -181,9 +175,9 @@ export class Table<T extends { [key: string]: DataType } = any>
 
         let schema: Schema<T> = null!;
 
-        if (args[0] instanceof Schema) { schema = args.shift(); }
+        if (args[0] instanceof Schema) { schema = args[0]; }
 
-        const chunks = selectArgs<RecordBatch<T>>(RecordBatch, args);
+        const chunks = args[0] instanceof Table ? (args[0] as Table<T>).chunks : selectArgs<RecordBatch<T>>(RecordBatch, args);
 
         if (!schema && !(schema = chunks[0]?.schema)) {
             throw new TypeError('Table must be initialized with a Schema or at least one RecordBatch');
diff --git a/js/test/unit/dataframe-tests.ts b/js/test/unit/dataframe-tests.ts
new file mode 100644
index 00000000000..169cc6d1ae8
--- /dev/null
+++ b/js/test/unit/dataframe-tests.ts
@@ -0,0 +1,291 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import '../jest-extensions';
+import {
+    predicate, DataFrame, RecordBatch
+} from '../Arrow';
+import { test_data } from './table-tests';
+
+const { col, lit, custom, and, or, And, Or } = predicate;
+
+const F32 = 0, I32 = 1, DICT = 2;
+
+describe(`DataFrame`, () => {
+
+    for (let datum of test_data) {
+        describe(datum.name, () => {
+
+            describe(`scan()`, () => {
+                test(`yields all values`, () => {
+                    const df = new DataFrame(datum.table());
+                    let expected_idx = 0;
+                    df.scan((idx, batch) => {
+                        const columns = batch.schema.fields.map((_, i) => batch.getChildAt(i)!);
+                        expect(columns.map((c) => c.get(idx))).toEqual(values[expected_idx++]);
+                    });
+                });
+                test(`calls bind function with every batch`, () => {
+                    const df = new DataFrame(datum.table());
+                    let bind = jest.fn();
+                    df.scan(() => { }, bind);
+                    for (let batch of df.chunks) {
+                        expect(bind).toHaveBeenCalledWith(batch);
+                    }
+                });
+            });
+            describe(`scanReverse()`, () => {
+                test(`yields all values`, () => {
+                    const df = new DataFrame(datum.table());
+                    let expected_idx = values.length;
+                    df.scanReverse((idx, batch) => {
+                        const columns = batch.schema.fields.map((_, i) => batch.getChildAt(i)!);
+                        expect(columns.map((c) => c.get(idx))).toEqual(values[--expected_idx]);
+                    });
+                });
+                test(`calls bind function with every batch`, () => {
+                    const df = new DataFrame(datum.table());
+                    let bind = jest.fn();
+                    df.scanReverse(() => { }, bind);
+                    for (let batch of df.chunks) {
+                        expect(bind).toHaveBeenCalledWith(batch);
+                    }
+                });
+            });
+            test(`count() returns the correct length`, () => {
+                const df = new DataFrame(datum.table());
+                const values = datum.values();
+                expect(df.count()).toEqual(values.length);
+            });
+            test(`getColumnIndex`, () => {
+                const df = new DataFrame(datum.table());
+                expect(df.getColumnIndex('i32')).toEqual(I32);
+                expect(df.getColumnIndex('f32')).toEqual(F32);
+                expect(df.getColumnIndex('dictionary')).toEqual(DICT);
+            });
+            const df = new DataFrame(datum.table());
+            const values = datum.values();
+            let get_i32: (idx: number) => number, get_f32: (idx: number) => number;
+            const filter_tests = [
+                {
+                    name: `filter on f32 >= 0`,
+                    filtered: df.filter(col('f32').ge(0)),
+                    expected: values.filter((row) => row[F32] >= 0)
+                }, {
+                    name: `filter on 0 <= f32`,
+                    filtered: df.filter(lit(0).le(col('f32'))),
+                    expected: values.filter((row) => 0 <= row[F32])
+                }, {
+                    name: `filter on i32 <= 0`,
+                    filtered: df.filter(col('i32').le(0)),
+                    expected: values.filter((row) => row[I32] <= 0)
+                }, {
+                    name: `filter on 0 >= i32`,
+                    filtered: df.filter(lit(0).ge(col('i32'))),
+                    expected: values.filter((row) => 0 >= row[I32])
+                }, {
+                    name: `filter on f32 < 0`,
+                    filtered: df.filter(col('f32').lt(0)),
+                    expected: values.filter((row) => row[F32] < 0)
+                }, {
+                    name: `filter on i32 > 1 (empty)`,
+                    filtered: df.filter(col('i32').gt(0)),
+                    expected: values.filter((row) => row[I32] > 0)
+                }, {
+                    name: `filter on f32 <= -.25 || f3 >= .25`,
+                    filtered: df.filter(col('f32').le(-.25).or(col('f32').ge(.25))),
+                    expected: values.filter((row) => row[F32] <= -.25 || row[F32] >= .25)
+                }, {
+                    name: `filter on !(f32 <= -.25 || f3 >= .25) (not)`,
+                    filtered: df.filter(col('f32').le(-.25).or(col('f32').ge(.25)).not()),
+                    expected: values.filter((row) => !(row[F32] <= -.25 || row[F32] >= .25))
+                }, {
+                    name: `filter method combines predicates (f32 >= 0 && i32 <= 0)`,
+                    filtered: df.filter(col('i32').le(0)).filter(col('f32').ge(0)),
+                    expected: values.filter((row) => row[I32] <= 0 && row[F32] >= 0)
+                }, {
+                    name: `filter on dictionary == 'a'`,
+                    filtered: df.filter(col('dictionary').eq('a')),
+                    expected: values.filter((row) => row[DICT] === 'a')
+                }, {
+                    name: `filter on 'a' == dictionary (commutativity)`,
+                    filtered: df.filter(lit('a').eq(col('dictionary'))),
+                    expected: values.filter((row) => row[DICT] === 'a')
+                }, {
+                    name: `filter on dictionary != 'b'`,
+                    filtered: df.filter(col('dictionary').ne('b')),
+                    expected: values.filter((row) => row[DICT] !== 'b')
+                }, {
+                    name: `filter on f32 >= i32`,
+                    filtered: df.filter(col('f32').ge(col('i32'))),
+                    expected: values.filter((row) => row[F32] >= row[I32])
+                }, {
+                    name: `filter on f32 <= i32`,
+                    filtered: df.filter(col('f32').le(col('i32'))),
+                    expected: values.filter((row) => row[F32] <= row[I32])
+                }, {
+                    name: `filter on f32*i32 > 0 (custom predicate)`,
+                    filtered: df.filter(custom(
+                        (idx: number) => (get_f32(idx) * get_i32(idx) > 0),
+                        (batch: RecordBatch) => {
+                            get_f32 = col('f32').bind(batch);
+                            get_i32 = col('i32').bind(batch);
+                        })),
+                    expected: values.filter((row) => (row[F32] as number) * (row[I32] as number) > 0)
+                }, {
+                    name: `filter out all records`,
+                    filtered: df.filter(lit(1).eq(0)),
+                    expected: []
+                }
+            ];
+            for (let this_test of filter_tests) {
+                const { name, filtered, expected } = this_test;
+                describe(name, () => {
+                    test(`count() returns the correct length`, () => {
+                        expect(filtered.count()).toEqual(expected.length);
+                    });
+                    describe(`scan()`, () => {
+                        test(`iterates over expected values`, () => {
+                            let expected_idx = 0;
+                            filtered.scan((idx, batch) => {
+                                const columns = batch.schema.fields.map((_, i) => batch.getChildAt(i)!);
+                                expect(columns.map((c) => c.get(idx))).toEqual(expected[expected_idx++]);
+                            });
+                        });
+                        test(`calls bind function lazily`, () => {
+                            let bind = jest.fn();
+                            filtered.scan(() => { }, bind);
+                            if (expected.length) {
+                                expect(bind).toHaveBeenCalled();
+                            } else {
+                                expect(bind).not.toHaveBeenCalled();
+                            }
+                        });
+                    });
+                    describe(`scanReverse()`, () => {
+                        test(`iterates over expected values in reverse`, () => {
+                            let expected_idx = expected.length;
+                            filtered.scanReverse((idx, batch) => {
+                                const columns = batch.schema.fields.map((_, i) => batch.getChildAt(i)!);
+                                expect(columns.map((c) => c.get(idx))).toEqual(expected[--expected_idx]);
+                            });
+                        });
+                        test(`calls bind function lazily`, () => {
+                            let bind = jest.fn();
+                            filtered.scanReverse(() => { }, bind);
+                            if (expected.length) {
+                                expect(bind).toHaveBeenCalled();
+                            } else {
+                                expect(bind).not.toHaveBeenCalled();
+                            }
+                        });
+                    });
+                });
+            }
+            test(`countBy on dictionary returns the correct counts`, () => {
+                // Make sure countBy works both with and without the Col wrapper
+                // class
+                let expected: { [key: string]: number } = { 'a': 0, 'b': 0, 'c': 0 };
+                for (let row of values) {
+                    expected[row[DICT]] += 1;
+                }
+
+                expect(df.countBy(col('dictionary')).toJSON()).toEqual(expected);
+                expect(df.countBy('dictionary').toJSON()).toEqual(expected);
+            });
+            test(`countBy on dictionary with filter returns the correct counts`, () => {
+                let expected: { [key: string]: number } = { 'a': 0, 'b': 0, 'c': 0 };
+                for (let row of values) {
+                    if (row[I32] === 1) { expected[row[DICT]] += 1; }
+                }
+
+                expect(df.filter(col('i32').eq(1)).countBy('dictionary').toJSON()).toEqual(expected);
+            });
+            test(`countBy on non dictionary column throws error`, () => {
+                expect(() => { df.countBy('i32'); }).toThrow();
+                expect(() => { df.filter(col('dict').eq('a')).countBy('i32'); }).toThrow();
+            });
+            test(`countBy on non-existent column throws error`, () => {
+                expect(() => { df.countBy('FAKE' as any); }).toThrow();
+            });
+            test(`table.select() basic tests`, () => {
+                let selected = df.select('f32', 'dictionary');
+                expect(selected.schema.fields).toHaveLength(2);
+                expect(selected.schema.fields[0]).toEqual(df.schema.fields[0]);
+                expect(selected.schema.fields[1]).toEqual(df.schema.fields[2]);
+
+                expect(selected).toHaveLength(values.length);
+                let idx = 0, expected_row;
+                for (let row of selected) {
+                    expected_row = values[idx++];
+                    expect(row.f32).toEqual(expected_row[F32]);
+                    expect(row.dictionary).toEqual(expected_row[DICT]);
+                }
+            });
+            // test(`table.toString()`, () => {
+            //     let selected = table.select('i32', 'dictionary');
+            //     let headers = [`"row_id"`, `"i32: Int32"`, `"dictionary: Dictionary<Int8, Utf8>"`];
+            //     let expected = [headers.join(' | '), ...values.map((row, idx) => {
+            //         return [`${idx}`, `${row[I32]}`, `"${row[DICT]}"`].map((str, col) => {
+            //             return leftPad(str, ' ', headers[col].length);
+            //         }).join(' | ');
+            //     })].join('\n') + '\n';
+            //     expect(selected.toString()).toEqual(expected);
+            // });
+            test(`table.filter(..).count() on always false predicates returns 0`, () => {
+                expect(df.filter(col('i32').ge(100)).count()).toEqual(0);
+                expect(df.filter(col('dictionary').eq('z')).count()).toEqual(0);
+            });
+            describe(`lit-lit comparison`, () => {
+                test(`always-false count() returns 0`, () => {
+                    expect(df.filter(lit('abc').eq('def')).count()).toEqual(0);
+                    expect(df.filter(lit(0).ge(1)).count()).toEqual(0);
+                });
+                test(`always-true count() returns length`, () => {
+                    expect(df.filter(lit('abc').eq('abc')).count()).toEqual(df.length);
+                    expect(df.filter(lit(-100).le(0)).count()).toEqual(df.length);
+                });
+            });
+            describe(`col-col comparison`, () => {
+                test(`always-false count() returns 0`, () => {
+                    expect(df.filter(col('dictionary').eq(col('i32'))).count()).toEqual(0);
+                });
+                test(`always-true count() returns length`, () => {
+                    expect(df.filter(col('dictionary').eq(col('dictionary'))).count()).toEqual(df.length);
+                });
+            });
+        });
+    }
+});
+
+describe(`Predicate`, () => {
+    const p1 = col('a').gt(100);
+    const p2 = col('a').lt(1000);
+    const p3 = col('b').eq('foo');
+    const p4 = col('c').eq('bar');
+    const expected = [p1, p2, p3, p4];
+    test(`and flattens children`, () => {
+        expect(and(p1, p2, p3, p4).children).toEqual(expected);
+        expect(and(p1.and(p2), new And(p3, p4)).children).toEqual(expected);
+        expect(and(p1.and(p2, p3, p4)).children).toEqual(expected);
+    });
+    test(`or flattens children`, () => {
+        expect(or(p1, p2, p3, p4).children).toEqual(expected);
+        expect(or(p1.or(p2), new Or(p3, p4)).children).toEqual(expected);
+        expect(or(p1.or(p2, p3, p4)).children).toEqual(expected);
+    });
+});
diff --git a/js/test/unit/table-tests.ts b/js/test/unit/table-tests.ts
index d863f7581a4..c6b0bb5f75e 100644
--- a/js/test/unit/table-tests.ts
+++ b/js/test/unit/table-tests.ts
@@ -17,18 +17,15 @@
 
 import '../jest-extensions';
 import {
-    predicate,
     Data, Schema, Field, Table, RecordBatch, Column,
     Vector, Int32Vector, Float32Vector, Utf8Vector, DictionaryVector,
-    Struct, Float32, Int32, Dictionary, Utf8, Int8, Type
+    Struct, Float32, Int32, Dictionary, Utf8, Int8
 } from '../Arrow';
 import { arange } from './utils';
 
-const { col, lit, custom, and, or, And, Or } = predicate;
-
 const NAMES = ['f32', 'i32', 'dictionary'] as (keyof TestDataSchema)[];
 const F32 = 0, I32 = 1, DICT = 2;
-const test_data = [
+export const test_data = [
     {
         name: `single record batch`,
         table: getSingleRecordBatchTable,
@@ -319,42 +316,6 @@ describe(`Table`, () => {
                 expect(clone).toEqualTable(table);
             });
 
-            describe(`scan()`, () => {
-                test(`yields all values`, () => {
-                    const table = datum.table();
-                    let expected_idx = 0;
-                    table.scan((idx, batch) => {
-                        const columns = batch.schema.fields.map((_, i) => batch.getChildAt(i)!);
-                        expect(columns.map((c) => c.get(idx))).toEqual(values[expected_idx++]);
-                    });
-                });
-                test(`calls bind function with every batch`, () => {
-                    const table = datum.table();
-                    let bind = jest.fn();
-                    table.scan(() => { }, bind);
-                    for (let batch of table.chunks) {
-                        expect(bind).toHaveBeenCalledWith(batch);
-                    }
-                });
-            });
-            describe(`scanReverse()`, () => {
-                test(`yields all values`, () => {
-                    const table = datum.table();
-                    let expected_idx = values.length;
-                    table.scanReverse((idx, batch) => {
-                        const columns = batch.schema.fields.map((_, i) => batch.getChildAt(i)!);
-                        expect(columns.map((c) => c.get(idx))).toEqual(values[--expected_idx]);
-                    });
-                });
-                test(`calls bind function with every batch`, () => {
-                    const table = datum.table();
-                    let bind = jest.fn();
-                    table.scanReverse(() => { }, bind);
-                    for (let batch of table.chunks) {
-                        expect(bind).toHaveBeenCalledWith(batch);
-                    }
-                });
-            });
             test(`count() returns the correct length`, () => {
                 const table = datum.table();
                 const values = datum.values();
@@ -366,151 +327,10 @@ describe(`Table`, () => {
                 expect(table.getColumnIndex('f32')).toEqual(F32);
                 expect(table.getColumnIndex('dictionary')).toEqual(DICT);
             });
+
             const table = datum.table();
             const values = datum.values();
-            let get_i32: (idx: number) => number, get_f32: (idx: number) => number;
-            const filter_tests = [
-                {
-                    name: `filter on f32 >= 0`,
-                    filtered: table.filter(col('f32').ge(0)),
-                    expected: values.filter((row) => row[F32] >= 0)
-                }, {
-                    name: `filter on 0 <= f32`,
-                    filtered: table.filter(lit(0).le(col('f32'))),
-                    expected: values.filter((row) => 0 <= row[F32])
-                }, {
-                    name: `filter on i32 <= 0`,
-                    filtered: table.filter(col('i32').le(0)),
-                    expected: values.filter((row) => row[I32] <= 0)
-                }, {
-                    name: `filter on 0 >= i32`,
-                    filtered: table.filter(lit(0).ge(col('i32'))),
-                    expected: values.filter((row) => 0 >= row[I32])
-                }, {
-                    name: `filter on f32 < 0`,
-                    filtered: table.filter(col('f32').lt(0)),
-                    expected: values.filter((row) => row[F32] < 0)
-                }, {
-                    name: `filter on i32 > 1 (empty)`,
-                    filtered: table.filter(col('i32').gt(0)),
-                    expected: values.filter((row) => row[I32] > 0)
-                }, {
-                    name: `filter on f32 <= -.25 || f3 >= .25`,
-                    filtered: table.filter(col('f32').le(-.25).or(col('f32').ge(.25))),
-                    expected: values.filter((row) => row[F32] <= -.25 || row[F32] >= .25)
-                }, {
-                    name: `filter on !(f32 <= -.25 || f3 >= .25) (not)`,
-                    filtered: table.filter(col('f32').le(-.25).or(col('f32').ge(.25)).not()),
-                    expected: values.filter((row) => !(row[F32] <= -.25 || row[F32] >= .25))
-                }, {
-                    name: `filter method combines predicates (f32 >= 0 && i32 <= 0)`,
-                    filtered: table.filter(col('i32').le(0)).filter(col('f32').ge(0)),
-                    expected: values.filter((row) => row[I32] <= 0 && row[F32] >= 0)
-                }, {
-                    name: `filter on dictionary == 'a'`,
-                    filtered: table.filter(col('dictionary').eq('a')),
-                    expected: values.filter((row) => row[DICT] === 'a')
-                }, {
-                    name: `filter on 'a' == dictionary (commutativity)`,
-                    filtered: table.filter(lit('a').eq(col('dictionary'))),
-                    expected: values.filter((row) => row[DICT] === 'a')
-                }, {
-                    name: `filter on dictionary != 'b'`,
-                    filtered: table.filter(col('dictionary').ne('b')),
-                    expected: values.filter((row) => row[DICT] !== 'b')
-                }, {
-                    name: `filter on f32 >= i32`,
-                    filtered: table.filter(col('f32').ge(col('i32'))),
-                    expected: values.filter((row) => row[F32] >= row[I32])
-                }, {
-                    name: `filter on f32 <= i32`,
-                    filtered: table.filter(col('f32').le(col('i32'))),
-                    expected: values.filter((row) => row[F32] <= row[I32])
-                }, {
-                    name: `filter on f32*i32 > 0 (custom predicate)`,
-                    filtered: table.filter(custom(
-                        (idx: number) => (get_f32(idx) * get_i32(idx) > 0),
-                        (batch: RecordBatch) => {
-                            get_f32 = col('f32').bind(batch);
-                            get_i32 = col('i32').bind(batch);
-                        })),
-                    expected: values.filter((row) => (row[F32] as number) * (row[I32] as number) > 0)
-                }, {
-                    name: `filter out all records`,
-                    filtered: table.filter(lit(1).eq(0)),
-                    expected: []
-                }
-            ];
-            for (let this_test of filter_tests) {
-                const { name, filtered, expected } = this_test;
-                describe(name, () => {
-                    test(`count() returns the correct length`, () => {
-                        expect(filtered.count()).toEqual(expected.length);
-                    });
-                    describe(`scan()`, () => {
-                        test(`iterates over expected values`, () => {
-                            let expected_idx = 0;
-                            filtered.scan((idx, batch) => {
-                                const columns = batch.schema.fields.map((_, i) => batch.getChildAt(i)!);
-                                expect(columns.map((c) => c.get(idx))).toEqual(expected[expected_idx++]);
-                            });
-                        });
-                        test(`calls bind function lazily`, () => {
-                            let bind = jest.fn();
-                            filtered.scan(() => { }, bind);
-                            if (expected.length) {
-                                expect(bind).toHaveBeenCalled();
-                            } else {
-                                expect(bind).not.toHaveBeenCalled();
-                            }
-                        });
-                    });
-                    describe(`scanReverse()`, () => {
-                        test(`iterates over expected values in reverse`, () => {
-                            let expected_idx = expected.length;
-                            filtered.scanReverse((idx, batch) => {
-                                const columns = batch.schema.fields.map((_, i) => batch.getChildAt(i)!);
-                                expect(columns.map((c) => c.get(idx))).toEqual(expected[--expected_idx]);
-                            });
-                        });
-                        test(`calls bind function lazily`, () => {
-                            let bind = jest.fn();
-                            filtered.scanReverse(() => { }, bind);
-                            if (expected.length) {
-                                expect(bind).toHaveBeenCalled();
-                            } else {
-                                expect(bind).not.toHaveBeenCalled();
-                            }
-                        });
-                    });
-                });
-            }
-            test(`countBy on dictionary returns the correct counts`, () => {
-                // Make sure countBy works both with and without the Col wrapper
-                // class
-                let expected: { [key: string]: number } = { 'a': 0, 'b': 0, 'c': 0 };
-                for (let row of values) {
-                    expected[row[DICT]] += 1;
-                }
 
-                expect(table.countBy(col('dictionary')).toJSON()).toEqual(expected);
-                expect(table.countBy('dictionary').toJSON()).toEqual(expected);
-            });
-            test(`countBy on dictionary with filter returns the correct counts`, () => {
-                let expected: { [key: string]: number } = { 'a': 0, 'b': 0, 'c': 0 };
-                for (let row of values) {
-                    if (row[I32] === 1) { expected[row[DICT]] += 1; }
-                }
-
-                expect(table.filter(col('i32').eq(1)).countBy('dictionary').toJSON()).toEqual(expected);
-            });
-            test(`countBy on non dictionary column throws error`, () => {
-                expect(() => { table.countBy('i32'); }).toThrow();
-                expect(() => { table.filter(col('dict').eq('a')).countBy('i32'); }).toThrow();
-            });
-            test(`countBy on non-existent column throws error`, () => {
-                expect(() => { table.countBy('FAKE' as any); }).toThrow();
-            });
             test(`table.select() basic tests`, () => {
                 let selected = table.select('f32', 'dictionary');
                 expect(selected.schema.fields).toHaveLength(2);
@@ -525,64 +345,10 @@ describe(`Table`, () => {
                     expect(row.dictionary).toEqual(expected_row[DICT]);
                 }
             });
-            // test(`table.toString()`, () => {
-            //     let selected = table.select('i32', 'dictionary');
-            //     let headers = [`"row_id"`, `"i32: Int32"`, `"dictionary: Dictionary<Int8, Utf8>"`];
-            //     let expected = [headers.join(' | '), ...values.map((row, idx) => {
-            //         return [`${idx}`, `${row[I32]}`, `"${row[DICT]}"`].map((str, col) => {
-            //             return leftPad(str, ' ', headers[col].length);
-            //         }).join(' | ');
-            //     })].join('\n') + '\n';
-            //     expect(selected.toString()).toEqual(expected);
-            // });
-            test(`table.filter(..).count() on always false predicates returns 0`, () => {
-                expect(table.filter(col('i32').ge(100)).count()).toEqual(0);
-                expect(table.filter(col('dictionary').eq('z')).count()).toEqual(0);
-            });
-            describe(`lit-lit comparison`, () => {
-                test(`always-false count() returns 0`, () => {
-                    expect(table.filter(lit('abc').eq('def')).count()).toEqual(0);
-                    expect(table.filter(lit(0).ge(1)).count()).toEqual(0);
-                });
-                test(`always-true count() returns length`, () => {
-                    expect(table.filter(lit('abc').eq('abc')).count()).toEqual(table.length);
-                    expect(table.filter(lit(-100).le(0)).count()).toEqual(table.length);
-                });
-            });
-            describe(`col-col comparison`, () => {
-                test(`always-false count() returns 0`, () => {
-                    expect(table.filter(col('dictionary').eq(col('i32'))).count()).toEqual(0);
-                });
-                test(`always-true count() returns length`, () => {
-                    expect(table.filter(col('dictionary').eq(col('dictionary'))).count()).toEqual(table.length);
-                });
-            });
         });
     }
 });
 
-describe(`Predicate`, () => {
-    const p1 = col('a').gt(100);
-    const p2 = col('a').lt(1000);
-    const p3 = col('b').eq('foo');
-    const p4 = col('c').eq('bar');
-    const expected = [p1, p2, p3, p4];
-    test(`and flattens children`, () => {
-        expect(and(p1, p2, p3, p4).children).toEqual(expected);
-        expect(and(p1.and(p2), new And(p3, p4)).children).toEqual(expected);
-        expect(and(p1.and(p2, p3, p4)).children).toEqual(expected);
-    });
-    test(`or flattens children`, () => {
-        expect(or(p1, p2, p3, p4).children).toEqual(expected);
-        expect(or(p1.or(p2), new Or(p3, p4)).children).toEqual(expected);
-        expect(or(p1.or(p2, p3, p4)).children).toEqual(expected);
-    });
-});
-
-// function leftPad(str: string, fill: string, n: number) {
-//     return (new Array(n + 1).join(fill) + str).slice(-1 * n);
-// }
-
 type TestDataSchema = { f32: Float32; i32: Int32; dictionary: Dictionary<Utf8, Int8> };
 
 function getTestVectors(f32Values: number[], i32Values: number[], dictIndices: number[]) {
@@ -594,7 +360,7 @@ function getTestVectors(f32Values: number[], i32Values: number[], dictIndices: n
     return [Vector.new(f32Data), Vector.new(i32Data), DictionaryVector.from(values, new Int8(), dictIndices)];
 }
 
-export function getSingleRecordBatchTable() {
+function getSingleRecordBatchTable() {
     const vectors = getTestVectors(
         [-0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3],
         [-1, 1, -1, 1, -1, 1, -1],

From e6e437a14e9eec16707944ea3b3a3a298e0d8dec Mon Sep 17 00:00:00 2001
From: P42 <72252241+p42-ai[bot]@users.noreply.github.com>
Date: Wed, 2 Jun 2021 18:57:42 -0700
Subject: [PATCH 334/719] ARROW-12799: [JS] Use Nullish Coalescing Operator
 (??) For Defaults

**The nullish coalescing operator (`??`) returns its right side when its left side is nullish** (`null` or `undefined`), and its left side otherwise.
For example, `const x = a ?? b` would set `x` to `a` if `a` has a value, and to `b` if `a` is `null` or `undefined`.

The nullish coalescing operator is very useful to **provide default values when a value or an expression is nullish**.
Before its introduction in ES2020, this default value pattern was often expressed using the conditional operator.

This refactoring simplifies conditional (ternary) checks to nullish coalescing operator expressions:

* `a == null ? x : a` becomes `a ?? x`
* `a != null ? a : x` becomes `a ?? x`
* `a === null || a === undefined ? x : a` becomes `a ?? x`
* `a !== null && a !== undefined ? a : x` becomes `a ?? x`
* `f(1) != null ? f(1) : x` becomes `f(1) ?? x`
* etc.

Learn More: [Nullish coalescing operator (MDN)](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Nullish_coalescing_operator)

When two similar-looking function calls have a side effect, this refactoring can change the behavior of the code.

For example, the refactoring changes:

```javascript
let a = f(1) === null || f(1) === undefined ? 'default' : f(1);
```

into

```javascript
let a = f(1) ?? 'default';
```

If `f(1)` has a side effect, it would have been called one, two or three times before the refactoring, and once after the refactoring.
This means that the side effect would have been called a different number of times, potentially changing the behavior.

Closes #10339 from domoritz/p42/nullish_coalescing_operator/1621098556103

Authored-by: P42 <72252241+p42-ai[bot]@users.noreply.github.com>
Signed-off-by: Dominik Moritz <domoritz@gmail.com>
---
 js/bin/integration.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/js/bin/integration.js b/js/bin/integration.js
index c357c128aa1..507514ebade 100755
--- a/js/bin/integration.js
+++ b/js/bin/integration.js
@@ -189,7 +189,7 @@ function compareTableIsh(actual, expected) {
 function compareVectors(actual, expected) {
 
     if ((actual == null && expected != null) || (expected == null && actual != null)) {
-        throw new Error(`${actual == null ? `actual` : `expected`} is null, was expecting ${actual == null ? expected : actual} to be that also`);
+        throw new Error(`${actual == null ? `actual` : `expected`} is null, was expecting ${actual ?? expected} to be that also`);
     }
 
     let props = ['type', 'length', 'nullCount'];

From bbf0e674f8b237f85c479680738b2fa20da6a5f0 Mon Sep 17 00:00:00 2001
From: Dominik Moritz <domoritz@gmail.com>
Date: Wed, 2 Jun 2021 18:59:10 -0700
Subject: [PATCH 335/719] ARROW-12875: [JS] Upgrade Jest and other minor
 updates

Closes #10398 from domoritz/upgrade-deps

Authored-by: Dominik Moritz <domoritz@gmail.com>
Signed-off-by: Dominik Moritz <domoritz@gmail.com>
---
 js/gulp/test-task.js                          |    2 +-
 js/jest.config.js                             |    3 -
 js/package.json                               |   41 +-
 js/test/unit/ipc/reader/streams-node-tests.ts |    7 +-
 js/yarn.lock                                  | 5106 ++++++++---------
 5 files changed, 2340 insertions(+), 2819 deletions(-)

diff --git a/js/gulp/test-task.js b/js/gulp/test-task.js
index cd01131779d..699070a8ff4 100644
--- a/js/gulp/test-task.js
+++ b/js/gulp/test-task.js
@@ -29,7 +29,7 @@ const asyncDone = promisify(require('async-done'));
 const exec = promisify(require('child_process').exec);
 const parseXML = promisify(require('xml2js').parseString);
 
-const jestArgv = [];
+const jestArgv = [`--reporters=jest-silent-reporter`];
 argv.verbose && jestArgv.push(`--verbose`);
 argv.coverage
     ? jestArgv.push(`-c`, `jest.coverage.config.js`, `--coverage`, `-i`)
diff --git a/js/jest.config.js b/js/jest.config.js
index 30f7bb6311f..9ebf291f701 100644
--- a/js/jest.config.js
+++ b/js/jest.config.js
@@ -17,9 +17,6 @@
 
 module.exports = {
     "verbose": false,
-    "reporters": [
-      "jest-silent-reporter"
-    ],
     "testEnvironment": "node",
     "globals": {
       "ts-jest": {
diff --git a/js/package.json b/js/package.json
index da8f65a55f8..2a8a70bdf9d 100644
--- a/js/package.json
+++ b/js/package.json
@@ -53,7 +53,7 @@
   ],
   "dependencies": {
     "@types/flatbuffers": "^1.10.0",
-    "@types/node": "^15.0.2",
+    "@types/node": "^15.6.1",
     "command-line-args": "5.1.1",
     "command-line-usage": "6.1.1",
     "flatbuffers": "1.12.0",
@@ -64,17 +64,18 @@
   "devDependencies": {
     "@types/glob": "7.1.3",
     "@types/jest": "26.0.23",
-    "@typescript-eslint/eslint-plugin": "^4.22.1",
-    "@typescript-eslint/parser": "^4.22.1",
-    "async-done": "1.3.1",
+    "@types/multistream": "2.1.1",
+    "@typescript-eslint/eslint-plugin": "4.25.0",
+    "@typescript-eslint/parser": "4.25.0",
+    "async-done": "1.3.2",
     "benny": "3.6.15",
-    "cpy": "^8.1.2",
-    "cross-env": "^7.0.3",
+    "cpy": "8.1.2",
+    "cross-env": "7.0.3",
     "del-cli": "3.0.1",
-    "eslint": "^7.24.0",
-    "eslint-plugin-jest": "^24.3.5",
+    "eslint": "7.27.0",
+    "eslint-plugin-jest": "24.3.6",
     "esm": "https://github.com/jsg2021/esm/releases/download/v3.x.x-pr883/esm-3.x.x-pr883.tgz",
-    "glob": "7.1.4",
+    "glob": "7.1.7",
     "google-closure-compiler": "20210505.0.0",
     "gulp": "4.0.2",
     "gulp-json-transform": "0.4.7",
@@ -82,24 +83,24 @@
     "gulp-sourcemaps": "3.0.0",
     "gulp-typescript": "5.0.1",
     "ix": "2.5.3",
-    "jest": "26.6.3",
-    "jest-silent-reporter": "0.1.2",
-    "lerna": "3.22.1",
-    "memfs": "2.15.2",
+    "jest": "27.0.1",
+    "jest-silent-reporter": "0.5.0",
+    "lerna": "4.0.0",
+    "memfs": "3.2.2",
     "mkdirp": "1.0.4",
-    "multistream": "2.1.1",
+    "multistream": "4.1.0",
     "npm-run-all": "4.1.5",
     "randomatic": "3.1.1",
     "rxjs": "5.5.11",
-    "source-map-loader": "2.0.1",
-    "ts-jest": "26.5.4",
-    "ts-node": "9.1.1",
-    "typedoc": "0.20.35",
+    "source-map-loader": "3.0.0",
+    "ts-jest": "27.0.0",
+    "ts-node": "10.0.0",
+    "typedoc": "0.20.36",
     "typescript": "4.0.2",
     "web-stream-tools": "0.0.1",
     "web-streams-polyfill": "3.0.3",
-    "webpack": "5.36.2",
-    "xml2js": "0.4.19"
+    "webpack": "5.37.1",
+    "xml2js": "0.4.23"
   },
   "engines": {
     "node": ">=11.12"
diff --git a/js/test/unit/ipc/reader/streams-node-tests.ts b/js/test/unit/ipc/reader/streams-node-tests.ts
index 609c03a47fe..080ebab73b9 100644
--- a/js/test/unit/ipc/reader/streams-node-tests.ts
+++ b/js/test/unit/ipc/reader/streams-node-tests.ts
@@ -16,8 +16,7 @@
 // under the License.
 
 import {
-    generateRandomTables,
-    // generateDictionaryTables
+    generateRandomTables
 } from '../../../data/tables';
 
 import {
@@ -37,8 +36,8 @@ import { validateRecordBatchAsyncIterator } from '../validate';
 
     const { Readable, PassThrough } = require('stream');
     const { parse: bignumJSONParse } = require('json-bignum');
-    const concatStream = ((multistream) => (...xs: any[]) =>
-        new Readable().wrap(multistream(...xs))
+    const concatStream = ((MultiStream) => (...xs: any[]) =>
+        new Readable().wrap(new MultiStream(...xs))
     )(require('multistream'));
 
     for (const table of generateRandomTables([10, 20, 30])) {
diff --git a/js/yarn.lock b/js/yarn.lock
index 66c01691edb..c8047a2dda7 100644
--- a/js/yarn.lock
+++ b/js/yarn.lock
@@ -50,25 +50,25 @@
   dependencies:
     "@babel/highlight" "^7.12.13"
 
-"@babel/compat-data@^7.13.12":
-  version "7.13.15"
-  resolved "https://registry.yarnpkg.com/@babel/compat-data/-/compat-data-7.13.15.tgz#7e8eea42d0b64fda2b375b22d06c605222e848f4"
-  integrity sha512-ltnibHKR1VnrU4ymHyQ/CXtNXI6yZC0oJThyW78Hft8XndANwi+9H+UIklBDraIjFEJzw8wmcM427oDd9KS5wA==
+"@babel/compat-data@^7.13.15":
+  version "7.14.0"
+  resolved "https://registry.yarnpkg.com/@babel/compat-data/-/compat-data-7.14.0.tgz#a901128bce2ad02565df95e6ecbf195cf9465919"
+  integrity sha512-vu9V3uMM/1o5Hl5OekMUowo3FqXLJSw+s+66nt0fSWVWTtmosdzn45JHOB3cPtZoe6CTBDzvSw0RdOY85Q37+Q==
 
-"@babel/core@^7.1.0", "@babel/core@^7.7.5":
-  version "7.13.15"
-  resolved "https://registry.yarnpkg.com/@babel/core/-/core-7.13.15.tgz#a6d40917df027487b54312202a06812c4f7792d0"
-  integrity sha512-6GXmNYeNjS2Uz+uls5jalOemgIhnTMeaXo+yBUA72kC2uX/8VW6XyhVIo2L8/q0goKQA3EVKx0KOQpVKSeWadQ==
+"@babel/core@^7.1.0", "@babel/core@^7.7.2", "@babel/core@^7.7.5":
+  version "7.14.3"
+  resolved "https://registry.yarnpkg.com/@babel/core/-/core-7.14.3.tgz#5395e30405f0776067fbd9cf0884f15bfb770a38"
+  integrity sha512-jB5AmTKOCSJIZ72sd78ECEhuPiDMKlQdDI/4QRI6lzYATx5SSogS1oQA2AoPecRCknm30gHi2l+QVvNUu3wZAg==
   dependencies:
     "@babel/code-frame" "^7.12.13"
-    "@babel/generator" "^7.13.9"
-    "@babel/helper-compilation-targets" "^7.13.13"
-    "@babel/helper-module-transforms" "^7.13.14"
-    "@babel/helpers" "^7.13.10"
-    "@babel/parser" "^7.13.15"
+    "@babel/generator" "^7.14.3"
+    "@babel/helper-compilation-targets" "^7.13.16"
+    "@babel/helper-module-transforms" "^7.14.2"
+    "@babel/helpers" "^7.14.0"
+    "@babel/parser" "^7.14.3"
     "@babel/template" "^7.12.13"
-    "@babel/traverse" "^7.13.15"
-    "@babel/types" "^7.13.14"
+    "@babel/traverse" "^7.14.2"
+    "@babel/types" "^7.14.2"
     convert-source-map "^1.7.0"
     debug "^4.1.0"
     gensync "^1.0.0-beta.2"
@@ -76,33 +76,33 @@
     semver "^6.3.0"
     source-map "^0.5.0"
 
-"@babel/generator@^7.13.9":
-  version "7.13.9"
-  resolved "https://registry.yarnpkg.com/@babel/generator/-/generator-7.13.9.tgz#3a7aa96f9efb8e2be42d38d80e2ceb4c64d8de39"
-  integrity sha512-mHOOmY0Axl/JCTkxTU6Lf5sWOg/v8nUa+Xkt4zMTftX0wqmb6Sh7J8gvcehBw7q0AhrhAR+FDacKjCZ2X8K+Sw==
+"@babel/generator@^7.14.2", "@babel/generator@^7.14.3", "@babel/generator@^7.7.2":
+  version "7.14.3"
+  resolved "https://registry.yarnpkg.com/@babel/generator/-/generator-7.14.3.tgz#0c2652d91f7bddab7cccc6ba8157e4f40dcedb91"
+  integrity sha512-bn0S6flG/j0xtQdz3hsjJ624h3W0r3llttBMfyHX3YrZ/KtLYr15bjA0FXkgW7FpvrDuTuElXeVjiKlYRpnOFA==
   dependencies:
-    "@babel/types" "^7.13.0"
+    "@babel/types" "^7.14.2"
     jsesc "^2.5.1"
     source-map "^0.5.0"
 
-"@babel/helper-compilation-targets@^7.13.13":
-  version "7.13.13"
-  resolved "https://registry.yarnpkg.com/@babel/helper-compilation-targets/-/helper-compilation-targets-7.13.13.tgz#2b2972a0926474853f41e4adbc69338f520600e5"
-  integrity sha512-q1kcdHNZehBwD9jYPh3WyXcsFERi39X4I59I3NadciWtNDyZ6x+GboOxncFK0kXlKIv6BJm5acncehXWUjWQMQ==
+"@babel/helper-compilation-targets@^7.13.16":
+  version "7.13.16"
+  resolved "https://registry.yarnpkg.com/@babel/helper-compilation-targets/-/helper-compilation-targets-7.13.16.tgz#6e91dccf15e3f43e5556dffe32d860109887563c"
+  integrity sha512-3gmkYIrpqsLlieFwjkGgLaSHmhnvlAYzZLlYVjlW+QwI+1zE17kGxuJGmIqDQdYp56XdmGeD+Bswx0UTyG18xA==
   dependencies:
-    "@babel/compat-data" "^7.13.12"
+    "@babel/compat-data" "^7.13.15"
     "@babel/helper-validator-option" "^7.12.17"
     browserslist "^4.14.5"
     semver "^6.3.0"
 
-"@babel/helper-function-name@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/helper-function-name/-/helper-function-name-7.12.13.tgz#93ad656db3c3c2232559fd7b2c3dbdcbe0eb377a"
-  integrity sha512-TZvmPn0UOqmvi5G4vvw0qZTpVptGkB1GL61R6lKvrSdIxGm5Pky7Q3fpKiIkQCAtRCBUwB0PaThlx9vebCDSwA==
+"@babel/helper-function-name@^7.14.2":
+  version "7.14.2"
+  resolved "https://registry.yarnpkg.com/@babel/helper-function-name/-/helper-function-name-7.14.2.tgz#397688b590760b6ef7725b5f0860c82427ebaac2"
+  integrity sha512-NYZlkZRydxw+YT56IlhIcS8PAhb+FEUiOzuhFTfqDyPmzAhRge6ua0dQYT/Uh0t/EDHq05/i+e5M2d4XvjgarQ==
   dependencies:
     "@babel/helper-get-function-arity" "^7.12.13"
     "@babel/template" "^7.12.13"
-    "@babel/types" "^7.12.13"
+    "@babel/types" "^7.14.2"
 
 "@babel/helper-get-function-arity@^7.12.13":
   version "7.12.13"
@@ -125,19 +125,19 @@
   dependencies:
     "@babel/types" "^7.13.12"
 
-"@babel/helper-module-transforms@^7.13.14":
-  version "7.13.14"
-  resolved "https://registry.yarnpkg.com/@babel/helper-module-transforms/-/helper-module-transforms-7.13.14.tgz#e600652ba48ccb1641775413cb32cfa4e8b495ef"
-  integrity sha512-QuU/OJ0iAOSIatyVZmfqB0lbkVP0kDRiKj34xy+QNsnVZi/PA6BoSoreeqnxxa9EHFAIL0R9XOaAR/G9WlIy5g==
+"@babel/helper-module-transforms@^7.14.2":
+  version "7.14.2"
+  resolved "https://registry.yarnpkg.com/@babel/helper-module-transforms/-/helper-module-transforms-7.14.2.tgz#ac1cc30ee47b945e3e0c4db12fa0c5389509dfe5"
+  integrity sha512-OznJUda/soKXv0XhpvzGWDnml4Qnwp16GN+D/kZIdLsWoHj05kyu8Rm5kXmMef+rVJZ0+4pSGLkeixdqNUATDA==
   dependencies:
     "@babel/helper-module-imports" "^7.13.12"
     "@babel/helper-replace-supers" "^7.13.12"
     "@babel/helper-simple-access" "^7.13.12"
     "@babel/helper-split-export-declaration" "^7.12.13"
-    "@babel/helper-validator-identifier" "^7.12.11"
+    "@babel/helper-validator-identifier" "^7.14.0"
     "@babel/template" "^7.12.13"
-    "@babel/traverse" "^7.13.13"
-    "@babel/types" "^7.13.14"
+    "@babel/traverse" "^7.14.2"
+    "@babel/types" "^7.14.2"
 
 "@babel/helper-optimise-call-expression@^7.12.13":
   version "7.12.13"
@@ -152,14 +152,14 @@
   integrity sha512-ZPafIPSwzUlAoWT8DKs1W2VyF2gOWthGd5NGFMsBcMMol+ZhK+EQY/e6V96poa6PA/Bh+C9plWN0hXO1uB8AfQ==
 
 "@babel/helper-replace-supers@^7.13.12":
-  version "7.13.12"
-  resolved "https://registry.yarnpkg.com/@babel/helper-replace-supers/-/helper-replace-supers-7.13.12.tgz#6442f4c1ad912502481a564a7386de0c77ff3804"
-  integrity sha512-Gz1eiX+4yDO8mT+heB94aLVNCL+rbuT2xy4YfyNqu8F+OI6vMvJK891qGBTqL9Uc8wxEvRW92Id6G7sDen3fFw==
+  version "7.14.3"
+  resolved "https://registry.yarnpkg.com/@babel/helper-replace-supers/-/helper-replace-supers-7.14.3.tgz#ca17b318b859d107f0e9b722d58cf12d94436600"
+  integrity sha512-Rlh8qEWZSTfdz+tgNV/N4gz1a0TMNwCUcENhMjHTHKp3LseYH5Jha0NSlyTQWMnjbYcwFt+bqAMqSLHVXkQ6UA==
   dependencies:
     "@babel/helper-member-expression-to-functions" "^7.13.12"
     "@babel/helper-optimise-call-expression" "^7.12.13"
-    "@babel/traverse" "^7.13.0"
-    "@babel/types" "^7.13.12"
+    "@babel/traverse" "^7.14.2"
+    "@babel/types" "^7.14.2"
 
 "@babel/helper-simple-access@^7.13.12":
   version "7.13.12"
@@ -175,38 +175,38 @@
   dependencies:
     "@babel/types" "^7.12.13"
 
-"@babel/helper-validator-identifier@^7.12.11":
-  version "7.12.11"
-  resolved "https://registry.yarnpkg.com/@babel/helper-validator-identifier/-/helper-validator-identifier-7.12.11.tgz#c9a1f021917dcb5ccf0d4e453e399022981fc9ed"
-  integrity sha512-np/lG3uARFybkoHokJUmf1QfEvRVCPbmQeUQpKow5cQ3xWrV9i3rUHodKDJPQfTVX61qKi+UdYk8kik84n7XOw==
+"@babel/helper-validator-identifier@^7.14.0":
+  version "7.14.0"
+  resolved "https://registry.yarnpkg.com/@babel/helper-validator-identifier/-/helper-validator-identifier-7.14.0.tgz#d26cad8a47c65286b15df1547319a5d0bcf27288"
+  integrity sha512-V3ts7zMSu5lfiwWDVWzRDGIN+lnCEUdaXgtVHJgLb1rGaA6jMrtB9EmE7L18foXJIE8Un/A/h6NJfGQp/e1J4A==
 
 "@babel/helper-validator-option@^7.12.17":
   version "7.12.17"
   resolved "https://registry.yarnpkg.com/@babel/helper-validator-option/-/helper-validator-option-7.12.17.tgz#d1fbf012e1a79b7eebbfdc6d270baaf8d9eb9831"
   integrity sha512-TopkMDmLzq8ngChwRlyjR6raKD6gMSae4JdYDB8bByKreQgG0RBTuKe9LRxW3wFtUnjxOPRKBDwEH6Mg5KeDfw==
 
-"@babel/helpers@^7.13.10":
-  version "7.13.10"
-  resolved "https://registry.yarnpkg.com/@babel/helpers/-/helpers-7.13.10.tgz#fd8e2ba7488533cdeac45cc158e9ebca5e3c7df8"
-  integrity sha512-4VO883+MWPDUVRF3PhiLBUFHoX/bsLTGFpFK/HqvvfBZz2D57u9XzPVNFVBTc0PW/CWR9BXTOKt8NF4DInUHcQ==
+"@babel/helpers@^7.14.0":
+  version "7.14.0"
+  resolved "https://registry.yarnpkg.com/@babel/helpers/-/helpers-7.14.0.tgz#ea9b6be9478a13d6f961dbb5f36bf75e2f3b8f62"
+  integrity sha512-+ufuXprtQ1D1iZTO/K9+EBRn+qPWMJjZSw/S0KlFrxCw4tkrzv9grgpDHkY9MeQTjTY8i2sp7Jep8DfU6tN9Mg==
   dependencies:
     "@babel/template" "^7.12.13"
-    "@babel/traverse" "^7.13.0"
-    "@babel/types" "^7.13.0"
+    "@babel/traverse" "^7.14.0"
+    "@babel/types" "^7.14.0"
 
 "@babel/highlight@^7.10.4", "@babel/highlight@^7.12.13":
-  version "7.13.10"
-  resolved "https://registry.yarnpkg.com/@babel/highlight/-/highlight-7.13.10.tgz#a8b2a66148f5b27d666b15d81774347a731d52d1"
-  integrity sha512-5aPpe5XQPzflQrFwL1/QoeHkP2MsA4JCntcXHRhEsdsfPVkvPi2w7Qix4iV7t5S/oC9OodGrggd8aco1g3SZFg==
+  version "7.14.0"
+  resolved "https://registry.yarnpkg.com/@babel/highlight/-/highlight-7.14.0.tgz#3197e375711ef6bf834e67d0daec88e4f46113cf"
+  integrity sha512-YSCOwxvTYEIMSGaBQb5kDDsCopDdiUGsqpatp3fOlI4+2HQSkTmEVWnVuySdAC5EWCqSWWTv0ib63RjR7dTBdg==
   dependencies:
-    "@babel/helper-validator-identifier" "^7.12.11"
+    "@babel/helper-validator-identifier" "^7.14.0"
     chalk "^2.0.0"
     js-tokens "^4.0.0"
 
-"@babel/parser@^7.1.0", "@babel/parser@^7.12.13", "@babel/parser@^7.13.15":
-  version "7.13.15"
-  resolved "https://registry.yarnpkg.com/@babel/parser/-/parser-7.13.15.tgz#8e66775fb523599acb6a289e12929fa5ab0954d8"
-  integrity sha512-b9COtcAlVEQljy/9fbcMHpG+UIW9ReF+gpaxDHTlZd0c6/UU9ng8zdySAW9sRTzpvcdCHn6bUcbuYUgGzLAWVQ==
+"@babel/parser@^7.1.0", "@babel/parser@^7.12.13", "@babel/parser@^7.14.2", "@babel/parser@^7.14.3", "@babel/parser@^7.7.2":
+  version "7.14.3"
+  resolved "https://registry.yarnpkg.com/@babel/parser/-/parser-7.14.3.tgz#9b530eecb071fd0c93519df25c5ff9f14759f298"
+  integrity sha512-7MpZDIfI7sUC5zWo2+foJ50CSI5lcqDehZ0lVgIhSi4bFEk94fLAKlF3Q0nzSQQ+ca0lm+O6G9ztKVBeu8PMRQ==
 
 "@babel/plugin-syntax-async-generators@^7.8.4":
   version "7.8.4"
@@ -292,6 +292,13 @@
   dependencies:
     "@babel/helper-plugin-utils" "^7.12.13"
 
+"@babel/plugin-syntax-typescript@^7.7.2":
+  version "7.12.13"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-typescript/-/plugin-syntax-typescript-7.12.13.tgz#9dff111ca64154cef0f4dc52cf843d9f12ce4474"
+  integrity sha512-cHP3u1JiUiG2LFDKbXnwVad81GvfyIOmCD6HIEId6ojrY0Drfy2q1jw7BwN7dE84+kTnBjLkXoL3IEy/3JPu2w==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.12.13"
+
 "@babel/template@^7.12.13", "@babel/template@^7.3.3":
   version "7.12.13"
   resolved "https://registry.yarnpkg.com/@babel/template/-/template-7.12.13.tgz#530265be8a2589dbb37523844c5bcb55947fb327"
@@ -301,27 +308,26 @@
     "@babel/parser" "^7.12.13"
     "@babel/types" "^7.12.13"
 
-"@babel/traverse@^7.1.0", "@babel/traverse@^7.13.0", "@babel/traverse@^7.13.13", "@babel/traverse@^7.13.15":
-  version "7.13.15"
-  resolved "https://registry.yarnpkg.com/@babel/traverse/-/traverse-7.13.15.tgz#c38bf7679334ddd4028e8e1f7b3aa5019f0dada7"
-  integrity sha512-/mpZMNvj6bce59Qzl09fHEs8Bt8NnpEDQYleHUPZQ3wXUMvXi+HJPLars68oAbmp839fGoOkv2pSL2z9ajCIaQ==
+"@babel/traverse@^7.1.0", "@babel/traverse@^7.14.0", "@babel/traverse@^7.14.2", "@babel/traverse@^7.7.2":
+  version "7.14.2"
+  resolved "https://registry.yarnpkg.com/@babel/traverse/-/traverse-7.14.2.tgz#9201a8d912723a831c2679c7ebbf2fe1416d765b"
+  integrity sha512-TsdRgvBFHMyHOOzcP9S6QU0QQtjxlRpEYOy3mcCO5RgmC305ki42aSAmfZEMSSYBla2oZ9BMqYlncBaKmD/7iA==
   dependencies:
     "@babel/code-frame" "^7.12.13"
-    "@babel/generator" "^7.13.9"
-    "@babel/helper-function-name" "^7.12.13"
+    "@babel/generator" "^7.14.2"
+    "@babel/helper-function-name" "^7.14.2"
     "@babel/helper-split-export-declaration" "^7.12.13"
-    "@babel/parser" "^7.13.15"
-    "@babel/types" "^7.13.14"
+    "@babel/parser" "^7.14.2"
+    "@babel/types" "^7.14.2"
     debug "^4.1.0"
     globals "^11.1.0"
 
-"@babel/types@^7.0.0", "@babel/types@^7.12.13", "@babel/types@^7.13.0", "@babel/types@^7.13.12", "@babel/types@^7.13.14", "@babel/types@^7.3.0", "@babel/types@^7.3.3":
-  version "7.13.14"
-  resolved "https://registry.yarnpkg.com/@babel/types/-/types-7.13.14.tgz#c35a4abb15c7cd45a2746d78ab328e362cbace0d"
-  integrity sha512-A2aa3QTkWoyqsZZFl56MLUsfmh7O0gN41IPvXAE/++8ojpbz12SszD7JEGYVdn4f9Kt4amIei07swF1h4AqmmQ==
+"@babel/types@^7.0.0", "@babel/types@^7.12.13", "@babel/types@^7.13.12", "@babel/types@^7.14.0", "@babel/types@^7.14.2", "@babel/types@^7.3.0", "@babel/types@^7.3.3":
+  version "7.14.2"
+  resolved "https://registry.yarnpkg.com/@babel/types/-/types-7.14.2.tgz#4208ae003107ef8a057ea8333e56eb64d2f6a2c3"
+  integrity sha512-SdjAG/3DikRHpUOjxZgnkbR11xUlyDMUFJdvnIgZEE16mqmY0BINMmc4//JMJglEmn6i7sq6p+mGrFWyZ98EEw==
   dependencies:
-    "@babel/helper-validator-identifier" "^7.12.11"
-    lodash "^4.17.19"
+    "@babel/helper-validator-identifier" "^7.14.0"
     to-fast-properties "^2.0.0"
 
 "@bcoe/v8-coverage@^0.2.3":
@@ -329,18 +335,10 @@
   resolved "https://registry.yarnpkg.com/@bcoe/v8-coverage/-/v8-coverage-0.2.3.tgz#75a2e8b51cb758a7553d6804a5932d7aace75c39"
   integrity sha512-0hYQ8SB4Db5zvZB4axdMHGwEaQjkZzFjQiN9LVYvIFB2nSUHW9tYpxWriPrWDASIxiaXax83REcLxuSdnGPZtw==
 
-"@cnakazawa/watch@^1.0.3":
-  version "1.0.4"
-  resolved "https://registry.yarnpkg.com/@cnakazawa/watch/-/watch-1.0.4.tgz#f864ae85004d0fcab6f50be9141c4da368d1656a"
-  integrity sha512-v9kIhKwjeZThiWrLmj0y17CWoyddASLj9O2yvbZkbvw/N3rWOYy9zkV66ursAoVr0mV15bL8g0c4QZUE6cdDoQ==
-  dependencies:
-    exec-sh "^0.3.2"
-    minimist "^1.2.0"
-
-"@eslint/eslintrc@^0.4.0":
-  version "0.4.0"
-  resolved "https://registry.yarnpkg.com/@eslint/eslintrc/-/eslintrc-0.4.0.tgz#99cc0a0584d72f1df38b900fb062ba995f395547"
-  integrity sha512-2ZPCc+uNbjV5ERJr+aKSPRwZgKd2z11x0EgLvb1PURmUrn9QNRXFqje0Ldq454PfAVyaJYyrDvvIKSFP4NnBog==
+"@eslint/eslintrc@^0.4.1":
+  version "0.4.1"
+  resolved "https://registry.yarnpkg.com/@eslint/eslintrc/-/eslintrc-0.4.1.tgz#442763b88cecbe3ee0ec7ca6d6dd6168550cbf14"
+  integrity sha512-5v7TDE9plVhvxQeWLXDTvFvJBdH6pEsdnl2g/dAptmuFEPedQ4Erq5rsDsX+mvAM610IhNaO2W5V1dOOnDKxkQ==
   dependencies:
     ajv "^6.12.4"
     debug "^4.1.1"
@@ -352,80 +350,6 @@
     minimatch "^3.0.4"
     strip-json-comments "^3.1.1"
 
-"@evocateur/libnpmaccess@^3.1.2":
-  version "3.1.2"
-  resolved "https://registry.yarnpkg.com/@evocateur/libnpmaccess/-/libnpmaccess-3.1.2.tgz#ecf7f6ce6b004e9f942b098d92200be4a4b1c845"
-  integrity sha512-KSCAHwNWro0CF2ukxufCitT9K5LjL/KuMmNzSu8wuwN2rjyKHD8+cmOsiybK+W5hdnwc5M1SmRlVCaMHQo+3rg==
-  dependencies:
-    "@evocateur/npm-registry-fetch" "^4.0.0"
-    aproba "^2.0.0"
-    figgy-pudding "^3.5.1"
-    get-stream "^4.0.0"
-    npm-package-arg "^6.1.0"
-
-"@evocateur/libnpmpublish@^1.2.2":
-  version "1.2.2"
-  resolved "https://registry.yarnpkg.com/@evocateur/libnpmpublish/-/libnpmpublish-1.2.2.tgz#55df09d2dca136afba9c88c759ca272198db9f1a"
-  integrity sha512-MJrrk9ct1FeY9zRlyeoyMieBjGDG9ihyyD9/Ft6MMrTxql9NyoEx2hw9casTIP4CdqEVu+3nQ2nXxoJ8RCXyFg==
-  dependencies:
-    "@evocateur/npm-registry-fetch" "^4.0.0"
-    aproba "^2.0.0"
-    figgy-pudding "^3.5.1"
-    get-stream "^4.0.0"
-    lodash.clonedeep "^4.5.0"
-    normalize-package-data "^2.4.0"
-    npm-package-arg "^6.1.0"
-    semver "^5.5.1"
-    ssri "^6.0.1"
-
-"@evocateur/npm-registry-fetch@^4.0.0":
-  version "4.0.0"
-  resolved "https://registry.yarnpkg.com/@evocateur/npm-registry-fetch/-/npm-registry-fetch-4.0.0.tgz#8c4c38766d8d32d3200fcb0a83f064b57365ed66"
-  integrity sha512-k1WGfKRQyhJpIr+P17O5vLIo2ko1PFLKwoetatdduUSt/aQ4J2sJrJwwatdI5Z3SiYk/mRH9S3JpdmMFd/IK4g==
-  dependencies:
-    JSONStream "^1.3.4"
-    bluebird "^3.5.1"
-    figgy-pudding "^3.4.1"
-    lru-cache "^5.1.1"
-    make-fetch-happen "^5.0.0"
-    npm-package-arg "^6.1.0"
-    safe-buffer "^5.1.2"
-
-"@evocateur/pacote@^9.6.3":
-  version "9.6.5"
-  resolved "https://registry.yarnpkg.com/@evocateur/pacote/-/pacote-9.6.5.tgz#33de32ba210b6f17c20ebab4d497efc6755f4ae5"
-  integrity sha512-EI552lf0aG2nOV8NnZpTxNo2PcXKPmDbF9K8eCBFQdIZwHNGN/mi815fxtmUMa2wTa1yndotICIDt/V0vpEx2w==
-  dependencies:
-    "@evocateur/npm-registry-fetch" "^4.0.0"
-    bluebird "^3.5.3"
-    cacache "^12.0.3"
-    chownr "^1.1.2"
-    figgy-pudding "^3.5.1"
-    get-stream "^4.1.0"
-    glob "^7.1.4"
-    infer-owner "^1.0.4"
-    lru-cache "^5.1.1"
-    make-fetch-happen "^5.0.0"
-    minimatch "^3.0.4"
-    minipass "^2.3.5"
-    mississippi "^3.0.0"
-    mkdirp "^0.5.1"
-    normalize-package-data "^2.5.0"
-    npm-package-arg "^6.1.0"
-    npm-packlist "^1.4.4"
-    npm-pick-manifest "^3.0.0"
-    osenv "^0.1.5"
-    promise-inflight "^1.0.1"
-    promise-retry "^1.1.1"
-    protoduck "^5.0.1"
-    rimraf "^2.6.3"
-    safe-buffer "^5.2.0"
-    semver "^5.7.0"
-    ssri "^6.0.1"
-    tar "^4.4.10"
-    unique-filename "^1.1.1"
-    which "^1.3.1"
-
 "@gulp-sourcemaps/identity-map@^2.0.1":
   version "2.0.1"
   resolved "https://registry.yarnpkg.com/@gulp-sourcemaps/identity-map/-/identity-map-2.0.1.tgz#a6e8b1abec8f790ec6be2b8c500e6e68037c0019"
@@ -461,111 +385,94 @@
   resolved "https://registry.yarnpkg.com/@istanbuljs/schema/-/schema-0.1.3.tgz#e45e384e4b8ec16bce2fd903af78450f6bf7ec98"
   integrity sha512-ZXRY4jNvVgSVQ8DL3LTcakaAtXwTVUxE81hslsyD2AtoXW/wVob10HkOJ1X/pAlcI7D+2YoZKg5do8G/w6RYgA==
 
-"@jest/console@^24.9.0":
-  version "24.9.0"
-  resolved "https://registry.yarnpkg.com/@jest/console/-/console-24.9.0.tgz#79b1bc06fb74a8cfb01cbdedf945584b1b9707f0"
-  integrity sha512-Zuj6b8TnKXi3q4ymac8EQfc3ea/uhLeCGThFqXeC8H9/raaH8ARPUTdId+XyGd03Z4In0/VjD2OYFcBF09fNLQ==
-  dependencies:
-    "@jest/source-map" "^24.9.0"
-    chalk "^2.0.1"
-    slash "^2.0.0"
-
-"@jest/console@^26.6.2":
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/@jest/console/-/console-26.6.2.tgz#4e04bc464014358b03ab4937805ee36a0aeb98f2"
-  integrity sha512-IY1R2i2aLsLr7Id3S6p2BA82GNWryt4oSvEXLAKc+L2zdi89dSkE8xC1C+0kpATG4JhBJREnQOH7/zmccM2B0g==
+"@jest/console@^27.0.1":
+  version "27.0.1"
+  resolved "https://registry.yarnpkg.com/@jest/console/-/console-27.0.1.tgz#c6acfec201f9b6823596eb6c4fcd77c89a8b27e9"
+  integrity sha512-50E6nN2F5cAXn1lDljn0gE9F0WFXHYz/u0EeR7sOt4nbRPNli34ckbl6CUDaDABJbHt62DYnyQAIB3KgdzwKDw==
   dependencies:
-    "@jest/types" "^26.6.2"
+    "@jest/types" "^27.0.1"
     "@types/node" "*"
     chalk "^4.0.0"
-    jest-message-util "^26.6.2"
-    jest-util "^26.6.2"
+    jest-message-util "^27.0.1"
+    jest-util "^27.0.1"
     slash "^3.0.0"
 
-"@jest/core@^26.6.3":
-  version "26.6.3"
-  resolved "https://registry.yarnpkg.com/@jest/core/-/core-26.6.3.tgz#7639fcb3833d748a4656ada54bde193051e45fad"
-  integrity sha512-xvV1kKbhfUqFVuZ8Cyo+JPpipAHHAV3kcDBftiduK8EICXmTFddryy3P7NfZt8Pv37rA9nEJBKCCkglCPt/Xjw==
+"@jest/core@^27.0.1":
+  version "27.0.1"
+  resolved "https://registry.yarnpkg.com/@jest/core/-/core-27.0.1.tgz#88d0ff55f465fe1fc3a940718e8cf0fea242be4b"
+  integrity sha512-PiCbKSMf6t8PEfY3MAd0Ldn3aJAt5T+UcaFkAfMZ1VZgas35+fXk5uHIjAQHQLNIHZWX19TLv0wWNT03yvrw6w==
   dependencies:
-    "@jest/console" "^26.6.2"
-    "@jest/reporters" "^26.6.2"
-    "@jest/test-result" "^26.6.2"
-    "@jest/transform" "^26.6.2"
-    "@jest/types" "^26.6.2"
+    "@jest/console" "^27.0.1"
+    "@jest/reporters" "^27.0.1"
+    "@jest/test-result" "^27.0.1"
+    "@jest/transform" "^27.0.1"
+    "@jest/types" "^27.0.1"
     "@types/node" "*"
     ansi-escapes "^4.2.1"
     chalk "^4.0.0"
+    emittery "^0.8.1"
     exit "^0.1.2"
     graceful-fs "^4.2.4"
-    jest-changed-files "^26.6.2"
-    jest-config "^26.6.3"
-    jest-haste-map "^26.6.2"
-    jest-message-util "^26.6.2"
-    jest-regex-util "^26.0.0"
-    jest-resolve "^26.6.2"
-    jest-resolve-dependencies "^26.6.3"
-    jest-runner "^26.6.3"
-    jest-runtime "^26.6.3"
-    jest-snapshot "^26.6.2"
-    jest-util "^26.6.2"
-    jest-validate "^26.6.2"
-    jest-watcher "^26.6.2"
-    micromatch "^4.0.2"
+    jest-changed-files "^27.0.1"
+    jest-config "^27.0.1"
+    jest-haste-map "^27.0.1"
+    jest-message-util "^27.0.1"
+    jest-regex-util "^27.0.1"
+    jest-resolve "^27.0.1"
+    jest-resolve-dependencies "^27.0.1"
+    jest-runner "^27.0.1"
+    jest-runtime "^27.0.1"
+    jest-snapshot "^27.0.1"
+    jest-util "^27.0.1"
+    jest-validate "^27.0.1"
+    jest-watcher "^27.0.1"
+    micromatch "^4.0.4"
     p-each-series "^2.1.0"
     rimraf "^3.0.0"
     slash "^3.0.0"
     strip-ansi "^6.0.0"
 
-"@jest/environment@^26.6.2":
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/@jest/environment/-/environment-26.6.2.tgz#ba364cc72e221e79cc8f0a99555bf5d7577cf92c"
-  integrity sha512-nFy+fHl28zUrRsCeMB61VDThV1pVTtlEokBRgqPrcT1JNq4yRNIyTHfyht6PqtUvY9IsuLGTrbG8kPXjSZIZwA==
+"@jest/environment@^27.0.1":
+  version "27.0.1"
+  resolved "https://registry.yarnpkg.com/@jest/environment/-/environment-27.0.1.tgz#27ed89bf8179c0a030690f063d922d6da7a519ac"
+  integrity sha512-nG+r3uSs2pOTsdhgt6lUm4ZGJLRcTc6HZIkrFsVpPcdSqEpJehEny9r9y2Bmhkn8fKXWdGCYJKF3i4nKO0HSmA==
   dependencies:
-    "@jest/fake-timers" "^26.6.2"
-    "@jest/types" "^26.6.2"
+    "@jest/fake-timers" "^27.0.1"
+    "@jest/types" "^27.0.1"
     "@types/node" "*"
-    jest-mock "^26.6.2"
-
-"@jest/fake-timers@^24.9.0":
-  version "24.9.0"
-  resolved "https://registry.yarnpkg.com/@jest/fake-timers/-/fake-timers-24.9.0.tgz#ba3e6bf0eecd09a636049896434d306636540c93"
-  integrity sha512-eWQcNa2YSwzXWIMC5KufBh3oWRIijrQFROsIqt6v/NS9Io/gknw1jsAC9c+ih/RQX4A3O7SeWAhQeN0goKhT9A==
-  dependencies:
-    "@jest/types" "^24.9.0"
-    jest-message-util "^24.9.0"
-    jest-mock "^24.9.0"
+    jest-mock "^27.0.1"
 
-"@jest/fake-timers@^26.6.2":
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/@jest/fake-timers/-/fake-timers-26.6.2.tgz#459c329bcf70cee4af4d7e3f3e67848123535aad"
-  integrity sha512-14Uleatt7jdzefLPYM3KLcnUl1ZNikaKq34enpb5XG9i81JpppDb5muZvonvKyrl7ftEHkKS5L5/eB/kxJ+bvA==
+"@jest/fake-timers@^27.0.1":
+  version "27.0.1"
+  resolved "https://registry.yarnpkg.com/@jest/fake-timers/-/fake-timers-27.0.1.tgz#6987a596b0bcf8c07653086076c17058b4c77b5c"
+  integrity sha512-3CyLJQnHzKI4TCJSCo+I9TzIHjSK4RrNEk93jFM6Q9+9WlSJ3mpMq/p2YuKMe0SiHKbmZOd5G/Ll5ofF9Xkw9g==
   dependencies:
-    "@jest/types" "^26.6.2"
-    "@sinonjs/fake-timers" "^6.0.1"
+    "@jest/types" "^27.0.1"
+    "@sinonjs/fake-timers" "^7.0.2"
     "@types/node" "*"
-    jest-message-util "^26.6.2"
-    jest-mock "^26.6.2"
-    jest-util "^26.6.2"
+    jest-message-util "^27.0.1"
+    jest-mock "^27.0.1"
+    jest-util "^27.0.1"
 
-"@jest/globals@^26.6.2":
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/@jest/globals/-/globals-26.6.2.tgz#5b613b78a1aa2655ae908eba638cc96a20df720a"
-  integrity sha512-85Ltnm7HlB/KesBUuALwQ68YTU72w9H2xW9FjZ1eL1U3lhtefjjl5c2MiUbpXt/i6LaPRvoOFJ22yCBSfQ0JIA==
+"@jest/globals@^27.0.1":
+  version "27.0.1"
+  resolved "https://registry.yarnpkg.com/@jest/globals/-/globals-27.0.1.tgz#14c776942f7047a04f2aea09b148065e2aa9d7e9"
+  integrity sha512-80ZCzgopysKdpp5EOglgjApKxiNDR96PG4PwngB4fTwZ4qqqSKo0EwGwQIhl16szQ1M2xCVYmr9J6KelvnABNQ==
   dependencies:
-    "@jest/environment" "^26.6.2"
-    "@jest/types" "^26.6.2"
-    expect "^26.6.2"
+    "@jest/environment" "^27.0.1"
+    "@jest/types" "^27.0.1"
+    expect "^27.0.1"
 
-"@jest/reporters@^26.6.2":
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/@jest/reporters/-/reporters-26.6.2.tgz#1f518b99637a5f18307bd3ecf9275f6882a667f6"
-  integrity sha512-h2bW53APG4HvkOnVMo8q3QXa6pcaNt1HkwVsOPMBV6LD/q9oSpxNSYZQYkAnjdMjrJ86UuYeLo+aEZClV6opnw==
+"@jest/reporters@^27.0.1":
+  version "27.0.1"
+  resolved "https://registry.yarnpkg.com/@jest/reporters/-/reporters-27.0.1.tgz#5b491f64e37c9b97b13e564f18f36b6697d28045"
+  integrity sha512-lZbJWuS1h/ytKERfu1D6tEQ4PuQ7+15S4+HrSzHR0i7AGVT1WRo49h4fZqxASOp7AQCupUVtPJNZDkaG9ZXy0g==
   dependencies:
     "@bcoe/v8-coverage" "^0.2.3"
-    "@jest/console" "^26.6.2"
-    "@jest/test-result" "^26.6.2"
-    "@jest/transform" "^26.6.2"
-    "@jest/types" "^26.6.2"
+    "@jest/console" "^27.0.1"
+    "@jest/test-result" "^27.0.1"
+    "@jest/transform" "^27.0.1"
+    "@jest/types" "^27.0.1"
     chalk "^4.0.0"
     collect-v8-coverage "^1.0.0"
     exit "^0.1.2"
@@ -576,96 +483,67 @@
     istanbul-lib-report "^3.0.0"
     istanbul-lib-source-maps "^4.0.0"
     istanbul-reports "^3.0.2"
-    jest-haste-map "^26.6.2"
-    jest-resolve "^26.6.2"
-    jest-util "^26.6.2"
-    jest-worker "^26.6.2"
+    jest-haste-map "^27.0.1"
+    jest-resolve "^27.0.1"
+    jest-util "^27.0.1"
+    jest-worker "^27.0.1"
     slash "^3.0.0"
     source-map "^0.6.0"
     string-length "^4.0.1"
     terminal-link "^2.0.0"
     v8-to-istanbul "^7.0.0"
-  optionalDependencies:
-    node-notifier "^8.0.0"
 
-"@jest/source-map@^24.9.0":
-  version "24.9.0"
-  resolved "https://registry.yarnpkg.com/@jest/source-map/-/source-map-24.9.0.tgz#0e263a94430be4b41da683ccc1e6bffe2a191714"
-  integrity sha512-/Xw7xGlsZb4MJzNDgB7PW5crou5JqWiBQaz6xyPd3ArOg2nfn/PunV8+olXbbEZzNl591o5rWKE9BRDaFAuIBg==
-  dependencies:
-    callsites "^3.0.0"
-    graceful-fs "^4.1.15"
-    source-map "^0.6.0"
-
-"@jest/source-map@^26.6.2":
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/@jest/source-map/-/source-map-26.6.2.tgz#29af5e1e2e324cafccc936f218309f54ab69d535"
-  integrity sha512-YwYcCwAnNmOVsZ8mr3GfnzdXDAl4LaenZP5z+G0c8bzC9/dugL8zRmxZzdoTl4IaS3CryS1uWnROLPFmb6lVvA==
+"@jest/source-map@^27.0.1":
+  version "27.0.1"
+  resolved "https://registry.yarnpkg.com/@jest/source-map/-/source-map-27.0.1.tgz#2afbf73ddbaddcb920a8e62d0238a0a9e0a8d3e4"
+  integrity sha512-yMgkF0f+6WJtDMdDYNavmqvbHtiSpwRN2U/W+6uztgfqgkq/PXdKPqjBTUF1RD/feth4rH5N3NW0T5+wIuln1A==
   dependencies:
     callsites "^3.0.0"
     graceful-fs "^4.2.4"
     source-map "^0.6.0"
 
-"@jest/test-result@^24.9.0":
-  version "24.9.0"
-  resolved "https://registry.yarnpkg.com/@jest/test-result/-/test-result-24.9.0.tgz#11796e8aa9dbf88ea025757b3152595ad06ba0ca"
-  integrity sha512-XEFrHbBonBJ8dGp2JmF8kP/nQI/ImPpygKHwQ/SY+es59Z3L5PI4Qb9TQQMAEeYsThG1xF0k6tmG0tIKATNiiA==
-  dependencies:
-    "@jest/console" "^24.9.0"
-    "@jest/types" "^24.9.0"
-    "@types/istanbul-lib-coverage" "^2.0.0"
-
-"@jest/test-result@^26.6.2":
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/@jest/test-result/-/test-result-26.6.2.tgz#55da58b62df134576cc95476efa5f7949e3f5f18"
-  integrity sha512-5O7H5c/7YlojphYNrK02LlDIV2GNPYisKwHm2QTKjNZeEzezCbwYs9swJySv2UfPMyZ0VdsmMv7jIlD/IKYQpQ==
+"@jest/test-result@^27.0.1":
+  version "27.0.1"
+  resolved "https://registry.yarnpkg.com/@jest/test-result/-/test-result-27.0.1.tgz#8fb97214268ea21cf8cfb83edc0f17e558b3466d"
+  integrity sha512-5aa+ibX2dsGSDLKaQMZb453MqjJU/CRVumebXfaJmuzuGE4qf87yQ2QZ6PEpEtBwVUEgrJCzi3jLCRaUbksSuw==
   dependencies:
-    "@jest/console" "^26.6.2"
-    "@jest/types" "^26.6.2"
+    "@jest/console" "^27.0.1"
+    "@jest/types" "^27.0.1"
     "@types/istanbul-lib-coverage" "^2.0.0"
     collect-v8-coverage "^1.0.0"
 
-"@jest/test-sequencer@^26.6.3":
-  version "26.6.3"
-  resolved "https://registry.yarnpkg.com/@jest/test-sequencer/-/test-sequencer-26.6.3.tgz#98e8a45100863886d074205e8ffdc5a7eb582b17"
-  integrity sha512-YHlVIjP5nfEyjlrSr8t/YdNfU/1XEt7c5b4OxcXCjyRhjzLYu/rO69/WHPuYcbCWkz8kAeZVZp2N2+IOLLEPGw==
+"@jest/test-sequencer@^27.0.1":
+  version "27.0.1"
+  resolved "https://registry.yarnpkg.com/@jest/test-sequencer/-/test-sequencer-27.0.1.tgz#2a3b85130978fc545d8ee6c34d65ff4231dbad86"
+  integrity sha512-yK2c2iruJ35WgH4KH8whS72uH+FASJUrzwxzNKTzLAEWmNpWKNEPOsSEKsHynvz78bLHafrTg4adN7RrYNbEOA==
   dependencies:
-    "@jest/test-result" "^26.6.2"
+    "@jest/test-result" "^27.0.1"
     graceful-fs "^4.2.4"
-    jest-haste-map "^26.6.2"
-    jest-runner "^26.6.3"
-    jest-runtime "^26.6.3"
+    jest-haste-map "^27.0.1"
+    jest-runner "^27.0.1"
+    jest-runtime "^27.0.1"
 
-"@jest/transform@^26.6.2":
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/@jest/transform/-/transform-26.6.2.tgz#5ac57c5fa1ad17b2aae83e73e45813894dcf2e4b"
-  integrity sha512-E9JjhUgNzvuQ+vVAL21vlyfy12gP0GhazGgJC4h6qUt1jSdUXGWJ1wfu/X7Sd8etSgxV4ovT1pb9v5D6QW4XgA==
+"@jest/transform@^27.0.1":
+  version "27.0.1"
+  resolved "https://registry.yarnpkg.com/@jest/transform/-/transform-27.0.1.tgz#a9ece291f82273d5e58132550996c16edd5a902a"
+  integrity sha512-LC95VpT6wMnQ96dRJDlUiAnW/90zyh4+jS30szI/5AsfS0qwSlr/O4TPcGoD2WVaVMfo6KvR+brvOtGyMHaNhA==
   dependencies:
     "@babel/core" "^7.1.0"
-    "@jest/types" "^26.6.2"
+    "@jest/types" "^27.0.1"
     babel-plugin-istanbul "^6.0.0"
     chalk "^4.0.0"
     convert-source-map "^1.4.0"
     fast-json-stable-stringify "^2.0.0"
     graceful-fs "^4.2.4"
-    jest-haste-map "^26.6.2"
-    jest-regex-util "^26.0.0"
-    jest-util "^26.6.2"
-    micromatch "^4.0.2"
+    jest-haste-map "^27.0.1"
+    jest-regex-util "^27.0.1"
+    jest-util "^27.0.1"
+    micromatch "^4.0.4"
     pirates "^4.0.1"
     slash "^3.0.0"
     source-map "^0.6.1"
     write-file-atomic "^3.0.0"
 
-"@jest/types@^24.9.0":
-  version "24.9.0"
-  resolved "https://registry.yarnpkg.com/@jest/types/-/types-24.9.0.tgz#63cb26cb7500d069e5a389441a7c6ab5e909fc59"
-  integrity sha512-XKK7ze1apu5JWQ5eZjHITP66AX+QsLlbaJRBGYr8pNzwcAE2JVkwnf0yqjHTsDRcjR0mujy/NmZMXw5kl+kGBw==
-  dependencies:
-    "@types/istanbul-lib-coverage" "^2.0.0"
-    "@types/istanbul-reports" "^1.1.1"
-    "@types/yargs" "^13.0.0"
-
 "@jest/types@^26.6.2":
   version "26.6.2"
   resolved "https://registry.yarnpkg.com/@jest/types/-/types-26.6.2.tgz#bef5a532030e1d88a2f5a6d933f84e97226ed48e"
@@ -677,690 +555,687 @@
     "@types/yargs" "^15.0.0"
     chalk "^4.0.0"
 
-"@lerna/add@3.21.0":
-  version "3.21.0"
-  resolved "https://registry.yarnpkg.com/@lerna/add/-/add-3.21.0.tgz#27007bde71cc7b0a2969ab3c2f0ae41578b4577b"
-  integrity sha512-vhUXXF6SpufBE1EkNEXwz1VLW03f177G9uMOFMQkp6OJ30/PWg4Ekifuz9/3YfgB2/GH8Tu4Lk3O51P2Hskg/A==
-  dependencies:
-    "@evocateur/pacote" "^9.6.3"
-    "@lerna/bootstrap" "3.21.0"
-    "@lerna/command" "3.21.0"
-    "@lerna/filter-options" "3.20.0"
-    "@lerna/npm-conf" "3.16.0"
-    "@lerna/validation-error" "3.13.0"
+"@jest/types@^27.0.1":
+  version "27.0.1"
+  resolved "https://registry.yarnpkg.com/@jest/types/-/types-27.0.1.tgz#631738c942e70045ebbf42a3f9b433036d3845e4"
+  integrity sha512-8A25RRV4twZutsx2D+7WphnDsp7If9Yu6ko0Gxwrwv8BiWESFzka34+Aa2kC8w9xewt7SDuCUSZ6IiAFVj3PRg==
+  dependencies:
+    "@types/istanbul-lib-coverage" "^2.0.0"
+    "@types/istanbul-reports" "^3.0.0"
+    "@types/node" "*"
+    "@types/yargs" "^16.0.0"
+    chalk "^4.0.0"
+
+"@lerna/add@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/add/-/add-4.0.0.tgz#c36f57d132502a57b9e7058d1548b7a565ef183f"
+  integrity sha512-cpmAH1iS3k8JBxNvnMqrGTTjbY/ZAiKa1ChJzFevMYY3eeqbvhsBKnBcxjRXtdrJ6bd3dCQM+ZtK+0i682Fhng==
+  dependencies:
+    "@lerna/bootstrap" "4.0.0"
+    "@lerna/command" "4.0.0"
+    "@lerna/filter-options" "4.0.0"
+    "@lerna/npm-conf" "4.0.0"
+    "@lerna/validation-error" "4.0.0"
     dedent "^0.7.0"
-    npm-package-arg "^6.1.0"
-    p-map "^2.1.0"
-    semver "^6.2.0"
+    npm-package-arg "^8.1.0"
+    p-map "^4.0.0"
+    pacote "^11.2.6"
+    semver "^7.3.4"
 
-"@lerna/bootstrap@3.21.0":
-  version "3.21.0"
-  resolved "https://registry.yarnpkg.com/@lerna/bootstrap/-/bootstrap-3.21.0.tgz#bcd1b651be5b0970b20d8fae04c864548123aed6"
-  integrity sha512-mtNHlXpmvJn6JTu0KcuTTPl2jLsDNud0QacV/h++qsaKbhAaJr/FElNZ5s7MwZFUM3XaDmvWzHKaszeBMHIbBw==
-  dependencies:
-    "@lerna/command" "3.21.0"
-    "@lerna/filter-options" "3.20.0"
-    "@lerna/has-npm-version" "3.16.5"
-    "@lerna/npm-install" "3.16.5"
-    "@lerna/package-graph" "3.18.5"
-    "@lerna/pulse-till-done" "3.13.0"
-    "@lerna/rimraf-dir" "3.16.5"
-    "@lerna/run-lifecycle" "3.16.2"
-    "@lerna/run-topologically" "3.18.5"
-    "@lerna/symlink-binary" "3.17.0"
-    "@lerna/symlink-dependencies" "3.17.0"
-    "@lerna/validation-error" "3.13.0"
+"@lerna/bootstrap@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/bootstrap/-/bootstrap-4.0.0.tgz#5f5c5e2c6cfc8fcec50cb2fbe569a8c607101891"
+  integrity sha512-RkS7UbeM2vu+kJnHzxNRCLvoOP9yGNgkzRdy4UV2hNalD7EP41bLvRVOwRYQ7fhc2QcbhnKNdOBihYRL0LcKtw==
+  dependencies:
+    "@lerna/command" "4.0.0"
+    "@lerna/filter-options" "4.0.0"
+    "@lerna/has-npm-version" "4.0.0"
+    "@lerna/npm-install" "4.0.0"
+    "@lerna/package-graph" "4.0.0"
+    "@lerna/pulse-till-done" "4.0.0"
+    "@lerna/rimraf-dir" "4.0.0"
+    "@lerna/run-lifecycle" "4.0.0"
+    "@lerna/run-topologically" "4.0.0"
+    "@lerna/symlink-binary" "4.0.0"
+    "@lerna/symlink-dependencies" "4.0.0"
+    "@lerna/validation-error" "4.0.0"
     dedent "^0.7.0"
-    get-port "^4.2.0"
-    multimatch "^3.0.0"
-    npm-package-arg "^6.1.0"
+    get-port "^5.1.1"
+    multimatch "^5.0.0"
+    npm-package-arg "^8.1.0"
     npmlog "^4.1.2"
-    p-finally "^1.0.0"
-    p-map "^2.1.0"
-    p-map-series "^1.0.0"
-    p-waterfall "^1.0.0"
-    read-package-tree "^5.1.6"
-    semver "^6.2.0"
+    p-map "^4.0.0"
+    p-map-series "^2.1.0"
+    p-waterfall "^2.1.1"
+    read-package-tree "^5.3.1"
+    semver "^7.3.4"
 
-"@lerna/changed@3.21.0":
-  version "3.21.0"
-  resolved "https://registry.yarnpkg.com/@lerna/changed/-/changed-3.21.0.tgz#108e15f679bfe077af500f58248c634f1044ea0b"
-  integrity sha512-hzqoyf8MSHVjZp0gfJ7G8jaz+++mgXYiNs9iViQGA8JlN/dnWLI5sWDptEH3/B30Izo+fdVz0S0s7ydVE3pWIw==
+"@lerna/changed@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/changed/-/changed-4.0.0.tgz#b9fc76cea39b9292a6cd263f03eb57af85c9270b"
+  integrity sha512-cD+KuPRp6qiPOD+BO6S6SN5cARspIaWSOqGBpGnYzLb4uWT8Vk4JzKyYtc8ym1DIwyoFXHosXt8+GDAgR8QrgQ==
   dependencies:
-    "@lerna/collect-updates" "3.20.0"
-    "@lerna/command" "3.21.0"
-    "@lerna/listable" "3.18.5"
-    "@lerna/output" "3.13.0"
+    "@lerna/collect-updates" "4.0.0"
+    "@lerna/command" "4.0.0"
+    "@lerna/listable" "4.0.0"
+    "@lerna/output" "4.0.0"
 
-"@lerna/check-working-tree@3.16.5":
-  version "3.16.5"
-  resolved "https://registry.yarnpkg.com/@lerna/check-working-tree/-/check-working-tree-3.16.5.tgz#b4f8ae61bb4523561dfb9f8f8d874dd46bb44baa"
-  integrity sha512-xWjVBcuhvB8+UmCSb5tKVLB5OuzSpw96WEhS2uz6hkWVa/Euh1A0/HJwn2cemyK47wUrCQXtczBUiqnq9yX5VQ==
+"@lerna/check-working-tree@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/check-working-tree/-/check-working-tree-4.0.0.tgz#257e36a602c00142e76082a19358e3e1ae8dbd58"
+  integrity sha512-/++bxM43jYJCshBiKP5cRlCTwSJdRSxVmcDAXM+1oUewlZJVSVlnks5eO0uLxokVFvLhHlC5kHMc7gbVFPHv6Q==
   dependencies:
-    "@lerna/collect-uncommitted" "3.16.5"
-    "@lerna/describe-ref" "3.16.5"
-    "@lerna/validation-error" "3.13.0"
+    "@lerna/collect-uncommitted" "4.0.0"
+    "@lerna/describe-ref" "4.0.0"
+    "@lerna/validation-error" "4.0.0"
 
-"@lerna/child-process@3.16.5":
-  version "3.16.5"
-  resolved "https://registry.yarnpkg.com/@lerna/child-process/-/child-process-3.16.5.tgz#38fa3c18064aa4ac0754ad80114776a7b36a69b2"
-  integrity sha512-vdcI7mzei9ERRV4oO8Y1LHBZ3A5+ampRKg1wq5nutLsUA4mEBN6H7JqjWOMY9xZemv6+kATm2ofjJ3lW5TszQg==
+"@lerna/child-process@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/child-process/-/child-process-4.0.0.tgz#341b96a57dffbd9705646d316e231df6fa4df6e1"
+  integrity sha512-XtCnmCT9eyVsUUHx6y/CTBYdV9g2Cr/VxyseTWBgfIur92/YKClfEtJTbOh94jRT62hlKLqSvux/UhxXVh613Q==
   dependencies:
-    chalk "^2.3.1"
-    execa "^1.0.0"
-    strong-log-transformer "^2.0.0"
+    chalk "^4.1.0"
+    execa "^5.0.0"
+    strong-log-transformer "^2.1.0"
 
-"@lerna/clean@3.21.0":
-  version "3.21.0"
-  resolved "https://registry.yarnpkg.com/@lerna/clean/-/clean-3.21.0.tgz#c0b46b5300cc3dae2cda3bec14b803082da3856d"
-  integrity sha512-b/L9l+MDgE/7oGbrav6rG8RTQvRiZLO1zTcG17zgJAAuhlsPxJExMlh2DFwJEVi2les70vMhHfST3Ue1IMMjpg==
-  dependencies:
-    "@lerna/command" "3.21.0"
-    "@lerna/filter-options" "3.20.0"
-    "@lerna/prompt" "3.18.5"
-    "@lerna/pulse-till-done" "3.13.0"
-    "@lerna/rimraf-dir" "3.16.5"
-    p-map "^2.1.0"
-    p-map-series "^1.0.0"
-    p-waterfall "^1.0.0"
-
-"@lerna/cli@3.18.5":
-  version "3.18.5"
-  resolved "https://registry.yarnpkg.com/@lerna/cli/-/cli-3.18.5.tgz#c90c461542fcd35b6d5b015a290fb0dbfb41d242"
-  integrity sha512-erkbxkj9jfc89vVs/jBLY/fM0I80oLmJkFUV3Q3wk9J3miYhP14zgVEBsPZY68IZlEjT6T3Xlq2xO1AVaatHsA==
-  dependencies:
-    "@lerna/global-options" "3.13.0"
+"@lerna/clean@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/clean/-/clean-4.0.0.tgz#8f778b6f2617aa2a936a6b5e085ae62498e57dc5"
+  integrity sha512-uugG2iN9k45ITx2jtd8nEOoAtca8hNlDCUM0N3lFgU/b1mEQYAPRkqr1qs4FLRl/Y50ZJ41wUz1eazS+d/0osA==
+  dependencies:
+    "@lerna/command" "4.0.0"
+    "@lerna/filter-options" "4.0.0"
+    "@lerna/prompt" "4.0.0"
+    "@lerna/pulse-till-done" "4.0.0"
+    "@lerna/rimraf-dir" "4.0.0"
+    p-map "^4.0.0"
+    p-map-series "^2.1.0"
+    p-waterfall "^2.1.1"
+
+"@lerna/cli@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/cli/-/cli-4.0.0.tgz#8eabd334558836c1664df23f19acb95e98b5bbf3"
+  integrity sha512-Neaw3GzFrwZiRZv2g7g6NwFjs3er1vhraIniEs0jjVLPMNC4eata0na3GfE5yibkM/9d3gZdmihhZdZ3EBdvYA==
+  dependencies:
+    "@lerna/global-options" "4.0.0"
     dedent "^0.7.0"
     npmlog "^4.1.2"
-    yargs "^14.2.2"
+    yargs "^16.2.0"
 
-"@lerna/collect-uncommitted@3.16.5":
-  version "3.16.5"
-  resolved "https://registry.yarnpkg.com/@lerna/collect-uncommitted/-/collect-uncommitted-3.16.5.tgz#a494d61aac31cdc7aec4bbe52c96550274132e63"
-  integrity sha512-ZgqnGwpDZiWyzIQVZtQaj9tRizsL4dUOhuOStWgTAw1EMe47cvAY2kL709DzxFhjr6JpJSjXV5rZEAeU3VE0Hg==
+"@lerna/collect-uncommitted@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/collect-uncommitted/-/collect-uncommitted-4.0.0.tgz#855cd64612969371cfc2453b90593053ff1ba779"
+  integrity sha512-ufSTfHZzbx69YNj7KXQ3o66V4RC76ffOjwLX0q/ab//61bObJ41n03SiQEhSlmpP+gmFbTJ3/7pTe04AHX9m/g==
   dependencies:
-    "@lerna/child-process" "3.16.5"
-    chalk "^2.3.1"
-    figgy-pudding "^3.5.1"
+    "@lerna/child-process" "4.0.0"
+    chalk "^4.1.0"
     npmlog "^4.1.2"
 
-"@lerna/collect-updates@3.20.0":
-  version "3.20.0"
-  resolved "https://registry.yarnpkg.com/@lerna/collect-updates/-/collect-updates-3.20.0.tgz#62f9d76ba21a25b7d9fbf31c02de88744a564bd1"
-  integrity sha512-qBTVT5g4fupVhBFuY4nI/3FSJtQVcDh7/gEPOpRxoXB/yCSnT38MFHXWl+y4einLciCjt/+0x6/4AG80fjay2Q==
+"@lerna/collect-updates@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/collect-updates/-/collect-updates-4.0.0.tgz#8e208b1bafd98a372ff1177f7a5e288f6bea8041"
+  integrity sha512-bnNGpaj4zuxsEkyaCZLka9s7nMs58uZoxrRIPJ+nrmrZYp1V5rrd+7/NYTuunOhY2ug1sTBvTAxj3NZQ+JKnOw==
   dependencies:
-    "@lerna/child-process" "3.16.5"
-    "@lerna/describe-ref" "3.16.5"
+    "@lerna/child-process" "4.0.0"
+    "@lerna/describe-ref" "4.0.0"
     minimatch "^3.0.4"
     npmlog "^4.1.2"
-    slash "^2.0.0"
+    slash "^3.0.0"
 
-"@lerna/command@3.21.0":
-  version "3.21.0"
-  resolved "https://registry.yarnpkg.com/@lerna/command/-/command-3.21.0.tgz#9a2383759dc7b700dacfa8a22b2f3a6e190121f7"
-  integrity sha512-T2bu6R8R3KkH5YoCKdutKv123iUgUbW8efVjdGCDnCMthAQzoentOJfDeodBwn0P2OqCl3ohsiNVtSn9h78fyQ==
-  dependencies:
-    "@lerna/child-process" "3.16.5"
-    "@lerna/package-graph" "3.18.5"
-    "@lerna/project" "3.21.0"
-    "@lerna/validation-error" "3.13.0"
-    "@lerna/write-log-file" "3.13.0"
+"@lerna/command@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/command/-/command-4.0.0.tgz#991c7971df8f5bf6ae6e42c808869a55361c1b98"
+  integrity sha512-LM9g3rt5FsPNFqIHUeRwWXLNHJ5NKzOwmVKZ8anSp4e1SPrv2HNc1V02/9QyDDZK/w+5POXH5lxZUI1CHaOK/A==
+  dependencies:
+    "@lerna/child-process" "4.0.0"
+    "@lerna/package-graph" "4.0.0"
+    "@lerna/project" "4.0.0"
+    "@lerna/validation-error" "4.0.0"
+    "@lerna/write-log-file" "4.0.0"
     clone-deep "^4.0.1"
     dedent "^0.7.0"
-    execa "^1.0.0"
+    execa "^5.0.0"
     is-ci "^2.0.0"
     npmlog "^4.1.2"
 
-"@lerna/conventional-commits@3.22.0":
-  version "3.22.0"
-  resolved "https://registry.yarnpkg.com/@lerna/conventional-commits/-/conventional-commits-3.22.0.tgz#2798f4881ee2ef457bdae027ab7d0bf0af6f1e09"
-  integrity sha512-z4ZZk1e8Mhz7+IS8NxHr64wyklHctCJyWpJKEZZPJiLFJ8yKto/x38O80R10pIzC0rr8Sy/OsjSH4bl0TbbgqA==
-  dependencies:
-    "@lerna/validation-error" "3.13.0"
-    conventional-changelog-angular "^5.0.3"
-    conventional-changelog-core "^3.1.6"
-    conventional-recommended-bump "^5.0.0"
-    fs-extra "^8.1.0"
-    get-stream "^4.0.0"
+"@lerna/conventional-commits@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/conventional-commits/-/conventional-commits-4.0.0.tgz#660fb2c7b718cb942ead70110df61f18c6f99750"
+  integrity sha512-CSUQRjJHFrH8eBn7+wegZLV3OrNc0Y1FehYfYGhjLE2SIfpCL4bmfu/ViYuHh9YjwHaA+4SX6d3hR+xkeseKmw==
+  dependencies:
+    "@lerna/validation-error" "4.0.0"
+    conventional-changelog-angular "^5.0.12"
+    conventional-changelog-core "^4.2.2"
+    conventional-recommended-bump "^6.1.0"
+    fs-extra "^9.1.0"
+    get-stream "^6.0.0"
     lodash.template "^4.5.0"
-    npm-package-arg "^6.1.0"
+    npm-package-arg "^8.1.0"
     npmlog "^4.1.2"
-    pify "^4.0.1"
-    semver "^6.2.0"
+    pify "^5.0.0"
+    semver "^7.3.4"
 
-"@lerna/create-symlink@3.16.2":
-  version "3.16.2"
-  resolved "https://registry.yarnpkg.com/@lerna/create-symlink/-/create-symlink-3.16.2.tgz#412cb8e59a72f5a7d9463e4e4721ad2070149967"
-  integrity sha512-pzXIJp6av15P325sgiIRpsPXLFmkisLhMBCy4764d+7yjf2bzrJ4gkWVMhsv4AdF0NN3OyZ5jjzzTtLNqfR+Jw==
+"@lerna/create-symlink@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/create-symlink/-/create-symlink-4.0.0.tgz#8c5317ce5ae89f67825443bd7651bf4121786228"
+  integrity sha512-I0phtKJJdafUiDwm7BBlEUOtogmu8+taxq6PtIrxZbllV9hWg59qkpuIsiFp+no7nfRVuaasNYHwNUhDAVQBig==
   dependencies:
-    "@zkochan/cmd-shim" "^3.1.0"
-    fs-extra "^8.1.0"
+    cmd-shim "^4.1.0"
+    fs-extra "^9.1.0"
     npmlog "^4.1.2"
 
-"@lerna/create@3.22.0":
-  version "3.22.0"
-  resolved "https://registry.yarnpkg.com/@lerna/create/-/create-3.22.0.tgz#d6bbd037c3dc5b425fe5f6d1b817057c278f7619"
-  integrity sha512-MdiQQzCcB4E9fBF1TyMOaAEz9lUjIHp1Ju9H7f3lXze5JK6Fl5NYkouAvsLgY6YSIhXMY8AHW2zzXeBDY4yWkw==
+"@lerna/create@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/create/-/create-4.0.0.tgz#b6947e9b5dfb6530321952998948c3e63d64d730"
+  integrity sha512-mVOB1niKByEUfxlbKTM1UNECWAjwUdiioIbRQZEeEabtjCL69r9rscIsjlGyhGWCfsdAG5wfq4t47nlDXdLLag==
   dependencies:
-    "@evocateur/pacote" "^9.6.3"
-    "@lerna/child-process" "3.16.5"
-    "@lerna/command" "3.21.0"
-    "@lerna/npm-conf" "3.16.0"
-    "@lerna/validation-error" "3.13.0"
-    camelcase "^5.0.0"
+    "@lerna/child-process" "4.0.0"
+    "@lerna/command" "4.0.0"
+    "@lerna/npm-conf" "4.0.0"
+    "@lerna/validation-error" "4.0.0"
     dedent "^0.7.0"
-    fs-extra "^8.1.0"
-    globby "^9.2.0"
-    init-package-json "^1.10.3"
-    npm-package-arg "^6.1.0"
-    p-reduce "^1.0.0"
-    pify "^4.0.1"
-    semver "^6.2.0"
-    slash "^2.0.0"
-    validate-npm-package-license "^3.0.3"
+    fs-extra "^9.1.0"
+    globby "^11.0.2"
+    init-package-json "^2.0.2"
+    npm-package-arg "^8.1.0"
+    p-reduce "^2.1.0"
+    pacote "^11.2.6"
+    pify "^5.0.0"
+    semver "^7.3.4"
+    slash "^3.0.0"
+    validate-npm-package-license "^3.0.4"
     validate-npm-package-name "^3.0.0"
-    whatwg-url "^7.0.0"
+    whatwg-url "^8.4.0"
+    yargs-parser "20.2.4"
 
-"@lerna/describe-ref@3.16.5":
-  version "3.16.5"
-  resolved "https://registry.yarnpkg.com/@lerna/describe-ref/-/describe-ref-3.16.5.tgz#a338c25aaed837d3dc70b8a72c447c5c66346ac0"
-  integrity sha512-c01+4gUF0saOOtDBzbLMFOTJDHTKbDFNErEY6q6i9QaXuzy9LNN62z+Hw4acAAZuJQhrVWncVathcmkkjvSVGw==
+"@lerna/describe-ref@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/describe-ref/-/describe-ref-4.0.0.tgz#53c53b4ea65fdceffa072a62bfebe6772c45d9ec"
+  integrity sha512-eTU5+xC4C5Gcgz+Ey4Qiw9nV2B4JJbMulsYJMW8QjGcGh8zudib7Sduj6urgZXUYNyhYpRs+teci9M2J8u+UvQ==
   dependencies:
-    "@lerna/child-process" "3.16.5"
+    "@lerna/child-process" "4.0.0"
     npmlog "^4.1.2"
 
-"@lerna/diff@3.21.0":
-  version "3.21.0"
-  resolved "https://registry.yarnpkg.com/@lerna/diff/-/diff-3.21.0.tgz#e6df0d8b9916167ff5a49fcb02ac06424280a68d"
-  integrity sha512-5viTR33QV3S7O+bjruo1SaR40m7F2aUHJaDAC7fL9Ca6xji+aw1KFkpCtVlISS0G8vikUREGMJh+c/VMSc8Usw==
+"@lerna/diff@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/diff/-/diff-4.0.0.tgz#6d3071817aaa4205a07bf77cfc6e932796d48b92"
+  integrity sha512-jYPKprQVg41+MUMxx6cwtqsNm0Yxx9GDEwdiPLwcUTFx+/qKCEwifKNJ1oGIPBxyEHX2PFCOjkK39lHoj2qiag==
   dependencies:
-    "@lerna/child-process" "3.16.5"
-    "@lerna/command" "3.21.0"
-    "@lerna/validation-error" "3.13.0"
+    "@lerna/child-process" "4.0.0"
+    "@lerna/command" "4.0.0"
+    "@lerna/validation-error" "4.0.0"
     npmlog "^4.1.2"
 
-"@lerna/exec@3.21.0":
-  version "3.21.0"
-  resolved "https://registry.yarnpkg.com/@lerna/exec/-/exec-3.21.0.tgz#17f07533893cb918a17b41bcc566dc437016db26"
-  integrity sha512-iLvDBrIE6rpdd4GIKTY9mkXyhwsJ2RvQdB9ZU+/NhR3okXfqKc6py/24tV111jqpXTtZUW6HNydT4dMao2hi1Q==
-  dependencies:
-    "@lerna/child-process" "3.16.5"
-    "@lerna/command" "3.21.0"
-    "@lerna/filter-options" "3.20.0"
-    "@lerna/profiler" "3.20.0"
-    "@lerna/run-topologically" "3.18.5"
-    "@lerna/validation-error" "3.13.0"
-    p-map "^2.1.0"
-
-"@lerna/filter-options@3.20.0":
-  version "3.20.0"
-  resolved "https://registry.yarnpkg.com/@lerna/filter-options/-/filter-options-3.20.0.tgz#0f0f5d5a4783856eece4204708cc902cbc8af59b"
-  integrity sha512-bmcHtvxn7SIl/R9gpiNMVG7yjx7WyT0HSGw34YVZ9B+3xF/83N3r5Rgtjh4hheLZ+Q91Or0Jyu5O3Nr+AwZe2g==
-  dependencies:
-    "@lerna/collect-updates" "3.20.0"
-    "@lerna/filter-packages" "3.18.0"
+"@lerna/exec@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/exec/-/exec-4.0.0.tgz#eb6cb95cb92d42590e9e2d628fcaf4719d4a8be6"
+  integrity sha512-VGXtL/b/JfY84NB98VWZpIExfhLOzy0ozm/0XaS4a2SmkAJc5CeUfrhvHxxkxiTBLkU+iVQUyYEoAT0ulQ8PCw==
+  dependencies:
+    "@lerna/child-process" "4.0.0"
+    "@lerna/command" "4.0.0"
+    "@lerna/filter-options" "4.0.0"
+    "@lerna/profiler" "4.0.0"
+    "@lerna/run-topologically" "4.0.0"
+    "@lerna/validation-error" "4.0.0"
+    p-map "^4.0.0"
+
+"@lerna/filter-options@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/filter-options/-/filter-options-4.0.0.tgz#ac94cc515d7fa3b47e2f7d74deddeabb1de5e9e6"
+  integrity sha512-vV2ANOeZhOqM0rzXnYcFFCJ/kBWy/3OA58irXih9AMTAlQLymWAK0akWybl++sUJ4HB9Hx12TOqaXbYS2NM5uw==
+  dependencies:
+    "@lerna/collect-updates" "4.0.0"
+    "@lerna/filter-packages" "4.0.0"
     dedent "^0.7.0"
-    figgy-pudding "^3.5.1"
     npmlog "^4.1.2"
 
-"@lerna/filter-packages@3.18.0":
-  version "3.18.0"
-  resolved "https://registry.yarnpkg.com/@lerna/filter-packages/-/filter-packages-3.18.0.tgz#6a7a376d285208db03a82958cfb8172e179b4e70"
-  integrity sha512-6/0pMM04bCHNATIOkouuYmPg6KH3VkPCIgTfQmdkPJTullERyEQfNUKikrefjxo1vHOoCACDpy65JYyKiAbdwQ==
+"@lerna/filter-packages@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/filter-packages/-/filter-packages-4.0.0.tgz#b1f70d70e1de9cdd36a4e50caa0ac501f8d012f2"
+  integrity sha512-+4AJIkK7iIiOaqCiVTYJxh/I9qikk4XjNQLhE3kixaqgMuHl1NQ99qXRR0OZqAWB9mh8Z1HA9bM5K1HZLBTOqA==
   dependencies:
-    "@lerna/validation-error" "3.13.0"
-    multimatch "^3.0.0"
+    "@lerna/validation-error" "4.0.0"
+    multimatch "^5.0.0"
     npmlog "^4.1.2"
 
-"@lerna/get-npm-exec-opts@3.13.0":
-  version "3.13.0"
-  resolved "https://registry.yarnpkg.com/@lerna/get-npm-exec-opts/-/get-npm-exec-opts-3.13.0.tgz#d1b552cb0088199fc3e7e126f914e39a08df9ea5"
-  integrity sha512-Y0xWL0rg3boVyJk6An/vurKzubyJKtrxYv2sj4bB8Mc5zZ3tqtv0ccbOkmkXKqbzvNNF7VeUt1OJ3DRgtC/QZw==
+"@lerna/get-npm-exec-opts@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/get-npm-exec-opts/-/get-npm-exec-opts-4.0.0.tgz#dc955be94a4ae75c374ef9bce91320887d34608f"
+  integrity sha512-yvmkerU31CTWS2c7DvmAWmZVeclPBqI7gPVr5VATUKNWJ/zmVcU4PqbYoLu92I9Qc4gY1TuUplMNdNuZTSL7IQ==
   dependencies:
     npmlog "^4.1.2"
 
-"@lerna/get-packed@3.16.0":
-  version "3.16.0"
-  resolved "https://registry.yarnpkg.com/@lerna/get-packed/-/get-packed-3.16.0.tgz#1b316b706dcee86c7baa55e50b087959447852ff"
-  integrity sha512-AjsFiaJzo1GCPnJUJZiTW6J1EihrPkc2y3nMu6m3uWFxoleklsSCyImumzVZJssxMi3CPpztj8LmADLedl9kXw==
+"@lerna/get-packed@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/get-packed/-/get-packed-4.0.0.tgz#0989d61624ac1f97e393bdad2137c49cd7a37823"
+  integrity sha512-rfWONRsEIGyPJTxFzC8ECb3ZbsDXJbfqWYyeeQQDrJRPnEJErlltRLPLgC2QWbxFgFPsoDLeQmFHJnf0iDfd8w==
   dependencies:
-    fs-extra "^8.1.0"
-    ssri "^6.0.1"
-    tar "^4.4.8"
+    fs-extra "^9.1.0"
+    ssri "^8.0.1"
+    tar "^6.1.0"
 
-"@lerna/github-client@3.22.0":
-  version "3.22.0"
-  resolved "https://registry.yarnpkg.com/@lerna/github-client/-/github-client-3.22.0.tgz#5d816aa4f76747ed736ae64ff962b8f15c354d95"
-  integrity sha512-O/GwPW+Gzr3Eb5bk+nTzTJ3uv+jh5jGho9BOqKlajXaOkMYGBELEAqV5+uARNGWZFvYAiF4PgqHb6aCUu7XdXg==
+"@lerna/github-client@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/github-client/-/github-client-4.0.0.tgz#2ced67721363ef70f8e12ffafce4410918f4a8a4"
+  integrity sha512-2jhsldZtTKXYUBnOm23Lb0Fx8G4qfSXF9y7UpyUgWUj+YZYd+cFxSuorwQIgk5P4XXrtVhsUesIsli+BYSThiw==
   dependencies:
-    "@lerna/child-process" "3.16.5"
+    "@lerna/child-process" "4.0.0"
     "@octokit/plugin-enterprise-rest" "^6.0.1"
-    "@octokit/rest" "^16.28.4"
-    git-url-parse "^11.1.2"
+    "@octokit/rest" "^18.1.0"
+    git-url-parse "^11.4.4"
     npmlog "^4.1.2"
 
-"@lerna/gitlab-client@3.15.0":
-  version "3.15.0"
-  resolved "https://registry.yarnpkg.com/@lerna/gitlab-client/-/gitlab-client-3.15.0.tgz#91f4ec8c697b5ac57f7f25bd50fe659d24aa96a6"
-  integrity sha512-OsBvRSejHXUBMgwWQqNoioB8sgzL/Pf1pOUhHKtkiMl6aAWjklaaq5HPMvTIsZPfS6DJ9L5OK2GGZuooP/5c8Q==
+"@lerna/gitlab-client@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/gitlab-client/-/gitlab-client-4.0.0.tgz#00dad73379c7b38951d4b4ded043504c14e2b67d"
+  integrity sha512-OMUpGSkeDWFf7BxGHlkbb35T7YHqVFCwBPSIR6wRsszY8PAzCYahtH3IaJzEJyUg6vmZsNl0FSr3pdA2skhxqA==
   dependencies:
-    node-fetch "^2.5.0"
+    node-fetch "^2.6.1"
     npmlog "^4.1.2"
-    whatwg-url "^7.0.0"
-
-"@lerna/global-options@3.13.0":
-  version "3.13.0"
-  resolved "https://registry.yarnpkg.com/@lerna/global-options/-/global-options-3.13.0.tgz#217662290db06ad9cf2c49d8e3100ee28eaebae1"
-  integrity sha512-SlZvh1gVRRzYLVluz9fryY1nJpZ0FHDGB66U9tFfvnnxmueckRQxLopn3tXj3NU1kc3QANT2I5BsQkOqZ4TEFQ==
-
-"@lerna/has-npm-version@3.16.5":
-  version "3.16.5"
-  resolved "https://registry.yarnpkg.com/@lerna/has-npm-version/-/has-npm-version-3.16.5.tgz#ab83956f211d8923ea6afe9b979b38cc73b15326"
-  integrity sha512-WL7LycR9bkftyqbYop5rEGJ9sRFIV55tSGmbN1HLrF9idwOCD7CLrT64t235t3t4O5gehDnwKI5h2U3oxTrF8Q==
-  dependencies:
-    "@lerna/child-process" "3.16.5"
-    semver "^6.2.0"
-
-"@lerna/import@3.22.0":
-  version "3.22.0"
-  resolved "https://registry.yarnpkg.com/@lerna/import/-/import-3.22.0.tgz#1a5f0394f38e23c4f642a123e5e1517e70d068d2"
-  integrity sha512-uWOlexasM5XR6tXi4YehODtH9Y3OZrFht3mGUFFT3OIl2s+V85xIGFfqFGMTipMPAGb2oF1UBLL48kR43hRsOg==
-  dependencies:
-    "@lerna/child-process" "3.16.5"
-    "@lerna/command" "3.21.0"
-    "@lerna/prompt" "3.18.5"
-    "@lerna/pulse-till-done" "3.13.0"
-    "@lerna/validation-error" "3.13.0"
+    whatwg-url "^8.4.0"
+
+"@lerna/global-options@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/global-options/-/global-options-4.0.0.tgz#c7d8b0de6a01d8a845e2621ea89e7f60f18c6a5f"
+  integrity sha512-TRMR8afAHxuYBHK7F++Ogop2a82xQjoGna1dvPOY6ltj/pEx59pdgcJfYcynYqMkFIk8bhLJJN9/ndIfX29FTQ==
+
+"@lerna/has-npm-version@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/has-npm-version/-/has-npm-version-4.0.0.tgz#d3fc3292c545eb28bd493b36e6237cf0279f631c"
+  integrity sha512-LQ3U6XFH8ZmLCsvsgq1zNDqka0Xzjq5ibVN+igAI5ccRWNaUsE/OcmsyMr50xAtNQMYMzmpw5GVLAivT2/YzCg==
+  dependencies:
+    "@lerna/child-process" "4.0.0"
+    semver "^7.3.4"
+
+"@lerna/import@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/import/-/import-4.0.0.tgz#bde656c4a451fa87ae41733ff8a8da60547c5465"
+  integrity sha512-FaIhd+4aiBousKNqC7TX1Uhe97eNKf5/SC7c5WZANVWtC7aBWdmswwDt3usrzCNpj6/Wwr9EtEbYROzxKH8ffg==
+  dependencies:
+    "@lerna/child-process" "4.0.0"
+    "@lerna/command" "4.0.0"
+    "@lerna/prompt" "4.0.0"
+    "@lerna/pulse-till-done" "4.0.0"
+    "@lerna/validation-error" "4.0.0"
     dedent "^0.7.0"
-    fs-extra "^8.1.0"
-    p-map-series "^1.0.0"
+    fs-extra "^9.1.0"
+    p-map-series "^2.1.0"
 
-"@lerna/info@3.21.0":
-  version "3.21.0"
-  resolved "https://registry.yarnpkg.com/@lerna/info/-/info-3.21.0.tgz#76696b676fdb0f35d48c83c63c1e32bb5e37814f"
-  integrity sha512-0XDqGYVBgWxUquFaIptW2bYSIu6jOs1BtkvRTWDDhw4zyEdp6q4eaMvqdSap1CG+7wM5jeLCi6z94wS0AuiuwA==
+"@lerna/info@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/info/-/info-4.0.0.tgz#b9fb0e479d60efe1623603958a831a88b1d7f1fc"
+  integrity sha512-8Uboa12kaCSZEn4XRfPz5KU9XXoexSPS4oeYGj76s2UQb1O1GdnEyfjyNWoUl1KlJ2i/8nxUskpXIftoFYH0/Q==
   dependencies:
-    "@lerna/command" "3.21.0"
-    "@lerna/output" "3.13.0"
-    envinfo "^7.3.1"
+    "@lerna/command" "4.0.0"
+    "@lerna/output" "4.0.0"
+    envinfo "^7.7.4"
 
-"@lerna/init@3.21.0":
-  version "3.21.0"
-  resolved "https://registry.yarnpkg.com/@lerna/init/-/init-3.21.0.tgz#1e810934dc8bf4e5386c031041881d3b4096aa5c"
-  integrity sha512-6CM0z+EFUkFfurwdJCR+LQQF6MqHbYDCBPyhu/d086LRf58GtYZYj49J8mKG9ktayp/TOIxL/pKKjgLD8QBPOg==
+"@lerna/init@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/init/-/init-4.0.0.tgz#dadff67e6dfb981e8ccbe0e6a310e837962f6c7a"
+  integrity sha512-wY6kygop0BCXupzWj5eLvTUqdR7vIAm0OgyV9WHpMYQGfs1V22jhztt8mtjCloD/O0nEe4tJhdG62XU5aYmPNQ==
   dependencies:
-    "@lerna/child-process" "3.16.5"
-    "@lerna/command" "3.21.0"
-    fs-extra "^8.1.0"
-    p-map "^2.1.0"
-    write-json-file "^3.2.0"
+    "@lerna/child-process" "4.0.0"
+    "@lerna/command" "4.0.0"
+    fs-extra "^9.1.0"
+    p-map "^4.0.0"
+    write-json-file "^4.3.0"
 
-"@lerna/link@3.21.0":
-  version "3.21.0"
-  resolved "https://registry.yarnpkg.com/@lerna/link/-/link-3.21.0.tgz#8be68ff0ccee104b174b5bbd606302c2f06e9d9b"
-  integrity sha512-tGu9GxrX7Ivs+Wl3w1+jrLi1nQ36kNI32dcOssij6bg0oZ2M2MDEFI9UF2gmoypTaN9uO5TSsjCFS7aR79HbdQ==
+"@lerna/link@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/link/-/link-4.0.0.tgz#c3a38aabd44279d714e90f2451e31b63f0fb65ba"
+  integrity sha512-KlvPi7XTAcVOByfaLlOeYOfkkDcd+bejpHMCd1KcArcFTwijOwXOVi24DYomIeHvy6HsX/IUquJ4PPUJIeB4+w==
   dependencies:
-    "@lerna/command" "3.21.0"
-    "@lerna/package-graph" "3.18.5"
-    "@lerna/symlink-dependencies" "3.17.0"
-    p-map "^2.1.0"
-    slash "^2.0.0"
+    "@lerna/command" "4.0.0"
+    "@lerna/package-graph" "4.0.0"
+    "@lerna/symlink-dependencies" "4.0.0"
+    p-map "^4.0.0"
+    slash "^3.0.0"
 
-"@lerna/list@3.21.0":
-  version "3.21.0"
-  resolved "https://registry.yarnpkg.com/@lerna/list/-/list-3.21.0.tgz#42f76fafa56dea13b691ec8cab13832691d61da2"
-  integrity sha512-KehRjE83B1VaAbRRkRy6jLX1Cin8ltsrQ7FHf2bhwhRHK0S54YuA6LOoBnY/NtA8bHDX/Z+G5sMY78X30NS9tg==
+"@lerna/list@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/list/-/list-4.0.0.tgz#24b4e6995bd73f81c556793fe502b847efd9d1d7"
+  integrity sha512-L2B5m3P+U4Bif5PultR4TI+KtW+SArwq1i75QZ78mRYxPc0U/piau1DbLOmwrdqr99wzM49t0Dlvl6twd7GHFg==
   dependencies:
-    "@lerna/command" "3.21.0"
-    "@lerna/filter-options" "3.20.0"
-    "@lerna/listable" "3.18.5"
-    "@lerna/output" "3.13.0"
+    "@lerna/command" "4.0.0"
+    "@lerna/filter-options" "4.0.0"
+    "@lerna/listable" "4.0.0"
+    "@lerna/output" "4.0.0"
 
-"@lerna/listable@3.18.5":
-  version "3.18.5"
-  resolved "https://registry.yarnpkg.com/@lerna/listable/-/listable-3.18.5.tgz#e82798405b5ed8fc51843c8ef1e7a0e497388a1a"
-  integrity sha512-Sdr3pVyaEv5A7ZkGGYR7zN+tTl2iDcinryBPvtuv20VJrXBE8wYcOks1edBTcOWsPjCE/rMP4bo1pseyk3UTsg==
+"@lerna/listable@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/listable/-/listable-4.0.0.tgz#d00d6cb4809b403f2b0374fc521a78e318b01214"
+  integrity sha512-/rPOSDKsOHs5/PBLINZOkRIX1joOXUXEtyUs5DHLM8q6/RP668x/1lFhw6Dx7/U+L0+tbkpGtZ1Yt0LewCLgeQ==
   dependencies:
-    "@lerna/query-graph" "3.18.5"
-    chalk "^2.3.1"
+    "@lerna/query-graph" "4.0.0"
+    chalk "^4.1.0"
     columnify "^1.5.4"
 
-"@lerna/log-packed@3.16.0":
-  version "3.16.0"
-  resolved "https://registry.yarnpkg.com/@lerna/log-packed/-/log-packed-3.16.0.tgz#f83991041ee77b2495634e14470b42259fd2bc16"
-  integrity sha512-Fp+McSNBV/P2mnLUYTaSlG8GSmpXM7krKWcllqElGxvAqv6chk2K3c2k80MeVB4WvJ9tRjUUf+i7HUTiQ9/ckQ==
+"@lerna/log-packed@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/log-packed/-/log-packed-4.0.0.tgz#95168fe2e26ac6a71e42f4be857519b77e57a09f"
+  integrity sha512-+dpCiWbdzgMAtpajLToy9PO713IHoE6GV/aizXycAyA07QlqnkpaBNZ8DW84gHdM1j79TWockGJo9PybVhrrZQ==
   dependencies:
-    byte-size "^5.0.1"
+    byte-size "^7.0.0"
     columnify "^1.5.4"
     has-unicode "^2.0.1"
     npmlog "^4.1.2"
 
-"@lerna/npm-conf@3.16.0":
-  version "3.16.0"
-  resolved "https://registry.yarnpkg.com/@lerna/npm-conf/-/npm-conf-3.16.0.tgz#1c10a89ae2f6c2ee96962557738685300d376827"
-  integrity sha512-HbO3DUrTkCAn2iQ9+FF/eisDpWY5POQAOF1m7q//CZjdC2HSW3UYbKEGsSisFxSfaF9Z4jtrV+F/wX6qWs3CuA==
+"@lerna/npm-conf@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/npm-conf/-/npm-conf-4.0.0.tgz#b259fd1e1cee2bf5402b236e770140ff9ade7fd2"
+  integrity sha512-uS7H02yQNq3oejgjxAxqq/jhwGEE0W0ntr8vM3EfpCW1F/wZruwQw+7bleJQ9vUBjmdXST//tk8mXzr5+JXCfw==
   dependencies:
-    config-chain "^1.1.11"
-    pify "^4.0.1"
+    config-chain "^1.1.12"
+    pify "^5.0.0"
 
-"@lerna/npm-dist-tag@3.18.5":
-  version "3.18.5"
-  resolved "https://registry.yarnpkg.com/@lerna/npm-dist-tag/-/npm-dist-tag-3.18.5.tgz#9ef9abb7c104077b31f6fab22cc73b314d54ac55"
-  integrity sha512-xw0HDoIG6HreVsJND9/dGls1c+lf6vhu7yJoo56Sz5bvncTloYGLUppIfDHQr4ZvmPCK8rsh0euCVh2giPxzKQ==
+"@lerna/npm-dist-tag@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/npm-dist-tag/-/npm-dist-tag-4.0.0.tgz#d1e99b4eccd3414142f0548ad331bf2d53f3257a"
+  integrity sha512-F20sg28FMYTgXqEQihgoqSfwmq+Id3zT23CnOwD+XQMPSy9IzyLf1fFVH319vXIw6NF6Pgs4JZN2Qty6/CQXGw==
   dependencies:
-    "@evocateur/npm-registry-fetch" "^4.0.0"
-    "@lerna/otplease" "3.18.5"
-    figgy-pudding "^3.5.1"
-    npm-package-arg "^6.1.0"
+    "@lerna/otplease" "4.0.0"
+    npm-package-arg "^8.1.0"
+    npm-registry-fetch "^9.0.0"
     npmlog "^4.1.2"
 
-"@lerna/npm-install@3.16.5":
-  version "3.16.5"
-  resolved "https://registry.yarnpkg.com/@lerna/npm-install/-/npm-install-3.16.5.tgz#d6bfdc16f81285da66515ae47924d6e278d637d3"
-  integrity sha512-hfiKk8Eku6rB9uApqsalHHTHY+mOrrHeWEs+gtg7+meQZMTS3kzv4oVp5cBZigndQr3knTLjwthT/FX4KvseFg==
+"@lerna/npm-install@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/npm-install/-/npm-install-4.0.0.tgz#31180be3ab3b7d1818a1a0c206aec156b7094c78"
+  integrity sha512-aKNxq2j3bCH3eXl3Fmu4D54s/YLL9WSwV8W7X2O25r98wzrO38AUN6AB9EtmAx+LV/SP15et7Yueg9vSaanRWg==
   dependencies:
-    "@lerna/child-process" "3.16.5"
-    "@lerna/get-npm-exec-opts" "3.13.0"
-    fs-extra "^8.1.0"
-    npm-package-arg "^6.1.0"
+    "@lerna/child-process" "4.0.0"
+    "@lerna/get-npm-exec-opts" "4.0.0"
+    fs-extra "^9.1.0"
+    npm-package-arg "^8.1.0"
     npmlog "^4.1.2"
-    signal-exit "^3.0.2"
-    write-pkg "^3.1.0"
-
-"@lerna/npm-publish@3.18.5":
-  version "3.18.5"
-  resolved "https://registry.yarnpkg.com/@lerna/npm-publish/-/npm-publish-3.18.5.tgz#240e4039959fd9816b49c5b07421e11b5cb000af"
-  integrity sha512-3etLT9+2L8JAx5F8uf7qp6iAtOLSMj+ZYWY6oUgozPi/uLqU0/gsMsEXh3F0+YVW33q0M61RpduBoAlOOZnaTg==
-  dependencies:
-    "@evocateur/libnpmpublish" "^1.2.2"
-    "@lerna/otplease" "3.18.5"
-    "@lerna/run-lifecycle" "3.16.2"
-    figgy-pudding "^3.5.1"
-    fs-extra "^8.1.0"
-    npm-package-arg "^6.1.0"
+    signal-exit "^3.0.3"
+    write-pkg "^4.0.0"
+
+"@lerna/npm-publish@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/npm-publish/-/npm-publish-4.0.0.tgz#84eb62e876fe949ae1fd62c60804423dbc2c4472"
+  integrity sha512-vQb7yAPRo5G5r77DRjHITc9piR9gvEKWrmfCH7wkfBnGWEqu7n8/4bFQ7lhnkujvc8RXOsYpvbMQkNfkYibD/w==
+  dependencies:
+    "@lerna/otplease" "4.0.0"
+    "@lerna/run-lifecycle" "4.0.0"
+    fs-extra "^9.1.0"
+    libnpmpublish "^4.0.0"
+    npm-package-arg "^8.1.0"
     npmlog "^4.1.2"
-    pify "^4.0.1"
-    read-package-json "^2.0.13"
+    pify "^5.0.0"
+    read-package-json "^3.0.0"
 
-"@lerna/npm-run-script@3.16.5":
-  version "3.16.5"
-  resolved "https://registry.yarnpkg.com/@lerna/npm-run-script/-/npm-run-script-3.16.5.tgz#9c2ec82453a26c0b46edc0bb7c15816c821f5c15"
-  integrity sha512-1asRi+LjmVn3pMjEdpqKJZFT/3ZNpb+VVeJMwrJaV/3DivdNg7XlPK9LTrORuKU4PSvhdEZvJmSlxCKyDpiXsQ==
+"@lerna/npm-run-script@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/npm-run-script/-/npm-run-script-4.0.0.tgz#dfebf4f4601442e7c0b5214f9fb0d96c9350743b"
+  integrity sha512-Jmyh9/IwXJjOXqKfIgtxi0bxi1pUeKe5bD3S81tkcy+kyng/GNj9WSqD5ZggoNP2NP//s4CLDAtUYLdP7CU9rA==
   dependencies:
-    "@lerna/child-process" "3.16.5"
-    "@lerna/get-npm-exec-opts" "3.13.0"
+    "@lerna/child-process" "4.0.0"
+    "@lerna/get-npm-exec-opts" "4.0.0"
     npmlog "^4.1.2"
 
-"@lerna/otplease@3.18.5":
-  version "3.18.5"
-  resolved "https://registry.yarnpkg.com/@lerna/otplease/-/otplease-3.18.5.tgz#b77b8e760b40abad9f7658d988f3ea77d4fd0231"
-  integrity sha512-S+SldXAbcXTEDhzdxYLU0ZBKuYyURP/ND2/dK6IpKgLxQYh/z4ScljPDMyKymmEvgiEJmBsPZAAPfmNPEzxjog==
+"@lerna/otplease@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/otplease/-/otplease-4.0.0.tgz#84972eb43448f8a1077435ba1c5e59233b725850"
+  integrity sha512-Sgzbqdk1GH4psNiT6hk+BhjOfIr/5KhGBk86CEfHNJTk9BK4aZYyJD4lpDbDdMjIV4g03G7pYoqHzH765T4fxw==
   dependencies:
-    "@lerna/prompt" "3.18.5"
-    figgy-pudding "^3.5.1"
+    "@lerna/prompt" "4.0.0"
 
-"@lerna/output@3.13.0":
-  version "3.13.0"
-  resolved "https://registry.yarnpkg.com/@lerna/output/-/output-3.13.0.tgz#3ded7cc908b27a9872228a630d950aedae7a4989"
-  integrity sha512-7ZnQ9nvUDu/WD+bNsypmPG5MwZBwu86iRoiW6C1WBuXXDxM5cnIAC1m2WxHeFnjyMrYlRXM9PzOQ9VDD+C15Rg==
+"@lerna/output@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/output/-/output-4.0.0.tgz#b1d72215c0e35483e4f3e9994debc82c621851f2"
+  integrity sha512-Un1sHtO1AD7buDQrpnaYTi2EG6sLF+KOPEAMxeUYG5qG3khTs2Zgzq5WE3dt2N/bKh7naESt20JjIW6tBELP0w==
   dependencies:
     npmlog "^4.1.2"
 
-"@lerna/pack-directory@3.16.4":
-  version "3.16.4"
-  resolved "https://registry.yarnpkg.com/@lerna/pack-directory/-/pack-directory-3.16.4.tgz#3eae5f91bdf5acfe0384510ed53faddc4c074693"
-  integrity sha512-uxSF0HZeGyKaaVHz5FroDY9A5NDDiCibrbYR6+khmrhZtY0Bgn6hWq8Gswl9iIlymA+VzCbshWIMX4o2O8C8ng==
+"@lerna/pack-directory@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/pack-directory/-/pack-directory-4.0.0.tgz#8b617db95d20792f043aaaa13a9ccc0e04cb4c74"
+  integrity sha512-NJrmZNmBHS+5aM+T8N6FVbaKFScVqKlQFJNY2k7nsJ/uklNKsLLl6VhTQBPwMTbf6Tf7l6bcKzpy7aePuq9UiQ==
   dependencies:
-    "@lerna/get-packed" "3.16.0"
-    "@lerna/package" "3.16.0"
-    "@lerna/run-lifecycle" "3.16.2"
-    figgy-pudding "^3.5.1"
-    npm-packlist "^1.4.4"
+    "@lerna/get-packed" "4.0.0"
+    "@lerna/package" "4.0.0"
+    "@lerna/run-lifecycle" "4.0.0"
+    npm-packlist "^2.1.4"
     npmlog "^4.1.2"
-    tar "^4.4.10"
-    temp-write "^3.4.0"
+    tar "^6.1.0"
+    temp-write "^4.0.0"
 
-"@lerna/package-graph@3.18.5":
-  version "3.18.5"
-  resolved "https://registry.yarnpkg.com/@lerna/package-graph/-/package-graph-3.18.5.tgz#c740e2ea3578d059e551633e950690831b941f6b"
-  integrity sha512-8QDrR9T+dBegjeLr+n9WZTVxUYUhIUjUgZ0gvNxUBN8S1WB9r6H5Yk56/MVaB64tA3oGAN9IIxX6w0WvTfFudA==
+"@lerna/package-graph@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/package-graph/-/package-graph-4.0.0.tgz#16a00253a8ac810f72041481cb46bcee8d8123dd"
+  integrity sha512-QED2ZCTkfXMKFoTGoccwUzjHtZMSf3UKX14A4/kYyBms9xfFsesCZ6SLI5YeySEgcul8iuIWfQFZqRw+Qrjraw==
   dependencies:
-    "@lerna/prerelease-id-from-version" "3.16.0"
-    "@lerna/validation-error" "3.13.0"
-    npm-package-arg "^6.1.0"
+    "@lerna/prerelease-id-from-version" "4.0.0"
+    "@lerna/validation-error" "4.0.0"
+    npm-package-arg "^8.1.0"
     npmlog "^4.1.2"
-    semver "^6.2.0"
+    semver "^7.3.4"
 
-"@lerna/package@3.16.0":
-  version "3.16.0"
-  resolved "https://registry.yarnpkg.com/@lerna/package/-/package-3.16.0.tgz#7e0a46e4697ed8b8a9c14d59c7f890e0d38ba13c"
-  integrity sha512-2lHBWpaxcBoiNVbtyLtPUuTYEaB/Z+eEqRS9duxpZs6D+mTTZMNy6/5vpEVSCBmzvdYpyqhqaYjjSLvjjr5Riw==
+"@lerna/package@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/package/-/package-4.0.0.tgz#1b4c259c4bcff45c876ee1d591a043aacbc0d6b7"
+  integrity sha512-l0M/izok6FlyyitxiQKr+gZLVFnvxRQdNhzmQ6nRnN9dvBJWn+IxxpM+cLqGACatTnyo9LDzNTOj2Db3+s0s8Q==
   dependencies:
-    load-json-file "^5.3.0"
-    npm-package-arg "^6.1.0"
-    write-pkg "^3.1.0"
+    load-json-file "^6.2.0"
+    npm-package-arg "^8.1.0"
+    write-pkg "^4.0.0"
 
-"@lerna/prerelease-id-from-version@3.16.0":
-  version "3.16.0"
-  resolved "https://registry.yarnpkg.com/@lerna/prerelease-id-from-version/-/prerelease-id-from-version-3.16.0.tgz#b24bfa789f5e1baab914d7b08baae9b7bd7d83a1"
-  integrity sha512-qZyeUyrE59uOK8rKdGn7jQz+9uOpAaF/3hbslJVFL1NqF9ELDTqjCPXivuejMX/lN4OgD6BugTO4cR7UTq/sZA==
+"@lerna/prerelease-id-from-version@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/prerelease-id-from-version/-/prerelease-id-from-version-4.0.0.tgz#c7e0676fcee1950d85630e108eddecdd5b48c916"
+  integrity sha512-GQqguzETdsYRxOSmdFZ6zDBXDErIETWOqomLERRY54f4p+tk4aJjoVdd9xKwehC9TBfIFvlRbL1V9uQGHh1opg==
   dependencies:
-    semver "^6.2.0"
+    semver "^7.3.4"
 
-"@lerna/profiler@3.20.0":
-  version "3.20.0"
-  resolved "https://registry.yarnpkg.com/@lerna/profiler/-/profiler-3.20.0.tgz#0f6dc236f4ea8f9ea5f358c6703305a4f32ad051"
-  integrity sha512-bh8hKxAlm6yu8WEOvbLENm42i2v9SsR4WbrCWSbsmOElx3foRnMlYk7NkGECa+U5c3K4C6GeBbwgqs54PP7Ljg==
+"@lerna/profiler@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/profiler/-/profiler-4.0.0.tgz#8a53ab874522eae15d178402bff90a14071908e9"
+  integrity sha512-/BaEbqnVh1LgW/+qz8wCuI+obzi5/vRE8nlhjPzdEzdmWmZXuCKyWSEzAyHOJWw1ntwMiww5dZHhFQABuoFz9Q==
   dependencies:
-    figgy-pudding "^3.5.1"
-    fs-extra "^8.1.0"
+    fs-extra "^9.1.0"
     npmlog "^4.1.2"
-    upath "^1.2.0"
+    upath "^2.0.1"
 
-"@lerna/project@3.21.0":
-  version "3.21.0"
-  resolved "https://registry.yarnpkg.com/@lerna/project/-/project-3.21.0.tgz#5d784d2d10c561a00f20320bcdb040997c10502d"
-  integrity sha512-xT1mrpET2BF11CY32uypV2GPtPVm6Hgtha7D81GQP9iAitk9EccrdNjYGt5UBYASl4CIDXBRxwmTTVGfrCx82A==
+"@lerna/project@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/project/-/project-4.0.0.tgz#ff84893935833533a74deff30c0e64ddb7f0ba6b"
+  integrity sha512-o0MlVbDkD5qRPkFKlBZsXZjoNTWPyuL58564nSfZJ6JYNmgAptnWPB2dQlAc7HWRZkmnC2fCkEdoU+jioPavbg==
   dependencies:
-    "@lerna/package" "3.16.0"
-    "@lerna/validation-error" "3.13.0"
-    cosmiconfig "^5.1.0"
+    "@lerna/package" "4.0.0"
+    "@lerna/validation-error" "4.0.0"
+    cosmiconfig "^7.0.0"
     dedent "^0.7.0"
-    dot-prop "^4.2.0"
-    glob-parent "^5.0.0"
-    globby "^9.2.0"
-    load-json-file "^5.3.0"
+    dot-prop "^6.0.1"
+    glob-parent "^5.1.1"
+    globby "^11.0.2"
+    load-json-file "^6.2.0"
     npmlog "^4.1.2"
-    p-map "^2.1.0"
-    resolve-from "^4.0.0"
-    write-json-file "^3.2.0"
+    p-map "^4.0.0"
+    resolve-from "^5.0.0"
+    write-json-file "^4.3.0"
 
-"@lerna/prompt@3.18.5":
-  version "3.18.5"
-  resolved "https://registry.yarnpkg.com/@lerna/prompt/-/prompt-3.18.5.tgz#628cd545f225887d060491ab95df899cfc5218a1"
-  integrity sha512-rkKj4nm1twSbBEb69+Em/2jAERK8htUuV8/xSjN0NPC+6UjzAwY52/x9n5cfmpa9lyKf/uItp7chCI7eDmNTKQ==
+"@lerna/prompt@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/prompt/-/prompt-4.0.0.tgz#5ec69a803f3f0db0ad9f221dad64664d3daca41b"
+  integrity sha512-4Ig46oCH1TH5M7YyTt53fT6TuaKMgqUUaqdgxvp6HP6jtdak6+amcsqB8YGz2eQnw/sdxunx84DfI9XpoLj4bQ==
   dependencies:
-    inquirer "^6.2.0"
+    inquirer "^7.3.3"
     npmlog "^4.1.2"
 
-"@lerna/publish@3.22.1":
-  version "3.22.1"
-  resolved "https://registry.yarnpkg.com/@lerna/publish/-/publish-3.22.1.tgz#b4f7ce3fba1e9afb28be4a1f3d88222269ba9519"
-  integrity sha512-PG9CM9HUYDreb1FbJwFg90TCBQooGjj+n/pb3gw/eH5mEDq0p8wKdLFe0qkiqUkm/Ub5C8DbVFertIo0Vd0zcw==
-  dependencies:
-    "@evocateur/libnpmaccess" "^3.1.2"
-    "@evocateur/npm-registry-fetch" "^4.0.0"
-    "@evocateur/pacote" "^9.6.3"
-    "@lerna/check-working-tree" "3.16.5"
-    "@lerna/child-process" "3.16.5"
-    "@lerna/collect-updates" "3.20.0"
-    "@lerna/command" "3.21.0"
-    "@lerna/describe-ref" "3.16.5"
-    "@lerna/log-packed" "3.16.0"
-    "@lerna/npm-conf" "3.16.0"
-    "@lerna/npm-dist-tag" "3.18.5"
-    "@lerna/npm-publish" "3.18.5"
-    "@lerna/otplease" "3.18.5"
-    "@lerna/output" "3.13.0"
-    "@lerna/pack-directory" "3.16.4"
-    "@lerna/prerelease-id-from-version" "3.16.0"
-    "@lerna/prompt" "3.18.5"
-    "@lerna/pulse-till-done" "3.13.0"
-    "@lerna/run-lifecycle" "3.16.2"
-    "@lerna/run-topologically" "3.18.5"
-    "@lerna/validation-error" "3.13.0"
-    "@lerna/version" "3.22.1"
-    figgy-pudding "^3.5.1"
-    fs-extra "^8.1.0"
-    npm-package-arg "^6.1.0"
+"@lerna/publish@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/publish/-/publish-4.0.0.tgz#f67011305adeba120066a3b6d984a5bb5fceef65"
+  integrity sha512-K8jpqjHrChH22qtkytA5GRKIVFEtqBF6JWj1I8dWZtHs4Jywn8yB1jQ3BAMLhqmDJjWJtRck0KXhQQKzDK2UPg==
+  dependencies:
+    "@lerna/check-working-tree" "4.0.0"
+    "@lerna/child-process" "4.0.0"
+    "@lerna/collect-updates" "4.0.0"
+    "@lerna/command" "4.0.0"
+    "@lerna/describe-ref" "4.0.0"
+    "@lerna/log-packed" "4.0.0"
+    "@lerna/npm-conf" "4.0.0"
+    "@lerna/npm-dist-tag" "4.0.0"
+    "@lerna/npm-publish" "4.0.0"
+    "@lerna/otplease" "4.0.0"
+    "@lerna/output" "4.0.0"
+    "@lerna/pack-directory" "4.0.0"
+    "@lerna/prerelease-id-from-version" "4.0.0"
+    "@lerna/prompt" "4.0.0"
+    "@lerna/pulse-till-done" "4.0.0"
+    "@lerna/run-lifecycle" "4.0.0"
+    "@lerna/run-topologically" "4.0.0"
+    "@lerna/validation-error" "4.0.0"
+    "@lerna/version" "4.0.0"
+    fs-extra "^9.1.0"
+    libnpmaccess "^4.0.1"
+    npm-package-arg "^8.1.0"
+    npm-registry-fetch "^9.0.0"
     npmlog "^4.1.2"
-    p-finally "^1.0.0"
-    p-map "^2.1.0"
-    p-pipe "^1.2.0"
-    semver "^6.2.0"
+    p-map "^4.0.0"
+    p-pipe "^3.1.0"
+    pacote "^11.2.6"
+    semver "^7.3.4"
 
-"@lerna/pulse-till-done@3.13.0":
-  version "3.13.0"
-  resolved "https://registry.yarnpkg.com/@lerna/pulse-till-done/-/pulse-till-done-3.13.0.tgz#c8e9ce5bafaf10d930a67d7ed0ccb5d958fe0110"
-  integrity sha512-1SOHpy7ZNTPulzIbargrgaJX387csN7cF1cLOGZiJQA6VqnS5eWs2CIrG8i8wmaUavj2QlQ5oEbRMVVXSsGrzA==
+"@lerna/pulse-till-done@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/pulse-till-done/-/pulse-till-done-4.0.0.tgz#04bace7d483a8205c187b806bcd8be23d7bb80a3"
+  integrity sha512-Frb4F7QGckaybRhbF7aosLsJ5e9WuH7h0KUkjlzSByVycxY91UZgaEIVjS2oN9wQLrheLMHl6SiFY0/Pvo0Cxg==
   dependencies:
     npmlog "^4.1.2"
 
-"@lerna/query-graph@3.18.5":
-  version "3.18.5"
-  resolved "https://registry.yarnpkg.com/@lerna/query-graph/-/query-graph-3.18.5.tgz#df4830bb5155273003bf35e8dda1c32d0927bd86"
-  integrity sha512-50Lf4uuMpMWvJ306be3oQDHrWV42nai9gbIVByPBYJuVW8dT8O8pA3EzitNYBUdLL9/qEVbrR0ry1HD7EXwtRA==
+"@lerna/query-graph@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/query-graph/-/query-graph-4.0.0.tgz#09dd1c819ac5ee3f38db23931143701f8a6eef63"
+  integrity sha512-YlP6yI3tM4WbBmL9GCmNDoeQyzcyg1e4W96y/PKMZa5GbyUvkS2+Jc2kwPD+5KcXou3wQZxSPzR3Te5OenaDdg==
   dependencies:
-    "@lerna/package-graph" "3.18.5"
-    figgy-pudding "^3.5.1"
+    "@lerna/package-graph" "4.0.0"
 
-"@lerna/resolve-symlink@3.16.0":
-  version "3.16.0"
-  resolved "https://registry.yarnpkg.com/@lerna/resolve-symlink/-/resolve-symlink-3.16.0.tgz#37fc7095fabdbcf317c26eb74e0d0bde8efd2386"
-  integrity sha512-Ibj5e7njVHNJ/NOqT4HlEgPFPtPLWsO7iu59AM5bJDcAJcR96mLZ7KGVIsS2tvaO7akMEJvt2P+ErwCdloG3jQ==
+"@lerna/resolve-symlink@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/resolve-symlink/-/resolve-symlink-4.0.0.tgz#6d006628a210c9b821964657a9e20a8c9a115e14"
+  integrity sha512-RtX8VEUzqT+uLSCohx8zgmjc6zjyRlh6i/helxtZTMmc4+6O4FS9q5LJas2uGO2wKvBlhcD6siibGt7dIC3xZA==
   dependencies:
-    fs-extra "^8.1.0"
+    fs-extra "^9.1.0"
     npmlog "^4.1.2"
-    read-cmd-shim "^1.0.1"
+    read-cmd-shim "^2.0.0"
 
-"@lerna/rimraf-dir@3.16.5":
-  version "3.16.5"
-  resolved "https://registry.yarnpkg.com/@lerna/rimraf-dir/-/rimraf-dir-3.16.5.tgz#04316ab5ffd2909657aaf388ea502cb8c2f20a09"
-  integrity sha512-bQlKmO0pXUsXoF8lOLknhyQjOZsCc0bosQDoX4lujBXSWxHVTg1VxURtWf2lUjz/ACsJVDfvHZbDm8kyBk5okA==
+"@lerna/rimraf-dir@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/rimraf-dir/-/rimraf-dir-4.0.0.tgz#2edf3b62d4eb0ef4e44e430f5844667d551ec25a"
+  integrity sha512-QNH9ABWk9mcMJh2/muD9iYWBk1oQd40y6oH+f3wwmVGKYU5YJD//+zMiBI13jxZRtwBx0vmBZzkBkK1dR11cBg==
   dependencies:
-    "@lerna/child-process" "3.16.5"
+    "@lerna/child-process" "4.0.0"
     npmlog "^4.1.2"
-    path-exists "^3.0.0"
-    rimraf "^2.6.2"
+    path-exists "^4.0.0"
+    rimraf "^3.0.2"
 
-"@lerna/run-lifecycle@3.16.2":
-  version "3.16.2"
-  resolved "https://registry.yarnpkg.com/@lerna/run-lifecycle/-/run-lifecycle-3.16.2.tgz#67b288f8ea964db9ea4fb1fbc7715d5bbb0bce00"
-  integrity sha512-RqFoznE8rDpyyF0rOJy3+KjZCeTkO8y/OB9orPauR7G2xQ7PTdCpgo7EO6ZNdz3Al+k1BydClZz/j78gNCmL2A==
+"@lerna/run-lifecycle@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/run-lifecycle/-/run-lifecycle-4.0.0.tgz#e648a46f9210a9bcd7c391df6844498cb5079334"
+  integrity sha512-IwxxsajjCQQEJAeAaxF8QdEixfI7eLKNm4GHhXHrgBu185JcwScFZrj9Bs+PFKxwb+gNLR4iI5rpUdY8Y0UdGQ==
   dependencies:
-    "@lerna/npm-conf" "3.16.0"
-    figgy-pudding "^3.5.1"
-    npm-lifecycle "^3.1.2"
+    "@lerna/npm-conf" "4.0.0"
+    npm-lifecycle "^3.1.5"
     npmlog "^4.1.2"
 
-"@lerna/run-topologically@3.18.5":
-  version "3.18.5"
-  resolved "https://registry.yarnpkg.com/@lerna/run-topologically/-/run-topologically-3.18.5.tgz#3cd639da20e967d7672cb88db0f756b92f2fdfc3"
-  integrity sha512-6N1I+6wf4hLOnPW+XDZqwufyIQ6gqoPfHZFkfWlvTQ+Ue7CuF8qIVQ1Eddw5HKQMkxqN10thKOFfq/9NQZ4NUg==
+"@lerna/run-topologically@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/run-topologically/-/run-topologically-4.0.0.tgz#af846eeee1a09b0c2be0d1bfb5ef0f7b04bb1827"
+  integrity sha512-EVZw9hGwo+5yp+VL94+NXRYisqgAlj0jWKWtAIynDCpghRxCE5GMO3xrQLmQgqkpUl9ZxQFpICgYv5DW4DksQA==
   dependencies:
-    "@lerna/query-graph" "3.18.5"
-    figgy-pudding "^3.5.1"
-    p-queue "^4.0.0"
+    "@lerna/query-graph" "4.0.0"
+    p-queue "^6.6.2"
 
-"@lerna/run@3.21.0":
-  version "3.21.0"
-  resolved "https://registry.yarnpkg.com/@lerna/run/-/run-3.21.0.tgz#2a35ec84979e4d6e42474fe148d32e5de1cac891"
-  integrity sha512-fJF68rT3veh+hkToFsBmUJ9MHc9yGXA7LSDvhziAojzOb0AI/jBDp6cEcDQyJ7dbnplba2Lj02IH61QUf9oW0Q==
-  dependencies:
-    "@lerna/command" "3.21.0"
-    "@lerna/filter-options" "3.20.0"
-    "@lerna/npm-run-script" "3.16.5"
-    "@lerna/output" "3.13.0"
-    "@lerna/profiler" "3.20.0"
-    "@lerna/run-topologically" "3.18.5"
-    "@lerna/timer" "3.13.0"
-    "@lerna/validation-error" "3.13.0"
-    p-map "^2.1.0"
-
-"@lerna/symlink-binary@3.17.0":
-  version "3.17.0"
-  resolved "https://registry.yarnpkg.com/@lerna/symlink-binary/-/symlink-binary-3.17.0.tgz#8f8031b309863814883d3f009877f82e38aef45a"
-  integrity sha512-RLpy9UY6+3nT5J+5jkM5MZyMmjNHxZIZvXLV+Q3MXrf7Eaa1hNqyynyj4RO95fxbS+EZc4XVSk25DGFQbcRNSQ==
-  dependencies:
-    "@lerna/create-symlink" "3.16.2"
-    "@lerna/package" "3.16.0"
-    fs-extra "^8.1.0"
-    p-map "^2.1.0"
-
-"@lerna/symlink-dependencies@3.17.0":
-  version "3.17.0"
-  resolved "https://registry.yarnpkg.com/@lerna/symlink-dependencies/-/symlink-dependencies-3.17.0.tgz#48d6360e985865a0e56cd8b51b308a526308784a"
-  integrity sha512-KmjU5YT1bpt6coOmdFueTJ7DFJL4H1w5eF8yAQ2zsGNTtZ+i5SGFBWpb9AQaw168dydc3s4eu0W0Sirda+F59Q==
-  dependencies:
-    "@lerna/create-symlink" "3.16.2"
-    "@lerna/resolve-symlink" "3.16.0"
-    "@lerna/symlink-binary" "3.17.0"
-    fs-extra "^8.1.0"
-    p-finally "^1.0.0"
-    p-map "^2.1.0"
-    p-map-series "^1.0.0"
+"@lerna/run@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/run/-/run-4.0.0.tgz#4bc7fda055a729487897c23579694f6183c91262"
+  integrity sha512-9giulCOzlMPzcZS/6Eov6pxE9gNTyaXk0Man+iCIdGJNMrCnW7Dme0Z229WWP/UoxDKg71F2tMsVVGDiRd8fFQ==
+  dependencies:
+    "@lerna/command" "4.0.0"
+    "@lerna/filter-options" "4.0.0"
+    "@lerna/npm-run-script" "4.0.0"
+    "@lerna/output" "4.0.0"
+    "@lerna/profiler" "4.0.0"
+    "@lerna/run-topologically" "4.0.0"
+    "@lerna/timer" "4.0.0"
+    "@lerna/validation-error" "4.0.0"
+    p-map "^4.0.0"
+
+"@lerna/symlink-binary@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/symlink-binary/-/symlink-binary-4.0.0.tgz#21009f62d53a425f136cb4c1a32c6b2a0cc02d47"
+  integrity sha512-zualodWC4q1QQc1pkz969hcFeWXOsVYZC5AWVtAPTDfLl+TwM7eG/O6oP+Rr3fFowspxo6b1TQ6sYfDV6HXNWA==
+  dependencies:
+    "@lerna/create-symlink" "4.0.0"
+    "@lerna/package" "4.0.0"
+    fs-extra "^9.1.0"
+    p-map "^4.0.0"
 
-"@lerna/timer@3.13.0":
-  version "3.13.0"
-  resolved "https://registry.yarnpkg.com/@lerna/timer/-/timer-3.13.0.tgz#bcd0904551db16e08364d6c18e5e2160fc870781"
-  integrity sha512-RHWrDl8U4XNPqY5MQHkToWS9jHPnkLZEt5VD+uunCKTfzlxGnRCr3/zVr8VGy/uENMYpVP3wJa4RKGY6M0vkRw==
+"@lerna/symlink-dependencies@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/symlink-dependencies/-/symlink-dependencies-4.0.0.tgz#8910eca084ae062642d0490d8972cf2d98e9ebbd"
+  integrity sha512-BABo0MjeUHNAe2FNGty1eantWp8u83BHSeIMPDxNq0MuW2K3CiQRaeWT3EGPAzXpGt0+hVzBrA6+OT0GPn7Yuw==
+  dependencies:
+    "@lerna/create-symlink" "4.0.0"
+    "@lerna/resolve-symlink" "4.0.0"
+    "@lerna/symlink-binary" "4.0.0"
+    fs-extra "^9.1.0"
+    p-map "^4.0.0"
+    p-map-series "^2.1.0"
+
+"@lerna/timer@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/timer/-/timer-4.0.0.tgz#a52e51bfcd39bfd768988049ace7b15c1fd7a6da"
+  integrity sha512-WFsnlaE7SdOvjuyd05oKt8Leg3ENHICnvX3uYKKdByA+S3g+TCz38JsNs7OUZVt+ba63nC2nbXDlUnuT2Xbsfg==
 
-"@lerna/validation-error@3.13.0":
-  version "3.13.0"
-  resolved "https://registry.yarnpkg.com/@lerna/validation-error/-/validation-error-3.13.0.tgz#c86b8f07c5ab9539f775bd8a54976e926f3759c3"
-  integrity sha512-SiJP75nwB8GhgwLKQfdkSnDufAaCbkZWJqEDlKOUPUvVOplRGnfL+BPQZH5nvq2BYSRXsksXWZ4UHVnQZI/HYA==
+"@lerna/validation-error@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/validation-error/-/validation-error-4.0.0.tgz#af9d62fe8304eaa2eb9a6ba1394f9aa807026d35"
+  integrity sha512-1rBOM5/koiVWlRi3V6dB863E1YzJS8v41UtsHgMr6gB2ncJ2LsQtMKlJpi3voqcgh41H8UsPXR58RrrpPpufyw==
   dependencies:
     npmlog "^4.1.2"
 
-"@lerna/version@3.22.1":
-  version "3.22.1"
-  resolved "https://registry.yarnpkg.com/@lerna/version/-/version-3.22.1.tgz#9805a9247a47ee62d6b81bd9fa5fb728b24b59e2"
-  integrity sha512-PSGt/K1hVqreAFoi3zjD0VEDupQ2WZVlVIwesrE5GbrL2BjXowjCsTDPqblahDUPy0hp6h7E2kG855yLTp62+g==
-  dependencies:
-    "@lerna/check-working-tree" "3.16.5"
-    "@lerna/child-process" "3.16.5"
-    "@lerna/collect-updates" "3.20.0"
-    "@lerna/command" "3.21.0"
-    "@lerna/conventional-commits" "3.22.0"
-    "@lerna/github-client" "3.22.0"
-    "@lerna/gitlab-client" "3.15.0"
-    "@lerna/output" "3.13.0"
-    "@lerna/prerelease-id-from-version" "3.16.0"
-    "@lerna/prompt" "3.18.5"
-    "@lerna/run-lifecycle" "3.16.2"
-    "@lerna/run-topologically" "3.18.5"
-    "@lerna/validation-error" "3.13.0"
-    chalk "^2.3.1"
+"@lerna/version@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/version/-/version-4.0.0.tgz#532659ec6154d8a8789c5ab53878663e244e3228"
+  integrity sha512-otUgiqs5W9zGWJZSCCMRV/2Zm2A9q9JwSDS7s/tlKq4mWCYriWo7+wsHEA/nPTMDyYyBO5oyZDj+3X50KDUzeA==
+  dependencies:
+    "@lerna/check-working-tree" "4.0.0"
+    "@lerna/child-process" "4.0.0"
+    "@lerna/collect-updates" "4.0.0"
+    "@lerna/command" "4.0.0"
+    "@lerna/conventional-commits" "4.0.0"
+    "@lerna/github-client" "4.0.0"
+    "@lerna/gitlab-client" "4.0.0"
+    "@lerna/output" "4.0.0"
+    "@lerna/prerelease-id-from-version" "4.0.0"
+    "@lerna/prompt" "4.0.0"
+    "@lerna/run-lifecycle" "4.0.0"
+    "@lerna/run-topologically" "4.0.0"
+    "@lerna/validation-error" "4.0.0"
+    chalk "^4.1.0"
     dedent "^0.7.0"
-    load-json-file "^5.3.0"
+    load-json-file "^6.2.0"
     minimatch "^3.0.4"
     npmlog "^4.1.2"
-    p-map "^2.1.0"
-    p-pipe "^1.2.0"
-    p-reduce "^1.0.0"
-    p-waterfall "^1.0.0"
-    semver "^6.2.0"
-    slash "^2.0.0"
-    temp-write "^3.4.0"
-    write-json-file "^3.2.0"
+    p-map "^4.0.0"
+    p-pipe "^3.1.0"
+    p-reduce "^2.1.0"
+    p-waterfall "^2.1.1"
+    semver "^7.3.4"
+    slash "^3.0.0"
+    temp-write "^4.0.0"
+    write-json-file "^4.3.0"
 
-"@lerna/write-log-file@3.13.0":
-  version "3.13.0"
-  resolved "https://registry.yarnpkg.com/@lerna/write-log-file/-/write-log-file-3.13.0.tgz#b78d9e4cfc1349a8be64d91324c4c8199e822a26"
-  integrity sha512-RibeMnDPvlL8bFYW5C8cs4mbI3AHfQef73tnJCQ/SgrXZHehmHnsyWUiE7qDQCAo+B1RfTapvSyFF69iPj326A==
+"@lerna/write-log-file@4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@lerna/write-log-file/-/write-log-file-4.0.0.tgz#18221a38a6a307d6b0a5844dd592ad53fa27091e"
+  integrity sha512-XRG5BloiArpXRakcnPHmEHJp+4AtnhRtpDIHSghmXD5EichI1uD73J7FgPp30mm2pDRq3FdqB0NbwSEsJ9xFQg==
   dependencies:
     npmlog "^4.1.2"
-    write-file-atomic "^2.3.0"
+    write-file-atomic "^3.0.3"
 
 "@mrmlnc/readdir-enhanced@^2.2.1":
   version "2.2.1"
@@ -1396,13 +1271,84 @@
     "@nodelib/fs.scandir" "2.1.4"
     fastq "^1.6.0"
 
-"@octokit/auth-token@^2.4.0":
+"@npmcli/ci-detect@^1.0.0":
+  version "1.3.0"
+  resolved "https://registry.yarnpkg.com/@npmcli/ci-detect/-/ci-detect-1.3.0.tgz#6c1d2c625fb6ef1b9dea85ad0a5afcbef85ef22a"
+  integrity sha512-oN3y7FAROHhrAt7Rr7PnTSwrHrZVRTS2ZbyxeQwSSYD0ifwM3YNgQqbaRmjcWoPyq77MjchusjJDspbzMmip1Q==
+
+"@npmcli/git@^2.0.1":
+  version "2.0.9"
+  resolved "https://registry.yarnpkg.com/@npmcli/git/-/git-2.0.9.tgz#915bbfe66300e67b4da5ef765a4475ffb2ca5b6b"
+  integrity sha512-hTMbMryvOqGLwnmMBKs5usbPsJtyEsMsgXwJbmNrsEuQQh1LAIMDU77IoOrwkCg+NgQWl+ySlarJASwM3SutCA==
+  dependencies:
+    "@npmcli/promise-spawn" "^1.3.2"
+    lru-cache "^6.0.0"
+    mkdirp "^1.0.4"
+    npm-pick-manifest "^6.1.1"
+    promise-inflight "^1.0.1"
+    promise-retry "^2.0.1"
+    semver "^7.3.5"
+    which "^2.0.2"
+
+"@npmcli/installed-package-contents@^1.0.6":
+  version "1.0.7"
+  resolved "https://registry.yarnpkg.com/@npmcli/installed-package-contents/-/installed-package-contents-1.0.7.tgz#ab7408c6147911b970a8abe261ce512232a3f4fa"
+  integrity sha512-9rufe0wnJusCQoLpV9ZPKIVP55itrM5BxOXs10DmdbRfgWtHy1LDyskbwRnBghuB0PrF7pNPOqREVtpz4HqzKw==
+  dependencies:
+    npm-bundled "^1.1.1"
+    npm-normalize-package-bin "^1.0.1"
+
+"@npmcli/move-file@^1.0.1":
+  version "1.1.2"
+  resolved "https://registry.yarnpkg.com/@npmcli/move-file/-/move-file-1.1.2.tgz#1a82c3e372f7cae9253eb66d72543d6b8685c674"
+  integrity sha512-1SUf/Cg2GzGDyaf15aR9St9TWlb+XvbZXWpDx8YKs7MLzMH/BCeopv+y9vzrzgkfykCGuWOlSu3mZhj2+FQcrg==
+  dependencies:
+    mkdirp "^1.0.4"
+    rimraf "^3.0.2"
+
+"@npmcli/node-gyp@^1.0.2":
+  version "1.0.2"
+  resolved "https://registry.yarnpkg.com/@npmcli/node-gyp/-/node-gyp-1.0.2.tgz#3cdc1f30e9736dbc417373ed803b42b1a0a29ede"
+  integrity sha512-yrJUe6reVMpktcvagumoqD9r08fH1iRo01gn1u0zoCApa9lnZGEigVKUd2hzsCId4gdtkZZIVscLhNxMECKgRg==
+
+"@npmcli/promise-spawn@^1.2.0", "@npmcli/promise-spawn@^1.3.2":
+  version "1.3.2"
+  resolved "https://registry.yarnpkg.com/@npmcli/promise-spawn/-/promise-spawn-1.3.2.tgz#42d4e56a8e9274fba180dabc0aea6e38f29274f5"
+  integrity sha512-QyAGYo/Fbj4MXeGdJcFzZ+FkDkomfRBrPM+9QYJSg+PxgAUL+LU3FneQk37rKR2/zjqkCV1BLHccX98wRXG3Sg==
+  dependencies:
+    infer-owner "^1.0.4"
+
+"@npmcli/run-script@^1.8.2":
+  version "1.8.5"
+  resolved "https://registry.yarnpkg.com/@npmcli/run-script/-/run-script-1.8.5.tgz#f250a0c5e1a08a792d775a315d0ff42fc3a51e1d"
+  integrity sha512-NQspusBCpTjNwNRFMtz2C5MxoxyzlbuJ4YEhxAKrIonTiirKDtatsZictx9RgamQIx6+QuHMNmPl0wQdoESs9A==
+  dependencies:
+    "@npmcli/node-gyp" "^1.0.2"
+    "@npmcli/promise-spawn" "^1.3.2"
+    infer-owner "^1.0.4"
+    node-gyp "^7.1.0"
+    read-package-json-fast "^2.0.1"
+
+"@octokit/auth-token@^2.4.4":
   version "2.4.5"
   resolved "https://registry.yarnpkg.com/@octokit/auth-token/-/auth-token-2.4.5.tgz#568ccfb8cb46f36441fac094ce34f7a875b197f3"
   integrity sha512-BpGYsPgJt05M7/L/5FoE1PiAbdxXFZkX/3kDYcsvd1v6UhlnE5e96dTDr0ezX/EFwciQxf3cNV0loipsURU+WA==
   dependencies:
     "@octokit/types" "^6.0.3"
 
+"@octokit/core@^3.2.3":
+  version "3.4.0"
+  resolved "https://registry.yarnpkg.com/@octokit/core/-/core-3.4.0.tgz#b48aa27d755b339fe7550548b340dcc2b513b742"
+  integrity sha512-6/vlKPP8NF17cgYXqucdshWqmMZGXkuvtcrWCgU5NOI0Pl2GjlmZyWgBMrU8zJ3v2MJlM6++CiB45VKYmhiWWg==
+  dependencies:
+    "@octokit/auth-token" "^2.4.4"
+    "@octokit/graphql" "^4.5.8"
+    "@octokit/request" "^5.4.12"
+    "@octokit/request-error" "^2.0.5"
+    "@octokit/types" "^6.0.3"
+    before-after-hook "^2.2.0"
+    universal-user-agent "^6.0.0"
+
 "@octokit/endpoint@^6.0.1":
   version "6.0.11"
   resolved "https://registry.yarnpkg.com/@octokit/endpoint/-/endpoint-6.0.11.tgz#082adc2aebca6dcefa1fb383f5efb3ed081949d1"
@@ -1412,46 +1358,46 @@
     is-plain-object "^5.0.0"
     universal-user-agent "^6.0.0"
 
-"@octokit/openapi-types@^6.0.0":
-  version "6.0.0"
-  resolved "https://registry.yarnpkg.com/@octokit/openapi-types/-/openapi-types-6.0.0.tgz#7da8d7d5a72d3282c1a3ff9f951c8133a707480d"
-  integrity sha512-CnDdK7ivHkBtJYzWzZm7gEkanA7gKH6a09Eguz7flHw//GacPJLmkHA3f3N++MJmlxD1Fl+mB7B32EEpSCwztQ==
+"@octokit/graphql@^4.5.8":
+  version "4.6.2"
+  resolved "https://registry.yarnpkg.com/@octokit/graphql/-/graphql-4.6.2.tgz#ec44abdfa87f2b9233282136ae33e4ba446a04e7"
+  integrity sha512-WmsIR1OzOr/3IqfG9JIczI8gMJUMzzyx5j0XXQ4YihHtKlQc+u35VpVoOXhlKAlaBntvry1WpAzPl/a+s3n89Q==
+  dependencies:
+    "@octokit/request" "^5.3.0"
+    "@octokit/types" "^6.0.3"
+    universal-user-agent "^6.0.0"
+
+"@octokit/openapi-types@^7.2.0":
+  version "7.2.1"
+  resolved "https://registry.yarnpkg.com/@octokit/openapi-types/-/openapi-types-7.2.1.tgz#3ba1abe8906863edd403e185bc12e2bf79b3e240"
+  integrity sha512-IHQJpLciwzwDvciLxiFj3IEV5VYn7lSVcj5cu0jbTwMfK4IG6/g8SPrVp3Le1VRzIiYSRcBzm1dA7vgWelYP3Q==
 
 "@octokit/plugin-enterprise-rest@^6.0.1":
   version "6.0.1"
   resolved "https://registry.yarnpkg.com/@octokit/plugin-enterprise-rest/-/plugin-enterprise-rest-6.0.1.tgz#e07896739618dab8da7d4077c658003775f95437"
   integrity sha512-93uGjlhUD+iNg1iWhUENAtJata6w5nE+V4urXOAlIXdco6xNZtUSfYY8dzp3Udy74aqO/B5UZL80x/YMa5PKRw==
 
-"@octokit/plugin-paginate-rest@^1.1.1":
-  version "1.1.2"
-  resolved "https://registry.yarnpkg.com/@octokit/plugin-paginate-rest/-/plugin-paginate-rest-1.1.2.tgz#004170acf8c2be535aba26727867d692f7b488fc"
-  integrity sha512-jbsSoi5Q1pj63sC16XIUboklNw+8tL9VOnJsWycWYR78TKss5PVpIPb1TUUcMQ+bBh7cY579cVAWmf5qG+dw+Q==
+"@octokit/plugin-paginate-rest@^2.6.2":
+  version "2.13.3"
+  resolved "https://registry.yarnpkg.com/@octokit/plugin-paginate-rest/-/plugin-paginate-rest-2.13.3.tgz#f0f1792230805108762d87906fb02d573b9e070a"
+  integrity sha512-46lptzM9lTeSmIBt/sVP/FLSTPGx6DCzAdSX3PfeJ3mTf4h9sGC26WpaQzMEq/Z44cOcmx8VsOhO+uEgE3cjYg==
   dependencies:
-    "@octokit/types" "^2.0.1"
+    "@octokit/types" "^6.11.0"
 
-"@octokit/plugin-request-log@^1.0.0":
+"@octokit/plugin-request-log@^1.0.2":
   version "1.0.3"
   resolved "https://registry.yarnpkg.com/@octokit/plugin-request-log/-/plugin-request-log-1.0.3.tgz#70a62be213e1edc04bb8897ee48c311482f9700d"
   integrity sha512-4RFU4li238jMJAzLgAwkBAw+4Loile5haQMQr+uhFq27BmyJXcXSKvoQKqh0agsZEiUlW6iSv3FAgvmGkur7OQ==
 
-"@octokit/plugin-rest-endpoint-methods@2.4.0":
-  version "2.4.0"
-  resolved "https://registry.yarnpkg.com/@octokit/plugin-rest-endpoint-methods/-/plugin-rest-endpoint-methods-2.4.0.tgz#3288ecf5481f68c494dd0602fc15407a59faf61e"
-  integrity sha512-EZi/AWhtkdfAYi01obpX0DF7U6b1VRr30QNQ5xSFPITMdLSfhcBqjamE3F+sKcxPbD7eZuMHu3Qkk2V+JGxBDQ==
+"@octokit/plugin-rest-endpoint-methods@5.0.1":
+  version "5.0.1"
+  resolved "https://registry.yarnpkg.com/@octokit/plugin-rest-endpoint-methods/-/plugin-rest-endpoint-methods-5.0.1.tgz#631b8d4edc6798b03489911252a25f2a4e58c594"
+  integrity sha512-vvWbPtPqLyIzJ7A4IPdTl+8IeuKAwMJ4LjvmqWOOdfSuqWQYZXq2CEd0hsnkidff2YfKlguzujHs/reBdAx8Sg==
   dependencies:
-    "@octokit/types" "^2.0.1"
+    "@octokit/types" "^6.13.1"
     deprecation "^2.3.1"
 
-"@octokit/request-error@^1.0.2":
-  version "1.2.1"
-  resolved "https://registry.yarnpkg.com/@octokit/request-error/-/request-error-1.2.1.tgz#ede0714c773f32347576c25649dc013ae6b31801"
-  integrity sha512-+6yDyk1EES6WK+l3viRDElw96MvwfJxCt45GvmjDUKWjYIb3PJZQkq3i46TwGwoPD4h8NmTrENmtyA1FwbmhRA==
-  dependencies:
-    "@octokit/types" "^2.0.0"
-    deprecation "^2.0.0"
-    once "^1.4.0"
-
-"@octokit/request-error@^2.0.0":
+"@octokit/request-error@^2.0.0", "@octokit/request-error@^2.0.5":
   version "2.0.5"
   resolved "https://registry.yarnpkg.com/@octokit/request-error/-/request-error-2.0.5.tgz#72cc91edc870281ad583a42619256b380c600143"
   integrity sha512-T/2wcCFyM7SkXzNoyVNWjyVlUwBvW3igM3Btr/eKYiPmucXTtkxt2RBsf6gn3LTzaLSLTQtNmvg+dGsOxQrjZg==
@@ -1460,55 +1406,34 @@
     deprecation "^2.0.0"
     once "^1.4.0"
 
-"@octokit/request@^5.2.0":
-  version "5.4.14"
-  resolved "https://registry.yarnpkg.com/@octokit/request/-/request-5.4.14.tgz#ec5f96f78333bb2af390afa5ff66f114b063bc96"
-  integrity sha512-VkmtacOIQp9daSnBmDI92xNIeLuSRDOIuplp/CJomkvzt7M18NXgG044Cx/LFKLgjKt9T2tZR6AtJayba9GTSA==
+"@octokit/request@^5.3.0", "@octokit/request@^5.4.12":
+  version "5.4.15"
+  resolved "https://registry.yarnpkg.com/@octokit/request/-/request-5.4.15.tgz#829da413dc7dd3aa5e2cdbb1c7d0ebe1f146a128"
+  integrity sha512-6UnZfZzLwNhdLRreOtTkT9n57ZwulCve8q3IT/Z477vThu6snfdkBuhxnChpOKNGxcQ71ow561Qoa6uqLdPtag==
   dependencies:
     "@octokit/endpoint" "^6.0.1"
     "@octokit/request-error" "^2.0.0"
     "@octokit/types" "^6.7.1"
-    deprecation "^2.0.0"
     is-plain-object "^5.0.0"
     node-fetch "^2.6.1"
-    once "^1.4.0"
     universal-user-agent "^6.0.0"
 
-"@octokit/rest@^16.28.4":
-  version "16.43.2"
-  resolved "https://registry.yarnpkg.com/@octokit/rest/-/rest-16.43.2.tgz#c53426f1e1d1044dee967023e3279c50993dd91b"
-  integrity sha512-ngDBevLbBTFfrHZeiS7SAMAZ6ssuVmXuya+F/7RaVvlysgGa1JKJkKWY+jV6TCJYcW0OALfJ7nTIGXcBXzycfQ==
-  dependencies:
-    "@octokit/auth-token" "^2.4.0"
-    "@octokit/plugin-paginate-rest" "^1.1.1"
-    "@octokit/plugin-request-log" "^1.0.0"
-    "@octokit/plugin-rest-endpoint-methods" "2.4.0"
-    "@octokit/request" "^5.2.0"
-    "@octokit/request-error" "^1.0.2"
-    atob-lite "^2.0.0"
-    before-after-hook "^2.0.0"
-    btoa-lite "^1.0.0"
-    deprecation "^2.0.0"
-    lodash.get "^4.4.2"
-    lodash.set "^4.3.2"
-    lodash.uniq "^4.5.0"
-    octokit-pagination-methods "^1.1.0"
-    once "^1.4.0"
-    universal-user-agent "^4.0.0"
-
-"@octokit/types@^2.0.0", "@octokit/types@^2.0.1":
-  version "2.16.2"
-  resolved "https://registry.yarnpkg.com/@octokit/types/-/types-2.16.2.tgz#4c5f8da3c6fecf3da1811aef678fda03edac35d2"
-  integrity sha512-O75k56TYvJ8WpAakWwYRN8Bgu60KrmX0z1KqFp1kNiFNkgW+JW+9EBKZ+S33PU6SLvbihqd+3drvPxKK68Ee8Q==
+"@octokit/rest@^18.1.0":
+  version "18.5.3"
+  resolved "https://registry.yarnpkg.com/@octokit/rest/-/rest-18.5.3.tgz#6a2e6006a87ebbc34079c419258dd29ec9ff659d"
+  integrity sha512-KPAsUCr1DOdLVbZJgGNuE/QVLWEaVBpFQwDAz/2Cnya6uW2wJ/P5RVGk0itx7yyN1aGa8uXm2pri4umEqG1JBA==
   dependencies:
-    "@types/node" ">= 8"
+    "@octokit/core" "^3.2.3"
+    "@octokit/plugin-paginate-rest" "^2.6.2"
+    "@octokit/plugin-request-log" "^1.0.2"
+    "@octokit/plugin-rest-endpoint-methods" "5.0.1"
 
-"@octokit/types@^6.0.3", "@octokit/types@^6.7.1":
-  version "6.13.0"
-  resolved "https://registry.yarnpkg.com/@octokit/types/-/types-6.13.0.tgz#779e5b7566c8dde68f2f6273861dd2f0409480d0"
-  integrity sha512-W2J9qlVIU11jMwKHUp5/rbVUeErqelCsO5vW5PKNb7wAXQVUz87Rc+imjlEvpvbH8yUb+KHmv8NEjVZdsdpyxA==
+"@octokit/types@^6.0.3", "@octokit/types@^6.11.0", "@octokit/types@^6.13.1", "@octokit/types@^6.7.1":
+  version "6.16.0"
+  resolved "https://registry.yarnpkg.com/@octokit/types/-/types-6.16.0.tgz#15f71e391ca74e91a21b70e3a1b033c89625dca4"
+  integrity sha512-EktqSNq8EKXE82a7Vw33ozOEhFXIRik+rZHJTHAgVZRm/p2K5r5ecn5fVpRkLCm3CAVFwchRvt3yvtmfbt2LCQ==
   dependencies:
-    "@octokit/openapi-types" "^6.0.0"
+    "@octokit/openapi-types" "^7.2.0"
 
 "@sinonjs/commons@^1.7.0":
   version "1.8.3"
@@ -1517,14 +1442,39 @@
   dependencies:
     type-detect "4.0.8"
 
-"@sinonjs/fake-timers@^6.0.1":
-  version "6.0.1"
-  resolved "https://registry.yarnpkg.com/@sinonjs/fake-timers/-/fake-timers-6.0.1.tgz#293674fccb3262ac782c7aadfdeca86b10c75c40"
-  integrity sha512-MZPUxrmFubI36XS1DI3qmI0YdN1gks62JtFZvxR67ljjSNCeK6U08Zx4msEWOXuofgqUt6zPHSi1H9fbjR/NRA==
+"@sinonjs/fake-timers@^7.0.2":
+  version "7.1.0"
+  resolved "https://registry.yarnpkg.com/@sinonjs/fake-timers/-/fake-timers-7.1.0.tgz#8f13af27d842cbf51ad4502e05562fe9391d084e"
+  integrity sha512-hAEzXi6Wbvlb67NnGMGSNOeAflLVnMa4yliPU/ty1qjgW/vAletH15/v/esJwASSIA0YlIyjnloenFbEZc9q9A==
   dependencies:
     "@sinonjs/commons" "^1.7.0"
 
-"@types/babel__core@^7.0.0", "@types/babel__core@^7.1.7":
+"@tootallnate/once@1":
+  version "1.1.2"
+  resolved "https://registry.yarnpkg.com/@tootallnate/once/-/once-1.1.2.tgz#ccb91445360179a04e7fe6aff78c00ffc1eeaf82"
+  integrity sha512-RbzJvlNzmRq5c3O09UipeuXno4tA1FE6ikOjxZK0tuxVv3412l64l5t1W5pj4+rJq9vpkm/kwiR07aZXnsKPxw==
+
+"@tsconfig/node10@^1.0.7":
+  version "1.0.7"
+  resolved "https://registry.yarnpkg.com/@tsconfig/node10/-/node10-1.0.7.tgz#1eb1de36c73478a2479cc661ef5af1c16d86d606"
+  integrity sha512-aBvUmXLQbayM4w3A8TrjwrXs4DZ8iduJnuJLLRGdkWlyakCf1q6uHZJBzXoRA/huAEknG5tcUyQxN3A+In5euQ==
+
+"@tsconfig/node12@^1.0.7":
+  version "1.0.7"
+  resolved "https://registry.yarnpkg.com/@tsconfig/node12/-/node12-1.0.7.tgz#677bd9117e8164dc319987dd6ff5fc1ba6fbf18b"
+  integrity sha512-dgasobK/Y0wVMswcipr3k0HpevxFJLijN03A8mYfEPvWvOs14v0ZlYTR4kIgMx8g4+fTyTFv8/jLCIfRqLDJ4A==
+
+"@tsconfig/node14@^1.0.0":
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/@tsconfig/node14/-/node14-1.0.0.tgz#5bd046e508b1ee90bc091766758838741fdefd6e"
+  integrity sha512-RKkL8eTdPv6t5EHgFKIVQgsDapugbuOptNd9OOunN/HAkzmmTnZELx1kNCK0rSdUYGmiFMM3rRQMAWiyp023LQ==
+
+"@tsconfig/node16@^1.0.1":
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/@tsconfig/node16/-/node16-1.0.1.tgz#a6ca6a9a0ff366af433f42f5f0e124794ff6b8f1"
+  integrity sha512-FTgBI767POY/lKNDNbIzgAX6miIDBs6NTCbdlDb8TrWovHsSvaVIZDlTqym29C6UqhzwcJx4CYr+AlrMywA0cA==
+
+"@types/babel__core@^7.0.0", "@types/babel__core@^7.1.14":
   version "7.1.14"
   resolved "https://registry.yarnpkg.com/@types/babel__core/-/babel__core-7.1.14.tgz#faaeefc4185ec71c389f4501ee5ec84b170cc402"
   integrity sha512-zGZJzzBUVDo/eV6KgbE0f0ZI7dInEYvo12Rb70uNQDshC3SkRMb67ja0GgRHZgAX3Za6rhaWlvbDO8rrGyAb1g==
@@ -1566,9 +1516,9 @@
     "@types/estree" "*"
 
 "@types/eslint@*":
-  version "7.2.10"
-  resolved "https://registry.yarnpkg.com/@types/eslint/-/eslint-7.2.10.tgz#4b7a9368d46c0f8cd5408c23288a59aa2394d917"
-  integrity sha512-kUEPnMKrqbtpCq/KTaGFFKAcz6Ethm2EjCoKIDaCmfRBWLbFuTcOJfTlorwbnboXBzahqWLgUp1BQeKHiJzPUQ==
+  version "7.2.11"
+  resolved "https://registry.yarnpkg.com/@types/eslint/-/eslint-7.2.11.tgz#180b58f5bb7d7376e39d22496e2b08901aa52fd2"
+  integrity sha512-WYhv//5K8kQtsSc9F1Kn2vHzhYor6KpwPbARH7hwYe3C3ETD0EVx/3P5qQybUoaBEuUa9f/02JjBiXFWalYUmw==
   dependencies:
     "@types/estree" "*"
     "@types/json-schema" "*"
@@ -1610,14 +1560,6 @@
   dependencies:
     "@types/istanbul-lib-coverage" "*"
 
-"@types/istanbul-reports@^1.1.1":
-  version "1.1.2"
-  resolved "https://registry.yarnpkg.com/@types/istanbul-reports/-/istanbul-reports-1.1.2.tgz#e875cc689e47bce549ec81f3df5e6f6f11cfaeb2"
-  integrity sha512-P/W9yOX/3oPZSpaYOCQzGqgCQRXn0FFO/V8bWrCQs+wLmvVVxk6CRBXALEvNs9OHIatlnlFokfhuDo2ug01ciw==
-  dependencies:
-    "@types/istanbul-lib-coverage" "*"
-    "@types/istanbul-lib-report" "*"
-
 "@types/istanbul-reports@^3.0.0":
   version "3.0.0"
   resolved "https://registry.yarnpkg.com/@types/istanbul-reports/-/istanbul-reports-3.0.0.tgz#508b13aa344fa4976234e75dddcc34925737d821"
@@ -1638,7 +1580,7 @@
   resolved "https://registry.yarnpkg.com/@types/json-schema/-/json-schema-7.0.7.tgz#98a993516c859eb0d5c4c8f098317a9ea68db9ad"
   integrity sha512-cxWFQVseBm6O9Gbw1IWb8r6OS4OhSt3hPZLkFApLjM8TEXROBuQGLAH2i2gZpcXdLBIrpXuTDhH7Vbm1iXmNGA==
 
-"@types/minimatch@*":
+"@types/minimatch@*", "@types/minimatch@^3.0.3":
   version "3.0.4"
   resolved "https://registry.yarnpkg.com/@types/minimatch/-/minimatch-3.0.4.tgz#f0ec25dbf2f0e4b18647313ac031134ca5b24b21"
   integrity sha512-1z8k4wzFnNjVK/tlxvrWuK5WMt6mydWWP7+zvH5eFep4oj+UkrfiJTRtjCeBXNpwaA/FYqqtb4/QS4ianFpIRA==
@@ -1648,36 +1590,38 @@
   resolved "https://registry.yarnpkg.com/@types/minimist/-/minimist-1.2.1.tgz#283f669ff76d7b8260df8ab7a4262cc83d988256"
   integrity sha512-fZQQafSREFyuZcdWFAExYjBiCL7AUCdgsk80iO0q4yihYYdcIiH28CcuPTGFgLOCC8RlW49GSQxdHwZP+I7CNg==
 
-"@types/node@*", "@types/node@>= 8":
-  version "14.14.37"
-  resolved "https://registry.yarnpkg.com/@types/node/-/node-14.14.37.tgz#a3dd8da4eb84a996c36e331df98d82abd76b516e"
-  integrity sha512-XYmBiy+ohOR4Lh5jE379fV2IU+6Jn4g5qASinhitfyO71b/sCo6MKsMLF5tc7Zf2CE8hViVQyYSobJNke8OvUw==
+"@types/multistream@2.1.1":
+  version "2.1.1"
+  resolved "https://registry.yarnpkg.com/@types/multistream/-/multistream-2.1.1.tgz#4badd2440ee3570594ea552420fe2e29ebe512bd"
+  integrity sha512-PqavtNFnMyXRZS5vuW16wMOKeJUCD5PIGHdNBHzF5Urjncsij90hRQ82Wcy9+uSdnmrR2Gfao6xoJVq1wAWzbA==
+  dependencies:
+    "@types/node" "*"
+
+"@types/node@*", "@types/node@^15.6.1":
+  version "15.6.1"
+  resolved "https://registry.yarnpkg.com/@types/node/-/node-15.6.1.tgz#32d43390d5c62c5b6ec486a9bc9c59544de39a08"
+  integrity sha512-7EIraBEyRHEe7CH+Fm1XvgqU6uwZN8Q7jppJGcqjROMT29qhAuuOxYB1uEY5UMYQKEmA5D+5tBnhdaPXSsLONA==
 
 "@types/node@^11.11.6":
-  version "11.15.50"
-  resolved "https://registry.yarnpkg.com/@types/node/-/node-11.15.50.tgz#a8c76622a20320d4a04adf2002b04737c510ef11"
-  integrity sha512-kG/ZmA/uD1L1gVD7vVXQB6v+ICZlJgvakrodHiltT3Zq0YjXq5H9tfgop8MsdMGCwrcLJg9QCQDRP4DZsn9T/g==
-
-"@types/node@^15.0.2":
-  version "15.0.2"
-  resolved "https://registry.yarnpkg.com/@types/node/-/node-15.0.2.tgz#51e9c0920d1b45936ea04341aa3e2e58d339fb67"
-  integrity sha512-p68+a+KoxpoB47015IeYZYRrdqMUcpbK8re/zpFB8Ld46LHC1lPEbp3EXgkEhAYEcPvjJF6ZO+869SQ0aH1dcA==
+  version "11.15.54"
+  resolved "https://registry.yarnpkg.com/@types/node/-/node-11.15.54.tgz#59ed60e7b0d56905a654292e8d73275034eb6283"
+  integrity sha512-1RWYiq+5UfozGsU6MwJyFX6BtktcT10XRjvcAQmskCtMcW3tPske88lM/nHv7BQG1w9KBXI1zPGuu5PnNCX14g==
 
 "@types/normalize-package-data@^2.4.0":
   version "2.4.0"
   resolved "https://registry.yarnpkg.com/@types/normalize-package-data/-/normalize-package-data-2.4.0.tgz#e486d0d97396d79beedd0a6e33f4534ff6b4973e"
   integrity sha512-f5j5b/Gf71L+dbqxIpQ4Z2WlmI/mPJ0fOkGGmFgtb6sAu97EPczzbS3/tJKxmcYDj55OX6ssqwDAWOHIYDRDGA==
 
-"@types/prettier@^2.0.0":
+"@types/parse-json@^4.0.0":
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/@types/parse-json/-/parse-json-4.0.0.tgz#2f8bb441434d163b35fb8ffdccd7138927ffb8c0"
+  integrity sha512-//oorEZjL6sbPcKUaCdIGlIUeH26mgzimjBB77G6XRgnDl/L5wOnpyBGRe/Mmf5CVW3PwEBE1NjiMZ/ssFh4wA==
+
+"@types/prettier@^2.1.5":
   version "2.2.3"
   resolved "https://registry.yarnpkg.com/@types/prettier/-/prettier-2.2.3.tgz#ef65165aea2924c9359205bf748865b8881753c0"
   integrity sha512-PijRCG/K3s3w1We6ynUKdxEc5AcuuH3NBmMDP8uvKVp6X43UY7NQlTzczakXP3DJR0F4dfNQIGjU2cUeRYs2AA==
 
-"@types/stack-utils@^1.0.1":
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/@types/stack-utils/-/stack-utils-1.0.1.tgz#0a851d3bd96498fa25c33ab7278ed3bd65f06c3e"
-  integrity sha512-l42BggppR6zLmpfU6fq9HEa2oGPEI8yrSPL3GITjfRInppYFahObbIQOQK3UGxEnyQpltZLaPe75046NOZQikw==
-
 "@types/stack-utils@^2.0.0":
   version "2.0.0"
   resolved "https://registry.yarnpkg.com/@types/stack-utils/-/stack-utils-2.0.0.tgz#7036640b4e21cc2f259ae826ce843d277dad8cff"
@@ -1688,13 +1632,6 @@
   resolved "https://registry.yarnpkg.com/@types/yargs-parser/-/yargs-parser-20.2.0.tgz#dd3e6699ba3237f0348cd085e4698780204842f9"
   integrity sha512-37RSHht+gzzgYeobbG+KWryeAW8J33Nhr69cjTqSYymXVZEN9NbRYWoYlRtDhHKPVT1FyNKwaTPC1NynKZpzRA==
 
-"@types/yargs@^13.0.0":
-  version "13.0.11"
-  resolved "https://registry.yarnpkg.com/@types/yargs/-/yargs-13.0.11.tgz#def2f0c93e4bdf2c61d7e34899b17e34be28d3b1"
-  integrity sha512-NRqD6T4gktUrDi1o1wLH3EKC1o2caCr7/wR87ODcbVITQF106OM3sFN92ysZ++wqelOd1CTzatnOBRDYYG6wGQ==
-  dependencies:
-    "@types/yargs-parser" "*"
-
 "@types/yargs@^15.0.0":
   version "15.0.13"
   resolved "https://registry.yarnpkg.com/@types/yargs/-/yargs-15.0.13.tgz#34f7fec8b389d7f3c1fd08026a5763e072d3c6dc"
@@ -1702,13 +1639,20 @@
   dependencies:
     "@types/yargs-parser" "*"
 
-"@typescript-eslint/eslint-plugin@^4.22.1":
-  version "4.22.1"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/eslint-plugin/-/eslint-plugin-4.22.1.tgz#6bcdbaa4548553ab861b4e5f34936ead1349a543"
-  integrity sha512-kVTAghWDDhsvQ602tHBc6WmQkdaYbkcTwZu+7l24jtJiYvm9l+/y/b2BZANEezxPDiX5MK2ZecE+9BFi/YJryw==
+"@types/yargs@^16.0.0":
+  version "16.0.3"
+  resolved "https://registry.yarnpkg.com/@types/yargs/-/yargs-16.0.3.tgz#4b6d35bb8e680510a7dc2308518a80ee1ef27e01"
+  integrity sha512-YlFfTGS+zqCgXuXNV26rOIeETOkXnGQXP/pjjL9P0gO/EP9jTmc7pUBhx+jVEIxpq41RX33GQ7N3DzOSfZoglQ==
   dependencies:
-    "@typescript-eslint/experimental-utils" "4.22.1"
-    "@typescript-eslint/scope-manager" "4.22.1"
+    "@types/yargs-parser" "*"
+
+"@typescript-eslint/eslint-plugin@4.25.0":
+  version "4.25.0"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/eslint-plugin/-/eslint-plugin-4.25.0.tgz#d82657b6ab4caa4c3f888ff923175fadc2f31f2a"
+  integrity sha512-Qfs3dWkTMKkKwt78xp2O/KZQB8MPS1UQ5D3YW2s6LQWBE1074BE+Rym+b1pXZIX3M3fSvPUDaCvZLKV2ylVYYQ==
+  dependencies:
+    "@typescript-eslint/experimental-utils" "4.25.0"
+    "@typescript-eslint/scope-manager" "4.25.0"
     debug "^4.1.1"
     functional-red-black-tree "^1.0.1"
     lodash "^4.17.15"
@@ -1716,106 +1660,60 @@
     semver "^7.3.2"
     tsutils "^3.17.1"
 
-"@typescript-eslint/experimental-utils@4.22.1":
-  version "4.22.1"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/experimental-utils/-/experimental-utils-4.22.1.tgz#3938a5c89b27dc9a39b5de63a62ab1623ab27497"
-  integrity sha512-svYlHecSMCQGDO2qN1v477ax/IDQwWhc7PRBiwAdAMJE7GXk5stF4Z9R/8wbRkuX/5e9dHqbIWxjeOjckK3wLQ==
+"@typescript-eslint/experimental-utils@4.25.0", "@typescript-eslint/experimental-utils@^4.0.1":
+  version "4.25.0"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/experimental-utils/-/experimental-utils-4.25.0.tgz#b2febcfa715d2c1806fd5f0335193a6cd270df54"
+  integrity sha512-f0doRE76vq7NEEU0tw+ajv6CrmPelw5wLoaghEHkA2dNLFb3T/zJQqGPQ0OYt5XlZaS13MtnN+GTPCuUVg338w==
   dependencies:
     "@types/json-schema" "^7.0.3"
-    "@typescript-eslint/scope-manager" "4.22.1"
-    "@typescript-eslint/types" "4.22.1"
-    "@typescript-eslint/typescript-estree" "4.22.1"
+    "@typescript-eslint/scope-manager" "4.25.0"
+    "@typescript-eslint/types" "4.25.0"
+    "@typescript-eslint/typescript-estree" "4.25.0"
     eslint-scope "^5.0.0"
     eslint-utils "^2.0.0"
 
-"@typescript-eslint/experimental-utils@^4.0.1":
-  version "4.21.0"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/experimental-utils/-/experimental-utils-4.21.0.tgz#0b0bb7c15d379140a660c003bdbafa71ae9134b6"
-  integrity sha512-cEbgosW/tUFvKmkg3cU7LBoZhvUs+ZPVM9alb25XvR0dal4qHL3SiUqHNrzoWSxaXA9gsifrYrS1xdDV6w/gIA==
+"@typescript-eslint/parser@4.25.0":
+  version "4.25.0"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/parser/-/parser-4.25.0.tgz#6b2cb6285aa3d55bfb263c650739091b0f19aceb"
+  integrity sha512-OZFa1SKyEJpAhDx8FcbWyX+vLwh7OEtzoo2iQaeWwxucyfbi0mT4DijbOSsTgPKzGHr6GrF2V5p/CEpUH/VBxg==
   dependencies:
-    "@types/json-schema" "^7.0.3"
-    "@typescript-eslint/scope-manager" "4.21.0"
-    "@typescript-eslint/types" "4.21.0"
-    "@typescript-eslint/typescript-estree" "4.21.0"
-    eslint-scope "^5.0.0"
-    eslint-utils "^2.0.0"
-
-"@typescript-eslint/parser@^4.22.1":
-  version "4.22.1"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/parser/-/parser-4.22.1.tgz#a95bda0fd01d994a15fc3e99dc984294f25c19cc"
-  integrity sha512-l+sUJFInWhuMxA6rtirzjooh8cM/AATAe3amvIkqKFeMzkn85V+eLzb1RyuXkHak4dLfYzOmF6DXPyflJvjQnw==
-  dependencies:
-    "@typescript-eslint/scope-manager" "4.22.1"
-    "@typescript-eslint/types" "4.22.1"
-    "@typescript-eslint/typescript-estree" "4.22.1"
+    "@typescript-eslint/scope-manager" "4.25.0"
+    "@typescript-eslint/types" "4.25.0"
+    "@typescript-eslint/typescript-estree" "4.25.0"
     debug "^4.1.1"
 
-"@typescript-eslint/scope-manager@4.21.0":
-  version "4.21.0"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/scope-manager/-/scope-manager-4.21.0.tgz#c81b661c4b8af1ec0c010d847a8f9ab76ab95b4d"
-  integrity sha512-kfOjF0w1Ix7+a5T1knOw00f7uAP9Gx44+OEsNQi0PvvTPLYeXJlsCJ4tYnDj5PQEYfpcgOH5yBlw7K+UEI9Agw==
-  dependencies:
-    "@typescript-eslint/types" "4.21.0"
-    "@typescript-eslint/visitor-keys" "4.21.0"
-
-"@typescript-eslint/scope-manager@4.22.1":
-  version "4.22.1"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/scope-manager/-/scope-manager-4.22.1.tgz#5bb357f94f9cd8b94e6be43dd637eb73b8f355b4"
-  integrity sha512-d5bAiPBiessSmNi8Amq/RuLslvcumxLmyhf1/Xa9IuaoFJ0YtshlJKxhlbY7l2JdEk3wS0EnmnfeJWSvADOe0g==
+"@typescript-eslint/scope-manager@4.25.0":
+  version "4.25.0"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/scope-manager/-/scope-manager-4.25.0.tgz#9d86a5bcc46ef40acd03d85ad4e908e5aab8d4ca"
+  integrity sha512-2NElKxMb/0rya+NJG1U71BuNnp1TBd1JgzYsldsdA83h/20Tvnf/HrwhiSlNmuq6Vqa0EzidsvkTArwoq+tH6w==
   dependencies:
-    "@typescript-eslint/types" "4.22.1"
-    "@typescript-eslint/visitor-keys" "4.22.1"
+    "@typescript-eslint/types" "4.25.0"
+    "@typescript-eslint/visitor-keys" "4.25.0"
 
-"@typescript-eslint/types@4.21.0":
-  version "4.21.0"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/types/-/types-4.21.0.tgz#abdc3463bda5d31156984fa5bc316789c960edef"
-  integrity sha512-+OQaupjGVVc8iXbt6M1oZMwyKQNehAfLYJJ3SdvnofK2qcjfor9pEM62rVjBknhowTkh+2HF+/KdRAc/wGBN2w==
+"@typescript-eslint/types@4.25.0":
+  version "4.25.0"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/types/-/types-4.25.0.tgz#0e444a5c5e3c22d7ffa5e16e0e60510b3de5af87"
+  integrity sha512-+CNINNvl00OkW6wEsi32wU5MhHti2J25TJsJJqgQmJu3B3dYDBcmOxcE5w9cgoM13TrdE/5ND2HoEnBohasxRQ==
 
-"@typescript-eslint/types@4.22.1":
-  version "4.22.1"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/types/-/types-4.22.1.tgz#bf99c6cec0b4a23d53a61894816927f2adad856a"
-  integrity sha512-2HTkbkdAeI3OOcWbqA8hWf/7z9c6gkmnWNGz0dKSLYLWywUlkOAQ2XcjhlKLj5xBFDf8FgAOF5aQbnLRvgNbCw==
-
-"@typescript-eslint/typescript-estree@4.21.0":
-  version "4.21.0"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/typescript-estree/-/typescript-estree-4.21.0.tgz#3817bd91857beeaeff90f69f1f112ea58d350b0a"
-  integrity sha512-ZD3M7yLaVGVYLw4nkkoGKumb7Rog7QID9YOWobFDMQKNl+vPxqVIW/uDk+MDeGc+OHcoG2nJ2HphwiPNajKw3w==
-  dependencies:
-    "@typescript-eslint/types" "4.21.0"
-    "@typescript-eslint/visitor-keys" "4.21.0"
-    debug "^4.1.1"
-    globby "^11.0.1"
-    is-glob "^4.0.1"
-    semver "^7.3.2"
-    tsutils "^3.17.1"
-
-"@typescript-eslint/typescript-estree@4.22.1":
-  version "4.22.1"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/typescript-estree/-/typescript-estree-4.22.1.tgz#dca379eead8cdfd4edc04805e83af6d148c164f9"
-  integrity sha512-p3We0pAPacT+onSGM+sPR+M9CblVqdA9F1JEdIqRVlxK5Qth4ochXQgIyb9daBomyQKAXbygxp1aXQRV0GC79A==
+"@typescript-eslint/typescript-estree@4.25.0":
+  version "4.25.0"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/typescript-estree/-/typescript-estree-4.25.0.tgz#942e4e25888736bff5b360d9b0b61e013d0cfa25"
+  integrity sha512-1B8U07TGNAFMxZbSpF6jqiDs1cVGO0izVkf18Q/SPcUAc9LhHxzvSowXDTvkHMWUVuPpagupaW63gB6ahTXVlg==
   dependencies:
-    "@typescript-eslint/types" "4.22.1"
-    "@typescript-eslint/visitor-keys" "4.22.1"
+    "@typescript-eslint/types" "4.25.0"
+    "@typescript-eslint/visitor-keys" "4.25.0"
     debug "^4.1.1"
     globby "^11.0.1"
     is-glob "^4.0.1"
     semver "^7.3.2"
     tsutils "^3.17.1"
 
-"@typescript-eslint/visitor-keys@4.21.0":
-  version "4.21.0"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/visitor-keys/-/visitor-keys-4.21.0.tgz#990a9acdc124331f5863c2cf21c88ba65233cd8d"
-  integrity sha512-dH22dROWGi5Z6p+Igc8bLVLmwy7vEe8r+8c+raPQU0LxgogPUrRAtRGtvBWmlr9waTu3n+QLt/qrS/hWzk1x5w==
-  dependencies:
-    "@typescript-eslint/types" "4.21.0"
-    eslint-visitor-keys "^2.0.0"
-
-"@typescript-eslint/visitor-keys@4.22.1":
-  version "4.22.1"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/visitor-keys/-/visitor-keys-4.22.1.tgz#6045ae25a11662c671f90b3a403d682dfca0b7a6"
-  integrity sha512-WPkOrIRm+WCLZxXQHCi+WG8T2MMTUFR70rWjdWYddLT7cEfb2P4a3O/J2U1FBVsSFTocXLCoXWY6MZGejeStvQ==
+"@typescript-eslint/visitor-keys@4.25.0":
+  version "4.25.0"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/visitor-keys/-/visitor-keys-4.25.0.tgz#863e7ed23da4287c5b469b13223255d0fde6aaa7"
+  integrity sha512-AmkqV9dDJVKP/TcZrbf6s6i1zYXt5Hl8qOLrRDTFfRNae4+LB8A4N3i+FLZPW85zIxRy39BgeWOfMS3HoH5ngg==
   dependencies:
-    "@typescript-eslint/types" "4.22.1"
+    "@typescript-eslint/types" "4.25.0"
     eslint-visitor-keys "^2.0.0"
 
 "@webassemblyjs/ast@1.11.0":
@@ -1949,16 +1847,7 @@
   resolved "https://registry.yarnpkg.com/@xtuc/long/-/long-4.2.2.tgz#d291c6a4e97989b5c61d9acf396ae4fe133a718d"
   integrity sha512-NuHqBY1PB/D8xU6s/thBgOAiAP7HOYDQ32+BFZILJ8ivkUkAHQnWfn6WhL79Owj1qmUnoN/YPhktdIoucipkAQ==
 
-"@zkochan/cmd-shim@^3.1.0":
-  version "3.1.0"
-  resolved "https://registry.yarnpkg.com/@zkochan/cmd-shim/-/cmd-shim-3.1.0.tgz#2ab8ed81f5bb5452a85f25758eb9b8681982fd2e"
-  integrity sha512-o8l0+x7C7sMZU3v9GuJIAU10qQLtwR1dtRQIOmlNMtyaqhmpXOzx1HWiYoWfmmf9HHZoAkXpc9TM9PQYF9d4Jg==
-  dependencies:
-    is-windows "^1.0.0"
-    mkdirp-promise "^5.0.1"
-    mz "^2.5.0"
-
-JSONStream@^1.0.4, JSONStream@^1.3.4:
+JSONStream@^1.0.4:
   version "1.3.5"
   resolved "https://registry.yarnpkg.com/JSONStream/-/JSONStream-1.3.5.tgz#3208c1f08d3a4d99261ab64f92302bc15e111ca0"
   integrity sha512-E+iruNOY8VV9s4JEbe1aNEm6MiszPRr/UfcHMz0TQh1BXSxHK+ASV1R6W4HpjBhSeS+54PIsAMCBmwD06LLsqQ==
@@ -2004,35 +1893,30 @@ acorn@^7.1.1, acorn@^7.4.0:
   resolved "https://registry.yarnpkg.com/acorn/-/acorn-7.4.1.tgz#feaed255973d2e77555b83dbc08851a6c63520fa"
   integrity sha512-nQyp0o1/mNdbTO1PO6kHkwSrmgZ0MT/jCCpNiwbUjGoRN4dlBhqJtoQuCnEOKzgTVwg0ZWiCoQy6SxMebQVh8A==
 
-acorn@^8.1.0:
-  version "8.1.0"
-  resolved "https://registry.yarnpkg.com/acorn/-/acorn-8.1.0.tgz#52311fd7037ae119cbb134309e901aa46295b3fe"
-  integrity sha512-LWCF/Wn0nfHOmJ9rzQApGnxnvgfROzGilS8936rqN/lfcYkY9MYZzdMqN+2NJ4SlTc+m5HiSa+kNfDtI64dwUA==
-
-acorn@^8.2.1:
+acorn@^8.2.1, acorn@^8.2.4:
   version "8.2.4"
   resolved "https://registry.yarnpkg.com/acorn/-/acorn-8.2.4.tgz#caba24b08185c3b56e3168e97d15ed17f4d31fd0"
   integrity sha512-Ibt84YwBDDA890eDiDCEqcbwvHlBvzzDkU2cGBBDDI1QWT12jTiXIOn2CIw5KK4i6N5Z2HUxwYjzriDyqaqqZg==
 
-agent-base@4, agent-base@^4.3.0:
-  version "4.3.0"
-  resolved "https://registry.yarnpkg.com/agent-base/-/agent-base-4.3.0.tgz#8165f01c436009bccad0b1d122f05ed770efc6ee"
-  integrity sha512-salcGninV0nPrwpGNn4VTXBb1SOuXQBiqbrNXoeizJsHrsL6ERFM2Ne3JUSBWRE6aeNJI2ROP/WEEIDUiDe3cg==
-  dependencies:
-    es6-promisify "^5.0.0"
+add-stream@^1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/add-stream/-/add-stream-1.0.0.tgz#6a7990437ca736d5e1288db92bd3266d5f5cb2aa"
+  integrity sha1-anmQQ3ynNtXhKI25K9MmbV9csqo=
 
-agent-base@~4.2.1:
-  version "4.2.1"
-  resolved "https://registry.yarnpkg.com/agent-base/-/agent-base-4.2.1.tgz#d89e5999f797875674c07d87f260fc41e83e8ca9"
-  integrity sha512-JVwXMr9nHYTUXsBFKUqhJwvlcYU/blreOEUkhNR2eXZIvwd+c+o5V4MgDPKWnMS/56awN3TRzIP+KoPn+roQtg==
+agent-base@6:
+  version "6.0.2"
+  resolved "https://registry.yarnpkg.com/agent-base/-/agent-base-6.0.2.tgz#49fff58577cfee3f37176feab4c22e00f86d7f77"
+  integrity sha512-RZNwNclF7+MS/8bDg70amg32dyeZGZxiDuQmZxKLAlQjr3jGyLx+4Kkk58UO7D2QdgFIQCovuSuZESne6RG6XQ==
   dependencies:
-    es6-promisify "^5.0.0"
+    debug "4"
 
-agentkeepalive@^3.4.1:
-  version "3.5.2"
-  resolved "https://registry.yarnpkg.com/agentkeepalive/-/agentkeepalive-3.5.2.tgz#a113924dd3fa24a0bc3b78108c450c2abee00f67"
-  integrity sha512-e0L/HNe6qkQ7H19kTlRRqUibEAwDK5AFk6y3PtMsuut2VAH6+Q4xZml1tNDJD7kSAyqmbG/K08K5WEJYtUrSlQ==
+agentkeepalive@^4.1.3:
+  version "4.1.4"
+  resolved "https://registry.yarnpkg.com/agentkeepalive/-/agentkeepalive-4.1.4.tgz#d928028a4862cb11718e55227872e842a44c945b"
+  integrity sha512-+V/rGa3EuU74H6wR04plBb7Ks10FbtUQgRj/FQOG7uUIEuaINI+AiqJR1k6t3SVNs7o7ZjIdus6706qqzVq8jQ==
   dependencies:
+    debug "^4.1.0"
+    depd "^1.1.2"
     humanize-ms "^1.2.1"
 
 aggregate-error@^3.0.0:
@@ -2059,9 +1943,9 @@ ajv@^6.10.0, ajv@^6.12.3, ajv@^6.12.4, ajv@^6.12.5:
     uri-js "^4.2.2"
 
 ajv@^8.0.1:
-  version "8.1.0"
-  resolved "https://registry.yarnpkg.com/ajv/-/ajv-8.1.0.tgz#45d5d3d36c7cdd808930cc3e603cf6200dbeb736"
-  integrity sha512-B/Sk2Ix7A36fs/ZkuGLIR86EdjbgR6fsAcbx9lOP/QBSXujDNbVmIS/U4Itz5k8fPFDeVZl/zQ/gJW4Jrq6XjQ==
+  version "8.5.0"
+  resolved "https://registry.yarnpkg.com/ajv/-/ajv-8.5.0.tgz#695528274bcb5afc865446aa275484049a18ae4b"
+  integrity sha512-Y2l399Tt1AguU3BPRP9Fn4eN+Or+StUGWCUpbnFyXSo8NZ9S4uj+AG2pjs5apK+ZMOwYOz1+a+VKvKH7CudXgQ==
   dependencies:
     fast-deep-equal "^3.1.1"
     json-schema-traverse "^1.0.0"
@@ -2085,11 +1969,6 @@ ansi-colors@^4.1.1:
   resolved "https://registry.yarnpkg.com/ansi-colors/-/ansi-colors-4.1.1.tgz#cbb9ae256bf750af1eab344f229aa27fe94ba348"
   integrity sha512-JoX0apGbHaUJBNl6yF+p6JAFYZ666/hhCGKN5t9QFjbJQKUU/g8MNbFDbvfrgKXvI1QpZplPOnwIo99lX/AAmA==
 
-ansi-escapes@^3.2.0:
-  version "3.2.0"
-  resolved "https://registry.yarnpkg.com/ansi-escapes/-/ansi-escapes-3.2.0.tgz#8780b98ff9dbf5638152d1f1fe5c1d7b4442976b"
-  integrity sha512-cBhpre4ma+U0T1oM5fXg7Dy1Jw7zzwv7lt/GoCpr+hDQJoYnKVPLL4dCvSEFMmQurOQvSrwT7SL/DAlhBI97RQ==
-
 ansi-escapes@^4.2.1, ansi-escapes@^4.3.0:
   version "4.3.2"
   resolved "https://registry.yarnpkg.com/ansi-escapes/-/ansi-escapes-4.3.2.tgz#6b2291d1db7d98b6521d5f1efa42d0f3a9feb65e"
@@ -2114,17 +1993,12 @@ ansi-regex@^3.0.0:
   resolved "https://registry.yarnpkg.com/ansi-regex/-/ansi-regex-3.0.0.tgz#ed0317c322064f79466c02966bddb605ab37d998"
   integrity sha1-7QMXwyIGT3lGbAKWa922Bas32Zg=
 
-ansi-regex@^4.1.0:
-  version "4.1.0"
-  resolved "https://registry.yarnpkg.com/ansi-regex/-/ansi-regex-4.1.0.tgz#8b9f8f08cf1acb843756a839ca8c7e3168c51997"
-  integrity sha512-1apePfXM1UOSqw0o9IiFAovVz9M5S1Dg+4TrDwfMewQ6p/rmMueb7tWZjQ1rx4Loy1ArBggoqGpfqqdI4rondg==
-
 ansi-regex@^5.0.0:
   version "5.0.0"
   resolved "https://registry.yarnpkg.com/ansi-regex/-/ansi-regex-5.0.0.tgz#388539f55179bf39339c81af30a654d69f87cb75"
   integrity sha512-bY6fj56OUQ0hU1KjFNDQuJFezqKdrAyFdIevADiqrWHwSlbmBNMHp5ak2f40Pm8JTFyM2mqxkG6ngkHO11f/lg==
 
-ansi-styles@^3.2.0, ansi-styles@^3.2.1:
+ansi-styles@^3.2.1:
   version "3.2.1"
   resolved "https://registry.yarnpkg.com/ansi-styles/-/ansi-styles-3.2.1.tgz#41fbb20243e50b12be0f04b8dedbf07520ce841d"
   integrity sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA==
@@ -2138,16 +2012,16 @@ ansi-styles@^4.0.0, ansi-styles@^4.1.0:
   dependencies:
     color-convert "^2.0.1"
 
+ansi-styles@^5.0.0:
+  version "5.2.0"
+  resolved "https://registry.yarnpkg.com/ansi-styles/-/ansi-styles-5.2.0.tgz#07449690ad45777d1924ac2abb2fc8895dba836b"
+  integrity sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==
+
 ansi-wrap@0.1.0, ansi-wrap@^0.1.0:
   version "0.1.0"
   resolved "https://registry.yarnpkg.com/ansi-wrap/-/ansi-wrap-0.1.0.tgz#a82250ddb0015e9a27ca82e82ea603bbfa45efaf"
   integrity sha1-qCJQ3bABXponyoLoLqYDu/pF768=
 
-any-promise@^1.0.0:
-  version "1.3.0"
-  resolved "https://registry.yarnpkg.com/any-promise/-/any-promise-1.3.0.tgz#abc6afeedcea52e809cdc0376aed3ce39635d17f"
-  integrity sha1-q8av7tzqUugJzcA3au0845Y10X8=
-
 anymatch@^2.0.0:
   version "2.0.0"
   resolved "https://registry.yarnpkg.com/anymatch/-/anymatch-2.0.0.tgz#bcb24b4f37934d9aa7ac17b4adaf89e7c76ef2eb"
@@ -2171,7 +2045,7 @@ append-buffer@^1.0.2:
   dependencies:
     buffer-equal "^1.0.0"
 
-aproba@^1.0.3, aproba@^1.1.1:
+aproba@^1.0.3:
   version "1.2.0"
   resolved "https://registry.yarnpkg.com/aproba/-/aproba-1.2.0.tgz#6802e6264efd18c790a1b0d517f0f2627bf2c94a"
   integrity sha512-Y9J6ZjXtoYh8RnXVCMOU/ttDmk1aBjunq9vO0ta5x85WDQiQfUF9sIPBITdbiiIVcBo03Hi3jMxigBtsddlXRw==
@@ -2241,14 +2115,14 @@ array-back@^3.0.1:
   integrity sha512-TkuxA4UCOvxuDK6NZYXCalszEzj+TLszyASooky+i742l9TqsOdYCMJJupxRic61hwquNtppB3hgcuq9SVSH1Q==
 
 array-back@^4.0.1:
-  version "4.0.1"
-  resolved "https://registry.yarnpkg.com/array-back/-/array-back-4.0.1.tgz#9b80312935a52062e1a233a9c7abeb5481b30e90"
-  integrity sha512-Z/JnaVEXv+A9xabHzN43FiiiWEE7gPCRXMrVmRm00tWbjZRul1iHm7ECzlyNq1p4a4ATXz+G9FJ3GqGOkOV3fg==
+  version "4.0.2"
+  resolved "https://registry.yarnpkg.com/array-back/-/array-back-4.0.2.tgz#8004e999a6274586beeb27342168652fdb89fa1e"
+  integrity sha512-NbdMezxqf94cnNfWLL7V/im0Ub+Anbb0IoZhvzie8+4HJ4nMQuzHuy49FkGYCJK2yAloZ3meiB6AVMClbrI1vg==
 
-array-differ@^2.0.3:
-  version "2.1.0"
-  resolved "https://registry.yarnpkg.com/array-differ/-/array-differ-2.1.0.tgz#4b9c1c3f14b906757082925769e8ab904f4801b1"
-  integrity sha512-KbUpJgx909ZscOc/7CLATBFam7P1Z1QRQInvgT0UztM9Q72aGKCunKASAl7WNW0tnPmPyEMeMhdsfWhfmW037w==
+array-differ@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/array-differ/-/array-differ-3.0.0.tgz#3cbb3d0f316810eafcc47624734237d6aee4ae6b"
+  integrity sha512-THtfYS6KtME/yIAhKjZ2ul7XI96lQGHRputJQHO80LAWQnuGP4iCIN8vdMRboGbIEYBwU33q8Tch1os2+X0kMg==
 
 array-each@^1.0.0, array-each@^1.0.1:
   version "1.0.1"
@@ -2353,17 +2227,7 @@ astral-regex@^2.0.0:
   resolved "https://registry.yarnpkg.com/astral-regex/-/astral-regex-2.0.0.tgz#483143c567aeed4785759c0865786dc77d7d2e31"
   integrity sha512-Z7tMw1ytTXt5jqMcOP+OQteU1VuNK9Y02uuJtKQ1Sv69jXQKKg5cibLwGJow8yzZP+eAc18EmLGPal0bp36rvQ==
 
-async-done@1.3.1:
-  version "1.3.1"
-  resolved "https://registry.yarnpkg.com/async-done/-/async-done-1.3.1.tgz#14b7b73667b864c8f02b5b253fc9c6eddb777f3e"
-  integrity sha512-R1BaUeJ4PMoLNJuk+0tLJgjmEqVsdN118+Z8O+alhnQDQgy0kmD5Mqi0DNEmMx2LM0Ed5yekKu+ZXYvIHceicg==
-  dependencies:
-    end-of-stream "^1.1.0"
-    once "^1.3.2"
-    process-nextick-args "^1.0.7"
-    stream-exhaust "^1.0.1"
-
-async-done@^1.2.0, async-done@^1.2.2:
+async-done@1.3.2, async-done@^1.2.0, async-done@^1.2.2:
   version "1.3.2"
   resolved "https://registry.yarnpkg.com/async-done/-/async-done-1.3.2.tgz#5e15aa729962a4b07414f528a88cdf18e0b290a2"
   integrity sha512-uYkTP8dw2og1tu1nmza1n1CMW0qb8gWWlwqMmLb7MhBVs4BXrFziT6HXUd+/RlRA/i4H9AkofYloUbs1fwMqlw==
@@ -2395,11 +2259,6 @@ at-least-node@^1.0.0:
   resolved "https://registry.yarnpkg.com/at-least-node/-/at-least-node-1.0.0.tgz#602cd4b46e844ad4effc92a8011a3c46e0238dc2"
   integrity sha512-+q/t7Ekv1EDY2l6Gda6LLiX14rU9TV20Wa3ofeQmwPFZbOMo9DXrLbOjFaaclkXKWidIaopwAObQDqwWtGUjqg==
 
-atob-lite@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/atob-lite/-/atob-lite-2.0.0.tgz#0fef5ad46f1bd7a8502c65727f0367d5ee43d696"
-  integrity sha1-D+9a1G8b16hQLGVyfwNn1e5D1pY=
-
 atob@^2.1.2:
   version "2.1.2"
   resolved "https://registry.yarnpkg.com/atob/-/atob-2.1.2.tgz#6d9517eb9e030d2436666651e86bd9f6f13533c9"
@@ -2415,16 +2274,16 @@ aws4@^1.8.0:
   resolved "https://registry.yarnpkg.com/aws4/-/aws4-1.11.0.tgz#d61f46d83b2519250e2784daf5b09479a8b41c59"
   integrity sha512-xh1Rl34h6Fi1DC2WWKfxUTVqRsNnr6LsKz2+hfwDxQJWmrx8+c7ylaqBMcHfl1U1r2dsifOvKX3LQuLNZ+XSvA==
 
-babel-jest@^26.6.3:
-  version "26.6.3"
-  resolved "https://registry.yarnpkg.com/babel-jest/-/babel-jest-26.6.3.tgz#d87d25cb0037577a0c89f82e5755c5d293c01056"
-  integrity sha512-pl4Q+GAVOHwvjrck6jKjvmGhnO3jHX/xuB9d27f+EJZ/6k+6nMuPjorrYp7s++bKKdANwzElBWnLWaObvTnaZA==
+babel-jest@^27.0.1:
+  version "27.0.1"
+  resolved "https://registry.yarnpkg.com/babel-jest/-/babel-jest-27.0.1.tgz#9f1c4571ac17a39e599d1325dcaf53a274261df4"
+  integrity sha512-aWFD7OGQjk3Y8MdZKf1XePlQvHnjMVJQjIq9WKrlAjz9by703kJ45Jxhp26JwnovoW71YYz5etuqRl8wMcIv0w==
   dependencies:
-    "@jest/transform" "^26.6.2"
-    "@jest/types" "^26.6.2"
-    "@types/babel__core" "^7.1.7"
+    "@jest/transform" "^27.0.1"
+    "@jest/types" "^27.0.1"
+    "@types/babel__core" "^7.1.14"
     babel-plugin-istanbul "^6.0.0"
-    babel-preset-jest "^26.6.2"
+    babel-preset-jest "^27.0.1"
     chalk "^4.0.0"
     graceful-fs "^4.2.4"
     slash "^3.0.0"
@@ -2440,10 +2299,10 @@ babel-plugin-istanbul@^6.0.0:
     istanbul-lib-instrument "^4.0.0"
     test-exclude "^6.0.0"
 
-babel-plugin-jest-hoist@^26.6.2:
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/babel-plugin-jest-hoist/-/babel-plugin-jest-hoist-26.6.2.tgz#8185bd030348d254c6d7dd974355e6a28b21e62d"
-  integrity sha512-PO9t0697lNTmcEHH69mdtYiOIkkOlj9fySqfO3K1eCcdISevLAE0xY59VLLUj0SoiPiTX/JU2CYFpILydUa5Lw==
+babel-plugin-jest-hoist@^27.0.1:
+  version "27.0.1"
+  resolved "https://registry.yarnpkg.com/babel-plugin-jest-hoist/-/babel-plugin-jest-hoist-27.0.1.tgz#a6d10e484c93abff0f4e95f437dad26e5736ea11"
+  integrity sha512-sqBF0owAcCDBVEDtxqfYr2F36eSHdx7lAVGyYuOBRnKdD6gzcy0I0XrAYCZgOA3CRrLhmR+Uae9nogPzmAtOfQ==
   dependencies:
     "@babel/template" "^7.3.3"
     "@babel/types" "^7.3.3"
@@ -2468,12 +2327,12 @@ babel-preset-current-node-syntax@^1.0.0:
     "@babel/plugin-syntax-optional-chaining" "^7.8.3"
     "@babel/plugin-syntax-top-level-await" "^7.8.3"
 
-babel-preset-jest@^26.6.2:
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/babel-preset-jest/-/babel-preset-jest-26.6.2.tgz#747872b1171df032252426586881d62d31798fee"
-  integrity sha512-YvdtlVm9t3k777c5NPQIv6cxFFFapys25HiUmuSgHwIZhfifweR5c5Sf5nwE3MAbfu327CYSvps8Yx6ANLyleQ==
+babel-preset-jest@^27.0.1:
+  version "27.0.1"
+  resolved "https://registry.yarnpkg.com/babel-preset-jest/-/babel-preset-jest-27.0.1.tgz#7a50c75d16647c23a2cf5158d5bb9eb206b10e20"
+  integrity sha512-nIBIqCEpuiyhvjQs2mVNwTxQQa2xk70p9Dd/0obQGBf8FBzbnI8QhQKzLsWMN2i6q+5B0OcWDtrboBX5gmOLyA==
   dependencies:
-    babel-plugin-jest-hoist "^26.6.2"
+    babel-plugin-jest-hoist "^27.0.1"
     babel-preset-current-node-syntax "^1.0.0"
 
 bach@^1.0.0:
@@ -2516,7 +2375,7 @@ bcrypt-pbkdf@^1.0.0:
   dependencies:
     tweetnacl "^0.14.3"
 
-before-after-hook@^2.0.0:
+before-after-hook@^2.2.0:
   version "2.2.1"
   resolved "https://registry.yarnpkg.com/before-after-hook/-/before-after-hook-2.2.1.tgz#73540563558687586b52ed217dad6a802ab1549c"
   integrity sha512-/6FKxSTWoJdbsLDF8tdIjaRiFXiE6UHsEHE3OPI/cwPURCVi1ukP0gmLn7XWEiFk5TcwQjjY5PWsU+j+tgXgmw==
@@ -2557,11 +2416,6 @@ bindings@^1.5.0:
   dependencies:
     file-uri-to-path "1.0.0"
 
-bluebird@^3.5.1, bluebird@^3.5.3, bluebird@^3.5.5:
-  version "3.7.2"
-  resolved "https://registry.yarnpkg.com/bluebird/-/bluebird-3.7.2.tgz#9f229c15be272454ffa973ace0dbee79a1b0c36f"
-  integrity sha512-XpNj6GDQzdfW+r2Wnn7xiSAd7TM3jzkxGXBGTtWKuSXv1xUV+azxAm8jdWZN06QTQk+2N2XB9jRDkvbmQmcRtg==
-
 brace-expansion@^1.1.7:
   version "1.1.11"
   resolved "https://registry.yarnpkg.com/brace-expansion/-/brace-expansion-1.1.11.tgz#3c7fcbf529d87226f3d2f52b966ff5271eb441dd"
@@ -2599,15 +2453,15 @@ browser-process-hrtime@^1.0.0:
   integrity sha512-9o5UecI3GhkpM6DrXr69PblIuWxPKk9Y0jHBRhdocZ2y7YECBFCsHm79Pr3OyR2AvjhDkabFJaDJMYRazHgsow==
 
 browserslist@^4.14.5:
-  version "4.16.3"
-  resolved "https://registry.yarnpkg.com/browserslist/-/browserslist-4.16.3.tgz#340aa46940d7db878748567c5dea24a48ddf3717"
-  integrity sha512-vIyhWmIkULaq04Gt93txdh+j02yX/JzlyhLYbV3YQCn/zvES3JnY7TifHHvvr1w5hTDluNKMkV05cs4vy8Q7sw==
+  version "4.16.6"
+  resolved "https://registry.yarnpkg.com/browserslist/-/browserslist-4.16.6.tgz#d7901277a5a88e554ed305b183ec9b0c08f66fa2"
+  integrity sha512-Wspk/PqO+4W9qp5iUTJsa1B/QrYn1keNCcEP5OvP7WBwT4KaDly0uONYmC6Xa3Z5IqnUgS0KcgLYu1l74x0ZXQ==
   dependencies:
-    caniuse-lite "^1.0.30001181"
-    colorette "^1.2.1"
-    electron-to-chromium "^1.3.649"
+    caniuse-lite "^1.0.30001219"
+    colorette "^1.2.2"
+    electron-to-chromium "^1.3.723"
     escalade "^3.1.1"
-    node-releases "^1.1.70"
+    node-releases "^1.1.71"
 
 bs-logger@0.x:
   version "0.2.6"
@@ -2623,11 +2477,6 @@ bser@2.1.1:
   dependencies:
     node-int64 "^0.4.0"
 
-btoa-lite@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/btoa-lite/-/btoa-lite-1.0.0.tgz#337766da15801210fdd956c22e9c6891ab9d0337"
-  integrity sha1-M3dm2hWAEhD92VbCLpxokaudAzc=
-
 buffer-equal@^1.0.0:
   version "1.0.0"
   resolved "https://registry.yarnpkg.com/buffer-equal/-/buffer-equal-1.0.0.tgz#59616b498304d556abd466966b22eeda3eca5fbe"
@@ -2648,31 +2497,33 @@ byline@^5.0.0:
   resolved "https://registry.yarnpkg.com/byline/-/byline-5.0.0.tgz#741c5216468eadc457b03410118ad77de8c1ddb1"
   integrity sha1-dBxSFkaOrcRXsDQQEYrXfejB3bE=
 
-byte-size@^5.0.1:
-  version "5.0.1"
-  resolved "https://registry.yarnpkg.com/byte-size/-/byte-size-5.0.1.tgz#4b651039a5ecd96767e71a3d7ed380e48bed4191"
-  integrity sha512-/XuKeqWocKsYa/cBY1YbSJSWWqTi4cFgr9S6OyM7PBaPbr9zvNGwWP33vt0uqGhwDdN+y3yhbXVILEUpnwEWGw==
+byte-size@^7.0.0:
+  version "7.0.1"
+  resolved "https://registry.yarnpkg.com/byte-size/-/byte-size-7.0.1.tgz#b1daf3386de7ab9d706b941a748dbfc71130dee3"
+  integrity sha512-crQdqyCwhokxwV1UyDzLZanhkugAgft7vt0qbbdt60C6Zf3CAiGmtUCylbtYwrU6loOUw3euGrNtW1J651ot1A==
 
-cacache@^12.0.0, cacache@^12.0.3:
-  version "12.0.4"
-  resolved "https://registry.yarnpkg.com/cacache/-/cacache-12.0.4.tgz#668bcbd105aeb5f1d92fe25570ec9525c8faa40c"
-  integrity sha512-a0tMB40oefvuInr4Cwb3GerbL9xTj1D5yg0T5xrjGCGyfvbxseIXX7BAO/u/hIXdafzOI5JC3wDwHyf24buOAQ==
+cacache@^15.0.5:
+  version "15.2.0"
+  resolved "https://registry.yarnpkg.com/cacache/-/cacache-15.2.0.tgz#73af75f77c58e72d8c630a7a2858cb18ef523389"
+  integrity sha512-uKoJSHmnrqXgthDFx/IU6ED/5xd+NNGe+Bb+kLZy7Ku4P+BaiWEUflAKPZ7eAzsYGcsAGASJZsybXp+quEcHTw==
   dependencies:
-    bluebird "^3.5.5"
-    chownr "^1.1.1"
-    figgy-pudding "^3.5.1"
+    "@npmcli/move-file" "^1.0.1"
+    chownr "^2.0.0"
+    fs-minipass "^2.0.0"
     glob "^7.1.4"
-    graceful-fs "^4.1.15"
-    infer-owner "^1.0.3"
-    lru-cache "^5.1.1"
-    mississippi "^3.0.0"
-    mkdirp "^0.5.1"
-    move-concurrently "^1.0.1"
+    infer-owner "^1.0.4"
+    lru-cache "^6.0.0"
+    minipass "^3.1.1"
+    minipass-collect "^1.0.2"
+    minipass-flush "^1.0.5"
+    minipass-pipeline "^1.2.2"
+    mkdirp "^1.0.3"
+    p-map "^4.0.0"
     promise-inflight "^1.0.1"
-    rimraf "^2.6.3"
-    ssri "^6.0.1"
+    rimraf "^3.0.2"
+    ssri "^8.0.1"
+    tar "^6.0.2"
     unique-filename "^1.1.1"
-    y18n "^4.0.0"
 
 cache-base@^1.0.1:
   version "1.0.1"
@@ -2702,25 +2553,6 @@ call-me-maybe@^1.0.1:
   resolved "https://registry.yarnpkg.com/call-me-maybe/-/call-me-maybe-1.0.1.tgz#26d208ea89e37b5cbde60250a15f031c16a4d66b"
   integrity sha1-JtII6onje1y95gJQoV8DHBak1ms=
 
-caller-callsite@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/caller-callsite/-/caller-callsite-2.0.0.tgz#847e0fce0a223750a9a027c54b33731ad3154134"
-  integrity sha1-hH4PzgoiN1CpoCfFSzNzGtMVQTQ=
-  dependencies:
-    callsites "^2.0.0"
-
-caller-path@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/caller-path/-/caller-path-2.0.0.tgz#468f83044e369ab2010fac5f06ceee15bb2cb1f4"
-  integrity sha1-Ro+DBE42mrIBD6xfBs7uFbsssfQ=
-  dependencies:
-    caller-callsite "^2.0.0"
-
-callsites@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/callsites/-/callsites-2.0.0.tgz#06eb84f00eea413da86affefacbffb36093b3c50"
-  integrity sha1-BuuE8A7qQT2oav/vrL/7Ngk7PFA=
-
 callsites@^3.0.0:
   version "3.1.0"
   resolved "https://registry.yarnpkg.com/callsites/-/callsites-3.1.0.tgz#b3630abd8943432f54b3f0519238e33cd7df2f73"
@@ -2734,15 +2566,6 @@ camelcase-keys@^2.0.0:
     camelcase "^2.0.0"
     map-obj "^1.0.0"
 
-camelcase-keys@^4.0.0:
-  version "4.2.0"
-  resolved "https://registry.yarnpkg.com/camelcase-keys/-/camelcase-keys-4.2.0.tgz#a2aa5fb1af688758259c32c141426d78923b9b77"
-  integrity sha1-oqpfsa9oh1glnDLBQUJteJI7m3c=
-  dependencies:
-    camelcase "^4.1.0"
-    map-obj "^2.0.0"
-    quick-lru "^1.0.0"
-
 camelcase-keys@^6.2.2:
   version "6.2.2"
   resolved "https://registry.yarnpkg.com/camelcase-keys/-/camelcase-keys-6.2.2.tgz#5e755d6ba51aa223ec7d3d52f25778210f9dc3c0"
@@ -2762,39 +2585,27 @@ camelcase@^3.0.0:
   resolved "https://registry.yarnpkg.com/camelcase/-/camelcase-3.0.0.tgz#32fc4b9fcdaf845fcdf7e73bb97cac2261f0ab0a"
   integrity sha1-MvxLn82vhF/N9+c7uXysImHwqwo=
 
-camelcase@^4.1.0:
-  version "4.1.0"
-  resolved "https://registry.yarnpkg.com/camelcase/-/camelcase-4.1.0.tgz#d545635be1e33c542649c69173e5de6acfae34dd"
-  integrity sha1-1UVjW+HjPFQmScaRc+Xeas+uNN0=
-
 camelcase@^5.0.0, camelcase@^5.3.1:
   version "5.3.1"
   resolved "https://registry.yarnpkg.com/camelcase/-/camelcase-5.3.1.tgz#e3c9b31569e106811df242f715725a1f4c494320"
   integrity sha512-L28STB170nwWS63UjtlEOE3dldQApaJXZkOI1uMFfzf3rRuPegHaHesyee+YxQ+W6SvRDQV6UrdOdRiR153wJg==
 
-camelcase@^6.0.0:
+camelcase@^6.2.0:
   version "6.2.0"
   resolved "https://registry.yarnpkg.com/camelcase/-/camelcase-6.2.0.tgz#924af881c9d525ac9d87f40d964e5cea982a1809"
   integrity sha512-c7wVvbw3f37nuobQNtgsgG9POC9qMbNuMQmTCqZv23b6MIz0fcYpBiOlv9gEN/hdLdnZTDQhg6e9Dq5M1vKvfg==
 
-caniuse-lite@^1.0.30001181:
-  version "1.0.30001208"
-  resolved "https://registry.yarnpkg.com/caniuse-lite/-/caniuse-lite-1.0.30001208.tgz#a999014a35cebd4f98c405930a057a0d75352eb9"
-  integrity sha512-OE5UE4+nBOro8Dyvv0lfx+SRtfVIOM9uhKqFmJeUbGriqhhStgp1A0OyBpgy3OUF8AhYCT+PVwPC1gMl2ZcQMA==
-
-capture-exit@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/capture-exit/-/capture-exit-2.0.0.tgz#fb953bfaebeb781f62898239dabb426d08a509a4"
-  integrity sha512-PiT/hQmTonHhl/HFGN+Lx3JJUznrVYJ3+AQsnthneZbvW7x+f08Tk7yLJTLEOUvBTbduLeeBkxEaYXUOUrRq6g==
-  dependencies:
-    rsvp "^4.8.4"
+caniuse-lite@^1.0.30001219:
+  version "1.0.30001230"
+  resolved "https://registry.yarnpkg.com/caniuse-lite/-/caniuse-lite-1.0.30001230.tgz#8135c57459854b2240b57a4a6786044bdc5a9f71"
+  integrity sha512-5yBd5nWCBS+jWKTcHOzXwo5xzcj4ePE/yjtkZyUV1BTUmrBaA9MRGC+e7mxnqXSA90CmCA8L3eKLaSUkt099IQ==
 
 caseless@~0.12.0:
   version "0.12.0"
   resolved "https://registry.yarnpkg.com/caseless/-/caseless-0.12.0.tgz#1b681c21ff84033c826543090689420d187151dc"
   integrity sha1-G2gcIf+EAzyCZUMJBolCDRhxUdw=
 
-chalk@2.x, chalk@^2.0.0, chalk@^2.0.1, chalk@^2.3.1, chalk@^2.4.1, chalk@^2.4.2:
+chalk@2.x, chalk@^2.0.0, chalk@^2.4.1, chalk@^2.4.2:
   version "2.4.2"
   resolved "https://registry.yarnpkg.com/chalk/-/chalk-2.4.2.tgz#cd42541677a54333cf541a49108c1432b44c9424"
   integrity sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==
@@ -2803,10 +2614,10 @@ chalk@2.x, chalk@^2.0.0, chalk@^2.0.1, chalk@^2.3.1, chalk@^2.4.1, chalk@^2.4.2:
     escape-string-regexp "^1.0.5"
     supports-color "^5.3.0"
 
-chalk@^4.0.0:
-  version "4.1.0"
-  resolved "https://registry.yarnpkg.com/chalk/-/chalk-4.1.0.tgz#4e14870a618d9e2edd97dd8345fd9d9dc315646a"
-  integrity sha512-qwx12AxXe2Q5xQ43Ac//I6v5aXTipYrSESdOgzrN+9XjgEpyjpKuvSGaN4qE93f7TQTlerQQ8S+EQ0EyDoVL1A==
+chalk@^4.0.0, chalk@^4.1.0:
+  version "4.1.1"
+  resolved "https://registry.yarnpkg.com/chalk/-/chalk-4.1.1.tgz#c80b3fab28bf6371e6863325eee67e618b77e6ad"
+  integrity sha512-diHzdDKxcU+bAsUboHLPEDQiw0qEe0qd7SYUn3HgcFlWgbDcfLGswOHYeGrHKzG9z6UYf01d9VFMfZxPM1xZSg==
   dependencies:
     ansi-styles "^4.1.0"
     supports-color "^7.1.0"
@@ -2840,11 +2651,16 @@ chokidar@^2.0.0:
   optionalDependencies:
     fsevents "^1.2.7"
 
-chownr@^1.1.1, chownr@^1.1.2:
+chownr@^1.1.1:
   version "1.1.4"
   resolved "https://registry.yarnpkg.com/chownr/-/chownr-1.1.4.tgz#6fc9d7b42d32a583596337666e7d08084da2cc6b"
   integrity sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==
 
+chownr@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/chownr/-/chownr-2.0.0.tgz#15bfbe53d2eab4cf70f18a8cd68ebe5b3cb1dece"
+  integrity sha512-bIomtDF5KGpdogkLd9VspvFzk9KfpyyGlS8YFVZl7TGPBHL5snIOnxeshwVgPteQ9b4Eydl+pVbIyE1DcvCWgQ==
+
 chrome-trace-event@^1.0.2:
   version "1.0.3"
   resolved "https://registry.yarnpkg.com/chrome-trace-event/-/chrome-trace-event-1.0.3.tgz#1015eced4741e15d06664a957dbbf50d041e26ac"
@@ -2855,10 +2671,15 @@ ci-info@^2.0.0:
   resolved "https://registry.yarnpkg.com/ci-info/-/ci-info-2.0.0.tgz#67a9e964be31a51e15e5010d58e6f12834002f46"
   integrity sha512-5tK7EtrZ0N+OLFMthtqOj4fI2Jeb88C4CAZPu25LDVUgXJ0A3Js4PMGqrn0JU1W0Mh1/Z8wZzYPxqUrXeBboCQ==
 
-cjs-module-lexer@^0.6.0:
-  version "0.6.0"
-  resolved "https://registry.yarnpkg.com/cjs-module-lexer/-/cjs-module-lexer-0.6.0.tgz#4186fcca0eae175970aee870b9fe2d6cf8d5655f"
-  integrity sha512-uc2Vix1frTfnuzxxu1Hp4ktSvM3QaI4oXl4ZUqL1wjTu/BGki9TrCWoqLTg/drR1KwAEarXuRFCG2Svr1GxPFw==
+ci-info@^3.1.1:
+  version "3.1.1"
+  resolved "https://registry.yarnpkg.com/ci-info/-/ci-info-3.1.1.tgz#9a32fcefdf7bcdb6f0a7e1c0f8098ec57897b80a"
+  integrity sha512-kdRWLBIJwdsYJWYJFtAFFYxybguqeF91qpZaggjG5Nf8QKdizFG2hjqvaTXbxFIcYbSaD74KpAXv6BSm17DHEQ==
+
+cjs-module-lexer@^1.0.0:
+  version "1.2.1"
+  resolved "https://registry.yarnpkg.com/cjs-module-lexer/-/cjs-module-lexer-1.2.1.tgz#2fd46d9906a126965aa541345c499aaa18e8cd73"
+  integrity sha512-jVamGdJPDeuQilKhvVn1h3knuMOZzr8QDnpk+M9aMlCaMkTDd6fBWPhiDqFvFZ07pL0liqabAiuy8SY4jGHeaw==
 
 class-utils@^0.3.5:
   version "0.3.6"
@@ -2875,13 +2696,6 @@ clean-stack@^2.0.0:
   resolved "https://registry.yarnpkg.com/clean-stack/-/clean-stack-2.2.0.tgz#ee8472dbb129e727b31e8a10a427dee9dfe4008b"
   integrity sha512-4diC9HaTE+KRAMWhDhrGOECgWZxoevMc5TlkObMqNSsVU62PYzXZ/SMTjzyGAFF1YusgxGcSWTEXBhp0CPwQ1A==
 
-cli-cursor@^2.1.0:
-  version "2.1.0"
-  resolved "https://registry.yarnpkg.com/cli-cursor/-/cli-cursor-2.1.0.tgz#b35dac376479facc3e94747d41d0d0f5238ffcb5"
-  integrity sha1-s12sN2R5+sw+lHR9QdDQ9SOP/LU=
-  dependencies:
-    restore-cursor "^2.0.0"
-
 cli-cursor@^3.1.0:
   version "3.1.0"
   resolved "https://registry.yarnpkg.com/cli-cursor/-/cli-cursor-3.1.0.tgz#264305a7ae490d1d03bf0c9ba7c925d1753af307"
@@ -2889,10 +2703,10 @@ cli-cursor@^3.1.0:
   dependencies:
     restore-cursor "^3.1.0"
 
-cli-width@^2.0.0:
-  version "2.2.1"
-  resolved "https://registry.yarnpkg.com/cli-width/-/cli-width-2.2.1.tgz#b0433d0b4e9c847ef18868a4ef16fd5fc8271c48"
-  integrity sha512-GRMWDxpOB6Dgk2E5Uo+3eEBvtOOlimMmpbFiKuLFnQzYDavtLFY3K5ona41jgN/WdRZtG7utuVSVTL4HbZHGkw==
+cli-width@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/cli-width/-/cli-width-3.0.0.tgz#a2f48437a2caa9a22436e794bf071ec9e61cedf6"
+  integrity sha512-FxqpkPPwu1HjuN93Omfm4h8uIanXofW0RxVEW3k5RKx+mJJYSthzNhp32Kzxxy3YAEZ/Dc/EWN1vZRY0+kOhbw==
 
 cliui@^3.2.0:
   version "3.2.0"
@@ -2903,23 +2717,14 @@ cliui@^3.2.0:
     strip-ansi "^3.0.1"
     wrap-ansi "^2.0.0"
 
-cliui@^5.0.0:
-  version "5.0.0"
-  resolved "https://registry.yarnpkg.com/cliui/-/cliui-5.0.0.tgz#deefcfdb2e800784aa34f46fa08e06851c7bbbc5"
-  integrity sha512-PYeGSEmmHM6zvoef2w8TPzlrnNpXIjTipYK780YswmIP9vjxmd6Y2a3CB2Ks6/AU8NHjZugXvo8w3oWM2qnwXA==
-  dependencies:
-    string-width "^3.1.0"
-    strip-ansi "^5.2.0"
-    wrap-ansi "^5.1.0"
-
-cliui@^6.0.0:
-  version "6.0.0"
-  resolved "https://registry.yarnpkg.com/cliui/-/cliui-6.0.0.tgz#511d702c0c4e41ca156d7d0e96021f23e13225b1"
-  integrity sha512-t6wbgtoCXvAzst7QgXxJYqPt0usEfbgQdftEPbLL/cvv6HPE5VgvqCuAIDR0NgU52ds6rFwqrgakNLrHEjCbrQ==
+cliui@^7.0.2:
+  version "7.0.4"
+  resolved "https://registry.yarnpkg.com/cliui/-/cliui-7.0.4.tgz#a0265ee655476fc807aea9df3df8df7783808b4f"
+  integrity sha512-OcRE68cOsVMXp1Yvonl/fzkQOyjLSu/8bhPDfQt0e0/Eb283TKP20Fs2MqoPsr9SwA595rRCA+QMzYc9nBP+JQ==
   dependencies:
     string-width "^4.2.0"
     strip-ansi "^6.0.0"
-    wrap-ansi "^6.2.0"
+    wrap-ansi "^7.0.0"
 
 clone-buffer@^1.0.0:
   version "1.0.0"
@@ -2959,6 +2764,13 @@ cloneable-readable@^1.0.0:
     process-nextick-args "^2.0.0"
     readable-stream "^2.3.5"
 
+cmd-shim@^4.1.0:
+  version "4.1.0"
+  resolved "https://registry.yarnpkg.com/cmd-shim/-/cmd-shim-4.1.0.tgz#b3a904a6743e9fede4148c6f3800bf2a08135bdd"
+  integrity sha512-lb9L7EM4I/ZRVuljLPEtUJOP+xiQVknZ4ZMpMgEp4JzNldPb27HU03hi6K1/6CoIuit/Zm/LQXySErFeXxDprw==
+  dependencies:
+    mkdirp-infer-owner "^2.0.0"
+
 co@^4.6.0:
   version "4.6.0"
   resolved "https://registry.yarnpkg.com/co/-/co-4.6.0.tgz#6ea6bdf3d853ae54ccb8e47bfa0bf3f9031fb184"
@@ -3020,7 +2832,7 @@ color-support@^1.1.3:
   resolved "https://registry.yarnpkg.com/color-support/-/color-support-1.1.3.tgz#93834379a1cc9a0c61f82f52f0d04322251bd5a2"
   integrity sha512-qiBjkpbMLO/HL68y+lh4q0/O1MZFj2RX6X/KmMa3+gJD3z+WwI1ZzDHysvqHGS3mP6mznPckpXmw1nI9cJjyRg==
 
-colorette@^1.2.1:
+colorette@^1.2.2:
   version "1.2.2"
   resolved "https://registry.yarnpkg.com/colorette/-/colorette-1.2.2.tgz#cbcc79d5e99caea2dbf10eb3a26fd8b3e6acfa94"
   integrity sha512-MKGMzyfeuutC/ZJ1cba9NqcNpfeqMUcYmyF1ZFY6/Cn7CNSAKx6a+s48sqLqyAiZuaP2TcqMhoo+dlwFnVxT9w==
@@ -3038,7 +2850,7 @@ columnify@^1.5.4:
     strip-ansi "^3.0.0"
     wcwidth "^1.0.0"
 
-combined-stream@^1.0.6, combined-stream@~1.0.6:
+combined-stream@^1.0.6, combined-stream@^1.0.8, combined-stream@~1.0.6:
   version "1.0.8"
   resolved "https://registry.yarnpkg.com/combined-stream/-/combined-stream-1.0.8.tgz#c3d45a8b34fd730631a110a8a2520682b31d5a7f"
   integrity sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==
@@ -3093,7 +2905,7 @@ concat-map@0.0.1:
   resolved "https://registry.yarnpkg.com/concat-map/-/concat-map-0.0.1.tgz#d8a96bd77fd68df7793a73036a3ba0d5405d477b"
   integrity sha1-2Klr13/Wjfd5OnMDajug1UBdR3s=
 
-concat-stream@^1.5.0, concat-stream@^1.6.0:
+concat-stream@^1.6.0:
   version "1.6.2"
   resolved "https://registry.yarnpkg.com/concat-stream/-/concat-stream-1.6.2.tgz#904bdf194cd3122fc675c77fc4ac3d4ff0fd1a34"
   integrity sha512-27HBghJxjiZtIk3Ycvn/4kbJk/1uZuJFfuPEns6LaEvpvG1f0hTea8lilrouyo9mVc2GWdcEZ8OLoGmSADlrCw==
@@ -3113,7 +2925,7 @@ concat-stream@^2.0.0:
     readable-stream "^3.0.2"
     typedarray "^0.0.6"
 
-config-chain@^1.1.11:
+config-chain@^1.1.12:
   version "1.1.12"
   resolved "https://registry.yarnpkg.com/config-chain/-/config-chain-1.1.12.tgz#0fde8d091200eb5e808caf25fe618c02f48e4efa"
   integrity sha512-a1eOIcu8+7lUInge4Rpf/n4Krkf3Dd9lqhljRzII1/Zno/kRtUWnznPO3jOKBmTEktkt3fkxisUcivoj0ebzoA==
@@ -3126,7 +2938,7 @@ console-control-strings@^1.0.0, console-control-strings@~1.1.0:
   resolved "https://registry.yarnpkg.com/console-control-strings/-/console-control-strings-1.1.0.tgz#3d7cf4464db6446ea644bf4b39507f9851008e8e"
   integrity sha1-PXz0Rk22RG6mRL9LOVB/mFEAjo4=
 
-conventional-changelog-angular@^5.0.3:
+conventional-changelog-angular@^5.0.12:
   version "5.0.12"
   resolved "https://registry.yarnpkg.com/conventional-changelog-angular/-/conventional-changelog-angular-5.0.12.tgz#c979b8b921cbfe26402eb3da5bbfda02d865a2b9"
   integrity sha512-5GLsbnkR/7A89RyHLvvoExbiGbd9xKdKqDTrArnPbOqBqG/2wIosu0fHwpeIRI8Tl94MhVNBXcLJZl92ZQ5USw==
@@ -3134,31 +2946,33 @@ conventional-changelog-angular@^5.0.3:
     compare-func "^2.0.0"
     q "^1.5.1"
 
-conventional-changelog-core@^3.1.6:
-  version "3.2.3"
-  resolved "https://registry.yarnpkg.com/conventional-changelog-core/-/conventional-changelog-core-3.2.3.tgz#b31410856f431c847086a7dcb4d2ca184a7d88fb"
-  integrity sha512-LMMX1JlxPIq/Ez5aYAYS5CpuwbOk6QFp8O4HLAcZxe3vxoCtABkhfjetk8IYdRB9CDQGwJFLR3Dr55Za6XKgUQ==
+conventional-changelog-core@^4.2.2:
+  version "4.2.2"
+  resolved "https://registry.yarnpkg.com/conventional-changelog-core/-/conventional-changelog-core-4.2.2.tgz#f0897df6d53b5d63dec36b9442bd45354f8b3ce5"
+  integrity sha512-7pDpRUiobQDNkwHyJG7k9f6maPo9tfPzkSWbRq97GGiZqisElhnvUZSvyQH20ogfOjntB5aadvv6NNcKL1sReg==
   dependencies:
-    conventional-changelog-writer "^4.0.6"
-    conventional-commits-parser "^3.0.3"
+    add-stream "^1.0.0"
+    conventional-changelog-writer "^4.0.18"
+    conventional-commits-parser "^3.2.0"
     dateformat "^3.0.0"
     get-pkg-repo "^1.0.0"
-    git-raw-commits "2.0.0"
+    git-raw-commits "^2.0.8"
     git-remote-origin-url "^2.0.0"
-    git-semver-tags "^2.0.3"
-    lodash "^4.2.1"
-    normalize-package-data "^2.3.5"
+    git-semver-tags "^4.1.1"
+    lodash "^4.17.15"
+    normalize-package-data "^3.0.0"
     q "^1.5.1"
     read-pkg "^3.0.0"
     read-pkg-up "^3.0.0"
-    through2 "^3.0.0"
+    shelljs "^0.8.3"
+    through2 "^4.0.0"
 
-conventional-changelog-preset-loader@^2.1.1:
+conventional-changelog-preset-loader@^2.3.4:
   version "2.3.4"
   resolved "https://registry.yarnpkg.com/conventional-changelog-preset-loader/-/conventional-changelog-preset-loader-2.3.4.tgz#14a855abbffd59027fd602581f1f34d9862ea44c"
   integrity sha512-GEKRWkrSAZeTq5+YjUZOYxdHq+ci4dNwHvpaBC3+ENalzFWuCWa9EZXSuZBpkr72sMdKB+1fyDV4takK1Lf58g==
 
-conventional-changelog-writer@^4.0.6:
+conventional-changelog-writer@^4.0.18:
   version "4.1.0"
   resolved "https://registry.yarnpkg.com/conventional-changelog-writer/-/conventional-changelog-writer-4.1.0.tgz#1ca7880b75aa28695ad33312a1f2366f4b12659f"
   integrity sha512-WwKcUp7WyXYGQmkLsX4QmU42AZ1lqlvRW9mqoyiQzdD+rJWbTepdWoKJuwXTS+yq79XKnQNa93/roViPQrAQgw==
@@ -3174,7 +2988,7 @@ conventional-changelog-writer@^4.0.6:
     split "^1.0.0"
     through2 "^4.0.0"
 
-conventional-commits-filter@^2.0.2, conventional-commits-filter@^2.0.7:
+conventional-commits-filter@^2.0.7:
   version "2.0.7"
   resolved "https://registry.yarnpkg.com/conventional-commits-filter/-/conventional-commits-filter-2.0.7.tgz#f8d9b4f182fce00c9af7139da49365b136c8a0b3"
   integrity sha512-ASS9SamOP4TbCClsRHxIHXRfcGCnIoQqkvAzCSbZzTFLfcTqJVugB0agRgsEELsqaeWgsXv513eS116wnlSSPA==
@@ -3182,7 +2996,7 @@ conventional-commits-filter@^2.0.2, conventional-commits-filter@^2.0.7:
     lodash.ismatch "^4.4.0"
     modify-values "^1.0.0"
 
-conventional-commits-parser@^3.0.3:
+conventional-commits-parser@^3.2.0:
   version "3.2.1"
   resolved "https://registry.yarnpkg.com/conventional-commits-parser/-/conventional-commits-parser-3.2.1.tgz#ba44f0b3b6588da2ee9fd8da508ebff50d116ce2"
   integrity sha512-OG9kQtmMZBJD/32NEw5IhN5+HnBqVjy03eC+I71I0oQRFA5rOgA4OtPOYG7mz1GkCfCNxn3gKIX8EiHJYuf1cA==
@@ -3195,18 +3009,18 @@ conventional-commits-parser@^3.0.3:
     through2 "^4.0.0"
     trim-off-newlines "^1.0.0"
 
-conventional-recommended-bump@^5.0.0:
-  version "5.0.1"
-  resolved "https://registry.yarnpkg.com/conventional-recommended-bump/-/conventional-recommended-bump-5.0.1.tgz#5af63903947b6e089e77767601cb592cabb106ba"
-  integrity sha512-RVdt0elRcCxL90IrNP0fYCpq1uGt2MALko0eyeQ+zQuDVWtMGAy9ng6yYn3kax42lCj9+XBxQ8ZN6S9bdKxDhQ==
+conventional-recommended-bump@^6.1.0:
+  version "6.1.0"
+  resolved "https://registry.yarnpkg.com/conventional-recommended-bump/-/conventional-recommended-bump-6.1.0.tgz#cfa623285d1de554012f2ffde70d9c8a22231f55"
+  integrity sha512-uiApbSiNGM/kkdL9GTOLAqC4hbptObFo4wW2QRyHsKciGAfQuLU1ShZ1BIVI/+K2BE/W1AWYQMCXAsv4dyKPaw==
   dependencies:
     concat-stream "^2.0.0"
-    conventional-changelog-preset-loader "^2.1.1"
-    conventional-commits-filter "^2.0.2"
-    conventional-commits-parser "^3.0.3"
-    git-raw-commits "2.0.0"
-    git-semver-tags "^2.0.3"
-    meow "^4.0.0"
+    conventional-changelog-preset-loader "^2.3.4"
+    conventional-commits-filter "^2.0.7"
+    conventional-commits-parser "^3.2.0"
+    git-raw-commits "^2.0.8"
+    git-semver-tags "^4.1.1"
+    meow "^8.0.0"
     q "^1.5.1"
 
 convert-source-map@^1.0.0, convert-source-map@^1.4.0, convert-source-map@^1.5.0, convert-source-map@^1.6.0, convert-source-map@^1.7.0:
@@ -3216,18 +3030,6 @@ convert-source-map@^1.0.0, convert-source-map@^1.4.0, convert-source-map@^1.5.0,
   dependencies:
     safe-buffer "~5.1.1"
 
-copy-concurrently@^1.0.0:
-  version "1.0.5"
-  resolved "https://registry.yarnpkg.com/copy-concurrently/-/copy-concurrently-1.0.5.tgz#92297398cae34937fcafd6ec8139c18051f0b5e0"
-  integrity sha512-f2domd9fsVDFtaFcbaRZuYXwtdmnzqbADSwhSWYxYB/Q8zsdUUFMXVRwXGDMWmbEzAn1kdRrtI1T/KTFOL4X2A==
-  dependencies:
-    aproba "^1.1.1"
-    fs-write-stream-atomic "^1.0.8"
-    iferr "^0.1.5"
-    mkdirp "^0.5.1"
-    rimraf "^2.5.4"
-    run-queue "^1.0.0"
-
 copy-descriptor@^0.1.0:
   version "0.1.1"
   resolved "https://registry.yarnpkg.com/copy-descriptor/-/copy-descriptor-0.1.1.tgz#676f6eb3c39997c2ee1ac3a924fd6124748f578d"
@@ -3246,15 +3048,16 @@ core-util-is@1.0.2, core-util-is@~1.0.0:
   resolved "https://registry.yarnpkg.com/core-util-is/-/core-util-is-1.0.2.tgz#b5fd54220aa2bc5ab57aab7140c940754503c1a7"
   integrity sha1-tf1UIgqivFq1eqtxQMlAdUUDwac=
 
-cosmiconfig@^5.1.0:
-  version "5.2.1"
-  resolved "https://registry.yarnpkg.com/cosmiconfig/-/cosmiconfig-5.2.1.tgz#040f726809c591e77a17c0a3626ca45b4f168b1a"
-  integrity sha512-H65gsXo1SKjf8zmrJ67eJk8aIRKV5ff2D4uKZIBZShbhGSpEmsQOPW/SKMKYhSTrqR7ufy6RP69rPogdaPh/kA==
+cosmiconfig@^7.0.0:
+  version "7.0.0"
+  resolved "https://registry.yarnpkg.com/cosmiconfig/-/cosmiconfig-7.0.0.tgz#ef9b44d773959cae63ddecd122de23853b60f8d3"
+  integrity sha512-pondGvTuVYDk++upghXJabWzL6Kxu6f26ljFw64Swq9v6sQPUL3EUlVDV56diOjpCayKihL6hVe8exIACU4XcA==
   dependencies:
-    import-fresh "^2.0.0"
-    is-directory "^0.3.1"
-    js-yaml "^3.13.1"
-    parse-json "^4.0.0"
+    "@types/parse-json" "^4.0.0"
+    import-fresh "^3.2.1"
+    parse-json "^5.0.0"
+    path-type "^4.0.0"
+    yaml "^1.10.0"
 
 cp-file@^7.0.0:
   version "7.0.0"
@@ -3266,7 +3069,7 @@ cp-file@^7.0.0:
     nested-error-stacks "^2.0.0"
     p-event "^4.1.0"
 
-cpy@^8.1.2:
+cpy@8.1.2:
   version "8.1.2"
   resolved "https://registry.yarnpkg.com/cpy/-/cpy-8.1.2.tgz#e339ea54797ad23f8e3919a5cffd37bfc3f25935"
   integrity sha512-dmC4mUesv0OYH2kNFEidtf/skUwv4zePmGeepjyyJ0qTo5+8KhA1o99oIAwVVLzQMAeDJml74d6wPPKb6EZUTg==
@@ -3286,14 +3089,14 @@ create-require@^1.1.0:
   resolved "https://registry.yarnpkg.com/create-require/-/create-require-1.1.1.tgz#c1d7e8f1e5f6cfc9ff65f9cd352d37348756c333"
   integrity sha512-dcKFX3jn0MpIaXjisoRvexIJVEKzaq7z2rZKxf+MSr9TkdmHmsU4m2lcLojrj/FHl8mk5VxMmYA+ftRkP/3oKQ==
 
-cross-env@^7.0.3:
+cross-env@7.0.3:
   version "7.0.3"
   resolved "https://registry.yarnpkg.com/cross-env/-/cross-env-7.0.3.tgz#865264b29677dc015ba8418918965dd232fc54cf"
   integrity sha512-+/HKd6EgcQCJGh2PSjZuUitQBQynKor4wrFbRg4DtAgS1aWO+gU52xpH7M9ScGgXSYmAVS9bIJ8EzuaGw0oNAw==
   dependencies:
     cross-spawn "^7.0.1"
 
-cross-spawn@^6.0.0, cross-spawn@^6.0.5:
+cross-spawn@^6.0.5:
   version "6.0.5"
   resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-6.0.5.tgz#4a5ec7c64dfae22c3a14124dbacdee846d80cbc4"
   integrity sha512-eTVLrBSt7fjbDygz805pMnstIs2VTBNkRm0qxZd+M7A5XDdxVRWO5MxGBXZhjY4cqLYLdtrGqRf8mBPmzwSpWQ==
@@ -3304,7 +3107,7 @@ cross-spawn@^6.0.0, cross-spawn@^6.0.5:
     shebang-command "^1.2.0"
     which "^1.2.9"
 
-cross-spawn@^7.0.0, cross-spawn@^7.0.1, cross-spawn@^7.0.2:
+cross-spawn@^7.0.1, cross-spawn@^7.0.2, cross-spawn@^7.0.3:
   version "7.0.3"
   resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-7.0.3.tgz#f73a85b9d5d41d045551c177e2882d4ac85728a6"
   integrity sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==
@@ -3346,11 +3149,6 @@ currently-unhandled@^0.4.1:
   dependencies:
     array-find-index "^1.0.1"
 
-cyclist@^1.0.1:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/cyclist/-/cyclist-1.0.1.tgz#596e9698fd0c80e12038c2b82d6eb1b35b6224d9"
-  integrity sha1-WW6WmP0MgOEgOMK4LW6xs1tiJNk=
-
 d@1, d@^1.0.1:
   version "1.0.1"
   resolved "https://registry.yarnpkg.com/d/-/d-1.0.1.tgz#8698095372d58dbee346ffd0c7093f99f8f9eb5a"
@@ -3359,12 +3157,10 @@ d@1, d@^1.0.1:
     es5-ext "^0.10.50"
     type "^1.0.1"
 
-dargs@^4.0.1:
-  version "4.1.0"
-  resolved "https://registry.yarnpkg.com/dargs/-/dargs-4.1.0.tgz#03a9dbb4b5c2f139bf14ae53f0b8a2a6a86f4e17"
-  integrity sha1-A6nbtLXC8Tm/FK5T8LiipqhvThc=
-  dependencies:
-    number-is-nan "^1.0.0"
+dargs@^7.0.0:
+  version "7.0.0"
+  resolved "https://registry.yarnpkg.com/dargs/-/dargs-7.0.0.tgz#04015c41de0bcb69ec84050f3d9be0caf8d6d5cc"
+  integrity sha512-2iy1EkLdlBzQGvbweYRFxmFath8+K7+AKB0TlhHWkNuH+TmovaMH/Wp7V7R4u7f4SnX3OgLsU9t1NI9ioDnUpg==
 
 dashdash@^1.12.0:
   version "1.14.1"
@@ -3396,20 +3192,20 @@ debug-fabulous@^1.0.0:
     memoizee "0.4.X"
     object-assign "4.X"
 
-debug@3.1.0:
-  version "3.1.0"
-  resolved "https://registry.yarnpkg.com/debug/-/debug-3.1.0.tgz#5bb5a0672628b64149566ba16819e61518c67261"
-  integrity sha512-OX8XqP7/1a9cqkxYw2yXss15f26NKWBpDXQd0/uK/KPqdQhxbPa994hnzjcE2VqQpDslf55723cKPUOGSmMY3g==
-  dependencies:
-    ms "2.0.0"
-
-debug@3.X, debug@^3.1.0:
+debug@3.X:
   version "3.2.7"
   resolved "https://registry.yarnpkg.com/debug/-/debug-3.2.7.tgz#72580b7e9145fb39b6676f9c5e5fb100b934179a"
   integrity sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ==
   dependencies:
     ms "^2.1.1"
 
+debug@4, debug@^4.0.1, debug@^4.1.0, debug@^4.1.1:
+  version "4.3.1"
+  resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.1.tgz#f0d229c505e0c6d8c49ac553d1b13dc183f6b2ee"
+  integrity sha512-doEwdvm4PCeK4K3RQN2ZC2BYUBaxwLARCqZmMjtF8a51J2Rb0xpVloFRnCODwqjpwnAoao4pelN8l3RJdv3gRQ==
+  dependencies:
+    ms "2.1.2"
+
 debug@^2.2.0, debug@^2.3.3:
   version "2.6.9"
   resolved "https://registry.yarnpkg.com/debug/-/debug-2.6.9.tgz#5d128515df134ff327e90a4c93f4e077a536341f"
@@ -3417,19 +3213,12 @@ debug@^2.2.0, debug@^2.3.3:
   dependencies:
     ms "2.0.0"
 
-debug@^4.0.1, debug@^4.1.0, debug@^4.1.1:
-  version "4.3.1"
-  resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.1.tgz#f0d229c505e0c6d8c49ac553d1b13dc183f6b2ee"
-  integrity sha512-doEwdvm4PCeK4K3RQN2ZC2BYUBaxwLARCqZmMjtF8a51J2Rb0xpVloFRnCODwqjpwnAoao4pelN8l3RJdv3gRQ==
-  dependencies:
-    ms "2.1.2"
-
 debuglog@^1.0.1:
   version "1.0.1"
   resolved "https://registry.yarnpkg.com/debuglog/-/debuglog-1.0.1.tgz#aa24ffb9ac3df9a2351837cfb2d279360cd78492"
   integrity sha1-qiT/uaw9+aI1GDfPstJ5NgzXhJI=
 
-decamelize-keys@^1.0.0, decamelize-keys@^1.1.0:
+decamelize-keys@^1.1.0:
   version "1.1.0"
   resolved "https://registry.yarnpkg.com/decamelize-keys/-/decamelize-keys-1.1.0.tgz#d171a87933252807eb3cb61dc1c1445d078df2d9"
   integrity sha1-0XGoeTMlKAfrPLYdwcFEXQeN8tk=
@@ -3552,6 +3341,11 @@ delegates@^1.0.0:
   resolved "https://registry.yarnpkg.com/delegates/-/delegates-1.0.0.tgz#84c6e159b81904fdca59a0ef44cd870d31250f9a"
   integrity sha1-hMbhWbgZBP3KWaDvRM2HDTElD5o=
 
+depd@^1.1.2:
+  version "1.1.2"
+  resolved "https://registry.yarnpkg.com/depd/-/depd-1.1.2.tgz#9bcd52e14c097763e749b274c4346ed2e560b5a9"
+  integrity sha1-m81S4UwJd2PnSbJ0xDRu0uVgtak=
+
 deprecation@^2.0.0, deprecation@^2.3.1:
   version "2.3.1"
   resolved "https://registry.yarnpkg.com/deprecation/-/deprecation-2.3.1.tgz#6368cbdb40abf3373b525ac87e4a260c3a700919"
@@ -3567,6 +3361,11 @@ detect-indent@^5.0.0:
   resolved "https://registry.yarnpkg.com/detect-indent/-/detect-indent-5.0.0.tgz#3871cc0a6a002e8c3e5b3cf7f336264675f06b9d"
   integrity sha1-OHHMCmoALow+Wzz38zYmRnXwa50=
 
+detect-indent@^6.0.0:
+  version "6.0.0"
+  resolved "https://registry.yarnpkg.com/detect-indent/-/detect-indent-6.0.0.tgz#0abd0f549f69fc6659a254fe96786186b6f528fd"
+  integrity sha512-oSyFlqaTHCItVRGK5RmrmjB+CmaMOW7IaNA/kdxqhoa6d17j/5ce9O9eWXmV/KEdRwqpQA+Vqe8a8Bsybu4YnA==
+
 detect-newline@^2.0.0:
   version "2.1.0"
   resolved "https://registry.yarnpkg.com/detect-newline/-/detect-newline-2.1.0.tgz#f41f1c10be4b00e87b5f13da680759f2c5bfd3e2"
@@ -3590,6 +3389,11 @@ diff-sequences@^26.6.2:
   resolved "https://registry.yarnpkg.com/diff-sequences/-/diff-sequences-26.6.2.tgz#48ba99157de1923412eed41db6b6d4aa9ca7c0b1"
   integrity sha512-Mv/TDa3nZ9sbc5soK+OoA74BsS3mL37yixCvUAQkiuA4Wz6YtwP/K47n2rv2ovzHZvoiQeA5FTQOschKkEwB0Q==
 
+diff-sequences@^27.0.1:
+  version "27.0.1"
+  resolved "https://registry.yarnpkg.com/diff-sequences/-/diff-sequences-27.0.1.tgz#9c9801d52ed5f576ff0a20e3022a13ee6e297e7c"
+  integrity sha512-XPLijkfJUh/PIBnfkcSHgvD6tlYixmcMAn3osTk6jt+H0v/mgURto1XUiD9DKuGX5NDoVS6dSlA23gd9FUaCFg==
+
 diff@^4.0.1:
   version "4.0.2"
   resolved "https://registry.yarnpkg.com/diff/-/diff-4.0.2.tgz#60f3aecb89d5fae520c11aa19efc2bb982aade7d"
@@ -3623,13 +3427,6 @@ domexception@^2.0.1:
   dependencies:
     webidl-conversions "^5.0.0"
 
-dot-prop@^4.2.0:
-  version "4.2.1"
-  resolved "https://registry.yarnpkg.com/dot-prop/-/dot-prop-4.2.1.tgz#45884194a71fc2cda71cbb4bceb3a4dd2f433ba4"
-  integrity sha512-l0p4+mIuJIua0mhxGoh4a+iNL9bmeK5DvnSVQa6T0OhrVmaEa1XScX5Etc673FePCJOArq/4Pa2cLGODUWTPOQ==
-  dependencies:
-    is-obj "^1.0.0"
-
 dot-prop@^5.1.0:
   version "5.3.0"
   resolved "https://registry.yarnpkg.com/dot-prop/-/dot-prop-5.3.0.tgz#90ccce708cd9cd82cc4dc8c3ddd9abdd55b20e88"
@@ -3637,12 +3434,19 @@ dot-prop@^5.1.0:
   dependencies:
     is-obj "^2.0.0"
 
+dot-prop@^6.0.1:
+  version "6.0.1"
+  resolved "https://registry.yarnpkg.com/dot-prop/-/dot-prop-6.0.1.tgz#fc26b3cf142b9e59b74dbd39ed66ce620c681083"
+  integrity sha512-tE7ztYzXHIeyvc7N+hR3oi7FIbf/NIjVP9hmAt3yMXzrQ072/fpjGLx2GxNxGxUl5V73MEqYzioOMoVhGMJ5cA==
+  dependencies:
+    is-obj "^2.0.0"
+
 duplexer@^0.1.1:
   version "0.1.2"
   resolved "https://registry.yarnpkg.com/duplexer/-/duplexer-0.1.2.tgz#3abe43aef3835f8ae077d136ddce0f276b0400e6"
   integrity sha512-jtD6YG370ZCIi/9GTaJKQxWTZD045+4R4hTk/x1UyoqadyJ9x9CgSi1RlVDQF8U2sxLLSnFkCaMihqljHIWgMg==
 
-duplexify@^3.4.2, duplexify@^3.6.0:
+duplexify@^3.6.0:
   version "3.7.1"
   resolved "https://registry.yarnpkg.com/duplexify/-/duplexify-3.7.1.tgz#2a4df5317f6ccfd91f86d6fd25d8d8a103b88309"
   integrity sha512-07z8uv2wMyS51kKhD1KsdXJg5WQ6t93RneqRxUHnskXVtlYYkLqM0gqStQZ3pj073g687jPCHrqNfCzawLYh5g==
@@ -3668,27 +3472,22 @@ ecc-jsbn@~0.1.1:
     jsbn "~0.1.0"
     safer-buffer "^2.1.0"
 
-electron-to-chromium@^1.3.649:
-  version "1.3.711"
-  resolved "https://registry.yarnpkg.com/electron-to-chromium/-/electron-to-chromium-1.3.711.tgz#92c3caf7ffed5e18bf63f66b4b57b4db2409c450"
-  integrity sha512-XbklBVCDiUeho0PZQCjC25Ha6uBwqqJeyDhPLwLwfWRAo4x+FZFsmu1pPPkXT+B4MQMQoQULfyaMltDopfeiHQ==
-
-emittery@^0.7.1:
-  version "0.7.2"
-  resolved "https://registry.yarnpkg.com/emittery/-/emittery-0.7.2.tgz#25595908e13af0f5674ab419396e2fb394cdfa82"
-  integrity sha512-A8OG5SR/ij3SsJdWDJdkkSYUjQdCUx6APQXem0SaEePBSRg4eymGYwBkKo1Y6DU+af/Jn2dBQqDBvjnr9Vi8nQ==
+electron-to-chromium@^1.3.723:
+  version "1.3.738"
+  resolved "https://registry.yarnpkg.com/electron-to-chromium/-/electron-to-chromium-1.3.738.tgz#aec24b091c82acbfabbdcce08076a703941d17ca"
+  integrity sha512-vCMf4gDOpEylPSLPLSwAEsz+R3ShP02Y3cAKMZvTqule3XcPp7tgc/0ESI7IS6ZeyBlGClE50N53fIOkcIVnpw==
 
-emoji-regex@^7.0.1:
-  version "7.0.3"
-  resolved "https://registry.yarnpkg.com/emoji-regex/-/emoji-regex-7.0.3.tgz#933a04052860c85e83c122479c4748a8e4c72156"
-  integrity sha512-CwBLREIQ7LvYFB0WyRvwhq5N5qPhc6PMjD6bYggFlI5YyDgl+0vxq5VHbMOFqLg7hfWzmu8T5Z1QofhmTIhItA==
+emittery@^0.8.1:
+  version "0.8.1"
+  resolved "https://registry.yarnpkg.com/emittery/-/emittery-0.8.1.tgz#bb23cc86d03b30aa75a7f734819dee2e1ba70860"
+  integrity sha512-uDfvUjVrfGJJhymx/kz6prltenw1u7WrCg1oa94zYY8xxVpLLUu045LAT0dhDZdXG58/EpPL/5kA180fQ/qudg==
 
 emoji-regex@^8.0.0:
   version "8.0.0"
   resolved "https://registry.yarnpkg.com/emoji-regex/-/emoji-regex-8.0.0.tgz#e818fd69ce5ccfcb404594f842963bf53164cc37"
   integrity sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==
 
-encoding@^0.1.11:
+encoding@^0.1.12:
   version "0.1.13"
   resolved "https://registry.yarnpkg.com/encoding/-/encoding-0.1.13.tgz#56574afdd791f54a8e9b2785c0582a2d26210fa9"
   integrity sha512-ETBauow1T35Y/WZMkio9jiM0Z5xjHHmJ4XmjZOq1l/dXz3lr2sRn87nJy20RupqSh1F2m3HHPSp8ShIPQJrJ3A==
@@ -3703,9 +3502,9 @@ end-of-stream@^1.0.0, end-of-stream@^1.1.0:
     once "^1.4.0"
 
 enhanced-resolve@^5.8.0:
-  version "5.8.0"
-  resolved "https://registry.yarnpkg.com/enhanced-resolve/-/enhanced-resolve-5.8.0.tgz#d9deae58f9d3773b6a111a5a46831da5be5c9ac0"
-  integrity sha512-Sl3KRpJA8OpprrtaIswVki3cWPiPKxXuFxJXBp+zNb6s6VwNWwFRUdtmzd2ReUut8n+sCPx7QCtQ7w5wfJhSgQ==
+  version "5.8.2"
+  resolved "https://registry.yarnpkg.com/enhanced-resolve/-/enhanced-resolve-5.8.2.tgz#15ddc779345cbb73e97c611cd00c01c1e7bf4d8b"
+  integrity sha512-F27oB3WuHDzvR2DOGNTaYy0D5o0cnrv8TeI482VM4kYgQd/FT9lUQwuNsJ0oOHtBUq7eiW5ytqzp7nBFknL+GA==
   dependencies:
     graceful-fs "^4.2.4"
     tapable "^2.2.0"
@@ -3722,15 +3521,15 @@ env-paths@^2.2.0:
   resolved "https://registry.yarnpkg.com/env-paths/-/env-paths-2.2.1.tgz#420399d416ce1fbe9bc0a07c62fa68d67fd0f8f2"
   integrity sha512-+h1lkLKhZMTYjog1VEpJNG7NZJWcuc2DDk/qsqSTRRCOXiLjeQ1d1/udrUGhqMxUgAlwKNZ0cf2uqan5GLuS2A==
 
-envinfo@^7.3.1:
+envinfo@^7.7.4:
   version "7.8.1"
   resolved "https://registry.yarnpkg.com/envinfo/-/envinfo-7.8.1.tgz#06377e3e5f4d379fea7ac592d5ad8927e0c4d475"
   integrity sha512-/o+BXHmB7ocbHEAs6F2EnG0ogybVVUdkRunTT2glZU9XAaGmhqskrvKwqXuDfNjEO0LZKWdejEEpnq8aM0tOaw==
 
-err-code@^1.0.0:
-  version "1.1.2"
-  resolved "https://registry.yarnpkg.com/err-code/-/err-code-1.1.2.tgz#06e0116d3028f6aef4806849eb0ea6a748ae6960"
-  integrity sha1-BuARbTAo9q70gGhJ6w6mp0iuaWA=
+err-code@^2.0.2:
+  version "2.0.3"
+  resolved "https://registry.yarnpkg.com/err-code/-/err-code-2.0.3.tgz#23c2f3b756ffdfc608d30e27c9a941024807e7f9"
+  integrity sha512-2bmlRpNKBxT/CRmPOlyISQpNj+qSeYvcym/uT0Jx2bMOlKLtSy1ZmLuVxSEKKyor/N5yhvp/ZiG1oE3DEYMSFA==
 
 error-ex@^1.2.0, error-ex@^1.3.1:
   version "1.3.2"
@@ -3740,9 +3539,9 @@ error-ex@^1.2.0, error-ex@^1.3.1:
     is-arrayish "^0.2.1"
 
 es-abstract@^1.18.0-next.2:
-  version "1.18.0"
-  resolved "https://registry.yarnpkg.com/es-abstract/-/es-abstract-1.18.0.tgz#ab80b359eecb7ede4c298000390bc5ac3ec7b5a4"
-  integrity sha512-LJzK7MrQa8TS0ja2w3YNLzUgJCGPdPOV1yVvezjNnS89D+VR08+Szt2mz3YB2Dck/+w5tfIq/RoUAFqJJGM2yw==
+  version "1.18.2"
+  resolved "https://registry.yarnpkg.com/es-abstract/-/es-abstract-1.18.2.tgz#6eb518b640262e8ddcbd48e0bc8549f82efd48a7"
+  integrity sha512-byRiNIQXE6HWNySaU6JohoNXzYgbBjztwFnBLUTiJmWXjaU9bSq3urQLUlNLQ292tc+gc07zYZXNZjaOoAX3sw==
   dependencies:
     call-bind "^1.0.2"
     es-to-primitive "^1.2.1"
@@ -3752,14 +3551,14 @@ es-abstract@^1.18.0-next.2:
     has-symbols "^1.0.2"
     is-callable "^1.2.3"
     is-negative-zero "^2.0.1"
-    is-regex "^1.1.2"
-    is-string "^1.0.5"
-    object-inspect "^1.9.0"
+    is-regex "^1.1.3"
+    is-string "^1.0.6"
+    object-inspect "^1.10.3"
     object-keys "^1.1.1"
     object.assign "^4.1.2"
     string.prototype.trimend "^1.0.4"
     string.prototype.trimstart "^1.0.4"
-    unbox-primitive "^1.0.0"
+    unbox-primitive "^1.0.1"
 
 es-module-lexer@^0.4.0:
   version "0.4.1"
@@ -3793,18 +3592,6 @@ es6-iterator@^2.0.1, es6-iterator@^2.0.3, es6-iterator@~2.0.3:
     es5-ext "^0.10.35"
     es6-symbol "^3.1.1"
 
-es6-promise@^4.0.3:
-  version "4.2.8"
-  resolved "https://registry.yarnpkg.com/es6-promise/-/es6-promise-4.2.8.tgz#4eb21594c972bc40553d276e510539143db53e0a"
-  integrity sha512-HJDGx5daxeIvxdBxvG2cb9g4tEvwIk3i8+nhX0yGrYmZUzbkdg8QbDevheDB8gd0//uPj4c1EQua8Q+MViT0/w==
-
-es6-promisify@^5.0.0:
-  version "5.0.0"
-  resolved "https://registry.yarnpkg.com/es6-promisify/-/es6-promisify-5.0.0.tgz#5109d62f3e56ea967c4b63505aef08291c8a5203"
-  integrity sha1-UQnWLz5W6pZ8S2NQWu8IKRyKUgM=
-  dependencies:
-    es6-promise "^4.0.3"
-
 es6-symbol@^3.1.1, es6-symbol@~3.1.3:
   version "3.1.3"
   resolved "https://registry.yarnpkg.com/es6-symbol/-/es6-symbol-3.1.3.tgz#bad5d3c1bcdac28269f4cb331e431c78ac705d18"
@@ -3838,6 +3625,11 @@ escape-string-regexp@^2.0.0:
   resolved "https://registry.yarnpkg.com/escape-string-regexp/-/escape-string-regexp-2.0.0.tgz#a30304e99daa32e23b2fd20f51babd07cffca344"
   integrity sha512-UpzcLCXolUWcNu5HtVMHYdXJjArjsF9C0aNnquZYY4uW/Vu0miy5YoWvbV345HauVvcAUnpRuhMMcqTcGOY2+w==
 
+escape-string-regexp@^4.0.0:
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz#14ba83a5d373e3d311e5afca29cf5bfad965bf34"
+  integrity sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA==
+
 escodegen@^2.0.0:
   version "2.0.0"
   resolved "https://registry.yarnpkg.com/escodegen/-/escodegen-2.0.0.tgz#5e32b12833e8aa8fa35e1bf0befa89380484c7dd"
@@ -3850,10 +3642,10 @@ escodegen@^2.0.0:
   optionalDependencies:
     source-map "~0.6.1"
 
-eslint-plugin-jest@^24.3.5:
-  version "24.3.5"
-  resolved "https://registry.yarnpkg.com/eslint-plugin-jest/-/eslint-plugin-jest-24.3.5.tgz#71f0b580f87915695c286c3f0eb88cf23664d044"
-  integrity sha512-XG4rtxYDuJykuqhsOqokYIR84/C8pRihRtEpVskYLbIIKGwPNW2ySxdctuVzETZE+MbF/e7wmsnbNVpzM0rDug==
+eslint-plugin-jest@24.3.6:
+  version "24.3.6"
+  resolved "https://registry.yarnpkg.com/eslint-plugin-jest/-/eslint-plugin-jest-24.3.6.tgz#5f0ca019183c3188c5ad3af8e80b41de6c8e9173"
+  integrity sha512-WOVH4TIaBLIeCX576rLcOgjNXqP+jNlCiEmRgFTfQtJ52DpwnIQKAVGlGPAN7CZ33bW6eNfHD6s8ZbEUTQubJg==
   dependencies:
     "@typescript-eslint/experimental-utils" "^4.0.1"
 
@@ -3878,29 +3670,31 @@ eslint-visitor-keys@^1.1.0, eslint-visitor-keys@^1.3.0:
   integrity sha512-6J72N8UNa462wa/KFODt/PJ3IU60SDpC3QXC1Hjc1BXXpfL2C9R5+AU7jhe0F6GREqVMh4Juu+NY7xn+6dipUQ==
 
 eslint-visitor-keys@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/eslint-visitor-keys/-/eslint-visitor-keys-2.0.0.tgz#21fdc8fbcd9c795cc0321f0563702095751511a8"
-  integrity sha512-QudtT6av5WXels9WjIM7qz1XD1cWGvX4gGXvp/zBn9nXG02D0utdU3Em2m/QjTnrsk6bBjmCygl3rmj118msQQ==
+  version "2.1.0"
+  resolved "https://registry.yarnpkg.com/eslint-visitor-keys/-/eslint-visitor-keys-2.1.0.tgz#f65328259305927392c938ed44eb0a5c9b2bd303"
+  integrity sha512-0rSmRBzXgDzIsD6mGdJgevzgezI534Cer5L/vyMX0kHzT/jiB43jRhd9YUlMGYLQy2zprNmoT8qasCGtY+QaKw==
 
-eslint@^7.24.0:
-  version "7.24.0"
-  resolved "https://registry.yarnpkg.com/eslint/-/eslint-7.24.0.tgz#2e44fa62d93892bfdb100521f17345ba54b8513a"
-  integrity sha512-k9gaHeHiFmGCDQ2rEfvULlSLruz6tgfA8DEn+rY9/oYPFFTlz55mM/Q/Rij1b2Y42jwZiK3lXvNTw6w6TXzcKQ==
+eslint@7.27.0:
+  version "7.27.0"
+  resolved "https://registry.yarnpkg.com/eslint/-/eslint-7.27.0.tgz#665a1506d8f95655c9274d84bd78f7166b07e9c7"
+  integrity sha512-JZuR6La2ZF0UD384lcbnd0Cgg6QJjiCwhMD6eU4h/VGPcVGwawNNzKU41tgokGXnfjOOyI6QIffthhJTPzzuRA==
   dependencies:
     "@babel/code-frame" "7.12.11"
-    "@eslint/eslintrc" "^0.4.0"
+    "@eslint/eslintrc" "^0.4.1"
     ajv "^6.10.0"
     chalk "^4.0.0"
     cross-spawn "^7.0.2"
     debug "^4.0.1"
     doctrine "^3.0.0"
     enquirer "^2.3.5"
+    escape-string-regexp "^4.0.0"
     eslint-scope "^5.1.1"
     eslint-utils "^2.1.0"
     eslint-visitor-keys "^2.0.0"
     espree "^7.3.1"
     esquery "^1.4.0"
     esutils "^2.0.2"
+    fast-deep-equal "^3.1.3"
     file-entry-cache "^6.0.1"
     functional-red-black-tree "^1.0.1"
     glob-parent "^5.0.0"
@@ -3912,7 +3706,7 @@ eslint@^7.24.0:
     js-yaml "^3.13.1"
     json-stable-stringify-without-jsonify "^1.0.1"
     levn "^0.4.1"
-    lodash "^4.17.21"
+    lodash.merge "^4.6.2"
     minimatch "^3.0.4"
     natural-compare "^1.4.0"
     optionator "^0.9.1"
@@ -3921,7 +3715,7 @@ eslint@^7.24.0:
     semver "^7.2.1"
     strip-ansi "^6.0.0"
     strip-json-comments "^3.1.0"
-    table "^6.0.4"
+    table "^6.0.9"
     text-table "^0.2.0"
     v8-compile-cache "^2.0.3"
 
@@ -3980,47 +3774,29 @@ event-emitter@^0.3.5:
     d "1"
     es5-ext "~0.10.14"
 
-eventemitter3@^3.1.0:
-  version "3.1.2"
-  resolved "https://registry.yarnpkg.com/eventemitter3/-/eventemitter3-3.1.2.tgz#2d3d48f9c346698fce83a85d7d664e98535df6e7"
-  integrity sha512-tvtQIeLVHjDkJYnzf2dgVMxfuSGJeM/7UCG17TT4EumTfNtF+0nebF/4zWOIkCreAbtNqhGEboB6BWrwqNaw4Q==
+eventemitter3@^4.0.4:
+  version "4.0.7"
+  resolved "https://registry.yarnpkg.com/eventemitter3/-/eventemitter3-4.0.7.tgz#2de9b68f6528d5644ef5c59526a1b4a07306169f"
+  integrity sha512-8guHBZCwKnFhYdHr2ysuRWErTwhoN2X8XELRlrRwpmfeY2jjuUN4taQMsULKUVo1K4DvZl+0pgfyoysHxvmvEw==
 
 events@^3.2.0:
   version "3.3.0"
   resolved "https://registry.yarnpkg.com/events/-/events-3.3.0.tgz#31a95ad0a924e2d2c419a813aeb2c4e878ea7400"
   integrity sha512-mQw+2fkQbALzQ7V0MY0IqdnXNOeTtP4r0lN9z7AAawCXgqea7bDii20AYrIBrFd/Hx0M2Ocz6S111CaFkUcb0Q==
 
-exec-sh@^0.3.2:
-  version "0.3.6"
-  resolved "https://registry.yarnpkg.com/exec-sh/-/exec-sh-0.3.6.tgz#ff264f9e325519a60cb5e273692943483cca63bc"
-  integrity sha512-nQn+hI3yp+oD0huYhKwvYI32+JFeq+XkNcD1GAo3Y/MjxsfVGmrrzrnzjWiNY6f+pUCP440fThsFh5gZrRAU/w==
-
-execa@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/execa/-/execa-1.0.0.tgz#c6236a5bb4df6d6f15e88e7f017798216749ddd8"
-  integrity sha512-adbxcyWV46qiHyvSp50TKt05tB4tK3HcmF7/nxfAdhnox83seTDbwnaqKO4sXRy7roHAIFqJP/Rw/AuEbX61LA==
-  dependencies:
-    cross-spawn "^6.0.0"
-    get-stream "^4.0.0"
-    is-stream "^1.1.0"
-    npm-run-path "^2.0.0"
-    p-finally "^1.0.0"
-    signal-exit "^3.0.0"
-    strip-eof "^1.0.0"
-
-execa@^4.0.0:
-  version "4.1.0"
-  resolved "https://registry.yarnpkg.com/execa/-/execa-4.1.0.tgz#4e5491ad1572f2f17a77d388c6c857135b22847a"
-  integrity sha512-j5W0//W7f8UxAn8hXVnwG8tLwdiUy4FJLcSupCg6maBYZDpyBvTApK7KyuI4bKj8KOh1r2YH+6ucuYtJv1bTZA==
+execa@^5.0.0:
+  version "5.0.0"
+  resolved "https://registry.yarnpkg.com/execa/-/execa-5.0.0.tgz#4029b0007998a841fbd1032e5f4de86a3c1e3376"
+  integrity sha512-ov6w/2LCiuyO4RLYGdpFGjkcs0wMTgGE8PrkTHikeUy5iJekXyPIKUjifk5CsE0pt7sMCrMZ3YNqoCj6idQOnQ==
   dependencies:
-    cross-spawn "^7.0.0"
-    get-stream "^5.0.0"
-    human-signals "^1.1.1"
+    cross-spawn "^7.0.3"
+    get-stream "^6.0.0"
+    human-signals "^2.1.0"
     is-stream "^2.0.0"
     merge-stream "^2.0.0"
-    npm-run-path "^4.0.0"
-    onetime "^5.1.0"
-    signal-exit "^3.0.2"
+    npm-run-path "^4.0.1"
+    onetime "^5.1.2"
+    signal-exit "^3.0.3"
     strip-final-newline "^2.0.0"
 
 exit@^0.1.2:
@@ -4048,17 +3824,17 @@ expand-tilde@^2.0.0, expand-tilde@^2.0.2:
   dependencies:
     homedir-polyfill "^1.0.1"
 
-expect@^26.6.2:
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/expect/-/expect-26.6.2.tgz#c6b996bf26bf3fe18b67b2d0f51fc981ba934417"
-  integrity sha512-9/hlOBkQl2l/PLHJx6JjoDF6xPKcJEsUlWKb23rKE7KzeDqUZKXKNMW27KIue5JMdBV9HgmoJPcc8HtO85t9IA==
+expect@^27.0.1:
+  version "27.0.1"
+  resolved "https://registry.yarnpkg.com/expect/-/expect-27.0.1.tgz#1290c74fef8d62f15f4c5dd1d7233001909abbfb"
+  integrity sha512-hjKwLeAvKUiq0Plha1dmzOH1FGEwJC9njbT993cq4PK9r58/+3NM+WDqFVGcPuRH7XTjmbIeHQBzp2faDrPhjQ==
   dependencies:
-    "@jest/types" "^26.6.2"
-    ansi-styles "^4.0.0"
-    jest-get-type "^26.3.0"
-    jest-matcher-utils "^26.6.2"
-    jest-message-util "^26.6.2"
-    jest-regex-util "^26.0.0"
+    "@jest/types" "^27.0.1"
+    ansi-styles "^5.0.0"
+    jest-get-type "^27.0.1"
+    jest-matcher-utils "^27.0.1"
+    jest-message-util "^27.0.1"
+    jest-regex-util "^27.0.1"
 
 ext@^1.1.2:
   version "1.4.0"
@@ -4130,16 +3906,11 @@ fancy-log@^1.3.2:
     parse-node-version "^1.0.0"
     time-stamp "^1.0.0"
 
-fast-deep-equal@^3.1.1:
+fast-deep-equal@^3.1.1, fast-deep-equal@^3.1.3:
   version "3.1.3"
   resolved "https://registry.yarnpkg.com/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz#3a7d56b559d6cbc3eb512325244e619a65c6c525"
   integrity sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==
 
-fast-extend@0.0.2:
-  version "0.0.2"
-  resolved "https://registry.yarnpkg.com/fast-extend/-/fast-extend-0.0.2.tgz#f5ec42cf40b9460f521a6387dfb52deeed671dbd"
-  integrity sha1-9exCz0C5Rg9SGmOH37Ut7u1nHb0=
-
 fast-glob@^2.2.6:
   version "2.2.7"
   resolved "https://registry.yarnpkg.com/fast-glob/-/fast-glob-2.2.7.tgz#6953857c3afa475fff92ee6015d52da70a4cd39d"
@@ -4193,15 +3964,10 @@ fb-watchman@^2.0.0:
   dependencies:
     bser "2.1.1"
 
-figgy-pudding@^3.4.1, figgy-pudding@^3.5.1:
-  version "3.5.2"
-  resolved "https://registry.yarnpkg.com/figgy-pudding/-/figgy-pudding-3.5.2.tgz#b4eee8148abb01dcf1d1ac34367d59e12fa61d6e"
-  integrity sha512-0btnI/H8f2pavGMN8w40mlSKOfTK2SVJmBfBeVIj3kNw0swwgzyRq0d5TJVOwodFmtvpPeWPN/MCcfuWF0Ezbw==
-
-figures@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/figures/-/figures-2.0.0.tgz#3ab1a2d2a62c8bfb431a0c94cb797a2fce27c962"
-  integrity sha1-OrGi0qYsi/tDGgyUy3l6L84nyWI=
+figures@^3.0.0:
+  version "3.2.0"
+  resolved "https://registry.yarnpkg.com/figures/-/figures-3.2.0.tgz#625c18bd293c604dc4a8ddb2febf0c88341746af"
+  integrity sha512-yaduQFRKLXYOGgEn6AZau90j3ggSOyiqXU0F9JZfeXYhNa+Jk4X+s45A2zg5jns87GAFa34BBm2kXw4XpNcbdg==
   dependencies:
     escape-string-regexp "^1.0.5"
 
@@ -4261,13 +4027,6 @@ find-up@^2.0.0:
   dependencies:
     locate-path "^2.0.0"
 
-find-up@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/find-up/-/find-up-3.0.0.tgz#49169f1d7993430646da61ecc5ae355c21c97b73"
-  integrity sha512-1yD6RmLI1XBfxugvORwlck6f75tYL+iR0jqwsOrOxMZyGYqUuDhJ0l4AXdO1iX/FTs9cBAMEk1gWSEx1kSbylg==
-  dependencies:
-    locate-path "^3.0.0"
-
 find-up@^4.0.0, find-up@^4.1.0:
   version "4.1.0"
   resolved "https://registry.yarnpkg.com/find-up/-/find-up-4.1.0.tgz#97afe7d6cdc0bc5928584b7c8d7b16e8a9aa5d19"
@@ -4330,7 +4089,7 @@ flatted@^3.1.0:
   resolved "https://registry.yarnpkg.com/flatted/-/flatted-3.1.1.tgz#c4b489e80096d9df1dfc97c79871aea7c617c469"
   integrity sha512-zAoAQiudy+r5SvnSw3KJy5os/oRJYHzrzja/tBDqrZtNhUw8bt6y8OBzMWcjWr+8liV8Eb6yOhw8WZ7VFZ5ZzA==
 
-flush-write-stream@^1.0.0, flush-write-stream@^1.0.2:
+flush-write-stream@^1.0.2:
   version "1.1.1"
   resolved "https://registry.yarnpkg.com/flush-write-stream/-/flush-write-stream-1.1.1.tgz#8dd7d873a1babc207d94ead0c2e0e44276ebf2e8"
   integrity sha512-3Z4XhFZ3992uIq0XOqb9AreonueSYphE6oYbpt5+3u06JWklbsPkNv3ZKkP9Bz/r+1MWCaMoSQ28P85+1Yc77w==
@@ -4355,6 +4114,15 @@ forever-agent@~0.6.1:
   resolved "https://registry.yarnpkg.com/forever-agent/-/forever-agent-0.6.1.tgz#fbc71f0c41adeb37f96c577ad1ed42d8fdacca91"
   integrity sha1-+8cfDEGt6zf5bFd60e1C2P2sypE=
 
+form-data@^3.0.0:
+  version "3.0.1"
+  resolved "https://registry.yarnpkg.com/form-data/-/form-data-3.0.1.tgz#ebd53791b78356a99af9a300d4282c4d5eb9755f"
+  integrity sha512-RHkBKtLWUVwd7SqRIvCZMEvAMoGUp0XU+seQiZejj0COz3RI3hWP4sCv3gZWWLjJTd7rGwcsF5eKZGii0r/hbg==
+  dependencies:
+    asynckit "^0.4.0"
+    combined-stream "^1.0.8"
+    mime-types "^2.1.12"
+
 form-data@~2.3.2:
   version "2.3.3"
   resolved "https://registry.yarnpkg.com/form-data/-/form-data-2.3.3.tgz#dcce52c05f644f298c6a7ab936bd724ceffbf3a6"
@@ -4371,23 +4139,6 @@ fragment-cache@^0.2.1:
   dependencies:
     map-cache "^0.2.2"
 
-from2@^2.1.0:
-  version "2.3.0"
-  resolved "https://registry.yarnpkg.com/from2/-/from2-2.3.0.tgz#8bfb5502bde4a4d36cfdeea007fcca21d7e382af"
-  integrity sha1-i/tVAr3kpNNs/e6gB/zKIdfjgq8=
-  dependencies:
-    inherits "^2.0.1"
-    readable-stream "^2.0.0"
-
-fs-extra@^8.1.0:
-  version "8.1.0"
-  resolved "https://registry.yarnpkg.com/fs-extra/-/fs-extra-8.1.0.tgz#49d43c45a88cd9677668cb7be1b46efdb8d2e1c0"
-  integrity sha512-yhlQgA6mnOJUKOsRUFsgJdQCvkKhcz8tlZG5HBQfReYZy46OwLcY+Zia0mtdHsOo9y/hP+CxMN0TU9QxoOtG4g==
-  dependencies:
-    graceful-fs "^4.2.0"
-    jsonfile "^4.0.0"
-    universalify "^0.1.0"
-
 fs-extra@^9.0.1, fs-extra@^9.1.0:
   version "9.1.0"
   resolved "https://registry.yarnpkg.com/fs-extra/-/fs-extra-9.1.0.tgz#5954460c764a8da2094ba3554bf839e6b9a7c86d"
@@ -4405,6 +4156,13 @@ fs-minipass@^1.2.5:
   dependencies:
     minipass "^2.6.0"
 
+fs-minipass@^2.0.0, fs-minipass@^2.1.0:
+  version "2.1.0"
+  resolved "https://registry.yarnpkg.com/fs-minipass/-/fs-minipass-2.1.0.tgz#7f5036fdbf12c63c169190cbe4199c852271f9fb"
+  integrity sha512-V/JgOLFCS+R6Vcq0slCuaeWEdNC3ouDlJMNIsacH2VtALiu9mV4LPrHc5cDl8k5aw6J8jwgWWpiTo5RYhmIzvg==
+  dependencies:
+    minipass "^3.0.0"
+
 fs-mkdirp-stream@^1.0.0:
   version "1.0.0"
   resolved "https://registry.yarnpkg.com/fs-mkdirp-stream/-/fs-mkdirp-stream-1.0.0.tgz#0b7815fc3201c6a69e14db98ce098c16935259eb"
@@ -4413,20 +4171,10 @@ fs-mkdirp-stream@^1.0.0:
     graceful-fs "^4.1.11"
     through2 "^2.0.3"
 
-fs-monkey@^0.3.3:
-  version "0.3.3"
-  resolved "https://registry.yarnpkg.com/fs-monkey/-/fs-monkey-0.3.3.tgz#7960bb2b1fa2653731b9d0e2e84812a7e8b3664a"
-  integrity sha512-FNUvuTAJ3CqCQb5ELn+qCbGR/Zllhf2HtwsdAtBi59s1WeCjKMT81fHcSu7dwIskqGVK+MmOrb7VOBlq3/SItw==
-
-fs-write-stream-atomic@^1.0.8:
-  version "1.0.10"
-  resolved "https://registry.yarnpkg.com/fs-write-stream-atomic/-/fs-write-stream-atomic-1.0.10.tgz#b47df53493ef911df75731e70a9ded0189db40c9"
-  integrity sha1-tH31NJPvkR33VzHnCp3tAYnbQMk=
-  dependencies:
-    graceful-fs "^4.1.2"
-    iferr "^0.1.5"
-    imurmurhash "^0.1.4"
-    readable-stream "1 || 2"
+fs-monkey@1.0.3:
+  version "1.0.3"
+  resolved "https://registry.yarnpkg.com/fs-monkey/-/fs-monkey-1.0.3.tgz#ae3ac92d53bb328efe0e9a1d9541f6ad8d48e2d3"
+  integrity sha512-cybjIfiiE+pTWicSCLFHSrXZ6EilF30oh91FDP9S2B051prEa7QWfrVTQm10/dDpswBDXZugPa1Ogu8Yh+HV0Q==
 
 fs.realpath@^1.0.0:
   version "1.0.0"
@@ -4441,7 +4189,7 @@ fsevents@^1.2.7:
     bindings "^1.5.0"
     nan "^2.12.1"
 
-fsevents@^2.1.2:
+fsevents@^2.3.2:
   version "2.3.2"
   resolved "https://registry.yarnpkg.com/fsevents/-/fsevents-2.3.2.tgz#8a526f78b8fdf4623b709e0b975c52c24c02fd1a"
   integrity sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==
@@ -4470,11 +4218,6 @@ gauge@~2.7.3:
     strip-ansi "^3.0.1"
     wide-align "^1.1.0"
 
-genfun@^5.0.0:
-  version "5.0.0"
-  resolved "https://registry.yarnpkg.com/genfun/-/genfun-5.0.0.tgz#9dd9710a06900a5c4a5bf57aca5da4e52fe76537"
-  integrity sha512-KGDOARWVga7+rnB3z9Sd2Letx515owfk0hSxHGuqjANb1M+x2bGZGqHLiozPsYMdM2OubeMni/Hpwmjq6qIUhA==
-
 gensync@^1.0.0-beta.2:
   version "1.0.0-beta.2"
   resolved "https://registry.yarnpkg.com/gensync/-/gensync-1.0.0-beta.2.tgz#32a6ee76c3d7f52d46b2b1ae5d93fea8580a25e0"
@@ -4485,7 +4228,7 @@ get-caller-file@^1.0.1:
   resolved "https://registry.yarnpkg.com/get-caller-file/-/get-caller-file-1.0.3.tgz#f978fa4c90d1dfe7ff2d6beda2a515e713bdcf4a"
   integrity sha512-3t6rVToeoZfYSGd8YoLFR2DJkiQrIiUrGcjvFX2mDw3bn6k2OtwHN0TNCLbBO+w8qTvimhDkv+LSscbJY1vE6w==
 
-get-caller-file@^2.0.1:
+get-caller-file@^2.0.5:
   version "2.0.5"
   resolved "https://registry.yarnpkg.com/get-caller-file/-/get-caller-file-2.0.5.tgz#4f94412a82db32f36e3b0b9741f8a97feb031f7e"
   integrity sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==
@@ -4515,29 +4258,20 @@ get-pkg-repo@^1.0.0:
     parse-github-repo-url "^1.3.0"
     through2 "^2.0.0"
 
-get-port@^4.2.0:
-  version "4.2.0"
-  resolved "https://registry.yarnpkg.com/get-port/-/get-port-4.2.0.tgz#e37368b1e863b7629c43c5a323625f95cf24b119"
-  integrity sha512-/b3jarXkH8KJoOMQc3uVGHASwGLPq3gSFJ7tgJm2diza+bydJPTGOibin2steecKeOylE8oY2JERlVWkAJO6yw==
+get-port@^5.1.1:
+  version "5.1.1"
+  resolved "https://registry.yarnpkg.com/get-port/-/get-port-5.1.1.tgz#0469ed07563479de6efb986baf053dcd7d4e3193"
+  integrity sha512-g/Q1aTSDOxFpchXC4i8ZWvxA1lnPqx/JHqcpIw0/LX9T8x/GBbi6YnlN5nhaKIFkT8oFsscUKgDJYxfwfS6QsQ==
 
 get-stdin@^4.0.1:
   version "4.0.1"
   resolved "https://registry.yarnpkg.com/get-stdin/-/get-stdin-4.0.1.tgz#b968c6b0a04384324902e8bf1a5df32579a450fe"
   integrity sha1-uWjGsKBDhDJJAui/Gl3zJXmkUP4=
 
-get-stream@^4.0.0, get-stream@^4.1.0:
-  version "4.1.0"
-  resolved "https://registry.yarnpkg.com/get-stream/-/get-stream-4.1.0.tgz#c1b255575f3dc21d59bfc79cd3d2b46b1c3a54b5"
-  integrity sha512-GMat4EJ5161kIy2HevLlr4luNjBgvmj413KaQA7jt4V8B4RDsfpHk7WQ9GVqfYyyx8OS/L66Kox+rJRNklLK7w==
-  dependencies:
-    pump "^3.0.0"
-
-get-stream@^5.0.0:
-  version "5.2.0"
-  resolved "https://registry.yarnpkg.com/get-stream/-/get-stream-5.2.0.tgz#4966a1795ee5ace65e706c4b7beb71257d6e22d3"
-  integrity sha512-nBF+F1rAZVCu/p7rjzgA+Yb4lfYXrpl7a6VmJrU8wF9I1CKvP/QwPNZHnOlwbTkY6dvtFIzFMSyQXbLoTQPRpA==
-  dependencies:
-    pump "^3.0.0"
+get-stream@^6.0.0:
+  version "6.0.1"
+  resolved "https://registry.yarnpkg.com/get-stream/-/get-stream-6.0.1.tgz#a262d8eef67aced57c2852ad6167526a43cbf7b7"
+  integrity sha512-ts6Wi+2j3jQjqi70w5AlN8DFnkSwC+MqmxEzdEALB2qXZYV3X/b1CTfgPLGJNMeAWxdPfU8FO1ms3NUfaHCPYg==
 
 get-value@^2.0.3, get-value@^2.0.6:
   version "2.0.6"
@@ -4551,16 +4285,16 @@ getpass@^0.1.1:
   dependencies:
     assert-plus "^1.0.0"
 
-git-raw-commits@2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/git-raw-commits/-/git-raw-commits-2.0.0.tgz#d92addf74440c14bcc5c83ecce3fb7f8a79118b5"
-  integrity sha512-w4jFEJFgKXMQJ0H0ikBk2S+4KP2VEjhCvLCNqbNRQC8BgGWgLKNCO7a9K9LI+TVT7Gfoloje502sEnctibffgg==
+git-raw-commits@^2.0.8:
+  version "2.0.10"
+  resolved "https://registry.yarnpkg.com/git-raw-commits/-/git-raw-commits-2.0.10.tgz#e2255ed9563b1c9c3ea6bd05806410290297bbc1"
+  integrity sha512-sHhX5lsbG9SOO6yXdlwgEMQ/ljIn7qMpAbJZCGfXX2fq5T8M5SrDnpYk9/4HswTildcIqatsWa91vty6VhWSaQ==
   dependencies:
-    dargs "^4.0.1"
-    lodash.template "^4.0.2"
-    meow "^4.0.0"
-    split2 "^2.0.0"
-    through2 "^2.0.0"
+    dargs "^7.0.0"
+    lodash "^4.17.15"
+    meow "^8.0.0"
+    split2 "^3.0.0"
+    through2 "^4.0.0"
 
 git-remote-origin-url@^2.0.0:
   version "2.0.0"
@@ -4570,12 +4304,12 @@ git-remote-origin-url@^2.0.0:
     gitconfiglocal "^1.0.0"
     pify "^2.3.0"
 
-git-semver-tags@^2.0.3:
-  version "2.0.3"
-  resolved "https://registry.yarnpkg.com/git-semver-tags/-/git-semver-tags-2.0.3.tgz#48988a718acf593800f99622a952a77c405bfa34"
-  integrity sha512-tj4FD4ww2RX2ae//jSrXZzrocla9db5h0V7ikPl1P/WwoZar9epdUhwR7XHXSgc+ZkNq72BEEerqQuicoEQfzA==
+git-semver-tags@^4.1.1:
+  version "4.1.1"
+  resolved "https://registry.yarnpkg.com/git-semver-tags/-/git-semver-tags-4.1.1.tgz#63191bcd809b0ec3e151ba4751c16c444e5b5780"
+  integrity sha512-OWyMt5zBe7xFs8vglMmhM9lRQzCWL3WjHtxNNfJTMngGym7pC1kh8sP6jevfydJ6LP3ZvGxfb6ABYgPUM0mtsA==
   dependencies:
-    meow "^4.0.0"
+    meow "^8.0.0"
     semver "^6.0.0"
 
 git-up@^4.0.0:
@@ -4586,7 +4320,7 @@ git-up@^4.0.0:
     is-ssh "^1.3.0"
     parse-url "^5.0.0"
 
-git-url-parse@^11.1.2:
+git-url-parse@^11.4.4:
   version "11.4.4"
   resolved "https://registry.yarnpkg.com/git-url-parse/-/git-url-parse-11.4.4.tgz#5d747debc2469c17bc385719f7d0427802d83d77"
   integrity sha512-Y4o9o7vQngQDIU9IjyCmRJBin5iYjI5u9ZITnddRZpD7dcCFQj2sL2XuMNbLRE4b4B/4ENPsp2Q8P44fjAZ0Pw==
@@ -4608,7 +4342,7 @@ glob-parent@^3.1.0:
     is-glob "^3.1.0"
     path-dirname "^1.0.0"
 
-glob-parent@^5.0.0, glob-parent@^5.1.0:
+glob-parent@^5.0.0, glob-parent@^5.1.0, glob-parent@^5.1.1:
   version "5.1.2"
   resolved "https://registry.yarnpkg.com/glob-parent/-/glob-parent-5.1.2.tgz#869832c58034fe68a4093c17dc15e8340d8401c4"
   integrity sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==
@@ -4654,22 +4388,10 @@ glob-watcher@^5.0.3:
     normalize-path "^3.0.0"
     object.defaults "^1.1.0"
 
-glob@7.1.4:
-  version "7.1.4"
-  resolved "https://registry.yarnpkg.com/glob/-/glob-7.1.4.tgz#aa608a2f6c577ad357e1ae5a5c26d9a8d1969255"
-  integrity sha512-hkLPepehmnKk41pUGm3sYxoFs/umurYfYJCerbXEyFIWcAzvpipAgVkBqqT9RBKMGjnq6kMuyYwha6csxbiM1A==
-  dependencies:
-    fs.realpath "^1.0.0"
-    inflight "^1.0.4"
-    inherits "2"
-    minimatch "^3.0.4"
-    once "^1.3.0"
-    path-is-absolute "^1.0.0"
-
-glob@^7.0.0, glob@^7.1.1, glob@^7.1.2, glob@^7.1.3, glob@^7.1.4:
-  version "7.1.6"
-  resolved "https://registry.yarnpkg.com/glob/-/glob-7.1.6.tgz#141f33b81a7c2492e125594307480c46679278a6"
-  integrity sha512-LwaxwyZ72Lk7vZINtNNrywX0ZuLyStrdDtabefZKAY5ZGJhVtgdznluResxNmPitE0SAO+O26sWTHeKSI2wMBA==
+glob@7.1.7, glob@^7.0.0, glob@^7.1.1, glob@^7.1.2, glob@^7.1.3, glob@^7.1.4, glob@^7.1.6:
+  version "7.1.7"
+  resolved "https://registry.yarnpkg.com/glob/-/glob-7.1.7.tgz#3b193e9233f01d42d0b3f78294bbeeb418f94a90"
+  integrity sha512-OvD9ENzPLbegENnYP5UUfJIirTg4+XwMWGaQfQTY0JenxNvvIKP3U3/tAQSPIu/lHxXYSZmpXlUHeqAIdKzBLQ==
   dependencies:
     fs.realpath "^1.0.0"
     inflight "^1.0.4"
@@ -4731,7 +4453,7 @@ globby@^10.0.1:
     merge2 "^1.2.3"
     slash "^3.0.0"
 
-globby@^11.0.1:
+globby@^11.0.1, globby@^11.0.2:
   version "11.0.3"
   resolved "https://registry.yarnpkg.com/globby/-/globby-11.0.3.tgz#9b1f0cb523e171dd1ad8c7b2a9fb4b644b9593cb"
   integrity sha512-ffdmosjA807y7+lA1NM0jELARVmYul/715xiILEjo3hBLPTcirgQNnXECn5g3mtR8TOLCVbkfua1Hpen25/Xcg==
@@ -4799,16 +4521,11 @@ google-closure-compiler@20210505.0.0:
     google-closure-compiler-osx "^20210505.0.0"
     google-closure-compiler-windows "^20210505.0.0"
 
-graceful-fs@^4.0.0, graceful-fs@^4.1.11, graceful-fs@^4.1.15, graceful-fs@^4.1.2, graceful-fs@^4.1.6, graceful-fs@^4.2.0, graceful-fs@^4.2.2, graceful-fs@^4.2.4:
+graceful-fs@^4.0.0, graceful-fs@^4.1.11, graceful-fs@^4.1.15, graceful-fs@^4.1.2, graceful-fs@^4.1.6, graceful-fs@^4.2.0, graceful-fs@^4.2.2, graceful-fs@^4.2.3, graceful-fs@^4.2.4:
   version "4.2.6"
   resolved "https://registry.yarnpkg.com/graceful-fs/-/graceful-fs-4.2.6.tgz#ff040b2b0853b23c3d31027523706f1885d76bee"
   integrity sha512-nTnJ528pbqxYanhpDYsi4Rd8MAeaBA67+RZ10CM1m3bTAVFEDcd5AuA4a6W5YkGZ1iNXHzZz8T6TBKLeBuNriQ==
 
-growly@^1.3.0:
-  version "1.3.0"
-  resolved "https://registry.yarnpkg.com/growly/-/growly-1.3.0.tgz#f10748cbe76af964b7c96c93c6bcc28af120c081"
-  integrity sha1-8QdIy+dq+WS3yWyTxrzCivEgwIE=
-
 gulp-cli@^2.2.0:
   version "2.3.0"
   resolved "https://registry.yarnpkg.com/gulp-cli/-/gulp-cli-2.3.0.tgz#ec0d380e29e52aa45e47977f0d32e18fd161122f"
@@ -5003,7 +4720,7 @@ homedir-polyfill@^1.0.1:
   dependencies:
     parse-passwd "^1.0.0"
 
-hosted-git-info@^2.1.4, hosted-git-info@^2.7.1:
+hosted-git-info@^2.1.4:
   version "2.8.9"
   resolved "https://registry.yarnpkg.com/hosted-git-info/-/hosted-git-info-2.8.9.tgz#dffc0bf9a21c02209090f2aa69429e1414daf3f9"
   integrity sha512-mxIDAb9Lsm6DoOJ7xH+5+X4y1LU/4Hi50L9C5sIswK3JzULS4bwk1FvjdBgvYR4bzT4tuUQiC15FE2f5HbLvYw==
@@ -5027,18 +4744,19 @@ html-escaper@^2.0.0:
   resolved "https://registry.yarnpkg.com/html-escaper/-/html-escaper-2.0.2.tgz#dfd60027da36a36dfcbe236262c00a5822681453"
   integrity sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==
 
-http-cache-semantics@^3.8.1:
-  version "3.8.1"
-  resolved "https://registry.yarnpkg.com/http-cache-semantics/-/http-cache-semantics-3.8.1.tgz#39b0e16add9b605bf0a9ef3d9daaf4843b4cacd2"
-  integrity sha512-5ai2iksyV8ZXmnZhHH4rWPoxxistEexSi5936zIQ1bnNTW5VnA85B6P/VpXiRM017IgRvb2kKo1a//y+0wSp3w==
+http-cache-semantics@^4.1.0:
+  version "4.1.0"
+  resolved "https://registry.yarnpkg.com/http-cache-semantics/-/http-cache-semantics-4.1.0.tgz#49e91c5cbf36c9b94bcfcd71c23d5249ec74e390"
+  integrity sha512-carPklcUh7ROWRK7Cv27RPtdhYhUsela/ue5/jKzjegVvXDqM2ILE9Q2BGn9JZJh1g87cp56su/FgQSzcWS8cQ==
 
-http-proxy-agent@^2.1.0:
-  version "2.1.0"
-  resolved "https://registry.yarnpkg.com/http-proxy-agent/-/http-proxy-agent-2.1.0.tgz#e4821beef5b2142a2026bd73926fe537631c5405"
-  integrity sha512-qwHbBLV7WviBl0rQsOzH6o5lwyOIvwp/BdFnvVxXORldu5TmjFfjzBcWUWS5kWAZhmv+JtiDhSuQCp4sBfbIgg==
+http-proxy-agent@^4.0.1:
+  version "4.0.1"
+  resolved "https://registry.yarnpkg.com/http-proxy-agent/-/http-proxy-agent-4.0.1.tgz#8a8c8ef7f5932ccf953c296ca8291b95aa74aa3a"
+  integrity sha512-k0zdNgqWTGA6aeIRVpvfVob4fL52dTfaehylg0Y4UvSySvOq/Y+BOyPrgpUrA7HylqvU8vIZGsRuXmspskV0Tg==
   dependencies:
-    agent-base "4"
-    debug "3.1.0"
+    "@tootallnate/once" "1"
+    agent-base "6"
+    debug "4"
 
 http-signature@~1.2.0:
   version "1.2.0"
@@ -5049,18 +4767,18 @@ http-signature@~1.2.0:
     jsprim "^1.2.2"
     sshpk "^1.7.0"
 
-https-proxy-agent@^2.2.3:
-  version "2.2.4"
-  resolved "https://registry.yarnpkg.com/https-proxy-agent/-/https-proxy-agent-2.2.4.tgz#4ee7a737abd92678a293d9b34a1af4d0d08c787b"
-  integrity sha512-OmvfoQ53WLjtA9HeYP9RNrWMJzzAz1JGaSFr1nijg0PVR1JaD/xbJq1mdEIIlxGpXp9eSe/O2LgU9DJmTPd0Eg==
+https-proxy-agent@^5.0.0:
+  version "5.0.0"
+  resolved "https://registry.yarnpkg.com/https-proxy-agent/-/https-proxy-agent-5.0.0.tgz#e2a90542abb68a762e0a0850f6c9edadfd8506b2"
+  integrity sha512-EkYm5BcKUGiduxzSt3Eppko+PiNWNEpa4ySk9vTC6wDsQJW9rHSa+UhGNJoRYp7bz6Ht1eaRIa6QaJqO5rCFbA==
   dependencies:
-    agent-base "^4.3.0"
-    debug "^3.1.0"
+    agent-base "6"
+    debug "4"
 
-human-signals@^1.1.1:
-  version "1.1.1"
-  resolved "https://registry.yarnpkg.com/human-signals/-/human-signals-1.1.1.tgz#c5b1cd14f50aeae09ab6c59fe63ba3395fe4dfa3"
-  integrity sha512-SEQu7vl8KjNL2eoGBLF3+wAjpsNfA9XMlXAYj/3EdaNfAlxKthD1xjEQfGOUhllCGGJVNY34bRr6lPINhNjyZw==
+human-signals@^2.1.0:
+  version "2.1.0"
+  resolved "https://registry.yarnpkg.com/human-signals/-/human-signals-2.1.0.tgz#dc91fcba42e4d06e4abaed33b3e7a3c02f514ea0"
+  integrity sha512-B4FFZ6q/T2jhhksgkbEW3HBvWIfDW85snkQgawt07S7J5QXTk6BkNV+0yAeZrM5QpMAdYlocGoljn0sJ/WQkFw==
 
 humanize-ms@^1.2.1:
   version "1.2.1"
@@ -5077,21 +4795,16 @@ iconv-lite@0.4.24, iconv-lite@^0.4.24:
     safer-buffer ">= 2.1.2 < 3"
 
 iconv-lite@^0.6.2:
-  version "0.6.2"
-  resolved "https://registry.yarnpkg.com/iconv-lite/-/iconv-lite-0.6.2.tgz#ce13d1875b0c3a674bd6a04b7f76b01b1b6ded01"
-  integrity sha512-2y91h5OpQlolefMPmUlivelittSWy0rP+oYVpn6A7GwVHNE8AWzoYOBNmlwks3LobaJxgHCYZAnyNo2GgpNRNQ==
+  version "0.6.3"
+  resolved "https://registry.yarnpkg.com/iconv-lite/-/iconv-lite-0.6.3.tgz#a52f80bf38da1952eb5c681790719871a1a72501"
+  integrity sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==
   dependencies:
     safer-buffer ">= 2.1.2 < 3.0.0"
 
-iferr@^0.1.5:
-  version "0.1.5"
-  resolved "https://registry.yarnpkg.com/iferr/-/iferr-0.1.5.tgz#c60eed69e6d8fdb6b3104a1fcbca1c192dc5b501"
-  integrity sha1-xg7taebY/bazEEofy8ocGS3FtQE=
-
-ignore-walk@^3.0.1:
-  version "3.0.3"
-  resolved "https://registry.yarnpkg.com/ignore-walk/-/ignore-walk-3.0.3.tgz#017e2447184bfeade7c238e4aefdd1e8f95b1e37"
-  integrity sha512-m7o6xuOaT1aqheYHKf8W6J5pYH85ZI9w077erOzLje3JsB1gkafkAhHHY19dqjulgIZHFm32Cp5uNZgcQqdJKw==
+ignore-walk@^3.0.3:
+  version "3.0.4"
+  resolved "https://registry.yarnpkg.com/ignore-walk/-/ignore-walk-3.0.4.tgz#c9a09f69b7c7b479a5d74ac1a3c0d4236d2a6335"
+  integrity sha512-PY6Ii8o1jMRA1z4F2hRkH/xN59ox43DavKvD3oDpfurRlOJyAHpifIwpbdv1n4jt4ov0jSpw3kQ4GhJnpBL6WQ==
   dependencies:
     minimatch "^3.0.4"
 
@@ -5105,14 +4818,6 @@ ignore@^5.1.1, ignore@^5.1.4:
   resolved "https://registry.yarnpkg.com/ignore/-/ignore-5.1.8.tgz#f150a8b50a34289b33e22f5889abd4d8016f0e57"
   integrity sha512-BMpfD7PpiETpBl/A6S498BaIJ6Y/ABT93ETbby2fP00v4EbvPBXWEoaR1UBPKs3iR53pJY7EtZk5KACI57i1Uw==
 
-import-fresh@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/import-fresh/-/import-fresh-2.0.0.tgz#d81355c15612d386c61f9ddd3922d4304822a546"
-  integrity sha1-2BNVwVYS04bGH53dOSLUMEgipUY=
-  dependencies:
-    caller-path "^2.0.0"
-    resolve-from "^3.0.0"
-
 import-fresh@^3.0.0, import-fresh@^3.2.1:
   version "3.3.0"
   resolved "https://registry.yarnpkg.com/import-fresh/-/import-fresh-3.3.0.tgz#37162c25fcb9ebaa2e6e53d5b4d88ce17d9e0c2b"
@@ -5121,14 +4826,6 @@ import-fresh@^3.0.0, import-fresh@^3.2.1:
     parent-module "^1.0.0"
     resolve-from "^4.0.0"
 
-import-local@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/import-local/-/import-local-2.0.0.tgz#55070be38a5993cf18ef6db7e961f5bee5c5a09d"
-  integrity sha512-b6s04m3O+s3CGSbqDIyP4R6aAwAeYlVq9+WUWep6iHa8ETRf9yei1U48C5MmfJmV9AiLYYBKPMq/W+/WRpQmCQ==
-  dependencies:
-    pkg-dir "^3.0.0"
-    resolve-cwd "^2.0.0"
-
 import-local@^3.0.2:
   version "3.0.2"
   resolved "https://registry.yarnpkg.com/import-local/-/import-local-3.0.2.tgz#a8cfd0431d1de4a2199703d003e3e62364fa6db6"
@@ -5149,17 +4846,12 @@ indent-string@^2.1.0:
   dependencies:
     repeating "^2.0.0"
 
-indent-string@^3.0.0:
-  version "3.2.0"
-  resolved "https://registry.yarnpkg.com/indent-string/-/indent-string-3.2.0.tgz#4a5fd6d27cc332f37e5419a504dbb837105c9289"
-  integrity sha1-Sl/W0nzDMvN+VBmlBNu4NxBckok=
-
 indent-string@^4.0.0:
   version "4.0.0"
   resolved "https://registry.yarnpkg.com/indent-string/-/indent-string-4.0.0.tgz#624f8f4497d619b2d9768531d58f4122854d7251"
   integrity sha512-EdDDZu4A2OyIK7Lr/2zG+w5jmbuk1DVBnEwREQvBzspBJkCEbRa8GxU1lghYcaGJCnRWibjDXlq779X1/y5xwg==
 
-infer-owner@^1.0.3, infer-owner@^1.0.4:
+infer-owner@^1.0.4:
   version "1.0.4"
   resolved "https://registry.yarnpkg.com/infer-owner/-/infer-owner-1.0.4.tgz#c4cefcaa8e51051c2a40ba2ce8a3d27295af9467"
   integrity sha512-IClj+Xz94+d7irH5qRyfJonOdfTzuDaifE6ZPWfx0N0+/ATZCbuTPq2prFl526urkQd90WyUKIh1DfBQ2hMz9A==
@@ -5182,37 +4874,37 @@ ini@^1.3.2, ini@^1.3.4:
   resolved "https://registry.yarnpkg.com/ini/-/ini-1.3.8.tgz#a29da425b48806f34767a4efce397269af28432c"
   integrity sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew==
 
-init-package-json@^1.10.3:
-  version "1.10.3"
-  resolved "https://registry.yarnpkg.com/init-package-json/-/init-package-json-1.10.3.tgz#45ffe2f610a8ca134f2bd1db5637b235070f6cbe"
-  integrity sha512-zKSiXKhQveNteyhcj1CoOP8tqp1QuxPIPBl8Bid99DGLFqA1p87M6lNgfjJHSBoWJJlidGOv5rWjyYKEB3g2Jw==
+init-package-json@^2.0.2:
+  version "2.0.3"
+  resolved "https://registry.yarnpkg.com/init-package-json/-/init-package-json-2.0.3.tgz#c8ae4f2a4ad353bcbc089e5ffe98a8f1a314e8fd"
+  integrity sha512-tk/gAgbMMxR6fn1MgMaM1HpU1ryAmBWWitnxG5OhuNXeX0cbpbgV5jA4AIpQJVNoyOfOevTtO6WX+rPs+EFqaQ==
   dependencies:
     glob "^7.1.1"
-    npm-package-arg "^4.0.0 || ^5.0.0 || ^6.0.0"
+    npm-package-arg "^8.1.2"
     promzard "^0.3.0"
     read "~1.0.1"
-    read-package-json "1 || 2"
-    semver "2.x || 3.x || 4 || 5"
-    validate-npm-package-license "^3.0.1"
+    read-package-json "^3.0.1"
+    semver "^7.3.5"
+    validate-npm-package-license "^3.0.4"
     validate-npm-package-name "^3.0.0"
 
-inquirer@^6.2.0:
-  version "6.5.2"
-  resolved "https://registry.yarnpkg.com/inquirer/-/inquirer-6.5.2.tgz#ad50942375d036d327ff528c08bd5fab089928ca"
-  integrity sha512-cntlB5ghuB0iuO65Ovoi8ogLHiWGs/5yNrtUcKjFhSSiVeAIVpD7koaSU9RM8mpXw5YDi9RdYXGQMaOURB7ycQ==
+inquirer@^7.3.3:
+  version "7.3.3"
+  resolved "https://registry.yarnpkg.com/inquirer/-/inquirer-7.3.3.tgz#04d176b2af04afc157a83fd7c100e98ee0aad003"
+  integrity sha512-JG3eIAj5V9CwcGvuOmoo6LB9kbAYT8HXffUl6memuszlwDC/qvFAJw49XJ5NROSFNPxp3iQg1GqkFhaY/CR0IA==
   dependencies:
-    ansi-escapes "^3.2.0"
-    chalk "^2.4.2"
-    cli-cursor "^2.1.0"
-    cli-width "^2.0.0"
+    ansi-escapes "^4.2.1"
+    chalk "^4.1.0"
+    cli-cursor "^3.1.0"
+    cli-width "^3.0.0"
     external-editor "^3.0.3"
-    figures "^2.0.0"
-    lodash "^4.17.12"
-    mute-stream "0.0.7"
-    run-async "^2.2.0"
-    rxjs "^6.4.0"
-    string-width "^2.1.0"
-    strip-ansi "^5.1.0"
+    figures "^3.0.0"
+    lodash "^4.17.19"
+    mute-stream "0.0.8"
+    run-async "^2.4.0"
+    rxjs "^6.6.0"
+    string-width "^4.1.0"
+    strip-ansi "^6.0.0"
     through "^2.3.6"
 
 interpret@^1.0.0, interpret@^1.4.0:
@@ -5225,7 +4917,7 @@ invert-kv@^1.0.0:
   resolved "https://registry.yarnpkg.com/invert-kv/-/invert-kv-1.0.0.tgz#104a8e4aaca6d3d8cd157a8ef8bfab2d7a3ffdb6"
   integrity sha1-EEqOSqym09jNFXqO+L+rLXo//bY=
 
-ip@1.1.5:
+ip@^1.1.5:
   version "1.1.5"
   resolved "https://registry.yarnpkg.com/ip/-/ip-1.1.5.tgz#bdded70114290828c0a039e72ef25f5aaec4354a"
   integrity sha1-vd7XARQpCCjAoDnnLvJfWq7ENUo=
@@ -5258,9 +4950,9 @@ is-arrayish@^0.2.1:
   integrity sha1-d8mYQFJ6qOyxqLppe4BkWnqSap0=
 
 is-bigint@^1.0.1:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/is-bigint/-/is-bigint-1.0.1.tgz#6923051dfcbc764278540b9ce0e6b3213aa5ebc2"
-  integrity sha512-J0ELF4yHFxHy0cmSxZuheDOz2luOdVvqjwmEcj8H/L1JHeuEDSDbeRP+Dk9kFVk5RTFzbucJ2Kb9F7ixY2QaCg==
+  version "1.0.2"
+  resolved "https://registry.yarnpkg.com/is-bigint/-/is-bigint-1.0.2.tgz#ffb381442503235ad245ea89e45b3dbff040ee5a"
+  integrity sha512-0JV5+SOCQkIdzjBK9buARcV804Ddu7A0Qet6sHi3FimE9ne6m4BGQZfRn+NZiXbBk4F4XmHfDZIipLj9pX8dSA==
 
 is-binary-path@^1.0.0:
   version "1.0.1"
@@ -5270,11 +4962,11 @@ is-binary-path@^1.0.0:
     binary-extensions "^1.0.0"
 
 is-boolean-object@^1.1.0:
-  version "1.1.0"
-  resolved "https://registry.yarnpkg.com/is-boolean-object/-/is-boolean-object-1.1.0.tgz#e2aaad3a3a8fca34c28f6eee135b156ed2587ff0"
-  integrity sha512-a7Uprx8UtD+HWdyYwnD1+ExtTgqQtD2k/1yJgtXP6wnMm8byhkoTZRl+95LLThpzNZJ5aEvi46cdH+ayMFRwmA==
+  version "1.1.1"
+  resolved "https://registry.yarnpkg.com/is-boolean-object/-/is-boolean-object-1.1.1.tgz#3c0878f035cb821228d350d2e1e36719716a3de8"
+  integrity sha512-bXdQWkECBUIAcCkeH1unwJLIpZYaa5VvuygSyS/c2lf719mTKZDU5UdDRlpd01UjADgmW8RfqaP+mRaVPdr/Ng==
   dependencies:
-    call-bind "^1.0.0"
+    call-bind "^1.0.2"
 
 is-buffer@^1.1.5:
   version "1.1.6"
@@ -5293,10 +4985,17 @@ is-ci@^2.0.0:
   dependencies:
     ci-info "^2.0.0"
 
+is-ci@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/is-ci/-/is-ci-3.0.0.tgz#c7e7be3c9d8eef7d0fa144390bd1e4b88dc4c994"
+  integrity sha512-kDXyttuLeslKAHYL/K28F2YkM3x5jvFPEw3yXbRptXydjD9rpLEz+C5K5iutY9ZiUu6AP41JdvRQwF4Iqs4ZCQ==
+  dependencies:
+    ci-info "^3.1.1"
+
 is-core-module@^2.2.0:
-  version "2.2.0"
-  resolved "https://registry.yarnpkg.com/is-core-module/-/is-core-module-2.2.0.tgz#97037ef3d52224d85163f5597b2b63d9afed981a"
-  integrity sha512-XRAfAdyyY5F5cOXn7hYQDqh2Xmii+DEfIcQGxK/uNwMHhIkPWO0g8msXcbzLe+MpGoR951MlqM/2iIlU4vKDdQ==
+  version "2.4.0"
+  resolved "https://registry.yarnpkg.com/is-core-module/-/is-core-module-2.4.0.tgz#8e9fc8e15027b011418026e98f0e6f4d86305cc1"
+  integrity sha512-6A2fkfq1rfeQZjxrZJGerpLCTHRNEBiSgnu0+obeJpEPZRUooHgsizvzv0ZjJwOz3iWIHdJtVWJ/tmPr3D21/A==
   dependencies:
     has "^1.0.3"
 
@@ -5315,9 +5014,9 @@ is-data-descriptor@^1.0.0:
     kind-of "^6.0.0"
 
 is-date-object@^1.0.1:
-  version "1.0.2"
-  resolved "https://registry.yarnpkg.com/is-date-object/-/is-date-object-1.0.2.tgz#bda736f2cd8fd06d32844e7743bfa7494c3bfd7e"
-  integrity sha512-USlDT524woQ08aoZFzh3/Z6ch9Y/EWXEHQ/AaRN0SkKq4t2Jw2R2339tSXmwuVoY7LLlBCbOIlx2myP/L5zk0g==
+  version "1.0.4"
+  resolved "https://registry.yarnpkg.com/is-date-object/-/is-date-object-1.0.4.tgz#550cfcc03afada05eea3dd30981c7b09551f73e5"
+  integrity sha512-/b4ZVsG7Z5XVtIxs/h9W8nvfLgSAyKYdtGWQLbqy6jA1icmgjf8WCoTKgeS4wy5tYaPePouzFMANbnj94c2Z+A==
 
 is-descriptor@^0.1.0:
   version "0.1.6"
@@ -5337,16 +5036,6 @@ is-descriptor@^1.0.0, is-descriptor@^1.0.2:
     is-data-descriptor "^1.0.0"
     kind-of "^6.0.2"
 
-is-directory@^0.3.1:
-  version "0.3.1"
-  resolved "https://registry.yarnpkg.com/is-directory/-/is-directory-0.3.1.tgz#61339b6f2475fc772fd9c9d83f5c8575dc154ae1"
-  integrity sha1-YTObbyR1/Hcv2cnYP1yFddwVSuE=
-
-is-docker@^2.0.0:
-  version "2.2.0"
-  resolved "https://registry.yarnpkg.com/is-docker/-/is-docker-2.2.0.tgz#b037c8815281edaad6c2562648a5f5f18839d5f7"
-  integrity sha512-K4GwB4i/HzhAzwP/XSlspzRdFTI9N8OxJOyOU7Y5Rz+p+WBokXWVWblaJeBkggthmoSV0OoGTH5thJNvplpkvQ==
-
 is-extendable@^0.1.0, is-extendable@^0.1.1:
   version "0.1.1"
   resolved "https://registry.yarnpkg.com/is-extendable/-/is-extendable-0.1.1.tgz#62b110e289a471418e3ec36a617d472e301dfc89"
@@ -5405,6 +5094,11 @@ is-glob@^4.0.0, is-glob@^4.0.1:
   dependencies:
     is-extglob "^2.1.1"
 
+is-lambda@^1.0.1:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/is-lambda/-/is-lambda-1.0.1.tgz#3d9877899e6a53efc0160504cde15f82e6f061d5"
+  integrity sha1-PZh3iZ5qU+/AFgUEzeFfgubwYdU=
+
 is-negated-glob@^1.0.0:
   version "1.0.0"
   resolved "https://registry.yarnpkg.com/is-negated-glob/-/is-negated-glob-1.0.0.tgz#6910bca5da8c95e784b5751b976cf5a10fee36d2"
@@ -5416,9 +5110,9 @@ is-negative-zero@^2.0.1:
   integrity sha512-2z6JzQvZRa9A2Y7xC6dQQm4FSTSTNWjKIYYTt4246eMTJmIo0Q+ZyOsU66X8lxK1AbB92dFeglPLrhwpeRKO6w==
 
 is-number-object@^1.0.4:
-  version "1.0.4"
-  resolved "https://registry.yarnpkg.com/is-number-object/-/is-number-object-1.0.4.tgz#36ac95e741cf18b283fc1ddf5e83da798e3ec197"
-  integrity sha512-zohwelOAur+5uXtk8O3GPQ1eAcu4ZX3UwxQhUlfFFMNpUd83gXgjbhJh6HmB6LUNV/ieOLQuDwJO3dWJosUeMw==
+  version "1.0.5"
+  resolved "https://registry.yarnpkg.com/is-number-object/-/is-number-object-1.0.5.tgz#6edfaeed7950cff19afedce9fbfca9ee6dd289eb"
+  integrity sha512-RU0lI/n95pMoUKu9v1BZP5MBcZuNSVJkMkAG2dJqC4z2GlkGUNeH68SuHuBKBD/XFe+LHZ+f9BKkLET60Niedw==
 
 is-number@^3.0.0:
   version "3.0.0"
@@ -5437,11 +5131,6 @@ is-number@^7.0.0:
   resolved "https://registry.yarnpkg.com/is-number/-/is-number-7.0.0.tgz#7535345b896734d5f80c4d06c50955527a14f12b"
   integrity sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==
 
-is-obj@^1.0.0:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/is-obj/-/is-obj-1.0.1.tgz#3e4729ac1f5fde025cd7d83a896dab9f4f67db0f"
-  integrity sha1-PkcprB9f3gJc19g6iW2rn09n2w8=
-
 is-obj@^2.0.0:
   version "2.0.0"
   resolved "https://registry.yarnpkg.com/is-obj/-/is-obj-2.0.0.tgz#473fb05d973705e3fd9620545018ca8e22ef4982"
@@ -5462,6 +5151,11 @@ is-plain-obj@^1.0.0, is-plain-obj@^1.1.0:
   resolved "https://registry.yarnpkg.com/is-plain-obj/-/is-plain-obj-1.1.0.tgz#71a50c8429dfca773c92a390a4a03b39fcd51d3e"
   integrity sha1-caUMhCnfync8kqOQpKA7OfzVHT4=
 
+is-plain-obj@^2.0.0:
+  version "2.1.0"
+  resolved "https://registry.yarnpkg.com/is-plain-obj/-/is-plain-obj-2.1.0.tgz#45e42e37fccf1f40da8e5f76ee21515840c09287"
+  integrity sha512-YWnfyRwxL/+SsrWYfOpUtz5b3YD+nyfkHvjbcanzk8zgyO4ASD67uVMRt8k5bM4lLMDnXfriRhOpemw+NfT1eA==
+
 is-plain-object@^2.0.1, is-plain-object@^2.0.3, is-plain-object@^2.0.4:
   version "2.0.4"
   resolved "https://registry.yarnpkg.com/is-plain-object/-/is-plain-object-2.0.4.tgz#2c163b3fafb1b606d9d17928f05c2a1c38e07677"
@@ -5474,7 +5168,7 @@ is-plain-object@^5.0.0:
   resolved "https://registry.yarnpkg.com/is-plain-object/-/is-plain-object-5.0.0.tgz#4427f50ab3429e9025ea7d52e9043a9ef4159344"
   integrity sha512-VRSzKkbMm5jMDoKLbltAkFQ5Qr7VDiTFGXxYFXXowVj387GeGNOCsOH6Msy00SGZ3Fp84b1Naa1psqgcCIEP5Q==
 
-is-potential-custom-element-name@^1.0.0:
+is-potential-custom-element-name@^1.0.1:
   version "1.0.1"
   resolved "https://registry.yarnpkg.com/is-potential-custom-element-name/-/is-potential-custom-element-name-1.0.1.tgz#171ed6f19e3ac554394edf78caa05784a45bebb5"
   integrity sha512-bCYeRA2rVibKZd+s2625gGnGF/t7DSqDs4dP7CrLA1m7jKWz6pps0LpYLJN8Q64HtmPKJ1hrN3nzPNKFEKOUiQ==
@@ -5484,13 +5178,13 @@ is-promise@^2.2.2:
   resolved "https://registry.yarnpkg.com/is-promise/-/is-promise-2.2.2.tgz#39ab959ccbf9a774cf079f7b40c7a26f763135f1"
   integrity sha512-+lP4/6lKUBfQjZ2pdxThZvLUAafmZb8OAxFb8XXtiQmS35INgr85hdOGoEs124ez1FCnZJt6jau/T+alh58QFQ==
 
-is-regex@^1.1.2:
-  version "1.1.2"
-  resolved "https://registry.yarnpkg.com/is-regex/-/is-regex-1.1.2.tgz#81c8ebde4db142f2cf1c53fc86d6a45788266251"
-  integrity sha512-axvdhb5pdhEVThqJzYXwMlVuZwC+FF2DpcOhTS+y/8jVq4trxyPgfcwIxIKiyeuLlSQYKkmUaPQJ8ZE4yNKXDg==
+is-regex@^1.1.3:
+  version "1.1.3"
+  resolved "https://registry.yarnpkg.com/is-regex/-/is-regex-1.1.3.tgz#d029f9aff6448b93ebbe3f33dac71511fdcbef9f"
+  integrity sha512-qSVXFz28HM7y+IWX6vLCsexdlvzT1PJNFSBuaQLQ5o0IEw8UDYW6/2+eCMVyIsbM8CNLX2a/QWmSpyxYEHY7CQ==
   dependencies:
     call-bind "^1.0.2"
-    has-symbols "^1.0.1"
+    has-symbols "^1.0.2"
 
 is-relative@^1.0.0:
   version "1.0.0"
@@ -5500,33 +5194,28 @@ is-relative@^1.0.0:
     is-unc-path "^1.0.0"
 
 is-ssh@^1.3.0:
-  version "1.3.2"
-  resolved "https://registry.yarnpkg.com/is-ssh/-/is-ssh-1.3.2.tgz#a4b82ab63d73976fd8263cceee27f99a88bdae2b"
-  integrity sha512-elEw0/0c2UscLrNG+OAorbP539E3rhliKPg+hDMWN9VwrDXfYK+4PBEykDPfxlYYtQvl84TascnQyobfQLHEhQ==
+  version "1.3.3"
+  resolved "https://registry.yarnpkg.com/is-ssh/-/is-ssh-1.3.3.tgz#7f133285ccd7f2c2c7fc897b771b53d95a2b2c7e"
+  integrity sha512-NKzJmQzJfEEma3w5cJNcUMxoXfDjz0Zj0eyCalHn2E6VOwlzjZo0yuO2fcBSf8zhFuVCL/82/r5gRcoi6aEPVQ==
   dependencies:
     protocols "^1.1.0"
 
-is-stream@^1.1.0:
-  version "1.1.0"
-  resolved "https://registry.yarnpkg.com/is-stream/-/is-stream-1.1.0.tgz#12d4a3dd4e68e0b79ceb8dbc84173ae80d91ca44"
-  integrity sha1-EtSj3U5o4Lec6428hBc66A2RykQ=
-
 is-stream@^2.0.0:
   version "2.0.0"
   resolved "https://registry.yarnpkg.com/is-stream/-/is-stream-2.0.0.tgz#bde9c32680d6fae04129d6ac9d921ce7815f78e3"
   integrity sha512-XCoy+WlUr7d1+Z8GgSuXmpuUFC9fOhRXglJMx+dwLKTkL44Cjd4W1Z5P+BQZpr+cR93aGP4S/s7Ftw6Nd/kiEw==
 
-is-string@^1.0.5:
-  version "1.0.5"
-  resolved "https://registry.yarnpkg.com/is-string/-/is-string-1.0.5.tgz#40493ed198ef3ff477b8c7f92f644ec82a5cd3a6"
-  integrity sha512-buY6VNRjhQMiF1qWDouloZlQbRhDPCebwxSjxMjxgemYT46YMd2NR0/H+fBhEfWX4A/w9TBJ+ol+okqJKFE6vQ==
+is-string@^1.0.5, is-string@^1.0.6:
+  version "1.0.6"
+  resolved "https://registry.yarnpkg.com/is-string/-/is-string-1.0.6.tgz#3fe5d5992fb0d93404f32584d4b0179a71b54a5f"
+  integrity sha512-2gdzbKUuqtQ3lYNrUTQYoClPhm7oQu4UdpSZMp1/DGgkHBT8E2Z1l0yMdb6D4zNAxwDiMv8MdulKROJGNl0Q0w==
 
 is-symbol@^1.0.2, is-symbol@^1.0.3:
-  version "1.0.3"
-  resolved "https://registry.yarnpkg.com/is-symbol/-/is-symbol-1.0.3.tgz#38e1014b9e6329be0de9d24a414fd7441ec61937"
-  integrity sha512-OwijhaRSgqvhm/0ZdAcXNZt9lYdKFpcRDT5ULUuYXPoT794UNOdU+gpT6Rzo7b4V2HUl/op6GqY894AZwv9faQ==
+  version "1.0.4"
+  resolved "https://registry.yarnpkg.com/is-symbol/-/is-symbol-1.0.4.tgz#a6dac93b635b063ca6872236de88910a57af139c"
+  integrity sha512-C/CPBqKWnvdcxqIARxyOh4v1UUEOCHpgDa0WYgpKDFMszcrPcffg5uhwSgPCLD2WWxmq6isisz87tzT01tuGhg==
   dependencies:
-    has-symbols "^1.0.1"
+    has-symbols "^1.0.2"
 
 is-text-path@^1.0.1:
   version "1.0.1"
@@ -5557,18 +5246,11 @@ is-valid-glob@^1.0.0:
   resolved "https://registry.yarnpkg.com/is-valid-glob/-/is-valid-glob-1.0.0.tgz#29bf3eff701be2d4d315dbacc39bc39fe8f601aa"
   integrity sha1-Kb8+/3Ab4tTTFdusw5vDn+j2Aao=
 
-is-windows@^1.0.0, is-windows@^1.0.1, is-windows@^1.0.2:
+is-windows@^1.0.1, is-windows@^1.0.2:
   version "1.0.2"
   resolved "https://registry.yarnpkg.com/is-windows/-/is-windows-1.0.2.tgz#d1850eb9791ecd18e6182ce12a30f396634bb19d"
   integrity sha512-eXK1UInq2bPmjyX6e3VHIzMLobc4J94i4AWn+Hpq3OU5KkrRC96OAcR3PRJ/pGu6m8TRnBHP9dkXQVsT/COVIA==
 
-is-wsl@^2.2.0:
-  version "2.2.0"
-  resolved "https://registry.yarnpkg.com/is-wsl/-/is-wsl-2.2.0.tgz#74a4c76e77ca9fd3f932f290c17ea326cd157271"
-  integrity sha512-fKzAra0rGJUUBwGBgNkHZuToZcn+TtXHpeCgmkMJMMYx1sQDYaCSyjJBSCa2nH1DGm7s3n1oBnohoVTBaN7Lww==
-  dependencies:
-    is-docker "^2.0.0"
-
 isarray@1.0.0, isarray@~1.0.0:
   version "1.0.0"
   resolved "https://registry.yarnpkg.com/isarray/-/isarray-1.0.0.tgz#bb935d48582cba168c06834957a54a3e07124f11"
@@ -5645,59 +5327,85 @@ ix@2.5.3:
     "@types/node" "^11.11.6"
     tslib "^1.9.3"
 
-jest-changed-files@^26.6.2:
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/jest-changed-files/-/jest-changed-files-26.6.2.tgz#f6198479e1cc66f22f9ae1e22acaa0b429c042d0"
-  integrity sha512-fDS7szLcY9sCtIip8Fjry9oGf3I2ht/QT21bAHm5Dmf0mD4X3ReNUf17y+bO6fR8WgbIZTlbyG1ak/53cbRzKQ==
+jest-changed-files@^27.0.1:
+  version "27.0.1"
+  resolved "https://registry.yarnpkg.com/jest-changed-files/-/jest-changed-files-27.0.1.tgz#b8356b3708cac9d05ebf6f9e0b32227b514945c8"
+  integrity sha512-Y/4AnqYNcUX/vVgfkmvSA3t7rcg+t8m3CsSGlU+ra8kjlVW5ZqXcBZY/NUew2Mo8M+dn0ApKl+FmGGT1JV5dVA==
   dependencies:
-    "@jest/types" "^26.6.2"
-    execa "^4.0.0"
-    throat "^5.0.0"
+    "@jest/types" "^27.0.1"
+    execa "^5.0.0"
+    throat "^6.0.1"
 
-jest-cli@^26.6.3:
-  version "26.6.3"
-  resolved "https://registry.yarnpkg.com/jest-cli/-/jest-cli-26.6.3.tgz#43117cfef24bc4cd691a174a8796a532e135e92a"
-  integrity sha512-GF9noBSa9t08pSyl3CY4frMrqp+aQXFGFkf5hEPbh/pIUFYWMK6ZLTfbmadxJVcJrdRoChlWQsA2VkJcDFK8hg==
+jest-circus@^27.0.1:
+  version "27.0.1"
+  resolved "https://registry.yarnpkg.com/jest-circus/-/jest-circus-27.0.1.tgz#3a7ec9e9fd60ef4c827197dffe2288aa19f86678"
+  integrity sha512-Tz3ytmrsgxWlTwSyPYb8StF9J2IMjLlbBMKAjhL2UU9/0ZpYb2JiEGjXaAhnGauQRbbpyFbSH3yj5HIbdurmwQ==
   dependencies:
-    "@jest/core" "^26.6.3"
-    "@jest/test-result" "^26.6.2"
-    "@jest/types" "^26.6.2"
+    "@jest/environment" "^27.0.1"
+    "@jest/test-result" "^27.0.1"
+    "@jest/types" "^27.0.1"
+    "@types/node" "*"
+    chalk "^4.0.0"
+    co "^4.6.0"
+    dedent "^0.7.0"
+    expect "^27.0.1"
+    is-generator-fn "^2.0.0"
+    jest-each "^27.0.1"
+    jest-matcher-utils "^27.0.1"
+    jest-message-util "^27.0.1"
+    jest-runner "^27.0.1"
+    jest-runtime "^27.0.1"
+    jest-snapshot "^27.0.1"
+    jest-util "^27.0.1"
+    pretty-format "^27.0.1"
+    stack-utils "^2.0.3"
+    throat "^6.0.1"
+
+jest-cli@^27.0.1:
+  version "27.0.1"
+  resolved "https://registry.yarnpkg.com/jest-cli/-/jest-cli-27.0.1.tgz#9accc8a505438571ee423438eac526a7ee4654b5"
+  integrity sha512-plDsQQwpkKK1SZ5L5xqMa7v/sTwB5LTIeSJqb+cV+4EMlThdUQfg8jwMfHX8jHuUc9TPGLcdoZeBuZcGGn3Rlg==
+  dependencies:
+    "@jest/core" "^27.0.1"
+    "@jest/test-result" "^27.0.1"
+    "@jest/types" "^27.0.1"
     chalk "^4.0.0"
     exit "^0.1.2"
     graceful-fs "^4.2.4"
     import-local "^3.0.2"
-    is-ci "^2.0.0"
-    jest-config "^26.6.3"
-    jest-util "^26.6.2"
-    jest-validate "^26.6.2"
+    jest-config "^27.0.1"
+    jest-util "^27.0.1"
+    jest-validate "^27.0.1"
     prompts "^2.0.1"
-    yargs "^15.4.1"
+    yargs "^16.0.3"
 
-jest-config@^26.6.3:
-  version "26.6.3"
-  resolved "https://registry.yarnpkg.com/jest-config/-/jest-config-26.6.3.tgz#64f41444eef9eb03dc51d5c53b75c8c71f645349"
-  integrity sha512-t5qdIj/bCj2j7NFVHb2nFB4aUdfucDn3JRKgrZnplb8nieAirAzRSHP8uDEd+qV6ygzg9Pz4YG7UTJf94LPSyg==
+jest-config@^27.0.1:
+  version "27.0.1"
+  resolved "https://registry.yarnpkg.com/jest-config/-/jest-config-27.0.1.tgz#db4f202efcbb92011f62d8f25b52c3d1bd5672d4"
+  integrity sha512-V8O6+CZjGF0OMq4kxVR29ztV/LQqlAAcJLw7a94RndfRXkha4U84n50yZCXiPWtAHHTmb3g1y52US6rGPxA+3w==
   dependencies:
     "@babel/core" "^7.1.0"
-    "@jest/test-sequencer" "^26.6.3"
-    "@jest/types" "^26.6.2"
-    babel-jest "^26.6.3"
+    "@jest/test-sequencer" "^27.0.1"
+    "@jest/types" "^27.0.1"
+    babel-jest "^27.0.1"
     chalk "^4.0.0"
     deepmerge "^4.2.2"
     glob "^7.1.1"
     graceful-fs "^4.2.4"
-    jest-environment-jsdom "^26.6.2"
-    jest-environment-node "^26.6.2"
-    jest-get-type "^26.3.0"
-    jest-jasmine2 "^26.6.3"
-    jest-regex-util "^26.0.0"
-    jest-resolve "^26.6.2"
-    jest-util "^26.6.2"
-    jest-validate "^26.6.2"
-    micromatch "^4.0.2"
-    pretty-format "^26.6.2"
-
-jest-diff@^26.0.0, jest-diff@^26.6.2:
+    is-ci "^3.0.0"
+    jest-circus "^27.0.1"
+    jest-environment-jsdom "^27.0.1"
+    jest-environment-node "^27.0.1"
+    jest-get-type "^27.0.1"
+    jest-jasmine2 "^27.0.1"
+    jest-regex-util "^27.0.1"
+    jest-resolve "^27.0.1"
+    jest-util "^27.0.1"
+    jest-validate "^27.0.1"
+    micromatch "^4.0.4"
+    pretty-format "^27.0.1"
+
+jest-diff@^26.0.0:
   version "26.6.2"
   resolved "https://registry.yarnpkg.com/jest-diff/-/jest-diff-26.6.2.tgz#1aa7468b52c3a68d7d5c5fdcdfcd5e49bd164394"
   integrity sha512-6m+9Z3Gv9wN0WFVasqjCL/06+EFCMTqDEUl/b87HYK2rAPTyfz4ZIuSlPhY51PIQRWx5TaxeF1qmXKe9gfN3sA==
@@ -5707,159 +5415,152 @@ jest-diff@^26.0.0, jest-diff@^26.6.2:
     jest-get-type "^26.3.0"
     pretty-format "^26.6.2"
 
-jest-docblock@^26.0.0:
-  version "26.0.0"
-  resolved "https://registry.yarnpkg.com/jest-docblock/-/jest-docblock-26.0.0.tgz#3e2fa20899fc928cb13bd0ff68bd3711a36889b5"
-  integrity sha512-RDZ4Iz3QbtRWycd8bUEPxQsTlYazfYn/h5R65Fc6gOfwozFhoImx+affzky/FFBuqISPTqjXomoIGJVKBWoo0w==
-  dependencies:
-    detect-newline "^3.0.0"
-
-jest-each@^26.6.2:
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/jest-each/-/jest-each-26.6.2.tgz#02526438a77a67401c8a6382dfe5999952c167cb"
-  integrity sha512-Mer/f0KaATbjl8MCJ+0GEpNdqmnVmDYqCTJYTvoo7rqmRiDllmp2AYN+06F93nXcY3ur9ShIjS+CO/uD+BbH4A==
-  dependencies:
-    "@jest/types" "^26.6.2"
-    chalk "^4.0.0"
-    jest-get-type "^26.3.0"
-    jest-util "^26.6.2"
-    pretty-format "^26.6.2"
-
-jest-environment-jsdom@^26.6.2:
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/jest-environment-jsdom/-/jest-environment-jsdom-26.6.2.tgz#78d09fe9cf019a357009b9b7e1f101d23bd1da3e"
-  integrity sha512-jgPqCruTlt3Kwqg5/WVFyHIOJHsiAvhcp2qiR2QQstuG9yWox5+iHpU3ZrcBxW14T4fe5Z68jAfLRh7joCSP2Q==
+jest-diff@^27.0.1:
+  version "27.0.1"
+  resolved "https://registry.yarnpkg.com/jest-diff/-/jest-diff-27.0.1.tgz#021beb29fe9f07e83c809a4f7a1ce807b229c4ab"
+  integrity sha512-DQ3OgfJgoGWVTYo4qnYW/Jg5mpYFS2QW9BLxA8bs12ZRN1K8QPZtWeYvUPohQFs3CHX3JLTndGg3jyxdL5THFQ==
   dependencies:
-    "@jest/environment" "^26.6.2"
-    "@jest/fake-timers" "^26.6.2"
-    "@jest/types" "^26.6.2"
-    "@types/node" "*"
-    jest-mock "^26.6.2"
-    jest-util "^26.6.2"
-    jsdom "^16.4.0"
+    chalk "^4.0.0"
+    diff-sequences "^27.0.1"
+    jest-get-type "^27.0.1"
+    pretty-format "^27.0.1"
 
-jest-environment-node@^26.6.2:
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/jest-environment-node/-/jest-environment-node-26.6.2.tgz#824e4c7fb4944646356f11ac75b229b0035f2b0c"
-  integrity sha512-zhtMio3Exty18dy8ee8eJ9kjnRyZC1N4C1Nt/VShN1apyXc8rWGtJ9lI7vqiWcyyXS4BVSEn9lxAM2D+07/Tag==
+jest-docblock@^27.0.1:
+  version "27.0.1"
+  resolved "https://registry.yarnpkg.com/jest-docblock/-/jest-docblock-27.0.1.tgz#bd9752819b49fa4fab1a50b73eb58c653b962e8b"
+  integrity sha512-TA4+21s3oebURc7VgFV4r7ltdIJ5rtBH1E3Tbovcg7AV+oLfD5DcJ2V2vJ5zFA9sL5CFd/d2D6IpsAeSheEdrA==
   dependencies:
-    "@jest/environment" "^26.6.2"
-    "@jest/fake-timers" "^26.6.2"
-    "@jest/types" "^26.6.2"
+    detect-newline "^3.0.0"
+
+jest-each@^27.0.1:
+  version "27.0.1"
+  resolved "https://registry.yarnpkg.com/jest-each/-/jest-each-27.0.1.tgz#37fa20b7d809b29d4349d8eb7d01f17c2feeab10"
+  integrity sha512-uJTK/aZ05HsdKkfXucAT5+/1DIURnTRv34OSxn1HWHrD+xu9eDX5Xgds09QSvg/mU01VS5upuHTDKG3W+r0rQA==
+  dependencies:
+    "@jest/types" "^27.0.1"
+    chalk "^4.0.0"
+    jest-get-type "^27.0.1"
+    jest-util "^27.0.1"
+    pretty-format "^27.0.1"
+
+jest-environment-jsdom@^27.0.1:
+  version "27.0.1"
+  resolved "https://registry.yarnpkg.com/jest-environment-jsdom/-/jest-environment-jsdom-27.0.1.tgz#12b0ed587fb53e0a581a5101bb209aef09da2310"
+  integrity sha512-lesU8T9zkjgLaLpUFmFDgchu6/2OCoXm52nN6UumR063Hb+1TJdI7ihgM86+G01Ay86Lyr+K/FAR6yIIOviH3Q==
+  dependencies:
+    "@jest/environment" "^27.0.1"
+    "@jest/fake-timers" "^27.0.1"
+    "@jest/types" "^27.0.1"
     "@types/node" "*"
-    jest-mock "^26.6.2"
-    jest-util "^26.6.2"
+    jest-mock "^27.0.1"
+    jest-util "^27.0.1"
+    jsdom "^16.6.0"
+
+jest-environment-node@^27.0.1:
+  version "27.0.1"
+  resolved "https://registry.yarnpkg.com/jest-environment-node/-/jest-environment-node-27.0.1.tgz#7d7df7ae191477a823ffb4fcc0772b4c23ec5c87"
+  integrity sha512-/p94lo0hx+hbKUw1opnRFUPPsjncRBEUU+2Dh7BuxX8Nr4rRiTivLYgXzo79FhaeMYV0uiV5WAbHBq6xC11JJg==
+  dependencies:
+    "@jest/environment" "^27.0.1"
+    "@jest/fake-timers" "^27.0.1"
+    "@jest/types" "^27.0.1"
+    "@types/node" "*"
+    jest-mock "^27.0.1"
+    jest-util "^27.0.1"
 
 jest-get-type@^26.3.0:
   version "26.3.0"
   resolved "https://registry.yarnpkg.com/jest-get-type/-/jest-get-type-26.3.0.tgz#e97dc3c3f53c2b406ca7afaed4493b1d099199e0"
   integrity sha512-TpfaviN1R2pQWkIihlfEanwOXK0zcxrKEE4MlU6Tn7keoXdN6/3gK/xl0yEh8DOunn5pOVGKf8hB4R9gVh04ig==
 
-jest-haste-map@^26.6.2:
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/jest-haste-map/-/jest-haste-map-26.6.2.tgz#dd7e60fe7dc0e9f911a23d79c5ff7fb5c2cafeaa"
-  integrity sha512-easWIJXIw71B2RdR8kgqpjQrbMRWQBgiBwXYEhtGUTaX+doCjBheluShdDMeR8IMfJiTqH4+zfhtg29apJf/8w==
+jest-get-type@^27.0.1:
+  version "27.0.1"
+  resolved "https://registry.yarnpkg.com/jest-get-type/-/jest-get-type-27.0.1.tgz#34951e2b08c8801eb28559d7eb732b04bbcf7815"
+  integrity sha512-9Tggo9zZbu0sHKebiAijyt1NM77Z0uO4tuWOxUCujAiSeXv30Vb5D4xVF4UR4YWNapcftj+PbByU54lKD7/xMg==
+
+jest-haste-map@^27.0.1:
+  version "27.0.1"
+  resolved "https://registry.yarnpkg.com/jest-haste-map/-/jest-haste-map-27.0.1.tgz#653c4ba59309a86499ad7bf663176e7f97478191"
+  integrity sha512-ioCuobr4z90H1Pz8+apz2vfz63387apzAoawm/9IIOndarDfRkjLURdLOe//AI5jUQmjVRg+WiL92339kqlCmA==
   dependencies:
-    "@jest/types" "^26.6.2"
+    "@jest/types" "^27.0.1"
     "@types/graceful-fs" "^4.1.2"
     "@types/node" "*"
     anymatch "^3.0.3"
     fb-watchman "^2.0.0"
     graceful-fs "^4.2.4"
-    jest-regex-util "^26.0.0"
-    jest-serializer "^26.6.2"
-    jest-util "^26.6.2"
-    jest-worker "^26.6.2"
-    micromatch "^4.0.2"
-    sane "^4.0.3"
+    jest-regex-util "^27.0.1"
+    jest-serializer "^27.0.1"
+    jest-util "^27.0.1"
+    jest-worker "^27.0.1"
+    micromatch "^4.0.4"
     walker "^1.0.7"
   optionalDependencies:
-    fsevents "^2.1.2"
+    fsevents "^2.3.2"
 
-jest-jasmine2@^26.6.3:
-  version "26.6.3"
-  resolved "https://registry.yarnpkg.com/jest-jasmine2/-/jest-jasmine2-26.6.3.tgz#adc3cf915deacb5212c93b9f3547cd12958f2edd"
-  integrity sha512-kPKUrQtc8aYwBV7CqBg5pu+tmYXlvFlSFYn18ev4gPFtrRzB15N2gW/Roew3187q2w2eHuu0MU9TJz6w0/nPEg==
+jest-jasmine2@^27.0.1:
+  version "27.0.1"
+  resolved "https://registry.yarnpkg.com/jest-jasmine2/-/jest-jasmine2-27.0.1.tgz#d975bfe072f3ac3596c0be5fc0a1215fd2e91e77"
+  integrity sha512-o8Ist0o970QDDm/R2o9UDbvNxq8A0++FTFQ0z9OnieJwS1nDH6H7WBDYAGPTdmnla7kbW41oLFPvhmjJE4mekg==
   dependencies:
     "@babel/traverse" "^7.1.0"
-    "@jest/environment" "^26.6.2"
-    "@jest/source-map" "^26.6.2"
-    "@jest/test-result" "^26.6.2"
-    "@jest/types" "^26.6.2"
+    "@jest/environment" "^27.0.1"
+    "@jest/source-map" "^27.0.1"
+    "@jest/test-result" "^27.0.1"
+    "@jest/types" "^27.0.1"
     "@types/node" "*"
     chalk "^4.0.0"
     co "^4.6.0"
-    expect "^26.6.2"
+    expect "^27.0.1"
     is-generator-fn "^2.0.0"
-    jest-each "^26.6.2"
-    jest-matcher-utils "^26.6.2"
-    jest-message-util "^26.6.2"
-    jest-runtime "^26.6.3"
-    jest-snapshot "^26.6.2"
-    jest-util "^26.6.2"
-    pretty-format "^26.6.2"
-    throat "^5.0.0"
-
-jest-leak-detector@^26.6.2:
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/jest-leak-detector/-/jest-leak-detector-26.6.2.tgz#7717cf118b92238f2eba65054c8a0c9c653a91af"
-  integrity sha512-i4xlXpsVSMeKvg2cEKdfhh0H39qlJlP5Ex1yQxwF9ubahboQYMgTtz5oML35AVA3B4Eu+YsmwaiKVev9KCvLxg==
-  dependencies:
-    jest-get-type "^26.3.0"
-    pretty-format "^26.6.2"
-
-jest-matcher-utils@^26.6.2:
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/jest-matcher-utils/-/jest-matcher-utils-26.6.2.tgz#8e6fd6e863c8b2d31ac6472eeb237bc595e53e7a"
-  integrity sha512-llnc8vQgYcNqDrqRDXWwMr9i7rS5XFiCwvh6DTP7Jqa2mqpcCBBlpCbn+trkG0KNhPu/h8rzyBkriOtBstvWhw==
+    jest-each "^27.0.1"
+    jest-matcher-utils "^27.0.1"
+    jest-message-util "^27.0.1"
+    jest-runtime "^27.0.1"
+    jest-snapshot "^27.0.1"
+    jest-util "^27.0.1"
+    pretty-format "^27.0.1"
+    throat "^6.0.1"
+
+jest-leak-detector@^27.0.1:
+  version "27.0.1"
+  resolved "https://registry.yarnpkg.com/jest-leak-detector/-/jest-leak-detector-27.0.1.tgz#eedeaee7c0ab553db4d8908f74967329624342b9"
+  integrity sha512-SQ/lRhfmnV3UuiaKIjwNXCaW2yh1rTMAL4n4Cl4I4gU0X2LoIc6Ogxe4UKM/J6Ld2uzc4gDGVYc5lSdpf6WjYw==
+  dependencies:
+    jest-get-type "^27.0.1"
+    pretty-format "^27.0.1"
+
+jest-matcher-utils@^27.0.1:
+  version "27.0.1"
+  resolved "https://registry.yarnpkg.com/jest-matcher-utils/-/jest-matcher-utils-27.0.1.tgz#7a01330786e370f152b0b0159f827293b6322909"
+  integrity sha512-NauNU+olKhPzLlsRnTOYFGk/MK5QFYl9ZzkrtfsY4eCq4SB3Bcl03UL44VdnlN5S/uFn4H2jwvRY1y6nSDTX3g==
   dependencies:
     chalk "^4.0.0"
-    jest-diff "^26.6.2"
-    jest-get-type "^26.3.0"
-    pretty-format "^26.6.2"
-
-jest-message-util@^24.9.0:
-  version "24.9.0"
-  resolved "https://registry.yarnpkg.com/jest-message-util/-/jest-message-util-24.9.0.tgz#527f54a1e380f5e202a8d1149b0ec872f43119e3"
-  integrity sha512-oCj8FiZ3U0hTP4aSui87P4L4jC37BtQwUMqk+zk/b11FR19BJDeZsZAvIHutWnmtw7r85UmR3CEWZ0HWU2mAlw==
-  dependencies:
-    "@babel/code-frame" "^7.0.0"
-    "@jest/test-result" "^24.9.0"
-    "@jest/types" "^24.9.0"
-    "@types/stack-utils" "^1.0.1"
-    chalk "^2.0.1"
-    micromatch "^3.1.10"
-    slash "^2.0.0"
-    stack-utils "^1.0.1"
+    jest-diff "^27.0.1"
+    jest-get-type "^27.0.1"
+    pretty-format "^27.0.1"
 
-jest-message-util@^26.6.2:
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/jest-message-util/-/jest-message-util-26.6.2.tgz#58173744ad6fc0506b5d21150b9be56ef001ca07"
-  integrity sha512-rGiLePzQ3AzwUshu2+Rn+UMFk0pHN58sOG+IaJbk5Jxuqo3NYO1U2/MIR4S1sKgsoYSXSzdtSa0TgrmtUwEbmA==
+jest-message-util@^27.0.1:
+  version "27.0.1"
+  resolved "https://registry.yarnpkg.com/jest-message-util/-/jest-message-util-27.0.1.tgz#382b7c55d8e0b1aba9eeb41d3cfdd34e451210ed"
+  integrity sha512-w8BfON2GwWORkos8BsxcwwQrLkV2s1ENxSRXK43+6yuquDE2hVxES/jrFqOArpP1ETVqqMmktU6iGkG8ncVzeA==
   dependencies:
-    "@babel/code-frame" "^7.0.0"
-    "@jest/types" "^26.6.2"
+    "@babel/code-frame" "^7.12.13"
+    "@jest/types" "^27.0.1"
     "@types/stack-utils" "^2.0.0"
     chalk "^4.0.0"
     graceful-fs "^4.2.4"
-    micromatch "^4.0.2"
-    pretty-format "^26.6.2"
+    micromatch "^4.0.4"
+    pretty-format "^27.0.1"
     slash "^3.0.0"
-    stack-utils "^2.0.2"
-
-jest-mock@^24.9.0:
-  version "24.9.0"
-  resolved "https://registry.yarnpkg.com/jest-mock/-/jest-mock-24.9.0.tgz#c22835541ee379b908673ad51087a2185c13f1c6"
-  integrity sha512-3BEYN5WbSq9wd+SyLDES7AHnjH9A/ROBwmz7l2y+ol+NtSFO8DYiEBzoO1CeFc9a8DYy10EO4dDFVv/wN3zl1w==
-  dependencies:
-    "@jest/types" "^24.9.0"
+    stack-utils "^2.0.3"
 
-jest-mock@^26.6.2:
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/jest-mock/-/jest-mock-26.6.2.tgz#d6cb712b041ed47fe0d9b6fc3474bc6543feb302"
-  integrity sha512-YyFjePHHp1LzpzYcmgqkJ0nm0gg/lJx2aZFzFy1S6eUqNjXsOqTK10zNRff2dNfssgokjkG65OlWNcIlgd3zew==
+jest-mock@^27.0.1:
+  version "27.0.1"
+  resolved "https://registry.yarnpkg.com/jest-mock/-/jest-mock-27.0.1.tgz#8394e297bc3dfed980961622cb51fd042b4acf5a"
+  integrity sha512-fXCSZQDT5hUcAUy8OBnB018x7JFOMQnz4XfpSKEbfpWzL6o5qaLRhgf2Qg2NPuVKmC/fgOf33Edj8wjF4I24CQ==
   dependencies:
-    "@jest/types" "^26.6.2"
+    "@jest/types" "^27.0.1"
     "@types/node" "*"
 
 jest-pnp-resolver@^1.2.2:
@@ -5867,150 +5568,140 @@ jest-pnp-resolver@^1.2.2:
   resolved "https://registry.yarnpkg.com/jest-pnp-resolver/-/jest-pnp-resolver-1.2.2.tgz#b704ac0ae028a89108a4d040b3f919dfddc8e33c"
   integrity sha512-olV41bKSMm8BdnuMsewT4jqlZ8+3TCARAXjZGT9jcoSnrfUnRCqnMoF9XEeoWjbzObpqF9dRhHQj0Xb9QdF6/w==
 
-jest-regex-util@^26.0.0:
-  version "26.0.0"
-  resolved "https://registry.yarnpkg.com/jest-regex-util/-/jest-regex-util-26.0.0.tgz#d25e7184b36e39fd466c3bc41be0971e821fee28"
-  integrity sha512-Gv3ZIs/nA48/Zvjrl34bf+oD76JHiGDUxNOVgUjh3j890sblXryjY4rss71fPtD/njchl6PSE2hIhvyWa1eT0A==
+jest-regex-util@^27.0.1:
+  version "27.0.1"
+  resolved "https://registry.yarnpkg.com/jest-regex-util/-/jest-regex-util-27.0.1.tgz#69d4b1bf5b690faa3490113c47486ed85dd45b68"
+  integrity sha512-6nY6QVcpTgEKQy1L41P4pr3aOddneK17kn3HJw6SdwGiKfgCGTvH02hVXL0GU8GEKtPH83eD2DIDgxHXOxVohQ==
 
-jest-resolve-dependencies@^26.6.3:
-  version "26.6.3"
-  resolved "https://registry.yarnpkg.com/jest-resolve-dependencies/-/jest-resolve-dependencies-26.6.3.tgz#6680859ee5d22ee5dcd961fe4871f59f4c784fb6"
-  integrity sha512-pVwUjJkxbhe4RY8QEWzN3vns2kqyuldKpxlxJlzEYfKSvY6/bMvxoFrYYzUO1Gx28yKWN37qyV7rIoIp2h8fTg==
+jest-resolve-dependencies@^27.0.1:
+  version "27.0.1"
+  resolved "https://registry.yarnpkg.com/jest-resolve-dependencies/-/jest-resolve-dependencies-27.0.1.tgz#3dcaeb277e0253747706467e8f05e1e78a1d534d"
+  integrity sha512-ly1x5mEf21f3IVWbUNwIz/ePLtv4QdhYuQIVSVDqxx7yzAwhhdu0DJo7UNiEYKQY7Im48wfbNdOUpo7euFUXBQ==
   dependencies:
-    "@jest/types" "^26.6.2"
-    jest-regex-util "^26.0.0"
-    jest-snapshot "^26.6.2"
+    "@jest/types" "^27.0.1"
+    jest-regex-util "^27.0.1"
+    jest-snapshot "^27.0.1"
 
-jest-resolve@^26.6.2:
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/jest-resolve/-/jest-resolve-26.6.2.tgz#a3ab1517217f469b504f1b56603c5bb541fbb507"
-  integrity sha512-sOxsZOq25mT1wRsfHcbtkInS+Ek7Q8jCHUB0ZUTP0tc/c41QHriU/NunqMfCUWsL4H3MHpvQD4QR9kSYhS7UvQ==
+jest-resolve@^27.0.1:
+  version "27.0.1"
+  resolved "https://registry.yarnpkg.com/jest-resolve/-/jest-resolve-27.0.1.tgz#4e1b76f61c7e2213d2fbd37342800864309de538"
+  integrity sha512-Q7QQ0OZ7z6D5Dul0MrsexlKalU8ZwexBfHLSu1qYPgphvUm6WO1b/xUnipU3e+uW1riDzMcJeJVYbdQ37hBHeg==
   dependencies:
-    "@jest/types" "^26.6.2"
+    "@jest/types" "^27.0.1"
     chalk "^4.0.0"
+    escalade "^3.1.1"
     graceful-fs "^4.2.4"
     jest-pnp-resolver "^1.2.2"
-    jest-util "^26.6.2"
-    read-pkg-up "^7.0.1"
-    resolve "^1.18.1"
+    jest-util "^27.0.1"
+    resolve "^1.20.0"
     slash "^3.0.0"
 
-jest-runner@^26.6.3:
-  version "26.6.3"
-  resolved "https://registry.yarnpkg.com/jest-runner/-/jest-runner-26.6.3.tgz#2d1fed3d46e10f233fd1dbd3bfaa3fe8924be159"
-  integrity sha512-atgKpRHnaA2OvByG/HpGA4g6CSPS/1LK0jK3gATJAoptC1ojltpmVlYC3TYgdmGp+GLuhzpH30Gvs36szSL2JQ==
+jest-runner@^27.0.1:
+  version "27.0.1"
+  resolved "https://registry.yarnpkg.com/jest-runner/-/jest-runner-27.0.1.tgz#52137173fbf318b7b1f034b81200c2846758f681"
+  integrity sha512-DUNizlD2D7J80G3VOrwfbtb7KYxiftMng82HNcKwTW0W3AwwNuBeq+1exoCnLO7Mxh7NP+k/1XQBlzLpjr/CnA==
   dependencies:
-    "@jest/console" "^26.6.2"
-    "@jest/environment" "^26.6.2"
-    "@jest/test-result" "^26.6.2"
-    "@jest/types" "^26.6.2"
+    "@jest/console" "^27.0.1"
+    "@jest/environment" "^27.0.1"
+    "@jest/test-result" "^27.0.1"
+    "@jest/transform" "^27.0.1"
+    "@jest/types" "^27.0.1"
     "@types/node" "*"
     chalk "^4.0.0"
-    emittery "^0.7.1"
+    emittery "^0.8.1"
     exit "^0.1.2"
     graceful-fs "^4.2.4"
-    jest-config "^26.6.3"
-    jest-docblock "^26.0.0"
-    jest-haste-map "^26.6.2"
-    jest-leak-detector "^26.6.2"
-    jest-message-util "^26.6.2"
-    jest-resolve "^26.6.2"
-    jest-runtime "^26.6.3"
-    jest-util "^26.6.2"
-    jest-worker "^26.6.2"
+    jest-config "^27.0.1"
+    jest-docblock "^27.0.1"
+    jest-haste-map "^27.0.1"
+    jest-leak-detector "^27.0.1"
+    jest-message-util "^27.0.1"
+    jest-resolve "^27.0.1"
+    jest-runtime "^27.0.1"
+    jest-util "^27.0.1"
+    jest-worker "^27.0.1"
     source-map-support "^0.5.6"
-    throat "^5.0.0"
-
-jest-runtime@^26.6.3:
-  version "26.6.3"
-  resolved "https://registry.yarnpkg.com/jest-runtime/-/jest-runtime-26.6.3.tgz#4f64efbcfac398331b74b4b3c82d27d401b8fa2b"
-  integrity sha512-lrzyR3N8sacTAMeonbqpnSka1dHNux2uk0qqDXVkMv2c/A3wYnvQ4EXuI013Y6+gSKSCxdaczvf4HF0mVXHRdw==
-  dependencies:
-    "@jest/console" "^26.6.2"
-    "@jest/environment" "^26.6.2"
-    "@jest/fake-timers" "^26.6.2"
-    "@jest/globals" "^26.6.2"
-    "@jest/source-map" "^26.6.2"
-    "@jest/test-result" "^26.6.2"
-    "@jest/transform" "^26.6.2"
-    "@jest/types" "^26.6.2"
-    "@types/yargs" "^15.0.0"
+    throat "^6.0.1"
+
+jest-runtime@^27.0.1:
+  version "27.0.1"
+  resolved "https://registry.yarnpkg.com/jest-runtime/-/jest-runtime-27.0.1.tgz#b71bb8ea189c50525aebb4aba6c524633ca27659"
+  integrity sha512-ImcrbQtpCUp8X9Rm4ky3j1GG9cqIKZJvXGZyB5cHEapGPTmg7wvvNooLmKragEe61/p/bhw1qO68Y0/9BSsBBg==
+  dependencies:
+    "@jest/console" "^27.0.1"
+    "@jest/environment" "^27.0.1"
+    "@jest/fake-timers" "^27.0.1"
+    "@jest/globals" "^27.0.1"
+    "@jest/source-map" "^27.0.1"
+    "@jest/test-result" "^27.0.1"
+    "@jest/transform" "^27.0.1"
+    "@jest/types" "^27.0.1"
+    "@types/yargs" "^16.0.0"
     chalk "^4.0.0"
-    cjs-module-lexer "^0.6.0"
+    cjs-module-lexer "^1.0.0"
     collect-v8-coverage "^1.0.0"
     exit "^0.1.2"
     glob "^7.1.3"
     graceful-fs "^4.2.4"
-    jest-config "^26.6.3"
-    jest-haste-map "^26.6.2"
-    jest-message-util "^26.6.2"
-    jest-mock "^26.6.2"
-    jest-regex-util "^26.0.0"
-    jest-resolve "^26.6.2"
-    jest-snapshot "^26.6.2"
-    jest-util "^26.6.2"
-    jest-validate "^26.6.2"
+    jest-haste-map "^27.0.1"
+    jest-message-util "^27.0.1"
+    jest-mock "^27.0.1"
+    jest-regex-util "^27.0.1"
+    jest-resolve "^27.0.1"
+    jest-snapshot "^27.0.1"
+    jest-util "^27.0.1"
+    jest-validate "^27.0.1"
     slash "^3.0.0"
     strip-bom "^4.0.0"
-    yargs "^15.4.1"
+    yargs "^16.0.3"
 
-jest-serializer@^26.6.2:
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/jest-serializer/-/jest-serializer-26.6.2.tgz#d139aafd46957d3a448f3a6cdabe2919ba0742d1"
-  integrity sha512-S5wqyz0DXnNJPd/xfIzZ5Xnp1HrJWBczg8mMfMpN78OJ5eDxXyf+Ygld9wX1DnUWbIbhM1YDY95NjR4CBXkb2g==
+jest-serializer@^27.0.1:
+  version "27.0.1"
+  resolved "https://registry.yarnpkg.com/jest-serializer/-/jest-serializer-27.0.1.tgz#2464d04dcc33fb71dc80b7c82e3c5e8a08cb1020"
+  integrity sha512-svy//5IH6bfQvAbkAEg1s7xhhgHTtXu0li0I2fdKHDsLP2P2MOiscPQIENQep8oU2g2B3jqLyxKKzotZOz4CwQ==
   dependencies:
     "@types/node" "*"
     graceful-fs "^4.2.4"
 
-jest-silent-reporter@0.1.2:
-  version "0.1.2"
-  resolved "https://registry.yarnpkg.com/jest-silent-reporter/-/jest-silent-reporter-0.1.2.tgz#9d797c0b509e1def16647a07daf25f014c50b333"
-  integrity sha512-w/qc9NvWqdX0vZv6TUG4EE15d72+JxQJYh+3hqq8cTi3BnfBOtwNtL3T6TwkZSy/sfc3REW5niz0eSBPTIvWnA==
-  dependencies:
-    chalk "^2.3.1"
-    jest-util "^24.0.0"
-
-jest-snapshot@^26.6.2:
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/jest-snapshot/-/jest-snapshot-26.6.2.tgz#f3b0af1acb223316850bd14e1beea9837fb39c84"
-  integrity sha512-OLhxz05EzUtsAmOMzuupt1lHYXCNib0ECyuZ/PZOx9TrZcC8vL0x+DUG3TL+GLX3yHG45e6YGjIm0XwDc3q3og==
+jest-silent-reporter@0.5.0:
+  version "0.5.0"
+  resolved "https://registry.yarnpkg.com/jest-silent-reporter/-/jest-silent-reporter-0.5.0.tgz#5fd8ccd61665227e3bf19d908b7350719d06ff38"
+  integrity sha512-epdLt8Oj0a1AyRiR6F8zx/1SVT1Mi7VU3y4wB2uOBHs/ohIquC7v2eeja7UN54uRPyHInIKWdL+RdG228n5pJQ==
   dependencies:
+    chalk "^4.0.0"
+    jest-util "^26.0.0"
+
+jest-snapshot@^27.0.1:
+  version "27.0.1"
+  resolved "https://registry.yarnpkg.com/jest-snapshot/-/jest-snapshot-27.0.1.tgz#01a82d901f260604908373795c9255b032d2a07a"
+  integrity sha512-HgKmSebDB3rswugREeh+nKrxJEVZE12K7lZ2MuwfFZT6YmiH0TlofsL2YmiLsCsG5KH5ZcLYYpF5bDrvtVx/Xg==
+  dependencies:
+    "@babel/core" "^7.7.2"
+    "@babel/generator" "^7.7.2"
+    "@babel/parser" "^7.7.2"
+    "@babel/plugin-syntax-typescript" "^7.7.2"
+    "@babel/traverse" "^7.7.2"
     "@babel/types" "^7.0.0"
-    "@jest/types" "^26.6.2"
+    "@jest/transform" "^27.0.1"
+    "@jest/types" "^27.0.1"
     "@types/babel__traverse" "^7.0.4"
-    "@types/prettier" "^2.0.0"
+    "@types/prettier" "^2.1.5"
+    babel-preset-current-node-syntax "^1.0.0"
     chalk "^4.0.0"
-    expect "^26.6.2"
+    expect "^27.0.1"
     graceful-fs "^4.2.4"
-    jest-diff "^26.6.2"
-    jest-get-type "^26.3.0"
-    jest-haste-map "^26.6.2"
-    jest-matcher-utils "^26.6.2"
-    jest-message-util "^26.6.2"
-    jest-resolve "^26.6.2"
+    jest-diff "^27.0.1"
+    jest-get-type "^27.0.1"
+    jest-haste-map "^27.0.1"
+    jest-matcher-utils "^27.0.1"
+    jest-message-util "^27.0.1"
+    jest-resolve "^27.0.1"
+    jest-util "^27.0.1"
     natural-compare "^1.4.0"
-    pretty-format "^26.6.2"
+    pretty-format "^27.0.1"
     semver "^7.3.2"
 
-jest-util@^24.0.0:
-  version "24.9.0"
-  resolved "https://registry.yarnpkg.com/jest-util/-/jest-util-24.9.0.tgz#7396814e48536d2e85a37de3e4c431d7cb140162"
-  integrity sha512-x+cZU8VRmOJxbA1K5oDBdxQmdq0OIdADarLxk0Mq+3XS4jgvhG/oKGWcIDCtPG0HgjxOYvF+ilPJQsAyXfbNOg==
-  dependencies:
-    "@jest/console" "^24.9.0"
-    "@jest/fake-timers" "^24.9.0"
-    "@jest/source-map" "^24.9.0"
-    "@jest/test-result" "^24.9.0"
-    "@jest/types" "^24.9.0"
-    callsites "^3.0.0"
-    chalk "^2.0.1"
-    graceful-fs "^4.1.15"
-    is-ci "^2.0.0"
-    mkdirp "^0.5.1"
-    slash "^2.0.0"
-    source-map "^0.6.0"
-
-jest-util@^26.1.0, jest-util@^26.6.2:
+jest-util@^26.0.0:
   version "26.6.2"
   resolved "https://registry.yarnpkg.com/jest-util/-/jest-util-26.6.2.tgz#907535dbe4d5a6cb4c47ac9b926f6af29576cbc1"
   integrity sha512-MDW0fKfsn0OI7MS7Euz6h8HNDXVQ0gaM9uW6RjfDmd1DAFcaxX9OqIakHIqhbnmF08Cf2DLDG+ulq8YQQ0Lp0Q==
@@ -6022,29 +5713,41 @@ jest-util@^26.1.0, jest-util@^26.6.2:
     is-ci "^2.0.0"
     micromatch "^4.0.2"
 
-jest-validate@^26.6.2:
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/jest-validate/-/jest-validate-26.6.2.tgz#23d380971587150467342911c3d7b4ac57ab20ec"
-  integrity sha512-NEYZ9Aeyj0i5rQqbq+tpIOom0YS1u2MVu6+euBsvpgIme+FOfRmoC4R5p0JiAUpaFvFy24xgrpMknarR/93XjQ==
+jest-util@^27.0.0, jest-util@^27.0.1:
+  version "27.0.1"
+  resolved "https://registry.yarnpkg.com/jest-util/-/jest-util-27.0.1.tgz#324ed9879d129c1e64f9169a739d6d50d7928769"
+  integrity sha512-lEw3waSmEOO4ZkwkUlFSvg4es1+8+LIkSGxp/kF60K0+vMR3Dv3O2HMZhcln9NHqSQzpVbsDT6OeMzUPW7DfRg==
   dependencies:
-    "@jest/types" "^26.6.2"
-    camelcase "^6.0.0"
+    "@jest/types" "^27.0.1"
+    "@types/node" "*"
     chalk "^4.0.0"
-    jest-get-type "^26.3.0"
+    graceful-fs "^4.2.4"
+    is-ci "^3.0.0"
+    picomatch "^2.2.3"
+
+jest-validate@^27.0.1:
+  version "27.0.1"
+  resolved "https://registry.yarnpkg.com/jest-validate/-/jest-validate-27.0.1.tgz#8e43428674b6097f8ee3abe42c4248a4826cd008"
+  integrity sha512-zvmPRcfTkqTZuHveIKAI2nbkUc3SDXjWVJULknPLGF5bdxOGSeGZg7f/Uw0MUVOkCOaspcHnsPCgZG0pqmg71g==
+  dependencies:
+    "@jest/types" "^27.0.1"
+    camelcase "^6.2.0"
+    chalk "^4.0.0"
+    jest-get-type "^27.0.1"
     leven "^3.1.0"
-    pretty-format "^26.6.2"
+    pretty-format "^27.0.1"
 
-jest-watcher@^26.6.2:
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/jest-watcher/-/jest-watcher-26.6.2.tgz#a5b683b8f9d68dbcb1d7dae32172d2cca0592975"
-  integrity sha512-WKJob0P/Em2csiVthsI68p6aGKTIcsfjH9Gsx1f0A3Italz43e3ho0geSAVsmj09RWOELP1AZ/DXyJgOgDKxXQ==
+jest-watcher@^27.0.1:
+  version "27.0.1"
+  resolved "https://registry.yarnpkg.com/jest-watcher/-/jest-watcher-27.0.1.tgz#61b9403d7b498161f6aa6124602363525ac3efc2"
+  integrity sha512-Chp9c02BN0IgEbtGreyAhGqIsOrn9a0XnzbuXOxdW1+cW0Tjh12hMzHDIdLFHpYP/TqaMTmPHaJ5KWvpCCrNFw==
   dependencies:
-    "@jest/test-result" "^26.6.2"
-    "@jest/types" "^26.6.2"
+    "@jest/test-result" "^27.0.1"
+    "@jest/types" "^27.0.1"
     "@types/node" "*"
     ansi-escapes "^4.2.1"
     chalk "^4.0.0"
-    jest-util "^26.6.2"
+    jest-util "^27.0.1"
     string-length "^4.0.1"
 
 jest-worker@^26.6.2:
@@ -6056,14 +5759,23 @@ jest-worker@^26.6.2:
     merge-stream "^2.0.0"
     supports-color "^7.0.0"
 
-jest@26.6.3:
-  version "26.6.3"
-  resolved "https://registry.yarnpkg.com/jest/-/jest-26.6.3.tgz#40e8fdbe48f00dfa1f0ce8121ca74b88ac9148ef"
-  integrity sha512-lGS5PXGAzR4RF7V5+XObhqz2KZIDUA1yD0DG6pBVmy10eh0ZIXQImRuzocsI/N2XZ1GrLFwTS27In2i2jlpq1Q==
+jest-worker@^27.0.1:
+  version "27.0.1"
+  resolved "https://registry.yarnpkg.com/jest-worker/-/jest-worker-27.0.1.tgz#b255fcbb40fb467295010c628474b1185cab4f9e"
+  integrity sha512-NhHqClI3owOjmS8dBhQMKHZ2rrT0sBTpqGitp9nMX5AAjVXd+15o4v96uBEMhoywaLKN+5opcKBlXwAoADZolA==
+  dependencies:
+    "@types/node" "*"
+    merge-stream "^2.0.0"
+    supports-color "^8.0.0"
+
+jest@27.0.1:
+  version "27.0.1"
+  resolved "https://registry.yarnpkg.com/jest/-/jest-27.0.1.tgz#d3822f0904f3bbe884bea393cede2be2aa290d0e"
+  integrity sha512-lFEoUdXjbGAIxk/gZhcv98xOaH1hjqG5R/PQHs5GBfIK5iL3tnXCjHQf4HQLVZZ2rcXML3oeVg9+XrRZbooBdQ==
   dependencies:
-    "@jest/core" "^26.6.3"
+    "@jest/core" "^27.0.1"
     import-local "^3.0.2"
-    jest-cli "^26.6.3"
+    jest-cli "^27.0.1"
 
 js-tokens@^4.0.0:
   version "4.0.0"
@@ -6083,13 +5795,13 @@ jsbn@~0.1.0:
   resolved "https://registry.yarnpkg.com/jsbn/-/jsbn-0.1.1.tgz#a5e654c2e5a2deb5f201d96cefbca80c0ef2f513"
   integrity sha1-peZUwuWi3rXyAdls77yoDA7y9RM=
 
-jsdom@^16.4.0:
-  version "16.5.2"
-  resolved "https://registry.yarnpkg.com/jsdom/-/jsdom-16.5.2.tgz#583fac89a0aea31dbf6237e7e4bedccd9beab472"
-  integrity sha512-JxNtPt9C1ut85boCbJmffaQ06NBnzkQY/MWO3YxPW8IWS38A26z+B1oBvA9LwKrytewdfymnhi4UNH3/RAgZrg==
+jsdom@^16.6.0:
+  version "16.6.0"
+  resolved "https://registry.yarnpkg.com/jsdom/-/jsdom-16.6.0.tgz#f79b3786682065492a3da6a60a4695da983805ac"
+  integrity sha512-Ty1vmF4NHJkolaEmdjtxTfSfkdb8Ywarwf63f+F8/mDD1uLSSWDxDuMiZxiPhwunLrn9LOSVItWj4bLYsLN3Dg==
   dependencies:
     abab "^2.0.5"
-    acorn "^8.1.0"
+    acorn "^8.2.4"
     acorn-globals "^6.0.0"
     cssom "^0.4.4"
     cssstyle "^2.3.0"
@@ -6097,12 +5809,13 @@ jsdom@^16.4.0:
     decimal.js "^10.2.1"
     domexception "^2.0.1"
     escodegen "^2.0.0"
+    form-data "^3.0.0"
     html-encoding-sniffer "^2.0.1"
-    is-potential-custom-element-name "^1.0.0"
+    http-proxy-agent "^4.0.1"
+    https-proxy-agent "^5.0.0"
+    is-potential-custom-element-name "^1.0.1"
     nwsapi "^2.2.0"
     parse5 "6.0.1"
-    request "^2.88.2"
-    request-promise-native "^1.0.9"
     saxes "^5.0.1"
     symbol-tree "^3.2.4"
     tough-cookie "^4.0.0"
@@ -6112,7 +5825,7 @@ jsdom@^16.4.0:
     whatwg-encoding "^1.0.5"
     whatwg-mimetype "^2.3.0"
     whatwg-url "^8.5.0"
-    ws "^7.4.4"
+    ws "^7.4.5"
     xml-name-validator "^3.0.0"
 
 jsesc@^2.5.1:
@@ -6125,7 +5838,7 @@ json-bignum@^0.0.3:
   resolved "https://registry.yarnpkg.com/json-bignum/-/json-bignum-0.0.3.tgz#41163b50436c773d82424dbc20ed70db7604b8d7"
   integrity sha1-QRY7UENsdz2CQk28IO1w23YEuNc=
 
-json-parse-better-errors@^1.0.0, json-parse-better-errors@^1.0.1, json-parse-better-errors@^1.0.2:
+json-parse-better-errors@^1.0.1, json-parse-better-errors@^1.0.2:
   version "1.0.2"
   resolved "https://registry.yarnpkg.com/json-parse-better-errors/-/json-parse-better-errors-1.0.2.tgz#bb867cfb3450e69107c131d1c514bab3dc8bcaa9"
   integrity sha512-mrqyZKfX5EhL7hvqcV6WG1yYjnjeuYDzDhhcAAUrq8Po85NBQBJP+ZDUT75qZQ98IkUoBqdkExkukOU7Ts2wrw==
@@ -6176,13 +5889,6 @@ json5@2.x, json5@^2.1.2:
   dependencies:
     minimist "^1.2.5"
 
-jsonfile@^4.0.0:
-  version "4.0.0"
-  resolved "https://registry.yarnpkg.com/jsonfile/-/jsonfile-4.0.0.tgz#8771aae0799b64076b76640fca058f9c10e33ecb"
-  integrity sha1-h3Gq4HmbZAdrdmQPygWPnBDjPss=
-  optionalDependencies:
-    graceful-fs "^4.1.6"
-
 jsonfile@^6.0.1:
   version "6.1.0"
   resolved "https://registry.yarnpkg.com/jsonfile/-/jsonfile-6.1.0.tgz#bc55b2634793c679ec6403094eb13698a6ec0aae"
@@ -6280,28 +5986,28 @@ lead@^1.0.0:
   dependencies:
     flush-write-stream "^1.0.2"
 
-lerna@3.22.1:
-  version "3.22.1"
-  resolved "https://registry.yarnpkg.com/lerna/-/lerna-3.22.1.tgz#82027ac3da9c627fd8bf02ccfeff806a98e65b62"
-  integrity sha512-vk1lfVRFm+UuEFA7wkLKeSF7Iz13W+N/vFd48aW2yuS7Kv0RbNm2/qcDPV863056LMfkRlsEe+QYOw3palj5Lg==
-  dependencies:
-    "@lerna/add" "3.21.0"
-    "@lerna/bootstrap" "3.21.0"
-    "@lerna/changed" "3.21.0"
-    "@lerna/clean" "3.21.0"
-    "@lerna/cli" "3.18.5"
-    "@lerna/create" "3.22.0"
-    "@lerna/diff" "3.21.0"
-    "@lerna/exec" "3.21.0"
-    "@lerna/import" "3.22.0"
-    "@lerna/info" "3.21.0"
-    "@lerna/init" "3.21.0"
-    "@lerna/link" "3.21.0"
-    "@lerna/list" "3.21.0"
-    "@lerna/publish" "3.22.1"
-    "@lerna/run" "3.21.0"
-    "@lerna/version" "3.22.1"
-    import-local "^2.0.0"
+lerna@4.0.0:
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/lerna/-/lerna-4.0.0.tgz#b139d685d50ea0ca1be87713a7c2f44a5b678e9e"
+  integrity sha512-DD/i1znurfOmNJb0OBw66NmNqiM8kF6uIrzrJ0wGE3VNdzeOhz9ziWLYiRaZDGGwgbcjOo6eIfcx9O5Qynz+kg==
+  dependencies:
+    "@lerna/add" "4.0.0"
+    "@lerna/bootstrap" "4.0.0"
+    "@lerna/changed" "4.0.0"
+    "@lerna/clean" "4.0.0"
+    "@lerna/cli" "4.0.0"
+    "@lerna/create" "4.0.0"
+    "@lerna/diff" "4.0.0"
+    "@lerna/exec" "4.0.0"
+    "@lerna/import" "4.0.0"
+    "@lerna/info" "4.0.0"
+    "@lerna/init" "4.0.0"
+    "@lerna/link" "4.0.0"
+    "@lerna/list" "4.0.0"
+    "@lerna/publish" "4.0.0"
+    "@lerna/run" "4.0.0"
+    "@lerna/version" "4.0.0"
+    import-local "^3.0.2"
     npmlog "^4.1.2"
 
 leven@^3.1.0:
@@ -6325,6 +6031,27 @@ levn@~0.3.0:
     prelude-ls "~1.1.2"
     type-check "~0.3.2"
 
+libnpmaccess@^4.0.1:
+  version "4.0.2"
+  resolved "https://registry.yarnpkg.com/libnpmaccess/-/libnpmaccess-4.0.2.tgz#781832fb7ccb867b26343a75a85ad9c43e50406e"
+  integrity sha512-avXtJibZuGap0/qADDYqb9zdpgzVu/yG5+tl2sTRa7MCkDNv2ZlGwCYI0r6/+tmqXPj0iB9fKexHz426vB326w==
+  dependencies:
+    aproba "^2.0.0"
+    minipass "^3.1.1"
+    npm-package-arg "^8.1.2"
+    npm-registry-fetch "^10.0.0"
+
+libnpmpublish@^4.0.0:
+  version "4.0.1"
+  resolved "https://registry.yarnpkg.com/libnpmpublish/-/libnpmpublish-4.0.1.tgz#08ca2cbb5d7f6be1ce4f3f9c49b3822682bcf166"
+  integrity sha512-hZCrZ8v4G9YH3DxpIyBdob25ijD5v5LNzRbwsej4pPDopjdcLLj1Widl+BUeFa7D0ble1JYL4F3owjLJqiA8yA==
+  dependencies:
+    normalize-package-data "^3.0.2"
+    npm-package-arg "^8.1.2"
+    npm-registry-fetch "^10.0.0"
+    semver "^7.1.3"
+    ssri "^8.0.1"
+
 liftoff@^3.1.0:
   version "3.1.0"
   resolved "https://registry.yarnpkg.com/liftoff/-/liftoff-3.1.0.tgz#c9ba6081f908670607ee79062d700df062c52ed3"
@@ -6365,16 +6092,15 @@ load-json-file@^4.0.0:
     pify "^3.0.0"
     strip-bom "^3.0.0"
 
-load-json-file@^5.3.0:
-  version "5.3.0"
-  resolved "https://registry.yarnpkg.com/load-json-file/-/load-json-file-5.3.0.tgz#4d3c1e01fa1c03ea78a60ac7af932c9ce53403f3"
-  integrity sha512-cJGP40Jc/VXUsp8/OrnyKyTZ1y6v/dphm3bioS+RrKXjK2BB6wHUd6JptZEFDGgGahMT+InnZO5i1Ei9mpC8Bw==
+load-json-file@^6.2.0:
+  version "6.2.0"
+  resolved "https://registry.yarnpkg.com/load-json-file/-/load-json-file-6.2.0.tgz#5c7770b42cafa97074ca2848707c61662f4251a1"
+  integrity sha512-gUD/epcRms75Cw8RT1pUdHugZYM5ce64ucs2GEISABwkRsOQr0q2wm/MV2TKThycIe5e0ytRweW2RZxclogCdQ==
   dependencies:
     graceful-fs "^4.1.15"
-    parse-json "^4.0.0"
-    pify "^4.0.1"
-    strip-bom "^3.0.0"
-    type-fest "^0.3.0"
+    parse-json "^5.0.0"
+    strip-bom "^4.0.0"
+    type-fest "^0.6.0"
 
 loader-runner@^4.2.0:
   version "4.2.0"
@@ -6389,14 +6115,6 @@ locate-path@^2.0.0:
     p-locate "^2.0.0"
     path-exists "^3.0.0"
 
-locate-path@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/locate-path/-/locate-path-3.0.0.tgz#dbec3b3ab759758071b58fe59fc41871af21400e"
-  integrity sha512-7AO748wWnIhNqAuaty2ZWHkQHRSNfPVIsPIfwEOWO22AmaoVrWavlOcMR5nzTLNYvp36X220/maaRsrec1G65A==
-  dependencies:
-    p-locate "^3.0.0"
-    path-exists "^3.0.0"
-
 locate-path@^5.0.0:
   version "5.0.0"
   resolved "https://registry.yarnpkg.com/locate-path/-/locate-path-5.0.0.tgz#1afba396afd676a6d42504d0a67a3a7eb9f62aa0"
@@ -6419,11 +6137,6 @@ lodash.clonedeep@^4.5.0:
   resolved "https://registry.yarnpkg.com/lodash.clonedeep/-/lodash.clonedeep-4.5.0.tgz#e23f3f9c4f8fbdde872529c1071857a086e5ccef"
   integrity sha1-4j8/nE+Pvd6HJSnBBxhXoIblzO8=
 
-lodash.flatten@^4.4.0:
-  version "4.4.0"
-  resolved "https://registry.yarnpkg.com/lodash.flatten/-/lodash.flatten-4.4.0.tgz#f31c22225a9632d2bbf8e4addbef240aa765a61f"
-  integrity sha1-8xwiIlqWMtK7+OSt2+8kCqdlph8=
-
 lodash.get@^4.4.2:
   version "4.4.2"
   resolved "https://registry.yarnpkg.com/lodash.get/-/lodash.get-4.4.2.tgz#2d177f652fa31e939b4438d5341499dfa3825e99"
@@ -6434,17 +6147,12 @@ lodash.ismatch@^4.4.0:
   resolved "https://registry.yarnpkg.com/lodash.ismatch/-/lodash.ismatch-4.4.0.tgz#756cb5150ca3ba6f11085a78849645f188f85f37"
   integrity sha1-dWy1FQyjum8RCFp4hJZF8Yj4Xzc=
 
-lodash.set@^4.3.2:
-  version "4.3.2"
-  resolved "https://registry.yarnpkg.com/lodash.set/-/lodash.set-4.3.2.tgz#d8757b1da807dde24816b0d6a84bea1a76230b23"
-  integrity sha1-2HV7HagH3eJIFrDWqEvqGnYjCyM=
-
-lodash.sortby@^4.7.0:
-  version "4.7.0"
-  resolved "https://registry.yarnpkg.com/lodash.sortby/-/lodash.sortby-4.7.0.tgz#edd14c824e2cc9c1e0b0a1b42bb5210516a42438"
-  integrity sha1-7dFMgk4sycHgsKG0K7UhBRakJDg=
+lodash.merge@^4.6.2:
+  version "4.6.2"
+  resolved "https://registry.yarnpkg.com/lodash.merge/-/lodash.merge-4.6.2.tgz#558aa53b43b661e1925a0afdfa36a9a1085fe57a"
+  integrity sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ==
 
-lodash.template@^4.0.2, lodash.template@^4.5.0:
+lodash.template@^4.5.0:
   version "4.5.0"
   resolved "https://registry.yarnpkg.com/lodash.template/-/lodash.template-4.5.0.tgz#f976195cf3f347d0d5f52483569fe8031ccce8ab"
   integrity sha512-84vYFxIkmidUiFxidA/KjjH9pAycqW+h980j7Fuz5qxRtO9pgB7MDFTdys1N7A5mcucRiDyEq4fusljItR1T/A==
@@ -6464,12 +6172,7 @@ lodash.truncate@^4.4.2:
   resolved "https://registry.yarnpkg.com/lodash.truncate/-/lodash.truncate-4.4.2.tgz#5a350da0b1113b837ecfffd5812cbe58d6eae193"
   integrity sha1-WjUNoLERO4N+z//VgSy+WNbq4ZM=
 
-lodash.uniq@^4.5.0:
-  version "4.5.0"
-  resolved "https://registry.yarnpkg.com/lodash.uniq/-/lodash.uniq-4.5.0.tgz#d0225373aeb652adc1bc82e4945339a842754773"
-  integrity sha1-0CJTc662Uq3BvILklFM5qEJ1R3M=
-
-lodash@4.x, lodash@^4.17.12, lodash@^4.17.15, lodash@^4.17.19, lodash@^4.17.21, lodash@^4.17.4, lodash@^4.2.1, lodash@^4.7.0:
+lodash@4.x, lodash@^4.17.15, lodash@^4.17.19, lodash@^4.17.21, lodash@^4.17.4, lodash@^4.7.0:
   version "4.17.21"
   resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.21.tgz#679591c564c3bffaae8454cf0b3df370c3d6911c"
   integrity sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==
@@ -6518,18 +6221,6 @@ lunr@^2.3.9:
   resolved "https://registry.yarnpkg.com/lunr/-/lunr-2.3.9.tgz#18b123142832337dd6e964df1a5a7707b25d35e1"
   integrity sha512-zTU3DaZaF3Rt9rhN3uBMGQD3dD2/vFQqnvZCDv4dl5iOzq2IZQqTxu90r4E5J+nP70J3ilqVCrbho2eWaeW8Ow==
 
-macos-release@^2.2.0:
-  version "2.4.1"
-  resolved "https://registry.yarnpkg.com/macos-release/-/macos-release-2.4.1.tgz#64033d0ec6a5e6375155a74b1a1eba8e509820ac"
-  integrity sha512-H/QHeBIN1fIGJX517pvK8IEK53yQOW7YcEI55oYtgjDdoCQQz7eJS94qt5kNrscReEyuD/JcdFCm2XBEcGOITg==
-
-make-dir@^1.0.0:
-  version "1.3.0"
-  resolved "https://registry.yarnpkg.com/make-dir/-/make-dir-1.3.0.tgz#79c1033b80515bd6d24ec9933e860ca75ee27f0c"
-  integrity sha512-2w31R7SJtieJJnQtGc7RVL2StM2vGYVfqUOvUDxH6bC6aJTxPxTF0GnIgCyu7tjockiUWAYQRbxa7vKn34s5sQ==
-  dependencies:
-    pify "^3.0.0"
-
 make-dir@^2.1.0:
   version "2.1.0"
   resolved "https://registry.yarnpkg.com/make-dir/-/make-dir-2.1.0.tgz#5f0310e18b8be898cc07009295a30ae41e91e6f5"
@@ -6550,22 +6241,26 @@ make-error@1.x, make-error@^1.1.1:
   resolved "https://registry.yarnpkg.com/make-error/-/make-error-1.3.6.tgz#2eb2e37ea9b67c4891f684a1394799af484cf7a2"
   integrity sha512-s8UhlNe7vPKomQhC1qFelMokr/Sc3AgNbso3n74mVPA5LTZwkB9NlXf4XPamLxJE8h0gh73rM94xvwRT2CVInw==
 
-make-fetch-happen@^5.0.0:
-  version "5.0.2"
-  resolved "https://registry.yarnpkg.com/make-fetch-happen/-/make-fetch-happen-5.0.2.tgz#aa8387104f2687edca01c8687ee45013d02d19bd"
-  integrity sha512-07JHC0r1ykIoruKO8ifMXu+xEU8qOXDFETylktdug6vJDACnP+HKevOu3PXyNPzFyTSlz8vrBYlBO1JZRe8Cag==
-  dependencies:
-    agentkeepalive "^3.4.1"
-    cacache "^12.0.0"
-    http-cache-semantics "^3.8.1"
-    http-proxy-agent "^2.1.0"
-    https-proxy-agent "^2.2.3"
-    lru-cache "^5.1.1"
-    mississippi "^3.0.0"
-    node-fetch-npm "^2.0.2"
-    promise-retry "^1.1.1"
-    socks-proxy-agent "^4.0.0"
-    ssri "^6.0.0"
+make-fetch-happen@^8.0.9:
+  version "8.0.14"
+  resolved "https://registry.yarnpkg.com/make-fetch-happen/-/make-fetch-happen-8.0.14.tgz#aaba73ae0ab5586ad8eaa68bd83332669393e222"
+  integrity sha512-EsS89h6l4vbfJEtBZnENTOFk8mCRpY5ru36Xe5bcX1KYIli2mkSHqoFsp5O1wMDvTJJzxe/4THpCTtygjeeGWQ==
+  dependencies:
+    agentkeepalive "^4.1.3"
+    cacache "^15.0.5"
+    http-cache-semantics "^4.1.0"
+    http-proxy-agent "^4.0.1"
+    https-proxy-agent "^5.0.0"
+    is-lambda "^1.0.1"
+    lru-cache "^6.0.0"
+    minipass "^3.1.3"
+    minipass-collect "^1.0.2"
+    minipass-fetch "^1.3.2"
+    minipass-flush "^1.0.5"
+    minipass-pipeline "^1.2.4"
+    promise-retry "^2.0.1"
+    socks-proxy-agent "^5.0.0"
+    ssri "^8.0.0"
 
 make-iterator@^1.0.0:
   version "1.0.1"
@@ -6591,11 +6286,6 @@ map-obj@^1.0.0, map-obj@^1.0.1:
   resolved "https://registry.yarnpkg.com/map-obj/-/map-obj-1.0.1.tgz#d933ceb9205d82bdcf4886f6742bdc2b4dea146d"
   integrity sha1-2TPOuSBdgr3PSIb2dCvcK03qFG0=
 
-map-obj@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/map-obj/-/map-obj-2.0.0.tgz#a65cd29087a92598b8791257a523e021222ac1f9"
-  integrity sha1-plzSkIepJZi4eRJXpSPgISIqwfk=
-
 map-obj@^4.0.0:
   version "4.2.1"
   resolved "https://registry.yarnpkg.com/map-obj/-/map-obj-4.2.1.tgz#e4ea399dbc979ae735c83c863dd31bdf364277b7"
@@ -6608,10 +6298,10 @@ map-visit@^1.0.0:
   dependencies:
     object-visit "^1.0.0"
 
-marked@^2.0.1:
-  version "2.0.3"
-  resolved "https://registry.yarnpkg.com/marked/-/marked-2.0.3.tgz#3551c4958c4da36897bda2a16812ef1399c8d6b0"
-  integrity sha512-5otztIIcJfPc2qGTN8cVtOJEjNJZ0jwa46INMagrYfk0EvqtRuEHLsEe0LrFS0/q+ZRKT0+kXK7P2T1AN5lWRA==
+marked@^2.0.3:
+  version "2.0.5"
+  resolved "https://registry.yarnpkg.com/marked/-/marked-2.0.5.tgz#2d15c759b9497b0e7b5b57f4c2edabe1002ef9e7"
+  integrity sha512-yfCEUXmKhBPLOzEC7c+tc4XZdIeTdGoRCZakFMkCxodr7wDXqoapIME4wjcpBPJLNyUnKJ3e8rb8wlAgnLnaDw==
 
 matchdep@^2.0.0:
   version "2.0.0"
@@ -6628,13 +6318,12 @@ math-random@^1.0.1:
   resolved "https://registry.yarnpkg.com/math-random/-/math-random-1.0.4.tgz#5dd6943c938548267016d4e34f057583080c514c"
   integrity sha512-rUxjysqif/BZQH2yhd5Aaq7vXMSx9NdEsQcyA07uEzIvxgI7zIr33gGsh+RU0/XjmQpCW7RsVof1vlkvQVCK5A==
 
-memfs@2.15.2:
-  version "2.15.2"
-  resolved "https://registry.yarnpkg.com/memfs/-/memfs-2.15.2.tgz#199b64580cf849ea641d8fac81d96742bfebd26d"
-  integrity sha512-jFC2mc3Aa224nJB824vbJzuiksf3+wPjFSKrXS7bA3o3H0Yy4/bh0R1nAsQyL/P80PVyT56ZowQJ+NGniHWhVQ==
+memfs@3.2.2:
+  version "3.2.2"
+  resolved "https://registry.yarnpkg.com/memfs/-/memfs-3.2.2.tgz#5de461389d596e3f23d48bb7c2afb6161f4df40e"
+  integrity sha512-RE0CwmIM3CEvpcdK3rZ19BC4E6hv9kADkMN5rPduRak58cNArWLi/9jFLsa4rhsjfVxMP3v0jO7FHXq7SvFY5Q==
   dependencies:
-    fast-extend "0.0.2"
-    fs-monkey "^0.3.3"
+    fs-monkey "1.0.3"
 
 memoizee@0.4.X:
   version "0.4.15"
@@ -6671,21 +6360,6 @@ meow@^3.3.0:
     redent "^1.0.0"
     trim-newlines "^1.0.0"
 
-meow@^4.0.0:
-  version "4.0.1"
-  resolved "https://registry.yarnpkg.com/meow/-/meow-4.0.1.tgz#d48598f6f4b1472f35bf6317a95945ace347f975"
-  integrity sha512-xcSBHD5Z86zaOc+781KrupuHAzeGXSLtiAOmBsiLDiPSaYSB6hdew2ng9EBAnZ62jagG9MHAOdxpDi/lWBFJ/A==
-  dependencies:
-    camelcase-keys "^4.0.0"
-    decamelize-keys "^1.0.0"
-    loud-rejection "^1.0.0"
-    minimist "^1.1.3"
-    minimist-options "^3.0.1"
-    normalize-package-data "^2.3.4"
-    read-pkg-up "^3.0.0"
-    redent "^2.0.0"
-    trim-newlines "^2.0.0"
-
 meow@^6.1.1:
   version "6.1.1"
   resolved "https://registry.yarnpkg.com/meow/-/meow-6.1.1.tgz#1ad64c4b76b2a24dfb2f635fddcadf320d251467"
@@ -6749,13 +6423,13 @@ micromatch@^3.0.4, micromatch@^3.1.10, micromatch@^3.1.4:
     snapdragon "^0.8.1"
     to-regex "^3.0.2"
 
-micromatch@^4.0.2:
-  version "4.0.2"
-  resolved "https://registry.yarnpkg.com/micromatch/-/micromatch-4.0.2.tgz#4fcb0999bf9fbc2fcbdd212f6d629b9a56c39259"
-  integrity sha512-y7FpHSbMUMoyPbYUSzO6PaZ6FyRnQOpHuKwbo1G+Knck95XVU4QAiKdGEnj5wwoS7PlOgthX/09u5iFJ+aYf5Q==
+micromatch@^4.0.2, micromatch@^4.0.4:
+  version "4.0.4"
+  resolved "https://registry.yarnpkg.com/micromatch/-/micromatch-4.0.4.tgz#896d519dfe9db25fce94ceb7a500919bf881ebf9"
+  integrity sha512-pRmzw/XUcwXGpD9aI9q/0XOwLNygjETJ8y0ao0wdqprrzDa4YnxLcz7fQRZr8voh8V10kGhABbNcHVk5wHgWwg==
   dependencies:
     braces "^3.0.1"
-    picomatch "^2.0.5"
+    picomatch "^2.2.3"
 
 mime-db@1.47.0:
   version "1.47.0"
@@ -6769,11 +6443,6 @@ mime-types@^2.1.12, mime-types@^2.1.27, mime-types@~2.1.19:
   dependencies:
     mime-db "1.47.0"
 
-mimic-fn@^1.0.0:
-  version "1.2.0"
-  resolved "https://registry.yarnpkg.com/mimic-fn/-/mimic-fn-1.2.0.tgz#820c86a39334640e99516928bd03fca88057d022"
-  integrity sha512-jf84uxzwiuiIVKiOLpfYk7N46TSy8ubTonmneY9vrpHNAnp0QBt2BxWV9dO3/j+BoVAb+a5G6YDPW3M5HOdMWQ==
-
 mimic-fn@^2.1.0:
   version "2.1.0"
   resolved "https://registry.yarnpkg.com/mimic-fn/-/mimic-fn-2.1.0.tgz#7ed2c2ccccaf84d3ffcb7a69b57711fc2083401b"
@@ -6800,20 +6469,59 @@ minimist-options@4.1.0, minimist-options@^4.0.2:
     is-plain-obj "^1.1.0"
     kind-of "^6.0.3"
 
-minimist-options@^3.0.1:
-  version "3.0.2"
-  resolved "https://registry.yarnpkg.com/minimist-options/-/minimist-options-3.0.2.tgz#fba4c8191339e13ecf4d61beb03f070103f3d954"
-  integrity sha512-FyBrT/d0d4+uiZRbqznPXqw3IpZZG3gl3wKWiX784FycUKVwBt0uLBFkQrtE4tZOrgo78nZp2jnKz3L65T5LdQ==
-  dependencies:
-    arrify "^1.0.1"
-    is-plain-obj "^1.1.0"
-
-minimist@1.x, minimist@^1.1.1, minimist@^1.1.3, minimist@^1.2.0, minimist@^1.2.5:
+minimist@1.x, minimist@^1.1.3, minimist@^1.2.0, minimist@^1.2.5:
   version "1.2.5"
   resolved "https://registry.yarnpkg.com/minimist/-/minimist-1.2.5.tgz#67d66014b66a6a8aaa0c083c5fd58df4e4e97602"
   integrity sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==
 
-minipass@^2.3.5, minipass@^2.6.0, minipass@^2.8.6, minipass@^2.9.0:
+minipass-collect@^1.0.2:
+  version "1.0.2"
+  resolved "https://registry.yarnpkg.com/minipass-collect/-/minipass-collect-1.0.2.tgz#22b813bf745dc6edba2576b940022ad6edc8c617"
+  integrity sha512-6T6lH0H8OG9kITm/Jm6tdooIbogG9e0tLgpY6mphXSm/A9u8Nq1ryBG+Qspiub9LjWlBPsPS3tWQ/Botq4FdxA==
+  dependencies:
+    minipass "^3.0.0"
+
+minipass-fetch@^1.3.0, minipass-fetch@^1.3.2:
+  version "1.3.3"
+  resolved "https://registry.yarnpkg.com/minipass-fetch/-/minipass-fetch-1.3.3.tgz#34c7cea038c817a8658461bf35174551dce17a0a"
+  integrity sha512-akCrLDWfbdAWkMLBxJEeWTdNsjML+dt5YgOI4gJ53vuO0vrmYQkUPxa6j6V65s9CcePIr2SSWqjT2EcrNseryQ==
+  dependencies:
+    minipass "^3.1.0"
+    minipass-sized "^1.0.3"
+    minizlib "^2.0.0"
+  optionalDependencies:
+    encoding "^0.1.12"
+
+minipass-flush@^1.0.5:
+  version "1.0.5"
+  resolved "https://registry.yarnpkg.com/minipass-flush/-/minipass-flush-1.0.5.tgz#82e7135d7e89a50ffe64610a787953c4c4cbb373"
+  integrity sha512-JmQSYYpPUqX5Jyn1mXaRwOda1uQ8HP5KAT/oDSLCzt1BYRhQU0/hDtsB1ufZfEEzMZ9aAVmsBw8+FWsIXlClWw==
+  dependencies:
+    minipass "^3.0.0"
+
+minipass-json-stream@^1.0.1:
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/minipass-json-stream/-/minipass-json-stream-1.0.1.tgz#7edbb92588fbfc2ff1db2fc10397acb7b6b44aa7"
+  integrity sha512-ODqY18UZt/I8k+b7rl2AENgbWE8IDYam+undIJONvigAz8KR5GWblsFTEfQs0WODsjbSXWlm+JHEv8Gr6Tfdbg==
+  dependencies:
+    jsonparse "^1.3.1"
+    minipass "^3.0.0"
+
+minipass-pipeline@^1.2.2, minipass-pipeline@^1.2.4:
+  version "1.2.4"
+  resolved "https://registry.yarnpkg.com/minipass-pipeline/-/minipass-pipeline-1.2.4.tgz#68472f79711c084657c067c5c6ad93cddea8214c"
+  integrity sha512-xuIq7cIOt09RPRJ19gdi4b+RiNvDFYe5JH+ggNvBqGqpQXcru3PcRmOZuHBKWK1Txf9+cQ+HMVN4d6z46LZP7A==
+  dependencies:
+    minipass "^3.0.0"
+
+minipass-sized@^1.0.3:
+  version "1.0.3"
+  resolved "https://registry.yarnpkg.com/minipass-sized/-/minipass-sized-1.0.3.tgz#70ee5a7c5052070afacfbc22977ea79def353b70"
+  integrity sha512-MbkQQ2CTiBMlA2Dm/5cY+9SWFEN8pzzOXi6rlM5Xxq0Yqbda5ZQy9sU75a673FE9ZK0Zsbr6Y5iP6u9nktfg2g==
+  dependencies:
+    minipass "^3.0.0"
+
+minipass@^2.6.0, minipass@^2.8.6, minipass@^2.9.0:
   version "2.9.0"
   resolved "https://registry.yarnpkg.com/minipass/-/minipass-2.9.0.tgz#e713762e7d3e32fed803115cf93e04bca9fcc9a6"
   integrity sha512-wxfUjg9WebH+CUDX/CdbRlh5SmfZiy/hpkxaRI16Y9W56Pa75sWgd/rvFilSgrauD9NyFymP/+JFV3KwzIsJeg==
@@ -6821,6 +6529,13 @@ minipass@^2.3.5, minipass@^2.6.0, minipass@^2.8.6, minipass@^2.9.0:
     safe-buffer "^5.1.2"
     yallist "^3.0.0"
 
+minipass@^3.0.0, minipass@^3.1.0, minipass@^3.1.1, minipass@^3.1.3:
+  version "3.1.3"
+  resolved "https://registry.yarnpkg.com/minipass/-/minipass-3.1.3.tgz#7d42ff1f39635482e15f9cdb53184deebd5815fd"
+  integrity sha512-Mgd2GdMVzY+x3IJ+oHnVM+KG3lA5c8tnabyJKmHSaG2kAGpudxuOf8ToDkhumF7UzME7DecbQE9uOZhNm7PuJg==
+  dependencies:
+    yallist "^4.0.0"
+
 minizlib@^1.2.1:
   version "1.3.3"
   resolved "https://registry.yarnpkg.com/minizlib/-/minizlib-1.3.3.tgz#2290de96818a34c29551c8a8d301216bd65a861d"
@@ -6828,21 +6543,13 @@ minizlib@^1.2.1:
   dependencies:
     minipass "^2.9.0"
 
-mississippi@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/mississippi/-/mississippi-3.0.0.tgz#ea0a3291f97e0b5e8776b363d5f0a12d94c67022"
-  integrity sha512-x471SsVjUtBRtcvd4BzKE9kFC+/2TeWgKCgw0bZcw1b9l2X3QX5vCWgF+KaZaYm87Ss//rHnWryupDrgLvmSkA==
+minizlib@^2.0.0, minizlib@^2.1.1:
+  version "2.1.2"
+  resolved "https://registry.yarnpkg.com/minizlib/-/minizlib-2.1.2.tgz#e90d3466ba209b932451508a11ce3d3632145931"
+  integrity sha512-bAxsR8BVfj60DWXHE3u30oHzfl4G7khkSuPW+qvpd7jFRHm7dLxOjUk1EHACJ/hxLY8phGJ0YhYHZo7jil7Qdg==
   dependencies:
-    concat-stream "^1.5.0"
-    duplexify "^3.4.2"
-    end-of-stream "^1.1.0"
-    flush-write-stream "^1.0.0"
-    from2 "^2.1.0"
-    parallel-transform "^1.1.0"
-    pump "^3.0.0"
-    pumpify "^1.3.3"
-    stream-each "^1.1.0"
-    through2 "^2.0.0"
+    minipass "^3.0.0"
+    yallist "^4.0.0"
 
 mixin-deep@^1.2.0:
   version "1.3.2"
@@ -6852,14 +6559,16 @@ mixin-deep@^1.2.0:
     for-in "^1.0.2"
     is-extendable "^1.0.1"
 
-mkdirp-promise@^5.0.1:
-  version "5.0.1"
-  resolved "https://registry.yarnpkg.com/mkdirp-promise/-/mkdirp-promise-5.0.1.tgz#e9b8f68e552c68a9c1713b84883f7a1dd039b8a1"
-  integrity sha1-6bj2jlUsaKnBcTuEiD96HdA5uKE=
+mkdirp-infer-owner@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/mkdirp-infer-owner/-/mkdirp-infer-owner-2.0.0.tgz#55d3b368e7d89065c38f32fd38e638f0ab61d316"
+  integrity sha512-sdqtiFt3lkOaYvTXSRIUjkIdPTcxgv5+fgqYE/5qgwdw12cOrAuzzgzvVExIkH/ul1oeHN3bCLOWSG3XOqbKKw==
   dependencies:
-    mkdirp "*"
+    chownr "^2.0.0"
+    infer-owner "^1.0.4"
+    mkdirp "^1.0.3"
 
-mkdirp@*, mkdirp@1.0.4, mkdirp@1.x:
+mkdirp@1.0.4, mkdirp@1.x, mkdirp@^1.0.3, mkdirp@^1.0.4:
   version "1.0.4"
   resolved "https://registry.yarnpkg.com/mkdirp/-/mkdirp-1.0.4.tgz#3eb5ed62622756d79a5f0e2a221dfebad75c2f7e"
   integrity sha512-vVqVZQyf3WLx2Shd0qJ9xuvqgAyKPLAiqITEtqW0oIUjzo3PePDd6fW9iFz30ef7Ysp/oiWqbhszeGWW2T6Gzw==
@@ -6876,18 +6585,6 @@ modify-values@^1.0.0:
   resolved "https://registry.yarnpkg.com/modify-values/-/modify-values-1.0.1.tgz#b3939fa605546474e3e3e3c63d64bd43b4ee6022"
   integrity sha512-xV2bxeN6F7oYjZWTe/YPAy6MN2M+sL4u/Rlm2AHCIVGfo2p1yGmBHQ6vHehl4bRTZBdHu3TSkWdYgkwpYzAGSw==
 
-move-concurrently@^1.0.1:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/move-concurrently/-/move-concurrently-1.0.1.tgz#be2c005fda32e0b29af1f05d7c4b33214c701f92"
-  integrity sha1-viwAX9oy4LKa8fBdfEszIUxwH5I=
-  dependencies:
-    aproba "^1.1.1"
-    copy-concurrently "^1.0.0"
-    fs-write-stream-atomic "^1.0.8"
-    mkdirp "^0.5.1"
-    rimraf "^2.5.4"
-    run-queue "^1.0.3"
-
 ms@2.0.0:
   version "2.0.0"
   resolved "https://registry.yarnpkg.com/ms/-/ms-2.0.0.tgz#5608aeadfc00be6c2901df5f9861788de0d597c8"
@@ -6903,48 +6600,35 @@ ms@^2.0.0, ms@^2.1.1:
   resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.3.tgz#574c8138ce1d2b5861f0b44579dbadd60c6615b2"
   integrity sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==
 
-multimatch@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/multimatch/-/multimatch-3.0.0.tgz#0e2534cc6bc238d9ab67e1b9cd5fcd85a6dbf70b"
-  integrity sha512-22foS/gqQfANZ3o+W7ST2x25ueHDVNWl/b9OlGcLpy/iKxjCpvcNCM51YCenUi7Mt/jAjjqv8JwZRs8YP5sRjA==
+multimatch@^5.0.0:
+  version "5.0.0"
+  resolved "https://registry.yarnpkg.com/multimatch/-/multimatch-5.0.0.tgz#932b800963cea7a31a033328fa1e0c3a1874dbe6"
+  integrity sha512-ypMKuglUrZUD99Tk2bUQ+xNQj43lPEfAeX2o9cTteAmShXy2VHDJpuwu1o0xqoKCt9jLVAvwyFKdLTPXKAfJyA==
   dependencies:
-    array-differ "^2.0.3"
-    array-union "^1.0.2"
-    arrify "^1.0.1"
+    "@types/minimatch" "^3.0.3"
+    array-differ "^3.0.0"
+    array-union "^2.1.0"
+    arrify "^2.0.1"
     minimatch "^3.0.4"
 
-multistream@2.1.1:
-  version "2.1.1"
-  resolved "https://registry.yarnpkg.com/multistream/-/multistream-2.1.1.tgz#629d3a29bd76623489980d04519a2c365948148c"
-  integrity sha512-xasv76hl6nr1dEy3lPvy7Ej7K/Lx3O/FCvwge8PeVJpciPPoNCbaANcNiBug3IpdvTveZUcAV0DJzdnUDMesNQ==
+multistream@4.1.0:
+  version "4.1.0"
+  resolved "https://registry.yarnpkg.com/multistream/-/multistream-4.1.0.tgz#7bf00dfd119556fbc153cff3de4c6d477909f5a8"
+  integrity sha512-J1XDiAmmNpRCBfIWJv+n0ymC4ABcf/Pl+5YvC5B/D2f/2+8PtHvCNxMPKiQcZyi922Hq69J2YOpb1pTywfifyw==
   dependencies:
-    inherits "^2.0.1"
-    readable-stream "^2.0.5"
+    once "^1.4.0"
+    readable-stream "^3.6.0"
 
 mute-stdout@^1.0.0:
   version "1.0.1"
   resolved "https://registry.yarnpkg.com/mute-stdout/-/mute-stdout-1.0.1.tgz#acb0300eb4de23a7ddeec014e3e96044b3472331"
   integrity sha512-kDcwXR4PS7caBpuRYYBUz9iVixUk3anO3f5OYFiIPwK/20vCzKCHyKoulbiDY1S53zD2bxUpxN/IJ+TnXjfvxg==
 
-mute-stream@0.0.7:
-  version "0.0.7"
-  resolved "https://registry.yarnpkg.com/mute-stream/-/mute-stream-0.0.7.tgz#3075ce93bc21b8fab43e1bc4da7e8115ed1e7bab"
-  integrity sha1-MHXOk7whuPq0PhvE2n6BFe0ee6s=
-
-mute-stream@~0.0.4:
+mute-stream@0.0.8, mute-stream@~0.0.4:
   version "0.0.8"
   resolved "https://registry.yarnpkg.com/mute-stream/-/mute-stream-0.0.8.tgz#1630c42b2251ff81e2a283de96a5497ea92e5e0d"
   integrity sha512-nnbWWOkoWyUsTjKrhgD0dcz22mdkSnpYqbEjIm2nhwhuxlSkpywJmBo8h0ZqJdkp73mb90SssHkN4rsRaBAfAA==
 
-mz@^2.5.0:
-  version "2.7.0"
-  resolved "https://registry.yarnpkg.com/mz/-/mz-2.7.0.tgz#95008057a56cafadc2bc63dde7f9ff6955948e32"
-  integrity sha512-z81GNO7nnYMEhrGh9LeymoE4+Yr0Wn5McHIZMK5cfQCl+NDX08sCZgUc9/6MHni9IWuFLm1Z3HTCXu2z9fN62Q==
-  dependencies:
-    any-promise "^1.0.0"
-    object-assign "^4.0.1"
-    thenify-all "^1.0.0"
-
 nan@^2.12.1:
   version "2.14.2"
   resolved "https://registry.yarnpkg.com/nan/-/nan-2.14.2.tgz#f5376400695168f4cc694ac9393d0c9585eeea19"
@@ -6997,16 +6681,7 @@ nice-try@^1.0.4:
   resolved "https://registry.yarnpkg.com/nice-try/-/nice-try-1.0.5.tgz#a3378a7696ce7d223e88fc9b764bd7ef1089e366"
   integrity sha512-1nh45deeb5olNY7eX82BkPO7SSxR5SSYJiPTrTdFUVYwAl8CKMA5N9PjTYkHiRjisVcxcQ1HXdLhx2qxxJzLNQ==
 
-node-fetch-npm@^2.0.2:
-  version "2.0.4"
-  resolved "https://registry.yarnpkg.com/node-fetch-npm/-/node-fetch-npm-2.0.4.tgz#6507d0e17a9ec0be3bec516958a497cec54bf5a4"
-  integrity sha512-iOuIQDWDyjhv9qSDrj9aq/klt6F9z1p2otB3AV7v3zBDcL/x+OfGsvGQZZCcMZbUf4Ujw1xGNQkjvGnVT22cKg==
-  dependencies:
-    encoding "^0.1.11"
-    json-parse-better-errors "^1.0.0"
-    safe-buffer "^5.1.1"
-
-node-fetch@^2.5.0, node-fetch@^2.6.1:
+node-fetch@^2.6.1:
   version "2.6.1"
   resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-2.6.1.tgz#045bd323631f76ed2e2b55573394416b639a0052"
   integrity sha512-V4aYg89jEoVRxRb2fJdAg8FHvI7cEyYdVAh94HH0UIK8oJxUfkjlDQN9RbMx+bEjP7+ggMiFRprSti032Oipxw==
@@ -7028,6 +6703,22 @@ node-gyp@^5.0.2:
     tar "^4.4.12"
     which "^1.3.1"
 
+node-gyp@^7.1.0:
+  version "7.1.2"
+  resolved "https://registry.yarnpkg.com/node-gyp/-/node-gyp-7.1.2.tgz#21a810aebb187120251c3bcec979af1587b188ae"
+  integrity sha512-CbpcIo7C3eMu3dL1c3d0xw449fHIGALIJsRP4DDPHpyiW8vcriNY7ubh9TE4zEKfSxscY7PjeFnshE7h75ynjQ==
+  dependencies:
+    env-paths "^2.2.0"
+    glob "^7.1.4"
+    graceful-fs "^4.2.3"
+    nopt "^5.0.0"
+    npmlog "^4.1.2"
+    request "^2.88.2"
+    rimraf "^3.0.2"
+    semver "^7.3.2"
+    tar "^6.0.2"
+    which "^2.0.2"
+
 node-int64@^0.4.0:
   version "0.4.0"
   resolved "https://registry.yarnpkg.com/node-int64/-/node-int64-0.4.0.tgz#87a9065cdb355d3182d8f94ce11188b825c68a3b"
@@ -7038,22 +6729,10 @@ node-modules-regexp@^1.0.0:
   resolved "https://registry.yarnpkg.com/node-modules-regexp/-/node-modules-regexp-1.0.0.tgz#8d9dbe28964a4ac5712e9131642107c71e90ec40"
   integrity sha1-jZ2+KJZKSsVxLpExZCEHxx6Q7EA=
 
-node-notifier@^8.0.0:
-  version "8.0.2"
-  resolved "https://registry.yarnpkg.com/node-notifier/-/node-notifier-8.0.2.tgz#f3167a38ef0d2c8a866a83e318c1ba0efeb702c5"
-  integrity sha512-oJP/9NAdd9+x2Q+rfphB2RJCHjod70RcRLjosiPMMu5gjIfwVnOUGq2nbTjTUbmy0DJ/tFIVT30+Qe3nzl4TJg==
-  dependencies:
-    growly "^1.3.0"
-    is-wsl "^2.2.0"
-    semver "^7.3.2"
-    shellwords "^0.1.1"
-    uuid "^8.3.0"
-    which "^2.0.2"
-
-node-releases@^1.1.70:
-  version "1.1.71"
-  resolved "https://registry.yarnpkg.com/node-releases/-/node-releases-1.1.71.tgz#cb1334b179896b1c89ecfdd4b725fb7bbdfc7dbb"
-  integrity sha512-zR6HoT6LrLCRBwukmrVbHv0EpEQjksO6GmFcZQQuCAy139BEsoVKPYnf3jongYW83fAa1torLGYwxxky/p28sg==
+node-releases@^1.1.71:
+  version "1.1.72"
+  resolved "https://registry.yarnpkg.com/node-releases/-/node-releases-1.1.72.tgz#14802ab6b1039a79a0c7d662b610a5bbd76eacbe"
+  integrity sha512-LLUo+PpH3dU6XizX3iVoubUNheF/owjXCZZ5yACDxNnPtgFuludV1ZL3ayK1kVep42Rmm0+R9/Y60NQbZ2bifw==
 
 nopt@^4.0.1:
   version "4.0.3"
@@ -7063,7 +6742,14 @@ nopt@^4.0.1:
     abbrev "1"
     osenv "^0.1.4"
 
-normalize-package-data@^2.0.0, normalize-package-data@^2.3.0, normalize-package-data@^2.3.2, normalize-package-data@^2.3.4, normalize-package-data@^2.3.5, normalize-package-data@^2.4.0, normalize-package-data@^2.5.0:
+nopt@^5.0.0:
+  version "5.0.0"
+  resolved "https://registry.yarnpkg.com/nopt/-/nopt-5.0.0.tgz#530942bb58a512fccafe53fe210f13a25355dc88"
+  integrity sha512-Tbj67rffqceeLpcRXrT7vKAN8CwfPeIBgM7E6iBkmKLV7bEMwpGgYLGv0jACUsECaa/vuxP0IjEont6umdMgtQ==
+  dependencies:
+    abbrev "1"
+
+normalize-package-data@^2.0.0, normalize-package-data@^2.3.0, normalize-package-data@^2.3.2, normalize-package-data@^2.3.4, normalize-package-data@^2.5.0:
   version "2.5.0"
   resolved "https://registry.yarnpkg.com/normalize-package-data/-/normalize-package-data-2.5.0.tgz#e66db1838b200c1dfc233225d12cb36520e234a8"
   integrity sha512-/5CMN3T0R4XTj4DcGaexo+roZSdSFW/0AOOTROrjxzCG1wrWXEsGbRKevjlIL+ZDE4sZlJr5ED4YW0yqmkK+eA==
@@ -7073,7 +6759,7 @@ normalize-package-data@^2.0.0, normalize-package-data@^2.3.0, normalize-package-
     semver "2 || 3 || 4 || 5"
     validate-npm-package-license "^3.0.1"
 
-normalize-package-data@^3.0.0:
+normalize-package-data@^3.0.0, normalize-package-data@^3.0.2:
   version "3.0.2"
   resolved "https://registry.yarnpkg.com/normalize-package-data/-/normalize-package-data-3.0.2.tgz#cae5c410ae2434f9a6c1baa65d5bc3b9366c8699"
   integrity sha512-6CdZocmfGaKnIHPVFhJJZ3GuR8SsLKvDANFp47Jmy51aKIr8akjAWTSxtpI+MBgBFdSMRyo4hMpDlT6dTffgZg==
@@ -7107,14 +6793,21 @@ now-and-later@^2.0.0:
   dependencies:
     once "^1.3.2"
 
-npm-bundled@^1.0.1:
-  version "1.1.1"
-  resolved "https://registry.yarnpkg.com/npm-bundled/-/npm-bundled-1.1.1.tgz#1edd570865a94cdb1bc8220775e29466c9fb234b"
-  integrity sha512-gqkfgGePhTpAEgUsGEgcq1rqPXA+tv/aVBlgEzfXwA1yiUJF7xtEt3CtVwOjNYQOVknDk0F20w58Fnm3EtG0fA==
+npm-bundled@^1.1.1:
+  version "1.1.2"
+  resolved "https://registry.yarnpkg.com/npm-bundled/-/npm-bundled-1.1.2.tgz#944c78789bd739035b70baa2ca5cc32b8d860bc1"
+  integrity sha512-x5DHup0SuyQcmL3s7Rx/YQ8sbw/Hzg0rj48eN0dV7hf5cmQq5PXIeioroH3raV1QC1yh3uTYuMThvEQF3iKgGQ==
   dependencies:
     npm-normalize-package-bin "^1.0.1"
 
-npm-lifecycle@^3.1.2:
+npm-install-checks@^4.0.0:
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/npm-install-checks/-/npm-install-checks-4.0.0.tgz#a37facc763a2fde0497ef2c6d0ac7c3fbe00d7b4"
+  integrity sha512-09OmyDkNLYwqKPOnbI8exiOZU2GVVmQp7tgez2BPi5OZC8M82elDAps7sxC4l//uSUtotWqoEIDwjRvWH4qz8w==
+  dependencies:
+    semver "^7.1.1"
+
+npm-lifecycle@^3.1.5:
   version "3.1.5"
   resolved "https://registry.yarnpkg.com/npm-lifecycle/-/npm-lifecycle-3.1.5.tgz#9882d3642b8c82c815782a12e6a1bfeed0026309"
   integrity sha512-lDLVkjfZmvmfvpvBzA4vzee9cn+Me4orq0QF8glbswJVEbIcSNWib7qGOffolysc3teCqbbPZZkzbr3GQZTL1g==
@@ -7133,33 +6826,61 @@ npm-normalize-package-bin@^1.0.0, npm-normalize-package-bin@^1.0.1:
   resolved "https://registry.yarnpkg.com/npm-normalize-package-bin/-/npm-normalize-package-bin-1.0.1.tgz#6e79a41f23fd235c0623218228da7d9c23b8f6e2"
   integrity sha512-EPfafl6JL5/rU+ot6P3gRSCpPDW5VmIzX959Ob1+ySFUuuYHWHekXpwdUZcKP5C+DS4GEtdJluwBjnsNDl+fSA==
 
-"npm-package-arg@^4.0.0 || ^5.0.0 || ^6.0.0", npm-package-arg@^6.0.0, npm-package-arg@^6.1.0:
-  version "6.1.1"
-  resolved "https://registry.yarnpkg.com/npm-package-arg/-/npm-package-arg-6.1.1.tgz#02168cb0a49a2b75bf988a28698de7b529df5cb7"
-  integrity sha512-qBpssaL3IOZWi5vEKUKW0cO7kzLeT+EQO9W8RsLOZf76KF9E/K9+wH0C7t06HXPpaH8WH5xF1MExLuCwbTqRUg==
+npm-package-arg@^8.0.0, npm-package-arg@^8.0.1, npm-package-arg@^8.1.0, npm-package-arg@^8.1.2:
+  version "8.1.2"
+  resolved "https://registry.yarnpkg.com/npm-package-arg/-/npm-package-arg-8.1.2.tgz#b868016ae7de5619e729993fbd8d11dc3c52ab62"
+  integrity sha512-6Eem455JsSMJY6Kpd3EyWE+n5hC+g9bSyHr9K9U2zqZb7+02+hObQ2c0+8iDk/mNF+8r1MhY44WypKJAkySIYA==
   dependencies:
-    hosted-git-info "^2.7.1"
-    osenv "^0.1.5"
-    semver "^5.6.0"
+    hosted-git-info "^4.0.1"
+    semver "^7.3.4"
     validate-npm-package-name "^3.0.0"
 
-npm-packlist@^1.4.4:
-  version "1.4.8"
-  resolved "https://registry.yarnpkg.com/npm-packlist/-/npm-packlist-1.4.8.tgz#56ee6cc135b9f98ad3d51c1c95da22bbb9b2ef3e"
-  integrity sha512-5+AZgwru5IevF5ZdnFglB5wNlHG1AOOuw28WhUq8/8emhBmLv6jX5by4WJCh7lW0uSYZYS6DXqIsyZVIXRZU9A==
+npm-packlist@^2.1.4:
+  version "2.2.2"
+  resolved "https://registry.yarnpkg.com/npm-packlist/-/npm-packlist-2.2.2.tgz#076b97293fa620f632833186a7a8f65aaa6148c8"
+  integrity sha512-Jt01acDvJRhJGthnUJVF/w6gumWOZxO7IkpY/lsX9//zqQgnF7OJaxgQXcerd4uQOLu7W5bkb4mChL9mdfm+Zg==
   dependencies:
-    ignore-walk "^3.0.1"
-    npm-bundled "^1.0.1"
+    glob "^7.1.6"
+    ignore-walk "^3.0.3"
+    npm-bundled "^1.1.1"
     npm-normalize-package-bin "^1.0.1"
 
-npm-pick-manifest@^3.0.0:
-  version "3.0.2"
-  resolved "https://registry.yarnpkg.com/npm-pick-manifest/-/npm-pick-manifest-3.0.2.tgz#f4d9e5fd4be2153e5f4e5f9b7be8dc419a99abb7"
-  integrity sha512-wNprTNg+X5nf+tDi+hbjdHhM4bX+mKqv6XmPh7B5eG+QY9VARfQPfCEH013H5GqfNj6ee8Ij2fg8yk0mzps1Vw==
+npm-pick-manifest@^6.0.0, npm-pick-manifest@^6.1.1:
+  version "6.1.1"
+  resolved "https://registry.yarnpkg.com/npm-pick-manifest/-/npm-pick-manifest-6.1.1.tgz#7b5484ca2c908565f43b7f27644f36bb816f5148"
+  integrity sha512-dBsdBtORT84S8V8UTad1WlUyKIY9iMsAmqxHbLdeEeBNMLQDlDWWra3wYUx9EBEIiG/YwAy0XyNHDd2goAsfuA==
+  dependencies:
+    npm-install-checks "^4.0.0"
+    npm-normalize-package-bin "^1.0.1"
+    npm-package-arg "^8.1.2"
+    semver "^7.3.4"
+
+npm-registry-fetch@^10.0.0:
+  version "10.1.2"
+  resolved "https://registry.yarnpkg.com/npm-registry-fetch/-/npm-registry-fetch-10.1.2.tgz#11ffe03d813c653e768bdf762cfc5f1afe91b8bd"
+  integrity sha512-KsM/TdPmntqgBFlfsbkOLkkE9ovZo7VpVcd+/eTdYszCrgy5zFl5JzWm+OxavFaEWlbkirpkou+ZYI00RmOBFA==
   dependencies:
-    figgy-pudding "^3.5.1"
-    npm-package-arg "^6.0.0"
-    semver "^5.4.1"
+    lru-cache "^6.0.0"
+    make-fetch-happen "^8.0.9"
+    minipass "^3.1.3"
+    minipass-fetch "^1.3.0"
+    minipass-json-stream "^1.0.1"
+    minizlib "^2.0.0"
+    npm-package-arg "^8.0.0"
+
+npm-registry-fetch@^9.0.0:
+  version "9.0.0"
+  resolved "https://registry.yarnpkg.com/npm-registry-fetch/-/npm-registry-fetch-9.0.0.tgz#86f3feb4ce00313bc0b8f1f8f69daae6face1661"
+  integrity sha512-PuFYYtnQ8IyVl6ib9d3PepeehcUeHN9IO5N/iCRhyg9tStQcqGQBRVHmfmMWPDERU3KwZoHFvbJ4FPXPspvzbA==
+  dependencies:
+    "@npmcli/ci-detect" "^1.0.0"
+    lru-cache "^6.0.0"
+    make-fetch-happen "^8.0.9"
+    minipass "^3.1.3"
+    minipass-fetch "^1.3.0"
+    minipass-json-stream "^1.0.1"
+    minizlib "^2.0.0"
+    npm-package-arg "^8.0.0"
 
 npm-run-all@4.1.5:
   version "4.1.5"
@@ -7176,14 +6897,7 @@ npm-run-all@4.1.5:
     shell-quote "^1.6.1"
     string.prototype.padend "^3.0.0"
 
-npm-run-path@^2.0.0:
-  version "2.0.2"
-  resolved "https://registry.yarnpkg.com/npm-run-path/-/npm-run-path-2.0.2.tgz#35a9232dfa35d7067b4cb2ddf2357b1871536c5f"
-  integrity sha1-NakjLfo11wZ7TLLd8jV7GHFTbF8=
-  dependencies:
-    path-key "^2.0.0"
-
-npm-run-path@^4.0.0:
+npm-run-path@^4.0.1:
   version "4.0.1"
   resolved "https://registry.yarnpkg.com/npm-run-path/-/npm-run-path-4.0.1.tgz#b7ecd1e5ed53da8e37a55e1c2269e0b97ed748ea"
   integrity sha512-S48WzZW777zhNIrn7gxOlISNAqi9ZC/uQFnRdbeIHhZhCA6UqpkOT8T1G7BvfdgP4Er8gF4sUbaS0i7QvIfCWw==
@@ -7229,10 +6943,10 @@ object-copy@^0.1.0:
     define-property "^0.2.5"
     kind-of "^3.0.3"
 
-object-inspect@^1.9.0:
-  version "1.9.0"
-  resolved "https://registry.yarnpkg.com/object-inspect/-/object-inspect-1.9.0.tgz#c90521d74e1127b67266ded3394ad6116986533a"
-  integrity sha512-i3Bp9iTqwhaLZBxGkRfo5ZbE07BQRT7MGu8+nNgwW9ItGp1TzCTw2DLEoWwjClxBjOFI/hWljTAmYGCEwmtnOw==
+object-inspect@^1.10.3, object-inspect@^1.9.0:
+  version "1.10.3"
+  resolved "https://registry.yarnpkg.com/object-inspect/-/object-inspect-1.10.3.tgz#c2aa7d2d09f50c99375704f7a0adf24c5782d369"
+  integrity sha512-e5mCJlSH7poANfC8z8S9s9S2IN5/4Zb3aZ33f5s8YqoazCFzNLloLU8r5VCG+G7WoqLvAAZoVMcy3tp/3X0Plw==
 
 object-keys@^1.0.12, object-keys@^1.1.1:
   version "1.1.1"
@@ -7298,11 +7012,6 @@ object.reduce@^1.0.0:
     for-own "^1.0.0"
     make-iterator "^1.0.0"
 
-octokit-pagination-methods@^1.1.0:
-  version "1.1.0"
-  resolved "https://registry.yarnpkg.com/octokit-pagination-methods/-/octokit-pagination-methods-1.1.0.tgz#cf472edc9d551055f9ef73f6e42b4dbb4c80bea4"
-  integrity sha512-fZ4qZdQ2nxJvtcasX7Ghl+WlWS/d9IgnBIwFZXVNNZUmzpno91SX5bc5vuxiuKoCtK78XxGGNuSCrDC7xYB3OQ==
-
 once@^1.3.0, once@^1.3.1, once@^1.3.2, once@^1.4.0:
   version "1.4.0"
   resolved "https://registry.yarnpkg.com/once/-/once-1.4.0.tgz#583b1aa775961d4b113ac17d9c50baef9dd76bd1"
@@ -7310,14 +7019,7 @@ once@^1.3.0, once@^1.3.1, once@^1.3.2, once@^1.4.0:
   dependencies:
     wrappy "1"
 
-onetime@^2.0.0:
-  version "2.0.1"
-  resolved "https://registry.yarnpkg.com/onetime/-/onetime-2.0.1.tgz#067428230fd67443b2794b22bba528b6867962d4"
-  integrity sha1-BnQoIw/WdEOyeUsiu6UotoZ5YtQ=
-  dependencies:
-    mimic-fn "^1.0.0"
-
-onetime@^5.1.0:
+onetime@^5.1.0, onetime@^5.1.2:
   version "5.1.2"
   resolved "https://registry.yarnpkg.com/onetime/-/onetime-5.1.2.tgz#d0e96ebb56b07476df1dd9c4806e5237985ca45e"
   integrity sha512-kbpaSSGJTWdAY5KPVeMOKXSrPtr8C8C7wodJbcsd51jRnmD+GZu8Y0VoU6Dm5Z4vWr0Ig/1NKuWRKf7j5aaYSg==
@@ -7374,20 +7076,12 @@ os-locale@^1.4.0:
   dependencies:
     lcid "^1.0.0"
 
-os-name@^3.1.0:
-  version "3.1.0"
-  resolved "https://registry.yarnpkg.com/os-name/-/os-name-3.1.0.tgz#dec19d966296e1cd62d701a5a66ee1ddeae70801"
-  integrity sha512-h8L+8aNjNcMpo/mAIBPn5PXCM16iyPGjHNWo6U1YO8sJTMHtEtyczI6QJnLoplswm6goopQkqc7OAnjhWcugVg==
-  dependencies:
-    macos-release "^2.2.0"
-    windows-release "^3.1.0"
-
 os-tmpdir@^1.0.0, os-tmpdir@~1.0.2:
   version "1.0.2"
   resolved "https://registry.yarnpkg.com/os-tmpdir/-/os-tmpdir-1.0.2.tgz#bbe67406c79aa85c5cfec766fe5734555dfa1274"
   integrity sha1-u+Z0BseaqFxc/sdm/lc0VV36EnQ=
 
-osenv@^0.1.4, osenv@^0.1.5:
+osenv@^0.1.4:
   version "0.1.5"
   resolved "https://registry.yarnpkg.com/osenv/-/osenv-0.1.5.tgz#85cdfafaeb28e8677f416e287592b5f3f49ea410"
   integrity sha512-0CWcCECdMVc2Rw3U5w9ZjqX6ga6ubk1xDVKxtBQPK7wis/0F2r9T6k4ydGYhecl7YUBxBVxhL5oisPsNxAPe2g==
@@ -7433,7 +7127,7 @@ p-limit@^1.1.0:
   dependencies:
     p-try "^1.0.0"
 
-p-limit@^2.0.0, p-limit@^2.2.0:
+p-limit@^2.2.0:
   version "2.3.0"
   resolved "https://registry.yarnpkg.com/p-limit/-/p-limit-2.3.0.tgz#3dd33c647a214fdfffd835933eb086da0dc21db1"
   integrity sha512-//88mFWSJx8lxCzwdAABTJL2MyWB12+eIY7MDL2SqLmAkeKU9qxRvWuSyTjm3FUmpBEMuFfckAIqEaVGUDxb6w==
@@ -7454,13 +7148,6 @@ p-locate@^2.0.0:
   dependencies:
     p-limit "^1.1.0"
 
-p-locate@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/p-locate/-/p-locate-3.0.0.tgz#322d69a05c0264b25997d9f40cd8a891ab0064a4"
-  integrity sha512-x+12w/To+4GFfgJhBEpiDcLozRJGegY+Ei7/z0tSLkMmxGZNybVMSfWj9aJn8Z5Fc7dBUNJOOVgPv2H7IwulSQ==
-  dependencies:
-    p-limit "^2.0.0"
-
 p-locate@^4.1.0:
   version "4.1.0"
   resolved "https://registry.yarnpkg.com/p-locate/-/p-locate-4.1.0.tgz#a3428bb7088b3a60292f66919278b7c297ad4f07"
@@ -7468,14 +7155,12 @@ p-locate@^4.1.0:
   dependencies:
     p-limit "^2.2.0"
 
-p-map-series@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/p-map-series/-/p-map-series-1.0.0.tgz#bf98fe575705658a9e1351befb85ae4c1f07bdca"
-  integrity sha1-v5j+V1cFZYqeE1G++4WuTB8Hvco=
-  dependencies:
-    p-reduce "^1.0.0"
-
-p-map@^2.0.0, p-map@^2.1.0:
+p-map-series@^2.1.0:
+  version "2.1.0"
+  resolved "https://registry.yarnpkg.com/p-map-series/-/p-map-series-2.1.0.tgz#7560d4c452d9da0c07e692fdbfe6e2c81a2a91f2"
+  integrity sha512-RpYIIK1zXSNEOdwxcfe7FdvGcs7+y5n8rifMhMNWvaxRNMPINJHF5GDeuVxWqnfrcHPSCnp7Oo5yNXHId9Av2Q==
+
+p-map@^2.0.0:
   version "2.1.0"
   resolved "https://registry.yarnpkg.com/p-map/-/p-map-2.1.0.tgz#310928feef9c9ecc65b68b17693018a665cea175"
   integrity sha512-y3b8Kpd8OAN444hxfBbFfj1FY/RjtTd8tzYwhUqNYXx0fXx2iX4maP4Qr6qhIKbQXI02wTLAda4fYUbDagTUFw==
@@ -7487,24 +7172,32 @@ p-map@^3.0.0:
   dependencies:
     aggregate-error "^3.0.0"
 
-p-pipe@^1.2.0:
-  version "1.2.0"
-  resolved "https://registry.yarnpkg.com/p-pipe/-/p-pipe-1.2.0.tgz#4b1a11399a11520a67790ee5a0c1d5881d6befe9"
-  integrity sha1-SxoROZoRUgpneQ7loMHViB1r7+k=
-
-p-queue@^4.0.0:
+p-map@^4.0.0:
   version "4.0.0"
-  resolved "https://registry.yarnpkg.com/p-queue/-/p-queue-4.0.0.tgz#ed0eee8798927ed6f2c2f5f5b77fdb2061a5d346"
-  integrity sha512-3cRXXn3/O0o3+eVmUroJPSj/esxoEFIm0ZOno/T+NzG/VZgPOqQ8WKmlNqubSEpZmCIngEy34unkHGg83ZIBmg==
+  resolved "https://registry.yarnpkg.com/p-map/-/p-map-4.0.0.tgz#bb2f95a5eda2ec168ec9274e06a747c3e2904d2b"
+  integrity sha512-/bjOqmgETBYB5BoEeGVea8dmvHb2m9GLy1E9W43yeyfP6QQCZGFNa+XRceJEuDB6zqr+gKpIAmlLebMpykw/MQ==
   dependencies:
-    eventemitter3 "^3.1.0"
+    aggregate-error "^3.0.0"
 
-p-reduce@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/p-reduce/-/p-reduce-1.0.0.tgz#18c2b0dd936a4690a529f8231f58a0fdb6a47dfa"
-  integrity sha1-GMKw3ZNqRpClKfgjH1ig/bakffo=
+p-pipe@^3.1.0:
+  version "3.1.0"
+  resolved "https://registry.yarnpkg.com/p-pipe/-/p-pipe-3.1.0.tgz#48b57c922aa2e1af6a6404cb7c6bf0eb9cc8e60e"
+  integrity sha512-08pj8ATpzMR0Y80x50yJHn37NF6vjrqHutASaX5LiH5npS9XPvrUmscd9MF5R4fuYRHOxQR1FfMIlF7AzwoPqw==
+
+p-queue@^6.6.2:
+  version "6.6.2"
+  resolved "https://registry.yarnpkg.com/p-queue/-/p-queue-6.6.2.tgz#2068a9dcf8e67dd0ec3e7a2bcb76810faa85e426"
+  integrity sha512-RwFpb72c/BhQLEXIZ5K2e+AhgNVmIejGlTgiB9MzZ0e93GRvqZ7uSi0dvRF7/XIXDeNkra2fNHBxTyPDGySpjQ==
+  dependencies:
+    eventemitter3 "^4.0.4"
+    p-timeout "^3.2.0"
+
+p-reduce@^2.0.0, p-reduce@^2.1.0:
+  version "2.1.0"
+  resolved "https://registry.yarnpkg.com/p-reduce/-/p-reduce-2.1.0.tgz#09408da49507c6c274faa31f28df334bc712b64a"
+  integrity sha512-2USApvnsutq8uoxZBGbbWM0JIYLiEMJ9RlaN7fAzVNb9OZN0SHjjTTfIcb667XynS5Y1VhwDJVDa72TnPzAYWw==
 
-p-timeout@^3.1.0:
+p-timeout@^3.1.0, p-timeout@^3.2.0:
   version "3.2.0"
   resolved "https://registry.yarnpkg.com/p-timeout/-/p-timeout-3.2.0.tgz#c7e17abc971d2a7962ef83626b35d635acf23dfe"
   integrity sha512-rhIwUycgwwKcP9yTOOFK/AKsAopjjCakVqLHePO3CC6Mir1Z99xT+R63jZxAT5lFZLa2inS5h+ZS2GvR99/FBg==
@@ -7521,12 +7214,37 @@ p-try@^2.0.0:
   resolved "https://registry.yarnpkg.com/p-try/-/p-try-2.2.0.tgz#cb2868540e313d61de58fafbe35ce9004d5540e6"
   integrity sha512-R4nPAVTAU0B9D35/Gk3uJf/7XYbQcyohSKdvAxIRSNghFl4e71hVoGnBNQz9cWaXxO2I10KTC+3jMdvvoKw6dQ==
 
-p-waterfall@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/p-waterfall/-/p-waterfall-1.0.0.tgz#7ed94b3ceb3332782353af6aae11aa9fc235bb00"
-  integrity sha1-ftlLPOszMngjU69qrhGqn8I1uwA=
-  dependencies:
-    p-reduce "^1.0.0"
+p-waterfall@^2.1.1:
+  version "2.1.1"
+  resolved "https://registry.yarnpkg.com/p-waterfall/-/p-waterfall-2.1.1.tgz#63153a774f472ccdc4eb281cdb2967fcf158b2ee"
+  integrity sha512-RRTnDb2TBG/epPRI2yYXsimO0v3BXC8Yd3ogr1545IaqKK17VGhbWVeGGN+XfCm/08OK8635nH31c8bATkHuSw==
+  dependencies:
+    p-reduce "^2.0.0"
+
+pacote@^11.2.6:
+  version "11.3.3"
+  resolved "https://registry.yarnpkg.com/pacote/-/pacote-11.3.3.tgz#d7d6091464f77c09691699df2ded13ab906b3e68"
+  integrity sha512-GQxBX+UcVZrrJRYMK2HoG+gPeSUX/rQhnbPkkGrCYa4n2F/bgClFPaMm0nsdnYrxnmUy85uMHoFXZ0jTD0drew==
+  dependencies:
+    "@npmcli/git" "^2.0.1"
+    "@npmcli/installed-package-contents" "^1.0.6"
+    "@npmcli/promise-spawn" "^1.2.0"
+    "@npmcli/run-script" "^1.8.2"
+    cacache "^15.0.5"
+    chownr "^2.0.0"
+    fs-minipass "^2.1.0"
+    infer-owner "^1.0.4"
+    minipass "^3.1.3"
+    mkdirp "^1.0.3"
+    npm-package-arg "^8.0.1"
+    npm-packlist "^2.1.4"
+    npm-pick-manifest "^6.0.0"
+    npm-registry-fetch "^10.0.0"
+    promise-retry "^2.0.1"
+    read-package-json-fast "^2.0.1"
+    rimraf "^3.0.2"
+    ssri "^8.0.1"
+    tar "^6.1.0"
 
 pad-left@^2.1.0:
   version "2.1.0"
@@ -7535,15 +7253,6 @@ pad-left@^2.1.0:
   dependencies:
     repeat-string "^1.5.4"
 
-parallel-transform@^1.1.0:
-  version "1.2.0"
-  resolved "https://registry.yarnpkg.com/parallel-transform/-/parallel-transform-1.2.0.tgz#9049ca37d6cb2182c3b1d2c720be94d14a5814fc"
-  integrity sha512-P2vSmIu38uIlvdcU7fDkyrxj33gTUy/ABO5ZUbGowxNCopBq/OoD42bP4UmMrJoPyk4Uqf0mu3mtWBhHCZD8yg==
-  dependencies:
-    cyclist "^1.0.1"
-    inherits "^2.0.3"
-    readable-stream "^2.1.5"
-
 parent-module@^1.0.0:
   version "1.0.1"
   resolved "https://registry.yarnpkg.com/parent-module/-/parent-module-1.0.1.tgz#691d2709e78c79fae3a156622452d00762caaaa2"
@@ -7657,7 +7366,7 @@ path-is-absolute@^1.0.0:
   resolved "https://registry.yarnpkg.com/path-is-absolute/-/path-is-absolute-1.0.1.tgz#174b9268735534ffbc7ace6bf53a5a9e1b5c5f5f"
   integrity sha1-F0uSaHNVNP+8es5r9TpanhtcX18=
 
-path-key@^2.0.0, path-key@^2.0.1:
+path-key@^2.0.1:
   version "2.0.1"
   resolved "https://registry.yarnpkg.com/path-key/-/path-key-2.0.1.tgz#411cadb574c5a140d3a4b1910d40d80cc9f40b40"
   integrity sha1-QRyttXTFoUDTpLGRDUDYDMn0C0A=
@@ -7668,9 +7377,9 @@ path-key@^3.0.0, path-key@^3.1.0:
   integrity sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==
 
 path-parse@^1.0.6:
-  version "1.0.6"
-  resolved "https://registry.yarnpkg.com/path-parse/-/path-parse-1.0.6.tgz#d62dbb5679405d72c4737ec58600e9ddcf06d24c"
-  integrity sha512-GSmOT2EbHrINBf9SR7CDELwlJ8AENk3Qn7OikK4nFYAu3Ote2+JYNVvkpAEQm3/TLNEJFD/xZJjzyxg3KBWOzw==
+  version "1.0.7"
+  resolved "https://registry.yarnpkg.com/path-parse/-/path-parse-1.0.7.tgz#fbc114b60ca42b30d9daf5858e4bd68bbedb6735"
+  integrity sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw==
 
 path-root-regex@^0.1.0:
   version "0.1.2"
@@ -7710,10 +7419,10 @@ performance-now@^2.1.0:
   resolved "https://registry.yarnpkg.com/performance-now/-/performance-now-2.1.0.tgz#6309f4e0e5fa913ec1c69307ae364b4b377c9e7b"
   integrity sha1-Ywn04OX6kT7BxpMHrjZLSzd8nns=
 
-picomatch@^2.0.4, picomatch@^2.0.5, picomatch@^2.2.1:
-  version "2.2.2"
-  resolved "https://registry.yarnpkg.com/picomatch/-/picomatch-2.2.2.tgz#21f333e9b6b8eaff02468f5146ea406d345f4dad"
-  integrity sha512-q0M/9eZHzmr0AulXyPwNfZjtwZ/RBZlbN3K3CErVrk50T2ASYI7Bye0EvekFY3IP1Nt2DHu0re+V2ZHIpMkuWg==
+picomatch@^2.0.4, picomatch@^2.2.1, picomatch@^2.2.3:
+  version "2.3.0"
+  resolved "https://registry.yarnpkg.com/picomatch/-/picomatch-2.3.0.tgz#f1f061de8f6a4bf022892e2d128234fb98302972"
+  integrity sha512-lY1Q/PiJGC2zOv/z391WOTD+Z02bCgsFfvxoXXf6h7kv9o+WmsmzYqrAwY63sNgOxE4xEdq0WyUnXfKeBrSvYw==
 
 pidtree@^0.3.0:
   version "0.3.1"
@@ -7735,6 +7444,11 @@ pify@^4.0.1:
   resolved "https://registry.yarnpkg.com/pify/-/pify-4.0.1.tgz#4b2cd25c50d598735c50292224fd8c6df41e3231"
   integrity sha512-uB80kBFb/tfd68bVleG9T5GGsGPjJrLAUpR5PZIrhBnIaRTQRjqdJSsIKkOP6OAIFbj7GOrcudc5pNjZ+geV2g==
 
+pify@^5.0.0:
+  version "5.0.0"
+  resolved "https://registry.yarnpkg.com/pify/-/pify-5.0.0.tgz#1f5eca3f5e87ebec28cc6d54a0e4aaf00acc127f"
+  integrity sha512-eW/gHNMlxdSP6dmG6uJip6FXN0EQBwm2clYYd8Wul42Cwu/DK8HEftzsapcNdYe2MfLiIwZqsDk2RDEsTE79hA==
+
 pinkie-promise@^2.0.0:
   version "2.0.1"
   resolved "https://registry.yarnpkg.com/pinkie-promise/-/pinkie-promise-2.0.1.tgz#2135d6dfa7a358c069ac9b178776288228450ffa"
@@ -7754,13 +7468,6 @@ pirates@^4.0.1:
   dependencies:
     node-modules-regexp "^1.0.0"
 
-pkg-dir@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/pkg-dir/-/pkg-dir-3.0.0.tgz#2749020f239ed990881b1f71210d51eb6523bea3"
-  integrity sha512-/E57AYkoeQ25qkxMj5PBOVgF8Kiu/h7cYS30Z5+R7WaiCCBfLq58ZI/dSeaEKb9WVJV5n/03QwrN3IeWIFllvw==
-  dependencies:
-    find-up "^3.0.0"
-
 pkg-dir@^4.2.0:
   version "4.2.0"
   resolved "https://registry.yarnpkg.com/pkg-dir/-/pkg-dir-4.2.0.tgz#f099133df7ede422e81d1d8448270eeb3e4261f3"
@@ -7822,16 +7529,21 @@ pretty-format@^26.0.0, pretty-format@^26.6.2:
     ansi-styles "^4.0.0"
     react-is "^17.0.1"
 
+pretty-format@^27.0.1:
+  version "27.0.1"
+  resolved "https://registry.yarnpkg.com/pretty-format/-/pretty-format-27.0.1.tgz#c4094621dfbd3e8ab751964d1cf01edc6f88474d"
+  integrity sha512-qE+0J6c/gd+R6XTcQgPJMc5hMJNsxzSF5p8iZSbMZ7GQzYGlSLNkh2P80Wa2dbF4gEVUsJEgcrBY+1L2/j265w==
+  dependencies:
+    "@jest/types" "^27.0.1"
+    ansi-regex "^5.0.0"
+    ansi-styles "^5.0.0"
+    react-is "^17.0.1"
+
 pretty-hrtime@^1.0.0:
   version "1.0.3"
   resolved "https://registry.yarnpkg.com/pretty-hrtime/-/pretty-hrtime-1.0.3.tgz#b7e3ea42435a4c9b2759d99e0f201eb195802ee1"
   integrity sha1-t+PqQkNaTJsnWdmeDyAesZWALuE=
 
-process-nextick-args@^1.0.7:
-  version "1.0.7"
-  resolved "https://registry.yarnpkg.com/process-nextick-args/-/process-nextick-args-1.0.7.tgz#150e20b756590ad3f91093f25a4f2ad8bff30ba3"
-  integrity sha1-FQ4gt1ZZCtP5EJPyWk8q2L/zC6M=
-
 process-nextick-args@^2.0.0, process-nextick-args@~2.0.0:
   version "2.0.1"
   resolved "https://registry.yarnpkg.com/process-nextick-args/-/process-nextick-args-2.0.1.tgz#7820d9b16120cc55ca9ae7792680ae7dba6d7fe2"
@@ -7847,13 +7559,13 @@ promise-inflight@^1.0.1:
   resolved "https://registry.yarnpkg.com/promise-inflight/-/promise-inflight-1.0.1.tgz#98472870bf228132fcbdd868129bad12c3c029e3"
   integrity sha1-mEcocL8igTL8vdhoEputEsPAKeM=
 
-promise-retry@^1.1.1:
-  version "1.1.1"
-  resolved "https://registry.yarnpkg.com/promise-retry/-/promise-retry-1.1.1.tgz#6739e968e3051da20ce6497fb2b50f6911df3d6d"
-  integrity sha1-ZznpaOMFHaIM5kl/srUPaRHfPW0=
+promise-retry@^2.0.1:
+  version "2.0.1"
+  resolved "https://registry.yarnpkg.com/promise-retry/-/promise-retry-2.0.1.tgz#ff747a13620ab57ba688f5fc67855410c370da22"
+  integrity sha512-y+WKFlBR8BGXnsNlIHFGPZmyDf3DFMoLhaflAnyZgV6rG6xu+JwesTo2Q9R6XwYmtmwAFCkAk3e35jEdoeh/3g==
   dependencies:
-    err-code "^1.0.0"
-    retry "^0.10.0"
+    err-code "^2.0.2"
+    retry "^0.12.0"
 
 promise@^8.0.1:
   version "8.1.0"
@@ -7887,13 +7599,6 @@ protocols@^1.1.0, protocols@^1.4.0:
   resolved "https://registry.yarnpkg.com/protocols/-/protocols-1.4.8.tgz#48eea2d8f58d9644a4a32caae5d5db290a075ce8"
   integrity sha512-IgjKyaUSjsROSO8/D49Ab7hP8mJgTYcqApOqdPhLoPxAplXmkp+zRvsrSQjFn5by0rhm4VH0GAUELIPpx7B1yg==
 
-protoduck@^5.0.1:
-  version "5.0.1"
-  resolved "https://registry.yarnpkg.com/protoduck/-/protoduck-5.0.1.tgz#03c3659ca18007b69a50fd82a7ebcc516261151f"
-  integrity sha512-WxoCeDCoCBY55BMvj4cAEjdVUFGRWed9ZxPlqTKYyw1nDDTQ4pqmnIMAGfJlg7Dx35uB/M+PHJPTmGOvaCaPTg==
-  dependencies:
-    genfun "^5.0.0"
-
 psl@^1.1.28, psl@^1.1.33:
   version "1.8.0"
   resolved "https://registry.yarnpkg.com/psl/-/psl-1.8.0.tgz#9326f8bcfb013adcc005fdff056acce020e51c24"
@@ -7907,15 +7612,7 @@ pump@^2.0.0:
     end-of-stream "^1.1.0"
     once "^1.3.1"
 
-pump@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/pump/-/pump-3.0.0.tgz#b4a2116815bde2f4e1ea602354e8c75565107a64"
-  integrity sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==
-  dependencies:
-    end-of-stream "^1.1.0"
-    once "^1.3.1"
-
-pumpify@^1.3.3, pumpify@^1.3.5:
+pumpify@^1.3.5:
   version "1.5.1"
   resolved "https://registry.yarnpkg.com/pumpify/-/pumpify-1.5.1.tgz#36513be246ab27570b1a374a5ce278bfd74370ce"
   integrity sha512-oClZI37HvuUJJxSKKrC17bZ9Cu0ZYhEAGPsPUy9KlMUmv9dKX2o77RUmq7f3XjIxbwyGwYzbzQ1L2Ks8sIradQ==
@@ -7961,11 +7658,6 @@ queue-microtask@^1.2.2:
   resolved "https://registry.yarnpkg.com/queue-microtask/-/queue-microtask-1.2.3.tgz#4929228bbc724dfac43e0efb058caf7b6cfb6243"
   integrity sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A==
 
-quick-lru@^1.0.0:
-  version "1.1.0"
-  resolved "https://registry.yarnpkg.com/quick-lru/-/quick-lru-1.1.0.tgz#4360b17c61136ad38078397ff11416e186dcfbb8"
-  integrity sha1-Q2CxfGETatOAeDl/8RQW4Ybc+7g=
-
 quick-lru@^4.0.1:
   version "4.0.1"
   resolved "https://registry.yarnpkg.com/quick-lru/-/quick-lru-4.0.1.tgz#5b8878f113a58217848c6482026c73e1ba57727f"
@@ -7992,14 +7684,20 @@ react-is@^17.0.1:
   resolved "https://registry.yarnpkg.com/react-is/-/react-is-17.0.2.tgz#e691d4a8e9c789365655539ab372762b0efb54f0"
   integrity sha512-w2GsyukL62IJnlaff/nRegPQR94C/XXamvMWmSHRJ4y7Ts/4ocGRmTHvOs8PSE6pB3dWOrD/nueuU5sduBsQ4w==
 
-read-cmd-shim@^1.0.1:
-  version "1.0.5"
-  resolved "https://registry.yarnpkg.com/read-cmd-shim/-/read-cmd-shim-1.0.5.tgz#87e43eba50098ba5a32d0ceb583ab8e43b961c16"
-  integrity sha512-v5yCqQ/7okKoZZkBQUAfTsQ3sVJtXdNfbPnI5cceppoxEVLYA3k+VtV2omkeo8MS94JCy4fSiUwlRBAwCVRPUA==
+read-cmd-shim@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/read-cmd-shim/-/read-cmd-shim-2.0.0.tgz#4a50a71d6f0965364938e9038476f7eede3928d9"
+  integrity sha512-HJpV9bQpkl6KwjxlJcBoqu9Ba0PQg8TqSNIOrulGt54a0uup0HtevreFHzYzkm0lpnleRdNBzXznKrgxglEHQw==
+
+read-package-json-fast@^2.0.1:
+  version "2.0.2"
+  resolved "https://registry.yarnpkg.com/read-package-json-fast/-/read-package-json-fast-2.0.2.tgz#2dcb24d9e8dd50fb322042c8c35a954e6cc7ac9e"
+  integrity sha512-5fyFUyO9B799foVk4n6ylcoAktG/FbE3jwRKxvwaeSrIunaoMc0u81dzXxjeAFKOce7O5KncdfwpGvvs6r5PsQ==
   dependencies:
-    graceful-fs "^4.1.2"
+    json-parse-even-better-errors "^2.3.0"
+    npm-normalize-package-bin "^1.0.1"
 
-"read-package-json@1 || 2", read-package-json@^2.0.0, read-package-json@^2.0.13:
+read-package-json@^2.0.0:
   version "2.1.2"
   resolved "https://registry.yarnpkg.com/read-package-json/-/read-package-json-2.1.2.tgz#6992b2b66c7177259feb8eaac73c3acd28b9222a"
   integrity sha512-D1KmuLQr6ZSJS0tW8hf3WGpRlwszJOXZ3E8Yd/DNRaM5d+1wVRZdHlpGBLAuovjr28LbWvjpWkBHMxpRGGjzNA==
@@ -8009,7 +7707,17 @@ read-cmd-shim@^1.0.1:
     normalize-package-data "^2.0.0"
     npm-normalize-package-bin "^1.0.0"
 
-read-package-tree@^5.1.6:
+read-package-json@^3.0.0, read-package-json@^3.0.1:
+  version "3.0.1"
+  resolved "https://registry.yarnpkg.com/read-package-json/-/read-package-json-3.0.1.tgz#c7108f0b9390257b08c21e3004d2404c806744b9"
+  integrity sha512-aLcPqxovhJTVJcsnROuuzQvv6oziQx4zd3JvG0vGCL5MjTONUc4uJ90zCBC6R7W7oUKBNoR/F8pkyfVwlbxqng==
+  dependencies:
+    glob "^7.1.1"
+    json-parse-even-better-errors "^2.3.0"
+    normalize-package-data "^3.0.0"
+    npm-normalize-package-bin "^1.0.0"
+
+read-package-tree@^5.3.1:
   version "5.3.1"
   resolved "https://registry.yarnpkg.com/read-package-tree/-/read-package-tree-5.3.1.tgz#a32cb64c7f31eb8a6f31ef06f9cedf74068fe636"
   integrity sha512-mLUDsD5JVtlZxjSlPPx1RETkNjjvQYuweKwNVt1Sn8kP5Jh44pvYuUHCp6xSVDZWbNxVxG5lyZJ921aJH61sTw==
@@ -8078,7 +7786,16 @@ read@1, read@~1.0.1:
   dependencies:
     mute-stream "~0.0.4"
 
-"readable-stream@1 || 2", readable-stream@^2.0.0, readable-stream@^2.0.1, readable-stream@^2.0.2, readable-stream@^2.0.5, readable-stream@^2.0.6, readable-stream@^2.1.5, readable-stream@^2.2.2, readable-stream@^2.3.3, readable-stream@^2.3.5, readable-stream@^2.3.6, readable-stream@~2.3.6:
+"readable-stream@2 || 3", readable-stream@3, readable-stream@^3.0.0, readable-stream@^3.0.2, readable-stream@^3.6.0:
+  version "3.6.0"
+  resolved "https://registry.yarnpkg.com/readable-stream/-/readable-stream-3.6.0.tgz#337bbda3adc0706bd3e024426a286d4b4b2c9198"
+  integrity sha512-BViHy7LKeTz4oNnkcLJ+lVSL6vpiFeX6/d3oSH8zCW7UxP2onchk+vTGB143xuFjHS3deTgkKoXXymXqymiIdA==
+  dependencies:
+    inherits "^2.0.3"
+    string_decoder "^1.1.1"
+    util-deprecate "^1.0.1"
+
+readable-stream@^2.0.0, readable-stream@^2.0.1, readable-stream@^2.0.2, readable-stream@^2.0.5, readable-stream@^2.0.6, readable-stream@^2.1.5, readable-stream@^2.2.2, readable-stream@^2.3.3, readable-stream@^2.3.5, readable-stream@^2.3.6, readable-stream@~2.3.6:
   version "2.3.7"
   resolved "https://registry.yarnpkg.com/readable-stream/-/readable-stream-2.3.7.tgz#1eca1cf711aef814c04f62252a36a62f6cb23b57"
   integrity sha512-Ebho8K4jIbHAxnuxi7o42OrZgF/ZTNcsZj6nRKyUmkhLFq8CHItp/fy6hQZuZmP/n3yZ9VBUbp4zz/mX8hmYPw==
@@ -8091,15 +7808,6 @@ read@1, read@~1.0.1:
     string_decoder "~1.1.1"
     util-deprecate "~1.0.1"
 
-"readable-stream@2 || 3", readable-stream@3, readable-stream@^3.0.0, readable-stream@^3.0.2:
-  version "3.6.0"
-  resolved "https://registry.yarnpkg.com/readable-stream/-/readable-stream-3.6.0.tgz#337bbda3adc0706bd3e024426a286d4b4b2c9198"
-  integrity sha512-BViHy7LKeTz4oNnkcLJ+lVSL6vpiFeX6/d3oSH8zCW7UxP2onchk+vTGB143xuFjHS3deTgkKoXXymXqymiIdA==
-  dependencies:
-    inherits "^2.0.3"
-    string_decoder "^1.1.1"
-    util-deprecate "^1.0.1"
-
 readdir-scoped-modules@^1.0.0:
   version "1.1.0"
   resolved "https://registry.yarnpkg.com/readdir-scoped-modules/-/readdir-scoped-modules-1.1.0.tgz#8d45407b4f870a0dcaebc0e28670d18e74514309"
@@ -8134,14 +7842,6 @@ redent@^1.0.0:
     indent-string "^2.1.0"
     strip-indent "^1.0.1"
 
-redent@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/redent/-/redent-2.0.0.tgz#c1b2007b42d57eb1389079b3c8333639d5e1ccaa"
-  integrity sha1-wbIAe0LVfrE4kHmzyDM2OdXhzKo=
-  dependencies:
-    indent-string "^3.0.0"
-    strip-indent "^2.0.0"
-
 redent@^3.0.0:
   version "3.0.0"
   resolved "https://registry.yarnpkg.com/redent/-/redent-3.0.0.tgz#e557b7998316bb53c9f1f56fa626352c6963059f"
@@ -8221,22 +7921,6 @@ replace-homedir@^1.0.0:
     is-absolute "^1.0.0"
     remove-trailing-separator "^1.1.0"
 
-request-promise-core@1.1.4:
-  version "1.1.4"
-  resolved "https://registry.yarnpkg.com/request-promise-core/-/request-promise-core-1.1.4.tgz#3eedd4223208d419867b78ce815167d10593a22f"
-  integrity sha512-TTbAfBBRdWD7aNNOoVOBH4pN/KigV6LyapYNNlAPA8JwbovRti1E88m3sYAwsLi5ryhPKsE9APwnjFTgdUjTpw==
-  dependencies:
-    lodash "^4.17.19"
-
-request-promise-native@^1.0.9:
-  version "1.0.9"
-  resolved "https://registry.yarnpkg.com/request-promise-native/-/request-promise-native-1.0.9.tgz#e407120526a5efdc9a39b28a5679bf47b9d9dc28"
-  integrity sha512-wcW+sIUiWnKgNY0dqCpOZkUbF/I+YPi+f09JZIDa39Ec+q82CpSYniDp+ISgTTbKmnpJWASeJBPZmoxH84wt3g==
-  dependencies:
-    request-promise-core "1.1.4"
-    stealthy-require "^1.1.1"
-    tough-cookie "^2.3.3"
-
 request@^2.88.0, request@^2.88.2:
   version "2.88.2"
   resolved "https://registry.yarnpkg.com/request/-/request-2.88.2.tgz#d73c918731cb5a87da047e207234146f664d12b3"
@@ -8278,18 +7962,6 @@ require-main-filename@^1.0.1:
   resolved "https://registry.yarnpkg.com/require-main-filename/-/require-main-filename-1.0.1.tgz#97f717b69d48784f5f526a6c5aa8ffdda055a4d1"
   integrity sha1-l/cXtp1IeE9fUmpsWqj/3aBVpNE=
 
-require-main-filename@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/require-main-filename/-/require-main-filename-2.0.0.tgz#d0b329ecc7cc0f61649f62215be69af54aa8989b"
-  integrity sha512-NKN5kMDylKuldxYLSUfrbo5Tuzh4hd+2E8NPPX02mZtn1VuREQToYe/ZdlJy+J3uCpfaiGF05e7B8W0iXbQHmg==
-
-resolve-cwd@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/resolve-cwd/-/resolve-cwd-2.0.0.tgz#00a9f7387556e27038eae232caa372a6a59b665a"
-  integrity sha1-AKn3OHVW4nA46uIyyqNypqWbZlo=
-  dependencies:
-    resolve-from "^3.0.0"
-
 resolve-cwd@^3.0.0:
   version "3.0.0"
   resolved "https://registry.yarnpkg.com/resolve-cwd/-/resolve-cwd-3.0.0.tgz#0f0075f1bb2544766cf73ba6a6e2adfebcb13f2d"
@@ -8305,11 +7977,6 @@ resolve-dir@^1.0.0, resolve-dir@^1.0.1:
     expand-tilde "^2.0.0"
     global-modules "^1.0.0"
 
-resolve-from@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/resolve-from/-/resolve-from-3.0.0.tgz#b22c7af7d9d6881bc8b6e653335eebcb0a188748"
-  integrity sha1-six699nWiBvItuZTM17rywoYh0g=
-
 resolve-from@^4.0.0:
   version "4.0.0"
   resolved "https://registry.yarnpkg.com/resolve-from/-/resolve-from-4.0.0.tgz#4abcd852ad32dd7baabfe9b40e00a36db5f392e6"
@@ -8332,7 +7999,7 @@ resolve-url@^0.2.1:
   resolved "https://registry.yarnpkg.com/resolve-url/-/resolve-url-0.2.1.tgz#2c637fe77c893afd2a663fe21aa9080068e2052a"
   integrity sha1-LGN/53yJOv0qZj/iGqkIAGjiBSo=
 
-resolve@^1.1.6, resolve@^1.1.7, resolve@^1.10.0, resolve@^1.18.1, resolve@^1.20.0, resolve@^1.4.0:
+resolve@^1.1.6, resolve@^1.1.7, resolve@^1.10.0, resolve@^1.20.0, resolve@^1.4.0:
   version "1.20.0"
   resolved "https://registry.yarnpkg.com/resolve/-/resolve-1.20.0.tgz#629a013fb3f70755d6f0b7935cc1c2c5378b1975"
   integrity sha512-wENBPt4ySzg4ybFQW2TT1zMQucPK95HSh/nq2CFTZVOGut2+pQvSsgtda4d26YrYcr067wjbmzOG8byDPBX63A==
@@ -8340,14 +8007,6 @@ resolve@^1.1.6, resolve@^1.1.7, resolve@^1.10.0, resolve@^1.18.1, resolve@^1.20.
     is-core-module "^2.2.0"
     path-parse "^1.0.6"
 
-restore-cursor@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/restore-cursor/-/restore-cursor-2.0.0.tgz#9f7ee287f82fd326d4fd162923d62129eee0dfaf"
-  integrity sha1-n37ih/gv0ybU/RYpI9YhKe7g368=
-  dependencies:
-    onetime "^2.0.0"
-    signal-exit "^3.0.2"
-
 restore-cursor@^3.1.0:
   version "3.1.0"
   resolved "https://registry.yarnpkg.com/restore-cursor/-/restore-cursor-3.1.0.tgz#39f67c54b3a7a58cea5236d95cf0034239631f7e"
@@ -8361,17 +8020,17 @@ ret@~0.1.10:
   resolved "https://registry.yarnpkg.com/ret/-/ret-0.1.15.tgz#b8a4825d5bdb1fc3f6f53c2bc33f81388681c7bc"
   integrity sha512-TTlYpa+OL+vMMNG24xSlQGEJ3B/RzEfUlLct7b5G/ytav+wPrplCpVMFuwzXbkecJrb6IYo1iFb0S9v37754mg==
 
-retry@^0.10.0:
-  version "0.10.1"
-  resolved "https://registry.yarnpkg.com/retry/-/retry-0.10.1.tgz#e76388d217992c252750241d3d3956fed98d8ff4"
-  integrity sha1-52OI0heZLCUnUCQdPTlW/tmNj/Q=
+retry@^0.12.0:
+  version "0.12.0"
+  resolved "https://registry.yarnpkg.com/retry/-/retry-0.12.0.tgz#1b42a6266a21f07421d1b0b54b7dc167b01c013b"
+  integrity sha1-G0KmJmoh8HQh0bC1S33BZ7AcATs=
 
 reusify@^1.0.4:
   version "1.0.4"
   resolved "https://registry.yarnpkg.com/reusify/-/reusify-1.0.4.tgz#90da382b1e126efc02146e90845a88db12925d76"
   integrity sha512-U9nH88a3fc/ekCF1l0/UP1IosiuIjyTh7hBvXVMHYgVcfGvt897Xguj2UOLDeI5BG2m7/uwyaLVT6fbtCwTyzw==
 
-rimraf@^2.5.4, rimraf@^2.6.2, rimraf@^2.6.3:
+rimraf@^2.6.3:
   version "2.7.1"
   resolved "https://registry.yarnpkg.com/rimraf/-/rimraf-2.7.1.tgz#35797f13a7fdadc566142c29d4f07ccad483e3ec"
   integrity sha512-uWjbaKIK3T1OSVptzX7Nl6PvQ3qAGtKEtVRjRuazjfL3Bx5eI409VZSqgND+4UNnmzLVdPj9FqFJNPqBZFve4w==
@@ -8385,12 +8044,7 @@ rimraf@^3.0.0, rimraf@^3.0.2:
   dependencies:
     glob "^7.1.3"
 
-rsvp@^4.8.4:
-  version "4.8.5"
-  resolved "https://registry.yarnpkg.com/rsvp/-/rsvp-4.8.5.tgz#c8f155311d167f68f21e168df71ec5b083113734"
-  integrity sha512-nfMOlASu9OnRJo1mbEk2cz0D56a1MBNrJ7orjRZQG10XDyuvwksKbuXNp6qa+kbn839HwjwhBzhFmdsaEAfauA==
-
-run-async@^2.2.0:
+run-async@^2.4.0:
   version "2.4.1"
   resolved "https://registry.yarnpkg.com/run-async/-/run-async-2.4.1.tgz#8440eccf99ea3e70bd409d49aab88e10c189a455"
   integrity sha512-tvVnVv01b8c1RrA6Ep7JkStj85Guv/YrMcwqYQnwjsAS2cTmmPGBBjAjpCW7RrSodNSoE2/qg9O4bceNvUuDgQ==
@@ -8402,13 +8056,6 @@ run-parallel@^1.1.9:
   dependencies:
     queue-microtask "^1.2.2"
 
-run-queue@^1.0.0, run-queue@^1.0.3:
-  version "1.0.3"
-  resolved "https://registry.yarnpkg.com/run-queue/-/run-queue-1.0.3.tgz#e848396f057d223f24386924618e25694161ec47"
-  integrity sha1-6Eg5bwV9Ij8kOGkkYY4laUFh7Ec=
-  dependencies:
-    aproba "^1.1.1"
-
 rxjs@5.5.11:
   version "5.5.11"
   resolved "https://registry.yarnpkg.com/rxjs/-/rxjs-5.5.11.tgz#f733027ca43e3bec6b994473be4ab98ad43ced87"
@@ -8416,14 +8063,14 @@ rxjs@5.5.11:
   dependencies:
     symbol-observable "1.0.1"
 
-rxjs@^6.4.0:
+rxjs@^6.6.0:
   version "6.6.7"
   resolved "https://registry.yarnpkg.com/rxjs/-/rxjs-6.6.7.tgz#90ac018acabf491bf65044235d5863c4dab804c9"
   integrity sha512-hTdwr+7yYNIT5n4AMYp85KA6yw2Va0FLa3Rguvbpa4W3I5xynaBZo41cM3XM+4Q6fRMj3sBYIR1VAmZMXYJvRQ==
   dependencies:
     tslib "^1.9.0"
 
-safe-buffer@^5.0.1, safe-buffer@^5.1.0, safe-buffer@^5.1.1, safe-buffer@^5.1.2, safe-buffer@^5.2.0, safe-buffer@~5.2.0:
+safe-buffer@^5.0.1, safe-buffer@^5.1.0, safe-buffer@^5.1.2, safe-buffer@~5.2.0:
   version "5.2.1"
   resolved "https://registry.yarnpkg.com/safe-buffer/-/safe-buffer-5.2.1.tgz#1eaf9fa9bdb1fdd4ec75f58f9cdb4e6b7827eec6"
   integrity sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==
@@ -8445,21 +8092,6 @@ safe-regex@^1.1.0:
   resolved "https://registry.yarnpkg.com/safer-buffer/-/safer-buffer-2.1.2.tgz#44fa161b0187b9549dd84bb91802f9bd8385cd6a"
   integrity sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==
 
-sane@^4.0.3:
-  version "4.1.0"
-  resolved "https://registry.yarnpkg.com/sane/-/sane-4.1.0.tgz#ed881fd922733a6c461bc189dc2b6c006f3ffded"
-  integrity sha512-hhbzAgTIX8O7SHfp2c8/kREfEn4qO/9q8C9beyY6+tvZ87EpoZ3i1RIEvp27YBswnNbY9mWd6paKVmKbAgLfZA==
-  dependencies:
-    "@cnakazawa/watch" "^1.0.3"
-    anymatch "^2.0.0"
-    capture-exit "^2.0.0"
-    exec-sh "^0.3.2"
-    execa "^1.0.0"
-    fb-watchman "^2.0.0"
-    micromatch "^3.1.4"
-    minimist "^1.1.1"
-    walker "~1.0.5"
-
 sax@>=0.6.0:
   version "1.2.4"
   resolved "https://registry.yarnpkg.com/sax/-/sax-1.2.4.tgz#2816234e2378bddc4e5354fab5caa895df7100d9"
@@ -8488,19 +8120,19 @@ semver-greatest-satisfied-range@^1.1.0:
   dependencies:
     sver-compat "^1.5.0"
 
-"semver@2 || 3 || 4 || 5", "semver@2.x || 3.x || 4 || 5", semver@^5.4.1, semver@^5.5.0, semver@^5.5.1, semver@^5.6.0, semver@^5.7.0, semver@^5.7.1:
+"semver@2 || 3 || 4 || 5", semver@^5.5.0, semver@^5.6.0, semver@^5.7.1:
   version "5.7.1"
   resolved "https://registry.yarnpkg.com/semver/-/semver-5.7.1.tgz#a954f931aeba508d307bbf069eff0c01c96116f7"
   integrity sha512-sauaDf/PZdVgrLTNYHRtpXa1iRiKcaebiKQ1BJdpQlWH2lCvexQdX55snPFyK7QzpudqbCI0qXFfOasHdyNDGQ==
 
-semver@7.x, semver@^7.2.1, semver@^7.3.2, semver@^7.3.4:
+semver@7.x, semver@^7.1.1, semver@^7.1.3, semver@^7.2.1, semver@^7.3.2, semver@^7.3.4, semver@^7.3.5:
   version "7.3.5"
   resolved "https://registry.yarnpkg.com/semver/-/semver-7.3.5.tgz#0b621c879348d8998e4b0e4be94b3f12e6018ef7"
   integrity sha512-PoeGJYh8HK4BTO/a9Tf6ZG3veo/A7ZVsYrSA6J8ny9nb3B1VrpkuN+z9OE5wfE5p6H4LchYZsegiQgbJD94ZFQ==
   dependencies:
     lru-cache "^6.0.0"
 
-semver@^6.0.0, semver@^6.2.0, semver@^6.3.0:
+semver@^6.0.0, semver@^6.3.0:
   version "6.3.0"
   resolved "https://registry.yarnpkg.com/semver/-/semver-6.3.0.tgz#ee0a64c8af5e8ceea67687b133761e1becbd1d3d"
   integrity sha512-b39TBaTSfV6yBrapU89p5fKekE2m/NwnDocOVruQFS1/veMgdzuPcnOM34M6CwxW8jH/lxEa5rBoDeUwu5HHTw==
@@ -8563,7 +8195,7 @@ shell-quote@^1.6.1:
   resolved "https://registry.yarnpkg.com/shell-quote/-/shell-quote-1.7.2.tgz#67a7d02c76c9da24f99d20808fcaded0e0e04be2"
   integrity sha512-mRz/m/JVscCrkMyPqHc/bczi3OQHkLTqXHEFu0zDhK/qfv3UcOA4SVmRCLmos4bhjr9ekVQubj/R7waKapmiQg==
 
-shelljs@^0.8.4:
+shelljs@^0.8.3, shelljs@^0.8.4:
   version "0.8.4"
   resolved "https://registry.yarnpkg.com/shelljs/-/shelljs-0.8.4.tgz#de7684feeb767f8716b326078a8a00875890e3c2"
   integrity sha512-7gk3UZ9kOfPLIAbslLzyWeGiEqx9e3rxwZM0KE6EL8GlGwjym9Mrlx5/p33bWTu9YG6vcS4MBxYZDHYr5lr8BQ==
@@ -8572,11 +8204,6 @@ shelljs@^0.8.4:
     interpret "^1.0.0"
     rechoir "^0.6.2"
 
-shellwords@^0.1.1:
-  version "0.1.1"
-  resolved "https://registry.yarnpkg.com/shellwords/-/shellwords-0.1.1.tgz#d6b9181c1a48d397324c84871efbcfc73fc0654b"
-  integrity sha512-vFwSUfQvqybiICwZY5+DAWIPLKsWO31Q91JSKl3UYv+K5c2QRPzn0qzec6QPu1Qc9eHYItiP3NdJqNVqetYAww==
-
 shiki@^0.9.3:
   version "0.9.3"
   resolved "https://registry.yarnpkg.com/shiki/-/shiki-0.9.3.tgz#7bf7bcf3ed50ca525ec89cc09254abce4264d5ca"
@@ -8594,7 +8221,7 @@ side-channel@^1.0.4:
     get-intrinsic "^1.0.2"
     object-inspect "^1.9.0"
 
-signal-exit@^3.0.0, signal-exit@^3.0.2:
+signal-exit@^3.0.0, signal-exit@^3.0.2, signal-exit@^3.0.3:
   version "3.0.3"
   resolved "https://registry.yarnpkg.com/signal-exit/-/signal-exit-3.0.3.tgz#a1410c2edd8f077b08b4e253c8eacfcaf057461c"
   integrity sha512-VUJ49FC8U1OxwZLxIbTTrDvLnf/6TDgxZcK8wxR8zs13xpx7xbG60ndBlhNrFi2EMuFRoeDoJO7wthSLq42EjA==
@@ -8663,20 +8290,21 @@ snapdragon@^0.8.1:
     source-map-resolve "^0.5.0"
     use "^3.1.0"
 
-socks-proxy-agent@^4.0.0:
-  version "4.0.2"
-  resolved "https://registry.yarnpkg.com/socks-proxy-agent/-/socks-proxy-agent-4.0.2.tgz#3c8991f3145b2799e70e11bd5fbc8b1963116386"
-  integrity sha512-NT6syHhI9LmuEMSK6Kd2V7gNv5KFZoLE7V5udWmn0de+3Mkj3UMA/AJPLyeNUVmElCurSHtUdM3ETpR3z770Wg==
+socks-proxy-agent@^5.0.0:
+  version "5.0.0"
+  resolved "https://registry.yarnpkg.com/socks-proxy-agent/-/socks-proxy-agent-5.0.0.tgz#7c0f364e7b1cf4a7a437e71253bed72e9004be60"
+  integrity sha512-lEpa1zsWCChxiynk+lCycKuC502RxDWLKJZoIhnxrWNjLSDGYRFflHA1/228VkRcnv9TIb8w98derGbpKxJRgA==
   dependencies:
-    agent-base "~4.2.1"
-    socks "~2.3.2"
+    agent-base "6"
+    debug "4"
+    socks "^2.3.3"
 
-socks@~2.3.2:
-  version "2.3.3"
-  resolved "https://registry.yarnpkg.com/socks/-/socks-2.3.3.tgz#01129f0a5d534d2b897712ed8aceab7ee65d78e3"
-  integrity sha512-o5t52PCNtVdiOvzMry7wU4aOqYWL0PeCXRWBEiJow4/i/wr+wpsJQ9awEu1EonLIqsfGd5qSgDdxEOvCdmBEpA==
+socks@^2.3.3:
+  version "2.6.1"
+  resolved "https://registry.yarnpkg.com/socks/-/socks-2.6.1.tgz#989e6534a07cf337deb1b1c94aaa44296520d30e"
+  integrity sha512-kLQ9N5ucj8uIcxrDwjm0Jsqk06xdpBjGNQtpXy4Q8/QY2k+fY7nZH8CARy+hkbG+SGAovmzzuauCpBlb8FrnBA==
   dependencies:
-    ip "1.1.5"
+    ip "^1.1.5"
     smart-buffer "^4.1.0"
 
 sort-keys@^2.0.0:
@@ -8686,6 +8314,13 @@ sort-keys@^2.0.0:
   dependencies:
     is-plain-obj "^1.0.0"
 
+sort-keys@^4.0.0:
+  version "4.2.0"
+  resolved "https://registry.yarnpkg.com/sort-keys/-/sort-keys-4.2.0.tgz#6b7638cee42c506fff8c1cecde7376d21315be18"
+  integrity sha512-aUYIEU/UviqPgc8mHR6IW1EGxkAXpeRETYcrzg8cLAvUPZcpAlleSXHV2mY7G12GphSH6Gzv+4MMVSSkbdteHg==
+  dependencies:
+    is-plain-obj "^2.0.0"
+
 source-list-map@^2.0.1:
   version "2.0.1"
   resolved "https://registry.yarnpkg.com/source-list-map/-/source-list-map-2.0.1.tgz#3993bd873bfc48479cca9ea3a547835c7c154b34"
@@ -8696,10 +8331,10 @@ source-map-js@^0.6.2:
   resolved "https://registry.yarnpkg.com/source-map-js/-/source-map-js-0.6.2.tgz#0bb5de631b41cfbda6cfba8bd05a80efdfd2385e"
   integrity sha512-/3GptzWzu0+0MBQFrDKzw/DvvMTUORvgY6k6jd/VS6iCR4RDTKWH6v6WPwQoUO8667uQEf9Oe38DxAYWY5F/Ug==
 
-source-map-loader@2.0.1:
-  version "2.0.1"
-  resolved "https://registry.yarnpkg.com/source-map-loader/-/source-map-loader-2.0.1.tgz#b4fd0ae7fa7e7d3954300f383f2d6fcc230a4261"
-  integrity sha512-UzOTTQhoNPeTNzOxwFw220RSRzdGSyH4lpNyWjR7Qm34P4/N0W669YSUFdH07+YNeN75h765XLHmNsF/bm97RQ==
+source-map-loader@3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/source-map-loader/-/source-map-loader-3.0.0.tgz#f2a04ee2808ad01c774dea6b7d2639839f3b3049"
+  integrity sha512-GKGWqWvYr04M7tn8dryIWvb0s8YM41z82iQv01yBtIylgxax0CwvSy6gc2Y02iuXwEfGWRlMicH0nvms9UZphw==
   dependencies:
     abab "^2.0.5"
     iconv-lite "^0.6.2"
@@ -8779,9 +8414,9 @@ spdx-expression-parse@^3.0.0:
     spdx-license-ids "^3.0.0"
 
 spdx-license-ids@^3.0.0:
-  version "3.0.7"
-  resolved "https://registry.yarnpkg.com/spdx-license-ids/-/spdx-license-ids-3.0.7.tgz#e9c18a410e5ed7e12442a549fbd8afa767038d65"
-  integrity sha512-U+MTEOO0AiDzxwFvoa4JVnMV6mZlJKk2sBLt90s7G0Gd0Mlknc7kxEn3nuDPNZRta7O2uy8oLcZLVT+4sqNZHQ==
+  version "3.0.9"
+  resolved "https://registry.yarnpkg.com/spdx-license-ids/-/spdx-license-ids-3.0.9.tgz#8a595135def9592bda69709474f1cbeea7c2467f"
+  integrity sha512-Ki212dKK4ogX+xDo4CtOZBVIwhsKBEfsEEcwmJfLQzirgc2jIWdzg40Unxz/HzEUqM1WFzVlQSMF9kZZ2HboLQ==
 
 split-on-first@^1.0.0:
   version "1.1.0"
@@ -8795,13 +8430,6 @@ split-string@^3.0.1, split-string@^3.0.2:
   dependencies:
     extend-shallow "^3.0.0"
 
-split2@^2.0.0:
-  version "2.2.0"
-  resolved "https://registry.yarnpkg.com/split2/-/split2-2.2.0.tgz#186b2575bcf83e85b7d18465756238ee4ee42493"
-  integrity sha512-RAb22TG39LhI31MbreBgIuKiIKhVsawfTgEGqKHTK87aG+ul/PB8Sqoi3I7kVdRWiCfrKxK3uo4/YUkpNvhPbw==
-  dependencies:
-    through2 "^2.0.2"
-
 split2@^3.0.0:
   version "3.2.2"
   resolved "https://registry.yarnpkg.com/split2/-/split2-3.2.2.tgz#bf2cf2a37d838312c249c89206fd7a17dd12365f"
@@ -8836,26 +8464,19 @@ sshpk@^1.7.0:
     safer-buffer "^2.0.2"
     tweetnacl "~0.14.0"
 
-ssri@^6.0.0, ssri@^6.0.1:
-  version "6.0.2"
-  resolved "https://registry.yarnpkg.com/ssri/-/ssri-6.0.2.tgz#157939134f20464e7301ddba3e90ffa8f7728ac5"
-  integrity sha512-cepbSq/neFK7xB6A50KHN0xHDotYzq58wWCa5LeWqnPrHG8GzfEjO/4O8kpmcGW+oaxkvhEJCWgbgNk4/ZV93Q==
+ssri@^8.0.0, ssri@^8.0.1:
+  version "8.0.1"
+  resolved "https://registry.yarnpkg.com/ssri/-/ssri-8.0.1.tgz#638e4e439e2ffbd2cd289776d5ca457c4f51a2af"
+  integrity sha512-97qShzy1AiyxvPNIkLWoGua7xoQzzPjQ0HAH4B0rWKo7SZ6USuPcrUiAFrws0UH8RrbWmgq3LMTObhPIHbbBeQ==
   dependencies:
-    figgy-pudding "^3.5.1"
+    minipass "^3.1.1"
 
 stack-trace@0.0.10:
   version "0.0.10"
   resolved "https://registry.yarnpkg.com/stack-trace/-/stack-trace-0.0.10.tgz#547c70b347e8d32b4e108ea1a2a159e5fdde19c0"
   integrity sha1-VHxws0fo0ytOEI6hoqFZ5f3eGcA=
 
-stack-utils@^1.0.1:
-  version "1.0.5"
-  resolved "https://registry.yarnpkg.com/stack-utils/-/stack-utils-1.0.5.tgz#a19b0b01947e0029c8e451d5d61a498f5bb1471b"
-  integrity sha512-KZiTzuV3CnSnSvgMRrARVCj+Ht7rMbauGDK0LdVFRGyenwdylpajAp4Q0i6SX8rEmbTpMMf6ryq2gb8pPq2WgQ==
-  dependencies:
-    escape-string-regexp "^2.0.0"
-
-stack-utils@^2.0.2:
+stack-utils@^2.0.3:
   version "2.0.3"
   resolved "https://registry.yarnpkg.com/stack-utils/-/stack-utils-2.0.3.tgz#cd5f030126ff116b78ccb3c027fe302713b61277"
   integrity sha512-gL//fkxfWUsIlFL2Tl42Cl6+HFALEaB1FU76I/Fy+oZjRreP7OPMXFlGbxM7NQsI0ZpUfw76sHnv0WNYuTb7Iw==
@@ -8875,19 +8496,6 @@ stats-median@^1.0.1:
   resolved "https://registry.yarnpkg.com/stats-median/-/stats-median-1.0.1.tgz#ca8497cb1014d23d145db4d6fc93c8e815eed3ef"
   integrity sha512-IYsheLg6dasD3zT/w9+8Iq9tcIQqqu91ZIpJOnIEM25C3X/g4Tl8mhXwW2ZQpbrsJISr9+wizEYgsibN5/b32Q==
 
-stealthy-require@^1.1.1:
-  version "1.1.1"
-  resolved "https://registry.yarnpkg.com/stealthy-require/-/stealthy-require-1.1.1.tgz#35b09875b4ff49f26a777e509b3090a3226bf24b"
-  integrity sha1-NbCYdbT/SfJqd35QmzCQoyJr8ks=
-
-stream-each@^1.1.0:
-  version "1.2.3"
-  resolved "https://registry.yarnpkg.com/stream-each/-/stream-each-1.2.3.tgz#ebe27a0c389b04fbcc233642952e10731afa9bae"
-  integrity sha512-vlMC2f8I2u/bZGqkdfLQW/13Zihpej/7PmSiMQsbYddxuTsJp8vRe2x2FvVExZg7FaOds43ROAuFJwPR4MTZLw==
-  dependencies:
-    end-of-stream "^1.1.0"
-    stream-shift "^1.0.0"
-
 stream-exhaust@^1.0.1:
   version "1.0.2"
   resolved "https://registry.yarnpkg.com/stream-exhaust/-/stream-exhaust-1.0.2.tgz#acdac8da59ef2bc1e17a2c0ccf6c320d120e555d"
@@ -8920,7 +8528,7 @@ string-width@^1.0.1, string-width@^1.0.2:
     is-fullwidth-code-point "^1.0.0"
     strip-ansi "^3.0.0"
 
-"string-width@^1.0.2 || 2", string-width@^2.1.0:
+"string-width@^1.0.2 || 2":
   version "2.1.1"
   resolved "https://registry.yarnpkg.com/string-width/-/string-width-2.1.1.tgz#ab93f27a8dc13d28cac815c462143a6d9012ae9e"
   integrity sha512-nOqH59deCq9SRHlxq1Aw85Jnt4w6KvLKqWVik6oA9ZklXLNIOlqg4F2yrT1MVaTjAqvVwdfeZ7w7aCvJD7ugkw==
@@ -8928,15 +8536,6 @@ string-width@^1.0.1, string-width@^1.0.2:
     is-fullwidth-code-point "^2.0.0"
     strip-ansi "^4.0.0"
 
-string-width@^3.0.0, string-width@^3.1.0:
-  version "3.1.0"
-  resolved "https://registry.yarnpkg.com/string-width/-/string-width-3.1.0.tgz#22767be21b62af1081574306f69ac51b62203961"
-  integrity sha512-vafcv6KjVZKSgz06oM/H6GDBrAtz8vdhQakGjFIvNrHA6y3HCF1CInLy+QLq8dTJPQ1b+KDUqDFctkdRW44e1w==
-  dependencies:
-    emoji-regex "^7.0.1"
-    is-fullwidth-code-point "^2.0.0"
-    strip-ansi "^5.1.0"
-
 string-width@^4.1.0, string-width@^4.2.0:
   version "4.2.2"
   resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.2.tgz#dafd4f9559a7585cfba529c6a0a4f73488ebd4c5"
@@ -8999,13 +8598,6 @@ strip-ansi@^4.0.0:
   dependencies:
     ansi-regex "^3.0.0"
 
-strip-ansi@^5.0.0, strip-ansi@^5.1.0, strip-ansi@^5.2.0:
-  version "5.2.0"
-  resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-5.2.0.tgz#8c9a536feb6afc962bdfa5b104a5091c1ad9c0ae"
-  integrity sha512-DuRs1gKbBqsMKIZlrffwlug8MHkcnpjs5VPmL1PAh+mA30U0DTotfDZ0d2UUsXpPmPmMMJ6W773MaA3J+lbiWA==
-  dependencies:
-    ansi-regex "^4.1.0"
-
 strip-ansi@^6.0.0:
   version "6.0.0"
   resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.0.tgz#0b1571dd7669ccd4f3e06e14ef1eed26225ae532"
@@ -9035,11 +8627,6 @@ strip-bom@^4.0.0:
   resolved "https://registry.yarnpkg.com/strip-bom/-/strip-bom-4.0.0.tgz#9c3505c1db45bcedca3d9cf7a16f5c5aa3901878"
   integrity sha512-3xurFv5tEgii33Zi8Jtp55wEIILR9eh34FAW00PZf+JnSsTmV/ioewSgQl97JHvgjoRGwPShsWm+IdrxB35d0w==
 
-strip-eof@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/strip-eof/-/strip-eof-1.0.0.tgz#bb43ff5598a6eb05d89b59fcd129c983313606bf"
-  integrity sha1-u0P/VZim6wXYm1n80SnJgzE2Br8=
-
 strip-final-newline@^2.0.0:
   version "2.0.0"
   resolved "https://registry.yarnpkg.com/strip-final-newline/-/strip-final-newline-2.0.0.tgz#89b852fb2fcbe936f6f4b3187afb0a12c1ab58ad"
@@ -9052,11 +8639,6 @@ strip-indent@^1.0.1:
   dependencies:
     get-stdin "^4.0.1"
 
-strip-indent@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/strip-indent/-/strip-indent-2.0.0.tgz#5ef8db295d01e6ed6cbf7aab96998d7822527b68"
-  integrity sha1-XvjbKV0B5u1sv3qrlpmNeCJSe2g=
-
 strip-indent@^3.0.0:
   version "3.0.0"
   resolved "https://registry.yarnpkg.com/strip-indent/-/strip-indent-3.0.0.tgz#c32e1cee940b6b3432c771bc2c54bcce73cd3001"
@@ -9069,7 +8651,7 @@ strip-json-comments@^3.1.0, strip-json-comments@^3.1.1:
   resolved "https://registry.yarnpkg.com/strip-json-comments/-/strip-json-comments-3.1.1.tgz#31f1281b3832630434831c310c01cccda8cbe006"
   integrity sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==
 
-strong-log-transformer@^2.0.0:
+strong-log-transformer@^2.1.0:
   version "2.1.0"
   resolved "https://registry.yarnpkg.com/strong-log-transformer/-/strong-log-transformer-2.1.0.tgz#0f5ed78d325e0421ac6f90f7f10e691d6ae3ae10"
   integrity sha512-B3Hgul+z0L9a236FAUC9iZsL+nVHgoCJnqCbN588DjYxvGXaXaaFbfmQ/JhvKjZwsOukuR72XbHv71Qkug0HxA==
@@ -9099,6 +8681,13 @@ supports-color@^7.0.0, supports-color@^7.1.0:
   dependencies:
     has-flag "^4.0.0"
 
+supports-color@^8.0.0:
+  version "8.1.1"
+  resolved "https://registry.yarnpkg.com/supports-color/-/supports-color-8.1.1.tgz#cd6fc17e28500cff56c1b86c0a7fd4a54a73005c"
+  integrity sha512-MpUEN2OodtUzxvKQl72cUF7RQ5EiHsGvSsVG0ia9c5RbWGL2CI4C7EpPS8UTBIplnlzZiNuV56w+FuNxy3ty2Q==
+  dependencies:
+    has-flag "^4.0.0"
+
 supports-hyperlinks@^2.0.0:
   version "2.2.0"
   resolved "https://registry.yarnpkg.com/supports-hyperlinks/-/supports-hyperlinks-2.2.0.tgz#4f77b42488765891774b70c79babd87f9bd594bb"
@@ -9135,27 +8724,24 @@ table-layout@^1.0.1:
     typical "^5.2.0"
     wordwrapjs "^4.0.0"
 
-table@^6.0.4:
-  version "6.0.9"
-  resolved "https://registry.yarnpkg.com/table/-/table-6.0.9.tgz#790a12bf1e09b87b30e60419bafd6a1fd85536fb"
-  integrity sha512-F3cLs9a3hL1Z7N4+EkSscsel3z55XT950AvB05bwayrNg5T1/gykXtigioTAjbltvbMSJvvhFCbnf6mX+ntnJQ==
+table@^6.0.9:
+  version "6.7.1"
+  resolved "https://registry.yarnpkg.com/table/-/table-6.7.1.tgz#ee05592b7143831a8c94f3cee6aae4c1ccef33e2"
+  integrity sha512-ZGum47Yi6KOOFDE8m223td53ath2enHcYLgOCjGr5ngu8bdIARQk6mN/wRMv4yMRcHnCSnHbCEha4sobQx5yWg==
   dependencies:
     ajv "^8.0.1"
-    is-boolean-object "^1.1.0"
-    is-number-object "^1.0.4"
-    is-string "^1.0.5"
     lodash.clonedeep "^4.5.0"
-    lodash.flatten "^4.4.0"
     lodash.truncate "^4.4.2"
     slice-ansi "^4.0.0"
     string-width "^4.2.0"
+    strip-ansi "^6.0.0"
 
 tapable@^2.1.1, tapable@^2.2.0:
   version "2.2.0"
   resolved "https://registry.yarnpkg.com/tapable/-/tapable-2.2.0.tgz#5c373d281d9c672848213d0e037d1c4165ab426b"
   integrity sha512-FBk4IesMV1rBxX2tfiK8RAmogtWn53puLOQlvO8XuwlgxcYbP4mVPS9Ph4aeamSyyVjOl24aYWAuc8U5kCVwMw==
 
-tar@^4.4.10, tar@^4.4.12, tar@^4.4.8:
+tar@^4.4.12:
   version "4.4.13"
   resolved "https://registry.yarnpkg.com/tar/-/tar-4.4.13.tgz#43b364bc52888d555298637b10d60790254ab525"
   integrity sha512-w2VwSrBoHa5BsSyH+KxEqeQBAllHhccyMFVHtGtdMpF4W7IRWfZjFiQceJPChOeTsSDVUpER2T8FA93pr0L+QA==
@@ -9168,22 +8754,33 @@ tar@^4.4.10, tar@^4.4.12, tar@^4.4.8:
     safe-buffer "^5.1.2"
     yallist "^3.0.3"
 
+tar@^6.0.2, tar@^6.1.0:
+  version "6.1.0"
+  resolved "https://registry.yarnpkg.com/tar/-/tar-6.1.0.tgz#d1724e9bcc04b977b18d5c573b333a2207229a83"
+  integrity sha512-DUCttfhsnLCjwoDoFcI+B2iJgYa93vBnDUATYEeRx6sntCTdN01VnqsIuTlALXla/LWooNg0yEGeB+Y8WdFxGA==
+  dependencies:
+    chownr "^2.0.0"
+    fs-minipass "^2.0.0"
+    minipass "^3.0.0"
+    minizlib "^2.1.1"
+    mkdirp "^1.0.3"
+    yallist "^4.0.0"
+
 temp-dir@^1.0.0:
   version "1.0.0"
   resolved "https://registry.yarnpkg.com/temp-dir/-/temp-dir-1.0.0.tgz#0a7c0ea26d3a39afa7e0ebea9c1fc0bc4daa011d"
   integrity sha1-CnwOom06Oa+n4OvqnB/AvE2qAR0=
 
-temp-write@^3.4.0:
-  version "3.4.0"
-  resolved "https://registry.yarnpkg.com/temp-write/-/temp-write-3.4.0.tgz#8cff630fb7e9da05f047c74ce4ce4d685457d492"
-  integrity sha1-jP9jD7fp2gXwR8dM5M5NaFRX1JI=
+temp-write@^4.0.0:
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/temp-write/-/temp-write-4.0.0.tgz#cd2e0825fc826ae72d201dc26eef3bf7e6fc9320"
+  integrity sha512-HIeWmj77uOOHb0QX7siN3OtwV3CTntquin6TNVg6SHOqCP3hYKmox90eeFOGaY1MqJ9WYDDjkyZrW6qS5AWpbw==
   dependencies:
-    graceful-fs "^4.1.2"
-    is-stream "^1.1.0"
-    make-dir "^1.0.0"
-    pify "^3.0.0"
+    graceful-fs "^4.1.15"
+    is-stream "^2.0.0"
+    make-dir "^3.0.0"
     temp-dir "^1.0.0"
-    uuid "^3.0.1"
+    uuid "^3.3.2"
 
 terminal-link@^2.0.0:
   version "2.1.1"
@@ -9194,18 +8791,18 @@ terminal-link@^2.0.0:
     supports-hyperlinks "^2.0.0"
 
 terser-webpack-plugin@^5.1.1:
-  version "5.1.1"
-  resolved "https://registry.yarnpkg.com/terser-webpack-plugin/-/terser-webpack-plugin-5.1.1.tgz#7effadee06f7ecfa093dbbd3e9ab23f5f3ed8673"
-  integrity sha512-5XNNXZiR8YO6X6KhSGXfY0QrGrCRlSwAEjIIrlRQR4W8nP69TaJUlh3bkuac6zzgspiGPfKEHcY295MMVExl5Q==
+  version "5.1.2"
+  resolved "https://registry.yarnpkg.com/terser-webpack-plugin/-/terser-webpack-plugin-5.1.2.tgz#51d295eb7cc56785a67a372575fdc46e42d5c20c"
+  integrity sha512-6QhDaAiVHIQr5Ab3XUWZyDmrIPCHMiqJVljMF91YKyqwKkL5QHnYMkrMBy96v9Z7ev1hGhSEw1HQZc2p/s5Z8Q==
   dependencies:
     jest-worker "^26.6.2"
     p-limit "^3.1.0"
     schema-utils "^3.0.0"
     serialize-javascript "^5.0.1"
     source-map "^0.6.1"
-    terser "^5.5.1"
+    terser "^5.7.0"
 
-terser@^5.5.1:
+terser@^5.7.0:
   version "5.7.0"
   resolved "https://registry.yarnpkg.com/terser/-/terser-5.7.0.tgz#a761eeec206bc87b605ab13029876ead938ae693"
   integrity sha512-HP5/9hp2UaZt5fYkuhNBR8YyRcT8juw8+uFbAme53iN9hblvKnLUTKkmwJG6ocWpIKf8UK4DoeWG4ty0J6S6/g==
@@ -9233,24 +8830,10 @@ text-table@^0.2.0:
   resolved "https://registry.yarnpkg.com/text-table/-/text-table-0.2.0.tgz#7f5ee823ae805207c00af2df4a84ec3fcfa570b4"
   integrity sha1-f17oI66AUgfACvLfSoTsP8+lcLQ=
 
-thenify-all@^1.0.0:
-  version "1.6.0"
-  resolved "https://registry.yarnpkg.com/thenify-all/-/thenify-all-1.6.0.tgz#1a1918d402d8fc3f98fbf234db0bcc8cc10e9726"
-  integrity sha1-GhkY1ALY/D+Y+/I02wvMjMEOlyY=
-  dependencies:
-    thenify ">= 3.1.0 < 4"
-
-"thenify@>= 3.1.0 < 4":
-  version "3.3.1"
-  resolved "https://registry.yarnpkg.com/thenify/-/thenify-3.3.1.tgz#8932e686a4066038a016dd9e2ca46add9838a95f"
-  integrity sha512-RVZSIV5IG10Hk3enotrhvz0T9em6cyHBLkH/YAZuKqd8hRkKhSfCGIcP2KUY0EPxndzANBmNllzWPwak+bheSw==
-  dependencies:
-    any-promise "^1.0.0"
-
-throat@^5.0.0:
-  version "5.0.0"
-  resolved "https://registry.yarnpkg.com/throat/-/throat-5.0.0.tgz#c5199235803aad18754a667d659b5e72ce16764b"
-  integrity sha512-fcwX4mndzpLQKBS1DVYhGAcYaYt7vsHNIvQV+WXMvnow5cgjPphq5CaayLaGsjRdSCKZFNGt7/GYAuXaNOiYCA==
+throat@^6.0.1:
+  version "6.0.1"
+  resolved "https://registry.yarnpkg.com/throat/-/throat-6.0.1.tgz#d514fedad95740c12c2d7fc70ea863eb51ade375"
+  integrity sha512-8hmiGIJMDlwjg7dlJ4yKGLK8EsYqKgPWbG3b4wjJddKNwc7N7Dpn08Df4szr/sZdMVeOstrdYSsqzX6BYbcB+w==
 
 through2-filter@^3.0.0:
   version "3.0.0"
@@ -9260,7 +8843,7 @@ through2-filter@^3.0.0:
     through2 "~2.0.0"
     xtend "~4.0.0"
 
-through2@^2.0.0, through2@^2.0.2, through2@^2.0.3, through2@~2.0.0:
+through2@^2.0.0, through2@^2.0.3, through2@~2.0.0:
   version "2.0.5"
   resolved "https://registry.yarnpkg.com/through2/-/through2-2.0.5.tgz#01c1e39eb31d07cb7d03a96a70823260b23132cd"
   integrity sha512-/mrRod8xqpA+IHSLyGCQ2s8SPHiCDEeQJSep1jqLYeEUClOFG2Qsh+4FU6G9VeqpZnGW/Su8LQGc4YKni5rYSQ==
@@ -9365,14 +8948,6 @@ to-through@^2.0.0:
   dependencies:
     through2 "^2.0.3"
 
-tough-cookie@^2.3.3, tough-cookie@~2.5.0:
-  version "2.5.0"
-  resolved "https://registry.yarnpkg.com/tough-cookie/-/tough-cookie-2.5.0.tgz#cd9fb2a0aa1d5a12b473bd9fb96fa3dcff65ade2"
-  integrity sha512-nlLsUzgm1kfLXSXfRZMc1KLAugd4hqJHDTvc2hDIwS3mZAfMEuMbc03SujMF+GEcpaX/qboeycw6iO8JwVv2+g==
-  dependencies:
-    psl "^1.1.28"
-    punycode "^2.1.1"
-
 tough-cookie@^4.0.0:
   version "4.0.0"
   resolved "https://registry.yarnpkg.com/tough-cookie/-/tough-cookie-4.0.0.tgz#d822234eeca882f991f0f908824ad2622ddbece4"
@@ -9382,17 +8957,18 @@ tough-cookie@^4.0.0:
     punycode "^2.1.1"
     universalify "^0.1.2"
 
-tr46@^1.0.1:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/tr46/-/tr46-1.0.1.tgz#a8b13fd6bfd2489519674ccde55ba3693b706d09"
-  integrity sha1-qLE/1r/SSJUZZ0zN5VujaTtwbQk=
+tough-cookie@~2.5.0:
+  version "2.5.0"
+  resolved "https://registry.yarnpkg.com/tough-cookie/-/tough-cookie-2.5.0.tgz#cd9fb2a0aa1d5a12b473bd9fb96fa3dcff65ade2"
+  integrity sha512-nlLsUzgm1kfLXSXfRZMc1KLAugd4hqJHDTvc2hDIwS3mZAfMEuMbc03SujMF+GEcpaX/qboeycw6iO8JwVv2+g==
   dependencies:
-    punycode "^2.1.0"
+    psl "^1.1.28"
+    punycode "^2.1.1"
 
 tr46@^2.0.2:
-  version "2.0.2"
-  resolved "https://registry.yarnpkg.com/tr46/-/tr46-2.0.2.tgz#03273586def1595ae08fedb38d7733cee91d2479"
-  integrity sha512-3n1qG+/5kg+jrbTzwAykB5yRYtQCTqOGKq5U5PE3b0a1/mzo6snDhjGS0zJVJunO0NrT3Dg1MLy5TjWP/UJppg==
+  version "2.1.0"
+  resolved "https://registry.yarnpkg.com/tr46/-/tr46-2.1.0.tgz#fa87aa81ca5d5941da8cbf1f9b749dc969a4e240"
+  integrity sha512-15Ih7phfcdP5YxqiB+iDtLoaTz4Nd35+IiAv0kQ5FNKHzXgdWqPoTIqEDDJmXceQt4JZk6lVPT8lnDlPpGDppw==
   dependencies:
     punycode "^2.1.1"
 
@@ -9401,11 +8977,6 @@ trim-newlines@^1.0.0:
   resolved "https://registry.yarnpkg.com/trim-newlines/-/trim-newlines-1.0.0.tgz#5887966bb582a4503a41eb524f7d35011815a613"
   integrity sha1-WIeWa7WCpFA6QetST301ARgVphM=
 
-trim-newlines@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/trim-newlines/-/trim-newlines-2.0.0.tgz#b403d0b91be50c331dfc4b82eeceb22c3de16d20"
-  integrity sha1-tAPQuRvlDDMd/EuC7s6yLD3hbSA=
-
 trim-newlines@^3.0.0:
   version "3.0.0"
   resolved "https://registry.yarnpkg.com/trim-newlines/-/trim-newlines-3.0.0.tgz#79726304a6a898aa8373427298d54c2ee8b1cb30"
@@ -9416,15 +8987,15 @@ trim-off-newlines@^1.0.0:
   resolved "https://registry.yarnpkg.com/trim-off-newlines/-/trim-off-newlines-1.0.1.tgz#9f9ba9d9efa8764c387698bcbfeb2c848f11adb3"
   integrity sha1-n5up2e+odkw4dpi8v+sshI8RrbM=
 
-ts-jest@26.5.4:
-  version "26.5.4"
-  resolved "https://registry.yarnpkg.com/ts-jest/-/ts-jest-26.5.4.tgz#207f4c114812a9c6d5746dd4d1cdf899eafc9686"
-  integrity sha512-I5Qsddo+VTm94SukBJ4cPimOoFZsYTeElR2xy6H2TOVs+NsvgYglW8KuQgKoApOKuaU/Ix/vrF9ebFZlb5D2Pg==
+ts-jest@27.0.0:
+  version "27.0.0"
+  resolved "https://registry.yarnpkg.com/ts-jest/-/ts-jest-27.0.0.tgz#b94dbad8f39276b583edc7712e6b3c29e16c8863"
+  integrity sha512-YhuEjDZz9ZjxKbUlgT3XtJb9lyditEjctlo1nLcn983my3Xz4BE3c2ogHhonmGlAdUUiGlz/Dq2KOMXmf1WHfA==
   dependencies:
     bs-logger "0.x"
     buffer-from "1.x"
     fast-json-stable-stringify "2.x"
-    jest-util "^26.1.0"
+    jest-util "^27.0.0"
     json5 "2.x"
     lodash "4.x"
     make-error "1.x"
@@ -9432,11 +9003,15 @@ ts-jest@26.5.4:
     semver "7.x"
     yargs-parser "20.x"
 
-ts-node@9.1.1:
-  version "9.1.1"
-  resolved "https://registry.yarnpkg.com/ts-node/-/ts-node-9.1.1.tgz#51a9a450a3e959401bda5f004a72d54b936d376d"
-  integrity sha512-hPlt7ZACERQGf03M253ytLY3dHbGNGrAq9qIHWUY9XHYl1z7wYngSr3OQ5xmui8o2AaxsONxIzjafLUiWBo1Fg==
+ts-node@10.0.0:
+  version "10.0.0"
+  resolved "https://registry.yarnpkg.com/ts-node/-/ts-node-10.0.0.tgz#05f10b9a716b0b624129ad44f0ea05dac84ba3be"
+  integrity sha512-ROWeOIUvfFbPZkoDis0L/55Fk+6gFQNZwwKPLinacRl6tsxstTF1DbAcLKkovwnpKMVvOMHP1TIbnwXwtLg1gg==
   dependencies:
+    "@tsconfig/node10" "^1.0.7"
+    "@tsconfig/node12" "^1.0.7"
+    "@tsconfig/node14" "^1.0.0"
+    "@tsconfig/node16" "^1.0.1"
     arg "^4.1.0"
     create-require "^1.1.0"
     diff "^4.0.1"
@@ -9512,10 +9087,10 @@ type-fest@^0.21.3:
   resolved "https://registry.yarnpkg.com/type-fest/-/type-fest-0.21.3.tgz#d260a24b0198436e133fa26a524a6d65fa3b2e37"
   integrity sha512-t0rzBq87m3fVcduHDUFhKmyyX+9eo6WQjZvf51Ea/M0Q7+T374Jp1aUiyUl0GKxp8M/OETVHSDvmkyPgvX+X2w==
 
-type-fest@^0.3.0:
-  version "0.3.1"
-  resolved "https://registry.yarnpkg.com/type-fest/-/type-fest-0.3.1.tgz#63d00d204e059474fe5e1b7c011112bbd1dc29e1"
-  integrity sha512-cUGJnCdr4STbePCgqNFbpVNCepa+kAVohJs1sLhxzdH+gnEoOd8VhbYa7pD3zZYGiURWM2xzEII3fQcRizDkYQ==
+type-fest@^0.4.1:
+  version "0.4.1"
+  resolved "https://registry.yarnpkg.com/type-fest/-/type-fest-0.4.1.tgz#8bdf77743385d8a4f13ba95f610f5ccd68c728f8"
+  integrity sha512-IwzA/LSfD2vC1/YDYMv/zHP4rDF1usCwllsDpbolT3D4fUepIO7f9K70jjmUewU/LmGUKJcwcVtDCpnKk4BPMw==
 
 type-fest@^0.6.0:
   version "0.6.0"
@@ -9549,27 +9124,27 @@ typedarray@^0.0.6:
   resolved "https://registry.yarnpkg.com/typedarray/-/typedarray-0.0.6.tgz#867ac74e3864187b1d3d47d996a78ec5c8830777"
   integrity sha1-hnrHTjhkGHsdPUfZlqeOxciDB3c=
 
-typedoc-default-themes@^0.12.9:
+typedoc-default-themes@^0.12.10:
   version "0.12.10"
   resolved "https://registry.yarnpkg.com/typedoc-default-themes/-/typedoc-default-themes-0.12.10.tgz#614c4222fe642657f37693ea62cad4dafeddf843"
   integrity sha512-fIS001cAYHkyQPidWXmHuhs8usjP5XVJjWB8oZGqkTowZaz3v7g3KDZeeqE82FBrmkAnIBOY3jgy7lnPnqATbA==
 
-typedoc@0.20.35:
-  version "0.20.35"
-  resolved "https://registry.yarnpkg.com/typedoc/-/typedoc-0.20.35.tgz#c36996098cbeb2ef63d9d7991262a071b98336a3"
-  integrity sha512-7sNca19LXg2hgyGHq3b33tQ1YFApmd8aBDEzWQ2ry4VDkw/NdFWkysGiGRY1QckDCB0gVH8+MlXA4K71IB3azg==
+typedoc@0.20.36:
+  version "0.20.36"
+  resolved "https://registry.yarnpkg.com/typedoc/-/typedoc-0.20.36.tgz#ee5523c32f566ad8283fc732aa8ea322d1a45f6a"
+  integrity sha512-qFU+DWMV/hifQ9ZAlTjdFO9wbUIHuUBpNXzv68ZyURAP9pInjZiO4+jCPeAzHVcaBCHER9WL/+YzzTt6ZlN/Nw==
   dependencies:
     colors "^1.4.0"
     fs-extra "^9.1.0"
     handlebars "^4.7.7"
     lodash "^4.17.21"
     lunr "^2.3.9"
-    marked "^2.0.1"
+    marked "^2.0.3"
     minimatch "^3.0.0"
     progress "^2.0.3"
     shelljs "^0.8.4"
     shiki "^0.9.3"
-    typedoc-default-themes "^0.12.9"
+    typedoc-default-themes "^0.12.10"
 
 typescript@4.0.2:
   version "4.0.2"
@@ -9587,9 +9162,9 @@ typical@^5.2.0:
   integrity sha512-dvdQgNDNJo+8B2uBQoqdb11eUCE1JQXhvjC/CZtgvZseVd5TYMXnq0+vuUemXbd/Se29cTaUuPX3YIc2xgbvIg==
 
 uglify-js@^3.1.4:
-  version "3.13.3"
-  resolved "https://registry.yarnpkg.com/uglify-js/-/uglify-js-3.13.3.tgz#ce72a1ad154348ea2af61f50933c76cc8802276e"
-  integrity sha512-otIc7O9LyxpUcQoXzj2hL4LPWKklO6LJWoJUzNa8A17Xgi4fOeDC8FBDOLHnC/Slo1CQgsZMcM6as0M76BZaig==
+  version "3.13.7"
+  resolved "https://registry.yarnpkg.com/uglify-js/-/uglify-js-3.13.7.tgz#25468a3b39b1c875df03f0937b2b7036a93f3fee"
+  integrity sha512-1Psi2MmnZJbnEsgJJIlfnd7tFlJfitusmR7zDI8lXlFI0ACD4/Rm/xdrU8bh6zF0i74aiVoBtkRiFulkrmh3AA==
 
 uid-number@0.0.6:
   version "0.0.6"
@@ -9601,7 +9176,7 @@ umask@^1.1.0:
   resolved "https://registry.yarnpkg.com/umask/-/umask-1.1.0.tgz#f29cebf01df517912bb58ff9c4e50fde8e33320d"
   integrity sha1-8pzr8B31F5ErtY/5xOUP3o4zMg0=
 
-unbox-primitive@^1.0.0:
+unbox-primitive@^1.0.1:
   version "1.0.1"
   resolved "https://registry.yarnpkg.com/unbox-primitive/-/unbox-primitive-1.0.1.tgz#085e215625ec3162574dc8859abee78a59b14471"
   integrity sha512-tZU/3NqK3dA5gpE1KtyiJUrEB0lxnGkMFHptJ7q6ewdZ8s12QrODwNbhIJStmJkd1QDXa1NRA8aF2A1zk/Ypyw==
@@ -9669,19 +9244,12 @@ unique-stream@^2.0.2:
     json-stable-stringify-without-jsonify "^1.0.1"
     through2-filter "^3.0.0"
 
-universal-user-agent@^4.0.0:
-  version "4.0.1"
-  resolved "https://registry.yarnpkg.com/universal-user-agent/-/universal-user-agent-4.0.1.tgz#fd8d6cb773a679a709e967ef8288a31fcc03e557"
-  integrity sha512-LnST3ebHwVL2aNe4mejI9IQh2HfZ1RLo8Io2HugSif8ekzD1TlWpHpColOB/eh8JHMLkGH3Akqf040I+4ylNxg==
-  dependencies:
-    os-name "^3.1.0"
-
 universal-user-agent@^6.0.0:
   version "6.0.0"
   resolved "https://registry.yarnpkg.com/universal-user-agent/-/universal-user-agent-6.0.0.tgz#3381f8503b251c0d9cd21bc1de939ec9df5480ee"
   integrity sha512-isyNax3wXoKaulPDZWHQqbmIx1k2tb9fb3GGDBRxCscfYV2Ch7WxPArBsFEG8s/safwXTT7H4QGhaIkTp9447w==
 
-universalify@^0.1.0, universalify@^0.1.2:
+universalify@^0.1.2:
   version "0.1.2"
   resolved "https://registry.yarnpkg.com/universalify/-/universalify-0.1.2.tgz#b646f69be3942dabcecc9d6639c80dc105efaa66"
   integrity sha512-rBJeI5CXAlmy1pV+617WB9J63U6XcazHHF2f2dbJix4XzpUF0RS3Zbj0FGIOCAva5P/d/GBOYaACQ1w+0azUkg==
@@ -9699,11 +9267,16 @@ unset-value@^1.0.0:
     has-value "^0.3.1"
     isobject "^3.0.0"
 
-upath@^1.1.1, upath@^1.2.0:
+upath@^1.1.1:
   version "1.2.0"
   resolved "https://registry.yarnpkg.com/upath/-/upath-1.2.0.tgz#8f66dbcd55a883acdae4408af8b035a5044c1894"
   integrity sha512-aZwGpamFO61g3OlfT7OQCHqhGnW43ieH9WZeP7QxN/G/jS4jfqUkZxoryvJgVPEcrl5NL/ggHsSmLMHuH64Lhg==
 
+upath@^2.0.1:
+  version "2.0.1"
+  resolved "https://registry.yarnpkg.com/upath/-/upath-2.0.1.tgz#50c73dea68d6f6b990f51d279ce6081665d61a8b"
+  integrity sha512-1uEe95xksV1O0CYKXo8vQvN1JEbtJp7lb7C5U9HMsIp6IVwntkH/oNUzyVNQSd4S1sYk2FpSSW44FqMc8qee5w==
+
 uri-js@^4.2.2:
   version "4.4.1"
   resolved "https://registry.yarnpkg.com/uri-js/-/uri-js-4.4.1.tgz#9b1a52595225859e55f669d928f88c6c57f2a77e"
@@ -9733,25 +9306,20 @@ util-promisify@^2.1.0:
   dependencies:
     object.getownpropertydescriptors "^2.0.3"
 
-uuid@^3.0.1, uuid@^3.3.2:
+uuid@^3.3.2:
   version "3.4.0"
   resolved "https://registry.yarnpkg.com/uuid/-/uuid-3.4.0.tgz#b23e4358afa8a202fe7a100af1f5f883f02007ee"
   integrity sha512-HjSDRw6gZE5JMggctHBcjVak08+KEVhSIiDzFnT9S9aegmp85S/bReBVTb4QTFaRNptJ9kuYaNhnbNEOkbKb/A==
 
-uuid@^8.3.0:
-  version "8.3.2"
-  resolved "https://registry.yarnpkg.com/uuid/-/uuid-8.3.2.tgz#80d5b5ced271bb9af6c445f21a1a04c606cefbe2"
-  integrity sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg==
-
 v8-compile-cache@^2.0.3:
   version "2.3.0"
   resolved "https://registry.yarnpkg.com/v8-compile-cache/-/v8-compile-cache-2.3.0.tgz#2de19618c66dc247dcfb6f99338035d8245a2cee"
   integrity sha512-l8lCEmLcLYZh4nbunNZvQCJc5pv7+RCwa8q/LdUx8u7lsWvPDKmpodJAJNwkAhJC//dFY48KuIEmjtd4RViDrA==
 
 v8-to-istanbul@^7.0.0:
-  version "7.1.1"
-  resolved "https://registry.yarnpkg.com/v8-to-istanbul/-/v8-to-istanbul-7.1.1.tgz#04bfd1026ba4577de5472df4f5e89af49de5edda"
-  integrity sha512-p0BB09E5FRjx0ELN6RgusIPsSPhtgexSRcKETybEs6IGOTXJSZqfwxp7r//55nnu0f1AxltY5VvdVqy2vZf9AA==
+  version "7.1.2"
+  resolved "https://registry.yarnpkg.com/v8-to-istanbul/-/v8-to-istanbul-7.1.2.tgz#30898d1a7fa0c84d225a2c1434fb958f290883c1"
+  integrity sha512-TxNb7YEUwkLXCQYeudi6lgQ/SZrzNO4kMdlqVxaZPUIUjCv6iSSypUQX70kNBSERpQ8fk48+d61FXk+tgqcWow==
   dependencies:
     "@types/istanbul-lib-coverage" "^2.0.1"
     convert-source-map "^1.6.0"
@@ -9764,7 +9332,7 @@ v8flags@^3.2.0:
   dependencies:
     homedir-polyfill "^1.0.1"
 
-validate-npm-package-license@^3.0.1, validate-npm-package-license@^3.0.3:
+validate-npm-package-license@^3.0.1, validate-npm-package-license@^3.0.4:
   version "3.0.4"
   resolved "https://registry.yarnpkg.com/validate-npm-package-license/-/validate-npm-package-license-3.0.4.tgz#fc91f6b9c7ba15c857f4cb2c5defeec39d4f410a"
   integrity sha512-DpKm2Ui/xN7/HQKCtpZxoRWBhZ9Z0kqtygG8XCgNQ8ZlDnxuQmWhj566j8fN4Cu3/JmbhsDo7fcAJq4s9h27Ew==
@@ -9867,7 +9435,7 @@ w3c-xmlserializer@^2.0.0:
   dependencies:
     xml-name-validator "^3.0.0"
 
-walker@^1.0.7, walker@~1.0.5:
+walker@^1.0.7:
   version "1.0.7"
   resolved "https://registry.yarnpkg.com/walker/-/walker-1.0.7.tgz#2f7f9b8fd10d677262b18a884e28d19618e028fb"
   integrity sha1-L3+bj9ENZ3JisYqITijRlhjgKPs=
@@ -9875,9 +9443,9 @@ walker@^1.0.7, walker@~1.0.5:
     makeerror "1.0.x"
 
 watchpack@^2.0.0:
-  version "2.1.1"
-  resolved "https://registry.yarnpkg.com/watchpack/-/watchpack-2.1.1.tgz#e99630550fca07df9f90a06056987baa40a689c7"
-  integrity sha512-Oo7LXCmc1eE1AjyuSBmtC3+Wy4HcV8PxWh2kP6fOl8yTlNS7r0K9l1ao2lrrUza7V39Y3D/BbJgY8VeSlc5JKw==
+  version "2.2.0"
+  resolved "https://registry.yarnpkg.com/watchpack/-/watchpack-2.2.0.tgz#47d78f5415fe550ecd740f99fe2882323a58b1ce"
+  integrity sha512-up4YAn/XHgZHIxFBVCdlMiWDj6WaLKpwVeGQk2I5thdYxF/KmF0aaz6TfJZ/hfl1h/XlcDr7k1KH7ThDagpFaA==
   dependencies:
     glob-to-regexp "^0.4.1"
     graceful-fs "^4.1.2"
@@ -9899,11 +9467,6 @@ web-streams-polyfill@3.0.3:
   resolved "https://registry.yarnpkg.com/web-streams-polyfill/-/web-streams-polyfill-3.0.3.tgz#f49e487eedeca47a207c1aee41ee5578f884b42f"
   integrity sha512-d2H/t0eqRNM4w2WvmTdoeIvzAUSpK7JmATB8Nr2lb7nQ9BTIJVjbQ/TRFVEh2gUH1HwclPdoPtfMoFfetXaZnA==
 
-webidl-conversions@^4.0.2:
-  version "4.0.2"
-  resolved "https://registry.yarnpkg.com/webidl-conversions/-/webidl-conversions-4.0.2.tgz#a855980b1f0b6b359ba1d5d9fb39ae941faa63ad"
-  integrity sha512-YQ+BmxuTgd6UXZW3+ICGfyqRyHXVlD5GtQr5+qjiNW7bF0cqrzX500HVXPBOvgXb5YnzDd+h0zqyv61KUD7+Sg==
-
 webidl-conversions@^5.0.0:
   version "5.0.0"
   resolved "https://registry.yarnpkg.com/webidl-conversions/-/webidl-conversions-5.0.0.tgz#ae59c8a00b121543a2acc65c0434f57b0fc11aff"
@@ -9922,10 +9485,10 @@ webpack-sources@^2.1.1:
     source-list-map "^2.0.1"
     source-map "^0.6.1"
 
-webpack@5.36.2:
-  version "5.36.2"
-  resolved "https://registry.yarnpkg.com/webpack/-/webpack-5.36.2.tgz#6ef1fb2453ad52faa61e78d486d353d07cca8a0f"
-  integrity sha512-XJumVnnGoH2dV+Pk1VwgY4YT6AiMKpVoudUFCNOXMIVrEKPUgEwdIfWPjIuGLESAiS8EdIHX5+TiJz/5JccmRg==
+webpack@5.37.1:
+  version "5.37.1"
+  resolved "https://registry.yarnpkg.com/webpack/-/webpack-5.37.1.tgz#2deb5acd350583c1ab9338471f323381b0b0c14b"
+  integrity sha512-btZjGy/hSjCAAVHw+cKG+L0M+rstlyxbO2C+BOTaQ5/XAnxkDrP5sVbqWhXgo4pL3X2dcOib6rqCP20Zr9PLow==
   dependencies:
     "@types/eslint-scope" "^3.7.0"
     "@types/estree" "^0.0.47"
@@ -9963,16 +9526,7 @@ whatwg-mimetype@^2.3.0:
   resolved "https://registry.yarnpkg.com/whatwg-mimetype/-/whatwg-mimetype-2.3.0.tgz#3d4b1e0312d2079879f826aff18dbeeca5960fbf"
   integrity sha512-M4yMwr6mAnQz76TbJm914+gPpB/nCwvZbJU28cUD6dR004SAxDLOOSUaB1JDRqLtaOV/vi0IC5lEAGFgrjGv/g==
 
-whatwg-url@^7.0.0:
-  version "7.1.0"
-  resolved "https://registry.yarnpkg.com/whatwg-url/-/whatwg-url-7.1.0.tgz#c2c492f1eca612988efd3d2266be1b9fc6170d06"
-  integrity sha512-WUu7Rg1DroM7oQvGWfOiAK21n74Gg+T4elXEQYkOhtyLeWiJFoOGLXPKI/9gzIie9CtwVLm8wtw6YJdKyxSjeg==
-  dependencies:
-    lodash.sortby "^4.7.0"
-    tr46 "^1.0.1"
-    webidl-conversions "^4.0.2"
-
-whatwg-url@^8.0.0, whatwg-url@^8.5.0:
+whatwg-url@^8.0.0, whatwg-url@^8.4.0, whatwg-url@^8.5.0:
   version "8.5.0"
   resolved "https://registry.yarnpkg.com/whatwg-url/-/whatwg-url-8.5.0.tgz#7752b8464fc0903fec89aa9846fc9efe07351fd3"
   integrity sha512-fy+R77xWv0AiqfLl4nuGUlQ3/6b5uNfQ4WAbGQVMYshCTCCPK9psC1nWh3XHuxGVCtlcDDQPQW1csmmIQo+fwg==
@@ -9997,11 +9551,6 @@ which-module@^1.0.0:
   resolved "https://registry.yarnpkg.com/which-module/-/which-module-1.0.0.tgz#bba63ca861948994ff307736089e3b96026c2a4f"
   integrity sha1-u6Y8qGGUiZT/MHc2CJ47lgJsKk8=
 
-which-module@^2.0.0:
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/which-module/-/which-module-2.0.0.tgz#d9ef07dce77b9902b8a3a8fa4b31c3e3f7e6e87a"
-  integrity sha1-2e8H3Od7mQK4o6j6SzHD4/fm6Ho=
-
 which@^1.2.14, which@^1.2.9, which@^1.3.1:
   version "1.3.1"
   resolved "https://registry.yarnpkg.com/which/-/which-1.3.1.tgz#a45043d54f5805316da8d62f9f50918d3da70b0a"
@@ -10023,13 +9572,6 @@ wide-align@^1.1.0:
   dependencies:
     string-width "^1.0.2 || 2"
 
-windows-release@^3.1.0:
-  version "3.3.3"
-  resolved "https://registry.yarnpkg.com/windows-release/-/windows-release-3.3.3.tgz#1c10027c7225743eec6b89df160d64c2e0293999"
-  integrity sha512-OSOGH1QYiW5yVor9TtmXKQvt2vjQqbYS+DqmsZw+r7xDwLXEeT3JGW0ZppFmHx4diyXmxt238KFR3N9jzevBRg==
-  dependencies:
-    execa "^1.0.0"
-
 word-wrap@^1.2.3, word-wrap@~1.2.3:
   version "1.2.3"
   resolved "https://registry.yarnpkg.com/word-wrap/-/word-wrap-1.2.3.tgz#610636f6b1f703891bd34771ccb17fb93b47079c"
@@ -10056,15 +9598,6 @@ wrap-ansi@^2.0.0:
     string-width "^1.0.1"
     strip-ansi "^3.0.1"
 
-wrap-ansi@^5.1.0:
-  version "5.1.0"
-  resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-5.1.0.tgz#1fd1f67235d5b6d0fee781056001bfb694c03b09"
-  integrity sha512-QC1/iN/2/RPVJ5jYK8BGttj5z83LmSKmvbvrXPNCLZSEb32KKVDJDl/MOt2N01qU2H/FkzEa9PKto1BqDjtd7Q==
-  dependencies:
-    ansi-styles "^3.2.0"
-    string-width "^3.0.0"
-    strip-ansi "^5.0.0"
-
 wrap-ansi@^6.2.0:
   version "6.2.0"
   resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-6.2.0.tgz#e9393ba07102e6c91a3b221478f0257cd2856e53"
@@ -10074,12 +9607,21 @@ wrap-ansi@^6.2.0:
     string-width "^4.1.0"
     strip-ansi "^6.0.0"
 
+wrap-ansi@^7.0.0:
+  version "7.0.0"
+  resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz#67e145cff510a6a6984bdf1152911d69d2eb9e43"
+  integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==
+  dependencies:
+    ansi-styles "^4.0.0"
+    string-width "^4.1.0"
+    strip-ansi "^6.0.0"
+
 wrappy@1:
   version "1.0.2"
   resolved "https://registry.yarnpkg.com/wrappy/-/wrappy-1.0.2.tgz#b5243d8f3ec1aa35f1364605bc0d1036e30ab69f"
   integrity sha1-tSQ9jz7BqjXxNkYFvA0QNuMKtp8=
 
-write-file-atomic@^2.0.0, write-file-atomic@^2.3.0, write-file-atomic@^2.4.2:
+write-file-atomic@^2.4.2:
   version "2.4.3"
   resolved "https://registry.yarnpkg.com/write-file-atomic/-/write-file-atomic-2.4.3.tgz#1fd2e9ae1df3e75b8d8c367443c692d4ca81f481"
   integrity sha512-GaETH5wwsX+GcnzhPgKcKjJ6M2Cq3/iZp1WyY/X1CSqrW+jVNM9Y7D8EC2sM4ZG/V8wZlSniJnCKWPmBYAucRQ==
@@ -10088,7 +9630,7 @@ write-file-atomic@^2.0.0, write-file-atomic@^2.3.0, write-file-atomic@^2.4.2:
     imurmurhash "^0.1.4"
     signal-exit "^3.0.2"
 
-write-file-atomic@^3.0.0:
+write-file-atomic@^3.0.0, write-file-atomic@^3.0.3:
   version "3.0.3"
   resolved "https://registry.yarnpkg.com/write-file-atomic/-/write-file-atomic-3.0.3.tgz#56bd5c5a5c70481cd19c571bd39ab965a5de56e8"
   integrity sha512-AvHcyZ5JnSfq3ioSyjrBkH9yW4m7Ayk8/9My/DD9onKeu/94fwrMocemO2QAJFAlnnDN+ZDS+ZjAR5ua1/PV/Q==
@@ -10098,18 +9640,6 @@ write-file-atomic@^3.0.0:
     signal-exit "^3.0.2"
     typedarray-to-buffer "^3.1.5"
 
-write-json-file@^2.2.0:
-  version "2.3.0"
-  resolved "https://registry.yarnpkg.com/write-json-file/-/write-json-file-2.3.0.tgz#2b64c8a33004d54b8698c76d585a77ceb61da32f"
-  integrity sha1-K2TIozAE1UuGmMdtWFp3zrYdoy8=
-  dependencies:
-    detect-indent "^5.0.0"
-    graceful-fs "^4.1.2"
-    make-dir "^1.0.0"
-    pify "^3.0.0"
-    sort-keys "^2.0.0"
-    write-file-atomic "^2.0.0"
-
 write-json-file@^3.2.0:
   version "3.2.0"
   resolved "https://registry.yarnpkg.com/write-json-file/-/write-json-file-3.2.0.tgz#65bbdc9ecd8a1458e15952770ccbadfcff5fe62a"
@@ -10122,36 +9652,49 @@ write-json-file@^3.2.0:
     sort-keys "^2.0.0"
     write-file-atomic "^2.4.2"
 
-write-pkg@^3.1.0:
-  version "3.2.0"
-  resolved "https://registry.yarnpkg.com/write-pkg/-/write-pkg-3.2.0.tgz#0e178fe97820d389a8928bc79535dbe68c2cff21"
-  integrity sha512-tX2ifZ0YqEFOF1wjRW2Pk93NLsj02+n1UP5RvO6rCs0K6R2g1padvf006cY74PQJKMGS2r42NK7FD0dG6Y6paw==
+write-json-file@^4.3.0:
+  version "4.3.0"
+  resolved "https://registry.yarnpkg.com/write-json-file/-/write-json-file-4.3.0.tgz#908493d6fd23225344af324016e4ca8f702dd12d"
+  integrity sha512-PxiShnxf0IlnQuMYOPPhPkhExoCQuTUNPOa/2JWCYTmBquU9njyyDuwRKN26IZBlp4yn1nt+Agh2HOOBl+55HQ==
+  dependencies:
+    detect-indent "^6.0.0"
+    graceful-fs "^4.1.15"
+    is-plain-obj "^2.0.0"
+    make-dir "^3.0.0"
+    sort-keys "^4.0.0"
+    write-file-atomic "^3.0.0"
+
+write-pkg@^4.0.0:
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/write-pkg/-/write-pkg-4.0.0.tgz#675cc04ef6c11faacbbc7771b24c0abbf2a20039"
+  integrity sha512-v2UQ+50TNf2rNHJ8NyWttfm/EJUBWMJcx6ZTYZr6Qp52uuegWw/lBkCtCbnYZEmPRNL61m+u67dAmGxo+HTULA==
   dependencies:
     sort-keys "^2.0.0"
-    write-json-file "^2.2.0"
+    type-fest "^0.4.1"
+    write-json-file "^3.2.0"
 
-ws@^7.4.4:
-  version "7.4.4"
-  resolved "https://registry.yarnpkg.com/ws/-/ws-7.4.4.tgz#383bc9742cb202292c9077ceab6f6047b17f2d59"
-  integrity sha512-Qm8k8ojNQIMx7S+Zp8u/uHOx7Qazv3Yv4q68MiWWWOJhiwG5W3x7iqmRtJo8xxrciZUY4vRxUTJCKuRnF28ZZw==
+ws@^7.4.5:
+  version "7.4.6"
+  resolved "https://registry.yarnpkg.com/ws/-/ws-7.4.6.tgz#5654ca8ecdeee47c33a9a4bf6d28e2be2980377c"
+  integrity sha512-YmhHDO4MzaDLB+M9ym/mDA5z0naX8j7SIlT8f8z+I0VtzsRbekxEutHSme7NPS2qE8StCYQNUnfWdXta/Yu85A==
 
 xml-name-validator@^3.0.0:
   version "3.0.0"
   resolved "https://registry.yarnpkg.com/xml-name-validator/-/xml-name-validator-3.0.0.tgz#6ae73e06de4d8c6e47f9fb181f78d648ad457c6a"
   integrity sha512-A5CUptxDsvxKJEU3yO6DuWBSJz/qizqzJKOMIfUJHETbBw/sFaDxgd6fxm1ewUaM0jZ444Fc5vC5ROYurg/4Pw==
 
-xml2js@0.4.19:
-  version "0.4.19"
-  resolved "https://registry.yarnpkg.com/xml2js/-/xml2js-0.4.19.tgz#686c20f213209e94abf0d1bcf1efaa291c7827a7"
-  integrity sha512-esZnJZJOiJR9wWKMyuvSE1y6Dq5LCuJanqhxslH2bxM6duahNZ+HMpCLhBQGZkbX6xRf8x1Y2eJlgt2q3qo49Q==
+xml2js@0.4.23:
+  version "0.4.23"
+  resolved "https://registry.yarnpkg.com/xml2js/-/xml2js-0.4.23.tgz#a0c69516752421eb2ac758ee4d4ccf58843eac66"
+  integrity sha512-ySPiMjM0+pLDftHgXY4By0uswI3SPKLDw/i3UXbnO8M/p28zqexCUoPmQFrYD+/1BzhGJSs2i1ERWKJAtiLrug==
   dependencies:
     sax ">=0.6.0"
-    xmlbuilder "~9.0.1"
+    xmlbuilder "~11.0.0"
 
-xmlbuilder@~9.0.1:
-  version "9.0.7"
-  resolved "https://registry.yarnpkg.com/xmlbuilder/-/xmlbuilder-9.0.7.tgz#132ee63d2ec5565c557e20f4c22df9aca686b10d"
-  integrity sha1-Ey7mPS7FVlxVfiD0wi35rKaGsQ0=
+xmlbuilder@~11.0.0:
+  version "11.0.1"
+  resolved "https://registry.yarnpkg.com/xmlbuilder/-/xmlbuilder-11.0.1.tgz#be9bae1c8a046e76b31127726347d0ad7002beb3"
+  integrity sha512-fDlsI/kFEx7gLvbecc0/ohLG50fugQp8ryHzMTuW9vSa1GJ0XYWKnhsUx7oie3G98+r56aTQIUB4kht42R3JvA==
 
 xmlchars@^2.2.0:
   version "2.2.0"
@@ -10168,10 +9711,10 @@ y18n@^3.2.1:
   resolved "https://registry.yarnpkg.com/y18n/-/y18n-3.2.2.tgz#85c901bd6470ce71fc4bb723ad209b70f7f28696"
   integrity sha512-uGZHXkHnhF0XeeAPgnKfPv1bgKAYyVvmNL1xlKsPYZPaIHxGti2hHqvOCQv71XMsLxu1QjergkqogUnms5D3YQ==
 
-y18n@^4.0.0:
-  version "4.0.3"
-  resolved "https://registry.yarnpkg.com/y18n/-/y18n-4.0.3.tgz#b5f259c82cd6e336921efd7bfd8bf560de9eeedf"
-  integrity sha512-JKhqTOwSrqNA1NY5lSztJ1GrBiUodLMmIZuLiDaMRJ+itFd+ABVE8XBjOvIWL+rSqNDC74LCSFmlb/U4UZ4hJQ==
+y18n@^5.0.5:
+  version "5.0.8"
+  resolved "https://registry.yarnpkg.com/y18n/-/y18n-5.0.8.tgz#7f4934d0f7ca8c56f95314939ddcd2dd91ce1d55"
+  integrity sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==
 
 yallist@^3.0.0, yallist@^3.0.2, yallist@^3.0.3:
   version "3.1.1"
@@ -10183,28 +9726,22 @@ yallist@^4.0.0:
   resolved "https://registry.yarnpkg.com/yallist/-/yallist-4.0.0.tgz#9bb92790d9c0effec63be73519e11a35019a3a72"
   integrity sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==
 
-yargs-parser@20.x, yargs-parser@^20.2.3:
+yaml@^1.10.0:
+  version "1.10.2"
+  resolved "https://registry.yarnpkg.com/yaml/-/yaml-1.10.2.tgz#2301c5ffbf12b467de8da2333a459e29e7920e4b"
+  integrity sha512-r3vXyErRCYJ7wg28yvBY5VSoAF8ZvlcW9/BwUzEtUsjvX/DKs24dIkuwjtuprwJJHsbyUbLApepYTR1BN4uHrg==
+
+yargs-parser@20.2.4:
+  version "20.2.4"
+  resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-20.2.4.tgz#b42890f14566796f85ae8e3a25290d205f154a54"
+  integrity sha512-WOkpgNhPTlE73h4VFAFsOnomJVaovO8VqLDzy5saChRBFQFBoMYirowyW+Q9HB4HFF4Z7VZTiG3iSzJJA29yRA==
+
+yargs-parser@20.x, yargs-parser@^20.2.2, yargs-parser@^20.2.3:
   version "20.2.7"
   resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-20.2.7.tgz#61df85c113edfb5a7a4e36eb8aa60ef423cbc90a"
   integrity sha512-FiNkvbeHzB/syOjIUxFDCnhSfzAL8R5vs40MgLFBorXACCOAEaWu0gRZl14vG8MR9AOJIZbmkjhusqBYZ3HTHw==
 
-yargs-parser@5.0.0-security.0:
-  version "5.0.0-security.0"
-  resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-5.0.0-security.0.tgz#4ff7271d25f90ac15643b86076a2ab499ec9ee24"
-  integrity sha512-T69y4Ps64LNesYxeYGYPvfoMTt/7y1XtfpIslUeK4um+9Hu7hlGoRtaDLvdXb7+/tfq4opVa2HRY5xGip022rQ==
-  dependencies:
-    camelcase "^3.0.0"
-    object.assign "^4.1.0"
-
-yargs-parser@^15.0.1:
-  version "15.0.1"
-  resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-15.0.1.tgz#54786af40b820dcb2fb8025b11b4d659d76323b3"
-  integrity sha512-0OAMV2mAZQrs3FkNpDQcBk1x5HXb8X4twADss4S0Iuk+2dGnLOE/fRHrsYm542GduMveyA77OF4wrNJuanRCWw==
-  dependencies:
-    camelcase "^5.0.0"
-    decamelize "^1.2.0"
-
-yargs-parser@^18.1.2, yargs-parser@^18.1.3:
+yargs-parser@^18.1.3:
   version "18.1.3"
   resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-18.1.3.tgz#be68c4975c6b2abf469236b0c870362fab09a7b0"
   integrity sha512-o50j0JeToy/4K6OZcaQmW6lyXXKhq7csREXcDwk2omFPJEwUNOVtJKvmDr9EI1fAJZUyZcRF7kxGBWmRXudrCQ==
@@ -10212,44 +9749,31 @@ yargs-parser@^18.1.2, yargs-parser@^18.1.3:
     camelcase "^5.0.0"
     decamelize "^1.2.0"
 
-yargs@^14.2.2:
-  version "14.2.3"
-  resolved "https://registry.yarnpkg.com/yargs/-/yargs-14.2.3.tgz#1a1c3edced1afb2a2fea33604bc6d1d8d688a414"
-  integrity sha512-ZbotRWhF+lkjijC/VhmOT9wSgyBQ7+zr13+YLkhfsSiTriYsMzkTUFP18pFhWwBeMa5gUc1MzbhrO6/VB7c9Xg==
+yargs-parser@^5.0.1:
+  version "5.0.1"
+  resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-5.0.1.tgz#7ede329c1d8cdbbe209bd25cdb990e9b1ebbb394"
+  integrity sha512-wpav5XYiddjXxirPoCTUPbqM0PXvJ9hiBMvuJgInvo4/lAOTZzUprArw17q2O1P2+GHhbBr18/iQwjL5Z9BqfA==
   dependencies:
-    cliui "^5.0.0"
-    decamelize "^1.2.0"
-    find-up "^3.0.0"
-    get-caller-file "^2.0.1"
-    require-directory "^2.1.1"
-    require-main-filename "^2.0.0"
-    set-blocking "^2.0.0"
-    string-width "^3.0.0"
-    which-module "^2.0.0"
-    y18n "^4.0.0"
-    yargs-parser "^15.0.1"
+    camelcase "^3.0.0"
+    object.assign "^4.1.0"
 
-yargs@^15.4.1:
-  version "15.4.1"
-  resolved "https://registry.yarnpkg.com/yargs/-/yargs-15.4.1.tgz#0d87a16de01aee9d8bec2bfbf74f67851730f4f8"
-  integrity sha512-aePbxDmcYW++PaqBsJ+HYUFwCdv4LVvdnhBy78E57PIor8/OVvhMrADFFEDh8DHDFRv/O9i3lPhsENjO7QX0+A==
+yargs@^16.0.3, yargs@^16.2.0:
+  version "16.2.0"
+  resolved "https://registry.yarnpkg.com/yargs/-/yargs-16.2.0.tgz#1c82bf0f6b6a66eafce7ef30e376f49a12477f66"
+  integrity sha512-D1mvvtDG0L5ft/jGWkLpG1+m0eQxOfaBvTNELraWj22wSVUMWxZUvYgJYcKh6jGGIkJFhH4IZPQhR4TKpc8mBw==
   dependencies:
-    cliui "^6.0.0"
-    decamelize "^1.2.0"
-    find-up "^4.1.0"
-    get-caller-file "^2.0.1"
+    cliui "^7.0.2"
+    escalade "^3.1.1"
+    get-caller-file "^2.0.5"
     require-directory "^2.1.1"
-    require-main-filename "^2.0.0"
-    set-blocking "^2.0.0"
     string-width "^4.2.0"
-    which-module "^2.0.0"
-    y18n "^4.0.0"
-    yargs-parser "^18.1.2"
+    y18n "^5.0.5"
+    yargs-parser "^20.2.2"
 
 yargs@^7.1.0:
-  version "7.1.1"
-  resolved "https://registry.yarnpkg.com/yargs/-/yargs-7.1.1.tgz#67f0ef52e228d4ee0d6311acede8850f53464df6"
-  integrity sha512-huO4Fr1f9PmiJJdll5kwoS2e4GqzGSsMT3PPMpOwoVkOK8ckqAewMTZyA6LXVQWflleb/Z8oPBEvNsMft0XE+g==
+  version "7.1.2"
+  resolved "https://registry.yarnpkg.com/yargs/-/yargs-7.1.2.tgz#63a0a5d42143879fdbb30370741374e0641d55db"
+  integrity sha512-ZEjj/dQYQy0Zx0lgLMLR8QuaqTihnxirir7EwUHp1Axq4e3+k8jXU5K0VLbNvedv1f4EWtBonDIZm0NUr+jCcA==
   dependencies:
     camelcase "^3.0.0"
     cliui "^3.2.0"
@@ -10263,7 +9787,7 @@ yargs@^7.1.0:
     string-width "^1.0.2"
     which-module "^1.0.0"
     y18n "^3.2.1"
-    yargs-parser "5.0.0-security.0"
+    yargs-parser "^5.0.1"
 
 yn@3.1.1:
   version "3.1.1"

From c420ce284ceb5ee3c807b1a7159bb11d4bc88b3a Mon Sep 17 00:00:00 2001
From: Dominik Moritz <domoritz@gmail.com>
Date: Wed, 2 Jun 2021 20:01:33 -0700
Subject: [PATCH 336/719] ARROW-12393: [JS] Use closure compiler for all UMD
 targets

Closure compiler creates smaller bundles.

This branch

```
targets/es5/umd/:
695k -I Arrow.es5.min.js.map
 55k -I Arrow.externs.js
254k -I Arrow.js

targets/es2015/umd/:
534k -I Arrow.es2015.min.js.map
 55k -I Arrow.externs.js
180k -I Arrow.js

targets/esnext/umd/:
512k -I Arrow.esnext.min.js.map
 55k -I Arrow.externs.js
172k -I Arrow.js
```

Master (which uses closure only for es5 targets and terser otherwise)

```
targets/es5/umd/:
694k -I Arrow.es5.min.js.map
 55k -I Arrow.externs.js
253k -I Arrow.js

targets/es2015/umd/:
1.2M -I Arrow.es2015.min.js.map
233k -I Arrow.js

targets/esnext/umd/:
1.1M -I Arrow.esnext.min.js.map
225k -I Arrow.js
```

Closes #10281 from domoritz/dom/closure-everything-but-es5

Lead-authored-by: Dominik Moritz <domoritz@gmail.com>
Co-authored-by: p42-ai[bot] <72252241+p42-ai[bot]@users.noreply.github.com>
Signed-off-by: Dominik Moritz <domoritz@gmail.com>
---
 js/gulp/closure-task.js              |   6 +-
 js/gulp/compile-task.js              |   4 +-
 js/gulp/minify-task.js               |  87 ---
 js/gulp/util.js                      |  35 +-
 js/gulpfile.js                       |   5 +-
 js/src/util/math.ts                  |   3 +-
 js/tsconfig/tsconfig.es2015.cls.json |   4 +-
 js/tsconfig/tsconfig.esnext.cls.json |   4 +-
 js/yarn.lock                         | 981 ++++++++++++++-------------
 9 files changed, 543 insertions(+), 586 deletions(-)
 delete mode 100644 js/gulp/minify-task.js

diff --git a/js/gulp/closure-task.js b/js/gulp/closure-task.js
index 46e87e9f99e..6e5a61d82b5 100644
--- a/js/gulp/closure-task.js
+++ b/js/gulp/closure-task.js
@@ -75,7 +75,7 @@ const closureTask = ((cache) => memoizeTask(cache, async function closure(target
                 `${src}/**/*.js` /* <-- then source globs */
             ], { base: `./` }),
             sourcemaps.init(),
-            closureCompiler(createClosureArgs(entry_point, externs), {
+            closureCompiler(createClosureArgs(entry_point, externs, target), {
                 platform: ['native', 'java', 'javascript']
             }),
             // rename the sourcemaps from *.js.map files to *.min.js.map
@@ -88,7 +88,7 @@ const closureTask = ((cache) => memoizeTask(cache, async function closure(target
 module.exports = closureTask;
 module.exports.closureTask = closureTask;
 
-const createClosureArgs = (entry_point, externs) => ({
+const createClosureArgs = (entry_point, externs, target) => ({
     externs,
     entry_point,
     third_party: true,
@@ -103,7 +103,7 @@ const createClosureArgs = (entry_point, externs) => ({
     assume_function_wrapper: true,
     js_output_file: `${mainExport}.js`,
     language_in: gCCLanguageNames[`esnext`],
-    language_out: gCCLanguageNames[`es5`],
+    language_out: gCCLanguageNames[target],
     output_wrapper:`${apacheHeader()}
 (function (global, factory) {
     typeof exports === 'object' && typeof module !== 'undefined' ? factory(exports) :
diff --git a/js/gulp/compile-task.js b/js/gulp/compile-task.js
index 60e2ebbe36a..07109ef73e0 100644
--- a/js/gulp/compile-task.js
+++ b/js/gulp/compile-task.js
@@ -19,7 +19,6 @@ const { Observable } = require('rxjs');
 const { npmPkgName } = require('./util');
 const { memoizeTask } = require('./memoize-task');
 
-const minifyTask = require('./minify-task');
 const closureTask = require('./closure-task');
 const typescriptTask = require('./typescript-task');
 const { arrowTask, arrowTSTask } = require('./arrow-task');
@@ -28,8 +27,7 @@ const compileTask = ((cache) => memoizeTask(cache, function compile(target, form
     return target === `src`                    ? Observable.empty()
          : target === npmPkgName               ? arrowTask(target, format, ...args)()
          : target === `ts`                     ? arrowTSTask(target, format, ...args)()
-         : format === `umd` ? target === `es5` ? closureTask(target, format, ...args)()
-                                               : minifyTask(target, format, ...args)()
+         : format === `umd`                    ? closureTask(target, format, ...args)()
                                                : typescriptTask(target, format, ...args)();
 }))({});
 
diff --git a/js/gulp/minify-task.js b/js/gulp/minify-task.js
deleted file mode 100644
index d987b72e675..00000000000
--- a/js/gulp/minify-task.js
+++ /dev/null
@@ -1,87 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-const {
-    targetDir,
-    mainExport,
-    UMDSourceTargets,
-    shouldRunInChildProcess,
-    spawnGulpCommandInChildProcess,
-} = require('./util');
-
-const path = require('path');
-const webpack = require(`webpack`);
-const { memoizeTask } = require('./memoize-task');
-const { compileBinFiles } = require('./typescript-task');
-const { Observable, ReplaySubject } = require('rxjs');
-const TerserPlugin = require(`terser-webpack-plugin`);
-
-const minifyTask = ((cache, commonConfig) => memoizeTask(cache, function minifyJS(target, format) {
-
-    if (shouldRunInChildProcess(target, format)) {
-        return spawnGulpCommandInChildProcess('compile', target, format);
-    }
-
-    const sourceTarget = UMDSourceTargets[target];
-    const out = targetDir(target, format), src = targetDir(sourceTarget, `cls`);
-
-    const targetConfig = { ...commonConfig,
-        output: { ...commonConfig.output,
-            path: path.resolve(`./${out}`) } };
-
-    const webpackConfigs = [mainExport].map((entry) => ({
-        ...targetConfig,
-        mode: 'production',
-        name: entry,
-        entry: { [entry]: path.resolve(`${src}/${entry}.dom.js`) },
-        plugins: [
-            ...(targetConfig.plugins || []),
-            new webpack.SourceMapDevToolPlugin({
-                filename: `[name].${target}.min.js.map`,
-                moduleFilenameTemplate: ({ resourcePath }) =>
-                    resourcePath
-                        .replace(/\s/, `_`)
-                        .replace(/\.\/node_modules\//, ``)
-            })
-        ],
-        optimization: {
-            minimize: true,
-            minimizer: [
-                new TerserPlugin({
-                    terserOptions: {
-                        ecma: target,
-                        output: { comments: false },
-                        compress: { unsafe: true }
-                    },
-                })
-            ]
-        }
-    }));
-
-    const compilers = webpack(webpackConfigs);
-    return Observable
-            .bindNodeCallback(compilers.run.bind(compilers))()
-            .merge(compileBinFiles(target, format)).takeLast(1)
-            .multicast(new ReplaySubject()).refCount();
-}))({}, {
-    resolve: { mainFields: [`module`, `main`] },
-    module: { rules: [{ test: /\.js$/, enforce: `pre`, use: [`source-map-loader`] }] },
-    output: { filename: '[name].js', library: mainExport, libraryTarget: `umd`, umdNamedDefine: true, globalObject: 'this' },
-});
-
-module.exports = minifyTask;
-module.exports.minifyTask = minifyTask;
diff --git a/js/gulp/util.js b/js/gulp/util.js
index 8a72c5356d6..cf53d0ef18b 100644
--- a/js/gulp/util.js
+++ b/js/gulp/util.js
@@ -42,7 +42,7 @@ const packageJSONFields = [
 ];
 
 const metadataFiles = [`LICENSE.txt`, `NOTICE.txt`, `README.md`].map((filename) => {
-    let err = false, prefixes = [`./`, `../`];
+    let prefixes = [`./`, `../`];
     let p = prefixes.find((prefix) => {
         try {
             fs.statSync(path.resolve(path.join(prefix, filename)));
@@ -66,32 +66,6 @@ const gCCLanguageNames = {
  esnext: `ECMASCRIPT_NEXT`
 };
 
-const UMDSourceTargets = {
-    es5: `es5`,
- es2015: `es2015`,
- es2016: `es2016`,
- es2017: `es2017`,
- esnext: `esnext`
-};
-
-// ES7+ keywords Terser shouldn't mangle
-// Hardcoded here since some are from ES7+, others are
-// only defined in interfaces, so difficult to get by reflection.
-const ESKeywords = [
-    // PropertyDescriptors
-    `configurable`, `enumerable`,
-    // IteratorResult, Symbol.asyncIterator
-    `done`, `value`, `Symbol.asyncIterator`, `asyncIterator`,
-    // AsyncObserver
-    `values`, `hasError`, `hasCompleted`,`errorValue`, `closed`,
-    // Observable/Subscription/Scheduler
-    `next`, `error`, `complete`, `subscribe`, `unsubscribe`, `isUnsubscribed`,
-    // EventTarget
-    `addListener`, `removeListener`, `addEventListener`, `removeEventListener`,
-    // Arrow properties
-    `low`, `high`, `data`, `index`, `field`, `columns`, 'numCols', 'numRows', `values`, `valueOffsets`, `nullBitmap`, `subarray`
-];
-
 function taskName(target, format) {
     return !format ? target : `${target}:${format}`;
 }
@@ -144,7 +118,6 @@ function observableFromStreams(...streams) {
 }
 
 function* combinations(_targets, _modules) {
-
     const targets = known(knownTargets, _targets || [`all`]);
     const modules = known(knownModules, _modules || [`all`]);
 
@@ -203,12 +176,10 @@ const esmRequire = require(`esm`)(module, {
 });
 
 module.exports = {
-
     mainExport, npmPkgName, npmOrgName, metadataFiles, packageJSONFields,
 
-    knownTargets, knownModules, tasksToSkipPerTargetOrFormat,
-    gCCLanguageNames, UMDSourceTargets,
+    knownTargets, knownModules, tasksToSkipPerTargetOrFormat, gCCLanguageNames,
 
     taskName, packageName, tsconfigName, targetDir, combinations, observableFromStreams,
-    ESKeywords, publicModulePaths, esmRequire, shouldRunInChildProcess, spawnGulpCommandInChildProcess
+    publicModulePaths, esmRequire, shouldRunInChildProcess, spawnGulpCommandInChildProcess
 };
diff --git a/js/gulpfile.js b/js/gulpfile.js
index 271bd3426d8..019f5b0e056 100644
--- a/js/gulpfile.js
+++ b/js/gulpfile.js
@@ -26,8 +26,7 @@ const { testTask, createTestData, cleanTestData } = require('./gulp/test-task');
 const {
     taskName, combinations,
     targetDir, knownTargets,
-    npmPkgName, UMDSourceTargets,
-    tasksToSkipPerTargetOrFormat
+    npmPkgName, tasksToSkipPerTargetOrFormat
 } = require('./gulp/util');
 
 for (const [target, format] of combinations([`all`], [`all`])) {
@@ -46,7 +45,7 @@ for (const [target, format] of combinations([`all`], [`all`])) {
 // a minifier, so we special case that here.
 knownTargets.forEach((target) => {
     const umd = taskName(target, `umd`);
-    const cls = taskName(UMDSourceTargets[target], `cls`);
+    const cls = taskName(target, `cls`);
     gulp.task(`build:${umd}`, gulp.series(
         `build:${cls}`,
         `clean:${umd}`, `compile:${umd}`, `package:${umd}`,
diff --git a/js/src/util/math.ts b/js/src/util/math.ts
index 47678e1a961..c61f31c25dc 100644
--- a/js/src/util/math.ts
+++ b/js/src/util/math.ts
@@ -28,7 +28,8 @@ const u32 = new Uint32Array(f64.buffer);
 export function uint16ToFloat64(h: number) {
     const expo = (h & 0x7C00) >> 10;
     const sigf = (h & 0x03FF) / 1024;
-    const sign = (-1) ** ((h & 0x8000) >> 15);
+    // use Math.pow to prevent closure compiler from creating incorrect js: https://github.com/google/closure-compiler/issues/3810
+    const sign = Math.pow(-1, (h & 0x8000) >> 15);
     switch (expo) {
         case 0x1F: return sign * (sigf ? NaN : 1 / 0);
         case 0x00: return sign * (sigf ? 6.103515625e-5 * sigf : 0);
diff --git a/js/tsconfig/tsconfig.es2015.cls.json b/js/tsconfig/tsconfig.es2015.cls.json
index ae1b25f2f36..fe2f0b4022a 100644
--- a/js/tsconfig/tsconfig.es2015.cls.json
+++ b/js/tsconfig/tsconfig.es2015.cls.json
@@ -2,10 +2,10 @@
 {
   "extends": "./tsconfig.base.json",
   "compilerOptions": {
-    "target": "ES2015",
+    "target": "esnext",
     "module": "es2015",
     "declaration": false,
     "noEmitHelpers": true,
-    "importHelpers": true
+    "importHelpers": false
   }
 }
diff --git a/js/tsconfig/tsconfig.esnext.cls.json b/js/tsconfig/tsconfig.esnext.cls.json
index 3c68218d81f..176a72ba614 100644
--- a/js/tsconfig/tsconfig.esnext.cls.json
+++ b/js/tsconfig/tsconfig.esnext.cls.json
@@ -2,10 +2,10 @@
 {
   "extends": "./tsconfig.base.json",
   "compilerOptions": {
-    "target": "ESNEXT",
+    "target": "esnext",
     "module": "es2015",
     "declaration": false,
     "noEmitHelpers": true,
-    "importHelpers": true
+    "importHelpers": false
   }
 }
diff --git a/js/yarn.lock b/js/yarn.lock
index c8047a2dda7..e8ef060e131 100644
--- a/js/yarn.lock
+++ b/js/yarn.lock
@@ -50,10 +50,10 @@
   dependencies:
     "@babel/highlight" "^7.12.13"
 
-"@babel/compat-data@^7.13.15":
-  version "7.14.0"
-  resolved "https://registry.yarnpkg.com/@babel/compat-data/-/compat-data-7.14.0.tgz#a901128bce2ad02565df95e6ecbf195cf9465919"
-  integrity sha512-vu9V3uMM/1o5Hl5OekMUowo3FqXLJSw+s+66nt0fSWVWTtmosdzn45JHOB3cPtZoe6CTBDzvSw0RdOY85Q37+Q==
+"@babel/compat-data@^7.14.4":
+  version "7.14.4"
+  resolved "https://registry.yarnpkg.com/@babel/compat-data/-/compat-data-7.14.4.tgz#45720fe0cecf3fd42019e1d12cc3d27fadc98d58"
+  integrity sha512-i2wXrWQNkH6JplJQGn3Rd2I4Pij8GdHkXwHMxm+zV5YG/Jci+bCNrWZEWC4o+umiDkRrRs4dVzH3X4GP7vyjQQ==
 
 "@babel/core@^7.1.0", "@babel/core@^7.7.2", "@babel/core@^7.7.5":
   version "7.14.3"
@@ -86,13 +86,13 @@
     source-map "^0.5.0"
 
 "@babel/helper-compilation-targets@^7.13.16":
-  version "7.13.16"
-  resolved "https://registry.yarnpkg.com/@babel/helper-compilation-targets/-/helper-compilation-targets-7.13.16.tgz#6e91dccf15e3f43e5556dffe32d860109887563c"
-  integrity sha512-3gmkYIrpqsLlieFwjkGgLaSHmhnvlAYzZLlYVjlW+QwI+1zE17kGxuJGmIqDQdYp56XdmGeD+Bswx0UTyG18xA==
+  version "7.14.4"
+  resolved "https://registry.yarnpkg.com/@babel/helper-compilation-targets/-/helper-compilation-targets-7.14.4.tgz#33ebd0ffc34248051ee2089350a929ab02f2a516"
+  integrity sha512-JgdzOYZ/qGaKTVkn5qEDV/SXAh8KcyUVkCoSWGN8T3bwrgd6m+/dJa2kVGi6RJYJgEYPBdZ84BZp9dUjNWkBaA==
   dependencies:
-    "@babel/compat-data" "^7.13.15"
+    "@babel/compat-data" "^7.14.4"
     "@babel/helper-validator-option" "^7.12.17"
-    browserslist "^4.14.5"
+    browserslist "^4.16.6"
     semver "^6.3.0"
 
 "@babel/helper-function-name@^7.14.2":
@@ -152,14 +152,14 @@
   integrity sha512-ZPafIPSwzUlAoWT8DKs1W2VyF2gOWthGd5NGFMsBcMMol+ZhK+EQY/e6V96poa6PA/Bh+C9plWN0hXO1uB8AfQ==
 
 "@babel/helper-replace-supers@^7.13.12":
-  version "7.14.3"
-  resolved "https://registry.yarnpkg.com/@babel/helper-replace-supers/-/helper-replace-supers-7.14.3.tgz#ca17b318b859d107f0e9b722d58cf12d94436600"
-  integrity sha512-Rlh8qEWZSTfdz+tgNV/N4gz1a0TMNwCUcENhMjHTHKp3LseYH5Jha0NSlyTQWMnjbYcwFt+bqAMqSLHVXkQ6UA==
+  version "7.14.4"
+  resolved "https://registry.yarnpkg.com/@babel/helper-replace-supers/-/helper-replace-supers-7.14.4.tgz#b2ab16875deecfff3ddfcd539bc315f72998d836"
+  integrity sha512-zZ7uHCWlxfEAAOVDYQpEf/uyi1dmeC7fX4nCf2iz9drnCwi1zvwXL3HwWWNXUQEJ1k23yVn3VbddiI9iJEXaTQ==
   dependencies:
     "@babel/helper-member-expression-to-functions" "^7.13.12"
     "@babel/helper-optimise-call-expression" "^7.12.13"
     "@babel/traverse" "^7.14.2"
-    "@babel/types" "^7.14.2"
+    "@babel/types" "^7.14.4"
 
 "@babel/helper-simple-access@^7.13.12":
   version "7.13.12"
@@ -204,9 +204,9 @@
     js-tokens "^4.0.0"
 
 "@babel/parser@^7.1.0", "@babel/parser@^7.12.13", "@babel/parser@^7.14.2", "@babel/parser@^7.14.3", "@babel/parser@^7.7.2":
-  version "7.14.3"
-  resolved "https://registry.yarnpkg.com/@babel/parser/-/parser-7.14.3.tgz#9b530eecb071fd0c93519df25c5ff9f14759f298"
-  integrity sha512-7MpZDIfI7sUC5zWo2+foJ50CSI5lcqDehZ0lVgIhSi4bFEk94fLAKlF3Q0nzSQQ+ca0lm+O6G9ztKVBeu8PMRQ==
+  version "7.14.4"
+  resolved "https://registry.yarnpkg.com/@babel/parser/-/parser-7.14.4.tgz#a5c560d6db6cd8e6ed342368dea8039232cbab18"
+  integrity sha512-ArliyUsWDUqEGfWcmzpGUzNfLxTdTp6WU4IuP6QFSp9gGfWS6boxFCkJSJ/L4+RG8z/FnIU3WxCk6hPL9SSWeA==
 
 "@babel/plugin-syntax-async-generators@^7.8.4":
   version "7.8.4"
@@ -322,10 +322,10 @@
     debug "^4.1.0"
     globals "^11.1.0"
 
-"@babel/types@^7.0.0", "@babel/types@^7.12.13", "@babel/types@^7.13.12", "@babel/types@^7.14.0", "@babel/types@^7.14.2", "@babel/types@^7.3.0", "@babel/types@^7.3.3":
-  version "7.14.2"
-  resolved "https://registry.yarnpkg.com/@babel/types/-/types-7.14.2.tgz#4208ae003107ef8a057ea8333e56eb64d2f6a2c3"
-  integrity sha512-SdjAG/3DikRHpUOjxZgnkbR11xUlyDMUFJdvnIgZEE16mqmY0BINMmc4//JMJglEmn6i7sq6p+mGrFWyZ98EEw==
+"@babel/types@^7.0.0", "@babel/types@^7.12.13", "@babel/types@^7.13.12", "@babel/types@^7.14.0", "@babel/types@^7.14.2", "@babel/types@^7.14.4", "@babel/types@^7.3.0", "@babel/types@^7.3.3":
+  version "7.14.4"
+  resolved "https://registry.yarnpkg.com/@babel/types/-/types-7.14.4.tgz#bfd6980108168593b38b3eb48a24aa026b919bc0"
+  integrity sha512-lCj4aIs0xUefJFQnwwQv2Bxg7Omd6bgquZ6LGC+gGMh6/s5qDVfjuCMlDmYQ15SLsWHd9n+X3E75lKIhl5Lkiw==
   dependencies:
     "@babel/helper-validator-identifier" "^7.14.0"
     to-fast-properties "^2.0.0"
@@ -385,94 +385,94 @@
   resolved "https://registry.yarnpkg.com/@istanbuljs/schema/-/schema-0.1.3.tgz#e45e384e4b8ec16bce2fd903af78450f6bf7ec98"
   integrity sha512-ZXRY4jNvVgSVQ8DL3LTcakaAtXwTVUxE81hslsyD2AtoXW/wVob10HkOJ1X/pAlcI7D+2YoZKg5do8G/w6RYgA==
 
-"@jest/console@^27.0.1":
-  version "27.0.1"
-  resolved "https://registry.yarnpkg.com/@jest/console/-/console-27.0.1.tgz#c6acfec201f9b6823596eb6c4fcd77c89a8b27e9"
-  integrity sha512-50E6nN2F5cAXn1lDljn0gE9F0WFXHYz/u0EeR7sOt4nbRPNli34ckbl6CUDaDABJbHt62DYnyQAIB3KgdzwKDw==
+"@jest/console@^27.0.2":
+  version "27.0.2"
+  resolved "https://registry.yarnpkg.com/@jest/console/-/console-27.0.2.tgz#b8eeff8f21ac51d224c851e1729d2630c18631e6"
+  integrity sha512-/zYigssuHLImGeMAACkjI4VLAiiJznHgAl3xnFT19iWyct2LhrH3KXOjHRmxBGTkiPLZKKAJAgaPpiU9EZ9K+w==
   dependencies:
-    "@jest/types" "^27.0.1"
+    "@jest/types" "^27.0.2"
     "@types/node" "*"
     chalk "^4.0.0"
-    jest-message-util "^27.0.1"
-    jest-util "^27.0.1"
+    jest-message-util "^27.0.2"
+    jest-util "^27.0.2"
     slash "^3.0.0"
 
-"@jest/core@^27.0.1":
-  version "27.0.1"
-  resolved "https://registry.yarnpkg.com/@jest/core/-/core-27.0.1.tgz#88d0ff55f465fe1fc3a940718e8cf0fea242be4b"
-  integrity sha512-PiCbKSMf6t8PEfY3MAd0Ldn3aJAt5T+UcaFkAfMZ1VZgas35+fXk5uHIjAQHQLNIHZWX19TLv0wWNT03yvrw6w==
-  dependencies:
-    "@jest/console" "^27.0.1"
-    "@jest/reporters" "^27.0.1"
-    "@jest/test-result" "^27.0.1"
-    "@jest/transform" "^27.0.1"
-    "@jest/types" "^27.0.1"
+"@jest/core@^27.0.1", "@jest/core@^27.0.3":
+  version "27.0.3"
+  resolved "https://registry.yarnpkg.com/@jest/core/-/core-27.0.3.tgz#b5a38675fa0466450a7fd465f4b226762cb592a2"
+  integrity sha512-rN8lr/OJ8iApcQUh4khnMaOCVX4oRnLwy2tPW3Vh70y62K8Da8fhkxMUq0xX9VPa4+yWUm0tGc/jUSJi+Jzuwg==
+  dependencies:
+    "@jest/console" "^27.0.2"
+    "@jest/reporters" "^27.0.2"
+    "@jest/test-result" "^27.0.2"
+    "@jest/transform" "^27.0.2"
+    "@jest/types" "^27.0.2"
     "@types/node" "*"
     ansi-escapes "^4.2.1"
     chalk "^4.0.0"
     emittery "^0.8.1"
     exit "^0.1.2"
     graceful-fs "^4.2.4"
-    jest-changed-files "^27.0.1"
-    jest-config "^27.0.1"
-    jest-haste-map "^27.0.1"
-    jest-message-util "^27.0.1"
+    jest-changed-files "^27.0.2"
+    jest-config "^27.0.3"
+    jest-haste-map "^27.0.2"
+    jest-message-util "^27.0.2"
     jest-regex-util "^27.0.1"
-    jest-resolve "^27.0.1"
-    jest-resolve-dependencies "^27.0.1"
-    jest-runner "^27.0.1"
-    jest-runtime "^27.0.1"
-    jest-snapshot "^27.0.1"
-    jest-util "^27.0.1"
-    jest-validate "^27.0.1"
-    jest-watcher "^27.0.1"
+    jest-resolve "^27.0.2"
+    jest-resolve-dependencies "^27.0.3"
+    jest-runner "^27.0.3"
+    jest-runtime "^27.0.3"
+    jest-snapshot "^27.0.2"
+    jest-util "^27.0.2"
+    jest-validate "^27.0.2"
+    jest-watcher "^27.0.2"
     micromatch "^4.0.4"
     p-each-series "^2.1.0"
     rimraf "^3.0.0"
     slash "^3.0.0"
     strip-ansi "^6.0.0"
 
-"@jest/environment@^27.0.1":
-  version "27.0.1"
-  resolved "https://registry.yarnpkg.com/@jest/environment/-/environment-27.0.1.tgz#27ed89bf8179c0a030690f063d922d6da7a519ac"
-  integrity sha512-nG+r3uSs2pOTsdhgt6lUm4ZGJLRcTc6HZIkrFsVpPcdSqEpJehEny9r9y2Bmhkn8fKXWdGCYJKF3i4nKO0HSmA==
+"@jest/environment@^27.0.3":
+  version "27.0.3"
+  resolved "https://registry.yarnpkg.com/@jest/environment/-/environment-27.0.3.tgz#68769b1dfdd213e3456169d64fbe9bd63a5fda92"
+  integrity sha512-pN9m7fbKsop5vc3FOfH8NF7CKKdRbEZzcxfIo1n2TT6ucKWLFq0P6gCJH0GpnQp036++yY9utHOxpeT1WnkWTA==
   dependencies:
-    "@jest/fake-timers" "^27.0.1"
-    "@jest/types" "^27.0.1"
+    "@jest/fake-timers" "^27.0.3"
+    "@jest/types" "^27.0.2"
     "@types/node" "*"
-    jest-mock "^27.0.1"
+    jest-mock "^27.0.3"
 
-"@jest/fake-timers@^27.0.1":
-  version "27.0.1"
-  resolved "https://registry.yarnpkg.com/@jest/fake-timers/-/fake-timers-27.0.1.tgz#6987a596b0bcf8c07653086076c17058b4c77b5c"
-  integrity sha512-3CyLJQnHzKI4TCJSCo+I9TzIHjSK4RrNEk93jFM6Q9+9WlSJ3mpMq/p2YuKMe0SiHKbmZOd5G/Ll5ofF9Xkw9g==
+"@jest/fake-timers@^27.0.3":
+  version "27.0.3"
+  resolved "https://registry.yarnpkg.com/@jest/fake-timers/-/fake-timers-27.0.3.tgz#9899ba6304cc636734c74478df502e18136461dd"
+  integrity sha512-fQ+UCKRIYKvTCEOyKPnaPnomLATIhMnHC/xPZ7yT1Uldp7yMgMxoYIFidDbpSTgB79+/U+FgfoD30c6wg3IUjA==
   dependencies:
-    "@jest/types" "^27.0.1"
+    "@jest/types" "^27.0.2"
     "@sinonjs/fake-timers" "^7.0.2"
     "@types/node" "*"
-    jest-message-util "^27.0.1"
-    jest-mock "^27.0.1"
-    jest-util "^27.0.1"
+    jest-message-util "^27.0.2"
+    jest-mock "^27.0.3"
+    jest-util "^27.0.2"
 
-"@jest/globals@^27.0.1":
-  version "27.0.1"
-  resolved "https://registry.yarnpkg.com/@jest/globals/-/globals-27.0.1.tgz#14c776942f7047a04f2aea09b148065e2aa9d7e9"
-  integrity sha512-80ZCzgopysKdpp5EOglgjApKxiNDR96PG4PwngB4fTwZ4qqqSKo0EwGwQIhl16szQ1M2xCVYmr9J6KelvnABNQ==
+"@jest/globals@^27.0.3":
+  version "27.0.3"
+  resolved "https://registry.yarnpkg.com/@jest/globals/-/globals-27.0.3.tgz#1cf8933b7791bba0b99305cbf39fd4d2e3fe4060"
+  integrity sha512-OzsIuf7uf+QalqAGbjClyezzEcLQkdZ+7PejUrZgDs+okdAK8GwRCGcYCirHvhMBBQh60Jr3NlIGbn/KBPQLEQ==
   dependencies:
-    "@jest/environment" "^27.0.1"
-    "@jest/types" "^27.0.1"
-    expect "^27.0.1"
+    "@jest/environment" "^27.0.3"
+    "@jest/types" "^27.0.2"
+    expect "^27.0.2"
 
-"@jest/reporters@^27.0.1":
-  version "27.0.1"
-  resolved "https://registry.yarnpkg.com/@jest/reporters/-/reporters-27.0.1.tgz#5b491f64e37c9b97b13e564f18f36b6697d28045"
-  integrity sha512-lZbJWuS1h/ytKERfu1D6tEQ4PuQ7+15S4+HrSzHR0i7AGVT1WRo49h4fZqxASOp7AQCupUVtPJNZDkaG9ZXy0g==
+"@jest/reporters@^27.0.2":
+  version "27.0.2"
+  resolved "https://registry.yarnpkg.com/@jest/reporters/-/reporters-27.0.2.tgz#ad73835d1cd54da08b0998a70b14446405e8e0d9"
+  integrity sha512-SVQjew/kafNxSN1my4praGQP+VPVGHsU8zqiEDppLvq6j1lryIjdNb9P+bZSsKeifU4bIoaPnf9Ui0tK9WOpFA==
   dependencies:
     "@bcoe/v8-coverage" "^0.2.3"
-    "@jest/console" "^27.0.1"
-    "@jest/test-result" "^27.0.1"
-    "@jest/transform" "^27.0.1"
-    "@jest/types" "^27.0.1"
+    "@jest/console" "^27.0.2"
+    "@jest/test-result" "^27.0.2"
+    "@jest/transform" "^27.0.2"
+    "@jest/types" "^27.0.2"
     chalk "^4.0.0"
     collect-v8-coverage "^1.0.0"
     exit "^0.1.2"
@@ -483,10 +483,10 @@
     istanbul-lib-report "^3.0.0"
     istanbul-lib-source-maps "^4.0.0"
     istanbul-reports "^3.0.2"
-    jest-haste-map "^27.0.1"
-    jest-resolve "^27.0.1"
-    jest-util "^27.0.1"
-    jest-worker "^27.0.1"
+    jest-haste-map "^27.0.2"
+    jest-resolve "^27.0.2"
+    jest-util "^27.0.2"
+    jest-worker "^27.0.2"
     slash "^3.0.0"
     source-map "^0.6.0"
     string-length "^4.0.1"
@@ -502,42 +502,41 @@
     graceful-fs "^4.2.4"
     source-map "^0.6.0"
 
-"@jest/test-result@^27.0.1":
-  version "27.0.1"
-  resolved "https://registry.yarnpkg.com/@jest/test-result/-/test-result-27.0.1.tgz#8fb97214268ea21cf8cfb83edc0f17e558b3466d"
-  integrity sha512-5aa+ibX2dsGSDLKaQMZb453MqjJU/CRVumebXfaJmuzuGE4qf87yQ2QZ6PEpEtBwVUEgrJCzi3jLCRaUbksSuw==
+"@jest/test-result@^27.0.2":
+  version "27.0.2"
+  resolved "https://registry.yarnpkg.com/@jest/test-result/-/test-result-27.0.2.tgz#0451049e32ceb609b636004ccc27c8fa22263f10"
+  integrity sha512-gcdWwL3yP5VaIadzwQtbZyZMgpmes8ryBAJp70tuxghiA8qL4imJyZex+i+USQH2H4jeLVVszhwntgdQ97fccA==
   dependencies:
-    "@jest/console" "^27.0.1"
-    "@jest/types" "^27.0.1"
+    "@jest/console" "^27.0.2"
+    "@jest/types" "^27.0.2"
     "@types/istanbul-lib-coverage" "^2.0.0"
     collect-v8-coverage "^1.0.0"
 
-"@jest/test-sequencer@^27.0.1":
-  version "27.0.1"
-  resolved "https://registry.yarnpkg.com/@jest/test-sequencer/-/test-sequencer-27.0.1.tgz#2a3b85130978fc545d8ee6c34d65ff4231dbad86"
-  integrity sha512-yK2c2iruJ35WgH4KH8whS72uH+FASJUrzwxzNKTzLAEWmNpWKNEPOsSEKsHynvz78bLHafrTg4adN7RrYNbEOA==
+"@jest/test-sequencer@^27.0.3":
+  version "27.0.3"
+  resolved "https://registry.yarnpkg.com/@jest/test-sequencer/-/test-sequencer-27.0.3.tgz#2a8632b86a9a6f8900e514917cdab6a062e71049"
+  integrity sha512-DcLTzraZ8xLr5fcIl+CF14vKeBBpBrn55wFxI9Ju+dhEBdjRdJQ/Z/pLkMehkPZWIQ+rR23J8e+wFDkfjree0Q==
   dependencies:
-    "@jest/test-result" "^27.0.1"
+    "@jest/test-result" "^27.0.2"
     graceful-fs "^4.2.4"
-    jest-haste-map "^27.0.1"
-    jest-runner "^27.0.1"
-    jest-runtime "^27.0.1"
+    jest-haste-map "^27.0.2"
+    jest-runtime "^27.0.3"
 
-"@jest/transform@^27.0.1":
-  version "27.0.1"
-  resolved "https://registry.yarnpkg.com/@jest/transform/-/transform-27.0.1.tgz#a9ece291f82273d5e58132550996c16edd5a902a"
-  integrity sha512-LC95VpT6wMnQ96dRJDlUiAnW/90zyh4+jS30szI/5AsfS0qwSlr/O4TPcGoD2WVaVMfo6KvR+brvOtGyMHaNhA==
+"@jest/transform@^27.0.2":
+  version "27.0.2"
+  resolved "https://registry.yarnpkg.com/@jest/transform/-/transform-27.0.2.tgz#b073b7c589e3f4b842102468875def2bb722d6b5"
+  integrity sha512-H8sqKlgtDfVog/s9I4GG2XMbi4Ar7RBxjsKQDUhn2XHAi3NG+GoQwWMER+YfantzExbjNqQvqBHzo/G2pfTiPw==
   dependencies:
     "@babel/core" "^7.1.0"
-    "@jest/types" "^27.0.1"
+    "@jest/types" "^27.0.2"
     babel-plugin-istanbul "^6.0.0"
     chalk "^4.0.0"
     convert-source-map "^1.4.0"
     fast-json-stable-stringify "^2.0.0"
     graceful-fs "^4.2.4"
-    jest-haste-map "^27.0.1"
+    jest-haste-map "^27.0.2"
     jest-regex-util "^27.0.1"
-    jest-util "^27.0.1"
+    jest-util "^27.0.2"
     micromatch "^4.0.4"
     pirates "^4.0.1"
     slash "^3.0.0"
@@ -555,10 +554,10 @@
     "@types/yargs" "^15.0.0"
     chalk "^4.0.0"
 
-"@jest/types@^27.0.1":
-  version "27.0.1"
-  resolved "https://registry.yarnpkg.com/@jest/types/-/types-27.0.1.tgz#631738c942e70045ebbf42a3f9b433036d3845e4"
-  integrity sha512-8A25RRV4twZutsx2D+7WphnDsp7If9Yu6ko0Gxwrwv8BiWESFzka34+Aa2kC8w9xewt7SDuCUSZ6IiAFVj3PRg==
+"@jest/types@^27.0.2":
+  version "27.0.2"
+  resolved "https://registry.yarnpkg.com/@jest/types/-/types-27.0.2.tgz#e153d6c46bda0f2589f0702b071f9898c7bbd37e"
+  integrity sha512-XpjCtJ/99HB4PmyJ2vgmN7vT+JLP7RW1FBT9RgnMFS4Dt7cvIyBee8O3/j98aUZ34ZpenPZFqmaaObWSeL65dg==
   dependencies:
     "@types/istanbul-lib-coverage" "^2.0.0"
     "@types/istanbul-reports" "^3.0.0"
@@ -1367,10 +1366,10 @@
     "@octokit/types" "^6.0.3"
     universal-user-agent "^6.0.0"
 
-"@octokit/openapi-types@^7.2.0":
-  version "7.2.1"
-  resolved "https://registry.yarnpkg.com/@octokit/openapi-types/-/openapi-types-7.2.1.tgz#3ba1abe8906863edd403e185bc12e2bf79b3e240"
-  integrity sha512-IHQJpLciwzwDvciLxiFj3IEV5VYn7lSVcj5cu0jbTwMfK4IG6/g8SPrVp3Le1VRzIiYSRcBzm1dA7vgWelYP3Q==
+"@octokit/openapi-types@^7.2.3":
+  version "7.2.3"
+  resolved "https://registry.yarnpkg.com/@octokit/openapi-types/-/openapi-types-7.2.3.tgz#a7105796db9b85d25d3feba9a1785a124c7803e4"
+  integrity sha512-V1ycxkR19jqbIl3evf2RQiMRBvTNRi+Iy9h20G5OP5dPfEF6GJ1DPlUeiZRxo2HJxRr+UA4i0H1nn4btBDPFrw==
 
 "@octokit/plugin-enterprise-rest@^6.0.1":
   version "6.0.1"
@@ -1389,12 +1388,12 @@
   resolved "https://registry.yarnpkg.com/@octokit/plugin-request-log/-/plugin-request-log-1.0.3.tgz#70a62be213e1edc04bb8897ee48c311482f9700d"
   integrity sha512-4RFU4li238jMJAzLgAwkBAw+4Loile5haQMQr+uhFq27BmyJXcXSKvoQKqh0agsZEiUlW6iSv3FAgvmGkur7OQ==
 
-"@octokit/plugin-rest-endpoint-methods@5.0.1":
-  version "5.0.1"
-  resolved "https://registry.yarnpkg.com/@octokit/plugin-rest-endpoint-methods/-/plugin-rest-endpoint-methods-5.0.1.tgz#631b8d4edc6798b03489911252a25f2a4e58c594"
-  integrity sha512-vvWbPtPqLyIzJ7A4IPdTl+8IeuKAwMJ4LjvmqWOOdfSuqWQYZXq2CEd0hsnkidff2YfKlguzujHs/reBdAx8Sg==
+"@octokit/plugin-rest-endpoint-methods@5.3.1":
+  version "5.3.1"
+  resolved "https://registry.yarnpkg.com/@octokit/plugin-rest-endpoint-methods/-/plugin-rest-endpoint-methods-5.3.1.tgz#deddce769b4ec3179170709ab42e4e9e6195aaa9"
+  integrity sha512-3B2iguGmkh6bQQaVOtCsS0gixrz8Lg0v4JuXPqBcFqLKuJtxAUf3K88RxMEf/naDOI73spD+goJ/o7Ie7Cvdjg==
   dependencies:
-    "@octokit/types" "^6.13.1"
+    "@octokit/types" "^6.16.2"
     deprecation "^2.3.1"
 
 "@octokit/request-error@^2.0.0", "@octokit/request-error@^2.0.5":
@@ -1419,21 +1418,21 @@
     universal-user-agent "^6.0.0"
 
 "@octokit/rest@^18.1.0":
-  version "18.5.3"
-  resolved "https://registry.yarnpkg.com/@octokit/rest/-/rest-18.5.3.tgz#6a2e6006a87ebbc34079c419258dd29ec9ff659d"
-  integrity sha512-KPAsUCr1DOdLVbZJgGNuE/QVLWEaVBpFQwDAz/2Cnya6uW2wJ/P5RVGk0itx7yyN1aGa8uXm2pri4umEqG1JBA==
+  version "18.5.6"
+  resolved "https://registry.yarnpkg.com/@octokit/rest/-/rest-18.5.6.tgz#8c9a7c9329c7bbf478af20df78ddeab0d21f6d89"
+  integrity sha512-8HdG6ZjQdZytU6tCt8BQ2XLC7EJ5m4RrbyU/EARSkAM1/HP3ceOzMG/9atEfe17EDMer3IVdHWLedz2wDi73YQ==
   dependencies:
     "@octokit/core" "^3.2.3"
     "@octokit/plugin-paginate-rest" "^2.6.2"
     "@octokit/plugin-request-log" "^1.0.2"
-    "@octokit/plugin-rest-endpoint-methods" "5.0.1"
+    "@octokit/plugin-rest-endpoint-methods" "5.3.1"
 
-"@octokit/types@^6.0.3", "@octokit/types@^6.11.0", "@octokit/types@^6.13.1", "@octokit/types@^6.7.1":
-  version "6.16.0"
-  resolved "https://registry.yarnpkg.com/@octokit/types/-/types-6.16.0.tgz#15f71e391ca74e91a21b70e3a1b033c89625dca4"
-  integrity sha512-EktqSNq8EKXE82a7Vw33ozOEhFXIRik+rZHJTHAgVZRm/p2K5r5ecn5fVpRkLCm3CAVFwchRvt3yvtmfbt2LCQ==
+"@octokit/types@^6.0.3", "@octokit/types@^6.11.0", "@octokit/types@^6.16.2", "@octokit/types@^6.7.1":
+  version "6.16.2"
+  resolved "https://registry.yarnpkg.com/@octokit/types/-/types-6.16.2.tgz#62242e0565a3eb99ca2fd376283fe78b4ea057b4"
+  integrity sha512-wWPSynU4oLy3i4KGyk+J1BLwRKyoeW2TwRHgwbDz17WtVFzSK2GOErGliruIx8c+MaYtHSYTx36DSmLNoNbtgA==
   dependencies:
-    "@octokit/openapi-types" "^7.2.0"
+    "@octokit/openapi-types" "^7.2.3"
 
 "@sinonjs/commons@^1.7.0":
   version "1.8.3"
@@ -1443,9 +1442,9 @@
     type-detect "4.0.8"
 
 "@sinonjs/fake-timers@^7.0.2":
-  version "7.1.0"
-  resolved "https://registry.yarnpkg.com/@sinonjs/fake-timers/-/fake-timers-7.1.0.tgz#8f13af27d842cbf51ad4502e05562fe9391d084e"
-  integrity sha512-hAEzXi6Wbvlb67NnGMGSNOeAflLVnMa4yliPU/ty1qjgW/vAletH15/v/esJwASSIA0YlIyjnloenFbEZc9q9A==
+  version "7.1.2"
+  resolved "https://registry.yarnpkg.com/@sinonjs/fake-timers/-/fake-timers-7.1.2.tgz#2524eae70c4910edccf99b2f4e6efc5894aff7b5"
+  integrity sha512-iQADsW4LBMISqZ6Ci1dupJL9pprqwcVFTcOsEmQOEhW+KLCVn/Y4Jrvg2k19fIHCp+iFprriYPTdRcQR8NbUPg==
   dependencies:
     "@sinonjs/commons" "^1.7.0"
 
@@ -1516,14 +1515,19 @@
     "@types/estree" "*"
 
 "@types/eslint@*":
-  version "7.2.11"
-  resolved "https://registry.yarnpkg.com/@types/eslint/-/eslint-7.2.11.tgz#180b58f5bb7d7376e39d22496e2b08901aa52fd2"
-  integrity sha512-WYhv//5K8kQtsSc9F1Kn2vHzhYor6KpwPbARH7hwYe3C3ETD0EVx/3P5qQybUoaBEuUa9f/02JjBiXFWalYUmw==
+  version "7.2.13"
+  resolved "https://registry.yarnpkg.com/@types/eslint/-/eslint-7.2.13.tgz#e0ca7219ba5ded402062ad6f926d491ebb29dd53"
+  integrity sha512-LKmQCWAlnVHvvXq4oasNUMTJJb2GwSyTY8+1C7OH5ILR8mPLaljv1jxL1bXW3xB3jFbQxTKxJAvI8PyjB09aBg==
   dependencies:
     "@types/estree" "*"
     "@types/json-schema" "*"
 
-"@types/estree@*", "@types/estree@^0.0.47":
+"@types/estree@*":
+  version "0.0.48"
+  resolved "https://registry.yarnpkg.com/@types/estree/-/estree-0.0.48.tgz#18dc8091b285df90db2f25aa7d906cfc394b7f74"
+  integrity sha512-LfZwXoGUDo0C3me81HXgkBg5CTQYb6xzEl+fNmbO4JdRiSKQ8A0GD1OBBvKAIsbCUgoyAty7m99GqqMQe784ew==
+
+"@types/estree@^0.0.47":
   version "0.0.47"
   resolved "https://registry.yarnpkg.com/@types/estree/-/estree-0.0.47.tgz#d7a51db20f0650efec24cd04994f523d93172ed4"
   integrity sha512-c5ciR06jK8u9BstrmJyO97m+klJrrhCf9u3rLu3DEAJBirxRqSCvDQoYKmxuYwQI5SZChAWu+tq9oVlGRuzPAg==
@@ -1561,9 +1565,9 @@
     "@types/istanbul-lib-coverage" "*"
 
 "@types/istanbul-reports@^3.0.0":
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/@types/istanbul-reports/-/istanbul-reports-3.0.0.tgz#508b13aa344fa4976234e75dddcc34925737d821"
-  integrity sha512-nwKNbvnwJ2/mndE9ItP/zc2TCzw6uuodnF4EHYWD+gCQDVBuRQL5UzbZD0/ezy1iKsFU2ZQiDqg4M9dN4+wZgA==
+  version "3.0.1"
+  resolved "https://registry.yarnpkg.com/@types/istanbul-reports/-/istanbul-reports-3.0.1.tgz#9153fe98bba2bd565a63add9436d6f0d7f8468ff"
+  integrity sha512-c3mAZEuK0lvBp8tmuL74XRKn1+y2dcwOUpH7x4WrF6gk1GIgiluDRgMYQtw2OFcBvAJWlt6ASU3tSqxp0Uu0Aw==
   dependencies:
     "@types/istanbul-lib-report" "*"
 
@@ -1575,7 +1579,7 @@
     jest-diff "^26.0.0"
     pretty-format "^26.0.0"
 
-"@types/json-schema@*", "@types/json-schema@^7.0.3", "@types/json-schema@^7.0.6":
+"@types/json-schema@*", "@types/json-schema@^7.0.3", "@types/json-schema@^7.0.6", "@types/json-schema@^7.0.7":
   version "7.0.7"
   resolved "https://registry.yarnpkg.com/@types/json-schema/-/json-schema-7.0.7.tgz#98a993516c859eb0d5c4c8f098317a9ea68db9ad"
   integrity sha512-cxWFQVseBm6O9Gbw1IWb8r6OS4OhSt3hPZLkFApLjM8TEXROBuQGLAH2i2gZpcXdLBIrpXuTDhH7Vbm1iXmNGA==
@@ -1598,9 +1602,9 @@
     "@types/node" "*"
 
 "@types/node@*", "@types/node@^15.6.1":
-  version "15.6.1"
-  resolved "https://registry.yarnpkg.com/@types/node/-/node-15.6.1.tgz#32d43390d5c62c5b6ec486a9bc9c59544de39a08"
-  integrity sha512-7EIraBEyRHEe7CH+Fm1XvgqU6uwZN8Q7jppJGcqjROMT29qhAuuOxYB1uEY5UMYQKEmA5D+5tBnhdaPXSsLONA==
+  version "15.9.0"
+  resolved "https://registry.yarnpkg.com/@types/node/-/node-15.9.0.tgz#0b7f6c33ca5618fe329a9d832b478b4964d325a8"
+  integrity sha512-AR1Vq1Ei1GaA5FjKL5PBqblTZsL5M+monvGSZwe6sSIdGiuu7Xr/pNwWJY+0ZQuN8AapD/XMB5IzBAyYRFbocA==
 
 "@types/node@^11.11.6":
   version "11.15.54"
@@ -1660,7 +1664,7 @@
     semver "^7.3.2"
     tsutils "^3.17.1"
 
-"@typescript-eslint/experimental-utils@4.25.0", "@typescript-eslint/experimental-utils@^4.0.1":
+"@typescript-eslint/experimental-utils@4.25.0":
   version "4.25.0"
   resolved "https://registry.yarnpkg.com/@typescript-eslint/experimental-utils/-/experimental-utils-4.25.0.tgz#b2febcfa715d2c1806fd5f0335193a6cd270df54"
   integrity sha512-f0doRE76vq7NEEU0tw+ajv6CrmPelw5wLoaghEHkA2dNLFb3T/zJQqGPQ0OYt5XlZaS13MtnN+GTPCuUVg338w==
@@ -1672,6 +1676,18 @@
     eslint-scope "^5.0.0"
     eslint-utils "^2.0.0"
 
+"@typescript-eslint/experimental-utils@^4.0.1":
+  version "4.26.0"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/experimental-utils/-/experimental-utils-4.26.0.tgz#ba7848b3f088659cdf71bce22454795fc55be99a"
+  integrity sha512-TH2FO2rdDm7AWfAVRB5RSlbUhWxGVuxPNzGT7W65zVfl8H/WeXTk1e69IrcEVsBslrQSTDKQSaJD89hwKrhdkw==
+  dependencies:
+    "@types/json-schema" "^7.0.7"
+    "@typescript-eslint/scope-manager" "4.26.0"
+    "@typescript-eslint/types" "4.26.0"
+    "@typescript-eslint/typescript-estree" "4.26.0"
+    eslint-scope "^5.1.1"
+    eslint-utils "^3.0.0"
+
 "@typescript-eslint/parser@4.25.0":
   version "4.25.0"
   resolved "https://registry.yarnpkg.com/@typescript-eslint/parser/-/parser-4.25.0.tgz#6b2cb6285aa3d55bfb263c650739091b0f19aceb"
@@ -1690,11 +1706,24 @@
     "@typescript-eslint/types" "4.25.0"
     "@typescript-eslint/visitor-keys" "4.25.0"
 
+"@typescript-eslint/scope-manager@4.26.0":
+  version "4.26.0"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/scope-manager/-/scope-manager-4.26.0.tgz#60d1a71df162404e954b9d1c6343ff3bee496194"
+  integrity sha512-G6xB6mMo4xVxwMt5lEsNTz3x4qGDt0NSGmTBNBPJxNsrTXJSm21c6raeYroS2OwQsOyIXqKZv266L/Gln1BWqg==
+  dependencies:
+    "@typescript-eslint/types" "4.26.0"
+    "@typescript-eslint/visitor-keys" "4.26.0"
+
 "@typescript-eslint/types@4.25.0":
   version "4.25.0"
   resolved "https://registry.yarnpkg.com/@typescript-eslint/types/-/types-4.25.0.tgz#0e444a5c5e3c22d7ffa5e16e0e60510b3de5af87"
   integrity sha512-+CNINNvl00OkW6wEsi32wU5MhHti2J25TJsJJqgQmJu3B3dYDBcmOxcE5w9cgoM13TrdE/5ND2HoEnBohasxRQ==
 
+"@typescript-eslint/types@4.26.0":
+  version "4.26.0"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/types/-/types-4.26.0.tgz#7c6732c0414f0a69595f4f846ebe12616243d546"
+  integrity sha512-rADNgXl1kS/EKnDr3G+m7fB9yeJNnR9kF7xMiXL6mSIWpr3Wg5MhxyfEXy/IlYthsqwBqHOr22boFbf/u6O88A==
+
 "@typescript-eslint/typescript-estree@4.25.0":
   version "4.25.0"
   resolved "https://registry.yarnpkg.com/@typescript-eslint/typescript-estree/-/typescript-estree-4.25.0.tgz#942e4e25888736bff5b360d9b0b61e013d0cfa25"
@@ -1708,6 +1737,19 @@
     semver "^7.3.2"
     tsutils "^3.17.1"
 
+"@typescript-eslint/typescript-estree@4.26.0":
+  version "4.26.0"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/typescript-estree/-/typescript-estree-4.26.0.tgz#aea17a40e62dc31c63d5b1bbe9a75783f2ce7109"
+  integrity sha512-GHUgahPcm9GfBuy3TzdsizCcPjKOAauG9xkz9TR8kOdssz2Iz9jRCSQm6+aVFa23d5NcSpo1GdHGSQKe0tlcbg==
+  dependencies:
+    "@typescript-eslint/types" "4.26.0"
+    "@typescript-eslint/visitor-keys" "4.26.0"
+    debug "^4.3.1"
+    globby "^11.0.3"
+    is-glob "^4.0.1"
+    semver "^7.3.5"
+    tsutils "^3.21.0"
+
 "@typescript-eslint/visitor-keys@4.25.0":
   version "4.25.0"
   resolved "https://registry.yarnpkg.com/@typescript-eslint/visitor-keys/-/visitor-keys-4.25.0.tgz#863e7ed23da4287c5b469b13223255d0fde6aaa7"
@@ -1716,6 +1758,14 @@
     "@typescript-eslint/types" "4.25.0"
     eslint-visitor-keys "^2.0.0"
 
+"@typescript-eslint/visitor-keys@4.26.0":
+  version "4.26.0"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/visitor-keys/-/visitor-keys-4.26.0.tgz#26d2583169222815be4dcd1da4fe5459bc3bcc23"
+  integrity sha512-cw4j8lH38V1ycGBbF+aFiLUls9Z0Bw8QschP3mkth50BbWzgFS33ISIgBzUMuQ2IdahoEv/rXstr8Zhlz4B1Zg==
+  dependencies:
+    "@typescript-eslint/types" "4.26.0"
+    eslint-visitor-keys "^2.0.0"
+
 "@webassemblyjs/ast@1.11.0":
   version "1.11.0"
   resolved "https://registry.yarnpkg.com/@webassemblyjs/ast/-/ast-1.11.0.tgz#a5aa679efdc9e51707a4207139da57920555961f"
@@ -1894,9 +1944,9 @@ acorn@^7.1.1, acorn@^7.4.0:
   integrity sha512-nQyp0o1/mNdbTO1PO6kHkwSrmgZ0MT/jCCpNiwbUjGoRN4dlBhqJtoQuCnEOKzgTVwg0ZWiCoQy6SxMebQVh8A==
 
 acorn@^8.2.1, acorn@^8.2.4:
-  version "8.2.4"
-  resolved "https://registry.yarnpkg.com/acorn/-/acorn-8.2.4.tgz#caba24b08185c3b56e3168e97d15ed17f4d31fd0"
-  integrity sha512-Ibt84YwBDDA890eDiDCEqcbwvHlBvzzDkU2cGBBDDI1QWT12jTiXIOn2CIw5KK4i6N5Z2HUxwYjzriDyqaqqZg==
+  version "8.3.0"
+  resolved "https://registry.yarnpkg.com/acorn/-/acorn-8.3.0.tgz#1193f9b96c4e8232f00b11a9edff81b2c8b98b88"
+  integrity sha512-tqPKHZ5CaBJw0Xmy0ZZvLs1qTV+BNFSyvn77ASXkpBNfIRk8ev26fKrD9iLGwGA9zedPao52GSHzq8lyZG0NUw==
 
 add-stream@^1.0.0:
   version "1.0.0"
@@ -2274,13 +2324,13 @@ aws4@^1.8.0:
   resolved "https://registry.yarnpkg.com/aws4/-/aws4-1.11.0.tgz#d61f46d83b2519250e2784daf5b09479a8b41c59"
   integrity sha512-xh1Rl34h6Fi1DC2WWKfxUTVqRsNnr6LsKz2+hfwDxQJWmrx8+c7ylaqBMcHfl1U1r2dsifOvKX3LQuLNZ+XSvA==
 
-babel-jest@^27.0.1:
-  version "27.0.1"
-  resolved "https://registry.yarnpkg.com/babel-jest/-/babel-jest-27.0.1.tgz#9f1c4571ac17a39e599d1325dcaf53a274261df4"
-  integrity sha512-aWFD7OGQjk3Y8MdZKf1XePlQvHnjMVJQjIq9WKrlAjz9by703kJ45Jxhp26JwnovoW71YYz5etuqRl8wMcIv0w==
+babel-jest@^27.0.2:
+  version "27.0.2"
+  resolved "https://registry.yarnpkg.com/babel-jest/-/babel-jest-27.0.2.tgz#7dc18adb01322acce62c2af76ea2c7cd186ade37"
+  integrity sha512-9OThPl3/IQbo4Yul2vMz4FYwILPQak8XelX4YGowygfHaOl5R5gfjm4iVx4d8aUugkW683t8aq0A74E7b5DU1Q==
   dependencies:
-    "@jest/transform" "^27.0.1"
-    "@jest/types" "^27.0.1"
+    "@jest/transform" "^27.0.2"
+    "@jest/types" "^27.0.2"
     "@types/babel__core" "^7.1.14"
     babel-plugin-istanbul "^6.0.0"
     babel-preset-jest "^27.0.1"
@@ -2452,7 +2502,7 @@ browser-process-hrtime@^1.0.0:
   resolved "https://registry.yarnpkg.com/browser-process-hrtime/-/browser-process-hrtime-1.0.0.tgz#3c9b4b7d782c8121e56f10106d84c0d0ffc94626"
   integrity sha512-9o5UecI3GhkpM6DrXr69PblIuWxPKk9Y0jHBRhdocZ2y7YECBFCsHm79Pr3OyR2AvjhDkabFJaDJMYRazHgsow==
 
-browserslist@^4.14.5:
+browserslist@^4.14.5, browserslist@^4.16.6:
   version "4.16.6"
   resolved "https://registry.yarnpkg.com/browserslist/-/browserslist-4.16.6.tgz#d7901277a5a88e554ed305b183ec9b0c08f66fa2"
   integrity sha512-Wspk/PqO+4W9qp5iUTJsa1B/QrYn1keNCcEP5OvP7WBwT4KaDly0uONYmC6Xa3Z5IqnUgS0KcgLYu1l74x0ZXQ==
@@ -2502,7 +2552,7 @@ byte-size@^7.0.0:
   resolved "https://registry.yarnpkg.com/byte-size/-/byte-size-7.0.1.tgz#b1daf3386de7ab9d706b941a748dbfc71130dee3"
   integrity sha512-crQdqyCwhokxwV1UyDzLZanhkugAgft7vt0qbbdt60C6Zf3CAiGmtUCylbtYwrU6loOUw3euGrNtW1J651ot1A==
 
-cacache@^15.0.5:
+cacache@^15.0.5, cacache@^15.2.0:
   version "15.2.0"
   resolved "https://registry.yarnpkg.com/cacache/-/cacache-15.2.0.tgz#73af75f77c58e72d8c630a7a2858cb18ef523389"
   integrity sha512-uKoJSHmnrqXgthDFx/IU6ED/5xd+NNGe+Bb+kLZy7Ku4P+BaiWEUflAKPZ7eAzsYGcsAGASJZsybXp+quEcHTw==
@@ -2596,9 +2646,9 @@ camelcase@^6.2.0:
   integrity sha512-c7wVvbw3f37nuobQNtgsgG9POC9qMbNuMQmTCqZv23b6MIz0fcYpBiOlv9gEN/hdLdnZTDQhg6e9Dq5M1vKvfg==
 
 caniuse-lite@^1.0.30001219:
-  version "1.0.30001230"
-  resolved "https://registry.yarnpkg.com/caniuse-lite/-/caniuse-lite-1.0.30001230.tgz#8135c57459854b2240b57a4a6786044bdc5a9f71"
-  integrity sha512-5yBd5nWCBS+jWKTcHOzXwo5xzcj4ePE/yjtkZyUV1BTUmrBaA9MRGC+e7mxnqXSA90CmCA8L3eKLaSUkt099IQ==
+  version "1.0.30001233"
+  resolved "https://registry.yarnpkg.com/caniuse-lite/-/caniuse-lite-1.0.30001233.tgz#b7cb4a377a4b12ed240d2fa5c792951a06e5f2c4"
+  integrity sha512-BmkbxLfStqiPA7IEzQpIk0UFZFf3A4E6fzjPJ6OR+bFC2L8ES9J8zGA/asoi47p8XDVkev+WJo2I2Nc8c/34Yg==
 
 caseless@~0.12.0:
   version "0.12.0"
@@ -2672,9 +2722,9 @@ ci-info@^2.0.0:
   integrity sha512-5tK7EtrZ0N+OLFMthtqOj4fI2Jeb88C4CAZPu25LDVUgXJ0A3Js4PMGqrn0JU1W0Mh1/Z8wZzYPxqUrXeBboCQ==
 
 ci-info@^3.1.1:
-  version "3.1.1"
-  resolved "https://registry.yarnpkg.com/ci-info/-/ci-info-3.1.1.tgz#9a32fcefdf7bcdb6f0a7e1c0f8098ec57897b80a"
-  integrity sha512-kdRWLBIJwdsYJWYJFtAFFYxybguqeF91qpZaggjG5Nf8QKdizFG2hjqvaTXbxFIcYbSaD74KpAXv6BSm17DHEQ==
+  version "3.2.0"
+  resolved "https://registry.yarnpkg.com/ci-info/-/ci-info-3.2.0.tgz#2876cb948a498797b5236f0095bc057d0dca38b6"
+  integrity sha512-dVqRX7fLUm8J6FgHJ418XuIgDLZDkYcDFTeL6TA2gt5WlIZUQrrH6EZrNClwT/H0FateUsZkGIOPRrLbP+PR9A==
 
 cjs-module-lexer@^1.0.0:
   version "1.2.1"
@@ -3199,7 +3249,7 @@ debug@3.X:
   dependencies:
     ms "^2.1.1"
 
-debug@4, debug@^4.0.1, debug@^4.1.0, debug@^4.1.1:
+debug@4, debug@^4.0.1, debug@^4.1.0, debug@^4.1.1, debug@^4.3.1:
   version "4.3.1"
   resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.1.tgz#f0d229c505e0c6d8c49ac553d1b13dc183f6b2ee"
   integrity sha512-doEwdvm4PCeK4K3RQN2ZC2BYUBaxwLARCqZmMjtF8a51J2Rb0xpVloFRnCODwqjpwnAoao4pelN8l3RJdv3gRQ==
@@ -3362,9 +3412,9 @@ detect-indent@^5.0.0:
   integrity sha1-OHHMCmoALow+Wzz38zYmRnXwa50=
 
 detect-indent@^6.0.0:
-  version "6.0.0"
-  resolved "https://registry.yarnpkg.com/detect-indent/-/detect-indent-6.0.0.tgz#0abd0f549f69fc6659a254fe96786186b6f528fd"
-  integrity sha512-oSyFlqaTHCItVRGK5RmrmjB+CmaMOW7IaNA/kdxqhoa6d17j/5ce9O9eWXmV/KEdRwqpQA+Vqe8a8Bsybu4YnA==
+  version "6.1.0"
+  resolved "https://registry.yarnpkg.com/detect-indent/-/detect-indent-6.1.0.tgz#592485ebbbf6b3b1ab2be175c8393d04ca0d57e6"
+  integrity sha512-reYkTUJAZb9gUuZ2RvVCNhVHdg62RHnJ7WJl8ftMi4diZ6NWlciOzQN88pUhSELEwflJht4oQDv0F0BMlwaYtA==
 
 detect-newline@^2.0.0:
   version "2.1.0"
@@ -3473,9 +3523,9 @@ ecc-jsbn@~0.1.1:
     safer-buffer "^2.1.0"
 
 electron-to-chromium@^1.3.723:
-  version "1.3.738"
-  resolved "https://registry.yarnpkg.com/electron-to-chromium/-/electron-to-chromium-1.3.738.tgz#aec24b091c82acbfabbdcce08076a703941d17ca"
-  integrity sha512-vCMf4gDOpEylPSLPLSwAEsz+R3ShP02Y3cAKMZvTqule3XcPp7tgc/0ESI7IS6ZeyBlGClE50N53fIOkcIVnpw==
+  version "1.3.744"
+  resolved "https://registry.yarnpkg.com/electron-to-chromium/-/electron-to-chromium-1.3.744.tgz#34e0da7babb325e18b50d3a0214504b12045ca85"
+  integrity sha512-o/vep/PvSXg+7buwCbVJXHY3zbjYVmFPwnMMnchESXgAzrfcasvbX/hQZHCFGG7YdZgdtwt1KTMyK9CyBxPbLA==
 
 emittery@^0.8.1:
   version "0.8.1"
@@ -3539,9 +3589,9 @@ error-ex@^1.2.0, error-ex@^1.3.1:
     is-arrayish "^0.2.1"
 
 es-abstract@^1.18.0-next.2:
-  version "1.18.2"
-  resolved "https://registry.yarnpkg.com/es-abstract/-/es-abstract-1.18.2.tgz#6eb518b640262e8ddcbd48e0bc8549f82efd48a7"
-  integrity sha512-byRiNIQXE6HWNySaU6JohoNXzYgbBjztwFnBLUTiJmWXjaU9bSq3urQLUlNLQ292tc+gc07zYZXNZjaOoAX3sw==
+  version "1.18.3"
+  resolved "https://registry.yarnpkg.com/es-abstract/-/es-abstract-1.18.3.tgz#25c4c3380a27aa203c44b2b685bba94da31b63e0"
+  integrity sha512-nQIr12dxV7SSxE6r6f1l3DtAeEYdsGpps13dR0TwJg1S8gyp4ZPgy3FZcHBgbiQqnoqSTb+oC+kO4UQ0C/J8vw==
   dependencies:
     call-bind "^1.0.2"
     es-to-primitive "^1.2.1"
@@ -3664,6 +3714,13 @@ eslint-utils@^2.0.0, eslint-utils@^2.1.0:
   dependencies:
     eslint-visitor-keys "^1.1.0"
 
+eslint-utils@^3.0.0:
+  version "3.0.0"
+  resolved "https://registry.yarnpkg.com/eslint-utils/-/eslint-utils-3.0.0.tgz#8aebaface7345bb33559db0a1f13a1d2d48c3672"
+  integrity sha512-uuQC43IGctw68pJA1RgbQS8/NP7rch6Cwd4j3ZBtgo4/8Flj4eGE7ZYSZRN3iq5pVUv6GPdW5Z1RFleo84uLDA==
+  dependencies:
+    eslint-visitor-keys "^2.0.0"
+
 eslint-visitor-keys@^1.1.0, eslint-visitor-keys@^1.3.0:
   version "1.3.0"
   resolved "https://registry.yarnpkg.com/eslint-visitor-keys/-/eslint-visitor-keys-1.3.0.tgz#30ebd1ef7c2fdff01c3a4f151044af25fab0523e"
@@ -3785,9 +3842,9 @@ events@^3.2.0:
   integrity sha512-mQw+2fkQbALzQ7V0MY0IqdnXNOeTtP4r0lN9z7AAawCXgqea7bDii20AYrIBrFd/Hx0M2Ocz6S111CaFkUcb0Q==
 
 execa@^5.0.0:
-  version "5.0.0"
-  resolved "https://registry.yarnpkg.com/execa/-/execa-5.0.0.tgz#4029b0007998a841fbd1032e5f4de86a3c1e3376"
-  integrity sha512-ov6w/2LCiuyO4RLYGdpFGjkcs0wMTgGE8PrkTHikeUy5iJekXyPIKUjifk5CsE0pt7sMCrMZ3YNqoCj6idQOnQ==
+  version "5.1.0"
+  resolved "https://registry.yarnpkg.com/execa/-/execa-5.1.0.tgz#3ea50ee863d226bfa323528cce1684e7481dfe46"
+  integrity sha512-CkdUB7s2y6S+d4y+OM/+ZtQcJCiKUCth4cNImGMqrt2zEVtW2rfHGspQBE1GDo6LjeNIQmTPKXqTCKjqFKyu3A==
   dependencies:
     cross-spawn "^7.0.3"
     get-stream "^6.0.0"
@@ -3824,16 +3881,16 @@ expand-tilde@^2.0.0, expand-tilde@^2.0.2:
   dependencies:
     homedir-polyfill "^1.0.1"
 
-expect@^27.0.1:
-  version "27.0.1"
-  resolved "https://registry.yarnpkg.com/expect/-/expect-27.0.1.tgz#1290c74fef8d62f15f4c5dd1d7233001909abbfb"
-  integrity sha512-hjKwLeAvKUiq0Plha1dmzOH1FGEwJC9njbT993cq4PK9r58/+3NM+WDqFVGcPuRH7XTjmbIeHQBzp2faDrPhjQ==
+expect@^27.0.2:
+  version "27.0.2"
+  resolved "https://registry.yarnpkg.com/expect/-/expect-27.0.2.tgz#e66ca3a4c9592f1c019fa1d46459a9d2084f3422"
+  integrity sha512-YJFNJe2+P2DqH+ZrXy+ydRQYO87oxRUonZImpDodR1G7qo3NYd3pL+NQ9Keqpez3cehczYwZDBC3A7xk3n7M/w==
   dependencies:
-    "@jest/types" "^27.0.1"
+    "@jest/types" "^27.0.2"
     ansi-styles "^5.0.0"
     jest-get-type "^27.0.1"
-    jest-matcher-utils "^27.0.1"
-    jest-message-util "^27.0.1"
+    jest-matcher-utils "^27.0.2"
+    jest-message-util "^27.0.2"
     jest-regex-util "^27.0.1"
 
 ext@^1.1.2:
@@ -4433,9 +4490,9 @@ globals@^12.1.0:
     type-fest "^0.8.1"
 
 globals@^13.6.0:
-  version "13.8.0"
-  resolved "https://registry.yarnpkg.com/globals/-/globals-13.8.0.tgz#3e20f504810ce87a8d72e55aecf8435b50f4c1b3"
-  integrity sha512-rHtdA6+PDBIjeEvA91rpqzEvk/k3/i7EeNQiryiWuJH0Hw9cpyJMAt2jtbAwUaRdhD+573X4vWw6IcjKPasi9Q==
+  version "13.9.0"
+  resolved "https://registry.yarnpkg.com/globals/-/globals-13.9.0.tgz#4bf2bf635b334a173fb1daf7c5e6b218ecdc06cb"
+  integrity sha512-74/FduwI/JaIrr1H8e71UbDE+5x7pIPs1C2rrwC52SszOo043CsWOZEMW7o2Y58xwm9b+0RBKDxY5n2sUpEFxA==
   dependencies:
     type-fest "^0.20.2"
 
@@ -4453,7 +4510,7 @@ globby@^10.0.1:
     merge2 "^1.2.3"
     slash "^3.0.0"
 
-globby@^11.0.1, globby@^11.0.2:
+globby@^11.0.1, globby@^11.0.2, globby@^11.0.3:
   version "11.0.3"
   resolved "https://registry.yarnpkg.com/globby/-/globby-11.0.3.tgz#9b1f0cb523e171dd1ad8c7b2a9fb4b644b9593cb"
   integrity sha512-ffdmosjA807y7+lA1NM0jELARVmYul/715xiILEjo3hBLPTcirgQNnXECn5g3mtR8TOLCVbkfua1Hpen25/Xcg==
@@ -5327,83 +5384,84 @@ ix@2.5.3:
     "@types/node" "^11.11.6"
     tslib "^1.9.3"
 
-jest-changed-files@^27.0.1:
-  version "27.0.1"
-  resolved "https://registry.yarnpkg.com/jest-changed-files/-/jest-changed-files-27.0.1.tgz#b8356b3708cac9d05ebf6f9e0b32227b514945c8"
-  integrity sha512-Y/4AnqYNcUX/vVgfkmvSA3t7rcg+t8m3CsSGlU+ra8kjlVW5ZqXcBZY/NUew2Mo8M+dn0ApKl+FmGGT1JV5dVA==
+jest-changed-files@^27.0.2:
+  version "27.0.2"
+  resolved "https://registry.yarnpkg.com/jest-changed-files/-/jest-changed-files-27.0.2.tgz#997253042b4a032950fc5f56abf3c5d1f8560801"
+  integrity sha512-eMeb1Pn7w7x3wue5/vF73LPCJ7DKQuC9wQUR5ebP9hDPpk5hzcT/3Hmz3Q5BOFpR3tgbmaWhJcMTVgC8Z1NuMw==
   dependencies:
-    "@jest/types" "^27.0.1"
+    "@jest/types" "^27.0.2"
     execa "^5.0.0"
     throat "^6.0.1"
 
-jest-circus@^27.0.1:
-  version "27.0.1"
-  resolved "https://registry.yarnpkg.com/jest-circus/-/jest-circus-27.0.1.tgz#3a7ec9e9fd60ef4c827197dffe2288aa19f86678"
-  integrity sha512-Tz3ytmrsgxWlTwSyPYb8StF9J2IMjLlbBMKAjhL2UU9/0ZpYb2JiEGjXaAhnGauQRbbpyFbSH3yj5HIbdurmwQ==
+jest-circus@^27.0.3:
+  version "27.0.3"
+  resolved "https://registry.yarnpkg.com/jest-circus/-/jest-circus-27.0.3.tgz#32006967de484e03589da944064d72e172ce3261"
+  integrity sha512-tdMfzs7SgD5l7jRcI1iB3vtQi5fHwCgo4RlO8bzZnYc05PZ+tlAOMZeS8eGYkZ2tPaRY/aRLMFWQp/8zXBrolQ==
   dependencies:
-    "@jest/environment" "^27.0.1"
-    "@jest/test-result" "^27.0.1"
-    "@jest/types" "^27.0.1"
+    "@jest/environment" "^27.0.3"
+    "@jest/test-result" "^27.0.2"
+    "@jest/types" "^27.0.2"
     "@types/node" "*"
     chalk "^4.0.0"
     co "^4.6.0"
     dedent "^0.7.0"
-    expect "^27.0.1"
+    expect "^27.0.2"
     is-generator-fn "^2.0.0"
-    jest-each "^27.0.1"
-    jest-matcher-utils "^27.0.1"
-    jest-message-util "^27.0.1"
-    jest-runner "^27.0.1"
-    jest-runtime "^27.0.1"
-    jest-snapshot "^27.0.1"
-    jest-util "^27.0.1"
-    pretty-format "^27.0.1"
+    jest-each "^27.0.2"
+    jest-matcher-utils "^27.0.2"
+    jest-message-util "^27.0.2"
+    jest-runtime "^27.0.3"
+    jest-snapshot "^27.0.2"
+    jest-util "^27.0.2"
+    pretty-format "^27.0.2"
+    slash "^3.0.0"
     stack-utils "^2.0.3"
     throat "^6.0.1"
 
 jest-cli@^27.0.1:
-  version "27.0.1"
-  resolved "https://registry.yarnpkg.com/jest-cli/-/jest-cli-27.0.1.tgz#9accc8a505438571ee423438eac526a7ee4654b5"
-  integrity sha512-plDsQQwpkKK1SZ5L5xqMa7v/sTwB5LTIeSJqb+cV+4EMlThdUQfg8jwMfHX8jHuUc9TPGLcdoZeBuZcGGn3Rlg==
+  version "27.0.3"
+  resolved "https://registry.yarnpkg.com/jest-cli/-/jest-cli-27.0.3.tgz#b733871acb526054a0f8c971d0466595c5f8316d"
+  integrity sha512-7bt9Sgv4nWH5pUnyJfdLf8CHWfo4+7lSPxeBwQx4r0vBj9jweJam/piE2U91SXtQI+ckm+TIN97OVnqIYpVhSg==
   dependencies:
-    "@jest/core" "^27.0.1"
-    "@jest/test-result" "^27.0.1"
-    "@jest/types" "^27.0.1"
+    "@jest/core" "^27.0.3"
+    "@jest/test-result" "^27.0.2"
+    "@jest/types" "^27.0.2"
     chalk "^4.0.0"
     exit "^0.1.2"
     graceful-fs "^4.2.4"
     import-local "^3.0.2"
-    jest-config "^27.0.1"
-    jest-util "^27.0.1"
-    jest-validate "^27.0.1"
+    jest-config "^27.0.3"
+    jest-util "^27.0.2"
+    jest-validate "^27.0.2"
     prompts "^2.0.1"
     yargs "^16.0.3"
 
-jest-config@^27.0.1:
-  version "27.0.1"
-  resolved "https://registry.yarnpkg.com/jest-config/-/jest-config-27.0.1.tgz#db4f202efcbb92011f62d8f25b52c3d1bd5672d4"
-  integrity sha512-V8O6+CZjGF0OMq4kxVR29ztV/LQqlAAcJLw7a94RndfRXkha4U84n50yZCXiPWtAHHTmb3g1y52US6rGPxA+3w==
+jest-config@^27.0.3:
+  version "27.0.3"
+  resolved "https://registry.yarnpkg.com/jest-config/-/jest-config-27.0.3.tgz#31871583573c6d669dcdb5bb2d1a8738f3b91c20"
+  integrity sha512-zgtI2YQo+ekKsmYNyDlXFY/7w7WWBSJFoj/WRe173WB88CDUrEYWr0sLdbLOQe+sRu6l1Y2S0MCS6BOJm5jkoA==
   dependencies:
     "@babel/core" "^7.1.0"
-    "@jest/test-sequencer" "^27.0.1"
-    "@jest/types" "^27.0.1"
-    babel-jest "^27.0.1"
+    "@jest/test-sequencer" "^27.0.3"
+    "@jest/types" "^27.0.2"
+    babel-jest "^27.0.2"
     chalk "^4.0.0"
     deepmerge "^4.2.2"
     glob "^7.1.1"
     graceful-fs "^4.2.4"
     is-ci "^3.0.0"
-    jest-circus "^27.0.1"
-    jest-environment-jsdom "^27.0.1"
-    jest-environment-node "^27.0.1"
+    jest-circus "^27.0.3"
+    jest-environment-jsdom "^27.0.3"
+    jest-environment-node "^27.0.3"
     jest-get-type "^27.0.1"
-    jest-jasmine2 "^27.0.1"
+    jest-jasmine2 "^27.0.3"
     jest-regex-util "^27.0.1"
-    jest-resolve "^27.0.1"
-    jest-util "^27.0.1"
-    jest-validate "^27.0.1"
+    jest-resolve "^27.0.2"
+    jest-runner "^27.0.3"
+    jest-util "^27.0.2"
+    jest-validate "^27.0.2"
     micromatch "^4.0.4"
-    pretty-format "^27.0.1"
+    pretty-format "^27.0.2"
 
 jest-diff@^26.0.0:
   version "26.6.2"
@@ -5415,15 +5473,15 @@ jest-diff@^26.0.0:
     jest-get-type "^26.3.0"
     pretty-format "^26.6.2"
 
-jest-diff@^27.0.1:
-  version "27.0.1"
-  resolved "https://registry.yarnpkg.com/jest-diff/-/jest-diff-27.0.1.tgz#021beb29fe9f07e83c809a4f7a1ce807b229c4ab"
-  integrity sha512-DQ3OgfJgoGWVTYo4qnYW/Jg5mpYFS2QW9BLxA8bs12ZRN1K8QPZtWeYvUPohQFs3CHX3JLTndGg3jyxdL5THFQ==
+jest-diff@^27.0.2:
+  version "27.0.2"
+  resolved "https://registry.yarnpkg.com/jest-diff/-/jest-diff-27.0.2.tgz#f315b87cee5dc134cf42c2708ab27375cc3f5a7e"
+  integrity sha512-BFIdRb0LqfV1hBt8crQmw6gGQHVDhM87SpMIZ45FPYKReZYG5er1+5pIn2zKqvrJp6WNox0ylR8571Iwk2Dmgw==
   dependencies:
     chalk "^4.0.0"
     diff-sequences "^27.0.1"
     jest-get-type "^27.0.1"
-    pretty-format "^27.0.1"
+    pretty-format "^27.0.2"
 
 jest-docblock@^27.0.1:
   version "27.0.1"
@@ -5432,41 +5490,41 @@ jest-docblock@^27.0.1:
   dependencies:
     detect-newline "^3.0.0"
 
-jest-each@^27.0.1:
-  version "27.0.1"
-  resolved "https://registry.yarnpkg.com/jest-each/-/jest-each-27.0.1.tgz#37fa20b7d809b29d4349d8eb7d01f17c2feeab10"
-  integrity sha512-uJTK/aZ05HsdKkfXucAT5+/1DIURnTRv34OSxn1HWHrD+xu9eDX5Xgds09QSvg/mU01VS5upuHTDKG3W+r0rQA==
+jest-each@^27.0.2:
+  version "27.0.2"
+  resolved "https://registry.yarnpkg.com/jest-each/-/jest-each-27.0.2.tgz#865ddb4367476ced752167926b656fa0dcecd8c7"
+  integrity sha512-OLMBZBZ6JkoXgUenDtseFRWA43wVl2BwmZYIWQws7eS7pqsIvePqj/jJmEnfq91ALk3LNphgwNK/PRFBYi7ITQ==
   dependencies:
-    "@jest/types" "^27.0.1"
+    "@jest/types" "^27.0.2"
     chalk "^4.0.0"
     jest-get-type "^27.0.1"
-    jest-util "^27.0.1"
-    pretty-format "^27.0.1"
+    jest-util "^27.0.2"
+    pretty-format "^27.0.2"
 
-jest-environment-jsdom@^27.0.1:
-  version "27.0.1"
-  resolved "https://registry.yarnpkg.com/jest-environment-jsdom/-/jest-environment-jsdom-27.0.1.tgz#12b0ed587fb53e0a581a5101bb209aef09da2310"
-  integrity sha512-lesU8T9zkjgLaLpUFmFDgchu6/2OCoXm52nN6UumR063Hb+1TJdI7ihgM86+G01Ay86Lyr+K/FAR6yIIOviH3Q==
+jest-environment-jsdom@^27.0.3:
+  version "27.0.3"
+  resolved "https://registry.yarnpkg.com/jest-environment-jsdom/-/jest-environment-jsdom-27.0.3.tgz#ed73e913ddc03864eb9f934b5cbabf1b63504e2e"
+  integrity sha512-5KLmgv1bhiimpSA8oGTnZYk6g4fsNyZiA/6gI2tAZUgrufd7heRUSVh4gRokzZVEj8zlwAQYT0Zs6tuJSW/ECA==
   dependencies:
-    "@jest/environment" "^27.0.1"
-    "@jest/fake-timers" "^27.0.1"
-    "@jest/types" "^27.0.1"
+    "@jest/environment" "^27.0.3"
+    "@jest/fake-timers" "^27.0.3"
+    "@jest/types" "^27.0.2"
     "@types/node" "*"
-    jest-mock "^27.0.1"
-    jest-util "^27.0.1"
+    jest-mock "^27.0.3"
+    jest-util "^27.0.2"
     jsdom "^16.6.0"
 
-jest-environment-node@^27.0.1:
-  version "27.0.1"
-  resolved "https://registry.yarnpkg.com/jest-environment-node/-/jest-environment-node-27.0.1.tgz#7d7df7ae191477a823ffb4fcc0772b4c23ec5c87"
-  integrity sha512-/p94lo0hx+hbKUw1opnRFUPPsjncRBEUU+2Dh7BuxX8Nr4rRiTivLYgXzo79FhaeMYV0uiV5WAbHBq6xC11JJg==
+jest-environment-node@^27.0.3:
+  version "27.0.3"
+  resolved "https://registry.yarnpkg.com/jest-environment-node/-/jest-environment-node-27.0.3.tgz#b4acb3679d2552a4215732cab8b0ca7ec4398ee0"
+  integrity sha512-co2/IVnIFL3cItpFULCvXFg9us4gvWXgs7mutAMPCbFhcqh56QAOdKhNzC2+RycsC/k4mbMj1VF+9F/NzA0ROg==
   dependencies:
-    "@jest/environment" "^27.0.1"
-    "@jest/fake-timers" "^27.0.1"
-    "@jest/types" "^27.0.1"
+    "@jest/environment" "^27.0.3"
+    "@jest/fake-timers" "^27.0.3"
+    "@jest/types" "^27.0.2"
     "@types/node" "*"
-    jest-mock "^27.0.1"
-    jest-util "^27.0.1"
+    jest-mock "^27.0.3"
+    jest-util "^27.0.2"
 
 jest-get-type@^26.3.0:
   version "26.3.0"
@@ -5478,12 +5536,12 @@ jest-get-type@^27.0.1:
   resolved "https://registry.yarnpkg.com/jest-get-type/-/jest-get-type-27.0.1.tgz#34951e2b08c8801eb28559d7eb732b04bbcf7815"
   integrity sha512-9Tggo9zZbu0sHKebiAijyt1NM77Z0uO4tuWOxUCujAiSeXv30Vb5D4xVF4UR4YWNapcftj+PbByU54lKD7/xMg==
 
-jest-haste-map@^27.0.1:
-  version "27.0.1"
-  resolved "https://registry.yarnpkg.com/jest-haste-map/-/jest-haste-map-27.0.1.tgz#653c4ba59309a86499ad7bf663176e7f97478191"
-  integrity sha512-ioCuobr4z90H1Pz8+apz2vfz63387apzAoawm/9IIOndarDfRkjLURdLOe//AI5jUQmjVRg+WiL92339kqlCmA==
+jest-haste-map@^27.0.2:
+  version "27.0.2"
+  resolved "https://registry.yarnpkg.com/jest-haste-map/-/jest-haste-map-27.0.2.tgz#3f1819400c671237e48b4d4b76a80a0dbed7577f"
+  integrity sha512-37gYfrYjjhEfk37C4bCMWAC0oPBxDpG0qpl8lYg8BT//wf353YT/fzgA7+Dq0EtM7rPFS3JEcMsxdtDwNMi2cA==
   dependencies:
-    "@jest/types" "^27.0.1"
+    "@jest/types" "^27.0.2"
     "@types/graceful-fs" "^4.1.2"
     "@types/node" "*"
     anymatch "^3.0.3"
@@ -5491,76 +5549,76 @@ jest-haste-map@^27.0.1:
     graceful-fs "^4.2.4"
     jest-regex-util "^27.0.1"
     jest-serializer "^27.0.1"
-    jest-util "^27.0.1"
-    jest-worker "^27.0.1"
+    jest-util "^27.0.2"
+    jest-worker "^27.0.2"
     micromatch "^4.0.4"
     walker "^1.0.7"
   optionalDependencies:
     fsevents "^2.3.2"
 
-jest-jasmine2@^27.0.1:
-  version "27.0.1"
-  resolved "https://registry.yarnpkg.com/jest-jasmine2/-/jest-jasmine2-27.0.1.tgz#d975bfe072f3ac3596c0be5fc0a1215fd2e91e77"
-  integrity sha512-o8Ist0o970QDDm/R2o9UDbvNxq8A0++FTFQ0z9OnieJwS1nDH6H7WBDYAGPTdmnla7kbW41oLFPvhmjJE4mekg==
+jest-jasmine2@^27.0.3:
+  version "27.0.3"
+  resolved "https://registry.yarnpkg.com/jest-jasmine2/-/jest-jasmine2-27.0.3.tgz#fa6f6499566ea1b01b68b3ad13f49d1592b02c85"
+  integrity sha512-odJ2ia8P5c+IsqOcWJPmku4AqbXIfTVLRjYTKHri3TEvbmTdLw0ghy13OAPIl/0v7cVH0TURK7+xFOHKDLvKIA==
   dependencies:
     "@babel/traverse" "^7.1.0"
-    "@jest/environment" "^27.0.1"
+    "@jest/environment" "^27.0.3"
     "@jest/source-map" "^27.0.1"
-    "@jest/test-result" "^27.0.1"
-    "@jest/types" "^27.0.1"
+    "@jest/test-result" "^27.0.2"
+    "@jest/types" "^27.0.2"
     "@types/node" "*"
     chalk "^4.0.0"
     co "^4.6.0"
-    expect "^27.0.1"
+    expect "^27.0.2"
     is-generator-fn "^2.0.0"
-    jest-each "^27.0.1"
-    jest-matcher-utils "^27.0.1"
-    jest-message-util "^27.0.1"
-    jest-runtime "^27.0.1"
-    jest-snapshot "^27.0.1"
-    jest-util "^27.0.1"
-    pretty-format "^27.0.1"
+    jest-each "^27.0.2"
+    jest-matcher-utils "^27.0.2"
+    jest-message-util "^27.0.2"
+    jest-runtime "^27.0.3"
+    jest-snapshot "^27.0.2"
+    jest-util "^27.0.2"
+    pretty-format "^27.0.2"
     throat "^6.0.1"
 
-jest-leak-detector@^27.0.1:
-  version "27.0.1"
-  resolved "https://registry.yarnpkg.com/jest-leak-detector/-/jest-leak-detector-27.0.1.tgz#eedeaee7c0ab553db4d8908f74967329624342b9"
-  integrity sha512-SQ/lRhfmnV3UuiaKIjwNXCaW2yh1rTMAL4n4Cl4I4gU0X2LoIc6Ogxe4UKM/J6Ld2uzc4gDGVYc5lSdpf6WjYw==
+jest-leak-detector@^27.0.2:
+  version "27.0.2"
+  resolved "https://registry.yarnpkg.com/jest-leak-detector/-/jest-leak-detector-27.0.2.tgz#ce19aa9dbcf7a72a9d58907a970427506f624e69"
+  integrity sha512-TZA3DmCOfe8YZFIMD1GxFqXUkQnIoOGQyy4hFCA2mlHtnAaf+FeOMxi0fZmfB41ZL+QbFG6BVaZF5IeFIVy53Q==
   dependencies:
     jest-get-type "^27.0.1"
-    pretty-format "^27.0.1"
+    pretty-format "^27.0.2"
 
-jest-matcher-utils@^27.0.1:
-  version "27.0.1"
-  resolved "https://registry.yarnpkg.com/jest-matcher-utils/-/jest-matcher-utils-27.0.1.tgz#7a01330786e370f152b0b0159f827293b6322909"
-  integrity sha512-NauNU+olKhPzLlsRnTOYFGk/MK5QFYl9ZzkrtfsY4eCq4SB3Bcl03UL44VdnlN5S/uFn4H2jwvRY1y6nSDTX3g==
+jest-matcher-utils@^27.0.2:
+  version "27.0.2"
+  resolved "https://registry.yarnpkg.com/jest-matcher-utils/-/jest-matcher-utils-27.0.2.tgz#f14c060605a95a466cdc759acc546c6f4cbfc4f0"
+  integrity sha512-Qczi5xnTNjkhcIB0Yy75Txt+Ez51xdhOxsukN7awzq2auZQGPHcQrJ623PZj0ECDEMOk2soxWx05EXdXGd1CbA==
   dependencies:
     chalk "^4.0.0"
-    jest-diff "^27.0.1"
+    jest-diff "^27.0.2"
     jest-get-type "^27.0.1"
-    pretty-format "^27.0.1"
+    pretty-format "^27.0.2"
 
-jest-message-util@^27.0.1:
-  version "27.0.1"
-  resolved "https://registry.yarnpkg.com/jest-message-util/-/jest-message-util-27.0.1.tgz#382b7c55d8e0b1aba9eeb41d3cfdd34e451210ed"
-  integrity sha512-w8BfON2GwWORkos8BsxcwwQrLkV2s1ENxSRXK43+6yuquDE2hVxES/jrFqOArpP1ETVqqMmktU6iGkG8ncVzeA==
+jest-message-util@^27.0.2:
+  version "27.0.2"
+  resolved "https://registry.yarnpkg.com/jest-message-util/-/jest-message-util-27.0.2.tgz#181c9b67dff504d8f4ad15cba10d8b80f272048c"
+  integrity sha512-rTqWUX42ec2LdMkoUPOzrEd1Tcm+R1KfLOmFK+OVNo4MnLsEaxO5zPDb2BbdSmthdM/IfXxOZU60P/WbWF8BTw==
   dependencies:
     "@babel/code-frame" "^7.12.13"
-    "@jest/types" "^27.0.1"
+    "@jest/types" "^27.0.2"
     "@types/stack-utils" "^2.0.0"
     chalk "^4.0.0"
     graceful-fs "^4.2.4"
     micromatch "^4.0.4"
-    pretty-format "^27.0.1"
+    pretty-format "^27.0.2"
     slash "^3.0.0"
     stack-utils "^2.0.3"
 
-jest-mock@^27.0.1:
-  version "27.0.1"
-  resolved "https://registry.yarnpkg.com/jest-mock/-/jest-mock-27.0.1.tgz#8394e297bc3dfed980961622cb51fd042b4acf5a"
-  integrity sha512-fXCSZQDT5hUcAUy8OBnB018x7JFOMQnz4XfpSKEbfpWzL6o5qaLRhgf2Qg2NPuVKmC/fgOf33Edj8wjF4I24CQ==
+jest-mock@^27.0.3:
+  version "27.0.3"
+  resolved "https://registry.yarnpkg.com/jest-mock/-/jest-mock-27.0.3.tgz#5591844f9192b3335c0dca38e8e45ed297d4d23d"
+  integrity sha512-O5FZn5XDzEp+Xg28mUz4ovVcdwBBPfAhW9+zJLO0Efn2qNbYcDaJvSlRiQ6BCZUCVOJjALicuJQI9mRFjv1o9Q==
   dependencies:
-    "@jest/types" "^27.0.1"
+    "@jest/types" "^27.0.2"
     "@types/node" "*"
 
 jest-pnp-resolver@^1.2.2:
@@ -5573,69 +5631,69 @@ jest-regex-util@^27.0.1:
   resolved "https://registry.yarnpkg.com/jest-regex-util/-/jest-regex-util-27.0.1.tgz#69d4b1bf5b690faa3490113c47486ed85dd45b68"
   integrity sha512-6nY6QVcpTgEKQy1L41P4pr3aOddneK17kn3HJw6SdwGiKfgCGTvH02hVXL0GU8GEKtPH83eD2DIDgxHXOxVohQ==
 
-jest-resolve-dependencies@^27.0.1:
-  version "27.0.1"
-  resolved "https://registry.yarnpkg.com/jest-resolve-dependencies/-/jest-resolve-dependencies-27.0.1.tgz#3dcaeb277e0253747706467e8f05e1e78a1d534d"
-  integrity sha512-ly1x5mEf21f3IVWbUNwIz/ePLtv4QdhYuQIVSVDqxx7yzAwhhdu0DJo7UNiEYKQY7Im48wfbNdOUpo7euFUXBQ==
+jest-resolve-dependencies@^27.0.3:
+  version "27.0.3"
+  resolved "https://registry.yarnpkg.com/jest-resolve-dependencies/-/jest-resolve-dependencies-27.0.3.tgz#7e258f7d0458bb910855f8a50f5c1e9d92c319dc"
+  integrity sha512-HdjWOvFAgT5CYChF2eiBN2rRKicjaTCCtA3EtH47REIdGzEHGUhYrWYgLahXsiOovvWN6edhcHL5WCa3gbc04A==
   dependencies:
-    "@jest/types" "^27.0.1"
+    "@jest/types" "^27.0.2"
     jest-regex-util "^27.0.1"
-    jest-snapshot "^27.0.1"
+    jest-snapshot "^27.0.2"
 
-jest-resolve@^27.0.1:
-  version "27.0.1"
-  resolved "https://registry.yarnpkg.com/jest-resolve/-/jest-resolve-27.0.1.tgz#4e1b76f61c7e2213d2fbd37342800864309de538"
-  integrity sha512-Q7QQ0OZ7z6D5Dul0MrsexlKalU8ZwexBfHLSu1qYPgphvUm6WO1b/xUnipU3e+uW1riDzMcJeJVYbdQ37hBHeg==
+jest-resolve@^27.0.2:
+  version "27.0.2"
+  resolved "https://registry.yarnpkg.com/jest-resolve/-/jest-resolve-27.0.2.tgz#087a3ed17182722a3415f92bfacc99c49cf8a965"
+  integrity sha512-rmfLGyZhwAUR5z3EwPAW7LQTorWAuCYCcsQJoQxT2it+BOgX3zKxa67r1pfpK3ihy2k9TjYD3/lMp5rPm/CL1Q==
   dependencies:
-    "@jest/types" "^27.0.1"
+    "@jest/types" "^27.0.2"
     chalk "^4.0.0"
     escalade "^3.1.1"
     graceful-fs "^4.2.4"
     jest-pnp-resolver "^1.2.2"
-    jest-util "^27.0.1"
+    jest-util "^27.0.2"
+    jest-validate "^27.0.2"
     resolve "^1.20.0"
     slash "^3.0.0"
 
-jest-runner@^27.0.1:
-  version "27.0.1"
-  resolved "https://registry.yarnpkg.com/jest-runner/-/jest-runner-27.0.1.tgz#52137173fbf318b7b1f034b81200c2846758f681"
-  integrity sha512-DUNizlD2D7J80G3VOrwfbtb7KYxiftMng82HNcKwTW0W3AwwNuBeq+1exoCnLO7Mxh7NP+k/1XQBlzLpjr/CnA==
-  dependencies:
-    "@jest/console" "^27.0.1"
-    "@jest/environment" "^27.0.1"
-    "@jest/test-result" "^27.0.1"
-    "@jest/transform" "^27.0.1"
-    "@jest/types" "^27.0.1"
+jest-runner@^27.0.3:
+  version "27.0.3"
+  resolved "https://registry.yarnpkg.com/jest-runner/-/jest-runner-27.0.3.tgz#d9747af3bee5a6ffaeb9e10b653263b780258b54"
+  integrity sha512-zH23uIIh1ro1JCD7XX1bQ0bQwXEsBzLX2UJVE/AVLsk4YJRmTfyXIzzRzBWRdnMHHg1NWkJ4fGs7eFP15IqZpQ==
+  dependencies:
+    "@jest/console" "^27.0.2"
+    "@jest/environment" "^27.0.3"
+    "@jest/test-result" "^27.0.2"
+    "@jest/transform" "^27.0.2"
+    "@jest/types" "^27.0.2"
     "@types/node" "*"
     chalk "^4.0.0"
     emittery "^0.8.1"
     exit "^0.1.2"
     graceful-fs "^4.2.4"
-    jest-config "^27.0.1"
     jest-docblock "^27.0.1"
-    jest-haste-map "^27.0.1"
-    jest-leak-detector "^27.0.1"
-    jest-message-util "^27.0.1"
-    jest-resolve "^27.0.1"
-    jest-runtime "^27.0.1"
-    jest-util "^27.0.1"
-    jest-worker "^27.0.1"
+    jest-haste-map "^27.0.2"
+    jest-leak-detector "^27.0.2"
+    jest-message-util "^27.0.2"
+    jest-resolve "^27.0.2"
+    jest-runtime "^27.0.3"
+    jest-util "^27.0.2"
+    jest-worker "^27.0.2"
     source-map-support "^0.5.6"
     throat "^6.0.1"
 
-jest-runtime@^27.0.1:
-  version "27.0.1"
-  resolved "https://registry.yarnpkg.com/jest-runtime/-/jest-runtime-27.0.1.tgz#b71bb8ea189c50525aebb4aba6c524633ca27659"
-  integrity sha512-ImcrbQtpCUp8X9Rm4ky3j1GG9cqIKZJvXGZyB5cHEapGPTmg7wvvNooLmKragEe61/p/bhw1qO68Y0/9BSsBBg==
+jest-runtime@^27.0.3:
+  version "27.0.3"
+  resolved "https://registry.yarnpkg.com/jest-runtime/-/jest-runtime-27.0.3.tgz#32499c1047e5d953cfbb67fe790ab0167a614d28"
+  integrity sha512-k1Hl2pWWHBkSXdCggX2lyLRuDnnnmMlnJd+DPLb8LmmAeHW87WgGC6TplD377VxY3KQu73sklkhGUIdwFgsRVQ==
   dependencies:
-    "@jest/console" "^27.0.1"
-    "@jest/environment" "^27.0.1"
-    "@jest/fake-timers" "^27.0.1"
-    "@jest/globals" "^27.0.1"
+    "@jest/console" "^27.0.2"
+    "@jest/environment" "^27.0.3"
+    "@jest/fake-timers" "^27.0.3"
+    "@jest/globals" "^27.0.3"
     "@jest/source-map" "^27.0.1"
-    "@jest/test-result" "^27.0.1"
-    "@jest/transform" "^27.0.1"
-    "@jest/types" "^27.0.1"
+    "@jest/test-result" "^27.0.2"
+    "@jest/transform" "^27.0.2"
+    "@jest/types" "^27.0.2"
     "@types/yargs" "^16.0.0"
     chalk "^4.0.0"
     cjs-module-lexer "^1.0.0"
@@ -5643,14 +5701,14 @@ jest-runtime@^27.0.1:
     exit "^0.1.2"
     glob "^7.1.3"
     graceful-fs "^4.2.4"
-    jest-haste-map "^27.0.1"
-    jest-message-util "^27.0.1"
-    jest-mock "^27.0.1"
+    jest-haste-map "^27.0.2"
+    jest-message-util "^27.0.2"
+    jest-mock "^27.0.3"
     jest-regex-util "^27.0.1"
-    jest-resolve "^27.0.1"
-    jest-snapshot "^27.0.1"
-    jest-util "^27.0.1"
-    jest-validate "^27.0.1"
+    jest-resolve "^27.0.2"
+    jest-snapshot "^27.0.2"
+    jest-util "^27.0.2"
+    jest-validate "^27.0.2"
     slash "^3.0.0"
     strip-bom "^4.0.0"
     yargs "^16.0.3"
@@ -5671,10 +5729,10 @@ jest-silent-reporter@0.5.0:
     chalk "^4.0.0"
     jest-util "^26.0.0"
 
-jest-snapshot@^27.0.1:
-  version "27.0.1"
-  resolved "https://registry.yarnpkg.com/jest-snapshot/-/jest-snapshot-27.0.1.tgz#01a82d901f260604908373795c9255b032d2a07a"
-  integrity sha512-HgKmSebDB3rswugREeh+nKrxJEVZE12K7lZ2MuwfFZT6YmiH0TlofsL2YmiLsCsG5KH5ZcLYYpF5bDrvtVx/Xg==
+jest-snapshot@^27.0.2:
+  version "27.0.2"
+  resolved "https://registry.yarnpkg.com/jest-snapshot/-/jest-snapshot-27.0.2.tgz#40c48dc6afd3cbc5d3d07c061f20fc10d94ca0cd"
+  integrity sha512-4RcgvZbPrrbEE/hT6XQ4hr+NVVLNrmsgUnYSnZRT6UAvW9Q2yzGMS+tfJh+xlQJAapnnkNJzsMn6vUa+yfiVHA==
   dependencies:
     "@babel/core" "^7.7.2"
     "@babel/generator" "^7.7.2"
@@ -5682,23 +5740,23 @@ jest-snapshot@^27.0.1:
     "@babel/plugin-syntax-typescript" "^7.7.2"
     "@babel/traverse" "^7.7.2"
     "@babel/types" "^7.0.0"
-    "@jest/transform" "^27.0.1"
-    "@jest/types" "^27.0.1"
+    "@jest/transform" "^27.0.2"
+    "@jest/types" "^27.0.2"
     "@types/babel__traverse" "^7.0.4"
     "@types/prettier" "^2.1.5"
     babel-preset-current-node-syntax "^1.0.0"
     chalk "^4.0.0"
-    expect "^27.0.1"
+    expect "^27.0.2"
     graceful-fs "^4.2.4"
-    jest-diff "^27.0.1"
+    jest-diff "^27.0.2"
     jest-get-type "^27.0.1"
-    jest-haste-map "^27.0.1"
-    jest-matcher-utils "^27.0.1"
-    jest-message-util "^27.0.1"
-    jest-resolve "^27.0.1"
-    jest-util "^27.0.1"
+    jest-haste-map "^27.0.2"
+    jest-matcher-utils "^27.0.2"
+    jest-message-util "^27.0.2"
+    jest-resolve "^27.0.2"
+    jest-util "^27.0.2"
     natural-compare "^1.4.0"
-    pretty-format "^27.0.1"
+    pretty-format "^27.0.2"
     semver "^7.3.2"
 
 jest-util@^26.0.0:
@@ -5713,56 +5771,47 @@ jest-util@^26.0.0:
     is-ci "^2.0.0"
     micromatch "^4.0.2"
 
-jest-util@^27.0.0, jest-util@^27.0.1:
-  version "27.0.1"
-  resolved "https://registry.yarnpkg.com/jest-util/-/jest-util-27.0.1.tgz#324ed9879d129c1e64f9169a739d6d50d7928769"
-  integrity sha512-lEw3waSmEOO4ZkwkUlFSvg4es1+8+LIkSGxp/kF60K0+vMR3Dv3O2HMZhcln9NHqSQzpVbsDT6OeMzUPW7DfRg==
+jest-util@^27.0.0, jest-util@^27.0.2:
+  version "27.0.2"
+  resolved "https://registry.yarnpkg.com/jest-util/-/jest-util-27.0.2.tgz#fc2c7ace3c75ae561cf1e5fdb643bf685a5be7c7"
+  integrity sha512-1d9uH3a00OFGGWSibpNYr+jojZ6AckOMCXV2Z4K3YXDnzpkAaXQyIpY14FOJPiUmil7CD+A6Qs+lnnh6ctRbIA==
   dependencies:
-    "@jest/types" "^27.0.1"
+    "@jest/types" "^27.0.2"
     "@types/node" "*"
     chalk "^4.0.0"
     graceful-fs "^4.2.4"
     is-ci "^3.0.0"
     picomatch "^2.2.3"
 
-jest-validate@^27.0.1:
-  version "27.0.1"
-  resolved "https://registry.yarnpkg.com/jest-validate/-/jest-validate-27.0.1.tgz#8e43428674b6097f8ee3abe42c4248a4826cd008"
-  integrity sha512-zvmPRcfTkqTZuHveIKAI2nbkUc3SDXjWVJULknPLGF5bdxOGSeGZg7f/Uw0MUVOkCOaspcHnsPCgZG0pqmg71g==
+jest-validate@^27.0.2:
+  version "27.0.2"
+  resolved "https://registry.yarnpkg.com/jest-validate/-/jest-validate-27.0.2.tgz#7fe2c100089449cd5cbb47a5b0b6cb7cda5beee5"
+  integrity sha512-UgBF6/oVu1ofd1XbaSotXKihi8nZhg0Prm8twQ9uCuAfo59vlxCXMPI/RKmrZEVgi3Nd9dS0I8A0wzWU48pOvg==
   dependencies:
-    "@jest/types" "^27.0.1"
+    "@jest/types" "^27.0.2"
     camelcase "^6.2.0"
     chalk "^4.0.0"
     jest-get-type "^27.0.1"
     leven "^3.1.0"
-    pretty-format "^27.0.1"
+    pretty-format "^27.0.2"
 
-jest-watcher@^27.0.1:
-  version "27.0.1"
-  resolved "https://registry.yarnpkg.com/jest-watcher/-/jest-watcher-27.0.1.tgz#61b9403d7b498161f6aa6124602363525ac3efc2"
-  integrity sha512-Chp9c02BN0IgEbtGreyAhGqIsOrn9a0XnzbuXOxdW1+cW0Tjh12hMzHDIdLFHpYP/TqaMTmPHaJ5KWvpCCrNFw==
+jest-watcher@^27.0.2:
+  version "27.0.2"
+  resolved "https://registry.yarnpkg.com/jest-watcher/-/jest-watcher-27.0.2.tgz#dab5f9443e2d7f52597186480731a8c6335c5deb"
+  integrity sha512-8nuf0PGuTxWj/Ytfw5fyvNn/R80iXY8QhIT0ofyImUvdnoaBdT6kob0GmhXR+wO+ALYVnh8bQxN4Tjfez0JgkA==
   dependencies:
-    "@jest/test-result" "^27.0.1"
-    "@jest/types" "^27.0.1"
+    "@jest/test-result" "^27.0.2"
+    "@jest/types" "^27.0.2"
     "@types/node" "*"
     ansi-escapes "^4.2.1"
     chalk "^4.0.0"
-    jest-util "^27.0.1"
+    jest-util "^27.0.2"
     string-length "^4.0.1"
 
-jest-worker@^26.6.2:
-  version "26.6.2"
-  resolved "https://registry.yarnpkg.com/jest-worker/-/jest-worker-26.6.2.tgz#7f72cbc4d643c365e27b9fd775f9d0eaa9c7a8ed"
-  integrity sha512-KWYVV1c4i+jbMpaBC+U++4Va0cp8OisU185o73T1vo99hqi7w8tSJfUXYswwqqrjzwxa6KpRK54WhPvwf5w6PQ==
-  dependencies:
-    "@types/node" "*"
-    merge-stream "^2.0.0"
-    supports-color "^7.0.0"
-
-jest-worker@^27.0.1:
-  version "27.0.1"
-  resolved "https://registry.yarnpkg.com/jest-worker/-/jest-worker-27.0.1.tgz#b255fcbb40fb467295010c628474b1185cab4f9e"
-  integrity sha512-NhHqClI3owOjmS8dBhQMKHZ2rrT0sBTpqGitp9nMX5AAjVXd+15o4v96uBEMhoywaLKN+5opcKBlXwAoADZolA==
+jest-worker@^27.0.2:
+  version "27.0.2"
+  resolved "https://registry.yarnpkg.com/jest-worker/-/jest-worker-27.0.2.tgz#4ebeb56cef48b3e7514552f80d0d80c0129f0b05"
+  integrity sha512-EoBdilOTTyOgmHXtw/cPc+ZrCA0KJMrkXzkrPGNwLmnvvlN1nj7MPrxpT7m+otSv2e1TLaVffzDnE/LB14zJMg==
   dependencies:
     "@types/node" "*"
     merge-stream "^2.0.0"
@@ -6032,23 +6081,23 @@ levn@~0.3.0:
     type-check "~0.3.2"
 
 libnpmaccess@^4.0.1:
-  version "4.0.2"
-  resolved "https://registry.yarnpkg.com/libnpmaccess/-/libnpmaccess-4.0.2.tgz#781832fb7ccb867b26343a75a85ad9c43e50406e"
-  integrity sha512-avXtJibZuGap0/qADDYqb9zdpgzVu/yG5+tl2sTRa7MCkDNv2ZlGwCYI0r6/+tmqXPj0iB9fKexHz426vB326w==
+  version "4.0.3"
+  resolved "https://registry.yarnpkg.com/libnpmaccess/-/libnpmaccess-4.0.3.tgz#dfb0e5b0a53c315a2610d300e46b4ddeb66e7eec"
+  integrity sha512-sPeTSNImksm8O2b6/pf3ikv4N567ERYEpeKRPSmqlNt1dTZbvgpJIzg5vAhXHpw2ISBsELFRelk0jEahj1c6nQ==
   dependencies:
     aproba "^2.0.0"
     minipass "^3.1.1"
     npm-package-arg "^8.1.2"
-    npm-registry-fetch "^10.0.0"
+    npm-registry-fetch "^11.0.0"
 
 libnpmpublish@^4.0.0:
-  version "4.0.1"
-  resolved "https://registry.yarnpkg.com/libnpmpublish/-/libnpmpublish-4.0.1.tgz#08ca2cbb5d7f6be1ce4f3f9c49b3822682bcf166"
-  integrity sha512-hZCrZ8v4G9YH3DxpIyBdob25ijD5v5LNzRbwsej4pPDopjdcLLj1Widl+BUeFa7D0ble1JYL4F3owjLJqiA8yA==
+  version "4.0.2"
+  resolved "https://registry.yarnpkg.com/libnpmpublish/-/libnpmpublish-4.0.2.tgz#be77e8bf5956131bcb45e3caa6b96a842dec0794"
+  integrity sha512-+AD7A2zbVeGRCFI2aO//oUmapCwy7GHqPXFJh3qpToSRNU+tXKJ2YFUgjt04LPPAf2dlEH95s6EhIHM1J7bmOw==
   dependencies:
     normalize-package-data "^3.0.2"
     npm-package-arg "^8.1.2"
-    npm-registry-fetch "^10.0.0"
+    npm-registry-fetch "^11.0.0"
     semver "^7.1.3"
     ssri "^8.0.1"
 
@@ -6262,6 +6311,28 @@ make-fetch-happen@^8.0.9:
     socks-proxy-agent "^5.0.0"
     ssri "^8.0.0"
 
+make-fetch-happen@^9.0.1:
+  version "9.0.1"
+  resolved "https://registry.yarnpkg.com/make-fetch-happen/-/make-fetch-happen-9.0.1.tgz#77d0e8b8ed7d387be7f137b76621fd904e4e10df"
+  integrity sha512-c2IxuRxsPKpW9ftCUnsbbAD3rBZNGsuRNwexAbWI8Eh9jlEVPrxZYK5ffgYRAVTQBegqrqR3DlWrsvvLhi4xQA==
+  dependencies:
+    agentkeepalive "^4.1.3"
+    cacache "^15.2.0"
+    http-cache-semantics "^4.1.0"
+    http-proxy-agent "^4.0.1"
+    https-proxy-agent "^5.0.0"
+    is-lambda "^1.0.1"
+    lru-cache "^6.0.0"
+    minipass "^3.1.3"
+    minipass-collect "^1.0.2"
+    minipass-fetch "^1.3.2"
+    minipass-flush "^1.0.5"
+    minipass-pipeline "^1.2.4"
+    negotiator "^0.6.2"
+    promise-retry "^2.0.1"
+    socks-proxy-agent "^5.0.0"
+    ssri "^8.0.0"
+
 make-iterator@^1.0.0:
   version "1.0.1"
   resolved "https://registry.yarnpkg.com/make-iterator/-/make-iterator-1.0.1.tgz#29b33f312aa8f547c4a5e490f56afcec99133ad6"
@@ -6299,9 +6370,9 @@ map-visit@^1.0.0:
     object-visit "^1.0.0"
 
 marked@^2.0.3:
-  version "2.0.5"
-  resolved "https://registry.yarnpkg.com/marked/-/marked-2.0.5.tgz#2d15c759b9497b0e7b5b57f4c2edabe1002ef9e7"
-  integrity sha512-yfCEUXmKhBPLOzEC7c+tc4XZdIeTdGoRCZakFMkCxodr7wDXqoapIME4wjcpBPJLNyUnKJ3e8rb8wlAgnLnaDw==
+  version "2.0.7"
+  resolved "https://registry.yarnpkg.com/marked/-/marked-2.0.7.tgz#bc5b857a09071b48ce82a1f7304913a993d4b7d1"
+  integrity sha512-BJXxkuIfJchcXOJWTT2DOL+yFWifFv2yGYOUzvXg8Qz610QKw+sHCvTMYwA+qWGhlA2uivBezChZ/pBy1tWdkQ==
 
 matchdep@^2.0.0:
   version "2.0.0"
@@ -6431,17 +6502,17 @@ micromatch@^4.0.2, micromatch@^4.0.4:
     braces "^3.0.1"
     picomatch "^2.2.3"
 
-mime-db@1.47.0:
-  version "1.47.0"
-  resolved "https://registry.yarnpkg.com/mime-db/-/mime-db-1.47.0.tgz#8cb313e59965d3c05cfbf898915a267af46a335c"
-  integrity sha512-QBmA/G2y+IfeS4oktet3qRZ+P5kPhCKRXxXnQEudYqUaEioAU1/Lq2us3D/t1Jfo4hE9REQPrbB7K5sOczJVIw==
+mime-db@1.48.0:
+  version "1.48.0"
+  resolved "https://registry.yarnpkg.com/mime-db/-/mime-db-1.48.0.tgz#e35b31045dd7eada3aaad537ed88a33afbef2d1d"
+  integrity sha512-FM3QwxV+TnZYQ2aRqhlKBMHxk10lTbMt3bBkMAp54ddrNeVSfcQYOOKuGuy3Ddrm38I04If834fOUSq1yzslJQ==
 
 mime-types@^2.1.12, mime-types@^2.1.27, mime-types@~2.1.19:
-  version "2.1.30"
-  resolved "https://registry.yarnpkg.com/mime-types/-/mime-types-2.1.30.tgz#6e7be8b4c479825f85ed6326695db73f9305d62d"
-  integrity sha512-crmjA4bLtR8m9qLpHvgxSChT+XoSlZi8J4n/aIdn3z92e/U47Z0V/yl+Wh9W046GgFVAmoNR/fmdbZYcSSIUeg==
+  version "2.1.31"
+  resolved "https://registry.yarnpkg.com/mime-types/-/mime-types-2.1.31.tgz#a00d76b74317c61f9c2db2218b8e9f8e9c5c9e6b"
+  integrity sha512-XGZnNzm3QvgKxa8dpzyhFTHmpP3l5YNusmne07VUOXxou9CqUqYa/HBy124RqtVh/O2pECas/MOcsDgpilPOPg==
   dependencies:
-    mime-db "1.47.0"
+    mime-db "1.48.0"
 
 mimic-fn@^2.1.0:
   version "2.1.0"
@@ -6656,6 +6727,11 @@ natural-compare@^1.4.0:
   resolved "https://registry.yarnpkg.com/natural-compare/-/natural-compare-1.4.0.tgz#4abebfeed7541f2c27acfb29bdbbd15c8d5ba4f7"
   integrity sha1-Sr6/7tdUHywnrPspvbvRXI1bpPc=
 
+negotiator@^0.6.2:
+  version "0.6.2"
+  resolved "https://registry.yarnpkg.com/negotiator/-/negotiator-0.6.2.tgz#feacf7ccf525a77ae9634436a64883ffeca346fb"
+  integrity sha512-hZXc7K2e+PgeI1eDBe/10Ard4ekbfrrqG8Ep+8Jmf4JID2bNg7NvCPOZN+kfF574pFQI7mum2AUqDidoKqcTOw==
+
 neo-async@^2.6.0, neo-async@^2.6.2:
   version "2.6.2"
   resolved "https://registry.yarnpkg.com/neo-async/-/neo-async-2.6.2.tgz#b4aafb93e3aeb2d8174ca53cf163ab7d7308305f"
@@ -6781,10 +6857,10 @@ normalize-path@^3.0.0:
   resolved "https://registry.yarnpkg.com/normalize-path/-/normalize-path-3.0.0.tgz#0dcd69ff23a1c9b11fd0978316644a0388216a65"
   integrity sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==
 
-normalize-url@^3.3.0:
-  version "3.3.0"
-  resolved "https://registry.yarnpkg.com/normalize-url/-/normalize-url-3.3.0.tgz#b2e1c4dc4f7c6d57743df733a4f5978d18650559"
-  integrity sha512-U+JJi7duF1o+u2pynbp2zXDW2/PADgC30f0GsHZtRh+HOcXHnw137TrNlyxxRvWW5fjKd3bcLHPxofWuCjaeZg==
+normalize-url@^6.0.1:
+  version "6.0.1"
+  resolved "https://registry.yarnpkg.com/normalize-url/-/normalize-url-6.0.1.tgz#a4f27f58cf8c7b287b440b8a8201f42d0b00d256"
+  integrity sha512-VU4pzAuh7Kip71XEmO9aNREYAdMHFGTVj/i+CaTImS8x0i1d3jUZkXhqluy/PRgjPLMgsLQulYY3PJ/aSbSjpQ==
 
 now-and-later@^2.0.0:
   version "2.0.1"
@@ -6827,9 +6903,9 @@ npm-normalize-package-bin@^1.0.0, npm-normalize-package-bin@^1.0.1:
   integrity sha512-EPfafl6JL5/rU+ot6P3gRSCpPDW5VmIzX959Ob1+ySFUuuYHWHekXpwdUZcKP5C+DS4GEtdJluwBjnsNDl+fSA==
 
 npm-package-arg@^8.0.0, npm-package-arg@^8.0.1, npm-package-arg@^8.1.0, npm-package-arg@^8.1.2:
-  version "8.1.2"
-  resolved "https://registry.yarnpkg.com/npm-package-arg/-/npm-package-arg-8.1.2.tgz#b868016ae7de5619e729993fbd8d11dc3c52ab62"
-  integrity sha512-6Eem455JsSMJY6Kpd3EyWE+n5hC+g9bSyHr9K9U2zqZb7+02+hObQ2c0+8iDk/mNF+8r1MhY44WypKJAkySIYA==
+  version "8.1.4"
+  resolved "https://registry.yarnpkg.com/npm-package-arg/-/npm-package-arg-8.1.4.tgz#8001cdbc4363997b8ef6c6cf7aaf543c5805879d"
+  integrity sha512-xLokoCFqj/rPdr3LvcdDL6Kj6ipXGEDHD/QGpzwU6/pibYUOXmp5DBmg76yukFyx4ZDbrXNOTn+BPyd8TD4Jlw==
   dependencies:
     hosted-git-info "^4.0.1"
     semver "^7.3.4"
@@ -6855,13 +6931,12 @@ npm-pick-manifest@^6.0.0, npm-pick-manifest@^6.1.1:
     npm-package-arg "^8.1.2"
     semver "^7.3.4"
 
-npm-registry-fetch@^10.0.0:
-  version "10.1.2"
-  resolved "https://registry.yarnpkg.com/npm-registry-fetch/-/npm-registry-fetch-10.1.2.tgz#11ffe03d813c653e768bdf762cfc5f1afe91b8bd"
-  integrity sha512-KsM/TdPmntqgBFlfsbkOLkkE9ovZo7VpVcd+/eTdYszCrgy5zFl5JzWm+OxavFaEWlbkirpkou+ZYI00RmOBFA==
+npm-registry-fetch@^11.0.0:
+  version "11.0.0"
+  resolved "https://registry.yarnpkg.com/npm-registry-fetch/-/npm-registry-fetch-11.0.0.tgz#68c1bb810c46542760d62a6a965f85a702d43a76"
+  integrity sha512-jmlgSxoDNuhAtxUIG6pVwwtz840i994dL14FoNVZisrmZW5kWd63IUTNv1m/hyRSGSqWjCUp/YZlS1BJyNp9XA==
   dependencies:
-    lru-cache "^6.0.0"
-    make-fetch-happen "^8.0.9"
+    make-fetch-happen "^9.0.1"
     minipass "^3.1.3"
     minipass-fetch "^1.3.0"
     minipass-json-stream "^1.0.1"
@@ -7222,9 +7297,9 @@ p-waterfall@^2.1.1:
     p-reduce "^2.0.0"
 
 pacote@^11.2.6:
-  version "11.3.3"
-  resolved "https://registry.yarnpkg.com/pacote/-/pacote-11.3.3.tgz#d7d6091464f77c09691699df2ded13ab906b3e68"
-  integrity sha512-GQxBX+UcVZrrJRYMK2HoG+gPeSUX/rQhnbPkkGrCYa4n2F/bgClFPaMm0nsdnYrxnmUy85uMHoFXZ0jTD0drew==
+  version "11.3.4"
+  resolved "https://registry.yarnpkg.com/pacote/-/pacote-11.3.4.tgz#c290b790a5cee3082bb8fa223f3f3e2fdf3d0bfc"
+  integrity sha512-RfahPCunM9GI7ryJV/zY0bWQiokZyLqaSNHXtbNSoLb7bwTvBbJBEyCJ01KWs4j1Gj7GmX8crYXQ1sNX6P2VKA==
   dependencies:
     "@npmcli/git" "^2.0.1"
     "@npmcli/installed-package-contents" "^1.0.6"
@@ -7239,7 +7314,7 @@ pacote@^11.2.6:
     npm-package-arg "^8.0.1"
     npm-packlist "^2.1.4"
     npm-pick-manifest "^6.0.0"
-    npm-registry-fetch "^10.0.0"
+    npm-registry-fetch "^11.0.0"
     promise-retry "^2.0.1"
     read-package-json-fast "^2.0.1"
     rimraf "^3.0.2"
@@ -7320,12 +7395,12 @@ parse-path@^4.0.0:
     query-string "^6.13.8"
 
 parse-url@^5.0.0:
-  version "5.0.2"
-  resolved "https://registry.yarnpkg.com/parse-url/-/parse-url-5.0.2.tgz#856a3be1fcdf78dc93fc8b3791f169072d898b59"
-  integrity sha512-Czj+GIit4cdWtxo3ISZCvLiUjErSo0iI3wJ+q9Oi3QuMYTI6OZu+7cewMWZ+C1YAnKhYTk6/TLuhIgCypLthPA==
+  version "5.0.3"
+  resolved "https://registry.yarnpkg.com/parse-url/-/parse-url-5.0.3.tgz#c158560f14cb1560917e0b7fd8b01adc1e9d3cab"
+  integrity sha512-nrLCVMJpqo12X8uUJT4GJPd5AFaTOrGx/QpJy3HNcVtq0AZSstVIsnxS5fqNPuoqMUs3MyfBoOP6Zvu2Arok5A==
   dependencies:
     is-ssh "^1.3.0"
-    normalize-url "^3.3.0"
+    normalize-url "^6.0.1"
     parse-path "^4.0.0"
     protocols "^1.4.0"
 
@@ -7529,12 +7604,12 @@ pretty-format@^26.0.0, pretty-format@^26.6.2:
     ansi-styles "^4.0.0"
     react-is "^17.0.1"
 
-pretty-format@^27.0.1:
-  version "27.0.1"
-  resolved "https://registry.yarnpkg.com/pretty-format/-/pretty-format-27.0.1.tgz#c4094621dfbd3e8ab751964d1cf01edc6f88474d"
-  integrity sha512-qE+0J6c/gd+R6XTcQgPJMc5hMJNsxzSF5p8iZSbMZ7GQzYGlSLNkh2P80Wa2dbF4gEVUsJEgcrBY+1L2/j265w==
+pretty-format@^27.0.2:
+  version "27.0.2"
+  resolved "https://registry.yarnpkg.com/pretty-format/-/pretty-format-27.0.2.tgz#9283ff8c4f581b186b2d4da461617143dca478a4"
+  integrity sha512-mXKbbBPnYTG7Yra9qFBtqj+IXcsvxsvOBco3QHxtxTl+hHKq6QdzMZ+q0CtL4ORHZgwGImRr2XZUX2EWzORxig==
   dependencies:
-    "@jest/types" "^27.0.1"
+    "@jest/types" "^27.0.2"
     ansi-regex "^5.0.0"
     ansi-styles "^5.0.0"
     react-is "^17.0.1"
@@ -8791,11 +8866,11 @@ terminal-link@^2.0.0:
     supports-hyperlinks "^2.0.0"
 
 terser-webpack-plugin@^5.1.1:
-  version "5.1.2"
-  resolved "https://registry.yarnpkg.com/terser-webpack-plugin/-/terser-webpack-plugin-5.1.2.tgz#51d295eb7cc56785a67a372575fdc46e42d5c20c"
-  integrity sha512-6QhDaAiVHIQr5Ab3XUWZyDmrIPCHMiqJVljMF91YKyqwKkL5QHnYMkrMBy96v9Z7ev1hGhSEw1HQZc2p/s5Z8Q==
+  version "5.1.3"
+  resolved "https://registry.yarnpkg.com/terser-webpack-plugin/-/terser-webpack-plugin-5.1.3.tgz#30033e955ca28b55664f1e4b30a1347e61aa23af"
+  integrity sha512-cxGbMqr6+A2hrIB5ehFIF+F/iST5ZOxvOmy9zih9ySbP1C2oEWQSOUS+2SNBTjzx5xLKO4xnod9eywdfq1Nb9A==
   dependencies:
-    jest-worker "^26.6.2"
+    jest-worker "^27.0.2"
     p-limit "^3.1.0"
     schema-utils "^3.0.0"
     serialize-javascript "^5.0.1"
@@ -8978,9 +9053,9 @@ trim-newlines@^1.0.0:
   integrity sha1-WIeWa7WCpFA6QetST301ARgVphM=
 
 trim-newlines@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/trim-newlines/-/trim-newlines-3.0.0.tgz#79726304a6a898aa8373427298d54c2ee8b1cb30"
-  integrity sha512-C4+gOpvmxaSMKuEf9Qc134F1ZuOHVXKRbtEflf4NTtuuJDEIJ9p5PXsalL8SkeRw+qit1Mo+yuvMPAKwWg/1hA==
+  version "3.0.1"
+  resolved "https://registry.yarnpkg.com/trim-newlines/-/trim-newlines-3.0.1.tgz#260a5d962d8b752425b32f3a7db0dcacd176c144"
+  integrity sha512-c1PTsA3tYrIsLGkJkzHF+w9F2EyxfXGo4UyJc4pFL++FMjnq0HJS69T3M7d//gKrFKwy429bouPescbjecU+Zw==
 
 trim-off-newlines@^1.0.0:
   version "1.0.1"
@@ -9029,7 +9104,7 @@ tslib@^2.2.0:
   resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.2.0.tgz#fb2c475977e35e241311ede2693cee1ec6698f5c"
   integrity sha512-gS9GVHRU+RGn5KQM2rllAlR3dU6m7AcpJKdtH8gFvQiC4Otgk98XnmMU+nZenHt/+VhnBPWwgrJsyrdcw6i23w==
 
-tsutils@^3.17.1:
+tsutils@^3.17.1, tsutils@^3.21.0:
   version "3.21.0"
   resolved "https://registry.yarnpkg.com/tsutils/-/tsutils-3.21.0.tgz#b48717d394cea6c1e096983eed58e9d61715b623"
   integrity sha512-mHKK3iUXL+3UF6xL5k0PEhKRUBKPBCv/+RkEOpjRWxxx27KKRBmmA60A9pgOUvMi8GKhRMPEmjBRPzs2W7O1OA==
@@ -9162,9 +9237,9 @@ typical@^5.2.0:
   integrity sha512-dvdQgNDNJo+8B2uBQoqdb11eUCE1JQXhvjC/CZtgvZseVd5TYMXnq0+vuUemXbd/Se29cTaUuPX3YIc2xgbvIg==
 
 uglify-js@^3.1.4:
-  version "3.13.7"
-  resolved "https://registry.yarnpkg.com/uglify-js/-/uglify-js-3.13.7.tgz#25468a3b39b1c875df03f0937b2b7036a93f3fee"
-  integrity sha512-1Psi2MmnZJbnEsgJJIlfnd7tFlJfitusmR7zDI8lXlFI0ACD4/Rm/xdrU8bh6zF0i74aiVoBtkRiFulkrmh3AA==
+  version "3.13.8"
+  resolved "https://registry.yarnpkg.com/uglify-js/-/uglify-js-3.13.8.tgz#7c2f9f2553f611f3ff592bdc19c6fb208dc60afb"
+  integrity sha512-PvFLMFIQHfIjFFlvAch69U2IvIxK9TNzNWt1SxZGp9JZ/v70yvqIQuiJeVPPtUMOzoNt+aNRDk4wgxb34wvEqA==
 
 uid-number@0.0.6:
   version "0.0.6"
@@ -9478,9 +9553,9 @@ webidl-conversions@^6.1.0:
   integrity sha512-qBIvFLGiBpLjfwmYAaHPXsn+ho5xZnGvyGvsarywGNc8VyQJUMHJ8OBKGGrPER0okBeMDaan4mNBlgBROxuI8w==
 
 webpack-sources@^2.1.1:
-  version "2.2.0"
-  resolved "https://registry.yarnpkg.com/webpack-sources/-/webpack-sources-2.2.0.tgz#058926f39e3d443193b6c31547229806ffd02bac"
-  integrity sha512-bQsA24JLwcnWGArOKUxYKhX3Mz/nK1Xf6hxullKERyktjNMC4x8koOeaDNTA2fEJ09BdWLbM/iTW0ithREUP0w==
+  version "2.3.0"
+  resolved "https://registry.yarnpkg.com/webpack-sources/-/webpack-sources-2.3.0.tgz#9ed2de69b25143a4c18847586ad9eccb19278cfa"
+  integrity sha512-WyOdtwSvOML1kbgtXbTDnEW0jkJ7hZr/bDByIwszhWd/4XX1A3XMkrbFMsuH4+/MfLlZCUzlAdg4r7jaGKEIgQ==
   dependencies:
     source-list-map "^2.0.1"
     source-map "^0.6.1"

From 54c0c090f470eb1ee9765854f0fdd62d22653974 Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Thu, 3 Jun 2021 14:32:13 +0200
Subject: [PATCH 337/719] ARROW-12919: [Dev][Archery] Crossbow comment bot
 failing to react to comments
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #10440 from jonkeane/ARROW-12919

Authored-by: Jonathan Keane <jkeane@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 dev/archery/setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dev/archery/setup.py b/dev/archery/setup.py
index 5ff9e214791..6dec95fdb0a 100755
--- a/dev/archery/setup.py
+++ b/dev/archery/setup.py
@@ -32,7 +32,7 @@
     'benchmark': ['pandas'],
     'docker': ['ruamel.yaml', 'python-dotenv'],
     'release': [jinja_req, 'jira', 'semver', 'gitpython'],
-    'crossbow': ['github3.py', jinja_req, 'pygit2', 'ruamel.yaml',
+    'crossbow': ['github3.py', jinja_req, 'pygit2==1.5.0', 'ruamel.yaml',
                  'setuptools_scm'],
 }
 extras['bot'] = extras['crossbow'] + ['pygithub', 'jira']

From 942a41d64c7b9084419a39dbf8e598bed691c3c9 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Thu, 3 Jun 2021 16:01:22 +0200
Subject: [PATCH 338/719] ARROW-12917: [C++] Fix handling of decimal types with
 negative scale in C data import

Closes #10429 from pitrou/ARROW-12917-decimal-c-bridge

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/c/bridge.cc      |  4 ++--
 cpp/src/arrow/c/bridge_test.cc | 27 ++++++++++++++++++++++++++-
 2 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/cpp/src/arrow/c/bridge.cc b/cpp/src/arrow/c/bridge.cc
index 5cb3e577235..a43bf8104f2 100644
--- a/cpp/src/arrow/c/bridge.cc
+++ b/cpp/src/arrow/c/bridge.cc
@@ -984,11 +984,11 @@ struct SchemaImporter {
     if (prec_scale.size() != 2 && prec_scale.size() != 3) {
       return f_parser_.Invalid();
     }
-    if (prec_scale[0] <= 0 || prec_scale[1] <= 0) {
+    if (prec_scale[0] <= 0) {
       return f_parser_.Invalid();
     }
     if (prec_scale.size() == 2 || prec_scale[2] == 128) {
-      type_ = decimal(prec_scale[0], prec_scale[1]);
+      type_ = decimal128(prec_scale[0], prec_scale[1]);
     } else if (prec_scale[2] == 256) {
       type_ = decimal256(prec_scale[0], prec_scale[1]);
     } else {
diff --git a/cpp/src/arrow/c/bridge_test.cc b/cpp/src/arrow/c/bridge_test.cc
index 317fd01f17c..54ce0efcf9d 100644
--- a/cpp/src/arrow/c/bridge_test.cc
+++ b/cpp/src/arrow/c/bridge_test.cc
@@ -283,6 +283,12 @@ TEST_F(TestSchemaExport, Primitive) {
 
   TestPrimitive(decimal(16, 4), "d:16,4");
   TestPrimitive(decimal256(16, 4), "d:16,4,256");
+
+  TestPrimitive(decimal(15, 0), "d:15,0");
+  TestPrimitive(decimal256(15, 0), "d:15,0,256");
+
+  TestPrimitive(decimal(15, -4), "d:15,-4");
+  TestPrimitive(decimal256(15, -4), "d:15,-4,256");
 }
 
 TEST_F(TestSchemaExport, Temporal) {
@@ -1196,6 +1202,20 @@ TEST_F(TestSchemaImport, Primitive) {
   CheckImport(field("", decimal128(16, 4)));
   FillPrimitive("d:16,4,256");
   CheckImport(field("", decimal256(16, 4)));
+
+  FillPrimitive("d:16,0");
+  CheckImport(field("", decimal128(16, 0)));
+  FillPrimitive("d:16,0,128");
+  CheckImport(field("", decimal128(16, 0)));
+  FillPrimitive("d:16,0,256");
+  CheckImport(field("", decimal256(16, 0)));
+
+  FillPrimitive("d:16,-4");
+  CheckImport(field("", decimal128(16, -4)));
+  FillPrimitive("d:16,-4,128");
+  CheckImport(field("", decimal128(16, -4)));
+  FillPrimitive("d:16,-4,256");
+  CheckImport(field("", decimal256(16, -4)));
 }
 
 TEST_F(TestSchemaImport, Temporal) {
@@ -1395,6 +1415,8 @@ TEST_F(TestSchemaImport, FormatStringError) {
   CheckImportError();
   FillPrimitive("d:15.4");
   CheckImportError();
+  FillPrimitive("d:15,z");
+  CheckImportError();
   FillPrimitive("t");
   CheckImportError();
   FillPrimitive("td");
@@ -2382,9 +2404,12 @@ TEST_F(TestSchemaRoundtrip, Primitive) {
   TestWithTypeFactory(boolean);
   TestWithTypeFactory(float16);
 
-  TestWithTypeFactory(std::bind(decimal, 19, 4));
   TestWithTypeFactory(std::bind(decimal128, 19, 4));
   TestWithTypeFactory(std::bind(decimal256, 19, 4));
+  TestWithTypeFactory(std::bind(decimal128, 19, 0));
+  TestWithTypeFactory(std::bind(decimal256, 19, 0));
+  TestWithTypeFactory(std::bind(decimal128, 19, -5));
+  TestWithTypeFactory(std::bind(decimal256, 19, -5));
   TestWithTypeFactory(std::bind(fixed_size_binary, 3));
   TestWithTypeFactory(binary);
   TestWithTypeFactory(large_utf8);

From 2215a05e6bda98936e248a4ea4aba9a7463a4f8e Mon Sep 17 00:00:00 2001
From: Nate Clark <nate@neworld.us>
Date: Thu, 3 Jun 2021 09:30:19 -0500
Subject: [PATCH 339/719] ARROW-12941: [C++] Add rows skipped to rows seen

Closes #10442 from n3world/ARROW-12941-fix_skip_count

Authored-by: Nate Clark <nate@neworld.us>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/src/arrow/csv/reader.cc      | 2 +-
 python/pyarrow/tests/test_csv.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/cpp/src/arrow/csv/reader.cc b/cpp/src/arrow/csv/reader.cc
index b80a8fffe80..598c16db360 100644
--- a/cpp/src/arrow/csv/reader.cc
+++ b/cpp/src/arrow/csv/reader.cc
@@ -347,7 +347,7 @@ class ReaderMixin {
             "either file is too short or header is larger than block size");
       }
       if (count_rows_) {
-        num_rows_seen_ = num_skipped_rows;
+        num_rows_seen_ += num_skipped_rows;
       }
     }
 
diff --git a/python/pyarrow/tests/test_csv.py b/python/pyarrow/tests/test_csv.py
index 3fa9ae02e4d..f02406dec40 100644
--- a/python/pyarrow/tests/test_csv.py
+++ b/python/pyarrow/tests/test_csv.py
@@ -58,7 +58,7 @@ def make_random_csv(num_cols=2, num_rows=10, linesep='\r\n', write_names=True):
     col_names = list(itertools.islice(generate_col_names(), num_cols))
     if write_names:
         csv.write(",".join(col_names))
-    csv.write(linesep)
+        csv.write(linesep)
     for row in arr.T:
         csv.write(",".join(map(str, row)))
         csv.write(linesep)

From 5baebbaecc26ac9f7ecad1ddffe0490499411236 Mon Sep 17 00:00:00 2001
From: niranda perera <niranda.perera@gmail.com>
Date: Thu, 3 Jun 2021 13:18:58 -0500
Subject: [PATCH 340/719] ARROW-10640: [C++] A, "if_else" ("where") kernel to
 combine two arrays based on a mask

Adding a preliminary impl for an `if_else(cond: Datum, left: Datum, right: Datum)` function. It works as follows,
```python
def if_else(cond, left, right):
    for c, true_val, false_val in zip(cond, left, right):
        if c:
            yield true_val
        else:
            yield false_val
```
`null` values will be promoted to the output.

Closes #10410 from nirandaperera/ARROW-10640

Authored-by: niranda perera <niranda.perera@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/src/arrow/CMakeLists.txt                  |   1 +
 cpp/src/arrow/compute/api_scalar.cc           |   5 +
 cpp/src/arrow/compute/api_scalar.h            |  16 +
 cpp/src/arrow/compute/kernels/CMakeLists.txt  |   1 +
 .../arrow/compute/kernels/scalar_if_else.cc   | 587 ++++++++++++++++++
 .../compute/kernels/scalar_if_else_test.cc    | 275 ++++++++
 cpp/src/arrow/compute/kernels/test_util.cc    |   4 +-
 cpp/src/arrow/compute/kernels/test_util.h     |  15 +-
 cpp/src/arrow/compute/registry.cc             |   1 +
 cpp/src/arrow/compute/registry_internal.h     |   1 +
 cpp/src/arrow/util/bitmap_ops.cc              |  18 +
 cpp/src/arrow/util/bitmap_ops.h               |  19 +
 docs/source/cpp/compute.rst                   |  36 +-
 docs/source/python/api/compute.rst            |   1 +
 14 files changed, 960 insertions(+), 20 deletions(-)
 create mode 100644 cpp/src/arrow/compute/kernels/scalar_if_else.cc
 create mode 100644 cpp/src/arrow/compute/kernels/scalar_if_else_test.cc

diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index 1d832cc25a2..f6d5a540c98 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -399,6 +399,7 @@ if(ARROW_COMPUTE)
               compute/kernels/scalar_string.cc
               compute/kernels/scalar_validity.cc
               compute/kernels/scalar_fill_null.cc
+              compute/kernels/scalar_if_else.cc
               compute/kernels/util_internal.cc
               compute/kernels/vector_hash.cc
               compute/kernels/vector_nested.cc
diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc
index 9f4ad42fecb..105ba7a0589 100644
--- a/cpp/src/arrow/compute/api_scalar.cc
+++ b/cpp/src/arrow/compute/api_scalar.cc
@@ -157,5 +157,10 @@ Result<Datum> FillNull(const Datum& values, const Datum& fill_value, ExecContext
   return CallFunction("fill_null", {values, fill_value}, ctx);
 }
 
+Result<Datum> IfElse(const Datum& cond, const Datum& if_true, const Datum& if_false,
+                     ExecContext* ctx) {
+  return CallFunction("if_else", {cond, if_true, if_false}, ctx);
+}
+
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h
index dce420b32b2..0a05b123a44 100644
--- a/cpp/src/arrow/compute/api_scalar.h
+++ b/cpp/src/arrow/compute/api_scalar.h
@@ -465,5 +465,21 @@ ARROW_EXPORT
 Result<Datum> FillNull(const Datum& values, const Datum& fill_value,
                        ExecContext* ctx = NULLPTR);
 
+/// \brief IfElse returns elements chosen from `left` or `right`
+/// depending on `cond`. `null` values in `cond` will be promoted to the result
+///
+/// \param[in] cond `Boolean` condition Scalar/ Array
+/// \param[in] left Scalar/ Array
+/// \param[in] right Scalar/ Array
+/// \param[in] ctx the function execution context, optional
+///
+/// \return the resulting datum
+///
+/// \since 5.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> IfElse(const Datum& cond, const Datum& left, const Datum& right,
+                     ExecContext* ctx = NULLPTR);
+
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/CMakeLists.txt b/cpp/src/arrow/compute/kernels/CMakeLists.txt
index 5e223a1f906..fc11d144105 100644
--- a/cpp/src/arrow/compute/kernels/CMakeLists.txt
+++ b/cpp/src/arrow/compute/kernels/CMakeLists.txt
@@ -29,6 +29,7 @@ add_arrow_compute_test(scalar_test
                        scalar_string_test.cc
                        scalar_validity_test.cc
                        scalar_fill_null_test.cc
+                       scalar_if_else_test.cc
                        test_util.cc)
 
 add_arrow_benchmark(scalar_arithmetic_benchmark PREFIX "arrow-compute")
diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else.cc b/cpp/src/arrow/compute/kernels/scalar_if_else.cc
new file mode 100644
index 00000000000..63086172c97
--- /dev/null
+++ b/cpp/src/arrow/compute/kernels/scalar_if_else.cc
@@ -0,0 +1,587 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <arrow/compute/api.h>
+#include <arrow/compute/kernels/codegen_internal.h>
+#include <arrow/compute/util_internal.h>
+#include <arrow/util/bit_block_counter.h>
+#include <arrow/util/bitmap.h>
+#include <arrow/util/bitmap_ops.h>
+
+namespace arrow {
+using internal::BitBlockCount;
+using internal::BitBlockCounter;
+using internal::Bitmap;
+
+namespace compute {
+
+namespace {
+
+constexpr uint64_t kAllNull = 0;
+constexpr uint64_t kAllValid = ~kAllNull;
+
+util::optional<uint64_t> GetConstantValidityWord(const Datum& data) {
+  if (data.is_scalar()) {
+    return data.scalar()->is_valid ? kAllValid : kAllNull;
+  }
+
+  if (data.array()->null_count == data.array()->length) return kAllNull;
+
+  if (!data.array()->MayHaveNulls()) return kAllValid;
+
+  // no constant validity word available
+  return {};
+}
+
+inline Bitmap GetBitmap(const Datum& datum, int i) {
+  if (datum.is_scalar()) return {};
+  const ArrayData& a = *datum.array();
+  return Bitmap{a.buffers[i], a.offset, a.length};
+}
+
+// if the condition is null then output is null otherwise we take validity from the
+// selected argument
+// ie. cond.valid & (cond.data & left.valid | ~cond.data & right.valid)
+Status PromoteNullsVisitor(KernelContext* ctx, const Datum& cond_d, const Datum& left_d,
+                           const Datum& right_d, ArrayData* output) {
+  auto cond_const = GetConstantValidityWord(cond_d);
+  auto left_const = GetConstantValidityWord(left_d);
+  auto right_const = GetConstantValidityWord(right_d);
+
+  enum { COND_CONST = 1, LEFT_CONST = 2, RIGHT_CONST = 4 };
+  auto flag = COND_CONST * cond_const.has_value() | LEFT_CONST * left_const.has_value() |
+              RIGHT_CONST * right_const.has_value();
+
+  const ArrayData& cond = *cond_d.array();
+  // cond.data will always be available
+  Bitmap cond_data{cond.buffers[1], cond.offset, cond.length};
+  Bitmap cond_valid{cond.buffers[0], cond.offset, cond.length};
+  Bitmap left_valid = GetBitmap(left_d, 0);
+  Bitmap right_valid = GetBitmap(right_d, 0);
+  // sometimes Bitmaps will be ignored, in which case we replace access to them with
+  // duplicated (probably elided) access to cond_data
+  const Bitmap& _ = cond_data;
+
+  // lambda function that will be used inside the visitor
+  uint64_t* out_validity = nullptr;
+  int64_t i = 0;
+  auto apply = [&](uint64_t c_valid, uint64_t c_data, uint64_t l_valid,
+                   uint64_t r_valid) {
+    out_validity[i] = c_valid & ((c_data & l_valid) | (~c_data & r_valid));
+    i++;
+  };
+
+  // cond.valid & (cond.data & left.valid | ~cond.data & right.valid)
+  // In the following cases, we dont need to allocate out_valid bitmap
+
+  // if cond & left & right all ones, then output is all valid --> out_valid = nullptr
+  if (cond_const == kAllValid && left_const == kAllValid && right_const == kAllValid) {
+    return Status::OK();
+  }
+
+  if (left_const == kAllValid && right_const == kAllValid) {
+    // if both left and right are valid, no need to calculate out_valid bitmap. Pass
+    // cond validity buffer
+    // if there's an offset, copy bitmap (cannot slice a bitmap)
+    if (cond.offset) {
+      ARROW_ASSIGN_OR_RAISE(
+          output->buffers[0],
+          arrow::internal::CopyBitmap(ctx->memory_pool(), cond.buffers[0]->data(),
+                                      cond.offset, cond.length));
+    } else {  // just copy assign cond validity buffer
+      output->buffers[0] = cond.buffers[0];
+    }
+    return Status::OK();
+  }
+
+  // following cases requires a separate out_valid buffer
+  ARROW_ASSIGN_OR_RAISE(output->buffers[0], ctx->AllocateBitmap(cond.length));
+  out_validity = output->GetMutableValues<uint64_t>(0);
+
+  enum { C_VALID, C_DATA, L_VALID, R_VALID };
+
+  switch (flag) {
+    case COND_CONST | LEFT_CONST | RIGHT_CONST: {
+      Bitmap bitmaps[] = {_, cond_data, _, _};
+      Bitmap::VisitWords(bitmaps, [&](std::array<uint64_t, 4> words) {
+        apply(*cond_const, words[C_DATA], *left_const, *right_const);
+      });
+      break;
+    }
+    case LEFT_CONST | RIGHT_CONST: {
+      Bitmap bitmaps[] = {cond_valid, cond_data, _, _};
+      Bitmap::VisitWords(bitmaps, [&](std::array<uint64_t, 4> words) {
+        apply(words[C_VALID], words[C_DATA], *left_const, *right_const);
+      });
+      break;
+    }
+    case COND_CONST | RIGHT_CONST: {
+      // bitmaps[C_VALID], bitmaps[R_VALID] might be null; override to make it safe for
+      // Visit()
+      Bitmap bitmaps[] = {_, cond_data, left_valid, _};
+      Bitmap::VisitWords(bitmaps, [&](std::array<uint64_t, 4> words) {
+        apply(*cond_const, words[C_DATA], words[L_VALID], *right_const);
+      });
+      break;
+    }
+    case RIGHT_CONST: {
+      // bitmaps[R_VALID] might be null; override to make it safe for Visit()
+      Bitmap bitmaps[] = {cond_valid, cond_data, left_valid, _};
+      Bitmap::VisitWords(bitmaps, [&](std::array<uint64_t, 4> words) {
+        apply(words[C_VALID], words[C_DATA], words[L_VALID], *right_const);
+      });
+      break;
+    }
+    case COND_CONST | LEFT_CONST: {
+      // bitmaps[C_VALID], bitmaps[L_VALID] might be null; override to make it safe for
+      // Visit()
+      Bitmap bitmaps[] = {_, cond_data, _, right_valid};
+      Bitmap::VisitWords(bitmaps, [&](std::array<uint64_t, 4> words) {
+        apply(*cond_const, words[C_DATA], *left_const, words[R_VALID]);
+      });
+      break;
+    }
+    case LEFT_CONST: {
+      // bitmaps[L_VALID] might be null; override to make it safe for Visit()
+      Bitmap bitmaps[] = {cond_valid, cond_data, _, right_valid};
+      Bitmap::VisitWords(bitmaps, [&](std::array<uint64_t, 4> words) {
+        apply(words[C_VALID], words[C_DATA], *left_const, words[R_VALID]);
+      });
+      break;
+    }
+    case COND_CONST: {
+      // bitmaps[C_VALID] might be null; override to make it safe for Visit()
+      Bitmap bitmaps[] = {_, cond_data, left_valid, right_valid};
+      Bitmap::VisitWords(bitmaps, [&](std::array<uint64_t, 4> words) {
+        apply(*cond_const, words[C_DATA], words[L_VALID], words[R_VALID]);
+      });
+      break;
+    }
+    case 0: {
+      Bitmap bitmaps[] = {cond_valid, cond_data, left_valid, right_valid};
+      Bitmap::VisitWords(bitmaps, [&](std::array<uint64_t, 4> words) {
+        apply(words[C_VALID], words[C_DATA], words[L_VALID], words[R_VALID]);
+      });
+      break;
+    }
+  }
+  return Status::OK();
+}
+
+template <typename Type, typename Enable = void>
+struct IfElseFunctor {};
+
+// only number types needs to be handled for Fixed sized primitive data types because,
+// internal::GenerateTypeAgnosticPrimitive forwards types to the corresponding unsigned
+// int type
+template <typename Type>
+struct IfElseFunctor<Type, enable_if_number<Type>> {
+  using T = typename TypeTraits<Type>::CType;
+  // A - Array
+  // S - Scalar
+
+  //  AAA
+  static Status Call(KernelContext* ctx, const ArrayData& cond, const ArrayData& left,
+                     const ArrayData& right, ArrayData* out) {
+    ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Buffer> out_buf,
+                          ctx->Allocate(cond.length * sizeof(T)));
+    T* out_values = reinterpret_cast<T*>(out_buf->mutable_data());
+
+    // copy right data to out_buff
+    const T* right_data = right.GetValues<T>(1);
+    std::memcpy(out_values, right_data, right.length * sizeof(T));
+
+    const auto* cond_data = cond.buffers[1]->data();  // this is a BoolArray
+    BitBlockCounter bit_counter(cond_data, cond.offset, cond.length);
+
+    // selectively copy values from left data
+    const T* left_data = left.GetValues<T>(1);
+    int64_t offset = cond.offset;
+
+    // todo this can be improved by intrinsics. ex: _mm*_mask_store_e* (vmovdqa*)
+    while (offset < cond.offset + cond.length) {
+      const BitBlockCount& block = bit_counter.NextWord();
+      if (block.AllSet()) {  // all from left
+        std::memcpy(out_values, left_data, block.length * sizeof(T));
+      } else if (block.popcount) {  // selectively copy from left
+        for (int64_t i = 0; i < block.length; ++i) {
+          if (BitUtil::GetBit(cond_data, offset + i)) {
+            out_values[i] = left_data[i];
+          }
+        }
+      }
+
+      offset += block.length;
+      out_values += block.length;
+      left_data += block.length;
+    }
+
+    out->buffers[1] = std::move(out_buf);
+    return Status::OK();
+  }
+
+  // ASA
+  static Status Call(KernelContext* ctx, const ArrayData& cond, const Scalar& left,
+                     const ArrayData& right, ArrayData* out) {
+    ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Buffer> out_buf,
+                          ctx->Allocate(cond.length * sizeof(T)));
+    T* out_values = reinterpret_cast<T*>(out_buf->mutable_data());
+
+    // copy right data to out_buff
+    const T* right_data = right.GetValues<T>(1);
+    std::memcpy(out_values, right_data, right.length * sizeof(T));
+
+    const auto* cond_data = cond.buffers[1]->data();  // this is a BoolArray
+    BitBlockCounter bit_counter(cond_data, cond.offset, cond.length);
+
+    // selectively copy values from left data
+    T left_data = internal::UnboxScalar<Type>::Unbox(left);
+    int64_t offset = cond.offset;
+
+    // todo this can be improved by intrinsics. ex: _mm*_mask_store_e* (vmovdqa*)
+    while (offset < cond.offset + cond.length) {
+      const BitBlockCount& block = bit_counter.NextWord();
+      if (block.AllSet()) {  // all from left
+        std::fill(out_values, out_values + block.length, left_data);
+      } else if (block.popcount) {  // selectively copy from left
+        for (int64_t i = 0; i < block.length; ++i) {
+          if (BitUtil::GetBit(cond_data, offset + i)) {
+            out_values[i] = left_data;
+          }
+        }
+      }
+
+      offset += block.length;
+      out_values += block.length;
+    }
+
+    out->buffers[1] = std::move(out_buf);
+    return Status::OK();
+  }
+
+  // AAS
+  static Status Call(KernelContext* ctx, const ArrayData& cond, const ArrayData& left,
+                     const Scalar& right, ArrayData* out) {
+    ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Buffer> out_buf,
+                          ctx->Allocate(cond.length * sizeof(T)));
+    T* out_values = reinterpret_cast<T*>(out_buf->mutable_data());
+
+    // copy left data to out_buff
+    const T* left_data = left.GetValues<T>(1);
+    std::memcpy(out_values, left_data, left.length * sizeof(T));
+
+    const auto* cond_data = cond.buffers[1]->data();  // this is a BoolArray
+    BitBlockCounter bit_counter(cond_data, cond.offset, cond.length);
+
+    // selectively copy values from left data
+    T right_data = internal::UnboxScalar<Type>::Unbox(right);
+    int64_t offset = cond.offset;
+
+    // todo this can be improved by intrinsics. ex: _mm*_mask_store_e* (vmovdqa*)
+    // left data is already in the output buffer. Therefore, mask needs to be inverted
+    while (offset < cond.offset + cond.length) {
+      const BitBlockCount& block = bit_counter.NextWord();
+      if (block.NoneSet()) {  // all from right
+        std::fill(out_values, out_values + block.length, right_data);
+      } else if (block.popcount) {  // selectively copy from right
+        for (int64_t i = 0; i < block.length; ++i) {
+          if (!BitUtil::GetBit(cond_data, offset + i)) {
+            out_values[i] = right_data;
+          }
+        }
+      }
+
+      offset += block.length;
+      out_values += block.length;
+    }
+
+    out->buffers[1] = std::move(out_buf);
+    return Status::OK();
+  }
+
+  // ASS
+  static Status Call(KernelContext* ctx, const ArrayData& cond, const Scalar& left,
+                     const Scalar& right, ArrayData* out) {
+    ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Buffer> out_buf,
+                          ctx->Allocate(cond.length * sizeof(T)));
+    T* out_values = reinterpret_cast<T*>(out_buf->mutable_data());
+
+    // copy right data to out_buff
+    T right_data = internal::UnboxScalar<Type>::Unbox(right);
+    std::fill(out_values, out_values + cond.length, right_data);
+
+    const auto* cond_data = cond.buffers[1]->data();  // this is a BoolArray
+    BitBlockCounter bit_counter(cond_data, cond.offset, cond.length);
+
+    // selectively copy values from left data
+    T left_data = internal::UnboxScalar<Type>::Unbox(left);
+    int64_t offset = cond.offset;
+
+    // todo this can be improved by intrinsics. ex: _mm*_mask_store_e* (vmovdqa*)
+    while (offset < cond.offset + cond.length) {
+      const BitBlockCount& block = bit_counter.NextWord();
+      if (block.AllSet()) {  // all from left
+        std::fill(out_values, out_values + block.length, left_data);
+      } else if (block.popcount) {  // selectively copy from left
+        for (int64_t i = 0; i < block.length; ++i) {
+          if (BitUtil::GetBit(cond_data, offset + i)) {
+            out_values[i] = left_data;
+          }
+        }
+      }
+
+      offset += block.length;
+      out_values += block.length;
+    }
+
+    out->buffers[1] = std::move(out_buf);
+    return Status::OK();
+  }
+};
+
+template <typename Type>
+struct IfElseFunctor<Type, enable_if_boolean<Type>> {
+  // AAA
+  static Status Call(KernelContext* ctx, const ArrayData& cond, const ArrayData& left,
+                     const ArrayData& right, ArrayData* out) {
+    // out_buff = right & ~cond
+    ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Buffer> out_buf,
+                          arrow::internal::BitmapAndNot(
+                              ctx->memory_pool(), right.buffers[1]->data(), right.offset,
+                              cond.buffers[1]->data(), cond.offset, cond.length, 0));
+
+    // out_buff = left & cond
+    ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Buffer> temp_buf,
+                          arrow::internal::BitmapAnd(
+                              ctx->memory_pool(), left.buffers[1]->data(), left.offset,
+                              cond.buffers[1]->data(), cond.offset, cond.length, 0));
+
+    arrow::internal::BitmapOr(out_buf->data(), 0, temp_buf->data(), 0, cond.length, 0,
+                              out_buf->mutable_data());
+    out->buffers[1] = std::move(out_buf);
+    return Status::OK();
+  }
+
+  // ASA
+  static Status Call(KernelContext* ctx, const ArrayData& cond, const Scalar& left,
+                     const ArrayData& right, ArrayData* out) {
+    // out_buff = right & ~cond
+    ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Buffer> out_buf,
+                          arrow::internal::BitmapAndNot(
+                              ctx->memory_pool(), right.buffers[1]->data(), right.offset,
+                              cond.buffers[1]->data(), cond.offset, cond.length, 0));
+
+    // out_buff = left & cond
+    bool left_data = internal::UnboxScalar<BooleanType>::Unbox(left);
+    if (left_data) {
+      arrow::internal::BitmapOr(out_buf->data(), 0, cond.buffers[1]->data(), cond.offset,
+                                cond.length, 0, out_buf->mutable_data());
+    }
+
+    out->buffers[1] = std::move(out_buf);
+    return Status::OK();
+  }
+
+  // AAS
+  static Status Call(KernelContext* ctx, const ArrayData& cond, const ArrayData& left,
+                     const Scalar& right, ArrayData* out) {
+    // out_buff = left & cond
+    ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Buffer> out_buf,
+                          arrow::internal::BitmapAnd(
+                              ctx->memory_pool(), left.buffers[1]->data(), left.offset,
+                              cond.buffers[1]->data(), cond.offset, cond.length, 0));
+
+    bool right_data = internal::UnboxScalar<BooleanType>::Unbox(right);
+
+    // out_buff = left & cond | right & ~cond
+    if (right_data) {
+      arrow::internal::BitmapOrNot(out_buf->data(), 0, cond.buffers[1]->data(),
+                                   cond.offset, cond.length, 0, out_buf->mutable_data());
+    }
+
+    out->buffers[1] = std::move(out_buf);
+    return Status::OK();
+  }
+
+  // ASS
+  static Status Call(KernelContext* ctx, const ArrayData& cond, const Scalar& left,
+                     const Scalar& right, ArrayData* out) {
+    bool left_data = internal::UnboxScalar<BooleanType>::Unbox(left);
+    bool right_data = internal::UnboxScalar<BooleanType>::Unbox(right);
+
+    // out_buf = left & cond | right & ~cond
+    std::shared_ptr<Buffer> out_buf = nullptr;
+    if (left_data) {
+      if (right_data) {
+        // out_buf = ones
+        ARROW_ASSIGN_OR_RAISE(out_buf, ctx->AllocateBitmap(cond.length));
+        // filling with UINT8_MAX upto the buffer's size (in bytes)
+        std::memset(out_buf->mutable_data(), UINT8_MAX, out_buf->size());
+      } else {
+        // out_buf = cond
+        out_buf = SliceBuffer(cond.buffers[1], cond.offset, cond.length);
+      }
+    } else {
+      if (right_data) {
+        // out_buf = ~cond
+        ARROW_ASSIGN_OR_RAISE(out_buf, arrow::internal::InvertBitmap(
+                                           ctx->memory_pool(), cond.buffers[1]->data(),
+                                           cond.offset, cond.length))
+      } else {
+        // out_buf = zeros
+        ARROW_ASSIGN_OR_RAISE(out_buf, ctx->AllocateBitmap(cond.length));
+      }
+    }
+    out->buffers[1] = std::move(out_buf);
+    return Status::OK();
+  }
+};
+
+template <typename Type>
+struct IfElseFunctor<Type, enable_if_null<Type>> {
+  template <typename T>
+  static inline Status ReturnCopy(const T& in, T* out) {
+    // Nothing preallocated, so we assign in into the output
+    *out = in;
+    return Status::OK();
+  }
+
+  // AAA
+  static Status Call(KernelContext* ctx, const ArrayData& cond, const ArrayData& left,
+                     const ArrayData& right, ArrayData* out) {
+    return ReturnCopy(left, out);
+  }
+
+  // ASA
+  static Status Call(KernelContext* ctx, const ArrayData& cond, const Scalar& left,
+                     const ArrayData& right, ArrayData* out) {
+    return ReturnCopy(right, out);
+  }
+
+  // AAS
+  static Status Call(KernelContext* ctx, const ArrayData& cond, const ArrayData& left,
+                     const Scalar& right, ArrayData* out) {
+    return ReturnCopy(left, out);
+  }
+
+  // ASS
+  static Status Call(KernelContext* ctx, const ArrayData& cond, const Scalar& left,
+                     const Scalar& right, ArrayData* out) {
+    return ReturnCopy(cond, out);
+  }
+};
+
+template <typename Type>
+struct ResolveIfElseExec {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    // cond is scalar
+    if (batch[0].is_scalar()) {
+      const auto& cond = batch[0].scalar_as<BooleanScalar>();
+      if (batch[1].is_scalar() && batch[2].is_scalar()) {
+        if (cond.is_valid) {
+          *out = cond.value ? batch[1].scalar() : batch[2].scalar();
+        } else {
+          *out = MakeNullScalar(batch[1].type());
+        }
+        return Status::OK();
+      }
+      // either left or right is an array. Output is always an array
+      if (!cond.is_valid) {
+        // cond is null; just create a null array
+        ARROW_ASSIGN_OR_RAISE(
+            *out, MakeArrayOfNull(batch[1].type(), batch.length, ctx->memory_pool()))
+        return Status::OK();
+      }
+
+      const auto& valid_data = cond.value ? batch[1] : batch[2];
+      if (valid_data.is_array()) {
+        *out = valid_data;
+      } else {
+        // valid data is a scalar that needs to be broadcasted
+        ARROW_ASSIGN_OR_RAISE(
+            *out,
+            MakeArrayFromScalar(*valid_data.scalar(), batch.length, ctx->memory_pool()));
+      }
+      return Status::OK();
+    }
+
+    // cond is array. Use functors to sort things out
+    ARROW_RETURN_NOT_OK(
+        PromoteNullsVisitor(ctx, batch[0], batch[1], batch[2], out->mutable_array()));
+
+    if (batch[1].kind() == Datum::ARRAY) {
+      if (batch[2].kind() == Datum::ARRAY) {  // AAA
+        return IfElseFunctor<Type>::Call(ctx, *batch[0].array(), *batch[1].array(),
+                                         *batch[2].array(), out->mutable_array());
+      } else {  // AAS
+        return IfElseFunctor<Type>::Call(ctx, *batch[0].array(), *batch[1].array(),
+                                         *batch[2].scalar(), out->mutable_array());
+      }
+    } else {
+      if (batch[2].kind() == Datum::ARRAY) {  // ASA
+        return IfElseFunctor<Type>::Call(ctx, *batch[0].array(), *batch[1].scalar(),
+                                         *batch[2].array(), out->mutable_array());
+      } else {  // ASS
+        return IfElseFunctor<Type>::Call(ctx, *batch[0].array(), *batch[1].scalar(),
+                                         *batch[2].scalar(), out->mutable_array());
+      }
+    }
+  }
+};
+
+void AddPrimitiveIfElseKernels(const std::shared_ptr<ScalarFunction>& scalar_function,
+                               const std::vector<std::shared_ptr<DataType>>& types) {
+  for (auto&& type : types) {
+    auto exec = internal::GenerateTypeAgnosticPrimitive<ResolveIfElseExec>(*type);
+    // cond array needs to be boolean always
+    ScalarKernel kernel({boolean(), type, type}, type, exec);
+    kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
+    kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
+
+    DCHECK_OK(scalar_function->AddKernel(std::move(kernel)));
+  }
+}
+
+}  // namespace
+
+const FunctionDoc if_else_doc{"Choose values based on a condition",
+                              ("`cond` must be a Boolean scalar/ array. \n`left` or "
+                               "`right` must be of the same type scalar/ array.\n"
+                               "`null` values in `cond` will be promoted to the"
+                               " output."),
+                              {"cond", "left", "right"}};
+
+namespace internal {
+
+void RegisterScalarIfElse(FunctionRegistry* registry) {
+  ScalarKernel scalar_kernel;
+  scalar_kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
+  scalar_kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
+
+  auto func = std::make_shared<ScalarFunction>("if_else", Arity::Ternary(), &if_else_doc);
+
+  AddPrimitiveIfElseKernels(func, NumericTypes());
+  AddPrimitiveIfElseKernels(func, TemporalTypes());
+  AddPrimitiveIfElseKernels(func, {boolean(), null()});
+  // todo add binary kernels
+
+  DCHECK_OK(registry->AddFunction(std::move(func)));
+}
+
+}  // namespace internal
+}  // namespace compute
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
new file mode 100644
index 00000000000..5d3d22210d2
--- /dev/null
+++ b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
@@ -0,0 +1,275 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <arrow/array.h>
+#include <arrow/compute/api_scalar.h>
+#include <arrow/compute/kernels/test_util.h>
+#include <arrow/testing/gtest_util.h>
+#include <gtest/gtest.h>
+
+namespace arrow {
+namespace compute {
+
+void CheckIfElseOutput(const Datum& cond, const Datum& left, const Datum& right,
+                       const Datum& expected) {
+  ASSERT_OK_AND_ASSIGN(Datum datum_out, IfElse(cond, left, right));
+  if (datum_out.is_array()) {
+    std::shared_ptr<Array> result = datum_out.make_array();
+    ASSERT_OK(result->ValidateFull());
+    std::shared_ptr<Array> expected_ = expected.make_array();
+    AssertArraysEqual(*expected_, *result, /*verbose=*/true);
+  } else {  // expecting scalar
+    const std::shared_ptr<Scalar>& result = datum_out.scalar();
+    const std::shared_ptr<Scalar>& expected_ = expected.scalar();
+    AssertScalarsEqual(*expected_, *result, /*verbose=*/true);
+  }
+}
+
+class TestIfElseKernel : public ::testing::Test {};
+
+template <typename Type>
+class TestIfElsePrimitive : public ::testing::Test {};
+
+using PrimitiveTypes = ::testing::Types<Int8Type, UInt8Type, Int16Type, UInt16Type,
+                                        Int32Type, UInt32Type, Int64Type, UInt64Type,
+                                        FloatType, DoubleType, Date32Type, Date64Type>;
+
+TYPED_TEST_SUITE(TestIfElsePrimitive, PrimitiveTypes);
+
+TYPED_TEST(TestIfElsePrimitive, IfElseFixedSizeRand) {
+  using ArrayType = typename TypeTraits<TypeParam>::ArrayType;
+  auto type = TypeTraits<TypeParam>::type_singleton();
+
+  random::RandomArrayGenerator rand(/*seed=*/0);
+  int64_t len = 1000;
+  auto cond = std::static_pointer_cast<BooleanArray>(
+      rand.ArrayOf(boolean(), len, /*null_probability=*/0.01));
+  auto left = std::static_pointer_cast<ArrayType>(
+      rand.ArrayOf(type, len, /*null_probability=*/0.01));
+  auto right = std::static_pointer_cast<ArrayType>(
+      rand.ArrayOf(type, len, /*null_probability=*/0.01));
+
+  typename TypeTraits<TypeParam>::BuilderType builder;
+
+  for (int64_t i = 0; i < len; ++i) {
+    if (!cond->IsValid(i) || (cond->Value(i) && !left->IsValid(i)) ||
+        (!cond->Value(i) && !right->IsValid(i))) {
+      ASSERT_OK(builder.AppendNull());
+      continue;
+    }
+
+    if (cond->Value(i)) {
+      ASSERT_OK(builder.Append(left->Value(i)));
+    } else {
+      ASSERT_OK(builder.Append(right->Value(i)));
+    }
+  }
+  ASSERT_OK_AND_ASSIGN(auto expected_data, builder.Finish());
+
+  CheckIfElseOutput(cond, left, right, expected_data);
+}
+
+void CheckWithDifferentShapes(const std::shared_ptr<Array>& cond,
+                              const std::shared_ptr<Array>& left,
+                              const std::shared_ptr<Array>& right,
+                              const std::shared_ptr<Array>& expected) {
+  // this will check for whole arrays, every scalar at i'th index and slicing (offset)
+  CheckScalar("if_else", {cond, left, right}, expected);
+
+  auto len = left->length();
+
+  enum { COND_SCALAR = 1, LEFT_SCALAR = 2, RIGHT_SCALAR = 4 };
+  for (int mask = 0; mask < (COND_SCALAR | LEFT_SCALAR | RIGHT_SCALAR); ++mask) {
+    for (int64_t cond_idx = 0; cond_idx < len; ++cond_idx) {
+      Datum cond_in, cond_bcast;
+      std::string trace_cond = "Cond";
+      if (mask & COND_SCALAR) {
+        ASSERT_OK_AND_ASSIGN(cond_in, cond->GetScalar(cond_idx));
+        ASSERT_OK_AND_ASSIGN(cond_bcast, MakeArrayFromScalar(*cond_in.scalar(), len));
+        trace_cond += "@" + std::to_string(cond_idx) + "=" + cond_in.scalar()->ToString();
+      } else {
+        cond_in = cond_bcast = cond;
+      }
+      SCOPED_TRACE(trace_cond);
+
+      for (int64_t left_idx = 0; left_idx < len; ++left_idx) {
+        Datum left_in, left_bcast;
+        std::string trace_left = "Left";
+        if (mask & LEFT_SCALAR) {
+          ASSERT_OK_AND_ASSIGN(left_in, left->GetScalar(left_idx).As<Datum>());
+          ASSERT_OK_AND_ASSIGN(left_bcast, MakeArrayFromScalar(*left_in.scalar(), len));
+          trace_cond +=
+              "@" + std::to_string(left_idx) + "=" + left_in.scalar()->ToString();
+        } else {
+          left_in = left_bcast = left;
+        }
+        SCOPED_TRACE(trace_left);
+
+        for (int64_t right_idx = 0; right_idx < len; ++right_idx) {
+          Datum right_in, right_bcast;
+          std::string trace_right = "Right";
+          if (mask & RIGHT_SCALAR) {
+            ASSERT_OK_AND_ASSIGN(right_in, right->GetScalar(right_idx));
+            ASSERT_OK_AND_ASSIGN(right_bcast,
+                                 MakeArrayFromScalar(*right_in.scalar(), len));
+            trace_right +=
+                "@" + std::to_string(right_idx) + "=" + right_in.scalar()->ToString();
+          } else {
+            right_in = right_bcast = right;
+          }
+          SCOPED_TRACE(trace_right);
+
+          ASSERT_OK_AND_ASSIGN(auto exp, IfElse(cond_bcast, left_bcast, right_bcast));
+          ASSERT_OK_AND_ASSIGN(auto actual, IfElse(cond_in, left_in, right_in));
+          AssertDatumsEqual(exp, actual, /*verbose=*/true);
+
+          if (right_in.is_array()) break;
+        }
+        if (left_in.is_array()) break;
+      }
+      if (cond_in.is_array()) break;
+    }
+  }  // for (mask)
+}
+
+TYPED_TEST(TestIfElsePrimitive, IfElseFixedSize) {
+  auto type = TypeTraits<TypeParam>::type_singleton();
+
+  CheckWithDifferentShapes(ArrayFromJSON(boolean(), "[true, true, true, false]"),
+                           ArrayFromJSON(type, "[1, 2, 3, 4]"),
+                           ArrayFromJSON(type, "[5, 6, 7, 8]"),
+                           ArrayFromJSON(type, "[1, 2, 3, 8]"));
+
+  CheckWithDifferentShapes(ArrayFromJSON(boolean(), "[true, true, true, false]"),
+                           ArrayFromJSON(type, "[1, 2, 3, 4]"),
+                           ArrayFromJSON(type, "[5, 6, 7, null]"),
+                           ArrayFromJSON(type, "[1, 2, 3, null]"));
+
+  CheckWithDifferentShapes(ArrayFromJSON(boolean(), "[true, true, true, false]"),
+                           ArrayFromJSON(type, "[1, 2, null, 4]"),
+                           ArrayFromJSON(type, "[5, 6, 7, null]"),
+                           ArrayFromJSON(type, "[1, 2, null, null]"));
+
+  CheckWithDifferentShapes(ArrayFromJSON(boolean(), "[true, true, true, false]"),
+                           ArrayFromJSON(type, "[1, 2, null, 4]"),
+                           ArrayFromJSON(type, "[5, 6, 7, 8]"),
+                           ArrayFromJSON(type, "[1, 2, null, 8]"));
+
+  CheckWithDifferentShapes(ArrayFromJSON(boolean(), "[null, true, true, false]"),
+                           ArrayFromJSON(type, "[1, 2, null, 4]"),
+                           ArrayFromJSON(type, "[5, 6, 7, 8]"),
+                           ArrayFromJSON(type, "[null, 2, null, 8]"));
+
+  CheckWithDifferentShapes(ArrayFromJSON(boolean(), "[null, true, true, false]"),
+                           ArrayFromJSON(type, "[1, 2, null, 4]"),
+                           ArrayFromJSON(type, "[5, 6, 7, null]"),
+                           ArrayFromJSON(type, "[null, 2, null, null]"));
+
+  CheckWithDifferentShapes(ArrayFromJSON(boolean(), "[null, true, true, false]"),
+                           ArrayFromJSON(type, "[1, 2, 3, 4]"),
+                           ArrayFromJSON(type, "[5, 6, 7, null]"),
+                           ArrayFromJSON(type, "[null, 2, 3, null]"));
+
+  CheckWithDifferentShapes(ArrayFromJSON(boolean(), "[null, true, true, false]"),
+                           ArrayFromJSON(type, "[1, 2, 3, 4]"),
+                           ArrayFromJSON(type, "[5, 6, 7, 8]"),
+                           ArrayFromJSON(type, "[null, 2, 3, 8]"));
+}
+
+TEST_F(TestIfElseKernel, IfElseBoolean) {
+  auto type = boolean();
+
+  CheckWithDifferentShapes(ArrayFromJSON(boolean(), "[true, true, true, false]"),
+                           ArrayFromJSON(type, "[false, false, false, false]"),
+                           ArrayFromJSON(type, "[true, true, true, true]"),
+                           ArrayFromJSON(type, "[false, false, false, true]"));
+
+  CheckWithDifferentShapes(ArrayFromJSON(boolean(), "[true, true, true, false]"),
+                           ArrayFromJSON(type, "[false, false, false, false]"),
+                           ArrayFromJSON(type, "[true, true, true, null]"),
+                           ArrayFromJSON(type, "[false, false, false, null]"));
+
+  CheckWithDifferentShapes(ArrayFromJSON(boolean(), "[true, true, true, false]"),
+                           ArrayFromJSON(type, "[false, false, null, false]"),
+                           ArrayFromJSON(type, "[true, true, true, null]"),
+                           ArrayFromJSON(type, "[false, false, null, null]"));
+
+  CheckWithDifferentShapes(ArrayFromJSON(boolean(), "[true, true, true, false]"),
+                           ArrayFromJSON(type, "[false, false, null, false]"),
+                           ArrayFromJSON(type, "[true, true, true, true]"),
+                           ArrayFromJSON(type, "[false, false, null, true]"));
+
+  CheckWithDifferentShapes(ArrayFromJSON(boolean(), "[null, true, true, false]"),
+                           ArrayFromJSON(type, "[false, false, null, false]"),
+                           ArrayFromJSON(type, "[true, true, true, true]"),
+                           ArrayFromJSON(type, "[null, false, null, true]"));
+
+  CheckWithDifferentShapes(ArrayFromJSON(boolean(), "[null, true, true, false]"),
+                           ArrayFromJSON(type, "[false, false, null, false]"),
+                           ArrayFromJSON(type, "[true, true, true, null]"),
+                           ArrayFromJSON(type, "[null, false, null, null]"));
+
+  CheckWithDifferentShapes(ArrayFromJSON(boolean(), "[null, true, true, false]"),
+                           ArrayFromJSON(type, "[false, false, false, false]"),
+                           ArrayFromJSON(type, "[true, true, true, null]"),
+                           ArrayFromJSON(type, "[null, false, false, null]"));
+
+  CheckWithDifferentShapes(ArrayFromJSON(boolean(), "[null, true, true, false]"),
+                           ArrayFromJSON(type, "[false, false, false, false]"),
+                           ArrayFromJSON(type, "[true, true, true, true]"),
+                           ArrayFromJSON(type, "[null, false, false, true]"));
+}
+
+TEST_F(TestIfElseKernel, IfElseBooleanRand) {
+  auto type = boolean();
+  random::RandomArrayGenerator rand(/*seed=*/0);
+  int64_t len = 1000;
+  auto cond = std::static_pointer_cast<BooleanArray>(
+      rand.ArrayOf(boolean(), len, /*null_probability=*/0.01));
+  auto left = std::static_pointer_cast<BooleanArray>(
+      rand.ArrayOf(type, len, /*null_probability=*/0.01));
+  auto right = std::static_pointer_cast<BooleanArray>(
+      rand.ArrayOf(type, len, /*null_probability=*/0.01));
+
+  BooleanBuilder builder;
+  for (int64_t i = 0; i < len; ++i) {
+    if (!cond->IsValid(i) || (cond->Value(i) && !left->IsValid(i)) ||
+        (!cond->Value(i) && !right->IsValid(i))) {
+      ASSERT_OK(builder.AppendNull());
+      continue;
+    }
+
+    if (cond->Value(i)) {
+      ASSERT_OK(builder.Append(left->Value(i)));
+    } else {
+      ASSERT_OK(builder.Append(right->Value(i)));
+    }
+  }
+  ASSERT_OK_AND_ASSIGN(auto expected_data, builder.Finish());
+
+  CheckIfElseOutput(cond, left, right, expected_data);
+}
+
+TEST_F(TestIfElseKernel, IfElseNull) {
+  CheckIfElseOutput(ArrayFromJSON(boolean(), "[null, null, null, null]"),
+                    ArrayFromJSON(null(), "[null, null, null, null]"),
+                    ArrayFromJSON(null(), "[null, null, null, null]"),
+                    ArrayFromJSON(null(), "[null, null, null, null]"));
+}
+
+}  // namespace compute
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/test_util.cc b/cpp/src/arrow/compute/kernels/test_util.cc
index 672308452cf..c74ef3b76dd 100644
--- a/cpp/src/arrow/compute/kernels/test_util.cc
+++ b/cpp/src/arrow/compute/kernels/test_util.cc
@@ -70,6 +70,8 @@ ScalarVector GetScalars(const ArrayVector& inputs, int64_t index) {
   return scalars;
 }
 
+}  // namespace
+
 void CheckScalar(std::string func_name, const ScalarVector& inputs,
                  std::shared_ptr<Scalar> expected, const FunctionOptions* options) {
   ASSERT_OK_AND_ASSIGN(Datum out, CallFunction(func_name, GetDatums(inputs), options));
@@ -140,8 +142,6 @@ void CheckScalar(std::string func_name, const ArrayVector& inputs,
   }
 }
 
-}  // namespace
-
 void CheckScalarUnary(std::string func_name, std::shared_ptr<Array> input,
                       std::shared_ptr<Array> expected, const FunctionOptions* options) {
   CheckScalar(std::move(func_name), {input}, expected, options);
diff --git a/cpp/src/arrow/compute/kernels/test_util.h b/cpp/src/arrow/compute/kernels/test_util.h
index aea3d8360e6..cadcc4fe35c 100644
--- a/cpp/src/arrow/compute/kernels/test_util.h
+++ b/cpp/src/arrow/compute/kernels/test_util.h
@@ -19,13 +19,14 @@
 
 // IWYU pragma: begin_exports
 
+#include <gmock/gmock.h>
+
 #include <memory>
 #include <string>
 #include <vector>
 
-#include <gmock/gmock.h>
-
 #include "arrow/array.h"
+#include "arrow/compute/kernel.h"
 #include "arrow/datum.h"
 #include "arrow/memory_pool.h"
 #include "arrow/pretty_print.h"
@@ -34,8 +35,6 @@
 #include "arrow/testing/util.h"
 #include "arrow/type.h"
 
-#include "arrow/compute/kernel.h"
-
 // IWYU pragma: end_exports
 
 namespace arrow {
@@ -90,6 +89,14 @@ struct DatumEqual<Type, enable_if_integer<Type>> {
   }
 };
 
+void CheckScalar(std::string func_name, const ScalarVector& inputs,
+                 std::shared_ptr<Scalar> expected,
+                 const FunctionOptions* options = nullptr);
+
+void CheckScalar(std::string func_name, const ArrayVector& inputs,
+                 std::shared_ptr<Array> expected,
+                 const FunctionOptions* options = nullptr);
+
 void CheckScalarUnary(std::string func_name, std::shared_ptr<DataType> in_ty,
                       std::string json_input, std::shared_ptr<DataType> out_ty,
                       std::string json_expected,
diff --git a/cpp/src/arrow/compute/registry.cc b/cpp/src/arrow/compute/registry.cc
index 3a8a3a0eb85..1d713b96e1e 100644
--- a/cpp/src/arrow/compute/registry.cc
+++ b/cpp/src/arrow/compute/registry.cc
@@ -125,6 +125,7 @@ static std::unique_ptr<FunctionRegistry> CreateBuiltInRegistry() {
   RegisterScalarStringAscii(registry.get());
   RegisterScalarValidity(registry.get());
   RegisterScalarFillNull(registry.get());
+  RegisterScalarIfElse(registry.get());
 
   // Vector functions
   RegisterVectorHash(registry.get());
diff --git a/cpp/src/arrow/compute/registry_internal.h b/cpp/src/arrow/compute/registry_internal.h
index e4008cf3f27..f97553af4b1 100644
--- a/cpp/src/arrow/compute/registry_internal.h
+++ b/cpp/src/arrow/compute/registry_internal.h
@@ -34,6 +34,7 @@ void RegisterScalarSetLookup(FunctionRegistry* registry);
 void RegisterScalarStringAscii(FunctionRegistry* registry);
 void RegisterScalarValidity(FunctionRegistry* registry);
 void RegisterScalarFillNull(FunctionRegistry* registry);
+void RegisterScalarIfElse(FunctionRegistry* registry);
 
 // Vector functions
 void RegisterVectorHash(FunctionRegistry* registry);
diff --git a/cpp/src/arrow/util/bitmap_ops.cc b/cpp/src/arrow/util/bitmap_ops.cc
index 32da60aafd9..a27a61cadf3 100644
--- a/cpp/src/arrow/util/bitmap_ops.cc
+++ b/cpp/src/arrow/util/bitmap_ops.cc
@@ -583,5 +583,23 @@ void BitmapAndNot(const uint8_t* left, int64_t left_offset, const uint8_t* right
   BitmapOp<AndNotOp>(left, left_offset, right, right_offset, length, out_offset, out);
 }
 
+template <typename T>
+struct OrNotOp {
+  constexpr T operator()(const T& l, const T& r) const { return l | ~r; }
+};
+
+Result<std::shared_ptr<Buffer>> BitmapOrNot(MemoryPool* pool, const uint8_t* left,
+                                            int64_t left_offset, const uint8_t* right,
+                                            int64_t right_offset, int64_t length,
+                                            int64_t out_offset) {
+  return BitmapOp<OrNotOp>(pool, left, left_offset, right, right_offset, length,
+                           out_offset);
+}
+
+void BitmapOrNot(const uint8_t* left, int64_t left_offset, const uint8_t* right,
+                 int64_t right_offset, int64_t length, int64_t out_offset, uint8_t* out) {
+  BitmapOp<OrNotOp>(left, left_offset, right, right_offset, length, out_offset, out);
+}
+
 }  // namespace internal
 }  // namespace arrow
diff --git a/cpp/src/arrow/util/bitmap_ops.h b/cpp/src/arrow/util/bitmap_ops.h
index 554e1d7468b..40a7797a239 100644
--- a/cpp/src/arrow/util/bitmap_ops.h
+++ b/cpp/src/arrow/util/bitmap_ops.h
@@ -183,5 +183,24 @@ ARROW_EXPORT
 void BitmapAndNot(const uint8_t* left, int64_t left_offset, const uint8_t* right,
                   int64_t right_offset, int64_t length, int64_t out_offset, uint8_t* out);
 
+/// \brief Do a "bitmap or not" on right and left buffers starting at
+/// their respective bit-offsets for the given bit-length and put
+/// the results in out_buffer starting at the given bit-offset.
+///
+/// out_buffer will be allocated and initialized to zeros using pool before
+/// the operation.
+ARROW_EXPORT
+Result<std::shared_ptr<Buffer>> BitmapOrNot(MemoryPool* pool, const uint8_t* left,
+                                            int64_t left_offset, const uint8_t* right,
+                                            int64_t right_offset, int64_t length,
+                                            int64_t out_offset);
+
+/// \brief Do a "bitmap or not" on right and left buffers starting at
+/// their respective bit-offsets for the given bit-length and put
+/// the results in out starting at the given bit-offset.
+ARROW_EXPORT
+void BitmapOrNot(const uint8_t* left, int64_t left_offset, const uint8_t* right,
+                 int64_t right_offset, int64_t length, int64_t out_offset, uint8_t* out);
+
 }  // namespace internal
 }  // namespace arrow
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index 3cf244ca5e8..4e729b055cf 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -648,40 +648,48 @@ Structural transforms
 +==========================+============+================================================+=====================+=========+
 | fill_null                | Binary     | Boolean, Null, Numeric, Temporal, String-like  | Input type          | \(1)    |
 +--------------------------+------------+------------------------------------------------+---------------------+---------+
-| is_finite                | Unary      | Float, Double                                  | Boolean             | \(2)    |
+| if_else                  | Ternary    | Boolean, Null, Numeric, Temporal               | Input type          + \(2)    |
 +--------------------------+------------+------------------------------------------------+---------------------+---------+
-| is_inf                   | Unary      | Float, Double                                  | Boolean             | \(3)    |
+| is_finite                | Unary      | Float, Double                                  | Boolean             | \(3)    |
 +--------------------------+------------+------------------------------------------------+---------------------+---------+
-| is_nan                   | Unary      | Float, Double                                  | Boolean             | \(4)    |
+| is_inf                   | Unary      | Float, Double                                  | Boolean             | \(4)    |
 +--------------------------+------------+------------------------------------------------+---------------------+---------+
-| is_null                  | Unary      | Any                                            | Boolean             | \(5)    |
+| is_nan                   | Unary      | Float, Double                                  | Boolean             | \(5)    |
 +--------------------------+------------+------------------------------------------------+---------------------+---------+
-| is_valid                 | Unary      | Any                                            | Boolean             | \(6)    |
+| is_null                  | Unary      | Any                                            | Boolean             | \(6)    |
 +--------------------------+------------+------------------------------------------------+---------------------+---------+
-| list_value_length        | Unary      | List-like                                      | Int32 or Int64      | \(7)    |
+| is_valid                 | Unary      | Any                                            | Boolean             | \(7)    |
 +--------------------------+------------+------------------------------------------------+---------------------+---------+
-| project                  | Varargs    | Any                                            | Struct              | \(8)    |
+| list_value_length        | Unary      | List-like                                      | Int32 or Int64      | \(8)    |
++--------------------------+------------+------------------------------------------------+---------------------+---------+
+| project                  | Varargs    | Any                                            | Struct              | \(9)    |
 +--------------------------+------------+------------------------------------------------+---------------------+---------+
 
 * \(1) First input must be an array, second input a scalar of the same type.
   Output is an array of the same type as the inputs, and with the same values
   as the first input, except for nulls replaced with the second input value.
 
-* \(2) Output is true iff the corresponding input element is finite (not Infinity,
+* \(2) First input must be a Boolean scalar or array. Second and third inputs
+  could be scalars or arrays and must be of the same type. Output is an array
+  (or scalar if all inputs are scalar) of the same type as the second/ third
+  input. If the nulls present on the first input, they will be promoted to the
+  output, otherwise nulls will be chosen based on the first input values.
+
+* \(3) Output is true iff the corresponding input element is finite (not Infinity,
   -Infinity, or NaN).
 
-* \(3) Output is true iff the corresponding input element is Infinity/-Infinity.
+* \(4) Output is true iff the corresponding input element is Infinity/-Infinity.
 
-* \(4) Output is true iff the corresponding input element is NaN.
+* \(5) Output is true iff the corresponding input element is NaN.
 
-* \(5) Output is true iff the corresponding input element is null.
+* \(6) Output is true iff the corresponding input element is null.
 
-* \(6) Output is true iff the corresponding input element is non-null.
+* \(7) Output is true iff the corresponding input element is non-null.
 
-* \(7) Each output element is the length of the corresponding input element
+* \(8) Each output element is the length of the corresponding input element
   (null if input is null).  Output type is Int32 for List, Int64 for LargeList.
 
-* \(8) The output struct's field types are the types of its arguments. The
+* \(9) The output struct's field types are the types of its arguments. The
   field names are specified using an instance of :struct:`ProjectOptions`.
   The output shape will be scalar if all inputs are scalar, otherwise any
   scalars will be broadcast to arrays.
diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst
index 91eeeedbeaa..3010776930f 100644
--- a/docs/source/python/api/compute.rst
+++ b/docs/source/python/api/compute.rst
@@ -222,6 +222,7 @@ Structural Transforms
 
    binary_length
    fill_null
+   if_else
    is_finite
    is_inf
    is_nan

From e1690d6cc9ab1aa56c9e6a4782ee3fe9bd644c06 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Thu, 3 Jun 2021 21:40:29 -0400
Subject: [PATCH 341/719] ARROW-12751: [C++] Implement minimum/maximum kernels

This is a bit messy, but implements a variadic scalar maximum/minimum kernel.

Closes #10390 from lidavidm/arrow-12751

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Benjamin Kietzman <bengilgit@gmail.com>
---
 cpp/src/arrow/compute/api_scalar.cc           |  10 +
 cpp/src/arrow/compute/api_scalar.h            |  29 ++
 .../arrow/compute/kernels/codegen_internal.h  |  43 ++-
 .../arrow/compute/kernels/scalar_boolean.cc   |  56 +--
 .../arrow/compute/kernels/scalar_compare.cc   | 291 +++++++++++++++
 .../compute/kernels/scalar_compare_test.cc    | 348 ++++++++++++++++++
 docs/source/cpp/compute.rst                   |  15 +
 docs/source/python/api/compute.rst            |   8 +
 8 files changed, 770 insertions(+), 30 deletions(-)

diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc
index 105ba7a0589..6f77d6f9785 100644
--- a/cpp/src/arrow/compute/api_scalar.cc
+++ b/cpp/src/arrow/compute/api_scalar.cc
@@ -63,6 +63,16 @@ SCALAR_ARITHMETIC_BINARY(Multiply, "multiply", "multiply_checked")
 SCALAR_ARITHMETIC_BINARY(Divide, "divide", "divide_checked")
 SCALAR_ARITHMETIC_BINARY(Power, "power", "power_checked")
 
+Result<Datum> ElementWiseMax(const std::vector<Datum>& args,
+                             ElementWiseAggregateOptions options, ExecContext* ctx) {
+  return CallFunction("element_wise_max", args, &options, ctx);
+}
+
+Result<Datum> ElementWiseMin(const std::vector<Datum>& args,
+                             ElementWiseAggregateOptions options, ExecContext* ctx) {
+  return CallFunction("element_wise_min", args, &options, ctx);
+}
+
 // ----------------------------------------------------------------------
 // Set-related operations
 
diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h
index 0a05b123a44..ab690f4c456 100644
--- a/cpp/src/arrow/compute/api_scalar.h
+++ b/cpp/src/arrow/compute/api_scalar.h
@@ -42,6 +42,11 @@ struct ArithmeticOptions : public FunctionOptions {
   bool check_overflow;
 };
 
+struct ARROW_EXPORT ElementWiseAggregateOptions : public FunctionOptions {
+  ElementWiseAggregateOptions() : skip_nulls(true) {}
+  bool skip_nulls;
+};
+
 struct ARROW_EXPORT MatchSubstringOptions : public FunctionOptions {
   explicit MatchSubstringOptions(std::string pattern, bool ignore_case = false)
       : pattern(std::move(pattern)), ignore_case(ignore_case) {}
@@ -253,6 +258,30 @@ Result<Datum> Power(const Datum& left, const Datum& right,
                     ArithmeticOptions options = ArithmeticOptions(),
                     ExecContext* ctx = NULLPTR);
 
+/// \brief Find the element-wise maximum of any number of arrays or scalars.
+/// Array values must be the same length.
+///
+/// \param[in] args arrays or scalars to operate on.
+/// \param[in] options options for handling nulls, optional
+/// \param[in] ctx the function execution context, optional
+/// \return the element-wise maximum
+ARROW_EXPORT
+Result<Datum> ElementWiseMax(const std::vector<Datum>& args,
+                             ElementWiseAggregateOptions options = {},
+                             ExecContext* ctx = NULLPTR);
+
+/// \brief Find the element-wise minimum of any number of arrays or scalars.
+/// Array values must be the same length.
+///
+/// \param[in] args arrays or scalars to operate on.
+/// \param[in] options options for handling nulls, optional
+/// \param[in] ctx the function execution context, optional
+/// \return the element-wise minimum
+ARROW_EXPORT
+Result<Datum> ElementWiseMin(const std::vector<Datum>& args,
+                             ElementWiseAggregateOptions options = {},
+                             ExecContext* ctx = NULLPTR);
+
 /// \brief Compare a numeric array with a scalar.
 ///
 /// \param[in] left datum to compare, must be an Array
diff --git a/cpp/src/arrow/compute/kernels/codegen_internal.h b/cpp/src/arrow/compute/kernels/codegen_internal.h
index e31771a89ca..6d5c837f514 100644
--- a/cpp/src/arrow/compute/kernels/codegen_internal.h
+++ b/cpp/src/arrow/compute/kernels/codegen_internal.h
@@ -303,8 +303,12 @@ struct BoxScalar;
 template <typename Type>
 struct BoxScalar<Type, enable_if_has_c_type<Type>> {
   using T = typename GetOutputType<Type>::T;
-  using ScalarType = typename TypeTraits<Type>::ScalarType;
-  static void Box(T val, Scalar* out) { checked_cast<ScalarType*>(out)->value = val; }
+  static void Box(T val, Scalar* out) {
+    // Enables BoxScalar<Int64Type> to work on a (for example) Time64Scalar
+    T* mutable_data = reinterpret_cast<T*>(
+        checked_cast<::arrow::internal::PrimitiveScalarBase*>(out)->mutable_data());
+    *mutable_data = val;
+  }
 };
 
 template <typename Type>
@@ -1093,6 +1097,41 @@ ArrayKernelExec GeneratePhysicalInteger(detail::GetTypeId get_id) {
   }
 }
 
+template <template <typename... Args> class Generator, typename... Args>
+ArrayKernelExec GeneratePhysicalNumeric(detail::GetTypeId get_id) {
+  switch (get_id.id) {
+    case Type::INT8:
+      return Generator<Int8Type, Args...>::Exec;
+    case Type::INT16:
+      return Generator<Int16Type, Args...>::Exec;
+    case Type::INT32:
+    case Type::DATE32:
+    case Type::TIME32:
+      return Generator<Int32Type, Args...>::Exec;
+    case Type::INT64:
+    case Type::DATE64:
+    case Type::TIMESTAMP:
+    case Type::TIME64:
+    case Type::DURATION:
+      return Generator<Int64Type, Args...>::Exec;
+    case Type::UINT8:
+      return Generator<UInt8Type, Args...>::Exec;
+    case Type::UINT16:
+      return Generator<UInt16Type, Args...>::Exec;
+    case Type::UINT32:
+      return Generator<UInt32Type, Args...>::Exec;
+    case Type::UINT64:
+      return Generator<UInt64Type, Args...>::Exec;
+    case Type::FLOAT:
+      return Generator<FloatType, Args...>::Exec;
+    case Type::DOUBLE:
+      return Generator<DoubleType, Args...>::Exec;
+    default:
+      DCHECK(false);
+      return ExecFail;
+  }
+}
+
 // Generate a kernel given a templated functor for integer types
 //
 // See "Numeric" above for description of the generator functor
diff --git a/cpp/src/arrow/compute/kernels/scalar_boolean.cc b/cpp/src/arrow/compute/kernels/scalar_boolean.cc
index 3d47d239888..89107120fa3 100644
--- a/cpp/src/arrow/compute/kernels/scalar_boolean.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_boolean.cc
@@ -95,7 +95,7 @@ inline Bitmap GetBitmap(const ArrayData& arr, int index) {
   return Bitmap{arr.buffers[index], arr.offset, arr.length};
 }
 
-struct Invert {
+struct InvertOp {
   static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) {
     *checked_cast<BooleanScalar*>(out) = InvertScalar(in);
     return Status::OK();
@@ -115,8 +115,8 @@ struct Commutative {
   }
 };
 
-struct And : Commutative<And> {
-  using Commutative<And>::Call;
+struct AndOp : Commutative<AndOp> {
+  using Commutative<AndOp>::Call;
 
   static Status Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
                      Scalar* out) {
@@ -147,8 +147,8 @@ struct And : Commutative<And> {
   }
 };
 
-struct KleeneAnd : Commutative<KleeneAnd> {
-  using Commutative<KleeneAnd>::Call;
+struct KleeneAndOp : Commutative<KleeneAndOp> {
+  using Commutative<KleeneAndOp>::Call;
 
   static Status Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
                      Scalar* out) {
@@ -205,7 +205,7 @@ struct KleeneAnd : Commutative<KleeneAnd> {
     if (left.GetNullCount() == 0 && right.GetNullCount() == 0) {
       out->null_count = 0;
       out->buffers[0] = nullptr;
-      return And::Call(ctx, left, right, out);
+      return AndOp::Call(ctx, left, right, out);
     }
     auto compute_word = [](uint64_t left_true, uint64_t left_false, uint64_t right_true,
                            uint64_t right_false, uint64_t* out_valid,
@@ -218,8 +218,8 @@ struct KleeneAnd : Commutative<KleeneAnd> {
   }
 };
 
-struct Or : Commutative<Or> {
-  using Commutative<Or>::Call;
+struct OrOp : Commutative<OrOp> {
+  using Commutative<OrOp>::Call;
 
   static Status Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
                      Scalar* out) {
@@ -250,8 +250,8 @@ struct Or : Commutative<Or> {
   }
 };
 
-struct KleeneOr : Commutative<KleeneOr> {
-  using Commutative<KleeneOr>::Call;
+struct KleeneOrOp : Commutative<KleeneOrOp> {
+  using Commutative<KleeneOrOp>::Call;
 
   static Status Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
                      Scalar* out) {
@@ -308,7 +308,7 @@ struct KleeneOr : Commutative<KleeneOr> {
     if (left.GetNullCount() == 0 && right.GetNullCount() == 0) {
       out->null_count = 0;
       out->buffers[0] = nullptr;
-      return Or::Call(ctx, left, right, out);
+      return OrOp::Call(ctx, left, right, out);
     }
 
     static auto compute_word = [](uint64_t left_true, uint64_t left_false,
@@ -323,8 +323,8 @@ struct KleeneOr : Commutative<KleeneOr> {
   }
 };
 
-struct Xor : Commutative<Xor> {
-  using Commutative<Xor>::Call;
+struct XorOp : Commutative<XorOp> {
+  using Commutative<XorOp>::Call;
 
   static Status Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
                      Scalar* out) {
@@ -355,10 +355,10 @@ struct Xor : Commutative<Xor> {
   }
 };
 
-struct AndNot {
+struct AndNotOp {
   static Status Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
                      Scalar* out) {
-    return And::Call(ctx, left, InvertScalar(right), out);
+    return AndOp::Call(ctx, left, InvertScalar(right), out);
   }
 
   static Status Call(KernelContext* ctx, const Scalar& left, const ArrayData& right,
@@ -373,7 +373,7 @@ struct AndNot {
 
   static Status Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
                      ArrayData* out) {
-    return And::Call(ctx, left, InvertScalar(right), out);
+    return AndOp::Call(ctx, left, InvertScalar(right), out);
   }
 
   static Status Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
@@ -385,10 +385,10 @@ struct AndNot {
   }
 };
 
-struct KleeneAndNot {
+struct KleeneAndNotOp {
   static Status Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
                      Scalar* out) {
-    return KleeneAnd::Call(ctx, left, InvertScalar(right), out);
+    return KleeneAndOp::Call(ctx, left, InvertScalar(right), out);
   }
 
   static Status Call(KernelContext* ctx, const Scalar& left, const ArrayData& right,
@@ -430,7 +430,7 @@ struct KleeneAndNot {
 
   static Status Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
                      ArrayData* out) {
-    return KleeneAnd::Call(ctx, left, InvertScalar(right), out);
+    return KleeneAndOp::Call(ctx, left, InvertScalar(right), out);
   }
 
   static Status Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
@@ -438,7 +438,7 @@ struct KleeneAndNot {
     if (left.GetNullCount() == 0 && right.GetNullCount() == 0) {
       out->null_count = 0;
       out->buffers[0] = nullptr;
-      return AndNot::Call(ctx, left, right, out);
+      return AndNotOp::Call(ctx, left, right, out);
     }
 
     static auto compute_word = [](uint64_t left_true, uint64_t left_false,
@@ -543,20 +543,20 @@ namespace internal {
 
 void RegisterScalarBoolean(FunctionRegistry* registry) {
   // These functions can write into sliced output bitmaps
-  MakeFunction("invert", 1, applicator::SimpleUnary<Invert>, &invert_doc, registry);
-  MakeFunction("and", 2, applicator::SimpleBinary<And>, &and_doc, registry);
-  MakeFunction("and_not", 2, applicator::SimpleBinary<AndNot>, &and_not_doc, registry);
-  MakeFunction("or", 2, applicator::SimpleBinary<Or>, &or_doc, registry);
-  MakeFunction("xor", 2, applicator::SimpleBinary<Xor>, &xor_doc, registry);
+  MakeFunction("invert", 1, applicator::SimpleUnary<InvertOp>, &invert_doc, registry);
+  MakeFunction("and", 2, applicator::SimpleBinary<AndOp>, &and_doc, registry);
+  MakeFunction("and_not", 2, applicator::SimpleBinary<AndNotOp>, &and_not_doc, registry);
+  MakeFunction("or", 2, applicator::SimpleBinary<OrOp>, &or_doc, registry);
+  MakeFunction("xor", 2, applicator::SimpleBinary<XorOp>, &xor_doc, registry);
 
   // The Kleene logic kernels cannot write into sliced output bitmaps
-  MakeFunction("and_kleene", 2, applicator::SimpleBinary<KleeneAnd>, &and_kleene_doc,
+  MakeFunction("and_kleene", 2, applicator::SimpleBinary<KleeneAndOp>, &and_kleene_doc,
                registry,
                /*can_write_into_slices=*/false, NullHandling::COMPUTED_PREALLOCATE);
-  MakeFunction("and_not_kleene", 2, applicator::SimpleBinary<KleeneAndNot>,
+  MakeFunction("and_not_kleene", 2, applicator::SimpleBinary<KleeneAndNotOp>,
                &and_not_kleene_doc, registry,
                /*can_write_into_slices=*/false, NullHandling::COMPUTED_PREALLOCATE);
-  MakeFunction("or_kleene", 2, applicator::SimpleBinary<KleeneOr>, &or_kleene_doc,
+  MakeFunction("or_kleene", 2, applicator::SimpleBinary<KleeneOrOp>, &or_kleene_doc,
                registry,
                /*can_write_into_slices=*/false, NullHandling::COMPUTED_PREALLOCATE);
 }
diff --git a/cpp/src/arrow/compute/kernels/scalar_compare.cc b/cpp/src/arrow/compute/kernels/scalar_compare.cc
index 8da97ef2260..8e9e224bdde 100644
--- a/cpp/src/arrow/compute/kernels/scalar_compare.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_compare.cc
@@ -15,7 +15,12 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include <cmath>
+#include <limits>
+
+#include "arrow/compute/api_scalar.h"
 #include "arrow/compute/kernels/common.h"
+#include "arrow/util/bitmap_ops.h"
 
 namespace arrow {
 
@@ -56,6 +61,75 @@ struct GreaterEqual {
   }
 };
 
+template <typename T>
+using is_unsigned_integer = std::integral_constant<bool, std::is_integral<T>::value &&
+                                                             std::is_unsigned<T>::value>;
+
+template <typename T>
+using is_signed_integer =
+    std::integral_constant<bool, std::is_integral<T>::value && std::is_signed<T>::value>;
+
+template <typename T>
+using enable_if_integer =
+    enable_if_t<is_signed_integer<T>::value || is_unsigned_integer<T>::value, T>;
+
+template <typename T>
+using enable_if_floating_point = enable_if_t<std::is_floating_point<T>::value, T>;
+
+struct Minimum {
+  template <typename T>
+  static enable_if_floating_point<T> Call(T left, T right) {
+    return std::fmin(left, right);
+  }
+
+  template <typename T>
+  static enable_if_integer<T> Call(T left, T right) {
+    return std::min(left, right);
+  }
+
+  template <typename T>
+  static constexpr enable_if_t<std::is_same<float, T>::value, T> antiextreme() {
+    return std::nanf("");
+  }
+
+  template <typename T>
+  static constexpr enable_if_t<std::is_same<double, T>::value, T> antiextreme() {
+    return std::nan("");
+  }
+
+  template <typename T>
+  static constexpr enable_if_integer<T> antiextreme() {
+    return std::numeric_limits<T>::max();
+  }
+};
+
+struct Maximum {
+  template <typename T>
+  static enable_if_floating_point<T> Call(T left, T right) {
+    return std::fmax(left, right);
+  }
+
+  template <typename T>
+  static enable_if_integer<T> Call(T left, T right) {
+    return std::max(left, right);
+  }
+
+  template <typename T>
+  static constexpr enable_if_t<std::is_same<float, T>::value, T> antiextreme() {
+    return std::nanf("");
+  }
+
+  template <typename T>
+  static constexpr enable_if_t<std::is_same<double, T>::value, T> antiextreme() {
+    return std::nan("");
+  }
+
+  template <typename T>
+  static constexpr enable_if_integer<T> antiextreme() {
+    return std::numeric_limits<T>::min();
+  }
+};
+
 // Implement Less, LessEqual by flipping arguments to Greater, GreaterEqual
 
 template <typename Op>
@@ -97,6 +171,28 @@ struct CompareFunction : ScalarFunction {
   }
 };
 
+struct VarArgsCompareFunction : ScalarFunction {
+  using ScalarFunction::ScalarFunction;
+
+  Result<const Kernel*> DispatchBest(std::vector<ValueDescr>* values) const override {
+    RETURN_NOT_OK(CheckArity(*values));
+
+    using arrow::compute::detail::DispatchExactImpl;
+    if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
+
+    EnsureDictionaryDecoded(values);
+
+    if (auto type = CommonNumeric(*values)) {
+      ReplaceTypes(type, values);
+    } else if (auto type = CommonTimestamp(*values)) {
+      ReplaceTypes(type, values);
+    }
+
+    if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
+    return arrow::compute::detail::NoMatchingKernel(this, *values);
+  }
+};
+
 template <typename Op>
 std::shared_ptr<ScalarFunction> MakeCompareFunction(std::string name,
                                                     const FunctionDoc* doc) {
@@ -170,6 +266,177 @@ std::shared_ptr<ScalarFunction> MakeFlippedFunction(std::string name,
   return flipped_func;
 }
 
+using MinMaxState = OptionsWrapper<ElementWiseAggregateOptions>;
+
+// Implement a variadic scalar min/max kernel.
+template <typename OutType, typename Op>
+struct ScalarMinMax {
+  using OutValue = typename GetOutputType<OutType>::T;
+
+  static void ExecScalar(const ExecBatch& batch,
+                         const ElementWiseAggregateOptions& options, Scalar* out) {
+    // All arguments are scalar
+    OutValue value{};
+    bool valid = false;
+    for (const auto& arg : batch.values) {
+      // Ignore non-scalar arguments so we can use it in the mixed-scalar-and-array case
+      if (!arg.is_scalar()) continue;
+      const auto& scalar = *arg.scalar();
+      if (!scalar.is_valid) {
+        if (options.skip_nulls) continue;
+        out->is_valid = false;
+        return;
+      }
+      if (!valid) {
+        value = UnboxScalar<OutType>::Unbox(scalar);
+        valid = true;
+      } else {
+        value = Op::Call(value, UnboxScalar<OutType>::Unbox(scalar));
+      }
+    }
+    out->is_valid = valid;
+    if (valid) {
+      BoxScalar<OutType>::Box(value, out);
+    }
+  }
+
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    const ElementWiseAggregateOptions& options = MinMaxState::Get(ctx);
+    const auto descrs = batch.GetDescriptors();
+    const size_t scalar_count =
+        static_cast<size_t>(std::count_if(batch.values.begin(), batch.values.end(),
+                                          [](const Datum& d) { return d.is_scalar(); }));
+    if (scalar_count == batch.values.size()) {
+      ExecScalar(batch, options, out->scalar().get());
+      return Status::OK();
+    }
+
+    ArrayData* output = out->mutable_array();
+
+    // At least one array, two or more arguments
+    ArrayDataVector arrays;
+    for (const auto& arg : batch.values) {
+      if (!arg.is_array()) continue;
+      arrays.push_back(arg.array());
+    }
+
+    bool initialize_output = true;
+    if (scalar_count > 0) {
+      ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Scalar> temp_scalar,
+                            MakeScalar(out->type(), 0));
+      ExecScalar(batch, options, temp_scalar.get());
+      if (temp_scalar->is_valid) {
+        const auto value = UnboxScalar<OutType>::Unbox(*temp_scalar);
+        initialize_output = false;
+        OutValue* out = output->GetMutableValues<OutValue>(1);
+        std::fill(out, out + batch.length, value);
+      } else if (!options.skip_nulls) {
+        // Abort early
+        ARROW_ASSIGN_OR_RAISE(auto array, MakeArrayFromScalar(*temp_scalar, batch.length,
+                                                              ctx->memory_pool()));
+        *output = *array->data();
+        return Status::OK();
+      }
+    }
+
+    if (initialize_output) {
+      OutValue* out = output->GetMutableValues<OutValue>(1);
+      std::fill(out, out + batch.length, Op::template antiextreme<OutValue>());
+    }
+
+    // Precompute the validity buffer
+    if (options.skip_nulls && initialize_output) {
+      // OR together the validity buffers of all arrays
+      if (std::all_of(arrays.begin(), arrays.end(),
+                      [](const std::shared_ptr<ArrayData>& arr) {
+                        return arr->MayHaveNulls();
+                      })) {
+        for (const auto& arr : arrays) {
+          if (!arr->MayHaveNulls()) continue;
+          if (!output->buffers[0]) {
+            ARROW_ASSIGN_OR_RAISE(output->buffers[0], ctx->AllocateBitmap(batch.length));
+            ::arrow::internal::CopyBitmap(arr->buffers[0]->data(), arr->offset,
+
+                                          batch.length,
+                                          output->buffers[0]->mutable_data(),
+                                          /*dest_offset=*/0);
+          } else {
+            ::arrow::internal::BitmapOr(
+                output->buffers[0]->data(), /*left_offset=*/0, arr->buffers[0]->data(),
+                arr->offset, batch.length,
+                /*out_offset=*/0, output->buffers[0]->mutable_data());
+          }
+        }
+      }
+    } else if (!options.skip_nulls) {
+      // AND together the validity buffers of all arrays
+      for (const auto& arr : arrays) {
+        if (!arr->MayHaveNulls()) continue;
+        if (!output->buffers[0]) {
+          ARROW_ASSIGN_OR_RAISE(output->buffers[0], ctx->AllocateBitmap(batch.length));
+          ::arrow::internal::CopyBitmap(arr->buffers[0]->data(), arr->offset,
+                                        batch.length, output->buffers[0]->mutable_data(),
+                                        /*dest_offset=*/0);
+        } else {
+          ::arrow::internal::BitmapAnd(output->buffers[0]->data(), /*left_offset=*/0,
+                                       arr->buffers[0]->data(), arr->offset, batch.length,
+                                       /*out_offset=*/0,
+                                       output->buffers[0]->mutable_data());
+        }
+      }
+    }
+
+    for (const auto& array : arrays) {
+      OutputArrayWriter<OutType> writer(out->mutable_array());
+      ArrayIterator<OutType> out_it(*output);
+      int64_t index = 0;
+      VisitArrayValuesInline<OutType>(
+          *array,
+          [&](OutValue value) {
+            auto u = out_it();
+            if (!output->buffers[0] ||
+                BitUtil::GetBit(output->buffers[0]->data(), index)) {
+              writer.Write(Op::Call(u, value));
+            } else {
+              writer.Write(value);
+            }
+            index++;
+          },
+          [&]() {
+            // RHS is null, preserve the LHS
+            writer.values++;
+            index++;
+            out_it();
+          });
+    }
+    output->null_count = output->buffers[0] ? -1 : 0;
+    return Status::OK();
+  }
+};
+
+template <typename Op>
+std::shared_ptr<ScalarFunction> MakeScalarMinMax(std::string name,
+                                                 const FunctionDoc* doc) {
+  auto func = std::make_shared<VarArgsCompareFunction>(name, Arity::VarArgs(), doc);
+  for (const auto& ty : NumericTypes()) {
+    auto exec = GeneratePhysicalNumeric<ScalarMinMax, Op>(ty);
+    ScalarKernel kernel{KernelSignature::Make({ty}, ty, /*is_varargs=*/true), exec,
+                        MinMaxState::Init};
+    kernel.null_handling = NullHandling::type::COMPUTED_NO_PREALLOCATE;
+    kernel.mem_allocation = MemAllocation::type::PREALLOCATE;
+    DCHECK_OK(func->AddKernel(std::move(kernel)));
+  }
+  for (const auto& ty : TemporalTypes()) {
+    auto exec = GeneratePhysicalNumeric<ScalarMinMax, Op>(ty);
+    ScalarKernel kernel{KernelSignature::Make({ty}, ty, /*is_varargs=*/true), exec,
+                        MinMaxState::Init};
+    kernel.null_handling = NullHandling::type::COMPUTED_NO_PREALLOCATE;
+    kernel.mem_allocation = MemAllocation::type::PREALLOCATE;
+    DCHECK_OK(func->AddKernel(std::move(kernel)));
+  }
+  return func;
+}
+
 const FunctionDoc equal_doc{"Compare values for equality (x == y)",
                             ("A null on either side emits a null comparison result."),
                             {"x", "y"}};
@@ -196,6 +463,19 @@ const FunctionDoc less_equal_doc{
     ("A null on either side emits a null comparison result."),
     {"x", "y"}};
 
+const FunctionDoc element_wise_min_doc{
+    "Find the element-wise minimum value",
+    ("Nulls will be ignored (default) or propagated. "
+     "NaN will be taken over null, but not over any valid float."),
+    {"*args"},
+    "ElementWiseAggregateOptions"};
+
+const FunctionDoc element_wise_max_doc{
+    "Find the element-wise maximum value",
+    ("Nulls will be ignored (default) or propagated. "
+     "NaN will be taken over null, but not over any valid float."),
+    {"*args"},
+    "ElementWiseAggregateOptions"};
 }  // namespace
 
 void RegisterScalarComparison(FunctionRegistry* registry) {
@@ -213,6 +493,17 @@ void RegisterScalarComparison(FunctionRegistry* registry) {
   DCHECK_OK(registry->AddFunction(std::move(less_equal)));
   DCHECK_OK(registry->AddFunction(std::move(greater)));
   DCHECK_OK(registry->AddFunction(std::move(greater_equal)));
+
+  // ----------------------------------------------------------------------
+  // Variadic element-wise functions
+
+  auto element_wise_min =
+      MakeScalarMinMax<Minimum>("element_wise_min", &element_wise_min_doc);
+  DCHECK_OK(registry->AddFunction(std::move(element_wise_min)));
+
+  auto element_wise_max =
+      MakeScalarMinMax<Maximum>("element_wise_max", &element_wise_max_doc);
+  DCHECK_OK(registry->AddFunction(std::move(element_wise_max)));
 }
 
 }  // namespace internal
diff --git a/cpp/src/arrow/compute/kernels/scalar_compare_test.cc b/cpp/src/arrow/compute/kernels/scalar_compare_test.cc
index 7b0906395d7..6318a891d3a 100644
--- a/cpp/src/arrow/compute/kernels/scalar_compare_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_compare_test.cc
@@ -652,5 +652,353 @@ TEST_F(TestStringCompareKernel, RandomCompareArrayArray) {
   }
 }
 
+template <typename T>
+class TestVarArgsCompare : public TestBase {
+ protected:
+  static std::shared_ptr<DataType> type_singleton() {
+    return TypeTraits<T>::type_singleton();
+  }
+
+  using VarArgsFunction = std::function<Result<Datum>(
+      const std::vector<Datum>&, ElementWiseAggregateOptions, ExecContext*)>;
+
+  void SetUp() override { equal_options_ = equal_options_.nans_equal(true); }
+
+  Datum scalar(const std::string& value) {
+    return ScalarFromJSON(type_singleton(), value);
+  }
+
+  Datum array(const std::string& value) { return ArrayFromJSON(type_singleton(), value); }
+
+  Datum Eval(VarArgsFunction func, const std::vector<Datum>& args) {
+    EXPECT_OK_AND_ASSIGN(auto actual,
+                         func(args, element_wise_aggregate_options_, nullptr));
+    if (actual.is_array()) {
+      auto arr = actual.make_array();
+      ARROW_EXPECT_OK(arr->ValidateFull());
+    }
+    return actual;
+  }
+
+  void AssertNullScalar(VarArgsFunction func, const std::vector<Datum>& args) {
+    auto datum = this->Eval(func, args);
+    ASSERT_TRUE(datum.is_scalar());
+    ASSERT_FALSE(datum.scalar()->is_valid);
+  }
+
+  void Assert(VarArgsFunction func, Datum expected, const std::vector<Datum>& args) {
+    auto actual = Eval(func, args);
+    AssertDatumsApproxEqual(expected, actual, /*verbose=*/true, equal_options_);
+  }
+
+  EqualOptions equal_options_ = EqualOptions::Defaults();
+  ElementWiseAggregateOptions element_wise_aggregate_options_;
+};
+
+template <typename T>
+class TestVarArgsCompareNumeric : public TestVarArgsCompare<T> {};
+
+template <typename T>
+class TestVarArgsCompareFloating : public TestVarArgsCompare<T> {};
+
+template <typename T>
+class TestVarArgsCompareParametricTemporal : public TestVarArgsCompare<T> {
+ protected:
+  static std::shared_ptr<DataType> type_singleton() {
+    // Time32 requires second/milli, Time64 requires nano/micro
+    if (TypeTraits<T>::bytes_required(1) == 4) {
+      return std::make_shared<T>(TimeUnit::type::SECOND);
+    } else {
+      return std::make_shared<T>(TimeUnit::type::NANO);
+    }
+  }
+
+  Datum scalar(const std::string& value) {
+    return ScalarFromJSON(type_singleton(), value);
+  }
+
+  Datum array(const std::string& value) { return ArrayFromJSON(type_singleton(), value); }
+};
+
+using NumericBasedTypes =
+    ::testing::Types<UInt8Type, UInt16Type, UInt32Type, UInt64Type, Int8Type, Int16Type,
+                     Int32Type, Int64Type, FloatType, DoubleType, Date32Type, Date64Type>;
+using ParametricTemporalTypes = ::testing::Types<TimestampType, Time32Type, Time64Type>;
+
+TYPED_TEST_SUITE(TestVarArgsCompareNumeric, NumericBasedTypes);
+TYPED_TEST_SUITE(TestVarArgsCompareFloating, RealArrowTypes);
+TYPED_TEST_SUITE(TestVarArgsCompareParametricTemporal, ParametricTemporalTypes);
+
+TYPED_TEST(TestVarArgsCompareNumeric, ElementWiseMin) {
+  this->AssertNullScalar(ElementWiseMin, {});
+  this->AssertNullScalar(ElementWiseMin, {this->scalar("null"), this->scalar("null")});
+
+  this->Assert(ElementWiseMin, this->scalar("0"), {this->scalar("0")});
+  this->Assert(ElementWiseMin, this->scalar("0"),
+               {this->scalar("2"), this->scalar("0"), this->scalar("1")});
+  this->Assert(
+      ElementWiseMin, this->scalar("0"),
+      {this->scalar("2"), this->scalar("0"), this->scalar("1"), this->scalar("null")});
+  this->Assert(ElementWiseMin, this->scalar("1"),
+               {this->scalar("null"), this->scalar("null"), this->scalar("1"),
+                this->scalar("null")});
+
+  this->Assert(ElementWiseMin, (this->array("[]")), {this->array("[]")});
+  this->Assert(ElementWiseMin, this->array("[1, 2, 3, null]"),
+               {this->array("[1, 2, 3, null]")});
+
+  this->Assert(ElementWiseMin, this->array("[1, 2, 2, 2]"),
+               {this->array("[1, 2, 3, 4]"), this->scalar("2")});
+  this->Assert(ElementWiseMin, this->array("[1, 2, 2, 2]"),
+               {this->array("[1, null, 3, 4]"), this->scalar("2")});
+  this->Assert(ElementWiseMin, this->array("[1, 2, 2, 2]"),
+               {this->array("[1, null, 3, 4]"), this->scalar("2"), this->scalar("4")});
+  this->Assert(ElementWiseMin, this->array("[1, 2, 2, 2]"),
+               {this->array("[1, null, 3, 4]"), this->scalar("null"), this->scalar("2")});
+
+  this->Assert(ElementWiseMin, this->array("[1, 2, 2, 2]"),
+               {this->array("[1, 2, 3, 4]"), this->array("[2, 2, 2, 2]")});
+  this->Assert(ElementWiseMin, this->array("[1, 2, 2, 2]"),
+               {this->array("[1, 2, 3, 4]"), this->array("[2, null, 2, 2]")});
+  this->Assert(ElementWiseMin, this->array("[1, 2, 2, 2]"),
+               {this->array("[1, null, 3, 4]"), this->array("[2, 2, 2, 2]")});
+
+  this->Assert(ElementWiseMin, this->array("[1, 2, null, 6]"),
+               {this->array("[1, 2, null, null]"), this->array("[4, null, null, 6]")});
+  this->Assert(ElementWiseMin, this->array("[1, 2, null, 6]"),
+               {this->array("[4, null, null, 6]"), this->array("[1, 2, null, null]")});
+  this->Assert(ElementWiseMin, this->array("[1, 2, 3, 4]"),
+               {this->array("[1, 2, 3, 4]"), this->array("[null, null, null, null]")});
+  this->Assert(ElementWiseMin, this->array("[1, 2, 3, 4]"),
+               {this->array("[null, null, null, null]"), this->array("[1, 2, 3, 4]")});
+
+  this->Assert(ElementWiseMin, this->array("[1, 1, 1, 1]"),
+               {this->scalar("1"), this->array("[1, 2, 3, 4]")});
+  this->Assert(ElementWiseMin, this->array("[1, 1, 1, 1]"),
+               {this->scalar("1"), this->array("[null, null, null, null]")});
+  this->Assert(ElementWiseMin, this->array("[1, 1, 1, 1]"),
+               {this->scalar("null"), this->array("[1, 1, 1, 1]")});
+  this->Assert(ElementWiseMin, this->array("[null, null, null, null]"),
+               {this->scalar("null"), this->array("[null, null, null, null]")});
+
+  // Test null handling
+  this->element_wise_aggregate_options_.skip_nulls = false;
+  this->AssertNullScalar(ElementWiseMin, {this->scalar("null"), this->scalar("null")});
+  this->AssertNullScalar(ElementWiseMin, {this->scalar("0"), this->scalar("null")});
+
+  this->Assert(ElementWiseMin, this->array("[1, null, 2, 2]"),
+               {this->array("[1, null, 3, 4]"), this->scalar("2"), this->scalar("4")});
+  this->Assert(ElementWiseMin, this->array("[null, null, null, null]"),
+               {this->array("[1, null, 3, 4]"), this->scalar("null"), this->scalar("2")});
+  this->Assert(ElementWiseMin, this->array("[1, null, 2, 2]"),
+               {this->array("[1, 2, 3, 4]"), this->array("[2, null, 2, 2]")});
+
+  this->Assert(ElementWiseMin, this->array("[null, null, null, null]"),
+               {this->scalar("1"), this->array("[null, null, null, null]")});
+  this->Assert(ElementWiseMin, this->array("[null, null, null, null]"),
+               {this->scalar("null"), this->array("[1, 1, 1, 1]")});
+}
+
+TYPED_TEST(TestVarArgsCompareFloating, ElementWiseMin) {
+  auto Check = [this](const std::string& expected,
+                      const std::vector<std::string>& inputs) {
+    std::vector<Datum> args;
+    for (const auto& input : inputs) {
+      args.emplace_back(this->scalar(input));
+    }
+    this->Assert(ElementWiseMin, this->scalar(expected), args);
+
+    args.clear();
+    for (const auto& input : inputs) {
+      args.emplace_back(this->array("[" + input + "]"));
+    }
+    this->Assert(ElementWiseMin, this->array("[" + expected + "]"), args);
+  };
+  Check("-0.0", {"0.0", "-0.0"});
+  Check("-0.0", {"1.0", "-0.0", "0.0"});
+  Check("-1.0", {"-1.0", "-0.0"});
+  Check("0", {"0", "NaN"});
+  Check("0", {"NaN", "0"});
+  Check("Inf", {"Inf", "NaN"});
+  Check("Inf", {"NaN", "Inf"});
+  Check("-Inf", {"-Inf", "NaN"});
+  Check("-Inf", {"NaN", "-Inf"});
+  Check("NaN", {"NaN", "null"});
+  Check("0", {"0", "Inf"});
+  Check("-Inf", {"0", "-Inf"});
+}
+
+TYPED_TEST(TestVarArgsCompareParametricTemporal, ElementWiseMin) {
+  // Temporal kernel is implemented with numeric kernel underneath
+  this->AssertNullScalar(ElementWiseMin, {});
+  this->AssertNullScalar(ElementWiseMin, {this->scalar("null"), this->scalar("null")});
+
+  this->Assert(ElementWiseMin, this->scalar("0"), {this->scalar("0")});
+  this->Assert(ElementWiseMin, this->scalar("0"), {this->scalar("2"), this->scalar("0")});
+  this->Assert(ElementWiseMin, this->scalar("0"),
+               {this->scalar("0"), this->scalar("null")});
+
+  this->Assert(ElementWiseMin, (this->array("[]")), {this->array("[]")});
+  this->Assert(ElementWiseMin, this->array("[1, 2, 3, null]"),
+               {this->array("[1, 2, 3, null]")});
+
+  this->Assert(ElementWiseMin, this->array("[1, 2, 2, 2]"),
+               {this->array("[1, null, 3, 4]"), this->scalar("null"), this->scalar("2")});
+
+  this->Assert(ElementWiseMin, this->array("[1, 2, 3, 2]"),
+               {this->array("[1, null, 3, 4]"), this->array("[2, 2, null, 2]")});
+}
+
+TYPED_TEST(TestVarArgsCompareNumeric, ElementWiseMax) {
+  this->AssertNullScalar(ElementWiseMax, {});
+  this->AssertNullScalar(ElementWiseMax, {this->scalar("null"), this->scalar("null")});
+
+  this->Assert(ElementWiseMax, this->scalar("0"), {this->scalar("0")});
+  this->Assert(ElementWiseMax, this->scalar("2"),
+               {this->scalar("2"), this->scalar("0"), this->scalar("1")});
+  this->Assert(
+      ElementWiseMax, this->scalar("2"),
+      {this->scalar("2"), this->scalar("0"), this->scalar("1"), this->scalar("null")});
+  this->Assert(ElementWiseMax, this->scalar("1"),
+               {this->scalar("null"), this->scalar("null"), this->scalar("1"),
+                this->scalar("null")});
+
+  this->Assert(ElementWiseMax, (this->array("[]")), {this->array("[]")});
+  this->Assert(ElementWiseMax, this->array("[1, 2, 3, null]"),
+               {this->array("[1, 2, 3, null]")});
+
+  this->Assert(ElementWiseMax, this->array("[2, 2, 3, 4]"),
+               {this->array("[1, 2, 3, 4]"), this->scalar("2")});
+  this->Assert(ElementWiseMax, this->array("[2, 2, 3, 4]"),
+               {this->array("[1, null, 3, 4]"), this->scalar("2")});
+  this->Assert(ElementWiseMax, this->array("[4, 4, 4, 4]"),
+               {this->array("[1, null, 3, 4]"), this->scalar("2"), this->scalar("4")});
+  this->Assert(ElementWiseMax, this->array("[2, 2, 3, 4]"),
+               {this->array("[1, null, 3, 4]"), this->scalar("null"), this->scalar("2")});
+
+  this->Assert(ElementWiseMax, this->array("[2, 2, 3, 4]"),
+               {this->array("[1, 2, 3, 4]"), this->array("[2, 2, 2, 2]")});
+  this->Assert(ElementWiseMax, this->array("[2, 2, 3, 4]"),
+               {this->array("[1, 2, 3, 4]"), this->array("[2, null, 2, 2]")});
+  this->Assert(ElementWiseMax, this->array("[2, 2, 3, 4]"),
+               {this->array("[1, null, 3, 4]"), this->array("[2, 2, 2, 2]")});
+
+  this->Assert(ElementWiseMax, this->array("[4, 2, null, 6]"),
+               {this->array("[1, 2, null, null]"), this->array("[4, null, null, 6]")});
+  this->Assert(ElementWiseMax, this->array("[4, 2, null, 6]"),
+               {this->array("[4, null, null, 6]"), this->array("[1, 2, null, null]")});
+  this->Assert(ElementWiseMax, this->array("[1, 2, 3, 4]"),
+               {this->array("[1, 2, 3, 4]"), this->array("[null, null, null, null]")});
+  this->Assert(ElementWiseMax, this->array("[1, 2, 3, 4]"),
+               {this->array("[null, null, null, null]"), this->array("[1, 2, 3, 4]")});
+
+  this->Assert(ElementWiseMax, this->array("[1, 2, 3, 4]"),
+               {this->scalar("1"), this->array("[1, 2, 3, 4]")});
+  this->Assert(ElementWiseMax, this->array("[1, 1, 1, 1]"),
+               {this->scalar("1"), this->array("[null, null, null, null]")});
+  this->Assert(ElementWiseMax, this->array("[1, 1, 1, 1]"),
+               {this->scalar("null"), this->array("[1, 1, 1, 1]")});
+  this->Assert(ElementWiseMax, this->array("[null, null, null, null]"),
+               {this->scalar("null"), this->array("[null, null, null, null]")});
+
+  // Test null handling
+  this->element_wise_aggregate_options_.skip_nulls = false;
+  this->AssertNullScalar(ElementWiseMax, {this->scalar("null"), this->scalar("null")});
+  this->AssertNullScalar(ElementWiseMax, {this->scalar("0"), this->scalar("null")});
+
+  this->Assert(ElementWiseMax, this->array("[4, null, 4, 4]"),
+               {this->array("[1, null, 3, 4]"), this->scalar("2"), this->scalar("4")});
+  this->Assert(ElementWiseMax, this->array("[null, null, null, null]"),
+               {this->array("[1, null, 3, 4]"), this->scalar("null"), this->scalar("2")});
+  this->Assert(ElementWiseMax, this->array("[2, null, 3, 4]"),
+               {this->array("[1, 2, 3, 4]"), this->array("[2, null, 2, 2]")});
+
+  this->Assert(ElementWiseMax, this->array("[null, null, null, null]"),
+               {this->scalar("1"), this->array("[null, null, null, null]")});
+  this->Assert(ElementWiseMax, this->array("[null, null, null, null]"),
+               {this->scalar("null"), this->array("[1, 1, 1, 1]")});
+}
+
+TYPED_TEST(TestVarArgsCompareFloating, ElementWiseMax) {
+  auto Check = [this](const std::string& expected,
+                      const std::vector<std::string>& inputs) {
+    std::vector<Datum> args;
+    for (const auto& input : inputs) {
+      args.emplace_back(this->scalar(input));
+    }
+    this->Assert(ElementWiseMax, this->scalar(expected), args);
+
+    args.clear();
+    for (const auto& input : inputs) {
+      args.emplace_back(this->array("[" + input + "]"));
+    }
+    this->Assert(ElementWiseMax, this->array("[" + expected + "]"), args);
+  };
+  Check("0.0", {"0.0", "-0.0"});
+  Check("1.0", {"1.0", "-0.0", "0.0"});
+  Check("-0.0", {"-1.0", "-0.0"});
+  Check("0", {"0", "NaN"});
+  Check("0", {"NaN", "0"});
+  Check("Inf", {"Inf", "NaN"});
+  Check("Inf", {"NaN", "Inf"});
+  Check("-Inf", {"-Inf", "NaN"});
+  Check("-Inf", {"NaN", "-Inf"});
+  Check("NaN", {"NaN", "null"});
+  Check("Inf", {"0", "Inf"});
+  Check("0", {"0", "-Inf"});
+}
+
+TYPED_TEST(TestVarArgsCompareParametricTemporal, ElementWiseMax) {
+  // Temporal kernel is implemented with numeric kernel underneath
+  this->AssertNullScalar(ElementWiseMax, {});
+  this->AssertNullScalar(ElementWiseMax, {this->scalar("null"), this->scalar("null")});
+
+  this->Assert(ElementWiseMax, this->scalar("0"), {this->scalar("0")});
+  this->Assert(ElementWiseMax, this->scalar("2"), {this->scalar("2"), this->scalar("0")});
+  this->Assert(ElementWiseMax, this->scalar("0"),
+               {this->scalar("0"), this->scalar("null")});
+
+  this->Assert(ElementWiseMax, (this->array("[]")), {this->array("[]")});
+  this->Assert(ElementWiseMax, this->array("[1, 2, 3, null]"),
+               {this->array("[1, 2, 3, null]")});
+
+  this->Assert(ElementWiseMax, this->array("[2, 2, 3, 4]"),
+               {this->array("[1, null, 3, 4]"), this->scalar("null"), this->scalar("2")});
+
+  this->Assert(ElementWiseMax, this->array("[2, 2, 3, 4]"),
+               {this->array("[1, null, 3, 4]"), this->array("[2, 2, null, 2]")});
+}
+
+TEST(TestElementWiseMaxElementWiseMin, CommonTimestamp) {
+  {
+    auto t1 = std::make_shared<TimestampType>(TimeUnit::SECOND);
+    auto t2 = std::make_shared<TimestampType>(TimeUnit::MILLI);
+    auto expected = MakeScalar(t2, 1000).ValueOrDie();
+    ASSERT_OK_AND_ASSIGN(auto actual,
+                         ElementWiseMin({Datum(MakeScalar(t1, 1).ValueOrDie()),
+                                         Datum(MakeScalar(t2, 12000).ValueOrDie())}));
+    AssertScalarsEqual(*expected, *actual.scalar(), /*verbose=*/true);
+  }
+  {
+    auto t1 = std::make_shared<Date32Type>();
+    auto t2 = std::make_shared<TimestampType>(TimeUnit::SECOND);
+    auto expected = MakeScalar(t2, 86401).ValueOrDie();
+    ASSERT_OK_AND_ASSIGN(auto actual,
+                         ElementWiseMax({Datum(MakeScalar(t1, 1).ValueOrDie()),
+                                         Datum(MakeScalar(t2, 86401).ValueOrDie())}));
+    AssertScalarsEqual(*expected, *actual.scalar(), /*verbose=*/true);
+  }
+  {
+    auto t1 = std::make_shared<Date32Type>();
+    auto t2 = std::make_shared<Date64Type>();
+    auto t3 = std::make_shared<TimestampType>(TimeUnit::SECOND);
+    auto expected = MakeScalar(t3, 86400).ValueOrDie();
+    ASSERT_OK_AND_ASSIGN(
+        auto actual, ElementWiseMin({Datum(MakeScalar(t1, 1).ValueOrDie()),
+                                     Datum(MakeScalar(t2, 2 * 86400000).ValueOrDie())}));
+    AssertScalarsEqual(*expected, *actual.scalar(), /*verbose=*/true);
+  }
+}
+
 }  // namespace compute
 }  // namespace arrow
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index 4e729b055cf..0b54cd3dd0b 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -310,6 +310,21 @@ output element is null.
 | less, less_equal         |            |                                             |                     |
 +--------------------------+------------+---------------------------------------------+---------------------+
 
+These functions take any number of inputs of numeric type (in which case they
+will be cast to the :ref:`common numeric type <common-numeric-type>` before
+comparison) or of temporal types. If any input is dictionary encoded it will be
+expanded for the purposes of comparison.
+
++--------------------------+------------+---------------------------------------------+---------------------+---------------------------------------+-------+
+| Function names           | Arity      | Input types                                 | Output type         | Options class                         | Notes |
++==========================+============+=============================================+=====================+=======================================+=======+
+| element_wise_max,        | Varargs    | Numeric and Temporal                        | Numeric or Temporal | :struct:`ElementWiseAggregateOptions` | \(1)  |
+| element_wise_min         |            |                                             |                     |                                       |       |
++--------------------------+------------+---------------------------------------------+---------------------+---------------------------------------+-------+
+
+* \(1) By default, nulls are skipped (but the kernel can be configured to propagate nulls).
+  For floating point values, NaN will be taken over null but not over any other value.
+
 Logical functions
 ~~~~~~~~~~~~~~~~~~
 
diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst
index 3010776930f..ccd530073aa 100644
--- a/docs/source/python/api/compute.rst
+++ b/docs/source/python/api/compute.rst
@@ -75,6 +75,14 @@ they return ``null``.
    less_equal
    not_equal
 
+These functions take any number of arguments of a numeric or temporal type.
+
+.. autosummary::
+   :toctree: ../generated/
+
+   element_wise_max
+   element_wise_min
+
 Logical Functions
 -----------------
 

From 4abd200e0b2df46d0a47b265ad0c981041bb6b4f Mon Sep 17 00:00:00 2001
From: Micah Kornfield <emkornfield@gmail.com>
Date: Fri, 4 Jun 2021 10:03:25 +0800
Subject: [PATCH 342/719] ARROW-12907: [Java] Fix memory leak on
 deserialization errors

Closes #10423 from emkornfield/ARROW-12907

Authored-by: Micah Kornfield <emkornfield@gmail.com>
Signed-off-by: liyafan82 <fan_li_ya@foxmail.com>
---
 .../vector/ipc/message/MessageSerializer.java |  9 +++-
 .../vector/ipc/MessageSerializerTest.java     | 44 ++++++++++++++-----
 2 files changed, 39 insertions(+), 14 deletions(-)

diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageSerializer.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageSerializer.java
index 5d332eb8f3c..6597e0302c7 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageSerializer.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageSerializer.java
@@ -723,8 +723,13 @@ public static MessageMetadataResult readMessage(ReadChannel in) throws IOExcepti
   public static ArrowBuf readMessageBody(ReadChannel in, long bodyLength,
       BufferAllocator allocator) throws IOException {
     ArrowBuf bodyBuffer = allocator.buffer(bodyLength);
-    if (in.readFully(bodyBuffer, bodyLength) != bodyLength) {
-      throw new IOException("Unexpected end of input trying to read batch.");
+    try {
+      if (in.readFully(bodyBuffer, bodyLength) != bodyLength) {
+        throw new IOException("Unexpected end of input trying to read batch.");
+      }
+    } catch (RuntimeException | IOException e) {
+      bodyBuffer.close();
+      throw e;
     }
     return bodyBuffer;
   }
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/MessageSerializerTest.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/MessageSerializerTest.java
index ae18fab743e..11b8d4fadd1 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/MessageSerializerTest.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/ipc/MessageSerializerTest.java
@@ -22,6 +22,7 @@
 import static org.junit.Assert.assertArrayEquals;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertThrows;
 
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
@@ -29,6 +30,7 @@
 import java.nio.ByteBuffer;
 import java.nio.ByteOrder;
 import java.nio.channels.Channels;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
 
@@ -72,8 +74,8 @@ private int intToByteRoundtrip(int v, byte[] bytes) {
   @Test
   public void testIntToBytes() {
     byte[] bytes = new byte[4];
-    int[] values = new int[] {1, 15, 1 << 8, 1 << 16, Integer.MAX_VALUE};
-    for (int v: values) {
+    int[] values = new int[]{1, 15, 1 << 8, 1 << 16, Integer.MAX_VALUE};
+    for (int v : values) {
       assertEquals(intToByteRoundtrip(v, bytes), v);
     }
   }
@@ -157,9 +159,9 @@ public void testSchemaDictionaryMessageSerialization() throws IOException {
 
   @Test
   public void testSerializeRecordBatchV4() throws IOException {
-    byte[] validity = new byte[] {(byte) 255, 0};
+    byte[] validity = new byte[]{(byte) 255, 0};
     // second half is "undefined"
-    byte[] values = new byte[] {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+    byte[] values = new byte[]{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
 
     BufferAllocator alloc = new RootAllocator(Long.MAX_VALUE);
     ArrowBuf validityb = buf(alloc, validity);
@@ -181,10 +183,10 @@ public void testSerializeRecordBatchV4() throws IOException {
   }
 
   @Test
-  public void testSerializeRecordBatchV5() throws IOException {
-    byte[] validity = new byte[] {(byte) 255, 0};
+  public void testSerializeRecordBatchV5() throws Exception {
+    byte[] validity = new byte[]{(byte) 255, 0};
     // second half is "undefined"
-    byte[] values = new byte[] {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+    byte[] values = new byte[]{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
 
     BufferAllocator alloc = new RootAllocator(Long.MAX_VALUE);
     ArrowBuf validityb = buf(alloc, validity);
@@ -197,12 +199,30 @@ public void testSerializeRecordBatchV5() throws IOException {
     IpcOption option = new IpcOption(false, MetadataVersion.V5);
     ByteArrayOutputStream out = new ByteArrayOutputStream();
     MessageSerializer.serialize(new WriteChannel(Channels.newChannel(out)), batch, option);
+    validityb.close();
+    valuesb.close();
+    batch.close();
+
+    {
+      ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
+      ReadChannel channel = new ReadChannel(Channels.newChannel(in));
+      ArrowMessage deserialized = MessageSerializer.deserializeMessageBatch(channel, alloc);
+      assertEquals(ArrowRecordBatch.class, deserialized.getClass());
+      verifyBatch((ArrowRecordBatch) deserialized, validity, values);
+      deserialized.close();
+    }
 
-    ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
-    ReadChannel channel = new ReadChannel(Channels.newChannel(in));
-    ArrowMessage deserialized = MessageSerializer.deserializeMessageBatch(channel, alloc);
-    assertEquals(ArrowRecordBatch.class, deserialized.getClass());
-    verifyBatch((ArrowRecordBatch) deserialized, validity, values);
+    {
+      byte[] validBytes = out.toByteArray();
+      byte[] missingBytes = Arrays.copyOfRange(validBytes, /*from=*/0, validBytes.length - 1);
+
+      ByteArrayInputStream in = new ByteArrayInputStream(missingBytes);
+      ReadChannel channel = new ReadChannel(Channels.newChannel(in));
+
+      assertThrows(IOException.class, () -> MessageSerializer.deserializeMessageBatch(channel, alloc));
+    }
+
+    alloc.close();
   }
 
   public static Schema testSchema() {

From 54f790c3a8daf62d789197abd375cda6c45df61f Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Fri, 4 Jun 2021 05:40:22 +0000
Subject: [PATCH 343/719] ARROW-12961: [Python] Fix MSVC warning building
 PyArrow

I happened to notice this in a VM, though, it's not a big deal as it doesn't fail compilation.

Closes #10447 from lidavidm/arrow-12961

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Yibo Cai <yibo.cai@arm.com>
---
 python/pyarrow/includes/libarrow.pxd | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 9184bd5bbfd..b7de65120a3 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -1877,9 +1877,9 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil:
 
     cdef cppclass CScalarAggregateOptions \
             "arrow::compute::ScalarAggregateOptions"(CFunctionOptions):
-        CScalarAggregateOptions(c_bool skip_nulls, int64_t min_count)
+        CScalarAggregateOptions(c_bool skip_nulls, uint32_t min_count)
         c_bool skip_nulls
-        int64_t min_count
+        uint32_t min_count
 
     cdef cppclass CModeOptions \
             "arrow::compute::ModeOptions"(CFunctionOptions):

From ca66567febd283fc635da68361a72877a23d0c26 Mon Sep 17 00:00:00 2001
From: crystrix <chenxi.li@live.com>
Date: Fri, 4 Jun 2021 15:11:54 +0900
Subject: [PATCH 344/719] ARROW-11960: [C++][Gandiva] Support escape in LIKE

Add gdv_fn_like_utf8_utf8_int8 function in Gandiva to support escape char in LIKE. An escape char is stored in an int8 type which is compatible with char type in C++.

Closes #9700 from Crystrix/arrow-11960

Authored-by: crystrix <chenxi.li@live.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 cpp/src/gandiva/function_registry_string.cc |  4 +
 cpp/src/gandiva/gdv_function_stubs.cc       | 20 +++++
 cpp/src/gandiva/gdv_function_stubs.h        |  4 +
 cpp/src/gandiva/like_holder.cc              | 43 ++++++++++-
 cpp/src/gandiva/like_holder.h               |  3 +
 cpp/src/gandiva/like_holder_test.cc         | 84 +++++++++++++++++++++
 cpp/src/gandiva/tests/utf8_test.cc          | 43 +++++++++++
 7 files changed, 197 insertions(+), 4 deletions(-)

diff --git a/cpp/src/gandiva/function_registry_string.cc b/cpp/src/gandiva/function_registry_string.cc
index cbc70066306..35ef2dfcb34 100644
--- a/cpp/src/gandiva/function_registry_string.cc
+++ b/cpp/src/gandiva/function_registry_string.cc
@@ -124,6 +124,10 @@ std::vector<NativeFunction> GetStringFunctionRegistry() {
                      kResultNullIfNull, "gdv_fn_like_utf8_utf8",
                      NativeFunction::kNeedsFunctionHolder),
 
+      NativeFunction("like", {}, DataTypeVector{utf8(), utf8(), utf8()}, boolean(),
+                     kResultNullIfNull, "gdv_fn_like_utf8_utf8_utf8",
+                     NativeFunction::kNeedsFunctionHolder),
+
       NativeFunction("ltrim", {}, DataTypeVector{utf8(), utf8()}, utf8(),
                      kResultNullIfNull, "ltrim_utf8_utf8", NativeFunction::kNeedsContext),
 
diff --git a/cpp/src/gandiva/gdv_function_stubs.cc b/cpp/src/gandiva/gdv_function_stubs.cc
index a890775edad..26b8654fb7e 100644
--- a/cpp/src/gandiva/gdv_function_stubs.cc
+++ b/cpp/src/gandiva/gdv_function_stubs.cc
@@ -45,6 +45,13 @@ bool gdv_fn_like_utf8_utf8(int64_t ptr, const char* data, int data_len,
   return (*holder)(std::string(data, data_len));
 }
 
+bool gdv_fn_like_utf8_utf8_utf8(int64_t ptr, const char* data, int data_len,
+                                const char* pattern, int pattern_len,
+                                const char* escape_char, int escape_char_len) {
+  gandiva::LikeHolder* holder = reinterpret_cast<gandiva::LikeHolder*>(ptr);
+  return (*holder)(std::string(data, data_len));
+}
+
 double gdv_fn_random(int64_t ptr) {
   gandiva::RandomGeneratorHolder* holder =
       reinterpret_cast<gandiva::RandomGeneratorHolder*>(ptr);
@@ -732,6 +739,19 @@ void ExportedStubFunctions::AddMappings(Engine* engine) const {
                                   types->i1_type() /*return_type*/, args,
                                   reinterpret_cast<void*>(gdv_fn_like_utf8_utf8));
 
+  // gdv_fn_like_utf8_utf8_utf8
+  args = {types->i64_type(),     // int64_t ptr
+          types->i8_ptr_type(),  // const char* data
+          types->i32_type(),     // int data_len
+          types->i8_ptr_type(),  // const char* pattern
+          types->i32_type(),     // int pattern_len
+          types->i8_ptr_type(),  // const char* escape_char
+          types->i32_type()};    // int escape_char_len
+
+  engine->AddGlobalMappingForFunc("gdv_fn_like_utf8_utf8_utf8",
+                                  types->i1_type() /*return_type*/, args,
+                                  reinterpret_cast<void*>(gdv_fn_like_utf8_utf8_utf8));
+
   // gdv_fn_to_date_utf8_utf8
   args = {types->i64_type(),                   // int64_t execution_context
           types->i64_type(),                   // int64_t holder_ptr
diff --git a/cpp/src/gandiva/gdv_function_stubs.h b/cpp/src/gandiva/gdv_function_stubs.h
index 847772b17a4..d4a127dd1cf 100644
--- a/cpp/src/gandiva/gdv_function_stubs.h
+++ b/cpp/src/gandiva/gdv_function_stubs.h
@@ -46,6 +46,10 @@ using gdv_day_time_interval = int64_t;
 bool gdv_fn_like_utf8_utf8(int64_t ptr, const char* data, int data_len,
                            const char* pattern, int pattern_len);
 
+bool gdv_fn_like_utf8_utf8_utf8(int64_t ptr, const char* data, int data_len,
+                                const char* pattern, int pattern_len,
+                                const char* escape_char, int escape_char_len);
+
 int64_t gdv_fn_to_date_utf8_utf8_int32(int64_t context, int64_t ptr, const char* data,
                                        int data_len, bool in1_validity,
                                        const char* pattern, int pattern_len,
diff --git a/cpp/src/gandiva/like_holder.cc b/cpp/src/gandiva/like_holder.cc
index 688a4ffa130..5a3510e3652 100644
--- a/cpp/src/gandiva/like_holder.cc
+++ b/cpp/src/gandiva/like_holder.cc
@@ -67,8 +67,8 @@ static bool IsArrowStringLiteral(arrow::Type::type type) {
 }
 
 Status LikeHolder::Make(const FunctionNode& node, std::shared_ptr<LikeHolder>* holder) {
-  ARROW_RETURN_IF(node.children().size() != 2,
-                  Status::Invalid("'like' function requires two parameters"));
+  ARROW_RETURN_IF(node.children().size() != 2 && node.children().size() != 3,
+                  Status::Invalid("'like' function requires two or three parameters"));
 
   auto literal = dynamic_cast<LiteralNode*>(node.children().at(1).get());
   ARROW_RETURN_IF(
@@ -80,8 +80,22 @@ Status LikeHolder::Make(const FunctionNode& node, std::shared_ptr<LikeHolder>* h
       !IsArrowStringLiteral(literal_type),
       Status::Invalid(
           "'like' function requires a string literal as the second parameter"));
-
-  return Make(arrow::util::get<std::string>(literal->holder()), holder);
+  if (node.children().size() == 2) {
+    return Make(arrow::util::get<std::string>(literal->holder()), holder);
+  } else {
+    auto escape_char = dynamic_cast<LiteralNode*>(node.children().at(2).get());
+    ARROW_RETURN_IF(
+        escape_char == nullptr,
+        Status::Invalid("'like' function requires a literal as the third parameter"));
+
+    auto escape_char_type = escape_char->return_type()->id();
+    ARROW_RETURN_IF(
+        !IsArrowStringLiteral(escape_char_type),
+        Status::Invalid(
+            "'like' function requires a string literal as the third parameter"));
+    return Make(arrow::util::get<std::string>(literal->holder()),
+                arrow::util::get<std::string>(escape_char->holder()), holder);
+  }
 }
 
 Status LikeHolder::Make(const std::string& sql_pattern,
@@ -97,4 +111,25 @@ Status LikeHolder::Make(const std::string& sql_pattern,
   return Status::OK();
 }
 
+Status LikeHolder::Make(const std::string& sql_pattern, const std::string& escape_char,
+                        std::shared_ptr<LikeHolder>* holder) {
+  ARROW_RETURN_IF(escape_char.length() > 1,
+                  Status::Invalid("The length of escape char ", escape_char,
+                                  " in 'like' function is greater than 1"));
+  std::string pcre_pattern;
+  if (escape_char.length() == 1) {
+    ARROW_RETURN_NOT_OK(
+        RegexUtil::SqlLikePatternToPcre(sql_pattern, escape_char.at(0), pcre_pattern));
+  } else {
+    ARROW_RETURN_NOT_OK(RegexUtil::SqlLikePatternToPcre(sql_pattern, pcre_pattern));
+  }
+
+  auto lholder = std::shared_ptr<LikeHolder>(new LikeHolder(pcre_pattern));
+  ARROW_RETURN_IF(!lholder->regex_.ok(),
+                  Status::Invalid("Building RE2 pattern '", pcre_pattern, "' failed"));
+
+  *holder = lholder;
+  return Status::OK();
+}
+
 }  // namespace gandiva
diff --git a/cpp/src/gandiva/like_holder.h b/cpp/src/gandiva/like_holder.h
index 82c9e3b29a6..c7982e91437 100644
--- a/cpp/src/gandiva/like_holder.h
+++ b/cpp/src/gandiva/like_holder.h
@@ -39,6 +39,9 @@ class GANDIVA_EXPORT LikeHolder : public FunctionHolder {
 
   static Status Make(const std::string& sql_pattern, std::shared_ptr<LikeHolder>* holder);
 
+  static Status Make(const std::string& sql_pattern, const std::string& escape_char,
+                     std::shared_ptr<LikeHolder>* holder);
+
   // Try and optimise a function node with a "like" pattern.
   static const FunctionNode TryOptimize(const FunctionNode& node);
 
diff --git a/cpp/src/gandiva/like_holder_test.cc b/cpp/src/gandiva/like_holder_test.cc
index ce6697e72d6..18e585fc502 100644
--- a/cpp/src/gandiva/like_holder_test.cc
+++ b/cpp/src/gandiva/like_holder_test.cc
@@ -33,6 +33,16 @@ class TestLikeHolder : public ::testing::Test {
         std::make_shared<LiteralNode>(arrow::utf8(), LiteralHolder(pattern), false);
     return FunctionNode("like", {field, pattern_node}, arrow::boolean());
   }
+
+  FunctionNode BuildLike(std::string pattern, char escape_char) {
+    auto field = std::make_shared<FieldNode>(arrow::field("in", arrow::utf8()));
+    auto pattern_node =
+        std::make_shared<LiteralNode>(arrow::utf8(), LiteralHolder(pattern), false);
+    auto escape_char_node = std::make_shared<LiteralNode>(
+        arrow::int8(), LiteralHolder((int8_t)escape_char), false);
+    return FunctionNode("like", {field, pattern_node, escape_char_node},
+                        arrow::boolean());
+  }
 };
 
 TEST_F(TestLikeHolder, TestMatchAny) {
@@ -125,6 +135,80 @@ TEST_F(TestLikeHolder, TestOptimise) {
 
   fnode = LikeHolder::TryOptimize(BuildLike("x_yz%"));
   EXPECT_EQ(fnode.descriptor()->name(), "like");
+
+  // no optimisation for escaped pattern.
+  fnode = LikeHolder::TryOptimize(BuildLike("\\%xyz", '\\'));
+  EXPECT_EQ(fnode.descriptor()->name(), "like");
+  EXPECT_EQ(fnode.ToString(),
+            "bool like((string) in, (const string) \\%xyz, (const int8) \\)");
+}
+
+TEST_F(TestLikeHolder, TestMatchOneEscape) {
+  std::shared_ptr<LikeHolder> like_holder;
+
+  auto status = LikeHolder::Make("ab\\_", "\\", &like_holder);
+  EXPECT_EQ(status.ok(), true) << status.message();
+
+  auto& like = *like_holder;
+
+  EXPECT_TRUE(like("ab_"));
+
+  EXPECT_FALSE(like("abc"));
+  EXPECT_FALSE(like("abd"));
+  EXPECT_FALSE(like("a"));
+  EXPECT_FALSE(like("abcd"));
+  EXPECT_FALSE(like("dabc"));
+}
+
+TEST_F(TestLikeHolder, TestMatchManyEscape) {
+  std::shared_ptr<LikeHolder> like_holder;
+
+  auto status = LikeHolder::Make("ab\\%", "\\", &like_holder);
+  EXPECT_EQ(status.ok(), true) << status.message();
+
+  auto& like = *like_holder;
+
+  EXPECT_TRUE(like("ab%"));
+
+  EXPECT_FALSE(like("abc"));
+  EXPECT_FALSE(like("abd"));
+  EXPECT_FALSE(like("a"));
+  EXPECT_FALSE(like("abcd"));
+  EXPECT_FALSE(like("dabc"));
+}
+
+TEST_F(TestLikeHolder, TestMatchEscape) {
+  std::shared_ptr<LikeHolder> like_holder;
+
+  auto status = LikeHolder::Make("ab\\\\", "\\", &like_holder);
+  EXPECT_EQ(status.ok(), true) << status.message();
+
+  auto& like = *like_holder;
+
+  EXPECT_TRUE(like("ab\\"));
+
+  EXPECT_FALSE(like("abc"));
 }
 
+TEST_F(TestLikeHolder, TestEmptyEscapeChar) {
+  std::shared_ptr<LikeHolder> like_holder;
+
+  auto status = LikeHolder::Make("ab\\_", "", &like_holder);
+  EXPECT_EQ(status.ok(), true) << status.message();
+
+  auto& like = *like_holder;
+
+  EXPECT_TRUE(like("ab\\c"));
+  EXPECT_TRUE(like("ab\\_"));
+
+  EXPECT_FALSE(like("ab\\_d"));
+  EXPECT_FALSE(like("ab__"));
+}
+
+TEST_F(TestLikeHolder, TestMultipleEscapeChar) {
+  std::shared_ptr<LikeHolder> like_holder;
+
+  auto status = LikeHolder::Make("ab\\_", "\\\\", &like_holder);
+  EXPECT_EQ(status.ok(), false) << status.message();
+}
 }  // namespace gandiva
diff --git a/cpp/src/gandiva/tests/utf8_test.cc b/cpp/src/gandiva/tests/utf8_test.cc
index 29ce81f4942..01e62a59379 100644
--- a/cpp/src/gandiva/tests/utf8_test.cc
+++ b/cpp/src/gandiva/tests/utf8_test.cc
@@ -221,6 +221,49 @@ TEST_F(TestUtf8, TestLike) {
   EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
 }
 
+TEST_F(TestUtf8, TestLikeWithEscape) {
+  // schema for input fields
+  auto field_a = field("a", utf8());
+  auto schema = arrow::schema({field_a});
+
+  // output fields
+  auto res = field("res", boolean());
+
+  // build expressions.
+  // like(literal(s), a, '\')
+
+  auto node_a = TreeExprBuilder::MakeField(field_a);
+  auto literal_s = TreeExprBuilder::MakeStringLiteral("%pa\\%rk%");
+  auto escape_char = TreeExprBuilder::MakeStringLiteral("\\");
+  auto is_like =
+      TreeExprBuilder::MakeFunction("like", {node_a, literal_s, escape_char}, boolean());
+  auto expr = TreeExprBuilder::MakeExpression(is_like, res);
+
+  // Build a projector for the expressions.
+  std::shared_ptr<Projector> projector;
+  auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
+  EXPECT_TRUE(status.ok()) << status.message();
+
+  // Create a row-batch with some sample data
+  int num_records = 4;
+  auto array_a = MakeArrowArrayUtf8(
+      {"park", "spa%rkle", "bright spa%rk and fire", "spark"}, {true, true, true, true});
+
+  // expected output
+  auto exp = MakeArrowArrayBool({false, true, true, false}, {true, true, true, true});
+
+  // prepare input record batch
+  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});
+
+  // Evaluate expression
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  EXPECT_TRUE(status.ok()) << status.message();
+
+  // Validate results
+  EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
+}
+
 TEST_F(TestUtf8, TestBeginsEnds) {
   // schema for input fields
   auto field_a = field("a", utf8());

From 60e97272262ad2cc439ffe98502604a7e126726c Mon Sep 17 00:00:00 2001
From: Yibo Cai <yibo.cai@arm.com>
Date: Fri, 4 Jun 2021 12:14:50 +0200
Subject: [PATCH 345/719] ARROW-12911: [Python] Export scalar aggregate options
 to pc.sum

Closes #10433 from cyb70289/12911-py-sum

Authored-by: Yibo Cai <yibo.cai@arm.com>
Signed-off-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
---
 python/pyarrow/array.pxi             |  5 +++--
 python/pyarrow/compute.py            | 15 ---------------
 python/pyarrow/tests/test_compute.py |  5 +++++
 3 files changed, 8 insertions(+), 17 deletions(-)

diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 278b29000f6..a6c3b3a382f 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -836,11 +836,12 @@ cdef class Array(_PandasConvertible):
             result = GetResultValue(self.ap.View(type.sp_type))
         return pyarrow_wrap_array(result)
 
-    def sum(self):
+    def sum(self, **kwargs):
         """
         Sum the values in a numerical array.
         """
-        return _pc().call_function('sum', [self])
+        options = _pc().ScalarAggregateOptions(**kwargs)
+        return _pc().call_function('sum', [self], options)
 
     def unique(self):
         """
diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py
index c447aa95c5c..2c05d342008 100644
--- a/python/pyarrow/compute.py
+++ b/python/pyarrow/compute.py
@@ -373,21 +373,6 @@ def match_substring_regex(array, pattern, *, ignore_case=False):
                          MatchSubstringOptions(pattern, ignore_case))
 
 
-def sum(array):
-    """
-    Sum the values in a numerical (chunked) array.
-
-    Parameters
-    ----------
-    array : pyarrow.Array or pyarrow.ChunkedArray
-
-    Returns
-    -------
-    sum : pyarrow.Scalar
-    """
-    return call_function('sum', [array])
-
-
 def mode(array, n=1):
     """
     Return top-n most common values and number of times they occur in a passed
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index a9a2c0f347d..d7a164fc9dd 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -213,9 +213,13 @@ def test_sum_array(arrow_type):
     arr = pa.array([None], type=arrow_type)
     assert arr.sum().as_py() is None  # noqa: E711
     assert pc.sum(arr).as_py() is None  # noqa: E711
+    assert arr.sum(min_count=0).as_py() == 0
+    assert pc.sum(arr, min_count=0).as_py() == 0
 
     arr = pa.array([], type=arrow_type)
     assert arr.sum().as_py() is None  # noqa: E711
+    assert arr.sum(min_count=0).as_py() == 0
+    assert pc.sum(arr, min_count=0).as_py() == 0
 
 
 @pytest.mark.parametrize('arrow_type', numerical_arrow_types)
@@ -238,6 +242,7 @@ def test_sum_chunked_array(arrow_type):
     arr = pa.chunked_array((), type=arrow_type)
     assert arr.num_chunks == 0
     assert pc.sum(arr).as_py() is None  # noqa: E711
+    assert pc.sum(arr, min_count=0).as_py() == 0
 
 
 def test_mode_array():

From 649108e3d8fca2cd056776ff70aede66d57873ab Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Fri, 4 Jun 2021 09:57:05 -0500
Subject: [PATCH 346/719] ARROW-12956: [C++] Fix crash on Parquet file
 (OSS-Fuzz)

Should fix https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=34521

Closes #10446 from pitrou/ARROW-12956-parquet-fuzz

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/src/parquet/file_reader.cc | 10 +++++++++-
 testing                        |  2 +-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/cpp/src/parquet/file_reader.cc b/cpp/src/parquet/file_reader.cc
index 7ac0c9d86a8..4ff214232e5 100644
--- a/cpp/src/parquet/file_reader.cc
+++ b/cpp/src/parquet/file_reader.cc
@@ -30,6 +30,7 @@
 #include "arrow/io/memory.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/future.h"
+#include "arrow/util/int_util_internal.h"
 #include "arrow/util/logging.h"
 #include "arrow/util/ubsan.h"
 #include "parquet/column_reader.h"
@@ -44,6 +45,8 @@
 #include "parquet/schema.h"
 #include "parquet/types.h"
 
+using arrow::internal::AddWithOverflow;
+
 namespace parquet {
 
 // PARQUET-978: Minimize footer reads by reading 64 KB from the end of the file
@@ -103,13 +106,18 @@ ::arrow::io::ReadRange ComputeColumnChunkRange(FileMetaData* file_metadata,
   }
 
   int64_t col_length = column_metadata->total_compressed_size();
+  int64_t col_end;
+  if (AddWithOverflow(col_start, col_length, &col_end) || col_end > source_size) {
+    throw ParquetException("Invalid column metadata (corrupt file?)");
+  }
+
   // PARQUET-816 workaround for old files created by older parquet-mr
   const ApplicationVersion& version = file_metadata->writer_version();
   if (version.VersionLt(ApplicationVersion::PARQUET_816_FIXED_VERSION())) {
     // The Parquet MR writer had a bug in 1.2.8 and below where it didn't include the
     // dictionary page header size in total_compressed_size and total_uncompressed_size
     // (see IMPALA-694). We add padding to compensate.
-    int64_t bytes_remaining = source_size - (col_start + col_length);
+    int64_t bytes_remaining = source_size - col_end;
     int64_t padding = std::min<int64_t>(kMaxDictHeaderSize, bytes_remaining);
     col_length += padding;
   }
diff --git a/testing b/testing
index b658b087767..6d98243093c 160000
--- a/testing
+++ b/testing
@@ -1 +1 @@
-Subproject commit b658b087767b041b2081766814655b4dd5a9a439
+Subproject commit 6d98243093c0b36442da94de7010f3eacc2a9909

From 2e3a25e5f1329929e0fdb88ecc76bf404a5ccf57 Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Fri, 4 Jun 2021 08:53:00 -0700
Subject: [PATCH 347/719] ARROW-12761: [R] Better error handling for
 write_to_raw

Closes #10387 from thisisnic/ARROW-12761_error_handling_write_to_raw

Authored-by: Nic Crane <thisisnic@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 r/R/csv.R                       |  2 +-
 r/R/feather.R                   |  3 ++-
 r/R/ipc_stream.R                |  1 +
 r/R/parquet.R                   |  5 ++++-
 r/R/util.R                      | 16 ++++++++++++++++
 r/tests/testthat/test-csv.R     |  6 ++++--
 r/tests/testthat/test-feather.R |  8 ++++++++
 r/tests/testthat/test-parquet.R |  8 ++++++++
 8 files changed, 44 insertions(+), 5 deletions(-)

diff --git a/r/R/csv.R b/r/R/csv.R
index 70435b7650a..2708a5370f0 100644
--- a/r/R/csv.R
+++ b/r/R/csv.R
@@ -630,7 +630,7 @@ write_csv_arrow <- function(x,
     x <- Table$create(x)
   }
   
-  assert_is(x, "ArrowTabular")
+  assert_that(is_writable_table(x))
   
   if (!inherits(sink, "OutputStream")) {
     sink <- make_output_stream(sink)
diff --git a/r/R/feather.R b/r/R/feather.R
index d0f4a7e6257..187a5e06279 100644
--- a/r/R/feather.R
+++ b/r/R/feather.R
@@ -103,7 +103,8 @@ write_feather <- function(x,
   if (is.data.frame(x) || inherits(x, "RecordBatch")) {
     x <- Table$create(x)
   }
-  assert_is(x, "Table")
+  
+  assert_that(is_writable_table(x))
 
   if (!inherits(sink, "OutputStream")) {
     sink <- make_output_stream(sink)
diff --git a/r/R/ipc_stream.R b/r/R/ipc_stream.R
index 0934d0b0100..47ff43d9ac5 100644
--- a/r/R/ipc_stream.R
+++ b/r/R/ipc_stream.R
@@ -44,6 +44,7 @@ write_ipc_stream <- function(x, sink, ...) {
   if (is.data.frame(x)) {
     x <- Table$create(x)
   }
+  assert_that(is_writable_table(x))
   if (!inherits(sink, "OutputStream")) {
     sink <- make_output_stream(sink)
     on.exit(sink$close())
diff --git a/r/R/parquet.R b/r/R/parquet.R
index 9baecb3fb60..a9aef2c4d0d 100644
--- a/r/R/parquet.R
+++ b/r/R/parquet.R
@@ -152,9 +152,12 @@ write_parquet <- function(x,
                           properties = NULL,
                           arrow_properties = NULL) {
   x_out <- x
-  if (!inherits(x, "Table")) {
+  
+  if (is.data.frame(x) || inherits(x, "RecordBatch")) {
     x <- Table$create(x)
   }
+  
+  assert_that(is_writable_table(x))
 
   if (!inherits(sink, "OutputStream")) {
     sink <- make_output_stream(sink)
diff --git a/r/R/util.R b/r/R/util.R
index f5b505f352a..8fbe6c4d080 100644
--- a/r/R/util.R
+++ b/r/R/util.R
@@ -110,3 +110,19 @@ handle_embedded_nul_error <- function(e) {
   }
   stop(e)
 }
+
+is_writable_table <- function(x) {
+  inherits(x, c("data.frame", "ArrowTabular"))
+}
+
+# This attribute is used when is_writable is passed into assert_that, and allows 
+# the call to form part of the error message when is_writable is FALSE
+attr(is_writable_table, "fail") <- function(call, env){
+  paste0(
+    deparse(call$x),
+    " must be an object of class 'data.frame', 'RecordBatch', or 'Table', not '",
+    class(env[[deparse(call$x)]])[[1]], 
+    "'."
+  )
+}
+
diff --git a/r/tests/testthat/test-csv.R b/r/tests/testthat/test-csv.R
index a61480fb33a..2a62b8c4e34 100644
--- a/r/tests/testthat/test-csv.R
+++ b/r/tests/testthat/test-csv.R
@@ -315,9 +315,11 @@ test_that("Write a CSV file with different batch sizes", {
 })
 
 test_that("Write a CSV file with invalid input type", {
+  
+  bad_input <- Array$create(1:5)
   expect_error(
-    write_csv_arrow(Array$create(1:5), csv_file),
-    regexp = 'x must be a "ArrowTabular"'
+    write_csv_arrow(bad_input, csv_file),
+    regexp = "x must be an object of class 'data.frame', 'RecordBatch', or 'Table', not 'Array'."
     )
 })
 
diff --git a/r/tests/testthat/test-feather.R b/r/tests/testthat/test-feather.R
index d5d82a73e12..5a537c7a9bc 100644
--- a/r/tests/testthat/test-feather.R
+++ b/r/tests/testthat/test-feather.R
@@ -103,6 +103,14 @@ test_that("write_feather option error handling", {
   expect_false(file.exists(tf))
 })
 
+test_that("write_feather with invalid input type", {
+  bad_input <- Array$create(1:5)
+  expect_error(
+    write_feather(bad_input, feather_file),
+    regexp = "x must be an object of class 'data.frame', 'RecordBatch', or 'Table', not 'Array'."
+  )
+})
+
 test_that("read_feather supports col_select = <names>", {
   tab1 <- read_feather(feather_file, col_select = c("x", "y"))
   expect_s3_class(tab1, "data.frame")
diff --git a/r/tests/testthat/test-parquet.R b/r/tests/testthat/test-parquet.R
index 14e7aa78e05..2e282a4b5fc 100644
--- a/r/tests/testthat/test-parquet.R
+++ b/r/tests/testthat/test-parquet.R
@@ -106,6 +106,14 @@ test_that("write_parquet() accepts RecordBatch too", {
   expect_equivalent(tab, Table$create(batch))
 })
 
+test_that("write_parquet() with invalid input type", {
+  bad_input <- Array$create(1:5)
+  expect_error(
+    write_parquet(bad_input, tempfile()),
+    regexp = "x must be an object of class 'data.frame', 'RecordBatch', or 'Table', not 'Array'."
+  )
+})
+
 test_that("write_parquet() can truncate timestamps", {
   tab <- Table$create(x1 = as.POSIXct("2020/06/03 18:00:00", tz = "UTC"))
   expect_type_equal(tab$x1, timestamp("us", "UTC"))

From 15b7ae1521a5010964aa475e7600dda80c03bc89 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 4 Jun 2021 11:33:18 -0500
Subject: [PATCH 348/719] ARROW-12966: [Python] Expose element_wise_min/max and
 options in Python

Closes #10451 from jorisvandenbossche/ARROW-12966

Authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/src/arrow/compute/api_scalar.h            | 17 ++++++----
 .../arrow/compute/kernels/scalar_compare.cc   |  6 +++-
 python/pyarrow/_compute.pyx                   | 17 ++++++++++
 python/pyarrow/compute.py                     |  1 +
 python/pyarrow/includes/libarrow.pxd          |  5 +++
 python/pyarrow/tests/test_compute.py          | 34 +++++++++++++++++++
 6 files changed, 72 insertions(+), 8 deletions(-)

diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h
index ab690f4c456..1d15b629b6c 100644
--- a/cpp/src/arrow/compute/api_scalar.h
+++ b/cpp/src/arrow/compute/api_scalar.h
@@ -43,7 +43,8 @@ struct ArithmeticOptions : public FunctionOptions {
 };
 
 struct ARROW_EXPORT ElementWiseAggregateOptions : public FunctionOptions {
-  ElementWiseAggregateOptions() : skip_nulls(true) {}
+  explicit ElementWiseAggregateOptions(bool skip_nulls = true) : skip_nulls(skip_nulls) {}
+  static ElementWiseAggregateOptions Defaults() { return ElementWiseAggregateOptions{}; }
   bool skip_nulls;
 };
 
@@ -266,9 +267,10 @@ Result<Datum> Power(const Datum& left, const Datum& right,
 /// \param[in] ctx the function execution context, optional
 /// \return the element-wise maximum
 ARROW_EXPORT
-Result<Datum> ElementWiseMax(const std::vector<Datum>& args,
-                             ElementWiseAggregateOptions options = {},
-                             ExecContext* ctx = NULLPTR);
+Result<Datum> ElementWiseMax(
+    const std::vector<Datum>& args,
+    ElementWiseAggregateOptions options = ElementWiseAggregateOptions::Defaults(),
+    ExecContext* ctx = NULLPTR);
 
 /// \brief Find the element-wise minimum of any number of arrays or scalars.
 /// Array values must be the same length.
@@ -278,9 +280,10 @@ Result<Datum> ElementWiseMax(const std::vector<Datum>& args,
 /// \param[in] ctx the function execution context, optional
 /// \return the element-wise minimum
 ARROW_EXPORT
-Result<Datum> ElementWiseMin(const std::vector<Datum>& args,
-                             ElementWiseAggregateOptions options = {},
-                             ExecContext* ctx = NULLPTR);
+Result<Datum> ElementWiseMin(
+    const std::vector<Datum>& args,
+    ElementWiseAggregateOptions options = ElementWiseAggregateOptions::Defaults(),
+    ExecContext* ctx = NULLPTR);
 
 /// \brief Compare a numeric array with a scalar.
 ///
diff --git a/cpp/src/arrow/compute/kernels/scalar_compare.cc b/cpp/src/arrow/compute/kernels/scalar_compare.cc
index 8e9e224bdde..6763b6793f3 100644
--- a/cpp/src/arrow/compute/kernels/scalar_compare.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_compare.cc
@@ -417,7 +417,11 @@ struct ScalarMinMax {
 template <typename Op>
 std::shared_ptr<ScalarFunction> MakeScalarMinMax(std::string name,
                                                  const FunctionDoc* doc) {
-  auto func = std::make_shared<VarArgsCompareFunction>(name, Arity::VarArgs(), doc);
+  static auto default_element_wise_aggregate_options =
+      ElementWiseAggregateOptions::Defaults();
+
+  auto func = std::make_shared<VarArgsCompareFunction>(
+      name, Arity::VarArgs(), doc, &default_element_wise_aggregate_options);
   for (const auto& ty : NumericTypes()) {
     auto exec = GeneratePhysicalNumeric<ScalarMinMax, Op>(ty);
     ScalarKernel kernel{KernelSignature::Make({ty}, ty, /*is_varargs=*/true), exec,
diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx
index 1b62226b2b3..b3d12396b0a 100644
--- a/python/pyarrow/_compute.pyx
+++ b/python/pyarrow/_compute.pyx
@@ -650,6 +650,23 @@ class CastOptions(_CastOptions):
         return self
 
 
+cdef class _ElementWiseAggregateOptions(FunctionOptions):
+    cdef:
+        unique_ptr[CElementWiseAggregateOptions] element_wise_aggregate_options
+
+    cdef const CFunctionOptions* get_options(self) except NULL:
+        return self.element_wise_aggregate_options.get()
+
+    def _set_options(self, bint skip_nulls):
+        self.element_wise_aggregate_options.reset(
+            new CElementWiseAggregateOptions(skip_nulls))
+
+
+class ElementWiseAggregateOptions(_ElementWiseAggregateOptions):
+    def __init__(self, bint skip_nulls=True):
+        self._set_options(skip_nulls)
+
+
 cdef class _MatchSubstringOptions(FunctionOptions):
     cdef:
         unique_ptr[CMatchSubstringOptions] match_substring_options
diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py
index 2c05d342008..37451c439ce 100644
--- a/python/pyarrow/compute.py
+++ b/python/pyarrow/compute.py
@@ -32,6 +32,7 @@
     ArraySortOptions,
     CastOptions,
     DictionaryEncodeOptions,
+    ElementWiseAggregateOptions,
     ExtractRegexOptions,
     FilterOptions,
     IndexOptions,
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index b7de65120a3..eb7e27d60bf 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -1781,6 +1781,11 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil:
 
     CFunctionRegistry* GetFunctionRegistry()
 
+    cdef cppclass CElementWiseAggregateOptions \
+            "arrow::compute::ElementWiseAggregateOptions"(CFunctionOptions):
+        CElementWiseAggregateOptions(c_bool skip_nulls)
+        c_bool skip_nulls
+
     cdef cppclass CMatchSubstringOptions \
             "arrow::compute::MatchSubstringOptions"(CFunctionOptions):
         CMatchSubstringOptions(c_string pattern, c_bool ignore_case)
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index d7a164fc9dd..7444c24ccf2 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -1356,3 +1356,37 @@ def test_fill_null_segfault():
     arr = pa.array([None], pa.bool_()).fill_null(False)
     result = arr.cast(pa.int8())
     assert result == pa.array([0], pa.int8())
+
+
+def test_elementwise_min_max():
+    arr1 = pa.array([1, 2, 3])
+    arr2 = pa.array([3, 1, 2])
+    arr3 = pa.array([2, 3, None])
+
+    result = pc.element_wise_max(arr1, arr2)
+    assert result == pa.array([3, 2, 3])
+    result = pc.element_wise_min(arr1, arr2)
+    assert result == pa.array([1, 1, 2])
+
+    result = pc.element_wise_max(arr1, arr2, arr3)
+    assert result == pa.array([3, 3, 3])
+    result = pc.element_wise_min(arr1, arr2, arr3)
+    assert result == pa.array([1, 1, 2])
+
+    # with specifying the option
+    result = pc.element_wise_max(arr1, arr3, skip_nulls=True)
+    assert result == pa.array([2, 3, 3])
+    result = pc.element_wise_min(arr1, arr3, skip_nulls=True)
+    assert result == pa.array([1, 2, 3])
+    result = pc.element_wise_max(
+        arr1, arr3, options=pc.ElementWiseAggregateOptions())
+    assert result == pa.array([2, 3, 3])
+    result = pc.element_wise_min(
+        arr1, arr3, options=pc.ElementWiseAggregateOptions())
+    assert result == pa.array([1, 2, 3])
+
+    # not skipping nulls
+    result = pc.element_wise_max(arr1, arr3, skip_nulls=False)
+    assert result == pa.array([2, 3, None])
+    result = pc.element_wise_min(arr1, arr3, skip_nulls=False)
+    assert result == pa.array([1, 2, None])

From 788dbbc14f07fd9afc76a15ae17efb74ac5936f6 Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Fri, 4 Jun 2021 13:11:41 -0700
Subject: [PATCH 349/719] ARROW-12791: [R] Better error handling for
 DatasetFactory$Finish() when no format specified

Closes #10326 from thisisnic/ARROW-12791_schema_error

Lead-authored-by: Nic Crane <thisisnic@gmail.com>
Co-authored-by: Nic <thisisnic@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 r/NAMESPACE                     |  1 +
 r/R/arrow-package.R             |  2 +-
 r/R/dataset.R                   | 25 ++++++++++++++++++++++---
 r/R/util.R                      | 13 +++++++++++++
 r/man/open_dataset.Rd           | 16 ++++++++++++++++
 r/tests/testthat/test-dataset.R | 16 ++++++++++++++++
 6 files changed, 69 insertions(+), 4 deletions(-)

diff --git a/r/NAMESPACE b/r/NAMESPACE
index 991e384723e..f298ba905ee 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -312,6 +312,7 @@ importFrom(rlang,eval_tidy)
 importFrom(rlang,exec)
 importFrom(rlang,expr)
 importFrom(rlang,is_bare_character)
+importFrom(rlang,is_character)
 importFrom(rlang,is_false)
 importFrom(rlang,is_integerish)
 importFrom(rlang,is_quosure)
diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R
index c263d20f8df..684382039f1 100644
--- a/r/R/arrow-package.R
+++ b/r/R/arrow-package.R
@@ -19,7 +19,7 @@
 #' @importFrom R6 R6Class
 #' @importFrom purrr as_mapper map map2 map_chr map2_chr map_dfr map_int map_lgl keep
 #' @importFrom assertthat assert_that is.string
-#' @importFrom rlang list2 %||% is_false abort dots_n warn enquo quo_is_null enquos is_integerish quos eval_tidy new_data_mask syms env new_environment env_bind as_label set_names exec is_bare_character quo_get_expr quo_set_expr .data seq2 is_quosure enexpr enexprs expr caller_env
+#' @importFrom rlang list2 %||% is_false abort dots_n warn enquo quo_is_null enquos is_integerish quos eval_tidy new_data_mask syms env new_environment env_bind as_label set_names exec is_bare_character quo_get_expr quo_set_expr .data seq2 is_quosure enexpr enexprs expr caller_env is_character
 #' @importFrom tidyselect vars_pull vars_rename vars_select eval_select
 #' @useDynLib arrow, .registration = TRUE
 #' @keywords internal
diff --git a/r/R/dataset.R b/r/R/dataset.R
index 8716ef0d5c5..095c56fc891 100644
--- a/r/R/dataset.R
+++ b/r/R/dataset.R
@@ -60,6 +60,18 @@
 #' be slow) but `TRUE` when `sources` is a list of `Dataset`s (because there
 #' should be few `Dataset`s in the list and their `Schema`s are already in
 #' memory).
+#' @param format A [FileFormat] object, or a string identifier of the format of
+#' the files in `x`. This argument is ignored when `sources` is a list of `Dataset` objects.
+#' Currently supported values:
+#' * "parquet"
+#' * "ipc"/"arrow"/"feather", all aliases for each other; for Feather, note that
+#'   only version 2 files are supported
+#' * "csv"/"text", aliases for the same thing (because comma is the default
+#'   delimiter for text files
+#' * "tsv", equivalent to passing `format = "text", delimiter = "\t"`
+#'
+#' Default is "parquet", unless a `delimiter` is also specified, in which case
+#' it is assumed to be "text".
 #' @param ... additional arguments passed to `dataset_factory()` when `sources`
 #' is a directory path/URI or vector of file paths/URIs, otherwise ignored.
 #' These may include `format` to indicate the file format, or other
@@ -97,6 +109,7 @@ open_dataset <- function(sources,
                          schema = NULL,
                          partitioning = hive_partition(),
                          unify_schemas = NULL,
+                         format = c("parquet", "arrow", "ipc", "feather", "csv", "tsv", "text"),
                          ...) {
   if (is_list_of(sources, "Dataset")) {
     if (is.null(schema)) {
@@ -116,9 +129,15 @@ open_dataset <- function(sources,
     })
     return(dataset___UnionDataset__create(sources, schema))
   }
-  factory <- DatasetFactory$create(sources, partitioning = partitioning, ...)
-  # Default is _not_ to inspect/unify schemas
-  factory$Finish(schema, isTRUE(unify_schemas))
+  
+  factory <- DatasetFactory$create(sources, partitioning = partitioning, format = format, ...)
+  tryCatch(
+    # Default is _not_ to inspect/unify schemas
+    factory$Finish(schema, isTRUE(unify_schemas)),
+    error = function(e){
+      handle_parquet_io_error(e, format)
+    }
+  )
 }
 
 #' Multi-file datasets
diff --git a/r/R/util.R b/r/R/util.R
index 8fbe6c4d080..8d1f51bd079 100644
--- a/r/R/util.R
+++ b/r/R/util.R
@@ -111,6 +111,19 @@ handle_embedded_nul_error <- function(e) {
   stop(e)
 }
 
+handle_parquet_io_error <- function(e, format) {
+  msg <- conditionMessage(e)
+  if (grepl("Parquet magic bytes not found in footer", msg) && length(format) > 1 && is_character(format)) {
+    # If length(format) > 1, that means it is (almost certainly) the default/not specified value
+    # so let the user know that they should specify the actual (not parquet) format
+    abort(c(
+      msg, 
+      i = "Did you mean to specify a 'format' other than the default (parquet)?"
+    ))
+  }
+  stop(e)
+}
+
 is_writable_table <- function(x) {
   inherits(x, c("data.frame", "ArrowTabular"))
 }
diff --git a/r/man/open_dataset.Rd b/r/man/open_dataset.Rd
index 7175bb132ea..adc0b56eac4 100644
--- a/r/man/open_dataset.Rd
+++ b/r/man/open_dataset.Rd
@@ -9,6 +9,7 @@ open_dataset(
   schema = NULL,
   partitioning = hive_partition(),
   unify_schemas = NULL,
+  format = c("parquet", "arrow", "ipc", "feather", "csv", "tsv", "text"),
   ...
 )
 }
@@ -58,6 +59,21 @@ be slow) but \code{TRUE} when \code{sources} is a list of \code{Dataset}s (becau
 should be few \code{Dataset}s in the list and their \code{Schema}s are already in
 memory).}
 
+\item{format}{A \link{FileFormat} object, or a string identifier of the format of
+the files in \code{x}. This argument is ignored when \code{sources} is a list of \code{Dataset} objects.
+Currently supported values:
+\itemize{
+\item "parquet"
+\item "ipc"/"arrow"/"feather", all aliases for each other; for Feather, note that
+only version 2 files are supported
+\item "csv"/"text", aliases for the same thing (because comma is the default
+delimiter for text files
+\item "tsv", equivalent to passing \verb{format = "text", delimiter = "\\t"}
+}
+
+Default is "parquet", unless a \code{delimiter} is also specified, in which case
+it is assumed to be "text".}
+
 \item{...}{additional arguments passed to \code{dataset_factory()} when \code{sources}
 is a directory path/URI or vector of file paths/URIs, otherwise ignored.
 These may include \code{format} to indicate the file format, or other
diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R
index be141c74659..7c8ab5e47af 100644
--- a/r/tests/testthat/test-dataset.R
+++ b/r/tests/testthat/test-dataset.R
@@ -1502,3 +1502,19 @@ test_that("Collecting zero columns from a dataset doesn't return entire dataset"
     c(32, 0)
   )
 })
+
+# see https://issues.apache.org/jira/browse/ARROW-12791
+test_that("Error if no format specified and files are not parquet", {
+  skip_if_not_available("parquet")
+  expect_error(
+    open_dataset(csv_dir, partitioning = "part"),
+    "Did you mean to specify a 'format' other than the default (parquet)?",
+    fixed = TRUE
+  )
+  expect_failure(
+    expect_error(
+      open_dataset(csv_dir, partitioning = "part", format = "parquet"),
+      "Did you mean to specify a 'format'"
+    )
+  )
+})

From 99fd3b86f5cf352d9f4df0f779f5c1f800401c01 Mon Sep 17 00:00:00 2001
From: Alenka Frim <frim.alenka@gmail.com>
Date: Fri, 4 Jun 2021 15:13:32 -0500
Subject: [PATCH 350/719] ARROW-12198: [R] bindings for strptime

Closes #10334 from AlenkaF/ARROW-12198

Lead-authored-by: Alenka Frim <frim.alenka@gmail.com>
Co-authored-by: Alenka Frim <AlenkaF@users.noreply.github.com>
Co-authored-by: Jonathan Keane <jkeane@gmail.com>
Signed-off-by: Jonathan Keane <jkeane@gmail.com>
---
 r/R/dplyr-functions.R                         | 17 +++++
 r/src/compute.cpp                             |  7 ++
 .../testthat/test-dplyr-string-functions.R    | 75 +++++++++++++++++++
 3 files changed, 99 insertions(+)

diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R
index e62f3e93007..91d1b21ad88 100644
--- a/r/R/dplyr-functions.R
+++ b/r/R/dplyr-functions.R
@@ -425,3 +425,20 @@ get_stringr_pattern_options <- function(pattern) {
 contains_regex <- function(string) {
   grepl("[.\\|()[{^$*+?]", string)
 }
+
+nse_funcs$strptime <- function(x, format = "%Y-%m-%d %H:%M:%S", tz = NULL, unit = "ms") {
+  # Arrow uses unit for time parsing, strptime() does not.
+  # Arrow has no default option for strptime (format, unit),
+  # we suggest following format = "%Y-%m-%d %H:%M:%S", unit = MILLI/1L/"ms",
+  # (ARROW-12809)
+
+  # ParseTimestampStrptime currently ignores the timezone information (ARROW-12820).
+  # Stop if tz is provided.
+  if (is.character(tz)) {
+    arrow_not_supported("Time zone argument")
+  }
+
+  unit <- make_valid_time_unit(unit, c(valid_time64_units, valid_time32_units))
+
+  Expression$create("strptime", x, options = list(format = format, unit = unit))
+}
diff --git a/r/src/compute.cpp b/r/src/compute.cpp
index 26f0752d847..eab9db54134 100644
--- a/r/src/compute.cpp
+++ b/r/src/compute.cpp
@@ -239,6 +239,13 @@ std::shared_ptr<arrow::compute::FunctionOptions> make_compute_options(
                                      max_replacements);
   }
 
+  if (func_name == "strptime") {
+    using Options = arrow::compute::StrptimeOptions;
+    return std::make_shared<Options>(
+        cpp11::as_cpp<std::string>(options["format"]),
+        cpp11::as_cpp<arrow::TimeUnit::type>(options["unit"]));
+  }
+
   if (func_name == "split_pattern" || func_name == "split_pattern_regex") {
     using Options = arrow::compute::SplitPatternOptions;
     int64_t max_splits = -1;
diff --git a/r/tests/testthat/test-dplyr-string-functions.R b/r/tests/testthat/test-dplyr-string-functions.R
index bb4794ef4c5..ea27aa14777 100644
--- a/r/tests/testthat/test-dplyr-string-functions.R
+++ b/r/tests/testthat/test-dplyr-string-functions.R
@@ -493,3 +493,78 @@ test_that("edge cases in string detection and replacement", {
     tibble(x = c("ABC"))
   )
 })
+
+test_that("strptime", {
+
+  t_string <- tibble(x = c("2018-10-07 19:04:05", NA))
+  t_stamp <- tibble(x = c(lubridate::ymd_hms("2018-10-07 19:04:05"), NA))
+
+  expect_equal(
+    t_string %>%
+      Table$create() %>%
+      mutate(
+        x = strptime(x)
+      ) %>%
+      collect(),
+    t_stamp,
+    check.tzone = FALSE
+  )
+
+  expect_equal(
+    t_string %>%
+      Table$create() %>%
+      mutate(
+        x = strptime(x, format = "%Y-%m-%d %H:%M:%S")
+      ) %>%
+      collect(),
+    t_stamp,
+    check.tzone = FALSE
+  )
+
+  expect_equal(
+    t_string %>%
+      Table$create() %>%
+      mutate(
+        x = strptime(x, format = "%Y-%m-%d %H:%M:%S", unit = "ns")
+      ) %>%
+      collect(),
+    t_stamp,
+    check.tzone = FALSE
+  )
+
+  expect_equal(
+    t_string %>%
+      Table$create() %>%
+      mutate(
+        x = strptime(x, format = "%Y-%m-%d %H:%M:%S", unit = "s")
+      ) %>%
+      collect(),
+    t_stamp,
+    check.tzone = FALSE
+  )
+
+  tstring <- tibble(x = c("08-05-2008", NA))
+  tstamp <- tibble(x = c(strptime("08-05-2008", format = "%m-%d-%Y"), NA))
+
+  expect_equal(
+    tstring %>%
+      Table$create() %>%
+      mutate(
+        x = strptime(x, format = "%m-%d-%Y")
+      ) %>%
+      collect(),
+    tstamp,
+    check.tzone = FALSE
+  )
+
+})
+
+test_that("errors in strptime", {
+  # Error when tz is passed
+
+  x <- Expression$field_ref("x")
+  expect_error(
+    nse_funcs$strptime(x, tz = "PDT"),
+    'Time zone argument not supported by Arrow'
+  )
+})

From 3835dd5c2beb114a09c8e648ef7754d27c9b06aa Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Fri, 4 Jun 2021 15:21:56 -0500
Subject: [PATCH 351/719] ARROW-12968: [R] [CI] Add an rchk job to our
 nightlies

This is failing, though we have ARROW-12957 #10449 for that

Closes #10456 from jonkeane/ARROW-12968-rchk-CI

Authored-by: Jonathan Keane <jkeane@gmail.com>
Signed-off-by: Jonathan Keane <jkeane@gmail.com>
---
 dev/tasks/r/github.linux.rchk.yml | 77 +++++++++++++++++++++++++++++++
 dev/tasks/tasks.yml               |  4 ++
 2 files changed, 81 insertions(+)
 create mode 100644 dev/tasks/r/github.linux.rchk.yml

diff --git a/dev/tasks/r/github.linux.rchk.yml b/dev/tasks/r/github.linux.rchk.yml
new file mode 100644
index 00000000000..49b819efcfa
--- /dev/null
+++ b/dev/tasks/r/github.linux.rchk.yml
@@ -0,0 +1,77 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# NOTE: must set "Crossbow" as name to have the badge links working in the
+# github comment reports!
+name: Crossbow
+
+on:
+  push:
+    branches:
+      - "*-github-*"
+
+jobs:
+  as-cran:
+    name: "rchk"
+    runs-on: ubuntu-20.04
+    strategy:
+      fail-fast: false
+    env:
+      ARROW_R_DEV: "FALSE"
+      RSPM: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"
+    steps:
+      - name: Checkout Arrow
+        run: |
+          git clone --no-checkout {{ arrow.remote }} arrow
+          git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
+          git -C arrow checkout FETCH_HEAD
+          git -C arrow submodule update --init --recursive
+      - name: Free Up Disk Space
+        shell: bash
+        run: arrow/ci/scripts/util_cleanup.sh
+      - uses: r-lib/actions/setup-r@v1
+      - uses: r-lib/actions/setup-pandoc@v1
+      - name: Install dependencies
+        run: |
+          install.packages("remotes")
+          remotes::install_deps("arrow/r", dependencies = TRUE)
+        shell: Rscript {0}
+      - name: Build arrow package
+        run: |
+          R CMD build arrow/r
+          mkdir packages
+          mv arrow_*.tar.gz packages
+      - name: rchk
+        run: |
+          docker run -v `pwd`/packages:/rchk/packages kalibera/rchk:latest /rchk/packages/arrow_*.tar.gz |& tee rchk.out
+      - name: Confirm that rchk has no errors
+        # Suspicious call, [UP], and [PB] are all of the error types currently at
+        # https://github.com/kalibera/cran-checks/tree/master/rchk/results
+        # though this might not be exhaustive, there does not appear to be a way to have rchk return an error code
+        # CRAN also will remove some of the outputs (especially those related to Rcpp and strptime, e.g.
+        # ERROR: too many states (abstraction error?))
+        # https://github.com/kalibera/rchk
+        run: |
+          if [ $(grep -c "Suspicious call" rchk.out) -gt 0 ] || [ $(grep -c "\[UP\]" rchk.out) -gt 0 ] || [ $(grep -c "\[PB\]" rchk.out) -gt 0 ]; then
+            echo "Found rchk errors"
+            cat rchk.out
+            exit 1
+          fi
+        if: always()
+      - name: Dump rchk output logs
+        run: cat rchk.out
+        if: always()
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index 23ff18d6dec..7e3fe74bbbb 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -847,6 +847,10 @@ tasks:
         UBUNTU: 18.04
       run: ubuntu-r-valgrind
 
+  test-r-linux-rchk:
+    ci: github
+    template: r/github.linux.rchk.yml
+
   test-r-linux-as-cran:
     ci: github
     template: r/github.linux.cran.yml

From 98f352ed8325e9d9c01f9d840dec221311f7fb22 Mon Sep 17 00:00:00 2001
From: Romain Francois <romain@rstudio.com>
Date: Fri, 4 Jun 2021 15:58:55 -0500
Subject: [PATCH 352/719] ARROW-12957: [R] rchk issues on cran

This should fix the issues reported by rchk

Closes #10449 from romainfrancois/ARROW_12957_rchk

Authored-by: Romain Francois <romain@rstudio.com>
Signed-off-by: Jonathan Keane <jkeane@gmail.com>
---
 r/src/arrow_cpp11.h | 1 -
 r/src/symbols.cpp   | 9 ++++++---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/r/src/arrow_cpp11.h b/r/src/arrow_cpp11.h
index 1d0e26e1a38..f8dc2bc7322 100644
--- a/r/src/arrow_cpp11.h
+++ b/r/src/arrow_cpp11.h
@@ -141,7 +141,6 @@ struct data {
   static SEXP classes_ordered;
 
   static SEXP names_metadata;
-  static SEXP empty_raw;
 };
 
 struct ns {
diff --git a/r/src/symbols.cpp b/r/src/symbols.cpp
index 256f9e7acce..49f18271b57 100644
--- a/r/src/symbols.cpp
+++ b/r/src/symbols.cpp
@@ -36,15 +36,19 @@ SEXP symbols::create = Rf_install("create");
 
 // persistently protect `x` and return it
 SEXP precious(SEXP x) {
+  PROTECT(x);
   R_PreserveObject(x);
+  UNPROTECT(1);
   return x;
 }
 
 // returns the namespace environment for package `name`
-SEXP r_namespace(std::string name) {
+SEXP precious_namespace(std::string name) {
   SEXP s_name = PROTECT(cpp11::writable::strings({name}));
   SEXP ns = R_FindNamespace(s_name);
+  R_PreserveObject(ns);
   UNPROTECT(1);
+
   return ns;
 }
 SEXP data::classes_POSIXct = precious(cpp11::writable::strings({"POSIXct", "POSIXt"}));
@@ -71,9 +75,8 @@ SEXP data::classes_arrow_fixed_size_list = precious(cpp11::writable::strings(
     {"arrow_fixed_size_list", "vctrs_list_of", "vctrs_vctr", "list"}));
 
 SEXP data::names_metadata = precious(cpp11::writable::strings({"attributes", "columns"}));
-SEXP data::empty_raw = precious(Rf_allocVector(RAWSXP, 0));
 
-SEXP ns::arrow = precious(r_namespace("arrow"));
+SEXP ns::arrow = precious_namespace("arrow");
 
 void inspect(SEXP obj) {
   SEXP call_inspect = PROTECT(Rf_lang2(symbols::inspect, obj));

From f0c4854412c768a3630b5dc272bdd2040abe4c83 Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Sun, 6 Jun 2021 09:33:59 -0700
Subject: [PATCH 353/719] ARROW-12974: [R] test-r-without-arrow build fails
 because of example requiring Arrow

Closes #10455 from thisisnic/ARROW-12974_chunkedarray_bug

Authored-by: Nic Crane <thisisnic@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 r/R/array.R                     | 2 +-
 r/R/buffer.R                    | 2 +-
 r/R/chunked-array.R             | 2 +-
 r/R/compression.R               | 2 +-
 r/R/compute.R                   | 6 +++---
 r/R/filesystem.R                | 6 ++----
 r/R/flight.R                    | 4 ++--
 r/R/ipc_stream.R                | 4 ++--
 r/R/scalar.R                    | 2 +-
 r/R/type.R                      | 2 +-
 r/man/ChunkedArray.Rd           | 6 ++++--
 r/man/Scalar.Rd                 | 2 ++
 r/man/array.Rd                  | 3 ++-
 r/man/buffer.Rd                 | 2 ++
 r/man/codec_is_available.Rd     | 2 ++
 r/man/list_compute_functions.Rd | 2 ++
 r/man/load_flight_server.Rd     | 4 +++-
 r/man/match_arrow.Rd            | 2 ++
 r/man/s3_bucket.Rd              | 6 +++---
 r/man/type.Rd                   | 4 +++-
 r/man/value_counts.Rd           | 2 ++
 r/man/write_ipc_stream.Rd       | 2 ++
 r/man/write_to_raw.Rd           | 2 ++
 23 files changed, 46 insertions(+), 25 deletions(-)

diff --git a/r/R/array.R b/r/R/array.R
index 0f65743d44d..0a117e5e74f 100644
--- a/r/R/array.R
+++ b/r/R/array.R
@@ -84,7 +84,7 @@
 #'
 #' @rdname array
 #' @name array
-#' @examples
+#' @examplesIf arrow_available()
 #' my_array <- Array$create(1:10)
 #' my_array$type
 #' my_array$cast(int8())
diff --git a/r/R/buffer.R b/r/R/buffer.R
index 78c6dc666b8..8ea0d74cdae 100644
--- a/r/R/buffer.R
+++ b/r/R/buffer.R
@@ -32,7 +32,7 @@
 #'
 #' @rdname buffer
 #' @name buffer
-#' @examples
+#' @examplesIf arrow_available()
 #' my_buffer <- buffer(c(1, 2, 3, 4))
 #' my_buffer$is_mutable
 #' my_buffer$ZeroPadding()
diff --git a/r/R/chunked-array.R b/r/R/chunked-array.R
index 61093e203e7..fac1eeba2b1 100644
--- a/r/R/chunked-array.R
+++ b/r/R/chunked-array.R
@@ -57,7 +57,7 @@
 #' @rdname ChunkedArray
 #' @name ChunkedArray
 #' @seealso [Array]
-#' @examples
+#' @examplesIf arrow_available()
 #' # Pass items into chunked_array as separate objects to create chunks
 #' class_scores <- chunked_array(c(87, 88, 89), c(94, 93, 92), c(71, 72, 73))
 #' class_scores$num_chunks
diff --git a/r/R/compression.R b/r/R/compression.R
index bb051b8d535..8fd709f4fda 100644
--- a/r/R/compression.R
+++ b/r/R/compression.R
@@ -63,7 +63,7 @@ Codec$create <- function(type = "gzip", compression_level = NA) {
 #' "zstd", "lz4", "lzo", or "bz2", case insensitive.
 #' @return Logical: is `type` available?
 #' @export
-#' @examples
+#' @examplesIf arrow_available()
 #' codec_is_available("gzip")
 codec_is_available <- function(type) {
   util___Codec__IsAvailable(compression_from_name(type))
diff --git a/r/R/compute.R b/r/R/compute.R
index f9da04c9b03..4d36f6057b6 100644
--- a/r/R/compute.R
+++ b/r/R/compute.R
@@ -83,7 +83,7 @@ call_function <- function(function_name, ..., args = list(...), options = empty_
 #' @param pattern Optional regular expression to filter the function list
 #' @param ... Additional parameters passed to `grep()`
 #' @return A character vector of available Arrow C++ function names
-#' @examples
+#' @examplesIf arrow_available()
 #' list_compute_functions() 
 #' list_compute_functions(pattern = "^UTF8", ignore.case = TRUE)
 #' list_compute_functions(pattern = "^is", invert = TRUE)
@@ -242,7 +242,7 @@ all.ArrowDatum <- function(..., na.rm = FALSE){
 #' and type as `x` with the (0-based) indexes into `table`. `is_in()` returns a
 #' `boolean`-type Arrow object of the same length and type as `x` with values indicating
 #' per element of `x` it it is present in `table`.
-#' @examples
+#' @examplesIf arrow_available()
 #' # note that the returned value is 0-indexed
 #' cars_tbl <- Table$create(name = rownames(mtcars), mtcars)
 #' match_arrow(Scalar$create("Mazda RX4 Wag"), cars_tbl$name)
@@ -296,7 +296,7 @@ is_in <- function(x, table, ...) {
 #' @param x `Array` or `ChunkedArray`
 #' @return A `StructArray` containing "values" (same type as `x`) and "counts"
 #' `Int64`.
-#' @examples
+#' @examplesIf arrow_available()
 #' cyl_vals <- Array$create(mtcars$cyl)
 #' value_counts(cyl_vals)
 #' @export
diff --git a/r/R/filesystem.R b/r/R/filesystem.R
index a42cf92b628..6761acab30e 100644
--- a/r/R/filesystem.R
+++ b/r/R/filesystem.R
@@ -398,10 +398,8 @@ default_s3_options <- list(
 #' @return A `SubTreeFileSystem` containing an `S3FileSystem` and the bucket's
 #' relative path. Note that this function's success does not guarantee that you
 #' are authorized to access the bucket's contents.
-#' @examples
-#' if (arrow_with_s3()) {
-#'   bucket <- s3_bucket("ursa-labs-taxi-data")
-#' }
+#' @examplesIf arrow_with_s3()
+#' bucket <- s3_bucket("ursa-labs-taxi-data")
 #' @export
 s3_bucket <- function(bucket, ...) {
   assert_that(is.string(bucket))
diff --git a/r/R/flight.R b/r/R/flight.R
index b3b354e82f6..0143dc5b9ef 100644
--- a/r/R/flight.R
+++ b/r/R/flight.R
@@ -21,8 +21,8 @@
 #' @param path file system path where the Python module is found. Default is
 #' to look in the `inst/` directory for included modules.
 #' @export
-#' @examples
-#' \dontrun{load_flight_server("demo_flight_server")}
+#' @examplesIf FALSE
+#' load_flight_server("demo_flight_server")
 load_flight_server <- function(name, path = system.file(package = "arrow")) {
   reticulate::import_from_path(name, path)
 }
diff --git a/r/R/ipc_stream.R b/r/R/ipc_stream.R
index 47ff43d9ac5..2a489c03cae 100644
--- a/r/R/ipc_stream.R
+++ b/r/R/ipc_stream.R
@@ -35,7 +35,7 @@
 #' serialize data to a buffer.
 #' [RecordBatchWriter] for a lower-level interface.
 #' @export
-#' @examples 
+#' @examplesIf arrow_available() 
 #' tf <- tempfile()
 #' on.exit(unlink(tf))
 #' write_ipc_stream(mtcars, tf)
@@ -65,7 +65,7 @@ write_ipc_stream <- function(x, sink, ...) {
 #' @inheritParams write_feather
 #' @param format one of `c("stream", "file")`, indicating the IPC format to use
 #' @return A `raw` vector containing the bytes of the IPC serialized data.
-#' @examples
+#' @examplesIf arrow_available()
 #' # The default format is "stream"
 #' write_to_raw(mtcars)
 #' write_to_raw(mtcars, format = "file")
diff --git a/r/R/scalar.R b/r/R/scalar.R
index 40e9c65ce71..01a50b0f358 100644
--- a/r/R/scalar.R
+++ b/r/R/scalar.R
@@ -36,7 +36,7 @@
 #'
 #' @name Scalar
 #' @rdname Scalar
-#' @examples 
+#' @examplesIf arrow_available()
 #' Scalar$create(pi)
 #' Scalar$create(404)
 #' # If you pass a vector into Scalar$create, you get a list containing your items
diff --git a/r/R/type.R b/r/R/type.R
index a22323c4ba1..0b9e1dbd03c 100644
--- a/r/R/type.R
+++ b/r/R/type.R
@@ -57,7 +57,7 @@ FLOAT_TYPES <- c("float16", "float32", "float64", "halffloat", "float", "double"
 #' @param x an R vector
 #'
 #' @return an arrow logical type
-#' @examples
+#' @examplesIf arrow_available()
 #' type(1:10)
 #' type(1L:10L)
 #' type(c(1, 1.5, 2))
diff --git a/r/man/ChunkedArray.Rd b/r/man/ChunkedArray.Rd
index b0058bbac8f..eaae0b3d4b8 100644
--- a/r/man/ChunkedArray.Rd
+++ b/r/man/ChunkedArray.Rd
@@ -54,24 +54,26 @@ within the array's internal data. This can be an expensive check, potentially \c
 }
 
 \examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
 # Pass items into chunked_array as separate objects to create chunks
 class_scores <- chunked_array(c(87, 88, 89), c(94, 93, 92), c(71, 72, 73))
 class_scores$num_chunks
 
 # When taking a Slice from a chunked_array, chunks are preserved
-class_scores$Slice(2,length = 5)
+class_scores$Slice(2, length = 5)
 
 # You can combine Take and SortIndices to return a ChunkedArray with 1 chunk 
 # containing all values, ordered.
 class_scores$Take(class_scores$SortIndices(descending = TRUE))
 
 # If you pass a list into chunked_array, you get a list of length 1
-list_scores <- chunked_array(list(c(9.9, 9.6, 9.5), c(8.2,8.3,8.4), c(10.0, 9.9, 9.8)))
+list_scores <- chunked_array(list(c(9.9, 9.6, 9.5), c(8.2, 8.3, 8.4), c(10.0, 9.9, 9.8)))
 list_scores$num_chunks
 
 # When constructing a ChunkedArray, the first chunk is used to infer type.
 doubles <- chunked_array(c(1, 2, 3), c(5L, 6L, 7L))
 doubles$type
+\dontshow{\}) # examplesIf}
 }
 \seealso{
 \link{Array}
diff --git a/r/man/Scalar.Rd b/r/man/Scalar.Rd
index 1c115b7c199..21e04c12e08 100644
--- a/r/man/Scalar.Rd
+++ b/r/man/Scalar.Rd
@@ -20,6 +20,7 @@ A \code{Scalar} holds a single value of an Arrow type.
 }
 
 \examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
 Scalar$create(pi)
 Scalar$create(404)
 # If you pass a vector into Scalar$create, you get a list containing your items
@@ -33,4 +34,5 @@ my_scalar$Equals(Scalar$create(99.000009)) # FALSE
 my_scalar$Equals(Scalar$create(99L)) # FALSE (types don't match)
 
 my_scalar$ToString()
+\dontshow{\}) # examplesIf}
 }
diff --git a/r/man/array.Rd b/r/man/array.Rd
index 34f106c0cfa..0c1aed407ac 100644
--- a/r/man/array.Rd
+++ b/r/man/array.Rd
@@ -83,6 +83,7 @@ within the array's internal data. This can be an expensive check, potentially \c
 }
 
 \examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
 my_array <- Array$create(1:10)
 my_array$type
 my_array$cast(int8())
@@ -102,5 +103,5 @@ new_array$offset
 na_array2 = na_array
 na_array2 == na_array # element-wise comparison
 na_array2$Equals(na_array) # overall comparison 
-
+\dontshow{\}) # examplesIf}
 }
diff --git a/r/man/buffer.Rd b/r/man/buffer.Rd
index 08d66ece5dc..a3ca1fc2fcb 100644
--- a/r/man/buffer.Rd
+++ b/r/man/buffer.Rd
@@ -34,9 +34,11 @@ contiguous memory with a particular size.
 }
 
 \examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
 my_buffer <- buffer(c(1, 2, 3, 4))
 my_buffer$is_mutable
 my_buffer$ZeroPadding()
 my_buffer$size
 my_buffer$capacity
+\dontshow{\}) # examplesIf}
 }
diff --git a/r/man/codec_is_available.Rd b/r/man/codec_is_available.Rd
index 5cda813f416..b3238ff1dca 100644
--- a/r/man/codec_is_available.Rd
+++ b/r/man/codec_is_available.Rd
@@ -19,5 +19,7 @@ the Arrow C++ library. This function lets you know which are available for
 use.
 }
 \examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
 codec_is_available("gzip")
+\dontshow{\}) # examplesIf}
 }
diff --git a/r/man/list_compute_functions.Rd b/r/man/list_compute_functions.Rd
index 18c2aa35fab..668e090c0ca 100644
--- a/r/man/list_compute_functions.Rd
+++ b/r/man/list_compute_functions.Rd
@@ -38,7 +38,9 @@ be called directly on Arrow objects, as well as some tidyverse-flavored versions
 available inside \code{dplyr} verbs.
 }
 \examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
 list_compute_functions() 
 list_compute_functions(pattern = "^UTF8", ignore.case = TRUE)
 list_compute_functions(pattern = "^is", invert = TRUE)
+\dontshow{\}) # examplesIf}
 }
diff --git a/r/man/load_flight_server.Rd b/r/man/load_flight_server.Rd
index e521efa3328..66d30f39147 100644
--- a/r/man/load_flight_server.Rd
+++ b/r/man/load_flight_server.Rd
@@ -16,5 +16,7 @@ to look in the \verb{inst/} directory for included modules.}
 Load a Python Flight server
 }
 \examples{
-\dontrun{load_flight_server("demo_flight_server")}
+\dontshow{if (FALSE) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+load_flight_server("demo_flight_server")
+\dontshow{\}) # examplesIf}
 }
diff --git a/r/man/match_arrow.Rd b/r/man/match_arrow.Rd
index 9b863254d1a..d63ef3eed87 100644
--- a/r/man/match_arrow.Rd
+++ b/r/man/match_arrow.Rd
@@ -27,6 +27,7 @@ per element of \code{x} it it is present in \code{table}.
 it. This function exposes the analogous functions in the Arrow C++ library.
 }
 \examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
 # note that the returned value is 0-indexed
 cars_tbl <- Table$create(name = rownames(mtcars), mtcars)
 match_arrow(Scalar$create("Mazda RX4 Wag"), cars_tbl$name)
@@ -48,4 +49,5 @@ is_in(c(4, 6, 8), mtcars$cyl) # returns vector
 is_in(Scalar$create(4), mtcars$cyl) # returns Scalar
 is_in(Array$create(c(4, 6, 8)), cars_tbl$cyl) # returns Array
 is_in(ChunkedArray$create(c(4, 6), 8), cars_tbl$cyl) # returns ChunkedArray
+\dontshow{\}) # examplesIf}
 }
diff --git a/r/man/s3_bucket.Rd b/r/man/s3_bucket.Rd
index 7791e9bc5f2..95a086deae5 100644
--- a/r/man/s3_bucket.Rd
+++ b/r/man/s3_bucket.Rd
@@ -22,7 +22,7 @@ that automatically detects the bucket's AWS region and holding onto the its
 relative path.
 }
 \examples{
-if (arrow_with_s3()) {
-  bucket <- s3_bucket("ursa-labs-taxi-data")
-}
+\dontshow{if (arrow_with_s3()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+bucket <- s3_bucket("ursa-labs-taxi-data")
+\dontshow{\}) # examplesIf}
 }
diff --git a/r/man/type.Rd b/r/man/type.Rd
index 7ef8ea60ec0..d55bbe24bd5 100644
--- a/r/man/type.Rd
+++ b/r/man/type.Rd
@@ -16,10 +16,12 @@ an arrow logical type
 infer the arrow Array type from an R vector
 }
 \examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
 type(1:10)
 type(1L:10L)
-type(c(1,1.5,2))
+type(c(1, 1.5, 2))
 type(c("A", "B", "C"))
 type(mtcars)
 type(Sys.Date())
+\dontshow{\}) # examplesIf}
 }
diff --git a/r/man/value_counts.Rd b/r/man/value_counts.Rd
index e8023c2fd3f..6ef77cd4727 100644
--- a/r/man/value_counts.Rd
+++ b/r/man/value_counts.Rd
@@ -17,6 +17,8 @@ A \code{StructArray} containing "values" (same type as \code{x}) and "counts"
 This function tabulates the values in the array and returns a table of counts.
 }
 \examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
 cyl_vals <- Array$create(mtcars$cyl)
 value_counts(cyl_vals)
+\dontshow{\}) # examplesIf}
 }
diff --git a/r/man/write_ipc_stream.Rd b/r/man/write_ipc_stream.Rd
index a504a31c304..888d947eb99 100644
--- a/r/man/write_ipc_stream.Rd
+++ b/r/man/write_ipc_stream.Rd
@@ -32,9 +32,11 @@ the function that will write the desired IPC format (stream or file) since
 either can be written to a file or \code{OutputStream}.
 }
 \examples{
+\dontshow{if (arrow_available() ) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
 tf <- tempfile()
 on.exit(unlink(tf))
 write_ipc_stream(mtcars, tf)
+\dontshow{\}) # examplesIf}
 }
 \seealso{
 \code{\link[=write_feather]{write_feather()}} for writing IPC files. \code{\link[=write_to_raw]{write_to_raw()}} to
diff --git a/r/man/write_to_raw.Rd b/r/man/write_to_raw.Rd
index aa682c09a76..1f507e384c3 100644
--- a/r/man/write_to_raw.Rd
+++ b/r/man/write_to_raw.Rd
@@ -21,7 +21,9 @@ This function wraps those so that you can serialize data to a buffer and
 access that buffer as a \code{raw} vector in R.
 }
 \examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
 # The default format is "stream"
 write_to_raw(mtcars)
 write_to_raw(mtcars, format = "file")
+\dontshow{\}) # examplesIf}
 }

From 3e3ab3bee8a83bb10033a03f0e05d875aee5bf8c Mon Sep 17 00:00:00 2001
From: crystrix <chenxi.li@live.com>
Date: Mon, 7 Jun 2021 07:28:55 +0000
Subject: [PATCH 354/719] ARROW-12984: [C++][Compute] Passing options parameter
 of Count/Index aggregation by reference

The options parameter of `Count` function is passed by value, it's better to be passed by reference like other aggregation functions.

Closes #10459 from Crystrix/arrow-12984

Authored-by: crystrix <chenxi.li@live.com>
Signed-off-by: Yibo Cai <yibo.cai@arm.com>
---
 cpp/src/arrow/compute/api_aggregate.cc |  4 ++--
 cpp/src/arrow/compute/api_aggregate.h  | 10 ++++++----
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/cpp/src/arrow/compute/api_aggregate.cc b/cpp/src/arrow/compute/api_aggregate.cc
index 967c8179da7..efff4ac67df 100644
--- a/cpp/src/arrow/compute/api_aggregate.cc
+++ b/cpp/src/arrow/compute/api_aggregate.cc
@@ -25,7 +25,7 @@ namespace compute {
 // ----------------------------------------------------------------------
 // Scalar aggregates
 
-Result<Datum> Count(const Datum& value, ScalarAggregateOptions options,
+Result<Datum> Count(const Datum& value, const ScalarAggregateOptions& options,
                     ExecContext* ctx) {
   return CallFunction("count", {value}, &options, ctx);
 }
@@ -77,7 +77,7 @@ Result<Datum> TDigest(const Datum& value, const TDigestOptions& options,
   return CallFunction("tdigest", {value}, &options, ctx);
 }
 
-Result<Datum> Index(const Datum& value, IndexOptions options, ExecContext* ctx) {
+Result<Datum> Index(const Datum& value, const IndexOptions& options, ExecContext* ctx) {
   return CallFunction("index", {value}, &options, ctx);
 }
 
diff --git a/cpp/src/arrow/compute/api_aggregate.h b/cpp/src/arrow/compute/api_aggregate.h
index d781bbb6205..121896f1c97 100644
--- a/cpp/src/arrow/compute/api_aggregate.h
+++ b/cpp/src/arrow/compute/api_aggregate.h
@@ -145,9 +145,10 @@ struct ARROW_EXPORT IndexOptions : public FunctionOptions {
 /// \since 1.0.0
 /// \note API not yet finalized
 ARROW_EXPORT
-Result<Datum> Count(const Datum& datum,
-                    ScalarAggregateOptions options = ScalarAggregateOptions::Defaults(),
-                    ExecContext* ctx = NULLPTR);
+Result<Datum> Count(
+    const Datum& datum,
+    const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults(),
+    ExecContext* ctx = NULLPTR);
 
 /// \brief Compute the mean of a numeric array.
 ///
@@ -310,7 +311,8 @@ Result<Datum> TDigest(const Datum& value,
 /// \since 5.0.0
 /// \note API not yet finalized
 ARROW_EXPORT
-Result<Datum> Index(const Datum& value, IndexOptions options, ExecContext* ctx = NULLPTR);
+Result<Datum> Index(const Datum& value, const IndexOptions& options,
+                    ExecContext* ctx = NULLPTR);
 
 namespace internal {
 

From f1a7b0d765ad925cc764ebd3e512f02bcdedfd41 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Pedro?= <joaop@simbioseventures.com>
Date: Mon, 7 Jun 2021 15:52:06 +0530
Subject: [PATCH 355/719] ARROW-12205: [C++][Gandiva] Implement
 TO_TIME([number] seconds) and TO_TIMESTAMP([number] seconds) function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ImpleImplement TO_TIME([number] seconds) and TO_TIMESTAMP([number] seconds) function

Closes #9890 from jpedroantunes/feature/to-timestamp-function and squashes the following commits:

315a6db80 <Anthony Louis> Change return type of to_time functions
31fdbd2a0 <João Pedro> Correct values used on to time test
ec3919f3c <João Pedro Antunes Ferreira> Correct comments on to_time test
7421142be <João Pedro> Apply corrections on to time tests to try to  handle floating points
0df0a3b11 <João Pedro> Define input timestamp comments
805a2d909 <João Pedro> Apply corrections on to time tests to handle floating points
93f8ec536 <João Pedro> Correct dev lint options
4999fd534 <João Pedro> Correct dev lint options
f117af642 <João Pedro> Add necessary static casts on tests
a6a396e25 <João Pedro> Add case tests for to_timestamp with fractional parts
de64aae49 <João Pedro> Add case tests for to_timestamp with fractional parts
de93dc2da <João Pedro> Fix to time implementation to get the time of the day
b44aea07f <João Pedro> Fix local lint errors
2013d44ae <João Pedro> Fix to time function to consider seconds as input
3af071893 <João Pedro> Change to time implementation to handle secs and not millis
b5c6ca101 <João Pedro> Fix lint problems on time.cc file
f904a3b56 <João Pedro> Fix tests for timestamp and time to not need casting
f09022011 <João Pedro> Add tests for to timestamp function
bf81503b8 <João Pedro> Add to_time definition for numeric types
d9119eefb <João Pedro> Change definition of to_timestamp function to work for numeric types
33230ca78 <João Pedro> Remove unused function on time.cc
b41c3c2bf <João Pedro> Add function registry for to_timestamp method
b5a80edb5 <João Pedro> Add unit tests for to_timestamp method
d26179ee3 <João Pedro> Add base declaration for to_timestamp for numeric in gandiva types.h
d19f3f609 <João Pedro> Add base implementation for to_timestamp for numeric in gandiva

Lead-authored-by: João Pedro <joaop@simbioseventures.com>
Co-authored-by: João Pedro Antunes Ferreira <42006402+jpedroantunes@users.noreply.github.com>
Co-authored-by: Anthony Louis <anthony@simbioseventures.com>
Signed-off-by: Praveen <praveen@dremio.com>
---
 cpp/src/gandiva/function_registry_common.h    |  8 +-
 cpp/src/gandiva/function_registry_datetime.cc | 12 ++-
 .../gandiva/precompiled/epoch_time_point.h    | 12 +--
 cpp/src/gandiva/precompiled/time.cc           | 32 +++++++
 cpp/src/gandiva/precompiled/time_test.cc      | 96 +++++++++++++++++++
 cpp/src/gandiva/precompiled/types.h           | 10 ++
 6 files changed, 161 insertions(+), 9 deletions(-)

diff --git a/cpp/src/gandiva/function_registry_common.h b/cpp/src/gandiva/function_registry_common.h
index 580b2f68d28..40efc1fe1a9 100644
--- a/cpp/src/gandiva/function_registry_common.h
+++ b/cpp/src/gandiva/function_registry_common.h
@@ -229,12 +229,16 @@ typedef std::unordered_map<const FunctionSignature*, const NativeFunction*, KeyH
                  NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors)
 
 // Iterate the inner macro over all numeric types
-#define NUMERIC_TYPES(INNER, NAME, ALIASES)                                             \
+#define BASE_NUMERIC_TYPES(INNER, NAME, ALIASES)                                        \
   INNER(NAME, ALIASES, int8), INNER(NAME, ALIASES, int16), INNER(NAME, ALIASES, int32), \
       INNER(NAME, ALIASES, int64), INNER(NAME, ALIASES, uint8),                         \
       INNER(NAME, ALIASES, uint16), INNER(NAME, ALIASES, uint32),                       \
       INNER(NAME, ALIASES, uint64), INNER(NAME, ALIASES, float32),                      \
-      INNER(NAME, ALIASES, float64), INNER(NAME, ALIASES, decimal128)
+      INNER(NAME, ALIASES, float64)
+
+// Iterate the inner macro over all base numeric types
+#define NUMERIC_TYPES(INNER, NAME, ALIASES) \
+  BASE_NUMERIC_TYPES(INNER, NAME, ALIASES), INNER(NAME, ALIASES, decimal128)
 
 // Iterate the inner macro over numeric and date/time types
 #define NUMERIC_DATE_TYPES(INNER, NAME, ALIASES)                         \
diff --git a/cpp/src/gandiva/function_registry_datetime.cc b/cpp/src/gandiva/function_registry_datetime.cc
index a5e0c1c3789..56c10bd706d 100644
--- a/cpp/src/gandiva/function_registry_datetime.cc
+++ b/cpp/src/gandiva/function_registry_datetime.cc
@@ -29,6 +29,14 @@ namespace gandiva {
       DATE_TYPES(INNER, name##Hour, {}), DATE_TYPES(INNER, name##Minute, {}),    \
       DATE_TYPES(INNER, name##Second, {})
 
+#define TO_TIMESTAMP_SAFE_NULL_IF_NULL(NAME, ALIASES, TYPE)                       \
+  NativeFunction(#NAME, std::vector<std::string> ALIASES, DataTypeVector{TYPE()}, \
+                 timestamp(), kResultNullIfNull, ARROW_STRINGIFY(NAME##_##TYPE))
+
+#define TO_TIME_SAFE_NULL_IF_NULL(NAME, ALIASES, TYPE)                            \
+  NativeFunction(#NAME, std::vector<std::string> ALIASES, DataTypeVector{TYPE()}, \
+                 time32(), kResultNullIfNull, ARROW_STRINGIFY(NAME##_##TYPE))
+
 #define TIME_EXTRACTION_FNS(name)                              \
   TIME_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Hour, {}),       \
       TIME_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Minute, {}), \
@@ -86,7 +94,9 @@ std::vector<NativeFunction> GetDateTimeFunctionRegistry() {
       NativeFunction("extractDay", {}, DataTypeVector{day_time_interval()}, int64(),
                      kResultNullIfNull, "extractDay_daytimeinterval"),
 
-      DATE_TYPES(LAST_DAY_SAFE_NULL_IF_NULL, last_day, {})};
+      DATE_TYPES(LAST_DAY_SAFE_NULL_IF_NULL, last_day, {}),
+      BASE_NUMERIC_TYPES(TO_TIME_SAFE_NULL_IF_NULL, to_time, {}),
+      BASE_NUMERIC_TYPES(TO_TIMESTAMP_SAFE_NULL_IF_NULL, to_timestamp, {})};
 
   return date_time_fn_registry_;
 }
diff --git a/cpp/src/gandiva/precompiled/epoch_time_point.h b/cpp/src/gandiva/precompiled/epoch_time_point.h
index 80841b1a629..2a8b08c6d7f 100644
--- a/cpp/src/gandiva/precompiled/epoch_time_point.h
+++ b/cpp/src/gandiva/precompiled/epoch_time_point.h
@@ -87,12 +87,6 @@ class EpochTimePoint {
 
   int64_t MillisSinceEpoch() const { return tp_.time_since_epoch().count(); }
 
- private:
-  arrow_vendored::date::year_month_day YearMonthDay() const {
-    return arrow_vendored::date::year_month_day{
-        arrow_vendored::date::floor<arrow_vendored::date::days>(tp_)};  // NOLINT
-  }
-
   arrow_vendored::date::time_of_day<std::chrono::milliseconds> TimeOfDay() const {
     auto millis_since_midnight =
         tp_ - arrow_vendored::date::floor<arrow_vendored::date::days>(tp_);
@@ -100,5 +94,11 @@ class EpochTimePoint {
         millis_since_midnight);
   }
 
+ private:
+  arrow_vendored::date::year_month_day YearMonthDay() const {
+    return arrow_vendored::date::year_month_day{
+        arrow_vendored::date::floor<arrow_vendored::date::days>(tp_)};  // NOLINT
+  }
+
   std::chrono::time_point<std::chrono::system_clock, std::chrono::milliseconds> tp_;
 };
diff --git a/cpp/src/gandiva/precompiled/time.cc b/cpp/src/gandiva/precompiled/time.cc
index b25769f9123..e5cdd9de64f 100644
--- a/cpp/src/gandiva/precompiled/time.cc
+++ b/cpp/src/gandiva/precompiled/time.cc
@@ -40,6 +40,19 @@ extern "C" {
   INNER(date64)           \
   INNER(timestamp)
 
+// Expand inner macro for all base numeric types.
+#define NUMERIC_TYPES(INNER) \
+  INNER(int8)                \
+  INNER(int16)               \
+  INNER(int32)               \
+  INNER(int64)               \
+  INNER(uint8)               \
+  INNER(uint16)              \
+  INNER(uint32)              \
+  INNER(uint64)              \
+  INNER(float32)             \
+  INNER(float64)
+
 // Extract millennium
 #define EXTRACT_MILLENNIUM(TYPE)                            \
   FORCE_INLINE                                              \
@@ -828,4 +841,23 @@ gdv_int64 castBIGINT_daytimeinterval(gdv_day_time_interval in) {
          extractDay_daytimeinterval(in) * MILLIS_IN_DAY;
 }
 
+// Convert the seconds since epoch argument to timestamp
+#define TO_TIMESTAMP(TYPE)                                      \
+  FORCE_INLINE                                                  \
+  gdv_timestamp to_timestamp##_##TYPE(gdv_##TYPE seconds) {     \
+    return static_cast<gdv_timestamp>(seconds * MILLIS_IN_SEC); \
+  }
+
+NUMERIC_TYPES(TO_TIMESTAMP)
+
+// Convert the seconds since epoch argument to time
+#define TO_TIME(TYPE)                                                     \
+  FORCE_INLINE                                                            \
+  gdv_time32 to_time##_##TYPE(gdv_##TYPE seconds) {                       \
+    EpochTimePoint tp(static_cast<int64_t>(seconds * MILLIS_IN_SEC));     \
+    return static_cast<gdv_time32>(tp.TimeOfDay().to_duration().count()); \
+  }
+
+NUMERIC_TYPES(TO_TIME)
+
 }  // extern "C"
diff --git a/cpp/src/gandiva/precompiled/time_test.cc b/cpp/src/gandiva/precompiled/time_test.cc
index 27db8dac464..4a5ba5b1627 100644
--- a/cpp/src/gandiva/precompiled/time_test.cc
+++ b/cpp/src/gandiva/precompiled/time_test.cc
@@ -743,4 +743,100 @@ TEST(TestTime, TestLastDay) {
   EXPECT_EQ(StringToTimestamp("2015-12-31 00:00:00"), out);
 }
 
+TEST(TestTime, TestToTimestamp) {
+  auto ts = StringToTimestamp("1970-01-01 00:00:00");
+  EXPECT_EQ(ts, to_timestamp_int32(0));
+  EXPECT_EQ(ts, to_timestamp_int64(0));
+  EXPECT_EQ(ts, to_timestamp_float32(0));
+  EXPECT_EQ(ts, to_timestamp_float64(0));
+
+  ts = StringToTimestamp("1970-01-01 00:00:01");
+  EXPECT_EQ(ts, to_timestamp_int32(1));
+  EXPECT_EQ(ts, to_timestamp_int64(1));
+  EXPECT_EQ(ts, to_timestamp_float32(1));
+  EXPECT_EQ(ts, to_timestamp_float64(1));
+
+  ts = StringToTimestamp("1970-01-01 00:01:00");
+  EXPECT_EQ(ts, to_timestamp_int32(60));
+  EXPECT_EQ(ts, to_timestamp_int64(60));
+  EXPECT_EQ(ts, to_timestamp_float32(60));
+  EXPECT_EQ(ts, to_timestamp_float64(60));
+
+  ts = StringToTimestamp("1970-01-01 01:00:00");
+  EXPECT_EQ(ts, to_timestamp_int32(3600));
+  EXPECT_EQ(ts, to_timestamp_int64(3600));
+  EXPECT_EQ(ts, to_timestamp_float32(3600));
+  EXPECT_EQ(ts, to_timestamp_float64(3600));
+
+  ts = StringToTimestamp("1970-01-02 00:00:00");
+  EXPECT_EQ(ts, to_timestamp_int32(86400));
+  EXPECT_EQ(ts, to_timestamp_int64(86400));
+  EXPECT_EQ(ts, to_timestamp_float32(86400));
+  EXPECT_EQ(ts, to_timestamp_float64(86400));
+
+  // tests with fractional part
+  ts = StringToTimestamp("1970-01-01 00:00:01") + 500;
+  EXPECT_EQ(ts, to_timestamp_float32(1.500f));
+  EXPECT_EQ(ts, to_timestamp_float64(1.500));
+
+  ts = StringToTimestamp("1970-01-01 00:01:01") + 600;
+  EXPECT_EQ(ts, to_timestamp_float32(61.600f));
+  EXPECT_EQ(ts, to_timestamp_float64(61.600));
+
+  ts = StringToTimestamp("1970-01-01 01:00:01") + 400;
+  EXPECT_EQ(ts, to_timestamp_float32(3601.400f));
+  EXPECT_EQ(ts, to_timestamp_float64(3601.400));
+}
+
+TEST(TestTime, TestToTimeNumeric) {
+  // input timestamp in seconds: 1970-01-01 00:00:00
+  int64_t expected_output = 0;  // 0 milliseconds
+  EXPECT_EQ(expected_output, to_time_int32(0));
+  EXPECT_EQ(expected_output, to_time_int64(0));
+  EXPECT_EQ(expected_output, to_time_float32(0.000f));
+  EXPECT_EQ(expected_output, to_time_float64(0.000));
+
+  // input timestamp in seconds: 1970-01-01 00:00:01
+  expected_output = 1000;  // 1 seconds
+  EXPECT_EQ(expected_output, to_time_int32(1));
+  EXPECT_EQ(expected_output, to_time_int64(1));
+  EXPECT_EQ(expected_output, to_time_float32(1.000f));
+  EXPECT_EQ(expected_output, to_time_float64(1.000));
+
+  // input timestamp in seconds: 1970-01-01 01:00:00
+  expected_output = 3600000;  // 3600 seconds
+  EXPECT_EQ(expected_output, to_time_int32(3600));
+  EXPECT_EQ(expected_output, to_time_int64(3600));
+  EXPECT_EQ(expected_output, to_time_float32(3600.000f));
+  EXPECT_EQ(expected_output, to_time_float64(3600.000));
+
+  // input timestamp in seconds: 1970-01-01 23:59:59
+  expected_output = 86399000;  // 86399 seconds
+  EXPECT_EQ(expected_output, to_time_int32(86399));
+  EXPECT_EQ(expected_output, to_time_int64(86399));
+  EXPECT_EQ(expected_output, to_time_float32(86399.000f));
+  EXPECT_EQ(expected_output, to_time_float64(86399.000));
+
+  // input timestamp in seconds: 2020-01-01 00:00:01
+  expected_output = 1000;  // 1 second
+  EXPECT_EQ(expected_output, to_time_int64(1577836801));
+  EXPECT_EQ(expected_output, to_time_float64(1577836801.000));
+
+  // tests with fractional part
+  // input timestamp in seconds: 1970-01-01 00:00:01.500
+  expected_output = 1500;  // 1.5 seconds
+  EXPECT_EQ(expected_output, to_time_float32(1.500f));
+  EXPECT_EQ(expected_output, to_time_float64(1.500));
+
+  // input timestamp in seconds: 1970-01-01 00:01:01.500
+  expected_output = 61500;  // 61.5 seconds
+  EXPECT_EQ(expected_output, to_time_float32(61.500f));
+  EXPECT_EQ(expected_output, to_time_float64(61.500));
+
+  // input timestamp in seconds: 1970-01-01 01:00:01.500
+  expected_output = 3601500;  // 3601.5 seconds
+  EXPECT_EQ(expected_output, to_time_float32(3601.500f));
+  EXPECT_EQ(expected_output, to_time_float64(3601.500));
+}
+
 }  // namespace gandiva
diff --git a/cpp/src/gandiva/precompiled/types.h b/cpp/src/gandiva/precompiled/types.h
index b8c7aa9147e..64ef7540b53 100644
--- a/cpp/src/gandiva/precompiled/types.h
+++ b/cpp/src/gandiva/precompiled/types.h
@@ -100,6 +100,16 @@ gdv_int64 add_int32_timestamp(gdv_int32, gdv_timestamp);
 gdv_int64 date_add_int64_timestamp(gdv_int64, gdv_timestamp);
 gdv_timestamp add_date64_int64(gdv_date64, gdv_int64);
 
+gdv_timestamp to_timestamp_int32(gdv_int32);
+gdv_timestamp to_timestamp_int64(gdv_int64);
+gdv_timestamp to_timestamp_float32(gdv_float32);
+gdv_timestamp to_timestamp_float64(gdv_float64);
+
+gdv_time32 to_time_int32(gdv_int32);
+gdv_time32 to_time_int64(gdv_int64);
+gdv_time32 to_time_float32(gdv_float32);
+gdv_time32 to_time_float64(gdv_float64);
+
 gdv_int64 date_sub_timestamp_int32(gdv_timestamp, gdv_int32);
 gdv_int64 subtract_timestamp_int32(gdv_timestamp, gdv_int32);
 gdv_int64 date_diff_timestamp_int64(gdv_timestamp, gdv_int64);

From 3791510f9ed6fb2f0ed24d02629952d9c9ecc7f5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Pedro?= <joaop@simbioseventures.com>
Date: Mon, 7 Jun 2021 15:55:55 +0530
Subject: [PATCH 356/719] ARROW-12534: [C++][Gandiva] Implement LEFT and RIGHT
 functions on Gandiva for string input values
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Impleented functions:

- LEFT (string, int):  Returns the x leftmost characters of giventext;
- RIGHT (string, int):  Returns the x rightmost characters of giventext;

Closes #10155 from jpedroantunes/feature/add-left-right-gandiva-functions and squashes the following commits:

23dbab8b4 <João Pedro> Fix identation on function string registry
7dc833c61 <João Pedro> Remove unnecessary validation
6ad707a44 <João Pedro> Change signature of left and right methods to consider argument types
4f33d851c <João Pedro> Add comments for the left and right functions
b0035522c <João Pedro> Change left function to iterate only once through the string
32697f6c7 <João Pedro> Change variables on left and right functions to be snake_case
6622a039e <João Pedro> Add missing space ident on string_ops.cc file
9aa250069 <João Pedro> Correct lint mistakes on left and right function iplementation
69cb2f739 <João Pedro> Add projector test for RIGHT string function
900bf7f58 <João Pedro> Add function registry for RIGHT string function
a7e542b51 <João Pedro> Add base implementation and tests for RIGHT function considering string input values
763547b1b <João Pedro> Add projector test for left string function
5870abb62 <João Pedro> Add function registry for left string function
3db95882a <João Pedro> Add base implementation and tests for left function considering string input values

Authored-by: João Pedro <joaop@simbioseventures.com>
Signed-off-by: Praveen <praveen@dremio.com>
---
 cpp/src/gandiva/function_registry_string.cc   |  7 ++
 cpp/src/gandiva/precompiled/string_ops.cc     | 92 +++++++++++++++++++
 .../gandiva/precompiled/string_ops_test.cc    | 64 +++++++++++++
 cpp/src/gandiva/precompiled/types.h           |  6 ++
 cpp/src/gandiva/tests/projector_test.cc       | 78 ++++++++++++++++
 5 files changed, 247 insertions(+)

diff --git a/cpp/src/gandiva/function_registry_string.cc b/cpp/src/gandiva/function_registry_string.cc
index 35ef2dfcb34..b0280c5611d 100644
--- a/cpp/src/gandiva/function_registry_string.cc
+++ b/cpp/src/gandiva/function_registry_string.cc
@@ -309,6 +309,13 @@ std::vector<NativeFunction> GetStringFunctionRegistry() {
       NativeFunction("binary_string", {}, DataTypeVector{utf8()}, binary(),
                      kResultNullIfNull, "binary_string", NativeFunction::kNeedsContext),
 
+      NativeFunction("left", {}, DataTypeVector{utf8(), int32()}, utf8(),
+                     kResultNullIfNull, "left_utf8_int32", NativeFunction::kNeedsContext),
+
+      NativeFunction("right", {}, DataTypeVector{utf8(), int32()}, utf8(),
+                     kResultNullIfNull, "right_utf8_int32",
+                     NativeFunction::kNeedsContext),
+
       NativeFunction("split_part", {}, DataTypeVector{utf8(), utf8(), int32()}, utf8(),
                      kResultNullIfNull, "split_part",
                      NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors)};
diff --git a/cpp/src/gandiva/precompiled/string_ops.cc b/cpp/src/gandiva/precompiled/string_ops.cc
index ac50633f3c2..4472c4857ef 100644
--- a/cpp/src/gandiva/precompiled/string_ops.cc
+++ b/cpp/src/gandiva/precompiled/string_ops.cc
@@ -1639,6 +1639,98 @@ const char* split_part(gdv_int64 context, const char* text, gdv_int32 text_len,
   return "";
 }
 
+// Returns the x leftmost characters of a given string. Cases:
+//     LEFT("TestString", 10) => "TestString"
+//     LEFT("TestString", 3) => "Tes"
+//     LEFT("TestString", -3) => "TestStr"
+FORCE_INLINE
+const char* left_utf8_int32(gdv_int64 context, const char* text, gdv_int32 text_len,
+                            gdv_int32 number, gdv_int32* out_len) {
+  // returns the 'number' left most characters of a given text
+  if (text_len == 0 || number == 0) {
+    *out_len = 0;
+    return "";
+  }
+
+  // iterate over the utf8 string validating each character
+  int char_len;
+  int char_count = 0;
+  int byte_index = 0;
+  for (int i = 0; i < text_len; i += char_len) {
+    char_len = utf8_char_length(text[i]);
+    if (char_len == 0 || i + char_len > text_len) {  // invalid byte or incomplete glyph
+      set_error_for_invalid_utf(context, text[i]);
+      *out_len = 0;
+      return "";
+    }
+    for (int j = 1; j < char_len; ++j) {
+      if ((text[i + j] & 0xC0) != 0x80) {  // bytes following head-byte of glyph
+        set_error_for_invalid_utf(context, text[i + j]);
+        *out_len = 0;
+        return "";
+      }
+    }
+    byte_index += char_len;
+    ++char_count;
+    // Define the rules to stop the iteration over the string
+    // case where left('abc', 5) -> 'abc'
+    if (number > 0 && char_count == number) break;
+    // case where left('abc', -5) ==> ''
+    if (number < 0 && char_count == number + text_len) break;
+  }
+
+  *out_len = byte_index;
+  return text;
+}
+
+// Returns the x rightmost characters of a given string. Cases:
+//     RIGHT("TestString", 10) => "TestString"
+//     RIGHT("TestString", 3) => "ing"
+//     RIGHT("TestString", -3) => "tString"
+FORCE_INLINE
+const char* right_utf8_int32(gdv_int64 context, const char* text, gdv_int32 text_len,
+                             gdv_int32 number, gdv_int32* out_len) {
+  // returns the 'number' left most characters of a given text
+  if (text_len == 0 || number == 0) {
+    *out_len = 0;
+    return "";
+  }
+
+  // initially counts the number of utf8 characters in the defined text
+  int32_t char_count = utf8_length(context, text, text_len);
+  // char_count is zero if input has invalid utf8 char
+  if (char_count == 0) {
+    *out_len = 0;
+    return "";
+  }
+
+  int32_t start_char_pos;  // the char result start position (inclusive)
+  int32_t end_char_len;    // the char result end position (inclusive)
+  if (number > 0) {
+    // case where right('abc', 5) ==> 'abc' start_char_pos=1.
+    start_char_pos = (char_count > number) ? char_count - number : 0;
+    end_char_len = char_count - start_char_pos;
+  } else {
+    start_char_pos = number * -1;
+    end_char_len = char_count - start_char_pos;
+  }
+
+  // calculate the start byte position and the output length
+  int32_t start_byte_pos = utf8_byte_pos(context, text, text_len, start_char_pos);
+  *out_len = utf8_byte_pos(context, text, text_len, end_char_len);
+
+  // try to allocate memory for the response
+  char* ret =
+      reinterpret_cast<gdv_binary>(gdv_fn_context_arena_malloc(context, *out_len));
+  if (ret == nullptr) {
+    gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
+    *out_len = 0;
+    return "";
+  }
+  memcpy(ret, text + start_byte_pos, *out_len);
+  return ret;
+}
+
 FORCE_INLINE
 const char* binary_string(gdv_int64 context, const char* text, gdv_int32 text_len,
                           gdv_int32* out_len) {
diff --git a/cpp/src/gandiva/precompiled/string_ops_test.cc b/cpp/src/gandiva/precompiled/string_ops_test.cc
index ae3c0f2e28c..ff1555f59a8 100644
--- a/cpp/src/gandiva/precompiled/string_ops_test.cc
+++ b/cpp/src/gandiva/precompiled/string_ops_test.cc
@@ -1104,6 +1104,70 @@ TEST(TestStringOps, TestReplace) {
   ctx.Reset();
 }
 
+TEST(TestStringOps, TestLeftString) {
+  gandiva::ExecutionContext ctx;
+  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
+  gdv_int32 out_len = 0;
+  const char* out_str;
+
+  out_str = left_utf8_int32(ctx_ptr, "TestString", 10, 10, &out_len);
+  std::string output = std::string(out_str, out_len);
+  EXPECT_EQ(output, "TestString");
+
+  out_str = left_utf8_int32(ctx_ptr, "", 0, 0, &out_len);
+  output = std::string(out_str, out_len);
+  EXPECT_EQ(output, "");
+
+  out_str = left_utf8_int32(ctx_ptr, "", 0, 500, &out_len);
+  output = std::string(out_str, out_len);
+  EXPECT_EQ(output, "");
+
+  out_str = left_utf8_int32(ctx_ptr, "TestString", 10, 3, &out_len);
+  output = std::string(out_str, out_len);
+  EXPECT_EQ(output, "Tes");
+
+  out_str = left_utf8_int32(ctx_ptr, "TestString", 10, -3, &out_len);
+  output = std::string(out_str, out_len);
+  EXPECT_EQ(output, "TestStr");
+
+  // the text length for this string is 10 (each utf8 char is represented by two bytes)
+  out_str = left_utf8_int32(ctx_ptr, "абвгд", 10, 3, &out_len);
+  output = std::string(out_str, out_len);
+  EXPECT_EQ(output, "абв");
+}
+
+TEST(TestStringOps, TestRightString) {
+  gandiva::ExecutionContext ctx;
+  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
+  gdv_int32 out_len = 0;
+  const char* out_str;
+
+  out_str = right_utf8_int32(ctx_ptr, "TestString", 10, 10, &out_len);
+  std::string output = std::string(out_str, out_len);
+  EXPECT_EQ(output, "TestString");
+
+  out_str = right_utf8_int32(ctx_ptr, "", 0, 0, &out_len);
+  output = std::string(out_str, out_len);
+  EXPECT_EQ(output, "");
+
+  out_str = right_utf8_int32(ctx_ptr, "", 0, 500, &out_len);
+  output = std::string(out_str, out_len);
+  EXPECT_EQ(output, "");
+
+  out_str = right_utf8_int32(ctx_ptr, "TestString", 10, 3, &out_len);
+  output = std::string(out_str, out_len);
+  EXPECT_EQ(output, "ing");
+
+  out_str = right_utf8_int32(ctx_ptr, "TestString", 10, -3, &out_len);
+  output = std::string(out_str, out_len);
+  EXPECT_EQ(output, "tString");
+
+  // the text length for this string is 10 (each utf8 char is represented by two bytes)
+  out_str = right_utf8_int32(ctx_ptr, "абвгд", 10, 3, &out_len);
+  output = std::string(out_str, out_len);
+  EXPECT_EQ(output, "вгд");
+}
+
 TEST(TestStringOps, TestBinaryString) {
   gandiva::ExecutionContext ctx;
   uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
diff --git a/cpp/src/gandiva/precompiled/types.h b/cpp/src/gandiva/precompiled/types.h
index 64ef7540b53..f427f972aad 100644
--- a/cpp/src/gandiva/precompiled/types.h
+++ b/cpp/src/gandiva/precompiled/types.h
@@ -489,6 +489,12 @@ const char* castVARCHAR_float32_int64(int64_t context, float value, int64_t len,
 const char* castVARCHAR_float64_int64(int64_t context, double value, int64_t len,
                                       int32_t* out_len);
 
+const char* left_utf8_int32(gdv_int64 context, const char* text, gdv_int32 text_len,
+                            gdv_int32 number, gdv_int32* out_len);
+
+const char* right_utf8_int32(gdv_int64 context, const char* text, gdv_int32 text_len,
+                             gdv_int32 number, gdv_int32* out_len);
+
 const char* binary_string(gdv_int64 context, const char* text, gdv_int32 text_len,
                           gdv_int32* out_len);
 
diff --git a/cpp/src/gandiva/tests/projector_test.cc b/cpp/src/gandiva/tests/projector_test.cc
index b63af40d359..27de24e3adf 100644
--- a/cpp/src/gandiva/tests/projector_test.cc
+++ b/cpp/src/gandiva/tests/projector_test.cc
@@ -818,6 +818,84 @@ TEST_F(TestProjector, TestConcat) {
   EXPECT_ARROW_ARRAY_EQUALS(exp_concat, outputs.at(0));
 }
 
+TEST_F(TestProjector, TestLeftString) {
+  // schema for input fields
+  auto field0 = field("f0", arrow::utf8());
+  auto field1 = field("f1", arrow::int32());
+  auto schema = arrow::schema({field0, field1});
+
+  // output fields
+  auto field_concat = field("left", arrow::utf8());
+
+  // Build expression
+  auto concat_expr =
+      TreeExprBuilder::MakeExpression("left", {field0, field1}, field_concat);
+
+  std::shared_ptr<Projector> projector;
+  auto status = Projector::Make(schema, {concat_expr}, TestConfiguration(), &projector);
+  EXPECT_TRUE(status.ok()) << status.message();
+
+  // Create a row-batch with some sample data
+  int num_records = 6;
+  auto array0 = MakeArrowArrayUtf8({"ab", "", "ab", "invalid", "valid", "invalid"},
+                                   {true, true, true, true, true, true});
+  auto array1 =
+      MakeArrowArrayInt32({1, 500, 2, -5, 5, 0}, {true, true, true, true, true, true});
+  // expected output
+  auto exp_left = MakeArrowArrayUtf8({"a", "", "ab", "in", "valid", ""},
+                                     {true, true, true, true, true, true});
+
+  // prepare input record batch
+  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});
+
+  // Evaluate expression
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  EXPECT_TRUE(status.ok()) << status.message();
+
+  // Validate results
+  EXPECT_ARROW_ARRAY_EQUALS(exp_left, outputs.at(0));
+}
+
+TEST_F(TestProjector, TestRightString) {
+  // schema for input fields
+  auto field0 = field("f0", arrow::utf8());
+  auto field1 = field("f1", arrow::int32());
+  auto schema = arrow::schema({field0, field1});
+
+  // output fields
+  auto field_concat = field("right", arrow::utf8());
+
+  // Build expression
+  auto concat_expr =
+      TreeExprBuilder::MakeExpression("right", {field0, field1}, field_concat);
+
+  std::shared_ptr<Projector> projector;
+  auto status = Projector::Make(schema, {concat_expr}, TestConfiguration(), &projector);
+  EXPECT_TRUE(status.ok()) << status.message();
+
+  // Create a row-batch with some sample data
+  int num_records = 6;
+  auto array0 = MakeArrowArrayUtf8({"ab", "", "ab", "invalid", "valid", "invalid"},
+                                   {true, true, true, true, true, true});
+  auto array1 =
+      MakeArrowArrayInt32({1, 500, 2, -5, 5, 0}, {true, true, true, true, true, true});
+  // expected output
+  auto exp_left = MakeArrowArrayUtf8({"b", "", "ab", "id", "valid", ""},
+                                     {true, true, true, true, true, true});
+
+  // prepare input record batch
+  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});
+
+  // Evaluate expression
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  EXPECT_TRUE(status.ok()) << status.message();
+
+  // Validate results
+  EXPECT_ARROW_ARRAY_EQUALS(exp_left, outputs.at(0));
+}
+
 TEST_F(TestProjector, TestOffset) {
   // schema for input fields
   auto field0 = field("f0", arrow::int32());

From 0477cfcddf4e3017fd2e48d11d87cabbc20de6f4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Pedro?= <joaop@simbioseventures.com>
Date: Mon, 7 Jun 2021 15:56:55 +0530
Subject: [PATCH 357/719] ARROW-12567: [C++][Gandiva] Implement LPAD and RPAD
 functions for string input values
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

#### Implement LPAD and RPAD functions for string input values.

- LPAD([string] basetext, [number] x, [optional string] padtext)
- RPAD([string] basetext, [number] x, [optional string] padtext)

#### Description

lpad - Prepends padtext to basetext in a way that allows as many characters as possible from padtext given an output string length of x. When x is less than or equal to the length of basetext, only characters from basetext are printed in the output. If padtext is omitted then spaces are prepended.

rpad - Appends padtext to basetext in a way that allows as many characters as possible from padtext given an output string length of x. When x is less than or equal to the length of basetext, only characters from basetext are printed in the output. If padtext is omitted then spaces are appended.

Closes #10173 from jpedroantunes/feature/lpad-rpad-functions and squashes the following commits:

4efc0fe8c <João Pedro> Add utf8_length method that ignore invalid char considering size 1
33a5a1478 <João Pedro> Fix identation on function string registry
4c4b2f490 <João Pedro> Change lpad and rpad functions signature and definition
26b90b09e <João Pedro> Correct ci lint errors on gandiva
66594a0af <João Pedro> Correct lint local errors on gandiva
b6b63e9d3 <João Pedro> Add projector test for RPAD string function
dc72148d6 <João Pedro> Add function registry for RPAD string function without pad text
c270fb1ec <João Pedro> Add base implementation and tests for RPAD functions
08d205323 <João Pedro> Add function registry for LPAD string function without pad text
585cad384 <João Pedro> Add base implementation and tests for LPAD function without pad texts considering string input values
73927fc61 <João Pedro> Add projector test for LPAD string function
2c929a98a <João Pedro> Add function registry for LPAD string function
aecaff643 <João Pedro> Add base implementation and tests for LPAD function considering string input values

Authored-by: João Pedro <joaop@simbioseventures.com>
Signed-off-by: Praveen <praveen@dremio.com>
---
 cpp/src/gandiva/function_registry_string.cc   |  14 ++
 cpp/src/gandiva/precompiled/string_ops.cc     | 156 ++++++++++++++++++
 .../gandiva/precompiled/string_ops_test.cc    | 138 ++++++++++++++++
 cpp/src/gandiva/precompiled/types.h           |  14 ++
 cpp/src/gandiva/tests/projector_test.cc       |  84 ++++++++++
 5 files changed, 406 insertions(+)

diff --git a/cpp/src/gandiva/function_registry_string.cc b/cpp/src/gandiva/function_registry_string.cc
index b0280c5611d..9b65f280399 100644
--- a/cpp/src/gandiva/function_registry_string.cc
+++ b/cpp/src/gandiva/function_registry_string.cc
@@ -146,6 +146,20 @@ std::vector<NativeFunction> GetStringFunctionRegistry() {
                      utf8(), kResultNullIfNull, "substr_utf8_int64",
                      NativeFunction::kNeedsContext),
 
+      NativeFunction("lpad", {}, DataTypeVector{utf8(), int32(), utf8()}, utf8(),
+                     kResultNullIfNull, "lpad_utf8_int32_utf8",
+                     NativeFunction::kNeedsContext),
+
+      NativeFunction("lpad", {}, DataTypeVector{utf8(), int32()}, utf8(),
+                     kResultNullIfNull, "lpad_utf8_int32", NativeFunction::kNeedsContext),
+
+      NativeFunction("rpad", {}, DataTypeVector{utf8(), int32(), utf8()}, utf8(),
+                     kResultNullIfNull, "rpad_utf8_int32_utf8",
+                     NativeFunction::kNeedsContext),
+
+      NativeFunction("rpad", {}, DataTypeVector{utf8(), int32()}, utf8(),
+                     kResultNullIfNull, "rpad_utf8_int32", NativeFunction::kNeedsContext),
+
       NativeFunction("concatOperator", {}, DataTypeVector{utf8(), utf8()}, utf8(),
                      kResultNullIfNull, "concatOperator_utf8_utf8",
                      NativeFunction::kNeedsContext),
diff --git a/cpp/src/gandiva/precompiled/string_ops.cc b/cpp/src/gandiva/precompiled/string_ops.cc
index 4472c4857ef..1687e0e0ab5 100644
--- a/cpp/src/gandiva/precompiled/string_ops.cc
+++ b/cpp/src/gandiva/precompiled/string_ops.cc
@@ -192,6 +192,27 @@ gdv_int32 utf8_length(gdv_int64 context, const char* data, gdv_int32 data_len) {
   return count;
 }
 
+// Count the number of utf8 characters, ignoring invalid char, considering size 1
+FORCE_INLINE
+gdv_int32 utf8_length_ignore_invalid(const char* data, gdv_int32 data_len) {
+  int char_len = 0;
+  int count = 0;
+  for (int i = 0; i < data_len; i += char_len) {
+    char_len = utf8_char_length(data[i]);
+    if (char_len == 0 || i + char_len > data_len) {  // invalid byte or incomplete glyph
+      // if invalid byte or incomplete glyph, ignore it
+      char_len = 1;
+    }
+    for (int j = 1; j < char_len; ++j) {
+      if ((data[i + j] & 0xC0) != 0x80) {  // bytes following head-byte of glyph
+        char_len += 1;
+      }
+    }
+    ++count;
+  }
+  return count;
+}
+
 // Get the byte position corresponding to a character position for a non-empty utf8
 // sequence
 FORCE_INLINE
@@ -1580,6 +1601,141 @@ const char* replace_utf8_utf8_utf8(gdv_int64 context, const char* text,
                                              out_len);
 }
 
+FORCE_INLINE
+const char* lpad_utf8_int32_utf8(gdv_int64 context, const char* text, gdv_int32 text_len,
+                                 gdv_int32 return_length, const char* fill_text,
+                                 gdv_int32 fill_text_len, gdv_int32* out_len) {
+  // if the text length or the defined return length (number of characters to return)
+  // is <=0, then return an empty string.
+  if (text_len == 0 || return_length <= 0) {
+    *out_len = 0;
+    return "";
+  }
+
+  // count the number of utf8 characters on text, ignoring invalid bytes
+  int text_char_count = utf8_length_ignore_invalid(text, text_len);
+
+  if (return_length == text_char_count ||
+      (return_length > text_char_count && fill_text_len == 0)) {
+    // case where the return length is same as the text's length, or if it need to
+    // fill into text but "fill_text" is empty, then return text directly.
+    *out_len = text_len;
+    return text;
+  } else if (return_length < text_char_count) {
+    // case where it truncates the result on return length.
+    *out_len = utf8_byte_pos(context, text, text_len, return_length);
+    return text;
+  } else {
+    // case (return_length > text_char_count)
+    // case where it needs to copy "fill_text" on the string left. The total number
+    // of chars to copy is given by (return_length -  text_char_count)
+    char* ret =
+        reinterpret_cast<gdv_binary>(gdv_fn_context_arena_malloc(context, return_length));
+    if (ret == nullptr) {
+      gdv_fn_context_set_error_msg(context,
+                                   "Could not allocate memory for output string");
+      *out_len = 0;
+      return "";
+    }
+    // try to fulfill the return string with the "fill_text" continuously
+    int32_t copied_chars_count = 0;
+    int32_t copied_chars_position = 0;
+    while (copied_chars_count < return_length - text_char_count) {
+      int32_t char_len;
+      int32_t fill_index;
+      // for each char, evaluate its length to consider it when mem copying
+      for (fill_index = 0; fill_index < fill_text_len; fill_index += char_len) {
+        if (copied_chars_count >= return_length - text_char_count) {
+          break;
+        }
+        char_len = utf8_char_length(fill_text[fill_index]);
+        // ignore invalid char on the fill text, considering it as size 1
+        if (char_len == 0) char_len += 1;
+        copied_chars_count++;
+      }
+      memcpy(ret + copied_chars_position, fill_text, fill_index);
+      copied_chars_position += fill_index;
+    }
+    // after fulfilling the text, copy the main string
+    memcpy(ret + copied_chars_position, text, text_len);
+    *out_len = copied_chars_position + text_len;
+    return ret;
+  }
+}
+
+FORCE_INLINE
+const char* rpad_utf8_int32_utf8(gdv_int64 context, const char* text, gdv_int32 text_len,
+                                 gdv_int32 return_length, const char* fill_text,
+                                 gdv_int32 fill_text_len, gdv_int32* out_len) {
+  // if the text length or the defined return length (number of characters to return)
+  // is <=0, then return an empty string.
+  if (text_len == 0 || return_length <= 0) {
+    *out_len = 0;
+    return "";
+  }
+
+  // count the number of utf8 characters on text, ignoring invalid bytes
+  int text_char_count = utf8_length_ignore_invalid(text, text_len);
+
+  if (return_length == text_char_count ||
+      (return_length > text_char_count && fill_text_len == 0)) {
+    // case where the return length is same as the text's length, or if it need to
+    // fill into text but "fill_text" is empty, then return text directly.
+    *out_len = text_len;
+    return text;
+  } else if (return_length < text_char_count) {
+    // case where it truncates the result on return length.
+    *out_len = utf8_byte_pos(context, text, text_len, return_length);
+    return text;
+  } else {
+    // case (return_length > text_char_count)
+    // case where it needs to copy "fill_text" on the string right
+    char* ret =
+        reinterpret_cast<gdv_binary>(gdv_fn_context_arena_malloc(context, return_length));
+    if (ret == nullptr) {
+      gdv_fn_context_set_error_msg(context,
+                                   "Could not allocate memory for output string");
+      *out_len = 0;
+      return "";
+    }
+    // fulfill the initial text copying the main input string
+    memcpy(ret, text, text_len);
+    // try to fulfill the return string with the "fill_text" continuously
+    int32_t copied_chars_count = 0;
+    int32_t copied_chars_position = 0;
+    while (text_char_count + copied_chars_count < return_length) {
+      int32_t char_len;
+      int32_t fill_length;
+      // for each char, evaluate its length to consider it when mem copying
+      for (fill_length = 0; fill_length < fill_text_len; fill_length += char_len) {
+        if (text_char_count + copied_chars_count >= return_length) {
+          break;
+        }
+        char_len = utf8_char_length(fill_text[fill_length]);
+        // ignore invalid char on the fill text, considering it as size 1
+        if (char_len == 0) char_len += 1;
+        copied_chars_count++;
+      }
+      memcpy(ret + text_len + copied_chars_position, fill_text, fill_length);
+      copied_chars_position += fill_length;
+    }
+    *out_len = copied_chars_position + text_len;
+    return ret;
+  }
+}
+
+FORCE_INLINE
+const char* lpad_utf8_int32(gdv_int64 context, const char* text, gdv_int32 text_len,
+                            gdv_int32 return_length, gdv_int32* out_len) {
+  return lpad_utf8_int32_utf8(context, text, text_len, return_length, " ", 1, out_len);
+}
+
+FORCE_INLINE
+const char* rpad_utf8_int32(gdv_int64 context, const char* text, gdv_int32 text_len,
+                            gdv_int32 return_length, gdv_int32* out_len) {
+  return rpad_utf8_int32_utf8(context, text, text_len, return_length, " ", 1, out_len);
+}
+
 FORCE_INLINE
 const char* split_part(gdv_int64 context, const char* text, gdv_int32 text_len,
                        const char* delimiter, gdv_int32 delim_len, gdv_int32 index,
diff --git a/cpp/src/gandiva/precompiled/string_ops_test.cc b/cpp/src/gandiva/precompiled/string_ops_test.cc
index ff1555f59a8..c9a829a70d4 100644
--- a/cpp/src/gandiva/precompiled/string_ops_test.cc
+++ b/cpp/src/gandiva/precompiled/string_ops_test.cc
@@ -801,6 +801,144 @@ TEST(TestStringOps, TestLtrim) {
   EXPECT_FALSE(ctx.has_error());
 }
 
+TEST(TestStringOps, TestLpadString) {
+  gandiva::ExecutionContext ctx;
+  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
+  gdv_int32 out_len = 0;
+  const char* out_str;
+
+  // LPAD function tests - with defined fill pad text
+  out_str = lpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 4, "fill", 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "Test");
+
+  out_str = lpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 10, "fill", 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "TestString");
+
+  out_str = lpad_utf8_int32_utf8(ctx_ptr, "TestString", 0, 10, "fill", 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+
+  out_str = lpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 0, "fill", 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+
+  out_str = lpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, -500, "fill", 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+
+  out_str = lpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 500, "", 0, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "TestString");
+
+  out_str = lpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 18, "Fill", 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "FillFillTestString");
+
+  out_str = lpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 15, "Fill", 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "FillFTestString");
+
+  out_str = lpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 20, "Fill", 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "FillFillFiTestString");
+
+  out_str = lpad_utf8_int32_utf8(ctx_ptr, "абвгд", 10, 7, "д", 2, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "ддабвгд");
+
+  out_str = lpad_utf8_int32_utf8(ctx_ptr, "абвгд", 10, 20, "абвгд", 10, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "абвгдабвгдабвгдабвгд");
+
+  out_str = lpad_utf8_int32_utf8(ctx_ptr, "hello", 5, 6, "д", 2, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "дhello");
+
+  // LPAD function tests - with NO pad text
+  out_str = lpad_utf8_int32(ctx_ptr, "TestString", 10, 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "Test");
+
+  out_str = lpad_utf8_int32(ctx_ptr, "TestString", 10, 10, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "TestString");
+
+  out_str = lpad_utf8_int32(ctx_ptr, "TestString", 0, 10, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+
+  out_str = lpad_utf8_int32(ctx_ptr, "TestString", 10, 0, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+
+  out_str = lpad_utf8_int32(ctx_ptr, "TestString", 10, -500, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+
+  out_str = lpad_utf8_int32(ctx_ptr, "TestString", 10, 18, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "        TestString");
+
+  out_str = lpad_utf8_int32(ctx_ptr, "TestString", 10, 15, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "     TestString");
+
+  out_str = lpad_utf8_int32(ctx_ptr, "абвгд", 10, 7, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "  абвгд");
+}
+
+TEST(TestStringOps, TestRpadString) {
+  gandiva::ExecutionContext ctx;
+  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
+  gdv_int32 out_len = 0;
+  const char* out_str;
+
+  // RPAD function tests - with defined fill pad text
+  out_str = rpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 4, "fill", 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "Test");
+
+  out_str = rpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 10, "fill", 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "TestString");
+
+  out_str = rpad_utf8_int32_utf8(ctx_ptr, "TestString", 0, 10, "fill", 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+
+  out_str = rpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 0, "fill", 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+
+  out_str = rpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, -500, "fill", 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+
+  out_str = rpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 500, "", 0, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "TestString");
+
+  out_str = rpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 18, "Fill", 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "TestStringFillFill");
+
+  out_str = rpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 15, "Fill", 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "TestStringFillF");
+
+  out_str = rpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 20, "Fill", 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "TestStringFillFillFi");
+
+  out_str = rpad_utf8_int32_utf8(ctx_ptr, "абвгд", 10, 7, "д", 2, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "абвгддд");
+
+  out_str = rpad_utf8_int32_utf8(ctx_ptr, "абвгд", 10, 20, "абвгд", 10, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "абвгдабвгдабвгдабвгд");
+
+  out_str = rpad_utf8_int32_utf8(ctx_ptr, "hello", 5, 6, "д", 2, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "helloд");
+
+  // RPAD function tests - with NO pad text
+  out_str = rpad_utf8_int32(ctx_ptr, "TestString", 10, 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "Test");
+
+  out_str = rpad_utf8_int32(ctx_ptr, "TestString", 10, 10, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "TestString");
+
+  out_str = rpad_utf8_int32(ctx_ptr, "TestString", 0, 10, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+
+  out_str = rpad_utf8_int32(ctx_ptr, "TestString", 10, 0, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+
+  out_str = rpad_utf8_int32(ctx_ptr, "TestString", 10, -500, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+
+  out_str = rpad_utf8_int32(ctx_ptr, "TestString", 10, 18, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "TestString        ");
+
+  out_str = rpad_utf8_int32(ctx_ptr, "TestString", 10, 15, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "TestString     ");
+
+  out_str = rpad_utf8_int32(ctx_ptr, "абвгд", 10, 7, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "абвгд  ");
+}
+
 TEST(TestStringOps, TestRtrim) {
   gandiva::ExecutionContext ctx;
   uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
diff --git a/cpp/src/gandiva/precompiled/types.h b/cpp/src/gandiva/precompiled/types.h
index f427f972aad..f16f93fa447 100644
--- a/cpp/src/gandiva/precompiled/types.h
+++ b/cpp/src/gandiva/precompiled/types.h
@@ -419,6 +419,20 @@ gdv_int32 locate_utf8_utf8_int32(gdv_int64 context, const char* sub_str,
                                  gdv_int32 sub_str_len, const char* str,
                                  gdv_int32 str_len, gdv_int32 start_pos);
 
+const char* lpad_utf8_int32_utf8(gdv_int64 context, const char* text, gdv_int32 text_len,
+                                 gdv_int32 return_length, const char* fill_text,
+                                 gdv_int32 fill_text_len, gdv_int32* out_len);
+
+const char* rpad_utf8_int32_utf8(gdv_int64 context, const char* text, gdv_int32 text_len,
+                                 gdv_int32 return_length, const char* fill_text,
+                                 gdv_int32 fill_text_len, gdv_int32* out_len);
+
+const char* lpad_utf8_int32(gdv_int64 context, const char* text, gdv_int32 text_len,
+                            gdv_int32 return_length, gdv_int32* out_len);
+
+const char* rpad_utf8_int32(gdv_int64 context, const char* text, gdv_int32 text_len,
+                            gdv_int32 return_length, gdv_int32* out_len);
+
 const char* replace_with_max_len_utf8_utf8_utf8(gdv_int64 context, const char* text,
                                                 gdv_int32 text_len, const char* from_str,
                                                 gdv_int32 from_str_len,
diff --git a/cpp/src/gandiva/tests/projector_test.cc b/cpp/src/gandiva/tests/projector_test.cc
index 27de24e3adf..ebe3009c356 100644
--- a/cpp/src/gandiva/tests/projector_test.cc
+++ b/cpp/src/gandiva/tests/projector_test.cc
@@ -1088,4 +1088,88 @@ TEST_F(TestProjector, TestIfElseOpt) {
   EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
 }
 
+TEST_F(TestProjector, TestLpad) {
+  // schema for input fields
+  auto field0 = field("f0", arrow::utf8());
+  auto field1 = field("f1", arrow::int32());
+  auto field2 = field("f2", arrow::utf8());
+  auto schema = arrow::schema({field0, field1, field2});
+
+  // output fields
+  auto field_lpad = field("lpad", arrow::utf8());
+
+  // Build expression
+  auto lpad_expr =
+      TreeExprBuilder::MakeExpression("lpad", {field0, field1, field2}, field_lpad);
+
+  std::shared_ptr<Projector> projector;
+  auto status = Projector::Make(schema, {lpad_expr}, TestConfiguration(), &projector);
+  EXPECT_TRUE(status.ok()) << status.message();
+
+  // Create a row-batch with some sample data
+  int num_records = 7;
+  auto array0 = MakeArrowArrayUtf8({"ab", "a", "ab", "invalid", "valid", "invalid", ""},
+                                   {true, true, true, true, true, true, true});
+  auto array1 = MakeArrowArrayInt32({1, 5, 3, 12, 0, 2, 10},
+                                    {true, true, true, true, true, true, true});
+  auto array2 = MakeArrowArrayUtf8({"z", "z", "c", "valid", "invalid", "invalid", ""},
+                                   {true, true, true, true, true, true, true});
+  // expected output
+  auto exp_lpad = MakeArrowArrayUtf8({"a", "zzzza", "cab", "validinvalid", "", "in", ""},
+                                     {true, true, true, true, true, true, true});
+
+  // prepare input record batch
+  auto in = arrow::RecordBatch::Make(schema, num_records, {array0, array1, array2});
+
+  // Evaluate expression
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in, pool_, &outputs);
+  EXPECT_TRUE(status.ok()) << status.message();
+
+  // Validate results
+  EXPECT_ARROW_ARRAY_EQUALS(exp_lpad, outputs.at(0));
+}
+
+TEST_F(TestProjector, TestRpad) {
+  // schema for input fields
+  auto field0 = field("f0", arrow::utf8());
+  auto field1 = field("f1", arrow::int32());
+  auto field2 = field("f2", arrow::utf8());
+  auto schema = arrow::schema({field0, field1, field2});
+
+  // output fields
+  auto field_rpad = field("rpad", arrow::utf8());
+
+  // Build expression
+  auto rpad_expr =
+      TreeExprBuilder::MakeExpression("rpad", {field0, field1, field2}, field_rpad);
+
+  std::shared_ptr<Projector> projector;
+  auto status = Projector::Make(schema, {rpad_expr}, TestConfiguration(), &projector);
+  EXPECT_TRUE(status.ok()) << status.message();
+
+  // Create a row-batch with some sample data
+  int num_records = 7;
+  auto array0 = MakeArrowArrayUtf8({"ab", "a", "ab", "invalid", "valid", "invalid", ""},
+                                   {true, true, true, true, true, true, true});
+  auto array1 = MakeArrowArrayInt32({1, 5, 3, 12, 0, 2, 10},
+                                    {true, true, true, true, true, true, true});
+  auto array2 = MakeArrowArrayUtf8({"z", "z", "c", "valid", "invalid", "invalid", ""},
+                                   {true, true, true, true, true, true, true});
+  // expected output
+  auto exp_rpad = MakeArrowArrayUtf8({"a", "azzzz", "abc", "invalidvalid", "", "in", ""},
+                                     {true, true, true, true, true, true, true});
+
+  // prepare input record batch
+  auto in = arrow::RecordBatch::Make(schema, num_records, {array0, array1, array2});
+
+  // Evaluate expression
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in, pool_, &outputs);
+  EXPECT_TRUE(status.ok()) << status.message();
+
+  // Validate results
+  EXPECT_ARROW_ARRAY_EQUALS(exp_rpad, outputs.at(0));
+}
+
 }  // namespace gandiva

From b0da01de22e129c5e12bf8041305ca48cee9600b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Pedro?= <joaop@simbioseventures.com>
Date: Mon, 7 Jun 2021 16:04:29 +0530
Subject: [PATCH 358/719] ARROW-12936: [C++][Gandiva] Implement ASCII Hive
 function on Gandiva
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implement ASCII Hive function on Gandiva

Closes #10438 from jpedroantunes/feature/add-ascii and squashes the following commits:

49df14525 <João Pedro> Correct linter errors
5bda1e453 <João Pedro> Add projector test for ascii
15e193602 <João Pedro> Add base implementation for ascii

Authored-by: João Pedro <joaop@simbioseventures.com>
Signed-off-by: Praveen <praveen@dremio.com>
---
 cpp/src/gandiva/function_registry_string.cc   |  3 ++
 cpp/src/gandiva/precompiled/string_ops.cc     |  9 +++++
 .../gandiva/precompiled/string_ops_test.cc    | 11 ++++++
 cpp/src/gandiva/precompiled/types.h           |  2 ++
 cpp/src/gandiva/tests/utf8_test.cc            | 35 +++++++++++++++++++
 5 files changed, 60 insertions(+)

diff --git a/cpp/src/gandiva/function_registry_string.cc b/cpp/src/gandiva/function_registry_string.cc
index 9b65f280399..e8c0739b3d4 100644
--- a/cpp/src/gandiva/function_registry_string.cc
+++ b/cpp/src/gandiva/function_registry_string.cc
@@ -62,6 +62,9 @@ std::vector<NativeFunction> GetStringFunctionRegistry() {
       UNARY_SAFE_NULL_NEVER_BOOL_FN(isnull, {}),
       UNARY_SAFE_NULL_NEVER_BOOL_FN(isnotnull, {}),
 
+      NativeFunction("ascii", {}, DataTypeVector{utf8()}, int32(), kResultNullIfNull,
+                     "ascii_utf8"),
+
       NativeFunction("upper", {}, DataTypeVector{utf8()}, utf8(), kResultNullIfNull,
                      "gdv_fn_upper_utf8", NativeFunction::kNeedsContext),
 
diff --git a/cpp/src/gandiva/precompiled/string_ops.cc b/cpp/src/gandiva/precompiled/string_ops.cc
index 1687e0e0ab5..738ec367cd7 100644
--- a/cpp/src/gandiva/precompiled/string_ops.cc
+++ b/cpp/src/gandiva/precompiled/string_ops.cc
@@ -1214,6 +1214,15 @@ const char* concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
   return ret;
 }
 
+// Returns the numeric value of the first character of str.
+GANDIVA_EXPORT
+gdv_int32 ascii_utf8(const char* data, gdv_int32 data_len) {
+  if (data_len == 0) {
+    return 0;
+  }
+  return static_cast<gdv_int32>(data[0]);
+}
+
 FORCE_INLINE
 const char* convert_fromUTF8_binary(gdv_int64 context, const char* bin_in, gdv_int32 len,
                                     gdv_int32* out_len) {
diff --git a/cpp/src/gandiva/precompiled/string_ops_test.cc b/cpp/src/gandiva/precompiled/string_ops_test.cc
index c9a829a70d4..8ffaace624a 100644
--- a/cpp/src/gandiva/precompiled/string_ops_test.cc
+++ b/cpp/src/gandiva/precompiled/string_ops_test.cc
@@ -42,6 +42,17 @@ TEST(TestStringOps, TestCompare) {
   EXPECT_GT(mem_compare(left, 7, right, 5), 0);
 }
 
+TEST(TestStringOps, TestAscii) {
+  // ASCII
+  EXPECT_EQ(ascii_utf8("ABC", 3), 65);
+  EXPECT_EQ(ascii_utf8("abc", 3), 97);
+  EXPECT_EQ(ascii_utf8("Hello World!", 12), 72);
+  EXPECT_EQ(ascii_utf8("This is us", 10), 84);
+  EXPECT_EQ(ascii_utf8("", 0), 0);
+  EXPECT_EQ(ascii_utf8("123", 3), 49);
+  EXPECT_EQ(ascii_utf8("999", 3), 57);
+}
+
 TEST(TestStringOps, TestBeginsEnds) {
   // starts_with
   EXPECT_TRUE(starts_with_utf8_utf8("hello sir", 9, "hello", 5));
diff --git a/cpp/src/gandiva/precompiled/types.h b/cpp/src/gandiva/precompiled/types.h
index f16f93fa447..be769ddbdaf 100644
--- a/cpp/src/gandiva/precompiled/types.h
+++ b/cpp/src/gandiva/precompiled/types.h
@@ -412,6 +412,8 @@ const char* btrim_utf8_utf8(gdv_int64 context, const char* basetext,
                             gdv_int32 basetext_len, const char* trimtext,
                             gdv_int32 trimtext_len, int32_t* out_len);
 
+gdv_int32 ascii_utf8(const char* data, gdv_int32 data_len);
+
 gdv_int32 locate_utf8_utf8(gdv_int64 context, const char* sub_str, gdv_int32 sub_str_len,
                            const char* str, gdv_int32 str_len);
 
diff --git a/cpp/src/gandiva/tests/utf8_test.cc b/cpp/src/gandiva/tests/utf8_test.cc
index 01e62a59379..146af2010c6 100644
--- a/cpp/src/gandiva/tests/utf8_test.cc
+++ b/cpp/src/gandiva/tests/utf8_test.cc
@@ -680,4 +680,39 @@ TEST_F(TestUtf8, TestCastVarChar) {
   EXPECT_ARROW_ARRAY_EQUALS(exp, outputs[0]);
 }
 
+TEST_F(TestUtf8, TestAscii) {
+  // schema for input fields
+  auto field0 = field("f0", arrow::utf8());
+  auto schema = arrow::schema({field0});
+
+  // output fields
+  auto field_asc = field("ascii", arrow::int32());
+
+  // Build expression
+  auto asc_expr = TreeExprBuilder::MakeExpression("ascii", {field0}, field_asc);
+
+  std::shared_ptr<Projector> projector;
+  auto status = Projector::Make(schema, {asc_expr}, TestConfiguration(), &projector);
+  EXPECT_TRUE(status.ok()) << status.message();
+
+  // Create a row-batch with some sample data
+  int num_records = 6;
+  auto array0 = MakeArrowArrayUtf8({"ABC", "", "abc", "Hello World", "123", "999"},
+                                   {true, true, true, true, true, true});
+  // expected output
+  auto exp_asc =
+      MakeArrowArrayInt32({65, 0, 97, 72, 49, 57}, {true, true, true, true, true, true});
+
+  // prepare input record batch
+  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0});
+
+  // Evaluate expression
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  EXPECT_TRUE(status.ok()) << status.message();
+
+  // Validate results
+  EXPECT_ARROW_ARRAY_EQUALS(exp_asc, outputs.at(0));
+}
+
 }  // namespace gandiva

From 4fb9de2ef23dbbc3cd2b8991c7f40fccd7b87cd1 Mon Sep 17 00:00:00 2001
From: Yibo Cai <yibo.cai@arm.com>
Date: Mon, 7 Jun 2021 10:41:29 +0000
Subject: [PATCH 359/719] ARROW-12972: [CI] Fix centos-8 cmake error

Install libarchive to fix cmake error on centos-8.

cmake: undefined symbol: archive_write_add_filter_zstd

Closes #10460 from cyb70289/12972-cmake-centos8

Authored-by: Yibo Cai <yibo.cai@arm.com>
Signed-off-by: Yibo Cai <yibo.cai@arm.com>
---
 dev/release/verify-yum.sh                                     | 1 +
 dev/tasks/linux-packages/apache-arrow/yum/centos-8/Dockerfile | 1 +
 2 files changed, 2 insertions(+)

diff --git a/dev/release/verify-yum.sh b/dev/release/verify-yum.sh
index 14318678015..ddc45a6c95f 100755
--- a/dev/release/verify-yum.sh
+++ b/dev/release/verify-yum.sh
@@ -128,6 +128,7 @@ ${install_command} \
   ${cmake_package} \
   gcc-c++ \
   git \
+  libarchive \
   make
 mkdir -p build
 cp -a /arrow/cpp/examples/minimal_build build
diff --git a/dev/tasks/linux-packages/apache-arrow/yum/centos-8/Dockerfile b/dev/tasks/linux-packages/apache-arrow/yum/centos-8/Dockerfile
index 7a7865ae404..ad145c4ee2a 100644
--- a/dev/tasks/linux-packages/apache-arrow/yum/centos-8/Dockerfile
+++ b/dev/tasks/linux-packages/apache-arrow/yum/centos-8/Dockerfile
@@ -40,6 +40,7 @@ RUN \
     glog-devel \
     gobject-introspection-devel \
     gtk-doc \
+    libarchive \
     libzstd-devel \
     llvm-devel \
     llvm-static \

From 9f5a4913b2304d68fa47ab6a3e2f56f0143dde28 Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Mon, 7 Jun 2021 09:00:21 -0500
Subject: [PATCH 360/719] ARROW-12824: [R][CI] Upgrade builds for R 4.1 release

Closes #10389 from thisisnic/ARROW-12824

Lead-authored-by: Nic Crane <thisisnic@gmail.com>
Co-authored-by: Jonathan Keane <jkeane@gmail.com>
Co-authored-by: Nic <thisisnic@gmail.com>
Signed-off-by: Jonathan Keane <jkeane@gmail.com>
---
 .env                                          |   2 +-
 .github/workflows/r.yml                       |  21 +---
 ci/docker/linux-apt-docs.dockerfile           |   2 +-
 ci/docker/linux-apt-r.dockerfile              |   4 +-
 ...ionNonenumpy1.17python3.7.____cpython.yaml |  12 +-
 ...osx_64_numpy1.17python3.6.____cpython.yaml |  12 +-
 ...osx_64_numpy1.17python3.7.____cpython.yaml |  12 +-
 ...r_base3.6.yaml => linux_64_r_base4.1.yaml} |  15 ++-
 .../.ci_support/r/osx_64_r_base4.0.yaml       |   9 +-
 ...4_r_base3.6.yaml => osx_64_r_base4.1.yaml} |  11 +-
 ...4_r_base3.6.yaml => win_64_r_base4.1.yaml} |   2 +-
 dev/tasks/r/github.linux.versions.yml         |   1 +
 dev/tasks/r/github.windows.rtools35.yml       | 108 ++++++++++++++++++
 dev/tasks/tasks.yml                           |  30 ++---
 14 files changed, 175 insertions(+), 66 deletions(-)
 rename dev/tasks/conda-recipes/.ci_support/r/{linux_64_r_base3.6.yaml => linux_64_r_base4.1.yaml} (62%)
 rename dev/tasks/conda-recipes/.ci_support/r/{osx_64_r_base3.6.yaml => osx_64_r_base4.1.yaml} (80%)
 rename dev/tasks/conda-recipes/.ci_support/r/{win_64_r_base3.6.yaml => win_64_r_base4.1.yaml} (95%)
 create mode 100644 dev/tasks/r/github.windows.rtools35.yml

diff --git a/.env b/.env
index 16e971c82e6..e06c6e57a3e 100644
--- a/.env
+++ b/.env
@@ -59,7 +59,7 @@ KARTOTHEK=latest
 HDFS=3.2.1
 SPARK=master
 DOTNET=3.1
-R=4.0
+R=4.1
 ARROW_R_DEV=TRUE
 # These correspond to images on Docker Hub that contain R, e.g. rhub/ubuntu-gcc-release:latest
 R_ORG=rhub
diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml
index dd7d98d0890..e1647807cef 100644
--- a/.github/workflows/r.yml
+++ b/.github/workflows/r.yml
@@ -57,8 +57,8 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        r: ["3.6"]
-        ubuntu: [18.04]
+        r: ["4.1"]
+        ubuntu: [20.04]
     env:
       R: ${{ matrix.r }}
       UBUNTU: ${{ matrix.ubuntu }}
@@ -173,7 +173,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        rtools: [35, 40]
+        rtools: [40]
     env:
       TEST_R_WITH_ARROW: "TRUE"
       ARROW_R_CXXFLAGS: "-Werror"
@@ -209,18 +209,10 @@ jobs:
           path: ccache
           key: r-${{ matrix.rtools }}-ccache-mingw-${{ hashFiles('cpp/**') }}
           restore-keys: r-${{ matrix.rtools }}-ccache-mingw-
-      # We use the makepkg-mingw setup that is included in rtools40 even when
-      # we use the rtools35 compilers, so we always install R 4.0/Rtools40
       - uses: r-lib/actions/setup-r@master
         with:
           rtools-version: 40
-          r-version: "4.0"
-          Ncpus: 2
-      - uses: r-lib/actions/setup-r@master
-        if: ${{ matrix.rtools == 35 }}
-        with:
-          rtools-version: 35
-          r-version: "3.6"
+          r-version: "4.1"
           Ncpus: 2
       - name: Build Arrow C++
         shell: bash
@@ -255,8 +247,3 @@ jobs:
         shell: cmd
         run: cat check/arrow.Rcheck/00install.out
         if: always()
-      # We can remove this when we drop support for Rtools 3.5.
-      - name: Ensure using system tar in actions/cache
-        run: |
-          Write-Output "${Env:windir}\System32" | `
-            Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
diff --git a/ci/docker/linux-apt-docs.dockerfile b/ci/docker/linux-apt-docs.dockerfile
index e3b258987f6..95ccb4b94cc 100644
--- a/ci/docker/linux-apt-docs.dockerfile
+++ b/ci/docker/linux-apt-docs.dockerfile
@@ -18,7 +18,7 @@
 ARG base
 FROM ${base}
 
-ARG r=4.0
+ARG r=4.1
 ARG jdk=8
 
 # See R install instructions at https://cloud.r-project.org/bin/linux/ubuntu/
diff --git a/ci/docker/linux-apt-r.dockerfile b/ci/docker/linux-apt-r.dockerfile
index 8924208b878..7b78da9691a 100644
--- a/ci/docker/linux-apt-r.dockerfile
+++ b/ci/docker/linux-apt-r.dockerfile
@@ -36,8 +36,8 @@ RUN apt-get update -y && \
     # -cran40 has 4.0 versions for bionic and focal
     # R 3.2, 3.3, 3.4 are available without the suffix but only for trusty and xenial
     # TODO: make sure OS version and R version are valid together and conditionally set repo suffix
-    # This is a hack to turn 3.6 into 35 and 4.0 into 40:
-    add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu '$(lsb_release -cs)'-cran'$(echo "${r}" | tr -d . | tr 6 5)'/' && \
+    # This is a hack to turn 3.6 into 35, and 4.0/4.1 into 40:
+    add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu '$(lsb_release -cs)'-cran'$(echo "${r}" | tr -d . | tr 6 5 | tr 1 0)'/' && \
     apt-get install -y \
         r-base=${r}* \
         r-recommended=${r}* \
diff --git a/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython.yaml
index 6ffa87a5eb9..1f4c527effd 100644
--- a/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython.yaml
@@ -1,5 +1,5 @@
 aws_sdk_cpp:
-- 1.8.151
+- 1.8.186
 bzip2:
 - '1'
 c_compiler:
@@ -25,17 +25,17 @@ docker_image:
 gflags:
 - '2.2'
 glog:
-- 0.4.0
+- '0.5'
 grpc_cpp:
-- '1.36'
+- '1.38'
 libprotobuf:
-- '3.15'
+- '3.16'
 lz4_c:
 - 1.9.3
 numpy:
 - '1.17'
 orc:
-- 1.6.7
+- 1.6.8
 pin_run_as_build:
   bzip2:
     max_pin: x
@@ -67,4 +67,4 @@ zip_keys:
 zlib:
 - '1.2'
 zstd:
-- '1.4'
+- '1.5'
\ No newline at end of file
diff --git a/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.6.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.6.____cpython.yaml
index 7b2dbb34d76..40c017cf36e 100644
--- a/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.6.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.6.____cpython.yaml
@@ -1,7 +1,7 @@
 MACOSX_DEPLOYMENT_TARGET:
 - '10.9'
 aws_sdk_cpp:
-- 1.8.151
+- 1.8.186
 bzip2:
 - '1'
 c_compiler:
@@ -21,11 +21,11 @@ cxx_compiler_version:
 gflags:
 - '2.2'
 glog:
-- 0.4.0
+- '0.5'
 grpc_cpp:
-- '1.36'
+- '1.38'
 libprotobuf:
-- '3.15'
+- '3.16'
 lz4_c:
 - 1.9.3
 macos_machine:
@@ -33,7 +33,7 @@ macos_machine:
 numpy:
 - '1.17'
 orc:
-- 1.6.7
+- 1.6.8
 pin_run_as_build:
   bzip2:
     max_pin: x
@@ -62,4 +62,4 @@ zip_keys:
 zlib:
 - '1.2'
 zstd:
-- '1.4'
+- '1.5'
\ No newline at end of file
diff --git a/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.7.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.7.____cpython.yaml
index 8e3e828ab8a..378a28348b2 100644
--- a/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.7.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.7.____cpython.yaml
@@ -1,7 +1,7 @@
 MACOSX_DEPLOYMENT_TARGET:
 - '10.9'
 aws_sdk_cpp:
-- 1.8.151
+- 1.8.186
 bzip2:
 - '1'
 c_compiler:
@@ -21,11 +21,11 @@ cxx_compiler_version:
 gflags:
 - '2.2'
 glog:
-- 0.4.0
+- '0.5'
 grpc_cpp:
-- '1.36'
+- '1.38'
 libprotobuf:
-- '3.15'
+- '3.16'
 lz4_c:
 - 1.9.3
 macos_machine:
@@ -33,7 +33,7 @@ macos_machine:
 numpy:
 - '1.17'
 orc:
-- 1.6.7
+- 1.6.8
 pin_run_as_build:
   bzip2:
     max_pin: x
@@ -62,4 +62,4 @@ zip_keys:
 zlib:
 - '1.2'
 zstd:
-- '1.4'
+- '1.5'
\ No newline at end of file
diff --git a/dev/tasks/conda-recipes/.ci_support/r/linux_64_r_base3.6.yaml b/dev/tasks/conda-recipes/.ci_support/r/linux_64_r_base4.1.yaml
similarity index 62%
rename from dev/tasks/conda-recipes/.ci_support/r/linux_64_r_base3.6.yaml
rename to dev/tasks/conda-recipes/.ci_support/r/linux_64_r_base4.1.yaml
index ac945ce72d3..5bee341b87e 100644
--- a/dev/tasks/conda-recipes/.ci_support/r/linux_64_r_base3.6.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/r/linux_64_r_base4.1.yaml
@@ -1,7 +1,9 @@
 c_compiler:
 - gcc
 c_compiler_version:
-- '7'
+- '9'
+cdt_name:
+- cos6
 channel_sources:
 - conda-forge,defaults
 channel_targets:
@@ -9,14 +11,19 @@ channel_targets:
 cxx_compiler:
 - gxx
 cxx_compiler_version:
-- '7'
+- '9'
 docker_image:
-- condaforge/linux-anvil-comp7
+- quay.io/condaforge/linux-anvil-comp7
 pin_run_as_build:
   r-base:
     min_pin: x.x
     max_pin: x.x
 r_base:
-- '3.6'
+- '4.1'
 target_platform:
 - linux-64
+zip_keys:
+- - c_compiler_version
+  - cxx_compiler_version
+- - cdt_name
+  - docker_image
\ No newline at end of file
diff --git a/dev/tasks/conda-recipes/.ci_support/r/osx_64_r_base4.0.yaml b/dev/tasks/conda-recipes/.ci_support/r/osx_64_r_base4.0.yaml
index 8343a284b97..4d1fe27c357 100644
--- a/dev/tasks/conda-recipes/.ci_support/r/osx_64_r_base4.0.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/r/osx_64_r_base4.0.yaml
@@ -3,7 +3,7 @@ MACOSX_DEPLOYMENT_TARGET:
 c_compiler:
 - clang
 c_compiler_version:
-- '10'
+- '11'
 channel_sources:
 - conda-forge,defaults
 channel_targets:
@@ -11,11 +11,9 @@ channel_targets:
 cxx_compiler:
 - clangxx
 cxx_compiler_version:
-- '10'
+- '11'
 macos_machine:
 - x86_64-apple-darwin13.4.0
-macos_min_version:
-- '10.9'
 pin_run_as_build:
   r-base:
     min_pin: x.x
@@ -24,3 +22,6 @@ r_base:
 - '4.0'
 target_platform:
 - osx-64
+zip_keys:
+- - c_compiler_version
+  - cxx_compiler_version
\ No newline at end of file
diff --git a/dev/tasks/conda-recipes/.ci_support/r/osx_64_r_base3.6.yaml b/dev/tasks/conda-recipes/.ci_support/r/osx_64_r_base4.1.yaml
similarity index 80%
rename from dev/tasks/conda-recipes/.ci_support/r/osx_64_r_base3.6.yaml
rename to dev/tasks/conda-recipes/.ci_support/r/osx_64_r_base4.1.yaml
index e3c5b898be6..4821a15260a 100644
--- a/dev/tasks/conda-recipes/.ci_support/r/osx_64_r_base3.6.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/r/osx_64_r_base4.1.yaml
@@ -3,7 +3,7 @@ MACOSX_DEPLOYMENT_TARGET:
 c_compiler:
 - clang
 c_compiler_version:
-- '10'
+- '11'
 channel_sources:
 - conda-forge,defaults
 channel_targets:
@@ -11,16 +11,17 @@ channel_targets:
 cxx_compiler:
 - clangxx
 cxx_compiler_version:
-- '10'
+- '11'
 macos_machine:
 - x86_64-apple-darwin13.4.0
-macos_min_version:
-- '10.9'
 pin_run_as_build:
   r-base:
     min_pin: x.x
     max_pin: x.x
 r_base:
-- '3.6'
+- '4.1'
 target_platform:
 - osx-64
+zip_keys:
+- - c_compiler_version
+  - cxx_compiler_version
\ No newline at end of file
diff --git a/dev/tasks/conda-recipes/.ci_support/r/win_64_r_base3.6.yaml b/dev/tasks/conda-recipes/.ci_support/r/win_64_r_base4.1.yaml
similarity index 95%
rename from dev/tasks/conda-recipes/.ci_support/r/win_64_r_base3.6.yaml
rename to dev/tasks/conda-recipes/.ci_support/r/win_64_r_base4.1.yaml
index 3fb7f88499a..2fe9ad314dc 100644
--- a/dev/tasks/conda-recipes/.ci_support/r/win_64_r_base3.6.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/r/win_64_r_base4.1.yaml
@@ -7,6 +7,6 @@ pin_run_as_build:
     min_pin: x.x
     max_pin: x.x
 r_base:
-- '3.6'
+- '4.1'
 target_platform:
 - win-64
diff --git a/dev/tasks/r/github.linux.versions.yml b/dev/tasks/r/github.linux.versions.yml
index 25f1f8a6557..f383fe8d07f 100644
--- a/dev/tasks/r/github.linux.versions.yml
+++ b/dev/tasks/r/github.linux.versions.yml
@@ -39,6 +39,7 @@ jobs:
           - "3.3"
           - "3.4"
           - "3.5"
+          - "3.6"
     env:
       R_ORG: "rstudio"
       R_IMAGE: "r-base"
diff --git a/dev/tasks/r/github.windows.rtools35.yml b/dev/tasks/r/github.windows.rtools35.yml
new file mode 100644
index 00000000000..53b4200d2ce
--- /dev/null
+++ b/dev/tasks/r/github.windows.rtools35.yml
@@ -0,0 +1,108 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# NOTE: must set "Crossbow" as name to have the badge links working in the
+# github comment reports!
+
+
+
+# NOTE: must set "Crossbow" as name to have the badge links working in the
+# github comment reports!
+name: Crossbow
+
+on:
+  push:
+    branches:
+      - "*-github-*"
+
+jobs:
+  windows-rtools35:
+      name: "AMD64 Windows R 3.6 RTools 35"
+      runs-on: windows-latest
+      timeout-minutes: 60
+      strategy:
+        fail-fast: false
+      env:
+        TEST_R_WITH_ARROW: "TRUE"
+        ARROW_R_CXXFLAGS: "-Werror"
+        _R_CHECK_TESTS_NLINES_: 0
+      steps:
+        - run: git config --global core.autocrlf false
+        - name: Checkout Crossbow
+          uses: actions/checkout@v2
+          with:
+            fetch-depth: 0
+        - name: Make R tests verbose
+          # If you get a segfault/mysterious test Execution halted,
+          # make this `true` to see where it dies.
+          if: false
+          shell: cmd
+          run: |
+            cd arrow/r/tests
+            sed -i.bak -E -e 's/"arrow"/"arrow", reporter = "location"/' testthat.R
+            rm -f testthat.R.bak
+        - name: Checkout Arrow
+          run: |
+            git clone --no-checkout {{ arrow.remote }} arrow
+            git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
+            git -C arrow checkout FETCH_HEAD
+            git -C arrow submodule update --init --recursive
+        # We use the makepkg-mingw setup / pacman that is included in rtools40
+        # even when though use the rtools35 compilers
+        - uses: r-lib/actions/setup-r@master
+          with:
+            rtools-version: 40
+            r-version: "4.0"
+            Ncpus: 2
+        - uses: r-lib/actions/setup-r@master
+          with:
+            rtools-version: 35
+            r-version: "3.6"
+            Ncpus: 2
+        - name: Build Arrow C++
+          shell: bash
+          env:
+            RTOOLS_VERSION: 35
+          run: cd arrow && ci/scripts/r_windows_build.sh
+        - uses: actions/upload-artifact@v1
+          with:
+            name: Rtools 35 Arrow C++
+            path: arrow/libarrow.zip
+        - name: Install R package dependencies
+          shell: Rscript {0}
+          run: |
+            options(pkgType="win.binary")
+            install.packages(c("remotes", "rcmdcheck"))
+            remotes::install_deps("arrow/r", dependencies = TRUE)
+        - name: Check
+          shell: Rscript {0}
+          run: |
+            Sys.setenv(
+              RWINLIB_LOCAL = file.path(Sys.getenv("GITHUB_WORKSPACE"), "arrow", "libarrow.zip"),
+              MAKEFLAGS = paste0("-j", parallel::detectCores())
+            )
+            rcmdcheck::rcmdcheck("arrow/r",
+                                build_args = '--no-build-vignettes',
+                                args = c('--no-manual', '--as-cran', '--ignore-vignettes', '--run-donttest'),
+                                error_on = 'warning',
+                                check_dir = 'check',
+                                timeout = 3600
+            )
+        - name: Dump install logs
+          shell: cmd
+          run: cat check/arrow.Rcheck/00install.out
+          if: always()
\ No newline at end of file
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index 7e3fe74bbbb..1720b9316fd 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -157,27 +157,27 @@ tasks:
   #   Python and the OS are the main dimension. The R package `r-arrow` is
   #   an independent feedstock as it doesn't have the Python but the
   #   R dimension. To limit the number of CI jobs, we are building `r-arrow`
-  #   for R 3.6 with the Python 3.6 jobs and for R 4.0 with the Python 3.7 jobs.
+  #   for R 4.0 with the Python 3.6 jobs and for R 4.1 with the Python 3.7 jobs.
   # * The files in `dev/tasks/conda-recipes/.ci_support/` are automatically
   #   generated and to be synced regularly from the feedstock. We have no way
   #   yet to generate them inside the arrow repository automatically.
 
-  conda-linux-gcc-py36-cpu-r36:
+  conda-linux-gcc-py36-cpu-r40:
     ci: azure
     template: conda-recipes/azure.linux.yml
     params:
       config: linux_64_cuda_compiler_versionNonenumpy1.17python3.6.____cpython
-      r_config: linux_64_r_base3.6
+      r_config: linux_64_r_base4.0
     artifacts:
       - arrow-cpp-{no_rc_version}-py36(h[a-z0-9]+)_0_cpu.tar.bz2
       - pyarrow-{no_rc_version}-py36(h[a-z0-9]+)_0_cpu.tar.bz2
 
-  conda-linux-gcc-py37-cpu-r40:
+  conda-linux-gcc-py37-cpu-r41:
     ci: azure
     template: conda-recipes/azure.linux.yml
     params:
       config: linux_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython
-      r_config: linux_64_r_base4.0
+      r_config: linux_64_r_base4.1
     artifacts:
       - arrow-cpp-{no_rc_version}-py37(h[a-z0-9]+)_0_cpu.tar.bz2
       - pyarrow-{no_rc_version}-py37(h[a-z0-9]+)_0_cpu.tar.bz2
@@ -228,22 +228,22 @@ tasks:
 
   ############################## Conda OSX ####################################
 
-  conda-osx-clang-py36-r36:
+  conda-osx-clang-py36-r40:
     ci: azure
     template: conda-recipes/azure.osx.yml
     params:
       config: osx_64_numpy1.17python3.6.____cpython
-      r_config: osx_64_r_base3.6
+      r_config: osx_64_r_base4.0
     artifacts:
       - arrow-cpp-{no_rc_version}-py36(h[a-z0-9]+)_0_cpu.tar.bz2
       - pyarrow-{no_rc_version}-py36(h[a-z0-9]+)_0_cpu.tar.bz2
 
-  conda-osx-clang-py37-r40:
+  conda-osx-clang-py37-r41:
     ci: azure
     template: conda-recipes/azure.osx.yml
     params:
       config: osx_64_numpy1.17python3.7.____cpython
-      r_config: osx_64_r_base4.0
+      r_config: osx_64_r_base4.1
     artifacts:
       - arrow-cpp-{no_rc_version}-py37(h[a-z0-9]+)_0_cpu.tar.bz2
       - pyarrow-{no_rc_version}-py37(h[a-z0-9]+)_0_cpu.tar.bz2
@@ -286,22 +286,22 @@ tasks:
 
   ############################## Conda Windows ################################
 
-  conda-win-vs2017-py36-r36:
+  conda-win-vs2017-py36-r40:
     ci: azure
     template: conda-recipes/azure.win.yml
     params:
       config: win_64_cuda_compiler_versionNonenumpy1.17python3.6.____cpython
-      r_config: win_64_r_base3.6
+      r_config: win_64_r_base4.0
     artifacts:
       - arrow-cpp-{no_rc_version}-py36(h[a-z0-9]+)_0_cpu.tar.bz2
       - pyarrow-{no_rc_version}-py36(h[a-z0-9]+)_0_cpu.tar.bz2
 
-  conda-win-vs2017-py37-r40:
+  conda-win-vs2017-py37-r41:
     ci: azure
     template: conda-recipes/azure.win.yml
     params:
       config: win_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython
-      r_config: win_64_r_base4.0
+      r_config: win_64_r_base4.1
     artifacts:
       - arrow-cpp-{no_rc_version}-py37(h[a-z0-9]+)_0_cpu.tar.bz2
       - pyarrow-{no_rc_version}-py37(h[a-z0-9]+)_0_cpu.tar.bz2
@@ -860,6 +860,10 @@ tasks:
   test-r-version-compatibility:
     ci: github
     template: r/github.linux.version.compatibility.yml
+    
+  test-r-rtools-35:
+    ci: github
+    template: r/github.windows.rtools35.yml
 
   test-r-versions:
     ci: github

From 8773b9d45d254474f45630e508a1b3530be1fa99 Mon Sep 17 00:00:00 2001
From: "Maarten A. Breddels" <maartenbreddels@gmail.com>
Date: Mon, 7 Jun 2021 16:31:26 +0200
Subject: [PATCH 361/719] ARROW-10557: [C++] Add scalar string
 slicing/substring extract kernel
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Needs a rebase after https://github.com/apache/arrow/pull/8621 is merged

I totally agree with https://github.com/python/cpython/blob/c9bc290dd6e3994a4ead2a224178bcba86f0c0e4/Objects/sliceobject.c#L252

This was tricky to get right, the main difficulty is in manually dealing with reverse iterators. Therefore I put on extra guardrails by having the Python unittests cover a lot of cases. All edge cases detected by this are translated to the C++ unittest suite, so we could reduce them to reduce pytest execution cost (I added 1 second).

Slicing is based on Python, `[start, stop)` inclusive/exclusive semantics, where an index refers to a codeunit (like Python apparently, badly documented), and negative indices start counting from the right. `step != 0` is supported, like Python.

The only thing we cannot support easily, are things like reversing a string, since in Python one can do `s[::-1]` or `s[-1::-1]`, but we don't support empty values with the Option machinery (we model this as an c-`int64`). To mimic this, we can do `pc.utf8_slice_codeunits(ar, start=-1, end=-sys.maxsize, step=-1)` (i.e. a very large negative value).

For instance, libraries such as Pandas and Vaex can do sth like that, confirmed to be working by modifying the unittest like this:
```python
import sys
@pytest.mark.parametrize('start', list(range(-6, 6)) + [None])
@pytest.mark.parametrize('stop', list(range(-6, 6)) + [None])
@pytest.mark.parametrize('step', [-3, -2, -1, 1, 2, 3])
def test_slice_compatibility(start,stop, step):
    input = pa.array(["", "𝑓", "𝑓ö", "𝑓öõ", "𝑓öõḍ", "𝑓öõḍš"])
    expected = pa.array([k.as_py()[start:stop:step] for k in input])
    if start is None:
        start = -sys.maxsize if step > 0 else sys.maxsize
    if stop is None:
        stop = sys.maxsize if step > 0 else -sys.maxsize
    result = pc.utf8_slice_codeunits(input, start=start, stop=stop, step=step)
    assert expected.equals(result)
```

So libraries using this can implement the full Python behavior with this workaround.

Closes #9000 from maartenbreddels/ARROW-10557

Lead-authored-by: Maarten A. Breddels <maartenbreddels@gmail.com>
Co-authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/compute/api_scalar.h            |   8 +
 .../arrow/compute/kernels/scalar_string.cc    | 263 ++++++++++++++++--
 .../compute/kernels/scalar_string_test.cc     | 112 ++++++++
 cpp/src/arrow/util/utf8.h                     |  24 ++
 docs/source/cpp/compute.rst                   |  34 ++-
 python/pyarrow/_compute.pyx                   |  17 ++
 python/pyarrow/compute.py                     |   7 +-
 python/pyarrow/includes/libarrow.pxd          |   7 +
 python/pyarrow/tests/test_compute.py          |  12 +
 9 files changed, 448 insertions(+), 36 deletions(-)

diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h
index 1d15b629b6c..21d5c5324d4 100644
--- a/cpp/src/arrow/compute/api_scalar.h
+++ b/cpp/src/arrow/compute/api_scalar.h
@@ -130,6 +130,14 @@ struct ARROW_EXPORT TrimOptions : public FunctionOptions {
   std::string characters;
 };
 
+struct ARROW_EXPORT SliceOptions : public FunctionOptions {
+  explicit SliceOptions(int64_t start, int64_t stop = std::numeric_limits<int64_t>::max(),
+                        int64_t step = 1)
+      : start(start), stop(stop), step(step) {}
+
+  int64_t start, stop, step;
+};
+
 enum CompareOperator : int8_t {
   EQUAL,
   NOT_EQUAL,
diff --git a/cpp/src/arrow/compute/kernels/scalar_string.cc b/cpp/src/arrow/compute/kernels/scalar_string.cc
index d939d1c7722..1d87bd86c67 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string.cc
@@ -138,7 +138,10 @@ struct StringTransform {
   using offset_type = typename Type::offset_type;
   using ArrayType = typename TypeTraits<Type>::ArrayType;
 
-  static int64_t MaxCodeunits(offset_type input_ncodeunits) { return input_ncodeunits; }
+  virtual int64_t MaxCodeunits(int64_t ninputs, int64_t input_ncodeunits) {
+    return input_ncodeunits;
+  }
+
   static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     return Derived().Execute(ctx, batch, out);
   }
@@ -156,7 +159,8 @@ struct StringTransform {
       offset_type input_ncodeunits = input_boxed.total_values_length();
       offset_type input_nstrings = static_cast<offset_type>(input.length);
 
-      int64_t output_ncodeunits_max = Derived::MaxCodeunits(input_ncodeunits);
+      const int64_t output_ncodeunits_max =
+          MaxCodeunits(input_nstrings, input_ncodeunits);
       if (output_ncodeunits_max > std::numeric_limits<offset_type>::max()) {
         return Status::CapacityError(
             "Result might not fit in a 32bit utf8 array, convert to large_utf8");
@@ -183,35 +187,36 @@ struct StringTransform {
         output_ncodeunits += encoded_nbytes;
         output_string_offsets[i + 1] = output_ncodeunits;
       }
+      DCHECK_LE(output_ncodeunits, output_ncodeunits_max);
 
       // Trim the codepoint buffer, since we allocated too much
-      RETURN_NOT_OK(values_buffer->Resize(output_ncodeunits, /*shrink_to_fit=*/true));
+      return values_buffer->Resize(output_ncodeunits, /*shrink_to_fit=*/true);
     } else {
+      DCHECK_EQ(batch[0].kind(), Datum::SCALAR);
       const auto& input = checked_cast<const BaseBinaryScalar&>(*batch[0].scalar());
-      auto result = checked_pointer_cast<BaseBinaryScalar>(MakeNullScalar(out->type()));
-      if (input.is_valid) {
-        result->is_valid = true;
-        offset_type data_nbytes = static_cast<offset_type>(input.value->size());
+      if (!input.is_valid) {
+        return Status::OK();
+      }
+      auto* result = checked_cast<BaseBinaryScalar*>(out->scalar().get());
+      result->is_valid = true;
+      offset_type data_nbytes = static_cast<offset_type>(input.value->size());
 
-        int64_t output_ncodeunits_max = Derived::MaxCodeunits(data_nbytes);
-        if (output_ncodeunits_max > std::numeric_limits<offset_type>::max()) {
-          return Status::CapacityError(
-              "Result might not fit in a 32bit utf8 array, convert to large_utf8");
-        }
-        ARROW_ASSIGN_OR_RAISE(auto value_buffer, ctx->Allocate(output_ncodeunits_max));
-        result->value = value_buffer;
-        offset_type encoded_nbytes = 0;
-        if (ARROW_PREDICT_FALSE(!static_cast<Derived&>(*this).Transform(
-                input.value->data(), data_nbytes, value_buffer->mutable_data(),
-                &encoded_nbytes))) {
-          return Derived::InvalidStatus();
-        }
-        RETURN_NOT_OK(value_buffer->Resize(encoded_nbytes, /*shrink_to_fit=*/true));
+      int64_t output_ncodeunits_max = MaxCodeunits(1, data_nbytes);
+      if (output_ncodeunits_max > std::numeric_limits<offset_type>::max()) {
+        return Status::CapacityError(
+            "Result might not fit in a 32bit utf8 array, convert to large_utf8");
       }
-      out->value = result;
+      ARROW_ASSIGN_OR_RAISE(auto value_buffer, ctx->Allocate(output_ncodeunits_max));
+      result->value = value_buffer;
+      offset_type encoded_nbytes = 0;
+      if (ARROW_PREDICT_FALSE(!static_cast<Derived&>(*this).Transform(
+              input.value->data(), data_nbytes, value_buffer->mutable_data(),
+              &encoded_nbytes))) {
+        return Derived::InvalidStatus();
+      }
+      DCHECK_LE(encoded_nbytes, output_ncodeunits_max);
+      return value_buffer->Resize(encoded_nbytes, /*shrink_to_fit=*/true);
     }
-
-    return Status::OK();
   }
 };
 
@@ -234,7 +239,8 @@ struct StringTransformCodepoint : StringTransform<Type, Derived> {
     *output_written = static_cast<offset_type>(output - output_start);
     return true;
   }
-  static int64_t MaxCodeunits(offset_type input_ncodeunits) {
+
+  int64_t MaxCodeunits(int64_t ninputs, int64_t input_ncodeunits) override {
     // Section 5.18 of the Unicode spec claim that the number of codepoints for case
     // mapping can grow by a factor of 3. This means grow by a factor of 3 in bytes
     // However, since we don't support all casings (SpecialCasing.txt) the growth
@@ -243,6 +249,7 @@ struct StringTransformCodepoint : StringTransform<Type, Derived> {
     // two code units (even) can grow to 3 code units.
     return static_cast<int64_t>(input_ncodeunits) * 3 / 2;
   }
+
   Status Execute(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     EnsureLookupTablesFilled();
     return Base::Execute(ctx, batch, out);
@@ -758,6 +765,209 @@ void AddFindSubstring(FunctionRegistry* registry) {
   DCHECK_OK(registry->AddFunction(std::move(func)));
 }
 
+// Slicing
+
+template <typename Type, typename Derived>
+struct SliceBase : StringTransform<Type, Derived> {
+  using Base = StringTransform<Type, Derived>;
+  using offset_type = typename Base::offset_type;
+  using State = OptionsWrapper<SliceOptions>;
+
+  SliceOptions options;
+
+  explicit SliceBase(SliceOptions options) : options(options) {}
+
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    SliceOptions options = State::Get(ctx);
+    if (options.step == 0) {
+      return Status::Invalid("Slice step cannot be zero");
+    }
+    return Derived(options).Execute(ctx, batch, out);
+  }
+};
+
+#define PROPAGATE_FALSE(expr)         \
+  do {                                \
+    if (ARROW_PREDICT_FALSE(!expr)) { \
+      return false;                   \
+    }                                 \
+  } while (0)
+
+bool SliceCodeunitsTransform(const uint8_t* input, int64_t input_string_ncodeunits,
+                             uint8_t* output, int64_t* output_written,
+                             const SliceOptions& options) {
+  const uint8_t* begin = input;
+  const uint8_t* end = input + input_string_ncodeunits;
+  const uint8_t* begin_sliced = begin;
+  const uint8_t* end_sliced = end;
+
+  if (options.step >= 1) {
+    if (options.start >= 0) {
+      // start counting from the left
+      PROPAGATE_FALSE(
+          arrow::util::UTF8AdvanceCodepoints(begin, end, &begin_sliced, options.start));
+      if (options.stop > options.start) {
+        // continue counting from begin_sliced
+        int64_t length = options.stop - options.start;
+        PROPAGATE_FALSE(
+            arrow::util::UTF8AdvanceCodepoints(begin_sliced, end, &end_sliced, length));
+      } else if (options.stop < 0) {
+        // or from the end (but we will never need to < begin_sliced)
+        PROPAGATE_FALSE(arrow::util::UTF8AdvanceCodepointsReverse(
+            begin_sliced, end, &end_sliced, -options.stop));
+      } else {
+        // zero length slice
+        *output_written = 0;
+        return true;
+      }
+    } else {
+      // start counting from the right
+      PROPAGATE_FALSE(arrow::util::UTF8AdvanceCodepointsReverse(begin, end, &begin_sliced,
+                                                                -options.start));
+      if (options.stop > 0) {
+        // continue counting from the left, we cannot start from begin_sliced because we
+        // don't know how many codepoints are between begin and begin_sliced
+        PROPAGATE_FALSE(
+            arrow::util::UTF8AdvanceCodepoints(begin, end, &end_sliced, options.stop));
+        // and therefore we also needs this
+        if (end_sliced <= begin_sliced) {
+          // zero length slice
+          *output_written = 0;
+          return true;
+        }
+      } else if ((options.stop < 0) && (options.stop > options.start)) {
+        // stop is negative, but larger than start, so we count again from the right
+        // in some cases we can optimize this, depending on the shortest path (from end
+        // or begin_sliced), but begin_sliced and options.start can be 'out of sync',
+        // for instance when start=-100, when the string length is only 10.
+        PROPAGATE_FALSE(arrow::util::UTF8AdvanceCodepointsReverse(
+            begin_sliced, end, &end_sliced, -options.stop));
+      } else {
+        // zero length slice
+        *output_written = 0;
+        return true;
+      }
+    }
+    DCHECK(begin_sliced <= end_sliced);
+    if (options.step == 1) {
+      // fast case, where we simply can finish with a memcpy
+      std::copy(begin_sliced, end_sliced, output);
+      *output_written = end_sliced - begin_sliced;
+    } else {
+      uint8_t* dest = output;
+      const uint8_t* i = begin_sliced;
+
+      while (i < end_sliced) {
+        uint32_t codepoint = 0;
+        // write a single codepoint
+        PROPAGATE_FALSE(arrow::util::UTF8Decode(&i, &codepoint));
+        dest = arrow::util::UTF8Encode(dest, codepoint);
+        // and skip the remainder
+        int64_t skips = options.step - 1;
+        while ((skips--) && (i < end_sliced)) {
+          PROPAGATE_FALSE(arrow::util::UTF8Decode(&i, &codepoint));
+        }
+      }
+      *output_written = dest - output;
+    }
+    return true;
+  } else {  // step < 0
+    // serious +1 -1 kung fu because now begin_slice and end_slice act like reverse
+    // iterators.
+
+    if (options.start >= 0) {
+      // +1 because begin_sliced acts as as the end of a reverse iterator
+      PROPAGATE_FALSE(arrow::util::UTF8AdvanceCodepoints(begin, end, &begin_sliced,
+                                                         options.start + 1));
+      // and make it point at the last codeunit of the previous codeunit
+      begin_sliced--;
+    } else {
+      // -1 because start=-1 means the last codeunit, which is 0 advances
+      PROPAGATE_FALSE(arrow::util::UTF8AdvanceCodepointsReverse(begin, end, &begin_sliced,
+                                                                -options.start - 1));
+      // and make it point at the last codeunit of the previous codeunit
+      begin_sliced--;
+    }
+    // similar to options.start
+    if (options.stop >= 0) {
+      PROPAGATE_FALSE(
+          arrow::util::UTF8AdvanceCodepoints(begin, end, &end_sliced, options.stop + 1));
+      end_sliced--;
+    } else {
+      PROPAGATE_FALSE(arrow::util::UTF8AdvanceCodepointsReverse(begin, end, &end_sliced,
+                                                                -options.stop - 1));
+      end_sliced--;
+    }
+
+    uint8_t* dest = output;
+    const uint8_t* i = begin_sliced;
+
+    while (i > end_sliced) {
+      uint32_t codepoint = 0;
+      // write a single codepoint
+      PROPAGATE_FALSE(arrow::util::UTF8DecodeReverse(&i, &codepoint));
+      dest = arrow::util::UTF8Encode(dest, codepoint);
+      // and skip the remainder
+      int64_t skips = -options.step - 1;
+      while ((skips--) && (i > end_sliced)) {
+        PROPAGATE_FALSE(arrow::util::UTF8DecodeReverse(&i, &codepoint));
+      }
+    }
+    *output_written = dest - output;
+    return true;
+  }
+}
+
+#undef PROPAGATE_FALSE
+
+template <typename Type>
+struct SliceCodeunits : SliceBase<Type, SliceCodeunits<Type>> {
+  using Base = SliceBase<Type, SliceCodeunits<Type>>;
+  using offset_type = typename Base::offset_type;
+  using Base::Base;
+
+  int64_t MaxCodeunits(int64_t ninputs, int64_t input_ncodeunits) override {
+    const SliceOptions& opt = this->options;
+    if ((opt.start >= 0) != (opt.stop >= 0)) {
+      // If start and stop don't have the same sign, we can't guess an upper bound
+      // on the resulting slice lengths, so return a worst case estimate.
+      return input_ncodeunits;
+    }
+    int64_t max_slice_codepoints = (opt.stop - opt.start + opt.step - 1) / opt.step;
+    // The maximum UTF8 byte size of a codepoint is 4
+    return std::min(input_ncodeunits,
+                    4 * ninputs * std::max<int64_t>(0, max_slice_codepoints));
+  }
+
+  bool Transform(const uint8_t* input, offset_type input_string_ncodeunits,
+                 uint8_t* output, offset_type* output_written) {
+    int64_t output_written_64;
+    bool res = SliceCodeunitsTransform(input, input_string_ncodeunits, output,
+                                       &output_written_64, this->options);
+    *output_written = static_cast<offset_type>(output_written_64);
+    return res;
+  }
+};
+
+const FunctionDoc utf8_slice_codeunits_doc(
+    "Slice string ",
+    ("For each string in `strings`, slice into a substring defined by\n"
+     "`start`, `stop`, `step`) as given by `SliceOptions` where `start` is inclusive\n"
+     "and `stop` is exclusive and are measured in codeunits. If step is negative, the\n"
+     "string will be advanced in reversed order. A `step` of zero is considered an\n"
+     "error.\n"
+     "Null inputs emit null."),
+    {"strings"}, "SliceOptions");
+
+void AddSlice(FunctionRegistry* registry) {
+  auto func = std::make_shared<ScalarFunction>("utf8_slice_codeunits", Arity::Unary(),
+                                               &utf8_slice_codeunits_doc);
+  using t32 = SliceCodeunits<StringType>;
+  using t64 = SliceCodeunits<LargeStringType>;
+  DCHECK_OK(func->AddKernel({utf8()}, utf8(), t32::Exec, t32::State::Init));
+  DCHECK_OK(func->AddKernel({large_utf8()}, large_utf8(), t64::Exec, t64::State::Init));
+  DCHECK_OK(registry->AddFunction(std::move(func)));
+}
 // IsAlpha/Digit etc
 
 #ifdef ARROW_WITH_UTF8PROC
@@ -2716,7 +2926,6 @@ void RegisterScalarStringAscii(FunctionRegistry* registry) {
   AddUnaryStringPredicate<IsUpperUnicode>("utf8_is_upper", registry, &utf8_is_upper_doc);
 #endif
 
-  AddSplit(registry);
   AddBinaryLength(registry);
   AddUtf8Length(registry);
   AddMatchSubstring(registry);
@@ -2730,6 +2939,8 @@ void RegisterScalarStringAscii(FunctionRegistry* registry) {
       MemAllocation::NO_PREALLOCATE);
   AddExtractRegex(registry);
 #endif
+  AddSlice(registry);
+  AddSplit(registry);
   AddStrptime(registry);
 }
 
diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
index 5c230c41cd9..fe069810dbd 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
@@ -809,6 +809,118 @@ TYPED_TEST(TestStringKernels, TrimUTF8) {
 }
 #endif
 
+// produce test data with e.g.:
+// repr([k[-3:1] for k in ["", "𝑓", "𝑓ö", "𝑓öõ", "𝑓öõḍ", "𝑓öõḍš"]]).replace("'", '"')
+
+#ifdef ARROW_WITH_UTF8PROC
+TYPED_TEST(TestStringKernels, SliceCodeunitsBasic) {
+  SliceOptions options{2, 4};
+  this->CheckUnary("utf8_slice_codeunits", R"(["foo", "fo", null, "foo bar"])",
+                   this->type(), R"(["o", "", null, "o "])", &options);
+  SliceOptions options_2{2, 3};
+  // ensure we slice in codeunits, not graphemes
+  // a\u0308 is ä, which is 1 grapheme (character), but two codepoints
+  // \u0308 in utf8 encoding is \xcc\x88
+  this->CheckUnary("utf8_slice_codeunits", R"(["ää", "bä"])", this->type(),
+                   "[\"a\", \"\xcc\x88\"]", &options_2);
+  SliceOptions options_empty_pos{6, 6};
+  this->CheckUnary("utf8_slice_codeunits", R"(["", "𝑓öõ"])", this->type(), R"(["",
+  ""])",
+                   &options_empty_pos);
+  SliceOptions options_empty_neg{-6, -6};
+  this->CheckUnary("utf8_slice_codeunits", R"(["", "𝑓öõ"])", this->type(), R"(["",
+  ""])",
+                   &options_empty_neg);
+  SliceOptions options_empty_neg_to_zero{-6, 0};
+  this->CheckUnary("utf8_slice_codeunits", R"(["", "𝑓öõ"])", this->type(), R"(["", ""])",
+                   &options_empty_neg_to_zero);
+
+  // end is beyond 0, but before start (hence empty)
+  SliceOptions options_edgecase_1{-3, 1};
+  this->CheckUnary("utf8_slice_codeunits", R"(["𝑓öõḍš"])", this->type(), R"([""])",
+                   &options_edgecase_1);
+
+  // this is a safeguard agains an optimization path possible, but actually a tricky case
+  SliceOptions options_edgecase_2{-6, -2};
+  this->CheckUnary("utf8_slice_codeunits", R"(["𝑓öõḍš"])", this->type(), R"(["𝑓öõ"])",
+                   &options_edgecase_2);
+
+  auto input = ArrayFromJSON(this->type(), R"(["𝑓öõḍš"])");
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      Invalid,
+      testing::HasSubstr("Attempted to initialize KernelState from null FunctionOptions"),
+      CallFunction("utf8_slice_codeunits", {input}));
+
+  SliceOptions options_invalid{2, 4, 0};
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      Invalid, testing::HasSubstr("Slice step cannot be zero"),
+      CallFunction("utf8_slice_codeunits", {input}, &options_invalid));
+}
+
+TYPED_TEST(TestStringKernels, SliceCodeunitsPosPos) {
+  SliceOptions options{2, 4};
+  this->CheckUnary("utf8_slice_codeunits", R"(["", "𝑓", "𝑓ö", "𝑓öõ", "𝑓öõḍ", "𝑓öõḍš"])",
+                   this->type(), R"(["", "", "", "õ", "õḍ", "õḍ"])", &options);
+  SliceOptions options_step{1, 5, 2};
+  this->CheckUnary("utf8_slice_codeunits", R"(["", "𝑓", "𝑓ö", "𝑓öõ", "𝑓öõḍ", "𝑓öõḍš"])",
+                   this->type(), R"(["", "", "ö", "ö", "öḍ", "öḍ"])", &options_step);
+  SliceOptions options_step_neg{5, 1, -2};
+  this->CheckUnary("utf8_slice_codeunits", R"(["", "𝑓", "𝑓ö", "𝑓öõ", "𝑓öõḍ", "𝑓öõḍš"])",
+                   this->type(), R"(["", "", "", "õ", "ḍ", "šõ"])", &options_step_neg);
+  options_step_neg.stop = 0;
+  this->CheckUnary("utf8_slice_codeunits", R"(["", "𝑓", "𝑓ö", "𝑓öõ", "𝑓öõḍ","𝑓öõḍš"])",
+                   this->type(), R"(["", "", "ö", "õ", "ḍö", "šõ"])", &options_step_neg);
+}
+
+TYPED_TEST(TestStringKernels, SliceCodeunitsPosNeg) {
+  SliceOptions options{2, -1};
+  this->CheckUnary("utf8_slice_codeunits", R"(["", "𝑓", "𝑓ö", "𝑓öõ", "𝑓öõḍ", "𝑓öõḍš"])",
+                   this->type(), R"(["", "", "", "", "õ", "õḍ"])", &options);
+  SliceOptions options_step{1, -1, 2};
+  this->CheckUnary("utf8_slice_codeunits", R"(["", "f", "fö", "föo", "föod","foodš"])",
+                   this->type(), R"(["", "", "", "ö", "ö", "od"])", &options_step);
+  SliceOptions options_step_neg{3, -4, -2};
+  this->CheckUnary("utf8_slice_codeunits", R"(["", "𝑓", "𝑓ö", "𝑓öõ", "𝑓öõḍ","𝑓öõḍš"])",
+                   this->type(), R"(["", "𝑓", "ö", "õ𝑓", "ḍö", "ḍ"])", &options_step_neg);
+  options_step_neg.stop = -5;
+  this->CheckUnary("utf8_slice_codeunits", R"(["", "𝑓", "𝑓ö", "𝑓öõ", "𝑓öõḍ","𝑓öõḍš"])",
+                   this->type(), R"(["", "𝑓", "ö", "õ𝑓", "ḍö", "ḍö"])",
+                   &options_step_neg);
+}
+
+TYPED_TEST(TestStringKernels, SliceCodeunitsNegNeg) {
+  SliceOptions options{-2, -1};
+  this->CheckUnary("utf8_slice_codeunits", R"(["", "𝑓", "𝑓ö", "𝑓öõ", "𝑓öõḍ", "𝑓öõḍš"])",
+                   this->type(), R"(["", "", "𝑓", "ö", "õ", "ḍ"])", &options);
+  SliceOptions options_step{-4, -1, 2};
+  this->CheckUnary("utf8_slice_codeunits", R"(["", "𝑓", "𝑓ö", "𝑓öõ", "𝑓öõḍ", "𝑓öõḍš"])",
+                   this->type(), R"(["", "", "𝑓", "𝑓", "𝑓õ", "öḍ"])", &options_step);
+  SliceOptions options_step_neg{-1, -3, -2};
+  this->CheckUnary("utf8_slice_codeunits", R"(["", "𝑓", "𝑓ö", "𝑓öõ", "𝑓öõḍ", "𝑓öõḍš"])",
+                   this->type(), R"(["", "𝑓", "ö", "õ", "ḍ", "š"])", &options_step_neg);
+  options_step_neg.stop = -4;
+  this->CheckUnary("utf8_slice_codeunits", R"(["", "𝑓", "𝑓ö", "𝑓öõ", "𝑓öõḍ", "𝑓öõḍš"])",
+                   this->type(), R"(["", "𝑓", "ö", "õ𝑓", "ḍö", "šõ"])",
+                   &options_step_neg);
+}
+
+TYPED_TEST(TestStringKernels, SliceCodeunitsNegPos) {
+  SliceOptions options{-2, 4};
+  this->CheckUnary("utf8_slice_codeunits", R"(["", "𝑓", "𝑓ö", "𝑓öõ", "𝑓öõḍ", "𝑓öõḍš"])",
+                   this->type(), R"(["", "𝑓", "𝑓ö", "öõ", "õḍ", "ḍ"])", &options);
+  SliceOptions options_step{-4, 4, 2};
+  this->CheckUnary("utf8_slice_codeunits", R"(["", "𝑓", "𝑓ö", "𝑓öõ", "𝑓öõḍ", "𝑓öõḍš"])",
+                   this->type(), R"(["", "𝑓", "𝑓", "𝑓õ", "𝑓õ", "öḍ"])", &options_step);
+  SliceOptions options_step_neg{-1, 1, -2};
+  this->CheckUnary("utf8_slice_codeunits", R"(["", "𝑓", "𝑓ö", "𝑓öõ", "𝑓öõḍ", "𝑓öõḍš"])",
+                   this->type(), R"(["", "", "", "õ", "ḍ", "šõ"])", &options_step_neg);
+  options_step_neg.stop = 0;
+  this->CheckUnary("utf8_slice_codeunits", R"(["", "𝑓", "𝑓ö", "𝑓öõ", "𝑓öõḍ", "𝑓öõḍš"])",
+                   this->type(), R"(["", "", "ö", "õ", "ḍö", "šõ"])", &options_step_neg);
+}
+
+#endif  // ARROW_WITH_UTF8PROC
+
 TYPED_TEST(TestStringKernels, TrimWhitespaceAscii) {
   // \xe2\x80\x88 is punctuation space
   this->CheckUnary("ascii_trim_whitespace",
diff --git a/cpp/src/arrow/util/utf8.h b/cpp/src/arrow/util/utf8.h
index 310d6913403..1426dc904ee 100644
--- a/cpp/src/arrow/util/utf8.h
+++ b/cpp/src/arrow/util/utf8.h
@@ -492,6 +492,30 @@ static inline bool UTF8FindIfReverse(const uint8_t* first, const uint8_t* last,
   return true;
 }
 
+static inline bool UTF8AdvanceCodepoints(const uint8_t* first, const uint8_t* last,
+                                         const uint8_t** destination, int64_t n) {
+  return UTF8FindIf(
+      first, last,
+      [&](uint32_t codepoint) {
+        bool done = n == 0;
+        n--;
+        return done;
+      },
+      destination);
+}
+
+static inline bool UTF8AdvanceCodepointsReverse(const uint8_t* first, const uint8_t* last,
+                                                const uint8_t** destination, int64_t n) {
+  return UTF8FindIfReverse(
+      first, last,
+      [&](uint32_t codepoint) {
+        bool done = n == 0;
+        n--;
+        return done;
+      },
+      destination);
+}
+
 template <class UnaryFunction>
 static inline bool UTF8ForEach(const uint8_t* first, const uint8_t* last,
                                UnaryFunction&& f) {
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index 0b54cd3dd0b..3f30bbcaa06 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -638,14 +638,14 @@ when a positive ``max_splits`` is given.
   as separator.
 
 
-String extraction
-~~~~~~~~~~~~~~~~~
+String component extraction
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-+--------------------+------------+------------------------------------+---------------+----------------------------------------+
-| Function name      | Arity      | Input types                        | Output type   | Options class                          |
-+====================+============+====================================+===============+========================================+
-| extract_regex      | Unary      | String-like                        | Struct (1)    | :struct:`ExtractRegexOptions`          |
-+--------------------+------------+------------------------------------+---------------+----------------------------------------+
++--------------------+------------+----------------+---------------+----------------------------------------+
+| Function name      | Arity      | Input types    | Output type   | Options class                          |
++====================+============+================+===============+========================================+
+| extract_regex      | Unary      | String-like    | Struct (1)    | :struct:`ExtractRegexOptions`          |
++--------------------+------------+----------------+---------------+----------------------------------------+
 
 * \(1) Extract substrings defined by a regular expression using the Google RE2
   library.  The output struct field names refer to the named capture groups,
@@ -653,6 +653,26 @@ String extraction
   ``(?P<letter>[ab])(?P<digit>\\d)``.
 
 
+Slicing
+~~~~~~~
+
+These function transform each sequence of the array to a subsequence, according
+to start and stop indices, and a non-zero step (defaulting to 1).  Slicing
+semantics follow Python slicing semantics: the start index is inclusive,
+the stop index exclusive; if the step is negative, the sequence is followed
+in reverse order.
+
++--------------------------+------------+----------------+-----------------+--------------------------+---------+
+| Function name            | Arity      | Input types    | Output type     | Options class            | Notes   |
++==========================+============+================+=================+==========================+=========+
+| utf8_slice_codepoints    | Unary      | String-like    | String-like     | :struct:`SliceOptions`   | \(1)    |
++--------------------------+------------+----------------+-----------------+--------------------------+---------+
+
+* \(1) Slice string into a substring defined by (``start``, ``stop``, ``step``)
+  as given by :struct:`SliceOptions` where ``start`` and ``stop`` are measured
+  in codeunits. Null inputs emit null.
+
+
 Structural transforms
 ~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx
index b3d12396b0a..8da0ea05006 100644
--- a/python/pyarrow/_compute.pyx
+++ b/python/pyarrow/_compute.pyx
@@ -738,6 +738,23 @@ class ExtractRegexOptions(_ExtractRegexOptions):
         self._set_options(pattern)
 
 
+cdef class _SliceOptions(FunctionOptions):
+    cdef:
+        unique_ptr[CSliceOptions] slice_options
+
+    cdef const CFunctionOptions* get_options(self) except NULL:
+        return self.slice_options.get()
+
+    def _set_options(self, start, stop, step):
+        self.slice_options.reset(
+            new CSliceOptions(start, stop, step))
+
+
+class SliceOptions(_SliceOptions):
+    def __init__(self, start, stop, step=1):
+        self._set_options(start, stop, step)
+
+
 cdef class _FilterOptions(FunctionOptions):
     cdef:
         unique_ptr[CFilterOptions] filter_options
diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py
index 37451c439ce..eb66f4407c8 100644
--- a/python/pyarrow/compute.py
+++ b/python/pyarrow/compute.py
@@ -38,15 +38,16 @@
     IndexOptions,
     MatchSubstringOptions,
     ModeOptions,
-    ScalarAggregateOptions,
-    SplitOptions,
-    SplitPatternOptions,
     PartitionNthOptions,
     ProjectOptions,
     QuantileOptions,
     ReplaceSubstringOptions,
+    ScalarAggregateOptions,
     SetLookupOptions,
+    SliceOptions,
     SortOptions,
+    SplitOptions,
+    SplitPatternOptions,
     StrptimeOptions,
     TakeOptions,
     TDigestOptions,
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index eb7e27d60bf..7b1c564923d 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -1797,6 +1797,13 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil:
         CTrimOptions(c_string characters)
         c_string characters
 
+    cdef cppclass CSliceOptions \
+            "arrow::compute::SliceOptions"(CFunctionOptions):
+        CSliceOptions(int64_t start, int64_t stop, int64_t step)
+        int64_t start
+        int64_t stop
+        int64_t step
+
     cdef cppclass CSplitOptions \
             "arrow::compute::SplitOptions"(CFunctionOptions):
         CSplitOptions(int64_t max_splits, c_bool reverse)
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index 7444c24ccf2..b3f87127397 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -370,6 +370,18 @@ def test_trim():
     assert expected.equals(result)
 
 
+def test_slice_compatibility():
+    arr = pa.array(["", "𝑓", "𝑓ö", "𝑓öõ", "𝑓öõḍ", "𝑓öõḍš"])
+    for start in range(-6, 6):
+        for stop in range(-6, 6):
+            for step in [-3, -2, -1, 1, 2, 3]:
+                expected = pa.array([k.as_py()[start:stop:step]
+                                     for k in arr])
+                result = pc.utf8_slice_codeunits(
+                    arr, start=start, stop=stop, step=step)
+                assert expected.equals(result)
+
+
 def test_split_pattern():
     arr = pa.array(["-foo---bar--", "---foo---b"])
     result = pc.split_pattern(arr, pattern="---")

From 7540b9f7055489aa7863e62085196e53e2868297 Mon Sep 17 00:00:00 2001
From: Nate Clark <nate@neworld.us>
Date: Mon, 7 Jun 2021 17:51:41 +0200
Subject: [PATCH 362/719] ARROW-12661: [C++] Add
 ReaderOptions::skip_rows_after_names

Add a new csv reader option which allows the reader to skip rows after reading the column names from the csv.

Closes #10255 from n3world/ARROW-12661-skip_after_names

Lead-authored-by: Nate Clark <nate@neworld.us>
Co-authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/csv/chunker.cc         |  34 +++++++
 cpp/src/arrow/csv/chunker_test.cc    |  68 +++++++++++++
 cpp/src/arrow/csv/options.h          |   3 +
 cpp/src/arrow/csv/reader.cc          | 106 +++++++++++++-------
 cpp/src/arrow/json/chunker.cc        |   5 +
 cpp/src/arrow/util/delimiting.cc     |  52 ++++++++++
 cpp/src/arrow/util/delimiting.h      |  34 +++++++
 python/pyarrow/_csv.pyx              |  37 ++++++-
 python/pyarrow/includes/libarrow.pxd |   1 +
 python/pyarrow/tests/test_csv.py     | 141 ++++++++++++++++++++++++---
 10 files changed, 425 insertions(+), 56 deletions(-)

diff --git a/cpp/src/arrow/csv/chunker.cc b/cpp/src/arrow/csv/chunker.cc
index 95b5031dec3..b3a0dead593 100644
--- a/cpp/src/arrow/csv/chunker.cc
+++ b/cpp/src/arrow/csv/chunker.cc
@@ -171,6 +171,7 @@ class Lexer {
     goto FieldStart;
 
   LineEnd:
+    state_ = FIELD_START;
     return data;
 
   AbortLine:
@@ -234,6 +235,39 @@ class LexingBoundaryFinder : public BoundaryFinder {
     return Status::OK();
   }
 
+  Status FindNth(util::string_view partial, util::string_view block, int64_t count,
+                 int64_t* out_pos, int64_t* num_found) override {
+    Lexer<quoting, escaping> lexer(options_);
+    int64_t found = 0;
+    const char* data = block.data();
+    const char* const data_end = block.data() + block.size();
+
+    const char* line_end;
+    if (partial.size()) {
+      line_end = lexer.ReadLine(partial.data(), partial.data() + partial.size());
+      DCHECK_EQ(line_end, nullptr);  // Otherwise `partial` is a whole CSV line
+    }
+
+    for (; data < data_end && found < count; ++found) {
+      line_end = lexer.ReadLine(data, data_end);
+      if (line_end == nullptr) {
+        // Cannot read any further
+        break;
+      }
+      DCHECK_GT(line_end, data);
+      data = line_end;
+    }
+
+    if (data == block.data()) {
+      // No complete CSV line
+      *out_pos = kNoDelimiterFound;
+    } else {
+      *out_pos = static_cast<int64_t>(data - block.data());
+    }
+    *num_found = found;
+    return Status::OK();
+  }
+
  protected:
   ParseOptions options_;
 };
diff --git a/cpp/src/arrow/csv/chunker_test.cc b/cpp/src/arrow/csv/chunker_test.cc
index ab565567bde..27101e5538c 100644
--- a/cpp/src/arrow/csv/chunker_test.cc
+++ b/cpp/src/arrow/csv/chunker_test.cc
@@ -71,6 +71,36 @@ class BaseChunkerTest : public ::testing::TestWithParam<bool> {
 
   void MakeChunker() { chunker_ = ::arrow::csv::MakeChunker(options_); }
 
+  void AssertSkip(const std::string& str, int64_t count, int64_t rem_count,
+                  int64_t rest_size) {
+    MakeChunker();
+    {
+      auto test_count = count;
+      auto partial = std::make_shared<Buffer>("");
+      auto block = std::make_shared<Buffer>(reinterpret_cast<const uint8_t*>(str.data()),
+                                            static_cast<int64_t>(str.size()));
+      std::shared_ptr<Buffer> rest;
+      ASSERT_OK(chunker_->ProcessSkip(partial, block, true, &test_count, &rest));
+      ASSERT_EQ(rem_count, test_count);
+      ASSERT_EQ(rest_size, rest->size());
+      AssertBufferEqual(*SliceBuffer(block, block->size() - rest_size), *rest);
+    }
+    {
+      auto test_count = count;
+      auto split = static_cast<int64_t>(str.find_first_of('\n'));
+      auto partial =
+          std::make_shared<Buffer>(reinterpret_cast<const uint8_t*>(str.data()), split);
+      auto block =
+          std::make_shared<Buffer>(reinterpret_cast<const uint8_t*>(str.data() + split),
+                                   static_cast<int64_t>(str.size()) - split);
+      std::shared_ptr<Buffer> rest;
+      ASSERT_OK(chunker_->ProcessSkip(partial, block, true, &test_count, &rest));
+      ASSERT_EQ(rem_count, test_count);
+      ASSERT_EQ(rest_size, rest->size());
+      AssertBufferEqual(*SliceBuffer(block, block->size() - rest_size), *rest);
+    }
+  }
+
   ParseOptions options_;
   std::unique_ptr<Chunker> chunker_;
 };
@@ -261,5 +291,43 @@ TEST_P(BaseChunkerTest, EscapingNewline) {
   }
 }
 
+TEST_P(BaseChunkerTest, ParseSkip) {
+  {
+    auto csv = MakeCSVData({"ab,c,\n", "def,,gh\n", ",ij,kl\n"});
+    ASSERT_NO_FATAL_FAILURE(AssertSkip(csv, 1, 0, 15));
+    ASSERT_NO_FATAL_FAILURE(AssertSkip(csv, 2, 0, 7));
+    ASSERT_NO_FATAL_FAILURE(AssertSkip(csv, 3, 0, 0));
+    ASSERT_NO_FATAL_FAILURE(AssertSkip(csv, 4, 1, 0));
+    ASSERT_NO_FATAL_FAILURE(AssertSkip(csv, 6, 3, 0));
+  }
+
+  // Test with no trailing new line
+  {
+    auto csv = MakeCSVData({"ab,c,\n", "def,,gh\n", ",ij,kl"});
+    ASSERT_NO_FATAL_FAILURE(AssertSkip(csv, 2, 0, 6));
+    ASSERT_NO_FATAL_FAILURE(AssertSkip(csv, 3, 0, 0));
+    ASSERT_NO_FATAL_FAILURE(AssertSkip(csv, 4, 1, 0));
+  }
+
+  // Test skip with new lines in values
+  {
+    auto csv = MakeCSVData({"ab,\"c\n\",\n", "\"d\nef\",,gh\n", ",ij,\"nkl\"\n"});
+    options_.newlines_in_values = true;
+    ASSERT_NO_FATAL_FAILURE(AssertSkip(csv, 1, 0, 21));
+    ASSERT_NO_FATAL_FAILURE(AssertSkip(csv, 2, 0, 10));
+    ASSERT_NO_FATAL_FAILURE(AssertSkip(csv, 3, 0, 0));
+    ASSERT_NO_FATAL_FAILURE(AssertSkip(csv, 4, 1, 0));
+    ASSERT_NO_FATAL_FAILURE(AssertSkip(csv, 6, 3, 0));
+  }
+
+  // Test with no trailing new line and new lines in values
+  {
+    auto csv = MakeCSVData({"ab,\"c\n\",\n", "\"d\nef\",,gh\n", ",ij,\"nkl\""});
+    ASSERT_NO_FATAL_FAILURE(AssertSkip(csv, 2, 0, 9));
+    ASSERT_NO_FATAL_FAILURE(AssertSkip(csv, 3, 0, 0));
+    ASSERT_NO_FATAL_FAILURE(AssertSkip(csv, 4, 1, 0));
+  }
+}
+
 }  // namespace csv
 }  // namespace arrow
diff --git a/cpp/src/arrow/csv/options.h b/cpp/src/arrow/csv/options.h
index f183743ac84..d9c94a03f86 100644
--- a/cpp/src/arrow/csv/options.h
+++ b/cpp/src/arrow/csv/options.h
@@ -129,6 +129,9 @@ struct ARROW_EXPORT ReadOptions {
   /// Number of header rows to skip (not including the row of column names, if any)
   int32_t skip_rows = 0;
 
+  /// Number of rows to skip after the column names are read, if any
+  int32_t skip_rows_after_names = 0;
+
   /// Column names for the target table.
   /// If empty, fall back on autogenerate_column_names.
   std::vector<std::string> column_names;
diff --git a/cpp/src/arrow/csv/reader.cc b/cpp/src/arrow/csv/reader.cc
index 598c16db360..068e06178c8 100644
--- a/cpp/src/arrow/csv/reader.cc
+++ b/cpp/src/arrow/csv/reader.cc
@@ -166,14 +166,17 @@ namespace {
 // iterator APIs (e.g. Visit)) even though an empty optional is never used in this code.
 class BlockReader {
  public:
-  BlockReader(std::unique_ptr<Chunker> chunker, std::shared_ptr<Buffer> first_buffer)
+  BlockReader(std::unique_ptr<Chunker> chunker, std::shared_ptr<Buffer> first_buffer,
+              int64_t skip_rows)
       : chunker_(std::move(chunker)),
         partial_(std::make_shared<Buffer>("")),
-        buffer_(std::move(first_buffer)) {}
+        buffer_(std::move(first_buffer)),
+        skip_rows_(skip_rows) {}
 
  protected:
   std::unique_ptr<Chunker> chunker_;
   std::shared_ptr<Buffer> partial_, buffer_;
+  int64_t skip_rows_;
   int64_t block_index_ = 0;
   // Whether there was a trailing CR at the end of last received buffer
   bool trailing_cr_ = false;
@@ -188,9 +191,9 @@ class SerialBlockReader : public BlockReader {
 
   static Iterator<CSVBlock> MakeIterator(
       Iterator<std::shared_ptr<Buffer>> buffer_iterator, std::unique_ptr<Chunker> chunker,
-      std::shared_ptr<Buffer> first_buffer) {
+      std::shared_ptr<Buffer> first_buffer, int64_t skip_rows) {
     auto block_reader =
-        std::make_shared<SerialBlockReader>(std::move(chunker), first_buffer);
+        std::make_shared<SerialBlockReader>(std::move(chunker), first_buffer, skip_rows);
     // Wrap shared pointer in callable
     Transformer<std::shared_ptr<Buffer>, CSVBlock> block_reader_fn =
         [block_reader](std::shared_ptr<Buffer> buf) {
@@ -201,9 +204,10 @@ class SerialBlockReader : public BlockReader {
 
   static AsyncGenerator<CSVBlock> MakeAsyncIterator(
       AsyncGenerator<std::shared_ptr<Buffer>> buffer_generator,
-      std::unique_ptr<Chunker> chunker, std::shared_ptr<Buffer> first_buffer) {
+      std::unique_ptr<Chunker> chunker, std::shared_ptr<Buffer> first_buffer,
+      int64_t skip_rows) {
     auto block_reader =
-        std::make_shared<SerialBlockReader>(std::move(chunker), first_buffer);
+        std::make_shared<SerialBlockReader>(std::move(chunker), first_buffer, skip_rows);
     // Wrap shared pointer in callable
     Transformer<std::shared_ptr<Buffer>, CSVBlock> block_reader_fn =
         [block_reader](std::shared_ptr<Buffer> next) {
@@ -217,9 +221,23 @@ class SerialBlockReader : public BlockReader {
       return TransformFinish();
     }
 
-    std::shared_ptr<Buffer> completion;
     bool is_final = (next_buffer == nullptr);
 
+    if (skip_rows_) {
+      RETURN_NOT_OK(
+          chunker_->ProcessSkip(partial_, buffer_, is_final, &skip_rows_, &buffer_));
+      partial_ = SliceBuffer(buffer_, 0, 0);
+      if (skip_rows_) {
+        // Still have rows beyond this buffer to skip return empty block
+        buffer_ = next_buffer;
+        return TransformYield<CSVBlock>(CSVBlock{partial_, partial_, partial_,
+                                                 block_index_++, is_final,
+                                                 [](int64_t) { return Status::OK(); }});
+      }
+    }
+
+    std::shared_ptr<Buffer> completion;
+
     if (is_final) {
       // End of file reached => compute completion from penultimate block
       RETURN_NOT_OK(chunker_->ProcessFinal(partial_, buffer_, &completion, &buffer_));
@@ -254,22 +272,12 @@ class ThreadedBlockReader : public BlockReader {
  public:
   using BlockReader::BlockReader;
 
-  static Iterator<CSVBlock> MakeIterator(
-      Iterator<std::shared_ptr<Buffer>> buffer_iterator, std::unique_ptr<Chunker> chunker,
-      std::shared_ptr<Buffer> first_buffer) {
-    auto block_reader =
-        std::make_shared<ThreadedBlockReader>(std::move(chunker), first_buffer);
-    // Wrap shared pointer in callable
-    Transformer<std::shared_ptr<Buffer>, CSVBlock> block_reader_fn =
-        [block_reader](std::shared_ptr<Buffer> next) { return (*block_reader)(next); };
-    return MakeTransformedIterator(std::move(buffer_iterator), block_reader_fn);
-  }
-
   static AsyncGenerator<CSVBlock> MakeAsyncIterator(
       AsyncGenerator<std::shared_ptr<Buffer>> buffer_generator,
-      std::unique_ptr<Chunker> chunker, std::shared_ptr<Buffer> first_buffer) {
-    auto block_reader =
-        std::make_shared<ThreadedBlockReader>(std::move(chunker), first_buffer);
+      std::unique_ptr<Chunker> chunker, std::shared_ptr<Buffer> first_buffer,
+      int64_t skip_rows) {
+    auto block_reader = std::make_shared<ThreadedBlockReader>(std::move(chunker),
+                                                              first_buffer, skip_rows);
     // Wrap shared pointer in callable
     Transformer<std::shared_ptr<Buffer>, CSVBlock> block_reader_fn =
         [block_reader](std::shared_ptr<Buffer> next) { return (*block_reader)(next); };
@@ -282,12 +290,29 @@ class ThreadedBlockReader : public BlockReader {
       return TransformFinish();
     }
 
-    std::shared_ptr<Buffer> whole, completion, next_partial;
     bool is_final = (next_buffer == nullptr);
 
     auto current_partial = std::move(partial_);
     auto current_buffer = std::move(buffer_);
 
+    if (skip_rows_) {
+      RETURN_NOT_OK(chunker_->ProcessSkip(current_partial, current_buffer, is_final,
+                                          &skip_rows_, &current_buffer));
+      current_partial = SliceBuffer(current_buffer, 0, 0);
+      if (skip_rows_) {
+        partial_ = std::move(current_buffer);
+        buffer_ = std::move(next_buffer);
+        return TransformYield<CSVBlock>(CSVBlock{current_partial,
+                                                 current_partial,
+                                                 current_partial,
+                                                 block_index_++,
+                                                 is_final,
+                                                 {}});
+      }
+    }
+
+    std::shared_ptr<Buffer> whole, completion, next_partial;
+
     if (is_final) {
       // End of file reached => compute completion from penultimate block
       RETURN_NOT_OK(
@@ -387,6 +412,12 @@ class ReaderMixin {
     } else {
       column_names_ = read_options_.column_names;
     }
+
+    if (count_rows_) {
+      // increase rows seen to skip past rows which will be skipped
+      num_rows_seen_ += read_options_.skip_rows_after_names;
+    }
+
     *rest = SliceBuffer(buf, data - buf->data());
 
     num_csv_cols_ = static_cast<int32_t>(column_names_.size());
@@ -828,7 +859,7 @@ class SerialStreamingReader : public BaseStreamingReader,
 
       self->block_generator_ = SerialBlockReader::MakeAsyncIterator(
           std::move(self->buffer_generator_), MakeChunker(self->parse_options_),
-          std::move(own_first_buffer));
+          std::move(own_first_buffer), self->read_options_.skip_rows_after_names);
       return Status::OK();
     });
   }
@@ -868,9 +899,9 @@ class SerialTableReader : public BaseTableReader {
     RETURN_NOT_OK(ProcessHeader(first_buffer, &first_buffer));
     RETURN_NOT_OK(MakeColumnBuilders());
 
-    auto block_iterator = SerialBlockReader::MakeIterator(std::move(buffer_iterator_),
-                                                          MakeChunker(parse_options_),
-                                                          std::move(first_buffer));
+    auto block_iterator = SerialBlockReader::MakeIterator(
+        std::move(buffer_iterator_), MakeChunker(parse_options_), std::move(first_buffer),
+        read_options_.skip_rows_after_names);
     while (true) {
       RETURN_NOT_OK(io_context_.stop_token().Poll());
 
@@ -944,7 +975,7 @@ class AsyncThreadedTableReader
     return ProcessFirstBuffer().Then([self](const std::shared_ptr<Buffer>& first_buffer) {
       auto block_generator = ThreadedBlockReader::MakeAsyncIterator(
           self->buffer_generator_, MakeChunker(self->parse_options_),
-          std::move(first_buffer));
+          std::move(first_buffer), self->read_options_.skip_rows_after_names);
 
       std::function<Status(CSVBlock)> block_visitor =
           [self](CSVBlock maybe_block) -> Status {
@@ -1056,16 +1087,17 @@ class CSVRowCounter : public ReaderMixin,
     auto transferred_it = MakeTransferredGenerator(bg_it, cpu_executor_);
     auto buffer_generator = CSVBufferIterator::MakeAsync(std::move(transferred_it));
 
-    return buffer_generator().Then([self, buffer_generator](
-                                       std::shared_ptr<Buffer> first_buffer) {
-      if (!first_buffer) {
-        return Status::Invalid("Empty CSV file");
-      }
-      RETURN_NOT_OK(self->ProcessHeader(first_buffer, &first_buffer));
-      self->block_generator_ = SerialBlockReader::MakeAsyncIterator(
-          buffer_generator, MakeChunker(self->parse_options_), std::move(first_buffer));
-      return Status::OK();
-    });
+    return buffer_generator().Then(
+        [self, buffer_generator](std::shared_ptr<Buffer> first_buffer) {
+          if (!first_buffer) {
+            return Status::Invalid("Empty CSV file");
+          }
+          RETURN_NOT_OK(self->ProcessHeader(first_buffer, &first_buffer));
+          self->block_generator_ = SerialBlockReader::MakeAsyncIterator(
+              buffer_generator, MakeChunker(self->parse_options_),
+              std::move(first_buffer), 0);
+          return Status::OK();
+        });
   }
 
   Future<int64_t> DoCount(const std::shared_ptr<CSVRowCounter>& self) {
diff --git a/cpp/src/arrow/json/chunker.cc b/cpp/src/arrow/json/chunker.cc
index 568246bb63b..b4b4d31eb94 100644
--- a/cpp/src/arrow/json/chunker.cc
+++ b/cpp/src/arrow/json/chunker.cc
@@ -163,6 +163,11 @@ class ParsingBoundaryFinder : public BoundaryFinder {
     }
     return Status::OK();
   }
+
+  Status FindNth(util::string_view partial, util::string_view block, int64_t count,
+                 int64_t* out_pos, int64_t* num_found) override {
+    return Status::NotImplemented("ParsingBoundaryFinder::FindNth");
+  }
 };
 
 }  // namespace
diff --git a/cpp/src/arrow/util/delimiting.cc b/cpp/src/arrow/util/delimiting.cc
index 1b23c377052..fe1b6ea3126 100644
--- a/cpp/src/arrow/util/delimiting.cc
+++ b/cpp/src/arrow/util/delimiting.cc
@@ -17,6 +17,7 @@
 
 #include "arrow/util/delimiting.h"
 #include "arrow/buffer.h"
+#include "arrow/util/logging.h"
 
 namespace arrow {
 
@@ -60,6 +61,35 @@ class NewlineBoundaryFinder : public BoundaryFinder {
     return Status::OK();
   }
 
+  Status FindNth(util::string_view partial, util::string_view block, int64_t count,
+                 int64_t* out_pos, int64_t* num_found) override {
+    DCHECK(partial.find_first_of(newline_delimiters) == util::string_view::npos);
+
+    int64_t found = 0;
+    int64_t pos = kNoDelimiterFound;
+
+    auto cur_pos = block.find_first_of(newline_delimiters);
+    while (cur_pos != util::string_view::npos) {
+      if (block[cur_pos] == '\r' && cur_pos + 1 < block.length() &&
+          block[cur_pos + 1] == '\n') {
+        cur_pos += 2;
+      } else {
+        ++cur_pos;
+      }
+
+      pos = static_cast<int64_t>(cur_pos);
+      if (++found >= count) {
+        break;
+      }
+
+      cur_pos = block.find_first_of(newline_delimiters, cur_pos);
+    }
+
+    *out_pos = pos;
+    *num_found = found;
+    return Status::OK();
+  }
+
  protected:
   static constexpr const char* newline_delimiters = "\r\n";
 };
@@ -138,4 +168,26 @@ Status Chunker::ProcessFinal(std::shared_ptr<Buffer> partial,
   return Status::OK();
 }
 
+Status Chunker::ProcessSkip(std::shared_ptr<Buffer> partial,
+                            std::shared_ptr<Buffer> block, bool final, int64_t* count,
+                            std::shared_ptr<Buffer>* rest) {
+  DCHECK_GT(*count, 0);
+  int64_t pos;
+  int64_t num_found;
+  ARROW_RETURN_NOT_OK(boundary_finder_->FindNth(
+      util::string_view(*partial), util::string_view(*block), *count, &pos, &num_found));
+  if (pos == BoundaryFinder::kNoDelimiterFound) {
+    return StraddlingTooLarge();
+  }
+  if (ARROW_PREDICT_FALSE(final && *count > num_found && block->size() != pos)) {
+    // Skip the last row in the final block which does not have a delimiter
+    ++num_found;
+    *rest = SliceBuffer(block, 0, 0);
+  } else {
+    *rest = SliceBuffer(block, pos);
+  }
+  *count -= num_found;
+  return Status::OK();
+}
+
 }  // namespace arrow
diff --git a/cpp/src/arrow/util/delimiting.h b/cpp/src/arrow/util/delimiting.h
index 33828964414..b4b868340db 100644
--- a/cpp/src/arrow/util/delimiting.h
+++ b/cpp/src/arrow/util/delimiting.h
@@ -53,6 +53,19 @@ class ARROW_EXPORT BoundaryFinder {
   /// `out_pos` will be -1 if no delimiter is found.
   virtual Status FindLast(util::string_view block, int64_t* out_pos) = 0;
 
+  /// \brief Find the position of the Nth delimiter inside the block
+  ///
+  /// `partial` is taken to be the beginning of the block, and `block`
+  /// its continuation.  Also, `partial` doesn't contain a delimiter.
+  ///
+  /// The returned `out_pos` is relative to `block`'s start and should point
+  /// to the first character after the first delimiter.
+  /// `out_pos` will be -1 if no delimiter is found.
+  ///
+  /// The returned `num_found` is the number of delimiters actually found
+  virtual Status FindNth(util::string_view partial, util::string_view block,
+                         int64_t count, int64_t* out_pos, int64_t* num_found) = 0;
+
   static constexpr int64_t kNoDelimiterFound = -1;
 
  protected:
@@ -138,6 +151,27 @@ class ARROW_EXPORT Chunker {
   Status ProcessFinal(std::shared_ptr<Buffer> partial, std::shared_ptr<Buffer> block,
                       std::shared_ptr<Buffer>* completion, std::shared_ptr<Buffer>* rest);
 
+  /// \brief Skip count number of rows
+  /// Pre-conditions:
+  /// - `partial` is the start of a valid block of delimited data
+  ///   (i.e. starts just after a delimiter)
+  /// - `block` follows `partial` in file order
+  ///
+  /// Post-conditions:
+  /// - `count` is updated to indicate the number of rows that still need to be skipped
+  /// - If `count` is > 0 then `rest` is an incomplete block that should be a future
+  /// `partial`
+  /// - Else `rest` could be one or more valid blocks of delimited data which need to be
+  /// parsed
+  ///
+  /// \param[in] partial incomplete delimited data
+  /// \param[in] block delimited data following partial
+  /// \param[in] final whether this is the final chunk
+  /// \param[in,out] count number of rows that need to be skipped
+  /// \param[out] rest subrange of block containing what was not skipped
+  Status ProcessSkip(std::shared_ptr<Buffer> partial, std::shared_ptr<Buffer> block,
+                     bool final, int64_t* count, std::shared_ptr<Buffer>* rest);
+
  protected:
   ARROW_DISALLOW_COPY_AND_ASSIGN(Chunker);
 
diff --git a/python/pyarrow/_csv.pyx b/python/pyarrow/_csv.pyx
index 04b9cfd2bcd..e7dda3fb953 100644
--- a/python/pyarrow/_csv.pyx
+++ b/python/pyarrow/_csv.pyx
@@ -61,6 +61,14 @@ cdef class ReadOptions(_Weakrefable):
     skip_rows: int, optional (default 0)
         The number of rows to skip before the column names (if any)
         and the CSV data.
+    skip_rows_after_names: int, optional (default 0)
+        The number of rows to skip after the column names.
+        This number can be larger than the number of rows in one
+        block, and empty rows are counted.
+        The order of application is as follows:
+        - `skip_rows` is applied (if non-zero);
+        - column names aread (unless `column_names` is set);
+        - `skip_rows_after_names` is applied (if non-zero).
     column_names: list, optional
         The column names of the target table.  If empty, fall back on
         `autogenerate_column_names`.
@@ -83,7 +91,7 @@ cdef class ReadOptions(_Weakrefable):
 
     def __init__(self, *, use_threads=None, block_size=None, skip_rows=None,
                  column_names=None, autogenerate_column_names=None,
-                 encoding='utf8'):
+                 encoding='utf8', skip_rows_after_names=None):
         if use_threads is not None:
             self.use_threads = use_threads
         if block_size is not None:
@@ -96,6 +104,8 @@ cdef class ReadOptions(_Weakrefable):
             self.autogenerate_column_names= autogenerate_column_names
         # Python-specific option
         self.encoding = encoding
+        if skip_rows_after_names is not None:
+            self.skip_rows_after_names = skip_rows_after_names
 
     @property
     def use_threads(self):
@@ -126,6 +136,7 @@ cdef class ReadOptions(_Weakrefable):
         """
         The number of rows to skip before the column names (if any)
         and the CSV data.
+        See `skip_rows_after_names` for interaction description
         """
         return deref(self.options).skip_rows
 
@@ -161,6 +172,23 @@ cdef class ReadOptions(_Weakrefable):
     def autogenerate_column_names(self, value):
         deref(self.options).autogenerate_column_names = value
 
+    @property
+    def skip_rows_after_names(self):
+        """
+        The number of rows to skip after the column names.
+        This number can be larger than the number of rows in one
+        block, and empty rows are counted.
+        The order of application is as follows:
+        - `skip_rows` is applied (if non-zero);
+        - column names aread (unless `column_names` is set);
+        - `skip_rows_after_names` is applied (if non-zero).
+        """
+        return deref(self.options).skip_rows_after_names
+
+    @skip_rows_after_names.setter
+    def skip_rows_after_names(self, value):
+        deref(self.options).skip_rows_after_names = value
+
     def equals(self, ReadOptions other):
         return (
             self.use_threads == other.use_threads and
@@ -169,7 +197,8 @@ cdef class ReadOptions(_Weakrefable):
             self.column_names == other.column_names and
             self.autogenerate_column_names ==
             other.autogenerate_column_names and
-            self.encoding == other.encoding
+            self.encoding == other.encoding and
+            self.skip_rows_after_names == other.skip_rows_after_names
         )
 
     @staticmethod
@@ -182,12 +211,12 @@ cdef class ReadOptions(_Weakrefable):
     def __getstate__(self):
         return (self.use_threads, self.block_size, self.skip_rows,
                 self.column_names, self.autogenerate_column_names,
-                self.encoding)
+                self.encoding, self.skip_rows_after_names)
 
     def __setstate__(self, state):
         (self.use_threads, self.block_size, self.skip_rows,
          self.column_names, self.autogenerate_column_names,
-         self.encoding) = state
+         self.encoding, self.skip_rows_after_names) = state
 
     def __eq__(self, other):
         try:
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 7b1c564923d..eefca44605c 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -1617,6 +1617,7 @@ cdef extern from "arrow/csv/api.h" namespace "arrow::csv" nogil:
         c_bool use_threads
         int32_t block_size
         int32_t skip_rows
+        int32_t skip_rows_after_names
         vector[c_string] column_names
         c_bool autogenerate_column_names
 
diff --git a/python/pyarrow/tests/test_csv.py b/python/pyarrow/tests/test_csv.py
index f02406dec40..3a27132ec61 100644
--- a/python/pyarrow/tests/test_csv.py
+++ b/python/pyarrow/tests/test_csv.py
@@ -115,13 +115,15 @@ def test_read_options():
                         skip_rows=[0, 3],
                         column_names=[[], ["ab", "cd"]],
                         autogenerate_column_names=[False, True],
-                        encoding=['utf8', 'utf16'])
+                        encoding=['utf8', 'utf16'],
+                        skip_rows_after_names=[0, 27])
 
     check_options_class_pickling(cls, use_threads=True,
                                  skip_rows=3,
                                  column_names=["ab", "cd"],
                                  autogenerate_column_names=False,
-                                 encoding='utf16')
+                                 encoding='utf16',
+                                 skip_rows_after_names=27)
 
     assert opts.block_size > 0
     opts.block_size = 12345
@@ -319,6 +321,97 @@ def test_header_skip_rows(self):
             "kl": ["op"],
         }
 
+    def test_skip_rows_after_names(self):
+        rows = b"ab,cd\nef,gh\nij,kl\nmn,op\n"
+
+        opts = ReadOptions()
+        opts.skip_rows_after_names = 1
+        table = self.read_bytes(rows, read_options=opts)
+        self.check_names(table, ["ab", "cd"])
+        assert table.to_pydict() == {
+            "ab": ["ij", "mn"],
+            "cd": ["kl", "op"],
+        }
+
+        opts.skip_rows_after_names = 3
+        table = self.read_bytes(rows, read_options=opts)
+        self.check_names(table, ["ab", "cd"])
+        assert table.to_pydict() == {
+            "ab": [],
+            "cd": [],
+        }
+
+        opts.skip_rows_after_names = 4
+        table = self.read_bytes(rows, read_options=opts)
+        self.check_names(table, ["ab", "cd"])
+        assert table.to_pydict() == {
+            "ab": [],
+            "cd": [],
+        }
+
+        # Can skip rows with a different number of columns
+        rows = b"abcd\n,,,,,\nij,kl\nmn,op\n"
+        opts.skip_rows_after_names = 2
+        opts.column_names = ["f0", "f1"]
+        table = self.read_bytes(rows, read_options=opts)
+        self.check_names(table, ["f0", "f1"])
+        assert table.to_pydict() == {
+            "f0": ["ij", "mn"],
+            "f1": ["kl", "op"],
+        }
+        opts = ReadOptions()
+
+        # Can skip rows with new lines in the value
+        rows = b'ab,cd\n"e\nf","g\n\nh"\n"ij","k\nl"\nmn,op'
+        opts.skip_rows_after_names = 2
+        parse_opts = ParseOptions()
+        parse_opts.newlines_in_values = True
+        table = self.read_bytes(rows, read_options=opts,
+                                parse_options=parse_opts)
+        self.check_names(table, ["ab", "cd"])
+        assert table.to_pydict() == {
+            "ab": ["mn"],
+            "cd": ["op"],
+        }
+
+        # Can skip rows that are beyond the first block without lexer
+        rows, expected = make_random_csv(num_cols=5, num_rows=1000)
+        opts.skip_rows_after_names = 900
+        opts.block_size = len(rows) / 11
+        table = self.read_bytes(rows, read_options=opts)
+        assert table.schema == expected.schema
+        assert table.num_rows == 100
+        table_dict = table.to_pydict()
+        for name, values in expected.to_pydict().items():
+            assert values[900:] == table_dict[name]
+
+        # Can skip rows that are beyond the first block with lexer
+        table = self.read_bytes(rows, read_options=opts,
+                                parse_options=parse_opts)
+        assert table.schema == expected.schema
+        assert table.num_rows == 100
+        table_dict = table.to_pydict()
+        for name, values in expected.to_pydict().items():
+            assert values[900:] == table_dict[name]
+
+        # Skip rows and skip rows after names
+        rows, expected = make_random_csv(num_cols=5, num_rows=200,
+                                         write_names=False)
+        opts = ReadOptions()
+        opts.skip_rows = 37
+        opts.skip_rows_after_names = 41
+        opts.column_names = expected.schema.names
+        table = self.read_bytes(rows, read_options=opts,
+                                parse_options=parse_opts)
+        assert table.schema == expected.schema
+        assert (table.num_rows ==
+                expected.num_rows - opts.skip_rows -
+                opts.skip_rows_after_names)
+        table_dict = table.to_pydict()
+        for name, values in expected.to_pydict().items():
+            assert (values[opts.skip_rows + opts.skip_rows_after_names:] ==
+                    table_dict[name])
+
     def test_header_column_names(self):
         rows = b"ab,cd\nef,gh\nij,kl\nmn,op\n"
 
@@ -992,20 +1085,38 @@ def test_row_numbers_in_errors(self):
                             convert_options=convert_options)
 
         csv_bad_type = csv + b"a,b,c,d\r\n"
-        message = ("In CSV column #0: Row #102: " +
-                   "CSV conversion error to int32: invalid value 'a'")
-        with pytest.raises(pa.ArrowInvalid, match=message):
+        message_value = ("In CSV column #0: Row #102: " +
+                         "CSV conversion error to int32: invalid value 'a'")
+        with pytest.raises(pa.ArrowInvalid, match=message_value):
             self.read_bytes(csv_bad_type, read_options=read_options,
                             convert_options=convert_options)
 
         long_row = (b"this is a long row" * 15) + b",3\r\n"
         csv_bad_columns_long = csv + long_row
-        message = ("Row #102: Expected 4 columns, got 2: " +
-                   long_row[0:96].decode("utf-8") + " ...")
-        with pytest.raises(pa.ArrowInvalid, match=message):
+        message_long = ("Row #102: Expected 4 columns, got 2: " +
+                        long_row[0:96].decode("utf-8") + " ...")
+        with pytest.raises(pa.ArrowInvalid, match=message_long):
+            self.read_bytes(csv_bad_columns_long, read_options=read_options,
+                            convert_options=convert_options)
+
+        # Test skipping rows after the names
+        read_options.skip_rows_after_names = 47
+
+        with pytest.raises(pa.ArrowInvalid,
+                           match="Row #102: Expected 4 columns, got 2"):
+            self.read_bytes(csv_bad_columns, read_options=read_options,
+                            convert_options=convert_options)
+
+        with pytest.raises(pa.ArrowInvalid, match=message_value):
+            self.read_bytes(csv_bad_type, read_options=read_options,
+                            convert_options=convert_options)
+
+        with pytest.raises(pa.ArrowInvalid, match=message_long):
             self.read_bytes(csv_bad_columns_long, read_options=read_options,
                             convert_options=convert_options)
 
+        read_options.skip_rows_after_names = 0
+
         # Test without skip_rows and column names not in the csv
         csv, _ = make_random_csv(4, 100, write_names=False)
         read_options.column_names = ["a", "b", "c", "d"]
@@ -1016,16 +1127,16 @@ def test_row_numbers_in_errors(self):
                             convert_options=convert_options)
 
         csv_bad_columns_long = csv + long_row
-        message = ("Row #101: Expected 4 columns, got 2: " +
-                   long_row[0:96].decode("utf-8") + " ...")
-        with pytest.raises(pa.ArrowInvalid, match=message):
+        message_long = ("Row #101: Expected 4 columns, got 2: " +
+                        long_row[0:96].decode("utf-8") + " ...")
+        with pytest.raises(pa.ArrowInvalid, match=message_long):
             self.read_bytes(csv_bad_columns_long, read_options=read_options,
                             convert_options=convert_options)
 
         csv_bad_type = csv + b"a,b,c,d\r\n"
-        message = ("In CSV column #0: Row #101: " +
-                   "CSV conversion error to int32: invalid value 'a'")
-        with pytest.raises(pa.ArrowInvalid, match=message):
+        message_value = ("In CSV column #0: Row #101: " +
+                         "CSV conversion error to int32: invalid value 'a'")
+        with pytest.raises(pa.ArrowInvalid, match=message_value):
             self.read_bytes(csv_bad_type, read_options=read_options,
                             convert_options=convert_options)
 
@@ -1036,7 +1147,7 @@ def test_row_numbers_in_errors(self):
             self.read_bytes(csv_bad_columns, read_options=read_options,
                             convert_options=convert_options)
 
-        with pytest.raises(pa.ArrowInvalid, match=message):
+        with pytest.raises(pa.ArrowInvalid, match=message_value):
             self.read_bytes(csv_bad_type, read_options=read_options,
                             convert_options=convert_options)
 

From e7b6c4ac711b93734e918e905b4cafd7e44cffc1 Mon Sep 17 00:00:00 2001
From: Weston Pace <weston.pace@gmail.com>
Date: Mon, 7 Jun 2021 17:54:28 +0200
Subject: [PATCH 363/719] ARROW-12560: [C++] Add scheduling option for Future
 callbacks

Previously a future's callbacks would always run synchronously, either as part of `Future::MarkFinished` or as part of `Future::AddCallback`.  `Executor::Transfer` made it possible to schedule continuations on a new thread but it would only take effect if the transferred future's callbacks were added before the source future finished.  There are times when the desired behavior is to spawn a new thread task even if the source future is finished already.

This PR adds three scheduling options:
* Never - The default (and existing) behavior, never spawn a new task
* IfUnfinished - Spawn a new task only if the future isn't already finished when the callback is added
* Always - Always spawn a new task, on both finished and unfinished futures, regardless of destination thread pool idleness.

The `Never` option doesn't make any sense for transferring so the transfer only has two choices (always or if unfinished).

Closes #10258 from westonpace/feature/ARROW-12560--c-investigate-utilizing-aggressive-thread-task

Lead-authored-by: Weston Pace <weston.pace@gmail.com>
Co-authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/testing/executor_util.h  |  55 ++++++++++++
 cpp/src/arrow/util/future.cc           |  66 +++++++++++---
 cpp/src/arrow/util/future.h            |  92 +++++++++++++------
 cpp/src/arrow/util/future_test.cc      | 119 ++++++++++++++++++++++++-
 cpp/src/arrow/util/test_common.h       |   2 +
 cpp/src/arrow/util/thread_pool.h       |  67 ++++++++++----
 cpp/src/arrow/util/thread_pool_test.cc |  39 ++++++++
 7 files changed, 380 insertions(+), 60 deletions(-)
 create mode 100644 cpp/src/arrow/testing/executor_util.h

diff --git a/cpp/src/arrow/testing/executor_util.h b/cpp/src/arrow/testing/executor_util.h
new file mode 100644
index 00000000000..e34fc858d07
--- /dev/null
+++ b/cpp/src/arrow/testing/executor_util.h
@@ -0,0 +1,55 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "arrow/util/thread_pool.h"
+
+namespace arrow {
+
+/// An executor which synchronously runs the task as part of the SpawnReal call.
+class MockExecutor : public internal::Executor {
+ public:
+  int GetCapacity() override { return 0; }
+
+  Status SpawnReal(internal::TaskHints hints, internal::FnOnce<void()> task, StopToken,
+                   StopCallback&&) override {
+    spawn_count++;
+    std::move(task)();
+    return Status::OK();
+  }
+
+  int spawn_count = 0;
+};
+
+/// An executor which does not actually run the task.  Can be used to simulate situations
+/// where the executor schedules a task in a long queue and doesn't get around to running
+/// it for a while
+class DelayedExecutor : public internal::Executor {
+ public:
+  int GetCapacity() override { return 0; }
+
+  Status SpawnReal(internal::TaskHints hints, internal::FnOnce<void()> task, StopToken,
+                   StopCallback&&) override {
+    captured_tasks.push_back(std::move(task));
+    return Status::OK();
+  }
+
+  std::vector<internal::FnOnce<void()>> captured_tasks;
+};
+
+}  // namespace arrow
diff --git a/cpp/src/arrow/util/future.cc b/cpp/src/arrow/util/future.cc
index c7d7c37ad33..b329f99ed17 100644
--- a/cpp/src/arrow/util/future.cc
+++ b/cpp/src/arrow/util/future.cc
@@ -26,6 +26,7 @@
 
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/logging.h"
+#include "arrow/util/thread_pool.h"
 
 namespace arrow {
 
@@ -231,26 +232,68 @@ class ConcreteFutureImpl : public FutureImpl {
 
   void DoMarkFailed() { DoMarkFinishedOrFailed(FutureState::FAILURE); }
 
-  void AddCallback(Callback callback) {
+  void CheckOptions(const CallbackOptions& opts) {
+    if (opts.should_schedule != ShouldSchedule::Never) {
+      DCHECK_NE(opts.executor, nullptr)
+          << "An executor must be specified when adding a callback that might schedule";
+    }
+  }
+
+  void AddCallback(Callback callback, CallbackOptions opts) {
+    CheckOptions(opts);
     std::unique_lock<std::mutex> lock(mutex_);
+    CallbackRecord callback_record{std::move(callback), opts};
     if (IsFutureFinished(state_)) {
       lock.unlock();
-      std::move(callback)();
+      RunOrScheduleCallback(std::move(callback_record), /*in_add_callback=*/true);
     } else {
-      callbacks_.push_back(std::move(callback));
+      callbacks_.push_back(std::move(callback_record));
     }
   }
 
-  bool TryAddCallback(const std::function<Callback()>& callback_factory) {
+  bool TryAddCallback(const std::function<Callback()>& callback_factory,
+                      CallbackOptions opts) {
+    CheckOptions(opts);
     std::unique_lock<std::mutex> lock(mutex_);
     if (IsFutureFinished(state_)) {
       return false;
     } else {
-      callbacks_.push_back(callback_factory());
+      callbacks_.push_back({callback_factory(), opts});
       return true;
     }
   }
 
+  bool ShouldScheduleCallback(const CallbackRecord& callback_record,
+                              bool in_add_callback) {
+    switch (callback_record.options.should_schedule) {
+      case ShouldSchedule::Never:
+        return false;
+      case ShouldSchedule::Always:
+        return true;
+      case ShouldSchedule::IfUnfinished:
+        return !in_add_callback;
+      default:
+        DCHECK(false) << "Unrecognized ShouldSchedule option";
+        return false;
+    }
+  }
+
+  void RunOrScheduleCallback(CallbackRecord&& callback_record, bool in_add_callback) {
+    if (ShouldScheduleCallback(callback_record, in_add_callback)) {
+      struct CallbackTask {
+        void operator()() { std::move(callback)(*self); }
+
+        Callback callback;
+        std::shared_ptr<FutureImpl> self;
+      };
+      // Need to keep `this` alive until the callback has a chance to be scheduled.
+      CallbackTask task{std::move(callback_record.callback), shared_from_this()};
+      DCHECK_OK(callback_record.options.executor->Spawn(std::move(task)));
+    } else {
+      std::move(callback_record.callback)(*this);
+    }
+  }
+
   void DoMarkFinishedOrFailed(FutureState state) {
     {
       // Lock the hypothetical waiter first, and the future after.
@@ -272,8 +315,8 @@ class ConcreteFutureImpl : public FutureImpl {
     //
     // In fact, it is important not to hold the locks because the callback
     // may be slow or do its own locking on other resources
-    for (auto&& callback : callbacks_) {
-      std::move(callback)();
+    for (auto& callback_record : callbacks_) {
+      RunOrScheduleCallback(std::move(callback_record), /*in_add_callback=*/false);
     }
     callbacks_.clear();
   }
@@ -334,12 +377,13 @@ void FutureImpl::MarkFinished() { GetConcreteFuture(this)->DoMarkFinished(); }
 
 void FutureImpl::MarkFailed() { GetConcreteFuture(this)->DoMarkFailed(); }
 
-void FutureImpl::AddCallback(Callback callback) {
-  GetConcreteFuture(this)->AddCallback(std::move(callback));
+void FutureImpl::AddCallback(Callback callback, CallbackOptions opts) {
+  GetConcreteFuture(this)->AddCallback(std::move(callback), opts);
 }
 
-bool FutureImpl::TryAddCallback(const std::function<Callback()>& callback_factory) {
-  return GetConcreteFuture(this)->TryAddCallback(callback_factory);
+bool FutureImpl::TryAddCallback(const std::function<Callback()>& callback_factory,
+                                CallbackOptions opts) {
+  return GetConcreteFuture(this)->TryAddCallback(callback_factory, opts);
 }
 
 Future<> AllComplete(const std::vector<Future<>>& futures) {
diff --git a/cpp/src/arrow/util/future.h b/cpp/src/arrow/util/future.h
index 132443176ed..5fb17f95f2b 100644
--- a/cpp/src/arrow/util/future.h
+++ b/cpp/src/arrow/util/future.h
@@ -202,8 +202,30 @@ enum class FutureState : int8_t { PENDING, SUCCESS, FAILURE };
 
 inline bool IsFutureFinished(FutureState state) { return state != FutureState::PENDING; }
 
+/// \brief Describe whether the callback should be scheduled or run synchronously
+enum class ShouldSchedule {
+  /// Always run the callback synchronously (the default)
+  Never = 0,
+  /// Schedule a new task only if the future is not finished when the
+  /// callback is added
+  IfUnfinished = 1,
+  /// Always schedule the callback as a new task
+  Always = 2
+};
+
+/// \brief Options that control how a continuation is run
+struct CallbackOptions {
+  /// Describe whether the callback should be run synchronously or scheduled
+  ShouldSchedule should_schedule = ShouldSchedule::Never;
+  /// If the callback is scheduled then this is the executor it should be scheduled
+  /// on.  If this is NULL then should_schedule must be Never
+  internal::Executor* executor = NULL;
+
+  static CallbackOptions Defaults() { return CallbackOptions(); }
+};
+
 // Untyped private implementation
-class ARROW_EXPORT FutureImpl {
+class ARROW_EXPORT FutureImpl : public std::enable_shared_from_this<FutureImpl> {
  public:
   FutureImpl();
   virtual ~FutureImpl() = default;
@@ -218,10 +240,15 @@ class ARROW_EXPORT FutureImpl {
   void MarkFailed();
   void Wait();
   bool Wait(double seconds);
+  template <typename ValueType>
+  Result<ValueType>* CastResult() const {
+    return static_cast<Result<ValueType>*>(result_.get());
+  }
 
-  using Callback = internal::FnOnce<void()>;
-  void AddCallback(Callback callback);
-  bool TryAddCallback(const std::function<Callback()>& callback_factory);
+  using Callback = internal::FnOnce<void(const FutureImpl& impl)>;
+  void AddCallback(Callback callback, CallbackOptions opts);
+  bool TryAddCallback(const std::function<Callback()>& callback_factory,
+                      CallbackOptions opts);
 
   // Waiter API
   inline FutureState SetWaiter(FutureWaiter* w, int future_num);
@@ -234,7 +261,11 @@ class ARROW_EXPORT FutureImpl {
   using Storage = std::unique_ptr<void, void (*)(void*)>;
   Storage result_{NULLPTR, NULLPTR};
 
-  std::vector<Callback> callbacks_;
+  struct CallbackRecord {
+    Callback callback;
+    CallbackOptions options;
+  };
+  std::vector<CallbackRecord> callbacks_;
 };
 
 // An object that waits on multiple futures at once.  Only one waiter
@@ -453,30 +484,34 @@ class Future {
   /// cyclic reference to itself through the callback.
   template <typename OnComplete>
   typename std::enable_if<!detail::first_arg_is_status<OnComplete>::value>::type
-  AddCallback(OnComplete on_complete) const {
+  AddCallback(OnComplete on_complete,
+              CallbackOptions opts = CallbackOptions::Defaults()) const {
     // We know impl_ will not be dangling when invoking callbacks because at least one
     // thread will be waiting for MarkFinished to return. Thus it's safe to keep a
     // weak reference to impl_ here
     struct Callback {
-      void operator()() && { std::move(on_complete)(weak_self.get().result()); }
-      WeakFuture<T> weak_self;
+      void operator()(const FutureImpl& impl) && {
+        std::move(on_complete)(*impl.CastResult<ValueType>());
+      }
       OnComplete on_complete;
     };
-    impl_->AddCallback(Callback{WeakFuture<T>(*this), std::move(on_complete)});
+    impl_->AddCallback(Callback{std::move(on_complete)}, opts);
   }
 
   /// Overload for callbacks accepting a Status
   template <typename OnComplete>
   typename std::enable_if<detail::first_arg_is_status<OnComplete>::value>::type
-  AddCallback(OnComplete on_complete) const {
+  AddCallback(OnComplete on_complete,
+              CallbackOptions opts = CallbackOptions::Defaults()) const {
     static_assert(std::is_same<internal::Empty, ValueType>::value,
                   "Callbacks for Future<> should accept Status and not Result");
     struct Callback {
-      void operator()() && { std::move(on_complete)(weak_self.get().status()); }
-      WeakFuture<T> weak_self;
+      void operator()(const FutureImpl& impl) && {
+        std::move(on_complete)(impl.CastResult<ValueType>()->status());
+      }
       OnComplete on_complete;
     };
-    impl_->AddCallback(Callback{WeakFuture<T>(*this), std::move(on_complete)});
+    impl_->AddCallback(Callback{std::move(on_complete)}, opts);
   }
 
   /// \brief Overload of AddCallback that will return false instead of running
@@ -495,30 +530,33 @@ class Future {
   template <typename CallbackFactory,
             typename OnComplete = detail::result_of_t<CallbackFactory()>>
   typename std::enable_if<!detail::first_arg_is_status<OnComplete>::value, bool>::type
-  TryAddCallback(const CallbackFactory& callback_factory) const {
+  TryAddCallback(const CallbackFactory& callback_factory,
+                 CallbackOptions opts = CallbackOptions::Defaults()) const {
     struct Callback {
-      void operator()() && { std::move(on_complete)(weak_self.get().result()); }
-      WeakFuture<T> weak_self;
+      void operator()(const FutureImpl& impl) && {
+        std::move(on_complete)(*static_cast<Result<ValueType>*>(impl.result_.get()));
+      }
       OnComplete on_complete;
     };
-    return impl_->TryAddCallback([this, &callback_factory]() {
-      return Callback{WeakFuture<T>(*this), callback_factory()};
-    });
+    return impl_->TryAddCallback(
+        [&callback_factory]() { return Callback{callback_factory()}; }, opts);
   }
 
   template <typename CallbackFactory,
             typename OnComplete = detail::result_of_t<CallbackFactory()>>
   typename std::enable_if<detail::first_arg_is_status<OnComplete>::value, bool>::type
-  TryAddCallback(const CallbackFactory& callback_factory) const {
+  TryAddCallback(const CallbackFactory& callback_factory,
+                 CallbackOptions opts = CallbackOptions::Defaults()) const {
     struct Callback {
-      void operator()() && { std::move(on_complete)(weak_self.get().status()); }
-      WeakFuture<T> weak_self;
+      void operator()(const FutureImpl& impl) && {
+        std::move(on_complete)(
+            static_cast<Result<ValueType>*>(impl.result_.get())->status());
+      }
       OnComplete on_complete;
     };
 
-    return impl_->TryAddCallback([this, &callback_factory]() {
-      return Callback{WeakFuture<T>(*this), callback_factory()};
-    });
+    return impl_->TryAddCallback(
+        [&callback_factory]() { return Callback{callback_factory()}; }, opts);
   }
 
   /// \brief Consumer API: Register a continuation to run when this future completes
@@ -696,9 +734,7 @@ class Future {
 
   void Initialize() { impl_ = FutureImpl::Make(); }
 
-  Result<ValueType>* GetResult() const {
-    return static_cast<Result<ValueType>*>(impl_->result_.get());
-  }
+  Result<ValueType>* GetResult() const { return impl_->CastResult<ValueType>(); }
 
   void SetResult(Result<ValueType> res) {
     impl_->result_ = {new Result<ValueType>(std::move(res)),
diff --git a/cpp/src/arrow/util/future_test.cc b/cpp/src/arrow/util/future_test.cc
index 8c1e72a48bd..33796a05bb1 100644
--- a/cpp/src/arrow/util/future_test.cc
+++ b/cpp/src/arrow/util/future_test.cc
@@ -27,11 +27,13 @@
 #include <random>
 #include <string>
 #include <thread>
+#include <unordered_set>
 #include <vector>
 
 #include <gmock/gmock-matchers.h>
 #include <gtest/gtest.h>
 
+#include "arrow/testing/executor_util.h"
 #include "arrow/testing/future_util.h"
 #include "arrow/testing/gtest_util.h"
 #include "arrow/util/logging.h"
@@ -387,10 +389,8 @@ TEST(FutureRefTest, TailRemoved) {
 
 TEST(FutureRefTest, HeadRemoved) {
   // Keeping the tail of the future chain should not keep the entire chain alive.  If no
-  // one has a reference to the head then there is no need to keep it, nothing will finish
-  // it.  In theory the intermediate futures could be finished by some external process
-  // but that would be highly unusual and bad practice so in reality this would just be a
-  // reference to a future that will never complete which is ok.
+  // one has a reference to the head then the future is abandoned.  TODO (ARROW-12207):
+  // detect abandonment.
   std::weak_ptr<FutureImpl> ref;
   std::shared_ptr<Future<>> ref2;
   {
@@ -952,6 +952,117 @@ TEST(FutureCompletionTest, FutureVoid) {
   }
 }
 
+class FutureSchedulingTest : public testing::Test {
+ public:
+  internal::Executor* executor() { return mock_executor.get(); }
+
+  int spawn_count() { return static_cast<int>(mock_executor->captured_tasks.size()); }
+
+  void AssertRunSynchronously(const std::vector<int>& ids) { AssertIds(ids, true); }
+
+  void AssertScheduled(const std::vector<int>& ids) { AssertIds(ids, false); }
+
+  void AssertIds(const std::vector<int>& ids, bool should_be_synchronous) {
+    for (auto id : ids) {
+      ASSERT_EQ(should_be_synchronous, callbacks_run_synchronously.find(id) !=
+                                           callbacks_run_synchronously.end());
+    }
+  }
+
+  std::function<void(const Status&)> callback(int id) {
+    return [this, id](const Status&) { callbacks_run_synchronously.insert(id); };
+  }
+
+  std::shared_ptr<DelayedExecutor> mock_executor = std::make_shared<DelayedExecutor>();
+  std::unordered_set<int> callbacks_run_synchronously;
+};
+
+TEST_F(FutureSchedulingTest, ScheduleNever) {
+  CallbackOptions options;
+  options.should_schedule = ShouldSchedule::Never;
+  options.executor = executor();
+  // Successful future
+  {
+    auto fut = Future<>::Make();
+    fut.AddCallback(callback(1), options);
+    fut.MarkFinished();
+    fut.AddCallback(callback(2), options);
+    ASSERT_EQ(0, spawn_count());
+    AssertRunSynchronously({1, 2});
+  }
+  // Failing future
+  {
+    auto fut = Future<>::Make();
+    fut.AddCallback(callback(3), options);
+    fut.MarkFinished(Status::Invalid("XYZ"));
+    fut.AddCallback(callback(4), options);
+    ASSERT_EQ(0, spawn_count());
+    AssertRunSynchronously({3, 4});
+  }
+}
+
+TEST_F(FutureSchedulingTest, ScheduleAlways) {
+  CallbackOptions options;
+  options.should_schedule = ShouldSchedule::Always;
+  options.executor = executor();
+  // Successful future
+  {
+    auto fut = Future<>::Make();
+    fut.AddCallback(callback(1), options);
+    fut.MarkFinished();
+    fut.AddCallback(callback(2), options);
+    ASSERT_EQ(2, spawn_count());
+    AssertScheduled({1, 2});
+  }
+  // Failing future
+  {
+    auto fut = Future<>::Make();
+    fut.AddCallback(callback(3), options);
+    fut.MarkFinished(Status::Invalid("XYZ"));
+    fut.AddCallback(callback(4), options);
+    ASSERT_EQ(4, spawn_count());
+    AssertScheduled({3, 4});
+  }
+}
+
+TEST_F(FutureSchedulingTest, ScheduleIfUnfinished) {
+  CallbackOptions options;
+  options.should_schedule = ShouldSchedule::IfUnfinished;
+  options.executor = executor();
+  // Successful future
+  {
+    auto fut = Future<>::Make();
+    fut.AddCallback(callback(1), options);
+    fut.MarkFinished();
+    fut.AddCallback(callback(2), options);
+    ASSERT_EQ(1, spawn_count());
+    AssertRunSynchronously({2});
+    AssertScheduled({1});
+  }
+  // Failing future
+  {
+    auto fut = Future<>::Make();
+    fut.AddCallback(callback(3), options);
+    fut.MarkFinished(Status::Invalid("XYZ"));
+    fut.AddCallback(callback(4), options);
+    ASSERT_EQ(2, spawn_count());
+    AssertRunSynchronously({4});
+    AssertScheduled({3});
+  }
+}
+
+TEST_F(FutureSchedulingTest, ScheduleAlwaysKeepsFutureAliveUntilCallback) {
+  CallbackOptions options;
+  options.should_schedule = ShouldSchedule::Always;
+  options.executor = executor();
+  {
+    auto fut = Future<int>::Make();
+    fut.AddCallback([](const Result<int> val) { ASSERT_EQ(7, *val); }, options);
+    fut.MarkFinished(7);
+  }
+  std::move(mock_executor->captured_tasks[0])();
+}
+
 TEST(FutureAllTest, Empty) {
   auto combined = arrow::All(std::vector<Future<int>>{});
   auto after_assert = combined.Then(
diff --git a/cpp/src/arrow/util/test_common.h b/cpp/src/arrow/util/test_common.h
index 8c304ffbbcf..511daed1eca 100644
--- a/cpp/src/arrow/util/test_common.h
+++ b/cpp/src/arrow/util/test_common.h
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#pragma once
+
 #include <iosfwd>
 
 #include "arrow/testing/gtest_util.h"
diff --git a/cpp/src/arrow/util/thread_pool.h b/cpp/src/arrow/util/thread_pool.h
index 8626132a348..d012aa02010 100644
--- a/cpp/src/arrow/util/thread_pool.h
+++ b/cpp/src/arrow/util/thread_pool.h
@@ -102,24 +102,24 @@ class ARROW_EXPORT Executor {
   // The continuations of that future should run on the CPU thread pool keeping
   // CPU heavy work off the I/O thread pool.  So the I/O task should transfer
   // the future to the CPU executor before returning.
-  template <typename T, typename FT = Future<T>, typename FTSync = typename FT::SyncType>
+  //
+  // By default this method will only transfer if the future is not already completed.  If
+  // the future is already completed then any callback would be run synchronously and so
+  // no transfer is typically necessary.  However, in cases where you want to force a
+  // transfer (e.g. to help the scheduler break up units of work across multiple cores)
+  // then you can override this behavior with `always_transfer`.
+  template <typename T>
   Future<T> Transfer(Future<T> future) {
-    auto transferred = Future<T>::Make();
-    auto callback = [this, transferred](const FTSync& result) mutable {
-      auto spawn_status =
-          Spawn([transferred, result]() mutable { transferred.MarkFinished(result); });
-      if (!spawn_status.ok()) {
-        transferred.MarkFinished(spawn_status);
-      }
-    };
-    auto callback_factory = [&callback]() { return callback; };
-    if (future.TryAddCallback(callback_factory)) {
-      return transferred;
-    }
-    // If the future is already finished and we aren't going to force spawn a thread
-    // then we don't need to add another layer of callback and can return the original
-    // future
-    return future;
+    return DoTransfer(std::move(future), false);
+  }
+
+  // Overload of Transfer which will always schedule callbacks on new threads even if the
+  // future is finished when the callback is added.
+  //
+  // This can be useful in cases where you want to ensure parallelism
+  template <typename T>
+  Future<T> TransferAlways(Future<T> future) {
+    return DoTransfer(std::move(future), true);
   }
 
   // Submit a callable and arguments for execution.  Return a future that
@@ -184,6 +184,39 @@ class ARROW_EXPORT Executor {
 
   Executor() = default;
 
+  template <typename T, typename FT = Future<T>, typename FTSync = typename FT::SyncType>
+  Future<T> DoTransfer(Future<T> future, bool always_transfer = false) {
+    auto transferred = Future<T>::Make();
+    if (always_transfer) {
+      CallbackOptions callback_options = CallbackOptions::Defaults();
+      callback_options.should_schedule = ShouldSchedule::Always;
+      callback_options.executor = this;
+      auto sync_callback = [transferred](const FTSync& result) mutable {
+        transferred.MarkFinished(result);
+      };
+      future.AddCallback(sync_callback, callback_options);
+      return transferred;
+    }
+
+    // We could use AddCallback's ShouldSchedule::IfUnfinished but we can save a bit of
+    // work by doing the test here.
+    auto callback = [this, transferred](const FTSync& result) mutable {
+      auto spawn_status =
+          Spawn([transferred, result]() mutable { transferred.MarkFinished(result); });
+      if (!spawn_status.ok()) {
+        transferred.MarkFinished(spawn_status);
+      }
+    };
+    auto callback_factory = [&callback]() { return callback; };
+    if (future.TryAddCallback(callback_factory)) {
+      return transferred;
+    }
+    // If the future is already finished and we aren't going to force spawn a thread
+    // then we don't need to add another layer of callback and can return the original
+    // future
+    return future;
+  }
+
   // Subclassing API
   virtual Status SpawnReal(TaskHints hints, FnOnce<void()> task, StopToken,
                            StopCallback&&) = 0;
diff --git a/cpp/src/arrow/util/thread_pool_test.cc b/cpp/src/arrow/util/thread_pool_test.cc
index bac6baf839f..2cfb4c62613 100644
--- a/cpp/src/arrow/util/thread_pool_test.cc
+++ b/cpp/src/arrow/util/thread_pool_test.cc
@@ -32,9 +32,12 @@
 #include <gtest/gtest.h>
 
 #include "arrow/status.h"
+#include "arrow/testing/executor_util.h"
+#include "arrow/testing/future_util.h"
 #include "arrow/testing/gtest_util.h"
 #include "arrow/util/io_util.h"
 #include "arrow/util/macros.h"
+#include "arrow/util/test_common.h"
 #include "arrow/util/thread_pool.h"
 
 namespace arrow {
@@ -256,6 +259,42 @@ TEST_P(TestRunSynchronously, PropagatedError) {
 INSTANTIATE_TEST_SUITE_P(TestRunSynchronously, TestRunSynchronously,
                          ::testing::Values(false, true));
 
+class TransferTest : public testing::Test {
+ public:
+  internal::Executor* executor() { return mock_executor.get(); }
+  int spawn_count() { return mock_executor->spawn_count; }
+
+  std::function<void(const Status&)> callback = [](const Status&) {};
+  std::shared_ptr<MockExecutor> mock_executor = std::make_shared<MockExecutor>();
+};
+
+TEST_F(TransferTest, DefaultTransferIfNotFinished) {
+  {
+    Future<> fut = Future<>::Make();
+    auto transferred = executor()->Transfer(fut);
+    fut.MarkFinished();
+    ASSERT_FINISHES_OK(transferred);
+    ASSERT_EQ(1, spawn_count());
+  }
+  {
+    Future<> fut = Future<>::Make();
+    fut.MarkFinished();
+    auto transferred = executor()->Transfer(fut);
+    ASSERT_FINISHES_OK(transferred);
+    ASSERT_EQ(1, spawn_count());
+  }
+}
+
+TEST_F(TransferTest, TransferAlways) {
+  {
+    Future<> fut = Future<>::Make();
+    fut.MarkFinished();
+    auto transferred = executor()->TransferAlways(fut);
+    ASSERT_FINISHES_OK(transferred);
+    ASSERT_EQ(1, spawn_count());
+  }
+}
+
 class TestThreadPool : public ::testing::Test {
  public:
   void TearDown() override {

From e380fa1de5e6a52af787890d48dcaf9b1a5cf57e Mon Sep 17 00:00:00 2001
From: Matthijs Brobbel <m1brobbel@gmail.com>
Date: Mon, 7 Jun 2021 17:57:41 +0200
Subject: [PATCH 364/719] MINOR: [Docs] Fix a typo in Python IPC ReadStats
 class

Closes #10463 from mbrobbel/fix-typo

Authored-by: Matthijs Brobbel <m1brobbel@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 python/pyarrow/ipc.pxi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/pyarrow/ipc.pxi b/python/pyarrow/ipc.pxi
index 2f82a9f64df..93dd2eaef5e 100644
--- a/python/pyarrow/ipc.pxi
+++ b/python/pyarrow/ipc.pxi
@@ -72,7 +72,7 @@ _ReadStats = namedtuple(
 
 
 class ReadStats(_ReadStats):
-    """IPC write statistics
+    """IPC read statistics
     """
     __slots__ = ()
 

From e933457db038da03ded4da30c647f6a11e1606fb Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Mon, 7 Jun 2021 18:00:35 +0200
Subject: [PATCH 365/719] ARROW-12988: [CI] Skip the failing test in kartothek
 nightly integration build

Closes #10466 from jorisvandenbossche/ARROW-12988-ci-kartothek

Authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Signed-off-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
---
 ci/scripts/integration_kartothek.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/scripts/integration_kartothek.sh b/ci/scripts/integration_kartothek.sh
index 379569b9c99..9e0a6996a27 100755
--- a/ci/scripts/integration_kartothek.sh
+++ b/ci/scripts/integration_kartothek.sh
@@ -28,4 +28,4 @@ python -c "import kartothek"
 
 pushd /kartothek
 # See ARROW-12314, test_load_dataframes_columns_raises_missing skipped because of changed error message
-pytest -n0 --ignore tests/cli/test_query.py -k "not test_load_dataframes_columns_raises_missing"
+pytest -n0 --ignore tests/cli/test_query.py -k "not test_load_dataframes_columns_raises_missing and not test_update_dataset_from_ddf_empty"

From b788b0da5cef805cd9518e9873efe5f3e19a4b81 Mon Sep 17 00:00:00 2001
From: Weston Pace <weston.pace@gmail.com>
Date: Mon, 7 Jun 2021 18:03:01 +0200
Subject: [PATCH 366/719] ARROW-12982: [C++] Re-enable unused-variable warning

The warning was originally disabled out of convenience in https://github.com/apache/arrow/pull/3976

Closes #10458 from westonpace/experiment/no-unused-variable

Authored-by: Weston Pace <weston.pace@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/cmake_modules/SetupCxxFlags.cmake    | 3 ---
 cpp/src/arrow/compute/exec/expression.cc | 2 +-
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake
index e1b3c1b95ad..0a92702c4ec 100644
--- a/cpp/cmake_modules/SetupCxxFlags.cmake
+++ b/cpp/cmake_modules/SetupCxxFlags.cmake
@@ -266,16 +266,13 @@ if("${BUILD_WARNING_LEVEL}" STREQUAL "CHECKIN")
     set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-conversion")
     set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-deprecated-declarations")
     set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-sign-conversion")
-    set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-unused-variable")
   elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
     if(WIN32)
       set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} /Wall")
       set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} /Wno-deprecated")
-      set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} /Wno-unused-variable")
     else()
       set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wall")
       set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-deprecated")
-      set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-unused-variable")
     endif()
   else()
     message(FATAL_ERROR "${UNKNOWN_COMPILER_MESSAGE}")
diff --git a/cpp/src/arrow/compute/exec/expression.cc b/cpp/src/arrow/compute/exec/expression.cc
index 91bf73166ca..1c8c82de05e 100644
--- a/cpp/src/arrow/compute/exec/expression.cc
+++ b/cpp/src/arrow/compute/exec/expression.cc
@@ -616,7 +616,7 @@ std::vector<FieldRef> FieldsInExpression(const Expression& expr) {
 bool ExpressionHasFieldRefs(const Expression& expr) {
   if (expr.literal()) return false;
 
-  if (auto ref = expr.field_ref()) return true;
+  if (expr.field_ref()) return true;
 
   for (const Expression& arg : CallNotNull(expr)->arguments) {
     if (ExpressionHasFieldRefs(arg)) return true;

From e6d632ed2f7f97445a25a61bff8fe9dd126e550e Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Mon, 7 Jun 2021 18:06:37 +0200
Subject: [PATCH 367/719] ARROW-12969: [C++] Fix match_substring with empty
 haystack

Closes #10453 from lidavidm/arrow-12969

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 .../arrow/compute/kernels/scalar_string.cc    | 34 +++++++++++--------
 .../compute/kernels/scalar_string_test.cc     | 10 ++++++
 2 files changed, 30 insertions(+), 14 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/scalar_string.cc b/cpp/src/arrow/compute/kernels/scalar_string.cc
index 1d87bd86c67..9db16e26ca5 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string.cc
@@ -475,6 +475,7 @@ struct PlainSubstringMatcher {
     const auto pattern_length = options_.pattern.size();
     int64_t pattern_pos = 0;
     int64_t pos = 0;
+    if (pattern_length == 0) return 0;
     for (const auto c : current) {
       while ((pattern_pos >= 0) && (options_.pattern[pattern_pos] != c)) {
         pattern_pos = prefix_table[pattern_pos];
@@ -737,12 +738,14 @@ struct FindSubstring {
 };
 
 template <typename InputType>
-Status FindSubstringExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
-  using offset_type = typename TypeTraits<InputType>::OffsetType;
-  applicator::ScalarUnaryNotNullStateful<offset_type, InputType, FindSubstring> kernel{
-      FindSubstring(PlainSubstringMatcher(MatchSubstringState::Get(ctx)))};
-  return kernel.Exec(ctx, batch, out);
-}
+struct FindSubstringExec {
+  using OffsetType = typename TypeTraits<InputType>::OffsetType;
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    applicator::ScalarUnaryNotNullStateful<OffsetType, InputType, FindSubstring> kernel{
+        FindSubstring(PlainSubstringMatcher(MatchSubstringState::Get(ctx)))};
+    return kernel.Exec(ctx, batch, out);
+  }
+};
 
 const FunctionDoc find_substring_doc(
     "Find first occurrence of substring",
@@ -754,14 +757,17 @@ const FunctionDoc find_substring_doc(
 void AddFindSubstring(FunctionRegistry* registry) {
   auto func = std::make_shared<ScalarFunction>("find_substring", Arity::Unary(),
                                                &find_substring_doc);
-  DCHECK_OK(func->AddKernel({binary()}, int32(), FindSubstringExec<BinaryType>,
-                            MatchSubstringState::Init));
-  DCHECK_OK(func->AddKernel({utf8()}, int32(), FindSubstringExec<StringType>,
-                            MatchSubstringState::Init));
-  DCHECK_OK(func->AddKernel({large_binary()}, int64(), FindSubstringExec<LargeBinaryType>,
-                            MatchSubstringState::Init));
-  DCHECK_OK(func->AddKernel({large_utf8()}, int64(), FindSubstringExec<LargeStringType>,
-                            MatchSubstringState::Init));
+  for (const auto& ty : BaseBinaryTypes()) {
+    std::shared_ptr<DataType> offset_type;
+    if (ty->id() == Type::type::LARGE_BINARY || ty->id() == Type::type::LARGE_STRING) {
+      offset_type = int64();
+    } else {
+      offset_type = int32();
+    }
+    DCHECK_OK(func->AddKernel({ty}, offset_type,
+                              GenerateTypeAgnosticVarBinaryBase<FindSubstringExec>(ty),
+                              MatchSubstringState::Init));
+  }
   DCHECK_OK(registry->AddFunction(std::move(func)));
 }
 
diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
index fe069810dbd..bd5c8eec03f 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
@@ -91,6 +91,10 @@ TYPED_TEST(TestBinaryKernels, FindSubstring) {
   MatchSubstringOptions options_double_char_2{"bbcaa"};
   this->CheckUnary("find_substring", R"(["abcbaabbbcaabccabaab"])", this->offset_type(),
                    "[7]", &options_double_char_2);
+
+  MatchSubstringOptions options_empty{""};
+  this->CheckUnary("find_substring", R"(["", "a", null])", this->offset_type(),
+                   "[0, 0, null]", &options_empty);
 }
 
 template <typename TestType>
@@ -391,6 +395,12 @@ TYPED_TEST(TestStringKernels, MatchSubstring) {
   MatchSubstringOptions options_double_char_2{"bbcaa"};
   this->CheckUnary("match_substring", R"(["abcbaabbbcaabccabaab"])", boolean(), "[true]",
                    &options_double_char_2);
+
+  MatchSubstringOptions options_empty{""};
+  this->CheckUnary("match_substring", "[]", boolean(), "[]", &options);
+  this->CheckUnary("match_substring", R"(["abc", "acb", "cab", null, "bac", "AB", ""])",
+                   boolean(), "[true, true, true, null, true, true, true]",
+                   &options_empty);
 }
 
 #ifdef ARROW_WITH_RE2

From 1ae486c13588aca3c8e00246292a3dd38eab4101 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Mon, 7 Jun 2021 18:24:36 +0200
Subject: [PATCH 368/719] ARROW-12644: [C++][Python][R][Dataset] URL-decode
 path segments in partitioning

Now by default, directory/hive partitioning will URL-decode potential partition values before trying to parse them, since systems like Spark apparently may URL-encode the values in some cases. Note for Hive partitioning, this applies only to the value, not to the key itself. This behavior can be toggled.

Closes #10264 from lidavidm/arrow-12644

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/dataset/partition.cc           | 146 ++++++++++++++++---
 cpp/src/arrow/dataset/partition.h            |  79 ++++++++--
 cpp/src/arrow/dataset/partition_test.cc      | 100 ++++++++++++-
 cpp/src/arrow/dataset/type_fwd.h             |   2 +
 python/pyarrow/_dataset.pyx                  |  48 +++++-
 python/pyarrow/includes/libarrow_dataset.pxd |  24 ++-
 python/pyarrow/tests/test_dataset.py         |  93 ++++++++++++
 r/R/arrowExports.R                           |  16 +-
 r/R/dataset-partition.R                      |  26 ++--
 r/man/hive_partition.Rd                      |   5 +-
 r/src/arrowExports.cpp                       |  44 +++---
 r/src/dataset.cpp                            |  34 ++++-
 r/tests/testthat/test-dataset.R              | 105 +++++++++++++
 13 files changed, 634 insertions(+), 88 deletions(-)

diff --git a/cpp/src/arrow/dataset/partition.cc b/cpp/src/arrow/dataset/partition.cc
index f6e7b9a0d28..5c390b6b487 100644
--- a/cpp/src/arrow/dataset/partition.cc
+++ b/cpp/src/arrow/dataset/partition.cc
@@ -37,6 +37,8 @@
 #include "arrow/util/logging.h"
 #include "arrow/util/make_unique.h"
 #include "arrow/util/string_view.h"
+#include "arrow/util/uri.h"
+#include "arrow/util/utf8.h"
 
 namespace arrow {
 
@@ -46,6 +48,18 @@ using util::string_view;
 
 namespace dataset {
 
+namespace {
+/// Apply UriUnescape, then ensure the results are valid UTF-8.
+Result<std::string> SafeUriUnescape(util::string_view encoded) {
+  auto decoded = internal::UriUnescape(encoded);
+  if (!util::ValidateUTF8(decoded)) {
+    return Status::Invalid("Partition segment was not valid UTF-8 after URL decoding: ",
+                           encoded);
+  }
+  return decoded;
+}
+}  // namespace
+
 std::shared_ptr<Partitioning> Partitioning::Default() {
   class DefaultPartitioning : public Partitioning {
    public:
@@ -158,6 +172,21 @@ Result<Partitioning::PartitionedBatches> KeyValuePartitioning::Partition(
   return out;
 }
 
+std::ostream& operator<<(std::ostream& os, SegmentEncoding segment_encoding) {
+  switch (segment_encoding) {
+    case SegmentEncoding::None:
+      os << "SegmentEncoding::None";
+      break;
+    case SegmentEncoding::Uri:
+      os << "SegmentEncoding::Uri";
+      break;
+    default:
+      os << "(invalid SegmentEncoding " << static_cast<int8_t>(segment_encoding) << ")";
+      break;
+  }
+  return os;
+}
+
 Result<compute::Expression> KeyValuePartitioning::ConvertKey(const Key& key) const {
   ARROW_ASSIGN_OR_RAISE(auto match, FieldRef(key.name).FindOneOrNone(*schema_));
   if (match.empty()) {
@@ -209,7 +238,8 @@ Result<compute::Expression> KeyValuePartitioning::ConvertKey(const Key& key) con
 Result<compute::Expression> KeyValuePartitioning::Parse(const std::string& path) const {
   std::vector<compute::Expression> expressions;
 
-  for (const Key& key : ParseKeys(path)) {
+  ARROW_ASSIGN_OR_RAISE(auto parsed, ParseKeys(path));
+  for (const Key& key : parsed) {
     ARROW_ASSIGN_OR_RAISE(auto expr, ConvertKey(key));
     if (expr == compute::literal(true)) continue;
     expressions.push_back(std::move(expr));
@@ -259,7 +289,14 @@ Result<std::string> KeyValuePartitioning::Format(const compute::Expression& expr
   return FormatValues(values);
 }
 
-std::vector<KeyValuePartitioning::Key> DirectoryPartitioning::ParseKeys(
+DirectoryPartitioning::DirectoryPartitioning(std::shared_ptr<Schema> schema,
+                                             ArrayVector dictionaries,
+                                             KeyValuePartitioningOptions options)
+    : KeyValuePartitioning(std::move(schema), std::move(dictionaries), options) {
+  util::InitializeUTF8();
+}
+
+Result<std::vector<KeyValuePartitioning::Key>> DirectoryPartitioning::ParseKeys(
     const std::string& path) const {
   std::vector<Key> keys;
 
@@ -267,7 +304,23 @@ std::vector<KeyValuePartitioning::Key> DirectoryPartitioning::ParseKeys(
   for (auto&& segment : fs::internal::SplitAbstractPath(path)) {
     if (i >= schema_->num_fields()) break;
 
-    keys.push_back({schema_->field(i++)->name(), std::move(segment)});
+    switch (options_.segment_encoding) {
+      case SegmentEncoding::None: {
+        if (ARROW_PREDICT_FALSE(!util::ValidateUTF8(segment))) {
+          return Status::Invalid("Partition segment was not valid UTF-8: ", segment);
+        }
+        keys.push_back({schema_->field(i++)->name(), std::move(segment)});
+        break;
+      }
+      case SegmentEncoding::Uri: {
+        ARROW_ASSIGN_OR_RAISE(auto decoded, SafeUriUnescape(segment));
+        keys.push_back({schema_->field(i++)->name(), std::move(decoded)});
+        break;
+      }
+      default:
+        return Status::NotImplemented("Unknown segment encoding: ",
+                                      options_.segment_encoding);
+    }
   }
 
   return keys;
@@ -308,6 +361,20 @@ Result<std::string> DirectoryPartitioning::FormatValues(
   return fs::internal::JoinAbstractPath(std::move(segments));
 }
 
+KeyValuePartitioningOptions PartitioningFactoryOptions::AsPartitioningOptions() const {
+  KeyValuePartitioningOptions options;
+  options.segment_encoding = segment_encoding;
+  return options;
+}
+
+HivePartitioningOptions HivePartitioningFactoryOptions::AsHivePartitioningOptions()
+    const {
+  HivePartitioningOptions options;
+  options.segment_encoding = segment_encoding;
+  options.null_fallback = null_fallback;
+  return options;
+}
+
 namespace {
 class KeyValuePartitioningFactory : public PartitioningFactory {
  protected:
@@ -430,6 +497,7 @@ class DirectoryPartitioningFactory : public KeyValuePartitioningFactory {
                                PartitioningFactoryOptions options)
       : KeyValuePartitioningFactory(options), field_names_(std::move(field_names)) {
     Reset();
+    util::InitializeUTF8();
   }
 
   std::string type_name() const override { return "schema"; }
@@ -441,7 +509,23 @@ class DirectoryPartitioningFactory : public KeyValuePartitioningFactory {
       for (auto&& segment : fs::internal::SplitAbstractPath(path)) {
         if (field_index == field_names_.size()) break;
 
-        RETURN_NOT_OK(InsertRepr(static_cast<int>(field_index++), segment));
+        switch (options_.segment_encoding) {
+          case SegmentEncoding::None: {
+            if (ARROW_PREDICT_FALSE(!util::ValidateUTF8(segment))) {
+              return Status::Invalid("Partition segment was not valid UTF-8: ", segment);
+            }
+            RETURN_NOT_OK(InsertRepr(static_cast<int>(field_index++), segment));
+            break;
+          }
+          case SegmentEncoding::Uri: {
+            ARROW_ASSIGN_OR_RAISE(auto decoded, SafeUriUnescape(segment));
+            RETURN_NOT_OK(InsertRepr(static_cast<int>(field_index++), decoded));
+            break;
+          }
+          default:
+            return Status::NotImplemented("Unknown segment encoding: ",
+                                          options_.segment_encoding);
+        }
       }
     }
 
@@ -458,7 +542,8 @@ class DirectoryPartitioningFactory : public KeyValuePartitioningFactory {
     // drop fields which aren't in field_names_
     auto out_schema = SchemaFromColumnNames(schema, field_names_);
 
-    return std::make_shared<DirectoryPartitioning>(std::move(out_schema), dictionaries_);
+    return std::make_shared<DirectoryPartitioning>(std::move(out_schema), dictionaries_,
+                                                   options_.AsPartitioningOptions());
   }
 
  private:
@@ -481,28 +566,50 @@ std::shared_ptr<PartitioningFactory> DirectoryPartitioning::MakeFactory(
       new DirectoryPartitioningFactory(std::move(field_names), options));
 }
 
-util::optional<KeyValuePartitioning::Key> HivePartitioning::ParseKey(
-    const std::string& segment, const std::string& null_fallback) {
+Result<util::optional<KeyValuePartitioning::Key>> HivePartitioning::ParseKey(
+    const std::string& segment, const HivePartitioningOptions& options) {
   auto name_end = string_view(segment).find_first_of('=');
   // Not round-trippable
   if (name_end == string_view::npos) {
     return util::nullopt;
   }
 
+  // Static method, so we have no better place for it
+  util::InitializeUTF8();
+
   auto name = segment.substr(0, name_end);
-  auto value = segment.substr(name_end + 1);
-  if (value == null_fallback) {
-    return Key{name, util::nullopt};
+  std::string value;
+  switch (options.segment_encoding) {
+    case SegmentEncoding::None: {
+      value = segment.substr(name_end + 1);
+      if (ARROW_PREDICT_FALSE(!util::ValidateUTF8(value))) {
+        return Status::Invalid("Partition segment was not valid UTF-8: ", value);
+      }
+      break;
+    }
+    case SegmentEncoding::Uri: {
+      auto raw_value = util::string_view(segment).substr(name_end + 1);
+      ARROW_ASSIGN_OR_RAISE(value, SafeUriUnescape(raw_value));
+      break;
+    }
+    default:
+      return Status::NotImplemented("Unknown segment encoding: ",
+                                    options.segment_encoding);
+  }
+
+  if (value == options.null_fallback) {
+    return Key{std::move(name), util::nullopt};
   }
-  return Key{name, value};
+  return Key{std::move(name), std::move(value)};
 }
 
-std::vector<KeyValuePartitioning::Key> HivePartitioning::ParseKeys(
+Result<std::vector<KeyValuePartitioning::Key>> HivePartitioning::ParseKeys(
     const std::string& path) const {
   std::vector<Key> keys;
 
   for (const auto& segment : fs::internal::SplitAbstractPath(path)) {
-    if (auto key = ParseKey(segment, null_fallback_)) {
+    ARROW_ASSIGN_OR_RAISE(auto maybe_key, ParseKey(segment, hive_options_));
+    if (auto key = maybe_key) {
       keys.push_back(std::move(*key));
     }
   }
@@ -521,7 +628,7 @@ Result<std::string> HivePartitioning::FormatValues(const ScalarVector& values) c
     } else if (!values[i]->is_valid) {
       // If no key is available just provide a placeholder segment to maintain the
       // field_index <-> path nesting relation
-      segments[i] = name + "=" + null_fallback_;
+      segments[i] = name + "=" + hive_options_.null_fallback;
     } else {
       segments[i] = name + "=" + values[i]->ToString();
     }
@@ -533,15 +640,18 @@ Result<std::string> HivePartitioning::FormatValues(const ScalarVector& values) c
 class HivePartitioningFactory : public KeyValuePartitioningFactory {
  public:
   explicit HivePartitioningFactory(HivePartitioningFactoryOptions options)
-      : KeyValuePartitioningFactory(options), null_fallback_(options.null_fallback) {}
+      : KeyValuePartitioningFactory(options), options_(std::move(options)) {}
 
   std::string type_name() const override { return "hive"; }
 
   Result<std::shared_ptr<Schema>> Inspect(
       const std::vector<std::string>& paths) override {
+    auto options = options_.AsHivePartitioningOptions();
     for (auto path : paths) {
       for (auto&& segment : fs::internal::SplitAbstractPath(path)) {
-        if (auto key = HivePartitioning::ParseKey(segment, null_fallback_)) {
+        ARROW_ASSIGN_OR_RAISE(auto maybe_key,
+                              HivePartitioning::ParseKey(segment, options));
+        if (auto key = maybe_key) {
           RETURN_NOT_OK(InsertRepr(key->name, key->value));
         }
       }
@@ -565,12 +675,12 @@ class HivePartitioningFactory : public KeyValuePartitioningFactory {
       auto out_schema = SchemaFromColumnNames(schema, field_names_);
 
       return std::make_shared<HivePartitioning>(std::move(out_schema), dictionaries_,
-                                                null_fallback_);
+                                                options_.AsHivePartitioningOptions());
     }
   }
 
  private:
-  const std::string null_fallback_;
+  const HivePartitioningFactoryOptions options_;
   std::vector<std::string> field_names_;
 };
 
diff --git a/cpp/src/arrow/dataset/partition.h b/cpp/src/arrow/dataset/partition.h
index 36276e7a3b1..db3008f1d67 100644
--- a/cpp/src/arrow/dataset/partition.h
+++ b/cpp/src/arrow/dataset/partition.h
@@ -20,6 +20,7 @@
 #pragma once
 
 #include <functional>
+#include <iosfwd>
 #include <memory>
 #include <string>
 #include <unordered_map>
@@ -89,8 +90,26 @@ class ARROW_DS_EXPORT Partitioning {
   std::shared_ptr<Schema> schema_;
 };
 
+/// \brief The encoding of partition segments.
+enum class SegmentEncoding : int8_t {
+  /// No encoding.
+  None = 0,
+  /// Segment values are URL-encoded.
+  Uri = 1,
+};
+
+ARROW_DS_EXPORT
+std::ostream& operator<<(std::ostream& os, SegmentEncoding segment_encoding);
+
+/// \brief Options for key-value based partitioning (hive/directory).
+struct ARROW_DS_EXPORT KeyValuePartitioningOptions {
+  /// After splitting a path into components, decode the path components
+  /// before parsing according to this scheme.
+  SegmentEncoding segment_encoding = SegmentEncoding::Uri;
+};
+
 /// \brief Options for inferring a partitioning.
-struct PartitioningFactoryOptions {
+struct ARROW_DS_EXPORT PartitioningFactoryOptions {
   /// When inferring a schema for partition fields, yield dictionary encoded types
   /// instead of plain. This can be more efficient when materializing virtual
   /// columns, and Expressions parsed by the finished Partitioning will include
@@ -100,12 +119,19 @@ struct PartitioningFactoryOptions {
   /// will only check discovered fields against the schema and update internal
   /// state (such as dictionaries).
   std::shared_ptr<Schema> schema;
+  /// After splitting a path into components, decode the path components
+  /// before parsing according to this scheme.
+  SegmentEncoding segment_encoding = SegmentEncoding::Uri;
+
+  KeyValuePartitioningOptions AsPartitioningOptions() const;
 };
 
 /// \brief Options for inferring a hive-style partitioning.
-struct HivePartitioningFactoryOptions : PartitioningFactoryOptions {
+struct ARROW_DS_EXPORT HivePartitioningFactoryOptions : PartitioningFactoryOptions {
   /// The hive partitioning scheme maps null to a hard coded fallback string.
   std::string null_fallback;
+
+  HivePartitioningOptions AsHivePartitioningOptions() const;
 };
 
 /// \brief PartitioningFactory provides creation of a partitioning  when the
@@ -147,14 +173,17 @@ class ARROW_DS_EXPORT KeyValuePartitioning : public Partitioning {
   Result<std::string> Format(const compute::Expression& expr) const override;
 
  protected:
-  KeyValuePartitioning(std::shared_ptr<Schema> schema, ArrayVector dictionaries)
-      : Partitioning(std::move(schema)), dictionaries_(std::move(dictionaries)) {
+  KeyValuePartitioning(std::shared_ptr<Schema> schema, ArrayVector dictionaries,
+                       KeyValuePartitioningOptions options)
+      : Partitioning(std::move(schema)),
+        dictionaries_(std::move(dictionaries)),
+        options_(options) {
     if (dictionaries_.empty()) {
       dictionaries_.resize(schema_->num_fields());
     }
   }
 
-  virtual std::vector<Key> ParseKeys(const std::string& path) const = 0;
+  virtual Result<std::vector<Key>> ParseKeys(const std::string& path) const = 0;
 
   virtual Result<std::string> FormatValues(const ScalarVector& values) const = 0;
 
@@ -162,6 +191,7 @@ class ARROW_DS_EXPORT KeyValuePartitioning : public Partitioning {
   Result<compute::Expression> ConvertKey(const Key& key) const;
 
   ArrayVector dictionaries_;
+  KeyValuePartitioningOptions options_;
 };
 
 /// \brief DirectoryPartitioning parses one segment of a path for each field in its
@@ -175,8 +205,8 @@ class ARROW_DS_EXPORT DirectoryPartitioning : public KeyValuePartitioning {
   /// If a field in schema is of dictionary type, the corresponding element of
   /// dictionaries must be contain the dictionary of values for that field.
   explicit DirectoryPartitioning(std::shared_ptr<Schema> schema,
-                                 ArrayVector dictionaries = {})
-      : KeyValuePartitioning(std::move(schema), std::move(dictionaries)) {}
+                                 ArrayVector dictionaries = {},
+                                 KeyValuePartitioningOptions options = {});
 
   std::string type_name() const override { return "schema"; }
 
@@ -188,7 +218,7 @@ class ARROW_DS_EXPORT DirectoryPartitioning : public KeyValuePartitioning {
       std::vector<std::string> field_names, PartitioningFactoryOptions = {});
 
  private:
-  std::vector<Key> ParseKeys(const std::string& path) const override;
+  Result<std::vector<Key>> ParseKeys(const std::string& path) const override;
 
   Result<std::string> FormatValues(const ScalarVector& values) const override;
 };
@@ -196,6 +226,16 @@ class ARROW_DS_EXPORT DirectoryPartitioning : public KeyValuePartitioning {
 /// \brief The default fallback used for null values in a Hive-style partitioning.
 static constexpr char kDefaultHiveNullFallback[] = "__HIVE_DEFAULT_PARTITION__";
 
+struct ARROW_DS_EXPORT HivePartitioningOptions : public KeyValuePartitioningOptions {
+  std::string null_fallback = kDefaultHiveNullFallback;
+
+  static HivePartitioningOptions DefaultsWithNullFallback(std::string fallback) {
+    HivePartitioningOptions options;
+    options.null_fallback = std::move(fallback);
+    return options;
+  }
+};
+
 /// \brief Multi-level, directory based partitioning
 /// originating from Apache Hive with all data files stored in the
 /// leaf directories. Data is partitioned by static values of a
@@ -211,22 +251,31 @@ class ARROW_DS_EXPORT HivePartitioning : public KeyValuePartitioning {
   /// dictionaries must be contain the dictionary of values for that field.
   explicit HivePartitioning(std::shared_ptr<Schema> schema, ArrayVector dictionaries = {},
                             std::string null_fallback = kDefaultHiveNullFallback)
-      : KeyValuePartitioning(std::move(schema), std::move(dictionaries)),
-        null_fallback_(std::move(null_fallback)) {}
+      : KeyValuePartitioning(std::move(schema), std::move(dictionaries),
+                             KeyValuePartitioningOptions()),
+        hive_options_(
+            HivePartitioningOptions::DefaultsWithNullFallback(std::move(null_fallback))) {
+  }
+
+  explicit HivePartitioning(std::shared_ptr<Schema> schema, ArrayVector dictionaries,
+                            HivePartitioningOptions options)
+      : KeyValuePartitioning(std::move(schema), std::move(dictionaries), options),
+        hive_options_(options) {}
 
   std::string type_name() const override { return "hive"; }
-  std::string null_fallback() const { return null_fallback_; }
+  std::string null_fallback() const { return hive_options_.null_fallback; }
+  const HivePartitioningOptions& options() const { return hive_options_; }
 
-  static util::optional<Key> ParseKey(const std::string& segment,
-                                      const std::string& null_fallback);
+  static Result<util::optional<Key>> ParseKey(const std::string& segment,
+                                              const HivePartitioningOptions& options);
 
   /// \brief Create a factory for a hive partitioning.
   static std::shared_ptr<PartitioningFactory> MakeFactory(
       HivePartitioningFactoryOptions = {});
 
  private:
-  const std::string null_fallback_;
-  std::vector<Key> ParseKeys(const std::string& path) const override;
+  const HivePartitioningOptions hive_options_;
+  Result<std::vector<Key>> ParseKeys(const std::string& path) const override;
 
   Result<std::string> FormatValues(const ScalarVector& values) const override;
 };
diff --git a/cpp/src/arrow/dataset/partition_test.cc b/cpp/src/arrow/dataset/partition_test.cc
index 7a7ffcff229..d8e5198f21d 100644
--- a/cpp/src/arrow/dataset/partition_test.cc
+++ b/cpp/src/arrow/dataset/partition_test.cc
@@ -558,6 +558,103 @@ TEST_F(TestPartitioning, ExistingSchemaHive) {
   AssertInspect({"/a=0/b=1", "/b=2"}, options.schema->fields());
 }
 
+TEST_F(TestPartitioning, UrlEncodedDirectory) {
+  PartitioningFactoryOptions options;
+  auto ts = timestamp(TimeUnit::type::SECOND);
+  options.schema = schema({field("date", ts), field("time", ts), field("str", utf8())});
+  factory_ = DirectoryPartitioning::MakeFactory(options.schema->field_names(), options);
+
+  AssertInspect({"/2021-05-04 00:00:00/2021-05-04 07:27:00/%24",
+                 "/2021-05-04 00%3A00%3A00/2021-05-04 07%3A27%3A00/foo"},
+                options.schema->fields());
+  auto date = std::make_shared<TimestampScalar>(1620086400, ts);
+  auto time = std::make_shared<TimestampScalar>(1620113220, ts);
+  partitioning_ = std::make_shared<DirectoryPartitioning>(options.schema, ArrayVector());
+  AssertParse("/2021-05-04 00%3A00%3A00/2021-05-04 07%3A27%3A00/%24",
+              and_({equal(field_ref("date"), literal(date)),
+                    equal(field_ref("time"), literal(time)),
+                    equal(field_ref("str"), literal("$"))}));
+
+  // Invalid UTF-8
+  EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, ::testing::HasSubstr("was not valid UTF-8"),
+                                  factory_->Inspect({"/%AF/%BF/%CF"}));
+  EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, ::testing::HasSubstr("was not valid UTF-8"),
+                                  partitioning_->Parse({"/%AF/%BF/%CF"}));
+
+  options.segment_encoding = SegmentEncoding::None;
+  options.schema =
+      schema({field("date", utf8()), field("time", utf8()), field("str", utf8())});
+  factory_ = DirectoryPartitioning::MakeFactory(options.schema->field_names(), options);
+  AssertInspect({"/2021-05-04 00:00:00/2021-05-04 07:27:00/%E3%81%8F%E3%81%BE",
+                 "/2021-05-04 00%3A00%3A00/2021-05-04 07%3A27%3A00/foo"},
+                options.schema->fields());
+  partitioning_ = std::make_shared<DirectoryPartitioning>(
+      options.schema, ArrayVector(), options.AsPartitioningOptions());
+  AssertParse("/2021-05-04 00%3A00%3A00/2021-05-04 07%3A27%3A00/%24",
+              and_({equal(field_ref("date"), literal("2021-05-04 00%3A00%3A00")),
+                    equal(field_ref("time"), literal("2021-05-04 07%3A27%3A00")),
+                    equal(field_ref("str"), literal("%24"))}));
+}
+
+TEST_F(TestPartitioning, UrlEncodedHive) {
+  HivePartitioningFactoryOptions options;
+  auto ts = timestamp(TimeUnit::type::SECOND);
+  options.schema = schema({field("date", ts), field("time", ts), field("str", utf8())});
+  options.null_fallback = "$";
+  factory_ = HivePartitioning::MakeFactory(options);
+
+  AssertInspect(
+      {"/date=2021-05-04 00:00:00/time=2021-05-04 07:27:00/str=$",
+       "/date=2021-05-04 00:00:00/time=2021-05-04 07:27:00/str=%E3%81%8F%E3%81%BE",
+       "/date=2021-05-04 00%3A00%3A00/time=2021-05-04 07%3A27%3A00/str=%24"},
+      options.schema->fields());
+
+  auto date = std::make_shared<TimestampScalar>(1620086400, ts);
+  auto time = std::make_shared<TimestampScalar>(1620113220, ts);
+  partitioning_ = std::make_shared<HivePartitioning>(options.schema, ArrayVector(),
+                                                     options.AsHivePartitioningOptions());
+  AssertParse("/date=2021-05-04 00:00:00/time=2021-05-04 07:27:00/str=$",
+              and_({equal(field_ref("date"), literal(date)),
+                    equal(field_ref("time"), literal(time)), is_null(field_ref("str"))}));
+  AssertParse("/date=2021-05-04 00:00:00/time=2021-05-04 07:27:00/str=%E3%81%8F%E3%81%BE",
+              and_({equal(field_ref("date"), literal(date)),
+                    equal(field_ref("time"), literal(time)),
+                    equal(field_ref("str"), literal("\xE3\x81\x8F\xE3\x81\xBE"))}));
+  // URL-encoded null fallback value
+  AssertParse("/date=2021-05-04 00%3A00%3A00/time=2021-05-04 07%3A27%3A00/str=%24",
+              and_({equal(field_ref("date"), literal(date)),
+                    equal(field_ref("time"), literal(time)), is_null(field_ref("str"))}));
+
+  // Invalid UTF-8
+  EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, ::testing::HasSubstr("was not valid UTF-8"),
+                                  factory_->Inspect({"/date=%AF/time=%BF/str=%CF"}));
+  EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, ::testing::HasSubstr("was not valid UTF-8"),
+                                  partitioning_->Parse({"/date=%AF/time=%BF/str=%CF"}));
+
+  options.segment_encoding = SegmentEncoding::None;
+  options.schema =
+      schema({field("date", utf8()), field("time", utf8()), field("str", utf8())});
+  factory_ = HivePartitioning::MakeFactory(options);
+  AssertInspect(
+      {"/date=2021-05-04 00:00:00/time=2021-05-04 07:27:00/str=$",
+       "/date=2021-05-04 00:00:00/time=2021-05-04 07:27:00/str=%E3%81%8F%E3%81%BE",
+       "/date=2021-05-04 00%3A00%3A00/time=2021-05-04 07%3A27%3A00/str=%24"},
+      options.schema->fields());
+  partitioning_ = std::make_shared<HivePartitioning>(options.schema, ArrayVector(),
+                                                     options.AsHivePartitioningOptions());
+  AssertParse("/date=2021-05-04 00%3A00%3A00/time=2021-05-04 07%3A27%3A00/str=%24",
+              and_({equal(field_ref("date"), literal("2021-05-04 00%3A00%3A00")),
+                    equal(field_ref("time"), literal("2021-05-04 07%3A27%3A00")),
+                    equal(field_ref("str"), literal("%24"))}));
+
+  // Invalid UTF-8
+  EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, ::testing::HasSubstr("was not valid UTF-8"),
+                                  factory_->Inspect({"/date=\xAF/time=\xBF/str=\xCF"}));
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      Invalid, ::testing::HasSubstr("was not valid UTF-8"),
+      partitioning_->Parse({"/date=\xAF/time=\xBF/str=\xCF"}));
+}
+
 TEST_F(TestPartitioning, EtlThenHive) {
   FieldVector etl_fields{field("year", int16()), field("month", int8()),
                          field("day", int8()), field("hour", int8())};
@@ -655,8 +752,9 @@ class RangePartitioning : public Partitioning {
   Result<compute::Expression> Parse(const std::string& path) const override {
     std::vector<compute::Expression> ranges;
 
+    HivePartitioningOptions options;
     for (auto segment : fs::internal::SplitAbstractPath(path)) {
-      auto key = HivePartitioning::ParseKey(segment, "");
+      ARROW_ASSIGN_OR_RAISE(auto key, HivePartitioning::ParseKey(segment, options));
       if (!key) {
         return Status::Invalid("can't parse '", segment, "' as a range");
       }
diff --git a/cpp/src/arrow/dataset/type_fwd.h b/cpp/src/arrow/dataset/type_fwd.h
index 67a999456be..019aaf4241b 100644
--- a/cpp/src/arrow/dataset/type_fwd.h
+++ b/cpp/src/arrow/dataset/type_fwd.h
@@ -71,8 +71,10 @@ class ParquetFileWriteOptions;
 class Partitioning;
 class PartitioningFactory;
 class PartitioningOrFactory;
+struct KeyValuePartitioningOptions;
 class DirectoryPartitioning;
 class HivePartitioning;
+struct HivePartitioningOptions;
 
 struct ScanOptions;
 
diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx
index 356bf8ce9c7..78620b25942 100644
--- a/python/pyarrow/_dataset.pyx
+++ b/python/pyarrow/_dataset.pyx
@@ -85,6 +85,14 @@ cdef CFileSource _make_file_source(object file, FileSystem filesystem=None):
     return c_source
 
 
+cdef CSegmentEncoding _get_segment_encoding(str segment_encoding):
+    if segment_encoding == "none":
+        return CSegmentEncodingNone
+    elif segment_encoding == "uri":
+        return CSegmentEncodingUri
+    raise ValueError(f"Unknown segment encoding: {segment_encoding}")
+
+
 cdef class Expression(_Weakrefable):
     """
     A logical expression to be evaluated against some input.
@@ -1930,6 +1938,9 @@ cdef class DirectoryPartitioning(Partitioning):
         corresponding entry of `dictionaries` must be an array containing
         every value which may be taken by the corresponding column or an
         error will be raised in parsing.
+    segment_encoding : str, default "uri"
+        After splitting paths into segments, decode the segments. Valid
+        values are "uri" (URI-decode segments) and "none" (leave as-is).
 
     Returns
     -------
@@ -1947,13 +1958,17 @@ cdef class DirectoryPartitioning(Partitioning):
     cdef:
         CDirectoryPartitioning* directory_partitioning
 
-    def __init__(self, Schema schema not None, dictionaries=None):
+    def __init__(self, Schema schema not None, dictionaries=None,
+                 segment_encoding="uri"):
         cdef:
             shared_ptr[CDirectoryPartitioning] c_partitioning
+            CKeyValuePartitioningOptions c_options
 
+        c_options.segment_encoding = _get_segment_encoding(segment_encoding)
         c_partitioning = make_shared[CDirectoryPartitioning](
             pyarrow_unwrap_schema(schema),
-            _partitioning_dictionaries(schema, dictionaries)
+            _partitioning_dictionaries(schema, dictionaries),
+            c_options,
         )
         self.init(<shared_ptr[CPartitioning]> c_partitioning)
 
@@ -1964,7 +1979,7 @@ cdef class DirectoryPartitioning(Partitioning):
     @staticmethod
     def discover(field_names=None, infer_dictionary=False,
                  max_partition_dictionary_size=0,
-                 schema=None):
+                 schema=None, segment_encoding="uri"):
         """
         Discover a DirectoryPartitioning.
 
@@ -1987,6 +2002,9 @@ cdef class DirectoryPartitioning(Partitioning):
             Use this schema instead of inferring a schema from partition
             values. Partition values will be validated against this schema
             before accumulation into the Partitioning's dictionary.
+        segment_encoding : str, default "uri"
+            After splitting paths into segments, decode the segments. Valid
+            values are "uri" (URI-decode segments) and "none" (leave as-is).
 
         Returns
         -------
@@ -2015,6 +2033,9 @@ cdef class DirectoryPartitioning(Partitioning):
                 "cannot infer field_names")
         else:
             c_field_names = [tobytes(s) for s in field_names]
+
+        c_options.segment_encoding = _get_segment_encoding(segment_encoding)
+
         return PartitioningFactory.wrap(
             CDirectoryPartitioning.MakeFactory(c_field_names, c_options))
 
@@ -2044,6 +2065,9 @@ cdef class HivePartitioning(Partitioning):
         error will be raised in parsing.
     null_fallback : str, default "__HIVE_DEFAULT_PARTITION__"
         If any field is None then this fallback will be used as a label
+    segment_encoding : str, default "uri"
+        After splitting paths into segments, decode the segments. Valid
+        values are "uri" (URI-decode segments) and "none" (leave as-is).
 
     Returns
     -------
@@ -2065,16 +2089,20 @@ cdef class HivePartitioning(Partitioning):
     def __init__(self,
                  Schema schema not None,
                  dictionaries=None,
-                 null_fallback="__HIVE_DEFAULT_PARTITION__"):
+                 null_fallback="__HIVE_DEFAULT_PARTITION__",
+                 segment_encoding="uri"):
 
         cdef:
             shared_ptr[CHivePartitioning] c_partitioning
-            c_string c_null_fallback = tobytes(null_fallback)
+            CHivePartitioningOptions c_options
+
+        c_options.null_fallback = tobytes(null_fallback)
+        c_options.segment_encoding = _get_segment_encoding(segment_encoding)
 
         c_partitioning = make_shared[CHivePartitioning](
             pyarrow_unwrap_schema(schema),
             _partitioning_dictionaries(schema, dictionaries),
-            c_null_fallback
+            c_options,
         )
         self.init(<shared_ptr[CPartitioning]> c_partitioning)
 
@@ -2086,7 +2114,8 @@ cdef class HivePartitioning(Partitioning):
     def discover(infer_dictionary=False,
                  max_partition_dictionary_size=0,
                  null_fallback="__HIVE_DEFAULT_PARTITION__",
-                 schema=None):
+                 schema=None,
+                 segment_encoding="uri"):
         """
         Discover a HivePartitioning.
 
@@ -2110,6 +2139,9 @@ cdef class HivePartitioning(Partitioning):
             Use this schema instead of inferring a schema from partition
             values. Partition values will be validated against this schema
             before accumulation into the Partitioning's dictionary.
+        segment_encoding : str, default "uri"
+            After splitting paths into segments, decode the segments. Valid
+            values are "uri" (URI-decode segments) and "none" (leave as-is).
 
         Returns
         -------
@@ -2133,6 +2165,8 @@ cdef class HivePartitioning(Partitioning):
         if schema:
             c_options.schema = pyarrow_unwrap_schema(schema)
 
+        c_options.segment_encoding = _get_segment_encoding(segment_encoding)
+
         return PartitioningFactory.wrap(
             CHivePartitioning.MakeFactory(c_options))
 
diff --git a/python/pyarrow/includes/libarrow_dataset.pxd b/python/pyarrow/includes/libarrow_dataset.pxd
index 96c9648f920..8cab5536647 100644
--- a/python/pyarrow/includes/libarrow_dataset.pxd
+++ b/python/pyarrow/includes/libarrow_dataset.pxd
@@ -305,16 +305,35 @@ cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil:
         CResult[CExpression] Parse(const c_string & path) const
         const shared_ptr[CSchema] & schema()
 
+    cdef cppclass CSegmentEncoding" arrow::dataset::SegmentEncoding":
+        pass
+
+    CSegmentEncoding CSegmentEncodingNone\
+        " arrow::dataset::SegmentEncoding::None"
+    CSegmentEncoding CSegmentEncodingUri\
+        " arrow::dataset::SegmentEncoding::Uri"
+
+    cdef cppclass CKeyValuePartitioningOptions \
+            "arrow::dataset::KeyValuePartitioningOptions":
+        CSegmentEncoding segment_encoding
+
+    cdef cppclass CHivePartitioningOptions \
+            "arrow::dataset::HivePartitioningOptions":
+        CSegmentEncoding segment_encoding
+        c_string null_fallback
+
     cdef cppclass CPartitioningFactoryOptions \
             "arrow::dataset::PartitioningFactoryOptions":
         c_bool infer_dictionary
         shared_ptr[CSchema] schema
+        CSegmentEncoding segment_encoding
 
     cdef cppclass CHivePartitioningFactoryOptions \
             "arrow::dataset::HivePartitioningFactoryOptions":
-        c_bool infer_dictionary,
+        c_bool infer_dictionary
         c_string null_fallback
         shared_ptr[CSchema] schema
+        CSegmentEncoding segment_encoding
 
     cdef cppclass CPartitioningFactory "arrow::dataset::PartitioningFactory":
         pass
@@ -331,7 +350,8 @@ cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil:
     cdef cppclass CHivePartitioning \
             "arrow::dataset::HivePartitioning"(CPartitioning):
         CHivePartitioning(shared_ptr[CSchema] schema,
-                          vector[shared_ptr[CArray]] dictionaries)
+                          vector[shared_ptr[CArray]] dictionaries,
+                          CHivePartitioningOptions options)
 
         @staticmethod
         shared_ptr[CPartitioningFactory] MakeFactory(
diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py
index 982bbe31f74..5e83657ebf2 100644
--- a/python/pyarrow/tests/test_dataset.py
+++ b/python/pyarrow/tests/test_dataset.py
@@ -1424,6 +1424,99 @@ def test_partitioning_factory_dictionary(mockfs, infer_dictionary):
         assert inferred_schema.field('key').type == pa.string()
 
 
+def test_partitioning_factory_segment_encoding():
+    mockfs = fs._MockFileSystem()
+    format = ds.IpcFileFormat()
+    schema = pa.schema([("i64", pa.int64())])
+    table = pa.table([pa.array(range(10))], schema=schema)
+    partition_schema = pa.schema(
+        [("date", pa.timestamp("s")), ("string", pa.string())])
+    string_partition_schema = pa.schema(
+        [("date", pa.string()), ("string", pa.string())])
+    full_schema = pa.schema(list(schema) + list(partition_schema))
+    for directory in [
+            "directory/2021-05-04 00%3A00%3A00/%24",
+            "hive/date=2021-05-04 00%3A00%3A00/string=%24",
+    ]:
+        mockfs.create_dir(directory)
+        with mockfs.open_output_stream(directory + "/0.feather") as sink:
+            with pa.ipc.new_file(sink, schema) as writer:
+                writer.write_table(table)
+                writer.close()
+
+    # Directory
+    selector = fs.FileSelector("directory", recursive=True)
+    options = ds.FileSystemFactoryOptions("directory")
+    options.partitioning_factory = ds.DirectoryPartitioning.discover(
+        schema=partition_schema)
+    factory = ds.FileSystemDatasetFactory(mockfs, selector, format, options)
+    inferred_schema = factory.inspect()
+    assert inferred_schema == full_schema
+    actual = factory.finish().to_table(columns={
+        "date_int": ds.field("date").cast(pa.int64()),
+    })
+    assert actual[0][0].as_py() == 1620086400
+
+    options.partitioning_factory = ds.DirectoryPartitioning.discover(
+        ["date", "string"], segment_encoding="none")
+    factory = ds.FileSystemDatasetFactory(mockfs, selector, format, options)
+    fragments = list(factory.finish().get_fragments())
+    assert fragments[0].partition_expression.equals(
+        (ds.field("date") == "2021-05-04 00%3A00%3A00") &
+        (ds.field("string") == "%24"))
+
+    options.partitioning = ds.DirectoryPartitioning(
+        string_partition_schema, segment_encoding="none")
+    factory = ds.FileSystemDatasetFactory(mockfs, selector, format, options)
+    fragments = list(factory.finish().get_fragments())
+    assert fragments[0].partition_expression.equals(
+        (ds.field("date") == "2021-05-04 00%3A00%3A00") &
+        (ds.field("string") == "%24"))
+
+    options.partitioning_factory = ds.DirectoryPartitioning.discover(
+        schema=partition_schema, segment_encoding="none")
+    factory = ds.FileSystemDatasetFactory(mockfs, selector, format, options)
+    with pytest.raises(pa.ArrowInvalid,
+                       match="Could not cast segments for partition field"):
+        inferred_schema = factory.inspect()
+
+    # Hive
+    selector = fs.FileSelector("hive", recursive=True)
+    options = ds.FileSystemFactoryOptions("hive")
+    options.partitioning_factory = ds.HivePartitioning.discover(
+        schema=partition_schema)
+    factory = ds.FileSystemDatasetFactory(mockfs, selector, format, options)
+    inferred_schema = factory.inspect()
+    assert inferred_schema == full_schema
+    actual = factory.finish().to_table(columns={
+        "date_int": ds.field("date").cast(pa.int64()),
+    })
+    assert actual[0][0].as_py() == 1620086400
+
+    options.partitioning_factory = ds.HivePartitioning.discover(
+        segment_encoding="none")
+    factory = ds.FileSystemDatasetFactory(mockfs, selector, format, options)
+    fragments = list(factory.finish().get_fragments())
+    assert fragments[0].partition_expression.equals(
+        (ds.field("date") == "2021-05-04 00%3A00%3A00") &
+        (ds.field("string") == "%24"))
+
+    options.partitioning = ds.HivePartitioning(
+        string_partition_schema, segment_encoding="none")
+    factory = ds.FileSystemDatasetFactory(mockfs, selector, format, options)
+    fragments = list(factory.finish().get_fragments())
+    assert fragments[0].partition_expression.equals(
+        (ds.field("date") == "2021-05-04 00%3A00%3A00") &
+        (ds.field("string") == "%24"))
+
+    options.partitioning_factory = ds.HivePartitioning.discover(
+        schema=partition_schema, segment_encoding="none")
+    factory = ds.FileSystemDatasetFactory(mockfs, selector, format, options)
+    with pytest.raises(pa.ArrowInvalid,
+                       match="Could not cast segments for partition field"):
+        inferred_schema = factory.inspect()
+
+
 def test_dictionary_partitioning_outer_nulls_raises(tempdir):
     table = pa.table({'a': ['x', 'y', None], 'b': ['x', 'y', 'z']})
     part = ds.partitioning(
diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R
index 29aa1911a55..45a0ea69c59 100644
--- a/r/R/arrowExports.R
+++ b/r/R/arrowExports.R
@@ -472,20 +472,20 @@ dataset___ParquetFragmentScanOptions__Make <- function(use_buffered_stream, buff
     .Call(`_arrow_dataset___ParquetFragmentScanOptions__Make`, use_buffered_stream, buffer_size, pre_buffer)
 }
 
-dataset___DirectoryPartitioning <- function(schm){
-    .Call(`_arrow_dataset___DirectoryPartitioning`, schm)
+dataset___DirectoryPartitioning <- function(schm, segment_encoding){
+    .Call(`_arrow_dataset___DirectoryPartitioning`, schm, segment_encoding)
 }
 
-dataset___DirectoryPartitioning__MakeFactory <- function(field_names){
-    .Call(`_arrow_dataset___DirectoryPartitioning__MakeFactory`, field_names)
+dataset___DirectoryPartitioning__MakeFactory <- function(field_names, segment_encoding){
+    .Call(`_arrow_dataset___DirectoryPartitioning__MakeFactory`, field_names, segment_encoding)
 }
 
-dataset___HivePartitioning <- function(schm, null_fallback){
-    .Call(`_arrow_dataset___HivePartitioning`, schm, null_fallback)
+dataset___HivePartitioning <- function(schm, null_fallback, segment_encoding){
+    .Call(`_arrow_dataset___HivePartitioning`, schm, null_fallback, segment_encoding)
 }
 
-dataset___HivePartitioning__MakeFactory <- function(null_fallback){
-    .Call(`_arrow_dataset___HivePartitioning__MakeFactory`, null_fallback)
+dataset___HivePartitioning__MakeFactory <- function(null_fallback, segment_encoding){
+    .Call(`_arrow_dataset___HivePartitioning__MakeFactory`, null_fallback, segment_encoding)
 }
 
 dataset___ScannerBuilder__ProjectNames <- function(sb, cols){
diff --git a/r/R/dataset-partition.R b/r/R/dataset-partition.R
index 3c4f18a5692..6e29e4ea31c 100644
--- a/r/R/dataset-partition.R
+++ b/r/R/dataset-partition.R
@@ -64,15 +64,19 @@ Partitioning <- R6Class("Partitioning", inherit = ArrowObject)
 #' @rdname Partitioning
 #' @export
 DirectoryPartitioning <- R6Class("DirectoryPartitioning", inherit = Partitioning)
-DirectoryPartitioning$create <- dataset___DirectoryPartitioning
+DirectoryPartitioning$create <- function(schm, segment_encoding = "uri") {
+  dataset___DirectoryPartitioning(schm, segment_encoding = segment_encoding)
+}
 
 #' @usage NULL
 #' @format NULL
 #' @rdname Partitioning
 #' @export
 HivePartitioning <- R6Class("HivePartitioning", inherit = Partitioning)
-HivePartitioning$create <- function(schm, null_fallback = NULL) {
-  dataset___HivePartitioning(schm, null_fallback = null_fallback_or_default(null_fallback))
+HivePartitioning$create <- function(schm, null_fallback = NULL, segment_encoding = "uri") {
+  dataset___HivePartitioning(schm,
+                             null_fallback = null_fallback_or_default(null_fallback),
+                             segment_encoding = segment_encoding)
 }
 
 #' Construct Hive partitioning
@@ -86,17 +90,19 @@ HivePartitioning$create <- function(schm, null_fallback = NULL) {
 #' @param null_fallback character to be used in place of missing values (`NA` or `NULL`)
 #' in partition columns. Default is `"__HIVE_DEFAULT_PARTITION__"`,
 #' which is what Hive uses.
+#' @param segment_encoding Decode partition segments after splitting paths.
+#' Default is `"uri"` (URI-decode segments). May also be `"none"` (leave as-is).
 #' @return A [HivePartitioning][Partitioning], or a `HivePartitioningFactory` if
 #' calling `hive_partition()` with no arguments.
 #' @examplesIf arrow_with_dataset()
 #' hive_partition(year = int16(), month = int8())
 #' @export
-hive_partition <- function(..., null_fallback = NULL) {
+hive_partition <- function(..., null_fallback = NULL, segment_encoding = "uri") {
   schm <- schema(...)
   if (length(schm) == 0) {
-    HivePartitioningFactory$create(null_fallback)
+    HivePartitioningFactory$create(null_fallback, segment_encoding)
   } else {
-    HivePartitioning$create(schm, null_fallback)
+    HivePartitioning$create(schm, null_fallback, segment_encoding)
   }
 }
 
@@ -107,15 +113,17 @@ PartitioningFactory <- R6Class("PartitioningFactory", inherit = ArrowObject)
 #' @rdname Partitioning
 #' @export
 DirectoryPartitioningFactory <- R6Class("DirectoryPartitioningFactory ", inherit = PartitioningFactory)
-DirectoryPartitioningFactory$create <- dataset___DirectoryPartitioning__MakeFactory
+DirectoryPartitioningFactory$create <- function(field_names, segment_encoding = "uri") {
+  dataset___DirectoryPartitioning__MakeFactory(field_names, segment_encoding)
+}
 
 #' @usage NULL
 #' @format NULL
 #' @rdname Partitioning
 #' @export
 HivePartitioningFactory <- R6Class("HivePartitioningFactory", inherit = PartitioningFactory)
-HivePartitioningFactory$create <- function(null_fallback = NULL) {
-  dataset___HivePartitioning__MakeFactory(null_fallback_or_default(null_fallback))
+HivePartitioningFactory$create <- function(null_fallback = NULL, segment_encoding = "uri") {
+  dataset___HivePartitioning__MakeFactory(null_fallback_or_default(null_fallback), segment_encoding)
 }
 
 null_fallback_or_default <- function(null_fallback) {
diff --git a/r/man/hive_partition.Rd b/r/man/hive_partition.Rd
index aeb9cd4b3d1..eef9f9157ea 100644
--- a/r/man/hive_partition.Rd
+++ b/r/man/hive_partition.Rd
@@ -4,7 +4,7 @@
 \alias{hive_partition}
 \title{Construct Hive partitioning}
 \usage{
-hive_partition(..., null_fallback = NULL)
+hive_partition(..., null_fallback = NULL, segment_encoding = "uri")
 }
 \arguments{
 \item{...}{named list of \link[=data-type]{data types}, passed to \code{\link[=schema]{schema()}}}
@@ -12,6 +12,9 @@ hive_partition(..., null_fallback = NULL)
 \item{null_fallback}{character to be used in place of missing values (\code{NA} or \code{NULL})
 in partition columns. Default is \code{"__HIVE_DEFAULT_PARTITION__"},
 which is what Hive uses.}
+
+\item{segment_encoding}{Decode partition segments after splitting paths.
+Default is \code{"uri"} (URI-decode segments). May also be \code{"none"} (leave as-is).}
 }
 \value{
 A \link[=Partitioning]{HivePartitioning}, or a \code{HivePartitioningFactory} if
diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp
index 483b1f42ca0..2024483f47d 100644
--- a/r/src/arrowExports.cpp
+++ b/r/src/arrowExports.cpp
@@ -1856,61 +1856,65 @@ extern "C" SEXP _arrow_dataset___ParquetFragmentScanOptions__Make(SEXP use_buffe
 
 // dataset.cpp
 #if defined(ARROW_R_WITH_DATASET)
-std::shared_ptr<ds::DirectoryPartitioning> dataset___DirectoryPartitioning(const std::shared_ptr<arrow::Schema>& schm);
-extern "C" SEXP _arrow_dataset___DirectoryPartitioning(SEXP schm_sexp){
+std::shared_ptr<ds::DirectoryPartitioning> dataset___DirectoryPartitioning(const std::shared_ptr<arrow::Schema>& schm, const std::string& segment_encoding);
+extern "C" SEXP _arrow_dataset___DirectoryPartitioning(SEXP schm_sexp, SEXP segment_encoding_sexp){
 BEGIN_CPP11
 	arrow::r::Input<const std::shared_ptr<arrow::Schema>&>::type schm(schm_sexp);
-	return cpp11::as_sexp(dataset___DirectoryPartitioning(schm));
+	arrow::r::Input<const std::string&>::type segment_encoding(segment_encoding_sexp);
+	return cpp11::as_sexp(dataset___DirectoryPartitioning(schm, segment_encoding));
 END_CPP11
 }
 #else
-extern "C" SEXP _arrow_dataset___DirectoryPartitioning(SEXP schm_sexp){
+extern "C" SEXP _arrow_dataset___DirectoryPartitioning(SEXP schm_sexp, SEXP segment_encoding_sexp){
 	Rf_error("Cannot call dataset___DirectoryPartitioning(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
 }
 #endif
 
 // dataset.cpp
 #if defined(ARROW_R_WITH_DATASET)
-std::shared_ptr<ds::PartitioningFactory> dataset___DirectoryPartitioning__MakeFactory(const std::vector<std::string>& field_names);
-extern "C" SEXP _arrow_dataset___DirectoryPartitioning__MakeFactory(SEXP field_names_sexp){
+std::shared_ptr<ds::PartitioningFactory> dataset___DirectoryPartitioning__MakeFactory(const std::vector<std::string>& field_names, const std::string& segment_encoding);
+extern "C" SEXP _arrow_dataset___DirectoryPartitioning__MakeFactory(SEXP field_names_sexp, SEXP segment_encoding_sexp){
 BEGIN_CPP11
 	arrow::r::Input<const std::vector<std::string>&>::type field_names(field_names_sexp);
-	return cpp11::as_sexp(dataset___DirectoryPartitioning__MakeFactory(field_names));
+	arrow::r::Input<const std::string&>::type segment_encoding(segment_encoding_sexp);
+	return cpp11::as_sexp(dataset___DirectoryPartitioning__MakeFactory(field_names, segment_encoding));
 END_CPP11
 }
 #else
-extern "C" SEXP _arrow_dataset___DirectoryPartitioning__MakeFactory(SEXP field_names_sexp){
+extern "C" SEXP _arrow_dataset___DirectoryPartitioning__MakeFactory(SEXP field_names_sexp, SEXP segment_encoding_sexp){
 	Rf_error("Cannot call dataset___DirectoryPartitioning__MakeFactory(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
 }
 #endif
 
 // dataset.cpp
 #if defined(ARROW_R_WITH_DATASET)
-std::shared_ptr<ds::HivePartitioning> dataset___HivePartitioning(const std::shared_ptr<arrow::Schema>& schm, const std::string& null_fallback);
-extern "C" SEXP _arrow_dataset___HivePartitioning(SEXP schm_sexp, SEXP null_fallback_sexp){
+std::shared_ptr<ds::HivePartitioning> dataset___HivePartitioning(const std::shared_ptr<arrow::Schema>& schm, const std::string& null_fallback, const std::string& segment_encoding);
+extern "C" SEXP _arrow_dataset___HivePartitioning(SEXP schm_sexp, SEXP null_fallback_sexp, SEXP segment_encoding_sexp){
 BEGIN_CPP11
 	arrow::r::Input<const std::shared_ptr<arrow::Schema>&>::type schm(schm_sexp);
 	arrow::r::Input<const std::string&>::type null_fallback(null_fallback_sexp);
-	return cpp11::as_sexp(dataset___HivePartitioning(schm, null_fallback));
+	arrow::r::Input<const std::string&>::type segment_encoding(segment_encoding_sexp);
+	return cpp11::as_sexp(dataset___HivePartitioning(schm, null_fallback, segment_encoding));
 END_CPP11
 }
 #else
-extern "C" SEXP _arrow_dataset___HivePartitioning(SEXP schm_sexp, SEXP null_fallback_sexp){
+extern "C" SEXP _arrow_dataset___HivePartitioning(SEXP schm_sexp, SEXP null_fallback_sexp, SEXP segment_encoding_sexp){
 	Rf_error("Cannot call dataset___HivePartitioning(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
 }
 #endif
 
 // dataset.cpp
 #if defined(ARROW_R_WITH_DATASET)
-std::shared_ptr<ds::PartitioningFactory> dataset___HivePartitioning__MakeFactory(const std::string& null_fallback);
-extern "C" SEXP _arrow_dataset___HivePartitioning__MakeFactory(SEXP null_fallback_sexp){
+std::shared_ptr<ds::PartitioningFactory> dataset___HivePartitioning__MakeFactory(const std::string& null_fallback, const std::string& segment_encoding);
+extern "C" SEXP _arrow_dataset___HivePartitioning__MakeFactory(SEXP null_fallback_sexp, SEXP segment_encoding_sexp){
 BEGIN_CPP11
 	arrow::r::Input<const std::string&>::type null_fallback(null_fallback_sexp);
-	return cpp11::as_sexp(dataset___HivePartitioning__MakeFactory(null_fallback));
+	arrow::r::Input<const std::string&>::type segment_encoding(segment_encoding_sexp);
+	return cpp11::as_sexp(dataset___HivePartitioning__MakeFactory(null_fallback, segment_encoding));
 END_CPP11
 }
 #else
-extern "C" SEXP _arrow_dataset___HivePartitioning__MakeFactory(SEXP null_fallback_sexp){
+extern "C" SEXP _arrow_dataset___HivePartitioning__MakeFactory(SEXP null_fallback_sexp, SEXP segment_encoding_sexp){
 	Rf_error("Cannot call dataset___HivePartitioning__MakeFactory(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
 }
 #endif
@@ -7006,10 +7010,10 @@ static const R_CallMethodDef CallEntries[] = {
 		{ "_arrow_dataset___FragmentScanOptions__type_name", (DL_FUNC) &_arrow_dataset___FragmentScanOptions__type_name, 1}, 
 		{ "_arrow_dataset___CsvFragmentScanOptions__Make", (DL_FUNC) &_arrow_dataset___CsvFragmentScanOptions__Make, 2}, 
 		{ "_arrow_dataset___ParquetFragmentScanOptions__Make", (DL_FUNC) &_arrow_dataset___ParquetFragmentScanOptions__Make, 3}, 
-		{ "_arrow_dataset___DirectoryPartitioning", (DL_FUNC) &_arrow_dataset___DirectoryPartitioning, 1}, 
-		{ "_arrow_dataset___DirectoryPartitioning__MakeFactory", (DL_FUNC) &_arrow_dataset___DirectoryPartitioning__MakeFactory, 1}, 
-		{ "_arrow_dataset___HivePartitioning", (DL_FUNC) &_arrow_dataset___HivePartitioning, 2}, 
-		{ "_arrow_dataset___HivePartitioning__MakeFactory", (DL_FUNC) &_arrow_dataset___HivePartitioning__MakeFactory, 1}, 
+		{ "_arrow_dataset___DirectoryPartitioning", (DL_FUNC) &_arrow_dataset___DirectoryPartitioning, 2}, 
+		{ "_arrow_dataset___DirectoryPartitioning__MakeFactory", (DL_FUNC) &_arrow_dataset___DirectoryPartitioning__MakeFactory, 2}, 
+		{ "_arrow_dataset___HivePartitioning", (DL_FUNC) &_arrow_dataset___HivePartitioning, 3}, 
+		{ "_arrow_dataset___HivePartitioning__MakeFactory", (DL_FUNC) &_arrow_dataset___HivePartitioning__MakeFactory, 2}, 
 		{ "_arrow_dataset___ScannerBuilder__ProjectNames", (DL_FUNC) &_arrow_dataset___ScannerBuilder__ProjectNames, 2}, 
 		{ "_arrow_dataset___ScannerBuilder__ProjectExprs", (DL_FUNC) &_arrow_dataset___ScannerBuilder__ProjectExprs, 3}, 
 		{ "_arrow_dataset___ScannerBuilder__Filter", (DL_FUNC) &_arrow_dataset___ScannerBuilder__Filter, 2}, 
diff --git a/r/src/dataset.cpp b/r/src/dataset.cpp
index c5ecc84dbaa..24c1a1343ea 100644
--- a/r/src/dataset.cpp
+++ b/r/src/dataset.cpp
@@ -333,30 +333,50 @@ dataset___ParquetFragmentScanOptions__Make(bool use_buffered_stream, int64_t buf
 
 // DirectoryPartitioning, HivePartitioning
 
+ds::SegmentEncoding GetSegmentEncoding(const std::string& segment_encoding) {
+  if (segment_encoding == "none") {
+    return ds::SegmentEncoding::None;
+  } else if (segment_encoding == "uri") {
+    return ds::SegmentEncoding::Uri;
+  }
+  cpp11::stop("invalid segment encoding: " + segment_encoding);
+  return ds::SegmentEncoding::None;
+}
+
 // [[dataset::export]]
 std::shared_ptr<ds::DirectoryPartitioning> dataset___DirectoryPartitioning(
-    const std::shared_ptr<arrow::Schema>& schm) {
-  return std::make_shared<ds::DirectoryPartitioning>(schm);
+    const std::shared_ptr<arrow::Schema>& schm, const std::string& segment_encoding) {
+  ds::KeyValuePartitioningOptions options;
+  options.segment_encoding = GetSegmentEncoding(segment_encoding);
+  std::vector<std::shared_ptr<arrow::Array>> dictionaries;
+  return std::make_shared<ds::DirectoryPartitioning>(schm, dictionaries, options);
 }
 
 // [[dataset::export]]
 std::shared_ptr<ds::PartitioningFactory> dataset___DirectoryPartitioning__MakeFactory(
-    const std::vector<std::string>& field_names) {
-  return ds::DirectoryPartitioning::MakeFactory(field_names);
+    const std::vector<std::string>& field_names, const std::string& segment_encoding) {
+  ds::PartitioningFactoryOptions options;
+  options.segment_encoding = GetSegmentEncoding(segment_encoding);
+  return ds::DirectoryPartitioning::MakeFactory(field_names, options);
 }
 
 // [[dataset::export]]
 std::shared_ptr<ds::HivePartitioning> dataset___HivePartitioning(
-    const std::shared_ptr<arrow::Schema>& schm, const std::string& null_fallback) {
+    const std::shared_ptr<arrow::Schema>& schm, const std::string& null_fallback,
+    const std::string& segment_encoding) {
+  ds::HivePartitioningOptions options;
+  options.null_fallback = null_fallback;
+  options.segment_encoding = GetSegmentEncoding(segment_encoding);
   std::vector<std::shared_ptr<arrow::Array>> dictionaries;
-  return std::make_shared<ds::HivePartitioning>(schm, dictionaries, null_fallback);
+  return std::make_shared<ds::HivePartitioning>(schm, dictionaries, options);
 }
 
 // [[dataset::export]]
 std::shared_ptr<ds::PartitioningFactory> dataset___HivePartitioning__MakeFactory(
-    const std::string& null_fallback) {
+    const std::string& null_fallback, const std::string& segment_encoding) {
   ds::HivePartitioningFactoryOptions options;
   options.null_fallback = null_fallback;
+  options.segment_encoding = GetSegmentEncoding(segment_encoding);
   return ds::HivePartitioning::MakeFactory(options);
 }
 
diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R
index 7c8ab5e47af..d84ed03c2d2 100644
--- a/r/tests/testthat/test-dataset.R
+++ b/r/tests/testthat/test-dataset.R
@@ -1108,6 +1108,111 @@ test_that("Assembling a Dataset manually and getting a Table", {
   expect_scan_result(ds, schm)
 })
 
+test_that("URI-decoding with directory partitioning", {
+  root <- make_temp_dir()
+  fmt <- FileFormat$create("feather")
+  fs <- LocalFileSystem$create()
+  selector <- FileSelector$create(root, recursive = TRUE)
+  dir1 <- file.path(root, "2021-05-04 00%3A00%3A00", "%24")
+  dir.create(dir1, recursive = TRUE)
+  write_feather(df1, file.path(dir1, "data.feather"))
+
+  partitioning <- DirectoryPartitioning$create(
+    schema(date = timestamp(unit = "s"), string = utf8()))
+  factory <- FileSystemDatasetFactory$create(
+    fs, selector, NULL, fmt, partitioning = partitioning)
+  schm <- factory$Inspect()
+  ds <- factory$Finish(schm)
+  expect_scan_result(ds, schm)
+
+  partitioning <- DirectoryPartitioning$create(
+    schema(date = timestamp(unit = "s"), string = utf8()),
+    segment_encoding = "none")
+  factory <- FileSystemDatasetFactory$create(
+    fs, selector, NULL, fmt, partitioning = partitioning)
+  schm <- factory$Inspect()
+  expect_error(factory$Finish(schm), "Invalid: error parsing")
+
+  partitioning_factory <- DirectoryPartitioningFactory$create(
+    c("date", "string"))
+  factory <- FileSystemDatasetFactory$create(
+    fs, selector, NULL, fmt, partitioning_factory)
+  schm <- factory$Inspect()
+  ds <- factory$Finish(schm)
+  # Can't directly inspect partition expressions, so do it implicitly via scan
+  expect_equal(
+    ds %>%
+      filter(date == "2021-05-04 00:00:00", string == "$") %>%
+      select(int) %>%
+      collect(),
+    df1 %>% select(int) %>% collect()
+  )
+
+  partitioning_factory <- DirectoryPartitioningFactory$create(
+    c("date", "string"), segment_encoding = "none")
+  factory <- FileSystemDatasetFactory$create(
+    fs, selector, NULL, fmt, partitioning_factory)
+  schm <- factory$Inspect()
+  ds <- factory$Finish(schm)
+  expect_equal(
+    ds %>%
+      filter(date == "2021-05-04 00%3A00%3A00", string == "%24") %>%
+      select(int) %>%
+      collect(),
+    df1 %>% select(int) %>% collect()
+  )
+})
+
+test_that("URI-decoding with hive partitioning", {
+  root <- make_temp_dir()
+  fmt <- FileFormat$create("feather")
+  fs <- LocalFileSystem$create()
+  selector <- FileSelector$create(root, recursive = TRUE)
+  dir1 <- file.path(root, "date=2021-05-04 00%3A00%3A00", "string=%24")
+  dir.create(dir1, recursive = TRUE)
+  write_feather(df1, file.path(dir1, "data.feather"))
+
+  partitioning <- hive_partition(
+    date = timestamp(unit = "s"), string = utf8())
+  factory <- FileSystemDatasetFactory$create(
+    fs, selector, NULL, fmt, partitioning = partitioning)
+  ds <- factory$Finish(schm)
+  expect_scan_result(ds, schm)
+
+  partitioning <- hive_partition(
+    date = timestamp(unit = "s"), string = utf8(), segment_encoding = "none")
+  factory <- FileSystemDatasetFactory$create(
+    fs, selector, NULL, fmt, partitioning = partitioning)
+  expect_error(factory$Finish(schm), "Invalid: error parsing")
+
+  partitioning_factory <- hive_partition()
+  factory <- FileSystemDatasetFactory$create(
+    fs, selector, NULL, fmt, partitioning_factory)
+  schm <- factory$Inspect()
+  ds <- factory$Finish(schm)
+  # Can't directly inspect partition expressions, so do it implicitly via scan
+  expect_equal(
+    ds %>%
+      filter(date == "2021-05-04 00:00:00", string == "$") %>%
+      select(int) %>%
+      collect(),
+    df1 %>% select(int) %>% collect()
+  )
+
+  partitioning_factory <- hive_partition(segment_encoding = "none")
+  factory <- FileSystemDatasetFactory$create(
+    fs, selector, NULL, fmt, partitioning_factory)
+  schm <- factory$Inspect()
+  ds <- factory$Finish(schm)
+  expect_equal(
+    ds %>%
+      filter(date == "2021-05-04 00%3A00%3A00", string == "%24") %>%
+      select(int) %>%
+      collect(),
+    df1 %>% select(int) %>% collect()
+  )
+})
+
 test_that("Assembling multiple DatasetFactories with DatasetFactory", {
   skip_if_not_available("parquet")
   factory1 <- dataset_factory(file.path(dataset_dir, 1), format = "parquet")

From 2464b149b3d06c6c3e45136dad0f3956772ca487 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Mon, 7 Jun 2021 18:27:32 +0200
Subject: [PATCH 369/719] ARROW-12050: [C++][Python][FlightRPC] Make Flight
 operations interruptible in Python

This uses a stop token to let interactive users interrupt a long-running Flight operation. It's not perfect: the operation won't be cancelled until the server delivers a message, so this doesn't protect against very slow servers. (In that case, we'd need some way for the stop source to call TryCancel() on the gRPC RPC object, which would be tricky.) But so long as the server is being responsive, this means Ctrl-C should do what people expect in Python.

Closes #10318 from lidavidm/arrow-12050

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/flight/client.cc              |  56 ++++++--
 cpp/src/arrow/flight/client.h               |  10 ++
 cpp/src/arrow/flight/flight_test.cc         | 142 ++++++++++++++++++++
 cpp/src/arrow/flight/server.cc              |   1 +
 cpp/src/arrow/flight/server.h               |   3 +
 cpp/src/arrow/util/cancel.cc                |   4 +-
 cpp/src/arrow/util/cancel.h                 |   4 +-
 python/pyarrow/_flight.pyx                  |  69 ++++++----
 python/pyarrow/includes/libarrow_flight.pxd |   4 +
 python/pyarrow/tests/test_csv.py            |  15 +--
 python/pyarrow/tests/test_flight.py         |  60 +++++++++
 python/pyarrow/tests/util.py                |  17 +++
 12 files changed, 336 insertions(+), 49 deletions(-)

diff --git a/cpp/src/arrow/flight/client.cc b/cpp/src/arrow/flight/client.cc
index c0e8eaaed28..84fc4a28e92 100644
--- a/cpp/src/arrow/flight/client.cc
+++ b/cpp/src/arrow/flight/client.cc
@@ -45,6 +45,7 @@
 #include "arrow/record_batch.h"
 #include "arrow/result.h"
 #include "arrow/status.h"
+#include "arrow/table.h"
 #include "arrow/type.h"
 #include "arrow/util/logging.h"
 #include "arrow/util/uri.h"
@@ -92,6 +93,14 @@ std::shared_ptr<FlightWriteSizeStatusDetail> FlightWriteSizeStatusDetail::Unwrap
 
 FlightClientOptions FlightClientOptions::Defaults() { return FlightClientOptions(); }
 
+Status FlightStreamReader::ReadAll(std::shared_ptr<Table>* table,
+                                   const StopToken& stop_token) {
+  std::vector<std::shared_ptr<RecordBatch>> batches;
+  RETURN_NOT_OK(ReadAll(&batches, stop_token));
+  ARROW_ASSIGN_OR_RAISE(auto schema, GetSchema());
+  return Table::FromRecordBatches(schema, std::move(batches)).Value(table);
+}
+
 struct ClientRpc {
   grpc::ClientContext context;
 
@@ -484,11 +493,12 @@ template <typename Reader>
 class GrpcStreamReader : public FlightStreamReader {
  public:
   GrpcStreamReader(std::shared_ptr<ClientRpc> rpc, std::shared_ptr<std::mutex> read_mutex,
-                   const ipc::IpcReadOptions& options,
+                   const ipc::IpcReadOptions& options, StopToken stop_token,
                    std::shared_ptr<FinishableStream<Reader, internal::FlightData>> stream)
       : rpc_(rpc),
         read_mutex_(read_mutex),
         options_(options),
+        stop_token_(std::move(stop_token)),
         stream_(stream),
         peekable_reader_(new internal::PeekableFlightDataReader<std::shared_ptr<Reader>>(
             stream->stream())),
@@ -552,6 +562,28 @@ class GrpcStreamReader : public FlightStreamReader {
     out->app_metadata = std::move(app_metadata_);
     return Status::OK();
   }
+  Status ReadAll(std::vector<std::shared_ptr<RecordBatch>>* batches) override {
+    return ReadAll(batches, stop_token_);
+  }
+  Status ReadAll(std::vector<std::shared_ptr<RecordBatch>>* batches,
+                 const StopToken& stop_token) override {
+    FlightStreamChunk chunk;
+
+    while (true) {
+      if (stop_token.IsStopRequested()) {
+        Cancel();
+        return stop_token.Poll();
+      }
+      RETURN_NOT_OK(Next(&chunk));
+      if (!chunk.data) break;
+      batches->emplace_back(std::move(chunk.data));
+    }
+    return Status::OK();
+  }
+  Status ReadAll(std::shared_ptr<Table>* table) override {
+    return ReadAll(table, stop_token_);
+  }
+  using FlightStreamReader::ReadAll;
   void Cancel() override { rpc_->context.TryCancel(); }
 
  private:
@@ -574,6 +606,7 @@ class GrpcStreamReader : public FlightStreamReader {
   // read. Nullable, as DoGet() doesn't need this.
   std::shared_ptr<std::mutex> read_mutex_;
   ipc::IpcReadOptions options_;
+  StopToken stop_token_;
   std::shared_ptr<FinishableStream<Reader, internal::FlightData>> stream_;
   std::shared_ptr<internal::PeekableFlightDataReader<std::shared_ptr<Reader>>>
       peekable_reader_;
@@ -1060,12 +1093,13 @@ class FlightClient::FlightClientImpl {
     std::vector<FlightInfo> flights;
 
     pb::FlightInfo pb_info;
-    while (stream->Read(&pb_info)) {
+    while (!options.stop_token.IsStopRequested() && stream->Read(&pb_info)) {
       FlightInfo::Data info_data;
       RETURN_NOT_OK(internal::FromProto(pb_info, &info_data));
       flights.emplace_back(std::move(info_data));
     }
-
+    if (options.stop_token.IsStopRequested()) rpc.context.TryCancel();
+    RETURN_NOT_OK(options.stop_token.Poll());
     listing->reset(new SimpleFlightListing(std::move(flights)));
     return internal::FromGrpcStatus(stream->Finish(), &rpc.context);
   }
@@ -1083,11 +1117,13 @@ class FlightClient::FlightClientImpl {
     pb::Result pb_result;
 
     std::vector<Result> materialized_results;
-    while (stream->Read(&pb_result)) {
+    while (!options.stop_token.IsStopRequested() && stream->Read(&pb_result)) {
       Result result;
       RETURN_NOT_OK(internal::FromProto(pb_result, &result));
       materialized_results.emplace_back(std::move(result));
     }
+    if (options.stop_token.IsStopRequested()) rpc.context.TryCancel();
+    RETURN_NOT_OK(options.stop_token.Poll());
 
     *results = std::unique_ptr<ResultStream>(
         new SimpleResultStream(std::move(materialized_results)));
@@ -1104,10 +1140,12 @@ class FlightClient::FlightClientImpl {
 
     pb::ActionType pb_type;
     ActionType type;
-    while (stream->Read(&pb_type)) {
+    while (!options.stop_token.IsStopRequested() && stream->Read(&pb_type)) {
       RETURN_NOT_OK(internal::FromProto(pb_type, &type));
       types->emplace_back(std::move(type));
     }
+    if (options.stop_token.IsStopRequested()) rpc.context.TryCancel();
+    RETURN_NOT_OK(options.stop_token.Poll());
     return internal::FromGrpcStatus(stream->Finish(), &rpc.context);
   }
 
@@ -1163,8 +1201,8 @@ class FlightClient::FlightClientImpl {
     auto finishable_stream = std::make_shared<
         FinishableStream<grpc::ClientReader<pb::FlightData>, internal::FlightData>>(
         rpc, stream);
-    *out = std::unique_ptr<StreamReader>(
-        new StreamReader(rpc, nullptr, options.read_options, finishable_stream));
+    *out = std::unique_ptr<StreamReader>(new StreamReader(
+        rpc, nullptr, options.read_options, options.stop_token, finishable_stream));
     // Eagerly read the schema
     return static_cast<StreamReader*>(out->get())->EnsureDataStarted();
   }
@@ -1208,8 +1246,8 @@ class FlightClient::FlightClientImpl {
     auto finishable_stream =
         std::make_shared<FinishableWritableStream<GrpcStream, internal::FlightData>>(
             rpc, read_mutex, stream);
-    *reader = std::unique_ptr<StreamReader>(
-        new StreamReader(rpc, read_mutex, options.read_options, finishable_stream));
+    *reader = std::unique_ptr<StreamReader>(new StreamReader(
+        rpc, read_mutex, options.read_options, options.stop_token, finishable_stream));
     // Do not eagerly read the schema. There may be metadata messages
     // before any data is sent, or data may not be sent at all.
     return StreamWriter::Open(descriptor, nullptr, options.write_options, rpc,
diff --git a/cpp/src/arrow/flight/client.h b/cpp/src/arrow/flight/client.h
index b3c5a96e597..0a35b6d10e8 100644
--- a/cpp/src/arrow/flight/client.h
+++ b/cpp/src/arrow/flight/client.h
@@ -31,6 +31,7 @@
 #include "arrow/ipc/writer.h"
 #include "arrow/result.h"
 #include "arrow/status.h"
+#include "arrow/util/cancel.h"
 #include "arrow/util/variant.h"
 
 #include "arrow/flight/types.h"  // IWYU pragma: keep
@@ -69,6 +70,9 @@ class ARROW_FLIGHT_EXPORT FlightCallOptions {
 
   /// \brief Headers for client to add to context.
   std::vector<std::pair<std::string, std::string>> headers;
+
+  /// \brief A token to enable interactive user cancellation of long-running requests.
+  StopToken stop_token;
 };
 
 /// \brief Indicate that the client attempted to write a message
@@ -129,6 +133,12 @@ class ARROW_FLIGHT_EXPORT FlightStreamReader : public MetadataRecordBatchReader
  public:
   /// \brief Try to cancel the call.
   virtual void Cancel() = 0;
+  using MetadataRecordBatchReader::ReadAll;
+  /// \brief Consume entire stream as a vector of record batches
+  virtual Status ReadAll(std::vector<std::shared_ptr<RecordBatch>>* batches,
+                         const StopToken& stop_token) = 0;
+  /// \brief Consume entire stream as a Table
+  Status ReadAll(std::shared_ptr<Table>* table, const StopToken& stop_token);
 };
 
 // Silence warning
diff --git a/cpp/src/arrow/flight/flight_test.cc b/cpp/src/arrow/flight/flight_test.cc
index 35993f1eaa1..8264f3e2197 100644
--- a/cpp/src/arrow/flight/flight_test.cc
+++ b/cpp/src/arrow/flight/flight_test.cc
@@ -2673,5 +2673,147 @@ TEST_F(TestCookieParsing, CookieCache) {
   AddCookieVerifyCache({"id0=0;", "id1=1;", "id2=2"}, "id0=\"0\"; id1=\"1\"; id2=\"2\"");
 }
 
+class ForeverFlightListing : public FlightListing {
+  Status Next(std::unique_ptr<FlightInfo>* info) override {
+    std::this_thread::sleep_for(std::chrono::milliseconds(100));
+    *info = arrow::internal::make_unique<FlightInfo>(ExampleFlightInfo()[0]);
+    return Status::OK();
+  }
+};
+
+class ForeverResultStream : public ResultStream {
+  Status Next(std::unique_ptr<Result>* result) override {
+    std::this_thread::sleep_for(std::chrono::milliseconds(100));
+    *result = arrow::internal::make_unique<Result>();
+    (*result)->body = Buffer::FromString("foo");
+    return Status::OK();
+  }
+};
+
+class ForeverDataStream : public FlightDataStream {
+ public:
+  ForeverDataStream() : schema_(arrow::schema({})), mapper_(*schema_) {}
+  std::shared_ptr<Schema> schema() override { return schema_; }
+
+  Status GetSchemaPayload(FlightPayload* payload) override {
+    return ipc::GetSchemaPayload(*schema_, ipc::IpcWriteOptions::Defaults(), mapper_,
+                                 &payload->ipc_message);
+  }
+
+  Status Next(FlightPayload* payload) override {
+    auto batch = RecordBatch::Make(schema_, 0, ArrayVector{});
+    return ipc::GetRecordBatchPayload(*batch, ipc::IpcWriteOptions::Defaults(),
+                                      &payload->ipc_message);
+  }
+
+ private:
+  std::shared_ptr<Schema> schema_;
+  ipc::DictionaryFieldMapper mapper_;
+};
+
+class CancelTestServer : public FlightServerBase {
+ public:
+  Status ListFlights(const ServerCallContext&, const Criteria*,
+                     std::unique_ptr<FlightListing>* listings) override {
+    *listings = arrow::internal::make_unique<ForeverFlightListing>();
+    return Status::OK();
+  }
+  Status DoAction(const ServerCallContext&, const Action&,
+                  std::unique_ptr<ResultStream>* result) override {
+    *result = arrow::internal::make_unique<ForeverResultStream>();
+    return Status::OK();
+  }
+  Status ListActions(const ServerCallContext&,
+                     std::vector<ActionType>* actions) override {
+    *actions = {};
+    return Status::OK();
+  }
+  Status DoGet(const ServerCallContext&, const Ticket&,
+               std::unique_ptr<FlightDataStream>* data_stream) override {
+    *data_stream = arrow::internal::make_unique<ForeverDataStream>();
+    return Status::OK();
+  }
+};
+
+class TestCancel : public ::testing::Test {
+ public:
+  void SetUp() {
+    ASSERT_OK(MakeServer<CancelTestServer>(
+        &server_, &client_, [](FlightServerOptions* options) { return Status::OK(); },
+        [](FlightClientOptions* options) { return Status::OK(); }));
+  }
+  void TearDown() { ASSERT_OK(server_->Shutdown()); }
+
+ protected:
+  std::unique_ptr<FlightClient> client_;
+  std::unique_ptr<FlightServerBase> server_;
+};
+
+TEST_F(TestCancel, ListFlights) {
+  StopSource stop_source;
+  FlightCallOptions options;
+  options.stop_token = stop_source.token();
+  std::unique_ptr<FlightListing> listing;
+  stop_source.RequestStop(Status::Cancelled("StopSource"));
+  EXPECT_RAISES_WITH_MESSAGE_THAT(Cancelled, ::testing::HasSubstr("StopSource"),
+                                  client_->ListFlights(options, {}, &listing));
+}
+
+TEST_F(TestCancel, DoAction) {
+  StopSource stop_source;
+  FlightCallOptions options;
+  options.stop_token = stop_source.token();
+  std::unique_ptr<ResultStream> results;
+  stop_source.RequestStop(Status::Cancelled("StopSource"));
+  EXPECT_RAISES_WITH_MESSAGE_THAT(Cancelled, ::testing::HasSubstr("StopSource"),
+                                  client_->DoAction(options, {}, &results));
+}
+
+TEST_F(TestCancel, ListActions) {
+  StopSource stop_source;
+  FlightCallOptions options;
+  options.stop_token = stop_source.token();
+  std::vector<ActionType> results;
+  stop_source.RequestStop(Status::Cancelled("StopSource"));
+  EXPECT_RAISES_WITH_MESSAGE_THAT(Cancelled, ::testing::HasSubstr("StopSource"),
+                                  client_->ListActions(options, &results));
+}
+
+TEST_F(TestCancel, DoGet) {
+  StopSource stop_source;
+  FlightCallOptions options;
+  options.stop_token = stop_source.token();
+  std::unique_ptr<ResultStream> results;
+  stop_source.RequestStop(Status::Cancelled("StopSource"));
+  std::unique_ptr<FlightStreamReader> stream;
+  ASSERT_OK(client_->DoGet(options, {}, &stream));
+  std::shared_ptr<Table> table;
+  EXPECT_RAISES_WITH_MESSAGE_THAT(Cancelled, ::testing::HasSubstr("StopSource"),
+                                  stream->ReadAll(&table));
+
+  ASSERT_OK(client_->DoGet({}, &stream));
+  EXPECT_RAISES_WITH_MESSAGE_THAT(Cancelled, ::testing::HasSubstr("StopSource"),
+                                  stream->ReadAll(&table, options.stop_token));
+}
+
+TEST_F(TestCancel, DoExchange) {
+  StopSource stop_source;
+  FlightCallOptions options;
+  options.stop_token = stop_source.token();
+  std::unique_ptr<ResultStream> results;
+  stop_source.RequestStop(Status::Cancelled("StopSource"));
+  std::unique_ptr<FlightStreamWriter> writer;
+  std::unique_ptr<FlightStreamReader> stream;
+  ASSERT_OK(
+      client_->DoExchange(options, FlightDescriptor::Command(""), &writer, &stream));
+  std::shared_ptr<Table> table;
+  EXPECT_RAISES_WITH_MESSAGE_THAT(Cancelled, ::testing::HasSubstr("StopSource"),
+                                  stream->ReadAll(&table));
+
+  ASSERT_OK(client_->DoExchange(FlightDescriptor::Command(""), &writer, &stream));
+  EXPECT_RAISES_WITH_MESSAGE_THAT(Cancelled, ::testing::HasSubstr("StopSource"),
+                                  stream->ReadAll(&table, options.stop_token));
+}
+
 }  // namespace flight
 }  // namespace arrow
diff --git a/cpp/src/arrow/flight/server.cc b/cpp/src/arrow/flight/server.cc
index ce5a07fc3e0..8ed76e78da8 100644
--- a/cpp/src/arrow/flight/server.cc
+++ b/cpp/src/arrow/flight/server.cc
@@ -383,6 +383,7 @@ class GrpcServerCallContext : public ServerCallContext {
 
   const std::string& peer_identity() const override { return peer_identity_; }
   const std::string& peer() const override { return peer_; }
+  bool is_cancelled() const override { return context_->IsCancelled(); }
 
   // Helper method that runs interceptors given the result of an RPC,
   // then returns the final gRPC status to send to the client
diff --git a/cpp/src/arrow/flight/server.h b/cpp/src/arrow/flight/server.h
index dd95b7536cd..96b2da488ee 100644
--- a/cpp/src/arrow/flight/server.h
+++ b/cpp/src/arrow/flight/server.h
@@ -119,6 +119,9 @@ class ARROW_FLIGHT_EXPORT ServerCallContext {
   /// to the object beyond the request body.
   /// \return The middleware, or nullptr if not found.
   virtual ServerMiddleware* GetMiddleware(const std::string& key) const = 0;
+  /// \brief Check if the current RPC has been cancelled (by the client, by
+  /// a network error, etc.).
+  virtual bool is_cancelled() const = 0;
 };
 
 class ARROW_FLIGHT_EXPORT FlightServerOptions {
diff --git a/cpp/src/arrow/util/cancel.cc b/cpp/src/arrow/util/cancel.cc
index 533075a9a64..874b2c2c886 100644
--- a/cpp/src/arrow/util/cancel.cc
+++ b/cpp/src/arrow/util/cancel.cc
@@ -74,14 +74,14 @@ void StopSource::Reset() {
 
 StopToken StopSource::token() { return StopToken(impl_); }
 
-bool StopToken::IsStopRequested() {
+bool StopToken::IsStopRequested() const {
   if (!impl_) {
     return false;
   }
   return impl_->requested_.load() != 0;
 }
 
-Status StopToken::Poll() {
+Status StopToken::Poll() const {
   if (!impl_) {
     return Status::OK();
   }
diff --git a/cpp/src/arrow/util/cancel.h b/cpp/src/arrow/util/cancel.h
index 506a7e16e4f..9e00f673a21 100644
--- a/cpp/src/arrow/util/cancel.h
+++ b/cpp/src/arrow/util/cancel.h
@@ -65,8 +65,8 @@ class ARROW_EXPORT StopToken {
   static StopToken Unstoppable() { return StopToken(); }
 
   // Producer API (the side that gets asked to stopped)
-  Status Poll();
-  bool IsStopRequested();
+  Status Poll() const;
+  bool IsStopRequested() const;
 
  protected:
   std::shared_ptr<StopSourceImpl> impl_;
diff --git a/python/pyarrow/_flight.pyx b/python/pyarrow/_flight.pyx
index e5d80df9380..a84166ce866 100644
--- a/python/pyarrow/_flight.pyx
+++ b/python/pyarrow/_flight.pyx
@@ -32,7 +32,7 @@ from cython.operator cimport postincrement
 from libcpp cimport bool as c_bool
 
 from pyarrow.lib cimport *
-from pyarrow.lib import ArrowException, ArrowInvalid
+from pyarrow.lib import ArrowException, ArrowInvalid, SignalStopHandler
 from pyarrow.lib import as_buffer, frombytes, tobytes
 from pyarrow.includes.libarrow_flight cimport *
 from pyarrow.ipc import _get_legacy_format_default, _ReadPandasMixin
@@ -897,6 +897,19 @@ cdef class FlightStreamReader(MetadataRecordBatchReader):
         with nogil:
             (<CFlightStreamReader*> self.reader.get()).Cancel()
 
+    def read_all(self):
+        """Read the entire contents of the stream as a Table."""
+        cdef:
+            shared_ptr[CTable] c_table
+            CStopToken stop_token
+        with SignalStopHandler() as stop_handler:
+            stop_token = (<StopToken> stop_handler.stop_token).stop_token
+            with nogil:
+                check_flight_status(
+                    (<CFlightStreamReader*> self.reader.get())
+                    .ReadAllWithStopToken(&c_table, stop_token))
+        return pyarrow_wrap_table(c_table)
+
 
 cdef class MetadataRecordBatchWriter(_CRecordBatchWriter):
     """A RecordBatchWriter that also allows writing application metadata.
@@ -1204,17 +1217,20 @@ cdef class FlightClient(_Weakrefable):
             vector[CActionType] results
             CFlightCallOptions* c_options = FlightCallOptions.unwrap(options)
 
-        with nogil:
-            check_flight_status(
-                self.client.get().ListActions(deref(c_options), &results))
+        with SignalStopHandler() as stop_handler:
+            c_options.stop_token = \
+                (<StopToken> stop_handler.stop_token).stop_token
+            with nogil:
+                check_flight_status(
+                    self.client.get().ListActions(deref(c_options), &results))
 
-        result = []
-        for action_type in results:
-            py_action = ActionType(frombytes(action_type.type),
-                                   frombytes(action_type.description))
-            result.append(py_action)
+            result = []
+            for action_type in results:
+                py_action = ActionType(frombytes(action_type.type),
+                                       frombytes(action_type.description))
+                result.append(py_action)
 
-        return result
+            return result
 
     def do_action(self, action, options: FlightCallOptions = None):
         """
@@ -1247,9 +1263,8 @@ cdef class FlightClient(_Weakrefable):
         cdef CAction c_action = Action.unwrap(<Action> action)
         with nogil:
             check_flight_status(
-                self.client.get().DoAction(deref(c_options), c_action,
-                                           &results))
-
+                self.client.get().DoAction(
+                    deref(c_options), c_action, &results))
         while True:
             result = Result.__new__(Result)
             with nogil:
@@ -1270,18 +1285,21 @@ cdef class FlightClient(_Weakrefable):
         if criteria:
             c_criteria.expression = tobytes(criteria)
 
-        with nogil:
-            check_flight_status(
-                self.client.get().ListFlights(deref(c_options),
-                                              c_criteria, &listing))
-
-        while True:
-            result = FlightInfo.__new__(FlightInfo)
+        with SignalStopHandler() as stop_handler:
+            c_options.stop_token = \
+                (<StopToken> stop_handler.stop_token).stop_token
             with nogil:
-                check_flight_status(listing.get().Next(&result.info))
-                if result.info == NULL:
-                    break
-            yield result
+                check_flight_status(
+                    self.client.get().ListFlights(deref(c_options),
+                                                  c_criteria, &listing))
+
+            while True:
+                result = FlightInfo.__new__(FlightInfo)
+                with nogil:
+                    check_flight_status(listing.get().Next(&result.info))
+                    if result.info == NULL:
+                        break
+                yield result
 
     def get_flight_info(self, descriptor: FlightDescriptor,
                         options: FlightCallOptions = None):
@@ -1497,6 +1515,9 @@ cdef class ServerCallContext(_Weakrefable):
         # Set safe=True as gRPC on Windows sometimes gives garbage bytes
         return frombytes(self.context.peer(), safe=True)
 
+    def is_cancelled(self):
+        return self.context.is_cancelled()
+
     def get_middleware(self, key):
         """
         Get a middleware instance by key.
diff --git a/python/pyarrow/includes/libarrow_flight.pxd b/python/pyarrow/includes/libarrow_flight.pxd
index 737babb3fd5..2ac737abaa0 100644
--- a/python/pyarrow/includes/libarrow_flight.pxd
+++ b/python/pyarrow/includes/libarrow_flight.pxd
@@ -166,6 +166,8 @@ cdef extern from "arrow/flight/api.h" namespace "arrow" nogil:
     cdef cppclass CFlightStreamReader \
             " arrow::flight::FlightStreamReader"(CMetadataRecordBatchReader):
         void Cancel()
+        CStatus ReadAllWithStopToken" ReadAll"\
+            (shared_ptr[CTable]* table, const CStopToken& stop_token)
 
     cdef cppclass CFlightMessageReader \
             " arrow::flight::FlightMessageReader"(CMetadataRecordBatchReader):
@@ -211,6 +213,7 @@ cdef extern from "arrow/flight/api.h" namespace "arrow" nogil:
     cdef cppclass CServerCallContext" arrow::flight::ServerCallContext":
         c_string& peer_identity()
         c_string& peer()
+        c_bool is_cancelled()
         CServerMiddleware* GetMiddleware(const c_string& key)
 
     cdef cppclass CTimeoutDuration" arrow::flight::TimeoutDuration":
@@ -221,6 +224,7 @@ cdef extern from "arrow/flight/api.h" namespace "arrow" nogil:
         CTimeoutDuration timeout
         CIpcWriteOptions write_options
         vector[pair[c_string, c_string]] headers
+        CStopToken stop_token
 
     cdef cppclass CCertKeyPair" arrow::flight::CertKeyPair":
         CCertKeyPair()
diff --git a/python/pyarrow/tests/test_csv.py b/python/pyarrow/tests/test_csv.py
index 3a27132ec61..32c0353fada 100644
--- a/python/pyarrow/tests/test_csv.py
+++ b/python/pyarrow/tests/test_csv.py
@@ -27,7 +27,6 @@
 import shutil
 import signal
 import string
-import sys
 import tempfile
 import threading
 import time
@@ -41,6 +40,7 @@
 from pyarrow.csv import (
     open_csv, read_csv, ReadOptions, ParseOptions, ConvertOptions, ISO8601,
     write_csv, WriteOptions)
+from pyarrow.tests import util
 
 
 def generate_col_names():
@@ -1011,17 +1011,8 @@ def test_cancellation(self):
         if (threading.current_thread().ident !=
                 threading.main_thread().ident):
             pytest.skip("test only works from main Python thread")
-
-        if sys.version_info >= (3, 8):
-            raise_signal = signal.raise_signal
-        elif os.name == 'nt':
-            # On Windows, os.kill() doesn't actually send a signal,
-            # it just terminates the process with the given exit code.
-            pytest.skip("test requires Python 3.8+ on Windows")
-        else:
-            # On Unix, emulate raise_signal() with os.kill().
-            def raise_signal(signum):
-                os.kill(os.getpid(), signum)
+        # Skips test if not available
+        raise_signal = util.get_raise_signal()
 
         # Make the interruptible workload large enough to not finish
         # before the interrupt comes, even in release mode on fast machines
diff --git a/python/pyarrow/tests/test_flight.py b/python/pyarrow/tests/test_flight.py
index 585fdb2a062..1ab01f735e9 100644
--- a/python/pyarrow/tests/test_flight.py
+++ b/python/pyarrow/tests/test_flight.py
@@ -17,7 +17,9 @@
 
 import ast
 import base64
+import itertools
 import os
+import signal
 import struct
 import tempfile
 import threading
@@ -30,6 +32,7 @@
 
 from pyarrow.lib import tobytes
 from pyarrow.util import pathlib, find_free_port
+from pyarrow.tests import util
 
 try:
     from pyarrow import flight
@@ -1810,3 +1813,60 @@ def test_generic_options():
                                 generic_options=options)
         with pytest.raises(pa.ArrowInvalid):
             client.do_get(flight.Ticket(b'ints'))
+
+
+class CancelFlightServer(FlightServerBase):
+    """A server for testing StopToken."""
+
+    def do_get(self, context, ticket):
+        schema = pa.schema([])
+        rb = pa.RecordBatch.from_arrays([], schema=schema)
+        return flight.GeneratorStream(schema, itertools.repeat(rb))
+
+    def do_exchange(self, context, descriptor, reader, writer):
+        schema = pa.schema([])
+        rb = pa.RecordBatch.from_arrays([], schema=schema)
+        writer.begin(schema)
+        while not context.is_cancelled():
+            writer.write_batch(rb)
+            time.sleep(0.5)
+
+
+def test_interrupt():
+    if threading.current_thread().ident != threading.main_thread().ident:
+        pytest.skip("test only works from main Python thread")
+    # Skips test if not available
+    raise_signal = util.get_raise_signal()
+
+    def signal_from_thread():
+        time.sleep(0.5)
+        raise_signal(signal.SIGINT)
+
+    exc_types = (KeyboardInterrupt, pa.ArrowCancelled)
+
+    def test(read_all):
+        try:
+            try:
+                t = threading.Thread(target=signal_from_thread)
+                with pytest.raises(exc_types) as exc_info:
+                    t.start()
+                    read_all()
+            finally:
+                t.join()
+        except KeyboardInterrupt:
+            # In case KeyboardInterrupt didn't interrupt read_all
+            # above, at least prevent it from stopping the test suite
+            pytest.fail("KeyboardInterrupt didn't interrupt Flight read_all")
+        e = exc_info.value.__context__
+        assert isinstance(e, pa.ArrowCancelled) or \
+            isinstance(e, KeyboardInterrupt)
+
+    with CancelFlightServer() as server:
+        client = FlightClient(("localhost", server.port))
+
+        reader = client.do_get(flight.Ticket(b""))
+        test(reader.read_all)
+
+        descriptor = flight.FlightDescriptor.for_command(b"echo")
+        writer, reader = client.do_exchange(descriptor)
+        test(reader.read_all)
diff --git a/python/pyarrow/tests/util.py b/python/pyarrow/tests/util.py
index ea43b7c4e64..3425fe01c9b 100644
--- a/python/pyarrow/tests/util.py
+++ b/python/pyarrow/tests/util.py
@@ -25,10 +25,13 @@
 import numpy as np
 import os
 import random
+import signal
 import string
 import subprocess
 import sys
 
+import pytest
+
 import pyarrow as pa
 
 
@@ -237,3 +240,17 @@ def __init__(self, path):
 
     def __fspath__(self):
         return str(self._path)
+
+
+def get_raise_signal():
+    if sys.version_info >= (3, 8):
+        return signal.raise_signal
+    elif os.name == 'nt':
+        # On Windows, os.kill() doesn't actually send a signal,
+        # it just terminates the process with the given exit code.
+        pytest.skip("test requires Python 3.8+ on Windows")
+    else:
+        # On Unix, emulate raise_signal() with os.kill().
+        def raise_signal(signum):
+            os.kill(os.getpid(), signum)
+        return raise_signal

From 5754e4bb13a83c0bc6d082d67b1571bd8e6bae06 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Mon, 7 Jun 2021 18:29:17 +0200
Subject: [PATCH 370/719] ARROW-12989: [CI] Avoid aggressive cancellation of
 the "Dev PR" workflow
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #10467 from pitrou/ARROW-12989-dev-pr-cancel

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 .github/workflows/dev_pr.yml | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/dev_pr.yml b/.github/workflows/dev_pr.yml
index e9ff7d9c635..5f3acd7bebf 100644
--- a/.github/workflows/dev_pr.yml
+++ b/.github/workflows/dev_pr.yml
@@ -28,9 +28,8 @@ on:
       - edited
       - synchronize
 
-concurrency:
-  group: ${{ github.repository }}-${{ github.ref }}-${{ github.workflow }}
-  cancel-in-progress: true
+# NOTE: not using the "cancel-in-progress" feature here as the group key
+# does not have enough information for linking it to a particular PR
 
 jobs:
   process:

From 2820b25c88856476380a4840a3e1388466fda676 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Mon, 7 Jun 2021 19:08:01 +0200
Subject: [PATCH 371/719] ARROW-10959: [C++] Add scalar string join kernel

@jorisvandenbossche I've implemented this kernel as a binary (arity) kernel, so the input list array *and* the separator input string array can both be an array (see python test).

I did not implement the case where the input list is a scalar, and the separator an array, since I don't think that's very common.

And note that the kernel is named `binary_join` because it takes string-like and binary-like inputs.

Closes #8990 from maartenbreddels/ARROW-10959

Lead-authored-by: Antoine Pitrou <antoine@python.org>
Co-authored-by: Maarten A. Breddels <maartenbreddels@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/array/array_binary_test.cc      |  70 +++++
 cpp/src/arrow/array/builder_binary.h          |  33 +++
 cpp/src/arrow/compute/function.cc             |   5 +-
 .../arrow/compute/kernels/codegen_internal.h  |   6 +-
 .../arrow/compute/kernels/scalar_string.cc    | 278 ++++++++++++++++++
 .../kernels/scalar_string_benchmark.cc        |  44 +++
 .../compute/kernels/scalar_string_test.cc     |  53 ++++
 cpp/src/arrow/compute/kernels/test_util.cc    |  82 ++++--
 cpp/src/arrow/compute/kernels/test_util.h     |  14 +-
 docs/source/cpp/compute.rst                   |  19 +-
 python/pyarrow/tests/test_compute.py          |  11 +
 11 files changed, 587 insertions(+), 28 deletions(-)

diff --git a/cpp/src/arrow/array/array_binary_test.cc b/cpp/src/arrow/array/array_binary_test.cc
index 5c247a6dc66..e593cf7e6c4 100644
--- a/cpp/src/arrow/array/array_binary_test.cc
+++ b/cpp/src/arrow/array/array_binary_test.cc
@@ -473,6 +473,70 @@ class TestStringBuilder : public TestBuilder {
     CheckStringArray(*result_, strings, is_valid, reps);
   }
 
+  void TestExtendCurrent() {
+    std::vector<std::string> strings = {"", "bbbb", "aaaaa", "", "ccc"};
+    std::vector<uint8_t> is_valid = {1, 1, 1, 0, 1};
+
+    int N = static_cast<int>(strings.size());
+    int reps = 10;
+
+    for (int j = 0; j < reps; ++j) {
+      for (int i = 0; i < N; ++i) {
+        if (!is_valid[i]) {
+          ASSERT_OK(builder_->AppendNull());
+        } else if (strings[i].length() > 3) {
+          ASSERT_OK(builder_->Append(strings[i].substr(0, 3)));
+          ASSERT_OK(builder_->ExtendCurrent(strings[i].substr(3)));
+        } else {
+          ASSERT_OK(builder_->Append(strings[i]));
+        }
+      }
+    }
+    Done();
+
+    ASSERT_EQ(reps * N, result_->length());
+    ASSERT_EQ(reps, result_->null_count());
+    ASSERT_EQ(reps * 12, result_->value_data()->size());
+
+    CheckStringArray(*result_, strings, is_valid, reps);
+  }
+
+  void TestExtendCurrentUnsafe() {
+    std::vector<std::string> strings = {"", "bbbb", "aaaaa", "", "ccc"};
+    std::vector<uint8_t> is_valid = {1, 1, 1, 0, 1};
+
+    int N = static_cast<int>(strings.size());
+    int reps = 13;
+    int64_t total_length = 0;
+    for (const auto& s : strings) {
+      total_length += static_cast<int64_t>(s.size());
+    }
+
+    ASSERT_OK(builder_->Reserve(N * reps));
+    ASSERT_OK(builder_->ReserveData(total_length * reps));
+
+    for (int j = 0; j < reps; ++j) {
+      for (int i = 0; i < N; ++i) {
+        if (!is_valid[i]) {
+          builder_->UnsafeAppendNull();
+        } else if (strings[i].length() > 3) {
+          builder_->UnsafeAppend(strings[i].substr(0, 3));
+          builder_->UnsafeExtendCurrent(strings[i].substr(3));
+        } else {
+          builder_->UnsafeAppend(strings[i]);
+        }
+      }
+    }
+    ASSERT_EQ(builder_->value_data_length(), total_length * reps);
+    Done();
+
+    ASSERT_EQ(reps * N, result_->length());
+    ASSERT_EQ(reps, result_->null_count());
+    ASSERT_EQ(reps * 12, result_->value_data()->size());
+
+    CheckStringArray(*result_, strings, is_valid, reps);
+  }
+
   void TestVectorAppend() {
     std::vector<std::string> strings = {"", "bb", "a", "", "ccc"};
     std::vector<uint8_t> valid_bytes = {1, 1, 1, 0, 1};
@@ -608,6 +672,12 @@ TYPED_TEST(TestStringBuilder, TestScalarAppend) { this->TestScalarAppend(); }
 
 TYPED_TEST(TestStringBuilder, TestScalarAppendUnsafe) { this->TestScalarAppendUnsafe(); }
 
+TYPED_TEST(TestStringBuilder, TestExtendCurrent) { this->TestExtendCurrent(); }
+
+TYPED_TEST(TestStringBuilder, TestExtendCurrentUnsafe) {
+  this->TestExtendCurrentUnsafe();
+}
+
 TYPED_TEST(TestStringBuilder, TestVectorAppend) { this->TestVectorAppend(); }
 
 TYPED_TEST(TestStringBuilder, TestAppendCStringsWithValidBytes) {
diff --git a/cpp/src/arrow/array/builder_binary.h b/cpp/src/arrow/array/builder_binary.h
index bc49c7d6787..a60031258ad 100644
--- a/cpp/src/arrow/array/builder_binary.h
+++ b/cpp/src/arrow/array/builder_binary.h
@@ -77,6 +77,23 @@ class BaseBinaryBuilder : public ArrayBuilder {
     return Append(value.data(), static_cast<offset_type>(value.size()));
   }
 
+  /// Extend the last appended value by appending more data at the end
+  ///
+  /// Unlike Append, this does not create a new offset.
+  Status ExtendCurrent(const uint8_t* value, offset_type length) {
+    // Safety check for UBSAN.
+    if (ARROW_PREDICT_TRUE(length > 0)) {
+      ARROW_RETURN_NOT_OK(ValidateOverflow(length));
+      ARROW_RETURN_NOT_OK(value_data_builder_.Append(value, length));
+    }
+    return Status::OK();
+  }
+
+  Status ExtendCurrent(util::string_view value) {
+    return ExtendCurrent(reinterpret_cast<const uint8_t*>(value.data()),
+                         static_cast<offset_type>(value.size()));
+  }
+
   Status AppendNulls(int64_t length) final {
     const int64_t num_bytes = value_data_builder_.length();
     ARROW_RETURN_NOT_OK(Reserve(length));
@@ -133,12 +150,28 @@ class BaseBinaryBuilder : public ArrayBuilder {
     UnsafeAppend(value.data(), static_cast<offset_type>(value.size()));
   }
 
+  /// Like ExtendCurrent, but do not check capacity
+  void UnsafeExtendCurrent(const uint8_t* value, offset_type length) {
+    value_data_builder_.UnsafeAppend(value, length);
+  }
+
+  void UnsafeExtendCurrent(util::string_view value) {
+    UnsafeExtendCurrent(reinterpret_cast<const uint8_t*>(value.data()),
+                        static_cast<offset_type>(value.size()));
+  }
+
   void UnsafeAppendNull() {
     const int64_t num_bytes = value_data_builder_.length();
     offsets_builder_.UnsafeAppend(static_cast<offset_type>(num_bytes));
     UnsafeAppendToBitmap(false);
   }
 
+  void UnsafeAppendEmptyValue() {
+    const int64_t num_bytes = value_data_builder_.length();
+    offsets_builder_.UnsafeAppend(static_cast<offset_type>(num_bytes));
+    UnsafeAppendToBitmap(true);
+  }
+
   /// \brief Append a sequence of strings in one shot.
   ///
   /// \param[in] values a vector of strings
diff --git a/cpp/src/arrow/compute/function.cc b/cpp/src/arrow/compute/function.cc
index f74bb245d77..0f94baaedfc 100644
--- a/cpp/src/arrow/compute/function.cc
+++ b/cpp/src/arrow/compute/function.cc
@@ -210,8 +210,9 @@ Status Function::Validate() const {
     if (arity_.is_varargs && arg_count == arity_.num_args + 1) {
       return Status::OK();
     }
-    return Status::Invalid("In function '", name_,
-                           "': ", "number of argument names != function arity");
+    return Status::Invalid(
+        "In function '", name_,
+        "': ", "number of argument names for function documentation != function arity");
   }
   return Status::OK();
 }
diff --git a/cpp/src/arrow/compute/kernels/codegen_internal.h b/cpp/src/arrow/compute/kernels/codegen_internal.h
index 6d5c837f514..913c4dacf56 100644
--- a/cpp/src/arrow/compute/kernels/codegen_internal.h
+++ b/cpp/src/arrow/compute/kernels/codegen_internal.h
@@ -1193,15 +1193,15 @@ ArrayKernelExec GenerateTypeAgnosticPrimitive(detail::GetTypeId get_id) {
 }
 
 // similar to GenerateTypeAgnosticPrimitive, but for variable types
-template <template <typename...> class Generator>
+template <template <typename...> class Generator, typename... Args>
 ArrayKernelExec GenerateTypeAgnosticVarBinaryBase(detail::GetTypeId get_id) {
   switch (get_id.id) {
     case Type::BINARY:
     case Type::STRING:
-      return Generator<BinaryType>::Exec;
+      return Generator<BinaryType, Args...>::Exec;
     case Type::LARGE_BINARY:
     case Type::LARGE_STRING:
-      return Generator<LargeBinaryType>::Exec;
+      return Generator<LargeBinaryType, Args...>::Exec;
     default:
       DCHECK(false);
       return ExecFail;
diff --git a/cpp/src/arrow/compute/kernels/scalar_string.cc b/cpp/src/arrow/compute/kernels/scalar_string.cc
index 9db16e26ca5..154b57d1d8e 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string.cc
@@ -2643,6 +2643,283 @@ void AddUtf8Length(FunctionRegistry* registry) {
   DCHECK_OK(registry->AddFunction(std::move(func)));
 }
 
+template <typename BinaryType, typename ListType>
+struct BinaryJoin {
+  using ArrayType = typename TypeTraits<BinaryType>::ArrayType;
+  using ListArrayType = typename TypeTraits<ListType>::ArrayType;
+  using ListScalarType = typename TypeTraits<ListType>::ScalarType;
+  using ListOffsetType = typename ListArrayType::offset_type;
+  using BuilderType = typename TypeTraits<BinaryType>::BuilderType;
+
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    if (batch[0].kind() == Datum::SCALAR) {
+      if (batch[1].kind() == Datum::SCALAR) {
+        return ExecScalarScalar(ctx, *batch[0].scalar(), *batch[1].scalar(), out);
+      }
+      DCHECK_EQ(batch[1].kind(), Datum::ARRAY);
+      return ExecScalarArray(ctx, *batch[0].scalar(), batch[1].array(), out);
+    }
+    DCHECK_EQ(batch[0].kind(), Datum::ARRAY);
+    if (batch[1].kind() == Datum::SCALAR) {
+      return ExecArrayScalar(ctx, batch[0].array(), *batch[1].scalar(), out);
+    }
+    DCHECK_EQ(batch[1].kind(), Datum::ARRAY);
+    return ExecArrayArray(ctx, batch[0].array(), batch[1].array(), out);
+  }
+
+  struct ListScalarOffsetLookup {
+    const ArrayType& values;
+
+    int64_t GetStart(int64_t i) { return 0; }
+    int64_t GetStop(int64_t i) { return values.length(); }
+    bool IsNull(int64_t i) { return false; }
+  };
+
+  struct ListArrayOffsetLookup {
+    explicit ListArrayOffsetLookup(const ListArrayType& lists)
+        : lists_(lists), offsets_(lists.raw_value_offsets()) {}
+
+    int64_t GetStart(int64_t i) { return offsets_[i]; }
+    int64_t GetStop(int64_t i) { return offsets_[i + 1]; }
+    bool IsNull(int64_t i) { return lists_.IsNull(i); }
+
+   private:
+    const ListArrayType& lists_;
+    const ListOffsetType* offsets_;
+  };
+
+  struct SeparatorScalarLookup {
+    const util::string_view separator;
+
+    bool IsNull(int64_t i) { return false; }
+    util::string_view GetView(int64_t i) { return separator; }
+  };
+
+  struct SeparatorArrayLookup {
+    const ArrayType& separators;
+
+    bool IsNull(int64_t i) { return separators.IsNull(i); }
+    util::string_view GetView(int64_t i) { return separators.GetView(i); }
+  };
+
+  // Scalar, scalar -> scalar
+  static Status ExecScalarScalar(KernelContext* ctx, const Scalar& left,
+                                 const Scalar& right, Datum* out) {
+    const auto& list = checked_cast<const ListScalarType&>(left);
+    const auto& separator_scalar = checked_cast<const BaseBinaryScalar&>(right);
+    if (!list.is_valid || !separator_scalar.is_valid) {
+      return Status::OK();
+    }
+    util::string_view separator(*separator_scalar.value);
+
+    const auto& strings = checked_cast<const ArrayType&>(*list.value);
+    if (strings.null_count() > 0) {
+      out->scalar()->is_valid = false;
+      return Status::OK();
+    }
+
+    TypedBufferBuilder<uint8_t> builder(ctx->memory_pool());
+    auto Append = [&](util::string_view value) {
+      return builder.Append(reinterpret_cast<const uint8_t*>(value.data()),
+                            static_cast<int64_t>(value.size()));
+    };
+    if (strings.length() > 0) {
+      auto data_length =
+          strings.total_values_length() + (strings.length() - 1) * separator.length();
+      RETURN_NOT_OK(builder.Reserve(data_length));
+      RETURN_NOT_OK(Append(strings.GetView(0)));
+      for (int64_t j = 1; j < strings.length(); j++) {
+        RETURN_NOT_OK(Append(separator));
+        RETURN_NOT_OK(Append(strings.GetView(j)));
+      }
+    }
+    auto out_scalar = checked_cast<BaseBinaryScalar*>(out->scalar().get());
+    return builder.Finish(&out_scalar->value);
+  }
+
+  // Scalar, array -> array
+  static Status ExecScalarArray(KernelContext* ctx, const Scalar& left,
+                                const std::shared_ptr<ArrayData>& right, Datum* out) {
+    const auto& list_scalar = checked_cast<const BaseListScalar&>(left);
+    if (!list_scalar.is_valid) {
+      ARROW_ASSIGN_OR_RAISE(
+          auto nulls, MakeArrayOfNull(right->type, right->length, ctx->memory_pool()));
+      *out = *nulls->data();
+      return Status::OK();
+    }
+    const auto& strings = checked_cast<const ArrayType&>(*list_scalar.value);
+    if (strings.null_count() != 0) {
+      ARROW_ASSIGN_OR_RAISE(
+          auto nulls, MakeArrayOfNull(right->type, right->length, ctx->memory_pool()));
+      *out = *nulls->data();
+      return Status::OK();
+    }
+    const ArrayType separators(right);
+
+    BuilderType builder(ctx->memory_pool());
+    RETURN_NOT_OK(builder.Reserve(separators.length()));
+
+    // Presize data to avoid multiple reallocations when joining strings
+    int64_t total_data_length = 0;
+    const int64_t list_length = strings.length();
+    if (list_length) {
+      const int64_t string_length = strings.total_values_length();
+      total_data_length +=
+          string_length * (separators.length() - separators.null_count());
+      for (int64_t i = 0; i < separators.length(); ++i) {
+        if (separators.IsNull(i)) {
+          continue;
+        }
+        total_data_length += (list_length - 1) * separators.value_length(i);
+      }
+    }
+    RETURN_NOT_OK(builder.ReserveData(total_data_length));
+
+    return JoinStrings(separators.length(), strings, ListScalarOffsetLookup{strings},
+                       SeparatorArrayLookup{separators}, &builder, out);
+  }
+
+  // Array, scalar -> array
+  static Status ExecArrayScalar(KernelContext* ctx,
+                                const std::shared_ptr<ArrayData>& left,
+                                const Scalar& right, Datum* out) {
+    const ListArrayType lists(left);
+    const auto& separator_scalar = checked_cast<const BaseBinaryScalar&>(right);
+
+    if (!separator_scalar.is_valid) {
+      ARROW_ASSIGN_OR_RAISE(
+          auto nulls,
+          MakeArrayOfNull(lists.value_type(), lists.length(), ctx->memory_pool()));
+      *out = *nulls->data();
+      return Status::OK();
+    }
+
+    util::string_view separator(*separator_scalar.value);
+    const auto& strings = checked_cast<const ArrayType&>(*lists.values());
+    const auto list_offsets = lists.raw_value_offsets();
+
+    BuilderType builder(ctx->memory_pool());
+    RETURN_NOT_OK(builder.Reserve(lists.length()));
+
+    // Presize data to avoid multiple reallocations when joining strings
+    int64_t total_data_length = strings.total_values_length();
+    for (int64_t i = 0; i < lists.length(); ++i) {
+      const auto start = list_offsets[i], end = list_offsets[i + 1];
+      if (end > start && !ValuesContainNull(strings, start, end)) {
+        total_data_length += (end - start - 1) * separator.length();
+      }
+    }
+    RETURN_NOT_OK(builder.ReserveData(total_data_length));
+
+    return JoinStrings(lists.length(), strings, ListArrayOffsetLookup{lists},
+                       SeparatorScalarLookup{separator}, &builder, out);
+  }
+
+  // Array, array -> array
+  static Status ExecArrayArray(KernelContext* ctx, const std::shared_ptr<ArrayData>& left,
+                               const std::shared_ptr<ArrayData>& right, Datum* out) {
+    const ListArrayType lists(left);
+    const auto& strings = checked_cast<const ArrayType&>(*lists.values());
+    const auto list_offsets = lists.raw_value_offsets();
+    const auto string_offsets = strings.raw_value_offsets();
+    const ArrayType separators(right);
+
+    BuilderType builder(ctx->memory_pool());
+    RETURN_NOT_OK(builder.Reserve(lists.length()));
+
+    // Presize data to avoid multiple reallocations when joining strings
+    int64_t total_data_length = 0;
+    for (int64_t i = 0; i < lists.length(); ++i) {
+      if (separators.IsNull(i)) {
+        continue;
+      }
+      const auto start = list_offsets[i], end = list_offsets[i + 1];
+      if (end > start && !ValuesContainNull(strings, start, end)) {
+        total_data_length += string_offsets[end] - string_offsets[start];
+        total_data_length += (end - start - 1) * separators.value_length(i);
+      }
+    }
+    RETURN_NOT_OK(builder.ReserveData(total_data_length));
+
+    struct SeparatorLookup {
+      const ArrayType& separators;
+
+      bool IsNull(int64_t i) { return separators.IsNull(i); }
+      util::string_view GetView(int64_t i) { return separators.GetView(i); }
+    };
+    return JoinStrings(lists.length(), strings, ListArrayOffsetLookup{lists},
+                       SeparatorArrayLookup{separators}, &builder, out);
+  }
+
+  template <typename ListOffsetLookup, typename SeparatorLookup>
+  static Status JoinStrings(int64_t length, const ArrayType& strings,
+                            ListOffsetLookup&& list_offsets, SeparatorLookup&& separators,
+                            BuilderType* builder, Datum* out) {
+    for (int64_t i = 0; i < length; ++i) {
+      if (list_offsets.IsNull(i) || separators.IsNull(i)) {
+        builder->UnsafeAppendNull();
+        continue;
+      }
+      const auto j_start = list_offsets.GetStart(i), j_end = list_offsets.GetStop(i);
+      if (j_start == j_end) {
+        builder->UnsafeAppendEmptyValue();
+        continue;
+      }
+      if (ValuesContainNull(strings, j_start, j_end)) {
+        builder->UnsafeAppendNull();
+        continue;
+      }
+      builder->UnsafeAppend(strings.GetView(j_start));
+      for (int64_t j = j_start + 1; j < j_end; ++j) {
+        builder->UnsafeExtendCurrent(separators.GetView(i));
+        builder->UnsafeExtendCurrent(strings.GetView(j));
+      }
+    }
+
+    std::shared_ptr<Array> string_array;
+    RETURN_NOT_OK(builder->Finish(&string_array));
+    *out = *string_array->data();
+    // Correct the output type based on the input
+    out->mutable_array()->type = strings.type();
+    return Status::OK();
+  }
+
+  static bool ValuesContainNull(const ArrayType& values, int64_t start, int64_t end) {
+    if (values.null_count() == 0) {
+      return false;
+    }
+    for (int64_t i = start; i < end; ++i) {
+      if (values.IsNull(i)) {
+        return true;
+      }
+    }
+    return false;
+  }
+};
+
+const FunctionDoc binary_join_doc(
+    "Join a list of strings together with a `separator` to form a single string",
+    ("Insert `separator` between `list` elements, and concatenate them.\n"
+     "Any null input and any null `list` element emits a null output.\n"),
+    {"list", "separator"});
+
+template <typename ListType>
+void AddBinaryJoinForListType(ScalarFunction* func) {
+  for (const std::shared_ptr<DataType>& ty : BaseBinaryTypes()) {
+    auto exec = GenerateTypeAgnosticVarBinaryBase<BinaryJoin, ListType>(*ty);
+    auto list_ty = std::make_shared<ListType>(ty);
+    DCHECK_OK(func->AddKernel({InputType(list_ty), InputType(ty)}, ty, exec));
+  }
+}
+
+void AddBinaryJoin(FunctionRegistry* registry) {
+  auto func =
+      std::make_shared<ScalarFunction>("binary_join", Arity::Binary(), &binary_join_doc);
+  AddBinaryJoinForListType<ListType>(func.get());
+  AddBinaryJoinForListType<LargeListType>(func.get());
+  DCHECK_OK(registry->AddFunction(std::move(func)));
+}
+
 template <template <typename> class ExecFunctor>
 void MakeUnaryStringBatchKernel(
     std::string name, FunctionRegistry* registry, const FunctionDoc* doc,
@@ -2948,6 +3225,7 @@ void RegisterScalarStringAscii(FunctionRegistry* registry) {
   AddSlice(registry);
   AddSplit(registry);
   AddStrptime(registry);
+  AddBinaryJoin(registry);
 }
 
 }  // namespace internal
diff --git a/cpp/src/arrow/compute/kernels/scalar_string_benchmark.cc b/cpp/src/arrow/compute/kernels/scalar_string_benchmark.cc
index 32ac5a7df3b..8528c0d9e5d 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string_benchmark.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string_benchmark.cc
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include <functional>
+
 #include "benchmark/benchmark.h"
 
 #include "arrow/compute/api_scalar.h"
@@ -22,8 +24,12 @@
 #include "arrow/testing/gtest_util.h"
 #include "arrow/testing/random.h"
 #include "arrow/util/benchmark_util.h"
+#include "arrow/util/checked_cast.h"
 
 namespace arrow {
+
+using internal::checked_cast;
+
 namespace compute {
 
 constexpr auto kSeed = 0x94378165;
@@ -104,6 +110,41 @@ static void TrimManyUtf8(benchmark::State& state) {
 }
 #endif
 
+using SeparatorFactory = std::function<Datum(int64_t n, double null_probability)>;
+
+static void BinaryJoin(benchmark::State& state, SeparatorFactory make_separator) {
+  const int64_t n_strings = 10000;
+  const int64_t n_lists = 1000;
+  const double null_probability = 0.02;
+
+  random::RandomArrayGenerator rng(kSeed);
+
+  auto strings =
+      rng.String(n_strings, /*min_length=*/5, /*max_length=*/20, null_probability);
+  auto lists = rng.List(*strings, n_lists, null_probability, /*force_empty_nulls=*/true);
+  auto separator = make_separator(n_lists, null_probability);
+
+  for (auto _ : state) {
+    ABORT_NOT_OK(CallFunction("binary_join", {lists, separator}));
+  }
+  state.SetBytesProcessed(
+      state.iterations() *
+      checked_cast<const StringArray&>(*strings).total_values_length());
+}
+
+static void BinaryJoinArrayScalar(benchmark::State& state) {
+  BinaryJoin(state, [](int64_t n, double null_probability) -> Datum {
+    return ScalarFromJSON(utf8(), R"("--")");
+  });
+}
+
+static void BinaryJoinArrayArray(benchmark::State& state) {
+  BinaryJoin(state, [](int64_t n, double null_probability) -> Datum {
+    random::RandomArrayGenerator rng(kSeed + 1);
+    return rng.String(n, /*min_length=*/0, /*max_length=*/4, null_probability);
+  });
+}
+
 BENCHMARK(AsciiLower);
 BENCHMARK(AsciiUpper);
 BENCHMARK(IsAlphaNumericAscii);
@@ -119,5 +160,8 @@ BENCHMARK(TrimSingleUtf8);
 BENCHMARK(TrimManyUtf8);
 #endif
 
+BENCHMARK(BinaryJoinArrayScalar);
+BENCHMARK(BinaryJoinArrayArray);
+
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
index bd5c8eec03f..bd9dba2deb3 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
@@ -42,6 +42,7 @@ template <typename TestType>
 class BaseTestStringKernels : public ::testing::Test {
  protected:
   using OffsetType = typename TypeTraits<TestType>::OffsetType;
+  using ScalarType = typename TypeTraits<TestType>::ScalarType;
 
   void CheckUnary(std::string func_name, std::string json_input,
                   std::shared_ptr<DataType> out_ty, std::string json_expected,
@@ -59,6 +60,11 @@ class BaseTestStringKernels : public ::testing::Test {
 
   std::shared_ptr<DataType> type() { return TypeTraits<TestType>::type_singleton(); }
 
+  template <typename CType>
+  std::shared_ptr<ScalarType> scalar(CType value) {
+    return std::make_shared<ScalarType>(value);
+  }
+
   std::shared_ptr<DataType> offset_type() {
     return TypeTraits<OffsetType>::type_singleton();
   }
@@ -785,6 +791,53 @@ TYPED_TEST(TestStringKernels, StrptimeDoesNotProvideDefaultOptions) {
   ASSERT_RAISES(Invalid, CallFunction("strptime", {input}));
 }
 
+TYPED_TEST(TestStringKernels, BinaryJoin) {
+  // Scalar separator
+  auto separator = this->scalar("--");
+  std::string list_json =
+      R"([["a", "bb", "ccc"], [], null, ["dd"], ["eee", null], ["ff", ""]])";
+  auto expected =
+      ArrayFromJSON(this->type(), R"(["a--bb--ccc", "", null, "dd", null, "ff--"])");
+  CheckScalarBinary("binary_join", ArrayFromJSON(list(this->type()), list_json),
+                    separator, expected);
+  CheckScalarBinary("binary_join", ArrayFromJSON(large_list(this->type()), list_json),
+                    separator, expected);
+
+  auto separator_null = MakeNullScalar(this->type());
+  expected = ArrayFromJSON(this->type(), R"([null, null, null, null, null, null])");
+  CheckScalarBinary("binary_join", ArrayFromJSON(list(this->type()), list_json),
+                    separator_null, expected);
+  CheckScalarBinary("binary_join", ArrayFromJSON(large_list(this->type()), list_json),
+                    separator_null, expected);
+
+  // Array list, Array separator
+  auto separators =
+      ArrayFromJSON(this->type(), R"(["1", "2", "3", "4", "5", "6", null])");
+  list_json =
+      R"([["a", "bb", "ccc"], [], null, ["dd"], ["eee", null], ["ff", ""], ["hh", "ii"]])";
+  expected =
+      ArrayFromJSON(this->type(), R"(["a1bb1ccc", "", null, "dd", null, "ff6", null])");
+  CheckScalarBinary("binary_join", ArrayFromJSON(list(this->type()), list_json),
+                    separators, expected);
+  CheckScalarBinary("binary_join", ArrayFromJSON(large_list(this->type()), list_json),
+                    separators, expected);
+
+  // Scalar list, Array separator
+  separators = ArrayFromJSON(this->type(), R"(["1", "", null])");
+  list_json = R"(["a", "bb", "ccc"])";
+  expected = ArrayFromJSON(this->type(), R"(["a1bb1ccc", "abbccc", null])");
+  CheckScalarBinary("binary_join", ScalarFromJSON(list(this->type()), list_json),
+                    separators, expected);
+  CheckScalarBinary("binary_join", ScalarFromJSON(large_list(this->type()), list_json),
+                    separators, expected);
+  list_json = R"(["a", "bb", null])";
+  expected = ArrayFromJSON(this->type(), R"([null, null, null])");
+  CheckScalarBinary("binary_join", ScalarFromJSON(list(this->type()), list_json),
+                    separators, expected);
+  CheckScalarBinary("binary_join", ScalarFromJSON(large_list(this->type()), list_json),
+                    separators, expected);
+}
+
 #ifdef ARROW_WITH_UTF8PROC
 
 TYPED_TEST(TestStringKernels, TrimWhitespaceUTF8) {
diff --git a/cpp/src/arrow/compute/kernels/test_util.cc b/cpp/src/arrow/compute/kernels/test_util.cc
index c74ef3b76dd..18257973150 100644
--- a/cpp/src/arrow/compute/kernels/test_util.cc
+++ b/cpp/src/arrow/compute/kernels/test_util.cc
@@ -36,7 +36,7 @@ namespace compute {
 namespace {
 
 template <typename T>
-std::vector<Datum> GetDatums(const std::vector<T>& inputs) {
+DatumVector GetDatums(const std::vector<T>& inputs) {
   std::vector<Datum> datums;
   for (const auto& input : inputs) {
     datums.emplace_back(input);
@@ -44,28 +44,36 @@ std::vector<Datum> GetDatums(const std::vector<T>& inputs) {
   return datums;
 }
 
-void CheckScalarNonRecursive(const std::string& func_name, const ArrayVector& inputs,
+void CheckScalarNonRecursive(const std::string& func_name, const DatumVector& inputs,
                              const std::shared_ptr<Array>& expected,
                              const FunctionOptions* options) {
-  ASSERT_OK_AND_ASSIGN(Datum out, CallFunction(func_name, GetDatums(inputs), options));
+  ASSERT_OK_AND_ASSIGN(Datum out, CallFunction(func_name, inputs, options));
   std::shared_ptr<Array> actual = std::move(out).make_array();
   ASSERT_OK(actual->ValidateFull());
   AssertArraysEqual(*expected, *actual, /*verbose=*/true);
 }
 
 template <typename... SliceArgs>
-ArrayVector SliceAll(const ArrayVector& inputs, SliceArgs... slice_args) {
-  ArrayVector sliced;
+DatumVector SliceArrays(const DatumVector& inputs, SliceArgs... slice_args) {
+  DatumVector sliced;
   for (const auto& input : inputs) {
-    sliced.push_back(input->Slice(slice_args...));
+    if (input.is_array()) {
+      sliced.push_back(*input.make_array()->Slice(slice_args...));
+    } else {
+      sliced.push_back(input);
+    }
   }
   return sliced;
 }
 
-ScalarVector GetScalars(const ArrayVector& inputs, int64_t index) {
+ScalarVector GetScalars(const DatumVector& inputs, int64_t index) {
   ScalarVector scalars;
   for (const auto& input : inputs) {
-    scalars.push_back(*input->GetScalar(index));
+    if (input.is_array()) {
+      scalars.push_back(*input.make_array()->GetScalar(index));
+    } else {
+      scalars.push_back(input.scalar());
+    }
   }
   return scalars;
 }
@@ -93,44 +101,63 @@ void CheckScalar(std::string func_name, const ScalarVector& inputs,
   }
 }
 
-void CheckScalar(std::string func_name, const ArrayVector& inputs,
+void CheckScalar(std::string func_name, const DatumVector& inputs,
                  std::shared_ptr<Array> expected, const FunctionOptions* options) {
   CheckScalarNonRecursive(func_name, inputs, expected, options);
 
+  // check for at least 1 array, and make sure the others are of equal length
+  std::shared_ptr<Array> array;
+  for (const auto& input : inputs) {
+    if (input.is_array()) {
+      if (!array) {
+        array = input.make_array();
+      } else {
+        ASSERT_EQ(input.array()->length, array->length());
+      }
+    }
+  }
+
   // Check all the input scalars, if scalars are implemented
-  if (std::none_of(inputs.begin(), inputs.end(), [](const std::shared_ptr<Array>& array) {
-        return array->type_id() == Type::EXTENSION;
+  if (std::none_of(inputs.begin(), inputs.end(), [](const Datum& datum) {
+        return datum.type()->id() == Type::EXTENSION;
       })) {
-    for (int64_t i = 0; i < inputs[0]->length(); ++i) {
+    // Check all the input scalars
+    for (int64_t i = 0; i < array->length(); ++i) {
       CheckScalar(func_name, GetScalars(inputs, i), *expected->GetScalar(i), options);
     }
   }
 
   // Since it's a scalar function, calling it on sliced inputs should
   // result in the sliced expected output.
-  const auto slice_length = inputs[0]->length() / 3;
+  const auto slice_length = array->length() / 3;
   if (slice_length > 0) {
-    CheckScalarNonRecursive(func_name, SliceAll(inputs, 0, slice_length),
+    CheckScalarNonRecursive(func_name, SliceArrays(inputs, 0, slice_length),
                             expected->Slice(0, slice_length), options);
 
-    CheckScalarNonRecursive(func_name, SliceAll(inputs, slice_length, slice_length),
+    CheckScalarNonRecursive(func_name, SliceArrays(inputs, slice_length, slice_length),
                             expected->Slice(slice_length, slice_length), options);
 
-    CheckScalarNonRecursive(func_name, SliceAll(inputs, 2 * slice_length),
+    CheckScalarNonRecursive(func_name, SliceArrays(inputs, 2 * slice_length),
                             expected->Slice(2 * slice_length), options);
   }
 
   // Should also work with an empty slice
-  CheckScalarNonRecursive(func_name, SliceAll(inputs, 0, 0), expected->Slice(0, 0),
+  CheckScalarNonRecursive(func_name, SliceArrays(inputs, 0, 0), expected->Slice(0, 0),
                           options);
 
   // Ditto with ChunkedArray inputs
   if (slice_length > 0) {
-    std::vector<std::shared_ptr<ChunkedArray>> chunked_inputs;
+    DatumVector chunked_inputs;
     chunked_inputs.reserve(inputs.size());
     for (const auto& input : inputs) {
-      chunked_inputs.push_back(std::make_shared<ChunkedArray>(
-          ArrayVector{input->Slice(0, slice_length), input->Slice(slice_length)}));
+      if (input.is_array()) {
+        auto ar = input.make_array();
+        auto ar_chunked = std::make_shared<ChunkedArray>(
+            ArrayVector{ar->Slice(0, slice_length), ar->Slice(slice_length)});
+        chunked_inputs.push_back(ar_chunked);
+      } else {
+        chunked_inputs.push_back(input.scalar());
+      }
     }
     ArrayVector expected_chunks{expected->Slice(0, slice_length),
                                 expected->Slice(slice_length)};
@@ -144,7 +171,8 @@ void CheckScalar(std::string func_name, const ArrayVector& inputs,
 
 void CheckScalarUnary(std::string func_name, std::shared_ptr<Array> input,
                       std::shared_ptr<Array> expected, const FunctionOptions* options) {
-  CheckScalar(std::move(func_name), {input}, expected, options);
+  ArrayVector input_vector = {input};
+  CheckScalar(std::move(func_name), GetDatums(input_vector), expected, options);
 }
 
 void CheckScalarUnary(std::string func_name, std::shared_ptr<DataType> in_ty,
@@ -179,6 +207,18 @@ void CheckScalarBinary(std::string func_name, std::shared_ptr<Array> left_input,
   CheckScalar(std::move(func_name), {left_input, right_input}, expected, options);
 }
 
+void CheckScalarBinary(std::string func_name, std::shared_ptr<Array> left_input,
+                       std::shared_ptr<Scalar> right_input,
+                       std::shared_ptr<Array> expected, const FunctionOptions* options) {
+  CheckScalar(std::move(func_name), {left_input, right_input}, expected, options);
+}
+
+void CheckScalarBinary(std::string func_name, std::shared_ptr<Scalar> left_input,
+                       std::shared_ptr<Array> right_input,
+                       std::shared_ptr<Array> expected, const FunctionOptions* options) {
+  CheckScalar(std::move(func_name), {left_input, right_input}, expected, options);
+}
+
 void CheckDispatchBest(std::string func_name, std::vector<ValueDescr> original_values,
                        std::vector<ValueDescr> expected_equivalent_values) {
   ASSERT_OK_AND_ASSIGN(auto function, GetFunctionRegistry()->GetFunction(func_name));
diff --git a/cpp/src/arrow/compute/kernels/test_util.h b/cpp/src/arrow/compute/kernels/test_util.h
index cadcc4fe35c..85ed04c183a 100644
--- a/cpp/src/arrow/compute/kernels/test_util.h
+++ b/cpp/src/arrow/compute/kernels/test_util.h
@@ -43,6 +43,8 @@ using internal::checked_cast;
 
 namespace compute {
 
+using DatumVector = std::vector<Datum>;
+
 template <typename Type, typename T>
 std::shared_ptr<Array> _MakeArray(const std::shared_ptr<DataType>& type,
                                   const std::vector<T>& values,
@@ -93,7 +95,7 @@ void CheckScalar(std::string func_name, const ScalarVector& inputs,
                  std::shared_ptr<Scalar> expected,
                  const FunctionOptions* options = nullptr);
 
-void CheckScalar(std::string func_name, const ArrayVector& inputs,
+void CheckScalar(std::string func_name, const DatumVector& inputs,
                  std::shared_ptr<Array> expected,
                  const FunctionOptions* options = nullptr);
 
@@ -120,6 +122,16 @@ void CheckScalarBinary(std::string func_name, std::shared_ptr<Array> left_input,
                        std::shared_ptr<Array> expected,
                        const FunctionOptions* options = nullptr);
 
+void CheckScalarBinary(std::string func_name, std::shared_ptr<Array> left_input,
+                       std::shared_ptr<Scalar> right_input,
+                       std::shared_ptr<Array> expected,
+                       const FunctionOptions* options = nullptr);
+
+void CheckScalarBinary(std::string func_name, std::shared_ptr<Scalar> left_input,
+                       std::shared_ptr<Array> right_input,
+                       std::shared_ptr<Array> expected,
+                       const FunctionOptions* options = nullptr);
+
 void CheckVectorUnary(std::string func_name, Datum input, std::shared_ptr<Array> expected,
                       const FunctionOptions* options = nullptr);
 
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index 3f30bbcaa06..02c8fb3eac7 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -653,10 +653,27 @@ String component extraction
   ``(?P<letter>[ab])(?P<digit>\\d)``.
 
 
+String joining
+~~~~~~~~~~~~~~
+
+This function does the inverse of string splitting.
+
++-----------------+-----------+----------------------+----------------+-------------------+---------+
+| Function name   | Arity     | Input type 1         | Input type 2   | Output type       | Notes   |
++=================+===========+======================+================+===================+=========+
+| binary_join     | Binary    | List of string-like  | String-like    | String-like       | \(1)    |
++-----------------+-----------+----------------------+----------------+-------------------+---------+
+
+* \(1) The first input must be an array, while the second can be a scalar or array.
+  Each list of values in the first input is joined using each second input
+  as separator.  If any input list is null or contains a null, the corresponding
+  output will be null.
+
+
 Slicing
 ~~~~~~~
 
-These function transform each sequence of the array to a subsequence, according
+This function transforms each sequence of the array to a subsequence, according
 to start and stop indices, and a non-zero step (defaulting to 1).  Slicing
 semantics follow Python slicing semantics: the start index is inclusive,
 the stop index exclusive; if the step is negative, the sequence is followed
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index b3f87127397..a78be20f9cf 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -699,6 +699,17 @@ def test_extract_regex():
         'letter': 'b', 'digit': '2'}]
 
 
+def test_binary_join():
+    ar_list = pa.array([['foo', 'bar'], None, []])
+    expected = pa.array(['foo-bar', None, ''])
+    assert pc.binary_join(ar_list, '-').equals(expected)
+
+    separator_array = pa.array(['1', '2'], type=pa.binary())
+    expected = pa.array(['a1b', 'c2d'], type=pa.binary())
+    ar_list = pa.array([['a', 'b'], ['c', 'd']], type=pa.list_(pa.binary()))
+    assert pc.binary_join(ar_list, separator_array).equals(expected)
+
+
 @pytest.mark.parametrize(('ty', 'values'), all_array_types)
 def test_take(ty, values):
     arr = pa.array(values, type=ty)

From 80fe83a4fd2cc6d119eaf547cee24a2cdf1d28d8 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Mon, 7 Jun 2021 19:10:57 +0200
Subject: [PATCH 372/719] ARROW-11843: [C++] Provide async Parquet reader

This provides an async Parquet reader where the unit of concurrency is a single row group.

Closes #9620 from lidavidm/parquet-reentrant

Lead-authored-by: David Li <li.davidm96@gmail.com>
Co-authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/dataset/file_parquet.cc         |  88 +++++
 cpp/src/arrow/dataset/file_parquet.h          |   7 +
 cpp/src/arrow/testing/future_util.h           |   7 +
 cpp/src/arrow/util/async_generator.h          |   8 +
 cpp/src/arrow/util/future.h                   |   3 +
 .../parquet/arrow/arrow_reader_writer_test.cc |  69 +++-
 cpp/src/parquet/arrow/reader.cc               | 127 ++++++-
 cpp/src/parquet/arrow/reader.h                |  16 +
 .../parquet/arrow/reader_writer_benchmark.cc  |  41 ++-
 .../encryption/test_encryption_util.cc        |  24 +-
 .../parquet/encryption/test_encryption_util.h |   5 +
 cpp/src/parquet/file_reader.cc                | 344 +++++++++++++-----
 cpp/src/parquet/file_reader.h                 |  31 +-
 cpp/src/parquet/properties.h                  |   2 +-
 cpp/src/parquet/reader_test.cc                |  72 ++++
 python/pyarrow/_dataset.pyx                   |   8 +-
 16 files changed, 726 insertions(+), 126 deletions(-)

diff --git a/cpp/src/arrow/dataset/file_parquet.cc b/cpp/src/arrow/dataset/file_parquet.cc
index 86bea49c22e..8c325d21da1 100644
--- a/cpp/src/arrow/dataset/file_parquet.cc
+++ b/cpp/src/arrow/dataset/file_parquet.cc
@@ -333,6 +333,53 @@ Result<std::unique_ptr<parquet::arrow::FileReader>> ParquetFileFormat::GetReader
   return std::move(arrow_reader);
 }
 
+Future<std::shared_ptr<parquet::arrow::FileReader>> ParquetFileFormat::GetReaderAsync(
+    const FileSource& source, const std::shared_ptr<ScanOptions>& options) const {
+  ARROW_ASSIGN_OR_RAISE(
+      auto parquet_scan_options,
+      GetFragmentScanOptions<ParquetFragmentScanOptions>(kParquetTypeName, options.get(),
+                                                         default_fragment_scan_options));
+  auto properties =
+      MakeReaderProperties(*this, parquet_scan_options.get(), options->pool);
+  ARROW_ASSIGN_OR_RAISE(auto input, source.Open());
+  // TODO(ARROW-12259): workaround since we have Future<(move-only type)>
+  auto reader_fut =
+      parquet::ParquetFileReader::OpenAsync(std::move(input), std::move(properties));
+  auto path = source.path();
+  auto self = checked_pointer_cast<const ParquetFileFormat>(shared_from_this());
+  return reader_fut.Then(
+      [=](const std::unique_ptr<parquet::ParquetFileReader>&) mutable
+      -> Result<std::shared_ptr<parquet::arrow::FileReader>> {
+        ARROW_ASSIGN_OR_RAISE(std::unique_ptr<parquet::ParquetFileReader> reader,
+                              reader_fut.MoveResult());
+        std::shared_ptr<parquet::FileMetaData> metadata = reader->metadata();
+        auto arrow_properties = MakeArrowReaderProperties(*self, *metadata);
+        arrow_properties.set_batch_size(options->batch_size);
+        // Must be set here since the sync ScanTask handles pre-buffering itself
+        arrow_properties.set_pre_buffer(
+            parquet_scan_options->arrow_reader_properties->pre_buffer());
+        arrow_properties.set_cache_options(
+            parquet_scan_options->arrow_reader_properties->cache_options());
+        arrow_properties.set_io_context(
+            parquet_scan_options->arrow_reader_properties->io_context());
+        // TODO: ARROW-12597 will let us enable parallel conversion
+        if (!options->use_threads) {
+          arrow_properties.set_use_threads(
+              parquet_scan_options->enable_parallel_column_conversion);
+        }
+        std::unique_ptr<parquet::arrow::FileReader> arrow_reader;
+        RETURN_NOT_OK(parquet::arrow::FileReader::Make(options->pool, std::move(reader),
+                                                       std::move(arrow_properties),
+                                                       &arrow_reader));
+        return std::move(arrow_reader);
+      },
+      [path](
+          const Status& status) -> Result<std::shared_ptr<parquet::arrow::FileReader>> {
+        return status.WithMessage("Could not open Parquet input source '", path,
+                                  "': ", status.message());
+      });
+}
+
 Result<ScanTaskIterator> ParquetFileFormat::ScanFile(
     const std::shared_ptr<ScanOptions>& options,
     const std::shared_ptr<FileFragment>& fragment) const {
@@ -390,6 +437,47 @@ Result<ScanTaskIterator> ParquetFileFormat::ScanFile(
   return MakeVectorIterator(std::move(tasks));
 }
 
+Result<RecordBatchGenerator> ParquetFileFormat::ScanBatchesAsync(
+    const std::shared_ptr<ScanOptions>& options,
+    const std::shared_ptr<FileFragment>& file) const {
+  auto parquet_fragment = checked_pointer_cast<ParquetFileFragment>(file);
+  std::vector<int> row_groups;
+  bool pre_filtered = false;
+  // If RowGroup metadata is cached completely we can pre-filter RowGroups before opening
+  // a FileReader, potentially avoiding IO altogether if all RowGroups are excluded due to
+  // prior statistics knowledge. In the case where a RowGroup doesn't have statistics
+  // metdata, it will not be excluded.
+  if (parquet_fragment->metadata() != nullptr) {
+    ARROW_ASSIGN_OR_RAISE(row_groups, parquet_fragment->FilterRowGroups(options->filter));
+    pre_filtered = true;
+    if (row_groups.empty()) return MakeEmptyGenerator<std::shared_ptr<RecordBatch>>();
+  }
+  // Open the reader and pay the real IO cost.
+  auto make_generator =
+      [=](const std::shared_ptr<parquet::arrow::FileReader>& reader) mutable
+      -> Result<RecordBatchGenerator> {
+    // Ensure that parquet_fragment has FileMetaData
+    RETURN_NOT_OK(parquet_fragment->EnsureCompleteMetadata(reader.get()));
+    if (!pre_filtered) {
+      // row groups were not already filtered; do this now
+      ARROW_ASSIGN_OR_RAISE(row_groups,
+                            parquet_fragment->FilterRowGroups(options->filter));
+      if (row_groups.empty()) return MakeEmptyGenerator<std::shared_ptr<RecordBatch>>();
+    }
+    auto column_projection = InferColumnProjection(*reader, *options);
+    ARROW_ASSIGN_OR_RAISE(
+        auto parquet_scan_options,
+        GetFragmentScanOptions<ParquetFragmentScanOptions>(
+            kParquetTypeName, options.get(), default_fragment_scan_options));
+    ARROW_ASSIGN_OR_RAISE(auto generator, reader->GetRecordBatchGenerator(
+                                              reader, row_groups, column_projection,
+                                              internal::GetCpuThreadPool()));
+    return MakeReadaheadGenerator(std::move(generator), options->batch_readahead);
+  };
+  return MakeFromFuture(GetReaderAsync(parquet_fragment->source(), options)
+                            .Then(std::move(make_generator)));
+}
+
 Future<util::optional<int64_t>> ParquetFileFormat::CountRows(
     const std::shared_ptr<FileFragment>& file, compute::Expression predicate,
     const std::shared_ptr<ScanOptions>& options) {
diff --git a/cpp/src/arrow/dataset/file_parquet.h b/cpp/src/arrow/dataset/file_parquet.h
index f6505ed6dd2..8286e2776cb 100644
--- a/cpp/src/arrow/dataset/file_parquet.h
+++ b/cpp/src/arrow/dataset/file_parquet.h
@@ -99,6 +99,10 @@ class ARROW_DS_EXPORT ParquetFileFormat : public FileFormat {
       const std::shared_ptr<ScanOptions>& options,
       const std::shared_ptr<FileFragment>& file) const override;
 
+  Result<RecordBatchGenerator> ScanBatchesAsync(
+      const std::shared_ptr<ScanOptions>& options,
+      const std::shared_ptr<FileFragment>& file) const override;
+
   Future<util::optional<int64_t>> CountRows(
       const std::shared_ptr<FileFragment>& file, compute::Expression predicate,
       const std::shared_ptr<ScanOptions>& options) override;
@@ -119,6 +123,9 @@ class ARROW_DS_EXPORT ParquetFileFormat : public FileFormat {
   Result<std::unique_ptr<parquet::arrow::FileReader>> GetReader(
       const FileSource& source, ScanOptions* = NULLPTR) const;
 
+  Future<std::shared_ptr<parquet::arrow::FileReader>> GetReaderAsync(
+      const FileSource& source, const std::shared_ptr<ScanOptions>& options) const;
+
   Result<std::shared_ptr<FileWriter>> MakeWriter(
       std::shared_ptr<io::OutputStream> destination, std::shared_ptr<Schema> schema,
       std::shared_ptr<FileWriteOptions> options) const override;
diff --git a/cpp/src/arrow/testing/future_util.h b/cpp/src/arrow/testing/future_util.h
index 190e5839bbf..878840587ff 100644
--- a/cpp/src/arrow/testing/future_util.h
+++ b/cpp/src/arrow/testing/future_util.h
@@ -52,6 +52,13 @@
     ASSERT_RAISES(ENUM, _fut.status());        \
   } while (false)
 
+#define EXPECT_FINISHES_AND_RAISES_WITH_MESSAGE_THAT(ENUM, matcher, expr) \
+  do {                                                                    \
+    auto&& fut = (expr);                                                  \
+    ASSERT_FINISHES_IMPL(fut);                                            \
+    EXPECT_RAISES_WITH_MESSAGE_THAT(ENUM, matcher, fut.status());         \
+  } while (false)
+
 #define ASSERT_FINISHES_OK_AND_ASSIGN_IMPL(lhs, rexpr, _future_name) \
   auto _future_name = (rexpr);                                       \
   ASSERT_FINISHES_IMPL(_future_name);                                \
diff --git a/cpp/src/arrow/util/async_generator.h b/cpp/src/arrow/util/async_generator.h
index 4cd8a3a9c9d..d975792ea10 100644
--- a/cpp/src/arrow/util/async_generator.h
+++ b/cpp/src/arrow/util/async_generator.h
@@ -1522,6 +1522,14 @@ std::function<Future<T>()> MakeSingleFutureGenerator(Future<T> future) {
   };
 }
 
+/// \brief Make a generator that immediately ends.
+///
+/// This generator is async-reentrant.
+template <typename T>
+std::function<Future<T>()> MakeEmptyGenerator() {
+  return []() -> Future<T> { return AsyncGeneratorEnd<T>(); };
+}
+
 /// \brief Make a generator that always fails with a given error
 ///
 /// This generator is async-reentrant.
diff --git a/cpp/src/arrow/util/future.h b/cpp/src/arrow/util/future.h
index 5fb17f95f2b..d08c598a32b 100644
--- a/cpp/src/arrow/util/future.h
+++ b/cpp/src/arrow/util/future.h
@@ -865,6 +865,9 @@ Future<std::vector<Result<T>>> All(std::vector<Future<T>> futures) {
   return out;
 }
 
+template <>
+inline Future<>::Future(Status s) : Future(internal::Empty::ToResult(std::move(s))) {}
+
 /// \brief Create a Future which completes when all of `futures` complete.
 ///
 /// The future will be marked complete if all `futures` complete
diff --git a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
index 303fb454880..677458ce37e 100644
--- a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
+++ b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
@@ -2287,10 +2287,9 @@ TEST(TestArrowReadWrite, WaitCoalescedReads) {
   ASSERT_OK(builder.Open(std::make_shared<BufferReader>(buffer)));
   ASSERT_OK(builder.properties(properties)->Build(&reader));
   // Pre-buffer data and wait for I/O to complete.
-  ASSERT_OK(reader->parquet_reader()
-                ->PreBuffer({0}, {0, 1, 2, 3, 4}, ::arrow::io::IOContext(),
-                            ::arrow::io::CacheOptions::Defaults())
-                .status());
+  reader->parquet_reader()->PreBuffer({0}, {0, 1, 2, 3, 4}, ::arrow::io::IOContext(),
+                                      ::arrow::io::CacheOptions::Defaults());
+  ASSERT_OK(reader->parquet_reader()->WhenBuffered({0}, {0, 1, 2, 3, 4}).status());
 
   std::shared_ptr<::arrow::RecordBatchReader> rb_reader;
   ASSERT_OK_NO_THROW(reader->GetRecordBatchReader({0}, {0, 1, 2, 3, 4}, &rb_reader));
@@ -2331,6 +2330,66 @@ TEST(TestArrowReadWrite, GetRecordBatchReaderNoColumns) {
   ASSERT_EQ(actual_batch->num_rows(), num_rows);
 }
 
+TEST(TestArrowReadWrite, GetRecordBatchGenerator) {
+  ArrowReaderProperties properties = default_arrow_reader_properties();
+  const int num_rows = 1024;
+  const int row_group_size = 512;
+  const int num_columns = 2;
+
+  std::shared_ptr<Table> table;
+  ASSERT_NO_FATAL_FAILURE(MakeDoubleTable(num_columns, num_rows, 1, &table));
+
+  std::shared_ptr<Buffer> buffer;
+  ASSERT_NO_FATAL_FAILURE(WriteTableToBuffer(table, row_group_size,
+                                             default_arrow_writer_properties(), &buffer));
+
+  std::shared_ptr<FileReader> reader;
+  {
+    std::unique_ptr<FileReader> unique_reader;
+    FileReaderBuilder builder;
+    ASSERT_OK(builder.Open(std::make_shared<BufferReader>(buffer)));
+    ASSERT_OK(builder.properties(properties)->Build(&unique_reader));
+    reader = std::move(unique_reader);
+  }
+
+  auto check_batches = [](const std::shared_ptr<::arrow::RecordBatch>& batch,
+                          int num_columns, int num_rows) {
+    ASSERT_NE(batch, nullptr);
+    ASSERT_EQ(batch->num_columns(), num_columns);
+    ASSERT_EQ(batch->num_rows(), num_rows);
+  };
+  {
+    ASSERT_OK_AND_ASSIGN(auto batch_generator,
+                         reader->GetRecordBatchGenerator(reader, {0, 1}, {0, 1}));
+    auto fut1 = batch_generator();
+    auto fut2 = batch_generator();
+    auto fut3 = batch_generator();
+    ASSERT_OK_AND_ASSIGN(auto batch1, fut1.result());
+    ASSERT_OK_AND_ASSIGN(auto batch2, fut2.result());
+    ASSERT_OK_AND_ASSIGN(auto batch3, fut3.result());
+    ASSERT_EQ(batch3, nullptr);
+    check_batches(batch1, num_columns, row_group_size);
+    check_batches(batch2, num_columns, row_group_size);
+    ASSERT_OK_AND_ASSIGN(auto actual, ::arrow::Table::FromRecordBatches(
+                                          batch1->schema(), {batch1, batch2}));
+    AssertTablesEqual(*table, *actual, /*same_chunk_layout=*/false);
+  }
+  {
+    // No columns case
+    ASSERT_OK_AND_ASSIGN(auto batch_generator,
+                         reader->GetRecordBatchGenerator(reader, {0, 1}, {}));
+    auto fut1 = batch_generator();
+    auto fut2 = batch_generator();
+    auto fut3 = batch_generator();
+    ASSERT_OK_AND_ASSIGN(auto batch1, fut1.result());
+    ASSERT_OK_AND_ASSIGN(auto batch2, fut2.result());
+    ASSERT_OK_AND_ASSIGN(auto batch3, fut3.result());
+    ASSERT_EQ(batch3, nullptr);
+    check_batches(batch1, 0, row_group_size);
+    check_batches(batch2, 0, row_group_size);
+  }
+}
+
 TEST(TestArrowReadWrite, ScanContents) {
   const int num_columns = 20;
   const int num_rows = 1000;
@@ -2700,7 +2759,7 @@ TEST(ArrowReadWrite, Decimal256) {
 
   auto type = ::arrow::decimal256(8, 4);
 
-  const char* json = R"(["1.0000", null, "-1.2345", "-1000.5678", 
+  const char* json = R"(["1.0000", null, "-1.2345", "-1000.5678",
                          "-9999.9999", "9999.9999"])";
   auto array = ::arrow::ArrayFromJSON(type, json);
   auto table = ::arrow::Table::Make(::arrow::schema({field("root", type)}), {array});
diff --git a/cpp/src/parquet/arrow/reader.cc b/cpp/src/parquet/arrow/reader.cc
index 016ceacb0ef..14eb7495805 100644
--- a/cpp/src/parquet/arrow/reader.cc
+++ b/cpp/src/parquet/arrow/reader.cc
@@ -30,7 +30,9 @@
 #include "arrow/record_batch.h"
 #include "arrow/table.h"
 #include "arrow/type.h"
+#include "arrow/util/async_generator.h"
 #include "arrow/util/bit_util.h"
+#include "arrow/util/future.h"
 #include "arrow/util/iterator.h"
 #include "arrow/util/logging.h"
 #include "arrow/util/make_unique.h"
@@ -291,6 +293,11 @@ class FileReaderImpl : public FileReader {
                        const std::vector<int>& indices,
                        std::shared_ptr<Table>* table) override;
 
+  // Helper method used by ReadRowGroups/Generator - read the given row groups/columns,
+  // skipping bounds checks and pre-buffering.
+  Status DecodeRowGroups(const std::vector<int>& row_groups,
+                         const std::vector<int>& indices, std::shared_ptr<Table>* table);
+
   Status ReadRowGroups(const std::vector<int>& row_groups,
                        std::shared_ptr<Table>* table) override {
     return ReadRowGroups(row_groups, Iota(reader_->metadata()->num_columns()), table);
@@ -315,6 +322,12 @@ class FileReaderImpl : public FileReader {
                                 Iota(reader_->metadata()->num_columns()), out);
   }
 
+  ::arrow::Result<::arrow::AsyncGenerator<std::shared_ptr<::arrow::RecordBatch>>>
+  GetRecordBatchGenerator(std::shared_ptr<FileReader> reader,
+                          const std::vector<int> row_group_indices,
+                          const std::vector<int> column_indices,
+                          ::arrow::internal::Executor* cpu_executor) override;
+
   int num_columns() const { return reader_->metadata()->num_columns(); }
 
   ParquetFileReader* parquet_reader() const override { return reader_.get(); }
@@ -890,9 +903,8 @@ Status FileReaderImpl::GetRecordBatchReader(const std::vector<int>& row_groups,
   if (reader_properties_.pre_buffer()) {
     // PARQUET-1698/PARQUET-1820: pre-buffer row groups/column chunks if enabled
     BEGIN_PARQUET_CATCH_EXCEPTIONS
-    ARROW_UNUSED(reader_->PreBuffer(row_groups, column_indices,
-                                    reader_properties_.io_context(),
-                                    reader_properties_.cache_options()));
+    reader_->PreBuffer(row_groups, column_indices, reader_properties_.io_context(),
+                       reader_properties_.cache_options());
     END_PARQUET_CATCH_EXCEPTIONS
   }
 
@@ -968,6 +980,102 @@ Status FileReaderImpl::GetRecordBatchReader(const std::vector<int>& row_groups,
   return Status::OK();
 }
 
+/// Given a file reader and a list of row groups, this is a generator of record
+/// batch generators (where each sub-generator is the contents of a single row group).
+class RowGroupGenerator {
+ public:
+  using RecordBatchGenerator =
+      ::arrow::AsyncGenerator<std::shared_ptr<::arrow::RecordBatch>>;
+
+  explicit RowGroupGenerator(std::shared_ptr<FileReaderImpl> arrow_reader,
+                             ::arrow::internal::Executor* cpu_executor,
+                             std::vector<int> row_groups, std::vector<int> column_indices)
+      : arrow_reader_(std::move(arrow_reader)),
+        cpu_executor_(cpu_executor),
+        row_groups_(std::move(row_groups)),
+        column_indices_(std::move(column_indices)),
+        index_(0) {}
+
+  ::arrow::Future<RecordBatchGenerator> operator()() {
+    if (index_ >= row_groups_.size()) {
+      return ::arrow::AsyncGeneratorEnd<RecordBatchGenerator>();
+    }
+    int row_group = row_groups_[index_++];
+    std::vector<int> column_indices = column_indices_;
+    auto reader = arrow_reader_;
+    if (!reader->properties().pre_buffer()) {
+      return SubmitRead(cpu_executor_, reader, row_group, column_indices);
+    }
+    auto ready = reader->parquet_reader()->WhenBuffered({row_group}, column_indices);
+    // TODO(ARROW-12916): always transfer here
+    if (cpu_executor_) ready = cpu_executor_->Transfer(ready);
+    return ready.Then([=]() -> ::arrow::Result<RecordBatchGenerator> {
+      return ReadOneRowGroup(reader, row_group, column_indices);
+    });
+  }
+
+ private:
+  // Synchronous fallback for when pre-buffer isn't enabled.
+  //
+  // Making the Parquet reader truly asynchronous requires heavy refactoring, so the
+  // generator piggybacks on ReadRangeCache. The lazy ReadRangeCache can be used for
+  // async I/O without forcing readahead.
+  static ::arrow::Future<RecordBatchGenerator> SubmitRead(
+      ::arrow::internal::Executor* cpu_executor, std::shared_ptr<FileReaderImpl> self,
+      const int row_group, const std::vector<int>& column_indices) {
+    if (!cpu_executor) {
+      return Future<RecordBatchGenerator>::MakeFinished(
+          ReadOneRowGroup(self, row_group, column_indices));
+    }
+    // If we have an executor, then force transfer (even if I/O was complete)
+    return ::arrow::DeferNotOk(
+        cpu_executor->Submit(ReadOneRowGroup, self, row_group, column_indices));
+  }
+
+  static ::arrow::Result<RecordBatchGenerator> ReadOneRowGroup(
+      std::shared_ptr<FileReaderImpl> self, const int row_group,
+      const std::vector<int>& column_indices) {
+    std::shared_ptr<::arrow::Table> table;
+    // Skips bound checks/pre-buffering, since we've done that already
+    RETURN_NOT_OK(self->DecodeRowGroups({row_group}, column_indices, &table));
+    auto table_reader = std::make_shared<::arrow::TableBatchReader>(*table);
+    ::arrow::RecordBatchVector batches;
+    while (true) {
+      std::shared_ptr<::arrow::RecordBatch> batch;
+      RETURN_NOT_OK(table_reader->ReadNext(&batch));
+      if (!batch) {
+        break;
+      }
+      batches.push_back(batch);
+    }
+    return ::arrow::MakeVectorGenerator(std::move(batches));
+  }
+
+  std::shared_ptr<FileReaderImpl> arrow_reader_;
+  ::arrow::internal::Executor* cpu_executor_;
+  std::vector<int> row_groups_;
+  std::vector<int> column_indices_;
+  size_t index_;
+};
+
+::arrow::Result<::arrow::AsyncGenerator<std::shared_ptr<::arrow::RecordBatch>>>
+FileReaderImpl::GetRecordBatchGenerator(std::shared_ptr<FileReader> reader,
+                                        const std::vector<int> row_group_indices,
+                                        const std::vector<int> column_indices,
+                                        ::arrow::internal::Executor* cpu_executor) {
+  RETURN_NOT_OK(BoundsCheck(row_group_indices, column_indices));
+  if (reader_properties_.pre_buffer()) {
+    BEGIN_PARQUET_CATCH_EXCEPTIONS
+    reader_->PreBuffer(row_group_indices, column_indices, reader_properties_.io_context(),
+                       reader_properties_.cache_options());
+    END_PARQUET_CATCH_EXCEPTIONS
+  }
+  ::arrow::AsyncGenerator<RowGroupGenerator::RecordBatchGenerator> row_group_generator =
+      RowGroupGenerator(::arrow::internal::checked_pointer_cast<FileReaderImpl>(reader),
+                        cpu_executor, row_group_indices, column_indices);
+  return ::arrow::MakeConcatenatedGenerator(std::move(row_group_generator));
+}
+
 Status FileReaderImpl::GetColumn(int i, FileColumnIteratorFactory iterator_factory,
                                  std::unique_ptr<ColumnReader>* out) {
   RETURN_NOT_OK(BoundsCheckColumn(i));
@@ -990,12 +1098,19 @@ Status FileReaderImpl::ReadRowGroups(const std::vector<int>& row_groups,
   // PARQUET-1698/PARQUET-1820: pre-buffer row groups/column chunks if enabled
   if (reader_properties_.pre_buffer()) {
     BEGIN_PARQUET_CATCH_EXCEPTIONS
-    ARROW_UNUSED(parquet_reader()->PreBuffer(row_groups, column_indices,
-                                             reader_properties_.io_context(),
-                                             reader_properties_.cache_options()));
+    parquet_reader()->PreBuffer(row_groups, column_indices,
+                                reader_properties_.io_context(),
+                                reader_properties_.cache_options());
     END_PARQUET_CATCH_EXCEPTIONS
   }
 
+  return DecodeRowGroups(row_groups, column_indices, out);
+}
+
+// Also used by RowGroupGenerator - skip bounds check/pre-buffer to avoid doing that twice
+Status FileReaderImpl::DecodeRowGroups(const std::vector<int>& row_groups,
+                                       const std::vector<int>& column_indices,
+                                       std::shared_ptr<Table>* out) {
   std::vector<std::shared_ptr<ColumnReaderImpl>> readers;
   std::shared_ptr<::arrow::Schema> result_schema;
   RETURN_NOT_OK(GetFieldReaders(column_indices, row_groups, &readers, &result_schema));
diff --git a/cpp/src/parquet/arrow/reader.h b/cpp/src/parquet/arrow/reader.h
index 765e2f6d39a..2d6a5ef2c3e 100644
--- a/cpp/src/parquet/arrow/reader.h
+++ b/cpp/src/parquet/arrow/reader.h
@@ -18,6 +18,8 @@
 #pragma once
 
 #include <cstdint>
+// N.B. we don't include async_generator.h as it's relatively heavy
+#include <functional>
 #include <memory>
 #include <vector>
 
@@ -178,6 +180,20 @@ class PARQUET_EXPORT FileReader {
       const std::vector<int>& row_group_indices, const std::vector<int>& column_indices,
       std::unique_ptr<::arrow::RecordBatchReader>* out) = 0;
 
+  /// \brief Return a generator of record batches.
+  ///
+  /// The FileReader must outlive the generator, so this requires that you pass in a
+  /// shared_ptr.
+  ///
+  /// \returns error Result if either row_group_indices or column_indices contains an
+  ///     invalid index
+  virtual ::arrow::Result<
+      std::function<::arrow::Future<std::shared_ptr<::arrow::RecordBatch>>()>>
+  GetRecordBatchGenerator(std::shared_ptr<FileReader> reader,
+                          const std::vector<int> row_group_indices,
+                          const std::vector<int> column_indices,
+                          ::arrow::internal::Executor* cpu_executor = NULLPTR) = 0;
+
   ::arrow::Status GetRecordBatchReader(const std::vector<int>& row_group_indices,
                                        const std::vector<int>& column_indices,
                                        std::shared_ptr<::arrow::RecordBatchReader>* out);
diff --git a/cpp/src/parquet/arrow/reader_writer_benchmark.cc b/cpp/src/parquet/arrow/reader_writer_benchmark.cc
index 6f5d195aad6..6445bb02758 100644
--- a/cpp/src/parquet/arrow/reader_writer_benchmark.cc
+++ b/cpp/src/parquet/arrow/reader_writer_benchmark.cc
@@ -33,7 +33,9 @@
 #include "arrow/array/builder_primitive.h"
 #include "arrow/io/memory.h"
 #include "arrow/table.h"
+#include "arrow/testing/gtest_util.h"
 #include "arrow/testing/random.h"
+#include "arrow/util/async_generator.h"
 #include "arrow/util/bitmap_ops.h"
 #include "arrow/util/logging.h"
 
@@ -534,6 +536,7 @@ static void BM_ReadMultipleRowGroups(::benchmark::State& state) {
   EXIT_NOT_OK(
       WriteTable(*table, ::arrow::default_memory_pool(), output, BENCHMARK_SIZE / 10));
   PARQUET_ASSIGN_OR_THROW(auto buffer, output->Finish());
+  std::vector<int> rgs{0, 2, 4, 6, 8};
 
   while (state.KeepRunning()) {
     auto reader =
@@ -541,16 +544,6 @@ static void BM_ReadMultipleRowGroups(::benchmark::State& state) {
     std::unique_ptr<FileReader> arrow_reader;
     EXIT_NOT_OK(FileReader::Make(::arrow::default_memory_pool(), std::move(reader),
                                  &arrow_reader));
-
-    std::vector<std::shared_ptr<::arrow::Table>> tables;
-    std::vector<int> rgs;
-    for (int i = 0; i < arrow_reader->num_row_groups(); i++) {
-      // Only read the even numbered RowGroups
-      if ((i % 2) == 0) {
-        rgs.push_back(i);
-      }
-    }
-
     std::shared_ptr<::arrow::Table> table;
     EXIT_NOT_OK(arrow_reader->ReadRowGroups(rgs, &table));
   }
@@ -559,6 +552,34 @@ static void BM_ReadMultipleRowGroups(::benchmark::State& state) {
 
 BENCHMARK(BM_ReadMultipleRowGroups);
 
+static void BM_ReadMultipleRowGroupsGenerator(::benchmark::State& state) {
+  std::vector<int64_t> values(BENCHMARK_SIZE, 128);
+  std::shared_ptr<::arrow::Table> table = TableFromVector<Int64Type>(values, true);
+  auto output = CreateOutputStream();
+  // This writes 10 RowGroups
+  EXIT_NOT_OK(
+      WriteTable(*table, ::arrow::default_memory_pool(), output, BENCHMARK_SIZE / 10));
+  PARQUET_ASSIGN_OR_THROW(auto buffer, output->Finish());
+  std::vector<int> rgs{0, 2, 4, 6, 8};
+
+  while (state.KeepRunning()) {
+    auto reader =
+        ParquetFileReader::Open(std::make_shared<::arrow::io::BufferReader>(buffer));
+    std::unique_ptr<FileReader> unique_reader;
+    EXIT_NOT_OK(FileReader::Make(::arrow::default_memory_pool(), std::move(reader),
+                                 &unique_reader));
+    std::shared_ptr<FileReader> arrow_reader = std::move(unique_reader);
+    ASSIGN_OR_ABORT(auto generator,
+                    arrow_reader->GetRecordBatchGenerator(arrow_reader, rgs, {0}));
+    auto fut = ::arrow::CollectAsyncGenerator(generator);
+    ASSIGN_OR_ABORT(auto batches, fut.result());
+    ASSIGN_OR_ABORT(auto actual, ::arrow::Table::FromRecordBatches(std::move(batches)));
+  }
+  SetBytesProcessed<true, Int64Type>(state);
+}
+
+BENCHMARK(BM_ReadMultipleRowGroupsGenerator);
+
 }  // namespace benchmark
 
 }  // namespace parquet
diff --git a/cpp/src/parquet/encryption/test_encryption_util.cc b/cpp/src/parquet/encryption/test_encryption_util.cc
index 8fe048e3bcd..8b83154c96c 100644
--- a/cpp/src/parquet/encryption/test_encryption_util.cc
+++ b/cpp/src/parquet/encryption/test_encryption_util.cc
@@ -23,6 +23,7 @@
 
 #include <arrow/io/file.h>
 
+#include "arrow/testing/future_util.h"
 #include "parquet/encryption/test_encryption_util.h"
 #include "parquet/file_reader.h"
 #include "parquet/file_writer.h"
@@ -284,6 +285,7 @@ void FileEncryptor::EncryptFile(
 
   // Close the ParquetFileWriter
   file_writer->Close();
+  PARQUET_THROW_NOT_OK(out_file->Close());
 
   return;
 }  // namespace test
@@ -334,8 +336,27 @@ void FileDecryptor::DecryptFile(
     reader_properties.file_decryption_properties(file_decryption_properties->DeepClone());
   }
 
-  auto file_reader = parquet::ParquetFileReader::OpenFile(file, false, reader_properties);
+  std::shared_ptr<::arrow::io::RandomAccessFile> source;
+  PARQUET_ASSIGN_OR_THROW(
+      source, ::arrow::io::ReadableFile::Open(file, reader_properties.memory_pool()));
 
+  auto file_reader = parquet::ParquetFileReader::Open(source, reader_properties);
+  CheckFile(file_reader.get(), file_decryption_properties.get());
+
+  if (file_decryption_properties) {
+    reader_properties.file_decryption_properties(file_decryption_properties->DeepClone());
+  }
+  auto fut = parquet::ParquetFileReader::OpenAsync(source, reader_properties);
+  ASSERT_FINISHES_OK(fut);
+  ASSERT_OK_AND_ASSIGN(file_reader, fut.MoveResult());
+  CheckFile(file_reader.get(), file_decryption_properties.get());
+
+  file_reader->Close();
+  PARQUET_THROW_NOT_OK(source->Close());
+}
+
+void FileDecryptor::CheckFile(parquet::ParquetFileReader* file_reader,
+                              FileDecryptionProperties* file_decryption_properties) {
   // Get the File MetaData
   std::shared_ptr<parquet::FileMetaData> file_metadata = file_reader->metadata();
 
@@ -474,7 +495,6 @@ void FileDecryptor::DecryptFile(
     // make sure we got the same number of values the metadata says
     ASSERT_EQ(flba_md->num_values(), i);
   }
-  file_reader->Close();
 }
 
 }  // namespace test
diff --git a/cpp/src/parquet/encryption/test_encryption_util.h b/cpp/src/parquet/encryption/test_encryption_util.h
index 32790950f84..b5d71b9954f 100644
--- a/cpp/src/parquet/encryption/test_encryption_util.h
+++ b/cpp/src/parquet/encryption/test_encryption_util.h
@@ -33,6 +33,7 @@
 #include "parquet/test_util.h"
 
 namespace parquet {
+class ParquetFileReader;
 namespace encryption {
 namespace test {
 
@@ -106,6 +107,10 @@ class FileDecryptor {
  public:
   void DecryptFile(std::string file_name,
                    std::shared_ptr<FileDecryptionProperties> file_decryption_properties);
+
+ private:
+  void CheckFile(parquet::ParquetFileReader* file_reader,
+                 FileDecryptionProperties* file_decryption_properties);
 };
 
 }  // namespace test
diff --git a/cpp/src/parquet/file_reader.cc b/cpp/src/parquet/file_reader.cc
index 4ff214232e5..9dbfca433ce 100644
--- a/cpp/src/parquet/file_reader.cc
+++ b/cpp/src/parquet/file_reader.cc
@@ -258,10 +258,10 @@ class SerializedFile : public ParquetFileReader::Contents {
     file_metadata_ = std::move(metadata);
   }
 
-  ::arrow::Future<> PreBuffer(const std::vector<int>& row_groups,
-                              const std::vector<int>& column_indices,
-                              const ::arrow::io::IOContext& ctx,
-                              const ::arrow::io::CacheOptions& options) {
+  void PreBuffer(const std::vector<int>& row_groups,
+                 const std::vector<int>& column_indices,
+                 const ::arrow::io::IOContext& ctx,
+                 const ::arrow::io::CacheOptions& options) {
     cached_source_ =
         std::make_shared<::arrow::io::internal::ReadRangeCache>(source_, ctx, options);
     std::vector<::arrow::io::ReadRange> ranges;
@@ -272,10 +272,79 @@ class SerializedFile : public ParquetFileReader::Contents {
       }
     }
     PARQUET_THROW_NOT_OK(cached_source_->Cache(ranges));
-    return cached_source_->Wait();
   }
 
+  ::arrow::Future<> WhenBuffered(const std::vector<int>& row_groups,
+                                 const std::vector<int>& column_indices) const {
+    if (!cached_source_) {
+      return ::arrow::Status::Invalid("Must call PreBuffer before WhenBuffered");
+    }
+    std::vector<::arrow::io::ReadRange> ranges;
+    for (int row : row_groups) {
+      for (int col : column_indices) {
+        ranges.push_back(
+            ComputeColumnChunkRange(file_metadata_.get(), source_size_, row, col));
+      }
+    }
+    return cached_source_->WaitFor(ranges);
+  }
+
+  // Metadata/footer parsing. Divided up to separate sync/async paths, and to use
+  // exceptions for error handling (with the async path converting to Future/Status).
+
   void ParseMetaData() {
+    int64_t footer_read_size = GetFooterReadSize();
+    PARQUET_ASSIGN_OR_THROW(
+        auto footer_buffer,
+        source_->ReadAt(source_size_ - footer_read_size, footer_read_size));
+    uint32_t metadata_len = ParseFooterLength(footer_buffer, footer_read_size);
+    int64_t metadata_start = source_size_ - kFooterSize - metadata_len;
+
+    std::shared_ptr<::arrow::Buffer> metadata_buffer;
+    if (footer_read_size >= (metadata_len + kFooterSize)) {
+      metadata_buffer = SliceBuffer(
+          footer_buffer, footer_read_size - metadata_len - kFooterSize, metadata_len);
+    } else {
+      PARQUET_ASSIGN_OR_THROW(metadata_buffer,
+                              source_->ReadAt(metadata_start, metadata_len));
+    }
+
+    // Parse the footer depending on encryption type
+    const bool is_encrypted_footer =
+        memcmp(footer_buffer->data() + footer_read_size - 4, kParquetEMagic, 4) == 0;
+    if (is_encrypted_footer) {
+      // Encrypted file with Encrypted footer.
+      const std::pair<int64_t, uint32_t> read_size =
+          ParseMetaDataOfEncryptedFileWithEncryptedFooter(metadata_buffer, metadata_len);
+      // Read the actual footer
+      metadata_start = read_size.first;
+      metadata_len = read_size.second;
+      PARQUET_ASSIGN_OR_THROW(metadata_buffer,
+                              source_->ReadAt(metadata_start, metadata_len));
+      // Fall through
+    }
+
+    const uint32_t read_metadata_len =
+        ParseUnencryptedFileMetadata(metadata_buffer, metadata_len);
+    auto file_decryption_properties = properties_.file_decryption_properties().get();
+    if (is_encrypted_footer) {
+      // Nothing else to do here.
+      return;
+    } else if (!file_metadata_->is_encryption_algorithm_set()) {  // Non encrypted file.
+      if (file_decryption_properties != nullptr) {
+        if (!file_decryption_properties->plaintext_files_allowed()) {
+          throw ParquetException("Applying decryption properties on plaintext file");
+        }
+      }
+    } else {
+      // Encrypted file with plaintext footer mode.
+      ParseMetaDataOfEncryptedFileWithPlaintextFooter(
+          file_decryption_properties, metadata_buffer, metadata_len, read_metadata_len);
+    }
+  }
+
+  // Validate the source size and get the initial read size.
+  int64_t GetFooterReadSize() {
     if (source_size_ == 0) {
       throw ParquetInvalidOrCorruptedFileException("Parquet file size is 0 bytes");
     } else if (source_size_ < kFooterSize) {
@@ -283,12 +352,12 @@ class SerializedFile : public ParquetFileReader::Contents {
           "Parquet file size is ", source_size_,
           " bytes, smaller than the minimum file footer (", kFooterSize, " bytes)");
     }
+    return std::min(source_size_, kDefaultFooterReadSize);
+  }
 
-    int64_t footer_read_size = std::min(source_size_, kDefaultFooterReadSize);
-    PARQUET_ASSIGN_OR_THROW(
-        auto footer_buffer,
-        source_->ReadAt(source_size_ - footer_read_size, footer_read_size));
-
+  // Validate the magic bytes and get the length of the full footer.
+  uint32_t ParseFooterLength(const std::shared_ptr<::arrow::Buffer>& footer_buffer,
+                             const int64_t footer_read_size) {
     // Check if all bytes are read. Check if last 4 bytes read have the magic bits
     if (footer_buffer->size() != footer_read_size ||
         (memcmp(footer_buffer->data() + footer_read_size - 4, kParquetMagic, 4) != 0 &&
@@ -297,21 +366,91 @@ class SerializedFile : public ParquetFileReader::Contents {
           "Parquet magic bytes not found in footer. Either the file is corrupted or this "
           "is not a parquet file.");
     }
+    // Both encrypted/unencrypted footers have the same footer length check.
+    uint32_t metadata_len = ::arrow::util::SafeLoadAs<uint32_t>(
+        reinterpret_cast<const uint8_t*>(footer_buffer->data()) + footer_read_size -
+        kFooterSize);
+    if (metadata_len > source_size_ - kFooterSize) {
+      throw ParquetInvalidOrCorruptedFileException(
+          "Parquet file size is ", source_size_,
+          " bytes, smaller than the size reported by footer's (", metadata_len, "bytes)");
+    }
+    return metadata_len;
+  }
+
+  // Does not throw.
+  ::arrow::Future<> ParseMetaDataAsync() {
+    int64_t footer_read_size;
+    BEGIN_PARQUET_CATCH_EXCEPTIONS
+    footer_read_size = GetFooterReadSize();
+    END_PARQUET_CATCH_EXCEPTIONS
+    // Assumes this is kept alive externally
+    return source_->ReadAsync(source_size_ - footer_read_size, footer_read_size)
+        .Then([=](const std::shared_ptr<::arrow::Buffer>& footer_buffer)
+                  -> ::arrow::Future<> {
+          uint32_t metadata_len;
+          BEGIN_PARQUET_CATCH_EXCEPTIONS
+          metadata_len = ParseFooterLength(footer_buffer, footer_read_size);
+          END_PARQUET_CATCH_EXCEPTIONS
+          int64_t metadata_start = source_size_ - kFooterSize - metadata_len;
+
+          std::shared_ptr<::arrow::Buffer> metadata_buffer;
+          if (footer_read_size >= (metadata_len + kFooterSize)) {
+            metadata_buffer =
+                SliceBuffer(footer_buffer, footer_read_size - metadata_len - kFooterSize,
+                            metadata_len);
+            return ParseMaybeEncryptedMetaDataAsync(footer_buffer,
+                                                    std::move(metadata_buffer),
+                                                    footer_read_size, metadata_len);
+          }
+          return source_->ReadAsync(metadata_start, metadata_len)
+              .Then([=](const std::shared_ptr<::arrow::Buffer>& metadata_buffer) {
+                return ParseMaybeEncryptedMetaDataAsync(footer_buffer, metadata_buffer,
+                                                        footer_read_size, metadata_len);
+              });
+        });
+  }
 
-    if (memcmp(footer_buffer->data() + footer_read_size - 4, kParquetEMagic, 4) == 0) {
+  // Continuation
+  ::arrow::Future<> ParseMaybeEncryptedMetaDataAsync(
+      std::shared_ptr<::arrow::Buffer> footer_buffer,
+      std::shared_ptr<::arrow::Buffer> metadata_buffer, int64_t footer_read_size,
+      uint32_t metadata_len) {
+    // Parse the footer depending on encryption type
+    const bool is_encrypted_footer =
+        memcmp(footer_buffer->data() + footer_read_size - 4, kParquetEMagic, 4) == 0;
+    if (is_encrypted_footer) {
       // Encrypted file with Encrypted footer.
-      ParseMetaDataOfEncryptedFileWithEncryptedFooter(footer_buffer, footer_read_size);
-      return;
+      std::pair<int64_t, uint32_t> read_size;
+      BEGIN_PARQUET_CATCH_EXCEPTIONS
+      read_size =
+          ParseMetaDataOfEncryptedFileWithEncryptedFooter(metadata_buffer, metadata_len);
+      END_PARQUET_CATCH_EXCEPTIONS
+      // Read the actual footer
+      int64_t metadata_start = read_size.first;
+      metadata_len = read_size.second;
+      return source_->ReadAsync(metadata_start, metadata_len)
+          .Then([=](const std::shared_ptr<::arrow::Buffer>& metadata_buffer) {
+            // Continue and read the file footer
+            return ParseMetaDataFinal(metadata_buffer, metadata_len, is_encrypted_footer);
+          });
     }
+    return ParseMetaDataFinal(std::move(metadata_buffer), metadata_len,
+                              is_encrypted_footer);
+  }
 
-    // No encryption or encryption with plaintext footer mode.
-    std::shared_ptr<Buffer> metadata_buffer;
-    uint32_t metadata_len, read_metadata_len;
-    ParseUnencryptedFileMetadata(footer_buffer, footer_read_size, &metadata_buffer,
-                                 &metadata_len, &read_metadata_len);
-
+  // Continuation
+  ::arrow::Status ParseMetaDataFinal(std::shared_ptr<::arrow::Buffer> metadata_buffer,
+                                     uint32_t metadata_len,
+                                     const bool is_encrypted_footer) {
+    BEGIN_PARQUET_CATCH_EXCEPTIONS
+    const uint32_t read_metadata_len =
+        ParseUnencryptedFileMetadata(metadata_buffer, metadata_len);
     auto file_decryption_properties = properties_.file_decryption_properties().get();
-    if (!file_metadata_->is_encryption_algorithm_set()) {  // Non encrypted file.
+    if (is_encrypted_footer) {
+      // Nothing else to do here.
+      return ::arrow::Status::OK();
+    } else if (!file_metadata_->is_encryption_algorithm_set()) {  // Non encrypted file.
       if (file_decryption_properties != nullptr) {
         if (!file_decryption_properties->plaintext_files_allowed()) {
           throw ParquetException("Applying decryption properties on plaintext file");
@@ -322,6 +461,8 @@ class SerializedFile : public ParquetFileReader::Contents {
       ParseMetaDataOfEncryptedFileWithPlaintextFooter(
           file_decryption_properties, metadata_buffer, metadata_len, read_metadata_len);
     }
+    END_PARQUET_CATCH_EXCEPTIONS
+    return ::arrow::Status::OK();
   }
 
  private:
@@ -333,10 +474,9 @@ class SerializedFile : public ParquetFileReader::Contents {
 
   std::shared_ptr<InternalFileDecryptor> file_decryptor_;
 
-  void ParseUnencryptedFileMetadata(const std::shared_ptr<Buffer>& footer_buffer,
-                                    int64_t footer_read_size,
-                                    std::shared_ptr<Buffer>* metadata_buffer,
-                                    uint32_t* metadata_len, uint32_t* read_metadata_len);
+  // \return The true length of the metadata in bytes
+  uint32_t ParseUnencryptedFileMetadata(const std::shared_ptr<Buffer>& footer_buffer,
+                                        const uint32_t metadata_len);
 
   std::string HandleAadPrefix(FileDecryptionProperties* file_decryption_properties,
                               EncryptionAlgorithm& algo);
@@ -346,68 +486,36 @@ class SerializedFile : public ParquetFileReader::Contents {
       const std::shared_ptr<Buffer>& metadata_buffer, uint32_t metadata_len,
       uint32_t read_metadata_len);
 
-  void ParseMetaDataOfEncryptedFileWithEncryptedFooter(
-      const std::shared_ptr<Buffer>& footer_buffer, int64_t footer_read_size);
+  // \return The position and size of the actual footer
+  std::pair<int64_t, uint32_t> ParseMetaDataOfEncryptedFileWithEncryptedFooter(
+      const std::shared_ptr<Buffer>& crypto_metadata_buffer, uint32_t footer_len);
 };
 
-void SerializedFile::ParseUnencryptedFileMetadata(
-    const std::shared_ptr<Buffer>& footer_buffer, int64_t footer_read_size,
-    std::shared_ptr<Buffer>* metadata_buffer, uint32_t* metadata_len,
-    uint32_t* read_metadata_len) {
-  *metadata_len = ::arrow::util::SafeLoadAs<uint32_t>(
-      reinterpret_cast<const uint8_t*>(footer_buffer->data()) + footer_read_size -
-      kFooterSize);
-  int64_t metadata_start = source_size_ - kFooterSize - *metadata_len;
-  if (*metadata_len > source_size_ - kFooterSize) {
-    throw ParquetInvalidOrCorruptedFileException(
-        "Parquet file size is ", source_size_,
-        " bytes, smaller than the size reported by metadata (", metadata_len, "bytes)");
-  }
-
-  // Check if the footer_buffer contains the entire metadata
-  if (footer_read_size >= (*metadata_len + kFooterSize)) {
-    *metadata_buffer = SliceBuffer(
-        footer_buffer, footer_read_size - *metadata_len - kFooterSize, *metadata_len);
-  } else {
-    PARQUET_ASSIGN_OR_THROW(*metadata_buffer,
-                            source_->ReadAt(metadata_start, *metadata_len));
-    if ((*metadata_buffer)->size() != *metadata_len) {
-      throw ParquetException("Failed reading metadata buffer (requested " +
-                             std::to_string(*metadata_len) + " bytes but got " +
-                             std::to_string((*metadata_buffer)->size()) + " bytes)");
-    }
+uint32_t SerializedFile::ParseUnencryptedFileMetadata(
+    const std::shared_ptr<Buffer>& metadata_buffer, const uint32_t metadata_len) {
+  if (metadata_buffer->size() != metadata_len) {
+    throw ParquetException("Failed reading metadata buffer (requested " +
+                           std::to_string(metadata_len) + " bytes but got " +
+                           std::to_string(metadata_buffer->size()) + " bytes)");
   }
-
-  *read_metadata_len = *metadata_len;
-  file_metadata_ = FileMetaData::Make((*metadata_buffer)->data(), read_metadata_len);
+  uint32_t read_metadata_len = metadata_len;
+  // The encrypted read path falls through to here, so pass in the decryptor
+  file_metadata_ =
+      FileMetaData::Make(metadata_buffer->data(), &read_metadata_len, file_decryptor_);
+  return read_metadata_len;
 }
 
-void SerializedFile::ParseMetaDataOfEncryptedFileWithEncryptedFooter(
-    const std::shared_ptr<Buffer>& footer_buffer, int64_t footer_read_size) {
+std::pair<int64_t, uint32_t>
+SerializedFile::ParseMetaDataOfEncryptedFileWithEncryptedFooter(
+    const std::shared_ptr<::arrow::Buffer>& crypto_metadata_buffer,
+    // both metadata & crypto metadata length
+    const uint32_t footer_len) {
   // encryption with encrypted footer
-  // both metadata & crypto metadata length
-  uint32_t footer_len = ::arrow::util::SafeLoadAs<uint32_t>(
-      reinterpret_cast<const uint8_t*>(footer_buffer->data()) + footer_read_size -
-      kFooterSize);
-  int64_t crypto_metadata_start = source_size_ - kFooterSize - footer_len;
-  if (kFooterSize + footer_len > source_size_) {
-    throw ParquetInvalidOrCorruptedFileException(
-        "Parquet file size is ", source_size_,
-        " bytes, smaller than the size reported by footer's (", footer_len, "bytes)");
-  }
-  std::shared_ptr<Buffer> crypto_metadata_buffer;
   // Check if the footer_buffer contains the entire metadata
-  if (footer_read_size >= (footer_len + kFooterSize)) {
-    crypto_metadata_buffer = SliceBuffer(
-        footer_buffer, footer_read_size - footer_len - kFooterSize, footer_len);
-  } else {
-    PARQUET_ASSIGN_OR_THROW(crypto_metadata_buffer,
-                            source_->ReadAt(crypto_metadata_start, footer_len));
-    if (crypto_metadata_buffer->size() != footer_len) {
-      throw ParquetException("Failed reading encrypted metadata buffer (requested " +
-                             std::to_string(footer_len) + " bytes but got " +
-                             std::to_string(crypto_metadata_buffer->size()) + " bytes)");
-    }
+  if (crypto_metadata_buffer->size() != footer_len) {
+    throw ParquetException("Failed reading encrypted metadata buffer (requested " +
+                           std::to_string(footer_len) + " bytes but got " +
+                           std::to_string(crypto_metadata_buffer->size()) + " bytes)");
   }
   auto file_decryption_properties = properties_.file_decryption_properties().get();
   if (file_decryption_properties == nullptr) {
@@ -426,16 +534,7 @@ void SerializedFile::ParseMetaDataOfEncryptedFileWithEncryptedFooter(
 
   int64_t metadata_offset = source_size_ - kFooterSize - footer_len + crypto_metadata_len;
   uint32_t metadata_len = footer_len - crypto_metadata_len;
-  PARQUET_ASSIGN_OR_THROW(auto metadata_buffer,
-                          source_->ReadAt(metadata_offset, metadata_len));
-  if (metadata_buffer->size() != metadata_len) {
-    throw ParquetException("Failed reading metadata buffer (requested " +
-                           std::to_string(metadata_len) + " bytes but got " +
-                           std::to_string(metadata_buffer->size()) + " bytes)");
-  }
-
-  file_metadata_ =
-      FileMetaData::Make(metadata_buffer->data(), &metadata_len, file_decryptor_);
+  return std::make_pair(metadata_offset, metadata_len);
 }
 
 void SerializedFile::ParseMetaDataOfEncryptedFileWithPlaintextFooter(
@@ -547,6 +646,33 @@ std::unique_ptr<ParquetFileReader::Contents> ParquetFileReader::Contents::Open(
   return result;
 }
 
+::arrow::Future<std::unique_ptr<ParquetFileReader::Contents>>
+ParquetFileReader::Contents::OpenAsync(std::shared_ptr<ArrowInputFile> source,
+                                       const ReaderProperties& props,
+                                       std::shared_ptr<FileMetaData> metadata) {
+  BEGIN_PARQUET_CATCH_EXCEPTIONS
+  std::unique_ptr<ParquetFileReader::Contents> result(
+      new SerializedFile(std::move(source), props));
+  SerializedFile* file = static_cast<SerializedFile*>(result.get());
+  if (metadata == nullptr) {
+    // TODO(ARROW-12259): workaround since we have Future<(move-only type)>
+    struct {
+      ::arrow::Result<std::unique_ptr<ParquetFileReader::Contents>> operator()() {
+        return std::move(result);
+      }
+
+      std::unique_ptr<ParquetFileReader::Contents> result;
+    } Continuation;
+    Continuation.result = std::move(result);
+    return file->ParseMetaDataAsync().Then(std::move(Continuation));
+  } else {
+    file->set_metadata(std::move(metadata));
+    return ::arrow::Future<std::unique_ptr<ParquetFileReader::Contents>>::MakeFinished(
+        std::move(result));
+  }
+  END_PARQUET_CATCH_EXCEPTIONS
+}
+
 std::unique_ptr<ParquetFileReader> ParquetFileReader::Open(
     std::shared_ptr<::arrow::io::RandomAccessFile> source, const ReaderProperties& props,
     std::shared_ptr<FileMetaData> metadata) {
@@ -571,6 +697,28 @@ std::unique_ptr<ParquetFileReader> ParquetFileReader::OpenFile(
   return Open(std::move(source), props, std::move(metadata));
 }
 
+::arrow::Future<std::unique_ptr<ParquetFileReader>> ParquetFileReader::OpenAsync(
+    std::shared_ptr<::arrow::io::RandomAccessFile> source, const ReaderProperties& props,
+    std::shared_ptr<FileMetaData> metadata) {
+  BEGIN_PARQUET_CATCH_EXCEPTIONS
+  auto fut = SerializedFile::OpenAsync(std::move(source), props, std::move(metadata));
+  // TODO(ARROW-12259): workaround since we have Future<(move-only type)>
+  auto completed = ::arrow::Future<std::unique_ptr<ParquetFileReader>>::Make();
+  fut.AddCallback([fut, completed](
+                      const ::arrow::Result<std::unique_ptr<ParquetFileReader::Contents>>&
+                          contents) mutable {
+    if (!contents.ok()) {
+      completed.MarkFinished(contents.status());
+      return;
+    }
+    std::unique_ptr<ParquetFileReader> result(new ParquetFileReader());
+    result->Open(fut.MoveResult().MoveValueUnsafe());
+    completed.MarkFinished(std::move(result));
+  });
+  return completed;
+  END_PARQUET_CATCH_EXCEPTIONS
+}
+
 void ParquetFileReader::Open(std::unique_ptr<ParquetFileReader::Contents> contents) {
   contents_ = std::move(contents);
 }
@@ -595,14 +743,22 @@ std::shared_ptr<RowGroupReader> ParquetFileReader::RowGroup(int i) {
   return contents_->GetRowGroup(i);
 }
 
-::arrow::Future<> ParquetFileReader::PreBuffer(const std::vector<int>& row_groups,
-                                               const std::vector<int>& column_indices,
-                                               const ::arrow::io::IOContext& ctx,
-                                               const ::arrow::io::CacheOptions& options) {
+void ParquetFileReader::PreBuffer(const std::vector<int>& row_groups,
+                                  const std::vector<int>& column_indices,
+                                  const ::arrow::io::IOContext& ctx,
+                                  const ::arrow::io::CacheOptions& options) {
+  // Access private methods here
+  SerializedFile* file =
+      ::arrow::internal::checked_cast<SerializedFile*>(contents_.get());
+  file->PreBuffer(row_groups, column_indices, ctx, options);
+}
+
+::arrow::Future<> ParquetFileReader::WhenBuffered(
+    const std::vector<int>& row_groups, const std::vector<int>& column_indices) const {
   // Access private methods here
   SerializedFile* file =
       ::arrow::internal::checked_cast<SerializedFile*>(contents_.get());
-  return file->PreBuffer(row_groups, column_indices, ctx, options);
+  return file->WhenBuffered(row_groups, column_indices);
 }
 
 // ----------------------------------------------------------------------
diff --git a/cpp/src/parquet/file_reader.h b/cpp/src/parquet/file_reader.h
index de8685c7b90..4bc7ec2353a 100644
--- a/cpp/src/parquet/file_reader.h
+++ b/cpp/src/parquet/file_reader.h
@@ -74,6 +74,11 @@ class PARQUET_EXPORT ParquetFileReader {
         const ReaderProperties& props = default_reader_properties(),
         std::shared_ptr<FileMetaData> metadata = NULLPTR);
 
+    static ::arrow::Future<std::unique_ptr<Contents>> OpenAsync(
+        std::shared_ptr<::arrow::io::RandomAccessFile> source,
+        const ReaderProperties& props = default_reader_properties(),
+        std::shared_ptr<FileMetaData> metadata = NULLPTR);
+
     virtual ~Contents() = default;
     // Perform any cleanup associated with the file contents
     virtual void Close() = 0;
@@ -98,6 +103,13 @@ class PARQUET_EXPORT ParquetFileReader {
       const ReaderProperties& props = default_reader_properties(),
       std::shared_ptr<FileMetaData> metadata = NULLPTR);
 
+  // Asynchronously open a file reader from an Arrow file object.
+  // Does not throw - all errors are reported through the Future.
+  static ::arrow::Future<std::unique_ptr<ParquetFileReader>> OpenAsync(
+      std::shared_ptr<::arrow::io::RandomAccessFile> source,
+      const ReaderProperties& props = default_reader_properties(),
+      std::shared_ptr<FileMetaData> metadata = NULLPTR);
+
   void Open(std::unique_ptr<Contents> contents);
   void Close();
 
@@ -125,10 +137,21 @@ class PARQUET_EXPORT ParquetFileReader {
   /// buffered in memory until either \a PreBuffer() is called again,
   /// or the reader itself is destructed. Reading - and buffering -
   /// only one row group at a time may be useful.
-  ::arrow::Future<> PreBuffer(const std::vector<int>& row_groups,
-                              const std::vector<int>& column_indices,
-                              const ::arrow::io::IOContext& ctx,
-                              const ::arrow::io::CacheOptions& options);
+  ///
+  /// This method may throw.
+  void PreBuffer(const std::vector<int>& row_groups,
+                 const std::vector<int>& column_indices,
+                 const ::arrow::io::IOContext& ctx,
+                 const ::arrow::io::CacheOptions& options);
+
+  /// Wait for the specified row groups and column indices to be pre-buffered.
+  ///
+  /// After the returned Future completes, reading the specified row
+  /// groups/columns will not block.
+  ///
+  /// PreBuffer must be called first. This method does not throw.
+  ::arrow::Future<> WhenBuffered(const std::vector<int>& row_groups,
+                                 const std::vector<int>& column_indices) const;
 
  private:
   // Holds a pointer to an instance of Contents implementation
diff --git a/cpp/src/parquet/properties.h b/cpp/src/parquet/properties.h
index 13ddc78cf11..5018fff9531 100644
--- a/cpp/src/parquet/properties.h
+++ b/cpp/src/parquet/properties.h
@@ -613,7 +613,7 @@ class PARQUET_EXPORT ArrowReaderProperties {
   /// implementation for characteristics of different filesystems.
   void set_cache_options(::arrow::io::CacheOptions options) { cache_options_ = options; }
 
-  ::arrow::io::CacheOptions cache_options() const { return cache_options_; }
+  const ::arrow::io::CacheOptions& cache_options() const { return cache_options_; }
 
   /// Set execution context for read coalescing.
   void set_io_context(const ::arrow::io::IOContext& ctx) { io_context_ = ctx; }
diff --git a/cpp/src/parquet/reader_test.cc b/cpp/src/parquet/reader_test.cc
index 321531bb8f1..9bbcda3cf1f 100644
--- a/cpp/src/parquet/reader_test.cc
+++ b/cpp/src/parquet/reader_test.cc
@@ -16,6 +16,7 @@
 // under the License.
 
 #include <fcntl.h>
+#include <gmock/gmock.h>
 #include <gtest/gtest.h>
 #include <cstdint>
 #include <cstdlib>
@@ -26,6 +27,7 @@
 #include "arrow/array.h"
 #include "arrow/buffer.h"
 #include "arrow/io/file.h"
+#include "arrow/testing/future_util.h"
 #include "arrow/testing/gtest_util.h"
 #include "arrow/testing/random.h"
 #include "arrow/util/checked_cast.h"
@@ -559,6 +561,76 @@ TEST(TestFileReader, BufferedReads) {
   }
 }
 
+std::unique_ptr<ParquetFileReader> OpenBuffer(const std::string& contents) {
+  auto buffer = ::arrow::Buffer::FromString(contents);
+  return ParquetFileReader::Open(std::make_shared<::arrow::io::BufferReader>(buffer));
+}
+
+::arrow::Future<> OpenBufferAsync(const std::string& contents) {
+  auto buffer = ::arrow::Buffer::FromString(contents);
+  return ::arrow::Future<>(
+      ParquetFileReader::OpenAsync(std::make_shared<::arrow::io::BufferReader>(buffer)));
+}
+
+// https://github.com/google/googletest/pull/2904 not available in our version of
+// gtest/gmock
+#define EXPECT_THROW_THAT(callable, ex_type, property)   \
+  EXPECT_THROW(                                          \
+      try { (callable)(); } catch (const ex_type& err) { \
+        EXPECT_THAT(err, (property));                    \
+        throw;                                           \
+      },                                                 \
+      ex_type)
+
+TEST(TestFileReader, TestOpenErrors) {
+  EXPECT_THROW_THAT(
+      []() { OpenBuffer(""); }, ParquetInvalidOrCorruptedFileException,
+      ::testing::Property(&ParquetInvalidOrCorruptedFileException::what,
+                          ::testing::HasSubstr("Parquet file size is 0 bytes")));
+  EXPECT_THROW_THAT(
+      []() { OpenBuffer("AAAAPAR0"); }, ParquetInvalidOrCorruptedFileException,
+      ::testing::Property(&ParquetInvalidOrCorruptedFileException::what,
+                          ::testing::HasSubstr("Parquet magic bytes not found")));
+  EXPECT_THROW_THAT(
+      []() { OpenBuffer("APAR1"); }, ParquetInvalidOrCorruptedFileException,
+      ::testing::Property(
+          &ParquetInvalidOrCorruptedFileException::what,
+          ::testing::HasSubstr(
+              "Parquet file size is 5 bytes, smaller than the minimum file footer")));
+  EXPECT_THROW_THAT(
+      []() { OpenBuffer("\xFF\xFF\xFF\x0FPAR1"); },
+      ParquetInvalidOrCorruptedFileException,
+      ::testing::Property(&ParquetInvalidOrCorruptedFileException::what,
+                          ::testing::HasSubstr("Parquet file size is 8 bytes, smaller "
+                                               "than the size reported by footer's")));
+  EXPECT_THROW_THAT(
+      []() { OpenBuffer(std::string("\x00\x00\x00\x00PAR1", 8)); }, ParquetException,
+      ::testing::Property(
+          &ParquetException::what,
+          ::testing::HasSubstr("Couldn't deserialize thrift: No more data to read")));
+
+  EXPECT_FINISHES_AND_RAISES_WITH_MESSAGE_THAT(
+      IOError, ::testing::HasSubstr("Parquet file size is 0 bytes"), OpenBufferAsync(""));
+  EXPECT_FINISHES_AND_RAISES_WITH_MESSAGE_THAT(
+      IOError, ::testing::HasSubstr("Parquet magic bytes not found"),
+      OpenBufferAsync("AAAAPAR0"));
+  EXPECT_FINISHES_AND_RAISES_WITH_MESSAGE_THAT(
+      IOError,
+      ::testing::HasSubstr(
+          "Parquet file size is 5 bytes, smaller than the minimum file footer"),
+      OpenBufferAsync("APAR1"));
+  EXPECT_FINISHES_AND_RAISES_WITH_MESSAGE_THAT(
+      IOError,
+      ::testing::HasSubstr(
+          "Parquet file size is 8 bytes, smaller than the size reported by footer's"),
+      OpenBufferAsync("\xFF\xFF\xFF\x0FPAR1"));
+  EXPECT_FINISHES_AND_RAISES_WITH_MESSAGE_THAT(
+      IOError, ::testing::HasSubstr("Couldn't deserialize thrift: No more data to read"),
+      OpenBufferAsync(std::string("\x00\x00\x00\x00PAR1", 8)));
+}
+
+#undef EXPECT_THROW_THAT
+
 #ifdef ARROW_WITH_LZ4
 struct TestCodecParam {
   std::string name;
diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx
index 78620b25942..bd93da9cb18 100644
--- a/python/pyarrow/_dataset.pyx
+++ b/python/pyarrow/_dataset.pyx
@@ -925,10 +925,10 @@ cdef class Fragment(_Weakrefable):
         """Return the physical schema of this Fragment. This schema can be
         different from the dataset read schema."""
         cdef:
-            shared_ptr[CSchema] c_schema
-
-        c_schema = GetResultValue(self.fragment.ReadPhysicalSchema())
-        return pyarrow_wrap_schema(c_schema)
+            CResult[shared_ptr[CSchema]] maybe_schema
+        with nogil:
+            maybe_schema = self.fragment.ReadPhysicalSchema()
+        return pyarrow_wrap_schema(GetResultValue(maybe_schema))
 
     @property
     def partition_expression(self):

From d77e272c81d409c77a2ebe4127572f2ef44c7632 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Mon, 7 Jun 2021 20:45:36 +0200
Subject: [PATCH 373/719] ARROW-12950: [C++] Add count_substring kernel

Depends on ARROW-12969. ignore_case is not included here; I'll include it with the regex variant in ARROW-12952.

Closes #10454 from lidavidm/arrow-12950

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 .../arrow/compute/kernels/scalar_string.cc    | 70 ++++++++++++++++++-
 .../compute/kernels/scalar_string_test.cc     | 19 +++++
 docs/source/cpp/compute.rst                   | 30 ++++----
 docs/source/python/api/compute.rst            |  1 +
 python/pyarrow/compute.py                     | 19 +++++
 python/pyarrow/tests/test_compute.py          | 13 ++++
 6 files changed, 139 insertions(+), 13 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/scalar_string.cc b/cpp/src/arrow/compute/kernels/scalar_string.cc
index 154b57d1d8e..df3a3991fcf 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string.cc
@@ -741,8 +741,12 @@ template <typename InputType>
 struct FindSubstringExec {
   using OffsetType = typename TypeTraits<InputType>::OffsetType;
   static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    const MatchSubstringOptions& options = MatchSubstringState::Get(ctx);
+    if (options.ignore_case) {
+      return Status::NotImplemented("find_substring with ignore_case");
+    }
     applicator::ScalarUnaryNotNullStateful<OffsetType, InputType, FindSubstring> kernel{
-        FindSubstring(PlainSubstringMatcher(MatchSubstringState::Get(ctx)))};
+        FindSubstring(PlainSubstringMatcher(options))};
     return kernel.Exec(ctx, batch, out);
   }
 };
@@ -771,6 +775,69 @@ void AddFindSubstring(FunctionRegistry* registry) {
   DCHECK_OK(registry->AddFunction(std::move(func)));
 }
 
+// Substring count
+
+struct CountSubstring {
+  const PlainSubstringMatcher matcher_;
+
+  explicit CountSubstring(PlainSubstringMatcher matcher) : matcher_(std::move(matcher)) {}
+
+  template <typename OutValue, typename... Ignored>
+  OutValue Call(KernelContext*, util::string_view val, Status*) const {
+    OutValue count = 0;
+    uint64_t start = 0;
+    const auto pattern_size = std::max<uint64_t>(1, matcher_.options_.pattern.size());
+    while (start <= val.size()) {
+      const int64_t index = matcher_.Find(val.substr(start));
+      if (index >= 0) {
+        count++;
+        start += index + pattern_size;
+      } else {
+        break;
+      }
+    }
+    return count;
+  }
+};
+
+template <typename InputType>
+struct CountSubstringExec {
+  using OffsetType = typename TypeTraits<InputType>::OffsetType;
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    const MatchSubstringOptions& options = MatchSubstringState::Get(ctx);
+    if (options.ignore_case) {
+      return Status::NotImplemented("count_substring with ignore_case");
+    }
+    applicator::ScalarUnaryNotNullStateful<OffsetType, InputType, CountSubstring> kernel{
+        CountSubstring(PlainSubstringMatcher(options))};
+    return kernel.Exec(ctx, batch, out);
+  }
+};
+
+const FunctionDoc count_substring_doc(
+    "Count occurrences of substring",
+    ("For each string in `strings`, emit the number of occurrences of the given "
+     "pattern.\n"
+     "Null inputs emit null. The pattern must be given in MatchSubstringOptions."),
+    {"strings"}, "MatchSubstringOptions");
+
+void AddCountSubstring(FunctionRegistry* registry) {
+  auto func = std::make_shared<ScalarFunction>("count_substring", Arity::Unary(),
+                                               &count_substring_doc);
+  for (const auto& ty : BaseBinaryTypes()) {
+    std::shared_ptr<DataType> offset_type;
+    if (ty->id() == Type::type::LARGE_BINARY || ty->id() == Type::type::LARGE_STRING) {
+      offset_type = int64();
+    } else {
+      offset_type = int32();
+    }
+    DCHECK_OK(func->AddKernel({ty}, offset_type,
+                              GenerateTypeAgnosticVarBinaryBase<CountSubstringExec>(ty),
+                              MatchSubstringState::Init));
+  }
+  DCHECK_OK(registry->AddFunction(std::move(func)));
+}
+
 // Slicing
 
 template <typename Type, typename Derived>
@@ -3213,6 +3280,7 @@ void RegisterScalarStringAscii(FunctionRegistry* registry) {
   AddUtf8Length(registry);
   AddMatchSubstring(registry);
   AddFindSubstring(registry);
+  AddCountSubstring(registry);
   MakeUnaryStringBatchKernelWithState<ReplaceSubStringPlain>(
       "replace_substring", registry, &replace_substring_doc,
       MemAllocation::NO_PREALLOCATE);
diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
index bd9dba2deb3..9b4cef494d7 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
@@ -103,6 +103,25 @@ TYPED_TEST(TestBinaryKernels, FindSubstring) {
                    "[0, 0, null]", &options_empty);
 }
 
+TYPED_TEST(TestBinaryKernels, CountSubstring) {
+  MatchSubstringOptions options{"aba"};
+  this->CheckUnary("count_substring", "[]", this->offset_type(), "[]", &options);
+  this->CheckUnary(
+      "count_substring",
+      R"(["", null, "ab", "aba", "baba", "ababa", "abaaba", "babacaba", "ABA"])",
+      this->offset_type(), "[0, null, 0, 1, 1, 1, 2, 2, 0]", &options);
+
+  MatchSubstringOptions options_empty{""};
+  this->CheckUnary("count_substring", R"(["", null, "abc"])", this->offset_type(),
+                   "[1, null, 4]", &options_empty);
+
+  MatchSubstringOptions options_repeated{"aaa"};
+  this->CheckUnary("count_substring", R"(["", "aaaa", "aaaaa", "aaaaaa", "aaá"])",
+                   this->offset_type(), "[0, 1, 1, 2, 0]", &options_repeated);
+
+  // TODO: case-insensitive
+}
+
 template <typename TestType>
 class TestStringKernels : public BaseTestStringKernels<TestType> {};
 
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index 02c8fb3eac7..434d4a23e9c 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -561,45 +561,51 @@ Containment tests
 +---------------------------+------------+------------------------------------+--------------------+----------------------------------------+
 | Function name             | Arity      | Input types                        | Output type        | Options class                          |
 +===========================+============+====================================+====================+========================================+
-| find_substring            | Unary      | String-like                        | Int32 or Int64 (1) | :struct:`MatchSubstringOptions`        |
+| count_substring           | Unary      | String-like                        | Int32 or Int64 (1) | :struct:`MatchSubstringOptions`        |
 +---------------------------+------------+------------------------------------+--------------------+----------------------------------------+
-| match_like                | Unary      | String-like                        | Boolean (2)        | :struct:`MatchSubstringOptions`        |
+| find_substring            | Unary      | String-like                        | Int32 or Int64 (2) | :struct:`MatchSubstringOptions`        |
 +---------------------------+------------+------------------------------------+--------------------+----------------------------------------+
-| match_substring           | Unary      | String-like                        | Boolean (3)        | :struct:`MatchSubstringOptions`        |
+| match_like                | Unary      | String-like                        | Boolean (3)        | :struct:`MatchSubstringOptions`        |
 +---------------------------+------------+------------------------------------+--------------------+----------------------------------------+
-| match_substring_regex     | Unary      | String-like                        | Boolean (4)        | :struct:`MatchSubstringOptions`        |
+| match_substring           | Unary      | String-like                        | Boolean (4)        | :struct:`MatchSubstringOptions`        |
 +---------------------------+------------+------------------------------------+--------------------+----------------------------------------+
-| index_in                  | Unary      | Boolean, Null, Numeric, Temporal,  | Int32 (5)          | :struct:`SetLookupOptions`             |
+| match_substring_regex     | Unary      | String-like                        | Boolean (5)        | :struct:`MatchSubstringOptions`        |
++---------------------------+------------+------------------------------------+--------------------+----------------------------------------+
+| index_in                  | Unary      | Boolean, Null, Numeric, Temporal,  | Int32 (6)          | :struct:`SetLookupOptions`             |
 |                           |            | Binary- and String-like            |                    |                                        |
 +---------------------------+------------+------------------------------------+--------------------+----------------------------------------+
-| is_in                     | Unary      | Boolean, Null, Numeric, Temporal,  | Boolean (6)        | :struct:`SetLookupOptions`             |
+| is_in                     | Unary      | Boolean, Null, Numeric, Temporal,  | Boolean (7)        | :struct:`SetLookupOptions`             |
 |                           |            | Binary- and String-like            |                    |                                        |
 +---------------------------+------------+------------------------------------+--------------------+----------------------------------------+
 
+* \(1) Output is the number of occurrences of
+  :member:`MatchSubstringOptions::pattern` in the corresponding input
+  string. Output type is Int32 for Binary/String, Int64
+  for LargeBinary/LargeString.
 
-* \(1) Output is the index of the first occurrence of
+* \(2) Output is the index of the first occurrence of
   :member:`MatchSubstringOptions::pattern` in the corresponding input
   string, otherwise -1. Output type is Int32 for Binary/String, Int64
   for LargeBinary/LargeString.
 
-* \(2) Output is true iff the SQL-style LIKE pattern
+* \(3) Output is true iff the SQL-style LIKE pattern
   :member:`MatchSubstringOptions::pattern` fully matches the
   corresponding input element. That is, ``%`` will match any number of
   characters, ``_`` will match exactly one character, and any other
   character matches itself. To match a literal percent sign or
   underscore, precede the character with a backslash.
 
-* \(3) Output is true iff :member:`MatchSubstringOptions::pattern`
+* \(4) Output is true iff :member:`MatchSubstringOptions::pattern`
   is a substring of the corresponding input element.
 
-* \(4) Output is true iff :member:`MatchSubstringOptions::pattern`
+* \(5) Output is true iff :member:`MatchSubstringOptions::pattern`
   matches the corresponding input element at any position.
 
-* \(5) Output is the index of the corresponding input element in
+* \(6) Output is the index of the corresponding input element in
   :member:`SetLookupOptions::value_set`, if found there.  Otherwise,
   output is null.
 
-* \(6) Output is true iff the corresponding input element is equal to one
+* \(7) Output is true iff the corresponding input element is equal to one
   of the elements in :member:`SetLookupOptions::value_set`.
 
 
diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst
index ccd530073aa..a586f9011fd 100644
--- a/docs/source/python/api/compute.rst
+++ b/docs/source/python/api/compute.rst
@@ -178,6 +178,7 @@ Containment tests
 .. autosummary::
    :toctree: ../generated/
 
+   count_substring
    find_substring
    index_in
    is_in
diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py
index eb66f4407c8..8dc7181514c 100644
--- a/python/pyarrow/compute.py
+++ b/python/pyarrow/compute.py
@@ -291,6 +291,25 @@ def cast(arr, target_type, safe=True):
     return call_function("cast", [arr], options)
 
 
+def count_substring(array, pattern):
+    """
+    Count the occurrences of substring *pattern* in each value of a
+    string array.
+
+    Parameters
+    ----------
+    array : pyarrow.Array or pyarrow.ChunkedArray
+    pattern : str
+        pattern to search for exact matches
+
+    Returns
+    -------
+    result : pyarrow.Array or pyarrow.ChunkedArray
+    """
+    return call_function("count_substring", [array],
+                         MatchSubstringOptions(pattern))
+
+
 def find_substring(array, pattern):
     """
     Find the index of the first occurrence of substring *pattern* in each
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index a78be20f9cf..64d5ad0a30d 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -285,6 +285,19 @@ def test_variance():
     assert pc.variance(data, ddof=1).as_py() == 6.0
 
 
+def test_count_substring():
+    arr = pa.array(["ab", "cab", "abcab", "ba", "AB", None])
+    result = pc.count_substring(arr, "ab")
+    expected = pa.array([1, 1, 2, 0, 0, None], type=pa.int32())
+    assert expected.equals(result)
+
+    arr = pa.array(["ab", "cab", "abcab", "ba", "AB", None],
+                   type=pa.large_string())
+    result = pc.count_substring(arr, "ab")
+    expected = pa.array([1, 1, 2, 0, 0, None], type=pa.int64())
+    assert expected.equals(result)
+
+
 def test_find_substring():
     arr = pa.array(["ab", "cab", "ba", None])
     result = pc.find_substring(arr, "ab")

From 4b47ccafd03fa02b3268778e5160594321a27f9f Mon Sep 17 00:00:00 2001
From: Anthony Louis <anthony@simbioseventures.com>
Date: Mon, 7 Jun 2021 21:09:16 +0200
Subject: [PATCH 374/719] ARROW-12987: [C++][CI] Switch to bundled utf8proc
 with version 2.2 in Ubuntu 18.04 images
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The tests are failing in the builds because the **utf8proc** dependency in Ubuntu 18.04 package is outdated.

I take a look at the utf8proc repository and I check that the error related to the build was fixed by that Pull Request https://github.com/JuliaStrings/utf8proc/pull/134 that is part of the 2.2 release: https://github.com/JuliaStrings/utf8proc/releases/tag/v2.2.0, but the Ubuntu 18.04 has only the 2.1.0 version in its package manager.

Closes #10468 from anthonylouisbsb/fixbug/fix-nightly-build-problems

Authored-by: Anthony Louis <anthony@simbioseventures.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 ci/docker/ubuntu-18.04-cpp.dockerfile | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/ci/docker/ubuntu-18.04-cpp.dockerfile b/ci/docker/ubuntu-18.04-cpp.dockerfile
index 9445475ab5f..065f8faf278 100644
--- a/ci/docker/ubuntu-18.04-cpp.dockerfile
+++ b/ci/docker/ubuntu-18.04-cpp.dockerfile
@@ -79,7 +79,6 @@ RUN apt-get update -y -q && \
         libre2-dev \
         libsnappy-dev \
         libssl-dev \
-        libutf8proc-dev \
         libzstd-dev \
         ninja-build \
         pkg-config \
@@ -97,6 +96,7 @@ RUN apt-get update -y -q && \
 # - libgtest-dev only provide sources
 # - libprotobuf-dev only provide sources
 # - thrift is too old
+# - utf8proc is too old(v2.1.0)
 # - s3 tests would require boost-asio that is included since Boost 1.66.0
 ENV ARROW_BUILD_TESTS=ON \
     ARROW_DEPENDENCY_SOURCE=SYSTEM \
@@ -126,4 +126,5 @@ ENV ARROW_BUILD_TESTS=ON \
     PARQUET_BUILD_EXECUTABLES=ON \
     PARQUET_BUILD_EXAMPLES=ON \
     PATH=/usr/lib/ccache/:$PATH \
-    Thrift_SOURCE=BUNDLED
+    Thrift_SOURCE=BUNDLED \
+    utf8proc_SOURCE=BUNDLED
\ No newline at end of file

From f3a6d54a05edc529ad7673a5b71fb799cb23ed53 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Mon, 7 Jun 2021 21:10:19 +0200
Subject: [PATCH 375/719] ARROW-12985: [Python][Packaging] Unable to install
 pygit2 in the arm64 wheel builds
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #10462 from kszucs/crossbow-credentials-callback

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 dev/archery/archery/crossbow/core.py | 3 ++-
 dev/archery/setup.py                 | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/dev/archery/archery/crossbow/core.py b/dev/archery/archery/crossbow/core.py
index 9d3074a21d5..4f24d7592e9 100644
--- a/dev/archery/archery/crossbow/core.py
+++ b/dev/archery/archery/crossbow/core.py
@@ -194,7 +194,8 @@ def credentials(self, url, username_from_url, allowed_types):
             print(msg)
             raise CrossbowError(msg)
 
-        if allowed_types & pygit2.credentials.GIT_CREDTYPE_USERPASS_PLAINTEXT:
+        if (allowed_types &
+                pygit2.credentials.GIT_CREDENTIAL_USERPASS_PLAINTEXT):
             return pygit2.UserPass(self.token, 'x-oauth-basic')
         else:
             return None
diff --git a/dev/archery/setup.py b/dev/archery/setup.py
index 6dec95fdb0a..a363824f8c9 100755
--- a/dev/archery/setup.py
+++ b/dev/archery/setup.py
@@ -32,7 +32,7 @@
     'benchmark': ['pandas'],
     'docker': ['ruamel.yaml', 'python-dotenv'],
     'release': [jinja_req, 'jira', 'semver', 'gitpython'],
-    'crossbow': ['github3.py', jinja_req, 'pygit2==1.5.0', 'ruamel.yaml',
+    'crossbow': ['github3.py', jinja_req, 'pygit2>=1.6.0', 'ruamel.yaml',
                  'setuptools_scm'],
 }
 extras['bot'] = extras['crossbow'] + ['pygithub', 'jira']

From 24bb1579fbd5b83edcb0e202910fc5e1118439db Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Tue, 8 Jun 2021 09:39:46 +0900
Subject: [PATCH 376/719] ARROW-12998: [C++] Add dataset->toolchain dependency

This is rather coarse-grained, but in principle if datasets needs any headers that are built as part of the build process or depends on headers that do, then this is the right thing (and it saves us from maintaining a separate target for datasets - though I could do that if preferred).

Closes #10474 from lidavidm/arrow-12998

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 cpp/src/arrow/dataset/CMakeLists.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/cpp/src/arrow/dataset/CMakeLists.txt b/cpp/src/arrow/dataset/CMakeLists.txt
index f2fde70305e..ca467d110c6 100644
--- a/cpp/src/arrow/dataset/CMakeLists.txt
+++ b/cpp/src/arrow/dataset/CMakeLists.txt
@@ -53,6 +53,8 @@ add_arrow_lib(arrow_dataset
               ${ARROW_DATASET_SRCS}
               PRECOMPILED_HEADERS
               "$<$<COMPILE_LANGUAGE:CXX>:arrow/dataset/pch.h>"
+              DEPENDENCIES
+              toolchain
               PRIVATE_INCLUDES
               ${ARROW_DATASET_PRIVATE_INCLUDES}
               SHARED_LINK_LIBS

From 30f52a202d0a2f6393366ea1e4a8e5182077c72a Mon Sep 17 00:00:00 2001
From: Anthony Louis <anthony@simbioseventures.com>
Date: Tue, 8 Jun 2021 16:49:40 +0900
Subject: [PATCH 377/719] ARROW-13002: [C++] Add a check for the utf8proc's
 version in CMake

It adds a function in CMake to retrieve the used version of the `utf8proc` library and sets a required version to be used by CMake.

The PR complements the [10468](https://github.com/apache/arrow/pull/10468) one, that fixed the bug related to broken builds in Ubuntu 18.04

Closes #10477 from anthonylouisbsb/feature/add-check-for-utf8proc

Authored-by: Anthony Louis <anthony@simbioseventures.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 cpp/cmake_modules/Findutf8proc.cmake        | 35 +++++++++++++++++++--
 cpp/cmake_modules/ThirdpartyToolchain.cmake |  2 +-
 2 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/cpp/cmake_modules/Findutf8proc.cmake b/cpp/cmake_modules/Findutf8proc.cmake
index edea73b8dae..03c720d9a3c 100644
--- a/cpp/cmake_modules/Findutf8proc.cmake
+++ b/cpp/cmake_modules/Findutf8proc.cmake
@@ -15,6 +15,31 @@
 # specific language governing permissions and limitations
 # under the License.
 
+function(extract_utf8proc_version)
+  if(utf8proc_INCLUDE_DIR)
+    file(READ "${utf8proc_INCLUDE_DIR}/utf8proc.h" UTF8PROC_H_CONTENT)
+
+    string(REGEX MATCH "#define UTF8PROC_VERSION_MAJOR [0-9]+"
+                 UTF8PROC_MAJOR_VERSION_DEFINITION "${UTF8PROC_H_CONTENT}")
+    string(REGEX MATCH "#define UTF8PROC_VERSION_MINOR [0-9]+"
+                 UTF8PROC_MINOR_VERSION_DEFINITION "${UTF8PROC_H_CONTENT}")
+    string(REGEX MATCH "#define UTF8PROC_VERSION_PATCH [0-9]+"
+                 UTF8PROC_PATCH_VERSION_DEFINITION "${UTF8PROC_H_CONTENT}")
+
+    string(REGEX MATCH "[0-9]+$" UTF8PROC_MAJOR_VERSION
+                 "${UTF8PROC_MAJOR_VERSION_DEFINITION}")
+    string(REGEX MATCH "[0-9]+$" UTF8PROC_MINOR_VERSION
+                 "${UTF8PROC_MINOR_VERSION_DEFINITION}")
+    string(REGEX MATCH "[0-9]+$" UTF8PROC_PATCH_VERSION
+                 "${UTF8PROC_PATCH_VERSION_DEFINITION}")
+    set(utf8proc_VERSION
+        "${UTF8PROC_MAJOR_VERSION}.${UTF8PROC_MINOR_VERSION}.${UTF8PROC_PATCH_VERSION}"
+        PARENT_SCOPE)
+  else()
+    set(utf8proc_VERSION "" PARENT_SCOPE)
+  endif()
+endfunction(extract_utf8proc_version)
+
 if(ARROW_UTF8PROC_USE_SHARED)
   set(utf8proc_LIB_NAMES)
   if(CMAKE_IMPORT_LIBRARY_SUFFIX)
@@ -44,6 +69,7 @@ if(utf8proc_ROOT)
             PATHS ${utf8proc_ROOT}
             NO_DEFAULT_PATH
             PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES})
+  extract_utf8proc_version()
 else()
   find_library(utf8proc_LIB
                NAMES ${utf8proc_LIB_NAMES}
@@ -51,10 +77,15 @@ else()
   find_path(utf8proc_INCLUDE_DIR
             NAMES utf8proc.h
             PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES})
+  extract_utf8proc_version()
 endif()
 
-find_package_handle_standard_args(utf8proc REQUIRED_VARS utf8proc_LIB
-                                  utf8proc_INCLUDE_DIR)
+find_package_handle_standard_args(utf8proc
+                                  REQUIRED_VARS
+                                  utf8proc_LIB
+                                  utf8proc_INCLUDE_DIR
+                                  VERSION_VAR
+                                  utf8proc_VERSION)
 
 if(utf8proc_FOUND)
   set(utf8proc_FOUND TRUE)
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index ff55936c228..1350f274565 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -2275,7 +2275,7 @@ macro(build_utf8proc)
 endmacro()
 
 if(ARROW_WITH_UTF8PROC)
-  resolve_dependency(utf8proc)
+  resolve_dependency(utf8proc REQUIRED_VERSION "2.2.0")
 
   add_definitions(-DARROW_WITH_UTF8PROC)
 

From 8f001fcf0fc2745b535017ae3bd2366a8f9b1483 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Tue, 8 Jun 2021 12:37:56 +0200
Subject: [PATCH 378/719] ARROW-12949: [C++] Add starts_with and ends_with

This adds a simple starts_with and ends_with.

Also, now match_like can optimize some patterns into prefix/suffix matches. This also fixes a bug (which I believe is also present in Apache Impala) where some LIKE patterns are mistakenly optimized into suffix or substring matches.

Closes #10448 from lidavidm/arrow-12949

Lead-authored-by: David Li <li.davidm96@gmail.com>
Co-authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 .../arrow/compute/kernels/scalar_string.cc    | 143 +++++++++++++++++-
 .../kernels/scalar_string_benchmark.cc        |  30 ++++
 .../compute/kernels/scalar_string_test.cc     |  59 ++++++++
 docs/source/cpp/compute.rst                   |  47 +++---
 docs/source/python/api/compute.rst            |  10 +-
 5 files changed, 260 insertions(+), 29 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/scalar_string.cc b/cpp/src/arrow/compute/kernels/scalar_string.cc
index df3a3991fcf..a1e19b608d9 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string.cc
@@ -492,6 +492,46 @@ struct PlainSubstringMatcher {
   bool Match(util::string_view current) const { return Find(current) >= 0; }
 };
 
+struct PlainStartsWithMatcher {
+  const MatchSubstringOptions& options_;
+
+  explicit PlainStartsWithMatcher(const MatchSubstringOptions& options)
+      : options_(options) {}
+
+  static Result<std::unique_ptr<PlainStartsWithMatcher>> Make(
+      const MatchSubstringOptions& options) {
+    // Should be handled by partial template specialization below
+    DCHECK(!options.ignore_case);
+    return ::arrow::internal::make_unique<PlainStartsWithMatcher>(options);
+  }
+
+  bool Match(util::string_view current) const {
+    // string_view::starts_with is C++20
+    return current.substr(0, options_.pattern.size()) == options_.pattern;
+  }
+};
+
+struct PlainEndsWithMatcher {
+  const MatchSubstringOptions& options_;
+
+  explicit PlainEndsWithMatcher(const MatchSubstringOptions& options)
+      : options_(options) {}
+
+  static Result<std::unique_ptr<PlainEndsWithMatcher>> Make(
+      const MatchSubstringOptions& options) {
+    // Should be handled by partial template specialization below
+    DCHECK(!options.ignore_case);
+    return ::arrow::internal::make_unique<PlainEndsWithMatcher>(options);
+  }
+
+  bool Match(util::string_view current) const {
+    // string_view::ends_with is C++20
+    return current.size() >= options_.pattern.size() &&
+           current.substr(current.size() - options_.pattern.size(),
+                          options_.pattern.size()) == options_.pattern;
+  }
+};
+
 #ifdef ARROW_WITH_RE2
 struct RegexSubstringMatcher {
   const MatchSubstringOptions& options_;
@@ -581,6 +621,48 @@ struct MatchSubstring<Type, PlainSubstringMatcher> {
   }
 };
 
+template <typename Type>
+struct MatchSubstring<Type, PlainStartsWithMatcher> {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    auto options = MatchSubstringState::Get(ctx);
+    if (options.ignore_case) {
+#ifdef ARROW_WITH_RE2
+      MatchSubstringOptions converted_options = options;
+      converted_options.pattern = "^" + RE2::QuoteMeta(options.pattern);
+      ARROW_ASSIGN_OR_RAISE(auto matcher, RegexSubstringMatcher::Make(converted_options));
+      return MatchSubstringImpl<Type, RegexSubstringMatcher>::Exec(ctx, batch, out,
+                                                                   matcher.get());
+#else
+      return Status::NotImplemented("ignore_case requires RE2");
+#endif
+    }
+    ARROW_ASSIGN_OR_RAISE(auto matcher, PlainStartsWithMatcher::Make(options));
+    return MatchSubstringImpl<Type, PlainStartsWithMatcher>::Exec(ctx, batch, out,
+                                                                  matcher.get());
+  }
+};
+
+template <typename Type>
+struct MatchSubstring<Type, PlainEndsWithMatcher> {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    auto options = MatchSubstringState::Get(ctx);
+    if (options.ignore_case) {
+#ifdef ARROW_WITH_RE2
+      MatchSubstringOptions converted_options = options;
+      converted_options.pattern = RE2::QuoteMeta(options.pattern) + "$";
+      ARROW_ASSIGN_OR_RAISE(auto matcher, RegexSubstringMatcher::Make(converted_options));
+      return MatchSubstringImpl<Type, RegexSubstringMatcher>::Exec(ctx, batch, out,
+                                                                   matcher.get());
+#else
+      return Status::NotImplemented("ignore_case requires RE2");
+#endif
+    }
+    ARROW_ASSIGN_OR_RAISE(auto matcher, PlainEndsWithMatcher::Make(options));
+    return MatchSubstringImpl<Type, PlainEndsWithMatcher>::Exec(ctx, batch, out,
+                                                                matcher.get());
+  }
+};
+
 const FunctionDoc match_substring_doc(
     "Match strings against literal pattern",
     ("For each string in `strings`, emit true iff it contains a given pattern.\n"
@@ -588,6 +670,20 @@ const FunctionDoc match_substring_doc(
      "If ignore_case is set, only simple case folding is performed."),
     {"strings"}, "MatchSubstringOptions");
 
+const FunctionDoc starts_with_doc(
+    "Check if strings start with a literal pattern",
+    ("For each string in `strings`, emit true iff it starts with a given pattern.\n"
+     "Null inputs emit null.  The pattern must be given in MatchSubstringOptions. "
+     "If ignore_case is set, only simple case folding is performed."),
+    {"strings"}, "MatchSubstringOptions");
+
+const FunctionDoc ends_with_doc(
+    "Check if strings end with a literal pattern",
+    ("For each string in `strings`, emit true iff it ends with a given pattern.\n"
+     "Null inputs emit null.  The pattern must be given in MatchSubstringOptions. "
+     "If ignore_case is set, only simple case folding is performed."),
+    {"strings"}, "MatchSubstringOptions");
+
 #ifdef ARROW_WITH_RE2
 const FunctionDoc match_substring_regex_doc(
     "Match strings against regex pattern",
@@ -643,17 +739,20 @@ std::string MakeLikeRegex(const MatchSubstringOptions& options) {
   return like_pattern;
 }
 
-// A LIKE pattern matching this regex can be translated into a substring search.
-static RE2 kLikePatternIsSubstringMatch("%+([^%_]*)%+");
-
 // Evaluate a SQL-like LIKE pattern by translating it to a regexp or
 // substring search as appropriate. See what Apache Impala does:
 // https://github.com/apache/impala/blob/9c38568657d62b6f6d7b10aa1c721ba843374dd8/be/src/exprs/like-predicate.cc
-// Note that Impala optimizes more cases (e.g. prefix match) but we
-// don't have kernels for those.
 template <typename StringType>
 struct MatchLike {
   static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    // NOTE: avoid making those constants global to avoid compiling regexes at startup
+    // A LIKE pattern matching this regex can be translated into a substring search.
+    static const RE2 kLikePatternIsSubstringMatch(R"(%+([^%_]*[^\\%_])?%+)");
+    // A LIKE pattern matching this regex can be translated into a prefix search.
+    static const RE2 kLikePatternIsStartsWith(R"(([^%_]*[^\\%_])?%+)");
+    // A LIKE pattern matching this regex can be translated into a suffix search.
+    static const RE2 kLikePatternIsEndsWith(R"(%+([^%_]*))");
+
     auto original_options = MatchSubstringState::Get(ctx);
     auto original_state = ctx->state();
 
@@ -666,6 +765,20 @@ struct MatchLike {
       MatchSubstringState converted_state(converted_options);
       ctx->SetState(&converted_state);
       status = MatchSubstring<StringType, PlainSubstringMatcher>::Exec(ctx, batch, out);
+    } else if (!original_options.ignore_case &&
+               re2::RE2::FullMatch(original_options.pattern, kLikePatternIsStartsWith,
+                                   &pattern)) {
+      MatchSubstringOptions converted_options{pattern, original_options.ignore_case};
+      MatchSubstringState converted_state(converted_options);
+      ctx->SetState(&converted_state);
+      status = MatchSubstring<StringType, PlainStartsWithMatcher>::Exec(ctx, batch, out);
+    } else if (!original_options.ignore_case &&
+               re2::RE2::FullMatch(original_options.pattern, kLikePatternIsEndsWith,
+                                   &pattern)) {
+      MatchSubstringOptions converted_options{pattern, original_options.ignore_case};
+      MatchSubstringState converted_state(converted_options);
+      ctx->SetState(&converted_state);
+      status = MatchSubstring<StringType, PlainEndsWithMatcher>::Exec(ctx, batch, out);
     } else {
       MatchSubstringOptions converted_options{MakeLikeRegex(original_options),
                                               original_options.ignore_case};
@@ -700,6 +813,26 @@ void AddMatchSubstring(FunctionRegistry* registry) {
         func->AddKernel({large_utf8()}, boolean(), exec_64, MatchSubstringState::Init));
     DCHECK_OK(registry->AddFunction(std::move(func)));
   }
+  {
+    auto func = std::make_shared<ScalarFunction>("starts_with", Arity::Unary(),
+                                                 &match_substring_doc);
+    auto exec_32 = MatchSubstring<StringType, PlainStartsWithMatcher>::Exec;
+    auto exec_64 = MatchSubstring<LargeStringType, PlainStartsWithMatcher>::Exec;
+    DCHECK_OK(func->AddKernel({utf8()}, boolean(), exec_32, MatchSubstringState::Init));
+    DCHECK_OK(
+        func->AddKernel({large_utf8()}, boolean(), exec_64, MatchSubstringState::Init));
+    DCHECK_OK(registry->AddFunction(std::move(func)));
+  }
+  {
+    auto func = std::make_shared<ScalarFunction>("ends_with", Arity::Unary(),
+                                                 &match_substring_doc);
+    auto exec_32 = MatchSubstring<StringType, PlainEndsWithMatcher>::Exec;
+    auto exec_64 = MatchSubstring<LargeStringType, PlainEndsWithMatcher>::Exec;
+    DCHECK_OK(func->AddKernel({utf8()}, boolean(), exec_32, MatchSubstringState::Init));
+    DCHECK_OK(
+        func->AddKernel({large_utf8()}, boolean(), exec_64, MatchSubstringState::Init));
+    DCHECK_OK(registry->AddFunction(std::move(func)));
+  }
 #ifdef ARROW_WITH_RE2
   {
     auto func = std::make_shared<ScalarFunction>("match_substring_regex", Arity::Unary(),
diff --git a/cpp/src/arrow/compute/kernels/scalar_string_benchmark.cc b/cpp/src/arrow/compute/kernels/scalar_string_benchmark.cc
index 8528c0d9e5d..606e774451c 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string_benchmark.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string_benchmark.cc
@@ -87,6 +87,30 @@ static void TrimManyAscii(benchmark::State& state) {
   UnaryStringBenchmark(state, "ascii_trim", &options);
 }
 
+#ifdef ARROW_WITH_RE2
+static void MatchLike(benchmark::State& state) {
+  MatchSubstringOptions options("ab%ac");
+  UnaryStringBenchmark(state, "match_like", &options);
+}
+
+// MatchLike optimizes the following three into a substring/prefix/suffix search instead
+// of using RE2
+static void MatchLikeSubstring(benchmark::State& state) {
+  MatchSubstringOptions options("%abac%");
+  UnaryStringBenchmark(state, "match_like", &options);
+}
+
+static void MatchLikePrefix(benchmark::State& state) {
+  MatchSubstringOptions options("%abac");
+  UnaryStringBenchmark(state, "match_like", &options);
+}
+
+static void MatchLikeSuffix(benchmark::State& state) {
+  MatchSubstringOptions options("%abac");
+  UnaryStringBenchmark(state, "match_like", &options);
+}
+#endif
+
 #ifdef ARROW_WITH_UTF8PROC
 static void Utf8Upper(benchmark::State& state) {
   UnaryStringBenchmark(state, "utf8_upper");
@@ -152,6 +176,12 @@ BENCHMARK(MatchSubstring);
 BENCHMARK(SplitPattern);
 BENCHMARK(TrimSingleAscii);
 BENCHMARK(TrimManyAscii);
+#ifdef ARROW_WITH_RE2
+BENCHMARK(MatchLike);
+BENCHMARK(MatchLikeSubstring);
+BENCHMARK(MatchLikePrefix);
+BENCHMARK(MatchLikeSuffix);
+#endif
 #ifdef ARROW_WITH_UTF8PROC
 BENCHMARK(Utf8Lower);
 BENCHMARK(Utf8Upper);
diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
index 9b4cef494d7..f015e339423 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
@@ -445,6 +445,60 @@ TYPED_TEST(TestStringKernels, MatchSubstringIgnoreCase) {
 }
 #endif
 
+TYPED_TEST(TestStringKernels, MatchStartsWith) {
+  MatchSubstringOptions options{"abab"};
+  this->CheckUnary("starts_with", "[]", boolean(), "[]", &options);
+  this->CheckUnary("starts_with", R"([null, "", "ab", "abab", "$abab", "abab$"])",
+                   boolean(), "[null, false, false, true, false, true]", &options);
+  this->CheckUnary("starts_with", R"(["ABAB", "BABAB", "ABABC", "bAbAb", "aBaBc"])",
+                   boolean(), "[false, false, false, false, false]", &options);
+}
+
+TYPED_TEST(TestStringKernels, MatchEndsWith) {
+  MatchSubstringOptions options{"abab"};
+  this->CheckUnary("ends_with", "[]", boolean(), "[]", &options);
+  this->CheckUnary("ends_with", R"([null, "", "ab", "abab", "$abab", "abab$"])",
+                   boolean(), "[null, false, false, true, true, false]", &options);
+  this->CheckUnary("ends_with", R"(["ABAB", "BABAB", "ABABC", "bAbAb", "aBaBc"])",
+                   boolean(), "[false, false, false, false, false]", &options);
+}
+
+#ifdef ARROW_WITH_RE2
+TYPED_TEST(TestStringKernels, MatchStartsWithIgnoreCase) {
+  MatchSubstringOptions options{"aBAb", /*ignore_case=*/true};
+  this->CheckUnary("starts_with", "[]", boolean(), "[]", &options);
+  this->CheckUnary("starts_with", R"([null, "", "ab", "abab", "$abab", "abab$"])",
+                   boolean(), "[null, false, false, true, false, true]", &options);
+  this->CheckUnary("starts_with", R"(["ABAB", "$ABAB", "ABAB$", "$AbAb", "aBaB$"])",
+                   boolean(), "[true, false, true, false, true]", &options);
+}
+
+TYPED_TEST(TestStringKernels, MatchEndsWithIgnoreCase) {
+  MatchSubstringOptions options{"aBAb", /*ignore_case=*/true};
+  this->CheckUnary("ends_with", "[]", boolean(), "[]", &options);
+  this->CheckUnary("ends_with", R"([null, "", "ab", "abab", "$abab", "abab$"])",
+                   boolean(), "[null, false, false, true, true, false]", &options);
+  this->CheckUnary("ends_with", R"(["ABAB", "$ABAB", "ABAB$", "$AbAb", "aBaB$"])",
+                   boolean(), "[true, true, false, true, false]", &options);
+}
+#else
+TYPED_TEST(TestStringKernels, MatchStartsWithIgnoreCase) {
+  Datum input = ArrayFromJSON(this->type(), R"(["a"])");
+  MatchSubstringOptions options{"a", /*ignore_case=*/true};
+  EXPECT_RAISES_WITH_MESSAGE_THAT(NotImplemented,
+                                  ::testing::HasSubstr("ignore_case requires RE2"),
+                                  CallFunction("starts_with", {input}, &options));
+}
+
+TYPED_TEST(TestStringKernels, MatchEndsWithIgnoreCase) {
+  Datum input = ArrayFromJSON(this->type(), R"(["a"])");
+  MatchSubstringOptions options{"a", /*ignore_case=*/true};
+  EXPECT_RAISES_WITH_MESSAGE_THAT(NotImplemented,
+                                  ::testing::HasSubstr("ignore_case requires RE2"),
+                                  CallFunction("ends_with", {input}, &options));
+}
+#endif
+
 #ifdef ARROW_WITH_RE2
 TYPED_TEST(TestStringKernels, MatchSubstringRegex) {
   MatchSubstringOptions options{"ab"};
@@ -528,10 +582,15 @@ TYPED_TEST(TestStringKernels, MatchLike) {
 TYPED_TEST(TestStringKernels, MatchLikeEscaping) {
   auto inputs = R"(["%%foo", "_bar", "({", "\\baz"])";
 
+  // N.B. I believe Impala mistakenly optimizes these into substring searches
   MatchSubstringOptions escape_percent{"\\%%"};
   this->CheckUnary("match_like", inputs, boolean(), "[true, false, false, false]",
                    &escape_percent);
 
+  MatchSubstringOptions not_substring{"%\\%%"};
+  this->CheckUnary("match_like", inputs, boolean(), "[true, false, false, false]",
+                   &not_substring);
+
   MatchSubstringOptions escape_underscore{"\\____"};
   this->CheckUnary("match_like", inputs, boolean(), "[false, true, false, false]",
                    &escape_underscore);
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index 434d4a23e9c..4aa38e1a295 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -563,52 +563,59 @@ Containment tests
 +===========================+============+====================================+====================+========================================+
 | count_substring           | Unary      | String-like                        | Int32 or Int64 (1) | :struct:`MatchSubstringOptions`        |
 +---------------------------+------------+------------------------------------+--------------------+----------------------------------------+
-| find_substring            | Unary      | String-like                        | Int32 or Int64 (2) | :struct:`MatchSubstringOptions`        |
+| ends_with                 | Unary      | String-like                        | Boolean (2)        | :struct:`MatchSubstringOptions`        |
 +---------------------------+------------+------------------------------------+--------------------+----------------------------------------+
-| match_like                | Unary      | String-like                        | Boolean (3)        | :struct:`MatchSubstringOptions`        |
+| find_substring            | Unary      | String-like                        | Int32 or Int64 (3) | :struct:`MatchSubstringOptions`        |
 +---------------------------+------------+------------------------------------+--------------------+----------------------------------------+
-| match_substring           | Unary      | String-like                        | Boolean (4)        | :struct:`MatchSubstringOptions`        |
-+---------------------------+------------+------------------------------------+--------------------+----------------------------------------+
-| match_substring_regex     | Unary      | String-like                        | Boolean (5)        | :struct:`MatchSubstringOptions`        |
-+---------------------------+------------+------------------------------------+--------------------+----------------------------------------+
-| index_in                  | Unary      | Boolean, Null, Numeric, Temporal,  | Int32 (6)          | :struct:`SetLookupOptions`             |
+| index_in                  | Unary      | Boolean, Null, Numeric, Temporal,  | Int32 (4)          | :struct:`SetLookupOptions`             |
 |                           |            | Binary- and String-like            |                    |                                        |
 +---------------------------+------------+------------------------------------+--------------------+----------------------------------------+
-| is_in                     | Unary      | Boolean, Null, Numeric, Temporal,  | Boolean (7)        | :struct:`SetLookupOptions`             |
+| is_in                     | Unary      | Boolean, Null, Numeric, Temporal,  | Boolean (5)        | :struct:`SetLookupOptions`             |
 |                           |            | Binary- and String-like            |                    |                                        |
 +---------------------------+------------+------------------------------------+--------------------+----------------------------------------+
+| match_like                | Unary      | String-like                        | Boolean (6)        | :struct:`MatchSubstringOptions`        |
++---------------------------+------------+------------------------------------+--------------------+----------------------------------------+
+| match_substring           | Unary      | String-like                        | Boolean (7)        | :struct:`MatchSubstringOptions`        |
++---------------------------+------------+------------------------------------+--------------------+----------------------------------------+
+| match_substring_regex     | Unary      | String-like                        | Boolean (8)        | :struct:`MatchSubstringOptions`        |
++---------------------------+------------+------------------------------------+--------------------+----------------------------------------+
+| starts_with               | Unary      | String-like                        | Boolean (2)        | :struct:`MatchSubstringOptions`        |
++---------------------------+------------+------------------------------------+--------------------+----------------------------------------+
+
 
 * \(1) Output is the number of occurrences of
   :member:`MatchSubstringOptions::pattern` in the corresponding input
   string. Output type is Int32 for Binary/String, Int64
   for LargeBinary/LargeString.
 
-* \(2) Output is the index of the first occurrence of
+* \(2) Output is true iff :member:`MatchSubstringOptions::pattern`
+  is a suffix/prefix of the corresponding input.
+
+* \(3) Output is the index of the first occurrence of
   :member:`MatchSubstringOptions::pattern` in the corresponding input
   string, otherwise -1. Output type is Int32 for Binary/String, Int64
   for LargeBinary/LargeString.
 
-* \(3) Output is true iff the SQL-style LIKE pattern
+* \(4) Output is the index of the corresponding input element in
+  :member:`SetLookupOptions::value_set`, if found there.  Otherwise,
+  output is null.
+
+* \(5) Output is true iff the corresponding input element is equal to one
+  of the elements in :member:`SetLookupOptions::value_set`.
+
+* \(6) Output is true iff the SQL-style LIKE pattern
   :member:`MatchSubstringOptions::pattern` fully matches the
   corresponding input element. That is, ``%`` will match any number of
   characters, ``_`` will match exactly one character, and any other
   character matches itself. To match a literal percent sign or
   underscore, precede the character with a backslash.
 
-* \(4) Output is true iff :member:`MatchSubstringOptions::pattern`
+* \(7) Output is true iff :member:`MatchSubstringOptions::pattern`
   is a substring of the corresponding input element.
 
-* \(5) Output is true iff :member:`MatchSubstringOptions::pattern`
+* \(8) Output is true iff :member:`MatchSubstringOptions::pattern`
   matches the corresponding input element at any position.
 
-* \(6) Output is the index of the corresponding input element in
-  :member:`SetLookupOptions::value_set`, if found there.  Otherwise,
-  output is null.
-
-* \(7) Output is true iff the corresponding input element is equal to one
-  of the elements in :member:`SetLookupOptions::value_set`.
-
-
 String splitting
 ~~~~~~~~~~~~~~~~
 
diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst
index a586f9011fd..1dbcb3073ca 100644
--- a/docs/source/python/api/compute.rst
+++ b/docs/source/python/api/compute.rst
@@ -40,7 +40,7 @@ Arithmetic Functions
 --------------------
 
 By default these functions do not detect overflow. Each function is also
-available in an overflow-checking variant, suffixed ``_checked``, which 
+available in an overflow-checking variant, suffixed ``_checked``, which
 throws an ``ArrowInvalid`` exception when overflow is detected.
 
 .. autosummary::
@@ -104,11 +104,11 @@ logic variants are provided (suffixed ``_kleene``). See User Guide for details.
 String Predicates
 -----------------
 
-In these functions an empty string emits false in the output. For ASCII 
+In these functions an empty string emits false in the output. For ASCII
 variants (prefixed ``ascii_``) a string element with non-ASCII characters
 emits false in the output.
 
-The first set of functions emit true if the input contains only 
+The first set of functions emit true if the input contains only
 characters of a given class.
 
 .. autosummary::
@@ -140,7 +140,7 @@ in the string element.
    ascii_is_title
    utf8_is_title
 
-The third set of functions examines string elements on 
+The third set of functions examines string elements on
 a byte-by-byte basis.
 
 .. autosummary::
@@ -179,12 +179,14 @@ Containment tests
    :toctree: ../generated/
 
    count_substring
+   ends_with
    find_substring
    index_in
    is_in
    match_like
    match_substring
    match_substring_regex
+   starts_with
 
 Conversions
 -----------

From 7c9c24404e349c2799f823f72b51e02d817663b0 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Tue, 8 Jun 2021 10:20:41 -0400
Subject: [PATCH 379/719] ARROW-13008: [C++] Avoid deprecated API in minimal
 example

Closes #10481 from pitrou/ARROW-13008-example-deprecation

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/examples/minimal_build/example.cc | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/cpp/examples/minimal_build/example.cc b/cpp/examples/minimal_build/example.cc
index e1b5c123a85..2ca163155ee 100644
--- a/cpp/examples/minimal_build/example.cc
+++ b/cpp/examples/minimal_build/example.cc
@@ -38,8 +38,7 @@ Status RunMain(int argc, char** argv) {
                         arrow::io::ReadableFile::Open(csv_filename));
   ARROW_ASSIGN_OR_RAISE(
       auto csv_reader,
-      arrow::csv::TableReader::Make(arrow::default_memory_pool(),
-                                    arrow::io::default_io_context(),
+      arrow::csv::TableReader::Make(arrow::io::default_io_context(),
                                     input_file,
                                     arrow::csv::ReadOptions::Defaults(),
                                     arrow::csv::ParseOptions::Defaults(),

From 15c4f1f0315f2c56b3359c9500e0ed1710d9f32a Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Tue, 8 Jun 2021 10:08:36 -0500
Subject: [PATCH 380/719] ARROW-12901: [R] Follow on to more examples

Closes #10436 from thisisnic/ARROW-12901_examples

Authored-by: Nic Crane <thisisnic@gmail.com>
Signed-off-by: Jonathan Keane <jkeane@gmail.com>
---
 r/R/dataset.R         | 46 ++++++++++++++++++++++++++++++++-----------
 r/man/open_dataset.Rd | 46 ++++++++++++++++++++++++++++++++-----------
 2 files changed, 68 insertions(+), 24 deletions(-)

diff --git a/r/R/dataset.R b/r/R/dataset.R
index 095c56fc891..6706b48ecc4 100644
--- a/r/R/dataset.R
+++ b/r/R/dataset.R
@@ -81,30 +81,52 @@
 #' @export
 #' @seealso `vignette("dataset", package = "arrow")`
 #' @include arrow-package.R
-#' @examplesIf arrow_with_dataset()
+#' @examplesIf arrow_with_dataset() & arrow_with_parquet() 
 #' # Set up directory for examples
 #' tf <- tempfile()
 #' dir.create(tf)
 #' on.exit(unlink(tf))
-#' \dontrun{
-#' write_parquet(mtcars[1:10,], file.path(tf, "file1.parquet"))
-#' write_parquet(mtcars[11:20,], file.path(tf, "file2.parquet"))
-#' write_parquet(mtcars[21:32,], file.path(tf, "file3.parquet"))
+#' 
+#' data <- dplyr::group_by(mtcars, cyl)
+#' write_dataset(data, tf)
 #' 
 #' # You can specify a directory containing the files for your dataset and
 #' # open_dataset will scan all files in your directory.
 #' open_dataset(tf)
 #' 
 #' # You can also supply a vector of paths
-#' open_dataset(c(file.path(tf, "file3.parquet"), file.path(tf, "file2.parquet")))
-#' }
+#' open_dataset(c(file.path(tf, "cyl=4/part-1.parquet"), file.path(tf,"cyl=8/part-2.parquet")))
+#'
 #' ## You must specify the file format if using a format other than parquet.
-#' write_csv_arrow(mtcars[1:10,], file.path(tf, "file1.csv"))
-#' write_csv_arrow(mtcars[11:20,], file.path(tf, "file2.csv"))
+#' tf2 <- tempfile()
+#' dir.create(tf2)
+#' on.exit(unlink(tf2))
+#' write_dataset(data, tf2, format = "ipc")
 #' # This line will results in errors when you try to work with the data
-#' \dontrun{open_dataset(c(file.path(tf, "file1.csv"), file.path(tf, "file2.csv")))}
-#' # This is the correct way to open a dataset containing CSVs
-#' open_dataset(c(file.path(tf, "file1.csv"), file.path(tf, "file2.csv")), format = "csv") 
+#' \dontrun{open_dataset(tf2)}
+#' # This line will work
+#' open_dataset(tf2, format = "ipc") 
+#' 
+#' ## You can specify file partitioning to include it as a field in your dataset
+#' # Create a temporary directory and write example dataset
+#' tf3 <- tempfile()
+#' dir.create(tf3)
+#' on.exit(unlink(tf3))
+#' write_dataset(airquality, tf3, partitioning = c("Month", "Day"), hive_style = FALSE)
+#' 
+#' # View files - you can see the partitioning means that files have been written 
+#' # to folders based on Month/Day values
+#' list.files(tf3, recursive = TRUE)
+#' 
+#' # With no partitioning specified, dataset contains all files but doesn't include
+#' # directory names as field names
+#' open_dataset(tf3)
+#' 
+#' # Now that partitioning has been specified, your dataset contains columns for Month and Day
+#' open_dataset(tf3, partitioning = c("Month", "Day"))
+#' 
+#' # If you want to specify the data types for your fields, you can pass in a Schema
+#' open_dataset(tf3, partitioning = schema(Month = int8(), Day = int8()))
 open_dataset <- function(sources,
                          schema = NULL,
                          partitioning = hive_partition(),
diff --git a/r/man/open_dataset.Rd b/r/man/open_dataset.Rd
index adc0b56eac4..1ca3d661880 100644
--- a/r/man/open_dataset.Rd
+++ b/r/man/open_dataset.Rd
@@ -91,30 +91,52 @@ can accelerate queries that only touch some partitions (files). Call
 \code{Dataset}, then use \code{dplyr} methods to query it.
 }
 \examples{
-\dontshow{if (arrow_with_dataset()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (arrow_with_dataset() & arrow_with_parquet() ) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
 # Set up directory for examples
 tf <- tempfile()
 dir.create(tf)
 on.exit(unlink(tf))
-\dontrun{
-write_parquet(mtcars[1:10,], file.path(tf, "file1.parquet"))
-write_parquet(mtcars[11:20,], file.path(tf, "file2.parquet"))
-write_parquet(mtcars[21:32,], file.path(tf, "file3.parquet"))
+
+data <- dplyr::group_by(mtcars, cyl)
+write_dataset(data, tf)
 
 # You can specify a directory containing the files for your dataset and
 # open_dataset will scan all files in your directory.
 open_dataset(tf)
 
 # You can also supply a vector of paths
-open_dataset(c(file.path(tf, "file3.parquet"), file.path(tf, "file2.parquet")))
-}
+open_dataset(c(file.path(tf, "cyl=4/part-1.parquet"), file.path(tf,"cyl=8/part-2.parquet")))
+
 ## You must specify the file format if using a format other than parquet.
-write_csv_arrow(mtcars[1:10,], file.path(tf, "file1.csv"))
-write_csv_arrow(mtcars[11:20,], file.path(tf, "file2.csv"))
+tf2 <- tempfile()
+dir.create(tf2)
+on.exit(unlink(tf2))
+write_dataset(data, tf2, format = "ipc")
 # This line will results in errors when you try to work with the data
-\dontrun{open_dataset(c(file.path(tf, "file1.csv"), file.path(tf, "file2.csv")))}
-# This is the correct way to open a dataset containing CSVs
-open_dataset(c(file.path(tf, "file1.csv"), file.path(tf, "file2.csv")), format = "csv") 
+\dontrun{open_dataset(tf2)}
+# This line will work
+open_dataset(tf2, format = "ipc") 
+
+## You can specify file partitioning to include it as a field in your dataset
+# Create a temporary directory and write example dataset
+tf3 <- tempfile()
+dir.create(tf3)
+on.exit(unlink(tf3))
+write_dataset(airquality, tf3, partitioning = c("Month", "Day"), hive_style = FALSE)
+
+# View files - you can see the partitioning means that files have been written 
+# to folders based on Month/Day values
+list.files(tf3, recursive = TRUE)
+
+# With no partitioning specified, dataset contains all files but doesn't include
+# directory names as field names
+open_dataset(tf3)
+
+# Now that partitioning has been specified, your dataset contains columns for Month and Day
+open_dataset(tf3, partitioning = c("Month", "Day"))
+
+# If you want to specify the data types for your fields, you can pass in a Schema
+open_dataset(tf3, partitioning = schema(Month = int8(), Day = int8()))
 \dontshow{\}) # examplesIf}
 }
 \seealso{

From 882c2468a4159d27fd0bd4c7736bb540d09a568d Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Tue, 8 Jun 2021 17:18:58 +0200
Subject: [PATCH 381/719] ARROW-12993: [Python] Avoid half-initialized
 FeatherReader object

When trying to read an invalid Feather file, the `stackprinter` project
would crash when walking the stack and attempting to print detailed information
about captured local variables.

Specifically, the crash would occur when looking up the `version` attribute
on the half-initialized FeatherReader object.

Closes #10480 from pitrou/ARROW-12993-feather-uninitialized

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 python/pyarrow/_feather.pyx | 6 +-----
 python/pyarrow/feather.py   | 3 +--
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/python/pyarrow/_feather.pyx b/python/pyarrow/_feather.pyx
index 3939dd5e818..2f46bb98724 100644
--- a/python/pyarrow/_feather.pyx
+++ b/python/pyarrow/_feather.pyx
@@ -69,13 +69,9 @@ cdef class FeatherReader(_Weakrefable):
     cdef:
         shared_ptr[CFeatherReader] reader
 
-    def __cinit__(self):
-        pass
-
-    def open(self, source, c_bool use_memory_map=True):
+    def __cinit__(self, source, c_bool use_memory_map):
         cdef shared_ptr[CRandomAccessFile] reader
         get_reader(source, use_memory_map, &reader)
-
         with nogil:
             self.reader = GetResultValue(CFeatherReader.Open(reader))
 
diff --git a/python/pyarrow/feather.py b/python/pyarrow/feather.py
index b184e2bf0a0..1e6875ac08e 100644
--- a/python/pyarrow/feather.py
+++ b/python/pyarrow/feather.py
@@ -236,8 +236,7 @@ def read_table(source, columns=None, memory_map=True):
     -------
     table : pyarrow.Table
     """
-    reader = _feather.FeatherReader()
-    reader.open(source, use_memory_map=memory_map)
+    reader = _feather.FeatherReader(source, use_memory_map=memory_map)
 
     if columns is None:
         return reader.read()

From 47da302121b59286aa327969e152f6c7c2427741 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Tue, 8 Jun 2021 17:19:31 +0200
Subject: [PATCH 382/719] ARROW-12779: [Python][FlightRPC] Guard against DoGet
 handler that never sends data

While rather unrealistic, an RPC handler that just yielded a stream of empty tables would segfault the server, since the bindings would recursively pull from the stream looking for a non-empty table. This changes the pull into a loop, and limits how much it'll pull before giving up.

Closes #10323 from lidavidm/arrow-12779

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 python/pyarrow/_flight.pyx          | 135 +++++++++++++++-------------
 python/pyarrow/tests/test_flight.py |  33 +++++++
 2 files changed, 107 insertions(+), 61 deletions(-)

diff --git a/python/pyarrow/_flight.pyx b/python/pyarrow/_flight.pyx
index a84166ce866..618291c1a32 100644
--- a/python/pyarrow/_flight.pyx
+++ b/python/pyarrow/_flight.pyx
@@ -1676,70 +1676,83 @@ cdef CStatus _data_stream_next(void* self, CFlightPayload* payload) except *:
         raise RuntimeError("self object in callback is not GeneratorStream")
     stream = <GeneratorStream> py_stream
 
-    if stream.current_stream != nullptr:
-        check_flight_status(stream.current_stream.get().Next(payload))
-        # If the stream ended, see if there's another stream from the
-        # generator
-        if payload.ipc_message.metadata != nullptr:
+    # The generator is allowed to yield a reader or table which we
+    # yield from; if that sub-generator is empty, we need to reset and
+    # try again. However, limit the number of attempts so that we
+    # don't just spin forever.
+    max_attempts = 128
+    for _ in range(max_attempts):
+        if stream.current_stream != nullptr:
+            check_flight_status(stream.current_stream.get().Next(payload))
+            # If the stream ended, see if there's another stream from the
+            # generator
+            if payload.ipc_message.metadata != nullptr:
+                return CStatus_OK()
+            stream.current_stream.reset(nullptr)
+
+        try:
+            result = next(stream.generator)
+        except StopIteration:
+            payload.ipc_message.metadata.reset(<CBuffer*> nullptr)
             return CStatus_OK()
-        stream.current_stream.reset(nullptr)
+        except FlightError as flight_error:
+            return (<FlightError> flight_error).to_status()
 
-    try:
-        result = next(stream.generator)
-    except StopIteration:
-        payload.ipc_message.metadata.reset(<CBuffer*> nullptr)
+        if isinstance(result, (list, tuple)):
+            result, metadata = result
+        else:
+            result, metadata = result, None
+
+        if isinstance(result, (Table, RecordBatchReader)):
+            if metadata:
+                raise ValueError("Can only return metadata alongside a "
+                                 "RecordBatch.")
+            result = RecordBatchStream(result)
+
+        stream_schema = pyarrow_wrap_schema(stream.schema)
+        if isinstance(result, FlightDataStream):
+            if metadata:
+                raise ValueError("Can only return metadata alongside a "
+                                 "RecordBatch.")
+            data_stream = unique_ptr[CFlightDataStream](
+                (<FlightDataStream> result).to_stream())
+            substream_schema = pyarrow_wrap_schema(data_stream.get().schema())
+            if substream_schema != stream_schema:
+                raise ValueError("Got a FlightDataStream whose schema "
+                                 "does not match the declared schema of this "
+                                 "GeneratorStream. "
+                                 "Got: {}\nExpected: {}".format(
+                                     substream_schema, stream_schema))
+            stream.current_stream.reset(
+                new CPyFlightDataStream(result, move(data_stream)))
+            # Loop around and try again
+            continue
+        elif isinstance(result, RecordBatch):
+            batch = <RecordBatch> result
+            if batch.schema != stream_schema:
+                raise ValueError("Got a RecordBatch whose schema does not "
+                                 "match the declared schema of this "
+                                 "GeneratorStream. "
+                                 "Got: {}\nExpected: {}".format(batch.schema,
+                                                                stream_schema))
+            check_flight_status(GetRecordBatchPayload(
+                deref(batch.batch),
+                stream.c_options,
+                &payload.ipc_message))
+            if metadata:
+                payload.app_metadata = pyarrow_unwrap_buffer(
+                    as_buffer(metadata))
+        else:
+            raise TypeError("GeneratorStream must be initialized with "
+                            "an iterator of FlightDataStream, Table, "
+                            "RecordBatch, or RecordBatchStreamReader objects, "
+                            "not {}.".format(type(result)))
+        # Don't loop around
         return CStatus_OK()
-    except FlightError as flight_error:
-        return (<FlightError> flight_error).to_status()
-
-    if isinstance(result, (list, tuple)):
-        result, metadata = result
-    else:
-        result, metadata = result, None
-
-    if isinstance(result, (Table, RecordBatchReader)):
-        if metadata:
-            raise ValueError("Can only return metadata alongside a "
-                             "RecordBatch.")
-        result = RecordBatchStream(result)
-
-    stream_schema = pyarrow_wrap_schema(stream.schema)
-    if isinstance(result, FlightDataStream):
-        if metadata:
-            raise ValueError("Can only return metadata alongside a "
-                             "RecordBatch.")
-        data_stream = unique_ptr[CFlightDataStream](
-            (<FlightDataStream> result).to_stream())
-        substream_schema = pyarrow_wrap_schema(data_stream.get().schema())
-        if substream_schema != stream_schema:
-            raise ValueError("Got a FlightDataStream whose schema does not "
-                             "match the declared schema of this "
-                             "GeneratorStream. "
-                             "Got: {}\nExpected: {}".format(substream_schema,
-                                                            stream_schema))
-        stream.current_stream.reset(
-            new CPyFlightDataStream(result, move(data_stream)))
-        return _data_stream_next(self, payload)
-    elif isinstance(result, RecordBatch):
-        batch = <RecordBatch> result
-        if batch.schema != stream_schema:
-            raise ValueError("Got a RecordBatch whose schema does not "
-                             "match the declared schema of this "
-                             "GeneratorStream. "
-                             "Got: {}\nExpected: {}".format(batch.schema,
-                                                            stream_schema))
-        check_flight_status(GetRecordBatchPayload(
-            deref(batch.batch),
-            stream.c_options,
-            &payload.ipc_message))
-        if metadata:
-            payload.app_metadata = pyarrow_unwrap_buffer(as_buffer(metadata))
-    else:
-        raise TypeError("GeneratorStream must be initialized with "
-                        "an iterator of FlightDataStream, Table, "
-                        "RecordBatch, or RecordBatchStreamReader objects, "
-                        "not {}.".format(type(result)))
-    return CStatus_OK()
+    # Ran out of attempts (the RPC handler kept yielding empty tables/readers)
+    raise RuntimeError("While getting next payload, ran out of attempts to "
+                       "get something to send "
+                       "(application server implementation error)")
 
 
 cdef CStatus _list_flights(void* self, const CServerCallContext& context,
diff --git a/python/pyarrow/tests/test_flight.py b/python/pyarrow/tests/test_flight.py
index 1ab01f735e9..36b6d6610d2 100644
--- a/python/pyarrow/tests/test_flight.py
+++ b/python/pyarrow/tests/test_flight.py
@@ -306,6 +306,25 @@ def do_get(self, context, ticket):
         return flight.GeneratorStream(self.schema, [table1, table2])
 
 
+class NeverSendsDataFlightServer(FlightServerBase):
+    """A Flight server that never actually yields data."""
+
+    schema = pa.schema([('a', pa.int32())])
+
+    def do_get(self, context, ticket):
+        if ticket.ticket == b'yield_data':
+            # Check that the server handler will ignore empty tables
+            # up to a certain extent
+            data = [
+                self.schema.empty_table(),
+                self.schema.empty_table(),
+                pa.RecordBatch.from_arrays([range(5)], schema=self.schema),
+            ]
+            return flight.GeneratorStream(self.schema, data)
+        return flight.GeneratorStream(
+            self.schema, itertools.repeat(self.schema.empty_table()))
+
+
 class SlowFlightServer(FlightServerBase):
     """A Flight server that delays its responses to test timeouts."""
 
@@ -1870,3 +1889,17 @@ def test(read_all):
         descriptor = flight.FlightDescriptor.for_command(b"echo")
         writer, reader = client.do_exchange(descriptor)
         test(reader.read_all)
+
+
+def test_never_sends_data():
+    # Regression test for ARROW-12779
+    match = "application server implementation error"
+    with NeverSendsDataFlightServer() as server:
+        client = flight.connect(('localhost', server.port))
+        with pytest.raises(flight.FlightServerError, match=match):
+            client.do_get(flight.Ticket(b'')).read_all()
+
+        # Check that the server handler will ignore empty tables
+        # up to a certain extent
+        table = client.do_get(flight.Ticket(b'yield_data')).read_all()
+        assert table.num_rows == 5

From 0e227c9bfdcd761a0c744cf13bd65555ad515fc1 Mon Sep 17 00:00:00 2001
From: crystrix <chenxi.li@live.com>
Date: Tue, 8 Jun 2021 12:02:37 -0400
Subject: [PATCH 383/719] ARROW-12942: [C++][Compute] Fix incorrect result of
 Arrow compute hash_min_max with a chunked array

If there are new groups in the subsequent chunks of a chunked array, the result of Arrow compute hash_min_max is incorrect.
For example, a table with two chunks, the second chunk has a new group key
```
First chunk: {"argument": 1, "key": 0},
Second chunk: {"argument": 0,  "key": 1}
```
the result of hash_min_max by "key" with such data is
```
[{"min": null, "max": null}, 0],
[{"min": 0, "max": 0}, 1]
```
But it should be
```
[{"min": 1, "max": 1}, 0],
[{"min": 0, "max": 0}, 1]
```

The root cause is that `has_values_` and `has_nulls_` are `BufferBuilder` which has no `_size` and `capacity_` property.  So `MakeResizeImpl` function init a `TypedBufferBuilder` with the `BufferBuilder` with `_size` and  `capacity_` of 0. After the first chunk is processed, in the consumption of the second chunk,  `MakeResizeImpl` is called to reserve enough space for the next chunk. Then as the `_size` and  `capacity_` are zero, the original `BufferBuilder` is overwritten by `Reserve`, and outputs an incorrect result.

This MR separates `has_values_` and `has_nulls_` with a `TypedBufferBuilder<bool>` which can keep the `_size` and `capacity_` property. Then in the consumption of the second chunk, the space of `has_values_` and `has_nulls_` is reserved after the data of the first chunk.

Closes #10443 from Crystrix/arrow-12942

Authored-by: crystrix <chenxi.li@live.com>
Signed-off-by: Benjamin Kietzman <bengilgit@gmail.com>
---
 .../arrow/compute/kernels/hash_aggregate.cc   | 14 ++++----
 .../compute/kernels/hash_aggregate_test.cc    | 32 +++++++++++++++++++
 2 files changed, 39 insertions(+), 7 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate.cc b/cpp/src/arrow/compute/kernels/hash_aggregate.cc
index 586a7087dc3..5f6503f8c24 100644
--- a/cpp/src/arrow/compute/kernels/hash_aggregate.cc
+++ b/cpp/src/arrow/compute/kernels/hash_aggregate.cc
@@ -1000,8 +1000,8 @@ struct GroupedMinMaxImpl : public GroupedAggregator {
 
     mins_ = BufferBuilder(ctx->memory_pool());
     maxes_ = BufferBuilder(ctx->memory_pool());
-    has_values_ = BufferBuilder(ctx->memory_pool());
-    has_nulls_ = BufferBuilder(ctx->memory_pool());
+    has_values_ = TypedBufferBuilder<bool>(ctx->memory_pool());
+    has_nulls_ = TypedBufferBuilder<bool>(ctx->memory_pool());
 
     GetImpl get_impl;
     RETURN_NOT_OK(VisitTypeInline(*input_type, &get_impl));
@@ -1009,7 +1009,6 @@ struct GroupedMinMaxImpl : public GroupedAggregator {
     consume_impl_ = std::move(get_impl.consume_impl);
     resize_min_impl_ = std::move(get_impl.resize_min_impl);
     resize_max_impl_ = std::move(get_impl.resize_max_impl);
-    resize_bitmap_impl_ = MakeResizeImpl(false);
 
     return Status::OK();
   }
@@ -1019,8 +1018,8 @@ struct GroupedMinMaxImpl : public GroupedAggregator {
       num_groups_ += added_groups;
       RETURN_NOT_OK(resize_min_impl_(&mins_, added_groups));
       RETURN_NOT_OK(resize_max_impl_(&maxes_, added_groups));
-      RETURN_NOT_OK(resize_bitmap_impl_(&has_values_, added_groups));
-      RETURN_NOT_OK(resize_bitmap_impl_(&has_nulls_, added_groups));
+      RETURN_NOT_OK(has_values_.Append(added_groups, false));
+      RETURN_NOT_OK(has_nulls_.Append(added_groups, false));
       return Status::OK();
     }));
 
@@ -1056,10 +1055,11 @@ struct GroupedMinMaxImpl : public GroupedAggregator {
   }
 
   int64_t num_groups_;
-  BufferBuilder mins_, maxes_, has_values_, has_nulls_;
+  BufferBuilder mins_, maxes_;
+  TypedBufferBuilder<bool> has_values_, has_nulls_;
   std::shared_ptr<DataType> type_;
   ConsumeImpl consume_impl_;
-  ResizeImpl resize_min_impl_, resize_max_impl_, resize_bitmap_impl_;
+  ResizeImpl resize_min_impl_, resize_max_impl_;
   ScalarAggregateOptions options_;
 };
 
diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc b/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc
index 86ed04e5ad3..5e4f8c5f0e6 100644
--- a/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc
+++ b/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc
@@ -812,5 +812,37 @@ TEST(GroupBy, WithChunkedArray) {
                     aggregated_and_grouped,
                     /*verbose=*/true);
 }
+
+TEST(GroupBy, MinMaxWithNewGroupsInChunkedArray) {
+  auto table = TableFromJSON(
+      schema({field("argument", int64()), field("key", int64())}),
+      {R"([{"argument": 1, "key": 0}])", R"([{"argument": 0,   "key": 1}])"});
+  ScalarAggregateOptions count_options;
+  ASSERT_OK_AND_ASSIGN(Datum aggregated_and_grouped,
+                       internal::GroupBy(
+                           {
+                               table->GetColumnByName("argument"),
+                           },
+                           {
+                               table->GetColumnByName("key"),
+                           },
+                           {
+                               {"hash_min_max", nullptr},
+                           }));
+
+  AssertDatumsEqual(ArrayFromJSON(struct_({
+                                      field("hash_min_max", struct_({
+                                                                field("min", int64()),
+                                                                field("max", int64()),
+                                                            })),
+                                      field("key_0", int64()),
+                                  }),
+                                  R"([
+    [{"min": 1, "max": 1}, 0],
+    [{"min": 0, "max": 0}, 1]
+  ])"),
+                    aggregated_and_grouped,
+                    /*verbose=*/true);
+}
 }  // namespace compute
 }  // namespace arrow

From 39dcb43dd26df51391f0da14b6e6285d612f3829 Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Wed, 9 Jun 2021 11:33:11 +0900
Subject: [PATCH 384/719] ARROW-12991: [CI] Migrate Travis-CI ARM job to
 "arm64-graviton2" arch

Closes #10478 from pitrou/ARROW-12991-arm64-graviton2

Lead-authored-by: Sutou Kouhei <kou@clear-code.com>
Co-authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 .travis.yml | 29 +++++++++++------------------
 1 file changed, 11 insertions(+), 18 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index fd87cf352e9..861cc77f402 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -15,7 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-dist: bionic
+dist: focal
 
 language: minimal
 
@@ -43,7 +43,13 @@ jobs:
   include:
     - name: "C++ on ARM"
       os: linux
-      arch: arm64
+      arch: arm64-graviton2
+      # This is required for arm64-graviton2.
+      # https://docs.travis-ci.com/user/multi-cpu-architectures/#example-multi-architecture-build-matrix
+      group: edge
+      # This is required for arm64-graviton2.
+      # https://docs.travis-ci.com/user/multi-cpu-architectures/#testing-on-multiple-cpu-architectures
+      virt: vm
       env:
         <<: *global_env
         ARCH: arm64v8
@@ -51,22 +57,12 @@ jobs:
         DOCKER_IMAGE_ID: ubuntu-cpp
         # ARROW_USE_GLOG=OFF is needed to avoid build error caused by
         # glog and CMAKE_UNITY_BUILD=ON.
-        #
-        # Disable ARROW_S3 because it often causes "No output has
-        # been received in the last 10m0s, this potentially indicates
-        # a stalled build or something wrong with the build itself."
-        # on Travis CI.
-        #
-        # Limiting CPP_MAKE_PARALLELISM is required to avoid random compiler
-        # crashes.
         DOCKER_RUN_ARGS: >-
           "
           -e ARROW_BUILD_STATIC=OFF
           -e ARROW_ORC=OFF
-          -e ARROW_S3=OFF
           -e ARROW_USE_GLOG=OFF
           -e CMAKE_UNITY_BUILD=ON
-          -e CPP_MAKE_PARALLELISM=4
           "
         # The LLVM's APT repository doesn't provide arm64 binaries.
         # We should use LLVM provided by Ubuntu.
@@ -139,14 +135,11 @@ before_install:
     fi
 
 install:
-  - pip3 install -e dev/archery[docker]
+  - sudo -H pip3 install --upgrade pip
+  - sudo -H pip3 install docker-compose
+  - sudo -H pip3 install -e dev/archery[docker]
 
 script:
-  - sudo sysctl -w kernel.core_pattern="core.%e.%p"
-  # This isn't allowed on Travis CI:
-  #   /home/travis/.travis/functions: line 109: ulimit: core file size: cannot modify limit: Operation not permitted
-  - |
-    ulimit -c unlimited || :
   - |
     archery docker run \
       ${DOCKER_RUN_ARGS} \

From 00cc41fae3618661a48bef08f6dcec2fec462b00 Mon Sep 17 00:00:00 2001
From: Rok <rok@mihevc.org>
Date: Wed, 9 Jun 2021 19:28:36 +0200
Subject: [PATCH 385/719] ARROW-11759: [C++] Kernel to extract datetime
 components (year, month, day, etc) from timestamp type

This is to resolve [ARROW-11759](https://issues.apache.org/jira/browse/ARROW-11759).

Closes #10176 from rok/ARROW-11759

Lead-authored-by: Rok <rok@mihevc.org>
Co-authored-by: Rok Mihevc <rok@mihevc.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/CMakeLists.txt                  |   1 +
 cpp/src/arrow/compute/api_scalar.cc           |  20 +
 cpp/src/arrow/compute/api_scalar.h            | 183 +++++
 cpp/src/arrow/compute/kernels/CMakeLists.txt  |   1 +
 .../arrow/compute/kernels/scalar_temporal.cc  | 631 ++++++++++++++++++
 .../compute/kernels/scalar_temporal_test.cc   | 181 +++++
 cpp/src/arrow/compute/registry.cc             |   1 +
 cpp/src/arrow/compute/registry_internal.h     |   1 +
 docs/source/cpp/compute.rst                   |  52 ++
 9 files changed, 1071 insertions(+)
 create mode 100644 cpp/src/arrow/compute/kernels/scalar_temporal.cc
 create mode 100644 cpp/src/arrow/compute/kernels/scalar_temporal_test.cc

diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index f6d5a540c98..8e411898a34 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -397,6 +397,7 @@ if(ARROW_COMPUTE)
               compute/kernels/scalar_nested.cc
               compute/kernels/scalar_set_lookup.cc
               compute/kernels/scalar_string.cc
+              compute/kernels/scalar_temporal.cc
               compute/kernels/scalar_validity.cc
               compute/kernels/scalar_fill_null.cc
               compute/kernels/scalar_if_else.cc
diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc
index 6f77d6f9785..dba71456c29 100644
--- a/cpp/src/arrow/compute/api_scalar.cc
+++ b/cpp/src/arrow/compute/api_scalar.cc
@@ -172,5 +172,25 @@ Result<Datum> IfElse(const Datum& cond, const Datum& if_true, const Datum& if_fa
   return CallFunction("if_else", {cond, if_true, if_false}, ctx);
 }
 
+// ----------------------------------------------------------------------
+// Temporal functions
+
+SCALAR_EAGER_UNARY(Year, "year")
+SCALAR_EAGER_UNARY(Month, "month")
+SCALAR_EAGER_UNARY(Day, "day")
+SCALAR_EAGER_UNARY(DayOfWeek, "day_of_week")
+SCALAR_EAGER_UNARY(DayOfYear, "day_of_year")
+SCALAR_EAGER_UNARY(ISOYear, "iso_year")
+SCALAR_EAGER_UNARY(ISOWeek, "iso_week")
+SCALAR_EAGER_UNARY(ISOCalendar, "iso_calendar")
+SCALAR_EAGER_UNARY(Quarter, "quarter")
+SCALAR_EAGER_UNARY(Hour, "hour")
+SCALAR_EAGER_UNARY(Minute, "minute")
+SCALAR_EAGER_UNARY(Second, "second")
+SCALAR_EAGER_UNARY(Millisecond, "millisecond")
+SCALAR_EAGER_UNARY(Microsecond, "microsecond")
+SCALAR_EAGER_UNARY(Nanosecond, "nanosecond")
+SCALAR_EAGER_UNARY(Subsecond, "subsecond")
+
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h
index 21d5c5324d4..190696f6ed5 100644
--- a/cpp/src/arrow/compute/api_scalar.h
+++ b/cpp/src/arrow/compute/api_scalar.h
@@ -521,5 +521,188 @@ ARROW_EXPORT
 Result<Datum> IfElse(const Datum& cond, const Datum& left, const Datum& right,
                      ExecContext* ctx = NULLPTR);
 
+/// \brief Year returns year for each element of `values`
+///
+/// \param[in] values input to extract year from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 5.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Year(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Month returns month for each element of `values`.
+/// Month is encoded as January=1, December=12
+///
+/// \param[in] values input to extract month from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 5.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Month(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Day returns day number for each element of `values`
+///
+/// \param[in] values input to extract day from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 5.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Day(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief DayOfWeek returns number of the day of the week value for each element of
+/// `values`. Week starts on Monday denoted by 0 and ends on Sunday denoted by 6.
+///
+/// \param[in] values input to extract number of the day of the week from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 5.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> DayOfWeek(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief DayOfYear returns number of day of the year for each element of `values`.
+/// January 1st maps to day number 1, February 1st to 32, etc.
+///
+/// \param[in] values input to extract number of day of the year from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 5.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> DayOfYear(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief ISOYear returns ISO year number for each element of `values`.
+/// First week of an ISO year has the majority (4 or more) of its days in January.
+///
+/// \param[in] values input to extract ISO year from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 5.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> ISOYear(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief ISOWeek returns ISO week of year number for each element of `values`.
+/// First ISO week has the majority (4 or more) of its days in January.
+/// Week of the year starts with 1 and can run up to 53.
+///
+/// \param[in] values input to extract ISO week of year from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 5.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> ISOWeek(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief ISOCalendar returns a (ISO year, ISO week, ISO day of week) struct for
+/// each element of `values`.
+/// ISO week starts on Monday denoted by 1 and ends on Sunday denoted by 7.
+///
+/// \param[in] values input to ISO calendar struct from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 5.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> ISOCalendar(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Quarter returns the quarter of year number for each element of `values`
+/// First quarter maps to 1 and fourth quarter maps to 4.
+///
+/// \param[in] values input to extract quarter of year from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 5.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> Quarter(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Hour returns hour value for each element of `values`
+///
+/// \param[in] values input to extract hour from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 5.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Hour(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Minute returns minutes value for each element of `values`
+///
+/// \param[in] values input to extract minutes from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 5.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Minute(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Second returns seconds value for each element of `values`
+///
+/// \param[in] values input to extract seconds from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 5.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Second(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Millisecond returns number of milliseconds since the last full second
+/// for each element of `values`
+///
+/// \param[in] values input to extract milliseconds from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 5.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Millisecond(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Microsecond returns number of microseconds since the last full millisecond
+/// for each element of `values`
+///
+/// \param[in] values input to extract microseconds from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 5.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Microsecond(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Nanosecond returns number of nanoseconds since the last full millisecond
+/// for each element of `values`
+///
+/// \param[in] values input to extract nanoseconds from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 5.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Nanosecond(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Subsecond returns the fraction of second elapsed since last full second
+/// as a float for each element of `values`
+///
+/// \param[in] values input to extract subsecond from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 5.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> Subsecond(const Datum& values, ExecContext* ctx = NULLPTR);
+
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/CMakeLists.txt b/cpp/src/arrow/compute/kernels/CMakeLists.txt
index fc11d144105..326578588a7 100644
--- a/cpp/src/arrow/compute/kernels/CMakeLists.txt
+++ b/cpp/src/arrow/compute/kernels/CMakeLists.txt
@@ -27,6 +27,7 @@ add_arrow_compute_test(scalar_test
                        scalar_nested_test.cc
                        scalar_set_lookup_test.cc
                        scalar_string_test.cc
+                       scalar_temporal_test.cc
                        scalar_validity_test.cc
                        scalar_fill_null_test.cc
                        scalar_if_else_test.cc
diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal.cc b/cpp/src/arrow/compute/kernels/scalar_temporal.cc
new file mode 100644
index 00000000000..cc22ccf044a
--- /dev/null
+++ b/cpp/src/arrow/compute/kernels/scalar_temporal.cc
@@ -0,0 +1,631 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/builder.h"
+#include "arrow/compute/kernels/common.h"
+#include "arrow/util/time.h"
+#include "arrow/vendored/datetime.h"
+
+namespace arrow {
+
+namespace compute {
+
+namespace internal {
+
+namespace {
+
+using arrow_vendored::date::days;
+using arrow_vendored::date::floor;
+using arrow_vendored::date::hh_mm_ss;
+using arrow_vendored::date::sys_time;
+using arrow_vendored::date::trunc;
+using arrow_vendored::date::weekday;
+using arrow_vendored::date::weeks;
+using arrow_vendored::date::year_month_day;
+using arrow_vendored::date::years;
+using arrow_vendored::date::literals::dec;
+using arrow_vendored::date::literals::jan;
+using arrow_vendored::date::literals::last;
+using arrow_vendored::date::literals::mon;
+using arrow_vendored::date::literals::thu;
+using internal::applicator::ScalarUnaryNotNull;
+using internal::applicator::SimpleUnary;
+
+// Based on ScalarUnaryNotNullStateful. Adds timezone awareness.
+template <typename Op, typename OutType>
+struct ScalarUnaryStatefulTemporal {
+  using ThisType = ScalarUnaryStatefulTemporal<Op, OutType>;
+  using OutValue = typename internal::GetOutputType<OutType>::T;
+
+  Op op;
+  explicit ScalarUnaryStatefulTemporal(Op op) : op(std::move(op)) {}
+
+  template <typename Type>
+  struct ArrayExec {
+    static Status Exec(const ThisType& functor, KernelContext* ctx, const ArrayData& arg0,
+                       Datum* out) {
+      const std::string timezone =
+          checked_pointer_cast<const TimestampType>(arg0.type)->timezone();
+      Status st = Status::OK();
+      ArrayData* out_arr = out->mutable_array();
+      auto out_data = out_arr->GetMutableValues<OutValue>(1);
+
+      if (timezone.empty()) {
+        internal::VisitArrayValuesInline<Int64Type>(
+            arg0,
+            [&](int64_t v) {
+              *out_data++ = functor.op.template Call<OutValue>(ctx, v, &st);
+            },
+            [&]() {
+              // null
+              ++out_data;
+            });
+      } else {
+        st = Status::Invalid("Timezone aware timestamps not supported. Timezone found: ",
+                             timezone);
+      }
+      return st;
+    }
+  };
+
+  Status Scalar(KernelContext* ctx, const Scalar& arg0, Datum* out) {
+    const std::string timezone =
+        checked_pointer_cast<const TimestampType>(arg0.type)->timezone();
+    Status st = Status::OK();
+    if (timezone.empty()) {
+      if (arg0.is_valid) {
+        int64_t arg0_val = internal::UnboxScalar<Int64Type>::Unbox(arg0);
+        internal::BoxScalar<OutType>::Box(
+            this->op.template Call<OutValue>(ctx, arg0_val, &st), out->scalar().get());
+      }
+    } else {
+      st = Status::Invalid("Timezone aware timestamps not supported. Timezone found: ",
+                           timezone);
+    }
+    return st;
+  }
+
+  Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    if (batch[0].kind() == Datum::ARRAY) {
+      return ArrayExec<OutType>::Exec(*this, ctx, *batch[0].array(), out);
+    } else {
+      return Scalar(ctx, *batch[0].scalar(), out);
+    }
+  }
+};
+
+template <typename Op, typename OutType>
+struct ScalarUnaryTemporal {
+  using OutValue = typename internal::GetOutputType<OutType>::T;
+
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    // Seed kernel with dummy state
+    ScalarUnaryStatefulTemporal<Op, OutType> kernel({});
+    return kernel.Exec(ctx, batch, out);
+  }
+};
+
+// ----------------------------------------------------------------------
+// Extract year from timestamp
+
+template <typename Duration>
+struct Year {
+  template <typename T>
+  static T Call(KernelContext*, int64_t arg, Status*) {
+    return static_cast<T>(static_cast<const int32_t>(
+        year_month_day(floor<days>(sys_time<Duration>(Duration{arg}))).year()));
+  }
+};
+
+// ----------------------------------------------------------------------
+// Extract month from timestamp
+
+template <typename Duration>
+struct Month {
+  template <typename T>
+  static T Call(KernelContext*, int64_t arg, Status*) {
+    return static_cast<T>(static_cast<const uint32_t>(
+        year_month_day(floor<days>(sys_time<Duration>(Duration{arg}))).month()));
+  }
+};
+
+// ----------------------------------------------------------------------
+// Extract day from timestamp
+
+template <typename Duration>
+struct Day {
+  template <typename T>
+  static T Call(KernelContext*, int64_t arg, Status*) {
+    return static_cast<T>(static_cast<const uint32_t>(
+        year_month_day(floor<days>(sys_time<Duration>(Duration{arg}))).day()));
+  }
+};
+
+// ----------------------------------------------------------------------
+// Extract day of week from timestamp
+
+template <typename Duration>
+struct DayOfWeek {
+  template <typename T>
+  static T Call(KernelContext*, int64_t arg, Status*) {
+    return static_cast<T>(
+        weekday(year_month_day(floor<days>(sys_time<Duration>(Duration{arg}))))
+            .iso_encoding() -
+        1);
+  }
+};
+
+// ----------------------------------------------------------------------
+// Extract day of year from timestamp
+
+template <typename Duration>
+struct DayOfYear {
+  template <typename T>
+  static T Call(KernelContext*, int64_t arg, Status*) {
+    const auto t = floor<days>(sys_time<Duration>(Duration{arg}));
+    return static_cast<T>(
+        (t - sys_time<days>(year_month_day(t).year() / jan / 0)).count());
+  }
+};
+
+// ----------------------------------------------------------------------
+// Extract ISO Year values from timestamp
+//
+// First week of an ISO year has the majority (4 or more) of it's days in January.
+// Last week of an ISO year has the year's last Thursday in it.
+
+template <typename Duration>
+struct ISOYear {
+  template <typename T>
+  static T Call(KernelContext*, int64_t arg, Status*) {
+    const auto t = floor<days>(sys_time<Duration>(Duration{arg}));
+    auto y = year_month_day{t + days{3}}.year();
+    auto start = sys_time<days>((y - years{1}) / dec / thu[last]) + (mon - thu);
+    if (t < start) {
+      --y;
+    }
+    return static_cast<T>(static_cast<int32_t>(y));
+  }
+};
+
+// ----------------------------------------------------------------------
+// Extract ISO week from timestamp
+//
+// First week of an ISO year has the majority (4 or more) of it's days in January.
+// Last week of an ISO year has the year's last Thursday in it.
+// Based on
+// https://github.com/HowardHinnant/date/blob/6e921e1b1d21e84a5c82416ba7ecd98e33a436d0/include/date/iso_week.h#L1503
+template <typename Duration>
+struct ISOWeek {
+  template <typename T>
+  static T Call(KernelContext*, int64_t arg, Status*) {
+    const auto t = floor<days>(sys_time<Duration>(Duration{arg}));
+    auto y = year_month_day{t + days{3}}.year();
+    auto start = sys_time<days>((y - years{1}) / dec / thu[last]) + (mon - thu);
+    if (t < start) {
+      --y;
+      start = sys_time<days>((y - years{1}) / dec / thu[last]) + (mon - thu);
+    }
+    return static_cast<T>(trunc<weeks>(t - start).count() + 1);
+  }
+};
+
+// ----------------------------------------------------------------------
+// Extract quarter from timestamp
+
+template <typename Duration>
+struct Quarter {
+  template <typename T>
+  static T Call(KernelContext*, int64_t arg, Status*) {
+    const auto ymd = year_month_day(floor<days>(sys_time<Duration>(Duration{arg})));
+    return static_cast<T>((static_cast<const uint32_t>(ymd.month()) - 1) / 3 + 1);
+  }
+};
+
+// ----------------------------------------------------------------------
+// Extract hour from timestamp
+
+template <typename Duration>
+struct Hour {
+  template <typename T>
+  static T Call(KernelContext*, int64_t arg, Status*) {
+    Duration t = Duration{arg};
+    return static_cast<T>((t - floor<days>(t)) / std::chrono::hours(1));
+  }
+};
+
+// ----------------------------------------------------------------------
+// Extract minute from timestamp
+
+template <typename Duration>
+struct Minute {
+  template <typename T>
+  static T Call(KernelContext*, int64_t arg, Status*) {
+    Duration t = Duration{arg};
+    return static_cast<T>((t - floor<std::chrono::hours>(t)) / std::chrono::minutes(1));
+  }
+};
+
+// ----------------------------------------------------------------------
+// Extract second from timestamp
+
+template <typename Duration>
+struct Second {
+  template <typename T>
+  static T Call(KernelContext*, int64_t arg, Status*) {
+    Duration t = Duration{arg};
+    return static_cast<T>((t - floor<std::chrono::minutes>(t)) / std::chrono::seconds(1));
+  }
+};
+
+// ----------------------------------------------------------------------
+// Extract subsecond from timestamp
+
+template <typename Duration>
+struct Subsecond {
+  template <typename T>
+  static T Call(KernelContext*, int64_t arg, Status*) {
+    Duration t = Duration{arg};
+    return static_cast<T>(
+        (std::chrono::duration<double>(t - floor<std::chrono::seconds>(t)).count()));
+  }
+};
+
+// ----------------------------------------------------------------------
+// Extract milliseconds from timestamp
+
+template <typename Duration>
+struct Millisecond {
+  template <typename T>
+  static T Call(KernelContext*, int64_t arg, Status*) {
+    Duration t = Duration{arg};
+    return static_cast<T>(
+        ((t - floor<std::chrono::seconds>(t)) / std::chrono::milliseconds(1)) % 1000);
+  }
+};
+
+// ----------------------------------------------------------------------
+// Extract microseconds from timestamp
+
+template <typename Duration>
+struct Microsecond {
+  template <typename T>
+  static T Call(KernelContext*, int64_t arg, Status*) {
+    Duration t = Duration{arg};
+    return static_cast<T>(
+        ((t - floor<std::chrono::seconds>(t)) / std::chrono::microseconds(1)) % 1000);
+  }
+};
+
+// ----------------------------------------------------------------------
+// Extract nanoseconds from timestamp
+
+template <typename Duration>
+struct Nanosecond {
+  template <typename T>
+  static T Call(KernelContext*, int64_t arg, Status*) {
+    Duration t = Duration{arg};
+    return static_cast<T>(
+        ((t - floor<std::chrono::seconds>(t)) / std::chrono::nanoseconds(1)) % 1000);
+  }
+};
+
+template <typename Duration>
+inline std::vector<int64_t> get_iso_calendar(int64_t arg) {
+  const auto t = floor<days>(sys_time<Duration>(Duration{arg}));
+  const auto ymd = year_month_day(t);
+  auto y = year_month_day{t + days{3}}.year();
+  auto start = sys_time<days>((y - years{1}) / dec / thu[last]) + (mon - thu);
+  if (t < start) {
+    --y;
+    start = sys_time<days>((y - years{1}) / dec / thu[last]) + (mon - thu);
+  }
+  return {static_cast<int64_t>(static_cast<int32_t>(y)),
+          static_cast<int64_t>(trunc<weeks>(t - start).count() + 1),
+          static_cast<int64_t>(weekday(ymd).iso_encoding())};
+}
+
+// ----------------------------------------------------------------------
+// Extract ISO calendar values from timestamp
+
+template <typename Duration>
+struct ISOCalendar {
+  static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) {
+    const std::string timezone =
+        checked_pointer_cast<const TimestampType>(in.type)->timezone();
+    if (!timezone.empty()) {
+      return Status::Invalid("Timezone aware timestamps not supported. Timezone found: ",
+                             timezone);
+    }
+
+    if (in.is_valid) {
+      const std::shared_ptr<DataType> iso_calendar_type =
+          struct_({field("iso_year", int64()), field("iso_week", int64()),
+                   field("iso_day_of_week", int64())});
+      const auto& in_val = internal::UnboxScalar<const TimestampType>::Unbox(in);
+      const auto iso_calendar = get_iso_calendar<Duration>(in_val);
+
+      std::vector<std::shared_ptr<Scalar>> values = {
+          std::make_shared<Int64Scalar>(iso_calendar[0]),
+          std::make_shared<Int64Scalar>(iso_calendar[1]),
+          std::make_shared<Int64Scalar>(iso_calendar[2])};
+      *checked_cast<StructScalar*>(out) = StructScalar(values, iso_calendar_type);
+    } else {
+      out->is_valid = false;
+    }
+    return Status::OK();
+  }
+
+  static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) {
+    using BuilderType = typename TypeTraits<Int64Type>::BuilderType;
+    const std::string timezone =
+        checked_pointer_cast<const TimestampType>(in.type)->timezone();
+    if (!timezone.empty()) {
+      return Status::Invalid("Timezone aware timestamps not supported. Timezone found: ",
+                             timezone);
+    }
+    const std::shared_ptr<DataType> iso_calendar_type =
+        struct_({field("iso_year", int64()), field("iso_week", int64()),
+                 field("iso_day_of_week", int64())});
+
+    std::unique_ptr<ArrayBuilder> array_builder;
+    RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(), iso_calendar_type, &array_builder));
+    StructBuilder* struct_builder = checked_cast<StructBuilder*>(array_builder.get());
+    RETURN_NOT_OK(struct_builder->Reserve(in.length));
+
+    std::vector<BuilderType*> field_builders;
+    field_builders.reserve(3);
+    for (int i = 0; i < 3; i++) {
+      field_builders.push_back(
+          checked_cast<BuilderType*>(struct_builder->field_builder(i)));
+      RETURN_NOT_OK(field_builders[i]->Reserve(1));
+    }
+    auto visit_null = [&]() { return struct_builder->AppendNull(); };
+    auto visit_value = [&](int64_t arg) {
+      const auto iso_calendar = get_iso_calendar<Duration>(arg);
+      field_builders[0]->UnsafeAppend(iso_calendar[0]);
+      field_builders[1]->UnsafeAppend(iso_calendar[1]);
+      field_builders[2]->UnsafeAppend(iso_calendar[2]);
+      return struct_builder->Append();
+    };
+    RETURN_NOT_OK(VisitArrayDataInline<Int64Type>(in, visit_value, visit_null));
+
+    std::shared_ptr<Array> out_array;
+    RETURN_NOT_OK(struct_builder->Finish(&out_array));
+    *out = *std::move(out_array->data());
+
+    return Status::OK();
+  }
+};
+
+template <template <typename...> class Op, typename OutType>
+std::shared_ptr<ScalarFunction> MakeTemporal(std::string name, const FunctionDoc* doc) {
+  const auto& out_type = TypeTraits<OutType>::type_singleton();
+  auto func = std::make_shared<ScalarFunction>(name, Arity::Unary(), doc);
+
+  for (auto unit : internal::AllTimeUnits()) {
+    InputType in_type{match::TimestampTypeUnit(unit)};
+    switch (unit) {
+      case TimeUnit::SECOND: {
+        auto exec = ScalarUnaryTemporal<Op<std::chrono::seconds>, OutType>::Exec;
+        DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec)));
+        break;
+      }
+      case TimeUnit::MILLI: {
+        auto exec = ScalarUnaryTemporal<Op<std::chrono::milliseconds>, OutType>::Exec;
+        DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec)));
+        break;
+      }
+      case TimeUnit::MICRO: {
+        auto exec = ScalarUnaryTemporal<Op<std::chrono::microseconds>, OutType>::Exec;
+        DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec)));
+        break;
+      }
+      case TimeUnit::NANO: {
+        auto exec = ScalarUnaryTemporal<Op<std::chrono::nanoseconds>, OutType>::Exec;
+        DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec)));
+        break;
+      }
+    }
+  }
+  return func;
+}
+
+template <template <typename...> class Op>
+std::shared_ptr<ScalarFunction> MakeStructTemporal(std::string name,
+                                                   const FunctionDoc* doc) {
+  const auto& out_type = struct_({field("iso_year", int64()), field("iso_week", int64()),
+                                  field("iso_day_of_week", int64())});
+  auto func = std::make_shared<ScalarFunction>(name, Arity::Unary(), doc);
+
+  for (auto unit : internal::AllTimeUnits()) {
+    InputType in_type{match::TimestampTypeUnit(unit)};
+    switch (unit) {
+      case TimeUnit::SECOND: {
+        auto exec = SimpleUnary<Op<std::chrono::seconds>>;
+        DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec)));
+        break;
+      }
+      case TimeUnit::MILLI: {
+        auto exec = SimpleUnary<Op<std::chrono::milliseconds>>;
+        DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec)));
+        break;
+      }
+      case TimeUnit::MICRO: {
+        auto exec = SimpleUnary<Op<std::chrono::microseconds>>;
+        DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec)));
+        break;
+      }
+      case TimeUnit::NANO: {
+        auto exec = SimpleUnary<Op<std::chrono::nanoseconds>>;
+        DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec)));
+        break;
+      }
+    }
+  }
+  return func;
+}
+
+const FunctionDoc year_doc{
+    "Extract year from timestamp",
+    "Returns an error if timestamp has a defined timezone. Null values return null.",
+    {"values"}};
+
+const FunctionDoc month_doc{
+    "Extract month number",
+    ("Month is encoded as January=1, December=12.\n"
+     "Returns an error if timestamp has a defined timezone. Null values return null."),
+    {"values"}};
+
+const FunctionDoc day_doc{
+    "Extract day number",
+    "Returns an error if timestamp has a defined timezone. Null values return null.",
+    {"values"}};
+
+const FunctionDoc day_of_week_doc{
+    "Extract day of the week number",
+    ("Week starts on Monday denoted by 0 and ends on Sunday denoted by 6.\n"
+     "Returns an error if timestamp has a defined timezone. Null values return null."),
+    {"values"}};
+
+const FunctionDoc day_of_year_doc{
+    "Extract number of day of year",
+    ("January 1st maps to day number 1, February 1st to 32, etc.\n"
+     "Returns an error if timestamp has a defined timezone. Null values return null."),
+    {"values"}};
+
+const FunctionDoc iso_year_doc{
+    "Extract ISO year number",
+    ("First week of an ISO year has the majority (4 or more) of its days in January."
+     "Returns an error if timestamp has a defined timezone. Null values return null."),
+    {"values"}};
+
+const FunctionDoc iso_week_doc{
+    "Extract ISO week of year number",
+    ("First ISO week has the majority (4 or more) of its days in January.\n"
+     "Week of the year starts with 1 and can run up to 53.\n"
+     "Returns an error if timestamp has a defined timezone. Null values return null."),
+    {"values"}};
+
+const FunctionDoc iso_calendar_doc{
+    "Extract (ISO year, ISO week, ISO day of week) struct",
+    ("ISO week starts on Monday denoted by 1 and ends on Sunday denoted by 7.\n"
+     "Returns an error if timestamp has a defined timezone. Null values return null."),
+    {"values"}};
+
+const FunctionDoc quarter_doc{
+    "Extract quarter of year number",
+    ("First quarter maps to 1 and forth quarter maps to 4.\n"
+     "Returns an error if timestamp has a defined timezone. Null values return null."),
+    {"values"}};
+
+const FunctionDoc hour_doc{
+    "Extract hour value",
+    "Returns an error if timestamp has a defined timezone. Null values return null.",
+    {"values"}};
+
+const FunctionDoc minute_doc{
+    "Extract minute values",
+    "Returns an error if timestamp has a defined timezone. Null values return null.",
+    {"values"}};
+
+const FunctionDoc second_doc{
+    "Extract second values",
+    "Returns an error if timestamp has a defined timezone. Null values return null.",
+    {"values"}};
+
+const FunctionDoc millisecond_doc{
+    "Extract millisecond values",
+    ("Millisecond returns number of milliseconds since the last full second.\n"
+     "Returns an error if timestamp has a defined timezone. Null values return null."),
+    {"values"}};
+
+const FunctionDoc microsecond_doc{
+    "Extract microsecond values",
+    ("Millisecond returns number of microseconds since the last full millisecond.\n"
+     "Returns an error if timestamp has a defined timezone. Null values return null."),
+    {"values"}};
+
+const FunctionDoc nanosecond_doc{
+    "Extract nanosecond values",
+    ("Nanosecond returns number of nanoseconds since the last full microsecond.\n"
+     "Returns an error if timestamp has a defined timezone. Null values return null."),
+    {"values"}};
+
+const FunctionDoc subsecond_doc{
+    "Extract subsecond values",
+    ("Subsecond returns the fraction of a second since the last full second.\n"
+     "Returns an error if timestamp has a defined timezone. Null values return null."),
+    {"values"}};
+
+}  // namespace
+
+void RegisterScalarTemporal(FunctionRegistry* registry) {
+  auto year = MakeTemporal<Year, Int64Type>("year", &year_doc);
+  DCHECK_OK(registry->AddFunction(std::move(year)));
+
+  auto month = MakeTemporal<Month, Int64Type>("month", &year_doc);
+  DCHECK_OK(registry->AddFunction(std::move(month)));
+
+  auto day = MakeTemporal<Day, Int64Type>("day", &year_doc);
+  DCHECK_OK(registry->AddFunction(std::move(day)));
+
+  auto day_of_week = MakeTemporal<DayOfWeek, Int64Type>("day_of_week", &day_of_week_doc);
+  DCHECK_OK(registry->AddFunction(std::move(day_of_week)));
+
+  auto day_of_year = MakeTemporal<DayOfYear, Int64Type>("day_of_year", &day_of_year_doc);
+  DCHECK_OK(registry->AddFunction(std::move(day_of_year)));
+
+  auto iso_year = MakeTemporal<ISOYear, Int64Type>("iso_year", &iso_year_doc);
+  DCHECK_OK(registry->AddFunction(std::move(iso_year)));
+
+  auto iso_week = MakeTemporal<ISOWeek, Int64Type>("iso_week", &iso_week_doc);
+  DCHECK_OK(registry->AddFunction(std::move(iso_week)));
+
+  auto iso_calendar = MakeStructTemporal<ISOCalendar>("iso_calendar", &iso_calendar_doc);
+  DCHECK_OK(registry->AddFunction(std::move(iso_calendar)));
+
+  auto quarter = MakeTemporal<Quarter, Int64Type>("quarter", &quarter_doc);
+  DCHECK_OK(registry->AddFunction(std::move(quarter)));
+
+  auto hour = MakeTemporal<Hour, Int64Type>("hour", &hour_doc);
+  DCHECK_OK(registry->AddFunction(std::move(hour)));
+
+  auto minute = MakeTemporal<Minute, Int64Type>("minute", &minute_doc);
+  DCHECK_OK(registry->AddFunction(std::move(minute)));
+
+  auto second = MakeTemporal<Second, DoubleType>("second", &second_doc);
+  DCHECK_OK(registry->AddFunction(std::move(second)));
+
+  auto millisecond =
+      MakeTemporal<Millisecond, Int64Type>("millisecond", &millisecond_doc);
+  DCHECK_OK(registry->AddFunction(std::move(millisecond)));
+
+  auto microsecond =
+      MakeTemporal<Microsecond, Int64Type>("microsecond", &microsecond_doc);
+  DCHECK_OK(registry->AddFunction(std::move(microsecond)));
+
+  auto nanosecond = MakeTemporal<Nanosecond, Int64Type>("nanosecond", &nanosecond_doc);
+  DCHECK_OK(registry->AddFunction(std::move(nanosecond)));
+
+  auto subsecond = MakeTemporal<Subsecond, DoubleType>("subsecond", &subsecond_doc);
+  DCHECK_OK(registry->AddFunction(std::move(subsecond)));
+}
+
+}  // namespace internal
+}  // namespace compute
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc
new file mode 100644
index 00000000000..be1054b3705
--- /dev/null
+++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc
@@ -0,0 +1,181 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+#include "arrow/compute/api_scalar.h"
+#include "arrow/compute/kernels/common.h"
+#include "arrow/compute/kernels/test_util.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/formatting.h"
+
+namespace arrow {
+
+using internal::StringFormatter;
+
+class ScalarTemporalTest : public ::testing::Test {};
+
+namespace compute {
+
+TEST(ScalarTemporalTest, TestTemporalComponentExtraction) {
+  const char* times =
+      R"(["1970-01-01T00:00:59.123456789","2000-02-29T23:23:23.999999999",
+          "1899-01-01T00:59:20.001001001","2033-05-18T03:33:20.000000000",
+          null, "2020-01-01T01:05:05.001", "2019-12-31T02:10:10.002",
+          "2019-12-30T03:15:15.003", "2009-12-31T04:20:20.004132",
+          "2010-01-01T05:25:25.005321", "2010-01-03T06:30:30.006163",
+          "2010-01-04T07:35:35", "2006-01-01T08:40:40", "2005-12-31T09:45:45",
+          "2008-12-28", "2008-12-29", "2012-01-01 01:02:03"])";
+  auto unit = timestamp(TimeUnit::NANO);
+  auto iso_calendar_type =
+      struct_({field("iso_year", int64()), field("iso_week", int64()),
+               field("iso_day_of_week", int64())});
+
+  auto year =
+      "[1970, 2000, 1899, 2033, null, 2020, 2019, 2019, 2009, 2010, 2010, 2010, 2006, "
+      "2005, 2008, 2008, 2012]";
+  auto month = "[1, 2, 1, 5, null, 1, 12, 12, 12, 1, 1, 1, 1, 12, 12, 12, 1]";
+  auto day = "[1, 29, 1, 18, null, 1, 31, 30, 31, 1, 3, 4, 1, 31, 28, 29, 1]";
+  auto day_of_week = "[3, 1, 6, 2, null, 2, 1, 0, 3, 4, 6, 0, 6, 5, 6, 0, 6]";
+  auto day_of_year =
+      "[1, 60, 1, 138, null, 1, 365, 364, 365, 1, 3, 4, 1, 365, 363, 364, 1]";
+  auto iso_year =
+      "[1970, 2000, 1898, 2033, null, 2020, 2020, 2020, 2009, 2009, 2009, 2010, 2005, "
+      "2005, 2008, 2009, 2011]";
+  auto iso_week = "[1, 9, 52, 20, null, 1, 1, 1, 53, 53, 53, 1, 52, 52, 52, 1, 52]";
+  auto iso_calendar =
+      ArrayFromJSON(iso_calendar_type,
+                    R"([{"iso_year": 1970, "iso_week": 1, "iso_day_of_week": 4},
+                        {"iso_year": 2000, "iso_week": 9, "iso_day_of_week": 2},
+                        {"iso_year": 1898, "iso_week": 52, "iso_day_of_week": 7},
+                        {"iso_year": 2033, "iso_week": 20, "iso_day_of_week": 3},
+                        null,
+                        {"iso_year": 2020, "iso_week": 1, "iso_day_of_week": 3},
+                        {"iso_year": 2020, "iso_week": 1, "iso_day_of_week": 2},
+                        {"iso_year": 2020, "iso_week": 1, "iso_day_of_week": 1},
+                        {"iso_year": 2009, "iso_week": 53, "iso_day_of_week": 4},
+                        {"iso_year": 2009, "iso_week": 53, "iso_day_of_week": 5},
+                        {"iso_year": 2009, "iso_week": 53, "iso_day_of_week": 7},
+                        {"iso_year": 2010, "iso_week": 1, "iso_day_of_week": 1},
+                        {"iso_year": 2005, "iso_week": 52, "iso_day_of_week": 7},
+                        {"iso_year": 2005, "iso_week": 52, "iso_day_of_week": 6},
+                        {"iso_year": 2008, "iso_week": 52, "iso_day_of_week": 7},
+                        {"iso_year": 2009, "iso_week": 1, "iso_day_of_week": 1},
+                        {"iso_year": 2011, "iso_week": 52, "iso_day_of_week": 7}])");
+  auto quarter = "[1, 1, 1, 2, null, 1, 4, 4, 4, 1, 1, 1, 1, 4, 4, 4, 1]";
+  auto hour = "[0, 23, 0, 3, null, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 1]";
+  auto minute = "[0, 23, 59, 33, null, 5, 10, 15, 20, 25, 30, 35, 40, 45, 0, 0, 2]";
+  auto second = "[59, 23, 20, 20, null, 5, 10, 15, 20, 25, 30, 35, 40, 45, 0, 0, 3]";
+  auto millisecond = "[123, 999, 1, 0, null, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0, 0]";
+  auto microsecond = "[456, 999, 1, 0, null, 0, 0, 0, 132, 321, 163, 0, 0, 0, 0, 0, 0]";
+  auto nanosecond = "[789, 999, 1, 0, null, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]";
+  auto subsecond =
+      "[0.123456789, 0.999999999, 0.001001001, 0, null, 0.001, 0.002, 0.003, 0.004132, "
+      "0.005321, 0.006163, 0, 0, 0, 0, 0, 0]";
+
+  CheckScalarUnary("year", unit, times, int64(), year);
+  CheckScalarUnary("month", unit, times, int64(), month);
+  CheckScalarUnary("day", unit, times, int64(), day);
+  CheckScalarUnary("day_of_week", unit, times, int64(), day_of_week);
+  CheckScalarUnary("day_of_year", unit, times, int64(), day_of_year);
+  CheckScalarUnary("iso_year", unit, times, int64(), iso_year);
+  CheckScalarUnary("iso_week", unit, times, int64(), iso_week);
+  CheckScalarUnary("iso_calendar", ArrayFromJSON(unit, times), iso_calendar);
+  CheckScalarUnary("quarter", unit, times, int64(), quarter);
+  CheckScalarUnary("hour", unit, times, int64(), hour);
+  CheckScalarUnary("minute", unit, times, int64(), minute);
+  CheckScalarUnary("second", unit, times, float64(), second);
+  CheckScalarUnary("millisecond", unit, times, int64(), millisecond);
+  CheckScalarUnary("microsecond", unit, times, int64(), microsecond);
+  CheckScalarUnary("nanosecond", unit, times, int64(), nanosecond);
+  CheckScalarUnary("subsecond", unit, times, float64(), subsecond);
+}
+
+TEST(ScalarTemporalTest, TestTemporalComponentExtractionWithDifferentUnits) {
+  auto iso_calendar_type =
+      struct_({field("iso_year", int64()), field("iso_week", int64()),
+               field("iso_day_of_week", int64())});
+  const char* times =
+      R"(["1970-01-01T00:00:59","2000-02-29T23:23:23",
+            "1899-01-01T00:59:20","2033-05-18T03:33:20", null])";
+  auto year = "[1970, 2000, 1899, 2033, null]";
+  auto month = "[1, 2, 1, 5, null]";
+  auto day = "[1, 29, 1, 18, null]";
+  auto day_of_week = "[3, 1, 6, 2, null]";
+  auto day_of_year = "[1, 60, 1, 138, null]";
+  auto iso_year = "[1970, 2000, 1898, 2033, null]";
+  auto iso_week = "[1, 9, 52, 20, null]";
+  auto iso_calendar =
+      ArrayFromJSON(iso_calendar_type,
+                    R"([{"iso_year": 1970, "iso_week": 1, "iso_day_of_week": 4},
+                          {"iso_year": 2000, "iso_week": 9, "iso_day_of_week": 2},
+                          {"iso_year": 1898, "iso_week": 52, "iso_day_of_week": 7},
+                          {"iso_year": 2033, "iso_week": 20, "iso_day_of_week": 3}, null])");
+  auto quarter = "[1, 1, 1, 2, null]";
+  auto hour = "[0, 23, 0, 3, null]";
+  auto minute = "[0, 23, 59, 33, null]";
+  auto second = "[59, 23, 20, 20, null]";
+  auto zeros = "[0, 0, 0, 0, null]";
+
+  for (auto u : internal::AllTimeUnits()) {
+    auto unit = timestamp(u);
+    CheckScalarUnary("year", unit, times, int64(), year);
+    CheckScalarUnary("month", unit, times, int64(), month);
+    CheckScalarUnary("day", unit, times, int64(), day);
+    CheckScalarUnary("day_of_week", unit, times, int64(), day_of_week);
+    CheckScalarUnary("day_of_year", unit, times, int64(), day_of_year);
+    CheckScalarUnary("iso_year", unit, times, int64(), iso_year);
+    CheckScalarUnary("iso_week", unit, times, int64(), iso_week);
+    CheckScalarUnary("iso_calendar", ArrayFromJSON(unit, times), iso_calendar);
+    CheckScalarUnary("quarter", unit, times, int64(), quarter);
+    CheckScalarUnary("hour", unit, times, int64(), hour);
+    CheckScalarUnary("minute", unit, times, int64(), minute);
+    CheckScalarUnary("second", unit, times, float64(), second);
+    CheckScalarUnary("millisecond", unit, times, int64(), zeros);
+    CheckScalarUnary("microsecond", unit, times, int64(), zeros);
+    CheckScalarUnary("nanosecond", unit, times, int64(), zeros);
+    CheckScalarUnary("subsecond", unit, times, float64(), zeros);
+  }
+}
+
+TEST(ScalarTemporalTest, TestZonedTemporalComponentExtraction) {
+  std::string timezone = "Asia/Kolkata";
+  const char* times = R"(["1970-01-01T00:00:59", null])";
+
+  for (auto u : internal::AllTimeUnits()) {
+    auto unit = timestamp(u, timezone);
+    auto timestamps = ArrayFromJSON(unit, times);
+
+    ASSERT_RAISES(Invalid, Year(timestamps));
+    ASSERT_RAISES(Invalid, Month(timestamps));
+    ASSERT_RAISES(Invalid, Day(timestamps));
+    ASSERT_RAISES(Invalid, DayOfWeek(timestamps));
+    ASSERT_RAISES(Invalid, DayOfYear(timestamps));
+    ASSERT_RAISES(Invalid, ISOYear(timestamps));
+    ASSERT_RAISES(Invalid, ISOWeek(timestamps));
+    ASSERT_RAISES(Invalid, ISOCalendar(timestamps));
+    ASSERT_RAISES(Invalid, Quarter(timestamps));
+    ASSERT_RAISES(Invalid, Hour(timestamps));
+    ASSERT_RAISES(Invalid, Minute(timestamps));
+    ASSERT_RAISES(Invalid, Second(timestamps));
+    ASSERT_RAISES(Invalid, Millisecond(timestamps));
+    ASSERT_RAISES(Invalid, Microsecond(timestamps));
+    ASSERT_RAISES(Invalid, Nanosecond(timestamps));
+    ASSERT_RAISES(Invalid, Subsecond(timestamps));
+  }
+}
+}  // namespace compute
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/registry.cc b/cpp/src/arrow/compute/registry.cc
index 1d713b96e1e..673802f99b0 100644
--- a/cpp/src/arrow/compute/registry.cc
+++ b/cpp/src/arrow/compute/registry.cc
@@ -126,6 +126,7 @@ static std::unique_ptr<FunctionRegistry> CreateBuiltInRegistry() {
   RegisterScalarValidity(registry.get());
   RegisterScalarFillNull(registry.get());
   RegisterScalarIfElse(registry.get());
+  RegisterScalarTemporal(registry.get());
 
   // Vector functions
   RegisterVectorHash(registry.get());
diff --git a/cpp/src/arrow/compute/registry_internal.h b/cpp/src/arrow/compute/registry_internal.h
index f97553af4b1..68e0f2207f1 100644
--- a/cpp/src/arrow/compute/registry_internal.h
+++ b/cpp/src/arrow/compute/registry_internal.h
@@ -35,6 +35,7 @@ void RegisterScalarStringAscii(FunctionRegistry* registry);
 void RegisterScalarValidity(FunctionRegistry* registry);
 void RegisterScalarFillNull(FunctionRegistry* registry);
 void RegisterScalarIfElse(FunctionRegistry* registry);
+void RegisterScalarTemporal(FunctionRegistry* registry);
 
 // Vector functions
 void RegisterVectorHash(FunctionRegistry* registry);
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index 4aa38e1a295..ad2d9f8f5d2 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -860,6 +860,58 @@ null input value is converted into a null output value.
   available).
 
 
+Temporal component extraction
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+These functions extract datetime components (year, month, day, etc) from timestamp type.
+Note: this is currently not supported for timestamps with timezone information.
+
++--------------------+------------+-------------------+---------------+--------+
+| Function name      | Arity      | Input types       | Output type   | Notes  |
++====================+============+===================+===============+========+
+| year               | Unary      | Temporal          | Int64         |        |
++--------------------+------------+-------------------+---------------+--------+
+| month              | Unary      | Temporal          | Int64         |        |
++--------------------+------------+-------------------+---------------+--------+
+| day                | Unary      | Temporal          | Int64         |        |
++--------------------+------------+-------------------+---------------+--------+
+| day_of_week        | Unary      | Temporal          | Int64         | \(1)   |
++--------------------+------------+-------------------+---------------+--------+
+| day_of_year        | Unary      | Temporal          | Int64         |        |
++--------------------+------------+-------------------+---------------+--------+
+| iso_year           | Unary      | Temporal          | Int64         | \(2)   |
++--------------------+------------+-------------------+---------------+--------+
+| iso_week           | Unary      | Temporal          | Int64         | \(2)   |
++--------------------+------------+-------------------+---------------+--------+
+| iso_calendar       | Unary      | Temporal          | Struct        | \(3)   |
++--------------------+------------+-------------------+---------------+--------+
+| quarter            | Unary      | Temporal          | Int64         |        |
++--------------------+------------+-------------------+---------------+--------+
+| hour               | Unary      | Temporal          | Int64         |        |
++--------------------+------------+-------------------+---------------+--------+
+| minute             | Unary      | Temporal          | Int64         |        |
++--------------------+------------+-------------------+---------------+--------+
+| second             | Unary      | Temporal          | Int64         |        |
++--------------------+------------+-------------------+---------------+--------+
+| millisecond        | Unary      | Temporal          | Int64         |        |
++--------------------+------------+-------------------+---------------+--------+
+| microsecond        | Unary      | Temporal          | Int64         |        |
++--------------------+------------+-------------------+---------------+--------+
+| nanosecond         | Unary      | Temporal          | Int64         |        |
++--------------------+------------+-------------------+---------------+--------+
+| subsecond          | Unary      | Temporal          | Double        |        |
++--------------------+------------+-------------------+---------------+--------+
+
+* \(1) Outputs the number of the day of the week. Week begins on Monday and is denoted
+  by 0 and ends on Sunday denoted by 6.
+* \(2) First ISO week has the majority (4 or more) of it's days in January. ISO year
+  starts with the first ISO week.
+  See `ISO 8601 week date definition`_ for more details.
+* \(3) Output is a ``{"iso_year": output type, "iso_week": output type, "iso_day_of_week":  output type}`` Struct.
+
+.. _ISO 8601 week date definition: https://en.wikipedia.org/wiki/ISO_week_date#First_week
+
+
 Array-wise ("vector") functions
 -------------------------------
 

From 97a1c264a38be627dee1a03f3ecaa1556655bf6a Mon Sep 17 00:00:00 2001
From: niranda perera <niranda.perera@gmail.com>
Date: Wed, 9 Jun 2021 14:49:30 -0400
Subject: [PATCH 386/719] ARROW-12975: [C++][Python] if_else kernel doesn't
 support upcasting

Allowing if-else operation for following use cases
```python
>>> pc.if_else([True], [1], [3.5])
```

Closes #10472 from nirandaperera/ARROW-12975

Authored-by: niranda perera <niranda.perera@gmail.com>
Signed-off-by: Benjamin Kietzman <bengilgit@gmail.com>
---
 .../arrow/compute/kernels/scalar_if_else.cc   | 35 +++++++++++++++++-
 .../compute/kernels/scalar_if_else_test.cc    | 37 +++++++++++++++++++
 2 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else.cc b/cpp/src/arrow/compute/kernels/scalar_if_else.cc
index 63086172c97..7a0defaccd6 100644
--- a/cpp/src/arrow/compute/kernels/scalar_if_else.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_if_else.cc
@@ -543,7 +543,38 @@ struct ResolveIfElseExec {
   }
 };
 
-void AddPrimitiveIfElseKernels(const std::shared_ptr<ScalarFunction>& scalar_function,
+struct IfElseFunction : ScalarFunction {
+  using ScalarFunction::ScalarFunction;
+
+  Result<const Kernel*> DispatchBest(std::vector<ValueDescr>* values) const override {
+    RETURN_NOT_OK(CheckArity(*values));
+
+    using arrow::compute::detail::DispatchExactImpl;
+    if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
+
+    // if 0th descriptor is null, replace with bool
+    if (values->at(0).type->id() == Type::NA) {
+      values->at(0).type = boolean();
+    }
+
+    // if-else 0'th descriptor is bool, so skip it
+    std::vector<ValueDescr> values_copy(values->begin() + 1, values->end());
+    internal::EnsureDictionaryDecoded(&values_copy);
+    internal::ReplaceNullWithOtherType(&values_copy);
+
+    if (auto type = internal::CommonNumeric(values_copy)) {
+      internal::ReplaceTypes(type, &values_copy);
+    }
+
+    std::move(values_copy.begin(), values_copy.end(), values->begin() + 1);
+
+    if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
+
+    return arrow::compute::detail::NoMatchingKernel(this, *values);
+  }
+};
+
+void AddPrimitiveIfElseKernels(const std::shared_ptr<IfElseFunction>& scalar_function,
                                const std::vector<std::shared_ptr<DataType>>& types) {
   for (auto&& type : types) {
     auto exec = internal::GenerateTypeAgnosticPrimitive<ResolveIfElseExec>(*type);
@@ -572,7 +603,7 @@ void RegisterScalarIfElse(FunctionRegistry* registry) {
   scalar_kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
   scalar_kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
 
-  auto func = std::make_shared<ScalarFunction>("if_else", Arity::Ternary(), &if_else_doc);
+  auto func = std::make_shared<IfElseFunction>("if_else", Arity::Ternary(), &if_else_doc);
 
   AddPrimitiveIfElseKernels(func, NumericTypes());
   AddPrimitiveIfElseKernels(func, TemporalTypes());
diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
index 5d3d22210d2..0fb0a1fc2d8 100644
--- a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
@@ -271,5 +271,42 @@ TEST_F(TestIfElseKernel, IfElseNull) {
                     ArrayFromJSON(null(), "[null, null, null, null]"));
 }
 
+TEST_F(TestIfElseKernel, IfElseMultiType) {
+  CheckWithDifferentShapes(ArrayFromJSON(boolean(), "[true, true, true, false]"),
+                           ArrayFromJSON(int32(), "[1, 2, 3, 4]"),
+                           ArrayFromJSON(float32(), "[5, 6, 7, 8]"),
+                           ArrayFromJSON(float32(), "[1, 2, 3, 8]"));
+}
+
+TEST_F(TestIfElseKernel, IfElseDispatchBest) {
+  std::string name = "if_else";
+  CheckDispatchBest(name, {boolean(), int32(), int32()}, {boolean(), int32(), int32()});
+  CheckDispatchBest(name, {boolean(), int32(), null()}, {boolean(), int32(), int32()});
+  CheckDispatchBest(name, {boolean(), null(), int32()}, {boolean(), int32(), int32()});
+
+  CheckDispatchBest(name, {boolean(), int32(), int8()}, {boolean(), int32(), int32()});
+  CheckDispatchBest(name, {boolean(), int32(), int16()}, {boolean(), int32(), int32()});
+  CheckDispatchBest(name, {boolean(), int32(), int32()}, {boolean(), int32(), int32()});
+  CheckDispatchBest(name, {boolean(), int32(), int64()}, {boolean(), int64(), int64()});
+
+  CheckDispatchBest(name, {boolean(), int32(), uint8()}, {boolean(), int32(), int32()});
+  CheckDispatchBest(name, {boolean(), int32(), uint16()}, {boolean(), int32(), int32()});
+  CheckDispatchBest(name, {boolean(), int32(), uint32()}, {boolean(), int64(), int64()});
+  CheckDispatchBest(name, {boolean(), int32(), uint64()}, {boolean(), int64(), int64()});
+
+  CheckDispatchBest(name, {boolean(), uint8(), uint8()}, {boolean(), uint8(), uint8()});
+  CheckDispatchBest(name, {boolean(), uint8(), uint16()},
+                    {boolean(), uint16(), uint16()});
+
+  CheckDispatchBest(name, {boolean(), int32(), float32()},
+                    {boolean(), float32(), float32()});
+  CheckDispatchBest(name, {boolean(), float32(), int64()},
+                    {boolean(), float32(), float32()});
+  CheckDispatchBest(name, {boolean(), float64(), int32()},
+                    {boolean(), float64(), float64()});
+
+  CheckDispatchBest(name, {null(), uint8(), int8()}, {boolean(), int16(), int16()});
+}
+
 }  // namespace compute
 }  // namespace arrow

From 9839eb45b46fc8d6cde251b2c8bb35be6d7f2b3b Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Wed, 9 Jun 2021 20:56:16 +0200
Subject: [PATCH 387/719] ARROW-12951: [C++] Reduce generated code size for
 string kernels

Factor out type-agnostic string operations (such as finding a split pattern) in separate classes to avoid generating several versions of them when generating the typed kernel execution classes. This also makes the code slightly easier to understand and maintain (IMHO) by reducing use of subclassing.

Also fix a bug where some kernels would error out on invalid UTF8 data, even when it's the masked payload of a null value.

Closes #10496 from pitrou/ARROW-12951-string-transform-refactor-v2

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 .../compute/kernels/scalar_arithmetic_test.cc |   13 -
 .../arrow/compute/kernels/scalar_string.cc    | 1003 +++++++++--------
 .../compute/kernels/scalar_string_test.cc     |    5 +-
 cpp/src/arrow/testing/gtest_util.cc           |   13 +
 cpp/src/arrow/testing/gtest_util.h            |    8 +
 5 files changed, 532 insertions(+), 510 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
index ff66fcf1d12..c4bfac459dc 100644
--- a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
@@ -41,19 +41,6 @@
 namespace arrow {
 namespace compute {
 
-std::shared_ptr<Array> TweakValidityBit(const std::shared_ptr<Array>& array,
-                                        int64_t index, bool validity) {
-  auto data = array->data()->Copy();
-  if (data->buffers[0] == nullptr) {
-    data->buffers[0] = *AllocateBitmap(data->length);
-    BitUtil::SetBitsTo(data->buffers[0]->mutable_data(), 0, data->length, true);
-  }
-  BitUtil::SetBitTo(data->buffers[0]->mutable_data(), index, validity);
-  data->null_count = kUnknownNullCount;
-  // Need to return a new array, because Array caches the null bitmap pointer
-  return MakeArray(data);
-}
-
 template <typename T>
 class TestUnaryArithmetic : public TestBase {
  protected:
diff --git a/cpp/src/arrow/compute/kernels/scalar_string.cc b/cpp/src/arrow/compute/kernels/scalar_string.cc
index a1e19b608d9..8b740f3742a 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string.cc
@@ -38,6 +38,7 @@
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/utf8.h"
 #include "arrow/util/value_parsing.h"
+#include "arrow/visitor_inline.h"
 
 namespace arrow {
 
@@ -130,117 +131,170 @@ void EnsureLookupTablesFilled() {
   });
 }
 
+#else
+
+void EnsureLookupTablesFilled() {}
+
 #endif  // ARROW_WITH_UTF8PROC
 
-/// Transform string -> string with a reasonable guess on the maximum number of codepoints
-template <typename Type, typename Derived>
-struct StringTransform {
-  using offset_type = typename Type::offset_type;
-  using ArrayType = typename TypeTraits<Type>::ArrayType;
+constexpr int64_t kTransformError = -1;
 
-  virtual int64_t MaxCodeunits(int64_t ninputs, int64_t input_ncodeunits) {
-    return input_ncodeunits;
+struct StringTransformBase {
+  virtual Status PreExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    return Status::OK();
   }
 
-  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
-    return Derived().Execute(ctx, batch, out);
+  // Return the maximum total size of the output in codeunits (i.e. bytes)
+  // given input characteristics.
+  virtual int64_t MaxCodeunits(int64_t ninputs, int64_t input_ncodeunits) {
+    return input_ncodeunits;
   }
 
-  static Status InvalidStatus() {
+  virtual Status InvalidStatus() {
     return Status::Invalid("Invalid UTF8 sequence in input");
   }
 
-  Status Execute(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  // Derived classes should also define this method:
+  //   int64_t Transform(const uint8_t* input, int64_t input_string_ncodeunits,
+  //                     uint8_t* output);
+};
+
+template <typename Type, typename StringTransform>
+struct StringTransformExecBase {
+  using offset_type = typename Type::offset_type;
+  using ArrayType = typename TypeTraits<Type>::ArrayType;
+
+  static Status Execute(KernelContext* ctx, StringTransform* transform,
+                        const ExecBatch& batch, Datum* out) {
     if (batch[0].kind() == Datum::ARRAY) {
-      const ArrayData& input = *batch[0].array();
-      ArrayType input_boxed(batch[0].array());
-      ArrayData* output = out->mutable_array();
+      return ExecArray(ctx, transform, batch[0].array(), out);
+    }
+    DCHECK_EQ(batch[0].kind(), Datum::SCALAR);
+    return ExecScalar(ctx, transform, batch[0].scalar(), out);
+  }
 
-      offset_type input_ncodeunits = input_boxed.total_values_length();
-      offset_type input_nstrings = static_cast<offset_type>(input.length);
+  static Status ExecArray(KernelContext* ctx, StringTransform* transform,
+                          const std::shared_ptr<ArrayData>& data, Datum* out) {
+    ArrayType input(data);
+    ArrayData* output = out->mutable_array();
 
-      const int64_t output_ncodeunits_max =
-          MaxCodeunits(input_nstrings, input_ncodeunits);
-      if (output_ncodeunits_max > std::numeric_limits<offset_type>::max()) {
-        return Status::CapacityError(
-            "Result might not fit in a 32bit utf8 array, convert to large_utf8");
-      }
+    const int64_t input_ncodeunits = input.total_values_length();
+    const int64_t input_nstrings = input.length();
 
-      ARROW_ASSIGN_OR_RAISE(auto values_buffer, ctx->Allocate(output_ncodeunits_max));
-      output->buffers[2] = values_buffer;
+    const int64_t output_ncodeunits_max =
+        transform->MaxCodeunits(input_nstrings, input_ncodeunits);
+    if (output_ncodeunits_max > std::numeric_limits<offset_type>::max()) {
+      return Status::CapacityError(
+          "Result might not fit in a 32bit utf8 array, convert to large_utf8");
+    }
+
+    ARROW_ASSIGN_OR_RAISE(auto values_buffer, ctx->Allocate(output_ncodeunits_max));
+    output->buffers[2] = values_buffer;
 
-      // String offsets are preallocated
-      offset_type* output_string_offsets = output->GetMutableValues<offset_type>(1);
-      uint8_t* output_str = output->buffers[2]->mutable_data();
-      offset_type output_ncodeunits = 0;
+    // String offsets are preallocated
+    offset_type* output_string_offsets = output->GetMutableValues<offset_type>(1);
+    uint8_t* output_str = output->buffers[2]->mutable_data();
+    offset_type output_ncodeunits = 0;
 
-      output_string_offsets[0] = 0;
-      for (int64_t i = 0; i < input_nstrings; i++) {
+    output_string_offsets[0] = 0;
+    for (int64_t i = 0; i < input_nstrings; i++) {
+      if (!input.IsNull(i)) {
         offset_type input_string_ncodeunits;
-        const uint8_t* input_string = input_boxed.GetValue(i, &input_string_ncodeunits);
-        offset_type encoded_nbytes = 0;
-        if (ARROW_PREDICT_FALSE(!static_cast<Derived&>(*this).Transform(
-                input_string, input_string_ncodeunits, output_str + output_ncodeunits,
-                &encoded_nbytes))) {
-          return Derived::InvalidStatus();
+        const uint8_t* input_string = input.GetValue(i, &input_string_ncodeunits);
+        auto encoded_nbytes = static_cast<offset_type>(transform->Transform(
+            input_string, input_string_ncodeunits, output_str + output_ncodeunits));
+        if (encoded_nbytes < 0) {
+          return transform->InvalidStatus();
         }
         output_ncodeunits += encoded_nbytes;
-        output_string_offsets[i + 1] = output_ncodeunits;
       }
-      DCHECK_LE(output_ncodeunits, output_ncodeunits_max);
+      output_string_offsets[i + 1] = output_ncodeunits;
+    }
+    DCHECK_LE(output_ncodeunits, output_ncodeunits_max);
 
-      // Trim the codepoint buffer, since we allocated too much
-      return values_buffer->Resize(output_ncodeunits, /*shrink_to_fit=*/true);
-    } else {
-      DCHECK_EQ(batch[0].kind(), Datum::SCALAR);
-      const auto& input = checked_cast<const BaseBinaryScalar&>(*batch[0].scalar());
-      if (!input.is_valid) {
-        return Status::OK();
-      }
-      auto* result = checked_cast<BaseBinaryScalar*>(out->scalar().get());
-      result->is_valid = true;
-      offset_type data_nbytes = static_cast<offset_type>(input.value->size());
+    // Trim the codepoint buffer, since we allocated too much
+    return values_buffer->Resize(output_ncodeunits, /*shrink_to_fit=*/true);
+  }
 
-      int64_t output_ncodeunits_max = MaxCodeunits(1, data_nbytes);
-      if (output_ncodeunits_max > std::numeric_limits<offset_type>::max()) {
-        return Status::CapacityError(
-            "Result might not fit in a 32bit utf8 array, convert to large_utf8");
-      }
-      ARROW_ASSIGN_OR_RAISE(auto value_buffer, ctx->Allocate(output_ncodeunits_max));
-      result->value = value_buffer;
-      offset_type encoded_nbytes = 0;
-      if (ARROW_PREDICT_FALSE(!static_cast<Derived&>(*this).Transform(
-              input.value->data(), data_nbytes, value_buffer->mutable_data(),
-              &encoded_nbytes))) {
-        return Derived::InvalidStatus();
-      }
-      DCHECK_LE(encoded_nbytes, output_ncodeunits_max);
-      return value_buffer->Resize(encoded_nbytes, /*shrink_to_fit=*/true);
+  static Status ExecScalar(KernelContext* ctx, StringTransform* transform,
+                           const std::shared_ptr<Scalar>& scalar, Datum* out) {
+    const auto& input = checked_cast<const BaseBinaryScalar&>(*scalar);
+    if (!input.is_valid) {
+      return Status::OK();
+    }
+    auto* result = checked_cast<BaseBinaryScalar*>(out->scalar().get());
+    result->is_valid = true;
+    const int64_t data_nbytes = static_cast<int64_t>(input.value->size());
+
+    const int64_t output_ncodeunits_max = transform->MaxCodeunits(1, data_nbytes);
+    if (output_ncodeunits_max > std::numeric_limits<offset_type>::max()) {
+      return Status::CapacityError(
+          "Result might not fit in a 32bit utf8 array, convert to large_utf8");
+    }
+    ARROW_ASSIGN_OR_RAISE(auto value_buffer, ctx->Allocate(output_ncodeunits_max));
+    result->value = value_buffer;
+    auto encoded_nbytes = static_cast<offset_type>(transform->Transform(
+        input.value->data(), data_nbytes, value_buffer->mutable_data()));
+    if (encoded_nbytes < 0) {
+      return transform->InvalidStatus();
     }
+    DCHECK_LE(encoded_nbytes, output_ncodeunits_max);
+    return value_buffer->Resize(encoded_nbytes, /*shrink_to_fit=*/true);
+  }
+};
+
+template <typename Type, typename StringTransform>
+struct StringTransformExec : public StringTransformExecBase<Type, StringTransform> {
+  using StringTransformExecBase<Type, StringTransform>::Execute;
+
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    StringTransform transform;
+    RETURN_NOT_OK(transform.PreExec(ctx, batch, out));
+    return Execute(ctx, &transform, batch, out);
+  }
+};
+
+template <typename Type, typename StringTransform>
+struct StringTransformExecWithState
+    : public StringTransformExecBase<Type, StringTransform> {
+  using State = typename StringTransform::State;
+  using StringTransformExecBase<Type, StringTransform>::Execute;
+
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    StringTransform transform(State::Get(ctx));
+    RETURN_NOT_OK(transform.PreExec(ctx, batch, out));
+    return Execute(ctx, &transform, batch, out);
   }
 };
 
 #ifdef ARROW_WITH_UTF8PROC
 
-// transforms per codepoint
-template <typename Type, typename Derived>
-struct StringTransformCodepoint : StringTransform<Type, Derived> {
-  using Base = StringTransform<Type, Derived>;
-  using offset_type = typename Base::offset_type;
+template <typename CodepointTransform>
+struct StringTransformCodepoint : public StringTransformBase {
+  Status PreExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) override {
+    EnsureLookupTablesFilled();
+    return Status::OK();
+  }
 
-  bool Transform(const uint8_t* input, offset_type input_string_ncodeunits,
-                 uint8_t* output, offset_type* output_written) {
+  int64_t MaxCodeunits(int64_t ninputs, int64_t input_ncodeunits) override {
+    return CodepointTransform::MaxCodeunits(ninputs, input_ncodeunits);
+  }
+
+  int64_t Transform(const uint8_t* input, int64_t input_string_ncodeunits,
+                    uint8_t* output) {
     uint8_t* output_start = output;
     if (ARROW_PREDICT_FALSE(
             !arrow::util::UTF8Transform(input, input + input_string_ncodeunits, &output,
-                                        Derived::TransformCodepoint))) {
-      return false;
+                                        CodepointTransform::TransformCodepoint))) {
+      return kTransformError;
     }
-    *output_written = static_cast<offset_type>(output - output_start);
-    return true;
+    return output - output_start;
   }
+};
 
-  int64_t MaxCodeunits(int64_t ninputs, int64_t input_ncodeunits) override {
+// struct CaseMappingMixin {
+struct CaseMappingTransform {
+  static int64_t MaxCodeunits(int64_t ninputs, int64_t input_ncodeunits) {
     // Section 5.18 of the Unicode spec claim that the number of codepoints for case
     // mapping can grow by a factor of 3. This means grow by a factor of 3 in bytes
     // However, since we don't support all casings (SpecialCasing.txt) the growth
@@ -249,74 +303,67 @@ struct StringTransformCodepoint : StringTransform<Type, Derived> {
     // two code units (even) can grow to 3 code units.
     return static_cast<int64_t>(input_ncodeunits) * 3 / 2;
   }
-
-  Status Execute(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
-    EnsureLookupTablesFilled();
-    return Base::Execute(ctx, batch, out);
-  }
 };
 
-template <typename Type>
-struct UTF8Upper : StringTransformCodepoint<Type, UTF8Upper<Type>> {
-  inline static uint32_t TransformCodepoint(uint32_t codepoint) {
+struct UTF8UpperTransform : public CaseMappingTransform {
+  static uint32_t TransformCodepoint(uint32_t codepoint) {
     return codepoint <= kMaxCodepointLookup ? lut_upper_codepoint[codepoint]
                                             : utf8proc_toupper(codepoint);
   }
 };
 
 template <typename Type>
-struct UTF8Lower : StringTransformCodepoint<Type, UTF8Lower<Type>> {
-  inline static uint32_t TransformCodepoint(uint32_t codepoint) {
+using UTF8Upper = StringTransformExec<Type, StringTransformCodepoint<UTF8UpperTransform>>;
+
+struct UTF8LowerTransform : public CaseMappingTransform {
+  static uint32_t TransformCodepoint(uint32_t codepoint) {
     return codepoint <= kMaxCodepointLookup ? lut_lower_codepoint[codepoint]
                                             : utf8proc_tolower(codepoint);
   }
 };
 
-#else
-
-void EnsureLookupTablesFilled() {}
+template <typename Type>
+using UTF8Lower = StringTransformExec<Type, StringTransformCodepoint<UTF8LowerTransform>>;
 
 #endif  // ARROW_WITH_UTF8PROC
 
-template <typename Type>
-struct AsciiReverse : StringTransform<Type, AsciiReverse<Type>> {
-  using Base = StringTransform<Type, AsciiReverse<Type>>;
-  using offset_type = typename Base::offset_type;
-
-  bool Transform(const uint8_t* input, offset_type input_string_ncodeunits,
-                 uint8_t* output, offset_type* output_written) {
+struct AsciiReverseTransform : public StringTransformBase {
+  int64_t Transform(const uint8_t* input, int64_t input_string_ncodeunits,
+                    uint8_t* output) {
     uint8_t utf8_char_found = 0;
-    for (offset_type i = 0; i < input_string_ncodeunits; i++) {
+    for (int64_t i = 0; i < input_string_ncodeunits; i++) {
       // if a utf8 char is found, report to utf8_char_found
       utf8_char_found |= input[i] & 0x80;
       output[input_string_ncodeunits - i - 1] = input[i];
     }
-    *output_written = input_string_ncodeunits;
-    return utf8_char_found == 0;
+    return utf8_char_found ? kTransformError : input_string_ncodeunits;
   }
 
-  static Status InvalidStatus() { return Status::Invalid("Non-ASCII sequence in input"); }
+  Status InvalidStatus() override {
+    return Status::Invalid("Non-ASCII sequence in input");
+  }
 };
 
 template <typename Type>
-struct Utf8Reverse : StringTransform<Type, Utf8Reverse<Type>> {
-  using Base = StringTransform<Type, Utf8Reverse<Type>>;
-  using offset_type = typename Base::offset_type;
+using AsciiReverse = StringTransformExec<Type, AsciiReverseTransform>;
 
-  bool Transform(const uint8_t* input, offset_type input_string_ncodeunits,
-                 uint8_t* output, offset_type* output_written) {
-    offset_type i = 0;
+struct Utf8ReverseTransform : public StringTransformBase {
+  int64_t Transform(const uint8_t* input, int64_t input_string_ncodeunits,
+                    uint8_t* output) {
+    int64_t i = 0;
     while (i < input_string_ncodeunits) {
-      uint8_t offset = util::ValidUtf8CodepointByteSize(input + i);
-      offset_type stride = std::min(i + offset, input_string_ncodeunits);
-      std::copy(input + i, input + stride, output + input_string_ncodeunits - stride);
-      i += offset;
+      int64_t char_end = std::min(i + util::ValidUtf8CodepointByteSize(input + i),
+                                  input_string_ncodeunits);
+      std::copy(input + i, input + char_end, output + input_string_ncodeunits - char_end);
+      i = char_end;
     }
-    *output_written = input_string_ncodeunits;
-    return true;
+    return input_string_ncodeunits;
   }
 };
 
+template <typename Type>
+using Utf8Reverse = StringTransformExec<Type, Utf8ReverseTransform>;
+
 using TransformFunc = std::function<void(const uint8_t*, int64_t, uint8_t*)>;
 
 // Transform a buffer of offsets to one which begins with 0 and has same
@@ -973,187 +1020,182 @@ void AddCountSubstring(FunctionRegistry* registry) {
 
 // Slicing
 
-template <typename Type, typename Derived>
-struct SliceBase : StringTransform<Type, Derived> {
-  using Base = StringTransform<Type, Derived>;
-  using offset_type = typename Base::offset_type;
+struct SliceTransformBase : public StringTransformBase {
   using State = OptionsWrapper<SliceOptions>;
 
-  SliceOptions options;
-
-  explicit SliceBase(SliceOptions options) : options(options) {}
+  const SliceOptions* options;
 
-  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
-    SliceOptions options = State::Get(ctx);
-    if (options.step == 0) {
+  Status PreExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) override {
+    options = &State::Get(ctx);
+    if (options->step == 0) {
       return Status::Invalid("Slice step cannot be zero");
     }
-    return Derived(options).Execute(ctx, batch, out);
+    return Status::OK();
   }
 };
 
-#define PROPAGATE_FALSE(expr)         \
+struct SliceCodeunitsTransform : SliceTransformBase {
+  int64_t MaxCodeunits(int64_t ninputs, int64_t input_ncodeunits) override {
+    const SliceOptions& opt = *this->options;
+    if ((opt.start >= 0) != (opt.stop >= 0)) {
+      // If start and stop don't have the same sign, we can't guess an upper bound
+      // on the resulting slice lengths, so return a worst case estimate.
+      return input_ncodeunits;
+    }
+    int64_t max_slice_codepoints = (opt.stop - opt.start + opt.step - 1) / opt.step;
+    // The maximum UTF8 byte size of a codepoint is 4
+    return std::min(input_ncodeunits,
+                    4 * ninputs * std::max<int64_t>(0, max_slice_codepoints));
+  }
+
+  int64_t Transform(const uint8_t* input, int64_t input_string_ncodeunits,
+                    uint8_t* output) {
+    if (options->step >= 1) {
+      return SliceForward(input, input_string_ncodeunits, output);
+    }
+    return SliceBackward(input, input_string_ncodeunits, output);
+  }
+
+#define RETURN_IF_UTF8_ERROR(expr)    \
   do {                                \
     if (ARROW_PREDICT_FALSE(!expr)) { \
-      return false;                   \
+      return kTransformError;         \
     }                                 \
   } while (0)
 
-bool SliceCodeunitsTransform(const uint8_t* input, int64_t input_string_ncodeunits,
-                             uint8_t* output, int64_t* output_written,
-                             const SliceOptions& options) {
-  const uint8_t* begin = input;
-  const uint8_t* end = input + input_string_ncodeunits;
-  const uint8_t* begin_sliced = begin;
-  const uint8_t* end_sliced = end;
+  int64_t SliceForward(const uint8_t* input, int64_t input_string_ncodeunits,
+                       uint8_t* output) {
+    // Slice in forward order (step > 0)
+    const SliceOptions& opt = *this->options;
+    const uint8_t* begin = input;
+    const uint8_t* end = input + input_string_ncodeunits;
+    const uint8_t* begin_sliced = begin;
+    const uint8_t* end_sliced = end;
 
-  if (options.step >= 1) {
-    if (options.start >= 0) {
+    // First, compute begin_sliced and end_sliced
+    if (opt.start >= 0) {
       // start counting from the left
-      PROPAGATE_FALSE(
-          arrow::util::UTF8AdvanceCodepoints(begin, end, &begin_sliced, options.start));
-      if (options.stop > options.start) {
+      RETURN_IF_UTF8_ERROR(
+          arrow::util::UTF8AdvanceCodepoints(begin, end, &begin_sliced, opt.start));
+      if (opt.stop > opt.start) {
         // continue counting from begin_sliced
-        int64_t length = options.stop - options.start;
-        PROPAGATE_FALSE(
+        const int64_t length = opt.stop - opt.start;
+        RETURN_IF_UTF8_ERROR(
             arrow::util::UTF8AdvanceCodepoints(begin_sliced, end, &end_sliced, length));
-      } else if (options.stop < 0) {
+      } else if (opt.stop < 0) {
         // or from the end (but we will never need to < begin_sliced)
-        PROPAGATE_FALSE(arrow::util::UTF8AdvanceCodepointsReverse(
-            begin_sliced, end, &end_sliced, -options.stop));
+        RETURN_IF_UTF8_ERROR(arrow::util::UTF8AdvanceCodepointsReverse(
+            begin_sliced, end, &end_sliced, -opt.stop));
       } else {
         // zero length slice
-        *output_written = 0;
-        return true;
+        return 0;
       }
     } else {
       // start counting from the right
-      PROPAGATE_FALSE(arrow::util::UTF8AdvanceCodepointsReverse(begin, end, &begin_sliced,
-                                                                -options.start));
-      if (options.stop > 0) {
+      RETURN_IF_UTF8_ERROR(arrow::util::UTF8AdvanceCodepointsReverse(
+          begin, end, &begin_sliced, -opt.start));
+      if (opt.stop > 0) {
         // continue counting from the left, we cannot start from begin_sliced because we
         // don't know how many codepoints are between begin and begin_sliced
-        PROPAGATE_FALSE(
-            arrow::util::UTF8AdvanceCodepoints(begin, end, &end_sliced, options.stop));
+        RETURN_IF_UTF8_ERROR(
+            arrow::util::UTF8AdvanceCodepoints(begin, end, &end_sliced, opt.stop));
         // and therefore we also needs this
         if (end_sliced <= begin_sliced) {
           // zero length slice
-          *output_written = 0;
-          return true;
+          return 0;
         }
-      } else if ((options.stop < 0) && (options.stop > options.start)) {
+      } else if ((opt.stop < 0) && (opt.stop > opt.start)) {
         // stop is negative, but larger than start, so we count again from the right
         // in some cases we can optimize this, depending on the shortest path (from end
-        // or begin_sliced), but begin_sliced and options.start can be 'out of sync',
+        // or begin_sliced), but begin_sliced and opt.start can be 'out of sync',
         // for instance when start=-100, when the string length is only 10.
-        PROPAGATE_FALSE(arrow::util::UTF8AdvanceCodepointsReverse(
-            begin_sliced, end, &end_sliced, -options.stop));
+        RETURN_IF_UTF8_ERROR(arrow::util::UTF8AdvanceCodepointsReverse(
+            begin_sliced, end, &end_sliced, -opt.stop));
       } else {
         // zero length slice
-        *output_written = 0;
-        return true;
+        return 0;
       }
     }
+
+    // Second, copy computed slice to output
     DCHECK(begin_sliced <= end_sliced);
-    if (options.step == 1) {
+    if (opt.step == 1) {
       // fast case, where we simply can finish with a memcpy
       std::copy(begin_sliced, end_sliced, output);
-      *output_written = end_sliced - begin_sliced;
-    } else {
-      uint8_t* dest = output;
-      const uint8_t* i = begin_sliced;
-
-      while (i < end_sliced) {
-        uint32_t codepoint = 0;
-        // write a single codepoint
-        PROPAGATE_FALSE(arrow::util::UTF8Decode(&i, &codepoint));
-        dest = arrow::util::UTF8Encode(dest, codepoint);
-        // and skip the remainder
-        int64_t skips = options.step - 1;
-        while ((skips--) && (i < end_sliced)) {
-          PROPAGATE_FALSE(arrow::util::UTF8Decode(&i, &codepoint));
-        }
+      return end_sliced - begin_sliced;
+    }
+    uint8_t* dest = output;
+    const uint8_t* i = begin_sliced;
+
+    while (i < end_sliced) {
+      uint32_t codepoint = 0;
+      // write a single codepoint
+      RETURN_IF_UTF8_ERROR(arrow::util::UTF8Decode(&i, &codepoint));
+      dest = arrow::util::UTF8Encode(dest, codepoint);
+      // and skip the remainder
+      int64_t skips = opt.step - 1;
+      while ((skips--) && (i < end_sliced)) {
+        RETURN_IF_UTF8_ERROR(arrow::util::UTF8Decode(&i, &codepoint));
       }
-      *output_written = dest - output;
     }
-    return true;
-  } else {  // step < 0
-    // serious +1 -1 kung fu because now begin_slice and end_slice act like reverse
-    // iterators.
+    return dest - output;
+  }
 
-    if (options.start >= 0) {
+  int64_t SliceBackward(const uint8_t* input, int64_t input_string_ncodeunits,
+                        uint8_t* output) {
+    // Slice in reverse order (step < 0)
+    const SliceOptions& opt = *this->options;
+    const uint8_t* begin = input;
+    const uint8_t* end = input + input_string_ncodeunits;
+    const uint8_t* begin_sliced = begin;
+    const uint8_t* end_sliced = end;
+
+    // Serious +1 -1 kung fu because begin_sliced and end_sliced act like
+    // reverse iterators.
+    if (opt.start >= 0) {
       // +1 because begin_sliced acts as as the end of a reverse iterator
-      PROPAGATE_FALSE(arrow::util::UTF8AdvanceCodepoints(begin, end, &begin_sliced,
-                                                         options.start + 1));
-      // and make it point at the last codeunit of the previous codeunit
-      begin_sliced--;
+      RETURN_IF_UTF8_ERROR(
+          arrow::util::UTF8AdvanceCodepoints(begin, end, &begin_sliced, opt.start + 1));
     } else {
       // -1 because start=-1 means the last codeunit, which is 0 advances
-      PROPAGATE_FALSE(arrow::util::UTF8AdvanceCodepointsReverse(begin, end, &begin_sliced,
-                                                                -options.start - 1));
-      // and make it point at the last codeunit of the previous codeunit
-      begin_sliced--;
-    }
-    // similar to options.start
-    if (options.stop >= 0) {
-      PROPAGATE_FALSE(
-          arrow::util::UTF8AdvanceCodepoints(begin, end, &end_sliced, options.stop + 1));
-      end_sliced--;
+      RETURN_IF_UTF8_ERROR(arrow::util::UTF8AdvanceCodepointsReverse(
+          begin, end, &begin_sliced, -opt.start - 1));
+    }
+    // make it point at the last codeunit of the previous codeunit
+    begin_sliced--;
+
+    // similar to opt.start
+    if (opt.stop >= 0) {
+      RETURN_IF_UTF8_ERROR(
+          arrow::util::UTF8AdvanceCodepoints(begin, end, &end_sliced, opt.stop + 1));
     } else {
-      PROPAGATE_FALSE(arrow::util::UTF8AdvanceCodepointsReverse(begin, end, &end_sliced,
-                                                                -options.stop - 1));
-      end_sliced--;
+      RETURN_IF_UTF8_ERROR(arrow::util::UTF8AdvanceCodepointsReverse(
+          begin, end, &end_sliced, -opt.stop - 1));
     }
+    end_sliced--;
 
+    // Copy computed slice to output
     uint8_t* dest = output;
     const uint8_t* i = begin_sliced;
-
     while (i > end_sliced) {
       uint32_t codepoint = 0;
       // write a single codepoint
-      PROPAGATE_FALSE(arrow::util::UTF8DecodeReverse(&i, &codepoint));
+      RETURN_IF_UTF8_ERROR(arrow::util::UTF8DecodeReverse(&i, &codepoint));
       dest = arrow::util::UTF8Encode(dest, codepoint);
       // and skip the remainder
-      int64_t skips = -options.step - 1;
+      int64_t skips = -opt.step - 1;
       while ((skips--) && (i > end_sliced)) {
-        PROPAGATE_FALSE(arrow::util::UTF8DecodeReverse(&i, &codepoint));
+        RETURN_IF_UTF8_ERROR(arrow::util::UTF8DecodeReverse(&i, &codepoint));
       }
     }
-    *output_written = dest - output;
-    return true;
+    return dest - output;
   }
-}
 
-#undef PROPAGATE_FALSE
+#undef RETURN_IF_UTF8_ERROR
+};
 
 template <typename Type>
-struct SliceCodeunits : SliceBase<Type, SliceCodeunits<Type>> {
-  using Base = SliceBase<Type, SliceCodeunits<Type>>;
-  using offset_type = typename Base::offset_type;
-  using Base::Base;
-
-  int64_t MaxCodeunits(int64_t ninputs, int64_t input_ncodeunits) override {
-    const SliceOptions& opt = this->options;
-    if ((opt.start >= 0) != (opt.stop >= 0)) {
-      // If start and stop don't have the same sign, we can't guess an upper bound
-      // on the resulting slice lengths, so return a worst case estimate.
-      return input_ncodeunits;
-    }
-    int64_t max_slice_codepoints = (opt.stop - opt.start + opt.step - 1) / opt.step;
-    // The maximum UTF8 byte size of a codepoint is 4
-    return std::min(input_ncodeunits,
-                    4 * ninputs * std::max<int64_t>(0, max_slice_codepoints));
-  }
-
-  bool Transform(const uint8_t* input, offset_type input_string_ncodeunits,
-                 uint8_t* output, offset_type* output_written) {
-    int64_t output_written_64;
-    bool res = SliceCodeunitsTransform(input, input_string_ncodeunits, output,
-                                       &output_written_64, this->options);
-    *output_written = static_cast<offset_type>(output_written_64);
-    return res;
-  }
-};
+using SliceCodeunits = StringTransformExec<Type, SliceCodeunitsTransform>;
 
 const FunctionDoc utf8_slice_codeunits_doc(
     "Slice string ",
@@ -1170,10 +1212,13 @@ void AddSlice(FunctionRegistry* registry) {
                                                &utf8_slice_codeunits_doc);
   using t32 = SliceCodeunits<StringType>;
   using t64 = SliceCodeunits<LargeStringType>;
-  DCHECK_OK(func->AddKernel({utf8()}, utf8(), t32::Exec, t32::State::Init));
-  DCHECK_OK(func->AddKernel({large_utf8()}, large_utf8(), t64::Exec, t64::State::Init));
+  DCHECK_OK(
+      func->AddKernel({utf8()}, utf8(), t32::Exec, SliceCodeunitsTransform::State::Init));
+  DCHECK_OK(func->AddKernel({large_utf8()}, large_utf8(), t64::Exec,
+                            SliceCodeunitsTransform::State::Init));
   DCHECK_OK(registry->AddFunction(std::move(func)));
 }
+
 // IsAlpha/Digit etc
 
 #ifdef ARROW_WITH_UTF8PROC
@@ -1583,8 +1628,25 @@ struct IsUpperAscii : CharacterPredicateAscii<IsUpperAscii> {
 
 // splitting
 
-template <typename Type, typename ListType, typename Options, typename Derived>
-struct SplitBaseTransform {
+template <typename Options>
+struct SplitFinderBase {
+  virtual Status PreExec(const Options& options) { return Status::OK(); }
+
+  // Derived classes should also define these methods:
+  //   static bool Find(const uint8_t* begin, const uint8_t* end,
+  //                    const uint8_t** separator_begin,
+  //                    const uint8_t** separator_end,
+  //                    const SplitPatternOptions& options);
+  //
+  //   static bool FindReverse(const uint8_t* begin, const uint8_t* end,
+  //                           const uint8_t** separator_begin,
+  //                           const uint8_t** separator_end,
+  //                           const SplitPatternOptions& options);
+};
+
+template <typename Type, typename ListType, typename SplitFinder,
+          typename Options = typename SplitFinder::Options>
+struct SplitExec {
   using string_offset_type = typename Type::offset_type;
   using list_offset_type = typename ListType::offset_type;
   using ArrayType = typename TypeTraits<Type>::ArrayType;
@@ -1595,12 +1657,75 @@ struct SplitBaseTransform {
   using ListOffsetsBuilderType = TypedBufferBuilder<list_offset_type>;
   using State = OptionsWrapper<Options>;
 
+  // Keep the temporary storage accross individual values, to minimize reallocations
   std::vector<util::string_view> parts;
   Options options;
 
-  explicit SplitBaseTransform(Options options) : options(options) {}
+  explicit SplitExec(const Options& options) : options(options) {}
+
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    return SplitExec{State::Get(ctx)}.Execute(ctx, batch, out);
+  }
+
+  Status Execute(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    SplitFinder finder;
+    RETURN_NOT_OK(finder.PreExec(options));
+    if (batch[0].kind() == Datum::ARRAY) {
+      return Execute(ctx, &finder, batch[0].array(), out);
+    }
+    DCHECK_EQ(batch[0].kind(), Datum::SCALAR);
+    return Execute(ctx, &finder, batch[0].scalar(), out);
+  }
+
+  Status Execute(KernelContext* ctx, SplitFinder* finder,
+                 const std::shared_ptr<ArrayData>& data, Datum* out) {
+    const ArrayType input(data);
+
+    BuilderType builder(input.type(), ctx->memory_pool());
+    // A slight overestimate of the data needed
+    RETURN_NOT_OK(builder.ReserveData(input.total_values_length()));
+    // The minimum amount of strings needed
+    RETURN_NOT_OK(builder.Resize(input.length() - input.null_count()));
+
+    ArrayData* output_list = out->mutable_array();
+    // List offsets were preallocated
+    auto* list_offsets = output_list->GetMutableValues<list_offset_type>(1);
+    DCHECK_NE(list_offsets, nullptr);
+    // Initial value
+    *list_offsets++ = 0;
+    for (int64_t i = 0; i < input.length(); ++i) {
+      if (!input.IsNull(i)) {
+        RETURN_NOT_OK(SplitString(input.GetView(i), finder, &builder));
+        if (ARROW_PREDICT_FALSE(builder.length() >
+                                std::numeric_limits<list_offset_type>::max())) {
+          return Status::CapacityError("List offset does not fit into 32 bit");
+        }
+      }
+      *list_offsets++ = static_cast<list_offset_type>(builder.length());
+    }
+    // Assign string array to list child data
+    std::shared_ptr<Array> string_array;
+    RETURN_NOT_OK(builder.Finish(&string_array));
+    output_list->child_data.push_back(string_array->data());
+    return Status::OK();
+  }
+
+  Status Execute(KernelContext* ctx, SplitFinder* finder,
+                 const std::shared_ptr<Scalar>& scalar, Datum* out) {
+    const auto& input = checked_cast<const ScalarType&>(*scalar);
+    auto result = checked_cast<ListScalarType*>(out->scalar().get());
+    if (input.is_valid) {
+      result->is_valid = true;
+      BuilderType builder(input.type, ctx->memory_pool());
+      util::string_view s(*input.value);
+      RETURN_NOT_OK(SplitString(s, finder, &builder));
+      RETURN_NOT_OK(builder.Finish(&result->value));
+    }
+    return Status::OK();
+  }
 
-  Status Split(const util::string_view& s, BuilderType* builder) {
+  Status SplitString(const util::string_view& s, SplitFinder* finder,
+                     BuilderType* builder) {
     const uint8_t* begin = reinterpret_cast<const uint8_t*>(s.data());
     const uint8_t* end = begin + s.length();
 
@@ -1618,8 +1743,7 @@ struct SplitBaseTransform {
       while (max_splits != 0) {
         const uint8_t *separator_begin, *separator_end;
         // find with whatever algo the part we will 'cut out'
-        if (static_cast<Derived&>(*this).FindReverse(begin, i, &separator_begin,
-                                                     &separator_end, options)) {
+        if (finder->FindReverse(begin, i, &separator_begin, &separator_end, options)) {
           parts.emplace_back(reinterpret_cast<const char*>(separator_end),
                              i - separator_end);
           i = separator_begin;
@@ -1639,8 +1763,7 @@ struct SplitBaseTransform {
       while (max_splits != 0) {
         const uint8_t *separator_begin, *separator_end;
         // find with whatever algo the part we will 'cut out'
-        if (static_cast<Derived&>(*this).Find(i, end, &separator_begin, &separator_end,
-                                              options)) {
+        if (finder->Find(i, end, &separator_begin, &separator_end, options)) {
           // the part till the beginning of the 'cut'
           RETURN_NOT_OK(
               builder->Append(i, static_cast<string_offset_type>(separator_begin - i)));
@@ -1656,85 +1779,13 @@ struct SplitBaseTransform {
     }
     return Status::OK();
   }
-
-  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
-    Options options = State::Get(ctx);
-    Derived splitter(options);  // we make an instance to reuse the parts vectors
-    RETURN_NOT_OK(splitter.CheckOptions());
-    return splitter.Split(ctx, batch, out);
-  }
-
-  Status CheckOptions() { return Status::OK(); }
-
-  Status Split(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
-    EnsureLookupTablesFilled();  // only needed for unicode
-
-    if (batch[0].kind() == Datum::ARRAY) {
-      const ArrayData& input = *batch[0].array();
-      ArrayType input_boxed(batch[0].array());
-
-      BuilderType builder(input.type, ctx->memory_pool());
-      // a slight overestimate of the data needed
-      RETURN_NOT_OK(builder.ReserveData(input_boxed.total_values_length()));
-      // the minimum amount of strings needed
-      RETURN_NOT_OK(builder.Resize(input.length));
-
-      ArrayData* output_list = out->mutable_array();
-      // list offsets were preallocated
-      auto* list_offsets = output_list->GetMutableValues<list_offset_type>(1);
-      DCHECK_NE(list_offsets, nullptr);
-      // initial value
-      *list_offsets++ = 0;
-      RETURN_NOT_OK(VisitArrayDataInline<Type>(
-          input,
-          [&](util::string_view s) {
-            RETURN_NOT_OK(Split(s, &builder));
-            if (ARROW_PREDICT_FALSE(builder.length() >
-                                    std::numeric_limits<list_offset_type>::max())) {
-              return Status::CapacityError("List offset does not fit into 32 bit");
-            }
-            *list_offsets++ = static_cast<list_offset_type>(builder.length());
-            return Status::OK();
-          },
-          [&]() {
-            // null value is already taken from input
-            *list_offsets++ = static_cast<list_offset_type>(builder.length());
-            return Status::OK();
-          }));
-      // assign list child data
-      std::shared_ptr<Array> string_array;
-      RETURN_NOT_OK(builder.Finish(&string_array));
-      output_list->child_data.push_back(string_array->data());
-
-    } else {
-      const auto& input = checked_cast<const ScalarType&>(*batch[0].scalar());
-      auto result = checked_pointer_cast<ListScalarType>(MakeNullScalar(out->type()));
-      if (input.is_valid) {
-        result->is_valid = true;
-        BuilderType builder(input.type, ctx->memory_pool());
-        util::string_view s(*input.value);
-        RETURN_NOT_OK(Split(s, &builder));
-        RETURN_NOT_OK(builder.Finish(&result->value));
-      }
-      out->value = result;
-    }
-
-    return Status::OK();
-  }
 };
 
-template <typename Type, typename ListType>
-struct SplitPatternTransform : SplitBaseTransform<Type, ListType, SplitPatternOptions,
-                                                  SplitPatternTransform<Type, ListType>> {
-  using Base = SplitBaseTransform<Type, ListType, SplitPatternOptions,
-                                  SplitPatternTransform<Type, ListType>>;
-  using ArrayType = typename TypeTraits<Type>::ArrayType;
-  using ScalarType = typename TypeTraits<Type>::ScalarType;
-  using string_offset_type = typename Type::offset_type;
-  using Base::Base;
+struct SplitPatternFinder : public SplitFinderBase<SplitPatternOptions> {
+  using Options = SplitPatternOptions;
 
-  Status CheckOptions() {
-    if (Base::options.pattern.length() == 0) {
+  Status PreExec(const SplitPatternOptions& options) override {
+    if (options.pattern.length() == 0) {
       return Status::Invalid("Empty separator");
     }
     return Status::OK();
@@ -1782,6 +1833,9 @@ struct SplitPatternTransform : SplitBaseTransform<Type, ListType, SplitPatternOp
   }
 };
 
+template <typename Type, typename ListType>
+using SplitPatternExec = SplitExec<Type, ListType, SplitPatternFinder>;
+
 const FunctionDoc split_pattern_doc(
     "Split string according to separator",
     ("Split each string according to the exact `pattern` defined in\n"
@@ -1815,29 +1869,22 @@ const FunctionDoc utf8_split_whitespace_doc(
 void AddSplitPattern(FunctionRegistry* registry) {
   auto func = std::make_shared<ScalarFunction>("split_pattern", Arity::Unary(),
                                                &split_pattern_doc);
-  using t32 = SplitPatternTransform<StringType, ListType>;
-  using t64 = SplitPatternTransform<LargeStringType, ListType>;
+  using t32 = SplitPatternExec<StringType, ListType>;
+  using t64 = SplitPatternExec<LargeStringType, ListType>;
   DCHECK_OK(func->AddKernel({utf8()}, {list(utf8())}, t32::Exec, t32::State::Init));
   DCHECK_OK(
       func->AddKernel({large_utf8()}, {list(large_utf8())}, t64::Exec, t64::State::Init));
   DCHECK_OK(registry->AddFunction(std::move(func)));
 }
 
-template <typename Type, typename ListType>
-struct SplitWhitespaceAsciiTransform
-    : SplitBaseTransform<Type, ListType, SplitOptions,
-                         SplitWhitespaceAsciiTransform<Type, ListType>> {
-  using Base = SplitBaseTransform<Type, ListType, SplitOptions,
-                                  SplitWhitespaceAsciiTransform<Type, ListType>>;
-  using ArrayType = typename TypeTraits<Type>::ArrayType;
-  using ScalarType = typename TypeTraits<Type>::ScalarType;
-  using string_offset_type = typename Type::offset_type;
-  using Base::Base;
+struct SplitWhitespaceAsciiFinder : public SplitFinderBase<SplitOptions> {
+  using Options = SplitOptions;
+
   static bool Find(const uint8_t* begin, const uint8_t* end,
                    const uint8_t** separator_begin, const uint8_t** separator_end,
                    const SplitOptions& options) {
     const uint8_t* i = begin;
-    while ((i < end)) {
+    while (i < end) {
       if (IsSpaceCharacterAscii(*i)) {
         *separator_begin = i;
         do {
@@ -1850,6 +1897,7 @@ struct SplitWhitespaceAsciiTransform
     }
     return false;
   }
+
   static bool FindReverse(const uint8_t* begin, const uint8_t* end,
                           const uint8_t** separator_begin, const uint8_t** separator_end,
                           const SplitOptions& options) {
@@ -1869,13 +1917,16 @@ struct SplitWhitespaceAsciiTransform
   }
 };
 
+template <typename Type, typename ListType>
+using SplitWhitespaceAsciiExec = SplitExec<Type, ListType, SplitWhitespaceAsciiFinder>;
+
 void AddSplitWhitespaceAscii(FunctionRegistry* registry) {
   static const SplitOptions default_options{};
   auto func =
       std::make_shared<ScalarFunction>("ascii_split_whitespace", Arity::Unary(),
                                        &ascii_split_whitespace_doc, &default_options);
-  using t32 = SplitWhitespaceAsciiTransform<StringType, ListType>;
-  using t64 = SplitWhitespaceAsciiTransform<LargeStringType, ListType>;
+  using t32 = SplitWhitespaceAsciiExec<StringType, ListType>;
+  using t64 = SplitWhitespaceAsciiExec<LargeStringType, ListType>;
   DCHECK_OK(func->AddKernel({utf8()}, {list(utf8())}, t32::Exec, t32::State::Init));
   DCHECK_OK(
       func->AddKernel({large_utf8()}, {list(large_utf8())}, t64::Exec, t64::State::Init));
@@ -1883,19 +1934,16 @@ void AddSplitWhitespaceAscii(FunctionRegistry* registry) {
 }
 
 #ifdef ARROW_WITH_UTF8PROC
-template <typename Type, typename ListType>
-struct SplitWhitespaceUtf8Transform
-    : SplitBaseTransform<Type, ListType, SplitOptions,
-                         SplitWhitespaceUtf8Transform<Type, ListType>> {
-  using Base = SplitBaseTransform<Type, ListType, SplitOptions,
-                                  SplitWhitespaceUtf8Transform<Type, ListType>>;
-  using ArrayType = typename TypeTraits<Type>::ArrayType;
-  using string_offset_type = typename Type::offset_type;
-  using ScalarType = typename TypeTraits<Type>::ScalarType;
-  using Base::Base;
-  static bool Find(const uint8_t* begin, const uint8_t* end,
-                   const uint8_t** separator_begin, const uint8_t** separator_end,
-                   const SplitOptions& options) {
+struct SplitWhitespaceUtf8Finder : public SplitFinderBase<SplitOptions> {
+  using Options = SplitOptions;
+
+  Status PreExec(const SplitOptions& options) override {
+    EnsureLookupTablesFilled();
+    return Status::OK();
+  }
+
+  bool Find(const uint8_t* begin, const uint8_t* end, const uint8_t** separator_begin,
+            const uint8_t** separator_end, const SplitOptions& options) {
     const uint8_t* i = begin;
     while ((i < end)) {
       uint32_t codepoint = 0;
@@ -1915,9 +1963,10 @@ struct SplitWhitespaceUtf8Transform
     }
     return false;
   }
-  static bool FindReverse(const uint8_t* begin, const uint8_t* end,
-                          const uint8_t** separator_begin, const uint8_t** separator_end,
-                          const SplitOptions& options) {
+
+  bool FindReverse(const uint8_t* begin, const uint8_t* end,
+                   const uint8_t** separator_begin, const uint8_t** separator_end,
+                   const SplitOptions& options) {
     const uint8_t* i = end - 1;
     while ((i >= begin)) {
       uint32_t codepoint = 0;
@@ -1939,73 +1988,68 @@ struct SplitWhitespaceUtf8Transform
   }
 };
 
+template <typename Type, typename ListType>
+using SplitWhitespaceUtf8Exec = SplitExec<Type, ListType, SplitWhitespaceUtf8Finder>;
+
 void AddSplitWhitespaceUTF8(FunctionRegistry* registry) {
   static const SplitOptions default_options{};
   auto func =
       std::make_shared<ScalarFunction>("utf8_split_whitespace", Arity::Unary(),
                                        &utf8_split_whitespace_doc, &default_options);
-  using t32 = SplitWhitespaceUtf8Transform<StringType, ListType>;
-  using t64 = SplitWhitespaceUtf8Transform<LargeStringType, ListType>;
+  using t32 = SplitWhitespaceUtf8Exec<StringType, ListType>;
+  using t64 = SplitWhitespaceUtf8Exec<LargeStringType, ListType>;
   DCHECK_OK(func->AddKernel({utf8()}, {list(utf8())}, t32::Exec, t32::State::Init));
   DCHECK_OK(
       func->AddKernel({large_utf8()}, {list(large_utf8())}, t64::Exec, t64::State::Init));
   DCHECK_OK(registry->AddFunction(std::move(func)));
 }
-#endif
+#endif  // ARROW_WITH_UTF8PROC
 
 #ifdef ARROW_WITH_RE2
-template <typename Type, typename ListType>
-struct SplitRegexTransform : SplitBaseTransform<Type, ListType, SplitPatternOptions,
-                                                SplitRegexTransform<Type, ListType>> {
-  using Base = SplitBaseTransform<Type, ListType, SplitPatternOptions,
-                                  SplitRegexTransform<Type, ListType>>;
-  using ArrayType = typename TypeTraits<Type>::ArrayType;
-  using string_offset_type = typename Type::offset_type;
-  using ScalarType = typename TypeTraits<Type>::ScalarType;
+struct SplitRegexFinder : public SplitFinderBase<SplitPatternOptions> {
+  using Options = SplitPatternOptions;
 
-  const RE2 regex_split;
+  util::optional<RE2> regex_split;
 
-  explicit SplitRegexTransform(SplitPatternOptions options)
-      : Base(options), regex_split(MakePattern(options)) {}
-
-  static std::string MakePattern(const SplitPatternOptions& options) {
+  Status PreExec(const SplitPatternOptions& options) override {
+    if (options.reverse) {
+      return Status::NotImplemented("Cannot split in reverse with regex");
+    }
     // RE2 does *not* give you the full match! Must wrap the regex in a capture group
     // There is FindAndConsume, but it would give only the end of the separator
     std::string pattern = "(";
     pattern.reserve(options.pattern.size() + 2);
     pattern += options.pattern;
     pattern += ')';
-    return pattern;
-  }
-
-  Status CheckOptions() {
-    if (Base::options.reverse) {
-      return Status::NotImplemented("Cannot split in reverse with regex");
-    }
-    return RegexStatus(regex_split);
+    regex_split.emplace(std::move(pattern));
+    return RegexStatus(*regex_split);
   }
 
   bool Find(const uint8_t* begin, const uint8_t* end, const uint8_t** separator_begin,
-            const uint8_t** separator_end, const SplitOptions& options) {
+            const uint8_t** separator_end, const SplitPatternOptions& options) {
     re2::StringPiece piece(reinterpret_cast<const char*>(begin),
                            std::distance(begin, end));
     // "StringPiece is mutated to point to matched piece"
     re2::StringPiece result;
-    if (!re2::RE2::PartialMatch(piece, regex_split, &result)) {
+    if (!re2::RE2::PartialMatch(piece, *regex_split, &result)) {
       return false;
     }
     *separator_begin = reinterpret_cast<const uint8_t*>(result.data());
     *separator_end = reinterpret_cast<const uint8_t*>(result.data() + result.size());
     return true;
   }
+
   bool FindReverse(const uint8_t* begin, const uint8_t* end,
                    const uint8_t** separator_begin, const uint8_t** separator_end,
-                   const SplitOptions& options) {
-    // Not easily supportable, unfortunately
+                   const SplitPatternOptions& options) {
+    // Unsupported (see PreExec)
     return false;
   }
 };
 
+template <typename Type, typename ListType>
+using SplitRegexExec = SplitExec<Type, ListType, SplitRegexFinder>;
+
 const FunctionDoc split_pattern_regex_doc(
     "Split string according to regex pattern",
     ("Split each string according to the regex `pattern` defined in\n"
@@ -2019,14 +2063,14 @@ const FunctionDoc split_pattern_regex_doc(
 void AddSplitRegex(FunctionRegistry* registry) {
   auto func = std::make_shared<ScalarFunction>("split_pattern_regex", Arity::Unary(),
                                                &split_pattern_regex_doc);
-  using t32 = SplitRegexTransform<StringType, ListType>;
-  using t64 = SplitRegexTransform<LargeStringType, ListType>;
+  using t32 = SplitRegexExec<StringType, ListType>;
+  using t64 = SplitRegexExec<LargeStringType, ListType>;
   DCHECK_OK(func->AddKernel({utf8()}, {list(utf8())}, t32::Exec, t32::State::Init));
   DCHECK_OK(
       func->AddKernel({large_utf8()}, {list(large_utf8())}, t64::Exec, t64::State::Init));
   DCHECK_OK(registry->AddFunction(std::move(func)));
 }
-#endif
+#endif  // ARROW_WITH_RE2
 
 void AddSplit(FunctionRegistry* registry) {
   AddSplitPattern(registry);
@@ -2477,56 +2521,54 @@ Result<ValueDescr> StrptimeResolve(KernelContext* ctx, const std::vector<ValueDe
 
 #ifdef ARROW_WITH_UTF8PROC
 
-template <typename Type, bool left, bool right, typename Derived>
-struct UTF8TrimWhitespaceBase : StringTransform<Type, Derived> {
-  using Base = StringTransform<Type, Derived>;
-  using offset_type = typename Base::offset_type;
-  bool Transform(const uint8_t* input, offset_type input_string_ncodeunits,
-                 uint8_t* output, offset_type* output_written) {
+template <bool TrimLeft, bool TrimRight>
+struct UTF8TrimWhitespaceTransform : public StringTransformBase {
+  Status PreExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) override {
+    EnsureLookupTablesFilled();
+    return Status::OK();
+  }
+
+  int64_t Transform(const uint8_t* input, int64_t input_string_ncodeunits,
+                    uint8_t* output) {
     const uint8_t* begin = input;
     const uint8_t* end = input + input_string_ncodeunits;
     const uint8_t* end_trimmed = end;
     const uint8_t* begin_trimmed = begin;
 
     auto predicate = [](uint32_t c) { return !IsSpaceCharacterUnicode(c); };
-    if (left && !ARROW_PREDICT_TRUE(
-                    arrow::util::UTF8FindIf(begin, end, predicate, &begin_trimmed))) {
-      return false;
+    if (TrimLeft && !ARROW_PREDICT_TRUE(
+                        arrow::util::UTF8FindIf(begin, end, predicate, &begin_trimmed))) {
+      return kTransformError;
     }
-    if (right && (begin_trimmed < end)) {
+    if (TrimRight && begin_trimmed < end) {
       if (!ARROW_PREDICT_TRUE(arrow::util::UTF8FindIfReverse(begin_trimmed, end,
                                                              predicate, &end_trimmed))) {
-        return false;
+        return kTransformError;
       }
     }
     std::copy(begin_trimmed, end_trimmed, output);
-    *output_written = static_cast<offset_type>(end_trimmed - begin_trimmed);
-    return true;
-  }
-  Status Execute(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
-    EnsureLookupTablesFilled();
-    return Base::Execute(ctx, batch, out);
+    return end_trimmed - begin_trimmed;
   }
 };
 
 template <typename Type>
-struct UTF8TrimWhitespace
-    : UTF8TrimWhitespaceBase<Type, true, true, UTF8TrimWhitespace<Type>> {};
+using UTF8TrimWhitespace =
+    StringTransformExec<Type, UTF8TrimWhitespaceTransform<true, true>>;
 
 template <typename Type>
-struct UTF8LTrimWhitespace
-    : UTF8TrimWhitespaceBase<Type, true, false, UTF8LTrimWhitespace<Type>> {};
+using UTF8LTrimWhitespace =
+    StringTransformExec<Type, UTF8TrimWhitespaceTransform<true, false>>;
 
 template <typename Type>
-struct UTF8RTrimWhitespace
-    : UTF8TrimWhitespaceBase<Type, false, true, UTF8RTrimWhitespace<Type>> {};
+using UTF8RTrimWhitespace =
+    StringTransformExec<Type, UTF8TrimWhitespaceTransform<false, true>>;
 
-struct TrimStateUTF8 {
+struct UTF8TrimState {
   TrimOptions options_;
   std::vector<bool> codepoints_;
   Status status_ = Status::OK();
 
-  explicit TrimStateUTF8(KernelContext* ctx, TrimOptions options)
+  explicit UTF8TrimState(KernelContext* ctx, TrimOptions options)
       : options_(std::move(options)) {
     if (!ARROW_PREDICT_TRUE(
             arrow::util::UTF8ForEach(options_.characters, [&](uint32_t c) {
@@ -2539,167 +2581,136 @@ struct TrimStateUTF8 {
   }
 };
 
-template <typename Type, bool left, bool right, typename Derived>
-struct UTF8TrimBase : StringTransform<Type, Derived> {
-  using Base = StringTransform<Type, Derived>;
-  using offset_type = typename Base::offset_type;
-  using State = KernelStateFromFunctionOptions<TrimStateUTF8, TrimOptions>;
-  TrimStateUTF8 state_;
+template <bool TrimLeft, bool TrimRight>
+struct UTF8TrimTransform : public StringTransformBase {
+  using State = KernelStateFromFunctionOptions<UTF8TrimState, TrimOptions>;
 
-  explicit UTF8TrimBase(TrimStateUTF8 state) : state_(std::move(state)) {}
+  const UTF8TrimState& state_;
 
-  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
-    TrimStateUTF8 state = State::Get(ctx);
-    RETURN_NOT_OK(state.status_);
-    return Derived(state).Execute(ctx, batch, out);
-  }
+  explicit UTF8TrimTransform(const UTF8TrimState& state) : state_(state) {}
 
-  Status Execute(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
-    EnsureLookupTablesFilled();
-    return Base::Execute(ctx, batch, out);
+  Status PreExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) override {
+    return state_.status_;
   }
 
-  bool Transform(const uint8_t* input, offset_type input_string_ncodeunits,
-                 uint8_t* output, offset_type* output_written) {
+  int64_t Transform(const uint8_t* input, int64_t input_string_ncodeunits,
+                    uint8_t* output) {
     const uint8_t* begin = input;
     const uint8_t* end = input + input_string_ncodeunits;
     const uint8_t* end_trimmed = end;
     const uint8_t* begin_trimmed = begin;
 
-    auto predicate = [&](uint32_t c) {
-      bool contains = state_.codepoints_[c];
-      return !contains;
-    };
-    if (left && !ARROW_PREDICT_TRUE(
-                    arrow::util::UTF8FindIf(begin, end, predicate, &begin_trimmed))) {
-      return false;
+    auto predicate = [&](uint32_t c) { return !state_.codepoints_[c]; };
+    if (TrimLeft && !ARROW_PREDICT_TRUE(
+                        arrow::util::UTF8FindIf(begin, end, predicate, &begin_trimmed))) {
+      return kTransformError;
     }
-    if (right && (begin_trimmed < end)) {
+    if (TrimRight && begin_trimmed < end) {
       if (!ARROW_PREDICT_TRUE(arrow::util::UTF8FindIfReverse(begin_trimmed, end,
                                                              predicate, &end_trimmed))) {
-        return false;
+        return kTransformError;
       }
     }
     std::copy(begin_trimmed, end_trimmed, output);
-    *output_written = static_cast<offset_type>(end_trimmed - begin_trimmed);
-    return true;
+    return end_trimmed - begin_trimmed;
   }
 };
 
 template <typename Type>
-struct UTF8Trim : UTF8TrimBase<Type, true, true, UTF8Trim<Type>> {
-  using Base = UTF8TrimBase<Type, true, true, UTF8Trim<Type>>;
-  using Base::Base;
-};
+using UTF8Trim = StringTransformExecWithState<Type, UTF8TrimTransform<true, true>>;
 
 template <typename Type>
-struct UTF8LTrim : UTF8TrimBase<Type, true, false, UTF8LTrim<Type>> {
-  using Base = UTF8TrimBase<Type, true, false, UTF8LTrim<Type>>;
-  using Base::Base;
-};
+using UTF8LTrim = StringTransformExecWithState<Type, UTF8TrimTransform<true, false>>;
 
 template <typename Type>
-struct UTF8RTrim : UTF8TrimBase<Type, false, true, UTF8RTrim<Type>> {
-  using Base = UTF8TrimBase<Type, false, true, UTF8RTrim<Type>>;
-  using Base::Base;
-};
+using UTF8RTrim = StringTransformExecWithState<Type, UTF8TrimTransform<false, true>>;
 
 #endif
 
-template <typename Type, bool left, bool right, typename Derived>
-struct AsciiTrimWhitespaceBase : StringTransform<Type, Derived> {
-  using offset_type = typename Type::offset_type;
-  bool Transform(const uint8_t* input, offset_type input_string_ncodeunits,
-                 uint8_t* output, offset_type* output_written) {
+template <bool TrimLeft, bool TrimRight>
+struct AsciiTrimWhitespaceTransform : public StringTransformBase {
+  int64_t Transform(const uint8_t* input, int64_t input_string_ncodeunits,
+                    uint8_t* output) {
     const uint8_t* begin = input;
     const uint8_t* end = input + input_string_ncodeunits;
     const uint8_t* end_trimmed = end;
+    const uint8_t* begin_trimmed = begin;
 
     auto predicate = [](unsigned char c) { return !IsSpaceCharacterAscii(c); };
-    const uint8_t* begin_trimmed = left ? std::find_if(begin, end, predicate) : begin;
-    if (right & (begin_trimmed < end)) {
+    if (TrimLeft) {
+      begin_trimmed = std::find_if(begin, end, predicate);
+    }
+    if (TrimRight && begin_trimmed < end) {
       std::reverse_iterator<const uint8_t*> rbegin(end);
       std::reverse_iterator<const uint8_t*> rend(begin_trimmed);
       end_trimmed = std::find_if(rbegin, rend, predicate).base();
     }
     std::copy(begin_trimmed, end_trimmed, output);
-    *output_written = static_cast<offset_type>(end_trimmed - begin_trimmed);
-    return true;
+    return end_trimmed - begin_trimmed;
   }
 };
 
 template <typename Type>
-struct AsciiTrimWhitespace
-    : AsciiTrimWhitespaceBase<Type, true, true, AsciiTrimWhitespace<Type>> {};
+using AsciiTrimWhitespace =
+    StringTransformExec<Type, AsciiTrimWhitespaceTransform<true, true>>;
 
 template <typename Type>
-struct AsciiLTrimWhitespace
-    : AsciiTrimWhitespaceBase<Type, true, false, AsciiLTrimWhitespace<Type>> {};
+using AsciiLTrimWhitespace =
+    StringTransformExec<Type, AsciiTrimWhitespaceTransform<true, false>>;
 
 template <typename Type>
-struct AsciiRTrimWhitespace
-    : AsciiTrimWhitespaceBase<Type, false, true, AsciiRTrimWhitespace<Type>> {};
-
-template <typename Type, bool left, bool right, typename Derived>
-struct AsciiTrimBase : StringTransform<Type, Derived> {
-  using Base = StringTransform<Type, Derived>;
-  using offset_type = typename Base::offset_type;
-  using State = OptionsWrapper<TrimOptions>;
+using AsciiRTrimWhitespace =
+    StringTransformExec<Type, AsciiTrimWhitespaceTransform<false, true>>;
+
+struct AsciiTrimState {
   TrimOptions options_;
   std::vector<bool> characters_;
 
-  explicit AsciiTrimBase(TrimOptions options)
+  explicit AsciiTrimState(KernelContext* ctx, TrimOptions options)
       : options_(std::move(options)), characters_(256) {
-    std::for_each(options_.characters.begin(), options_.characters.end(),
-                  [&](char c) { characters_[static_cast<unsigned char>(c)] = true; });
+    for (const auto c : options_.characters) {
+      characters_[static_cast<unsigned char>(c)] = true;
+    }
   }
+};
 
-  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
-    TrimOptions options = State::Get(ctx);
-    return Derived(options).Execute(ctx, batch, out);
-  }
+template <bool TrimLeft, bool TrimRight>
+struct AsciiTrimTransform : public StringTransformBase {
+  using State = KernelStateFromFunctionOptions<AsciiTrimState, TrimOptions>;
+
+  const AsciiTrimState& state_;
+
+  explicit AsciiTrimTransform(const AsciiTrimState& state) : state_(state) {}
 
-  bool Transform(const uint8_t* input, offset_type input_string_ncodeunits,
-                 uint8_t* output, offset_type* output_written) {
+  int64_t Transform(const uint8_t* input, int64_t input_string_ncodeunits,
+                    uint8_t* output) {
     const uint8_t* begin = input;
     const uint8_t* end = input + input_string_ncodeunits;
     const uint8_t* end_trimmed = end;
-    const uint8_t* begin_trimmed;
-
-    auto predicate = [&](unsigned char c) {
-      bool contains = characters_[c];
-      return !contains;
-    };
+    const uint8_t* begin_trimmed = begin;
 
-    begin_trimmed = left ? std::find_if(begin, end, predicate) : begin;
-    if (right & (begin_trimmed < end)) {
+    auto predicate = [&](uint8_t c) { return !state_.characters_[c]; };
+    if (TrimLeft) {
+      begin_trimmed = std::find_if(begin, end, predicate);
+    }
+    if (TrimRight && begin_trimmed < end) {
       std::reverse_iterator<const uint8_t*> rbegin(end);
       std::reverse_iterator<const uint8_t*> rend(begin_trimmed);
       end_trimmed = std::find_if(rbegin, rend, predicate).base();
     }
     std::copy(begin_trimmed, end_trimmed, output);
-    *output_written = static_cast<offset_type>(end_trimmed - begin_trimmed);
-    return true;
+    return end_trimmed - begin_trimmed;
   }
 };
 
 template <typename Type>
-struct AsciiTrim : AsciiTrimBase<Type, true, true, AsciiTrim<Type>> {
-  using Base = AsciiTrimBase<Type, true, true, AsciiTrim<Type>>;
-  using Base::Base;
-};
+using AsciiTrim = StringTransformExecWithState<Type, AsciiTrimTransform<true, true>>;
 
 template <typename Type>
-struct AsciiLTrim : AsciiTrimBase<Type, true, false, AsciiLTrim<Type>> {
-  using Base = AsciiTrimBase<Type, true, false, AsciiLTrim<Type>>;
-  using Base::Base;
-};
+using AsciiLTrim = StringTransformExecWithState<Type, AsciiTrimTransform<true, false>>;
 
 template <typename Type>
-struct AsciiRTrim : AsciiTrimBase<Type, false, true, AsciiRTrim<Type>> {
-  using Base = AsciiTrimBase<Type, false, true, AsciiRTrim<Type>>;
-  using Base::Base;
-};
+using AsciiRTrim = StringTransformExecWithState<Type, AsciiTrimTransform<false, true>>;
 
 const FunctionDoc utf8_trim_whitespace_doc(
     "Trim leading and trailing whitespace characters",
diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
index f015e339423..c4b6956be2b 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
@@ -144,10 +144,13 @@ TYPED_TEST(TestStringKernels, AsciiReverse) {
   this->CheckUnary("ascii_reverse", R"(["abcd", null, "", "bbb"])", this->type(),
                    R"(["dcba", null, "", "bbb"])");
 
-  Datum invalid_input = ArrayFromJSON(this->type(), R"(["aAazZæÆ&", null, "", "bbb"])");
+  auto invalid_input = ArrayFromJSON(this->type(), R"(["aAazZæÆ&", null, "", "bcd"])");
   EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
                                   testing::HasSubstr("Non-ASCII sequence in input"),
                                   CallFunction("ascii_reverse", {invalid_input}));
+  auto masked_input = TweakValidityBit(invalid_input, 0, false);
+  CheckScalarUnary("ascii_reverse", masked_input,
+                   ArrayFromJSON(this->type(), R"([null, null, "", "dcb"])"));
 }
 
 TYPED_TEST(TestStringKernels, Utf8Reverse) {
diff --git a/cpp/src/arrow/testing/gtest_util.cc b/cpp/src/arrow/testing/gtest_util.cc
index 39bd665d5b6..eb0edd56566 100644
--- a/cpp/src/arrow/testing/gtest_util.cc
+++ b/cpp/src/arrow/testing/gtest_util.cc
@@ -553,6 +553,19 @@ void ApproxCompareBatch(const RecordBatch& left, const RecordBatch& right,
       [](const Array& left, const Array& right) { return left.ApproxEquals(right); });
 }
 
+std::shared_ptr<Array> TweakValidityBit(const std::shared_ptr<Array>& array,
+                                        int64_t index, bool validity) {
+  auto data = array->data()->Copy();
+  if (data->buffers[0] == nullptr) {
+    data->buffers[0] = *AllocateBitmap(data->length);
+    BitUtil::SetBitsTo(data->buffers[0]->mutable_data(), 0, data->length, true);
+  }
+  BitUtil::SetBitTo(data->buffers[0]->mutable_data(), index, validity);
+  data->null_count = kUnknownNullCount;
+  // Need to return a new array, because Array caches the null bitmap pointer
+  return MakeArray(data);
+}
+
 class LocaleGuard::Impl {
  public:
   explicit Impl(const char* new_locale) : global_locale_(std::locale()) {
diff --git a/cpp/src/arrow/testing/gtest_util.h b/cpp/src/arrow/testing/gtest_util.h
index b8ea8e76298..9d01cd4bf27 100644
--- a/cpp/src/arrow/testing/gtest_util.h
+++ b/cpp/src/arrow/testing/gtest_util.h
@@ -444,6 +444,14 @@ inline void BitmapFromVector(const std::vector<T>& is_valid,
   ASSERT_OK(GetBitmapFromVector(is_valid, out));
 }
 
+// Given an array, return a new identical array except for one validity bit
+// set to a new value.
+// This is useful to force the underlying "value" of null entries to otherwise
+// invalid data and check that errors don't get reported.
+ARROW_TESTING_EXPORT
+std::shared_ptr<Array> TweakValidityBit(const std::shared_ptr<Array>& array,
+                                        int64_t index, bool validity);
+
 ARROW_TESTING_EXPORT
 void SleepFor(double seconds);
 

From bd28d5264c9467c3d4d3096788c9890e26264df2 Mon Sep 17 00:00:00 2001
From: Matthew Topol <mtopol@factset.com>
Date: Wed, 9 Jun 2021 17:14:42 -0400
Subject: [PATCH 388/719] ARROW-13001: [Go][Parquet] fix build failure on s390x

Closes #10475 from zeroshade/arrow-13001

Authored-by: Matthew Topol <mtopol@factset.com>
Signed-off-by: Benjamin Kietzman <bengilgit@gmail.com>
---
 .../{bitmap_bmi2.go => bitmap_bmi2_amd64.go}  |  2 ++
 .../{bitmap_bmi2.s => bitmap_bmi2_amd64.s}    |  0
 go/parquet/internal/bmi/bitmap_bmi2_s390x.go  | 24 ++++++++++++++++
 .../internal/bmi/{bmi_noasm.go => bmi.go}     | 26 +++++++++++++++++
 .../bmi/{bmi_init.go => bmi_amd64.go}         | 28 ++-----------------
 .../{bit_packing.go => bit_packing_amd64.go}  |  0
 ...king_avx2.go => bit_packing_avx2_amd64.go} |  0
 ...acking_avx2.s => bit_packing_avx2_amd64.s} |  0
 .../internal/utils/bit_packing_s390x.go       | 23 +++++++++++++++
 go/parquet/internal/utils/bit_run_reader.go   |  2 ++
 ...{min_max_avx2.go => min_max_avx2_amd64.go} |  0
 .../{min_max_avx2.s => min_max_avx2_amd64.s}  |  0
 ...{min_max_sse4.go => min_max_sse4_amd64.go} |  0
 .../{min_max_sse4.s => min_max_sse4_amd64.s}  |  0
 ...bool_avx2.go => unpack_bool_avx2_amd64.go} |  0
 ...k_bool_avx2.s => unpack_bool_avx2_amd64.s} |  0
 .../internal/utils/unpack_bool_s390x.go       | 25 +++++++++++++++++
 ...bool_sse4.go => unpack_bool_sse4_amd64.go} |  0
 ...k_bool_sse4.s => unpack_bool_sse4_amd64.s} |  0
 19 files changed, 104 insertions(+), 26 deletions(-)
 rename go/parquet/internal/bmi/{bitmap_bmi2.go => bitmap_bmi2_amd64.go} (98%)
 rename go/parquet/internal/bmi/{bitmap_bmi2.s => bitmap_bmi2_amd64.s} (100%)
 create mode 100644 go/parquet/internal/bmi/bitmap_bmi2_s390x.go
 rename go/parquet/internal/bmi/{bmi_noasm.go => bmi.go} (89%)
 rename go/parquet/internal/bmi/{bmi_init.go => bmi_amd64.go} (51%)
 rename go/parquet/internal/utils/{bit_packing.go => bit_packing_amd64.go} (100%)
 rename go/parquet/internal/utils/{bit_packing_avx2.go => bit_packing_avx2_amd64.go} (100%)
 rename go/parquet/internal/utils/{bit_packing_avx2.s => bit_packing_avx2_amd64.s} (100%)
 create mode 100644 go/parquet/internal/utils/bit_packing_s390x.go
 rename go/parquet/internal/utils/{min_max_avx2.go => min_max_avx2_amd64.go} (100%)
 rename go/parquet/internal/utils/{min_max_avx2.s => min_max_avx2_amd64.s} (100%)
 rename go/parquet/internal/utils/{min_max_sse4.go => min_max_sse4_amd64.go} (100%)
 rename go/parquet/internal/utils/{min_max_sse4.s => min_max_sse4_amd64.s} (100%)
 rename go/parquet/internal/utils/{unpack_bool_avx2.go => unpack_bool_avx2_amd64.go} (100%)
 rename go/parquet/internal/utils/{unpack_bool_avx2.s => unpack_bool_avx2_amd64.s} (100%)
 create mode 100644 go/parquet/internal/utils/unpack_bool_s390x.go
 rename go/parquet/internal/utils/{unpack_bool_sse4.go => unpack_bool_sse4_amd64.go} (100%)
 rename go/parquet/internal/utils/{unpack_bool_sse4.s => unpack_bool_sse4_amd64.s} (100%)

diff --git a/go/parquet/internal/bmi/bitmap_bmi2.go b/go/parquet/internal/bmi/bitmap_bmi2_amd64.go
similarity index 98%
rename from go/parquet/internal/bmi/bitmap_bmi2.go
rename to go/parquet/internal/bmi/bitmap_bmi2_amd64.go
index ce09b3fd01d..66ffd8e6603 100644
--- a/go/parquet/internal/bmi/bitmap_bmi2.go
+++ b/go/parquet/internal/bmi/bitmap_bmi2_amd64.go
@@ -14,6 +14,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+// +build !noasm
+
 package bmi
 
 import "unsafe"
diff --git a/go/parquet/internal/bmi/bitmap_bmi2.s b/go/parquet/internal/bmi/bitmap_bmi2_amd64.s
similarity index 100%
rename from go/parquet/internal/bmi/bitmap_bmi2.s
rename to go/parquet/internal/bmi/bitmap_bmi2_amd64.s
diff --git a/go/parquet/internal/bmi/bitmap_bmi2_s390x.go b/go/parquet/internal/bmi/bitmap_bmi2_s390x.go
new file mode 100644
index 00000000000..498d5452e17
--- /dev/null
+++ b/go/parquet/internal/bmi/bitmap_bmi2_s390x.go
@@ -0,0 +1,24 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build !noasm
+
+package bmi
+
+func init() {
+	funclist.extractBits = extractBitsGo
+	funclist.gtbitmap = greaterThanBitmapGo
+}
diff --git a/go/parquet/internal/bmi/bmi_noasm.go b/go/parquet/internal/bmi/bmi.go
similarity index 89%
rename from go/parquet/internal/bmi/bmi_noasm.go
rename to go/parquet/internal/bmi/bmi.go
index 7ebb19597ee..ea0f6e374fe 100644
--- a/go/parquet/internal/bmi/bmi_noasm.go
+++ b/go/parquet/internal/bmi/bmi.go
@@ -14,10 +14,36 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+// Package bmi contains helpers for manipulating bitmaps via BMI2 extensions
+// properly falling back to pure go implementations if the CPU doesn't support
+// BMI2.
 package bmi
 
 import "math/bits"
 
+type funcs struct {
+	extractBits func(uint64, uint64) uint64
+	gtbitmap    func([]int16, int16) uint64
+}
+
+var funclist funcs
+
+// ExtractBits performs a Parallel Bit extract as per the PEXT instruction for
+// x86/x86-64 cpus to use the second parameter as a mask to extract the bits from
+// the first argument into a new bitmap.
+//
+// For each bit Set in selectBitmap, the corresponding bits are extracted from bitmap
+// and written to contiguous lower bits of the result, the remaining upper bits are zeroed.
+func ExtractBits(bitmap, selectBitmap uint64) uint64 {
+	return funclist.extractBits(bitmap, selectBitmap)
+}
+
+// GreaterThanBitmap builds a bitmap where each bit corresponds to whether or not
+// the level in that index is greater than the value of rhs.
+func GreaterThanBitmap(levels []int16, rhs int16) uint64 {
+	return funclist.gtbitmap(levels, rhs)
+}
+
 /* Python code to generate lookup table:
 kLookupBits = 5
 count = 0
diff --git a/go/parquet/internal/bmi/bmi_init.go b/go/parquet/internal/bmi/bmi_amd64.go
similarity index 51%
rename from go/parquet/internal/bmi/bmi_init.go
rename to go/parquet/internal/bmi/bmi_amd64.go
index e82b8e556e5..600ef024f69 100644
--- a/go/parquet/internal/bmi/bmi_init.go
+++ b/go/parquet/internal/bmi/bmi_amd64.go
@@ -14,22 +14,14 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-// Package bmi contains helpers for manipulating bitmaps via BMI2 extensions
-// properly falling back to pure go implementations if the CPU doesn't support
-// BMI2.
+// +build !noasm
+
 package bmi
 
 import (
 	"golang.org/x/sys/cpu"
 )
 
-type funcs struct {
-	extractBits func(uint64, uint64) uint64
-	gtbitmap    func([]int16, int16) uint64
-}
-
-var funclist funcs
-
 func init() {
 	if cpu.X86.HasBMI2 {
 		funclist.extractBits = extractBitsBMI2
@@ -42,19 +34,3 @@ func init() {
 		funclist.gtbitmap = greaterThanBitmapGo
 	}
 }
-
-// ExtractBits performs a Parallel Bit extract as per the PEXT instruction for
-// x86/x86-64 cpus to use the second parameter as a mask to extract the bits from
-// the first argument into a new bitmap.
-//
-// For each bit Set in selectBitmap, the corresponding bits are extracted from bitmap
-// and written to contiguous lower bits of the result, the remaining upper bits are zeroed.
-func ExtractBits(bitmap, selectBitmap uint64) uint64 {
-	return funclist.extractBits(bitmap, selectBitmap)
-}
-
-// GreaterThanBitmap builds a bitmap where each bit corresponds to whether or not
-// the level in that index is greater than the value of rhs.
-func GreaterThanBitmap(levels []int16, rhs int16) uint64 {
-	return funclist.gtbitmap(levels, rhs)
-}
diff --git a/go/parquet/internal/utils/bit_packing.go b/go/parquet/internal/utils/bit_packing_amd64.go
similarity index 100%
rename from go/parquet/internal/utils/bit_packing.go
rename to go/parquet/internal/utils/bit_packing_amd64.go
diff --git a/go/parquet/internal/utils/bit_packing_avx2.go b/go/parquet/internal/utils/bit_packing_avx2_amd64.go
similarity index 100%
rename from go/parquet/internal/utils/bit_packing_avx2.go
rename to go/parquet/internal/utils/bit_packing_avx2_amd64.go
diff --git a/go/parquet/internal/utils/bit_packing_avx2.s b/go/parquet/internal/utils/bit_packing_avx2_amd64.s
similarity index 100%
rename from go/parquet/internal/utils/bit_packing_avx2.s
rename to go/parquet/internal/utils/bit_packing_avx2_amd64.s
diff --git a/go/parquet/internal/utils/bit_packing_s390x.go b/go/parquet/internal/utils/bit_packing_s390x.go
new file mode 100644
index 00000000000..58f869c3f5d
--- /dev/null
+++ b/go/parquet/internal/utils/bit_packing_s390x.go
@@ -0,0 +1,23 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build !noasm
+
+package utils
+
+import "io"
+
+var unpack32 func(io.Reader, []uint32, int) int = unpack32Default
diff --git a/go/parquet/internal/utils/bit_run_reader.go b/go/parquet/internal/utils/bit_run_reader.go
index 2c704cd6a5f..7af0aa056aa 100644
--- a/go/parquet/internal/utils/bit_run_reader.go
+++ b/go/parquet/internal/utils/bit_run_reader.go
@@ -136,6 +136,8 @@ func (b *bitRunReader) loadWord(bitsRemaining int64) {
 		copy(wordptr, b.bitmap[:nbytes])
 
 		bitutil.SetBitTo(wordptr, int(bitsRemaining), bitutil.BitIsNotSet(wordptr, int(bitsRemaining-1)))
+		// reset the value to little endian for big endian architectures
+		b.word = toLEFunc(b.word)
 	}
 
 	// Two cases:
diff --git a/go/parquet/internal/utils/min_max_avx2.go b/go/parquet/internal/utils/min_max_avx2_amd64.go
similarity index 100%
rename from go/parquet/internal/utils/min_max_avx2.go
rename to go/parquet/internal/utils/min_max_avx2_amd64.go
diff --git a/go/parquet/internal/utils/min_max_avx2.s b/go/parquet/internal/utils/min_max_avx2_amd64.s
similarity index 100%
rename from go/parquet/internal/utils/min_max_avx2.s
rename to go/parquet/internal/utils/min_max_avx2_amd64.s
diff --git a/go/parquet/internal/utils/min_max_sse4.go b/go/parquet/internal/utils/min_max_sse4_amd64.go
similarity index 100%
rename from go/parquet/internal/utils/min_max_sse4.go
rename to go/parquet/internal/utils/min_max_sse4_amd64.go
diff --git a/go/parquet/internal/utils/min_max_sse4.s b/go/parquet/internal/utils/min_max_sse4_amd64.s
similarity index 100%
rename from go/parquet/internal/utils/min_max_sse4.s
rename to go/parquet/internal/utils/min_max_sse4_amd64.s
diff --git a/go/parquet/internal/utils/unpack_bool_avx2.go b/go/parquet/internal/utils/unpack_bool_avx2_amd64.go
similarity index 100%
rename from go/parquet/internal/utils/unpack_bool_avx2.go
rename to go/parquet/internal/utils/unpack_bool_avx2_amd64.go
diff --git a/go/parquet/internal/utils/unpack_bool_avx2.s b/go/parquet/internal/utils/unpack_bool_avx2_amd64.s
similarity index 100%
rename from go/parquet/internal/utils/unpack_bool_avx2.s
rename to go/parquet/internal/utils/unpack_bool_avx2_amd64.s
diff --git a/go/parquet/internal/utils/unpack_bool_s390x.go b/go/parquet/internal/utils/unpack_bool_s390x.go
new file mode 100644
index 00000000000..d833c2b9d62
--- /dev/null
+++ b/go/parquet/internal/utils/unpack_bool_s390x.go
@@ -0,0 +1,25 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build !noasm
+
+package utils
+
+// BytesToBools when built with the noasm tag will direct to the pure go implementation
+// for converting a bitmap to a slice of bools
+func BytesToBools(in []byte, out []bool) {
+	bytesToBoolsGo(in, out)
+}
diff --git a/go/parquet/internal/utils/unpack_bool_sse4.go b/go/parquet/internal/utils/unpack_bool_sse4_amd64.go
similarity index 100%
rename from go/parquet/internal/utils/unpack_bool_sse4.go
rename to go/parquet/internal/utils/unpack_bool_sse4_amd64.go
diff --git a/go/parquet/internal/utils/unpack_bool_sse4.s b/go/parquet/internal/utils/unpack_bool_sse4_amd64.s
similarity index 100%
rename from go/parquet/internal/utils/unpack_bool_sse4.s
rename to go/parquet/internal/utils/unpack_bool_sse4_amd64.s

From 2f65d11509f0bea9171bfa54e22e4cb6a8e46d17 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Thu, 10 Jun 2021 09:32:51 +0200
Subject: [PATCH 389/719] ARROW-13018: [C++][Docs] Use consistent terminology
 for nulls (min_count) in scalar aggregate kernels

Closes #10490 from jorisvandenbossche/ARROW-13018

Authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 .../arrow/compute/kernels/aggregate_basic.cc  | 25 +++++++++----------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic.cc b/cpp/src/arrow/compute/kernels/aggregate_basic.cc
index 1ea63cdc4a0..6a844817686 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_basic.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_basic.cc
@@ -378,26 +378,25 @@ namespace internal {
 namespace {
 
 const FunctionDoc count_doc{"Count the number of null / non-null values",
-                            ("By default, non-null values are counted.\n"
+                            ("By default, only non-null values are counted.\n"
                              "This can be changed through ScalarAggregateOptions."),
                             {"array"},
                             "ScalarAggregateOptions"};
 
-const FunctionDoc sum_doc{"Sum values of a numeric array",
-                          ("Null values are ignored. Minimum count of non-NA\n"
-                           "values can be set and NAN is returned if too "
-                           "few are present.\n"
-                           "This can be changed through ScalarAggregateOptions."),
-                          {"array"},
-                          "ScalarAggregateOptions"};
+const FunctionDoc sum_doc{
+    "Compute the sum of a numeric array",
+    ("Null values are ignored by default. Minimum count of non-null\n"
+     "values can be set and null is returned if too few are present.\n"
+     "This can be changed through ScalarAggregateOptions."),
+    {"array"},
+    "ScalarAggregateOptions"};
 
 const FunctionDoc mean_doc{
     "Compute the mean of a numeric array",
-    ("Null values are ignored by default. Minimum count of non-NA\n"
-     "values can be set and NAN is returned if too few are \n"
-     "present. The result is always computed as a double, \n"
-     "regardless of the input types.\n"
-     "This can be changed through ScalarAggregateOptions."),
+    ("Null values are ignored by default. Minimum count of non-null\n"
+     "values can be set and null is returned if too few are "
+     "present.\nThis can be changed through ScalarAggregateOptions.\n"
+     "The result is always computed as a double, regardless of the input types."),
     {"array"},
     "ScalarAggregateOptions"};
 

From f878ee5df5282ddcb2f7b6b604a820840b56e6e1 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Thu, 10 Jun 2021 11:19:39 +0200
Subject: [PATCH 390/719] ARROW-12150: [Python] Correctly infer type of
 mixed-precision Decimals

We were currently taking the max of current + new precision and scale, independently. But a new value could require a higher precision if it's number of integrals (precision - scale) is higher than the current set.

Closes #9948 from jorisvandenbossche/ARROW-12150-decimal-infer-type

Lead-authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Co-authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/python/decimal.cc               | 53 +++++++------
 cpp/src/arrow/python/python_test.cc           |  4 +-
 .../pyarrow/tests/parquet/test_data_types.py  |  2 +-
 python/pyarrow/tests/test_convert_builtin.py  | 78 +++++++++++++++++++
 4 files changed, 107 insertions(+), 30 deletions(-)

diff --git a/cpp/src/arrow/python/decimal.cc b/cpp/src/arrow/python/decimal.cc
index 67389095b94..0c00fcfaa8e 100644
--- a/cpp/src/arrow/python/decimal.cc
+++ b/cpp/src/arrow/python/decimal.cc
@@ -72,23 +72,19 @@ static Status InferDecimalPrecisionAndScale(PyObject* python_decimal, int32_t* p
   const auto exponent = static_cast<int32_t>(PyLong_AsLong(py_exponent.obj()));
   RETURN_IF_PYERROR();
 
-  const int32_t abs_exponent = std::abs(exponent);
-
-  int32_t num_additional_zeros;
-
-  if (num_digits <= abs_exponent) {
-    DCHECK_NE(exponent, 0) << "exponent should never be zero here";
-
-    // we have leading/trailing zeros, leading if exponent is negative
-    num_additional_zeros = exponent < 0 ? abs_exponent - num_digits : exponent;
-    *scale = static_cast<int32_t>(exponent < 0) * -exponent;
-  } else {
-    // we can use the number of digits as the precision
-    num_additional_zeros = 0;
+  if (exponent < 0) {
+    // If exponent > num_digits, we have a number with leading zeros
+    // such as 0.01234.  Ensure we have enough precision for leading zeros
+    // (which are not included in num_digits).
+    *precision = std::max(num_digits, -exponent);
     *scale = -exponent;
+  } else {
+    // Trailing zeros are not included in num_digits, need to add to precision.
+    // Note we don't generate negative scales as they are poorly supported
+    // in non-Arrow systems.
+    *precision = num_digits + exponent;
+    *scale = 0;
   }
-
-  *precision = num_digits + num_additional_zeros;
   return Status::OK();
 }
 
@@ -120,16 +116,18 @@ Status DecimalFromStdString(const std::string& decimal_string,
   const int32_t precision = arrow_type.precision();
   const int32_t scale = arrow_type.scale();
 
-  if (ARROW_PREDICT_FALSE(inferred_precision > precision)) {
+  if (scale != inferred_scale) {
+    DCHECK_NE(out, NULLPTR);
+    ARROW_ASSIGN_OR_RAISE(*out, out->Rescale(inferred_scale, scale));
+  }
+
+  auto inferred_scale_delta = inferred_scale - scale;
+  if (ARROW_PREDICT_FALSE((inferred_precision - inferred_scale_delta) > precision)) {
     return Status::Invalid(
         "Decimal type with precision ", inferred_precision,
         " does not fit into precision inferred from first array element: ", precision);
   }
 
-  if (scale != inferred_scale) {
-    DCHECK_NE(out, NULLPTR);
-    ARROW_ASSIGN_OR_RAISE(*out, out->Rescale(inferred_scale, scale));
-  }
   return Status::OK();
 }
 
@@ -214,16 +212,17 @@ DecimalMetadata::DecimalMetadata(int32_t precision, int32_t scale)
     : precision_(precision), scale_(scale) {}
 
 Status DecimalMetadata::Update(int32_t suggested_precision, int32_t suggested_scale) {
-  const int32_t current_precision = precision_;
-  precision_ = std::max(current_precision, suggested_precision);
-
   const int32_t current_scale = scale_;
   scale_ = std::max(current_scale, suggested_scale);
 
-  // if our suggested scale is zero and we don't yet have enough precision then we need to
-  // add whatever the current scale is to the precision
-  if (suggested_scale == 0 && suggested_precision > current_precision) {
-    precision_ += scale_;
+  const int32_t current_precision = precision_;
+
+  if (current_precision == std::numeric_limits<int32_t>::min()) {
+    precision_ = suggested_precision;
+  } else {
+    auto num_digits = std::max(current_precision - current_scale,
+                               suggested_precision - suggested_scale);
+    precision_ = std::max(num_digits + scale_, current_precision);
   }
 
   return Status::OK();
diff --git a/cpp/src/arrow/python/python_test.cc b/cpp/src/arrow/python/python_test.cc
index 19eb86a09c6..d1c00e68cc4 100644
--- a/cpp/src/arrow/python/python_test.cc
+++ b/cpp/src/arrow/python/python_test.cc
@@ -307,8 +307,8 @@ TEST_F(DecimalTest, TestInferPrecisionAndNegativeScale) {
   internal::DecimalMetadata metadata;
   ASSERT_OK(metadata.Update(python_decimal.obj()));
 
-  const auto expected_precision = 9;
-  const int32_t expected_scale = -2;
+  const auto expected_precision = 11;
+  const int32_t expected_scale = 0;
 
   ASSERT_EQ(expected_precision, metadata.precision());
   ASSERT_EQ(expected_scale, metadata.scale());
diff --git a/python/pyarrow/tests/parquet/test_data_types.py b/python/pyarrow/tests/parquet/test_data_types.py
index b99d8f26610..850dff94df4 100644
--- a/python/pyarrow/tests/parquet/test_data_types.py
+++ b/python/pyarrow/tests/parquet/test_data_types.py
@@ -240,7 +240,7 @@ def test_decimal_roundtrip(tempdir, use_legacy_dataset):
 
 @pytest.mark.pandas
 @pytest.mark.xfail(
-    raises=pa.ArrowException, reason='Parquet does not support negative scale'
+    raises=OSError, reason='Parquet does not support negative scale'
 )
 def test_decimal_roundtrip_negative_scale(tempdir):
     expected = pd.DataFrame({'decimal_num': [decimal.Decimal('1.23E4')]})
diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py
index 933d8d61214..ba94b340bd3 100644
--- a/python/pyarrow/tests/test_convert_builtin.py
+++ b/python/pyarrow/tests/test_convert_builtin.py
@@ -1501,6 +1501,84 @@ def test_sequence_decimal_too_high_precision():
         pa.array([decimal.Decimal('1' * 80)])
 
 
+def test_sequence_decimal_infer():
+    for data, typ in [
+        # simple case
+        (decimal.Decimal('1.234'), pa.decimal128(4, 3)),
+        # trailing zeros
+        (decimal.Decimal('12300'), pa.decimal128(5, 0)),
+        (decimal.Decimal('12300.0'), pa.decimal128(6, 1)),
+        # scientific power notation
+        (decimal.Decimal('1.23E+4'), pa.decimal128(5, 0)),
+        (decimal.Decimal('123E+2'), pa.decimal128(5, 0)),
+        (decimal.Decimal('123E+4'), pa.decimal128(7, 0)),
+        # leading zeros
+        (decimal.Decimal('0.0123'), pa.decimal128(4, 4)),
+        (decimal.Decimal('0.01230'), pa.decimal128(5, 5)),
+        (decimal.Decimal('1.230E-2'), pa.decimal128(5, 5)),
+    ]:
+        assert pa.infer_type([data]) == typ
+        arr = pa.array([data])
+        assert arr.type == typ
+        assert arr.to_pylist()[0] == data
+
+
+def test_sequence_decimal_infer_mixed():
+    # ARROW-12150 - ensure mixed precision gets correctly inferred to
+    # common type that can hold all input values
+    cases = [
+        ([decimal.Decimal('1.234'), decimal.Decimal('3.456')],
+         pa.decimal128(4, 3)),
+        ([decimal.Decimal('1.234'), decimal.Decimal('456.7')],
+         pa.decimal128(6, 3)),
+        ([decimal.Decimal('123.4'), decimal.Decimal('4.567')],
+         pa.decimal128(6, 3)),
+        ([decimal.Decimal('123e2'), decimal.Decimal('4567e3')],
+         pa.decimal128(7, 0)),
+        ([decimal.Decimal('123e4'), decimal.Decimal('4567e2')],
+         pa.decimal128(7, 0)),
+        ([decimal.Decimal('0.123'), decimal.Decimal('0.04567')],
+         pa.decimal128(5, 5)),
+        ([decimal.Decimal('0.001'), decimal.Decimal('1.01E5')],
+         pa.decimal128(9, 3)),
+    ]
+    for data, typ in cases:
+        assert pa.infer_type(data) == typ
+        arr = pa.array(data)
+        assert arr.type == typ
+        assert arr.to_pylist() == data
+
+
+def test_sequence_decimal_given_type():
+    for data, typs, wrong_typs in [
+        # simple case
+        (
+            decimal.Decimal('1.234'),
+            [pa.decimal128(4, 3), pa.decimal128(5, 3), pa.decimal128(5, 4)],
+            [pa.decimal128(4, 2), pa.decimal128(4, 4)]
+        ),
+        # trailing zeros
+        (
+            decimal.Decimal('12300'),
+            [pa.decimal128(5, 0), pa.decimal128(6, 0), pa.decimal128(3, -2)],
+            [pa.decimal128(4, 0), pa.decimal128(3, -3)]
+        ),
+        # scientific power notation
+        (
+            decimal.Decimal('1.23E+4'),
+            [pa.decimal128(5, 0), pa.decimal128(6, 0), pa.decimal128(3, -2)],
+            [pa.decimal128(4, 0), pa.decimal128(3, -3)]
+        ),
+    ]:
+        for typ in typs:
+            arr = pa.array([data], type=typ)
+            assert arr.type == typ
+            assert arr.to_pylist()[0] == data
+        for typ in wrong_typs:
+            with pytest.raises(ValueError):
+                pa.array([data], type=typ)
+
+
 def test_range_types():
     arr1 = pa.array(range(3))
     arr2 = pa.array((0, 1, 2))

From 0ef2080f6be27465285914fa2535e5e853a298b2 Mon Sep 17 00:00:00 2001
From: "Uwe L. Korn" <uwe.korn@quantco.com>
Date: Thu, 10 Jun 2021 14:37:02 +0200
Subject: [PATCH 391/719] ARROW-12738: [C++/Python/R] Update conda variant
 files

Closes #10499 from xhochy/ARROW-12738

Authored-by: Uwe L. Korn <uwe.korn@quantco.com>
Signed-off-by: Uwe L. Korn <uwe.korn@quantco.com>
---
 ...ion10.2numpy1.17python3.6.____cpython.yaml | 14 +++++++-------
 ...ion10.2numpy1.17python3.7.____cpython.yaml | 14 +++++++-------
 ...ion10.2numpy1.17python3.8.____cpython.yaml | 14 +++++++-------
 ...ion10.2numpy1.19python3.9.____cpython.yaml | 14 +++++++-------
 ...ionNonenumpy1.17python3.6.____cpython.yaml | 14 +++++++-------
 ...ionNonenumpy1.17python3.7.____cpython.yaml |  4 ++--
 ...ionNonenumpy1.17python3.8.____cpython.yaml | 14 +++++++-------
 ...ionNonenumpy1.19python3.9.____cpython.yaml | 14 +++++++-------
 ...arch64_numpy1.17python3.6.____cpython.yaml | 12 ++++++------
 ...arch64_numpy1.17python3.7.____cpython.yaml | 12 ++++++------
 ...arch64_numpy1.17python3.8.____cpython.yaml | 12 ++++++------
 ...arch64_numpy1.19python3.9.____cpython.yaml | 12 ++++++------
 ...osx_64_numpy1.17python3.6.____cpython.yaml |  4 ++--
 ...osx_64_numpy1.17python3.7.____cpython.yaml |  4 ++--
 ...osx_64_numpy1.17python3.8.____cpython.yaml | 14 +++++++-------
 ...osx_64_numpy1.19python3.9.____cpython.yaml | 14 +++++++-------
 .../osx_arm64_python3.8.____cpython.yaml      | 14 +++++++-------
 .../osx_arm64_python3.9.____cpython.yaml      | 14 +++++++-------
 .../.ci_support/r/linux_64_r_base4.0.yaml     | 13 ++++++++++---
 .../.ci_support/r/linux_64_r_base4.1.yaml     |  2 +-
 .../.ci_support/r/osx_64_r_base4.0.yaml       |  2 +-
 .../.ci_support/r/osx_64_r_base4.1.yaml       |  2 +-
 ...ionNonenumpy1.17python3.6.____cpython.yaml | 12 ++++++------
 ...ionNonenumpy1.17python3.7.____cpython.yaml | 12 ++++++------
 ...ionNonenumpy1.17python3.8.____cpython.yaml | 12 ++++++------
 ...ionNonenumpy1.19python3.9.____cpython.yaml | 12 ++++++------
 dev/tasks/conda-recipes/azure.osx.yml         | 19 +++++++++++--------
 27 files changed, 155 insertions(+), 145 deletions(-)

diff --git a/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.6.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.6.____cpython.yaml
index dd4c04197c9..9d014c1a2f8 100644
--- a/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.6.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.6.____cpython.yaml
@@ -1,5 +1,5 @@
 aws_sdk_cpp:
-- 1.8.151
+- 1.8.186
 bzip2:
 - '1'
 c_compiler:
@@ -25,17 +25,17 @@ docker_image:
 gflags:
 - '2.2'
 glog:
-- 0.4.0
+- '0.5'
 grpc_cpp:
-- '1.36'
+- '1.38'
 libprotobuf:
-- '3.15'
+- '3.16'
 lz4_c:
 - 1.9.3
 numpy:
 - '1.17'
 orc:
-- 1.6.7
+- 1.6.8
 pin_run_as_build:
   bzip2:
     max_pin: x
@@ -49,7 +49,7 @@ pin_run_as_build:
 python:
 - 3.6.* *_cpython
 re2:
-- 2021.04.01
+- 2021.06.01
 snappy:
 - '1'
 target_platform:
@@ -67,4 +67,4 @@ zip_keys:
 zlib:
 - '1.2'
 zstd:
-- '1.4'
+- '1.5'
diff --git a/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.7.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.7.____cpython.yaml
index f0c43929b56..b8cde8e0752 100644
--- a/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.7.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.7.____cpython.yaml
@@ -1,5 +1,5 @@
 aws_sdk_cpp:
-- 1.8.151
+- 1.8.186
 bzip2:
 - '1'
 c_compiler:
@@ -25,17 +25,17 @@ docker_image:
 gflags:
 - '2.2'
 glog:
-- 0.4.0
+- '0.5'
 grpc_cpp:
-- '1.36'
+- '1.38'
 libprotobuf:
-- '3.15'
+- '3.16'
 lz4_c:
 - 1.9.3
 numpy:
 - '1.17'
 orc:
-- 1.6.7
+- 1.6.8
 pin_run_as_build:
   bzip2:
     max_pin: x
@@ -49,7 +49,7 @@ pin_run_as_build:
 python:
 - 3.7.* *_cpython
 re2:
-- 2021.04.01
+- 2021.06.01
 snappy:
 - '1'
 target_platform:
@@ -67,4 +67,4 @@ zip_keys:
 zlib:
 - '1.2'
 zstd:
-- '1.4'
+- '1.5'
diff --git a/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.8.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.8.____cpython.yaml
index 149e70f438b..5e5c7ab7c93 100644
--- a/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.8.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.8.____cpython.yaml
@@ -1,5 +1,5 @@
 aws_sdk_cpp:
-- 1.8.151
+- 1.8.186
 bzip2:
 - '1'
 c_compiler:
@@ -25,17 +25,17 @@ docker_image:
 gflags:
 - '2.2'
 glog:
-- 0.4.0
+- '0.5'
 grpc_cpp:
-- '1.36'
+- '1.38'
 libprotobuf:
-- '3.15'
+- '3.16'
 lz4_c:
 - 1.9.3
 numpy:
 - '1.17'
 orc:
-- 1.6.7
+- 1.6.8
 pin_run_as_build:
   bzip2:
     max_pin: x
@@ -49,7 +49,7 @@ pin_run_as_build:
 python:
 - 3.8.* *_cpython
 re2:
-- 2021.04.01
+- 2021.06.01
 snappy:
 - '1'
 target_platform:
@@ -67,4 +67,4 @@ zip_keys:
 zlib:
 - '1.2'
 zstd:
-- '1.4'
+- '1.5'
diff --git a/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.19python3.9.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.19python3.9.____cpython.yaml
index fb15d4e7156..d0926aa3cef 100644
--- a/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.19python3.9.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.19python3.9.____cpython.yaml
@@ -1,5 +1,5 @@
 aws_sdk_cpp:
-- 1.8.151
+- 1.8.186
 bzip2:
 - '1'
 c_compiler:
@@ -25,17 +25,17 @@ docker_image:
 gflags:
 - '2.2'
 glog:
-- 0.4.0
+- '0.5'
 grpc_cpp:
-- '1.36'
+- '1.38'
 libprotobuf:
-- '3.15'
+- '3.16'
 lz4_c:
 - 1.9.3
 numpy:
 - '1.19'
 orc:
-- 1.6.7
+- 1.6.8
 pin_run_as_build:
   bzip2:
     max_pin: x
@@ -49,7 +49,7 @@ pin_run_as_build:
 python:
 - 3.9.* *_cpython
 re2:
-- 2021.04.01
+- 2021.06.01
 snappy:
 - '1'
 target_platform:
@@ -67,4 +67,4 @@ zip_keys:
 zlib:
 - '1.2'
 zstd:
-- '1.4'
+- '1.5'
diff --git a/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.6.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.6.____cpython.yaml
index d977f9e5779..6625c55c2c9 100644
--- a/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.6.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.6.____cpython.yaml
@@ -1,5 +1,5 @@
 aws_sdk_cpp:
-- 1.8.151
+- 1.8.186
 bzip2:
 - '1'
 c_compiler:
@@ -25,17 +25,17 @@ docker_image:
 gflags:
 - '2.2'
 glog:
-- 0.4.0
+- '0.5'
 grpc_cpp:
-- '1.36'
+- '1.38'
 libprotobuf:
-- '3.15'
+- '3.16'
 lz4_c:
 - 1.9.3
 numpy:
 - '1.17'
 orc:
-- 1.6.7
+- 1.6.8
 pin_run_as_build:
   bzip2:
     max_pin: x
@@ -49,7 +49,7 @@ pin_run_as_build:
 python:
 - 3.6.* *_cpython
 re2:
-- 2021.04.01
+- 2021.06.01
 snappy:
 - '1'
 target_platform:
@@ -67,4 +67,4 @@ zip_keys:
 zlib:
 - '1.2'
 zstd:
-- '1.4'
+- '1.5'
diff --git a/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython.yaml
index 1f4c527effd..d356a8a56cf 100644
--- a/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython.yaml
@@ -49,7 +49,7 @@ pin_run_as_build:
 python:
 - 3.7.* *_cpython
 re2:
-- 2021.04.01
+- 2021.06.01
 snappy:
 - '1'
 target_platform:
@@ -67,4 +67,4 @@ zip_keys:
 zlib:
 - '1.2'
 zstd:
-- '1.5'
\ No newline at end of file
+- '1.5'
diff --git a/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.8.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.8.____cpython.yaml
index 7105f634953..61f311506e6 100644
--- a/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.8.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.8.____cpython.yaml
@@ -1,5 +1,5 @@
 aws_sdk_cpp:
-- 1.8.151
+- 1.8.186
 bzip2:
 - '1'
 c_compiler:
@@ -25,17 +25,17 @@ docker_image:
 gflags:
 - '2.2'
 glog:
-- 0.4.0
+- '0.5'
 grpc_cpp:
-- '1.36'
+- '1.38'
 libprotobuf:
-- '3.15'
+- '3.16'
 lz4_c:
 - 1.9.3
 numpy:
 - '1.17'
 orc:
-- 1.6.7
+- 1.6.8
 pin_run_as_build:
   bzip2:
     max_pin: x
@@ -49,7 +49,7 @@ pin_run_as_build:
 python:
 - 3.8.* *_cpython
 re2:
-- 2021.04.01
+- 2021.06.01
 snappy:
 - '1'
 target_platform:
@@ -67,4 +67,4 @@ zip_keys:
 zlib:
 - '1.2'
 zstd:
-- '1.4'
+- '1.5'
diff --git a/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.19python3.9.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.19python3.9.____cpython.yaml
index efe0148cc81..6abfe8271dc 100644
--- a/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.19python3.9.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.19python3.9.____cpython.yaml
@@ -1,5 +1,5 @@
 aws_sdk_cpp:
-- 1.8.151
+- 1.8.186
 bzip2:
 - '1'
 c_compiler:
@@ -25,17 +25,17 @@ docker_image:
 gflags:
 - '2.2'
 glog:
-- 0.4.0
+- '0.5'
 grpc_cpp:
-- '1.36'
+- '1.38'
 libprotobuf:
-- '3.15'
+- '3.16'
 lz4_c:
 - 1.9.3
 numpy:
 - '1.19'
 orc:
-- 1.6.7
+- 1.6.8
 pin_run_as_build:
   bzip2:
     max_pin: x
@@ -49,7 +49,7 @@ pin_run_as_build:
 python:
 - 3.9.* *_cpython
 re2:
-- 2021.04.01
+- 2021.06.01
 snappy:
 - '1'
 target_platform:
@@ -67,4 +67,4 @@ zip_keys:
 zlib:
 - '1.2'
 zstd:
-- '1.4'
+- '1.5'
diff --git a/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.6.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.6.____cpython.yaml
index 34a64ceb979..408e05667bd 100644
--- a/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.6.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.6.____cpython.yaml
@@ -27,17 +27,17 @@ docker_image:
 gflags:
 - '2.2'
 glog:
-- 0.4.0
+- '0.5'
 grpc_cpp:
-- '1.37'
+- '1.38'
 libprotobuf:
-- '3.15'
+- '3.16'
 lz4_c:
 - 1.9.3
 numpy:
 - '1.17'
 orc:
-- 1.6.7
+- 1.6.8
 pin_run_as_build:
   bzip2:
     max_pin: x
@@ -51,7 +51,7 @@ pin_run_as_build:
 python:
 - 3.6.* *_cpython
 re2:
-- 2021.04.01
+- 2021.06.01
 snappy:
 - '1'
 target_platform:
@@ -66,4 +66,4 @@ zip_keys:
 zlib:
 - '1.2'
 zstd:
-- '1.4'
+- '1.5'
diff --git a/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.7.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.7.____cpython.yaml
index 052c58122a6..ef7ff818a54 100644
--- a/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.7.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.7.____cpython.yaml
@@ -27,17 +27,17 @@ docker_image:
 gflags:
 - '2.2'
 glog:
-- 0.4.0
+- '0.5'
 grpc_cpp:
-- '1.37'
+- '1.38'
 libprotobuf:
-- '3.15'
+- '3.16'
 lz4_c:
 - 1.9.3
 numpy:
 - '1.17'
 orc:
-- 1.6.7
+- 1.6.8
 pin_run_as_build:
   bzip2:
     max_pin: x
@@ -51,7 +51,7 @@ pin_run_as_build:
 python:
 - 3.7.* *_cpython
 re2:
-- 2021.04.01
+- 2021.06.01
 snappy:
 - '1'
 target_platform:
@@ -66,4 +66,4 @@ zip_keys:
 zlib:
 - '1.2'
 zstd:
-- '1.4'
+- '1.5'
diff --git a/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.8.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.8.____cpython.yaml
index a61e0c42b55..ea0327e5c2a 100644
--- a/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.8.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.8.____cpython.yaml
@@ -27,17 +27,17 @@ docker_image:
 gflags:
 - '2.2'
 glog:
-- 0.4.0
+- '0.5'
 grpc_cpp:
-- '1.37'
+- '1.38'
 libprotobuf:
-- '3.15'
+- '3.16'
 lz4_c:
 - 1.9.3
 numpy:
 - '1.17'
 orc:
-- 1.6.7
+- 1.6.8
 pin_run_as_build:
   bzip2:
     max_pin: x
@@ -51,7 +51,7 @@ pin_run_as_build:
 python:
 - 3.8.* *_cpython
 re2:
-- 2021.04.01
+- 2021.06.01
 snappy:
 - '1'
 target_platform:
@@ -66,4 +66,4 @@ zip_keys:
 zlib:
 - '1.2'
 zstd:
-- '1.4'
+- '1.5'
diff --git a/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.19python3.9.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.19python3.9.____cpython.yaml
index 0eccab25e47..5ba7c16b1b0 100644
--- a/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.19python3.9.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.19python3.9.____cpython.yaml
@@ -27,17 +27,17 @@ docker_image:
 gflags:
 - '2.2'
 glog:
-- 0.4.0
+- '0.5'
 grpc_cpp:
-- '1.37'
+- '1.38'
 libprotobuf:
-- '3.15'
+- '3.16'
 lz4_c:
 - 1.9.3
 numpy:
 - '1.19'
 orc:
-- 1.6.7
+- 1.6.8
 pin_run_as_build:
   bzip2:
     max_pin: x
@@ -51,7 +51,7 @@ pin_run_as_build:
 python:
 - 3.9.* *_cpython
 re2:
-- 2021.04.01
+- 2021.06.01
 snappy:
 - '1'
 target_platform:
@@ -66,4 +66,4 @@ zip_keys:
 zlib:
 - '1.2'
 zstd:
-- '1.4'
+- '1.5'
diff --git a/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.6.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.6.____cpython.yaml
index 40c017cf36e..d4a7e2a75b5 100644
--- a/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.6.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.6.____cpython.yaml
@@ -47,7 +47,7 @@ pin_run_as_build:
 python:
 - 3.6.* *_cpython
 re2:
-- 2021.04.01
+- 2021.06.01
 snappy:
 - '1'
 target_platform:
@@ -62,4 +62,4 @@ zip_keys:
 zlib:
 - '1.2'
 zstd:
-- '1.5'
\ No newline at end of file
+- '1.5'
diff --git a/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.7.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.7.____cpython.yaml
index 378a28348b2..c7e57ba9a1a 100644
--- a/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.7.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.7.____cpython.yaml
@@ -47,7 +47,7 @@ pin_run_as_build:
 python:
 - 3.7.* *_cpython
 re2:
-- 2021.04.01
+- 2021.06.01
 snappy:
 - '1'
 target_platform:
@@ -62,4 +62,4 @@ zip_keys:
 zlib:
 - '1.2'
 zstd:
-- '1.5'
\ No newline at end of file
+- '1.5'
diff --git a/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.8.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.8.____cpython.yaml
index cdd53c6006e..7fd69d4e965 100644
--- a/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.8.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.8.____cpython.yaml
@@ -1,7 +1,7 @@
 MACOSX_DEPLOYMENT_TARGET:
 - '10.9'
 aws_sdk_cpp:
-- 1.8.151
+- 1.8.186
 bzip2:
 - '1'
 c_compiler:
@@ -21,11 +21,11 @@ cxx_compiler_version:
 gflags:
 - '2.2'
 glog:
-- 0.4.0
+- '0.5'
 grpc_cpp:
-- '1.36'
+- '1.38'
 libprotobuf:
-- '3.15'
+- '3.16'
 lz4_c:
 - 1.9.3
 macos_machine:
@@ -33,7 +33,7 @@ macos_machine:
 numpy:
 - '1.17'
 orc:
-- 1.6.7
+- 1.6.8
 pin_run_as_build:
   bzip2:
     max_pin: x
@@ -47,7 +47,7 @@ pin_run_as_build:
 python:
 - 3.8.* *_cpython
 re2:
-- 2021.04.01
+- 2021.06.01
 snappy:
 - '1'
 target_platform:
@@ -62,4 +62,4 @@ zip_keys:
 zlib:
 - '1.2'
 zstd:
-- '1.4'
+- '1.5'
diff --git a/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.19python3.9.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.19python3.9.____cpython.yaml
index 37df6a9ec53..8ba2718c411 100644
--- a/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.19python3.9.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.19python3.9.____cpython.yaml
@@ -1,7 +1,7 @@
 MACOSX_DEPLOYMENT_TARGET:
 - '10.9'
 aws_sdk_cpp:
-- 1.8.151
+- 1.8.186
 bzip2:
 - '1'
 c_compiler:
@@ -21,11 +21,11 @@ cxx_compiler_version:
 gflags:
 - '2.2'
 glog:
-- 0.4.0
+- '0.5'
 grpc_cpp:
-- '1.36'
+- '1.38'
 libprotobuf:
-- '3.15'
+- '3.16'
 lz4_c:
 - 1.9.3
 macos_machine:
@@ -33,7 +33,7 @@ macos_machine:
 numpy:
 - '1.19'
 orc:
-- 1.6.7
+- 1.6.8
 pin_run_as_build:
   bzip2:
     max_pin: x
@@ -47,7 +47,7 @@ pin_run_as_build:
 python:
 - 3.9.* *_cpython
 re2:
-- 2021.04.01
+- 2021.06.01
 snappy:
 - '1'
 target_platform:
@@ -62,4 +62,4 @@ zip_keys:
 zlib:
 - '1.2'
 zstd:
-- '1.4'
+- '1.5'
diff --git a/dev/tasks/conda-recipes/.ci_support/osx_arm64_python3.8.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/osx_arm64_python3.8.____cpython.yaml
index 5894b8ee70b..e9ae1d9858e 100644
--- a/dev/tasks/conda-recipes/.ci_support/osx_arm64_python3.8.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/osx_arm64_python3.8.____cpython.yaml
@@ -1,7 +1,7 @@
 MACOSX_DEPLOYMENT_TARGET:
 - '11.0'
 aws_sdk_cpp:
-- 1.8.151
+- 1.8.186
 bzip2:
 - '1'
 c_compiler:
@@ -21,11 +21,11 @@ cxx_compiler_version:
 gflags:
 - '2.2'
 glog:
-- 0.4.0
+- '0.5'
 grpc_cpp:
-- '1.36'
+- '1.38'
 libprotobuf:
-- '3.15'
+- '3.16'
 lz4_c:
 - 1.9.3
 macos_machine:
@@ -33,7 +33,7 @@ macos_machine:
 numpy:
 - '1.19'
 orc:
-- 1.6.7
+- 1.6.8
 pin_run_as_build:
   bzip2:
     max_pin: x
@@ -47,7 +47,7 @@ pin_run_as_build:
 python:
 - 3.8.* *_cpython
 re2:
-- 2021.04.01
+- 2021.06.01
 snappy:
 - '1'
 target_platform:
@@ -62,4 +62,4 @@ zip_keys:
 zlib:
 - '1.2'
 zstd:
-- '1.4'
+- '1.5'
diff --git a/dev/tasks/conda-recipes/.ci_support/osx_arm64_python3.9.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/osx_arm64_python3.9.____cpython.yaml
index 4e6014c5db8..84ef1bc9b85 100644
--- a/dev/tasks/conda-recipes/.ci_support/osx_arm64_python3.9.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/osx_arm64_python3.9.____cpython.yaml
@@ -1,7 +1,7 @@
 MACOSX_DEPLOYMENT_TARGET:
 - '11.0'
 aws_sdk_cpp:
-- 1.8.151
+- 1.8.186
 bzip2:
 - '1'
 c_compiler:
@@ -21,11 +21,11 @@ cxx_compiler_version:
 gflags:
 - '2.2'
 glog:
-- 0.4.0
+- '0.5'
 grpc_cpp:
-- '1.36'
+- '1.38'
 libprotobuf:
-- '3.15'
+- '3.16'
 lz4_c:
 - 1.9.3
 macos_machine:
@@ -33,7 +33,7 @@ macos_machine:
 numpy:
 - '1.19'
 orc:
-- 1.6.7
+- 1.6.8
 pin_run_as_build:
   bzip2:
     max_pin: x
@@ -47,7 +47,7 @@ pin_run_as_build:
 python:
 - 3.9.* *_cpython
 re2:
-- 2021.04.01
+- 2021.06.01
 snappy:
 - '1'
 target_platform:
@@ -62,4 +62,4 @@ zip_keys:
 zlib:
 - '1.2'
 zstd:
-- '1.4'
+- '1.5'
diff --git a/dev/tasks/conda-recipes/.ci_support/r/linux_64_r_base4.0.yaml b/dev/tasks/conda-recipes/.ci_support/r/linux_64_r_base4.0.yaml
index 51d26f834cc..dfdfae9665a 100644
--- a/dev/tasks/conda-recipes/.ci_support/r/linux_64_r_base4.0.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/r/linux_64_r_base4.0.yaml
@@ -1,7 +1,9 @@
 c_compiler:
 - gcc
 c_compiler_version:
-- '7'
+- '9'
+cdt_name:
+- cos6
 channel_sources:
 - conda-forge,defaults
 channel_targets:
@@ -9,9 +11,9 @@ channel_targets:
 cxx_compiler:
 - gxx
 cxx_compiler_version:
-- '7'
+- '9'
 docker_image:
-- condaforge/linux-anvil-comp7
+- quay.io/condaforge/linux-anvil-comp7
 pin_run_as_build:
   r-base:
     min_pin: x.x
@@ -20,3 +22,8 @@ r_base:
 - '4.0'
 target_platform:
 - linux-64
+zip_keys:
+- - c_compiler_version
+  - cxx_compiler_version
+- - cdt_name
+  - docker_image
diff --git a/dev/tasks/conda-recipes/.ci_support/r/linux_64_r_base4.1.yaml b/dev/tasks/conda-recipes/.ci_support/r/linux_64_r_base4.1.yaml
index 5bee341b87e..c5f455c1917 100644
--- a/dev/tasks/conda-recipes/.ci_support/r/linux_64_r_base4.1.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/r/linux_64_r_base4.1.yaml
@@ -26,4 +26,4 @@ zip_keys:
 - - c_compiler_version
   - cxx_compiler_version
 - - cdt_name
-  - docker_image
\ No newline at end of file
+  - docker_image
diff --git a/dev/tasks/conda-recipes/.ci_support/r/osx_64_r_base4.0.yaml b/dev/tasks/conda-recipes/.ci_support/r/osx_64_r_base4.0.yaml
index 4d1fe27c357..08bb81d0808 100644
--- a/dev/tasks/conda-recipes/.ci_support/r/osx_64_r_base4.0.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/r/osx_64_r_base4.0.yaml
@@ -24,4 +24,4 @@ target_platform:
 - osx-64
 zip_keys:
 - - c_compiler_version
-  - cxx_compiler_version
\ No newline at end of file
+  - cxx_compiler_version
diff --git a/dev/tasks/conda-recipes/.ci_support/r/osx_64_r_base4.1.yaml b/dev/tasks/conda-recipes/.ci_support/r/osx_64_r_base4.1.yaml
index 4821a15260a..9974c663853 100644
--- a/dev/tasks/conda-recipes/.ci_support/r/osx_64_r_base4.1.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/r/osx_64_r_base4.1.yaml
@@ -24,4 +24,4 @@ target_platform:
 - osx-64
 zip_keys:
 - - c_compiler_version
-  - cxx_compiler_version
\ No newline at end of file
+  - cxx_compiler_version
diff --git a/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.6.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.6.____cpython.yaml
index 8fbbb64af9c..63a7faeaa33 100644
--- a/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.6.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.6.____cpython.yaml
@@ -1,5 +1,5 @@
 aws_sdk_cpp:
-- 1.8.151
+- 1.8.186
 bzip2:
 - '1'
 c_compiler:
@@ -17,11 +17,11 @@ cxx_compiler:
 gflags:
 - '2.2'
 glog:
-- 0.4.0
+- '0.5'
 grpc_cpp:
-- '1.36'
+- '1.38'
 libprotobuf:
-- '3.15'
+- '3.16'
 lz4_c:
 - 1.9.3
 numpy:
@@ -39,7 +39,7 @@ pin_run_as_build:
 python:
 - 3.6.* *_cpython
 re2:
-- 2021.04.01
+- 2021.06.01
 snappy:
 - '1'
 target_platform:
@@ -52,4 +52,4 @@ zip_keys:
 zlib:
 - '1.2'
 zstd:
-- '1.4'
+- '1.5'
diff --git a/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython.yaml
index 4b702a38980..684987c6fbb 100644
--- a/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython.yaml
@@ -1,5 +1,5 @@
 aws_sdk_cpp:
-- 1.8.151
+- 1.8.186
 bzip2:
 - '1'
 c_compiler:
@@ -17,11 +17,11 @@ cxx_compiler:
 gflags:
 - '2.2'
 glog:
-- 0.4.0
+- '0.5'
 grpc_cpp:
-- '1.36'
+- '1.38'
 libprotobuf:
-- '3.15'
+- '3.16'
 lz4_c:
 - 1.9.3
 numpy:
@@ -39,7 +39,7 @@ pin_run_as_build:
 python:
 - 3.7.* *_cpython
 re2:
-- 2021.04.01
+- 2021.06.01
 snappy:
 - '1'
 target_platform:
@@ -52,4 +52,4 @@ zip_keys:
 zlib:
 - '1.2'
 zstd:
-- '1.4'
+- '1.5'
diff --git a/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.8.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.8.____cpython.yaml
index 6ae6c2fde4f..afefac79ec7 100644
--- a/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.8.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.8.____cpython.yaml
@@ -1,5 +1,5 @@
 aws_sdk_cpp:
-- 1.8.151
+- 1.8.186
 bzip2:
 - '1'
 c_compiler:
@@ -17,11 +17,11 @@ cxx_compiler:
 gflags:
 - '2.2'
 glog:
-- 0.4.0
+- '0.5'
 grpc_cpp:
-- '1.36'
+- '1.38'
 libprotobuf:
-- '3.15'
+- '3.16'
 lz4_c:
 - 1.9.3
 numpy:
@@ -39,7 +39,7 @@ pin_run_as_build:
 python:
 - 3.8.* *_cpython
 re2:
-- 2021.04.01
+- 2021.06.01
 snappy:
 - '1'
 target_platform:
@@ -52,4 +52,4 @@ zip_keys:
 zlib:
 - '1.2'
 zstd:
-- '1.4'
+- '1.5'
diff --git a/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.19python3.9.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.19python3.9.____cpython.yaml
index 73a8b5099bb..c385d13eac0 100644
--- a/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.19python3.9.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.19python3.9.____cpython.yaml
@@ -1,5 +1,5 @@
 aws_sdk_cpp:
-- 1.8.151
+- 1.8.186
 bzip2:
 - '1'
 c_compiler:
@@ -17,11 +17,11 @@ cxx_compiler:
 gflags:
 - '2.2'
 glog:
-- 0.4.0
+- '0.5'
 grpc_cpp:
-- '1.36'
+- '1.38'
 libprotobuf:
-- '3.15'
+- '3.16'
 lz4_c:
 - 1.9.3
 numpy:
@@ -39,7 +39,7 @@ pin_run_as_build:
 python:
 - 3.9.* *_cpython
 re2:
-- 2021.04.01
+- 2021.06.01
 snappy:
 - '1'
 target_platform:
@@ -52,4 +52,4 @@ zip_keys:
 zlib:
 - '1.2'
 zstd:
-- '1.4'
+- '1.5'
diff --git a/dev/tasks/conda-recipes/azure.osx.yml b/dev/tasks/conda-recipes/azure.osx.yml
index dbb1a68aca6..d3cbcbbb787 100755
--- a/dev/tasks/conda-recipes/azure.osx.yml
+++ b/dev/tasks/conda-recipes/azure.osx.yml
@@ -11,14 +11,6 @@ jobs:
     ARROW_VERSION: {{ arrow.no_rc_version }}
     UPLOAD_PACKAGES: False
   steps:
-  - script: |
-      echo "Removing homebrew from Azure to avoid conflicts."
-      curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/uninstall > ~/uninstall_homebrew
-      chmod +x ~/uninstall_homebrew
-      ~/uninstall_homebrew -fq
-      rm ~/uninstall_homebrew
-    displayName: Remove homebrew
-
   - bash: |
       echo "##vso[task.prependpath]$CONDA/bin"
       sudo chown -R $USER $CONDA
@@ -29,6 +21,13 @@ jobs:
       conda install -n base -c conda-forge --quiet --yes conda-forge-ci-setup=3 conda-build
     displayName: 'Add conda-forge-ci-setup=3'
 
+  - script: |
+      echo "Removing homebrew from Azure to avoid conflicts."
+      /usr/bin/sudo mangle_homebrew
+      /usr/bin/sudo -k
+    displayName: Mangle homebrew
+
+
   {{ macros.azure_checkout_arrow() }}
 
   - script: |
@@ -76,5 +75,9 @@ jobs:
     workingDirectory: arrow/dev/tasks/conda-recipes
     displayName: Build recipes
 
+  - script: |
+     sudo mv /usr/local/conda_mangled/* /usr/local/
+    displayName: Unmangle homebrew
+
   {{ macros.azure_upload_releases("arrow/dev/tasks/conda-recipes/build_artifacts/osx-*/*.tar.bz2") }}
   {{ macros.azure_upload_anaconda("arrow/dev/tasks/conda-recipes/build_artifacts/osx-*/*.tar.bz2") }}

From 6ba898742d9226415c3c52d4ac5b8fb1737cfdba Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Thu, 10 Jun 2021 18:05:52 +0200
Subject: [PATCH 392/719] ARROW-12937: [C++][Python] Allow setting default
 metadata for new S3 files

Closes #10504 from pitrou/ARROW-12937-s3-default-metadata

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/filesystem/s3fs.cc        | 25 ++++++-----
 cpp/src/arrow/filesystem/s3fs.h         |  7 ++-
 cpp/src/arrow/filesystem/s3fs_test.cc   | 60 ++++++++++++++++++-------
 python/pyarrow/_s3fs.pyx                | 20 ++++++---
 python/pyarrow/includes/libarrow_fs.pxd |  1 +
 python/pyarrow/tests/test_fs.py         |  8 ++++
 6 files changed, 88 insertions(+), 33 deletions(-)

diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc
index e60cb119e29..effafad8c25 100644
--- a/cpp/src/arrow/filesystem/s3fs.cc
+++ b/cpp/src/arrow/filesystem/s3fs.cc
@@ -791,15 +791,14 @@ Status SetObjectMetadata(const std::shared_ptr<const KeyValueMetadata>& metadata
                          ObjectRequest* req) {
   static auto setters = ObjectMetadataSetter<ObjectRequest>::GetSetters();
 
-  if (metadata) {
-    const auto& keys = metadata->keys();
-    const auto& values = metadata->values();
-
-    for (size_t i = 0; i < keys.size(); ++i) {
-      auto it = setters.find(keys[i]);
-      if (it != setters.end()) {
-        RETURN_NOT_OK(it->second(values[i], req));
-      }
+  DCHECK_NE(metadata, nullptr);
+  const auto& keys = metadata->keys();
+  const auto& values = metadata->values();
+
+  for (size_t i = 0; i < keys.size(); ++i) {
+    auto it = setters.find(keys[i]);
+    if (it != setters.end()) {
+      RETURN_NOT_OK(it->second(values[i], req));
     }
   }
   return Status::OK();
@@ -979,6 +978,7 @@ class ObjectOutputStream final : public io::OutputStream {
         io_context_(io_context),
         path_(path),
         metadata_(metadata),
+        default_metadata_(options.default_metadata),
         background_writes_(options.background_writes) {}
 
   ~ObjectOutputStream() override {
@@ -992,7 +992,11 @@ class ObjectOutputStream final : public io::OutputStream {
     S3Model::CreateMultipartUploadRequest req;
     req.SetBucket(ToAwsString(path_.bucket));
     req.SetKey(ToAwsString(path_.key));
-    RETURN_NOT_OK(SetObjectMetadata(metadata_, &req));
+    if (metadata_ && metadata_->size() != 0) {
+      RETURN_NOT_OK(SetObjectMetadata(metadata_, &req));
+    } else if (default_metadata_ && default_metadata_->size() != 0) {
+      RETURN_NOT_OK(SetObjectMetadata(default_metadata_, &req));
+    }
 
     auto outcome = client_->CreateMultipartUpload(req);
     if (!outcome.IsSuccess()) {
@@ -1263,6 +1267,7 @@ class ObjectOutputStream final : public io::OutputStream {
   const io::IOContext io_context_;
   const S3Path path_;
   const std::shared_ptr<const KeyValueMetadata> metadata_;
+  const std::shared_ptr<const KeyValueMetadata> default_metadata_;
   const bool background_writes_;
 
   Aws::String upload_id_;
diff --git a/cpp/src/arrow/filesystem/s3fs.h b/cpp/src/arrow/filesystem/s3fs.h
index d04eaa8ba16..1aad4dd7040 100644
--- a/cpp/src/arrow/filesystem/s3fs.h
+++ b/cpp/src/arrow/filesystem/s3fs.h
@@ -71,7 +71,7 @@ enum class S3CredentialsKind : int8_t {
 
 /// Options for the S3FileSystem implementation.
 struct ARROW_EXPORT S3Options {
-  /// AWS region to connect to.
+  /// \brief AWS region to connect to.
   ///
   /// If unset, the AWS SDK will choose a default value.  The exact algorithm
   /// depends on the SDK version.  Before 1.8, the default is hardcoded
@@ -107,6 +107,11 @@ struct ARROW_EXPORT S3Options {
   /// Whether OutputStream writes will be issued in the background, without blocking.
   bool background_writes = true;
 
+  /// \brief Default metadata for OpenOutputStream.
+  ///
+  /// This will be ignored if non-empty metadata is passed to OpenOutputStream.
+  std::shared_ptr<const KeyValueMetadata> default_metadata;
+
   /// Configure with the default AWS credentials provider chain.
   void ConfigureDefaultCredentials();
 
diff --git a/cpp/src/arrow/filesystem/s3fs_test.cc b/cpp/src/arrow/filesystem/s3fs_test.cc
index 4f83bdea2dd..966f12d855e 100644
--- a/cpp/src/arrow/filesystem/s3fs_test.cc
+++ b/cpp/src/arrow/filesystem/s3fs_test.cc
@@ -410,6 +410,18 @@ class TestS3FS : public S3TestMixin {
     ASSERT_OK_AND_ASSIGN(fs_, S3FileSystem::Make(options_));
   }
 
+  template <typename Matcher>
+  void AssertMetadataRoundtrip(const std::string& path,
+                               const std::shared_ptr<const KeyValueMetadata>& metadata,
+                               Matcher&& matcher) {
+    ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, metadata));
+    ASSERT_OK(output->Close());
+    ASSERT_OK_AND_ASSIGN(auto input, fs_->OpenInputStream(path));
+    ASSERT_OK_AND_ASSIGN(auto got_metadata, input->ReadMetadata());
+    ASSERT_NE(got_metadata, nullptr);
+    ASSERT_THAT(got_metadata->sorted_pairs(), matcher);
+  }
+
   void TestOpenOutputStream() {
     std::shared_ptr<io::OutputStream> stream;
 
@@ -453,23 +465,6 @@ class TestS3FS : public S3TestMixin {
     ASSERT_OK(stream->Close());
     AssertObjectContents(client_.get(), "bucket", "newfile4", expected);
 
-    // Create new file with metadata
-    auto metadata = KeyValueMetadata::Make({"Content-Type", "Expires"},
-                                           {"x-arrow/test6", "2016-02-05T20:08:35Z"});
-    ASSERT_OK_AND_ASSIGN(stream, fs_->OpenOutputStream("bucket/newfile5", metadata));
-    ASSERT_OK(stream->Close());
-    ASSERT_OK_AND_ASSIGN(auto input, fs_->OpenInputStream("bucket/newfile5"));
-    ASSERT_OK_AND_ASSIGN(auto got_metadata, input->ReadMetadata());
-    ASSERT_NE(got_metadata, nullptr);
-    ASSERT_THAT(got_metadata->sorted_pairs(),
-                testing::IsSupersetOf(metadata->sorted_pairs()));
-
-    // Create new file with valid canned ACL
-    // XXX: no easy way of testing the ACL actually gets set
-    metadata = KeyValueMetadata::Make({"ACL"}, {"authenticated-read"});
-    ASSERT_OK_AND_ASSIGN(stream, fs_->OpenOutputStream("bucket/newfile6", metadata));
-    ASSERT_OK(stream->Close());
-
     // Overwrite
     ASSERT_OK_AND_ASSIGN(stream, fs_->OpenOutputStream("bucket/newfile1"));
     ASSERT_OK(stream->Write("overwritten data"));
@@ -939,6 +934,37 @@ TEST_F(TestS3FS, OpenOutputStreamDestructorSyncWrite) {
   TestOpenOutputStreamDestructor();
 }
 
+TEST_F(TestS3FS, OpenOutputStreamMetadata) {
+  std::shared_ptr<io::OutputStream> stream;
+
+  // Create new file with explicit metadata
+  auto metadata = KeyValueMetadata::Make({"Content-Type", "Expires"},
+                                         {"x-arrow/test6", "2016-02-05T20:08:35Z"});
+  AssertMetadataRoundtrip("bucket/mdfile1", metadata,
+                          testing::IsSupersetOf(metadata->sorted_pairs()));
+
+  // Create new file with valid canned ACL
+  // XXX: no easy way of testing the ACL actually gets set
+  metadata = KeyValueMetadata::Make({"ACL"}, {"authenticated-read"});
+  AssertMetadataRoundtrip("bucket/mdfile2", metadata, testing::_);
+
+  // Create new file with default metadata
+  auto default_metadata = KeyValueMetadata::Make({"Content-Type", "Content-Language"},
+                                                 {"image/png", "fr_FR"});
+  options_.default_metadata = default_metadata;
+  MakeFileSystem();
+  // (null, then empty metadata argument)
+  AssertMetadataRoundtrip("bucket/mdfile3", nullptr,
+                          testing::IsSupersetOf(default_metadata->sorted_pairs()));
+  AssertMetadataRoundtrip("bucket/mdfile4", KeyValueMetadata::Make({}, {}),
+                          testing::IsSupersetOf(default_metadata->sorted_pairs()));
+
+  // Create new file with explicit metadata replacing default metadata
+  metadata = KeyValueMetadata::Make({"Content-Type"}, {"x-arrow/test6"});
+  AssertMetadataRoundtrip("bucket/mdfile5", metadata,
+                          testing::IsSupersetOf(metadata->sorted_pairs()));
+}
+
 TEST_F(TestS3FS, FileSystemFromUri) {
   std::stringstream ss;
   ss << "s3://" << minio_.access_key() << ":" << minio_.secret_key()
diff --git a/python/pyarrow/_s3fs.pyx b/python/pyarrow/_s3fs.pyx
index 20c3e6478fa..a45be28d726 100644
--- a/python/pyarrow/_s3fs.pyx
+++ b/python/pyarrow/_s3fs.pyx
@@ -17,8 +17,9 @@
 
 # cython: language_level = 3
 
-from pyarrow.lib cimport check_status
-from pyarrow.lib import frombytes, tobytes
+from pyarrow.lib cimport (check_status, pyarrow_wrap_metadata,
+                          pyarrow_unwrap_metadata)
+from pyarrow.lib import frombytes, tobytes, KeyValueMetadata
 from pyarrow.includes.common cimport *
 from pyarrow.includes.libarrow cimport *
 from pyarrow.includes.libarrow_fs cimport *
@@ -92,8 +93,11 @@ cdef class S3FileSystem(FileSystem):
     endpoint_override: str, default None
         Override region with a connect string such as "localhost:9000"
     background_writes: boolean, default True
-        Whether OutputStream writes will be issued in the background, without
+        Whether file writes will be issued in the background, without
         blocking.
+    default_metadata: mapping or KeyValueMetadata, default None
+        Default metadata for open_output_stream.  This will be ignored if
+        non-empty metadata is passed to open_output_stream.
     proxy_options: dict or str, default None
         If a proxy is used, provide the options here. Supported options are:
         'scheme' (str: 'http' or 'https'; required), 'host' (str; required),
@@ -115,8 +119,8 @@ cdef class S3FileSystem(FileSystem):
     def __init__(self, *, access_key=None, secret_key=None, session_token=None,
                  bint anonymous=False, region=None, scheme=None,
                  endpoint_override=None, bint background_writes=True,
-                 role_arn=None, session_name=None, external_id=None,
-                 load_frequency=900, proxy_options=None):
+                 default_metadata=None, role_arn=None, session_name=None,
+                 external_id=None, load_frequency=900, proxy_options=None):
         cdef:
             CS3Options options
             shared_ptr[CS3FileSystem] wrapped
@@ -185,6 +189,11 @@ cdef class S3FileSystem(FileSystem):
             options.endpoint_override = tobytes(endpoint_override)
         if background_writes is not None:
             options.background_writes = background_writes
+        if default_metadata is not None:
+            if not isinstance(default_metadata, KeyValueMetadata):
+                default_metadata = KeyValueMetadata(default_metadata)
+            options.default_metadata = pyarrow_unwrap_metadata(
+                default_metadata)
 
         if proxy_options is not None:
             if isinstance(proxy_options, dict):
@@ -247,6 +256,7 @@ cdef class S3FileSystem(FileSystem):
                 external_id=frombytes(opts.external_id),
                 load_frequency=opts.load_frequency,
                 background_writes=opts.background_writes,
+                default_metadata=pyarrow_wrap_metadata(opts.default_metadata),
                 proxy_options={'scheme': frombytes(opts.proxy_options.scheme),
                                'host': frombytes(opts.proxy_options.host),
                                'port': opts.proxy_options.port,
diff --git a/python/pyarrow/includes/libarrow_fs.pxd b/python/pyarrow/includes/libarrow_fs.pxd
index 33f61e7766e..52ef97e5757 100644
--- a/python/pyarrow/includes/libarrow_fs.pxd
+++ b/python/pyarrow/includes/libarrow_fs.pxd
@@ -150,6 +150,7 @@ cdef extern from "arrow/filesystem/api.h" namespace "arrow::fs" nogil:
         c_string endpoint_override
         c_string scheme
         c_bool background_writes
+        shared_ptr[const CKeyValueMetadata] default_metadata
         c_string role_arn
         c_string session_name
         c_string external_id
diff --git a/python/pyarrow/tests/test_fs.py b/python/pyarrow/tests/test_fs.py
index f7baeb6c396..0c65aac3f56 100644
--- a/python/pyarrow/tests/test_fs.py
+++ b/python/pyarrow/tests/test_fs.py
@@ -1058,6 +1058,12 @@ def test_s3_options():
     assert isinstance(fs, S3FileSystem)
     assert pickle.loads(pickle.dumps(fs)) == fs
 
+    fs = S3FileSystem(background_writes=True,
+                      default_metadata={"ACL": "authenticated-read",
+                                        "Content-Type": "text/plain"})
+    assert isinstance(fs, S3FileSystem)
+    assert pickle.loads(pickle.dumps(fs)) == fs
+
     with pytest.raises(ValueError):
         S3FileSystem(access_key='access')
     with pytest.raises(ValueError):
@@ -1076,6 +1082,8 @@ def test_s3_options():
         )
     with pytest.raises(ValueError):
         S3FileSystem(role_arn="arn", anonymous=True)
+    with pytest.raises(ValueError):
+        S3FileSystem(default_metadata=["foo", "bar"])
 
 
 @pytest.mark.s3

From 49f4b18fe9a6e075b9378971343b3df66a50e473 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Thu, 10 Jun 2021 18:47:13 +0200
Subject: [PATCH 393/719] ARROW-12948: [C++][Python] Add slice_replace kernel

This adds a slice_replace kernel mimicking Pandas's str.slice_replace. There are both ascii and UTF8 variants, indexing respectively with bytes and codepoints. The ascii variant also works on binary arrays.

Closes #10494 from lidavidm/arrow-12948

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/compute/api_scalar.h            |  12 ++
 .../arrow/compute/kernels/scalar_string.cc    | 160 ++++++++++++++++++
 .../compute/kernels/scalar_string_test.cc     | 100 +++++++++++
 docs/source/cpp/compute.rst                   |  65 ++++---
 docs/source/python/api/compute.rst            |   6 +
 python/pyarrow/_compute.pyx                   |  18 ++
 python/pyarrow/compute.py                     |   1 +
 python/pyarrow/includes/libarrow.pxd          |   7 +
 python/pyarrow/tests/test_compute.py          |  28 +++
 9 files changed, 369 insertions(+), 28 deletions(-)

diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h
index 190696f6ed5..6e9a9340f2c 100644
--- a/cpp/src/arrow/compute/api_scalar.h
+++ b/cpp/src/arrow/compute/api_scalar.h
@@ -77,6 +77,18 @@ struct ARROW_EXPORT SplitPatternOptions : public SplitOptions {
   std::string pattern;
 };
 
+struct ARROW_EXPORT ReplaceSliceOptions : public FunctionOptions {
+  explicit ReplaceSliceOptions(int64_t start, int64_t stop, std::string replacement)
+      : start(start), stop(stop), replacement(std::move(replacement)) {}
+
+  /// Index to start slicing at
+  int64_t start;
+  /// Index to stop slicing at
+  int64_t stop;
+  /// String to replace the slice with
+  std::string replacement;
+};
+
 struct ARROW_EXPORT ReplaceSubstringOptions : public FunctionOptions {
   explicit ReplaceSubstringOptions(std::string pattern, std::string replacement,
                                    int64_t max_replacements = -1)
diff --git a/cpp/src/arrow/compute/kernels/scalar_string.cc b/cpp/src/arrow/compute/kernels/scalar_string.cc
index 8b740f3742a..b6c1b8f6261 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string.cc
@@ -2288,6 +2288,165 @@ const FunctionDoc replace_substring_regex_doc(
     {"strings"}, "ReplaceSubstringOptions");
 #endif
 
+// ----------------------------------------------------------------------
+// Replace slice
+
+struct ReplaceSliceTransformBase : public StringTransformBase {
+  using State = OptionsWrapper<ReplaceSliceOptions>;
+
+  const ReplaceSliceOptions* options;
+
+  explicit ReplaceSliceTransformBase(const ReplaceSliceOptions& options)
+      : options{&options} {}
+
+  int64_t MaxCodeunits(int64_t ninputs, int64_t input_ncodeunits) override {
+    return ninputs * options->replacement.size() + input_ncodeunits;
+  }
+};
+
+struct BinaryReplaceSliceTransform : ReplaceSliceTransformBase {
+  using ReplaceSliceTransformBase::ReplaceSliceTransformBase;
+  int64_t Transform(const uint8_t* input, int64_t input_string_ncodeunits,
+                    uint8_t* output) {
+    const auto& opts = *options;
+    int64_t before_slice = 0;
+    int64_t after_slice = 0;
+    uint8_t* output_start = output;
+
+    if (opts.start >= 0) {
+      // Count from left
+      before_slice = std::min<int64_t>(input_string_ncodeunits, opts.start);
+    } else {
+      // Count from right
+      before_slice = std::max<int64_t>(0, input_string_ncodeunits + opts.start);
+    }
+    // Mimic Pandas: if stop would be before start, treat as 0-length slice
+    if (opts.stop >= 0) {
+      // Count from left
+      after_slice =
+          std::min<int64_t>(input_string_ncodeunits, std::max(before_slice, opts.stop));
+    } else {
+      // Count from right
+      after_slice = std::max<int64_t>(before_slice, input_string_ncodeunits + opts.stop);
+    }
+    output = std::copy(input, input + before_slice, output);
+    output = std::copy(opts.replacement.begin(), opts.replacement.end(), output);
+    output = std::copy(input + after_slice, input + input_string_ncodeunits, output);
+    return output - output_start;
+  }
+};
+
+struct Utf8ReplaceSliceTransform : ReplaceSliceTransformBase {
+  using ReplaceSliceTransformBase::ReplaceSliceTransformBase;
+  int64_t Transform(const uint8_t* input, int64_t input_string_ncodeunits,
+                    uint8_t* output) {
+    const auto& opts = *options;
+    const uint8_t* begin = input;
+    const uint8_t* end = input + input_string_ncodeunits;
+    const uint8_t *begin_sliced, *end_sliced;
+    uint8_t* output_start = output;
+
+    // Mimic Pandas: if stop would be before start, treat as 0-length slice
+    if (opts.start >= 0) {
+      // Count from left
+      if (!arrow::util::UTF8AdvanceCodepoints(begin, end, &begin_sliced, opts.start)) {
+        return kTransformError;
+      }
+      if (opts.stop > options->start) {
+        // Continue counting from left
+        const int64_t length = opts.stop - options->start;
+        if (!arrow::util::UTF8AdvanceCodepoints(begin_sliced, end, &end_sliced, length)) {
+          return kTransformError;
+        }
+      } else if (opts.stop < 0) {
+        // Count from right
+        if (!arrow::util::UTF8AdvanceCodepointsReverse(begin_sliced, end, &end_sliced,
+                                                       -opts.stop)) {
+          return kTransformError;
+        }
+      } else {
+        // Zero-length slice
+        end_sliced = begin_sliced;
+      }
+    } else {
+      // Count from right
+      if (!arrow::util::UTF8AdvanceCodepointsReverse(begin, end, &begin_sliced,
+                                                     -opts.start)) {
+        return kTransformError;
+      }
+      if (opts.stop >= 0) {
+        // Restart counting from left
+        if (!arrow::util::UTF8AdvanceCodepoints(begin, end, &end_sliced, opts.stop)) {
+          return kTransformError;
+        }
+        if (end_sliced <= begin_sliced) {
+          // Zero-length slice
+          end_sliced = begin_sliced;
+        }
+      } else if ((opts.stop < 0) && (options->stop > options->start)) {
+        // Count from right
+        if (!arrow::util::UTF8AdvanceCodepointsReverse(begin_sliced, end, &end_sliced,
+                                                       -opts.stop)) {
+          return kTransformError;
+        }
+      } else {
+        // zero-length slice
+        end_sliced = begin_sliced;
+      }
+    }
+    output = std::copy(begin, begin_sliced, output);
+    output = std::copy(opts.replacement.begin(), options->replacement.end(), output);
+    output = std::copy(end_sliced, end, output);
+    return output - output_start;
+  }
+};
+
+template <typename Type>
+using BinaryReplaceSlice =
+    StringTransformExecWithState<Type, BinaryReplaceSliceTransform>;
+template <typename Type>
+using Utf8ReplaceSlice = StringTransformExecWithState<Type, Utf8ReplaceSliceTransform>;
+
+const FunctionDoc binary_replace_slice_doc(
+    "Replace a slice of a binary string with `replacement`",
+    ("For each string in `strings`, replace a slice of the string defined by `start`"
+     "and `stop` with `replacement`. `start` is inclusive and `stop` is exclusive, "
+     "and both are measured in bytes.\n"
+     "Null values emit null."),
+    {"strings"}, "ReplaceSliceOptions");
+
+const FunctionDoc utf8_replace_slice_doc(
+    "Replace a slice of a string with `replacement`",
+    ("For each string in `strings`, replace a slice of the string defined by `start`"
+     "and `stop` with `replacement`. `start` is inclusive and `stop` is exclusive, "
+     "and both are measured in codeunits.\n"
+     "Null values emit null."),
+    {"strings"}, "ReplaceSliceOptions");
+
+void AddReplaceSlice(FunctionRegistry* registry) {
+  {
+    auto func = std::make_shared<ScalarFunction>("binary_replace_slice", Arity::Unary(),
+                                                 &binary_replace_slice_doc);
+    for (const auto& ty : BaseBinaryTypes()) {
+      DCHECK_OK(func->AddKernel({ty}, ty,
+                                GenerateTypeAgnosticVarBinaryBase<BinaryReplaceSlice>(ty),
+                                ReplaceSliceTransformBase::State::Init));
+    }
+    DCHECK_OK(registry->AddFunction(std::move(func)));
+  }
+
+  {
+    auto func = std::make_shared<ScalarFunction>("utf8_replace_slice", Arity::Unary(),
+                                                 &utf8_replace_slice_doc);
+    DCHECK_OK(func->AddKernel({utf8()}, utf8(), Utf8ReplaceSlice<StringType>::Exec,
+                              ReplaceSliceTransformBase::State::Init));
+    DCHECK_OK(func->AddKernel({large_utf8()}, large_utf8(),
+                              Utf8ReplaceSlice<LargeStringType>::Exec,
+                              ReplaceSliceTransformBase::State::Init));
+    DCHECK_OK(registry->AddFunction(std::move(func)));
+  }
+}
+
 // ----------------------------------------------------------------------
 // Extract with regex
 
@@ -3434,6 +3593,7 @@ void RegisterScalarStringAscii(FunctionRegistry* registry) {
       MemAllocation::NO_PREALLOCATE);
   AddExtractRegex(registry);
 #endif
+  AddReplaceSlice(registry);
   AddSlice(registry);
   AddSplit(registry);
   AddStrptime(registry);
diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
index c4b6956be2b..7d52d6aacf2 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
@@ -122,6 +122,56 @@ TYPED_TEST(TestBinaryKernels, CountSubstring) {
   // TODO: case-insensitive
 }
 
+TYPED_TEST(TestBinaryKernels, AsciiReplaceSlice) {
+  ReplaceSliceOptions options{0, 1, "XX"};
+  this->CheckUnary("binary_replace_slice", "[]", this->type(), "[]", &options);
+  this->CheckUnary("binary_replace_slice", R"([null, "", "a", "ab", "abc"])",
+                   this->type(), R"([null, "XX", "XX", "XXb", "XXbc"])", &options);
+
+  ReplaceSliceOptions options_whole{0, 5, "XX"};
+  this->CheckUnary("binary_replace_slice",
+                   R"([null, "", "a", "ab", "abc", "abcde", "abcdef"])", this->type(),
+                   R"([null, "XX", "XX", "XX", "XX", "XX", "XXf"])", &options_whole);
+
+  ReplaceSliceOptions options_middle{2, 4, "XX"};
+  this->CheckUnary("binary_replace_slice",
+                   R"([null, "", "a", "ab", "abc", "abcd", "abcde"])", this->type(),
+                   R"([null, "XX", "aXX", "abXX", "abXX", "abXX", "abXXe"])",
+                   &options_middle);
+
+  ReplaceSliceOptions options_neg_start{-3, -2, "XX"};
+  this->CheckUnary("binary_replace_slice",
+                   R"([null, "", "a", "ab", "abc", "abcd", "abcde"])", this->type(),
+                   R"([null, "XX", "XXa", "XXab", "XXbc", "aXXcd", "abXXde"])",
+                   &options_neg_start);
+
+  ReplaceSliceOptions options_neg_end{2, -2, "XX"};
+  this->CheckUnary("binary_replace_slice",
+                   R"([null, "", "a", "ab", "abc", "abcd", "abcde"])", this->type(),
+                   R"([null, "XX", "aXX", "abXX", "abXXc", "abXXcd", "abXXde"])",
+                   &options_neg_end);
+
+  ReplaceSliceOptions options_neg_pos{-1, 2, "XX"};
+  this->CheckUnary("binary_replace_slice",
+                   R"([null, "", "a", "ab", "abc", "abcd", "abcde"])", this->type(),
+                   R"([null, "XX", "XX", "aXX", "abXXc", "abcXXd", "abcdXXe"])",
+                   &options_neg_pos);
+
+  // Effectively the same as [2, 2)
+  ReplaceSliceOptions options_flip{2, 0, "XX"};
+  this->CheckUnary("binary_replace_slice",
+                   R"([null, "", "a", "ab", "abc", "abcd", "abcde"])", this->type(),
+                   R"([null, "XX", "aXX", "abXX", "abXXc", "abXXcd", "abXXcde"])",
+                   &options_flip);
+
+  // Effectively the same as [-3, -3)
+  ReplaceSliceOptions options_neg_flip{-3, -5, "XX"};
+  this->CheckUnary("binary_replace_slice",
+                   R"([null, "", "a", "ab", "abc", "abcd", "abcde"])", this->type(),
+                   R"([null, "XX", "XXa", "XXab", "XXabc", "aXXbcd", "abXXcde"])",
+                   &options_neg_flip);
+}
+
 template <typename TestType>
 class TestStringKernels : public BaseTestStringKernels<TestType> {};
 
@@ -745,6 +795,56 @@ TYPED_TEST(TestStringKernels, SplitRegexReverse) {
 }
 #endif
 
+TYPED_TEST(TestStringKernels, Utf8ReplaceSlice) {
+  ReplaceSliceOptions options{0, 1, "χχ"};
+  this->CheckUnary("utf8_replace_slice", "[]", this->type(), "[]", &options);
+  this->CheckUnary("utf8_replace_slice", R"([null, "", "π", "πb", "πbθ"])", this->type(),
+                   R"([null, "χχ", "χχ", "χχb", "χχbθ"])", &options);
+
+  ReplaceSliceOptions options_whole{0, 5, "χχ"};
+  this->CheckUnary("utf8_replace_slice",
+                   R"([null, "", "π", "πb", "πbθ", "πbθde", "πbθdef"])", this->type(),
+                   R"([null, "χχ", "χχ", "χχ", "χχ", "χχ", "χχf"])", &options_whole);
+
+  ReplaceSliceOptions options_middle{2, 4, "χχ"};
+  this->CheckUnary("utf8_replace_slice",
+                   R"([null, "", "π", "πb", "πbθ", "πbθd", "πbθde"])", this->type(),
+                   R"([null, "χχ", "πχχ", "πbχχ", "πbχχ", "πbχχ", "πbχχe"])",
+                   &options_middle);
+
+  ReplaceSliceOptions options_neg_start{-3, -2, "χχ"};
+  this->CheckUnary("utf8_replace_slice",
+                   R"([null, "", "π", "πb", "πbθ", "πbθd", "πbθde"])", this->type(),
+                   R"([null, "χχ", "χχπ", "χχπb", "χχbθ", "πχχθd", "πbχχde"])",
+                   &options_neg_start);
+
+  ReplaceSliceOptions options_neg_end{2, -2, "χχ"};
+  this->CheckUnary("utf8_replace_slice",
+                   R"([null, "", "π", "πb", "πbθ", "πbθd", "πbθde"])", this->type(),
+                   R"([null, "χχ", "πχχ", "πbχχ", "πbχχθ", "πbχχθd", "πbχχde"])",
+                   &options_neg_end);
+
+  ReplaceSliceOptions options_neg_pos{-1, 2, "χχ"};
+  this->CheckUnary("utf8_replace_slice",
+                   R"([null, "", "π", "πb", "πbθ", "πbθd", "πbθde"])", this->type(),
+                   R"([null, "χχ", "χχ", "πχχ", "πbχχθ", "πbθχχd", "πbθdχχe"])",
+                   &options_neg_pos);
+
+  // Effectively the same as [2, 2)
+  ReplaceSliceOptions options_flip{2, 0, "χχ"};
+  this->CheckUnary("utf8_replace_slice",
+                   R"([null, "", "π", "πb", "πbθ", "πbθd", "πbθde"])", this->type(),
+                   R"([null, "χχ", "πχχ", "πbχχ", "πbχχθ", "πbχχθd", "πbχχθde"])",
+                   &options_flip);
+
+  // Effectively the same as [-3, -3)
+  ReplaceSliceOptions options_neg_flip{-3, -5, "χχ"};
+  this->CheckUnary("utf8_replace_slice",
+                   R"([null, "", "π", "πb", "πbθ", "πbθd", "πbθde"])", this->type(),
+                   R"([null, "χχ", "χχπ", "χχπb", "χχπbθ", "πχχbθd", "πbχχθde"])",
+                   &options_neg_flip);
+}
+
 TYPED_TEST(TestStringKernels, ReplaceSubstring) {
   ReplaceSubstringOptions options{"foo", "bazz"};
   this->CheckUnary("replace_substring", R"(["foo", "this foo that foo", null])",
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index ad2d9f8f5d2..b28e3928a74 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -451,29 +451,33 @@ The third set of functions examines string elements on a byte-per-byte basis:
 String transforms
 ~~~~~~~~~~~~~~~~~
 
-+--------------------------+------------+-------------------------+---------------------+-------------------------------------------------+
-| Function name            | Arity      | Input types             | Output type         | Notes   | Options class                         |
-+==========================+============+=========================+=====================+=========+=======================================+
-| ascii_lower              | Unary      | String-like             | String-like         | \(1)    |                                       |
-+--------------------------+------------+-------------------------+---------------------+---------+---------------------------------------+
-| ascii_reverse            | Unary      | String-like             | String-like         | \(2)    |                                       |
-+--------------------------+------------+-------------------------+---------------------+---------+---------------------------------------+
-| ascii_upper              | Unary      | String-like             | String-like         | \(1)    |                                       |
-+--------------------------+------------+-------------------------+---------------------+---------+---------------------------------------+
-| binary_length            | Unary      | Binary- or String-like  | Int32 or Int64      | \(3)    |                                       |
-+--------------------------+------------+-------------------------+---------------------+---------+---------------------------------------+
-| replace_substring        | Unary      | String-like             | String-like         | \(4)    | :struct:`ReplaceSubstringOptions`     |
-+--------------------------+------------+-------------------------+---------------------+---------+---------------------------------------+
-| replace_substring_regex  | Unary      | String-like             | String-like         | \(5)    | :struct:`ReplaceSubstringOptions`     |
-+--------------------------+------------+-------------------------+---------------------+---------+---------------------------------------+
-| utf8_length              | Unary      | String-like             | Int32 or Int64      | \(6)    |                                       |
-+--------------------------+------------+-------------------------+---------------------+---------+---------------------------------------+
-| utf8_lower               | Unary      | String-like             | String-like         | \(7)    |                                       |
-+--------------------------+------------+-------------------------+---------------------+---------+---------------------------------------+
-| utf8_reverse             | Unary      | String-like             | String-like         | \(8)    |                                       |
-+--------------------------+------------+-------------------------+---------------------+---------+---------------------------------------+
-| utf8_upper               | Unary      | String-like             | String-like         | \(7)    |                                       |
-+--------------------------+------------+-------------------------+---------------------+---------+---------------------------------------+
++--------------------------+------------+-------------------------+------------------------+-------------------------------------------------+
+| Function name            | Arity      | Input types             | Output type            | Notes   | Options class                         |
++==========================+============+=========================+========================+=========+=======================================+
+| ascii_lower              | Unary      | String-like             | String-like            | \(1)    |                                       |
++--------------------------+------------+-------------------------+------------------------+---------+---------------------------------------+
+| ascii_reverse            | Unary      | String-like             | String-like            | \(2)    |                                       |
++--------------------------+------------+-------------------------+------------------------+---------+---------------------------------------+
+| ascii_upper              | Unary      | String-like             | String-like            | \(1)    |                                       |
++--------------------------+------------+-------------------------+------------------------+---------+---------------------------------------+
+| binary_length            | Unary      | Binary- or String-like  | Int32 or Int64         | \(3)    |                                       |
++--------------------------+------------+-------------------------+------------------------+---------+---------------------------------------+
+| binary_replace_slice     | Unary      | String-like             | Binary- or String-like | \(4)    | :struct:`ReplaceSliceOptions`         |
++--------------------------+------------+-------------------------+------------------------+---------+---------------------------------------+
+| replace_substring        | Unary      | String-like             | String-like            | \(5)    | :struct:`ReplaceSubstringOptions`     |
++--------------------------+------------+-------------------------+------------------------+---------+---------------------------------------+
+| replace_substring_regex  | Unary      | String-like             | String-like            | \(6)    | :struct:`ReplaceSubstringOptions`     |
++--------------------------+------------+-------------------------+------------------------+---------+---------------------------------------+
+| utf8_length              | Unary      | String-like             | Int32 or Int64         | \(7)    |                                       |
++--------------------------+------------+-------------------------+------------------------+---------+---------------------------------------+
+| utf8_lower               | Unary      | String-like             | String-like            | \(8)    |                                       |
++--------------------------+------------+-------------------------+------------------------+---------+---------------------------------------+
+| utf8_replace_slice       | Unary      | String-like             | String-like            | \(4)    | :struct:`ReplaceSliceOptions`         |
++--------------------------+------------+-------------------------+------------------------+---------+---------------------------------------+
+| utf8_reverse             | Unary      | String-like             | String-like            | \(9)    |                                       |
++--------------------------+------------+-------------------------+------------------------+---------+---------------------------------------+
+| utf8_upper               | Unary      | String-like             | String-like            | \(8)    |                                       |
++--------------------------+------------+-------------------------+------------------------+---------+---------------------------------------+
 
 
 * \(1) Each ASCII character in the input is converted to lowercase or
@@ -485,26 +489,31 @@ String transforms
 * \(3) Output is the physical length in bytes of each input element.  Output
   type is Int32 for Binary / String, Int64 for LargeBinary / LargeString.
 
-* \(4) Replace non-overlapping substrings that match to
+* \(4) Replace the slice of the substring from :member:`ReplaceSliceOptions::start`
+  (inclusive) to :member:`ReplaceSliceOptions::stop` (exclusive) by
+  :member:`ReplaceSubstringOptions::replacement`. The binary kernel measures the
+  slice in bytes, while the UTF8 kernel measures the slice in codeunits.
+
+* \(5) Replace non-overlapping substrings that match to
   :member:`ReplaceSubstringOptions::pattern` by
   :member:`ReplaceSubstringOptions::replacement`. If
   :member:`ReplaceSubstringOptions::max_replacements` != -1, it determines the
   maximum number of replacements made, counting from the left.
 
-* \(5) Replace non-overlapping substrings that match to the regular expression
+* \(6) Replace non-overlapping substrings that match to the regular expression
   :member:`ReplaceSubstringOptions::pattern` by
   :member:`ReplaceSubstringOptions::replacement`, using the Google RE2 library. If
   :member:`ReplaceSubstringOptions::max_replacements` != -1, it determines the
   maximum number of replacements made, counting from the left. Note that if the
   pattern contains groups, backreferencing can be used.
 
-* \(6) Output is the number of characters (not bytes) of each input element.
+* \(7) Output is the number of characters (not bytes) of each input element.
   Output type is Int32 for String, Int64 for LargeString.
 
-* \(7) Each UTF8-encoded character in the input is converted to lowercase or
+* \(8) Each UTF8-encoded character in the input is converted to lowercase or
   uppercase.
 
-* \(8) Each UTF8-encoded code unit is written in reverse order to the output.
+* \(9) Each UTF8-encoded code unit is written in reverse order to the output.
   If the input is not valid UTF8, then the output is undefined (but the size of output
   buffers will be preserved).
 
diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst
index 1dbcb3073ca..2e37f9169a7 100644
--- a/docs/source/python/api/compute.rst
+++ b/docs/source/python/api/compute.rst
@@ -168,7 +168,13 @@ String Transforms
    ascii_lower
    ascii_reverse
    ascii_upper
+   binary_length
+   binary_replace_slice
+   replace_substring
+   replace_substring_regex
+   utf8_length
    utf8_lower
+   utf8_replace_slice
    utf8_reverse
    utf8_upper
 
diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx
index 8da0ea05006..104cd1bac1f 100644
--- a/python/pyarrow/_compute.pyx
+++ b/python/pyarrow/_compute.pyx
@@ -701,6 +701,24 @@ class TrimOptions(_TrimOptions):
         self._set_options(characters)
 
 
+cdef class _ReplaceSliceOptions(FunctionOptions):
+    cdef:
+        unique_ptr[CReplaceSliceOptions] replace_slice_options
+
+    cdef const CFunctionOptions* get_options(self) except NULL:
+        return self.replace_slice_options.get()
+
+    def _set_options(self, start, stop, replacement):
+        self.replace_slice_options.reset(
+            new CReplaceSliceOptions(start, stop, tobytes(replacement))
+        )
+
+
+class ReplaceSliceOptions(_ReplaceSliceOptions):
+    def __init__(self, start, stop, replacement):
+        self._set_options(start, stop, replacement)
+
+
 cdef class _ReplaceSubstringOptions(FunctionOptions):
     cdef:
         unique_ptr[CReplaceSubstringOptions] replace_substring_options
diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py
index 8dc7181514c..44282369f87 100644
--- a/python/pyarrow/compute.py
+++ b/python/pyarrow/compute.py
@@ -41,6 +41,7 @@
     PartitionNthOptions,
     ProjectOptions,
     QuantileOptions,
+    ReplaceSliceOptions,
     ReplaceSubstringOptions,
     ScalarAggregateOptions,
     SetLookupOptions,
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index eefca44605c..d5ce98d9a88 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -1817,6 +1817,13 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil:
                              c_bool reverse)
         c_string pattern
 
+    cdef cppclass CReplaceSliceOptions \
+            "arrow::compute::ReplaceSliceOptions"(CFunctionOptions):
+        CReplaceSliceOptions(int64_t start, int64_t stop, c_string replacement)
+        int64_t start
+        int64_t stop
+        c_string replacement
+
     cdef cppclass CReplaceSubstringOptions \
             "arrow::compute::ReplaceSubstringOptions"(CFunctionOptions):
         CReplaceSubstringOptions(c_string pattern, c_string replacement,
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index 64d5ad0a30d..8de24c8c249 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -25,6 +25,11 @@
 
 import numpy as np
 
+try:
+    import pandas as pd
+except ImportError:
+    pd = None
+
 import pyarrow as pa
 import pyarrow.compute as pc
 
@@ -693,6 +698,29 @@ def test_string_py_compat_boolean(function_name, variant):
             assert arrow_func(ar)[0].as_py() == getattr(c, py_name)()
 
 
+@pytest.mark.pandas
+def test_replace_slice():
+    offsets = range(-3, 4)
+
+    arr = pa.array([None, '', 'a', 'ab', 'abc', 'abcd', 'abcde'])
+    series = arr.to_pandas()
+    for start in offsets:
+        for stop in offsets:
+            expected = series.str.slice_replace(start, stop, 'XX')
+            actual = pc.binary_replace_slice(
+                arr, start=start, stop=stop, replacement='XX')
+            assert actual.tolist() == expected.tolist()
+
+    arr = pa.array([None, '', 'π', 'πb', 'πbθ', 'πbθd', 'πbθde'])
+    series = arr.to_pandas()
+    for start in offsets:
+        for stop in offsets:
+            expected = series.str.slice_replace(start, stop, 'XX')
+            actual = pc.utf8_replace_slice(
+                arr, start=start, stop=stop, replacement='XX')
+            assert actual.tolist() == expected.tolist()
+
+
 def test_replace_plain():
     ar = pa.array(['foo', 'food', None])
     ar = pc.replace_substring(ar, pattern='foo', replacement='bar')

From 1830d1558be8741e7412f6af30582ff457f0f34f Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Thu, 10 Jun 2021 16:02:22 -0400
Subject: [PATCH 394/719] ARROW-12952: [C++] Add count_substring_regex

This also adds the regular case-insensitive count_substring.

Closes #10471 from lidavidm/arrow-12952

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 .../arrow/compute/kernels/scalar_string.cc    | 103 +++++++++++--
 .../compute/kernels/scalar_string_test.cc     | 143 ++++++++++++------
 docs/source/cpp/compute.rst                   |   2 +
 docs/source/python/api/compute.rst            |   1 +
 python/pyarrow/compute.py                     |  23 ++-
 python/pyarrow/tests/test_compute.py          |  33 ++--
 6 files changed, 238 insertions(+), 67 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/scalar_string.cc b/cpp/src/arrow/compute/kernels/scalar_string.cc
index b6c1b8f6261..cd054fcea0e 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string.cc
@@ -980,13 +980,70 @@ struct CountSubstring {
   }
 };
 
+#ifdef ARROW_WITH_RE2
+struct CountSubstringRegex {
+  std::unique_ptr<RE2> regex_match_;
+
+  explicit CountSubstringRegex(const MatchSubstringOptions& options, bool literal = false)
+      : regex_match_(new RE2(options.pattern,
+                             RegexSubstringMatcher::MakeRE2Options(options, literal))) {}
+
+  static Result<CountSubstringRegex> Make(const MatchSubstringOptions& options,
+                                          bool literal = false) {
+    CountSubstringRegex counter(options, literal);
+    RETURN_NOT_OK(RegexStatus(*counter.regex_match_));
+    return std::move(counter);
+  }
+
+  template <typename OutValue, typename... Ignored>
+  OutValue Call(KernelContext*, util::string_view val, Status*) const {
+    OutValue count = 0;
+    re2::StringPiece input(val.data(), val.size());
+    auto last_size = input.size();
+    while (re2::RE2::FindAndConsume(&input, *regex_match_)) {
+      count++;
+      if (last_size == input.size()) {
+        // 0-length match
+        if (input.size() > 0) {
+          input.remove_prefix(1);
+        } else {
+          break;
+        }
+      }
+      last_size = input.size();
+    }
+    return count;
+  }
+};
+
+template <typename InputType>
+struct CountSubstringRegexExec {
+  using OffsetType = typename TypeTraits<InputType>::OffsetType;
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    const MatchSubstringOptions& options = MatchSubstringState::Get(ctx);
+    ARROW_ASSIGN_OR_RAISE(auto counter, CountSubstringRegex::Make(options));
+    applicator::ScalarUnaryNotNullStateful<OffsetType, InputType, CountSubstringRegex>
+        kernel{std::move(counter)};
+    return kernel.Exec(ctx, batch, out);
+  }
+};
+#endif
+
 template <typename InputType>
 struct CountSubstringExec {
   using OffsetType = typename TypeTraits<InputType>::OffsetType;
   static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     const MatchSubstringOptions& options = MatchSubstringState::Get(ctx);
     if (options.ignore_case) {
-      return Status::NotImplemented("count_substring with ignore_case");
+#ifdef ARROW_WITH_RE2
+      ARROW_ASSIGN_OR_RAISE(auto counter,
+                            CountSubstringRegex::Make(options, /*literal=*/true));
+      applicator::ScalarUnaryNotNullStateful<OffsetType, InputType, CountSubstringRegex>
+          kernel{std::move(counter)};
+      return kernel.Exec(ctx, batch, out);
+#else
+      return Status::NotImplemented("ignore_case requires RE2");
+#endif
     }
     applicator::ScalarUnaryNotNullStateful<OffsetType, InputType, CountSubstring> kernel{
         CountSubstring(PlainSubstringMatcher(options))};
@@ -1001,21 +1058,41 @@ const FunctionDoc count_substring_doc(
      "Null inputs emit null. The pattern must be given in MatchSubstringOptions."),
     {"strings"}, "MatchSubstringOptions");
 
+#ifdef ARROW_WITH_RE2
+const FunctionDoc count_substring_regex_doc(
+    "Count occurrences of substring",
+    ("For each string in `strings`, emit the number of occurrences of the given "
+     "regex pattern.\n"
+     "Null inputs emit null. The pattern must be given in MatchSubstringOptions."),
+    {"strings"}, "MatchSubstringOptions");
+#endif
+
 void AddCountSubstring(FunctionRegistry* registry) {
-  auto func = std::make_shared<ScalarFunction>("count_substring", Arity::Unary(),
-                                               &count_substring_doc);
-  for (const auto& ty : BaseBinaryTypes()) {
-    std::shared_ptr<DataType> offset_type;
-    if (ty->id() == Type::type::LARGE_BINARY || ty->id() == Type::type::LARGE_STRING) {
-      offset_type = int64();
-    } else {
-      offset_type = int32();
+  {
+    auto func = std::make_shared<ScalarFunction>("count_substring", Arity::Unary(),
+                                                 &count_substring_doc);
+    for (const auto& ty : BaseBinaryTypes()) {
+      auto offset_type = offset_bit_width(ty->id()) == 64 ? int64() : int32();
+      DCHECK_OK(func->AddKernel({ty}, offset_type,
+                                GenerateTypeAgnosticVarBinaryBase<CountSubstringExec>(ty),
+                                MatchSubstringState::Init));
     }
-    DCHECK_OK(func->AddKernel({ty}, offset_type,
-                              GenerateTypeAgnosticVarBinaryBase<CountSubstringExec>(ty),
-                              MatchSubstringState::Init));
+    DCHECK_OK(registry->AddFunction(std::move(func)));
   }
-  DCHECK_OK(registry->AddFunction(std::move(func)));
+#ifdef ARROW_WITH_RE2
+  {
+    auto func = std::make_shared<ScalarFunction>("count_substring_regex", Arity::Unary(),
+                                                 &count_substring_regex_doc);
+    for (const auto& ty : BaseBinaryTypes()) {
+      auto offset_type = offset_bit_width(ty->id()) == 64 ? int64() : int32();
+      DCHECK_OK(
+          func->AddKernel({ty}, offset_type,
+                          GenerateTypeAgnosticVarBinaryBase<CountSubstringRegexExec>(ty),
+                          MatchSubstringState::Init));
+    }
+    DCHECK_OK(registry->AddFunction(std::move(func)));
+  }
+#endif
 }
 
 // Slicing
diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
index 7d52d6aacf2..2053dbaa971 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
@@ -80,49 +80,7 @@ TYPED_TEST(TestBinaryKernels, BinaryLength) {
                    this->offset_type(), "[3, null, 10, 0, 1]");
 }
 
-TYPED_TEST(TestBinaryKernels, FindSubstring) {
-  MatchSubstringOptions options{"ab"};
-  this->CheckUnary("find_substring", "[]", this->offset_type(), "[]", &options);
-  this->CheckUnary("find_substring", R"(["abc", "acb", "cab", null, "bac"])",
-                   this->offset_type(), "[0, -1, 1, null, -1]", &options);
-
-  MatchSubstringOptions options_repeated{"abab"};
-  this->CheckUnary("find_substring", R"(["abab", "ab", "cababc", null, "bac"])",
-                   this->offset_type(), "[0, -1, 1, null, -1]", &options_repeated);
-
-  MatchSubstringOptions options_double_char{"aab"};
-  this->CheckUnary("find_substring", R"(["aacb", "aab", "ab", "aaab"])",
-                   this->offset_type(), "[-1, 0, -1, 1]", &options_double_char);
-
-  MatchSubstringOptions options_double_char_2{"bbcaa"};
-  this->CheckUnary("find_substring", R"(["abcbaabbbcaabccabaab"])", this->offset_type(),
-                   "[7]", &options_double_char_2);
-
-  MatchSubstringOptions options_empty{""};
-  this->CheckUnary("find_substring", R"(["", "a", null])", this->offset_type(),
-                   "[0, 0, null]", &options_empty);
-}
-
-TYPED_TEST(TestBinaryKernels, CountSubstring) {
-  MatchSubstringOptions options{"aba"};
-  this->CheckUnary("count_substring", "[]", this->offset_type(), "[]", &options);
-  this->CheckUnary(
-      "count_substring",
-      R"(["", null, "ab", "aba", "baba", "ababa", "abaaba", "babacaba", "ABA"])",
-      this->offset_type(), "[0, null, 0, 1, 1, 1, 2, 2, 0]", &options);
-
-  MatchSubstringOptions options_empty{""};
-  this->CheckUnary("count_substring", R"(["", null, "abc"])", this->offset_type(),
-                   "[1, null, 4]", &options_empty);
-
-  MatchSubstringOptions options_repeated{"aaa"};
-  this->CheckUnary("count_substring", R"(["", "aaaa", "aaaaa", "aaaaaa", "aaá"])",
-                   this->offset_type(), "[0, 1, 1, 2, 0]", &options_repeated);
-
-  // TODO: case-insensitive
-}
-
-TYPED_TEST(TestBinaryKernels, AsciiReplaceSlice) {
+TYPED_TEST(TestBinaryKernels, BinaryReplaceSlice) {
   ReplaceSliceOptions options{0, 1, "XX"};
   this->CheckUnary("binary_replace_slice", "[]", this->type(), "[]", &options);
   this->CheckUnary("binary_replace_slice", R"([null, "", "a", "ab", "abc"])",
@@ -172,6 +130,105 @@ TYPED_TEST(TestBinaryKernels, AsciiReplaceSlice) {
                    &options_neg_flip);
 }
 
+TYPED_TEST(TestBinaryKernels, FindSubstring) {
+  MatchSubstringOptions options{"ab"};
+  this->CheckUnary("find_substring", "[]", this->offset_type(), "[]", &options);
+  this->CheckUnary("find_substring", R"(["abc", "acb", "cab", null, "bac"])",
+                   this->offset_type(), "[0, -1, 1, null, -1]", &options);
+
+  MatchSubstringOptions options_repeated{"abab"};
+  this->CheckUnary("find_substring", R"(["abab", "ab", "cababc", null, "bac"])",
+                   this->offset_type(), "[0, -1, 1, null, -1]", &options_repeated);
+
+  MatchSubstringOptions options_double_char{"aab"};
+  this->CheckUnary("find_substring", R"(["aacb", "aab", "ab", "aaab"])",
+                   this->offset_type(), "[-1, 0, -1, 1]", &options_double_char);
+
+  MatchSubstringOptions options_double_char_2{"bbcaa"};
+  this->CheckUnary("find_substring", R"(["abcbaabbbcaabccabaab"])", this->offset_type(),
+                   "[7]", &options_double_char_2);
+
+  MatchSubstringOptions options_empty{""};
+  this->CheckUnary("find_substring", R"(["", "a", null])", this->offset_type(),
+                   "[0, 0, null]", &options_empty);
+}
+
+TYPED_TEST(TestBinaryKernels, CountSubstring) {
+  MatchSubstringOptions options{"aba"};
+  this->CheckUnary("count_substring", "[]", this->offset_type(), "[]", &options);
+  this->CheckUnary(
+      "count_substring",
+      R"(["", null, "ab", "aba", "baba", "ababa", "abaaba", "babacaba", "ABA"])",
+      this->offset_type(), "[0, null, 0, 1, 1, 1, 2, 2, 0]", &options);
+
+  MatchSubstringOptions options_empty{""};
+  this->CheckUnary("count_substring", R"(["", null, "abc"])", this->offset_type(),
+                   "[1, null, 4]", &options_empty);
+
+  MatchSubstringOptions options_repeated{"aaa"};
+  this->CheckUnary("count_substring", R"(["", "aaaa", "aaaaa", "aaaaaa", "aaá"])",
+                   this->offset_type(), "[0, 1, 1, 2, 0]", &options_repeated);
+}
+
+#ifdef ARROW_WITH_RE2
+TYPED_TEST(TestBinaryKernels, CountSubstringRegex) {
+  MatchSubstringOptions options{"aba"};
+  this->CheckUnary("count_substring_regex", "[]", this->offset_type(), "[]", &options);
+  this->CheckUnary(
+      "count_substring",
+      R"(["", null, "ab", "aba", "baba", "ababa", "abaaba", "babacaba", "ABA"])",
+      this->offset_type(), "[0, null, 0, 1, 1, 1, 2, 2, 0]", &options);
+
+  MatchSubstringOptions options_empty{""};
+  this->CheckUnary("count_substring_regex", R"(["", null, "abc"])", this->offset_type(),
+                   "[1, null, 4]", &options_empty);
+
+  MatchSubstringOptions options_as{"a+"};
+  this->CheckUnary("count_substring_regex", R"(["", "bacaaadaaaa", "c", "AAA"])",
+                   this->offset_type(), "[0, 3, 0, 0]", &options_as);
+
+  MatchSubstringOptions options_empty_match{"a*"};
+  this->CheckUnary("count_substring_regex", R"(["", "bacaaadaaaa", "c", "AAA"])",
+                   // 7 is because it matches at |b|a|c|aaa|d|aaaa|
+                   this->offset_type(), "[1, 7, 2, 4]", &options_empty_match);
+
+  MatchSubstringOptions options_repeated{"aaa"};
+  this->CheckUnary("count_substring", R"(["", "aaaa", "aaaaa", "aaaaaa", "aaá"])",
+                   this->offset_type(), "[0, 1, 1, 2, 0]", &options_repeated);
+}
+
+TYPED_TEST(TestBinaryKernels, CountSubstringIgnoreCase) {
+  MatchSubstringOptions options{"aba", /*ignore_case=*/true};
+  this->CheckUnary("count_substring", "[]", this->offset_type(), "[]", &options);
+  this->CheckUnary(
+      "count_substring",
+      R"(["", null, "ab", "aBa", "bAbA", "aBaBa", "abaAbA", "babacaba", "ABA"])",
+      this->offset_type(), "[0, null, 0, 1, 1, 1, 2, 2, 1]", &options);
+
+  MatchSubstringOptions options_empty{"", /*ignore_case=*/true};
+  this->CheckUnary("count_substring", R"(["", null, "abc"])", this->offset_type(),
+                   "[1, null, 4]", &options_empty);
+}
+
+TYPED_TEST(TestBinaryKernels, CountSubstringRegexIgnoreCase) {
+  MatchSubstringOptions options_as{"a+", /*ignore_case=*/true};
+  this->CheckUnary("count_substring_regex", R"(["", "bacAaAdaAaA", "c", "AAA"])",
+                   this->offset_type(), "[0, 3, 0, 1]", &options_as);
+
+  MatchSubstringOptions options_empty_match{"a*", /*ignore_case=*/true};
+  this->CheckUnary("count_substring_regex", R"(["", "bacAaAdaAaA", "c", "AAA"])",
+                   this->offset_type(), "[1, 7, 2, 2]", &options_empty_match);
+}
+#else
+TYPED_TEST(TestBinaryKernels, CountSubstringIgnoreCase) {
+  Datum input = ArrayFromJSON(this->type(), R"(["a"])");
+  MatchSubstringOptions options{"a", /*ignore_case=*/true};
+  EXPECT_RAISES_WITH_MESSAGE_THAT(NotImplemented,
+                                  ::testing::HasSubstr("ignore_case requires RE2"),
+                                  CallFunction("count_substring", {input}, &options));
+}
+#endif
+
 template <typename TestType>
 class TestStringKernels : public BaseTestStringKernels<TestType> {};
 
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index b28e3928a74..91ee6bdf599 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -572,6 +572,8 @@ Containment tests
 +===========================+============+====================================+====================+========================================+
 | count_substring           | Unary      | String-like                        | Int32 or Int64 (1) | :struct:`MatchSubstringOptions`        |
 +---------------------------+------------+------------------------------------+--------------------+----------------------------------------+
+| count_substring_regex     | Unary      | String-like                        | Int32 or Int64 (1) | :struct:`MatchSubstringOptions`        |
++---------------------------+------------+------------------------------------+--------------------+----------------------------------------+
 | ends_with                 | Unary      | String-like                        | Boolean (2)        | :struct:`MatchSubstringOptions`        |
 +---------------------------+------------+------------------------------------+--------------------+----------------------------------------+
 | find_substring            | Unary      | String-like                        | Int32 or Int64 (3) | :struct:`MatchSubstringOptions`        |
diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst
index 2e37f9169a7..dd722e44f05 100644
--- a/docs/source/python/api/compute.rst
+++ b/docs/source/python/api/compute.rst
@@ -185,6 +185,7 @@ Containment tests
    :toctree: ../generated/
 
    count_substring
+   count_substring_regex
    ends_with
    find_substring
    index_in
diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py
index 44282369f87..b8bd9e65f17 100644
--- a/python/pyarrow/compute.py
+++ b/python/pyarrow/compute.py
@@ -292,7 +292,7 @@ def cast(arr, target_type, safe=True):
     return call_function("cast", [arr], options)
 
 
-def count_substring(array, pattern):
+def count_substring(array, pattern, *, ignore_case=False):
     """
     Count the occurrences of substring *pattern* in each value of a
     string array.
@@ -308,7 +308,26 @@ def count_substring(array, pattern):
     result : pyarrow.Array or pyarrow.ChunkedArray
     """
     return call_function("count_substring", [array],
-                         MatchSubstringOptions(pattern))
+                         MatchSubstringOptions(pattern, ignore_case))
+
+
+def count_substring_regex(array, pattern, *, ignore_case=False):
+    """
+    Count the non-overlapping matches of regex *pattern* in each value
+    of a string array.
+
+    Parameters
+    ----------
+    array : pyarrow.Array or pyarrow.ChunkedArray
+    pattern : str
+        pattern to search for exact matches
+
+    Returns
+    -------
+    result : pyarrow.Array or pyarrow.ChunkedArray
+    """
+    return call_function("count_substring_regex", [array],
+                         MatchSubstringOptions(pattern, ignore_case))
 
 
 def find_substring(array, pattern):
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index 8de24c8c249..1ed582db831 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -291,16 +291,31 @@ def test_variance():
 
 
 def test_count_substring():
-    arr = pa.array(["ab", "cab", "abcab", "ba", "AB", None])
-    result = pc.count_substring(arr, "ab")
-    expected = pa.array([1, 1, 2, 0, 0, None], type=pa.int32())
-    assert expected.equals(result)
+    for (ty, offset) in [(pa.string(), pa.int32()),
+                         (pa.large_string(), pa.int64())]:
+        arr = pa.array(["ab", "cab", "abcab", "ba", "AB", None], type=ty)
 
-    arr = pa.array(["ab", "cab", "abcab", "ba", "AB", None],
-                   type=pa.large_string())
-    result = pc.count_substring(arr, "ab")
-    expected = pa.array([1, 1, 2, 0, 0, None], type=pa.int64())
-    assert expected.equals(result)
+        result = pc.count_substring(arr, "ab")
+        expected = pa.array([1, 1, 2, 0, 0, None], type=offset)
+        assert expected.equals(result)
+
+        result = pc.count_substring(arr, "ab", ignore_case=True)
+        expected = pa.array([1, 1, 2, 0, 1, None], type=offset)
+        assert expected.equals(result)
+
+
+def test_count_substring_regex():
+    for (ty, offset) in [(pa.string(), pa.int32()),
+                         (pa.large_string(), pa.int64())]:
+        arr = pa.array(["ab", "cab", "baAacaa", "ba", "AB", None], type=ty)
+
+        result = pc.count_substring_regex(arr, "a+")
+        expected = pa.array([1, 1, 3, 1, 0, None], type=offset)
+        assert expected.equals(result)
+
+        result = pc.count_substring_regex(arr, "a+", ignore_case=True)
+        expected = pa.array([1, 1, 2, 1, 1, None], type=offset)
+        assert expected.equals(result)
 
 
 def test_find_substring():

From f1600538634d0f49f76d2070a7b08c4e638f24d3 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Thu, 10 Jun 2021 19:12:29 -0500
Subject: [PATCH 395/719] ARROW-13041: [C++] Ensure unary kernels
 zero-initialize data behind null entries

This avoids unwillingly leaking private data in the output of unary kernels such as "negate_checked".

Closes #10508 from pitrou/ARROW-13041-unary-kernel-null

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Jonathan Keane <jkeane@gmail.com>
---
 cpp/src/arrow/compute/kernels/codegen_internal.h | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/codegen_internal.h b/cpp/src/arrow/compute/kernels/codegen_internal.h
index 913c4dacf56..891f90a97d4 100644
--- a/cpp/src/arrow/compute/kernels/codegen_internal.h
+++ b/cpp/src/arrow/compute/kernels/codegen_internal.h
@@ -18,6 +18,7 @@
 #pragma once
 
 #include <cstdint>
+#include <cstring>
 #include <memory>
 #include <string>
 #include <utility>
@@ -630,7 +631,7 @@ struct ScalarUnaryNotNullStateful {
           },
           [&]() {
             // null
-            ++out_data;
+            *out_data++ = OutValue{};
           });
       return st;
     }
@@ -700,7 +701,11 @@ struct ScalarUnaryNotNullStateful {
             functor.op.template Call<OutValue, Arg0Value>(ctx, v, &st)
                 .ToBytes(out_data++->data());
           },
-          [&]() { ++out_data; });
+          [&]() {
+            // null
+            std::memset(out_data, 0, sizeof(*out_data));
+            ++out_data;
+          });
       return st;
     }
   };

From 6b15e1aa1c270f590c3f680b5ac4063e9e57e62e Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Thu, 10 Jun 2021 19:15:35 -0500
Subject: [PATCH 396/719] ARROW-13039: [R] Fix error message handling

`cpp11::stop` exposes a printf()-style interface.  By passing the error message
as the first argument, any `%` character in the message would be mistakenly
interpreted as a format code and would trigger reading additional data from the C stack.

Closes #10506 from pitrou/ARROW-13039-r-error-formatting

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Jonathan Keane <jkeane@gmail.com>
---
 ci/scripts/r_valgrind.sh     | 4 +++-
 dev/tasks/tasks.yml          | 1 +
 docker-compose.yml           | 1 +
 r/inst/build_arrow_static.sh | 2 +-
 r/src/arrow_types.h          | 6 ++++--
 5 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/ci/scripts/r_valgrind.sh b/ci/scripts/r_valgrind.sh
index 43f8c26739a..1db526eaf86 100755
--- a/ci/scripts/r_valgrind.sh
+++ b/ci/scripts/r_valgrind.sh
@@ -22,6 +22,8 @@ set -ex
 
 source_dir=${1}/r
 
+export CMAKE_BUILD_TYPE=RelWithDebInfo
+
 ${R_BIN} CMD INSTALL ${source_dir}
 pushd ${source_dir}/tests
 
@@ -42,4 +44,4 @@ fi
 # We might also considering using the greps that LibthGBM uses:
 # https://github.com/microsoft/LightGBM/blob/fa6d356555f9ef888acf5f5e259dca958ca24f6d/.ci/test_r_package_valgrind.sh#L20-L85
 
-popd
\ No newline at end of file
+popd
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index 1720b9316fd..665f27b4285 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -844,6 +844,7 @@ tasks:
     template: docker-tests/azure.linux.yml
     params:
       env:
+        ARROW_R_DEV: "TRUE"
         UBUNTU: 18.04
       run: ubuntu-r-valgrind
 
diff --git a/docker-compose.yml b/docker-compose.yml
index a0605c23881..1133bfa3b29 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1089,6 +1089,7 @@ services:
         r_bin: RDvalgrind
     environment:
       <<: *ccache
+      ARROW_R_DEV: ${ARROW_R_DEV}
       # AVX512 not supported by Valgrind (similar to ARROW-9851) some runners support AVX512 and some do not
       # so some build might pass without this setting, but we want to ensure that we stay to AVX2 regardless of runner.
       EXTRA_CMAKE_FLAGS: "-DARROW_RUNTIME_SIMD_LEVEL=AVX2"
diff --git a/r/inst/build_arrow_static.sh b/r/inst/build_arrow_static.sh
index 5ae615dae9b..cac0619ee61 100755
--- a/r/inst/build_arrow_static.sh
+++ b/r/inst/build_arrow_static.sh
@@ -69,7 +69,7 @@ ${CMAKE} -DARROW_BOOST_USE_SHARED=OFF \
     -DARROW_WITH_UTF8PROC=${ARROW_WITH_UTF8PROC:-ON} \
     -DARROW_WITH_ZLIB=${ARROW_WITH_ZLIB:-$ARROW_DEFAULT_PARAM} \
     -DARROW_WITH_ZSTD=${ARROW_WITH_ZSTD:-$ARROW_DEFAULT_PARAM} \
-    -DCMAKE_BUILD_TYPE=Release \
+    -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE:-Release} \
     -DCMAKE_INSTALL_LIBDIR=lib \
     -DCMAKE_INSTALL_PREFIX=${DEST_DIR} \
     -DCMAKE_EXPORT_NO_PACKAGE_REGISTRY=ON \
diff --git a/r/src/arrow_types.h b/r/src/arrow_types.h
index ca4ca9519c3..09511e32e87 100644
--- a/r/src/arrow_types.h
+++ b/r/src/arrow_types.h
@@ -79,8 +79,10 @@ arrow::MemoryPool* gc_memory_pool();
 namespace arrow {
 
 static inline void StopIfNotOk(const Status& status) {
-  if (!(status.ok())) {
-    cpp11::stop(status.ToString());
+  if (!status.ok()) {
+    // ARROW-13039: be careful not to interpret our error message as a %-format string
+    std::string s = status.ToString();
+    cpp11::stop("%s", s.c_str());
   }
 }
 

From 4265666293796a9bc40ff0131caa7f1b3c2ff672 Mon Sep 17 00:00:00 2001
From: Dominik Moritz <domoritz@gmail.com>
Date: Thu, 10 Jun 2021 21:03:59 -0700
Subject: [PATCH 397/719] ARROW-13031: [JS] Support arm in closure compiler on
 macOS

Includes https://github.com/google/closure-compiler-npm/pull/215

Closes #10500 from domoritz/closure-compiler-arm

Authored-by: Dominik Moritz <domoritz@gmail.com>
Signed-off-by: Dominik Moritz <domoritz@gmail.com>
---
 js/package.json |  2 +-
 js/yarn.lock    | 56 ++++++++++++++++++++++++-------------------------
 2 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/js/package.json b/js/package.json
index 2a8a70bdf9d..64d3a981395 100644
--- a/js/package.json
+++ b/js/package.json
@@ -76,7 +76,7 @@
     "eslint-plugin-jest": "24.3.6",
     "esm": "https://github.com/jsg2021/esm/releases/download/v3.x.x-pr883/esm-3.x.x-pr883.tgz",
     "glob": "7.1.7",
-    "google-closure-compiler": "20210505.0.0",
+    "google-closure-compiler": "20210601.0.0",
     "gulp": "4.0.2",
     "gulp-json-transform": "0.4.7",
     "gulp-rename": "2.0.0",
diff --git a/js/yarn.lock b/js/yarn.lock
index e8ef060e131..23854eabdd9 100644
--- a/js/yarn.lock
+++ b/js/yarn.lock
@@ -4543,40 +4543,40 @@ glogg@^1.0.0:
   dependencies:
     sparkles "^1.0.0"
 
-google-closure-compiler-java@^20210505.0.0:
-  version "20210505.0.0"
-  resolved "https://registry.yarnpkg.com/google-closure-compiler-java/-/google-closure-compiler-java-20210505.0.0.tgz#f1acdedbff960ad9c81a6b39d3d02876e33b2141"
-  integrity sha512-h+DfQAaaCLFmmtasOS8eyh0M4D+JInTJfEP4byV5R1cnMninpGGLHOG3PNgLLzkXkIO/fu4ILEcVzoGmgJEoMA==
-
-google-closure-compiler-linux@^20210505.0.0:
-  version "20210505.0.0"
-  resolved "https://registry.yarnpkg.com/google-closure-compiler-linux/-/google-closure-compiler-linux-20210505.0.0.tgz#87ceaa5750d447725b2dd556b01a2e36e5cbf9cd"
-  integrity sha512-ADN2kFfIR1NiR24kLYb4YkX4MeXDJaT5OfRQEkiuIdZMtd28oEkm80LxCGuC7ftKEixoMm3f9/OG01B4U+xsnA==
-
-google-closure-compiler-osx@^20210505.0.0:
-  version "20210505.0.0"
-  resolved "https://registry.yarnpkg.com/google-closure-compiler-osx/-/google-closure-compiler-osx-20210505.0.0.tgz#1c31cd460cb6b8357a94add25b3500436c69ce26"
-  integrity sha512-JTwdh23aD2pwRU4QZjujxp/+rGfhex3utNWEdUDRMNpUGstUK7XPCDG8jNBtUpyuRiXFnpZa90qButqRgotQBA==
-
-google-closure-compiler-windows@^20210505.0.0:
-  version "20210505.0.0"
-  resolved "https://registry.yarnpkg.com/google-closure-compiler-windows/-/google-closure-compiler-windows-20210505.0.0.tgz#45559acde54d1b85973c9984253c15bfa98b5cfb"
-  integrity sha512-bKTbg/f4ak72OggEMaH/7oExqOO9dS+TxwGhoovYOt/YaVR/8MDfGdxsOhqoiboiFwYysTPz8bwINjYQK6AwnA==
-
-google-closure-compiler@20210505.0.0:
-  version "20210505.0.0"
-  resolved "https://registry.yarnpkg.com/google-closure-compiler/-/google-closure-compiler-20210505.0.0.tgz#8a321ac49c9d3f0df30d7e15c2adbb2b11c5dd89"
-  integrity sha512-moeYaj4S6YTdOOvjv1ZLdUld/2YXw7q1GqUUHJJd+rE/uViyesozg8yKQZWcB3tvurhb+qEvFFet8CYoeaQHng==
+google-closure-compiler-java@^20210601.0.0:
+  version "20210601.0.0"
+  resolved "https://registry.yarnpkg.com/google-closure-compiler-java/-/google-closure-compiler-java-20210601.0.0.tgz#88dc11b334bee6a704d9674c5143fd2e0d553517"
+  integrity sha512-bH6nIwOmp4qDWvlbXx5/DE3XA2aDGQoCpmRYZJGONY1Sy6Xfbq0ioXRHH9eBDP9hxhCJ5Sd/K89A0NZ8Nz9RJA==
+
+google-closure-compiler-linux@^20210601.0.0:
+  version "20210601.0.0"
+  resolved "https://registry.yarnpkg.com/google-closure-compiler-linux/-/google-closure-compiler-linux-20210601.0.0.tgz#6e5dd7b00b96dc1fd1ba30e3401af85558768322"
+  integrity sha512-rnEQt7zz/1P1SfPhJiHQpfCgMPrsVVyEgDs09h67xn6+LXa9L0RP+hrJDEHqSWwjDPz0BkfUUv6zkqZvp1h/lw==
+
+google-closure-compiler-osx@^20210601.0.0:
+  version "20210601.0.0"
+  resolved "https://registry.yarnpkg.com/google-closure-compiler-osx/-/google-closure-compiler-osx-20210601.0.0.tgz#e23356bc9ef6e68c2980f60a207f603767b50b21"
+  integrity sha512-A5r4s/WthR2iLMM0mxsluw8EW2AcOomC5ri/H6FjzpMq0RVEnLTgaGYdXolUAfEzH/7XtJJT2+JkYk3HSLCtrg==
+
+google-closure-compiler-windows@^20210601.0.0:
+  version "20210601.0.0"
+  resolved "https://registry.yarnpkg.com/google-closure-compiler-windows/-/google-closure-compiler-windows-20210601.0.0.tgz#b5400d06bbf0bbd2602ee3ae0c2bc7ebd5829692"
+  integrity sha512-6r94bPShnB0XXh9+5/qXGDHJN2PQGhF9yJPcgBZj+FAZlQGzlYkT0pkyp+loZT3lG+YRbjD28Lgo7xMcY4xgkA==
+
+google-closure-compiler@20210601.0.0:
+  version "20210601.0.0"
+  resolved "https://registry.yarnpkg.com/google-closure-compiler/-/google-closure-compiler-20210601.0.0.tgz#34597c33c9285ebd3a5364f5299f6c9ddc9fc88a"
+  integrity sha512-lzzEoG2VTB7uUjnWnMyeZMU163w69HJpM27yh8Up9Ha5McHZeESjt3NRwU8cWMbCRdY06nFbRCDIVCRcadHCiw==
   dependencies:
     chalk "2.x"
-    google-closure-compiler-java "^20210505.0.0"
+    google-closure-compiler-java "^20210601.0.0"
     minimist "1.x"
     vinyl "2.x"
     vinyl-sourcemaps-apply "^0.2.0"
   optionalDependencies:
-    google-closure-compiler-linux "^20210505.0.0"
-    google-closure-compiler-osx "^20210505.0.0"
-    google-closure-compiler-windows "^20210505.0.0"
+    google-closure-compiler-linux "^20210601.0.0"
+    google-closure-compiler-osx "^20210601.0.0"
+    google-closure-compiler-windows "^20210601.0.0"
 
 graceful-fs@^4.0.0, graceful-fs@^4.1.11, graceful-fs@^4.1.15, graceful-fs@^4.1.2, graceful-fs@^4.1.6, graceful-fs@^4.2.0, graceful-fs@^4.2.2, graceful-fs@^4.2.3, graceful-fs@^4.2.4:
   version "4.2.6"

From e6c0b8170bb5d1e516d544494f7797a7bc380e81 Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Fri, 11 Jun 2021 14:37:04 +0900
Subject: [PATCH 398/719] ARROW-13045: [Packaging][RPM][deb] Don't install
 system utf8proc if it's old

See also:

  * #10477 30f52a202d0a2f6393366ea1e4a8e5182077c72a
  * ARROW-13002

Closes #10514 from kou/linux-packages-find-utf8proc

Authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 dev/tasks/linux-packages/apache-arrow/Rakefile        | 11 +++++++++++
 .../apache-arrow/apt/ubuntu-bionic/Dockerfile         |  1 -
 .../linux-packages/apache-arrow/debian/control.in     |  4 ++--
 .../apache-arrow/debian/libarrow-dev.install          |  3 +--
 .../linux-packages/apache-arrow/yum/arrow.spec.in     |  6 ++++--
 .../apache-arrow/yum/centos-7/Dockerfile              |  1 -
 6 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/dev/tasks/linux-packages/apache-arrow/Rakefile b/dev/tasks/linux-packages/apache-arrow/Rakefile
index 8197130b403..e8de53fce4b 100644
--- a/dev/tasks/linux-packages/apache-arrow/Rakefile
+++ b/dev/tasks/linux-packages/apache-arrow/Rakefile
@@ -116,11 +116,22 @@ class ApacheArrowPackageTask < PackageTask
     control.gsub(/@USE_SYSTEM_C_ARES@/, use_system_c_ares)
   end
 
+  def apt_prepare_debian_control_utf8proc(control, target)
+    case target
+    when /\Aubuntu-bionic/
+      use_system_utf8proc = "#"
+    else
+      use_system_utf8proc = ""
+    end
+    control.gsub(/@USE_SYSTEM_UTF8PROC@/, use_system_utf8proc)
+  end
+
   def apt_prepare_debian_control(control_in, target)
     control = control_in.dup
     control = apt_prepare_debian_control_cuda_architecture(control, target)
     control = apt_prepare_debian_control_grpc(control, target)
     control = apt_prepare_debian_control_c_ares(control, target)
+    control = apt_prepare_debian_control_utf8proc(control, target)
     control
   end
 end
diff --git a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-bionic/Dockerfile b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-bionic/Dockerfile
index af5aac1ed86..b392079fbe4 100644
--- a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-bionic/Dockerfile
+++ b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-bionic/Dockerfile
@@ -52,7 +52,6 @@ RUN \
     libre2-dev \
     libsnappy-dev \
     libssl-dev \
-    libutf8proc-dev \
     libzstd-dev \
     llvm-10-dev \
     lsb-release \
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/control.in b/dev/tasks/linux-packages/apache-arrow/debian/control.in
index e50eeaff581..b20955f467e 100644
--- a/dev/tasks/linux-packages/apache-arrow/debian/control.in
+++ b/dev/tasks/linux-packages/apache-arrow/debian/control.in
@@ -24,7 +24,7 @@ Build-Depends:
   libre2-dev,
   libsnappy-dev,
   libssl-dev,
-  libutf8proc-dev,
+@USE_SYSTEM_UTF8PROC@  libutf8proc-dev,
   libzstd-dev,
   ninja-build,
   nvidia-cuda-toolkit [!arm64],
@@ -134,7 +134,7 @@ Depends:
   libre2-dev,
   libsnappy-dev,
   libssl-dev,
-  libutf8proc-dev,
+@USE_SYSTEM_UTF8PROC@  libutf8proc-dev,
   libzstd-dev,
 @USE_SYSTEM_GRPC@  protobuf-compiler-grpc,
   zlib1g-dev
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dev.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dev.install
index 52fbbb32d81..83ddad126f4 100644
--- a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dev.install
+++ b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dev.install
@@ -7,8 +7,7 @@ usr/lib/*/cmake/arrow/FindArrow.cmake
 usr/lib/*/cmake/arrow/FindBrotli.cmake
 usr/lib/*/cmake/arrow/FindLz4.cmake
 usr/lib/*/cmake/arrow/FindSnappy.cmake
-usr/lib/*/cmake/arrow/Findutf8proc.cmake
-usr/lib/*/cmake/arrow/Findzstd.cmake
+usr/lib/*/cmake/arrow/Find[uz]*.cmake
 usr/lib/*/cmake/arrow/arrow-config.cmake
 usr/lib/*/libarrow.a
 usr/lib/*/libarrow.so
diff --git a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in
index eb322582ba1..28ebc660708 100644
--- a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in
+++ b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in
@@ -43,12 +43,14 @@
 %define use_python (!%{is_amazon_linux})
 # TODO: Enable this. This works on local but is fragile on GitHub Actions and
 # Travis CI.
-# %define use_s3 (%{_centos_ver} >= 8)
+# %define use_s3 (%{rhel} >= 8)
 %define use_s3 0
 
 %define have_rapidjson (%{rhel} == 7)
 %define have_re2 (%{rhel} >= 8)
-%define have_utf8proc (%{rhel} == 7)
+# EPEL ships utf8proc but it's old.
+# %define have_utf8proc (%{rhel} == 7)
+%define have_utf8proc 0
 
 Name:		@PACKAGE@
 Version:	@VERSION@
diff --git a/dev/tasks/linux-packages/apache-arrow/yum/centos-7/Dockerfile b/dev/tasks/linux-packages/apache-arrow/yum/centos-7/Dockerfile
index b668165758f..6856e385476 100644
--- a/dev/tasks/linux-packages/apache-arrow/yum/centos-7/Dockerfile
+++ b/dev/tasks/linux-packages/apache-arrow/yum/centos-7/Dockerfile
@@ -51,7 +51,6 @@ RUN \
     rpmdevtools \
     snappy-devel \
     tar \
-    utf8proc-devel \
     zlib-devel && \
   yum clean ${quiet} all
 

From 5526633958d24d77a8d437f71e1eb67be2a15768 Mon Sep 17 00:00:00 2001
From: Yibo Cai <yibo.cai@arm.com>
Date: Fri, 11 Jun 2021 16:30:42 +0900
Subject: [PATCH 399/719] ARROW-13030: [CI][Go] Setup Arm64 golang CI

Closes #10515 from cyb70289/13030-go-arm-ci

Authored-by: Yibo Cai <yibo.cai@arm.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 .travis.yml                                   | 12 ++++++++
 go/arrow/internal/cpu/cpu_arm64.go            |  7 +++++
 go/arrow/math/float64_arm64.go                | 25 ++++++++++++++++
 go/arrow/math/int64_arm64.go                  | 25 ++++++++++++++++
 go/arrow/math/math_arm64.go                   | 29 +++++++++++++++++++
 go/arrow/math/uint64_arm64.go                 | 25 ++++++++++++++++
 go/parquet/internal/bmi/bitmap_bmi2_arm64.go  | 24 +++++++++++++++
 .../internal/utils/bit_packing_arm64.go       | 23 +++++++++++++++
 .../internal/utils/unpack_bool_arm64.go       | 25 ++++++++++++++++
 9 files changed, 195 insertions(+)
 create mode 100644 go/arrow/internal/cpu/cpu_arm64.go
 create mode 100644 go/arrow/math/float64_arm64.go
 create mode 100644 go/arrow/math/int64_arm64.go
 create mode 100644 go/arrow/math/math_arm64.go
 create mode 100644 go/arrow/math/uint64_arm64.go
 create mode 100644 go/parquet/internal/bmi/bitmap_bmi2_arm64.go
 create mode 100644 go/parquet/internal/utils/bit_packing_arm64.go
 create mode 100644 go/parquet/internal/utils/unpack_bool_arm64.go

diff --git a/.travis.yml b/.travis.yml
index 861cc77f402..ced0405ec86 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -69,6 +69,17 @@ jobs:
         LLVM: "10"
         UBUNTU: "20.04"
 
+    - name: "Go on ARM"
+      os: linux
+      arch: arm64-graviton2
+      group: edge
+      virt: vm
+      env:
+        <<: *global_env
+        ARCH: arm64v8
+        ARROW_CI_MODULES: "GO"
+        DOCKER_IMAGE_ID: debian-go
+
     - name: "C++ on s390x"
       os: linux
       arch: s390x
@@ -117,6 +128,7 @@ jobs:
         JDK: 11
 
   allow_failures:
+    - name: "Go on ARM"
     - name: "Go on s390x"
     - name: "Java on s390x"
 
diff --git a/go/arrow/internal/cpu/cpu_arm64.go b/go/arrow/internal/cpu/cpu_arm64.go
new file mode 100644
index 00000000000..179a03e53d2
--- /dev/null
+++ b/go/arrow/internal/cpu/cpu_arm64.go
@@ -0,0 +1,7 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpu
+
+const CacheLineSize = 64
diff --git a/go/arrow/math/float64_arm64.go b/go/arrow/math/float64_arm64.go
new file mode 100644
index 00000000000..f60be90721d
--- /dev/null
+++ b/go/arrow/math/float64_arm64.go
@@ -0,0 +1,25 @@
+// Code generated by type_s390x.go.tmpl. DO NOT EDIT.
+
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build !noasm
+
+package math
+
+func initFloat64Go() {
+	Float64.sum = sum_float64_go
+}
diff --git a/go/arrow/math/int64_arm64.go b/go/arrow/math/int64_arm64.go
new file mode 100644
index 00000000000..1a615a9b27d
--- /dev/null
+++ b/go/arrow/math/int64_arm64.go
@@ -0,0 +1,25 @@
+// Code generated by type_s390x.go.tmpl. DO NOT EDIT.
+
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build !noasm
+
+package math
+
+func initInt64Go() {
+	Int64.sum = sum_int64_go
+}
diff --git a/go/arrow/math/math_arm64.go b/go/arrow/math/math_arm64.go
new file mode 100644
index 00000000000..3daeac7efaf
--- /dev/null
+++ b/go/arrow/math/math_arm64.go
@@ -0,0 +1,29 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build !noasm
+
+package math
+
+func init() {
+	initGo()
+}
+
+func initGo() {
+	initFloat64Go()
+	initInt64Go()
+	initUint64Go()
+}
diff --git a/go/arrow/math/uint64_arm64.go b/go/arrow/math/uint64_arm64.go
new file mode 100644
index 00000000000..8f7419fd484
--- /dev/null
+++ b/go/arrow/math/uint64_arm64.go
@@ -0,0 +1,25 @@
+// Code generated by type_s390x.go.tmpl. DO NOT EDIT.
+
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build !noasm
+
+package math
+
+func initUint64Go() {
+	Uint64.sum = sum_uint64_go
+}
diff --git a/go/parquet/internal/bmi/bitmap_bmi2_arm64.go b/go/parquet/internal/bmi/bitmap_bmi2_arm64.go
new file mode 100644
index 00000000000..498d5452e17
--- /dev/null
+++ b/go/parquet/internal/bmi/bitmap_bmi2_arm64.go
@@ -0,0 +1,24 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build !noasm
+
+package bmi
+
+func init() {
+	funclist.extractBits = extractBitsGo
+	funclist.gtbitmap = greaterThanBitmapGo
+}
diff --git a/go/parquet/internal/utils/bit_packing_arm64.go b/go/parquet/internal/utils/bit_packing_arm64.go
new file mode 100644
index 00000000000..58f869c3f5d
--- /dev/null
+++ b/go/parquet/internal/utils/bit_packing_arm64.go
@@ -0,0 +1,23 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build !noasm
+
+package utils
+
+import "io"
+
+var unpack32 func(io.Reader, []uint32, int) int = unpack32Default
diff --git a/go/parquet/internal/utils/unpack_bool_arm64.go b/go/parquet/internal/utils/unpack_bool_arm64.go
new file mode 100644
index 00000000000..d833c2b9d62
--- /dev/null
+++ b/go/parquet/internal/utils/unpack_bool_arm64.go
@@ -0,0 +1,25 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build !noasm
+
+package utils
+
+// BytesToBools when built with the noasm tag will direct to the pure go implementation
+// for converting a bitmap to a slice of bools
+func BytesToBools(in []byte, out []bool) {
+	bytesToBoolsGo(in, out)
+}

From 08781a319b0b333891caadb25dd692a1552bcd3e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Fri, 11 Jun 2021 12:40:17 +0200
Subject: [PATCH 400/719] ARROW-12801: [CI][Packaging][Java] Include all
 modules in script that generate Arrow jars
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

cc @anthonylouisbsb

Closes #10411 from kszucs/ARROW-12801

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 ci/scripts/java_jni_build.sh   | 35 ++++++------------------------
 dev/tasks/java-jars/github.yml | 19 ++++++++++++-----
 dev/tasks/tasks.yml            | 39 ++++++++++++++++++++++++++++++++--
 3 files changed, 58 insertions(+), 35 deletions(-)

diff --git a/ci/scripts/java_jni_build.sh b/ci/scripts/java_jni_build.sh
index 638d9d11d48..b4ae48f3d9a 100755
--- a/ci/scripts/java_jni_build.sh
+++ b/ci/scripts/java_jni_build.sh
@@ -20,39 +20,18 @@
 set -e
 
 arrow_dir=${1}
-cpp_build_dir=${2}
-java_dir=${arrow_dir}/java
+cpp_lib_dir=${2}
+java_dist_dir=${3}
 
 export ARROW_TEST_DATA=${arrow_dir}/testing/data
 
-pushd $java_dir
+pushd ${arrow_dir}/java
 
 # build the entire project
-mvn clean install -P arrow-jni -Darrow.cpp.build.dir=$cpp_build_dir
+mvn clean install -P arrow-jni -Darrow.cpp.build.dir=$cpp_lib_dir
 
-MODULES=(
-  adapter/avro
-  adapter/jdbc
-  adapter/orc
-  algorithm
-  compression
-  dataset
-  flight/flight-core
-  flight/flight-grpc
-  format
-  gandiva
-  memory/memory-core
-  memory/memory-netty
-  memory/memory-unsafe
-  performance
-  plasma
-  tools
-  vector
-)
-
-# copy all jars to distribution folder, excluding the unit tests
-for module in "${MODULES[@]}"; do
-  find $module/target/ -name "*.jar" -not -name "*tests*" -not -name "*benchmarks*" -exec cp  {} $cpp_build_dir \;
-done
+# copy all jars and pom files to the distribution folder
+find . -name "*.jar" -exec echo {} \; -exec cp {} $java_dist_dir \;
+find . -name "*.pom" -exec echo {} \; -exec cp {} $java_dist_dir \;
 
 popd
diff --git a/dev/tasks/java-jars/github.yml b/dev/tasks/java-jars/github.yml
index 117aede6089..e2372c56eb0 100644
--- a/dev/tasks/java-jars/github.yml
+++ b/dev/tasks/java-jars/github.yml
@@ -57,9 +57,9 @@ jobs:
           arrow/ci/scripts/java_jni_macos_build.sh \
             $GITHUB_WORKSPACE/arrow \
             $GITHUB_WORKSPACE/arrow/cpp-build \
-            $GITHUB_WORKSPACE/arrow/dist
+            $GITHUB_WORKSPACE/arrow/java/dist
       - name: Compress into single artifact
-        run: tar -cvzf arrow-shared-libs-macos.tar.gz arrow/dist/
+        run: tar -cvzf arrow-shared-libs-macos.tar.gz arrow/java/dist/
       - name: Upload Artifacts
         uses: actions/upload-artifact@v2
         with:
@@ -84,10 +84,19 @@ jobs:
         run: |
           tar -xvzf arrow-shared-libs-macos.tar.gz
           tar -xvzf arrow-shared-libs-linux.tar.gz
-      - name: Build Jar
+      - name: Test that Shared Libraries Exist
+        run: |
+          test -f arrow/java/dist/libarrow_dataset_jni.dylib
+          test -f arrow/java/dist/libgandiva_jni.dylib
+          test -f arrow/java/dist/libarrow_orc_jni.dylib
+          test -f arrow/java/dist/libarrow_dataset_jni.so
+          test -f arrow/java/dist/libarrow_orc_jni.so
+          test -f arrow/java/dist/libgandiva_jni.so
+      - name: Build Bundled Jar
         run: |
           set -e
           arrow/ci/scripts/java_jni_build.sh \
             $GITHUB_WORKSPACE/arrow \
-            $GITHUB_WORKSPACE/arrow/dist \
-      {{ macros.github_upload_releases("arrow/dist/*.jar")|indent }}
+            $GITHUB_WORKSPACE/arrow/java/dist \
+            $GITHUB_WORKSPACE/arrow/java/dist
+      {{ macros.github_upload_releases(["arrow/java/dist/*.jar", "arrow/java/dist/*.pom"])|indent }}
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index 665f27b4285..8eecb882d98 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -635,10 +635,45 @@ tasks:
     ci: github
     template: java-jars/github.yml
     artifacts:
-      #TODO(kszucs): need to list the rest of the jars here
+      - arrow-algorithm-{no_rc_version}-SNAPSHOT-tests.jar
+      - arrow-algorithm-{no_rc_version}-SNAPSHOT.jar
+      - arrow-avro-{no_rc_version}-SNAPSHOT-tests.jar
+      - arrow-avro-{no_rc_version}-SNAPSHOT.jar
+      - arrow-compression-{no_rc_version}-SNAPSHOT-tests.jar
+      - arrow-compression-{no_rc_version}-SNAPSHOT.jar
+      - arrow-dataset-{no_rc_version}-SNAPSHOT-tests.jar
+      - arrow-dataset-{no_rc_version}-SNAPSHOT.jar
+      - arrow-format-{no_rc_version}-SNAPSHOT-tests.jar
+      - arrow-format-{no_rc_version}-SNAPSHOT.jar
+      - arrow-gandiva-{no_rc_version}-SNAPSHOT-tests.jar
       - arrow-gandiva-{no_rc_version}-SNAPSHOT.jar
+      - arrow-jdbc-{no_rc_version}-SNAPSHOT-tests.jar
+      - arrow-jdbc-{no_rc_version}-SNAPSHOT.jar
+      - arrow-memory-core-{no_rc_version}-SNAPSHOT-tests.jar
+      - arrow-memory-core-{no_rc_version}-SNAPSHOT.jar
+      - arrow-memory-netty-{no_rc_version}-SNAPSHOT-tests.jar
+      - arrow-memory-netty-{no_rc_version}-SNAPSHOT.jar
+      - arrow-memory-unsafe-{no_rc_version}-SNAPSHOT-tests.jar
+      - arrow-memory-unsafe-{no_rc_version}-SNAPSHOT.jar
+      - arrow-orc-{no_rc_version}-SNAPSHOT-tests.jar
       - arrow-orc-{no_rc_version}-SNAPSHOT.jar
-      - arrow-dataset-{no_rc_version}-SNAPSHOT.jar
+      - arrow-performance-{no_rc_version}-SNAPSHOT-tests.jar
+      - arrow-performance-{no_rc_version}-SNAPSHOT.jar
+      - arrow-plasma-{no_rc_version}-SNAPSHOT-tests.jar
+      - arrow-plasma-{no_rc_version}-SNAPSHOT.jar
+      - arrow-tools-{no_rc_version}-SNAPSHOT-jar-with-dependencies.jar
+      - arrow-tools-{no_rc_version}-SNAPSHOT-tests.jar
+      - arrow-tools-{no_rc_version}-SNAPSHOT.jar
+      - arrow-vector-{no_rc_version}-SNAPSHOT-shade-format-flatbuffers.jar
+      - arrow-vector-{no_rc_version}-SNAPSHOT-tests.jar
+      - arrow-vector-{no_rc_version}-SNAPSHOT.jar
+      - benchmarks.jar
+      - flight-core-{no_rc_version}-SNAPSHOT-jar-with-dependencies.jar
+      - flight-core-{no_rc_version}-SNAPSHOT-shaded-ext.jar
+      - flight-core-{no_rc_version}-SNAPSHOT-shaded.jar
+      - flight-core-{no_rc_version}-SNAPSHOT-tests.jar
+      - flight-core-{no_rc_version}-SNAPSHOT.jar
+      - flight-grpc-{no_rc_version}-SNAPSHOT-tests.jar
 
   ############################## NuGet packages ###############################
 

From 6c289dd2bbbbd62c16508c2509a920a61bde3869 Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Sat, 12 Jun 2021 06:12:23 +0900
Subject: [PATCH 401/719] ARROW-13043: [GLib][Ruby] Add GArrowEqualOptions

Closes #10510 from kou/glib-equal-options

Authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 c_glib/arrow-glib/basic-array.cpp         | 216 +++++++++++++++++++++-
 c_glib/arrow-glib/basic-array.h           |  23 ++-
 c_glib/arrow-glib/basic-array.hpp         |   3 +
 c_glib/arrow-glib/compute.cpp             |   2 +-
 c_glib/test/test-equal-options.rb         |  96 ++++++++++
 ruby/red-arrow/lib/arrow/array.rb         |  12 ++
 ruby/red-arrow/lib/arrow/equal-options.rb |  38 ++++
 ruby/red-arrow/lib/arrow/loader.rb        |   1 +
 ruby/red-arrow/test/test-array.rb         |  34 ++++
 9 files changed, 417 insertions(+), 8 deletions(-)
 create mode 100644 c_glib/test/test-equal-options.rb
 create mode 100644 ruby/red-arrow/lib/arrow/equal-options.rb

diff --git a/c_glib/arrow-glib/basic-array.cpp b/c_glib/arrow-glib/basic-array.cpp
index f2a924ee45c..9e8f3499dd5 100644
--- a/c_glib/arrow-glib/basic-array.cpp
+++ b/c_glib/arrow-glib/basic-array.cpp
@@ -167,6 +167,163 @@ G_BEGIN_DECLS
  * extension types.
  */
 
+typedef struct GArrowEqualOptionsPrivate_ {
+  gboolean approx;
+  arrow::EqualOptions options;
+} GArrowEqualOptionsPrivate;
+
+enum {
+  PROP_APPROX = 1,
+  PROP_NANS_EQUAL,
+  PROP_ABSOLUTE_TOLERANCE,
+};
+
+G_DEFINE_TYPE_WITH_PRIVATE(GArrowEqualOptions,
+                           garrow_equal_options,
+                           G_TYPE_OBJECT)
+
+#define GARROW_EQUAL_OPTIONS_GET_PRIVATE(object) \
+  static_cast<GArrowEqualOptionsPrivate *>(      \
+    garrow_equal_options_get_instance_private(   \
+      GARROW_EQUAL_OPTIONS(object)))
+
+static void
+garrow_equal_options_finalize(GObject *object)
+{
+  auto priv = GARROW_EQUAL_OPTIONS_GET_PRIVATE(object);
+  priv->options.~EqualOptions();
+  G_OBJECT_CLASS(garrow_equal_options_parent_class)->finalize(object);
+}
+
+static void
+garrow_equal_options_set_property(GObject *object,
+                                  guint prop_id,
+                                  const GValue *value,
+                                  GParamSpec *pspec)
+{
+  auto priv = GARROW_EQUAL_OPTIONS_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_APPROX:
+    priv->approx = g_value_get_boolean(value);
+    break;
+  case PROP_NANS_EQUAL:
+    priv->options = priv->options.nans_equal(g_value_get_boolean(value));
+    break;
+  case PROP_ABSOLUTE_TOLERANCE:
+    priv->options = priv->options.atol(g_value_get_double(value));
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+garrow_equal_options_get_property(GObject *object,
+                                 guint prop_id,
+                                 GValue *value,
+                                 GParamSpec *pspec)
+{
+  auto priv = GARROW_EQUAL_OPTIONS_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_APPROX:
+    g_value_set_boolean(value, priv->approx);
+    break;
+  case PROP_NANS_EQUAL:
+    g_value_set_boolean(value, priv->options.nans_equal());
+    break;
+  case PROP_ABSOLUTE_TOLERANCE:
+    g_value_set_double(value, priv->options.atol());
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+garrow_equal_options_init(GArrowEqualOptions *object)
+{
+  auto priv = GARROW_EQUAL_OPTIONS_GET_PRIVATE(object);
+  priv->approx = FALSE;
+  new(&priv->options) arrow::EqualOptions;
+  priv->options = arrow::EqualOptions::Defaults();
+}
+
+static void
+garrow_equal_options_class_init(GArrowEqualOptionsClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+  gobject_class->finalize     = garrow_equal_options_finalize;
+  gobject_class->set_property = garrow_equal_options_set_property;
+  gobject_class->get_property = garrow_equal_options_get_property;
+
+  auto options = arrow::EqualOptions::Defaults();
+  GParamSpec *spec;
+  /**
+   * GArrowEqualOptions:approx:
+   *
+   * Whether or not approximate comparison is used.
+   *
+   * Since: 5.0.0
+   */
+  spec = g_param_spec_boolean("approx",
+                              "Approx",
+                              "Whether or not approximate comparison is used",
+                              FALSE,
+                              static_cast<GParamFlags>(G_PARAM_READWRITE));
+  g_object_class_install_property(gobject_class, PROP_APPROX, spec);
+
+  /**
+   * GArrowEqualOptions:nans-equal:
+   *
+   * Whether or not NaNs are considered equal.
+   *
+   * Since: 5.0.0
+   */
+  spec = g_param_spec_boolean("nans-equal",
+                              "NaNs equal",
+                              "Whether or not NaNs are considered equal",
+                              options.nans_equal(),
+                              static_cast<GParamFlags>(G_PARAM_READWRITE));
+  g_object_class_install_property(gobject_class, PROP_NANS_EQUAL, spec);
+
+  /**
+   * GArrowEqualOptions:absolute-tolerance:
+   *
+   * The absolute tolerance for approximate comparison of
+   * floating-point values.
+   *
+   * Since: 5.0.0
+   */
+  spec = g_param_spec_double("absolute-tolerance",
+                             "Absolute tolerance",
+                             "The absolute tolerance for approximate comparison "
+                             "of floating-point values",
+                             -G_MAXDOUBLE,
+                             G_MAXDOUBLE,
+                             options.atol(),
+                             static_cast<GParamFlags>(G_PARAM_READWRITE));
+  g_object_class_install_property(gobject_class, PROP_ABSOLUTE_TOLERANCE, spec);
+}
+
+/**
+ * garrow_equal_options_new:
+ *
+ * Returns: A newly created #GArrowEqualOptions.
+ *
+ * Since: 5.0.0
+ */
+GArrowEqualOptions *
+garrow_equal_options_new(void)
+{
+  auto equal_options = g_object_new(GARROW_TYPE_EQUAL_OPTIONS, NULL);
+  return GARROW_EQUAL_OPTIONS(equal_options);
+}
+
+
 typedef struct GArrowArrayPrivate_ {
   std::shared_ptr<arrow::Array> array;
   GArrowDataType *value_data_type;
@@ -396,10 +553,39 @@ garrow_array_class_init(GArrowArrayClass *klass)
  */
 gboolean
 garrow_array_equal(GArrowArray *array, GArrowArray *other_array)
+{
+  return garrow_array_equal_options(array, other_array, NULL);
+}
+
+/**
+ * garrow_array_equal_options:
+ * @array: A #GArrowArray.
+ * @other_array: A #GArrowArray to be compared.
+ * @options: (nullable): A #GArrowEqualOptions to custom how to compare.
+ *
+ * Returns: %TRUE if both of them have the same data, %FALSE
+ *   otherwise.
+ *
+ * Since: 5.0.0
+ */
+gboolean
+garrow_array_equal_options(GArrowArray *array,
+                           GArrowArray *other_array,
+                           GArrowEqualOptions *options)
 {
   const auto arrow_array = garrow_array_get_raw(array);
   const auto arrow_other_array = garrow_array_get_raw(other_array);
-  return arrow_array->Equals(arrow_other_array);
+  if (options) {
+    auto options_priv = GARROW_EQUAL_OPTIONS_GET_PRIVATE(options);
+    const auto arrow_options = garrow_equal_options_get_raw(options);
+    if (options_priv->approx) {
+      return arrow_array->ApproxEquals(arrow_other_array, *arrow_options);
+    } else {
+      return arrow_array->Equals(arrow_other_array, *arrow_options);
+    }
+  } else {
+    return arrow_array->Equals(arrow_other_array);
+  }
 }
 
 /**
@@ -429,6 +615,7 @@ garrow_array_equal_approx(GArrowArray *array, GArrowArray *other_array)
  * @end_index: The end index of @array to be used. The end index of
  *   @other_array is "@other_start_index + (@end_index -
  *   @start_index)".
+ * @options: (nullable): A #GArrowEqualOptions to custom how to compare.
  *
  * Returns: %TRUE if both of them have the same data in the range,
  *   %FALSE otherwise.
@@ -440,14 +627,24 @@ garrow_array_equal_range(GArrowArray *array,
                          gint64 start_index,
                          GArrowArray *other_array,
                          gint64 other_start_index,
-                         gint64 end_index)
+                         gint64 end_index,
+                         GArrowEqualOptions *options)
 {
   const auto arrow_array = garrow_array_get_raw(array);
   const auto arrow_other_array = garrow_array_get_raw(other_array);
-  return arrow_array->RangeEquals(*arrow_other_array,
-                                  start_index,
-                                  end_index,
-                                  other_start_index);
+  if (options) {
+    const auto arrow_options = garrow_equal_options_get_raw(options);
+    return arrow_array->RangeEquals(arrow_other_array,
+                                    start_index,
+                                    end_index,
+                                    other_start_index,
+                                    *arrow_options);
+  } else {
+    return arrow_array->RangeEquals(arrow_other_array,
+                                    start_index,
+                                    end_index,
+                                    other_start_index);
+  }
 }
 
 /**
@@ -2848,6 +3045,13 @@ garrow_extension_array_get_storage(GArrowExtensionArray *array)
 
 G_END_DECLS
 
+arrow::EqualOptions *
+garrow_equal_options_get_raw(GArrowEqualOptions *equal_options)
+{
+  auto priv = GARROW_EQUAL_OPTIONS_GET_PRIVATE(equal_options);
+  return &(priv->options);
+}
+
 GArrowArray *
 garrow_array_new_raw(std::shared_ptr<arrow::Array> *arrow_array)
 {
diff --git a/c_glib/arrow-glib/basic-array.h b/c_glib/arrow-glib/basic-array.h
index 9835db5e67a..b2a05f9cb04 100644
--- a/c_glib/arrow-glib/basic-array.h
+++ b/c_glib/arrow-glib/basic-array.h
@@ -24,6 +24,22 @@
 
 G_BEGIN_DECLS
 
+#define GARROW_TYPE_EQUAL_OPTIONS (garrow_equal_options_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowEqualOptions,
+                         garrow_equal_options,
+                         GARROW,
+                         EQUAL_OPTIONS,
+                         GObject)
+struct _GArrowEqualOptionsClass
+{
+  GObjectClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GArrowEqualOptions *
+garrow_equal_options_new(void);
+
+
 #define GARROW_TYPE_ARRAY (garrow_array_get_type())
 G_DECLARE_DERIVABLE_TYPE(GArrowArray,
                          garrow_array,
@@ -37,13 +53,18 @@ struct _GArrowArrayClass
 
 gboolean       garrow_array_equal       (GArrowArray *array,
                                          GArrowArray *other_array);
+GARROW_AVAILABLE_IN_5_0
+gboolean       garrow_array_equal_options(GArrowArray *array,
+                                          GArrowArray *other_array,
+                                          GArrowEqualOptions *options);
 gboolean       garrow_array_equal_approx(GArrowArray *array,
                                          GArrowArray *other_array);
 gboolean       garrow_array_equal_range (GArrowArray *array,
                                          gint64 start_index,
                                          GArrowArray *other_array,
                                          gint64 other_start_index,
-                                         gint64 end_index);
+                                         gint64 end_index,
+                                         GArrowEqualOptions *options);
 
 gboolean       garrow_array_is_null     (GArrowArray *array,
                                          gint64 i);
diff --git a/c_glib/arrow-glib/basic-array.hpp b/c_glib/arrow-glib/basic-array.hpp
index effebb01a6f..3ef1c196976 100644
--- a/c_glib/arrow-glib/basic-array.hpp
+++ b/c_glib/arrow-glib/basic-array.hpp
@@ -23,6 +23,9 @@
 
 #include <arrow-glib/basic-array.h>
 
+arrow::EqualOptions *
+garrow_equal_options_get_raw(GArrowEqualOptions *equal_options);
+
 GArrowArray *
 garrow_array_new_raw(std::shared_ptr<arrow::Array> *arrow_array);
 GArrowArray *
diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp
index d284a430b81..3a67fbaad8e 100644
--- a/c_glib/arrow-glib/compute.cpp
+++ b/c_glib/arrow-glib/compute.cpp
@@ -526,7 +526,7 @@ garrow_cast_options_class_init(GArrowCastOptionsClass *klass)
   /**
    * GArrowCastOptions:to-data-type:
    *
-   * The GArrowDataType being casted to.
+   * The #GArrowDataType being casted to.
    *
    * Since: 1.0.0
    */
diff --git a/c_glib/test/test-equal-options.rb b/c_glib/test/test-equal-options.rb
new file mode 100644
index 00000000000..4ea1979a76b
--- /dev/null
+++ b/c_glib/test/test-equal-options.rb
@@ -0,0 +1,96 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestEqualOptions < Test::Unit::TestCase
+  include Helper::Buildable
+
+  sub_test_case("approx") do
+    def setup
+      @options = Arrow::EqualOptions.new
+    end
+
+    def test_accessor
+      assert do
+        not @options.approx?
+      end
+      @options.approx = true
+      assert do
+        @options.approx?
+      end
+    end
+
+    def test_compare
+      array1 = build_float_array([0.01])
+      array2 = build_float_array([0.010001])
+      @options.approx = true
+      assert do
+        array1.equal_options(array2, @options)
+      end
+    end
+  end
+
+  sub_test_case("nans-equal") do
+    def setup
+      @options = Arrow::EqualOptions.new
+    end
+
+    def test_accessor
+      assert do
+        not @options.nans_equal?
+      end
+      @options.nans_equal = true
+      assert do
+        @options.nans_equal?
+      end
+    end
+
+    def test_compare
+      array1 = build_float_array([0.1, Float::NAN, 0.2])
+      array2 = build_float_array([0.1, Float::NAN, 0.2])
+      @options.nans_equal = true
+      assert do
+        array1.equal_options(array2, @options)
+      end
+    end
+  end
+
+  sub_test_case("absolute-tolerance") do
+    def setup
+      @options = Arrow::EqualOptions.new
+    end
+
+    def test_accessor
+      assert do
+        @options.absolute_tolerance < 0.001
+      end
+      @options.absolute_tolerance = 0.001
+      assert do
+        @options.absolute_tolerance >= 0.001
+      end
+    end
+
+    def test_compare
+      array1 = build_float_array([0.01])
+      array2 = build_float_array([0.0109])
+      @options.approx = true
+      @options.absolute_tolerance = 0.001
+      assert do
+        array1.equal_options(array2, @options)
+      end
+    end
+  end
+end
diff --git a/ruby/red-arrow/lib/arrow/array.rb b/ruby/red-arrow/lib/arrow/array.rb
index ae6125d7daf..c6c0daaec58 100644
--- a/ruby/red-arrow/lib/arrow/array.rb
+++ b/ruby/red-arrow/lib/arrow/array.rb
@@ -55,6 +55,18 @@ def [](i)
       end
     end
 
+    # @param other [Arrow::Array] The array to be compared.
+    # @param options [Arrow::EqualOptions, Hash] (nil)
+    #   The options to custom how to compare.
+    #
+    # @return [Boolean]
+    #   `true` if both of them have the same data, `false` otherwise.
+    #
+    # @since 5.0.0
+    def equal_array?(other, options=nil)
+      equal_options(other, options)
+    end
+
     def each
       return to_enum(__method__) unless block_given?
 
diff --git a/ruby/red-arrow/lib/arrow/equal-options.rb b/ruby/red-arrow/lib/arrow/equal-options.rb
new file mode 100644
index 00000000000..4eb9964ad33
--- /dev/null
+++ b/ruby/red-arrow/lib/arrow/equal-options.rb
@@ -0,0 +1,38 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+  class EqualOptions
+    class << self
+      # @api private
+      def try_convert(value)
+        case value
+        when Hash
+          options = new
+          value.each do |k, v|
+            setter = :"#{k}="
+            return unless options.respond_to?(setter)
+            options.__send__(setter, v)
+          end
+          options
+        else
+          nil
+        end
+      end
+    end
+  end
+end
diff --git a/ruby/red-arrow/lib/arrow/loader.rb b/ruby/red-arrow/lib/arrow/loader.rb
index 81a4c2045f0..ab152820cb6 100644
--- a/ruby/red-arrow/lib/arrow/loader.rb
+++ b/ruby/red-arrow/lib/arrow/loader.rb
@@ -63,6 +63,7 @@ def require_libraries
       require "arrow/dense-union-data-type"
       require "arrow/dictionary-array"
       require "arrow/dictionary-data-type"
+      require "arrow/equal-options"
       require "arrow/field"
       require "arrow/file-output-stream"
       require "arrow/fixed-size-binary-array"
diff --git a/ruby/red-arrow/test/test-array.rb b/ruby/red-arrow/test/test-array.rb
index 09355517d56..2b7112da6f3 100644
--- a/ruby/red-arrow/test/test-array.rb
+++ b/ruby/red-arrow/test/test-array.rb
@@ -64,6 +64,40 @@ def setup
       end
     end
 
+    sub_test_case("#equal_array?") do
+      test("no options") do
+        array1 = Arrow::FloatArray.new([1.1, Float::NAN])
+        array2 = Arrow::FloatArray.new([1.1, Float::NAN])
+        assert do
+          not array1.equal_array?(array2)
+        end
+      end
+
+      test("approx") do
+        array1 = Arrow::FloatArray.new([1.1])
+        array2 = Arrow::FloatArray.new([1.100001])
+        assert do
+          array1.equal_array?(array2, approx: true)
+        end
+      end
+
+      test("nans-equal") do
+        array1 = Arrow::FloatArray.new([1.1, Float::NAN])
+        array2 = Arrow::FloatArray.new([1.1, Float::NAN])
+        assert do
+          array1.equal_array?(array2, nans_equal: true)
+        end
+      end
+
+      test("absolute-tolerance") do
+        array1 = Arrow::FloatArray.new([1.1])
+        array2 = Arrow::FloatArray.new([1.101])
+        assert do
+          array1.equal_array?(array2, approx: true, absolute_tolerance: 0.01)
+        end
+      end
+    end
+
     sub_test_case("#cast") do
       test("Symbol") do
         assert_equal(Arrow::Int32Array.new([1, 2, 3]),

From 91629540c7cab5501fa15648369f579cc976cf7b Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Sat, 12 Jun 2021 14:09:50 +0900
Subject: [PATCH 402/719] ARROW-13065: [Packaging][RPM] Add missing required
 LZ4 version information

If we use old LZ4 (< 1.8.0, that is installed on old CentOS
installation), the following error is occurred:

    /lib64/libarrow.so.400: undefined symbol: LZ4F_resetDecompressionContext

Closes #10521 from kou/rpm-lz4-version

Authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 .../linux-packages/apache-arrow/yum/arrow.spec.in     | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in
index 28ebc660708..f2c75e03f64 100644
--- a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in
+++ b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in
@@ -35,6 +35,11 @@
     echo 3; \
   fi)
 
+%define lz4_requirement %( \
+  if [ %{_amzn} -eq 0 ]; then \
+    echo ">= 1.8.0"; \
+  fi)
+
 %define use_boost (!%{is_amazon_linux})
 %define use_flight (%{rhel} >= 8)
 %define use_gandiva (%{rhel} >= 8 && %{_arch} != "aarch64")
@@ -82,7 +87,7 @@ BuildRequires:	gflags-devel
 BuildRequires:	git
 BuildRequires:	glog-devel
 BuildRequires:	libzstd-devel
-BuildRequires:	lz4-devel
+BuildRequires:	lz4-devel %{lz4_requirement}
 BuildRequires:	ninja-build
 BuildRequires:	openssl-devel
 BuildRequires:	pkgconfig
@@ -198,7 +203,7 @@ Requires:	gflags
 %endif
 Requires:	glog
 Requires:	libzstd
-Requires:	lz4
+Requires:	lz4 %{lz4_requirement}
 %if %{have_re2}
 Requires:	re2
 %endif
@@ -227,7 +232,7 @@ Requires:	bzip2-devel
 Requires:	c-ares-devel
 %endif
 Requires:	libzstd-devel
-Requires:	lz4-devel
+Requires:	lz4-devel %{lz4_requirement}
 Requires:	openssl-devel
 %if %{have_rapidjson}
 Requires:	rapidjson-devel

From 7339bd5b3fd133b3407687cf4ca222d6230ced78 Mon Sep 17 00:00:00 2001
From: Wes McKinney <wesm@apache.org>
Date: Sat, 12 Jun 2021 08:52:48 -0500
Subject: [PATCH 403/719] [GitHub] Add shorter GitHub repository description to
 .asf.yaml

---
 .asf.yaml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.asf.yaml b/.asf.yaml
index 4bd5191a7a6..2c66ce5be63 100644
--- a/.asf.yaml
+++ b/.asf.yaml
@@ -15,6 +15,10 @@
 # specific language governing permissions and limitations
 # under the License.
 
+github:
+  description: "Apache Arrow is a multi-language toolbox for accelerated data interchange and in-memory processing"
+  homepage: https://arrow.apache.org/
+
 notifications:
   commits:      commits@arrow.apache.org
   issues:       github@arrow.apache.org

From 0e9285b28d3a64ca109665fa18bbf0e6c40dff9d Mon Sep 17 00:00:00 2001
From: Dorian Kind <dorian.kind@webrepublic.com>
Date: Sun, 13 Jun 2021 07:36:37 +0900
Subject: [PATCH 404/719] ARROW-13053: [Python] Fix build issue with Homebrewed
 arrow library

When installing `apache-arrow` via Homebrew on MacOS, the headers directory `/opt/homebrew/include/arrow` is a relative symlink to the actual headers (e.g. `../Cellar/apache-arrow/4.0.1/include/arrow`).

Closes #10523 from ddoskind/arrow-13053

Authored-by: Dorian Kind <dorian.kind@webrepublic.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 python/CMakeLists.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 3ed518d0109..52220767854 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -331,7 +331,8 @@ function(bundle_arrow_dependency library_name)
 endfunction()
 
 # Always bundle includes
-file(COPY ${ARROW_INCLUDE_DIR}/arrow DESTINATION ${BUILD_OUTPUT_ROOT_DIRECTORY}/include)
+get_filename_component(ARROW_INCLUDE_REALPATH "${ARROW_INCLUDE_DIR}/arrow" REALPATH)
+file(COPY ${ARROW_INCLUDE_REALPATH} DESTINATION ${BUILD_OUTPUT_ROOT_DIRECTORY}/include)
 
 if(PYARROW_BUNDLE_ARROW_CPP)
   # arrow

From 27d89a99a9386dd691988e49627a6409e6314adf Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Sun, 13 Jun 2021 07:51:11 +0900
Subject: [PATCH 405/719] ARROW-12962: [GLib][Ruby] Add Arrow::Scalar

Closes #10522 from kou/glib-scalar

Authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 c_glib/arrow-glib/arrow-glib.h                |    1 +
 c_glib/arrow-glib/arrow-glib.hpp              |    1 +
 c_glib/arrow-glib/basic-array.cpp             |   19 +-
 c_glib/arrow-glib/basic-array.h               |    3 +
 c_glib/arrow-glib/datum.cpp                   |  151 ++
 c_glib/arrow-glib/datum.h                     |   26 +-
 c_glib/arrow-glib/datum.hpp                   |    3 +
 c_glib/arrow-glib/meson.build                 |    3 +
 c_glib/arrow-glib/scalar.cpp                  | 2382 +++++++++++++++++
 c_glib/arrow-glib/scalar.h                    |  678 +++++
 c_glib/arrow-glib/scalar.hpp                  |   37 +
 .../arrow-dataset-glib-docs.xml               |    4 +
 c_glib/doc/arrow-glib/arrow-glib-docs.xml     |    4 +
 c_glib/test/run-test.rb                       |    4 +
 c_glib/test/test-array-datum.rb               |   12 +
 c_glib/test/test-binary-scalar.rb             |   48 +
 c_glib/test/test-boolean-scalar.rb            |   52 +
 c_glib/test/test-date32-scalar.rb             |   47 +
 c_glib/test/test-date64-scalar.rb             |   47 +
 c_glib/test/test-decimal128-scalar.rb         |   48 +
 c_glib/test/test-decimal256-scalar.rb         |   48 +
 c_glib/test/test-dense-union-scalar.rb        |   52 +
 c_glib/test/test-double-scalar.rb             |   49 +
 c_glib/test/test-fixed-size-binary-scalar.rb  |   49 +
 c_glib/test/test-float-scalar.rb              |   49 +
 c_glib/test/test-function.rb                  |   19 +
 c_glib/test/test-int16-scalar.rb              |   46 +
 c_glib/test/test-int32-scalar.rb              |   46 +
 c_glib/test/test-int64-scalar.rb              |   46 +
 c_glib/test/test-int8-scalar.rb               |   46 +
 c_glib/test/test-large-binary-scalar.rb       |   48 +
 c_glib/test/test-large-string-scalar.rb       |   48 +
 c_glib/test/test-list-scalar.rb               |   50 +
 c_glib/test/test-map-scalar.rb                |   65 +
 c_glib/test/test-null-scalar.rb               |   42 +
 c_glib/test/test-scalar-datum.rb              |   69 +
 c_glib/test/test-sparse-union-scalar.rb       |   52 +
 c_glib/test/test-string-scalar.rb             |   55 +
 c_glib/test/test-struct-scalar.rb             |   55 +
 c_glib/test/test-time32-scalar.rb             |   48 +
 c_glib/test/test-time64-scalar.rb             |   48 +
 c_glib/test/test-timestamp-scalar.rb          |   48 +
 c_glib/test/test-uint16-scalar.rb             |   46 +
 c_glib/test/test-uint32-scalar.rb             |   46 +
 c_glib/test/test-uint64-scalar.rb             |   46 +
 c_glib/test/test-uint8-scalar.rb              |   46 +
 ruby/red-arrow/lib/arrow/buffer.rb            |   16 +-
 .../constructor-arguments-gc-guardable.rb     |   25 +
 ruby/red-arrow/lib/arrow/datum.rb             |   98 +
 ruby/red-arrow/lib/arrow/loader.rb            |   30 +
 ruby/red-arrow/lib/arrow/scalar.rb            |   32 +
 ruby/red-arrow/test/test-boolean-scalar.rb    |   26 +
 ruby/red-arrow/test/test-float-scalar.rb      |   46 +
 ruby/red-arrow/test/test-function.rb          |  176 ++
 54 files changed, 5267 insertions(+), 14 deletions(-)
 create mode 100644 c_glib/arrow-glib/scalar.cpp
 create mode 100644 c_glib/arrow-glib/scalar.h
 create mode 100644 c_glib/arrow-glib/scalar.hpp
 create mode 100644 c_glib/test/test-binary-scalar.rb
 create mode 100644 c_glib/test/test-boolean-scalar.rb
 create mode 100644 c_glib/test/test-date32-scalar.rb
 create mode 100644 c_glib/test/test-date64-scalar.rb
 create mode 100644 c_glib/test/test-decimal128-scalar.rb
 create mode 100644 c_glib/test/test-decimal256-scalar.rb
 create mode 100644 c_glib/test/test-dense-union-scalar.rb
 create mode 100644 c_glib/test/test-double-scalar.rb
 create mode 100644 c_glib/test/test-fixed-size-binary-scalar.rb
 create mode 100644 c_glib/test/test-float-scalar.rb
 create mode 100644 c_glib/test/test-int16-scalar.rb
 create mode 100644 c_glib/test/test-int32-scalar.rb
 create mode 100644 c_glib/test/test-int64-scalar.rb
 create mode 100644 c_glib/test/test-int8-scalar.rb
 create mode 100644 c_glib/test/test-large-binary-scalar.rb
 create mode 100644 c_glib/test/test-large-string-scalar.rb
 create mode 100644 c_glib/test/test-list-scalar.rb
 create mode 100644 c_glib/test/test-map-scalar.rb
 create mode 100644 c_glib/test/test-null-scalar.rb
 create mode 100644 c_glib/test/test-scalar-datum.rb
 create mode 100644 c_glib/test/test-sparse-union-scalar.rb
 create mode 100644 c_glib/test/test-string-scalar.rb
 create mode 100644 c_glib/test/test-struct-scalar.rb
 create mode 100644 c_glib/test/test-time32-scalar.rb
 create mode 100644 c_glib/test/test-time64-scalar.rb
 create mode 100644 c_glib/test/test-timestamp-scalar.rb
 create mode 100644 c_glib/test/test-uint16-scalar.rb
 create mode 100644 c_glib/test/test-uint32-scalar.rb
 create mode 100644 c_glib/test/test-uint64-scalar.rb
 create mode 100644 c_glib/test/test-uint8-scalar.rb
 create mode 100644 ruby/red-arrow/lib/arrow/constructor-arguments-gc-guardable.rb
 create mode 100644 ruby/red-arrow/lib/arrow/datum.rb
 create mode 100644 ruby/red-arrow/lib/arrow/scalar.rb
 create mode 100644 ruby/red-arrow/test/test-boolean-scalar.rb
 create mode 100644 ruby/red-arrow/test/test-float-scalar.rb
 create mode 100644 ruby/red-arrow/test/test-function.rb

diff --git a/c_glib/arrow-glib/arrow-glib.h b/c_glib/arrow-glib/arrow-glib.h
index 74d9f9209ed..e25044ec9f0 100644
--- a/c_glib/arrow-glib/arrow-glib.h
+++ b/c_glib/arrow-glib/arrow-glib.h
@@ -33,6 +33,7 @@
 #include <arrow-glib/error.h>
 #include <arrow-glib/field.h>
 #include <arrow-glib/record-batch.h>
+#include <arrow-glib/scalar.h>
 #include <arrow-glib/schema.h>
 #include <arrow-glib/table.h>
 #include <arrow-glib/table-builder.h>
diff --git a/c_glib/arrow-glib/arrow-glib.hpp b/c_glib/arrow-glib/arrow-glib.hpp
index 4382328f1bd..6dc6d43f2f9 100644
--- a/c_glib/arrow-glib/arrow-glib.hpp
+++ b/c_glib/arrow-glib/arrow-glib.hpp
@@ -31,6 +31,7 @@
 #include <arrow-glib/error.hpp>
 #include <arrow-glib/field.hpp>
 #include <arrow-glib/record-batch.hpp>
+#include <arrow-glib/scalar.hpp>
 #include <arrow-glib/schema.hpp>
 #include <arrow-glib/table.hpp>
 #include <arrow-glib/table-builder.hpp>
diff --git a/c_glib/arrow-glib/basic-array.cpp b/c_glib/arrow-glib/basic-array.cpp
index 9e8f3499dd5..d5b221a36b0 100644
--- a/c_glib/arrow-glib/basic-array.cpp
+++ b/c_glib/arrow-glib/basic-array.cpp
@@ -323,6 +323,21 @@ garrow_equal_options_new(void)
   return GARROW_EQUAL_OPTIONS(equal_options);
 }
 
+/**
+ * garrow_equal_options_is_approx:
+ * @options: A #GArrowEqualOptions.
+ *
+ * Returns: %TRUE if approximate comparison is used, %FALSE otherwise.
+ *
+ * Since: 5.0.0
+ */
+gboolean
+garrow_equal_options_is_approx(GArrowEqualOptions *options)
+{
+  auto priv = GARROW_EQUAL_OPTIONS_GET_PRIVATE(options);
+  return priv->approx;
+}
+
 
 typedef struct GArrowArrayPrivate_ {
   std::shared_ptr<arrow::Array> array;
@@ -576,9 +591,9 @@ garrow_array_equal_options(GArrowArray *array,
   const auto arrow_array = garrow_array_get_raw(array);
   const auto arrow_other_array = garrow_array_get_raw(other_array);
   if (options) {
-    auto options_priv = GARROW_EQUAL_OPTIONS_GET_PRIVATE(options);
+    auto is_approx = garrow_equal_options_is_approx(options);
     const auto arrow_options = garrow_equal_options_get_raw(options);
-    if (options_priv->approx) {
+    if (is_approx) {
       return arrow_array->ApproxEquals(arrow_other_array, *arrow_options);
     } else {
       return arrow_array->Equals(arrow_other_array, *arrow_options);
diff --git a/c_glib/arrow-glib/basic-array.h b/c_glib/arrow-glib/basic-array.h
index b2a05f9cb04..b4b3de15217 100644
--- a/c_glib/arrow-glib/basic-array.h
+++ b/c_glib/arrow-glib/basic-array.h
@@ -38,6 +38,9 @@ struct _GArrowEqualOptionsClass
 GARROW_AVAILABLE_IN_5_0
 GArrowEqualOptions *
 garrow_equal_options_new(void);
+GARROW_AVAILABLE_IN_5_0
+gboolean
+garrow_equal_options_is_approx(GArrowEqualOptions *options);
 
 
 #define GARROW_TYPE_ARRAY (garrow_array_get_type())
diff --git a/c_glib/arrow-glib/datum.cpp b/c_glib/arrow-glib/datum.cpp
index 781dc086e46..8f37719a3a9 100644
--- a/c_glib/arrow-glib/datum.cpp
+++ b/c_glib/arrow-glib/datum.cpp
@@ -21,6 +21,7 @@
 #include <arrow-glib/chunked-array.hpp>
 #include <arrow-glib/datum.hpp>
 #include <arrow-glib/record-batch.hpp>
+#include <arrow-glib/scalar.hpp>
 #include <arrow-glib/table.hpp>
 
 G_BEGIN_DECLS
@@ -143,6 +144,37 @@ garrow_datum_is_array_like(GArrowDatum *datum)
   return arrow_datum.is_arraylike();
 }
 
+/**
+ * garrow_datum_is_scalar:
+ * @datum: A #GArrowDatum.
+ *
+ * Returns: %TRUE if the datum holds a #GArrowScalar, %FALSE otherwise.
+ *
+ * Since: 5.0.0
+ */
+gboolean
+garrow_datum_is_scalar(GArrowDatum *datum)
+{
+  const auto &arrow_datum = garrow_datum_get_raw(datum);
+  return arrow_datum.is_scalar();
+}
+
+/**
+ * garrow_datum_is_value:
+ * @datum: A #GArrowDatum.
+ *
+ * Returns: %TRUE if the datum holds a #GArrowArray, #GChunkedArray or
+ *   #GArrowScalar, %FALSE otherwise.
+ *
+ * Since: 5.0.0
+ */
+gboolean
+garrow_datum_is_value(GArrowDatum *datum)
+{
+  const auto &arrow_datum = garrow_datum_get_raw(datum);
+  return arrow_datum.is_value();
+}
+
 /**
  * garrow_datum_equal:
  * @datum: A #GArrowDatum.
@@ -286,6 +318,109 @@ garrow_array_datum_new(GArrowArray *value)
 }
 
 
+typedef struct GArrowScalarDatumPrivate_ {
+  GArrowScalar *value;
+} GArrowScalarDatumPrivate;
+
+G_DEFINE_TYPE_WITH_PRIVATE(GArrowScalarDatum,
+                           garrow_scalar_datum,
+                           GARROW_TYPE_DATUM)
+
+#define GARROW_SCALAR_DATUM_GET_PRIVATE(obj)         \
+  static_cast<GArrowScalarDatumPrivate *>(           \
+    garrow_scalar_datum_get_instance_private(        \
+      GARROW_SCALAR_DATUM(obj)))
+
+static void
+garrow_scalar_datum_dispose(GObject *object)
+{
+  auto priv = GARROW_SCALAR_DATUM_GET_PRIVATE(object);
+
+  if (priv->value) {
+    g_object_unref(priv->value);
+    priv->value = NULL;
+  }
+
+  G_OBJECT_CLASS(garrow_scalar_datum_parent_class)->dispose(object);
+}
+
+static void
+garrow_scalar_datum_set_property(GObject *object,
+                                 guint prop_id,
+                                 const GValue *value,
+                                 GParamSpec *pspec)
+{
+  auto priv = GARROW_SCALAR_DATUM_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_VALUE:
+    priv->value = GARROW_SCALAR(g_value_dup_object(value));
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+garrow_scalar_datum_get_property(GObject *object,
+                                 guint prop_id,
+                                 GValue *value,
+                                 GParamSpec *pspec)
+{
+  auto priv = GARROW_SCALAR_DATUM_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_VALUE:
+    g_value_set_object(value, priv->value);
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+garrow_scalar_datum_init(GArrowScalarDatum *object)
+{
+}
+
+static void
+garrow_scalar_datum_class_init(GArrowScalarDatumClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->dispose      = garrow_scalar_datum_dispose;
+  gobject_class->set_property = garrow_scalar_datum_set_property;
+  gobject_class->get_property = garrow_scalar_datum_get_property;
+
+  GParamSpec *spec;
+  spec = g_param_spec_object("value",
+                             "Value",
+                             "The scalar held by this datum",
+                             GARROW_TYPE_SCALAR,
+                             static_cast<GParamFlags>(G_PARAM_READWRITE |
+                                                      G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_VALUE, spec);
+}
+
+/**
+ * garrow_scalar_datum_new:
+ * @value: A #GArrowScalar.
+ *
+ * Returns: A newly created #GArrowScalarDatum.
+ *
+ * Since: 5.0.0
+ */
+GArrowScalarDatum *
+garrow_scalar_datum_new(GArrowScalar *value)
+{
+  auto arrow_value = garrow_scalar_get_raw(value);
+  arrow::Datum arrow_datum(arrow_value);
+  return garrow_scalar_datum_new_raw(&arrow_datum, value);
+}
+
+
 typedef struct GArrowChunkedArrayDatumPrivate_ {
   GArrowChunkedArray *value;
 } GArrowChunkedArrayDatumPrivate;
@@ -608,6 +743,12 @@ GArrowDatum *
 garrow_datum_new_raw(arrow::Datum *arrow_datum)
 {
   switch (arrow_datum->kind()) {
+  case arrow::Datum::SCALAR:
+    {
+      auto arrow_scalar = arrow_datum->scalar();
+      auto scalar = garrow_scalar_new_raw(&arrow_scalar);
+      return GARROW_DATUM(garrow_scalar_datum_new_raw(arrow_datum, scalar));
+    }
   case arrow::Datum::ARRAY:
     {
       auto arrow_array = arrow_datum->make_array();
@@ -642,6 +783,16 @@ garrow_datum_new_raw(arrow::Datum *arrow_datum)
   }
 }
 
+GArrowScalarDatum *
+garrow_scalar_datum_new_raw(arrow::Datum *arrow_datum,
+                            GArrowScalar *value)
+{
+  return GARROW_SCALAR_DATUM(g_object_new(GARROW_TYPE_SCALAR_DATUM,
+                                         "datum", arrow_datum,
+                                         "value", value,
+                                         NULL));
+}
+
 GArrowArrayDatum *
 garrow_array_datum_new_raw(arrow::Datum *arrow_datum,
                            GArrowArray *value)
diff --git a/c_glib/arrow-glib/datum.h b/c_glib/arrow-glib/datum.h
index 9b1544f3271..bc7dda36911 100644
--- a/c_glib/arrow-glib/datum.h
+++ b/c_glib/arrow-glib/datum.h
@@ -22,6 +22,7 @@
 #include <arrow-glib/array.h>
 #include <arrow-glib/chunked-array.h>
 #include <arrow-glib/record-batch.h>
+#include <arrow-glib/scalar.h>
 #include <arrow-glib/table.h>
 
 G_BEGIN_DECLS
@@ -41,10 +42,12 @@ GARROW_AVAILABLE_IN_1_0
 gboolean garrow_datum_is_array(GArrowDatum *datum);
 GARROW_AVAILABLE_IN_1_0
 gboolean garrow_datum_is_array_like(GArrowDatum *datum);
-/*
-GARROW_AVAILABLE_IN_1_0
+GARROW_AVAILABLE_IN_5_0
 gboolean garrow_datum_is_scalar(GArrowDatum *datum);
-GARROW_AVAILABLE_IN_1_0
+GARROW_AVAILABLE_IN_5_0
+gboolean garrow_datum_is_value(GArrowDatum *datum);
+/*
+GARROW_AVAILABLE_IN_5_0
 gboolean garrow_datum_is_collection(GArrowDatum *datum);
 */
 GARROW_AVAILABLE_IN_1_0
@@ -54,9 +57,20 @@ GARROW_AVAILABLE_IN_1_0
 gchar *garrow_datum_to_string(GArrowDatum *datum);
 
 /* GARROW_TYPE_NONE_DATUM */
-/* GARROW_TYPE_SCALAR_DATUM */
-/* GARROW_TYPE_INT8_SCALAR_DATUM */
-/* ... */
+
+#define GARROW_TYPE_SCALAR_DATUM (garrow_scalar_datum_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowScalarDatum,
+                         garrow_scalar_datum,
+                         GARROW,
+                         SCALAR_DATUM,
+                         GArrowDatum)
+struct _GArrowScalarDatumClass
+{
+  GArrowDatumClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GArrowScalarDatum *garrow_scalar_datum_new(GArrowScalar *value);
 
 #define GARROW_TYPE_ARRAY_DATUM (garrow_array_datum_get_type())
 G_DECLARE_DERIVABLE_TYPE(GArrowArrayDatum,
diff --git a/c_glib/arrow-glib/datum.hpp b/c_glib/arrow-glib/datum.hpp
index 673501f89ed..d1acfc58c93 100644
--- a/c_glib/arrow-glib/datum.hpp
+++ b/c_glib/arrow-glib/datum.hpp
@@ -28,6 +28,9 @@ garrow_datum_get_raw(GArrowDatum *datum);
 GArrowDatum *
 garrow_datum_new_raw(arrow::Datum *arrow_datum);
 
+GArrowScalarDatum *
+garrow_scalar_datum_new_raw(arrow::Datum *arrow_datum,
+                            GArrowScalar *value);
 GArrowArrayDatum *
 garrow_array_datum_new_raw(arrow::Datum *arrow_datum,
                            GArrowArray *value);
diff --git a/c_glib/arrow-glib/meson.build b/c_glib/arrow-glib/meson.build
index dbfea52a847..d0479634d6d 100644
--- a/c_glib/arrow-glib/meson.build
+++ b/c_glib/arrow-glib/meson.build
@@ -31,6 +31,7 @@ sources = files(
   'error.cpp',
   'field.cpp',
   'record-batch.cpp',
+  'scalar.cpp',
   'schema.cpp',
   'table.cpp',
   'table-builder.cpp',
@@ -88,6 +89,7 @@ c_headers = files(
   'field.h',
   'gobject-type.h',
   'record-batch.h',
+  'scalar.h',
   'schema.h',
   'table.h',
   'table-builder.h',
@@ -144,6 +146,7 @@ cpp_headers = files(
   'error.hpp',
   'field.hpp',
   'record-batch.hpp',
+  'scalar.hpp',
   'schema.hpp',
   'table.hpp',
   'table-builder.hpp',
diff --git a/c_glib/arrow-glib/scalar.cpp b/c_glib/arrow-glib/scalar.cpp
new file mode 100644
index 00000000000..98202a9e281
--- /dev/null
+++ b/c_glib/arrow-glib/scalar.cpp
@@ -0,0 +1,2382 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <arrow-glib/basic-array.hpp>
+#include <arrow-glib/buffer.hpp>
+#include <arrow-glib/data-type.hpp>
+#include <arrow-glib/decimal.hpp>
+#include <arrow-glib/error.hpp>
+#include <arrow-glib/scalar.hpp>
+
+G_BEGIN_DECLS
+
+/**
+ * SECTION: scalar
+ * @section_id: scalar-classes
+ * @title: Scalar classes
+ * @include: arrow-glib/arrow-glib.h
+ *
+ * #GArrowScalar is a base class for all scalar classes such as
+ * #GArrowBooleanScalar.
+ *
+ * #GArrowNullScalar is a class for a null scalar.
+ *
+ * #GArrowBooleanScalar is a class for a boolean scalar.
+ *
+ * #GArrowInt8Scalar is a class for a 8-bit integer scalar.
+ *
+ * #GArrowInt16Scalar is a class for a 16-bit integer scalar.
+ *
+ * #GArrowInt32Scalar is a class for a 32-bit integer scalar.
+ *
+ * #GArrowInt64Scalar is a class for a 64-bit integer scalar.
+ *
+ * #GArrowUInt8Scalar is a class for a 8-bit unsigned integer scalar.
+ *
+ * #GArrowUInt16Scalar is a class for a 16-bit unsigned integer scalar.
+ *
+ * #GArrowUInt32Scalar is a class for a 32-bit unsigned integer scalar.
+ *
+ * #GArrowUInt64Scalar is a class for a 64-bit unsigned integer scalar.
+ *
+ * #GArrowFloatScalar is a class for a 32-bit floating point scalar.
+ *
+ * #GArrowDoubleScalar is a class for a 64-bit floating point scalar.
+ *
+ * #GArrowBaseBinaryScalar is a base class for all binary and string
+ * scalar classes such as #GArrowBinaryScalar.
+ *
+ * #GArrowBinaryScalar is a class for a binary scalar.
+ *
+ * #GArrowStringScalar is a class for an UTF-8 encoded string scalar.
+ *
+ * #GArrowLargeBinaryScalar is a class for a 64-bit offsets binary
+ * scalar.
+ *
+ * #GArrowLargeStringScalar is a class for a 64-bit offsets UTF-8
+ * encoded string scalar.
+ *
+ * #GArrowFixedSizeBinaryScalar is a class for a fixed-size binary
+ * scalar.
+ *
+ * #GArrowDate32Scalar is a class for the number of days since UNIX
+ * epoch in a 32-bit signed integer scalar.
+ *
+ * #GArrowDate64Scalar is a class for the number of milliseconds
+ * since UNIX epoch in a 64-bit signed integer scalar.
+ *
+ * #GArrowTime32Scalar is a class for the number of seconds or
+ * milliseconds since midnight in a 32-bit signed integer scalar.
+ *
+ * #GArrowTime64Scalar is a class for the number of microseconds or
+ * nanoseconds since midnight in a 64-bit signed integer scalar.
+ *
+ * #GArrowTimestampScalar is a class for the number of
+ * seconds/milliseconds/microseconds/nanoseconds since UNIX epoch in
+ * a 64-bit signed integer scalar.
+ *
+ * #GArrowDecimal128Scalar is a class for a 128-bit decimal scalar.
+ *
+ * #GArrowDecimal256Scalar is a class for a 256-bit decimal scalar.
+ *
+ * #GArrowBaseListScalar is a base class for all list scalar classes
+ * such as #GArrowListScalar.
+ *
+ * #GArrowListScalar is a class for a list scalar.
+ *
+ * #GArrowLargeListScalar is a class for a large list scalar.
+ *
+ * #GArrowMapScalar is a class for a map list scalar.
+ *
+ * #GArrowStructScalar is a class for a struct list scalar.
+ *
+ * #GArrowUnionScalar is a base class for all union scalar classes
+ * such as #GArrowSparseUnionScalar.
+ *
+ * #GArrowSparseUnionScalar is a class for a sparse union scalar.
+ *
+ * #GArrowDenseUnionScalar is a class for a dense union scalar.
+ *
+ * #GArrowExtensionScalar is a base class for user-defined extension
+ * scalar.
+ */
+
+typedef struct GArrowScalarPrivate_ {
+  std::shared_ptr<arrow::Scalar> scalar;
+  GArrowDataType *data_type;
+} GArrowScalarPrivate;
+
+enum {
+  PROP_SCALAR = 1,
+  PROP_DATA_TYPE,
+};
+
+G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GArrowScalar,
+                                    garrow_scalar,
+                                    G_TYPE_OBJECT)
+
+#define GARROW_SCALAR_GET_PRIVATE(obj)            \
+  static_cast<GArrowScalarPrivate *>(             \
+    garrow_scalar_get_instance_private(           \
+      GARROW_SCALAR(obj)))
+
+static void
+garrow_scalar_dispose(GObject *object)
+{
+  auto priv = GARROW_SCALAR_GET_PRIVATE(object);
+
+  if (priv->data_type) {
+    g_object_unref(priv->data_type);
+    priv->data_type = NULL;
+  }
+
+  G_OBJECT_CLASS(garrow_scalar_parent_class)->dispose(object);
+}
+
+static void
+garrow_scalar_finalize(GObject *object)
+{
+  auto priv = GARROW_SCALAR_GET_PRIVATE(object);
+
+  priv->scalar.~shared_ptr();
+
+  G_OBJECT_CLASS(garrow_scalar_parent_class)->finalize(object);
+}
+
+static void
+garrow_scalar_set_property(GObject *object,
+                           guint prop_id,
+                           const GValue *value,
+                           GParamSpec *pspec)
+{
+  auto priv = GARROW_SCALAR_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_SCALAR:
+    priv->scalar =
+      *static_cast<std::shared_ptr<arrow::Scalar> *>(g_value_get_pointer(value));
+    break;
+  case PROP_DATA_TYPE:
+    priv->data_type = GARROW_DATA_TYPE(g_value_dup_object(value));
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+garrow_scalar_init(GArrowScalar *object)
+{
+  auto priv = GARROW_SCALAR_GET_PRIVATE(object);
+  new(&priv->scalar) std::shared_ptr<arrow::Scalar>;
+}
+
+static void
+garrow_scalar_class_init(GArrowScalarClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->dispose      = garrow_scalar_dispose;
+  gobject_class->finalize     = garrow_scalar_finalize;
+  gobject_class->set_property = garrow_scalar_set_property;
+
+  GParamSpec *spec;
+  spec = g_param_spec_pointer("scalar",
+                              "Scalar",
+                              "The raw std::shared<arrow::Scalar> *",
+                              static_cast<GParamFlags>(G_PARAM_WRITABLE |
+                                                       G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_SCALAR, spec);
+
+  /**
+   * GArrowScalar:data-type:
+   *
+   * The data type of the scalar.
+   *
+   * Since: 5.0.0
+   */
+  spec = g_param_spec_object("data-type",
+                             "Data type",
+                             "The data type of the scalar",
+                             GARROW_TYPE_DATA_TYPE,
+                             static_cast<GParamFlags>(G_PARAM_WRITABLE |
+                                                      G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_DATA_TYPE, spec);
+}
+
+/**
+ * garrow_scalar_parse:
+ * @data_type: A #GArrowDataType for the parsed scalar.
+ * @data: (array length=size): Data to be parsed.
+ * @size: The number of bytes of the data.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (nullable) (transfer full):
+ *   A newly created #GArrowScalar if the data is parsed successfully,
+ *   %NULL otherwise.
+ *
+ * Since: 5.0.0
+ */
+GArrowScalar *
+garrow_scalar_parse(GArrowDataType *data_type,
+                    const guint8 *data,
+                    gsize size,
+                    GError **error)
+{
+  const auto arrow_data_type = garrow_data_type_get_raw(data_type);
+  auto arrow_data =
+    arrow::util::string_view(reinterpret_cast<const char *>(data),
+                             size);
+  auto arrow_scalar_result = arrow::Scalar::Parse(arrow_data_type, arrow_data);
+  if (garrow::check(error, arrow_scalar_result, "[scalar][parse]")) {
+    auto arrow_scalar = *arrow_scalar_result;
+    return garrow_scalar_new_raw(&arrow_scalar,
+                                 "scalar", &arrow_scalar,
+                                 "data-type", data_type,
+                                 NULL);
+  } else {
+    return NULL;
+  }
+}
+
+/**
+ * garrow_scalar_get_data_type:
+ * @scalar: A #GArrowScalar.
+ *
+ * Returns: (transfer none): The #GArrowDataType for the scalar.
+ *
+ * Since: 5.0.0
+ */
+GArrowDataType *
+garrow_scalar_get_data_type(GArrowScalar *scalar)
+{
+  auto priv = GARROW_SCALAR_GET_PRIVATE(scalar);
+  if (!priv->data_type) {
+    priv->data_type = garrow_data_type_new_raw(&(priv->scalar->type));
+  }
+  return priv->data_type;
+}
+
+/**
+ * garrow_scalar_is_valid:
+ * @scalar: A #GArrowScalar.
+ *
+ * Returns: %TRUE if the scalar is valid, %FALSE otherwise.
+ *
+ * Since: 5.0.0
+ */
+gboolean
+garrow_scalar_is_valid(GArrowScalar *scalar)
+{
+  const auto arrow_scalar = garrow_scalar_get_raw(scalar);
+  return arrow_scalar->is_valid;
+}
+
+/**
+ * garrow_scalar_equal:
+ * @scalar: A #GArrowScalar.
+ * @other_scalar: A #GArrowScalar to be compared.
+ *
+ * Returns: %TRUE if both of them have the same data, %FALSE
+ *   otherwise.
+ *
+ * Since: 5.0.0
+ */
+gboolean
+garrow_scalar_equal(GArrowScalar *scalar,
+                    GArrowScalar *other_scalar)
+{
+  return garrow_scalar_equal_options(scalar, other_scalar, NULL);
+}
+
+/**
+ * garrow_scalar_equal_options:
+ * @scalar: A #GArrowScalar.
+ * @other_scalar: A #GArrowScalar to be compared.
+ * @options: (nullable): A #GArrowEqualOptions.
+ *
+ * Returns: %TRUE if both of them have the same data, %FALSE
+ *   otherwise.
+ *
+ * Since: 5.0.0
+ */
+gboolean
+garrow_scalar_equal_options(GArrowScalar *scalar,
+                            GArrowScalar *other_scalar,
+                            GArrowEqualOptions *options)
+{
+  const auto arrow_scalar = garrow_scalar_get_raw(scalar);
+  const auto arrow_other_scalar = garrow_scalar_get_raw(other_scalar);
+  if (options) {
+    auto is_approx = garrow_equal_options_is_approx(options);
+    const auto arrow_options = garrow_equal_options_get_raw(options);
+    if (is_approx) {
+      return arrow_scalar->ApproxEquals(*arrow_other_scalar, *arrow_options);
+    } else {
+      return arrow_scalar->Equals(arrow_other_scalar, *arrow_options);
+    }
+  } else {
+    return arrow_scalar->Equals(arrow_other_scalar);
+  }
+}
+
+/**
+ * garrow_scalar_to_string:
+ * @scalar: A #GArrowScalar.
+ *
+ * Returns: The string representation of the scalar.
+ *
+ *   It should be freed with g_free() when no longer needed.
+ *
+ * Since: 5.0.0
+ */
+gchar *
+garrow_scalar_to_string(GArrowScalar *scalar)
+{
+  const auto arrow_scalar = garrow_scalar_get_raw(scalar);
+  return g_strdup(arrow_scalar->ToString().c_str());
+}
+
+/**
+ * garrow_scalar_cast:
+ * @scalar: A #GArrowScalar.
+ * @data_type: A #GArrowDataType of the casted scalar.
+ * @options: (nullable): A #GArrowCastOptions.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (nullable) (transfer full):
+ *   A newly created casted scalar on success, %NULL on error.
+ *
+ * Since: 5.0.0
+ */
+GArrowScalar *
+garrow_scalar_cast(GArrowScalar *scalar,
+                   GArrowDataType *data_type,
+                   GArrowCastOptions *options,
+                   GError **error)
+{
+  const auto arrow_scalar = garrow_scalar_get_raw(scalar);
+  const auto arrow_data_type = garrow_data_type_get_raw(data_type);
+  auto arrow_casted_scalar_result = arrow_scalar->CastTo(arrow_data_type);
+  if (garrow::check(error, arrow_casted_scalar_result, "[scalar][cast]")) {
+    auto arrow_casted_scalar = *arrow_casted_scalar_result;
+    return garrow_scalar_new_raw(&arrow_casted_scalar,
+                                 "scalar", &arrow_casted_scalar,
+                                 "data-type", data_type,
+                                 NULL);
+  } else {
+    return NULL;
+  }
+}
+
+
+G_DEFINE_TYPE(GArrowNullScalar,
+              garrow_null_scalar,
+              GARROW_TYPE_SCALAR)
+
+static void
+garrow_null_scalar_init(GArrowNullScalar *object)
+{
+}
+
+static void
+garrow_null_scalar_class_init(GArrowNullScalarClass *klass)
+{
+}
+
+/**
+ * garrow_null_scalar_new:
+ *
+ * Returns: A newly created #GArrowNullScalar.
+ *
+ * Since: 5.0.0
+ */
+GArrowNullScalar *
+garrow_null_scalar_new(void)
+{
+  auto arrow_scalar =
+    std::static_pointer_cast<arrow::Scalar>(
+      std::make_shared<arrow::NullScalar>());
+  return GARROW_NULL_SCALAR(garrow_scalar_new_raw(&arrow_scalar));
+}
+
+
+G_DEFINE_TYPE(GArrowBooleanScalar,
+              garrow_boolean_scalar,
+              GARROW_TYPE_SCALAR)
+
+static void
+garrow_boolean_scalar_init(GArrowBooleanScalar *object)
+{
+}
+
+static void
+garrow_boolean_scalar_class_init(GArrowBooleanScalarClass *klass)
+{
+}
+
+/**
+ * garrow_boolean_scalar_new:
+ * @value: The value of this scalar.
+ *
+ * Returns: A newly created #GArrowBooleanScalar.
+ *
+ * Since: 5.0.0
+ */
+GArrowBooleanScalar *
+garrow_boolean_scalar_new(gboolean value)
+{
+  auto arrow_scalar =
+    std::static_pointer_cast<arrow::Scalar>(
+      std::make_shared<arrow::BooleanScalar>(value));
+  return GARROW_BOOLEAN_SCALAR(garrow_scalar_new_raw(&arrow_scalar));
+}
+
+/**
+ * garrow_boolean_scalar_get_value:
+ * @scalar: A #GArrowBooleanScalar.
+ *
+ * Returns: The value of this scalar.
+ *
+ * Since: 5.0.0
+ */
+gboolean
+garrow_boolean_scalar_get_value(GArrowBooleanScalar *scalar)
+{
+  const auto arrow_scalar =
+    std::static_pointer_cast<arrow::BooleanScalar>(
+      garrow_scalar_get_raw(GARROW_SCALAR(scalar)));
+  return arrow_scalar->value;
+}
+
+
+G_DEFINE_TYPE(GArrowInt8Scalar,
+              garrow_int8_scalar,
+              GARROW_TYPE_SCALAR)
+
+static void
+garrow_int8_scalar_init(GArrowInt8Scalar *object)
+{
+}
+
+static void
+garrow_int8_scalar_class_init(GArrowInt8ScalarClass *klass)
+{
+}
+
+/**
+ * garrow_int8_scalar_new:
+ * @value: The value of this scalar.
+ *
+ * Returns: A newly created #GArrowInt8Scalar.
+ *
+ * Since: 5.0.0
+ */
+GArrowInt8Scalar *
+garrow_int8_scalar_new(gint8 value)
+{
+  auto arrow_scalar =
+    std::static_pointer_cast<arrow::Scalar>(
+      std::make_shared<arrow::Int8Scalar>(value));
+  return GARROW_INT8_SCALAR(garrow_scalar_new_raw(&arrow_scalar));
+}
+
+/**
+ * garrow_int8_scalar_get_value:
+ * @scalar: A #GArrowInt8Scalar.
+ *
+ * Returns: The value of this scalar.
+ *
+ * Since: 5.0.0
+ */
+gint8
+garrow_int8_scalar_get_value(GArrowInt8Scalar *scalar)
+{
+  const auto arrow_scalar =
+    std::static_pointer_cast<arrow::Int8Scalar>(
+      garrow_scalar_get_raw(GARROW_SCALAR(scalar)));
+  return arrow_scalar->value;
+}
+
+
+G_DEFINE_TYPE(GArrowInt16Scalar,
+              garrow_int16_scalar,
+              GARROW_TYPE_SCALAR)
+
+static void
+garrow_int16_scalar_init(GArrowInt16Scalar *object)
+{
+}
+
+static void
+garrow_int16_scalar_class_init(GArrowInt16ScalarClass *klass)
+{
+}
+
+/**
+ * garrow_int16_scalar_new:
+ * @value: The value of this scalar.
+ *
+ * Returns: A newly created #GArrowInt16Scalar.
+ *
+ * Since: 5.0.0
+ */
+GArrowInt16Scalar *
+garrow_int16_scalar_new(gint16 value)
+{
+  auto arrow_scalar =
+    std::static_pointer_cast<arrow::Scalar>(
+      std::make_shared<arrow::Int16Scalar>(value));
+  return GARROW_INT16_SCALAR(garrow_scalar_new_raw(&arrow_scalar));
+}
+
+/**
+ * garrow_int16_scalar_get_value:
+ * @scalar: A #GArrowInt16Scalar.
+ *
+ * Returns: The value of this scalar.
+ *
+ * Since: 5.0.0
+ */
+gint16
+garrow_int16_scalar_get_value(GArrowInt16Scalar *scalar)
+{
+  const auto arrow_scalar =
+    std::static_pointer_cast<arrow::Int16Scalar>(
+      garrow_scalar_get_raw(GARROW_SCALAR(scalar)));
+  return arrow_scalar->value;
+}
+
+
+G_DEFINE_TYPE(GArrowInt32Scalar,
+              garrow_int32_scalar,
+              GARROW_TYPE_SCALAR)
+
+static void
+garrow_int32_scalar_init(GArrowInt32Scalar *object)
+{
+}
+
+static void
+garrow_int32_scalar_class_init(GArrowInt32ScalarClass *klass)
+{
+}
+
+/**
+ * garrow_int32_scalar_new:
+ * @value: The value of this scalar.
+ *
+ * Returns: A newly created #GArrowInt32Scalar.
+ *
+ * Since: 5.0.0
+ */
+GArrowInt32Scalar *
+garrow_int32_scalar_new(gint32 value)
+{
+  auto arrow_scalar =
+    std::static_pointer_cast<arrow::Scalar>(
+      std::make_shared<arrow::Int32Scalar>(value));
+  return GARROW_INT32_SCALAR(garrow_scalar_new_raw(&arrow_scalar));
+}
+
+/**
+ * garrow_int32_scalar_get_value:
+ * @scalar: A #GArrowInt32Scalar.
+ *
+ * Returns: The value of this scalar.
+ *
+ * Since: 5.0.0
+ */
+gint32
+garrow_int32_scalar_get_value(GArrowInt32Scalar *scalar)
+{
+  const auto arrow_scalar =
+    std::static_pointer_cast<arrow::Int32Scalar>(
+      garrow_scalar_get_raw(GARROW_SCALAR(scalar)));
+  return arrow_scalar->value;
+}
+
+
+G_DEFINE_TYPE(GArrowInt64Scalar,
+              garrow_int64_scalar,
+              GARROW_TYPE_SCALAR)
+
+static void
+garrow_int64_scalar_init(GArrowInt64Scalar *object)
+{
+}
+
+static void
+garrow_int64_scalar_class_init(GArrowInt64ScalarClass *klass)
+{
+}
+
+/**
+ * garrow_int64_scalar_new:
+ * @value: The value of this scalar.
+ *
+ * Returns: A newly created #GArrowInt64Scalar.
+ *
+ * Since: 5.0.0
+ */
+GArrowInt64Scalar *
+garrow_int64_scalar_new(gint64 value)
+{
+  auto arrow_scalar =
+    std::static_pointer_cast<arrow::Scalar>(
+      std::make_shared<arrow::Int64Scalar>(value));
+  return GARROW_INT64_SCALAR(garrow_scalar_new_raw(&arrow_scalar));
+}
+
+/**
+ * garrow_int64_scalar_get_value:
+ * @scalar: A #GArrowInt64Scalar.
+ *
+ * Returns: The value of this scalar.
+ *
+ * Since: 5.0.0
+ */
+gint64
+garrow_int64_scalar_get_value(GArrowInt64Scalar *scalar)
+{
+  const auto arrow_scalar =
+    std::static_pointer_cast<arrow::Int64Scalar>(
+      garrow_scalar_get_raw(GARROW_SCALAR(scalar)));
+  return arrow_scalar->value;
+}
+
+
+G_DEFINE_TYPE(GArrowUInt8Scalar,
+              garrow_uint8_scalar,
+              GARROW_TYPE_SCALAR)
+
+static void
+garrow_uint8_scalar_init(GArrowUInt8Scalar *object)
+{
+}
+
+static void
+garrow_uint8_scalar_class_init(GArrowUInt8ScalarClass *klass)
+{
+}
+
+/**
+ * garrow_uint8_scalar_new:
+ * @value: The value of this scalar.
+ *
+ * Returns: A newly created #GArrowUInt8Scalar.
+ *
+ * Since: 5.0.0
+ */
+GArrowUInt8Scalar *
+garrow_uint8_scalar_new(guint8 value)
+{
+  auto arrow_scalar =
+    std::static_pointer_cast<arrow::Scalar>(
+      std::make_shared<arrow::UInt8Scalar>(value));
+  return GARROW_UINT8_SCALAR(garrow_scalar_new_raw(&arrow_scalar));
+}
+
+/**
+ * garrow_uint8_scalar_get_value:
+ * @scalar: A #GArrowUInt8Scalar.
+ *
+ * Returns: The value of this scalar.
+ *
+ * Since: 5.0.0
+ */
+guint8
+garrow_uint8_scalar_get_value(GArrowUInt8Scalar *scalar)
+{
+  const auto arrow_scalar =
+    std::static_pointer_cast<arrow::UInt8Scalar>(
+      garrow_scalar_get_raw(GARROW_SCALAR(scalar)));
+  return arrow_scalar->value;
+}
+
+
+G_DEFINE_TYPE(GArrowUInt16Scalar,
+              garrow_uint16_scalar,
+              GARROW_TYPE_SCALAR)
+
+static void
+garrow_uint16_scalar_init(GArrowUInt16Scalar *object)
+{
+}
+
+static void
+garrow_uint16_scalar_class_init(GArrowUInt16ScalarClass *klass)
+{
+}
+
+/**
+ * garrow_uint16_scalar_new:
+ * @value: The value of this scalar.
+ *
+ * Returns: A newly created #GArrowUInt16Scalar.
+ *
+ * Since: 5.0.0
+ */
+GArrowUInt16Scalar *
+garrow_uint16_scalar_new(guint16 value)
+{
+  auto arrow_scalar =
+    std::static_pointer_cast<arrow::Scalar>(
+      std::make_shared<arrow::UInt16Scalar>(value));
+  return GARROW_UINT16_SCALAR(garrow_scalar_new_raw(&arrow_scalar));
+}
+
+/**
+ * garrow_uint16_scalar_get_value:
+ * @scalar: A #GArrowUInt16Scalar.
+ *
+ * Returns: The value of this scalar.
+ *
+ * Since: 5.0.0
+ */
+guint16
+garrow_uint16_scalar_get_value(GArrowUInt16Scalar *scalar)
+{
+  const auto arrow_scalar =
+    std::static_pointer_cast<arrow::UInt16Scalar>(
+      garrow_scalar_get_raw(GARROW_SCALAR(scalar)));
+  return arrow_scalar->value;
+}
+
+
+G_DEFINE_TYPE(GArrowUInt32Scalar,
+              garrow_uint32_scalar,
+              GARROW_TYPE_SCALAR)
+
+static void
+garrow_uint32_scalar_init(GArrowUInt32Scalar *object)
+{
+}
+
+static void
+garrow_uint32_scalar_class_init(GArrowUInt32ScalarClass *klass)
+{
+}
+
+/**
+ * garrow_uint32_scalar_new:
+ * @value: The value of this scalar.
+ *
+ * Returns: A newly created #GArrowUInt32Scalar.
+ *
+ * Since: 5.0.0
+ */
+GArrowUInt32Scalar *
+garrow_uint32_scalar_new(guint32 value)
+{
+  auto arrow_scalar =
+    std::static_pointer_cast<arrow::Scalar>(
+      std::make_shared<arrow::UInt32Scalar>(value));
+  return GARROW_UINT32_SCALAR(garrow_scalar_new_raw(&arrow_scalar));
+}
+
+/**
+ * garrow_uint32_scalar_get_value:
+ * @scalar: A #GArrowUInt32Scalar.
+ *
+ * Returns: The value of this scalar.
+ *
+ * Since: 5.0.0
+ */
+guint32
+garrow_uint32_scalar_get_value(GArrowUInt32Scalar *scalar)
+{
+  const auto arrow_scalar =
+    std::static_pointer_cast<arrow::UInt32Scalar>(
+      garrow_scalar_get_raw(GARROW_SCALAR(scalar)));
+  return arrow_scalar->value;
+}
+
+
+G_DEFINE_TYPE(GArrowUInt64Scalar,
+              garrow_uint64_scalar,
+              GARROW_TYPE_SCALAR)
+
+static void
+garrow_uint64_scalar_init(GArrowUInt64Scalar *object)
+{
+}
+
+static void
+garrow_uint64_scalar_class_init(GArrowUInt64ScalarClass *klass)
+{
+}
+
+/**
+ * garrow_uint64_scalar_new:
+ * @value: The value of this scalar.
+ *
+ * Returns: A newly created #GArrowUInt64Scalar.
+ *
+ * Since: 5.0.0
+ */
+GArrowUInt64Scalar *
+garrow_uint64_scalar_new(guint64 value)
+{
+  auto arrow_scalar =
+    std::static_pointer_cast<arrow::Scalar>(
+      std::make_shared<arrow::UInt64Scalar>(value));
+  return GARROW_UINT64_SCALAR(garrow_scalar_new_raw(&arrow_scalar));
+}
+
+/**
+ * garrow_uint64_scalar_get_value:
+ * @scalar: A #GArrowUInt64Scalar.
+ *
+ * Returns: The value of this scalar.
+ *
+ * Since: 5.0.0
+ */
+guint64
+garrow_uint64_scalar_get_value(GArrowUInt64Scalar *scalar)
+{
+  const auto arrow_scalar =
+    std::static_pointer_cast<arrow::UInt64Scalar>(
+      garrow_scalar_get_raw(GARROW_SCALAR(scalar)));
+  return arrow_scalar->value;
+}
+
+
+G_DEFINE_TYPE(GArrowFloatScalar,
+              garrow_float_scalar,
+              GARROW_TYPE_SCALAR)
+
+static void
+garrow_float_scalar_init(GArrowFloatScalar *object)
+{
+}
+
+static void
+garrow_float_scalar_class_init(GArrowFloatScalarClass *klass)
+{
+}
+
+/**
+ * garrow_float_scalar_new:
+ * @value: The value of this scalar.
+ *
+ * Returns: A newly created #GArrowFloatScalar.
+ *
+ * Since: 5.0.0
+ */
+GArrowFloatScalar *
+garrow_float_scalar_new(gfloat value)
+{
+  auto arrow_scalar =
+    std::static_pointer_cast<arrow::Scalar>(
+      std::make_shared<arrow::FloatScalar>(value));
+  return GARROW_FLOAT_SCALAR(garrow_scalar_new_raw(&arrow_scalar));
+}
+
+/**
+ * garrow_float_scalar_get_value:
+ * @scalar: A #GArrowFloatScalar.
+ *
+ * Returns: The value of this scalar.
+ *
+ * Since: 5.0.0
+ */
+gfloat
+garrow_float_scalar_get_value(GArrowFloatScalar *scalar)
+{
+  const auto arrow_scalar =
+    std::static_pointer_cast<arrow::FloatScalar>(
+      garrow_scalar_get_raw(GARROW_SCALAR(scalar)));
+  return arrow_scalar->value;
+}
+
+
+G_DEFINE_TYPE(GArrowDoubleScalar,
+              garrow_double_scalar,
+              GARROW_TYPE_SCALAR)
+
+static void
+garrow_double_scalar_init(GArrowDoubleScalar *object)
+{
+}
+
+static void
+garrow_double_scalar_class_init(GArrowDoubleScalarClass *klass)
+{
+}
+
+/**
+ * garrow_double_scalar_new:
+ * @value: The value of this scalar.
+ *
+ * Returns: A newly created #GArrowDoubleScalar.
+ *
+ * Since: 5.0.0
+ */
+GArrowDoubleScalar *
+garrow_double_scalar_new(gdouble value)
+{
+  auto arrow_scalar =
+    std::static_pointer_cast<arrow::Scalar>(
+      std::make_shared<arrow::DoubleScalar>(value));
+  return GARROW_DOUBLE_SCALAR(garrow_scalar_new_raw(&arrow_scalar));
+}
+
+/**
+ * garrow_double_scalar_get_value:
+ * @scalar: A #GArrowDoubleScalar.
+ *
+ * Returns: The value of this scalar.
+ *
+ * Since: 5.0.0
+ */
+gdouble
+garrow_double_scalar_get_value(GArrowDoubleScalar *scalar)
+{
+  const auto arrow_scalar =
+    std::static_pointer_cast<arrow::DoubleScalar>(
+      garrow_scalar_get_raw(GARROW_SCALAR(scalar)));
+  return arrow_scalar->value;
+}
+
+
+typedef struct GArrowBaseBinaryScalarPrivate_ {
+  GArrowBuffer *value;
+} GArrowBaseBinaryScalarPrivate;
+
+enum {
+  PROP_VALUE = 1,
+};
+
+G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GArrowBaseBinaryScalar,
+                                    garrow_base_binary_scalar,
+                                    GARROW_TYPE_SCALAR)
+
+#define GARROW_BASE_BINARY_SCALAR_GET_PRIVATE(obj)            \
+  static_cast<GArrowBaseBinaryScalarPrivate *>(               \
+    garrow_base_binary_scalar_get_instance_private(           \
+      GARROW_BASE_BINARY_SCALAR(obj)))
+
+static void
+garrow_base_binary_scalar_dispose(GObject *object)
+{
+  auto priv = GARROW_BASE_BINARY_SCALAR_GET_PRIVATE(object);
+
+  if (priv->value) {
+    g_object_unref(priv->value);
+    priv->value = NULL;
+  }
+
+  G_OBJECT_CLASS(garrow_base_binary_scalar_parent_class)->dispose(object);
+}
+
+static void
+garrow_base_binary_scalar_set_property(GObject *object,
+                                       guint prop_id,
+                                       const GValue *value,
+                                       GParamSpec *pspec)
+{
+  auto priv = GARROW_BASE_BINARY_SCALAR_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_VALUE:
+    priv->value = GARROW_BUFFER(g_value_dup_object(value));
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+garrow_base_binary_scalar_init(GArrowBaseBinaryScalar *object)
+{
+}
+
+static void
+garrow_base_binary_scalar_class_init(GArrowBaseBinaryScalarClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+  gobject_class->dispose      = garrow_base_binary_scalar_dispose;
+  gobject_class->set_property = garrow_base_binary_scalar_set_property;
+
+  GParamSpec *spec;
+  /**
+   * GArrowBaseBinaryScalar:value:
+   *
+   * The value of the scalar.
+   *
+   * Since: 5.0.0
+   */
+  spec = g_param_spec_object("value",
+                             "Value",
+                             "The value of the scalar",
+                             GARROW_TYPE_BUFFER,
+                             static_cast<GParamFlags>(G_PARAM_WRITABLE |
+                                                      G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_VALUE, spec);
+}
+
+G_END_DECLS
+template<typename ArrowBinaryScalarType>
+GArrowScalar *
+garrow_base_binary_scalar_new(GArrowBuffer *value)
+{
+  auto arrow_value = garrow_buffer_get_raw(value);
+  auto arrow_scalar =
+    std::static_pointer_cast<arrow::Scalar>(
+      std::make_shared<ArrowBinaryScalarType>(arrow_value));
+  return garrow_scalar_new_raw(&arrow_scalar,
+                               "scalar", &arrow_scalar,
+                               "value", value,
+                               NULL);
+}
+G_BEGIN_DECLS
+
+/**
+ * garrow_base_binary_scalar_get_value:
+ * @scalar: A #GArrowBaseBinaryScalar.
+ *
+ * Returns: (transfer none): The value of this scalar.
+ *
+ * Since: 5.0.0
+ */
+GArrowBuffer *
+garrow_base_binary_scalar_get_value(GArrowBaseBinaryScalar *scalar)
+{
+  auto priv = GARROW_BASE_BINARY_SCALAR_GET_PRIVATE(scalar);
+  if (!priv->value) {
+    const auto arrow_scalar =
+      std::static_pointer_cast<arrow::BaseBinaryScalar>(
+        garrow_scalar_get_raw(GARROW_SCALAR(scalar)));
+    priv->value = garrow_buffer_new_raw(&(arrow_scalar->value));
+  }
+  return priv->value;
+}
+
+
+G_DEFINE_TYPE(GArrowBinaryScalar,
+              garrow_binary_scalar,
+              GARROW_TYPE_BASE_BINARY_SCALAR)
+
+static void
+garrow_binary_scalar_init(GArrowBinaryScalar *object)
+{
+}
+
+static void
+garrow_binary_scalar_class_init(GArrowBinaryScalarClass *klass)
+{
+}
+
+/**
+ * garrow_binary_scalar_new:
+ * @value: The value of this scalar.
+ *
+ * Returns: A newly created #GArrowBinaryScalar.
+ *
+ * Since: 5.0.0
+ */
+GArrowBinaryScalar *
+garrow_binary_scalar_new(GArrowBuffer *value)
+{
+  return GARROW_BINARY_SCALAR(
+    garrow_base_binary_scalar_new<arrow::BinaryScalar>(value));
+}
+
+
+G_DEFINE_TYPE(GArrowStringScalar,
+              garrow_string_scalar,
+              GARROW_TYPE_BASE_BINARY_SCALAR)
+
+static void
+garrow_string_scalar_init(GArrowStringScalar *object)
+{
+}
+
+static void
+garrow_string_scalar_class_init(GArrowStringScalarClass *klass)
+{
+}
+
+/**
+ * garrow_string_scalar_new:
+ * @value: The value of this scalar.
+ *
+ * Returns: A newly created #GArrowStringScalar.
+ *
+ * Since: 5.0.0
+ */
+GArrowStringScalar *
+garrow_string_scalar_new(GArrowBuffer *value)
+{
+  return GARROW_STRING_SCALAR(
+    garrow_base_binary_scalar_new<arrow::StringScalar>(value));
+}
+
+
+G_DEFINE_TYPE(GArrowLargeBinaryScalar,
+              garrow_large_binary_scalar,
+              GARROW_TYPE_BASE_BINARY_SCALAR)
+
+static void
+garrow_large_binary_scalar_init(GArrowLargeBinaryScalar *object)
+{
+}
+
+static void
+garrow_large_binary_scalar_class_init(GArrowLargeBinaryScalarClass *klass)
+{
+}
+
+/**
+ * garrow_large_binary_scalar_new:
+ * @value: The value of this scalar.
+ *
+ * Returns: A newly created #GArrowLargeBinaryScalar.
+ *
+ * Since: 5.0.0
+ */
+GArrowLargeBinaryScalar *
+garrow_large_binary_scalar_new(GArrowBuffer *value)
+{
+  return GARROW_LARGE_BINARY_SCALAR(
+    garrow_base_binary_scalar_new<arrow::LargeBinaryScalar>(value));
+}
+
+
+G_DEFINE_TYPE(GArrowLargeStringScalar,
+              garrow_large_string_scalar,
+              GARROW_TYPE_BASE_BINARY_SCALAR)
+
+static void
+garrow_large_string_scalar_init(GArrowLargeStringScalar *object)
+{
+}
+
+static void
+garrow_large_string_scalar_class_init(GArrowLargeStringScalarClass *klass)
+{
+}
+
+/**
+ * garrow_large_string_scalar_new:
+ * @value: The value of this scalar.
+ *
+ * Returns: A newly created #GArrowLargeStringScalar.
+ *
+ * Since: 5.0.0
+ */
+GArrowLargeStringScalar *
+garrow_large_string_scalar_new(GArrowBuffer *value)
+{
+  return GARROW_LARGE_STRING_SCALAR(
+    garrow_base_binary_scalar_new<arrow::LargeStringScalar>(value));
+}
+
+
+G_DEFINE_TYPE(GArrowFixedSizeBinaryScalar,
+              garrow_fixed_size_binary_scalar,
+              GARROW_TYPE_BASE_BINARY_SCALAR)
+
+static void
+garrow_fixed_size_binary_scalar_init(GArrowFixedSizeBinaryScalar *object)
+{
+}
+
+static void
+garrow_fixed_size_binary_scalar_class_init(
+  GArrowFixedSizeBinaryScalarClass *klass)
+{
+}
+
+/**
+ * garrow_fixed_size_binary_scalar_new:
+ * @data_type: A #GArrowFixedSizeBinaryDataType for this scalar.
+ * @value: The value of this scalar.
+ *
+ * Returns: A newly created #GArrowFixedSizeBinaryScalar.
+ *
+ * Since: 5.0.0
+ */
+GArrowFixedSizeBinaryScalar *
+garrow_fixed_size_binary_scalar_new(GArrowFixedSizeBinaryDataType *data_type,
+                                    GArrowBuffer *value)
+{
+  auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type));
+  auto arrow_value = garrow_buffer_get_raw(value);
+  auto arrow_scalar =
+    std::static_pointer_cast<arrow::Scalar>(
+      std::make_shared<arrow::FixedSizeBinaryScalar>(
+        arrow_value, arrow_data_type));
+  return GARROW_FIXED_SIZE_BINARY_SCALAR(
+    garrow_scalar_new_raw(&arrow_scalar,
+                          "scalar", &arrow_scalar,
+                          "data-type", data_type,
+                          "value", value,
+                          NULL));
+}
+
+
+G_DEFINE_TYPE(GArrowDate32Scalar,
+              garrow_date32_scalar,
+              GARROW_TYPE_SCALAR)
+
+static void
+garrow_date32_scalar_init(GArrowDate32Scalar *object)
+{
+}
+
+static void
+garrow_date32_scalar_class_init(GArrowDate32ScalarClass *klass)
+{
+}
+
+/**
+ * garrow_date32_scalar_new:
+ * @value: The value of this scalar.
+ *
+ * Returns: A newly created #GArrowDate32Scalar.
+ *
+ * Since: 5.0.0
+ */
+GArrowDate32Scalar *
+garrow_date32_scalar_new(gint32 value)
+{
+  auto arrow_scalar =
+    std::static_pointer_cast<arrow::Scalar>(
+      std::make_shared<arrow::Date32Scalar>(value));
+  return GARROW_DATE32_SCALAR(garrow_scalar_new_raw(&arrow_scalar));
+}
+
+/**
+ * garrow_date32_scalar_get_value:
+ * @scalar: A #GArrowDate32Scalar.
+ *
+ * Returns: The value of this scalar.
+ *
+ * Since: 5.0.0
+ */
+gint32
+garrow_date32_scalar_get_value(GArrowDate32Scalar *scalar)
+{
+  const auto arrow_scalar =
+    std::static_pointer_cast<arrow::Date32Scalar>(
+      garrow_scalar_get_raw(GARROW_SCALAR(scalar)));
+  return arrow_scalar->value;
+}
+
+
+G_DEFINE_TYPE(GArrowDate64Scalar,
+              garrow_date64_scalar,
+              GARROW_TYPE_SCALAR)
+
+static void
+garrow_date64_scalar_init(GArrowDate64Scalar *object)
+{
+}
+
+static void
+garrow_date64_scalar_class_init(GArrowDate64ScalarClass *klass)
+{
+}
+
+/**
+ * garrow_date64_scalar_new:
+ * @value: The value of this scalar.
+ *
+ * Returns: A newly created #GArrowDate64Scalar.
+ *
+ * Since: 5.0.0
+ */
+GArrowDate64Scalar *
+garrow_date64_scalar_new(gint64 value)
+{
+  auto arrow_scalar =
+    std::static_pointer_cast<arrow::Scalar>(
+      std::make_shared<arrow::Date64Scalar>(value));
+  return GARROW_DATE64_SCALAR(garrow_scalar_new_raw(&arrow_scalar));
+}
+
+/**
+ * garrow_date64_scalar_get_value:
+ * @scalar: A #GArrowDate64Scalar.
+ *
+ * Returns: The value of this scalar.
+ *
+ * Since: 5.0.0
+ */
+gint64
+garrow_date64_scalar_get_value(GArrowDate64Scalar *scalar)
+{
+  const auto arrow_scalar =
+    std::static_pointer_cast<arrow::Date64Scalar>(
+      garrow_scalar_get_raw(GARROW_SCALAR(scalar)));
+  return arrow_scalar->value;
+}
+
+
+G_DEFINE_TYPE(GArrowTime32Scalar,
+              garrow_time32_scalar,
+              GARROW_TYPE_SCALAR)
+
+static void
+garrow_time32_scalar_init(GArrowTime32Scalar *object)
+{
+}
+
+static void
+garrow_time32_scalar_class_init(GArrowTime32ScalarClass *klass)
+{
+}
+
+/**
+ * garrow_time32_scalar_new:
+ * @data_type: A #GArrowTime32DataType for this scalar.
+ * @value: The value of this scalar.
+ *
+ * Returns: A newly created #GArrowTime32Scalar.
+ *
+ * Since: 5.0.0
+ */
+GArrowTime32Scalar *
+garrow_time32_scalar_new(GArrowTime32DataType *data_type,
+                         gint32 value)
+{
+  auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type));
+  auto arrow_scalar =
+    std::static_pointer_cast<arrow::Scalar>(
+      std::make_shared<arrow::Time32Scalar>(value, arrow_data_type));
+  return GARROW_TIME32_SCALAR(
+    garrow_scalar_new_raw(&arrow_scalar,
+                          "scalar", &arrow_scalar,
+                          "data-type", data_type,
+                          NULL));
+}
+
+/**
+ * garrow_time32_scalar_get_value:
+ * @scalar: A #GArrowTime32Scalar.
+ *
+ * Returns: The value of this scalar.
+ *
+ * Since: 5.0.0
+ */
+gint32
+garrow_time32_scalar_get_value(GArrowTime32Scalar *scalar)
+{
+  const auto arrow_scalar =
+    std::static_pointer_cast<arrow::Time32Scalar>(
+      garrow_scalar_get_raw(GARROW_SCALAR(scalar)));
+  return arrow_scalar->value;
+}
+
+
+G_DEFINE_TYPE(GArrowTime64Scalar,
+              garrow_time64_scalar,
+              GARROW_TYPE_SCALAR)
+
+static void
+garrow_time64_scalar_init(GArrowTime64Scalar *object)
+{
+}
+
+static void
+garrow_time64_scalar_class_init(GArrowTime64ScalarClass *klass)
+{
+}
+
+/**
+ * garrow_time64_scalar_new:
+ * @data_type: A #GArrowTime64DataType for this scalar.
+ * @value: The value of this scalar.
+ *
+ * Returns: A newly created #GArrowTime64Scalar.
+ *
+ * Since: 5.0.0
+ */
+GArrowTime64Scalar *
+garrow_time64_scalar_new(GArrowTime64DataType *data_type,
+                         gint64 value)
+{
+  auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type));
+  auto arrow_scalar =
+    std::static_pointer_cast<arrow::Scalar>(
+      std::make_shared<arrow::Time64Scalar>(value, arrow_data_type));
+  return GARROW_TIME64_SCALAR(
+    garrow_scalar_new_raw(&arrow_scalar,
+                          "scalar", &arrow_scalar,
+                          "data-type", data_type,
+                          NULL));
+}
+
+/**
+ * garrow_time64_scalar_get_value:
+ * @scalar: A #GArrowTime64Scalar.
+ *
+ * Returns: The value of this scalar.
+ *
+ * Since: 5.0.0
+ */
+gint64
+garrow_time64_scalar_get_value(GArrowTime64Scalar *scalar)
+{
+  const auto arrow_scalar =
+    std::static_pointer_cast<arrow::Time64Scalar>(
+      garrow_scalar_get_raw(GARROW_SCALAR(scalar)));
+  return arrow_scalar->value;
+}
+
+
+G_DEFINE_TYPE(GArrowTimestampScalar,
+              garrow_timestamp_scalar,
+              GARROW_TYPE_SCALAR)
+
+static void
+garrow_timestamp_scalar_init(GArrowTimestampScalar *object)
+{
+}
+
+static void
+garrow_timestamp_scalar_class_init(GArrowTimestampScalarClass *klass)
+{
+}
+
+/**
+ * garrow_timestamp_scalar_new:
+ * @data_type: A #GArrowTimestampDataType for this scalar.
+ * @value: The value of this scalar.
+ *
+ * Returns: A newly created #GArrowTimestampScalar.
+ *
+ * Since: 5.0.0
+ */
+GArrowTimestampScalar *
+garrow_timestamp_scalar_new(GArrowTimestampDataType *data_type,
+                            gint64 value)
+{
+  auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type));
+  auto arrow_scalar =
+    std::static_pointer_cast<arrow::Scalar>(
+      std::make_shared<arrow::TimestampScalar>(value, arrow_data_type));
+  return GARROW_TIMESTAMP_SCALAR(
+    garrow_scalar_new_raw(&arrow_scalar,
+                          "scalar", &arrow_scalar,
+                          "data-type", data_type,
+                          NULL));
+}
+
+/**
+ * garrow_timestamp_scalar_get_value:
+ * @scalar: A #GArrowTimestampScalar.
+ *
+ * Returns: The value of this scalar.
+ *
+ * Since: 5.0.0
+ */
+gint64
+garrow_timestamp_scalar_get_value(GArrowTimestampScalar *scalar)
+{
+  const auto arrow_scalar =
+    std::static_pointer_cast<arrow::TimestampScalar>(
+      garrow_scalar_get_raw(GARROW_SCALAR(scalar)));
+  return arrow_scalar->value;
+}
+
+
+typedef struct GArrowDecimal128ScalarPrivate_ {
+  GArrowDecimal128 *value;
+} GArrowDecimal128ScalarPrivate;
+
+G_DEFINE_TYPE_WITH_PRIVATE(GArrowDecimal128Scalar,
+                           garrow_decimal128_scalar,
+                           GARROW_TYPE_SCALAR)
+
+#define GARROW_DECIMAL128_SCALAR_GET_PRIVATE(obj)            \
+  static_cast<GArrowDecimal128ScalarPrivate *>(              \
+    garrow_decimal128_scalar_get_instance_private(           \
+      GARROW_DECIMAL128_SCALAR(obj)))
+
+static void
+garrow_decimal128_scalar_dispose(GObject *object)
+{
+  auto priv = GARROW_DECIMAL128_SCALAR_GET_PRIVATE(object);
+
+  if (priv->value) {
+    g_object_unref(priv->value);
+    priv->value = NULL;
+  }
+
+  G_OBJECT_CLASS(garrow_decimal128_scalar_parent_class)->dispose(object);
+}
+
+static void
+garrow_decimal128_scalar_set_property(GObject *object,
+                                      guint prop_id,
+                                      const GValue *value,
+                                      GParamSpec *pspec)
+{
+  auto priv = GARROW_DECIMAL128_SCALAR_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_VALUE:
+    priv->value = GARROW_DECIMAL128(g_value_dup_object(value));
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+garrow_decimal128_scalar_init(GArrowDecimal128Scalar *object)
+{
+}
+
+static void
+garrow_decimal128_scalar_class_init(GArrowDecimal128ScalarClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->dispose      = garrow_decimal128_scalar_dispose;
+  gobject_class->set_property = garrow_decimal128_scalar_set_property;
+
+  GParamSpec *spec;
+  /**
+   * GArrowDecimal128Scalar:value:
+   *
+   * The value of the scalar.
+   *
+   * Since: 5.0.0
+   */
+  spec = g_param_spec_object("value",
+                             "Value",
+                             "The value of the scalar",
+                             garrow_decimal128_get_type(),
+                             static_cast<GParamFlags>(G_PARAM_WRITABLE |
+                                                      G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_VALUE, spec);
+}
+
+/**
+ * garrow_decimal128_scalar_new:
+ * @data_type: A #GArrowDecimal128DataType for this scalar.
+ * @value: The value of this scalar.
+ *
+ * Returns: A newly created #GArrowDecimal128Scalar.
+ *
+ * Since: 5.0.0
+ */
+GArrowDecimal128Scalar *
+garrow_decimal128_scalar_new(GArrowDecimal128DataType *data_type,
+                             GArrowDecimal128 *value)
+{
+  auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type));
+  auto arrow_value = garrow_decimal128_get_raw(value);
+  auto arrow_scalar =
+    std::static_pointer_cast<arrow::Scalar>(
+      std::make_shared<arrow::Decimal128Scalar>(*arrow_value, arrow_data_type));
+  return GARROW_DECIMAL128_SCALAR(
+    garrow_scalar_new_raw(&arrow_scalar,
+                          "scalar", &arrow_scalar,
+                          "data-type", data_type,
+                          "value", value,
+                          NULL));
+}
+
+/**
+ * garrow_decimal128_scalar_get_value:
+ * @scalar: A #GArrowDecimal128Scalar.
+ *
+ * Returns: (transfer none): The value of this scalar.
+ *
+ * Since: 5.0.0
+ */
+GArrowDecimal128 *
+garrow_decimal128_scalar_get_value(GArrowDecimal128Scalar *scalar)
+{
+  auto priv = GARROW_DECIMAL128_SCALAR_GET_PRIVATE(scalar);
+  if (!priv->value) {
+    auto arrow_scalar =
+      std::static_pointer_cast<arrow::Decimal128Scalar>(
+        garrow_scalar_get_raw(GARROW_SCALAR(scalar)));
+    auto arrow_value = std::make_shared<arrow::Decimal128>(arrow_scalar->value);
+    priv->value = garrow_decimal128_new_raw(&arrow_value);
+  }
+  return priv->value;
+}
+
+
+typedef struct GArrowDecimal256ScalarPrivate_ {
+  GArrowDecimal256 *value;
+} GArrowDecimal256ScalarPrivate;
+
+G_DEFINE_TYPE_WITH_PRIVATE(GArrowDecimal256Scalar,
+                           garrow_decimal256_scalar,
+                           GARROW_TYPE_SCALAR)
+
+#define GARROW_DECIMAL256_SCALAR_GET_PRIVATE(obj)            \
+  static_cast<GArrowDecimal256ScalarPrivate *>(              \
+    garrow_decimal256_scalar_get_instance_private(           \
+      GARROW_DECIMAL256_SCALAR(obj)))
+
+static void
+garrow_decimal256_scalar_dispose(GObject *object)
+{
+  auto priv = GARROW_DECIMAL256_SCALAR_GET_PRIVATE(object);
+
+  if (priv->value) {
+    g_object_unref(priv->value);
+    priv->value = NULL;
+  }
+
+  G_OBJECT_CLASS(garrow_decimal256_scalar_parent_class)->dispose(object);
+}
+
+static void
+garrow_decimal256_scalar_set_property(GObject *object,
+                                      guint prop_id,
+                                      const GValue *value,
+                                      GParamSpec *pspec)
+{
+  auto priv = GARROW_DECIMAL256_SCALAR_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_VALUE:
+    priv->value = GARROW_DECIMAL256(g_value_dup_object(value));
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+garrow_decimal256_scalar_init(GArrowDecimal256Scalar *object)
+{
+}
+
+static void
+garrow_decimal256_scalar_class_init(GArrowDecimal256ScalarClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->dispose      = garrow_decimal256_scalar_dispose;
+  gobject_class->set_property = garrow_decimal256_scalar_set_property;
+
+  GParamSpec *spec;
+  /**
+   * GArrowDecimal256Scalar:value:
+   *
+   * The value of the scalar.
+   *
+   * Since: 5.0.0
+   */
+  spec = g_param_spec_object("value",
+                             "Value",
+                             "The value of the scalar",
+                             garrow_decimal256_get_type(),
+                             static_cast<GParamFlags>(G_PARAM_WRITABLE |
+                                                      G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_VALUE, spec);
+}
+
+/**
+ * garrow_decimal256_scalar_new:
+ * @data_type: A #GArrowDecimal256DataType for this scalar.
+ * @value: The value of this scalar.
+ *
+ * Returns: A newly created #GArrowDecimal256Scalar.
+ *
+ * Since: 5.0.0
+ */
+GArrowDecimal256Scalar *
+garrow_decimal256_scalar_new(GArrowDecimal256DataType *data_type,
+                             GArrowDecimal256 *value)
+{
+  auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type));
+  auto arrow_value = garrow_decimal256_get_raw(value);
+  auto arrow_scalar =
+    std::static_pointer_cast<arrow::Scalar>(
+      std::make_shared<arrow::Decimal256Scalar>(*arrow_value, arrow_data_type));
+  return GARROW_DECIMAL256_SCALAR(garrow_scalar_new_raw(&arrow_scalar,
+                                                        "scalar", &arrow_scalar,
+                                                        "data-type", data_type,
+                                                        "value", value,
+                                                        NULL));
+}
+
+/**
+ * garrow_decimal256_scalar_get_value:
+ * @scalar: A #GArrowDecimal256Scalar.
+ *
+ * Returns: (transfer none): The value of this scalar.
+ *
+ * Since: 5.0.0
+ */
+GArrowDecimal256 *
+garrow_decimal256_scalar_get_value(GArrowDecimal256Scalar *scalar)
+{
+  auto priv = GARROW_DECIMAL256_SCALAR_GET_PRIVATE(scalar);
+  if (!priv->value) {
+    auto arrow_scalar =
+      std::static_pointer_cast<arrow::Decimal256Scalar>(
+        garrow_scalar_get_raw(GARROW_SCALAR(scalar)));
+    auto arrow_value = std::make_shared<arrow::Decimal256>(arrow_scalar->value);
+    priv->value = garrow_decimal256_new_raw(&arrow_value);
+  }
+  return priv->value;
+}
+
+
+typedef struct GArrowBaseListScalarPrivate_ {
+  GArrowArray *value;
+} GArrowBaseListScalarPrivate;
+
+G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GArrowBaseListScalar,
+                                    garrow_base_list_scalar,
+                                    GARROW_TYPE_SCALAR)
+
+#define GARROW_BASE_LIST_SCALAR_GET_PRIVATE(obj)            \
+  static_cast<GArrowBaseListScalarPrivate *>(               \
+    garrow_base_list_scalar_get_instance_private(           \
+      GARROW_BASE_LIST_SCALAR(obj)))
+
+static void
+garrow_base_list_scalar_dispose(GObject *object)
+{
+  auto priv = GARROW_BASE_LIST_SCALAR_GET_PRIVATE(object);
+
+  if (priv->value) {
+    g_object_unref(priv->value);
+    priv->value = NULL;
+  }
+
+  G_OBJECT_CLASS(garrow_base_list_scalar_parent_class)->dispose(object);
+}
+
+static void
+garrow_base_list_scalar_set_property(GObject *object,
+                                     guint prop_id,
+                                     const GValue *value,
+                                     GParamSpec *pspec)
+{
+  auto priv = GARROW_BASE_LIST_SCALAR_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_VALUE:
+    priv->value = GARROW_ARRAY(g_value_dup_object(value));
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+garrow_base_list_scalar_init(GArrowBaseListScalar *object)
+{
+}
+
+static void
+garrow_base_list_scalar_class_init(GArrowBaseListScalarClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->dispose      = garrow_base_list_scalar_dispose;
+  gobject_class->set_property = garrow_base_list_scalar_set_property;
+
+  GParamSpec *spec;
+  /**
+   * GArrowBaseListScalar:value:
+   *
+   * The value of the scalar.
+   *
+   * Since: 5.0.0
+   */
+  spec = g_param_spec_object("value",
+                             "Value",
+                             "The value of the scalar",
+                             GARROW_TYPE_ARRAY,
+                             static_cast<GParamFlags>(G_PARAM_WRITABLE |
+                                                      G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_VALUE, spec);
+}
+
+G_END_DECLS
+template<typename ArrowListScalarType>
+GArrowScalar *
+garrow_base_list_scalar_new(GArrowArray *value)
+{
+  auto arrow_value = garrow_array_get_raw(value);
+  auto arrow_scalar =
+    std::static_pointer_cast<arrow::Scalar>(
+      std::make_shared<ArrowListScalarType>(arrow_value));
+  auto data_type = garrow_array_get_value_data_type(value);
+  auto scalar = garrow_scalar_new_raw(&arrow_scalar,
+                                      "scalar", &arrow_scalar,
+                                      "data-type", data_type,
+                                      "value", value,
+                                      NULL);
+  g_object_unref(data_type);
+  return scalar;
+}
+G_BEGIN_DECLS
+
+/**
+ * garrow_base_list_scalar_get_value:
+ * @scalar: A #GArrowBaseListScalar.
+ *
+ * Returns: (transfer none): The value of this scalar.
+ *
+ * Since: 5.0.0
+ */
+GArrowArray *
+garrow_base_list_scalar_get_value(GArrowBaseListScalar *scalar)
+{
+  auto priv = GARROW_BASE_LIST_SCALAR_GET_PRIVATE(scalar);
+  if (!priv->value) {
+    const auto arrow_scalar =
+      std::static_pointer_cast<arrow::BaseListScalar>(
+        garrow_scalar_get_raw(GARROW_SCALAR(scalar)));
+    priv->value = garrow_array_new_raw(&(arrow_scalar->value));
+  }
+  return priv->value;
+}
+
+
+G_DEFINE_TYPE(GArrowListScalar,
+              garrow_list_scalar,
+              GARROW_TYPE_BASE_LIST_SCALAR)
+
+static void
+garrow_list_scalar_init(GArrowListScalar *object)
+{
+}
+
+static void
+garrow_list_scalar_class_init(GArrowListScalarClass *klass)
+{
+}
+
+/**
+ * garrow_list_scalar_new:
+ * @value: The value of this scalar.
+ *
+ * Returns: A newly created #GArrowListScalar.
+ *
+ * Since: 5.0.0
+ */
+GArrowListScalar *
+garrow_list_scalar_new(GArrowListArray *value)
+{
+  return GARROW_LIST_SCALAR(
+    garrow_base_list_scalar_new<arrow::ListScalar>(GARROW_ARRAY(value)));
+}
+
+
+G_DEFINE_TYPE(GArrowLargeListScalar,
+              garrow_large_list_scalar,
+              GARROW_TYPE_BASE_LIST_SCALAR)
+
+static void
+garrow_large_list_scalar_init(GArrowLargeListScalar *object)
+{
+}
+
+static void
+garrow_large_list_scalar_class_init(GArrowLargeListScalarClass *klass)
+{
+}
+
+/**
+ * garrow_large_list_scalar_new:
+ * @value: The value of this scalar.
+ *
+ * Returns: A newly created #GArrowLargeListScalar.
+ *
+ * Since: 5.0.0
+ */
+GArrowLargeListScalar *
+garrow_large_list_scalar_new(GArrowLargeListArray *value)
+{
+  return GARROW_LARGE_LIST_SCALAR(
+    garrow_base_list_scalar_new<arrow::LargeListScalar>(GARROW_ARRAY(value)));
+}
+
+
+G_DEFINE_TYPE(GArrowMapScalar,
+              garrow_map_scalar,
+              GARROW_TYPE_BASE_LIST_SCALAR)
+
+static void
+garrow_map_scalar_init(GArrowMapScalar *object)
+{
+}
+
+static void
+garrow_map_scalar_class_init(GArrowMapScalarClass *klass)
+{
+}
+
+/**
+ * garrow_map_scalar_new:
+ * @value: The value of this scalar.
+ *
+ * Returns: A newly created #GArrowMapScalar.
+ *
+ * Since: 5.0.0
+ */
+GArrowMapScalar *
+garrow_map_scalar_new(GArrowStructArray *value)
+{
+  return GARROW_MAP_SCALAR(
+    garrow_base_list_scalar_new<arrow::MapScalar>(GARROW_ARRAY(value)));
+}
+
+
+typedef struct GArrowStructScalarPrivate_ {
+  GList *value;
+} GArrowStructScalarPrivate;
+
+G_DEFINE_TYPE_WITH_PRIVATE(GArrowStructScalar,
+                           garrow_struct_scalar,
+                           GARROW_TYPE_SCALAR)
+
+#define GARROW_STRUCT_SCALAR_GET_PRIVATE(obj)             \
+  static_cast<GArrowStructScalarPrivate *>(               \
+    garrow_struct_scalar_get_instance_private(            \
+      GARROW_STRUCT_SCALAR(obj)))
+
+static void
+garrow_struct_scalar_dispose(GObject *object)
+{
+  auto priv = GARROW_STRUCT_SCALAR_GET_PRIVATE(object);
+
+  if (priv->value) {
+    g_list_free_full(priv->value, g_object_unref);
+    priv->value = NULL;
+  }
+
+  G_OBJECT_CLASS(garrow_struct_scalar_parent_class)->dispose(object);
+}
+
+static void
+garrow_struct_scalar_init(GArrowStructScalar *object)
+{
+}
+
+static void
+garrow_struct_scalar_class_init(GArrowStructScalarClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+  gobject_class->dispose = garrow_struct_scalar_dispose;
+}
+
+/**
+ * garrow_struct_scalar_new:
+ * @data_type: A #GArrowStructDataType for this scalar.
+ * @value: (element-type GArrowScalar): The value of this scalar.
+ *
+ * Returns: A newly created #GArrowDecimal256Scalar.
+ *
+ * Since: 5.0.0
+ */
+GArrowStructScalar *
+garrow_struct_scalar_new(GArrowStructDataType *data_type,
+                         GList *value)
+{
+  auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type));
+  std::vector<std::shared_ptr<arrow::Scalar>> arrow_value;
+  for (GList *node = value; node; node = node->next) {
+    auto field = GARROW_SCALAR(node->data);
+    auto arrow_field = garrow_scalar_get_raw(field);
+    arrow_value.push_back(arrow_field);
+  }
+  auto arrow_scalar =
+    std::static_pointer_cast<arrow::Scalar>(
+      std::make_shared<arrow::StructScalar>(arrow_value, arrow_data_type));
+  auto scalar =
+    GARROW_STRUCT_SCALAR(
+      garrow_scalar_new_raw(&arrow_scalar,
+                            "scalar", &arrow_scalar,
+                            "data-type", data_type,
+                            NULL));
+  auto priv = GARROW_STRUCT_SCALAR_GET_PRIVATE(scalar);
+  priv->value = g_list_copy_deep(value,
+                                 reinterpret_cast<GCopyFunc>(g_object_ref),
+                                 NULL);
+  return scalar;
+}
+
+/**
+ * garrow_struct_scalar_get_value:
+ * @scalar: A #GArrowStructScalar.
+ *
+ * Returns: (element-type GArrowScalar) (transfer none):
+ *   The value of this scalar.
+ *
+ * Since: 5.0.0
+ */
+GList *
+garrow_struct_scalar_get_value(GArrowStructScalar *scalar)
+{
+  auto priv = GARROW_STRUCT_SCALAR_GET_PRIVATE(scalar);
+  return priv->value;
+}
+
+
+typedef struct GArrowUnionScalarPrivate_ {
+  GArrowScalar *value;
+} GArrowUnionScalarPrivate;
+
+G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GArrowUnionScalar,
+                                    garrow_union_scalar,
+                                    GARROW_TYPE_SCALAR)
+
+#define GARROW_UNION_SCALAR_GET_PRIVATE(obj)             \
+  static_cast<GArrowUnionScalarPrivate *>(               \
+    garrow_union_scalar_get_instance_private(            \
+      GARROW_UNION_SCALAR(obj)))
+
+static void
+garrow_union_scalar_dispose(GObject *object)
+{
+  auto priv = GARROW_UNION_SCALAR_GET_PRIVATE(object);
+
+  if (priv->value) {
+    g_object_unref(priv->value);
+    priv->value = NULL;
+  }
+
+  G_OBJECT_CLASS(garrow_union_scalar_parent_class)->dispose(object);
+}
+
+static void
+garrow_union_scalar_set_property(GObject *object,
+                                 guint prop_id,
+                                 const GValue *value,
+                                 GParamSpec *pspec)
+{
+  auto priv = GARROW_UNION_SCALAR_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_VALUE:
+    priv->value = GARROW_SCALAR(g_value_dup_object(value));
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+garrow_union_scalar_init(GArrowUnionScalar *object)
+{
+}
+
+static void
+garrow_union_scalar_class_init(GArrowUnionScalarClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+  gobject_class->dispose      = garrow_union_scalar_dispose;
+  gobject_class->set_property = garrow_union_scalar_set_property;
+
+  GParamSpec *spec;
+  /**
+   * GArrowUnionScalar:value:
+   *
+   * The value of the scalar.
+   *
+   * Since: 5.0.0
+   */
+  spec = g_param_spec_object("value",
+                             "Value",
+                             "The value of the scalar",
+                             GARROW_TYPE_SCALAR,
+                             static_cast<GParamFlags>(G_PARAM_WRITABLE |
+                                                      G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_VALUE, spec);
+}
+
+G_END_DECLS
+template<typename ArrowUnionScalarType>
+GArrowScalar *
+garrow_union_scalar_new(GArrowDataType *data_type,
+                        GArrowScalar *value)
+{
+  auto arrow_data_type = garrow_data_type_get_raw(data_type);
+  auto arrow_value = garrow_scalar_get_raw(value);
+  auto arrow_scalar =
+    std::static_pointer_cast<arrow::Scalar>(
+      std::make_shared<ArrowUnionScalarType>(arrow_value, arrow_data_type));
+  auto scalar = garrow_scalar_new_raw(&arrow_scalar,
+                                      "scalar", &arrow_scalar,
+                                      "data-type", data_type,
+                                      "value", value,
+                                      NULL);
+  return scalar;
+}
+G_BEGIN_DECLS
+
+/**
+ * garrow_union_scalar_get_value:
+ * @scalar: A #GArrowUnionScalar.
+ *
+ * Returns: (transfer none): The value of this scalar.
+ *
+ * Since: 5.0.0
+ */
+GArrowScalar *
+garrow_union_scalar_get_value(GArrowUnionScalar *scalar)
+{
+  auto priv = GARROW_UNION_SCALAR_GET_PRIVATE(scalar);
+  return priv->value;
+}
+
+
+G_DEFINE_TYPE(GArrowSparseUnionScalar,
+              garrow_sparse_union_scalar,
+              GARROW_TYPE_UNION_SCALAR)
+
+static void
+garrow_sparse_union_scalar_init(GArrowSparseUnionScalar *object)
+{
+}
+
+static void
+garrow_sparse_union_scalar_class_init(GArrowSparseUnionScalarClass *klass)
+{
+}
+
+/**
+ * garrow_sparse_union_scalar_new:
+ * @data_type: A #GArrowSparseUnionDataType for this scalar.
+ * @value: The value of this scalar.
+ *
+ * Returns: A newly created #GArrowSparseUnionScalar.
+ *
+ * Since: 5.0.0
+ */
+GArrowSparseUnionScalar *
+garrow_sparse_union_scalar_new(GArrowSparseUnionDataType *data_type,
+                               GArrowScalar *value)
+{
+  return GARROW_SPARSE_UNION_SCALAR(
+    garrow_union_scalar_new<arrow::SparseUnionScalar>(
+      GARROW_DATA_TYPE(data_type), value));
+}
+
+
+G_DEFINE_TYPE(GArrowDenseUnionScalar,
+              garrow_dense_union_scalar,
+              GARROW_TYPE_UNION_SCALAR)
+
+static void
+garrow_dense_union_scalar_init(GArrowDenseUnionScalar *object)
+{
+}
+
+static void
+garrow_dense_union_scalar_class_init(GArrowDenseUnionScalarClass *klass)
+{
+}
+
+/**
+ * garrow_dense_union_scalar_new:
+ * @data_type: A #GArrowDenseUnionDataType for this scalar.
+ * @value: The value of this scalar.
+ *
+ * Returns: A newly created #GArrowDenseUnionScalar.
+ *
+ * Since: 5.0.0
+ */
+GArrowDenseUnionScalar *
+garrow_dense_union_scalar_new(GArrowDenseUnionDataType *data_type,
+                              GArrowScalar *value)
+{
+  return GARROW_DENSE_UNION_SCALAR(
+    garrow_union_scalar_new<arrow::DenseUnionScalar>(
+      GARROW_DATA_TYPE(data_type), value));
+}
+
+
+G_DEFINE_TYPE(GArrowExtensionScalar,
+              garrow_extension_scalar,
+              GARROW_TYPE_SCALAR)
+
+static void
+garrow_extension_scalar_init(GArrowExtensionScalar *object)
+{
+}
+
+static void
+garrow_extension_scalar_class_init(GArrowExtensionScalarClass *klass)
+{
+}
+
+
+G_END_DECLS
+
+GArrowScalar *
+garrow_scalar_new_raw(std::shared_ptr<arrow::Scalar> *arrow_scalar)
+{
+  return garrow_scalar_new_raw(arrow_scalar,
+                               "scalar", arrow_scalar,
+                               NULL);
+}
+
+GArrowScalar *
+garrow_scalar_new_raw(std::shared_ptr<arrow::Scalar> *arrow_scalar,
+                      const gchar *first_property_name,
+                      ...)
+{
+  va_list args;
+  va_start(args, first_property_name);
+  auto array = garrow_scalar_new_raw_valist(arrow_scalar,
+                                            first_property_name,
+                                            args);
+  va_end(args);
+  return array;
+}
+
+GArrowScalar *
+garrow_scalar_new_raw_valist(std::shared_ptr<arrow::Scalar> *arrow_scalar,
+                             const gchar *first_property_name,
+                             va_list args)
+{
+  GType type;
+  GArrowScalar *scalar;
+
+  switch ((*arrow_scalar)->type->id()) {
+  case arrow::Type::type::NA:
+    type = GARROW_TYPE_NULL_SCALAR;
+    break;
+  case arrow::Type::type::BOOL:
+    type = GARROW_TYPE_BOOLEAN_SCALAR;
+    break;
+  case arrow::Type::type::INT8:
+    type = GARROW_TYPE_INT8_SCALAR;
+    break;
+  case arrow::Type::type::INT16:
+    type = GARROW_TYPE_INT16_SCALAR;
+    break;
+  case arrow::Type::type::INT32:
+    type = GARROW_TYPE_INT32_SCALAR;
+    break;
+  case arrow::Type::type::INT64:
+    type = GARROW_TYPE_INT64_SCALAR;
+    break;
+  case arrow::Type::type::UINT8:
+    type = GARROW_TYPE_UINT8_SCALAR;
+    break;
+  case arrow::Type::type::UINT16:
+    type = GARROW_TYPE_UINT16_SCALAR;
+    break;
+  case arrow::Type::type::UINT32:
+    type = GARROW_TYPE_UINT32_SCALAR;
+    break;
+  case arrow::Type::type::UINT64:
+    type = GARROW_TYPE_UINT64_SCALAR;
+    break;
+  case arrow::Type::type::FLOAT:
+    type = GARROW_TYPE_FLOAT_SCALAR;
+    break;
+  case arrow::Type::type::DOUBLE:
+    type = GARROW_TYPE_DOUBLE_SCALAR;
+    break;
+  case arrow::Type::type::BINARY:
+    type = GARROW_TYPE_BINARY_SCALAR;
+    break;
+  case arrow::Type::type::STRING:
+    type = GARROW_TYPE_STRING_SCALAR;
+    break;
+  case arrow::Type::type::LARGE_BINARY:
+    type = GARROW_TYPE_LARGE_BINARY_SCALAR;
+    break;
+  case arrow::Type::type::LARGE_STRING:
+    type = GARROW_TYPE_LARGE_STRING_SCALAR;
+    break;
+  case arrow::Type::type::FIXED_SIZE_BINARY:
+    type = GARROW_TYPE_FIXED_SIZE_BINARY_SCALAR;
+    break;
+  case arrow::Type::type::DATE32:
+    type = GARROW_TYPE_DATE32_SCALAR;
+    break;
+  case arrow::Type::type::DATE64:
+    type = GARROW_TYPE_DATE64_SCALAR;
+    break;
+  case arrow::Type::type::TIME32:
+    type = GARROW_TYPE_TIME32_SCALAR;
+    break;
+  case arrow::Type::type::TIME64:
+    type = GARROW_TYPE_TIME64_SCALAR;
+    break;
+  case arrow::Type::type::TIMESTAMP:
+    type = GARROW_TYPE_TIMESTAMP_SCALAR;
+    break;
+  case arrow::Type::type::DECIMAL128:
+    type = GARROW_TYPE_DECIMAL128_SCALAR;
+    break;
+  case arrow::Type::type::DECIMAL256:
+    type = GARROW_TYPE_DECIMAL256_SCALAR;
+    break;
+  case arrow::Type::type::LIST:
+    type = GARROW_TYPE_LIST_SCALAR;
+    break;
+  case arrow::Type::type::LARGE_LIST:
+    type = GARROW_TYPE_LARGE_LIST_SCALAR;
+    break;
+/*
+  case arrow::Type::type::FIXED_SIZE_LIST:
+    type = GARROW_TYPE_FIXED_SIZE_LIST_SCALAR;
+    break;
+*/
+  case arrow::Type::type::MAP:
+    type = GARROW_TYPE_MAP_SCALAR;
+    break;
+  case arrow::Type::type::STRUCT:
+    type = GARROW_TYPE_STRUCT_SCALAR;
+    break;
+  case arrow::Type::type::SPARSE_UNION:
+    type = GARROW_TYPE_SPARSE_UNION_SCALAR;
+    break;
+  case arrow::Type::type::DENSE_UNION:
+    type = GARROW_TYPE_DENSE_UNION_SCALAR;
+    break;
+  case arrow::Type::type::EXTENSION:
+    type = GARROW_TYPE_EXTENSION_SCALAR;
+    break;
+  default:
+    type = GARROW_TYPE_SCALAR;
+    break;
+  }
+  scalar = GARROW_SCALAR(g_object_new_valist(type,
+                                             first_property_name,
+                                             args));
+  return scalar;
+}
+
+std::shared_ptr<arrow::Scalar>
+garrow_scalar_get_raw(GArrowScalar *scalar)
+{
+  auto priv = GARROW_SCALAR_GET_PRIVATE(scalar);
+  return priv->scalar;
+}
diff --git a/c_glib/arrow-glib/scalar.h b/c_glib/arrow-glib/scalar.h
new file mode 100644
index 00000000000..007f1cd29b3
--- /dev/null
+++ b/c_glib/arrow-glib/scalar.h
@@ -0,0 +1,678 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <arrow-glib/array.h>
+
+G_BEGIN_DECLS
+
+typedef struct _GArrowCastOptions GArrowCastOptions;
+
+#define GARROW_TYPE_SCALAR (garrow_scalar_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowScalar,
+                         garrow_scalar,
+                         GARROW,
+                         SCALAR,
+                         GObject)
+struct _GArrowScalarClass
+{
+  GObjectClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GArrowScalar *
+garrow_scalar_parse(GArrowDataType *data_type,
+                    const guint8 *data,
+                    gsize size,
+                    GError **error);
+
+GARROW_AVAILABLE_IN_5_0
+GArrowDataType *
+garrow_scalar_get_data_type(GArrowScalar *scalar);
+GARROW_AVAILABLE_IN_5_0
+gboolean
+garrow_scalar_is_valid(GArrowScalar *scalar);
+GARROW_AVAILABLE_IN_5_0
+gboolean
+garrow_scalar_equal(GArrowScalar *scalar,
+                    GArrowScalar *other_scalar);
+GARROW_AVAILABLE_IN_5_0
+gboolean
+garrow_scalar_equal_options(GArrowScalar *scalar,
+                            GArrowScalar *other_scalar,
+                            GArrowEqualOptions *options);
+GARROW_AVAILABLE_IN_5_0
+gchar *
+garrow_scalar_to_string(GArrowScalar *scalar);
+
+GARROW_AVAILABLE_IN_5_0
+GArrowScalar *
+garrow_scalar_cast(GArrowScalar *scalar,
+                   GArrowDataType *data_type,
+                   GArrowCastOptions *options,
+                   GError **error);
+
+
+#define GARROW_TYPE_NULL_SCALAR (garrow_null_scalar_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowNullScalar,
+                         garrow_null_scalar,
+                         GARROW,
+                         NULL_SCALAR,
+                         GArrowScalar)
+struct _GArrowNullScalarClass
+{
+  GArrowScalarClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GArrowNullScalar *
+garrow_null_scalar_new(void);
+
+
+#define GARROW_TYPE_BOOLEAN_SCALAR (garrow_boolean_scalar_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowBooleanScalar,
+                         garrow_boolean_scalar,
+                         GARROW,
+                         BOOLEAN_SCALAR,
+                         GArrowScalar)
+struct _GArrowBooleanScalarClass
+{
+  GArrowScalarClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GArrowBooleanScalar *
+garrow_boolean_scalar_new(gboolean value);
+GARROW_AVAILABLE_IN_5_0
+gboolean
+garrow_boolean_scalar_get_value(GArrowBooleanScalar *scalar);
+
+
+#define GARROW_TYPE_INT8_SCALAR (garrow_int8_scalar_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowInt8Scalar,
+                         garrow_int8_scalar,
+                         GARROW,
+                         INT8_SCALAR,
+                         GArrowScalar)
+struct _GArrowInt8ScalarClass
+{
+  GArrowScalarClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GArrowInt8Scalar *
+garrow_int8_scalar_new(gint8 value);
+GARROW_AVAILABLE_IN_5_0
+gint8
+garrow_int8_scalar_get_value(GArrowInt8Scalar *scalar);
+
+
+#define GARROW_TYPE_INT16_SCALAR (garrow_int16_scalar_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowInt16Scalar,
+                         garrow_int16_scalar,
+                         GARROW,
+                         INT16_SCALAR,
+                         GArrowScalar)
+struct _GArrowInt16ScalarClass
+{
+  GArrowScalarClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GArrowInt16Scalar *
+garrow_int16_scalar_new(gint16 value);
+GARROW_AVAILABLE_IN_5_0
+gint16
+garrow_int16_scalar_get_value(GArrowInt16Scalar *scalar);
+
+
+#define GARROW_TYPE_INT32_SCALAR (garrow_int32_scalar_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowInt32Scalar,
+                         garrow_int32_scalar,
+                         GARROW,
+                         INT32_SCALAR,
+                         GArrowScalar)
+struct _GArrowInt32ScalarClass
+{
+  GArrowScalarClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GArrowInt32Scalar *
+garrow_int32_scalar_new(gint32 value);
+GARROW_AVAILABLE_IN_5_0
+gint32
+garrow_int32_scalar_get_value(GArrowInt32Scalar *scalar);
+
+
+#define GARROW_TYPE_INT64_SCALAR (garrow_int64_scalar_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowInt64Scalar,
+                         garrow_int64_scalar,
+                         GARROW,
+                         INT64_SCALAR,
+                         GArrowScalar)
+struct _GArrowInt64ScalarClass
+{
+  GArrowScalarClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GArrowInt64Scalar *
+garrow_int64_scalar_new(gint64 value);
+GARROW_AVAILABLE_IN_5_0
+gint64
+garrow_int64_scalar_get_value(GArrowInt64Scalar *scalar);
+
+
+#define GARROW_TYPE_UINT8_SCALAR (garrow_uint8_scalar_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowUInt8Scalar,
+                         garrow_uint8_scalar,
+                         GARROW,
+                         UINT8_SCALAR,
+                         GArrowScalar)
+struct _GArrowUInt8ScalarClass
+{
+  GArrowScalarClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GArrowUInt8Scalar *
+garrow_uint8_scalar_new(guint8 value);
+GARROW_AVAILABLE_IN_5_0
+guint8
+garrow_uint8_scalar_get_value(GArrowUInt8Scalar *scalar);
+
+
+#define GARROW_TYPE_UINT16_SCALAR (garrow_uint16_scalar_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowUInt16Scalar,
+                         garrow_uint16_scalar,
+                         GARROW,
+                         UINT16_SCALAR,
+                         GArrowScalar)
+struct _GArrowUInt16ScalarClass
+{
+  GArrowScalarClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GArrowUInt16Scalar *
+garrow_uint16_scalar_new(guint16 value);
+GARROW_AVAILABLE_IN_5_0
+guint16
+garrow_uint16_scalar_get_value(GArrowUInt16Scalar *scalar);
+
+
+#define GARROW_TYPE_UINT32_SCALAR (garrow_uint32_scalar_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowUInt32Scalar,
+                         garrow_uint32_scalar,
+                         GARROW,
+                         UINT32_SCALAR,
+                         GArrowScalar)
+struct _GArrowUInt32ScalarClass
+{
+  GArrowScalarClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GArrowUInt32Scalar *
+garrow_uint32_scalar_new(guint32 value);
+GARROW_AVAILABLE_IN_5_0
+guint32
+garrow_uint32_scalar_get_value(GArrowUInt32Scalar *scalar);
+
+
+#define GARROW_TYPE_UINT64_SCALAR (garrow_uint64_scalar_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowUInt64Scalar,
+                         garrow_uint64_scalar,
+                         GARROW,
+                         UINT64_SCALAR,
+                         GArrowScalar)
+struct _GArrowUInt64ScalarClass
+{
+  GArrowScalarClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GArrowUInt64Scalar *
+garrow_uint64_scalar_new(guint64 value);
+GARROW_AVAILABLE_IN_5_0
+guint64
+garrow_uint64_scalar_get_value(GArrowUInt64Scalar *scalar);
+
+
+#define GARROW_TYPE_FLOAT_SCALAR (garrow_float_scalar_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowFloatScalar,
+                         garrow_float_scalar,
+                         GARROW,
+                         FLOAT_SCALAR,
+                         GArrowScalar)
+struct _GArrowFloatScalarClass
+{
+  GArrowScalarClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GArrowFloatScalar *
+garrow_float_scalar_new(gfloat value);
+GARROW_AVAILABLE_IN_5_0
+gfloat
+garrow_float_scalar_get_value(GArrowFloatScalar *scalar);
+
+
+#define GARROW_TYPE_DOUBLE_SCALAR (garrow_double_scalar_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowDoubleScalar,
+                         garrow_double_scalar,
+                         GARROW,
+                         DOUBLE_SCALAR,
+                         GArrowScalar)
+struct _GArrowDoubleScalarClass
+{
+  GArrowScalarClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GArrowDoubleScalar *
+garrow_double_scalar_new(gdouble value);
+GARROW_AVAILABLE_IN_5_0
+gdouble
+garrow_double_scalar_get_value(GArrowDoubleScalar *scalar);
+
+
+#define GARROW_TYPE_BASE_BINARY_SCALAR (garrow_base_binary_scalar_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowBaseBinaryScalar,
+                         garrow_base_binary_scalar,
+                         GARROW,
+                         BASE_BINARY_SCALAR,
+                         GArrowScalar)
+struct _GArrowBaseBinaryScalarClass
+{
+  GArrowScalarClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GArrowBuffer *
+garrow_base_binary_scalar_get_value(GArrowBaseBinaryScalar *scalar);
+
+
+#define GARROW_TYPE_BINARY_SCALAR (garrow_binary_scalar_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowBinaryScalar,
+                         garrow_binary_scalar,
+                         GARROW,
+                         BINARY_SCALAR,
+                         GArrowBaseBinaryScalar)
+struct _GArrowBinaryScalarClass
+{
+  GArrowBaseBinaryScalarClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GArrowBinaryScalar *
+garrow_binary_scalar_new(GArrowBuffer *value);
+
+
+#define GARROW_TYPE_STRING_SCALAR (garrow_string_scalar_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowStringScalar,
+                         garrow_string_scalar,
+                         GARROW,
+                         STRING_SCALAR,
+                         GArrowBaseBinaryScalar)
+struct _GArrowStringScalarClass
+{
+  GArrowBaseBinaryScalarClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GArrowStringScalar *
+garrow_string_scalar_new(GArrowBuffer *value);
+
+
+#define GARROW_TYPE_LARGE_BINARY_SCALAR (garrow_large_binary_scalar_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowLargeBinaryScalar,
+                         garrow_large_binary_scalar,
+                         GARROW,
+                         LARGE_BINARY_SCALAR,
+                         GArrowBaseBinaryScalar)
+struct _GArrowLargeBinaryScalarClass
+{
+  GArrowBaseBinaryScalarClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GArrowLargeBinaryScalar *
+garrow_large_binary_scalar_new(GArrowBuffer *value);
+
+
+#define GARROW_TYPE_LARGE_STRING_SCALAR (garrow_large_string_scalar_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowLargeStringScalar,
+                         garrow_large_string_scalar,
+                         GARROW,
+                         LARGE_STRING_SCALAR,
+                         GArrowBaseBinaryScalar)
+struct _GArrowLargeStringScalarClass
+{
+  GArrowBaseBinaryScalarClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GArrowLargeStringScalar *
+garrow_large_string_scalar_new(GArrowBuffer *value);
+
+
+#define GARROW_TYPE_FIXED_SIZE_BINARY_SCALAR    \
+  (garrow_fixed_size_binary_scalar_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowFixedSizeBinaryScalar,
+                         garrow_fixed_size_binary_scalar,
+                         GARROW,
+                         FIXED_SIZE_BINARY_SCALAR,
+                         GArrowBaseBinaryScalar)
+struct _GArrowFixedSizeBinaryScalarClass
+{
+  GArrowBaseBinaryScalarClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GArrowFixedSizeBinaryScalar *
+garrow_fixed_size_binary_scalar_new(GArrowFixedSizeBinaryDataType *data_type,
+                                    GArrowBuffer *value);
+
+
+#define GARROW_TYPE_DATE32_SCALAR (garrow_date32_scalar_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowDate32Scalar,
+                         garrow_date32_scalar,
+                         GARROW,
+                         DATE32_SCALAR,
+                         GArrowScalar)
+struct _GArrowDate32ScalarClass
+{
+  GArrowScalarClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GArrowDate32Scalar *
+garrow_date32_scalar_new(gint32 value);
+GARROW_AVAILABLE_IN_5_0
+gint32
+garrow_date32_scalar_get_value(GArrowDate32Scalar *scalar);
+
+
+#define GARROW_TYPE_DATE64_SCALAR (garrow_date64_scalar_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowDate64Scalar,
+                         garrow_date64_scalar,
+                         GARROW,
+                         DATE64_SCALAR,
+                         GArrowScalar)
+struct _GArrowDate64ScalarClass
+{
+  GArrowScalarClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GArrowDate64Scalar *
+garrow_date64_scalar_new(gint64 value);
+GARROW_AVAILABLE_IN_5_0
+gint64
+garrow_date64_scalar_get_value(GArrowDate64Scalar *scalar);
+
+
+#define GARROW_TYPE_TIME32_SCALAR (garrow_time32_scalar_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowTime32Scalar,
+                         garrow_time32_scalar,
+                         GARROW,
+                         TIME32_SCALAR,
+                         GArrowScalar)
+struct _GArrowTime32ScalarClass
+{
+  GArrowScalarClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GArrowTime32Scalar *
+garrow_time32_scalar_new(GArrowTime32DataType *data_type,
+                         gint32 value);
+GARROW_AVAILABLE_IN_5_0
+gint32
+garrow_time32_scalar_get_value(GArrowTime32Scalar *scalar);
+
+
+#define GARROW_TYPE_TIME64_SCALAR (garrow_time64_scalar_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowTime64Scalar,
+                         garrow_time64_scalar,
+                         GARROW,
+                         TIME64_SCALAR,
+                         GArrowScalar)
+struct _GArrowTime64ScalarClass
+{
+  GArrowScalarClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GArrowTime64Scalar *
+garrow_time64_scalar_new(GArrowTime64DataType *data_type,
+                         gint64 value);
+GARROW_AVAILABLE_IN_5_0
+gint64
+garrow_time64_scalar_get_value(GArrowTime64Scalar *scalar);
+
+
+#define GARROW_TYPE_TIMESTAMP_SCALAR (garrow_timestamp_scalar_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowTimestampScalar,
+                         garrow_timestamp_scalar,
+                         GARROW,
+                         TIMESTAMP_SCALAR,
+                         GArrowScalar)
+struct _GArrowTimestampScalarClass
+{
+  GArrowScalarClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GArrowTimestampScalar *
+garrow_timestamp_scalar_new(GArrowTimestampDataType *data_type,
+                            gint64 value);
+GARROW_AVAILABLE_IN_5_0
+gint64
+garrow_timestamp_scalar_get_value(GArrowTimestampScalar *scalar);
+
+
+#define GARROW_TYPE_DECIMAL128_SCALAR (garrow_decimal128_scalar_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowDecimal128Scalar,
+                         garrow_decimal128_scalar,
+                         GARROW,
+                         DECIMAL128_SCALAR,
+                         GArrowScalar)
+struct _GArrowDecimal128ScalarClass
+{
+  GArrowScalarClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GArrowDecimal128Scalar *
+garrow_decimal128_scalar_new(GArrowDecimal128DataType *data_type,
+                             GArrowDecimal128 *value);
+GARROW_AVAILABLE_IN_5_0
+GArrowDecimal128 *
+garrow_decimal128_scalar_get_value(GArrowDecimal128Scalar *scalar);
+
+
+#define GARROW_TYPE_DECIMAL256_SCALAR (garrow_decimal256_scalar_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowDecimal256Scalar,
+                         garrow_decimal256_scalar,
+                         GARROW,
+                         DECIMAL256_SCALAR,
+                         GArrowScalar)
+struct _GArrowDecimal256ScalarClass
+{
+  GArrowScalarClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GArrowDecimal256Scalar *
+garrow_decimal256_scalar_new(GArrowDecimal256DataType *data_type,
+                             GArrowDecimal256 *value);
+GARROW_AVAILABLE_IN_5_0
+GArrowDecimal256 *
+garrow_decimal256_scalar_get_value(GArrowDecimal256Scalar *scalar);
+
+
+#define GARROW_TYPE_BASE_LIST_SCALAR (garrow_base_list_scalar_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowBaseListScalar,
+                         garrow_base_list_scalar,
+                         GARROW,
+                         BASE_LIST_SCALAR,
+                         GArrowScalar)
+struct _GArrowBaseListScalarClass
+{
+  GArrowScalarClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GArrowArray *
+garrow_base_list_scalar_get_value(GArrowBaseListScalar *scalar);
+
+#define GARROW_TYPE_LIST_SCALAR (garrow_list_scalar_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowListScalar,
+                         garrow_list_scalar,
+                         GARROW,
+                         LIST_SCALAR,
+                         GArrowBaseListScalar)
+struct _GArrowListScalarClass
+{
+  GArrowBaseListScalarClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GArrowListScalar *
+garrow_list_scalar_new(GArrowListArray *value);
+
+
+#define GARROW_TYPE_LARGE_LIST_SCALAR (garrow_large_list_scalar_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowLargeListScalar,
+                         garrow_large_list_scalar,
+                         GARROW,
+                         LARGE_LIST_SCALAR,
+                         GArrowBaseListScalar)
+struct _GArrowLargeListScalarClass
+{
+  GArrowBaseListScalarClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GArrowLargeListScalar *
+garrow_large_list_scalar_new(GArrowLargeListArray *value);
+
+
+#define GARROW_TYPE_MAP_SCALAR (garrow_map_scalar_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowMapScalar,
+                         garrow_map_scalar,
+                         GARROW,
+                         MAP_SCALAR,
+                         GArrowBaseListScalar)
+struct _GArrowMapScalarClass
+{
+  GArrowBaseListScalarClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GArrowMapScalar *
+garrow_map_scalar_new(GArrowStructArray *value);
+
+
+#define GARROW_TYPE_STRUCT_SCALAR (garrow_struct_scalar_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowStructScalar,
+                         garrow_struct_scalar,
+                         GARROW,
+                         STRUCT_SCALAR,
+                         GArrowScalar)
+struct _GArrowStructScalarClass
+{
+  GArrowScalarClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GArrowStructScalar *
+garrow_struct_scalar_new(GArrowStructDataType *data_type,
+                         GList *value);
+GARROW_AVAILABLE_IN_5_0
+GList *
+garrow_struct_scalar_get_value(GArrowStructScalar *scalar);
+
+
+#define GARROW_TYPE_UNION_SCALAR (garrow_union_scalar_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowUnionScalar,
+                         garrow_union_scalar,
+                         GARROW,
+                         UNION_SCALAR,
+                         GArrowScalar)
+struct _GArrowUnionScalarClass
+{
+  GArrowScalarClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GArrowScalar *
+garrow_union_scalar_get_value(GArrowUnionScalar *scalar);
+
+
+#define GARROW_TYPE_SPARSE_UNION_SCALAR (garrow_sparse_union_scalar_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowSparseUnionScalar,
+                         garrow_sparse_union_scalar,
+                         GARROW,
+                         SPARSE_UNION_SCALAR,
+                         GArrowUnionScalar)
+struct _GArrowSparseUnionScalarClass
+{
+  GArrowUnionScalarClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GArrowSparseUnionScalar *
+garrow_sparse_union_scalar_new(GArrowSparseUnionDataType *data_type,
+                               GArrowScalar *value);
+
+
+#define GARROW_TYPE_DENSE_UNION_SCALAR (garrow_dense_union_scalar_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowDenseUnionScalar,
+                         garrow_dense_union_scalar,
+                         GARROW,
+                         DENSE_UNION_SCALAR,
+                         GArrowUnionScalar)
+struct _GArrowDenseUnionScalarClass
+{
+  GArrowUnionScalarClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GArrowDenseUnionScalar *
+garrow_dense_union_scalar_new(GArrowDenseUnionDataType *data_type,
+                              GArrowScalar *value);
+
+
+#define GARROW_TYPE_EXTENSION_SCALAR (garrow_extension_scalar_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowExtensionScalar,
+                         garrow_extension_scalar,
+                         GARROW,
+                         EXTENSION_SCALAR,
+                         GArrowScalar)
+struct _GArrowExtensionScalarClass
+{
+  GArrowScalarClass parent_class;
+};
+
+G_END_DECLS
diff --git a/c_glib/arrow-glib/scalar.hpp b/c_glib/arrow-glib/scalar.hpp
new file mode 100644
index 00000000000..46ac73e21e8
--- /dev/null
+++ b/c_glib/arrow-glib/scalar.hpp
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <arrow/api.h>
+
+#include <arrow-glib/scalar.h>
+
+GArrowScalar *
+garrow_scalar_new_raw(std::shared_ptr<arrow::Scalar> *arrow_scalar);
+GArrowScalar *
+garrow_scalar_new_raw(std::shared_ptr<arrow::Scalar> *arrow_scalar,
+                      const gchar *first_property_name,
+                      ...);
+GArrowScalar *
+garrow_scalar_new_raw_valist(std::shared_ptr<arrow::Scalar> *arrow_scalar,
+                             const gchar *first_property_name,
+                             va_list args);
+std::shared_ptr<arrow::Scalar>
+garrow_scalar_get_raw(GArrowScalar *scalar);
diff --git a/c_glib/doc/arrow-dataset-glib/arrow-dataset-glib-docs.xml b/c_glib/doc/arrow-dataset-glib/arrow-dataset-glib-docs.xml
index 92ae0405dac..f9667bc2d43 100644
--- a/c_glib/doc/arrow-dataset-glib/arrow-dataset-glib-docs.xml
+++ b/c_glib/doc/arrow-dataset-glib/arrow-dataset-glib-docs.xml
@@ -58,6 +58,10 @@
     <title>Index of deprecated API</title>
     <xi:include href="xml/api-index-deprecated.xml"><xi:fallback /></xi:include>
   </index>
+  <index id="api-index-4-0-0" role="4.0.0">
+    <title>Index of new symbols in 4.0.0</title>
+    <xi:include href="xml/api-index-4.0.0.xml"><xi:fallback /></xi:include>
+  </index>
   <index id="api-index-1-0-0" role="1.0.0">
     <title>Index of new symbols in 1.0.0</title>
     <xi:include href="xml/api-index-1.0.0.xml"><xi:fallback /></xi:include>
diff --git a/c_glib/doc/arrow-glib/arrow-glib-docs.xml b/c_glib/doc/arrow-glib/arrow-glib-docs.xml
index 80af9506ecd..f7706f9c96e 100644
--- a/c_glib/doc/arrow-glib/arrow-glib-docs.xml
+++ b/c_glib/doc/arrow-glib/arrow-glib-docs.xml
@@ -55,6 +55,10 @@
       <title>Value</title>
       <xi:include href="xml/decimal.xml"/>
     </chapter>
+    <chapter id="scalar">
+      <title>Scalar</title>
+      <xi:include href="xml/scalar.xml"/>
+    </chapter>
     <chapter id="type">
       <title>Type</title>
       <xi:include href="xml/type.xml"/>
diff --git a/c_glib/test/run-test.rb b/c_glib/test/run-test.rb
index 9dff2fe5b06..044cb33a019 100755
--- a/c_glib/test/run-test.rb
+++ b/c_glib/test/run-test.rb
@@ -39,6 +39,10 @@ def initialize(data)
       @data = data
     end
   end
+
+  class BooleanScalar
+    alias_method :value, :value?
+  end
 end
 
 begin
diff --git a/c_glib/test/test-array-datum.rb b/c_glib/test/test-array-datum.rb
index f4bc9be7f14..623e5589ce4 100644
--- a/c_glib/test/test-array-datum.rb
+++ b/c_glib/test/test-array-datum.rb
@@ -35,6 +35,18 @@ def test_array_like?
     end
   end
 
+  def test_scalar?
+    assert do
+      not @datum.scalar?
+    end
+  end
+
+  def test_value?
+    assert do
+      @datum.value?
+    end
+  end
+
   sub_test_case("==") do
     def test_true
       assert_equal(Arrow::ArrayDatum.new(@array),
diff --git a/c_glib/test/test-binary-scalar.rb b/c_glib/test/test-binary-scalar.rb
new file mode 100644
index 00000000000..4efc50da080
--- /dev/null
+++ b/c_glib/test/test-binary-scalar.rb
@@ -0,0 +1,48 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestBinaryScalar < Test::Unit::TestCase
+  def setup
+    @buffer = Arrow::Buffer.new("\x03\x01\x02")
+    @scalar = Arrow::BinaryScalar.new(@buffer)
+  end
+
+  def test_data_type
+    assert_equal(Arrow::BinaryDataType.new,
+                 @scalar.data_type)
+  end
+
+  def test_valid?
+    assert do
+      @scalar.valid?
+    end
+  end
+
+  def test_equal
+    assert_equal(Arrow::BinaryScalar.new(@buffer),
+                 @scalar)
+  end
+
+  def test_to_s
+    assert_equal("\x03\x01\x02", @scalar.to_s)
+  end
+
+  def test_value
+    assert_equal(@buffer,
+                 @scalar.value)
+  end
+end
diff --git a/c_glib/test/test-boolean-scalar.rb b/c_glib/test/test-boolean-scalar.rb
new file mode 100644
index 00000000000..f8913d6a7e4
--- /dev/null
+++ b/c_glib/test/test-boolean-scalar.rb
@@ -0,0 +1,52 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestBooleanScalar < Test::Unit::TestCase
+  def setup
+    @scalar = Arrow::BooleanScalar.new(true)
+  end
+
+  def test_parse
+    assert_equal(@scalar,
+                 Arrow::Scalar.parse(Arrow::BooleanDataType.new,
+                                     "true"))
+  end
+
+  def test_data_type
+    assert_equal(Arrow::BooleanDataType.new,
+                 @scalar.data_type)
+  end
+
+  def test_valid?
+    assert do
+      @scalar.valid?
+    end
+  end
+
+  def test_equal
+    assert_equal(Arrow::BooleanScalar.new(true),
+                 @scalar)
+  end
+
+  def test_to_s
+    assert_equal("true", @scalar.to_s)
+  end
+
+  def test_value
+    assert_equal(true, @scalar.value)
+  end
+end
diff --git a/c_glib/test/test-date32-scalar.rb b/c_glib/test/test-date32-scalar.rb
new file mode 100644
index 00000000000..ae41ebf72f5
--- /dev/null
+++ b/c_glib/test/test-date32-scalar.rb
@@ -0,0 +1,47 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestDate32Scalar < Test::Unit::TestCase
+  def setup
+    @value = 17406 # 2017-08-28
+    @scalar = Arrow::Date32Scalar.new(@value)
+  end
+
+  def test_data_type
+    assert_equal(Arrow::Date32DataType.new,
+                 @scalar.data_type)
+  end
+
+  def test_valid?
+    assert do
+      @scalar.valid?
+    end
+  end
+
+  def test_equal
+    assert_equal(Arrow::Date32Scalar.new(@value),
+                 @scalar)
+  end
+
+  def test_to_s
+    assert_equal("2017-08-28", @scalar.to_s)
+  end
+
+  def test_value
+    assert_equal(@value, @scalar.value)
+  end
+end
diff --git a/c_glib/test/test-date64-scalar.rb b/c_glib/test/test-date64-scalar.rb
new file mode 100644
index 00000000000..ce39d3c2d74
--- /dev/null
+++ b/c_glib/test/test-date64-scalar.rb
@@ -0,0 +1,47 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestDate64Scalar < Test::Unit::TestCase
+  def setup
+    @value = 1503878400000 # 2017-08-28T00:00:00Z
+    @scalar = Arrow::Date64Scalar.new(@value)
+  end
+
+  def test_data_type
+    assert_equal(Arrow::Date64DataType.new,
+                 @scalar.data_type)
+  end
+
+  def test_valid?
+    assert do
+      @scalar.valid?
+    end
+  end
+
+  def test_equal
+    assert_equal(Arrow::Date64Scalar.new(@value),
+                 @scalar)
+  end
+
+  def test_to_s
+    assert_equal("2017-08-28", @scalar.to_s)
+  end
+
+  def test_value
+    assert_equal(@value, @scalar.value)
+  end
+end
diff --git a/c_glib/test/test-decimal128-scalar.rb b/c_glib/test/test-decimal128-scalar.rb
new file mode 100644
index 00000000000..380623a6701
--- /dev/null
+++ b/c_glib/test/test-decimal128-scalar.rb
@@ -0,0 +1,48 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestDecimal128Scalar < Test::Unit::TestCase
+  def setup
+    @data_type = Arrow::Decimal128DataType.new(8, 2)
+    @value = Arrow::Decimal128.new("23423445")
+    @scalar = Arrow::Decimal128Scalar.new(@data_type, @value)
+  end
+
+  def test_data_type
+    assert_equal(@data_type,
+                 @scalar.data_type)
+  end
+
+  def test_valid?
+    assert do
+      @scalar.valid?
+    end
+  end
+
+  def test_equal
+    assert_equal(Arrow::Decimal128Scalar.new(@data_type, @value),
+                 @scalar)
+  end
+
+  def test_to_s
+    assert_equal("234234.45", @scalar.to_s)
+  end
+
+  def test_value
+    assert_equal(@value, @scalar.value)
+  end
+end
diff --git a/c_glib/test/test-decimal256-scalar.rb b/c_glib/test/test-decimal256-scalar.rb
new file mode 100644
index 00000000000..2c419940df7
--- /dev/null
+++ b/c_glib/test/test-decimal256-scalar.rb
@@ -0,0 +1,48 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestDecimal256Scalar < Test::Unit::TestCase
+  def setup
+    @data_type = Arrow::Decimal256DataType.new(8, 2)
+    @value = Arrow::Decimal256.new("23423445")
+    @scalar = Arrow::Decimal256Scalar.new(@data_type, @value)
+  end
+
+  def test_data_type
+    assert_equal(@data_type,
+                 @scalar.data_type)
+  end
+
+  def test_valid?
+    assert do
+      @scalar.valid?
+    end
+  end
+
+  def test_equal
+    assert_equal(Arrow::Decimal256Scalar.new(@data_type, @value),
+                 @scalar)
+  end
+
+  def test_to_s
+    assert_equal("234234.45", @scalar.to_s)
+  end
+
+  def test_value
+    assert_equal(@value, @scalar.value)
+  end
+end
diff --git a/c_glib/test/test-dense-union-scalar.rb b/c_glib/test/test-dense-union-scalar.rb
new file mode 100644
index 00000000000..16d9458a9fa
--- /dev/null
+++ b/c_glib/test/test-dense-union-scalar.rb
@@ -0,0 +1,52 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestDenseUnionScalar < Test::Unit::TestCase
+  def setup
+    fields = [
+      Arrow::Field.new("number", Arrow::Int8DataType.new),
+      Arrow::Field.new("text", Arrow::StringDataType.new),
+    ]
+    @data_type = Arrow::DenseUnionDataType.new(fields, [2, 9])
+    @value = Arrow::Int8Scalar.new(-29)
+    @scalar = Arrow::DenseUnionScalar.new(@data_type, @value)
+  end
+
+  def test_data_type
+    assert_equal(@data_type,
+                 @scalar.data_type)
+  end
+
+  def test_valid?
+    assert do
+      @scalar.valid?
+    end
+  end
+
+  def test_equal
+    assert_equal(Arrow::DenseUnionScalar.new(@data_type, @value),
+                 @scalar)
+  end
+
+  def test_to_s
+    assert_equal("...", @scalar.to_s)
+  end
+
+  def test_value
+    assert_equal(@value, @scalar.value)
+  end
+end
diff --git a/c_glib/test/test-double-scalar.rb b/c_glib/test/test-double-scalar.rb
new file mode 100644
index 00000000000..eea673b41e5
--- /dev/null
+++ b/c_glib/test/test-double-scalar.rb
@@ -0,0 +1,49 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestDoubleScalar < Test::Unit::TestCase
+  def setup
+    @scalar = Arrow::DoubleScalar.new(1.1)
+  end
+
+  def test_data_type
+    assert_equal(Arrow::DoubleDataType.new,
+                 @scalar.data_type)
+  end
+
+  def test_valid?
+    assert do
+      @scalar.valid?
+    end
+  end
+
+  def test_equal
+    options = Arrow::EqualOptions.new
+    options.approx = true
+    assert do
+      @scalar.equal_options(Arrow::DoubleScalar.new(1.1), options)
+    end
+  end
+
+  def test_to_s
+    assert_equal("1.1", @scalar.to_s)
+  end
+
+  def test_value
+    assert_in_delta(1.1, @scalar.value)
+  end
+end
diff --git a/c_glib/test/test-fixed-size-binary-scalar.rb b/c_glib/test/test-fixed-size-binary-scalar.rb
new file mode 100644
index 00000000000..1a6f0703594
--- /dev/null
+++ b/c_glib/test/test-fixed-size-binary-scalar.rb
@@ -0,0 +1,49 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestFixedSizeBinaryScalar < Test::Unit::TestCase
+  def setup
+    @data_type = Arrow::FixedSizeBinaryDataType.new(3)
+    @buffer = Arrow::Buffer.new("\x03\x01\x02")
+    @scalar = Arrow::FixedSizeBinaryScalar.new(@data_type, @buffer)
+  end
+
+  def test_data_type
+    assert_equal(@data_type,
+                 @scalar.data_type)
+  end
+
+  def test_valid?
+    assert do
+      @scalar.valid?
+    end
+  end
+
+  def test_equal
+    assert_equal(Arrow::FixedSizeBinaryScalar.new(@data_type, @buffer),
+                 @scalar)
+  end
+
+  def test_to_s
+    assert_equal("\x03\x01\x02", @scalar.to_s)
+  end
+
+  def test_value
+    assert_equal(@buffer,
+                 @scalar.value)
+  end
+end
diff --git a/c_glib/test/test-float-scalar.rb b/c_glib/test/test-float-scalar.rb
new file mode 100644
index 00000000000..1b830408cbb
--- /dev/null
+++ b/c_glib/test/test-float-scalar.rb
@@ -0,0 +1,49 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestFloatScalar < Test::Unit::TestCase
+  def setup
+    @scalar = Arrow::FloatScalar.new(1.1)
+  end
+
+  def test_data_type
+    assert_equal(Arrow::FloatDataType.new,
+                 @scalar.data_type)
+  end
+
+  def test_valid?
+    assert do
+      @scalar.valid?
+    end
+  end
+
+  def test_equal
+    options = Arrow::EqualOptions.new
+    options.approx = true
+    assert do
+      @scalar.equal_options(Arrow::FloatScalar.new(1.1), options)
+    end
+  end
+
+  def test_to_s
+    assert_equal("1.1", @scalar.to_s)
+  end
+
+  def test_value
+    assert_in_delta(1.1, @scalar.value)
+  end
+end
diff --git a/c_glib/test/test-function.rb b/c_glib/test/test-function.rb
index 8530ea5c153..390bed5cc94 100644
--- a/c_glib/test/test-function.rb
+++ b/c_glib/test/test-function.rb
@@ -50,6 +50,25 @@ def test_chunked_array
                    or_function.execute(args).value)
     end
 
+    def test_input_scalar
+      add_function = Arrow::Function.find("add")
+      args = [
+        Arrow::ArrayDatum.new(build_int8_array([1, 2, 3])),
+        Arrow::ScalarDatum.new(Arrow::Int8Scalar.new(5)),
+      ]
+      assert_equal(build_int8_array([6, 7, 8]),
+                   add_function.execute(args).value)
+    end
+
+    def test_output_scalar
+      sum_function = Arrow::Function.find("sum")
+      args = [
+        Arrow::ArrayDatum.new(build_int8_array([1, 2, 3])),
+      ]
+      assert_equal(Arrow::Int64Scalar.new(6),
+                   sum_function.execute(args).value)
+    end
+
     def test_options
       cast_function = Arrow::Function.find("cast")
       args = [
diff --git a/c_glib/test/test-int16-scalar.rb b/c_glib/test/test-int16-scalar.rb
new file mode 100644
index 00000000000..1a792714079
--- /dev/null
+++ b/c_glib/test/test-int16-scalar.rb
@@ -0,0 +1,46 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestInt16Scalar < Test::Unit::TestCase
+  def setup
+    @scalar = Arrow::Int16Scalar.new(-(2 ** 15))
+  end
+
+  def test_data_type
+    assert_equal(Arrow::Int16DataType.new,
+                 @scalar.data_type)
+  end
+
+  def test_valid?
+    assert do
+      @scalar.valid?
+    end
+  end
+
+  def test_equal
+    assert_equal(Arrow::Int16Scalar.new(-(2 ** 15)),
+                 @scalar)
+  end
+
+  def test_to_s
+    assert_equal((-(2 ** 15)).to_s, @scalar.to_s)
+  end
+
+  def test_value
+    assert_equal(-(2 ** 15), @scalar.value)
+  end
+end
diff --git a/c_glib/test/test-int32-scalar.rb b/c_glib/test/test-int32-scalar.rb
new file mode 100644
index 00000000000..eba554845c7
--- /dev/null
+++ b/c_glib/test/test-int32-scalar.rb
@@ -0,0 +1,46 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestInt32Scalar < Test::Unit::TestCase
+  def setup
+    @scalar = Arrow::Int32Scalar.new(-(2 ** 31))
+  end
+
+  def test_data_type
+    assert_equal(Arrow::Int32DataType.new,
+                 @scalar.data_type)
+  end
+
+  def test_valid?
+    assert do
+      @scalar.valid?
+    end
+  end
+
+  def test_equal
+    assert_equal(Arrow::Int32Scalar.new(-(2 ** 31)),
+                 @scalar)
+  end
+
+  def test_to_s
+    assert_equal((-(2 ** 31)).to_s, @scalar.to_s)
+  end
+
+  def test_value
+    assert_equal(-(2 ** 31), @scalar.value)
+  end
+end
diff --git a/c_glib/test/test-int64-scalar.rb b/c_glib/test/test-int64-scalar.rb
new file mode 100644
index 00000000000..bfa7b4529e8
--- /dev/null
+++ b/c_glib/test/test-int64-scalar.rb
@@ -0,0 +1,46 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestInt64Scalar < Test::Unit::TestCase
+  def setup
+    @scalar = Arrow::Int64Scalar.new(-(2 ** 63))
+  end
+
+  def test_data_type
+    assert_equal(Arrow::Int64DataType.new,
+                 @scalar.data_type)
+  end
+
+  def test_valid?
+    assert do
+      @scalar.valid?
+    end
+  end
+
+  def test_equal
+    assert_equal(Arrow::Int64Scalar.new(-(2 ** 63)),
+                 @scalar)
+  end
+
+  def test_to_s
+    assert_equal((-(2 ** 63)).to_s, @scalar.to_s)
+  end
+
+  def test_value
+    assert_equal(-(2 ** 63), @scalar.value)
+  end
+end
diff --git a/c_glib/test/test-int8-scalar.rb b/c_glib/test/test-int8-scalar.rb
new file mode 100644
index 00000000000..214c5907375
--- /dev/null
+++ b/c_glib/test/test-int8-scalar.rb
@@ -0,0 +1,46 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestInt8Scalar < Test::Unit::TestCase
+  def setup
+    @scalar = Arrow::Int8Scalar.new(-128)
+  end
+
+  def test_data_type
+    assert_equal(Arrow::Int8DataType.new,
+                 @scalar.data_type)
+  end
+
+  def test_valid?
+    assert do
+      @scalar.valid?
+    end
+  end
+
+  def test_equal
+    assert_equal(Arrow::Int8Scalar.new(-128),
+                 @scalar)
+  end
+
+  def test_to_s
+    assert_equal("-128", @scalar.to_s)
+  end
+
+  def test_value
+    assert_equal(-128, @scalar.value)
+  end
+end
diff --git a/c_glib/test/test-large-binary-scalar.rb b/c_glib/test/test-large-binary-scalar.rb
new file mode 100644
index 00000000000..a6bc4addb10
--- /dev/null
+++ b/c_glib/test/test-large-binary-scalar.rb
@@ -0,0 +1,48 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestLargeBinaryScalar < Test::Unit::TestCase
+  def setup
+    @buffer = Arrow::Buffer.new("\x03\x01\x02")
+    @scalar = Arrow::LargeBinaryScalar.new(@buffer)
+  end
+
+  def test_data_type
+    assert_equal(Arrow::LargeBinaryDataType.new,
+                 @scalar.data_type)
+  end
+
+  def test_valid?
+    assert do
+      @scalar.valid?
+    end
+  end
+
+  def test_equal
+    assert_equal(Arrow::LargeBinaryScalar.new(@buffer),
+                 @scalar)
+  end
+
+  def test_to_s
+    assert_equal("...", @scalar.to_s)
+  end
+
+  def test_value
+    assert_equal(@buffer,
+                 @scalar.value)
+  end
+end
diff --git a/c_glib/test/test-large-string-scalar.rb b/c_glib/test/test-large-string-scalar.rb
new file mode 100644
index 00000000000..13e28f647ac
--- /dev/null
+++ b/c_glib/test/test-large-string-scalar.rb
@@ -0,0 +1,48 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestLargeStringScalar < Test::Unit::TestCase
+  def setup
+    @buffer = Arrow::Buffer.new("Hello")
+    @scalar = Arrow::LargeStringScalar.new(@buffer)
+  end
+
+  def test_data_type
+    assert_equal(Arrow::LargeStringDataType.new,
+                 @scalar.data_type)
+  end
+
+  def test_valid?
+    assert do
+      @scalar.valid?
+    end
+  end
+
+  def test_equal
+    assert_equal(Arrow::LargeStringScalar.new(@buffer),
+                 @scalar)
+  end
+
+  def test_to_s
+    assert_equal("...", @scalar.to_s)
+  end
+
+  def test_value
+    assert_equal(@buffer,
+                 @scalar.value)
+  end
+end
diff --git a/c_glib/test/test-list-scalar.rb b/c_glib/test/test-list-scalar.rb
new file mode 100644
index 00000000000..3fda3f25bbb
--- /dev/null
+++ b/c_glib/test/test-list-scalar.rb
@@ -0,0 +1,50 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestListScalar < Test::Unit::TestCase
+  include Helper::Buildable
+
+  def setup
+    @value = build_list_array(Arrow::Int8DataType.new,
+                              [[1, 2, 3]])
+    @scalar = Arrow::ListScalar.new(@value)
+  end
+
+  def test_data_type
+    assert_equal(@value.value_data_type,
+                 @scalar.data_type)
+  end
+
+  def test_valid?
+    assert do
+      @scalar.valid?
+    end
+  end
+
+  def test_equal
+    assert_equal(Arrow::ListScalar.new(@value),
+                 @scalar)
+  end
+
+  def test_to_s
+    assert_equal("...", @scalar.to_s)
+  end
+
+  def test_value
+    assert_equal(@value, @scalar.value)
+  end
+end
diff --git a/c_glib/test/test-map-scalar.rb b/c_glib/test/test-map-scalar.rb
new file mode 100644
index 00000000000..9c6eb69e0a8
--- /dev/null
+++ b/c_glib/test/test-map-scalar.rb
@@ -0,0 +1,65 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestMapScalar < Test::Unit::TestCase
+  include Helper::Buildable
+
+  def setup
+    @value = build_struct_array([
+                                  Arrow::Field.new("key",
+                                                   Arrow::StringDataType.new,
+                                                   false),
+                                  Arrow::Field.new("value",
+                                                   Arrow::Int8DataType.new),
+                                ],
+                                [
+                                  {
+                                    "key" => "hello",
+                                    "value" => 1,
+                                  },
+                                  {
+                                    "key" => "world",
+                                    "value" => 2,
+                                  },
+                                ])
+    @scalar = Arrow::MapScalar.new(@value)
+  end
+
+  def test_data_type
+    assert_equal(@value.value_data_type,
+                 @scalar.data_type)
+  end
+
+  def test_valid?
+    assert do
+      @scalar.valid?
+    end
+  end
+
+  def test_equal
+    assert_equal(Arrow::MapScalar.new(@value),
+                 @scalar)
+  end
+
+  def test_to_s
+    assert_equal("...", @scalar.to_s)
+  end
+
+  def test_value
+    assert_equal(@value, @scalar.value)
+  end
+end
diff --git a/c_glib/test/test-null-scalar.rb b/c_glib/test/test-null-scalar.rb
new file mode 100644
index 00000000000..07b887040fb
--- /dev/null
+++ b/c_glib/test/test-null-scalar.rb
@@ -0,0 +1,42 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestNullScalar < Test::Unit::TestCase
+  def setup
+    @scalar = Arrow::NullScalar.new
+  end
+
+  def test_data_type
+    assert_equal(Arrow::NullDataType.new,
+                 @scalar.data_type)
+  end
+
+  def test_valid?
+    assert do
+      not @scalar.valid?
+    end
+  end
+
+  def test_equal
+    assert_equal(Arrow::NullScalar.new,
+                 @scalar)
+  end
+
+  def test_to_s
+    assert_equal("null", @scalar.to_s)
+  end
+end
diff --git a/c_glib/test/test-scalar-datum.rb b/c_glib/test/test-scalar-datum.rb
new file mode 100644
index 00000000000..17e5d6b061c
--- /dev/null
+++ b/c_glib/test/test-scalar-datum.rb
@@ -0,0 +1,69 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestScalarDatum < Test::Unit::TestCase
+  include Helper::Buildable
+
+  def setup
+    @scalar = Arrow::BooleanScalar.new(true)
+    @datum = Arrow::ScalarDatum.new(@scalar)
+  end
+
+  def test_array?
+    assert do
+      not @datum.array?
+    end
+  end
+
+  def test_array_like?
+    assert do
+      not @datum.array_like?
+    end
+  end
+
+  def test_scalar?
+    assert do
+      @datum.scalar?
+    end
+  end
+
+  def test_value?
+    assert do
+      @datum.value?
+    end
+  end
+
+  sub_test_case("==") do
+    def test_true
+      assert_equal(Arrow::ScalarDatum.new(@scalar),
+                   Arrow::ScalarDatum.new(@scalar))
+    end
+
+    def test_false
+      assert_not_equal(@datum,
+                       Arrow::ArrayDatum.new(build_boolean_array([true, false])))
+    end
+  end
+
+  def test_to_string
+    assert_equal("Scalar", @datum.to_s)
+  end
+
+  def test_value
+    assert_equal(@scalar, @datum.value)
+  end
+end
diff --git a/c_glib/test/test-sparse-union-scalar.rb b/c_glib/test/test-sparse-union-scalar.rb
new file mode 100644
index 00000000000..9ca7d62a08d
--- /dev/null
+++ b/c_glib/test/test-sparse-union-scalar.rb
@@ -0,0 +1,52 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestSparseUnionScalar < Test::Unit::TestCase
+  def setup
+    fields = [
+      Arrow::Field.new("number", Arrow::Int8DataType.new),
+      Arrow::Field.new("text", Arrow::StringDataType.new),
+    ]
+    @data_type = Arrow::SparseUnionDataType.new(fields, [2, 9])
+    @value = Arrow::Int8Scalar.new(-29)
+    @scalar = Arrow::SparseUnionScalar.new(@data_type, @value)
+  end
+
+  def test_data_type
+    assert_equal(@data_type,
+                 @scalar.data_type)
+  end
+
+  def test_valid?
+    assert do
+      @scalar.valid?
+    end
+  end
+
+  def test_equal
+    assert_equal(Arrow::SparseUnionScalar.new(@data_type, @value),
+                 @scalar)
+  end
+
+  def test_to_s
+    assert_equal("...", @scalar.to_s)
+  end
+
+  def test_value
+    assert_equal(@value, @scalar.value)
+  end
+end
diff --git a/c_glib/test/test-string-scalar.rb b/c_glib/test/test-string-scalar.rb
new file mode 100644
index 00000000000..3b9499ef950
--- /dev/null
+++ b/c_glib/test/test-string-scalar.rb
@@ -0,0 +1,55 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestStringScalar < Test::Unit::TestCase
+  def setup
+    @buffer = Arrow::Buffer.new("Hello")
+    @scalar = Arrow::StringScalar.new(@buffer)
+  end
+
+  def test_data_type
+    assert_equal(Arrow::StringDataType.new,
+                 @scalar.data_type)
+  end
+
+  def test_valid?
+    assert do
+      @scalar.valid?
+    end
+  end
+
+  def test_equal
+    assert_equal(Arrow::StringScalar.new(@buffer),
+                 @scalar)
+  end
+
+  def test_to_s
+    assert_equal("Hello", @scalar.to_s)
+  end
+
+  def test_value
+    assert_equal(@buffer,
+                 @scalar.value)
+  end
+
+  def test_cast
+    buffer = Arrow::Buffer.new("-10")
+    scalar = Arrow::StringScalar.new(buffer)
+    assert_equal(Arrow::Int8Scalar.new(-10),
+                 scalar.cast(Arrow::Int8DataType.new))
+  end
+end
diff --git a/c_glib/test/test-struct-scalar.rb b/c_glib/test/test-struct-scalar.rb
new file mode 100644
index 00000000000..917b0f4cc18
--- /dev/null
+++ b/c_glib/test/test-struct-scalar.rb
@@ -0,0 +1,55 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestStructScalar < Test::Unit::TestCase
+  def setup
+    fields = [
+      Arrow::Field.new("score", Arrow::Int8DataType.new),
+      Arrow::Field.new("enabled", Arrow::BooleanDataType.new),
+    ]
+    @data_type = Arrow::StructDataType.new(fields)
+    @value = [
+      Arrow::Int8Scalar.new(-29),
+      Arrow::BooleanScalar.new(true),
+    ]
+    @scalar = Arrow::StructScalar.new(@data_type, @value)
+  end
+
+  def test_data_type
+    assert_equal(@data_type,
+                 @scalar.data_type)
+  end
+
+  def test_valid?
+    assert do
+      @scalar.valid?
+    end
+  end
+
+  def test_equal
+    assert_equal(Arrow::StructScalar.new(@data_type, @value),
+                 @scalar)
+  end
+
+  def test_to_s
+    assert_equal("...", @scalar.to_s)
+  end
+
+  def test_value
+    assert_equal(@value, @scalar.value)
+  end
+end
diff --git a/c_glib/test/test-time32-scalar.rb b/c_glib/test/test-time32-scalar.rb
new file mode 100644
index 00000000000..94c0a759281
--- /dev/null
+++ b/c_glib/test/test-time32-scalar.rb
@@ -0,0 +1,48 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestTime32Scalar < Test::Unit::TestCase
+  def setup
+    @data_type = Arrow::Time32DataType.new(:second)
+    @value = 60 * 10 # 00:10:00
+    @scalar = Arrow::Time32Scalar.new(@data_type, @value)
+  end
+
+  def test_data_type
+    assert_equal(@data_type,
+                 @scalar.data_type)
+  end
+
+  def test_valid?
+    assert do
+      @scalar.valid?
+    end
+  end
+
+  def test_equal
+    assert_equal(Arrow::Time32Scalar.new(@data_type, @value),
+                 @scalar)
+  end
+
+  def test_to_s
+    assert_equal("00:10:00", @scalar.to_s)
+  end
+
+  def test_value
+    assert_equal(@value, @scalar.value)
+  end
+end
diff --git a/c_glib/test/test-time64-scalar.rb b/c_glib/test/test-time64-scalar.rb
new file mode 100644
index 00000000000..fb2843ca61a
--- /dev/null
+++ b/c_glib/test/test-time64-scalar.rb
@@ -0,0 +1,48 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestTime64Scalar < Test::Unit::TestCase
+  def setup
+    @data_type = Arrow::Time64DataType.new(:micro)
+    @value = 60 * 10 * 1000 * 1000 # 00:10:00.000000
+    @scalar = Arrow::Time64Scalar.new(@data_type, @value)
+  end
+
+  def test_data_type
+    assert_equal(@data_type,
+                 @scalar.data_type)
+  end
+
+  def test_valid?
+    assert do
+      @scalar.valid?
+    end
+  end
+
+  def test_equal
+    assert_equal(Arrow::Time64Scalar.new(@data_type, @value),
+                 @scalar)
+  end
+
+  def test_to_s
+    assert_equal("00:10:00.000000", @scalar.to_s)
+  end
+
+  def test_value
+    assert_equal(@value, @scalar.value)
+  end
+end
diff --git a/c_glib/test/test-timestamp-scalar.rb b/c_glib/test/test-timestamp-scalar.rb
new file mode 100644
index 00000000000..9aa676b5d5f
--- /dev/null
+++ b/c_glib/test/test-timestamp-scalar.rb
@@ -0,0 +1,48 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestTimestampScalar < Test::Unit::TestCase
+  def setup
+    @data_type = Arrow::TimestampDataType.new(:milli)
+    @value = 1504953190854 # 2017-09-09T10:33:10.854Z
+    @scalar = Arrow::TimestampScalar.new(@data_type, @value)
+  end
+
+  def test_data_type
+    assert_equal(@data_type,
+                 @scalar.data_type)
+  end
+
+  def test_valid?
+    assert do
+      @scalar.valid?
+    end
+  end
+
+  def test_equal
+    assert_equal(Arrow::TimestampScalar.new(@data_type, @value),
+                 @scalar)
+  end
+
+  def test_to_s
+    assert_equal("2017-09-09 10:33:10.854", @scalar.to_s)
+  end
+
+  def test_value
+    assert_equal(@value, @scalar.value)
+  end
+end
diff --git a/c_glib/test/test-uint16-scalar.rb b/c_glib/test/test-uint16-scalar.rb
new file mode 100644
index 00000000000..000d620b30d
--- /dev/null
+++ b/c_glib/test/test-uint16-scalar.rb
@@ -0,0 +1,46 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestUInt16Scalar < Test::Unit::TestCase
+  def setup
+    @scalar = Arrow::UInt16Scalar.new((2 ** 16) - 1)
+  end
+
+  def test_data_type
+    assert_equal(Arrow::UInt16DataType.new,
+                 @scalar.data_type)
+  end
+
+  def test_valid?
+    assert do
+      @scalar.valid?
+    end
+  end
+
+  def test_equal
+    assert_equal(Arrow::UInt16Scalar.new((2 ** 16) - 1),
+                 @scalar)
+  end
+
+  def test_to_s
+    assert_equal(((2 ** 16) - 1).to_s, @scalar.to_s)
+  end
+
+  def test_value
+    assert_equal((2 ** 16) - 1, @scalar.value)
+  end
+end
diff --git a/c_glib/test/test-uint32-scalar.rb b/c_glib/test/test-uint32-scalar.rb
new file mode 100644
index 00000000000..c41f99330cc
--- /dev/null
+++ b/c_glib/test/test-uint32-scalar.rb
@@ -0,0 +1,46 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestUInt32Scalar < Test::Unit::TestCase
+  def setup
+    @scalar = Arrow::UInt32Scalar.new((2 ** 32) - 1)
+  end
+
+  def test_data_type
+    assert_equal(Arrow::UInt32DataType.new,
+                 @scalar.data_type)
+  end
+
+  def test_valid?
+    assert do
+      @scalar.valid?
+    end
+  end
+
+  def test_equal
+    assert_equal(Arrow::UInt32Scalar.new((2 ** 32) - 1),
+                 @scalar)
+  end
+
+  def test_to_s
+    assert_equal(((2 ** 32) - 1).to_s, @scalar.to_s)
+  end
+
+  def test_value
+    assert_equal((2 ** 32) - 1, @scalar.value)
+  end
+end
diff --git a/c_glib/test/test-uint64-scalar.rb b/c_glib/test/test-uint64-scalar.rb
new file mode 100644
index 00000000000..19c12461c7a
--- /dev/null
+++ b/c_glib/test/test-uint64-scalar.rb
@@ -0,0 +1,46 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestUInt64Scalar < Test::Unit::TestCase
+  def setup
+    @scalar = Arrow::UInt64Scalar.new((2 ** 64) - 1)
+  end
+
+  def test_data_type
+    assert_equal(Arrow::UInt64DataType.new,
+                 @scalar.data_type)
+  end
+
+  def test_valid?
+    assert do
+      @scalar.valid?
+    end
+  end
+
+  def test_equal
+    assert_equal(Arrow::UInt64Scalar.new((2 ** 64) - 1),
+                 @scalar)
+  end
+
+  def test_to_s
+    assert_equal(((2 ** 64) - 1).to_s, @scalar.to_s)
+  end
+
+  def test_value
+    assert_equal((2 ** 64) - 1, @scalar.value)
+  end
+end
diff --git a/c_glib/test/test-uint8-scalar.rb b/c_glib/test/test-uint8-scalar.rb
new file mode 100644
index 00000000000..54bc1c954a0
--- /dev/null
+++ b/c_glib/test/test-uint8-scalar.rb
@@ -0,0 +1,46 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestUInt8Scalar < Test::Unit::TestCase
+  def setup
+    @scalar = Arrow::UInt8Scalar.new((2 ** 8) - 1)
+  end
+
+  def test_data_type
+    assert_equal(Arrow::UInt8DataType.new,
+                 @scalar.data_type)
+  end
+
+  def test_valid?
+    assert do
+      @scalar.valid?
+    end
+  end
+
+  def test_equal
+    assert_equal(Arrow::UInt8Scalar.new((2 ** 8) - 1),
+                 @scalar)
+  end
+
+  def test_to_s
+    assert_equal(((2 ** 8) - 1).to_s, @scalar.to_s)
+  end
+
+  def test_value
+    assert_equal((2 ** 8) - 1, @scalar.value)
+  end
+end
diff --git a/ruby/red-arrow/lib/arrow/buffer.rb b/ruby/red-arrow/lib/arrow/buffer.rb
index 1efd7972320..9f3a3f61bc5 100644
--- a/ruby/red-arrow/lib/arrow/buffer.rb
+++ b/ruby/red-arrow/lib/arrow/buffer.rb
@@ -17,12 +17,16 @@
 
 module Arrow
   class Buffer
-    alias_method :initialize_raw, :initialize
-    private :initialize_raw
-
-    def initialize(data)
-      @data = data
-      initialize_raw(data)
+    class << self
+      # @api private
+      def try_convert(value)
+        case value
+        when String
+          new(value)
+        else
+          nil
+        end
+      end
     end
   end
 end
diff --git a/ruby/red-arrow/lib/arrow/constructor-arguments-gc-guardable.rb b/ruby/red-arrow/lib/arrow/constructor-arguments-gc-guardable.rb
new file mode 100644
index 00000000000..16669be93f5
--- /dev/null
+++ b/ruby/red-arrow/lib/arrow/constructor-arguments-gc-guardable.rb
@@ -0,0 +1,25 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+  module ConstructorArgumentsGCGuardable
+    def initialize(*args)
+      super
+      @arguments = args
+    end
+  end
+end
diff --git a/ruby/red-arrow/lib/arrow/datum.rb b/ruby/red-arrow/lib/arrow/datum.rb
new file mode 100644
index 00000000000..99d1dae32f8
--- /dev/null
+++ b/ruby/red-arrow/lib/arrow/datum.rb
@@ -0,0 +1,98 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+  class Datum
+    class << self
+      # @api private
+      def try_convert(value)
+        case value
+        when Array
+          ArrayDatum.new(value)
+        when ChunkedArray
+          ChunkedArrayDatum.new(value)
+        when Scalar
+          ScalarDatum.new(value)
+        when ::Array
+          ArrayDatum.new(ArrayBuilder.build(value))
+        when Integer
+          case value
+          when (0..((2 ** 8) - 1))
+            try_convert(UInt8Scalar.new(value))
+          when ((-(2 ** 7))..((2 ** 7) - 1))
+            try_convert(Int8Scalar.new(value))
+          when (0..((2 ** 16) - 1))
+            try_convert(UInt16Scalar.new(value))
+          when ((-(2 ** 15))..((2 ** 15) - 1))
+            try_convert(Int16Scalar.new(value))
+          when (0..((2 ** 32) - 1))
+            try_convert(UInt32Scalar.new(value))
+          when ((-(2 ** 31))..((2 ** 31) - 1))
+            try_convert(Int32Scalar.new(value))
+          when (0..((2 ** 64) - 1))
+            try_convert(UInt64Scalar.new(value))
+          when ((-(2 ** 63))..((2 ** 63) - 1))
+            try_convert(Int64Scalar.new(value))
+          else
+            nil
+          end
+        when Float
+          try_convert(DoubleScalar.new(value))
+        when true, false
+          try_convert(BooleanScalar.new(value))
+        when String
+          if value.ascii_only? or value.encoding == Encoding::UTF_8
+            if value.bytesize <= ((2 ** 31) - 1)
+              try_convert(StringScalar.new(value))
+            else
+              try_convert(LargeStringScalar.new(value))
+            end
+          else
+            if value.bytesize <= ((2 ** 31) - 1)
+              try_convert(BinaryScalar.new(value))
+            else
+              try_convert(LargeBinaryScalar.new(value))
+            end
+          end
+        when Date
+          date32_value = (value - Date32ArrayBuilder::UNIX_EPOCH).to_i
+          try_convert(Date32Scalar.new(date32_value))
+        when Time
+          case value.unit
+          when TimeUnit::SECOND, TimeUnit::MILLI
+            data_type = Time32DataType.new(value.unit)
+            scalar_class = Time32Scalar
+          else
+            data_type = Time64DataType.new(value.unit)
+            scalar_class = Time64Scalar
+          end
+          try_convert(scalar_class.new(data_type, value.value))
+        when ::Time
+          data_type = TimestampDataType.new(:nano)
+          timestamp_value = value.to_i * 1_000_000_000 + value.nsec
+          try_convert(TimestampScalar.new(data_type, timestamp_value))
+        when Decimal128
+          data_type = TimestampDataType.new(:nano)
+          timestamp_value = value.to_i * 1_000_000_000 + value.nsec
+          try_convert(Decimal128Scalar.new(data_type, timestamp_value))
+        else
+          nil
+        end
+      end
+    end
+  end
+end
diff --git a/ruby/red-arrow/lib/arrow/loader.rb b/ruby/red-arrow/lib/arrow/loader.rb
index ab152820cb6..7af00ffa441 100644
--- a/ruby/red-arrow/lib/arrow/loader.rb
+++ b/ruby/red-arrow/lib/arrow/loader.rb
@@ -29,6 +29,7 @@ def load
     def post_load(repository, namespace)
       require_libraries
       require_extension_library
+      gc_guard
     end
 
     def require_libraries
@@ -52,6 +53,7 @@ def require_libraries
       require "arrow/date32-array-builder"
       require "arrow/date64-array"
       require "arrow/date64-array-builder"
+      require "arrow/datum"
       require "arrow/decimal128"
       require "arrow/decimal128-array"
       require "arrow/decimal128-array-builder"
@@ -81,6 +83,7 @@ def require_libraries
       require "arrow/record-batch-iterator"
       require "arrow/record-batch-stream-reader"
       require "arrow/rolling-window"
+      require "arrow/scalar"
       require "arrow/schema"
       require "arrow/slicer"
       require "arrow/sort-key"
@@ -113,6 +116,27 @@ def require_extension_library
       require "arrow.so"
     end
 
+    def gc_guard
+      require "arrow/constructor-arguments-gc-guardable"
+
+      [
+        @base_module::BinaryScalar,
+        @base_module::Buffer,
+        @base_module::DenseUnionScalar,
+        @base_module::FixedSizeBinaryScalar,
+        @base_module::LargeBinaryScalar,
+        @base_module::LargeListScalar,
+        @base_module::LargeStringScalar,
+        @base_module::ListScalar,
+        @base_module::MapScalar,
+        @base_module::SparseUnionScalar,
+        @base_module::StringScalar,
+        @base_module::StructScalar,
+      ].each do |klass|
+        klass.prepend(ConstructorArgumentsGCGuardable)
+      end
+    end
+
     def load_object_info(info)
       super
 
@@ -165,6 +189,12 @@ def load_method_info(info, klass, method_name)
           method_name = "dup"
         end
         super(info, klass, method_name)
+      when "Arrow::BooleanScalar"
+        case method_name
+        when "value?"
+          method_name = "value"
+        end
+        super(info, klass, method_name)
       else
         super
       end
diff --git a/ruby/red-arrow/lib/arrow/scalar.rb b/ruby/red-arrow/lib/arrow/scalar.rb
new file mode 100644
index 00000000000..b2bf1ac5962
--- /dev/null
+++ b/ruby/red-arrow/lib/arrow/scalar.rb
@@ -0,0 +1,32 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+  class Scalar
+    # @param other [Arrow::Scalar] The scalar to be compared.
+    # @param options [Arrow::EqualOptions, Hash] (nil)
+    #   The options to custom how to compare.
+    #
+    # @return [Boolean]
+    #   `true` if both of them have the same data, `false` otherwise.
+    #
+    # @since 5.0.0
+    def equal_scalar?(other, options=nil)
+      equal_options(other, options)
+    end
+  end
+end
diff --git a/ruby/red-arrow/test/test-boolean-scalar.rb b/ruby/red-arrow/test/test-boolean-scalar.rb
new file mode 100644
index 00000000000..1053d1716a4
--- /dev/null
+++ b/ruby/red-arrow/test/test-boolean-scalar.rb
@@ -0,0 +1,26 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class BooleanScalarTest < Test::Unit::TestCase
+  def setup
+    @scalar = Arrow::BooleanScalar.new(true)
+  end
+
+  test("#value") do
+    assert_equal(true, @scalar.value)
+  end
+end
diff --git a/ruby/red-arrow/test/test-float-scalar.rb b/ruby/red-arrow/test/test-float-scalar.rb
new file mode 100644
index 00000000000..1117d772804
--- /dev/null
+++ b/ruby/red-arrow/test/test-float-scalar.rb
@@ -0,0 +1,46 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class FloatScalarTest < Test::Unit::TestCase
+  sub_test_case("#equal_scalar?") do
+    test("no options") do
+      scalar1 = Arrow::FloatScalar.new(1.1)
+      scalar2 = Arrow::FloatScalar.new(1.1000001)
+      assert do
+        not scalar1.equal_scalar?(scalar2)
+      end
+    end
+
+    test(":approx") do
+      scalar1 = Arrow::FloatScalar.new(1.1)
+      scalar2 = Arrow::FloatScalar.new(1.1000001)
+      assert do
+        scalar1.equal_scalar?(scalar2, approx: true)
+      end
+    end
+
+    test(":absolute_tolerance") do
+      scalar1 = Arrow::FloatScalar.new(1.1)
+      scalar2 = Arrow::FloatScalar.new(1.1001)
+      assert do
+        scalar1.equal_scalar?(scalar2,
+                              approx: true,
+                              absolute_tolerance: 0.001)
+      end
+    end
+  end
+end
diff --git a/ruby/red-arrow/test/test-function.rb b/ruby/red-arrow/test/test-function.rb
new file mode 100644
index 00000000000..95667e66c2a
--- /dev/null
+++ b/ruby/red-arrow/test/test-function.rb
@@ -0,0 +1,176 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class FunctionTest < Test::Unit::TestCase
+  sub_test_case("#execute") do
+    test("Arrow::Array") do
+      or_function = Arrow::Function.find("or")
+      args = [
+        Arrow::BooleanArray.new([true, false, false]),
+        Arrow::BooleanArray.new([true, false, true]),
+      ]
+      assert_equal([true, false, true],
+                   or_function.execute(args).value.to_a)
+    end
+
+    test("Array") do
+      or_function = Arrow::Function.find("or")
+      args = [
+        [true, false, false],
+        [true, false, true],
+      ]
+      assert_equal([true, false, true],
+                   or_function.execute(args).value.to_a)
+    end
+
+    test("Arrow::ChunkedArray") do
+      or_function = Arrow::Function.find("or")
+      args = [
+        Arrow::ChunkedArray.new([
+                                  Arrow::BooleanArray.new([true]),
+                                  Arrow::BooleanArray.new([false, false]),
+                                ]),
+        Arrow::ChunkedArray.new([
+                                  Arrow::BooleanArray.new([true, false]),
+                                  Arrow::BooleanArray.new([true]),
+                                ]),
+      ]
+      assert_equal([true, false, true],
+                   or_function.execute(args).value.to_a)
+    end
+
+    test("Arrow::Scalar") do
+      add_function = Arrow::Function.find("add")
+      args = [
+        Arrow::Int8Array.new([1, 2, 3]),
+        Arrow::Int8Scalar.new(5),
+      ]
+      assert_equal([6, 7, 8],
+                   add_function.execute(args).value.to_a)
+    end
+
+    test("Integer") do
+      add_function = Arrow::Function.find("add")
+      args = [
+        [1, 2, 3],
+        5,
+      ]
+      assert_equal([6, 7, 8],
+                   add_function.execute(args).value.to_a)
+    end
+
+    test("Float") do
+      add_function = Arrow::Function.find("add")
+      args = [
+        [1, 2, 3],
+        5.1,
+      ]
+      assert_equal([6.1, 7.1, 8.1],
+                   add_function.execute(args).value.to_a)
+    end
+
+    test("true") do
+      and_function = Arrow::Function.find("and")
+      args = [
+        Arrow::BooleanArray.new([true, false, false]),
+        true,
+      ]
+      assert_equal([true, false, false],
+                   and_function.execute(args).value.to_a)
+    end
+
+    test("false") do
+      or_function = Arrow::Function.find("or")
+      args = [
+        Arrow::BooleanArray.new([true, false, false]),
+        false,
+      ]
+      assert_equal([true, false, false],
+                   or_function.execute(args).value.to_a)
+    end
+
+    test("String") do
+      ascii_upper_function = Arrow::Function.find("ascii_upper")
+      args = [
+        "Hello",
+      ]
+      assert_equal("HELLO",
+                   ascii_upper_function.execute(args).value.to_s)
+    end
+
+    test("Date") do
+      cast_function = Arrow::Function.find("cast")
+      date = Date.new(2021, 6, 12)
+      args = [date]
+      options = Arrow::CastOptions.new
+      options.to_data_type = Arrow::TimestampDataType.new(:second)
+      time = Time.utc(date.year,
+                      date.month,
+                      date.day)
+      assert_equal(Arrow::TimestampScalar.new(options.to_data_type,
+                                              time.to_i),
+                   cast_function.execute(args, options).value)
+    end
+
+    test("Arrow::Time: second") do
+      cast_function = Arrow::Function.find("cast")
+      arrow_time = Arrow::Time.new(Arrow::TimeUnit::SECOND,
+                                   # 00:10:00
+                                   60 * 10)
+      args = [arrow_time]
+      options = Arrow::CastOptions.new
+      options.to_data_type = Arrow::Time64DataType.new(:micro)
+      assert_equal(Arrow::Time64Scalar.new(options.to_data_type,
+                                           # 00:10:00.000000
+                                           60 * 10 * 1000 * 1000),
+                   cast_function.execute(args, options).value)
+    end
+
+    test("Arrow::Time: micro") do
+      cast_function = Arrow::Function.find("cast")
+      arrow_time = Arrow::Time.new(Arrow::TimeUnit::MICRO,
+                                   # 00:10:00.000000
+                                   60 * 10 * 1000 * 1000)
+      args = [arrow_time]
+      options = Arrow::CastOptions.new
+      options.to_data_type = Arrow::Time32DataType.new(:second)
+      options.allow_time_truncate = true
+      assert_equal(Arrow::Time32Scalar.new(options.to_data_type,
+                                           # 00:10:00
+                                           60 * 10),
+                   cast_function.execute(args, options).value)
+    end
+
+    test("Time") do
+      cast_function = Arrow::Function.find("cast")
+      time = Time.utc(2021, 6, 12, 1, 2, 3, 1)
+      args = [time]
+      options = Arrow::CastOptions.new
+      options.to_data_type = Arrow::TimestampDataType.new(:second)
+      options.allow_time_truncate = true
+      time = Time.utc(time.year,
+                      time.month,
+                      time.day,
+                      time.hour,
+                      time.min,
+                      time.sec)
+      assert_equal(Arrow::TimestampScalar.new(options.to_data_type,
+                                              time.to_i),
+                   cast_function.execute(args, options).value)
+    end
+  end
+end

From b81fcf73ee7722147868e94f0cc1040f7eb51c79 Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Sun, 13 Jun 2021 20:14:26 +0900
Subject: [PATCH 406/719] ARROW-13068: [GLib][Dataset] Change prefix to
 gdataset_ from gad_

Because we use gaflight_ for Apache Arrow Flight GLib.

Closes #10524 from kou/glib-dataset-prefix

Authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 c_glib/arrow-dataset-glib/file-format.cpp     | 155 +++++------
 c_glib/arrow-dataset-glib/file-format.h       |  65 ++---
 c_glib/arrow-dataset-glib/file-format.hpp     |   6 +-
 c_glib/arrow-dataset-glib/fragment.cpp        |  99 +++----
 c_glib/arrow-dataset-glib/fragment.h          |  33 +--
 c_glib/arrow-dataset-glib/fragment.hpp        |  12 +-
 c_glib/arrow-dataset-glib/meson.build         |   4 +-
 c_glib/arrow-dataset-glib/scanner.cpp         | 249 +++++++++---------
 c_glib/arrow-dataset-glib/scanner.h           |  62 +++--
 c_glib/arrow-dataset-glib/scanner.hpp         |  16 +-
 .../arrow-dataset-glib-docs.xml               |   6 +
 c_glib/doc/arrow-dataset-glib/meson.build     |   2 +-
 12 files changed, 370 insertions(+), 339 deletions(-)

diff --git a/c_glib/arrow-dataset-glib/file-format.cpp b/c_glib/arrow-dataset-glib/file-format.cpp
index 89d56058928..43f6a198f23 100644
--- a/c_glib/arrow-dataset-glib/file-format.cpp
+++ b/c_glib/arrow-dataset-glib/file-format.cpp
@@ -29,56 +29,57 @@ G_BEGIN_DECLS
  * @title: File format classes
  * @include: arrow-dataset-glib/arrow-dataset-glib.h
  *
- * #GADFileFormat is a base class for file format classes.
+ * #GADatasetFileFormat is a base class for file format classes.
  *
- * #GADCSVFileFormat is a class for CSV file format.
+ * #GADatasetCSVFileFormat is a class for CSV file format.
  *
- * #GADIPCFileFormat is a class for IPC file format.
+ * #GADatasetIPCFileFormat is a class for IPC file format.
  *
- * #GADParquetFileFormat is a class for Parquet file format.
+ * #GADatasetParquetFileFormat is a class for Parquet file format.
  *
  * Since: 3.0.0
  */
 
-typedef struct GADFileFormatPrivate_ {
+typedef struct GADatasetFileFormatPrivate_ {
   std::shared_ptr<arrow::dataset::FileFormat> file_format;
-} GADFileFormatPrivate;
+} GADatasetFileFormatPrivate;
 
 enum {
   PROP_FILE_FORMAT = 1,
 };
 
-G_DEFINE_TYPE_WITH_PRIVATE(GADFileFormat,
-                           gad_file_format,
+G_DEFINE_TYPE_WITH_PRIVATE(GADatasetFileFormat,
+                           gadataset_file_format,
                            G_TYPE_OBJECT)
 
-#define GAD_FILE_FORMAT_GET_PRIVATE(obj)        \
-  static_cast<GADFileFormatPrivate *>(          \
-    gad_file_format_get_instance_private(       \
-      GAD_FILE_FORMAT(obj)))
+#define GADATASET_FILE_FORMAT_GET_PRIVATE(obj)        \
+  static_cast<GADatasetFileFormatPrivate *>(          \
+    gadataset_file_format_get_instance_private(       \
+      GADATASET_FILE_FORMAT(obj)))
 
 static void
-gad_file_format_finalize(GObject *object)
+gadataset_file_format_finalize(GObject *object)
 {
-  auto priv = GAD_FILE_FORMAT_GET_PRIVATE(object);
+  auto priv = GADATASET_FILE_FORMAT_GET_PRIVATE(object);
 
   priv->file_format.~shared_ptr();
 
-  G_OBJECT_CLASS(gad_file_format_parent_class)->finalize(object);
+  G_OBJECT_CLASS(gadataset_file_format_parent_class)->finalize(object);
 }
 
 static void
-gad_file_format_set_property(GObject *object,
-                             guint prop_id,
-                             const GValue *value,
-                             GParamSpec *pspec)
+gadataset_file_format_set_property(GObject *object,
+                                   guint prop_id,
+                                   const GValue *value,
+                                   GParamSpec *pspec)
 {
-  auto priv = GAD_FILE_FORMAT_GET_PRIVATE(object);
+  auto priv = GADATASET_FILE_FORMAT_GET_PRIVATE(object);
 
   switch (prop_id) {
   case PROP_FILE_FORMAT:
     priv->file_format =
-      *static_cast<std::shared_ptr<arrow::dataset::FileFormat> *>(g_value_get_pointer(value));
+      *static_cast<std::shared_ptr<arrow::dataset::FileFormat> *>(
+        g_value_get_pointer(value));
     break;
   default:
     G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
@@ -87,19 +88,19 @@ gad_file_format_set_property(GObject *object,
 }
 
 static void
-gad_file_format_init(GADFileFormat *object)
+gadataset_file_format_init(GADatasetFileFormat *object)
 {
-  auto priv = GAD_FILE_FORMAT_GET_PRIVATE(object);
+  auto priv = GADATASET_FILE_FORMAT_GET_PRIVATE(object);
   new(&priv->file_format) std::shared_ptr<arrow::dataset::FileFormat>;
 }
 
 static void
-gad_file_format_class_init(GADFileFormatClass *klass)
+gadataset_file_format_class_init(GADatasetFileFormatClass *klass)
 {
   auto gobject_class = G_OBJECT_CLASS(klass);
 
-  gobject_class->finalize     = gad_file_format_finalize;
-  gobject_class->set_property = gad_file_format_set_property;
+  gobject_class->finalize     = gadataset_file_format_finalize;
+  gobject_class->set_property = gadataset_file_format_set_property;
 
   GParamSpec *spec;
   spec = g_param_spec_pointer("file-format",
@@ -111,8 +112,8 @@ gad_file_format_class_init(GADFileFormatClass *klass)
 }
 
 /**
- * gad_file_format_get_type_name:
- * @file_format: A #GADFileFormat.
+ * gadataset_file_format_get_type_name:
+ * @file_format: A #GADatasetFileFormat.
  *
  * Returns: The type name of @file_format.
  *
@@ -121,145 +122,149 @@ gad_file_format_class_init(GADFileFormatClass *klass)
  * Since: 3.0.0
  */
 gchar *
-gad_file_format_get_type_name(GADFileFormat *file_format)
+gadataset_file_format_get_type_name(GADatasetFileFormat *file_format)
 {
-  const auto arrow_file_format = gad_file_format_get_raw(file_format);
+  const auto arrow_file_format = gadataset_file_format_get_raw(file_format);
   const auto &type_name = arrow_file_format->type_name();
   return g_strndup(type_name.data(), type_name.size());
 }
 
 /**
- * gad_file_format_equal:
- * @file_format: A #GADFileFormat.
- * @other_file_format: A #GADFileFormat to be compared.
+ * gadataset_file_format_equal:
+ * @file_format: A #GADatasetFileFormat.
+ * @other_file_format: A #GADatasetFileFormat to be compared.
  *
  * Returns: %TRUE if they are the same content file format, %FALSE otherwise.
  *
  * Since: 3.0.0
  */
 gboolean
-gad_file_format_equal(GADFileFormat *file_format,
-                      GADFileFormat *other_file_format)
+gadataset_file_format_equal(GADatasetFileFormat *file_format,
+                      GADatasetFileFormat *other_file_format)
 {
-  const auto arrow_file_format = gad_file_format_get_raw(file_format);
-  const auto arrow_other_file_format = gad_file_format_get_raw(other_file_format);
+  const auto arrow_file_format = gadataset_file_format_get_raw(file_format);
+  const auto arrow_other_file_format =
+    gadataset_file_format_get_raw(other_file_format);
   return arrow_file_format->Equals(*arrow_other_file_format);
 }
 
 
-G_DEFINE_TYPE(GADCSVFileFormat,
-              gad_csv_file_format,
-              GAD_TYPE_FILE_FORMAT)
+G_DEFINE_TYPE(GADatasetCSVFileFormat,
+              gadataset_csv_file_format,
+              GADATASET_TYPE_FILE_FORMAT)
 
 static void
-gad_csv_file_format_init(GADCSVFileFormat *object)
+gadataset_csv_file_format_init(GADatasetCSVFileFormat *object)
 {
 }
 
 static void
-gad_csv_file_format_class_init(GADCSVFileFormatClass *klass)
+gadataset_csv_file_format_class_init(GADatasetCSVFileFormatClass *klass)
 {
 }
 
 /**
- * gad_csv_file_format_new:
+ * gadataset_csv_file_format_new:
  *
  * Returns: The newly created CSV file format.
  *
  * Since: 3.0.0
  */
-GADCSVFileFormat *
-gad_csv_file_format_new(void)
+GADatasetCSVFileFormat *
+gadataset_csv_file_format_new(void)
 {
   std::shared_ptr<arrow::dataset::FileFormat> arrow_file_format =
     std::make_shared<arrow::dataset::CsvFileFormat>();
-  return GAD_CSV_FILE_FORMAT(gad_file_format_new_raw(&arrow_file_format));
+  return GADATASET_CSV_FILE_FORMAT(
+    gadataset_file_format_new_raw(&arrow_file_format));
 }
 
 
-G_DEFINE_TYPE(GADIPCFileFormat,
-              gad_ipc_file_format,
-              GAD_TYPE_FILE_FORMAT)
+G_DEFINE_TYPE(GADatasetIPCFileFormat,
+              gadataset_ipc_file_format,
+              GADATASET_TYPE_FILE_FORMAT)
 
 static void
-gad_ipc_file_format_init(GADIPCFileFormat *object)
+gadataset_ipc_file_format_init(GADatasetIPCFileFormat *object)
 {
 }
 
 static void
-gad_ipc_file_format_class_init(GADIPCFileFormatClass *klass)
+gadataset_ipc_file_format_class_init(GADatasetIPCFileFormatClass *klass)
 {
 }
 
 /**
- * gad_ipc_file_format_new:
+ * gadataset_ipc_file_format_new:
  *
  * Returns: The newly created IPC file format.
  *
  * Since: 3.0.0
  */
-GADIPCFileFormat *
-gad_ipc_file_format_new(void)
+GADatasetIPCFileFormat *
+gadataset_ipc_file_format_new(void)
 {
   std::shared_ptr<arrow::dataset::FileFormat> arrow_file_format =
     std::make_shared<arrow::dataset::IpcFileFormat>();
-  return GAD_IPC_FILE_FORMAT(gad_file_format_new_raw(&arrow_file_format));
+  return GADATASET_IPC_FILE_FORMAT(
+    gadataset_file_format_new_raw(&arrow_file_format));
 }
 
 
-G_DEFINE_TYPE(GADParquetFileFormat,
-              gad_parquet_file_format,
-              GAD_TYPE_FILE_FORMAT)
+G_DEFINE_TYPE(GADatasetParquetFileFormat,
+              gadataset_parquet_file_format,
+              GADATASET_TYPE_FILE_FORMAT)
 
 static void
-gad_parquet_file_format_init(GADParquetFileFormat *object)
+gadataset_parquet_file_format_init(GADatasetParquetFileFormat *object)
 {
 }
 
 static void
-gad_parquet_file_format_class_init(GADParquetFileFormatClass *klass)
+gadataset_parquet_file_format_class_init(GADatasetParquetFileFormatClass *klass)
 {
 }
 
 /**
- * gad_parquet_file_format_new:
+ * gadataset_parquet_file_format_new:
  *
  * Returns: The newly created Parquet file format.
  *
  * Since: 3.0.0
  */
-GADParquetFileFormat *
-gad_parquet_file_format_new(void)
+GADatasetParquetFileFormat *
+gadataset_parquet_file_format_new(void)
 {
   std::shared_ptr<arrow::dataset::FileFormat> arrow_file_format =
     std::make_shared<arrow::dataset::ParquetFileFormat>();
-  return GAD_PARQUET_FILE_FORMAT(gad_file_format_new_raw(&arrow_file_format));
+  return GADATASET_PARQUET_FILE_FORMAT(
+    gadataset_file_format_new_raw(&arrow_file_format));
 }
 
 
 G_END_DECLS
 
-GADFileFormat *
-gad_file_format_new_raw(
+GADatasetFileFormat *
+gadataset_file_format_new_raw(
   std::shared_ptr<arrow::dataset::FileFormat> *arrow_file_format)
 {
-  GType type = GAD_TYPE_FILE_FORMAT;
+  GType type = GADATASET_TYPE_FILE_FORMAT;
   const auto &type_name = (*arrow_file_format)->type_name();
   if (type_name == "csv") {
-    type = GAD_TYPE_CSV_FILE_FORMAT;
+    type = GADATASET_TYPE_CSV_FILE_FORMAT;
   } else if (type_name == "ipc") {
-    type = GAD_TYPE_IPC_FILE_FORMAT;
+    type = GADATASET_TYPE_IPC_FILE_FORMAT;
   } else if (type_name == "parquet") {
-    type = GAD_TYPE_PARQUET_FILE_FORMAT;
+    type = GADATASET_TYPE_PARQUET_FILE_FORMAT;
   }
-  return GAD_FILE_FORMAT(g_object_new(type,
-                                      "file-format", arrow_file_format,
-                                      NULL));
+  return GADATASET_FILE_FORMAT(g_object_new(type,
+                                            "file-format", arrow_file_format,
+                                            NULL));
 }
 
 std::shared_ptr<arrow::dataset::FileFormat>
-gad_file_format_get_raw(GADFileFormat *file_format)
+gadataset_file_format_get_raw(GADatasetFileFormat *file_format)
 {
-  auto priv = GAD_FILE_FORMAT_GET_PRIVATE(file_format);
+  auto priv = GADATASET_FILE_FORMAT_GET_PRIVATE(file_format);
   return priv->file_format;
 }
diff --git a/c_glib/arrow-dataset-glib/file-format.h b/c_glib/arrow-dataset-glib/file-format.h
index f77addc8da6..7a6f46f56e9 100644
--- a/c_glib/arrow-dataset-glib/file-format.h
+++ b/c_glib/arrow-dataset-glib/file-format.h
@@ -23,70 +23,71 @@
 
 G_BEGIN_DECLS
 
-#define GAD_TYPE_FILE_FORMAT (gad_file_format_get_type())
-G_DECLARE_DERIVABLE_TYPE(GADFileFormat,
-                         gad_file_format,
-                         GAD,
+#define GADATASET_TYPE_FILE_FORMAT (gadataset_file_format_get_type())
+G_DECLARE_DERIVABLE_TYPE(GADatasetFileFormat,
+                         gadataset_file_format,
+                         GADATASET,
                          FILE_FORMAT,
                          GObject)
-struct _GADFileFormatClass
+struct _GADatasetFileFormatClass
 {
   GObjectClass parent_class;
 };
 
 GARROW_AVAILABLE_IN_3_0
 gchar *
-gad_file_format_get_type_name(GADFileFormat *file_format);
+gadataset_file_format_get_type_name(GADatasetFileFormat *file_format);
 
 GARROW_AVAILABLE_IN_3_0
 gboolean
-gad_file_format_equal(GADFileFormat *file_format,
-                      GADFileFormat *other_file_format);
+gadataset_file_format_equal(GADatasetFileFormat *file_format,
+                            GADatasetFileFormat *other_file_format);
 
 
-#define GAD_TYPE_CSV_FILE_FORMAT (gad_csv_file_format_get_type())
-G_DECLARE_DERIVABLE_TYPE(GADCSVFileFormat,
-                         gad_csv_file_format,
-                         GAD,
+#define GADATASET_TYPE_CSV_FILE_FORMAT (gadataset_csv_file_format_get_type())
+G_DECLARE_DERIVABLE_TYPE(GADatasetCSVFileFormat,
+                         gadataset_csv_file_format,
+                         GADATASET,
                          CSV_FILE_FORMAT,
-                         GADFileFormat)
-struct _GADCSVFileFormatClass
+                         GADatasetFileFormat)
+struct _GADatasetCSVFileFormatClass
 {
-  GADFileFormatClass parent_class;
+  GADatasetFileFormatClass parent_class;
 };
 
 GARROW_AVAILABLE_IN_3_0
-GADCSVFileFormat *gad_csv_file_format_new(void);
+GADatasetCSVFileFormat *gadataset_csv_file_format_new(void);
 
 
-#define GAD_TYPE_IPC_FILE_FORMAT (gad_ipc_file_format_get_type())
-G_DECLARE_DERIVABLE_TYPE(GADIPCFileFormat,
-                         gad_ipc_file_format,
-                         GAD,
+#define GADATASET_TYPE_IPC_FILE_FORMAT (gadataset_ipc_file_format_get_type())
+G_DECLARE_DERIVABLE_TYPE(GADatasetIPCFileFormat,
+                         gadataset_ipc_file_format,
+                         GADATASET,
                          IPC_FILE_FORMAT,
-                         GADFileFormat)
-struct _GADIPCFileFormatClass
+                         GADatasetFileFormat)
+struct _GADatasetIPCFileFormatClass
 {
-  GADFileFormatClass parent_class;
+  GADatasetFileFormatClass parent_class;
 };
 
 GARROW_AVAILABLE_IN_3_0
-GADIPCFileFormat *gad_ipc_file_format_new(void);
+GADatasetIPCFileFormat *gadataset_ipc_file_format_new(void);
 
 
-#define GAD_TYPE_PARQUET_FILE_FORMAT (gad_parquet_file_format_get_type())
-G_DECLARE_DERIVABLE_TYPE(GADParquetFileFormat,
-                         gad_parquet_file_format,
-                         GAD,
+#define GADATASET_TYPE_PARQUET_FILE_FORMAT      \
+  (gadataset_parquet_file_format_get_type())
+G_DECLARE_DERIVABLE_TYPE(GADatasetParquetFileFormat,
+                         gadataset_parquet_file_format,
+                         GADATASET,
                          PARQUET_FILE_FORMAT,
-                         GADFileFormat)
-struct _GADParquetFileFormatClass
+                         GADatasetFileFormat)
+struct _GADatasetParquetFileFormatClass
 {
-  GADFileFormatClass parent_class;
+  GADatasetFileFormatClass parent_class;
 };
 
 GARROW_AVAILABLE_IN_3_0
-GADParquetFileFormat *gad_parquet_file_format_new(void);
+GADatasetParquetFileFormat *gadataset_parquet_file_format_new(void);
 
 
 G_END_DECLS
diff --git a/c_glib/arrow-dataset-glib/file-format.hpp b/c_glib/arrow-dataset-glib/file-format.hpp
index e7e73f4ed98..5dfb20b3caa 100644
--- a/c_glib/arrow-dataset-glib/file-format.hpp
+++ b/c_glib/arrow-dataset-glib/file-format.hpp
@@ -23,8 +23,8 @@
 
 #include <arrow-dataset-glib/file-format.h>
 
-GADFileFormat *
-gad_file_format_new_raw(
+GADatasetFileFormat *
+gadataset_file_format_new_raw(
   std::shared_ptr<arrow::dataset::FileFormat> *arrow_file_format);
 std::shared_ptr<arrow::dataset::FileFormat>
-gad_file_format_get_raw(GADFileFormat *file_format);
+gadataset_file_format_get_raw(GADatasetFileFormat *file_format);
diff --git a/c_glib/arrow-dataset-glib/fragment.cpp b/c_glib/arrow-dataset-glib/fragment.cpp
index 515a370d8e6..f2f0cd1c3e9 100644
--- a/c_glib/arrow-dataset-glib/fragment.cpp
+++ b/c_glib/arrow-dataset-glib/fragment.cpp
@@ -30,54 +30,55 @@ G_BEGIN_DECLS
  * @title: Fragment classes
  * @include: arrow-dataset-glib/arrow-dataset-glib.h
  *
- * #GADFragment is a base class for all fragment classes.
+ * #GADatasetFragment is a base class for all fragment classes.
  *
- * #GADInMemoryFragment is a class for in-memory fragment.
+ * #GADatasetInMemoryFragment is a class for in-memory fragment.
  *
  * Since: 4.0.0
  */
 
 /* arrow::dataset::Fragment */
 
-typedef struct GADFragmentPrivate_ {
+typedef struct GADatasetFragmentPrivate_ {
   std::shared_ptr<arrow::dataset::Fragment> fragment;
-} GADFragmentPrivate;
+} GADatasetFragmentPrivate;
 
 enum {
   PROP_FRAGMENT = 1,
 };
 
-G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GADFragment,
-                                    gad_fragment,
+G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GADatasetFragment,
+                                    gadataset_fragment,
                                     G_TYPE_OBJECT)
 
-#define GAD_FRAGMENT_GET_PRIVATE(obj)           \
-  static_cast<GADFragmentPrivate *>(            \
-    gad_fragment_get_instance_private(          \
-      GAD_FRAGMENT(obj)))
+#define GADATASET_FRAGMENT_GET_PRIVATE(obj)           \
+  static_cast<GADatasetFragmentPrivate *>(            \
+    gadataset_fragment_get_instance_private(          \
+      GADATASET_FRAGMENT(obj)))
 
 static void
-gad_fragment_finalize(GObject *object)
+gadataset_fragment_finalize(GObject *object)
 {
-  auto priv = GAD_FRAGMENT_GET_PRIVATE(object);
+  auto priv = GADATASET_FRAGMENT_GET_PRIVATE(object);
 
   priv->fragment.~shared_ptr();
 
-  G_OBJECT_CLASS(gad_fragment_parent_class)->finalize(object);
+  G_OBJECT_CLASS(gadataset_fragment_parent_class)->finalize(object);
 }
 
 static void
-gad_fragment_set_property(GObject *object,
-                          guint prop_id,
-                          const GValue *value,
-                          GParamSpec *pspec)
+gadataset_fragment_set_property(GObject *object,
+                                guint prop_id,
+                                const GValue *value,
+                                GParamSpec *pspec)
 {
-  auto priv = GAD_FRAGMENT_GET_PRIVATE(object);
+  auto priv = GADATASET_FRAGMENT_GET_PRIVATE(object);
 
   switch (prop_id) {
   case PROP_FRAGMENT:
     priv->fragment =
-      *static_cast<std::shared_ptr<arrow::dataset::Fragment> *>(g_value_get_pointer(value));
+      *static_cast<std::shared_ptr<arrow::dataset::Fragment> *>(
+        g_value_get_pointer(value));
     break;
   default:
     G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
@@ -86,19 +87,19 @@ gad_fragment_set_property(GObject *object,
 }
 
 static void
-gad_fragment_init(GADFragment *object)
+gadataset_fragment_init(GADatasetFragment *object)
 {
-  auto priv = GAD_FRAGMENT_GET_PRIVATE(object);
+  auto priv = GADATASET_FRAGMENT_GET_PRIVATE(object);
   new(&priv->fragment) std::shared_ptr<arrow::dataset::Fragment>;
 }
 
 static void
-gad_fragment_class_init(GADFragmentClass *klass)
+gadataset_fragment_class_init(GADatasetFragmentClass *klass)
 {
   auto gobject_class = G_OBJECT_CLASS(klass);
 
-  gobject_class->finalize     = gad_fragment_finalize;
-  gobject_class->set_property = gad_fragment_set_property;
+  gobject_class->finalize     = gadataset_fragment_finalize;
+  gobject_class->set_property = gadataset_fragment_set_property;
 
   GParamSpec *spec;
   spec = g_param_spec_pointer("fragment",
@@ -111,35 +112,35 @@ gad_fragment_class_init(GADFragmentClass *klass)
 
 /* arrow::dataset::InMemoryFragment */
 
-G_DEFINE_TYPE(GADInMemoryFragment,
-              gad_in_memory_fragment,
-              GAD_TYPE_FRAGMENT)
+G_DEFINE_TYPE(GADatasetInMemoryFragment,
+              gadataset_in_memory_fragment,
+              GADATASET_TYPE_FRAGMENT)
 
 static void
-gad_in_memory_fragment_init(GADInMemoryFragment *object)
+gadataset_in_memory_fragment_init(GADatasetInMemoryFragment *object)
 {
 }
 
 static void
-gad_in_memory_fragment_class_init(GADInMemoryFragmentClass *klass)
+gadataset_in_memory_fragment_class_init(GADatasetInMemoryFragmentClass *klass)
 {
 }
 
 /**
- * gad_in_memory_fragment_new:
+ * gadataset_in_memory_fragment_new:
  * @schema: A #GArrowSchema.
  * @record_batches: (array length=n_record_batches):
  *   (element-type GArrowRecordBatch): The record batches of the table.
  * @n_record_batches: The number of record batches.
  *
- * Returns: A newly created #GADInMemoryFragment.
+ * Returns: A newly created #GADatasetInMemoryFragment.
  *
  * Since: 4.0.0
  */
-GADInMemoryFragment *
-gad_in_memory_fragment_new(GArrowSchema *schema,
-                           GArrowRecordBatch **record_batches,
-                           gsize n_record_batches)
+GADatasetInMemoryFragment *
+gadataset_in_memory_fragment_new(GArrowSchema *schema,
+                                 GArrowRecordBatch **record_batches,
+                                 gsize n_record_batches)
 {
   auto arrow_schema = garrow_schema_get_raw(schema);
   std::vector<std::shared_ptr<arrow::RecordBatch>> arrow_record_batches;
@@ -151,34 +152,36 @@ gad_in_memory_fragment_new(GArrowSchema *schema,
   auto arrow_in_memory_fragment =
     std::make_shared<arrow::dataset::InMemoryFragment>(arrow_schema,
                                                        arrow_record_batches);
-  return gad_in_memory_fragment_new_raw(&arrow_in_memory_fragment);
+  return gadataset_in_memory_fragment_new_raw(&arrow_in_memory_fragment);
 }
 
 G_END_DECLS
 
-GADFragment *
-gad_fragment_new_raw(std::shared_ptr<arrow::dataset::Fragment> *arrow_fragment)
+GADatasetFragment *
+gadataset_fragment_new_raw(
+  std::shared_ptr<arrow::dataset::Fragment> *arrow_fragment)
 {
   auto fragment =
-    GAD_FRAGMENT(g_object_new(GAD_TYPE_FRAGMENT,
-                              "fragment", arrow_fragment,
-                              NULL));
+    GADATASET_FRAGMENT(g_object_new(GADATASET_TYPE_FRAGMENT,
+                                    "fragment", arrow_fragment,
+                                    NULL));
   return fragment;
 }
 
 std::shared_ptr<arrow::dataset::Fragment>
-gad_fragment_get_raw(GADFragment *fragment)
+gadataset_fragment_get_raw(GADatasetFragment *fragment)
 {
-  auto priv = GAD_FRAGMENT_GET_PRIVATE(fragment);
+  auto priv = GADATASET_FRAGMENT_GET_PRIVATE(fragment);
   return priv->fragment;
 }
 
-GADInMemoryFragment *
-gad_in_memory_fragment_new_raw(std::shared_ptr<arrow::dataset::InMemoryFragment> *arrow_fragment)
+GADatasetInMemoryFragment *
+gadataset_in_memory_fragment_new_raw(
+  std::shared_ptr<arrow::dataset::InMemoryFragment> *arrow_fragment)
 {
   auto fragment =
-    GAD_IN_MEMORY_FRAGMENT(g_object_new(GAD_TYPE_IN_MEMORY_FRAGMENT,
-                                        "fragment", arrow_fragment,
-                                        NULL));
+    GADATASET_IN_MEMORY_FRAGMENT(g_object_new(GADATASET_TYPE_IN_MEMORY_FRAGMENT,
+                                              "fragment", arrow_fragment,
+                                              NULL));
   return fragment;
 }
diff --git a/c_glib/arrow-dataset-glib/fragment.h b/c_glib/arrow-dataset-glib/fragment.h
index c0ee8769db1..9376b6cf3ee 100644
--- a/c_glib/arrow-dataset-glib/fragment.h
+++ b/c_glib/arrow-dataset-glib/fragment.h
@@ -25,34 +25,35 @@ G_BEGIN_DECLS
 
 /* arrow::dataset::Fragment */
 
-#define GAD_TYPE_FRAGMENT (gad_fragment_get_type())
-G_DECLARE_DERIVABLE_TYPE(GADFragment,
-                         gad_fragment,
-                         GAD,
+#define GADATASET_TYPE_FRAGMENT (gadataset_fragment_get_type())
+G_DECLARE_DERIVABLE_TYPE(GADatasetFragment,
+                         gadataset_fragment,
+                         GADATASET,
                          FRAGMENT,
                          GObject)
-struct _GADFragmentClass
+struct _GADatasetFragmentClass
 {
   GObjectClass parent_class;
 };
 
 /* arrow::dataset::InMemoryFragment */
 
-#define GAD_TYPE_IN_MEMORY_FRAGMENT (gad_in_memory_fragment_get_type())
-G_DECLARE_DERIVABLE_TYPE(GADInMemoryFragment,
-                         gad_in_memory_fragment,
-                         GAD,
+#define GADATASET_TYPE_IN_MEMORY_FRAGMENT       \
+  (gadataset_in_memory_fragment_get_type())
+G_DECLARE_DERIVABLE_TYPE(GADatasetInMemoryFragment,
+                         gadataset_in_memory_fragment,
+                         GADATASET,
                          IN_MEMORY_FRAGMENT,
-                         GADFragment)
-struct _GADInMemoryFragmentClass
+                         GADatasetFragment)
+struct _GADatasetInMemoryFragmentClass
 {
-  GADFragmentClass parent_class;
+  GADatasetFragmentClass parent_class;
 };
 
 GARROW_AVAILABLE_IN_4_0
-GADInMemoryFragment *
-gad_in_memory_fragment_new(GArrowSchema *schema,
-                           GArrowRecordBatch **record_batches,
-                           gsize n_record_batches);
+GADatasetInMemoryFragment *
+gadataset_in_memory_fragment_new(GArrowSchema *schema,
+                                 GArrowRecordBatch **record_batches,
+                                 gsize n_record_batches);
 
 G_END_DECLS
diff --git a/c_glib/arrow-dataset-glib/fragment.hpp b/c_glib/arrow-dataset-glib/fragment.hpp
index 441b7c99cb8..904f8365396 100644
--- a/c_glib/arrow-dataset-glib/fragment.hpp
+++ b/c_glib/arrow-dataset-glib/fragment.hpp
@@ -24,10 +24,12 @@
 #include <arrow-dataset-glib/fragment.h>
 
 std::shared_ptr<arrow::dataset::Fragment>
-gad_fragment_get_raw(GADFragment *fragment);
+gadataset_fragment_get_raw(GADatasetFragment *fragment);
 
-GADFragment*
-gad_fragment_new_raw(std::shared_ptr<arrow::dataset::Fragment> *arrow_fragment);
+GADatasetFragment*
+gadataset_fragment_new_raw(
+  std::shared_ptr<arrow::dataset::Fragment> *arrow_fragment);
 
-GADInMemoryFragment*
-gad_in_memory_fragment_new_raw(std::shared_ptr<arrow::dataset::InMemoryFragment> *arrow_fragment);
+GADatasetInMemoryFragment*
+gadataset_in_memory_fragment_new_raw(
+  std::shared_ptr<arrow::dataset::InMemoryFragment> *arrow_fragment);
diff --git a/c_glib/arrow-dataset-glib/meson.build b/c_glib/arrow-dataset-glib/meson.build
index 83b57504f81..04dc420b057 100644
--- a/c_glib/arrow-dataset-glib/meson.build
+++ b/c_glib/arrow-dataset-glib/meson.build
@@ -68,8 +68,8 @@ if have_gi
                      sources: sources + c_headers,
                      namespace: 'ArrowDataset',
                      nsversion: api_version,
-                     identifier_prefix: 'GAD',
-                     symbol_prefix: 'gad',
+                     identifier_prefix: 'GADataset',
+                     symbol_prefix: 'gadataset',
                      export_packages: 'arrow-dataset-glib',
                      includes: [
                        'Arrow-1.0',
diff --git a/c_glib/arrow-dataset-glib/scanner.cpp b/c_glib/arrow-dataset-glib/scanner.cpp
index 36701ca373a..04778c8ae99 100644
--- a/c_glib/arrow-dataset-glib/scanner.cpp
+++ b/c_glib/arrow-dataset-glib/scanner.cpp
@@ -34,20 +34,20 @@ G_BEGIN_DECLS
  * @title: Scanner classes
  * @include: arrow-dataset-glib/arrow-dataset-glib.h
  *
- * #GADScanOptions is a class for a set of scan options.
+ * #GADatasetScanOptions is a class for a set of scan options.
  *
- * #GADScanTask is an abstract class for a scan task.
+ * #GADatasetScanTask is an abstract class for a scan task.
  *
- * #GADInMemoryScanTask is a class for a scan task of record batches.
+ * #GADatasetInMemoryScanTask is a class for a scan task of record batches.
  *
  * Since: 1.0.0
  */
 
 /* arrow::dataset::ScanOptions */
 
-typedef struct GADScanOptionsPrivate_ {
+typedef struct GADatasetScanOptionsPrivate_ {
   std::shared_ptr<arrow::dataset::ScanOptions> scan_options;
-} GADScanOptionsPrivate;
+} GADatasetScanOptionsPrivate;
 
 enum {
   PROP_SCAN_OPTIONS = 1,
@@ -58,37 +58,38 @@ enum {
   PROP_USE_THREADS,
 };
 
-G_DEFINE_TYPE_WITH_PRIVATE(GADScanOptions,
-                           gad_scan_options,
+G_DEFINE_TYPE_WITH_PRIVATE(GADatasetScanOptions,
+                           gadataset_scan_options,
                            G_TYPE_OBJECT)
 
-#define GAD_SCAN_OPTIONS_GET_PRIVATE(obj)       \
-  static_cast<GADScanOptionsPrivate *>(         \
-    gad_scan_options_get_instance_private(      \
-      GAD_SCAN_OPTIONS(obj)))
+#define GADATASET_SCAN_OPTIONS_GET_PRIVATE(obj)       \
+  static_cast<GADatasetScanOptionsPrivate *>(         \
+    gadataset_scan_options_get_instance_private(      \
+      GADATASET_SCAN_OPTIONS(obj)))
 
 static void
-gad_scan_options_finalize(GObject *object)
+gadataset_scan_options_finalize(GObject *object)
 {
-  auto priv = GAD_SCAN_OPTIONS_GET_PRIVATE(object);
+  auto priv = GADATASET_SCAN_OPTIONS_GET_PRIVATE(object);
 
   priv->scan_options.~shared_ptr();
 
-  G_OBJECT_CLASS(gad_scan_options_parent_class)->finalize(object);
+  G_OBJECT_CLASS(gadataset_scan_options_parent_class)->finalize(object);
 }
 
 static void
-gad_scan_options_set_property(GObject *object,
-                              guint prop_id,
-                              const GValue *value,
-                              GParamSpec *pspec)
+gadataset_scan_options_set_property(GObject *object,
+                                    guint prop_id,
+                                    const GValue *value,
+                                    GParamSpec *pspec)
 {
-  auto priv = GAD_SCAN_OPTIONS_GET_PRIVATE(object);
+  auto priv = GADATASET_SCAN_OPTIONS_GET_PRIVATE(object);
 
   switch (prop_id) {
   case PROP_SCAN_OPTIONS:
     priv->scan_options =
-      *static_cast<std::shared_ptr<arrow::dataset::ScanOptions> *>(g_value_get_pointer(value));
+      *static_cast<std::shared_ptr<arrow::dataset::ScanOptions> *>(
+        g_value_get_pointer(value));
     break;
   case PROP_BATCH_SIZE:
     priv->scan_options->batch_size = g_value_get_int64(value);
@@ -103,12 +104,12 @@ gad_scan_options_set_property(GObject *object,
 }
 
 static void
-gad_scan_options_get_property(GObject *object,
-                              guint prop_id,
-                              GValue *value,
-                              GParamSpec *pspec)
+gadataset_scan_options_get_property(GObject *object,
+                                    guint prop_id,
+                                    GValue *value,
+                                    GParamSpec *pspec)
 {
-  auto priv = GAD_SCAN_OPTIONS_GET_PRIVATE(object);
+  auto priv = GADATASET_SCAN_OPTIONS_GET_PRIVATE(object);
 
   switch (prop_id) {
   case PROP_BATCH_SIZE:
@@ -124,23 +125,23 @@ gad_scan_options_get_property(GObject *object,
 }
 
 static void
-gad_scan_options_init(GADScanOptions *object)
+gadataset_scan_options_init(GADatasetScanOptions *object)
 {
-  auto priv = GAD_SCAN_OPTIONS_GET_PRIVATE(object);
+  auto priv = GADATASET_SCAN_OPTIONS_GET_PRIVATE(object);
   new(&priv->scan_options) std::shared_ptr<arrow::dataset::ScanOptions>;
 }
 
 static void
-gad_scan_options_class_init(GADScanOptionsClass *klass)
+gadataset_scan_options_class_init(GADatasetScanOptionsClass *klass)
 {
   GObjectClass *gobject_class;
   GParamSpec *spec;
 
   gobject_class = G_OBJECT_CLASS(klass);
 
-  gobject_class->finalize     = gad_scan_options_finalize;
-  gobject_class->set_property = gad_scan_options_set_property;
-  gobject_class->get_property = gad_scan_options_get_property;
+  gobject_class->finalize     = gadataset_scan_options_finalize;
+  gobject_class->set_property = gadataset_scan_options_set_property;
+  gobject_class->get_property = gadataset_scan_options_get_property;
 
   auto scan_options = std::make_shared<arrow::dataset::ScanOptions>();
 
@@ -156,7 +157,7 @@ gad_scan_options_class_init(GADScanOptionsClass *klass)
   // TODO: PROP_PROJECTOR
 
   /**
-   * GADScanOptions:batch-size:
+   * GADatasetScanOptions:batch-size:
    *
    * Maximum row count for scanned batches.
    *
@@ -172,7 +173,7 @@ gad_scan_options_class_init(GADScanOptionsClass *klass)
   g_object_class_install_property(gobject_class, PROP_BATCH_SIZE, spec);
 
   /**
-   * GADScanOptions:use-threads:
+   * GADatasetScanOptions:use-threads:
    *
    * Indicate if the Scanner should make use of a ThreadPool.
    *
@@ -187,45 +188,45 @@ gad_scan_options_class_init(GADScanOptionsClass *klass)
 }
 
 /**
- * gad_scan_options_new:
+ * gadataset_scan_options_new:
  * @schema: A #GArrowSchema.
  *
- * Returns: A newly created #GADScanOptions.
+ * Returns: A newly created #GADatasetScanOptions.
  *
  * Since: 1.0.0
  */
-GADScanOptions *
-gad_scan_options_new(GArrowSchema *schema)
+GADatasetScanOptions *
+gadataset_scan_options_new(GArrowSchema *schema)
 {
   auto arrow_schema = garrow_schema_get_raw(schema);
   auto arrow_scan_options = std::make_shared<arrow::dataset::ScanOptions>();
   arrow_scan_options->dataset_schema = arrow_schema;
-  return gad_scan_options_new_raw(&arrow_scan_options);
+  return gadataset_scan_options_new_raw(&arrow_scan_options);
 }
 
 /**
- * gad_scan_options_get_schema:
- * @scan_options: A #GADScanOptions.
+ * gadataset_scan_options_get_schema:
+ * @scan_options: A #GADatasetScanOptions.
  *
  * Returns: (transfer full): A #GArrowSchema.
  *
  * Since: 1.0.0
  */
 GArrowSchema *
-gad_scan_options_get_schema(GADScanOptions *scan_options)
+gadataset_scan_options_get_schema(GADatasetScanOptions *scan_options)
 {
-  auto priv = GAD_SCAN_OPTIONS_GET_PRIVATE(scan_options);
+  auto priv = GADATASET_SCAN_OPTIONS_GET_PRIVATE(scan_options);
   auto arrow_schema = priv->scan_options->dataset_schema;
   return garrow_schema_new_raw(&arrow_schema);
 }
 
 /* arrow::dataset::ScanTask */
 
-typedef struct GADScanTaskPrivate_ {
+typedef struct GADatasetScanTaskPrivate_ {
   std::shared_ptr<arrow::dataset::ScanTask> scan_task;
-  GADScanOptions *options;
-  GADFragment *fragment;
-} GADScanTaskPrivate;
+  GADatasetScanOptions *options;
+  GADatasetFragment *fragment;
+} GADatasetScanTaskPrivate;
 
 enum {
   PROP_SCAN_TASK = 1,
@@ -233,19 +234,19 @@ enum {
   PROP_FRAGMENT,
 };
 
-G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GADScanTask,
-                                    gad_scan_task,
+G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GADatasetScanTask,
+                                    gadataset_scan_task,
                                     G_TYPE_OBJECT)
 
-#define GAD_SCAN_TASK_GET_PRIVATE(obj)          \
-  static_cast<GADScanTaskPrivate *>(            \
-    gad_scan_task_get_instance_private(         \
-      GAD_SCAN_TASK(obj)))
+#define GADATASET_SCAN_TASK_GET_PRIVATE(obj)          \
+  static_cast<GADatasetScanTaskPrivate *>(            \
+    gadataset_scan_task_get_instance_private(         \
+      GADATASET_SCAN_TASK(obj)))
 
 static void
-gad_scan_task_dispose(GObject *object)
+gadataset_scan_task_dispose(GObject *object)
 {
-  auto priv = GAD_SCAN_TASK_GET_PRIVATE(object);
+  auto priv = GADATASET_SCAN_TASK_GET_PRIVATE(object);
 
   if (priv->options) {
     g_object_unref(priv->options);
@@ -257,37 +258,38 @@ gad_scan_task_dispose(GObject *object)
     priv->fragment = NULL;
   }
 
-  G_OBJECT_CLASS(gad_scan_task_parent_class)->dispose(object);
+  G_OBJECT_CLASS(gadataset_scan_task_parent_class)->dispose(object);
 }
 
 static void
-gad_scan_task_finalize(GObject *object)
+gadataset_scan_task_finalize(GObject *object)
 {
-  auto priv = GAD_SCAN_TASK_GET_PRIVATE(object);
+  auto priv = GADATASET_SCAN_TASK_GET_PRIVATE(object);
 
   priv->scan_task.~shared_ptr();
 
-  G_OBJECT_CLASS(gad_scan_task_parent_class)->finalize(object);
+  G_OBJECT_CLASS(gadataset_scan_task_parent_class)->finalize(object);
 }
 
 static void
-gad_scan_task_set_property(GObject *object,
+gadataset_scan_task_set_property(GObject *object,
                            guint prop_id,
                            const GValue *value,
                            GParamSpec *pspec)
 {
-  auto priv = GAD_SCAN_TASK_GET_PRIVATE(object);
+  auto priv = GADATASET_SCAN_TASK_GET_PRIVATE(object);
 
   switch (prop_id) {
   case PROP_SCAN_TASK:
     priv->scan_task =
-      *static_cast<std::shared_ptr<arrow::dataset::ScanTask> *>(g_value_get_pointer(value));
+      *static_cast<std::shared_ptr<arrow::dataset::ScanTask> *>(
+        g_value_get_pointer(value));
     break;
   case PROP_OPTIONS:
-    priv->options = GAD_SCAN_OPTIONS(g_value_dup_object(value));
+    priv->options = GADATASET_SCAN_OPTIONS(g_value_dup_object(value));
     break;
   case PROP_FRAGMENT:
-    priv->fragment = GAD_FRAGMENT(g_value_dup_object(value));
+    priv->fragment = GADATASET_FRAGMENT(g_value_dup_object(value));
     break;
   default:
     G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
@@ -296,12 +298,12 @@ gad_scan_task_set_property(GObject *object,
 }
 
 static void
-gad_scan_task_get_property(GObject *object,
+gadataset_scan_task_get_property(GObject *object,
                            guint prop_id,
                            GValue *value,
                            GParamSpec *pspec)
 {
-  auto priv = GAD_SCAN_TASK_GET_PRIVATE(object);
+  auto priv = GADATASET_SCAN_TASK_GET_PRIVATE(object);
 
   switch (prop_id) {
   case PROP_OPTIONS:
@@ -317,21 +319,21 @@ gad_scan_task_get_property(GObject *object,
 }
 
 static void
-gad_scan_task_init(GADScanTask *object)
+gadataset_scan_task_init(GADatasetScanTask *object)
 {
-  auto priv = GAD_SCAN_TASK_GET_PRIVATE(object);
+  auto priv = GADATASET_SCAN_TASK_GET_PRIVATE(object);
   new(&priv->scan_task) std::shared_ptr<arrow::dataset::ScanTask>;
 }
 
 static void
-gad_scan_task_class_init(GADScanTaskClass *klass)
+gadataset_scan_task_class_init(GADatasetScanTaskClass *klass)
 {
   auto gobject_class = G_OBJECT_CLASS(klass);
 
-  gobject_class->dispose      = gad_scan_task_dispose;
-  gobject_class->finalize     = gad_scan_task_finalize;
-  gobject_class->set_property = gad_scan_task_set_property;
-  gobject_class->get_property = gad_scan_task_get_property;
+  gobject_class->dispose      = gadataset_scan_task_dispose;
+  gobject_class->finalize     = gadataset_scan_task_finalize;
+  gobject_class->set_property = gadataset_scan_task_set_property;
+  gobject_class->get_property = gadataset_scan_task_get_property;
 
   GParamSpec *spec;
   spec = g_param_spec_pointer("scan-task",
@@ -342,7 +344,7 @@ gad_scan_task_class_init(GADScanTaskClass *klass)
   g_object_class_install_property(gobject_class, PROP_SCAN_TASK, spec);
 
   /**
-   * GADScanTask:options:
+   * GADatasetScanTask:options:
    *
    * The options of the scan task.
    *
@@ -351,13 +353,13 @@ gad_scan_task_class_init(GADScanTaskClass *klass)
   spec = g_param_spec_object("options",
                              "Options",
                              "The options of the scan task",
-                             GAD_TYPE_SCAN_OPTIONS,
+                             GADATASET_TYPE_SCAN_OPTIONS,
                              static_cast<GParamFlags>(G_PARAM_READWRITE |
                                                       G_PARAM_CONSTRUCT_ONLY));
   g_object_class_install_property(gobject_class, PROP_OPTIONS, spec);
 
   /**
-   * GADScanTask:fragment:
+   * GADatasetScanTask:fragment:
    *
    * The fragment of the scan task.
    *
@@ -366,57 +368,57 @@ gad_scan_task_class_init(GADScanTaskClass *klass)
   spec = g_param_spec_object("fragment",
                              "Fragment",
                              "The fragment of the scan task",
-                             GAD_TYPE_FRAGMENT,
+                             GADATASET_TYPE_FRAGMENT,
                              static_cast<GParamFlags>(G_PARAM_READWRITE |
                                                       G_PARAM_CONSTRUCT_ONLY));
   g_object_class_install_property(gobject_class, PROP_FRAGMENT, spec);
 }
 
 /**
- * gad_scan_task_get_options:
- * @scan_task: A #GADScanTask.
+ * gadataset_scan_task_get_options:
+ * @scan_task: A #GADatasetScanTask.
  *
- * Returns: (transfer full): A #GADScanOptions.
+ * Returns: (transfer full): A #GADatasetScanOptions.
  *
  * Since: 1.0.0
  */
-GADScanOptions *
-gad_scan_task_get_options(GADScanTask *scan_task)
+GADatasetScanOptions *
+gadataset_scan_task_get_options(GADatasetScanTask *scan_task)
 {
-  auto priv = GAD_SCAN_TASK_GET_PRIVATE(scan_task);
+  auto priv = GADATASET_SCAN_TASK_GET_PRIVATE(scan_task);
   if (priv->options) {
     g_object_ref(priv->options);
     return priv->options;
   }
 
   auto arrow_options = priv->scan_task->options();
-  return gad_scan_options_new_raw(&arrow_options);
+  return gadataset_scan_options_new_raw(&arrow_options);
 }
 
 /**
- * gad_scan_task_get_fragment:
- * @scan_task: A #GADFragment.
+ * gadataset_scan_task_get_fragment:
+ * @scan_task: A #GADatasetFragment.
  *
- * Returns: (transfer full): A #GADFragment.
+ * Returns: (transfer full): A #GADatasetFragment.
  *
  * Since: 4.0.0
  */
-GADFragment *
-gad_scan_task_get_fragment(GADScanTask *scan_task)
+GADatasetFragment *
+gadataset_scan_task_get_fragment(GADatasetScanTask *scan_task)
 {
-  auto priv = GAD_SCAN_TASK_GET_PRIVATE(scan_task);
+  auto priv = GADATASET_SCAN_TASK_GET_PRIVATE(scan_task);
   if (priv->fragment) {
     g_object_ref(priv->fragment);
     return priv->fragment;
   }
 
   auto arrow_fragment = priv->scan_task->fragment();
-  return gad_fragment_new_raw(&arrow_fragment);
+  return gadataset_fragment_new_raw(&arrow_fragment);
 }
 
 /**
- * gad_scan_task_execute:
- * @scan_task: A #GADScanTask.
+ * gadataset_scan_task_execute:
+ * @scan_task: A #GADatasetScanTask.
  * @error: (nullable): Return location for a #GError or %NULL.
  *
  * Returns: (nullable) (transfer full): A newly created #GArrowRecordBatchIterator,
@@ -424,10 +426,11 @@ gad_scan_task_get_fragment(GADScanTask *scan_task)
  *
  * Since: 1.0.0
  */
-GArrowRecordBatchIterator *gad_scan_task_execute(GADScanTask *scan_task,
-                                                 GError **error)
+GArrowRecordBatchIterator *
+gadataset_scan_task_execute(GADatasetScanTask *scan_task,
+                            GError **error)
 {
-  auto priv = GAD_SCAN_TASK_GET_PRIVATE(scan_task);
+  auto priv = GADATASET_SCAN_TASK_GET_PRIVATE(scan_task);
   auto arrow_result = priv->scan_task->Execute();
   if (garrow::check(error, arrow_result, "[datasets][scan-task][execute]")) {
     auto arrow_record_batch_iteraor = std::move(*arrow_result);
@@ -439,37 +442,37 @@ GArrowRecordBatchIterator *gad_scan_task_execute(GADScanTask *scan_task,
 
 /* arrow::dataset::InMemoryScanTask */
 
-G_DEFINE_TYPE(GADInMemoryScanTask,
-              gad_in_memory_scan_task,
-              GAD_TYPE_SCAN_TASK)
+G_DEFINE_TYPE(GADatasetInMemoryScanTask,
+              gadataset_in_memory_scan_task,
+              GADATASET_TYPE_SCAN_TASK)
 
 static void
-gad_in_memory_scan_task_init(GADInMemoryScanTask *object)
+gadataset_in_memory_scan_task_init(GADatasetInMemoryScanTask *object)
 {
 }
 
 static void
-gad_in_memory_scan_task_class_init(GADInMemoryScanTaskClass *klass)
+gadataset_in_memory_scan_task_class_init(GADatasetInMemoryScanTaskClass *klass)
 {
 }
 
 /**
- * gad_in_memory_scan_task_new:
+ * gadataset_in_memory_scan_task_new:
  * @record_batches: (array length=n_record_batches):
  *   (element-type GArrowRecordBatch): The record batches of the table.
  * @n_record_batches: The number of record batches.
- * @options: A #GADScanOptions.
- * @fragment: A #GADInMemoryFragment.
+ * @options: A #GADatasetScanOptions.
+ * @fragment: A #GADatasetInMemoryFragment.
  *
- * Returns: A newly created #GADInMemoryScanTask.
+ * Returns: A newly created #GADatasetInMemoryScanTask.
  *
  * Since: 1.0.0
  */
-GADInMemoryScanTask *
-gad_in_memory_scan_task_new(GArrowRecordBatch **record_batches,
-                            gsize n_record_batches,
-                            GADScanOptions *options,
-                            GADInMemoryFragment *fragment)
+GADatasetInMemoryScanTask *
+gadataset_in_memory_scan_task_new(GArrowRecordBatch **record_batches,
+                                  gsize n_record_batches,
+                                  GADatasetScanOptions *options,
+                                  GADatasetInMemoryFragment *fragment)
 {
   std::vector<std::shared_ptr<arrow::RecordBatch>> arrow_record_batches;
   arrow_record_batches.reserve(n_record_batches);
@@ -477,43 +480,45 @@ gad_in_memory_scan_task_new(GArrowRecordBatch **record_batches,
     auto arrow_record_batch = garrow_record_batch_get_raw(record_batches[i]);
     arrow_record_batches.push_back(arrow_record_batch);
   }
-  auto arrow_options = gad_scan_options_get_raw(options);
-  auto arrow_fragment = gad_fragment_get_raw(GAD_FRAGMENT(fragment));
+  auto arrow_options = gadataset_scan_options_get_raw(options);
+  auto arrow_fragment = gadataset_fragment_get_raw(GADATASET_FRAGMENT(fragment));
   auto arrow_in_memory_scan_task =
     std::make_shared<arrow::dataset::InMemoryScanTask>(arrow_record_batches,
                                                        arrow_options,
                                                        arrow_fragment);
-  return gad_in_memory_scan_task_new_raw(&arrow_in_memory_scan_task,
+  return gadataset_in_memory_scan_task_new_raw(&arrow_in_memory_scan_task,
                                          options,
                                          fragment);
 }
 
 G_END_DECLS
 
-GADScanOptions *
-gad_scan_options_new_raw(std::shared_ptr<arrow::dataset::ScanOptions> *arrow_scan_options)
+GADatasetScanOptions *
+gadataset_scan_options_new_raw(
+  std::shared_ptr<arrow::dataset::ScanOptions> *arrow_scan_options)
 {
   auto scan_options =
-    GAD_SCAN_OPTIONS(g_object_new(GAD_TYPE_SCAN_OPTIONS,
+    GADATASET_SCAN_OPTIONS(g_object_new(GADATASET_TYPE_SCAN_OPTIONS,
                                   "scan-options", arrow_scan_options,
                                   NULL));
   return scan_options;
 }
 
 std::shared_ptr<arrow::dataset::ScanOptions>
-gad_scan_options_get_raw(GADScanOptions *scan_options)
+gadataset_scan_options_get_raw(GADatasetScanOptions *scan_options)
 {
-  auto priv = GAD_SCAN_OPTIONS_GET_PRIVATE(scan_options);
+  auto priv = GADATASET_SCAN_OPTIONS_GET_PRIVATE(scan_options);
   return priv->scan_options;
 }
 
-GADInMemoryScanTask *
-gad_in_memory_scan_task_new_raw(std::shared_ptr<arrow::dataset::InMemoryScanTask> *arrow_in_memory_scan_task,
-                                GADScanOptions *options,
-                                GADInMemoryFragment *fragment)
+GADatasetInMemoryScanTask *
+gadataset_in_memory_scan_task_new_raw(
+  std::shared_ptr<arrow::dataset::InMemoryScanTask> *arrow_in_memory_scan_task,
+  GADatasetScanOptions *options,
+  GADatasetInMemoryFragment *fragment)
 {
   auto in_memory_scan_task =
-    GAD_IN_MEMORY_SCAN_TASK(g_object_new(GAD_TYPE_IN_MEMORY_SCAN_TASK,
+    GADATASET_IN_MEMORY_SCAN_TASK(g_object_new(GADATASET_TYPE_IN_MEMORY_SCAN_TASK,
                                          "scan-task", arrow_in_memory_scan_task,
                                          "options", options,
                                          "fragment", fragment,
diff --git a/c_glib/arrow-dataset-glib/scanner.h b/c_glib/arrow-dataset-glib/scanner.h
index f387e8948f2..90a60363e82 100644
--- a/c_glib/arrow-dataset-glib/scanner.h
+++ b/c_glib/arrow-dataset-glib/scanner.h
@@ -27,62 +27,68 @@ G_BEGIN_DECLS
 
 /* arrow::dataset::ScanOptions */
 
-#define GAD_TYPE_SCAN_OPTIONS (gad_scan_options_get_type())
-G_DECLARE_DERIVABLE_TYPE(GADScanOptions,
-                         gad_scan_options,
-                         GAD,
+#define GADATASET_TYPE_SCAN_OPTIONS (gadataset_scan_options_get_type())
+G_DECLARE_DERIVABLE_TYPE(GADatasetScanOptions,
+                         gadataset_scan_options,
+                         GADATASET,
                          SCAN_OPTIONS,
                          GObject)
-struct _GADScanOptionsClass
+struct _GADatasetScanOptionsClass
 {
   GObjectClass parent_class;
 };
 
 
 GARROW_AVAILABLE_IN_1_0
-GADScanOptions *gad_scan_options_new(GArrowSchema *schema);
+GADatasetScanOptions *
+gadataset_scan_options_new(GArrowSchema *schema);
 GARROW_AVAILABLE_IN_1_0
-GArrowSchema *gad_scan_options_get_schema(GADScanOptions *scan_options);
+GArrowSchema *
+gadataset_scan_options_get_schema(GADatasetScanOptions *scan_options);
 
 /* arrow::dataset::ScanTask */
 
-#define GAD_TYPE_SCAN_TASK (gad_scan_task_get_type())
-G_DECLARE_DERIVABLE_TYPE(GADScanTask,
-                         gad_scan_task,
-                         GAD,
+#define GADATASET_TYPE_SCAN_TASK (gadataset_scan_task_get_type())
+G_DECLARE_DERIVABLE_TYPE(GADatasetScanTask,
+                         gadataset_scan_task,
+                         GADATASET,
                          SCAN_TASK,
                          GObject)
-struct _GADScanTaskClass
+struct _GADatasetScanTaskClass
 {
   GObjectClass parent_class;
 };
 
 GARROW_AVAILABLE_IN_1_0
-GADScanOptions *gad_scan_task_get_options(GADScanTask *scan_task);
+GADatasetScanOptions *
+gadataset_scan_task_get_options(GADatasetScanTask *scan_task);
 GARROW_AVAILABLE_IN_4_0
-GADFragment *gad_scan_task_get_fragment(GADScanTask *scan_task);
+GADatasetFragment *
+gadataset_scan_task_get_fragment(GADatasetScanTask *scan_task);
 GARROW_AVAILABLE_IN_1_0
-GArrowRecordBatchIterator *gad_scan_task_execute(GADScanTask *scan_task,
-                                                 GError **error);
+GArrowRecordBatchIterator *
+gadataset_scan_task_execute(GADatasetScanTask *scan_task,
+                            GError **error);
 
 /* arrow::dataset::InMemoryScanTask */
 
-#define GAD_TYPE_IN_MEMORY_SCAN_TASK (gad_in_memory_scan_task_get_type())
-G_DECLARE_DERIVABLE_TYPE(GADInMemoryScanTask,
-                         gad_in_memory_scan_task,
-                         GAD,
+#define GADATASET_TYPE_IN_MEMORY_SCAN_TASK      \
+  (gadataset_in_memory_scan_task_get_type())
+G_DECLARE_DERIVABLE_TYPE(GADatasetInMemoryScanTask,
+                         gadataset_in_memory_scan_task,
+                         GADATASET,
                          IN_MEMORY_SCAN_TASK,
-                         GADScanTask)
-struct _GADInMemoryScanTaskClass
+                         GADatasetScanTask)
+struct _GADatasetInMemoryScanTaskClass
 {
-  GADScanTaskClass parent_class;
+  GADatasetScanTaskClass parent_class;
 };
 
 GARROW_AVAILABLE_IN_1_0
-GADInMemoryScanTask *
-gad_in_memory_scan_task_new(GArrowRecordBatch **record_batches,
-                            gsize n_record_batches,
-                            GADScanOptions *options,
-                            GADInMemoryFragment *fragment);
+GADatasetInMemoryScanTask *
+gadataset_in_memory_scan_task_new(GArrowRecordBatch **record_batches,
+                                  gsize n_record_batches,
+                                  GADatasetScanOptions *options,
+                                  GADatasetInMemoryFragment *fragment);
 
 G_END_DECLS
diff --git a/c_glib/arrow-dataset-glib/scanner.hpp b/c_glib/arrow-dataset-glib/scanner.hpp
index f10351ee99b..ad3ac6a03cd 100644
--- a/c_glib/arrow-dataset-glib/scanner.hpp
+++ b/c_glib/arrow-dataset-glib/scanner.hpp
@@ -24,12 +24,14 @@
 #include <arrow-dataset-glib/fragment.h>
 #include <arrow-dataset-glib/scanner.h>
 
-GADScanOptions *
-gad_scan_options_new_raw(std::shared_ptr<arrow::dataset::ScanOptions> *arrow_scan_options);
+GADatasetScanOptions *
+gadataset_scan_options_new_raw(
+  std::shared_ptr<arrow::dataset::ScanOptions> *arrow_scan_options);
 std::shared_ptr<arrow::dataset::ScanOptions>
-gad_scan_options_get_raw(GADScanOptions *scan_options);
+gadataset_scan_options_get_raw(GADatasetScanOptions *scan_options);
 
-GADInMemoryScanTask *
-gad_in_memory_scan_task_new_raw(std::shared_ptr<arrow::dataset::InMemoryScanTask> *arrow_in_memory_scan_task,
-                                GADScanOptions *scan_options,
-                                GADInMemoryFragment *fragment);
+GADatasetInMemoryScanTask *
+gadataset_in_memory_scan_task_new_raw(
+  std::shared_ptr<arrow::dataset::InMemoryScanTask> *arrow_in_memory_scan_task,
+  GADatasetScanOptions *scan_options,
+  GADatasetInMemoryFragment *fragment);
diff --git a/c_glib/doc/arrow-dataset-glib/arrow-dataset-glib-docs.xml b/c_glib/doc/arrow-dataset-glib/arrow-dataset-glib-docs.xml
index f9667bc2d43..9a1ae059378 100644
--- a/c_glib/doc/arrow-dataset-glib/arrow-dataset-glib-docs.xml
+++ b/c_glib/doc/arrow-dataset-glib/arrow-dataset-glib-docs.xml
@@ -43,6 +43,8 @@
       <xi:include href="xml/scanner.xml"/>
       <title>Fragment</title>
       <xi:include href="xml/fragment.xml"/>
+      <title>File format</title>
+      <xi:include href="xml/file-format.xml"/>
     </chapter>
   </part>
 
@@ -62,6 +64,10 @@
     <title>Index of new symbols in 4.0.0</title>
     <xi:include href="xml/api-index-4.0.0.xml"><xi:fallback /></xi:include>
   </index>
+  <index id="api-index-3-0-0" role="3.0.0">
+    <title>Index of new symbols in 3.0.0</title>
+    <xi:include href="xml/api-index-3.0.0.xml"><xi:fallback /></xi:include>
+  </index>
   <index id="api-index-1-0-0" role="1.0.0">
     <title>Index of new symbols in 1.0.0</title>
     <xi:include href="xml/api-index-1.0.0.xml"><xi:fallback /></xi:include>
diff --git a/c_glib/doc/arrow-dataset-glib/meson.build b/c_glib/doc/arrow-dataset-glib/meson.build
index 1cb2f9e99c8..ca037b7e36a 100644
--- a/c_glib/doc/arrow-dataset-glib/meson.build
+++ b/c_glib/doc/arrow-dataset-glib/meson.build
@@ -70,7 +70,7 @@ gnome.gtkdoc(package_id,
              ],
              mkdb_args: [
                '--output-format=xml',
-               '--name-space=gad',
+               '--name-space=gadataset',
                '--source-suffixes=c,cpp,h',
              ],
              fixxref_args: [

From 5173af0d554e7f0a2136f66bf57c5c07f295b1fa Mon Sep 17 00:00:00 2001
From: Kazuaki Ishizaki <ishizaki@jp.ibm.com>
Date: Mon, 14 Jun 2021 16:09:01 +0000
Subject: [PATCH 407/719] ARROW-13026: [CI] Use LLVM 10 for s390x

[Recent TravisCI for s390x C](https://travis-ci.com/github/apache/arrow/jobs/512489573#L586) causes the following error. The file should be downloaded `...20210605...`. To fix this issue, this PR tries to download LLVM from the default apt repository instead of the LLVM apt repository.

```
Err:8 https://apt.llvm.org/focal llvm-toolchain-focal-12/main s390x libllvm12 s390x 1:12.0.1~++20210604112550+6279fd114acb-1~exp1~20210604213327.98

  404  Not Found [IP: 199.232.38.49 443]
```

Closes #10525 from kiszk/ARROW-13026

Authored-by: Kazuaki Ishizaki <ishizaki@jp.ibm.com>
Signed-off-by: Kazuaki Ishizaki <ishizaki@jp.ibm.com>
---
 .travis.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.travis.yml b/.travis.yml
index ced0405ec86..4cd546a29e7 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -106,6 +106,9 @@ jobs:
           -e Protobuf_SOURCE=BUNDLED
           -e gRPC_SOURCE=BUNDLED
           "
+        # The LLVM's APT repository causes download error for s390x binary
+        # We should use the LLVM provided by the default APT repository
+        LLVM: "10"
         UBUNTU: "20.04"
 
     - name: "Go on s390x"

From a22fc67e6e8fd78e2d612b99733f47c48630e663 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Mon, 14 Jun 2021 20:18:08 +0200
Subject: [PATCH 408/719] ARROW-13075: [Python] Expose C data interface API for
 pyarrow.Field
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #10529 from kszucs/field-c-interface

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 python/pyarrow/includes/libarrow.pxd |  3 +++
 python/pyarrow/tests/test_cffi.py    | 24 ++++++++++++++++++++++++
 python/pyarrow/types.pxi             | 21 +++++++++++++++++++++
 3 files changed, 48 insertions(+)

diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index d5ce98d9a88..35a2034eba4 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -2334,6 +2334,9 @@ cdef extern from 'arrow/c/bridge.h' namespace 'arrow' nogil:
     CStatus ExportType(CDataType&, ArrowSchema* out)
     CResult[shared_ptr[CDataType]] ImportType(ArrowSchema*)
 
+    CStatus ExportField(CField&, ArrowSchema* out)
+    CResult[shared_ptr[CField]] ImportField(ArrowSchema*)
+
     CStatus ExportSchema(CSchema&, ArrowSchema* out)
     CResult[shared_ptr[CSchema]] ImportSchema(ArrowSchema*)
 
diff --git a/python/pyarrow/tests/test_cffi.py b/python/pyarrow/tests/test_cffi.py
index 5505e571645..db0da5652df 100644
--- a/python/pyarrow/tests/test_cffi.py
+++ b/python/pyarrow/tests/test_cffi.py
@@ -105,6 +105,30 @@ def test_export_import_type():
         pa.DataType._import_from_c(ptr_schema)
 
 
+@needs_cffi
+def test_export_import_field():
+    c_schema = ffi.new("struct ArrowSchema*")
+    ptr_schema = int(ffi.cast("uintptr_t", c_schema))
+
+    gc.collect()  # Make sure no Arrow data dangles in a ref cycle
+    old_allocated = pa.total_allocated_bytes()
+
+    field = pa.field("test", pa.list_(pa.int32()), nullable=True)
+    field._export_to_c(ptr_schema)
+    assert pa.total_allocated_bytes() > old_allocated
+    # Delete and recreate C++ object from exported pointer
+    del field
+    assert pa.total_allocated_bytes() > old_allocated
+
+    field_new = pa.Field._import_from_c(ptr_schema)
+    assert field_new == pa.field("test", pa.list_(pa.int32()), nullable=True)
+    assert pa.total_allocated_bytes() == old_allocated
+
+    # Now released
+    with assert_schema_released:
+        pa.Field._import_from_c(ptr_schema)
+
+
 @needs_cffi
 def test_export_import_array():
     c_schema = ffi.new("struct ArrowSchema*")
diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi
index 184e3dd8a7c..9cc49b3bfd5 100644
--- a/python/pyarrow/types.pxi
+++ b/python/pyarrow/types.pxi
@@ -1201,6 +1201,27 @@ cdef class Field(_Weakrefable):
             flattened = self.field.Flatten()
         return [pyarrow_wrap_field(f) for f in flattened]
 
+    def _export_to_c(self, uintptr_t out_ptr):
+        """
+        Export to a C ArrowSchema struct, given its pointer.
+
+        Be careful: if you don't pass the ArrowSchema struct to a consumer,
+        its memory will leak.  This is a low-level function intended for
+        expert users.
+        """
+        check_status(ExportField(deref(self.field), <ArrowSchema*> out_ptr))
+
+    @staticmethod
+    def _import_from_c(uintptr_t in_ptr):
+        """
+        Import Field from a C ArrowSchema struct, given its pointer.
+
+        This is a low-level function intended for expert users.
+        """
+        with nogil:
+            result = GetResultValue(ImportField(<ArrowSchema*> in_ptr))
+        return pyarrow_wrap_field(result)
+
 
 cdef class Schema(_Weakrefable):
 

From 68679b896d70a710f27bab8f23f85377408c0d78 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Mon, 14 Jun 2021 16:29:35 -0400
Subject: [PATCH 409/719] ARROW-13048: [C++] Fix copying objects with special
 characters on S3FS

Although the AWS SDK docs claim the caller must URL-encode the source path, the actual SDK source URL-encodes the path for you. This double-encoding was causing CopyFile to fail with a 404 not found as a result.

Closes #10526 from lidavidm/arrow-13048

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/src/arrow/filesystem/s3fs.cc      | 13 +++++++++++--
 cpp/src/arrow/filesystem/s3fs_test.cc |  8 +++++++-
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc
index effafad8c25..5e242d4e807 100644
--- a/cpp/src/arrow/filesystem/s3fs.cc
+++ b/cpp/src/arrow/filesystem/s3fs.cc
@@ -418,6 +418,14 @@ struct S3Path {
     }
   }
 
+  Aws::String ToAwsString() const {
+    Aws::String res(bucket.begin(), bucket.end());
+    res.reserve(bucket.size() + key.size() + 1);
+    res += kSep;
+    res.append(key.begin(), key.end());
+    return res;
+  }
+
   Aws::String ToURLEncodedAwsString() const {
     // URL-encode individual parts, not the '/' separator
     Aws::String res;
@@ -1525,8 +1533,9 @@ class S3FileSystem::Impl : public std::enable_shared_from_this<S3FileSystem::Imp
     S3Model::CopyObjectRequest req;
     req.SetBucket(ToAwsString(dest_path.bucket));
     req.SetKey(ToAwsString(dest_path.key));
-    // Copy source "Must be URL-encoded" according to AWS SDK docs.
-    req.SetCopySource(src_path.ToURLEncodedAwsString());
+    // ARROW-13048: Copy source "Must be URL-encoded" according to AWS SDK docs.
+    // However at least in 1.8 and 1.9 the SDK URL-encodes the path for you
+    req.SetCopySource(src_path.ToAwsString());
     return OutcomeToStatus(
         std::forward_as_tuple("When copying key '", src_path.key, "' in bucket '",
                               src_path.bucket, "' to key '", dest_path.key,
diff --git a/cpp/src/arrow/filesystem/s3fs_test.cc b/cpp/src/arrow/filesystem/s3fs_test.cc
index 966f12d855e..d73328554d0 100644
--- a/cpp/src/arrow/filesystem/s3fs_test.cc
+++ b/cpp/src/arrow/filesystem/s3fs_test.cc
@@ -781,7 +781,9 @@ TEST_F(TestS3FS, CopyFile) {
   ASSERT_OK(fs_->CopyFile("bucket/somedir/subdir/subfile", "bucket/newfile"));
   AssertFileInfo(fs_.get(), "bucket/newfile", FileType::File, 8);
   AssertObjectContents(client_.get(), "bucket", "newfile", "sub data");
-
+  // ARROW-13048: URL-encoded paths
+  ASSERT_OK(fs_->CopyFile("bucket/somefile", "bucket/a=2/newfile"));
+  ASSERT_OK(fs_->CopyFile("bucket/a=2/newfile", "bucket/a=3/newfile"));
   // Nonexistent
   ASSERT_RAISES(IOError, fs_->CopyFile("bucket/nonexistent", "bucket/newfile2"));
   ASSERT_RAISES(IOError, fs_->CopyFile("nonexistent-bucket/somefile", "bucket/newfile2"));
@@ -804,6 +806,10 @@ TEST_F(TestS3FS, Move) {
   // Source was deleted
   AssertFileInfo(fs_.get(), "bucket/somedir/subdir/subfile", FileType::NotFound);
 
+  // ARROW-13048: URL-encoded paths
+  ASSERT_OK(fs_->Move("bucket/newfile", "bucket/a=2/newfile"));
+  ASSERT_OK(fs_->Move("bucket/a=2/newfile", "bucket/a=3/newfile"));
+
   // Nonexistent
   ASSERT_RAISES(IOError, fs_->Move("bucket/non-existent", "bucket/newfile2"));
   ASSERT_RAISES(IOError, fs_->Move("nonexistent-bucket/somefile", "bucket/newfile2"));

From 43ed963f024f785605757022844a9f8f4bd2f9c0 Mon Sep 17 00:00:00 2001
From: Bryan Cutler <cutlerb@gmail.com>
Date: Mon, 14 Jun 2021 21:19:58 -0700
Subject: [PATCH 410/719] ARROW-13044: [Java] Change UnionVector and
 DenseUnionVector to extend AbstractContainerVector

Currently UnionVector and DenseUnionVector do not extend any base class, only implement the FieldVector interface. This change makes these vectors extend AbstractContainerVector, which is a subclass of BaseValueVector. This allows a the union vectors to be used in extension types as the underlying vector storage.

The current naming of child fields in the union vector ignores the original name and generates a new name based on type (and typeid). This has been changed to respect the original Field name if given, and only if the name is empty it will generate a new name as before. Some current tests add child Fields with empty names, so this preserves the original behavior.

Included tests to verify the added methods from AbstractContainerVector.

Closes #10513 from BryanCutler/union-extend-BaseValueVector-ARROW-13044

Authored-by: Bryan Cutler <cutlerb@gmail.com>
Signed-off-by: Bryan Cutler <cutlerb@gmail.com>
---
 .../codegen/templates/DenseUnionVector.java   | 45 ++++++++++++++-----
 .../main/codegen/templates/UnionVector.java   | 45 ++++++++++++++-----
 .../arrow/vector/TestDenseUnionVector.java    | 18 +++++++-
 .../apache/arrow/vector/TestUnionVector.java  | 14 ++++++
 4 files changed, 100 insertions(+), 22 deletions(-)

diff --git a/java/vector/src/main/codegen/templates/DenseUnionVector.java b/java/vector/src/main/codegen/templates/DenseUnionVector.java
index fff8b8114ac..c1991f65b92 100644
--- a/java/vector/src/main/codegen/templates/DenseUnionVector.java
+++ b/java/vector/src/main/codegen/templates/DenseUnionVector.java
@@ -84,10 +84,7 @@
  * each time the vector is accessed.
  * Source code generated using FreeMarker template ${.template_name}
  */
-public class DenseUnionVector implements FieldVector {
-
-  private String name;
-  private BufferAllocator allocator;
+public class DenseUnionVector extends AbstractContainerVector implements FieldVector {
   int valueCount;
 
   NonNullableStructVector internalStruct;
@@ -109,13 +106,12 @@ public class DenseUnionVector implements FieldVector {
   private byte[] typeMapFields = new byte[Byte.MAX_VALUE + 1];
 
   /**
-   * The next typd id to allocate.
+   * The next type id to allocate.
    */
   private byte nextTypeId = 0;
 
   private FieldReader reader;
 
-  private final CallBack callBack;
   private long typeBufferAllocationSizeInBytes;
   private long offsetBufferAllocationSizeInBytes;
 
@@ -134,8 +130,7 @@ public static DenseUnionVector empty(String name, BufferAllocator allocator) {
   }
 
   public DenseUnionVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) {
-    this.name = name;
-    this.allocator = allocator;
+    super(name, allocator, callBack);
     this.fieldType = fieldType;
     this.internalStruct = new NonNullableStructVector(
         "internal",
@@ -145,7 +140,6 @@ public DenseUnionVector(String name, BufferAllocator allocator, FieldType fieldT
         AbstractStructVector.ConflictPolicy.CONFLICT_REPLACE,
         false);
     this.typeBuffer = allocator.getEmpty();
-    this.callBack = callBack;
     this.typeBufferAllocationSizeInBytes = BaseValueVector.INITIAL_VALUE_ALLOCATION * TYPE_WIDTH;
     this.offsetBuffer = allocator.getEmpty();
     this.offsetBufferAllocationSizeInBytes = BaseValueVector.INITIAL_VALUE_ALLOCATION * OFFSET_WIDTH;
@@ -575,7 +569,7 @@ public void copyFromSafe(int inIndex, int outIndex, ValueVector from) {
   }
 
   public FieldVector addVector(byte typeId, FieldVector v) {
-    String name = fieldName(typeId, v.getMinorType());
+    final String name = v.getName().isEmpty() ? fieldName(typeId, v.getMinorType()) : v.getName();
     Preconditions.checkState(internalStruct.getChild(name) == null, String.format("%s vector already exists", name));
     final FieldVector newVector = internalStruct.addOrGet(name, v.getField().getFieldType(), v.getClass());
     v.makeTransferPair(newVector).transfer();
@@ -909,4 +903,35 @@ private void setNegative(long start, long end) {
       typeBuffer.setByte(i, -1);
     }
   }
+
+  @Override
+  public <T extends FieldVector> T addOrGet(String name, FieldType fieldType, Class<T> clazz) {
+    return internalStruct.addOrGet(name, fieldType, clazz);
+  }
+
+  @Override
+  public <T extends FieldVector> T getChild(String name, Class<T> clazz) {
+    return internalStruct.getChild(name, clazz);
+  }
+
+  @Override
+  public VectorWithOrdinal getChildVectorWithOrdinal(String name) {
+    return internalStruct.getChildVectorWithOrdinal(name);
+  }
+
+  @Override
+  public int size() {
+    return internalStruct.size();
+  }
+
+  @Override
+  public void setInitialCapacity(int valueCount, double density) {
+    for (final ValueVector vector : internalStruct) {
+      if (vector instanceof DensityAwareVector) {
+        ((DensityAwareVector) vector).setInitialCapacity(valueCount, density);
+      } else {
+        vector.setInitialCapacity(valueCount);
+      }
+    }
+  }
 }
diff --git a/java/vector/src/main/codegen/templates/UnionVector.java b/java/vector/src/main/codegen/templates/UnionVector.java
index 0d9130da0e0..bd5202977b8 100644
--- a/java/vector/src/main/codegen/templates/UnionVector.java
+++ b/java/vector/src/main/codegen/templates/UnionVector.java
@@ -84,10 +84,7 @@
  * each time the vector is accessed.
  * Source code generated using FreeMarker template ${.template_name}
  */
-public class UnionVector implements FieldVector {
-
-  private String name;
-  private BufferAllocator allocator;
+public class UnionVector extends AbstractContainerVector implements FieldVector {
   int valueCount;
 
   NonNullableStructVector internalStruct;
@@ -102,7 +99,6 @@ public class UnionVector implements FieldVector {
   private int singleType = 0;
   private ValueVector singleVector;
 
-  private final CallBack callBack;
   private int typeBufferAllocationSizeInBytes;
 
   private final FieldType fieldType;
@@ -124,8 +120,7 @@ public UnionVector(String name, BufferAllocator allocator, CallBack callBack) {
   }
 
   public UnionVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) {
-    this.name = name;
-    this.allocator = allocator;
+    super(name, allocator, callBack);
     this.fieldType = fieldType;
     this.internalStruct = new NonNullableStructVector(
         "internal",
@@ -135,7 +130,6 @@ public UnionVector(String name, BufferAllocator allocator, FieldType fieldType,
         AbstractStructVector.ConflictPolicy.CONFLICT_REPLACE,
         false);
     this.typeBuffer = allocator.getEmpty();
-    this.callBack = callBack;
     this.typeBufferAllocationSizeInBytes = BaseValueVector.INITIAL_VALUE_ALLOCATION * TYPE_WIDTH;
   }
 
@@ -500,7 +494,7 @@ public void copyFromSafe(int inIndex, int outIndex, ValueVector from) {
   }
 
   public FieldVector addVector(FieldVector v) {
-    String name = v.getMinorType().name().toLowerCase();
+    final String name = v.getName().isEmpty() ? fieldName(v.getMinorType()) : v.getName();
     Preconditions.checkState(internalStruct.getChild(name) == null, String.format("%s vector already exists", name));
     final FieldVector newVector = internalStruct.addOrGet(name, v.getField().getFieldType(), v.getClass());
     v.makeTransferPair(newVector).transfer();
@@ -515,7 +509,7 @@ public FieldVector addVector(FieldVector v) {
    * Directly put a vector to internalStruct without creating a new one with same type.
    */
   public void directAddVector(FieldVector v) {
-    String name = v.getMinorType().name().toLowerCase();
+    String name = fieldName(v.getMinorType());
     Preconditions.checkState(internalStruct.getChild(name) == null, String.format("%s vector already exists", name));
     internalStruct.putChild(name, v);
     if (callBack != null) {
@@ -825,4 +819,35 @@ public String getName() {
     public String toString() {
       return ValueVectorUtility.getToString(this, 0, getValueCount());
     }
+
+    @Override
+    public <T extends FieldVector> T addOrGet(String name, FieldType fieldType, Class<T> clazz) {
+      return internalStruct.addOrGet(name, fieldType, clazz);
+    }
+
+    @Override
+    public <T extends FieldVector> T getChild(String name, Class<T> clazz) {
+      return internalStruct.getChild(name, clazz);
+    }
+
+    @Override
+    public VectorWithOrdinal getChildVectorWithOrdinal(String name) {
+      return internalStruct.getChildVectorWithOrdinal(name);
+    }
+
+    @Override
+    public int size() {
+      return internalStruct.size();
+    }
+
+    @Override
+    public void setInitialCapacity(int valueCount, double density) {
+      for (final ValueVector vector : internalStruct) {
+        if (vector instanceof DensityAwareVector) {
+          ((DensityAwareVector) vector).setInitialCapacity(valueCount, density);
+        } else {
+          vector.setInitialCapacity(valueCount);
+        }
+      }
+    }
 }
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestDenseUnionVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestDenseUnionVector.java
index d7fccd1ed15..01becf00794 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestDenseUnionVector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestDenseUnionVector.java
@@ -31,6 +31,7 @@
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.vector.complex.DenseUnionVector;
 import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.VectorWithOrdinal;
 import org.apache.arrow.vector.holders.NullableBigIntHolder;
 import org.apache.arrow.vector.holders.NullableBitHolder;
 import org.apache.arrow.vector.holders.NullableFloat4Holder;
@@ -346,6 +347,19 @@ public void testGetFieldTypeInfo() throws Exception {
     vector.initializeChildrenFromFields(children);
 
     assertEquals(vector.getField(), field);
+
+    // Union has 2 child vectors
+    assertEquals(vector.size(), 2);
+
+    // Check child field 0
+    VectorWithOrdinal intChild = vector.getChildVectorWithOrdinal("int");
+    assertEquals(intChild.ordinal, 0);
+    assertEquals(intChild.vector.getField(), children.get(0));
+
+    // Check child field 1
+    VectorWithOrdinal varcharChild = vector.getChildVectorWithOrdinal("varchar");
+    assertEquals(varcharChild.ordinal, 1);
+    assertEquals(varcharChild.vector.getField(), children.get(1));
   }
 
   @Test
@@ -406,8 +420,8 @@ public void testGetBufferAddress() throws Exception {
   @Test
   public void testMultipleStructs() {
     FieldType type = new FieldType(true, ArrowType.Struct.INSTANCE, null, null);
-    try (StructVector structVector1 = new StructVector("struct", allocator, type, null);
-         StructVector structVector2 = new StructVector("struct", allocator, type, null);
+    try (StructVector structVector1 = new StructVector("struct1", allocator, type, null);
+         StructVector structVector2 = new StructVector("struct2", allocator, type, null);
          DenseUnionVector unionVector = DenseUnionVector.empty("union", allocator)) {
 
       // prepare sub vectors
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestUnionVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestUnionVector.java
index defa82f8e41..962c233889d 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestUnionVector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestUnionVector.java
@@ -31,6 +31,7 @@
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.vector.complex.MapVector;
 import org.apache.arrow.vector.complex.UnionVector;
+import org.apache.arrow.vector.complex.VectorWithOrdinal;
 import org.apache.arrow.vector.complex.impl.UnionWriter;
 import org.apache.arrow.vector.holders.NullableBitHolder;
 import org.apache.arrow.vector.holders.NullableFloat4Holder;
@@ -392,6 +393,19 @@ public void testGetFieldTypeInfo() throws Exception {
     vector.initializeChildrenFromFields(children);
 
     assertTrue(vector.getField().equals(field));
+
+    // Union has 2 child vectors
+    assertEquals(vector.size(), 2);
+
+    // Check child field 0
+    VectorWithOrdinal intChild = vector.getChildVectorWithOrdinal("int");
+    assertEquals(intChild.ordinal, 0);
+    assertEquals(intChild.vector.getField(), children.get(0));
+
+    // Check child field 1
+    VectorWithOrdinal varcharChild = vector.getChildVectorWithOrdinal("varchar");
+    assertEquals(varcharChild.ordinal, 1);
+    assertEquals(varcharChild.vector.getField(), children.get(1));
   }
 
   @Test

From 655b281237b54f6197549dc0b070b934fb3c07a0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Tue, 15 Jun 2021 18:36:56 +0900
Subject: [PATCH 411/719] ARROW-13080: [Release] Generate the API docs in
 ubuntu 20.10
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pass ubuntu version as a docker build variable instead of a container runtime environment variable.

Closes #10532 from kszucs/post-docs-ubuntu-version

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 dev/release/post-09-docs.sh | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/dev/release/post-09-docs.sh b/dev/release/post-09-docs.sh
index c9f75b48b2c..8751b22887f 100755
--- a/dev/release/post-09-docs.sh
+++ b/dev/release/post-09-docs.sh
@@ -43,10 +43,9 @@ popd
 pushd "${ARROW_DIR}"
 git checkout "${release_tag}"
 
-archery docker run \
+UBUNTU=20.10 archery docker run \
   -v "${ARROW_SITE_DIR}/docs:/build/docs" \
   -e ARROW_DOCS_VERSION="${version}" \
-  -e UBUNTU=20.10 \
   ubuntu-docs
 
 : ${PUSH:=1}

From 4b3f6c39d5818873974d1d996d876ed3e09e7870 Mon Sep 17 00:00:00 2001
From: Alessandro Molina <amol@turbogears.org>
Date: Tue, 15 Jun 2021 11:56:52 +0200
Subject: [PATCH 412/719] ARROW-12431: [Python] Mask is inverted when creating
 FixedSizeBinaryArray

Closes #10199 from amol-/ARROW-12431

Authored-by: Alessandro Molina <amol@turbogears.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/python/numpy_to_arrow.cc | 15 +++++++--
 python/pyarrow/tests/test_array.py     | 45 ++++++++++++++++++++++++++
 python/pyarrow/tests/test_pandas.py    |  2 +-
 3 files changed, 59 insertions(+), 3 deletions(-)

diff --git a/cpp/src/arrow/python/numpy_to_arrow.cc b/cpp/src/arrow/python/numpy_to_arrow.cc
index c17e70823d5..a382f766333 100644
--- a/cpp/src/arrow/python/numpy_to_arrow.cc
+++ b/cpp/src/arrow/python/numpy_to_arrow.cc
@@ -594,9 +594,20 @@ Status NumPyConverter::Visit(const FixedSizeBinaryType& type) {
 
   if (mask_ != nullptr) {
     Ndarray1DIndexer<uint8_t> mask_values(mask_);
-    RETURN_NOT_OK(builder.AppendValues(data, length_, mask_values.data()));
+    RETURN_NOT_OK(builder.Reserve(length_));
+    for (int64_t i = 0; i < length_; ++i) {
+      if (mask_values[i]) {
+        RETURN_NOT_OK(builder.AppendNull());
+      } else {
+        RETURN_NOT_OK(builder.Append(data));
+      }
+      data += stride_;
+    }
   } else {
-    RETURN_NOT_OK(builder.AppendValues(data, length_));
+    for (int64_t i = 0; i < length_; ++i) {
+      RETURN_NOT_OK(builder.Append(data));
+      data += stride_;
+    }
   }
 
   std::shared_ptr<Array> result;
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index 086ed4cb160..30500bc3c5b 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -2714,6 +2714,51 @@ def test_array_masked():
     assert arr.type == pa.int64()
 
 
+def test_binary_array_masked():
+    # ARROW-12431
+    masked_basic = pa.array([b'\x05'], type=pa.binary(1),
+                            mask=np.array([False]))
+    assert [b'\x05'] == masked_basic.to_pylist()
+
+    # Fixed Length Binary
+    masked = pa.array(np.array([b'\x05']), type=pa.binary(1),
+                      mask=np.array([False]))
+    assert [b'\x05'] == masked.to_pylist()
+
+    masked_nulls = pa.array(np.array([b'\x05']), type=pa.binary(1),
+                            mask=np.array([True]))
+    assert [None] == masked_nulls.to_pylist()
+
+    # Variable Length Binary
+    masked = pa.array(np.array([b'\x05']), type=pa.binary(),
+                      mask=np.array([False]))
+    assert [b'\x05'] == masked.to_pylist()
+
+    masked_nulls = pa.array(np.array([b'\x05']), type=pa.binary(),
+                            mask=np.array([True]))
+    assert [None] == masked_nulls.to_pylist()
+
+    # Fixed Length Binary, copy
+    npa = np.array([b'aaa', b'bbb', b'ccc']*10)
+    arrow_array = pa.array(npa, type=pa.binary(3),
+                           mask=np.array([False, False, False]*10))
+    npa[npa == b"bbb"] = b"XXX"
+    assert ([b'aaa', b'bbb', b'ccc']*10) == arrow_array.to_pylist()
+
+
+def test_binary_array_strided():
+    # Masked
+    nparray = np.array([b"ab", b"cd", b"ef"])
+    arrow_array = pa.array(nparray[::2], pa.binary(2),
+                           mask=np.array([False, False]))
+    assert [b"ab", b"ef"] == arrow_array.to_pylist()
+
+    # Unmasked
+    nparray = np.array([b"ab", b"cd", b"ef"])
+    arrow_array = pa.array(nparray[::2], pa.binary(2))
+    assert [b"ab", b"ef"] == arrow_array.to_pylist()
+
+
 def test_array_invalid_mask_raises():
     # ARROW-10742
     cases = [
diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py
index 77c18b839c6..7f904433fa2 100644
--- a/python/pyarrow/tests/test_pandas.py
+++ b/python/pyarrow/tests/test_pandas.py
@@ -1705,7 +1705,7 @@ def test_numpy_string_array_to_fixed_size_binary(self):
         expected = pa.array(list(arr), type=pa.binary(3))
         assert converted.equals(expected)
 
-        mask = np.array([True, False, True])
+        mask = np.array([False, True, False])
         converted = pa.array(arr, type=pa.binary(3), mask=mask)
         expected = pa.array([b'foo', None, b'baz'], type=pa.binary(3))
         assert converted.equals(expected)

From 889291bf73ef4ae69c76fd39844582d912f0603f Mon Sep 17 00:00:00 2001
From: Yibo Cai <yibo.cai@arm.com>
Date: Tue, 15 Jun 2021 16:44:37 +0200
Subject: [PATCH 413/719] ARROW-13003: [C++] Fix key map unaligned access

Closes #10489 from cyb70289/13003-unaligned-access

Authored-by: Yibo Cai <yibo.cai@arm.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/compute/exec/key_compare.cc | 21 +++----
 cpp/src/arrow/compute/exec/key_map.cc     | 71 +++++++++++++----------
 cpp/src/arrow/compute/exec/util.cc        | 16 ++---
 3 files changed, 59 insertions(+), 49 deletions(-)

diff --git a/cpp/src/arrow/compute/exec/key_compare.cc b/cpp/src/arrow/compute/exec/key_compare.cc
index f8d74859b01..7a5b0be9990 100644
--- a/cpp/src/arrow/compute/exec/key_compare.cc
+++ b/cpp/src/arrow/compute/exec/key_compare.cc
@@ -21,6 +21,7 @@
 #include <cstdint>
 
 #include "arrow/compute/exec/util.h"
+#include "arrow/util/ubsan.h"
 
 namespace arrow {
 namespace compute {
@@ -170,19 +171,19 @@ void KeyCompare::CompareFixedLengthImp(uint32_t num_rows_already_processed,
     //
     if (num_64bit_words == 0) {
       for (; istripe < num_loops_less_one; ++istripe) {
-        uint64_t key_left = key_left_ptr[istripe];
-        uint64_t key_right = key_right_ptr[istripe];
+        uint64_t key_left = util::SafeLoad(&key_left_ptr[istripe]);
+        uint64_t key_right = util::SafeLoad(&key_right_ptr[istripe]);
         result_or |= (key_left ^ key_right);
       }
     } else if (num_64bit_words == 2) {
-      uint64_t key_left = key_left_ptr[istripe];
-      uint64_t key_right = key_right_ptr[istripe];
+      uint64_t key_left = util::SafeLoad(&key_left_ptr[istripe]);
+      uint64_t key_right = util::SafeLoad(&key_right_ptr[istripe]);
       result_or |= (key_left ^ key_right);
       ++istripe;
     }
 
-    uint64_t key_left = key_left_ptr[istripe];
-    uint64_t key_right = key_right_ptr[istripe];
+    uint64_t key_left = util::SafeLoad(&key_left_ptr[istripe]);
+    uint64_t key_right = util::SafeLoad(&key_right_ptr[istripe]);
     result_or |= (tail_mask & (key_left ^ key_right));
 
     int result = (result_or == 0 ? 0xff : 0);
@@ -246,16 +247,16 @@ void KeyCompare::CompareVaryingLengthImp(
     int32_t istripe;
     // length can be zero
     for (istripe = 0; istripe < (static_cast<int32_t>(length) + 7) / 8 - 1; ++istripe) {
-      uint64_t key_left = key_left_ptr[istripe];
-      uint64_t key_right = key_right_ptr[istripe];
+      uint64_t key_left = util::SafeLoad(&key_left_ptr[istripe]);
+      uint64_t key_right = util::SafeLoad(&key_right_ptr[istripe]);
       result_or |= (key_left ^ key_right);
     }
 
     uint32_t length_remaining = length - static_cast<uint32_t>(istripe) * 8;
     uint64_t tail_mask = tail_masks[length_remaining];
 
-    uint64_t key_left = key_left_ptr[istripe];
-    uint64_t key_right = key_right_ptr[istripe];
+    uint64_t key_left = util::SafeLoad(&key_left_ptr[istripe]);
+    uint64_t key_right = util::SafeLoad(&key_right_ptr[istripe]);
     result_or |= (tail_mask & (key_left ^ key_right));
 
     int result = (result_or == 0 ? 0xff : 0);
diff --git a/cpp/src/arrow/compute/exec/key_map.cc b/cpp/src/arrow/compute/exec/key_map.cc
index c48487793e0..ac47c04403c 100644
--- a/cpp/src/arrow/compute/exec/key_map.cc
+++ b/cpp/src/arrow/compute/exec/key_map.cc
@@ -24,6 +24,7 @@
 
 #include "arrow/util/bit_util.h"
 #include "arrow/util/bitmap_ops.h"
+#include "arrow/util/ubsan.h"
 
 namespace arrow {
 
@@ -153,7 +154,7 @@ void SwissTable::lookup_1(const uint16_t* selection, const int num_keys,
   for (int i = 0; i < num_keys; ++i) {
     int id;
     if (use_selection) {
-      id = selection[i];
+      id = util::SafeLoad(&selection[i]);
     } else {
       id = i;
     }
@@ -168,7 +169,7 @@ void SwissTable::lookup_1(const uint16_t* selection, const int num_keys,
     uint32_t num_block_bytes = num_groupid_bits + 8;
     const uint8_t* blockbase = reinterpret_cast<const uint8_t*>(blocks_) +
                                static_cast<uint64_t>(iblock) * num_block_bytes;
-    uint64_t block = *reinterpret_cast<const uint64_t*>(blockbase);
+    uint64_t block = util::SafeLoadAs<uint64_t>(blockbase);
 
     // Call helper functions to obtain the output triplet:
     // - match (of a stamp) found flag
@@ -182,8 +183,8 @@ void SwissTable::lookup_1(const uint16_t* selection, const int num_keys,
     uint64_t islot = next_slot_to_visit(iblock, islot_in_block, match_found);
 
     out_match_bitvector[id / 8] |= match_found << (id & 7);
-    out_groupids[id] = static_cast<uint32_t>(groupid);
-    out_slot_ids[id] = static_cast<uint32_t>(islot);
+    util::SafeStore(&out_groupids[id], static_cast<uint32_t>(groupid));
+    util::SafeStore(&out_slot_ids[id], static_cast<uint32_t>(islot));
   }
 }
 
@@ -239,7 +240,7 @@ Status SwissTable::lookup_2(const uint32_t* hashes, uint32_t* inout_num_selected
   uint16_t* ids[3]{inout_selection, ids_for_comparison_buf.mutable_data(),
                    ids_inserted_buf.mutable_data()};
   auto push_id = [&num_ids, &ids](int category, int id) {
-    ids[category][num_ids[category]++] = static_cast<uint16_t>(id);
+    util::SafeStore(&ids[category][num_ids[category]++], static_cast<uint16_t>(id));
   };
 
   uint64_t num_groupid_bits = num_groupid_bits_from_log_blocks(log_blocks_);
@@ -256,9 +257,9 @@ Status SwissTable::lookup_2(const uint32_t* hashes, uint32_t* inout_num_selected
        num_inserted_ + num_ids[category_inserted] < num_groups_limit;
        ++num_processed) {
     // row id in original batch
-    int id = inout_selection[num_processed];
+    int id = util::SafeLoad(&inout_selection[num_processed]);
 
-    uint64_t slot_id = wrap_global_slot_id(inout_next_slot_ids[id]);
+    uint64_t slot_id = wrap_global_slot_id(util::SafeLoad(&inout_next_slot_ids[id]));
     uint64_t block_id = slot_id >> 3;
     uint32_t hash = hashes[id];
     uint8_t* blockbase = blocks_ + num_block_bytes * block_id;
@@ -278,11 +279,13 @@ Status SwissTable::lookup_2(const uint32_t* hashes, uint32_t* inout_num_selected
       // In that case we can insert group id value using aligned 64-bit word access.
       ARROW_DCHECK(num_groupid_bits == 8 || num_groupid_bits == 16 ||
                    num_groupid_bits == 32 || num_groupid_bits == 64);
-      reinterpret_cast<uint64_t*>(blockbase + 8)[groupid_bit_offset >> 6] |=
-          (static_cast<uint64_t>(group_id) << (groupid_bit_offset & 63));
+      uint64_t* ptr =
+          &reinterpret_cast<uint64_t*>(blockbase + 8)[groupid_bit_offset >> 6];
+      util::SafeStore(ptr, util::SafeLoad(ptr) | (static_cast<uint64_t>(group_id)
+                                                  << (groupid_bit_offset & 63)));
 
       hashes_[slot_id] = hash;
-      out_group_ids[id] = group_id;
+      util::SafeStore(&out_group_ids[id], group_id);
       push_id(category_inserted, id);
     } else {
       // We search for a slot with a matching stamp within a single block.
@@ -298,8 +301,8 @@ Status SwissTable::lookup_2(const uint32_t* hashes, uint32_t* inout_num_selected
       ARROW_DCHECK(new_groupid < num_inserted_ + num_ids[category_inserted]);
       new_slot =
           static_cast<int>(next_slot_to_visit(block_id, new_slot, new_match_found));
-      inout_next_slot_ids[id] = new_slot;
-      out_group_ids[id] = new_groupid;
+      util::SafeStore(&inout_next_slot_ids[id], new_slot);
+      util::SafeStore(&out_group_ids[id], new_groupid);
       push_id(new_match_found, id);
     }
   }
@@ -410,7 +413,8 @@ Status SwissTable::map(const int num_keys, const uint32_t* hashes,
       //
       for (uint32_t i = 0; i < num_ids; ++i) {
         // First slot in the new starting block
-        slot_ids[ids[i]] = (hashes[ids[i]] >> (bits_hash_ - log_blocks_)) * 8;
+        const int16_t id = util::SafeLoad(&ids[i]);
+        util::SafeStore(&slot_ids[id], (hashes[id] >> (bits_hash_ - log_blocks_)) * 8);
       }
     }
   } while (num_ids > 0);
@@ -457,9 +461,8 @@ Status SwissTable::grow_double() {
         static_cast<int>(CountLeadingZeros(block & kHighBitOfEachByte) >> 3);
     int full_slots_new[2];
     full_slots_new[0] = full_slots_new[1] = 0;
-    *reinterpret_cast<uint64_t*>(double_block_base_new) = kHighBitOfEachByte;
-    *reinterpret_cast<uint64_t*>(double_block_base_new + block_size_after) =
-        kHighBitOfEachByte;
+    util::SafeStore(double_block_base_new, kHighBitOfEachByte);
+    util::SafeStore(double_block_base_new + block_size_after, kHighBitOfEachByte);
 
     for (int j = 0; j < full_slots; ++j) {
       uint64_t slot_id = i * 8 + j;
@@ -474,18 +477,20 @@ Status SwissTable::grow_double() {
       uint8_t stamp_new =
           hash >> ((bits_hash_ - log_blocks_after - bits_stamp_)) & stamp_mask;
       uint64_t group_id_bit_offs = j * num_group_id_bits_before;
-      uint64_t group_id = (*reinterpret_cast<const uint64_t*>(block_base + 8 +
-                                                              (group_id_bit_offs >> 3)) >>
-                           (group_id_bit_offs & 7)) &
-                          group_id_mask_before;
+      uint64_t group_id =
+          (util::SafeLoadAs<uint64_t>(block_base + 8 + (group_id_bit_offs >> 3)) >>
+           (group_id_bit_offs & 7)) &
+          group_id_mask_before;
 
       uint64_t slot_id_new = i * 16 + ihalf * 8 + full_slots_new[ihalf];
       hashes_new[slot_id_new] = hash;
       uint8_t* block_base_new = double_block_base_new + ihalf * block_size_after;
       block_base_new[7 - full_slots_new[ihalf]] = stamp_new;
       int group_id_bit_offs_new = full_slots_new[ihalf] * num_group_id_bits_after;
-      *reinterpret_cast<uint64_t*>(block_base_new + 8 + (group_id_bit_offs_new >> 3)) |=
-          (group_id << (group_id_bit_offs_new & 7));
+      uint64_t* ptr =
+          reinterpret_cast<uint64_t*>(block_base_new + 8 + (group_id_bit_offs_new >> 3));
+      util::SafeStore(ptr,
+                      util::SafeLoad(ptr) | (group_id << (group_id_bit_offs_new & 7)));
       full_slots_new[ihalf]++;
     }
   }
@@ -495,7 +500,7 @@ Status SwissTable::grow_double() {
   for (int i = 0; i < (1 << log_blocks_); ++i) {
     // How many full slots in this block
     uint8_t* block_base = blocks_ + i * block_size_before;
-    uint64_t block = *reinterpret_cast<const uint64_t*>(block_base);
+    uint64_t block = util::SafeLoadAs<uint64_t>(block_base);
     int full_slots = static_cast<int>(CountLeadingZeros(block & kHighBitOfEachByte) >> 3);
 
     for (int j = 0; j < full_slots; ++j) {
@@ -508,21 +513,21 @@ Status SwissTable::grow_double() {
       }
 
       uint64_t group_id_bit_offs = j * num_group_id_bits_before;
-      uint64_t group_id = (*reinterpret_cast<const uint64_t*>(block_base + 8 +
-                                                              (group_id_bit_offs >> 3)) >>
-                           (group_id_bit_offs & 7)) &
-                          group_id_mask_before;
+      uint64_t group_id =
+          (util::SafeLoadAs<uint64_t>(block_base + 8 + (group_id_bit_offs >> 3)) >>
+           (group_id_bit_offs & 7)) &
+          group_id_mask_before;
       uint8_t stamp_new =
           hash >> ((bits_hash_ - log_blocks_after - bits_stamp_)) & stamp_mask;
 
       uint8_t* block_base_new = blocks_new + block_id_new * block_size_after;
-      uint64_t block_new = *reinterpret_cast<const uint64_t*>(block_base_new);
+      uint64_t block_new = util::SafeLoadAs<uint64_t>(block_base_new);
       int full_slots_new =
           static_cast<int>(CountLeadingZeros(block_new & kHighBitOfEachByte) >> 3);
       while (full_slots_new == 8) {
         block_id_new = (block_id_new + 1) & ((1 << log_blocks_after) - 1);
         block_base_new = blocks_new + block_id_new * block_size_after;
-        block_new = *reinterpret_cast<const uint64_t*>(block_base_new);
+        block_new = util::SafeLoadAs<uint64_t>(block_base_new);
         full_slots_new =
             static_cast<int>(CountLeadingZeros(block_new & kHighBitOfEachByte) >> 3);
       }
@@ -530,8 +535,10 @@ Status SwissTable::grow_double() {
       hashes_new[block_id_new * 8 + full_slots_new] = hash;
       block_base_new[7 - full_slots_new] = stamp_new;
       int group_id_bit_offs_new = full_slots_new * num_group_id_bits_after;
-      *reinterpret_cast<uint64_t*>(block_base_new + 8 + (group_id_bit_offs_new >> 3)) |=
-          (group_id << (group_id_bit_offs_new & 7));
+      uint64_t* ptr =
+          reinterpret_cast<uint64_t*>(block_base_new + 8 + (group_id_bit_offs_new >> 3));
+      util::SafeStore(ptr,
+                      util::SafeLoad(ptr) | (group_id << (group_id_bit_offs_new & 7)));
     }
   }
 
@@ -567,7 +574,7 @@ Status SwissTable::init(int64_t hardware_flags, MemoryPool* pool,
 
   // Initialize all status bytes to represent an empty slot.
   for (uint64_t i = 0; i < (static_cast<uint64_t>(1) << log_blocks_); ++i) {
-    *reinterpret_cast<uint64_t*>(blocks_ + i * block_bytes) = kHighBitOfEachByte;
+    util::SafeStore(blocks_ + i * block_bytes, kHighBitOfEachByte);
   }
 
   uint64_t num_slots = 1ULL << (log_blocks_ + 3);
diff --git a/cpp/src/arrow/compute/exec/util.cc b/cpp/src/arrow/compute/exec/util.cc
index 5f1c0776c56..88303348645 100644
--- a/cpp/src/arrow/compute/exec/util.cc
+++ b/cpp/src/arrow/compute/exec/util.cc
@@ -19,6 +19,7 @@
 
 #include "arrow/util/bit_util.h"
 #include "arrow/util/bitmap_ops.h"
+#include "arrow/util/ubsan.h"
 
 namespace arrow {
 
@@ -66,7 +67,7 @@ void BitUtil::bits_to_indexes_internal(int64_t hardware_flags, const int num_bit
 #endif
     *num_indexes = 0;
     for (int i = 0; i < num_bits / unroll; ++i) {
-      uint64_t word = reinterpret_cast<const uint64_t*>(bits)[i];
+      uint64_t word = util::SafeLoad(&reinterpret_cast<const uint64_t*>(bits)[i]);
       if (bit_to_search == 0) {
         word = ~word;
       }
@@ -81,7 +82,8 @@ void BitUtil::bits_to_indexes_internal(int64_t hardware_flags, const int num_bit
 #endif
   // Optionally process the last partial word with masking out bits outside range
   if (tail) {
-    uint64_t word = reinterpret_cast<const uint64_t*>(bits)[num_bits / unroll];
+    uint64_t word =
+        util::SafeLoad(&reinterpret_cast<const uint64_t*>(bits)[num_bits / unroll]);
     if (bit_to_search == 0) {
       word = ~word;
     }
@@ -144,7 +146,7 @@ void BitUtil::bits_to_bytes_internal(const int num_bits, const uint8_t* bits,
     unpacked |= (bits_next & 1);
     unpacked &= 0x0101010101010101ULL;
     unpacked *= 255;
-    reinterpret_cast<uint64_t*>(bytes)[i] = unpacked;
+    util::SafeStore(&reinterpret_cast<uint64_t*>(bytes)[i], unpacked);
   }
 }
 
@@ -153,7 +155,7 @@ void BitUtil::bytes_to_bits_internal(const int num_bits, const uint8_t* bytes,
   constexpr int unroll = 8;
   // Process 8 bits at a time
   for (int i = 0; i < (num_bits + unroll - 1) / unroll; ++i) {
-    uint64_t bytes_next = reinterpret_cast<const uint64_t*>(bytes)[i];
+    uint64_t bytes_next = util::SafeLoad(&reinterpret_cast<const uint64_t*>(bytes)[i]);
     bytes_next &= 0x0101010101010101ULL;
     bytes_next |= (bytes_next >> 7);  // Pairs of adjacent output bits in individual bytes
     bytes_next |= (bytes_next >> 14);  // 4 adjacent output bits in individual bytes
@@ -184,7 +186,7 @@ void BitUtil::bits_to_bytes(int64_t hardware_flags, const int num_bits,
     unpacked |= (bits_next & 1);
     unpacked &= 0x0101010101010101ULL;
     unpacked *= 255;
-    reinterpret_cast<uint64_t*>(bytes)[i] = unpacked;
+    util::SafeStore(&reinterpret_cast<uint64_t*>(bytes)[i], unpacked);
   }
 }
 
@@ -201,7 +203,7 @@ void BitUtil::bytes_to_bits(int64_t hardware_flags, const int num_bits,
   // Process 8 bits at a time
   constexpr int unroll = 8;
   for (int i = num_processed / unroll; i < (num_bits + unroll - 1) / unroll; ++i) {
-    uint64_t bytes_next = reinterpret_cast<const uint64_t*>(bytes)[i];
+    uint64_t bytes_next = util::SafeLoad(&reinterpret_cast<const uint64_t*>(bytes)[i]);
     bytes_next &= 0x0101010101010101ULL;
     bytes_next |= (bytes_next >> 7);  // Pairs of adjacent output bits in individual bytes
     bytes_next |= (bytes_next >> 14);  // 4 adjacent output bits in individual bytes
@@ -220,7 +222,7 @@ bool BitUtil::are_all_bytes_zero(int64_t hardware_flags, const uint8_t* bytes,
   uint64_t result_or = 0;
   uint32_t i;
   for (i = 0; i < num_bytes / 8; ++i) {
-    uint64_t x = reinterpret_cast<const uint64_t*>(bytes)[i];
+    uint64_t x = util::SafeLoad(&reinterpret_cast<const uint64_t*>(bytes)[i]);
     result_or |= x;
   }
   if (num_bytes % 8 > 0) {

From b73bcf0a0ddacd5adc80389b10b8c1b3820ee97a Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Tue, 15 Jun 2021 11:22:25 -0400
Subject: [PATCH 414/719] ARROW-12597: [C++] Enable per-row-group parallelism
 in async Parquet reader

This adds an OptionalParallelForAsync which lets us have per-row-group parallelism without nested parallelism in the async Parquet reader. This also uses TransferAlways, taking care of ARROW-12916. `enable_parallel_column_conversion` is kept as it still affects the threaded scanner.

Closes #10482 from lidavidm/arrow-12597

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/src/arrow/dataset/file_parquet.cc |   6 +-
 cpp/src/arrow/dataset/file_parquet.h  |   3 +-
 cpp/src/arrow/dataset/test_util.h     |  14 +++-
 cpp/src/arrow/ipc/reader.cc           |   9 +--
 cpp/src/arrow/util/parallel.h         |  37 +++++++++
 cpp/src/arrow/util/vector.h           |  13 +++
 cpp/src/parquet/arrow/reader.cc       | 110 ++++++++++++++------------
 7 files changed, 128 insertions(+), 64 deletions(-)

diff --git a/cpp/src/arrow/dataset/file_parquet.cc b/cpp/src/arrow/dataset/file_parquet.cc
index 8c325d21da1..0ebbd0a5333 100644
--- a/cpp/src/arrow/dataset/file_parquet.cc
+++ b/cpp/src/arrow/dataset/file_parquet.cc
@@ -362,11 +362,7 @@ Future<std::shared_ptr<parquet::arrow::FileReader>> ParquetFileFormat::GetReader
             parquet_scan_options->arrow_reader_properties->cache_options());
         arrow_properties.set_io_context(
             parquet_scan_options->arrow_reader_properties->io_context());
-        // TODO: ARROW-12597 will let us enable parallel conversion
-        if (!options->use_threads) {
-          arrow_properties.set_use_threads(
-              parquet_scan_options->enable_parallel_column_conversion);
-        }
+        arrow_properties.set_use_threads(options->use_threads);
         std::unique_ptr<parquet::arrow::FileReader> arrow_reader;
         RETURN_NOT_OK(parquet::arrow::FileReader::Make(options->pool, std::move(reader),
                                                        std::move(arrow_properties),
diff --git a/cpp/src/arrow/dataset/file_parquet.h b/cpp/src/arrow/dataset/file_parquet.h
index 8286e2776cb..347f4032046 100644
--- a/cpp/src/arrow/dataset/file_parquet.h
+++ b/cpp/src/arrow/dataset/file_parquet.h
@@ -222,7 +222,8 @@ class ARROW_DS_EXPORT ParquetFragmentScanOptions : public FragmentScanOptions {
   /// EXPERIMENTAL: Parallelize conversion across columns. This option is ignored if a
   /// scan is already parallelized across input files to avoid thread contention. This
   /// option will be removed after support is added for simultaneous parallelization
-  /// across files and columns.
+  /// across files and columns. Only affects the threaded reader; the async reader
+  /// will parallelize across columns if use_threads is enabled.
   bool enable_parallel_column_conversion = false;
 };
 
diff --git a/cpp/src/arrow/dataset/test_util.h b/cpp/src/arrow/dataset/test_util.h
index 1e4222eec8c..39223eba35b 100644
--- a/cpp/src/arrow/dataset/test_util.h
+++ b/cpp/src/arrow/dataset/test_util.h
@@ -310,6 +310,7 @@ class DatasetFixtureMixinWithParam : public DatasetFixtureMixin,
 
 struct TestFormatParams {
   bool use_async;
+  bool use_threads;
   int num_batches;
   int items_per_batch;
 
@@ -318,7 +319,8 @@ struct TestFormatParams {
   std::string ToString() const {
     // GTest requires this to be alphanumeric
     std::stringstream ss;
-    ss << (use_async ? "Async" : "Sync") << num_batches << "b" << items_per_batch << "r";
+    ss << (use_async ? "Async" : "Sync") << (use_threads ? "Threaded" : "Serial")
+       << num_batches << "b" << items_per_batch << "r";
     return ss.str();
   }
 
@@ -328,8 +330,12 @@ struct TestFormatParams {
   }
 
   static std::vector<TestFormatParams> Values() {
-    std::vector<TestFormatParams> values{{/*async=*/false, 16, 1024},
-                                         {/*async=*/true, 16, 1024}};
+    std::vector<TestFormatParams> values;
+    for (const bool async : std::vector<bool>{true, false}) {
+      for (const bool use_threads : std::vector<bool>{true, false}) {
+        values.push_back(TestFormatParams{async, use_threads, 16, 1024});
+      }
+    }
     return values;
   }
 };
@@ -511,6 +517,7 @@ class FileFormatScanMixin : public FileFormatFixtureMixin<FormatHelper>,
     auto dataset = std::make_shared<FragmentDataset>(schema, FragmentVector{fragment});
     ScannerBuilder builder(dataset, opts_);
     ARROW_EXPECT_OK(builder.UseAsync(GetParam().use_async));
+    ARROW_EXPECT_OK(builder.UseThreads(GetParam().use_threads));
     EXPECT_OK_AND_ASSIGN(auto scanner, builder.Finish());
     EXPECT_OK_AND_ASSIGN(auto batch_it, scanner->ScanBatches());
     return MakeMapIterator([](TaggedRecordBatch tagged) { return tagged.record_batch; },
@@ -519,6 +526,7 @@ class FileFormatScanMixin : public FileFormatFixtureMixin<FormatHelper>,
 
   // Scan the fragment directly, without using the scanner.
   RecordBatchIterator PhysicalBatches(std::shared_ptr<Fragment> fragment) {
+    opts_->use_threads = GetParam().use_threads;
     if (GetParam().use_async) {
       EXPECT_OK_AND_ASSIGN(auto batch_gen, fragment->ScanBatchesAsync(opts_));
       EXPECT_OK_AND_ASSIGN(auto batch_it, MakeGeneratorIterator(std::move(batch_gen)));
diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc
index 7c26bce913d..a3c345cc440 100644
--- a/cpp/src/arrow/ipc/reader.cc
+++ b/cpp/src/arrow/ipc/reader.cc
@@ -54,6 +54,7 @@
 #include "arrow/util/string.h"
 #include "arrow/util/thread_pool.h"
 #include "arrow/util/ubsan.h"
+#include "arrow/util/vector.h"
 #include "arrow/visitor_inline.h"
 
 #include "generated/File_generated.h"  // IWYU pragma: export
@@ -1368,12 +1369,10 @@ Future<IpcFileRecordBatchGenerator::Item> IpcFileRecordBatchGenerator::operator(
     auto read_messages = All(std::move(messages));
     if (executor_) read_messages = executor_->Transfer(read_messages);
     read_dictionaries_ = read_messages.Then(
-        [=](const std::vector<Result<std::shared_ptr<Message>>> maybe_messages)
+        [=](const std::vector<Result<std::shared_ptr<Message>>>& maybe_messages)
             -> Status {
-          std::vector<std::shared_ptr<Message>> messages(state->num_dictionaries());
-          for (size_t i = 0; i < messages.size(); i++) {
-            ARROW_ASSIGN_OR_RAISE(messages[i], maybe_messages[i]);
-          }
+          ARROW_ASSIGN_OR_RAISE(auto messages,
+                                arrow::internal::UnwrapOrRaise(maybe_messages));
           return ReadDictionaries(state.get(), std::move(messages));
         });
   }
diff --git a/cpp/src/arrow/util/parallel.h b/cpp/src/arrow/util/parallel.h
index e56a71b91af..80f60fbdb36 100644
--- a/cpp/src/arrow/util/parallel.h
+++ b/cpp/src/arrow/util/parallel.h
@@ -21,7 +21,9 @@
 #include <vector>
 
 #include "arrow/status.h"
+#include "arrow/util/functional.h"
 #include "arrow/util/thread_pool.h"
+#include "arrow/util/vector.h"
 
 namespace arrow {
 namespace internal {
@@ -44,6 +46,21 @@ Status ParallelFor(int num_tasks, FUNCTION&& func,
   return st;
 }
 
+template <class FUNCTION, typename T,
+          typename R = typename internal::call_traits::return_type<FUNCTION>::ValueType>
+Future<std::vector<R>> ParallelForAsync(
+    std::vector<T> inputs, FUNCTION&& func,
+    Executor* executor = internal::GetCpuThreadPool()) {
+  std::vector<Future<R>> futures(inputs.size());
+  for (size_t i = 0; i < inputs.size(); ++i) {
+    ARROW_ASSIGN_OR_RAISE(futures[i], executor->Submit(func, i, std::move(inputs[i])));
+  }
+  return All(std::move(futures))
+      .Then([](const std::vector<Result<R>>& results) -> Result<std::vector<R>> {
+        return UnwrapOrRaise(results);
+      });
+}
+
 // A parallelizer that takes a `Status(int)` function and calls it with
 // arguments between 0 and `num_tasks - 1`, in sequence or in parallel,
 // depending on the input boolean.
@@ -61,5 +78,25 @@ Status OptionalParallelFor(bool use_threads, int num_tasks, FUNCTION&& func,
   }
 }
 
+// A parallelizer that takes a `Result<R>(int index, T item)` function and
+// calls it with each item from the input array, in sequence or in parallel,
+// depending on the input boolean.
+
+template <class FUNCTION, typename T,
+          typename R = typename internal::call_traits::return_type<FUNCTION>::ValueType>
+Future<std::vector<R>> OptionalParallelForAsync(
+    bool use_threads, std::vector<T> inputs, FUNCTION&& func,
+    Executor* executor = internal::GetCpuThreadPool()) {
+  if (use_threads) {
+    return ParallelForAsync(std::move(inputs), std::forward<FUNCTION>(func), executor);
+  } else {
+    std::vector<R> result(inputs.size());
+    for (size_t i = 0; i < inputs.size(); ++i) {
+      ARROW_ASSIGN_OR_RAISE(result[i], func(i, inputs[i]));
+    }
+    return result;
+  }
+}
+
 }  // namespace internal
 }  // namespace arrow
diff --git a/cpp/src/arrow/util/vector.h b/cpp/src/arrow/util/vector.h
index b9f2e2a45aa..3ef0074aa9d 100644
--- a/cpp/src/arrow/util/vector.h
+++ b/cpp/src/arrow/util/vector.h
@@ -133,5 +133,18 @@ Result<std::vector<T>> UnwrapOrRaise(std::vector<Result<T>>&& results) {
   return std::move(out);
 }
 
+template <typename T>
+Result<std::vector<T>> UnwrapOrRaise(const std::vector<Result<T>>& results) {
+  std::vector<T> out;
+  out.reserve(results.size());
+  for (const auto& result : results) {
+    if (!result.ok()) {
+      return result.status();
+    }
+    out.push_back(result.ValueUnsafe());
+  }
+  return std::move(out);
+}
+
 }  // namespace internal
 }  // namespace arrow
diff --git a/cpp/src/parquet/arrow/reader.cc b/cpp/src/parquet/arrow/reader.cc
index 14eb7495805..4f5f79c964a 100644
--- a/cpp/src/parquet/arrow/reader.cc
+++ b/cpp/src/parquet/arrow/reader.cc
@@ -293,10 +293,12 @@ class FileReaderImpl : public FileReader {
                        const std::vector<int>& indices,
                        std::shared_ptr<Table>* table) override;
 
-  // Helper method used by ReadRowGroups/Generator - read the given row groups/columns,
-  // skipping bounds checks and pre-buffering.
-  Status DecodeRowGroups(const std::vector<int>& row_groups,
-                         const std::vector<int>& indices, std::shared_ptr<Table>* table);
+  // Helper method used by ReadRowGroups - read the given row groups/columns, skipping
+  // bounds checks and pre-buffering. Takes a shared_ptr to self to keep the reader
+  // alive in async contexts.
+  Future<std::shared_ptr<Table>> DecodeRowGroups(
+      std::shared_ptr<FileReaderImpl> self, const std::vector<int>& row_groups,
+      const std::vector<int>& column_indices, ::arrow::internal::Executor* cpu_executor);
 
   Status ReadRowGroups(const std::vector<int>& row_groups,
                        std::shared_ptr<Table>* table) override {
@@ -1007,10 +1009,9 @@ class RowGroupGenerator {
       return SubmitRead(cpu_executor_, reader, row_group, column_indices);
     }
     auto ready = reader->parquet_reader()->WhenBuffered({row_group}, column_indices);
-    // TODO(ARROW-12916): always transfer here
-    if (cpu_executor_) ready = cpu_executor_->Transfer(ready);
-    return ready.Then([=]() -> ::arrow::Result<RecordBatchGenerator> {
-      return ReadOneRowGroup(reader, row_group, column_indices);
+    if (cpu_executor_) ready = cpu_executor_->TransferAlways(ready);
+    return ready.Then([=]() -> ::arrow::Future<RecordBatchGenerator> {
+      return ReadOneRowGroup(cpu_executor_, reader, row_group, column_indices);
     });
   }
 
@@ -1024,31 +1025,25 @@ class RowGroupGenerator {
       ::arrow::internal::Executor* cpu_executor, std::shared_ptr<FileReaderImpl> self,
       const int row_group, const std::vector<int>& column_indices) {
     if (!cpu_executor) {
-      return Future<RecordBatchGenerator>::MakeFinished(
-          ReadOneRowGroup(self, row_group, column_indices));
+      return ReadOneRowGroup(cpu_executor, self, row_group, column_indices);
     }
     // If we have an executor, then force transfer (even if I/O was complete)
-    return ::arrow::DeferNotOk(
-        cpu_executor->Submit(ReadOneRowGroup, self, row_group, column_indices));
+    return ::arrow::DeferNotOk(cpu_executor->Submit(ReadOneRowGroup, cpu_executor, self,
+                                                    row_group, column_indices));
   }
 
-  static ::arrow::Result<RecordBatchGenerator> ReadOneRowGroup(
-      std::shared_ptr<FileReaderImpl> self, const int row_group,
-      const std::vector<int>& column_indices) {
-    std::shared_ptr<::arrow::Table> table;
+  static ::arrow::Future<RecordBatchGenerator> ReadOneRowGroup(
+      ::arrow::internal::Executor* cpu_executor, std::shared_ptr<FileReaderImpl> self,
+      const int row_group, const std::vector<int>& column_indices) {
     // Skips bound checks/pre-buffering, since we've done that already
-    RETURN_NOT_OK(self->DecodeRowGroups({row_group}, column_indices, &table));
-    auto table_reader = std::make_shared<::arrow::TableBatchReader>(*table);
-    ::arrow::RecordBatchVector batches;
-    while (true) {
-      std::shared_ptr<::arrow::RecordBatch> batch;
-      RETURN_NOT_OK(table_reader->ReadNext(&batch));
-      if (!batch) {
-        break;
-      }
-      batches.push_back(batch);
-    }
-    return ::arrow::MakeVectorGenerator(std::move(batches));
+    return self->DecodeRowGroups(self, {row_group}, column_indices, cpu_executor)
+        .Then([](const std::shared_ptr<Table>& table)
+                  -> ::arrow::Result<RecordBatchGenerator> {
+          ::arrow::TableBatchReader table_reader(*table);
+          ::arrow::RecordBatchVector batches;
+          RETURN_NOT_OK(table_reader.ReadAll(&batches));
+          return ::arrow::MakeVectorGenerator(std::move(batches));
+        });
   }
 
   std::shared_ptr<FileReaderImpl> arrow_reader_;
@@ -1104,34 +1099,49 @@ Status FileReaderImpl::ReadRowGroups(const std::vector<int>& row_groups,
     END_PARQUET_CATCH_EXCEPTIONS
   }
 
-  return DecodeRowGroups(row_groups, column_indices, out);
+  auto fut = DecodeRowGroups(/*self=*/nullptr, row_groups, column_indices,
+                             /*cpu_executor=*/nullptr);
+  ARROW_ASSIGN_OR_RAISE(*out, fut.MoveResult());
+  return Status::OK();
 }
 
-// Also used by RowGroupGenerator - skip bounds check/pre-buffer to avoid doing that twice
-Status FileReaderImpl::DecodeRowGroups(const std::vector<int>& row_groups,
-                                       const std::vector<int>& column_indices,
-                                       std::shared_ptr<Table>* out) {
+Future<std::shared_ptr<Table>> FileReaderImpl::DecodeRowGroups(
+    std::shared_ptr<FileReaderImpl> self, const std::vector<int>& row_groups,
+    const std::vector<int>& column_indices, ::arrow::internal::Executor* cpu_executor) {
+  // `self` is used solely to keep `this` alive in an async context - but we use this
+  // in a sync context too so use `this` over `self`
   std::vector<std::shared_ptr<ColumnReaderImpl>> readers;
   std::shared_ptr<::arrow::Schema> result_schema;
   RETURN_NOT_OK(GetFieldReaders(column_indices, row_groups, &readers, &result_schema));
-
-  ::arrow::ChunkedArrayVector columns(readers.size());
-  RETURN_NOT_OK(::arrow::internal::OptionalParallelFor(
-      reader_properties_.use_threads(), static_cast<int>(readers.size()), [&](int i) {
-        return ReadColumn(static_cast<int>(i), row_groups, readers[i].get(), &columns[i]);
-      }));
-
-  int64_t num_rows = 0;
-  if (!columns.empty()) {
-    num_rows = columns[0]->length();
-  } else {
-    for (int i : row_groups) {
-      num_rows += parquet_reader()->metadata()->RowGroup(i)->num_rows();
+  // OptionalParallelForAsync requires an executor
+  if (!cpu_executor) cpu_executor = ::arrow::internal::GetCpuThreadPool();
+
+  auto read_column = [row_groups, self, this](size_t i,
+                                              std::shared_ptr<ColumnReaderImpl> reader)
+      -> ::arrow::Result<std::shared_ptr<::arrow::ChunkedArray>> {
+    std::shared_ptr<::arrow::ChunkedArray> column;
+    RETURN_NOT_OK(ReadColumn(static_cast<int>(i), row_groups, reader.get(), &column));
+    return column;
+  };
+  auto make_table = [result_schema, row_groups, self,
+                     this](const ::arrow::ChunkedArrayVector& columns)
+      -> ::arrow::Result<std::shared_ptr<Table>> {
+    int64_t num_rows = 0;
+    if (!columns.empty()) {
+      num_rows = columns[0]->length();
+    } else {
+      for (int i : row_groups) {
+        num_rows += parquet_reader()->metadata()->RowGroup(i)->num_rows();
+      }
     }
-  }
-
-  *out = Table::Make(std::move(result_schema), std::move(columns), num_rows);
-  return (*out)->Validate();
+    auto table = Table::Make(std::move(result_schema), columns, num_rows);
+    RETURN_NOT_OK(table->Validate());
+    return table;
+  };
+  return ::arrow::internal::OptionalParallelForAsync(reader_properties_.use_threads(),
+                                                     std::move(readers), read_column,
+                                                     cpu_executor)
+      .Then(std::move(make_table));
 }
 
 std::shared_ptr<RowGroupReader> FileReaderImpl::RowGroup(int row_group_index) {

From 85f192a45755b3f15653fdc0a8fbd788086e125f Mon Sep 17 00:00:00 2001
From: Karik Isichei <karik.isichei@gmail.com>
Date: Tue, 15 Jun 2021 18:25:11 +0200
Subject: [PATCH 415/719] ARROW-12096: [C++] Allows users to define arrow
 timestamp unit for Parquet INT96 timestamp

Have added functionality in C++ code to allow users to define the arrow timestamp unit when reading parquet INT96 types. This avoids the overflow bug when trying to convert INT96 values which have dates which are out of bounds for Arrow NS Timestamp.

See added test: `TestArrowReadWrite.DownsampleDeprecatedInt96` which demonstrates use and expected results.

Main discussion of changes in [JIRA Issue ARROW-12096](https://issues.apache.org/jira/browse/ARROW-12096).

Closes #10461 from isichei/ARROW-12096

Lead-authored-by: Karik Isichei <karik.isichei@gmail.com>
Co-authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 .../parquet/arrow/arrow_reader_writer_test.cc | 56 +++++++++++++++++++
 cpp/src/parquet/arrow/reader_internal.cc      | 43 +++++++++-----
 cpp/src/parquet/arrow/schema.cc               |  4 +-
 cpp/src/parquet/arrow/schema_internal.cc      | 19 +++----
 cpp/src/parquet/arrow/schema_internal.h       |  7 ++-
 cpp/src/parquet/properties.h                  | 14 ++++-
 cpp/src/parquet/types.h                       | 43 ++++++++++++--
 7 files changed, 150 insertions(+), 36 deletions(-)

diff --git a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
index 677458ce37e..6c82b8dee78 100644
--- a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
+++ b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
@@ -558,6 +558,35 @@ void ReadSingleColumnFileStatistics(std::unique_ptr<FileReader> file_reader,
   ASSERT_OK(StatisticsAsScalars(*statistics, min, max));
 }
 
+void DownsampleInt96RoundTrip(std::shared_ptr<Array> arrow_vector_in,
+                              std::shared_ptr<Array> arrow_vector_out,
+                              ::arrow::TimeUnit::type unit) {
+  // Create single input table of NS to be written to parquet with INT96
+  auto input_schema =
+      ::arrow::schema({::arrow::field("f", ::arrow::timestamp(TimeUnit::NANO))});
+  auto input = Table::Make(input_schema, {arrow_vector_in});
+
+  // Create an expected schema for each resulting table (one for each "downsampled" ts)
+  auto ex_schema = ::arrow::schema({::arrow::field("f", ::arrow::timestamp(unit))});
+  auto ex_result = Table::Make(ex_schema, {arrow_vector_out});
+
+  std::shared_ptr<Table> result;
+
+  ArrowReaderProperties arrow_reader_prop;
+  arrow_reader_prop.set_coerce_int96_timestamp_unit(unit);
+
+  ASSERT_NO_FATAL_FAILURE(DoRoundtrip(
+      input, input->num_rows(), &result, default_writer_properties(),
+      ArrowWriterProperties::Builder().enable_deprecated_int96_timestamps()->build(),
+      arrow_reader_prop));
+
+  ASSERT_NO_FATAL_FAILURE(::arrow::AssertSchemaEqual(*ex_result->schema(),
+                                                     *result->schema(),
+                                                     /*check_metadata=*/false));
+
+  ASSERT_NO_FATAL_FAILURE(::arrow::AssertTablesEqual(*ex_result, *result));
+}
+
 // Non-template base class for TestParquetIO, to avoid code duplication
 class ParquetIOTestBase : public ::testing::Test {
  public:
@@ -1671,6 +1700,33 @@ TEST(TestArrowReadWrite, UseDeprecatedInt96) {
   ASSERT_NO_FATAL_FAILURE(::arrow::AssertTablesEqual(*ex_result, *result));
 }
 
+TEST(TestArrowReadWrite, DownsampleDeprecatedInt96) {
+  using ::arrow::ArrayFromJSON;
+  using ::arrow::field;
+  using ::arrow::schema;
+
+  // Timestamp values at 2000-01-01 00:00:00,
+  // then with increment unit of 1ns, 1us, 1ms and 1s.
+  auto a_nano =
+      ArrayFromJSON(timestamp(TimeUnit::NANO),
+                    "[946684800000000000, 946684800000000001, 946684800000001000, "
+                    "946684800001000000, 946684801000000000]");
+  auto a_micro = ArrayFromJSON(timestamp(TimeUnit::MICRO),
+                               "[946684800000000, 946684800000000, 946684800000001, "
+                               "946684800001000, 946684801000000]");
+  auto a_milli = ArrayFromJSON(
+      timestamp(TimeUnit::MILLI),
+      "[946684800000, 946684800000, 946684800000, 946684800001, 946684801000]");
+  auto a_second =
+      ArrayFromJSON(timestamp(TimeUnit::SECOND),
+                    "[946684800, 946684800, 946684800, 946684800, 946684801]");
+
+  ASSERT_NO_FATAL_FAILURE(DownsampleInt96RoundTrip(a_nano, a_nano, TimeUnit::NANO));
+  ASSERT_NO_FATAL_FAILURE(DownsampleInt96RoundTrip(a_nano, a_micro, TimeUnit::MICRO));
+  ASSERT_NO_FATAL_FAILURE(DownsampleInt96RoundTrip(a_nano, a_milli, TimeUnit::MILLI));
+  ASSERT_NO_FATAL_FAILURE(DownsampleInt96RoundTrip(a_nano, a_second, TimeUnit::SECOND));
+}
+
 TEST(TestArrowReadWrite, CoerceTimestamps) {
   using ::arrow::ArrayFromVector;
   using ::arrow::field;
diff --git a/cpp/src/parquet/arrow/reader_internal.cc b/cpp/src/parquet/arrow/reader_internal.cc
index 1410a5f89e2..0ffa3e89970 100644
--- a/cpp/src/parquet/arrow/reader_internal.cc
+++ b/cpp/src/parquet/arrow/reader_internal.cc
@@ -353,7 +353,8 @@ Status TransferBool(RecordReader* reader, MemoryPool* pool, Datum* out) {
 }
 
 Status TransferInt96(RecordReader* reader, MemoryPool* pool,
-                     const std::shared_ptr<DataType>& type, Datum* out) {
+                     const std::shared_ptr<DataType>& type, Datum* out,
+                     const ::arrow::TimeUnit::type int96_arrow_time_unit) {
   int64_t length = reader->values_written();
   auto values = reinterpret_cast<const Int96*>(reader->values());
   ARROW_ASSIGN_OR_RAISE(auto data,
@@ -365,7 +366,20 @@ Status TransferInt96(RecordReader* reader, MemoryPool* pool,
       // isn't representable as a 64-bit Unix timestamp.
       *data_ptr++ = 0;
     } else {
-      *data_ptr++ = Int96GetNanoSeconds(values[i]);
+      switch (int96_arrow_time_unit) {
+        case ::arrow::TimeUnit::NANO:
+          *data_ptr++ = Int96GetNanoSeconds(values[i]);
+          break;
+        case ::arrow::TimeUnit::MICRO:
+          *data_ptr++ = Int96GetMicroSeconds(values[i]);
+          break;
+        case ::arrow::TimeUnit::MILLI:
+          *data_ptr++ = Int96GetMilliSeconds(values[i]);
+          break;
+        case ::arrow::TimeUnit::SECOND:
+          *data_ptr++ = Int96GetSeconds(values[i]);
+          break;
+      }
     }
   }
   *out = std::make_shared<TimestampArray>(type, length, std::move(data),
@@ -742,20 +756,19 @@ Status TransferColumnData(RecordReader* reader, std::shared_ptr<DataType> value_
     case ::arrow::Type::TIMESTAMP: {
       const ::arrow::TimestampType& timestamp_type =
           checked_cast<::arrow::TimestampType&>(*value_type);
-      switch (timestamp_type.unit()) {
-        case ::arrow::TimeUnit::MILLI:
-        case ::arrow::TimeUnit::MICRO: {
-          result = TransferZeroCopy(reader, value_type);
-        } break;
-        case ::arrow::TimeUnit::NANO: {
-          if (descr->physical_type() == ::parquet::Type::INT96) {
-            RETURN_NOT_OK(TransferInt96(reader, pool, value_type, &result));
-          } else {
+      if (descr->physical_type() == ::parquet::Type::INT96) {
+        RETURN_NOT_OK(
+            TransferInt96(reader, pool, value_type, &result, timestamp_type.unit()));
+      } else {
+        switch (timestamp_type.unit()) {
+          case ::arrow::TimeUnit::MILLI:
+          case ::arrow::TimeUnit::MICRO:
+          case ::arrow::TimeUnit::NANO:
             result = TransferZeroCopy(reader, value_type);
-          }
-        } break;
-        default:
-          return Status::NotImplemented("TimeUnit not supported");
+            break;
+          default:
+            return Status::NotImplemented("TimeUnit not supported");
+        }
       }
     } break;
     default:
diff --git a/cpp/src/parquet/arrow/schema.cc b/cpp/src/parquet/arrow/schema.cc
index 7610ce17605..eb7fd628dfc 100644
--- a/cpp/src/parquet/arrow/schema.cc
+++ b/cpp/src/parquet/arrow/schema.cc
@@ -454,7 +454,9 @@ bool IsDictionaryReadSupported(const ArrowType& type) {
 ::arrow::Result<std::shared_ptr<ArrowType>> GetTypeForNode(
     int column_index, const schema::PrimitiveNode& primitive_node,
     SchemaTreeContext* ctx) {
-  ASSIGN_OR_RAISE(std::shared_ptr<ArrowType> storage_type, GetArrowType(primitive_node));
+  ASSIGN_OR_RAISE(
+      std::shared_ptr<ArrowType> storage_type,
+      GetArrowType(primitive_node, ctx->properties.coerce_int96_timestamp_unit()));
   if (ctx->properties.read_dictionary(column_index) &&
       IsDictionaryReadSupported(*storage_type)) {
     return ::arrow::dictionary(::arrow::int32(), storage_type);
diff --git a/cpp/src/parquet/arrow/schema_internal.cc b/cpp/src/parquet/arrow/schema_internal.cc
index fbdfa09a040..064bf4f55cc 100644
--- a/cpp/src/parquet/arrow/schema_internal.cc
+++ b/cpp/src/parquet/arrow/schema_internal.cc
@@ -179,9 +179,9 @@ Result<std::shared_ptr<ArrowType>> FromInt64(const LogicalType& logical_type) {
   }
 }
 
-Result<std::shared_ptr<ArrowType>> GetArrowType(Type::type physical_type,
-                                                const LogicalType& logical_type,
-                                                int type_length) {
+Result<std::shared_ptr<ArrowType>> GetArrowType(
+    Type::type physical_type, const LogicalType& logical_type, int type_length,
+    const ::arrow::TimeUnit::type int96_arrow_time_unit) {
   if (logical_type.is_invalid() || logical_type.is_null()) {
     return ::arrow::null();
   }
@@ -194,7 +194,7 @@ Result<std::shared_ptr<ArrowType>> GetArrowType(Type::type physical_type,
     case ParquetType::INT64:
       return FromInt64(logical_type);
     case ParquetType::INT96:
-      return ::arrow::timestamp(::arrow::TimeUnit::NANO);
+      return ::arrow::timestamp(int96_arrow_time_unit);
     case ParquetType::FLOAT:
       return ::arrow::float32();
     case ParquetType::DOUBLE:
@@ -211,14 +211,11 @@ Result<std::shared_ptr<ArrowType>> GetArrowType(Type::type physical_type,
   }
 }
 
-Result<std::shared_ptr<ArrowType>> GetArrowType(const schema::PrimitiveNode& primitive) {
+Result<std::shared_ptr<ArrowType>> GetArrowType(
+    const schema::PrimitiveNode& primitive,
+    const ::arrow::TimeUnit::type int96_arrow_time_unit) {
   return GetArrowType(primitive.physical_type(), *primitive.logical_type(),
-                      primitive.type_length());
-}
-
-Result<std::shared_ptr<ArrowType>> GetArrowType(const ColumnDescriptor& descriptor) {
-  return GetArrowType(descriptor.physical_type(), *descriptor.logical_type(),
-                      descriptor.type_length());
+                      primitive.type_length(), int96_arrow_time_unit);
 }
 
 }  // namespace arrow
diff --git a/cpp/src/parquet/arrow/schema_internal.h b/cpp/src/parquet/arrow/schema_internal.h
index ec0d9571304..fb837c3ee6c 100644
--- a/cpp/src/parquet/arrow/schema_internal.h
+++ b/cpp/src/parquet/arrow/schema_internal.h
@@ -40,9 +40,12 @@ Result<std::shared_ptr<::arrow::DataType>> GetArrowType(Type::type physical_type
                                                         int type_length);
 
 Result<std::shared_ptr<::arrow::DataType>> GetArrowType(
-    const schema::PrimitiveNode& primitive);
+    Type::type physical_type, const LogicalType& logical_type, int type_length,
+    ::arrow::TimeUnit::type int96_arrow_time_unit = ::arrow::TimeUnit::NANO);
+
 Result<std::shared_ptr<::arrow::DataType>> GetArrowType(
-    const ColumnDescriptor& descriptor);
+    const schema::PrimitiveNode& primitive,
+    ::arrow::TimeUnit::type int96_arrow_time_unit = ::arrow::TimeUnit::NANO);
 
 }  // namespace arrow
 }  // namespace parquet
diff --git a/cpp/src/parquet/properties.h b/cpp/src/parquet/properties.h
index 5018fff9531..d217b8efa52 100644
--- a/cpp/src/parquet/properties.h
+++ b/cpp/src/parquet/properties.h
@@ -575,7 +575,8 @@ class PARQUET_EXPORT ArrowReaderProperties {
         read_dict_indices_(),
         batch_size_(kArrowDefaultBatchSize),
         pre_buffer_(false),
-        cache_options_(::arrow::io::CacheOptions::Defaults()) {}
+        cache_options_(::arrow::io::CacheOptions::Defaults()),
+        coerce_int96_timestamp_unit_(::arrow::TimeUnit::NANO) {}
 
   void set_use_threads(bool use_threads) { use_threads_ = use_threads; }
 
@@ -620,6 +621,16 @@ class PARQUET_EXPORT ArrowReaderProperties {
 
   const ::arrow::io::IOContext& io_context() const { return io_context_; }
 
+  /// Set timestamp unit to use for deprecated INT96-encoded timestamps
+  /// (default is NANO).
+  void set_coerce_int96_timestamp_unit(::arrow::TimeUnit::type unit) {
+    coerce_int96_timestamp_unit_ = unit;
+  }
+
+  ::arrow::TimeUnit::type coerce_int96_timestamp_unit() const {
+    return coerce_int96_timestamp_unit_;
+  }
+
  private:
   bool use_threads_;
   std::unordered_set<int> read_dict_indices_;
@@ -627,6 +638,7 @@ class PARQUET_EXPORT ArrowReaderProperties {
   bool pre_buffer_;
   ::arrow::io::IOContext io_context_;
   ::arrow::io::CacheOptions cache_options_;
+  ::arrow::TimeUnit::type coerce_int96_timestamp_unit_;
 };
 
 /// EXPERIMENTAL: Constructs the default ArrowReaderProperties
diff --git a/cpp/src/parquet/types.h b/cpp/src/parquet/types.h
index 4529dbe6133..6bd67f1ee5f 100644
--- a/cpp/src/parquet/types.h
+++ b/cpp/src/parquet/types.h
@@ -591,15 +591,46 @@ static inline void Int96SetNanoSeconds(parquet::Int96& i96, int64_t nanoseconds)
   std::memcpy(&i96.value, &nanoseconds, sizeof(nanoseconds));
 }
 
-static inline int64_t Int96GetNanoSeconds(const parquet::Int96& i96) {
+struct DecodedInt96 {
+  uint64_t days_since_epoch;
+  uint64_t nanoseconds;
+};
+
+static inline DecodedInt96 DecodeInt96Timestamp(const parquet::Int96& i96) {
   // We do the computations in the unsigned domain to avoid unsigned behaviour
   // on overflow.
-  uint64_t days_since_epoch =
-      i96.value[2] - static_cast<uint64_t>(kJulianToUnixEpochDays);
-  uint64_t nanoseconds = 0;
+  DecodedInt96 result;
+  result.days_since_epoch = i96.value[2] - static_cast<uint64_t>(kJulianToUnixEpochDays);
+  result.nanoseconds = 0;
+
+  memcpy(&result.nanoseconds, &i96.value, sizeof(uint64_t));
+  return result;
+}
+
+static inline int64_t Int96GetNanoSeconds(const parquet::Int96& i96) {
+  const auto decoded = DecodeInt96Timestamp(i96);
+  return static_cast<int64_t>(decoded.days_since_epoch * kNanosecondsPerDay +
+                              decoded.nanoseconds);
+}
+
+static inline int64_t Int96GetMicroSeconds(const parquet::Int96& i96) {
+  const auto decoded = DecodeInt96Timestamp(i96);
+  uint64_t microseconds = decoded.nanoseconds / static_cast<uint64_t>(1000);
+  return static_cast<int64_t>(decoded.days_since_epoch * kMicrosecondsPerDay +
+                              microseconds);
+}
+
+static inline int64_t Int96GetMilliSeconds(const parquet::Int96& i96) {
+  const auto decoded = DecodeInt96Timestamp(i96);
+  uint64_t milliseconds = decoded.nanoseconds / static_cast<uint64_t>(1000000);
+  return static_cast<int64_t>(decoded.days_since_epoch * kMillisecondsPerDay +
+                              milliseconds);
+}
 
-  memcpy(&nanoseconds, &i96.value, sizeof(uint64_t));
-  return static_cast<int64_t>(days_since_epoch * kNanosecondsPerDay + nanoseconds);
+static inline int64_t Int96GetSeconds(const parquet::Int96& i96) {
+  const auto decoded = DecodeInt96Timestamp(i96);
+  uint64_t seconds = decoded.nanoseconds / static_cast<uint64_t>(1000000000);
+  return static_cast<int64_t>(decoded.days_since_epoch * kSecondsPerDay + seconds);
 }
 
 static inline std::string Int96ToString(const Int96& a) {

From 8ee5f4a2af11dd8d04931efc7961b4aa2b87ff3b Mon Sep 17 00:00:00 2001
From: Weston Pace <weston.pace@gmail.com>
Date: Tue, 15 Jun 2021 18:33:01 +0200
Subject: [PATCH 416/719] ARROW-13027: [C++] Fix ASAN stack traces in CI

Before change:

```
Direct leak of 65536 byte(s) in 1 object(s) allocated from:
    #0 0x522f09 in
    #1 0x7f28ae5826f4 in
    #2 0x7f28ae57fa5d in
    #3 0x7f28ae58cb0f in
    #4 0x7f28ae58bda0 in
    ...
```

After change:
```
Direct leak of 65536 byte(s) in 1 object(s) allocated from:
    #0 0x522f09 in posix_memalign (/build/cpp/debug/arrow-dataset-file-csv-test+0x522f09)
    #1 0x7f28ae5826f4 in arrow::(anonymous namespace)::SystemAllocator::AllocateAligned(long, unsigned char**) /arrow/cpp/src/arrow/memory_pool.cc:213:24
    #2 0x7f28ae57fa5d in arrow::BaseMemoryPoolImpl<arrow::(anonymous namespace)::SystemAllocator>::Allocate(long, unsigned char**) /arrow/cpp/src/arrow/memory_pool.cc:405:5
    #3 0x7f28ae58cb0f in arrow::PoolBuffer::Reserve(long) /arrow/cpp/src/arrow/memory_pool.cc:717:9
    #4 0x7f28ae58bda0 in arrow::PoolBuffer::Resize(long, bool) /arrow/cpp/src/arrow/memory_pool.cc:741:7
    ...
```

Closes #10498 from westonpace/feature/ARROW-13027--c-fix-asan-stack-traces-in-ci

Authored-by: Weston Pace <weston.pace@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 ci/docker/ubuntu-20.04-cpp.dockerfile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ci/docker/ubuntu-20.04-cpp.dockerfile b/ci/docker/ubuntu-20.04-cpp.dockerfile
index c75c013799a..c2a468d9e35 100644
--- a/ci/docker/ubuntu-20.04-cpp.dockerfile
+++ b/ci/docker/ubuntu-20.04-cpp.dockerfile
@@ -127,6 +127,7 @@ ENV ARROW_BUILD_TESTS=ON \
     ARROW_WITH_SNAPPY=ON \
     ARROW_WITH_ZLIB=ON \
     ARROW_WITH_ZSTD=ON \
+    ASAN_SYMBOLIZER_PATH=/usr/lib/llvm-${llvm}/bin/llvm-symbolizer \
     AWSSDK_SOURCE=BUNDLED \
     GTest_SOURCE=BUNDLED \
     gRPC_SOURCE=BUNDLED \

From b5fcbc6c1256da70783a038ebbb123456716ea99 Mon Sep 17 00:00:00 2001
From: Giordon Stark <kratsg@gmail.com>
Date: Tue, 15 Jun 2021 21:24:49 +0200
Subject: [PATCH 417/719] ARROW-13085: [Python] Document compatible toolchains
 for python bindings

This is a documentation-only PR that adds an additional note for users compibiling C++ extensions using the shared libraries bundled with the python package. Adding this note on the toolchain will help resolve (confusing?) segfaults that occur.

Before (toolchain) change:

- segfault when running the minimal cpp example

After (toolchain) change:

- no segfault when running the minimal cpp example

Please see the linked JIRA for more details.

Closes #10535 from kratsg/docs/pythonBindingExtensions

Lead-authored-by: Giordon Stark <kratsg@gmail.com>
Co-authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 docs/source/python/extending.rst | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/docs/source/python/extending.rst b/docs/source/python/extending.rst
index 738a7369f70..5f6ddb154e6 100644
--- a/docs/source/python/extending.rst
+++ b/docs/source/python/extending.rst
@@ -466,3 +466,14 @@ installed. This function will attempt to create symlinks like
 
    pip install pyarrow
    python -c "import pyarrow; pyarrow.create_library_symlinks()"
+
+Toolchain Compatibility (Linux)
+"""""""""""""""""""""""""""""""
+
+The Python wheels for Linux are built using the
+`PyPA manylinux images <https://quay.io/organization/pypa>`_ which use
+the CentOS `devtoolset-8` or `devtoolset-9` depending on which manylinux
+wheel version (2010 or 2014) is being used. In addition to the other notes
+above, if you are compiling C++ using these shared libraries, you will need
+to make sure you use a compatible toolchain as well or you might see a
+segfault during runtime.

From 44ae09ecc9249b04eaeca5e578336387e918b98c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Wed, 16 Jun 2021 08:39:22 +0900
Subject: [PATCH 418/719] ARROW-13082: [CI] Forward R argument to ubuntu-docs
 build
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`R=4.1 archery docker run ubuntu-docs`

Closes #10534 from kszucs/forward-r-arg-to-docs-build

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 docker-compose.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docker-compose.yml b/docker-compose.yml
index 1133bfa3b29..fa0f0a28ad1 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1294,6 +1294,7 @@ services:
       cache_from:
         - ${REPO}:${ARCH}-ubuntu-${UBUNTU}-docs
       args:
+        r: ${R}
         jdk: ${JDK}
         node: ${NODE}
         base: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-python-3

From 6865a7b11c47f6b0ee7d996a8cbaf0672d2bb8dd Mon Sep 17 00:00:00 2001
From: Diana Clarke <diana.joan.clarke@gmail.com>
Date: Wed, 16 Jun 2021 01:32:21 +0000
Subject: [PATCH 419/719] ARROW-13073: [Developer] archery benchmark list:
 unexpected keyword 'benchmark_filter'

```
$ archery benchmark list
Traceback (most recent call last):
  File "/Users/diana/envs/arrow/bin/archery", line 33, in <module>
    sys.exit(load_entry_point('archery', 'console_scripts', 'archery')())
  File "/Users/diana/envs/arrow/lib/python3.9/site-packages/click/core.py", line 1137, in __call__
    return self.main(*args, **kwargs)
  File "/Users/diana/envs/arrow/lib/python3.9/site-packages/click/core.py", line 1062, in main
    rv = self.invoke(ctx)
  File "/Users/diana/envs/arrow/lib/python3.9/site-packages/click/core.py", line 1668, in invoke
    return _process_result(sub_ctx.command.invoke(sub_ctx))
  File "/Users/diana/envs/arrow/lib/python3.9/site-packages/click/core.py", line 1668, in invoke
    return _process_result(sub_ctx.command.invoke(sub_ctx))
  File "/Users/diana/envs/arrow/lib/python3.9/site-packages/click/core.py", line 1404, in invoke
    return ctx.invoke(self.callback, **ctx.params)
  File "/Users/diana/envs/arrow/lib/python3.9/site-packages/click/core.py", line 763, in invoke
    return __callback(*args, **kwargs)
  File "/Users/diana/envs/arrow/lib/python3.9/site-packages/click/decorators.py", line 26, in new_func
    return f(get_current_context(), *args, **kwargs)
  File "/Users/diana/workspace/arrow/dev/archery/archery/cli.py", line 430, in benchmark_list
    conf = CppBenchmarkRunner.default_configuration(
  File "/Users/diana/workspace/arrow/dev/archery/archery/benchmark/runner.py", line 118, in default_configuration
    return CppConfiguration(
TypeError: __init__() got an unexpected keyword argument 'benchmark_filter'
```

Closes #10528 from dianaclarke/ARROW-13073

Authored-by: Diana Clarke <diana.joan.clarke@gmail.com>
Signed-off-by: Yibo Cai <yibo.cai@arm.com>
---
 dev/archery/archery/cli.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py
index c35b0864900..9442b2917e0 100644
--- a/dev/archery/archery/cli.py
+++ b/dev/archery/archery/cli.py
@@ -416,7 +416,6 @@ def benchmark_filter_options(cmd):
 @click.argument("rev_or_path", metavar="[<rev_or_path>]",
                 default="WORKSPACE", required=False)
 @benchmark_common_options
-@benchmark_filter_options
 @click.pass_context
 def benchmark_list(ctx, rev_or_path, src, preserve, output, cmake_extras,
                    java_home, java_options, build_extras, benchmark_extras,

From f8661e032902a963b0a6a46077d72e804d22560d Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Wed, 16 Jun 2021 14:03:23 +0900
Subject: [PATCH 420/719] ARROW-11782: [GLib][Ruby][Dataset] Remove bindings
 for internal classes

Closes #10533 from kou/glib-dataset-factory

Authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 .../arrow-dataset-glib/arrow-dataset-glib.h   |   2 +
 .../arrow-dataset-glib/arrow-dataset-glib.hpp |   2 +
 c_glib/arrow-dataset-glib/dataset-factory.cpp | 468 ++++++++++++++++
 c_glib/arrow-dataset-glib/dataset-factory.h   |  98 ++++
 c_glib/arrow-dataset-glib/dataset-factory.hpp |  27 +
 c_glib/arrow-dataset-glib/dataset.cpp         | 365 ++++++++++++
 c_glib/arrow-dataset-glib/dataset.h           |  65 +++
 c_glib/arrow-dataset-glib/dataset.hpp         |  48 ++
 c_glib/arrow-dataset-glib/meson.build         |   6 +
 c_glib/arrow-dataset-glib/scanner.cpp         | 527 +++++-------------
 c_glib/arrow-dataset-glib/scanner.h           |  77 +--
 c_glib/arrow-dataset-glib/scanner.hpp         |  20 +-
 c_glib/arrow-glib/basic-array.cpp             |   6 +-
 .../arrow-dataset-glib-docs.xml               |  20 +-
 .../test-file-system-dataset-factory.rb       |  55 ++
 .../test/dataset/test-file-system-dataset.rb  |  23 +-
 .../test/dataset/test-in-memory-scan-task.rb  |  59 --
 c_glib/test/dataset/test-scan-options.rb      |  47 --
 c_glib/test/dataset/test-scanner.rb           |  48 ++
 c_glib/test/helper/buildable.rb               |  19 +-
 .../test/helper/writable.rb                   |  27 +-
 c_glib/test/run-test.rb                       |   5 +-
 cpp/src/arrow/dataset/discovery.h             |  15 +-
 .../{scan-options.rb => dataset.rb}           |  20 +-
 .../lib/arrow-dataset/in-memory-scan-task.rb  |  35 --
 .../lib/arrow-dataset/loader.rb               |   3 +-
 ruby/red-arrow-dataset/test/helper.rb         |   2 +
 ...options.rb => test-file-system-dataset.rb} |  28 +-
 28 files changed, 1462 insertions(+), 655 deletions(-)
 create mode 100644 c_glib/arrow-dataset-glib/dataset-factory.cpp
 create mode 100644 c_glib/arrow-dataset-glib/dataset-factory.h
 create mode 100644 c_glib/arrow-dataset-glib/dataset-factory.hpp
 create mode 100644 c_glib/arrow-dataset-glib/dataset.cpp
 create mode 100644 c_glib/arrow-dataset-glib/dataset.h
 create mode 100644 c_glib/arrow-dataset-glib/dataset.hpp
 create mode 100644 c_glib/test/dataset/test-file-system-dataset-factory.rb
 rename ruby/red-arrow-dataset/test/test-in-memory-scan-task.rb => c_glib/test/dataset/test-file-system-dataset.rb (64%)
 delete mode 100644 c_glib/test/dataset/test-in-memory-scan-task.rb
 delete mode 100644 c_glib/test/dataset/test-scan-options.rb
 create mode 100644 c_glib/test/dataset/test-scanner.rb
 rename ruby/red-arrow-dataset/lib/arrow-dataset/in-memory-fragment.rb => c_glib/test/helper/writable.rb (63%)
 rename ruby/red-arrow-dataset/lib/arrow-dataset/{scan-options.rb => dataset.rb} (69%)
 delete mode 100644 ruby/red-arrow-dataset/lib/arrow-dataset/in-memory-scan-task.rb
 rename ruby/red-arrow-dataset/test/{test-scan-options.rb => test-file-system-dataset.rb} (58%)

diff --git a/c_glib/arrow-dataset-glib/arrow-dataset-glib.h b/c_glib/arrow-dataset-glib/arrow-dataset-glib.h
index ff160452845..03e56516112 100644
--- a/c_glib/arrow-dataset-glib/arrow-dataset-glib.h
+++ b/c_glib/arrow-dataset-glib/arrow-dataset-glib.h
@@ -21,6 +21,8 @@
 
 #include <arrow-glib/arrow-glib.h>
 
+#include <arrow-dataset-glib/dataset-factory.h>
+#include <arrow-dataset-glib/dataset.h>
 #include <arrow-dataset-glib/file-format.h>
 #include <arrow-dataset-glib/fragment.h>
 #include <arrow-dataset-glib/scanner.h>
diff --git a/c_glib/arrow-dataset-glib/arrow-dataset-glib.hpp b/c_glib/arrow-dataset-glib/arrow-dataset-glib.hpp
index c221825bc2a..65341b9b77e 100644
--- a/c_glib/arrow-dataset-glib/arrow-dataset-glib.hpp
+++ b/c_glib/arrow-dataset-glib/arrow-dataset-glib.hpp
@@ -21,6 +21,8 @@
 
 #include <arrow-glib/arrow-glib.hpp>
 
+#include <arrow-dataset-glib/dataset-factory.hpp>
+#include <arrow-dataset-glib/dataset.hpp>
 #include <arrow-dataset-glib/file-format.hpp>
 #include <arrow-dataset-glib/fragment.hpp>
 #include <arrow-dataset-glib/scanner.hpp>
diff --git a/c_glib/arrow-dataset-glib/dataset-factory.cpp b/c_glib/arrow-dataset-glib/dataset-factory.cpp
new file mode 100644
index 00000000000..146db69adfc
--- /dev/null
+++ b/c_glib/arrow-dataset-glib/dataset-factory.cpp
@@ -0,0 +1,468 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <arrow-glib/error.hpp>
+#include <arrow-glib/file-system.hpp>
+
+#include <arrow-dataset-glib/dataset-factory.hpp>
+#include <arrow-dataset-glib/dataset.hpp>
+#include <arrow-dataset-glib/file-format.hpp>
+
+G_BEGIN_DECLS
+
+/**
+ * SECTION: dataset-factory
+ * @section_id: dataset-factory
+ * @title: Dataset factory related classes
+ * @include: arrow-dataset-glib/arrow-dataset-glib.h
+ *
+ * #GADatasetDatasetFactory is a base class for dataset factories.
+ *
+ * #GADatasetFileSystemDatasetFactory is a class for
+ * #GADatasetFileSystemDataset factory.
+ *
+ * Since: 5.0.0
+ */
+
+typedef struct GADatasetDatasetFactoryPrivate_ {
+  std::shared_ptr<arrow::dataset::DatasetFactory> factory;
+} GADatasetDatasetFactoryPrivate;
+
+enum {
+  PROP_DATASET_FACTORY = 1,
+};
+
+G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GADatasetDatasetFactory,
+                                    gadataset_dataset_factory,
+                                    G_TYPE_OBJECT)
+
+#define GADATASET_DATASET_FACTORY_GET_PRIVATE(obj)        \
+  static_cast<GADatasetDatasetFactoryPrivate *>(          \
+    gadataset_dataset_factory_get_instance_private(       \
+      GADATASET_DATASET_FACTORY(obj)))
+
+static void
+gadataset_dataset_factory_finalize(GObject *object)
+{
+  auto priv = GADATASET_DATASET_FACTORY_GET_PRIVATE(object);
+  priv->factory.~shared_ptr();
+  G_OBJECT_CLASS(gadataset_dataset_factory_parent_class)->finalize(object);
+}
+
+static void
+gadataset_dataset_factory_set_property(GObject *object,
+                                       guint prop_id,
+                                       const GValue *value,
+                                       GParamSpec *pspec)
+{
+  auto priv = GADATASET_DATASET_FACTORY_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_DATASET_FACTORY:
+    {
+      auto arrow_factory_pointer =
+        static_cast<std::shared_ptr<arrow::dataset::DatasetFactory> *>(
+          g_value_get_pointer(value));
+      if (arrow_factory_pointer) {
+        priv->factory = *arrow_factory_pointer;
+      }
+    }
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+gadataset_dataset_factory_init(GADatasetDatasetFactory *object)
+{
+  auto priv = GADATASET_DATASET_FACTORY_GET_PRIVATE(object);
+  new(&priv->factory) std::shared_ptr<arrow::dataset::DatasetFactory>;
+}
+
+static void
+gadataset_dataset_factory_class_init(GADatasetDatasetFactoryClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+  gobject_class->finalize     = gadataset_dataset_factory_finalize;
+  gobject_class->set_property = gadataset_dataset_factory_set_property;
+
+  GParamSpec *spec;
+  spec = g_param_spec_pointer("dataset-factory",
+                              "Dataset factory",
+                              "The raw "
+                              "std::shared<arrow::dataset::DatasetFactory> *",
+                              static_cast<GParamFlags>(G_PARAM_WRITABLE |
+                                                       G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_DATASET_FACTORY, spec);
+}
+
+/**
+ * gadataset_dataset_factory_finish:
+ * @factory: A #GADatasetDatasetFactory.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (transfer full) (nullable):
+ *   A newly created #GADatasetDataset on success, %NULL on error.
+ *
+ * Since: 5.0.0
+ */
+GADatasetDataset *
+gadataset_dataset_factory_finish(GADatasetDatasetFactory *factory,
+                                 GError **error)
+{
+  auto arrow_factory = gadataset_dataset_factory_get_raw(factory);
+  auto arrow_dataset_result = arrow_factory->Finish();
+  if (garrow::check(error, arrow_dataset_result, "[dataset-factory][finish]")) {
+    auto arrow_dataset = *arrow_dataset_result;
+    return gadataset_dataset_new_raw(&arrow_dataset);
+  } else {
+    return NULL;
+  }
+}
+
+
+typedef struct GADatasetFileSystemDatasetFactoryPrivate_ {
+  GADatasetFileFormat *format;
+  GArrowFileSystem *file_system;
+  GList *files;
+  arrow::dataset::FileSystemFactoryOptions options;
+} GADatasetFileSystemDatasetFactoryPrivate;
+
+enum {
+  PROP_FORMAT = 1,
+  PROP_FILE_SYSTEM,
+};
+
+G_DEFINE_TYPE_WITH_PRIVATE(GADatasetFileSystemDatasetFactory,
+                           gadataset_file_system_dataset_factory,
+                           GADATASET_TYPE_DATASET_FACTORY)
+
+#define GADATASET_FILE_SYSTEM_DATASET_FACTORY_GET_PRIVATE(obj)  \
+  static_cast<GADatasetFileSystemDatasetFactoryPrivate *>(      \
+    gadataset_file_system_dataset_factory_get_instance_private( \
+      GADATASET_FILE_SYSTEM_DATASET_FACTORY(obj)))
+
+static void
+gadataset_file_system_dataset_factory_dispose(GObject *object)
+{
+  auto priv = GADATASET_FILE_SYSTEM_DATASET_FACTORY_GET_PRIVATE(object);
+
+  if (priv->format) {
+    g_object_unref(priv->format);
+    priv->format = NULL;
+  }
+
+  if (priv->file_system) {
+    g_object_unref(priv->file_system);
+    priv->file_system = NULL;
+  }
+
+  if (priv->files) {
+    g_list_free_full(priv->files, g_object_unref);
+    priv->files = NULL;
+  }
+
+  G_OBJECT_CLASS(
+    gadataset_file_system_dataset_factory_parent_class)->dispose(object);
+}
+
+static void
+gadataset_file_system_dataset_factory_finalize(GObject *object)
+{
+  auto priv = GADATASET_FILE_SYSTEM_DATASET_FACTORY_GET_PRIVATE(object);
+  priv->options.~FileSystemFactoryOptions();
+  G_OBJECT_CLASS(
+    gadataset_file_system_dataset_factory_parent_class)->finalize(object);
+}
+
+static void
+gadataset_file_system_dataset_factory_set_property(GObject *object,
+                                                   guint prop_id,
+                                                   const GValue *value,
+                                                   GParamSpec *pspec)
+{
+  auto priv = GADATASET_FILE_SYSTEM_DATASET_FACTORY_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_FORMAT:
+    priv->format = GADATASET_FILE_FORMAT(g_value_dup_object(value));
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+gadataset_file_system_dataset_factory_get_property(GObject *object,
+                                                   guint prop_id,
+                                                   GValue *value,
+                                                   GParamSpec *pspec)
+{
+  auto priv = GADATASET_FILE_SYSTEM_DATASET_FACTORY_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_FORMAT:
+    g_value_set_object(value, priv->format);
+    break;
+  case PROP_FILE_SYSTEM:
+    g_value_set_object(value, priv->file_system);
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+gadataset_file_system_dataset_factory_init(
+  GADatasetFileSystemDatasetFactory *object)
+{
+  auto priv = GADATASET_FILE_SYSTEM_DATASET_FACTORY_GET_PRIVATE(object);
+  new(&priv->options) arrow::dataset::FileSystemFactoryOptions;
+}
+
+static void
+gadataset_file_system_dataset_factory_class_init(
+  GADatasetFileSystemDatasetFactoryClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+  gobject_class->dispose      = gadataset_file_system_dataset_factory_dispose;
+  gobject_class->finalize     = gadataset_file_system_dataset_factory_finalize;
+  gobject_class->set_property = gadataset_file_system_dataset_factory_set_property;
+  gobject_class->get_property = gadataset_file_system_dataset_factory_get_property;
+
+  GParamSpec *spec;
+  /**
+   * GADatasetFileSystemDatasetFactory:format:
+   *
+   * Format passed to #GADatasetFileSystemDataset.
+   *
+   * Since: 5.0.0
+   */
+  spec = g_param_spec_object("format",
+                             "Format",
+                             "Format passed to GADatasetFileSystemDataset",
+                             GADATASET_TYPE_FILE_FORMAT,
+                             static_cast<GParamFlags>(G_PARAM_READWRITE |
+                                                      G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_FORMAT, spec);
+
+  /**
+   * GADatasetFileSystemDatasetFactory:file-system:
+   *
+   * File system passed to #GADatasetFileSystemDataset.
+   *
+   * Since: 5.0.0
+   */
+  spec = g_param_spec_object("file-system",
+                             "File system",
+                             "File system passed to GADatasetFileSystemDataset",
+                             GARROW_TYPE_FILE_SYSTEM,
+                             static_cast<GParamFlags>(G_PARAM_READABLE));
+  g_object_class_install_property(gobject_class, PROP_FILE_SYSTEM, spec);
+}
+
+/**
+ * gadataset_file_system_factory_new:
+ * @format: A #GADatasetFileFormat.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: A newly created #GADatasetDatasetFileSystemFactory on success,
+ *   %NULL on error.
+ *
+ * Since: 5.0.0
+ */
+GADatasetFileSystemDatasetFactory *
+gadataset_file_system_dataset_factory_new(GADatasetFileFormat *format)
+{
+  return GADATASET_FILE_SYSTEM_DATASET_FACTORY(
+    g_object_new(GADATASET_TYPE_FILE_SYSTEM_DATASET_FACTORY,
+                 "format", format,
+                 NULL));
+}
+
+/**
+ * gadataset_file_system_dataset_factory_set_file_system:
+ * @factory: A #GADatasetFileSystemDatasetFactory.
+ * @file_system: A #GArrowFileSystem.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: %TRUE on success, %FALSE otherwise.
+ *
+ * Since: 5.0.0
+ */
+gboolean
+gadataset_file_system_dataset_factory_set_file_system(
+  GADatasetFileSystemDatasetFactory *factory,
+  GArrowFileSystem *file_system,
+  GError **error)
+{
+  const gchar *context = "[file-system-dataset-factory][set-file-system]";
+  auto priv = GADATASET_FILE_SYSTEM_DATASET_FACTORY_GET_PRIVATE(factory);
+  if (priv->file_system) {
+    garrow::check(error,
+                  arrow::Status::Invalid("file system is already set"),
+                  context);
+    return FALSE;
+  }
+  priv->file_system = file_system;
+  g_object_ref(priv->file_system);
+  return TRUE;
+}
+
+/**
+ * gadataset_file_system_dataset_factory_set_file_system_uri:
+ * @factory: A #GADatasetFileSystemDatasetFactory.
+ * @uri: An URI for file system.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: %TRUE on success, %FALSE otherwise.
+ *
+ * Since: 5.0.0
+ */
+gboolean
+gadataset_file_system_dataset_factory_set_file_system_uri(
+  GADatasetFileSystemDatasetFactory *factory,
+  const gchar *uri,
+  GError **error)
+{
+  const gchar *context = "[file-system-dataset-factory][set-file-system-uri]";
+  auto priv = GADATASET_FILE_SYSTEM_DATASET_FACTORY_GET_PRIVATE(factory);
+  if (priv->file_system) {
+    garrow::check(error,
+                  arrow::Status::Invalid("file system is already set"),
+                  context);
+    return FALSE;
+  }
+  std::string internal_path;
+  auto arrow_file_system_result =
+    arrow::fs::FileSystemFromUri(uri, &internal_path);
+  if (!garrow::check(error, arrow_file_system_result, context)) {
+    return FALSE;
+  }
+  auto arrow_file_system = *arrow_file_system_result;
+  auto arrow_file_info_result = arrow_file_system->GetFileInfo(internal_path);
+  if (!garrow::check(error, arrow_file_info_result, context)) {
+    return FALSE;
+  }
+  priv->file_system = garrow_file_system_new_raw(&arrow_file_system);
+  auto file_info = garrow_file_info_new_raw(*arrow_file_info_result);
+  priv->files = g_list_prepend(priv->files, file_info);
+  return TRUE;
+}
+
+/**
+ * gadataset_file_system_dataset_factory_add_path:
+ * @factory: A #GADatasetFileSystemDatasetFactory.
+ * @path: A path to be added.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: %TRUE on success, %FALSE otherwise.
+ *
+ * Since: 5.0.0
+ */
+gboolean
+gadataset_file_system_dataset_factory_add_path(
+  GADatasetFileSystemDatasetFactory *factory,
+  const gchar *path,
+  GError **error)
+{
+  const gchar *context = "[file-system-dataset-factory][add-path]";
+  auto priv = GADATASET_FILE_SYSTEM_DATASET_FACTORY_GET_PRIVATE(factory);
+  if (!priv->file_system) {
+    garrow::check(error,
+                  arrow::Status::Invalid("file system isn't set"),
+                  context);
+    return FALSE;
+  }
+  auto arrow_file_system = garrow_file_system_get_raw(priv->file_system);
+  auto arrow_file_info_result = arrow_file_system->GetFileInfo(path);
+  if (!garrow::check(error, arrow_file_info_result, context)) {
+    return FALSE;
+  }
+  auto file_info = garrow_file_info_new_raw(*arrow_file_info_result);
+  priv->files = g_list_prepend(priv->files, file_info);
+  return TRUE;
+}
+
+/**
+ * gadataset_file_system_dataset_factory_finish:
+ * @factory: A #GADatasetFileSystemDatasetFactory.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (transfer full) (nullable):
+ *   A newly created #GADatasetFileSystemDataset on success, %NULL on error.
+ *
+ * Since: 5.0.0
+ */
+GADatasetFileSystemDataset *
+gadataset_file_system_dataset_factory_finish(
+  GADatasetFileSystemDatasetFactory *factory,
+  GError **error)
+{
+  const gchar *context = "[file-system-dataset-factory][finish]";
+  auto priv = GADATASET_FILE_SYSTEM_DATASET_FACTORY_GET_PRIVATE(factory);
+  if (!priv->file_system) {
+    garrow::check(error,
+                  arrow::Status::Invalid("file system isn't set"),
+                  context);
+    return NULL;
+  }
+  auto arrow_file_system = garrow_file_system_get_raw(priv->file_system);
+  auto arrow_format = gadataset_file_format_get_raw(priv->format);
+  std::vector<arrow::fs::FileInfo> arrow_files;
+  priv->files = g_list_reverse(priv->files);
+  for (auto node = priv->files; node; node = node->next) {
+    auto file = GARROW_FILE_INFO(node->data);
+    arrow_files.push_back(*garrow_file_info_get_raw(file));
+  }
+  priv->files = g_list_reverse(priv->files);
+  auto arrow_factory_result =
+    arrow::dataset::FileSystemDatasetFactory::Make(arrow_file_system,
+                                                   arrow_files,
+                                                   arrow_format,
+                                                   priv->options);
+  if (!garrow::check(error, arrow_factory_result, context)) {
+    return NULL;
+  }
+  auto arrow_dataset_result = (*arrow_factory_result)->Finish();
+  if (!garrow::check(error, arrow_dataset_result, context)) {
+    return NULL;
+  }
+  auto arrow_dataset = *arrow_dataset_result;
+  return GADATASET_FILE_SYSTEM_DATASET(
+    gadataset_dataset_new_raw(&arrow_dataset,
+                              "dataset", &arrow_dataset,
+                              "file-system", priv->file_system,
+                              "format", priv->format,
+                              NULL));
+}
+
+
+G_END_DECLS
+
+std::shared_ptr<arrow::dataset::DatasetFactory>
+gadataset_dataset_factory_get_raw(GADatasetDatasetFactory *factory)
+{
+  auto priv = GADATASET_DATASET_FACTORY_GET_PRIVATE(factory);
+  return priv->factory;
+}
diff --git a/c_glib/arrow-dataset-glib/dataset-factory.h b/c_glib/arrow-dataset-glib/dataset-factory.h
new file mode 100644
index 00000000000..e2ee3ed9806
--- /dev/null
+++ b/c_glib/arrow-dataset-glib/dataset-factory.h
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <arrow-dataset-glib/dataset.h>
+
+G_BEGIN_DECLS
+
+#define GADATASET_TYPE_DATASET_FACTORY (gadataset_dataset_factory_get_type())
+G_DECLARE_DERIVABLE_TYPE(GADatasetDatasetFactory,
+                         gadataset_dataset_factory,
+                         GADATASET,
+                         DATASET_FACTORY,
+                         GObject)
+struct _GADatasetDatasetFactoryClass
+{
+  GObjectClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GADatasetDataset *
+gadataset_dataset_factory_finish(GADatasetDatasetFactory *factory,
+                                 GError **error);
+
+
+#define GADATASET_TYPE_FILE_SYSTEM_DATASET_FACTORY      \
+  (gadataset_file_system_dataset_factory_get_type())
+G_DECLARE_DERIVABLE_TYPE(GADatasetFileSystemDatasetFactory,
+                         gadataset_file_system_dataset_factory,
+                         GADATASET,
+                         FILE_SYSTEM_DATASET_FACTORY,
+                         GADatasetDatasetFactory)
+struct _GADatasetFileSystemDatasetFactoryClass
+{
+  GADatasetDatasetFactoryClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GADatasetFileSystemDatasetFactory *
+gadataset_file_system_dataset_factory_new(GADatasetFileFormat *file_format);
+GARROW_AVAILABLE_IN_5_0
+gboolean
+gadataset_file_system_dataset_factory_set_file_system(
+  GADatasetFileSystemDatasetFactory *factory,
+  GArrowFileSystem *file_system,
+  GError **error);
+gboolean
+gadataset_file_system_dataset_factory_set_file_system_uri(
+  GADatasetFileSystemDatasetFactory *factory,
+  const gchar *uri,
+  GError **error);
+
+GARROW_AVAILABLE_IN_5_0
+gboolean
+gadataset_file_system_dataset_factory_add_path(
+  GADatasetFileSystemDatasetFactory *factory,
+  const gchar *path,
+  GError **error);
+/*
+GARROW_AVAILABLE_IN_5_0
+gboolean
+gadataset_file_system_dataset_factory_add_file(
+  GADatasetFileSystemDatasetFactory *factory,
+  GArrowFileInfo *file,
+  GError **error);
+GARROW_AVAILABLE_IN_5_0
+gboolean
+gadataset_file_system_dataset_factory_add_selector(
+  GADatasetFileSystemDatasetFactory *factory,
+  GArrorFileSelector *selector,
+  GError **error);
+*/
+
+GARROW_AVAILABLE_IN_5_0
+GADatasetFileSystemDataset *
+gadataset_file_system_dataset_factory_finish(
+  GADatasetFileSystemDatasetFactory *factory,
+  GError **error);
+
+
+G_END_DECLS
diff --git a/c_glib/arrow-dataset-glib/dataset-factory.hpp b/c_glib/arrow-dataset-glib/dataset-factory.hpp
new file mode 100644
index 00000000000..114db35bc59
--- /dev/null
+++ b/c_glib/arrow-dataset-glib/dataset-factory.hpp
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <arrow/dataset/api.h>
+
+#include <arrow-dataset-glib/dataset-factory.h>
+
+std::shared_ptr<arrow::dataset::DatasetFactory>
+gadataset_dataset_factory_get_raw(GADatasetDatasetFactory *factory);
diff --git a/c_glib/arrow-dataset-glib/dataset.cpp b/c_glib/arrow-dataset-glib/dataset.cpp
new file mode 100644
index 00000000000..3bd62f99ef3
--- /dev/null
+++ b/c_glib/arrow-dataset-glib/dataset.cpp
@@ -0,0 +1,365 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <arrow-glib/error.hpp>
+#include <arrow-glib/table.hpp>
+
+#include <arrow-dataset-glib/dataset-factory.hpp>
+#include <arrow-dataset-glib/dataset.hpp>
+#include <arrow-dataset-glib/scanner.h>
+
+G_BEGIN_DECLS
+
+/**
+ * SECTION: dataset
+ * @section_id: dataset
+ * @title: Dataset related classes
+ * @include: arrow-dataset-glib/arrow-dataset-glib.h
+ *
+ * #GADatasetDataset is a base class for datasets.
+ *
+ * #GADatasetFileSystemDataset is a class for file system dataset.
+ *
+ * #GADatasetFileFormat is a base class for file formats.
+ *
+ * #GADatasetCSVFileFormat is a class for CSV file format.
+ *
+ * #GADatasetIPCFileFormat is a class for IPC file format.
+ *
+ * #GADatasetParquetFileFormat is a class for Apache Parquet file format.
+ *
+ * Since: 5.0.0
+ */
+
+typedef struct GADatasetDatasetPrivate_ {
+  std::shared_ptr<arrow::dataset::Dataset> dataset;
+} GADatasetDatasetPrivate;
+
+enum {
+  PROP_DATASET = 1,
+};
+
+G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GADatasetDataset,
+                                    gadataset_dataset,
+                                    G_TYPE_OBJECT)
+
+#define GADATASET_DATASET_GET_PRIVATE(obj)         \
+  static_cast<GADatasetDatasetPrivate *>(          \
+    gadataset_dataset_get_instance_private(        \
+      GADATASET_DATASET(obj)))
+
+static void
+gadataset_dataset_finalize(GObject *object)
+{
+  auto priv = GADATASET_DATASET_GET_PRIVATE(object);
+  priv->dataset.~shared_ptr();
+  G_OBJECT_CLASS(gadataset_dataset_parent_class)->finalize(object);
+}
+
+static void
+gadataset_dataset_set_property(GObject *object,
+                               guint prop_id,
+                               const GValue *value,
+                               GParamSpec *pspec)
+{
+  auto priv = GADATASET_DATASET_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_DATASET:
+    priv->dataset =
+      *static_cast<std::shared_ptr<arrow::dataset::Dataset> *>(
+        g_value_get_pointer(value));
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+gadataset_dataset_init(GADatasetDataset *object)
+{
+  auto priv = GADATASET_DATASET_GET_PRIVATE(object);
+  new(&priv->dataset) std::shared_ptr<arrow::dataset::Dataset>;
+}
+
+static void
+gadataset_dataset_class_init(GADatasetDatasetClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+  gobject_class->finalize     = gadataset_dataset_finalize;
+  gobject_class->set_property = gadataset_dataset_set_property;
+
+  GParamSpec *spec;
+  spec = g_param_spec_pointer("dataset",
+                              "Dataset",
+                              "The raw "
+                              "std::shared<arrow::dataset::Dataset> *",
+                              static_cast<GParamFlags>(G_PARAM_WRITABLE |
+                                                       G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_DATASET, spec);
+}
+
+/**
+ * gadataset_dataset_begin_scan:
+ * @dataset: A #GADatasetDataset.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (transfer full) (nullable):
+ *   A newly created #GADatasetScannerBuilder on success, %NULL on error.
+ *
+ * Since: 5.0.0
+ */
+GADatasetScannerBuilder *
+gadataset_dataset_begin_scan(GADatasetDataset *dataset,
+                             GError **error)
+{
+  return gadataset_scanner_builder_new(dataset, error);
+}
+
+/**
+ * gadataset_dataset_to_table:
+ * @dataset: A #GADatasetDataset.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (transfer full) (nullable):
+ *   A loaded #GArrowTable on success, %NULL on error.
+ *
+ * Since: 5.0.0
+ */
+GArrowTable *
+gadataset_dataset_to_table(GADatasetDataset *dataset,
+                           GError **error)
+{
+  auto arrow_dataset = gadataset_dataset_get_raw(dataset);
+  auto arrow_scanner_builder_result = arrow_dataset->NewScan();
+  if (!garrow::check(error,
+                     arrow_scanner_builder_result,
+                     "[dataset][to-table]")) {
+    return NULL;
+  }
+  auto arrow_scanner_builder = *arrow_scanner_builder_result;
+  auto arrow_scanner_result = arrow_scanner_builder->Finish();
+  if (!garrow::check(error,
+                     arrow_scanner_result,
+                     "[dataset][to-table]")) {
+    return NULL;
+  }
+  auto arrow_scanner = *arrow_scanner_result;
+  auto arrow_table_result = arrow_scanner->ToTable();
+  if (!garrow::check(error,
+                     arrow_scanner_result,
+                     "[dataset][to-table]")) {
+    return NULL;
+  }
+  return garrow_table_new_raw(&(*arrow_table_result));
+}
+
+/**
+ * gadataset_dataset_get_type_name:
+ * @dataset: A #GADatasetDataset.
+ *
+ * Returns: The type name of @dataset.
+ *
+ *   It should be freed with g_free() when no longer needed.
+ *
+ * Since: 5.0.0
+ */
+gchar *
+gadataset_dataset_get_type_name(GADatasetDataset *dataset)
+{
+  const auto arrow_dataset = gadataset_dataset_get_raw(dataset);
+  const auto &type_name = arrow_dataset->type_name();
+  return g_strndup(type_name.data(), type_name.size());
+}
+
+
+typedef struct GADatasetFileSystemDatasetPrivate_ {
+  GADatasetFileFormat *format;
+  GArrowFileSystem *file_system;
+} GADatasetFileSystemDatasetPrivate;
+
+enum {
+  PROP_FORMAT = 1,
+  PROP_FILE_SYSTEM,
+};
+
+G_DEFINE_TYPE_WITH_PRIVATE(GADatasetFileSystemDataset,
+                           gadataset_file_system_dataset,
+                           GADATASET_TYPE_DATASET)
+
+#define GADATASET_FILE_SYSTEM_DATASET_GET_PRIVATE(obj)   \
+  static_cast<GADatasetFileSystemDatasetPrivate *>(      \
+    gadataset_file_system_dataset_get_instance_private(  \
+      GADATASET_FILE_SYSTEM_DATASET(obj)))
+
+static void
+gadataset_file_system_dataset_dispose(GObject *object)
+{
+  auto priv = GADATASET_FILE_SYSTEM_DATASET_GET_PRIVATE(object);
+
+  if (priv->format) {
+    g_object_unref(priv->format);
+    priv->format = NULL;
+  }
+
+  if (priv->file_system) {
+    g_object_unref(priv->file_system);
+    priv->file_system = NULL;
+  }
+
+  G_OBJECT_CLASS(gadataset_file_system_dataset_parent_class)->dispose(object);
+}
+
+static void
+gadataset_file_system_dataset_set_property(GObject *object,
+                                           guint prop_id,
+                                           const GValue *value,
+                                           GParamSpec *pspec)
+{
+  auto priv = GADATASET_FILE_SYSTEM_DATASET_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_FORMAT:
+    priv->format = GADATASET_FILE_FORMAT(g_value_dup_object(value));
+    break;
+  case PROP_FILE_SYSTEM:
+    priv->file_system = GARROW_FILE_SYSTEM(g_value_dup_object(value));
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+gadataset_file_system_dataset_get_property(GObject *object,
+                                           guint prop_id,
+                                           GValue *value,
+                                           GParamSpec *pspec)
+{
+  auto priv = GADATASET_FILE_SYSTEM_DATASET_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_FORMAT:
+    g_value_set_object(value, priv->format);
+    break;
+  case PROP_FILE_SYSTEM:
+    g_value_set_object(value, priv->file_system);
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+gadataset_file_system_dataset_init(GADatasetFileSystemDataset *object)
+{
+}
+
+static void
+gadataset_file_system_dataset_class_init(GADatasetFileSystemDatasetClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+  gobject_class->dispose      = gadataset_file_system_dataset_dispose;
+  gobject_class->set_property = gadataset_file_system_dataset_set_property;
+  gobject_class->get_property = gadataset_file_system_dataset_get_property;
+
+  GParamSpec *spec;
+  /**
+   * GADatasetFileSystemDataset:format:
+   *
+   * Format of the dataset.
+   *
+   * Since: 5.0.0
+   */
+  spec = g_param_spec_object("format",
+                             "Format",
+                             "Format of the dataset",
+                             GADATASET_TYPE_FILE_FORMAT,
+                             static_cast<GParamFlags>(G_PARAM_READWRITE |
+                                                      G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_FORMAT, spec);
+
+  /**
+   * GADatasetFileSystemDataset:file-system:
+   *
+   * File system of the dataset.
+   *
+   * Since: 5.0.0
+   */
+  spec = g_param_spec_object("file-system",
+                             "File system",
+                             "File system of the dataset",
+                             GARROW_TYPE_FILE_SYSTEM,
+                             static_cast<GParamFlags>(G_PARAM_READWRITE |
+                                                      G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_FILE_SYSTEM, spec);
+}
+
+
+G_END_DECLS
+
+GADatasetDataset *
+gadataset_dataset_new_raw(
+  std::shared_ptr<arrow::dataset::Dataset> *arrow_dataset)
+{
+  return gadataset_dataset_new_raw(arrow_dataset,
+                                   "dataset", arrow_dataset,
+                                   NULL);
+}
+
+GADatasetDataset *
+gadataset_dataset_new_raw(
+  std::shared_ptr<arrow::dataset::Dataset> *arrow_dataset,
+  const gchar *first_property_name,
+  ...)
+{
+  va_list args;
+  va_start(args, first_property_name);
+  auto array = gadataset_dataset_new_raw_valist(arrow_dataset,
+                                                first_property_name,
+                                                args);
+  va_end(args);
+  return array;
+}
+
+GADatasetDataset *
+gadataset_dataset_new_raw_valist(
+  std::shared_ptr<arrow::dataset::Dataset> *arrow_dataset,
+  const gchar *first_property_name,
+  va_list args)
+{
+  GType type = GADATASET_TYPE_DATASET;
+  const auto type_name = (*arrow_dataset)->type_name();
+  if (type_name == "filesystem") {
+    type = GADATASET_TYPE_FILE_SYSTEM_DATASET;
+  }
+  return GADATASET_DATASET(g_object_new_valist(type,
+                                               first_property_name,
+                                               args));
+}
+
+std::shared_ptr<arrow::dataset::Dataset>
+gadataset_dataset_get_raw(GADatasetDataset *dataset)
+{
+  auto priv = GADATASET_DATASET_GET_PRIVATE(dataset);
+  return priv->dataset;
+}
diff --git a/c_glib/arrow-dataset-glib/dataset.h b/c_glib/arrow-dataset-glib/dataset.h
new file mode 100644
index 00000000000..97cf35d74d7
--- /dev/null
+++ b/c_glib/arrow-dataset-glib/dataset.h
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <arrow-dataset-glib/file-format.h>
+
+G_BEGIN_DECLS
+
+typedef struct _GADatasetScannerBuilder GADatasetScannerBuilder;
+
+#define GADATASET_TYPE_DATASET (gadataset_dataset_get_type())
+G_DECLARE_DERIVABLE_TYPE(GADatasetDataset,
+                         gadataset_dataset,
+                         GADATASET,
+                         DATASET,
+                         GObject)
+struct _GADatasetDatasetClass
+{
+  GObjectClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GADatasetScannerBuilder *
+gadataset_dataset_begin_scan(GADatasetDataset *dataset,
+                             GError **error);
+GARROW_AVAILABLE_IN_5_0
+GArrowTable *
+gadataset_dataset_to_table(GADatasetDataset *dataset,
+                           GError **error);
+GARROW_AVAILABLE_IN_5_0
+gchar *
+gadataset_dataset_get_type_name(GADatasetDataset *dataset);
+
+
+#define GADATASET_TYPE_FILE_SYSTEM_DATASET      \
+  (gadataset_file_system_dataset_get_type())
+G_DECLARE_DERIVABLE_TYPE(GADatasetFileSystemDataset,
+                         gadataset_file_system_dataset,
+                         GADATASET,
+                         FILE_SYSTEM_DATASET,
+                         GADatasetDataset)
+struct _GADatasetFileSystemDatasetClass
+{
+  GADatasetDatasetClass parent_class;
+};
+
+
+G_END_DECLS
diff --git a/c_glib/arrow-dataset-glib/dataset.hpp b/c_glib/arrow-dataset-glib/dataset.hpp
new file mode 100644
index 00000000000..94dddd2eb7a
--- /dev/null
+++ b/c_glib/arrow-dataset-glib/dataset.hpp
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <arrow/dataset/api.h>
+
+#include <arrow-dataset-glib/dataset.h>
+
+GADatasetDataset *
+gadataset_dataset_new_raw(
+  std::shared_ptr<arrow::dataset::Dataset> *arrow_dataset);
+GADatasetDataset *
+gadataset_dataset_new_raw(
+  std::shared_ptr<arrow::dataset::Dataset> *arrow_dataset,
+  const gchar *first_property_name,
+  ...);
+GADatasetDataset *
+gadataset_dataset_new_raw_valist(
+  std::shared_ptr<arrow::dataset::Dataset> *arrow_dataset,
+  const gchar *first_property_name,
+  va_list arg);
+std::shared_ptr<arrow::dataset::Dataset>
+gadataset_dataset_get_raw(GADatasetDataset *dataset);
+
+GADatasetFileFormat *
+gadataset_file_format_new_raw(
+  std::shared_ptr<arrow::dataset::FileFormat> *arrow_format);
+std::shared_ptr<arrow::dataset::Dataset>
+gadataset_dataset_get_raw(GADatasetDataset *dataset);
+
+
diff --git a/c_glib/arrow-dataset-glib/meson.build b/c_glib/arrow-dataset-glib/meson.build
index 04dc420b057..b3f617330cf 100644
--- a/c_glib/arrow-dataset-glib/meson.build
+++ b/c_glib/arrow-dataset-glib/meson.build
@@ -18,6 +18,8 @@
 # under the License.
 
 sources = files(
+  'dataset-factory.cpp',
+  'dataset.cpp',
   'file-format.cpp',
   'fragment.cpp',
   'scanner.cpp',
@@ -25,6 +27,8 @@ sources = files(
 
 c_headers = files(
   'arrow-dataset-glib.h',
+  'dataset-factory.h',
+  'dataset.h',
   'file-format.h',
   'fragment.h',
   'scanner.h',
@@ -32,6 +36,8 @@ c_headers = files(
 
 cpp_headers = files(
   'arrow-dataset-glib.hpp',
+  'dataset-factory.hpp',
+  'dataset.hpp',
   'file-format.hpp',
   'fragment.hpp',
   'scanner.hpp',
diff --git a/c_glib/arrow-dataset-glib/scanner.cpp b/c_glib/arrow-dataset-glib/scanner.cpp
index 04778c8ae99..7f8d8be5fdb 100644
--- a/c_glib/arrow-dataset-glib/scanner.cpp
+++ b/c_glib/arrow-dataset-glib/scanner.cpp
@@ -17,13 +17,10 @@
  * under the License.
  */
 
-#include <arrow/util/iterator.h>
-
 #include <arrow-glib/error.hpp>
-#include <arrow-glib/record-batch.hpp>
-#include <arrow-glib/schema.hpp>
+#include <arrow-glib/table.hpp>
 
-#include <arrow-dataset-glib/fragment.hpp>
+#include <arrow-dataset-glib/dataset.hpp>
 #include <arrow-dataset-glib/scanner.hpp>
 
 G_BEGIN_DECLS
@@ -31,72 +28,55 @@ G_BEGIN_DECLS
 /**
  * SECTION: scanner
  * @section_id: scanner
- * @title: Scanner classes
+ * @title: Scanner related classes
  * @include: arrow-dataset-glib/arrow-dataset-glib.h
  *
- * #GADatasetScanOptions is a class for a set of scan options.
- *
- * #GADatasetScanTask is an abstract class for a scan task.
+ * #GADatasetScanner is a class for scanning dataset.
  *
- * #GADatasetInMemoryScanTask is a class for a scan task of record batches.
+ * #GADatasetScannerBuilder is a class for building a scanner.
  *
- * Since: 1.0.0
+ * Since: 5.0.0
  */
 
-/* arrow::dataset::ScanOptions */
-
-typedef struct GADatasetScanOptionsPrivate_ {
-  std::shared_ptr<arrow::dataset::ScanOptions> scan_options;
-} GADatasetScanOptionsPrivate;
+typedef struct GADatasetScannerPrivate_ {
+  std::shared_ptr<arrow::dataset::Scanner> scanner;
+} GADatasetScannerPrivate;
 
 enum {
-  PROP_SCAN_OPTIONS = 1,
-  PROP_FILTER,
-  PROP_EVALUATOR,
-  PROP_PROJECTOR,
-  PROP_BATCH_SIZE,
-  PROP_USE_THREADS,
+  PROP_SCANNER = 1,
 };
 
-G_DEFINE_TYPE_WITH_PRIVATE(GADatasetScanOptions,
-                           gadataset_scan_options,
+G_DEFINE_TYPE_WITH_PRIVATE(GADatasetScanner,
+                           gadataset_scanner,
                            G_TYPE_OBJECT)
 
-#define GADATASET_SCAN_OPTIONS_GET_PRIVATE(obj)       \
-  static_cast<GADatasetScanOptionsPrivate *>(         \
-    gadataset_scan_options_get_instance_private(      \
-      GADATASET_SCAN_OPTIONS(obj)))
+#define GADATASET_SCANNER_GET_PRIVATE(obj)        \
+  static_cast<GADatasetScannerPrivate *>(         \
+    gadataset_scanner_get_instance_private(       \
+      GADATASET_SCANNER(obj)))
 
 static void
-gadataset_scan_options_finalize(GObject *object)
+gadataset_scanner_finalize(GObject *object)
 {
-  auto priv = GADATASET_SCAN_OPTIONS_GET_PRIVATE(object);
-
-  priv->scan_options.~shared_ptr();
-
-  G_OBJECT_CLASS(gadataset_scan_options_parent_class)->finalize(object);
+  auto priv = GADATASET_SCANNER_GET_PRIVATE(object);
+  priv->scanner.~shared_ptr();
+  G_OBJECT_CLASS(gadataset_scanner_parent_class)->finalize(object);
 }
 
 static void
-gadataset_scan_options_set_property(GObject *object,
-                                    guint prop_id,
-                                    const GValue *value,
-                                    GParamSpec *pspec)
+gadataset_scanner_set_property(GObject *object,
+                               guint prop_id,
+                               const GValue *value,
+                               GParamSpec *pspec)
 {
-  auto priv = GADATASET_SCAN_OPTIONS_GET_PRIVATE(object);
+  auto priv = GADATASET_SCANNER_GET_PRIVATE(object);
 
   switch (prop_id) {
-  case PROP_SCAN_OPTIONS:
-    priv->scan_options =
-      *static_cast<std::shared_ptr<arrow::dataset::ScanOptions> *>(
+  case PROP_SCANNER:
+    priv->scanner =
+      *static_cast<std::shared_ptr<arrow::dataset::Scanner> *>(
         g_value_get_pointer(value));
     break;
-  case PROP_BATCH_SIZE:
-    priv->scan_options->batch_size = g_value_get_int64(value);
-    break;
-  case PROP_USE_THREADS:
-    priv->scan_options->use_threads = g_value_get_boolean(value);
-    break;
   default:
     G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
     break;
@@ -104,193 +84,92 @@ gadataset_scan_options_set_property(GObject *object,
 }
 
 static void
-gadataset_scan_options_get_property(GObject *object,
-                                    guint prop_id,
-                                    GValue *value,
-                                    GParamSpec *pspec)
+gadataset_scanner_init(GADatasetScanner *object)
 {
-  auto priv = GADATASET_SCAN_OPTIONS_GET_PRIVATE(object);
-
-  switch (prop_id) {
-  case PROP_BATCH_SIZE:
-    g_value_set_int64(value, priv->scan_options->batch_size);
-    break;
-  case PROP_USE_THREADS:
-    g_value_set_boolean(value, priv->scan_options->use_threads);
-    break;
-  default:
-    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
-    break;
-  }
+  auto priv = GADATASET_SCANNER_GET_PRIVATE(object);
+  new(&priv->scanner) std::shared_ptr<arrow::dataset::Scanner>;
 }
 
 static void
-gadataset_scan_options_init(GADatasetScanOptions *object)
+gadataset_scanner_class_init(GADatasetScannerClass *klass)
 {
-  auto priv = GADATASET_SCAN_OPTIONS_GET_PRIVATE(object);
-  new(&priv->scan_options) std::shared_ptr<arrow::dataset::ScanOptions>;
-}
+  auto gobject_class = G_OBJECT_CLASS(klass);
+  gobject_class->finalize     = gadataset_scanner_finalize;
+  gobject_class->set_property = gadataset_scanner_set_property;
 
-static void
-gadataset_scan_options_class_init(GADatasetScanOptionsClass *klass)
-{
-  GObjectClass *gobject_class;
   GParamSpec *spec;
-
-  gobject_class = G_OBJECT_CLASS(klass);
-
-  gobject_class->finalize     = gadataset_scan_options_finalize;
-  gobject_class->set_property = gadataset_scan_options_set_property;
-  gobject_class->get_property = gadataset_scan_options_get_property;
-
-  auto scan_options = std::make_shared<arrow::dataset::ScanOptions>();
-
-  spec = g_param_spec_pointer("scan-options",
-                              "ScanOptions",
-                              "The raw std::shared<arrow::dataset::ScanOptions> *",
+  spec = g_param_spec_pointer("scanner",
+                              "Scanner",
+                              "The raw std::shared<arrow::dataset::Scanner> *",
                               static_cast<GParamFlags>(G_PARAM_WRITABLE |
                                                        G_PARAM_CONSTRUCT_ONLY));
-  g_object_class_install_property(gobject_class, PROP_SCAN_OPTIONS, spec);
-
-  // TODO: PROP_FILTER
-  // TODO: PROP_EVALUATOR
-  // TODO: PROP_PROJECTOR
-
-  /**
-   * GADatasetScanOptions:batch-size:
-   *
-   * Maximum row count for scanned batches.
-   *
-   * Since: 1.0.0
-   */
-  spec = g_param_spec_int64("batch-size",
-                            "Batch size",
-                            "Maximum row count for scanned batches",
-                            0,
-                            G_MAXINT64,
-                            scan_options->batch_size,
-                            static_cast<GParamFlags>(G_PARAM_READWRITE));
-  g_object_class_install_property(gobject_class, PROP_BATCH_SIZE, spec);
-
-  /**
-   * GADatasetScanOptions:use-threads:
-   *
-   * Indicate if the Scanner should make use of a ThreadPool.
-   *
-   * Since: 4.0.0
-   */
-  spec = g_param_spec_boolean("use-threads",
-                              "Use threads",
-                              "Indicate if the Scanner should make use of a ThreadPool",
-                              scan_options->use_threads,
-                              static_cast<GParamFlags>(G_PARAM_READWRITE));
-  g_object_class_install_property(gobject_class, PROP_USE_THREADS, spec);
+  g_object_class_install_property(gobject_class, PROP_SCANNER, spec);
 }
 
 /**
- * gadataset_scan_options_new:
- * @schema: A #GArrowSchema.
- *
- * Returns: A newly created #GADatasetScanOptions.
- *
- * Since: 1.0.0
- */
-GADatasetScanOptions *
-gadataset_scan_options_new(GArrowSchema *schema)
-{
-  auto arrow_schema = garrow_schema_get_raw(schema);
-  auto arrow_scan_options = std::make_shared<arrow::dataset::ScanOptions>();
-  arrow_scan_options->dataset_schema = arrow_schema;
-  return gadataset_scan_options_new_raw(&arrow_scan_options);
-}
-
-/**
- * gadataset_scan_options_get_schema:
- * @scan_options: A #GADatasetScanOptions.
+ * gadataset_scanner_to_table:
+ * @scanner: A #GADatasetScanner.
+ * @error: (nullable): Return location for a #GError or %NULL.
  *
- * Returns: (transfer full): A #GArrowSchema.
+ * Returns: (transfer full) (nullable):
+ *   A newly created #GArrowTable on success, %NULL on error.
  *
- * Since: 1.0.0
+ * Since: 5.0.0
  */
-GArrowSchema *
-gadataset_scan_options_get_schema(GADatasetScanOptions *scan_options)
+GArrowTable *
+gadataset_scanner_to_table(GADatasetScanner *scanner,
+                           GError **error)
 {
-  auto priv = GADATASET_SCAN_OPTIONS_GET_PRIVATE(scan_options);
-  auto arrow_schema = priv->scan_options->dataset_schema;
-  return garrow_schema_new_raw(&arrow_schema);
+  auto arrow_scanner = gadataset_scanner_get_raw(scanner);
+  auto arrow_table_result = arrow_scanner->ToTable();
+  if (garrow::check(error, arrow_table_result, "[scanner][to-table]")) {
+    auto arrow_table = *arrow_table_result;
+    return garrow_table_new_raw(&arrow_table);
+  } else {
+    return NULL;
+  }
 }
 
-/* arrow::dataset::ScanTask */
 
-typedef struct GADatasetScanTaskPrivate_ {
-  std::shared_ptr<arrow::dataset::ScanTask> scan_task;
-  GADatasetScanOptions *options;
-  GADatasetFragment *fragment;
-} GADatasetScanTaskPrivate;
+typedef struct GADatasetScannerBuilderPrivate_ {
+  std::shared_ptr<arrow::dataset::ScannerBuilder> scanner_builder;
+} GADatasetScannerBuilderPrivate;
 
 enum {
-  PROP_SCAN_TASK = 1,
-  PROP_OPTIONS,
-  PROP_FRAGMENT,
+  PROP_SCANNER_BUILDER = 1,
 };
 
-G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GADatasetScanTask,
-                                    gadataset_scan_task,
-                                    G_TYPE_OBJECT)
-
-#define GADATASET_SCAN_TASK_GET_PRIVATE(obj)          \
-  static_cast<GADatasetScanTaskPrivate *>(            \
-    gadataset_scan_task_get_instance_private(         \
-      GADATASET_SCAN_TASK(obj)))
-
-static void
-gadataset_scan_task_dispose(GObject *object)
-{
-  auto priv = GADATASET_SCAN_TASK_GET_PRIVATE(object);
-
-  if (priv->options) {
-    g_object_unref(priv->options);
-    priv->options = NULL;
-  }
+G_DEFINE_TYPE_WITH_PRIVATE(GADatasetScannerBuilder,
+                           gadataset_scanner_builder,
+                           G_TYPE_OBJECT)
 
-  if (priv->fragment) {
-    g_object_unref(priv->fragment);
-    priv->fragment = NULL;
-  }
-
-  G_OBJECT_CLASS(gadataset_scan_task_parent_class)->dispose(object);
-}
+#define GADATASET_SCANNER_BUILDER_GET_PRIVATE(obj)        \
+  static_cast<GADatasetScannerBuilderPrivate *>(          \
+    gadataset_scanner_builder_get_instance_private(       \
+      GADATASET_SCANNER_BUILDER(obj)))
 
 static void
-gadataset_scan_task_finalize(GObject *object)
+gadataset_scanner_builder_finalize(GObject *object)
 {
-  auto priv = GADATASET_SCAN_TASK_GET_PRIVATE(object);
-
-  priv->scan_task.~shared_ptr();
-
-  G_OBJECT_CLASS(gadataset_scan_task_parent_class)->finalize(object);
+  auto priv = GADATASET_SCANNER_BUILDER_GET_PRIVATE(object);
+  priv->scanner_builder.~shared_ptr();
+  G_OBJECT_CLASS(gadataset_scanner_builder_parent_class)->finalize(object);
 }
 
 static void
-gadataset_scan_task_set_property(GObject *object,
-                           guint prop_id,
-                           const GValue *value,
-                           GParamSpec *pspec)
+gadataset_scanner_builder_set_property(GObject *object,
+                                       guint prop_id,
+                                       const GValue *value,
+                                       GParamSpec *pspec)
 {
-  auto priv = GADATASET_SCAN_TASK_GET_PRIVATE(object);
+  auto priv = GADATASET_SCANNER_BUILDER_GET_PRIVATE(object);
 
   switch (prop_id) {
-  case PROP_SCAN_TASK:
-    priv->scan_task =
-      *static_cast<std::shared_ptr<arrow::dataset::ScanTask> *>(
+  case PROP_SCANNER_BUILDER:
+    priv->scanner_builder =
+      *static_cast<std::shared_ptr<arrow::dataset::ScannerBuilder> *>(
         g_value_get_pointer(value));
     break;
-  case PROP_OPTIONS:
-    priv->options = GADATASET_SCAN_OPTIONS(g_value_dup_object(value));
-    break;
-  case PROP_FRAGMENT:
-    priv->fragment = GADATASET_FRAGMENT(g_value_dup_object(value));
-    break;
   default:
     G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
     break;
@@ -298,230 +177,112 @@ gadataset_scan_task_set_property(GObject *object,
 }
 
 static void
-gadataset_scan_task_get_property(GObject *object,
-                           guint prop_id,
-                           GValue *value,
-                           GParamSpec *pspec)
+gadataset_scanner_builder_init(GADatasetScannerBuilder *object)
 {
-  auto priv = GADATASET_SCAN_TASK_GET_PRIVATE(object);
-
-  switch (prop_id) {
-  case PROP_OPTIONS:
-    g_value_set_object(value, priv->options);
-    break;
-  case PROP_FRAGMENT:
-    g_value_set_object(value, priv->fragment);
-    break;
-  default:
-    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
-    break;
-  }
+  auto priv = GADATASET_SCANNER_BUILDER_GET_PRIVATE(object);
+  new(&priv->scanner_builder) std::shared_ptr<arrow::dataset::ScannerBuilder>;
 }
 
 static void
-gadataset_scan_task_init(GADatasetScanTask *object)
-{
-  auto priv = GADATASET_SCAN_TASK_GET_PRIVATE(object);
-  new(&priv->scan_task) std::shared_ptr<arrow::dataset::ScanTask>;
-}
-
-static void
-gadataset_scan_task_class_init(GADatasetScanTaskClass *klass)
+gadataset_scanner_builder_class_init(GADatasetScannerBuilderClass *klass)
 {
   auto gobject_class = G_OBJECT_CLASS(klass);
-
-  gobject_class->dispose      = gadataset_scan_task_dispose;
-  gobject_class->finalize     = gadataset_scan_task_finalize;
-  gobject_class->set_property = gadataset_scan_task_set_property;
-  gobject_class->get_property = gadataset_scan_task_get_property;
+  gobject_class->finalize     = gadataset_scanner_builder_finalize;
+  gobject_class->set_property = gadataset_scanner_builder_set_property;
 
   GParamSpec *spec;
-  spec = g_param_spec_pointer("scan-task",
-                              "ScanTask",
-                              "The raw std::shared<arrow::dataset::ScanTask> *",
+  spec = g_param_spec_pointer("scanner-builder",
+                              "Scanner builder",
+                              "The raw "
+                              "std::shared<arrow::dataset::ScannerBuilder> *",
                               static_cast<GParamFlags>(G_PARAM_WRITABLE |
                                                        G_PARAM_CONSTRUCT_ONLY));
-  g_object_class_install_property(gobject_class, PROP_SCAN_TASK, spec);
-
-  /**
-   * GADatasetScanTask:options:
-   *
-   * The options of the scan task.
-   *
-   * Since: 1.0.0
-   */
-  spec = g_param_spec_object("options",
-                             "Options",
-                             "The options of the scan task",
-                             GADATASET_TYPE_SCAN_OPTIONS,
-                             static_cast<GParamFlags>(G_PARAM_READWRITE |
-                                                      G_PARAM_CONSTRUCT_ONLY));
-  g_object_class_install_property(gobject_class, PROP_OPTIONS, spec);
-
-  /**
-   * GADatasetScanTask:fragment:
-   *
-   * The fragment of the scan task.
-   *
-   * Since: 4.0.0
-   */
-  spec = g_param_spec_object("fragment",
-                             "Fragment",
-                             "The fragment of the scan task",
-                             GADATASET_TYPE_FRAGMENT,
-                             static_cast<GParamFlags>(G_PARAM_READWRITE |
-                                                      G_PARAM_CONSTRUCT_ONLY));
-  g_object_class_install_property(gobject_class, PROP_FRAGMENT, spec);
+  g_object_class_install_property(gobject_class, PROP_SCANNER_BUILDER, spec);
 }
 
 /**
- * gadataset_scan_task_get_options:
- * @scan_task: A #GADatasetScanTask.
- *
- * Returns: (transfer full): A #GADatasetScanOptions.
- *
- * Since: 1.0.0
- */
-GADatasetScanOptions *
-gadataset_scan_task_get_options(GADatasetScanTask *scan_task)
-{
-  auto priv = GADATASET_SCAN_TASK_GET_PRIVATE(scan_task);
-  if (priv->options) {
-    g_object_ref(priv->options);
-    return priv->options;
-  }
-
-  auto arrow_options = priv->scan_task->options();
-  return gadataset_scan_options_new_raw(&arrow_options);
-}
-
-/**
- * gadataset_scan_task_get_fragment:
- * @scan_task: A #GADatasetFragment.
+ * gadataset_scanner_builder_new:
+ * @dataset: A #GADatasetDatast to be scanned.
+ * @error: (nullable): Return location for a #GError or %NULL.
  *
- * Returns: (transfer full): A #GADatasetFragment.
+ * Returns: (nullable): A newly created #GADatasetScannerBuilder on success,
+ *   %NULL on error.
  *
- * Since: 4.0.0
+ * Since: 5.0.0
  */
-GADatasetFragment *
-gadataset_scan_task_get_fragment(GADatasetScanTask *scan_task)
+GADatasetScannerBuilder *
+gadataset_scanner_builder_new(GADatasetDataset *dataset, GError **error)
 {
-  auto priv = GADATASET_SCAN_TASK_GET_PRIVATE(scan_task);
-  if (priv->fragment) {
-    g_object_ref(priv->fragment);
-    return priv->fragment;
+  auto arrow_dataset = gadataset_dataset_get_raw(dataset);
+  auto arrow_scanner_builder_result = arrow_dataset->NewScan();
+  if (garrow::check(error,
+                    arrow_scanner_builder_result,
+                    "[scanner-builder][new]")) {
+    auto arrow_scanner_builder = *arrow_scanner_builder_result;
+    return gadataset_scanner_builder_new_raw(&arrow_scanner_builder);
+  } else {
+    return NULL;
   }
-
-  auto arrow_fragment = priv->scan_task->fragment();
-  return gadataset_fragment_new_raw(&arrow_fragment);
 }
 
 /**
- * gadataset_scan_task_execute:
- * @scan_task: A #GADatasetScanTask.
+ * gadataset_scanner_builder_finish:
+ * @builder: A #GADatasetScannerBuilder.
  * @error: (nullable): Return location for a #GError or %NULL.
  *
- * Returns: (nullable) (transfer full): A newly created #GArrowRecordBatchIterator,
- *   or %NULL on error.
+ * Returns: (transfer full) (nullable):
+ *   A newly created #GADatasetScanner on success, %NULL on error.
  *
- * Since: 1.0.0
+ * Since: 5.0.0
  */
-GArrowRecordBatchIterator *
-gadataset_scan_task_execute(GADatasetScanTask *scan_task,
-                            GError **error)
+GADatasetScanner *
+gadataset_scanner_builder_finish(GADatasetScannerBuilder *builder,
+                                 GError **error)
 {
-  auto priv = GADATASET_SCAN_TASK_GET_PRIVATE(scan_task);
-  auto arrow_result = priv->scan_task->Execute();
-  if (garrow::check(error, arrow_result, "[datasets][scan-task][execute]")) {
-    auto arrow_record_batch_iteraor = std::move(*arrow_result);
-    return garrow_record_batch_iterator_new_raw(&arrow_record_batch_iteraor);
+  auto arrow_builder = gadataset_scanner_builder_get_raw(builder);
+  auto arrow_scanner_result = arrow_builder->Finish();
+  if (garrow::check(error, arrow_scanner_result, "[scanner-builder][finish]")) {
+    auto arrow_scanner = *arrow_scanner_result;
+    return gadataset_scanner_new_raw(&arrow_scanner);
   } else {
     return NULL;
   }
 }
 
-/* arrow::dataset::InMemoryScanTask */
-
-G_DEFINE_TYPE(GADatasetInMemoryScanTask,
-              gadataset_in_memory_scan_task,
-              GADATASET_TYPE_SCAN_TASK)
-
-static void
-gadataset_in_memory_scan_task_init(GADatasetInMemoryScanTask *object)
-{
-}
 
-static void
-gadataset_in_memory_scan_task_class_init(GADatasetInMemoryScanTaskClass *klass)
-{
-}
+G_END_DECLS
 
-/**
- * gadataset_in_memory_scan_task_new:
- * @record_batches: (array length=n_record_batches):
- *   (element-type GArrowRecordBatch): The record batches of the table.
- * @n_record_batches: The number of record batches.
- * @options: A #GADatasetScanOptions.
- * @fragment: A #GADatasetInMemoryFragment.
- *
- * Returns: A newly created #GADatasetInMemoryScanTask.
- *
- * Since: 1.0.0
- */
-GADatasetInMemoryScanTask *
-gadataset_in_memory_scan_task_new(GArrowRecordBatch **record_batches,
-                                  gsize n_record_batches,
-                                  GADatasetScanOptions *options,
-                                  GADatasetInMemoryFragment *fragment)
+GADatasetScanner *
+gadataset_scanner_new_raw(
+  std::shared_ptr<arrow::dataset::Scanner> *arrow_scanner)
 {
-  std::vector<std::shared_ptr<arrow::RecordBatch>> arrow_record_batches;
-  arrow_record_batches.reserve(n_record_batches);
-  for (gsize i = 0; i < n_record_batches; ++i) {
-    auto arrow_record_batch = garrow_record_batch_get_raw(record_batches[i]);
-    arrow_record_batches.push_back(arrow_record_batch);
-  }
-  auto arrow_options = gadataset_scan_options_get_raw(options);
-  auto arrow_fragment = gadataset_fragment_get_raw(GADATASET_FRAGMENT(fragment));
-  auto arrow_in_memory_scan_task =
-    std::make_shared<arrow::dataset::InMemoryScanTask>(arrow_record_batches,
-                                                       arrow_options,
-                                                       arrow_fragment);
-  return gadataset_in_memory_scan_task_new_raw(&arrow_in_memory_scan_task,
-                                         options,
-                                         fragment);
+  auto scanner =
+    GADATASET_SCANNER(g_object_new(GADATASET_TYPE_SCANNER,
+                                   "scanner", arrow_scanner,
+                                   NULL));
+  return scanner;
 }
 
-G_END_DECLS
-
-GADatasetScanOptions *
-gadataset_scan_options_new_raw(
-  std::shared_ptr<arrow::dataset::ScanOptions> *arrow_scan_options)
+std::shared_ptr<arrow::dataset::Scanner>
+gadataset_scanner_get_raw(GADatasetScanner *scanner)
 {
-  auto scan_options =
-    GADATASET_SCAN_OPTIONS(g_object_new(GADATASET_TYPE_SCAN_OPTIONS,
-                                  "scan-options", arrow_scan_options,
-                                  NULL));
-  return scan_options;
+  auto priv = GADATASET_SCANNER_GET_PRIVATE(scanner);
+  return priv->scanner;
 }
 
-std::shared_ptr<arrow::dataset::ScanOptions>
-gadataset_scan_options_get_raw(GADatasetScanOptions *scan_options)
+GADatasetScannerBuilder *
+gadataset_scanner_builder_new_raw(
+  std::shared_ptr<arrow::dataset::ScannerBuilder> *arrow_scanner_builder)
 {
-  auto priv = GADATASET_SCAN_OPTIONS_GET_PRIVATE(scan_options);
-  return priv->scan_options;
+  return GADATASET_SCANNER_BUILDER(
+    g_object_new(GADATASET_TYPE_SCANNER_BUILDER,
+                 "scanner-builder", arrow_scanner_builder,
+                 NULL));
 }
 
-GADatasetInMemoryScanTask *
-gadataset_in_memory_scan_task_new_raw(
-  std::shared_ptr<arrow::dataset::InMemoryScanTask> *arrow_in_memory_scan_task,
-  GADatasetScanOptions *options,
-  GADatasetInMemoryFragment *fragment)
+std::shared_ptr<arrow::dataset::ScannerBuilder>
+gadataset_scanner_builder_get_raw(GADatasetScannerBuilder *scanner_builder)
 {
-  auto in_memory_scan_task =
-    GADATASET_IN_MEMORY_SCAN_TASK(g_object_new(GADATASET_TYPE_IN_MEMORY_SCAN_TASK,
-                                         "scan-task", arrow_in_memory_scan_task,
-                                         "options", options,
-                                         "fragment", fragment,
-                                         NULL));
-  return in_memory_scan_task;
+  auto priv = GADATASET_SCANNER_BUILDER_GET_PRIVATE(scanner_builder);
+  return priv->scanner_builder;
 }
diff --git a/c_glib/arrow-dataset-glib/scanner.h b/c_glib/arrow-dataset-glib/scanner.h
index 90a60363e82..446815d6db1 100644
--- a/c_glib/arrow-dataset-glib/scanner.h
+++ b/c_glib/arrow-dataset-glib/scanner.h
@@ -19,76 +19,45 @@
 
 #pragma once
 
-#include <arrow-glib/arrow-glib.h>
-
+#include <arrow-dataset-glib/dataset.h>
 #include <arrow-dataset-glib/fragment.h>
 
 G_BEGIN_DECLS
 
-/* arrow::dataset::ScanOptions */
-
-#define GADATASET_TYPE_SCAN_OPTIONS (gadataset_scan_options_get_type())
-G_DECLARE_DERIVABLE_TYPE(GADatasetScanOptions,
-                         gadataset_scan_options,
+#define GADATASET_TYPE_SCANNER (gadataset_scanner_get_type())
+G_DECLARE_DERIVABLE_TYPE(GADatasetScanner,
+                         gadataset_scanner,
                          GADATASET,
-                         SCAN_OPTIONS,
+                         SCANNER,
                          GObject)
-struct _GADatasetScanOptionsClass
+struct _GADatasetScannerClass
 {
   GObjectClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_5_0
+GArrowTable *
+gadataset_scanner_to_table(GADatasetScanner *scanner,
+                           GError **error);
 
-GARROW_AVAILABLE_IN_1_0
-GADatasetScanOptions *
-gadataset_scan_options_new(GArrowSchema *schema);
-GARROW_AVAILABLE_IN_1_0
-GArrowSchema *
-gadataset_scan_options_get_schema(GADatasetScanOptions *scan_options);
-
-/* arrow::dataset::ScanTask */
-
-#define GADATASET_TYPE_SCAN_TASK (gadataset_scan_task_get_type())
-G_DECLARE_DERIVABLE_TYPE(GADatasetScanTask,
-                         gadataset_scan_task,
+#define GADATASET_TYPE_SCANNER_BUILDER (gadataset_scanner_builder_get_type())
+G_DECLARE_DERIVABLE_TYPE(GADatasetScannerBuilder,
+                         gadataset_scanner_builder,
                          GADATASET,
-                         SCAN_TASK,
+                         SCANNER_BUILDER,
                          GObject)
-struct _GADatasetScanTaskClass
+struct _GADatasetScannerBuilderClass
 {
   GObjectClass parent_class;
 };
 
-GARROW_AVAILABLE_IN_1_0
-GADatasetScanOptions *
-gadataset_scan_task_get_options(GADatasetScanTask *scan_task);
-GARROW_AVAILABLE_IN_4_0
-GADatasetFragment *
-gadataset_scan_task_get_fragment(GADatasetScanTask *scan_task);
-GARROW_AVAILABLE_IN_1_0
-GArrowRecordBatchIterator *
-gadataset_scan_task_execute(GADatasetScanTask *scan_task,
-                            GError **error);
-
-/* arrow::dataset::InMemoryScanTask */
-
-#define GADATASET_TYPE_IN_MEMORY_SCAN_TASK      \
-  (gadataset_in_memory_scan_task_get_type())
-G_DECLARE_DERIVABLE_TYPE(GADatasetInMemoryScanTask,
-                         gadataset_in_memory_scan_task,
-                         GADATASET,
-                         IN_MEMORY_SCAN_TASK,
-                         GADatasetScanTask)
-struct _GADatasetInMemoryScanTaskClass
-{
-  GADatasetScanTaskClass parent_class;
-};
-
-GARROW_AVAILABLE_IN_1_0
-GADatasetInMemoryScanTask *
-gadataset_in_memory_scan_task_new(GArrowRecordBatch **record_batches,
-                                  gsize n_record_batches,
-                                  GADatasetScanOptions *options,
-                                  GADatasetInMemoryFragment *fragment);
+GARROW_AVAILABLE_IN_5_0
+GADatasetScannerBuilder *
+gadataset_scanner_builder_new(GADatasetDataset *dataset,
+                              GError **error);
+GARROW_AVAILABLE_IN_5_0
+GADatasetScanner *
+gadataset_scanner_builder_finish(GADatasetScannerBuilder *builder,
+                                 GError **error);
 
 G_END_DECLS
diff --git a/c_glib/arrow-dataset-glib/scanner.hpp b/c_glib/arrow-dataset-glib/scanner.hpp
index ad3ac6a03cd..663ab6fc44b 100644
--- a/c_glib/arrow-dataset-glib/scanner.hpp
+++ b/c_glib/arrow-dataset-glib/scanner.hpp
@@ -24,14 +24,14 @@
 #include <arrow-dataset-glib/fragment.h>
 #include <arrow-dataset-glib/scanner.h>
 
-GADatasetScanOptions *
-gadataset_scan_options_new_raw(
-  std::shared_ptr<arrow::dataset::ScanOptions> *arrow_scan_options);
-std::shared_ptr<arrow::dataset::ScanOptions>
-gadataset_scan_options_get_raw(GADatasetScanOptions *scan_options);
+GADatasetScanner *
+gadataset_scanner_new_raw(
+  std::shared_ptr<arrow::dataset::Scanner> *arrow_scanner);
+std::shared_ptr<arrow::dataset::Scanner>
+gadataset_scanner_get_raw(GADatasetScanner *scanner);
 
-GADatasetInMemoryScanTask *
-gadataset_in_memory_scan_task_new_raw(
-  std::shared_ptr<arrow::dataset::InMemoryScanTask> *arrow_in_memory_scan_task,
-  GADatasetScanOptions *scan_options,
-  GADatasetInMemoryFragment *fragment);
+GADatasetScannerBuilder *
+gadataset_scanner_builder_new_raw(
+  std::shared_ptr<arrow::dataset::ScannerBuilder> *arrow_scanner_builder);
+std::shared_ptr<arrow::dataset::ScannerBuilder>
+gadataset_scanner_builder_get_raw(GADatasetScannerBuilder *scanner_builder);
diff --git a/c_glib/arrow-glib/basic-array.cpp b/c_glib/arrow-glib/basic-array.cpp
index d5b221a36b0..1eb65b88964 100644
--- a/c_glib/arrow-glib/basic-array.cpp
+++ b/c_glib/arrow-glib/basic-array.cpp
@@ -221,9 +221,9 @@ garrow_equal_options_set_property(GObject *object,
 
 static void
 garrow_equal_options_get_property(GObject *object,
-                                 guint prop_id,
-                                 GValue *value,
-                                 GParamSpec *pspec)
+                                  guint prop_id,
+                                  GValue *value,
+                                  GParamSpec *pspec)
 {
   auto priv = GARROW_EQUAL_OPTIONS_GET_PRIVATE(object);
 
diff --git a/c_glib/doc/arrow-dataset-glib/arrow-dataset-glib-docs.xml b/c_glib/doc/arrow-dataset-glib/arrow-dataset-glib-docs.xml
index 9a1ae059378..3e8da5bd9d1 100644
--- a/c_glib/doc/arrow-dataset-glib/arrow-dataset-glib-docs.xml
+++ b/c_glib/doc/arrow-dataset-glib/arrow-dataset-glib-docs.xml
@@ -36,9 +36,15 @@
     </releaseinfo>
   </bookinfo>
 
-  <part id="read">
-    <title>Read</title>
-    <chapter id="scan">
+  <part id="data">
+    <title>Data</title>
+    <chapter id="source">
+      <title>Dataset</title>
+      <xi:include href="xml/dataset.xml"/>
+      <title>Dataset factory</title>
+      <xi:include href="xml/dataset-factory.xml"/>
+    </chapter>
+    <chapter id="read">
       <title>Scan</title>
       <xi:include href="xml/scanner.xml"/>
       <title>Fragment</title>
@@ -60,6 +66,10 @@
     <title>Index of deprecated API</title>
     <xi:include href="xml/api-index-deprecated.xml"><xi:fallback /></xi:include>
   </index>
+  <index id="api-index-5-0-0" role="5.0.0">
+    <title>Index of new symbols in 4.0.0</title>
+    <xi:include href="xml/api-index-5.0.0.xml"><xi:fallback /></xi:include>
+  </index>
   <index id="api-index-4-0-0" role="4.0.0">
     <title>Index of new symbols in 4.0.0</title>
     <xi:include href="xml/api-index-4.0.0.xml"><xi:fallback /></xi:include>
@@ -68,9 +78,5 @@
     <title>Index of new symbols in 3.0.0</title>
     <xi:include href="xml/api-index-3.0.0.xml"><xi:fallback /></xi:include>
   </index>
-  <index id="api-index-1-0-0" role="1.0.0">
-    <title>Index of new symbols in 1.0.0</title>
-    <xi:include href="xml/api-index-1.0.0.xml"><xi:fallback /></xi:include>
-  </index>
   <xi:include href="xml/annotation-glossary.xml"><xi:fallback /></xi:include>
 </book>
diff --git a/c_glib/test/dataset/test-file-system-dataset-factory.rb b/c_glib/test/dataset/test-file-system-dataset-factory.rb
new file mode 100644
index 00000000000..9ef629c222e
--- /dev/null
+++ b/c_glib/test/dataset/test-file-system-dataset-factory.rb
@@ -0,0 +1,55 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestDatasetFileSystemDatasetFactory < Test::Unit::TestCase
+  include Helper::Buildable
+  include Helper::Writable
+
+  def setup
+    omit("Arrow Dataset is required") unless defined?(ArrowDataset)
+    Dir.mktmpdir do |tmpdir|
+      @dir = tmpdir
+      @path = File.join(@dir, "table.arrow")
+      @table = build_table(visible: [
+                             build_boolean_array([true, false, true]),
+                             build_boolean_array([false, true, false, true]),
+                           ],
+                           point: [
+                             build_int32_array([1, 2, 3]),
+                             build_int32_array([-1, -2, -3, -4]),
+                           ])
+      @format = ArrowDataset::IPCFileFormat.new
+      write_table(@table, @path)
+      yield
+    end
+  end
+
+  def test_file_system
+    factory = ArrowDataset::FileSystemDatasetFactory.new(@format)
+    factory.file_system = Arrow::LocalFileSystem.new
+    factory.add_path(File.expand_path(@path))
+    dataset = factory.finish
+    assert_equal(@table, dataset.to_table)
+  end
+
+  def test_file_system_uri
+    factory = ArrowDataset::FileSystemDatasetFactory.new(@format)
+    factory.file_system_uri = build_file_uri(@path)
+    dataset = factory.finish
+    assert_equal(@table, dataset.to_table)
+  end
+end
diff --git a/ruby/red-arrow-dataset/test/test-in-memory-scan-task.rb b/c_glib/test/dataset/test-file-system-dataset.rb
similarity index 64%
rename from ruby/red-arrow-dataset/test/test-in-memory-scan-task.rb
rename to c_glib/test/dataset/test-file-system-dataset.rb
index 37f041d3159..6d6ec3b18c6 100644
--- a/ruby/red-arrow-dataset/test/test-in-memory-scan-task.rb
+++ b/c_glib/test/dataset/test-file-system-dataset.rb
@@ -15,19 +15,20 @@
 # specific language governing permissions and limitations
 # under the License.
 
-class TestInMemoryScanTask < Test::Unit::TestCase
+class TestDatasetFileSystemDataset < Test::Unit::TestCase
   def setup
-    @record_batches = [
-      Arrow::RecordBatch.new(visible: [true, false, true],
-                             point: [1, 2, 3]),
-    ]
+    omit("Arrow Dataset is required") unless defined?(ArrowDataset)
+    Dir.mktmpdir do |tmpdir|
+      @dir = tmpdir
+      format = ArrowDataset::IPCFileFormat.new
+      factory = ArrowDataset::FileSystemDatasetFactory.new(format)
+      factory.file_system = Arrow::LocalFileSystem.new
+      @dataset = factory.finish
+      yield
+    end
   end
 
-  sub_test_case(".new") do
-    test("[[Arrow::RecordBatch]]") do
-      scan_task = ArrowDataset::InMemoryScanTask.new(@record_batches)
-      assert_equal(@record_batches,
-                   scan_task.execute.to_a)
-    end
+  def test_type_name
+    assert_equal("filesystem", @dataset.type_name)
   end
 end
diff --git a/c_glib/test/dataset/test-in-memory-scan-task.rb b/c_glib/test/dataset/test-in-memory-scan-task.rb
deleted file mode 100644
index 06e3d0d2424..00000000000
--- a/c_glib/test/dataset/test-in-memory-scan-task.rb
+++ /dev/null
@@ -1,59 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-class TestDatasetInMemoryScanTask < Test::Unit::TestCase
-  include Helper::Buildable
-
-  def setup
-    omit("Arrow Dataset is required") unless defined?(ArrowDataset)
-    fields = [
-      Arrow::Field.new("visible", Arrow::BooleanDataType.new),
-      Arrow::Field.new("point", Arrow::Int32DataType.new),
-    ]
-    @schema = Arrow::Schema.new(fields)
-    @record_batches = [
-      [
-        build_boolean_array([true, false, true]),
-        build_int32_array([1, 2, 3]),
-      ],
-      [
-        build_boolean_array([false, true, false, true]),
-        build_int32_array([-1, -2, -3, -4]),
-      ]
-    ].collect do |columns|
-      Arrow::RecordBatch.new(@schema, columns[0].length, columns)
-    end
-
-    @scan_options = ArrowDataset::ScanOptions.new(@schema)
-
-    @fragment = ArrowDataset::InMemoryFragment.new(@schema,
-                                                   @record_batches)
-
-    @scan_task = ArrowDataset::InMemoryScanTask.new(@record_batches,
-                                                    @scan_options,
-                                                    @fragment)
-  end
-
-  def test_scan_options
-    assert_equal(@scan_options, @scan_task.options)
-  end
-
-  def test_execute
-    assert_equal(@record_batches,
-                 @scan_task.execute.to_list)
-  end
-end
diff --git a/c_glib/test/dataset/test-scan-options.rb b/c_glib/test/dataset/test-scan-options.rb
deleted file mode 100644
index 0536b2a7cca..00000000000
--- a/c_glib/test/dataset/test-scan-options.rb
+++ /dev/null
@@ -1,47 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-class TestDatasetScanOptions < Test::Unit::TestCase
-  def setup
-    omit("Arrow Dataset is required") unless defined?(ArrowDataset)
-    @schema = Arrow::Schema.new([])
-    @scan_options = ArrowDataset::ScanOptions.new(@schema)
-  end
-
-  def test_schema
-    assert_equal(@schema,
-                 @scan_options.schema)
-  end
-
-  def test_batch_size
-    assert_equal(1<<20,
-                 @scan_options.batch_size)
-    @scan_options.batch_size = 42
-    assert_equal(42,
-                 @scan_options.batch_size)
-  end
-
-  def test_use_threads
-    assert do
-      not @scan_options.use_threads?
-    end
-    @scan_options.use_threads = true
-    assert do
-      @scan_options.use_threads?
-    end
-  end
-end
diff --git a/c_glib/test/dataset/test-scanner.rb b/c_glib/test/dataset/test-scanner.rb
new file mode 100644
index 00000000000..f7702d4905f
--- /dev/null
+++ b/c_glib/test/dataset/test-scanner.rb
@@ -0,0 +1,48 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestDatasetScanner < Test::Unit::TestCase
+  include Helper::Buildable
+  include Helper::Writable
+
+  def setup
+    omit("Arrow Dataset is required") unless defined?(ArrowDataset)
+    Dir.mktmpdir do |tmpdir|
+      path = File.join(tmpdir, "table.arrow")
+      @table = build_table(visible: [
+                             build_boolean_array([true, false, true]),
+                             build_boolean_array([false, true, false, true]),
+                           ],
+                           point: [
+                             build_int32_array([1, 2, 3]),
+                             build_int32_array([-1, -2, -3, -4]),
+                           ])
+      @format = ArrowDataset::IPCFileFormat.new
+      write_table(@table, path)
+      factory = ArrowDataset::FileSystemDatasetFactory.new(@format)
+      factory.file_system_uri = build_file_uri(path)
+      @dataset = factory.finish
+      builder = @dataset.begin_scan
+      @scanner = builder.finish
+      yield
+    end
+  end
+
+  def test_to_table
+    assert_equal(@table, @scanner.to_table)
+  end
+end
diff --git a/c_glib/test/helper/buildable.rb b/c_glib/test/helper/buildable.rb
index 04ae22f8715..356fa651c6a 100644
--- a/c_glib/test/helper/buildable.rb
+++ b/c_glib/test/helper/buildable.rb
@@ -205,7 +205,15 @@ def append_to_builder(builder, value)
     def build_table(columns)
       fields = []
       chunked_arrays = []
-      columns.each do |name, chunked_array|
+      columns.each do |name, data|
+        case data
+        when Arrow::Array
+          chunked_array = Arrow::ChunkedArray.new([data])
+        when Array
+          chunked_array = Arrow::ChunkedArray.new(data)
+        else
+          chunked_array = data
+        end
         fields << Arrow::Field.new(name, chunked_array.value_data_type)
         chunked_arrays << chunked_array
       end
@@ -222,6 +230,15 @@ def build_record_batch(columns)
       Arrow::RecordBatch.new(schema, n_rows, columns.values)
     end
 
+    def build_file_uri(path)
+      absolute_path = File.expand_path(path)
+      if absolute_path.start_with?("/")
+        "file://#{absolute_path}"
+      else
+        "file:///#{absolute_path}"
+      end
+    end
+
     private
     def build_array(builder, values)
       values.each do |value|
diff --git a/ruby/red-arrow-dataset/lib/arrow-dataset/in-memory-fragment.rb b/c_glib/test/helper/writable.rb
similarity index 63%
rename from ruby/red-arrow-dataset/lib/arrow-dataset/in-memory-fragment.rb
rename to c_glib/test/helper/writable.rb
index 917d6c79d0d..0053e972f91 100644
--- a/ruby/red-arrow-dataset/lib/arrow-dataset/in-memory-fragment.rb
+++ b/c_glib/test/helper/writable.rb
@@ -15,18 +15,25 @@
 # specific language governing permissions and limitations
 # under the License.
 
-module ArrowDataset
-  class InMemoryFragment
-    alias_method :initialize_raw, :initialize
-    private :initialize_raw
-    def initialize(schema, record_batches)
-      record_batches = record_batches.collect do |record_batch|
-        unless record_batch.is_a?(Arrow::RecordBatch)
-          record_batch = Arrow::RecordBatch.new(record_batch)
+module Helper
+  module Writable
+    def write_table(table, path, type: :file)
+      output = Arrow::FileOutputStream.new(path, false)
+      begin
+        if type == :file
+          writer_class = Arrow::RecordBatchFileWriter
+        else
+          writer_class = Arrow::RecordBatchStreamWriter
         end
-        record_batch
+        writer = writer_class.new(output, table.schema)
+        begin
+          writer.write_table(table)
+        ensure
+          writer.close
+        end
+      ensure
+        output.close
       end
-      initialize_raw(schema, record_batches)
     end
   end
 end
diff --git a/c_glib/test/run-test.rb b/c_glib/test/run-test.rb
index 044cb33a019..9c6af05224e 100755
--- a/c_glib/test/run-test.rb
+++ b/c_glib/test/run-test.rb
@@ -83,10 +83,11 @@ class BooleanScalar
 require_relative "helper/buildable"
 require_relative "helper/data-type"
 require_relative "helper/fixture"
-require_relative "helper/omittable"
-require_relative "helper/plasma-store"
 if defined?(ArrowFlight)
   require_relative "helper/flight-server"
 end
+require_relative "helper/omittable"
+require_relative "helper/plasma-store"
+require_relative "helper/writable"
 
 exit(Test::Unit::AutoRunner.run(true, test_dir.to_s))
diff --git a/cpp/src/arrow/dataset/discovery.h b/cpp/src/arrow/dataset/discovery.h
index 5559638448f..40c02051955 100644
--- a/cpp/src/arrow/dataset/discovery.h
+++ b/cpp/src/arrow/dataset/discovery.h
@@ -237,16 +237,23 @@ class ARROW_DS_EXPORT FileSystemDatasetFactory : public DatasetFactory {
                                                       std::shared_ptr<FileFormat> format,
                                                       FileSystemFactoryOptions options);
 
+  /// \brief Build a FileSystemDatasetFactory from an explicit list of
+  /// file information.
+  ///
+  /// \param[in] filesystem passed to FileSystemDataset
+  /// \param[in] files passed to FileSystemDataset
+  /// \param[in] format passed to FileSystemDataset
+  /// \param[in] options see FileSystemFactoryOptions for more information.
+  static Result<std::shared_ptr<DatasetFactory>> Make(
+      std::shared_ptr<fs::FileSystem> filesystem, const std::vector<fs::FileInfo>& files,
+      std::shared_ptr<FileFormat> format, FileSystemFactoryOptions options);
+
   Result<std::vector<std::shared_ptr<Schema>>> InspectSchemas(
       InspectOptions options) override;
 
   Result<std::shared_ptr<Dataset>> Finish(FinishOptions options) override;
 
  protected:
-  static Result<std::shared_ptr<DatasetFactory>> Make(
-      std::shared_ptr<fs::FileSystem> filesystem, const std::vector<fs::FileInfo>& files,
-      std::shared_ptr<FileFormat> format, FileSystemFactoryOptions options);
-
   FileSystemDatasetFactory(std::vector<fs::FileInfo> files,
                            std::shared_ptr<fs::FileSystem> filesystem,
                            std::shared_ptr<FileFormat> format,
diff --git a/ruby/red-arrow-dataset/lib/arrow-dataset/scan-options.rb b/ruby/red-arrow-dataset/lib/arrow-dataset/dataset.rb
similarity index 69%
rename from ruby/red-arrow-dataset/lib/arrow-dataset/scan-options.rb
rename to ruby/red-arrow-dataset/lib/arrow-dataset/dataset.rb
index 1467743655b..a658fc3f2e0 100644
--- a/ruby/red-arrow-dataset/lib/arrow-dataset/scan-options.rb
+++ b/ruby/red-arrow-dataset/lib/arrow-dataset/dataset.rb
@@ -16,21 +16,13 @@
 # under the License.
 
 module ArrowDataset
-  class ScanOptions
+  class Dataset
     class << self
-      def try_convert(value)
-        case value
-        when Hash
-          return nil unless value.key?(:schema)
-          options = new(value[:schema])
-          value.each do |name, value|
-            next if name == :schema
-            options.__send__("#{name}=", value)
-          end
-          options
-        else
-          nil
-        end
+      def build(*args)
+        factory_class = ArrowDataset.const_get("#{name}Factory")
+        factory = factory_class.new(*args)
+        yield(factory)
+        factory.finish
       end
     end
   end
diff --git a/ruby/red-arrow-dataset/lib/arrow-dataset/in-memory-scan-task.rb b/ruby/red-arrow-dataset/lib/arrow-dataset/in-memory-scan-task.rb
deleted file mode 100644
index 5e127e179c6..00000000000
--- a/ruby/red-arrow-dataset/lib/arrow-dataset/in-memory-scan-task.rb
+++ /dev/null
@@ -1,35 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-module ArrowDataset
-  class InMemoryScanTask
-    alias_method :initialize_raw, :initialize
-    private :initialize_raw
-    def initialize(record_batches, **options)
-      record_batches = record_batches.collect do |record_batch|
-        unless record_batch.is_a?(Arrow::RecordBatch)
-          record_batch = Arrow::RecordBatch.new(record_batch)
-        end
-        record_batch
-      end
-      options[:schema] ||= record_batches.first.schema
-      fragment = options.delete(:fragment)
-      fragment ||= InMemoryFragment.new(options[:schema], record_batches)
-      initialize_raw(record_batches, options, fragment)
-    end
-  end
-end
diff --git a/ruby/red-arrow-dataset/lib/arrow-dataset/loader.rb b/ruby/red-arrow-dataset/lib/arrow-dataset/loader.rb
index fcac52d268f..6a0dc5079d8 100644
--- a/ruby/red-arrow-dataset/lib/arrow-dataset/loader.rb
+++ b/ruby/red-arrow-dataset/lib/arrow-dataset/loader.rb
@@ -29,8 +29,7 @@ def post_load(repository, namespace)
     end
 
     def require_libraries
-      require "arrow-dataset/in-memory-scan-task"
-      require "arrow-dataset/scan-options"
+      require "arrow-dataset/dataset"
     end
   end
 end
diff --git a/ruby/red-arrow-dataset/test/helper.rb b/ruby/red-arrow-dataset/test/helper.rb
index 795df3beb01..7231eb1cb64 100644
--- a/ruby/red-arrow-dataset/test/helper.rb
+++ b/ruby/red-arrow-dataset/test/helper.rb
@@ -17,4 +17,6 @@
 
 require "arrow-dataset"
 
+require "tmpdir"
+
 require "test-unit"
diff --git a/ruby/red-arrow-dataset/test/test-scan-options.rb b/ruby/red-arrow-dataset/test/test-file-system-dataset.rb
similarity index 58%
rename from ruby/red-arrow-dataset/test/test-scan-options.rb
rename to ruby/red-arrow-dataset/test/test-file-system-dataset.rb
index a9a947ff88d..17cbcb88d74 100644
--- a/ruby/red-arrow-dataset/test/test-scan-options.rb
+++ b/ruby/red-arrow-dataset/test/test-file-system-dataset.rb
@@ -15,22 +15,24 @@
 # specific language governing permissions and limitations
 # under the License.
 
-class TestScanOptions < Test::Unit::TestCase
+class TestFileSystemDataset < Test::Unit::TestCase
   def setup
-    @record_batches = [
-      Arrow::RecordBatch.new(visible: [true, false, true],
-                             point: [1, 2, 3]),
-    ]
-    @schema = @record_batches.first.schema
+    Dir.mktmpdir do |tmpdir|
+      @dir = tmpdir
+      @path = File.join(@dir, "table.arrow")
+      @table = Arrow::Table.new(visible: [true, false, true],
+                                point: [1, 2, 3])
+      @table.save(@path)
+      @format = ArrowDataset::IPCFileFormat.new
+      yield
+    end
   end
 
-  sub_test_case(".try_convert") do
-    def test_hash
-      batch_size = 1024
-      context = ArrowDataset::ScanOptions.try_convert(schema: @schema,
-                                                      batch_size: batch_size)
-      assert_equal([@schema, batch_size],
-                   [context.schema, context.batch_size])
+  test(".build") do
+    dataset = ArrowDataset::FileSystemDataset.build(@format) do |factory|
+      factory.file_system = Arrow::LocalFileSystem.new
+      factory.add_path(File.expand_path(@path))
     end
+    assert_equal(@table, dataset.to_table)
   end
 end

From 59c5781cbb7c3f806f34b3400d2f4d896c902686 Mon Sep 17 00:00:00 2001
From: Weston Pace <weston.pace@gmail.com>
Date: Wed, 16 Jun 2021 11:00:11 +0200
Subject: [PATCH 421/719] ARROW-13036: [Doc] Mention recommended file
 extension(s) for Arrow IPC

See JIRA

Closes #10512 from westonpace/feature/ARROW-13036--doc-mention-recommended-file-extension-s-for-ar

Authored-by: Weston Pace <weston.pace@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 docs/source/format/Columnar.rst | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/docs/source/format/Columnar.rst b/docs/source/format/Columnar.rst
index 102c3a73317..52920a49b35 100644
--- a/docs/source/format/Columnar.rst
+++ b/docs/source/format/Columnar.rst
@@ -1006,19 +1006,21 @@ message flatbuffer is read, you can then read the message body.
 
 The stream writer can signal end-of-stream (EOS) either by writing 8 bytes
 containing the 4-byte continuation indicator (``0xFFFFFFFF``) followed by 0
-metadata length (``0x00000000``) or closing the stream interface.
+metadata length (``0x00000000``) or closing the stream interface. We
+recommend the ".arrows" file extension for the streaming format although
+in many cases these streams will not ever be stored as files.
 
 IPC File Format
 ---------------
 
-We define a "file format" supporting random access that is build with
-the stream format. The file starts and ends with a magic string
-``ARROW1`` (plus padding). What follows in the file is identical to
-the stream format. At the end of the file, we write a *footer*
-containing a redundant copy of the schema (which is a part of the
-streaming format) plus memory offsets and sizes for each of the data
-blocks in the file. This enables random access any record batch in the
-file. See `File.fbs`_ for the precise details of the file footer.
+We define a "file format" supporting random access that is an extension of
+the stream format. The file starts and ends with a magic string ``ARROW1``
+(plus padding). What follows in the file is identical to the stream format.
+At the end of the file, we write a *footer* containing a redundant copy of
+the schema (which is a part of the streaming format) plus memory offsets and
+sizes for each of the data blocks in the file. This enables random access to
+any record batch in the file. See `File.fbs`_ for the precise details of the
+file footer.
 
 Schematically we have: ::
 
@@ -1034,8 +1036,9 @@ should be defined in a ``DictionaryBatch`` before they are used in a
 ``RecordBatch``, as long as the keys are defined somewhere in the
 file. Further more, it is invalid to have more than one **non-delta**
 dictionary batch per dictionary ID (i.e. dictionary replacement is not
-supported).  Delta dictionaries are applied in the order they appear in
-the file footer.
+supported). Delta dictionaries are applied in the order they appear in
+the file footer. We recommend the ".arrow" extension for files created with
+this format.
 
 Dictionary Messages
 -------------------

From 4d19225d57bfc3303758a9547995ac70faccc552 Mon Sep 17 00:00:00 2001
From: Nate Clark <nate@neworld.us>
Date: Wed, 16 Jun 2021 12:58:39 +0200
Subject: [PATCH 422/719] ARROW-12995: [C++] Add validation to CSV options

Closes #10505 from n3world/ARROW-12995-Validate_csv_opts

Authored-by: Nate Clark <nate@neworld.us>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/csv/options.cc         | 43 ++++++++++++++++
 cpp/src/arrow/csv/options.h          | 14 ++++++
 cpp/src/arrow/csv/reader.cc          |  8 +++
 cpp/src/arrow/csv/writer.cc          |  2 +
 python/pyarrow/_csv.pyx              | 13 +++++
 python/pyarrow/includes/libarrow.pxd |  8 +++
 python/pyarrow/tests/test_csv.py     | 74 ++++++++++++++++++++++++++++
 7 files changed, 162 insertions(+)

diff --git a/cpp/src/arrow/csv/options.cc b/cpp/src/arrow/csv/options.cc
index a515abf2cf4..c71cfdaf295 100644
--- a/cpp/src/arrow/csv/options.cc
+++ b/cpp/src/arrow/csv/options.cc
@@ -22,6 +22,19 @@ namespace csv {
 
 ParseOptions ParseOptions::Defaults() { return ParseOptions(); }
 
+Status ParseOptions::Validate() const {
+  if (ARROW_PREDICT_FALSE(delimiter == '\n' || delimiter == '\r')) {
+    return Status::Invalid("ParseOptions: delimiter cannot be \\r or \\n");
+  }
+  if (ARROW_PREDICT_FALSE(quoting && (quote_char == '\n' || quote_char == '\r'))) {
+    return Status::Invalid("ParseOptions: quote_char cannot be \\r or \\n");
+  }
+  if (ARROW_PREDICT_FALSE(escaping && (escape_char == '\n' || escape_char == '\r'))) {
+    return Status::Invalid("ParseOptions: escape_char cannot be \\r or \\n");
+  }
+  return Status::OK();
+}
+
 ConvertOptions ConvertOptions::Defaults() {
   auto options = ConvertOptions();
   // Same default null / true / false spellings as in Pandas.
@@ -33,8 +46,38 @@ ConvertOptions ConvertOptions::Defaults() {
   return options;
 }
 
+Status ConvertOptions::Validate() const { return Status::OK(); }
+
 ReadOptions ReadOptions::Defaults() { return ReadOptions(); }
+
+Status ReadOptions::Validate() const {
+  if (ARROW_PREDICT_FALSE(block_size < 1)) {
+    // Min is 1 because some tests use really small block sizes
+    return Status::Invalid("ReadOptions: block_size must be at least 1: ", block_size);
+  }
+  if (ARROW_PREDICT_FALSE(skip_rows < 0)) {
+    return Status::Invalid("ReadOptions: skip_rows cannot be negative: ", skip_rows);
+  }
+  if (ARROW_PREDICT_FALSE(skip_rows_after_names < 0)) {
+    return Status::Invalid("ReadOptions: skip_rows_after_names cannot be negative: ",
+                           skip_rows_after_names);
+  }
+  if (ARROW_PREDICT_FALSE(autogenerate_column_names && !column_names.empty())) {
+    return Status::Invalid(
+        "ReadOptions: autogenerate_column_names cannot be true when column_names are "
+        "provided");
+  }
+  return Status::OK();
+}
+
 WriteOptions WriteOptions::Defaults() { return WriteOptions(); }
 
+Status WriteOptions::Validate() const {
+  if (ARROW_PREDICT_FALSE(batch_size < 1)) {
+    return Status::Invalid("WriteOptions: batch_size must be at least 1: ", batch_size);
+  }
+  return Status::OK();
+}
+
 }  // namespace csv
 }  // namespace arrow
diff --git a/cpp/src/arrow/csv/options.h b/cpp/src/arrow/csv/options.h
index d9c94a03f86..790c47fc3f4 100644
--- a/cpp/src/arrow/csv/options.h
+++ b/cpp/src/arrow/csv/options.h
@@ -24,6 +24,7 @@
 #include <vector>
 
 #include "arrow/csv/type_fwd.h"
+#include "arrow/status.h"
 #include "arrow/util/visibility.h"
 
 namespace arrow {
@@ -59,6 +60,9 @@ struct ARROW_EXPORT ParseOptions {
 
   /// Create parsing options with default values
   static ParseOptions Defaults();
+
+  /// \brief Test that all set options are valid
+  Status Validate() const;
 };
 
 struct ARROW_EXPORT ConvertOptions {
@@ -112,6 +116,9 @@ struct ARROW_EXPORT ConvertOptions {
   /// Create conversion options with default values, including conventional
   /// values for `null_values`, `true_values` and `false_values`
   static ConvertOptions Defaults();
+
+  /// \brief Test that all set options are valid
+  Status Validate() const;
 };
 
 struct ARROW_EXPORT ReadOptions {
@@ -124,6 +131,7 @@ struct ARROW_EXPORT ReadOptions {
   ///
   /// This will determine multi-threading granularity as well as
   /// the size of individual record batches.
+  /// Minimum valid value for block size is 1
   int32_t block_size = 1 << 20;  // 1 MB
 
   /// Number of header rows to skip (not including the row of column names, if any)
@@ -143,6 +151,9 @@ struct ARROW_EXPORT ReadOptions {
 
   /// Create read options with default values
   static ReadOptions Defaults();
+
+  /// \brief Test that all set options are valid
+  Status Validate() const;
 };
 
 /// Experimental
@@ -158,6 +169,9 @@ struct ARROW_EXPORT WriteOptions {
 
   /// Create write options with default values
   static WriteOptions Defaults();
+
+  /// \brief Test that all set options are valid
+  Status Validate() const;
 };
 
 }  // namespace csv
diff --git a/cpp/src/arrow/csv/reader.cc b/cpp/src/arrow/csv/reader.cc
index 068e06178c8..f221ffcadd9 100644
--- a/cpp/src/arrow/csv/reader.cc
+++ b/cpp/src/arrow/csv/reader.cc
@@ -1033,6 +1033,9 @@ Result<std::shared_ptr<TableReader>> MakeTableReader(
     MemoryPool* pool, io::IOContext io_context, std::shared_ptr<io::InputStream> input,
     const ReadOptions& read_options, const ParseOptions& parse_options,
     const ConvertOptions& convert_options) {
+  RETURN_NOT_OK(parse_options.Validate());
+  RETURN_NOT_OK(read_options.Validate());
+  RETURN_NOT_OK(convert_options.Validate());
   std::shared_ptr<BaseTableReader> reader;
   if (read_options.use_threads) {
     auto cpu_executor = internal::GetCpuThreadPool();
@@ -1051,6 +1054,9 @@ Future<std::shared_ptr<StreamingReader>> MakeStreamingReader(
     io::IOContext io_context, std::shared_ptr<io::InputStream> input,
     internal::Executor* cpu_executor, const ReadOptions& read_options,
     const ParseOptions& parse_options, const ConvertOptions& convert_options) {
+  RETURN_NOT_OK(parse_options.Validate());
+  RETURN_NOT_OK(read_options.Validate());
+  RETURN_NOT_OK(convert_options.Validate());
   std::shared_ptr<BaseStreamingReader> reader;
   reader = std::make_shared<SerialStreamingReader>(
       io_context, cpu_executor, input, read_options, parse_options, convert_options,
@@ -1182,6 +1188,8 @@ Future<int64_t> CountRowsAsync(io::IOContext io_context,
                                internal::Executor* cpu_executor,
                                const ReadOptions& read_options,
                                const ParseOptions& parse_options) {
+  RETURN_NOT_OK(parse_options.Validate());
+  RETURN_NOT_OK(read_options.Validate());
   auto counter = std::make_shared<CSVRowCounter>(
       io_context, cpu_executor, std::move(input), read_options, parse_options);
   return counter->Count();
diff --git a/cpp/src/arrow/csv/writer.cc b/cpp/src/arrow/csv/writer.cc
index ddd59b46fc1..e1c34a77ae9 100644
--- a/cpp/src/arrow/csv/writer.cc
+++ b/cpp/src/arrow/csv/writer.cc
@@ -414,6 +414,7 @@ class CSVConverter {
 
 Status WriteCSV(const Table& table, const WriteOptions& options, MemoryPool* pool,
                 arrow::io::OutputStream* output) {
+  RETURN_NOT_OK(options.Validate());
   if (pool == nullptr) {
     pool = default_memory_pool();
   }
@@ -424,6 +425,7 @@ Status WriteCSV(const Table& table, const WriteOptions& options, MemoryPool* poo
 
 Status WriteCSV(const RecordBatch& batch, const WriteOptions& options, MemoryPool* pool,
                 arrow::io::OutputStream* output) {
+  RETURN_NOT_OK(options.Validate());
   if (pool == nullptr) {
     pool = default_memory_pool();
   }
diff --git a/python/pyarrow/_csv.pyx b/python/pyarrow/_csv.pyx
index e7dda3fb953..8ede8272c07 100644
--- a/python/pyarrow/_csv.pyx
+++ b/python/pyarrow/_csv.pyx
@@ -58,6 +58,7 @@ cdef class ReadOptions(_Weakrefable):
         How much bytes to process at a time from the input stream.
         This will determine multi-threading granularity as well as
         the size of individual record batches or table chunks.
+        Minimum valid value for block size is 1
     skip_rows: int, optional (default 0)
         The number of rows to skip before the column names (if any)
         and the CSV data.
@@ -189,6 +190,9 @@ cdef class ReadOptions(_Weakrefable):
     def skip_rows_after_names(self, value):
         deref(self.options).skip_rows_after_names = value
 
+    def validate(self):
+        check_status(deref(self.options).Validate())
+
     def equals(self, ReadOptions other):
         return (
             self.use_threads == other.use_threads and
@@ -359,6 +363,9 @@ cdef class ParseOptions(_Weakrefable):
     def ignore_empty_lines(self, value):
         deref(self.options).ignore_empty_lines = value
 
+    def validate(self):
+        check_status(deref(self.options).Validate())
+
     def equals(self, ParseOptions other):
         return (
             self.delimiter == other.delimiter and
@@ -680,6 +687,9 @@ cdef class ConvertOptions(_Weakrefable):
         out.options.reset(new CCSVConvertOptions(move(options)))
         return out
 
+    def validate(self):
+        check_status(deref(self.options).Validate())
+
     def equals(self, ConvertOptions other):
         return (
             self.check_utf8 == other.check_utf8 and
@@ -941,6 +951,9 @@ cdef class WriteOptions(_Weakrefable):
     def batch_size(self, value):
         self.options.batch_size = value
 
+    def validate(self):
+        check_status(self.options.Validate())
+
 
 cdef _get_write_options(WriteOptions write_options, CCSVWriteOptions* out):
     if write_options is None:
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 35a2034eba4..b1fb04a1f8e 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -1592,6 +1592,8 @@ cdef extern from "arrow/csv/api.h" namespace "arrow::csv" nogil:
         @staticmethod
         CCSVParseOptions Defaults()
 
+        CStatus Validate()
+
     cdef cppclass CCSVConvertOptions" arrow::csv::ConvertOptions":
         c_bool check_utf8
         unordered_map[c_string, shared_ptr[CDataType]] column_types
@@ -1613,6 +1615,8 @@ cdef extern from "arrow/csv/api.h" namespace "arrow::csv" nogil:
         @staticmethod
         CCSVConvertOptions Defaults()
 
+        CStatus Validate()
+
     cdef cppclass CCSVReadOptions" arrow::csv::ReadOptions":
         c_bool use_threads
         int32_t block_size
@@ -1627,6 +1631,8 @@ cdef extern from "arrow/csv/api.h" namespace "arrow::csv" nogil:
         @staticmethod
         CCSVReadOptions Defaults()
 
+        CStatus Validate()
+
     cdef cppclass CCSVWriteOptions" arrow::csv::WriteOptions":
         c_bool include_header
         int32_t batch_size
@@ -1634,6 +1640,8 @@ cdef extern from "arrow/csv/api.h" namespace "arrow::csv" nogil:
         @staticmethod
         CCSVWriteOptions Defaults()
 
+        CStatus Validate()
+
     cdef cppclass CCSVReader" arrow::csv::TableReader":
         @staticmethod
         CResult[shared_ptr[CCSVReader]] Make(
diff --git a/python/pyarrow/tests/test_csv.py b/python/pyarrow/tests/test_csv.py
index 32c0353fada..48cdff75f97 100644
--- a/python/pyarrow/tests/test_csv.py
+++ b/python/pyarrow/tests/test_csv.py
@@ -132,6 +132,34 @@ def test_read_options():
     opts = cls(block_size=1234)
     assert opts.block_size == 1234
 
+    opts.validate()
+
+    match = "ReadOptions: block_size must be at least 1: 0"
+    with pytest.raises(pa.ArrowInvalid, match=match):
+        opts = cls()
+        opts.block_size = 0
+        opts.validate()
+
+    match = "ReadOptions: skip_rows cannot be negative: -1"
+    with pytest.raises(pa.ArrowInvalid, match=match):
+        opts = cls()
+        opts.skip_rows = -1
+        opts.validate()
+
+    match = "ReadOptions: skip_rows_after_names cannot be negative: -1"
+    with pytest.raises(pa.ArrowInvalid, match=match):
+        opts = cls()
+        opts.skip_rows_after_names = -1
+        opts.validate()
+
+    match = "ReadOptions: autogenerate_column_names cannot be true when" \
+            " column_names are provided"
+    with pytest.raises(pa.ArrowInvalid, match=match):
+        opts = cls()
+        opts.autogenerate_column_names = True
+        opts.column_names = ('a', 'b')
+        opts.validate()
+
 
 def test_parse_options():
     cls = ParseOptions
@@ -150,6 +178,44 @@ def test_parse_options():
                                  newlines_in_values=True,
                                  ignore_empty_lines=False)
 
+    cls().validate()
+    opts = cls()
+    opts.delimiter = "\t"
+    opts.validate()
+
+    match = "ParseOptions: delimiter cannot be \\\\r or \\\\n"
+    with pytest.raises(pa.ArrowInvalid, match=match):
+        opts = cls()
+        opts.delimiter = "\n"
+        opts.validate()
+
+    with pytest.raises(pa.ArrowInvalid, match=match):
+        opts = cls()
+        opts.delimiter = "\r"
+        opts.validate()
+
+    match = "ParseOptions: quote_char cannot be \\\\r or \\\\n"
+    with pytest.raises(pa.ArrowInvalid, match=match):
+        opts = cls()
+        opts.quote_char = "\n"
+        opts.validate()
+
+    with pytest.raises(pa.ArrowInvalid, match=match):
+        opts = cls()
+        opts.quote_char = "\r"
+        opts.validate()
+
+    match = "ParseOptions: escape_char cannot be \\\\r or \\\\n"
+    with pytest.raises(pa.ArrowInvalid, match=match):
+        opts = cls()
+        opts.escape_char = "\n"
+        opts.validate()
+
+    with pytest.raises(pa.ArrowInvalid, match=match):
+        opts = cls()
+        opts.escape_char = "\r"
+        opts.validate()
+
 
 def test_convert_options():
     cls = ConvertOptions
@@ -238,6 +304,14 @@ def test_write_options():
     opts = cls(batch_size=9876)
     assert opts.batch_size == 9876
 
+    opts.validate()
+
+    match = "WriteOptions: batch_size must be at least 1: 0"
+    with pytest.raises(pa.ArrowInvalid, match=match):
+        opts = cls()
+        opts.batch_size = 0
+        opts.validate()
+
 
 class BaseTestCSVRead:
 

From ec0c3c595554ca441a2161eb4dfc2c0e178b7e41 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Wed, 16 Jun 2021 14:29:03 +0200
Subject: [PATCH 423/719] ARROW-13090: [Python] Fix create_dir() implementation
 in FSSpecHandler

Recent fsspec version have started raising FileExistsError if the target directory already exists.  Ignore the error, as create_dir() is supposed to succeed in that case.

Closes #10540 from pitrou/ARROW-13090-fsspec-create-dir

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
---
 python/pyarrow/fs.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/python/pyarrow/fs.py b/python/pyarrow/fs.py
index fe505530751..1b86e4b7e0f 100644
--- a/python/pyarrow/fs.py
+++ b/python/pyarrow/fs.py
@@ -263,7 +263,10 @@ def get_file_info_selector(self, selector):
 
     def create_dir(self, path, recursive):
         # mkdir also raises FileNotFoundError when base directory is not found
-        self.fs.mkdir(path, create_parents=recursive)
+        try:
+            self.fs.mkdir(path, create_parents=recursive)
+        except FileExistsError:
+            pass
 
     def delete_dir(self, path):
         self.fs.rm(path, recursive=True)

From 99a68d47954a9b2711408af9fb779c7afeff02ac Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Wed, 16 Jun 2021 20:05:33 +0200
Subject: [PATCH 424/719] ARROW-10115: [C++] Add CSV option to treat quoted
 strings as always non-null

The option is only applicable to string and binary columns.

Closes #10503 from pitrou/ARROW-10115-csv-quoted-nulls

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/csv/converter.cc       |   1 +
 cpp/src/arrow/csv/converter_test.cc  | 132 +++++++++++++++++----------
 cpp/src/arrow/csv/options.h          |   7 ++
 python/pyarrow/_csv.pyx              |  40 ++++++--
 python/pyarrow/includes/libarrow.pxd |   1 +
 python/pyarrow/tests/test_csv.py     |  16 +++-
 6 files changed, 138 insertions(+), 59 deletions(-)

diff --git a/cpp/src/arrow/csv/converter.cc b/cpp/src/arrow/csv/converter.cc
index feebf374e38..cb72b22b405 100644
--- a/cpp/src/arrow/csv/converter.cc
+++ b/cpp/src/arrow/csv/converter.cc
@@ -185,6 +185,7 @@ struct BinaryValueDecoder : public ValueDecoder {
 
   bool IsNull(const uint8_t* data, uint32_t size, bool quoted) {
     return options_.strings_can_be_null &&
+           (!quoted || options_.quoted_strings_can_be_null) &&
            ValueDecoder::IsNull(data, size, false /* quoted */);
   }
 };
diff --git a/cpp/src/arrow/csv/converter_test.cc b/cpp/src/arrow/csv/converter_test.cc
index e12e3d17a83..4bed649d558 100644
--- a/cpp/src/arrow/csv/converter_test.cc
+++ b/cpp/src/arrow/csv/converter_test.cc
@@ -174,67 +174,105 @@ void AssertConversionError(const std::shared_ptr<DataType>& type,
 // Converter tests
 
 template <typename T>
-static void TestBinaryConversionBasics() {
-  auto type = TypeTraits<T>::type_singleton();
-  AssertConversion<T, std::string>(type, {"ab,cdé\n", ",\xffgh\n"},
-                                   {{"ab", ""}, {"cdé", "\xffgh"}});
-}
-
-TEST(BinaryConversion, Basics) { TestBinaryConversionBasics<BinaryType>(); }
+class BinaryConversionTestBase : public testing::Test {
+ public:
+  std::shared_ptr<DataType> type() { return TypeTraits<T>::type_singleton(); }
 
-TEST(LargeBinaryConversion, Basics) { TestBinaryConversionBasics<LargeBinaryType>(); }
+  void TestNulls() {
+    auto type = this->type();
+    AssertConversion<T, std::string>(type, {"ab,N/A\n", "NULL,\n"},
+                                     {{"ab", "NULL"}, {"N/A", ""}},
+                                     {{true, true}, {true, true}});
 
-TEST(BinaryConversion, Nulls) {
-  AssertConversion<BinaryType, std::string>(binary(), {"ab,N/A\n", "NULL,\n"},
-                                            {{"ab", "NULL"}, {"N/A", ""}},
-                                            {{true, true}, {true, true}});
+    auto options = ConvertOptions::Defaults();
+    options.strings_can_be_null = true;
+    AssertConversion<T, std::string>(type, {"ab,N/A\n", "NULL,\n"},
+                                     {{"ab", ""}, {"", ""}},
+                                     {{true, false}, {false, false}}, options);
+    AssertConversion<T, std::string>(type, {"ab,\"N/A\"\n", "\"NULL\",\"\"\n"},
+                                     {{"ab", ""}, {"", ""}},
+                                     {{true, false}, {false, false}}, options);
+    options.quoted_strings_can_be_null = false;
+    AssertConversion<T, std::string>(type, {"ab,N/A\n", "NULL,\n"},
+                                     {{"ab", ""}, {"", ""}},
+                                     {{true, false}, {false, false}}, options);
+    AssertConversion<T, std::string>(type, {"ab,\"N/A\"\n", "\"NULL\",\"\"\n"},
+                                     {{"ab", "NULL"}, {"N/A", ""}},
+                                     {{true, true}, {true, true}}, options);
+  }
 
-  auto options = ConvertOptions::Defaults();
-  options.strings_can_be_null = true;
-  AssertConversion<BinaryType, std::string>(binary(), {"ab,N/A\n", "NULL,\n"},
-                                            {{"ab", ""}, {"", ""}},
-                                            {{true, false}, {false, false}}, options);
-}
+  void TestCustomNulls() {
+    auto type = this->type();
+    auto options = ConvertOptions::Defaults();
+    options.null_values = {"xxx", "zzz"};
+    AssertConversion<T, std::string>(type, {"ab,N/A\n", "xxx,\"zzz\"\n"},
+                                     {{"ab", "xxx"}, {"N/A", "zzz"}},
+                                     {{true, true}, {true, true}}, options);
+
+    options.strings_can_be_null = true;
+    AssertConversion<T, std::string>(type, {"ab,N/A\n", "xxx,\"zzz\"\n"},
+                                     {{"ab", ""}, {"N/A", ""}},
+                                     {{true, false}, {true, false}}, options);
+    options.quoted_strings_can_be_null = false;
+    AssertConversion<T, std::string>(type, {"ab,N/A\n", "xxx,\"zzz\"\n"},
+                                     {{"ab", ""}, {"N/A", "zzz"}},
+                                     {{true, false}, {true, true}}, options);
+  }
+};
 
 template <typename T>
-static void TestStringConversionBasics() {
-  auto type = TypeTraits<T>::type_singleton();
-  AssertConversion<T, std::string>(type, {"ab,cdé\n", ",gh\n"},
-                                   {{"ab", ""}, {"cdé", "gh"}});
+class BinaryConversionTest : public BinaryConversionTestBase<T> {
+ public:
+  void TestBasics() {
+    auto type = this->type();
+    AssertConversion<T, std::string>(type, {"ab,cdé\n", ",\xffgh\n"},
+                                     {{"ab", ""}, {"cdé", "\xffgh"}});
+  }
+};
 
-  auto options = ConvertOptions::Defaults();
-  options.check_utf8 = false;
-  AssertConversion<T, std::string>(type, {"ab,cdé\n", ",\xffgh\n"},
-                                   {{"ab", ""}, {"cdé", "\xffgh"}}, options,
-                                   /*validate_full=*/false);
-}
+using BinaryTestTypes = ::testing::Types<BinaryType, LargeBinaryType>;
 
-TEST(StringConversion, Basics) { TestStringConversionBasics<StringType>(); }
+TYPED_TEST_SUITE(BinaryConversionTest, BinaryTestTypes);
 
-TEST(LargeStringConversion, Basics) { TestStringConversionBasics<LargeStringType>(); }
+TYPED_TEST(BinaryConversionTest, Basics) { this->TestBasics(); }
 
-TEST(StringConversion, Nulls) {
-  AssertConversion<StringType, std::string>(utf8(), {"ab,N/A\n", "NULL,\n"},
-                                            {{"ab", "NULL"}, {"N/A", ""}},
-                                            {{true, true}, {true, true}});
+TYPED_TEST(BinaryConversionTest, Nulls) { this->TestNulls(); }
 
-  auto options = ConvertOptions::Defaults();
-  options.strings_can_be_null = true;
-  AssertConversion<StringType, std::string>(utf8(), {"ab,N/A\n", "NULL,\n"},
-                                            {{"ab", ""}, {"", ""}},
-                                            {{true, false}, {false, false}}, options);
-}
+TYPED_TEST(BinaryConversionTest, CustomNulls) { this->TestNulls(); }
 
 template <typename T>
-static void TestStringConversionErrors() {
-  auto type = TypeTraits<T>::type_singleton();
-  // Invalid UTF8 in column 0
-  AssertConversionError(type, {"ab,cdé\n", "\xff,gh\n"}, {0});
-}
+class StringConversionTest : public BinaryConversionTestBase<T> {
+ public:
+  void TestBasics() {
+    auto type = TypeTraits<T>::type_singleton();
+    AssertConversion<T, std::string>(type, {"ab,cdé\n", ",gh\n"},
+                                     {{"ab", ""}, {"cdé", "gh"}});
+  }
+
+  void TestInvalidUtf8() {
+    auto type = TypeTraits<T>::type_singleton();
+    // Invalid UTF8 in column 0
+    AssertConversionError(type, {"ab,cdé\n", "\xff,gh\n"}, {0});
+
+    auto options = ConvertOptions::Defaults();
+    options.check_utf8 = false;
+    AssertConversion<T, std::string>(type, {"ab,cdé\n", ",\xffgh\n"},
+                                     {{"ab", ""}, {"cdé", "\xffgh"}}, options,
+                                     /*validate_full=*/false);
+  }
+};
+
+using StringTestTypes = ::testing::Types<StringType, LargeStringType>;
+
+TYPED_TEST_SUITE(StringConversionTest, StringTestTypes);
+
+TYPED_TEST(StringConversionTest, Basics) { this->TestBasics(); }
+
+TYPED_TEST(StringConversionTest, Nulls) { this->TestNulls(); }
 
-TEST(StringConversion, Errors) { TestStringConversionErrors<StringType>(); }
+TYPED_TEST(StringConversionTest, CustomNulls) { this->TestCustomNulls(); }
 
-TEST(LargeStringConversion, Errors) { TestStringConversionErrors<LargeStringType>(); }
+TYPED_TEST(StringConversionTest, InvalidUtf8) { this->TestInvalidUtf8(); }
 
 TEST(FixedSizeBinaryConversion, Basics) {
   AssertConversion<FixedSizeBinaryType, std::string>(
diff --git a/cpp/src/arrow/csv/options.h b/cpp/src/arrow/csv/options.h
index 790c47fc3f4..1e423fd76db 100644
--- a/cpp/src/arrow/csv/options.h
+++ b/cpp/src/arrow/csv/options.h
@@ -78,11 +78,18 @@ struct ARROW_EXPORT ConvertOptions {
   std::vector<std::string> true_values;
   /// Recognized spellings for boolean false values
   std::vector<std::string> false_values;
+
   /// Whether string / binary columns can have null values.
   ///
   /// If true, then strings in "null_values" are considered null for string columns.
   /// If false, then all strings are valid string values.
   bool strings_can_be_null = false;
+  /// Whether string / binary columns can have quoted null values.
+  ///
+  /// If true *and* `strings_can_be_null` is true, then quoted strings in
+  /// "null_values" are also considered null for string columns.  Otherwise,
+  /// quoted strings are never considered null.
+  bool quoted_strings_can_be_null = true;
 
   /// Whether to try to automatically dict-encode string / binary data.
   /// If true, then when type inference detects a string or binary column,
diff --git a/python/pyarrow/_csv.pyx b/python/pyarrow/_csv.pyx
index 8ede8272c07..01cabc1d8b0 100644
--- a/python/pyarrow/_csv.pyx
+++ b/python/pyarrow/_csv.pyx
@@ -447,6 +447,12 @@ cdef class ConvertOptions(_Weakrefable):
         If true, then strings in null_values are considered null for
         string columns.
         If false, then all strings are valid string values.
+    quoted_strings_can_be_null: bool, optional (default True)
+        Whether string / binary columns can have quoted null values.
+        If true *and* strings_can_be_null is true, then strings in
+        null_values are considered null for string columns, even when
+        quoted.
+        Otherwise, then all quoted strings are valid string values.
     auto_dict_encode: bool, optional (default False)
         Whether to try to automatically dict-encode string / binary data.
         If true, then when type inference detects a string or binary column,
@@ -478,9 +484,10 @@ cdef class ConvertOptions(_Weakrefable):
 
     def __init__(self, *, check_utf8=None, column_types=None, null_values=None,
                  true_values=None, false_values=None,
-                 strings_can_be_null=None, include_columns=None,
-                 include_missing_columns=None, auto_dict_encode=None,
-                 auto_dict_max_cardinality=None, timestamp_parsers=None):
+                 strings_can_be_null=None, quoted_strings_can_be_null=None,
+                 include_columns=None, include_missing_columns=None,
+                 auto_dict_encode=None, auto_dict_max_cardinality=None,
+                 timestamp_parsers=None):
         if check_utf8 is not None:
             self.check_utf8 = check_utf8
         if column_types is not None:
@@ -493,6 +500,8 @@ cdef class ConvertOptions(_Weakrefable):
             self.false_values = false_values
         if strings_can_be_null is not None:
             self.strings_can_be_null = strings_can_be_null
+        if quoted_strings_can_be_null is not None:
+            self.quoted_strings_can_be_null = quoted_strings_can_be_null
         if include_columns is not None:
             self.include_columns = include_columns
         if include_missing_columns is not None:
@@ -526,6 +535,17 @@ cdef class ConvertOptions(_Weakrefable):
     def strings_can_be_null(self, value):
         deref(self.options).strings_can_be_null = value
 
+    @property
+    def quoted_strings_can_be_null(self):
+        """
+        Whether string / binary columns can have quoted null values.
+        """
+        return deref(self.options).quoted_strings_can_be_null
+
+    @quoted_strings_can_be_null.setter
+    def quoted_strings_can_be_null(self, value):
+        deref(self.options).quoted_strings_can_be_null = value
+
     @property
     def column_types(self):
         """
@@ -699,6 +719,8 @@ cdef class ConvertOptions(_Weakrefable):
             self.false_values == other.false_values and
             self.timestamp_parsers == other.timestamp_parsers and
             self.strings_can_be_null == other.strings_can_be_null and
+            self.quoted_strings_can_be_null ==
+            other.quoted_strings_can_be_null and
             self.auto_dict_encode == other.auto_dict_encode and
             self.auto_dict_max_cardinality ==
             other.auto_dict_max_cardinality and
@@ -709,16 +731,16 @@ cdef class ConvertOptions(_Weakrefable):
     def __getstate__(self):
         return (self.check_utf8, self.column_types, self.null_values,
                 self.true_values, self.false_values, self.timestamp_parsers,
-                self.strings_can_be_null, self.auto_dict_encode,
-                self.auto_dict_max_cardinality, self.include_columns,
-                self.include_missing_columns)
+                self.strings_can_be_null, self.quoted_strings_can_be_null,
+                self.auto_dict_encode, self.auto_dict_max_cardinality,
+                self.include_columns, self.include_missing_columns)
 
     def __setstate__(self, state):
         (self.check_utf8, self.column_types, self.null_values,
          self.true_values, self.false_values, self.timestamp_parsers,
-         self.strings_can_be_null, self.auto_dict_encode,
-         self.auto_dict_max_cardinality, self.include_columns,
-         self.include_missing_columns) = state
+         self.strings_can_be_null, self.quoted_strings_can_be_null,
+         self.auto_dict_encode, self.auto_dict_max_cardinality,
+         self.include_columns, self.include_missing_columns) = state
 
     def __eq__(self, other):
         try:
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index b1fb04a1f8e..072062385ca 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -1601,6 +1601,7 @@ cdef extern from "arrow/csv/api.h" namespace "arrow::csv" nogil:
         vector[c_string] true_values
         vector[c_string] false_values
         c_bool strings_can_be_null
+        c_bool quoted_strings_can_be_null
         vector[shared_ptr[CTimestampParser]] timestamp_parsers
 
         c_bool auto_dict_encode
diff --git a/python/pyarrow/tests/test_csv.py b/python/pyarrow/tests/test_csv.py
index 48cdff75f97..482973a7258 100644
--- a/python/pyarrow/tests/test_csv.py
+++ b/python/pyarrow/tests/test_csv.py
@@ -224,14 +224,16 @@ def test_convert_options():
     check_options_class(
         cls, check_utf8=[True, False],
         strings_can_be_null=[False, True],
+        quoted_strings_can_be_null=[True, False],
         include_columns=[[], ['def', 'abc']],
         include_missing_columns=[False, True],
         auto_dict_encode=[False, True],
         timestamp_parsers=[[], [ISO8601, '%y-%m']])
 
     check_options_class_pickling(
-        cls, check_utf8=True,
-        strings_can_be_null=False,
+        cls, check_utf8=False,
+        strings_can_be_null=True,
+        quoted_strings_can_be_null=False,
         include_columns=['def', 'abc'],
         include_missing_columns=False,
         auto_dict_encode=True,
@@ -828,7 +830,7 @@ def test_auto_dict_encode(self):
     def test_custom_nulls(self):
         # Infer nulls with custom values
         opts = ConvertOptions(null_values=['Xxx', 'Zzz'])
-        rows = b"a,b,c,d\nZzz,Xxx,1,2\nXxx,#N/A,,Zzz\n"
+        rows = b"""a,b,c,d\nZzz,"Xxx",1,2\nXxx,#N/A,,Zzz\n"""
         table = self.read_bytes(rows, convert_options=opts)
         schema = pa.schema([('a', pa.null()),
                             ('b', pa.string()),
@@ -851,6 +853,14 @@ def test_custom_nulls(self):
             'c': ["1", ""],
             'd': [2, None],
         }
+        opts.quoted_strings_can_be_null = False
+        table = self.read_bytes(rows, convert_options=opts)
+        assert table.to_pydict() == {
+            'a': [None, None],
+            'b': ["Xxx", "#N/A"],
+            'c': ["1", ""],
+            'd': [2, None],
+        }
 
         opts = ConvertOptions(null_values=[])
         rows = b"a,b\n#N/A,\n"

From 57ecc73e6153fea04e0ac0d13792ba0abb0dd779 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Wed, 16 Jun 2021 20:19:27 +0200
Subject: [PATCH 425/719] ARROW-12709: [C++] Add binary_join_element_wise

This adds a variadic scalar string join kernel, using the last argument (min 1 argument) as the separator. An options class allows emitting null (the default), skipping null non-separator arguments, or replacing null non-separator arguments with another string (mimicking libcudf).

Closes #10520 from lidavidm/arrow-12709

Lead-authored-by: David Li <li.davidm96@gmail.com>
Co-authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/compute/api_scalar.cc           |   8 +-
 cpp/src/arrow/compute/api_scalar.h            |  23 +-
 .../arrow/compute/kernels/scalar_compare.cc   |  16 +-
 .../compute/kernels/scalar_compare_test.cc    | 184 +++++++-------
 .../arrow/compute/kernels/scalar_string.cc    | 239 +++++++++++++++++-
 .../kernels/scalar_string_benchmark.cc        |  43 ++++
 .../compute/kernels/scalar_string_test.cc     | 119 +++++++++
 docs/source/cpp/compute.rst                   |  22 +-
 docs/source/python/api/compute.rst            |  21 +-
 python/pyarrow/_compute.pyx                   |  31 +++
 python/pyarrow/compute.py                     |   1 +
 python/pyarrow/includes/libarrow.pxd          |  16 ++
 python/pyarrow/tests/test_compute.py          |  52 +++-
 13 files changed, 643 insertions(+), 132 deletions(-)

diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc
index dba71456c29..db1cac290cf 100644
--- a/cpp/src/arrow/compute/api_scalar.cc
+++ b/cpp/src/arrow/compute/api_scalar.cc
@@ -63,14 +63,14 @@ SCALAR_ARITHMETIC_BINARY(Multiply, "multiply", "multiply_checked")
 SCALAR_ARITHMETIC_BINARY(Divide, "divide", "divide_checked")
 SCALAR_ARITHMETIC_BINARY(Power, "power", "power_checked")
 
-Result<Datum> ElementWiseMax(const std::vector<Datum>& args,
+Result<Datum> MaxElementWise(const std::vector<Datum>& args,
                              ElementWiseAggregateOptions options, ExecContext* ctx) {
-  return CallFunction("element_wise_max", args, &options, ctx);
+  return CallFunction("max_element_wise", args, &options, ctx);
 }
 
-Result<Datum> ElementWiseMin(const std::vector<Datum>& args,
+Result<Datum> MinElementWise(const std::vector<Datum>& args,
                              ElementWiseAggregateOptions options, ExecContext* ctx) {
-  return CallFunction("element_wise_min", args, &options, ctx);
+  return CallFunction("min_element_wise", args, &options, ctx);
 }
 
 // ----------------------------------------------------------------------
diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h
index 6e9a9340f2c..082876b356b 100644
--- a/cpp/src/arrow/compute/api_scalar.h
+++ b/cpp/src/arrow/compute/api_scalar.h
@@ -48,6 +48,25 @@ struct ARROW_EXPORT ElementWiseAggregateOptions : public FunctionOptions {
   bool skip_nulls;
 };
 
+/// Options for var_args_join.
+struct ARROW_EXPORT JoinOptions : public FunctionOptions {
+  /// How to handle null values. (A null separator always results in a null output.)
+  enum NullHandlingBehavior {
+    /// A null in any input results in a null in the output.
+    EMIT_NULL,
+    /// Nulls in inputs are skipped.
+    SKIP,
+    /// Nulls in inputs are replaced with the replacement string.
+    REPLACE,
+  };
+  explicit JoinOptions(NullHandlingBehavior null_handling = EMIT_NULL,
+                       std::string null_replacement = "")
+      : null_handling(null_handling), null_replacement(std::move(null_replacement)) {}
+  static JoinOptions Defaults() { return JoinOptions(); }
+  NullHandlingBehavior null_handling;
+  std::string null_replacement;
+};
+
 struct ARROW_EXPORT MatchSubstringOptions : public FunctionOptions {
   explicit MatchSubstringOptions(std::string pattern, bool ignore_case = false)
       : pattern(std::move(pattern)), ignore_case(ignore_case) {}
@@ -287,7 +306,7 @@ Result<Datum> Power(const Datum& left, const Datum& right,
 /// \param[in] ctx the function execution context, optional
 /// \return the element-wise maximum
 ARROW_EXPORT
-Result<Datum> ElementWiseMax(
+Result<Datum> MaxElementWise(
     const std::vector<Datum>& args,
     ElementWiseAggregateOptions options = ElementWiseAggregateOptions::Defaults(),
     ExecContext* ctx = NULLPTR);
@@ -300,7 +319,7 @@ Result<Datum> ElementWiseMax(
 /// \param[in] ctx the function execution context, optional
 /// \return the element-wise minimum
 ARROW_EXPORT
-Result<Datum> ElementWiseMin(
+Result<Datum> MinElementWise(
     const std::vector<Datum>& args,
     ElementWiseAggregateOptions options = ElementWiseAggregateOptions::Defaults(),
     ExecContext* ctx = NULLPTR);
diff --git a/cpp/src/arrow/compute/kernels/scalar_compare.cc b/cpp/src/arrow/compute/kernels/scalar_compare.cc
index 6763b6793f3..041c6a282f9 100644
--- a/cpp/src/arrow/compute/kernels/scalar_compare.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_compare.cc
@@ -467,14 +467,14 @@ const FunctionDoc less_equal_doc{
     ("A null on either side emits a null comparison result."),
     {"x", "y"}};
 
-const FunctionDoc element_wise_min_doc{
+const FunctionDoc min_element_wise_doc{
     "Find the element-wise minimum value",
     ("Nulls will be ignored (default) or propagated. "
      "NaN will be taken over null, but not over any valid float."),
     {"*args"},
     "ElementWiseAggregateOptions"};
 
-const FunctionDoc element_wise_max_doc{
+const FunctionDoc max_element_wise_doc{
     "Find the element-wise maximum value",
     ("Nulls will be ignored (default) or propagated. "
      "NaN will be taken over null, but not over any valid float."),
@@ -501,13 +501,13 @@ void RegisterScalarComparison(FunctionRegistry* registry) {
   // ----------------------------------------------------------------------
   // Variadic element-wise functions
 
-  auto element_wise_min =
-      MakeScalarMinMax<Minimum>("element_wise_min", &element_wise_min_doc);
-  DCHECK_OK(registry->AddFunction(std::move(element_wise_min)));
+  auto min_element_wise =
+      MakeScalarMinMax<Minimum>("min_element_wise", &min_element_wise_doc);
+  DCHECK_OK(registry->AddFunction(std::move(min_element_wise)));
 
-  auto element_wise_max =
-      MakeScalarMinMax<Maximum>("element_wise_max", &element_wise_max_doc);
-  DCHECK_OK(registry->AddFunction(std::move(element_wise_max)));
+  auto max_element_wise =
+      MakeScalarMinMax<Maximum>("max_element_wise", &max_element_wise_doc);
+  DCHECK_OK(registry->AddFunction(std::move(max_element_wise)));
 }
 
 }  // namespace internal
diff --git a/cpp/src/arrow/compute/kernels/scalar_compare_test.cc b/cpp/src/arrow/compute/kernels/scalar_compare_test.cc
index 6318a891d3a..50327e82032 100644
--- a/cpp/src/arrow/compute/kernels/scalar_compare_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_compare_test.cc
@@ -729,90 +729,90 @@ TYPED_TEST_SUITE(TestVarArgsCompareNumeric, NumericBasedTypes);
 TYPED_TEST_SUITE(TestVarArgsCompareFloating, RealArrowTypes);
 TYPED_TEST_SUITE(TestVarArgsCompareParametricTemporal, ParametricTemporalTypes);
 
-TYPED_TEST(TestVarArgsCompareNumeric, ElementWiseMin) {
-  this->AssertNullScalar(ElementWiseMin, {});
-  this->AssertNullScalar(ElementWiseMin, {this->scalar("null"), this->scalar("null")});
+TYPED_TEST(TestVarArgsCompareNumeric, MinElementWise) {
+  this->AssertNullScalar(MinElementWise, {});
+  this->AssertNullScalar(MinElementWise, {this->scalar("null"), this->scalar("null")});
 
-  this->Assert(ElementWiseMin, this->scalar("0"), {this->scalar("0")});
-  this->Assert(ElementWiseMin, this->scalar("0"),
+  this->Assert(MinElementWise, this->scalar("0"), {this->scalar("0")});
+  this->Assert(MinElementWise, this->scalar("0"),
                {this->scalar("2"), this->scalar("0"), this->scalar("1")});
   this->Assert(
-      ElementWiseMin, this->scalar("0"),
+      MinElementWise, this->scalar("0"),
       {this->scalar("2"), this->scalar("0"), this->scalar("1"), this->scalar("null")});
-  this->Assert(ElementWiseMin, this->scalar("1"),
+  this->Assert(MinElementWise, this->scalar("1"),
                {this->scalar("null"), this->scalar("null"), this->scalar("1"),
                 this->scalar("null")});
 
-  this->Assert(ElementWiseMin, (this->array("[]")), {this->array("[]")});
-  this->Assert(ElementWiseMin, this->array("[1, 2, 3, null]"),
+  this->Assert(MinElementWise, (this->array("[]")), {this->array("[]")});
+  this->Assert(MinElementWise, this->array("[1, 2, 3, null]"),
                {this->array("[1, 2, 3, null]")});
 
-  this->Assert(ElementWiseMin, this->array("[1, 2, 2, 2]"),
+  this->Assert(MinElementWise, this->array("[1, 2, 2, 2]"),
                {this->array("[1, 2, 3, 4]"), this->scalar("2")});
-  this->Assert(ElementWiseMin, this->array("[1, 2, 2, 2]"),
+  this->Assert(MinElementWise, this->array("[1, 2, 2, 2]"),
                {this->array("[1, null, 3, 4]"), this->scalar("2")});
-  this->Assert(ElementWiseMin, this->array("[1, 2, 2, 2]"),
+  this->Assert(MinElementWise, this->array("[1, 2, 2, 2]"),
                {this->array("[1, null, 3, 4]"), this->scalar("2"), this->scalar("4")});
-  this->Assert(ElementWiseMin, this->array("[1, 2, 2, 2]"),
+  this->Assert(MinElementWise, this->array("[1, 2, 2, 2]"),
                {this->array("[1, null, 3, 4]"), this->scalar("null"), this->scalar("2")});
 
-  this->Assert(ElementWiseMin, this->array("[1, 2, 2, 2]"),
+  this->Assert(MinElementWise, this->array("[1, 2, 2, 2]"),
                {this->array("[1, 2, 3, 4]"), this->array("[2, 2, 2, 2]")});
-  this->Assert(ElementWiseMin, this->array("[1, 2, 2, 2]"),
+  this->Assert(MinElementWise, this->array("[1, 2, 2, 2]"),
                {this->array("[1, 2, 3, 4]"), this->array("[2, null, 2, 2]")});
-  this->Assert(ElementWiseMin, this->array("[1, 2, 2, 2]"),
+  this->Assert(MinElementWise, this->array("[1, 2, 2, 2]"),
                {this->array("[1, null, 3, 4]"), this->array("[2, 2, 2, 2]")});
 
-  this->Assert(ElementWiseMin, this->array("[1, 2, null, 6]"),
+  this->Assert(MinElementWise, this->array("[1, 2, null, 6]"),
                {this->array("[1, 2, null, null]"), this->array("[4, null, null, 6]")});
-  this->Assert(ElementWiseMin, this->array("[1, 2, null, 6]"),
+  this->Assert(MinElementWise, this->array("[1, 2, null, 6]"),
                {this->array("[4, null, null, 6]"), this->array("[1, 2, null, null]")});
-  this->Assert(ElementWiseMin, this->array("[1, 2, 3, 4]"),
+  this->Assert(MinElementWise, this->array("[1, 2, 3, 4]"),
                {this->array("[1, 2, 3, 4]"), this->array("[null, null, null, null]")});
-  this->Assert(ElementWiseMin, this->array("[1, 2, 3, 4]"),
+  this->Assert(MinElementWise, this->array("[1, 2, 3, 4]"),
                {this->array("[null, null, null, null]"), this->array("[1, 2, 3, 4]")});
 
-  this->Assert(ElementWiseMin, this->array("[1, 1, 1, 1]"),
+  this->Assert(MinElementWise, this->array("[1, 1, 1, 1]"),
                {this->scalar("1"), this->array("[1, 2, 3, 4]")});
-  this->Assert(ElementWiseMin, this->array("[1, 1, 1, 1]"),
+  this->Assert(MinElementWise, this->array("[1, 1, 1, 1]"),
                {this->scalar("1"), this->array("[null, null, null, null]")});
-  this->Assert(ElementWiseMin, this->array("[1, 1, 1, 1]"),
+  this->Assert(MinElementWise, this->array("[1, 1, 1, 1]"),
                {this->scalar("null"), this->array("[1, 1, 1, 1]")});
-  this->Assert(ElementWiseMin, this->array("[null, null, null, null]"),
+  this->Assert(MinElementWise, this->array("[null, null, null, null]"),
                {this->scalar("null"), this->array("[null, null, null, null]")});
 
   // Test null handling
   this->element_wise_aggregate_options_.skip_nulls = false;
-  this->AssertNullScalar(ElementWiseMin, {this->scalar("null"), this->scalar("null")});
-  this->AssertNullScalar(ElementWiseMin, {this->scalar("0"), this->scalar("null")});
+  this->AssertNullScalar(MinElementWise, {this->scalar("null"), this->scalar("null")});
+  this->AssertNullScalar(MinElementWise, {this->scalar("0"), this->scalar("null")});
 
-  this->Assert(ElementWiseMin, this->array("[1, null, 2, 2]"),
+  this->Assert(MinElementWise, this->array("[1, null, 2, 2]"),
                {this->array("[1, null, 3, 4]"), this->scalar("2"), this->scalar("4")});
-  this->Assert(ElementWiseMin, this->array("[null, null, null, null]"),
+  this->Assert(MinElementWise, this->array("[null, null, null, null]"),
                {this->array("[1, null, 3, 4]"), this->scalar("null"), this->scalar("2")});
-  this->Assert(ElementWiseMin, this->array("[1, null, 2, 2]"),
+  this->Assert(MinElementWise, this->array("[1, null, 2, 2]"),
                {this->array("[1, 2, 3, 4]"), this->array("[2, null, 2, 2]")});
 
-  this->Assert(ElementWiseMin, this->array("[null, null, null, null]"),
+  this->Assert(MinElementWise, this->array("[null, null, null, null]"),
                {this->scalar("1"), this->array("[null, null, null, null]")});
-  this->Assert(ElementWiseMin, this->array("[null, null, null, null]"),
+  this->Assert(MinElementWise, this->array("[null, null, null, null]"),
                {this->scalar("null"), this->array("[1, 1, 1, 1]")});
 }
 
-TYPED_TEST(TestVarArgsCompareFloating, ElementWiseMin) {
+TYPED_TEST(TestVarArgsCompareFloating, MinElementWise) {
   auto Check = [this](const std::string& expected,
                       const std::vector<std::string>& inputs) {
     std::vector<Datum> args;
     for (const auto& input : inputs) {
       args.emplace_back(this->scalar(input));
     }
-    this->Assert(ElementWiseMin, this->scalar(expected), args);
+    this->Assert(MinElementWise, this->scalar(expected), args);
 
     args.clear();
     for (const auto& input : inputs) {
       args.emplace_back(this->array("[" + input + "]"));
     }
-    this->Assert(ElementWiseMin, this->array("[" + expected + "]"), args);
+    this->Assert(MinElementWise, this->array("[" + expected + "]"), args);
   };
   Check("-0.0", {"0.0", "-0.0"});
   Check("-0.0", {"1.0", "-0.0", "0.0"});
@@ -828,111 +828,111 @@ TYPED_TEST(TestVarArgsCompareFloating, ElementWiseMin) {
   Check("-Inf", {"0", "-Inf"});
 }
 
-TYPED_TEST(TestVarArgsCompareParametricTemporal, ElementWiseMin) {
+TYPED_TEST(TestVarArgsCompareParametricTemporal, MinElementWise) {
   // Temporal kernel is implemented with numeric kernel underneath
-  this->AssertNullScalar(ElementWiseMin, {});
-  this->AssertNullScalar(ElementWiseMin, {this->scalar("null"), this->scalar("null")});
+  this->AssertNullScalar(MinElementWise, {});
+  this->AssertNullScalar(MinElementWise, {this->scalar("null"), this->scalar("null")});
 
-  this->Assert(ElementWiseMin, this->scalar("0"), {this->scalar("0")});
-  this->Assert(ElementWiseMin, this->scalar("0"), {this->scalar("2"), this->scalar("0")});
-  this->Assert(ElementWiseMin, this->scalar("0"),
+  this->Assert(MinElementWise, this->scalar("0"), {this->scalar("0")});
+  this->Assert(MinElementWise, this->scalar("0"), {this->scalar("2"), this->scalar("0")});
+  this->Assert(MinElementWise, this->scalar("0"),
                {this->scalar("0"), this->scalar("null")});
 
-  this->Assert(ElementWiseMin, (this->array("[]")), {this->array("[]")});
-  this->Assert(ElementWiseMin, this->array("[1, 2, 3, null]"),
+  this->Assert(MinElementWise, (this->array("[]")), {this->array("[]")});
+  this->Assert(MinElementWise, this->array("[1, 2, 3, null]"),
                {this->array("[1, 2, 3, null]")});
 
-  this->Assert(ElementWiseMin, this->array("[1, 2, 2, 2]"),
+  this->Assert(MinElementWise, this->array("[1, 2, 2, 2]"),
                {this->array("[1, null, 3, 4]"), this->scalar("null"), this->scalar("2")});
 
-  this->Assert(ElementWiseMin, this->array("[1, 2, 3, 2]"),
+  this->Assert(MinElementWise, this->array("[1, 2, 3, 2]"),
                {this->array("[1, null, 3, 4]"), this->array("[2, 2, null, 2]")});
 }
 
-TYPED_TEST(TestVarArgsCompareNumeric, ElementWiseMax) {
-  this->AssertNullScalar(ElementWiseMax, {});
-  this->AssertNullScalar(ElementWiseMax, {this->scalar("null"), this->scalar("null")});
+TYPED_TEST(TestVarArgsCompareNumeric, MaxElementWise) {
+  this->AssertNullScalar(MaxElementWise, {});
+  this->AssertNullScalar(MaxElementWise, {this->scalar("null"), this->scalar("null")});
 
-  this->Assert(ElementWiseMax, this->scalar("0"), {this->scalar("0")});
-  this->Assert(ElementWiseMax, this->scalar("2"),
+  this->Assert(MaxElementWise, this->scalar("0"), {this->scalar("0")});
+  this->Assert(MaxElementWise, this->scalar("2"),
                {this->scalar("2"), this->scalar("0"), this->scalar("1")});
   this->Assert(
-      ElementWiseMax, this->scalar("2"),
+      MaxElementWise, this->scalar("2"),
       {this->scalar("2"), this->scalar("0"), this->scalar("1"), this->scalar("null")});
-  this->Assert(ElementWiseMax, this->scalar("1"),
+  this->Assert(MaxElementWise, this->scalar("1"),
                {this->scalar("null"), this->scalar("null"), this->scalar("1"),
                 this->scalar("null")});
 
-  this->Assert(ElementWiseMax, (this->array("[]")), {this->array("[]")});
-  this->Assert(ElementWiseMax, this->array("[1, 2, 3, null]"),
+  this->Assert(MaxElementWise, (this->array("[]")), {this->array("[]")});
+  this->Assert(MaxElementWise, this->array("[1, 2, 3, null]"),
                {this->array("[1, 2, 3, null]")});
 
-  this->Assert(ElementWiseMax, this->array("[2, 2, 3, 4]"),
+  this->Assert(MaxElementWise, this->array("[2, 2, 3, 4]"),
                {this->array("[1, 2, 3, 4]"), this->scalar("2")});
-  this->Assert(ElementWiseMax, this->array("[2, 2, 3, 4]"),
+  this->Assert(MaxElementWise, this->array("[2, 2, 3, 4]"),
                {this->array("[1, null, 3, 4]"), this->scalar("2")});
-  this->Assert(ElementWiseMax, this->array("[4, 4, 4, 4]"),
+  this->Assert(MaxElementWise, this->array("[4, 4, 4, 4]"),
                {this->array("[1, null, 3, 4]"), this->scalar("2"), this->scalar("4")});
-  this->Assert(ElementWiseMax, this->array("[2, 2, 3, 4]"),
+  this->Assert(MaxElementWise, this->array("[2, 2, 3, 4]"),
                {this->array("[1, null, 3, 4]"), this->scalar("null"), this->scalar("2")});
 
-  this->Assert(ElementWiseMax, this->array("[2, 2, 3, 4]"),
+  this->Assert(MaxElementWise, this->array("[2, 2, 3, 4]"),
                {this->array("[1, 2, 3, 4]"), this->array("[2, 2, 2, 2]")});
-  this->Assert(ElementWiseMax, this->array("[2, 2, 3, 4]"),
+  this->Assert(MaxElementWise, this->array("[2, 2, 3, 4]"),
                {this->array("[1, 2, 3, 4]"), this->array("[2, null, 2, 2]")});
-  this->Assert(ElementWiseMax, this->array("[2, 2, 3, 4]"),
+  this->Assert(MaxElementWise, this->array("[2, 2, 3, 4]"),
                {this->array("[1, null, 3, 4]"), this->array("[2, 2, 2, 2]")});
 
-  this->Assert(ElementWiseMax, this->array("[4, 2, null, 6]"),
+  this->Assert(MaxElementWise, this->array("[4, 2, null, 6]"),
                {this->array("[1, 2, null, null]"), this->array("[4, null, null, 6]")});
-  this->Assert(ElementWiseMax, this->array("[4, 2, null, 6]"),
+  this->Assert(MaxElementWise, this->array("[4, 2, null, 6]"),
                {this->array("[4, null, null, 6]"), this->array("[1, 2, null, null]")});
-  this->Assert(ElementWiseMax, this->array("[1, 2, 3, 4]"),
+  this->Assert(MaxElementWise, this->array("[1, 2, 3, 4]"),
                {this->array("[1, 2, 3, 4]"), this->array("[null, null, null, null]")});
-  this->Assert(ElementWiseMax, this->array("[1, 2, 3, 4]"),
+  this->Assert(MaxElementWise, this->array("[1, 2, 3, 4]"),
                {this->array("[null, null, null, null]"), this->array("[1, 2, 3, 4]")});
 
-  this->Assert(ElementWiseMax, this->array("[1, 2, 3, 4]"),
+  this->Assert(MaxElementWise, this->array("[1, 2, 3, 4]"),
                {this->scalar("1"), this->array("[1, 2, 3, 4]")});
-  this->Assert(ElementWiseMax, this->array("[1, 1, 1, 1]"),
+  this->Assert(MaxElementWise, this->array("[1, 1, 1, 1]"),
                {this->scalar("1"), this->array("[null, null, null, null]")});
-  this->Assert(ElementWiseMax, this->array("[1, 1, 1, 1]"),
+  this->Assert(MaxElementWise, this->array("[1, 1, 1, 1]"),
                {this->scalar("null"), this->array("[1, 1, 1, 1]")});
-  this->Assert(ElementWiseMax, this->array("[null, null, null, null]"),
+  this->Assert(MaxElementWise, this->array("[null, null, null, null]"),
                {this->scalar("null"), this->array("[null, null, null, null]")});
 
   // Test null handling
   this->element_wise_aggregate_options_.skip_nulls = false;
-  this->AssertNullScalar(ElementWiseMax, {this->scalar("null"), this->scalar("null")});
-  this->AssertNullScalar(ElementWiseMax, {this->scalar("0"), this->scalar("null")});
+  this->AssertNullScalar(MaxElementWise, {this->scalar("null"), this->scalar("null")});
+  this->AssertNullScalar(MaxElementWise, {this->scalar("0"), this->scalar("null")});
 
-  this->Assert(ElementWiseMax, this->array("[4, null, 4, 4]"),
+  this->Assert(MaxElementWise, this->array("[4, null, 4, 4]"),
                {this->array("[1, null, 3, 4]"), this->scalar("2"), this->scalar("4")});
-  this->Assert(ElementWiseMax, this->array("[null, null, null, null]"),
+  this->Assert(MaxElementWise, this->array("[null, null, null, null]"),
                {this->array("[1, null, 3, 4]"), this->scalar("null"), this->scalar("2")});
-  this->Assert(ElementWiseMax, this->array("[2, null, 3, 4]"),
+  this->Assert(MaxElementWise, this->array("[2, null, 3, 4]"),
                {this->array("[1, 2, 3, 4]"), this->array("[2, null, 2, 2]")});
 
-  this->Assert(ElementWiseMax, this->array("[null, null, null, null]"),
+  this->Assert(MaxElementWise, this->array("[null, null, null, null]"),
                {this->scalar("1"), this->array("[null, null, null, null]")});
-  this->Assert(ElementWiseMax, this->array("[null, null, null, null]"),
+  this->Assert(MaxElementWise, this->array("[null, null, null, null]"),
                {this->scalar("null"), this->array("[1, 1, 1, 1]")});
 }
 
-TYPED_TEST(TestVarArgsCompareFloating, ElementWiseMax) {
+TYPED_TEST(TestVarArgsCompareFloating, MaxElementWise) {
   auto Check = [this](const std::string& expected,
                       const std::vector<std::string>& inputs) {
     std::vector<Datum> args;
     for (const auto& input : inputs) {
       args.emplace_back(this->scalar(input));
     }
-    this->Assert(ElementWiseMax, this->scalar(expected), args);
+    this->Assert(MaxElementWise, this->scalar(expected), args);
 
     args.clear();
     for (const auto& input : inputs) {
       args.emplace_back(this->array("[" + input + "]"));
     }
-    this->Assert(ElementWiseMax, this->array("[" + expected + "]"), args);
+    this->Assert(MaxElementWise, this->array("[" + expected + "]"), args);
   };
   Check("0.0", {"0.0", "-0.0"});
   Check("1.0", {"1.0", "-0.0", "0.0"});
@@ -948,34 +948,34 @@ TYPED_TEST(TestVarArgsCompareFloating, ElementWiseMax) {
   Check("0", {"0", "-Inf"});
 }
 
-TYPED_TEST(TestVarArgsCompareParametricTemporal, ElementWiseMax) {
+TYPED_TEST(TestVarArgsCompareParametricTemporal, MaxElementWise) {
   // Temporal kernel is implemented with numeric kernel underneath
-  this->AssertNullScalar(ElementWiseMax, {});
-  this->AssertNullScalar(ElementWiseMax, {this->scalar("null"), this->scalar("null")});
+  this->AssertNullScalar(MaxElementWise, {});
+  this->AssertNullScalar(MaxElementWise, {this->scalar("null"), this->scalar("null")});
 
-  this->Assert(ElementWiseMax, this->scalar("0"), {this->scalar("0")});
-  this->Assert(ElementWiseMax, this->scalar("2"), {this->scalar("2"), this->scalar("0")});
-  this->Assert(ElementWiseMax, this->scalar("0"),
+  this->Assert(MaxElementWise, this->scalar("0"), {this->scalar("0")});
+  this->Assert(MaxElementWise, this->scalar("2"), {this->scalar("2"), this->scalar("0")});
+  this->Assert(MaxElementWise, this->scalar("0"),
                {this->scalar("0"), this->scalar("null")});
 
-  this->Assert(ElementWiseMax, (this->array("[]")), {this->array("[]")});
-  this->Assert(ElementWiseMax, this->array("[1, 2, 3, null]"),
+  this->Assert(MaxElementWise, (this->array("[]")), {this->array("[]")});
+  this->Assert(MaxElementWise, this->array("[1, 2, 3, null]"),
                {this->array("[1, 2, 3, null]")});
 
-  this->Assert(ElementWiseMax, this->array("[2, 2, 3, 4]"),
+  this->Assert(MaxElementWise, this->array("[2, 2, 3, 4]"),
                {this->array("[1, null, 3, 4]"), this->scalar("null"), this->scalar("2")});
 
-  this->Assert(ElementWiseMax, this->array("[2, 2, 3, 4]"),
+  this->Assert(MaxElementWise, this->array("[2, 2, 3, 4]"),
                {this->array("[1, null, 3, 4]"), this->array("[2, 2, null, 2]")});
 }
 
-TEST(TestElementWiseMaxElementWiseMin, CommonTimestamp) {
+TEST(TestMaxElementWiseMinElementWise, CommonTimestamp) {
   {
     auto t1 = std::make_shared<TimestampType>(TimeUnit::SECOND);
     auto t2 = std::make_shared<TimestampType>(TimeUnit::MILLI);
     auto expected = MakeScalar(t2, 1000).ValueOrDie();
     ASSERT_OK_AND_ASSIGN(auto actual,
-                         ElementWiseMin({Datum(MakeScalar(t1, 1).ValueOrDie()),
+                         MinElementWise({Datum(MakeScalar(t1, 1).ValueOrDie()),
                                          Datum(MakeScalar(t2, 12000).ValueOrDie())}));
     AssertScalarsEqual(*expected, *actual.scalar(), /*verbose=*/true);
   }
@@ -984,7 +984,7 @@ TEST(TestElementWiseMaxElementWiseMin, CommonTimestamp) {
     auto t2 = std::make_shared<TimestampType>(TimeUnit::SECOND);
     auto expected = MakeScalar(t2, 86401).ValueOrDie();
     ASSERT_OK_AND_ASSIGN(auto actual,
-                         ElementWiseMax({Datum(MakeScalar(t1, 1).ValueOrDie()),
+                         MaxElementWise({Datum(MakeScalar(t1, 1).ValueOrDie()),
                                          Datum(MakeScalar(t2, 86401).ValueOrDie())}));
     AssertScalarsEqual(*expected, *actual.scalar(), /*verbose=*/true);
   }
@@ -994,7 +994,7 @@ TEST(TestElementWiseMaxElementWiseMin, CommonTimestamp) {
     auto t3 = std::make_shared<TimestampType>(TimeUnit::SECOND);
     auto expected = MakeScalar(t3, 86400).ValueOrDie();
     ASSERT_OK_AND_ASSIGN(
-        auto actual, ElementWiseMin({Datum(MakeScalar(t1, 1).ValueOrDie()),
+        auto actual, MinElementWise({Datum(MakeScalar(t1, 1).ValueOrDie()),
                                      Datum(MakeScalar(t2, 2 * 86400000).ValueOrDie())}));
     AssertScalarsEqual(*expected, *actual.scalar(), /*verbose=*/true);
   }
diff --git a/cpp/src/arrow/compute/kernels/scalar_string.cc b/cpp/src/arrow/compute/kernels/scalar_string.cc
index cd054fcea0e..3f63bf2c405 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string.cc
@@ -3344,12 +3344,227 @@ struct BinaryJoin {
   }
 };
 
+using BinaryJoinElementWiseState = OptionsWrapper<JoinOptions>;
+
+template <typename Type>
+struct BinaryJoinElementWise {
+  using ArrayType = typename TypeTraits<Type>::ArrayType;
+  using BuilderType = typename TypeTraits<Type>::BuilderType;
+  using offset_type = typename Type::offset_type;
+
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    JoinOptions options = BinaryJoinElementWiseState::Get(ctx);
+    // Last argument is the separator (for consistency with binary_join)
+    if (std::all_of(batch.values.begin(), batch.values.end(),
+                    [](const Datum& d) { return d.is_scalar(); })) {
+      return ExecOnlyScalar(ctx, options, batch, out);
+    }
+    return ExecContainingArrays(ctx, options, batch, out);
+  }
+
+  static Status ExecOnlyScalar(KernelContext* ctx, const JoinOptions& options,
+                               const ExecBatch& batch, Datum* out) {
+    BaseBinaryScalar* output = checked_cast<BaseBinaryScalar*>(out->scalar().get());
+    const size_t num_args = batch.values.size();
+    if (num_args == 1) {
+      // Only separator, no values
+      ARROW_ASSIGN_OR_RAISE(output->value, ctx->Allocate(0));
+      output->is_valid = batch.values[0].scalar()->is_valid;
+      return Status::OK();
+    }
+
+    int64_t final_size = CalculateRowSize(options, batch, 0);
+    if (final_size < 0) {
+      ARROW_ASSIGN_OR_RAISE(output->value, ctx->Allocate(0));
+      output->is_valid = false;
+      return Status::OK();
+    }
+    ARROW_ASSIGN_OR_RAISE(output->value, ctx->Allocate(final_size));
+    const auto separator = UnboxScalar<Type>::Unbox(*batch.values.back().scalar());
+    uint8_t* buf = output->value->mutable_data();
+    bool first = true;
+    for (size_t i = 0; i < num_args - 1; i++) {
+      const Scalar& scalar = *batch[i].scalar();
+      util::string_view s;
+      if (scalar.is_valid) {
+        s = UnboxScalar<Type>::Unbox(scalar);
+      } else {
+        switch (options.null_handling) {
+          case JoinOptions::EMIT_NULL:
+            // Handled by CalculateRowSize
+            DCHECK(false) << "unreachable";
+            break;
+          case JoinOptions::SKIP:
+            continue;
+          case JoinOptions::REPLACE:
+            s = options.null_replacement;
+            break;
+        }
+      }
+      if (!first) {
+        buf = std::copy(separator.begin(), separator.end(), buf);
+      }
+      first = false;
+      buf = std::copy(s.begin(), s.end(), buf);
+    }
+    output->is_valid = true;
+    DCHECK_EQ(final_size, buf - output->value->mutable_data());
+    return Status::OK();
+  }
+
+  static Status ExecContainingArrays(KernelContext* ctx, const JoinOptions& options,
+                                     const ExecBatch& batch, Datum* out) {
+    // Presize data to avoid reallocations
+    int64_t final_size = 0;
+    for (int64_t i = 0; i < batch.length; i++) {
+      auto size = CalculateRowSize(options, batch, i);
+      if (size > 0) final_size += size;
+    }
+    BuilderType builder(ctx->memory_pool());
+    RETURN_NOT_OK(builder.Reserve(batch.length));
+    RETURN_NOT_OK(builder.ReserveData(final_size));
+
+    std::vector<util::string_view> valid_cols(batch.values.size());
+    for (size_t row = 0; row < static_cast<size_t>(batch.length); row++) {
+      size_t num_valid = 0;  // Not counting separator
+      for (size_t col = 0; col < batch.values.size(); col++) {
+        if (batch[col].is_scalar()) {
+          const auto& scalar = *batch[col].scalar();
+          if (scalar.is_valid) {
+            valid_cols[col] = UnboxScalar<Type>::Unbox(scalar);
+            if (col < batch.values.size() - 1) num_valid++;
+          } else {
+            valid_cols[col] = util::string_view();
+          }
+        } else {
+          const ArrayData& array = *batch[col].array();
+          if (!array.MayHaveNulls() ||
+              BitUtil::GetBit(array.buffers[0]->data(), array.offset + row)) {
+            const offset_type* offsets = array.GetValues<offset_type>(1);
+            const uint8_t* data = array.GetValues<uint8_t>(2, /*absolute_offset=*/0);
+            const int64_t length = offsets[row + 1] - offsets[row];
+            valid_cols[col] = util::string_view(
+                reinterpret_cast<const char*>(data + offsets[row]), length);
+            if (col < batch.values.size() - 1) num_valid++;
+          } else {
+            valid_cols[col] = util::string_view();
+          }
+        }
+      }
+
+      if (!valid_cols.back().data()) {
+        // Separator is null
+        builder.UnsafeAppendNull();
+        continue;
+      } else if (batch.values.size() == 1) {
+        // Only given separator
+        builder.UnsafeAppendEmptyValue();
+        continue;
+      } else if (num_valid < batch.values.size() - 1) {
+        // We had some nulls
+        if (options.null_handling == JoinOptions::EMIT_NULL) {
+          builder.UnsafeAppendNull();
+          continue;
+        }
+      }
+      const auto separator = valid_cols.back();
+      bool first = true;
+      for (size_t col = 0; col < batch.values.size() - 1; col++) {
+        util::string_view value = valid_cols[col];
+        if (!value.data()) {
+          switch (options.null_handling) {
+            case JoinOptions::EMIT_NULL:
+              DCHECK(false) << "unreachable";
+              break;
+            case JoinOptions::SKIP:
+              continue;
+            case JoinOptions::REPLACE:
+              value = options.null_replacement;
+              break;
+          }
+        }
+        if (first) {
+          builder.UnsafeAppend(value);
+          first = false;
+          continue;
+        }
+        builder.UnsafeExtendCurrent(separator);
+        builder.UnsafeExtendCurrent(value);
+      }
+    }
+
+    std::shared_ptr<Array> string_array;
+    RETURN_NOT_OK(builder.Finish(&string_array));
+    *out = *string_array->data();
+    out->mutable_array()->type = batch[0].type();
+    DCHECK_EQ(batch.length, out->array()->length);
+    DCHECK_EQ(final_size,
+              checked_cast<const ArrayType&>(*string_array).total_values_length());
+    return Status::OK();
+  }
+
+  // Compute the length of the output for the given position, or -1 if it would be null.
+  static int64_t CalculateRowSize(const JoinOptions& options, const ExecBatch& batch,
+                                  const int64_t index) {
+    const auto num_args = batch.values.size();
+    int64_t final_size = 0;
+    int64_t num_non_null_args = 0;
+    for (size_t i = 0; i < num_args; i++) {
+      int64_t element_size = 0;
+      bool valid = true;
+      if (batch[i].is_scalar()) {
+        const Scalar& scalar = *batch[i].scalar();
+        valid = scalar.is_valid;
+        element_size = UnboxScalar<Type>::Unbox(scalar).size();
+      } else {
+        const ArrayData& array = *batch[i].array();
+        valid = !array.MayHaveNulls() ||
+                BitUtil::GetBit(array.buffers[0]->data(), array.offset + index);
+        const offset_type* offsets = array.GetValues<offset_type>(1);
+        element_size = offsets[index + 1] - offsets[index];
+      }
+      if (i == num_args - 1) {
+        if (!valid) return -1;
+        if (num_non_null_args > 1) {
+          // Add separator size (only if there were values to join)
+          final_size += (num_non_null_args - 1) * element_size;
+        }
+        break;
+      }
+      if (!valid) {
+        switch (options.null_handling) {
+          case JoinOptions::EMIT_NULL:
+            return -1;
+          case JoinOptions::SKIP:
+            continue;
+          case JoinOptions::REPLACE:
+            element_size = options.null_replacement.size();
+            break;
+        }
+      }
+      num_non_null_args++;
+      final_size += element_size;
+    }
+    return final_size;
+  }
+};
+
 const FunctionDoc binary_join_doc(
     "Join a list of strings together with a `separator` to form a single string",
     ("Insert `separator` between `list` elements, and concatenate them.\n"
      "Any null input and any null `list` element emits a null output.\n"),
     {"list", "separator"});
 
+const FunctionDoc binary_join_element_wise_doc(
+    "Join string arguments into one, using the last argument as the separator",
+    ("Insert the last argument of `strings` between the rest of the elements, "
+     "and concatenate them.\n"
+     "Any null separator element emits a null output. Null elements either "
+     "emit a null (the default), are skipped, or replaced with a given string.\n"),
+    {"*strings"}, "JoinOptions");
+
+const auto kDefaultJoinOptions = JoinOptions::Defaults();
+
 template <typename ListType>
 void AddBinaryJoinForListType(ScalarFunction* func) {
   for (const std::shared_ptr<DataType>& ty : BaseBinaryTypes()) {
@@ -3360,11 +3575,25 @@ void AddBinaryJoinForListType(ScalarFunction* func) {
 }
 
 void AddBinaryJoin(FunctionRegistry* registry) {
-  auto func =
-      std::make_shared<ScalarFunction>("binary_join", Arity::Binary(), &binary_join_doc);
-  AddBinaryJoinForListType<ListType>(func.get());
-  AddBinaryJoinForListType<LargeListType>(func.get());
-  DCHECK_OK(registry->AddFunction(std::move(func)));
+  {
+    auto func = std::make_shared<ScalarFunction>("binary_join", Arity::Binary(),
+                                                 &binary_join_doc);
+    AddBinaryJoinForListType<ListType>(func.get());
+    AddBinaryJoinForListType<LargeListType>(func.get());
+    DCHECK_OK(registry->AddFunction(std::move(func)));
+  }
+  {
+    auto func = std::make_shared<ScalarFunction>(
+        "binary_join_element_wise", Arity::VarArgs(/*min_args=*/1),
+        &binary_join_element_wise_doc, &kDefaultJoinOptions);
+    for (const auto& ty : BaseBinaryTypes()) {
+      DCHECK_OK(
+          func->AddKernel({InputType(ty)}, ty,
+                          GenerateTypeAgnosticVarBinaryBase<BinaryJoinElementWise>(ty),
+                          BinaryJoinElementWiseState::Init));
+    }
+    DCHECK_OK(registry->AddFunction(std::move(func)));
+  }
 }
 
 template <template <typename> class ExecFunctor>
diff --git a/cpp/src/arrow/compute/kernels/scalar_string_benchmark.cc b/cpp/src/arrow/compute/kernels/scalar_string_benchmark.cc
index 606e774451c..ddc3a56f00f 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string_benchmark.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string_benchmark.cc
@@ -169,6 +169,47 @@ static void BinaryJoinArrayArray(benchmark::State& state) {
   });
 }
 
+static void BinaryJoinElementWise(benchmark::State& state,
+                                  SeparatorFactory make_separator) {
+  // Unfortunately benchmark is not 1:1 with BinaryJoin since BinaryJoin can join a
+  // varying number of inputs per output
+  const int64_t n_rows = 10000;
+  const int64_t n_cols = state.range(0);
+  const double null_probability = 0.02;
+
+  random::RandomArrayGenerator rng(kSeed);
+
+  DatumVector args;
+  ArrayVector strings;
+  int64_t total_values_length = 0;
+  for (int i = 0; i < n_cols; i++) {
+    auto arr = rng.String(n_rows, /*min_length=*/5, /*max_length=*/20, null_probability);
+    strings.push_back(arr);
+    args.emplace_back(arr);
+    total_values_length += checked_cast<const StringArray&>(*arr).total_values_length();
+  }
+  auto separator = make_separator(n_rows, null_probability);
+  args.emplace_back(separator);
+
+  for (auto _ : state) {
+    ABORT_NOT_OK(CallFunction("binary_join_element_wise", args));
+  }
+  state.SetBytesProcessed(state.iterations() * total_values_length);
+}
+
+static void BinaryJoinElementWiseArrayScalar(benchmark::State& state) {
+  BinaryJoinElementWise(state, [](int64_t n, double null_probability) -> Datum {
+    return ScalarFromJSON(utf8(), R"("--")");
+  });
+}
+
+static void BinaryJoinElementWiseArrayArray(benchmark::State& state) {
+  BinaryJoinElementWise(state, [](int64_t n, double null_probability) -> Datum {
+    random::RandomArrayGenerator rng(kSeed + 1);
+    return rng.String(n, /*min_length=*/0, /*max_length=*/4, null_probability);
+  });
+}
+
 BENCHMARK(AsciiLower);
 BENCHMARK(AsciiUpper);
 BENCHMARK(IsAlphaNumericAscii);
@@ -192,6 +233,8 @@ BENCHMARK(TrimManyUtf8);
 
 BENCHMARK(BinaryJoinArrayScalar);
 BENCHMARK(BinaryJoinArrayArray);
+BENCHMARK(BinaryJoinElementWiseArrayScalar)->RangeMultiplier(8)->Range(2, 128);
+BENCHMARK(BinaryJoinElementWiseArrayArray)->RangeMultiplier(8)->Range(2, 128);
 
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
index 2053dbaa971..6192e0a5dd7 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
@@ -58,6 +58,26 @@ class BaseTestStringKernels : public ::testing::Test {
                             json_expected, options);
   }
 
+  void CheckVarArgsScalar(std::string func_name, std::string json_input,
+                          std::shared_ptr<DataType> out_ty, std::string json_expected,
+                          const FunctionOptions* options = nullptr) {
+    // CheckScalar (on arrays) checks scalar arguments individually,
+    // but this lets us test the all-scalar case explicitly
+    ScalarVector inputs;
+    std::shared_ptr<Array> args = ArrayFromJSON(type(), json_input);
+    for (int64_t i = 0; i < args->length(); i++) {
+      ASSERT_OK_AND_ASSIGN(auto scalar, args->GetScalar(i));
+      inputs.push_back(std::move(scalar));
+    }
+    CheckScalar(func_name, inputs, ScalarFromJSON(out_ty, json_expected), options);
+  }
+
+  void CheckVarArgs(std::string func_name, const std::vector<Datum>& inputs,
+                    std::shared_ptr<DataType> out_ty, std::string json_expected,
+                    const FunctionOptions* options = nullptr) {
+    CheckScalar(func_name, inputs, ArrayFromJSON(out_ty, json_expected), options);
+  }
+
   std::shared_ptr<DataType> type() { return TypeTraits<TestType>::type_singleton(); }
 
   template <typename CType>
@@ -229,6 +249,105 @@ TYPED_TEST(TestBinaryKernels, CountSubstringIgnoreCase) {
 }
 #endif
 
+TYPED_TEST(TestBinaryKernels, BinaryJoinElementWise) {
+  const auto ty = this->type();
+  JoinOptions options;
+  JoinOptions options_skip(JoinOptions::SKIP);
+  JoinOptions options_replace(JoinOptions::REPLACE, "X");
+  // Scalar args, Scalar separator
+  this->CheckVarArgsScalar("binary_join_element_wise", R"([null])", ty, R"(null)",
+                           &options);
+  this->CheckVarArgsScalar("binary_join_element_wise", R"(["-"])", ty, R"("")", &options);
+  this->CheckVarArgsScalar("binary_join_element_wise", R"(["a", "-"])", ty, R"("a")",
+                           &options);
+  this->CheckVarArgsScalar("binary_join_element_wise", R"(["a", "b", "-"])", ty,
+                           R"("a-b")", &options);
+  this->CheckVarArgsScalar("binary_join_element_wise", R"(["a", "b", null])", ty,
+                           R"(null)", &options);
+  this->CheckVarArgsScalar("binary_join_element_wise", R"(["a", null, "-"])", ty,
+                           R"(null)", &options);
+  this->CheckVarArgsScalar("binary_join_element_wise", R"(["foo", "bar", "baz", "++"])",
+                           ty, R"("foo++bar++baz")", &options);
+
+  // Scalar args, Array separator
+  const auto sep = ArrayFromJSON(ty, R"([null, "-", "--"])");
+  const auto scalar1 = ScalarFromJSON(ty, R"("foo")");
+  const auto scalar2 = ScalarFromJSON(ty, R"("bar")");
+  const auto scalar3 = ScalarFromJSON(ty, R"("")");
+  const auto scalar_null = ScalarFromJSON(ty, R"(null)");
+  this->CheckVarArgs("binary_join_element_wise", {sep}, ty, R"([null, "", ""])",
+                     &options);
+  this->CheckVarArgs("binary_join_element_wise", {scalar1, sep}, ty,
+                     R"([null, "foo", "foo"])", &options);
+  this->CheckVarArgs("binary_join_element_wise", {scalar1, scalar2, sep}, ty,
+                     R"([null, "foo-bar", "foo--bar"])", &options);
+  this->CheckVarArgs("binary_join_element_wise", {scalar1, scalar_null, sep}, ty,
+                     R"([null, null, null])", &options);
+  this->CheckVarArgs("binary_join_element_wise", {scalar1, scalar2, scalar3, sep}, ty,
+                     R"([null, "foo-bar-", "foo--bar--"])", &options);
+
+  // Array args, Scalar separator
+  const auto sep1 = ScalarFromJSON(ty, R"("-")");
+  const auto sep2 = ScalarFromJSON(ty, R"("--")");
+  const auto arr1 = ArrayFromJSON(ty, R"([null, "a", "bb", "ccc"])");
+  const auto arr2 = ArrayFromJSON(ty, R"(["d", null, "e", ""])");
+  const auto arr3 = ArrayFromJSON(ty, R"(["gg", null, "h", "iii"])");
+  this->CheckVarArgs("binary_join_element_wise", {arr1, arr2, arr3, scalar_null}, ty,
+                     R"([null, null, null, null])", &options);
+  this->CheckVarArgs("binary_join_element_wise", {arr1, arr2, arr3, sep1}, ty,
+                     R"([null, null, "bb-e-h", "ccc--iii"])", &options);
+  this->CheckVarArgs("binary_join_element_wise", {arr1, arr2, arr3, sep2}, ty,
+                     R"([null, null, "bb--e--h", "ccc----iii"])", &options);
+
+  // Array args, Array separator
+  const auto sep3 = ArrayFromJSON(ty, R"(["-", "--", null, "---"])");
+  this->CheckVarArgs("binary_join_element_wise", {arr1, arr2, arr3, sep3}, ty,
+                     R"([null, null, null, "ccc------iii"])", &options);
+
+  // Mixed
+  this->CheckVarArgs("binary_join_element_wise", {arr1, arr2, scalar2, sep3}, ty,
+                     R"([null, null, null, "ccc------bar"])", &options);
+  this->CheckVarArgs("binary_join_element_wise", {arr1, arr2, scalar_null, sep3}, ty,
+                     R"([null, null, null, null])", &options);
+  this->CheckVarArgs("binary_join_element_wise", {arr1, arr2, scalar2, sep1}, ty,
+                     R"([null, null, "bb-e-bar", "ccc--bar"])", &options);
+  this->CheckVarArgs("binary_join_element_wise", {arr1, arr2, scalar_null, scalar_null},
+                     ty, R"([null, null, null, null])", &options);
+
+  // Skip
+  this->CheckVarArgsScalar("binary_join_element_wise", R"(["a", null, "b", "-"])", ty,
+                           R"("a-b")", &options_skip);
+  this->CheckVarArgsScalar("binary_join_element_wise", R"(["a", null, "b", null])", ty,
+                           R"(null)", &options_skip);
+  this->CheckVarArgs("binary_join_element_wise", {arr1, arr2, scalar2, sep3}, ty,
+                     R"(["d-bar", "a--bar", null, "ccc------bar"])", &options_skip);
+  this->CheckVarArgs("binary_join_element_wise", {arr1, arr2, scalar_null, sep3}, ty,
+                     R"(["d", "a", null, "ccc---"])", &options_skip);
+  this->CheckVarArgs("binary_join_element_wise", {arr1, arr2, scalar2, sep1}, ty,
+                     R"(["d-bar", "a-bar", "bb-e-bar", "ccc--bar"])", &options_skip);
+  this->CheckVarArgs("binary_join_element_wise", {arr1, arr2, scalar_null, scalar_null},
+                     ty, R"([null, null, null, null])", &options_skip);
+
+  // Replace
+  this->CheckVarArgsScalar("binary_join_element_wise", R"(["a", null, "b", "-"])", ty,
+                           R"("a-X-b")", &options_replace);
+  this->CheckVarArgsScalar("binary_join_element_wise", R"(["a", null, "b", null])", ty,
+                           R"(null)", &options_replace);
+  this->CheckVarArgs("binary_join_element_wise", {arr1, arr2, scalar2, sep3}, ty,
+                     R"(["X-d-bar", "a--X--bar", null, "ccc------bar"])",
+                     &options_replace);
+  this->CheckVarArgs("binary_join_element_wise", {arr1, arr2, scalar_null, sep3}, ty,
+                     R"(["X-d-X", "a--X--X", null, "ccc------X"])", &options_replace);
+  this->CheckVarArgs("binary_join_element_wise", {arr1, arr2, scalar2, sep1}, ty,
+                     R"(["X-d-bar", "a-X-bar", "bb-e-bar", "ccc--bar"])",
+                     &options_replace);
+  this->CheckVarArgs("binary_join_element_wise", {arr1, arr2, scalar_null, scalar_null},
+                     ty, R"([null, null, null, null])", &options_replace);
+
+  // Error cases
+  ASSERT_RAISES(Invalid, CallFunction("binary_join_element_wise", {}, &options));
+}
+
 template <typename TestType>
 class TestStringKernels : public BaseTestStringKernels<TestType> {};
 
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index 91ee6bdf599..dfdd64d19c6 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -318,8 +318,8 @@ expanded for the purposes of comparison.
 +--------------------------+------------+---------------------------------------------+---------------------+---------------------------------------+-------+
 | Function names           | Arity      | Input types                                 | Output type         | Options class                         | Notes |
 +==========================+============+=============================================+=====================+=======================================+=======+
-| element_wise_max,        | Varargs    | Numeric and Temporal                        | Numeric or Temporal | :struct:`ElementWiseAggregateOptions` | \(1)  |
-| element_wise_min         |            |                                             |                     |                                       |       |
+| max_element_wise,        | Varargs    | Numeric and Temporal                        | Numeric or Temporal | :struct:`ElementWiseAggregateOptions` | \(1)  |
+| min_element_wise         |            |                                             |                     |                                       |       |
 +--------------------------+------------+---------------------------------------------+---------------------+---------------------------------------+-------+
 
 * \(1) By default, nulls are skipped (but the kernel can be configured to propagate nulls).
@@ -680,19 +680,25 @@ String component extraction
 String joining
 ~~~~~~~~~~~~~~
 
-This function does the inverse of string splitting.
+These functions do the inverse of string splitting.
 
-+-----------------+-----------+----------------------+----------------+-------------------+---------+
-| Function name   | Arity     | Input type 1         | Input type 2   | Output type       | Notes   |
-+=================+===========+======================+================+===================+=========+
-| binary_join     | Binary    | List of string-like  | String-like    | String-like       | \(1)    |
-+-----------------+-----------+----------------------+----------------+-------------------+---------+
++--------------------------+-----------+-----------------------+----------------+-------------------+-----------------------+---------+
+| Function name            | Arity     | Input type 1          | Input type 2   | Output type       | Options class         | Notes   |
++==========================+===========+=======================+================+===================+=======================+=========+
+| binary_join              | Binary    | List of string-like   | String-like    | String-like       |                       | \(1)    |
++--------------------------+-----------+-----------------------+----------------+-------------------+-----------------------+---------+
+| binary_join_element_wise | Varargs   | String-like (varargs) | String-like    | String-like       | :struct:`JoinOptions` | \(2)    |
++--------------------------+-----------+-----------------------+----------------+-------------------+-----------------------+---------+
 
 * \(1) The first input must be an array, while the second can be a scalar or array.
   Each list of values in the first input is joined using each second input
   as separator.  If any input list is null or contains a null, the corresponding
   output will be null.
 
+* \(2) All arguments are concatenated element-wise, with the last argument treated
+  as the separator (scalars are recycled in either case). Null separators emit
+  null. If any other argument is null, by default the corresponding output will be
+  null, but it can instead either be skipped or replaced with a given string.
 
 Slicing
 ~~~~~~~
diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst
index dd722e44f05..80fcb2078f1 100644
--- a/docs/source/python/api/compute.rst
+++ b/docs/source/python/api/compute.rst
@@ -80,8 +80,8 @@ These functions take any number of arguments of a numeric or temporal type.
 .. autosummary::
    :toctree: ../generated/
 
-   element_wise_max
-   element_wise_min
+   max_element_wise
+   min_element_wise
 
 Logical Functions
 -----------------
@@ -159,6 +159,23 @@ String Splitting
    ascii_split_whitespace
    utf8_split_whitespace
 
+String Component Extraction
+---------------------------
+
+.. autosummary::
+   :toctree: ../generated/
+
+   extract_regex
+
+String Joining
+--------------
+
+.. autosummary::
+   :toctree: ../generated/
+
+   binary_join
+   binary_join_element_wise
+
 String Transforms
 -----------------
 
diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx
index 104cd1bac1f..559a8a02b1c 100644
--- a/python/pyarrow/_compute.pyx
+++ b/python/pyarrow/_compute.pyx
@@ -667,6 +667,37 @@ class ElementWiseAggregateOptions(_ElementWiseAggregateOptions):
         self._set_options(skip_nulls)
 
 
+cdef class _JoinOptions(FunctionOptions):
+    cdef:
+        unique_ptr[CJoinOptions] join_options
+
+    cdef const CFunctionOptions* get_options(self) except NULL:
+        return self.join_options.get()
+
+    def _set_options(self, null_handling, null_replacement):
+        cdef:
+            CJoinNullHandlingBehavior c_null_handling = \
+                CJoinNullHandlingBehavior_EMIT_NULL
+            c_string c_null_replacement = tobytes(null_replacement)
+        if null_handling == 'emit_null':
+            c_null_handling = CJoinNullHandlingBehavior_EMIT_NULL
+        elif null_handling == 'skip':
+            c_null_handling = CJoinNullHandlingBehavior_SKIP
+        elif null_handling == 'replace':
+            c_null_handling = CJoinNullHandlingBehavior_REPLACE
+        else:
+            raise ValueError(
+                '"{}" is not a valid null_handling'
+                .format(null_handling))
+        self.join_options.reset(
+            new CJoinOptions(c_null_handling, c_null_replacement))
+
+
+class JoinOptions(_JoinOptions):
+    def __init__(self, null_handling='emit_null', null_replacement=''):
+        self._set_options(null_handling, null_replacement)
+
+
 cdef class _MatchSubstringOptions(FunctionOptions):
     cdef:
         unique_ptr[CMatchSubstringOptions] match_substring_options
diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py
index b8bd9e65f17..b258b551f02 100644
--- a/python/pyarrow/compute.py
+++ b/python/pyarrow/compute.py
@@ -36,6 +36,7 @@
     ExtractRegexOptions,
     FilterOptions,
     IndexOptions,
+    JoinOptions,
     MatchSubstringOptions,
     ModeOptions,
     PartitionNthOptions,
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 072062385ca..0a8c7494989 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -1796,6 +1796,22 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil:
         CElementWiseAggregateOptions(c_bool skip_nulls)
         c_bool skip_nulls
 
+    enum CJoinNullHandlingBehavior \
+            "arrow::compute::JoinOptions::NullHandlingBehavior":
+        CJoinNullHandlingBehavior_EMIT_NULL \
+            "arrow::compute::JoinOptions::EMIT_NULL"
+        CJoinNullHandlingBehavior_SKIP \
+            "arrow::compute::JoinOptions::SKIP"
+        CJoinNullHandlingBehavior_REPLACE \
+            "arrow::compute::JoinOptions::REPLACE"
+
+    cdef cppclass CJoinOptions \
+            "arrow::compute::JoinOptions"(CFunctionOptions):
+        CJoinOptions(CJoinNullHandlingBehavior null_handling,
+                     c_string null_replacement)
+        CJoinNullHandlingBehavior null_handling
+        c_string null_replacement
+
     cdef cppclass CMatchSubstringOptions \
             "arrow::compute::MatchSubstringOptions"(CFunctionOptions):
         CMatchSubstringOptions(c_string pattern, c_bool ignore_case)
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index 1ed582db831..efe2e6be2f8 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -766,6 +766,36 @@ def test_binary_join():
     assert pc.binary_join(ar_list, separator_array).equals(expected)
 
 
+def test_binary_join_element_wise():
+    null = pa.scalar(None, type=pa.string())
+    arrs = [[None, 'a', 'b'], ['c', None, 'd'], [None, '-', '--']]
+    assert pc.binary_join_element_wise(*arrs).to_pylist() == \
+        [None, None, 'b--d']
+    assert pc.binary_join_element_wise('a', 'b', '-').as_py() == 'a-b'
+    assert pc.binary_join_element_wise('a', null, '-').as_py() is None
+    assert pc.binary_join_element_wise('a', 'b', null).as_py() is None
+
+    skip = pc.JoinOptions('skip')
+    assert pc.binary_join_element_wise(*arrs, options=skip).to_pylist() == \
+        [None, 'a', 'b--d']
+    assert pc.binary_join_element_wise(
+        'a', 'b', '-', options=skip).as_py() == 'a-b'
+    assert pc.binary_join_element_wise(
+        'a', null, '-', options=skip).as_py() == 'a'
+    assert pc.binary_join_element_wise(
+        'a', 'b', null, options=skip).as_py() is None
+
+    replace = pc.JoinOptions('replace', null_replacement='spam')
+    assert pc.binary_join_element_wise(*arrs, options=replace).to_pylist() == \
+        [None, 'a-spam', 'b--d']
+    assert pc.binary_join_element_wise(
+        'a', 'b', '-', options=replace).as_py() == 'a-b'
+    assert pc.binary_join_element_wise(
+        'a', null, '-', options=replace).as_py() == 'a-spam'
+    assert pc.binary_join_element_wise(
+        'a', 'b', null, options=replace).as_py() is None
+
+
 @pytest.mark.parametrize(('ty', 'values'), all_array_types)
 def test_take(ty, values):
     arr = pa.array(values, type=ty)
@@ -1437,35 +1467,35 @@ def test_fill_null_segfault():
     assert result == pa.array([0], pa.int8())
 
 
-def test_elementwise_min_max():
+def test_min_max_element_wise():
     arr1 = pa.array([1, 2, 3])
     arr2 = pa.array([3, 1, 2])
     arr3 = pa.array([2, 3, None])
 
-    result = pc.element_wise_max(arr1, arr2)
+    result = pc.max_element_wise(arr1, arr2)
     assert result == pa.array([3, 2, 3])
-    result = pc.element_wise_min(arr1, arr2)
+    result = pc.min_element_wise(arr1, arr2)
     assert result == pa.array([1, 1, 2])
 
-    result = pc.element_wise_max(arr1, arr2, arr3)
+    result = pc.max_element_wise(arr1, arr2, arr3)
     assert result == pa.array([3, 3, 3])
-    result = pc.element_wise_min(arr1, arr2, arr3)
+    result = pc.min_element_wise(arr1, arr2, arr3)
     assert result == pa.array([1, 1, 2])
 
     # with specifying the option
-    result = pc.element_wise_max(arr1, arr3, skip_nulls=True)
+    result = pc.max_element_wise(arr1, arr3, skip_nulls=True)
     assert result == pa.array([2, 3, 3])
-    result = pc.element_wise_min(arr1, arr3, skip_nulls=True)
+    result = pc.min_element_wise(arr1, arr3, skip_nulls=True)
     assert result == pa.array([1, 2, 3])
-    result = pc.element_wise_max(
+    result = pc.max_element_wise(
         arr1, arr3, options=pc.ElementWiseAggregateOptions())
     assert result == pa.array([2, 3, 3])
-    result = pc.element_wise_min(
+    result = pc.min_element_wise(
         arr1, arr3, options=pc.ElementWiseAggregateOptions())
     assert result == pa.array([1, 2, 3])
 
     # not skipping nulls
-    result = pc.element_wise_max(arr1, arr3, skip_nulls=False)
+    result = pc.max_element_wise(arr1, arr3, skip_nulls=False)
     assert result == pa.array([2, 3, None])
-    result = pc.element_wise_min(arr1, arr3, skip_nulls=False)
+    result = pc.min_element_wise(arr1, arr3, skip_nulls=False)
     assert result == pa.array([1, 2, None])

From dbcd0d944ce9cbf30e2e95468276a89450ac97cb Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Wed, 16 Jun 2021 16:18:07 -0700
Subject: [PATCH 426/719] ARROW-11705: [R] Support scalar value recycling in
 RecordBatch/Table$create()

This also adds missing spaces in some unrelated R files

Closes #10269 from thisisnic/ARROW-11705_scalar_recycling

Lead-authored-by: Nic Crane <thisisnic@gmail.com>
Co-authored-by: Nic <thisisnic@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 r/R/arrow-datum.R                     |  4 +--
 r/R/arrow-package.R                   |  2 +-
 r/R/arrow-tabular.R                   |  4 +--
 r/R/arrowExports.R                    |  4 +--
 r/R/chunked-array.R                   |  2 +-
 r/R/compression.R                     |  4 +--
 r/R/compute.R                         |  4 +--
 r/R/csv.R                             |  6 ++--
 r/R/enums.R                           |  4 +--
 r/R/filesystem.R                      |  2 +-
 r/R/metadata.R                        |  2 +-
 r/R/parquet.R                         |  4 +--
 r/R/record-batch.R                    |  3 ++
 r/R/scalar.R                          |  2 +-
 r/R/table.R                           | 19 ++++++-----
 r/R/util.R                            | 45 +++++++++++++++++++++++++
 r/data-raw/codegen.R                  |  8 ++---
 r/extra-tests/helpers.R               |  4 +--
 r/extra-tests/write-files.R           |  2 +-
 r/man/recycle_scalars.Rd              | 18 ++++++++++
 r/man/repeat_value_as_array.Rd        | 20 +++++++++++
 r/src/arrowExports.cpp                | 11 +++---
 r/src/scalar.cpp                      |  4 +--
 r/tests/testthat/helper-expectation.R |  6 ++--
 r/tests/testthat/test-RecordBatch.R   | 46 +++++++++++++++++++++----
 r/tests/testthat/test-Table.R         | 48 +++++++++++++++++++++++++--
 r/tests/testthat/test-dataset.R       |  2 +-
 r/tools/winlibs.R                     |  6 ++--
 28 files changed, 226 insertions(+), 60 deletions(-)
 create mode 100644 r/man/recycle_scalars.Rd
 create mode 100644 r/man/repeat_value_as_array.Rd

diff --git a/r/R/arrow-datum.R b/r/R/arrow-datum.R
index 3be8d75af0b..8becc37daf2 100644
--- a/r/R/arrow-datum.R
+++ b/r/R/arrow-datum.R
@@ -128,7 +128,7 @@ eval_array_expression <- function(FUN,
 }
 
 #' @export
-na.omit.ArrowDatum <- function(object, ...){
+na.omit.ArrowDatum <- function(object, ...) {
   object$Filter(!is.na(object))
 }
 
@@ -136,7 +136,7 @@ na.omit.ArrowDatum <- function(object, ...){
 na.exclude.ArrowDatum <- na.omit.ArrowDatum
 
 #' @export
-na.fail.ArrowDatum <- function(object, ...){
+na.fail.ArrowDatum <- function(object, ...) {
   if (object$null_count > 0) {
     stop("missing values in object", call. = FALSE)
   }
diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R
index 684382039f1..d2bf81cf5ee 100644
--- a/r/R/arrow-package.R
+++ b/r/R/arrow-package.R
@@ -279,7 +279,7 @@ ArrowObject <- R6Class("ArrowObject",
         class_title <- class(self)[[1]]
       }
       cat(class_title, "\n", sep = "")
-      if (!is.null(self$ToString)){
+      if (!is.null(self$ToString)) {
         cat(self$ToString(), "\n", sep = "")
       }
       invisible(self)
diff --git a/r/R/arrow-tabular.R b/r/R/arrow-tabular.R
index f5535f9ac20..440dcea5994 100644
--- a/r/R/arrow-tabular.R
+++ b/r/R/arrow-tabular.R
@@ -212,7 +212,7 @@ head.ArrowTabular <- head.ArrowDatum
 tail.ArrowTabular <- tail.ArrowDatum
 
 #' @export
-na.fail.ArrowTabular <- function(object, ...){
+na.fail.ArrowTabular <- function(object, ...) {
   for (col in seq_len(object$num_columns)) {
     if (object$column(col - 1L)$null_count > 0) {
       stop("missing values in object", call. = FALSE)
@@ -222,7 +222,7 @@ na.fail.ArrowTabular <- function(object, ...){
 }
 
 #' @export
-na.omit.ArrowTabular <- function(object, ...){
+na.omit.ArrowTabular <- function(object, ...) {
   not_na <- map(object$columns, ~call_function("is_valid", .x))
   not_na_agg <- Reduce("&", not_na)
   object$Filter(not_na_agg)
diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R
index 45a0ea69c59..577773c42bd 100644
--- a/r/R/arrowExports.R
+++ b/r/R/arrowExports.R
@@ -1548,8 +1548,8 @@ Scalar__as_vector <- function(scalar){
     .Call(`_arrow_Scalar__as_vector`, scalar)
 }
 
-MakeArrayFromScalar <- function(scalar){
-    .Call(`_arrow_MakeArrayFromScalar`, scalar)
+MakeArrayFromScalar <- function(scalar, n){
+    .Call(`_arrow_MakeArrayFromScalar`, scalar, n)
 }
 
 Scalar__is_valid <- function(s){
diff --git a/r/R/chunked-array.R b/r/R/chunked-array.R
index fac1eeba2b1..c58e5ac94f9 100644
--- a/r/R/chunked-array.R
+++ b/r/R/chunked-array.R
@@ -83,7 +83,7 @@ ChunkedArray <- R6Class("ChunkedArray", inherit = ArrowDatum,
     type_id = function() ChunkedArray__type(self)$id,
     chunk = function(i) Array$create(ChunkedArray__chunk(self, i)),
     as_vector = function() ChunkedArray__as_vector(self),
-    Slice = function(offset, length = NULL){
+    Slice = function(offset, length = NULL) {
       if (is.null(length)) {
         ChunkedArray__Slice1(self, offset)
       } else {
diff --git a/r/R/compression.R b/r/R/compression.R
index 8fd709f4fda..499a75c83e1 100644
--- a/r/R/compression.R
+++ b/r/R/compression.R
@@ -99,7 +99,7 @@ compression_from_name <- function(name) {
 #' @export
 #' @include arrow-package.R
 CompressedOutputStream <- R6Class("CompressedOutputStream", inherit = OutputStream)
-CompressedOutputStream$create <- function(stream, codec = "gzip", compression_level = NA){
+CompressedOutputStream$create <- function(stream, codec = "gzip", compression_level = NA) {
   codec <- Codec$create(codec, compression_level = compression_level)
   if (is.string(stream)) {
     stream <- FileOutputStream$create(stream)
@@ -113,7 +113,7 @@ CompressedOutputStream$create <- function(stream, codec = "gzip", compression_le
 #' @format NULL
 #' @export
 CompressedInputStream <- R6Class("CompressedInputStream", inherit = InputStream)
-CompressedInputStream$create <- function(stream, codec = "gzip", compression_level = NA){
+CompressedInputStream$create <- function(stream, codec = "gzip", compression_level = NA) {
   codec <- Codec$create(codec, compression_level = compression_level)
   if (is.string(stream)) {
     stream <- ReadableFile$create(stream)
diff --git a/r/R/compute.R b/r/R/compute.R
index 4d36f6057b6..5a00e884980 100644
--- a/r/R/compute.R
+++ b/r/R/compute.R
@@ -202,7 +202,7 @@ unique.ArrowDatum <- function(x, incomparables = FALSE, ...) {
 }
 
 #' @export
-any.ArrowDatum <- function(..., na.rm = FALSE){
+any.ArrowDatum <- function(..., na.rm = FALSE) {
   
   a <- collect_arrays_from_dots(list(...))
   result <- call_function("any", a)
@@ -217,7 +217,7 @@ any.ArrowDatum <- function(..., na.rm = FALSE){
 }
 
 #' @export
-all.ArrowDatum <- function(..., na.rm = FALSE){
+all.ArrowDatum <- function(..., na.rm = FALSE) {
   
   a <- collect_arrays_from_dots(list(...))
   result <- call_function("all", a)
diff --git a/r/R/csv.R b/r/R/csv.R
index 2708a5370f0..1312a2676ae 100644
--- a/r/R/csv.R
+++ b/r/R/csv.R
@@ -414,7 +414,7 @@ CsvReadOptions$create <- function(use_threads = option_use_threads(),
 #' @rdname CsvReadOptions
 #' @export
 CsvWriteOptions <- R6Class("CsvWriteOptions", inherit = ArrowObject)
-CsvWriteOptions$create <- function(include_header = TRUE, batch_size = 1024L){
+CsvWriteOptions$create <- function(include_header = TRUE, batch_size = 1024L) {
   assert_that(is_integerish(batch_size, n = 1, finite = TRUE), batch_size > 0)
   csv___WriteOptions__initialize(
     list(
@@ -637,9 +637,9 @@ write_csv_arrow <- function(x,
     on.exit(sink$close())
   }
   
-  if(inherits(x, "RecordBatch")){
+  if (inherits(x, "RecordBatch")) {
     csv___WriteCSV__RecordBatch(x, write_options, sink)
-  } else if(inherits(x, "Table")){
+  } else if (inherits(x, "Table")) {
     csv___WriteCSV__Table(x, write_options, sink)
   }
   
diff --git a/r/R/enums.R b/r/R/enums.R
index ae44ccf2cad..4271f2ad138 100644
--- a/r/R/enums.R
+++ b/r/R/enums.R
@@ -16,11 +16,11 @@
 # under the License.
 
 #' @export
-`print.arrow-enum` <- function(x, ...){
+`print.arrow-enum` <- function(x, ...) {
   NextMethod()
 }
 
-enum <- function(class, ..., .list = list(...)){
+enum <- function(class, ..., .list = list(...)) {
   structure(
     .list,
     class = c(class, "arrow-enum")
diff --git a/r/R/filesystem.R b/r/R/filesystem.R
index 6761acab30e..283fbbb0ae5 100644
--- a/r/R/filesystem.R
+++ b/r/R/filesystem.R
@@ -203,7 +203,7 @@ FileSystem <- R6Class("FileSystem", inherit = ArrowObject,
     GetFileInfo = function(x) {
       if (inherits(x, "FileSelector")) {
         fs___FileSystem__GetTargetInfos_FileSelector(self, x)
-      } else if (is.character(x)){
+      } else if (is.character(x)) {
         fs___FileSystem__GetTargetInfos_Paths(self, clean_path_rel(x))
       } else {
         abort("incompatible type for FileSystem$GetFileInfo()")
diff --git a/r/R/metadata.R b/r/R/metadata.R
index d3e5e2150bb..408c2214a31 100644
--- a/r/R/metadata.R
+++ b/r/R/metadata.R
@@ -59,7 +59,7 @@ apply_arrow_r_metadata <- function(x, r_metadata) {
           x[[name]] <- apply_arrow_r_metadata(x[[name]], columns_metadata[[name]])
         }
       }
-    } else if(is.list(x) && !inherits(x, "POSIXlt") && !is.null(columns_metadata)) {
+    } else if (is.list(x) && !inherits(x, "POSIXlt") && !is.null(columns_metadata)) {
       x <- map2(x, columns_metadata, function(.x, .y) {
         apply_arrow_r_metadata(.x, .y)
       })
diff --git a/r/R/parquet.R b/r/R/parquet.R
index a9aef2c4d0d..3006fcbbe50 100644
--- a/r/R/parquet.R
+++ b/r/R/parquet.R
@@ -296,7 +296,7 @@ ParquetWriterPropertiesBuilder <- R6Class("ParquetWriterPropertiesBuilder", inhe
         parquet___ArrowWriterProperties___Builder__set_compressions
       )
     },
-    set_compression_level = function(table, compression_level){
+    set_compression_level = function(table, compression_level) {
       # cast to integer but keep names
       compression_level <- set_names(as.integer(compression_level), names(compression_level))
       private$.set(table, compression_level,
@@ -558,7 +558,7 @@ ParquetArrowReaderProperties <- R6Class("ParquetArrowReaderProperties",
   ),
   active = list(
     use_threads = function(use_threads) {
-      if(missing(use_threads)) {
+      if (missing(use_threads)) {
         parquet___arrow___ArrowReaderProperties__get_use_threads(self)
       } else {
         parquet___arrow___ArrowReaderProperties__set_use_threads(self, use_threads)
diff --git a/r/R/record-batch.R b/r/R/record-batch.R
index 1e41d6533a8..0ba6b4bd45d 100644
--- a/r/R/record-batch.R
+++ b/r/R/record-batch.R
@@ -162,6 +162,9 @@ RecordBatch$create <- function(..., schema = NULL) {
     return(dplyr::group_by(out, !!!dplyr::groups(arrays[[1]])))
   }
   
+  # If any arrays are length 1, recycle them
+  arrays <- recycle_scalars(arrays)
+
   # TODO: should this also assert that they're all Arrays?
   RecordBatch__from_arrays(schema, arrays)
 }
diff --git a/r/R/scalar.R b/r/R/scalar.R
index 01a50b0f358..6e5e63cee3e 100644
--- a/r/R/scalar.R
+++ b/r/R/scalar.R
@@ -58,7 +58,7 @@ Scalar <- R6Class("Scalar",
     ToString = function() Scalar__ToString(self),
     type_id = function() Scalar__type(self)$id,
     as_vector = function() Scalar__as_vector(self),
-    as_array = function() MakeArrayFromScalar(self),
+    as_array = function(length = 1L) MakeArrayFromScalar(self, as.integer(length)),
     Equals = function(other, ...) {
       inherits(other, "Scalar") && Scalar__Equals(self, other)
     },
diff --git a/r/R/table.R b/r/R/table.R
index 09be952af61..3e5c52d9624 100644
--- a/r/R/table.R
+++ b/r/R/table.R
@@ -166,18 +166,21 @@ Table$create <- function(..., schema = NULL) {
     names(dots) <- rep_len("", length(dots))
   }
   stopifnot(length(dots) > 0)
+  
+  if (all_record_batches(dots)) {
+    return(Table__from_record_batches(dots, schema))
+  }
+
+  # If any arrays are length 1, recycle them  
+  dots <- recycle_scalars(dots)
 
+  out <- Table__from_dots(dots, schema, option_use_threads())
+  
   # Preserve any grouping
   if (length(dots) == 1 && inherits(dots[[1]], "grouped_df")) {
-    out <- Table__from_dots(dots, schema, option_use_threads())
-    return(dplyr::group_by(out, !!!dplyr::groups(dots[[1]])))
-  }
-
-  if (all_record_batches(dots)) {
-    Table__from_record_batches(dots, schema)
-  } else {
-    Table__from_dots(dots, schema, option_use_threads())
+    out <- dplyr::group_by(out, !!!dplyr::groups(dots[[1]]))
   }
+  out
 }
 
 #' @export
diff --git a/r/R/util.R b/r/R/util.R
index 8d1f51bd079..884c346e503 100644
--- a/r/R/util.R
+++ b/r/R/util.R
@@ -139,3 +139,48 @@ attr(is_writable_table, "fail") <- function(call, env){
   )
 }
 
+#' Recycle scalar values in a list of arrays
+#' 
+#' @param arrays List of arrays
+#' @return List of arrays with any vector/Scalar/Array/ChunkedArray values of length 1 recycled 
+#' @keywords internal
+recycle_scalars <- function(arrays){
+  # Get lengths of items in arrays
+  arr_lens <- map_int(arrays, NROW)
+  
+  is_scalar <- arr_lens == 1
+  
+  if (length(arrays) > 1 && any(is_scalar) && !all(is_scalar)) {
+    
+    # Recycling not supported for tibbles and data.frames
+    if (all(map_lgl(arrays, ~inherits(.x, "data.frame")))) {
+      
+      abort(c(
+          "All input tibbles or data.frames must have the same number of rows",
+          x = paste(
+            "Number of rows in longest and shortest inputs:",
+            oxford_paste(c(max(arr_lens), min(arr_lens)))
+          )
+      ))
+    }
+    
+    max_array_len <- max(arr_lens)
+    arrays[is_scalar] <- lapply(arrays[is_scalar], repeat_value_as_array, max_array_len)
+  }
+  arrays
+}
+
+#' Take an object of length 1 and repeat it.
+#' 
+#' @param object Object of length 1 to be repeated - vector, `Scalar`, `Array`, or `ChunkedArray`
+#' @param n Number of repetitions
+#' 
+#' @return `Array` of length `n`
+#' 
+#' @keywords internal
+repeat_value_as_array <- function(object, n) {
+  if (inherits(object, "ChunkedArray")) {
+    return(Scalar$create(object$chunks[[1]])$as_array(n))
+  }
+  return(Scalar$create(object)$as_array(n))
+}
\ No newline at end of file
diff --git a/r/data-raw/codegen.R b/r/data-raw/codegen.R
index ad4514a3124..9b25cb1842c 100644
--- a/r/data-raw/codegen.R
+++ b/r/data-raw/codegen.R
@@ -67,13 +67,13 @@ get_exported_functions <- function(decorations, export_tag) {
 
 glue_collapse_data <- function(data, ..., sep = ", ", last = "") {
   res <- glue_collapse(glue_data(data, ...), sep = sep, last = last)
-  if(length(res) == 0) res <- ""
+  if (length(res) == 0) res <- ""
   res
 }
 
 wrap_call <- function(name, return_type, args) {
   call <- glue::glue('{name}({list_params})', list_params = glue_collapse_data(args, "{name}"))
-  if(return_type == "void") {
+  if (return_type == "void") {
     glue::glue("\t{call};\n\treturn R_NilValue;", .trim = FALSE)
   } else {
     glue::glue("\treturn cpp11::as_sexp({call});")
@@ -149,7 +149,7 @@ cpp_functions_definitions <- arrow_exports %>%
       sep = "\n",
       real_params = glue_collapse_data(args, "{type} {name}"),
       input_params = glue_collapse_data(args, "\tarrow::r::Input<{type}>::type {name}({name}_sexp);", sep = "\n"),
-      return_line = if(nrow(args)) "\n" else "")
+      return_line = if (nrow(args)) "\n" else "")
 
     glue::glue('
     // {basename(file)}
@@ -162,7 +162,7 @@ cpp_functions_definitions <- arrow_exports %>%
 
 cpp_functions_registration <- arrow_exports %>%
   select(name, return_type, args) %>%
-  pmap_chr(function(name, return_type, args){
+  pmap_chr(function(name, return_type, args) {
     glue('\t\t{{ "_arrow_{name}", (DL_FUNC) &_arrow_{name}, {nrow(args)}}}, ')
   }) %>%
   glue_collapse(sep = "\n")
diff --git a/r/extra-tests/helpers.R b/r/extra-tests/helpers.R
index af57d45e5d2..3fb450ee332 100644
--- a/r/extra-tests/helpers.R
+++ b/r/extra-tests/helpers.R
@@ -24,13 +24,13 @@ if_version_less_than <- function(version) {
 }
 
 skip_if_version_less_than <- function(version, msg) {
-  if(if_version(version, `<`)) {
+  if (if_version(version, `<`)) {
     skip(msg)
   }
 }
 
 skip_if_version_equals <- function(version, msg) {
-  if(if_version(version, `==`)) {
+  if (if_version(version, `==`)) {
     skip(msg)
   }
 }
diff --git a/r/extra-tests/write-files.R b/r/extra-tests/write-files.R
index 75889b61407..e11405d67bf 100644
--- a/r/extra-tests/write-files.R
+++ b/r/extra-tests/write-files.R
@@ -26,7 +26,7 @@ source("tests/testthat/helper-data.R")
 write_parquet(example_with_metadata, "extra-tests/files/ex_data.parquet")
 
 for (comp in c("lz4", "uncompressed", "zstd")) {
-  if(!codec_is_available(comp)) break
+  if (!codec_is_available(comp)) break
 
   name <- paste0("extra-tests/files/ex_data_", comp, ".feather")
   write_feather(example_with_metadata, name, compression = comp)
diff --git a/r/man/recycle_scalars.Rd b/r/man/recycle_scalars.Rd
new file mode 100644
index 00000000000..3d97ecfd79f
--- /dev/null
+++ b/r/man/recycle_scalars.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/util.R
+\name{recycle_scalars}
+\alias{recycle_scalars}
+\title{Recycle scalar values in a list of arrays}
+\usage{
+recycle_scalars(arrays)
+}
+\arguments{
+\item{arrays}{List of arrays}
+}
+\value{
+List of arrays with any vector/Scalar/Array/ChunkedArray values of length 1 recycled
+}
+\description{
+Recycle scalar values in a list of arrays
+}
+\keyword{internal}
diff --git a/r/man/repeat_value_as_array.Rd b/r/man/repeat_value_as_array.Rd
new file mode 100644
index 00000000000..a4937326efa
--- /dev/null
+++ b/r/man/repeat_value_as_array.Rd
@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/util.R
+\name{repeat_value_as_array}
+\alias{repeat_value_as_array}
+\title{Take an object of length 1 and repeat it.}
+\usage{
+repeat_value_as_array(object, n)
+}
+\arguments{
+\item{object}{Object of length 1 to be repeated - vector, \code{Scalar}, \code{Array}, or \code{ChunkedArray}}
+
+\item{n}{Number of repetitions}
+}
+\value{
+\code{Array} of length \code{n}
+}
+\description{
+Take an object of length 1 and repeat it.
+}
+\keyword{internal}
diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp
index 2024483f47d..024e5c58b0e 100644
--- a/r/src/arrowExports.cpp
+++ b/r/src/arrowExports.cpp
@@ -6091,15 +6091,16 @@ extern "C" SEXP _arrow_Scalar__as_vector(SEXP scalar_sexp){
 
 // scalar.cpp
 #if defined(ARROW_R_WITH_ARROW)
-std::shared_ptr<arrow::Array> MakeArrayFromScalar(const std::shared_ptr<arrow::Scalar>& scalar);
-extern "C" SEXP _arrow_MakeArrayFromScalar(SEXP scalar_sexp){
+std::shared_ptr<arrow::Array> MakeArrayFromScalar(const std::shared_ptr<arrow::Scalar>& scalar, int n);
+extern "C" SEXP _arrow_MakeArrayFromScalar(SEXP scalar_sexp, SEXP n_sexp){
 BEGIN_CPP11
 	arrow::r::Input<const std::shared_ptr<arrow::Scalar>&>::type scalar(scalar_sexp);
-	return cpp11::as_sexp(MakeArrayFromScalar(scalar));
+	arrow::r::Input<int>::type n(n_sexp);
+	return cpp11::as_sexp(MakeArrayFromScalar(scalar, n));
 END_CPP11
 }
 #else
-extern "C" SEXP _arrow_MakeArrayFromScalar(SEXP scalar_sexp){
+extern "C" SEXP _arrow_MakeArrayFromScalar(SEXP scalar_sexp, SEXP n_sexp){
 	Rf_error("Cannot call MakeArrayFromScalar(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
 }
 #endif
@@ -7279,7 +7280,7 @@ static const R_CallMethodDef CallEntries[] = {
 		{ "_arrow_StructScalar__field", (DL_FUNC) &_arrow_StructScalar__field, 2}, 
 		{ "_arrow_StructScalar__GetFieldByName", (DL_FUNC) &_arrow_StructScalar__GetFieldByName, 2}, 
 		{ "_arrow_Scalar__as_vector", (DL_FUNC) &_arrow_Scalar__as_vector, 1}, 
-		{ "_arrow_MakeArrayFromScalar", (DL_FUNC) &_arrow_MakeArrayFromScalar, 1}, 
+		{ "_arrow_MakeArrayFromScalar", (DL_FUNC) &_arrow_MakeArrayFromScalar, 2}, 
 		{ "_arrow_Scalar__is_valid", (DL_FUNC) &_arrow_Scalar__is_valid, 1}, 
 		{ "_arrow_Scalar__type", (DL_FUNC) &_arrow_Scalar__type, 1}, 
 		{ "_arrow_Scalar__Equals", (DL_FUNC) &_arrow_Scalar__Equals, 2}, 
diff --git a/r/src/scalar.cpp b/r/src/scalar.cpp
index 057e587e7eb..5450a6f0ab7 100644
--- a/r/src/scalar.cpp
+++ b/r/src/scalar.cpp
@@ -70,8 +70,8 @@ SEXP Scalar__as_vector(const std::shared_ptr<arrow::Scalar>& scalar) {
 
 // [[arrow::export]]
 std::shared_ptr<arrow::Array> MakeArrayFromScalar(
-    const std::shared_ptr<arrow::Scalar>& scalar) {
-  return ValueOrStop(arrow::MakeArrayFromScalar(*scalar, 1, gc_memory_pool()));
+    const std::shared_ptr<arrow::Scalar>& scalar, int n) {
+  return ValueOrStop(arrow::MakeArrayFromScalar(*scalar, n, gc_memory_pool()));
 }
 
 // [[arrow::export]]
diff --git a/r/tests/testthat/helper-expectation.R b/r/tests/testthat/helper-expectation.R
index 5b6958a9a7a..b815515a4fa 100644
--- a/r/tests/testthat/helper-expectation.R
+++ b/r/tests/testthat/helper-expectation.R
@@ -16,7 +16,7 @@
 # under the License.
 
 expect_as_vector <- function(x, y, ignore_attr = FALSE, ...) {
-  expect_fun <- if(ignore_attr){
+  expect_fun <- if (ignore_attr) {
     expect_equivalent
   } else {
     expect_equal
@@ -28,7 +28,7 @@ expect_data_frame <- function(x, y, ...) {
   expect_equal(as.data.frame(x), y, ...)
 }
 
-expect_r6_class <- function(object, class){
+expect_r6_class <- function(object, class) {
   expect_s3_class(object, class)
   expect_s3_class(object, "R6")
 }
@@ -255,7 +255,7 @@ expect_vector_error <- function(expr, # A vectorized R expression containing `in
   }
 }
 
-split_vector_as_list <- function(vec){
+split_vector_as_list <- function(vec) {
   vec_split <- length(vec) %/% 2
   vec1 <- vec[seq(from = min(1, length(vec) - 1), to = min(length(vec) - 1, vec_split), by = 1)]
   vec2 <- vec[seq(from = min(length(vec), vec_split + 1), to = length(vec), by = 1)]
diff --git a/r/tests/testthat/test-RecordBatch.R b/r/tests/testthat/test-RecordBatch.R
index beb1306ab4f..6617805db54 100644
--- a/r/tests/testthat/test-RecordBatch.R
+++ b/r/tests/testthat/test-RecordBatch.R
@@ -15,7 +15,6 @@
 # specific language governing permissions and limitations
 # under the License.
 
-
 test_that("RecordBatch", {
   # Note that we're reusing `tbl` and `batch` throughout the tests in this file
   tbl <- tibble::tibble(
@@ -415,14 +414,50 @@ test_that("record_batch() handles null type (ARROW-7064)", {
   expect_equivalent(batch$schema,  schema(a = int32(), n = null()))
 })
 
-test_that("record_batch() scalar recycling", {
-  skip("Not implemented (ARROW-11705)")
+test_that("record_batch() scalar recycling with vectors", {
   expect_data_frame(
     record_batch(a = 1:10, b = 5),
     tibble::tibble(a = 1:10, b = 5)
   )
 })
 
+test_that("record_batch() scalar recycling with Scalars, Arrays, and ChunkedArrays", {
+  
+  expect_data_frame(
+    record_batch(a = Array$create(1:10), b = Scalar$create(5)),
+    tibble::tibble(a = 1:10, b = 5)
+  )
+  
+  expect_data_frame(
+    record_batch(a = Array$create(1:10), b = Array$create(5)),
+    tibble::tibble(a = 1:10, b = 5)
+  )
+  
+  expect_data_frame(
+    record_batch(a = Array$create(1:10), b = ChunkedArray$create(5)),
+    tibble::tibble(a = 1:10, b = 5)
+  )
+  
+})
+
+test_that("record_batch() no recycling with tibbles", {
+  expect_error(
+    record_batch(
+      tibble::tibble(a = 1:10),
+      tibble::tibble(a = 1, b = 5)
+    ),
+    regexp = "All input tibbles or data.frames must have the same number of rows"
+  )
+  
+  expect_error(
+    record_batch(
+      tibble::tibble(a = 1:10),
+      tibble::tibble(a = 1)
+    ),
+    regexp = "All input tibbles or data.frames must have the same number of rows"
+  )
+})
+
 test_that("RecordBatch$Equals", {
   df <- tibble::tibble(x = 1:10, y = letters[1:10])
   a <- record_batch(df)
@@ -435,7 +470,7 @@ test_that("RecordBatch$Equals", {
 test_that("RecordBatch$Equals(check_metadata)", {
   df <- tibble::tibble(x = 1:2, y = c("a", "b"))
   rb1 <- record_batch(df)
-  rb2 <- record_batch(df, schema = rb1$schema$WithMetadata(list(some="metadata")))
+  rb2 <- record_batch(df, schema = rb1$schema$WithMetadata(list(some = "metadata")))
 
   expect_r6_class(rb1, "RecordBatch")
   expect_r6_class(rb2, "RecordBatch")
@@ -467,8 +502,7 @@ test_that("RecordBatch name assignment", {
 
 test_that("record_batch() with different length arrays", {
   msg <- "All arrays must have the same length"
-  expect_error(record_batch(a=1:5, b = 42), msg)
-  expect_error(record_batch(a=1:5, b = 1:6), msg)
+  expect_error(record_batch(a = 1:5, b = 1:6), msg)
 })
 
 test_that("Handling string data with embedded nuls", {
diff --git a/r/tests/testthat/test-Table.R b/r/tests/testthat/test-Table.R
index 1f9628859d0..6dd36b248ec 100644
--- a/r/tests/testthat/test-Table.R
+++ b/r/tests/testthat/test-Table.R
@@ -15,7 +15,6 @@
 # specific language governing permissions and limitations
 # under the License.
 
-
 test_that("read_table handles various input streams (ARROW-3450, ARROW-3505)", {
   tbl <- tibble::tibble(
     int = 1:10, dbl = as.numeric(1:10),
@@ -471,8 +470,51 @@ test_that("Table name assignment", {
 
 test_that("Table$create() with different length columns", {
   msg <- "All columns must have the same length"
-  expect_error(Table$create(a=1:5, b = 42), msg)
-  expect_error(Table$create(a=1:5, b = 1:6), msg)
+  expect_error(Table$create(a = 1:5, b = 1:6), msg)
+})
+
+test_that("Table$create() scalar recycling with vectors", {
+  expect_data_frame(
+    Table$create(a = 1:10, b = 5),
+    tibble::tibble(a = 1:10, b = 5)
+  )
+})
+
+test_that("Table$create() scalar recycling with Scalars, Arrays, and ChunkedArrays", {
+  
+  expect_data_frame(
+    Table$create(a = Array$create(1:10), b = Scalar$create(5)),
+    tibble::tibble(a = 1:10, b = 5)
+  )
+  
+  expect_data_frame(
+    Table$create(a = Array$create(1:10), b = Array$create(5)),
+    tibble::tibble(a = 1:10, b = 5)
+  )
+  
+  expect_data_frame(
+    Table$create(a = Array$create(1:10), b = ChunkedArray$create(5)),
+    tibble::tibble(a = 1:10, b = 5)
+  )
+  
+})
+
+test_that("Table$create() no recycling with tibbles", {
+  expect_error(
+    Table$create(
+      tibble::tibble(a = 1:10, b = 5),
+      tibble::tibble(a = 1, b = 5)
+    ),
+    regexp = "All input tibbles or data.frames must have the same number of rows"
+  )
+  
+  expect_error(
+    Table$create(
+      tibble::tibble(a = 1:10, b = 5),
+      tibble::tibble(a = 1)
+    ),
+    regexp = "All input tibbles or data.frames must have the same number of rows"
+  )
 })
 
 test_that("ARROW-11769 - grouping preserved in table creation", {
diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R
index d84ed03c2d2..ad3e7c30f1f 100644
--- a/r/tests/testthat/test-dataset.R
+++ b/r/tests/testthat/test-dataset.R
@@ -90,7 +90,7 @@ test_that("Setup (putting data in the dir)", {
   expect_length(dir(tsv_dir, recursive = TRUE), 2)
 })
 
-if(arrow_with_parquet()) {
+if (arrow_with_parquet()) {
   files <- c(
     file.path(dataset_dir, 1, "file1.parquet", fsep = "/"),
     file.path(dataset_dir, 2, "file2.parquet", fsep = "/")
diff --git a/r/tools/winlibs.R b/r/tools/winlibs.R
index f90becb7649..ccaa5c95d87 100644
--- a/r/tools/winlibs.R
+++ b/r/tools/winlibs.R
@@ -17,12 +17,12 @@
 
 args <- commandArgs(TRUE)
 VERSION <- args[1]
-if(!file.exists(sprintf("windows/arrow-%s/include/arrow/api.h", VERSION))){
-  if(length(args) > 1){
+if (!file.exists(sprintf("windows/arrow-%s/include/arrow/api.h", VERSION))) {
+  if (length(args) > 1) {
     # Arg 2 would be the path/to/lib.zip
     localfile <- args[2]
     cat(sprintf("*** Using RWINLIB_LOCAL %s\n", localfile))
-    if(!file.exists(localfile)){
+    if (!file.exists(localfile)) {
       cat(sprintf("*** %s does not exist; build will fail\n", localfile))
     }
     file.copy(localfile, "lib.zip")

From d5a2aa2ffb1c2fc4f3ca48c829fcdba80ec67916 Mon Sep 17 00:00:00 2001
From: Jinpeng Zhou <jinpengz@google.com>
Date: Thu, 17 Jun 2021 00:20:28 -0700
Subject: [PATCH 427/719] PARQUET-2056: [C++] Add ability for retrieving
 dictionary and indices separately for ColumnReader

In some contexts it is useful to be able to retrieve encoding information separately instead of decoding. This introduces new apis in RowGroupReader, ColumnReader, TypedColumnReader, and DictDecoder to support reading batches of dictionary indices. Given that a column chunk only has one dictionary page, the dictionary is read along with the 1st batch.

Thanks.

Closes #10537 from zjpzlz/expose-encodings

Authored-by: Jinpeng Zhou <jinpengz@google.com>
Signed-off-by: Micah Kornfield <emkornfield@gmail.com>
---
 cpp/src/parquet/column_reader.cc      | 132 ++++++++++++++++++++------
 cpp/src/parquet/column_reader.h       |  43 +++++++++
 cpp/src/parquet/column_reader_test.cc |  86 +++++++++++++++++
 cpp/src/parquet/encoding.cc           |  13 +++
 cpp/src/parquet/encoding.h            |  18 ++++
 cpp/src/parquet/file_reader.cc        |  39 ++++++++
 cpp/src/parquet/file_reader.h         |  14 +++
 cpp/src/parquet/reader_test.cc        |  78 +++++++++++++++
 cpp/src/parquet/types.h               |   9 ++
 9 files changed, 404 insertions(+), 28 deletions(-)

diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc
index ec205f3d3f9..047d99fed9a 100644
--- a/cpp/src/parquet/column_reader.cc
+++ b/cpp/src/parquet/column_reader.cc
@@ -823,6 +823,9 @@ class ColumnReaderImplBase {
   /// DictionaryRecordReader
   bool new_dictionary_;
 
+  // The exposed encoding
+  ExposedEncoding exposed_encoding_ = ExposedEncoding::NO_ENCODING;
+
   // Map of encoding type to the respective decoder object. For example, a
   // column chunk's data pages may include both dictionary-encoded and
   // plain-encoded data.
@@ -861,8 +864,108 @@ class TypedColumnReaderImpl : public TypedColumnReader<DType>,
   Type::type type() const override { return this->descr_->physical_type(); }
 
   const ColumnDescriptor* descr() const override { return this->descr_; }
+
+  ExposedEncoding GetExposedEncoding() override { return this->exposed_encoding_; };
+
+  int64_t ReadBatchWithDictionary(int64_t batch_size, int16_t* def_levels,
+                                  int16_t* rep_levels, int32_t* indices,
+                                  int64_t* indices_read, const T** dict,
+                                  int32_t* dict_len) override;
+
+ protected:
+  void SetExposedEncoding(ExposedEncoding encoding) override {
+    this->exposed_encoding_ = encoding;
+  }
+
+ private:
+  // Read dictionary indices. Similar to ReadValues but decode data to dictionary indices.
+  // This function is called only by ReadBatchWithDictionary().
+  int64_t ReadDictionaryIndices(int64_t indices_to_read, int32_t* indices) {
+    auto decoder = dynamic_cast<DictDecoder<DType>*>(this->current_decoder_);
+    return decoder->DecodeIndices(static_cast<int>(indices_to_read), indices);
+  }
+
+  // Get dictionary. The dictionary should have been set by SetDict(). The dictionary is
+  // owned by the internal decoder and is destroyed when the reader is destroyed. This
+  // function is called only by ReadBatchWithDictionary() after dictionary is configured.
+  void GetDictionary(const T** dictionary, int32_t* dictionary_length) {
+    auto decoder = dynamic_cast<DictDecoder<DType>*>(this->current_decoder_);
+    decoder->GetDictionary(dictionary, dictionary_length);
+  }
+
+  // Read definition and repetition levels. Also return the number of definition levels
+  // and number of values to read. This function is called before reading values.
+  void ReadLevels(int64_t batch_size, int16_t* def_levels, int16_t* rep_levels,
+                  int64_t* num_def_levels, int64_t* values_to_read) {
+    batch_size =
+        std::min(batch_size, this->num_buffered_values_ - this->num_decoded_values_);
+
+    // If the field is required and non-repeated, there are no definition levels
+    if (this->max_def_level_ > 0 && def_levels != nullptr) {
+      *num_def_levels = this->ReadDefinitionLevels(batch_size, def_levels);
+      // TODO(wesm): this tallying of values-to-decode can be performed with better
+      // cache-efficiency if fused with the level decoding.
+      for (int64_t i = 0; i < *num_def_levels; ++i) {
+        if (def_levels[i] == this->max_def_level_) {
+          ++(*values_to_read);
+        }
+      }
+    } else {
+      // Required field, read all values
+      *values_to_read = batch_size;
+    }
+
+    // Not present for non-repeated fields
+    if (this->max_rep_level_ > 0 && rep_levels != nullptr) {
+      int64_t num_rep_levels = this->ReadRepetitionLevels(batch_size, rep_levels);
+      if (def_levels != nullptr && *num_def_levels != num_rep_levels) {
+        throw ParquetException("Number of decoded rep / def levels did not match");
+      }
+    }
+  }
 };
 
+template <typename DType>
+int64_t TypedColumnReaderImpl<DType>::ReadBatchWithDictionary(
+    int64_t batch_size, int16_t* def_levels, int16_t* rep_levels, int32_t* indices,
+    int64_t* indices_read, const T** dict, int32_t* dict_len) {
+  bool has_dict_output = dict != nullptr && dict_len != nullptr;
+  // Similar logic as ReadValues to get pages.
+  if (!HasNext()) {
+    *indices_read = 0;
+    if (has_dict_output) {
+      *dict = nullptr;
+      *dict_len = 0;
+    }
+    return 0;
+  }
+
+  // Verify the current data page is dictionary encoded.
+  if (this->current_encoding_ != Encoding::RLE_DICTIONARY) {
+    std::stringstream ss;
+    ss << "Data page is not dictionary encoded. Encoding: "
+       << EncodingToString(this->current_encoding_);
+    throw ParquetException(ss.str());
+  }
+
+  // Get dictionary pointer and length.
+  if (has_dict_output) {
+    GetDictionary(dict, dict_len);
+  }
+
+  // Similar logic as ReadValues to get def levels and rep levels.
+  int64_t num_def_levels = 0;
+  int64_t indices_to_read = 0;
+  ReadLevels(batch_size, def_levels, rep_levels, &num_def_levels, &indices_to_read);
+
+  // Read dictionary indices.
+  *indices_read = ReadDictionaryIndices(indices_to_read, indices);
+  int64_t total_indices = std::max(num_def_levels, *indices_read);
+  this->ConsumeBufferedValues(total_indices);
+
+  return total_indices;
+}
+
 template <typename DType>
 int64_t TypedColumnReaderImpl<DType>::ReadBatch(int64_t batch_size, int16_t* def_levels,
                                                 int16_t* rep_levels, T* values,
@@ -875,36 +978,9 @@ int64_t TypedColumnReaderImpl<DType>::ReadBatch(int64_t batch_size, int16_t* def
 
   // TODO(wesm): keep reading data pages until batch_size is reached, or the
   // row group is finished
-  batch_size =
-      std::min(batch_size, this->num_buffered_values_ - this->num_decoded_values_);
-
   int64_t num_def_levels = 0;
-  int64_t num_rep_levels = 0;
-
   int64_t values_to_read = 0;
-
-  // If the field is required and non-repeated, there are no definition levels
-  if (this->max_def_level_ > 0 && def_levels) {
-    num_def_levels = this->ReadDefinitionLevels(batch_size, def_levels);
-    // TODO(wesm): this tallying of values-to-decode can be performed with better
-    // cache-efficiency if fused with the level decoding.
-    for (int64_t i = 0; i < num_def_levels; ++i) {
-      if (def_levels[i] == this->max_def_level_) {
-        ++values_to_read;
-      }
-    }
-  } else {
-    // Required field, read all values
-    values_to_read = batch_size;
-  }
-
-  // Not present for non-repeated fields
-  if (this->max_rep_level_ > 0 && rep_levels) {
-    num_rep_levels = this->ReadRepetitionLevels(batch_size, rep_levels);
-    if (def_levels && num_def_levels != num_rep_levels) {
-      throw ParquetException("Number of decoded rep / def levels did not match");
-    }
-  }
+  ReadLevels(batch_size, def_levels, rep_levels, &num_def_levels, &values_to_read);
 
   *values_read = this->ReadValues(values_to_read, values);
   int64_t total_values = std::max(num_def_levels, *values_read);
diff --git a/cpp/src/parquet/column_reader.h b/cpp/src/parquet/column_reader.h
index a73bba6cb4e..8c48e4d7843 100644
--- a/cpp/src/parquet/column_reader.h
+++ b/cpp/src/parquet/column_reader.h
@@ -128,6 +128,19 @@ class PARQUET_EXPORT ColumnReader {
   virtual Type::type type() const = 0;
 
   virtual const ColumnDescriptor* descr() const = 0;
+
+  // Get the encoding that can be exposed by this reader. If it returns
+  // dictionary encoding, then ReadBatchWithDictionary can be used to read data.
+  //
+  // \note API EXPERIMENTAL
+  virtual ExposedEncoding GetExposedEncoding() = 0;
+
+ protected:
+  friend class RowGroupReader;
+  // Set the encoding that can be exposed by this reader.
+  //
+  // \note API EXPERIMENTAL
+  virtual void SetExposedEncoding(ExposedEncoding encoding) = 0;
 };
 
 // API to read values from a single column. This is a main client facing API.
@@ -201,6 +214,36 @@ class TypedColumnReader : public ColumnReader {
   // Skip reading levels
   // Returns the number of levels skipped
   virtual int64_t Skip(int64_t num_rows_to_skip) = 0;
+
+  // Read a batch of repetition levels, definition levels, and indices from the
+  // column. And read the dictionary if a dictionary page is encountered during
+  // reading pages. This API is similar to ReadBatch(), with ability to read
+  // dictionary and indices. It is only valid to call this method  when the reader can
+  // expose dictionary encoding. (i.e., the reader's GetExposedEncoding() returns
+  // DICTIONARY).
+  //
+  // The dictionary is read along with the data page. When there's no data page,
+  // the dictionary won't be returned.
+  //
+  // @param batch_size The batch size to read
+  // @param[out] def_levels The Parquet definition levels.
+  // @param[out] rep_levels The Parquet repetition levels.
+  // @param[out] indices The dictionary indices.
+  // @param[out] indices_read The number of indices read.
+  // @param[out] dict The pointer to dictionary values. It will return nullptr if
+  // there's no data page. Each column chunk only has one dictionary page. The dictionary
+  // is owned by the reader, so the caller is responsible for copying the dictionary
+  // values before the reader gets destroyed.
+  // @param[out] dict_len The dictionary length. It will return 0 if there's no data
+  // page.
+  // @returns: actual number of levels read (see indices_read for number of
+  // indices read
+  //
+  // \note API EXPERIMENTAL
+  virtual int64_t ReadBatchWithDictionary(int64_t batch_size, int16_t* def_levels,
+                                          int16_t* rep_levels, int32_t* indices,
+                                          int64_t* indices_read, const T** dict,
+                                          int32_t* dict_len) = 0;
 };
 
 namespace internal {
diff --git a/cpp/src/parquet/column_reader_test.cc b/cpp/src/parquet/column_reader_test.cc
index f0025f4c3a9..a50610bb8a2 100644
--- a/cpp/src/parquet/column_reader_test.cc
+++ b/cpp/src/parquet/column_reader_test.cc
@@ -27,6 +27,7 @@
 #include <vector>
 
 #include "arrow/testing/macros.h"
+#include "arrow/util/make_unique.h"
 #include "parquet/column_page.h"
 #include "parquet/column_reader.h"
 #include "parquet/schema.h"
@@ -386,5 +387,90 @@ TEST_F(TestPrimitiveReader, TestDictionaryEncodedPages) {
   pages_.clear();
 }
 
+TEST_F(TestPrimitiveReader, TestDictionaryEncodedPagesWithExposeEncoding) {
+  max_def_level_ = 0;
+  max_rep_level_ = 0;
+  int levels_per_page = 100;
+  int num_pages = 5;
+  std::vector<int16_t> def_levels;
+  std::vector<int16_t> rep_levels;
+  std::vector<ByteArray> values;
+  std::vector<uint8_t> buffer;
+  NodePtr type = schema::ByteArray("a", Repetition::REQUIRED);
+  const ColumnDescriptor descr(type, max_def_level_, max_rep_level_);
+
+  // Fully dictionary encoded
+  MakePages<ByteArrayType>(&descr, num_pages, levels_per_page, def_levels, rep_levels,
+                           values, buffer, pages_, Encoding::RLE_DICTIONARY);
+  InitReader(&descr);
+
+  auto reader = static_cast<ByteArrayReader*>(reader_.get());
+  const ByteArray* dict = nullptr;
+  int32_t dict_len = 0;
+  int64_t total_indices = 0;
+  int64_t indices_read = 0;
+  int64_t value_size = values.size();
+  auto indices = ::arrow::internal::make_unique<int32_t[]>(value_size);
+  while (total_indices < value_size && reader->HasNext()) {
+    const ByteArray* tmp_dict = nullptr;
+    int32_t tmp_dict_len = 0;
+    EXPECT_NO_THROW(reader->ReadBatchWithDictionary(
+        value_size, /*def_levels=*/nullptr,
+        /*rep_levels=*/nullptr, indices.get() + total_indices, &indices_read, &tmp_dict,
+        &tmp_dict_len));
+    if (tmp_dict != nullptr) {
+      // Dictionary is read along with data
+      EXPECT_GT(indices_read, 0);
+      dict = tmp_dict;
+      dict_len = tmp_dict_len;
+    } else {
+      // Dictionary is not read when there's no data
+      EXPECT_EQ(indices_read, 0);
+    }
+    total_indices += indices_read;
+  }
+
+  EXPECT_EQ(total_indices, value_size);
+  for (int64_t i = 0; i < total_indices; ++i) {
+    EXPECT_LT(indices[i], dict_len);
+    EXPECT_EQ(dict[indices[i]].len, values[i].len);
+    EXPECT_EQ(memcmp(dict[indices[i]].ptr, values[i].ptr, values[i].len), 0);
+  }
+  pages_.clear();
+}
+
+TEST_F(TestPrimitiveReader, TestNonDictionaryEncodedPagesWithExposeEncoding) {
+  max_def_level_ = 0;
+  max_rep_level_ = 0;
+  int64_t value_size = 100;
+  std::vector<int32_t> values(value_size, 0);
+  NodePtr type = schema::Int32("a", Repetition::REQUIRED);
+  const ColumnDescriptor descr(type, max_def_level_, max_rep_level_);
+
+  // The data page falls back to plain encoding
+  std::shared_ptr<ResizableBuffer> dummy = AllocateBuffer();
+  std::shared_ptr<DictionaryPage> dict_page =
+      std::make_shared<DictionaryPage>(dummy, 0, Encoding::PLAIN);
+  std::shared_ptr<DataPageV1> data_page = MakeDataPage<Int32Type>(
+      &descr, values, static_cast<int>(value_size), Encoding::PLAIN, /*indices=*/{},
+      /*indices_size=*/0, /*def_levels=*/{}, /*max_def_level=*/0, /*rep_levels=*/{},
+      /*max_rep_level=*/0);
+  pages_.push_back(dict_page);
+  pages_.push_back(data_page);
+  InitReader(&descr);
+
+  auto reader = static_cast<ByteArrayReader*>(reader_.get());
+  const ByteArray* dict = nullptr;
+  int32_t dict_len = 0;
+  int64_t indices_read = 0;
+  auto indices = ::arrow::internal::make_unique<int32_t[]>(value_size);
+  // Dictionary cannot be exposed when it's not fully dictionary encoded
+  EXPECT_THROW(reader->ReadBatchWithDictionary(value_size, /*def_levels=*/nullptr,
+                                               /*rep_levels=*/nullptr, indices.get(),
+                                               &indices_read, &dict, &dict_len),
+               ParquetException);
+  pages_.clear();
+}
+
 }  // namespace test
 }  // namespace parquet
diff --git a/cpp/src/parquet/encoding.cc b/cpp/src/parquet/encoding.cc
index eeeff1c8f9b..89b2b0e0413 100644
--- a/cpp/src/parquet/encoding.cc
+++ b/cpp/src/parquet/encoding.cc
@@ -1569,6 +1569,19 @@ class DictDecoderImpl : public DecoderImpl, virtual public DictDecoder<Type> {
     return num_values;
   }
 
+  int DecodeIndices(int num_values, int32_t* indices) override {
+    if (num_values != idx_decoder_.GetBatch(indices, num_values)) {
+      ParquetException::EofException();
+    }
+    num_values_ -= num_values;
+    return num_values;
+  }
+
+  void GetDictionary(const T** dictionary, int32_t* dictionary_length) override {
+    *dictionary_length = dictionary_length_;
+    *dictionary = reinterpret_cast<T*>(dictionary_->mutable_data());
+  }
+
  protected:
   Status IndexInBounds(int32_t index) {
     if (ARROW_PREDICT_TRUE(0 <= index && index < dictionary_length_)) {
diff --git a/cpp/src/parquet/encoding.h b/cpp/src/parquet/encoding.h
index a3d8e012b6a..b9ca7a7ee68 100644
--- a/cpp/src/parquet/encoding.h
+++ b/cpp/src/parquet/encoding.h
@@ -350,6 +350,8 @@ class TypedDecoder : virtual public Decoder {
 template <typename DType>
 class DictDecoder : virtual public TypedDecoder<DType> {
  public:
+  using T = typename DType::c_type;
+
   virtual void SetDict(TypedDecoder<DType>* dictionary) = 0;
 
   /// \brief Insert dictionary values into the Arrow dictionary builder's memo,
@@ -371,6 +373,22 @@ class DictDecoder : virtual public TypedDecoder<DType> {
   /// \warning Remember to reset the builder each time the dict decoder is initialized
   /// with a new dictionary page
   virtual int DecodeIndices(int num_values, ::arrow::ArrayBuilder* builder) = 0;
+
+  /// \brief Decode only dictionary indices (no nulls). Same as above
+  /// DecodeIndices but target is an array instead of a builder.
+  ///
+  /// \note API EXPERIMENTAL
+  virtual int DecodeIndices(int num_values, int32_t* indices) = 0;
+
+  /// \brief Get dictionary. The reader will call this API when it encounters a
+  /// new dictionary.
+  ///
+  /// @param[out] dictionary The pointer to dictionary values. Dictionary is owned by
+  /// the decoder and is destroyed when the decoder is destroyed.
+  /// @param[out] dictionary_length The dictionary length.
+  ///
+  /// \note API EXPERIMENTAL
+  virtual void GetDictionary(const T** dictionary, int32_t* dictionary_length) = 0;
 };
 
 // ----------------------------------------------------------------------
diff --git a/cpp/src/parquet/file_reader.cc b/cpp/src/parquet/file_reader.cc
index 9dbfca433ce..4e38901aa0d 100644
--- a/cpp/src/parquet/file_reader.cc
+++ b/cpp/src/parquet/file_reader.cc
@@ -77,6 +77,45 @@ std::shared_ptr<ColumnReader> RowGroupReader::Column(int i) {
       const_cast<ReaderProperties*>(contents_->properties())->memory_pool());
 }
 
+std::shared_ptr<ColumnReader> RowGroupReader::ColumnWithExposeEncoding(
+    int i, ExposedEncoding encoding_to_expose) {
+  std::shared_ptr<ColumnReader> reader = Column(i);
+
+  if (encoding_to_expose == ExposedEncoding::DICTIONARY) {
+    // Check the encoding_stats to see if all data pages are dictionary encoded.
+    std::unique_ptr<ColumnChunkMetaData> col = metadata()->ColumnChunk(i);
+    const std::vector<PageEncodingStats>& encoding_stats = col->encoding_stats();
+    if (encoding_stats.empty()) {
+      // Some parquet files may have empty encoding_stats. In this case we are
+      // not sure whether all data pages are dictionary encoded. So we do not
+      // enable exposing dictionary.
+      return reader;
+    }
+    // The 1st page should be the dictionary page.
+    if (encoding_stats[0].page_type != PageType::DICTIONARY_PAGE ||
+        (encoding_stats[0].encoding != Encoding::PLAIN &&
+         encoding_stats[0].encoding != Encoding::PLAIN_DICTIONARY)) {
+      return reader;
+    }
+    // The following pages should be dictionary encoded data pages.
+    for (size_t idx = 1; idx < encoding_stats.size(); ++idx) {
+      if ((encoding_stats[idx].encoding != Encoding::RLE_DICTIONARY &&
+           encoding_stats[idx].encoding != Encoding::PLAIN_DICTIONARY) ||
+          (encoding_stats[idx].page_type != PageType::DATA_PAGE &&
+           encoding_stats[idx].page_type != PageType::DATA_PAGE_V2)) {
+        return reader;
+      }
+    }
+  } else {
+    // Exposing other encodings are not supported for now.
+    return reader;
+  }
+
+  // Set exposed encoding.
+  reader->SetExposedEncoding(encoding_to_expose);
+  return reader;
+}
+
 std::unique_ptr<PageReader> RowGroupReader::GetColumnPageReader(int i) {
   if (i >= metadata()->num_columns()) {
     std::stringstream ss;
diff --git a/cpp/src/parquet/file_reader.h b/cpp/src/parquet/file_reader.h
index 4bc7ec2353a..0fc84054939 100644
--- a/cpp/src/parquet/file_reader.h
+++ b/cpp/src/parquet/file_reader.h
@@ -56,6 +56,20 @@ class PARQUET_EXPORT RowGroupReader {
   // column. Ownership is shared with the RowGroupReader.
   std::shared_ptr<ColumnReader> Column(int i);
 
+  // Construct a ColumnReader, trying to enable exposed encoding.
+  //
+  // For dictionary encoding, currently we only support column chunks that are fully
+  // dictionary encoded, i.e., all data pages in the column chunk are dictionary encoded.
+  // If a column chunk uses dictionary encoding but then falls back to plain encoding, the
+  // encoding will not be exposed.
+  //
+  // The returned column reader provides an API GetExposedEncoding() for the
+  // users to check the exposed encoding and determine how to read the batches.
+  //
+  // \note API EXPERIMENTAL
+  std::shared_ptr<ColumnReader> ColumnWithExposeEncoding(
+      int i, ExposedEncoding encoding_to_expose);
+
   std::unique_ptr<PageReader> GetColumnPageReader(int i);
 
  private:
diff --git a/cpp/src/parquet/reader_test.cc b/cpp/src/parquet/reader_test.cc
index 9bbcda3cf1f..806ff2b9494 100644
--- a/cpp/src/parquet/reader_test.cc
+++ b/cpp/src/parquet/reader_test.cc
@@ -31,6 +31,7 @@
 #include "arrow/testing/gtest_util.h"
 #include "arrow/testing/random.h"
 #include "arrow/util/checked_cast.h"
+#include "arrow/util/make_unique.h"
 
 #include "parquet/column_reader.h"
 #include "parquet/column_scanner.h"
@@ -476,6 +477,83 @@ TEST(TestJSONWithLocalFile, JSONOutput) {
   ASSERT_EQ(json_output, ss.str());
 }
 
+TEST(TestFileReader, BufferedReadsWithDictionary) {
+  const int num_rows = 1000;
+
+  // Make schema
+  schema::NodeVector fields;
+  fields.push_back(PrimitiveNode::Make("field", Repetition::REQUIRED, Type::DOUBLE,
+                                       ConvertedType::NONE));
+  auto schema = std::static_pointer_cast<GroupNode>(
+      GroupNode::Make("schema", Repetition::REQUIRED, fields));
+
+  // Write small batches and small data pages
+  std::shared_ptr<WriterProperties> writer_props = WriterProperties::Builder()
+                                                       .write_batch_size(64)
+                                                       ->data_pagesize(128)
+                                                       ->enable_dictionary()
+                                                       ->build();
+
+  ASSERT_OK_AND_ASSIGN(auto out_file, ::arrow::io::BufferOutputStream::Create());
+  std::shared_ptr<ParquetFileWriter> file_writer =
+      ParquetFileWriter::Open(out_file, schema, writer_props);
+
+  RowGroupWriter* rg_writer = file_writer->AppendRowGroup();
+
+  // write one column
+  ::arrow::random::RandomArrayGenerator rag(0);
+  DoubleWriter* writer = static_cast<DoubleWriter*>(rg_writer->NextColumn());
+  std::shared_ptr<::arrow::Array> col = rag.Float64(num_rows, 0, 100);
+  const auto& col_typed = static_cast<const ::arrow::DoubleArray&>(*col);
+  writer->WriteBatch(num_rows, nullptr, nullptr, col_typed.raw_values());
+  rg_writer->Close();
+  file_writer->Close();
+
+  // Open the reader
+  ASSERT_OK_AND_ASSIGN(auto file_buf, out_file->Finish());
+  auto in_file = std::make_shared<::arrow::io::BufferReader>(file_buf);
+
+  ReaderProperties reader_props;
+  reader_props.enable_buffered_stream();
+  reader_props.set_buffer_size(64);
+  std::unique_ptr<ParquetFileReader> file_reader =
+      ParquetFileReader::Open(in_file, reader_props);
+
+  auto row_group = file_reader->RowGroup(0);
+  auto col_reader = std::static_pointer_cast<DoubleReader>(
+      row_group->ColumnWithExposeEncoding(0, ExposedEncoding::DICTIONARY));
+  EXPECT_EQ(col_reader->GetExposedEncoding(), ExposedEncoding::DICTIONARY);
+
+  auto indices = ::arrow::internal::make_unique<int32_t[]>(num_rows);
+  const double* dict = nullptr;
+  int32_t dict_len = 0;
+  for (int row_index = 0; row_index < num_rows; ++row_index) {
+    const double* tmp_dict = nullptr;
+    int32_t tmp_dict_len = 0;
+    int64_t values_read = 0;
+    int64_t levels_read = col_reader->ReadBatchWithDictionary(
+        /*batch_size=*/1, /*def_levels=*/nullptr, /*rep_levels=*/nullptr,
+        indices.get() + row_index, &values_read, &tmp_dict, &tmp_dict_len);
+
+    if (tmp_dict != nullptr) {
+      EXPECT_EQ(values_read, 1);
+      dict = tmp_dict;
+      dict_len = tmp_dict_len;
+    } else {
+      EXPECT_EQ(values_read, 0);
+    }
+
+    ASSERT_EQ(1, levels_read);
+    ASSERT_EQ(1, values_read);
+  }
+
+  // Check the results
+  for (int row_index = 0; row_index < num_rows; ++row_index) {
+    EXPECT_LT(indices[row_index], dict_len);
+    EXPECT_EQ(dict[indices[row_index]], col_typed.Value(row_index));
+  }
+}
+
 TEST(TestFileReader, BufferedReads) {
   // PARQUET-1636: Buffered reads were broken before introduction of
   // RandomAccessFile::GetStream
diff --git a/cpp/src/parquet/types.h b/cpp/src/parquet/types.h
index 6bd67f1ee5f..c25719830ec 100644
--- a/cpp/src/parquet/types.h
+++ b/cpp/src/parquet/types.h
@@ -479,6 +479,15 @@ struct Encoding {
   };
 };
 
+// Exposed data encodings. It is the encoding of the data read from the file,
+// rather than the encoding of the data in the file. E.g., the data encoded as
+// RLE_DICTIONARY in the file can be read as dictionary indices by RLE
+// decoding, in which case the data read from the file is DICTIONARY encoded.
+enum class ExposedEncoding {
+  NO_ENCODING = 0,  // data is not encoded, i.e. already decoded during reading
+  DICTIONARY = 1
+};
+
 /// \brief Return true if Parquet supports indicated compression type
 PARQUET_EXPORT
 bool IsCodecSupported(Compression::type codec);

From caee207ef9be60b00307146260dac8626c79fc18 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Pedro?= <joaop@simbioseventures.com>
Date: Thu, 17 Jun 2021 13:37:46 +0530
Subject: [PATCH 428/719] ARROW-12882: [C++][Gandiva] Fix behavior of the
 convert replace function on gandiva
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The convert_replace function on Gandiva, when defining an empty replacement char, should be able to replace the invalid chars with an empty string.

Closes #10406 from jpedroantunes/bugfix/convert-replace-empty-char and squashes the following commits:

0e1ec000f <João Pedro> Fix behavior of the convert replace function on gandiva

Authored-by: João Pedro <joaop@simbioseventures.com>
Signed-off-by: Praveen <praveen@dremio.com>
---
 cpp/src/gandiva/precompiled/string_ops.cc     | 22 ++++++++++++-------
 .../gandiva/precompiled/string_ops_test.cc    | 12 ++++++++--
 2 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/cpp/src/gandiva/precompiled/string_ops.cc b/cpp/src/gandiva/precompiled/string_ops.cc
index 738ec367cd7..1cd566de4a5 100644
--- a/cpp/src/gandiva/precompiled/string_ops.cc
+++ b/cpp/src/gandiva/precompiled/string_ops.cc
@@ -1243,10 +1243,7 @@ const char* convert_replace_invalid_fromUTF8_binary(int64_t context, const char*
                                                     const char* char_to_replace,
                                                     int32_t char_to_replace_len,
                                                     int32_t* out_len) {
-  if (char_to_replace_len == 0) {
-    *out_len = text_len;
-    return text_in;
-  } else if (char_to_replace_len != 1) {
+  if (char_to_replace_len > 1) {
     gdv_fn_context_set_error_msg(context, "Replacement of multiple bytes not supported");
     *out_len = 0;
     return "";
@@ -1262,6 +1259,7 @@ const char* convert_replace_invalid_fromUTF8_binary(int64_t context, const char*
   }
   int32_t valid_bytes_to_cpy = 0;
   int32_t out_byte_counter = 0;
+  int32_t in_byte_counter = 0;
   int32_t char_len;
   // scan the base text from left to right and increment the start pointer till
   // looking for invalid chars to substitute
@@ -1273,9 +1271,15 @@ const char* convert_replace_invalid_fromUTF8_binary(int64_t context, const char*
       // define char_len = 1 to increase text_index by 1 (as ASCII char fits in 1 byte)
       char_len = 1;
       // first copy the valid bytes until now and then replace the invalid character
-      memcpy(ret + out_byte_counter, text_in + out_byte_counter, valid_bytes_to_cpy);
-      ret[out_byte_counter + valid_bytes_to_cpy] = char_to_replace[0];
-      out_byte_counter += valid_bytes_to_cpy + char_len;
+      memcpy(ret + out_byte_counter, text_in + in_byte_counter, valid_bytes_to_cpy);
+      // if the replacement char is empty, the invalid char should be ignored
+      if (char_to_replace_len == 0) {
+        out_byte_counter += valid_bytes_to_cpy;
+      } else {
+        ret[out_byte_counter + valid_bytes_to_cpy] = char_to_replace[0];
+        out_byte_counter += valid_bytes_to_cpy + char_len;
+      }
+      in_byte_counter += valid_bytes_to_cpy + char_len;
       valid_bytes_to_cpy = 0;
       continue;
     }
@@ -1285,8 +1289,10 @@ const char* convert_replace_invalid_fromUTF8_binary(int64_t context, const char*
   if (out_byte_counter == 0) return text_in;
   // if there are still valid bytes to copy, do it
   if (valid_bytes_to_cpy != 0) {
-    memcpy(ret + out_byte_counter, text_in + out_byte_counter, valid_bytes_to_cpy);
+    memcpy(ret + out_byte_counter, text_in + in_byte_counter, valid_bytes_to_cpy);
   }
+  // the out length will be the out bytes copied + the missing end bytes copied
+  *out_len = valid_bytes_to_cpy + out_byte_counter;
   return ret;
 }
 
diff --git a/cpp/src/gandiva/precompiled/string_ops_test.cc b/cpp/src/gandiva/precompiled/string_ops_test.cc
index 8ffaace624a..2460633d268 100644
--- a/cpp/src/gandiva/precompiled/string_ops_test.cc
+++ b/cpp/src/gandiva/precompiled/string_ops_test.cc
@@ -175,14 +175,22 @@ TEST(TestStringOps, TestConvertReplaceInvalidUtf8Char) {
   EXPECT_TRUE(ctx.has_error());
   ctx.Reset();
 
-  // full valid utf8, but invalid replacement char length
+  // invalid utf8 (xa0 and xa1 are invalid) with empty replacement char length
   std::string f("ok-\xa0\xa1-valid");
   auto f_in_out_len = static_cast<int>(f.length());
   const char* f_str = convert_replace_invalid_fromUTF8_binary(
       ctx_ptr, f.data(), f_in_out_len, "", 0, &f_in_out_len);
-  EXPECT_EQ(std::string(f_str, f_in_out_len), "ok-\xa0\xa1-valid");
+  EXPECT_EQ(std::string(f_str, f_in_out_len), "ok--valid");
   EXPECT_FALSE(ctx.has_error());
+  ctx.Reset();
 
+  // invalid utf8 (xa0 and xa1 are invalid) with empty replacement char length
+  std::string g("\xa0\xa1-ok-\xa0\xa1-valid-\xa0\xa1");
+  auto g_in_out_len = static_cast<int>(g.length());
+  const char* g_str = convert_replace_invalid_fromUTF8_binary(
+      ctx_ptr, g.data(), g_in_out_len, "", 0, &g_in_out_len);
+  EXPECT_EQ(std::string(g_str, g_in_out_len), "-ok--valid-");
+  EXPECT_FALSE(ctx.has_error());
   ctx.Reset();
 }
 

From 618f5dee5f2a186cb8e24b6f702d1154685f5b71 Mon Sep 17 00:00:00 2001
From: Weston Pace <weston.pace@gmail.com>
Date: Thu, 17 Jun 2021 15:20:44 +0200
Subject: [PATCH 429/719] ARROW-13101: [Python][Doc] pyarrow.FixedSizeListArray
 does not appear in the documentation

Closes #10545 from westonpace/bugfix/ARROW-13101--python-doc-pyarrow-fixedsizelistarray-does-not

Authored-by: Weston Pace <weston.pace@gmail.com>
Signed-off-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
---
 docs/source/python/api/arrays.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/source/python/api/arrays.rst b/docs/source/python/api/arrays.rst
index 81a00d8de3d..17b061dc7d8 100644
--- a/docs/source/python/api/arrays.rst
+++ b/docs/source/python/api/arrays.rst
@@ -68,6 +68,7 @@ may expose data type-specific methods or properties.
    Decimal128Array
    DictionaryArray
    ListArray
+   FixedSizeListArray
    LargeListArray
    StructArray
    UnionArray

From d3cdcb67d63f5a202afb6d97b173066351b81a84 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Thu, 17 Jun 2021 21:35:49 +0200
Subject: [PATCH 430/719] ARROW-13092: [C++] Return an error in CreateDir if
 target is a file

Closes #10541 from pitrou/ARROW-13092-create-dir-is-a-file

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/filesystem/test_util.cc |  3 +++
 cpp/src/arrow/util/io_util.cc         | 18 +++++++++++--
 cpp/src/arrow/util/io_util_test.cc    | 37 ++++++++++++++++++---------
 3 files changed, 44 insertions(+), 14 deletions(-)

diff --git a/cpp/src/arrow/filesystem/test_util.cc b/cpp/src/arrow/filesystem/test_util.cc
index bbff33f4d32..be9d99d72b8 100644
--- a/cpp/src/arrow/filesystem/test_util.cc
+++ b/cpp/src/arrow/filesystem/test_util.cc
@@ -208,6 +208,9 @@ void GenericFileSystemTest::TestCreateDir(FileSystem* fs) {
   ASSERT_RAISES(IOError, fs->CreateDir("AB/def/EF/GH", true /* recursive */));
   ASSERT_RAISES(IOError, fs->CreateDir("AB/def/EF", false /* recursive */));
 
+  // Cannot create a directory when there is already a file with the same name
+  ASSERT_RAISES(IOError, fs->CreateDir("AB/def"));
+
   AssertAllDirs(fs, {"AB", "AB/CD", "AB/CD/EF", "AB/GH", "AB/GH/IJ", "XY"});
   AssertAllFiles(fs, {"AB/def"});
 }
diff --git a/cpp/src/arrow/util/io_util.cc b/cpp/src/arrow/util/io_util.cc
index 5e6da2fb9d6..552417e5a13 100644
--- a/cpp/src/arrow/util/io_util.cc
+++ b/cpp/src/arrow/util/io_util.cc
@@ -472,11 +472,18 @@ namespace {
 
 Result<bool> DoCreateDir(const PlatformFilename& dir_path, bool create_parents) {
 #ifdef _WIN32
-  if (CreateDirectoryW(dir_path.ToNative().c_str(), nullptr)) {
+  const auto s = dir_path.ToNative().c_str();
+  if (CreateDirectoryW(s, nullptr)) {
     return true;
   }
   int errnum = GetLastError();
   if (errnum == ERROR_ALREADY_EXISTS) {
+    const auto attrs = GetFileAttributesW(s);
+    if (attrs == INVALID_FILE_ATTRIBUTES || !(attrs & FILE_ATTRIBUTE_DIRECTORY)) {
+      // Note we propagate the original error, not the GetFileAttributesW() error
+      return IOErrorFromWinError(ERROR_ALREADY_EXISTS, "Cannot create directory '",
+                                 dir_path.ToString(), "': non-directory entry exists");
+    }
     return false;
   }
   if (create_parents && errnum == ERROR_PATH_NOT_FOUND) {
@@ -489,10 +496,17 @@ Result<bool> DoCreateDir(const PlatformFilename& dir_path, bool create_parents)
   return IOErrorFromWinError(GetLastError(), "Cannot create directory '",
                              dir_path.ToString(), "'");
 #else
-  if (mkdir(dir_path.ToNative().c_str(), S_IRWXU | S_IRWXG | S_IRWXO) == 0) {
+  const auto s = dir_path.ToNative().c_str();
+  if (mkdir(s, S_IRWXU | S_IRWXG | S_IRWXO) == 0) {
     return true;
   }
   if (errno == EEXIST) {
+    struct stat st;
+    if (stat(s, &st) || !S_ISDIR(st.st_mode)) {
+      // Note we propagate the original errno, not the stat() errno
+      return IOErrorFromErrno(EEXIST, "Cannot create directory '", dir_path.ToString(),
+                              "': non-directory entry exists");
+    }
     return false;
   }
   if (create_parents && errno == ENOENT) {
diff --git a/cpp/src/arrow/util/io_util_test.cc b/cpp/src/arrow/util/io_util_test.cc
index a423ecd0152..c09e4b974dd 100644
--- a/cpp/src/arrow/util/io_util_test.cc
+++ b/cpp/src/arrow/util/io_util_test.cc
@@ -29,6 +29,7 @@
 #include <pthread.h>
 #endif
 
+#include <gmock/gmock-matchers.h>
 #include <gtest/gtest.h>
 
 #include "arrow/testing/gtest_util.h"
@@ -53,6 +54,12 @@ void AssertNotExists(const PlatformFilename& path) {
   ASSERT_FALSE(exists) << "Path '" << path.ToString() << "' exists";
 }
 
+void TouchFile(const PlatformFilename& path) {
+  int fd = -1;
+  ASSERT_OK_AND_ASSIGN(fd, FileOpenWritable(path));
+  ASSERT_OK(FileClose(fd));
+}
+
 TEST(ErrnoFromStatus, Basics) {
   Status st;
   st = Status::OK();
@@ -370,7 +377,7 @@ TEST(CreateDirDeleteDir, Basics) {
   const std::string BASE =
       temp_dir->path().Join("xxx-io-util-test-dir2").ValueOrDie().ToString();
   bool created, deleted;
-  PlatformFilename parent, child;
+  PlatformFilename parent, child, child_file;
 
   ASSERT_OK_AND_ASSIGN(parent, PlatformFilename::FromString(BASE));
   ASSERT_EQ(parent.ToString(), BASE);
@@ -392,6 +399,11 @@ TEST(CreateDirDeleteDir, Basics) {
   ASSERT_TRUE(created);
   AssertExists(child);
 
+  ASSERT_OK_AND_ASSIGN(child_file, PlatformFilename::FromString(BASE + "/some-file"));
+  TouchFile(child_file);
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      IOError, ::testing::HasSubstr("non-directory entry exists"), CreateDir(child_file));
+
   ASSERT_OK_AND_ASSIGN(deleted, DeleteDirTree(parent));
   ASSERT_TRUE(deleted);
   AssertNotExists(parent);
@@ -436,9 +448,7 @@ TEST(DeleteDirContents, Basics) {
   ASSERT_OK_AND_ASSIGN(child2, PlatformFilename::FromString(BASE + "/child-file"));
   ASSERT_OK_AND_ASSIGN(created, CreateDir(child1));
   ASSERT_TRUE(created);
-  int fd = -1;
-  ASSERT_OK_AND_ASSIGN(fd, FileOpenWritable(child2));
-  ASSERT_OK(FileClose(fd));
+  TouchFile(child2);
   AssertExists(child1);
   AssertExists(child2);
 
@@ -522,6 +532,14 @@ TEST(CreateDirTree, Basics) {
   ASSERT_OK_AND_ASSIGN(fn, temp_dir->path().Join("EF"));
   ASSERT_OK_AND_ASSIGN(created, CreateDirTree(fn));
   ASSERT_TRUE(created);
+
+  ASSERT_OK_AND_ASSIGN(fn, temp_dir->path().Join("AB/file"));
+  TouchFile(fn);
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      IOError, ::testing::HasSubstr("non-directory entry exists"), CreateDirTree(fn));
+
+  ASSERT_OK_AND_ASSIGN(fn, temp_dir->path().Join("AB/file/sub"));
+  ASSERT_RAISES(IOError, CreateDirTree(fn));
 }
 
 TEST(ListDir, Basics) {
@@ -546,9 +564,7 @@ TEST(ListDir, Basics) {
   ASSERT_OK_AND_ASSIGN(fn, temp_dir->path().Join("AB/EF/GH"));
   ASSERT_OK(CreateDirTree(fn));
   ASSERT_OK_AND_ASSIGN(fn, temp_dir->path().Join("AB/ghi.txt"));
-  int fd = -1;
-  ASSERT_OK_AND_ASSIGN(fd, FileOpenWritable(fn));
-  ASSERT_OK(FileClose(fd));
+  TouchFile(fn);
 
   ASSERT_OK_AND_ASSIGN(fn, temp_dir->path().Join("AB"));
   ASSERT_OK_AND_ASSIGN(entries, ListDir(fn));
@@ -568,15 +584,13 @@ TEST(ListDir, Basics) {
 TEST(DeleteFile, Basics) {
   std::unique_ptr<TemporaryDir> temp_dir;
   PlatformFilename fn;
-  int fd;
   bool deleted;
 
   ASSERT_OK_AND_ASSIGN(temp_dir, TemporaryDir::Make("io-util-test-"));
   ASSERT_OK_AND_ASSIGN(fn, temp_dir->path().Join("test-file"));
 
   AssertNotExists(fn);
-  ASSERT_OK_AND_ASSIGN(fd, FileOpenWritable(fn));
-  ASSERT_OK(FileClose(fd));
+  TouchFile(fn);
   AssertExists(fn);
   ASSERT_OK_AND_ASSIGN(deleted, DeleteFile(fn));
   ASSERT_TRUE(deleted);
@@ -638,8 +652,7 @@ TEST(FileUtils, LongPaths) {
   AssertExists(long_path);
   ASSERT_OK_AND_ASSIGN(long_filename,
                        PlatformFilename::FromString(fs.str() + "/file.txt"));
-  ASSERT_OK_AND_ASSIGN(fd, FileOpenWritable(long_filename));
-  ASSERT_OK(FileClose(fd));
+  TouchFile(long_filename);
   AssertExists(long_filename);
   fd = -1;
   ASSERT_OK_AND_ASSIGN(fd, FileOpenReadable(long_filename));

From f1e3c2e557bb3622b203fb270bf0329f53f1e617 Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Thu, 17 Jun 2021 19:02:35 -0500
Subject: [PATCH 431/719] ARROW-12940: [R] Expose C interface as R6 methods

Add methods for exporting/importing the data types that are supported in the C-interface

Closes #10536 from jonkeane/ARROW-12940-C-interface-as-methods

Authored-by: Jonathan Keane <jkeane@gmail.com>
Signed-off-by: Jonathan Keane <jkeane@gmail.com>
---
 r/DESCRIPTION                       |  2 +-
 r/R/array.R                         | 15 ++++----
 r/R/field.R                         |  5 ++-
 r/R/python.R                        | 24 ++++++-------
 r/R/record-batch-reader.R           |  7 ++--
 r/R/record-batch.R                  |  9 +++--
 r/R/schema.R                        |  5 ++-
 r/R/type.R                          |  6 +++-
 r/man/RecordBatchReader.Rd          |  2 +-
 r/man/array.Rd                      |  2 +-
 r/tests/testthat/test-Array.R       | 20 +++++++++++
 r/tests/testthat/test-RecordBatch.R | 56 ++++++++++++++++++++++++++---
 r/tests/testthat/test-Table.R       |  6 ++--
 r/tests/testthat/test-data-type.R   | 13 +++++++
 r/tests/testthat/test-field.R       | 13 +++++++
 r/tests/testthat/test-python.R      |  1 -
 r/tests/testthat/test-schema.R      | 13 +++++++
 17 files changed, 162 insertions(+), 37 deletions(-)

diff --git a/r/DESCRIPTION b/r/DESCRIPTION
index a78acdd4a8f..a6536015530 100644
--- a/r/DESCRIPTION
+++ b/r/DESCRIPTION
@@ -54,6 +54,7 @@ Suggests:
     withr
 LinkingTo: cpp11 (>= 0.2.0)
 Collate:
+    'arrowExports.R'
     'enums.R'
     'arrow-package.R'
     'type.R'
@@ -61,7 +62,6 @@ Collate:
     'arrow-datum.R'
     'array.R'
     'arrow-tabular.R'
-    'arrowExports.R'
     'buffer.R'
     'chunked-array.R'
     'io.R'
diff --git a/r/R/array.R b/r/R/array.R
index 0a117e5e74f..93d148ec29b 100644
--- a/r/R/array.R
+++ b/r/R/array.R
@@ -88,23 +88,23 @@
 #' my_array <- Array$create(1:10)
 #' my_array$type
 #' my_array$cast(int8())
-#' 
+#'
 #' # Check if value is null; zero-indexed
 #' na_array <- Array$create(c(1:5, NA))
 #' na_array$IsNull(0)
 #' na_array$IsNull(5)
 #' na_array$IsValid(5)
 #' na_array$null_count
-#' 
+#'
 #' # zero-copy slicing; the offset of the new Array will be the same as the index passed to $Slice
 #' new_array <- na_array$Slice(5)
 #' new_array$offset
-#' 
+#'
 #' # Compare 2 arrays
 #' na_array2 = na_array
 #' na_array2 == na_array # element-wise comparison
-#' na_array2$Equals(na_array) # overall comparison 
-#' 
+#' na_array2$Equals(na_array) # overall comparison
+#'
 #' @export
 Array <- R6Class("Array",
   inherit = ArrowDatum,
@@ -167,7 +167,8 @@ Array <- R6Class("Array",
     View = function(type) {
       Array$create(Array__View(self, as_type(type)))
     },
-    Validate = function() Array__Validate(self)
+    Validate = function() Array__Validate(self),
+    export_to_c = function(array_ptr, schema_ptr) ExportArray(self, array_ptr, schema_ptr)
   ),
   active = list(
     null_count = function() Array__null_count(self),
@@ -188,6 +189,8 @@ Array$create <- function(x, type = NULL) {
   }
   vec_to_arrow(x, type)
 }
+#' @include arrowExports.R
+Array$import_from_c <- ImportArray
 
 #' @rdname array
 #' @usage NULL
diff --git a/r/R/field.R b/r/R/field.R
index e4fba2af0b8..60d8ffde22b 100644
--- a/r/R/field.R
+++ b/r/R/field.R
@@ -38,7 +38,8 @@ Field <- R6Class("Field", inherit = ArrowObject,
     },
     Equals = function(other, ...) {
       inherits(other, "Field") && Field__Equals(self, other)
-    }
+    },
+    export_to_c = function(ptr) ExportField(self, ptr)
   ),
 
   active = list(
@@ -59,6 +60,8 @@ Field$create <- function(name, type, metadata) {
   assert_that(missing(metadata), msg = "metadata= is currently ignored")
   Field__initialize(enc2utf8(name), type, TRUE)
 }
+#' @include arrowExports.R
+Field$import_from_c <- ImportField
 
 #' @param name field name
 #' @param type logical type, instance of [DataType]
diff --git a/r/R/python.R b/r/R/python.R
index 52e4bcd7ac8..9d1ecf6347a 100644
--- a/r/R/python.R
+++ b/r/R/python.R
@@ -24,7 +24,7 @@ py_to_r.pyarrow.lib.Array <- function(x, ...) {
   })
 
   x$`_export_to_c`(array_ptr, schema_ptr)
-  ImportArray(array_ptr, schema_ptr)
+  Array$import_from_c(array_ptr, schema_ptr)
 }
 
 r_to_py.Array <- function(x, convert = FALSE) {
@@ -37,7 +37,7 @@ r_to_py.Array <- function(x, convert = FALSE) {
 
   # Import with convert = FALSE so that `_import_from_c` returns a Python object
   pa <- reticulate::import("pyarrow", convert = FALSE)
-  ExportArray(x, array_ptr, schema_ptr)
+  x$export_to_c(array_ptr, schema_ptr)
   out <- pa$Array$`_import_from_c`(array_ptr, schema_ptr)
   # But set the convert attribute on the return object to the requested value
   assign("convert", convert, out)
@@ -54,7 +54,7 @@ py_to_r.pyarrow.lib.RecordBatch <- function(x, ...) {
 
   x$`_export_to_c`(array_ptr, schema_ptr)
 
-  ImportRecordBatch(array_ptr, schema_ptr)
+  RecordBatch$import_from_c(array_ptr, schema_ptr)
 }
 
 r_to_py.RecordBatch <- function(x, convert = FALSE) {
@@ -67,7 +67,7 @@ r_to_py.RecordBatch <- function(x, convert = FALSE) {
 
   # Import with convert = FALSE so that `_import_from_c` returns a Python object
   pa <- reticulate::import("pyarrow", convert = FALSE)
-  ExportRecordBatch(x, array_ptr, schema_ptr)
+  x$export_to_c(array_ptr, schema_ptr)
   out <- pa$RecordBatch$`_import_from_c`(array_ptr, schema_ptr)
   # But set the convert attribute on the return object to the requested value
   assign("convert", convert, out)
@@ -108,7 +108,7 @@ py_to_r.pyarrow.lib.Schema <- function(x, ...) {
   on.exit(delete_arrow_schema(schema_ptr))
 
   x$`_export_to_c`(schema_ptr)
-  ImportSchema(schema_ptr)
+  Schema$import_from_c(schema_ptr)
 }
 
 r_to_py.Schema <- function(x, convert = FALSE) {
@@ -117,7 +117,7 @@ r_to_py.Schema <- function(x, convert = FALSE) {
 
   # Import with convert = FALSE so that `_import_from_c` returns a Python object
   pa <- reticulate::import("pyarrow", convert = FALSE)
-  ExportSchema(x, schema_ptr)
+  x$export_to_c(schema_ptr)
   out <- pa$Schema$`_import_from_c`(schema_ptr)
   # But set the convert attribute on the return object to the requested value
   assign("convert", convert, out)
@@ -129,7 +129,7 @@ py_to_r.pyarrow.lib.Field <- function(x, ...) {
   on.exit(delete_arrow_schema(schema_ptr))
 
   x$`_export_to_c`(schema_ptr)
-  ImportField(schema_ptr)
+  Field$import_from_c(schema_ptr)
 }
 
 r_to_py.Field <- function(x, convert = FALSE) {
@@ -138,7 +138,7 @@ r_to_py.Field <- function(x, convert = FALSE) {
 
   # Import with convert = FALSE so that `_import_from_c` returns a Python object
   pa <- reticulate::import("pyarrow", convert = FALSE)
-  ExportField(x, schema_ptr)
+  x$export_to_c(schema_ptr)
   out <- pa$Field$`_import_from_c`(schema_ptr)
   # But set the convert attribute on the return object to the requested value
   assign("convert", convert, out)
@@ -150,7 +150,7 @@ py_to_r.pyarrow.lib.DataType <- function(x, ...) {
   on.exit(delete_arrow_schema(schema_ptr))
 
   x$`_export_to_c`(schema_ptr)
-  ImportType(schema_ptr)
+  DataType$import_from_c(schema_ptr)
 }
 
 r_to_py.DataType <- function(x, convert = FALSE) {
@@ -159,7 +159,7 @@ r_to_py.DataType <- function(x, convert = FALSE) {
 
   # Import with convert = FALSE so that `_import_from_c` returns a Python object
   pa <- reticulate::import("pyarrow", convert = FALSE)
-  ExportType(x, schema_ptr)
+  x$export_to_c(schema_ptr)
   out <- pa$DataType$`_import_from_c`(schema_ptr)
   # But set the convert attribute on the return object to the requested value
   assign("convert", convert, out)
@@ -171,7 +171,7 @@ py_to_r.pyarrow.lib.RecordBatchReader <- function(x, ...) {
   on.exit(delete_arrow_array_stream(stream_ptr))
 
   x$`_export_to_c`(stream_ptr)
-  ImportRecordBatchReader(stream_ptr)
+  RecordBatchFileReader$import_from_c(stream_ptr)
 }
 
 r_to_py.RecordBatchReader <- function(x, convert = FALSE) {
@@ -180,7 +180,7 @@ r_to_py.RecordBatchReader <- function(x, convert = FALSE) {
 
   # Import with convert = FALSE so that `_import_from_c` returns a Python object
   pa <- reticulate::import("pyarrow", convert = FALSE)
-  ExportRecordBatchReader(x, stream_ptr)
+  x$export_to_c(stream_ptr)
   # TODO: handle subclasses of RecordBatchReader?
   out <- pa$lib$RecordBatchReader$`_import_from_c`(stream_ptr)
   # But set the convert attribute on the return object to the requested value
diff --git a/r/R/record-batch-reader.R b/r/R/record-batch-reader.R
index e00d24d8c6b..9fffea7da37 100644
--- a/r/R/record-batch-reader.R
+++ b/r/R/record-batch-reader.R
@@ -21,7 +21,7 @@
 #' communication (IPC)](https://arrow.apache.org/docs/format/Columnar.html#serialization-and-interprocess-communication-ipc):
 #' a "stream" format and a "file" format, known as Feather.
 #' `RecordBatchStreamReader` and `RecordBatchFileReader` are
-#' interfaces for accessing record batches from input sources those formats,
+#' interfaces for accessing record batches from input sources in those formats,
 #' respectively.
 #'
 #' For guidance on how to use these classes, see the examples section.
@@ -94,7 +94,8 @@ RecordBatchReader <- R6Class("RecordBatchReader", inherit = ArrowObject,
   public = list(
     read_next_batch = function() RecordBatchReader__ReadNext(self),
     batches = function() RecordBatchReader__batches(self),
-    read_table = function() Table__from_RecordBatchReader(self)
+    read_table = function() Table__from_RecordBatchReader(self),
+    export_to_c = function(stream_ptr) ExportRecordBatchReader(self, stream_ptr)
   ),
   active = list(
     schema = function() RecordBatchReader__schema(self)
@@ -115,6 +116,8 @@ RecordBatchStreamReader$create <- function(stream) {
   assert_is(stream, "InputStream")
   ipc___RecordBatchStreamReader__Open(stream)
 }
+#' @include arrowExports.R
+RecordBatchReader$import_from_c <- RecordBatchStreamReader$import_from_c <- ImportRecordBatchReader
 
 #' @rdname RecordBatchReader
 #' @usage NULL
diff --git a/r/R/record-batch.R b/r/R/record-batch.R
index 0ba6b4bd45d..c42834762ef 100644
--- a/r/R/record-batch.R
+++ b/r/R/record-batch.R
@@ -118,6 +118,9 @@ RecordBatch <- R6Class("RecordBatch", inherit = ArrowTabular,
     invalidate = function() {
       .Call(`_arrow_RecordBatch__Reset`, self)
       super$invalidate()
+    },
+    export_to_c = function(array_ptr, schema_ptr) {
+      ExportRecordBatch(self, array_ptr, schema_ptr)
     }
   ),
 
@@ -148,7 +151,7 @@ RecordBatch$create <- function(..., schema = NULL) {
   if (length(arrays) == 1 && inherits(arrays[[1]], c("raw", "Buffer", "InputStream", "Message"))) {
     return(RecordBatch$from_message(arrays[[1]], schema))
   }
-  
+
   # Else, a list of arrays or data.frames
   # making sure there are always names
   if (is.null(names(arrays))) {
@@ -161,7 +164,7 @@ RecordBatch$create <- function(..., schema = NULL) {
     out <- RecordBatch__from_arrays(schema, arrays)
     return(dplyr::group_by(out, !!!dplyr::groups(arrays[[1]])))
   }
-  
+
   # If any arrays are length 1, recycle them
   arrays <- recycle_scalars(arrays)
 
@@ -182,6 +185,8 @@ RecordBatch$from_message <- function(obj, schema) {
     ipc___ReadRecordBatch__Message__Schema(obj, schema)
   }
 }
+#' @include arrowExports.R
+RecordBatch$import_from_c <- ImportRecordBatch
 
 #' @param ... A `data.frame` or a named set of Arrays or vectors. If given a
 #' mixture of data.frames and vectors, the inputs will be autospliced together
diff --git a/r/R/schema.R b/r/R/schema.R
index d0491fdf6e3..32cb1522614 100644
--- a/r/R/schema.R
+++ b/r/R/schema.R
@@ -112,7 +112,8 @@ Schema <- R6Class("Schema",
     },
     Equals = function(other, check_metadata = FALSE, ...) {
       inherits(other, "Schema") && Schema__Equals(self, other, isTRUE(check_metadata))
-    }
+    },
+    export_to_c = function(ptr) ExportSchema(self, ptr)
   ),
   active = list(
     names = function() {
@@ -136,6 +137,8 @@ Schema <- R6Class("Schema",
   )
 )
 Schema$create <- function(...) schema_(.fields(list2(...)))
+#' @include arrowExports.R
+Schema$import_from_c <- ImportSchema
 
 prepare_key_value_metadata <- function(metadata) {
   # key-value-metadata must be a named character vector;
diff --git a/r/R/type.R b/r/R/type.R
index 0b9e1dbd03c..c96f43bbb46 100644
--- a/r/R/type.R
+++ b/r/R/type.R
@@ -39,7 +39,8 @@ DataType <- R6Class("DataType",
     },
     fields = function() {
       DataType__fields(self)
-    }
+    },
+    export_to_c = function(ptr) ExportType(self, ptr)
   ),
 
   active = list(
@@ -49,6 +50,9 @@ DataType <- R6Class("DataType",
   )
 )
 
+#' @include arrowExports.R
+DataType$import_from_c <- ImportType
+
 INTEGER_TYPES <- as.character(outer(c("uint", "int"), c(8, 16, 32, 64), paste0))
 FLOAT_TYPES <- c("float16", "float32", "float64", "halffloat", "float", "double")
 
diff --git a/r/man/RecordBatchReader.Rd b/r/man/RecordBatchReader.Rd
index d2e1a6919e6..90c796a6693 100644
--- a/r/man/RecordBatchReader.Rd
+++ b/r/man/RecordBatchReader.Rd
@@ -10,7 +10,7 @@
 Apache Arrow defines two formats for \href{https://arrow.apache.org/docs/format/Columnar.html#serialization-and-interprocess-communication-ipc}{serializing data for interprocess communication (IPC)}:
 a "stream" format and a "file" format, known as Feather.
 \code{RecordBatchStreamReader} and \code{RecordBatchFileReader} are
-interfaces for accessing record batches from input sources those formats,
+interfaces for accessing record batches from input sources in those formats,
 respectively.
 
 For guidance on how to use these classes, see the examples section.
diff --git a/r/man/array.Rd b/r/man/array.Rd
index 0c1aed407ac..71957aff90c 100644
--- a/r/man/array.Rd
+++ b/r/man/array.Rd
@@ -102,6 +102,6 @@ new_array$offset
 # Compare 2 arrays
 na_array2 = na_array
 na_array2 == na_array # element-wise comparison
-na_array2$Equals(na_array) # overall comparison 
+na_array2$Equals(na_array) # overall comparison
 \dontshow{\}) # examplesIf}
 }
diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R
index 26d0a3005e4..a9f20c89574 100644
--- a/r/tests/testthat/test-Array.R
+++ b/r/tests/testthat/test-Array.R
@@ -819,3 +819,23 @@ test_that("auto int64 conversion to int can be disabled (ARROW-10093)", {
     expect_true(inherits(as.data.frame(batch)$x, "integer64"))
   })
 })
+
+
+test_that("Array to C-interface", {
+  # create a struct array since that's one of the more complicated array types
+  df <- tibble::tibble(x = 1:10, y = x / 2, z = letters[1:10])
+  arr <- Array$create(df)
+
+  # export the array via the C-interface
+  schema_ptr <- allocate_arrow_schema()
+  array_ptr <- allocate_arrow_array()
+  on.exit({
+    delete_arrow_schema(schema_ptr)
+    delete_arrow_array(array_ptr)
+  })
+  arr$export_to_c(array_ptr, schema_ptr)
+
+  # then import it and check that the roundtripped value is the same
+  circle <- Array$import_from_c(array_ptr, schema_ptr)
+  expect_equal(arr, circle)
+})
diff --git a/r/tests/testthat/test-RecordBatch.R b/r/tests/testthat/test-RecordBatch.R
index 6617805db54..58afe4ef87e 100644
--- a/r/tests/testthat/test-RecordBatch.R
+++ b/r/tests/testthat/test-RecordBatch.R
@@ -553,15 +553,61 @@ test_that("ARROW-11769 - grouping preserved in record batch creation", {
 })
 
 test_that("ARROW-12729 - length returns number of columns in RecordBatch", {
-  
+
   tbl <- tibble::tibble(
     int = 1:10,
     fct = factor(rep(c("A", "B"), 5)),
     fct2 = factor(rep(c("C", "D"), each = 5)),
   )
-  
+
   rb <- record_batch(!!!tbl)
-  
+
   expect_identical(length(rb), 3L)
-  
-})
\ No newline at end of file
+
+})
+
+test_that("RecordBatchReader to C-interface", {
+  tab <- Table$create(example_data)
+
+  # export the RecordBatchReader via the C-interface
+  stream_ptr <- allocate_arrow_array_stream()
+  on.exit(delete_arrow_array_stream(stream_ptr))
+  scan <- Scanner$create(tab)
+  reader <- scan$ToRecordBatchReader()
+  reader$export_to_c(stream_ptr)
+
+  # then import it and check that the roundtripped value is the same
+  circle <- RecordBatchStreamReader$import_from_c(stream_ptr)
+  tab_from_c_new <- circle$read_table()
+  expect_equal(tab, tab_from_c_new)
+
+  # export the RecordBatchStreamReader via the C-interface
+  stream_ptr_new <- allocate_arrow_array_stream()
+  on.exit(delete_arrow_array_stream(stream_ptr_new))
+  bytes <- write_to_raw(example_data)
+  expect_type(bytes, "raw")
+  reader_new <- RecordBatchStreamReader$create(bytes)
+  reader_new$export_to_c(stream_ptr_new)
+
+  # then import it and check that the roundtripped value is the same
+  circle_new <- RecordBatchStreamReader$import_from_c(stream_ptr_new)
+  tab_from_c_new <- circle_new$read_table()
+  expect_equal(tab, tab_from_c_new)
+})
+
+test_that("RecordBatch to C-interface", {
+  batch <- RecordBatch$create(example_data)
+
+  # export the RecordBatch via the C-interface
+  schema_ptr <- allocate_arrow_schema()
+  array_ptr <- allocate_arrow_array()
+  on.exit({
+    delete_arrow_schema(schema_ptr)
+    delete_arrow_array(array_ptr)
+  })
+  batch$export_to_c(array_ptr, schema_ptr)
+
+  # then import it and check that the roundtripped value is the same
+  circle <- RecordBatch$import_from_c(array_ptr, schema_ptr)
+  expect_equal(batch, circle)
+})
diff --git a/r/tests/testthat/test-Table.R b/r/tests/testthat/test-Table.R
index 6dd36b248ec..9a40e40edf4 100644
--- a/r/tests/testthat/test-Table.R
+++ b/r/tests/testthat/test-Table.R
@@ -543,9 +543,9 @@ test_that("ARROW-12729 - length returns number of columns in Table", {
     fct = factor(rep(c("A", "B"), 5)),
     fct2 = factor(rep(c("C", "D"), each = 5)),
   )
-  
+
   tab <- Table$create(!!!tbl)
-  
+
   expect_identical(length(tab), 3L)
-  
+
 })
diff --git a/r/tests/testthat/test-data-type.R b/r/tests/testthat/test-data-type.R
index 5c0a31191a1..412abef98e9 100644
--- a/r/tests/testthat/test-data-type.R
+++ b/r/tests/testthat/test-data-type.R
@@ -411,3 +411,16 @@ test_that("FixedSizeBinary", {
   expect_error(fixed_size_binary("four"))
   expect_error(fixed_size_binary(c(2, 4)))
 })
+
+test_that("DataType to C-interface", {
+  datatype <- timestamp("ms", timezone = "Asia/Pyongyang")
+
+  # export the datatype via the C-interface
+  ptr <- allocate_arrow_schema()
+  on.exit(delete_arrow_schema(ptr))
+  datatype$export_to_c(ptr)
+
+  # then import it and check that the roundtripped value is the same
+  circle <- DataType$import_from_c(ptr)
+  expect_equal(circle, datatype)
+})
diff --git a/r/tests/testthat/test-field.R b/r/tests/testthat/test-field.R
index f72cb379a5e..aacb5012e70 100644
--- a/r/tests/testthat/test-field.R
+++ b/r/tests/testthat/test-field.R
@@ -36,3 +36,16 @@ test_that("Print method for field", {
     "Field\nzz: dictionary<values=string, indices=int32>"
   )
 })
+
+test_that("Field to C-interface", {
+  field <- field("x", time32("s"))
+
+  # export the field via the C-interface
+  ptr <- allocate_arrow_schema()
+  on.exit(delete_arrow_schema(ptr))
+  field$export_to_c(ptr)
+
+  # then import it and check that the roundtripped value is the same
+  circle <- Field$import_from_c(ptr)
+  expect_equal(circle, field)
+})
diff --git a/r/tests/testthat/test-python.R b/r/tests/testthat/test-python.R
index c3a9e269ad6..c7bedc518ef 100644
--- a/r/tests/testthat/test-python.R
+++ b/r/tests/testthat/test-python.R
@@ -104,7 +104,6 @@ test_that("DataType roundtrip", {
 })
 
 test_that("Field roundtrip", {
-  skip("TODO in pyarrow: 'pyarrow.lib.Field' has no attribute '_import_from_c'")
   r <- field("x", time32("s"))
   py <- reticulate::r_to_py(r)
   expect_s3_class(py, "pyarrow.lib.Field")
diff --git a/r/tests/testthat/test-schema.R b/r/tests/testthat/test-schema.R
index 87dad175e2b..9509c888578 100644
--- a/r/tests/testthat/test-schema.R
+++ b/r/tests/testthat/test-schema.R
@@ -174,3 +174,16 @@ test_that("unify_schemas", {
     schema(b = double(), c = bool(), k = utf8())
   )
 })
+
+test_that("Schema to C-interface", {
+  schema <- schema(b = double(), c = bool())
+
+  # export the schema via the C-interface
+  ptr <- allocate_arrow_schema()
+  on.exit(delete_arrow_schema(ptr))
+  schema$export_to_c(ptr)
+
+  # then import it and check that the roundtripped value is the same
+  circle <- Schema$import_from_c(ptr)
+  expect_equal(circle, schema)
+})

From 4743e181596b9ee45c6b063bcf59fdf9eb72418f Mon Sep 17 00:00:00 2001
From: Yibo Cai <yibo.cai@arm.com>
Date: Fri, 18 Jun 2021 14:20:47 -0400
Subject: [PATCH 432/719] ARROW-12074: [C++][Compute] Add scalar arithmetic
 kernels for decimal

Add basic binary arithmetic (+,-,*,/) kernels for decimal types.

Closes #10364 from cyb70289/decimal-arith

Authored-by: Yibo Cai <yibo.cai@arm.com>
Signed-off-by: Benjamin Kietzman <bengilgit@gmail.com>
---
 cpp/src/arrow/compute/kernel.h                |   3 +
 .../arrow/compute/kernels/codegen_internal.h  |  46 +++
 .../compute/kernels/scalar_arithmetic.cc      | 240 +++++++++++-
 .../compute/kernels/scalar_arithmetic_test.cc | 363 ++++++++++++++++++
 cpp/src/arrow/type.cc                         |  11 +
 cpp/src/arrow/type.h                          |   4 +
 cpp/src/arrow/type_traits.h                   |  11 +
 docs/source/cpp/compute.rst                   |  37 +-
 8 files changed, 700 insertions(+), 15 deletions(-)

diff --git a/cpp/src/arrow/compute/kernel.h b/cpp/src/arrow/compute/kernel.h
index 0d5fa147727..f8d15952e73 100644
--- a/cpp/src/arrow/compute/kernel.h
+++ b/cpp/src/arrow/compute/kernel.h
@@ -321,6 +321,9 @@ class ARROW_EXPORT OutputType {
     this->resolver_ = other.resolver_;
   }
 
+  OutputType& operator=(const OutputType&) = default;
+  OutputType& operator=(OutputType&&) = default;
+
   /// \brief Return the shape and type of the expected output value of the
   /// kernel given the value descriptors (shapes and types) of the input
   /// arguments. The resolver may make use of state information kept in the
diff --git a/cpp/src/arrow/compute/kernels/codegen_internal.h b/cpp/src/arrow/compute/kernels/codegen_internal.h
index 891f90a97d4..6a5cee124c0 100644
--- a/cpp/src/arrow/compute/kernels/codegen_internal.h
+++ b/cpp/src/arrow/compute/kernels/codegen_internal.h
@@ -149,6 +149,8 @@ struct GetViewType<Decimal128Type> {
   static T LogicalValue(PhysicalType value) {
     return Decimal128(reinterpret_cast<const uint8_t*>(value.data()));
   }
+
+  static T LogicalValue(T value) { return value; }
 };
 
 template <>
@@ -159,6 +161,8 @@ struct GetViewType<Decimal256Type> {
   static T LogicalValue(PhysicalType value) {
     return Decimal256(reinterpret_cast<const uint8_t*>(value.data()));
   }
+
+  static T LogicalValue(T value) { return value; }
 };
 
 template <typename Type, typename Enable = void>
@@ -243,6 +247,18 @@ struct ArrayIterator<Type, enable_if_base_binary<Type>> {
   }
 };
 
+template <typename Type>
+struct ArrayIterator<Type, enable_if_decimal<Type>> {
+  using T = typename TypeTraits<Type>::ScalarType::ValueType;
+  using endian_agnostic = std::array<uint8_t, sizeof(T)>;
+  const endian_agnostic* values;
+
+  explicit ArrayIterator(const ArrayData& data)
+      : values(data.GetValues<endian_agnostic>(1)) {}
+
+  T operator()() { return T{values++->data()}; }
+};
+
 // Iterator over various output array types, taking a GetOutputType<Type>
 
 template <typename Type, typename Enable = void>
@@ -262,6 +278,20 @@ struct OutputArrayWriter<Type, enable_if_has_c_type_not_boolean<Type>> {
   void WriteNull() { *values++ = T{}; }
 };
 
+template <typename Type>
+struct OutputArrayWriter<Type, enable_if_decimal<Type>> {
+  using T = typename TypeTraits<Type>::ScalarType::ValueType;
+  using endian_agnostic = std::array<uint8_t, sizeof(T)>;
+  endian_agnostic* values;
+
+  explicit OutputArrayWriter(ArrayData* data)
+      : values(data->GetMutableValues<endian_agnostic>(1)) {}
+
+  void Write(T value) { value.ToBytes(values++->data()); }
+
+  void WriteNull() { T{}.ToBytes(values++->data()); }
+};
+
 // (Un)box Scalar to / from C++ value
 
 template <typename Type, typename Enable = void>
@@ -538,6 +568,22 @@ struct OutputAdapter<Type, enable_if_base_binary<Type>> {
   }
 };
 
+template <typename Type>
+struct OutputAdapter<Type, enable_if_decimal<Type>> {
+  using T = typename TypeTraits<Type>::ScalarType::ValueType;
+  using endian_agnostic = std::array<uint8_t, sizeof(T)>;
+
+  template <typename Generator>
+  static Status Write(KernelContext*, Datum* out, Generator&& generator) {
+    ArrayData* out_arr = out->mutable_array();
+    auto out_data = out_arr->GetMutableValues<endian_agnostic>(1);
+    for (int64_t i = 0; i < out_arr->length; ++i) {
+      generator().ToBytes(out_data++->data());
+    }
+    return Status::OK();
+  }
+};
+
 // A kernel exec generator for unary functions that addresses both array and
 // scalar inputs and dispatches input iteration and output writing to other
 // templates
diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
index 743d2e3fc0e..f51484e53ff 100644
--- a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
@@ -15,11 +15,14 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include <algorithm>
 #include <cmath>
 #include <limits>
+#include <utility>
 
 #include "arrow/compute/kernels/common.h"
 #include "arrow/type_traits.h"
+#include "arrow/util/decimal.h"
 #include "arrow/util/int_util_internal.h"
 #include "arrow/util/macros.h"
 
@@ -62,6 +65,11 @@ using enable_if_integer =
 template <typename T>
 using enable_if_floating_point = enable_if_t<std::is_floating_point<T>::value, T>;
 
+template <typename T>
+using enable_if_decimal =
+    enable_if_t<std::is_same<Decimal128, T>::value || std::is_same<Decimal256, T>::value,
+                T>;
+
 template <typename T, typename Unsigned = typename std::make_unsigned<T>::type>
 constexpr Unsigned to_unsigned(T signed_) {
   return static_cast<Unsigned>(signed_);
@@ -126,11 +134,16 @@ struct Add {
                                                     Status*) {
     return arrow::internal::SafeSignedAdd(left, right);
   }
+
+  template <typename T, typename Arg0, typename Arg1>
+  static enable_if_decimal<T> Call(KernelContext*, Arg0 left, Arg1 right, Status*) {
+    return left + right;
+  }
 };
 
 struct AddChecked {
   template <typename T, typename Arg0, typename Arg1>
-  enable_if_integer<T> Call(KernelContext*, Arg0 left, Arg1 right, Status* st) {
+  static enable_if_integer<T> Call(KernelContext*, Arg0 left, Arg1 right, Status* st) {
     static_assert(std::is_same<T, Arg0>::value && std::is_same<T, Arg1>::value, "");
     T result = 0;
     if (ARROW_PREDICT_FALSE(AddWithOverflow(left, right, &result))) {
@@ -140,10 +153,16 @@ struct AddChecked {
   }
 
   template <typename T, typename Arg0, typename Arg1>
-  enable_if_floating_point<T> Call(KernelContext*, Arg0 left, Arg1 right, Status*) {
+  static enable_if_floating_point<T> Call(KernelContext*, Arg0 left, Arg1 right,
+                                          Status*) {
     static_assert(std::is_same<T, Arg0>::value && std::is_same<T, Arg1>::value, "");
     return left + right;
   }
+
+  template <typename T, typename Arg0, typename Arg1>
+  static enable_if_decimal<T> Call(KernelContext*, Arg0 left, Arg1 right, Status*) {
+    return left + right;
+  }
 };
 
 struct Subtract {
@@ -164,11 +183,16 @@ struct Subtract {
                                                     Status*) {
     return arrow::internal::SafeSignedSubtract(left, right);
   }
+
+  template <typename T, typename Arg0, typename Arg1>
+  static enable_if_decimal<T> Call(KernelContext*, Arg0 left, Arg1 right, Status*) {
+    return left + (-right);
+  }
 };
 
 struct SubtractChecked {
   template <typename T, typename Arg0, typename Arg1>
-  enable_if_integer<T> Call(KernelContext*, Arg0 left, Arg1 right, Status* st) {
+  static enable_if_integer<T> Call(KernelContext*, Arg0 left, Arg1 right, Status* st) {
     static_assert(std::is_same<T, Arg0>::value && std::is_same<T, Arg1>::value, "");
     T result = 0;
     if (ARROW_PREDICT_FALSE(SubtractWithOverflow(left, right, &result))) {
@@ -178,10 +202,16 @@ struct SubtractChecked {
   }
 
   template <typename T, typename Arg0, typename Arg1>
-  enable_if_floating_point<T> Call(KernelContext*, Arg0 left, Arg1 right, Status*) {
+  static enable_if_floating_point<T> Call(KernelContext*, Arg0 left, Arg1 right,
+                                          Status*) {
     static_assert(std::is_same<T, Arg0>::value && std::is_same<T, Arg1>::value, "");
     return left - right;
   }
+
+  template <typename T, typename Arg0, typename Arg1>
+  static enable_if_decimal<T> Call(KernelContext*, Arg0 left, Arg1 right, Status*) {
+    return left + (-right);
+  }
 };
 
 struct Multiply {
@@ -224,11 +254,16 @@ struct Multiply {
   static constexpr uint16_t Call(KernelContext*, uint16_t left, uint16_t right, Status*) {
     return static_cast<uint32_t>(left) * static_cast<uint32_t>(right);
   }
+
+  template <typename T, typename Arg0, typename Arg1>
+  static enable_if_decimal<T> Call(KernelContext*, Arg0 left, Arg1 right, Status*) {
+    return left * right;
+  }
 };
 
 struct MultiplyChecked {
   template <typename T, typename Arg0, typename Arg1>
-  enable_if_integer<T> Call(KernelContext*, Arg0 left, Arg1 right, Status* st) {
+  static enable_if_integer<T> Call(KernelContext*, Arg0 left, Arg1 right, Status* st) {
     static_assert(std::is_same<T, Arg0>::value && std::is_same<T, Arg1>::value, "");
     T result = 0;
     if (ARROW_PREDICT_FALSE(MultiplyWithOverflow(left, right, &result))) {
@@ -238,10 +273,16 @@ struct MultiplyChecked {
   }
 
   template <typename T, typename Arg0, typename Arg1>
-  enable_if_floating_point<T> Call(KernelContext*, Arg0 left, Arg1 right, Status*) {
+  static enable_if_floating_point<T> Call(KernelContext*, Arg0 left, Arg1 right,
+                                          Status*) {
     static_assert(std::is_same<T, Arg0>::value && std::is_same<T, Arg1>::value, "");
     return left * right;
   }
+
+  template <typename T, typename Arg0, typename Arg1>
+  static enable_if_decimal<T> Call(KernelContext*, Arg0 left, Arg1 right, Status*) {
+    return left * right;
+  }
 };
 
 struct Divide {
@@ -263,6 +304,16 @@ struct Divide {
     }
     return result;
   }
+
+  template <typename T, typename Arg0, typename Arg1>
+  static enable_if_decimal<T> Call(KernelContext*, Arg0 left, Arg1 right, Status* st) {
+    if (right == Arg1()) {
+      *st = Status::Invalid("Divide by zero");
+      return T();
+    } else {
+      return left / right;
+    }
+  }
 };
 
 struct DivideChecked {
@@ -290,6 +341,12 @@ struct DivideChecked {
     }
     return left / right;
   }
+
+  template <typename T, typename Arg0, typename Arg1>
+  static enable_if_decimal<T> Call(KernelContext* ctx, Arg0 left, Arg1 right,
+                                   Status* st) {
+    return Divide::Call<T>(ctx, left, right, st);
+  }
 };
 
 struct Negate {
@@ -304,7 +361,7 @@ struct Negate {
   }
 
   template <typename T, typename Arg>
-  static constexpr enable_if_signed_integer<T> Call(KernelContext*, Arg arg, Status* st) {
+  static constexpr enable_if_signed_integer<T> Call(KernelContext*, Arg arg, Status*) {
     return arrow::internal::SafeSignedNegate(arg);
   }
 };
@@ -428,12 +485,157 @@ ArrayKernelExec ArithmeticExecFromOp(detail::GetTypeId get_id) {
   }
 }
 
+Status CastBinaryDecimalArgs(const std::string& func_name,
+                             std::vector<ValueDescr>* values) {
+  auto& left_type = (*values)[0].type;
+  auto& right_type = (*values)[1].type;
+  DCHECK(is_decimal(left_type->id()) || is_decimal(right_type->id()));
+
+  // decimal + float = float
+  if (is_floating(left_type->id())) {
+    right_type = left_type;
+    return Status::OK();
+  } else if (is_floating(right_type->id())) {
+    left_type = right_type;
+    return Status::OK();
+  }
+
+  // precision, scale of left and right args
+  int32_t p1, s1, p2, s2;
+
+  // decimal + integer = decimal
+  if (is_decimal(left_type->id())) {
+    auto decimal = checked_cast<const DecimalType*>(left_type.get());
+    p1 = decimal->precision();
+    s1 = decimal->scale();
+  } else {
+    DCHECK(is_integer(left_type->id()));
+    p1 = static_cast<int32_t>(std::ceil(std::log10(bit_width(left_type->id()))));
+    s1 = 0;
+  }
+  if (is_decimal(right_type->id())) {
+    auto decimal = checked_cast<const DecimalType*>(right_type.get());
+    p2 = decimal->precision();
+    s2 = decimal->scale();
+  } else {
+    DCHECK(is_integer(right_type->id()));
+    p2 = static_cast<int32_t>(std::ceil(std::log10(bit_width(right_type->id()))));
+    s2 = 0;
+  }
+  if (s1 < 0 || s2 < 0) {
+    return Status::NotImplemented("Decimals with negative scales not supported");
+  }
+
+  // decimal128 + decimal256 = decimal256
+  Type::type casted_type_id = Type::DECIMAL128;
+  if (left_type->id() == Type::DECIMAL256 || right_type->id() == Type::DECIMAL256) {
+    casted_type_id = Type::DECIMAL256;
+  }
+
+  // decimal promotion rules compatible with amazon redshift
+  // https://docs.aws.amazon.com/redshift/latest/dg/r_numeric_computations201.html
+  int32_t left_scaleup, right_scaleup;
+
+  // "add_checked" -> "add"
+  const std::string op = func_name.substr(0, func_name.find("_"));
+  if (op == "add" || op == "subtract") {
+    left_scaleup = std::max(s1, s2) - s1;
+    right_scaleup = std::max(s1, s2) - s2;
+  } else if (op == "multiply") {
+    left_scaleup = right_scaleup = 0;
+  } else if (op == "divide") {
+    left_scaleup = std::max(4, s1 + p2 - s2 + 1) + s2 - s1;
+    right_scaleup = 0;
+  } else {
+    return Status::Invalid("Invalid decimal function: ", func_name);
+  }
+
+  ARROW_ASSIGN_OR_RAISE(
+      left_type, DecimalType::Make(casted_type_id, p1 + left_scaleup, s1 + left_scaleup));
+  ARROW_ASSIGN_OR_RAISE(right_type, DecimalType::Make(casted_type_id, p2 + right_scaleup,
+                                                      s2 + right_scaleup));
+  return Status::OK();
+}
+
+// resolve decimal binary operation output type per *casted* args
+template <typename OutputGetter>
+Result<ValueDescr> ResolveDecimalBinaryOperationOutput(
+    const std::vector<ValueDescr>& args, OutputGetter&& getter) {
+  // casted args should be same size decimals
+  auto left_type = checked_cast<const DecimalType*>(args[0].type.get());
+  auto right_type = checked_cast<const DecimalType*>(args[1].type.get());
+  DCHECK_EQ(left_type->id(), right_type->id());
+
+  int32_t precision, scale;
+  std::tie(precision, scale) = getter(left_type->precision(), left_type->scale(),
+                                      right_type->precision(), right_type->scale());
+  ARROW_ASSIGN_OR_RAISE(auto type, DecimalType::Make(left_type->id(), precision, scale));
+  return ValueDescr(std::move(type), GetBroadcastShape(args));
+}
+
+Result<ValueDescr> ResolveDecimalAdditionOrSubtractionOutput(
+    KernelContext*, const std::vector<ValueDescr>& args) {
+  return ResolveDecimalBinaryOperationOutput(
+      args, [](int32_t p1, int32_t s1, int32_t p2, int32_t s2) {
+        DCHECK_EQ(s1, s2);
+        const int32_t scale = s1;
+        const int32_t precision = std::max(p1 - s1, p2 - s2) + scale + 1;
+        return std::make_pair(precision, scale);
+      });
+}
+
+Result<ValueDescr> ResolveDecimalMultiplicationOutput(
+    KernelContext*, const std::vector<ValueDescr>& args) {
+  return ResolveDecimalBinaryOperationOutput(
+      args, [](int32_t p1, int32_t s1, int32_t p2, int32_t s2) {
+        const int32_t scale = s1 + s2;
+        const int32_t precision = p1 + p2 + 1;
+        return std::make_pair(precision, scale);
+      });
+}
+
+Result<ValueDescr> ResolveDecimalDivisionOutput(KernelContext*,
+                                                const std::vector<ValueDescr>& args) {
+  return ResolveDecimalBinaryOperationOutput(
+      args, [](int32_t p1, int32_t s1, int32_t p2, int32_t s2) {
+        DCHECK_GE(s1, s2);
+        const int32_t scale = s1 - s2;
+        const int32_t precision = p1;
+        return std::make_pair(precision, scale);
+      });
+}
+
+template <typename Op>
+void AddDecimalBinaryKernels(const std::string& name,
+                             std::shared_ptr<ScalarFunction>* func) {
+  OutputType out_type(null());
+  const std::string op = name.substr(0, name.find("_"));
+  if (op == "add" || op == "subtract") {
+    out_type = OutputType(ResolveDecimalAdditionOrSubtractionOutput);
+  } else if (op == "multiply") {
+    out_type = OutputType(ResolveDecimalMultiplicationOutput);
+  } else if (op == "divide") {
+    out_type = OutputType(ResolveDecimalDivisionOutput);
+  } else {
+    DCHECK(false);
+  }
+
+  auto in_type128 = InputType(Type::DECIMAL128);
+  auto in_type256 = InputType(Type::DECIMAL256);
+  auto exec128 = ScalarBinaryNotNullEqualTypes<Decimal128Type, Decimal128Type, Op>::Exec;
+  auto exec256 = ScalarBinaryNotNullEqualTypes<Decimal256Type, Decimal256Type, Op>::Exec;
+  DCHECK_OK((*func)->AddKernel({in_type128, in_type128}, out_type, exec128));
+  DCHECK_OK((*func)->AddKernel({in_type256, in_type256}, out_type, exec256));
+}
+
 struct ArithmeticFunction : ScalarFunction {
   using ScalarFunction::ScalarFunction;
 
   Result<const Kernel*> DispatchBest(std::vector<ValueDescr>* values) const override {
     RETURN_NOT_OK(CheckArity(*values));
 
+    RETURN_NOT_OK(CheckDecimals(values));
+
     using arrow::compute::detail::DispatchExactImpl;
     if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
 
@@ -451,6 +653,22 @@ struct ArithmeticFunction : ScalarFunction {
     if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
     return arrow::compute::detail::NoMatchingKernel(this, *values);
   }
+
+  Status CheckDecimals(std::vector<ValueDescr>* values) const {
+    bool has_decimal = false;
+    for (const auto& value : *values) {
+      if (is_decimal(value.type->id())) {
+        has_decimal = true;
+        break;
+      }
+    }
+    if (!has_decimal) return Status::OK();
+
+    if (values->size() == 2) {
+      return CastBinaryDecimalArgs(name(), values);
+    }
+    return Status::OK();
+  }
 };
 
 template <typename Op>
@@ -617,16 +835,19 @@ void RegisterScalarArithmetic(FunctionRegistry* registry) {
 
   // ----------------------------------------------------------------------
   auto add = MakeArithmeticFunction<Add>("add", &add_doc);
+  AddDecimalBinaryKernels<Add>("add", &add);
   DCHECK_OK(registry->AddFunction(std::move(add)));
 
   // ----------------------------------------------------------------------
   auto add_checked =
       MakeArithmeticFunctionNotNull<AddChecked>("add_checked", &add_checked_doc);
+  AddDecimalBinaryKernels<AddChecked>("add_checked", &add_checked);
   DCHECK_OK(registry->AddFunction(std::move(add_checked)));
 
   // ----------------------------------------------------------------------
   // subtract
   auto subtract = MakeArithmeticFunction<Subtract>("subtract", &sub_doc);
+  AddDecimalBinaryKernels<Subtract>("subtract", &subtract);
 
   // Add subtract(timestamp, timestamp) -> duration
   for (auto unit : AllTimeUnits()) {
@@ -640,24 +861,29 @@ void RegisterScalarArithmetic(FunctionRegistry* registry) {
   // ----------------------------------------------------------------------
   auto subtract_checked = MakeArithmeticFunctionNotNull<SubtractChecked>(
       "subtract_checked", &sub_checked_doc);
+  AddDecimalBinaryKernels<SubtractChecked>("subtract_checked", &subtract_checked);
   DCHECK_OK(registry->AddFunction(std::move(subtract_checked)));
 
   // ----------------------------------------------------------------------
   auto multiply = MakeArithmeticFunction<Multiply>("multiply", &mul_doc);
+  AddDecimalBinaryKernels<Multiply>("multiply", &multiply);
   DCHECK_OK(registry->AddFunction(std::move(multiply)));
 
   // ----------------------------------------------------------------------
   auto multiply_checked = MakeArithmeticFunctionNotNull<MultiplyChecked>(
       "multiply_checked", &mul_checked_doc);
+  AddDecimalBinaryKernels<MultiplyChecked>("multiply_checked", &multiply_checked);
   DCHECK_OK(registry->AddFunction(std::move(multiply_checked)));
 
   // ----------------------------------------------------------------------
   auto divide = MakeArithmeticFunctionNotNull<Divide>("divide", &div_doc);
+  AddDecimalBinaryKernels<Divide>("divide", &divide);
   DCHECK_OK(registry->AddFunction(std::move(divide)));
 
   // ----------------------------------------------------------------------
   auto divide_checked =
       MakeArithmeticFunctionNotNull<DivideChecked>("divide_checked", &div_checked_doc);
+  AddDecimalBinaryKernels<DivideChecked>("divide_checked", &divide_checked);
   DCHECK_OK(registry->AddFunction(std::move(divide_checked)));
 
   // ----------------------------------------------------------------------
diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
index c4bfac459dc..3ee862c834e 100644
--- a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
@@ -1148,5 +1148,368 @@ TYPED_TEST(TestUnaryArithmeticFloating, AbsoluteValue) {
   }
 }
 
+TEST(TestBinaryDecimalArithmetic, DispatchBest) {
+  // decimal, floating point
+  for (std::string name : {"add", "subtract", "multiply", "divide"}) {
+    for (std::string suffix : {"", "_checked"}) {
+      name += suffix;
+
+      CheckDispatchBest(name, {decimal128(1, 0), float32()}, {float32(), float32()});
+      CheckDispatchBest(name, {decimal256(1, 0), float64()}, {float64(), float64()});
+      CheckDispatchBest(name, {float32(), decimal256(1, 0)}, {float32(), float32()});
+      CheckDispatchBest(name, {float64(), decimal128(1, 0)}, {float64(), float64()});
+    }
+  }
+
+  // decimal, decimal
+  for (std::string name : {"add", "subtract"}) {
+    for (std::string suffix : {"", "_checked"}) {
+      name += suffix;
+
+      CheckDispatchBest(name, {decimal128(2, 1), decimal128(2, 1)},
+                        {decimal128(3, 1), decimal128(3, 1)});
+      CheckDispatchBest(name, {decimal256(2, 1), decimal256(2, 1)},
+                        {decimal256(3, 1), decimal256(3, 1)});
+      CheckDispatchBest(name, {decimal128(2, 1), decimal256(2, 1)},
+                        {decimal256(3, 1), decimal256(3, 1)});
+      CheckDispatchBest(name, {decimal256(2, 1), decimal128(2, 1)},
+                        {decimal256(3, 1), decimal256(3, 1)});
+    }
+  }
+  {
+    std::string name = "multiply";
+    for (std::string suffix : {"", "_checked"}) {
+      name += suffix;
+
+      CheckDispatchBest(name, {decimal128(2, 1), decimal128(2, 1)},
+                        {decimal128(5, 2), decimal128(5, 2)});
+      CheckDispatchBest(name, {decimal256(2, 1), decimal256(2, 1)},
+                        {decimal256(5, 2), decimal256(5, 2)});
+      CheckDispatchBest(name, {decimal128(2, 1), decimal256(2, 1)},
+                        {decimal256(5, 2), decimal256(5, 2)});
+      CheckDispatchBest(name, {decimal256(2, 1), decimal128(2, 1)},
+                        {decimal256(5, 2), decimal256(5, 2)});
+    }
+  }
+  {
+    std::string name = "divide";
+    for (std::string suffix : {"", "_checked"}) {
+      name += suffix;
+
+      CheckDispatchBest(name, {decimal128(2, 1), decimal128(2, 1)},
+                        {decimal128(6, 4), decimal128(6, 4)});
+      CheckDispatchBest(name, {decimal256(2, 1), decimal256(2, 1)},
+                        {decimal256(6, 4), decimal256(6, 4)});
+      CheckDispatchBest(name, {decimal128(2, 1), decimal256(2, 1)},
+                        {decimal256(6, 4), decimal256(6, 4)});
+      CheckDispatchBest(name, {decimal256(2, 1), decimal128(2, 1)},
+                        {decimal256(6, 4), decimal256(6, 4)});
+    }
+  }
+
+  // TODO(ARROW-13067): add 'integer, decimal' tests
+}
+
+// reference result from bc (precsion=100, scale=40)
+TEST(TestBinaryArithmeticDecimal, AddSubtract) {
+  // array array, decimal128
+  {
+    auto left = ArrayFromJSON(decimal128(30, 3),
+                              R"([
+        "1.000",
+        "-123456789012345678901234567.890",
+        "98765432109876543210.987",
+        "-999999999999999999999999999.999"
+      ])");
+    auto right = ArrayFromJSON(decimal128(20, 9),
+                               R"([
+        "-1.000000000",
+        "12345678901.234567890",
+        "98765.432101234",
+        "-99999999999.999999999"
+      ])");
+    auto added = ArrayFromJSON(decimal128(37, 9),
+                               R"([
+      "0.000000000",
+      "-123456789012345666555555666.655432110",
+      "98765432109876641976.419101234",
+      "-1000000000000000099999999999.998999999"
+    ])");
+    auto subtracted = ArrayFromJSON(decimal128(37, 9),
+                                    R"([
+      "2.000000000",
+      "-123456789012345691246913469.124567890",
+      "98765432109876444445.554898766",
+      "-999999999999999899999999999.999000001"
+    ])");
+    CheckScalarBinary("add", left, right, added);
+    CheckScalarBinary("subtract", left, right, subtracted);
+  }
+
+  // array array, decimal256
+  {
+    auto left = ArrayFromJSON(decimal256(30, 20),
+                              R"([
+        "-1.00000000000000000001",
+        "1234567890.12345678900000000000",
+        "-9876543210.09876543210987654321",
+        "9999999999.99999999999999999999"
+      ])");
+    auto right = ArrayFromJSON(decimal256(30, 10),
+                               R"([
+        "1.0000000000",
+        "-1234567890.1234567890",
+        "6789.5432101234",
+        "99999999999999999999.9999999999"
+      ])");
+    auto added = ArrayFromJSON(decimal256(41, 20),
+                               R"([
+      "-0.00000000000000000001",
+      "0.00000000000000000000",
+      "-9876536420.55555530870987654321",
+      "100000000009999999999.99999999989999999999"
+    ])");
+    auto subtracted = ArrayFromJSON(decimal256(41, 20),
+                                    R"([
+      "-2.00000000000000000001",
+      "2469135780.24691357800000000000",
+      "-9876549999.64197555550987654321",
+      "-99999999989999999999.99999999990000000001"
+    ])");
+    CheckScalarBinary("add", left, right, added);
+    CheckScalarBinary("subtract", left, right, subtracted);
+  }
+
+  // scalar array
+  {
+    auto left = ScalarFromJSON(decimal128(6, 1), R"("12345.6")");
+    auto right = ArrayFromJSON(decimal128(10, 3),
+                               R"(["1.234", "1234.000", "-9876.543", "666.888"])");
+    auto added = ArrayFromJSON(decimal128(11, 3),
+                               R"(["12346.834", "13579.600", "2469.057", "13012.488"])");
+    auto left_sub_right = ArrayFromJSON(
+        decimal128(11, 3), R"(["12344.366", "11111.600", "22222.143", "11678.712"])");
+    auto right_sub_left = ArrayFromJSON(
+        decimal128(11, 3), R"(["-12344.366", "-11111.600", "-22222.143", "-11678.712"])");
+    CheckScalarBinary("add", left, right, added);
+    CheckScalarBinary("add", right, left, added);
+    CheckScalarBinary("subtract", left, right, left_sub_right);
+    CheckScalarBinary("subtract", right, left, right_sub_left);
+  }
+
+  // scalar scalar
+  {
+    auto left = ScalarFromJSON(decimal256(3, 0), R"("666")");
+    auto right = ScalarFromJSON(decimal256(3, 0), R"("888")");
+    auto added = ScalarFromJSON(decimal256(4, 0), R"("1554")");
+    auto subtracted = ScalarFromJSON(decimal256(4, 0), R"("-222")");
+    CheckScalarBinary("add", left, right, added);
+    CheckScalarBinary("subtract", left, right, subtracted);
+  }
+
+  // decimal128 decimal256
+  {
+    auto left = ScalarFromJSON(decimal128(3, 0), R"("666")");
+    auto right = ScalarFromJSON(decimal256(3, 0), R"("888")");
+    auto added = ScalarFromJSON(decimal256(4, 0), R"("1554")");
+    CheckScalarBinary("add", left, right, added);
+    CheckScalarBinary("add", right, left, added);
+  }
+
+  // decimal float
+  {
+    auto left = ScalarFromJSON(decimal128(3, 0), R"("666")");
+    ASSIGN_OR_ABORT(auto right, arrow::MakeScalar(float64(), 888));
+    ASSIGN_OR_ABORT(auto added, arrow::MakeScalar(float64(), 1554));
+    CheckScalarBinary("add", left, right, added);
+    CheckScalarBinary("add", right, left, added);
+  }
+
+  // TODO: decimal integer
+
+  // failed case: result maybe overflow
+  {
+    std::shared_ptr<Scalar> left, right;
+
+    left = ScalarFromJSON(decimal128(21, 20), R"("0.12345678901234567890")");
+    right = ScalarFromJSON(decimal128(21, 1), R"("1.0")");
+    ASSERT_RAISES(Invalid, CallFunction("add", {left, right}));
+    ASSERT_RAISES(Invalid, CallFunction("subtract", {left, right}));
+
+    left = ScalarFromJSON(decimal256(75, 0), R"("0")");
+    right = ScalarFromJSON(decimal256(2, 1), R"("0.0")");
+    ASSERT_RAISES(Invalid, CallFunction("add", {left, right}));
+    ASSERT_RAISES(Invalid, CallFunction("subtract", {left, right}));
+  }
+}
+
+TEST(TestBinaryArithmeticDecimal, Multiply) {
+  // array array, decimal128
+  {
+    auto left = ArrayFromJSON(decimal128(20, 10),
+                              R"([
+        "1234567890.1234567890",
+        "-0.0000000001",
+        "-9999999999.9999999999"
+      ])");
+    auto right = ArrayFromJSON(decimal128(13, 3),
+                               R"([
+        "1234567890.123",
+        "0.001",
+        "-9999999999.999"
+      ])");
+    auto expected = ArrayFromJSON(decimal128(34, 13),
+                                  R"([
+      "1524157875323319737.9870903950470",
+      "-0.0000000000001",
+      "99999999999989999999.0000000000001"
+    ])");
+    CheckScalarBinary("multiply", left, right, expected);
+  }
+
+  // array array, decimal26
+  {
+    auto left = ArrayFromJSON(decimal256(30, 3),
+                              R"([
+        "123456789012345678901234567.890",
+        "0.000"
+      ])");
+    auto right = ArrayFromJSON(decimal256(20, 9),
+                               R"([
+        "-12345678901.234567890",
+        "99999999999.999999999"
+      ])");
+    auto expected = ArrayFromJSON(decimal256(51, 12),
+                                  R"([
+      "-1524157875323883675034293577501905199.875019052100",
+      "0.000000000000"
+    ])");
+    CheckScalarBinary("multiply", left, right, expected);
+  }
+
+  // scalar array
+  {
+    auto left = ScalarFromJSON(decimal128(3, 2), R"("3.14")");
+    auto right = ArrayFromJSON(decimal128(1, 0), R"(["1", "2", "3", "4", "5"])");
+    auto expected =
+        ArrayFromJSON(decimal128(5, 2), R"(["3.14", "6.28", "9.42", "12.56", "15.70"])");
+    CheckScalarBinary("multiply", left, right, expected);
+    CheckScalarBinary("multiply", right, left, expected);
+  }
+
+  // scalar scalar
+  {
+    auto left = ScalarFromJSON(decimal128(1, 0), R"("1")");
+    auto right = ScalarFromJSON(decimal128(1, 0), R"("1")");
+    auto expected = ScalarFromJSON(decimal128(3, 0), R"("1")");
+    CheckScalarBinary("multiply", left, right, expected);
+  }
+
+  // decimal128 decimal256
+  {
+    auto left = ScalarFromJSON(decimal128(3, 2), R"("6.66")");
+    auto right = ScalarFromJSON(decimal256(3, 1), R"("88.8")");
+    auto expected = ScalarFromJSON(decimal256(7, 3), R"("591.408")");
+    CheckScalarBinary("multiply", left, right, expected);
+    CheckScalarBinary("multiply", right, left, expected);
+  }
+
+  // decimal float
+  {
+    auto left = ScalarFromJSON(decimal128(3, 0), R"("666")");
+    ASSIGN_OR_ABORT(auto right, arrow::MakeScalar(float64(), 888));
+    ASSIGN_OR_ABORT(auto expected, arrow::MakeScalar(float64(), 591408));
+    CheckScalarBinary("multiply", left, right, expected);
+    CheckScalarBinary("multiply", right, left, expected);
+  }
+
+  // TODO: decimal integer
+
+  // failed case: result maybe overflow
+  {
+    auto left = ScalarFromJSON(decimal128(20, 0), R"("1")");
+    auto right = ScalarFromJSON(decimal128(18, 1), R"("1.0")");
+    ASSERT_RAISES(Invalid, CallFunction("multiply", {left, right}));
+  }
+}
+
+TEST(TestBinaryArithmeticDecimal, Divide) {
+  // array array, decimal128
+  {
+    auto left = ArrayFromJSON(decimal128(13, 3), R"(["1234567890.123", "0.001"])");
+    auto right = ArrayFromJSON(decimal128(3, 0), R"(["-987", "999"])");
+    auto expected =
+        ArrayFromJSON(decimal128(17, 7), R"(["-1250828.6627386", "0.0000010"])");
+    CheckScalarBinary("divide", left, right, expected);
+  }
+
+  // array array, decimal256
+  {
+    auto left = ArrayFromJSON(decimal256(20, 10),
+                              R"(["1234567890.1234567890", "9999999999.9999999999"])");
+    auto right = ArrayFromJSON(decimal256(13, 3), R"(["1234567890.123", "0.001"])");
+    auto expected = ArrayFromJSON(
+        decimal256(34, 21),
+        R"(["1.000000000000369999093", "9999999999999.999999900000000000000"])");
+    CheckScalarBinary("divide", left, right, expected);
+  }
+
+  // scalar array
+  {
+    auto left = ScalarFromJSON(decimal128(1, 0), R"("1")");
+    auto right = ArrayFromJSON(decimal128(1, 0), R"(["1", "2", "3", "4"])");
+    auto left_div_right =
+        ArrayFromJSON(decimal128(5, 4), R"(["1.0000", "0.5000", "0.3333", "0.2500"])");
+    auto right_div_left =
+        ArrayFromJSON(decimal128(5, 4), R"(["1.0000", "2.0000", "3.0000", "4.0000"])");
+    CheckScalarBinary("divide", left, right, left_div_right);
+    CheckScalarBinary("divide", right, left, right_div_left);
+  }
+
+  // scalar scalar
+  {
+    auto left = ScalarFromJSON(decimal256(6, 5), R"("2.71828")");
+    auto right = ScalarFromJSON(decimal256(6, 5), R"("3.14159")");
+    auto expected = ScalarFromJSON(decimal256(13, 7), R"("0.8652561")");
+    CheckScalarBinary("divide", left, right, expected);
+  }
+
+  // decimal128 decimal256
+  {
+    auto left = ScalarFromJSON(decimal256(6, 5), R"("2.71828")");
+    auto right = ScalarFromJSON(decimal128(6, 5), R"("3.14159")");
+    auto left_div_right = ScalarFromJSON(decimal256(13, 7), R"("0.8652561")");
+    auto right_div_left = ScalarFromJSON(decimal256(13, 7), R"("1.1557271")");
+    CheckScalarBinary("divide", left, right, left_div_right);
+    CheckScalarBinary("divide", right, left, right_div_left);
+  }
+
+  // decimal float
+  {
+    auto left = ScalarFromJSON(decimal128(3, 0), R"("100")");
+    ASSIGN_OR_ABORT(auto right, arrow::MakeScalar(float64(), 50));
+    ASSIGN_OR_ABORT(auto left_div_right, arrow::MakeScalar(float64(), 2));
+    ASSIGN_OR_ABORT(auto right_div_left, arrow::MakeScalar(float64(), 0.5));
+    CheckScalarBinary("divide", left, right, left_div_right);
+    CheckScalarBinary("divide", right, left, right_div_left);
+  }
+
+  // TODO: decimal integer
+
+  // failed case: result maybe overflow
+  {
+    auto left = ScalarFromJSON(decimal128(20, 20), R"("0.12345678901234567890")");
+    auto right = ScalarFromJSON(decimal128(20, 0), R"("12345678901234567890")");
+    ASSERT_RAISES(Invalid, CallFunction("divide", {left, right}));
+  }
+
+  // failed case: divide by 0
+  {
+    auto left = ScalarFromJSON(decimal256(1, 0), R"("1")");
+    auto right = ScalarFromJSON(decimal256(1, 0), R"("0")");
+    ASSERT_RAISES(Invalid, CallFunction("divide", {left, right}));
+  }
+}
+
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc
index 344585446fc..65c783ce847 100644
--- a/cpp/src/arrow/type.cc
+++ b/cpp/src/arrow/type.cc
@@ -771,6 +771,17 @@ std::vector<std::shared_ptr<Field>> StructType::GetAllFieldsByName(
   return result;
 }
 
+Result<std::shared_ptr<DataType>> DecimalType::Make(Type::type type_id, int32_t precision,
+                                                    int32_t scale) {
+  if (type_id == Type::DECIMAL128) {
+    return Decimal128Type::Make(precision, scale);
+  } else if (type_id == Type::DECIMAL256) {
+    return Decimal256Type::Make(precision, scale);
+  } else {
+    return Status::Invalid("Not a decimal type_id: ", type_id);
+  }
+}
+
 // Taken from the Apache Impala codebase. The comments next
 // to the return values are the maximum value that can be represented in 2's
 // complement with the returned number of bytes.
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index 1d3d1e27f92..b933da66089 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -880,6 +880,10 @@ class ARROW_EXPORT DecimalType : public FixedSizeBinaryType {
                        int32_t scale)
       : FixedSizeBinaryType(byte_width, type_id), precision_(precision), scale_(scale) {}
 
+  /// Constructs concrete decimal types
+  static Result<std::shared_ptr<DataType>> Make(Type::type type_id, int32_t precision,
+                                                int32_t scale);
+
   int32_t precision() const { return precision_; }
   int32_t scale() const { return scale_; }
 
diff --git a/cpp/src/arrow/type_traits.h b/cpp/src/arrow/type_traits.h
index b74aa3b0adb..86664bbb162 100644
--- a/cpp/src/arrow/type_traits.h
+++ b/cpp/src/arrow/type_traits.h
@@ -845,6 +845,17 @@ static inline bool is_floating(Type::type type_id) {
   return false;
 }
 
+static inline bool is_decimal(Type::type type_id) {
+  switch (type_id) {
+    case Type::DECIMAL128:
+    case Type::DECIMAL256:
+      return true;
+    default:
+      break;
+  }
+  return false;
+}
+
 static inline bool is_primitive(Type::type type_id) {
   switch (type_id) {
     case Type::BOOL:
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index dfdd64d19c6..147885560f5 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -266,17 +266,17 @@ an ``Invalid`` :class:`Status` when overflow is detected.
 +--------------------------+------------+--------------------+---------------------+
 | abs_checked              | Unary      | Numeric            | Numeric             |
 +--------------------------+------------+--------------------+---------------------+
-| add                      | Binary     | Numeric            | Numeric             |
+| add                      | Binary     | Numeric            | Numeric (1)         |
 +--------------------------+------------+--------------------+---------------------+
-| add_checked              | Binary     | Numeric            | Numeric             |
+| add_checked              | Binary     | Numeric            | Numeric (1)         |
 +--------------------------+------------+--------------------+---------------------+
-| divide                   | Binary     | Numeric            | Numeric             |
+| divide                   | Binary     | Numeric            | Numeric (1)         |
 +--------------------------+------------+--------------------+---------------------+
-| divide_checked           | Binary     | Numeric            | Numeric             |
+| divide_checked           | Binary     | Numeric            | Numeric (1)         |
 +--------------------------+------------+--------------------+---------------------+
-| multiply                 | Binary     | Numeric            | Numeric             |
+| multiply                 | Binary     | Numeric            | Numeric (1)         |
 +--------------------------+------------+--------------------+---------------------+
-| multiply_checked         | Binary     | Numeric            | Numeric             |
+| multiply_checked         | Binary     | Numeric            | Numeric (1)         |
 +--------------------------+------------+--------------------+---------------------+
 | negate                   | Unary      | Numeric            | Numeric             |
 +--------------------------+------------+--------------------+---------------------+
@@ -286,11 +286,32 @@ an ``Invalid`` :class:`Status` when overflow is detected.
 +--------------------------+------------+--------------------+---------------------+
 | power_checked            | Binary     | Numeric            | Numeric             |
 +--------------------------+------------+--------------------+---------------------+
-| subtract                 | Binary     | Numeric            | Numeric             |
+| subtract                 | Binary     | Numeric            | Numeric (1)         |
 +--------------------------+------------+--------------------+---------------------+
-| subtract_checked         | Binary     | Numeric            | Numeric             |
+| subtract_checked         | Binary     | Numeric            | Numeric (1)         |
 +--------------------------+------------+--------------------+---------------------+
 
+* \(1) Precision and scale of computed DECIMAL results
+
++------------+---------------------------------------------+
+| Operation  | Result precision and scale                  |
++============+=============================================+
+| | add      | | scale = max(s1, s2)                       |
+| | subtract | | precision = max(p1-s1, p2-s2) + 1 + scale |
++------------+---------------------------------------------+
+| multiply   | | scale = s1 + s2                           |
+|            | | precision = p1 + p2 + 1                   |
++------------+---------------------------------------------+
+| divide     | | scale = max(4, s1 + p2 - s2 + 1)          |
+|            | | precision = p1 - s1 + s2 + scale          |
++------------+---------------------------------------------+
+
+It's compatible with Redshift's decimal promotion rules. All decimal digits
+are preserved for `add`, `subtract` and `multiply` operations. The result
+precision of `divide` is at least the sum of precisions of both operands with
+enough scale kept. Error is returned if the result precision is beyond the
+decimal value range.
+
 Comparisons
 ~~~~~~~~~~~
 

From 9d1591781faa52598d30a4e6a3944bc893246d17 Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Sat, 19 Jun 2021 13:12:09 -0500
Subject: [PATCH 433/719] ARROW-13116: [R] Test for RecordBatchReader to
 C-interface fails on arrow-r-minimal due to missing dependencies

Closes #10554 from thisisnic/arrow-13116

Authored-by: Nic Crane <thisisnic@gmail.com>
Signed-off-by: Jonathan Keane <jkeane@gmail.com>
---
 r/tests/testthat/test-RecordBatch.R | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/r/tests/testthat/test-RecordBatch.R b/r/tests/testthat/test-RecordBatch.R
index 58afe4ef87e..c7e8b2dc2bf 100644
--- a/r/tests/testthat/test-RecordBatch.R
+++ b/r/tests/testthat/test-RecordBatch.R
@@ -567,6 +567,8 @@ test_that("ARROW-12729 - length returns number of columns in RecordBatch", {
 })
 
 test_that("RecordBatchReader to C-interface", {
+  skip_if_not_available("dataset")
+  
   tab <- Table$create(example_data)
 
   # export the RecordBatchReader via the C-interface

From e990d177b1f1dec962315487682f613d46be573c Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Mon, 21 Jun 2021 14:04:42 +0200
Subject: [PATCH 434/719] ARROW-13042: [C++] Check that kernel output is fully
 initialized

Enhance TestInitialized() so that it really triggers Valgrind for every uninitialized bit in a buffer's data, including child and dictionary data.

Call TestInitialized() automatically from kernel tests.

Fix the BufferBuilder and TypedBufferBuilder API to really resize a Buffer to 0 when intended; introduce a FinishWithLength() method for cases where the caller wants to force a particular size.

Other fixes to get all tests to pass under Valgrind.

Closes #10550 from pitrou/ARROW-13042-check-kernel-output-initialized

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/array/builder_primitive.cc      |   5 +-
 cpp/src/arrow/array/builder_primitive.h       |   7 +-
 cpp/src/arrow/array/util.cc                   |   9 +-
 cpp/src/arrow/buffer_builder.h                |  45 ++++-
 cpp/src/arrow/buffer_test.cc                  |  78 +++++++-
 cpp/src/arrow/compute/exec.cc                 |   1 -
 .../arrow/compute/kernels/aggregate_test.cc   |  17 +-
 .../arrow/compute/kernels/codegen_internal.h  |   8 +
 .../arrow/compute/kernels/hash_aggregate.cc   |  45 +++--
 .../compute/kernels/hash_aggregate_test.cc    |   6 +-
 .../compute/kernels/scalar_arithmetic_test.cc |   4 +-
 .../compute/kernels/scalar_cast_internal.cc   |   7 +-
 .../arrow/compute/kernels/scalar_cast_test.cc |  12 +-
 .../compute/kernels/scalar_compare_test.cc    |   5 +-
 .../compute/kernels/scalar_fill_null_test.cc  |   3 +-
 .../compute/kernels/scalar_if_else_test.cc    |   2 +-
 .../compute/kernels/scalar_set_lookup_test.cc |  12 +-
 .../arrow/compute/kernels/scalar_temporal.cc  | 175 +++++++-----------
 .../compute/kernels/scalar_temporal_test.cc   |  32 ++--
 cpp/src/arrow/compute/kernels/test_util.cc    |  61 +++++-
 cpp/src/arrow/compute/kernels/test_util.h     |   2 +
 .../arrow/compute/kernels/vector_hash_test.cc |  13 +-
 .../compute/kernels/vector_selection_test.cc  |  52 +++---
 .../arrow/compute/kernels/vector_sort_test.cc |  11 +-
 cpp/src/arrow/filesystem/s3fs.cc              |   3 +-
 cpp/src/arrow/testing/gtest_util.cc           |  25 ++-
 cpp/src/arrow/testing/gtest_util.h            |   7 +
 cpp/src/arrow/util/hashing.h                  |  12 +-
 cpp/src/arrow/util/windows_fixup.h            |   7 +
 cpp/src/parquet/arrow/writer.cc               |   2 -
 cpp/src/parquet/encoding.cc                   |   2 +-
 cpp/src/parquet/encoding_test.cc              |   2 +-
 32 files changed, 421 insertions(+), 251 deletions(-)

diff --git a/cpp/src/arrow/array/builder_primitive.cc b/cpp/src/arrow/array/builder_primitive.cc
index 037a1ecbf91..e403c42411d 100644
--- a/cpp/src/arrow/array/builder_primitive.cc
+++ b/cpp/src/arrow/array/builder_primitive.cc
@@ -65,9 +65,8 @@ Status BooleanBuilder::Resize(int64_t capacity) {
 }
 
 Status BooleanBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
-  std::shared_ptr<Buffer> null_bitmap, data;
-  RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
-  RETURN_NOT_OK(data_builder_.Finish(&data));
+  ARROW_ASSIGN_OR_RAISE(auto null_bitmap, null_bitmap_builder_.FinishWithLength(length_));
+  ARROW_ASSIGN_OR_RAISE(auto data, data_builder_.FinishWithLength(length_));
 
   *out = ArrayData::Make(boolean(), length_, {null_bitmap, data}, null_count_);
 
diff --git a/cpp/src/arrow/array/builder_primitive.h b/cpp/src/arrow/array/builder_primitive.h
index e10f11fdd6c..e0f39f97967 100644
--- a/cpp/src/arrow/array/builder_primitive.h
+++ b/cpp/src/arrow/array/builder_primitive.h
@@ -23,6 +23,7 @@
 
 #include "arrow/array/builder_base.h"
 #include "arrow/array/data.h"
+#include "arrow/result.h"
 #include "arrow/type.h"
 #include "arrow/type_traits.h"
 
@@ -185,9 +186,9 @@ class NumericBuilder : public ArrayBuilder {
   }
 
   Status FinishInternal(std::shared_ptr<ArrayData>* out) override {
-    std::shared_ptr<Buffer> data, null_bitmap;
-    ARROW_RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
-    ARROW_RETURN_NOT_OK(data_builder_.Finish(&data));
+    ARROW_ASSIGN_OR_RAISE(auto null_bitmap,
+                          null_bitmap_builder_.FinishWithLength(length_));
+    ARROW_ASSIGN_OR_RAISE(auto data, data_builder_.FinishWithLength(length_));
     *out = ArrayData::Make(type(), length_, {null_bitmap, data}, null_count_);
     capacity_ = length_ = null_count_ = 0;
     return Status::OK();
diff --git a/cpp/src/arrow/array/util.cc b/cpp/src/arrow/array/util.cc
index 297745a2b17..d4852234cd0 100644
--- a/cpp/src/arrow/array/util.cc
+++ b/cpp/src/arrow/array/util.cc
@@ -286,7 +286,7 @@ std::shared_ptr<Array> MakeArray(const std::shared_ptr<ArrayData>& data) {
 // ----------------------------------------------------------------------
 // Misc APIs
 
-namespace internal {
+namespace {
 
 // get the maximum buffer length required, then allocate a single zeroed buffer
 // to use anywhere a buffer is required
@@ -650,12 +650,11 @@ class RepeatedArrayFactory {
   std::shared_ptr<Array> out_;
 };
 
-}  // namespace internal
+}  // namespace
 
 Result<std::shared_ptr<Array>> MakeArrayOfNull(const std::shared_ptr<DataType>& type,
                                                int64_t length, MemoryPool* pool) {
-  ARROW_ASSIGN_OR_RAISE(auto data,
-                        internal::NullArrayFactory(pool, type, length).Create());
+  ARROW_ASSIGN_OR_RAISE(auto data, NullArrayFactory(pool, type, length).Create());
   return MakeArray(data);
 }
 
@@ -664,7 +663,7 @@ Result<std::shared_ptr<Array>> MakeArrayFromScalar(const Scalar& scalar, int64_t
   if (!scalar.is_valid) {
     return MakeArrayOfNull(scalar.type, length, pool);
   }
-  return internal::RepeatedArrayFactory(pool, scalar, length).Create();
+  return RepeatedArrayFactory(pool, scalar, length).Create();
 }
 
 namespace internal {
diff --git a/cpp/src/arrow/buffer_builder.h b/cpp/src/arrow/buffer_builder.h
index f525ec23c58..c6250ae2b76 100644
--- a/cpp/src/arrow/buffer_builder.h
+++ b/cpp/src/arrow/buffer_builder.h
@@ -64,15 +64,12 @@ class ARROW_EXPORT BufferBuilder {
   /// \brief Resize the buffer to the nearest multiple of 64 bytes
   ///
   /// \param new_capacity the new capacity of the of the builder. Will be
-  /// rounded up to a multiple of 64 bytes for padding \param shrink_to_fit if
-  /// new capacity is smaller than the existing size, reallocate internal
-  /// buffer. Set to false to avoid reallocations when shrinking the builder.
+  /// rounded up to a multiple of 64 bytes for padding
+  /// \param shrink_to_fit if new capacity is smaller than the existing,
+  /// reallocate internal buffer. Set to false to avoid reallocations when
+  /// shrinking the builder.
   /// \return Status
   Status Resize(const int64_t new_capacity, bool shrink_to_fit = true) {
-    // Resize(0) is a no-op
-    if (new_capacity == 0) {
-      return Status::OK();
-    }
     if (buffer_ == NULLPTR) {
       ARROW_ASSIGN_OR_RAISE(buffer_, AllocateResizableBuffer(new_capacity, pool_));
     } else {
@@ -168,6 +165,17 @@ class ARROW_EXPORT BufferBuilder {
     return out;
   }
 
+  /// \brief Like Finish, but override the final buffer size
+  ///
+  /// This is useful after writing data directly into the builder memory
+  /// without calling the Append methods (basically, when using BufferBuilder
+  /// mostly for memory allocation).
+  Result<std::shared_ptr<Buffer>> FinishWithLength(int64_t final_length,
+                                                   bool shrink_to_fit = true) {
+    size_ = final_length;
+    return Finish(shrink_to_fit);
+  }
+
   void Reset() {
     buffer_ = NULLPTR;
     capacity_ = size_ = 0;
@@ -273,6 +281,16 @@ class TypedBufferBuilder<
     return out;
   }
 
+  /// \brief Like Finish, but override the final buffer size
+  ///
+  /// This is useful after writing data directly into the builder memory
+  /// without calling the Append methods (basically, when using TypedBufferBuilder
+  /// only for memory allocation).
+  Result<std::shared_ptr<Buffer>> FinishWithLength(int64_t final_length,
+                                                   bool shrink_to_fit = true) {
+    return bytes_builder_.FinishWithLength(final_length * sizeof(T), shrink_to_fit);
+  }
+
   void Reset() { bytes_builder_.Reset(); }
 
   int64_t length() const { return bytes_builder_.length() / sizeof(T); }
@@ -399,6 +417,19 @@ class TypedBufferBuilder<bool> {
     return out;
   }
 
+  /// \brief Like Finish, but override the final buffer size
+  ///
+  /// This is useful after writing data directly into the builder memory
+  /// without calling the Append methods (basically, when using TypedBufferBuilder
+  /// only for memory allocation).
+  Result<std::shared_ptr<Buffer>> FinishWithLength(int64_t final_length,
+                                                   bool shrink_to_fit = true) {
+    const auto final_byte_length = BitUtil::BytesForBits(final_length);
+    bytes_builder_.UnsafeAdvance(final_byte_length - bytes_builder_.length());
+    bit_length_ = false_count_ = 0;
+    return bytes_builder_.FinishWithLength(final_byte_length, shrink_to_fit);
+  }
+
   void Reset() {
     bytes_builder_.Reset();
     bit_length_ = false_count_ = 0;
diff --git a/cpp/src/arrow/buffer_test.cc b/cpp/src/arrow/buffer_test.cc
index 02b96c3b493..4295d4ca692 100644
--- a/cpp/src/arrow/buffer_test.cc
+++ b/cpp/src/arrow/buffer_test.cc
@@ -653,18 +653,77 @@ TEST(TestBufferBuilder, ResizeReserve) {
 
   ASSERT_OK(builder.Resize(128));
   ASSERT_EQ(128, builder.capacity());
+  ASSERT_EQ(9, builder.length());
 
   // Do not shrink to fit
   ASSERT_OK(builder.Resize(64, false));
   ASSERT_EQ(128, builder.capacity());
+  ASSERT_EQ(9, builder.length());
 
   // Shrink to fit
   ASSERT_OK(builder.Resize(64));
   ASSERT_EQ(64, builder.capacity());
+  ASSERT_EQ(9, builder.length());
 
   // Reserve elements
   ASSERT_OK(builder.Reserve(60));
   ASSERT_EQ(128, builder.capacity());
+  ASSERT_EQ(9, builder.length());
+}
+
+TEST(TestBufferBuilder, Finish) {
+  const std::string data = "some data";
+  auto data_ptr = data.c_str();
+
+  for (const bool shrink_to_fit : {true, false}) {
+    ARROW_SCOPED_TRACE("shrink_to_fit = ", shrink_to_fit);
+    BufferBuilder builder;
+    ASSERT_OK(builder.Append(data_ptr, 9));
+    ASSERT_OK(builder.Append(data_ptr, 9));
+    ASSERT_EQ(18, builder.length());
+    ASSERT_EQ(64, builder.capacity());
+
+    ASSERT_OK_AND_ASSIGN(auto buf, builder.Finish(shrink_to_fit));
+    ASSERT_EQ(buf->size(), 18);
+    ASSERT_EQ(buf->capacity(), 64);
+  }
+  for (const bool shrink_to_fit : {true, false}) {
+    ARROW_SCOPED_TRACE("shrink_to_fit = ", shrink_to_fit);
+    BufferBuilder builder;
+    ASSERT_OK(builder.Reserve(1024));
+    builder.UnsafeAppend(data_ptr, 9);
+    builder.UnsafeAppend(data_ptr, 9);
+    ASSERT_EQ(18, builder.length());
+    ASSERT_EQ(builder.capacity(), 1024);
+
+    ASSERT_OK_AND_ASSIGN(auto buf, builder.Finish(shrink_to_fit));
+    ASSERT_EQ(buf->size(), 18);
+    ASSERT_EQ(buf->capacity(), shrink_to_fit ? 64 : 1024);
+  }
+}
+
+TEST(TestBufferBuilder, FinishEmpty) {
+  for (const bool shrink_to_fit : {true, false}) {
+    ARROW_SCOPED_TRACE("shrink_to_fit = ", shrink_to_fit);
+    BufferBuilder builder;
+    ASSERT_EQ(0, builder.length());
+    ASSERT_EQ(0, builder.capacity());
+
+    ASSERT_OK_AND_ASSIGN(auto buf, builder.Finish(shrink_to_fit));
+    ASSERT_EQ(buf->size(), 0);
+    ASSERT_EQ(buf->capacity(), 0);
+  }
+  for (const bool shrink_to_fit : {true, false}) {
+    ARROW_SCOPED_TRACE("shrink_to_fit = ", shrink_to_fit);
+    BufferBuilder builder;
+    ASSERT_OK(builder.Reserve(1024));
+    ASSERT_EQ(0, builder.length());
+    ASSERT_EQ(1024, builder.capacity());
+
+    ASSERT_OK_AND_ASSIGN(auto buf, builder.Finish(shrink_to_fit));
+    ASSERT_EQ(buf->size(), 0);
+    ASSERT_EQ(buf->capacity(), shrink_to_fit ? 0 : 1024);
+  }
 }
 
 template <typename T>
@@ -717,7 +776,7 @@ TYPED_TEST(TypedTestBufferBuilder, AppendCopies) {
   }
 }
 
-TEST(TestBufferBuilder, BasicBoolBufferBuilderUsage) {
+TEST(TestBoolBufferBuilder, Basics) {
   TypedBufferBuilder<bool> builder;
 
   ASSERT_OK(builder.Append(false));
@@ -746,7 +805,7 @@ TEST(TestBufferBuilder, BasicBoolBufferBuilderUsage) {
   ASSERT_EQ(built->size(), BitUtil::BytesForBits(nvalues + 1));
 }
 
-TEST(TestBufferBuilder, BoolBufferBuilderAppendCopies) {
+TEST(TestBoolBufferBuilder, AppendCopies) {
   TypedBufferBuilder<bool> builder;
 
   ASSERT_OK(builder.Append(13, true));
@@ -766,6 +825,21 @@ TEST(TestBufferBuilder, BoolBufferBuilderAppendCopies) {
   ASSERT_EQ(built->size(), BitUtil::BytesForBits(13 + 17));
 }
 
+TEST(TestBoolBufferBuilder, Reserve) {
+  TypedBufferBuilder<bool> builder;
+
+  ASSERT_OK(builder.Reserve(13 + 17));
+  builder.UnsafeAppend(13, true);
+  builder.UnsafeAppend(17, false);
+  ASSERT_EQ(builder.length(), 13 + 17);
+  ASSERT_EQ(builder.capacity(), 64 * 8);
+  ASSERT_EQ(builder.false_count(), 17);
+
+  ASSERT_OK_AND_ASSIGN(auto built, builder.Finish());
+  AssertIsCPUBuffer(*built);
+  ASSERT_EQ(built->size(), BitUtil::BytesForBits(13 + 17));
+}
+
 template <typename T>
 class TypedTestBuffer : public ::testing::Test {};
 
diff --git a/cpp/src/arrow/compute/exec.cc b/cpp/src/arrow/compute/exec.cc
index 0b1f6b5658e..73cb82ef026 100644
--- a/cpp/src/arrow/compute/exec.cc
+++ b/cpp/src/arrow/compute/exec.cc
@@ -106,7 +106,6 @@ Result<std::shared_ptr<Buffer>> AllocateDataBuffer(KernelContext* ctx, int64_t l
     int64_t buffer_size = BitUtil::BytesForBits(length * bit_width);
     return ctx->Allocate(buffer_size);
   }
-  return Status::OK();
 }
 
 struct BufferPreallocation {
diff --git a/cpp/src/arrow/compute/kernels/aggregate_test.cc b/cpp/src/arrow/compute/kernels/aggregate_test.cc
index 476caab03d5..4bce02a990b 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_test.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_test.cc
@@ -1215,7 +1215,7 @@ class TestPrimitiveModeKernel : public ::testing::Test {
                       const std::vector<CType>& expected_modes,
                       const std::vector<int64_t>& expected_counts) {
     ASSERT_OK_AND_ASSIGN(Datum out, Mode(array, ModeOptions{n}));
-    ASSERT_OK(out.make_array()->ValidateFull());
+    ValidateOutput(out);
     const StructArray out_array(out.array());
     ASSERT_EQ(out_array.length(), expected_modes.size());
     ASSERT_EQ(out_array.num_fields(), 2);
@@ -1256,7 +1256,8 @@ class TestPrimitiveModeKernel : public ::testing::Test {
 
   void AssertModesEmpty(const Datum& array, int n) {
     ASSERT_OK_AND_ASSIGN(Datum out, Mode(array, ModeOptions{n}));
-    ASSERT_OK(out.make_array()->ValidateFull());
+    auto out_array = out.make_array();
+    ValidateOutput(*out_array);
     ASSERT_EQ(out.array()->length, 0);
   }
 
@@ -1397,8 +1398,8 @@ template <typename ArrowType, typename CTYPE = typename ArrowType::c_type>
 void VerifyMode(const std::shared_ptr<Array>& array) {
   auto expected = NaiveMode<ArrowType>(*array);
   ASSERT_OK_AND_ASSIGN(Datum out, Mode(array));
-  ASSERT_OK(out.make_array()->ValidateFull());
   const StructArray out_array(out.array());
+  ValidateOutput(out_array);
   ASSERT_EQ(out_array.length(), 1);
   ASSERT_EQ(out_array.num_fields(), 2);
 
@@ -1756,7 +1757,7 @@ class TestPrimitiveQuantileKernel : public ::testing::Test {
 
       ASSERT_OK_AND_ASSIGN(Datum out, Quantile(array, options));
       const auto& out_array = out.make_array();
-      ASSERT_OK(out_array->ValidateFull());
+      ValidateOutput(*out_array);
       ASSERT_EQ(out_array->length(), options.q.size());
       ASSERT_EQ(out_array->null_count(), 0);
       AssertTypeEqual(out_array->type(), expected[0][i].type());
@@ -1816,7 +1817,8 @@ class TestPrimitiveQuantileKernel : public ::testing::Test {
     for (auto interpolation : this->interpolations_) {
       options.interpolation = interpolation;
       ASSERT_OK_AND_ASSIGN(Datum out, Quantile(array, options));
-      ASSERT_OK(out.make_array()->ValidateFull());
+      auto out_array = out.make_array();
+      ValidateOutput(*out_array);
       ASSERT_EQ(out.array()->length, 0);
     }
   }
@@ -2044,7 +2046,7 @@ class TestRandomQuantileKernel : public TestPrimitiveQuantileKernel<ArrowType> {
     TDigestOptions options(quantiles);
     ASSERT_OK_AND_ASSIGN(Datum out, TDigest(chunked, options));
     const auto& out_array = out.make_array();
-    ASSERT_OK(out_array->ValidateFull());
+    ValidateOutput(*out_array);
     ASSERT_EQ(out_array->length(), quantiles.size());
     ASSERT_EQ(out_array->null_count(), 0);
     AssertTypeEqual(out_array->type(), float64());
@@ -2186,7 +2188,8 @@ TEST_F(TestTDigestKernel, AllNullsOrNaNs) {
   for (const auto& json : tests) {
     auto chunked = ChunkedArrayFromJSON(float64(), json);
     ASSERT_OK_AND_ASSIGN(Datum out, TDigest(chunked, TDigestOptions()));
-    ASSERT_OK(out.make_array()->ValidateFull());
+    auto out_array = out.make_array();
+    ValidateOutput(*out_array);
     ASSERT_EQ(out.array()->length, 0);
   }
 }
diff --git a/cpp/src/arrow/compute/kernels/codegen_internal.h b/cpp/src/arrow/compute/kernels/codegen_internal.h
index 6a5cee124c0..140f9fdc669 100644
--- a/cpp/src/arrow/compute/kernels/codegen_internal.h
+++ b/cpp/src/arrow/compute/kernels/codegen_internal.h
@@ -276,6 +276,8 @@ struct OutputArrayWriter<Type, enable_if_has_c_type_not_boolean<Type>> {
   // Note that this doesn't write the null bitmap, which should be consistent
   // with Write / WriteNull calls
   void WriteNull() { *values++ = T{}; }
+
+  void WriteAllNull(int64_t length) { std::memset(values, 0, sizeof(T) * length); }
 };
 
 template <typename Type>
@@ -290,6 +292,8 @@ struct OutputArrayWriter<Type, enable_if_decimal<Type>> {
   void Write(T value) { value.ToBytes(values++->data()); }
 
   void WriteNull() { T{}.ToBytes(values++->data()); }
+
+  void WriteAllNull(int64_t length) { std::memset(values, 0, sizeof(T) * length); }
 };
 
 // (Un)box Scalar to / from C++ value
@@ -918,6 +922,8 @@ struct ScalarBinaryNotNullStateful {
                 op.template Call<OutValue, Arg0Value, Arg1Value>(ctx, u, arg1_val, &st));
           },
           [&]() { writer.WriteNull(); });
+    } else {
+      writer.WriteAllNull(out->mutable_array()->length);
     }
     return st;
   }
@@ -935,6 +941,8 @@ struct ScalarBinaryNotNullStateful {
                 op.template Call<OutValue, Arg0Value, Arg1Value>(ctx, arg0_val, v, &st));
           },
           [&]() { writer.WriteNull(); });
+    } else {
+      writer.WriteAllNull(out->mutable_array()->length);
     }
     return st;
   }
diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate.cc b/cpp/src/arrow/compute/kernels/hash_aggregate.cc
index 5f6503f8c24..e282035d82a 100644
--- a/cpp/src/arrow/compute/kernels/hash_aggregate.cc
+++ b/cpp/src/arrow/compute/kernels/hash_aggregate.cc
@@ -442,6 +442,9 @@ struct GrouperImpl : Grouper {
 };
 
 struct GrouperFastImpl : Grouper {
+  static constexpr int kBitmapPaddingForSIMD = 64;  // bits
+  static constexpr int kPaddingForSIMD = 32;        // bytes
+
   static bool CanUse(const std::vector<ValueDescr>& keys) {
 #if ARROW_LITTLE_ENDIAN
     for (size_t i = 0; i < keys.size(); ++i) {
@@ -517,9 +520,8 @@ struct GrouperFastImpl : Grouper {
                                   impl->encode_ctx_.stack, impl->log_minibatch_max_,
                                   equal_func, append_func));
     impl->cols_.resize(num_columns);
-    constexpr int padding_for_SIMD = 32;
     impl->minibatch_hashes_.resize(impl->minibatch_size_max_ +
-                                   padding_for_SIMD / sizeof(uint32_t));
+                                   kPaddingForSIMD / sizeof(uint32_t));
 
     return std::move(impl);
   }
@@ -608,6 +610,22 @@ struct GrouperFastImpl : Grouper {
 
   uint32_t num_groups() const override { return static_cast<uint32_t>(rows_.length()); }
 
+  // Make sure padded buffers end up with the right logical size
+
+  Result<std::shared_ptr<Buffer>> AllocatePaddedBitmap(int64_t length) {
+    ARROW_ASSIGN_OR_RAISE(
+        std::shared_ptr<Buffer> buf,
+        AllocateBitmap(length + kBitmapPaddingForSIMD, ctx_->memory_pool()));
+    return SliceMutableBuffer(buf, 0, BitUtil::BytesForBits(length));
+  }
+
+  Result<std::shared_ptr<Buffer>> AllocatePaddedBuffer(int64_t size) {
+    ARROW_ASSIGN_OR_RAISE(
+        std::shared_ptr<Buffer> buf,
+        AllocateBuffer(size + kBitmapPaddingForSIMD, ctx_->memory_pool()));
+    return SliceMutableBuffer(buf, 0, size);
+  }
+
   Result<ExecBatch> GetUniques() override {
     auto num_columns = static_cast<uint32_t>(col_metadata_.size());
     int64_t num_groups = rows_.length();
@@ -616,28 +634,19 @@ struct GrouperFastImpl : Grouper {
     std::vector<std::shared_ptr<Buffer>> fixedlen_bufs(num_columns);
     std::vector<std::shared_ptr<Buffer>> varlen_bufs(num_columns);
 
-    constexpr int padding_bits = 64;
-    constexpr int padding_for_SIMD = 32;
     for (size_t i = 0; i < num_columns; ++i) {
-      ARROW_ASSIGN_OR_RAISE(non_null_bufs[i], AllocateBitmap(num_groups + padding_bits,
-                                                             ctx_->memory_pool()));
+      ARROW_ASSIGN_OR_RAISE(non_null_bufs[i], AllocatePaddedBitmap(num_groups));
       if (col_metadata_[i].is_fixed_length) {
         if (col_metadata_[i].fixed_length == 0) {
-          ARROW_ASSIGN_OR_RAISE(
-              fixedlen_bufs[i],
-              AllocateBitmap(num_groups + padding_bits, ctx_->memory_pool()));
+          ARROW_ASSIGN_OR_RAISE(fixedlen_bufs[i], AllocatePaddedBitmap(num_groups));
         } else {
           ARROW_ASSIGN_OR_RAISE(
               fixedlen_bufs[i],
-              AllocateBuffer(
-                  num_groups * col_metadata_[i].fixed_length + padding_for_SIMD,
-                  ctx_->memory_pool()));
+              AllocatePaddedBuffer(num_groups * col_metadata_[i].fixed_length));
         }
       } else {
-        ARROW_ASSIGN_OR_RAISE(
-            fixedlen_bufs[i],
-            AllocateBuffer((num_groups + 1) * sizeof(uint32_t) + padding_for_SIMD,
-                           ctx_->memory_pool()));
+        ARROW_ASSIGN_OR_RAISE(fixedlen_bufs[i],
+                              AllocatePaddedBuffer((num_groups + 1) * sizeof(uint32_t)));
       }
       cols_[i] = arrow::compute::KeyEncoder::KeyColumnArray(
           col_metadata_[i], num_groups, non_null_bufs[i]->mutable_data(),
@@ -657,9 +666,7 @@ struct GrouperFastImpl : Grouper {
         if (!col_metadata_[i].is_fixed_length) {
           auto varlen_size =
               reinterpret_cast<const uint32_t*>(fixedlen_bufs[i]->data())[num_groups];
-          ARROW_ASSIGN_OR_RAISE(
-              varlen_bufs[i],
-              AllocateBuffer(varlen_size + padding_for_SIMD, ctx_->memory_pool()));
+          ARROW_ASSIGN_OR_RAISE(varlen_bufs[i], AllocatePaddedBuffer(varlen_size));
           cols_[i] = arrow::compute::KeyEncoder::KeyColumnArray(
               col_metadata_[i], num_groups, non_null_bufs[i]->mutable_data(),
               fixedlen_bufs[i]->mutable_data(), varlen_bufs[i]->mutable_data());
diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc b/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc
index 5e4f8c5f0e6..a8f8c64663d 100644
--- a/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc
+++ b/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc
@@ -116,7 +116,7 @@ void ValidateGroupBy(const std::vector<internal::Aggregate>& aggregates,
   ASSERT_OK_AND_ASSIGN(Datum actual, GroupBy(arguments, keys, aggregates));
 
   ASSERT_OK(expected.make_array()->ValidateFull());
-  ASSERT_OK(actual.make_array()->ValidateFull());
+  ValidateOutput(actual);
 
   AssertDatumsEqual(expected, actual, /*verbose=*/true);
 }
@@ -250,7 +250,7 @@ struct TestGrouper {
       // check that uniques_ are prefixes of new_uniques
       for (int i = 0; i < uniques_.num_values(); ++i) {
         auto new_unique = new_uniques[i].make_array();
-        ASSERT_OK(new_unique->ValidateFull());
+        ValidateOutput(*new_unique);
 
         AssertDatumsEqual(uniques_[i], new_unique->Slice(0, uniques_.length),
                           /*verbose=*/true);
@@ -261,7 +261,7 @@ struct TestGrouper {
 
     // check that the ids encode an equivalent key sequence
     auto ids = id_batch.make_array();
-    ASSERT_OK(ids->ValidateFull());
+    ValidateOutput(*ids);
 
     for (int i = 0; i < key_batch.num_values(); ++i) {
       SCOPED_TRACE(std::to_string(i) + "th key array");
diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
index 3ee862c834e..ae2f55c6be6 100644
--- a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
@@ -125,7 +125,7 @@ class TestUnaryArithmetic : public TestBase {
 
   void ValidateAndAssertApproxEqual(const std::shared_ptr<Array>& actual,
                                     const std::shared_ptr<Array>& expected) {
-    ASSERT_OK(actual->ValidateFull());
+    ValidateOutput(*actual);
     AssertArraysApproxEqual(*expected, *actual, /*verbose=*/true, equal_options_);
   }
 
@@ -262,7 +262,7 @@ class TestBinaryArithmetic : public TestBase {
 
   void ValidateAndAssertApproxEqual(const std::shared_ptr<Array>& actual,
                                     const std::shared_ptr<Array>& expected) {
-    ASSERT_OK(actual->ValidateFull());
+    ValidateOutput(*actual);
     AssertArraysApproxEqual(*expected, *actual, /*verbose=*/true, equal_options_);
   }
 
diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc b/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc
index f42635c5dcd..198c82bd97e 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc
@@ -255,7 +255,12 @@ static bool CanCastFromDictionary(Type::type type_id) {
 
 void AddCommonCasts(Type::type out_type_id, OutputType out_ty, CastFunction* func) {
   // From null to this type
-  DCHECK_OK(func->AddKernel(Type::NA, {null()}, out_ty, CastFromNull));
+  ScalarKernel kernel;
+  kernel.exec = CastFromNull;
+  kernel.signature = KernelSignature::Make({null()}, out_ty);
+  kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
+  kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
+  DCHECK_OK(func->AddKernel(Type::NA, std::move(kernel)));
 
   // From dictionary to this type
   if (CanCastFromDictionary(out_type_id)) {
diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
index ef22fa8cb72..494b15dfbc8 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
@@ -113,7 +113,7 @@ static void CheckCastZeroCopy(std::shared_ptr<Array> input,
                               std::shared_ptr<DataType> to_type,
                               CastOptions options = CastOptions::Safe()) {
   ASSERT_OK_AND_ASSIGN(auto converted, Cast(*input, to_type, options));
-  ASSERT_OK(converted->ValidateFull());
+  ValidateOutput(*converted);
 
   ASSERT_EQ(input->data()->buffers.size(), converted->data()->buffers.size());
   for (size_t i = 0; i < input->data()->buffers.size(); ++i) {
@@ -1583,7 +1583,7 @@ TEST(Cast, BinaryOrStringToBinary) {
 
       // invalid utf-8 is not an error for binary
       ASSERT_OK_AND_ASSIGN(auto strings, Cast(*invalid_utf8, to_type));
-      ASSERT_OK(strings->ValidateFull());
+      ValidateOutput(*strings);
       AssertBinaryZeroCopy(invalid_utf8, strings);
 
       // invalid utf-8 masked by a null bit is not an error
@@ -1687,12 +1687,12 @@ TEST(Cast, ListToList) {
     auto list_int64 = list_int32->Copy();
     list_int64->type = make_list(int64());
     list_int64->child_data[0] = Cast(list_int32->child_data[0], int64())->array();
-    ASSERT_OK(MakeArray(list_int64)->ValidateFull());
+    ValidateOutput(*list_int64);
 
     auto list_float32 = list_int32->Copy();
     list_float32->type = make_list(float32());
     list_float32->child_data[0] = Cast(list_int32->child_data[0], float32())->array();
-    ASSERT_OK(MakeArray(list_float32)->ValidateFull());
+    ValidateOutput(*list_float32);
 
     CheckCast(MakeArray(list_int32), MakeArray(list_float32));
     CheckCast(MakeArray(list_float32), MakeArray(list_int64));
@@ -1711,7 +1711,7 @@ TEST(Cast, ListToList) {
     auto list_int64 = list_int32->Copy();
     list_int64->type = make_list(int64());
     list_int64->child_data[0] = Cast(list_int32->child_data[0], int64())->array();
-    ASSERT_OK(MakeArray(list_int64)->ValidateFull());
+    ValidateOutput(*list_int64);
 
     CheckCast(MakeArray(list_int32), MakeArray(list_int64));
     CheckCast(MakeArray(list_int64), MakeArray(list_int32));
@@ -1861,7 +1861,7 @@ TEST(Cast, FromDictionary) {
     data->buffers[0] = nullptr;
     data->null_count = 0;
     std::shared_ptr<Array> dict_array = std::make_shared<DictionaryArray>(data);
-    ASSERT_OK(dict_array->ValidateFull());
+    ValidateOutput(*dict_array);
 
     CheckCast(dict_array, no_nulls);
   }
diff --git a/cpp/src/arrow/compute/kernels/scalar_compare_test.cc b/cpp/src/arrow/compute/kernels/scalar_compare_test.cc
index 50327e82032..87f3bd3fc23 100644
--- a/cpp/src/arrow/compute/kernels/scalar_compare_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_compare_test.cc
@@ -673,10 +673,7 @@ class TestVarArgsCompare : public TestBase {
   Datum Eval(VarArgsFunction func, const std::vector<Datum>& args) {
     EXPECT_OK_AND_ASSIGN(auto actual,
                          func(args, element_wise_aggregate_options_, nullptr));
-    if (actual.is_array()) {
-      auto arr = actual.make_array();
-      ARROW_EXPECT_OK(arr->ValidateFull());
-    }
+    ValidateOutput(actual);
     return actual;
   }
 
diff --git a/cpp/src/arrow/compute/kernels/scalar_fill_null_test.cc b/cpp/src/arrow/compute/kernels/scalar_fill_null_test.cc
index a0b6fdc63a9..70ce4d5ca7b 100644
--- a/cpp/src/arrow/compute/kernels/scalar_fill_null_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_fill_null_test.cc
@@ -22,6 +22,7 @@
 
 #include "arrow/array/array_base.h"
 #include "arrow/compute/api.h"
+#include "arrow/compute/kernels/test_util.h"
 #include "arrow/result.h"
 #include "arrow/scalar.h"
 #include "arrow/testing/gtest_compat.h"
@@ -38,7 +39,7 @@ void CheckFillNull(const Array& input, const Datum& fill_value, const Array& exp
   auto Check = [&](const Array& input, const Array& expected) {
     ASSERT_OK_AND_ASSIGN(Datum datum_out, FillNull(input, fill_value));
     std::shared_ptr<Array> result = datum_out.make_array();
-    ASSERT_OK(result->ValidateFull());
+    ValidateOutput(*result);
     AssertArraysEqual(expected, *result, /*verbose=*/true);
     if (all_valid) {
       // Check null count of ArrayData is set, not the computed Array.null_count
diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
index 0fb0a1fc2d8..2b63af2f26f 100644
--- a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
@@ -29,7 +29,7 @@ void CheckIfElseOutput(const Datum& cond, const Datum& left, const Datum& right,
   ASSERT_OK_AND_ASSIGN(Datum datum_out, IfElse(cond, left, right));
   if (datum_out.is_array()) {
     std::shared_ptr<Array> result = datum_out.make_array();
-    ASSERT_OK(result->ValidateFull());
+    ValidateOutput(*result);
     std::shared_ptr<Array> expected_ = expected.make_array();
     AssertArraysEqual(*expected_, *result, /*verbose=*/true);
   } else {  // expecting scalar
diff --git a/cpp/src/arrow/compute/kernels/scalar_set_lookup_test.cc b/cpp/src/arrow/compute/kernels/scalar_set_lookup_test.cc
index 5c8bf98e196..9b6ded0bbe7 100644
--- a/cpp/src/arrow/compute/kernels/scalar_set_lookup_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_set_lookup_test.cc
@@ -57,7 +57,7 @@ void CheckIsIn(const std::shared_ptr<DataType>& type, const std::string& input_j
   ASSERT_OK_AND_ASSIGN(Datum actual_datum,
                        IsIn(input, SetLookupOptions(value_set, skip_nulls)));
   std::shared_ptr<Array> actual = actual_datum.make_array();
-  ASSERT_OK(actual->ValidateFull());
+  ValidateOutput(actual_datum);
   AssertArraysEqual(*expected, *actual, /*verbose=*/true);
 }
 
@@ -68,7 +68,7 @@ void CheckIsInChunked(const std::shared_ptr<ChunkedArray>& input,
   ASSERT_OK_AND_ASSIGN(Datum actual_datum,
                        IsIn(input, SetLookupOptions(value_set, skip_nulls)));
   auto actual = actual_datum.chunked_array();
-  ASSERT_OK(actual->ValidateFull());
+  ValidateOutput(actual_datum);
   AssertChunkedEqual(*expected, *actual);
 }
 
@@ -89,7 +89,7 @@ void CheckIsInDictionary(const std::shared_ptr<DataType>& type,
   ASSERT_OK_AND_ASSIGN(Datum actual_datum,
                        IsIn(input, SetLookupOptions(value_set, skip_nulls)));
   std::shared_ptr<Array> actual = actual_datum.make_array();
-  ASSERT_OK(actual->ValidateFull());
+  ValidateOutput(actual_datum);
   AssertArraysEqual(*expected, *actual, /*verbose=*/true);
 }
 
@@ -436,7 +436,7 @@ class TestIndexInKernel : public ::testing::Test {
     SetLookupOptions options(value_set, skip_nulls);
     ASSERT_OK_AND_ASSIGN(Datum actual_datum, IndexIn(input, options));
     std::shared_ptr<Array> actual = actual_datum.make_array();
-    ASSERT_OK(actual->ValidateFull());
+    ValidateOutput(actual_datum);
     AssertArraysEqual(*expected, *actual, /*verbose=*/true);
   }
 
@@ -447,7 +447,7 @@ class TestIndexInKernel : public ::testing::Test {
     ASSERT_OK_AND_ASSIGN(Datum actual,
                          IndexIn(input, SetLookupOptions(value_set, skip_nulls)));
     ASSERT_EQ(Datum::CHUNKED_ARRAY, actual.kind());
-    ASSERT_OK(actual.chunked_array()->ValidateFull());
+    ValidateOutput(actual);
     AssertChunkedEqual(*expected, *actual.chunked_array());
   }
 
@@ -469,7 +469,7 @@ class TestIndexInKernel : public ::testing::Test {
     SetLookupOptions options(value_set, skip_nulls);
     ASSERT_OK_AND_ASSIGN(Datum actual_datum, IndexIn(input, options));
     std::shared_ptr<Array> actual = actual_datum.make_array();
-    ASSERT_OK(actual->ValidateFull());
+    ValidateOutput(actual_datum);
     AssertArraysEqual(*expected, *actual, /*verbose=*/true);
   }
 };
diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal.cc b/cpp/src/arrow/compute/kernels/scalar_temporal.cc
index cc22ccf044a..1694d22ffae 100644
--- a/cpp/src/arrow/compute/kernels/scalar_temporal.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_temporal.cc
@@ -17,13 +17,16 @@
 
 #include "arrow/builder.h"
 #include "arrow/compute/kernels/common.h"
+#include "arrow/util/checked_cast.h"
 #include "arrow/util/time.h"
 #include "arrow/vendored/datetime.h"
 
 namespace arrow {
 
-namespace compute {
+using internal::checked_cast;
+using internal::checked_pointer_cast;
 
+namespace compute {
 namespace internal {
 
 namespace {
@@ -45,77 +48,35 @@ using arrow_vendored::date::literals::thu;
 using internal::applicator::ScalarUnaryNotNull;
 using internal::applicator::SimpleUnary;
 
-// Based on ScalarUnaryNotNullStateful. Adds timezone awareness.
-template <typename Op, typename OutType>
-struct ScalarUnaryStatefulTemporal {
-  using ThisType = ScalarUnaryStatefulTemporal<Op, OutType>;
-  using OutValue = typename internal::GetOutputType<OutType>::T;
+const std::string& GetInputTimezone(const Datum& datum) {
+  return checked_cast<const TimestampType&>(*datum.type()).timezone();
+}
 
-  Op op;
-  explicit ScalarUnaryStatefulTemporal(Op op) : op(std::move(op)) {}
-
-  template <typename Type>
-  struct ArrayExec {
-    static Status Exec(const ThisType& functor, KernelContext* ctx, const ArrayData& arg0,
-                       Datum* out) {
-      const std::string timezone =
-          checked_pointer_cast<const TimestampType>(arg0.type)->timezone();
-      Status st = Status::OK();
-      ArrayData* out_arr = out->mutable_array();
-      auto out_data = out_arr->GetMutableValues<OutValue>(1);
-
-      if (timezone.empty()) {
-        internal::VisitArrayValuesInline<Int64Type>(
-            arg0,
-            [&](int64_t v) {
-              *out_data++ = functor.op.template Call<OutValue>(ctx, v, &st);
-            },
-            [&]() {
-              // null
-              ++out_data;
-            });
-      } else {
-        st = Status::Invalid("Timezone aware timestamps not supported. Timezone found: ",
-                             timezone);
-      }
-      return st;
-    }
-  };
-
-  Status Scalar(KernelContext* ctx, const Scalar& arg0, Datum* out) {
-    const std::string timezone =
-        checked_pointer_cast<const TimestampType>(arg0.type)->timezone();
-    Status st = Status::OK();
-    if (timezone.empty()) {
-      if (arg0.is_valid) {
-        int64_t arg0_val = internal::UnboxScalar<Int64Type>::Unbox(arg0);
-        internal::BoxScalar<OutType>::Box(
-            this->op.template Call<OutValue>(ctx, arg0_val, &st), out->scalar().get());
-      }
-    } else {
-      st = Status::Invalid("Timezone aware timestamps not supported. Timezone found: ",
-                           timezone);
-    }
-    return st;
-  }
+const std::string& GetInputTimezone(const Scalar& scalar) {
+  return checked_cast<const TimestampType&>(*scalar.type).timezone();
+}
 
-  Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
-    if (batch[0].kind() == Datum::ARRAY) {
-      return ArrayExec<OutType>::Exec(*this, ctx, *batch[0].array(), out);
-    } else {
-      return Scalar(ctx, *batch[0].scalar(), out);
-    }
+const std::string& GetInputTimezone(const ArrayData& array) {
+  return checked_cast<const TimestampType&>(*array.type).timezone();
+}
+
+template <typename T>
+Status TemporalComponentExtractCheckTimezone(const T& input) {
+  const auto& timezone = GetInputTimezone(input);
+  if (!timezone.empty()) {
+    return Status::NotImplemented(
+        "Cannot extract components from timestamp with specific timezone: ", timezone);
   }
-};
+  return Status::OK();
+}
 
 template <typename Op, typename OutType>
-struct ScalarUnaryTemporal {
+struct TemporalComponentExtract {
   using OutValue = typename internal::GetOutputType<OutType>::T;
 
   static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
-    // Seed kernel with dummy state
-    ScalarUnaryStatefulTemporal<Op, OutType> kernel({});
-    return kernel.Exec(ctx, batch, out);
+    RETURN_NOT_OK(TemporalComponentExtractCheckTimezone(batch.values[0]));
+    return ScalarUnaryNotNull<OutType, TimestampType, Op>::Exec(ctx, batch, out);
   }
 };
 
@@ -124,8 +85,8 @@ struct ScalarUnaryTemporal {
 
 template <typename Duration>
 struct Year {
-  template <typename T>
-  static T Call(KernelContext*, int64_t arg, Status*) {
+  template <typename T, typename Arg0>
+  static T Call(KernelContext*, Arg0 arg, Status*) {
     return static_cast<T>(static_cast<const int32_t>(
         year_month_day(floor<days>(sys_time<Duration>(Duration{arg}))).year()));
   }
@@ -136,8 +97,8 @@ struct Year {
 
 template <typename Duration>
 struct Month {
-  template <typename T>
-  static T Call(KernelContext*, int64_t arg, Status*) {
+  template <typename T, typename Arg0>
+  static T Call(KernelContext*, Arg0 arg, Status*) {
     return static_cast<T>(static_cast<const uint32_t>(
         year_month_day(floor<days>(sys_time<Duration>(Duration{arg}))).month()));
   }
@@ -148,8 +109,8 @@ struct Month {
 
 template <typename Duration>
 struct Day {
-  template <typename T>
-  static T Call(KernelContext*, int64_t arg, Status*) {
+  template <typename T, typename Arg0>
+  static T Call(KernelContext*, Arg0 arg, Status*) {
     return static_cast<T>(static_cast<const uint32_t>(
         year_month_day(floor<days>(sys_time<Duration>(Duration{arg}))).day()));
   }
@@ -160,8 +121,8 @@ struct Day {
 
 template <typename Duration>
 struct DayOfWeek {
-  template <typename T>
-  static T Call(KernelContext*, int64_t arg, Status*) {
+  template <typename T, typename Arg0>
+  static T Call(KernelContext*, Arg0 arg, Status*) {
     return static_cast<T>(
         weekday(year_month_day(floor<days>(sys_time<Duration>(Duration{arg}))))
             .iso_encoding() -
@@ -174,8 +135,8 @@ struct DayOfWeek {
 
 template <typename Duration>
 struct DayOfYear {
-  template <typename T>
-  static T Call(KernelContext*, int64_t arg, Status*) {
+  template <typename T, typename Arg0>
+  static T Call(KernelContext*, Arg0 arg, Status*) {
     const auto t = floor<days>(sys_time<Duration>(Duration{arg}));
     return static_cast<T>(
         (t - sys_time<days>(year_month_day(t).year() / jan / 0)).count());
@@ -190,8 +151,8 @@ struct DayOfYear {
 
 template <typename Duration>
 struct ISOYear {
-  template <typename T>
-  static T Call(KernelContext*, int64_t arg, Status*) {
+  template <typename T, typename Arg0>
+  static T Call(KernelContext*, Arg0 arg, Status*) {
     const auto t = floor<days>(sys_time<Duration>(Duration{arg}));
     auto y = year_month_day{t + days{3}}.year();
     auto start = sys_time<days>((y - years{1}) / dec / thu[last]) + (mon - thu);
@@ -211,8 +172,8 @@ struct ISOYear {
 // https://github.com/HowardHinnant/date/blob/6e921e1b1d21e84a5c82416ba7ecd98e33a436d0/include/date/iso_week.h#L1503
 template <typename Duration>
 struct ISOWeek {
-  template <typename T>
-  static T Call(KernelContext*, int64_t arg, Status*) {
+  template <typename T, typename Arg0>
+  static T Call(KernelContext*, Arg0 arg, Status*) {
     const auto t = floor<days>(sys_time<Duration>(Duration{arg}));
     auto y = year_month_day{t + days{3}}.year();
     auto start = sys_time<days>((y - years{1}) / dec / thu[last]) + (mon - thu);
@@ -229,8 +190,8 @@ struct ISOWeek {
 
 template <typename Duration>
 struct Quarter {
-  template <typename T>
-  static T Call(KernelContext*, int64_t arg, Status*) {
+  template <typename T, typename Arg0>
+  static T Call(KernelContext*, Arg0 arg, Status*) {
     const auto ymd = year_month_day(floor<days>(sys_time<Duration>(Duration{arg})));
     return static_cast<T>((static_cast<const uint32_t>(ymd.month()) - 1) / 3 + 1);
   }
@@ -241,8 +202,8 @@ struct Quarter {
 
 template <typename Duration>
 struct Hour {
-  template <typename T>
-  static T Call(KernelContext*, int64_t arg, Status*) {
+  template <typename T, typename Arg0>
+  static T Call(KernelContext*, Arg0 arg, Status*) {
     Duration t = Duration{arg};
     return static_cast<T>((t - floor<days>(t)) / std::chrono::hours(1));
   }
@@ -253,8 +214,8 @@ struct Hour {
 
 template <typename Duration>
 struct Minute {
-  template <typename T>
-  static T Call(KernelContext*, int64_t arg, Status*) {
+  template <typename T, typename Arg0>
+  static T Call(KernelContext*, Arg0 arg, Status*) {
     Duration t = Duration{arg};
     return static_cast<T>((t - floor<std::chrono::hours>(t)) / std::chrono::minutes(1));
   }
@@ -265,8 +226,8 @@ struct Minute {
 
 template <typename Duration>
 struct Second {
-  template <typename T>
-  static T Call(KernelContext*, int64_t arg, Status*) {
+  template <typename T, typename Arg0>
+  static T Call(KernelContext*, Arg0 arg, Status*) {
     Duration t = Duration{arg};
     return static_cast<T>((t - floor<std::chrono::minutes>(t)) / std::chrono::seconds(1));
   }
@@ -277,8 +238,8 @@ struct Second {
 
 template <typename Duration>
 struct Subsecond {
-  template <typename T>
-  static T Call(KernelContext*, int64_t arg, Status*) {
+  template <typename T, typename Arg0>
+  static T Call(KernelContext*, Arg0 arg, Status*) {
     Duration t = Duration{arg};
     return static_cast<T>(
         (std::chrono::duration<double>(t - floor<std::chrono::seconds>(t)).count()));
@@ -290,8 +251,8 @@ struct Subsecond {
 
 template <typename Duration>
 struct Millisecond {
-  template <typename T>
-  static T Call(KernelContext*, int64_t arg, Status*) {
+  template <typename T, typename Arg0>
+  static T Call(KernelContext*, Arg0 arg, Status*) {
     Duration t = Duration{arg};
     return static_cast<T>(
         ((t - floor<std::chrono::seconds>(t)) / std::chrono::milliseconds(1)) % 1000);
@@ -303,8 +264,8 @@ struct Millisecond {
 
 template <typename Duration>
 struct Microsecond {
-  template <typename T>
-  static T Call(KernelContext*, int64_t arg, Status*) {
+  template <typename T, typename Arg0>
+  static T Call(KernelContext*, Arg0 arg, Status*) {
     Duration t = Duration{arg};
     return static_cast<T>(
         ((t - floor<std::chrono::seconds>(t)) / std::chrono::microseconds(1)) % 1000);
@@ -316,8 +277,8 @@ struct Microsecond {
 
 template <typename Duration>
 struct Nanosecond {
-  template <typename T>
-  static T Call(KernelContext*, int64_t arg, Status*) {
+  template <typename T, typename Arg0>
+  static T Call(KernelContext*, Arg0 arg, Status*) {
     Duration t = Duration{arg};
     return static_cast<T>(
         ((t - floor<std::chrono::seconds>(t)) / std::chrono::nanoseconds(1)) % 1000);
@@ -345,13 +306,7 @@ inline std::vector<int64_t> get_iso_calendar(int64_t arg) {
 template <typename Duration>
 struct ISOCalendar {
   static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) {
-    const std::string timezone =
-        checked_pointer_cast<const TimestampType>(in.type)->timezone();
-    if (!timezone.empty()) {
-      return Status::Invalid("Timezone aware timestamps not supported. Timezone found: ",
-                             timezone);
-    }
-
+    RETURN_NOT_OK(TemporalComponentExtractCheckTimezone(in));
     if (in.is_valid) {
       const std::shared_ptr<DataType> iso_calendar_type =
           struct_({field("iso_year", int64()), field("iso_week", int64()),
@@ -372,12 +327,8 @@ struct ISOCalendar {
 
   static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) {
     using BuilderType = typename TypeTraits<Int64Type>::BuilderType;
-    const std::string timezone =
-        checked_pointer_cast<const TimestampType>(in.type)->timezone();
-    if (!timezone.empty()) {
-      return Status::Invalid("Timezone aware timestamps not supported. Timezone found: ",
-                             timezone);
-    }
+
+    RETURN_NOT_OK(TemporalComponentExtractCheckTimezone(in));
     const std::shared_ptr<DataType> iso_calendar_type =
         struct_({field("iso_year", int64()), field("iso_week", int64()),
                  field("iso_day_of_week", int64())});
@@ -421,22 +372,24 @@ std::shared_ptr<ScalarFunction> MakeTemporal(std::string name, const FunctionDoc
     InputType in_type{match::TimestampTypeUnit(unit)};
     switch (unit) {
       case TimeUnit::SECOND: {
-        auto exec = ScalarUnaryTemporal<Op<std::chrono::seconds>, OutType>::Exec;
+        auto exec = TemporalComponentExtract<Op<std::chrono::seconds>, OutType>::Exec;
         DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec)));
         break;
       }
       case TimeUnit::MILLI: {
-        auto exec = ScalarUnaryTemporal<Op<std::chrono::milliseconds>, OutType>::Exec;
+        auto exec =
+            TemporalComponentExtract<Op<std::chrono::milliseconds>, OutType>::Exec;
         DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec)));
         break;
       }
       case TimeUnit::MICRO: {
-        auto exec = ScalarUnaryTemporal<Op<std::chrono::microseconds>, OutType>::Exec;
+        auto exec =
+            TemporalComponentExtract<Op<std::chrono::microseconds>, OutType>::Exec;
         DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec)));
         break;
       }
       case TimeUnit::NANO: {
-        auto exec = ScalarUnaryTemporal<Op<std::chrono::nanoseconds>, OutType>::Exec;
+        auto exec = TemporalComponentExtract<Op<std::chrono::nanoseconds>, OutType>::Exec;
         DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec)));
         break;
       }
diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc
index be1054b3705..cc01d25de7c 100644
--- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc
@@ -159,22 +159,22 @@ TEST(ScalarTemporalTest, TestZonedTemporalComponentExtraction) {
     auto unit = timestamp(u, timezone);
     auto timestamps = ArrayFromJSON(unit, times);
 
-    ASSERT_RAISES(Invalid, Year(timestamps));
-    ASSERT_RAISES(Invalid, Month(timestamps));
-    ASSERT_RAISES(Invalid, Day(timestamps));
-    ASSERT_RAISES(Invalid, DayOfWeek(timestamps));
-    ASSERT_RAISES(Invalid, DayOfYear(timestamps));
-    ASSERT_RAISES(Invalid, ISOYear(timestamps));
-    ASSERT_RAISES(Invalid, ISOWeek(timestamps));
-    ASSERT_RAISES(Invalid, ISOCalendar(timestamps));
-    ASSERT_RAISES(Invalid, Quarter(timestamps));
-    ASSERT_RAISES(Invalid, Hour(timestamps));
-    ASSERT_RAISES(Invalid, Minute(timestamps));
-    ASSERT_RAISES(Invalid, Second(timestamps));
-    ASSERT_RAISES(Invalid, Millisecond(timestamps));
-    ASSERT_RAISES(Invalid, Microsecond(timestamps));
-    ASSERT_RAISES(Invalid, Nanosecond(timestamps));
-    ASSERT_RAISES(Invalid, Subsecond(timestamps));
+    ASSERT_RAISES(NotImplemented, Year(timestamps));
+    ASSERT_RAISES(NotImplemented, Month(timestamps));
+    ASSERT_RAISES(NotImplemented, Day(timestamps));
+    ASSERT_RAISES(NotImplemented, DayOfWeek(timestamps));
+    ASSERT_RAISES(NotImplemented, DayOfYear(timestamps));
+    ASSERT_RAISES(NotImplemented, ISOYear(timestamps));
+    ASSERT_RAISES(NotImplemented, ISOWeek(timestamps));
+    ASSERT_RAISES(NotImplemented, ISOCalendar(timestamps));
+    ASSERT_RAISES(NotImplemented, Quarter(timestamps));
+    ASSERT_RAISES(NotImplemented, Hour(timestamps));
+    ASSERT_RAISES(NotImplemented, Minute(timestamps));
+    ASSERT_RAISES(NotImplemented, Second(timestamps));
+    ASSERT_RAISES(NotImplemented, Millisecond(timestamps));
+    ASSERT_RAISES(NotImplemented, Microsecond(timestamps));
+    ASSERT_RAISES(NotImplemented, Nanosecond(timestamps));
+    ASSERT_RAISES(NotImplemented, Subsecond(timestamps));
   }
 }
 }  // namespace compute
diff --git a/cpp/src/arrow/compute/kernels/test_util.cc b/cpp/src/arrow/compute/kernels/test_util.cc
index 18257973150..a1151717d8b 100644
--- a/cpp/src/arrow/compute/kernels/test_util.cc
+++ b/cpp/src/arrow/compute/kernels/test_util.cc
@@ -22,12 +22,14 @@
 #include <string>
 
 #include "arrow/array.h"
+#include "arrow/array/validate.h"
 #include "arrow/chunked_array.h"
 #include "arrow/compute/exec.h"
 #include "arrow/compute/function.h"
 #include "arrow/compute/registry.h"
 #include "arrow/datum.h"
 #include "arrow/result.h"
+#include "arrow/table.h"
 #include "arrow/testing/gtest_util.h"
 
 namespace arrow {
@@ -49,7 +51,7 @@ void CheckScalarNonRecursive(const std::string& func_name, const DatumVector& in
                              const FunctionOptions* options) {
   ASSERT_OK_AND_ASSIGN(Datum out, CallFunction(func_name, inputs, options));
   std::shared_ptr<Array> actual = std::move(out).make_array();
-  ASSERT_OK(actual->ValidateFull());
+  ValidateOutput(*actual);
   AssertArraysEqual(*expected, *actual, /*verbose=*/true);
 }
 
@@ -164,7 +166,9 @@ void CheckScalar(std::string func_name, const DatumVector& inputs,
 
     ASSERT_OK_AND_ASSIGN(Datum out,
                          CallFunction(func_name, GetDatums(chunked_inputs), options));
-    ASSERT_OK(out.chunked_array()->ValidateFull());
+    ValidateOutput(out);
+    auto chunked = out.chunked_array();
+    (void)chunked;
     AssertDatumsEqual(std::make_shared<ChunkedArray>(expected_chunks), out);
   }
 }
@@ -191,7 +195,7 @@ void CheckVectorUnary(std::string func_name, Datum input, std::shared_ptr<Array>
                       const FunctionOptions* options) {
   ASSERT_OK_AND_ASSIGN(Datum out, CallFunction(func_name, {input}, options));
   std::shared_ptr<Array> actual = std::move(out).make_array();
-  ASSERT_OK(actual->ValidateFull());
+  ValidateOutput(*actual);
   AssertArraysEqual(*expected, *actual, /*verbose=*/true);
 }
 
@@ -219,6 +223,57 @@ void CheckScalarBinary(std::string func_name, std::shared_ptr<Scalar> left_input
   CheckScalar(std::move(func_name), {left_input, right_input}, expected, options);
 }
 
+namespace {
+
+void ValidateOutput(const ArrayData& output) {
+  ASSERT_OK(::arrow::internal::ValidateArrayFull(output));
+  TestInitialized(output);
+}
+
+void ValidateOutput(const ChunkedArray& output) {
+  ASSERT_OK(output.ValidateFull());
+  for (const auto& chunk : output.chunks()) {
+    TestInitialized(*chunk);
+  }
+}
+
+void ValidateOutput(const RecordBatch& output) {
+  ASSERT_OK(output.ValidateFull());
+  for (const auto& column : output.column_data()) {
+    TestInitialized(*column);
+  }
+}
+
+void ValidateOutput(const Table& output) {
+  ASSERT_OK(output.ValidateFull());
+  for (const auto& column : output.columns()) {
+    for (const auto& chunk : column->chunks()) {
+      TestInitialized(*chunk);
+    }
+  }
+}
+
+}  // namespace
+
+void ValidateOutput(const Datum& output) {
+  switch (output.kind()) {
+    case Datum::ARRAY:
+      ValidateOutput(*output.array());
+      break;
+    case Datum::CHUNKED_ARRAY:
+      ValidateOutput(*output.chunked_array());
+      break;
+    case Datum::RECORD_BATCH:
+      ValidateOutput(*output.record_batch());
+      break;
+    case Datum::TABLE:
+      ValidateOutput(*output.table());
+      break;
+    default:
+      break;
+  }
+}
+
 void CheckDispatchBest(std::string func_name, std::vector<ValueDescr> original_values,
                        std::vector<ValueDescr> expected_equivalent_values) {
   ASSERT_OK_AND_ASSIGN(auto function, GetFunctionRegistry()->GetFunction(func_name));
diff --git a/cpp/src/arrow/compute/kernels/test_util.h b/cpp/src/arrow/compute/kernels/test_util.h
index 85ed04c183a..f4854087b51 100644
--- a/cpp/src/arrow/compute/kernels/test_util.h
+++ b/cpp/src/arrow/compute/kernels/test_util.h
@@ -135,6 +135,8 @@ void CheckScalarBinary(std::string func_name, std::shared_ptr<Scalar> left_input
 void CheckVectorUnary(std::string func_name, Datum input, std::shared_ptr<Array> expected,
                       const FunctionOptions* options = nullptr);
 
+void ValidateOutput(const Datum& output);
+
 using BinaryTypes =
     ::testing::Types<BinaryType, LargeBinaryType, StringType, LargeStringType>;
 using StringTypes = ::testing::Types<StringType, LargeStringType>;
diff --git a/cpp/src/arrow/compute/kernels/vector_hash_test.cc b/cpp/src/arrow/compute/kernels/vector_hash_test.cc
index a3fa9314e60..c09b042a8be 100644
--- a/cpp/src/arrow/compute/kernels/vector_hash_test.cc
+++ b/cpp/src/arrow/compute/kernels/vector_hash_test.cc
@@ -59,7 +59,7 @@ template <typename T>
 void CheckUnique(const std::shared_ptr<T>& input,
                  const std::shared_ptr<Array>& expected) {
   ASSERT_OK_AND_ASSIGN(std::shared_ptr<Array> result, Unique(input));
-  ASSERT_OK(result->ValidateFull());
+  ValidateOutput(*result);
   // TODO: We probably shouldn't rely on array ordering.
   ASSERT_ARRAYS_EQUAL(*expected, *result);
 }
@@ -84,7 +84,7 @@ void CheckValueCountsNull(const std::shared_ptr<DataType>& type) {
   std::shared_ptr<Array> ex_counts = ArrayFromJSON(int64(), "[]");
 
   ASSERT_OK_AND_ASSIGN(auto result_struct, ValueCounts(input));
-  ASSERT_OK(result_struct->ValidateFull());
+  ValidateOutput(*result_struct);
   ASSERT_NE(result_struct->GetFieldByName(kValuesFieldName), nullptr);
   // TODO: We probably shouldn't rely on value ordering.
   ASSERT_ARRAYS_EQUAL(*ex_values, *result_struct->GetFieldByName(kValuesFieldName));
@@ -96,7 +96,7 @@ void CheckValueCounts(const std::shared_ptr<T>& input,
                       const std::shared_ptr<Array>& expected_values,
                       const std::shared_ptr<Array>& expected_counts) {
   ASSERT_OK_AND_ASSIGN(std::shared_ptr<Array> result, ValueCounts(input));
-  ASSERT_OK(result->ValidateFull());
+  ValidateOutput(*result);
   auto result_struct = std::dynamic_pointer_cast<StructArray>(result);
   ASSERT_EQ(result_struct->num_fields(), 2);
   // TODO: We probably shouldn't rely on value ordering.
@@ -128,7 +128,7 @@ void CheckDictEncode(const std::shared_ptr<Array>& input,
 
   ASSERT_OK_AND_ASSIGN(Datum datum_out, DictionaryEncode(input));
   std::shared_ptr<Array> result = MakeArray(datum_out.array());
-  ASSERT_OK(result->ValidateFull());
+  ValidateOutput(*result);
 
   ASSERT_ARRAYS_EQUAL(expected, *result);
 }
@@ -691,10 +691,7 @@ TEST_F(TestHashKernel, ZeroLengthDictionaryEncode) {
   // ARROW-7008
   auto values = ArrayFromJSON(utf8(), "[]");
   ASSERT_OK_AND_ASSIGN(Datum datum_result, DictionaryEncode(values));
-
-  std::shared_ptr<Array> result = datum_result.make_array();
-  const auto& dict_result = checked_cast<const DictionaryArray&>(*result);
-  ASSERT_OK(dict_result.ValidateFull());
+  ValidateOutput(datum_result);
 }
 
 TEST_F(TestHashKernel, NullEncodingSchemes) {
diff --git a/cpp/src/arrow/compute/kernels/vector_selection_test.cc b/cpp/src/arrow/compute/kernels/vector_selection_test.cc
index cf52870ed89..f428da0fe35 100644
--- a/cpp/src/arrow/compute/kernels/vector_selection_test.cc
+++ b/cpp/src/arrow/compute/kernels/vector_selection_test.cc
@@ -51,7 +51,7 @@ TEST(GetTakeIndices, Basics) {
     ASSERT_OK_AND_ASSIGN(auto indices,
                          internal::GetTakeIndices(*filter->data(), null_selection));
     auto indices_array = MakeArray(indices);
-    ASSERT_OK(indices_array->ValidateFull());
+    ValidateOutput(indices);
     AssertArraysEqual(*expected_indices, *indices_array, /*verbose=*/true);
   };
 
@@ -73,13 +73,13 @@ TEST(GetTakeIndices, NullValidityBuffer) {
   ASSERT_OK_AND_ASSIGN(auto indices,
                        internal::GetTakeIndices(*filter.data(), FilterOptions::DROP));
   auto indices_array = MakeArray(indices);
-  ASSERT_OK(indices_array->ValidateFull());
+  ValidateOutput(indices);
   AssertArraysEqual(*expected_indices, *indices_array, /*verbose=*/true);
 
   ASSERT_OK_AND_ASSIGN(
       indices, internal::GetTakeIndices(*filter.data(), FilterOptions::EMIT_NULL));
   indices_array = MakeArray(indices);
-  ASSERT_OK(indices_array->ValidateFull());
+  ValidateOutput(indices);
   AssertArraysEqual(*expected_indices, *indices_array, /*verbose=*/true);
 }
 
@@ -93,7 +93,7 @@ void CheckGetTakeIndicesCase(const Array& untyped_filter) {
   // Verify DROP indices
   {
     IndexArrayType indices(drop_indices);
-    ASSERT_OK(indices.ValidateFull());
+    ValidateOutput(indices);
 
     int64_t out_position = 0;
     for (int64_t i = 0; i < filter.length(); ++i) {
@@ -116,7 +116,7 @@ void CheckGetTakeIndicesCase(const Array& untyped_filter) {
   // Verify EMIT_NULL indices
   {
     IndexArrayType indices(emit_indices);
-    ASSERT_OK(indices.ValidateFull());
+    ValidateOutput(indices);
 
     int64_t out_position = 0;
     for (int64_t i = 0; i < filter.length(); ++i) {
@@ -183,7 +183,7 @@ class TestFilterKernel : public ::testing::Test {
     // test with EMIT_NULL
     ASSERT_OK_AND_ASSIGN(Datum out_datum, Filter(values, filter, emit_null_));
     auto actual = out_datum.make_array();
-    ASSERT_OK(actual->ValidateFull());
+    ValidateOutput(*actual);
     AssertArraysEqual(*expected, *actual);
 
     // test with DROP using EMIT_NULL and a coalesced filter
@@ -192,7 +192,7 @@ class TestFilterKernel : public ::testing::Test {
     expected = out_datum.make_array();
     ASSERT_OK_AND_ASSIGN(out_datum, Filter(values, filter, drop_));
     actual = out_datum.make_array();
-    ASSERT_OK(actual->ValidateFull());
+    ValidateOutput(*actual);
     AssertArraysEqual(*expected, *actual);
   }
 
@@ -212,11 +212,11 @@ void ValidateFilter(const std::shared_ptr<Array>& values,
 
   ASSERT_OK_AND_ASSIGN(Datum out_datum, Filter(values, filter_boxed, emit_null));
   auto filtered_emit_null = out_datum.make_array();
-  ASSERT_OK(filtered_emit_null->ValidateFull());
+  ValidateOutput(*filtered_emit_null);
 
   ASSERT_OK_AND_ASSIGN(out_datum, Filter(values, filter_boxed, drop));
   auto filtered_drop = out_datum.make_array();
-  ASSERT_OK(filtered_drop->ValidateFull());
+  ValidateOutput(*filtered_drop);
 
   // Create the expected arrays using Take
   ASSERT_OK_AND_ASSIGN(
@@ -384,7 +384,7 @@ TYPED_TEST(TestFilterKernelWithNumeric, CompareScalarAndFilterRandomNumeric) {
                            Compare(array, Datum(fifty), CompareOptions(op)));
       ASSERT_OK_AND_ASSIGN(Datum filtered, Filter(array, selection));
       auto filtered_array = filtered.make_array();
-      ASSERT_OK(filtered_array->ValidateFull());
+      ValidateOutput(*filtered_array);
       auto expected =
           CompareAndFilter<TypeParam>(array->raw_values(), array->length(), c_fifty, op);
       ASSERT_ARRAYS_EQUAL(*filtered_array, *expected);
@@ -406,7 +406,7 @@ TYPED_TEST(TestFilterKernelWithNumeric, CompareArrayAndFilterRandomNumeric) {
       ASSERT_OK_AND_ASSIGN(Datum selection, Compare(lhs, rhs, CompareOptions(op)));
       ASSERT_OK_AND_ASSIGN(Datum filtered, Filter(lhs, selection));
       auto filtered_array = filtered.make_array();
-      ASSERT_OK(filtered_array->ValidateFull());
+      ValidateOutput(*filtered_array);
       auto expected = CompareAndFilter<TypeParam>(lhs->raw_values(), lhs->length(),
                                                   rhs->raw_values(), op);
       ASSERT_ARRAYS_EQUAL(*filtered_array, *expected);
@@ -434,7 +434,7 @@ TYPED_TEST(TestFilterKernelWithNumeric, ScalarInRangeAndFilterRandomNumeric) {
     ASSERT_OK_AND_ASSIGN(Datum selection, And(greater_than_fifty, less_than_hundred));
     ASSERT_OK_AND_ASSIGN(Datum filtered, Filter(array, selection));
     auto filtered_array = filtered.make_array();
-    ASSERT_OK(filtered_array->ValidateFull());
+    ValidateOutput(*filtered_array);
     auto expected = CompareAndFilter<TypeParam>(
         array->raw_values(), array->length(),
         [&](CType e) { return (e > c_fifty) && (e < c_hundred); });
@@ -642,7 +642,7 @@ class TestFilterKernelWithRecordBatch : public TestFilterKernel<RecordBatch> {
     std::shared_ptr<RecordBatch> actual;
 
     ASSERT_OK(this->DoFilter(schm, batch_json, selection, options, &actual));
-    ASSERT_OK(actual->ValidateFull());
+    ValidateOutput(actual);
     ASSERT_BATCHES_EQUAL(*RecordBatchFromJSON(schm, expected_batch), *actual);
   }
 
@@ -695,7 +695,7 @@ class TestFilterKernelWithChunkedArray : public TestFilterKernel<ChunkedArray> {
                     const std::vector<std::string>& expected) {
     std::shared_ptr<ChunkedArray> actual;
     ASSERT_OK(this->FilterWithArray(type, values, filter, &actual));
-    ASSERT_OK(actual->ValidateFull());
+    ValidateOutput(actual);
     AssertChunkedEqual(*ChunkedArrayFromJSON(type, expected), *actual);
   }
 
@@ -705,7 +705,7 @@ class TestFilterKernelWithChunkedArray : public TestFilterKernel<ChunkedArray> {
                            const std::vector<std::string>& expected) {
     std::shared_ptr<ChunkedArray> actual;
     ASSERT_OK(this->FilterWithChunkedArray(type, values, filter, &actual));
-    ASSERT_OK(actual->ValidateFull());
+    ValidateOutput(actual);
     AssertChunkedEqual(*ChunkedArrayFromJSON(type, expected), *actual);
   }
 
@@ -754,7 +754,7 @@ class TestFilterKernelWithTable : public TestFilterKernel<Table> {
     std::shared_ptr<Table> actual;
 
     ASSERT_OK(this->FilterWithArray(schm, table_json, filter, options, &actual));
-    ASSERT_OK(actual->ValidateFull());
+    ValidateOutput(actual);
     ASSERT_TABLES_EQUAL(*TableFromJSON(schm, expected_table), *actual);
   }
 
@@ -765,7 +765,7 @@ class TestFilterKernelWithTable : public TestFilterKernel<Table> {
     std::shared_ptr<Table> actual;
 
     ASSERT_OK(this->FilterWithChunkedArray(schm, table_json, filter, options, &actual));
-    ASSERT_OK(actual->ValidateFull());
+    ValidateOutput(actual);
     AssertTablesEqual(*TableFromJSON(schm, expected_table), *actual,
                       /*same_chunk_layout=*/false);
   }
@@ -843,7 +843,7 @@ void AssertTakeArrays(const std::shared_ptr<Array>& values,
                       const std::shared_ptr<Array>& indices,
                       const std::shared_ptr<Array>& expected) {
   ASSERT_OK_AND_ASSIGN(std::shared_ptr<Array> actual, Take(*values, *indices));
-  ASSERT_OK(actual->ValidateFull());
+  ValidateOutput(actual);
   AssertArraysEqual(*expected, *actual, /*verbose=*/true);
 }
 
@@ -860,7 +860,7 @@ void CheckTake(const std::shared_ptr<DataType>& type, const std::string& values,
 
   for (auto index_type : {int8(), uint32()}) {
     ASSERT_OK(TakeJSON(type, values, index_type, indices, &actual));
-    ASSERT_OK(actual->ValidateFull());
+    ValidateOutput(actual);
     AssertArraysEqual(*ArrayFromJSON(type, expected), *actual, /*verbose=*/true);
   }
 }
@@ -900,7 +900,7 @@ void ValidateTake(const std::shared_ptr<Array>& values,
                   const std::shared_ptr<Array>& indices) {
   ASSERT_OK_AND_ASSIGN(Datum out, Take(values, indices));
   auto taken = out.make_array();
-  ASSERT_OK(taken->ValidateFull());
+  ValidateOutput(taken);
   ASSERT_EQ(indices->length(), taken->length());
   switch (indices->type_id()) {
     case Type::INT8:
@@ -1324,7 +1324,7 @@ class TestPermutationsWithTake : public TestBase {
   void DoTake(const Int16Array& values, const Int16Array& indices,
               std::shared_ptr<Int16Array>* out) {
     ASSERT_OK_AND_ASSIGN(std::shared_ptr<Array> boxed_out, Take(values, indices));
-    ASSERT_OK(boxed_out->ValidateFull());
+    ValidateOutput(boxed_out);
     *out = checked_pointer_cast<Int16Array>(std::move(boxed_out));
   }
 
@@ -1441,7 +1441,7 @@ class TestTakeKernelWithRecordBatch : public TestTakeKernelTyped<RecordBatch> {
 
     for (auto index_type : {int8(), uint32()}) {
       ASSERT_OK(TakeJSON(schm, batch_json, index_type, indices, &actual));
-      ASSERT_OK(actual->ValidateFull());
+      ValidateOutput(actual);
       ASSERT_BATCHES_EQUAL(*RecordBatchFromJSON(schm, expected_batch), *actual);
     }
   }
@@ -1499,7 +1499,7 @@ class TestTakeKernelWithChunkedArray : public TestTakeKernelTyped<ChunkedArray>
                   const std::vector<std::string>& expected) {
     std::shared_ptr<ChunkedArray> actual;
     ASSERT_OK(this->TakeWithArray(type, values, indices, &actual));
-    ASSERT_OK(actual->ValidateFull());
+    ValidateOutput(actual);
     AssertChunkedEqual(*ChunkedArrayFromJSON(type, expected), *actual);
   }
 
@@ -1509,7 +1509,7 @@ class TestTakeKernelWithChunkedArray : public TestTakeKernelTyped<ChunkedArray>
                          const std::vector<std::string>& expected) {
     std::shared_ptr<ChunkedArray> actual;
     ASSERT_OK(this->TakeWithChunkedArray(type, values, indices, &actual));
-    ASSERT_OK(actual->ValidateFull());
+    ValidateOutput(actual);
     AssertChunkedEqual(*ChunkedArrayFromJSON(type, expected), *actual);
   }
 
@@ -1557,7 +1557,7 @@ class TestTakeKernelWithTable : public TestTakeKernelTyped<Table> {
     std::shared_ptr<Table> actual;
 
     ASSERT_OK(this->TakeWithArray(schm, table_json, filter, &actual));
-    ASSERT_OK(actual->ValidateFull());
+    ValidateOutput(actual);
     ASSERT_TABLES_EQUAL(*TableFromJSON(schm, expected_table), *actual);
   }
 
@@ -1568,7 +1568,7 @@ class TestTakeKernelWithTable : public TestTakeKernelTyped<Table> {
     std::shared_ptr<Table> actual;
 
     ASSERT_OK(this->TakeWithChunkedArray(schm, table_json, filter, &actual));
-    ASSERT_OK(actual->ValidateFull());
+    ValidateOutput(actual);
     ASSERT_TABLES_EQUAL(*TableFromJSON(schm, expected_table), *actual);
   }
 
diff --git a/cpp/src/arrow/compute/kernels/vector_sort_test.cc b/cpp/src/arrow/compute/kernels/vector_sort_test.cc
index a54890e51de..2d76f0102f0 100644
--- a/cpp/src/arrow/compute/kernels/vector_sort_test.cc
+++ b/cpp/src/arrow/compute/kernels/vector_sort_test.cc
@@ -24,6 +24,7 @@
 #include "arrow/array/array_decimal.h"
 #include "arrow/array/concatenate.h"
 #include "arrow/compute/api_vector.h"
+#include "arrow/compute/kernels/test_util.h"
 #include "arrow/table.h"
 #include "arrow/testing/gtest_common.h"
 #include "arrow/testing/gtest_util.h"
@@ -153,7 +154,7 @@ class TestNthToIndicesBase : public TestBase {
     ASSERT_OK_AND_ASSIGN(std::shared_ptr<Array> offsets, NthToIndices(*values, n));
     // null_count field should have been initialized to 0, for convenience
     ASSERT_EQ(offsets->data()->null_count, 0);
-    ASSERT_OK(offsets->ValidateFull());
+    ValidateOutput(*offsets);
     Validate(*checked_pointer_cast<ArrayType>(values), n,
              *checked_pointer_cast<UInt64Array>(offsets));
   }
@@ -352,7 +353,7 @@ template <typename T>
 void AssertSortIndices(const std::shared_ptr<T>& input, SortOrder order,
                        const std::shared_ptr<Array>& expected) {
   ASSERT_OK_AND_ASSIGN(auto actual, SortIndices(*input, order));
-  ASSERT_OK(actual->ValidateFull());
+  ValidateOutput(*actual);
   AssertArraysEqual(*expected, *actual, /*verbose=*/true);
 }
 
@@ -360,7 +361,7 @@ template <typename T>
 void AssertSortIndices(const std::shared_ptr<T>& input, const SortOptions& options,
                        const std::shared_ptr<Array>& expected) {
   ASSERT_OK_AND_ASSIGN(auto actual, SortIndices(Datum(*input), options));
-  ASSERT_OK(actual->ValidateFull());
+  ValidateOutput(*actual);
   AssertArraysEqual(*expected, *actual, /*verbose=*/true);
 }
 
@@ -549,7 +550,7 @@ using SortIndicesableTypes =
 
 template <typename ArrayType>
 void ValidateSorted(const ArrayType& array, UInt64Array& offsets, SortOrder order) {
-  ASSERT_OK(array.ValidateFull());
+  ValidateOutput(array);
   SortComparator<ArrayType> compare;
   for (int i = 1; i < array.length(); i++) {
     uint64_t lhs = offsets.Value(i - 1);
@@ -1171,7 +1172,7 @@ class TestTableSortIndicesRandom : public testing::TestWithParam<RandomParam> {
  public:
   // Validates the sorted indexes are really sorted.
   void Validate(const Table& table, const SortOptions& options, UInt64Array& offsets) {
-    ASSERT_OK(offsets.ValidateFull());
+    ValidateOutput(offsets);
     Comparator comparator{table, options};
     for (int i = 1; i < table.num_rows(); i++) {
       uint64_t lhs = offsets.Value(i - 1);
diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc
index 5e242d4e807..39ce58ecaf6 100644
--- a/cpp/src/arrow/filesystem/s3fs.cc
+++ b/cpp/src/arrow/filesystem/s3fs.cc
@@ -66,6 +66,8 @@
 #include <aws/s3/model/PutObjectRequest.h>
 #include <aws/s3/model/UploadPartRequest.h>
 
+#include "arrow/util/windows_fixup.h"
+
 #include "arrow/buffer.h"
 #include "arrow/filesystem/filesystem.h"
 #include "arrow/filesystem/path_util.h"
@@ -85,7 +87,6 @@
 #include "arrow/util/optional.h"
 #include "arrow/util/task_group.h"
 #include "arrow/util/thread_pool.h"
-#include "arrow/util/windows_fixup.h"
 
 namespace arrow {
 
diff --git a/cpp/src/arrow/testing/gtest_util.cc b/cpp/src/arrow/testing/gtest_util.cc
index eb0edd56566..ea6edb0258e 100644
--- a/cpp/src/arrow/testing/gtest_util.cc
+++ b/cpp/src/arrow/testing/gtest_util.cc
@@ -643,17 +643,34 @@ void AssertZeroPadded(const Array& array) {
   }
 }
 
-void TestInitialized(const Array& array) {
-  for (const auto& buffer : array.data()->buffers) {
+void TestInitialized(const Array& array) { TestInitialized(*array.data()); }
+
+void TestInitialized(const ArrayData& array) {
+  uint8_t total = 0;
+  for (const auto& buffer : array.buffers) {
     if (buffer && buffer->capacity() > 0) {
-      int total = 0;
       auto data = buffer->data();
       for (int64_t i = 0; i < buffer->size(); ++i) {
         total ^= data[i];
       }
-      throw_away = total;
     }
   }
+  uint8_t total_bit = 0;
+  for (uint32_t mask = 1; mask < 256; mask <<= 1) {
+    total_bit ^= (total & mask) != 0;
+  }
+  // This is a dummy condition on all the bits of `total` (which depend on the
+  // entire buffer data).  If not all bits are well-defined, Valgrind will
+  // error with "Conditional jump or move depends on uninitialised value(s)".
+  if (total_bit == 0) {
+    ++throw_away;
+  }
+  for (const auto& child : array.child_data) {
+    TestInitialized(*child);
+  }
+  if (array.dictionary) {
+    TestInitialized(*array.dictionary);
+  }
 }
 
 void SleepFor(double seconds) {
diff --git a/cpp/src/arrow/testing/gtest_util.h b/cpp/src/arrow/testing/gtest_util.h
index 9d01cd4bf27..591745151da 100644
--- a/cpp/src/arrow/testing/gtest_util.h
+++ b/cpp/src/arrow/testing/gtest_util.h
@@ -41,6 +41,7 @@
 #include "arrow/type_traits.h"
 #include "arrow/util/bit_util.h"
 #include "arrow/util/macros.h"
+#include "arrow/util/string_builder.h"
 #include "arrow/util/type_fwd.h"
 
 // NOTE: failing must be inline in the macros below, to get correct file / line number
@@ -136,6 +137,11 @@
     ASSERT_EQ(expected, _actual);               \
   } while (0)
 
+// A generalized version of GTest's SCOPED_TRACE that takes arbitrary arguments.
+//   ARROW_SCOPED_TRACE("some variable = ", some_variable, ...)
+
+#define ARROW_SCOPED_TRACE(...) SCOPED_TRACE(::arrow::util::StringBuilder(__VA_ARGS__))
+
 namespace arrow {
 
 // ----------------------------------------------------------------------
@@ -275,6 +281,7 @@ ARROW_TESTING_EXPORT void AssertZeroPadded(const Array& array);
 
 // Check if the valid buffer bytes are initialized
 // and cause valgrind warnings otherwise.
+ARROW_TESTING_EXPORT void TestInitialized(const ArrayData& array);
 ARROW_TESTING_EXPORT void TestInitialized(const Array& array);
 
 template <typename BuilderType>
diff --git a/cpp/src/arrow/util/hashing.h b/cpp/src/arrow/util/hashing.h
index f55ac88fb91..09076c54d3c 100644
--- a/cpp/src/arrow/util/hashing.h
+++ b/cpp/src/arrow/util/hashing.h
@@ -329,8 +329,7 @@ class HashTable {
 
     // Stash old entries and seal builder, effectively resetting the Buffer
     const Entry* old_entries = entries_;
-    std::shared_ptr<Buffer> previous;
-    RETURN_NOT_OK(entries_builder_.Finish(&previous));
+    ARROW_ASSIGN_OR_RAISE(auto previous, entries_builder_.FinishWithLength(capacity_));
     // Allocate new buffer
     RETURN_NOT_OK(UpsizeBuffer(new_capacity));
 
@@ -461,6 +460,13 @@ class ScalarMemoTable : public MemoTable {
         out_data[index] = entry->payload.value;
       }
     });
+    // Zero-initialize the null entry
+    if (null_index_ != kKeyNotFound) {
+      int32_t index = null_index_ - start;
+      if (index >= 0) {
+        out_data[index] = Scalar{};
+      }
+    }
   }
 
   void CopyValues(Scalar* out_data) const { CopyValues(0, out_data); }
@@ -775,6 +781,8 @@ class BinaryMemoTable : public MemoTable {
     if (left_size > 0) {
       memcpy(out_data, in_data + left_offset, left_size);
     }
+    // Zero-initialize the null entry
+    memset(out_data + left_size, 0, width_size);
 
     auto right_size = values_size() - static_cast<size_t>(null_data_offset);
     if (right_size > 0) {
diff --git a/cpp/src/arrow/util/windows_fixup.h b/cpp/src/arrow/util/windows_fixup.h
index 0afa53c6c1e..2949ac4ab76 100644
--- a/cpp/src/arrow/util/windows_fixup.h
+++ b/cpp/src/arrow/util/windows_fixup.h
@@ -19,6 +19,13 @@
 
 #ifdef _WIN32
 
+#ifdef max
+#undef max
+#endif
+#ifdef min
+#undef min
+#endif
+
 // The Windows API defines macros from *File resolving to either
 // *FileA or *FileW.  Need to undo them.
 #ifdef CopyFile
diff --git a/cpp/src/parquet/arrow/writer.cc b/cpp/src/parquet/arrow/writer.cc
index 2a220231453..2fbebf27fce 100644
--- a/cpp/src/parquet/arrow/writer.cc
+++ b/cpp/src/parquet/arrow/writer.cc
@@ -25,7 +25,6 @@
 #include <vector>
 
 #include "arrow/array.h"
-#include "arrow/buffer_builder.h"
 #include "arrow/extension_type.h"
 #include "arrow/ipc/writer.h"
 #include "arrow/table.h"
@@ -56,7 +55,6 @@ using arrow::ExtensionArray;
 using arrow::ExtensionType;
 using arrow::Field;
 using arrow::FixedSizeBinaryArray;
-using Int16BufferBuilder = arrow::TypedBufferBuilder<int16_t>;
 using arrow::ListArray;
 using arrow::MemoryPool;
 using arrow::NumericArray;
diff --git a/cpp/src/parquet/encoding.cc b/cpp/src/parquet/encoding.cc
index 89b2b0e0413..cc1e262a96d 100644
--- a/cpp/src/parquet/encoding.cc
+++ b/cpp/src/parquet/encoding.cc
@@ -341,7 +341,6 @@ class PlainEncoder<BooleanType> : public EncoderImpl, virtual public BooleanEnco
       // no nulls, just dump the data
       ::arrow::internal::CopyBitmap(data.data()->GetValues<uint8_t>(1), data.offset(),
                                     data.length(), sink_.mutable_data(), sink_.length());
-      sink_.UnsafeAdvance(data.length());
     } else {
       auto n_valid = BitUtil::BytesForBits(data.length() - data.null_count());
       PARQUET_THROW_NOT_OK(sink_.Reserve(n_valid));
@@ -360,6 +359,7 @@ class PlainEncoder<BooleanType> : public EncoderImpl, virtual public BooleanEnco
       }
       writer.Finish();
     }
+    sink_.UnsafeAdvance(data.length());
   }
 
  private:
diff --git a/cpp/src/parquet/encoding_test.cc b/cpp/src/parquet/encoding_test.cc
index 02e81becd47..d271d59ef27 100644
--- a/cpp/src/parquet/encoding_test.cc
+++ b/cpp/src/parquet/encoding_test.cc
@@ -669,7 +669,7 @@ class EncodingAdHocTyped : public ::testing::Test {
     std::shared_ptr<::arrow::Array> result;
     ASSERT_OK(acc.Finish(&result));
     ASSERT_EQ(50, result->length());
-    ::arrow::AssertArraysEqual(*values, *result);
+    ::arrow::AssertArraysEqual(*values, *result, /*verbose=*/true);
   }
 
   void ByteStreamSplit(int seed) {

From 450e0eb7a640881788f839d0a475d796fa23c81c Mon Sep 17 00:00:00 2001
From: Alenka Frim <frim.alenka@gmail.com>
Date: Mon, 21 Jun 2021 11:15:01 -0500
Subject: [PATCH 435/719] ARROW-12867: [R] Bindings for abs()

Closes #10519 from AlenkaF/ARROW-12867

Lead-authored-by: Alenka Frim <frim.alenka@gmail.com>
Co-authored-by: Alenka Frim <AlenkaF@users.noreply.github.com>
Signed-off-by: Jonathan Keane <jkeane@gmail.com>
---
 r/R/expression.R                      |  1 +
 r/tests/testthat/test-dplyr-arrange.R | 13 -------------
 r/tests/testthat/test-dplyr.R         | 11 +++++++++++
 3 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/r/R/expression.R b/r/R/expression.R
index 417a12eeb81..ba542339ff8 100644
--- a/r/R/expression.R
+++ b/r/R/expression.R
@@ -22,6 +22,7 @@
   "as.factor" = "dictionary_encode",
   "is.na" = "is_null",
   "is.nan" = "is_nan",
+  "abs" = "abs_checked",
   # nchar is defined in dplyr-functions.R
   "tolower" = "utf8_lower",
   "toupper" = "utf8_upper",
diff --git a/r/tests/testthat/test-dplyr-arrange.R b/r/tests/testthat/test-dplyr-arrange.R
index 45cd687e848..0d12740f4cb 100644
--- a/r/tests/testthat/test-dplyr-arrange.R
+++ b/r/tests/testthat/test-dplyr-arrange.R
@@ -139,19 +139,6 @@ test_that("arrange() on integer, double, and character columns", {
       collect(),
     tbl
   )
-  expect_warning(
-    expect_equal(
-      tbl %>%
-        Table$create() %>%
-        arrange(abs(int), dbl) %>%
-        collect(),
-      tbl %>%
-        arrange(abs(int), dbl) %>%
-        collect()
-    ),
-    "not supported in Arrow",
-    fixed = TRUE
-  )
 })
 
 test_that("arrange() on datetime columns", {
diff --git a/r/tests/testthat/test-dplyr.R b/r/tests/testthat/test-dplyr.R
index 4fcb0e710a4..c3df89db359 100644
--- a/r/tests/testthat/test-dplyr.R
+++ b/r/tests/testthat/test-dplyr.R
@@ -899,3 +899,14 @@ test_that("No duplicate field names are allowed in an arrow_dplyr_query", {
   )
 })
 
+test_that("abs()", {
+  df <- tibble(x = c(-127, -10, -1, -0 , 0, 1, 10, 127, NA))
+
+  expect_dplyr_equal(
+    input %>%
+      transmute(
+        abs = abs(x)
+      ) %>% collect(),
+    df
+  )
+})

From 1b8bedcbf4b794858228c85644248300b64ce5a4 Mon Sep 17 00:00:00 2001
From: Weston Pace <weston.pace@gmail.com>
Date: Mon, 21 Jun 2021 12:40:22 -0400
Subject: [PATCH 436/719] ARROW-13110: [C++] Deadlock can happen when using
 BackgroundGenerator without transferring callbacks

Closes #10552 from westonpace/bugfix/ARROW-13110--c-deadlock-can-happen-when-using-backgroundgen

Authored-by: Weston Pace <weston.pace@gmail.com>
Signed-off-by: Benjamin Kietzman <bengilgit@gmail.com>
---
 cpp/src/arrow/dataset/scanner.cc     |  6 +++++-
 cpp/src/arrow/util/async_generator.h | 18 ++++++++++++++++++
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/cpp/src/arrow/dataset/scanner.cc b/cpp/src/arrow/dataset/scanner.cc
index d9c03239e83..09e05cdbf75 100644
--- a/cpp/src/arrow/dataset/scanner.cc
+++ b/cpp/src/arrow/dataset/scanner.cc
@@ -808,7 +808,11 @@ class OneShotFragment : public Fragment {
   Result<RecordBatchGenerator> ScanBatchesAsync(
       const std::shared_ptr<ScanOptions>& options) override {
     RETURN_NOT_OK(CheckConsumed());
-    return MakeBackgroundGenerator(std::move(batch_it_), options->io_context.executor());
+    ARROW_ASSIGN_OR_RAISE(
+        auto background_gen,
+        MakeBackgroundGenerator(std::move(batch_it_), options->io_context.executor()));
+    return MakeTransferredGenerator(std::move(background_gen),
+                                    internal::GetCpuThreadPool());
   }
   std::string type_name() const override { return "one-shot"; }
 
diff --git a/cpp/src/arrow/util/async_generator.h b/cpp/src/arrow/util/async_generator.h
index d975792ea10..5069d5092d4 100644
--- a/cpp/src/arrow/util/async_generator.h
+++ b/cpp/src/arrow/util/async_generator.h
@@ -20,6 +20,7 @@
 #include <cassert>
 #include <deque>
 #include <queue>
+#include <thread>
 
 #include "arrow/util/functional.h"
 #include "arrow/util/future.h"
@@ -1327,6 +1328,7 @@ class BackgroundGenerator {
     const int max_q;
     const int q_restart;
     Iterator<T> it;
+    std::thread::id worker_thread_id;
 
     // If true, the task is actively pumping items from the queue and does not need a
     // restart
@@ -1349,6 +1351,12 @@ class BackgroundGenerator {
   struct Cleanup {
     explicit Cleanup(State* state) : state(state) {}
     ~Cleanup() {
+      /// TODO: Once ARROW-13109 is available then we can be force consumers to spawn and
+      /// there is no need to perform this check.
+      ///
+      /// It's a deadlock if we enter cleanup from
+      /// the worker thread but it can happen if the consumer doesn't transfer away
+      assert(state->worker_thread_id != std::this_thread::get_id());
       Future<> finish_fut;
       {
         auto lock = state->mutex.Lock();
@@ -1369,6 +1377,7 @@ class BackgroundGenerator {
   static void WorkerTask(std::shared_ptr<State> state) {
     // We need to capture the state to read while outside the mutex
     bool reading = true;
+    state->worker_thread_id = std::this_thread::get_id();
     while (reading) {
       auto next = state->it.Next();
       // Need to capture state->waiting_future inside the mutex to mark finished outside
@@ -1420,6 +1429,7 @@ class BackgroundGenerator {
       // reference it.  We can safely transition to idle now.
       task_finished = state->task_finished;
       state->task_finished = Future<>();
+      state->worker_thread_id = std::thread::id();
     }
     task_finished.MarkFinished();
   }
@@ -1451,6 +1461,14 @@ constexpr int kDefaultBackgroundQRestart = 16;
 /// again.  If it is too high then it will be constantly stopping and restarting the
 /// background queue task
 ///
+/// The "background thread" is a logical thread and will run as tasks on the io_executor.
+/// This thread may stop and start when the queue fills up but there will only be one
+/// active background thread task at any given time.  You MUST transfer away from this
+/// background generator.  Otherwise there could be a race condition if a callback on the
+/// background thread deletes the last consumer reference to the background generator. You
+/// can transfer onto the same executor as the background thread, it is only neccesary to
+/// create a new thread task, not to switch executors.
+///
 /// This generator is not async-reentrant
 ///
 /// This generator will queue up to max_q blocks

From 676c9020e87c5f8d1f4d325794931df9f0481ec4 Mon Sep 17 00:00:00 2001
From: Benjamin Kietzman <bengilgit@gmail.com>
Date: Mon, 21 Jun 2021 15:09:14 -0400
Subject: [PATCH 437/719] ARROW-13097: [C++] Provide simple reflection utility

Provides functions for enumerating struct's data members, which enables reduction of boilerplate since many operations (equality comparison, serialization, ...) can be reduced to a generic loop over this enumeration of members.

```c++
struct Person { int age; std::string name; };

static auto kPersonProperties =
    MakeProperties(DataMember("age", &Person::age), DataMember("name", &Person::name));

bool operator==(const Person& l, const Person& r) { return EqualsImpl<Person>{l, r, kPersonProperties}.equal_; }
```

Closes #10551 from bkietz/13097-Provide-a-simple-reflecti

Authored-by: Benjamin Kietzman <bengilgit@gmail.com>
Signed-off-by: Benjamin Kietzman <bengilgit@gmail.com>
---
 cpp/src/arrow/util/CMakeLists.txt        |   1 +
 cpp/src/arrow/util/reflection_internal.h | 116 +++++++++++++
 cpp/src/arrow/util/reflection_test.cc    | 197 +++++++++++++++++++++++
 r/src/csv.cpp                            |   2 +-
 4 files changed, 315 insertions(+), 1 deletion(-)
 create mode 100644 cpp/src/arrow/util/reflection_internal.h
 create mode 100644 cpp/src/arrow/util/reflection_test.cc

diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt
index 37987b98520..e26a17120cd 100644
--- a/cpp/src/arrow/util/CMakeLists.txt
+++ b/cpp/src/arrow/util/CMakeLists.txt
@@ -57,6 +57,7 @@ add_arrow_test(utility-test
                logging_test.cc
                queue_test.cc
                range_test.cc
+               reflection_test.cc
                rle_encoding_test.cc
                stl_util_test.cc
                string_test.cc
diff --git a/cpp/src/arrow/util/reflection_internal.h b/cpp/src/arrow/util/reflection_internal.h
new file mode 100644
index 00000000000..522815dd2be
--- /dev/null
+++ b/cpp/src/arrow/util/reflection_internal.h
@@ -0,0 +1,116 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <string>
+#include <tuple>
+#include <utility>
+
+#include "arrow/util/string_view.h"
+
+namespace arrow {
+namespace internal {
+
+template <size_t...>
+struct index_sequence {};
+
+template <size_t N, size_t Head = N, size_t... Tail>
+struct make_index_sequence_impl;
+
+template <size_t N>
+using make_index_sequence = typename make_index_sequence_impl<N>::type;
+
+template <typename... T>
+using index_sequence_for = make_index_sequence<sizeof...(T)>;
+
+template <size_t N, size_t... I>
+struct make_index_sequence_impl<N, 0, I...> {
+  using type = index_sequence<I...>;
+};
+
+template <size_t N, size_t H, size_t... I>
+struct make_index_sequence_impl : make_index_sequence_impl<N, H - 1, H - 1, I...> {};
+
+static_assert(std::is_same<index_sequence<>, make_index_sequence<0>>::value, "");
+static_assert(std::is_same<index_sequence<0, 1, 2>, make_index_sequence<3>>::value, "");
+
+template <typename...>
+struct all_same : std::true_type {};
+
+template <typename One>
+struct all_same<One> : std::true_type {};
+
+template <typename Same, typename... Rest>
+struct all_same<Same, Same, Rest...> : all_same<Same, Rest...> {};
+
+template <typename One, typename Other, typename... Rest>
+struct all_same<One, Other, Rest...> : std::false_type {};
+
+template <size_t... I, typename... T, typename Fn>
+void ForEachTupleMemberImpl(const std::tuple<T...>& tup, Fn&& fn, index_sequence<I...>) {
+  (void)std::make_tuple((fn(std::get<I>(tup), I), std::ignore)...);
+}
+
+template <typename... T, typename Fn>
+void ForEachTupleMember(const std::tuple<T...>& tup, Fn&& fn) {
+  ForEachTupleMemberImpl(tup, fn, index_sequence_for<T...>());
+}
+
+template <typename C, typename T>
+struct DataMemberProperty {
+  using Class = C;
+  using Type = T;
+
+  constexpr const Type& get(const Class& obj) const { return obj.*ptr_; }
+
+  void set(Class* obj, Type value) const { (*obj).*ptr_ = std::move(value); }
+
+  constexpr util::string_view name() const { return name_; }
+
+  util::string_view name_;
+  Type Class::*ptr_;
+};
+
+template <typename Class, typename Type>
+constexpr DataMemberProperty<Class, Type> DataMember(util::string_view name,
+                                                     Type Class::*ptr) {
+  return {name, ptr};
+}
+
+template <typename... Properties>
+struct PropertyTuple {
+  template <typename Fn>
+  void ForEach(Fn&& fn) const {
+    ForEachTupleMember(props_, fn);
+  }
+
+  static_assert(all_same<typename Properties::Class...>::value,
+                "All properties must be properties of the same class");
+
+  size_t size() const { return sizeof...(Properties); }
+
+  std::tuple<Properties...> props_;
+};
+
+template <typename... Properties>
+PropertyTuple<Properties...> MakeProperties(Properties... props) {
+  return {std::make_tuple(props...)};
+}
+
+}  // namespace internal
+}  // namespace arrow
diff --git a/cpp/src/arrow/util/reflection_test.cc b/cpp/src/arrow/util/reflection_test.cc
new file mode 100644
index 00000000000..4ffcf679ecc
--- /dev/null
+++ b/cpp/src/arrow/util/reflection_test.cc
@@ -0,0 +1,197 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <sstream>
+
+#include <gtest/gtest.h>
+
+#include "arrow/util/reflection_internal.h"
+#include "arrow/util/string.h"
+
+namespace arrow {
+namespace internal {
+
+// generic property-based equality comparison
+template <typename Class>
+struct EqualsImpl {
+  template <typename Properties>
+  EqualsImpl(const Class& l, const Class& r, const Properties& props)
+      : left_(l), right_(r) {
+    props.ForEach(*this);
+  }
+
+  template <typename Property>
+  void operator()(const Property& prop, size_t i) {
+    equal_ &= prop.get(left_) == prop.get(right_);
+  }
+
+  const Class& left_;
+  const Class& right_;
+  bool equal_ = true;
+};
+
+// generic property-based serialization
+template <typename Class>
+struct ToStringImpl {
+  template <typename Properties>
+  ToStringImpl(util::string_view class_name, const Class& obj, const Properties& props)
+      : class_name_(class_name), obj_(obj), members_(props.size()) {
+    props.ForEach(*this);
+  }
+
+  template <typename Property>
+  void operator()(const Property& prop, size_t i) {
+    std::stringstream ss;
+    ss << prop.name() << ":" << prop.get(obj_);
+    members_[i] = ss.str();
+  }
+
+  std::string Finish() {
+    return class_name_.to_string() + "{" + JoinStrings(members_, ",") + "}";
+  }
+
+  util::string_view class_name_;
+  const Class& obj_;
+  std::vector<std::string> members_;
+};
+
+// generic property-based deserialization
+template <typename Class>
+struct FromStringImpl {
+  template <typename Properties>
+  FromStringImpl(util::string_view class_name, util::string_view repr,
+                 const Properties& props) {
+    Init(class_name, repr, props.size());
+    props.ForEach(*this);
+  }
+
+  void Fail() { obj_ = util::nullopt; }
+
+  void Init(util::string_view class_name, util::string_view repr, size_t num_properties) {
+    if (!repr.starts_with(class_name)) return Fail();
+
+    repr = repr.substr(class_name.size());
+    if (repr.empty()) return Fail();
+    if (repr.front() != '{') return Fail();
+    if (repr.back() != '}') return Fail();
+
+    repr = repr.substr(1, repr.size() - 2);
+    members_ = SplitString(repr, ',');
+    if (members_.size() != num_properties) return Fail();
+  }
+
+  template <typename Property>
+  void operator()(const Property& prop, size_t i) {
+    if (!obj_) return;
+
+    auto first_colon = members_[i].find_first_of(':');
+    if (first_colon == util::string_view::npos) return Fail();
+
+    auto name = members_[i].substr(0, first_colon);
+    if (name != prop.name()) return Fail();
+
+    auto value_repr = members_[i].substr(first_colon + 1);
+    typename Property::Type value;
+    try {
+      std::stringstream ss(value_repr.to_string());
+      ss >> value;
+      if (!ss.eof()) return Fail();
+    } catch (...) {
+      return Fail();
+    }
+    prop.set(&*obj_, std::move(value));
+  }
+
+  util::optional<Class> obj_ = Class{};
+  std::vector<util::string_view> members_;
+};
+
+// unmodified structure which we wish to reflect on:
+struct Person {
+  int age;
+  std::string name;
+};
+
+// enumeration of properties:
+// NB: no references to Person::age or Person::name after this
+// NB: ordering of properties follows this enum, regardless of
+//     order of declaration in `struct Person`
+static auto kPersonProperties =
+    MakeProperties(DataMember("age", &Person::age), DataMember("name", &Person::name));
+
+// use generic facilities to define equality, serialization and deserialization
+bool operator==(const Person& l, const Person& r) {
+  return EqualsImpl<Person>{l, r, kPersonProperties}.equal_;
+}
+
+bool operator!=(const Person& l, const Person& r) { return !(l == r); }
+
+std::string ToString(const Person& obj) {
+  return ToStringImpl<Person>{"Person", obj, kPersonProperties}.Finish();
+}
+
+void PrintTo(const Person& obj, std::ostream* os) { *os << ToString(obj); }
+
+util::optional<Person> PersonFromString(util::string_view repr) {
+  return FromStringImpl<Person>("Person", repr, kPersonProperties).obj_;
+}
+
+TEST(Reflection, EqualityWithDataMembers) {
+  Person genos{19, "Genos"};
+  Person kuseno{45, "Kuseno"};
+
+  EXPECT_EQ(genos, genos);
+  EXPECT_EQ(kuseno, kuseno);
+
+  EXPECT_NE(genos, kuseno);
+  EXPECT_NE(kuseno, genos);
+}
+
+TEST(Reflection, ToStringFromDataMembers) {
+  Person genos{19, "Genos"};
+  Person kuseno{45, "Kuseno"};
+
+  EXPECT_EQ(ToString(genos), "Person{age:19,name:Genos}");
+  EXPECT_EQ(ToString(kuseno), "Person{age:45,name:Kuseno}");
+}
+
+TEST(Reflection, FromStringToDataMembers) {
+  Person genos{19, "Genos"};
+
+  EXPECT_EQ(PersonFromString(ToString(genos)), genos);
+
+  EXPECT_EQ(PersonFromString(""), util::nullopt);
+  EXPECT_EQ(PersonFromString("Per"), util::nullopt);
+  EXPECT_EQ(PersonFromString("Person{"), util::nullopt);
+  EXPECT_EQ(PersonFromString("Person{age:19,name:Genos"), util::nullopt);
+
+  EXPECT_EQ(PersonFromString("Person{name:Genos"), util::nullopt);
+  EXPECT_EQ(PersonFromString("Person{age:19,name:Genos,extra:Cyborg}"), util::nullopt);
+  EXPECT_EQ(PersonFromString("Person{name:Genos,age:19"), util::nullopt);
+
+  EXPECT_EQ(PersonFromString("Fake{age:19,name:Genos}"), util::nullopt);
+
+  EXPECT_EQ(PersonFromString("Person{age,name:Genos}"), util::nullopt);
+  EXPECT_EQ(PersonFromString("Person{age:nineteen,name:Genos}"), util::nullopt);
+  EXPECT_EQ(PersonFromString("Person{age:19 ,name:Genos}"), util::nullopt);
+  EXPECT_EQ(PersonFromString("Person{age:19,moniker:Genos}"), util::nullopt);
+
+  EXPECT_EQ(PersonFromString("Person{age: 19, name: Genos}"), util::nullopt);
+}
+
+}  // namespace internal
+}  // namespace arrow
diff --git a/r/src/csv.cpp b/r/src/csv.cpp
index a8d2256cfe3..3e58f95c372 100644
--- a/r/src/csv.cpp
+++ b/r/src/csv.cpp
@@ -111,7 +111,7 @@ std::shared_ptr<arrow::csv::ConvertOptions> csv___ConvertOptions__initialize(
   if (!Rf_isNull(op_timestamp_parsers)) {
     std::vector<std::shared_ptr<arrow::TimestampParser>> timestamp_parsers;
 
-    // if we have a character vector, convert to arrow::TimestampParser
+    // if we have a character vector, convert to arrow::StrptimeTimestampParser
     if (TYPEOF(op_timestamp_parsers) == STRSXP) {
       cpp11::strings s_timestamp_parsers(op_timestamp_parsers);
       for (cpp11::r_string s : s_timestamp_parsers) {

From c913aa3ad7b36b2eaccd4dc2cf0bc35ab893bb4a Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Tue, 22 Jun 2021 04:50:21 +0900
Subject: [PATCH 438/719] ARROW-13124: [Ruby] Add support for memory view

Closes #10558 from kou/ruby-memory-view

Authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 ruby/red-arrow/ext/arrow/arrow.cpp       |   3 +
 ruby/red-arrow/ext/arrow/memory-view.cpp | 311 ++++++++++++++++
 ruby/red-arrow/ext/arrow/memory-view.hpp |  26 ++
 ruby/red-arrow/red-arrow.gemspec         |   1 +
 ruby/red-arrow/test/helper.rb            |   1 +
 ruby/red-arrow/test/test-memory-view.rb  | 434 +++++++++++++++++++++++
 6 files changed, 776 insertions(+)
 create mode 100644 ruby/red-arrow/ext/arrow/memory-view.cpp
 create mode 100644 ruby/red-arrow/ext/arrow/memory-view.hpp
 create mode 100644 ruby/red-arrow/test/test-memory-view.rb

diff --git a/ruby/red-arrow/ext/arrow/arrow.cpp b/ruby/red-arrow/ext/arrow/arrow.cpp
index 6226ba0767c..86c8c8fb69f 100644
--- a/ruby/red-arrow/ext/arrow/arrow.cpp
+++ b/ruby/red-arrow/ext/arrow/arrow.cpp
@@ -18,6 +18,7 @@
  */
 
 #include "red-arrow.hpp"
+#include "memory-view.hpp"
 
 #include <ruby.hpp>
 
@@ -78,4 +79,6 @@ extern "C" void Init_arrow() {
   red_arrow::id_jd = rb_intern("jd");
   red_arrow::id_new = rb_intern("new");
   red_arrow::id_to_datetime = rb_intern("to_datetime");
+
+  red_arrow::memory_view::init(mArrow);
 }
diff --git a/ruby/red-arrow/ext/arrow/memory-view.cpp b/ruby/red-arrow/ext/arrow/memory-view.cpp
new file mode 100644
index 00000000000..a3135310c97
--- /dev/null
+++ b/ruby/red-arrow/ext/arrow/memory-view.cpp
@@ -0,0 +1,311 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "memory-view.hpp"
+
+#include <arrow-glib/arrow-glib.hpp>
+#include <rbgobject.h>
+
+#include <ruby/version.h>
+
+#if RUBY_API_VERSION_MAJOR >= 3
+#  define HAVE_MEMORY_VIEW
+#  define private memory_view_private
+#  include <ruby/memory_view.h>
+#  undef private
+#endif
+
+#include <sstream>
+
+namespace red_arrow {
+  namespace memory_view {
+#ifdef HAVE_MEMORY_VIEW
+    // This is workaround for the following rb_memory_view_t problems
+    // in C++:
+    //
+    //   * Can't use "private" as member name
+    //   * Can't assign a value to "rb_memory_view_t::private"
+    //
+    // This has compatible layout with rb_memory_view_t.
+    struct memory_view {
+      VALUE obj;
+      void *data;
+      ssize_t byte_size;
+      bool readonly;
+      const char *format;
+      ssize_t item_size;
+      struct {
+        const rb_memory_view_item_component_t *components;
+        size_t length;
+      } item_desc;
+      ssize_t ndim;
+      const ssize_t *shape;
+      const ssize_t *strides;
+      const ssize_t *sub_offsets;
+      void *private_data;
+    };
+
+    struct PrivateData {
+      std::string format;
+    };
+
+    class PrimitiveArrayGetter : public arrow::ArrayVisitor {
+    public:
+      explicit PrimitiveArrayGetter(memory_view *view)
+        : view_(view) {
+      }
+
+      arrow::Status Visit(const arrow::BooleanArray& array) override {
+        fill(static_cast<const arrow::Array&>(array));
+        // Memory view doesn't support bit stream. We use one byte
+        // for 8 elements. Users can't calculate the number of
+        // elements from memory view but it's limitation of memory view.
+#ifdef ARROW_LITTLE_ENDIAN
+        view_->format = "b8";
+#else
+        view_->format = "B8";
+#endif
+        view_->item_size = 1;
+        view_->byte_size = (array.length() + 7) / 8;
+        return arrow::Status::OK();
+      }
+
+      arrow::Status Visit(const arrow::Int8Array& array) override {
+        fill(static_cast<const arrow::Array&>(array));
+        view_->format = "c";
+        return arrow::Status::OK();
+      }
+
+      arrow::Status Visit(const arrow::Int16Array& array) override {
+        fill(static_cast<const arrow::Array&>(array));
+        view_->format = "s";
+        return arrow::Status::OK();
+      }
+
+      arrow::Status Visit(const arrow::Int32Array& array) override {
+        fill(static_cast<const arrow::Array&>(array));
+        view_->format = "l";
+        return arrow::Status::OK();
+      }
+
+      arrow::Status Visit(const arrow::Int64Array& array) override {
+        fill(static_cast<const arrow::Array&>(array));
+        view_->format = "q";
+        return arrow::Status::OK();
+      }
+
+      arrow::Status Visit(const arrow::UInt8Array& array) override {
+        fill(static_cast<const arrow::Array&>(array));
+        view_->format = "C";
+        return arrow::Status::OK();
+      }
+
+      arrow::Status Visit(const arrow::UInt16Array& array) override {
+        fill(static_cast<const arrow::Array&>(array));
+        view_->format = "S";
+        return arrow::Status::OK();
+      }
+
+      arrow::Status Visit(const arrow::UInt32Array& array) override {
+        fill(static_cast<const arrow::Array&>(array));
+        view_->format = "L";
+        return arrow::Status::OK();
+      }
+
+      arrow::Status Visit(const arrow::UInt64Array& array) override {
+        fill(static_cast<const arrow::Array&>(array));
+        view_->format = "Q";
+        return arrow::Status::OK();
+      }
+
+      arrow::Status Visit(const arrow::FloatArray& array) override {
+        fill(static_cast<const arrow::Array&>(array));
+        view_->format = "f";
+        return arrow::Status::OK();
+      }
+
+      arrow::Status Visit(const arrow::DoubleArray& array) override {
+        fill(static_cast<const arrow::Array&>(array));
+        view_->format = "d";
+        return arrow::Status::OK();
+      }
+
+      arrow::Status Visit(const arrow::FixedSizeBinaryArray& array) override {
+        fill(static_cast<const arrow::Array&>(array));
+        auto priv = static_cast<PrivateData *>(view_->private_data);
+        const auto type =
+          std::static_pointer_cast<const arrow::FixedSizeBinaryType>(
+            array.type());
+        std::ostringstream output;
+        output << "C" << type->byte_width();
+        priv->format = output.str();
+        view_->format = priv->format.c_str();
+        return arrow::Status::OK();
+      }
+
+      arrow::Status Visit(const arrow::Date32Array& array) override {
+        fill(static_cast<const arrow::Array&>(array));
+        view_->format = "l";
+        return arrow::Status::OK();
+      }
+
+      arrow::Status Visit(const arrow::Date64Array& array) override {
+        fill(static_cast<const arrow::Array&>(array));
+        view_->format = "q";
+        return arrow::Status::OK();
+      }
+
+      arrow::Status Visit(const arrow::Time32Array& array) override {
+        fill(static_cast<const arrow::Array&>(array));
+        view_->format = "l";
+        return arrow::Status::OK();
+      }
+
+      arrow::Status Visit(const arrow::Time64Array& array) override {
+        fill(static_cast<const arrow::Array&>(array));
+        view_->format = "q";
+        return arrow::Status::OK();
+      }
+
+      arrow::Status Visit(const arrow::TimestampArray& array) override {
+        fill(static_cast<const arrow::Array&>(array));
+        view_->format = "q";
+        return arrow::Status::OK();
+      }
+
+      arrow::Status Visit(const arrow::Decimal128Array& array) override {
+        fill(static_cast<const arrow::Array&>(array));
+        view_->format = "q2";
+        return arrow::Status::OK();
+      }
+
+      arrow::Status Visit(const arrow::Decimal256Array& array) override {
+        fill(static_cast<const arrow::Array&>(array));
+        view_->format = "q4";
+        return arrow::Status::OK();
+      }
+
+      private:
+      void fill(const arrow::Array& array) {
+        const auto array_data = array.data();
+        const auto data = array_data->GetValuesSafe<uint8_t>(1);
+        view_->data = const_cast<void *>(reinterpret_cast<const void *>(data));
+        const auto type =
+          std::static_pointer_cast<const arrow::FixedWidthType>(array.type());
+        view_->item_size = type->bit_width() / 8;
+        view_->byte_size = view_->item_size * array.length();
+      }
+
+      memory_view *view_;
+    };
+
+    bool primitive_array_get(VALUE obj, rb_memory_view_t *view, int flags) {
+      if (flags != RUBY_MEMORY_VIEW_SIMPLE) {
+        return false;
+      }
+      auto view_ = reinterpret_cast<memory_view *>(view);
+      view_->obj = obj;
+      view_->private_data = new PrivateData();
+      auto array = GARROW_ARRAY(RVAL2GOBJ(obj));
+      auto arrow_array = garrow_array_get_raw(array);
+      PrimitiveArrayGetter getter(view_);
+      auto status = arrow_array->Accept(&getter);
+      if (!status.ok()) {
+        return false;
+      }
+      view_->readonly = true;
+      view_->ndim = 1;
+      view_->shape = NULL;
+      view_->strides = NULL;
+      view_->sub_offsets = NULL;
+      return true;
+    }
+
+    bool primitive_array_release(VALUE obj, rb_memory_view_t *view) {
+      auto view_ = reinterpret_cast<memory_view *>(view);
+      delete static_cast<PrivateData *>(view_->private_data);
+      return true;
+    }
+
+    bool primitive_array_available_p(VALUE obj) {
+      return true;
+    }
+
+    rb_memory_view_entry_t primitive_array_entry = {
+      primitive_array_get,
+      primitive_array_release,
+      primitive_array_available_p,
+    };
+
+    bool buffer_get(VALUE obj, rb_memory_view_t *view, int flags) {
+      if (flags != RUBY_MEMORY_VIEW_SIMPLE) {
+        return false;
+      }
+      auto view_ = reinterpret_cast<memory_view *>(view);
+      view_->obj = obj;
+      auto buffer = GARROW_BUFFER(RVAL2GOBJ(obj));
+      auto arrow_buffer = garrow_buffer_get_raw(buffer);
+      view_->data =
+        const_cast<void *>(reinterpret_cast<const void *>(arrow_buffer->data()));
+      // Memory view doesn't support bit stream. We use one byte
+      // for 8 elements. Users can't calculate the number of
+      // elements from memory view but it's limitation of memory view.
+#ifdef ARROW_LITTLE_ENDIAN
+      view_->format = "b8";
+#else
+      view_->format = "B8";
+#endif
+      view_->item_size = 1;
+      view_->byte_size = arrow_buffer->size();
+      view_->readonly = true;
+      view_->ndim = 1;
+      view_->shape = NULL;
+      view_->strides = NULL;
+      view_->sub_offsets = NULL;
+      return true;
+    }
+
+    bool buffer_release(VALUE obj, rb_memory_view_t *view) {
+      return true;
+    }
+
+    bool buffer_available_p(VALUE obj) {
+      return true;
+    }
+
+    rb_memory_view_entry_t buffer_entry = {
+      buffer_get,
+      buffer_release,
+      buffer_available_p,
+    };
+#endif
+
+    void init(VALUE mArrow) {
+#ifdef HAVE_MEMORY_VIEW
+      auto cPrimitiveArray =
+        rb_const_get_at(mArrow, rb_intern("PrimitiveArray"));
+      rb_memory_view_register(cPrimitiveArray,
+                              &(red_arrow::memory_view::primitive_array_entry));
+
+      auto cBuffer = rb_const_get_at(mArrow, rb_intern("Buffer"));
+      rb_memory_view_register(cBuffer, &(red_arrow::memory_view::buffer_entry));
+#endif
+    }
+  }
+}
diff --git a/ruby/red-arrow/ext/arrow/memory-view.hpp b/ruby/red-arrow/ext/arrow/memory-view.hpp
new file mode 100644
index 00000000000..7a776462275
--- /dev/null
+++ b/ruby/red-arrow/ext/arrow/memory-view.hpp
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <ruby.hpp>
+
+namespace red_arrow {
+  namespace memory_view {
+    void init(VALUE mArrow);
+  }
+}
diff --git a/ruby/red-arrow/red-arrow.gemspec b/ruby/red-arrow/red-arrow.gemspec
index f23ba9edc59..e5a602a0862 100644
--- a/ruby/red-arrow/red-arrow.gemspec
+++ b/ruby/red-arrow/red-arrow.gemspec
@@ -55,6 +55,7 @@ Gem::Specification.new do |spec|
   spec.add_development_dependency("benchmark-driver")
   spec.add_development_dependency("bundler")
   spec.add_development_dependency("faker")
+  spec.add_development_dependency("fiddle", ">= 1.0.9")
   spec.add_development_dependency("rake")
   spec.add_development_dependency("redcarpet")
   spec.add_development_dependency("test-unit")
diff --git a/ruby/red-arrow/test/helper.rb b/ruby/red-arrow/test/helper.rb
index f7748b254a7..29e5f9cbcf4 100644
--- a/ruby/red-arrow/test/helper.rb
+++ b/ruby/red-arrow/test/helper.rb
@@ -17,6 +17,7 @@
 
 require "arrow"
 
+require "fiddle"
 require "pathname"
 require "tempfile"
 require "zlib"
diff --git a/ruby/red-arrow/test/test-memory-view.rb b/ruby/red-arrow/test/test-memory-view.rb
new file mode 100644
index 00000000000..0b9c98c407f
--- /dev/null
+++ b/ruby/red-arrow/test/test-memory-view.rb
@@ -0,0 +1,434 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class MemoryViewTest < Test::Unit::TestCase
+  def setup
+    unless Fiddle.const_defined?(:MemoryView)
+      omit("Fiddle::MemoryView is needed")
+    end
+    unless Fiddle::MemoryView.respond_to?(:export)
+      omit("Fiddle::MemoryView.export is needed")
+    end
+  end
+
+  def little_endian?
+    [1].pack("s") == [1].pack("s<")
+  end
+
+  test("BooleanArray") do
+    array = Arrow::BooleanArray.new([true] * 9)
+    Fiddle::MemoryView.export(array) do |memory_view|
+      if little_endian?
+        template = "b"
+      else
+        template = "B"
+      end
+      assert_equal([
+                     "#{template}8",
+                     1,
+                     2,
+                     [(("1" * 9) + ("0" * 7))].pack("#{template}*"),
+                   ],
+                   [
+                     memory_view.format,
+                     memory_view.item_size,
+                     memory_view.byte_size,
+                     memory_view.to_s,
+                   ])
+    end
+  end
+
+  test("Int8Array") do
+    values = [-(2 ** 7), 0, (2 ** 7) - 1]
+    array = Arrow::Int8Array.new(values)
+    Fiddle::MemoryView.export(array) do |memory_view|
+      assert_equal([
+                     "c",
+                     1,
+                     values.size,
+                     values.pack("c*"),
+                   ],
+                   [
+                     memory_view.format,
+                     memory_view.item_size,
+                     memory_view.byte_size,
+                     memory_view.to_s,
+                   ])
+    end
+  end
+
+  test("Int16Array") do
+    values = [-(2 ** 15), 0, (2 ** 15) - 1]
+    array = Arrow::Int16Array.new(values)
+    Fiddle::MemoryView.export(array) do |memory_view|
+      assert_equal([
+                     "s",
+                     2,
+                     2 * values.size,
+                     values.pack("s*"),
+                   ],
+                   [
+                     memory_view.format,
+                     memory_view.item_size,
+                     memory_view.byte_size,
+                     memory_view.to_s,
+                   ])
+    end
+  end
+
+  test("Int32Array") do
+    values = [-(2 ** 31), 0, (2 ** 31) - 1]
+    array = Arrow::Int32Array.new(values)
+    Fiddle::MemoryView.export(array) do |memory_view|
+      assert_equal([
+                     "l",
+                     4,
+                     4 * values.size,
+                     values.pack("l*"),
+                   ],
+                   [
+                     memory_view.format,
+                     memory_view.item_size,
+                     memory_view.byte_size,
+                     memory_view.to_s,
+                   ])
+    end
+  end
+
+  test("Int64Array") do
+    values = [-(2 ** 63), 0, (2 ** 63) - 1]
+    array = Arrow::Int64Array.new(values)
+    Fiddle::MemoryView.export(array) do |memory_view|
+      assert_equal([
+                     "q",
+                     8,
+                     8 * values.size,
+                     values.pack("q*"),
+                   ],
+                   [
+                     memory_view.format,
+                     memory_view.item_size,
+                     memory_view.byte_size,
+                     memory_view.to_s,
+                   ])
+    end
+  end
+
+  test("UInt8Array") do
+    values = [0, (2 ** 8) - 1]
+    array = Arrow::UInt8Array.new(values)
+    Fiddle::MemoryView.export(array) do |memory_view|
+      assert_equal([
+                     "C",
+                     1,
+                     values.size,
+                     values.pack("C*"),
+                   ],
+                   [
+                     memory_view.format,
+                     memory_view.item_size,
+                     memory_view.byte_size,
+                     memory_view.to_s,
+                   ])
+    end
+  end
+
+  test("UInt16Array") do
+    values = [0, (2 ** 16) - 1]
+    array = Arrow::UInt16Array.new(values)
+    Fiddle::MemoryView.export(array) do |memory_view|
+      assert_equal([
+                     "S",
+                     2,
+                     2 * values.size,
+                     values.pack("S*"),
+                   ],
+                   [
+                     memory_view.format,
+                     memory_view.item_size,
+                     memory_view.byte_size,
+                     memory_view.to_s,
+                   ])
+    end
+  end
+
+  test("UInt32Array") do
+    values = [0, (2 ** 32) - 1]
+    array = Arrow::UInt32Array.new(values)
+    Fiddle::MemoryView.export(array) do |memory_view|
+      assert_equal([
+                     "L",
+                     4,
+                     4 * values.size,
+                     values.pack("L*"),
+                   ],
+                   [
+                     memory_view.format,
+                     memory_view.item_size,
+                     memory_view.byte_size,
+                     memory_view.to_s,
+                   ])
+    end
+  end
+
+  test("UInt64Array") do
+    values = [(2 ** 64) - 1]
+    array = Arrow::UInt64Array.new(values)
+    Fiddle::MemoryView.export(array) do |memory_view|
+      assert_equal([
+                     "Q",
+                     8,
+                     8 * values.size,
+                     values.pack("Q*"),
+                   ],
+                   [
+                     memory_view.format,
+                     memory_view.item_size,
+                     memory_view.byte_size,
+                     memory_view.to_s,
+                   ])
+    end
+  end
+
+  test("FloatArray") do
+    values = [-1.1, 0.0, 1.1]
+    array = Arrow::FloatArray.new(values)
+    Fiddle::MemoryView.export(array) do |memory_view|
+      assert_equal([
+                     "f",
+                     4,
+                     4 * values.size,
+                     values.pack("f*"),
+                   ],
+                   [
+                     memory_view.format,
+                     memory_view.item_size,
+                     memory_view.byte_size,
+                     memory_view.to_s,
+                   ])
+    end
+  end
+
+  test("DoubleArray") do
+    values = [-1.1, 0.0, 1.1]
+    array = Arrow::DoubleArray.new(values)
+    Fiddle::MemoryView.export(array) do |memory_view|
+      assert_equal([
+                     "d",
+                     8,
+                     8 * values.size,
+                     values.pack("d*"),
+                   ],
+                   [
+                     memory_view.format,
+                     memory_view.item_size,
+                     memory_view.byte_size,
+                     memory_view.to_s,
+                   ])
+    end
+  end
+
+  test("FixedSizeBinaryArray") do
+    values = ["\x01\x02", "\x03\x04", "\x05\x06"]
+    data_type = Arrow::FixedSizeBinaryDataType.new(2)
+    array = Arrow::FixedSizeBinaryArray.new(data_type, values)
+    Fiddle::MemoryView.export(array) do |memory_view|
+      assert_equal([
+                     "C2",
+                     2,
+                     2 * values.size,
+                     values.join("").b,
+                   ],
+                   [
+                     memory_view.format,
+                     memory_view.item_size,
+                     memory_view.byte_size,
+                     memory_view.to_s,
+                   ])
+    end
+  end
+
+  test("Date32Array") do
+    n_days_since_epoch = 17406 # 2017-08-28
+    values = [n_days_since_epoch]
+    array = Arrow::Date32Array.new(values)
+    Fiddle::MemoryView.export(array) do |memory_view|
+      assert_equal([
+                     "l",
+                     4,
+                     4 * values.size,
+                     values.pack("l*"),
+                   ],
+                   [
+                     memory_view.format,
+                     memory_view.item_size,
+                     memory_view.byte_size,
+                     memory_view.to_s,
+                   ])
+    end
+  end
+
+  test("Date64Array") do
+    n_msecs_since_epoch = 1503878400000 # 2017-08-28T00:00:00Z
+    values = [n_msecs_since_epoch]
+    array = Arrow::Date64Array.new(values)
+    Fiddle::MemoryView.export(array) do |memory_view|
+      assert_equal([
+                     "q",
+                     8,
+                     8 * values.size,
+                     values.pack("q*"),
+                   ],
+                   [
+                     memory_view.format,
+                     memory_view.item_size,
+                     memory_view.byte_size,
+                     memory_view.to_s,
+                   ])
+    end
+  end
+
+  test("Time32Array") do
+    values = [1, 2, 3]
+    array = Arrow::Time32Array.new(:milli, values)
+    Fiddle::MemoryView.export(array) do |memory_view|
+      assert_equal([
+                     "l",
+                     4,
+                     4 * values.size,
+                     values.pack("l*"),
+                   ],
+                   [
+                     memory_view.format,
+                     memory_view.item_size,
+                     memory_view.byte_size,
+                     memory_view.to_s,
+                   ])
+    end
+  end
+
+  test("Time64Array") do
+    values = [1, 2, 3]
+    array = Arrow::Time64Array.new(:nano, values)
+    Fiddle::MemoryView.export(array) do |memory_view|
+      assert_equal([
+                     "q",
+                     8,
+                     8 * values.size,
+                     values.pack("q*"),
+                   ],
+                   [
+                     memory_view.format,
+                     memory_view.item_size,
+                     memory_view.byte_size,
+                     memory_view.to_s,
+                   ])
+    end
+  end
+
+  test("TimestampArray") do
+    values = [1, 2, 3]
+    array = Arrow::TimestampArray.new(:micro, values)
+    Fiddle::MemoryView.export(array) do |memory_view|
+      assert_equal([
+                     "q",
+                     8,
+                     8 * values.size,
+                     values.pack("q*"),
+                   ],
+                   [
+                     memory_view.format,
+                     memory_view.item_size,
+                     memory_view.byte_size,
+                     memory_view.to_s,
+                   ])
+    end
+  end
+
+  test("Decimal128Array") do
+    values = [
+      Arrow::Decimal128.new("10.1"),
+      Arrow::Decimal128.new("11.1"),
+      Arrow::Decimal128.new("10.2"),
+    ]
+    data_type = Arrow::Decimal128DataType.new(3, 1)
+    array = Arrow::Decimal128Array.new(data_type, values)
+    Fiddle::MemoryView.export(array) do |memory_view|
+      assert_equal([
+                     "q2",
+                     16,
+                     16 * values.size,
+                     values.collect {|value| value.to_bytes.to_s}.join(""),
+                   ],
+                   [
+                     memory_view.format,
+                     memory_view.item_size,
+                     memory_view.byte_size,
+                     memory_view.to_s,
+                   ])
+    end
+  end
+
+  test("Decimal256Array") do
+    values = [
+      Arrow::Decimal256.new("10.1"),
+      Arrow::Decimal256.new("11.1"),
+      Arrow::Decimal256.new("10.2"),
+    ]
+    data_type = Arrow::Decimal256DataType.new(3, 1)
+    array = Arrow::Decimal256Array.new(data_type, values)
+    Fiddle::MemoryView.export(array) do |memory_view|
+      assert_equal([
+                     "q4",
+                     32,
+                     32 * values.size,
+                     values.collect {|value| value.to_bytes.to_s}.join(""),
+                   ],
+                   [
+                     memory_view.format,
+                     memory_view.item_size,
+                     memory_view.byte_size,
+                     memory_view.to_s,
+                   ])
+    end
+  end
+
+  test("Buffer") do
+    values = [0, nil, nil] * 3
+    array = Arrow::Int8Array.new(values)
+    buffer = array.null_bitmap
+    Fiddle::MemoryView.export(buffer) do |memory_view|
+      if little_endian?
+        template = "b"
+      else
+        template = "B"
+      end
+      assert_equal([
+                     "#{template}8",
+                     1,
+                     2,
+                     ["100" * 3].pack("#{template}*"),
+                   ],
+                   [
+                     memory_view.format,
+                     memory_view.item_size,
+                     memory_view.byte_size,
+                     memory_view.to_s,
+                   ])
+    end
+  end
+end

From c43fab3d621bedef15470a1be43570be2026af20 Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Mon, 21 Jun 2021 17:08:04 -0500
Subject: [PATCH 439/719] ARROW-13127: [R] Valgrind nightly errors

Closes #10560 from jonkeane/ARROW-13127-valgrind-c-export

Authored-by: Jonathan Keane <jkeane@gmail.com>
Signed-off-by: Jonathan Keane <jkeane@gmail.com>
---
 r/tests/testthat/test-Array.R       |  8 ++++----
 r/tests/testthat/test-RecordBatch.R | 28 ++++++++++++++++------------
 r/tests/testthat/test-data-type.R   |  4 +++-
 r/tests/testthat/test-field.R       |  4 +++-
 r/tests/testthat/test-schema.R      |  4 +++-
 5 files changed, 29 insertions(+), 19 deletions(-)

diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R
index a9f20c89574..a86bdf0add4 100644
--- a/r/tests/testthat/test-Array.R
+++ b/r/tests/testthat/test-Array.R
@@ -829,13 +829,13 @@ test_that("Array to C-interface", {
   # export the array via the C-interface
   schema_ptr <- allocate_arrow_schema()
   array_ptr <- allocate_arrow_array()
-  on.exit({
-    delete_arrow_schema(schema_ptr)
-    delete_arrow_array(array_ptr)
-  })
   arr$export_to_c(array_ptr, schema_ptr)
 
   # then import it and check that the roundtripped value is the same
   circle <- Array$import_from_c(array_ptr, schema_ptr)
   expect_equal(arr, circle)
+
+  # must clean up the pointers or we leak
+  delete_arrow_schema(schema_ptr)
+  delete_arrow_array(array_ptr)
 })
diff --git a/r/tests/testthat/test-RecordBatch.R b/r/tests/testthat/test-RecordBatch.R
index c7e8b2dc2bf..681406caf64 100644
--- a/r/tests/testthat/test-RecordBatch.R
+++ b/r/tests/testthat/test-RecordBatch.R
@@ -422,22 +422,22 @@ test_that("record_batch() scalar recycling with vectors", {
 })
 
 test_that("record_batch() scalar recycling with Scalars, Arrays, and ChunkedArrays", {
-  
+
   expect_data_frame(
     record_batch(a = Array$create(1:10), b = Scalar$create(5)),
     tibble::tibble(a = 1:10, b = 5)
   )
-  
+
   expect_data_frame(
     record_batch(a = Array$create(1:10), b = Array$create(5)),
     tibble::tibble(a = 1:10, b = 5)
   )
-  
+
   expect_data_frame(
     record_batch(a = Array$create(1:10), b = ChunkedArray$create(5)),
     tibble::tibble(a = 1:10, b = 5)
   )
-  
+
 })
 
 test_that("record_batch() no recycling with tibbles", {
@@ -448,7 +448,7 @@ test_that("record_batch() no recycling with tibbles", {
     ),
     regexp = "All input tibbles or data.frames must have the same number of rows"
   )
-  
+
   expect_error(
     record_batch(
       tibble::tibble(a = 1:10),
@@ -573,7 +573,6 @@ test_that("RecordBatchReader to C-interface", {
 
   # export the RecordBatchReader via the C-interface
   stream_ptr <- allocate_arrow_array_stream()
-  on.exit(delete_arrow_array_stream(stream_ptr))
   scan <- Scanner$create(tab)
   reader <- scan$ToRecordBatchReader()
   reader$export_to_c(stream_ptr)
@@ -583,9 +582,11 @@ test_that("RecordBatchReader to C-interface", {
   tab_from_c_new <- circle$read_table()
   expect_equal(tab, tab_from_c_new)
 
+  # must clean up the pointer or we leak
+  delete_arrow_array_stream(stream_ptr)
+
   # export the RecordBatchStreamReader via the C-interface
   stream_ptr_new <- allocate_arrow_array_stream()
-  on.exit(delete_arrow_array_stream(stream_ptr_new))
   bytes <- write_to_raw(example_data)
   expect_type(bytes, "raw")
   reader_new <- RecordBatchStreamReader$create(bytes)
@@ -595,6 +596,9 @@ test_that("RecordBatchReader to C-interface", {
   circle_new <- RecordBatchStreamReader$import_from_c(stream_ptr_new)
   tab_from_c_new <- circle_new$read_table()
   expect_equal(tab, tab_from_c_new)
+
+  # must clean up the pointer or we leak
+  delete_arrow_array_stream(stream_ptr_new)
 })
 
 test_that("RecordBatch to C-interface", {
@@ -603,13 +607,13 @@ test_that("RecordBatch to C-interface", {
   # export the RecordBatch via the C-interface
   schema_ptr <- allocate_arrow_schema()
   array_ptr <- allocate_arrow_array()
-  on.exit({
-    delete_arrow_schema(schema_ptr)
-    delete_arrow_array(array_ptr)
-  })
   batch$export_to_c(array_ptr, schema_ptr)
 
   # then import it and check that the roundtripped value is the same
   circle <- RecordBatch$import_from_c(array_ptr, schema_ptr)
-  expect_equal(batch, circle)
+  expect_equal
+
+  # must clean up the pointers or we leak
+  delete_arrow_schema(schema_ptr)
+  delete_arrow_array(array_ptr)
 })
diff --git a/r/tests/testthat/test-data-type.R b/r/tests/testthat/test-data-type.R
index 412abef98e9..25c0dd5fc9f 100644
--- a/r/tests/testthat/test-data-type.R
+++ b/r/tests/testthat/test-data-type.R
@@ -417,10 +417,12 @@ test_that("DataType to C-interface", {
 
   # export the datatype via the C-interface
   ptr <- allocate_arrow_schema()
-  on.exit(delete_arrow_schema(ptr))
   datatype$export_to_c(ptr)
 
   # then import it and check that the roundtripped value is the same
   circle <- DataType$import_from_c(ptr)
   expect_equal(circle, datatype)
+
+  # must clean up the pointer or we leak
+  delete_arrow_schema(ptr)
 })
diff --git a/r/tests/testthat/test-field.R b/r/tests/testthat/test-field.R
index aacb5012e70..a9ef5a32e36 100644
--- a/r/tests/testthat/test-field.R
+++ b/r/tests/testthat/test-field.R
@@ -42,10 +42,12 @@ test_that("Field to C-interface", {
 
   # export the field via the C-interface
   ptr <- allocate_arrow_schema()
-  on.exit(delete_arrow_schema(ptr))
   field$export_to_c(ptr)
 
   # then import it and check that the roundtripped value is the same
   circle <- Field$import_from_c(ptr)
   expect_equal(circle, field)
+
+  # must clean up the pointer or we leak
+  delete_arrow_schema(ptr)
 })
diff --git a/r/tests/testthat/test-schema.R b/r/tests/testthat/test-schema.R
index 9509c888578..278dc19f2c9 100644
--- a/r/tests/testthat/test-schema.R
+++ b/r/tests/testthat/test-schema.R
@@ -180,10 +180,12 @@ test_that("Schema to C-interface", {
 
   # export the schema via the C-interface
   ptr <- allocate_arrow_schema()
-  on.exit(delete_arrow_schema(ptr))
   schema$export_to_c(ptr)
 
   # then import it and check that the roundtripped value is the same
   circle <- Schema$import_from_c(ptr)
   expect_equal(circle, schema)
+
+  # must clean up the pointer or we leak
+  delete_arrow_schema(ptr)
 })

From c18c7558f66755f5723e82a1e7e4fb21d120b989 Mon Sep 17 00:00:00 2001
From: Weston Pace <weston.pace@gmail.com>
Date: Tue, 22 Jun 2021 09:10:35 -0400
Subject: [PATCH 440/719] ARROW-13139: [C++] ReadaheadGenerator cannot be
 safely copied/moved

I changed the readahead generator to be consistent with the rest of the generators.  All state is put into a dedicated struct and the generator only has a shared_ptr to the dedicated struct.

Closes #10569 from westonpace/bugfix/ARROW-13139--c-readaheadgenerator-cannot-be-safely-copied-m

Authored-by: Weston Pace <weston.pace@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/src/arrow/util/async_generator.h       | 114 +++++++++++++++------
 cpp/src/arrow/util/async_generator_test.cc |  32 ++++++
 2 files changed, 113 insertions(+), 33 deletions(-)

diff --git a/cpp/src/arrow/util/async_generator.h b/cpp/src/arrow/util/async_generator.h
index 5069d5092d4..084720f9908 100644
--- a/cpp/src/arrow/util/async_generator.h
+++ b/cpp/src/arrow/util/async_generator.h
@@ -543,7 +543,7 @@ class TransformingGenerator {
 ///
 /// This generator is not async-reentrant
 ///
-/// This generator may queue up to 1 instance of T
+/// This generator may queue up to 1 instance of T but will not delay
 template <typename T, typename V>
 AsyncGenerator<V> MakeTransformedGenerator(AsyncGenerator<T> generator,
                                            Transformer<T, V> transformer) {
@@ -718,50 +718,60 @@ template <typename T>
 class ReadaheadGenerator {
  public:
   ReadaheadGenerator(AsyncGenerator<T> source_generator, int max_readahead)
-      : source_generator_(std::move(source_generator)), max_readahead_(max_readahead) {
-    auto finished = std::make_shared<std::atomic<bool>>(false);
-    mark_finished_if_done_ = [finished](const Result<T>& next_result) {
-      if (!next_result.ok()) {
-        finished->store(true);
-      } else {
-        if (IsIterationEnd(*next_result)) {
-          *finished = true;
-        }
-      }
-    };
-    finished_ = std::move(finished);
-  }
+      : state_(std::make_shared<State>(std::move(source_generator), max_readahead)) {}
 
   Future<T> operator()() {
-    if (readahead_queue_.empty()) {
+    // Copy so we can capture into lambdas
+    auto state = state_;
+    if (state->readahead_queue.empty()) {
       // This is the first request, let's pump the underlying queue
-      for (int i = 0; i < max_readahead_; i++) {
-        auto next = source_generator_();
-        next.AddCallback(mark_finished_if_done_);
-        readahead_queue_.push(std::move(next));
+      for (int i = 0; i < state->max_readahead; i++) {
+        auto next = state->source_generator();
+        auto state = state_;
+        next.AddCallback(
+            [state](const Result<T>& result) { state->MarkFinishedIfDone(result); });
+        state->readahead_queue.push(std::move(next));
       }
     }
     // Pop one and add one
-    auto result = readahead_queue_.front();
-    readahead_queue_.pop();
-    if (finished_->load()) {
-      readahead_queue_.push(AsyncGeneratorEnd<T>());
+    auto result = state->readahead_queue.front();
+    state->readahead_queue.pop();
+    if (state->finished.load()) {
+      state->readahead_queue.push(AsyncGeneratorEnd<T>());
     } else {
-      auto back_of_queue = source_generator_();
-      back_of_queue.AddCallback(mark_finished_if_done_);
-      readahead_queue_.push(std::move(back_of_queue));
+      auto back_of_queue = state->source_generator();
+      auto state = state_;
+      back_of_queue.AddCallback(
+          [state](const Result<T>& result) { state->MarkFinishedIfDone(result); });
+      state->readahead_queue.push(std::move(back_of_queue));
     }
     return result;
   }
 
  private:
-  AsyncGenerator<T> source_generator_;
-  int max_readahead_;
-  std::function<void(const Result<T>&)> mark_finished_if_done_;
-  // Can't use a bool here because finished may be referenced by callbacks that
-  // outlive this class
-  std::shared_ptr<std::atomic<bool>> finished_;
-  std::queue<Future<T>> readahead_queue_;
+  struct State {
+    State(AsyncGenerator<T> source_generator, int max_readahead)
+        : source_generator(std::move(source_generator)), max_readahead(max_readahead) {
+      finished.store(false);
+    }
+
+    void MarkFinishedIfDone(const Result<T>& next_result) {
+      if (!next_result.ok()) {
+        finished.store(true);
+      } else {
+        if (IsIterationEnd(*next_result)) {
+          finished.store(true);
+        }
+      }
+    }
+
+    AsyncGenerator<T> source_generator;
+    int max_readahead;
+    std::atomic<bool> finished;
+    std::queue<Future<T>> readahead_queue;
+  };
+
+  std::shared_ptr<State> state_;
 };
 
 /// \brief A generator where the producer pushes items on a queue.
@@ -1108,6 +1118,10 @@ AsyncGenerator<T> MakeMergedGenerator(AsyncGenerator<AsyncGenerator<T>> source,
 /// will never pull from any subscription reentrantly.
 ///
 /// This generator may queue 1 instance of T
+///
+/// TODO: Could potentially make a bespoke implementation instead of MergedGenerator that
+/// forwards async-reentrant requests instead of buffering them (which is what
+/// MergedGenerator does)
 template <typename T>
 AsyncGenerator<T> MakeConcatenatedGenerator(AsyncGenerator<AsyncGenerator<T>> source) {
   return MergedGenerator<T>(std::move(source), 1);
@@ -1573,4 +1587,38 @@ AsyncGenerator<T> MakeFailingGenerator(const Result<T>& result) {
   return MakeFailingGenerator<T>(result.status());
 }
 
+/// \brief Prepends initial_values onto a generator
+///
+/// This generator is async-reentrant but will buffer requests and will not
+/// pull from following_values async-reentrantly.
+template <typename T>
+AsyncGenerator<T> MakeGeneratorStartsWith(std::vector<T> initial_values,
+                                          AsyncGenerator<T> following_values) {
+  auto initial_values_vec_gen = MakeVectorGenerator(std::move(initial_values));
+  auto gen_gen = MakeVectorGenerator<AsyncGenerator<T>>(
+      {std::move(initial_values_vec_gen), std::move(following_values)});
+  return MakeConcatenatedGenerator(std::move(gen_gen));
+}
+
+template <typename T>
+struct CancellableGenerator {
+  Future<T> operator()() {
+    if (stop_token.IsStopRequested()) {
+      return stop_token.Poll();
+    }
+    return source();
+  }
+
+  AsyncGenerator<T> source;
+  StopToken stop_token;
+};
+
+/// \brief Allows an async generator to be cancelled
+///
+/// This generator is async-reentrant
+template <typename T>
+AsyncGenerator<T> MakeCancellable(AsyncGenerator<T> source, StopToken stop_token) {
+  return CancellableGenerator<T>{std::move(source), std::move(stop_token)};
+}
+
 }  // namespace arrow
diff --git a/cpp/src/arrow/util/async_generator_test.cc b/cpp/src/arrow/util/async_generator_test.cc
index 74850b625a2..14b528ade5e 100644
--- a/cpp/src/arrow/util/async_generator_test.cc
+++ b/cpp/src/arrow/util/async_generator_test.cc
@@ -1060,6 +1060,38 @@ TEST(TestAsyncUtil, Readahead) {
   ASSERT_TRUE(IsIterationEnd(last_val));
 }
 
+TEST(TestAsyncUtil, ReadaheadCopy) {
+  auto source = AsyncVectorIt<TestInt>(RangeVector(6));
+  auto gen = MakeReadaheadGenerator(std::move(source), 2);
+
+  for (int i = 0; i < 2; i++) {
+    ASSERT_FINISHES_OK_AND_EQ(TestInt(i), gen());
+  }
+  auto gen_copy = gen;
+  for (int i = 0; i < 2; i++) {
+    ASSERT_FINISHES_OK_AND_EQ(TestInt(i + 2), gen_copy());
+  }
+  for (int i = 0; i < 2; i++) {
+    ASSERT_FINISHES_OK_AND_EQ(TestInt(i + 4), gen());
+  }
+  AssertGeneratorExhausted(gen);
+  AssertGeneratorExhausted(gen_copy);
+}
+
+TEST(TestAsyncUtil, ReadaheadMove) {
+  auto source = AsyncVectorIt<TestInt>(RangeVector(6));
+  auto gen = MakeReadaheadGenerator(std::move(source), 2);
+
+  for (int i = 0; i < 2; i++) {
+    ASSERT_FINISHES_OK_AND_EQ(TestInt(i), gen());
+  }
+  auto gen_copy = std::move(gen);
+  for (int i = 0; i < 4; i++) {
+    ASSERT_FINISHES_OK_AND_EQ(TestInt(i + 2), gen_copy());
+  }
+  AssertGeneratorExhausted(gen_copy);
+}
+
 TEST(TestAsyncUtil, ReadaheadFailed) {
   ASSERT_OK_AND_ASSIGN(auto thread_pool, internal::ThreadPool::Make(4));
   std::atomic<int32_t> counter(0);

From 01b4ce27e043dbf6578294ed7ff5d1c87a16f25f Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Tue, 22 Jun 2021 09:12:47 -0400
Subject: [PATCH 441/719] ARROW-12827: [C++] Improve error message for dataset
 discovery failure

This adds a bit more context to the error messages, though maybe this is a bit wordy?

```
>>> ds.dataset('dataset4', format="ipc")
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/home/lidavidm/Code/upstream/arrow-12827/python/pyarrow/dataset.py", line 655, in dataset
    return _filesystem_dataset(source, **kwargs)
  File "/home/lidavidm/Code/upstream/arrow-12827/python/pyarrow/dataset.py", line 410, in _filesystem_dataset
    return factory.finish(schema)
  File "pyarrow/_dataset.pyx", line 2262, in pyarrow._dataset.DatasetFactory.finish
    return Dataset.wrap(GetResultValue(result))
  File "pyarrow/error.pxi", line 141, in pyarrow.lib.pyarrow_internal_check_status
    return check_status(status)
  File "pyarrow/error.pxi", line 97, in pyarrow.lib.check_status
    raise ArrowInvalid(message)
pyarrow.lib.ArrowInvalid: Error creating dataset. Could not read schema from 'dataset4/foo.parquet': Could not open IPC input source 'dataset4/foo.parquet': File is too small: 9. Is this a 'ipc' file?
>>> ds.dataset('dataset5', format="parquet")
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/home/lidavidm/Code/upstream/arrow-12827/python/pyarrow/dataset.py", line 655, in dataset
    return _filesystem_dataset(source, **kwargs)
  File "/home/lidavidm/Code/upstream/arrow-12827/python/pyarrow/dataset.py", line 410, in _filesystem_dataset
    return factory.finish(schema)
  File "pyarrow/_dataset.pyx", line 2262, in pyarrow._dataset.DatasetFactory.finish
    return Dataset.wrap(GetResultValue(result))
  File "pyarrow/error.pxi", line 141, in pyarrow.lib.pyarrow_internal_check_status
    return check_status(status)
  File "pyarrow/error.pxi", line 112, in pyarrow.lib.check_status
    raise IOError(message)
OSError: Error creating dataset. Could not read schema from 'dataset5/foo.parquet': Could not open Parquet input source 'dataset5/foo.parquet': Invalid: Parquet magic bytes not found in footer. Either the file is corrupted or this is not a parquet file.. Is this a 'parquet' file?
```

Closes #10483 from lidavidm/arrow-12827

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/src/arrow/dataset/discovery.cc         | 10 ++++-
 cpp/src/arrow/dataset/file_csv_test.cc     |  2 +-
 cpp/src/arrow/dataset/file_ipc_test.cc     |  2 +-
 cpp/src/arrow/dataset/file_parquet_test.cc |  2 +-
 cpp/src/arrow/dataset/test_util.h          | 50 ++++++++++++++++++----
 5 files changed, 52 insertions(+), 14 deletions(-)

diff --git a/cpp/src/arrow/dataset/discovery.cc b/cpp/src/arrow/dataset/discovery.cc
index 70b6930bf2f..e124c7abd91 100644
--- a/cpp/src/arrow/dataset/discovery.cc
+++ b/cpp/src/arrow/dataset/discovery.cc
@@ -226,8 +226,14 @@ Result<std::vector<std::shared_ptr<Schema>>> FileSystemDatasetFactory::InspectSc
   int fragments = options.fragments;
   for (const auto& info : files_) {
     if (has_fragments_limit && fragments-- == 0) break;
-    ARROW_ASSIGN_OR_RAISE(auto schema, format_->Inspect({info, fs_}));
-    schemas.push_back(schema);
+    auto result = format_->Inspect({info, fs_});
+    if (ARROW_PREDICT_FALSE(!result.ok())) {
+      return result.status().WithMessage(
+          "Error creating dataset. Could not read schema from '", info.path(),
+          "': ", result.status().message(), ". Is this a '", format_->type_name(),
+          "' file?");
+    }
+    schemas.push_back(result.MoveValueUnsafe());
   }
 
   ARROW_ASSIGN_OR_RAISE(auto partition_schema,
diff --git a/cpp/src/arrow/dataset/file_csv_test.cc b/cpp/src/arrow/dataset/file_csv_test.cc
index a0d0a75a20f..acb66d4c75b 100644
--- a/cpp/src/arrow/dataset/file_csv_test.cc
+++ b/cpp/src/arrow/dataset/file_csv_test.cc
@@ -229,7 +229,7 @@ N/A
 }
 
 TEST_P(TestCsvFileFormat, InspectFailureWithRelevantError) {
-  TestInspectFailureWithRelevantError(StatusCode::Invalid);
+  TestInspectFailureWithRelevantError(StatusCode::Invalid, "CSV");
 }
 
 TEST_P(TestCsvFileFormat, Inspect) {
diff --git a/cpp/src/arrow/dataset/file_ipc_test.cc b/cpp/src/arrow/dataset/file_ipc_test.cc
index 561ef00ae0b..f0409abe85b 100644
--- a/cpp/src/arrow/dataset/file_ipc_test.cc
+++ b/cpp/src/arrow/dataset/file_ipc_test.cc
@@ -85,7 +85,7 @@ TEST_F(TestIpcFileFormat, WriteRecordBatchReaderCustomOptions) {
 }
 
 TEST_F(TestIpcFileFormat, InspectFailureWithRelevantError) {
-  TestInspectFailureWithRelevantError(StatusCode::Invalid);
+  TestInspectFailureWithRelevantError(StatusCode::Invalid, "IPC");
 }
 TEST_F(TestIpcFileFormat, Inspect) { TestInspect(); }
 TEST_F(TestIpcFileFormat, IsSupported) { TestIsSupported(); }
diff --git a/cpp/src/arrow/dataset/file_parquet_test.cc b/cpp/src/arrow/dataset/file_parquet_test.cc
index 7722d4da885..2561bdb7c5d 100644
--- a/cpp/src/arrow/dataset/file_parquet_test.cc
+++ b/cpp/src/arrow/dataset/file_parquet_test.cc
@@ -176,7 +176,7 @@ class TestParquetFileFormat : public FileFormatFixtureMixin<ParquetFormatHelper>
 };
 
 TEST_F(TestParquetFileFormat, InspectFailureWithRelevantError) {
-  TestInspectFailureWithRelevantError(StatusCode::IOError);
+  TestInspectFailureWithRelevantError(StatusCode::IOError, "parquet");
 }
 TEST_F(TestParquetFileFormat, Inspect) { TestInspect(); }
 
diff --git a/cpp/src/arrow/dataset/test_util.h b/cpp/src/arrow/dataset/test_util.h
index 39223eba35b..6a0375073ee 100644
--- a/cpp/src/arrow/dataset/test_util.h
+++ b/cpp/src/arrow/dataset/test_util.h
@@ -407,18 +407,50 @@ class FileFormatFixtureMixin : public ::testing::Test {
   }
 
   // Shared test cases
-  void TestInspectFailureWithRelevantError(StatusCode code) {
-    std::shared_ptr<Buffer> buf = std::make_shared<Buffer>(util::string_view(""));
-    auto result = format_->Inspect(FileSource(buf));
-    EXPECT_EQ(code, result.status().code());
-    EXPECT_THAT(result.status().ToString(), testing::HasSubstr("<Buffer>"));
-
+  void AssertInspectFailure(const std::string& contents, StatusCode code,
+                            const std::string& format_name) {
+    SCOPED_TRACE("Format: " + format_name + " File contents: " + contents);
     constexpr auto file_name = "herp/derp";
+    auto make_error_message = [&](const std::string& filename) {
+      return "Could not open " + format_name + " input source '" + filename + "':";
+    };
+    const auto buf = std::make_shared<Buffer>(contents);
+    Status status;
+
+    status = format_->Inspect(FileSource(buf)).status();
+    EXPECT_EQ(code, status.code());
+    EXPECT_THAT(status.ToString(), ::testing::HasSubstr(make_error_message("<Buffer>")));
+
+    ASSERT_OK_AND_EQ(false, format_->IsSupported(FileSource(buf)));
+
     ASSERT_OK_AND_ASSIGN(
         auto fs, fs::internal::MockFileSystem::Make(fs::kNoTime, {fs::File(file_name)}));
-    result = format_->Inspect({file_name, fs});
-    EXPECT_EQ(code, result.status().code());
-    EXPECT_THAT(result.status().ToString(), testing::HasSubstr(file_name));
+    status = format_->Inspect({file_name, fs}).status();
+    EXPECT_EQ(code, status.code());
+    EXPECT_THAT(status.ToString(), testing::HasSubstr(make_error_message("herp/derp")));
+
+    fs::FileSelector s;
+    s.base_dir = "/";
+    s.recursive = true;
+    FileSystemFactoryOptions options;
+    ASSERT_OK_AND_ASSIGN(auto factory,
+                         FileSystemDatasetFactory::Make(fs, s, format_, options));
+    status = factory->Finish().status();
+    EXPECT_EQ(code, status.code());
+    EXPECT_THAT(
+        status.ToString(),
+        ::testing::AllOf(
+            ::testing::HasSubstr(make_error_message("/herp/derp")),
+            ::testing::HasSubstr(
+                "Error creating dataset. Could not read schema from '/herp/derp':"),
+            ::testing::HasSubstr("Is this a '" + format_->type_name() + "' file?")));
+  }
+  void TestInspectFailureWithRelevantError(StatusCode code,
+                                           const std::string format_name) {
+    const std::vector<std::string> file_contents{"", "PAR0", "ASDFPAR1", "ARROW1"};
+    for (const auto& contents : file_contents) {
+      AssertInspectFailure(contents, code, format_name);
+    }
   }
   void TestInspect() {
     auto reader = GetRecordBatchReader(schema({field("f64", float64())}));

From 7e93a334279d79a67f8744c3c1249944f6da0aca Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Tue, 22 Jun 2021 09:16:11 -0400
Subject: [PATCH 442/719] ARROW-13034: [Python][Docs] Update the cloud examples
 on the Parquet doc page

Closes #10548 from jorisvandenbossche/ARROW-13034

Authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 docs/source/python/filesystems.rst |  1 +
 docs/source/python/parquet.rst     | 67 +++++++++++++-----------------
 2 files changed, 31 insertions(+), 37 deletions(-)

diff --git a/docs/source/python/filesystems.rst b/docs/source/python/filesystems.rst
index 01e7d7bba55..13c0d93101e 100644
--- a/docs/source/python/filesystems.rst
+++ b/docs/source/python/filesystems.rst
@@ -178,6 +178,7 @@ some environment variables.
   If ``CLASSPATH`` is not set, then it will be set automatically if the
   ``hadoop`` executable is in your system path, or if ``HADOOP_HOME`` is set.
 
+.. _filesystem-fsspec:
 
 Using fsspec-compatible filesystems
 -----------------------------------
diff --git a/docs/source/python/parquet.rst b/docs/source/python/parquet.rst
index 693fc97e062..0db0df1bc4c 100644
--- a/docs/source/python/parquet.rst
+++ b/docs/source/python/parquet.rst
@@ -387,7 +387,8 @@ individual table writes are wrapped using ``with`` statements so the
 .. code-block:: python
 
    # Remote file-system example
-   fs = pa.hdfs.connect(host, port, user=user, kerb_ticket=ticket_cache_path)
+   from pyarrow.fs import HadoopFileSystem
+   fs = HadoopFileSystem(host, port, user=user, kerb_ticket=ticket_cache_path)
    pq.write_to_dataset(table, root_path='dataset_name',
                        partition_cols=['one', 'two'], filesystem=fs)
 
@@ -545,46 +546,38 @@ This can be disabled by specifying ``use_threads=False``.
    The number of threads to use concurrently is automatically inferred by Arrow
    and can be inspected using the :func:`~pyarrow.cpu_count()` function.
 
+Reading from cloud storage
+--------------------------
 
-Reading a Parquet File from Azure Blob storage
-----------------------------------------------
+In addition to local files, pyarrow supports other filesystems, such as cloud
+filesystems, through the ``filesystem`` keyword:
 
-The code below shows how to use Azure's storage sdk along with pyarrow to read
-a parquet file into a Pandas dataframe.
-This is suitable for executing inside a Jupyter notebook running on a Python 3
-kernel.
+.. code-block:: python
+
+    from pyarrow import fs
 
-Dependencies:
+    s3  = fs.S3FileSystem(region="us-east-2")
+    table = pq.read_table("bucket/object/key/prefix", filesystem=s3)
 
-* python 3.6.2
-* azure-storage 0.36.0
-* pyarrow 0.8.0
+Currently, :class:`HDFS <pyarrow.fs.HadoopFileSystem>` and
+:class:`Amazon S3-compatible storage <pyarrow.fs.S3FileSystem>` are
+supported. See the :ref:`filesystem` docs for more details. For those
+built-in filesystems, the filesystem can also be inferred from the file path,
+if specified as a URI:
 
 .. code-block:: python
 
-   import pyarrow.parquet as pq
-   from io import BytesIO
-   from azure.storage.blob import BlockBlobService
-
-   account_name = '...'
-   account_key = '...'
-   container_name = '...'
-   parquet_file = 'mysample.parquet'
-
-   byte_stream = io.BytesIO()
-   block_blob_service = BlockBlobService(account_name=account_name, account_key=account_key)
-   try:
-      block_blob_service.get_blob_to_stream(container_name=container_name, blob_name=parquet_file, stream=byte_stream)
-      df = pq.read_table(source=byte_stream).to_pandas()
-      # Do work on df ...
-   finally:
-      # Add finally block to ensure closure of the stream
-      byte_stream.close()
-
-Notes:
-
-* The ``account_key`` can be found under ``Settings -> Access keys`` in the
-  Microsoft Azure portal for a given container
-* The code above works for a container with private access, Lease State =
-  Available, Lease Status = Unlocked
-* The parquet file was Blob Type = Block blob
+    table = pq.read_table("s3://bucket/object/key/prefix")
+
+Other filesystems can still be supported if there is an
+`fsspec <https://filesystem-spec.readthedocs.io/en/latest/>`__-compatible
+implementation available. See :ref:`filesystem-fsspec` for more details.
+One example is Azure Blob storage, which can be interfaced through the
+`adlfs <https://github.com/dask/adlfs>`__ package.
+
+.. code-block:: python
+
+    from adlfs import AzureBlobFileSystem
+
+    abfs = AzureBlobFileSystem(account_name="XXXX", account_key="XXXX", container_name="XXXX")
+    table = pq.read_table("file.parquet", filesystem=abfs)

From 3e56bdedc54f613e30b999eefab2a3bd408f542d Mon Sep 17 00:00:00 2001
From: "Uwe L. Korn" <uwe.korn@quantco.com>
Date: Tue, 22 Jun 2021 17:45:17 +0200
Subject: [PATCH 443/719] ARROW-13140: [C++/Python] Upgrade libthrift pin in
 the nightlies
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #10570 from xhochy/ARROW-13140

Authored-by: Uwe L. Korn <uwe.korn@quantco.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 ...cuda_compiler_version10.2numpy1.17python3.6.____cpython.yaml | 2 +-
 ...cuda_compiler_version10.2numpy1.17python3.7.____cpython.yaml | 2 +-
 ...cuda_compiler_version10.2numpy1.17python3.8.____cpython.yaml | 2 +-
 ...cuda_compiler_version10.2numpy1.19python3.9.____cpython.yaml | 2 +-
 ...cuda_compiler_versionNonenumpy1.17python3.6.____cpython.yaml | 2 +-
 ...cuda_compiler_versionNonenumpy1.17python3.7.____cpython.yaml | 2 +-
 ...cuda_compiler_versionNonenumpy1.17python3.8.____cpython.yaml | 2 +-
 ...cuda_compiler_versionNonenumpy1.19python3.9.____cpython.yaml | 2 +-
 .../linux_aarch64_numpy1.17python3.6.____cpython.yaml           | 2 +-
 .../linux_aarch64_numpy1.17python3.7.____cpython.yaml           | 2 +-
 .../linux_aarch64_numpy1.17python3.8.____cpython.yaml           | 2 +-
 .../linux_aarch64_numpy1.19python3.9.____cpython.yaml           | 2 +-
 .../.ci_support/osx_64_numpy1.17python3.6.____cpython.yaml      | 2 +-
 .../.ci_support/osx_64_numpy1.17python3.7.____cpython.yaml      | 2 +-
 .../.ci_support/osx_64_numpy1.17python3.8.____cpython.yaml      | 2 +-
 .../.ci_support/osx_64_numpy1.19python3.9.____cpython.yaml      | 2 +-
 .../.ci_support/osx_arm64_python3.8.____cpython.yaml            | 2 +-
 .../.ci_support/osx_arm64_python3.9.____cpython.yaml            | 2 +-
 ...cuda_compiler_versionNonenumpy1.17python3.6.____cpython.yaml | 2 +-
 ...cuda_compiler_versionNonenumpy1.17python3.7.____cpython.yaml | 2 +-
 ...cuda_compiler_versionNonenumpy1.17python3.8.____cpython.yaml | 2 +-
 ...cuda_compiler_versionNonenumpy1.19python3.9.____cpython.yaml | 2 +-
 22 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.6.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.6.____cpython.yaml
index 9d014c1a2f8..dfc87c80b31 100644
--- a/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.6.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.6.____cpython.yaml
@@ -55,7 +55,7 @@ snappy:
 target_platform:
 - linux-64
 thrift_cpp:
-- 0.14.1
+- 0.14.2
 zip_keys:
 - - c_compiler_version
   - cxx_compiler_version
diff --git a/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.7.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.7.____cpython.yaml
index b8cde8e0752..3416b952c90 100644
--- a/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.7.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.7.____cpython.yaml
@@ -55,7 +55,7 @@ snappy:
 target_platform:
 - linux-64
 thrift_cpp:
-- 0.14.1
+- 0.14.2
 zip_keys:
 - - c_compiler_version
   - cxx_compiler_version
diff --git a/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.8.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.8.____cpython.yaml
index 5e5c7ab7c93..f819ba7229e 100644
--- a/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.8.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.17python3.8.____cpython.yaml
@@ -55,7 +55,7 @@ snappy:
 target_platform:
 - linux-64
 thrift_cpp:
-- 0.14.1
+- 0.14.2
 zip_keys:
 - - c_compiler_version
   - cxx_compiler_version
diff --git a/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.19python3.9.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.19python3.9.____cpython.yaml
index d0926aa3cef..3e2e0ef51fb 100644
--- a/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.19python3.9.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_version10.2numpy1.19python3.9.____cpython.yaml
@@ -55,7 +55,7 @@ snappy:
 target_platform:
 - linux-64
 thrift_cpp:
-- 0.14.1
+- 0.14.2
 zip_keys:
 - - c_compiler_version
   - cxx_compiler_version
diff --git a/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.6.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.6.____cpython.yaml
index 6625c55c2c9..3aba0f1294c 100644
--- a/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.6.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.6.____cpython.yaml
@@ -55,7 +55,7 @@ snappy:
 target_platform:
 - linux-64
 thrift_cpp:
-- 0.14.1
+- 0.14.2
 zip_keys:
 - - c_compiler_version
   - cxx_compiler_version
diff --git a/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython.yaml
index d356a8a56cf..ff26bc5215e 100644
--- a/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython.yaml
@@ -55,7 +55,7 @@ snappy:
 target_platform:
 - linux-64
 thrift_cpp:
-- 0.14.1
+- 0.14.2
 zip_keys:
 - - c_compiler_version
   - cxx_compiler_version
diff --git a/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.8.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.8.____cpython.yaml
index 61f311506e6..5703aba68ec 100644
--- a/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.8.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.17python3.8.____cpython.yaml
@@ -55,7 +55,7 @@ snappy:
 target_platform:
 - linux-64
 thrift_cpp:
-- 0.14.1
+- 0.14.2
 zip_keys:
 - - c_compiler_version
   - cxx_compiler_version
diff --git a/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.19python3.9.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.19python3.9.____cpython.yaml
index 6abfe8271dc..8ff58d717e8 100644
--- a/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.19python3.9.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/linux_64_cuda_compiler_versionNonenumpy1.19python3.9.____cpython.yaml
@@ -55,7 +55,7 @@ snappy:
 target_platform:
 - linux-64
 thrift_cpp:
-- 0.14.1
+- 0.14.2
 zip_keys:
 - - c_compiler_version
   - cxx_compiler_version
diff --git a/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.6.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.6.____cpython.yaml
index 408e05667bd..5bb4381febf 100644
--- a/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.6.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.6.____cpython.yaml
@@ -57,7 +57,7 @@ snappy:
 target_platform:
 - linux-aarch64
 thrift_cpp:
-- 0.14.1
+- 0.14.2
 zip_keys:
 - - c_compiler_version
   - cxx_compiler_version
diff --git a/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.7.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.7.____cpython.yaml
index ef7ff818a54..2b1715d585b 100644
--- a/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.7.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.7.____cpython.yaml
@@ -57,7 +57,7 @@ snappy:
 target_platform:
 - linux-aarch64
 thrift_cpp:
-- 0.14.1
+- 0.14.2
 zip_keys:
 - - c_compiler_version
   - cxx_compiler_version
diff --git a/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.8.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.8.____cpython.yaml
index ea0327e5c2a..5a0e7313e9d 100644
--- a/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.8.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.17python3.8.____cpython.yaml
@@ -57,7 +57,7 @@ snappy:
 target_platform:
 - linux-aarch64
 thrift_cpp:
-- 0.14.1
+- 0.14.2
 zip_keys:
 - - c_compiler_version
   - cxx_compiler_version
diff --git a/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.19python3.9.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.19python3.9.____cpython.yaml
index 5ba7c16b1b0..16ace00bdae 100644
--- a/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.19python3.9.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/linux_aarch64_numpy1.19python3.9.____cpython.yaml
@@ -57,7 +57,7 @@ snappy:
 target_platform:
 - linux-aarch64
 thrift_cpp:
-- 0.14.1
+- 0.14.2
 zip_keys:
 - - c_compiler_version
   - cxx_compiler_version
diff --git a/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.6.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.6.____cpython.yaml
index d4a7e2a75b5..0be59fe1a38 100644
--- a/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.6.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.6.____cpython.yaml
@@ -53,7 +53,7 @@ snappy:
 target_platform:
 - osx-64
 thrift_cpp:
-- 0.14.1
+- 0.14.2
 zip_keys:
 - - c_compiler_version
   - cxx_compiler_version
diff --git a/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.7.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.7.____cpython.yaml
index c7e57ba9a1a..d2c046ab2ea 100644
--- a/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.7.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.7.____cpython.yaml
@@ -53,7 +53,7 @@ snappy:
 target_platform:
 - osx-64
 thrift_cpp:
-- 0.14.1
+- 0.14.2
 zip_keys:
 - - c_compiler_version
   - cxx_compiler_version
diff --git a/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.8.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.8.____cpython.yaml
index 7fd69d4e965..43f63445469 100644
--- a/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.8.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.17python3.8.____cpython.yaml
@@ -53,7 +53,7 @@ snappy:
 target_platform:
 - osx-64
 thrift_cpp:
-- 0.14.1
+- 0.14.2
 zip_keys:
 - - c_compiler_version
   - cxx_compiler_version
diff --git a/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.19python3.9.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.19python3.9.____cpython.yaml
index 8ba2718c411..7cc730f9bb0 100644
--- a/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.19python3.9.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/osx_64_numpy1.19python3.9.____cpython.yaml
@@ -53,7 +53,7 @@ snappy:
 target_platform:
 - osx-64
 thrift_cpp:
-- 0.14.1
+- 0.14.2
 zip_keys:
 - - c_compiler_version
   - cxx_compiler_version
diff --git a/dev/tasks/conda-recipes/.ci_support/osx_arm64_python3.8.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/osx_arm64_python3.8.____cpython.yaml
index e9ae1d9858e..e5f8e2ba2a8 100644
--- a/dev/tasks/conda-recipes/.ci_support/osx_arm64_python3.8.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/osx_arm64_python3.8.____cpython.yaml
@@ -53,7 +53,7 @@ snappy:
 target_platform:
 - osx-arm64
 thrift_cpp:
-- 0.14.1
+- 0.14.2
 zip_keys:
 - - c_compiler_version
   - cxx_compiler_version
diff --git a/dev/tasks/conda-recipes/.ci_support/osx_arm64_python3.9.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/osx_arm64_python3.9.____cpython.yaml
index 84ef1bc9b85..cd3eca6d23d 100644
--- a/dev/tasks/conda-recipes/.ci_support/osx_arm64_python3.9.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/osx_arm64_python3.9.____cpython.yaml
@@ -53,7 +53,7 @@ snappy:
 target_platform:
 - osx-arm64
 thrift_cpp:
-- 0.14.1
+- 0.14.2
 zip_keys:
 - - c_compiler_version
   - cxx_compiler_version
diff --git a/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.6.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.6.____cpython.yaml
index 63a7faeaa33..8d4e25167b0 100644
--- a/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.6.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.6.____cpython.yaml
@@ -45,7 +45,7 @@ snappy:
 target_platform:
 - win-64
 thrift_cpp:
-- 0.14.1
+- 0.14.2
 zip_keys:
 - - numpy
   - python
diff --git a/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython.yaml
index 684987c6fbb..8da4a8380b7 100644
--- a/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.7.____cpython.yaml
@@ -45,7 +45,7 @@ snappy:
 target_platform:
 - win-64
 thrift_cpp:
-- 0.14.1
+- 0.14.2
 zip_keys:
 - - numpy
   - python
diff --git a/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.8.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.8.____cpython.yaml
index afefac79ec7..1980e1be39b 100644
--- a/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.8.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.17python3.8.____cpython.yaml
@@ -45,7 +45,7 @@ snappy:
 target_platform:
 - win-64
 thrift_cpp:
-- 0.14.1
+- 0.14.2
 zip_keys:
 - - numpy
   - python
diff --git a/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.19python3.9.____cpython.yaml b/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.19python3.9.____cpython.yaml
index c385d13eac0..1106037d36b 100644
--- a/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.19python3.9.____cpython.yaml
+++ b/dev/tasks/conda-recipes/.ci_support/win_64_cuda_compiler_versionNonenumpy1.19python3.9.____cpython.yaml
@@ -45,7 +45,7 @@ snappy:
 target_platform:
 - win-64
 thrift_cpp:
-- 0.14.1
+- 0.14.2
 zip_keys:
 - - numpy
   - python

From 8aeec2896759e12ac385b44d61531528528dfac7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Tue, 22 Jun 2021 17:46:56 +0200
Subject: [PATCH 444/719] ARROW-12983: [C++][Python][R] Properly overflow to
 chunked array in Python-to-Arrow conversion
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Still need to port the R changes from #10470

Tested locally using:

```
 PYARROW_TEST_SLOW=ON PYARROW_TEST_LARGE_MEMORY=ON ./run_test.sh -sv pyarrow/tests/
```

Closes #10556 from kszucs/fff

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 .github/workflows/python.yml                  |   7 +
 ci/scripts/python_test.sh                     |   2 +-
 cpp/src/arrow/array/builder_binary.h          |   7 -
 cpp/src/arrow/python/inference.cc             |   9 +-
 cpp/src/arrow/python/iterators.h              |  19 +--
 cpp/src/arrow/python/numpy_internal.h         |   2 +-
 cpp/src/arrow/python/python_to_arrow.cc       |  55 +++-----
 cpp/src/arrow/util/converter.h                |  85 ++++++++---
 .../pyarrow/tests/parquet/test_data_types.py  |   5 +
 python/pyarrow/tests/test_array.py            |   1 +
 python/pyarrow/tests/test_convert_builtin.py  |  54 ++++++-
 python/pyarrow/tests/test_pandas.py           |   8 +-
 r/src/r_to_arrow.cpp                          | 133 +++++++++---------
 13 files changed, 236 insertions(+), 151 deletions(-)

diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index 981fd61c029..c6781a4b149 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -131,6 +131,7 @@ jobs:
       ARROW_WITH_BROTLI: ON
       ARROW_BUILD_TESTS: OFF
       CMAKE_ARGS: "-DPython3_EXECUTABLE=/usr/local/bin/python3"
+      PYARROW_TEST_LARGE_MEMORY: ON
     steps:
       - name: Checkout Arrow
         uses: actions/checkout@v2
@@ -139,6 +140,12 @@ jobs:
       - name: Fetch Submodules and Tags
         shell: bash
         run: ci/scripts/util_checkout.sh
+      - name: Show available RAM size
+        shell: bash
+        run: |
+          hwmemsize=$(sysctl -n hw.memsize)
+          ramsize=$(expr $hwmemsize / $((1024**3)))
+          echo "System Memory: ${ramsize} GB"
       - name: Install Dependencies
         shell: bash
         run: |
diff --git a/ci/scripts/python_test.sh b/ci/scripts/python_test.sh
index 80a9cdef4a3..6e05af89a19 100755
--- a/ci/scripts/python_test.sh
+++ b/ci/scripts/python_test.sh
@@ -29,4 +29,4 @@ export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH}
 # Enable some checks inside Python itself
 export PYTHONDEVMODE=1
 
-pytest -r s ${PYTEST_ARGS} --pyargs pyarrow
+pytest -r s -v ${PYTEST_ARGS} --pyargs pyarrow
diff --git a/cpp/src/arrow/array/builder_binary.h b/cpp/src/arrow/array/builder_binary.h
index a60031258ad..c1c664a1249 100644
--- a/cpp/src/arrow/array/builder_binary.h
+++ b/cpp/src/arrow/array/builder_binary.h
@@ -291,14 +291,7 @@ class BaseBinaryBuilder : public ArrayBuilder {
   }
 
   Status Resize(int64_t capacity) override {
-    // XXX Why is this check necessary?  There is no reason to disallow, say,
-    // binary arrays with more than 2**31 empty or null values.
-    if (capacity > memory_limit()) {
-      return Status::CapacityError("BinaryBuilder cannot reserve space for more than ",
-                                   memory_limit(), " child elements, got ", capacity);
-    }
     ARROW_RETURN_NOT_OK(CheckCapacity(capacity));
-
     // One more than requested for offsets
     ARROW_RETURN_NOT_OK(offsets_builder_.Resize(capacity + 1));
     return ArrayBuilder::Resize(capacity);
diff --git a/cpp/src/arrow/python/inference.cc b/cpp/src/arrow/python/inference.cc
index 9d6707aa11d..5086815f84f 100644
--- a/cpp/src/arrow/python/inference.cc
+++ b/cpp/src/arrow/python/inference.cc
@@ -379,12 +379,13 @@ class TypeInferrer {
   // Infer value type from a sequence of values
   Status VisitSequence(PyObject* obj, PyObject* mask = nullptr) {
     if (mask == nullptr || mask == Py_None) {
-      return internal::VisitSequence(obj, [this](PyObject* value, bool* keep_going) {
-        return Visit(value, keep_going);
-      });
+      return internal::VisitSequence(
+          obj, /*offset=*/0,
+          [this](PyObject* value, bool* keep_going) { return Visit(value, keep_going); });
     } else {
       return internal::VisitSequenceMasked(
-          obj, mask, [this](PyObject* value, uint8_t masked, bool* keep_going) {
+          obj, mask, /*offset=*/0,
+          [this](PyObject* value, uint8_t masked, bool* keep_going) {
             if (!masked) {
               return Visit(value, keep_going);
             } else {
diff --git a/cpp/src/arrow/python/iterators.h b/cpp/src/arrow/python/iterators.h
index 6b0b55342a5..58213ee2dbc 100644
--- a/cpp/src/arrow/python/iterators.h
+++ b/cpp/src/arrow/python/iterators.h
@@ -36,7 +36,7 @@ namespace internal {
 //
 // If keep_going is set to false, the iteration terminates
 template <class VisitorFunc>
-inline Status VisitSequenceGeneric(PyObject* obj, VisitorFunc&& func) {
+inline Status VisitSequenceGeneric(PyObject* obj, int64_t offset, VisitorFunc&& func) {
   // VisitorFunc may set to false to terminate iteration
   bool keep_going = true;
 
@@ -49,7 +49,7 @@ inline Status VisitSequenceGeneric(PyObject* obj, VisitorFunc&& func) {
     if (PyArray_DESCR(arr_obj)->type_num == NPY_OBJECT) {
       // It's an array object, we can fetch object pointers directly
       const Ndarray1DIndexer<PyObject*> objects(arr_obj);
-      for (int64_t i = 0; keep_going && i < objects.size(); ++i) {
+      for (int64_t i = offset; keep_going && i < objects.size(); ++i) {
         RETURN_NOT_OK(func(objects[i], i, &keep_going));
       }
       return Status::OK();
@@ -64,7 +64,7 @@ inline Status VisitSequenceGeneric(PyObject* obj, VisitorFunc&& func) {
     if (PyList_Check(obj) || PyTuple_Check(obj)) {
       // Use fast item access
       const Py_ssize_t size = PySequence_Fast_GET_SIZE(obj);
-      for (Py_ssize_t i = 0; keep_going && i < size; ++i) {
+      for (Py_ssize_t i = offset; keep_going && i < size; ++i) {
         PyObject* value = PySequence_Fast_GET_ITEM(obj, i);
         RETURN_NOT_OK(func(value, static_cast<int64_t>(i), &keep_going));
       }
@@ -72,7 +72,7 @@ inline Status VisitSequenceGeneric(PyObject* obj, VisitorFunc&& func) {
       // Regular sequence: avoid making a potentially large copy
       const Py_ssize_t size = PySequence_Size(obj);
       RETURN_IF_PYERROR();
-      for (Py_ssize_t i = 0; keep_going && i < size; ++i) {
+      for (Py_ssize_t i = offset; keep_going && i < size; ++i) {
         OwnedRef value_ref(PySequence_ITEM(obj, i));
         RETURN_IF_PYERROR();
         RETURN_NOT_OK(func(value_ref.obj(), static_cast<int64_t>(i), &keep_going));
@@ -86,16 +86,17 @@ inline Status VisitSequenceGeneric(PyObject* obj, VisitorFunc&& func) {
 
 // Visit sequence with no null mask
 template <class VisitorFunc>
-inline Status VisitSequence(PyObject* obj, VisitorFunc&& func) {
+inline Status VisitSequence(PyObject* obj, int64_t offset, VisitorFunc&& func) {
   return VisitSequenceGeneric(
-      obj, [&func](PyObject* value, int64_t i /* unused */, bool* keep_going) {
+      obj, offset, [&func](PyObject* value, int64_t i /* unused */, bool* keep_going) {
         return func(value, keep_going);
       });
 }
 
 /// Visit sequence with null mask
 template <class VisitorFunc>
-inline Status VisitSequenceMasked(PyObject* obj, PyObject* mo, VisitorFunc&& func) {
+inline Status VisitSequenceMasked(PyObject* obj, PyObject* mo, int64_t offset,
+                                  VisitorFunc&& func) {
   if (mo == nullptr || !PyArray_Check(mo)) {
     return Status::Invalid("Null mask must be NumPy array");
   }
@@ -115,7 +116,7 @@ inline Status VisitSequenceMasked(PyObject* obj, PyObject* mo, VisitorFunc&& fun
     Ndarray1DIndexer<uint8_t> mask_values(mask);
 
     return VisitSequenceGeneric(
-        obj, [&func, &mask_values](PyObject* value, int64_t i, bool* keep_going) {
+        obj, offset, [&func, &mask_values](PyObject* value, int64_t i, bool* keep_going) {
           return func(value, mask_values[i], keep_going);
         });
   } else {
@@ -132,7 +133,7 @@ template <class VisitorFunc>
 inline Status VisitIterable(PyObject* obj, VisitorFunc&& func) {
   if (PySequence_Check(obj)) {
     // Numpy arrays fall here as well
-    return VisitSequence(obj, std::forward<VisitorFunc>(func));
+    return VisitSequence(obj, /*offset=*/0, std::forward<VisitorFunc>(func));
   }
   // Fall back on the iterator protocol
   OwnedRef iter_ref(PyObject_GetIter(obj));
diff --git a/cpp/src/arrow/python/numpy_internal.h b/cpp/src/arrow/python/numpy_internal.h
index f43599eb3eb..973f577cb13 100644
--- a/cpp/src/arrow/python/numpy_internal.h
+++ b/cpp/src/arrow/python/numpy_internal.h
@@ -52,7 +52,7 @@ class Ndarray1DIndexer {
 
   int64_t size() const { return PyArray_SIZE(arr_); }
 
-  T* data() const { return data_; }
+  const T* data() const { return reinterpret_cast<const T*>(data_); }
 
   bool is_strided() const { return stride_ != sizeof(T); }
 
diff --git a/cpp/src/arrow/python/python_to_arrow.cc b/cpp/src/arrow/python/python_to_arrow.cc
index b2d9f1cb5a3..521249fd542 100644
--- a/cpp/src/arrow/python/python_to_arrow.cc
+++ b/cpp/src/arrow/python/python_to_arrow.cc
@@ -392,22 +392,25 @@ class PyValue {
 class PyConverter : public Converter<PyObject*, PyConversionOptions> {
  public:
   // Iterate over the input values and defer the conversion to the Append method
-  Status Extend(PyObject* values, int64_t size) override {
+  Status Extend(PyObject* values, int64_t size, int64_t offset = 0) override {
+    DCHECK_GE(size, offset);
     /// Ensure we've allocated enough space
-    RETURN_NOT_OK(this->Reserve(size));
+    RETURN_NOT_OK(this->Reserve(size - offset));
     // Iterate over the items adding each one
-    return internal::VisitSequence(values, [this](PyObject* item, bool* /* unused */) {
-      return this->Append(item);
-    });
+    return internal::VisitSequence(
+        values, offset,
+        [this](PyObject* item, bool* /* unused */) { return this->Append(item); });
   }
 
   // Convert and append a sequence of values masked with a numpy array
-  Status ExtendMasked(PyObject* values, PyObject* mask, int64_t size) override {
+  Status ExtendMasked(PyObject* values, PyObject* mask, int64_t size,
+                      int64_t offset = 0) override {
+    DCHECK_GE(size, offset);
     /// Ensure we've allocated enough space
-    RETURN_NOT_OK(this->Reserve(size));
+    RETURN_NOT_OK(this->Reserve(size - offset));
     // Iterate over the items adding each one
     return internal::VisitSequenceMasked(
-        values, mask, [this](PyObject* item, bool is_masked, bool* /* unused */) {
+        values, mask, offset, [this](PyObject* item, bool is_masked, bool* /* unused */) {
           if (is_masked) {
             return this->AppendNull();
           } else {
@@ -514,34 +517,6 @@ class PyPrimitiveConverter<
   }
 };
 
-template <typename T>
-class PyPrimitiveConverter<T, enable_if_binary<T>>
-    : public PrimitiveConverter<T, PyConverter> {
- public:
-  using OffsetType = typename T::offset_type;
-
-  Status Append(PyObject* value) override {
-    if (PyValue::IsNull(this->options_, value)) {
-      this->primitive_builder_->UnsafeAppendNull();
-    } else {
-      ARROW_RETURN_NOT_OK(
-          PyValue::Convert(this->primitive_type_, this->options_, value, view_));
-      // Since we don't know the varying length input size in advance, we need to
-      // reserve space in the value builder one by one. ReserveData raises CapacityError
-      // if the value would not fit into the array.
-      ARROW_RETURN_NOT_OK(this->primitive_builder_->ReserveData(view_.size));
-      this->primitive_builder_->UnsafeAppend(view_.bytes,
-                                             static_cast<OffsetType>(view_.size));
-    }
-    return Status::OK();
-  }
-
- protected:
-  // Create a single instance of PyBytesView here to prevent unnecessary object
-  // creation/destruction. This significantly improves the conversion performance.
-  PyBytesView view_;
-};
-
 template <typename T>
 class PyPrimitiveConverter<T, enable_if_t<std::is_same<T, FixedSizeBinaryType>::value>>
     : public PrimitiveConverter<T, PyConverter> {
@@ -563,7 +538,7 @@ class PyPrimitiveConverter<T, enable_if_t<std::is_same<T, FixedSizeBinaryType>::
 };
 
 template <typename T>
-class PyPrimitiveConverter<T, enable_if_string_like<T>>
+class PyPrimitiveConverter<T, enable_if_base_binary<T>>
     : public PrimitiveConverter<T, PyConverter> {
  public:
   using OffsetType = typename T::offset_type;
@@ -578,6 +553,9 @@ class PyPrimitiveConverter<T, enable_if_string_like<T>>
         // observed binary value
         observed_binary_ = true;
       }
+      // Since we don't know the varying length input size in advance, we need to
+      // reserve space in the value builder one by one. ReserveData raises CapacityError
+      // if the value would not fit into the array.
       ARROW_RETURN_NOT_OK(this->primitive_builder_->ReserveData(view_.size));
       this->primitive_builder_->UnsafeAppend(view_.bytes,
                                              static_cast<OffsetType>(view_.size));
@@ -728,7 +706,6 @@ class PyListConverter : public ListConverter<T, PyConverter, PyConverterTrait> {
     auto value_builder =
         checked_cast<ValueBuilderType*>(this->value_converter_->builder().get());
 
-    // TODO(wesm): Vector append when not strided
     Ndarray1DIndexer<NumpyType> values(ndarray);
     if (null_sentinels_possible) {
       for (int64_t i = 0; i < values.size(); ++i) {
@@ -738,6 +715,8 @@ class PyListConverter : public ListConverter<T, PyConverter, PyConverterTrait> {
           RETURN_NOT_OK(value_builder->Append(values[i]));
         }
       }
+    } else if (!values.is_strided()) {
+      RETURN_NOT_OK(value_builder->AppendValues(values.data(), values.size()));
     } else {
       for (int64_t i = 0; i < values.size(); ++i) {
         RETURN_NOT_OK(value_builder->Append(values[i]));
diff --git a/cpp/src/arrow/util/converter.h b/cpp/src/arrow/util/converter.h
index 2c40a48726b..0b29e0f5bc7 100644
--- a/cpp/src/arrow/util/converter.h
+++ b/cpp/src/arrow/util/converter.h
@@ -54,11 +54,12 @@ class Converter {
 
   virtual Status Append(InputType value) { return Status::NotImplemented("Append"); }
 
-  virtual Status Extend(InputType values, int64_t size) {
+  virtual Status Extend(InputType values, int64_t size, int64_t offset = 0) {
     return Status::NotImplemented("Extend");
   }
 
-  virtual Status ExtendMasked(InputType values, InputType mask, int64_t size) {
+  virtual Status ExtendMasked(InputType values, InputType mask, int64_t size,
+                              int64_t offset = 0) {
     return Status::NotImplemented("ExtendMasked");
   }
 
@@ -70,6 +71,8 @@ class Converter {
 
   bool may_overflow() const { return may_overflow_; }
 
+  bool rewind_on_overflow() const { return rewind_on_overflow_; }
+
   virtual Status Reserve(int64_t additional_capacity) {
     return builder_->Reserve(additional_capacity);
   }
@@ -96,6 +99,7 @@ class Converter {
   std::shared_ptr<ArrayBuilder> builder_;
   OptionsType options_;
   bool may_overflow_ = false;
+  bool rewind_on_overflow_ = false;
 };
 
 template <typename ArrowType, typename BaseConverter>
@@ -134,7 +138,8 @@ class ListConverter : public BaseConverter {
         std::make_shared<BuilderType>(pool, value_converter_->builder(), this->type_);
     list_builder_ = checked_cast<BuilderType*>(this->builder_.get());
     // Narrow list types may overflow
-    this->may_overflow_ = sizeof(typename ArrowType::offset_type) < sizeof(int64_t);
+    this->may_overflow_ = this->rewind_on_overflow_ =
+        sizeof(typename ArrowType::offset_type) < sizeof(int64_t);
     return Status::OK();
   }
 
@@ -167,6 +172,7 @@ class StructConverter : public BaseConverter {
                             (MakeConverter<BaseConverter, ConverterTrait>(
                                 field->type(), this->options_, pool)));
       this->may_overflow_ |= child_converter->may_overflow();
+      this->rewind_on_overflow_ = this->may_overflow_;
       child_builders.push_back(child_converter->builder());
       children_.push_back(std::move(child_converter));
     }
@@ -302,32 +308,69 @@ class Chunker {
     return status;
   }
 
-  // we could get bit smarter here since the whole batch of appendable values
-  // will be rejected if a capacity error is raised
-  Status Extend(InputType values, int64_t size) {
-    auto status = converter_->Extend(values, size);
-    if (ARROW_PREDICT_FALSE(status.IsCapacityError())) {
-      if (converter_->builder()->length() == 0) {
+  Status Extend(InputType values, int64_t size, int64_t offset = 0) {
+    while (offset < size) {
+      auto length_before = converter_->builder()->length();
+      auto status = converter_->Extend(values, size, offset);
+      auto length_after = converter_->builder()->length();
+      auto num_converted = length_after - length_before;
+
+      offset += num_converted;
+      length_ += num_converted;
+
+      if (status.IsCapacityError()) {
+        if (converter_->builder()->length() == 0) {
+          // Builder length == 0 means the individual element is too large to append.
+          // In this case, no need to try again.
+          return status;
+        } else if (converter_->rewind_on_overflow()) {
+          // The list-like and binary-like conversion paths may raise  a capacity error,
+          // we need to handle them differently. While the binary-like converters check
+          // the capacity before append/extend the list-like converters just check after
+          // append/extend. Thus depending on the implementation semantics we may need
+          // to rewind (slice) the output chunk by one.
+          length_ -= 1;
+          offset -= 1;
+        }
+        ARROW_RETURN_NOT_OK(FinishChunk());
+      } else if (!status.ok()) {
         return status;
       }
-      ARROW_RETURN_NOT_OK(FinishChunk());
-      return Extend(values, size);
     }
-    length_ += size;
-    return status;
+    return Status::OK();
   }
 
-  Status ExtendMasked(InputType values, InputType mask, int64_t size) {
-    auto status = converter_->ExtendMasked(values, mask, size);
-    if (ARROW_PREDICT_FALSE(status.IsCapacityError())) {
-      if (converter_->builder()->length() == 0) {
+  Status ExtendMasked(InputType values, InputType mask, int64_t size,
+                      int64_t offset = 0) {
+    while (offset < size) {
+      auto length_before = converter_->builder()->length();
+      auto status = converter_->ExtendMasked(values, mask, size, offset);
+      auto length_after = converter_->builder()->length();
+      auto num_converted = length_after - length_before;
+
+      offset += num_converted;
+      length_ += num_converted;
+
+      if (status.IsCapacityError()) {
+        if (converter_->builder()->length() == 0) {
+          // Builder length == 0 means the individual element is too large to append.
+          // In this case, no need to try again.
+          return status;
+        } else if (converter_->rewind_on_overflow()) {
+          // The list-like and binary-like conversion paths may raise  a capacity error,
+          // we need to handle them differently. While the binary-like converters check
+          // the capacity before append/extend the list-like converters just check after
+          // append/extend. Thus depending on the implementation semantics we may need
+          // to rewind (slice) the output chunk by one.
+          length_ -= 1;
+          offset -= 1;
+        }
+        ARROW_RETURN_NOT_OK(FinishChunk());
+      } else if (!status.ok()) {
         return status;
       }
-      ARROW_RETURN_NOT_OK(FinishChunk());
-      return ExtendMasked(values, mask, size);
     }
-    length_ += size;
-    return status;
+    return Status::OK();
   }
 
   Status FinishChunk() {
diff --git a/python/pyarrow/tests/parquet/test_data_types.py b/python/pyarrow/tests/parquet/test_data_types.py
index 850dff94df4..bdbc6b7b5a5 100644
--- a/python/pyarrow/tests/parquet/test_data_types.py
+++ b/python/pyarrow/tests/parquet/test_data_types.py
@@ -404,6 +404,7 @@ def test_fixed_size_binary():
 # -----------------------------------------------------------------------------
 
 
+@pytest.mark.slow
 @pytest.mark.large_memory
 def test_large_table_int32_overflow():
     size = np.iinfo('int32').max + 1
@@ -424,6 +425,7 @@ def _simple_table_roundtrip(table, use_legacy_dataset=False, **write_kwargs):
     return _read_table(buf, use_legacy_dataset=use_legacy_dataset)
 
 
+@pytest.mark.slow
 @pytest.mark.large_memory
 @parametrize_legacy_dataset
 def test_byte_array_exactly_2gb(use_legacy_dataset):
@@ -444,6 +446,7 @@ def test_byte_array_exactly_2gb(use_legacy_dataset):
         assert t.equals(result)
 
 
+@pytest.mark.slow
 @pytest.mark.pandas
 @pytest.mark.large_memory
 @parametrize_legacy_dataset
@@ -469,6 +472,7 @@ def test_binary_array_overflow_to_chunked(use_legacy_dataset):
     assert tbl.equals(read_tbl)
 
 
+@pytest.mark.slow
 @pytest.mark.pandas
 @pytest.mark.large_memory
 @parametrize_legacy_dataset
@@ -499,6 +503,7 @@ def test_large_binary():
             _check_roundtrip(table, use_dictionary=use_dictionary)
 
 
+@pytest.mark.slow
 @pytest.mark.large_memory
 def test_large_binary_huge():
     s = b'xy' * 997
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index 30500bc3c5b..9f6ab678a95 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -2630,6 +2630,7 @@ def test_array_from_numpy_str_utf8():
         pa.array(vec, pa.string(), mask=np.array([False]))
 
 
+@pytest.mark.slow
 @pytest.mark.large_memory
 def test_numpy_binary_overflow_to_chunked():
     # ARROW-3762, ARROW-5966
diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py
index ba94b340bd3..1a500b8523f 100644
--- a/python/pyarrow/tests/test_convert_builtin.py
+++ b/python/pyarrow/tests/test_convert_builtin.py
@@ -756,6 +756,7 @@ def test_large_binary_array(ty):
     assert len(arr) == nrepeats
 
 
+@pytest.mark.slow
 @pytest.mark.large_memory
 @pytest.mark.parametrize("ty", [pa.large_binary(), pa.large_string()])
 def test_large_binary_value(ty):
@@ -2169,7 +2170,6 @@ def test_auto_chunking_list_of_binary():
     assert arr.chunk(1).to_pylist() == [['x' * 1024]] * 2
 
 
-@pytest.mark.slow
 @pytest.mark.large_memory
 def test_auto_chunking_list_like():
     item = np.ones((2**28,), dtype='uint8')
@@ -2185,7 +2185,11 @@ def test_auto_chunking_list_like():
     assert arr.num_chunks == 2
     assert len(arr.chunk(0)) == 7
     assert len(arr.chunk(1)) == 1
-    assert arr.chunk(1)[0].as_py() == list(item)
+    chunk = arr.chunk(1)
+    scalar = chunk[0]
+    assert isinstance(scalar, pa.ListScalar)
+    expected = pa.array(item, type=pa.uint8())
+    assert scalar.values == expected
 
 
 @pytest.mark.slow
@@ -2232,3 +2236,49 @@ def test_nested_auto_chunking(ty, char):
         'integer': 1,
         'string-like': char
     }
+
+
+@pytest.mark.large_memory
+def test_array_from_pylist_data_overflow():
+    # Regression test for ARROW-12983
+    # Data buffer overflow - should result in chunked array
+    items = [b'a' * 4096] * (2 ** 19)
+    arr = pa.array(items, type=pa.string())
+    assert isinstance(arr, pa.ChunkedArray)
+    assert len(arr) == 2**19
+    assert len(arr.chunks) > 1
+
+    mask = np.zeros(2**19, bool)
+    arr = pa.array(items, mask=mask, type=pa.string())
+    assert isinstance(arr, pa.ChunkedArray)
+    assert len(arr) == 2**19
+    assert len(arr.chunks) > 1
+
+    arr = pa.array(items, type=pa.binary())
+    assert isinstance(arr, pa.ChunkedArray)
+    assert len(arr) == 2**19
+    assert len(arr.chunks) > 1
+
+
+@pytest.mark.slow
+@pytest.mark.large_memory
+def test_array_from_pylist_offset_overflow():
+    # Regression test for ARROW-12983
+    # Offset buffer overflow - should result in chunked array
+    # Note this doesn't apply to primitive arrays
+    items = [b'a'] * (2 ** 31)
+    arr = pa.array(items, type=pa.string())
+    assert isinstance(arr, pa.ChunkedArray)
+    assert len(arr) == 2**31
+    assert len(arr.chunks) > 1
+
+    mask = np.zeros(2**31, bool)
+    arr = pa.array(items, mask=mask, type=pa.string())
+    assert isinstance(arr, pa.ChunkedArray)
+    assert len(arr) == 2**31
+    assert len(arr.chunks) > 1
+
+    arr = pa.array(items, type=pa.binary())
+    assert isinstance(arr, pa.ChunkedArray)
+    assert len(arr) == 2**31
+    assert len(arr.chunks) > 1
diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py
index 7f904433fa2..b6557875c2c 100644
--- a/python/pyarrow/tests/test_pandas.py
+++ b/python/pyarrow/tests/test_pandas.py
@@ -2167,20 +2167,19 @@ def test_list_of_dictionary(self):
         expected[2] = None
         tm.assert_series_equal(arr.to_pandas(), expected)
 
-    @pytest.mark.slow
     @pytest.mark.large_memory
     def test_auto_chunking_on_list_overflow(self):
         # ARROW-9976
-        n = 2**24
+        n = 2**21
         df = pd.DataFrame.from_dict({
-            "a": list(np.zeros((n, 2**7), dtype='uint8')),
+            "a": list(np.zeros((n, 2**10), dtype='uint8')),
             "b": range(n)
         })
         table = pa.Table.from_pandas(df)
 
         column_a = table[0]
         assert column_a.num_chunks == 2
-        assert len(column_a.chunk(0)) == 2**24 - 1
+        assert len(column_a.chunk(0)) == 2**21 - 1
         assert len(column_a.chunk(1)) == 1
 
     def test_map_array_roundtrip(self):
@@ -2356,6 +2355,7 @@ def test_from_numpy_nested(self):
             {'x': {'xx': 1, 'yy': True}, 'y': 2, 'z': 'foo'},
             {'x': {'xx': 3, 'yy': False}, 'y': 4, 'z': 'bar'}]
 
+    @pytest.mark.slow
     @pytest.mark.large_memory
     def test_from_numpy_large(self):
         # Exercise rechunking + nulls
diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp
index d0f4f3a6def..683e8f278e8 100644
--- a/r/src/r_to_arrow.cpp
+++ b/r/src/r_to_arrow.cpp
@@ -326,7 +326,7 @@ class RConverter : public Converter<SEXP, RConversionOptions> {
  public:
   virtual Status Append(SEXP) { return Status::NotImplemented("Append"); }
 
-  virtual Status Extend(SEXP values, int64_t size) {
+  virtual Status Extend(SEXP values, int64_t size, int64_t offset = 0) {
     return Status::NotImplemented("Extend");
   }
 
@@ -337,7 +337,7 @@ class RConverter : public Converter<SEXP, RConversionOptions> {
     tasks.Append(false, task);
   }
 
-  virtual Status ExtendMasked(SEXP values, SEXP mask, int64_t size) {
+  virtual Status ExtendMasked(SEXP values, SEXP mask, int64_t size, int64_t offset = 0) {
     return Status::NotImplemented("ExtendMasked");
   }
 };
@@ -434,8 +434,8 @@ template <typename T>
 class RPrimitiveConverter<T, enable_if_null<T>>
     : public PrimitiveConverter<T, RConverter> {
  public:
-  Status Extend(SEXP, int64_t size) override {
-    return this->primitive_builder_->AppendNulls(size);
+  Status Extend(SEXP, int64_t size, int64_t offset = 0) override {
+    return this->primitive_builder_->AppendNulls(size - offset);
   }
 };
 
@@ -445,17 +445,17 @@ class RPrimitiveConverter<
     T, enable_if_t<is_integer_type<T>::value || is_floating_type<T>::value>>
     : public PrimitiveConverter<T, RConverter> {
  public:
-  Status Extend(SEXP x, int64_t size) override {
+  Status Extend(SEXP x, int64_t size, int64_t offset = 0) override {
     auto rtype = GetVectorType(x);
     switch (rtype) {
       case UINT8:
-        return ExtendDispatch<unsigned char>(x, size);
+        return ExtendDispatch<unsigned char>(x, size, offset);
       case INT32:
-        return ExtendDispatch<int>(x, size);
+        return ExtendDispatch<int>(x, size, offset);
       case FLOAT64:
-        return ExtendDispatch<double>(x, size);
+        return ExtendDispatch<double>(x, size, offset);
       case INT64:
-        return ExtendDispatch<int64_t>(x, size);
+        return ExtendDispatch<int64_t>(x, size, offset);
 
       default:
         break;
@@ -471,14 +471,14 @@ class RPrimitiveConverter<
 
  private:
   template <typename r_value_type>
-  Status ExtendDispatch(SEXP x, int64_t size) {
+  Status ExtendDispatch(SEXP x, int64_t size, int64_t offset) {
     if (ALTREP(x)) {
       // `x` is an ALTREP R vector storing `r_value_type`
       // and that type matches exactly the type of the array this is building
-      return Extend_impl(RVectorIterator_ALTREP<r_value_type>(x, 0), size);
+      return Extend_impl(RVectorIterator_ALTREP<r_value_type>(x, offset), size);
     } else {
       // `x` is not an ALTREP vector so we have direct access to a range of values
-      return Extend_impl(RVectorIterator<r_value_type>(x, 0), size);
+      return Extend_impl(RVectorIterator<r_value_type>(x, offset), size);
     }
   }
 
@@ -514,16 +514,16 @@ template <typename T>
 class RPrimitiveConverter<T, enable_if_t<is_boolean_type<T>::value>>
     : public PrimitiveConverter<T, RConverter> {
  public:
-  Status Extend(SEXP x, int64_t size) override {
+  Status Extend(SEXP x, int64_t size, int64_t offset = 0) override {
     auto rtype = GetVectorType(x);
     if (rtype != BOOLEAN) {
       return Status::Invalid("Expecting a logical vector");
     }
 
     if (ALTREP(x)) {
-      return Extend_impl(RVectorIterator_ALTREP<cpp11::r_bool>(x, 0), size);
+      return Extend_impl(RVectorIterator_ALTREP<cpp11::r_bool>(x, offset), size);
     } else {
-      return Extend_impl(RVectorIterator<cpp11::r_bool>(x, 0), size);
+      return Extend_impl(RVectorIterator<cpp11::r_bool>(x, offset), size);
     }
   }
 
@@ -553,16 +553,16 @@ template <typename T>
 class RPrimitiveConverter<T, enable_if_t<is_date_type<T>::value>>
     : public PrimitiveConverter<T, RConverter> {
  public:
-  Status Extend(SEXP x, int64_t size) override {
+  Status Extend(SEXP x, int64_t size, int64_t offset = 0) override {
     switch (GetVectorType(x)) {
       case DATE_INT:
-        return AppendRange_Date_dispatch<int>(x, size);
+        return AppendRange_Date_dispatch<int>(x, size, offset);
 
       case DATE_DBL:
-        return AppendRange_Date_dispatch<double>(x, size);
+        return AppendRange_Date_dispatch<double>(x, size, offset);
 
       case POSIXCT:
-        return AppendRange_Posixct_dispatch(x, size);
+        return AppendRange_Posixct_dispatch(x, size, offset);
 
       default:
         break;
@@ -578,11 +578,12 @@ class RPrimitiveConverter<T, enable_if_t<is_date_type<T>::value>>
 
  private:
   template <typename r_value_type>
-  Status AppendRange_Date_dispatch(SEXP x, int64_t size) {
+  Status AppendRange_Date_dispatch(SEXP x, int64_t size, int64_t offset) {
     if (ALTREP(x)) {
-      return AppendRange_Date(RVectorIterator_ALTREP<r_value_type>(x, 0), size);
+      return AppendRange_Date(RVectorIterator_ALTREP<r_value_type>(x, offset),
+                              size - offset);
     } else {
-      return AppendRange_Date(RVectorIterator<r_value_type>(x, 0), size);
+      return AppendRange_Date(RVectorIterator<r_value_type>(x, offset), size - offset);
     }
   }
 
@@ -602,11 +603,12 @@ class RPrimitiveConverter<T, enable_if_t<is_date_type<T>::value>>
     return VisitVector(it, size, append_null, append_value);
   }
 
-  Status AppendRange_Posixct_dispatch(SEXP x, int64_t size) {
+  Status AppendRange_Posixct_dispatch(SEXP x, int64_t size, int64_t offset) {
     if (ALTREP(x)) {
-      return AppendRange_Posixct(RVectorIterator_ALTREP<double>(x, 0), size);
+      return AppendRange_Posixct(RVectorIterator_ALTREP<double>(x, offset),
+                                 size - offset);
     } else {
-      return AppendRange_Posixct(RVectorIterator<double>(x, 0), size);
+      return AppendRange_Posixct(RVectorIterator<double>(x, offset), size - offset);
     }
   }
 
@@ -660,8 +662,8 @@ template <typename T>
 class RPrimitiveConverter<T, enable_if_t<is_time_type<T>::value>>
     : public PrimitiveConverter<T, RConverter> {
  public:
-  Status Extend(SEXP x, int64_t size) override {
-    RETURN_NOT_OK(this->Reserve(size));
+  Status Extend(SEXP x, int64_t size, int64_t offset = 0) override {
+    RETURN_NOT_OK(this->Reserve(size - offset));
     auto rtype = GetVectorType(x);
     if (rtype != TIME) {
       return Status::Invalid("Invalid conversion to time");
@@ -699,10 +701,11 @@ class RPrimitiveConverter<T, enable_if_t<is_time_type<T>::value>>
     };
 
     if (ALTREP(x)) {
-      return VisitVector(RVectorIterator_ALTREP<double>(x, 0), size, append_null,
+      return VisitVector(RVectorIterator_ALTREP<double>(x, offset), size, append_null,
                          append_value);
     } else {
-      return VisitVector(RVectorIterator<double>(x, 0), size, append_null, append_value);
+      return VisitVector(RVectorIterator<double>(x, offset), size, append_null,
+                         append_value);
     }
   }
 
@@ -716,8 +719,8 @@ template <typename T>
 class RPrimitiveConverter<T, enable_if_t<is_timestamp_type<T>::value>>
     : public PrimitiveConverter<T, RConverter> {
  public:
-  Status Extend(SEXP x, int64_t size) override {
-    RETURN_NOT_OK(this->Reserve(size));
+  Status Extend(SEXP x, int64_t size, int64_t offset = 0) override {
+    RETURN_NOT_OK(this->Reserve(size - offset));
 
     RVectorType rtype = GetVectorType(x);
     if (rtype != POSIXCT) {
@@ -737,10 +740,11 @@ class RPrimitiveConverter<T, enable_if_t<is_timestamp_type<T>::value>>
     };
 
     if (ALTREP(x)) {
-      return VisitVector(RVectorIterator_ALTREP<double>(x, 0), size, append_null,
+      return VisitVector(RVectorIterator_ALTREP<double>(x, offset), size, append_null,
                          append_value);
     } else {
-      return VisitVector(RVectorIterator<double>(x, 0), size, append_null, append_value);
+      return VisitVector(RVectorIterator<double>(x, offset), size, append_null,
+                         append_value);
     }
   }
 
@@ -754,7 +758,7 @@ template <typename T>
 class RPrimitiveConverter<T, enable_if_t<is_decimal_type<T>::value>>
     : public PrimitiveConverter<T, RConverter> {
  public:
-  Status Extend(SEXP x, int64_t size) override {
+  Status Extend(SEXP x, int64_t size, int64_t offset = 0) override {
     return Status::NotImplemented("Extend");
   }
 };
@@ -786,8 +790,8 @@ class RPrimitiveConverter<T, enable_if_binary<T>>
  public:
   using OffsetType = typename T::offset_type;
 
-  Status Extend(SEXP x, int64_t size) override {
-    RETURN_NOT_OK(this->Reserve(size));
+  Status Extend(SEXP x, int64_t size, int64_t offset = 0) override {
+    RETURN_NOT_OK(this->Reserve(size - offset));
     RETURN_NOT_OK(check_binary(x, size));
 
     auto append_null = [this]() {
@@ -801,7 +805,7 @@ class RPrimitiveConverter<T, enable_if_binary<T>>
       this->primitive_builder_->UnsafeAppend(RAW_RO(raw), static_cast<OffsetType>(n));
       return Status::OK();
     };
-    return VisitVector(RVectorIterator<SEXP>(x, 0), size, append_null, append_value);
+    return VisitVector(RVectorIterator<SEXP>(x, offset), size, append_null, append_value);
   }
 
   void DelayedExtend(SEXP values, int64_t size, RTasks& tasks) override {
@@ -814,8 +818,8 @@ template <typename T>
 class RPrimitiveConverter<T, enable_if_t<std::is_same<T, FixedSizeBinaryType>::value>>
     : public PrimitiveConverter<T, RConverter> {
  public:
-  Status Extend(SEXP x, int64_t size) override {
-    RETURN_NOT_OK(this->Reserve(size));
+  Status Extend(SEXP x, int64_t size, int64_t offset = 0) override {
+    RETURN_NOT_OK(this->Reserve(size - offset));
     RETURN_NOT_OK(check_binary(x, size));
 
     auto append_null = [this]() {
@@ -833,7 +837,7 @@ class RPrimitiveConverter<T, enable_if_t<std::is_same<T, FixedSizeBinaryType>::v
       this->primitive_builder_->UnsafeAppend(RAW_RO(raw));
       return Status::OK();
     };
-    return VisitVector(RVectorIterator<SEXP>(x, 0), size, append_null, append_value);
+    return VisitVector(RVectorIterator<SEXP>(x, offset), size, append_null, append_value);
   }
 
   void DelayedExtend(SEXP values, int64_t size, RTasks& tasks) override {
@@ -848,12 +852,12 @@ class RPrimitiveConverter<T, enable_if_string_like<T>>
  public:
   using OffsetType = typename T::offset_type;
 
-  Status Extend(SEXP x, int64_t size) override {
+  Status Extend(SEXP x, int64_t size, int64_t offset = 0) override {
     RVectorType rtype = GetVectorType(x);
     if (rtype != STRING) {
       return Status::Invalid("Expecting a character vector");
     }
-    return UnsafeAppendUtf8Strings(arrow::r::utf8_strings(x), size);
+    return UnsafeAppendUtf8Strings(arrow::r::utf8_strings(x), size, offset);
   }
 
   void DelayedExtend(SEXP values, int64_t size, RTasks& tasks) override {
@@ -863,14 +867,14 @@ class RPrimitiveConverter<T, enable_if_string_like<T>>
   }
 
  private:
-  Status UnsafeAppendUtf8Strings(const cpp11::strings& s, int64_t size) {
+  Status UnsafeAppendUtf8Strings(const cpp11::strings& s, int64_t size, int64_t offset) {
     RETURN_NOT_OK(this->primitive_builder_->Reserve(s.size()));
     const SEXP* p_strings = reinterpret_cast<const SEXP*>(DATAPTR_RO(s));
 
     // we know all the R strings are utf8 already, so we can get
     // a definite size and then use UnsafeAppend*()
     int64_t total_length = 0;
-    for (R_xlen_t i = 0; i < size; i++, ++p_strings) {
+    for (R_xlen_t i = offset; i < size; i++, ++p_strings) {
       SEXP si = *p_strings;
       total_length += si == NA_STRING ? 0 : LENGTH(si);
     }
@@ -878,7 +882,7 @@ class RPrimitiveConverter<T, enable_if_string_like<T>>
 
     // append
     p_strings = reinterpret_cast<const SEXP*>(DATAPTR_RO(s));
-    for (R_xlen_t i = 0; i < size; i++, ++p_strings) {
+    for (R_xlen_t i = offset; i < size; i++, ++p_strings) {
       SEXP si = *p_strings;
       if (si == NA_STRING) {
         this->primitive_builder_->UnsafeAppendNull();
@@ -895,7 +899,7 @@ template <typename T>
 class RPrimitiveConverter<T, enable_if_t<is_duration_type<T>::value>>
     : public PrimitiveConverter<T, RConverter> {
  public:
-  Status Extend(SEXP x, int64_t size) override {
+  Status Extend(SEXP x, int64_t size, int64_t offset = 0) override {
     // TODO: look in lubridate
     return Status::NotImplemented("Extend");
   }
@@ -911,7 +915,7 @@ template <typename U>
 class RDictionaryConverter<U, enable_if_has_c_type<U>>
     : public DictionaryConverter<U, RConverter> {
  public:
-  Status Extend(SEXP x, int64_t size) override {
+  Status Extend(SEXP x, int64_t size, int64_t offset = 0) override {
     return Status::NotImplemented("Extend");
   }
 };
@@ -922,14 +926,14 @@ class RDictionaryConverter<ValueType, enable_if_has_string_view<ValueType>>
  public:
   using BuilderType = DictionaryBuilder<ValueType>;
 
-  Status Extend(SEXP x, int64_t size) override {
-    RETURN_NOT_OK(ExtendSetup(x, size));
-    return ExtendImpl(x, size, GetCharLevels(x));
+  Status Extend(SEXP x, int64_t size, int64_t offset = 0) override {
+    RETURN_NOT_OK(ExtendSetup(x, size, offset));
+    return ExtendImpl(x, size, offset, GetCharLevels(x));
   }
 
   void DelayedExtend(SEXP values, int64_t size, RTasks& tasks) override {
     // the setup runs synchronously first
-    Status setup = ExtendSetup(values, size);
+    Status setup = ExtendSetup(values, size, /*offset=*/0);
 
     if (!setup.ok()) {
       // if that fails, propagate the error
@@ -938,7 +942,7 @@ class RDictionaryConverter<ValueType, enable_if_has_string_view<ValueType>>
       auto char_levels = GetCharLevels(values);
 
       tasks.Append(true, [this, values, size, char_levels]() {
-        return this->ExtendImpl(values, size, char_levels);
+        return this->ExtendImpl(values, size, /*offset=*/0, char_levels);
       });
     }
   }
@@ -970,7 +974,7 @@ class RDictionaryConverter<ValueType, enable_if_has_string_view<ValueType>>
     return char_levels;
   }
 
-  Status ExtendSetup(SEXP x, int64_t size) {
+  Status ExtendSetup(SEXP x, int64_t size, int64_t offset) {
     RVectorType rtype = GetVectorType(x);
     if (rtype != FACTOR) {
       return Status::Invalid("invalid R type to convert to dictionary");
@@ -982,17 +986,18 @@ class RDictionaryConverter<ValueType, enable_if_has_string_view<ValueType>>
     RETURN_NOT_OK(this->value_builder_->InsertMemoValues(*memo_array));
 
     // then we can proceed
-    return this->Reserve(size);
+    return this->Reserve(size - offset);
   }
 
-  Status ExtendImpl(SEXP values, int64_t size,
+  Status ExtendImpl(SEXP values, int64_t size, int64_t offset,
                     const std::vector<const char*>& char_levels) {
     auto append_null = [this]() { return this->value_builder_->AppendNull(); };
     auto append_value = [this, &char_levels](int value) {
       return this->value_builder_->Append(char_levels[value - 1]);
     };
 
-    return VisitVector(RVectorIterator<int>(values, 0), size, append_null, append_value);
+    return VisitVector(RVectorIterator<int>(values, offset), size, append_null,
+                       append_value);
   }
 };
 
@@ -1014,7 +1019,7 @@ struct RConverterTrait<T, enable_if_list_like<T>> {
 template <typename T>
 class RListConverter : public ListConverter<T, RConverter, RConverterTrait> {
  public:
-  Status Extend(SEXP x, int64_t size) override {
+  Status Extend(SEXP x, int64_t size, int64_t offset = 0) override {
     RETURN_NOT_OK(this->Reserve(size));
 
     RVectorType rtype = GetVectorType(x);
@@ -1034,7 +1039,7 @@ class RListConverter : public ListConverter<T, RConverter, RConverterTrait> {
       return this->value_converter_.get()->Extend(value, n);
     };
 
-    return VisitVector(RVectorIterator<SEXP>(x, 0), size, append_null, append_value);
+    return VisitVector(RVectorIterator<SEXP>(x, offset), size, append_null, append_value);
   }
 
   void DelayedExtend(SEXP values, int64_t size, RTasks& tasks) override {
@@ -1056,12 +1061,12 @@ struct RConverterTrait<StructType> {
 
 class RStructConverter : public StructConverter<RConverter, RConverterTrait> {
  public:
-  Status Extend(SEXP x, int64_t size) override {
-    RETURN_NOT_OK(ExtendSetup(x, size));
+  Status Extend(SEXP x, int64_t size, int64_t offset = 0) override {
+    RETURN_NOT_OK(ExtendSetup(x, size, offset));
 
     auto fields = this->struct_type_->fields();
     R_xlen_t n_columns = XLENGTH(x);
-    for (R_xlen_t i = 0; i < n_columns; i++) {
+    for (R_xlen_t i = offset; i < n_columns; i++) {
       auto status = children_[i]->Extend(VECTOR_ELT(x, i), size);
       if (!status.ok()) {
         return Status::Invalid("Problem with column ", (i + 1), " (", fields[i]->name(),
@@ -1074,7 +1079,7 @@ class RStructConverter : public StructConverter<RConverter, RConverterTrait> {
 
   void DelayedExtend(SEXP values, int64_t size, RTasks& tasks) override {
     // the setup runs synchronously first
-    Status setup = ExtendSetup(values, size);
+    Status setup = ExtendSetup(values, size, /*offset=*/0);
 
     if (!setup.ok()) {
       // if that fails, propagate the error
@@ -1095,7 +1100,7 @@ class RStructConverter : public StructConverter<RConverter, RConverterTrait> {
     return StructConverter<RConverter, RConverterTrait>::Init(pool);
   }
 
-  Status ExtendSetup(SEXP x, int64_t size) {
+  Status ExtendSetup(SEXP x, int64_t size, int64_t offset) {
     // check that x is compatible
     R_xlen_t n_columns = XLENGTH(x);
 
@@ -1133,7 +1138,7 @@ class RStructConverter : public StructConverter<RConverter, RConverterTrait> {
       }
     }
 
-    RETURN_NOT_OK(this->Reserve(size));
+    RETURN_NOT_OK(this->Reserve(size - offset));
 
     for (R_xlen_t i = 0; i < size; i++) {
       RETURN_NOT_OK(struct_builder_->Append());

From da841cc761477e2ae7aba3b685731ebdbb820d9c Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Tue, 22 Jun 2021 20:15:19 +0200
Subject: [PATCH 445/719] ARROW-13135: [C++] Fix Status propagation from
 Parquet exception

Closes #10566 from pitrou/ARROW-13135-parquet-status-exception

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/dataset/file_parquet.cc      | 87 +++++++++++++---------
 cpp/src/arrow/dataset/file_parquet_test.cc |  2 +-
 cpp/src/arrow/dataset/test_util.h          |  4 +-
 cpp/src/parquet/exception.h                | 43 ++++++-----
 cpp/src/parquet/reader_test.cc             |  8 +-
 python/pyarrow/tests/parquet/test_basic.py | 16 +++-
 python/pyarrow/tests/test_hdfs.py          | 17 +++--
 7 files changed, 107 insertions(+), 70 deletions(-)

diff --git a/cpp/src/arrow/dataset/file_parquet.cc b/cpp/src/arrow/dataset/file_parquet.cc
index 0ebbd0a5333..8611cf89997 100644
--- a/cpp/src/arrow/dataset/file_parquet.cc
+++ b/cpp/src/arrow/dataset/file_parquet.cc
@@ -52,6 +52,8 @@ using parquet::arrow::SchemaField;
 using parquet::arrow::SchemaManifest;
 using parquet::arrow::StatisticsAsScalars;
 
+namespace {
+
 /// \brief A ScanTask backed by a parquet file and a RowGroup within a parquet file.
 class ParquetScanTask : public ScanTask {
  public:
@@ -128,7 +130,7 @@ class ParquetScanTask : public ScanTask {
   arrow::io::CacheOptions cache_options_;
 };
 
-static parquet::ReaderProperties MakeReaderProperties(
+parquet::ReaderProperties MakeReaderProperties(
     const ParquetFileFormat& format, ParquetFragmentScanOptions* parquet_scan_options,
     MemoryPool* pool = default_memory_pool()) {
   // Can't mutate pool after construction
@@ -144,7 +146,7 @@ static parquet::ReaderProperties MakeReaderProperties(
   return properties;
 }
 
-static parquet::ArrowReaderProperties MakeArrowReaderProperties(
+parquet::ArrowReaderProperties MakeArrowReaderProperties(
     const ParquetFileFormat& format, const parquet::FileMetaData& metadata) {
   parquet::ArrowReaderProperties properties(/* use_threads = */ false);
   for (const std::string& name : format.reader_options.dict_columns) {
@@ -155,7 +157,7 @@ static parquet::ArrowReaderProperties MakeArrowReaderProperties(
 }
 
 template <typename M>
-static Result<std::shared_ptr<SchemaManifest>> GetSchemaManifest(
+Result<std::shared_ptr<SchemaManifest>> GetSchemaManifest(
     const M& metadata, const parquet::ArrowReaderProperties& properties) {
   auto manifest = std::make_shared<SchemaManifest>();
   const std::shared_ptr<const ::arrow::KeyValueMetadata>& key_value_metadata = nullptr;
@@ -164,7 +166,7 @@ static Result<std::shared_ptr<SchemaManifest>> GetSchemaManifest(
   return manifest;
 }
 
-static util::optional<compute::Expression> ColumnChunkStatisticsAsExpression(
+util::optional<compute::Expression> ColumnChunkStatisticsAsExpression(
     const SchemaField& schema_field, const parquet::RowGroupMetaData& metadata) {
   // For the remaining of this function, failure to extract/parse statistics
   // are ignored by returning nullptr. The goal is two fold. First
@@ -214,8 +216,8 @@ static util::optional<compute::Expression> ColumnChunkStatisticsAsExpression(
   return util::nullopt;
 }
 
-static void AddColumnIndices(const SchemaField& schema_field,
-                             std::vector<int>* column_projection) {
+void AddColumnIndices(const SchemaField& schema_field,
+                      std::vector<int>* column_projection) {
   if (schema_field.is_leaf()) {
     column_projection->push_back(schema_field.column_index);
   } else {
@@ -227,8 +229,8 @@ static void AddColumnIndices(const SchemaField& schema_field,
 }
 
 // Compute the column projection out of an optional arrow::Schema
-static std::vector<int> InferColumnProjection(const parquet::arrow::FileReader& reader,
-                                              const ScanOptions& options) {
+std::vector<int> InferColumnProjection(const parquet::arrow::FileReader& reader,
+                                       const ScanOptions& options) {
   auto manifest = reader.manifest();
   // Checks if the field is needed in either the projection or the filter.
   auto field_names = options.MaterializedFields();
@@ -253,6 +255,33 @@ static std::vector<int> InferColumnProjection(const parquet::arrow::FileReader&
   return columns_selection;
 }
 
+Status WrapSourceError(const Status& status, const std::string& path) {
+  return status.WithMessage("Could not open Parquet input source '", path,
+                            "': ", status.message());
+}
+
+Result<bool> IsSupportedParquetFile(const ParquetFileFormat& format,
+                                    const FileSource& source) {
+  BEGIN_PARQUET_CATCH_EXCEPTIONS
+  try {
+    ARROW_ASSIGN_OR_RAISE(auto input, source.Open());
+    ARROW_ASSIGN_OR_RAISE(
+        auto parquet_scan_options,
+        GetFragmentScanOptions<ParquetFragmentScanOptions>(
+            kParquetTypeName, nullptr, format.default_fragment_scan_options));
+    auto reader = parquet::ParquetFileReader::Open(
+        std::move(input), MakeReaderProperties(format, parquet_scan_options.get()));
+    std::shared_ptr<parquet::FileMetaData> metadata = reader->metadata();
+    return metadata != nullptr && metadata->can_decompress();
+  } catch (const ::parquet::ParquetInvalidOrCorruptedFileException& e) {
+    ARROW_UNUSED(e);
+    return false;
+  }
+  END_PARQUET_CATCH_EXCEPTIONS
+}
+
+}  // namespace
+
 bool ParquetFileFormat::Equals(const FileFormat& other) const {
   if (other.type_name() != type_name()) return false;
 
@@ -270,24 +299,11 @@ ParquetFileFormat::ParquetFileFormat(const parquet::ReaderProperties& reader_pro
 }
 
 Result<bool> ParquetFileFormat::IsSupported(const FileSource& source) const {
-  try {
-    ARROW_ASSIGN_OR_RAISE(auto input, source.Open());
-    ARROW_ASSIGN_OR_RAISE(auto parquet_scan_options,
-                          GetFragmentScanOptions<ParquetFragmentScanOptions>(
-                              kParquetTypeName, nullptr, default_fragment_scan_options));
-    auto reader = parquet::ParquetFileReader::Open(
-        std::move(input), MakeReaderProperties(*this, parquet_scan_options.get()));
-    std::shared_ptr<parquet::FileMetaData> metadata = reader->metadata();
-    return metadata != nullptr && metadata->can_decompress();
-  } catch (const ::parquet::ParquetInvalidOrCorruptedFileException& e) {
-    ARROW_UNUSED(e);
-    return false;
-  } catch (const ::parquet::ParquetException& e) {
-    return Status::IOError("Could not open parquet input source '", source.path(),
-                           "': ", e.what());
+  auto maybe_is_supported = IsSupportedParquetFile(*this, source);
+  if (!maybe_is_supported.ok()) {
+    return WrapSourceError(maybe_is_supported.status(), source.path());
   }
-
-  return true;
+  return maybe_is_supported;
 }
 
 Result<std::shared_ptr<Schema>> ParquetFileFormat::Inspect(
@@ -307,14 +323,18 @@ Result<std::unique_ptr<parquet::arrow::FileReader>> ParquetFileFormat::GetReader
   auto properties = MakeReaderProperties(*this, parquet_scan_options.get(), pool);
 
   ARROW_ASSIGN_OR_RAISE(auto input, source.Open());
-  std::unique_ptr<parquet::ParquetFileReader> reader;
-  try {
-    reader = parquet::ParquetFileReader::Open(std::move(input), std::move(properties));
-  } catch (const ::parquet::ParquetException& e) {
-    return Status::IOError("Could not open parquet input source '", source.path(),
-                           "': ", e.what());
-  }
 
+  auto make_reader = [&]() -> Result<std::unique_ptr<parquet::ParquetFileReader>> {
+    BEGIN_PARQUET_CATCH_EXCEPTIONS
+    return parquet::ParquetFileReader::Open(std::move(input), std::move(properties));
+    END_PARQUET_CATCH_EXCEPTIONS
+  };
+
+  auto maybe_reader = std::move(make_reader)();
+  if (!maybe_reader.ok()) {
+    return WrapSourceError(maybe_reader.status(), source.path());
+  }
+  std::unique_ptr<parquet::ParquetFileReader> reader = *std::move(maybe_reader);
   std::shared_ptr<parquet::FileMetaData> metadata = reader->metadata();
   auto arrow_properties = MakeArrowReaderProperties(*this, *metadata);
 
@@ -371,8 +391,7 @@ Future<std::shared_ptr<parquet::arrow::FileReader>> ParquetFileFormat::GetReader
       },
       [path](
           const Status& status) -> Result<std::shared_ptr<parquet::arrow::FileReader>> {
-        return status.WithMessage("Could not open Parquet input source '", path,
-                                  "': ", status.message());
+        return WrapSourceError(status, path);
       });
 }
 
diff --git a/cpp/src/arrow/dataset/file_parquet_test.cc b/cpp/src/arrow/dataset/file_parquet_test.cc
index 2561bdb7c5d..04c86b1f16f 100644
--- a/cpp/src/arrow/dataset/file_parquet_test.cc
+++ b/cpp/src/arrow/dataset/file_parquet_test.cc
@@ -176,7 +176,7 @@ class TestParquetFileFormat : public FileFormatFixtureMixin<ParquetFormatHelper>
 };
 
 TEST_F(TestParquetFileFormat, InspectFailureWithRelevantError) {
-  TestInspectFailureWithRelevantError(StatusCode::IOError, "parquet");
+  TestInspectFailureWithRelevantError(StatusCode::Invalid, "Parquet");
 }
 TEST_F(TestParquetFileFormat, Inspect) { TestInspect(); }
 
diff --git a/cpp/src/arrow/dataset/test_util.h b/cpp/src/arrow/dataset/test_util.h
index 6a0375073ee..66f5654fec4 100644
--- a/cpp/src/arrow/dataset/test_util.h
+++ b/cpp/src/arrow/dataset/test_util.h
@@ -445,13 +445,15 @@ class FileFormatFixtureMixin : public ::testing::Test {
                 "Error creating dataset. Could not read schema from '/herp/derp':"),
             ::testing::HasSubstr("Is this a '" + format_->type_name() + "' file?")));
   }
+
   void TestInspectFailureWithRelevantError(StatusCode code,
-                                           const std::string format_name) {
+                                           const std::string& format_name) {
     const std::vector<std::string> file_contents{"", "PAR0", "ASDFPAR1", "ARROW1"};
     for (const auto& contents : file_contents) {
       AssertInspectFailure(contents, code, format_name);
     }
   }
+
   void TestInspect() {
     auto reader = GetRecordBatchReader(schema({field("f64", float64())}));
     auto source = GetFileSource(reader.get());
diff --git a/cpp/src/parquet/exception.h b/cpp/src/parquet/exception.h
index bfd1bfd9422..826f5bdc8bf 100644
--- a/cpp/src/parquet/exception.h
+++ b/cpp/src/parquet/exception.h
@@ -33,23 +33,29 @@
 
 // Parquet exception to Arrow Status
 
-#define PARQUET_CATCH_NOT_OK(s)                          \
-  try {                                                  \
-    (s);                                                 \
-  } catch (const ::parquet::ParquetStatusException& e) { \
-    return e.status();                                   \
-  } catch (const ::parquet::ParquetException& e) {       \
-    return ::arrow::Status::IOError(e.what());           \
+#define BEGIN_PARQUET_CATCH_EXCEPTIONS try {
+#define END_PARQUET_CATCH_EXCEPTIONS                   \
+  }                                                    \
+  catch (const ::parquet::ParquetStatusException& e) { \
+    return e.status();                                 \
+  }                                                    \
+  catch (const ::parquet::ParquetException& e) {       \
+    return ::arrow::Status::IOError(e.what());         \
   }
 
-#define PARQUET_CATCH_AND_RETURN(s)                      \
-  try {                                                  \
-    return (s);                                          \
-  } catch (const ::parquet::ParquetStatusException& e) { \
-    return e.status();                                   \
-  } catch (const ::parquet::ParquetException& e) {       \
-    return ::arrow::Status::IOError(e.what());           \
-  }
+// clang-format off
+
+#define PARQUET_CATCH_NOT_OK(s)    \
+  BEGIN_PARQUET_CATCH_EXCEPTIONS   \
+  (s);                             \
+  END_PARQUET_CATCH_EXCEPTIONS
+
+// clang-format on
+
+#define PARQUET_CATCH_AND_RETURN(s) \
+  BEGIN_PARQUET_CATCH_EXCEPTIONS    \
+  return (s);                       \
+  END_PARQUET_CATCH_EXCEPTIONS
 
 // Arrow Status to Parquet exception
 
@@ -149,11 +155,4 @@ void ThrowNotOk(StatusReturnBlock&& b) {
   PARQUET_THROW_NOT_OK(b());
 }
 
-#define BEGIN_PARQUET_CATCH_EXCEPTIONS try {
-#define END_PARQUET_CATCH_EXCEPTIONS             \
-  }                                              \
-  catch (const ::parquet::ParquetException& e) { \
-    return ::arrow::Status::IOError(e.what());   \
-  }
-
 }  // namespace parquet
diff --git a/cpp/src/parquet/reader_test.cc b/cpp/src/parquet/reader_test.cc
index 806ff2b9494..2d13266df22 100644
--- a/cpp/src/parquet/reader_test.cc
+++ b/cpp/src/parquet/reader_test.cc
@@ -688,17 +688,17 @@ TEST(TestFileReader, TestOpenErrors) {
           ::testing::HasSubstr("Couldn't deserialize thrift: No more data to read")));
 
   EXPECT_FINISHES_AND_RAISES_WITH_MESSAGE_THAT(
-      IOError, ::testing::HasSubstr("Parquet file size is 0 bytes"), OpenBufferAsync(""));
+      Invalid, ::testing::HasSubstr("Parquet file size is 0 bytes"), OpenBufferAsync(""));
   EXPECT_FINISHES_AND_RAISES_WITH_MESSAGE_THAT(
-      IOError, ::testing::HasSubstr("Parquet magic bytes not found"),
+      Invalid, ::testing::HasSubstr("Parquet magic bytes not found"),
       OpenBufferAsync("AAAAPAR0"));
   EXPECT_FINISHES_AND_RAISES_WITH_MESSAGE_THAT(
-      IOError,
+      Invalid,
       ::testing::HasSubstr(
           "Parquet file size is 5 bytes, smaller than the minimum file footer"),
       OpenBufferAsync("APAR1"));
   EXPECT_FINISHES_AND_RAISES_WITH_MESSAGE_THAT(
-      IOError,
+      Invalid,
       ::testing::HasSubstr(
           "Parquet file size is 8 bytes, smaller than the size reported by footer's"),
       OpenBufferAsync("\xFF\xFF\xFF\x0FPAR1"));
diff --git a/python/pyarrow/tests/parquet/test_basic.py b/python/pyarrow/tests/parquet/test_basic.py
index ebf9d44b3bb..adaa39f7ed6 100644
--- a/python/pyarrow/tests/parquet/test_basic.py
+++ b/python/pyarrow/tests/parquet/test_basic.py
@@ -278,13 +278,25 @@ def test_relative_paths(tempdir, use_legacy_dataset, filesystem):
     assert result.equals(table)
 
 
-@parametrize_legacy_dataset
-def test_read_non_existing_file(use_legacy_dataset):
+def test_read_non_existing_file():
     # ensure we have a proper error message
     with pytest.raises(FileNotFoundError):
         pq.read_table('i-am-not-existing.parquet')
 
 
+def test_file_error_python_exception():
+    class BogusFile(io.BytesIO):
+        def read(self, *args):
+            raise ZeroDivisionError("zorglub")
+
+        def seek(self, *args):
+            raise ZeroDivisionError("zorglub")
+
+    # ensure the Python exception is restored
+    with pytest.raises(ZeroDivisionError, match="zorglub"):
+        pq.read_table(BogusFile(b""))
+
+
 @parametrize_legacy_dataset
 def test_parquet_read_from_buffer(tempdir, use_legacy_dataset):
     # reading from a buffer from python's open()
diff --git a/python/pyarrow/tests/test_hdfs.py b/python/pyarrow/tests/test_hdfs.py
index e5b8d1a6106..c71353b45f0 100644
--- a/python/pyarrow/tests/test_hdfs.py
+++ b/python/pyarrow/tests/test_hdfs.py
@@ -39,6 +39,15 @@
 # HDFS tests
 
 
+def check_libhdfs_present():
+    if not pa.have_libhdfs():
+        message = 'No libhdfs available on system'
+        if os.environ.get('PYARROW_HDFS_TEST_LIBHDFS_REQUIRE'):
+            pytest.fail(message)
+        else:
+            pytest.skip(message)
+
+
 def hdfs_test_client():
     host = os.environ.get('ARROW_HDFS_TEST_HOST', 'default')
     user = os.environ.get('ARROW_HDFS_TEST_USER', None)
@@ -382,12 +391,7 @@ class TestLibHdfs(HdfsTestCases, unittest.TestCase):
 
     @classmethod
     def check_driver(cls):
-        if not pa.have_libhdfs():
-            message = 'No libhdfs available on system'
-            if os.environ.get('PYARROW_HDFS_TEST_LIBHDFS_REQUIRE'):
-                pytest.fail(message)
-            else:
-                pytest.skip(message)
+        check_libhdfs_present()
 
     def test_orphaned_file(self):
         hdfs = hdfs_test_client()
@@ -418,6 +422,7 @@ def _get_hdfs_uri(path):
 def test_fastparquet_read_with_hdfs():
     from pandas.testing import assert_frame_equal
 
+    check_libhdfs_present()
     try:
         import snappy  # noqa
     except ImportError:

From 133b1a904bf7fc1d24343c306a2279e27d4ebe6d Mon Sep 17 00:00:00 2001
From: Benjamin Kietzman <bengilgit@gmail.com>
Date: Tue, 22 Jun 2021 17:55:20 -0400
Subject: [PATCH 446/719] ARROW-10440: [C++][Dataset] Visit FileWriters before
 Finish

This enables collection of paths written to during writing of a FileSystemDataset

Closes #10573 from bkietz/10440-Add-a-callback-to-visit-f

Authored-by: Benjamin Kietzman <bengilgit@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/src/arrow/dataset/file_base.cc    | 34 +++++++++++++++------------
 cpp/src/arrow/dataset/file_base.h     | 17 +++++++++++---
 cpp/src/arrow/dataset/file_csv.h      |  3 ++-
 cpp/src/arrow/dataset/file_ipc.cc     | 11 +++++----
 cpp/src/arrow/dataset/file_ipc.h      |  6 +++--
 cpp/src/arrow/dataset/file_parquet.cc | 16 ++++++++-----
 cpp/src/arrow/dataset/file_parquet.h  |  6 +++--
 cpp/src/arrow/dataset/file_test.cc    | 19 ++++++++-------
 cpp/src/arrow/dataset/test_util.h     | 21 +++++++++++++----
 9 files changed, 87 insertions(+), 46 deletions(-)

diff --git a/cpp/src/arrow/dataset/file_base.cc b/cpp/src/arrow/dataset/file_base.cc
index b1cbd63ec61..741071d1703 100644
--- a/cpp/src/arrow/dataset/file_base.cc
+++ b/cpp/src/arrow/dataset/file_base.cc
@@ -418,7 +418,8 @@ class WriteQueue {
 
     ARROW_ASSIGN_OR_RAISE(
         writer_, write_options.format()->MakeWriter(std::move(destination), schema_,
-                                                    write_options.file_write_options));
+                                                    write_options.file_write_options,
+                                                    {write_options.filesystem, path}));
     return Status::OK();
   }
 
@@ -445,15 +446,15 @@ struct WriteState {
   std::unordered_map<std::string, std::unique_ptr<WriteQueue>> queues;
 };
 
-Status WriteNextBatch(WriteState& state, const std::shared_ptr<Fragment>& fragment,
+Status WriteNextBatch(WriteState* state, const std::shared_ptr<Fragment>& fragment,
                       std::shared_ptr<RecordBatch> batch) {
-  ARROW_ASSIGN_OR_RAISE(auto groups, state.write_options.partitioning->Partition(batch));
+  ARROW_ASSIGN_OR_RAISE(auto groups, state->write_options.partitioning->Partition(batch));
   batch.reset();  // drop to hopefully conserve memory
 
-  if (groups.batches.size() > static_cast<size_t>(state.write_options.max_partitions)) {
+  if (groups.batches.size() > static_cast<size_t>(state->write_options.max_partitions)) {
     return Status::Invalid("Fragment would be written into ", groups.batches.size(),
                            " partitions. This exceeds the maximum of ",
-                           state.write_options.max_partitions);
+                           state->write_options.max_partitions);
   }
 
   std::unordered_set<WriteQueue*> need_flushed;
@@ -462,20 +463,20 @@ Status WriteNextBatch(WriteState& state, const std::shared_ptr<Fragment>& fragme
         and_(std::move(groups.expressions[i]), fragment->partition_expression());
     auto batch = std::move(groups.batches[i]);
 
-    ARROW_ASSIGN_OR_RAISE(auto part,
-                          state.write_options.partitioning->Format(partition_expression));
+    ARROW_ASSIGN_OR_RAISE(
+        auto part, state->write_options.partitioning->Format(partition_expression));
 
     WriteQueue* queue;
     {
       // lookup the queue to which batch should be appended
-      auto queues_lock = state.mutex.Lock();
+      auto queues_lock = state->mutex.Lock();
 
       queue = internal::GetOrInsertGenerated(
-                  &state.queues, std::move(part),
+                  &state->queues, std::move(part),
                   [&](const std::string& emplaced_part) {
                     // lookup in `queues` also failed,
                     // generate a new WriteQueue
-                    size_t queue_index = state.queues.size() - 1;
+                    size_t queue_index = state->queues.size() - 1;
 
                     return internal::make_unique<WriteQueue>(emplaced_part, queue_index,
                                                              batch->schema());
@@ -489,12 +490,12 @@ Status WriteNextBatch(WriteState& state, const std::shared_ptr<Fragment>& fragme
 
   // flush all touched WriteQueues
   for (auto queue : need_flushed) {
-    RETURN_NOT_OK(queue->Flush(state.write_options));
+    RETURN_NOT_OK(queue->Flush(state->write_options));
   }
   return Status::OK();
 }
 
-Status WriteInternal(const ScanOptions& scan_options, WriteState& state,
+Status WriteInternal(const ScanOptions& scan_options, WriteState* state,
                      ScanTaskVector scan_tasks) {
   // Store a mapping from partitions (represened by their formatted partition expressions)
   // to a WriteQueue which flushes batches into that partition's output file. In principle
@@ -544,7 +545,7 @@ Status FileSystemDataset::Write(const FileSystemDatasetWriteOptions& write_optio
 #pragma warning(disable : 4996)
 #endif
 
-  // TODO: (ARROW-11782/ARROW-12288) Remove calls to Scan()
+  // TODO(ARROW-11782/ARROW-12288) Remove calls to Scan()
   ARROW_ASSIGN_OR_RAISE(auto scan_task_it, scanner->Scan());
   ARROW_ASSIGN_OR_RAISE(ScanTaskVector scan_tasks, scan_task_it.ToVector());
 
@@ -555,11 +556,14 @@ Status FileSystemDataset::Write(const FileSystemDatasetWriteOptions& write_optio
 #endif
 
   WriteState state(write_options);
-  RETURN_NOT_OK(WriteInternal(*scanner->options(), state, std::move(scan_tasks)));
+  RETURN_NOT_OK(WriteInternal(*scanner->options(), &state, std::move(scan_tasks)));
 
   auto task_group = scanner->options()->TaskGroup();
   for (const auto& part_queue : state.queues) {
-    task_group->Append([&] { return part_queue.second->writer()->Finish(); });
+    task_group->Append([&] {
+      RETURN_NOT_OK(write_options.writer_pre_finish(part_queue.second->writer().get()));
+      return part_queue.second->writer()->Finish();
+    });
   }
   return task_group->Finish();
 }
diff --git a/cpp/src/arrow/dataset/file_base.h b/cpp/src/arrow/dataset/file_base.h
index dd47b1226f4..f074e0f81da 100644
--- a/cpp/src/arrow/dataset/file_base.h
+++ b/cpp/src/arrow/dataset/file_base.h
@@ -175,7 +175,8 @@ class ARROW_DS_EXPORT FileFormat : public std::enable_shared_from_this<FileForma
   /// \brief Create a writer for this format.
   virtual Result<std::shared_ptr<FileWriter>> MakeWriter(
       std::shared_ptr<io::OutputStream> destination, std::shared_ptr<Schema> schema,
-      std::shared_ptr<FileWriteOptions> options) const = 0;
+      std::shared_ptr<FileWriteOptions> options,
+      fs::FileLocator destination_locator) const = 0;
 
   /// \brief Get default write options for this format.
   virtual std::shared_ptr<FileWriteOptions> DefaultWriteOptions() = 0;
@@ -313,19 +314,23 @@ class ARROW_DS_EXPORT FileWriter {
   const std::shared_ptr<FileFormat>& format() const { return options_->format(); }
   const std::shared_ptr<Schema>& schema() const { return schema_; }
   const std::shared_ptr<FileWriteOptions>& options() const { return options_; }
+  const fs::FileLocator& destination() const { return destination_locator_; }
 
  protected:
   FileWriter(std::shared_ptr<Schema> schema, std::shared_ptr<FileWriteOptions> options,
-             std::shared_ptr<io::OutputStream> destination)
+             std::shared_ptr<io::OutputStream> destination,
+             fs::FileLocator destination_locator)
       : schema_(std::move(schema)),
         options_(std::move(options)),
-        destination_(destination) {}
+        destination_(std::move(destination)),
+        destination_locator_(std::move(destination_locator)) {}
 
   virtual Status FinishInternal() = 0;
 
   std::shared_ptr<Schema> schema_;
   std::shared_ptr<FileWriteOptions> options_;
   std::shared_ptr<io::OutputStream> destination_;
+  fs::FileLocator destination_locator_;
 };
 
 /// \brief Options for writing a dataset.
@@ -349,6 +354,12 @@ struct ARROW_DS_EXPORT FileSystemDatasetWriteOptions {
   /// {i} will be replaced by an auto incremented integer.
   std::string basename_template;
 
+  /// Callback to be invoked against all FileWriters before
+  /// they are finalized with FileWriter::Finish().
+  std::function<Status(FileWriter*)> writer_pre_finish = [](FileWriter*) {
+    return Status::OK();
+  };
+
   const std::shared_ptr<FileFormat>& format() const {
     return file_write_options->format();
   }
diff --git a/cpp/src/arrow/dataset/file_csv.h b/cpp/src/arrow/dataset/file_csv.h
index f6636285c92..a365f7eac2b 100644
--- a/cpp/src/arrow/dataset/file_csv.h
+++ b/cpp/src/arrow/dataset/file_csv.h
@@ -67,7 +67,8 @@ class ARROW_DS_EXPORT CsvFileFormat : public FileFormat {
 
   Result<std::shared_ptr<FileWriter>> MakeWriter(
       std::shared_ptr<io::OutputStream> destination, std::shared_ptr<Schema> schema,
-      std::shared_ptr<FileWriteOptions> options) const override {
+      std::shared_ptr<FileWriteOptions> options,
+      fs::FileLocator destination_locator) const override {
     return Status::NotImplemented("writing fragment of CsvFileFormat");
   }
 
diff --git a/cpp/src/arrow/dataset/file_ipc.cc b/cpp/src/arrow/dataset/file_ipc.cc
index 2032f03d28f..40f5d3e8e0d 100644
--- a/cpp/src/arrow/dataset/file_ipc.cc
+++ b/cpp/src/arrow/dataset/file_ipc.cc
@@ -258,7 +258,8 @@ std::shared_ptr<FileWriteOptions> IpcFileFormat::DefaultWriteOptions() {
 
 Result<std::shared_ptr<FileWriter>> IpcFileFormat::MakeWriter(
     std::shared_ptr<io::OutputStream> destination, std::shared_ptr<Schema> schema,
-    std::shared_ptr<FileWriteOptions> options) const {
+    std::shared_ptr<FileWriteOptions> options,
+    fs::FileLocator destination_locator) const {
   if (!Equals(*options->format())) {
     return Status::TypeError("Mismatching format/write options.");
   }
@@ -274,14 +275,16 @@ Result<std::shared_ptr<FileWriter>> IpcFileFormat::MakeWriter(
 
   return std::shared_ptr<FileWriter>(
       new IpcFileWriter(std::move(destination), std::move(writer), std::move(schema),
-                        std::move(ipc_options)));
+                        std::move(ipc_options), std::move(destination_locator)));
 }
 
 IpcFileWriter::IpcFileWriter(std::shared_ptr<io::OutputStream> destination,
                              std::shared_ptr<ipc::RecordBatchWriter> writer,
                              std::shared_ptr<Schema> schema,
-                             std::shared_ptr<IpcFileWriteOptions> options)
-    : FileWriter(std::move(schema), std::move(options), std::move(destination)),
+                             std::shared_ptr<IpcFileWriteOptions> options,
+                             fs::FileLocator destination_locator)
+    : FileWriter(std::move(schema), std::move(options), std::move(destination),
+                 std::move(destination_locator)),
       batch_writer_(std::move(writer)) {}
 
 Status IpcFileWriter::Write(const std::shared_ptr<RecordBatch>& batch) {
diff --git a/cpp/src/arrow/dataset/file_ipc.h b/cpp/src/arrow/dataset/file_ipc.h
index deff26c6f95..ef78515221c 100644
--- a/cpp/src/arrow/dataset/file_ipc.h
+++ b/cpp/src/arrow/dataset/file_ipc.h
@@ -67,7 +67,8 @@ class ARROW_DS_EXPORT IpcFileFormat : public FileFormat {
 
   Result<std::shared_ptr<FileWriter>> MakeWriter(
       std::shared_ptr<io::OutputStream> destination, std::shared_ptr<Schema> schema,
-      std::shared_ptr<FileWriteOptions> options) const override;
+      std::shared_ptr<FileWriteOptions> options,
+      fs::FileLocator destination_locator) const override;
 
   std::shared_ptr<FileWriteOptions> DefaultWriteOptions() override;
 };
@@ -107,7 +108,8 @@ class ARROW_DS_EXPORT IpcFileWriter : public FileWriter {
   IpcFileWriter(std::shared_ptr<io::OutputStream> destination,
                 std::shared_ptr<ipc::RecordBatchWriter> writer,
                 std::shared_ptr<Schema> schema,
-                std::shared_ptr<IpcFileWriteOptions> options);
+                std::shared_ptr<IpcFileWriteOptions> options,
+                fs::FileLocator destination_locator);
 
   Status FinishInternal() override;
 
diff --git a/cpp/src/arrow/dataset/file_parquet.cc b/cpp/src/arrow/dataset/file_parquet.cc
index 8611cf89997..9e29926e837 100644
--- a/cpp/src/arrow/dataset/file_parquet.cc
+++ b/cpp/src/arrow/dataset/file_parquet.cc
@@ -70,7 +70,7 @@ class ParquetScanTask : public ScanTask {
         reader_(std::move(reader)),
         pre_buffer_once_(std::move(pre_buffer_once)),
         pre_buffer_row_groups_(std::move(pre_buffer_row_groups)),
-        io_context_(io_context),
+        io_context_(std::move(io_context)),
         cache_options_(cache_options) {}
 
   Result<RecordBatchIterator> Execute() override {
@@ -540,7 +540,8 @@ std::shared_ptr<FileWriteOptions> ParquetFileFormat::DefaultWriteOptions() {
 
 Result<std::shared_ptr<FileWriter>> ParquetFileFormat::MakeWriter(
     std::shared_ptr<io::OutputStream> destination, std::shared_ptr<Schema> schema,
-    std::shared_ptr<FileWriteOptions> options) const {
+    std::shared_ptr<FileWriteOptions> options,
+    fs::FileLocator destination_locator) const {
   if (!Equals(*options->format())) {
     return Status::TypeError("Mismatching format/write options");
   }
@@ -552,14 +553,17 @@ Result<std::shared_ptr<FileWriter>> ParquetFileFormat::MakeWriter(
       *schema, default_memory_pool(), destination, parquet_options->writer_properties,
       parquet_options->arrow_writer_properties, &parquet_writer));
 
-  return std::shared_ptr<FileWriter>(new ParquetFileWriter(
-      std::move(destination), std::move(parquet_writer), std::move(parquet_options)));
+  return std::shared_ptr<FileWriter>(
+      new ParquetFileWriter(std::move(destination), std::move(parquet_writer),
+                            std::move(parquet_options), std::move(destination_locator)));
 }
 
 ParquetFileWriter::ParquetFileWriter(std::shared_ptr<io::OutputStream> destination,
                                      std::shared_ptr<parquet::arrow::FileWriter> writer,
-                                     std::shared_ptr<ParquetFileWriteOptions> options)
-    : FileWriter(writer->schema(), std::move(options), std::move(destination)),
+                                     std::shared_ptr<ParquetFileWriteOptions> options,
+                                     fs::FileLocator destination_locator)
+    : FileWriter(writer->schema(), std::move(options), std::move(destination),
+                 std::move(destination_locator)),
       parquet_writer_(std::move(writer)) {}
 
 Status ParquetFileWriter::Write(const std::shared_ptr<RecordBatch>& batch) {
diff --git a/cpp/src/arrow/dataset/file_parquet.h b/cpp/src/arrow/dataset/file_parquet.h
index 347f4032046..da4fd58ebbe 100644
--- a/cpp/src/arrow/dataset/file_parquet.h
+++ b/cpp/src/arrow/dataset/file_parquet.h
@@ -128,7 +128,8 @@ class ARROW_DS_EXPORT ParquetFileFormat : public FileFormat {
 
   Result<std::shared_ptr<FileWriter>> MakeWriter(
       std::shared_ptr<io::OutputStream> destination, std::shared_ptr<Schema> schema,
-      std::shared_ptr<FileWriteOptions> options) const override;
+      std::shared_ptr<FileWriteOptions> options,
+      fs::FileLocator destination_locator) const override;
 
   std::shared_ptr<FileWriteOptions> DefaultWriteOptions() override;
 };
@@ -252,7 +253,8 @@ class ARROW_DS_EXPORT ParquetFileWriter : public FileWriter {
  private:
   ParquetFileWriter(std::shared_ptr<io::OutputStream> destination,
                     std::shared_ptr<parquet::arrow::FileWriter> writer,
-                    std::shared_ptr<ParquetFileWriteOptions> options);
+                    std::shared_ptr<ParquetFileWriteOptions> options,
+                    fs::FileLocator destination_locator);
 
   Status FinishInternal() override;
 
diff --git a/cpp/src/arrow/dataset/file_test.cc b/cpp/src/arrow/dataset/file_test.cc
index 839b48a0e64..b80d1bb57f0 100644
--- a/cpp/src/arrow/dataset/file_test.cc
+++ b/cpp/src/arrow/dataset/file_test.cc
@@ -87,22 +87,23 @@ constexpr int kNumScanTasks = 2;
 constexpr int kBatchesPerScanTask = 2;
 constexpr int kRowsPerBatch = 1024;
 class MockFileFormat : public FileFormat {
-  virtual std::string type_name() const { return "mock"; }
-  virtual bool Equals(const FileFormat& other) const { return false; }
-  virtual Result<bool> IsSupported(const FileSource& source) const { return true; }
-  virtual Result<std::shared_ptr<Schema>> Inspect(const FileSource& source) const {
+  std::string type_name() const override { return "mock"; }
+  bool Equals(const FileFormat& other) const override { return false; }
+  Result<bool> IsSupported(const FileSource& source) const override { return true; }
+  Result<std::shared_ptr<Schema>> Inspect(const FileSource& source) const override {
     return Status::NotImplemented("Not needed for test");
   }
-  virtual Result<std::shared_ptr<FileWriter>> MakeWriter(
+  Result<std::shared_ptr<FileWriter>> MakeWriter(
       std::shared_ptr<io::OutputStream> destination, std::shared_ptr<Schema> schema,
-      std::shared_ptr<FileWriteOptions> options) const {
+      std::shared_ptr<FileWriteOptions> options,
+      fs::FileLocator destination_locator) const override {
     return Status::NotImplemented("Not needed for test");
   }
-  virtual std::shared_ptr<FileWriteOptions> DefaultWriteOptions() { return nullptr; }
+  std::shared_ptr<FileWriteOptions> DefaultWriteOptions() override { return nullptr; }
 
-  virtual Result<ScanTaskIterator> ScanFile(
+  Result<ScanTaskIterator> ScanFile(
       const std::shared_ptr<ScanOptions>& options,
-      const std::shared_ptr<FileFragment>& file) const {
+      const std::shared_ptr<FileFragment>& file) const override {
     auto sch = schema({field("i32", int32())});
     ScanTaskVector scan_tasks;
     for (int i = 0; i < kNumScanTasks; i++) {
diff --git a/cpp/src/arrow/dataset/test_util.h b/cpp/src/arrow/dataset/test_util.h
index 66f5654fec4..42704fea9b5 100644
--- a/cpp/src/arrow/dataset/test_util.h
+++ b/cpp/src/arrow/dataset/test_util.h
@@ -486,7 +486,7 @@ class FileFormatFixtureMixin : public ::testing::Test {
     EXPECT_OK_AND_ASSIGN(auto sink, GetFileSink());
 
     if (!options) options = format->DefaultWriteOptions();
-    EXPECT_OK_AND_ASSIGN(auto writer, format->MakeWriter(sink, schema, options));
+    EXPECT_OK_AND_ASSIGN(auto writer, format->MakeWriter(sink, schema, options, {}));
     ARROW_EXPECT_OK(writer->Write(GetRecordBatchReader(schema).get()));
     ARROW_EXPECT_OK(writer->Finish());
     EXPECT_OK_AND_ASSIGN(auto written, sink->Finish());
@@ -722,7 +722,8 @@ class DummyFileFormat : public FileFormat {
 
   Result<std::shared_ptr<FileWriter>> MakeWriter(
       std::shared_ptr<io::OutputStream> destination, std::shared_ptr<Schema> schema,
-      std::shared_ptr<FileWriteOptions> options) const override {
+      std::shared_ptr<FileWriteOptions> options,
+      fs::FileLocator destination_locator) const override {
     return Status::NotImplemented("writing fragment of DummyFileFormat");
   }
 
@@ -770,7 +771,8 @@ class JSONRecordBatchFileFormat : public FileFormat {
 
   Result<std::shared_ptr<FileWriter>> MakeWriter(
       std::shared_ptr<io::OutputStream> destination, std::shared_ptr<Schema> schema,
-      std::shared_ptr<FileWriteOptions> options) const override {
+      std::shared_ptr<FileWriteOptions> options,
+      fs::FileLocator destination_locator) const override {
     return Status::NotImplemented("writing fragment of JSONRecordBatchFileFormat");
   }
 
@@ -1057,8 +1059,12 @@ class WriteFileSystemDatasetMixin : public MakeFileSystemDatasetMixin {
   void SetWriteOptions(std::shared_ptr<FileWriteOptions> file_write_options) {
     write_options_.file_write_options = file_write_options;
     write_options_.filesystem = fs_;
-    write_options_.base_dir = "new_root/";
+    write_options_.base_dir = "/new_root/";
     write_options_.basename_template = "dat_{i}";
+    write_options_.writer_pre_finish = [this](FileWriter* writer) {
+      visited_paths_.push_back(writer->destination().path);
+      return Status::OK();
+    };
   }
 
   void DoWrite(std::shared_ptr<Partitioning> desired_partitioning) {
@@ -1210,11 +1216,17 @@ class WriteFileSystemDatasetMixin : public MakeFileSystemDatasetMixin {
     for (const auto& file_contents : expected_files_) {
       expected_paths.insert(file_contents.first);
     }
+
+    // expect the written filesystem to contain precisely the paths we expected
     for (auto path : checked_pointer_cast<FileSystemDataset>(written_)->files()) {
       actual_paths.insert(std::move(path));
     }
     EXPECT_THAT(actual_paths, testing::UnorderedElementsAreArray(expected_paths));
 
+    // Additionally, the writer producing each written file was visited and its path
+    // collected. That should match the expected paths as well
+    EXPECT_THAT(visited_paths_, testing::UnorderedElementsAreArray(expected_paths));
+
     ASSERT_OK_AND_ASSIGN(auto written_fragments_it, written_->GetFragments());
     for (auto maybe_fragment : written_fragments_it) {
       ASSERT_OK_AND_ASSIGN(auto fragment, maybe_fragment);
@@ -1257,6 +1269,7 @@ class WriteFileSystemDatasetMixin : public MakeFileSystemDatasetMixin {
   PathAndContent expected_files_;
   std::shared_ptr<Schema> expected_physical_schema_;
   std::shared_ptr<Dataset> written_;
+  std::vector<std::string> visited_paths_;
   FileSystemDatasetWriteOptions write_options_;
   std::shared_ptr<ScanOptions> scan_options_;
 };

From bd60dbcbca2b6aa384395d83ac86edd904ebb614 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Wed, 23 Jun 2021 12:40:06 +0200
Subject: [PATCH 447/719] ARROW-12790: [C++] Improve HadoopFileSystem
 conformance

* Ensure the HadoopFileSystem meets most requirements from the FileSystem API.
* Implement HadoopFileSystem::CopyFile.
* Enable generic filesystem tests for HadoopFileSystem.
* Add generic filesystem test for special characters.

Closes #10574 from pitrou/ARROW-12790-hdfs-special-chars

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/filesystem/hdfs.cc      | 92 ++++++++++++++++++---------
 cpp/src/arrow/filesystem/hdfs_test.cc | 67 +++++++++++++++----
 cpp/src/arrow/filesystem/test_util.cc | 49 +++++++++++---
 cpp/src/arrow/filesystem/test_util.h  |  9 ++-
 cpp/src/arrow/io/hdfs.cc              | 59 ++++++++++++++---
 cpp/src/arrow/io/hdfs.h               |  4 ++
 python/pyarrow/tests/test_fs.py       | 29 ++-------
 7 files changed, 226 insertions(+), 83 deletions(-)

diff --git a/cpp/src/arrow/filesystem/hdfs.cc b/cpp/src/arrow/filesystem/hdfs.cc
index 77433172f54..c6396deac05 100644
--- a/cpp/src/arrow/filesystem/hdfs.cc
+++ b/cpp/src/arrow/filesystem/hdfs.cc
@@ -106,12 +106,18 @@ class HadoopFileSystem::Impl {
       return st;
     }
     for (const auto& child_path_info : children) {
-      // HDFS returns an absolute URI here, need to extract path relative to wd
-      Uri uri;
-      RETURN_NOT_OK(uri.Parse(child_path_info.name));
-      std::string child_path = uri.path();
+      // HDFS returns an absolute "URI" here, need to extract path relative to wd
+      // XXX: unfortunately, this is not a real URI as special characters
+      // are not %-escaped... hence parsing it as URI would fail.
+      std::string child_path;
       if (!wd.empty()) {
-        ARROW_ASSIGN_OR_RAISE(child_path, MakeAbstractPathRelative(wd, child_path));
+        if (child_path_info.name.substr(0, wd.length()) != wd) {
+          return Status::IOError("HDFS returned path '", child_path_info.name,
+                                 "' that is not a child of '", wd, "'");
+        }
+        child_path = child_path_info.name.substr(wd.length());
+      } else {
+        child_path = child_path_info.name;
       }
 
       FileInfo info;
@@ -134,21 +140,39 @@ class HadoopFileSystem::Impl {
     }
     std::vector<FileInfo> results;
 
+    // Fetch working directory.
+    // If select.base_dir is relative, we need to trim it from the start
+    // of paths returned by ListDirectory.
+    // If select.base_dir is absolute, we need to trim the "URI authority"
+    // portion of the working directory.
     std::string wd;
-    if (select.base_dir.empty() || select.base_dir.front() != '/') {
-      // Fetch working directory, because we need to trim it from the start
-      // of paths returned by ListDirectory as select.base_dir is relative.
-      RETURN_NOT_OK(client_->GetWorkingDirectory(&wd));
-      Uri wd_uri;
-      RETURN_NOT_OK(wd_uri.Parse(wd));
-      wd = wd_uri.path();
+    RETURN_NOT_OK(client_->GetWorkingDirectory(&wd));
+
+    if (!select.base_dir.empty() && select.base_dir.front() == '/') {
+      // base_dir is absolute, only keep the URI authority portion.
+      // As mentioned in StatSelector() above, the URI may contain unescaped
+      // special chars and therefore may not be a valid URI, so we parse by hand.
+      auto pos = wd.find("://");  // start of host:port portion
+      if (pos == std::string::npos) {
+        return Status::IOError("Unexpected HDFS working directory URI: ", wd);
+      }
+      pos = wd.find("/", pos + 3);  // end of host:port portion
+      if (pos == std::string::npos) {
+        return Status::IOError("Unexpected HDFS working directory URI: ", wd);
+      }
+      wd = wd.substr(0, pos);  // keep up until host:port (included)
+    } else if (!wd.empty() && wd.back() != '/') {
+      // For a relative lookup, trim leading slashes
+      wd += '/';
     }
 
-    ARROW_ASSIGN_OR_RAISE(auto info, GetFileInfo(select.base_dir));
-    if (info.type() == FileType::File) {
-      return Status::Invalid(
-          "GetFileInfo expects base_dir of selector to be a directory, while '",
-          select.base_dir, "' is a file");
+    if (!select.base_dir.empty()) {
+      ARROW_ASSIGN_OR_RAISE(auto info, GetFileInfo(select.base_dir));
+      if (info.type() == FileType::File) {
+        return Status::IOError(
+            "GetFileInfo expects base_dir of selector to be a directory, but '",
+            select.base_dir, "' is a file");
+      }
     }
     RETURN_NOT_OK(StatSelector(wd, select.base_dir, select, 0, &results));
     return results;
@@ -178,6 +202,10 @@ class HadoopFileSystem::Impl {
   }
 
   Status DeleteDirContents(const std::string& path) {
+    if (!IsDirectory(path)) {
+      return Status::IOError("Cannot delete contents of directory '", path,
+                             "': not a directory");
+    }
     std::vector<std::string> file_list;
     RETURN_NOT_OK(client_->GetChildren(path, &file_list));
     for (auto file : file_list) {
@@ -195,13 +223,17 @@ class HadoopFileSystem::Impl {
   }
 
   Status Move(const std::string& src, const std::string& dest) {
-    RETURN_NOT_OK(client_->Rename(src, dest));
-    return Status::OK();
+    auto st = client_->Rename(src, dest);
+    if (st.IsIOError() && IsFile(src) && IsFile(dest)) {
+      // Allow file -> file clobber
+      RETURN_NOT_OK(client_->Delete(dest));
+      st = client_->Rename(src, dest);
+    }
+    return st;
   }
 
   Status CopyFile(const std::string& src, const std::string& dest) {
-    // TODO implement this (but only if HDFS supports on-server copy)
-    return Status::NotImplemented("HadoopFileSystem::CopyFile is not supported yet");
+    return client_->Copy(src, dest);
   }
 
   Result<std::shared_ptr<io::InputStream>> OpenInputStream(const std::string& path) {
@@ -253,14 +285,16 @@ class HadoopFileSystem::Impl {
 
   bool IsDirectory(const std::string& path) {
     io::HdfsPathInfo info;
-    Status status = client_->GetPathInfo(path, &info);
-    if (!status.ok()) {
-      return false;
-    }
-    if (info.kind == io::ObjectType::DIRECTORY) {
-      return true;
-    }
-    return false;
+    return GetPathInfo(path, &info) && info.kind == io::ObjectType::DIRECTORY;
+  }
+
+  bool IsFile(const std::string& path) {
+    io::HdfsPathInfo info;
+    return GetPathInfo(path, &info) && info.kind == io::ObjectType::FILE;
+  }
+
+  bool GetPathInfo(const std::string& path, io::HdfsPathInfo* info) {
+    return client_->GetPathInfo(path, info).ok();
   }
 
   TimePoint ToTimePoint(int secs) {
diff --git a/cpp/src/arrow/filesystem/hdfs_test.cc b/cpp/src/arrow/filesystem/hdfs_test.cc
index 8215455613a..498549b85f0 100644
--- a/cpp/src/arrow/filesystem/hdfs_test.cc
+++ b/cpp/src/arrow/filesystem/hdfs_test.cc
@@ -15,6 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include <chrono>
 #include <memory>
 #include <sstream>
 #include <string>
@@ -66,9 +67,9 @@ TEST(TestHdfsOptions, FromUri) {
   ASSERT_EQ(options.connection_config.user, "");
 }
 
-class TestHadoopFileSystem : public ::testing::Test {
+class HadoopFileSystemTestMixin {
  public:
-  void SetUp() override {
+  void MakeFileSystem() {
     const char* host = std::getenv("ARROW_HDFS_TEST_HOST");
     const char* port = std::getenv("ARROW_HDFS_TEST_PORT");
     const char* user = std::getenv("ARROW_HDFS_TEST_USER");
@@ -91,9 +92,19 @@ class TestHadoopFileSystem : public ::testing::Test {
       return;
     }
     loaded_driver_ = true;
-    fs_ = std::make_shared<SubTreeFileSystem>("", *result);
+    fs_ = *result;
   }
 
+ protected:
+  HdfsOptions options_;
+  bool loaded_driver_ = false;
+  std::shared_ptr<FileSystem> fs_;
+};
+
+class TestHadoopFileSystem : public ::testing::Test, public HadoopFileSystemTestMixin {
+ public:
+  void SetUp() override { MakeFileSystem(); }
+
   void TestFileSystemFromUri() {
     std::stringstream ss;
     ss << "hdfs://" << options_.connection_config.host << ":"
@@ -176,17 +187,11 @@ class TestHadoopFileSystem : public ::testing::Test {
     ASSERT_OK(fs_->DeleteDir(base_dir + "AB"));
     AssertFileInfo(fs_.get(), base_dir + "AB", FileType::NotFound);
   }
-
- protected:
-  std::shared_ptr<FileSystem> fs_;
-  HdfsOptions options_;
-  bool loaded_driver_ = false;
 };
 
-#define SKIP_IF_NO_DRIVER()                           \
-  if (!this->loaded_driver_) {                        \
-    ARROW_LOG(INFO) << "Driver not loaded, skipping"; \
-    return;                                           \
+#define SKIP_IF_NO_DRIVER()                        \
+  if (!this->loaded_driver_) {                     \
+    GTEST_SKIP() << "Driver not loaded, skipping"; \
   }
 
 TEST_F(TestHadoopFileSystem, CreateDirDeleteDir) {
@@ -308,5 +313,43 @@ TEST_F(TestHadoopFileSystem, FileSystemFromUri) {
   this->TestFileSystemFromUri();
 }
 
+class TestHadoopFileSystemGeneric : public ::testing::Test,
+                                    public HadoopFileSystemTestMixin,
+                                    public GenericFileSystemTest {
+ public:
+  void SetUp() override {
+    MakeFileSystem();
+    SKIP_IF_NO_DRIVER();
+    timestamp_ =
+        static_cast<int64_t>(std::chrono::time_point_cast<std::chrono::nanoseconds>(
+                                 std::chrono::steady_clock::now())
+                                 .time_since_epoch()
+                                 .count());
+  }
+
+ protected:
+  bool allow_write_file_over_dir() const override { return true; }
+  bool allow_move_dir_over_non_empty_dir() const override { return true; }
+  bool have_implicit_directories() const override { return true; }
+  bool allow_append_to_new_file() const override { return false; }
+
+  std::shared_ptr<FileSystem> GetEmptyFileSystem() override {
+    // Since the HDFS contents are kept persistently between test runs,
+    // make sure each test gets a pristine fresh directory.
+    std::stringstream ss;
+    ss << "GenericTest" << timestamp_ << "-" << test_num_++;
+    const auto subdir = ss.str();
+    ARROW_EXPECT_OK(fs_->CreateDir(subdir));
+    return std::make_shared<SubTreeFileSystem>(subdir, fs_);
+  }
+
+  static int test_num_;
+  int64_t timestamp_;
+};
+
+int TestHadoopFileSystemGeneric::test_num_ = 1;
+
+GENERIC_FS_TEST_FUNCTIONS(TestHadoopFileSystemGeneric);
+
 }  // namespace fs
 }  // namespace arrow
diff --git a/cpp/src/arrow/filesystem/test_util.cc b/cpp/src/arrow/filesystem/test_util.cc
index be9d99d72b8..4fe073c0aa0 100644
--- a/cpp/src/arrow/filesystem/test_util.cc
+++ b/cpp/src/arrow/filesystem/test_util.cc
@@ -452,20 +452,28 @@ void GenericFileSystemTest::TestMoveDir(FileSystem* fs) {
   AssertAllDirs(fs, {"EF", "KL", "KL/CD"});
   AssertAllFiles(fs, {"EF/ghi", "KL/CD/def", "KL/abc"});
 
-  // Destination is a non-empty directory
-  ASSERT_RAISES(IOError, fs->Move("KL", "EF"));
-  AssertAllDirs(fs, {"EF", "KL", "KL/CD"});
-  AssertAllFiles(fs, {"EF/ghi", "KL/CD/def", "KL/abc"});
-
   // Cannot move directory inside itself
   ASSERT_RAISES(IOError, fs->Move("KL", "KL/ZZ"));
 
-  // (other errors tested in TestMoveFile)
-
   // Contents didn't change
   AssertAllDirs(fs, {"EF", "KL", "KL/CD"});
   AssertFileContents(fs, "KL/abc", "abc data");
   AssertFileContents(fs, "KL/CD/def", "def data");
+
+  // Destination is a non-empty directory
+  if (!allow_move_dir_over_non_empty_dir()) {
+    ASSERT_RAISES(IOError, fs->Move("KL", "EF"));
+    AssertAllDirs(fs, {"EF", "KL", "KL/CD"});
+    AssertAllFiles(fs, {"EF/ghi", "KL/CD/def", "KL/abc"});
+  } else {
+    // In some filesystems such as HDFS, this operation is interpreted
+    // as with the Unix `mv` command, i.e. move KL *inside* EF.
+    ASSERT_OK(fs->Move("KL", "EF"));
+    AssertAllDirs(fs, {"EF", "EF/KL", "EF/KL/CD"});
+    AssertAllFiles(fs, {"EF/KL/CD/def", "EF/KL/abc", "EF/ghi"});
+  }
+
+  // (other errors tested in TestMoveFile)
 }
 
 void GenericFileSystemTest::TestCopyFile(FileSystem* fs) {
@@ -888,7 +896,11 @@ void GenericFileSystemTest::TestOpenAppendStream(FileSystem* fs) {
 
   std::shared_ptr<io::OutputStream> stream;
 
-  ASSERT_OK_AND_ASSIGN(stream, fs->OpenAppendStream("abc"));
+  if (allow_append_to_new_file()) {
+    ASSERT_OK_AND_ASSIGN(stream, fs->OpenAppendStream("abc"));
+  } else {
+    ASSERT_OK_AND_ASSIGN(stream, fs->OpenOutputStream("abc"));
+  }
   ASSERT_OK_AND_EQ(0, stream->Tell());
   ASSERT_OK(stream->Write("some "));
   ASSERT_OK(stream->Write(Buffer::FromString("data")));
@@ -1050,6 +1062,26 @@ void GenericFileSystemTest::TestOpenInputFileWithFileInfo(FileSystem* fs) {
   ASSERT_RAISES(IOError, fs->OpenInputFile(info));
 }
 
+void GenericFileSystemTest::TestSpecialChars(FileSystem* fs) {
+  ASSERT_OK(fs->CreateDir("Blank Char"));
+  CreateFile(fs, "Blank Char/Special%Char.txt", "data");
+  std::vector<std::string> all_dirs{"Blank Char"};
+
+  AssertAllDirs(fs, all_dirs);
+  AssertAllFiles(fs, {"Blank Char/Special%Char.txt"});
+  AssertFileContents(fs, "Blank Char/Special%Char.txt", "data");
+
+  ASSERT_OK(fs->CopyFile("Blank Char/Special%Char.txt", "Special and%different.txt"));
+  AssertAllDirs(fs, all_dirs);
+  AssertAllFiles(fs, {"Blank Char/Special%Char.txt", "Special and%different.txt"});
+  AssertFileContents(fs, "Special and%different.txt", "data");
+
+  ASSERT_OK(fs->DeleteFile("Special and%different.txt"));
+  ASSERT_OK(fs->DeleteDir("Blank Char"));
+  AssertAllDirs(fs, {});
+  AssertAllFiles(fs, {});
+}
+
 #define GENERIC_FS_TEST_DEFINE(FUNC_NAME) \
   void GenericFileSystemTest::FUNC_NAME() { FUNC_NAME(GetEmptyFileSystem().get()); }
 
@@ -1078,6 +1110,7 @@ GENERIC_FS_TEST_DEFINE(TestOpenInputStreamAsync)
 GENERIC_FS_TEST_DEFINE(TestOpenInputFile)
 GENERIC_FS_TEST_DEFINE(TestOpenInputFileWithFileInfo)
 GENERIC_FS_TEST_DEFINE(TestOpenInputFileAsync)
+GENERIC_FS_TEST_DEFINE(TestSpecialChars)
 
 #undef GENERIC_FS_TEST_DEFINE
 
diff --git a/cpp/src/arrow/filesystem/test_util.h b/cpp/src/arrow/filesystem/test_util.h
index 64577e1c60a..917a768084c 100644
--- a/cpp/src/arrow/filesystem/test_util.h
+++ b/cpp/src/arrow/filesystem/test_util.h
@@ -121,6 +121,7 @@ class ARROW_TESTING_EXPORT GenericFileSystemTest {
   void TestOpenInputFile();
   void TestOpenInputFileWithFileInfo();
   void TestOpenInputFileAsync();
+  void TestSpecialChars();
 
  protected:
   // This function should return the filesystem under test.
@@ -134,8 +135,12 @@ class ARROW_TESTING_EXPORT GenericFileSystemTest {
   virtual bool allow_write_file_over_dir() const { return false; }
   // - Whether the filesystem allows moving a directory
   virtual bool allow_move_dir() const { return true; }
+  // - Whether the filesystem allows moving a directory "over" a non-empty destination
+  virtual bool allow_move_dir_over_non_empty_dir() const { return false; }
   // - Whether the filesystem allows appending to a file
   virtual bool allow_append_to_file() const { return true; }
+  // - Whether the filesystem allows appending to a new (not existent yet) file
+  virtual bool allow_append_to_new_file() const { return true; }
   // - Whether the filesystem supports directory modification times
   virtual bool have_directory_mtimes() const { return true; }
   // - Whether some directory tree deletion tests may fail randomly
@@ -168,6 +173,7 @@ class ARROW_TESTING_EXPORT GenericFileSystemTest {
   void TestOpenInputFile(FileSystem* fs);
   void TestOpenInputFileWithFileInfo(FileSystem* fs);
   void TestOpenInputFileAsync(FileSystem* fs);
+  void TestSpecialChars(FileSystem* fs);
 };
 
 #define GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, NAME) \
@@ -198,7 +204,8 @@ class ARROW_TESTING_EXPORT GenericFileSystemTest {
   GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, OpenInputStreamAsync)             \
   GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, OpenInputFile)                    \
   GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, OpenInputFileWithFileInfo)        \
-  GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, OpenInputFileAsync)
+  GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, OpenInputFileAsync)               \
+  GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, SpecialChars)
 
 #define GENERIC_FS_TEST_FUNCTIONS(TEST_CLASS) \
   GENERIC_FS_TEST_FUNCTIONS_MACROS(TEST_F, TEST_CLASS)
diff --git a/cpp/src/arrow/io/hdfs.cc b/cpp/src/arrow/io/hdfs.cc
index e7e42f30700..cd9e912050c 100644
--- a/cpp/src/arrow/io/hdfs.cc
+++ b/cpp/src/arrow/io/hdfs.cc
@@ -84,12 +84,14 @@ class HdfsAnyFileImpl {
   }
 
   Status Seek(int64_t position) {
+    RETURN_NOT_OK(CheckClosed());
     int ret = driver_->Seek(fs_, file_, position);
     CHECK_FAILURE(ret, "seek");
     return Status::OK();
   }
 
   Result<int64_t> Tell() {
+    RETURN_NOT_OK(CheckClosed());
     int64_t ret = driver_->Tell(fs_, file_);
     CHECK_FAILURE(ret, "tell");
     return ret;
@@ -98,6 +100,13 @@ class HdfsAnyFileImpl {
   bool is_open() const { return is_open_; }
 
  protected:
+  Status CheckClosed() {
+    if (!is_open_) {
+      return Status::Invalid("Operation on closed HDFS file");
+    }
+    return Status::OK();
+  }
+
   std::string path_;
 
   internal::LibHdfsShim* driver_;
@@ -143,6 +152,7 @@ class HdfsReadableFile::HdfsReadableFileImpl : public HdfsAnyFileImpl {
   bool closed() const { return !is_open_; }
 
   Result<int64_t> ReadAt(int64_t position, int64_t nbytes, uint8_t* buffer) {
+    RETURN_NOT_OK(CheckClosed());
     if (!driver_->HasPread()) {
       std::lock_guard<std::mutex> guard(lock_);
       RETURN_NOT_OK(Seek(position));
@@ -169,11 +179,11 @@ class HdfsReadableFile::HdfsReadableFileImpl : public HdfsAnyFileImpl {
   }
 
   Result<std::shared_ptr<Buffer>> ReadAt(int64_t position, int64_t nbytes) {
-    ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateResizableBuffer(nbytes, pool_));
+    RETURN_NOT_OK(CheckClosed());
 
+    ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateResizableBuffer(nbytes, pool_));
     ARROW_ASSIGN_OR_RAISE(int64_t bytes_read,
                           ReadAt(position, nbytes, buffer->mutable_data()));
-
     if (bytes_read < nbytes) {
       RETURN_NOT_OK(buffer->Resize(bytes_read));
       buffer->ZeroPadding();
@@ -182,6 +192,8 @@ class HdfsReadableFile::HdfsReadableFileImpl : public HdfsAnyFileImpl {
   }
 
   Result<int64_t> Read(int64_t nbytes, void* buffer) {
+    RETURN_NOT_OK(CheckClosed());
+
     int64_t total_bytes = 0;
     while (total_bytes < nbytes) {
       tSize ret = driver_->Read(
@@ -197,8 +209,9 @@ class HdfsReadableFile::HdfsReadableFileImpl : public HdfsAnyFileImpl {
   }
 
   Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) {
-    ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateResizableBuffer(nbytes, pool_));
+    RETURN_NOT_OK(CheckClosed());
 
+    ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateResizableBuffer(nbytes, pool_));
     ARROW_ASSIGN_OR_RAISE(int64_t bytes_read, Read(nbytes, buffer->mutable_data()));
     if (bytes_read < nbytes) {
       RETURN_NOT_OK(buffer->Resize(bytes_read));
@@ -207,11 +220,12 @@ class HdfsReadableFile::HdfsReadableFileImpl : public HdfsAnyFileImpl {
   }
 
   Result<int64_t> GetSize() {
+    RETURN_NOT_OK(CheckClosed());
+
     hdfsFileInfo* entry = driver_->GetPathInfo(fs_, path_.c_str());
     if (entry == nullptr) {
       return GetPathInfoFailed(path_);
     }
-
     int64_t size = entry->mSize;
     driver_->FreeFileInfo(entry, 1);
     return size;
@@ -274,7 +288,7 @@ class HdfsOutputStream::HdfsOutputStreamImpl : public HdfsAnyFileImpl {
       // the error doesn't get propagated properly and the second close
       // initiated by the destructor raises a segfault
       is_open_ = false;
-      RETURN_NOT_OK(Flush());
+      RETURN_NOT_OK(FlushInternal());
       int ret = driver_->CloseFile(fs_, file_);
       CHECK_FAILURE(ret, "CloseFile");
     }
@@ -284,12 +298,14 @@ class HdfsOutputStream::HdfsOutputStreamImpl : public HdfsAnyFileImpl {
   bool closed() const { return !is_open_; }
 
   Status Flush() {
-    int ret = driver_->Flush(fs_, file_);
-    CHECK_FAILURE(ret, "Flush");
-    return Status::OK();
+    RETURN_NOT_OK(CheckClosed());
+
+    return FlushInternal();
   }
 
   Status Write(const uint8_t* buffer, int64_t nbytes) {
+    RETURN_NOT_OK(CheckClosed());
+
     constexpr int64_t kMaxBlockSize = std::numeric_limits<int32_t>::max();
 
     std::lock_guard<std::mutex> guard(lock_);
@@ -303,6 +319,13 @@ class HdfsOutputStream::HdfsOutputStreamImpl : public HdfsAnyFileImpl {
     }
     return Status::OK();
   }
+
+ protected:
+  Status FlushInternal() {
+    int ret = driver_->Flush(fs_, file_);
+    CHECK_FAILURE(ret, "Flush");
+    return Status::OK();
+  }
 };
 
 HdfsOutputStream::HdfsOutputStream() { impl_.reset(new HdfsOutputStreamImpl()); }
@@ -552,6 +575,18 @@ class HadoopFileSystem::HadoopFileSystemImpl {
     return Status::OK();
   }
 
+  Status Copy(const std::string& src, const std::string& dst) {
+    int ret = driver_->Copy(fs_, src.c_str(), fs_, dst.c_str());
+    CHECK_FAILURE(ret, "Rename");
+    return Status::OK();
+  }
+
+  Status Move(const std::string& src, const std::string& dst) {
+    int ret = driver_->Move(fs_, src.c_str(), fs_, dst.c_str());
+    CHECK_FAILURE(ret, "Rename");
+    return Status::OK();
+  }
+
   Status Chmod(const std::string& path, int mode) {
     int ret = driver_->Chmod(fs_, path.c_str(), static_cast<short>(mode));  // NOLINT
     CHECK_FAILURE(ret, "Chmod");
@@ -683,6 +718,14 @@ Status HadoopFileSystem::Rename(const std::string& src, const std::string& dst)
   return impl_->Rename(src, dst);
 }
 
+Status HadoopFileSystem::Copy(const std::string& src, const std::string& dst) {
+  return impl_->Copy(src, dst);
+}
+
+Status HadoopFileSystem::Move(const std::string& src, const std::string& dst) {
+  return impl_->Move(src, dst);
+}
+
 // ----------------------------------------------------------------------
 // Allow public API users to check whether we are set up correctly
 
diff --git a/cpp/src/arrow/io/hdfs.h b/cpp/src/arrow/io/hdfs.h
index 21b0cd8a282..5244eb05248 100644
--- a/cpp/src/arrow/io/hdfs.h
+++ b/cpp/src/arrow/io/hdfs.h
@@ -173,6 +173,10 @@ class ARROW_EXPORT HadoopFileSystem : public FileSystem {
   // current filesystem
   Status Rename(const std::string& src, const std::string& dst) override;
 
+  Status Copy(const std::string& src, const std::string& dst);
+
+  Status Move(const std::string& src, const std::string& dst);
+
   Status Stat(const std::string& path, FileStatistics* stat) override;
 
   // TODO(wesm): GetWorkingDirectory, SetWorkingDirectory
diff --git a/python/pyarrow/tests/test_fs.py b/python/pyarrow/tests/test_fs.py
index 0c65aac3f56..8faddc7b9e4 100644
--- a/python/pyarrow/tests/test_fs.py
+++ b/python/pyarrow/tests/test_fs.py
@@ -205,7 +205,6 @@ def localfs(request, tempdir):
     return dict(
         fs=LocalFileSystem(),
         pathfn=lambda p: (tempdir / p).as_posix(),
-        allow_copy_file=True,
         allow_move_dir=True,
         allow_append_to_file=True,
     )
@@ -216,7 +215,6 @@ def py_localfs(request, tempdir):
     return dict(
         fs=PyFileSystem(ProxyHandler(LocalFileSystem())),
         pathfn=lambda p: (tempdir / p).as_posix(),
-        allow_copy_file=True,
         allow_move_dir=True,
         allow_append_to_file=True,
     )
@@ -227,7 +225,6 @@ def mockfs(request):
     return dict(
         fs=_MockFileSystem(),
         pathfn=lambda p: p,
-        allow_copy_file=True,
         allow_move_dir=True,
         allow_append_to_file=True,
     )
@@ -238,7 +235,6 @@ def py_mockfs(request):
     return dict(
         fs=PyFileSystem(ProxyHandler(_MockFileSystem())),
         pathfn=lambda p: p,
-        allow_copy_file=True,
         allow_move_dir=True,
         allow_append_to_file=True,
     )
@@ -249,7 +245,6 @@ def localfs_with_mmap(request, tempdir):
     return dict(
         fs=LocalFileSystem(use_mmap=True),
         pathfn=lambda p: (tempdir / p).as_posix(),
-        allow_copy_file=True,
         allow_move_dir=True,
         allow_append_to_file=True,
     )
@@ -260,7 +255,6 @@ def subtree_localfs(request, tempdir, localfs):
     return dict(
         fs=SubTreeFileSystem(str(tempdir), localfs['fs']),
         pathfn=lambda p: p,
-        allow_copy_file=True,
         allow_move_dir=True,
         allow_append_to_file=True,
     )
@@ -285,7 +279,6 @@ def s3fs(request, s3_connection, s3_server):
     yield dict(
         fs=fs,
         pathfn=bucket.__add__,
-        allow_copy_file=True,
         allow_move_dir=False,
         allow_append_to_file=False,
     )
@@ -298,7 +291,6 @@ def subtree_s3fs(request, s3fs):
     return dict(
         fs=SubTreeFileSystem(prefix, s3fs['fs']),
         pathfn=prefix.__add__,
-        allow_copy_file=True,
         allow_move_dir=False,
         allow_append_to_file=False,
     )
@@ -318,7 +310,6 @@ def hdfs(request, hdfs_connection):
     return dict(
         fs=fs,
         pathfn=lambda p: p,
-        allow_copy_file=False,
         allow_move_dir=True,
         allow_append_to_file=True,
     )
@@ -331,7 +322,6 @@ def py_fsspec_localfs(request, tempdir):
     return dict(
         fs=PyFileSystem(FSSpecHandler(fs)),
         pathfn=lambda p: (tempdir / p).as_posix(),
-        allow_copy_file=True,
         allow_move_dir=True,
         allow_append_to_file=True,
     )
@@ -347,7 +337,6 @@ def py_fsspec_memoryfs(request, tempdir):
     return dict(
         fs=PyFileSystem(FSSpecHandler(fs)),
         pathfn=lambda p: p,
-        allow_copy_file=True,
         allow_move_dir=True,
         allow_append_to_file=True,
     )
@@ -374,7 +363,6 @@ def py_fsspec_s3fs(request, s3_connection, s3_server):
     yield dict(
         fs=fs,
         pathfn=bucket.__add__,
-        allow_copy_file=True,
         allow_move_dir=False,
         allow_append_to_file=True,
     )
@@ -446,11 +434,6 @@ def allow_move_dir(request, filesystem_config):
     return filesystem_config['allow_move_dir']
 
 
-@pytest.fixture
-def allow_copy_file(request, filesystem_config):
-    return filesystem_config['allow_copy_file']
-
-
 @pytest.fixture
 def allow_append_to_file(request, filesystem_config):
     return filesystem_config['allow_append_to_file']
@@ -804,20 +787,16 @@ def test_delete_root_dir_contents(mockfs, py_mockfs):
     _check_root_dir_contents(py_mockfs)
 
 
-def test_copy_file(fs, pathfn, allow_copy_file):
+def test_copy_file(fs, pathfn):
     s = pathfn('test-copy-source-file')
     t = pathfn('test-copy-target-file')
 
     with fs.open_output_stream(s):
         pass
 
-    if allow_copy_file:
-        fs.copy_file(s, t)
-        fs.delete_file(s)
-        fs.delete_file(t)
-    else:
-        with pytest.raises(pa.ArrowNotImplementedError):
-            fs.copy_file(s, t)
+    fs.copy_file(s, t)
+    fs.delete_file(s)
+    fs.delete_file(t)
 
 
 def test_move_directory(fs, pathfn, allow_move_dir):

From 88279786502ba8d7225fba4b686c01f38de43bd7 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Wed, 23 Jun 2021 13:35:43 +0200
Subject: [PATCH 448/719] ARROW-13148: [Dev][Archery] Fix crossbow job
 submission

Closes #10577 from pitrou/ARROW-13148-crossbow-submit-fix

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 dev/archery/archery/crossbow/core.py | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/dev/archery/archery/crossbow/core.py b/dev/archery/archery/crossbow/core.py
index 4f24d7592e9..838d6d48b73 100644
--- a/dev/archery/archery/crossbow/core.py
+++ b/dev/archery/archery/crossbow/core.py
@@ -640,17 +640,20 @@ def get_version(root, **kwargs):
         'git describe --dirty --tags --long --match "apache-arrow-[0-9].*"'
     )
     version = parse_git_version(root, **kwargs)
-
-    # increment the minor version, because there can be patch releases created
-    # from maintenance branches where the tags are unreachable from the
-    # master's HEAD, so the git command above generates 0.17.0.dev300 even if
-    # arrow has a never 0.17.1 patch release
-    pattern = r"^(\d+)\.(\d+)\.(\d+)$"
-    match = re.match(pattern, str(version.tag))
+    tag = str(version.tag)
+
+    # We may get a development tag for the next version, such as "5.0.0.dev0",
+    # or the tag of an already released version, such as "4.0.0".
+    # In the latter case, we need to increment the version so that the computed
+    # version comes after any patch release (the next feature version after
+    # 4.0.0 is 5.0.0).
+    pattern = r"^(\d+)\.(\d+)\.(\d+)"
+    match = re.match(pattern, tag)
     major, minor, patch = map(int, match.groups())
+    if 'dev' not in tag:
+        major += 1
 
-    # the bumped version number after 0.17.x will be 0.18.0.dev300
-    return "{}.{}.{}.dev{}".format(major, minor + 1, patch, version.distance)
+    return "{}.{}.{}.dev{}".format(major, minor, patch, version.distance)
 
 
 class Serializable:

From 593a08d219ea7323c93dfc55261e4cc8e6ff8afe Mon Sep 17 00:00:00 2001
From: Romain Francois <romain@rstudio.com>
Date: Wed, 23 Jun 2021 08:12:34 -0700
Subject: [PATCH 449/719] ARROW-9140: [R] Zero-copy Arrow to R where possible
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This makes altrep R vectors of type `INTSXP` or `REALSXP` from `arrow::Array` of type `Int32Type` /  `DoubleType` that don't have any nulls:

the altrep vector holds an external pointer so that the `Array` stays around, and its payload is shared. The R vector is marked as not mutable.

``` r
library(arrow, warn.conflicts = FALSE)
#> See arrow_info() for available features
```

create a “big” arrow Array with no nulls (just for testing purposes)

``` r
a <- arrow:::Test_array_nonull_dbl_vector(1e7)
```

turn into R vector, using altrep, sharing the payload

``` r
v <- a$as_vector()
```

verify it’s an altrep with the inspect method

``` r
.Internal(inspect(v))
#> @7f9abf8ba470 14 REALSXP g0c0 [REF(65535)] std::shared_ptr<arrow::Array, double, NONULL> (len=10000000, ptr=0x7f9ab5c8cd18)
```

it’s marked as not mutable so check that modify -\> duplicate

``` r
v[1] <- 0
#> Duplicate
.Internal(inspect(v))
#> @7f9ac0000000 14 REALSXP g1c7 [MARK,REF(1)] (len=10000000, tl=0) 0,42,42,42,42,...
```

timings for double vector

``` r
bench::workout({
  a <- arrow:::Test_array_nonull_dbl_vector(1e7)
  v <- a$as_vector()
  .Internal(inspect(v))
  v[1] <- 0
  .Internal(inspect(v))
})
#> @7f9abc122190 14 REALSXP g0c0 [REF(65535)] std::shared_ptr<arrow::Array, double, NONULL> (len=10000000, ptr=0x7f9aba2109c8)
#> Duplicate
#> @7f9aa5c00000 14 REALSXP g1c7 [MARK,REF(1)] (len=10000000, tl=0) 0,42,42,42,42,...
#> # A tibble: 5 x 3
#>   exprs                                             process     real
#>   <bch:expr>                                       <bch:tm> <bch:tm>
#> 1 a <- arrow:::Test_array_nonull_dbl_vector(1e+07)   70.3ms   70.6ms
#> 2 v <- a$as_vector()                                   13µs   14.3µs
#> 3 .Internal(inspect(v))                                12µs   11.9µs
```

when a copy is needed, the data is copied entirely:

```r
#> 4 v[1] <- 0                                          53.1ms   53.2ms
#> 5 .Internal(inspect(v))                                20µs   22.6µs
```

timings for integer vector

``` r
bench::workout({
  a <- arrow:::Test_array_nonull_int_vector(1e7)
  v <- a$as_vector()
  .Internal(inspect(v))
  v[1] <- 0
  .Internal(inspect(v))
})
#> @7f9abc5bd780 13 INTSXP g0c0 [REF(65535)] std::shared_ptr<arrow::Array, int32, NONULL> (len=10000000, ptr=0x7f9ab8997378)
#> @7f9ac0000000 14 REALSXP g1c7 [MARK,REF(1)] (len=10000000, tl=0) 0,42,42,42,42,...
#> # A tibble: 5 x 3
#>   exprs                                             process     real
#>   <bch:expr>                                       <bch:tm> <bch:tm>
#> 1 a <- arrow:::Test_array_nonull_int_vector(1e+07)   54.5ms   54.7ms
#> 2 v <- a$as_vector()                                   12µs   13.2µs
#> 3 .Internal(inspect(v))                                11µs   11.3µs
#> 4 v[1] <- 0                                         851.4ms  854.6ms
#> 5 .Internal(inspect(v))                                17µs   18.8µs
```

<sup>Created on 2021-06-08 by the [reprex package](https://reprex.tidyverse.org) (v2.0.0)</sup>

Closes #10445 from romainfrancois/ARROW_9140_zero_copy

Lead-authored-by: Romain Francois <romain@rstudio.com>
Co-authored-by: Romain François <romain@rstudio.com>
Co-authored-by: Benjamin Kietzman <bengilgit@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 r/R/arrowExports.R             |   8 ++
 r/data-raw/codegen.R           |   5 +
 r/src/altrep.cpp               | 166 +++++++++++++++++++++++++++++++++
 r/src/array_to_vector.cpp      |  23 +++++
 r/src/arrowExports.cpp         |  37 ++++++++
 r/src/arrow_types.h            |   6 ++
 r/tests/testthat/test-altrep.R |  96 +++++++++++++++++++
 7 files changed, 341 insertions(+)
 create mode 100644 r/src/altrep.cpp
 create mode 100644 r/tests/testthat/test-altrep.R

diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R
index 577773c42bd..9257f5787b1 100644
--- a/r/R/arrowExports.R
+++ b/r/R/arrowExports.R
@@ -1,5 +1,13 @@
 # Generated by using data-raw/codegen.R -> do not edit by hand
 
+is_altrep_int_nonull <- function(x){
+    .Call(`_arrow_is_altrep_int_nonull`, x)
+}
+
+is_altrep_dbl_nonull <- function(x){
+    .Call(`_arrow_is_altrep_dbl_nonull`, x)
+}
+
 Array__Slice1 <- function(array, offset){
     .Call(`_arrow_Array__Slice1`, array, offset)
 }
diff --git a/r/data-raw/codegen.R b/r/data-raw/codegen.R
index 9b25cb1842c..1a49ffc80fa 100644
--- a/r/data-raw/codegen.R
+++ b/r/data-raw/codegen.R
@@ -214,6 +214,11 @@ glue::glue('\n
 'extern "C" void R_init_arrow(DllInfo* dll){
   R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
   R_useDynamicSymbols(dll, FALSE);
+
+  #if defined(HAS_ALTREP)
+  arrow::r::Init_Altrep_classes(dll);
+  #endif
+
 }
 \n')
 
diff --git a/r/src/altrep.cpp b/r/src/altrep.cpp
new file mode 100644
index 00000000000..33e30aa3ffb
--- /dev/null
+++ b/r/src/altrep.cpp
@@ -0,0 +1,166 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cpp11/altrep.hpp>
+
+#include "./arrow_types.h"
+
+#if defined(HAS_ALTREP)
+
+#include <R_ext/Altrep.h>
+#include <arrow/array.h>
+
+namespace arrow {
+namespace r {
+
+template <int sexp_type>
+struct ArrayNoNull {
+  using data_type = typename std::conditional<sexp_type == INTSXP, int, double>::type;
+  static void DeleteArray(std::shared_ptr<Array>* ptr) { delete ptr; }
+  using Pointer = cpp11::external_pointer<std::shared_ptr<Array>, DeleteArray>;
+
+  // altrep object around an Array with no nulls
+  // data1: an external pointer to a shared pointer to the Array
+  // data2: not used
+
+  static SEXP Make(R_altrep_class_t class_t, const std::shared_ptr<Array>& array) {
+    // we don't need the whole r6 object, just an external pointer
+    // that retain the array
+    Pointer xp(new std::shared_ptr<Array>(array));
+
+    SEXP res = R_new_altrep(class_t, xp, R_NilValue);
+    MARK_NOT_MUTABLE(res);
+
+    return res;
+  }
+
+  static Rboolean Inspect(SEXP x, int pre, int deep, int pvec,
+                          void (*inspect_subtree)(SEXP, int, int, int)) {
+    const auto& array = Get(x);
+    Rprintf("arrow::Array<%s, NONULL> len=%d, Array=<%p>\n",
+            array->type()->ToString().c_str(), array->length(), array.get());
+    inspect_subtree(R_altrep_data1(x), pre, deep + 1, pvec);
+    return TRUE;
+  }
+
+  static const std::shared_ptr<Array>& Get(SEXP vec) {
+    return *Pointer(R_altrep_data1(vec));
+  }
+
+  static R_xlen_t Length(SEXP vec) { return Get(vec)->length(); }
+
+  static const void* Dataptr_or_null(SEXP vec) {
+    return Get(vec)->data()->template GetValues<data_type>(1);
+  }
+
+  static SEXP Duplicate(SEXP vec, Rboolean) {
+    const auto& array = Get(vec);
+    auto size = array->length();
+
+    SEXP copy = PROTECT(Rf_allocVector(sexp_type, array->length()));
+
+    memcpy(DATAPTR(copy), Dataptr_or_null(vec), size * sizeof(data_type));
+
+    UNPROTECT(1);
+    return copy;
+  }
+
+  static void* Dataptr(SEXP vec, Rboolean writeable) {
+    return const_cast<void*>(Dataptr_or_null(vec));
+  }
+
+  // by definition, there are no NA
+  static int No_NA(SEXP vec) { return 1; }
+
+  static void Init(R_altrep_class_t class_t, DllInfo* dll) {
+    // altrep
+    R_set_altrep_Length_method(class_t, ArrayNoNull::Length);
+    R_set_altrep_Inspect_method(class_t, ArrayNoNull::Inspect);
+    R_set_altrep_Duplicate_method(class_t, ArrayNoNull::Duplicate);
+
+    // altvec
+    R_set_altvec_Dataptr_method(class_t, ArrayNoNull::Dataptr);
+    R_set_altvec_Dataptr_or_null_method(class_t, ArrayNoNull::Dataptr_or_null);
+  }
+};
+
+struct DoubleArrayNoNull {
+  static R_altrep_class_t class_t;
+
+  static void Init(DllInfo* dll) {
+    class_t = R_make_altreal_class("array_nonull_dbl_vector", "arrow", dll);
+    ArrayNoNull<REALSXP>::Init(class_t, dll);
+    R_set_altreal_No_NA_method(class_t, ArrayNoNull<REALSXP>::No_NA);
+  }
+
+  static SEXP Make(const std::shared_ptr<Array>& array) {
+    return ArrayNoNull<REALSXP>::Make(class_t, array);
+  }
+};
+
+struct Int32ArrayNoNull {
+  static R_altrep_class_t class_t;
+
+  static void Init(DllInfo* dll) {
+    class_t = R_make_altinteger_class("array_nonull_int_vector", "arrow", dll);
+    ArrayNoNull<INTSXP>::Init(class_t, dll);
+    R_set_altinteger_No_NA_method(class_t, ArrayNoNull<INTSXP>::No_NA);
+  }
+
+  static SEXP Make(const std::shared_ptr<Array>& array) {
+    return ArrayNoNull<INTSXP>::Make(class_t, array);
+  }
+};
+
+R_altrep_class_t Int32ArrayNoNull::class_t;
+R_altrep_class_t DoubleArrayNoNull::class_t;
+
+void Init_Altrep_classes(DllInfo* dll) {
+  DoubleArrayNoNull::Init(dll);
+  Int32ArrayNoNull::Init(dll);
+}
+
+SEXP MakeDoubleArrayNoNull(const std::shared_ptr<Array>& array) {
+  return DoubleArrayNoNull::Make(array);
+}
+
+SEXP MakeInt32ArrayNoNull(const std::shared_ptr<Array>& array) {
+  return Int32ArrayNoNull::Make(array);
+}
+
+}  // namespace r
+}  // namespace arrow
+
+#endif
+
+// [[arrow::export]]
+bool is_altrep_int_nonull(SEXP x) {
+#if defined(HAS_ALTREP)
+  return R_altrep_inherits(x, arrow::r::Int32ArrayNoNull::class_t);
+#else
+  return false;
+#endif
+}
+
+// [[arrow::export]]
+bool is_altrep_dbl_nonull(SEXP x) {
+#if defined(HAS_ALTREP)
+  return R_altrep_inherits(x, arrow::r::DoubleArrayNoNull::class_t);
+#else
+  return false;
+#endif
+}
diff --git a/r/src/array_to_vector.cpp b/r/src/array_to_vector.cpp
index d5fae295181..a8f7191bf18 100644
--- a/r/src/array_to_vector.cpp
+++ b/r/src/array_to_vector.cpp
@@ -28,6 +28,7 @@
 #include <arrow/util/parallel.h>
 #include <arrow/util/task_group.h>
 
+#include <cpp11/altrep.hpp>
 #include <type_traits>
 
 namespace arrow {
@@ -143,6 +144,24 @@ Status IngestSome(const std::shared_ptr<arrow::Array>& array, R_xlen_t n,
 // Allocate + Ingest
 SEXP ArrayVector__as_vector(R_xlen_t n, const std::shared_ptr<DataType>& type,
                             const ArrayVector& arrays) {
+#if defined(HAS_ALTREP)
+  // special case when there is only one array
+  if (arrays.size() == 1) {
+    const auto& array = arrays[0];
+    if (arrow::r::GetBoolOption("arrow.use_altrep", true) && array->length() > 0 &&
+        array->null_count() == 0) {
+      switch (type->id()) {
+        case arrow::Type::DOUBLE:
+          return arrow::r::MakeDoubleArrayNoNull(array);
+        case arrow::Type::INT32:
+          return arrow::r::MakeInt32ArrayNoNull(array);
+        default:
+          break;
+      }
+    }
+  }
+#endif
+
   auto converter = Converter::Make(type, arrays);
   SEXP data = PROTECT(converter->Allocate(n));
   StopIfNotOk(converter->IngestSerial(data));
@@ -1280,6 +1299,10 @@ SEXP Array__as_vector(const std::shared_ptr<arrow::Array>& array) {
 
 // [[arrow::export]]
 SEXP ChunkedArray__as_vector(const std::shared_ptr<arrow::ChunkedArray>& chunked_array) {
+  if (chunked_array->num_chunks() == 1) {
+    return Array__as_vector(chunked_array->chunk(0));
+  }
+
   return arrow::r::ArrayVector__as_vector(chunked_array->length(), chunked_array->type(),
                                           chunked_array->chunks());
 }
diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp
index 024e5c58b0e..427844a3c8e 100644
--- a/r/src/arrowExports.cpp
+++ b/r/src/arrowExports.cpp
@@ -4,6 +4,36 @@
 
 #include "./arrow_types.h"
 
+// altrep.cpp
+#if defined(ARROW_R_WITH_ARROW)
+bool is_altrep_int_nonull(SEXP x);
+extern "C" SEXP _arrow_is_altrep_int_nonull(SEXP x_sexp){
+BEGIN_CPP11
+	arrow::r::Input<SEXP>::type x(x_sexp);
+	return cpp11::as_sexp(is_altrep_int_nonull(x));
+END_CPP11
+}
+#else
+extern "C" SEXP _arrow_is_altrep_int_nonull(SEXP x_sexp){
+	Rf_error("Cannot call is_altrep_int_nonull(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+}
+#endif
+
+// altrep.cpp
+#if defined(ARROW_R_WITH_ARROW)
+bool is_altrep_dbl_nonull(SEXP x);
+extern "C" SEXP _arrow_is_altrep_dbl_nonull(SEXP x_sexp){
+BEGIN_CPP11
+	arrow::r::Input<SEXP>::type x(x_sexp);
+	return cpp11::as_sexp(is_altrep_dbl_nonull(x));
+END_CPP11
+}
+#else
+extern "C" SEXP _arrow_is_altrep_dbl_nonull(SEXP x_sexp){
+	Rf_error("Cannot call is_altrep_dbl_nonull(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+}
+#endif
+
 // array.cpp
 #if defined(ARROW_R_WITH_ARROW)
 std::shared_ptr<arrow::Array> Array__Slice1(const std::shared_ptr<arrow::Array>& array, R_xlen_t offset);
@@ -6893,6 +6923,8 @@ static const R_CallMethodDef CallEntries[] = {
 		{ "_dataset_available", (DL_FUNC)& _dataset_available, 0 },
 		{ "_parquet_available", (DL_FUNC)& _parquet_available, 0 },
 		{ "_s3_available", (DL_FUNC)& _s3_available, 0 },
+		{ "_arrow_is_altrep_int_nonull", (DL_FUNC) &_arrow_is_altrep_int_nonull, 1}, 
+		{ "_arrow_is_altrep_dbl_nonull", (DL_FUNC) &_arrow_is_altrep_dbl_nonull, 1}, 
 		{ "_arrow_Array__Slice1", (DL_FUNC) &_arrow_Array__Slice1, 2}, 
 		{ "_arrow_Array__Slice2", (DL_FUNC) &_arrow_Array__Slice2, 3}, 
 		{ "_arrow_Array__IsNull", (DL_FUNC) &_arrow_Array__IsNull, 2}, 
@@ -7334,6 +7366,11 @@ static const R_CallMethodDef CallEntries[] = {
 extern "C" void R_init_arrow(DllInfo* dll){
   R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
   R_useDynamicSymbols(dll, FALSE);
+
+  #if defined(HAS_ALTREP)
+  arrow::r::Init_Altrep_classes(dll);
+  #endif
+
 }
 
 
diff --git a/r/src/arrow_types.h b/r/src/arrow_types.h
index 09511e32e87..68e1c8659c4 100644
--- a/r/src/arrow_types.h
+++ b/r/src/arrow_types.h
@@ -165,6 +165,12 @@ arrow::Status InferSchemaFromDots(SEXP lst, SEXP schema_sxp, int num_fields,
 arrow::Status AddMetadataFromDots(SEXP lst, int num_fields,
                                   std::shared_ptr<arrow::Schema>& schema);
 
+#if defined(HAS_ALTREP)
+void Init_Altrep_classes(DllInfo* dll);
+SEXP MakeInt32ArrayNoNull(const std::shared_ptr<Array>& array);
+SEXP MakeDoubleArrayNoNull(const std::shared_ptr<Array>& array);
+#endif
+
 }  // namespace r
 }  // namespace arrow
 
diff --git a/r/tests/testthat/test-altrep.R b/r/tests/testthat/test-altrep.R
new file mode 100644
index 00000000000..ec1c671b12e
--- /dev/null
+++ b/r/tests/testthat/test-altrep.R
@@ -0,0 +1,96 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+context("altrep")
+
+skip_if(getRversion() <= "3.5.0")
+
+test_that("altrep vectors from int32 and dbl arrays with no nulls", {
+  withr::local_options(list(arrow.use_altrep = TRUE))
+  v_int <- Array$create(1:1000)
+  v_dbl <- Array$create(as.numeric(1:1000))
+  c_int <- ChunkedArray$create(1:1000)
+  c_dbl <- ChunkedArray$create(as.numeric(1:1000))
+
+  expect_true(is_altrep_int_nonull(as.vector(v_int)))
+  expect_true(is_altrep_int_nonull(as.vector(v_int$Slice(1))))
+  expect_true(is_altrep_dbl_nonull(as.vector(v_dbl)))
+  expect_true(is_altrep_dbl_nonull(as.vector(v_dbl$Slice(1))))
+
+  expect_equal(c_int$num_chunks, 1L)
+  expect_true(is_altrep_int_nonull(as.vector(c_int)))
+  expect_true(is_altrep_int_nonull(as.vector(c_int$Slice(1))))
+
+  expect_equal(c_dbl$num_chunks, 1L)
+  expect_true(is_altrep_dbl_nonull(as.vector(c_dbl)))
+  expect_true(is_altrep_dbl_nonull(as.vector(c_dbl$Slice(1))))
+
+  withr::local_options(list(arrow.use_altrep = NULL))
+  expect_true(is_altrep_int_nonull(as.vector(v_int)))
+  expect_true(is_altrep_int_nonull(as.vector(v_int$Slice(1))))
+  expect_true(is_altrep_dbl_nonull(as.vector(v_dbl)))
+  expect_true(is_altrep_dbl_nonull(as.vector(v_dbl$Slice(1))))
+
+  withr::local_options(list(arrow.use_altrep = FALSE))
+  expect_false(is_altrep_int_nonull(as.vector(v_int)))
+  expect_false(is_altrep_int_nonull(as.vector(v_int$Slice(1))))
+  expect_false(is_altrep_dbl_nonull(as.vector(v_dbl)))
+  expect_false(is_altrep_dbl_nonull(as.vector(v_dbl$Slice(1))))
+})
+
+test_that("altrep vectors from int32 and dbl arrays with nulls", {
+  withr::local_options(list(arrow.use_altrep = TRUE))
+  v_int <- Array$create(c(1L, NA, 3L))
+  v_dbl <- Array$create(c(1, NA, 3))
+  c_int <- ChunkedArray$create(c(1L, NA, 3L))
+  c_dbl <- ChunkedArray$create(c(1, NA, 3))
+
+  # cannot be altrep because one NA
+  expect_false(is_altrep_int_nonull(as.vector(v_int)))
+  expect_false(is_altrep_int_nonull(as.vector(v_int$Slice(1))))
+  expect_false(is_altrep_dbl_nonull(as.vector(v_dbl)))
+  expect_false(is_altrep_dbl_nonull(as.vector(v_dbl$Slice(1))))
+  expect_false(is_altrep_int_nonull(as.vector(c_int)))
+  expect_false(is_altrep_int_nonull(as.vector(c_int$Slice(1))))
+  expect_false(is_altrep_dbl_nonull(as.vector(c_dbl)))
+  expect_false(is_altrep_dbl_nonull(as.vector(c_dbl$Slice(1))))
+
+  # but then, no NA beyond, so can be altrep again
+  expect_true(is_altrep_int_nonull(as.vector(v_int$Slice(2))))
+  expect_true(is_altrep_dbl_nonull(as.vector(v_dbl$Slice(2))))
+  expect_true(is_altrep_int_nonull(as.vector(c_int$Slice(2))))
+  expect_true(is_altrep_dbl_nonull(as.vector(c_dbl$Slice(2))))
+
+  # chunked array with 2 chunks cannot be altrep
+  c_int <- ChunkedArray$create(0L, c(1L, NA, 3L))
+  c_dbl <- ChunkedArray$create(0, c(1, NA, 3))
+  expect_equal(c_int$num_chunks, 2L)
+  expect_equal(c_dbl$num_chunks, 2L)
+  expect_false(is_altrep_int_nonull(as.vector(c_int)))
+  expect_false(is_altrep_dbl_nonull(as.vector(c_dbl)))
+  expect_true(is_altrep_int_nonull(as.vector(c_int$Slice(3))))
+  expect_true(is_altrep_dbl_nonull(as.vector(c_dbl$Slice(3))))
+})
+
+test_that("empty vectors are not altrep", {
+  withr::local_options(list(arrow.use_altrep = TRUE))
+  v_int <- Array$create(integer())
+  v_dbl <- Array$create(numeric())
+
+  expect_false(is_altrep_int_nonull(as.vector(v_int)))
+  expect_false(is_altrep_dbl_nonull(as.vector(v_dbl)))
+})

From 8113c37fb43a5034a1bc0cd8991af9026b473318 Mon Sep 17 00:00:00 2001
From: Will Jones <will.jones@mscience.com>
Date: Wed, 23 Jun 2021 19:23:55 +0200
Subject: [PATCH 450/719] ARROW-11342: [Python] [Gandiva] Expose ToString and
 result type information

These methods are intended to make it easier to work with and debug the Gandiva expression builder.

```python
import pyarrow
import pyarrow.gandiva as gandiva

builder = gandiva.TreeExprBuilder()

lit = builder.make_literal(1000.0, pyarrow.float64())
print(lit)
# Before: <pyarrow.gandiva.Node object at 0x7f36fd37ecf0>
# After: (const double) 1000 raw(408f400000000000)

field = builder.make_field(pyarrow.field('a', pyarrow.float64()))
print(field)
# Before: <pyarrow.gandiva.Node object at 0x7ff7daf99f90>
# After: (double) a

print(builder.make_function('greater_than', [field, lit], pyarrow.bool_()))
# Before: <pyarrow.gandiva.Node object at 0x7ff7d24bde70>
# After: bool greater_than((double) a, (const double) 1000 raw(408f400000000000))
```

Closes #9288 from wjones127/ARROW-11342/gandiva-repr

Lead-authored-by: Will Jones <will.jones@mscience.com>
Co-authored-by: Will Jones <willjones127@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 python/pyarrow/gandiva.pyx             | 39 +++++++++++++++++++++++++-
 python/pyarrow/includes/libgandiva.pxd | 13 ++++++---
 python/pyarrow/tests/test_gandiva.py   | 26 +++++++++++++++++
 3 files changed, 73 insertions(+), 5 deletions(-)

diff --git a/python/pyarrow/gandiva.pyx b/python/pyarrow/gandiva.pyx
index 6bbd673ed36..61e2587af2b 100644
--- a/python/pyarrow/gandiva.pyx
+++ b/python/pyarrow/gandiva.pyx
@@ -30,7 +30,8 @@ from libc.stdint cimport int64_t, int32_t, uint8_t, uintptr_t
 from pyarrow.includes.libarrow cimport *
 from pyarrow.lib cimport (Array, DataType, Field, MemoryPool, RecordBatch,
                           Schema, check_status, pyarrow_wrap_array,
-                          pyarrow_wrap_data_type, ensure_type, _Weakrefable)
+                          pyarrow_wrap_data_type, ensure_type, _Weakrefable,
+                          pyarrow_wrap_field)
 from pyarrow.lib import frombytes
 
 from pyarrow.includes.libgandiva cimport (
@@ -93,6 +94,16 @@ cdef class Node(_Weakrefable):
         self.node = node
         return self
 
+    def __str__(self):
+        return self.node.get().ToString().decode()
+
+    def __repr__(self):
+        type_format = object.__repr__(self)
+        return '{0}\n{1}'.format(type_format, str(self))
+
+    def return_type(self):
+        return pyarrow_wrap_data_type(self.node.get().return_type())
+
 cdef class Expression(_Weakrefable):
     cdef:
         shared_ptr[CExpression] expression
@@ -100,6 +111,19 @@ cdef class Expression(_Weakrefable):
     cdef void init(self, shared_ptr[CExpression] expression):
         self.expression = expression
 
+    def __str__(self):
+        return self.expression.get().ToString().decode()
+
+    def __repr__(self):
+        type_format = object.__repr__(self)
+        return '{0}\n{1}'.format(type_format, str(self))
+
+    def root(self):
+        return Node.create(self.expression.get().root())
+
+    def result(self):
+        return pyarrow_wrap_field(self.expression.get().result())
+
 cdef class Condition(_Weakrefable):
     cdef:
         shared_ptr[CCondition] condition
@@ -115,6 +139,19 @@ cdef class Condition(_Weakrefable):
         self.condition = condition
         return self
 
+    def __str__(self):
+        return self.condition.get().ToString().decode()
+
+    def __repr__(self):
+        type_format = object.__repr__(self)
+        return '{0}\n{1}'.format(type_format, str(self))
+
+    def root(self):
+        return Node.create(self.condition.get().root())
+
+    def result(self):
+        return pyarrow_wrap_field(self.condition.get().result())
+
 cdef class SelectionVector(_Weakrefable):
     cdef:
         shared_ptr[CSelectionVector] selection_vector
diff --git a/python/pyarrow/includes/libgandiva.pxd b/python/pyarrow/includes/libgandiva.pxd
index 12d1cb38b82..c75977d37e8 100644
--- a/python/pyarrow/includes/libgandiva.pxd
+++ b/python/pyarrow/includes/libgandiva.pxd
@@ -24,13 +24,16 @@ from libc.stdint cimport int64_t, int32_t, uint8_t, uintptr_t
 from pyarrow.includes.common cimport *
 from pyarrow.includes.libarrow cimport *
 
-cdef extern from "gandiva/gandiva_aliases.h" namespace "gandiva" nogil:
+cdef extern from "gandiva/node.h" namespace "gandiva" nogil:
 
     cdef cppclass CNode" gandiva::Node":
-        pass
+        c_string ToString()
+        shared_ptr[CDataType] return_type()
 
     cdef cppclass CExpression" gandiva::Expression":
-        pass
+        c_string ToString()
+        shared_ptr[CNode] root()
+        shared_ptr[CField] result()
 
     ctypedef vector[shared_ptr[CNode]] CNodeVector" gandiva::NodeVector"
 
@@ -95,7 +98,9 @@ cdef inline str _selection_mode_name(CSelectionVector_Mode ctype):
 cdef extern from "gandiva/condition.h" namespace "gandiva" nogil:
 
     cdef cppclass CCondition" gandiva::Condition":
-        pass
+        c_string ToString()
+        shared_ptr[CNode] root()
+        shared_ptr[CField] result()
 
 cdef extern from "gandiva/arrow.h" namespace "gandiva" nogil:
 
diff --git a/python/pyarrow/tests/test_gandiva.py b/python/pyarrow/tests/test_gandiva.py
index 4a26ad433e9..6522c233a15 100644
--- a/python/pyarrow/tests/test_gandiva.py
+++ b/python/pyarrow/tests/test_gandiva.py
@@ -37,12 +37,16 @@ def test_tree_exp_builder():
     node_a = builder.make_field(field_a)
     node_b = builder.make_field(field_b)
 
+    assert node_a.return_type() == field_a.type
+
     condition = builder.make_function("greater_than", [node_a, node_b],
                                       pa.bool_())
     if_node = builder.make_if(condition, node_a, node_b, pa.int32())
 
     expr = builder.make_expression(if_node, field_result)
 
+    assert expr.result().type == pa.int32()
+
     projector = gandiva.make_projector(
         schema, [expr], pa.default_memory_pool())
 
@@ -98,6 +102,8 @@ def test_filter():
     cond = builder.make_function("less_than", [node_a, thousand], pa.bool_())
     condition = builder.make_condition(cond)
 
+    assert condition.result().type == pa.bool_()
+
     filter = gandiva.make_filter(table.schema, condition)
     # Gandiva generates compute kernel function named `@expr_X`
     assert filter.llvm_ir.find("@expr_") != -1
@@ -363,3 +369,23 @@ def test_filter_project():
 
     exp = pa.array([1, -21, None], pa.int32())
     assert r.equals(exp)
+
+
+@pytest.mark.gandiva
+def test_to_string():
+    import pyarrow.gandiva as gandiva
+    builder = gandiva.TreeExprBuilder()
+
+    assert str(builder.make_literal(2.0, pa.float64())
+               ).startswith('(const double) 2 raw(')
+    assert str(builder.make_literal(2, pa.int64())) == '(const int64) 2'
+    assert str(builder.make_field(pa.field('x', pa.float64()))) == '(double) x'
+    assert str(builder.make_field(pa.field('y', pa.string()))) == '(string) y'
+
+    field_z = builder.make_field(pa.field('z', pa.bool_()))
+    func_node = builder.make_function('not', [field_z], pa.bool_())
+    assert str(func_node) == 'bool not((bool) z)'
+
+    field_y = builder.make_field(pa.field('y', pa.bool_()))
+    and_node = builder.make_and([func_node, field_y])
+    assert str(and_node) == 'bool not((bool) z) && (bool) y'

From 9aaf61c044189d4d15bd145380fb4e2a2a65138f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Thu, 24 Jun 2021 05:25:05 +0900
Subject: [PATCH 451/719] ARROW-8459: [Dev][Archery] Use a more recent
 cmake-format
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- [x] bump cmake-format's version to the latest one
- [x] port `run-cmake-format.py` script to archery
- [x] support `archery lint --cmake-format` format checks without reformatting the files in-place
- [x] support `archery lint --cmake-format --fix` for actually reformat the files
- [x] reformat the cmake files

I assume we may need tune the options a little bit, so feel free to experiment with the values defined in `cmake-format.py` then re-run `archery-lint --cmake-format --fix`.

The `cmakelang` package also provides a `cmake-lint` command which we could experiment with in the future.

Closes #10571 from kszucs/update-cmake-format

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 .dockerignore                                 |   2 +-
 .pre-commit-config.yaml                       |   7 -
 ci/docker/linux-apt-lint.dockerfile           |   8 +-
 cmake-format.py                               |  75 +-
 cpp/CMakeLists.txt                            |  73 +-
 cpp/cmake_modules/BuildUtils.cmake            | 173 ++---
 cpp/cmake_modules/DefineOptions.cmake         |  58 +-
 cpp/cmake_modules/FindArrow.cmake             | 138 ++--
 cpp/cmake_modules/FindArrowCUDA.cmake         |  11 +-
 cpp/cmake_modules/FindArrowDataset.cmake      |  12 +-
 cpp/cmake_modules/FindArrowFlight.cmake       |  11 +-
 .../FindArrowFlightTesting.cmake              |  31 +-
 cpp/cmake_modules/FindArrowPython.cmake       |  11 +-
 cpp/cmake_modules/FindArrowPythonFlight.cmake |  27 +-
 cpp/cmake_modules/FindArrowTesting.cmake      |  12 +-
 cpp/cmake_modules/FindBrotli.cmake            |   9 +-
 cpp/cmake_modules/FindClangTools.cmake        |  15 +-
 cpp/cmake_modules/FindGLOG.cmake              |   4 +-
 cpp/cmake_modules/FindGandiva.cmake           |  13 +-
 cpp/cmake_modules/FindLLVMAlt.cmake           |  19 +-
 cpp/cmake_modules/FindLz4.cmake               |  19 +-
 cpp/cmake_modules/FindORC.cmake               |   4 +-
 cpp/cmake_modules/FindParquet.cmake           |  20 +-
 cpp/cmake_modules/FindPlasma.cmake            |  12 +-
 cpp/cmake_modules/FindPython3Alt.cmake        |  33 +-
 cpp/cmake_modules/FindRapidJSONAlt.cmake      |  34 +-
 cpp/cmake_modules/FindSnappy.cmake            |  11 +-
 cpp/cmake_modules/FindThrift.cmake            |  48 +-
 cpp/cmake_modules/Findc-aresAlt.cmake         |  12 +-
 cpp/cmake_modules/FindgRPCAlt.cmake           |  24 +-
 cpp/cmake_modules/Findre2Alt.cmake            |  38 +-
 cpp/cmake_modules/Findutf8proc.cmake          |  30 +-
 cpp/cmake_modules/Findzstd.cmake              |  20 +-
 cpp/cmake_modules/SetupCxxFlags.cmake         |  36 +-
 cpp/cmake_modules/ThirdpartyToolchain.cmake   | 726 ++++++++----------
 cpp/cmake_modules/Usevcpkg.cmake              | 102 ++-
 cpp/cmake_modules/san-config.cmake            |  75 +-
 cpp/src/arrow/CMakeLists.txt                  | 187 +++--
 cpp/src/arrow/adapters/orc/CMakeLists.txt     |   8 +-
 cpp/src/arrow/dbi/hiveserver2/CMakeLists.txt  |  14 +-
 .../dbi/hiveserver2/thrift/CMakeLists.txt     |  15 +-
 cpp/src/arrow/filesystem/CMakeLists.txt       |   8 +-
 cpp/src/arrow/flight/CMakeLists.txt           |  59 +-
 cpp/src/arrow/gpu/CMakeLists.txt              |   3 +-
 cpp/src/arrow/python/CMakeLists.txt           |  16 +-
 cpp/src/arrow/python/util/CMakeLists.txt      |   4 +-
 cpp/src/gandiva/CMakeLists.txt                |  11 +-
 cpp/src/gandiva/jni/CMakeLists.txt            |  18 +-
 cpp/src/gandiva/precompiled/CMakeLists.txt    |  65 +-
 cpp/src/parquet/CMakeLists.txt                |  46 +-
 cpp/src/plasma/CMakeLists.txt                 |  24 +-
 dev/archery/archery/utils/command.py          |   7 +-
 dev/archery/archery/utils/lint.py             |  84 +-
 dev/archery/requirements-lint.txt             |   3 -
 dev/archery/setup.py                          |   2 +-
 docker-compose.yml                            |   5 +-
 docs/source/developers/cpp/development.rst    |   2 +-
 docs/source/developers/python.rst             |   3 +-
 java/gandiva/CMakeLists.txt                   |  51 +-
 matlab/CMakeLists.txt                         |  24 +-
 python/CMakeLists.txt                         |  70 +-
 run-cmake-format.py                           | 111 ---
 62 files changed, 1300 insertions(+), 1493 deletions(-)
 delete mode 100644 dev/archery/requirements-lint.txt
 delete mode 100755 run-cmake-format.py

diff --git a/.dockerignore b/.dockerignore
index eb71138c679..a369d7d59a6 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -27,7 +27,7 @@
 # include explicitly
 !ci/**
 !c_glib/Gemfile
-!dev/archery/requirements*.txt
+!dev/archery/setup.py
 !python/requirements*.txt
 !python/manylinux1/**
 !python/manylinux2010/**
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 8b5a24476d8..0718072308a 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -29,13 +29,6 @@ repos:
         entry: bash -c "git archive HEAD --prefix=apache-arrow/ --output=arrow-src.tar && ./dev/release/run-rat.sh arrow-src.tar"
         always_run: true
         pass_filenames: false
-      - id: cmake-format
-        name: CMake Format
-        language: python
-        entry: python run-cmake-format.py
-        types: [cmake]
-        additional_dependencies:
-          - cmake_format==0.5.2
       - id: hadolint
         name: Docker Format
         language: docker_image
diff --git a/ci/docker/linux-apt-lint.dockerfile b/ci/docker/linux-apt-lint.dockerfile
index c711c4c883c..2f4bf0010bc 100644
--- a/ci/docker/linux-apt-lint.dockerfile
+++ b/ci/docker/linux-apt-lint.dockerfile
@@ -49,12 +49,8 @@ RUN arrow/ci/scripts/install_iwyu.sh /tmp/iwyu /usr/local ${clang_tools}
 RUN ln -s /usr/bin/python3 /usr/local/bin/python && \
     ln -s /usr/bin/pip3 /usr/local/bin/pip
 
-COPY dev/archery/requirements.txt \
-     dev/archery/requirements-lint.txt \
-     /arrow/dev/archery/
-RUN pip install \
-      -r arrow/dev/archery/requirements.txt \
-      -r arrow/dev/archery/requirements-lint.txt
+COPY dev/archery/setup.py /arrow/dev/archery/
+RUN pip install -e arrow/dev/archery[lint]
 
 ENV LC_ALL=C.UTF-8 \
     LANG=C.UTF-8
diff --git a/cmake-format.py b/cmake-format.py
index 0976642031f..3e77733f4d1 100644
--- a/cmake-format.py
+++ b/cmake-format.py
@@ -16,44 +16,61 @@
 # under the License.
 
 # cmake-format configuration file
-# Use run-cmake-format.py to reformat all cmake files in the source tree
+# Use `archery lint --cmake-format --fix` to reformat all cmake files in the
+# source tree
 
-# How wide to allow formatted cmake files
-line_width = 90
+# -----------------------------
+# Options affecting formatting.
+# -----------------------------
+with section("format"):
+    # How wide to allow formatted cmake files
+    line_width = 90
 
-# How many spaces to tab for indent
-tab_size = 2
+    # How many spaces to tab for indent
+    tab_size = 2
 
-# If arglists are longer than this, break them always
-max_subargs_per_line = 4
+    # If a positional argument group contains more than this many arguments,
+    # then force it to a vertical layout.
+    max_pargs_hwrap = 4
 
-# If true, separate flow control names from their parentheses with a space
-separate_ctrl_name_with_space = False
+    # If the statement spelling length (including space and parenthesis) is
+    # smaller than this amount, then force reject nested layouts.
+    # This value only comes into play when considering whether or not to nest
+    # arguments below their parent. If the number of characters in the parent
+    # is less than this value, we will not nest.
+    min_prefix_chars = 32
 
-# If true, separate function names from parentheses with a space
-separate_fn_name_with_space = False
+    # If true, separate flow control names from their parentheses with a space
+    separate_ctrl_name_with_space = False
 
-# If a statement is wrapped to more than one line, than dangle the closing
-# parenthesis on it's own line
-dangle_parens = False
+    # If true, separate function names from parentheses with a space
+    separate_fn_name_with_space = False
 
-# What style line endings to use in the output.
-line_ending = 'unix'
+    # If a statement is wrapped to more than one line, than dangle the closing
+    # parenthesis on it's own line
+    dangle_parens = False
 
-# Format command names consistently as 'lower' or 'upper' case
-command_case = 'lower'
+    # What style line endings to use in the output.
+    line_ending = 'unix'
 
-# Format keywords consistently as 'lower' or 'upper' case
-keyword_case = 'unchanged'
+    # Format command names consistently as 'lower' or 'upper' case
+    command_case = 'lower'
 
-# enable comment markup parsing and reflow
-enable_markup = False
+    # Format keywords consistently as 'lower' or 'upper' case
+    keyword_case = 'unchanged'
 
-# If comment markup is enabled, don't reflow the first comment block in
-# eachlistfile. Use this to preserve formatting of your
-# copyright/licensestatements.
-first_comment_is_literal = False
+# ------------------------------------------------
+# Options affecting comment reflow and formatting.
+# ------------------------------------------------
+with section("markup"):
+    # enable comment markup parsing and reflow
+    enable_markup = False
 
-# If comment markup is enabled, don't reflow any comment block which matchesthis
-# (regex) pattern. Default is `None` (disabled).
-literal_comment_pattern = None
+    # If comment markup is enabled, don't reflow the first comment block in
+    # eachlistfile. Use this to preserve formatting of your
+    # copyright/licensestatements.
+    first_comment_is_literal = True
+
+    # If comment markup is enabled, don't reflow any comment block which
+    # matchesthis (regex) pattern. Default is `None` (disabled).
+    literal_comment_pattern = None
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index a31af74f68e..91b3528bf70 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -53,7 +53,9 @@ string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" ARROW_BASE_VERSION "${ARROW_VERSI
 
 # if no build build type is specified, default to release builds
 if(NOT DEFINED CMAKE_BUILD_TYPE)
-  set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build.")
+  set(CMAKE_BUILD_TYPE
+      Release
+      CACHE STRING "Choose the type of build.")
 endif()
 string(TOLOWER ${CMAKE_BUILD_TYPE} LOWERCASE_BUILD_TYPE)
 string(TOUPPER ${CMAKE_BUILD_TYPE} UPPERCASE_BUILD_TYPE)
@@ -117,18 +119,15 @@ set(ARROW_LLVM_VERSIONS
     "8"
     "7")
 list(GET ARROW_LLVM_VERSIONS 0 ARROW_LLVM_VERSION_PRIMARY)
-string(REGEX
-       REPLACE "^([0-9]+)(\\..+)?" "\\1" ARROW_LLVM_VERSION_PRIMARY_MAJOR
-               "${ARROW_LLVM_VERSION_PRIMARY}")
+string(REGEX REPLACE "^([0-9]+)(\\..+)?" "\\1" ARROW_LLVM_VERSION_PRIMARY_MAJOR
+                     "${ARROW_LLVM_VERSION_PRIMARY}")
 
 file(READ ${CMAKE_CURRENT_SOURCE_DIR}/../.env ARROW_ENV)
 string(REGEX MATCH "CLANG_TOOLS=[^\n]+" ARROW_ENV_CLANG_TOOLS_VERSION "${ARROW_ENV}")
-string(REGEX
-       REPLACE "^CLANG_TOOLS=" "" ARROW_CLANG_TOOLS_VERSION
-               "${ARROW_ENV_CLANG_TOOLS_VERSION}")
-string(REGEX
-       REPLACE "^([0-9]+)(\\..+)?" "\\1" ARROW_CLANG_TOOLS_VERSION_MAJOR
-               "${ARROW_CLANG_TOOLS_VERSION}")
+string(REGEX REPLACE "^CLANG_TOOLS=" "" ARROW_CLANG_TOOLS_VERSION
+                     "${ARROW_ENV_CLANG_TOOLS_VERSION}")
+string(REGEX REPLACE "^([0-9]+)(\\..+)?" "\\1" ARROW_CLANG_TOOLS_VERSION_MAJOR
+                     "${ARROW_CLANG_TOOLS_VERSION}")
 
 if(APPLE)
   find_program(BREW_BIN brew)
@@ -163,7 +162,9 @@ endif()
 
 find_package(ClangTools)
 find_package(InferTools)
-if("$ENV{CMAKE_EXPORT_COMPILE_COMMANDS}" STREQUAL "1" OR CLANG_TIDY_FOUND OR INFER_FOUND)
+if("$ENV{CMAKE_EXPORT_COMPILE_COMMANDS}" STREQUAL "1"
+   OR CLANG_TIDY_FOUND
+   OR INFER_FOUND)
   # Generate a Clang compile_commands.json "compilation database" file for use
   # with various development tools, such as Vim's YouCompleteMe plugin.
   # See http://clang.llvm.org/docs/JSONCompilationDatabase.html
@@ -226,7 +227,9 @@ if(NOT LINT_EXCLUSIONS_FILE)
   set(LINT_EXCLUSIONS_FILE ${BUILD_SUPPORT_DIR}/lint_exclusions.txt)
 endif()
 
-find_program(CPPLINT_BIN NAMES cpplint cpplint.py HINTS ${BUILD_SUPPORT_DIR})
+find_program(CPPLINT_BIN
+             NAMES cpplint cpplint.py
+             HINTS ${BUILD_SUPPORT_DIR})
 message(STATUS "Found cpplint executable at ${CPPLINT_BIN}")
 
 add_custom_target(lint
@@ -271,7 +274,7 @@ if(${CLANG_FORMAT_FOUND})
 endif()
 
 add_custom_target(lint_cpp_cli ${PYTHON_EXECUTABLE} ${BUILD_SUPPORT_DIR}/lint_cpp_cli.py
-                  ${CMAKE_CURRENT_SOURCE_DIR}/src)
+                               ${CMAKE_CURRENT_SOURCE_DIR}/src)
 
 if(ARROW_LINT_ONLY)
   message("ARROW_LINT_ONLY was specified, this is only a partial build directory")
@@ -469,10 +472,7 @@ if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_CURRENT_BINARY_DIR})
   if(NOT APPLE)
     set(MORE_ARGS "-T")
   endif()
-  execute_process(COMMAND ln
-                          ${MORE_ARGS}
-                          -sf
-                          ${BUILD_OUTPUT_ROOT_DIRECTORY}
+  execute_process(COMMAND ln ${MORE_ARGS} -sf ${BUILD_OUTPUT_ROOT_DIRECTORY}
                           ${CMAKE_CURRENT_BINARY_DIR}/build/latest)
 else()
   set(BUILD_OUTPUT_ROOT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${BUILD_SUBDIR_NAME}/")
@@ -550,12 +550,9 @@ include_directories(src/generated)
 #
 if(PARQUET_BUILD_SHARED)
   set_target_properties(arrow_shared
-                        PROPERTIES C_VISIBILITY_PRESET
-                                   hidden
-                                   CXX_VISIBILITY_PRESET
-                                   hidden
-                                   VISIBILITY_INLINES_HIDDEN
-                                   1)
+                        PROPERTIES C_VISIBILITY_PRESET hidden
+                                   CXX_VISIBILITY_PRESET hidden
+                                   VISIBILITY_INLINES_HIDDEN 1)
 endif()
 
 #
@@ -599,7 +596,9 @@ endif(UNIX)
 # "make cscope" target
 #
 if(UNIX)
-  add_custom_target(cscope find ${CMAKE_CURRENT_SOURCE_DIR}
+  add_custom_target(cscope
+                    find
+                    ${CMAKE_CURRENT_SOURCE_DIR}
                     (-name
                      \\*.cc
                      -or
@@ -636,23 +635,14 @@ endif(UNIX)
 
 if(${INFER_FOUND})
   # runs infer capture
-  add_custom_target(infer
-                    ${BUILD_SUPPORT_DIR}/run-infer.sh
-                    ${INFER_BIN}
-                    ${CMAKE_BINARY_DIR}/compile_commands.json
-                    1)
+  add_custom_target(infer ${BUILD_SUPPORT_DIR}/run-infer.sh ${INFER_BIN}
+                          ${CMAKE_BINARY_DIR}/compile_commands.json 1)
   # runs infer analyze
-  add_custom_target(infer-analyze
-                    ${BUILD_SUPPORT_DIR}/run-infer.sh
-                    ${INFER_BIN}
-                    ${CMAKE_BINARY_DIR}/compile_commands.json
-                    2)
+  add_custom_target(infer-analyze ${BUILD_SUPPORT_DIR}/run-infer.sh ${INFER_BIN}
+                                  ${CMAKE_BINARY_DIR}/compile_commands.json 2)
   # runs infer report
-  add_custom_target(infer-report
-                    ${BUILD_SUPPORT_DIR}/run-infer.sh
-                    ${INFER_BIN}
-                    ${CMAKE_BINARY_DIR}/compile_commands.json
-                    3)
+  add_custom_target(infer-report ${BUILD_SUPPORT_DIR}/run-infer.sh ${INFER_BIN}
+                                 ${CMAKE_BINARY_DIR}/compile_commands.json 3)
 endif()
 
 #
@@ -721,7 +711,7 @@ if(ARROW_ORC)
   list(APPEND ARROW_STATIC_LINK_LIBS orc::liborc ${ARROW_PROTOBUF_LIBPROTOBUF})
   if(ORC_SOURCE STREQUAL "SYSTEM")
     list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS orc::liborc
-                ${ARROW_PROTOBUF_LIBPROTOBUF})
+         ${ARROW_PROTOBUF_LIBPROTOBUF})
   endif()
 endif()
 
@@ -916,8 +906,7 @@ endif()
 
 install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/../LICENSE.txt
               ${CMAKE_CURRENT_SOURCE_DIR}/../NOTICE.txt
-              ${CMAKE_CURRENT_SOURCE_DIR}/README.md
-        DESTINATION "${ARROW_DOC_DIR}")
+              ${CMAKE_CURRENT_SOURCE_DIR}/README.md DESTINATION "${ARROW_DOC_DIR}")
 
 #
 # Validate and print out Arrow configuration options
diff --git a/cpp/cmake_modules/BuildUtils.cmake b/cpp/cmake_modules/BuildUtils.cmake
index 2fd897b5d1d..cd8290d1bbb 100644
--- a/cpp/cmake_modules/BuildUtils.cmake
+++ b/cpp/cmake_modules/BuildUtils.cmake
@@ -62,17 +62,16 @@ function(ADD_THIRDPARTY_LIB LIB_NAME)
   if(ARG_STATIC_LIB AND ARG_SHARED_LIB)
     set(AUG_LIB_NAME "${LIB_NAME}_static")
     add_library(${AUG_LIB_NAME} STATIC IMPORTED)
-    set_target_properties(${AUG_LIB_NAME}
-                          PROPERTIES IMPORTED_LOCATION "${ARG_STATIC_LIB}")
+    set_target_properties(${AUG_LIB_NAME} PROPERTIES IMPORTED_LOCATION
+                                                     "${ARG_STATIC_LIB}")
     if(ARG_DEPS)
-      set_target_properties(${AUG_LIB_NAME}
-                            PROPERTIES INTERFACE_LINK_LIBRARIES "${ARG_DEPS}")
+      set_target_properties(${AUG_LIB_NAME} PROPERTIES INTERFACE_LINK_LIBRARIES
+                                                       "${ARG_DEPS}")
     endif()
     message(STATUS "Added static library dependency ${AUG_LIB_NAME}: ${ARG_STATIC_LIB}")
     if(ARG_INCLUDE_DIRECTORIES)
-      set_target_properties(${AUG_LIB_NAME}
-                            PROPERTIES INTERFACE_INCLUDE_DIRECTORIES
-                                       "${ARG_INCLUDE_DIRECTORIES}")
+      set_target_properties(${AUG_LIB_NAME} PROPERTIES INTERFACE_INCLUDE_DIRECTORIES
+                                                       "${ARG_INCLUDE_DIRECTORIES}")
     endif()
 
     set(AUG_LIB_NAME "${LIB_NAME}_shared")
@@ -80,36 +79,34 @@ function(ADD_THIRDPARTY_LIB LIB_NAME)
 
     if(WIN32)
       # Mark the ".lib" location as part of a Windows DLL
-      set_target_properties(${AUG_LIB_NAME}
-                            PROPERTIES IMPORTED_IMPLIB "${ARG_SHARED_LIB}")
+      set_target_properties(${AUG_LIB_NAME} PROPERTIES IMPORTED_IMPLIB
+                                                       "${ARG_SHARED_LIB}")
     else()
-      set_target_properties(${AUG_LIB_NAME}
-                            PROPERTIES IMPORTED_LOCATION "${ARG_SHARED_LIB}")
+      set_target_properties(${AUG_LIB_NAME} PROPERTIES IMPORTED_LOCATION
+                                                       "${ARG_SHARED_LIB}")
     endif()
     if(ARG_DEPS)
-      set_target_properties(${AUG_LIB_NAME}
-                            PROPERTIES INTERFACE_LINK_LIBRARIES "${ARG_DEPS}")
+      set_target_properties(${AUG_LIB_NAME} PROPERTIES INTERFACE_LINK_LIBRARIES
+                                                       "${ARG_DEPS}")
     endif()
     message(STATUS "Added shared library dependency ${AUG_LIB_NAME}: ${ARG_SHARED_LIB}")
     if(ARG_INCLUDE_DIRECTORIES)
-      set_target_properties(${AUG_LIB_NAME}
-                            PROPERTIES INTERFACE_INCLUDE_DIRECTORIES
-                                       "${ARG_INCLUDE_DIRECTORIES}")
+      set_target_properties(${AUG_LIB_NAME} PROPERTIES INTERFACE_INCLUDE_DIRECTORIES
+                                                       "${ARG_INCLUDE_DIRECTORIES}")
     endif()
   elseif(ARG_STATIC_LIB)
     set(AUG_LIB_NAME "${LIB_NAME}_static")
     add_library(${AUG_LIB_NAME} STATIC IMPORTED)
-    set_target_properties(${AUG_LIB_NAME}
-                          PROPERTIES IMPORTED_LOCATION "${ARG_STATIC_LIB}")
+    set_target_properties(${AUG_LIB_NAME} PROPERTIES IMPORTED_LOCATION
+                                                     "${ARG_STATIC_LIB}")
     if(ARG_DEPS)
-      set_target_properties(${AUG_LIB_NAME}
-                            PROPERTIES INTERFACE_LINK_LIBRARIES "${ARG_DEPS}")
+      set_target_properties(${AUG_LIB_NAME} PROPERTIES INTERFACE_LINK_LIBRARIES
+                                                       "${ARG_DEPS}")
     endif()
     message(STATUS "Added static library dependency ${AUG_LIB_NAME}: ${ARG_STATIC_LIB}")
     if(ARG_INCLUDE_DIRECTORIES)
-      set_target_properties(${AUG_LIB_NAME}
-                            PROPERTIES INTERFACE_INCLUDE_DIRECTORIES
-                                       "${ARG_INCLUDE_DIRECTORIES}")
+      set_target_properties(${AUG_LIB_NAME} PROPERTIES INTERFACE_INCLUDE_DIRECTORIES
+                                                       "${ARG_INCLUDE_DIRECTORIES}")
     endif()
   elseif(ARG_SHARED_LIB)
     set(AUG_LIB_NAME "${LIB_NAME}_shared")
@@ -117,21 +114,20 @@ function(ADD_THIRDPARTY_LIB LIB_NAME)
 
     if(WIN32)
       # Mark the ".lib" location as part of a Windows DLL
-      set_target_properties(${AUG_LIB_NAME}
-                            PROPERTIES IMPORTED_IMPLIB "${ARG_SHARED_LIB}")
+      set_target_properties(${AUG_LIB_NAME} PROPERTIES IMPORTED_IMPLIB
+                                                       "${ARG_SHARED_LIB}")
     else()
-      set_target_properties(${AUG_LIB_NAME}
-                            PROPERTIES IMPORTED_LOCATION "${ARG_SHARED_LIB}")
+      set_target_properties(${AUG_LIB_NAME} PROPERTIES IMPORTED_LOCATION
+                                                       "${ARG_SHARED_LIB}")
     endif()
     message(STATUS "Added shared library dependency ${AUG_LIB_NAME}: ${ARG_SHARED_LIB}")
     if(ARG_DEPS)
-      set_target_properties(${AUG_LIB_NAME}
-                            PROPERTIES INTERFACE_LINK_LIBRARIES "${ARG_DEPS}")
+      set_target_properties(${AUG_LIB_NAME} PROPERTIES INTERFACE_LINK_LIBRARIES
+                                                       "${ARG_DEPS}")
     endif()
     if(ARG_INCLUDE_DIRECTORIES)
-      set_target_properties(${AUG_LIB_NAME}
-                            PROPERTIES INTERFACE_INCLUDE_DIRECTORIES
-                                       "${ARG_INCLUDE_DIRECTORIES}")
+      set_target_properties(${AUG_LIB_NAME} PROPERTIES INTERFACE_INCLUDE_DIRECTORIES
+                                                       "${ARG_INCLUDE_DIRECTORIES}")
     endif()
   else()
     message(FATAL_ERROR "No static or shared library provided for ${LIB_NAME}")
@@ -159,10 +155,9 @@ function(create_merged_static_lib output_target)
     message(SEND_ERROR "Error: unrecognized arguments: ${ARG_UNPARSED_ARGUMENTS}")
   endif()
 
-  set(
-    output_lib_path
-    ${BUILD_OUTPUT_ROOT_DIRECTORY}${CMAKE_STATIC_LIBRARY_PREFIX}${ARG_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}
-    )
+  set(output_lib_path
+      ${BUILD_OUTPUT_ROOT_DIRECTORY}${CMAKE_STATIC_LIBRARY_PREFIX}${ARG_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}
+  )
 
   set(all_library_paths $<TARGET_FILE:${ARG_ROOT}>)
   foreach(lib ${ARG_TO_MERGE})
@@ -170,13 +165,8 @@ function(create_merged_static_lib output_target)
   endforeach()
 
   if(APPLE)
-    set(BUNDLE_COMMAND
-        "libtool"
-        "-no_warning_for_no_symbols"
-        "-static"
-        "-o"
-        ${output_lib_path}
-        ${all_library_paths})
+    set(BUNDLE_COMMAND "libtool" "-no_warning_for_no_symbols" "-static" "-o"
+                       ${output_lib_path} ${all_library_paths})
   elseif(CMAKE_CXX_COMPILER_ID MATCHES "^(Clang|GNU|Intel)$")
     set(ar_script_path ${CMAKE_BINARY_DIR}/${ARG_NAME}.ar)
 
@@ -188,7 +178,9 @@ function(create_merged_static_lib output_target)
     endforeach()
 
     file(APPEND ${ar_script_path}.in "SAVE\nEND\n")
-    file(GENERATE OUTPUT ${ar_script_path} INPUT ${ar_script_path}.in)
+    file(GENERATE
+         OUTPUT ${ar_script_path}
+         INPUT ${ar_script_path}.in)
     set(ar_tool ${CMAKE_AR})
 
     if(CMAKE_INTERPROCEDURAL_OPTIMIZATION)
@@ -218,9 +210,8 @@ function(create_merged_static_lib output_target)
                      COMMENT "Bundling ${output_lib_path}"
                      VERBATIM)
 
-  message(
-    STATUS "Creating bundled static library target ${output_target} at ${output_lib_path}"
-    )
+  message(STATUS "Creating bundled static library target ${output_target} at ${output_lib_path}"
+  )
 
   add_custom_target(${output_target} ALL DEPENDS ${output_lib_path})
   add_dependencies(${output_target} ${ARG_ROOT} ${ARG_TO_MERGE})
@@ -355,7 +346,9 @@ function(ADD_ARROW_LIB LIB_NAME)
     endif()
 
     # On iOS, specifying -undefined conflicts with enabling bitcode
-    if(APPLE AND NOT IOS AND NOT DEFINED ENV{EMSCRIPTEN})
+    if(APPLE
+       AND NOT IOS
+       AND NOT DEFINED ENV{EMSCRIPTEN})
       # On OS X, you can avoid linking at library load time and instead
       # expecting that the symbols have been loaded separately. This happens
       # with libpython* where there can be conflicts between system Python and
@@ -367,20 +360,13 @@ function(ADD_ARROW_LIB LIB_NAME)
     endif()
 
     set_target_properties(${LIB_NAME}_shared
-                          PROPERTIES LIBRARY_OUTPUT_DIRECTORY
-                                     "${OUTPUT_PATH}"
-                                     RUNTIME_OUTPUT_DIRECTORY
-                                     "${OUTPUT_PATH}"
-                                     PDB_OUTPUT_DIRECTORY
-                                     "${OUTPUT_PATH}"
-                                     LINK_FLAGS
-                                     "${ARG_SHARED_LINK_FLAGS}"
-                                     OUTPUT_NAME
-                                     ${LIB_NAME}
-                                     VERSION
-                                     "${ARROW_FULL_SO_VERSION}"
-                                     SOVERSION
-                                     "${ARROW_SO_VERSION}")
+                          PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${OUTPUT_PATH}"
+                                     RUNTIME_OUTPUT_DIRECTORY "${OUTPUT_PATH}"
+                                     PDB_OUTPUT_DIRECTORY "${OUTPUT_PATH}"
+                                     LINK_FLAGS "${ARG_SHARED_LINK_FLAGS}"
+                                     OUTPUT_NAME ${LIB_NAME}
+                                     VERSION "${ARROW_FULL_SO_VERSION}"
+                                     SOVERSION "${ARROW_SO_VERSION}")
 
     target_link_libraries(${LIB_NAME}_shared
                           LINK_PUBLIC
@@ -395,8 +381,8 @@ function(ADD_ARROW_LIB LIB_NAME)
       else()
         set(_lib_install_rpath "\$ORIGIN")
       endif()
-      set_target_properties(${LIB_NAME}_shared
-                            PROPERTIES INSTALL_RPATH ${_lib_install_rpath})
+      set_target_properties(${LIB_NAME}_shared PROPERTIES INSTALL_RPATH
+                                                          ${_lib_install_rpath})
     endif()
 
     if(APPLE)
@@ -407,7 +393,7 @@ function(ADD_ARROW_LIB LIB_NAME)
       endif()
       set_target_properties(${LIB_NAME}_shared
                             PROPERTIES BUILD_WITH_INSTALL_RPATH ON INSTALL_NAME_DIR
-                                       "${_lib_install_name}")
+                                                                   "${_lib_install_name}")
     endif()
 
     install(TARGETS ${LIB_NAME}_shared ${INSTALL_IS_OPTIONAL}
@@ -415,7 +401,8 @@ function(ADD_ARROW_LIB LIB_NAME)
             RUNTIME DESTINATION ${RUNTIME_INSTALL_DIR}
             LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
             ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
-            INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
+            INCLUDES
+            DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
   endif()
 
   if(BUILD_STATIC)
@@ -451,8 +438,8 @@ function(ADD_ARROW_LIB LIB_NAME)
     endif()
 
     set_target_properties(${LIB_NAME}_static
-                          PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${OUTPUT_PATH}" OUTPUT_NAME
-                                     ${LIB_NAME_STATIC})
+                          PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${OUTPUT_PATH}"
+                                     OUTPUT_NAME ${LIB_NAME_STATIC})
 
     if(ARG_STATIC_INSTALL_INTERFACE_LIBS)
       target_link_libraries(${LIB_NAME}_static LINK_PUBLIC
@@ -469,7 +456,8 @@ function(ADD_ARROW_LIB LIB_NAME)
             RUNTIME DESTINATION ${RUNTIME_INSTALL_DIR}
             LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
             ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
-            INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
+            INCLUDES
+            DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
   endif()
 
   if(ARG_CMAKE_PACKAGE_NAME)
@@ -488,9 +476,10 @@ function(ADD_ARROW_LIB LIB_NAME)
 
     set(CONFIG_VERSION_CMAKE "${ARG_CMAKE_PACKAGE_NAME}ConfigVersion.cmake")
     set(BUILT_CONFIG_VERSION_CMAKE "${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_VERSION_CMAKE}")
-    write_basic_package_version_file("${BUILT_CONFIG_VERSION_CMAKE}"
-                                     VERSION ${${PROJECT_NAME}_VERSION}
-                                     COMPATIBILITY AnyNewerVersion)
+    write_basic_package_version_file(
+      "${BUILT_CONFIG_VERSION_CMAKE}"
+      VERSION ${${PROJECT_NAME}_VERSION}
+      COMPATIBILITY AnyNewerVersion)
     install(FILES "${BUILT_CONFIG_VERSION_CMAKE}"
             DESTINATION "${ARROW_CMAKE_INSTALL_DIR}")
   endif()
@@ -501,7 +490,9 @@ function(ADD_ARROW_LIB LIB_NAME)
 
   # Modify variable in calling scope
   if(ARG_OUTPUTS)
-    set(${ARG_OUTPUTS} ${${ARG_OUTPUTS}} PARENT_SCOPE)
+    set(${ARG_OUTPUTS}
+        ${${ARG_OUTPUTS}}
+        PARENT_SCOPE)
   endif()
 endfunction()
 
@@ -589,10 +580,8 @@ function(ADD_BENCHMARK REL_BENCHMARK_NAME)
   # installed there.
   if(NOT "$ENV{CONDA_PREFIX}" STREQUAL "" AND APPLE)
     set_target_properties(${BENCHMARK_NAME}
-                          PROPERTIES BUILD_WITH_INSTALL_RPATH
-                                     TRUE
-                                     INSTALL_RPATH_USE_LINK_PATH
-                                     TRUE
+                          PROPERTIES BUILD_WITH_INSTALL_RPATH TRUE
+                                     INSTALL_RPATH_USE_LINK_PATH TRUE
                                      INSTALL_RPATH
                                      "$ENV{CONDA_PREFIX}/lib;${EXECUTABLE_OUTPUT_PATH}")
   endif()
@@ -619,7 +608,9 @@ function(ADD_BENCHMARK REL_BENCHMARK_NAME)
            benchmark
            ${BENCHMARK_PATH}
            ${NO_COLOR})
-  set_property(TEST ${BENCHMARK_NAME} APPEND PROPERTY LABELS ${ARG_LABELS})
+  set_property(TEST ${BENCHMARK_NAME}
+               APPEND
+               PROPERTY LABELS ${ARG_LABELS})
 endfunction()
 
 #
@@ -699,10 +690,8 @@ function(ADD_TEST_CASE REL_TEST_NAME)
   # installed there.
   if(NOT "$ENV{CONDA_PREFIX}" STREQUAL "" AND APPLE)
     set_target_properties(${TEST_NAME}
-                          PROPERTIES BUILD_WITH_INSTALL_RPATH
-                                     TRUE
-                                     INSTALL_RPATH_USE_LINK_PATH
-                                     TRUE
+                          PROPERTIES BUILD_WITH_INSTALL_RPATH TRUE
+                                     INSTALL_RPATH_USE_LINK_PATH TRUE
                                      INSTALL_RPATH
                                      "${EXECUTABLE_OUTPUT_PATH};$ENV{CONDA_PREFIX}/lib")
   endif()
@@ -735,9 +724,10 @@ function(ADD_TEST_CASE REL_TEST_NAME)
   endif()
 
   if(ARROW_TEST_MEMCHECK AND NOT ARG_NO_VALGRIND)
-    add_test(
-      ${TEST_NAME} bash -c
-      "cd '${CMAKE_SOURCE_DIR}'; \
+    add_test(${TEST_NAME}
+             bash
+             -c
+             "cd '${CMAKE_SOURCE_DIR}'; \
                valgrind --suppressions=valgrind.supp --tool=memcheck --gen-suppressions=all \
                  --num-callers=500 --leak-check=full --leak-check-heuristics=stdstring \
                  --error-exitcode=1 ${TEST_PATH}")
@@ -773,17 +763,16 @@ function(ADD_TEST_CASE REL_TEST_NAME)
     set(LABEL_TEST_NAME "test-${LABEL}")
     if(NOT TARGET ${LABEL_TEST_NAME})
       add_custom_target(${LABEL_TEST_NAME}
-                        ctest
-                        -L
-                        "${LABEL}"
-                        --output-on-failure
+                        ctest -L "${LABEL}" --output-on-failure
                         USES_TERMINAL)
     endif()
     # ensure the test is (re)built before the LABEL test runs
     add_dependencies(${LABEL_TEST_NAME} ${TEST_NAME})
   endforeach()
 
-  set_property(TEST ${TEST_NAME} APPEND PROPERTY LABELS ${LABELS})
+  set_property(TEST ${TEST_NAME}
+               APPEND
+               PROPERTY LABELS ${LABELS})
 endfunction()
 
 #
@@ -896,8 +885,8 @@ function(ADD_FUZZ_TARGET REL_FUZZING_NAME)
   add_executable(${FUZZING_NAME} "${REL_FUZZING_NAME}.cc")
   target_link_libraries(${FUZZING_NAME} ${LINK_LIBS})
   target_compile_options(${FUZZING_NAME} PRIVATE ${FUZZ_LDFLAGS})
-  set_target_properties(${FUZZING_NAME}
-                        PROPERTIES LINK_FLAGS ${FUZZ_LDFLAGS} LABELS "fuzzing")
+  set_target_properties(${FUZZING_NAME} PROPERTIES LINK_FLAGS ${FUZZ_LDFLAGS} LABELS
+                                                                              "fuzzing")
 endfunction()
 
 function(ARROW_INSTALL_ALL_HEADERS PATH)
diff --git a/cpp/cmake_modules/DefineOptions.cmake b/cpp/cmake_modules/DefineOptions.cmake
index 033076ebdb0..e2a85a4aa55 100644
--- a/cpp/cmake_modules/DefineOptions.cmake
+++ b/cpp/cmake_modules/DefineOptions.cmake
@@ -33,7 +33,9 @@ endfunction()
 
 function(list_join lst glue out)
   if("${${lst}}" STREQUAL "")
-    set(${out} "" PARENT_SCOPE)
+    set(${out}
+        ""
+        PARENT_SCOPE)
     return()
   endif()
 
@@ -42,7 +44,9 @@ function(list_join lst glue out)
   foreach(item ${${lst}})
     set(joined "${joined}${glue}${item}")
   endforeach()
-  set(${out} ${joined} PARENT_SCOPE)
+  set(${out}
+      ${joined}
+      PARENT_SCOPE)
 endfunction()
 
 macro(define_option name description default)
@@ -61,7 +65,9 @@ macro(define_option_string name description default)
   check_description_length(${name} ${description})
   list_join(description "\n" multiline_description)
 
-  set(${name} ${default} CACHE STRING "${multiline_description}")
+  set(${name}
+      ${default}
+      CACHE STRING "${multiline_description}")
 
   list(APPEND "ARROW_${ARROW_OPTION_CATEGORY}_OPTION_NAMES" ${name})
   set("${name}_OPTION_DESCRIPTION" ${description})
@@ -181,8 +187,8 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
 
   define_option(ARROW_ONLY_LINT "Only define the lint and check-format targets" OFF)
 
-  define_option(ARROW_VERBOSE_LINT "If off, 'quiet' flags will be passed to linting tools"
-                OFF)
+  define_option(ARROW_VERBOSE_LINT
+                "If off, 'quiet' flags will be passed to linting tools" OFF)
 
   define_option(ARROW_GENERATE_COVERAGE "Build with C++ code coverage enabled" OFF)
 
@@ -320,7 +326,8 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
   define_option(ARROW_LZ4_USE_SHARED "Rely on lz4 shared libraries where relevant"
                 ${ARROW_DEPENDENCY_USE_SHARED})
 
-  define_option(ARROW_OPENSSL_USE_SHARED "Rely on OpenSSL shared libraries where relevant"
+  define_option(ARROW_OPENSSL_USE_SHARED
+                "Rely on OpenSSL shared libraries where relevant"
                 ${ARROW_DEPENDENCY_USE_SHARED})
 
   define_option(ARROW_PROTOBUF_USE_SHARED
@@ -363,14 +370,12 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
   define_option(ARROW_WITH_ZLIB "Build with zlib compression" OFF)
   define_option(ARROW_WITH_ZSTD "Build with zstd compression" OFF)
 
-  define_option(
-    ARROW_WITH_UTF8PROC
-    "Build with support for Unicode properties using the utf8proc library;(only used if ARROW_COMPUTE is ON or ARROW_GANDIVA is ON)"
-    ON)
-  define_option(
-    ARROW_WITH_RE2
-    "Build with support for regular expressions using the re2 library;(only used if ARROW_COMPUTE or ARROW_GANDIVA is ON)"
-    ON)
+  define_option(ARROW_WITH_UTF8PROC
+                "Build with support for Unicode properties using the utf8proc library;(only used if ARROW_COMPUTE is ON or ARROW_GANDIVA is ON)"
+                ON)
+  define_option(ARROW_WITH_RE2
+                "Build with support for regular expressions using the re2 library;(only used if ARROW_COMPUTE or ARROW_GANDIVA is ON)"
+                ON)
 
   #----------------------------------------------------------------------
   if(MSVC_TOOLCHAIN)
@@ -416,9 +421,9 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
                 "Depend only on Thirdparty headers to build libparquet.;\
 Always OFF if building binaries" OFF)
 
-  define_option(
-    PARQUET_BUILD_EXECUTABLES
-    "Build the Parquet executable CLI tools. Requires static libraries to be built." OFF)
+  define_option(PARQUET_BUILD_EXECUTABLES
+                "Build the Parquet executable CLI tools. Requires static libraries to be built."
+                OFF)
 
   define_option(PARQUET_BUILD_EXAMPLES
                 "Build the Parquet examples. Requires static libraries to be built." OFF)
@@ -432,10 +437,9 @@ Always OFF if building binaries" OFF)
   define_option(ARROW_GANDIVA_JAVA "Build the Gandiva JNI wrappers" OFF)
 
   # ARROW-3860: Temporary workaround
-  define_option(
-    ARROW_GANDIVA_STATIC_LIBSTDCPP
-    "Include -static-libstdc++ -static-libgcc when linking with;Gandiva static libraries"
-    OFF)
+  define_option(ARROW_GANDIVA_STATIC_LIBSTDCPP
+                "Include -static-libstdc++ -static-libgcc when linking with;Gandiva static libraries"
+                OFF)
 
   define_option_string(ARROW_GANDIVA_PC_CXX_FLAGS
                        "Compiler flags to append when pre-compiling Gandiva operations"
@@ -450,7 +454,8 @@ Always OFF if building binaries" OFF)
   define_option(ARROW_OPTIONAL_INSTALL
                 "If enabled install ONLY targets that have already been built. Please be;\
 advised that if this is enabled 'install' will fail silently on components;\
-that have not been built" OFF)
+that have not been built"
+                OFF)
 
   option(ARROW_BUILD_CONFIG_SUMMARY_JSON "Summarize build configuration in a JSON file"
          ON)
@@ -465,9 +470,8 @@ macro(validate_config)
       set(value "${${name}}")
       if(possible_values)
         if(NOT "${value}" IN_LIST possible_values)
-          message(
-            FATAL_ERROR "Configuration option ${name} got invalid value '${value}'. "
-                        "Allowed values: ${${name}_OPTION_ENUM}.")
+          message(FATAL_ERROR "Configuration option ${name} got invalid value '${value}'. "
+                              "Allowed values: ${${name}_OPTION_ENUM}.")
         endif()
       endif()
     endforeach()
@@ -486,8 +490,8 @@ macro(config_summary_message)
   message(STATUS "  Source directory: ${CMAKE_CURRENT_SOURCE_DIR}")
   message(STATUS "  Install prefix: ${CMAKE_INSTALL_PREFIX}")
   if(${CMAKE_EXPORT_COMPILE_COMMANDS})
-    message(
-      STATUS "  Compile commands: ${CMAKE_CURRENT_BINARY_DIR}/compile_commands.json")
+    message(STATUS "  Compile commands: ${CMAKE_CURRENT_BINARY_DIR}/compile_commands.json"
+    )
   endif()
 
   foreach(category ${ARROW_OPTION_CATEGORIES})
diff --git a/cpp/cmake_modules/FindArrow.cmake b/cpp/cmake_modules/FindArrow.cmake
index 9c987665896..68024cc2760 100644
--- a/cpp/cmake_modules/FindArrow.cmake
+++ b/cpp/cmake_modules/FindArrow.cmake
@@ -50,11 +50,12 @@ set(ARROW_SEARCH_LIB_PATH_SUFFIXES)
 if(CMAKE_LIBRARY_ARCHITECTURE)
   list(APPEND ARROW_SEARCH_LIB_PATH_SUFFIXES "lib/${CMAKE_LIBRARY_ARCHITECTURE}")
 endif()
-list(APPEND ARROW_SEARCH_LIB_PATH_SUFFIXES
-            "lib64"
-            "lib32"
-            "lib"
-            "bin")
+list(APPEND
+     ARROW_SEARCH_LIB_PATH_SUFFIXES
+     "lib64"
+     "lib32"
+     "lib"
+     "bin")
 set(ARROW_CONFIG_SUFFIXES
     "_RELEASE"
     "_RELWITHDEBINFO"
@@ -120,10 +121,9 @@ endfunction()
 #   # -> ARROW_STATIC_LIBRARY_NAME=arrow.lib with MSVC on Windows
 #   # -> ARROW_STATIC_LIBRARY_NAME=libarrow.dll.a with MinGW on Windows
 function(arrow_build_static_library_name output_variable base_name)
-  set(
-    ${output_variable}
-    "${CMAKE_STATIC_LIBRARY_PREFIX}${base_name}${ARROW_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}"
-    PARENT_SCOPE)
+  set(${output_variable}
+      "${CMAKE_STATIC_LIBRARY_PREFIX}${base_name}${ARROW_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}"
+      PARENT_SCOPE)
 endfunction()
 
 # Internal function.
@@ -138,9 +138,11 @@ endfunction()
 function(arrow_extract_macro_value output_variable macro_name header_content)
   string(REGEX MATCH "#define +${macro_name} +[^\r\n]+" macro_definition
                "${header_content}")
-  string(REGEX
-         REPLACE "^#define +${macro_name} +(.+)$" "\\1" macro_value "${macro_definition}")
-  set(${output_variable} "${macro_value}" PARENT_SCOPE)
+  string(REGEX REPLACE "^#define +${macro_name} +(.+)$" "\\1" macro_value
+                       "${macro_definition}")
+  set(${output_variable}
+      "${macro_value}"
+      PARENT_SCOPE)
 endfunction()
 
 # Internal macro only for arrow_find_package.
@@ -152,7 +154,9 @@ macro(arrow_find_package_home)
             PATH_SUFFIXES "include"
             NO_DEFAULT_PATH)
   set(include_dir "${${prefix}_include_dir}")
-  set(${prefix}_INCLUDE_DIR "${include_dir}" PARENT_SCOPE)
+  set(${prefix}_INCLUDE_DIR
+      "${include_dir}"
+      PARENT_SCOPE)
 
   if(MSVC_TOOLCHAIN)
     set(CMAKE_SHARED_LIBRARY_SUFFIXES_ORIGINAL ${CMAKE_FIND_LIBRARY_SUFFIXES})
@@ -169,13 +173,15 @@ macro(arrow_find_package_home)
     set(CMAKE_SHARED_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES_ORIGINAL})
   endif()
   set(shared_lib "${${prefix}_shared_lib}")
-  set(${prefix}_SHARED_LIB "${shared_lib}" PARENT_SCOPE)
+  set(${prefix}_SHARED_LIB
+      "${shared_lib}"
+      PARENT_SCOPE)
   if(shared_lib)
     add_library(${target_shared} SHARED IMPORTED)
     set_target_properties(${target_shared} PROPERTIES IMPORTED_LOCATION "${shared_lib}")
     if(include_dir)
-      set_target_properties(${target_shared}
-                            PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${include_dir}")
+      set_target_properties(${target_shared} PROPERTIES INTERFACE_INCLUDE_DIRECTORIES
+                                                        "${include_dir}")
     endif()
     find_library(${prefix}_import_lib
                  NAMES "${import_lib_name}"
@@ -183,7 +189,9 @@ macro(arrow_find_package_home)
                  PATH_SUFFIXES ${ARROW_SEARCH_LIB_PATH_SUFFIXES}
                  NO_DEFAULT_PATH)
     set(import_lib "${${prefix}_import_lib}")
-    set(${prefix}_IMPORT_LIB "${import_lib}" PARENT_SCOPE)
+    set(${prefix}_IMPORT_LIB
+        "${import_lib}"
+        PARENT_SCOPE)
     if(import_lib)
       set_target_properties(${target_shared} PROPERTIES IMPORTED_IMPLIB "${import_lib}")
     endif()
@@ -195,13 +203,15 @@ macro(arrow_find_package_home)
                PATH_SUFFIXES ${ARROW_SEARCH_LIB_PATH_SUFFIXES}
                NO_DEFAULT_PATH)
   set(static_lib "${${prefix}_static_lib}")
-  set(${prefix}_STATIC_LIB "${static_lib}" PARENT_SCOPE)
+  set(${prefix}_STATIC_LIB
+      "${static_lib}"
+      PARENT_SCOPE)
   if(static_lib)
     add_library(${target_static} STATIC IMPORTED)
     set_target_properties(${target_static} PROPERTIES IMPORTED_LOCATION "${static_lib}")
     if(include_dir)
-      set_target_properties(${target_static}
-                            PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${include_dir}")
+      set_target_properties(${target_static} PROPERTIES INTERFACE_INCLUDE_DIRECTORIES
+                                                        "${include_dir}")
     endif()
   endif()
 endmacro()
@@ -212,7 +222,9 @@ endmacro()
 macro(arrow_find_package_cmake_package_configuration)
   find_package(${cmake_package_name} CONFIG)
   if(${cmake_package_name}_FOUND)
-    set(${prefix}_USE_CMAKE_PACKAGE_CONFIG TRUE PARENT_SCOPE)
+    set(${prefix}_USE_CMAKE_PACKAGE_CONFIG
+        TRUE
+        PARENT_SCOPE)
     if(TARGET ${target_shared})
       foreach(suffix ${ARROW_CONFIG_SUFFIXES})
         get_target_property(shared_lib ${target_shared} IMPORTED_LOCATION${suffix})
@@ -221,10 +233,11 @@ macro(arrow_find_package_cmake_package_configuration)
           #   libarrow.so.100.0.0 -> libarrow.so
           # Because ARROW_HOME and pkg-config approaches don't add
           # shared library version.
-          string(REGEX
-                 REPLACE "(${CMAKE_SHARED_LIBRARY_SUFFIX})[.0-9]+$" "\\1" shared_lib
-                         "${shared_lib}")
-          set(${prefix}_SHARED_LIB "${shared_lib}" PARENT_SCOPE)
+          string(REGEX REPLACE "(${CMAKE_SHARED_LIBRARY_SUFFIX})[.0-9]+$" "\\1"
+                               shared_lib "${shared_lib}")
+          set(${prefix}_SHARED_LIB
+              "${shared_lib}"
+              PARENT_SCOPE)
           break()
         endif()
       endforeach()
@@ -233,7 +246,9 @@ macro(arrow_find_package_cmake_package_configuration)
       foreach(suffix ${ARROW_CONFIG_SUFFIXES})
         get_target_property(static_lib ${target_static} IMPORTED_LOCATION${suffix})
         if(static_lib)
-          set(${prefix}_STATIC_LIB "${static_lib}" PARENT_SCOPE)
+          set(${prefix}_STATIC_LIB
+              "${static_lib}"
+              PARENT_SCOPE)
           break()
         endif()
       endforeach()
@@ -247,7 +262,9 @@ endmacro()
 macro(arrow_find_package_pkg_config)
   pkg_check_modules(${prefix}_PC ${pkg_config_name})
   if(${prefix}_PC_FOUND)
-    set(${prefix}_USE_PKG_CONFIG TRUE PARENT_SCOPE)
+    set(${prefix}_USE_PKG_CONFIG
+        TRUE
+        PARENT_SCOPE)
 
     set(include_dir "${${prefix}_PC_INCLUDEDIR}")
     set(lib_dir "${${prefix}_PC_LIBDIR}")
@@ -270,18 +287,21 @@ macro(arrow_find_package_pkg_config)
            rest_shared_lib_paths)
     endif()
 
-    set(${prefix}_VERSION "${${prefix}_PC_VERSION}" PARENT_SCOPE)
-    set(${prefix}_INCLUDE_DIR "${include_dir}" PARENT_SCOPE)
-    set(${prefix}_SHARED_LIB "${first_shared_lib_path}" PARENT_SCOPE)
+    set(${prefix}_VERSION
+        "${${prefix}_PC_VERSION}"
+        PARENT_SCOPE)
+    set(${prefix}_INCLUDE_DIR
+        "${include_dir}"
+        PARENT_SCOPE)
+    set(${prefix}_SHARED_LIB
+        "${first_shared_lib_path}"
+        PARENT_SCOPE)
 
     add_library(${target_shared} SHARED IMPORTED)
     set_target_properties(${target_shared}
-                          PROPERTIES INTERFACE_INCLUDE_DIRECTORIES
-                                     "${include_dir}"
-                                     INTERFACE_LINK_LIBRARIES
-                                     "${rest_shared_lib_paths}"
-                                     IMPORTED_LOCATION
-                                     "${first_shared_lib_path}")
+                          PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${include_dir}"
+                                     INTERFACE_LINK_LIBRARIES "${rest_shared_lib_paths}"
+                                     IMPORTED_LOCATION "${first_shared_lib_path}")
     get_target_property(shared_lib ${target_shared} IMPORTED_LOCATION)
 
     find_library(${prefix}_static_lib
@@ -289,7 +309,9 @@ macro(arrow_find_package_pkg_config)
                  PATHS "${lib_dir}"
                  NO_DEFAULT_PATH)
     set(static_lib "${${prefix}_static_lib}")
-    set(${prefix}_STATIC_LIB "${static_lib}" PARENT_SCOPE)
+    set(${prefix}_STATIC_LIB
+        "${static_lib}"
+        PARENT_SCOPE)
     if(static_lib)
       add_library(${target_static} STATIC IMPORTED)
       set_target_properties(${target_static}
@@ -315,7 +337,9 @@ function(arrow_find_package
 
   if(home)
     arrow_find_package_home()
-    set(${prefix}_FIND_APPROACH "HOME: ${home}" PARENT_SCOPE)
+    set(${prefix}_FIND_APPROACH
+        "HOME: ${home}"
+        PARENT_SCOPE)
   else()
     arrow_find_package_cmake_package_configuration()
     if(${cmake_package_name}_FOUND)
@@ -324,7 +348,9 @@ function(arrow_find_package
           PARENT_SCOPE)
     else()
       arrow_find_package_pkg_config()
-      set(${prefix}_FIND_APPROACH "pkg-config: ${pkg_config_name}" PARENT_SCOPE)
+      set(${prefix}_FIND_APPROACH
+          "pkg-config: ${pkg_config_name}"
+          PARENT_SCOPE)
     endif()
   endif()
 
@@ -336,7 +362,9 @@ function(arrow_find_package
     endif()
   endif()
   if(include_dir)
-    set(${prefix}_INCLUDE_DIR "${include_dir}" PARENT_SCOPE)
+    set(${prefix}_INCLUDE_DIR
+        "${include_dir}"
+        PARENT_SCOPE)
   endif()
 
   if(shared_lib)
@@ -346,9 +374,13 @@ function(arrow_find_package
   else()
     set(lib_dir NOTFOUND)
   endif()
-  set(${prefix}_LIB_DIR "${lib_dir}" PARENT_SCOPE)
+  set(${prefix}_LIB_DIR
+      "${lib_dir}"
+      PARENT_SCOPE)
   # For backward compatibility
-  set(${prefix}_LIBS "${lib_dir}" PARENT_SCOPE)
+  set(${prefix}_LIBS
+      "${lib_dir}"
+      PARENT_SCOPE)
 endfunction()
 
 if(NOT "$ENV{ARROW_HOME}" STREQUAL "")
@@ -384,9 +416,8 @@ if(ARROW_HOME)
     string(REGEX REPLACE "^\"(.+)\"$" "\\1" ARROW_SO_VERSION "${ARROW_SO_VERSION_QUOTED}")
     arrow_extract_macro_value(ARROW_FULL_SO_VERSION_QUOTED "ARROW_FULL_SO_VERSION"
                               "${ARROW_CONFIG_H_CONTENT}")
-    string(REGEX
-           REPLACE "^\"(.+)\"$" "\\1" ARROW_FULL_SO_VERSION
-                   "${ARROW_FULL_SO_VERSION_QUOTED}")
+    string(REGEX REPLACE "^\"(.+)\"$" "\\1" ARROW_FULL_SO_VERSION
+                         "${ARROW_FULL_SO_VERSION_QUOTED}")
   endif()
 else()
   if(ARROW_USE_CMAKE_PACKAGE_CONFIG)
@@ -416,16 +447,13 @@ mark_as_advanced(ARROW_ABI_VERSION
                  ARROW_VERSION_MINOR
                  ARROW_VERSION_PATCH)
 
-find_package_handle_standard_args(Arrow REQUIRED_VARS
-                                  # The first required variable is shown
-                                  # in the found message. So this list is
-                                  # not sorted alphabetically.
-                                  ARROW_INCLUDE_DIR
-                                  ARROW_LIB_DIR
-                                  ARROW_FULL_SO_VERSION
-                                  ARROW_SO_VERSION
-                                  VERSION_VAR
-                                  ARROW_VERSION)
+find_package_handle_standard_args(
+  Arrow
+  REQUIRED_VARS # The first required variable is shown
+                # in the found message. So this list is
+                # not sorted alphabetically.
+                ARROW_INCLUDE_DIR ARROW_LIB_DIR ARROW_FULL_SO_VERSION ARROW_SO_VERSION
+  VERSION_VAR ARROW_VERSION)
 set(ARROW_FOUND ${Arrow_FOUND})
 
 if(Arrow_FOUND AND NOT Arrow_FIND_QUIETLY)
diff --git a/cpp/cmake_modules/FindArrowCUDA.cmake b/cpp/cmake_modules/FindArrowCUDA.cmake
index 7bc2f5b745b..014386f3012 100644
--- a/cpp/cmake_modules/FindArrowCUDA.cmake
+++ b/cpp/cmake_modules/FindArrowCUDA.cmake
@@ -74,13 +74,10 @@ mark_as_advanced(ARROW_CUDA_IMPORT_LIB
                  ARROW_CUDA_VERSION
                  ARROW_CUDA_VERSION_MATCH)
 
-find_package_handle_standard_args(ArrowCUDA
-                                  REQUIRED_VARS
-                                  ARROW_CUDA_INCLUDE_DIR
-                                  ARROW_CUDA_LIB_DIR
-                                  ARROW_CUDA_VERSION_MATCH
-                                  VERSION_VAR
-                                  ARROW_CUDA_VERSION)
+find_package_handle_standard_args(
+  ArrowCUDA
+  REQUIRED_VARS ARROW_CUDA_INCLUDE_DIR ARROW_CUDA_LIB_DIR ARROW_CUDA_VERSION_MATCH
+  VERSION_VAR ARROW_CUDA_VERSION)
 set(ARROW_CUDA_FOUND ${ArrowCUDA_FOUND})
 
 if(ArrowCUDA_FOUND AND NOT ArrowCUDA_FIND_QUIETLY)
diff --git a/cpp/cmake_modules/FindArrowDataset.cmake b/cpp/cmake_modules/FindArrowDataset.cmake
index d45fae6799b..fe74f247fc3 100644
--- a/cpp/cmake_modules/FindArrowDataset.cmake
+++ b/cpp/cmake_modules/FindArrowDataset.cmake
@@ -74,13 +74,11 @@ mark_as_advanced(ARROW_DATASET_IMPORT_LIB
                  ARROW_DATASET_VERSION
                  ARROW_DATASET_VERSION_MATCH)
 
-find_package_handle_standard_args(ArrowDataset
-                                  REQUIRED_VARS
-                                  ARROW_DATASET_INCLUDE_DIR
-                                  ARROW_DATASET_LIB_DIR
-                                  ARROW_DATASET_VERSION_MATCH
-                                  VERSION_VAR
-                                  ARROW_DATASET_VERSION)
+find_package_handle_standard_args(
+  ArrowDataset
+  REQUIRED_VARS ARROW_DATASET_INCLUDE_DIR ARROW_DATASET_LIB_DIR
+                ARROW_DATASET_VERSION_MATCH
+  VERSION_VAR ARROW_DATASET_VERSION)
 set(ARROW_DATASET_FOUND ${ArrowDataset_FOUND})
 
 if(ArrowDataset_FOUND AND NOT ArrowDataset_FIND_QUIETLY)
diff --git a/cpp/cmake_modules/FindArrowFlight.cmake b/cpp/cmake_modules/FindArrowFlight.cmake
index 344c408995c..805a4ff3803 100644
--- a/cpp/cmake_modules/FindArrowFlight.cmake
+++ b/cpp/cmake_modules/FindArrowFlight.cmake
@@ -75,13 +75,10 @@ mark_as_advanced(ARROW_FLIGHT_IMPORT_LIB
                  ARROW_FLIGHT_VERSION
                  ARROW_FLIGHT_VERSION_MATCH)
 
-find_package_handle_standard_args(ArrowFlight
-                                  REQUIRED_VARS
-                                  ARROW_FLIGHT_INCLUDE_DIR
-                                  ARROW_FLIGHT_LIB_DIR
-                                  ARROW_FLIGHT_VERSION_MATCH
-                                  VERSION_VAR
-                                  ARROW_FLIGHT_VERSION)
+find_package_handle_standard_args(
+  ArrowFlight
+  REQUIRED_VARS ARROW_FLIGHT_INCLUDE_DIR ARROW_FLIGHT_LIB_DIR ARROW_FLIGHT_VERSION_MATCH
+  VERSION_VAR ARROW_FLIGHT_VERSION)
 set(ARROW_FLIGHT_FOUND ${ArrowFlight_FOUND})
 
 if(ArrowFlight_FOUND AND NOT ArrowFlight_FIND_QUIETLY)
diff --git a/cpp/cmake_modules/FindArrowFlightTesting.cmake b/cpp/cmake_modules/FindArrowFlightTesting.cmake
index feb2790dfc6..c0756cf637c 100644
--- a/cpp/cmake_modules/FindArrowFlightTesting.cmake
+++ b/cpp/cmake_modules/FindArrowFlightTesting.cmake
@@ -79,25 +79,20 @@ mark_as_advanced(ARROW_FLIGHT_TESTING_IMPORT_LIB
                  ARROW_FLIGHT_TESTING_VERSION
                  ARROW_FLIGHT_TESTING_VERSION_MATCH)
 
-find_package_handle_standard_args(ArrowFlightTesting
-                                  REQUIRED_VARS
-                                  ARROW_FLIGHT_TESTING_INCLUDE_DIR
-                                  ARROW_FLIGHT_TESTING_LIB_DIR
-                                  ARROW_FLIGHT_TESTING_VERSION_MATCH
-                                  VERSION_VAR
-                                  ARROW_FLIGHT_TESTING_VERSION)
+find_package_handle_standard_args(
+  ArrowFlightTesting
+  REQUIRED_VARS ARROW_FLIGHT_TESTING_INCLUDE_DIR ARROW_FLIGHT_TESTING_LIB_DIR
+                ARROW_FLIGHT_TESTING_VERSION_MATCH
+  VERSION_VAR ARROW_FLIGHT_TESTING_VERSION)
 set(ARROW_FLIGHT_TESTING_FOUND ${ArrowFlightTesting_FOUND})
 
 if(ArrowFlightTesting_FOUND AND NOT ArrowFlightTesting_FIND_QUIETLY)
-  message(
-    STATUS "Found the Arrow Flight testing by ${ARROW_FLIGHT_TESTING_FIND_APPROACH}")
-  message(
-    STATUS
-      "Found the Arrow Flight testing shared library: ${ARROW_FLIGHT_TESTING_SHARED_LIB}")
-  message(
-    STATUS
-      "Found the Arrow Flight testing import library: ${ARROW_FLIGHT_TESTING_IMPORT_LIB}")
-  message(
-    STATUS
-      "Found the Arrow Flight testing static library: ${ARROW_FLIGHT_TESTING_STATIC_LIB}")
+  message(STATUS "Found the Arrow Flight testing by ${ARROW_FLIGHT_TESTING_FIND_APPROACH}"
+  )
+  message(STATUS "Found the Arrow Flight testing shared library: ${ARROW_FLIGHT_TESTING_SHARED_LIB}"
+  )
+  message(STATUS "Found the Arrow Flight testing import library: ${ARROW_FLIGHT_TESTING_IMPORT_LIB}"
+  )
+  message(STATUS "Found the Arrow Flight testing static library: ${ARROW_FLIGHT_TESTING_STATIC_LIB}"
+  )
 endif()
diff --git a/cpp/cmake_modules/FindArrowPython.cmake b/cpp/cmake_modules/FindArrowPython.cmake
index 3d1280dff72..b503e6a9e02 100644
--- a/cpp/cmake_modules/FindArrowPython.cmake
+++ b/cpp/cmake_modules/FindArrowPython.cmake
@@ -73,13 +73,10 @@ mark_as_advanced(ARROW_PYTHON_IMPORT_LIB
                  ARROW_PYTHON_VERSION
                  ARROW_PYTHON_VERSION_MATCH)
 
-find_package_handle_standard_args(ArrowPython
-                                  REQUIRED_VARS
-                                  ARROW_PYTHON_INCLUDE_DIR
-                                  ARROW_PYTHON_LIB_DIR
-                                  ARROW_PYTHON_VERSION_MATCH
-                                  VERSION_VAR
-                                  ARROW_PYTHON_VERSION)
+find_package_handle_standard_args(
+  ArrowPython
+  REQUIRED_VARS ARROW_PYTHON_INCLUDE_DIR ARROW_PYTHON_LIB_DIR ARROW_PYTHON_VERSION_MATCH
+  VERSION_VAR ARROW_PYTHON_VERSION)
 set(ARROW_PYTHON_FOUND ${ArrowPython_FOUND})
 
 if(ArrowPython_FOUND AND NOT ArrowPython_FIND_QUIETLY)
diff --git a/cpp/cmake_modules/FindArrowPythonFlight.cmake b/cpp/cmake_modules/FindArrowPythonFlight.cmake
index acb22c64231..3a639928ce5 100644
--- a/cpp/cmake_modules/FindArrowPythonFlight.cmake
+++ b/cpp/cmake_modules/FindArrowPythonFlight.cmake
@@ -76,24 +76,19 @@ mark_as_advanced(ARROW_PYTHON_FLIGHT_IMPORT_LIB
                  ARROW_PYTHON_FLIGHT_VERSION
                  ARROW_PYTHON_FLIGHT_VERSION_MATCH)
 
-find_package_handle_standard_args(ArrowPythonFlight
-                                  REQUIRED_VARS
-                                  ARROW_PYTHON_FLIGHT_INCLUDE_DIR
-                                  ARROW_PYTHON_FLIGHT_LIB_DIR
-                                  ARROW_PYTHON_FLIGHT_VERSION_MATCH
-                                  VERSION_VAR
-                                  ARROW_PYTHON_FLIGHT_VERSION)
+find_package_handle_standard_args(
+  ArrowPythonFlight
+  REQUIRED_VARS ARROW_PYTHON_FLIGHT_INCLUDE_DIR ARROW_PYTHON_FLIGHT_LIB_DIR
+                ARROW_PYTHON_FLIGHT_VERSION_MATCH
+  VERSION_VAR ARROW_PYTHON_FLIGHT_VERSION)
 set(ARROW_PYTHON_FLIGHT_FOUND ${ArrowPythonFlight_FOUND})
 
 if(ArrowPythonFlight_FOUND AND NOT ArrowPythonFlight_FIND_QUIETLY)
   message(STATUS "Found the Arrow Python Flight by ${ARROW_PYTHON_FLIGHT_FIND_APPROACH}")
-  message(
-    STATUS
-      "Found the Arrow Python Flight shared library: ${ARROW_PYTHON_FLIGHT_SHARED_LIB}")
-  message(
-    STATUS
-      "Found the Arrow Python Flight import library: ${ARROW_PYTHON_FLIGHT_IMPORT_LIB}")
-  message(
-    STATUS
-      "Found the Arrow Python Flight static library: ${ARROW_PYTHON_FLIGHT_STATIC_LIB}")
+  message(STATUS "Found the Arrow Python Flight shared library: ${ARROW_PYTHON_FLIGHT_SHARED_LIB}"
+  )
+  message(STATUS "Found the Arrow Python Flight import library: ${ARROW_PYTHON_FLIGHT_IMPORT_LIB}"
+  )
+  message(STATUS "Found the Arrow Python Flight static library: ${ARROW_PYTHON_FLIGHT_STATIC_LIB}"
+  )
 endif()
diff --git a/cpp/cmake_modules/FindArrowTesting.cmake b/cpp/cmake_modules/FindArrowTesting.cmake
index ed5a28cd3e4..c405003ad70 100644
--- a/cpp/cmake_modules/FindArrowTesting.cmake
+++ b/cpp/cmake_modules/FindArrowTesting.cmake
@@ -74,13 +74,11 @@ mark_as_advanced(ARROW_TESTING_IMPORT_LIB
                  ARROW_TESTING_VERSION
                  ARROW_TESTING_VERSION_MATCH)
 
-find_package_handle_standard_args(ArrowTesting
-                                  REQUIRED_VARS
-                                  ARROW_TESTING_INCLUDE_DIR
-                                  ARROW_TESTING_LIB_DIR
-                                  ARROW_TESTING_VERSION_MATCH
-                                  VERSION_VAR
-                                  ARROW_TESTING_VERSION)
+find_package_handle_standard_args(
+  ArrowTesting
+  REQUIRED_VARS ARROW_TESTING_INCLUDE_DIR ARROW_TESTING_LIB_DIR
+                ARROW_TESTING_VERSION_MATCH
+  VERSION_VAR ARROW_TESTING_VERSION)
 set(ARROW_TESTING_FOUND ${ArrowTesting_FOUND})
 
 if(ArrowTesting_FOUND AND NOT ArrowTesting_FIND_QUIETLY)
diff --git a/cpp/cmake_modules/FindBrotli.cmake b/cpp/cmake_modules/FindBrotli.cmake
index b46a0f1a0cf..e2670b51a9e 100644
--- a/cpp/cmake_modules/FindBrotli.cmake
+++ b/cpp/cmake_modules/FindBrotli.cmake
@@ -110,12 +110,9 @@ else()
   endif()
 endif()
 
-find_package_handle_standard_args(Brotli
-                                  REQUIRED_VARS
-                                  BROTLI_COMMON_LIBRARY
-                                  BROTLI_ENC_LIBRARY
-                                  BROTLI_DEC_LIBRARY
-                                  BROTLI_INCLUDE_DIR)
+find_package_handle_standard_args(
+  Brotli REQUIRED_VARS BROTLI_COMMON_LIBRARY BROTLI_ENC_LIBRARY BROTLI_DEC_LIBRARY
+                       BROTLI_INCLUDE_DIR)
 if(Brotli_FOUND OR BROTLI_FOUND)
   set(Brotli_FOUND TRUE)
   add_library(Brotli::brotlicommon UNKNOWN IMPORTED)
diff --git a/cpp/cmake_modules/FindClangTools.cmake b/cpp/cmake_modules/FindClangTools.cmake
index 88171abed92..52fc59895b8 100644
--- a/cpp/cmake_modules/FindClangTools.cmake
+++ b/cpp/cmake_modules/FindClangTools.cmake
@@ -69,15 +69,18 @@ function(FIND_CLANG_TOOL NAME OUTPUT VERSION_CHECK_PATTERN)
     endif()
   endif()
   if(CLANG_TOOL_BIN)
-    set(${OUTPUT} ${CLANG_TOOL_BIN} PARENT_SCOPE)
+    set(${OUTPUT}
+        ${CLANG_TOOL_BIN}
+        PARENT_SCOPE)
   else()
-    set(${OUTPUT} "${OUTPUT}-NOTFOUND" PARENT_SCOPE)
+    set(${OUTPUT}
+        "${OUTPUT}-NOTFOUND"
+        PARENT_SCOPE)
   endif()
 endfunction()
 
-string(REGEX
-       REPLACE "\\." "\\\\." ARROW_CLANG_TOOLS_VERSION_ESCAPED
-               "${ARROW_CLANG_TOOLS_VERSION}")
+string(REGEX REPLACE "\\." "\\\\." ARROW_CLANG_TOOLS_VERSION_ESCAPED
+                     "${ARROW_CLANG_TOOLS_VERSION}")
 
 find_clang_tool(clang-tidy CLANG_TIDY_BIN
                 "LLVM version ${ARROW_CLANG_TOOLS_VERSION_ESCAPED}")
@@ -100,4 +103,4 @@ else()
 endif()
 
 find_package_handle_standard_args(ClangTools REQUIRED_VARS CLANG_FORMAT_BIN
-                                  CLANG_TIDY_BIN)
+                                                           CLANG_TIDY_BIN)
diff --git a/cpp/cmake_modules/FindGLOG.cmake b/cpp/cmake_modules/FindGLOG.cmake
index 81c3f2ec57e..d67eb005621 100644
--- a/cpp/cmake_modules/FindGLOG.cmake
+++ b/cpp/cmake_modules/FindGLOG.cmake
@@ -38,7 +38,9 @@ elseif(GLOG_ROOT)
             NO_DEFAULT_PATH
             PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES})
 else()
-  find_library(GLOG_LIB NAMES glog PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES})
+  find_library(GLOG_LIB
+               NAMES glog
+               PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES})
   find_path(GLOG_INCLUDE_DIR
             NAMES glog/logging.h
             PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES})
diff --git a/cpp/cmake_modules/FindGandiva.cmake b/cpp/cmake_modules/FindGandiva.cmake
index 15279fd841a..c533abed733 100644
--- a/cpp/cmake_modules/FindGandiva.cmake
+++ b/cpp/cmake_modules/FindGandiva.cmake
@@ -79,14 +79,11 @@ mark_as_advanced(GANDIVA_ABI_VERSION
                  GANDIVA_VERSION
                  GANDIVA_VERSION_MATCH)
 
-find_package_handle_standard_args(Gandiva
-                                  REQUIRED_VARS
-                                  GANDIVA_INCLUDE_DIR
-                                  GANDIVA_LIB_DIR
-                                  GANDIVA_SO_VERSION
-                                  GANDIVA_VERSION_MATCH
-                                  VERSION_VAR
-                                  GANDIVA_VERSION)
+find_package_handle_standard_args(
+  Gandiva
+  REQUIRED_VARS GANDIVA_INCLUDE_DIR GANDIVA_LIB_DIR GANDIVA_SO_VERSION
+                GANDIVA_VERSION_MATCH
+  VERSION_VAR GANDIVA_VERSION)
 set(GANDIVA_FOUND ${Gandiva_FOUND})
 
 if(Gandiva_FOUND AND NOT Gandiva_FIND_QUIETLY)
diff --git a/cpp/cmake_modules/FindLLVMAlt.cmake b/cpp/cmake_modules/FindLLVMAlt.cmake
index 7695c09ae8c..380f2d47c72 100644
--- a/cpp/cmake_modules/FindLLVMAlt.cmake
+++ b/cpp/cmake_modules/FindLLVMAlt.cmake
@@ -58,22 +58,17 @@ if(LLVM_FOUND)
   add_library(LLVM::LLVM_INTERFACE INTERFACE IMPORTED)
 
   set_target_properties(LLVM::LLVM_INTERFACE
-                        PROPERTIES INTERFACE_INCLUDE_DIRECTORIES
-                                   "${LLVM_INCLUDE_DIRS}"
-                                   INTERFACE_COMPILE_FLAGS
-                                   "${LLVM_DEFINITIONS}"
-                                   INTERFACE_LINK_LIBRARIES
-                                   "${LLVM_LIBS}")
+                        PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${LLVM_INCLUDE_DIRS}"
+                                   INTERFACE_COMPILE_FLAGS "${LLVM_DEFINITIONS}"
+                                   INTERFACE_LINK_LIBRARIES "${LLVM_LIBS}")
 endif()
 
 mark_as_advanced(CLANG_EXECUTABLE LLVM_LINK_EXECUTABLE)
 
-find_package_handle_standard_args(LLVMAlt
-                                  REQUIRED_VARS # The first variable is used for display.
-                                  LLVM_PACKAGE_VERSION
-                                  CLANG_EXECUTABLE
-                                  LLVM_FOUND
-                                  LLVM_LINK_EXECUTABLE)
+find_package_handle_standard_args(
+  LLVMAlt
+  REQUIRED_VARS # The first variable is used for display.
+                LLVM_PACKAGE_VERSION CLANG_EXECUTABLE LLVM_FOUND LLVM_LINK_EXECUTABLE)
 if(LLVMAlt_FOUND)
   message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}")
   message(STATUS "Found llvm-link ${LLVM_LINK_EXECUTABLE}")
diff --git a/cpp/cmake_modules/FindLz4.cmake b/cpp/cmake_modules/FindLz4.cmake
index 14b6d93b983..bc8051fe9c5 100644
--- a/cpp/cmake_modules/FindLz4.cmake
+++ b/cpp/cmake_modules/FindLz4.cmake
@@ -23,16 +23,13 @@ set(LZ4_LIB_NAME_BASE "${LZ4_MSVC_LIB_PREFIX}lz4")
 if(ARROW_LZ4_USE_SHARED)
   set(LZ4_LIB_NAMES)
   if(CMAKE_IMPORT_LIBRARY_SUFFIX)
-    list(
-      APPEND
-        LZ4_LIB_NAMES
-        "${CMAKE_IMPORT_LIBRARY_PREFIX}${LZ4_LIB_NAME_BASE}${CMAKE_IMPORT_LIBRARY_SUFFIX}"
-      )
+    list(APPEND
+         LZ4_LIB_NAMES
+         "${CMAKE_IMPORT_LIBRARY_PREFIX}${LZ4_LIB_NAME_BASE}${CMAKE_IMPORT_LIBRARY_SUFFIX}"
+    )
   endif()
-  list(
-    APPEND
-      LZ4_LIB_NAMES
-      "${CMAKE_SHARED_LIBRARY_PREFIX}${LZ4_LIB_NAME_BASE}${CMAKE_SHARED_LIBRARY_SUFFIX}")
+  list(APPEND LZ4_LIB_NAMES
+       "${CMAKE_SHARED_LIBRARY_PREFIX}${LZ4_LIB_NAME_BASE}${CMAKE_SHARED_LIBRARY_SUFFIX}")
 else()
   if(MSVC AND NOT DEFINED LZ4_MSVC_STATIC_LIB_SUFFIX)
     set(LZ4_MSVC_STATIC_LIB_SUFFIX "_static")
@@ -70,7 +67,9 @@ else()
     find_library(LZ4_LIB
                  NAMES ${LZ4_LIB_NAMES}
                  PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES})
-    find_path(LZ4_INCLUDE_DIR NAMES lz4.h PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES})
+    find_path(LZ4_INCLUDE_DIR
+              NAMES lz4.h
+              PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES})
   endif()
 endif()
 
diff --git a/cpp/cmake_modules/FindORC.cmake b/cpp/cmake_modules/FindORC.cmake
index 061a0df2e9e..d45b1607833 100644
--- a/cpp/cmake_modules/FindORC.cmake
+++ b/cpp/cmake_modules/FindORC.cmake
@@ -33,7 +33,9 @@ if(ORC_ROOT)
             NO_DEFAULT_PATH
             PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES})
 else()
-  find_library(ORC_STATIC_LIB NAMES orc PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES})
+  find_library(ORC_STATIC_LIB
+               NAMES orc
+               PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES})
   find_path(ORC_INCLUDE_DIR
             NAMES orc/orc-config.hh
             PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES})
diff --git a/cpp/cmake_modules/FindParquet.cmake b/cpp/cmake_modules/FindParquet.cmake
index 99124b2c037..e071fc822b6 100644
--- a/cpp/cmake_modules/FindParquet.cmake
+++ b/cpp/cmake_modules/FindParquet.cmake
@@ -83,13 +83,12 @@ if(ARROW_FOUND)
 
       arrow_extract_macro_value(PARQUET_SO_VERSION_QUOTED "PARQUET_SO_VERSION"
                                 "${PARQUET_VERSION_H_CONTENT}")
-      string(REGEX
-             REPLACE "^\"(.+)\"$" "\\1" PARQUET_SO_VERSION "${PARQUET_SO_VERSION_QUOTED}")
+      string(REGEX REPLACE "^\"(.+)\"$" "\\1" PARQUET_SO_VERSION
+                           "${PARQUET_SO_VERSION_QUOTED}")
       arrow_extract_macro_value(PARQUET_FULL_SO_VERSION_QUOTED "PARQUET_FULL_SO_VERSION"
                                 "${PARQUET_VERSION_H_CONTENT}")
-      string(REGEX
-             REPLACE "^\"(.+)\"$" "\\1" PARQUET_FULL_SO_VERSION
-                     "${PARQUET_FULL_SO_VERSION_QUOTED}")
+      string(REGEX REPLACE "^\"(.+)\"$" "\\1" PARQUET_FULL_SO_VERSION
+                           "${PARQUET_FULL_SO_VERSION_QUOTED}")
     endif()
   else()
     if(PARQUET_USE_CMAKE_PACKAGE_CONFIG)
@@ -113,13 +112,10 @@ mark_as_advanced(PARQUET_ABI_VERSION
                  PARQUET_STATIC_LIB
                  PARQUET_VERSION)
 
-find_package_handle_standard_args(Parquet
-                                  REQUIRED_VARS
-                                  PARQUET_INCLUDE_DIR
-                                  PARQUET_LIB_DIR
-                                  PARQUET_SO_VERSION
-                                  VERSION_VAR
-                                  PARQUET_VERSION)
+find_package_handle_standard_args(
+  Parquet
+  REQUIRED_VARS PARQUET_INCLUDE_DIR PARQUET_LIB_DIR PARQUET_SO_VERSION
+  VERSION_VAR PARQUET_VERSION)
 set(PARQUET_FOUND ${Parquet_FOUND})
 
 if(Parquet_FOUND AND NOT Parquet_FIND_QUIETLY)
diff --git a/cpp/cmake_modules/FindPlasma.cmake b/cpp/cmake_modules/FindPlasma.cmake
index d56b7141320..2e634844c59 100644
--- a/cpp/cmake_modules/FindPlasma.cmake
+++ b/cpp/cmake_modules/FindPlasma.cmake
@@ -87,14 +87,10 @@ mark_as_advanced(PLASMA_ABI_VERSION
                  PLASMA_STORE_SERVER
                  PLASMA_VERSION)
 
-find_package_handle_standard_args(Plasma
-                                  REQUIRED_VARS
-                                  PLASMA_INCLUDE_DIR
-                                  PLASMA_LIB_DIR
-                                  PLASMA_SO_VERSION
-                                  PLASMA_STORE_SERVER
-                                  VERSION_VAR
-                                  PLASMA_VERSION)
+find_package_handle_standard_args(
+  Plasma
+  REQUIRED_VARS PLASMA_INCLUDE_DIR PLASMA_LIB_DIR PLASMA_SO_VERSION PLASMA_STORE_SERVER
+  VERSION_VAR PLASMA_VERSION)
 set(PLASMA_FOUND ${Plasma_FOUND})
 
 if(Plasma_FOUND AND NOT Plasma_FIND_QUIETLY)
diff --git a/cpp/cmake_modules/FindPython3Alt.cmake b/cpp/cmake_modules/FindPython3Alt.cmake
index 131a0d395fc..ab91c7be052 100644
--- a/cpp/cmake_modules/FindPython3Alt.cmake
+++ b/cpp/cmake_modules/FindPython3Alt.cmake
@@ -33,11 +33,8 @@ if(${CMAKE_VERSION} VERSION_LESS "3.15.0")
     find_package(PythonLibsNew)
     find_package(NumPy)
   endif()
-  find_package_handle_standard_args(Python3Alt
-                                    REQUIRED_VARS
-                                    PYTHON_EXECUTABLE
-                                    PYTHON_INCLUDE_DIRS
-                                    NUMPY_INCLUDE_DIRS)
+  find_package_handle_standard_args(
+    Python3Alt REQUIRED_VARS PYTHON_EXECUTABLE PYTHON_INCLUDE_DIRS NUMPY_INCLUDE_DIRS)
   return()
 endif()
 
@@ -46,13 +43,17 @@ if(${CMAKE_VERSION} VERSION_LESS "3.18.0" OR ARROW_BUILD_TESTS)
   # the full "Development" component.  Also ask for it on CMake < 3.18,
   # where "Development.Module" is not available.
   if(Python3Alt_FIND_REQUIRED)
-    find_package(Python3 COMPONENTS Interpreter Development NumPy REQUIRED)
+    find_package(Python3
+                 COMPONENTS Interpreter Development NumPy
+                 REQUIRED)
   else()
     find_package(Python3 COMPONENTS Interpreter Development NumPy)
   endif()
 else()
   if(Python3Alt_FIND_REQUIRED)
-    find_package(Python3 COMPONENTS Interpreter Development.Module NumPy REQUIRED)
+    find_package(Python3
+                 COMPONENTS Interpreter Development.Module NumPy
+                 REQUIRED)
   else()
     find_package(Python3 COMPONENTS Interpreter Development.Module NumPy)
   endif()
@@ -72,12 +73,11 @@ get_target_property(NUMPY_INCLUDE_DIRS Python3::NumPy INTERFACE_INCLUDE_DIRECTOR
 # CMake's python3_add_library() doesn't apply the required extension suffix,
 # detect it ourselves.
 # (https://gitlab.kitware.com/cmake/cmake/issues/20408)
-execute_process(
-  COMMAND "${PYTHON_EXECUTABLE}" "-c"
-          "from distutils import sysconfig; print(sysconfig.get_config_var('EXT_SUFFIX'))"
-  RESULT_VARIABLE _PYTHON_RESULT
-  OUTPUT_VARIABLE _PYTHON_STDOUT
-  ERROR_VARIABLE _PYTHON_STDERR)
+execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
+                        "from distutils import sysconfig; print(sysconfig.get_config_var('EXT_SUFFIX'))"
+                RESULT_VARIABLE _PYTHON_RESULT
+                OUTPUT_VARIABLE _PYTHON_STDOUT
+                ERROR_VARIABLE _PYTHON_STDERR)
 
 if(NOT _PYTHON_RESULT MATCHES 0)
   if(Python3Alt_FIND_REQUIRED)
@@ -92,8 +92,5 @@ function(PYTHON_ADD_MODULE name)
   set_target_properties(${name} PROPERTIES SUFFIX ${_EXT_SUFFIX})
 endfunction()
 
-find_package_handle_standard_args(Python3Alt
-                                  REQUIRED_VARS
-                                  PYTHON_EXECUTABLE
-                                  PYTHON_INCLUDE_DIRS
-                                  NUMPY_INCLUDE_DIRS)
+find_package_handle_standard_args(
+  Python3Alt REQUIRED_VARS PYTHON_EXECUTABLE PYTHON_INCLUDE_DIRS NUMPY_INCLUDE_DIRS)
diff --git a/cpp/cmake_modules/FindRapidJSONAlt.cmake b/cpp/cmake_modules/FindRapidJSONAlt.cmake
index a967ef61a66..9a449a5280e 100644
--- a/cpp/cmake_modules/FindRapidJSONAlt.cmake
+++ b/cpp/cmake_modules/FindRapidJSONAlt.cmake
@@ -36,39 +36,37 @@ if(RapidJSON_ROOT)
             NO_DEFAULT_PATH
             PATH_SUFFIXES "include")
 else()
-  find_path(RAPIDJSON_INCLUDE_DIR NAMES rapidjson/rapidjson.h PATH_SUFFIXES "include")
+  find_path(RAPIDJSON_INCLUDE_DIR
+            NAMES rapidjson/rapidjson.h
+            PATH_SUFFIXES "include")
 endif()
 
 if(RAPIDJSON_INCLUDE_DIR)
   file(READ "${RAPIDJSON_INCLUDE_DIR}/rapidjson/rapidjson.h" RAPIDJSON_H_CONTENT)
   string(REGEX MATCH "#define RAPIDJSON_MAJOR_VERSION ([0-9]+)"
                RAPIDJSON_MAJOR_VERSION_DEFINITION "${RAPIDJSON_H_CONTENT}")
-  string(REGEX
-         REPLACE "^.+ ([0-9]+)$" "\\1" RAPIDJSON_MAJOR_VERSION
-                 "${RAPIDJSON_MAJOR_VERSION_DEFINITION}")
+  string(REGEX REPLACE "^.+ ([0-9]+)$" "\\1" RAPIDJSON_MAJOR_VERSION
+                       "${RAPIDJSON_MAJOR_VERSION_DEFINITION}")
   string(REGEX MATCH "#define RAPIDJSON_MINOR_VERSION ([0-9]+)"
                RAPIDJSON_MINOR_VERSION_DEFINITION "${RAPIDJSON_H_CONTENT}")
-  string(REGEX
-         REPLACE "^.+ ([0-9]+)$" "\\1" RAPIDJSON_MINOR_VERSION
-                 "${RAPIDJSON_MINOR_VERSION_DEFINITION}")
+  string(REGEX REPLACE "^.+ ([0-9]+)$" "\\1" RAPIDJSON_MINOR_VERSION
+                       "${RAPIDJSON_MINOR_VERSION_DEFINITION}")
   string(REGEX MATCH "#define RAPIDJSON_PATCH_VERSION ([0-9]+)"
                RAPIDJSON_PATCH_VERSION_DEFINITION "${RAPIDJSON_H_CONTENT}")
-  string(REGEX
-         REPLACE "^.+ ([0-9]+)$" "\\1" RAPIDJSON_PATCH_VERSION
-                 "${RAPIDJSON_PATCH_VERSION_DEFINITION}")
+  string(REGEX REPLACE "^.+ ([0-9]+)$" "\\1" RAPIDJSON_PATCH_VERSION
+                       "${RAPIDJSON_PATCH_VERSION_DEFINITION}")
   if("${RAPIDJSON_MAJOR_VERSION}" STREQUAL ""
      OR "${RAPIDJSON_MINOR_VERSION}" STREQUAL ""
      OR "${RAPIDJSON_PATCH_VERSION}" STREQUAL "")
     set(RAPIDJSON_VERSION "0.0.0")
   else()
-    set(
-      RAPIDJSON_VERSION
-      "${RAPIDJSON_MAJOR_VERSION}.${RAPIDJSON_MINOR_VERSION}.${RAPIDJSON_PATCH_VERSION}")
+    set(RAPIDJSON_VERSION
+        "${RAPIDJSON_MAJOR_VERSION}.${RAPIDJSON_MINOR_VERSION}.${RAPIDJSON_PATCH_VERSION}"
+    )
   endif()
 endif()
 
-find_package_handle_standard_args(RapidJSONAlt
-                                  REQUIRED_VARS
-                                  RAPIDJSON_INCLUDE_DIR
-                                  VERSION_VAR
-                                  RAPIDJSON_VERSION)
+find_package_handle_standard_args(
+  RapidJSONAlt
+  REQUIRED_VARS RAPIDJSON_INCLUDE_DIR
+  VERSION_VAR RAPIDJSON_VERSION)
diff --git a/cpp/cmake_modules/FindSnappy.cmake b/cpp/cmake_modules/FindSnappy.cmake
index 26cccb786c5..747df31854d 100644
--- a/cpp/cmake_modules/FindSnappy.cmake
+++ b/cpp/cmake_modules/FindSnappy.cmake
@@ -19,20 +19,19 @@ if(ARROW_SNAPPY_USE_SHARED)
   set(SNAPPY_LIB_NAMES)
   if(CMAKE_IMPORT_LIBRARY_SUFFIX)
     list(APPEND SNAPPY_LIB_NAMES
-                "${CMAKE_IMPORT_LIBRARY_PREFIX}snappy${CMAKE_IMPORT_LIBRARY_SUFFIX}")
+         "${CMAKE_IMPORT_LIBRARY_PREFIX}snappy${CMAKE_IMPORT_LIBRARY_SUFFIX}")
   endif()
   list(APPEND SNAPPY_LIB_NAMES
-              "${CMAKE_SHARED_LIBRARY_PREFIX}snappy${CMAKE_SHARED_LIBRARY_SUFFIX}")
+       "${CMAKE_SHARED_LIBRARY_PREFIX}snappy${CMAKE_SHARED_LIBRARY_SUFFIX}")
 else()
   set(SNAPPY_STATIC_LIB_NAME_BASE "snappy")
   if(MSVC)
     set(SNAPPY_STATIC_LIB_NAME_BASE
         "${SNAPPY_STATIC_LIB_NAME_BASE}${SNAPPY_MSVC_STATIC_LIB_SUFFIX}")
   endif()
-  set(
-    SNAPPY_LIB_NAMES
-    "${CMAKE_STATIC_LIBRARY_PREFIX}${SNAPPY_STATIC_LIB_NAME_BASE}${CMAKE_STATIC_LIBRARY_SUFFIX}"
-    )
+  set(SNAPPY_LIB_NAMES
+      "${CMAKE_STATIC_LIBRARY_PREFIX}${SNAPPY_STATIC_LIB_NAME_BASE}${CMAKE_STATIC_LIBRARY_SUFFIX}"
+  )
 endif()
 
 if(Snappy_ROOT)
diff --git a/cpp/cmake_modules/FindThrift.cmake b/cpp/cmake_modules/FindThrift.cmake
index 273d907ed07..750d8ce8341 100644
--- a/cpp/cmake_modules/FindThrift.cmake
+++ b/cpp/cmake_modules/FindThrift.cmake
@@ -33,9 +33,13 @@ function(EXTRACT_THRIFT_VERSION)
     string(REGEX MATCH "#define PACKAGE_VERSION \"[0-9.]+\"" THRIFT_VERSION_DEFINITION
                  "${THRIFT_CONFIG_H_CONTENT}")
     string(REGEX MATCH "[0-9.]+" THRIFT_VERSION "${THRIFT_VERSION_DEFINITION}")
-    set(THRIFT_VERSION "${THRIFT_VERSION}" PARENT_SCOPE)
+    set(THRIFT_VERSION
+        "${THRIFT_VERSION}"
+        PARENT_SCOPE)
   else()
-    set(THRIFT_VERSION "" PARENT_SCOPE)
+    set(THRIFT_VERSION
+        ""
+        PARENT_SCOPE)
   endif()
 endfunction(EXTRACT_THRIFT_VERSION)
 
@@ -53,21 +57,19 @@ set(THRIFT_LIB_NAME_BASE "thrift${THRIFT_MSVC_LIB_SUFFIX}")
 if(ARROW_THRIFT_USE_SHARED)
   set(THRIFT_LIB_NAMES thrift)
   if(CMAKE_IMPORT_LIBRARY_SUFFIX)
-    list(
-      APPEND
-        THRIFT_LIB_NAMES
-        "${CMAKE_IMPORT_LIBRARY_PREFIX}${THRIFT_LIB_NAME_BASE}${CMAKE_IMPORT_LIBRARY_SUFFIX}"
-      )
-  endif()
-  list(
-    APPEND
-      THRIFT_LIB_NAMES
-      "${CMAKE_SHARED_LIBRARY_PREFIX}${THRIFT_LIB_NAME_BASE}${CMAKE_SHARED_LIBRARY_SUFFIX}"
+    list(APPEND
+         THRIFT_LIB_NAMES
+         "${CMAKE_IMPORT_LIBRARY_PREFIX}${THRIFT_LIB_NAME_BASE}${CMAKE_IMPORT_LIBRARY_SUFFIX}"
     )
+  endif()
+  list(APPEND
+       THRIFT_LIB_NAMES
+       "${CMAKE_SHARED_LIBRARY_PREFIX}${THRIFT_LIB_NAME_BASE}${CMAKE_SHARED_LIBRARY_SUFFIX}"
+  )
 else()
-  set(
-    THRIFT_LIB_NAMES
-    "${CMAKE_STATIC_LIBRARY_PREFIX}${THRIFT_LIB_NAME_BASE}${CMAKE_STATIC_LIBRARY_SUFFIX}")
+  set(THRIFT_LIB_NAMES
+      "${CMAKE_STATIC_LIBRARY_PREFIX}${THRIFT_LIB_NAME_BASE}${CMAKE_STATIC_LIBRARY_SUFFIX}"
+  )
 endif()
 
 if(Thrift_ROOT)
@@ -78,7 +80,9 @@ if(Thrift_ROOT)
   find_path(THRIFT_INCLUDE_DIR thrift/Thrift.h
             PATHS ${Thrift_ROOT}
             PATH_SUFFIXES "include")
-  find_program(THRIFT_COMPILER thrift PATHS ${Thrift_ROOT} PATH_SUFFIXES "bin")
+  find_program(THRIFT_COMPILER thrift
+               PATHS ${Thrift_ROOT}
+               PATH_SUFFIXES "bin")
   extract_thrift_version()
 else()
   # THRIFT-4760: The pkgconfig files are currently only installed when using autotools.
@@ -115,13 +119,11 @@ else()
   set(Thrift_COMPILER_FOUND FALSE)
 endif()
 
-find_package_handle_standard_args(Thrift
-                                  REQUIRED_VARS
-                                  THRIFT_LIB
-                                  THRIFT_INCLUDE_DIR
-                                  VERSION_VAR
-                                  THRIFT_VERSION
-                                  HANDLE_COMPONENTS)
+find_package_handle_standard_args(
+  Thrift
+  REQUIRED_VARS THRIFT_LIB THRIFT_INCLUDE_DIR
+  VERSION_VAR THRIFT_VERSION
+  HANDLE_COMPONENTS)
 
 if(Thrift_FOUND OR THRIFT_FOUND)
   set(Thrift_FOUND TRUE)
diff --git a/cpp/cmake_modules/Findc-aresAlt.cmake b/cpp/cmake_modules/Findc-aresAlt.cmake
index dd16393cad2..5213e8d12a1 100644
--- a/cpp/cmake_modules/Findc-aresAlt.cmake
+++ b/cpp/cmake_modules/Findc-aresAlt.cmake
@@ -55,7 +55,9 @@ else()
                NAMES cares
                      "${CMAKE_SHARED_LIBRARY_PREFIX}cares${CMAKE_SHARED_LIBRARY_SUFFIX}"
                PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES})
-  find_path(c-ares_INCLUDE_DIR NAMES ares.h PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES})
+  find_path(c-ares_INCLUDE_DIR
+            NAMES ares.h
+            PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES})
 endif()
 
 find_package_handle_standard_args(c-aresAlt REQUIRED_VARS c-ares_LIB c-ares_INCLUDE_DIR)
@@ -63,9 +65,9 @@ find_package_handle_standard_args(c-aresAlt REQUIRED_VARS c-ares_LIB c-ares_INCL
 if(c-aresAlt_FOUND)
   if(NOT TARGET c-ares::cares)
     add_library(c-ares::cares UNKNOWN IMPORTED)
-    set_target_properties(
-      c-ares::cares
-      PROPERTIES IMPORTED_LOCATION "${c-ares_LIB}" INTERFACE_INCLUDE_DIRECTORIES
-                 "${c-ares_INCLUDE_DIR}")
+    set_target_properties(c-ares::cares
+                          PROPERTIES IMPORTED_LOCATION "${c-ares_LIB}"
+                                     INTERFACE_INCLUDE_DIRECTORIES
+                                     "${c-ares_INCLUDE_DIR}")
   endif()
 endif()
diff --git a/cpp/cmake_modules/FindgRPCAlt.cmake b/cpp/cmake_modules/FindgRPCAlt.cmake
index 841b3b61b83..18b23f32269 100644
--- a/cpp/cmake_modules/FindgRPCAlt.cmake
+++ b/cpp/cmake_modules/FindgRPCAlt.cmake
@@ -36,11 +36,9 @@ if(GRPCPP_PC_FOUND)
   else()
     set(GRPCPP_LINK_LIBRARIES)
     foreach(GRPCPP_LIBRARY_NAME ${GRPCPP_PC_STATIC_LIBRARIES})
-      find_library(
-        GRPCPP_LIBRARY_${GRPCPP_LIBRARY_NAME}
-        NAMES
-          "${CMAKE_STATIC_LIBRARY_PREFIX}${GRPCPP_LIBRARY_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}"
-        HINTS ${GRPCPP_PC_STATIC_LIBRARY_DIRS})
+      find_library(GRPCPP_LIBRARY_${GRPCPP_LIBRARY_NAME}
+                   NAMES "${CMAKE_STATIC_LIBRARY_PREFIX}${GRPCPP_LIBRARY_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}"
+                   HINTS ${GRPCPP_PC_STATIC_LIBRARY_DIRS})
       list(APPEND GRPCPP_LINK_LIBRARIES "${GRPCPP_LIBRARY_${GRPCPP_LIBRARY_NAME}}")
     endforeach()
     set(GRPCPP_LINK_OPTIONS ${GRPCPP_PC_STATIC_LDFLAGS_OTHER})
@@ -65,18 +63,14 @@ endif()
 if(gRPCAlt_FOUND)
   add_library(gRPC::grpc++ UNKNOWN IMPORTED)
   set_target_properties(gRPC::grpc++
-                        PROPERTIES IMPORTED_LOCATION
-                                   "${GRPCPP_IMPORTED_LOCATION}"
-                                   INTERFACE_COMPILE_OPTIONS
-                                   "${GRPCPP_COMPILE_OPTIONS}"
+                        PROPERTIES IMPORTED_LOCATION "${GRPCPP_IMPORTED_LOCATION}"
+                                   INTERFACE_COMPILE_OPTIONS "${GRPCPP_COMPILE_OPTIONS}"
                                    INTERFACE_INCLUDE_DIRECTORIES
                                    "${GRPCPP_INCLUDE_DIRECTORIES}"
-                                   INTERFACE_LINK_LIBRARIES
-                                   "${GRPCPP_LINK_LIBRARIES}"
-                                   INTERFACE_LINK_OPTIONS
-                                   "${GRPCPP_LINK_OPTIONS}")
+                                   INTERFACE_LINK_LIBRARIES "${GRPCPP_LINK_LIBRARIES}"
+                                   INTERFACE_LINK_OPTIONS "${GRPCPP_LINK_OPTIONS}")
 
   add_executable(gRPC::grpc_cpp_plugin IMPORTED)
-  set_target_properties(gRPC::grpc_cpp_plugin
-                        PROPERTIES IMPORTED_LOCATION ${GRPC_CPP_PLUGIN})
+  set_target_properties(gRPC::grpc_cpp_plugin PROPERTIES IMPORTED_LOCATION
+                                                         ${GRPC_CPP_PLUGIN})
 endif()
diff --git a/cpp/cmake_modules/Findre2Alt.cmake b/cpp/cmake_modules/Findre2Alt.cmake
index 93b69ce77cb..68abf1b75fe 100644
--- a/cpp/cmake_modules/Findre2Alt.cmake
+++ b/cpp/cmake_modules/Findre2Alt.cmake
@@ -42,35 +42,37 @@ if(RE2_PC_FOUND)
   # On Fedora, the reported prefix is wrong. As users likely run into this,
   # workaround.
   # https://bugzilla.redhat.com/show_bug.cgi?id=1652589
-  if(UNIX AND NOT APPLE AND NOT RE2_LIB)
+  if(UNIX
+     AND NOT APPLE
+     AND NOT RE2_LIB)
     if(RE2_PC_PREFIX STREQUAL "/usr/local")
       find_library(RE2_LIB re2)
     endif()
   endif()
 elseif(RE2_ROOT)
-  find_library(
-    RE2_LIB
-    NAMES
-      re2_static re2
-      "${CMAKE_STATIC_LIBRARY_PREFIX}re2${RE2_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}"
-      "${CMAKE_SHARED_LIBRARY_PREFIX}re2${CMAKE_SHARED_LIBRARY_SUFFIX}"
-    PATHS ${RE2_ROOT}
-    PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES}
-    NO_DEFAULT_PATH)
+  find_library(RE2_LIB
+               NAMES re2_static
+                     re2
+                     "${CMAKE_STATIC_LIBRARY_PREFIX}re2${RE2_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}"
+                     "${CMAKE_SHARED_LIBRARY_PREFIX}re2${CMAKE_SHARED_LIBRARY_SUFFIX}"
+               PATHS ${RE2_ROOT}
+               PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES}
+               NO_DEFAULT_PATH)
   find_path(RE2_INCLUDE_DIR
             NAMES re2/re2.h
             PATHS ${RE2_ROOT}
             NO_DEFAULT_PATH
             PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES})
 else()
-  find_library(
-    RE2_LIB
-    NAMES
-      re2_static re2
-      "${CMAKE_STATIC_LIBRARY_PREFIX}re2${RE2_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}"
-      "${CMAKE_SHARED_LIBRARY_PREFIX}re2${CMAKE_SHARED_LIBRARY_SUFFIX}"
-    PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES})
-  find_path(RE2_INCLUDE_DIR NAMES re2/re2.h PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES})
+  find_library(RE2_LIB
+               NAMES re2_static
+                     re2
+                     "${CMAKE_STATIC_LIBRARY_PREFIX}re2${RE2_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}"
+                     "${CMAKE_SHARED_LIBRARY_PREFIX}re2${CMAKE_SHARED_LIBRARY_SUFFIX}"
+               PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES})
+  find_path(RE2_INCLUDE_DIR
+            NAMES re2/re2.h
+            PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES})
 endif()
 
 find_package_handle_standard_args(re2Alt REQUIRED_VARS RE2_LIB RE2_INCLUDE_DIR)
diff --git a/cpp/cmake_modules/Findutf8proc.cmake b/cpp/cmake_modules/Findutf8proc.cmake
index 03c720d9a3c..4d732f18694 100644
--- a/cpp/cmake_modules/Findutf8proc.cmake
+++ b/cpp/cmake_modules/Findutf8proc.cmake
@@ -36,7 +36,9 @@ function(extract_utf8proc_version)
         "${UTF8PROC_MAJOR_VERSION}.${UTF8PROC_MINOR_VERSION}.${UTF8PROC_PATCH_VERSION}"
         PARENT_SCOPE)
   else()
-    set(utf8proc_VERSION "" PARENT_SCOPE)
+    set(utf8proc_VERSION
+        ""
+        PARENT_SCOPE)
   endif()
 endfunction(extract_utf8proc_version)
 
@@ -44,10 +46,10 @@ if(ARROW_UTF8PROC_USE_SHARED)
   set(utf8proc_LIB_NAMES)
   if(CMAKE_IMPORT_LIBRARY_SUFFIX)
     list(APPEND utf8proc_LIB_NAMES
-                "${CMAKE_IMPORT_LIBRARY_PREFIX}utf8proc${CMAKE_IMPORT_LIBRARY_SUFFIX}")
+         "${CMAKE_IMPORT_LIBRARY_PREFIX}utf8proc${CMAKE_IMPORT_LIBRARY_SUFFIX}")
   endif()
   list(APPEND utf8proc_LIB_NAMES
-              "${CMAKE_SHARED_LIBRARY_PREFIX}utf8proc${CMAKE_SHARED_LIBRARY_SUFFIX}")
+       "${CMAKE_SHARED_LIBRARY_PREFIX}utf8proc${CMAKE_SHARED_LIBRARY_SUFFIX}")
 else()
   if(MSVC AND NOT DEFINED utf8proc_MSVC_STATIC_LIB_SUFFIX)
     set(utf8proc_MSVC_STATIC_LIB_SUFFIX "_static")
@@ -80,22 +82,20 @@ else()
   extract_utf8proc_version()
 endif()
 
-find_package_handle_standard_args(utf8proc
-                                  REQUIRED_VARS
-                                  utf8proc_LIB
-                                  utf8proc_INCLUDE_DIR
-                                  VERSION_VAR
-                                  utf8proc_VERSION)
+find_package_handle_standard_args(
+  utf8proc
+  REQUIRED_VARS utf8proc_LIB utf8proc_INCLUDE_DIR
+  VERSION_VAR utf8proc_VERSION)
 
 if(utf8proc_FOUND)
   set(utf8proc_FOUND TRUE)
   add_library(utf8proc::utf8proc UNKNOWN IMPORTED)
-  set_target_properties(
-    utf8proc::utf8proc
-    PROPERTIES IMPORTED_LOCATION "${utf8proc_LIB}" INTERFACE_INCLUDE_DIRECTORIES
-               "${utf8proc_INCLUDE_DIR}")
+  set_target_properties(utf8proc::utf8proc
+                        PROPERTIES IMPORTED_LOCATION "${utf8proc_LIB}"
+                                   INTERFACE_INCLUDE_DIRECTORIES
+                                   "${utf8proc_INCLUDE_DIR}")
   if(NOT ARROW_UTF8PROC_USE_SHARED)
-    set_target_properties(utf8proc::utf8proc
-                          PROPERTIES INTERFACE_COMPILER_DEFINITIONS "UTF8PROC_STATIC")
+    set_target_properties(utf8proc::utf8proc PROPERTIES INTERFACE_COMPILER_DEFINITIONS
+                                                        "UTF8PROC_STATIC")
   endif()
 endif()
diff --git a/cpp/cmake_modules/Findzstd.cmake b/cpp/cmake_modules/Findzstd.cmake
index f32892aecb8..73b7ab250fb 100644
--- a/cpp/cmake_modules/Findzstd.cmake
+++ b/cpp/cmake_modules/Findzstd.cmake
@@ -23,16 +23,14 @@ set(ZSTD_LIB_NAME_BASE "${ZSTD_MSVC_LIB_PREFIX}zstd")
 if(ARROW_ZSTD_USE_SHARED)
   set(ZSTD_LIB_NAMES)
   if(CMAKE_IMPORT_LIBRARY_SUFFIX)
-    list(
-      APPEND
-        ZSTD_LIB_NAMES
-        "${CMAKE_IMPORT_LIBRARY_PREFIX}${ZSTD_LIB_NAME_BASE}${CMAKE_IMPORT_LIBRARY_SUFFIX}"
-      )
+    list(APPEND
+         ZSTD_LIB_NAMES
+         "${CMAKE_IMPORT_LIBRARY_PREFIX}${ZSTD_LIB_NAME_BASE}${CMAKE_IMPORT_LIBRARY_SUFFIX}"
+    )
   endif()
-  list(
-    APPEND
-      ZSTD_LIB_NAMES
-      "${CMAKE_SHARED_LIBRARY_PREFIX}${ZSTD_LIB_NAME_BASE}${CMAKE_SHARED_LIBRARY_SUFFIX}")
+  list(APPEND ZSTD_LIB_NAMES
+       "${CMAKE_SHARED_LIBRARY_PREFIX}${ZSTD_LIB_NAME_BASE}${CMAKE_SHARED_LIBRARY_SUFFIX}"
+  )
 else()
   if(MSVC AND NOT DEFINED ZSTD_MSVC_STATIC_LIB_SUFFIX)
     set(ZSTD_MSVC_STATIC_LIB_SUFFIX "_static")
@@ -75,7 +73,9 @@ else()
     find_library(ZSTD_LIB
                  NAMES ${ZSTD_LIB_NAMES}
                  PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES})
-    find_path(ZSTD_INCLUDE_DIR NAMES zstd.h PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES})
+    find_path(ZSTD_INCLUDE_DIR
+              NAMES zstd.h
+              PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES})
   endif()
 endif()
 
diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake
index 0a92702c4ec..354461cbd27 100644
--- a/cpp/cmake_modules/SetupCxxFlags.cmake
+++ b/cpp/cmake_modules/SetupCxxFlags.cmake
@@ -76,12 +76,13 @@ if(ARROW_CPU_FLAG STREQUAL "x86")
         char out[32];
         _mm512_storeu_si512(out, mask);
         return 0;
-      }" CXX_SUPPORTS_AVX512)
+      }"
+                              CXX_SUPPORTS_AVX512)
     set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQURED_FLAGS})
   endif()
   # Runtime SIMD level it can get from compiler and ARROW_RUNTIME_SIMD_LEVEL
-  if(CXX_SUPPORTS_SSE4_2
-     AND ARROW_RUNTIME_SIMD_LEVEL MATCHES "^(SSE4_2|AVX2|AVX512|MAX)$")
+  if(CXX_SUPPORTS_SSE4_2 AND ARROW_RUNTIME_SIMD_LEVEL MATCHES
+                             "^(SSE4_2|AVX2|AVX512|MAX)$")
     set(ARROW_HAVE_RUNTIME_SSE4_2 ON)
     add_definitions(-DARROW_HAVE_RUNTIME_SSE4_2)
   endif()
@@ -252,8 +253,8 @@ if("${BUILD_WARNING_LEVEL}" STREQUAL "CHECKIN")
     set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} /wd4365")
     set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} /wd4267")
     set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} /wd4838")
-  elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang"
-         OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+  elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL
+                                                        "Clang")
     set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wall")
     set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wextra")
     set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wdocumentation")
@@ -286,8 +287,8 @@ elseif("${BUILD_WARNING_LEVEL}" STREQUAL "EVERYTHING")
     set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} /Wall")
     # https://docs.microsoft.com/en-us/cpp/build/reference/compiler-option-warning-level
     # /wdnnnn disables a warning where "nnnn" is a warning number
-  elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang"
-         OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+  elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL
+                                                        "Clang")
     set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Weverything")
     set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-c++98-compat")
     set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-c++98-compat-pedantic")
@@ -342,8 +343,8 @@ if(MSVC)
   # (required for protobuf, see https://github.com/protocolbuffers/protobuf/issues/6885)
   set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} /wd4065")
 elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
-  if(CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL "7.0"
-     OR CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "7.0")
+  if(CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL "7.0" OR CMAKE_CXX_COMPILER_VERSION
+                                                       VERSION_GREATER "7.0")
     # Without this, gcc >= 7 warns related to changes in C++17
     set(CXX_ONLY_FLAGS "${CXX_ONLY_FLAGS} -Wno-noexcept-type")
   endif()
@@ -370,8 +371,8 @@ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
     set(CXX_ONLY_FLAGS "${CXX_ONLY_FLAGS} -Wno-subobject-linkage")
   endif()
 
-elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang"
-       OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL
+                                                      "Clang")
   # Clang options for all builds
 
   # Using Clang with ccache causes a bunch of spurious warnings that are
@@ -453,8 +454,8 @@ if(ARROW_CPU_FLAG STREQUAL "armv8")
     add_definitions(-DARROW_HAVE_NEON)
   endif()
 
-  if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU"
-     AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS "5.4")
+  if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS
+                                              "5.4")
     message(WARNING "Disable Armv8 CRC and Crypto as compiler doesn't support them well.")
   else()
     if(ARROW_ARMV8_ARCH_FLAG MATCHES "\\+crypto")
@@ -491,7 +492,9 @@ function(GET_GOLD_VERSION)
       message(SEND_ERROR "Could not extract GNU gold version. "
                          "Linker version output: ${LINKER_OUTPUT}")
     endif()
-    set(GOLD_VERSION "${CMAKE_MATCH_1}" PARENT_SCOPE)
+    set(GOLD_VERSION
+        "${CMAKE_MATCH_1}"
+        PARENT_SCOPE)
   endif()
 endfunction()
 
@@ -588,9 +591,8 @@ set(CXX_FLAGS_PROFILE_GEN "${CXX_FLAGS_RELEASE} -fprofile-generate")
 set(CXX_FLAGS_PROFILE_BUILD "${CXX_FLAGS_RELEASE} -fprofile-use")
 
 # Set compile flags based on the build type.
-message(
-  "Configured for ${CMAKE_BUILD_TYPE} build (set with cmake -DCMAKE_BUILD_TYPE={release,debug,...})"
-  )
+message("Configured for ${CMAKE_BUILD_TYPE} build (set with cmake -DCMAKE_BUILD_TYPE={release,debug,...})"
+)
 if("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG")
   set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${C_FLAGS_DEBUG}")
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX_FLAGS_DEBUG}")
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index 1350f274565..40f73f92129 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -279,7 +279,9 @@ if(ARROW_JSON)
   set(ARROW_WITH_RAPIDJSON ON)
 endif()
 
-if(ARROW_ORC OR ARROW_FLIGHT OR ARROW_GANDIVA)
+if(ARROW_ORC
+   OR ARROW_FLIGHT
+   OR ARROW_GANDIVA)
   set(ARROW_WITH_PROTOBUF ON)
 endif()
 
@@ -291,7 +293,9 @@ if((NOT ARROW_COMPUTE) AND (NOT ARROW_GANDIVA))
   set(ARROW_WITH_UTF8PROC OFF)
 endif()
 
-if((NOT ARROW_COMPUTE) AND (NOT ARROW_GANDIVA) AND (NOT ARROW_WITH_GRPC))
+if((NOT ARROW_COMPUTE)
+   AND (NOT ARROW_GANDIVA)
+   AND (NOT ARROW_WITH_GRPC))
   set(ARROW_WITH_RE2 OFF)
 endif()
 
@@ -313,9 +317,8 @@ endmacro()
 file(STRINGS "${THIRDPARTY_DIR}/versions.txt" TOOLCHAIN_VERSIONS_TXT)
 foreach(_VERSION_ENTRY ${TOOLCHAIN_VERSIONS_TXT})
   # Exclude comments
-  if(NOT
-     ((_VERSION_ENTRY MATCHES "^[^#][A-Za-z0-9-_]+_VERSION=")
-      OR (_VERSION_ENTRY MATCHES "^[^#][A-Za-z0-9-_]+_CHECKSUM=")))
+  if(NOT ((_VERSION_ENTRY MATCHES "^[^#][A-Za-z0-9-_]+_VERSION=")
+          OR (_VERSION_ENTRY MATCHES "^[^#][A-Za-z0-9-_]+_CHECKSUM=")))
     continue()
   endif()
 
@@ -336,46 +339,42 @@ endforeach()
 if(DEFINED ENV{ARROW_ABSL_URL})
   set(ABSL_SOURCE_URL "$ENV{ARROW_ABSL_URL}")
 else()
-  set_urls(
-    ABSL_SOURCE_URL
-    "https://github.com/abseil/abseil-cpp/archive/${ARROW_ABSL_BUILD_VERSION}.tar.gz")
+  set_urls(ABSL_SOURCE_URL
+           "https://github.com/abseil/abseil-cpp/archive/${ARROW_ABSL_BUILD_VERSION}.tar.gz"
+  )
 endif()
 
 if(DEFINED ENV{ARROW_AWS_C_COMMON_URL})
   set(AWS_C_COMMON_SOURCE_URL "$ENV{ARROW_AWS_C_COMMON_URL}")
 else()
-  set_urls(
-    AWS_C_COMMON_SOURCE_URL
-    "https://github.com/awslabs/aws-c-common/archive/${ARROW_AWS_C_COMMON_BUILD_VERSION}.tar.gz"
-    )
+  set_urls(AWS_C_COMMON_SOURCE_URL
+           "https://github.com/awslabs/aws-c-common/archive/${ARROW_AWS_C_COMMON_BUILD_VERSION}.tar.gz"
+  )
 endif()
 
 if(DEFINED ENV{ARROW_AWS_CHECKSUMS_URL})
   set(AWS_CHECKSUMS_SOURCE_URL "$ENV{ARROW_AWS_CHECKSUMS_URL}")
 else()
-  set_urls(
-    AWS_CHECKSUMS_SOURCE_URL
-    "https://github.com/awslabs/aws-checksums/archive/${ARROW_AWS_CHECKSUMS_BUILD_VERSION}.tar.gz"
-    )
+  set_urls(AWS_CHECKSUMS_SOURCE_URL
+           "https://github.com/awslabs/aws-checksums/archive/${ARROW_AWS_CHECKSUMS_BUILD_VERSION}.tar.gz"
+  )
 endif()
 
 if(DEFINED ENV{ARROW_AWS_C_EVENT_STREAM_URL})
   set(AWS_C_EVENT_STREAM_SOURCE_URL "$ENV{ARROW_AWS_C_EVENT_STREAM_URL}")
 else()
-  set_urls(
-    AWS_C_EVENT_STREAM_SOURCE_URL
-    "https://github.com/awslabs/aws-c-event-stream/archive/${ARROW_AWS_C_EVENT_STREAM_BUILD_VERSION}.tar.gz"
-    )
+  set_urls(AWS_C_EVENT_STREAM_SOURCE_URL
+           "https://github.com/awslabs/aws-c-event-stream/archive/${ARROW_AWS_C_EVENT_STREAM_BUILD_VERSION}.tar.gz"
+  )
 endif()
 
 if(DEFINED ENV{ARROW_AWSSDK_URL})
   set(AWSSDK_SOURCE_URL "$ENV{ARROW_AWSSDK_URL}")
 else()
-  set_urls(
-    AWSSDK_SOURCE_URL
-    "https://github.com/aws/aws-sdk-cpp/archive/${ARROW_AWSSDK_BUILD_VERSION}.tar.gz"
-    "https://github.com/ursa-labs/thirdparty/releases/download/latest/aws-sdk-cpp-${ARROW_AWSSDK_BUILD_VERSION}.tar.gz"
-    )
+  set_urls(AWSSDK_SOURCE_URL
+           "https://github.com/aws/aws-sdk-cpp/archive/${ARROW_AWSSDK_BUILD_VERSION}.tar.gz"
+           "https://github.com/ursa-labs/thirdparty/releases/download/latest/aws-sdk-cpp-${ARROW_AWSSDK_BUILD_VERSION}.tar.gz"
+  )
 endif()
 
 if(DEFINED ENV{ARROW_BOOST_URL})
@@ -383,136 +382,125 @@ if(DEFINED ENV{ARROW_BOOST_URL})
 else()
   string(REPLACE "." "_" ARROW_BOOST_BUILD_VERSION_UNDERSCORES
                  ${ARROW_BOOST_BUILD_VERSION})
-  set_urls(
-    BOOST_SOURCE_URL
-    # These are trimmed boost bundles we maintain.
-    # See cpp/build-support/trim-boost.sh
-    # FIXME(ARROW-6407) automate uploading this archive to ensure it reflects
-    # our currently used packages and doesn't fall out of sync with
-    # ${ARROW_BOOST_BUILD_VERSION_UNDERSCORES}
-    "https://github.com/ursa-labs/thirdparty/releases/download/latest/boost_${ARROW_BOOST_BUILD_VERSION_UNDERSCORES}.tar.gz"
-    "https://sourceforge.net/projects/boost/files/boost/${ARROW_BOOST_BUILD_VERSION}/boost_${ARROW_BOOST_BUILD_VERSION_UNDERSCORES}.tar.gz"
-    "https://github.com/boostorg/boost/archive/boost-${ARROW_BOOST_BUILD_VERSION}.tar.gz")
+  set_urls(BOOST_SOURCE_URL
+           # These are trimmed boost bundles we maintain.
+           # See cpp/build-support/trim-boost.sh
+           # FIXME(ARROW-6407) automate uploading this archive to ensure it reflects
+           # our currently used packages and doesn't fall out of sync with
+           # ${ARROW_BOOST_BUILD_VERSION_UNDERSCORES}
+           "https://github.com/ursa-labs/thirdparty/releases/download/latest/boost_${ARROW_BOOST_BUILD_VERSION_UNDERSCORES}.tar.gz"
+           "https://sourceforge.net/projects/boost/files/boost/${ARROW_BOOST_BUILD_VERSION}/boost_${ARROW_BOOST_BUILD_VERSION_UNDERSCORES}.tar.gz"
+           "https://github.com/boostorg/boost/archive/boost-${ARROW_BOOST_BUILD_VERSION}.tar.gz"
+  )
 endif()
 
 if(DEFINED ENV{ARROW_BROTLI_URL})
   set(BROTLI_SOURCE_URL "$ENV{ARROW_BROTLI_URL}")
 else()
-  set_urls(
-    BROTLI_SOURCE_URL
-    "https://github.com/google/brotli/archive/${ARROW_BROTLI_BUILD_VERSION}.tar.gz"
-    "https://github.com/ursa-labs/thirdparty/releases/download/latest/brotli-${ARROW_BROTLI_BUILD_VERSION}.tar.gz"
-    )
+  set_urls(BROTLI_SOURCE_URL
+           "https://github.com/google/brotli/archive/${ARROW_BROTLI_BUILD_VERSION}.tar.gz"
+           "https://github.com/ursa-labs/thirdparty/releases/download/latest/brotli-${ARROW_BROTLI_BUILD_VERSION}.tar.gz"
+  )
 endif()
 
 if(DEFINED ENV{ARROW_BZIP2_URL})
   set(ARROW_BZIP2_SOURCE_URL "$ENV{ARROW_BZIP2_URL}")
 else()
-  set_urls(
-    ARROW_BZIP2_SOURCE_URL
-    "https://sourceware.org/pub/bzip2/bzip2-${ARROW_BZIP2_BUILD_VERSION}.tar.gz"
-    "https://github.com/ursa-labs/thirdparty/releases/download/latest/bzip2-${ARROW_BZIP2_BUILD_VERSION}.tar.gz"
-    )
+  set_urls(ARROW_BZIP2_SOURCE_URL
+           "https://sourceware.org/pub/bzip2/bzip2-${ARROW_BZIP2_BUILD_VERSION}.tar.gz"
+           "https://github.com/ursa-labs/thirdparty/releases/download/latest/bzip2-${ARROW_BZIP2_BUILD_VERSION}.tar.gz"
+  )
 endif()
 
 if(DEFINED ENV{ARROW_CARES_URL})
   set(CARES_SOURCE_URL "$ENV{ARROW_CARES_URL}")
 else()
-  set_urls(
-    CARES_SOURCE_URL
-    "https://c-ares.haxx.se/download/c-ares-${ARROW_CARES_BUILD_VERSION}.tar.gz"
-    "https://github.com/ursa-labs/thirdparty/releases/download/latest/cares-${ARROW_CARES_BUILD_VERSION}.tar.gz"
-    )
+  set_urls(CARES_SOURCE_URL
+           "https://c-ares.haxx.se/download/c-ares-${ARROW_CARES_BUILD_VERSION}.tar.gz"
+           "https://github.com/ursa-labs/thirdparty/releases/download/latest/cares-${ARROW_CARES_BUILD_VERSION}.tar.gz"
+  )
 endif()
 
 if(DEFINED ENV{ARROW_GBENCHMARK_URL})
   set(GBENCHMARK_SOURCE_URL "$ENV{ARROW_GBENCHMARK_URL}")
 else()
-  set_urls(
-    GBENCHMARK_SOURCE_URL
-    "https://github.com/google/benchmark/archive/${ARROW_GBENCHMARK_BUILD_VERSION}.tar.gz"
-    "https://github.com/ursa-labs/thirdparty/releases/download/latest/gbenchmark-${ARROW_GBENCHMARK_BUILD_VERSION}.tar.gz"
-    )
+  set_urls(GBENCHMARK_SOURCE_URL
+           "https://github.com/google/benchmark/archive/${ARROW_GBENCHMARK_BUILD_VERSION}.tar.gz"
+           "https://github.com/ursa-labs/thirdparty/releases/download/latest/gbenchmark-${ARROW_GBENCHMARK_BUILD_VERSION}.tar.gz"
+  )
 endif()
 
 if(DEFINED ENV{ARROW_GFLAGS_URL})
   set(GFLAGS_SOURCE_URL "$ENV{ARROW_GFLAGS_URL}")
 else()
-  set_urls(
-    GFLAGS_SOURCE_URL
-    "https://github.com/gflags/gflags/archive/${ARROW_GFLAGS_BUILD_VERSION}.tar.gz"
-    "https://github.com/ursa-labs/thirdparty/releases/download/latest/gflags-${ARROW_GFLAGS_BUILD_VERSION}.tar.gz"
-    )
+  set_urls(GFLAGS_SOURCE_URL
+           "https://github.com/gflags/gflags/archive/${ARROW_GFLAGS_BUILD_VERSION}.tar.gz"
+           "https://github.com/ursa-labs/thirdparty/releases/download/latest/gflags-${ARROW_GFLAGS_BUILD_VERSION}.tar.gz"
+  )
 endif()
 
 if(DEFINED ENV{ARROW_GLOG_URL})
   set(GLOG_SOURCE_URL "$ENV{ARROW_GLOG_URL}")
 else()
-  set_urls(
-    GLOG_SOURCE_URL
-    "https://github.com/google/glog/archive/${ARROW_GLOG_BUILD_VERSION}.tar.gz"
-    "https://github.com/ursa-labs/thirdparty/releases/download/latest/glog-${ARROW_GLOG_BUILD_VERSION}.tar.gz"
-    )
+  set_urls(GLOG_SOURCE_URL
+           "https://github.com/google/glog/archive/${ARROW_GLOG_BUILD_VERSION}.tar.gz"
+           "https://github.com/ursa-labs/thirdparty/releases/download/latest/glog-${ARROW_GLOG_BUILD_VERSION}.tar.gz"
+  )
 endif()
 
 if(DEFINED ENV{ARROW_GRPC_URL})
   set(GRPC_SOURCE_URL "$ENV{ARROW_GRPC_URL}")
 else()
-  set_urls(
-    GRPC_SOURCE_URL
-    "https://github.com/grpc/grpc/archive/${ARROW_GRPC_BUILD_VERSION}.tar.gz"
-    "https://github.com/ursa-labs/thirdparty/releases/download/latest/grpc-${ARROW_GRPC_BUILD_VERSION}.tar.gz"
-    )
+  set_urls(GRPC_SOURCE_URL
+           "https://github.com/grpc/grpc/archive/${ARROW_GRPC_BUILD_VERSION}.tar.gz"
+           "https://github.com/ursa-labs/thirdparty/releases/download/latest/grpc-${ARROW_GRPC_BUILD_VERSION}.tar.gz"
+  )
 endif()
 
 if(DEFINED ENV{ARROW_GTEST_URL})
   set(GTEST_SOURCE_URL "$ENV{ARROW_GTEST_URL}")
 else()
-  set_urls(
-    GTEST_SOURCE_URL
-    "https://github.com/google/googletest/archive/release-${ARROW_GTEST_BUILD_VERSION}.tar.gz"
-    "https://chromium.googlesource.com/external/github.com/google/googletest/+archive/release-${ARROW_GTEST_BUILD_VERSION}.tar.gz"
-    "https://github.com/ursa-labs/thirdparty/releases/download/latest/gtest-${ARROW_GTEST_BUILD_VERSION}.tar.gz"
-    )
+  set_urls(GTEST_SOURCE_URL
+           "https://github.com/google/googletest/archive/release-${ARROW_GTEST_BUILD_VERSION}.tar.gz"
+           "https://chromium.googlesource.com/external/github.com/google/googletest/+archive/release-${ARROW_GTEST_BUILD_VERSION}.tar.gz"
+           "https://github.com/ursa-labs/thirdparty/releases/download/latest/gtest-${ARROW_GTEST_BUILD_VERSION}.tar.gz"
+  )
 endif()
 
 if(DEFINED ENV{ARROW_JEMALLOC_URL})
   set(JEMALLOC_SOURCE_URL "$ENV{ARROW_JEMALLOC_URL}")
 else()
-  set_urls(
-    JEMALLOC_SOURCE_URL
-    "https://github.com/jemalloc/jemalloc/releases/download/${ARROW_JEMALLOC_BUILD_VERSION}/jemalloc-${ARROW_JEMALLOC_BUILD_VERSION}.tar.bz2"
-    "https://github.com/ursa-labs/thirdparty/releases/download/latest/jemalloc-${ARROW_JEMALLOC_BUILD_VERSION}.tar.bz2"
-    )
+  set_urls(JEMALLOC_SOURCE_URL
+           "https://github.com/jemalloc/jemalloc/releases/download/${ARROW_JEMALLOC_BUILD_VERSION}/jemalloc-${ARROW_JEMALLOC_BUILD_VERSION}.tar.bz2"
+           "https://github.com/ursa-labs/thirdparty/releases/download/latest/jemalloc-${ARROW_JEMALLOC_BUILD_VERSION}.tar.bz2"
+  )
 endif()
 
 if(DEFINED ENV{ARROW_MIMALLOC_URL})
   set(MIMALLOC_SOURCE_URL "$ENV{ARROW_MIMALLOC_URL}")
 else()
-  set_urls(
-    MIMALLOC_SOURCE_URL
-    "https://github.com/microsoft/mimalloc/archive/${ARROW_MIMALLOC_BUILD_VERSION}.tar.gz"
-    "https://github.com/ursa-labs/thirdparty/releases/download/latest/mimalloc-${ARROW_MIMALLOC_BUILD_VERSION}.tar.gz"
-    )
+  set_urls(MIMALLOC_SOURCE_URL
+           "https://github.com/microsoft/mimalloc/archive/${ARROW_MIMALLOC_BUILD_VERSION}.tar.gz"
+           "https://github.com/ursa-labs/thirdparty/releases/download/latest/mimalloc-${ARROW_MIMALLOC_BUILD_VERSION}.tar.gz"
+  )
 endif()
 
 if(DEFINED ENV{ARROW_LZ4_URL})
   set(LZ4_SOURCE_URL "$ENV{ARROW_LZ4_URL}")
 else()
-  set_urls(
-    LZ4_SOURCE_URL "https://github.com/lz4/lz4/archive/${ARROW_LZ4_BUILD_VERSION}.tar.gz"
-    "https://github.com/ursa-labs/thirdparty/releases/download/latest/lz4-${ARROW_LZ4_BUILD_VERSION}.tar.gz"
-    )
+  set_urls(LZ4_SOURCE_URL
+           "https://github.com/lz4/lz4/archive/${ARROW_LZ4_BUILD_VERSION}.tar.gz"
+           "https://github.com/ursa-labs/thirdparty/releases/download/latest/lz4-${ARROW_LZ4_BUILD_VERSION}.tar.gz"
+  )
 endif()
 
 if(DEFINED ENV{ARROW_ORC_URL})
   set(ORC_SOURCE_URL "$ENV{ARROW_ORC_URL}")
 else()
-  set_urls(
-    ORC_SOURCE_URL
-    "https://github.com/apache/orc/archive/rel/release-${ARROW_ORC_BUILD_VERSION}.tar.gz"
-    "https://github.com/ursa-labs/thirdparty/releases/download/latest/orc-${ARROW_ORC_BUILD_VERSION}.tar.gz"
-    )
+  set_urls(ORC_SOURCE_URL
+           "https://github.com/apache/orc/archive/rel/release-${ARROW_ORC_BUILD_VERSION}.tar.gz"
+           "https://github.com/ursa-labs/thirdparty/releases/download/latest/orc-${ARROW_ORC_BUILD_VERSION}.tar.gz"
+  )
 endif()
 
 if(DEFINED ENV{ARROW_PROTOBUF_URL})
@@ -521,109 +509,101 @@ else()
   string(SUBSTRING ${ARROW_PROTOBUF_BUILD_VERSION} 1 -1
                    ARROW_PROTOBUF_STRIPPED_BUILD_VERSION)
   # strip the leading `v`
-  set_urls(
-    PROTOBUF_SOURCE_URL
-    "https://github.com/protocolbuffers/protobuf/releases/download/${ARROW_PROTOBUF_BUILD_VERSION}/protobuf-all-${ARROW_PROTOBUF_STRIPPED_BUILD_VERSION}.tar.gz"
-    "https://github.com/ursa-labs/thirdparty/releases/download/latest/protobuf-${ARROW_PROTOBUF_BUILD_VERSION}.tar.gz"
-    )
+  set_urls(PROTOBUF_SOURCE_URL
+           "https://github.com/protocolbuffers/protobuf/releases/download/${ARROW_PROTOBUF_BUILD_VERSION}/protobuf-all-${ARROW_PROTOBUF_STRIPPED_BUILD_VERSION}.tar.gz"
+           "https://github.com/ursa-labs/thirdparty/releases/download/latest/protobuf-${ARROW_PROTOBUF_BUILD_VERSION}.tar.gz"
+  )
 endif()
 
 if(DEFINED ENV{ARROW_RE2_URL})
   set(RE2_SOURCE_URL "$ENV{ARROW_RE2_URL}")
 else()
-  set_urls(
-    RE2_SOURCE_URL
-    "https://github.com/google/re2/archive/${ARROW_RE2_BUILD_VERSION}.tar.gz"
-    "https://github.com/ursa-labs/thirdparty/releases/download/latest/re2-${ARROW_RE2_BUILD_VERSION}.tar.gz"
-    )
+  set_urls(RE2_SOURCE_URL
+           "https://github.com/google/re2/archive/${ARROW_RE2_BUILD_VERSION}.tar.gz"
+           "https://github.com/ursa-labs/thirdparty/releases/download/latest/re2-${ARROW_RE2_BUILD_VERSION}.tar.gz"
+  )
 endif()
 
 if(DEFINED ENV{ARROW_RAPIDJSON_URL})
   set(RAPIDJSON_SOURCE_URL "$ENV{ARROW_RAPIDJSON_URL}")
 else()
-  set_urls(
-    RAPIDJSON_SOURCE_URL
-    "https://github.com/miloyip/rapidjson/archive/${ARROW_RAPIDJSON_BUILD_VERSION}.tar.gz"
-    "https://github.com/ursa-labs/thirdparty/releases/download/latest/rapidjson-${ARROW_RAPIDJSON_BUILD_VERSION}.tar.gz"
-    )
+  set_urls(RAPIDJSON_SOURCE_URL
+           "https://github.com/miloyip/rapidjson/archive/${ARROW_RAPIDJSON_BUILD_VERSION}.tar.gz"
+           "https://github.com/ursa-labs/thirdparty/releases/download/latest/rapidjson-${ARROW_RAPIDJSON_BUILD_VERSION}.tar.gz"
+  )
 endif()
 
 if(DEFINED ENV{ARROW_SNAPPY_URL})
   set(SNAPPY_SOURCE_URL "$ENV{ARROW_SNAPPY_URL}")
 else()
-  set_urls(
-    SNAPPY_SOURCE_URL
-    "https://github.com/google/snappy/archive/${ARROW_SNAPPY_BUILD_VERSION}.tar.gz"
-    "https://github.com/ursa-labs/thirdparty/releases/download/latest/snappy-${ARROW_SNAPPY_BUILD_VERSION}.tar.gz"
-    )
+  set_urls(SNAPPY_SOURCE_URL
+           "https://github.com/google/snappy/archive/${ARROW_SNAPPY_BUILD_VERSION}.tar.gz"
+           "https://github.com/ursa-labs/thirdparty/releases/download/latest/snappy-${ARROW_SNAPPY_BUILD_VERSION}.tar.gz"
+  )
 endif()
 
 if(DEFINED ENV{ARROW_THRIFT_URL})
   set(THRIFT_SOURCE_URL "$ENV{ARROW_THRIFT_URL}")
 else()
-  set_urls(
-    THRIFT_SOURCE_URL
-    "http://www.apache.org/dyn/closer.cgi?action=download&filename=/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz"
-    "https://downloads.apache.org/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz"
-    "https://github.com/apache/thrift/archive/v${ARROW_THRIFT_BUILD_VERSION}.tar.gz"
-    "https://apache.claz.org/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz"
-    "https://apache.cs.utah.edu/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz"
-    "https://apache.mirrors.lucidnetworks.net/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz"
-    "https://apache.osuosl.org/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz"
-    "https://ftp.wayne.edu/apache/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz"
-    "https://mirror.olnevhost.net/pub/apache/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz"
-    "https://mirrors.gigenet.com/apache/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz"
-    "https://mirrors.koehn.com/apache/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz"
-    "https://mirrors.ocf.berkeley.edu/apache/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz"
-    "https://mirrors.sonic.net/apache/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz"
-    "https://us.mirrors.quenda.co/apache/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz"
-    "https://github.com/ursa-labs/thirdparty/releases/download/latest/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz"
-    )
+  set_urls(THRIFT_SOURCE_URL
+           "http://www.apache.org/dyn/closer.cgi?action=download&filename=/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz"
+           "https://downloads.apache.org/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz"
+           "https://github.com/apache/thrift/archive/v${ARROW_THRIFT_BUILD_VERSION}.tar.gz"
+           "https://apache.claz.org/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz"
+           "https://apache.cs.utah.edu/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz"
+           "https://apache.mirrors.lucidnetworks.net/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz"
+           "https://apache.osuosl.org/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz"
+           "https://ftp.wayne.edu/apache/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz"
+           "https://mirror.olnevhost.net/pub/apache/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz"
+           "https://mirrors.gigenet.com/apache/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz"
+           "https://mirrors.koehn.com/apache/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz"
+           "https://mirrors.ocf.berkeley.edu/apache/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz"
+           "https://mirrors.sonic.net/apache/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz"
+           "https://us.mirrors.quenda.co/apache/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz"
+           "https://github.com/ursa-labs/thirdparty/releases/download/latest/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz"
+  )
 endif()
 
 if(DEFINED ENV{ARROW_UTF8PROC_URL})
   set(ARROW_UTF8PROC_SOURCE_URL "$ENV{ARROW_UTF8PROC_URL}")
 else()
-  set_urls(
-    ARROW_UTF8PROC_SOURCE_URL
-    "https://github.com/JuliaStrings/utf8proc/archive/${ARROW_UTF8PROC_BUILD_VERSION}.tar.gz"
-    )
+  set_urls(ARROW_UTF8PROC_SOURCE_URL
+           "https://github.com/JuliaStrings/utf8proc/archive/${ARROW_UTF8PROC_BUILD_VERSION}.tar.gz"
+  )
 endif()
 
 if(DEFINED ENV{ARROW_XSIMD_URL})
   set(XSIMD_SOURCE_URL "$ENV{ARROW_XSIMD_URL}")
 else()
-  set_urls(
-    XSIMD_SOURCE_URL
-    "https://github.com/xtensor-stack/xsimd/archive/${ARROW_XSIMD_BUILD_VERSION}.tar.gz")
+  set_urls(XSIMD_SOURCE_URL
+           "https://github.com/xtensor-stack/xsimd/archive/${ARROW_XSIMD_BUILD_VERSION}.tar.gz"
+  )
 endif()
 
 if(DEFINED ENV{ARROW_ZLIB_URL})
   set(ZLIB_SOURCE_URL "$ENV{ARROW_ZLIB_URL}")
 else()
-  set_urls(
-    ZLIB_SOURCE_URL "https://zlib.net/fossils/zlib-${ARROW_ZLIB_BUILD_VERSION}.tar.gz"
-    "https://github.com/ursa-labs/thirdparty/releases/download/latest/zlib-${ARROW_ZLIB_BUILD_VERSION}.tar.gz"
-    )
+  set_urls(ZLIB_SOURCE_URL
+           "https://zlib.net/fossils/zlib-${ARROW_ZLIB_BUILD_VERSION}.tar.gz"
+           "https://github.com/ursa-labs/thirdparty/releases/download/latest/zlib-${ARROW_ZLIB_BUILD_VERSION}.tar.gz"
+  )
 endif()
 
 if(DEFINED ENV{ARROW_ZSTD_URL})
   set(ZSTD_SOURCE_URL "$ENV{ARROW_ZSTD_URL}")
 else()
-  set_urls(
-    ZSTD_SOURCE_URL
-    "https://github.com/facebook/zstd/archive/${ARROW_ZSTD_BUILD_VERSION}.tar.gz"
-    "https://github.com/ursa-labs/thirdparty/releases/download/latest/zstd-${ARROW_ZSTD_BUILD_VERSION}.tar.gz"
-    )
+  set_urls(ZSTD_SOURCE_URL
+           "https://github.com/facebook/zstd/archive/${ARROW_ZSTD_BUILD_VERSION}.tar.gz"
+           "https://github.com/ursa-labs/thirdparty/releases/download/latest/zstd-${ARROW_ZSTD_BUILD_VERSION}.tar.gz"
+  )
 endif()
 
 # ----------------------------------------------------------------------
 # ExternalProject options
 
-set(
-  EP_CXX_FLAGS
-  "${CMAKE_CXX_COMPILER_ARG1} ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}}"
-  )
+set(EP_CXX_FLAGS
+    "${CMAKE_CXX_COMPILER_ARG1} ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}}"
+)
 set(EP_C_FLAGS
     "${CMAKE_C_COMPILER_ARG1} ${CMAKE_C_FLAGS} ${CMAKE_C_FLAGS_${UPPERCASE_BUILD_TYPE}}")
 
@@ -730,12 +710,12 @@ macro(build_boost)
     set(BOOST_BUILD_WITH_LIBRARIES "filesystem" "system")
     string(REPLACE ";" "," BOOST_CONFIGURE_LIBRARIES "${BOOST_BUILD_WITH_LIBRARIES}")
     list(APPEND BOOST_CONFIGURE_COMMAND "--prefix=${BOOST_PREFIX}"
-                "--with-libraries=${BOOST_CONFIGURE_LIBRARIES}")
+         "--with-libraries=${BOOST_CONFIGURE_LIBRARIES}")
     set(BOOST_BUILD_COMMAND "./b2" "-j${NPROC}" "link=${BOOST_BUILD_LINK}"
                             "variant=${BOOST_BUILD_VARIANT}")
     if(MSVC)
-      string(REGEX
-             REPLACE "([0-9])$" ".\\1" BOOST_TOOLSET_MSVC_VERSION ${MSVC_TOOLSET_VERSION})
+      string(REGEX REPLACE "([0-9])$" ".\\1" BOOST_TOOLSET_MSVC_VERSION
+                           ${MSVC_TOOLSET_VERSION})
       list(APPEND BOOST_BUILD_COMMAND "toolset=msvc-${BOOST_TOOLSET_MSVC_VERSION}")
       set(BOOST_BUILD_WITH_LIBRARIES_MSVC)
       foreach(_BOOST_LIB ${BOOST_BUILD_WITH_LIBRARIES})
@@ -760,14 +740,12 @@ macro(build_boost)
     else()
       set(BOOST_LIBRARY_SUFFIX "")
     endif()
-    set(
-      BOOST_STATIC_SYSTEM_LIBRARY
-      "${BOOST_LIB_DIR}/libboost_system${BOOST_LIBRARY_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}"
-      )
-    set(
-      BOOST_STATIC_FILESYSTEM_LIBRARY
-      "${BOOST_LIB_DIR}/libboost_filesystem${BOOST_LIBRARY_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}"
-      )
+    set(BOOST_STATIC_SYSTEM_LIBRARY
+        "${BOOST_LIB_DIR}/libboost_system${BOOST_LIBRARY_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}"
+    )
+    set(BOOST_STATIC_FILESYSTEM_LIBRARY
+        "${BOOST_LIB_DIR}/libboost_filesystem${BOOST_LIBRARY_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}"
+    )
     set(BOOST_SYSTEM_LIBRARY boost_system_static)
     set(BOOST_FILESYSTEM_LIBRARY boost_filesystem_static)
     set(BOOST_BUILD_PRODUCTS ${BOOST_STATIC_SYSTEM_LIBRARY}
@@ -925,14 +903,13 @@ macro(build_snappy)
   message(STATUS "Building snappy from source")
   set(SNAPPY_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/snappy_ep/src/snappy_ep-install")
   set(SNAPPY_STATIC_LIB_NAME snappy)
-  set(
-    SNAPPY_STATIC_LIB
-    "${SNAPPY_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}${SNAPPY_STATIC_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}"
-    )
+  set(SNAPPY_STATIC_LIB
+      "${SNAPPY_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}${SNAPPY_STATIC_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}"
+  )
 
-  set(SNAPPY_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} -DCMAKE_INSTALL_LIBDIR=lib
-                        -DSNAPPY_BUILD_TESTS=OFF
-                        "-DCMAKE_INSTALL_PREFIX=${SNAPPY_PREFIX}")
+  set(SNAPPY_CMAKE_ARGS
+      ${EP_COMMON_CMAKE_ARGS} -DCMAKE_INSTALL_LIBDIR=lib -DSNAPPY_BUILD_TESTS=OFF
+      "-DCMAKE_INSTALL_PREFIX=${SNAPPY_PREFIX}")
 
   externalproject_add(snappy_ep
                       ${EP_LOG_OPTIONS}
@@ -970,18 +947,15 @@ macro(build_brotli)
   set(BROTLI_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/brotli_ep/src/brotli_ep-install")
   set(BROTLI_INCLUDE_DIR "${BROTLI_PREFIX}/include")
   set(BROTLI_LIB_DIR lib)
-  set(
-    BROTLI_STATIC_LIBRARY_ENC
-    "${BROTLI_PREFIX}/${BROTLI_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlienc-static${CMAKE_STATIC_LIBRARY_SUFFIX}"
-    )
-  set(
-    BROTLI_STATIC_LIBRARY_DEC
-    "${BROTLI_PREFIX}/${BROTLI_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlidec-static${CMAKE_STATIC_LIBRARY_SUFFIX}"
-    )
-  set(
-    BROTLI_STATIC_LIBRARY_COMMON
-    "${BROTLI_PREFIX}/${BROTLI_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlicommon-static${CMAKE_STATIC_LIBRARY_SUFFIX}"
-    )
+  set(BROTLI_STATIC_LIBRARY_ENC
+      "${BROTLI_PREFIX}/${BROTLI_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlienc-static${CMAKE_STATIC_LIBRARY_SUFFIX}"
+  )
+  set(BROTLI_STATIC_LIBRARY_DEC
+      "${BROTLI_PREFIX}/${BROTLI_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlidec-static${CMAKE_STATIC_LIBRARY_SUFFIX}"
+  )
+  set(BROTLI_STATIC_LIBRARY_COMMON
+      "${BROTLI_PREFIX}/${BROTLI_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlicommon-static${CMAKE_STATIC_LIBRARY_SUFFIX}"
+  )
   set(BROTLI_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} "-DCMAKE_INSTALL_PREFIX=${BROTLI_PREFIX}"
                         -DCMAKE_INSTALL_LIBDIR=${BROTLI_LIB_DIR})
 
@@ -1016,8 +990,11 @@ macro(build_brotli)
                                    INTERFACE_INCLUDE_DIRECTORIES "${BROTLI_INCLUDE_DIR}")
   add_dependencies(Brotli::brotlidec brotli_ep)
 
-  list(APPEND ARROW_BUNDLED_STATIC_LIBS Brotli::brotlicommon Brotli::brotlienc
-              Brotli::brotlidec)
+  list(APPEND
+       ARROW_BUNDLED_STATIC_LIBS
+       Brotli::brotlicommon
+       Brotli::brotlienc
+       Brotli::brotlidec)
 endmacro()
 
 if(ARROW_WITH_BROTLI)
@@ -1049,7 +1026,9 @@ if(BREW_BIN AND NOT OPENSSL_ROOT_DIR)
 endif()
 
 set(ARROW_USE_OPENSSL OFF)
-if(PARQUET_REQUIRE_ENCRYPTION OR ARROW_FLIGHT OR ARROW_S3)
+if(PARQUET_REQUIRE_ENCRYPTION
+   OR ARROW_FLIGHT
+   OR ARROW_S3)
   # OpenSSL is required
   if(ARROW_OPENSSL_USE_SHARED)
     # Find shared OpenSSL libraries.
@@ -1079,10 +1058,8 @@ if(ARROW_USE_OPENSSL)
 
   include_directories(SYSTEM ${OPENSSL_INCLUDE_DIR})
 else()
-  message(
-    STATUS
-      "Building without OpenSSL support. Minimum OpenSSL version ${ARROW_OPENSSL_REQUIRED_VERSION} required."
-    )
+  message(STATUS "Building without OpenSSL support. Minimum OpenSSL version ${ARROW_OPENSSL_REQUIRED_VERSION} required."
+  )
 endif()
 
 # ----------------------------------------------------------------------
@@ -1097,10 +1074,9 @@ macro(build_glog)
   else()
     set(GLOG_LIB_SUFFIX "")
   endif()
-  set(
-    GLOG_STATIC_LIB
-    "${GLOG_BUILD_DIR}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}glog${GLOG_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}"
-    )
+  set(GLOG_STATIC_LIB
+      "${GLOG_BUILD_DIR}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}glog${GLOG_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}"
+  )
   set(GLOG_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
   set(GLOG_CMAKE_C_FLAGS "${EP_C_FLAGS} -fPIC")
   if(Threads::Threads)
@@ -1201,8 +1177,8 @@ macro(build_gflags)
                         PROPERTIES INTERFACE_COMPILE_DEFINITIONS "GFLAGS_IS_A_DLL=0"
                                    INTERFACE_INCLUDE_DIRECTORIES "${GFLAGS_INCLUDE_DIR}")
   if(MSVC)
-    set_target_properties(${GFLAGS_LIBRARY}
-                          PROPERTIES INTERFACE_LINK_LIBRARIES "shlwapi.lib")
+    set_target_properties(${GFLAGS_LIBRARY} PROPERTIES INTERFACE_LINK_LIBRARIES
+                                                       "shlwapi.lib")
   endif()
   set(GFLAGS_LIBRARIES ${GFLAGS_LIBRARY})
 
@@ -1237,8 +1213,8 @@ endif()
 
 macro(build_thrift)
   if(CMAKE_VERSION VERSION_LESS 3.10)
-    message(
-      FATAL_ERROR "Building thrift using ExternalProject requires at least CMake 3.10")
+    message(FATAL_ERROR "Building thrift using ExternalProject requires at least CMake 3.10"
+    )
   endif()
   message("Building Apache Thrift from source")
   set(THRIFT_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/thrift_ep-install")
@@ -1372,8 +1348,8 @@ macro(build_protobuf)
     if(ZLIB_ROOT)
       list(APPEND PROTOBUF_CMAKE_ARGS "-DZLIB_ROOT=${ZLIB_ROOT}")
     endif()
-    set(PROTOBUF_EXTERNAL_PROJECT_ADD_ARGS CMAKE_ARGS ${PROTOBUF_CMAKE_ARGS} SOURCE_SUBDIR
-                                           "cmake")
+    set(PROTOBUF_EXTERNAL_PROJECT_ADD_ARGS CMAKE_ARGS ${PROTOBUF_CMAKE_ARGS}
+                                           SOURCE_SUBDIR "cmake")
   endif()
 
   externalproject_add(protobuf_ep
@@ -1386,18 +1362,18 @@ macro(build_protobuf)
   file(MAKE_DIRECTORY "${PROTOBUF_INCLUDE_DIR}")
 
   add_library(arrow::protobuf::libprotobuf STATIC IMPORTED)
-  set_target_properties(
-    arrow::protobuf::libprotobuf
-    PROPERTIES IMPORTED_LOCATION "${PROTOBUF_STATIC_LIB}" INTERFACE_INCLUDE_DIRECTORIES
-               "${PROTOBUF_INCLUDE_DIR}")
+  set_target_properties(arrow::protobuf::libprotobuf
+                        PROPERTIES IMPORTED_LOCATION "${PROTOBUF_STATIC_LIB}"
+                                   INTERFACE_INCLUDE_DIRECTORIES
+                                   "${PROTOBUF_INCLUDE_DIR}")
   add_library(arrow::protobuf::libprotoc STATIC IMPORTED)
-  set_target_properties(
-    arrow::protobuf::libprotoc
-    PROPERTIES IMPORTED_LOCATION "${PROTOC_STATIC_LIB}" INTERFACE_INCLUDE_DIRECTORIES
-               "${PROTOBUF_INCLUDE_DIR}")
+  set_target_properties(arrow::protobuf::libprotoc
+                        PROPERTIES IMPORTED_LOCATION "${PROTOC_STATIC_LIB}"
+                                   INTERFACE_INCLUDE_DIRECTORIES
+                                   "${PROTOBUF_INCLUDE_DIR}")
   add_executable(arrow::protobuf::protoc IMPORTED)
-  set_target_properties(arrow::protobuf::protoc
-                        PROPERTIES IMPORTED_LOCATION "${PROTOBUF_COMPILER}")
+  set_target_properties(arrow::protobuf::protoc PROPERTIES IMPORTED_LOCATION
+                                                           "${PROTOBUF_COMPILER}")
 
   add_dependencies(toolchain protobuf_ep)
   add_dependencies(arrow::protobuf::libprotobuf protobuf_ep)
@@ -1463,8 +1439,8 @@ if(ARROW_WITH_PROTOBUF)
   else()
     if(NOT TARGET protobuf::protoc)
       add_executable(protobuf::protoc IMPORTED)
-      set_target_properties(protobuf::protoc
-                            PROPERTIES IMPORTED_LOCATION "${PROTOBUF_PROTOC_EXECUTABLE}")
+      set_target_properties(protobuf::protoc PROPERTIES IMPORTED_LOCATION
+                                                        "${PROTOBUF_PROTOC_EXECUTABLE}")
     endif()
     set(ARROW_PROTOBUF_PROTOC protobuf::protoc)
   endif()
@@ -1502,32 +1478,33 @@ if(ARROW_JEMALLOC)
   if(CMAKE_OSX_SYSROOT)
     list(APPEND JEMALLOC_CONFIGURE_COMMAND "SDKROOT=${CMAKE_OSX_SYSROOT}")
   endif()
-  list(APPEND JEMALLOC_CONFIGURE_COMMAND
-              "--prefix=${JEMALLOC_PREFIX}"
-              "--libdir=${JEMALLOC_LIB_DIR}"
-              "--with-jemalloc-prefix=je_arrow_"
-              "--with-private-namespace=je_arrow_private_"
-              "--without-export"
-              "--disable-shared"
-              # Don't override operator new()
-              "--disable-cxx" "--disable-libdl"
-              # See https://github.com/jemalloc/jemalloc/issues/1237
-              "--disable-initial-exec-tls" ${EP_LOG_OPTIONS})
+  list(APPEND
+       JEMALLOC_CONFIGURE_COMMAND
+       "--prefix=${JEMALLOC_PREFIX}"
+       "--libdir=${JEMALLOC_LIB_DIR}"
+       "--with-jemalloc-prefix=je_arrow_"
+       "--with-private-namespace=je_arrow_private_"
+       "--without-export"
+       "--disable-shared"
+       # Don't override operator new()
+       "--disable-cxx"
+       "--disable-libdl"
+       # See https://github.com/jemalloc/jemalloc/issues/1237
+       "--disable-initial-exec-tls"
+       ${EP_LOG_OPTIONS})
   set(JEMALLOC_BUILD_COMMAND ${MAKE} ${MAKE_BUILD_ARGS})
   if(CMAKE_OSX_SYSROOT)
     list(APPEND JEMALLOC_BUILD_COMMAND "SDKROOT=${CMAKE_OSX_SYSROOT}")
   endif()
-  externalproject_add(
-    jemalloc_ep
-    URL ${JEMALLOC_SOURCE_URL}
-    PATCH_COMMAND
-      touch doc/jemalloc.3 doc/jemalloc.html
-      # The prefix "je_arrow_" must be kept in sync with the value in memory_pool.cc
-    CONFIGURE_COMMAND ${JEMALLOC_CONFIGURE_COMMAND}
-    BUILD_IN_SOURCE 1
-    BUILD_COMMAND ${JEMALLOC_BUILD_COMMAND}
-    BUILD_BYPRODUCTS "${JEMALLOC_STATIC_LIB}"
-    INSTALL_COMMAND ${MAKE} -j1 install)
+  externalproject_add(jemalloc_ep
+                      URL ${JEMALLOC_SOURCE_URL}
+                      PATCH_COMMAND touch doc/jemalloc.3 doc/jemalloc.html
+                                    # The prefix "je_arrow_" must be kept in sync with the value in memory_pool.cc
+                      CONFIGURE_COMMAND ${JEMALLOC_CONFIGURE_COMMAND}
+                      BUILD_IN_SOURCE 1
+                      BUILD_COMMAND ${JEMALLOC_BUILD_COMMAND}
+                      BUILD_BYPRODUCTS "${JEMALLOC_STATIC_LIB}"
+                      INSTALL_COMMAND ${MAKE} -j1 install)
 
   # Don't use the include directory directly so that we can point to a path
   # that is unique to our codebase.
@@ -1536,10 +1513,8 @@ if(ARROW_JEMALLOC)
   file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/jemalloc_ep-prefix/src/")
   add_library(jemalloc::jemalloc STATIC IMPORTED)
   set_target_properties(jemalloc::jemalloc
-                        PROPERTIES INTERFACE_LINK_LIBRARIES
-                                   Threads::Threads
-                                   IMPORTED_LOCATION
-                                   "${JEMALLOC_STATIC_LIB}"
+                        PROPERTIES INTERFACE_LINK_LIBRARIES Threads::Threads
+                                   IMPORTED_LOCATION "${JEMALLOC_STATIC_LIB}"
                                    INTERFACE_INCLUDE_DIRECTORIES
                                    "${CMAKE_CURRENT_BINARY_DIR}/jemalloc_ep-prefix/src")
   add_dependencies(jemalloc::jemalloc jemalloc_ep)
@@ -1564,10 +1539,9 @@ if(ARROW_MIMALLOC)
 
   set(MIMALLOC_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/mimalloc_ep/src/mimalloc_ep")
   set(MIMALLOC_INCLUDE_DIR "${MIMALLOC_PREFIX}/lib/mimalloc-1.6/include")
-  set(
-    MIMALLOC_STATIC_LIB
-    "${MIMALLOC_PREFIX}/lib/mimalloc-1.6/${CMAKE_STATIC_LIBRARY_PREFIX}${MIMALLOC_LIB_BASE_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}"
-    )
+  set(MIMALLOC_STATIC_LIB
+      "${MIMALLOC_PREFIX}/lib/mimalloc-1.6/${CMAKE_STATIC_LIBRARY_PREFIX}${MIMALLOC_LIB_BASE_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}"
+  )
 
   set(MIMALLOC_CMAKE_ARGS
       ${EP_COMMON_CMAKE_ARGS}
@@ -1588,10 +1562,8 @@ if(ARROW_MIMALLOC)
 
   add_library(mimalloc::mimalloc STATIC IMPORTED)
   set_target_properties(mimalloc::mimalloc
-                        PROPERTIES INTERFACE_LINK_LIBRARIES
-                                   Threads::Threads
-                                   IMPORTED_LOCATION
-                                   "${MIMALLOC_STATIC_LIB}"
+                        PROPERTIES INTERFACE_LINK_LIBRARIES Threads::Threads
+                                   IMPORTED_LOCATION "${MIMALLOC_STATIC_LIB}"
                                    INTERFACE_INCLUDE_DIRECTORIES
                                    "${MIMALLOC_INCLUDE_DIR}")
   add_dependencies(mimalloc::mimalloc mimalloc_ep)
@@ -1643,10 +1615,9 @@ macro(build_gtest)
       "${_GTEST_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}gtest${_GTEST_LIBRARY_SUFFIX}")
   set(GMOCK_SHARED_LIB
       "${_GTEST_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}gmock${_GTEST_LIBRARY_SUFFIX}")
-  set(
-    GTEST_MAIN_SHARED_LIB
-    "${_GTEST_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}gtest_main${_GTEST_LIBRARY_SUFFIX}"
-    )
+  set(GTEST_MAIN_SHARED_LIB
+      "${_GTEST_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}gtest_main${_GTEST_LIBRARY_SUFFIX}"
+  )
   set(GTEST_INSTALL_NAME_DIR "$<INSTALL_PREFIX$<ANGLE-R>/lib")
   # Fix syntax highlighting mess introduced by unclosed bracket above
   set(dummy ">")
@@ -1684,20 +1655,18 @@ macro(build_gtest)
     set(_GTEST_RUNTIME_DIR "${GTEST_PREFIX}/bin")
     set(_GTEST_RUNTIME_SUFFIX
         "${CMAKE_GTEST_DEBUG_EXTENSION}${CMAKE_SHARED_LIBRARY_SUFFIX}")
-    set(
-      _GTEST_RUNTIME_LIB
-      "${_GTEST_RUNTIME_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}gtest${_GTEST_RUNTIME_SUFFIX}")
-    set(
-      _GMOCK_RUNTIME_LIB
-      "${_GTEST_RUNTIME_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}gmock${_GTEST_RUNTIME_SUFFIX}")
-    set(
-      _GTEST_MAIN_RUNTIME_LIB
-      "${_GTEST_RUNTIME_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}gtest_main${_GTEST_RUNTIME_SUFFIX}"
-      )
+    set(_GTEST_RUNTIME_LIB
+        "${_GTEST_RUNTIME_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}gtest${_GTEST_RUNTIME_SUFFIX}"
+    )
+    set(_GMOCK_RUNTIME_LIB
+        "${_GTEST_RUNTIME_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}gmock${_GTEST_RUNTIME_SUFFIX}"
+    )
+    set(_GTEST_MAIN_RUNTIME_LIB
+        "${_GTEST_RUNTIME_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}gtest_main${_GTEST_RUNTIME_SUFFIX}"
+    )
     if(CMAKE_VERSION VERSION_LESS 3.9)
-      message(
-        FATAL_ERROR
-          "Building GoogleTest from source on Windows requires at least CMake 3.9")
+      message(FATAL_ERROR "Building GoogleTest from source on Windows requires at least CMake 3.9"
+      )
     endif()
     get_property(_GENERATOR_IS_MULTI_CONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
     if(_GENERATOR_IS_MULTI_CONFIG)
@@ -1708,20 +1677,11 @@ macro(build_gtest)
     externalproject_add_step(googletest_ep copy
                              COMMAND ${CMAKE_COMMAND} -E make_directory
                                      ${_GTEST_RUNTIME_OUTPUT_DIR}
-                             COMMAND ${CMAKE_COMMAND}
-                                     -E
-                                     copy
-                                     ${_GTEST_RUNTIME_LIB}
+                             COMMAND ${CMAKE_COMMAND} -E copy ${_GTEST_RUNTIME_LIB}
                                      ${_GTEST_RUNTIME_OUTPUT_DIR}
-                             COMMAND ${CMAKE_COMMAND}
-                                     -E
-                                     copy
-                                     ${_GMOCK_RUNTIME_LIB}
+                             COMMAND ${CMAKE_COMMAND} -E copy ${_GMOCK_RUNTIME_LIB}
                                      ${_GTEST_RUNTIME_OUTPUT_DIR}
-                             COMMAND ${CMAKE_COMMAND}
-                                     -E
-                                     copy
-                                     ${_GTEST_MAIN_RUNTIME_LIB}
+                             COMMAND ${CMAKE_COMMAND} -E copy ${_GTEST_MAIN_RUNTIME_LIB}
                                      ${_GTEST_RUNTIME_OUTPUT_DIR}
                              DEPENDEES install)
   endif()
@@ -1800,25 +1760,20 @@ macro(build_benchmark)
     set(GBENCHMARK_CMAKE_CXX_FLAGS "${EP_CXX_FLAGS} -std=c++11")
   endif()
 
-  if(APPLE
-     AND (CMAKE_CXX_COMPILER_ID
-          STREQUAL
-          "AppleClang"
-          OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang"))
+  if(APPLE AND (CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID
+                                                               STREQUAL "Clang"))
     set(GBENCHMARK_CMAKE_CXX_FLAGS "${GBENCHMARK_CMAKE_CXX_FLAGS} -stdlib=libc++")
   endif()
 
   set(GBENCHMARK_PREFIX
       "${CMAKE_CURRENT_BINARY_DIR}/gbenchmark_ep/src/gbenchmark_ep-install")
   set(GBENCHMARK_INCLUDE_DIR "${GBENCHMARK_PREFIX}/include")
-  set(
-    GBENCHMARK_STATIC_LIB
-    "${GBENCHMARK_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}benchmark${CMAKE_STATIC_LIBRARY_SUFFIX}"
-    )
-  set(
-    GBENCHMARK_MAIN_STATIC_LIB
-    "${GBENCHMARK_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}benchmark_main${CMAKE_STATIC_LIBRARY_SUFFIX}"
-    )
+  set(GBENCHMARK_STATIC_LIB
+      "${GBENCHMARK_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}benchmark${CMAKE_STATIC_LIBRARY_SUFFIX}"
+  )
+  set(GBENCHMARK_MAIN_STATIC_LIB
+      "${GBENCHMARK_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}benchmark_main${CMAKE_STATIC_LIBRARY_SUFFIX}"
+  )
   set(GBENCHMARK_CMAKE_ARGS
       ${EP_COMMON_CMAKE_ARGS}
       "-DCMAKE_INSTALL_PREFIX=${GBENCHMARK_PREFIX}"
@@ -1858,8 +1813,8 @@ endmacro()
 if(ARROW_BUILD_BENCHMARKS)
   # ArgsProduct() is available since 1.5.2
   set(BENCHMARK_REQUIRED_VERSION 1.5.2)
-  if("${ARROW_DEPENDENCY_SOURCE}" STREQUAL "CONDA"
-     AND "${benchmark_SOURCE}" STREQUAL "SYSTEM")
+  if("${ARROW_DEPENDENCY_SOURCE}" STREQUAL "CONDA" AND "${benchmark_SOURCE}" STREQUAL
+                                                       "SYSTEM")
     # TODO: Remove this workaround once
     # https://github.com/google/benchmark/issues/1046 is resolved.
     #
@@ -1940,8 +1895,8 @@ macro(build_xsimd)
   set(XSIMD_VENDORED TRUE)
 endmacro()
 
-if((NOT ARROW_SIMD_LEVEL STREQUAL "NONE")
-   OR (NOT ARROW_RUNTIME_SIMD_LEVEL STREQUAL "NONE"))
+if((NOT ARROW_SIMD_LEVEL STREQUAL "NONE") OR (NOT ARROW_RUNTIME_SIMD_LEVEL STREQUAL "NONE"
+                                             ))
   set(xsimd_SOURCE "BUNDLED")
   resolve_dependency(xsimd)
   # TODO: Don't use global includes but rather target_include_directories
@@ -2008,29 +1963,21 @@ macro(build_lz4)
     set(LZ4_STATIC_LIB
         "${LZ4_BUILD_DIR}/build/VS2010/bin/x64_${CMAKE_BUILD_TYPE}/liblz4_static.lib")
     set(LZ4_BUILD_COMMAND
-        BUILD_COMMAND
-        msbuild.exe
-        /m
-        /p:Configuration=${CMAKE_BUILD_TYPE}
-        /p:Platform=x64
-        /p:PlatformToolset=v140
-        ${LZ4_RUNTIME_LIBRARY_LINKAGE}
-        /t:Build
+        BUILD_COMMAND msbuild.exe /m /p:Configuration=${CMAKE_BUILD_TYPE} /p:Platform=x64
+        /p:PlatformToolset=v140 ${LZ4_RUNTIME_LIBRARY_LINKAGE} /t:Build
         ${LZ4_BUILD_DIR}/build/VS2010/lz4.sln)
   else()
     set(LZ4_STATIC_LIB "${LZ4_BUILD_DIR}/lib/liblz4.a")
-    set(LZ4_BUILD_COMMAND BUILD_COMMAND ${CMAKE_SOURCE_DIR}/build-support/build-lz4-lib.sh
-                          "AR=${CMAKE_AR}" "OS=${CMAKE_SYSTEM_NAME}")
+    set(LZ4_BUILD_COMMAND
+        BUILD_COMMAND ${CMAKE_SOURCE_DIR}/build-support/build-lz4-lib.sh "AR=${CMAKE_AR}"
+        "OS=${CMAKE_SYSTEM_NAME}")
   endif()
 
   # We need to copy the header in lib to directory outside of the build
   externalproject_add(lz4_ep
                       URL ${LZ4_SOURCE_URL} ${EP_LOG_OPTIONS}
-                      UPDATE_COMMAND ${CMAKE_COMMAND}
-                                     -E
-                                     copy_directory
-                                     "${LZ4_BUILD_DIR}/lib"
-                                     "${LZ4_PREFIX}/include"
+                      UPDATE_COMMAND ${CMAKE_COMMAND} -E copy_directory
+                                     "${LZ4_BUILD_DIR}/lib" "${LZ4_PREFIX}/include"
                                      ${LZ4_PATCH_COMMAND}
                       CONFIGURE_COMMAND ""
                       INSTALL_COMMAND ""
@@ -2184,9 +2131,9 @@ endif()
 macro(build_bzip2)
   message(STATUS "Building BZip2 from source")
   set(BZIP2_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/bzip2_ep-install")
-  set(
-    BZIP2_STATIC_LIB
-    "${BZIP2_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}bz2${CMAKE_STATIC_LIBRARY_SUFFIX}")
+  set(BZIP2_STATIC_LIB
+      "${BZIP2_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}bz2${CMAKE_STATIC_LIBRARY_SUFFIX}"
+  )
 
   set(BZIP2_EXTRA_ARGS "CC=${CMAKE_C_COMPILER}" "CFLAGS=${EP_C_FLAGS}")
 
@@ -2208,10 +2155,10 @@ macro(build_bzip2)
 
   file(MAKE_DIRECTORY "${BZIP2_PREFIX}/include")
   add_library(BZip2::BZip2 STATIC IMPORTED)
-  set_target_properties(
-    BZip2::BZip2
-    PROPERTIES IMPORTED_LOCATION "${BZIP2_STATIC_LIB}" INTERFACE_INCLUDE_DIRECTORIES
-               "${BZIP2_PREFIX}/include")
+  set_target_properties(BZip2::BZip2
+                        PROPERTIES IMPORTED_LOCATION "${BZIP2_STATIC_LIB}"
+                                   INTERFACE_INCLUDE_DIRECTORIES
+                                   "${BZIP2_PREFIX}/include")
   set(BZIP2_INCLUDE_DIR "${BZIP2_PREFIX}/include")
 
   add_dependencies(toolchain bzip2_ep)
@@ -2238,10 +2185,9 @@ macro(build_utf8proc)
   if(MSVC)
     set(UTF8PROC_STATIC_LIB "${UTF8PROC_PREFIX}/lib/utf8proc_static.lib")
   else()
-    set(
-      UTF8PROC_STATIC_LIB
-      "${UTF8PROC_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}utf8proc${CMAKE_STATIC_LIBRARY_SUFFIX}"
-      )
+    set(UTF8PROC_STATIC_LIB
+        "${UTF8PROC_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}utf8proc${CMAKE_STATIC_LIBRARY_SUFFIX}"
+    )
   endif()
 
   set(UTF8PROC_CMAKE_ARGS
@@ -2261,10 +2207,8 @@ macro(build_utf8proc)
   file(MAKE_DIRECTORY "${UTF8PROC_PREFIX}/include")
   add_library(utf8proc::utf8proc STATIC IMPORTED)
   set_target_properties(utf8proc::utf8proc
-                        PROPERTIES IMPORTED_LOCATION
-                                   "${UTF8PROC_STATIC_LIB}"
-                                   INTERFACE_COMPILER_DEFINITIONS
-                                   "UTF8PROC_STATIC"
+                        PROPERTIES IMPORTED_LOCATION "${UTF8PROC_STATIC_LIB}"
+                                   INTERFACE_COMPILER_DEFINITIONS "UTF8PROC_STATIC"
                                    INTERFACE_INCLUDE_DIRECTORIES
                                    "${UTF8PROC_PREFIX}/include")
 
@@ -2301,10 +2245,9 @@ macro(build_cares)
 
   # If you set -DCARES_SHARED=ON then the build system names the library
   # libcares_static.a
-  set(
-    CARES_STATIC_LIB
-    "${CARES_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}cares${CMAKE_STATIC_LIBRARY_SUFFIX}"
-    )
+  set(CARES_STATIC_LIB
+      "${CARES_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}cares${CMAKE_STATIC_LIBRARY_SUFFIX}"
+  )
 
   set(CARES_CMAKE_ARGS
       "${EP_COMMON_CMAKE_ARGS}"
@@ -2331,8 +2274,8 @@ macro(build_cares)
   if(APPLE)
     # libresolv must be linked from c-ares version 1.16.1
     find_library(LIBRESOLV_LIBRARY NAMES resolv libresolv REQUIRED)
-    set_target_properties(c-ares::cares
-                          PROPERTIES INTERFACE_LINK_LIBRARIES "${LIBRESOLV_LIBRARY}")
+    set_target_properties(c-ares::cares PROPERTIES INTERFACE_LINK_LIBRARIES
+                                                   "${LIBRESOLV_LIBRARY}")
   endif()
 
   set(CARES_VENDORED TRUE)
@@ -2389,13 +2332,12 @@ macro(build_grpc)
       raw_logging_internal)
 
   foreach(_ABSL_LIB ${_ABSL_LIBS})
-    set(
-      _ABSL_STATIC_LIBRARY
-      "${ABSL_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}absl_${_ABSL_LIB}${CMAKE_STATIC_LIBRARY_SUFFIX}"
-      )
+    set(_ABSL_STATIC_LIBRARY
+        "${ABSL_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}absl_${_ABSL_LIB}${CMAKE_STATIC_LIBRARY_SUFFIX}"
+    )
     add_library(absl::${_ABSL_LIB} STATIC IMPORTED)
-    set_target_properties(absl::${_ABSL_LIB}
-                          PROPERTIES IMPORTED_LOCATION ${_ABSL_STATIC_LIBRARY})
+    set_target_properties(absl::${_ABSL_LIB} PROPERTIES IMPORTED_LOCATION
+                                                        ${_ABSL_STATIC_LIBRARY})
     list(APPEND ABSL_BUILD_BYPRODUCTS ${_ABSL_STATIC_LIBRARY})
     list(APPEND ABSL_LIBRARIES absl::${_ABSL_LIB})
   endforeach()
@@ -2411,23 +2353,21 @@ macro(build_grpc)
   set(GRPC_HOME "${GRPC_PREFIX}")
   set(GRPC_INCLUDE_DIR "${GRPC_PREFIX}/include")
 
-  set(
-    GRPC_STATIC_LIBRARY_GPR
-    "${GRPC_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}gpr${CMAKE_STATIC_LIBRARY_SUFFIX}")
-  set(
-    GRPC_STATIC_LIBRARY_GRPC
-    "${GRPC_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}grpc${CMAKE_STATIC_LIBRARY_SUFFIX}")
-  set(
-    GRPC_STATIC_LIBRARY_GRPCPP
-    "${GRPC_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}grpc++${CMAKE_STATIC_LIBRARY_SUFFIX}"
-    )
-  set(
-    GRPC_STATIC_LIBRARY_ADDRESS_SORTING
-    "${GRPC_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}address_sorting${CMAKE_STATIC_LIBRARY_SUFFIX}"
-    )
-  set(
-    GRPC_STATIC_LIBRARY_UPB
-    "${GRPC_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}upb${CMAKE_STATIC_LIBRARY_SUFFIX}")
+  set(GRPC_STATIC_LIBRARY_GPR
+      "${GRPC_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}gpr${CMAKE_STATIC_LIBRARY_SUFFIX}"
+  )
+  set(GRPC_STATIC_LIBRARY_GRPC
+      "${GRPC_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}grpc${CMAKE_STATIC_LIBRARY_SUFFIX}"
+  )
+  set(GRPC_STATIC_LIBRARY_GRPCPP
+      "${GRPC_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}grpc++${CMAKE_STATIC_LIBRARY_SUFFIX}"
+  )
+  set(GRPC_STATIC_LIBRARY_ADDRESS_SORTING
+      "${GRPC_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}address_sorting${CMAKE_STATIC_LIBRARY_SUFFIX}"
+  )
+  set(GRPC_STATIC_LIBRARY_UPB
+      "${GRPC_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}upb${CMAKE_STATIC_LIBRARY_SUFFIX}"
+  )
   set(GRPC_CPP_PLUGIN "${GRPC_PREFIX}/bin/grpc_cpp_plugin${CMAKE_EXECUTABLE_SUFFIX}")
 
   set(GRPC_CMAKE_PREFIX)
@@ -2553,26 +2493,20 @@ macro(build_grpc)
       ZLIB::ZLIB
       Threads::Threads)
   set_target_properties(gRPC::grpc
-                        PROPERTIES IMPORTED_LOCATION
-                                   "${GRPC_STATIC_LIBRARY_GRPC}"
-                                   INTERFACE_INCLUDE_DIRECTORIES
-                                   "${GRPC_INCLUDE_DIR}"
-                                   INTERFACE_LINK_LIBRARIES
-                                   "${GRPC_LINK_LIBRARIES}")
+                        PROPERTIES IMPORTED_LOCATION "${GRPC_STATIC_LIBRARY_GRPC}"
+                                   INTERFACE_INCLUDE_DIRECTORIES "${GRPC_INCLUDE_DIR}"
+                                   INTERFACE_LINK_LIBRARIES "${GRPC_LINK_LIBRARIES}")
 
   add_library(gRPC::grpc++ STATIC IMPORTED)
   set(GRPCPP_LINK_LIBRARIES gRPC::grpc ${ARROW_PROTOBUF_LIBPROTOBUF})
   set_target_properties(gRPC::grpc++
-                        PROPERTIES IMPORTED_LOCATION
-                                   "${GRPC_STATIC_LIBRARY_GRPCPP}"
-                                   INTERFACE_INCLUDE_DIRECTORIES
-                                   "${GRPC_INCLUDE_DIR}"
-                                   INTERFACE_LINK_LIBRARIES
-                                   "${GRPCPP_LINK_LIBRARIES}")
+                        PROPERTIES IMPORTED_LOCATION "${GRPC_STATIC_LIBRARY_GRPCPP}"
+                                   INTERFACE_INCLUDE_DIRECTORIES "${GRPC_INCLUDE_DIR}"
+                                   INTERFACE_LINK_LIBRARIES "${GRPCPP_LINK_LIBRARIES}")
 
   add_executable(gRPC::grpc_cpp_plugin IMPORTED)
-  set_target_properties(gRPC::grpc_cpp_plugin
-                        PROPERTIES IMPORTED_LOCATION ${GRPC_CPP_PLUGIN})
+  set_target_properties(gRPC::grpc_cpp_plugin PROPERTIES IMPORTED_LOCATION
+                                                         ${GRPC_CPP_PLUGIN})
 
   add_dependencies(grpc_ep grpc_dependencies)
   add_dependencies(toolchain grpc_ep)
@@ -2584,15 +2518,11 @@ macro(build_grpc)
   # continuation character in these scripts, so we have to create a copy of the
   # static lib that we will bundle later
 
-  set(
-    GRPC_STATIC_LIBRARY_GRPCPP_FOR_AR
-    "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}grpcpp${CMAKE_STATIC_LIBRARY_SUFFIX}"
-    )
+  set(GRPC_STATIC_LIBRARY_GRPCPP_FOR_AR
+      "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}grpcpp${CMAKE_STATIC_LIBRARY_SUFFIX}"
+  )
   add_custom_command(OUTPUT ${GRPC_STATIC_LIBRARY_GRPCPP_FOR_AR}
-                     COMMAND ${CMAKE_COMMAND}
-                             -E
-                             copy
-                             $<TARGET_FILE:gRPC::grpc++>
+                     COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:gRPC::grpc++>
                              ${GRPC_STATIC_LIBRARY_GRPCPP_FOR_AR}
                      DEPENDS grpc_ep)
   add_library(gRPC::grpcpp_for_bundling STATIC IMPORTED)
@@ -2601,17 +2531,18 @@ macro(build_grpc)
                                    "${GRPC_STATIC_LIBRARY_GRPCPP_FOR_AR}")
 
   set_source_files_properties("${GRPC_STATIC_LIBRARY_GRPCPP_FOR_AR}" PROPERTIES GENERATED
-                              TRUE)
+                                                                                TRUE)
   add_custom_target(grpc_copy_grpc++ ALL DEPENDS "${GRPC_STATIC_LIBRARY_GRPCPP_FOR_AR}")
   add_dependencies(gRPC::grpcpp_for_bundling grpc_copy_grpc++)
 
-  list(APPEND ARROW_BUNDLED_STATIC_LIBS
-              ${ABSL_LIBRARIES}
-              gRPC::address_sorting
-              gRPC::gpr
-              gRPC::grpc
-              gRPC::grpcpp_for_bundling
-              gRPC::upb)
+  list(APPEND
+       ARROW_BUNDLED_STATIC_LIBS
+       ${ABSL_LIBRARIES}
+       gRPC::address_sorting
+       gRPC::gpr
+       gRPC::grpc
+       gRPC::grpcpp_for_bundling
+       gRPC::upb)
 endmacro()
 
 if(ARROW_WITH_GRPC)
@@ -2748,8 +2679,8 @@ endif()
 
 macro(build_awssdk)
   message("Building AWS C++ SDK from source")
-  if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU"
-     AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS "4.9")
+  if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS
+                                              "4.9")
     message(FATAL_ERROR "AWS C++ SDK requires gcc >= 4.9")
   endif()
   set(AWSSDK_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/awssdk_ep-install")
@@ -2776,14 +2707,14 @@ macro(build_awssdk)
       "-DCMAKE_INSTALL_PREFIX=${AWSSDK_PREFIX}"
       "-DCMAKE_PREFIX_PATH=${AWSSDK_PREFIX}")
 
-  set(
-    AWSSDK_CMAKE_ARGS
-    ${AWSSDK_COMMON_CMAKE_ARGS} -DBUILD_DEPS=OFF
-    -DBUILD_ONLY=config\\$<SEMICOLON>s3\\$<SEMICOLON>transfer\\$<SEMICOLON>identity-management\\$<SEMICOLON>sts
-    -DMINIMIZE_SIZE=ON)
+  set(AWSSDK_CMAKE_ARGS
+      ${AWSSDK_COMMON_CMAKE_ARGS}
+      -DBUILD_DEPS=OFF
+      -DBUILD_ONLY=config\\$<SEMICOLON>s3\\$<SEMICOLON>transfer\\$<SEMICOLON>identity-management\\$<SEMICOLON>sts
+      -DMINIMIZE_SIZE=ON)
   if(UNIX AND TARGET zlib_ep)
     list(APPEND AWSSDK_CMAKE_ARGS -DZLIB_INCLUDE_DIR=${ZLIB_INCLUDE_DIRS}
-                -DZLIB_LIBRARY=${ZLIB_LIBRARIES})
+         -DZLIB_LIBRARY=${ZLIB_LIBRARIES})
   endif()
 
   file(MAKE_DIRECTORY ${AWSSDK_INCLUDE_DIR})
@@ -2804,20 +2735,19 @@ macro(build_awssdk)
     string(TOUPPER ${_AWSSDK_LIB} _AWSSDK_LIB_UPPER)
     # AWS-C-COMMON -> AWS_C_COMMON
     string(REPLACE "-" "_" _AWSSDK_LIB_NAME_PREFIX ${_AWSSDK_LIB_UPPER})
-    set(
-      _AWSSDK_STATIC_LIBRARY
-      "${AWSSDK_PREFIX}/${AWSSDK_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}${_AWSSDK_LIB}${CMAKE_STATIC_LIBRARY_SUFFIX}"
-      )
+    set(_AWSSDK_STATIC_LIBRARY
+        "${AWSSDK_PREFIX}/${AWSSDK_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}${_AWSSDK_LIB}${CMAKE_STATIC_LIBRARY_SUFFIX}"
+    )
     if(${_AWSSDK_LIB} MATCHES "^aws-cpp-sdk-")
       set(_AWSSDK_TARGET_NAME ${_AWSSDK_LIB})
     else()
       set(_AWSSDK_TARGET_NAME AWS::${_AWSSDK_LIB})
     endif()
     add_library(${_AWSSDK_TARGET_NAME} STATIC IMPORTED)
-    set_target_properties(
-      ${_AWSSDK_TARGET_NAME}
-      PROPERTIES IMPORTED_LOCATION ${_AWSSDK_STATIC_LIBRARY} INTERFACE_INCLUDE_DIRECTORIES
-                 "${AWSSDK_INCLUDE_DIR}")
+    set_target_properties(${_AWSSDK_TARGET_NAME}
+                          PROPERTIES IMPORTED_LOCATION ${_AWSSDK_STATIC_LIBRARY}
+                                     INTERFACE_INCLUDE_DIRECTORIES
+                                     "${AWSSDK_INCLUDE_DIR}")
     set("${_AWSSDK_LIB_NAME_PREFIX}_STATIC_LIBRARY" ${_AWSSDK_STATIC_LIBRARY})
     list(APPEND AWSSDK_LIBRARIES ${_AWSSDK_TARGET_NAME})
   endforeach()
@@ -2874,7 +2804,7 @@ macro(build_awssdk)
       set_target_properties(CURL::libcurl
                             PROPERTIES INTERFACE_INCLUDE_DIRECTORIES
                                        "${CURL_INCLUDE_DIRS}" IMPORTED_LOCATION
-                                       "${CURL_LIBRARIES}")
+                                                              "${CURL_LIBRARIES}")
     endif()
     set_property(TARGET aws-cpp-sdk-core
                  APPEND
diff --git a/cpp/cmake_modules/Usevcpkg.cmake b/cpp/cmake_modules/Usevcpkg.cmake
index 781bec436f3..7d228f59f03 100644
--- a/cpp/cmake_modules/Usevcpkg.cmake
+++ b/cpp/cmake_modules/Usevcpkg.cmake
@@ -22,7 +22,9 @@ message(STATUS "Using vcpkg to find dependencies")
 
 # macro to list subdirectirectories (non-recursive)
 macro(list_subdirs SUBDIRS DIR)
-  file(GLOB children_ RELATIVE ${DIR} ${DIR}/*)
+  file(GLOB children_
+       RELATIVE ${DIR}
+       ${DIR}/*)
   set(subdirs_ "")
   foreach(child_ ${children_})
     if(IS_DIRECTORY "${DIR}/${child_}")
@@ -44,24 +46,27 @@ if(DEFINED CMAKE_TOOLCHAIN_FILE)
     get_filename_component(_VCPKG_BUILDSYSTEMS_DIR "${CMAKE_TOOLCHAIN_FILE}" DIRECTORY)
     get_filename_component(VCPKG_ROOT "${_VCPKG_BUILDSYSTEMS_DIR}/../.." ABSOLUTE)
   else()
-    message(
-      FATAL_ERROR
-        "vcpkg toolchain file not found at path specified in -DCMAKE_TOOLCHAIN_FILE")
+    message(FATAL_ERROR "vcpkg toolchain file not found at path specified in -DCMAKE_TOOLCHAIN_FILE"
+    )
   endif()
 else()
   if(DEFINED VCPKG_ROOT)
     # Get it from the CMake variable VCPKG_ROOT
-    find_program(_VCPKG_BIN vcpkg PATHS "${VCPKG_ROOT}" NO_DEFAULT_PATH)
+    find_program(_VCPKG_BIN vcpkg
+                 PATHS "${VCPKG_ROOT}"
+                 NO_DEFAULT_PATH)
     if(NOT _VCPKG_BIN)
       message(FATAL_ERROR "vcpkg not found in directory specified in -DVCPKG_ROOT")
     endif()
   elseif(DEFINED ENV{VCPKG_ROOT})
     # Get it from the environment variable VCPKG_ROOT
     set(VCPKG_ROOT $ENV{VCPKG_ROOT})
-    find_program(_VCPKG_BIN vcpkg PATHS "${VCPKG_ROOT}" NO_DEFAULT_PATH)
+    find_program(_VCPKG_BIN vcpkg
+                 PATHS "${VCPKG_ROOT}"
+                 NO_DEFAULT_PATH)
     if(NOT _VCPKG_BIN)
-      message(
-        FATAL_ERROR "vcpkg not found in directory in environment variable VCPKG_ROOT")
+      message(FATAL_ERROR "vcpkg not found in directory in environment variable VCPKG_ROOT"
+      )
     endif()
   else()
     # Get it from the file vcpkg.path.txt
@@ -78,12 +83,13 @@ else()
       if(EXISTS "${_VCPKG_PATH_TXT}")
         file(READ "${_VCPKG_PATH_TXT}" VCPKG_ROOT)
       else()
-        message(
-          FATAL_ERROR
-            "vcpkg not found. Install vcpkg if not installed, "
-            "then run vcpkg integrate install or set environment variable VCPKG_ROOT.")
+        message(FATAL_ERROR "vcpkg not found. Install vcpkg if not installed, "
+                            "then run vcpkg integrate install or set environment variable VCPKG_ROOT."
+        )
       endif()
-      find_program(_VCPKG_BIN vcpkg PATHS "${VCPKG_ROOT}" NO_DEFAULT_PATH)
+      find_program(_VCPKG_BIN vcpkg
+                   PATHS "${VCPKG_ROOT}"
+                   NO_DEFAULT_PATH)
       if(NOT _VCPKG_BIN)
         message(FATAL_ERROR "vcpkg not found. Re-run vcpkg integrate install "
                             "or set environment variable VCPKG_ROOT.")
@@ -105,7 +111,9 @@ if(DEFINED ENV{VCPKG_DEFAULT_TRIPLET} AND NOT DEFINED VCPKG_TARGET_TRIPLET)
 endif()
 # Explicitly set manifest mode on if it is not set and vcpkg.json exists
 if(NOT DEFINED VCPKG_MANIFEST_MODE AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/vcpkg.json")
-  set(VCPKG_MANIFEST_MODE ON CACHE BOOL "Use vcpkg.json manifest")
+  set(VCPKG_MANIFEST_MODE
+      ON
+      CACHE BOOL "Use vcpkg.json manifest")
   message(STATUS "vcpkg.json manifest found. Using VCPKG_MANIFEST_MODE: ON")
 endif()
 # vcpkg can install packages in three different places
@@ -113,13 +121,7 @@ set(_INST_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/vcpkg_installed") # try here fi
 set(_INST_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/vcpkg_installed") # try here second
 set(_INST_VCPKG_ROOT "${VCPKG_ROOT}/installed")
 # Iterate over the places
-foreach(_INST_DIR
-        IN
-        LISTS
-        _INST_BUILD_DIR
-        _INST_SOURCE_DIR
-        _INST_VCPKG_ROOT
-        "notfound")
+foreach(_INST_DIR IN LISTS _INST_BUILD_DIR _INST_SOURCE_DIR _INST_VCPKG_ROOT "notfound")
   if(_INST_DIR STREQUAL "notfound")
     message(FATAL_ERROR "vcpkg installed libraries directory not found. "
                         "Install packages with vcpkg before executing cmake.")
@@ -158,10 +160,8 @@ if(NOT DEFINED VCPKG_TARGET_TRIPLET)
   message(FATAL_ERROR "Could not infer VCPKG_TARGET_TRIPLET. "
                       "Specify triplet with -DVCPKG_TARGET_TRIPLET.")
 elseif(NOT DEFINED _VCPKG_INSTALLED_DIR)
-  message(
-    FATAL_ERROR
-      "Could not find installed vcpkg packages for triplet ${VCPKG_TARGET_TRIPLET}. "
-      "Install packages with vcpkg before executing cmake.")
+  message(FATAL_ERROR "Could not find installed vcpkg packages for triplet ${VCPKG_TARGET_TRIPLET}. "
+                      "Install packages with vcpkg before executing cmake.")
 endif()
 
 set(VCPKG_TARGET_TRIPLET
@@ -194,24 +194,50 @@ set(ARROW_VCPKG_PREFIX
     "${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}"
     CACHE PATH "Path to target triplet subdirectory in vcpkg installed directory")
 
-set(ARROW_VCPKG ON CACHE BOOL "Use vcpkg for dependencies")
+set(ARROW_VCPKG
+    ON
+    CACHE BOOL "Use vcpkg for dependencies")
 
 set(ARROW_DEPENDENCY_SOURCE
     "SYSTEM"
     CACHE STRING "The specified value VCPKG is implemented internally as SYSTEM" FORCE)
 
-set(BOOST_ROOT "${ARROW_VCPKG_PREFIX}" CACHE STRING "")
-set(BOOST_INCLUDEDIR "${ARROW_VCPKG_PREFIX}/include/boost" CACHE STRING "")
-set(BOOST_LIBRARYDIR "${ARROW_VCPKG_PREFIX}/lib" CACHE STRING "")
-set(OPENSSL_INCLUDE_DIR "${ARROW_VCPKG_PREFIX}/include" CACHE STRING "")
-set(OPENSSL_LIBRARIES "${ARROW_VCPKG_PREFIX}/lib" CACHE STRING "")
-set(OPENSSL_ROOT_DIR "${ARROW_VCPKG_PREFIX}" CACHE STRING "")
-set(Thrift_ROOT "${ARROW_VCPKG_PREFIX}/lib" CACHE STRING "")
-set(ZSTD_INCLUDE_DIR "${ARROW_VCPKG_PREFIX}/include" CACHE STRING "")
-set(ZSTD_ROOT "${ARROW_VCPKG_PREFIX}" CACHE STRING "")
+set(BOOST_ROOT
+    "${ARROW_VCPKG_PREFIX}"
+    CACHE STRING "")
+set(BOOST_INCLUDEDIR
+    "${ARROW_VCPKG_PREFIX}/include/boost"
+    CACHE STRING "")
+set(BOOST_LIBRARYDIR
+    "${ARROW_VCPKG_PREFIX}/lib"
+    CACHE STRING "")
+set(OPENSSL_INCLUDE_DIR
+    "${ARROW_VCPKG_PREFIX}/include"
+    CACHE STRING "")
+set(OPENSSL_LIBRARIES
+    "${ARROW_VCPKG_PREFIX}/lib"
+    CACHE STRING "")
+set(OPENSSL_ROOT_DIR
+    "${ARROW_VCPKG_PREFIX}"
+    CACHE STRING "")
+set(Thrift_ROOT
+    "${ARROW_VCPKG_PREFIX}/lib"
+    CACHE STRING "")
+set(ZSTD_INCLUDE_DIR
+    "${ARROW_VCPKG_PREFIX}/include"
+    CACHE STRING "")
+set(ZSTD_ROOT
+    "${ARROW_VCPKG_PREFIX}"
+    CACHE STRING "")
 
 if(CMAKE_HOST_WIN32)
-  set(LZ4_MSVC_LIB_PREFIX "" CACHE STRING "")
-  set(LZ4_MSVC_STATIC_LIB_SUFFIX "" CACHE STRING "")
-  set(ZSTD_MSVC_LIB_PREFIX "" CACHE STRING "")
+  set(LZ4_MSVC_LIB_PREFIX
+      ""
+      CACHE STRING "")
+  set(LZ4_MSVC_STATIC_LIB_SUFFIX
+      ""
+      CACHE STRING "")
+  set(ZSTD_MSVC_LIB_PREFIX
+      ""
+      CACHE STRING "")
 endif()
diff --git a/cpp/cmake_modules/san-config.cmake b/cpp/cmake_modules/san-config.cmake
index 5eee6278009..bde9af23e57 100644
--- a/cpp/cmake_modules/san-config.cmake
+++ b/cpp/cmake_modules/san-config.cmake
@@ -20,10 +20,8 @@ endif()
 if(${ARROW_USE_ASAN})
   if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang"
      OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang"
-     OR (CMAKE_CXX_COMPILER_ID
-         STREQUAL
-         "GNU"
-         AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "4.8"))
+     OR (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION
+                                                  VERSION_GREATER "4.8"))
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -DADDRESS_SANITIZER")
   else()
     message(SEND_ERROR "Cannot use ASAN without clang or gcc >= 4.8")
@@ -41,18 +39,16 @@ endif()
 #   (https://bugs.llvm.org/show_bug.cgi?id=17000#c1)
 #   Note: GCC does not support the 'function' flag.
 if(${ARROW_USE_UBSAN})
-  if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang"
-     OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
-    set(
-      CMAKE_CXX_FLAGS
-      "${CMAKE_CXX_FLAGS} -fsanitize=undefined -fno-sanitize=alignment,vptr,function,float-divide-by-zero -fno-sanitize-recover=all"
-      )
-  elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU"
-         AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL "5.1")
-    set(
-      CMAKE_CXX_FLAGS
-      "${CMAKE_CXX_FLAGS} -fsanitize=undefined -fno-sanitize=alignment,vptr -fno-sanitize-recover=all"
-      )
+  if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL
+                                                    "Clang")
+    set(CMAKE_CXX_FLAGS
+        "${CMAKE_CXX_FLAGS} -fsanitize=undefined -fno-sanitize=alignment,vptr,function,float-divide-by-zero -fno-sanitize-recover=all"
+    )
+  elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION
+                                                  VERSION_GREATER_EQUAL "5.1")
+    set(CMAKE_CXX_FLAGS
+        "${CMAKE_CXX_FLAGS} -fsanitize=undefined -fno-sanitize=alignment,vptr -fno-sanitize-recover=all"
+    )
   else()
     message(SEND_ERROR "Cannot use UBSAN without clang or gcc >= 5.1")
   endif()
@@ -61,14 +57,10 @@ endif()
 # Flag to enable thread sanitizer (clang or gcc 4.8)
 if(${ARROW_USE_TSAN})
   if(NOT
-     (CMAKE_CXX_COMPILER_ID
-      STREQUAL
-      "AppleClang"
+     (CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang"
       OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang"
-      OR (CMAKE_CXX_COMPILER_ID
-          STREQUAL
-          "GNU"
-          AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "4.8")))
+      OR (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION
+                                                   VERSION_GREATER "4.8")))
     message(SEND_ERROR "Cannot use TSAN without clang or gcc >= 4.8")
   endif()
 
@@ -100,34 +92,31 @@ if(${ARROW_USE_TSAN})
 endif()
 
 if(${ARROW_USE_COVERAGE})
-  if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang"
-     OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
-    add_definitions(
-      "-fsanitize-coverage=pc-table,inline-8bit-counters,edge,no-prune,trace-cmp,trace-div,trace-gep"
-      )
+  if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL
+                                                    "Clang")
+    add_definitions("-fsanitize-coverage=pc-table,inline-8bit-counters,edge,no-prune,trace-cmp,trace-div,trace-gep"
+    )
 
-    set(
-      CMAKE_CXX_FLAGS
-      "${CMAKE_CXX_FLAGS} -fsanitize-coverage=pc-table,inline-8bit-counters,edge,no-prune,trace-cmp,trace-div,trace-gep"
-      )
+    set(CMAKE_CXX_FLAGS
+        "${CMAKE_CXX_FLAGS} -fsanitize-coverage=pc-table,inline-8bit-counters,edge,no-prune,trace-cmp,trace-div,trace-gep"
+    )
   else()
     message(SEND_ERROR "You can only enable coverage with clang")
   endif()
 endif()
 
-if("${ARROW_USE_UBSAN}" OR "${ARROW_USE_ASAN}" OR "${ARROW_USE_TSAN}")
+if("${ARROW_USE_UBSAN}"
+   OR "${ARROW_USE_ASAN}"
+   OR "${ARROW_USE_TSAN}")
   # GCC 4.8 and 4.9 (latest as of this writing) don't allow you to specify
   # disallowed entries for the sanitizer.
-  if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang"
-     OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
-    set(
-      CMAKE_CXX_FLAGS
-      "${CMAKE_CXX_FLAGS} -fsanitize-blacklist=${BUILD_SUPPORT_DIR}/sanitizer-disallowed-entries.txt"
-      )
+  if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL
+                                                    "Clang")
+    set(CMAKE_CXX_FLAGS
+        "${CMAKE_CXX_FLAGS} -fsanitize-blacklist=${BUILD_SUPPORT_DIR}/sanitizer-disallowed-entries.txt"
+    )
   else()
-    message(
-      WARNING
-        "GCC does not support specifying a sanitizer disallowed entries list. Known sanitizer check failures will not be suppressed."
-      )
+    message(WARNING "GCC does not support specifying a sanitizer disallowed entries list. Known sanitizer check failures will not be suppressed."
+    )
   endif()
 endif()
diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index 8e411898a34..79b48461f9b 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -261,11 +261,8 @@ set(ARROW_C_SRCS
     vendored/uriparser/UriShorten.c)
 
 set_source_files_properties(vendored/datetime/tz.cpp
-                            PROPERTIES
-                            SKIP_PRECOMPILE_HEADERS
-                            ON
-                            SKIP_UNITY_BUILD_INCLUSION
-                            ON)
+                            PROPERTIES SKIP_PRECOMPILE_HEADERS ON
+                                       SKIP_UNITY_BUILD_INCLUSION ON)
 
 # Disable DLL exports in vendored uriparser library
 add_definitions(-DURI_STATIC_BUILD)
@@ -326,16 +323,12 @@ endif()
 if(_allocator_dependencies)
   if("${CMAKE_GENERATOR}" STREQUAL "Ninja")
     set_source_files_properties(memory_pool.cc PROPERTIES OBJECT_DEPENDS
-                                "${_allocator_dependencies}")
+                                                          "${_allocator_dependencies}")
   else()
     add_dependencies(arrow_dependencies ${_allocator_dependencies})
   endif()
-  set_source_files_properties(memory_pool.cc
-                              PROPERTIES
-                              SKIP_PRECOMPILE_HEADERS
-                              ON
-                              SKIP_UNITY_BUILD_INCLUSION
-                              ON)
+  set_source_files_properties(memory_pool.cc PROPERTIES SKIP_PRECOMPILE_HEADERS ON
+                                                        SKIP_UNITY_BUILD_INCLUSION ON)
 endif()
 
 unset(_allocator_dependencies)
@@ -351,14 +344,15 @@ endif()
 #
 
 if(ARROW_CSV)
-  list(APPEND ARROW_SRCS
-              csv/converter.cc
-              csv/chunker.cc
-              csv/column_builder.cc
-              csv/column_decoder.cc
-              csv/options.cc
-              csv/parser.cc
-              csv/reader.cc)
+  list(APPEND
+       ARROW_SRCS
+       csv/converter.cc
+       csv/chunker.cc
+       csv/column_builder.cc
+       csv/column_decoder.cc
+       csv/options.cc
+       csv/parser.cc
+       csv/reader.cc)
   if(ARROW_COMPUTE)
     list(APPEND ARROW_SRCS csv/writer.cc)
   endif()
@@ -367,50 +361,51 @@ if(ARROW_CSV)
 endif()
 
 if(ARROW_COMPUTE)
-  list(APPEND ARROW_SRCS
-              compute/api_aggregate.cc
-              compute/api_scalar.cc
-              compute/api_vector.cc
-              compute/cast.cc
-              compute/exec.cc
-              compute/exec/exec_plan.cc
-              compute/exec/expression.cc
-              compute/function.cc
-              compute/kernel.cc
-              compute/registry.cc
-              compute/kernels/aggregate_basic.cc
-              compute/kernels/aggregate_mode.cc
-              compute/kernels/aggregate_quantile.cc
-              compute/kernels/aggregate_tdigest.cc
-              compute/kernels/aggregate_var_std.cc
-              compute/kernels/codegen_internal.cc
-              compute/kernels/hash_aggregate.cc
-              compute/kernels/scalar_arithmetic.cc
-              compute/kernels/scalar_boolean.cc
-              compute/kernels/scalar_cast_boolean.cc
-              compute/kernels/scalar_cast_internal.cc
-              compute/kernels/scalar_cast_nested.cc
-              compute/kernels/scalar_cast_numeric.cc
-              compute/kernels/scalar_cast_string.cc
-              compute/kernels/scalar_cast_temporal.cc
-              compute/kernels/scalar_compare.cc
-              compute/kernels/scalar_nested.cc
-              compute/kernels/scalar_set_lookup.cc
-              compute/kernels/scalar_string.cc
-              compute/kernels/scalar_temporal.cc
-              compute/kernels/scalar_validity.cc
-              compute/kernels/scalar_fill_null.cc
-              compute/kernels/scalar_if_else.cc
-              compute/kernels/util_internal.cc
-              compute/kernels/vector_hash.cc
-              compute/kernels/vector_nested.cc
-              compute/kernels/vector_selection.cc
-              compute/kernels/vector_sort.cc
-              compute/exec/key_hash.cc
-              compute/exec/key_map.cc
-              compute/exec/key_compare.cc
-              compute/exec/key_encode.cc
-              compute/exec/util.cc)
+  list(APPEND
+       ARROW_SRCS
+       compute/api_aggregate.cc
+       compute/api_scalar.cc
+       compute/api_vector.cc
+       compute/cast.cc
+       compute/exec.cc
+       compute/exec/exec_plan.cc
+       compute/exec/expression.cc
+       compute/function.cc
+       compute/kernel.cc
+       compute/registry.cc
+       compute/kernels/aggregate_basic.cc
+       compute/kernels/aggregate_mode.cc
+       compute/kernels/aggregate_quantile.cc
+       compute/kernels/aggregate_tdigest.cc
+       compute/kernels/aggregate_var_std.cc
+       compute/kernels/codegen_internal.cc
+       compute/kernels/hash_aggregate.cc
+       compute/kernels/scalar_arithmetic.cc
+       compute/kernels/scalar_boolean.cc
+       compute/kernels/scalar_cast_boolean.cc
+       compute/kernels/scalar_cast_internal.cc
+       compute/kernels/scalar_cast_nested.cc
+       compute/kernels/scalar_cast_numeric.cc
+       compute/kernels/scalar_cast_string.cc
+       compute/kernels/scalar_cast_temporal.cc
+       compute/kernels/scalar_compare.cc
+       compute/kernels/scalar_nested.cc
+       compute/kernels/scalar_set_lookup.cc
+       compute/kernels/scalar_string.cc
+       compute/kernels/scalar_temporal.cc
+       compute/kernels/scalar_validity.cc
+       compute/kernels/scalar_fill_null.cc
+       compute/kernels/scalar_if_else.cc
+       compute/kernels/util_internal.cc
+       compute/kernels/vector_hash.cc
+       compute/kernels/vector_nested.cc
+       compute/kernels/vector_selection.cc
+       compute/kernels/vector_sort.cc
+       compute/exec/key_hash.cc
+       compute/exec/key_map.cc
+       compute/exec/key_compare.cc
+       compute/exec/key_encode.cc
+       compute/exec/util.cc)
 
   append_avx2_src(compute/kernels/aggregate_basic_avx2.cc)
   append_avx512_src(compute/kernels/aggregate_basic_avx512.cc)
@@ -429,12 +424,13 @@ if(ARROW_FILESYSTEM)
     add_definitions(-DARROW_HDFS)
   endif()
 
-  list(APPEND ARROW_SRCS
-              filesystem/filesystem.cc
-              filesystem/localfs.cc
-              filesystem/mockfs.cc
-              filesystem/path_util.cc
-              filesystem/util_internal.cc)
+  list(APPEND
+       ARROW_SRCS
+       filesystem/filesystem.cc
+       filesystem/localfs.cc
+       filesystem/mockfs.cc
+       filesystem/path_util.cc
+       filesystem/util_internal.cc)
 
   if(ARROW_HDFS)
     list(APPEND ARROW_SRCS filesystem/hdfs.cc)
@@ -442,25 +438,23 @@ if(ARROW_FILESYSTEM)
   if(ARROW_S3)
     list(APPEND ARROW_SRCS filesystem/s3fs.cc)
     set_source_files_properties(filesystem/s3fs.cc
-                                PROPERTIES
-                                SKIP_PRECOMPILE_HEADERS
-                                ON
-                                SKIP_UNITY_BUILD_INCLUSION
-                                ON)
+                                PROPERTIES SKIP_PRECOMPILE_HEADERS ON
+                                           SKIP_UNITY_BUILD_INCLUSION ON)
   endif()
 
   list(APPEND ARROW_TESTING_SRCS filesystem/test_util.cc)
 endif()
 
 if(ARROW_IPC)
-  list(APPEND ARROW_SRCS
-              ipc/dictionary.cc
-              ipc/feather.cc
-              ipc/message.cc
-              ipc/metadata_internal.cc
-              ipc/options.cc
-              ipc/reader.cc
-              ipc/writer.cc)
+  list(APPEND
+       ARROW_SRCS
+       ipc/dictionary.cc
+       ipc/feather.cc
+       ipc/message.cc
+       ipc/metadata_internal.cc
+       ipc/options.cc
+       ipc/reader.cc
+       ipc/writer.cc)
 
   if(ARROW_JSON)
     list(APPEND ARROW_SRCS ipc/json_simple.cc)
@@ -468,15 +462,16 @@ if(ARROW_IPC)
 endif()
 
 if(ARROW_JSON)
-  list(APPEND ARROW_SRCS
-              json/options.cc
-              json/chunked_builder.cc
-              json/chunker.cc
-              json/converter.cc
-              json/object_parser.cc
-              json/object_writer.cc
-              json/parser.cc
-              json/reader.cc)
+  list(APPEND
+       ARROW_SRCS
+       json/options.cc
+       json/chunked_builder.cc
+       json/chunker.cc
+       json/converter.cc
+       json/object_parser.cc
+       json/object_writer.cc
+       json/parser.cc
+       json/reader.cc)
 endif()
 
 if(ARROW_ORC)
@@ -627,12 +622,8 @@ add_arrow_test(misc_test
 
 add_arrow_test(public_api_test)
 
-set_source_files_properties(public_api_test.cc
-                            PROPERTIES
-                            SKIP_PRECOMPILE_HEADERS
-                            ON
-                            SKIP_UNITY_BUILD_INCLUSION
-                            ON)
+set_source_files_properties(public_api_test.cc PROPERTIES SKIP_PRECOMPILE_HEADERS ON
+                                                          SKIP_UNITY_BUILD_INCLUSION ON)
 
 add_arrow_test(scalar_test)
 add_arrow_test(type_test)
diff --git a/cpp/src/arrow/adapters/orc/CMakeLists.txt b/cpp/src/arrow/adapters/orc/CMakeLists.txt
index 516196c2eef..ca901b07dfd 100644
--- a/cpp/src/arrow/adapters/orc/CMakeLists.txt
+++ b/cpp/src/arrow/adapters/orc/CMakeLists.txt
@@ -53,9 +53,5 @@ add_arrow_test(adapter_test
                STATIC_LINK_LIBS
                ${ORC_STATIC_TEST_LINK_LIBS})
 
-set_source_files_properties(adapter_test.cc
-                            PROPERTIES
-                            SKIP_PRECOMPILE_HEADERS
-                            ON
-                            SKIP_UNITY_BUILD_INCLUSION
-                            ON)
+set_source_files_properties(adapter_test.cc PROPERTIES SKIP_PRECOMPILE_HEADERS ON
+                                                       SKIP_UNITY_BUILD_INCLUSION ON)
diff --git a/cpp/src/arrow/dbi/hiveserver2/CMakeLists.txt b/cpp/src/arrow/dbi/hiveserver2/CMakeLists.txt
index e8bb533b18e..2638456c61c 100644
--- a/cpp/src/arrow/dbi/hiveserver2/CMakeLists.txt
+++ b/cpp/src/arrow/dbi/hiveserver2/CMakeLists.txt
@@ -56,11 +56,9 @@ set(HIVESERVER2_THRIFT_SRC
     Types_types.cpp)
 
 set_source_files_properties(${HIVESERVER2_THRIFT_SRC}
-                            PROPERTIES
-                            COMPILE_FLAGS
-                            "-Wno-unused-variable -Wno-shadow-field"
-                            GENERATED
-                            TRUE)
+                            PROPERTIES COMPILE_FLAGS
+                                       "-Wno-unused-variable -Wno-shadow-field" GENERATED
+                                                                                TRUE)
 
 # keep everything in one library, the object files reference
 # each other
@@ -75,9 +73,9 @@ add_library(arrow_hiveserver2_thrift STATIC ${HIVESERVER2_THRIFT_SRC})
 
 add_dependencies(arrow_hiveserver2_thrift hs2-thrift-cpp)
 
-set_target_properties(
-  arrow_hiveserver2_thrift
-  PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")
+set_target_properties(arrow_hiveserver2_thrift
+                      PROPERTIES LIBRARY_OUTPUT_DIRECTORY
+                                 "${BUILD_OUTPUT_ROOT_DIRECTORY}")
 
 add_arrow_lib(arrow_hiveserver2
               SOURCES
diff --git a/cpp/src/arrow/dbi/hiveserver2/thrift/CMakeLists.txt b/cpp/src/arrow/dbi/hiveserver2/thrift/CMakeLists.txt
index f6c88473a6e..237a92a827d 100644
--- a/cpp/src/arrow/dbi/hiveserver2/thrift/CMakeLists.txt
+++ b/cpp/src/arrow/dbi/hiveserver2/thrift/CMakeLists.txt
@@ -44,9 +44,9 @@ function(HS2_THRIFT_GEN VAR)
     # All the output files we can determine based on filename.
     #   - Does not include .skeleton.cpp files
     #   - Does not include java output files
-    set(OUTPUT_BE_FILE "${GEN_DIR}/${FIL_WE}_types.cpp" "${GEN_DIR}/${FIL_WE}_types.h"
-                       "${GEN_DIR}/${FIL_WE}_constants.cpp"
-                       "${GEN_DIR}/${FIL_WE}_constants.h")
+    set(OUTPUT_BE_FILE
+        "${GEN_DIR}/${FIL_WE}_types.cpp" "${GEN_DIR}/${FIL_WE}_types.h"
+        "${GEN_DIR}/${FIL_WE}_constants.cpp" "${GEN_DIR}/${FIL_WE}_constants.h")
     list(APPEND ${VAR} ${OUTPUT_BE_FILE})
 
     # BeeswaxService thrift generation
@@ -80,7 +80,9 @@ function(HS2_THRIFT_GEN VAR)
                        VERBATIM)
   endforeach(FIL)
 
-  set(${VAR} ${${VAR}} PARENT_SCOPE)
+  set(${VAR}
+      ${${VAR}}
+      PARENT_SCOPE)
 endfunction(HS2_THRIFT_GEN)
 
 message("Using Thrift compiler: ${THRIFT_COMPILER}")
@@ -102,8 +104,9 @@ set(SRC_FILES
     Status.thrift
     Types.thrift)
 
-set_source_files_properties(Status.thrift PROPERTIES OBJECT_DEPENDS
-                            ${CMAKE_CURRENT_BINARY_DIR}/ErrorCodes.thrift)
+set_source_files_properties(Status.thrift
+                            PROPERTIES OBJECT_DEPENDS
+                                       ${CMAKE_CURRENT_BINARY_DIR}/ErrorCodes.thrift)
 
 # Create a build command for each of the thrift src files and generate
 # a list of files they produce
diff --git a/cpp/src/arrow/filesystem/CMakeLists.txt b/cpp/src/arrow/filesystem/CMakeLists.txt
index 473a5ecc0f1..c917db3b99c 100644
--- a/cpp/src/arrow/filesystem/CMakeLists.txt
+++ b/cpp/src/arrow/filesystem/CMakeLists.txt
@@ -47,8 +47,8 @@ if(ARROW_S3)
     if(AWS_CPP_SDK_S3_TYPE STREQUAL "STATIC_LIBRARY" AND NOT APPLE)
       list(APPEND ARROW_S3FS_TEST_COMPILE_DEFINITIONS "AWS_CPP_SDK_S3_NOT_SHARED")
     endif()
-    target_compile_definitions(arrow-s3fs-test PRIVATE
-                               ${ARROW_S3FS_TEST_COMPILE_DEFINITIONS})
+    target_compile_definitions(arrow-s3fs-test
+                               PRIVATE ${ARROW_S3FS_TEST_COMPILE_DEFINITIONS})
   endif()
 
   if(ARROW_BUILD_TESTS)
@@ -60,8 +60,8 @@ if(ARROW_S3)
 
   if(ARROW_BUILD_BENCHMARKS AND ARROW_PARQUET)
     add_arrow_benchmark(s3fs_benchmark PREFIX "arrow-filesystem")
-    target_compile_definitions(arrow-filesystem-s3fs-benchmark PRIVATE
-                               ${ARROW_BOOST_PROCESS_COMPILE_DEFINITIONS})
+    target_compile_definitions(arrow-filesystem-s3fs-benchmark
+                               PRIVATE ${ARROW_BOOST_PROCESS_COMPILE_DEFINITIONS})
     if(ARROW_TEST_LINKAGE STREQUAL "static")
       target_link_libraries(arrow-filesystem-s3fs-benchmark PRIVATE parquet_static)
     else()
diff --git a/cpp/src/arrow/flight/CMakeLists.txt b/cpp/src/arrow/flight/CMakeLists.txt
index e1176ff0ac0..4e46243b18d 100644
--- a/cpp/src/arrow/flight/CMakeLists.txt
+++ b/cpp/src/arrow/flight/CMakeLists.txt
@@ -26,9 +26,9 @@ if(WIN32)
 endif()
 
 if(ARROW_TEST_LINKAGE STREQUAL "static")
-  set(ARROW_FLIGHT_TEST_LINK_LIBS arrow_flight_static arrow_flight_testing_static
-                                  ${ARROW_FLIGHT_STATIC_LINK_LIBS}
-                                  ${ARROW_TEST_LINK_LIBS})
+  set(ARROW_FLIGHT_TEST_LINK_LIBS
+      arrow_flight_static arrow_flight_testing_static ${ARROW_FLIGHT_STATIC_LINK_LIBS}
+      ${ARROW_TEST_LINK_LIBS})
 else()
   set(ARROW_FLIGHT_TEST_LINK_LIBS arrow_flight_shared arrow_flight_testing_shared
                                   ${ARROW_TEST_LINK_LIBS})
@@ -39,10 +39,10 @@ endif()
 set(FLIGHT_PROTO_PATH "${ARROW_SOURCE_DIR}/../format")
 set(FLIGHT_PROTO ${ARROW_SOURCE_DIR}/../format/Flight.proto)
 
-set(FLIGHT_GENERATED_PROTO_FILES "${CMAKE_CURRENT_BINARY_DIR}/Flight.pb.cc"
-                                 "${CMAKE_CURRENT_BINARY_DIR}/Flight.pb.h"
-                                 "${CMAKE_CURRENT_BINARY_DIR}/Flight.grpc.pb.cc"
-                                 "${CMAKE_CURRENT_BINARY_DIR}/Flight.grpc.pb.h")
+set(FLIGHT_GENERATED_PROTO_FILES
+    "${CMAKE_CURRENT_BINARY_DIR}/Flight.pb.cc" "${CMAKE_CURRENT_BINARY_DIR}/Flight.pb.h"
+    "${CMAKE_CURRENT_BINARY_DIR}/Flight.grpc.pb.cc"
+    "${CMAKE_CURRENT_BINARY_DIR}/Flight.grpc.pb.h")
 
 set(PROTO_DEPENDS ${FLIGHT_PROTO} ${ARROW_PROTOBUF_LIBPROTOBUF} gRPC::grpc_cpp_plugin)
 
@@ -50,8 +50,7 @@ add_custom_command(OUTPUT ${FLIGHT_GENERATED_PROTO_FILES}
                    COMMAND ${ARROW_PROTOBUF_PROTOC} "-I${FLIGHT_PROTO_PATH}"
                            "--cpp_out=${CMAKE_CURRENT_BINARY_DIR}" "${FLIGHT_PROTO}"
                    DEPENDS ${PROTO_DEPENDS} ARGS
-                   COMMAND ${ARROW_PROTOBUF_PROTOC}
-                           "-I${FLIGHT_PROTO_PATH}"
+                   COMMAND ${ARROW_PROTOBUF_PROTOC} "-I${FLIGHT_PROTO_PATH}"
                            "--grpc_out=${CMAKE_CURRENT_BINARY_DIR}"
                            "--plugin=protoc-gen-grpc=$<TARGET_FILE:gRPC::grpc_cpp_plugin>"
                            "${FLIGHT_PROTO}")
@@ -70,13 +69,13 @@ string(REPLACE "-Werror " " " CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
 # verification when using TLS.
 function(test_grpc_version DST_VAR DETECT_VERSION TEST_FILE)
   if(NOT DEFINED ${DST_VAR})
-    message(
-      STATUS "Checking support for TlsCredentialsOptions (gRPC >= ${DETECT_VERSION})...")
+    message(STATUS "Checking support for TlsCredentialsOptions (gRPC >= ${DETECT_VERSION})..."
+    )
     get_property(CURRENT_INCLUDE_DIRECTORIES
                  DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
                  PROPERTY INCLUDE_DIRECTORIES)
-    try_compile(HAS_GRPC_VERSION ${CMAKE_CURRENT_BINARY_DIR}/try_compile SOURCES
-                "${CMAKE_CURRENT_SOURCE_DIR}/try_compile/${TEST_FILE}"
+    try_compile(HAS_GRPC_VERSION ${CMAKE_CURRENT_BINARY_DIR}/try_compile
+                SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/try_compile/${TEST_FILE}"
                 CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${CURRENT_INCLUDE_DIRECTORIES}"
                 LINK_LIBRARIES gRPC::grpc++
                 OUTPUT_VARIABLE TLS_CREDENTIALS_OPTIONS_CHECK_OUTPUT CXX_STANDARD 11)
@@ -85,10 +84,8 @@ function(test_grpc_version DST_VAR DETECT_VERSION TEST_FILE)
           "${DETECT_VERSION}"
           CACHE INTERNAL "The detected (approximate) gRPC version.")
     else()
-      message(
-        STATUS
-          "TlsCredentialsOptions (for gRPC ${DETECT_VERSION}) not found in grpc::experimental."
-        )
+      message(STATUS "TlsCredentialsOptions (for gRPC ${DETECT_VERSION}) not found in grpc::experimental."
+      )
       message(DEBUG "Build output:")
       list(APPEND CMAKE_MESSAGE_INDENT "${TEST_FILE}: ")
       message(DEBUG ${TLS_CREDENTIALS_OPTIONS_CHECK_OUTPUT})
@@ -105,10 +102,8 @@ else()
   test_grpc_version(GRPC_VERSION "1.34" "check_tls_opts_134.cc")
   test_grpc_version(GRPC_VERSION "1.32" "check_tls_opts_132.cc")
   test_grpc_version(GRPC_VERSION "1.27" "check_tls_opts_127.cc")
-  message(
-    STATUS
-      "Found approximate gRPC version: ${GRPC_VERSION} (ARROW_FLIGHT_REQUIRE_TLSCREDENTIALSOPTIONS=${ARROW_FLIGHT_REQUIRE_TLSCREDENTIALSOPTIONS})"
-    )
+  message(STATUS "Found approximate gRPC version: ${GRPC_VERSION} (ARROW_FLIGHT_REQUIRE_TLSCREDENTIALSOPTIONS=${ARROW_FLIGHT_REQUIRE_TLSCREDENTIALSOPTIONS})"
+  )
 endif()
 if(GRPC_VERSION EQUAL "1.27")
   add_definitions(-DGRPC_NAMESPACE_FOR_TLS_CREDENTIALS_OPTIONS=grpc_impl::experimental)
@@ -122,18 +117,13 @@ elseif(GRPC_VERSION EQUAL "1.36")
   add_definitions(-DGRPC_USE_TLS_CHANNEL_CREDENTIALS_OPTIONS
                   -DGRPC_NAMESPACE_FOR_TLS_CREDENTIALS_OPTIONS=grpc::experimental)
 else()
-  message(
-    STATUS
-      "A proper version of gRPC could not be found to support TlsCredentialsOptions in Arrow Flight."
-    )
-  message(
-    STATUS
-      "You may need a newer version of gRPC (>= 1.27), or the gRPC API has changed and Flight must be updated to match."
-    )
+  message(STATUS "A proper version of gRPC could not be found to support TlsCredentialsOptions in Arrow Flight."
+  )
+  message(STATUS "You may need a newer version of gRPC (>= 1.27), or the gRPC API has changed and Flight must be updated to match."
+  )
   if(ARROW_FLIGHT_REQUIRE_TLSCREDENTIALSOPTIONS)
-    message(
-      FATAL_ERROR "Halting build since ARROW_FLIGHT_REQUIRE_TLSCREDENTIALSOPTIONS is set."
-      )
+    message(FATAL_ERROR "Halting build since ARROW_FLIGHT_REQUIRE_TLSCREDENTIALSOPTIONS is set."
+    )
   endif()
 endif()
 
@@ -211,8 +201,9 @@ if(ARROW_TESTING)
 endif()
 
 foreach(LIB_TARGET ${ARROW_FLIGHT_TESTING_LIBRARIES})
-  target_compile_definitions(${LIB_TARGET} PRIVATE ARROW_FLIGHT_EXPORTING
-                             ${ARROW_BOOST_PROCESS_COMPILE_DEFINITIONS})
+  target_compile_definitions(${LIB_TARGET}
+                             PRIVATE ARROW_FLIGHT_EXPORTING
+                                     ${ARROW_BOOST_PROCESS_COMPILE_DEFINITIONS})
 endforeach()
 
 add_arrow_test(flight_test
diff --git a/cpp/src/arrow/gpu/CMakeLists.txt b/cpp/src/arrow/gpu/CMakeLists.txt
index af8cf317969..a1c182a58bb 100644
--- a/cpp/src/arrow/gpu/CMakeLists.txt
+++ b/cpp/src/arrow/gpu/CMakeLists.txt
@@ -51,7 +51,8 @@ add_arrow_lib(arrow_cuda
               arrow_shared
               ${ARROW_CUDA_SHARED_LINK_LIBS}
               # Static arrow_cuda must also link against CUDA shared libs
-              STATIC_LINK_LIBS ${ARROW_CUDA_SHARED_LINK_LIBS})
+              STATIC_LINK_LIBS
+              ${ARROW_CUDA_SHARED_LINK_LIBS})
 
 add_dependencies(arrow_cuda ${ARROW_CUDA_LIBRARIES})
 
diff --git a/cpp/src/arrow/python/CMakeLists.txt b/cpp/src/arrow/python/CMakeLists.txt
index 960155703e1..40f351b56a5 100644
--- a/cpp/src/arrow/python/CMakeLists.txt
+++ b/cpp/src/arrow/python/CMakeLists.txt
@@ -45,12 +45,8 @@ set(ARROW_PYTHON_SRCS
     pyarrow.cc
     serialize.cc)
 
-set_source_files_properties(init.cc
-                            PROPERTIES
-                            SKIP_PRECOMPILE_HEADERS
-                            ON
-                            SKIP_UNITY_BUILD_INCLUSION
-                            ON)
+set_source_files_properties(init.cc PROPERTIES SKIP_PRECOMPILE_HEADERS ON
+                                               SKIP_UNITY_BUILD_INCLUSION ON)
 
 if(ARROW_FILESYSTEM)
   list(APPEND ARROW_PYTHON_SRCS filesystem.cc)
@@ -59,7 +55,9 @@ endif()
 set(ARROW_PYTHON_DEPENDENCIES arrow_dependencies)
 
 if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
-  set_property(SOURCE pyarrow.cc APPEND_STRING PROPERTY COMPILE_FLAGS " -Wno-cast-qual ")
+  set_property(SOURCE pyarrow.cc
+               APPEND_STRING
+               PROPERTY COMPILE_FLAGS " -Wno-cast-qual ")
 endif()
 
 set(ARROW_PYTHON_SHARED_LINK_LIBS arrow_shared)
@@ -159,8 +157,8 @@ if(ARROW_BUILD_TESTS)
 
   if(APPLE)
     target_link_libraries(arrow_python_test_main ${CMAKE_DL_LIBS})
-    set_target_properties(arrow_python_test_main
-                          PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
+    set_target_properties(arrow_python_test_main PROPERTIES LINK_FLAGS
+                                                            "-undefined dynamic_lookup")
   elseif(NOT MSVC)
     target_link_libraries(arrow_python_test_main pthread ${CMAKE_DL_LIBS})
   endif()
diff --git a/cpp/src/arrow/python/util/CMakeLists.txt b/cpp/src/arrow/python/util/CMakeLists.txt
index c75b622847d..74141bebc8b 100644
--- a/cpp/src/arrow/python/util/CMakeLists.txt
+++ b/cpp/src/arrow/python/util/CMakeLists.txt
@@ -24,8 +24,8 @@ if(PYARROW_BUILD_TESTS)
 
   if(APPLE)
     target_link_libraries(arrow/python_test_main GTest::gtest dl)
-    set_target_properties(arrow/python_test_main
-                          PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
+    set_target_properties(arrow/python_test_main PROPERTIES LINK_FLAGS
+                                                            "-undefined dynamic_lookup")
   else()
     target_link_libraries(arrow/python_test_main GTest::gtest pthread dl)
   endif()
diff --git a/cpp/src/gandiva/CMakeLists.txt b/cpp/src/gandiva/CMakeLists.txt
index 44b6fab14c3..83cec08e71c 100644
--- a/cpp/src/gandiva/CMakeLists.txt
+++ b/cpp/src/gandiva/CMakeLists.txt
@@ -97,8 +97,8 @@ set(GANDIVA_SHARED_PRIVATE_LINK_LIBS arrow_shared LLVM::LLVM_INTERFACE
 
 set(GANDIVA_STATIC_LINK_LIBS arrow_static LLVM::LLVM_INTERFACE ${GANDIVA_OPENSSL_LIBS})
 
-if(ARROW_GANDIVA_STATIC_LIBSTDCPP
-   AND (CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX))
+if(ARROW_GANDIVA_STATIC_LIBSTDCPP AND (CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX
+                                      ))
   set(GANDIVA_STATIC_LINK_LIBS ${GANDIVA_STATIC_LINK_LIBS} -static-libstdc++
                                -static-libgcc)
 endif()
@@ -210,8 +210,11 @@ endfunction()
 
 set(GANDIVA_INTERNALS_TEST_ARGUMENTS)
 if(WIN32)
-  list(APPEND GANDIVA_INTERNALS_TEST_ARGUMENTS EXTRA_LINK_LIBS LLVM::LLVM_INTERFACE
-              ${GANDIVA_OPENSSL_LIBS})
+  list(APPEND
+       GANDIVA_INTERNALS_TEST_ARGUMENTS
+       EXTRA_LINK_LIBS
+       LLVM::LLVM_INTERFACE
+       ${GANDIVA_OPENSSL_LIBS})
 endif()
 add_gandiva_test(internals-test
                  SOURCES
diff --git a/cpp/src/gandiva/jni/CMakeLists.txt b/cpp/src/gandiva/jni/CMakeLists.txt
index b456d5f3def..04fd22eec9a 100644
--- a/cpp/src/gandiva/jni/CMakeLists.txt
+++ b/cpp/src/gandiva/jni/CMakeLists.txt
@@ -32,14 +32,12 @@ set(PROTO_OUTPUT_FILES ${PROTO_OUTPUT_FILES} "${PROTO_OUTPUT_DIR}/Types.pb.h")
 
 set_source_files_properties(${PROTO_OUTPUT_FILES} PROPERTIES GENERATED TRUE)
 
-get_filename_component(ABS_GANDIVA_PROTO ${CMAKE_SOURCE_DIR}/src/gandiva/proto/Types.proto
-                       ABSOLUTE)
+get_filename_component(ABS_GANDIVA_PROTO
+                       ${CMAKE_SOURCE_DIR}/src/gandiva/proto/Types.proto ABSOLUTE)
 
 add_custom_command(OUTPUT ${PROTO_OUTPUT_FILES}
-                   COMMAND ${ARROW_PROTOBUF_PROTOC}
-                           --proto_path
-                           ${CMAKE_SOURCE_DIR}/src/gandiva/proto
-                           --cpp_out
+                   COMMAND ${ARROW_PROTOBUF_PROTOC} --proto_path
+                           ${CMAKE_SOURCE_DIR}/src/gandiva/proto --cpp_out
                            ${PROTO_OUTPUT_DIR}
                            ${CMAKE_SOURCE_DIR}/src/gandiva/proto/Types.proto
                    DEPENDS ${ABS_GANDIVA_PROTO} ${ARROW_PROTOBUF_LIBPROTOBUF}
@@ -101,9 +99,9 @@ if(ARROW_BUILD_SHARED)
   # filter out everything that is not needed for the jni bridge
   # statically linked stdc++ has conflicts with stdc++ loaded by other libraries.
   if(NOT APPLE)
-    set_target_properties(
-      gandiva_jni_shared
-      PROPERTIES LINK_FLAGS
-                 "-Wl,--version-script=${CMAKE_SOURCE_DIR}/src/gandiva/jni/symbols.map")
+    set_target_properties(gandiva_jni_shared
+                          PROPERTIES LINK_FLAGS
+                                     "-Wl,--version-script=${CMAKE_SOURCE_DIR}/src/gandiva/jni/symbols.map"
+    )
   endif()
 endif()
diff --git a/cpp/src/gandiva/precompiled/CMakeLists.txt b/cpp/src/gandiva/precompiled/CMakeLists.txt
index 176b0473855..1cd505b44a3 100644
--- a/cpp/src/gandiva/precompiled/CMakeLists.txt
+++ b/cpp/src/gandiva/precompiled/CMakeLists.txt
@@ -53,35 +53,38 @@ foreach(SRC_FILE ${PRECOMPILED_SRCS})
   set(BC_FILE ${CMAKE_CURRENT_BINARY_DIR}/${SRC_BASE}.bc)
   set(PRECOMPILE_COMMAND)
   if(CMAKE_OSX_SYSROOT)
-    list(APPEND PRECOMPILE_COMMAND
-                ${CMAKE_COMMAND}
-                -E
-                env
-                SDKROOT=${CMAKE_OSX_SYSROOT})
+    list(APPEND
+         PRECOMPILE_COMMAND
+         ${CMAKE_COMMAND}
+         -E
+         env
+         SDKROOT=${CMAKE_OSX_SYSROOT})
   endif()
-  list(
-    APPEND PRECOMPILE_COMMAND
-           ${CLANG_EXECUTABLE}
-           ${PLATFORM_CLANG_OPTIONS}
-           -DGANDIVA_IR
-           -DNDEBUG # DCHECK macros not implemented in precompiled code
-           -DARROW_STATIC # Do not set __declspec(dllimport) on MSVC on Arrow symbols
-           -DGANDIVA_STATIC # Do not set __declspec(dllimport) on MSVC on Gandiva symbols
-           -fno-use-cxa-atexit # Workaround for unresolved __dso_handle
-           -emit-llvm
-           -O3
-           -c
-           ${ABSOLUTE_SRC}
-           -o
-           ${BC_FILE}
-           ${ARROW_GANDIVA_PC_CXX_FLAGS}
-           -I${CMAKE_SOURCE_DIR}/src
-           -I${ARROW_BINARY_DIR}/src)
+  list(APPEND
+       PRECOMPILE_COMMAND
+       ${CLANG_EXECUTABLE}
+       ${PLATFORM_CLANG_OPTIONS}
+       -DGANDIVA_IR
+       -DNDEBUG # DCHECK macros not implemented in precompiled code
+       -DARROW_STATIC # Do not set __declspec(dllimport) on MSVC on Arrow symbols
+       -DGANDIVA_STATIC # Do not set __declspec(dllimport) on MSVC on Gandiva symbols
+       -fno-use-cxa-atexit # Workaround for unresolved __dso_handle
+       -emit-llvm
+       -O3
+       -c
+       ${ABSOLUTE_SRC}
+       -o
+       ${BC_FILE}
+       ${ARROW_GANDIVA_PC_CXX_FLAGS}
+       -I${CMAKE_SOURCE_DIR}/src
+       -I${ARROW_BINARY_DIR}/src)
 
   if(NOT ARROW_USE_NATIVE_INT128)
     list(APPEND PRECOMPILE_COMMAND -I${Boost_INCLUDE_DIR})
   endif()
-  add_custom_command(OUTPUT ${BC_FILE} COMMAND ${PRECOMPILE_COMMAND} DEPENDS ${SRC_FILE})
+  add_custom_command(OUTPUT ${BC_FILE}
+                     COMMAND ${PRECOMPILE_COMMAND}
+                     DEPENDS ${SRC_FILE})
   list(APPEND BC_FILES ${BC_FILE})
 endforeach()
 
@@ -96,13 +99,12 @@ add_custom_command(OUTPUT ${GANDIVA_PRECOMPILED_CC_PATH}
                    COMMAND ${PYTHON_EXECUTABLE}
                            "${CMAKE_CURRENT_SOURCE_DIR}/../make_precompiled_bitcode.py"
                            ${GANDIVA_PRECOMPILED_CC_IN_PATH}
-                           ${GANDIVA_PRECOMPILED_BC_PATH}
-                           ${GANDIVA_PRECOMPILED_CC_PATH}
+                           ${GANDIVA_PRECOMPILED_BC_PATH} ${GANDIVA_PRECOMPILED_CC_PATH}
                    DEPENDS ${GANDIVA_PRECOMPILED_CC_IN_PATH}
                            ${GANDIVA_PRECOMPILED_BC_PATH})
 
-add_custom_target(precompiled ALL
-                  DEPENDS ${GANDIVA_PRECOMPILED_BC_PATH} ${GANDIVA_PRECOMPILED_CC_PATH})
+add_custom_target(precompiled ALL DEPENDS ${GANDIVA_PRECOMPILED_BC_PATH}
+                                          ${GANDIVA_PRECOMPILED_CC_PATH})
 
 # testing
 if(ARROW_BUILD_TESTS)
@@ -130,11 +132,8 @@ if(ARROW_BUILD_TESTS)
                  ../decimal_xlarge.cc)
   target_include_directories(gandiva-precompiled-test PRIVATE ${CMAKE_SOURCE_DIR}/src)
   target_link_libraries(gandiva-precompiled-test PRIVATE ${ARROW_TEST_LINK_LIBS})
-  target_compile_definitions(gandiva-precompiled-test
-                             PRIVATE
-                             GANDIVA_UNIT_TEST=1
-                             ARROW_STATIC
-                             GANDIVA_STATIC)
+  target_compile_definitions(gandiva-precompiled-test PRIVATE GANDIVA_UNIT_TEST=1
+                                                              ARROW_STATIC GANDIVA_STATIC)
   set(TEST_PATH "${EXECUTABLE_OUTPUT_PATH}/gandiva-precompiled-test")
   add_test(gandiva-precompiled-test ${TEST_PATH})
   set_property(TEST gandiva-precompiled-test
diff --git a/cpp/src/parquet/CMakeLists.txt b/cpp/src/parquet/CMakeLists.txt
index 3f3ca5a5299..a487760a03e 100644
--- a/cpp/src/parquet/CMakeLists.txt
+++ b/cpp/src/parquet/CMakeLists.txt
@@ -127,19 +127,15 @@ set(PARQUET_STATIC_TEST_LINK_LIBS ${PARQUET_MIN_TEST_LIBS} parquet_static thrift
 
 #
 # Generated Thrift sources
-set_source_files_properties(src/generated/parquet_types.cpp
-                            src/generated/parquet_types.h
+set_source_files_properties(src/generated/parquet_types.cpp src/generated/parquet_types.h
                             src/generated/parquet_constants.cpp
                             src/generated/parquet_constants.h
-                            PROPERTIES
-                            SKIP_PRECOMPILE_HEADERS
-                            ON
-                            SKIP_UNITY_BUILD_INCLUSION
-                            ON)
+                            PROPERTIES SKIP_PRECOMPILE_HEADERS ON
+                                       SKIP_UNITY_BUILD_INCLUSION ON)
 
 if(NOT MSVC)
-  set_source_files_properties(src/parquet/parquet_types.cpp PROPERTIES COMPILE_FLAGS
-                              -Wno-unused-variable)
+  set_source_files_properties(src/parquet/parquet_types.cpp
+                              PROPERTIES COMPILE_FLAGS -Wno-unused-variable)
 endif()
 
 #
@@ -182,21 +178,16 @@ if(ARROW_HAVE_RUNTIME_AVX2)
   # AVX2 is used as a proxy for BMI2.
   list(APPEND PARQUET_SRCS level_comparison_avx2.cc level_conversion_bmi2.cc)
   set_source_files_properties(level_comparison_avx2.cc
-                              PROPERTIES
-                              SKIP_PRECOMPILE_HEADERS
-                              ON
-                              COMPILE_FLAGS
-                              "${ARROW_AVX2_FLAG}")
+                              PROPERTIES SKIP_PRECOMPILE_HEADERS ON COMPILE_FLAGS
+                                                                    "${ARROW_AVX2_FLAG}")
   # WARNING: DO NOT BLINDLY COPY THIS CODE FOR OTHER BMI2 USE CASES.
   # This code is always guarded by runtime dispatch which verifies
   # BMI2 is present.  For a very small number of CPUs AVX2 does not
   # imply BMI2.
   set_source_files_properties(level_conversion_bmi2.cc
-                              PROPERTIES
-                              SKIP_PRECOMPILE_HEADERS
-                              ON
-                              COMPILE_FLAGS
-                              "${ARROW_AVX2_FLAG} -DARROW_HAVE_BMI2 -mbmi2")
+                              PROPERTIES SKIP_PRECOMPILE_HEADERS ON
+                                         COMPILE_FLAGS
+                                         "${ARROW_AVX2_FLAG} -DARROW_HAVE_BMI2 -mbmi2")
 endif()
 
 if(PARQUET_REQUIRE_ENCRYPTION)
@@ -297,12 +288,9 @@ add_dependencies(parquet ${PARQUET_LIBRARIES} thrift::thrift)
 # Thrift requires these definitions for some types that we use
 foreach(LIB_TARGET ${PARQUET_LIBRARIES})
   target_compile_definitions(${LIB_TARGET}
-                             PRIVATE
-                             PARQUET_EXPORTING
-                             PRIVATE
-                             HAVE_INTTYPES_H
-                             PRIVATE
-                             HAVE_NETDB_H)
+                             PRIVATE PARQUET_EXPORTING
+                             PRIVATE HAVE_INTTYPES_H
+                             PRIVATE HAVE_NETDB_H)
   if(WIN32)
     target_compile_definitions(${LIB_TARGET} PRIVATE NOMINMAX)
   else()
@@ -336,12 +324,8 @@ add_parquet_test(internals-test
                  types_test.cc
                  test_util.cc)
 
-set_source_files_properties(public_api_test.cc
-                            PROPERTIES
-                            SKIP_PRECOMPILE_HEADERS
-                            ON
-                            SKIP_UNITY_BUILD_INCLUSION
-                            ON)
+set_source_files_properties(public_api_test.cc PROPERTIES SKIP_PRECOMPILE_HEADERS ON
+                                                          SKIP_UNITY_BUILD_INCLUSION ON)
 
 add_parquet_test(reader_test
                  SOURCES
diff --git a/cpp/src/plasma/CMakeLists.txt b/cpp/src/plasma/CMakeLists.txt
index 8c8523a37d5..a4e6e72b41b 100644
--- a/cpp/src/plasma/CMakeLists.txt
+++ b/cpp/src/plasma/CMakeLists.txt
@@ -98,14 +98,17 @@ set_source_files_properties(dlmalloc.cc PROPERTIES COMPILE_FLAGS "-O3")
 if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
   set_property(SOURCE dlmalloc.cc
                APPEND_STRING
-               PROPERTY COMPILE_FLAGS " -Wno-parentheses-equality \
+               PROPERTY COMPILE_FLAGS
+                        " -Wno-parentheses-equality \
 -Wno-null-pointer-arithmetic \
 -Wno-shorten-64-to-32 \
 -Wno-unused-macros")
 endif()
 
 if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
-  set_property(SOURCE dlmalloc.cc APPEND_STRING PROPERTY COMPILE_FLAGS " -Wno-conversion")
+  set_property(SOURCE dlmalloc.cc
+               APPEND_STRING
+               PROPERTY COMPILE_FLAGS " -Wno-conversion")
 endif()
 
 list(APPEND PLASMA_EXTERNAL_STORE_SOURCES "external_store.cc" "hash_table_store.cc")
@@ -128,8 +131,8 @@ if(ARROW_RPATH_ORIGIN)
   else()
     set(_lib_install_rpath "\$ORIGIN")
   endif()
-  set_target_properties(plasma-store-server
-                        PROPERTIES INSTALL_RPATH ${_lib_install_rpath})
+  set_target_properties(plasma-store-server PROPERTIES INSTALL_RPATH
+                                                       ${_lib_install_rpath})
 elseif(APPLE)
   # With OSX and conda, we need to set the correct RPATH so that dependencies
   # are found. The installed libraries with conda have an RPATH that matches
@@ -138,12 +141,9 @@ elseif(APPLE)
   # installed there.
   if(NOT "$ENV{CONDA_PREFIX}" STREQUAL "" AND APPLE)
     set_target_properties(plasma-store-server
-                          PROPERTIES BUILD_WITH_INSTALL_RPATH
-                                     TRUE
-                                     INSTALL_RPATH_USE_LINK_PATH
-                                     TRUE
-                                     INSTALL_RPATH
-                                     "$ENV{CONDA_PREFIX}/lib")
+                          PROPERTIES BUILD_WITH_INSTALL_RPATH TRUE
+                                     INSTALL_RPATH_USE_LINK_PATH TRUE
+                                     INSTALL_RPATH "$ENV{CONDA_PREFIX}/lib")
   endif()
 endif()
 
@@ -156,8 +156,8 @@ install(FILES common.h
 
 # Plasma store
 set_target_properties(plasma-store-server PROPERTIES INSTALL_RPATH_USE_LINK_PATH TRUE)
-install(TARGETS plasma-store-server ${INSTALL_IS_OPTIONAL} DESTINATION
-                ${CMAKE_INSTALL_BINDIR})
+install(TARGETS plasma-store-server ${INSTALL_IS_OPTIONAL}
+        DESTINATION ${CMAKE_INSTALL_BINDIR})
 
 if(ARROW_PLASMA_JAVA_CLIENT)
   # Plasma java client support
diff --git a/dev/archery/archery/utils/command.py b/dev/archery/archery/utils/command.py
index 84d2842073f..f655e2ef2e5 100644
--- a/dev/archery/archery/utils/command.py
+++ b/dev/archery/archery/utils/command.py
@@ -50,7 +50,8 @@ def wrapper(*argv, **kwargs):
 
 
 class Command:
-    """ A runnable command.
+    """
+    A runnable command.
 
     Class inheriting from the Command class must provide the bin
     property/attribute.
@@ -78,7 +79,9 @@ def run(self, *argv, **kwargs):
 
     @property
     def available(self):
-        """ Indicate if the command binary is found in PATH. """
+        """
+        Indicate if the command binary is found in PATH.
+        """
         binary = shlex.split(self.bin)[0]
         return shutil.which(binary) is not None
 
diff --git a/dev/archery/archery/utils/lint.py b/dev/archery/archery/utils/lint.py
index 0b0e8b46948..d95bfeea309 100644
--- a/dev/archery/archery/utils/lint.py
+++ b/dev/archery/archery/utils/lint.py
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+import fnmatch
 import gzip
 import os
 from pathlib import Path
@@ -31,6 +32,11 @@
 from .tmpdir import tmpdir
 
 
+_archery_install_msg = (
+    "Please install archery using: `pip install -e dev/archery[lint]`. "
+)
+
+
 class LintValidationException(Exception):
     pass
 
@@ -90,20 +96,75 @@ def cpp_linter(src, build_dir, clang_format=True, cpplint=True,
 
 
 class CMakeFormat(Command):
-    def __init__(self, cmake_format_bin):
-        self.bin = cmake_format_bin
+
+    def __init__(self, paths, cmake_format_bin=None):
+        self.check_version()
+        self.bin = default_bin(cmake_format_bin, "cmake-format")
+        self.paths = paths
+
+    @classmethod
+    def from_patterns(cls, base_path, include_patterns, exclude_patterns):
+        paths = {
+            str(path.as_posix())
+            for pattern in include_patterns
+            for path in base_path.glob(pattern)
+        }
+        for pattern in exclude_patterns:
+            pattern = (base_path / pattern).as_posix()
+            paths -= set(fnmatch.filter(paths, str(pattern)))
+        return cls(paths)
+
+    @staticmethod
+    def check_version():
+        try:
+            # cmake_format is part of the cmakelang package
+            import cmakelang
+        except ImportError:
+            raise ImportError(
+
+            )
+        # pin a specific version of cmake_format, must be updated in setup.py
+        if cmakelang.__version__ != "0.6.13":
+            raise LintValidationException(
+                f"Wrong version of cmake_format is detected. "
+                f"{_archery_install_msg}"
+            )
+
+    def check(self):
+        return self.run("-l", "error", "--check", *self.paths, check=False)
+
+    def fix(self):
+        return self.run("--in-place", *self.paths, check=False)
 
 
 def cmake_linter(src, fix=False):
-    """ Run cmake-format.py on all CMakeFiles.txt """
+    """
+    Run cmake-format on all CMakeFiles.txt
+    """
     logger.info("Running cmake-format linters")
 
-    if not fix:
-        logger.warn("run-cmake-format modifies files, regardless of --fix")
+    cmake_format = CMakeFormat.from_patterns(
+        src.path,
+        include_patterns=[
+            'ci/**/*.cmake',
+            'cpp/CMakeLists.txt',
+            'cpp/src/**/CMakeLists.txt',
+            'cpp/cmake_modules/*.cmake',
+            'go/**/CMakeLists.txt',
+            'java/**/CMakeLists.txt',
+            'matlab/**/CMakeLists.txt',
+            'python/CMakeLists.txt',
+        ],
+        exclude_patterns=[
+            'cpp/cmake_modules/FindNumPy.cmake',
+            'cpp/cmake_modules/FindPythonLibsNew.cmake',
+            'cpp/cmake_modules/UseCython.cmake',
+            'cpp/src/arrow/util/config.h.cmake',
+        ]
+    )
+    method = cmake_format.fix if fix else cmake_format.check
 
-    arrow_cmake_format = os.path.join(src.path, "run-cmake-format.py")
-    cmake_format = CMakeFormat(cmake_format_bin=arrow_cmake_format)
-    yield LintResult.from_cmd(cmake_format("--check"))
+    yield LintResult.from_cmd(method())
 
 
 def python_linter(src, fix=False):
@@ -118,7 +179,7 @@ def python_linter(src, fix=False):
     if not autopep8.available:
         logger.error(
             "Python formatter requested but autopep8 binary not found. "
-            "Please run `pip install -r dev/archery/requirements-lint.txt`")
+            f"{_archery_install_msg}")
         return
 
     # Gather files for autopep8
@@ -127,8 +188,7 @@ def python_linter(src, fix=False):
                 "python/pyarrow/**/*.pxd",
                 "python/pyarrow/**/*.pxi",
                 "python/examples/**/*.py",
-                "dev/archery/**/*.py",
-                ]
+                "dev/archery/**/*.py"]
     files = [setup_py]
     for pattern in patterns:
         files += list(map(str, Path(src.path).glob(pattern)))
@@ -158,7 +218,7 @@ def python_linter(src, fix=False):
     if not flake8.available:
         logger.error(
             "Python linter requested but flake8 binary not found. "
-            "Please run `pip install -r dev/archery/requirements-lint.txt`")
+            f"{_archery_install_msg}")
         return
 
     flake8_exclude = ['.venv*']
diff --git a/dev/archery/requirements-lint.txt b/dev/archery/requirements-lint.txt
deleted file mode 100644
index fc7f339ed4d..00000000000
--- a/dev/archery/requirements-lint.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-autopep8
-flake8
-cmake_format==0.5.2
diff --git a/dev/archery/setup.py b/dev/archery/setup.py
index a363824f8c9..eb70551de52 100755
--- a/dev/archery/setup.py
+++ b/dev/archery/setup.py
@@ -28,7 +28,7 @@
 jinja_req = 'jinja2>=2.11'
 
 extras = {
-    'lint': ['numpydoc==1.1.0', 'autopep8', 'flake8', 'cmake_format==0.5.2'],
+    'lint': ['numpydoc==1.1.0', 'autopep8', 'flake8', 'cmake_format==0.6.13'],
     'benchmark': ['pandas'],
     'docker': ['ruamel.yaml', 'python-dotenv'],
     'release': [jinja_req, 'jira', 'semver', 'gitpython'],
diff --git a/docker-compose.yml b/docker-compose.yml
index fa0f0a28ad1..79618a1cfed 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1332,10 +1332,7 @@ services:
     environment:
       <<: *ccache
     volumes: *ubuntu-volumes
-    command: >
-      /bin/bash -c "
-        pip install -e /arrow/dev/archery &&
-        archery lint --all --no-clang-tidy --no-iwyu --no-numpydoc"
+    command: archery lint --all --no-clang-tidy --no-iwyu --no-numpydoc
 
   ######################### Integration Tests #################################
 
diff --git a/docs/source/developers/cpp/development.rst b/docs/source/developers/cpp/development.rst
index c0f5a0f269a..ca7b64a6dc7 100644
--- a/docs/source/developers/cpp/development.rst
+++ b/docs/source/developers/cpp/development.rst
@@ -100,7 +100,7 @@ following checks:
 * Passes various C++ (and others) style checks, checked with the ``lint``
   subcommand to :ref:`Archery <archery>`.
 * CMake files pass style checks, can be fixed by running
-  ``run-cmake-format.py`` from the root of the repository. This requires Python
+  ``archery lint --cmake-format --fix``. This requires Python
   3 and `cmake_format <https://github.com/cheshirekow/cmake_format>`_ (note:
   this currently does not work on Windows)
 
diff --git a/docs/source/developers/python.rst b/docs/source/developers/python.rst
index d1fe086cb15..f2fbb927ac9 100644
--- a/docs/source/developers/python.rst
+++ b/docs/source/developers/python.rst
@@ -34,8 +34,7 @@ We follow a similar PEP8-like coding style to the `pandas project
 
 .. code-block:: shell
 
-   pip install -e arrow/dev/archery
-   pip install -r arrow/dev/archery/requirements-lint.txt
+   pip install -e arrow/dev/archery[lint]
 
 .. code-block:: shell
 
diff --git a/java/gandiva/CMakeLists.txt b/java/gandiva/CMakeLists.txt
index 0a7c4d03e3b..5010daf7996 100644
--- a/java/gandiva/CMakeLists.txt
+++ b/java/gandiva/CMakeLists.txt
@@ -28,35 +28,28 @@ message("generating headers to ${JNI_HEADERS_DIR}/jni")
 # centos5 does not have java8 images, so supporting java 7 too.
 # unfortunately create_javah does not work in java8 correctly.
 if(ARROW_GANDIVA_JAVA7)
-  add_jar(
-    gandiva_java
-    src/main/java/org/apache/arrow/gandiva/evaluator/ConfigurationBuilder.java
-    src/main/java/org/apache/arrow/gandiva/evaluator/JniWrapper.java
-    src/main/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistryJniHelper.java
-    src/main/java/org/apache/arrow/gandiva/exceptions/GandivaException.java)
+  add_jar(gandiva_java
+          src/main/java/org/apache/arrow/gandiva/evaluator/ConfigurationBuilder.java
+          src/main/java/org/apache/arrow/gandiva/evaluator/JniWrapper.java
+          src/main/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistryJniHelper.java
+          src/main/java/org/apache/arrow/gandiva/exceptions/GandivaException.java)
 
-  create_javah(TARGET
-               gandiva_jni_headers
-               CLASSES
-               org.apache.arrow.gandiva.evaluator.ConfigurationBuilder
-               org.apache.arrow.gandiva.evaluator.JniWrapper
-               org.apache.arrow.gandiva.evaluator.ExpressionRegistryJniHelper
-               org.apache.arrow.gandiva.exceptions.GandivaException
-               DEPENDS
-               gandiva_java
-               CLASSPATH
-               gandiva_java
-               OUTPUT_DIR
-               ${JNI_HEADERS_DIR}/jni)
+  create_javah(TARGET gandiva_jni_headers
+               CLASSES org.apache.arrow.gandiva.evaluator.ConfigurationBuilder
+                       org.apache.arrow.gandiva.evaluator.JniWrapper
+                       org.apache.arrow.gandiva.evaluator.ExpressionRegistryJniHelper
+                       org.apache.arrow.gandiva.exceptions.GandivaException
+               DEPENDS gandiva_java
+               CLASSPATH gandiva_java
+               OUTPUT_DIR ${JNI_HEADERS_DIR}/jni)
 else()
-  add_jar(
-    gandiva_java
-    src/main/java/org/apache/arrow/gandiva/evaluator/ConfigurationBuilder.java
-    src/main/java/org/apache/arrow/gandiva/evaluator/JniWrapper.java
-    src/main/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistryJniHelper.java
-    src/main/java/org/apache/arrow/gandiva/exceptions/GandivaException.java
-    GENERATE_NATIVE_HEADERS
-    gandiva_jni_headers
-    DESTINATION
-    ${JNI_HEADERS_DIR}/jni)
+  add_jar(gandiva_java
+          src/main/java/org/apache/arrow/gandiva/evaluator/ConfigurationBuilder.java
+          src/main/java/org/apache/arrow/gandiva/evaluator/JniWrapper.java
+          src/main/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistryJniHelper.java
+          src/main/java/org/apache/arrow/gandiva/exceptions/GandivaException.java
+          GENERATE_NATIVE_HEADERS
+          gandiva_jni_headers
+          DESTINATION
+          ${JNI_HEADERS_DIR}/jni)
 endif()
diff --git a/matlab/CMakeLists.txt b/matlab/CMakeLists.txt
index 3c03e6791ee..09c8839aaf0 100644
--- a/matlab/CMakeLists.txt
+++ b/matlab/CMakeLists.txt
@@ -37,24 +37,14 @@ set(MATLAB_ADDITIONAL_VERSIONS "R2018a=9.4")
 find_package(Matlab REQUIRED MX_LIBRARY)
 
 # Build featherread mex file based on the arrow shared library
-matlab_add_mex(NAME
-               featherreadmex
-               SRC
-               src/featherreadmex.cc
-               src/feather_reader.cc
-               src/util/handle_status.cc
-               src/util/unicode_conversion.cc
-               LINK_TO
-               ${ARROW_SHARED_LIB})
+matlab_add_mex(NAME featherreadmex
+               SRC src/featherreadmex.cc src/feather_reader.cc src/util/handle_status.cc
+                   src/util/unicode_conversion.cc
+               LINK_TO ${ARROW_SHARED_LIB})
 target_include_directories(featherreadmex PRIVATE ${ARROW_INCLUDE_DIR})
 
 # Build featherwrite mex file based on the arrow shared library
-matlab_add_mex(NAME
-               featherwritemex
-               SRC
-               src/featherwritemex.cc
-               src/feather_writer.cc
-               src/util/handle_status.cc
-               LINK_TO
-               ${ARROW_SHARED_LIB})
+matlab_add_mex(NAME featherwritemex
+               SRC src/featherwritemex.cc src/feather_writer.cc src/util/handle_status.cc
+               LINK_TO ${ARROW_SHARED_LIB})
 target_include_directories(featherwritemex PRIVATE ${ARROW_INCLUDE_DIR})
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 52220767854..8f91fbeb0dd 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -81,7 +81,9 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
   option(PYARROW_BUNDLE_ARROW_CPP "Bundle the Arrow C++ libraries" OFF)
   option(PYARROW_BUNDLE_BOOST "Bundle the Boost libraries when we bundle Arrow C++" OFF)
   option(PYARROW_GENERATE_COVERAGE "Build with Cython code coverage enabled" OFF)
-  set(PYARROW_CXXFLAGS "" CACHE STRING "Compiler flags to append when compiling Arrow")
+  set(PYARROW_CXXFLAGS
+      ""
+      CACHE STRING "Compiler flags to append when compiling Arrow")
 endif()
 
 find_program(CCACHE_FOUND ccache)
@@ -132,8 +134,8 @@ else()
   # Suppress Cython warnings
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-variable -Wno-maybe-uninitialized")
 
-  if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang"
-     OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+  if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL
+                                                    "Clang")
     # Cython warnings in clang
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-parentheses-equality")
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-constant-logical-operand")
@@ -172,10 +174,7 @@ if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_CURRENT_BINARY_DIR})
   if(NOT APPLE)
     set(MORE_ARGS "-T")
   endif()
-  execute_process(COMMAND ln
-                          ${MORE_ARGS}
-                          -sf
-                          ${BUILD_OUTPUT_ROOT_DIRECTORY}
+  execute_process(COMMAND ln ${MORE_ARGS} -sf ${BUILD_OUTPUT_ROOT_DIRECTORY}
                           ${CMAKE_CURRENT_BINARY_DIR}/build/latest)
 else()
   set(BUILD_OUTPUT_ROOT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${BUILD_SUBDIR_NAME}")
@@ -237,20 +236,17 @@ function(bundle_arrow_lib library_path)
   # Only copy the shared library with ABI version on Linux and macOS
 
   if(MSVC)
-    configure_file(
-      ${${library_path}}
-      ${BUILD_OUTPUT_ROOT_DIRECTORY}/${LIBRARY_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}
-      COPYONLY)
+    configure_file(${${library_path}}
+                   ${BUILD_OUTPUT_ROOT_DIRECTORY}/${LIBRARY_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}
+                   COPYONLY)
   elseif(APPLE)
-    configure_file(
-      ${LIBRARY_DIR}/${LIBRARY_NAME}.${ARG_SO_VERSION}${CMAKE_SHARED_LIBRARY_SUFFIX}
-      ${BUILD_OUTPUT_ROOT_DIRECTORY}/${LIBRARY_NAME}.${ARG_SO_VERSION}${CMAKE_SHARED_LIBRARY_SUFFIX}
-      COPYONLY)
+    configure_file(${LIBRARY_DIR}/${LIBRARY_NAME}.${ARG_SO_VERSION}${CMAKE_SHARED_LIBRARY_SUFFIX}
+                   ${BUILD_OUTPUT_ROOT_DIRECTORY}/${LIBRARY_NAME}.${ARG_SO_VERSION}${CMAKE_SHARED_LIBRARY_SUFFIX}
+                   COPYONLY)
   else()
-    configure_file(
-      ${${library_path}}.${ARG_SO_VERSION}
-      ${BUILD_OUTPUT_ROOT_DIRECTORY}/${LIBRARY_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}.${ARG_SO_VERSION}
-      COPYONLY)
+    configure_file(${${library_path}}.${ARG_SO_VERSION}
+                   ${BUILD_OUTPUT_ROOT_DIRECTORY}/${LIBRARY_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}.${ARG_SO_VERSION}
+                   COPYONLY)
   endif()
 
 endfunction(bundle_arrow_lib)
@@ -270,15 +266,13 @@ function(bundle_boost_lib library_path)
   set(Boost_SO_VERSION
       "${Boost_MAJOR_VERSION}.${Boost_MINOR_VERSION}.${Boost_SUBMINOR_VERSION}")
   if(APPLE)
-    configure_file(
-      ${${library_path}}
-      ${BUILD_OUTPUT_ROOT_DIRECTORY}/${LIBRARY_NAME_WE}${CMAKE_SHARED_LIBRARY_SUFFIX}
-      COPYONLY)
+    configure_file(${${library_path}}
+                   ${BUILD_OUTPUT_ROOT_DIRECTORY}/${LIBRARY_NAME_WE}${CMAKE_SHARED_LIBRARY_SUFFIX}
+                   COPYONLY)
   else()
-    configure_file(
-      ${${library_path}}
-      ${BUILD_OUTPUT_ROOT_DIRECTORY}/${LIBRARY_NAME_WE}${CMAKE_SHARED_LIBRARY_SUFFIX}.${Boost_SO_VERSION}
-      COPYONLY)
+    configure_file(${${library_path}}
+                   ${BUILD_OUTPUT_ROOT_DIRECTORY}/${LIBRARY_NAME_WE}${CMAKE_SHARED_LIBRARY_SUFFIX}.${Boost_SO_VERSION}
+                   COPYONLY)
   endif()
 endfunction()
 
@@ -320,9 +314,8 @@ function(bundle_arrow_dependency library_name)
   if(SHARED_LIB_PATH)
     get_filename_component(SHARED_LIB_REALPATH ${SHARED_LIB_PATH} REALPATH)
     get_filename_component(SHARED_LIB_NAME ${SHARED_LIB_PATH} NAME)
-    message(
-      STATUS
-        "Bundle dependency ${library_name}: ${SHARED_LIB_REALPATH} as ${SHARED_LIB_NAME}")
+    message(STATUS "Bundle dependency ${library_name}: ${SHARED_LIB_REALPATH} as ${SHARED_LIB_NAME}"
+    )
     configure_file(${SHARED_LIB_REALPATH}
                    ${BUILD_OUTPUT_ROOT_DIRECTORY}/${SHARED_LIB_NAME} COPYONLY)
   else()
@@ -367,7 +360,9 @@ if(PYARROW_BUNDLE_ARROW_CPP)
       # disable autolinking in boost
       add_definitions(-DBOOST_ALL_NO_LIB)
     endif()
-    find_package(Boost COMPONENTS regex REQUIRED)
+    find_package(Boost
+                 COMPONENTS regex
+                 REQUIRED)
     bundle_boost_lib(Boost_REGEX_LIBRARY)
   endif()
 
@@ -461,7 +456,9 @@ if(PYARROW_BUILD_PARQUET)
     else()
       set(Boost_USE_STATIC_LIBS ON)
     endif()
-    find_package(Boost COMPONENTS regex REQUIRED)
+    find_package(Boost
+                 COMPONENTS regex
+                 REQUIRED)
     add_thirdparty_lib(boost_regex STATIC_LIB ${Boost_REGEX_LIBRARY_RELEASE})
     add_thirdparty_lib(thrift STATIC_LIB ${THRIFT_STATIC_LIB})
     set(PARQUET_LINK_LIBS parquet_static thrift_static boost_regex_static)
@@ -556,8 +553,8 @@ foreach(module ${CYTHON_EXTENSIONS})
 
   if(directories)
     string(REPLACE ";" "/" module_output_directory ${directories})
-    set_target_properties(${module_name}
-                          PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${module_output_directory})
+    set_target_properties(${module_name} PROPERTIES LIBRARY_OUTPUT_DIRECTORY
+                                                    ${module_output_directory})
   endif()
 
   if(PYARROW_BUNDLE_ARROW_CPP)
@@ -585,9 +582,8 @@ foreach(module ${CYTHON_EXTENSIONS})
   endif()
 
   if(PYARROW_GENERATE_COVERAGE)
-    set_target_properties(${module_name}
-                          PROPERTIES COMPILE_DEFINITIONS
-                                     "CYTHON_TRACE=1;CYTHON_TRACE_NOGIL=1")
+    set_target_properties(${module_name} PROPERTIES COMPILE_DEFINITIONS
+                                                    "CYTHON_TRACE=1;CYTHON_TRACE_NOGIL=1")
   endif()
 
   target_link_libraries(${module_name} PRIVATE ${LINK_LIBS})
diff --git a/run-cmake-format.py b/run-cmake-format.py
deleted file mode 100755
index 1ff103868d8..00000000000
--- a/run-cmake-format.py
+++ /dev/null
@@ -1,111 +0,0 @@
-#!/usr/bin/env python3
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import argparse
-import fnmatch
-import hashlib
-import pathlib
-import subprocess
-import sys
-
-# Keep an explicit list of files to format as we don't want to reformat
-# files we imported from other location.
-PATTERNS = [
-    'ci/**/*.cmake',
-    'cpp/CMakeLists.txt',
-    'cpp/src/**/CMakeLists.txt',
-    'cpp/cmake_modules/*.cmake',
-    'go/**/CMakeLists.txt',
-    'java/**/CMakeLists.txt',
-    'matlab/**/CMakeLists.txt',
-]
-EXCLUDE = [
-    'cpp/cmake_modules/FindNumPy.cmake',
-    'cpp/cmake_modules/FindPythonLibsNew.cmake',
-    'cpp/cmake_modules/UseCython.cmake',
-    'cpp/src/arrow/util/config.h.cmake',
-]
-
-here = pathlib.Path(__file__).parent
-
-
-def find_cmake_files():
-    for pat in PATTERNS:
-        yield from here.glob(pat)
-
-
-def run_cmake_format(paths):
-    # cmake-format is fast enough that running in parallel doesn't seem
-    # necessary
-    # autosort is off because it breaks in cmake_format 5.1
-    #   See: https://github.com/cheshirekow/cmake_format/issues/111
-    cmd = ['cmake-format', '--in-place', '--autosort=false'] + paths
-    try:
-        subprocess.run(cmd, check=True)
-    except FileNotFoundError:
-        try:
-            import cmake_format
-        except ImportError:
-            raise ImportError(
-                "Please install cmake-format: `pip install cmake_format`")
-        else:
-            # Other error, re-raise
-            raise
-
-
-def check_cmake_format(paths):
-    hashes = {}
-    for p in paths:
-        contents = p.read_bytes()
-        hashes[p] = hashlib.sha256(contents).digest()
-
-    run_cmake_format(paths)
-
-    # Check contents didn't change
-    changed = []
-    for p in paths:
-        contents = p.read_bytes()
-        if hashes[p] != hashlib.sha256(contents).digest():
-            changed.append(p)
-
-    if changed:
-        items = "\n".join("- %s" % p for p in sorted(changed))
-        print("The following cmake files need re-formatting:\n%s" % (items,))
-        print()
-        print("Consider running `run-cmake-format.py`")
-        sys.exit(1)
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--check', action='store_true')
-    parser.add_argument('paths', nargs='*', type=pathlib.Path)
-    args = parser.parse_args()
-
-    paths = find_cmake_files()
-    if args.paths:
-        paths = set(paths) & set([path.resolve() for path in args.paths])
-    paths = [
-        path for path in paths
-        if path.relative_to(here).as_posix() not in EXCLUDE
-    ]
-    if args.check:
-        check_cmake_format(paths)
-    else:
-        run_cmake_format(paths)

From 998a2a1668ea57a49d85fbb38f7f0e7eb94c29db Mon Sep 17 00:00:00 2001
From: Ian Cook <ianmcook@gmail.com>
Date: Wed, 23 Jun 2021 16:46:03 -0400
Subject: [PATCH 452/719] ARROW-11514: [R][C++] Bindings for paste(), paste0(),
 str_c()

Adds support for the string concatenation functions `paste()`, `paste0()`, and `str_c()` in dplyr verbs. Only the non-aggregating `collapse = NULL` case is currently supported.

Closes #10547 from ianmcook/ARROW-11514

Authored-by: Ian Cook <ianmcook@gmail.com>
Signed-off-by: Ian Cook <ianmcook@gmail.com>
---
 .../arrow/compute/kernels/scalar_string.cc    |   8 +-
 r/NAMESPACE                                   |   1 +
 r/R/dplyr-functions.R                         |  55 ++++++
 r/R/enums.R                                   |   6 +
 r/man/enums.Rd                                |   5 +
 r/src/compute.cpp                             |  14 ++
 .../testthat/test-dplyr-string-functions.R    | 156 ++++++++++++++++++
 7 files changed, 242 insertions(+), 3 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/scalar_string.cc b/cpp/src/arrow/compute/kernels/scalar_string.cc
index 3f63bf2c405..dbacb6bb96f 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string.cc
@@ -3587,10 +3587,12 @@ void AddBinaryJoin(FunctionRegistry* registry) {
         "binary_join_element_wise", Arity::VarArgs(/*min_args=*/1),
         &binary_join_element_wise_doc, &kDefaultJoinOptions);
     for (const auto& ty : BaseBinaryTypes()) {
-      DCHECK_OK(
-          func->AddKernel({InputType(ty)}, ty,
+      ScalarKernel kernel{KernelSignature::Make({InputType(ty)}, ty, /*is_varargs=*/true),
                           GenerateTypeAgnosticVarBinaryBase<BinaryJoinElementWise>(ty),
-                          BinaryJoinElementWiseState::Init));
+                          BinaryJoinElementWiseState::Init};
+      kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
+      kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
+      DCHECK_OK(func->AddKernel(std::move(kernel)));
     }
     DCHECK_OK(registry->AddFunction(std::move(func)));
   }
diff --git a/r/NAMESPACE b/r/NAMESPACE
index f298ba905ee..ab45aa9985e 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -153,6 +153,7 @@ export(MessageReader)
 export(MessageType)
 export(MetadataVersion)
 export(NullEncodingBehavior)
+export(NullHandlingBehavior)
 export(ParquetArrowReaderProperties)
 export(ParquetFileFormat)
 export(ParquetFileReader)
diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R
index 91d1b21ad88..1cf6fabebee 100644
--- a/r/R/dplyr-functions.R
+++ b/r/R/dplyr-functions.R
@@ -215,6 +215,61 @@ nse_funcs$nchar <- function(x, type = "chars", allowNA = FALSE, keepNA = NA) {
   }
 }
 
+nse_funcs$paste <- function(..., sep = " ", collapse = NULL, recycle0 = FALSE) {
+  assert_that(
+    is.null(collapse),
+    msg = "paste() with the collapse argument is not yet supported in Arrow"
+  )
+  if (!inherits(sep, "Expression")) {
+    assert_that(!is.na(sep), msg = "Invalid separator")
+  }
+  arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., sep)
+}
+
+nse_funcs$paste0 <- function(..., collapse = NULL, recycle0 = FALSE) {
+  assert_that(
+    is.null(collapse),
+    msg = "paste0() with the collapse argument is not yet supported in Arrow"
+  )
+  arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., "")
+}
+
+nse_funcs$str_c <- function(..., sep = "", collapse = NULL) {
+  assert_that(
+    is.null(collapse),
+    msg = "str_c() with the collapse argument is not yet supported in Arrow"
+  )
+  arrow_string_join_function(NullHandlingBehavior$EMIT_NULL)(..., sep)
+}
+
+arrow_string_join_function <- function(null_handling, null_replacement = NULL) {
+  # the `binary_join_element_wise` Arrow C++ compute kernel takes the separator
+  # as the last argument, so pass `sep` as the last dots arg to this function
+  function(...) {
+    args <- lapply(list(...), function(arg) {
+      # handle scalar literal args, and cast all args to string for
+      # consistency with base::paste(), base::paste0(), and stringr::str_c()
+      if (!inherits(arg, "Expression")) {
+        assert_that(
+          length(arg) == 1,
+          msg = "Literal vectors of length != 1 not supported in string concatenation"
+        )
+        Expression$scalar(as.character(arg))
+      } else {
+        nse_funcs$as.character(arg)
+      }
+    })
+    Expression$create(
+      "binary_join_element_wise",
+      args = args,
+      options = list(
+        null_handling = null_handling,
+        null_replacement = null_replacement
+      )
+    )
+  }
+}
+
 nse_funcs$str_trim <- function(string, side = c("both", "left", "right")) {
   side <- match.arg(side)
   trim_fun <- switch(side,
diff --git a/r/R/enums.R b/r/R/enums.R
index 4271f2ad138..8a5bf7366a9 100644
--- a/r/R/enums.R
+++ b/r/R/enums.R
@@ -140,3 +140,9 @@ QuantileInterpolation <- enum("QuantileInterpolation",
 NullEncodingBehavior <- enum("NullEncodingBehavior",
   ENCODE = 0L, MASK = 1L
 )
+
+#' @export
+#' @rdname enums
+NullHandlingBehavior <- enum("NullHandlingBehavior",
+  EMIT_NULL = 0L, SKIP = 1L, REPLACE = 2L
+)
diff --git a/r/man/enums.Rd b/r/man/enums.Rd
index b871516def8..57ec3ba115e 100644
--- a/r/man/enums.Rd
+++ b/r/man/enums.Rd
@@ -15,6 +15,7 @@
 \alias{MetadataVersion}
 \alias{QuantileInterpolation}
 \alias{NullEncodingBehavior}
+\alias{NullHandlingBehavior}
 \title{Arrow enums}
 \format{
 An object of class \code{TimeUnit::type} (inherits from \code{arrow-enum}) of length 4.
@@ -40,6 +41,8 @@ An object of class \code{MetadataVersion} (inherits from \code{arrow-enum}) of l
 An object of class \code{QuantileInterpolation} (inherits from \code{arrow-enum}) of length 5.
 
 An object of class \code{NullEncodingBehavior} (inherits from \code{arrow-enum}) of length 2.
+
+An object of class \code{NullHandlingBehavior} (inherits from \code{arrow-enum}) of length 3.
 }
 \usage{
 TimeUnit
@@ -65,6 +68,8 @@ MetadataVersion
 QuantileInterpolation
 
 NullEncodingBehavior
+
+NullHandlingBehavior
 }
 \description{
 Arrow enums
diff --git a/r/src/compute.cpp b/r/src/compute.cpp
index eab9db54134..3d322ab6c71 100644
--- a/r/src/compute.cpp
+++ b/r/src/compute.cpp
@@ -218,6 +218,20 @@ std::shared_ptr<arrow::compute::FunctionOptions> make_compute_options(
     return make_cast_options(options);
   }
 
+  if (func_name == "binary_join_element_wise") {
+    using Options = arrow::compute::JoinOptions;
+    auto out = std::make_shared<Options>(Options::Defaults());
+    if (!Rf_isNull(options["null_handling"])) {
+      out->null_handling =
+          cpp11::as_cpp<enum arrow::compute::JoinOptions::NullHandlingBehavior>(
+              options["null_handling"]);
+    }
+    if (!Rf_isNull(options["null_replacement"])) {
+      out->null_replacement = cpp11::as_cpp<std::string>(options["null_replacement"]);
+    }
+    return out;
+  }
+
   if (func_name == "match_substring" || func_name == "match_substring_regex") {
     using Options = arrow::compute::MatchSubstringOptions;
     bool ignore_case = false;
diff --git a/r/tests/testthat/test-dplyr-string-functions.R b/r/tests/testthat/test-dplyr-string-functions.R
index ea27aa14777..4afb88e5732 100644
--- a/r/tests/testthat/test-dplyr-string-functions.R
+++ b/r/tests/testthat/test-dplyr-string-functions.R
@@ -21,6 +21,162 @@ skip_if_not_available("utf8proc")
 library(dplyr)
 library(stringr)
 
+test_that("paste, paste0, and str_c", {
+  df <- tibble(
+    v = c("A", "B", "C"),
+    w = c("a", "b", "c"),
+    x = c("d", NA_character_, "f"),
+    y = c(NA_character_, "h", "i"),
+    z = c(1.1, 2.2, NA)
+  )
+  x <- Expression$field_ref("x")
+  y <- Expression$field_ref("y")
+
+  # no NAs in data
+  expect_dplyr_equal(
+    input %>%
+      transmute(paste(v, w)) %>%
+      collect(),
+    df
+  )
+  expect_dplyr_equal(
+    input %>%
+      transmute(paste(v, w, sep = "-")) %>%
+      collect(),
+    df
+  )
+  expect_dplyr_equal(
+    input %>%
+      transmute(paste0(v, w)) %>%
+      collect(),
+    df
+  )
+  expect_dplyr_equal(
+    input %>%
+      transmute(str_c(v, w)) %>%
+      collect(),
+    df
+  )
+  expect_dplyr_equal(
+    input %>%
+      transmute(str_c(v, w, sep = "+")) %>%
+      collect(),
+    df
+  )
+
+  # NAs in data
+  expect_dplyr_equal(
+    input %>%
+      transmute(paste(x, y)) %>%
+      collect(),
+    df
+  )
+  expect_dplyr_equal(
+    input %>%
+      transmute(paste(x, y, sep = "-")) %>%
+      collect(),
+    df
+  )
+  expect_dplyr_equal(
+    input %>%
+      transmute(str_c(x, y)) %>%
+      collect(),
+    df
+  )
+
+  # non-character column in dots
+  expect_dplyr_equal(
+    input %>%
+      transmute(paste0(x, y, z)) %>%
+      collect(),
+    df
+  )
+
+  # literal string in dots
+  expect_dplyr_equal(
+    input %>%
+      transmute(paste(x, "foo", y)) %>%
+      collect(),
+    df
+  )
+
+  # literal NA in dots
+  expect_dplyr_equal(
+    input %>%
+      transmute(paste(x, NA, y)) %>%
+      collect(),
+    df
+  )
+
+  # expressions in dots
+  expect_dplyr_equal(
+    input %>%
+      transmute(paste0(x, toupper(y), as.character(z))) %>%
+      collect(),
+    df
+  )
+
+  # sep is literal NA
+  # errors in paste() (consistent with base::paste())
+  expect_error(
+    nse_funcs$paste(x, y, sep = NA_character_),
+    "Invalid separator"
+  )
+  # emits null in str_c() (consistent with stringr::str_c())
+  expect_dplyr_equal(
+    input %>%
+      transmute(str_c(x, y, sep = NA_character_)) %>%
+      collect(),
+    df
+  )
+
+  # sep passed in dots to paste0 (which doesn't take a sep argument)
+  expect_dplyr_equal(
+    input %>%
+      transmute(paste0(x, y, sep = "-")) %>%
+      collect(),
+    df
+  )
+
+  # known differences
+
+  # arrow allows the separator to be an array
+  expect_equal(
+    df %>%
+      Table$create() %>%
+      transmute(result = paste(x, y, sep = w)) %>%
+      collect(),
+    df %>%
+      transmute(result = paste(x, w, y, sep = ""))
+  )
+
+  # expected errors
+
+  # collapse argument not supported
+  expect_error(
+    nse_funcs$paste(x, y, collapse = ""),
+    "collapse"
+  )
+  expect_error(
+    nse_funcs$paste0(x, y, collapse = ""),
+    "collapse"
+  )
+  expect_error(
+    nse_funcs$str_c(x, y, collapse = ""),
+    "collapse"
+  )
+
+  # literal vectors of length != 1 not supported
+  expect_error(
+    nse_funcs$paste(x, character(0), y),
+    "Literal vectors of length != 1 not supported in string concatenation"
+  )
+  expect_error(
+    nse_funcs$paste(x, c(",", ";"), y),
+    "Literal vectors of length != 1 not supported in string concatenation"
+  )
+})
+
 test_that("grepl with ignore.case = FALSE and fixed = TRUE", {
   df <- tibble(x = c("Foo", "bar"))
   expect_dplyr_equal(

From ad5dc8207192abe71d3e88303252629041968508 Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Thu, 24 Jun 2021 10:25:52 -0400
Subject: [PATCH 453/719] ARROW-12870: [R] Bindings for stringr::str_like

Closes #10590 from thisisnic/ARROW-12870-str_like

Lead-authored-by: Nic Crane <thisisnic@gmail.com>
Co-authored-by: Nic <thisisnic@gmail.com>
Signed-off-by: Ian Cook <ianmcook@gmail.com>
---
 r/R/dplyr-functions.R                         |  8 +++
 r/src/compute.cpp                             |  3 +-
 .../testthat/test-dplyr-string-functions.R    | 63 +++++++++++++++++++
 3 files changed, 73 insertions(+), 1 deletion(-)

diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R
index 1cf6fabebee..7356c469eb1 100644
--- a/r/R/dplyr-functions.R
+++ b/r/R/dplyr-functions.R
@@ -303,6 +303,14 @@ nse_funcs$str_detect <- function(string, pattern, negate = FALSE) {
   out
 }
 
+nse_funcs$str_like <- function(string, pattern, ignore_case = TRUE) {
+  Expression$create(
+    "match_like",
+    string,
+    options = list(pattern = pattern, ignore_case = ignore_case)
+  )
+}
+
 # Encapsulate some common logic for sub/gsub/str_replace/str_replace_all
 arrow_r_string_replace_function <- function(max_replacements) {
   function(pattern, replacement, x, ignore.case = FALSE, fixed = FALSE) {
diff --git a/r/src/compute.cpp b/r/src/compute.cpp
index 3d322ab6c71..01bc684c6df 100644
--- a/r/src/compute.cpp
+++ b/r/src/compute.cpp
@@ -232,7 +232,8 @@ std::shared_ptr<arrow::compute::FunctionOptions> make_compute_options(
     return out;
   }
 
-  if (func_name == "match_substring" || func_name == "match_substring_regex") {
+  if (func_name == "match_substring" || func_name == "match_substring_regex" ||
+      func_name == "match_like") {
     using Options = arrow::compute::MatchSubstringOptions;
     bool ignore_case = false;
     if (!Rf_isNull(options["ignore_case"])) {
diff --git a/r/tests/testthat/test-dplyr-string-functions.R b/r/tests/testthat/test-dplyr-string-functions.R
index 4afb88e5732..a58a04eb109 100644
--- a/r/tests/testthat/test-dplyr-string-functions.R
+++ b/r/tests/testthat/test-dplyr-string-functions.R
@@ -724,3 +724,66 @@ test_that("errors in strptime", {
     'Time zone argument not supported by Arrow'
   )
 })
+
+test_that("str_like", {
+  
+  df <- tibble(x = c("Foo and bar", "baz and qux and quux"))
+  
+  # TODO: After new version of stringr with str_like has been released, update all
+  # these tests to use expect_dplyr_equal
+  
+  # No match - entire string
+  expect_equivalent(
+    df %>%
+      Table$create() %>%
+      mutate(x = str_like(x, "baz")) %>%
+      collect(),
+    tibble(x = c(FALSE, FALSE))
+  )
+  
+  # Match - entire string
+  expect_equivalent(
+    df %>%
+      Table$create() %>%
+      mutate(x = str_like(x, "Foo and bar")) %>%
+      collect(),
+    tibble(x = c(TRUE, FALSE))
+  )
+  
+  # Wildcard
+  expect_equivalent(
+    df %>%
+      Table$create() %>%
+      mutate(x = str_like(x, "f%", ignore_case = TRUE)) %>%
+      collect(),
+    tibble(x = c(TRUE, FALSE))
+  )
+  
+  # Ignore case
+  expect_equivalent(
+    df %>%
+      Table$create() %>%
+      mutate(x = str_like(x, "f%", ignore_case = FALSE)) %>%
+      collect(),
+    tibble(x = c(FALSE, FALSE))
+  )
+  
+  # Single character
+  expect_equivalent(
+    df %>%
+      Table$create() %>%
+      mutate(x = str_like(x, "_a%")) %>%
+      collect(),
+    tibble(x = c(FALSE, TRUE))
+  )
+  
+  # This will give an error until a new version of stringr with str_like has been released
+  skip("Test will fail until stringr > 1.4.0 is release")
+  expect_dplyr_equal(
+    input %>%
+      mutate(x = str_like(x, "%baz%")) %>%
+      collect(),
+    df,
+  )
+  
+})

From 1e26cc1cd0a09fba9790e25d952afbfeb92aaa41 Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Thu, 24 Jun 2021 08:15:23 -0700
Subject: [PATCH 454/719] ARROW-13037: [R] Incorrect param when creating
 Expression crashes R

Closes #10584 from thisisnic/ARROW_13037-expression_crash

Authored-by: Nic Crane <thisisnic@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 r/R/expression.R                   | 2 ++
 r/tests/testthat/test-expression.R | 6 ++++++
 2 files changed, 8 insertions(+)

diff --git a/r/R/expression.R b/r/R/expression.R
index ba542339ff8..a25c529ae7c 100644
--- a/r/R/expression.R
+++ b/r/R/expression.R
@@ -104,8 +104,10 @@ Expression$create <- function(function_name,
                               args = list(...),
                               options = empty_named_list()) {
   assert_that(is.string(function_name))
+  assert_that(is_list_of(args, "Expression"), msg = "Expression arguments must be Expression objects")
   compute___expr__call(function_name, args, options)
 }
+
 Expression$field_ref <- function(name) {
   assert_that(is.string(name))
   compute___expr__field_ref(name)
diff --git a/r/tests/testthat/test-expression.R b/r/tests/testthat/test-expression.R
index 49babf30d5c..d76b339db37 100644
--- a/r/tests/testthat/test-expression.R
+++ b/r/tests/testthat/test-expression.R
@@ -58,4 +58,10 @@ test_that("C++ expressions", {
   )
   # Interprets that as a list type
   expect_r6_class(f == c(1L, 2L), "Expression")
+  
+  expect_error(
+    Expression$create("add", 1, 2),
+    "Expression arguments must be Expression objects"
+  )
+  
 })

From 5275e7267f2462b27548e85eb45efe94782a5450 Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Thu, 24 Jun 2021 08:16:29 -0700
Subject: [PATCH 455/719] ARROW-13022: [R] bindings for lubridate's year,
 isoyear, quarter, month, day, wday, yday, isoweek, hour, minute, and second
 functions

Closes #10507 from thisisnic/ARROW-13022_lubridate

Lead-authored-by: Nic Crane <thisisnic@gmail.com>
Co-authored-by: Nic <thisisnic@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 r/R/dplyr-functions.R                   |  27 ++++
 r/R/expression.R                        |  15 +-
 r/tests/testthat/test-dplyr-lubridate.R | 178 ++++++++++++++++++++++++
 3 files changed, 218 insertions(+), 2 deletions(-)
 create mode 100644 r/tests/testthat/test-dplyr-lubridate.R

diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R
index 7356c469eb1..5076fc09847 100644
--- a/r/R/dplyr-functions.R
+++ b/r/R/dplyr-functions.R
@@ -505,3 +505,30 @@ nse_funcs$strptime <- function(x, format = "%Y-%m-%d %H:%M:%S", tz = NULL, unit
 
   Expression$create("strptime", x, options = list(format = format, unit = unit))
 }
+
+nse_funcs$second <- function(x) {
+  Expression$create("add", Expression$create("second", x), Expression$create("subsecond", x))
+}
+
+# After ARROW-13054 is completed, we can refactor this for simplicity
+# 
+# Arrow's `day_of_week` kernel counts from 0 (Monday) to 6 (Sunday), whereas
+# `lubridate::wday` counts from 1 to 7, and allows users to specify which day
+# of the week is first (Sunday by default).  This Expression converts the returned
+# day of the week back to the value that would be returned by lubridate by
+# providing offset values based on the specified week_start day, and adding 1
+# so the returned value is 1-indexed instead of 0-indexed.
+nse_funcs$wday <- function(x, label = FALSE, abbr = TRUE, week_start = getOption("lubridate.week.start", 7)) {
+  
+  # The "day_of_week" compute function returns numeric days of week and not locale-aware strftime
+  # When the ticket below is resolved, we should be able to support the label argument
+  # https://issues.apache.org/jira/browse/ARROW-13133
+  if (label) {
+    arrow_not_supported("Label argument")
+  }
+  
+  # overall formula to convert from arrow::wday to lubridate::wday is:
+  #  ((wday(day) - start + 8) %% 7) + 1
+  ((Expression$create("day_of_week", x) - Expression$scalar(week_start) + 8) %% 7) + 1
+  
+}
diff --git a/r/R/expression.R b/r/R/expression.R
index a25c529ae7c..26351d61aa4 100644
--- a/r/R/expression.R
+++ b/r/R/expression.R
@@ -29,8 +29,19 @@
   # stringr spellings of those
   "str_length" = "utf8_length",
   "str_to_lower" = "utf8_lower",
-  "str_to_upper" = "utf8_upper"
-  # str_trim is defined in dplyr.R
+  "str_to_upper" = "utf8_upper",
+  # str_trim is defined in dplyr-functions.R
+  "year" = "year",
+  "isoyear" = "iso_year",
+  "quarter" = "quarter",
+  "month" = "month",
+  "isoweek" = "iso_week",
+  "day" = "day",
+  # wday is defined in dplyr-functions.R
+  "yday" = "day_of_year",
+  "hour" = "hour",
+  # second is defined in dplyr-functions.R
+  "minute" = "minute"
 )
 
 .binary_function_map <- list(
diff --git a/r/tests/testthat/test-dplyr-lubridate.R b/r/tests/testthat/test-dplyr-lubridate.R
new file mode 100644
index 00000000000..2ebb6f3b93e
--- /dev/null
+++ b/r/tests/testthat/test-dplyr-lubridate.R
@@ -0,0 +1,178 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+library(lubridate)
+library(dplyr)
+
+test_date <- as.POSIXct("2017-01-01 00:00:12.3456789", tz = "")
+test_df <- tibble::tibble(date = test_date)
+
+# We can support this feature after ARROW-12980 is merged
+test_that("timezone aware timestamps are not supported",{
+  
+  tz_aware_date <- as.POSIXct("2017-01-01 00:00:12.3456789", tz = "BST")
+  tz_aware_df <- tibble::tibble(date = tz_aware_date)
+  
+  expect_error(
+    Table$create(tz_aware_df) %>%
+      mutate(x = wday(date)) %>%
+      collect(),
+    "Cannot extract components from timestamp with specific timezone"
+  )
+})
+
+# We can support this feature when ARROW-13138 is resolved
+test_that("date32 objects are not supported",{
+  
+  date <- ymd("2017-01-01")
+  df <- tibble::tibble(date = date)
+  
+  expect_error(
+    Table$create(df) %>%
+      mutate(x = year(date)) %>%
+      collect(),
+    "Function year has no kernel matching input types"
+  )
+})
+
+
+test_that("extract year from date", {
+  expect_dplyr_equal(
+    input %>%
+      mutate(x = year(date)) %>%
+      collect(),
+    test_df,
+    check.tzone = FALSE
+  )
+})
+
+test_that("extract isoyear from date", {
+  expect_dplyr_equal(
+    input %>%
+      mutate(x = isoyear(date)) %>%
+      collect(),
+    test_df
+  )
+})
+  
+test_that("extract quarter from date", {
+  expect_dplyr_equal(
+    input %>%
+      mutate(x = quarter(date)) %>%
+      collect(),
+    test_df
+  )
+})
+
+test_that("extract month from date", {
+  expect_dplyr_equal(
+    input %>%
+      mutate(x = month(date)) %>%
+      collect(),
+    test_df
+  )
+})
+
+test_that("extract isoweek from date", {
+  expect_dplyr_equal(
+    input %>%
+      mutate(x = isoweek(date)) %>%
+      collect(),
+    test_df
+  )
+})
+
+test_that("extract day from date", {
+  expect_dplyr_equal(
+    input %>%
+      mutate(x = day(date)) %>%
+      collect(),
+    test_df
+  )
+})
+  
+
+test_that("extract wday from date", {
+ expect_dplyr_equal(
+    input %>%
+      mutate(x = wday(date)) %>%
+      collect(),
+    test_df
+  )
+  
+  expect_dplyr_equal(
+    input %>%
+      mutate(x = wday(date, week_start = 3)) %>%
+      collect(),
+    test_df
+  )
+  
+  expect_dplyr_equal(
+    input %>%
+      mutate(x = wday(date, week_start = 1)) %>%
+      collect(),
+    test_df
+  )
+  
+  # We should be able to support the label argument after this ticket is resolved:
+  # https://issues.apache.org/jira/browse/ARROW-13133
+  x <- Expression$field_ref("x")
+  expect_error(
+    nse_funcs$wday(x, label = TRUE),
+    "Label argument not supported by Arrow"
+  )
+  
+})
+  
+test_that("extract yday from date", {
+  expect_dplyr_equal(
+    input %>%
+      mutate(x = yday(date)) %>%
+      collect(),
+    test_df
+  )
+})
+  
+test_that("extract hour from date", {
+  expect_dplyr_equal(
+    input %>%
+      mutate(x = hour(date)) %>%
+      collect(),
+    test_df
+  )
+})
+  
+test_that("extract minute from date", {
+   expect_dplyr_equal(
+    input %>%
+      mutate(x = minute(date)) %>%
+      collect(),
+    test_df
+  )
+})
+  
+test_that("extract second from date", {
+  expect_dplyr_equal(
+    input %>%
+      mutate(x = second(date)) %>%
+      collect(),
+    test_df,
+    # arrow supports nanosecond resolution but lubridate does not
+    tolerance = 1e-6
+  )
+})
+

From c4a20e98a3294b32e51c879e927878e9fb6e799b Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Thu, 24 Jun 2021 17:45:05 -0400
Subject: [PATCH 456/719] ARROW-12869: [R] Bindings for utf8_reverse and
 ascii_reverse

This adds tests of the ascii_reverse kernel and a binding and tests for the stri_reverse function which calls the utf8_reverse kernel

Closes #10589 from thisisnic/ARROW-12869_str_reverse

Lead-authored-by: Nic Crane <thisisnic@gmail.com>
Co-authored-by: Ian Cook <ianmcook@gmail.com>
Signed-off-by: Ian Cook <ianmcook@gmail.com>
---
 r/DESCRIPTION                                 |  1 +
 r/R/expression.R                              |  1 +
 .../testthat/test-dplyr-string-functions.R    | 42 +++++++++++++++++--
 3 files changed, 41 insertions(+), 3 deletions(-)

diff --git a/r/DESCRIPTION b/r/DESCRIPTION
index a6536015530..3ad9472a209 100644
--- a/r/DESCRIPTION
+++ b/r/DESCRIPTION
@@ -48,6 +48,7 @@ Suggests:
     pkgload,
     reticulate,
     rmarkdown,
+    stringi,
     stringr,
     testthat,
     tibble,
diff --git a/r/R/expression.R b/r/R/expression.R
index 26351d61aa4..bfbaa2f0ab1 100644
--- a/r/R/expression.R
+++ b/r/R/expression.R
@@ -30,6 +30,7 @@
   "str_length" = "utf8_length",
   "str_to_lower" = "utf8_lower",
   "str_to_upper" = "utf8_upper",
+  "str_reverse" = "utf8_reverse",
   # str_trim is defined in dplyr-functions.R
   "year" = "year",
   "isoyear" = "iso_year",
diff --git a/r/tests/testthat/test-dplyr-string-functions.R b/r/tests/testthat/test-dplyr-string-functions.R
index a58a04eb109..4cb07c9e39d 100644
--- a/r/tests/testthat/test-dplyr-string-functions.R
+++ b/r/tests/testthat/test-dplyr-string-functions.R
@@ -20,6 +20,7 @@ skip_if_not_available("utf8proc")
 
 library(dplyr)
 library(stringr)
+library(stringi)
 
 test_that("paste, paste0, and str_c", {
   df <- tibble(
@@ -712,7 +713,6 @@ test_that("strptime", {
     tstamp,
     check.tzone = FALSE
   )
-
 })
 
 test_that("errors in strptime", {
@@ -725,6 +725,43 @@ test_that("errors in strptime", {
   )
 })
 
+test_that("stri_reverse and arrow_ascii_reverse functions", {
+  
+  df_ascii <- tibble(x = c("Foo\nand bar", "baz\tand qux and quux"))
+  
+  df_utf8 <- tibble(x = c("Foo\u00A0\u0061nd\u00A0bar", "\u0062az\u00A0and\u00A0qux\u3000and\u00A0quux"))
+  
+  expect_dplyr_equal(
+    input %>%
+      mutate(x = stri_reverse(x)) %>%
+      collect(),
+    df_utf8
+  )
+  
+  expect_dplyr_equal(
+    input %>%
+      mutate(x = stri_reverse(x)) %>%
+      collect(),
+    df_ascii
+  )
+  
+  expect_equivalent(
+    df_ascii %>%
+      Table$create() %>%
+      mutate(x = arrow_ascii_reverse(x)) %>%
+      collect(),
+    tibble(x = c("rab dna\nooF", "xuuq dna xuq dna\tzab"))
+  )
+  
+  expect_error(
+    df_utf8 %>%
+      Table$create() %>%
+      mutate(x = arrow_ascii_reverse(x)) %>%
+      collect(),
+    "Invalid: Non-ASCII sequence in input"
+  )
+})
+
 test_that("str_like", {
   
   df <- tibble(x = c("Foo and bar", "baz and qux and quux"))
@@ -783,7 +820,6 @@ test_that("str_like", {
     input %>%
       mutate(x = str_like(x, "%baz%")) %>%
       collect(),
-    df,
+    df
   )
-  
 })

From dedcbc0fdc9840fda577ecf73420bf34263aff46 Mon Sep 17 00:00:00 2001
From: Ian Cook <ianmcook@gmail.com>
Date: Fri, 25 Jun 2021 09:37:58 -0400
Subject: [PATCH 457/719] ARROW-13125: [R] Throw error when 2+ args passed to
 desc() in arrange()

This throws an error if the user passes two or more arguments to `desc()` in `arrange()`. Previously the second argument was silently ignored. The zero-arguments case is already handled.

Closes #10559 from ianmcook/ARROW-13125

Authored-by: Ian Cook <ianmcook@gmail.com>
Signed-off-by: Ian Cook <ianmcook@gmail.com>
---
 r/R/dplyr-arrange.R                   | 7 ++++++-
 r/tests/testthat/test-dplyr-arrange.R | 7 +++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/r/R/dplyr-arrange.R b/r/R/dplyr-arrange.R
index 59afa4fe6a0..5ab60abbada 100644
--- a/r/R/dplyr-arrange.R
+++ b/r/R/dplyr-arrange.R
@@ -77,6 +77,11 @@ find_and_remove_desc <- function(quosure) {
       # remove enclosing parentheses
       expr <- expr[[2]]
     } else if (identical(expr[[1]], quote(desc))) {
+      # ensure desc() has only one argument (when an R expression is a function
+      # call, length == 2 means it has exactly one argument)
+      if (length(expr) > 2) {
+        stop("desc() expects only one argument", call. = FALSE)
+      }
       # remove desc() and toggle descending
       expr <- expr[[2]]
       descending <- !descending
@@ -90,4 +95,4 @@ find_and_remove_desc <- function(quosure) {
       desc = descending
     )
   )
-}
\ No newline at end of file
+}
diff --git a/r/tests/testthat/test-dplyr-arrange.R b/r/tests/testthat/test-dplyr-arrange.R
index 0d12740f4cb..6e663d23ec6 100644
--- a/r/tests/testthat/test-dplyr-arrange.R
+++ b/r/tests/testthat/test-dplyr-arrange.R
@@ -197,4 +197,11 @@ test_that("arrange() with bad inputs", {
     "not found",
     fixed = TRUE
   )
+  expect_error(
+    tbl %>%
+      Table$create() %>%
+      arrange(desc(int, chr)),
+    "expects only one argument",
+    fixed = TRUE
+  )
 })

From cc4b9be027102fd54dd5156d2b762fc64d94c448 Mon Sep 17 00:00:00 2001
From: Ian Cook <ianmcook@gmail.com>
Date: Fri, 25 Jun 2021 16:57:06 -0400
Subject: [PATCH 458/719] ARROW-13117: [R] Retain schema in new Expressions

This also resolves ARROW-13119: [R] Set empty schema in scalar Expressions

Closes #10563 from ianmcook/ARROW-13117

Authored-by: Ian Cook <ianmcook@gmail.com>
Signed-off-by: Ian Cook <ianmcook@gmail.com>
---
 r/R/expression.R                   | 10 ++++-
 r/R/schema.R                       |  4 +-
 r/man/unify_schemas.Rd             |  3 +-
 r/tests/testthat/test-dplyr.R      | 23 +++++++++++
 r/tests/testthat/test-expression.R | 63 ++++++++++++++++++++++++++++++
 r/tests/testthat/test-schema.R     | 13 ++++++
 6 files changed, 112 insertions(+), 4 deletions(-)

diff --git a/r/R/expression.R b/r/R/expression.R
index bfbaa2f0ab1..de140832374 100644
--- a/r/R/expression.R
+++ b/r/R/expression.R
@@ -88,6 +88,8 @@
 Expression <- R6Class("Expression", inherit = ArrowObject,
   public = list(
     ToString = function() compute___expr__ToString(self),
+    # TODO: Implement type determination without storing
+    # schemas in Expression objects (ARROW-13186)
     schema = NULL,
     type = function(schema = self$schema) {
       assert_that(!is.null(schema))
@@ -117,7 +119,9 @@ Expression$create <- function(function_name,
                               options = empty_named_list()) {
   assert_that(is.string(function_name))
   assert_that(is_list_of(args, "Expression"), msg = "Expression arguments must be Expression objects")
-  compute___expr__call(function_name, args, options)
+  expr <- compute___expr__call(function_name, args, options)
+  expr$schema <- unify_schemas(schemas = lapply(args, function(x) x$schema))
+  expr
 }
 
 Expression$field_ref <- function(name) {
@@ -125,7 +129,9 @@ Expression$field_ref <- function(name) {
   compute___expr__field_ref(name)
 }
 Expression$scalar <- function(x) {
-  compute___expr__scalar(Scalar$create(x))
+  expr <- compute___expr__scalar(Scalar$create(x))
+  expr$schema <- schema()
+  expr
 }
 
 # Wrapper around Expression$create that:
diff --git a/r/R/schema.R b/r/R/schema.R
index 32cb1522614..3adebe259aa 100644
--- a/r/R/schema.R
+++ b/r/R/schema.R
@@ -283,13 +283,15 @@ read_schema <- function(stream, ...) {
 #'
 #' @param ... [Schema]s to unify
 #' @param schemas Alternatively, a list of schemas
-#' @return A `Schema` with the union of fields contained in the inputs
+#' @return A `Schema` with the union of fields contained in the inputs, or
+#'   `NULL` if any of `schemas` is `NULL`
 #' @export
 #' @examplesIf arrow_available()
 #' a <- schema(b = double(), c = bool())
 #' z <- schema(b = double(), k = utf8())
 #' unify_schemas(a, z)
 unify_schemas <- function(..., schemas = list(...)) {
+  if (any(vapply(schemas, is.null, TRUE))) return(NULL)
   arrow__UnifySchemas(schemas)
 }
 
diff --git a/r/man/unify_schemas.Rd b/r/man/unify_schemas.Rd
index 609581914ad..50c80c2dda9 100644
--- a/r/man/unify_schemas.Rd
+++ b/r/man/unify_schemas.Rd
@@ -12,7 +12,8 @@ unify_schemas(..., schemas = list(...))
 \item{schemas}{Alternatively, a list of schemas}
 }
 \value{
-A \code{Schema} with the union of fields contained in the inputs
+A \code{Schema} with the union of fields contained in the inputs, or
+\code{NULL} if any of \code{schemas} is \code{NULL}
 }
 \description{
 Combine and harmonize schemas
diff --git a/r/tests/testthat/test-dplyr.R b/r/tests/testthat/test-dplyr.R
index c3df89db359..6740b3ee75f 100644
--- a/r/tests/testthat/test-dplyr.R
+++ b/r/tests/testthat/test-dplyr.R
@@ -361,6 +361,14 @@ test_that("relocate with selection helpers", {
     input %>% relocate(d, e, f, .before = where(is.numeric)) %>% collect(),
     df
   )
+  # works after other dplyr verbs
+  expect_dplyr_equal(
+    input %>%
+      mutate(c = as.character(c)) %>%
+      relocate(d, e, f, .after = where(is.numeric)) %>%
+      collect(),
+    df
+  )
 })
 
 test_that("explicit type conversions with cast()", {
@@ -800,6 +808,21 @@ test_that("type checks with is_*()", {
   )
 })
 
+test_that("type checks on expressions", {
+  expect_dplyr_equal(
+    input %>%
+      transmute(
+        a = is.character(as.character(int)),
+        b = is.integer(as.character(int)),
+        c = is.integer(int + int),
+        d = is.double(int + dbl),
+        e = is.logical(grepl("[def]", chr))
+      ) %>%
+      collect(),
+    tbl
+  )
+})
+
 test_that("as.factor()/dictionary_encode()", {
   skip("ARROW-12632: ExecuteScalarExpression cannot Execute non-scalar expression {x=dictionary_encode(x, {NON-REPRESENTABLE OPTIONS})}")
   df1 <- tibble(x = c("C", "D", "B", NA, "D", "B", "S", "A", "B", "Z", "B"))
diff --git a/r/tests/testthat/test-expression.R b/r/tests/testthat/test-expression.R
index d76b339db37..d8c26db0143 100644
--- a/r/tests/testthat/test-expression.R
+++ b/r/tests/testthat/test-expression.R
@@ -65,3 +65,66 @@ test_that("C++ expressions", {
   )
   
 })
+
+test_that("Field reference expression schemas and types", {
+  x <- Expression$field_ref("x")
+
+  # type() throws error when schema is NULL
+  expect_error(x$type(), "schema")
+
+  # type() returns type when schema is set
+  x$schema <- Schema$create(x = int32())
+  expect_equal(x$type(), int32())
+})
+
+test_that("Scalar expression schemas and types", {
+  # type() works on scalars without setting the schema
+  expect_equal(
+    Expression$scalar("foo")$type(),
+    arrow::string()
+  )
+  expect_equal(
+    Expression$scalar(42L)$type(),
+    int32()
+  )
+})
+
+test_that("Expression schemas and types", {
+  x <- Expression$field_ref("x")
+  y <- Expression$field_ref("y")
+  z <- Expression$scalar(42L)
+
+  # type() throws error when both schemas are unset
+  expect_error(
+    Expression$create("add_checked", x, y)$type(),
+    "schema"
+  )
+
+  # type() throws error when left schema is unset
+  y$schema <- Schema$create(y = float64())
+  expect_error(
+    Expression$create("add_checked", x, y)$type(),
+    "schema"
+  )
+
+  # type() throws error when right schema is unset
+  x$schema <- Schema$create(x = int32())
+  y$schema <- NULL
+  expect_error(
+    Expression$create("add_checked", x, y)$type(),
+    "schema"
+  )
+
+  # type() returns type when both schemas are set
+  y$schema <- Schema$create(y = float64())
+  expect_equal(
+    Expression$create("add_checked", x, y)$type(),
+    float64()
+  )
+
+  # type() returns type when one arg has schema set and one is scalar
+  expect_equal(
+    Expression$create("add_checked", x, z)$type(),
+    int32()
+  )
+})
diff --git a/r/tests/testthat/test-schema.R b/r/tests/testthat/test-schema.R
index 278dc19f2c9..0de6ccae7a6 100644
--- a/r/tests/testthat/test-schema.R
+++ b/r/tests/testthat/test-schema.R
@@ -173,6 +173,19 @@ test_that("unify_schemas", {
     unify_schemas(a, z),
     schema(b = double(), c = bool(), k = utf8())
   )
+  # returns NULL when any arg is NULL
+  expect_null(
+    unify_schemas(a, NULL, z)
+  )
+  # returns NULL when all args are NULL
+  expect_null(
+    unify_schemas(NULL, NULL)
+  )
+  # errors when no args
+  expect_error(
+    unify_schemas(),
+    "Must provide at least one schema to unify"
+  )
 })
 
 test_that("Schema to C-interface", {

From d529de738c3ebadfc1215d55ad3ccb2e9c9c58b4 Mon Sep 17 00:00:00 2001
From: sgilmore <sgilmore@mathworks.com>
Date: Sat, 26 Jun 2021 14:31:27 +0900
Subject: [PATCH 459/719] ARROW-12730: [MATLAB] Update featherreadmex and
 featherwritemex to build against latest Arrow C++ APIs

**Overview**
* The MEX functions ``featherreadmex`` and ``featherwritemex`` fail to build against the latest Arrow C++ APIs. These changes allow them to successfully build.
* These changes require CMake version 3.20 or later in order to access the latest functionality exposed by [FindMatlab.cmake](https://cmake.org/cmake/help/latest/module/FindMatlab.html). We noticed that some Arrow project components, such as [Gandiva](https://arrow.apache.org/docs/developers/cpp/building.html?highlight=gandiva#cmake-version-requirements), require newer versions of CMake than the core Arrow C++ libraries.  If version 3.20 is too new, we're happy to find an alternative.
* We couldn't find a way to read and write a table description for feather V1 files using the latest APIs. It looks like support for reading and writing descriptions was modified in pull request https://github.com/apache/arrow/pull/6694. For now, we've removed support for table descriptions.

**Testing**
* Built ``featherreadmex`` and ``featherwritemex`` on Windows 10 with Visual Studio 2019
* Built ``featherreadmex`` and ``featherwritemex`` on macOS Big Sur (11.2.3) with GNU Make 3.81
* Built ``featherreadmex`` and ``featherwritemex`` on Debian 10 with GNU Make GNU 4.2.1
* Ran all tests in ``tfeather`` and ``tfeathermex`` on all platforms in MATLAB R2021a

**Future Directions**
* We did not detect the build failures due to the lack of CI integration. We hope to add CI support soon and will follow up with a mailing list discussion to talk through the details.
* These changes are temporary to allow us to have a clean slate to start developing the  [MATLAB Interface to Apache Arrow](https://github.com/apache/arrow/blob/master/matlab/doc/matlab_interface_for_apache_arrow_design.md).
* Eventually we would like to support the full ranges of data types for feather V1 and feather V2.
* In order to modernize the code, we plan to migrate to the [C++ MEX](https://www.mathworks.com/help/matlab/cpp-mex-file-applications.html) and [MATLAB Data Array](https://www.mathworks.com/help/matlab/matlab-data-array.html) APIs.
* We are going to follow up with another pull request to update the README.md to provide more detailed platform-specific development instructions.
* The MATLAB based build system inside of the ``build_support`` folder is out of date.  We are not sure if we want to maintain a separate MATLAB based build system along side the CMake based one. We will follow up on this in the future via the mailing list or Jira.

We acknowledge there is a lot of information in this pull request. In the future, we will work in smaller increments. We felt a bigger pull request was necessary to get back to a working state.

Thanks,
Sarah

Closes #10305 from sgilmore10/ARROW_12730

Lead-authored-by: sgilmore <sgilmore@mathworks.com>
Co-authored-by: sgilmore10 <74676073+sgilmore10@users.noreply.github.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 matlab/CMakeLists.txt                         |  60 +++++---
 .../src/+mlarrow/+util/createMetadataStruct.m |   5 +-
 matlab/src/+mlarrow/+util/table2mlarrow.m     |   3 +-
 matlab/src/feather_reader.cc                  |  88 ++++++-----
 matlab/src/feather_reader.h                   |   6 +-
 matlab/src/feather_writer.cc                  | 138 +++++++++++-------
 matlab/src/feather_writer.h                   |  23 ++-
 matlab/src/featherread.m                      |   4 -
 matlab/src/featherwritemex.cc                 |   3 +-
 matlab/test/tfeathermex.m                     |   2 +-
 .../util/createVariablesAndMetadataStructs.m  |   3 +-
 11 files changed, 191 insertions(+), 144 deletions(-)

diff --git a/matlab/CMakeLists.txt b/matlab/CMakeLists.txt
index 09c8839aaf0..5ee48a87c3a 100644
--- a/matlab/CMakeLists.txt
+++ b/matlab/CMakeLists.txt
@@ -15,7 +15,8 @@
 # specific language governing permissions and limitations
 # under the License.
 
-cmake_minimum_required(VERSION 3.2)
+cmake_minimum_required(VERSION 3.20)
+
 set(CMAKE_CXX_STANDARD 11)
 
 set(MLARROW_VERSION "5.0.0-SNAPSHOT")
@@ -29,22 +30,45 @@ if(EXISTS "${CPP_CMAKE_MODULES}")
   set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CPP_CMAKE_MODULES})
 endif()
 
-## Arrow is Required
+set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_SOURCE_DIR}/cmake_modules)
+
+# Arrow is Required
 find_package(Arrow REQUIRED)
 
-## MATLAB is required to be installed to build MEX interfaces
-set(MATLAB_ADDITIONAL_VERSIONS "R2018a=9.4")
-find_package(Matlab REQUIRED MX_LIBRARY)
-
-# Build featherread mex file based on the arrow shared library
-matlab_add_mex(NAME featherreadmex
-               SRC src/featherreadmex.cc src/feather_reader.cc src/util/handle_status.cc
-                   src/util/unicode_conversion.cc
-               LINK_TO ${ARROW_SHARED_LIB})
-target_include_directories(featherreadmex PRIVATE ${ARROW_INCLUDE_DIR})
-
-# Build featherwrite mex file based on the arrow shared library
-matlab_add_mex(NAME featherwritemex
-               SRC src/featherwritemex.cc src/feather_writer.cc src/util/handle_status.cc
-               LINK_TO ${ARROW_SHARED_LIB})
-target_include_directories(featherwritemex PRIVATE ${ARROW_INCLUDE_DIR})
+# MATLAB is Required
+find_package(Matlab REQUIRED)
+
+# Construct the absolute path to featherread's source files
+set(featherread_sources featherreadmex.cc feather_reader.cc util/handle_status.cc
+                        util/unicode_conversion.cc)
+list(TRANSFORM featherread_sources PREPEND ${CMAKE_SOURCE_DIR}/src/)
+
+# Build featherreadmex MEX binary
+matlab_add_mex(R2018a
+               NAME featherreadmex
+               SRC ${featherread_sources}
+               LINK_TO arrow_shared)
+
+# Construct the absolute path to featherwrite's source files
+set(featherwrite_sources featherwritemex.cc feather_writer.cc util/handle_status.cc
+                         util/unicode_conversion.cc)
+list(TRANSFORM featherwrite_sources PREPEND ${CMAKE_SOURCE_DIR}/src/)
+
+# Build featherwritemex MEX binary
+matlab_add_mex(R2018a
+               NAME featherwritemex
+               SRC ${featherwrite_sources}
+               LINK_TO arrow_shared)
+
+# Ensure the MEX binaries are placed in the src directory on all platforms
+if(WIN32)
+  set_target_properties(featherreadmex PROPERTIES RUNTIME_OUTPUT_DIRECTORY
+                                                  $<1:${CMAKE_SOURCE_DIR}/src>)
+  set_target_properties(featherwritemex PROPERTIES RUNTIME_OUTPUT_DIRECTORY
+                                                   $<1:${CMAKE_SOURCE_DIR}/src>)
+else()
+  set_target_properties(featherreadmex PROPERTIES LIBRARY_OUTPUT_DIRECTORY
+                                                  $<1:${CMAKE_SOURCE_DIR}/src>)
+  set_target_properties(featherwritemex PROPERTIES LIBRARY_OUTPUT_DIRECTORY
+                                                   $<1:${CMAKE_SOURCE_DIR}/src>)
+endif()
diff --git a/matlab/src/+mlarrow/+util/createMetadataStruct.m b/matlab/src/+mlarrow/+util/createMetadataStruct.m
index 7a2397059b6..b1b8bc7edd9 100644
--- a/matlab/src/+mlarrow/+util/createMetadataStruct.m
+++ b/matlab/src/+mlarrow/+util/createMetadataStruct.m
@@ -1,4 +1,4 @@
-function metadata = createMetadataStruct(description, numRows, numVariables)
+function metadata = createMetadataStruct(numRows, numVariables)
 % CREATEMETADATASTRUCT Helper function for creating Feather MEX metadata
 % struct.
 
@@ -17,8 +17,7 @@
 % implied.  See the License for the specific language governing
 % permissions and limitations under the License.
 
-metadata = struct('Description', description, ...
-                  'NumRows', numRows, ...
+metadata = struct('NumRows', numRows, ...
                   'NumVariables', numVariables);
 end
 
diff --git a/matlab/src/+mlarrow/+util/table2mlarrow.m b/matlab/src/+mlarrow/+util/table2mlarrow.m
index 3103724f945..36e4d1d15a9 100644
--- a/matlab/src/+mlarrow/+util/table2mlarrow.m
+++ b/matlab/src/+mlarrow/+util/table2mlarrow.m
@@ -23,7 +23,6 @@
 %
 %   Field Name    Class         Description
 %   ------------  -------       ----------------------------------------------
-%   Description   char          Table description (T.Properties.Description)
 %   NumRows       double        Number of table rows (height(T))
 %   NumVariables  double        Number of table variables (width(T))
 %
@@ -51,7 +50,7 @@
 variables = repmat(createVariableStruct('', [], [], ''), 1, width(t));
 
 % Struct representing table-level metadata.
-metadata = createMetadataStruct(t.Properties.Description, height(t), width(t));
+metadata = createMetadataStruct(height(t), width(t));
 
 % Iterate over each variable in the given table,
 % extracting the underlying array data.
diff --git a/matlab/src/feather_reader.cc b/matlab/src/feather_reader.cc
index 484c300e0e4..1cbb50541e7 100644
--- a/matlab/src/feather_reader.cc
+++ b/matlab/src/feather_reader.cc
@@ -18,16 +18,21 @@
 #include <algorithm>
 #include <cmath>
 
+#include "feather_reader.h"
+
+#include <arrow/array/array_base.h>
+#include <arrow/array/builder_base.h>
+#include <arrow/array/builder_primitive.h>
 #include <arrow/io/file.h>
 #include <arrow/ipc/feather.h>
+#include <arrow/result.h>
 #include <arrow/status.h>
 #include <arrow/table.h>
 #include <arrow/type.h>
-#include <arrow/util/bit-util.h>
-
+#include <arrow/type_traits.h>
+#include <arrow/util/bitmap_visit.h>
 #include <mex.h>
 
-#include "feather_reader.h"
 #include "matlab_traits.h"
 #include "util/handle_status.h"
 #include "util/unicode_conversion.h"
@@ -52,11 +57,11 @@ mxArray* ReadNumericVariableData(const std::shared_ptr<Array>& column) {
   mxArray* variable_data =
       mxCreateNumericMatrix(column->length(), 1, matlab_class_id, mxREAL);
 
-  std::shared_ptr<ArrowArrayType> integer_array =
+  auto arrow_numeric_array =
       std::static_pointer_cast<ArrowArrayType>(column);
 
   // Get a raw pointer to the Arrow array data.
-  const MatlabType* source = integer_array->raw_values();
+  const MatlabType* source = arrow_numeric_array->raw_values();
 
   // Get a mutable pointer to the MATLAB array data and std::copy the
   // Arrow array data into it.
@@ -121,8 +126,7 @@ void BitUnpackBuffer(const std::shared_ptr<Buffer>& source, int64_t length,
 // writes to a zero-initialized destination buffer.
 // Implements a fast path for the fully-valid and fully-invalid cases.
 // Returns true if the destination buffer was successfully populated.
-bool TryBitUnpackFastPath(const std::shared_ptr<Array>& array,
-                          mxLogical* destination) {
+bool TryBitUnpackFastPath(const std::shared_ptr<Array>& array, mxLogical* destination) {
   const int64_t null_count = array->null_count();
   const int64_t length = array->length();
 
@@ -177,32 +181,24 @@ Status FeatherReader::Open(const std::string& filename,
   *feather_reader = std::shared_ptr<FeatherReader>(new FeatherReader());
 
   // Open file with given filename as a ReadableFile.
-  std::shared_ptr<io::ReadableFile> readable_file(nullptr);
-
-  RETURN_NOT_OK(io::ReadableFile::Open(filename, &readable_file));
-
-  // TableReader expects a RandomAccessFile.
-  std::shared_ptr<io::RandomAccessFile> random_access_file(readable_file);
-
+  ARROW_ASSIGN_OR_RAISE(auto readable_file, io::ReadableFile::Open(filename));
+ 
   // Open the Feather file for reading with a TableReader.
-  RETURN_NOT_OK(ipc::feather::TableReader::Open(random_access_file,
-                                                &(*feather_reader)->table_reader_));
-
-  // Read the table metadata from the Feather file.
-  (*feather_reader)->num_rows_ = (*feather_reader)->table_reader_->num_rows();
-  (*feather_reader)->num_variables_ = (*feather_reader)->table_reader_->num_columns();
-  (*feather_reader)->description_ =
-      (*feather_reader)->table_reader_->HasDescription()
-          ? (*feather_reader)->table_reader_->GetDescription()
-          : "";
-
-  if ((*feather_reader)->num_rows_ > internal::MAX_MATLAB_SIZE ||
-      (*feather_reader)->num_variables_ > internal::MAX_MATLAB_SIZE) {
-    mexErrMsgIdAndTxt("MATLAB:arrow:SizeTooLarge",
-                      "The table size exceeds MATLAB limits: %u x %u",
-                      (*feather_reader)->num_rows_, (*feather_reader)->num_variables_);
+  ARROW_ASSIGN_OR_RAISE(auto reader, ipc::feather::Reader::Open(readable_file));
+ 
+  // Set the internal reader_ object.
+  (*feather_reader)->reader_ = reader;
+
+  // Check the feather file version
+  auto version = reader->version();
+  if (version == ipc::feather::kFeatherV2Version) {
+    return Status::NotImplemented("Support for Feather V2 has not been implemented.");
+  } else if (version != ipc::feather::kFeatherV1Version) {
+    return Status::Invalid("Unknown Feather format version.");
   }
 
+  // read the table metadata from the Feather file
+  (*feather_reader)->num_variables_ = reader->schema()->num_fields();
   return Status::OK();
 }
 
@@ -225,15 +221,11 @@ mxArray* FeatherReader::ReadMetadata() const {
   mxSetField(metadata, 0, "NumVariables",
              mxCreateDoubleScalar(static_cast<double>(num_variables_)));
 
-  // Set the description.
-  mxSetField(metadata, 0, "Description",
-             util::ConvertUTF8StringToUTF16CharMatrix(description_));
-
   return metadata;
 }
 
 // Read the table variables from the Feather file as a mxArray*.
-mxArray* FeatherReader::ReadVariables() const {
+mxArray* FeatherReader::ReadVariables() {
   const int32_t num_variable_fields = 4;
   const char* fieldnames[] = {"Name", "Type", "Data", "Valid"};
 
@@ -242,16 +234,34 @@ mxArray* FeatherReader::ReadVariables() const {
   mxArray* variables =
       mxCreateStructMatrix(1, num_variables_, num_variable_fields, fieldnames);
 
-  // Read all the table variables in the Feather file into memory.
+  std::shared_ptr<arrow::Table> table;
+  auto status = reader_->Read(&table);
+  if (!status.ok()) {
+    mexErrMsgIdAndTxt("MATLAB:arrow:FeatherReader::FailedToReadTable",
+                      "Failed to read arrow::Table from Feather file. Reason: %s",
+                      status.message().c_str());
+  }
+
+  // Set the number of rows
+  num_rows_ = table->num_rows();
+
+  if (num_rows_ > internal::MAX_MATLAB_SIZE ||
+      num_variables_ > internal::MAX_MATLAB_SIZE) {
+    mexErrMsgIdAndTxt("MATLAB:arrow:SizeTooLarge",
+                      "The table size exceeds MATLAB limits: %u x %u", num_rows_,
+                      num_variables_);
+  }
+
+  auto column_names = table->ColumnNames();
+
   for (int64_t i = 0; i < num_variables_; ++i) {
-    std::shared_ptr<ChunkedArray> column;
-    util::HandleStatus(table_reader_->GetColumn(i, &column));
+    auto column = table->column(i);
     if (column->num_chunks() != 1) {
       mexErrMsgIdAndTxt("MATLAB:arrow:FeatherReader::ReadVariables",
                         "Chunked columns not yet supported");
     }
     std::shared_ptr<Array> chunk = column->chunk(0);
-    const std::string column_name = table_reader_->GetColumnName(i);
+    const std::string column_name = column_names[i];
 
     // set the struct fields data
     mxSetField(variables, i, "Name", internal::ReadVariableName(column_name));
diff --git a/matlab/src/feather_reader.h b/matlab/src/feather_reader.h
index 00fea68f7ae..197e470bf6e 100644
--- a/matlab/src/feather_reader.h
+++ b/matlab/src/feather_reader.h
@@ -23,7 +23,6 @@
 #include <arrow/ipc/feather.h>
 #include <arrow/status.h>
 #include <arrow/type.h>
-
 #include <matrix.h>
 
 namespace arrow {
@@ -56,7 +55,7 @@ class FeatherReader {
   ///        Clients are responsible for freeing the returned mxArray memory
   ///        when it is no longer needed, or passing it to MATLAB to be managed.
   /// \return variables mxArray* struct array containing table variable data
-  mxArray* ReadVariables() const;
+  mxArray* ReadVariables();
 
   /// \brief Initialize a FeatherReader object from a given Feather file.
   /// \param[in] filename path to a Feather file
@@ -66,7 +65,7 @@ class FeatherReader {
 
  private:
   FeatherReader() = default;
-  std::unique_ptr<ipc::feather::TableReader> table_reader_;
+  std::shared_ptr<ipc::feather::Reader> reader_;
   int64_t num_rows_;
   int64_t num_variables_;
   std::string description_;
@@ -74,4 +73,3 @@ class FeatherReader {
 
 }  // namespace matlab
 }  // namespace arrow
-
diff --git a/matlab/src/feather_writer.cc b/matlab/src/feather_writer.cc
index bd1576bca46..1a76ada1995 100644
--- a/matlab/src/feather_writer.cc
+++ b/matlab/src/feather_writer.cc
@@ -19,6 +19,8 @@
 #include <functional> /* for std::multiplies */
 #include <numeric>    /* for std::accumulate */
 
+#include "feather_writer.h"
+
 #include <arrow/array.h>
 #include <arrow/buffer.h>
 #include <arrow/io/file.h>
@@ -26,11 +28,11 @@
 #include <arrow/status.h>
 #include <arrow/table.h>
 #include <arrow/type.h>
-#include <arrow/util/bit-util.h>
-
+#include <arrow/util/bit_util.h>
+#include <arrow/util/bitmap_generate.h>
+#include <arrow/util/key_value_metadata.h>
 #include <mex.h>
 
-#include "feather_writer.h"
 #include "matlab_traits.h"
 #include "util/handle_status.h"
 
@@ -38,6 +40,37 @@ namespace arrow {
 namespace matlab {
 namespace internal {
 
+// Returns the arrow::DataType that corresponds to the input type string
+std::shared_ptr<arrow::DataType> ConvertMatlabTypeStringToArrowDataType(
+    const std::string& t) {
+  if (t == "double") {
+    return arrow::float64();
+  } else if (t == "single") {
+    return arrow::float32();
+  } else if (t == "uint64") {
+    return arrow::uint64();
+  } else if (t == "uint32") {
+    return arrow::uint32();
+  } else if (t == "uint16") {
+    return arrow::uint16();
+  } else if (t == "uint8") {
+    return arrow::uint8();
+  } else if (t == "int64") {
+    return arrow::int64();
+  } else if (t == "int32") {
+    return arrow::int32();
+  } else if (t == "int16") {
+    return arrow::int16();
+  } else if (t == "int8") {
+    return arrow::int8();
+  }
+  mexErrMsgIdAndTxt("MATLAB:arrow:UnsupportedMatlabTypeString",
+                    "Unsupported MATLAB type string: '%s'", t.c_str());
+
+  // mexErrMsgIdAndTxt throws unconditionally so we should never reach this line
+  return nullptr;
+}
+
 // Utility that helps verify the input mxArray struct field name and type.
 // Returns void since any errors will throw and terminate MEX execution.
 void ValidateMxStructField(const mxArray* struct_array, const char* fieldname,
@@ -71,8 +104,7 @@ void ValidateMxStructField(const mxArray* struct_array, const char* fieldname,
                         mxGetClassName(field), fieldname);
     }
   }
-
-  // Some struct fields (like the table description) can be empty, while others 
+  // Some struct fields (like Data) can be empty, while others
   // (like NumRows) should never be empty. This conditional helps account for both cases.
   if (!can_be_empty) {
     // Ensure that individual mxStructArray fields are non-empty.
@@ -120,7 +152,7 @@ void ValidateNumRows(int64_t actual, int64_t expected) {
 }
 
 // Calculate the number of bytes required in the bit-packed validity buffer.
-constexpr int64_t BitPackedLength(int64_t num_elements) {
+int64_t BitPackedLength(int64_t num_elements) {
   // Since mxLogicalArray encodes [0, 1] in a full byte, we can compress that byte
   // down to a bit...therefore dividing the mxLogicalArray length by 8 here.
   return static_cast<int64_t>(std::ceil(num_elements / 8.0));
@@ -134,7 +166,7 @@ size_t GetNumberOfElements(const mxArray* array) {
   const size_t* dimensions = mxGetDimensions(array);
 
   // Iterate over the dimensions array and accumulate the total number of elements.
-  return std::accumulate(dimensions, dimensions + num_dimensions, 1,
+  return std::accumulate(dimensions, dimensions + num_dimensions, size_t{1},
                          std::multiplies<size_t>());
 }
 
@@ -164,7 +196,7 @@ void BitPackBuffer(const mxArray* logical_array,
 
   // Iterate over the mxLogical array and write bit-packed bools to the arrow::Buffer.
   // Call into a loop-unrolled Arrow utility for better performance when bit-packing.
-  auto generator = [&]() -> uint8_t { return *unpacked_buffer_ptr++; };
+  auto generator = [&]() -> bool { return *(unpacked_buffer_ptr++); };
   const int64_t start_offset = 0;
   arrow::internal::GenerateBitsUnrolled(packed_buffer_ptr, start_offset,
                                         unpacked_buffer_length, generator);
@@ -195,8 +227,8 @@ std::unique_ptr<Array> WriteNumericData(const mxArray* data,
                                mxGetElementSize(data) * mxGetNumberOfElements(data));
 
   // Construct arrow::NumericArray specialization using arrow::Buffer.
-  // Pass in nulls information...we could compute and provide the number of nulls here too,
-  // but passing -1 for now so that Arrow recomputes it if necessary.
+  // Pass in nulls information...we could compute and provide the number of nulls here
+  // too, but passing -1 for now so that Arrow recomputes it if necessary.
   return std::unique_ptr<Array>(new NumericArray<ArrowDataType>(
       mxGetNumberOfElements(data), buffer, validity_bitmap, -1));
 }
@@ -228,7 +260,6 @@ std::unique_ptr<Array> WriteVariableData(const mxArray* data, const std::string&
       return WriteNumericData<Int32Type>(data, validity_bitmap);
     case mxINT64_CLASS:
       return WriteNumericData<Int64Type>(data, validity_bitmap);
-
     default: {
       mexErrMsgIdAndTxt("MATLAB:arrow:UnsupportedArrowType",
                         "Unsupported arrow::Type '%s' for variable '%s'",
@@ -248,60 +279,41 @@ Status FeatherWriter::Open(const std::string& filename,
   *feather_writer = std::shared_ptr<FeatherWriter>(new FeatherWriter());
 
   // Open a FileOutputStream corresponding to the provided filename.
-  std::shared_ptr<io::OutputStream> writable_file(nullptr);
-  ARROW_RETURN_NOT_OK(io::FileOutputStream::Open(filename, &writable_file));
-
-  // TableWriter::Open expects a shared_ptr to an OutputStream.
-  // Open the Feather file for writing with a TableWriter.
-  return ipc::feather::TableWriter::Open(writable_file,
-                                         &(*feather_writer)->table_writer_);
-}
-
-// Write table metadata to the Feather file from a mxArray*.
-void FeatherWriter::WriteMetadata(const mxArray* metadata) {
-  // Verify that all required fieldnames are provided.
-  internal::ValidateMxStructField(metadata, "Description", mxCHAR_CLASS, true);
-  internal::ValidateMxStructField(metadata, "NumRows", mxDOUBLE_CLASS, false);
-  internal::ValidateMxStructField(metadata, "NumVariables", mxDOUBLE_CLASS, false);
-
-  // Convert Description to a std::string and set on FeatherWriter and TableWriter.
-  std::string description =
-      internal::MxArrayToString(mxGetField(metadata, 0, "Description"));
-  this->description_ = description;
-  this->table_writer_->SetDescription(description);
-
-  // Get the NumRows field in the struct array and set on TableWriter.
-  this->num_rows_ = static_cast<int64_t>(mxGetScalar(mxGetField(metadata, 0, "NumRows")));
-  this->table_writer_->SetNumRows(this->num_rows_);
-
-  // Get the total number of variables. This is checked later for consistency with
-  // the provided number of columns before finishing the file write.
-  this->num_variables_ =
-      static_cast<int64_t>(mxGetScalar(mxGetField(metadata, 0, "NumVariables")));
+  ARROW_ASSIGN_OR_RAISE((*feather_writer)->file_output_stream_,
+      io::FileOutputStream::Open(filename, &((*feather_writer)->file_output_stream_)));
+  return Status::OK();
 }
 
 // Write mxArrays from MATLAB into a Feather file.
-Status FeatherWriter::WriteVariables(const mxArray* variables) {
+Status FeatherWriter::WriteVariables(const mxArray* variables, const mxArray* metadata) {
   // Verify that all required fieldnames are provided.
   internal::ValidateMxStructField(variables, "Name", mxCHAR_CLASS, true);
   internal::ValidateMxStructField(variables, "Type", mxCHAR_CLASS, false);
   internal::ValidateMxStructField(variables, "Data", mxUNKNOWN_CLASS, true);
   internal::ValidateMxStructField(variables, "Valid", mxLOGICAL_CLASS, true);
 
+  // Verify that all required fieldnames are provided.
+  internal::ValidateMxStructField(metadata, "NumRows", mxDOUBLE_CLASS, false);
+  internal::ValidateMxStructField(metadata, "NumVariables", mxDOUBLE_CLASS, false);
+
   // Get the number of columns in the struct array.
   size_t num_columns = internal::GetNumberOfElements(variables);
 
+  // Get the NumRows field in the struct array and set on TableWriter.
+  num_rows_ = static_cast<int64_t>(mxGetScalar(mxGetField(metadata, 0, "NumRows")));
+  // Get the total number of variables. This is checked later for consistency with
+  // the provided number of columns before finishing the file write.
+  num_variables_ =
+      static_cast<int64_t>(mxGetScalar(mxGetField(metadata, 0, "NumVariables")));
+
   // Verify that we have all the columns required for writing
   // Currently we need all columns to be passed in together in the WriteVariables method.
-  internal::ValidateNumColumns(static_cast<int64_t>(num_columns), this->num_variables_);
+  internal::ValidateNumColumns(static_cast<int64_t>(num_columns), num_variables_);
+
+  arrow::SchemaBuilder schema_builder;
+  std::vector<std::shared_ptr<arrow::Array>> table_columns;
 
-  // Allocate a packed validity bitmap for later arrow::Buffers to reference and populate.
-  // Since this is defined in the enclosing scope around any arrow::Buffer usage, this
-  // should outlive any arrow::Buffers created on this range, thus avoiding dangling
-  // references.
-  std::shared_ptr<ResizableBuffer> validity_bitmap;
-  ARROW_RETURN_NOT_OK(AllocateResizableBuffer(internal::BitPackedLength(this->num_rows_),
-                                              &validity_bitmap));
+  const int64_t bitpacked_length = internal::BitPackedLength(num_rows_);
 
   // Iterate over the input columns and generate arrow arrays.
   for (int idx = 0; idx < num_columns; ++idx) {
@@ -316,22 +328,38 @@ Status FeatherWriter::WriteVariables(const mxArray* variables) {
     std::string name_str = internal::MxArrayToString(name);
     std::string type_str = internal::MxArrayToString(type);
 
+    auto datatype = internal::ConvertMatlabTypeStringToArrowDataType(type_str);
+    auto field = std::make_shared<arrow::Field>(name_str, datatype);
+
+    ARROW_ASSIGN_OR_RAISE(std::shared_ptr<ResizableBuffer> validity_bitmap,
+        arrow::AllocateResizableBuffer(internal::BitPackedLength(num_rows_)));
+
     // Populate bit-packed arrow::Buffer using validity data in the mxArray*.
     internal::BitPackBuffer(valid, validity_bitmap);
 
     // Wrap mxArray data in an arrow::Array of the equivalent type.
-    std::unique_ptr<Array> array =
+    auto array =
         internal::WriteVariableData(data, type_str, validity_bitmap);
 
     // Verify that the arrow::Array has the right number of elements.
-    internal::ValidateNumRows(array->length(), this->num_rows_);
+    internal::ValidateNumRows(array->length(), num_rows_);
 
-    // Write another column to the Feather file.
-    ARROW_RETURN_NOT_OK(this->table_writer_->Append(name_str, *array));
+    // Append the field to the schema builder
+    RETURN_NOT_OK(schema_builder.AddField(field));
+
+    // Store the table column
+    table_columns.push_back(std::move(array));
   }
+  // Create the table schema
+  ARROW_ASSIGN_OR_RAISE(auto table_schema, schema_builder.Finish());
+
+  // Specify the feather file format version as V1
+  arrow::ipc::feather::WriteProperties write_props;
+  write_props.version = arrow::ipc::feather::kFeatherV1Version;
 
+  std::shared_ptr<arrow::Table> table = arrow::Table::Make(table_schema, table_columns);
   // Write the Feather file metadata to the end of the file.
-  return this->table_writer_->Finalize();
+  return ipc::feather::WriteTable(*table, file_output_stream_.get(), write_props);
 }
 
 }  // namespace matlab
diff --git a/matlab/src/feather_writer.h b/matlab/src/feather_writer.h
index 4b402e01e17..a35b1434340 100644
--- a/matlab/src/feather_writer.h
+++ b/matlab/src/feather_writer.h
@@ -23,7 +23,6 @@
 #include <arrow/ipc/feather.h>
 #include <arrow/status.h>
 #include <arrow/type.h>
-
 #include <matrix.h>
 
 namespace arrow {
@@ -33,24 +32,21 @@ class FeatherWriter {
  public:
   ~FeatherWriter() = default;
 
-  /// \brief Write Feather file metadata using information from an mxArray* struct.
-  ///        The input mxArray must be a scalar struct array with the following fields:
-  ///         - "Description" :: Nx1 mxChar array, table-level description
-  ///         - "NumRows" :: scalar mxDouble array, number of rows in table
-  ///         - "NumVariables" :: scalar mxDouble array, total number of variables
-  /// \param[in] metadata mxArray* scalar struct containing table-level metadata
-  void WriteMetadata(const mxArray* metadata);
-
-  /// \brief Write mxArrays to a Feather file. The input must be a N-by-1 mxStruct
-  //         array with the following fields:
+  /// \brief Write mxArrays to a Feather file. The first input must be a N-by-1 mxStruct
+  ///         array with the following fields:
   ///         - "Name" :: Nx1 mxChar array, name of the column
   ///         - "Type" :: Nx1 mxChar array, the variable's MATLAB datatype
   ///         - "Data" :: Nx1 mxArray, data for this variable
   ///         - "Valid" :: Nx1 mxLogical array, 0 represents invalid (null) values and
   ///                                           1 represents valid (non-null) values
+  ///        The second input must be a scalar mxStruct  with the following
+  ///        fields:
+  ///         - "NumRows" :: scalar mxDouble array, number of rows in table
+  ///         - "NumVariables" :: scalar mxDouble array, total number of variables
   /// \param[in] variables mxArray* struct array containing table variable data
+  /// \param[in] metadata mxArray* scalar struct containing table-level metadata
   /// \return status
-  Status WriteVariables(const mxArray* variables);
+  Status WriteVariables(const mxArray* variables, const mxArray* metadata);
 
   /// \brief Initialize a FeatherWriter object that writes to a Feather file
   /// \param[in] filename path to the new Feather file
@@ -62,12 +58,11 @@ class FeatherWriter {
  private:
   FeatherWriter() = default;
 
-  std::unique_ptr<ipc::feather::TableWriter> table_writer_;
   int64_t num_rows_;
   int64_t num_variables_;
   std::string description_;
+  std::shared_ptr<arrow::io::OutputStream> file_output_stream_;
 };
 
 }  // namespace matlab
 }  // namespace arrow
-
diff --git a/matlab/src/featherread.m b/matlab/src/featherread.m
index 4ac8a565182..31bc426b877 100644
--- a/matlab/src/featherread.m
+++ b/matlab/src/featherread.m
@@ -83,8 +83,4 @@
     t.Properties.VariableDescriptions = cellstr(variableDescriptions);
 end
 
-% Set the Description property of the table based on the Feather file
-% description.
-t.Properties.Description = metadata.Description;
-
 end
diff --git a/matlab/src/featherwritemex.cc b/matlab/src/featherwritemex.cc
index 3a6815e02c1..d8f90baafc5 100644
--- a/matlab/src/featherwritemex.cc
+++ b/matlab/src/featherwritemex.cc
@@ -32,6 +32,5 @@ void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
       arrow::matlab::FeatherWriter::Open(filename, &feather_writer));
 
   // Write the Feather file table variables and table metadata from MATLAB.
-  feather_writer->WriteMetadata(prhs[2]);
-  arrow::matlab::util::HandleStatus(feather_writer->WriteVariables(prhs[1]));
+  arrow::matlab::util::HandleStatus(feather_writer->WriteVariables(prhs[1], prhs[2]));
 }
diff --git a/matlab/test/tfeathermex.m b/matlab/test/tfeathermex.m
index fa79b4bdef0..77070ad1421 100644
--- a/matlab/test/tfeathermex.m
+++ b/matlab/test/tfeathermex.m
@@ -60,7 +60,7 @@ function InvalidMATLABTableVariableNames(testCase)
             invalidVariable = mlarrow.util.createVariableStruct('double', 1, true, '@');
             validVariable = mlarrow.util.createVariableStruct('double', 1, true, 'Valid');
             variables = [invalidVariable, validVariable];
-            metadata = mlarrow.util.createMetadataStruct('', 1, 2);
+            metadata = mlarrow.util.createMetadataStruct(1, 2);
             featherwritemex(filename, variables, metadata);
             t = featherread(filename);
             
diff --git a/matlab/test/util/createVariablesAndMetadataStructs.m b/matlab/test/util/createVariablesAndMetadataStructs.m
index 01a8f58261b..0c60cbfbbcc 100644
--- a/matlab/test/util/createVariablesAndMetadataStructs.m
+++ b/matlab/test/util/createVariablesAndMetadataStructs.m
@@ -90,9 +90,8 @@
              singleVariable, ...
              doubleVariable];
 
-description = 'test';
 numRows = 3;
 numVariables = length(variables);
 
-metadata = createMetadataStruct(description, numRows, numVariables);
+metadata = createMetadataStruct(numRows, numVariables);
 end

From 6220ddd35aa2318792ada83dadd1679d13787fc2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Pedro?= <joaop@simbioseventures.com>
Date: Sun, 27 Jun 2021 20:56:34 +0530
Subject: [PATCH 460/719] ARROW-12378: [C++][Gandiva] Implement castVARBINARY
 functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implement the following functions:

- castVARBINARY(varchar, len)
- castVARBINARY(varbinary, len)
- castVARBINARY(float/double/int/bigint, bigint len)

Closes #10023 from jpedroantunes/feature/add-cast-varbinary and squashes the following commits:

51173737e <João Pedro> Add base implementation for castVARBINARY methods and the respective tests

Authored-by: João Pedro <joaop@simbioseventures.com>
Signed-off-by: Praveen <praveen@dremio.com>
---
 cpp/src/gandiva/function_registry_string.cc   |  24 +++
 cpp/src/gandiva/gdv_function_stubs.cc         | 199 +++++++++++-------
 cpp/src/gandiva/gdv_function_stubs.h          |   8 +
 cpp/src/gandiva/gdv_function_stubs_test.cc    |  57 +++++
 cpp/src/gandiva/precompiled/string_ops.cc     |  26 +++
 .../gandiva/precompiled/string_ops_test.cc    |  48 +++++
 cpp/src/gandiva/precompiled/types.h           |   8 +
 7 files changed, 298 insertions(+), 72 deletions(-)

diff --git a/cpp/src/gandiva/function_registry_string.cc b/cpp/src/gandiva/function_registry_string.cc
index e8c0739b3d4..5218b1cbc15 100644
--- a/cpp/src/gandiva/function_registry_string.cc
+++ b/cpp/src/gandiva/function_registry_string.cc
@@ -333,6 +333,30 @@ std::vector<NativeFunction> GetStringFunctionRegistry() {
                      kResultNullIfNull, "right_utf8_int32",
                      NativeFunction::kNeedsContext),
 
+      NativeFunction("castVARBINARY", {}, DataTypeVector{binary(), int64()}, binary(),
+                     kResultNullIfNull, "castVARBINARY_binary_int64",
+                     NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors),
+
+      NativeFunction("castVARBINARY", {}, DataTypeVector{utf8(), int64()}, binary(),
+                     kResultNullIfNull, "castVARBINARY_utf8_int64",
+                     NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors),
+
+      NativeFunction("castVARBINARY", {}, DataTypeVector{int32(), int64()}, binary(),
+                     kResultNullIfNull, "gdv_fn_castVARBINARY_int32_int64",
+                     NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors),
+
+      NativeFunction("castVARBINARY", {}, DataTypeVector{int64(), int64()}, binary(),
+                     kResultNullIfNull, "gdv_fn_castVARBINARY_int64_int64",
+                     NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors),
+
+      NativeFunction("castVARBINARY", {}, DataTypeVector{float32(), int64()}, binary(),
+                     kResultNullIfNull, "gdv_fn_castVARBINARY_float32_int64",
+                     NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors),
+
+      NativeFunction("castVARBINARY", {}, DataTypeVector{float64(), int64()}, binary(),
+                     kResultNullIfNull, "gdv_fn_castVARBINARY_float64_int64",
+                     NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors),
+
       NativeFunction("split_part", {}, DataTypeVector{utf8(), utf8(), int32()}, utf8(),
                      kResultNullIfNull, "split_part",
                      NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors)};
diff --git a/cpp/src/gandiva/gdv_function_stubs.cc b/cpp/src/gandiva/gdv_function_stubs.cc
index 26b8654fb7e..38c31a8c3f5 100644
--- a/cpp/src/gandiva/gdv_function_stubs.cc
+++ b/cpp/src/gandiva/gdv_function_stubs.cc
@@ -333,83 +333,90 @@ CAST_NUMERIC_FROM_STRING(double, arrow::DoubleType, FLOAT8)
 
 #undef CAST_NUMERIC_FROM_STRING
 
-#define GDV_FN_CAST_VARCHAR_INTEGER(IN_TYPE, ARROW_TYPE)                                 \
-  GANDIVA_EXPORT                                                                         \
-  const char* gdv_fn_castVARCHAR_##IN_TYPE##_int64(int64_t context, gdv_##IN_TYPE value, \
-                                                   int64_t len, int32_t * out_len) {     \
-    if (len < 0) {                                                                       \
-      gdv_fn_context_set_error_msg(context, "Buffer length can not be negative");        \
-      *out_len = 0;                                                                      \
-      return "";                                                                         \
-    }                                                                                    \
-    if (len == 0) {                                                                      \
-      *out_len = 0;                                                                      \
-      return "";                                                                         \
-    }                                                                                    \
-    arrow::internal::StringFormatter<arrow::ARROW_TYPE> formatter;                       \
-    char* ret = reinterpret_cast<char*>(                                                 \
-        gdv_fn_context_arena_malloc(context, static_cast<int32_t>(len)));                \
-    if (ret == nullptr) {                                                                \
-      gdv_fn_context_set_error_msg(context, "Could not allocate memory");                \
-      *out_len = 0;                                                                      \
-      return "";                                                                         \
-    }                                                                                    \
-    arrow::Status status = formatter(value, [&](arrow::util::string_view v) {            \
-      int64_t size = static_cast<int64_t>(v.size());                                     \
-      *out_len = static_cast<int32_t>(len < size ? len : size);                          \
-      memcpy(ret, v.data(), *out_len);                                                   \
-      return arrow::Status::OK();                                                        \
-    });                                                                                  \
-    if (!status.ok()) {                                                                  \
-      std::string err = "Could not cast " + std::to_string(value) + " to string";        \
-      gdv_fn_context_set_error_msg(context, err.c_str());                                \
-      *out_len = 0;                                                                      \
-      return "";                                                                         \
-    }                                                                                    \
-    return ret;                                                                          \
+#define GDV_FN_CAST_VARLEN_TYPE_FROM_INTEGER(IN_TYPE, CAST_NAME, ARROW_TYPE)      \
+  GANDIVA_EXPORT                                                                  \
+  const char* gdv_fn_cast##CAST_NAME##_##IN_TYPE##_int64(                         \
+      int64_t context, gdv_##IN_TYPE value, int64_t len, int32_t * out_len) {     \
+    if (len < 0) {                                                                \
+      gdv_fn_context_set_error_msg(context, "Buffer length can not be negative"); \
+      *out_len = 0;                                                               \
+      return "";                                                                  \
+    }                                                                             \
+    if (len == 0) {                                                               \
+      *out_len = 0;                                                               \
+      return "";                                                                  \
+    }                                                                             \
+    arrow::internal::StringFormatter<arrow::ARROW_TYPE> formatter;                \
+    char* ret = reinterpret_cast<char*>(                                          \
+        gdv_fn_context_arena_malloc(context, static_cast<int32_t>(len)));         \
+    if (ret == nullptr) {                                                         \
+      gdv_fn_context_set_error_msg(context, "Could not allocate memory");         \
+      *out_len = 0;                                                               \
+      return "";                                                                  \
+    }                                                                             \
+    arrow::Status status = formatter(value, [&](arrow::util::string_view v) {     \
+      int64_t size = static_cast<int64_t>(v.size());                              \
+      *out_len = static_cast<int32_t>(len < size ? len : size);                   \
+      memcpy(ret, v.data(), *out_len);                                            \
+      return arrow::Status::OK();                                                 \
+    });                                                                           \
+    if (!status.ok()) {                                                           \
+      std::string err = "Could not cast " + std::to_string(value) + " to string"; \
+      gdv_fn_context_set_error_msg(context, err.c_str());                         \
+      *out_len = 0;                                                               \
+      return "";                                                                  \
+    }                                                                             \
+    return ret;                                                                   \
   }
 
-#define GDV_FN_CAST_VARCHAR_REAL(IN_TYPE, ARROW_TYPE)                                    \
-  GANDIVA_EXPORT                                                                         \
-  const char* gdv_fn_castVARCHAR_##IN_TYPE##_int64(int64_t context, gdv_##IN_TYPE value, \
-                                                   int64_t len, int32_t * out_len) {     \
-    if (len < 0) {                                                                       \
-      gdv_fn_context_set_error_msg(context, "Buffer length can not be negative");        \
-      *out_len = 0;                                                                      \
-      return "";                                                                         \
-    }                                                                                    \
-    if (len == 0) {                                                                      \
-      *out_len = 0;                                                                      \
-      return "";                                                                         \
-    }                                                                                    \
-    gandiva::GdvStringFormatter<arrow::ARROW_TYPE> formatter;                            \
-    char* ret = reinterpret_cast<char*>(                                                 \
-        gdv_fn_context_arena_malloc(context, static_cast<int32_t>(len)));                \
-    if (ret == nullptr) {                                                                \
-      gdv_fn_context_set_error_msg(context, "Could not allocate memory");                \
-      *out_len = 0;                                                                      \
-      return "";                                                                         \
-    }                                                                                    \
-    arrow::Status status = formatter(value, [&](arrow::util::string_view v) {            \
-      int64_t size = static_cast<int64_t>(v.size());                                     \
-      *out_len = static_cast<int32_t>(len < size ? len : size);                          \
-      memcpy(ret, v.data(), *out_len);                                                   \
-      return arrow::Status::OK();                                                        \
-    });                                                                                  \
-    if (!status.ok()) {                                                                  \
-      std::string err = "Could not cast " + std::to_string(value) + " to string";        \
-      gdv_fn_context_set_error_msg(context, err.c_str());                                \
-      *out_len = 0;                                                                      \
-      return "";                                                                         \
-    }                                                                                    \
-    return ret;                                                                          \
+#define GDV_FN_CAST_VARLEN_TYPE_FROM_REAL(IN_TYPE, CAST_NAME, ARROW_TYPE)         \
+  GANDIVA_EXPORT                                                                  \
+  const char* gdv_fn_cast##CAST_NAME##_##IN_TYPE##_int64(                         \
+      int64_t context, gdv_##IN_TYPE value, int64_t len, int32_t * out_len) {     \
+    if (len < 0) {                                                                \
+      gdv_fn_context_set_error_msg(context, "Buffer length can not be negative"); \
+      *out_len = 0;                                                               \
+      return "";                                                                  \
+    }                                                                             \
+    if (len == 0) {                                                               \
+      *out_len = 0;                                                               \
+      return "";                                                                  \
+    }                                                                             \
+    gandiva::GdvStringFormatter<arrow::ARROW_TYPE> formatter;                     \
+    char* ret = reinterpret_cast<char*>(                                          \
+        gdv_fn_context_arena_malloc(context, static_cast<int32_t>(len)));         \
+    if (ret == nullptr) {                                                         \
+      gdv_fn_context_set_error_msg(context, "Could not allocate memory");         \
+      *out_len = 0;                                                               \
+      return "";                                                                  \
+    }                                                                             \
+    arrow::Status status = formatter(value, [&](arrow::util::string_view v) {     \
+      int64_t size = static_cast<int64_t>(v.size());                              \
+      *out_len = static_cast<int32_t>(len < size ? len : size);                   \
+      memcpy(ret, v.data(), *out_len);                                            \
+      return arrow::Status::OK();                                                 \
+    });                                                                           \
+    if (!status.ok()) {                                                           \
+      std::string err = "Could not cast " + std::to_string(value) + " to string"; \
+      gdv_fn_context_set_error_msg(context, err.c_str());                         \
+      *out_len = 0;                                                               \
+      return "";                                                                  \
+    }                                                                             \
+    return ret;                                                                   \
   }
 
-GDV_FN_CAST_VARCHAR_INTEGER(int32, Int32Type)
-GDV_FN_CAST_VARCHAR_INTEGER(int64, Int64Type)
-GDV_FN_CAST_VARCHAR_REAL(float32, FloatType)
-GDV_FN_CAST_VARCHAR_REAL(float64, DoubleType)
+#define CAST_VARLEN_TYPE_FROM_NUMERIC(VARLEN_TYPE)                    \
+  GDV_FN_CAST_VARLEN_TYPE_FROM_INTEGER(int32, VARLEN_TYPE, Int32Type) \
+  GDV_FN_CAST_VARLEN_TYPE_FROM_INTEGER(int64, VARLEN_TYPE, Int64Type) \
+  GDV_FN_CAST_VARLEN_TYPE_FROM_REAL(float32, VARLEN_TYPE, FloatType)  \
+  GDV_FN_CAST_VARLEN_TYPE_FROM_REAL(float64, VARLEN_TYPE, DoubleType)
 
+CAST_VARLEN_TYPE_FROM_NUMERIC(VARCHAR)
+CAST_VARLEN_TYPE_FROM_NUMERIC(VARBINARY)
+
+#undef CAST_VARLEN_TYPE_FROM_NUMERIC
+#undef GDV_FN_CAST_VARLEN_TYPE_FROM_INTEGER
+#undef GDV_FN_CAST_VARLEN_TYPE_FROM_REAL
 #undef GDV_FN_CAST_VARCHAR_INTEGER
 #undef GDV_FN_CAST_VARCHAR_REAL
 
@@ -700,6 +707,54 @@ void ExportedStubFunctions::AddMappings(Engine* engine) const {
   std::vector<llvm::Type*> args;
   auto types = engine->types();
 
+  // gdv_fn_castVARBINARY_int32
+  args = {
+      types->i64_type(),     // context
+      types->i32_type(),     // int32_t value
+      types->i64_type(),     // int64_t out value length
+      types->i32_ptr_type()  // int32_t out_length
+  };
+
+  engine->AddGlobalMappingForFunc(
+      "gdv_fn_castVARBINARY_int32_int64", types->i8_ptr_type() /*return_type*/, args,
+      reinterpret_cast<void*>(gdv_fn_castVARBINARY_int32_int64));
+
+  // gdv_fn_castVARBINARY_int64
+  args = {
+      types->i64_type(),     // context
+      types->i64_type(),     // int64_t value
+      types->i64_type(),     // int64_t out value length
+      types->i32_ptr_type()  // int32_t out_length
+  };
+
+  engine->AddGlobalMappingForFunc(
+      "gdv_fn_castVARBINARY_int64_int64", types->i8_ptr_type() /*return_type*/, args,
+      reinterpret_cast<void*>(gdv_fn_castVARBINARY_int64_int64));
+
+  // gdv_fn_castVARBINARY_float32
+  args = {
+      types->i64_type(),     // context
+      types->float_type(),   // float value
+      types->i64_type(),     // int64_t out value length
+      types->i64_ptr_type()  // int32_t out_length
+  };
+
+  engine->AddGlobalMappingForFunc(
+      "gdv_fn_castVARBINARY_float32_int64", types->i8_ptr_type() /*return_type*/, args,
+      reinterpret_cast<void*>(gdv_fn_castVARBINARY_float32_int64));
+
+  // gdv_fn_castVARBINARY_float64
+  args = {
+      types->i64_type(),     // context
+      types->i64_type(),     // double value
+      types->i64_type(),     // int64_t out value length
+      types->i32_ptr_type()  // int32_t out_length
+  };
+
+  engine->AddGlobalMappingForFunc(
+      "gdv_fn_castVARBINARY_float64_int64", types->i8_ptr_type() /*return_type*/, args,
+      reinterpret_cast<void*>(gdv_fn_castVARBINARY_float64_int64));
+
   // gdv_fn_dec_from_string
   args = {
       types->i64_type(),      // context
diff --git a/cpp/src/gandiva/gdv_function_stubs.h b/cpp/src/gandiva/gdv_function_stubs.h
index d4a127dd1cf..ee22c3f4ece 100644
--- a/cpp/src/gandiva/gdv_function_stubs.h
+++ b/cpp/src/gandiva/gdv_function_stubs.h
@@ -71,6 +71,14 @@ bool in_expr_lookup_utf8(int64_t ptr, const char* data, int data_len, bool in_va
 int gdv_fn_time_with_zone(int* time_fields, const char* zone, int zone_len,
                           int64_t* ret_time);
 
+GANDIVA_EXPORT
+const char* gdv_fn_castVARBINARY_int32_int64(int64_t context, gdv_int32 value,
+                                             int64_t out_len, int32_t* out_length);
+
+GANDIVA_EXPORT
+const char* gdv_fn_castVARBINARY_int64_int64(int64_t context, gdv_int64 value,
+                                             int64_t out_len, int32_t* out_length);
+
 GANDIVA_EXPORT
 const char* gdv_fn_sha256_decimal128(int64_t context, int64_t x_high, uint64_t x_low,
                                      int32_t x_precision, int32_t x_scale,
diff --git a/cpp/src/gandiva/gdv_function_stubs_test.cc b/cpp/src/gandiva/gdv_function_stubs_test.cc
index 6cfff5b891f..354a8bb191d 100644
--- a/cpp/src/gandiva/gdv_function_stubs_test.cc
+++ b/cpp/src/gandiva/gdv_function_stubs_test.cc
@@ -24,6 +24,63 @@
 
 namespace gandiva {
 
+TEST(TestGdvFnStubs, TestCastVarbinaryNumeric) {
+  gandiva::ExecutionContext ctx;
+
+  int64_t ctx_ptr = reinterpret_cast<int64_t>(&ctx);
+  int32_t out_len = 0;
+
+  // tests for integer values as input
+  const char* out_str = gdv_fn_castVARBINARY_int32_int64(ctx_ptr, -46, 100, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "-46");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_castVARBINARY_int32_int64(ctx_ptr, 2147483647, 100, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "2147483647");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_castVARBINARY_int32_int64(ctx_ptr, -2147483647 - 1, 100, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "-2147483648");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_castVARBINARY_int32_int64(ctx_ptr, 0, 100, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "0");
+  EXPECT_FALSE(ctx.has_error());
+
+  // test with required length less than actual buffer length
+  out_str = gdv_fn_castVARBINARY_int32_int64(ctx_ptr, 34567, 3, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "345");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_castVARBINARY_int32_int64(ctx_ptr, 347, 0, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+  EXPECT_FALSE(ctx.has_error());
+
+  gdv_fn_castVARBINARY_int32_int64(ctx_ptr, 347, -1, &out_len);
+  EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Buffer length can not be negative"));
+  ctx.Reset();
+
+  // tests for big integer values as input
+  out_str =
+      gdv_fn_castVARBINARY_int64_int64(ctx_ptr, 9223372036854775807LL, 100, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "9223372036854775807");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_castVARBINARY_int64_int64(ctx_ptr, -9223372036854775807LL - 1, 100,
+                                             &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "-9223372036854775808");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_castVARBINARY_int64_int64(ctx_ptr, 0, 100, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "0");
+  EXPECT_FALSE(ctx.has_error());
+
+  // test with required length less than actual buffer length
+  out_str = gdv_fn_castVARBINARY_int64_int64(ctx_ptr, 12345, 3, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "123");
+  EXPECT_FALSE(ctx.has_error());
+}
+
 TEST(TestGdvFnStubs, TestCastINT) {
   gandiva::ExecutionContext ctx;
 
diff --git a/cpp/src/gandiva/precompiled/string_ops.cc b/cpp/src/gandiva/precompiled/string_ops.cc
index 1cd566de4a5..fe5fcf4293f 100644
--- a/cpp/src/gandiva/precompiled/string_ops.cc
+++ b/cpp/src/gandiva/precompiled/string_ops.cc
@@ -588,6 +588,32 @@ CAST_VARCHAR_FROM_VARLEN_TYPE(binary)
 
 #undef CAST_VARCHAR_FROM_VARLEN_TYPE
 
+// Add functions for castVARBINARY
+#define CAST_VARBINARY_FROM_STRING_AND_BINARY(TYPE)                                    \
+  GANDIVA_EXPORT                                                                       \
+  const char* castVARBINARY_##TYPE##_int64(gdv_int64 context, const char* data,        \
+                                           gdv_int32 data_len, int64_t out_len,        \
+                                           int32_t* out_length) {                      \
+    int32_t len = static_cast<int32_t>(out_len);                                       \
+    if (len < 0) {                                                                     \
+      gdv_fn_context_set_error_msg(context, "Output buffer length can't be negative"); \
+      *out_length = 0;                                                                 \
+      return "";                                                                       \
+    }                                                                                  \
+                                                                                       \
+    if (len >= data_len || len == 0) {                                                 \
+      *out_length = data_len;                                                          \
+    } else {                                                                           \
+      *out_length = len;                                                               \
+    }                                                                                  \
+    return data;                                                                       \
+  }
+
+CAST_VARBINARY_FROM_STRING_AND_BINARY(utf8)
+CAST_VARBINARY_FROM_STRING_AND_BINARY(binary)
+
+#undef CAST_VARBINARY_FROM_STRING_AND_BINARY
+
 #define IS_NULL(NAME, TYPE)                                                \
   FORCE_INLINE                                                             \
   bool NAME##_##TYPE(gdv_##TYPE in, gdv_int32 len, gdv_boolean is_valid) { \
diff --git a/cpp/src/gandiva/precompiled/string_ops_test.cc b/cpp/src/gandiva/precompiled/string_ops_test.cc
index 2460633d268..3763a61b6a7 100644
--- a/cpp/src/gandiva/precompiled/string_ops_test.cc
+++ b/cpp/src/gandiva/precompiled/string_ops_test.cc
@@ -583,6 +583,54 @@ TEST(TestStringOps, TestSubstringInvalidInputs) {
   EXPECT_FALSE(ctx.has_error());
 }
 
+TEST(TestGdvFnStubs, TestCastVarbinaryUtf8) {
+  gandiva::ExecutionContext ctx;
+
+  int64_t ctx_ptr = reinterpret_cast<int64_t>(&ctx);
+  int32_t out_len = 0;
+  const char* input = "abc";
+  const char* out;
+
+  out = castVARBINARY_utf8_int64(ctx_ptr, input, 3, 0, &out_len);
+  EXPECT_EQ(std::string(out, out_len), input);
+
+  out = castVARBINARY_utf8_int64(ctx_ptr, input, 3, 1, &out_len);
+  EXPECT_EQ(std::string(out, out_len), "a");
+
+  out = castVARBINARY_utf8_int64(ctx_ptr, input, 3, 500, &out_len);
+  EXPECT_EQ(std::string(out, out_len), input);
+
+  out = castVARBINARY_utf8_int64(ctx_ptr, input, 3, -10, &out_len);
+  EXPECT_EQ(std::string(out, out_len), "");
+  EXPECT_THAT(ctx.get_error(),
+              ::testing::HasSubstr("Output buffer length can't be negative"));
+  ctx.Reset();
+}
+
+TEST(TestGdvFnStubs, TestCastVarbinaryBinary) {
+  gandiva::ExecutionContext ctx;
+
+  int64_t ctx_ptr = reinterpret_cast<int64_t>(&ctx);
+  int32_t out_len = 0;
+  const char* input = "\\x41\\x42\\x43";
+  const char* out;
+
+  out = castVARBINARY_binary_int64(ctx_ptr, input, 12, 0, &out_len);
+  EXPECT_EQ(std::string(out, out_len), input);
+
+  out = castVARBINARY_binary_int64(ctx_ptr, input, 8, 8, &out_len);
+  EXPECT_EQ(std::string(out, out_len), "\\x41\\x42");
+
+  out = castVARBINARY_binary_int64(ctx_ptr, input, 12, 500, &out_len);
+  EXPECT_EQ(std::string(out, out_len), input);
+
+  out = castVARBINARY_binary_int64(ctx_ptr, input, 12, -10, &out_len);
+  EXPECT_EQ(std::string(out, out_len), "");
+  EXPECT_THAT(ctx.get_error(),
+              ::testing::HasSubstr("Output buffer length can't be negative"));
+  ctx.Reset();
+}
+
 TEST(TestStringOps, TestConcat) {
   gandiva::ExecutionContext ctx;
   uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
diff --git a/cpp/src/gandiva/precompiled/types.h b/cpp/src/gandiva/precompiled/types.h
index be769ddbdaf..5bd2242195a 100644
--- a/cpp/src/gandiva/precompiled/types.h
+++ b/cpp/src/gandiva/precompiled/types.h
@@ -388,6 +388,14 @@ const char* castVARCHAR_utf8_int64(gdv_int64 context, const char* data,
                                    gdv_int32 data_len, int64_t out_len,
                                    int32_t* out_length);
 
+const char* castVARBINARY_utf8_int64(gdv_int64 context, const char* data,
+                                     gdv_int32 data_len, int64_t out_len,
+                                     int32_t* out_length);
+
+const char* castVARBINARY_binary_int64(gdv_int64 context, const char* data,
+                                       gdv_int32 data_len, int64_t out_len,
+                                       int32_t* out_length);
+
 const char* reverse_utf8(gdv_int64 context, const char* data, gdv_int32 data_len,
                          int32_t* out_len);
 

From bbcf31565a89fffbf83198b424cb5b5a73d1f862 Mon Sep 17 00:00:00 2001
From: Ying Zhou <yingzhou474@gmail.com>
Date: Mon, 28 Jun 2021 10:43:00 +0200
Subject: [PATCH 461/719] ARROW-13154: [C++] Remove the undocumented type_code
 <= 125 restriction in union types

For DENSE_UNION and SPARSE_UNION type_code must not be 126 or 127 which is not intended. This has been fixed. Furthermore array_union_test.cc has been modified to test for whether 127 is allowed as a type_code without errors.

Closes #10599 from mathyingzhou/ARROW-13154

Authored-by: Ying Zhou <yingzhou474@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/array/array_union_test.cc | 12 ++++++------
 cpp/src/arrow/array/builder_union.cc    |  4 ++--
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/cpp/src/arrow/array/array_union_test.cc b/cpp/src/arrow/array/array_union_test.cc
index 88d25e823bb..d3afe40df8d 100644
--- a/cpp/src/arrow/array/array_union_test.cc
+++ b/cpp/src/arrow/array/array_union_test.cc
@@ -15,10 +15,10 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include <string>
-
 #include <gtest/gtest.h>
 
+#include <string>
+
 #include "arrow/array.h"
 #include "arrow/array/builder_nested.h"
 #include "arrow/array/builder_union.h"
@@ -107,11 +107,11 @@ class TestUnionArrayFactories : public ::testing::Test {
  public:
   void SetUp() {
     pool_ = default_memory_pool();
-    type_codes_ = {1, 2, 4, 8};
+    type_codes_ = {1, 2, 4, 127};
     ArrayFromVector<Int8Type>({0, 1, 2, 0, 1, 3, 2, 0, 2, 1}, &type_ids_);
-    ArrayFromVector<Int8Type>({1, 2, 4, 1, 2, 8, 4, 1, 4, 2}, &logical_type_ids_);
-    ArrayFromVector<Int8Type>({1, 2, 4, 1, -2, 8, 4, 1, 4, 2}, &invalid_type_ids1_);
-    ArrayFromVector<Int8Type>({1, 2, 4, 1, 3, 8, 4, 1, 4, 2}, &invalid_type_ids2_);
+    ArrayFromVector<Int8Type>({1, 2, 4, 1, 2, 127, 4, 1, 4, 2}, &logical_type_ids_);
+    ArrayFromVector<Int8Type>({1, 2, 4, 1, -2, 127, 4, 1, 4, 2}, &invalid_type_ids1_);
+    ArrayFromVector<Int8Type>({1, 2, 4, 1, 3, 127, 4, 1, 4, 2}, &invalid_type_ids2_);
   }
 
   void CheckUnionArray(const UnionArray& array, UnionMode::type mode,
diff --git a/cpp/src/arrow/array/builder_union.cc b/cpp/src/arrow/array/builder_union.cc
index 90d4f42084a..8617cb73fce 100644
--- a/cpp/src/arrow/array/builder_union.cc
+++ b/cpp/src/arrow/array/builder_union.cc
@@ -65,8 +65,8 @@ BasicUnionBuilder::BasicUnionBuilder(
   children_ = children;
 
   type_id_to_children_.resize(union_type.max_type_code() + 1, nullptr);
-  DCHECK_LT(
-      type_id_to_children_.size(),
+  DCHECK_LE(
+      type_id_to_children_.size() - 1,
       static_cast<decltype(type_id_to_children_)::size_type>(UnionType::kMaxTypeCode));
 
   for (size_t i = 0; i < children.size(); ++i) {

From 7a510539db3e240a75887707293f7515cc39f176 Mon Sep 17 00:00:00 2001
From: "Uwe L. Korn" <uwe.korn@quantco.com>
Date: Mon, 28 Jun 2021 13:28:03 +0200
Subject: [PATCH 462/719] ARROW-11608: [CI] Fix turbodbc nightly
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #10605 from xhochy/ARROW-11608

Authored-by: Uwe L. Korn <uwe.korn@quantco.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 ci/docker/conda-python-turbodbc.dockerfile | 2 +-
 ci/scripts/install_turbodbc.sh             | 6 ++++++
 ci/scripts/integration_turbodbc.sh         | 6 +++---
 3 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/ci/docker/conda-python-turbodbc.dockerfile b/ci/docker/conda-python-turbodbc.dockerfile
index ff7fdf6e1d0..e748604dee3 100644
--- a/ci/docker/conda-python-turbodbc.dockerfile
+++ b/ci/docker/conda-python-turbodbc.dockerfile
@@ -30,7 +30,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
     rm -rf /var/lib/apt/lists/*
 
 # install turbodbc dependencies from conda-forge
-RUN conda install -c conda-forge -q\
+RUN conda install -c conda-forge -q \
         pybind11 \
         pytest-cov \
         mock \
diff --git a/ci/scripts/install_turbodbc.sh b/ci/scripts/install_turbodbc.sh
index a71520bebf4..3e644a3e27a 100755
--- a/ci/scripts/install_turbodbc.sh
+++ b/ci/scripts/install_turbodbc.sh
@@ -35,3 +35,9 @@ elif [ "${turbodbc}" = "latest" ]; then
 else
   git -C "${target}" checkout ${turbodbc};
 fi
+
+pushd ${target}
+wget -q https://github.com/pybind/pybind11/archive/v2.6.2.tar.gz
+tar xvf v2.6.2.tar.gz
+mv pybind11-2.6.2 pybind11
+popd
diff --git a/ci/scripts/integration_turbodbc.sh b/ci/scripts/integration_turbodbc.sh
index f56074358a6..f0fafd51228 100755
--- a/ci/scripts/integration_turbodbc.sh
+++ b/ci/scripts/integration_turbodbc.sh
@@ -17,7 +17,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-set -e
+set -ex
 
 source_dir=${1}
 build_dir=${2}/turbodbc
@@ -31,7 +31,7 @@ mkdir -p ${build_dir}
 pushd ${build_dir}
 
 cmake -DCMAKE_INSTALL_PREFIX=${ARROW_HOME} \
-      -DCMAKE_CXX_FLAGS=${CXXFLAGS} \
+      -DCMAKE_CXX_FLAGS="${CXXFLAGS}" \
       -DPYTHON_EXECUTABLE=$(which python) \
       -GNinja \
       ${source_dir}
@@ -39,7 +39,7 @@ ninja install
 
 # TODO(ARROW-5074)
 export LD_LIBRARY_PATH="${ARROW_HOME}/lib:${LD_LIBRARY_PATH}"
-export ODBCSYSINI="${source_dir}/travis/odbc/"
+export ODBCSYSINI="${source_dir}/earthly/odbc/"
 
 service postgresql start
 ctest --output-on-failure

From de8260214573942ce4b134962255ee77da26d61e Mon Sep 17 00:00:00 2001
From: Bryan Cutler <cutlerb@gmail.com>
Date: Mon, 28 Jun 2021 08:39:38 -0400
Subject: [PATCH 463/719] ARROW-13076: [Java] Allow ExtensionTypeVector with
 Struct or Union vector storage

This relaxes the type bounds for an `ExtensionTypeVector` to the `ValueVector` interface so that `UnionVector` and `StructVector` can be used as the underlying storage. Test was added for an extension type based on `StructVector`.

Closes #10607 from BryanCutler/java-UnionVector-extension-ARROW-13076

Authored-by: Bryan Cutler <cutlerb@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 .../arrow/vector/ExtensionTypeVector.java     |   2 +-
 .../vector/types/pojo/TestExtensionType.java  | 134 +++++++++++++++++-
 2 files changed, 134 insertions(+), 2 deletions(-)

diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ExtensionTypeVector.java b/java/vector/src/main/java/org/apache/arrow/vector/ExtensionTypeVector.java
index df17ec93dba..516077d8328 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/ExtensionTypeVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/ExtensionTypeVector.java
@@ -36,7 +36,7 @@
  * A vector that wraps an underlying vector, used to help implement extension types.
  * @param <T> The wrapped vector type.
  */
-public abstract class ExtensionTypeVector<T extends BaseValueVector & FieldVector> extends BaseValueVector implements
+public abstract class ExtensionTypeVector<T extends ValueVector & FieldVector> extends BaseValueVector implements
     FieldVector {
 
   private final T underlyingVector;
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java b/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java
index 3e652b9bb06..53f009cb761 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java
@@ -38,9 +38,12 @@
 import org.apache.arrow.vector.ExtensionTypeVector;
 import org.apache.arrow.vector.FieldVector;
 import org.apache.arrow.vector.FixedSizeBinaryVector;
+import org.apache.arrow.vector.Float4Vector;
 import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.complex.StructVector;
 import org.apache.arrow.vector.ipc.ArrowFileReader;
 import org.apache.arrow.vector.ipc.ArrowFileWriter;
+import org.apache.arrow.vector.types.FloatingPointPrecision;
 import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType;
 import org.junit.Assert;
 import org.junit.Test;
@@ -171,6 +174,62 @@ public void testNullCheck() {
     assertTrue(e.getMessage().contains("underlyingVector can not be null."));
   }
 
+  /**
+   * Test that a custom Location type can be round-tripped through a temporary file.
+   */
+  @Test
+  public void roundtripLocation() throws IOException {
+    ExtensionTypeRegistry.register(new LocationType());
+    final Schema schema = new Schema(Collections.singletonList(Field.nullable("location", new LocationType())));
+    try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
+         final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) {
+      LocationVector vector = (LocationVector) root.getVector("location");
+      vector.allocateNew();
+      vector.set(0, 34.073814f, -118.240784f);
+      vector.set(2, 37.768056f, -122.3875f);
+      vector.set(3, 40.739716f, -73.840782f);
+      vector.setValueCount(4);
+      root.setRowCount(4);
+
+      final File file = File.createTempFile("locationtest", ".arrow");
+      try (final WritableByteChannel channel = FileChannel
+              .open(Paths.get(file.getAbsolutePath()), StandardOpenOption.WRITE);
+           final ArrowFileWriter writer = new ArrowFileWriter(root, null, channel)) {
+        writer.start();
+        writer.writeBatch();
+        writer.end();
+      }
+
+      try (final SeekableByteChannel channel = Files.newByteChannel(Paths.get(file.getAbsolutePath()));
+           final ArrowFileReader reader = new ArrowFileReader(channel, allocator)) {
+        reader.loadNextBatch();
+        final VectorSchemaRoot readerRoot = reader.getVectorSchemaRoot();
+        Assert.assertEquals(root.getSchema(), readerRoot.getSchema());
+
+        final Field field = readerRoot.getSchema().getFields().get(0);
+        final LocationType expectedType = new LocationType();
+        Assert.assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_NAME),
+                expectedType.extensionName());
+        Assert.assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_METADATA),
+                expectedType.serialize());
+
+        final ExtensionTypeVector deserialized = (ExtensionTypeVector) readerRoot.getFieldVectors().get(0);
+        Assert.assertTrue(deserialized instanceof LocationVector);
+        Assert.assertEquals(deserialized.getName(), "location");
+        StructVector deserStruct = (StructVector) deserialized.getUnderlyingVector();
+        Assert.assertNotNull(deserStruct.getChild("Latitude"));
+        Assert.assertNotNull(deserStruct.getChild("Longitude"));
+        Assert.assertEquals(vector.getValueCount(), deserialized.getValueCount());
+        for (int i = 0; i < vector.getValueCount(); i++) {
+          Assert.assertEquals(vector.isNull(i), deserialized.isNull(i));
+          if (!vector.isNull(i)) {
+            Assert.assertEquals(vector.getObject(i), deserialized.getObject(i));
+          }
+        }
+      }
+    }
+  }
+
   static class UuidType extends ExtensionType {
 
     @Override
@@ -205,7 +264,6 @@ public String serialize() {
     public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator) {
       return new UuidVector(name, allocator, new FixedSizeBinaryVector(name, allocator, 16));
     }
-
   }
 
   static class UuidVector extends ExtensionTypeVector<FixedSizeBinaryVector> {
@@ -237,4 +295,78 @@ public void set(int index, UUID uuid) {
       getUnderlyingVector().set(index, bb.array());
     }
   }
+
+  static class LocationType extends ExtensionType {
+
+    @Override
+    public ArrowType storageType() {
+      return Struct.INSTANCE;
+    }
+
+    @Override
+    public String extensionName() {
+      return "location";
+    }
+
+    @Override
+    public boolean extensionEquals(ExtensionType other) {
+      return other instanceof LocationType;
+    }
+
+    @Override
+    public ArrowType deserialize(ArrowType storageType, String serializedData) {
+      if (!storageType.equals(storageType())) {
+        throw new UnsupportedOperationException("Cannot construct LocationType from underlying type " + storageType);
+      }
+      return new LocationType();
+    }
+
+    @Override
+    public String serialize() {
+      return "";
+    }
+
+    @Override
+    public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator) {
+      return new LocationVector(name, allocator);
+    }
+  }
+
+  static class LocationVector extends ExtensionTypeVector<StructVector> {
+
+    private static StructVector buildUnderlyingVector(String name, BufferAllocator allocator) {
+      final StructVector underlyingVector =
+              new StructVector(name, allocator, FieldType.nullable(ArrowType.Struct.INSTANCE), null);
+      underlyingVector.addOrGet("Latitude",
+              FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)), Float4Vector.class);
+      underlyingVector.addOrGet("Longitude",
+              FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)), Float4Vector.class);
+      return underlyingVector;
+    }
+
+    public LocationVector(String name, BufferAllocator allocator) {
+      super(name, allocator, buildUnderlyingVector(name, allocator));
+    }
+
+    @Override
+    public int hashCode(int index) {
+      return hashCode(index, null);
+    }
+
+    @Override
+    public int hashCode(int index, ArrowBufHasher hasher) {
+      return getUnderlyingVector().hashCode(index, hasher);
+    }
+
+    @Override
+    public java.util.Map<String, ?> getObject(int index) {
+      return getUnderlyingVector().getObject(index);
+    }
+
+    public void set(int index, float latitude, float longitude) {
+      getUnderlyingVector().getChild("Latitude", Float4Vector.class).set(index, latitude);
+      getUnderlyingVector().getChild("Longitude", Float4Vector.class).set(index, longitude);
+      getUnderlyingVector().setIndexDefined(index);
+    }
+  }
 }

From 6339ff89ac8e3559ae4be41a3fd2cdf914127737 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Mon, 28 Jun 2021 16:05:29 +0200
Subject: [PATCH 464/719] ARROW-12142: [Python][Doc] Mention the CXX ABI flag
 in the docs

Getting weird linker errors when trying to compile C++ code against the manylinux-provided Arrow shared library is common.  Give the solution explicitly in the docs.

Closes #10582 from pitrou/ARROW-12142-cxx-abi-flag

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 docs/source/python/extending.rst | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/docs/source/python/extending.rst b/docs/source/python/extending.rst
index 5f6ddb154e6..5e00e79059c 100644
--- a/docs/source/python/extending.rst
+++ b/docs/source/python/extending.rst
@@ -477,3 +477,7 @@ wheel version (2010 or 2014) is being used. In addition to the other notes
 above, if you are compiling C++ using these shared libraries, you will need
 to make sure you use a compatible toolchain as well or you might see a
 segfault during runtime.
+
+Also, if you encounter errors when linking or loading the library, consider
+setting the ``_GLIBCXX_USE_CXX11_ABI`` preprocessor macro to ``0``
+(for example by adding ``-D_GLIBCXX_USE_CXX11_ABI=0`` to ``CFLAGS``).

From d596ca76d78bc6df57fcc7d0cba1b649a6eec706 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Mon, 28 Jun 2021 16:15:59 +0200
Subject: [PATCH 465/719] ARROW-13157: [C++][Python] Add find_substring_regex
 kernel and implement ignore_case for find_substring

This adds a `find_substring_regex` kernel and adds support for case insensitivity to the `find_substring` kernel.

RE2 only returns the match position if you have a capture group. Hence we have to modify the supplied regex. For literal patterns, we have to use RE2::QuoteMeta instead of setting the literal flag on the regex.

Closes #10597 from lidavidm/arrow-13157

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 .../arrow/compute/kernels/scalar_string.cc    | 88 ++++++++++++++++---
 .../compute/kernels/scalar_string_test.cc     | 30 +++++++
 docs/source/cpp/compute.rst                   |  4 +-
 docs/source/python/api/compute.rst            |  1 +
 python/pyarrow/compute.py                     | 27 +++++-
 python/pyarrow/tests/test_compute.py          | 27 +++---
 6 files changed, 145 insertions(+), 32 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/scalar_string.cc b/cpp/src/arrow/compute/kernels/scalar_string.cc
index dbacb6bb96f..e6820fe4747 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string.cc
@@ -917,13 +917,44 @@ struct FindSubstring {
   }
 };
 
+#ifdef ARROW_WITH_RE2
+struct FindSubstringRegex {
+  std::unique_ptr<RE2> regex_match_;
+
+  explicit FindSubstringRegex(const MatchSubstringOptions& options,
+                              bool literal = false) {
+    std::string regex = "(";
+    regex.reserve(options.pattern.length() + 2);
+    regex += literal ? RE2::QuoteMeta(options.pattern) : options.pattern;
+    regex += ")";
+    regex_match_.reset(new RE2(std::move(regex), RegexSubstringMatcher::MakeRE2Options(
+                                                     options, /*literal=*/false)));
+  }
+
+  template <typename OutValue, typename... Ignored>
+  OutValue Call(KernelContext*, util::string_view val, Status*) const {
+    re2::StringPiece piece(val.data(), val.length());
+    re2::StringPiece match;
+    if (re2::RE2::PartialMatch(piece, *regex_match_, &match)) {
+      return static_cast<OutValue>(match.data() - piece.data());
+    }
+    return -1;
+  }
+};
+#endif
+
 template <typename InputType>
 struct FindSubstringExec {
   using OffsetType = typename TypeTraits<InputType>::OffsetType;
   static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     const MatchSubstringOptions& options = MatchSubstringState::Get(ctx);
     if (options.ignore_case) {
-      return Status::NotImplemented("find_substring with ignore_case");
+#ifdef ARROW_WITH_RE2
+      applicator::ScalarUnaryNotNullStateful<OffsetType, InputType, FindSubstringRegex>
+          kernel{FindSubstringRegex(options, /*literal=*/true)};
+      return kernel.Exec(ctx, batch, out);
+#endif
+      return Status::NotImplemented("ignore_case requires RE2");
     }
     applicator::ScalarUnaryNotNullStateful<OffsetType, InputType, FindSubstring> kernel{
         FindSubstring(PlainSubstringMatcher(options))};
@@ -938,21 +969,52 @@ const FunctionDoc find_substring_doc(
      "Null inputs emit null. The pattern must be given in MatchSubstringOptions."),
     {"strings"}, "MatchSubstringOptions");
 
+#ifdef ARROW_WITH_RE2
+template <typename InputType>
+struct FindSubstringRegexExec {
+  using OffsetType = typename TypeTraits<InputType>::OffsetType;
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    const MatchSubstringOptions& options = MatchSubstringState::Get(ctx);
+    applicator::ScalarUnaryNotNullStateful<OffsetType, InputType, FindSubstringRegex>
+        kernel{FindSubstringRegex(options, /*literal=*/false)};
+    return kernel.Exec(ctx, batch, out);
+  }
+};
+
+const FunctionDoc find_substring_regex_doc(
+    "Find location of first match of regex pattern",
+    ("For each string in `strings`, emit the index of the first match of the given "
+     "pattern, or -1 if not found.\n"
+     "Null inputs emit null. The pattern must be given in MatchSubstringOptions."),
+    {"strings"}, "MatchSubstringOptions");
+#endif
+
 void AddFindSubstring(FunctionRegistry* registry) {
-  auto func = std::make_shared<ScalarFunction>("find_substring", Arity::Unary(),
-                                               &find_substring_doc);
-  for (const auto& ty : BaseBinaryTypes()) {
-    std::shared_ptr<DataType> offset_type;
-    if (ty->id() == Type::type::LARGE_BINARY || ty->id() == Type::type::LARGE_STRING) {
-      offset_type = int64();
-    } else {
-      offset_type = int32();
+  {
+    auto func = std::make_shared<ScalarFunction>("find_substring", Arity::Unary(),
+                                                 &find_substring_doc);
+    for (const auto& ty : BaseBinaryTypes()) {
+      auto offset_type = offset_bit_width(ty->id()) == 64 ? int64() : int32();
+      DCHECK_OK(func->AddKernel({ty}, offset_type,
+                                GenerateTypeAgnosticVarBinaryBase<FindSubstringExec>(ty),
+                                MatchSubstringState::Init));
     }
-    DCHECK_OK(func->AddKernel({ty}, offset_type,
-                              GenerateTypeAgnosticVarBinaryBase<FindSubstringExec>(ty),
-                              MatchSubstringState::Init));
+    DCHECK_OK(registry->AddFunction(std::move(func)));
   }
-  DCHECK_OK(registry->AddFunction(std::move(func)));
+#ifdef ARROW_WITH_RE2
+  {
+    auto func = std::make_shared<ScalarFunction>("find_substring_regex", Arity::Unary(),
+                                                 &find_substring_regex_doc);
+    for (const auto& ty : BaseBinaryTypes()) {
+      auto offset_type = offset_bit_width(ty->id()) == 64 ? int64() : int32();
+      DCHECK_OK(
+          func->AddKernel({ty}, offset_type,
+                          GenerateTypeAgnosticVarBinaryBase<FindSubstringRegexExec>(ty),
+                          MatchSubstringState::Init));
+    }
+    DCHECK_OK(registry->AddFunction(std::move(func)));
+  }
+#endif
 }
 
 // Substring count
diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
index 6192e0a5dd7..d5c256fd8ef 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
@@ -173,6 +173,36 @@ TYPED_TEST(TestBinaryKernels, FindSubstring) {
                    "[0, 0, null]", &options_empty);
 }
 
+#ifdef ARROW_WITH_RE2
+TYPED_TEST(TestBinaryKernels, FindSubstringIgnoreCase) {
+  MatchSubstringOptions options{"?AB)", /*ignore_case=*/true};
+  this->CheckUnary("find_substring", "[]", this->offset_type(), "[]", &options);
+  this->CheckUnary("find_substring",
+                   R"-(["?aB)c", "acb", "c?Ab)", null, "?aBc", "AB)"])-",
+                   this->offset_type(), "[0, -1, 1, null, -1, -1]", &options);
+}
+
+TYPED_TEST(TestBinaryKernels, FindSubstringRegex) {
+  MatchSubstringOptions options{"a+", /*ignore_case=*/false};
+  this->CheckUnary("find_substring_regex", "[]", this->offset_type(), "[]", &options);
+  this->CheckUnary("find_substring_regex", R"(["a", "A", "baaa", null, "", "AaaA"])",
+                   this->offset_type(), "[0, -1, 1, null, -1, 1]", &options);
+
+  options.ignore_case = true;
+  this->CheckUnary("find_substring_regex", "[]", this->offset_type(), "[]", &options);
+  this->CheckUnary("find_substring_regex", R"(["a", "A", "baaa", null, "", "AaaA"])",
+                   this->offset_type(), "[0, 0, 1, null, -1, 0]", &options);
+}
+#else
+TYPED_TEST(TestBinaryKernels, FindSubstringIgnoreCase) {
+  MatchSubstringOptions options{"a+", /*ignore_case=*/true};
+  Datum input = ArrayFromJSON(this->type(), R"(["a"])");
+  EXPECT_RAISES_WITH_MESSAGE_THAT(NotImplemented,
+                                  ::testing::HasSubstr("ignore_case requires RE2"),
+                                  CallFunction("find_substring", {input}, &options));
+}
+#endif
+
 TYPED_TEST(TestBinaryKernels, CountSubstring) {
   MatchSubstringOptions options{"aba"};
   this->CheckUnary("count_substring", "[]", this->offset_type(), "[]", &options);
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index 147885560f5..e785756dcda 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -597,7 +597,9 @@ Containment tests
 +---------------------------+------------+------------------------------------+--------------------+----------------------------------------+
 | ends_with                 | Unary      | String-like                        | Boolean (2)        | :struct:`MatchSubstringOptions`        |
 +---------------------------+------------+------------------------------------+--------------------+----------------------------------------+
-| find_substring            | Unary      | String-like                        | Int32 or Int64 (3) | :struct:`MatchSubstringOptions`        |
+| find_substring            | Unary      | Binary- and String-like            | Int32 or Int64 (3) | :struct:`MatchSubstringOptions`        |
++---------------------------+------------+------------------------------------+--------------------+----------------------------------------+
+| find_substring_regex      | Unary      | Binary- and String-like            | Int32 or Int64 (3) | :struct:`MatchSubstringOptions`        |
 +---------------------------+------------+------------------------------------+--------------------+----------------------------------------+
 | index_in                  | Unary      | Boolean, Null, Numeric, Temporal,  | Int32 (4)          | :struct:`SetLookupOptions`             |
 |                           |            | Binary- and String-like            |                    |                                        |
diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst
index 80fcb2078f1..8b264ed9b83 100644
--- a/docs/source/python/api/compute.rst
+++ b/docs/source/python/api/compute.rst
@@ -205,6 +205,7 @@ Containment tests
    count_substring_regex
    ends_with
    find_substring
+   find_substring_regex
    index_in
    is_in
    match_like
diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py
index b258b551f02..aacf8456c1b 100644
--- a/python/pyarrow/compute.py
+++ b/python/pyarrow/compute.py
@@ -331,7 +331,7 @@ def count_substring_regex(array, pattern, *, ignore_case=False):
                          MatchSubstringOptions(pattern, ignore_case))
 
 
-def find_substring(array, pattern):
+def find_substring(array, pattern, *, ignore_case=False):
     """
     Find the index of the first occurrence of substring *pattern* in each
     value of a string array.
@@ -341,13 +341,36 @@ def find_substring(array, pattern):
     array : pyarrow.Array or pyarrow.ChunkedArray
     pattern : str
         pattern to search for exact matches
+    ignore_case : bool, default False
+        Ignore case while searching.
 
     Returns
     -------
     result : pyarrow.Array or pyarrow.ChunkedArray
     """
     return call_function("find_substring", [array],
-                         MatchSubstringOptions(pattern))
+                         MatchSubstringOptions(pattern, ignore_case))
+
+
+def find_substring_regex(array, pattern, *, ignore_case=False):
+    """
+    Find the index of the first match of regex *pattern* in each
+    value of a string array.
+
+    Parameters
+    ----------
+    array : pyarrow.Array or pyarrow.ChunkedArray
+    pattern : str
+        regex pattern to search for
+    ignore_case : bool, default False
+        Ignore case while searching.
+
+    Returns
+    -------
+    result : pyarrow.Array or pyarrow.ChunkedArray
+    """
+    return call_function("find_substring_regex", [array],
+                         MatchSubstringOptions(pattern, ignore_case))
 
 
 def match_like(array, pattern, *, ignore_case=False):
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index efe2e6be2f8..8b294b85759 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -319,25 +319,20 @@ def test_count_substring_regex():
 
 
 def test_find_substring():
-    arr = pa.array(["ab", "cab", "ba", None])
-    result = pc.find_substring(arr, "ab")
-    expected = pa.array([0, 1, -1, None], type=pa.int32())
-    assert expected.equals(result)
+    for ty in [pa.string(), pa.binary(), pa.large_string(), pa.large_binary()]:
+        arr = pa.array(["ab", "cab", "ba", None], type=ty)
+        result = pc.find_substring(arr, "ab")
+        assert result.to_pylist() == [0, 1, -1, None]
 
-    arr = pa.array(["ab", "cab", "ba", None], type=pa.large_string())
-    result = pc.find_substring(arr, "ab")
-    expected = pa.array([0, 1, -1, None], type=pa.int64())
-    assert expected.equals(result)
+        result = pc.find_substring_regex(arr, "a?b")
+        assert result.to_pylist() == [0, 1, 0, None]
 
-    arr = pa.array([b"ab", b"cab", b"ba", None])
-    result = pc.find_substring(arr, b"ab")
-    expected = pa.array([0, 1, -1, None], type=pa.int32())
-    assert expected.equals(result)
+        arr = pa.array(["ab*", "cAB*", "ba", "aB?"], type=ty)
+        result = pc.find_substring(arr, "aB*", ignore_case=True)
+        assert result.to_pylist() == [0, 1, -1, -1]
 
-    arr = pa.array([b"ab", b"cab", b"ba", None], type=pa.large_binary())
-    result = pc.find_substring(arr, b"ab")
-    expected = pa.array([0, 1, -1, None], type=pa.int64())
-    assert expected.equals(result)
+        result = pc.find_substring_regex(arr, "a?b", ignore_case=True)
+        assert result.to_pylist() == [0, 1, 0, 0]
 
 
 def test_match_like():

From c7c959a26a6512b0ad078a06df474617f1b306aa Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Mon, 28 Jun 2021 14:02:33 -0400
Subject: [PATCH 466/719] ARROW-12842: [FlightRPC][Java] Fix sending trailers
 using CallStatus

Previously this worked only when using gRPC classes, this gives you a pure-Flight way of doing so.

Closes #10370 from lidavidm/arrow-12842

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 .../apache/arrow/flight/grpc/StatusUtils.java | 16 +++++
 .../arrow/flight/TestErrorMetadata.java       | 63 +++++++++++++++++--
 2 files changed, 73 insertions(+), 6 deletions(-)

diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/StatusUtils.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/StatusUtils.java
index e85f1b98632..5b7fa53c836 100644
--- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/StatusUtils.java
+++ b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/StatusUtils.java
@@ -193,12 +193,28 @@ public static Throwable toGrpcException(Throwable ex) {
       return ex;
     } else if (ex instanceof FlightRuntimeException) {
       final FlightRuntimeException fre = (FlightRuntimeException) ex;
+      if (fre.status().metadata() != null) {
+        Metadata trailers = toGrpcMetadata(fre.status().metadata());
+        return new StatusRuntimeException(toGrpcStatus(fre.status()), trailers);
+      }
       return toGrpcStatus(fre.status()).asRuntimeException();
     }
     return Status.INTERNAL.withCause(ex).withDescription("There was an error servicing your request.")
         .asRuntimeException();
   }
 
+  private static Metadata toGrpcMetadata(ErrorFlightMetadata metadata) {
+    final Metadata trailers = new Metadata();
+    for (final String key : metadata.keys()) {
+      if (key.endsWith(Metadata.BINARY_HEADER_SUFFIX)) {
+        trailers.put(Metadata.Key.of(key, Metadata.BINARY_BYTE_MARSHALLER), metadata.getByte(key));
+      } else {
+        trailers.put(Metadata.Key.of(key, Metadata.ASCII_STRING_MARSHALLER), metadata.get(key));
+      }
+    }
+    return trailers;
+  }
+
   /**
    * Maps a transformation function to the elements of an iterator, while wrapping exceptions in {@link
    * FlightRuntimeException}.
diff --git a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestErrorMetadata.java b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestErrorMetadata.java
index 02a21f2711a..2c62bc7fa68 100644
--- a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestErrorMetadata.java
+++ b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestErrorMetadata.java
@@ -36,18 +36,22 @@ public class TestErrorMetadata {
   private static final Metadata.BinaryMarshaller<Status> marshaller =
           ProtoUtils.metadataMarshaller(Status.getDefaultInstance());
 
+  /** Ensure metadata attached to a gRPC error is propagated. */
   @Test
-  public void testMetadata() throws Exception {
+  public void testGrpcMetadata() throws Exception {
     PerfOuterClass.Perf perf = PerfOuterClass.Perf.newBuilder()
                 .setStreamCount(12)
                 .setRecordsPerBatch(1000)
                 .setRecordsPerStream(1000000L)
                 .build();
+    StatusRuntimeExceptionProducer producer = new StatusRuntimeExceptionProducer(perf);
     try (final BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
-        final FlightServer s =
+         final FlightServer s =
              FlightTestUtil.getStartedServer(
-               (location) -> FlightServer.builder(allocator, location, new TestFlightProducer(perf)).build());
-          final FlightClient client = FlightClient.builder(allocator, s.getLocation()).build()) {
+               (location) -> {
+                 return FlightServer.builder(allocator, location, producer).build();
+               });
+         final FlightClient client = FlightClient.builder(allocator, s.getLocation()).build()) {
       final CallStatus flightStatus = FlightTestUtil.assertCode(FlightStatusCode.CANCELLED, () -> {
         FlightStream stream = client.getStream(new Ticket("abs".getBytes()));
         stream.next();
@@ -72,10 +76,37 @@ public void testMetadata() throws Exception {
     }
   }
 
-  private static class TestFlightProducer extends NoOpFlightProducer {
+  /** Ensure metadata attached to a Flight error is propagated. */
+  @Test
+  public void testFlightMetadata() throws Exception {
+    try (final BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
+         final FlightServer s =
+                 FlightTestUtil.getStartedServer(
+                   (location) -> FlightServer.builder(allocator, location, new CallStatusProducer()).build());
+         final FlightClient client = FlightClient.builder(allocator, s.getLocation()).build()) {
+      CallStatus flightStatus = FlightTestUtil.assertCode(FlightStatusCode.INVALID_ARGUMENT, () -> {
+        FlightStream stream = client.getStream(new Ticket(new byte[0]));
+        stream.next();
+      });
+      ErrorFlightMetadata metadata = flightStatus.metadata();
+      Assert.assertNotNull(metadata);
+      Assert.assertEquals("foo", metadata.get("x-foo"));
+      Assert.assertArrayEquals(new byte[]{1}, metadata.getByte("x-bar-bin"));
+
+      flightStatus = FlightTestUtil.assertCode(FlightStatusCode.INVALID_ARGUMENT, () -> {
+        client.getInfo(FlightDescriptor.command(new byte[0]));
+      });
+      metadata = flightStatus.metadata();
+      Assert.assertNotNull(metadata);
+      Assert.assertEquals("foo", metadata.get("x-foo"));
+      Assert.assertArrayEquals(new byte[]{1}, metadata.getByte("x-bar-bin"));
+    }
+  }
+
+  private static class StatusRuntimeExceptionProducer extends NoOpFlightProducer {
     private final PerfOuterClass.Perf perf;
 
-    private TestFlightProducer(PerfOuterClass.Perf perf) {
+    private StatusRuntimeExceptionProducer(PerfOuterClass.Perf perf) {
       this.perf = perf;
     }
 
@@ -89,4 +120,24 @@ public void getStream(CallContext context, Ticket ticket, ServerStreamListener l
       listener.error(sre);
     }
   }
+
+  private static class CallStatusProducer extends NoOpFlightProducer {
+    ErrorFlightMetadata metadata;
+
+    CallStatusProducer() {
+      this.metadata = new ErrorFlightMetadata();
+      metadata.insert("x-foo", "foo");
+      metadata.insert("x-bar-bin", new byte[]{1});
+    }
+
+    @Override
+    public void getStream(CallContext context, Ticket ticket, ServerStreamListener listener) {
+      listener.error(CallStatus.INVALID_ARGUMENT.withDescription("Failed").withMetadata(metadata).toRuntimeException());
+    }
+
+    @Override
+    public FlightInfo getFlightInfo(CallContext context, FlightDescriptor descriptor) {
+      throw CallStatus.INVALID_ARGUMENT.withDescription("Failed").withMetadata(metadata).toRuntimeException();
+    }
+  }
 }

From 5433678dc587d8dcfae82f5a5af73dc50fa25476 Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Mon, 28 Jun 2021 17:10:34 -0400
Subject: [PATCH 467/719] ARROW-12868: [R] Bindings for find_substring and
 find_substring_regex

Closes #10588 from thisisnic/ARROW-12868_str_locate

Lead-authored-by: Nic Crane <thisisnic@gmail.com>
Co-authored-by: Ian Cook <ianmcook@gmail.com>
Signed-off-by: Ian Cook <ianmcook@gmail.com>
---
 r/src/compute.cpp                             |  1 +
 .../testthat/test-dplyr-string-functions.R    | 43 +++++++++++++++++++
 2 files changed, 44 insertions(+)

diff --git a/r/src/compute.cpp b/r/src/compute.cpp
index 01bc684c6df..9a05dd02859 100644
--- a/r/src/compute.cpp
+++ b/r/src/compute.cpp
@@ -233,6 +233,7 @@ std::shared_ptr<arrow::compute::FunctionOptions> make_compute_options(
   }
 
   if (func_name == "match_substring" || func_name == "match_substring_regex" ||
+      func_name == "find_substring" || func_name == "find_substring_regex" ||
       func_name == "match_like") {
     using Options = arrow::compute::MatchSubstringOptions;
     bool ignore_case = false;
diff --git a/r/tests/testthat/test-dplyr-string-functions.R b/r/tests/testthat/test-dplyr-string-functions.R
index 4cb07c9e39d..ecbe2f00f2d 100644
--- a/r/tests/testthat/test-dplyr-string-functions.R
+++ b/r/tests/testthat/test-dplyr-string-functions.R
@@ -725,6 +725,49 @@ test_that("errors in strptime", {
   )
 })
 
+test_that("arrow_find_substring and arrow_find_substring_regex", {
+
+  df <- tibble(x = c("Foo and Bar", "baz and qux and quux"))
+
+  expect_equivalent(
+    df %>%
+      Table$create() %>%
+      mutate(x = arrow_find_substring(x, options = list(pattern = "b"))) %>%
+      collect(),
+    tibble(x = c(-1, 0))
+  )
+  expect_equivalent(
+    df %>%
+      Table$create() %>%
+      mutate(x = arrow_find_substring(
+        x,
+        options = list(pattern = "b", ignore_case = TRUE)
+      )) %>%
+      collect(),
+    tibble(x = c(8, 0))
+  )
+  expect_equivalent(
+    df %>%
+      Table$create() %>%
+      mutate(x = arrow_find_substring_regex(
+        x,
+        options = list(pattern = "^[fb]")
+      )) %>%
+      collect(),
+    tibble(x = c(-1, 0))
+  )
+  expect_equivalent(
+    df %>%
+      Table$create() %>%
+      mutate(x = arrow_find_substring_regex(
+        x,
+        options = list(pattern = "[AEIOU]", ignore_case = TRUE)
+      )) %>%
+      collect(),
+    tibble(x = c(1, 1))
+  )
+})
+
 test_that("stri_reverse and arrow_ascii_reverse functions", {
   
   df_ascii <- tibble(x = c("Foo\nand bar", "baz\tand qux and quux"))

From fd0fc161105376c32380f7238ff2dd833acfca6d Mon Sep 17 00:00:00 2001
From: Ian Cook <ianmcook@gmail.com>
Date: Mon, 28 Jun 2021 18:09:14 -0500
Subject: [PATCH 468/719] ARROW-13203: [R] Fix optional component checks
 causing failures

Closes #10612 from ianmcook/ARROW-13203

Authored-by: Ian Cook <ianmcook@gmail.com>
Signed-off-by: Jonathan Keane <jkeane@gmail.com>
---
 r/data-raw/codegen.R                    | 2 +-
 r/src/altrep.cpp                        | 4 ++++
 r/src/arrowExports.cpp                  | 2 +-
 r/tests/testthat/test-dplyr-lubridate.R | 2 ++
 4 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/r/data-raw/codegen.R b/r/data-raw/codegen.R
index 1a49ffc80fa..20ae3d42a0e 100644
--- a/r/data-raw/codegen.R
+++ b/r/data-raw/codegen.R
@@ -215,7 +215,7 @@ glue::glue('\n
   R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
   R_useDynamicSymbols(dll, FALSE);
 
-  #if defined(HAS_ALTREP)
+  #if defined(ARROW_R_WITH_ARROW) && defined(HAS_ALTREP)
   arrow::r::Init_Altrep_classes(dll);
   #endif
 
diff --git a/r/src/altrep.cpp b/r/src/altrep.cpp
index 33e30aa3ffb..f5f499ab3f6 100644
--- a/r/src/altrep.cpp
+++ b/r/src/altrep.cpp
@@ -19,6 +19,8 @@
 
 #include "./arrow_types.h"
 
+#if defined(ARROW_R_WITH_ARROW)
+
 #if defined(HAS_ALTREP)
 
 #include <R_ext/Altrep.h>
@@ -164,3 +166,5 @@ bool is_altrep_dbl_nonull(SEXP x) {
   return false;
 #endif
 }
+
+#endif
diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp
index 427844a3c8e..5f3febffcd3 100644
--- a/r/src/arrowExports.cpp
+++ b/r/src/arrowExports.cpp
@@ -7367,7 +7367,7 @@ extern "C" void R_init_arrow(DllInfo* dll){
   R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
   R_useDynamicSymbols(dll, FALSE);
 
-  #if defined(HAS_ALTREP)
+  #if defined(ARROW_R_WITH_ARROW) && defined(HAS_ALTREP)
   arrow::r::Init_Altrep_classes(dll);
   #endif
 
diff --git a/r/tests/testthat/test-dplyr-lubridate.R b/r/tests/testthat/test-dplyr-lubridate.R
index 2ebb6f3b93e..47bee2c28e5 100644
--- a/r/tests/testthat/test-dplyr-lubridate.R
+++ b/r/tests/testthat/test-dplyr-lubridate.R
@@ -15,6 +15,8 @@
 # specific language governing permissions and limitations
 # under the License.
 
+skip_if_not_available("dataset")
+
 library(lubridate)
 library(dplyr)
 

From 88249321c7bc588bf7d448ff5d39dbd4072000ba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Tue, 29 Jun 2021 13:38:23 +0200
Subject: [PATCH 469/719] ARROW-13098: [Dev][Archery] Reorganize docker
 submodule to its own subpackage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #10542 from kszucs/archery-docker-module

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 dev/archery/archery/cli.py                    | 275 +-----------
 dev/archery/archery/docker/__init__.py        |  18 +
 dev/archery/archery/docker/cli.py             | 242 +++++++++++
 dev/archery/archery/docker/core.py            | 402 ++++++++++++++++++
 .../archery/{ => docker}/tests/test_docker.py |   0
 dev/archery/archery/utils/cli.py              |  73 ++++
 6 files changed, 741 insertions(+), 269 deletions(-)
 create mode 100644 dev/archery/archery/docker/__init__.py
 create mode 100644 dev/archery/archery/docker/cli.py
 create mode 100644 dev/archery/archery/docker/core.py
 rename dev/archery/archery/{ => docker}/tests/test_docker.py (100%)
 create mode 100644 dev/archery/archery/utils/cli.py

diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py
index 9442b2917e0..fefd7b02ed4 100644
--- a/dev/archery/archery/cli.py
+++ b/dev/archery/archery/cli.py
@@ -29,34 +29,16 @@
 from .benchmark.compare import RunnerComparator, DEFAULT_THRESHOLD
 from .benchmark.runner import CppBenchmarkRunner, JavaBenchmarkRunner
 from .lang.cpp import CppCMakeDefinition, CppConfiguration
+from .utils.cli import ArrowBool, validate_arrow_sources, add_optional_command
 from .utils.lint import linter, python_numpydoc, LintValidationException
 from .utils.logger import logger, ctx as log_ctx
-from .utils.source import ArrowSources, InvalidArrowSource
+from .utils.source import ArrowSources
 from .utils.tmpdir import tmpdir
 
 # Set default logging to INFO in command line.
 logging.basicConfig(level=logging.INFO)
 
 
-class ArrowBool(click.types.BoolParamType):
-    """
-    ArrowBool supports the 'ON' and 'OFF' values on top of the values
-    supported by BoolParamType. This is convenient to port script which exports
-    CMake options variables.
-    """
-    name = "boolean"
-
-    def convert(self, value, param, ctx):
-        if isinstance(value, str):
-            lowered = value.lower()
-            if lowered == "on":
-                return True
-            elif lowered == "off":
-                return False
-
-        return super().convert(value, param, ctx)
-
-
 BOOL = ArrowBool()
 
 
@@ -88,14 +70,6 @@ def archery(ctx, debug, pdb, quiet):
         sys.excepthook = lambda t, v, e: pdb.pm()
 
 
-def validate_arrow_sources(ctx, param, src):
-    """ Ensure a directory contains Arrow cpp sources. """
-    try:
-        return ArrowSources.find(src)
-    except InvalidArrowSource as e:
-        raise click.BadParameter(str(e))
-
-
 build_dir_type = click.Path(dir_okay=True, file_okay=False, resolve_path=True)
 # Supported build types
 build_type = click.Choice(["debug", "relwithdebinfo", "release"],
@@ -797,228 +771,6 @@ def trigger_bot(event_name, event_payload, arrow_token):
     bot.handle(event_name, event_payload)
 
 
-def _mock_compose_calls(compose):
-    from types import MethodType
-    from subprocess import CompletedProcess
-
-    def _mock(compose, executable):
-        def _execute(self, *args, **kwargs):
-            params = ['{}={}'.format(k, v)
-                      for k, v in self.config.params.items()]
-            command = ' '.join(params + [executable] + list(args))
-            click.echo(command)
-            return CompletedProcess([], 0)
-        return MethodType(_execute, compose)
-
-    compose._execute_docker = _mock(compose, executable='docker')
-    compose._execute_compose = _mock(compose, executable='docker-compose')
-
-
-@archery.group('docker')
-@click.option("--src", metavar="<arrow_src>", default=None,
-              callback=validate_arrow_sources,
-              help="Specify Arrow source directory.")
-@click.option('--dry-run/--execute', default=False,
-              help="Display the docker-compose commands instead of executing "
-                   "them.")
-@click.pass_obj
-def docker_compose(obj, src, dry_run):
-    """Interact with docker-compose based builds."""
-    from .docker import DockerCompose
-
-    config_path = src.path / 'docker-compose.yml'
-    if not config_path.exists():
-        raise click.ClickException(
-            "Docker compose configuration cannot be found in directory {}, "
-            "try to pass the arrow source directory explicitly.".format(src)
-        )
-
-    # take the docker-compose parameters like PYTHON, PANDAS, UBUNTU from the
-    # environment variables to keep the usage similar to docker-compose
-    compose = DockerCompose(config_path, params=os.environ)
-    if dry_run:
-        _mock_compose_calls(compose)
-    obj['compose'] = compose
-
-
-@docker_compose.command('build')
-@click.argument('image')
-@click.option('--force-pull/--no-pull', default=True,
-              help="Whether to force pull the image and its ancestor images")
-@click.option('--using-docker-cli', default=False, is_flag=True,
-              envvar='ARCHERY_USE_DOCKER_CLI',
-              help="Use docker CLI directly for building instead of calling "
-                   "docker-compose. This may help to reuse cached layers.")
-@click.option('--using-docker-buildx', default=False, is_flag=True,
-              envvar='ARCHERY_USE_DOCKER_BUILDX',
-              help="Use buildx with docker CLI directly for building instead "
-                   "of calling docker-compose or the plain docker build "
-                   "command. This option makes the build cache reusable "
-                   "across hosts.")
-@click.option('--use-cache/--no-cache', default=True,
-              help="Whether to use cache when building the image and its "
-                   "ancestor images")
-@click.option('--use-leaf-cache/--no-leaf-cache', default=True,
-              help="Whether to use cache when building only the (leaf) image "
-                   "passed as the argument. To disable caching for both the "
-                   "image and its ancestors use --no-cache option.")
-@click.pass_obj
-def docker_compose_build(obj, image, *, force_pull, using_docker_cli,
-                         using_docker_buildx, use_cache, use_leaf_cache):
-    """
-    Execute docker-compose builds.
-    """
-    from .docker import UndefinedImage
-
-    compose = obj['compose']
-
-    using_docker_cli |= using_docker_buildx
-    try:
-        if force_pull:
-            compose.pull(image, pull_leaf=use_leaf_cache,
-                         using_docker=using_docker_cli)
-        compose.build(image, use_cache=use_cache,
-                      use_leaf_cache=use_leaf_cache,
-                      using_docker=using_docker_cli,
-                      using_buildx=using_docker_buildx)
-    except UndefinedImage as e:
-        raise click.ClickException(
-            "There is no service/image defined in docker-compose.yml with "
-            "name: {}".format(str(e))
-        )
-    except RuntimeError as e:
-        raise click.ClickException(str(e))
-
-
-@docker_compose.command('run')
-@click.argument('image')
-@click.argument('command', required=False, default=None)
-@click.option('--env', '-e', multiple=True,
-              help="Set environment variable within the container")
-@click.option('--user', '-u', default=None,
-              help="Username or UID to run the container with")
-@click.option('--force-pull/--no-pull', default=True,
-              help="Whether to force pull the image and its ancestor images")
-@click.option('--force-build/--no-build', default=True,
-              help="Whether to force build the image and its ancestor images")
-@click.option('--build-only', default=False, is_flag=True,
-              help="Pull and/or build the image, but do not run it")
-@click.option('--using-docker-cli', default=False, is_flag=True,
-              envvar='ARCHERY_USE_DOCKER_CLI',
-              help="Use docker CLI directly for building instead of calling "
-                   "docker-compose. This may help to reuse cached layers.")
-@click.option('--using-docker-buildx', default=False, is_flag=True,
-              envvar='ARCHERY_USE_DOCKER_BUILDX',
-              help="Use buildx with docker CLI directly for building instead "
-                   "of calling docker-compose or the plain docker build "
-                   "command. This option makes the build cache reusable "
-                   "across hosts.")
-@click.option('--use-cache/--no-cache', default=True,
-              help="Whether to use cache when building the image and its "
-                   "ancestor images")
-@click.option('--use-leaf-cache/--no-leaf-cache', default=True,
-              help="Whether to use cache when building only the (leaf) image "
-                   "passed as the argument. To disable caching for both the "
-                   "image and its ancestors use --no-cache option.")
-@click.option('--volume', '-v', multiple=True,
-              help="Set volume within the container")
-@click.pass_obj
-def docker_compose_run(obj, image, command, *, env, user, force_pull,
-                       force_build, build_only, using_docker_cli,
-                       using_docker_buildx, use_cache,
-                       use_leaf_cache, volume):
-    """Execute docker-compose builds.
-
-    To see the available builds run `archery docker images`.
-
-    Examples:
-
-    # execute a single build
-    archery docker run conda-python
-
-    # execute the builds but disable the image pulling
-    archery docker run --no-cache conda-python
-
-    # pass a docker-compose parameter, like the python version
-    PYTHON=3.8 archery docker run conda-python
-
-    # disable the cache only for the leaf image
-    PANDAS=master archery docker run --no-leaf-cache conda-python-pandas
-
-    # entirely skip building the image
-    archery docker run --no-pull --no-build conda-python
-
-    # pass runtime parameters via docker environment variables
-    archery docker run -e CMAKE_BUILD_TYPE=release ubuntu-cpp
-
-    # set a volume
-    archery docker run -v $PWD/build:/build ubuntu-cpp
-
-    # starting an interactive bash session for debugging
-    archery docker run ubuntu-cpp bash
-    """
-    from .docker import UndefinedImage
-
-    compose = obj['compose']
-    using_docker_cli |= using_docker_buildx
-
-    env = dict(kv.split('=', 1) for kv in env)
-    try:
-        if force_pull:
-            compose.pull(image, pull_leaf=use_leaf_cache,
-                         using_docker=using_docker_cli)
-        if force_build:
-            compose.build(image, use_cache=use_cache,
-                          use_leaf_cache=use_leaf_cache,
-                          using_docker=using_docker_cli,
-                          using_buildx=using_docker_buildx)
-        if build_only:
-            return
-        compose.run(
-            image,
-            command=command,
-            env=env,
-            user=user,
-            using_docker=using_docker_cli,
-            volumes=volume
-        )
-    except UndefinedImage as e:
-        raise click.ClickException(
-            "There is no service/image defined in docker-compose.yml with "
-            "name: {}".format(str(e))
-        )
-    except RuntimeError as e:
-        raise click.ClickException(str(e))
-
-
-@docker_compose.command('push')
-@click.argument('image')
-@click.option('--user', '-u', required=False, envvar='ARCHERY_DOCKER_USER',
-              help='Docker repository username')
-@click.option('--password', '-p', required=False,
-              envvar='ARCHERY_DOCKER_PASSWORD',
-              help='Docker repository password')
-@click.option('--using-docker-cli', default=False, is_flag=True,
-              help="Use docker CLI directly for building instead of calling "
-                   "docker-compose. This may help to reuse cached layers.")
-@click.pass_obj
-def docker_compose_push(obj, image, user, password, using_docker_cli):
-    """Push the generated docker-compose image."""
-    compose = obj['compose']
-    compose.push(image, user=user, password=password,
-                 using_docker=using_docker_cli)
-
-
-@docker_compose.command('images')
-@click.pass_obj
-def docker_compose_images(obj):
-    """List the available docker-compose images."""
-    compose = obj['compose']
-    click.echo('Available images:')
-    for image in compose.images():
-        click.echo(' - {}'.format(image))
-
-
 @archery.group('release')
 @click.option("--src", metavar="<arrow_src>", default=None,
               callback=validate_arrow_sources,
@@ -1179,25 +931,10 @@ def linking_check_dependencies(obj, allowed, disallowed, paths):
         raise click.ClickException(str(e))
 
 
-try:
-    from .crossbow.cli import crossbow  # noqa
-except ImportError as exc:
-    missing_package = exc.name
-
-    @archery.command(
-        'crossbow',
-        context_settings={
-            "allow_extra_args": True,
-            "ignore_unknown_options": True,
-        }
-    )
-    def crossbow():
-        raise click.ClickException(
-            "Couldn't import crossbow because of missing dependency: {}"
-            .format(missing_package)
-        )
-else:
-    archery.add_command(crossbow)
+add_optional_command("docker", module=".docker.cli", function="docker",
+                     parent=archery)
+add_optional_command("crossbow", module=".crossbow.cli", function="crossbow",
+                     parent=archery)
 
 
 if __name__ == "__main__":
diff --git a/dev/archery/archery/docker/__init__.py b/dev/archery/archery/docker/__init__.py
new file mode 100644
index 00000000000..6be29c91638
--- /dev/null
+++ b/dev/archery/archery/docker/__init__.py
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from .core import DockerCompose, UndefinedImage  # noqa
diff --git a/dev/archery/archery/docker/cli.py b/dev/archery/archery/docker/cli.py
new file mode 100644
index 00000000000..8d3c64dd1de
--- /dev/null
+++ b/dev/archery/archery/docker/cli.py
@@ -0,0 +1,242 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+
+import click
+
+from ..utils.cli import validate_arrow_sources
+from .core import DockerCompose, UndefinedImage
+
+
+def _mock_compose_calls(compose):
+    from types import MethodType
+    from subprocess import CompletedProcess
+
+    def _mock(compose, executable):
+        def _execute(self, *args, **kwargs):
+            params = ['{}={}'.format(k, v)
+                      for k, v in self.config.params.items()]
+            command = ' '.join(params + [executable] + list(args))
+            click.echo(command)
+            return CompletedProcess([], 0)
+        return MethodType(_execute, compose)
+
+    compose._execute_docker = _mock(compose, executable='docker')
+    compose._execute_compose = _mock(compose, executable='docker-compose')
+
+
+@click.group()
+@click.option("--src", metavar="<arrow_src>", default=None,
+              callback=validate_arrow_sources,
+              help="Specify Arrow source directory.")
+@click.option('--dry-run/--execute', default=False,
+              help="Display the docker-compose commands instead of executing "
+                   "them.")
+@click.pass_obj
+def docker(obj, src, dry_run):
+    """
+    Interact with docker-compose based builds.
+    """
+
+    config_path = src.path / 'docker-compose.yml'
+    if not config_path.exists():
+        raise click.ClickException(
+            "Docker compose configuration cannot be found in directory {}, "
+            "try to pass the arrow source directory explicitly.".format(src)
+        )
+
+    # take the docker-compose parameters like PYTHON, PANDAS, UBUNTU from the
+    # environment variables to keep the usage similar to docker-compose
+    compose = DockerCompose(config_path, params=os.environ)
+    if dry_run:
+        _mock_compose_calls(compose)
+    obj['compose'] = compose
+
+
+@docker.command('build')
+@click.argument('image')
+@click.option('--force-pull/--no-pull', default=True,
+              help="Whether to force pull the image and its ancestor images")
+@click.option('--using-docker-cli', default=False, is_flag=True,
+              envvar='ARCHERY_USE_DOCKER_CLI',
+              help="Use docker CLI directly for building instead of calling "
+                   "docker-compose. This may help to reuse cached layers.")
+@click.option('--using-docker-buildx', default=False, is_flag=True,
+              envvar='ARCHERY_USE_DOCKER_BUILDX',
+              help="Use buildx with docker CLI directly for building instead "
+                   "of calling docker-compose or the plain docker build "
+                   "command. This option makes the build cache reusable "
+                   "across hosts.")
+@click.option('--use-cache/--no-cache', default=True,
+              help="Whether to use cache when building the image and its "
+                   "ancestor images")
+@click.option('--use-leaf-cache/--no-leaf-cache', default=True,
+              help="Whether to use cache when building only the (leaf) image "
+                   "passed as the argument. To disable caching for both the "
+                   "image and its ancestors use --no-cache option.")
+@click.pass_obj
+def docker_build(obj, image, *, force_pull, using_docker_cli,
+                 using_docker_buildx, use_cache, use_leaf_cache):
+    """
+    Execute docker-compose builds.
+    """
+    compose = obj['compose']
+
+    using_docker_cli |= using_docker_buildx
+    try:
+        if force_pull:
+            compose.pull(image, pull_leaf=use_leaf_cache,
+                         using_docker=using_docker_cli)
+        compose.build(image, use_cache=use_cache,
+                      use_leaf_cache=use_leaf_cache,
+                      using_docker=using_docker_cli,
+                      using_buildx=using_docker_buildx)
+    except UndefinedImage as e:
+        raise click.ClickException(
+            "There is no service/image defined in docker-compose.yml with "
+            "name: {}".format(str(e))
+        )
+    except RuntimeError as e:
+        raise click.ClickException(str(e))
+
+
+@docker.command('run')
+@click.argument('image')
+@click.argument('command', required=False, default=None)
+@click.option('--env', '-e', multiple=True,
+              help="Set environment variable within the container")
+@click.option('--user', '-u', default=None,
+              help="Username or UID to run the container with")
+@click.option('--force-pull/--no-pull', default=True,
+              help="Whether to force pull the image and its ancestor images")
+@click.option('--force-build/--no-build', default=True,
+              help="Whether to force build the image and its ancestor images")
+@click.option('--build-only', default=False, is_flag=True,
+              help="Pull and/or build the image, but do not run it")
+@click.option('--using-docker-cli', default=False, is_flag=True,
+              envvar='ARCHERY_USE_DOCKER_CLI',
+              help="Use docker CLI directly for building instead of calling "
+                   "docker-compose. This may help to reuse cached layers.")
+@click.option('--using-docker-buildx', default=False, is_flag=True,
+              envvar='ARCHERY_USE_DOCKER_BUILDX',
+              help="Use buildx with docker CLI directly for building instead "
+                   "of calling docker-compose or the plain docker build "
+                   "command. This option makes the build cache reusable "
+                   "across hosts.")
+@click.option('--use-cache/--no-cache', default=True,
+              help="Whether to use cache when building the image and its "
+                   "ancestor images")
+@click.option('--use-leaf-cache/--no-leaf-cache', default=True,
+              help="Whether to use cache when building only the (leaf) image "
+                   "passed as the argument. To disable caching for both the "
+                   "image and its ancestors use --no-cache option.")
+@click.option('--volume', '-v', multiple=True,
+              help="Set volume within the container")
+@click.pass_obj
+def docker_run(obj, image, command, *, env, user, force_pull, force_build,
+               build_only, using_docker_cli, using_docker_buildx, use_cache,
+               use_leaf_cache, volume):
+    """
+    Execute docker-compose builds.
+
+    To see the available builds run `archery docker images`.
+
+    Examples:
+
+    # execute a single build
+    archery docker run conda-python
+
+    # execute the builds but disable the image pulling
+    archery docker run --no-cache conda-python
+
+    # pass a docker-compose parameter, like the python version
+    PYTHON=3.8 archery docker run conda-python
+
+    # disable the cache only for the leaf image
+    PANDAS=master archery docker run --no-leaf-cache conda-python-pandas
+
+    # entirely skip building the image
+    archery docker run --no-pull --no-build conda-python
+
+    # pass runtime parameters via docker environment variables
+    archery docker run -e CMAKE_BUILD_TYPE=release ubuntu-cpp
+
+    # set a volume
+    archery docker run -v $PWD/build:/build ubuntu-cpp
+
+    # starting an interactive bash session for debugging
+    archery docker run ubuntu-cpp bash
+    """
+    compose = obj['compose']
+    using_docker_cli |= using_docker_buildx
+
+    env = dict(kv.split('=', 1) for kv in env)
+    try:
+        if force_pull:
+            compose.pull(image, pull_leaf=use_leaf_cache,
+                         using_docker=using_docker_cli)
+        if force_build:
+            compose.build(image, use_cache=use_cache,
+                          use_leaf_cache=use_leaf_cache,
+                          using_docker=using_docker_cli,
+                          using_buildx=using_docker_buildx)
+        if build_only:
+            return
+        compose.run(
+            image,
+            command=command,
+            env=env,
+            user=user,
+            using_docker=using_docker_cli,
+            volumes=volume
+        )
+    except UndefinedImage as e:
+        raise click.ClickException(
+            "There is no service/image defined in docker-compose.yml with "
+            "name: {}".format(str(e))
+        )
+    except RuntimeError as e:
+        raise click.ClickException(str(e))
+
+
+@docker.command('push')
+@click.argument('image')
+@click.option('--user', '-u', required=False, envvar='ARCHERY_DOCKER_USER',
+              help='Docker repository username')
+@click.option('--password', '-p', required=False,
+              envvar='ARCHERY_DOCKER_PASSWORD',
+              help='Docker repository password')
+@click.option('--using-docker-cli', default=False, is_flag=True,
+              help="Use docker CLI directly for building instead of calling "
+                   "docker-compose. This may help to reuse cached layers.")
+@click.pass_obj
+def docker_compose_push(obj, image, user, password, using_docker_cli):
+    """Push the generated docker-compose image."""
+    compose = obj['compose']
+    compose.push(image, user=user, password=password,
+                 using_docker=using_docker_cli)
+
+
+@docker.command('images')
+@click.pass_obj
+def docker_compose_images(obj):
+    """List the available docker-compose images."""
+    compose = obj['compose']
+    click.echo('Available images:')
+    for image in compose.images():
+        click.echo(f' - {image}')
diff --git a/dev/archery/archery/docker/core.py b/dev/archery/archery/docker/core.py
new file mode 100644
index 00000000000..6d15b21c788
--- /dev/null
+++ b/dev/archery/archery/docker/core.py
@@ -0,0 +1,402 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+import re
+import subprocess
+from io import StringIO
+
+from dotenv import dotenv_values
+from ruamel.yaml import YAML
+
+from ..utils.command import Command, default_bin
+from ..compat import _ensure_path
+
+
+def flatten(node, parents=None):
+    parents = list(parents or [])
+    if isinstance(node, str):
+        yield (node, parents)
+    elif isinstance(node, list):
+        for value in node:
+            yield from flatten(value, parents=parents)
+    elif isinstance(node, dict):
+        for key, value in node.items():
+            yield (key, parents)
+            yield from flatten(value, parents=parents + [key])
+    else:
+        raise TypeError(node)
+
+
+def _sanitize_command(cmd):
+    if isinstance(cmd, list):
+        cmd = " ".join(cmd)
+    return re.sub(r"\s+", " ", cmd)
+
+
+class UndefinedImage(Exception):
+    pass
+
+
+class ComposeConfig:
+
+    def __init__(self, config_path, dotenv_path, compose_bin, params=None):
+        config_path = _ensure_path(config_path)
+        if dotenv_path:
+            dotenv_path = _ensure_path(dotenv_path)
+        else:
+            dotenv_path = config_path.parent / '.env'
+        self._read_env(dotenv_path, params)
+        self._read_config(config_path, compose_bin)
+
+    def _read_env(self, dotenv_path, params):
+        """
+        Read .env and merge it with explicitly passed parameters.
+        """
+        self.dotenv = dotenv_values(str(dotenv_path))
+        if params is None:
+            self.params = {}
+        else:
+            self.params = {k: v for k, v in params.items() if k in self.dotenv}
+
+        # forward the process' environment variables
+        self.env = os.environ.copy()
+        # set the defaults from the dotenv files
+        self.env.update(self.dotenv)
+        # override the defaults passed as parameters
+        self.env.update(self.params)
+
+        # translate docker's architecture notation to a more widely used one
+        arch = self.env.get('ARCH', 'amd64')
+        arch_aliases = {
+            'amd64': 'x86_64',
+            'arm64v8': 'aarch64',
+            's390x': 's390x'
+        }
+        arch_short_aliases = {
+            'amd64': 'x64',
+            'arm64v8': 'arm64',
+            's390x': 's390x'
+        }
+        self.env['ARCH_ALIAS'] = arch_aliases.get(arch, arch)
+        self.env['ARCH_SHORT_ALIAS'] = arch_short_aliases.get(arch, arch)
+
+    def _read_config(self, config_path, compose_bin):
+        """
+        Validate and read the docker-compose.yml
+        """
+        yaml = YAML()
+        with config_path.open() as fp:
+            config = yaml.load(fp)
+
+        services = config['services'].keys()
+        self.hierarchy = dict(flatten(config.get('x-hierarchy', {})))
+        self.with_gpus = config.get('x-with-gpus', [])
+        nodes = self.hierarchy.keys()
+        errors = []
+
+        for name in self.with_gpus:
+            if name not in services:
+                errors.append(
+                    'Service `{}` defined in `x-with-gpus` bot not in '
+                    '`services`'.format(name)
+                )
+        for name in nodes - services:
+            errors.append(
+                'Service `{}` is defined in `x-hierarchy` bot not in '
+                '`services`'.format(name)
+            )
+        for name in services - nodes:
+            errors.append(
+                'Service `{}` is defined in `services` but not in '
+                '`x-hierarchy`'.format(name)
+            )
+
+        # trigger docker-compose's own validation
+        compose = Command('docker-compose')
+        args = ['--file', str(config_path), 'config']
+        result = compose.run(*args, env=self.env, check=False,
+                             stderr=subprocess.PIPE, stdout=subprocess.PIPE)
+
+        if result.returncode != 0:
+            # strip the intro line of docker-compose errors
+            errors += result.stderr.decode().splitlines()
+
+        if errors:
+            msg = '\n'.join([' - {}'.format(msg) for msg in errors])
+            raise ValueError(
+                'Found errors with docker-compose:\n{}'.format(msg)
+            )
+
+        rendered_config = StringIO(result.stdout.decode())
+        self.path = config_path
+        self.config = yaml.load(rendered_config)
+
+    def get(self, service_name):
+        try:
+            service = self.config['services'][service_name]
+        except KeyError:
+            raise UndefinedImage(service_name)
+        service['name'] = service_name
+        service['need_gpu'] = service_name in self.with_gpus
+        service['ancestors'] = self.hierarchy[service_name]
+        return service
+
+    def __getitem__(self, service_name):
+        return self.get(service_name)
+
+
+class Docker(Command):
+
+    def __init__(self, docker_bin=None):
+        self.bin = default_bin(docker_bin, "docker")
+
+
+class DockerCompose(Command):
+
+    def __init__(self, config_path, dotenv_path=None, compose_bin=None,
+                 params=None):
+        compose_bin = default_bin(compose_bin, 'docker-compose')
+        self.config = ComposeConfig(config_path, dotenv_path, compose_bin,
+                                    params)
+        self.bin = compose_bin
+        self.pull_memory = set()
+
+    def clear_pull_memory(self):
+        self.pull_memory = set()
+
+    def _execute_compose(self, *args, **kwargs):
+        # execute as a docker compose command
+        try:
+            result = super().run('--file', str(self.config.path), *args,
+                                 env=self.config.env, **kwargs)
+            result.check_returncode()
+        except subprocess.CalledProcessError as e:
+            def formatdict(d, template):
+                return '\n'.join(
+                    template.format(k, v) for k, v in sorted(d.items())
+                )
+            msg = (
+                "`{cmd}` exited with a non-zero exit code {code}, see the "
+                "process log above.\n\nThe docker-compose command was "
+                "invoked with the following parameters:\n\nDefaults defined "
+                "in .env:\n{dotenv}\n\nArchery was called with:\n{params}"
+            )
+            raise RuntimeError(
+                msg.format(
+                    cmd=' '.join(e.cmd),
+                    code=e.returncode,
+                    dotenv=formatdict(self.config.dotenv, template='  {}: {}'),
+                    params=formatdict(
+                        self.config.params, template='  export {}={}'
+                    )
+                )
+            )
+
+    def _execute_docker(self, *args, **kwargs):
+        # execute as a plain docker cli command
+        try:
+            result = Docker().run(*args, **kwargs)
+            result.check_returncode()
+        except subprocess.CalledProcessError as e:
+            raise RuntimeError(
+                "{} exited with non-zero exit code {}".format(
+                    ' '.join(e.cmd), e.returncode
+                )
+            )
+
+    def pull(self, service_name, pull_leaf=True, using_docker=False):
+        def _pull(service):
+            args = ['pull']
+            if service['image'] in self.pull_memory:
+                return
+
+            if using_docker:
+                try:
+                    self._execute_docker(*args, service['image'])
+                except Exception as e:
+                    # better --ignore-pull-failures handling
+                    print(e)
+            else:
+                args.append('--ignore-pull-failures')
+                self._execute_compose(*args, service['name'])
+
+            self.pull_memory.add(service['image'])
+
+        service = self.config.get(service_name)
+        for ancestor in service['ancestors']:
+            _pull(self.config.get(ancestor))
+        if pull_leaf:
+            _pull(service)
+
+    def build(self, service_name, use_cache=True, use_leaf_cache=True,
+              using_docker=False, using_buildx=False):
+        def _build(service, use_cache):
+            if 'build' not in service:
+                # nothing to do
+                return
+
+            args = []
+            cache_from = list(service.get('build', {}).get('cache_from', []))
+            if use_cache:
+                for image in cache_from:
+                    if image not in self.pull_memory:
+                        try:
+                            self._execute_docker('pull', image)
+                        except Exception as e:
+                            print(e)
+                        finally:
+                            self.pull_memory.add(image)
+            else:
+                args.append('--no-cache')
+
+            # turn on inline build cache, this is a docker buildx feature
+            # used to bundle the image build cache to the pushed image manifest
+            # so the build cache can be reused across hosts, documented at
+            # https://github.com/docker/buildx#--cache-tonametypetypekeyvalue
+            if self.config.env.get('BUILDKIT_INLINE_CACHE') == '1':
+                args.extend(['--build-arg', 'BUILDKIT_INLINE_CACHE=1'])
+
+            if using_buildx:
+                for k, v in service['build'].get('args', {}).items():
+                    args.extend(['--build-arg', '{}={}'.format(k, v)])
+
+                if use_cache:
+                    cache_ref = '{}-cache'.format(service['image'])
+                    cache_from = 'type=registry,ref={}'.format(cache_ref)
+                    cache_to = (
+                        'type=registry,ref={},mode=max'.format(cache_ref)
+                    )
+                    args.extend([
+                        '--cache-from', cache_from,
+                        '--cache-to', cache_to,
+                    ])
+
+                args.extend([
+                    '--output', 'type=docker',
+                    '-f', service['build']['dockerfile'],
+                    '-t', service['image'],
+                    service['build'].get('context', '.')
+                ])
+                self._execute_docker("buildx", "build", *args)
+            elif using_docker:
+                # better for caching
+                for k, v in service['build'].get('args', {}).items():
+                    args.extend(['--build-arg', '{}={}'.format(k, v)])
+                for img in cache_from:
+                    args.append('--cache-from="{}"'.format(img))
+                args.extend([
+                    '-f', service['build']['dockerfile'],
+                    '-t', service['image'],
+                    service['build'].get('context', '.')
+                ])
+                self._execute_docker("build", *args)
+            else:
+                self._execute_compose("build", *args, service['name'])
+
+        service = self.config.get(service_name)
+        # build ancestor services
+        for ancestor in service['ancestors']:
+            _build(self.config.get(ancestor), use_cache=use_cache)
+        # build the leaf/target service
+        _build(service, use_cache=use_cache and use_leaf_cache)
+
+    def run(self, service_name, command=None, *, env=None, volumes=None,
+            user=None, using_docker=False):
+        service = self.config.get(service_name)
+
+        args = []
+        if user is not None:
+            args.extend(['-u', user])
+
+        if env is not None:
+            for k, v in env.items():
+                args.extend(['-e', '{}={}'.format(k, v)])
+
+        if volumes is not None:
+            for volume in volumes:
+                args.extend(['--volume', volume])
+
+        if using_docker or service['need_gpu']:
+            # use gpus, requires docker>=19.03
+            if service['need_gpu']:
+                args.extend(['--gpus', 'all'])
+
+            if service.get('shm_size'):
+                args.extend(['--shm-size', service['shm_size']])
+
+            # append env variables from the compose conf
+            for k, v in service.get('environment', {}).items():
+                args.extend(['-e', '{}={}'.format(k, v)])
+
+            # append volumes from the compose conf
+            for v in service.get('volumes', []):
+                if not isinstance(v, str):
+                    # if not the compact string volume definition
+                    v = "{}:{}".format(v['source'], v['target'])
+                args.extend(['-v', v])
+
+            # infer whether an interactive shell is desired or not
+            if command in ['cmd.exe', 'bash', 'sh', 'powershell']:
+                args.append('-it')
+
+            # get the actual docker image name instead of the compose service
+            # name which we refer as image in general
+            args.append(service['image'])
+
+            # add command from compose if it wasn't overridden
+            if command is not None:
+                args.append(command)
+            else:
+                # replace whitespaces from the preformatted compose command
+                cmd = _sanitize_command(service.get('command', ''))
+                if cmd:
+                    args.append(cmd)
+
+            # execute as a plain docker cli command
+            self._execute_docker('run', '--rm', *args)
+        else:
+            # execute as a docker-compose command
+            args.append(service_name)
+            if command is not None:
+                args.append(command)
+            self._execute_compose('run', '--rm', *args)
+
+    def push(self, service_name, user=None, password=None, using_docker=False):
+        def _push(service):
+            if using_docker:
+                return self._execute_docker('push', service['image'])
+            else:
+                return self._execute_compose('push', service['name'])
+
+        if user is not None:
+            try:
+                # TODO(kszucs): have an option for a prompt
+                self._execute_docker('login', '-u', user, '-p', password)
+            except subprocess.CalledProcessError:
+                # hide credentials
+                msg = ('Failed to push `{}`, check the passed credentials'
+                       .format(service_name))
+                raise RuntimeError(msg) from None
+
+        service = self.config.get(service_name)
+        for ancestor in service['ancestors']:
+            _push(self.config.get(ancestor))
+        _push(service)
+
+    def images(self):
+        return sorted(self.config.hierarchy.keys())
diff --git a/dev/archery/archery/tests/test_docker.py b/dev/archery/archery/docker/tests/test_docker.py
similarity index 100%
rename from dev/archery/archery/tests/test_docker.py
rename to dev/archery/archery/docker/tests/test_docker.py
diff --git a/dev/archery/archery/utils/cli.py b/dev/archery/archery/utils/cli.py
new file mode 100644
index 00000000000..701abe925fe
--- /dev/null
+++ b/dev/archery/archery/utils/cli.py
@@ -0,0 +1,73 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import importlib
+
+import click
+
+from .source import ArrowSources, InvalidArrowSource
+
+
+class ArrowBool(click.types.BoolParamType):
+    """
+    ArrowBool supports the 'ON' and 'OFF' values on top of the values
+    supported by BoolParamType. This is convenient to port script which exports
+    CMake options variables.
+    """
+    name = "boolean"
+
+    def convert(self, value, param, ctx):
+        if isinstance(value, str):
+            lowered = value.lower()
+            if lowered == "on":
+                return True
+            elif lowered == "off":
+                return False
+
+        return super().convert(value, param, ctx)
+
+
+def validate_arrow_sources(ctx, param, src):
+    """
+    Ensure a directory contains Arrow cpp sources.
+    """
+    try:
+        return ArrowSources.find(src)
+    except InvalidArrowSource as e:
+        raise click.BadParameter(str(e))
+
+
+def add_optional_command(name, module, function, parent):
+    try:
+        module = importlib.import_module(module, package="archery")
+        command = getattr(module, function)
+    except ImportError as exc:
+        error_message = exc.name
+
+        @parent.command(
+            name,
+            context_settings={
+                "allow_extra_args": True,
+                "ignore_unknown_options": True,
+            }
+        )
+        def command():
+            raise click.ClickException(
+                f"Couldn't import command `{name}` due to {error_message}"
+            )
+    else:
+        parent.add_command(command)

From 963830629ad5f877bbac86779b7d28eae91b61c9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Tue, 29 Jun 2021 13:59:25 +0200
Subject: [PATCH 470/719] ARROW-13108: [Python] Pyarrow 4.0.0 crashes upon
 import on macOS 10.13.6
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I think we have been building the macos wheels with the wrong SDK from the latest iteration.

I managed to force-install the requested sdk version, but the vcpkg build fails with an sdk mismatch.

cc @kou @xhochy

Closes #10587 from kszucs/macos-sdkroot

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 cpp/cmake_modules/Usevcpkg.cmake       | 6 ++++++
 dev/tasks/python-wheels/github.osx.yml | 9 +++++----
 dev/tasks/tasks.yml                    | 2 +-
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/cpp/cmake_modules/Usevcpkg.cmake b/cpp/cmake_modules/Usevcpkg.cmake
index 7d228f59f03..06ac4dd075d 100644
--- a/cpp/cmake_modules/Usevcpkg.cmake
+++ b/cpp/cmake_modules/Usevcpkg.cmake
@@ -229,6 +229,12 @@ set(ZSTD_INCLUDE_DIR
 set(ZSTD_ROOT
     "${ARROW_VCPKG_PREFIX}"
     CACHE STRING "")
+set(BROTLI_ROOT
+    "${ARROW_VCPKG_PREFIX}"
+    CACHE STRING "")
+set(LZ4_ROOT
+    "${ARROW_VCPKG_PREFIX}"
+    CACHE STRING "")
 
 if(CMAKE_HOST_WIN32)
   set(LZ4_MSVC_LIB_PREFIX
diff --git a/dev/tasks/python-wheels/github.osx.yml b/dev/tasks/python-wheels/github.osx.yml
index e5af0ce0ce8..84b094b84d1 100644
--- a/dev/tasks/python-wheels/github.osx.yml
+++ b/dev/tasks/python-wheels/github.osx.yml
@@ -30,6 +30,7 @@ env:
   VCPKG_DEFAULT_TRIPLET: x64-osx-static-release
   VCPKG_FEATURE_FLAGS: "-manifests"
   VCPKG_OVERLAY_TRIPLETS: {{ "${{ github.workspace }}/arrow/ci/vcpkg" }}
+  ARROW_VCPKG_PATCH_FILE: {{ "${{ github.workspace }}/arrow/ci/vcpkg/ports.patch" }}
 
 jobs:
   build:
@@ -52,16 +53,16 @@ jobs:
           # where we build pre-build the vcpkg packages
           setupOnly: true
           doNotSaveCache: true
-          appendedCacheKey: "-macos-{{ macos_deployment_target }}"
+          appendedCacheKey: "-macos-{{ macos_deployment_target }}-2021-06-25"
           vcpkgDirectory: {{ "${{ github.workspace }}/vcpkg" }}
           vcpkgGitCommitId: "2021.04.30"
 
       - name: Patch Vcpkg Ports
         run: |
-          vcpkg_patch_file="../arrow/ci/vcpkg/ports.patch"
+          set -ex
           cd $VCPKG_ROOT
-          if ! git apply --reverse --check --ignore-whitespace ${vcpkg_patch_file}; then
-            git apply --ignore-whitespace ${vcpkg_patch_file}
+          if ! git apply --reverse --check --ignore-whitespace ${ARROW_VCPKG_PATCH_FILE}; then
+            git apply --ignore-whitespace ${ARROW_VCPKG_PATCH_FILE}
             echo "Patch successfully applied!"
           fi
 
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index 8eecb882d98..34307bb2583 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -896,7 +896,7 @@ tasks:
   test-r-version-compatibility:
     ci: github
     template: r/github.linux.version.compatibility.yml
-    
+
   test-r-rtools-35:
     ci: github
     template: r/github.windows.rtools35.yml

From ba5f55c06d9a23186055a1bf9a81e514b3871c55 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Tue, 29 Jun 2021 14:30:59 +0200
Subject: [PATCH 471/719] ARROW-13211: [C++][CI] Remove outdated Github Actions
 ARM builds
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We can restore if we're going to have arm GHA runners again.

Closes #10618 from kszucs/ARROW-13211

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 .github/workflows/cpp.yml | 74 ---------------------------------------
 1 file changed, 74 deletions(-)

diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml
index f9298174f08..086f45d6fee 100644
--- a/.github/workflows/cpp.yml
+++ b/.github/workflows/cpp.yml
@@ -96,80 +96,6 @@ jobs:
         continue-on-error: true
         run: archery docker push ${{ matrix.image }}
 
-  docker-arm:
-    # NOTE: this job is specific for self-hosted runners
-    # CACHING: don't use the cache plugin because of various permission
-    #          issues and keep the cached docker volumes permanently on the
-    #          host
-    # PYTHON: no distributions are built for arm machines by the github
-    #         actions team, so python>3.6 must be preinstalled on the self
-    #         hosted machines
-    name: ${{ matrix.title }}
-    runs-on: ${{ matrix.runner }}
-    # TODO(kszucs): re-enable once the self-hosted workers are properly
-    # registered to github
-    if: false && github.event_name == 'push'
-    defaults:
-      # To use certain environment variables set by .bashrc, an interactive
-      # bash shell must be used
-      run:
-        shell: bash -i {0}
-    strategy:
-      fail-fast: false
-      matrix:
-        name:
-          - arm32v7-debian-10-cpp
-          - arm64v8-ubuntu-20.04-cpp
-        include:
-          - name: arm32v7-debian-10-cpp
-            debian: 10
-            title: ARM32v7 Debian 10 C++
-            image: |
-              -e CPP_MAKE_PARALLELISM=2 \
-              -e CXXFLAGS=-Wno-psabi \
-              -e ARROW_PARQUET=OFF \
-              -e ARROW_FLIGHT=OFF \
-              -e ARROW_GANDIVA=OFF \
-              -e ARROW_ORC=OFF \
-              -e CMAKE_ARGS=-DARROW_CPU_FLAG=armv7 \
-              debian-cpp
-            arch: 'arm32v7'
-            runner: [self-hosted, linux, ARM]
-          - name: arm64v8-ubuntu-20.04-cpp
-            ubuntu: 20.04
-            title: ARM64v8 Ubuntu 20.04 C++
-            image: |
-              -e CPP_MAKE_PARALLELISM=1 \
-              -e ARROW_PARQUET=OFF \
-              ubuntu-cpp
-            arch: 'arm64v8'
-            runner: [self-hosted, linux, ARM64]
-    env:
-      # the defaults here should correspond to the values in .env
-      ARCH: ${{ matrix.arch || 'arm64v8' }}
-      DEBIAN: ${{ matrix.debian || 10 }}
-      FEDORA: ${{ matrix.fedora || 32 }}
-      UBUNTU: ${{ matrix.ubuntu || 18.04 }}
-      LLVM: 8
-    steps:
-      - name: Checkout Arrow
-        uses: actions/checkout@v2
-        with:
-          fetch-depth: 0
-      - name: Fetch Submodules and Tags
-        run: ci/scripts/util_checkout.sh
-      - name: Setup Archery
-        run: pip install -U -e dev/archery[docker]
-      - name: Execute Docker Build
-        # parallelism is reduced because the ARM builders are low on memory
-        run: |
-          ulimit -c unlimited
-          archery docker run ${{ matrix.image }}
-      - name: Docker Push
-        if: success() && github.event_name == 'push' && github.repository == 'apache/arrow'
-        continue-on-error: true
-        run: archery docker push ${{ matrix.image }}
-
   build-example:
     name: C++ Minimal Build Example
     runs-on: ubuntu-latest

From 32642b92ac38275d633e67b7fc028a3ee988be10 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Tue, 29 Jun 2021 14:52:52 +0200
Subject: [PATCH 472/719] ARROW-13212: [Release] Support deploying to test PyPI
 in the python post release script
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #9322 from kszucs/python-post-release

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 dev/release/post-10-python.sh | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/dev/release/post-10-python.sh b/dev/release/post-10-python.sh
index 0f7a480cde6..9e30bd4b4e2 100755
--- a/dev/release/post-10-python.sh
+++ b/dev/release/post-10-python.sh
@@ -17,10 +17,11 @@
 # specific language governing permissions and limitations
 # under the License.
 
-set -e
+set -ex
 set -o pipefail
 
 SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+: ${TEST_PYPI:=0}
 
 if [ "$#" -ne 2 ]; then
   echo "Usage: $0 <version> <rc-num>"
@@ -36,8 +37,15 @@ ${PYTHON:-python} \
   ${version} \
   ${rc} \
   --dest="${tmp}" \
-  --package_type=python
-twine upload ${tmp}/python-rc/${version}-rc${rc}/*.{whl,tar.gz}
+  --package_type=python \
+  --regex=".*\.(whl|tar\.gz)$"
+
+if [ ${TEST_PYPI} -gt 0 ]; then
+  TWINE_ARGS="--repository-url https://test.pypi.org/legacy/"
+fi
+
+twine upload ${TWINE_ARGS} ${tmp}/python-rc/${version}-rc${rc}/*.{whl,tar.gz}
+
 rm -rf "${tmp}"
 
 echo "Success! The released PyPI packages are available here:"

From bf443c40ffc0a67db9e511e70b62033894c5d401 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Tue, 29 Jun 2021 17:02:14 +0200
Subject: [PATCH 473/719] ARROW-11675: [CI][C++] Resolve ctest failures on VS
 2019 builds

Closes #10583 from ianmcook/ARROW-11675

Lead-authored-by: Antoine Pitrou <antoine@python.org>
Co-authored-by: Ian Cook <ianmcook@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/parquet/statistics.cc             | 53 +++++++++++++---
 cpp/src/parquet/statistics_test.cc        | 73 +++++++++++++++++------
 cpp/vcpkg.json                            |  4 +-
 dev/tasks/vcpkg-tests/cpp-build-vcpkg.bat | 13 ++--
 dev/tasks/vcpkg-tests/github.windows.yml  | 10 +---
 5 files changed, 109 insertions(+), 44 deletions(-)

diff --git a/cpp/src/parquet/statistics.cc b/cpp/src/parquet/statistics.cc
index bc474e99abf..72341590e75 100644
--- a/cpp/src/parquet/statistics.cc
+++ b/cpp/src/parquet/statistics.cc
@@ -41,6 +41,7 @@
 using arrow::default_memory_pool;
 using arrow::MemoryPool;
 using arrow::internal::checked_cast;
+using arrow::util::SafeCopy;
 
 namespace parquet {
 namespace {
@@ -55,6 +56,9 @@ template <typename DType, bool is_signed>
 struct CompareHelper {
   using T = typename DType::c_type;
 
+  static_assert(!std::is_unsigned<T>::value || std::is_same<T, bool>::value,
+                "T is an unsigned numeric");
+
   constexpr static T DefaultMin() { return std::numeric_limits<T>::max(); }
   constexpr static T DefaultMax() { return std::numeric_limits<T>::lowest(); }
 
@@ -83,12 +87,24 @@ struct UnsignedCompareHelperBase {
   using T = typename DType::c_type;
   using UCType = typename std::make_unsigned<T>::type;
 
-  constexpr static T DefaultMin() { return std::numeric_limits<UCType>::max(); }
-  constexpr static T DefaultMax() { return std::numeric_limits<UCType>::lowest(); }
+  static_assert(!std::is_same<T, UCType>::value, "T is unsigned");
+  static_assert(sizeof(T) == sizeof(UCType), "T and UCType not the same size");
+
+  // NOTE: according to the C++ spec, unsigned-to-signed conversion is
+  // implementation-defined if the original value does not fit in the signed type
+  // (i.e., two's complement cannot be assumed even on mainstream machines,
+  // because the compiler may decide otherwise).  Hence the use of `SafeCopy`
+  // below for deterministic bit-casting.
+  // (see "Integer conversions" in
+  //  https://en.cppreference.com/w/cpp/language/implicit_conversion)
+
+  static const T DefaultMin() { return SafeCopy<T>(std::numeric_limits<UCType>::max()); }
+  static const T DefaultMax() { return 0; }
+
   static T Coalesce(T val, T fallback) { return val; }
 
-  static inline bool Compare(int type_length, T a, T b) {
-    return ::arrow::util::SafeCopy<UCType>(a) < ::arrow::util::SafeCopy<UCType>(b);
+  static bool Compare(int type_length, T a, T b) {
+    return SafeCopy<UCType>(a) < SafeCopy<UCType>(b);
   }
 
   static T Min(int type_length, T a, T b) { return Compare(type_length, a, b) ? a : b; }
@@ -107,12 +123,12 @@ struct CompareHelper<Int96Type, is_signed> {
   using msb_type = typename std::conditional<is_signed, int32_t, uint32_t>::type;
 
   static T DefaultMin() {
-    uint32_t kMsbMax = std::numeric_limits<msb_type>::max();
+    uint32_t kMsbMax = SafeCopy<uint32_t>(std::numeric_limits<msb_type>::max());
     uint32_t kMax = std::numeric_limits<uint32_t>::max();
     return {kMax, kMax, kMsbMax};
   }
   static T DefaultMax() {
-    uint32_t kMsbMin = std::numeric_limits<msb_type>::min();
+    uint32_t kMsbMin = SafeCopy<uint32_t>(std::numeric_limits<msb_type>::min());
     uint32_t kMin = std::numeric_limits<uint32_t>::min();
     return {kMin, kMin, kMsbMin};
   }
@@ -122,8 +138,7 @@ struct CompareHelper<Int96Type, is_signed> {
     if (a.value[2] != b.value[2]) {
       // Only the MSB bit is by Signed comparison. For little-endian, this is the
       // last bit of Int96 type.
-      return ::arrow::util::SafeCopy<msb_type>(a.value[2]) <
-             ::arrow::util::SafeCopy<msb_type>(b.value[2]);
+      return SafeCopy<msb_type>(a.value[2]) < SafeCopy<msb_type>(b.value[2]);
     } else if (a.value[1] != b.value[1]) {
       return (a.value[1] < b.value[1]);
     }
@@ -374,6 +389,28 @@ class TypedComparatorImpl : virtual public TypedComparator<DType> {
   int type_length_;
 };
 
+// ARROW-11675: A hand-written version of GetMinMax(), to work around
+// what looks like a MSVC code generation bug.
+// This does not seem to be required for GetMinMaxSpaced().
+template <>
+std::pair<int32_t, int32_t>
+TypedComparatorImpl</*is_signed=*/false, Int32Type>::GetMinMax(const int32_t* values,
+                                                               int64_t length) {
+  DCHECK_GT(length, 0);
+
+  const uint32_t* unsigned_values = reinterpret_cast<const uint32_t*>(values);
+  uint32_t min = std::numeric_limits<uint32_t>::max();
+  uint32_t max = std::numeric_limits<uint32_t>::lowest();
+
+  for (int64_t i = 0; i < length; i++) {
+    const auto val = unsigned_values[i];
+    min = std::min<uint32_t>(min, val);
+    max = std::max<uint32_t>(max, val);
+  }
+
+  return {SafeCopy<int32_t>(min), SafeCopy<int32_t>(max)};
+}
+
 template <bool is_signed, typename DType>
 std::pair<typename DType::c_type, typename DType::c_type>
 TypedComparatorImpl<is_signed, DType>::GetMinMax(const ::arrow::Array& values) {
diff --git a/cpp/src/parquet/statistics_test.cc b/cpp/src/parquet/statistics_test.cc
index 8a275fd0936..dbd7d98b238 100644
--- a/cpp/src/parquet/statistics_test.cc
+++ b/cpp/src/parquet/statistics_test.cc
@@ -30,6 +30,7 @@
 #include "arrow/type_traits.h"
 #include "arrow/util/bit_util.h"
 #include "arrow/util/bitmap_ops.h"
+#include "arrow/util/ubsan.h"
 
 #include "parquet/column_reader.h"
 #include "parquet/column_writer.h"
@@ -44,6 +45,7 @@
 
 using arrow::default_memory_pool;
 using arrow::MemoryPool;
+using arrow::util::SafeCopy;
 
 namespace BitUtil = arrow::BitUtil;
 
@@ -702,10 +704,11 @@ class TestStatisticsSortOrder : public ::testing::Test {
     std::shared_ptr<parquet::FileMetaData> file_metadata = parquet_reader->metadata();
     std::shared_ptr<parquet::RowGroupMetaData> rg_metadata = file_metadata->RowGroup(0);
     for (int i = 0; i < static_cast<int>(fields_.size()); i++) {
+      ARROW_SCOPED_TRACE("Statistics for field #", i);
       std::shared_ptr<parquet::ColumnChunkMetaData> cc_metadata =
           rg_metadata->ColumnChunk(i);
-      ASSERT_EQ(stats_[i].min(), cc_metadata->statistics()->EncodeMin());
-      ASSERT_EQ(stats_[i].max(), cc_metadata->statistics()->EncodeMax());
+      EXPECT_EQ(stats_[i].min(), cc_metadata->statistics()->EncodeMin());
+      EXPECT_EQ(stats_[i].max(), cc_metadata->statistics()->EncodeMax());
     }
   }
 
@@ -934,11 +937,11 @@ template <typename Stats, typename Array, typename T = typename Array::value_typ
 void AssertMinMaxAre(Stats stats, const Array& values, T expected_min, T expected_max) {
   stats->Update(values.data(), values.size(), 0);
   ASSERT_TRUE(stats->HasMinMax());
-  ASSERT_EQ(stats->min(), expected_min);
-  ASSERT_EQ(stats->max(), expected_max);
+  EXPECT_EQ(stats->min(), expected_min);
+  EXPECT_EQ(stats->max(), expected_max);
 }
 
-template <typename Stats, typename Array, typename T = typename Array::value_type>
+template <typename Stats, typename Array, typename T = typename Stats::T>
 void AssertMinMaxAre(Stats stats, const Array& values, const uint8_t* valid_bitmap,
                      T expected_min, T expected_max) {
   auto n_values = values.size();
@@ -946,8 +949,8 @@ void AssertMinMaxAre(Stats stats, const Array& values, const uint8_t* valid_bitm
   auto non_null_count = n_values - null_count;
   stats->UpdateSpaced(values.data(), valid_bitmap, 0, non_null_count, null_count);
   ASSERT_TRUE(stats->HasMinMax());
-  ASSERT_EQ(stats->min(), expected_min);
-  ASSERT_EQ(stats->max(), expected_max);
+  EXPECT_EQ(stats->min(), expected_min);
+  EXPECT_EQ(stats->max(), expected_max);
 }
 
 template <typename Stats, typename Array>
@@ -966,17 +969,17 @@ void AssertUnsetMinMax(Stats stats, const Array& values, const uint8_t* valid_bi
 }
 
 template <typename ParquetType, typename T = typename ParquetType::c_type>
-void CheckExtremums() {
+void CheckExtrema() {
   using UT = typename std::make_unsigned<T>::type;
 
-  T smin = std::numeric_limits<T>::min();
-  T smax = std::numeric_limits<T>::max();
-  T umin = std::numeric_limits<UT>::min();
-  T umax = std::numeric_limits<UT>::max();
+  const T smin = std::numeric_limits<T>::min();
+  const T smax = std::numeric_limits<T>::max();
+  const T umin = SafeCopy<T>(std::numeric_limits<UT>::min());
+  const T umax = SafeCopy<T>(std::numeric_limits<UT>::max());
 
   constexpr int kNumValues = 8;
   std::array<T, kNumValues> values{0,    smin,     smax,     umin,
-                                   umax, smin + 1, smax - 1, umin - 1};
+                                   umax, smin + 1, smax - 1, umax - 1};
 
   NodePtr unsigned_node = PrimitiveNode::Make(
       "uint", Repetition::OPTIONAL,
@@ -987,15 +990,47 @@ void CheckExtremums() {
       LogicalType::Int(sizeof(T) * CHAR_BIT, true /*signed*/), ParquetType::type_num);
   ColumnDescriptor signed_descr(signed_node, 1, 1);
 
-  auto unsigned_stats = MakeStatistics<ParquetType>(&unsigned_descr);
-  AssertMinMaxAre(unsigned_stats, values, umin, umax);
+  {
+    ARROW_SCOPED_TRACE("unsigned statistics: umin = ", umin, ", umax = ", umax,
+                       ", node type = ", unsigned_node->logical_type()->ToString(),
+                       ", physical type = ", unsigned_descr.physical_type(),
+                       ", sort order = ", unsigned_descr.sort_order());
+    auto unsigned_stats = MakeStatistics<ParquetType>(&unsigned_descr);
+    AssertMinMaxAre(unsigned_stats, values, umin, umax);
+  }
+  {
+    ARROW_SCOPED_TRACE("signed statistics: smin = ", smin, ", smax = ", smax,
+                       ", node type = ", signed_node->logical_type()->ToString(),
+                       ", physical type = ", signed_descr.physical_type(),
+                       ", sort order = ", signed_descr.sort_order());
+    auto signed_stats = MakeStatistics<ParquetType>(&signed_descr);
+    AssertMinMaxAre(signed_stats, values, smin, smax);
+  }
 
-  auto signed_stats = MakeStatistics<ParquetType>(&signed_descr);
-  AssertMinMaxAre(signed_stats, values, smin, smax);
+  // With validity bitmap
+  std::vector<bool> is_valid = {true, false, false, false, false, true, true, true};
+  std::shared_ptr<Buffer> valid_bitmap;
+  ::arrow::BitmapFromVector(is_valid, &valid_bitmap);
+  {
+    ARROW_SCOPED_TRACE("spaced unsigned statistics: umin = ", umin, ", umax = ", umax,
+                       ", node type = ", unsigned_node->logical_type()->ToString(),
+                       ", physical type = ", unsigned_descr.physical_type(),
+                       ", sort order = ", unsigned_descr.sort_order());
+    auto unsigned_stats = MakeStatistics<ParquetType>(&unsigned_descr);
+    AssertMinMaxAre(unsigned_stats, values, valid_bitmap->data(), T{0}, umax - 1);
+  }
+  {
+    ARROW_SCOPED_TRACE("spaced signed statistics: smin = ", smin, ", smax = ", smax,
+                       ", node type = ", signed_node->logical_type()->ToString(),
+                       ", physical type = ", signed_descr.physical_type(),
+                       ", sort order = ", signed_descr.sort_order());
+    auto signed_stats = MakeStatistics<ParquetType>(&signed_descr);
+    AssertMinMaxAre(signed_stats, values, valid_bitmap->data(), smin + 1, smax - 1);
+  }
 }
 
-TEST(TestStatistic, Int32Extremums) { CheckExtremums<Int32Type>(); }
-TEST(TestStatistic, Int64Extremums) { CheckExtremums<Int64Type>(); }
+TEST(TestStatistic, Int32Extrema) { CheckExtrema<Int32Type>(); }
+TEST(TestStatistic, Int64Extrema) { CheckExtrema<Int64Type>(); }
 
 // PARQUET-1225: Float NaN values may lead to incorrect min-max
 template <typename ParquetType>
diff --git a/cpp/vcpkg.json b/cpp/vcpkg.json
index b724412d397..5f92affa4eb 100644
--- a/cpp/vcpkg.json
+++ b/cpp/vcpkg.json
@@ -15,7 +15,9 @@
       ]
     },
     "benchmark",
-    "boost",
+    "boost-filesystem",
+    "boost-multiprecision",
+    "boost-system",
     "brotli",
     "bzip2",
     "c-ares",
diff --git a/dev/tasks/vcpkg-tests/cpp-build-vcpkg.bat b/dev/tasks/vcpkg-tests/cpp-build-vcpkg.bat
index 12ff9b4b618..6423720c225 100644
--- a/dev/tasks/vcpkg-tests/cpp-build-vcpkg.bat
+++ b/dev/tasks/vcpkg-tests/cpp-build-vcpkg.bat
@@ -60,7 +60,7 @@ cmake -G "Visual Studio 16 2019" -A x64 ^
       -DARROW_CXXFLAGS="/MP" ^
       -DARROW_DATASET=ON ^
       -DARROW_DEPENDENCY_SOURCE=VCPKG ^
-      -DARROW_FLIGHT=ON ^
+      -DARROW_FLIGHT=OFF ^
       -DARROW_MIMALLOC=ON ^
       -DARROW_PARQUET=ON ^
       -DARROW_PYTHON=OFF ^
@@ -79,13 +79,8 @@ cmake --build . --target INSTALL --config Release || exit /B 1
 
 @rem Test Arrow C++ library
 
-@rem TODO(ARROW-11675): Uncomment the below
-@rem and troubleshoot two test failures:
-@rem  - TestStatisticsSortOrder/0.MinMax
-@rem  - TestStatistic.Int32Extremums
-
-@rem ctest --output-on-failure ^
-@rem       --parallel %NUMBER_OF_PROCESSORS% ^
-@rem       --timeout 300 || exit /B 1
+ctest --output-on-failure ^
+      --parallel %NUMBER_OF_PROCESSORS% ^
+      --timeout 300 || exit /B 1
 
 popd
diff --git a/dev/tasks/vcpkg-tests/github.windows.yml b/dev/tasks/vcpkg-tests/github.windows.yml
index eacb6317c30..ad3e793a6c3 100644
--- a/dev/tasks/vcpkg-tests/github.windows.yml
+++ b/dev/tasks/vcpkg-tests/github.windows.yml
@@ -36,15 +36,11 @@ jobs:
           git -C arrow checkout FETCH_HEAD
           git -C arrow submodule update --init --recursive
       - name: Remove and Reinstall vcpkg
-        # As of January 2021, the version of vcpkg that is preinstalled on the
-        # Github Actions windows-2019 image is 2020.11.12, as noted at
-        # https://github.com/actions/virtual-environments/blob/main/images/win/Windows2019-Readme.md
-        # This version of vcpkg has a bug that causes the installation of
-        # aws-cpp-sdk to fail. See details at
-        # https://github.com/awslabs/aws-c-common/issues/734
-        # and https://github.com/microsoft/vcpkg/pull/14716.
         # When running vcpkg in Github Actions on Windows, remove the
         # preinstalled vcpkg and install the newest version from source.
+        # Versions of vcpkg rapidly stop working until updated, and
+        # the safest and most reliable way to update vcpkg is simply
+        # to remove and reinstall it.
         shell: cmd
         run: |
           CALL vcpkg integrate remove 2>NUL

From fda6b27d8106aa20a3a34ad885174bd0a4019348 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Tue, 29 Jun 2021 17:08:02 +0200
Subject: [PATCH 474/719] ARROW-13187: [Python] Avoid creating reference cycle
 when reading CSV file

Some Python versions have a bug where `signal.getsignal` creates a reference cycle holding execution frames alive (https://bugs.python.org/issue42248).

This would cause excessive lifetimes of the PyArrow table returned by `read_csv`.

Closes #10609 from pitrou/ARROW-13187-signal-refcycle

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 python/pyarrow/error.pxi          | 24 +++++++++-----
 python/pyarrow/tests/test_csv.py  | 13 ++++++++
 python/pyarrow/tests/test_util.py | 52 +++++++++++++++++++++++++++++++
 python/pyarrow/tests/util.py      |  9 ++++++
 python/pyarrow/util.py            | 26 ++++++++++++++++
 5 files changed, 117 insertions(+), 7 deletions(-)
 create mode 100644 python/pyarrow/tests/test_util.py

diff --git a/python/pyarrow/error.pxi b/python/pyarrow/error.pxi
index 2866848272a..882427f32ea 100644
--- a/python/pyarrow/error.pxi
+++ b/python/pyarrow/error.pxi
@@ -25,6 +25,8 @@ import os
 import signal
 import threading
 
+from pyarrow.util import _break_traceback_cycle_from_frame
+
 
 class ArrowException(Exception):
     pass
@@ -171,6 +173,10 @@ def enable_signal_handlers(c_bool enable):
 
 # For internal use
 
+# Whether we need a workaround for https://bugs.python.org/issue42248
+have_signal_refcycle = (sys.version_info < (3, 8, 10) or
+                        (3, 9) <= sys.version_info < (3, 9, 5))
+
 cdef class SignalStopHandler:
     cdef:
         StopToken _stop_token
@@ -180,13 +186,9 @@ cdef class SignalStopHandler:
     def __cinit__(self):
         self._enabled = False
 
-        tid = threading.current_thread().ident
-        if (signal_handlers_enabled and
-                threading.current_thread() is threading.main_thread()):
-            self._signals = [
-                sig for sig in (signal.SIGINT, signal.SIGTERM)
-                if signal.getsignal(sig) not in (signal.SIG_DFL,
-                                                 signal.SIG_IGN, None)]
+        self._init_signals()
+        if have_signal_refcycle:
+            _break_traceback_cycle_from_frame(sys._getframe(0))
 
         self._stop_token = StopToken()
         if not self._signals.empty():
@@ -194,6 +196,14 @@ cdef class SignalStopHandler:
                 SetSignalStopSource()).token())
             self._enabled = True
 
+    def _init_signals(self):
+        if (signal_handlers_enabled and
+                threading.current_thread() is threading.main_thread()):
+            self._signals = [
+                sig for sig in (signal.SIGINT, signal.SIGTERM)
+                if signal.getsignal(sig) not in (signal.SIG_DFL,
+                                                 signal.SIG_IGN, None)]
+
     def __enter__(self):
         if self._enabled:
             check_status(RegisterCancellingSignalHandler(self._signals))
diff --git a/python/pyarrow/tests/test_csv.py b/python/pyarrow/tests/test_csv.py
index 482973a7258..5faffd3e9f8 100644
--- a/python/pyarrow/tests/test_csv.py
+++ b/python/pyarrow/tests/test_csv.py
@@ -31,6 +31,7 @@
 import threading
 import time
 import unittest
+import weakref
 
 import pytest
 
@@ -1617,3 +1618,15 @@ def test_write_read_round_trip():
 
         read_options = ReadOptions(column_names=t.column_names)
         assert t == read_csv(buf, read_options=read_options)
+
+
+def test_read_csv_reference_cycle():
+    # ARROW-13187
+    def inner():
+        buf = io.BytesIO(b"a,b,c\n1,2,3\n4,5,6")
+        table = read_csv(buf)
+        return weakref.ref(table)
+
+    with util.disabled_gc():
+        wr = inner()
+        assert wr() is None
diff --git a/python/pyarrow/tests/test_util.py b/python/pyarrow/tests/test_util.py
new file mode 100644
index 00000000000..2b351a53442
--- /dev/null
+++ b/python/pyarrow/tests/test_util.py
@@ -0,0 +1,52 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import gc
+import signal
+import sys
+import weakref
+
+import pytest
+
+from pyarrow import util
+from pyarrow.tests.util import disabled_gc
+
+
+def exhibit_signal_refcycle():
+    # Put an object in the frame locals and return a weakref to it.
+    # If `signal.getsignal` has a bug where it creates a reference cycle
+    # keeping alive the current execution frames, `obj` will not be
+    # destroyed immediately when this function returns.
+    obj = set()
+    signal.getsignal(signal.SIGINT)
+    return weakref.ref(obj)
+
+
+def test_signal_refcycle():
+    # Test possible workaround for https://bugs.python.org/issue42248
+    with disabled_gc():
+        wr = exhibit_signal_refcycle()
+        if wr() is None:
+            pytest.skip(
+                "Python version does not have the bug we're testing for")
+
+    gc.collect()
+    with disabled_gc():
+        wr = exhibit_signal_refcycle()
+        assert wr() is not None
+        util._break_traceback_cycle_from_frame(sys._getframe(0))
+        assert wr() is None
diff --git a/python/pyarrow/tests/util.py b/python/pyarrow/tests/util.py
index 3425fe01c9b..558df8cf1b0 100644
--- a/python/pyarrow/tests/util.py
+++ b/python/pyarrow/tests/util.py
@@ -225,6 +225,15 @@ def change_cwd(path):
         os.chdir(curdir)
 
 
+@contextlib.contextmanager
+def disabled_gc():
+    gc.disable()
+    try:
+        yield
+    finally:
+        gc.enable()
+
+
 def _filesystem_uri(path):
     # URIs on Windows must follow 'file:///C:...' or 'file:/C:...' patterns.
     if os.name == 'nt':
diff --git a/python/pyarrow/util.py b/python/pyarrow/util.py
index 446e6733351..69bde250cac 100644
--- a/python/pyarrow/util.py
+++ b/python/pyarrow/util.py
@@ -19,8 +19,11 @@
 
 import contextlib
 import functools
+import gc
 import pathlib
 import socket
+import sys
+import types
 import warnings
 
 
@@ -150,3 +153,26 @@ def find_free_port():
 def guid():
     from uuid import uuid4
     return uuid4().hex
+
+
+def _break_traceback_cycle_from_frame(frame):
+    # Clear local variables in all inner frames, so as to break the
+    # reference cycle.
+    this_frame = sys._getframe(0)
+    refs = gc.get_referrers(frame)
+    while refs:
+        for frame in refs:
+            if frame is not this_frame and isinstance(frame, types.FrameType):
+                break
+        else:
+            # No frame found in referrers (finished?)
+            break
+        refs = None
+        # Clear the frame locals, to try and break the cycle (it is
+        # somewhere along the chain of execution frames).
+        frame.clear()
+        # To visit the inner frame, we need to find it among the
+        # referers of this frame (while `frame.f_back` would let
+        # us visit the outer frame).
+        refs = gc.get_referrers(frame)
+    refs = frame = this_frame = None

From d0a5c5b4db4cd2f58cae45727621bc7d1fd20d00 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Tue, 29 Jun 2021 18:09:48 +0200
Subject: [PATCH 475/719] ARROW-12716: [C++] Add string padding kernel

Closes #10586 from lidavidm/arrow-12716

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/compute/api_scalar.h            |  10 +
 .../arrow/compute/kernels/scalar_string.cc    | 194 ++++++++++++++++--
 .../compute/kernels/scalar_string_test.cc     |  47 +++++
 cpp/src/arrow/util/utf8.h                     |  10 +
 cpp/src/arrow/util/utf8_util_test.cc          |  20 ++
 docs/source/cpp/compute.rst                   |  21 ++
 docs/source/python/api/compute.rst            |  16 ++
 python/pyarrow/_compute.pyx                   |  17 ++
 python/pyarrow/compute.py                     |   1 +
 python/pyarrow/includes/libarrow.pxd          |   6 +
 python/pyarrow/tests/test_compute.py          |  12 ++
 11 files changed, 342 insertions(+), 12 deletions(-)

diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h
index 082876b356b..5c83dcb5c85 100644
--- a/cpp/src/arrow/compute/api_scalar.h
+++ b/cpp/src/arrow/compute/api_scalar.h
@@ -154,6 +154,16 @@ struct ARROW_EXPORT StrptimeOptions : public FunctionOptions {
   TimeUnit::type unit;
 };
 
+struct ARROW_EXPORT PadOptions : public FunctionOptions {
+  explicit PadOptions(int64_t width, std::string padding = " ")
+      : width(width), padding(std::move(padding)) {}
+
+  /// The desired string length.
+  int64_t width;
+  /// What to pad the string with. Should be one codepoint (Unicode)/byte (ASCII).
+  std::string padding;
+};
+
 struct ARROW_EXPORT TrimOptions : public FunctionOptions {
   explicit TrimOptions(std::string characters) : characters(std::move(characters)) {}
 
diff --git a/cpp/src/arrow/compute/kernels/scalar_string.cc b/cpp/src/arrow/compute/kernels/scalar_string.cc
index e6820fe4747..7f596f22224 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string.cc
@@ -98,14 +98,7 @@ struct Utf8Length {
   static OutValue Call(KernelContext*, Arg0Value val, Status*) {
     auto str = reinterpret_cast<const uint8_t*>(val.data());
     auto strlen = val.size();
-
-    OutValue length = 0;
-    while (strlen > 0) {
-      length += ((*str & 0xc0) != 0x80);
-      ++str;
-      --strlen;
-    }
-    return length;
+    return static_cast<OutValue>(util::UTF8Length(str, str + strlen));
   }
 };
 
@@ -2817,6 +2810,138 @@ Result<ValueDescr> StrptimeResolve(KernelContext* ctx, const std::vector<ValueDe
   return Status::Invalid("strptime does not provide default StrptimeOptions");
 }
 
+// ----------------------------------------------------------------------
+// string padding
+
+template <bool PadLeft, bool PadRight>
+struct AsciiPadTransform : public StringTransformBase {
+  using State = OptionsWrapper<PadOptions>;
+
+  const PadOptions& options_;
+
+  explicit AsciiPadTransform(const PadOptions& options) : options_(options) {}
+
+  Status PreExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) override {
+    if (options_.padding.size() != 1) {
+      return Status::Invalid("Padding must be one byte, got '", options_.padding, "'");
+    }
+    return Status::OK();
+  }
+
+  int64_t MaxCodeunits(int64_t ninputs, int64_t input_ncodeunits) override {
+    // This is likely very overallocated but hard to do better without
+    // actually looking at each string (because of strings that may be
+    // longer than the given width)
+    return input_ncodeunits + ninputs * options_.width;
+  }
+
+  int64_t Transform(const uint8_t* input, int64_t input_string_ncodeunits,
+                    uint8_t* output) {
+    if (input_string_ncodeunits >= options_.width) {
+      std::copy(input, input + input_string_ncodeunits, output);
+      return input_string_ncodeunits;
+    }
+    const int64_t spaces = options_.width - input_string_ncodeunits;
+    int64_t left = 0;
+    int64_t right = 0;
+    if (PadLeft && PadRight) {
+      // If odd number of spaces, put the extra space on the left
+      right = spaces / 2;
+      left = spaces - right;
+    } else if (PadLeft) {
+      left = spaces;
+    } else if (PadRight) {
+      right = spaces;
+    } else {
+      DCHECK(false) << "unreachable";
+      return 0;
+    }
+    std::fill(output, output + left, options_.padding[0]);
+    output += left;
+    output = std::copy(input, input + input_string_ncodeunits, output);
+    std::fill(output, output + right, options_.padding[0]);
+    return options_.width;
+  }
+};
+
+template <bool PadLeft, bool PadRight>
+struct Utf8PadTransform : public StringTransformBase {
+  using State = OptionsWrapper<PadOptions>;
+
+  const PadOptions& options_;
+
+  explicit Utf8PadTransform(const PadOptions& options) : options_(options) {}
+
+  Status PreExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) override {
+    auto str = reinterpret_cast<const uint8_t*>(options_.padding.data());
+    auto strlen = options_.padding.size();
+    if (util::UTF8Length(str, str + strlen) != 1) {
+      return Status::Invalid("Padding must be one codepoint, got '", options_.padding,
+                             "'");
+    }
+    return Status::OK();
+  }
+
+  int64_t MaxCodeunits(int64_t ninputs, int64_t input_ncodeunits) override {
+    // This is likely very overallocated but hard to do better without
+    // actually looking at each string (because of strings that may be
+    // longer than the given width)
+    // One codepoint may be up to 4 bytes
+    return input_ncodeunits + 4 * ninputs * options_.width;
+  }
+
+  int64_t Transform(const uint8_t* input, int64_t input_string_ncodeunits,
+                    uint8_t* output) {
+    const int64_t input_width = util::UTF8Length(input, input + input_string_ncodeunits);
+    if (input_width >= options_.width) {
+      std::copy(input, input + input_string_ncodeunits, output);
+      return input_string_ncodeunits;
+    }
+    const int64_t spaces = options_.width - input_width;
+    int64_t left = 0;
+    int64_t right = 0;
+    if (PadLeft && PadRight) {
+      // If odd number of spaces, put the extra space on the left
+      right = spaces / 2;
+      left = spaces - right;
+    } else if (PadLeft) {
+      left = spaces;
+    } else if (PadRight) {
+      right = spaces;
+    } else {
+      DCHECK(false) << "unreachable";
+      return 0;
+    }
+    uint8_t* start = output;
+    while (left) {
+      output = std::copy(options_.padding.begin(), options_.padding.end(), output);
+      left--;
+    }
+    output = std::copy(input, input + input_string_ncodeunits, output);
+    while (right) {
+      output = std::copy(options_.padding.begin(), options_.padding.end(), output);
+      right--;
+    }
+    return output - start;
+  }
+};
+
+template <typename Type>
+using AsciiLPad = StringTransformExecWithState<Type, AsciiPadTransform<true, false>>;
+template <typename Type>
+using AsciiRPad = StringTransformExecWithState<Type, AsciiPadTransform<false, true>>;
+template <typename Type>
+using AsciiCenter = StringTransformExecWithState<Type, AsciiPadTransform<true, true>>;
+template <typename Type>
+using Utf8LPad = StringTransformExecWithState<Type, Utf8PadTransform<true, false>>;
+template <typename Type>
+using Utf8RPad = StringTransformExecWithState<Type, Utf8PadTransform<false, true>>;
+template <typename Type>
+using Utf8Center = StringTransformExecWithState<Type, Utf8PadTransform<true, true>>;
+
+// ----------------------------------------------------------------------
+// string trimming
+
 #ifdef ARROW_WITH_UTF8PROC
 
 template <bool TrimLeft, bool TrimRight>
@@ -3010,6 +3135,42 @@ using AsciiLTrim = StringTransformExecWithState<Type, AsciiTrimTransform<true, f
 template <typename Type>
 using AsciiRTrim = StringTransformExecWithState<Type, AsciiTrimTransform<false, true>>;
 
+const FunctionDoc utf8_center_doc(
+    "Center strings by padding with a given character",
+    ("For each string in `strings`, emit a centered string by padding both sides \n"
+     "with the given UTF8 codeunit.\nNull values emit null."),
+    {"strings"}, "PadOptions");
+
+const FunctionDoc utf8_lpad_doc(
+    "Right-align strings by padding with a given character",
+    ("For each string in `strings`, emit a right-aligned string by prepending \n"
+     "the given UTF8 codeunit.\nNull values emit null."),
+    {"strings"}, "PadOptions");
+
+const FunctionDoc utf8_rpad_doc(
+    "Left-align strings by padding with a given character",
+    ("For each string in `strings`, emit a left-aligned string by appending \n"
+     "the given UTF8 codeunit.\nNull values emit null."),
+    {"strings"}, "PadOptions");
+
+const FunctionDoc ascii_center_doc(
+    utf8_center_doc.description + "",
+    ("For each string in `strings`, emit a centered string by padding both sides \n"
+     "with the given ASCII character.\nNull values emit null."),
+    {"strings"}, "PadOptions");
+
+const FunctionDoc ascii_lpad_doc(
+    utf8_lpad_doc.description + "",
+    ("For each string in `strings`, emit a right-aligned string by prepending \n"
+     "the given ASCII character.\nNull values emit null."),
+    {"strings"}, "PadOptions");
+
+const FunctionDoc ascii_rpad_doc(
+    utf8_rpad_doc.description + "",
+    ("For each string in `strings`, emit a left-aligned string by appending \n"
+     "the given ASCII character.\nNull values emit null."),
+    {"strings"}, "PadOptions");
+
 const FunctionDoc utf8_trim_whitespace_doc(
     "Trim leading and trailing whitespace characters",
     ("For each string in `strings`, emit a string with leading and trailing whitespace\n"
@@ -3897,12 +4058,21 @@ void RegisterScalarStringAscii(FunctionRegistry* registry) {
                                                    &ascii_rtrim_whitespace_doc);
   MakeUnaryStringBatchKernel<AsciiReverse>("ascii_reverse", registry, &ascii_reverse_doc);
   MakeUnaryStringBatchKernel<Utf8Reverse>("utf8_reverse", registry, &utf8_reverse_doc);
-  MakeUnaryStringBatchKernelWithState<AsciiTrim>("ascii_trim", registry,
-                                                 &ascii_lower_doc);
+
+  MakeUnaryStringBatchKernelWithState<AsciiCenter>("ascii_center", registry,
+                                                   &ascii_center_doc);
+  MakeUnaryStringBatchKernelWithState<AsciiLPad>("ascii_lpad", registry, &ascii_lpad_doc);
+  MakeUnaryStringBatchKernelWithState<AsciiRPad>("ascii_rpad", registry, &ascii_rpad_doc);
+  MakeUnaryStringBatchKernelWithState<Utf8Center>("utf8_center", registry,
+                                                  &utf8_center_doc);
+  MakeUnaryStringBatchKernelWithState<Utf8LPad>("utf8_lpad", registry, &utf8_lpad_doc);
+  MakeUnaryStringBatchKernelWithState<Utf8RPad>("utf8_rpad", registry, &utf8_rpad_doc);
+
+  MakeUnaryStringBatchKernelWithState<AsciiTrim>("ascii_trim", registry, &ascii_trim_doc);
   MakeUnaryStringBatchKernelWithState<AsciiLTrim>("ascii_ltrim", registry,
-                                                  &ascii_lower_doc);
+                                                  &ascii_ltrim_doc);
   MakeUnaryStringBatchKernelWithState<AsciiRTrim>("ascii_rtrim", registry,
-                                                  &ascii_lower_doc);
+                                                  &ascii_rtrim_doc);
 
   AddUnaryStringPredicate<IsAscii>("string_is_ascii", registry, &string_is_ascii_doc);
 
diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
index d5c256fd8ef..e88da14c288 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
@@ -1225,6 +1225,33 @@ TYPED_TEST(TestStringKernels, BinaryJoin) {
                     separators, expected);
 }
 
+TYPED_TEST(TestStringKernels, PadUTF8) {
+  // \xe2\x80\x88 = \u2008 is punctuation space, \xc3\xa1 = \u00E1 = á
+  PadOptions options{/*width=*/5, "\xe2\x80\x88"};
+  this->CheckUnary(
+      "utf8_center", R"([null, "a", "bb", "b\u00E1r", "foobar"])", this->type(),
+      R"([null, "\u2008\u2008a\u2008\u2008", "\u2008\u2008bb\u2008", "\u2008b\u00E1r\u2008", "foobar"])",
+      &options);
+  this->CheckUnary(
+      "utf8_lpad", R"([null, "a", "bb", "b\u00E1r", "foobar"])", this->type(),
+      R"([null, "\u2008\u2008\u2008\u2008a", "\u2008\u2008\u2008bb", "\u2008\u2008b\u00E1r", "foobar"])",
+      &options);
+  this->CheckUnary(
+      "utf8_rpad", R"([null, "a", "bb", "b\u00E1r", "foobar"])", this->type(),
+      R"([null, "a\u2008\u2008\u2008\u2008", "bb\u2008\u2008\u2008", "b\u00E1r\u2008\u2008", "foobar"])",
+      &options);
+
+  PadOptions options_bad{/*width=*/3, /*padding=*/"spam"};
+  auto input = ArrayFromJSON(this->type(), R"(["foo"])");
+  EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
+                                  ::testing::HasSubstr("Padding must be one codepoint"),
+                                  CallFunction("utf8_lpad", {input}, &options_bad));
+  options_bad.padding = "";
+  EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
+                                  ::testing::HasSubstr("Padding must be one codepoint"),
+                                  CallFunction("utf8_lpad", {input}, &options_bad));
+}
+
 #ifdef ARROW_WITH_UTF8PROC
 
 TYPED_TEST(TestStringKernels, TrimWhitespaceUTF8) {
@@ -1371,6 +1398,26 @@ TYPED_TEST(TestStringKernels, SliceCodeunitsNegPos) {
 
 #endif  // ARROW_WITH_UTF8PROC
 
+TYPED_TEST(TestStringKernels, PadAscii) {
+  PadOptions options{/*width=*/5, " "};
+  this->CheckUnary("ascii_center", R"([null, "a", "bb", "bar", "foobar"])", this->type(),
+                   R"([null, "  a  ", "  bb ", " bar ", "foobar"])", &options);
+  this->CheckUnary("ascii_lpad", R"([null, "a", "bb", "bar", "foobar"])", this->type(),
+                   R"([null, "    a", "   bb", "  bar", "foobar"])", &options);
+  this->CheckUnary("ascii_rpad", R"([null, "a", "bb", "bar", "foobar"])", this->type(),
+                   R"([null, "a    ", "bb   ", "bar  ", "foobar"])", &options);
+
+  PadOptions options_bad{/*width=*/3, /*padding=*/"spam"};
+  auto input = ArrayFromJSON(this->type(), R"(["foo"])");
+  EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
+                                  ::testing::HasSubstr("Padding must be one byte"),
+                                  CallFunction("ascii_lpad", {input}, &options_bad));
+  options_bad.padding = "";
+  EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
+                                  ::testing::HasSubstr("Padding must be one byte"),
+                                  CallFunction("ascii_lpad", {input}, &options_bad));
+}
+
 TYPED_TEST(TestStringKernels, TrimWhitespaceAscii) {
   // \xe2\x80\x88 is punctuation space
   this->CheckUnary("ascii_trim_whitespace",
diff --git a/cpp/src/arrow/util/utf8.h b/cpp/src/arrow/util/utf8.h
index 1426dc904ee..0c9a368d3dd 100644
--- a/cpp/src/arrow/util/utf8.h
+++ b/cpp/src/arrow/util/utf8.h
@@ -556,5 +556,15 @@ static inline bool UTF8AllOf(const uint8_t* first, const uint8_t* last, bool* re
   return true;
 }
 
+/// Count the number of codepoints in the given string (assuming it is valid UTF8).
+static inline int64_t UTF8Length(const uint8_t* first, const uint8_t* last) {
+  int64_t length = 0;
+  while (first != last) {
+    length += ((*first & 0xc0) != 0x80);
+    ++first;
+  }
+  return length;
+}
+
 }  // namespace util
 }  // namespace arrow
diff --git a/cpp/src/arrow/util/utf8_util_test.cc b/cpp/src/arrow/util/utf8_util_test.cc
index 8b6713623c4..62a3d0d28cb 100644
--- a/cpp/src/arrow/util/utf8_util_test.cc
+++ b/cpp/src/arrow/util/utf8_util_test.cc
@@ -489,5 +489,25 @@ TEST(UTF8FindIf, Basics) {
   CheckOkUTF8("", U'β', 0, 0);
 }
 
+TEST(UTF8Length, Basics) {
+  auto length = [](const std::string& s) {
+    const auto* p = reinterpret_cast<const uint8_t*>(s.data());
+    return UTF8Length(p, p + s.length());
+  };
+  ASSERT_EQ(length("abcde"), 5);
+  // accented a encoded as a single codepoint
+  ASSERT_EQ(length("\xc3\x81"
+                   "bcde"),
+            5);
+  // accented a encoded as two codepoints via combining character
+  ASSERT_EQ(length("a\xcc\x81"
+                   "bcde"),
+            6);
+  // hiragana a (3 bytes)
+  ASSERT_EQ(length("\xe3\x81\x81"), 1);
+  // raised hands emoji (4 bytes)
+  ASSERT_EQ(length("\xf0\x9f\x99\x8c"), 1);
+}
+
 }  // namespace util
 }  // namespace arrow
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index e785756dcda..a4ca8f9c61b 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -538,6 +538,27 @@ String transforms
   If the input is not valid UTF8, then the output is undefined (but the size of output
   buffers will be preserved).
 
+String padding
+~~~~~~~~~~~~~~
+
+These functions append/prepend a given padding byte (ASCII) or codepoint (UTF8) in
+order to center (center), right-align (lpad), or left-align (rpad) a string.
+
++--------------------------+------------+-------------------------+---------------------+----------------------------------------+
+| Function name            | Arity      | Input types             | Output type         | Options class                          |
++==========================+============+=========================+=====================+========================================+
+| ascii_lpad               | Unary      | String-like             | String-like         | :struct:`PadOptions`                   |
++--------------------------+------------+-------------------------+---------------------+----------------------------------------+
+| ascii_rpad               | Unary      | String-like             | String-like         | :struct:`PadOptions`                   |
++--------------------------+------------+-------------------------+---------------------+----------------------------------------+
+| ascii_center             | Unary      | String-like             | String-like         | :struct:`PadOptions`                   |
++--------------------------+------------+-------------------------+---------------------+----------------------------------------+
+| utf8_lpad                | Unary      | String-like             | String-like         | :struct:`PadOptions`                   |
++--------------------------+------------+-------------------------+---------------------+----------------------------------------+
+| utf8_rpad                | Unary      | String-like             | String-like         | :struct:`PadOptions`                   |
++--------------------------+------------+-------------------------+---------------------+----------------------------------------+
+| utf8_center              | Unary      | String-like             | String-like         | :struct:`PadOptions`                   |
++--------------------------+------------+-------------------------+---------------------+----------------------------------------+
 
 String trimming
 ~~~~~~~~~~~~~~~
diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst
index 8b264ed9b83..34626e21fdc 100644
--- a/docs/source/python/api/compute.rst
+++ b/docs/source/python/api/compute.rst
@@ -182,17 +182,33 @@ String Transforms
 .. autosummary::
    :toctree: ../generated/
 
+   ascii_center
+   ascii_lpad
+   ascii_ltrim
+   ascii_ltrim_whitespace
    ascii_lower
    ascii_reverse
+   ascii_rpad
+   ascii_rtrim
+   ascii_rtrim_whitespace
+   ascii_trim
    ascii_upper
    binary_length
    binary_replace_slice
    replace_substring
    replace_substring_regex
+   utf8_center
    utf8_length
    utf8_lower
+   utf8_lpad
+   utf8_ltrim
+   utf8_ltrim_whitespace
    utf8_replace_slice
    utf8_reverse
+   utf8_rpad
+   utf8_rtrim
+   utf8_rtrim_whitespace
+   utf8_trim
    utf8_upper
 
 Containment tests
diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx
index 559a8a02b1c..ae08a5596f3 100644
--- a/python/pyarrow/_compute.pyx
+++ b/python/pyarrow/_compute.pyx
@@ -715,6 +715,23 @@ class MatchSubstringOptions(_MatchSubstringOptions):
         self._set_options(pattern, ignore_case)
 
 
+cdef class _PadOptions(FunctionOptions):
+    cdef:
+        unique_ptr[CPadOptions] pad_options
+
+    cdef const CFunctionOptions* get_options(self) except NULL:
+        return self.pad_options.get()
+
+    def _set_options(self, width, padding):
+        self.pad_options.reset(
+            new CPadOptions(width, tobytes(padding)))
+
+
+class PadOptions(_PadOptions):
+    def __init__(self, width, padding=' '):
+        self._set_options(width, padding)
+
+
 cdef class _TrimOptions(FunctionOptions):
     cdef:
         unique_ptr[CTrimOptions] trim_options
diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py
index aacf8456c1b..eadcdaa44a8 100644
--- a/python/pyarrow/compute.py
+++ b/python/pyarrow/compute.py
@@ -39,6 +39,7 @@
     JoinOptions,
     MatchSubstringOptions,
     ModeOptions,
+    PadOptions,
     PartitionNthOptions,
     ProjectOptions,
     QuantileOptions,
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 0a8c7494989..653a2b83781 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -1823,6 +1823,12 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil:
         CTrimOptions(c_string characters)
         c_string characters
 
+    cdef cppclass CPadOptions \
+            "arrow::compute::PadOptions"(CFunctionOptions):
+        CPadOptions(int64_t width, c_string padding)
+        int64_t width
+        c_string padding
+
     cdef cppclass CSliceOptions \
             "arrow::compute::SliceOptions"(CFunctionOptions):
         CSliceOptions(int64_t start, int64_t stop, int64_t step)
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index 8b294b85759..3a10da0ca2b 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -708,6 +708,18 @@ def test_string_py_compat_boolean(function_name, variant):
             assert arrow_func(ar)[0].as_py() == getattr(c, py_name)()
 
 
+def test_pad():
+    arr = pa.array([None, 'a', 'abcd'])
+    assert pc.ascii_center(arr, width=3).tolist() == [None, ' a ', 'abcd']
+    assert pc.ascii_lpad(arr, width=3).tolist() == [None, '  a', 'abcd']
+    assert pc.ascii_rpad(arr, width=3).tolist() == [None, 'a  ', 'abcd']
+
+    arr = pa.array([None, 'á', 'abcd'])
+    assert pc.utf8_center(arr, width=3).tolist() == [None, ' á ', 'abcd']
+    assert pc.utf8_lpad(arr, width=3).tolist() == [None, '  á', 'abcd']
+    assert pc.utf8_rpad(arr, width=3).tolist() == [None, 'á  ', 'abcd']
+
+
 @pytest.mark.pandas
 def test_replace_slice():
     offsets = range(-3, 4)

From 42048e554056baa7fb3d1159a859f3e08056fc1b Mon Sep 17 00:00:00 2001
From: Mauricio Vargas <mavargas11@uc.cl>
Date: Tue, 29 Jun 2021 21:48:32 -0400
Subject: [PATCH 476/719] ARROW-12967: [R] Add bindings for pmin() and pmax()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #10615 from pachadotdev/arrow12967v3

Lead-authored-by: Mauricio Vargas <mavargas11@uc.cl>
Co-authored-by: Pachá <mvargas@dcc.uchile.cl>
Signed-off-by: Ian Cook <ianmcook@gmail.com>
---
 r/R/dplyr-functions.R                | 24 +++++++++++++++++----
 r/src/compute.cpp                    |  9 ++++++++
 r/tests/testthat/test-dplyr-mutate.R | 31 ++++++++++++++++++++++++++++
 3 files changed, 60 insertions(+), 4 deletions(-)

diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R
index 5076fc09847..27d6e889199 100644
--- a/r/R/dplyr-functions.R
+++ b/r/R/dplyr-functions.R
@@ -398,6 +398,22 @@ nse_funcs$str_split <- function(string, pattern, n = Inf, simplify = FALSE) {
   )
 }
 
+nse_funcs$pmin <- function(..., na.rm = FALSE) {
+  build_expr(
+    "min_element_wise",
+    ...,
+    options = list(skip_nulls = na.rm)
+  )
+}
+
+nse_funcs$pmax <- function(..., na.rm = FALSE) {
+  build_expr(
+    "max_element_wise",
+    ...,
+    options = list(skip_nulls = na.rm)
+  )
+}
+
 # String function helpers
 
 # format `pattern` as needed for case insensitivity and literal matching by RE2
@@ -511,7 +527,7 @@ nse_funcs$second <- function(x) {
 }
 
 # After ARROW-13054 is completed, we can refactor this for simplicity
-# 
+#
 # Arrow's `day_of_week` kernel counts from 0 (Monday) to 6 (Sunday), whereas
 # `lubridate::wday` counts from 1 to 7, and allows users to specify which day
 # of the week is first (Sunday by default).  This Expression converts the returned
@@ -519,16 +535,16 @@ nse_funcs$second <- function(x) {
 # providing offset values based on the specified week_start day, and adding 1
 # so the returned value is 1-indexed instead of 0-indexed.
 nse_funcs$wday <- function(x, label = FALSE, abbr = TRUE, week_start = getOption("lubridate.week.start", 7)) {
-  
+
   # The "day_of_week" compute function returns numeric days of week and not locale-aware strftime
   # When the ticket below is resolved, we should be able to support the label argument
   # https://issues.apache.org/jira/browse/ARROW-13133
   if (label) {
     arrow_not_supported("Label argument")
   }
-  
+
   # overall formula to convert from arrow::wday to lubridate::wday is:
   #  ((wday(day) - start + 8) %% 7) + 1
   ((Expression$create("day_of_week", x) - Expression$scalar(week_start) + 8) %% 7) + 1
-  
+
 }
diff --git a/r/src/compute.cpp b/r/src/compute.cpp
index 9a05dd02859..458e0e386e9 100644
--- a/r/src/compute.cpp
+++ b/r/src/compute.cpp
@@ -180,6 +180,15 @@ std::shared_ptr<arrow::compute::FunctionOptions> make_compute_options(
     return out;
   }
 
+  if (func_name == "min_element_wise" || func_name == "max_element_wise") {
+    using Options = arrow::compute::ElementWiseAggregateOptions;
+    bool skip_nulls = true;
+    if (!Rf_isNull(options["skip_nulls"])) {
+      skip_nulls = cpp11::as_cpp<bool>(options["skip_nulls"]);
+    }
+    return std::make_shared<Options>(skip_nulls);
+  }
+
   if (func_name == "quantile") {
     using Options = arrow::compute::QuantileOptions;
     auto out = std::make_shared<Options>(Options::Defaults());
diff --git a/r/tests/testthat/test-dplyr-mutate.R b/r/tests/testthat/test-dplyr-mutate.R
index 98eb4983d32..908ada296be 100644
--- a/r/tests/testthat/test-dplyr-mutate.R
+++ b/r/tests/testthat/test-dplyr-mutate.R
@@ -418,3 +418,34 @@ test_that("mutate and write_dataset", {
       summarize(mean = mean(integer))
   )
 })
+
+test_that("mutate and pmin/pmax", {
+  df <- tibble(
+    city = c("Chillan", "Valdivia", "Osorno"),
+    val1 = c(200, 300, NA),
+    val2 = c(100, NA, NA),
+    val3 = c(0, NA, NA)
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(
+        max_val_1 = pmax(val1, val2, val3),
+        max_val_2 = pmax(val1, val2, val3, na.rm = T),
+        min_val_1 = pmin(val1, val2, val3),
+        min_val_2 = pmin(val1, val2, val3, na.rm = T)
+      ) %>%
+      collect(),
+    df
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(
+        max_val_1 = pmax(val1 - 100, 200, val1 * 100, na.rm = T),
+        min_val_1 = pmin(val1 - 100, 100, val1 * 100, na.rm = T),
+      ) %>%
+      collect(),
+    df
+  )
+})

From cc4e69d7d3dcee02425aa2603638e44a46e92ebd Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Wed, 30 Jun 2021 10:40:26 +0200
Subject: [PATCH 477/719] ARROW-13134: [C++][CI] Pin aws-sdk-cpp to < 1.9

Closes #10620 from pitrou/ARROW-13134

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 ci/conda_env_cpp.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/conda_env_cpp.yml b/ci/conda_env_cpp.yml
index 390eb7dcdd5..1a8ae6d7edc 100644
--- a/ci/conda_env_cpp.yml
+++ b/ci/conda_env_cpp.yml
@@ -15,7 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-aws-sdk-cpp
+aws-sdk-cpp<1.9
 benchmark=1.5.2
 boost-cpp>=1.68.0
 brotli

From 58b310971665af3b80e83631db0a9874c19e8c1e Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Wed, 30 Jun 2021 11:12:22 +0200
Subject: [PATCH 478/719] ARROW-13104: [C++] Fix unsafe cast in ByteStreamSplit
 implementation

Closes #10596 from pitrou/ARROW-13104-unsafe-cast

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/parquet/encoding.cc | 49 +++++++++++++++++++++----------------
 1 file changed, 28 insertions(+), 21 deletions(-)

diff --git a/cpp/src/parquet/encoding.cc b/cpp/src/parquet/encoding.cc
index cc1e262a96d..6e8f7ee5491 100644
--- a/cpp/src/parquet/encoding.cc
+++ b/cpp/src/parquet/encoding.cc
@@ -861,20 +861,31 @@ class ByteStreamSplitEncoder : public EncoderImpl, virtual public TypedEncoder<D
                  int64_t valid_bits_offset) override;
 
  protected:
-  ::arrow::TypedBufferBuilder<T> values_;
+  template <typename ArrowType>
+  void PutImpl(const ::arrow::Array& values) {
+    if (values.type_id() != ArrowType::type_id) {
+      throw ParquetException(std::string() + "direct put to " + ArrowType::type_name() +
+                             " from " + values.type()->ToString() + " not supported");
+    }
+    const auto& data = *values.data();
+    PutSpaced(data.GetValues<typename ArrowType::c_type>(1),
+              static_cast<int>(data.length), data.GetValues<uint8_t>(0, 0), data.offset);
+  }
 
- private:
-  void PutArrowArray(const ::arrow::Array& values);
+  ::arrow::BufferBuilder sink_;
+  int64_t num_values_in_buffer_;
 };
 
 template <typename DType>
 ByteStreamSplitEncoder<DType>::ByteStreamSplitEncoder(const ColumnDescriptor* descr,
                                                       ::arrow::MemoryPool* pool)
-    : EncoderImpl(descr, Encoding::BYTE_STREAM_SPLIT, pool), values_{pool} {}
+    : EncoderImpl(descr, Encoding::BYTE_STREAM_SPLIT, pool),
+      sink_{pool},
+      num_values_in_buffer_{0} {}
 
 template <typename DType>
 int64_t ByteStreamSplitEncoder<DType>::EstimatedDataEncodedSize() {
-  return values_.length() * sizeof(T);
+  return sink_.length();
 }
 
 template <typename DType>
@@ -882,34 +893,30 @@ std::shared_ptr<Buffer> ByteStreamSplitEncoder<DType>::FlushValues() {
   std::shared_ptr<ResizableBuffer> output_buffer =
       AllocateBuffer(this->memory_pool(), EstimatedDataEncodedSize());
   uint8_t* output_buffer_raw = output_buffer->mutable_data();
-  const size_t num_values = values_.length();
-  const uint8_t* raw_values = reinterpret_cast<const uint8_t*>(values_.data());
-  ::arrow::util::internal::ByteStreamSplitEncode<T>(raw_values, num_values,
+  const uint8_t* raw_values = sink_.data();
+  ::arrow::util::internal::ByteStreamSplitEncode<T>(raw_values, num_values_in_buffer_,
                                                     output_buffer_raw);
-  values_.Reset();
+  sink_.Reset();
+  num_values_in_buffer_ = 0;
   return std::move(output_buffer);
 }
 
 template <typename DType>
 void ByteStreamSplitEncoder<DType>::Put(const T* buffer, int num_values) {
-  if (num_values > 0) PARQUET_THROW_NOT_OK(values_.Append(buffer, num_values));
-}
-
-template <typename DType>
-void ByteStreamSplitEncoder<DType>::Put(const ::arrow::Array& values) {
-  PutArrowArray(values);
+  if (num_values > 0) {
+    PARQUET_THROW_NOT_OK(sink_.Append(buffer, num_values * sizeof(T)));
+    num_values_in_buffer_ += num_values;
+  }
 }
 
 template <>
-void ByteStreamSplitEncoder<FloatType>::PutArrowArray(const ::arrow::Array& values) {
-  DirectPutImpl<::arrow::FloatArray>(values,
-                                     reinterpret_cast<::arrow::BufferBuilder*>(&values_));
+void ByteStreamSplitEncoder<FloatType>::Put(const ::arrow::Array& values) {
+  PutImpl<::arrow::FloatType>(values);
 }
 
 template <>
-void ByteStreamSplitEncoder<DoubleType>::PutArrowArray(const ::arrow::Array& values) {
-  DirectPutImpl<::arrow::DoubleArray>(
-      values, reinterpret_cast<::arrow::BufferBuilder*>(&values_));
+void ByteStreamSplitEncoder<DoubleType>::Put(const ::arrow::Array& values) {
+  PutImpl<::arrow::DoubleType>(values);
 }
 
 template <typename DType>

From e9fa30406215b76ed6c885302fbfe6075c47badf Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Wed, 30 Jun 2021 11:19:58 +0200
Subject: [PATCH 479/719] ARROW-13072: [C++] Add bit-wise arithmetic kernels

Closes #10530 from lidavidm/arrow-13072

Lead-authored-by: David Li <li.davidm96@gmail.com>
Co-authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/compute/api_scalar.cc           |   2 +
 cpp/src/arrow/compute/api_scalar.h            |  27 ++
 .../compute/kernels/scalar_arithmetic.cc      | 259 ++++++++++++++++++
 .../compute/kernels/scalar_arithmetic_test.cc | 204 ++++++++++++++
 docs/source/cpp/compute.rst                   |  63 +++--
 docs/source/python/api/compute.rst            |  14 +
 6 files changed, 551 insertions(+), 18 deletions(-)

diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc
index db1cac290cf..f005e70e348 100644
--- a/cpp/src/arrow/compute/api_scalar.cc
+++ b/cpp/src/arrow/compute/api_scalar.cc
@@ -62,6 +62,8 @@ SCALAR_ARITHMETIC_BINARY(Subtract, "subtract", "subtract_checked")
 SCALAR_ARITHMETIC_BINARY(Multiply, "multiply", "multiply_checked")
 SCALAR_ARITHMETIC_BINARY(Divide, "divide", "divide_checked")
 SCALAR_ARITHMETIC_BINARY(Power, "power", "power_checked")
+SCALAR_ARITHMETIC_BINARY(ShiftLeft, "shift_left", "shift_left_checked")
+SCALAR_ARITHMETIC_BINARY(ShiftRight, "shift_right", "shift_right_checked")
 
 Result<Datum> MaxElementWise(const std::vector<Datum>& args,
                              ElementWiseAggregateOptions options, ExecContext* ctx) {
diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h
index 5c83dcb5c85..b1013257401 100644
--- a/cpp/src/arrow/compute/api_scalar.h
+++ b/cpp/src/arrow/compute/api_scalar.h
@@ -308,6 +308,33 @@ Result<Datum> Power(const Datum& left, const Datum& right,
                     ArithmeticOptions options = ArithmeticOptions(),
                     ExecContext* ctx = NULLPTR);
 
+/// \brief Left shift the left array by the right array. Array values must be the
+/// same length. If either operand is null, the result will be null.
+///
+/// \param[in] left the value to shift
+/// \param[in] right the value to shift by
+/// \param[in] options arithmetic options (enable/disable overflow checking), optional
+/// \param[in] ctx the function execution context, optional
+/// \return the elementwise left value shifted left by the right value
+ARROW_EXPORT
+Result<Datum> ShiftLeft(const Datum& left, const Datum& right,
+                        ArithmeticOptions options = ArithmeticOptions(),
+                        ExecContext* ctx = NULLPTR);
+
+/// \brief Right shift the left array by the right array. Array values must be the
+/// same length. If either operand is null, the result will be null. Performs a
+/// logical shift for unsigned values, and an arithmetic shift for signed values.
+///
+/// \param[in] left the value to shift
+/// \param[in] right the value to shift by
+/// \param[in] options arithmetic options (enable/disable overflow checking), optional
+/// \param[in] ctx the function execution context, optional
+/// \return the elementwise left value shifted right by the right value
+ARROW_EXPORT
+Result<Datum> ShiftRight(const Datum& left, const Datum& right,
+                         ArithmeticOptions options = ArithmeticOptions(),
+                         ExecContext* ctx = NULLPTR);
+
 /// \brief Find the element-wise maximum of any number of arrays or scalars.
 /// Array values must be the same length.
 ///
diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
index f51484e53ff..ef9ef78054a 100644
--- a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
@@ -454,6 +454,106 @@ struct PowerChecked {
   }
 };
 
+// Bitwise operations
+
+struct BitWiseNot {
+  template <typename T, typename Arg>
+  static T Call(KernelContext*, Arg arg, Status*) {
+    return ~arg;
+  }
+};
+
+struct BitWiseAnd {
+  template <typename T, typename Arg0, typename Arg1>
+  static T Call(KernelContext*, Arg0 lhs, Arg1 rhs, Status*) {
+    return lhs & rhs;
+  }
+};
+
+struct BitWiseOr {
+  template <typename T, typename Arg0, typename Arg1>
+  static T Call(KernelContext*, Arg0 lhs, Arg1 rhs, Status*) {
+    return lhs | rhs;
+  }
+};
+
+struct BitWiseXor {
+  template <typename T, typename Arg0, typename Arg1>
+  static T Call(KernelContext*, Arg0 lhs, Arg1 rhs, Status*) {
+    return lhs ^ rhs;
+  }
+};
+
+struct ShiftLeft {
+  template <typename T, typename Arg0, typename Arg1>
+  static T Call(KernelContext*, Arg0 lhs, Arg1 rhs, Status*) {
+    using Unsigned = typename std::make_unsigned<Arg0>::type;
+    static_assert(std::is_same<T, Arg0>::value, "");
+    if (ARROW_PREDICT_FALSE(rhs < 0 || rhs >= std::numeric_limits<Arg0>::digits)) {
+      return lhs;
+    }
+    return static_cast<T>(static_cast<Unsigned>(lhs) << static_cast<Unsigned>(rhs));
+  }
+};
+
+// See SEI CERT C Coding Standard rule INT34-C
+struct ShiftLeftChecked {
+  template <typename T, typename Arg0, typename Arg1>
+  static enable_if_unsigned_integer<T> Call(KernelContext*, Arg0 lhs, Arg1 rhs,
+                                            Status* st) {
+    static_assert(std::is_same<T, Arg0>::value, "");
+    if (ARROW_PREDICT_FALSE(rhs < 0 || rhs >= std::numeric_limits<Arg0>::digits)) {
+      *st = Status::Invalid("shift amount must be >= 0 and less than precision of type");
+      return lhs;
+    }
+    return lhs << rhs;
+  }
+
+  template <typename T, typename Arg0, typename Arg1>
+  static enable_if_signed_integer<T> Call(KernelContext*, Arg0 lhs, Arg1 rhs,
+                                          Status* st) {
+    using Unsigned = typename std::make_unsigned<Arg0>::type;
+    static_assert(std::is_same<T, Arg0>::value, "");
+    if (ARROW_PREDICT_FALSE(rhs < 0 || rhs >= std::numeric_limits<Arg0>::digits)) {
+      *st = Status::Invalid("shift amount must be >= 0 and less than precision of type");
+      return lhs;
+    }
+    // In C/C++ left shift of a negative number is undefined (C++11 standard 5.8.2)
+    // Mimic Java/etc. and treat left shift as based on two's complement representation
+    // Assumes two's complement machine
+    return static_cast<T>(static_cast<Unsigned>(lhs) << static_cast<Unsigned>(rhs));
+  }
+};
+
+struct ShiftRight {
+  template <typename T, typename Arg0, typename Arg1>
+  static T Call(KernelContext*, Arg0 lhs, Arg1 rhs, Status*) {
+    static_assert(std::is_same<T, Arg0>::value, "");
+    // Logical right shift when Arg0 is unsigned
+    // Arithmetic otherwise (this is implementation-defined but GCC and MSVC document this
+    // as arithmetic right shift)
+    // https://gcc.gnu.org/onlinedocs/gcc/Integers-implementation.html#Integers-implementation
+    // https://docs.microsoft.com/en-us/cpp/cpp/left-shift-and-right-shift-operators-input-and-output?view=msvc-160
+    // Clang doesn't document their behavior.
+    if (ARROW_PREDICT_FALSE(rhs < 0 || rhs >= std::numeric_limits<Arg0>::digits)) {
+      return lhs;
+    }
+    return lhs >> rhs;
+  }
+};
+
+struct ShiftRightChecked {
+  template <typename T, typename Arg0, typename Arg1>
+  static T Call(KernelContext*, Arg0 lhs, Arg1 rhs, Status* st) {
+    static_assert(std::is_same<T, Arg0>::value, "");
+    if (ARROW_PREDICT_FALSE(rhs < 0 || rhs >= std::numeric_limits<Arg0>::digits)) {
+      *st = Status::Invalid("shift amount must be >= 0 and less than precision of type");
+      return lhs;
+    }
+    return lhs >> rhs;
+  }
+};
+
 // Generate a kernel given an arithmetic functor
 template <template <typename... Args> class KernelGenerator, typename Op>
 ArrayKernelExec ArithmeticExecFromOp(detail::GetTypeId get_id) {
@@ -485,6 +585,54 @@ ArrayKernelExec ArithmeticExecFromOp(detail::GetTypeId get_id) {
   }
 }
 
+// Generate a kernel given a bitwise arithmetic functor. Assumes the
+// functor treats all integer types of equal width identically
+template <template <typename... Args> class KernelGenerator, typename Op>
+ArrayKernelExec TypeAgnosticBitWiseExecFromOp(detail::GetTypeId get_id) {
+  switch (get_id.id) {
+    case Type::INT8:
+    case Type::UINT8:
+      return KernelGenerator<UInt8Type, UInt8Type, Op>::Exec;
+    case Type::INT16:
+    case Type::UINT16:
+      return KernelGenerator<UInt16Type, UInt16Type, Op>::Exec;
+    case Type::INT32:
+    case Type::UINT32:
+      return KernelGenerator<UInt32Type, UInt32Type, Op>::Exec;
+    case Type::INT64:
+    case Type::UINT64:
+      return KernelGenerator<UInt64Type, UInt64Type, Op>::Exec;
+    default:
+      DCHECK(false);
+      return ExecFail;
+  }
+}
+
+template <template <typename... Args> class KernelGenerator, typename Op>
+ArrayKernelExec ShiftExecFromOp(detail::GetTypeId get_id) {
+  switch (get_id.id) {
+    case Type::INT8:
+      return KernelGenerator<Int8Type, Int8Type, Op>::Exec;
+    case Type::UINT8:
+      return KernelGenerator<UInt8Type, UInt8Type, Op>::Exec;
+    case Type::INT16:
+      return KernelGenerator<Int16Type, Int16Type, Op>::Exec;
+    case Type::UINT16:
+      return KernelGenerator<UInt16Type, UInt16Type, Op>::Exec;
+    case Type::INT32:
+      return KernelGenerator<Int32Type, Int32Type, Op>::Exec;
+    case Type::UINT32:
+      return KernelGenerator<UInt32Type, UInt32Type, Op>::Exec;
+    case Type::INT64:
+      return KernelGenerator<Int64Type, Int64Type, Op>::Exec;
+    case Type::UINT64:
+      return KernelGenerator<UInt64Type, UInt64Type, Op>::Exec;
+    default:
+      DCHECK(false);
+      return ExecFail;
+  }
+}
+
 Status CastBinaryDecimalArgs(const std::string& func_name,
                              std::vector<ValueDescr>* values) {
   auto& left_type = (*values)[0].type;
@@ -734,6 +882,28 @@ std::shared_ptr<ScalarFunction> MakeUnarySignedArithmeticFunctionNotNull(
   return func;
 }
 
+template <typename Op>
+std::shared_ptr<ScalarFunction> MakeBitWiseFunctionNotNull(std::string name,
+                                                           const FunctionDoc* doc) {
+  auto func = std::make_shared<ArithmeticFunction>(name, Arity::Binary(), doc);
+  for (const auto& ty : IntTypes()) {
+    auto exec = TypeAgnosticBitWiseExecFromOp<ScalarBinaryNotNullEqualTypes, Op>(ty);
+    DCHECK_OK(func->AddKernel({ty, ty}, ty, exec));
+  }
+  return func;
+}
+
+template <typename Op>
+std::shared_ptr<ScalarFunction> MakeShiftFunctionNotNull(std::string name,
+                                                         const FunctionDoc* doc) {
+  auto func = std::make_shared<ArithmeticFunction>(name, Arity::Binary(), doc);
+  for (const auto& ty : IntTypes()) {
+    auto exec = ShiftExecFromOp<ScalarBinaryNotNullEqualTypes, Op>(ty);
+    DCHECK_OK(func->AddKernel({ty, ty}, ty, exec));
+  }
+  return func;
+}
+
 const FunctionDoc absolute_value_doc{
     "Calculate the absolute value of the argument element-wise",
     ("Results will wrap around on integer overflow.\n"
@@ -820,6 +990,57 @@ const FunctionDoc pow_checked_doc{
     ("An error is returned when integer to negative integer power is encountered,\n"
      "or integer overflow is encountered."),
     {"base", "exponent"}};
+
+const FunctionDoc bit_wise_not_doc{
+    "Bit-wise negate the arguments element-wise", ("Null values return null."), {"x"}};
+
+const FunctionDoc bit_wise_and_doc{
+    "Bit-wise AND the arguments element-wise", ("Null values return null."), {"x", "y"}};
+
+const FunctionDoc bit_wise_or_doc{
+    "Bit-wise OR the arguments element-wise", ("Null values return null."), {"x", "y"}};
+
+const FunctionDoc bit_wise_xor_doc{
+    "Bit-wise XOR the arguments element-wise", ("Null values return null."), {"x", "y"}};
+
+const FunctionDoc shift_left_doc{
+    "Left shift `x` by `y`",
+    ("This function will return `x` if `y` (the amount to shift by) is: "
+     "(1) negative or (2) greater than or equal to the precision of `x`.\n"
+     "The shift operates as if on the two's complement representation of the number. "
+     "In other words, this is equivalent to multiplying `x` by 2 to the power `y`, "
+     "even if overflow occurs.\n"
+     "Use function \"shift_left_checked\" if you want an invalid shift amount to "
+     "return an error."),
+    {"x", "y"}};
+
+const FunctionDoc shift_left_checked_doc{
+    "Left shift `x` by `y` with invalid shift check",
+    ("This function will raise an error if `y` (the amount to shift by) is: "
+     "(1) negative or (2) greater than or equal to the precision of `x`. "
+     "The shift operates as if on the two's complement representation of the number. "
+     "In other words, this is equivalent to multiplying `x` by 2 to the power `y`, "
+     "even if overflow occurs.\n"
+     "See \"shift_left\" for a variant that doesn't fail for an invalid shift amount."),
+    {"x", "y"}};
+
+const FunctionDoc shift_right_doc{
+    "Right shift `x` by `y`",
+    ("Perform a logical shift for unsigned `x` and an arithmetic shift for signed `x`.\n"
+     "This function will return `x` if `y` (the amount to shift by) is: "
+     "(1) negative or (2) greater than or equal to the precision of `x`.\n"
+     "Use function \"shift_right_checked\" if you want an invalid shift amount to return "
+     "an error."),
+    {"x", "y"}};
+
+const FunctionDoc shift_right_checked_doc{
+    "Right shift `x` by `y` with invalid shift check",
+    ("Perform a logical shift for unsigned `x` and an arithmetic shift for signed `x`.\n"
+     "This function will raise an error if `y` (the amount to shift by) is: "
+     "(1) negative or (2) greater than or equal to the precision of `x`.\n"
+     "See \"shift_right\" for a variant that doesn't fail for an invalid shift amount"),
+    {"x", "y"}};
+
 }  // namespace
 
 void RegisterScalarArithmetic(FunctionRegistry* registry) {
@@ -903,6 +1124,44 @@ void RegisterScalarArithmetic(FunctionRegistry* registry) {
   auto power_checked =
       MakeArithmeticFunctionNotNull<PowerChecked>("power_checked", &pow_checked_doc);
   DCHECK_OK(registry->AddFunction(std::move(power_checked)));
+
+  // ----------------------------------------------------------------------
+  {
+    auto bit_wise_not = std::make_shared<ArithmeticFunction>(
+        "bit_wise_not", Arity::Unary(), &bit_wise_not_doc);
+    for (const auto& ty : IntTypes()) {
+      auto exec = TypeAgnosticBitWiseExecFromOp<ScalarUnaryNotNull, BitWiseNot>(ty);
+      DCHECK_OK(bit_wise_not->AddKernel({ty}, ty, exec));
+    }
+    DCHECK_OK(registry->AddFunction(std::move(bit_wise_not)));
+  }
+
+  auto bit_wise_and =
+      MakeBitWiseFunctionNotNull<BitWiseAnd>("bit_wise_and", &bit_wise_and_doc);
+  DCHECK_OK(registry->AddFunction(std::move(bit_wise_and)));
+
+  auto bit_wise_or =
+      MakeBitWiseFunctionNotNull<BitWiseOr>("bit_wise_or", &bit_wise_or_doc);
+  DCHECK_OK(registry->AddFunction(std::move(bit_wise_or)));
+
+  auto bit_wise_xor =
+      MakeBitWiseFunctionNotNull<BitWiseXor>("bit_wise_xor", &bit_wise_xor_doc);
+  DCHECK_OK(registry->AddFunction(std::move(bit_wise_xor)));
+
+  auto shift_left = MakeShiftFunctionNotNull<ShiftLeft>("shift_left", &shift_left_doc);
+  DCHECK_OK(registry->AddFunction(std::move(shift_left)));
+
+  auto shift_left_checked = MakeShiftFunctionNotNull<ShiftLeftChecked>(
+      "shift_left_checked", &shift_left_checked_doc);
+  DCHECK_OK(registry->AddFunction(std::move(shift_left_checked)));
+
+  auto shift_right =
+      MakeShiftFunctionNotNull<ShiftRight>("shift_right", &shift_right_doc);
+  DCHECK_OK(registry->AddFunction(std::move(shift_right)));
+
+  auto shift_right_checked = MakeShiftFunctionNotNull<ShiftRightChecked>(
+      "shift_right_checked", &shift_right_checked_doc);
+  DCHECK_OK(registry->AddFunction(std::move(shift_right_checked)));
 }
 
 }  // namespace internal
diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
index ae2f55c6be6..a94eabb1be0 100644
--- a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
@@ -299,6 +299,66 @@ class TestBinaryArithmeticUnsigned : public TestBinaryArithmeticIntegral<T> {};
 template <typename T>
 class TestBinaryArithmeticFloating : public TestBinaryArithmetic<T> {};
 
+template <typename T>
+class TestBitWiseArithmetic : public TestBase {
+ protected:
+  using ArrowType = T;
+  using CType = typename ArrowType::c_type;
+
+  static std::shared_ptr<DataType> type_singleton() {
+    return TypeTraits<ArrowType>::type_singleton();
+  }
+
+  void AssertUnaryOp(const std::string& func, const std::vector<uint8_t>& args,
+                     const std::vector<uint8_t>& expected) {
+    auto input = ExpandByteArray(args);
+    auto output = ExpandByteArray(expected);
+    ASSERT_OK_AND_ASSIGN(Datum actual, CallFunction(func, {input}));
+    ValidateAndAssertEqual(actual.make_array(), output);
+    for (int64_t i = 0; i < output->length(); i++) {
+      ASSERT_OK_AND_ASSIGN(Datum actual, CallFunction(func, {*input->GetScalar(i)}));
+      const auto expected_scalar = *output->GetScalar(i);
+      AssertScalarsEqual(*expected_scalar, *actual.scalar(), /*verbose=*/true);
+    }
+  }
+
+  void AssertBinaryOp(const std::string& func, const std::vector<uint8_t>& arg0,
+                      const std::vector<uint8_t>& arg1,
+                      const std::vector<uint8_t>& expected) {
+    auto input0 = ExpandByteArray(arg0);
+    auto input1 = ExpandByteArray(arg1);
+    auto output = ExpandByteArray(expected);
+    ASSERT_OK_AND_ASSIGN(Datum actual, CallFunction(func, {input0, input1}));
+    ValidateAndAssertEqual(actual.make_array(), output);
+    for (int64_t i = 0; i < output->length(); i++) {
+      ASSERT_OK_AND_ASSIGN(Datum actual, CallFunction(func, {*input0->GetScalar(i),
+                                                             *input1->GetScalar(i)}));
+      const auto expected_scalar = *output->GetScalar(i);
+      AssertScalarsEqual(*expected_scalar, *actual.scalar(), /*verbose=*/true);
+    }
+  }
+
+  // To make it easier to test different widths, tests give bytes which get repeated to
+  // make an array of the actual type
+  std::shared_ptr<Array> ExpandByteArray(const std::vector<uint8_t>& values) {
+    std::vector<CType> c_values(values.size() + 1);
+    for (size_t i = 0; i < values.size(); i++) {
+      std::memset(&c_values[i], values[i], sizeof(CType));
+    }
+    std::vector<bool> valid(values.size() + 1, true);
+    valid.back() = false;
+    std::shared_ptr<Array> arr;
+    ArrayFromVector<ArrowType>(valid, c_values, &arr);
+    return arr;
+  }
+
+  void ValidateAndAssertEqual(const std::shared_ptr<Array>& actual,
+                              const std::shared_ptr<Array>& expected) {
+    ASSERT_OK(actual->ValidateFull());
+    AssertArraysEqual(*expected, *actual, /*verbose=*/true);
+  }
+};
+
 // InputType - OutputType pairs
 using IntegralTypes = testing::Types<Int8Type, Int16Type, Int32Type, Int64Type, UInt8Type,
                                      UInt16Type, UInt32Type, UInt64Type>;
@@ -321,6 +381,31 @@ TYPED_TEST_SUITE(TestBinaryArithmeticSigned, SignedIntegerTypes);
 TYPED_TEST_SUITE(TestBinaryArithmeticUnsigned, UnsignedIntegerTypes);
 TYPED_TEST_SUITE(TestBinaryArithmeticFloating, FloatingTypes);
 
+TYPED_TEST_SUITE(TestBitWiseArithmetic, IntegralTypes);
+
+TYPED_TEST(TestBitWiseArithmetic, BitWiseNot) {
+  this->AssertUnaryOp("bit_wise_not", std::vector<uint8_t>{0x00, 0x55, 0xAA, 0xFF},
+                      std::vector<uint8_t>{0xFF, 0xAA, 0x55, 0x00});
+}
+
+TYPED_TEST(TestBitWiseArithmetic, BitWiseAnd) {
+  this->AssertBinaryOp("bit_wise_and", std::vector<uint8_t>{0x00, 0xFF, 0x00, 0xFF},
+                       std::vector<uint8_t>{0x00, 0x00, 0xFF, 0xFF},
+                       std::vector<uint8_t>{0x00, 0x00, 0x00, 0xFF});
+}
+
+TYPED_TEST(TestBitWiseArithmetic, BitWiseOr) {
+  this->AssertBinaryOp("bit_wise_or", std::vector<uint8_t>{0x00, 0xFF, 0x00, 0xFF},
+                       std::vector<uint8_t>{0x00, 0x00, 0xFF, 0xFF},
+                       std::vector<uint8_t>{0x00, 0xFF, 0xFF, 0xFF});
+}
+
+TYPED_TEST(TestBitWiseArithmetic, BitWiseXor) {
+  this->AssertBinaryOp("bit_wise_xor", std::vector<uint8_t>{0x00, 0xFF, 0x00, 0xFF},
+                       std::vector<uint8_t>{0x00, 0x00, 0xFF, 0xFF},
+                       std::vector<uint8_t>{0x00, 0xFF, 0xFF, 0x00});
+}
+
 TYPED_TEST(TestBinaryArithmeticIntegral, Add) {
   for (auto check_overflow : {false, true}) {
     this->SetOverflowCheck(check_overflow);
@@ -1511,5 +1596,124 @@ TEST(TestBinaryArithmeticDecimal, Divide) {
   }
 }
 
+TYPED_TEST(TestBinaryArithmeticIntegral, ShiftLeft) {
+  for (auto check_overflow : {false, true}) {
+    this->SetOverflowCheck(check_overflow);
+
+    this->AssertBinop(ShiftLeft, "[]", "[]", "[]");
+    this->AssertBinop(ShiftLeft, "[0, 1, 2, 3]", "[2, 3, 4, 5]", "[0, 8, 32, 96]");
+    // Nulls on one side
+    this->AssertBinop(ShiftLeft, "[0, null, 2, 3]", "[2, 3, 4, 5]", "[0, null, 32, 96]");
+    this->AssertBinop(ShiftLeft, "[0, 1, 2, 3]", "[2, 3, null, 5]", "[0, 8, null, 96]");
+    // Nulls on both sides
+    this->AssertBinop(ShiftLeft, "[0, null, 2, 3]", "[2, 3, null, 5]",
+                      "[0, null, null, 96]");
+    // All nulls
+    this->AssertBinop(ShiftLeft, "[null]", "[null]", "[null]");
+
+    // Scalar on the left
+    this->AssertBinop(ShiftLeft, 2, "[null, 5]", "[null, 64]");
+    this->AssertBinop(ShiftLeft, this->MakeNullScalar(), "[null, 5]", "[null, null]");
+    // Scalar on the right
+    this->AssertBinop(ShiftLeft, "[null, 5]", 3, "[null, 40]");
+    this->AssertBinop(ShiftLeft, "[null, 5]", this->MakeNullScalar(), "[null, null]");
+  }
+}
+
+TYPED_TEST(TestBinaryArithmeticIntegral, ShiftRight) {
+  for (auto check_overflow : {false, true}) {
+    this->SetOverflowCheck(check_overflow);
+
+    this->AssertBinop(ShiftRight, "[]", "[]", "[]");
+    this->AssertBinop(ShiftRight, "[0, 1, 4, 8]", "[1, 1, 1, 4]", "[0, 0, 2, 0]");
+    // Nulls on one side
+    this->AssertBinop(ShiftRight, "[0, null, 4, 8]", "[1, 1, 1, 4]", "[0, null, 2, 0]");
+    this->AssertBinop(ShiftRight, "[0, 1, 4, 8]", "[1, 1, null, 4]", "[0, 0, null, 0]");
+    // Nulls on both sides
+    this->AssertBinop(ShiftRight, "[0, null, 4, 8]", "[1, 1, null, 4]",
+                      "[0, null, null, 0]");
+    // All nulls
+    this->AssertBinop(ShiftRight, "[null]", "[null]", "[null]");
+
+    // Scalar on the left
+    this->AssertBinop(ShiftRight, 64, "[null, 2, 6]", "[null, 16, 1]");
+    this->AssertBinop(ShiftRight, this->MakeNullScalar(), "[null, 2, 6]",
+                      "[null, null, null]");
+    // Scalar on the right
+    this->AssertBinop(ShiftRight, "[null, 3, 96]", 3, "[null, 0, 12]");
+    this->AssertBinop(ShiftRight, "[null, 3, 96]", this->MakeNullScalar(),
+                      "[null, null, null]");
+  }
+}
+
+TYPED_TEST(TestBinaryArithmeticSigned, ShiftLeftOverflowRaises) {
+  using CType = typename TestFixture::CType;
+  const CType bit_width = static_cast<CType>(std::numeric_limits<CType>::digits);
+  const CType min = std::numeric_limits<CType>::min();
+  this->SetOverflowCheck(true);
+
+  this->AssertBinop(ShiftLeft, "[1]", MakeArray(bit_width - 1),
+                    MakeArray(static_cast<CType>(1) << (bit_width - 1)));
+  this->AssertBinop(ShiftLeft, "[2]", MakeArray(bit_width - 2),
+                    MakeArray(static_cast<CType>(1) << (bit_width - 1)));
+  // Shift a bit into the sign bit
+  this->AssertBinop(ShiftLeft, "[2]", MakeArray(bit_width - 1), MakeArray(min));
+  // Shift a bit past the sign bit
+  this->AssertBinop(ShiftLeft, "[4]", MakeArray(bit_width - 1), "[0]");
+  this->AssertBinop(ShiftLeft, MakeArray(min), "[1]", "[0]");
+  this->AssertBinopRaises(ShiftLeft, "[1, 2]", "[1, -1]",
+                          "shift amount must be >= 0 and less than precision of type");
+  this->AssertBinopRaises(ShiftLeft, "[1]", MakeArray(bit_width),
+                          "shift amount must be >= 0 and less than precision of type");
+
+  this->SetOverflowCheck(false);
+  this->AssertBinop(ShiftLeft, "[1, 1]", MakeArray(-1, bit_width), "[1, 1]");
+}
+
+TYPED_TEST(TestBinaryArithmeticSigned, ShiftRightOverflowRaises) {
+  using CType = typename TestFixture::CType;
+  const CType bit_width = static_cast<CType>(std::numeric_limits<CType>::digits);
+  const CType max = std::numeric_limits<CType>::max();
+  const CType min = std::numeric_limits<CType>::min();
+  this->SetOverflowCheck(true);
+
+  this->AssertBinop(ShiftRight, MakeArray(max), MakeArray(bit_width - 1), "[1]");
+  this->AssertBinop(ShiftRight, "[-1, -1]", "[1, 5]", "[-1, -1]");
+  this->AssertBinop(ShiftRight, MakeArray(min), "[1]", MakeArray(min / 2));
+  this->AssertBinopRaises(ShiftRight, "[1, 2]", "[1, -1]",
+                          "shift amount must be >= 0 and less than precision of type");
+  this->AssertBinopRaises(ShiftRight, "[1]", MakeArray(bit_width),
+                          "shift amount must be >= 0 and less than precision of type");
+
+  this->SetOverflowCheck(false);
+  this->AssertBinop(ShiftRight, "[1, 1]", MakeArray(-1, bit_width), "[1, 1]");
+}
+
+TYPED_TEST(TestBinaryArithmeticUnsigned, ShiftLeftOverflowRaises) {
+  using CType = typename TestFixture::CType;
+  const CType bit_width = static_cast<CType>(std::numeric_limits<CType>::digits);
+  this->SetOverflowCheck(true);
+
+  this->AssertBinop(ShiftLeft, "[1]", MakeArray(bit_width - 1),
+                    MakeArray(static_cast<CType>(1) << (bit_width - 1)));
+  this->AssertBinop(ShiftLeft, "[2]", MakeArray(bit_width - 2),
+                    MakeArray(static_cast<CType>(1) << (bit_width - 1)));
+  this->AssertBinop(ShiftLeft, "[2]", MakeArray(bit_width - 1), "[0]");
+  this->AssertBinop(ShiftLeft, "[4]", MakeArray(bit_width - 1), "[0]");
+  this->AssertBinopRaises(ShiftLeft, "[1]", MakeArray(bit_width),
+                          "shift amount must be >= 0 and less than precision of type");
+}
+
+TYPED_TEST(TestBinaryArithmeticUnsigned, ShiftRightOverflowRaises) {
+  using CType = typename TestFixture::CType;
+  const CType bit_width = static_cast<CType>(std::numeric_limits<CType>::digits);
+  const CType max = std::numeric_limits<CType>::max();
+  this->SetOverflowCheck(true);
+
+  this->AssertBinop(ShiftRight, MakeArray(max), MakeArray(bit_width - 1), "[1]");
+  this->AssertBinopRaises(ShiftRight, "[1]", MakeArray(bit_width),
+                          "shift amount must be >= 0 and less than precision of type");
+}
+
 }  // namespace compute
 }  // namespace arrow
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index a4ca8f9c61b..c4ca4d3416c 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -293,24 +293,51 @@ an ``Invalid`` :class:`Status` when overflow is detected.
 
 * \(1) Precision and scale of computed DECIMAL results
 
-+------------+---------------------------------------------+
-| Operation  | Result precision and scale                  |
-+============+=============================================+
-| | add      | | scale = max(s1, s2)                       |
-| | subtract | | precision = max(p1-s1, p2-s2) + 1 + scale |
-+------------+---------------------------------------------+
-| multiply   | | scale = s1 + s2                           |
-|            | | precision = p1 + p2 + 1                   |
-+------------+---------------------------------------------+
-| divide     | | scale = max(4, s1 + p2 - s2 + 1)          |
-|            | | precision = p1 - s1 + s2 + scale          |
-+------------+---------------------------------------------+
-
-It's compatible with Redshift's decimal promotion rules. All decimal digits
-are preserved for `add`, `subtract` and `multiply` operations. The result
-precision of `divide` is at least the sum of precisions of both operands with
-enough scale kept. Error is returned if the result precision is beyond the
-decimal value range.
+  +------------+---------------------------------------------+
+  | Operation  | Result precision and scale                  |
+  +============+=============================================+
+  | | add      | | scale = max(s1, s2)                       |
+  | | subtract | | precision = max(p1-s1, p2-s2) + 1 + scale |
+  +------------+---------------------------------------------+
+  | multiply   | | scale = s1 + s2                           |
+  |            | | precision = p1 + p2 + 1                   |
+  +------------+---------------------------------------------+
+  | divide     | | scale = max(4, s1 + p2 - s2 + 1)          |
+  |            | | precision = p1 - s1 + s2 + scale          |
+  +------------+---------------------------------------------+
+
+  It's compatible with Redshift's decimal promotion rules. All decimal digits
+  are preserved for `add`, `subtract` and `multiply` operations. The result
+  precision of `divide` is at least the sum of precisions of both operands with
+  enough scale kept. Error is returned if the result precision is beyond the
+  decimal value range.
+
+Bit-wise functions
+~~~~~~~~~~~~~~~~~~
+
++--------------------------+------------+--------------------+---------------------+
+| Function name            | Arity      | Input types        | Output type         |
++==========================+============+====================+=====================+
+| bit_wise_and             | Binary     | Numeric            | Numeric             |
++--------------------------+------------+--------------------+---------------------+
+| bit_wise_not             | Unary      | Numeric            | Numeric             |
++--------------------------+------------+--------------------+---------------------+
+| bit_wise_or              | Binary     | Numeric            | Numeric             |
++--------------------------+------------+--------------------+---------------------+
+| bit_wise_xor             | Binary     | Numeric            | Numeric             |
++--------------------------+------------+--------------------+---------------------+
+| shift_left               | Binary     | Numeric            | Numeric             |
++--------------------------+------------+--------------------+---------------------+
+| shift_left_checked       | Binary     | Numeric            | Numeric (1)         |
++--------------------------+------------+--------------------+---------------------+
+| shift_right              | Binary     | Numeric            | Numeric             |
++--------------------------+------------+--------------------+---------------------+
+| shift_right_checked      | Binary     | Numeric            | Numeric (1)         |
++--------------------------+------------+--------------------+---------------------+
+
+* \(1) An error is emitted if the shift amount (i.e. the second input) is
+  out of bounds for the data type.  However, an overflow when shifting the
+  first input is not error (truncated bits are silently discarded).
 
 Comparisons
 ~~~~~~~~~~~
diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst
index 34626e21fdc..461803dc773 100644
--- a/docs/source/python/api/compute.rst
+++ b/docs/source/python/api/compute.rst
@@ -58,6 +58,20 @@ throws an ``ArrowInvalid`` exception when overflow is detected.
    subtract_checked
    power
    power_checked
+   shift_left
+   shift_left_checked
+   shift_right
+   shift_right_checked
+
+Bit-wise operations do not offer (or need) a checked variant.
+
+.. autosummary::
+   :toctree: ../generated/
+
+   bit_wise_and
+   bit_wise_not
+   bit_wise_or
+   bit_wise_xor
 
 Comparisons
 -----------

From a308f2c765f6a786a28528af85c4be31fe7db26c Mon Sep 17 00:00:00 2001
From: Nate Clark <nate@neworld.us>
Date: Wed, 30 Jun 2021 16:49:32 +0200
Subject: [PATCH 480/719] ARROW-12996: Add bytes_read() to StreamingReader

Add a bytes_read() to the StreamingReader interface so the progress of the stream can be determined easily and accurately by a user.

Closes #10509 from n3world/ARROW-12996-stream_progress

Lead-authored-by: Nate Clark <nate@neworld.us>
Co-authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/csv/reader.cc      | 104 ++++++++++++++++++++-----------
 cpp/src/arrow/csv/reader.h       |  15 +++++
 cpp/src/arrow/csv/reader_test.cc |  68 ++++++++++++++++++++
 3 files changed, 150 insertions(+), 37 deletions(-)

diff --git a/cpp/src/arrow/csv/reader.cc b/cpp/src/arrow/csv/reader.cc
index f221ffcadd9..f644b86f89f 100644
--- a/cpp/src/arrow/csv/reader.cc
+++ b/cpp/src/arrow/csv/reader.cc
@@ -145,6 +145,7 @@ struct CSVBlock {
   std::shared_ptr<Buffer> buffer;
   int64_t block_index;
   bool is_final;
+  int64_t bytes_skipped;
   std::function<Status(int64_t)> consume_bytes;
 };
 
@@ -153,7 +154,7 @@ struct CSVBlock {
 
 template <>
 struct IterationTraits<csv::CSVBlock> {
-  static csv::CSVBlock End() { return csv::CSVBlock{{}, {}, {}, -1, true, {}}; }
+  static csv::CSVBlock End() { return csv::CSVBlock{{}, {}, {}, -1, true, 0, {}}; }
   static bool IsEnd(const csv::CSVBlock& val) { return val.block_index < 0; }
 };
 
@@ -222,16 +223,20 @@ class SerialBlockReader : public BlockReader {
     }
 
     bool is_final = (next_buffer == nullptr);
+    int64_t bytes_skipped = 0;
 
     if (skip_rows_) {
+      bytes_skipped += partial_->size();
+      auto orig_size = buffer_->size();
       RETURN_NOT_OK(
           chunker_->ProcessSkip(partial_, buffer_, is_final, &skip_rows_, &buffer_));
+      bytes_skipped += orig_size - buffer_->size();
       partial_ = SliceBuffer(buffer_, 0, 0);
       if (skip_rows_) {
         // Still have rows beyond this buffer to skip return empty block
         buffer_ = next_buffer;
         return TransformYield<CSVBlock>(CSVBlock{partial_, partial_, partial_,
-                                                 block_index_++, is_final,
+                                                 block_index_++, is_final, bytes_skipped,
                                                  [](int64_t) { return Status::OK(); }});
       }
     }
@@ -262,7 +267,7 @@ class SerialBlockReader : public BlockReader {
     };
 
     return TransformYield<CSVBlock>(CSVBlock{partial_, completion, buffer_,
-                                             block_index_++, is_final,
+                                             block_index_++, is_final, bytes_skipped,
                                              std::move(consume_bytes)});
   }
 };
@@ -294,10 +299,14 @@ class ThreadedBlockReader : public BlockReader {
 
     auto current_partial = std::move(partial_);
     auto current_buffer = std::move(buffer_);
+    int64_t bytes_skipped = 0;
 
     if (skip_rows_) {
+      auto orig_size = current_buffer->size();
+      bytes_skipped = current_partial->size();
       RETURN_NOT_OK(chunker_->ProcessSkip(current_partial, current_buffer, is_final,
                                           &skip_rows_, &current_buffer));
+      bytes_skipped += orig_size - current_buffer->size();
       current_partial = SliceBuffer(current_buffer, 0, 0);
       if (skip_rows_) {
         partial_ = std::move(current_buffer);
@@ -307,6 +316,7 @@ class ThreadedBlockReader : public BlockReader {
                                                  current_partial,
                                                  block_index_++,
                                                  is_final,
+                                                 bytes_skipped,
                                                  {}});
       }
     }
@@ -332,8 +342,8 @@ class ThreadedBlockReader : public BlockReader {
     partial_ = std::move(next_partial);
     buffer_ = std::move(next_buffer);
 
-    return TransformYield<CSVBlock>(
-        CSVBlock{current_partial, completion, whole, block_index_++, is_final, {}});
+    return TransformYield<CSVBlock>(CSVBlock{
+        current_partial, completion, whole, block_index_++, is_final, bytes_skipped, {}});
   }
 };
 
@@ -761,12 +771,13 @@ class SerialStreamingReader : public BaseStreamingReader,
 
     auto self = shared_from_this();
     // Read schema from first batch
-    return ReadNextAsync().Then([self](const std::shared_ptr<RecordBatch>& first_batch)
-                                    -> Result<std::shared_ptr<csv::StreamingReader>> {
-      self->pending_batch_ = first_batch;
-      DCHECK_NE(self->schema_, nullptr);
-      return self;
-    });
+    return ReadNextAsync(true).Then(
+        [self](const std::shared_ptr<RecordBatch>& first_batch)
+            -> Result<std::shared_ptr<csv::StreamingReader>> {
+          self->pending_batch_ = first_batch;
+          DCHECK_NE(self->schema_, nullptr);
+          return self;
+        });
   }
 
   Result<std::shared_ptr<RecordBatch>> DecodeBatchAndUpdateSchema() {
@@ -788,6 +799,7 @@ class SerialStreamingReader : public BaseStreamingReader,
       return block_generator_()
           .Then([self](const CSVBlock& maybe_block) -> Status {
             if (!IsIterationEnd(maybe_block)) {
+              self->bytes_parsed_ += maybe_block.bytes_skipped;
               self->last_block_index_ = maybe_block.block_index;
               auto maybe_parsed = self->ParseAndInsert(
                   maybe_block.partial, maybe_block.completion, maybe_block.buffer,
@@ -797,6 +809,7 @@ class SerialStreamingReader : public BaseStreamingReader,
                 self->eof_ = true;
                 return maybe_parsed.status();
               }
+              self->bytes_parsed_ += *maybe_parsed;
               RETURN_NOT_OK(maybe_block.consume_bytes(*maybe_parsed));
             } else {
               self->source_eof_ = true;
@@ -815,16 +828,46 @@ class SerialStreamingReader : public BaseStreamingReader,
   }
 
   Future<std::shared_ptr<RecordBatch>> ReadNextSkippingEmpty(
-      std::shared_ptr<SerialStreamingReader> self) {
-    return DoReadNext(self).Then([self](const std::shared_ptr<RecordBatch>& batch) {
-      if (batch != nullptr && batch->num_rows() == 0) {
-        return self->ReadNextSkippingEmpty(self);
+      std::shared_ptr<SerialStreamingReader> self, bool internal_read) {
+    return DoReadNext(self).Then(
+        [self, internal_read](const std::shared_ptr<RecordBatch>& batch) {
+          if (batch != nullptr && batch->num_rows() == 0) {
+            return self->ReadNextSkippingEmpty(self, internal_read);
+          }
+          if (!internal_read) {
+            self->bytes_decoded_ += self->bytes_parsed_;
+            self->bytes_parsed_ = 0;
+          }
+          return Future<std::shared_ptr<RecordBatch>>::MakeFinished(batch);
+        });
+  }
+
+  Future<std::shared_ptr<RecordBatch>> ReadNextAsync() override {
+    return ReadNextAsync(false);
+  };
+
+  int64_t bytes_read() const override { return bytes_decoded_; }
+
+ protected:
+  Future<> SetupReader(std::shared_ptr<SerialStreamingReader> self) {
+    return buffer_generator_().Then([self](const std::shared_ptr<Buffer>& first_buffer) {
+      if (first_buffer == nullptr) {
+        return Status::Invalid("Empty CSV file");
       }
-      return Future<std::shared_ptr<RecordBatch>>::MakeFinished(batch);
+      auto own_first_buffer = first_buffer;
+      auto start = own_first_buffer->data();
+      RETURN_NOT_OK(self->ProcessHeader(own_first_buffer, &own_first_buffer));
+      self->bytes_decoded_ = own_first_buffer->data() - start;
+      RETURN_NOT_OK(self->MakeColumnDecoders());
+
+      self->block_generator_ = SerialBlockReader::MakeAsyncIterator(
+          std::move(self->buffer_generator_), MakeChunker(self->parse_options_),
+          std::move(own_first_buffer), self->read_options_.skip_rows_after_names);
+      return Status::OK();
     });
   }
 
-  Future<std::shared_ptr<RecordBatch>> ReadNextAsync() override {
+  Future<std::shared_ptr<RecordBatch>> ReadNextAsync(bool internal_read) {
     if (eof_) {
       return Future<std::shared_ptr<RecordBatch>>::MakeFinished(nullptr);
     }
@@ -835,38 +878,25 @@ class SerialStreamingReader : public BaseStreamingReader,
     auto self = shared_from_this();
     if (!block_generator_) {
       return SetupReader(self).Then(
-          [self]() -> Future<std::shared_ptr<RecordBatch>> {
-            return self->ReadNextSkippingEmpty(self);
+          [self, internal_read]() -> Future<std::shared_ptr<RecordBatch>> {
+            return self->ReadNextSkippingEmpty(self, internal_read);
           },
           [self](const Status& err) -> Result<std::shared_ptr<RecordBatch>> {
             self->eof_ = true;
             return err;
           });
     } else {
-      return self->ReadNextSkippingEmpty(self);
+      return self->ReadNextSkippingEmpty(self, internal_read);
     }
-  };
-
- protected:
-  Future<> SetupReader(std::shared_ptr<SerialStreamingReader> self) {
-    return buffer_generator_().Then([self](const std::shared_ptr<Buffer>& first_buffer) {
-      if (first_buffer == nullptr) {
-        return Status::Invalid("Empty CSV file");
-      }
-      auto own_first_buffer = first_buffer;
-      RETURN_NOT_OK(self->ProcessHeader(own_first_buffer, &own_first_buffer));
-      RETURN_NOT_OK(self->MakeColumnDecoders());
-
-      self->block_generator_ = SerialBlockReader::MakeAsyncIterator(
-          std::move(self->buffer_generator_), MakeChunker(self->parse_options_),
-          std::move(own_first_buffer), self->read_options_.skip_rows_after_names);
-      return Status::OK();
-    });
   }
 
   bool source_eof_ = false;
   int64_t last_block_index_ = 0;
   AsyncGenerator<CSVBlock> block_generator_;
+  // bytes of data parsed but not yet decoded
+  int64_t bytes_parsed_ = 0;
+  // bytes which have been decoded for caller
+  int64_t bytes_decoded_ = 0;
 };
 
 /////////////////////////////////////////////////////////////////////////
diff --git a/cpp/src/arrow/csv/reader.h b/cpp/src/arrow/csv/reader.h
index 5314104f048..48f02882b10 100644
--- a/cpp/src/arrow/csv/reader.h
+++ b/cpp/src/arrow/csv/reader.h
@@ -73,6 +73,21 @@ class ARROW_EXPORT StreamingReader : public RecordBatchReader {
 
   virtual Future<std::shared_ptr<RecordBatch>> ReadNextAsync() = 0;
 
+  /// \brief Return the number of bytes which have been read and processed
+  ///
+  /// The returned number includes CSV bytes which the StreamingReader has
+  /// finished processing, but not bytes for which some processing (e.g.
+  /// CSV parsing or conversion to Arrow layout) is still ongoing.
+  ///
+  /// Furthermore, the following rules apply:
+  /// - bytes skipped by `ReadOptions.skip_rows` are counted as being read before
+  /// any records are returned.
+  /// - bytes read while parsing the header are counted as being read before any
+  /// records are returned.
+  /// - bytes skipped by `ReadOptions.skip_rows_after_names` are counted after the
+  /// first batch is returned.
+  virtual int64_t bytes_read() const = 0;
+
   /// Create a StreamingReader instance
   ///
   /// This involves some I/O as the first batch must be loaded during the creation process
diff --git a/cpp/src/arrow/csv/reader_test.cc b/cpp/src/arrow/csv/reader_test.cc
index 4d4f04964bd..1ab49fa8664 100644
--- a/cpp/src/arrow/csv/reader_test.cc
+++ b/cpp/src/arrow/csv/reader_test.cc
@@ -216,6 +216,74 @@ TEST(StreamingReaderTests, NestedParallelism) {
   TestNestedParallelism(thread_pool, table_factory);
 }
 
+TEST(StreamingReaderTest, BytesRead) {
+  ASSERT_OK_AND_ASSIGN(auto thread_pool, internal::ThreadPool::Make(1));
+  auto table_buffer =
+      std::make_shared<Buffer>("a,b,c\n123,456,789\n101,112,131\n415,161,718\n");
+
+  // Basic read without any skips and small block size
+  {
+    auto input = std::make_shared<io::BufferReader>(table_buffer);
+
+    auto read_options = ReadOptions::Defaults();
+    read_options.block_size = 20;
+    ASSERT_OK_AND_ASSIGN(
+        auto streaming_reader,
+        StreamingReader::Make(io::default_io_context(), input, read_options,
+                              ParseOptions::Defaults(), ConvertOptions::Defaults()));
+    std::shared_ptr<RecordBatch> batch;
+    int64_t bytes = 6;  // Size of header
+    do {
+      ASSERT_EQ(bytes, streaming_reader->bytes_read());
+      ASSERT_OK(streaming_reader->ReadNext(&batch));
+      bytes += 12;  // Add size of each row
+    } while (batch);
+    ASSERT_EQ(42, streaming_reader->bytes_read());
+  }
+
+  // Interaction of skip_rows and bytes_read()
+  {
+    auto input = std::make_shared<io::BufferReader>(table_buffer);
+
+    auto read_options = ReadOptions::Defaults();
+    read_options.skip_rows = 2;
+    ASSERT_OK_AND_ASSIGN(
+        auto streaming_reader,
+        StreamingReader::Make(io::default_io_context(), input, read_options,
+                              ParseOptions::Defaults(), ConvertOptions::Defaults()));
+    std::shared_ptr<RecordBatch> batch;
+    // first two rows and third row as header
+    ASSERT_EQ(30, streaming_reader->bytes_read());
+    ASSERT_OK(streaming_reader->ReadNext(&batch));
+    ASSERT_NE(batch.get(), nullptr);
+    ASSERT_EQ(42, streaming_reader->bytes_read());
+    ASSERT_OK(streaming_reader->ReadNext(&batch));
+    ASSERT_EQ(batch.get(), nullptr);
+  }
+
+  // Interaction of skip_rows_after_names and bytes_read()
+  {
+    auto input = std::make_shared<io::BufferReader>(table_buffer);
+
+    auto read_options = ReadOptions::Defaults();
+    read_options.skip_rows_after_names = 2;
+
+    ASSERT_OK_AND_ASSIGN(
+        auto streaming_reader,
+        StreamingReader::Make(io::default_io_context(), input, read_options,
+                              ParseOptions::Defaults(), ConvertOptions::Defaults()));
+    std::shared_ptr<RecordBatch> batch;
+
+    // Just header
+    ASSERT_EQ(6, streaming_reader->bytes_read());
+    ASSERT_OK(streaming_reader->ReadNext(&batch));
+    ASSERT_NE(batch.get(), nullptr);
+    ASSERT_EQ(42, streaming_reader->bytes_read());
+    ASSERT_OK(streaming_reader->ReadNext(&batch));
+    ASSERT_EQ(batch.get(), nullptr);
+  }
+}
+
 TEST(CountRowsAsync, Basics) {
   constexpr int NROWS = 4096;
   ASSERT_OK_AND_ASSIGN(auto table_buffer, MakeSampleCsvBuffer(NROWS));

From 23d19ce754faa32878bd5e48f56e7510d34f15c6 Mon Sep 17 00:00:00 2001
From: niranda perera <niranda.perera@gmail.com>
Date: Wed, 30 Jun 2021 10:50:36 -0400
Subject: [PATCH 481/719] ARROW-13010: [C++][Compute] Support outputting to
 slices from kleene kernels

This change adds a `Bitmap::VisitWordsAndWrite` method, that outputs the values of the visitor lambda function to a provided bitmap.

Closes #10487 from nirandaperera/ARROW-13010

Authored-by: niranda perera <niranda.perera@gmail.com>
Signed-off-by: Benjamin Kietzman <bengilgit@gmail.com>
---
 cpp/src/arrow/compute/kernels/CMakeLists.txt  |   1 +
 .../arrow/compute/kernels/scalar_boolean.cc   |  77 ++-
 .../arrow/compute/kernels/scalar_if_else.cc   | 600 ++++++++++--------
 .../kernels/scalar_if_else_benchmark.cc       | 113 ++++
 .../compute/kernels/scalar_if_else_test.cc    |  12 +-
 cpp/src/arrow/util/CMakeLists.txt             |   1 +
 cpp/src/arrow/util/bit_util.cc                |  56 ++
 cpp/src/arrow/util/bit_util.h                 |  32 +
 cpp/src/arrow/util/bit_util_test.cc           | 132 ++++
 cpp/src/arrow/util/bitmap.h                   | 152 +++++
 cpp/src/arrow/util/bitmap_ops.cc              | 218 -------
 cpp/src/arrow/util/bitmap_reader.h            | 114 +++-
 cpp/src/arrow/util/bitmap_reader_benchmark.cc | 113 ++++
 cpp/src/arrow/util/bitmap_writer.h            | 101 +++
 14 files changed, 1206 insertions(+), 516 deletions(-)
 create mode 100644 cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc
 create mode 100644 cpp/src/arrow/util/bitmap_reader_benchmark.cc

diff --git a/cpp/src/arrow/compute/kernels/CMakeLists.txt b/cpp/src/arrow/compute/kernels/CMakeLists.txt
index 326578588a7..3362d91cbe8 100644
--- a/cpp/src/arrow/compute/kernels/CMakeLists.txt
+++ b/cpp/src/arrow/compute/kernels/CMakeLists.txt
@@ -37,6 +37,7 @@ add_arrow_benchmark(scalar_arithmetic_benchmark PREFIX "arrow-compute")
 add_arrow_benchmark(scalar_boolean_benchmark PREFIX "arrow-compute")
 add_arrow_benchmark(scalar_cast_benchmark PREFIX "arrow-compute")
 add_arrow_benchmark(scalar_compare_benchmark PREFIX "arrow-compute")
+add_arrow_benchmark(scalar_if_else_benchmark PREFIX "arrow-compute")
 add_arrow_benchmark(scalar_set_lookup_benchmark PREFIX "arrow-compute")
 add_arrow_benchmark(scalar_string_benchmark PREFIX "arrow-compute")
 
diff --git a/cpp/src/arrow/compute/kernels/scalar_boolean.cc b/cpp/src/arrow/compute/kernels/scalar_boolean.cc
index 89107120fa3..7a0e3654edb 100644
--- a/cpp/src/arrow/compute/kernels/scalar_boolean.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_boolean.cc
@@ -30,60 +30,60 @@ namespace compute {
 
 namespace {
 
-enum BitmapIndex { LEFT_VALID, LEFT_DATA, RIGHT_VALID, RIGHT_DATA };
-
 template <typename ComputeWord>
 void ComputeKleene(ComputeWord&& compute_word, KernelContext* ctx, const ArrayData& left,
                    const ArrayData& right, ArrayData* out) {
   DCHECK(left.null_count != 0 || right.null_count != 0)
       << "ComputeKleene is unnecessarily expensive for the non-null case";
 
-  Bitmap bitmaps[4];
-  bitmaps[LEFT_VALID] = {left.buffers[0], left.offset, left.length};
-  bitmaps[LEFT_DATA] = {left.buffers[1], left.offset, left.length};
+  Bitmap left_valid_bm{left.buffers[0], left.offset, left.length};
+  Bitmap left_data_bm{left.buffers[1], left.offset, left.length};
 
-  bitmaps[RIGHT_VALID] = {right.buffers[0], right.offset, right.length};
-  bitmaps[RIGHT_DATA] = {right.buffers[1], right.offset, right.length};
+  Bitmap right_valid_bm{right.buffers[0], right.offset, right.length};
+  Bitmap right_data_bm{right.buffers[1], right.offset, right.length};
 
-  auto out_validity = out->GetMutableValues<uint64_t>(0);
-  auto out_data = out->GetMutableValues<uint64_t>(1);
+  std::array<Bitmap, 2> out_bms{Bitmap(out->buffers[0], out->offset, out->length),
+                                Bitmap(out->buffers[1], out->offset, out->length)};
 
-  int64_t i = 0;
   auto apply = [&](uint64_t left_valid, uint64_t left_data, uint64_t right_valid,
-                   uint64_t right_data) {
+                   uint64_t right_data, uint64_t* out_validity, uint64_t* out_data) {
     auto left_true = left_valid & left_data;
     auto left_false = left_valid & ~left_data;
 
     auto right_true = right_valid & right_data;
     auto right_false = right_valid & ~right_data;
 
-    compute_word(left_true, left_false, right_true, right_false, &out_validity[i],
-                 &out_data[i]);
-    ++i;
+    compute_word(left_true, left_false, right_true, right_false, out_validity, out_data);
   };
 
   if (right.null_count == 0) {
-    // bitmaps[RIGHT_VALID] might be null; override to make it safe for Visit()
-    bitmaps[RIGHT_VALID] = bitmaps[RIGHT_DATA];
-    Bitmap::VisitWords(bitmaps, [&](std::array<uint64_t, 4> words) {
-      apply(words[LEFT_VALID], words[LEFT_DATA], ~uint64_t(0), words[RIGHT_DATA]);
-    });
+    std::array<Bitmap, 3> in_bms{left_valid_bm, left_data_bm, right_data_bm};
+    Bitmap::VisitWordsAndWrite(
+        in_bms, &out_bms,
+        [&](const std::array<uint64_t, 3>& in, std::array<uint64_t, 2>* out) {
+          apply(in[0], in[1], ~uint64_t(0), in[2], &(out->at(0)), &(out->at(1)));
+        });
     return;
   }
 
   if (left.null_count == 0) {
-    // bitmaps[LEFT_VALID] might be null; override to make it safe for Visit()
-    bitmaps[LEFT_VALID] = bitmaps[LEFT_DATA];
-    Bitmap::VisitWords(bitmaps, [&](std::array<uint64_t, 4> words) {
-      apply(~uint64_t(0), words[LEFT_DATA], words[RIGHT_VALID], words[RIGHT_DATA]);
-    });
+    std::array<Bitmap, 3> in_bms{left_data_bm, right_valid_bm, right_data_bm};
+    Bitmap::VisitWordsAndWrite(
+        in_bms, &out_bms,
+        [&](const std::array<uint64_t, 3>& in, std::array<uint64_t, 2>* out) {
+          apply(~uint64_t(0), in[0], in[1], in[2], &(out->at(0)), &(out->at(1)));
+        });
     return;
   }
 
   DCHECK(left.null_count != 0 && right.null_count != 0);
-  Bitmap::VisitWords(bitmaps, [&](std::array<uint64_t, 4> words) {
-    apply(words[LEFT_VALID], words[LEFT_DATA], words[RIGHT_VALID], words[RIGHT_DATA]);
-  });
+  std::array<Bitmap, 4> in_bms{left_valid_bm, left_data_bm, right_valid_bm,
+                               right_data_bm};
+  Bitmap::VisitWordsAndWrite(
+      in_bms, &out_bms,
+      [&](const std::array<uint64_t, 4>& in, std::array<uint64_t, 2>* out) {
+        apply(in[0], in[1], in[2], in[3], &(out->at(0)), &(out->at(1)));
+      });
 }
 
 inline BooleanScalar InvertScalar(const Scalar& in) {
@@ -204,7 +204,8 @@ struct KleeneAndOp : Commutative<KleeneAndOp> {
                      ArrayData* out) {
     if (left.GetNullCount() == 0 && right.GetNullCount() == 0) {
       out->null_count = 0;
-      out->buffers[0] = nullptr;
+      // Kleene kernels have validity bitmap pre-allocated. Therefore, set it to 1
+      BitUtil::SetBitmap(out->buffers[0]->mutable_data(), out->offset, out->length);
       return AndOp::Call(ctx, left, right, out);
     }
     auto compute_word = [](uint64_t left_true, uint64_t left_false, uint64_t right_true,
@@ -307,7 +308,8 @@ struct KleeneOrOp : Commutative<KleeneOrOp> {
                      ArrayData* out) {
     if (left.GetNullCount() == 0 && right.GetNullCount() == 0) {
       out->null_count = 0;
-      out->buffers[0] = nullptr;
+      // Kleene kernels have validity bitmap pre-allocated. Therefore, set it to 1
+      BitUtil::SetBitmap(out->buffers[0]->mutable_data(), out->offset, out->length);
       return OrOp::Call(ctx, left, right, out);
     }
 
@@ -437,7 +439,8 @@ struct KleeneAndNotOp {
                      ArrayData* out) {
     if (left.GetNullCount() == 0 && right.GetNullCount() == 0) {
       out->null_count = 0;
-      out->buffers[0] = nullptr;
+      // Kleene kernels have validity bitmap pre-allocated. Therefore, set it to 1
+      BitUtil::SetBitmap(out->buffers[0]->mutable_data(), out->offset, out->length);
       return AndNotOp::Call(ctx, left, right, out);
     }
 
@@ -453,9 +456,8 @@ struct KleeneAndNotOp {
   }
 };
 
-void MakeFunction(std::string name, int arity, ArrayKernelExec exec,
+void MakeFunction(const std::string& name, int arity, ArrayKernelExec exec,
                   const FunctionDoc* doc, FunctionRegistry* registry,
-                  bool can_write_into_slices = true,
                   NullHandling::type null_handling = NullHandling::INTERSECTION) {
   auto func = std::make_shared<ScalarFunction>(name, Arity(arity), doc);
 
@@ -463,7 +465,6 @@ void MakeFunction(std::string name, int arity, ArrayKernelExec exec,
   std::vector<InputType> in_types(arity, InputType(boolean()));
   ScalarKernel kernel(std::move(in_types), boolean(), exec);
   kernel.null_handling = null_handling;
-  kernel.can_write_into_slices = can_write_into_slices;
 
   DCHECK_OK(func->AddKernel(kernel));
   DCHECK_OK(registry->AddFunction(std::move(func)));
@@ -549,16 +550,12 @@ void RegisterScalarBoolean(FunctionRegistry* registry) {
   MakeFunction("or", 2, applicator::SimpleBinary<OrOp>, &or_doc, registry);
   MakeFunction("xor", 2, applicator::SimpleBinary<XorOp>, &xor_doc, registry);
 
-  // The Kleene logic kernels cannot write into sliced output bitmaps
   MakeFunction("and_kleene", 2, applicator::SimpleBinary<KleeneAndOp>, &and_kleene_doc,
-               registry,
-               /*can_write_into_slices=*/false, NullHandling::COMPUTED_PREALLOCATE);
+               registry, NullHandling::COMPUTED_PREALLOCATE);
   MakeFunction("and_not_kleene", 2, applicator::SimpleBinary<KleeneAndNotOp>,
-               &and_not_kleene_doc, registry,
-               /*can_write_into_slices=*/false, NullHandling::COMPUTED_PREALLOCATE);
+               &and_not_kleene_doc, registry, NullHandling::COMPUTED_PREALLOCATE);
   MakeFunction("or_kleene", 2, applicator::SimpleBinary<KleeneOrOp>, &or_kleene_doc,
-               registry,
-               /*can_write_into_slices=*/false, NullHandling::COMPUTED_PREALLOCATE);
+               registry, NullHandling::COMPUTED_PREALLOCATE);
 }
 
 }  // namespace internal
diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else.cc b/cpp/src/arrow/compute/kernels/scalar_if_else.cc
index 7a0defaccd6..54e0725fce7 100644
--- a/cpp/src/arrow/compute/kernels/scalar_if_else.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_if_else.cc
@@ -21,11 +21,13 @@
 #include <arrow/util/bit_block_counter.h>
 #include <arrow/util/bitmap.h>
 #include <arrow/util/bitmap_ops.h>
+#include <arrow/util/bitmap_reader.h>
 
 namespace arrow {
 using internal::BitBlockCount;
 using internal::BitBlockCounter;
 using internal::Bitmap;
+using internal::BitmapWordReader;
 
 namespace compute {
 
@@ -72,116 +74,267 @@ Status PromoteNullsVisitor(KernelContext* ctx, const Datum& cond_d, const Datum&
   Bitmap cond_valid{cond.buffers[0], cond.offset, cond.length};
   Bitmap left_valid = GetBitmap(left_d, 0);
   Bitmap right_valid = GetBitmap(right_d, 0);
-  // sometimes Bitmaps will be ignored, in which case we replace access to them with
-  // duplicated (probably elided) access to cond_data
-  const Bitmap& _ = cond_data;
-
-  // lambda function that will be used inside the visitor
-  uint64_t* out_validity = nullptr;
-  int64_t i = 0;
-  auto apply = [&](uint64_t c_valid, uint64_t c_data, uint64_t l_valid,
-                   uint64_t r_valid) {
-    out_validity[i] = c_valid & ((c_data & l_valid) | (~c_data & r_valid));
-    i++;
-  };
 
   // cond.valid & (cond.data & left.valid | ~cond.data & right.valid)
   // In the following cases, we dont need to allocate out_valid bitmap
 
-  // if cond & left & right all ones, then output is all valid --> out_valid = nullptr
+  // if cond & left & right all ones, then output is all valid. output validity buffer
+  // is already allocated, hence set all bits
   if (cond_const == kAllValid && left_const == kAllValid && right_const == kAllValid) {
+    BitUtil::SetBitmap(output->buffers[0]->mutable_data(), output->offset,
+                       output->length);
     return Status::OK();
   }
 
   if (left_const == kAllValid && right_const == kAllValid) {
-    // if both left and right are valid, no need to calculate out_valid bitmap. Pass
+    // if both left and right are valid, no need to calculate out_valid bitmap. Copy
     // cond validity buffer
-    // if there's an offset, copy bitmap (cannot slice a bitmap)
-    if (cond.offset) {
-      ARROW_ASSIGN_OR_RAISE(
-          output->buffers[0],
-          arrow::internal::CopyBitmap(ctx->memory_pool(), cond.buffers[0]->data(),
-                                      cond.offset, cond.length));
-    } else {  // just copy assign cond validity buffer
-      output->buffers[0] = cond.buffers[0];
-    }
+    arrow::internal::CopyBitmap(cond.buffers[0]->data(), cond.offset, cond.length,
+                                output->buffers[0]->mutable_data(), output->offset);
     return Status::OK();
   }
 
-  // following cases requires a separate out_valid buffer
-  ARROW_ASSIGN_OR_RAISE(output->buffers[0], ctx->AllocateBitmap(cond.length));
-  out_validity = output->GetMutableValues<uint64_t>(0);
+  // lambda function that will be used inside the visitor
+  auto apply = [&](uint64_t c_valid, uint64_t c_data, uint64_t l_valid,
+                   uint64_t r_valid) {
+    return c_valid & ((c_data & l_valid) | (~c_data & r_valid));
+  };
 
-  enum { C_VALID, C_DATA, L_VALID, R_VALID };
+  std::array<Bitmap, 1> out_bitmaps{
+      Bitmap{output->buffers[0], output->offset, output->length}};
 
   switch (flag) {
     case COND_CONST | LEFT_CONST | RIGHT_CONST: {
-      Bitmap bitmaps[] = {_, cond_data, _, _};
-      Bitmap::VisitWords(bitmaps, [&](std::array<uint64_t, 4> words) {
-        apply(*cond_const, words[C_DATA], *left_const, *right_const);
-      });
+      std::array<Bitmap, 1> bitmaps{cond_data};
+      Bitmap::VisitWordsAndWrite(bitmaps, &out_bitmaps,
+                                 [&](const std::array<uint64_t, 1>& words_in,
+                                     std::array<uint64_t, 1>* word_out) {
+                                   word_out->at(0) = apply(*cond_const, words_in[0],
+                                                           *left_const, *right_const);
+                                 });
       break;
     }
     case LEFT_CONST | RIGHT_CONST: {
-      Bitmap bitmaps[] = {cond_valid, cond_data, _, _};
-      Bitmap::VisitWords(bitmaps, [&](std::array<uint64_t, 4> words) {
-        apply(words[C_VALID], words[C_DATA], *left_const, *right_const);
-      });
+      std::array<Bitmap, 2> bitmaps{cond_valid, cond_data};
+      Bitmap::VisitWordsAndWrite(bitmaps, &out_bitmaps,
+                                 [&](const std::array<uint64_t, 2>& words_in,
+                                     std::array<uint64_t, 1>* word_out) {
+                                   word_out->at(0) = apply(words_in[0], words_in[1],
+                                                           *left_const, *right_const);
+                                 });
       break;
     }
     case COND_CONST | RIGHT_CONST: {
       // bitmaps[C_VALID], bitmaps[R_VALID] might be null; override to make it safe for
       // Visit()
-      Bitmap bitmaps[] = {_, cond_data, left_valid, _};
-      Bitmap::VisitWords(bitmaps, [&](std::array<uint64_t, 4> words) {
-        apply(*cond_const, words[C_DATA], words[L_VALID], *right_const);
-      });
+      std::array<Bitmap, 2> bitmaps{cond_data, left_valid};
+      Bitmap::VisitWordsAndWrite(bitmaps, &out_bitmaps,
+                                 [&](const std::array<uint64_t, 2>& words_in,
+                                     std::array<uint64_t, 1>* word_out) {
+                                   word_out->at(0) = apply(*cond_const, words_in[0],
+                                                           words_in[1], *right_const);
+                                 });
       break;
     }
     case RIGHT_CONST: {
       // bitmaps[R_VALID] might be null; override to make it safe for Visit()
-      Bitmap bitmaps[] = {cond_valid, cond_data, left_valid, _};
-      Bitmap::VisitWords(bitmaps, [&](std::array<uint64_t, 4> words) {
-        apply(words[C_VALID], words[C_DATA], words[L_VALID], *right_const);
-      });
+      std::array<Bitmap, 3> bitmaps{cond_valid, cond_data, left_valid};
+      Bitmap::VisitWordsAndWrite(bitmaps, &out_bitmaps,
+                                 [&](const std::array<uint64_t, 3>& words_in,
+                                     std::array<uint64_t, 1>* word_out) {
+                                   word_out->at(0) = apply(words_in[0], words_in[1],
+                                                           words_in[2], *right_const);
+                                 });
       break;
     }
     case COND_CONST | LEFT_CONST: {
       // bitmaps[C_VALID], bitmaps[L_VALID] might be null; override to make it safe for
       // Visit()
-      Bitmap bitmaps[] = {_, cond_data, _, right_valid};
-      Bitmap::VisitWords(bitmaps, [&](std::array<uint64_t, 4> words) {
-        apply(*cond_const, words[C_DATA], *left_const, words[R_VALID]);
-      });
+      std::array<Bitmap, 2> bitmaps{cond_data, right_valid};
+      Bitmap::VisitWordsAndWrite(bitmaps, &out_bitmaps,
+                                 [&](const std::array<uint64_t, 2>& words_in,
+                                     std::array<uint64_t, 1>* word_out) {
+                                   word_out->at(0) = apply(*cond_const, words_in[0],
+                                                           *left_const, words_in[1]);
+                                 });
       break;
     }
     case LEFT_CONST: {
       // bitmaps[L_VALID] might be null; override to make it safe for Visit()
-      Bitmap bitmaps[] = {cond_valid, cond_data, _, right_valid};
-      Bitmap::VisitWords(bitmaps, [&](std::array<uint64_t, 4> words) {
-        apply(words[C_VALID], words[C_DATA], *left_const, words[R_VALID]);
-      });
+      std::array<Bitmap, 3> bitmaps{cond_valid, cond_data, right_valid};
+      Bitmap::VisitWordsAndWrite(bitmaps, &out_bitmaps,
+                                 [&](const std::array<uint64_t, 3>& words_in,
+                                     std::array<uint64_t, 1>* word_out) {
+                                   word_out->at(0) = apply(words_in[0], words_in[1],
+                                                           *left_const, words_in[2]);
+                                 });
       break;
     }
     case COND_CONST: {
       // bitmaps[C_VALID] might be null; override to make it safe for Visit()
-      Bitmap bitmaps[] = {_, cond_data, left_valid, right_valid};
-      Bitmap::VisitWords(bitmaps, [&](std::array<uint64_t, 4> words) {
-        apply(*cond_const, words[C_DATA], words[L_VALID], words[R_VALID]);
-      });
+      std::array<Bitmap, 3> bitmaps{cond_data, left_valid, right_valid};
+      Bitmap::VisitWordsAndWrite(bitmaps, &out_bitmaps,
+                                 [&](const std::array<uint64_t, 3>& words_in,
+                                     std::array<uint64_t, 1>* word_out) {
+                                   word_out->at(0) = apply(*cond_const, words_in[0],
+                                                           words_in[1], words_in[2]);
+                                 });
       break;
     }
     case 0: {
-      Bitmap bitmaps[] = {cond_valid, cond_data, left_valid, right_valid};
-      Bitmap::VisitWords(bitmaps, [&](std::array<uint64_t, 4> words) {
-        apply(words[C_VALID], words[C_DATA], words[L_VALID], words[R_VALID]);
-      });
+      std::array<Bitmap, 4> bitmaps{cond_valid, cond_data, left_valid, right_valid};
+      Bitmap::VisitWordsAndWrite(bitmaps, &out_bitmaps,
+                                 [&](const std::array<uint64_t, 4>& words_in,
+                                     std::array<uint64_t, 1>* word_out) {
+                                   word_out->at(0) = apply(words_in[0], words_in[1],
+                                                           words_in[2], words_in[3]);
+                                 });
       break;
     }
   }
   return Status::OK();
 }
 
+using Word = uint64_t;
+static constexpr int64_t word_len = sizeof(Word) * 8;
+
+/// Runs the main if_else loop. Here, it is expected that the right data has already
+/// been copied to the output.
+/// If `invert` is meant to invert the cond.data. If is set to `true`, then the
+/// buffer will be inverted before calling the handle_bulk or handle_each functions.
+/// This is useful, when left is an array and right is scalar. Then rather than
+/// copying data from the right to output, we can copy left data to the output and
+/// invert the cond data to fill right values. Filling out with a scalar is presumed to
+/// be more efficient than filling with an array
+template <typename HandleBulk, typename HandleEach, bool invert = false>
+static void RunIfElseLoop(const ArrayData& cond, HandleBulk handle_bulk,
+                          HandleEach handle_each) {
+  int64_t data_offset = 0;
+  int64_t bit_offset = cond.offset;
+  const auto* cond_data = cond.buffers[1]->data();  // this is a BoolArray
+
+  BitmapWordReader<Word> cond_reader(cond_data, cond.offset, cond.length);
+
+  int64_t cnt = cond_reader.words();
+  while (cnt--) {
+    Word word = cond_reader.NextWord();
+    if (invert) {
+      if (word == 0) {
+        handle_bulk(data_offset, word_len);
+      } else if (word != UINT64_MAX) {
+        for (int64_t i = 0; i < word_len; ++i) {
+          if (!BitUtil::GetBit(cond_data, bit_offset + i)) {
+            handle_each(data_offset + i);
+          }
+        }
+      }
+    } else {
+      if (word == UINT64_MAX) {
+        handle_bulk(data_offset, word_len);
+      } else if (word) {
+        for (int64_t i = 0; i < word_len; ++i) {
+          if (BitUtil::GetBit(cond_data, bit_offset + i)) {
+            handle_each(data_offset + i);
+          }
+        }
+      }
+    }
+    data_offset += word_len;
+    bit_offset += word_len;
+  }
+
+  cnt = cond_reader.trailing_bytes();
+  while (cnt--) {
+    int valid_bits;
+    uint8_t byte = cond_reader.NextTrailingByte(valid_bits);
+    if (invert) {
+      if (byte == 0 && valid_bits == 8) {
+        handle_bulk(data_offset, 8);
+      } else if (byte != UINT8_MAX) {
+        for (int i = 0; i < valid_bits; ++i) {
+          if (!BitUtil::GetBit(cond_data, bit_offset + i)) {
+            handle_each(data_offset + i);
+          }
+        }
+      }
+    } else {
+      if (byte == UINT8_MAX && valid_bits == 8) {
+        handle_bulk(data_offset, 8);
+      } else if (byte) {
+        for (int i = 0; i < valid_bits; ++i) {
+          if (BitUtil::GetBit(cond_data, bit_offset + i)) {
+            handle_each(data_offset + i);
+          }
+        }
+      }
+    }
+    data_offset += 8;
+    bit_offset += 8;
+  }
+}
+
+template <typename HandleBulk, typename HandleEach>
+static void RunIfElseLoopInverted(const ArrayData& cond, HandleBulk handle_bulk,
+                                  HandleEach handle_each) {
+  return RunIfElseLoop<HandleBulk, HandleEach, true>(cond, handle_bulk, handle_each);
+}
+
+/// Runs if-else when cond is a scalar. Two special functions are required,
+/// 1.CopyArrayData, 2. BroadcastScalar
+template <typename CopyArrayData, typename BroadcastScalar>
+static Status RunIfElseScalar(const BooleanScalar& cond, const Datum& left,
+                              const Datum& right, Datum* out,
+                              CopyArrayData copy_array_data,
+                              BroadcastScalar broadcast_scalar) {
+  if (left.is_scalar() && right.is_scalar()) {  // output will be a scalar
+    if (cond.is_valid) {
+      *out = cond.value ? left.scalar() : right.scalar();
+    } else {
+      *out = MakeNullScalar(left.type());
+    }
+    return Status::OK();
+  }
+
+  // either left or right is an array. Output is always an array`
+  const std::shared_ptr<ArrayData>& out_array = out->array();
+  if (!cond.is_valid) {
+    // cond is null; output is all null --> clear validity buffer
+    BitUtil::ClearBitmap(out_array->buffers[0]->mutable_data(), out_array->offset,
+                         out_array->length);
+    return Status::OK();
+  }
+
+  // cond is a non-null scalar
+  const auto& valid_data = cond.value ? left : right;
+  if (valid_data.is_array()) {
+    // valid_data is an array. Hence copy data to the output buffers
+    const auto& valid_array = valid_data.array();
+    if (valid_array->MayHaveNulls()) {
+      arrow::internal::CopyBitmap(
+          valid_array->buffers[0]->data(), valid_array->offset, valid_array->length,
+          out_array->buffers[0]->mutable_data(), out_array->offset);
+    } else {  // validity buffer is nullptr --> set all bits
+      BitUtil::SetBitmap(out_array->buffers[0]->mutable_data(), out_array->offset,
+                         out_array->length);
+    }
+    copy_array_data(*valid_array, out_array.get());
+    return Status::OK();
+
+  } else {  // valid data is scalar
+    // valid data is a scalar that needs to be broadcasted
+    const auto& valid_scalar = *valid_data.scalar();
+    if (valid_scalar.is_valid) {  // if the scalar is non-null, broadcast
+      BitUtil::SetBitmap(out_array->buffers[0]->mutable_data(), out_array->offset,
+                         out_array->length);
+      broadcast_scalar(*valid_data.scalar(), out_array.get());
+    } else {  // scalar is null, clear the output validity buffer
+      BitUtil::ClearBitmap(out_array->buffers[0]->mutable_data(), out_array->offset,
+                           out_array->length);
+    }
+    return Status::OK();
+  }
+}
+
 template <typename Type, typename Enable = void>
 struct IfElseFunctor {};
 
@@ -191,178 +344,148 @@ struct IfElseFunctor {};
 template <typename Type>
 struct IfElseFunctor<Type, enable_if_number<Type>> {
   using T = typename TypeTraits<Type>::CType;
-  // A - Array
-  // S - Scalar
+  // A - Array, S - Scalar, X = Array/Scalar
+
+  // SXX
+  static Status Call(KernelContext* ctx, const BooleanScalar& cond, const Datum& left,
+                     const Datum& right, Datum* out) {
+    return RunIfElseScalar(
+        cond, left, right, out,
+        /*CopyArrayData*/
+        [&](const ArrayData& valid_array, ArrayData* out_array) {
+          std::memcpy(out_array->GetMutableValues<T>(1), valid_array.GetValues<T>(1),
+                      valid_array.length * sizeof(T));
+        },
+        /*BroadcastScalar*/
+        [&](const Scalar& scalar, ArrayData* out_array) {
+          T scalar_data = internal::UnboxScalar<Type>::Unbox(scalar);
+          std::fill(out_array->GetMutableValues<T>(1),
+                    out_array->GetMutableValues<T>(1) + out_array->length, scalar_data);
+        });
+  }
 
   //  AAA
   static Status Call(KernelContext* ctx, const ArrayData& cond, const ArrayData& left,
                      const ArrayData& right, ArrayData* out) {
-    ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Buffer> out_buf,
-                          ctx->Allocate(cond.length * sizeof(T)));
-    T* out_values = reinterpret_cast<T*>(out_buf->mutable_data());
+    T* out_values = out->template GetMutableValues<T>(1);
 
     // copy right data to out_buff
     const T* right_data = right.GetValues<T>(1);
     std::memcpy(out_values, right_data, right.length * sizeof(T));
 
-    const auto* cond_data = cond.buffers[1]->data();  // this is a BoolArray
-    BitBlockCounter bit_counter(cond_data, cond.offset, cond.length);
-
     // selectively copy values from left data
     const T* left_data = left.GetValues<T>(1);
-    int64_t offset = cond.offset;
-
-    // todo this can be improved by intrinsics. ex: _mm*_mask_store_e* (vmovdqa*)
-    while (offset < cond.offset + cond.length) {
-      const BitBlockCount& block = bit_counter.NextWord();
-      if (block.AllSet()) {  // all from left
-        std::memcpy(out_values, left_data, block.length * sizeof(T));
-      } else if (block.popcount) {  // selectively copy from left
-        for (int64_t i = 0; i < block.length; ++i) {
-          if (BitUtil::GetBit(cond_data, offset + i)) {
-            out_values[i] = left_data[i];
-          }
-        }
-      }
 
-      offset += block.length;
-      out_values += block.length;
-      left_data += block.length;
-    }
+    RunIfElseLoop(
+        cond,
+        [&](int64_t data_offset, int64_t num_elems) {
+          std::memcpy(out_values + data_offset, left_data + data_offset,
+                      num_elems * sizeof(T));
+        },
+        [&](int64_t data_offset) { out_values[data_offset] = left_data[data_offset]; });
 
-    out->buffers[1] = std::move(out_buf);
     return Status::OK();
   }
 
   // ASA
   static Status Call(KernelContext* ctx, const ArrayData& cond, const Scalar& left,
                      const ArrayData& right, ArrayData* out) {
-    ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Buffer> out_buf,
-                          ctx->Allocate(cond.length * sizeof(T)));
-    T* out_values = reinterpret_cast<T*>(out_buf->mutable_data());
+    T* out_values = out->template GetMutableValues<T>(1);
 
     // copy right data to out_buff
     const T* right_data = right.GetValues<T>(1);
     std::memcpy(out_values, right_data, right.length * sizeof(T));
 
-    const auto* cond_data = cond.buffers[1]->data();  // this is a BoolArray
-    BitBlockCounter bit_counter(cond_data, cond.offset, cond.length);
-
     // selectively copy values from left data
     T left_data = internal::UnboxScalar<Type>::Unbox(left);
-    int64_t offset = cond.offset;
-
-    // todo this can be improved by intrinsics. ex: _mm*_mask_store_e* (vmovdqa*)
-    while (offset < cond.offset + cond.length) {
-      const BitBlockCount& block = bit_counter.NextWord();
-      if (block.AllSet()) {  // all from left
-        std::fill(out_values, out_values + block.length, left_data);
-      } else if (block.popcount) {  // selectively copy from left
-        for (int64_t i = 0; i < block.length; ++i) {
-          if (BitUtil::GetBit(cond_data, offset + i)) {
-            out_values[i] = left_data;
-          }
-        }
-      }
 
-      offset += block.length;
-      out_values += block.length;
-    }
+    RunIfElseLoop(
+        cond,
+        [&](int64_t data_offset, int64_t num_elems) {
+          std::fill(out_values + data_offset, out_values + data_offset + num_elems,
+                    left_data);
+        },
+        [&](int64_t data_offset) { out_values[data_offset] = left_data; });
 
-    out->buffers[1] = std::move(out_buf);
     return Status::OK();
   }
 
   // AAS
   static Status Call(KernelContext* ctx, const ArrayData& cond, const ArrayData& left,
                      const Scalar& right, ArrayData* out) {
-    ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Buffer> out_buf,
-                          ctx->Allocate(cond.length * sizeof(T)));
-    T* out_values = reinterpret_cast<T*>(out_buf->mutable_data());
+    T* out_values = out->template GetMutableValues<T>(1);
 
     // copy left data to out_buff
     const T* left_data = left.GetValues<T>(1);
     std::memcpy(out_values, left_data, left.length * sizeof(T));
 
-    const auto* cond_data = cond.buffers[1]->data();  // this is a BoolArray
-    BitBlockCounter bit_counter(cond_data, cond.offset, cond.length);
-
-    // selectively copy values from left data
     T right_data = internal::UnboxScalar<Type>::Unbox(right);
-    int64_t offset = cond.offset;
-
-    // todo this can be improved by intrinsics. ex: _mm*_mask_store_e* (vmovdqa*)
-    // left data is already in the output buffer. Therefore, mask needs to be inverted
-    while (offset < cond.offset + cond.length) {
-      const BitBlockCount& block = bit_counter.NextWord();
-      if (block.NoneSet()) {  // all from right
-        std::fill(out_values, out_values + block.length, right_data);
-      } else if (block.popcount) {  // selectively copy from right
-        for (int64_t i = 0; i < block.length; ++i) {
-          if (!BitUtil::GetBit(cond_data, offset + i)) {
-            out_values[i] = right_data;
-          }
-        }
-      }
 
-      offset += block.length;
-      out_values += block.length;
-    }
+    RunIfElseLoopInverted(
+        cond,
+        [&](int64_t data_offset, int64_t num_elems) {
+          std::fill(out_values + data_offset, out_values + data_offset + num_elems,
+                    right_data);
+        },
+        [&](int64_t data_offset) { out_values[data_offset] = right_data; });
 
-    out->buffers[1] = std::move(out_buf);
     return Status::OK();
   }
 
   // ASS
   static Status Call(KernelContext* ctx, const ArrayData& cond, const Scalar& left,
                      const Scalar& right, ArrayData* out) {
-    ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Buffer> out_buf,
-                          ctx->Allocate(cond.length * sizeof(T)));
-    T* out_values = reinterpret_cast<T*>(out_buf->mutable_data());
+    T* out_values = out->template GetMutableValues<T>(1);
 
     // copy right data to out_buff
     T right_data = internal::UnboxScalar<Type>::Unbox(right);
     std::fill(out_values, out_values + cond.length, right_data);
 
-    const auto* cond_data = cond.buffers[1]->data();  // this is a BoolArray
-    BitBlockCounter bit_counter(cond_data, cond.offset, cond.length);
-
     // selectively copy values from left data
     T left_data = internal::UnboxScalar<Type>::Unbox(left);
-    int64_t offset = cond.offset;
-
-    // todo this can be improved by intrinsics. ex: _mm*_mask_store_e* (vmovdqa*)
-    while (offset < cond.offset + cond.length) {
-      const BitBlockCount& block = bit_counter.NextWord();
-      if (block.AllSet()) {  // all from left
-        std::fill(out_values, out_values + block.length, left_data);
-      } else if (block.popcount) {  // selectively copy from left
-        for (int64_t i = 0; i < block.length; ++i) {
-          if (BitUtil::GetBit(cond_data, offset + i)) {
-            out_values[i] = left_data;
-          }
-        }
-      }
-
-      offset += block.length;
-      out_values += block.length;
-    }
+    RunIfElseLoop(
+        cond,
+        [&](int64_t data_offset, int64_t num_elems) {
+          std::fill(out_values + data_offset, out_values + data_offset + num_elems,
+                    left_data);
+        },
+        [&](int64_t data_offset) { out_values[data_offset] = left_data; });
 
-    out->buffers[1] = std::move(out_buf);
     return Status::OK();
   }
 };
 
 template <typename Type>
 struct IfElseFunctor<Type, enable_if_boolean<Type>> {
+  // A - Array, S - Scalar, X = Array/Scalar
+
+  // SXX
+  static Status Call(KernelContext* ctx, const BooleanScalar& cond, const Datum& left,
+                     const Datum& right, Datum* out) {
+    return RunIfElseScalar(
+        cond, left, right, out,
+        /*CopyArrayData*/
+        [&](const ArrayData& valid_array, ArrayData* out_array) {
+          arrow::internal::CopyBitmap(
+              valid_array.buffers[1]->data(), valid_array.offset, valid_array.length,
+              out_array->buffers[1]->mutable_data(), out_array->offset);
+        },
+        /*BroadcastScalar*/
+        [&](const Scalar& scalar, ArrayData* out_array) {
+          bool scalar_data = internal::UnboxScalar<Type>::Unbox(scalar);
+          BitUtil::SetBitsTo(out_array->buffers[1]->mutable_data(), out_array->offset,
+                             out_array->length, scalar_data);
+        });
+  }
+
   // AAA
   static Status Call(KernelContext* ctx, const ArrayData& cond, const ArrayData& left,
                      const ArrayData& right, ArrayData* out) {
     // out_buff = right & ~cond
-    ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Buffer> out_buf,
-                          arrow::internal::BitmapAndNot(
-                              ctx->memory_pool(), right.buffers[1]->data(), right.offset,
-                              cond.buffers[1]->data(), cond.offset, cond.length, 0));
+    const auto& out_buf = out->buffers[1];
+    arrow::internal::BitmapAndNot(right.buffers[1]->data(), right.offset,
+                                  cond.buffers[1]->data(), cond.offset, cond.length,
+                                  out->offset, out_buf->mutable_data());
 
     // out_buff = left & cond
     ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Buffer> temp_buf,
@@ -370,9 +493,9 @@ struct IfElseFunctor<Type, enable_if_boolean<Type>> {
                               ctx->memory_pool(), left.buffers[1]->data(), left.offset,
                               cond.buffers[1]->data(), cond.offset, cond.length, 0));
 
-    arrow::internal::BitmapOr(out_buf->data(), 0, temp_buf->data(), 0, cond.length, 0,
-                              out_buf->mutable_data());
-    out->buffers[1] = std::move(out_buf);
+    arrow::internal::BitmapOr(out_buf->data(), out->offset, temp_buf->data(), 0,
+                              cond.length, out->offset, out_buf->mutable_data());
+
     return Status::OK();
   }
 
@@ -380,19 +503,19 @@ struct IfElseFunctor<Type, enable_if_boolean<Type>> {
   static Status Call(KernelContext* ctx, const ArrayData& cond, const Scalar& left,
                      const ArrayData& right, ArrayData* out) {
     // out_buff = right & ~cond
-    ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Buffer> out_buf,
-                          arrow::internal::BitmapAndNot(
-                              ctx->memory_pool(), right.buffers[1]->data(), right.offset,
-                              cond.buffers[1]->data(), cond.offset, cond.length, 0));
+    const auto& out_buf = out->buffers[1];
+    arrow::internal::BitmapAndNot(right.buffers[1]->data(), right.offset,
+                                  cond.buffers[1]->data(), cond.offset, cond.length,
+                                  out->offset, out_buf->mutable_data());
 
     // out_buff = left & cond
     bool left_data = internal::UnboxScalar<BooleanType>::Unbox(left);
     if (left_data) {
-      arrow::internal::BitmapOr(out_buf->data(), 0, cond.buffers[1]->data(), cond.offset,
-                                cond.length, 0, out_buf->mutable_data());
+      arrow::internal::BitmapOr(out_buf->data(), out->offset, cond.buffers[1]->data(),
+                                cond.offset, cond.length, out->offset,
+                                out_buf->mutable_data());
     }
 
-    out->buffers[1] = std::move(out_buf);
     return Status::OK();
   }
 
@@ -400,20 +523,20 @@ struct IfElseFunctor<Type, enable_if_boolean<Type>> {
   static Status Call(KernelContext* ctx, const ArrayData& cond, const ArrayData& left,
                      const Scalar& right, ArrayData* out) {
     // out_buff = left & cond
-    ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Buffer> out_buf,
-                          arrow::internal::BitmapAnd(
-                              ctx->memory_pool(), left.buffers[1]->data(), left.offset,
-                              cond.buffers[1]->data(), cond.offset, cond.length, 0));
+    const auto& out_buf = out->buffers[1];
+    arrow::internal::BitmapAnd(left.buffers[1]->data(), left.offset,
+                               cond.buffers[1]->data(), cond.offset, cond.length,
+                               out->offset, out_buf->mutable_data());
 
     bool right_data = internal::UnboxScalar<BooleanType>::Unbox(right);
 
     // out_buff = left & cond | right & ~cond
     if (right_data) {
-      arrow::internal::BitmapOrNot(out_buf->data(), 0, cond.buffers[1]->data(),
-                                   cond.offset, cond.length, 0, out_buf->mutable_data());
+      arrow::internal::BitmapOrNot(out_buf->data(), out->offset, cond.buffers[1]->data(),
+                                   cond.offset, cond.length, out->offset,
+                                   out_buf->mutable_data());
     }
 
-    out->buffers[1] = std::move(out_buf);
     return Status::OK();
   }
 
@@ -423,66 +546,32 @@ struct IfElseFunctor<Type, enable_if_boolean<Type>> {
     bool left_data = internal::UnboxScalar<BooleanType>::Unbox(left);
     bool right_data = internal::UnboxScalar<BooleanType>::Unbox(right);
 
+    const auto& out_buf = out->buffers[1];
+
     // out_buf = left & cond | right & ~cond
-    std::shared_ptr<Buffer> out_buf = nullptr;
+    //    std::shared_ptr<Buffer> out_buf = nullptr;
     if (left_data) {
       if (right_data) {
         // out_buf = ones
-        ARROW_ASSIGN_OR_RAISE(out_buf, ctx->AllocateBitmap(cond.length));
-        // filling with UINT8_MAX upto the buffer's size (in bytes)
-        std::memset(out_buf->mutable_data(), UINT8_MAX, out_buf->size());
+        BitUtil::SetBitmap(out_buf->mutable_data(), out->offset, cond.length);
       } else {
         // out_buf = cond
-        out_buf = SliceBuffer(cond.buffers[1], cond.offset, cond.length);
+        arrow::internal::CopyBitmap(cond.buffers[1]->data(), cond.offset, cond.length,
+                                    out_buf->mutable_data(), out->offset);
       }
     } else {
       if (right_data) {
         // out_buf = ~cond
-        ARROW_ASSIGN_OR_RAISE(out_buf, arrow::internal::InvertBitmap(
-                                           ctx->memory_pool(), cond.buffers[1]->data(),
-                                           cond.offset, cond.length))
+        arrow::internal::InvertBitmap(cond.buffers[1]->data(), cond.offset, cond.length,
+                                      out_buf->mutable_data(), out->offset);
       } else {
         // out_buf = zeros
-        ARROW_ASSIGN_OR_RAISE(out_buf, ctx->AllocateBitmap(cond.length));
+        BitUtil::ClearBitmap(out_buf->mutable_data(), out->offset, cond.length);
       }
     }
-    out->buffers[1] = std::move(out_buf);
-    return Status::OK();
-  }
-};
 
-template <typename Type>
-struct IfElseFunctor<Type, enable_if_null<Type>> {
-  template <typename T>
-  static inline Status ReturnCopy(const T& in, T* out) {
-    // Nothing preallocated, so we assign in into the output
-    *out = in;
     return Status::OK();
   }
-
-  // AAA
-  static Status Call(KernelContext* ctx, const ArrayData& cond, const ArrayData& left,
-                     const ArrayData& right, ArrayData* out) {
-    return ReturnCopy(left, out);
-  }
-
-  // ASA
-  static Status Call(KernelContext* ctx, const ArrayData& cond, const Scalar& left,
-                     const ArrayData& right, ArrayData* out) {
-    return ReturnCopy(right, out);
-  }
-
-  // AAS
-  static Status Call(KernelContext* ctx, const ArrayData& cond, const ArrayData& left,
-                     const Scalar& right, ArrayData* out) {
-    return ReturnCopy(left, out);
-  }
-
-  // ASS
-  static Status Call(KernelContext* ctx, const ArrayData& cond, const Scalar& left,
-                     const Scalar& right, ArrayData* out) {
-    return ReturnCopy(cond, out);
-  }
 };
 
 template <typename Type>
@@ -491,32 +580,7 @@ struct ResolveIfElseExec {
     // cond is scalar
     if (batch[0].is_scalar()) {
       const auto& cond = batch[0].scalar_as<BooleanScalar>();
-      if (batch[1].is_scalar() && batch[2].is_scalar()) {
-        if (cond.is_valid) {
-          *out = cond.value ? batch[1].scalar() : batch[2].scalar();
-        } else {
-          *out = MakeNullScalar(batch[1].type());
-        }
-        return Status::OK();
-      }
-      // either left or right is an array. Output is always an array
-      if (!cond.is_valid) {
-        // cond is null; just create a null array
-        ARROW_ASSIGN_OR_RAISE(
-            *out, MakeArrayOfNull(batch[1].type(), batch.length, ctx->memory_pool()))
-        return Status::OK();
-      }
-
-      const auto& valid_data = cond.value ? batch[1] : batch[2];
-      if (valid_data.is_array()) {
-        *out = valid_data;
-      } else {
-        // valid data is a scalar that needs to be broadcasted
-        ARROW_ASSIGN_OR_RAISE(
-            *out,
-            MakeArrayFromScalar(*valid_data.scalar(), batch.length, ctx->memory_pool()));
-      }
-      return Status::OK();
+      return IfElseFunctor<Type>::Call(ctx, cond, batch[1], batch[2], out);
     }
 
     // cond is array. Use functors to sort things out
@@ -543,6 +607,20 @@ struct ResolveIfElseExec {
   }
 };
 
+template <>
+struct ResolveIfElseExec<NullType> {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    if (batch[0].is_scalar()) {
+      *out = MakeNullScalar(null());
+    } else {
+      const std::shared_ptr<ArrayData>& cond_array = batch[0].array();
+      ARROW_ASSIGN_OR_RAISE(
+          *out, MakeArrayOfNull(null(), cond_array->length, ctx->memory_pool()));
+    }
+    return Status::OK();
+  }
+};
+
 struct IfElseFunction : ScalarFunction {
   using ScalarFunction::ScalarFunction;
 
@@ -574,14 +652,25 @@ struct IfElseFunction : ScalarFunction {
   }
 };
 
-void AddPrimitiveIfElseKernels(const std::shared_ptr<IfElseFunction>& scalar_function,
+void AddNullIfElseKernel(const std::shared_ptr<IfElseFunction>& scalar_function) {
+  ScalarKernel kernel({boolean(), null(), null()}, null(),
+                      ResolveIfElseExec<NullType>::Exec);
+  kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
+  kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
+  kernel.can_write_into_slices = false;
+
+  DCHECK_OK(scalar_function->AddKernel(std::move(kernel)));
+}
+
+void AddPrimitiveIfElseKernels(const std::shared_ptr<ScalarFunction>& scalar_function,
                                const std::vector<std::shared_ptr<DataType>>& types) {
   for (auto&& type : types) {
     auto exec = internal::GenerateTypeAgnosticPrimitive<ResolveIfElseExec>(*type);
     // cond array needs to be boolean always
     ScalarKernel kernel({boolean(), type, type}, type, exec);
-    kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
-    kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
+    kernel.null_handling = NullHandling::COMPUTED_PREALLOCATE;
+    kernel.mem_allocation = MemAllocation::PREALLOCATE;
+    kernel.can_write_into_slices = true;
 
     DCHECK_OK(scalar_function->AddKernel(std::move(kernel)));
   }
@@ -607,7 +696,8 @@ void RegisterScalarIfElse(FunctionRegistry* registry) {
 
   AddPrimitiveIfElseKernels(func, NumericTypes());
   AddPrimitiveIfElseKernels(func, TemporalTypes());
-  AddPrimitiveIfElseKernels(func, {boolean(), null()});
+  AddPrimitiveIfElseKernels(func, {boolean()});
+  AddNullIfElseKernel(func);
   // todo add binary kernels
 
   DCHECK_OK(registry->AddFunction(std::move(func)));
diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc
new file mode 100644
index 00000000000..98fb675da40
--- /dev/null
+++ b/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc
@@ -0,0 +1,113 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <arrow/array/concatenate.h>
+#include <arrow/compute/api_scalar.h>
+#include <arrow/testing/gtest_util.h>
+#include <arrow/testing/random.h>
+#include <benchmark/benchmark.h>
+
+namespace arrow {
+namespace compute {
+
+const int64_t elems = 1024 * 1024;
+
+template <typename Type>
+static void IfElseBench(benchmark::State& state) {
+  using CType = typename Type::c_type;
+  auto type = TypeTraits<Type>::type_singleton();
+  using ArrayType = typename TypeTraits<Type>::ArrayType;
+
+  int64_t len = state.range(0);
+  int64_t offset = state.range(1);
+
+  random::RandomArrayGenerator rand(/*seed=*/0);
+
+  auto cond = std::static_pointer_cast<BooleanArray>(
+      rand.ArrayOf(boolean(), len, /*null_probability=*/0.01));
+  auto left = std::static_pointer_cast<ArrayType>(
+      rand.ArrayOf(type, len, /*null_probability=*/0.01));
+  auto right = std::static_pointer_cast<ArrayType>(
+      rand.ArrayOf(type, len, /*null_probability=*/0.01));
+
+  for (auto _ : state) {
+    ABORT_NOT_OK(IfElse(cond->Slice(offset), left->Slice(offset), right->Slice(offset)));
+  }
+
+  state.SetBytesProcessed(state.iterations() *
+                          ((len - offset) / 8 + 2 * (len - offset) * sizeof(CType)));
+}
+
+template <typename Type>
+static void IfElseBenchContiguous(benchmark::State& state) {
+  using CType = typename Type::c_type;
+  auto type = TypeTraits<Type>::type_singleton();
+  using ArrayType = typename TypeTraits<Type>::ArrayType;
+
+  int64_t len = state.range(0);
+  int64_t offset = state.range(1);
+
+  ASSERT_OK_AND_ASSIGN(auto temp1, MakeArrayFromScalar(BooleanScalar(true), len / 2));
+  ASSERT_OK_AND_ASSIGN(auto temp2,
+                       MakeArrayFromScalar(BooleanScalar(false), len - len / 2));
+  ASSERT_OK_AND_ASSIGN(auto concat, Concatenate({temp1, temp2}));
+  auto cond = std::static_pointer_cast<BooleanArray>(concat);
+
+  random::RandomArrayGenerator rand(/*seed=*/0);
+  auto left = std::static_pointer_cast<ArrayType>(
+      rand.ArrayOf(type, len, /*null_probability=*/0.01));
+  auto right = std::static_pointer_cast<ArrayType>(
+      rand.ArrayOf(type, len, /*null_probability=*/0.01));
+
+  for (auto _ : state) {
+    ABORT_NOT_OK(IfElse(cond->Slice(offset), left->Slice(offset), right->Slice(offset)));
+  }
+
+  state.SetBytesProcessed(state.iterations() *
+                          ((len - offset) / 8 + 2 * (len - offset) * sizeof(CType)));
+}
+
+static void IfElseBench64(benchmark::State& state) {
+  return IfElseBench<UInt64Type>(state);
+}
+
+static void IfElseBench32(benchmark::State& state) {
+  return IfElseBench<UInt32Type>(state);
+}
+
+static void IfElseBench64Contiguous(benchmark::State& state) {
+  return IfElseBenchContiguous<UInt64Type>(state);
+}
+
+static void IfElseBench32Contiguous(benchmark::State& state) {
+  return IfElseBenchContiguous<UInt32Type>(state);
+}
+
+BENCHMARK(IfElseBench32)->Args({elems, 0});
+BENCHMARK(IfElseBench64)->Args({elems, 0});
+
+BENCHMARK(IfElseBench32)->Args({elems, 99});
+BENCHMARK(IfElseBench64)->Args({elems, 99});
+
+BENCHMARK(IfElseBench32Contiguous)->Args({elems, 0});
+BENCHMARK(IfElseBench64Contiguous)->Args({elems, 0});
+
+BENCHMARK(IfElseBench32Contiguous)->Args({elems, 99});
+BENCHMARK(IfElseBench64Contiguous)->Args({elems, 99});
+
+}  // namespace compute
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
index 2b63af2f26f..670a2d42a3a 100644
--- a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
@@ -16,6 +16,7 @@
 // under the License.
 
 #include <arrow/array.h>
+#include <arrow/array/concatenate.h>
 #include <arrow/compute/api_scalar.h>
 #include <arrow/compute/kernels/test_util.h>
 #include <arrow/testing/gtest_util.h>
@@ -56,8 +57,15 @@ TYPED_TEST(TestIfElsePrimitive, IfElseFixedSizeRand) {
 
   random::RandomArrayGenerator rand(/*seed=*/0);
   int64_t len = 1000;
-  auto cond = std::static_pointer_cast<BooleanArray>(
-      rand.ArrayOf(boolean(), len, /*null_probability=*/0.01));
+
+  // adding 64 consecutive 1's and 0's in the cond array to test all-true/ all-false
+  // word code paths
+  ASSERT_OK_AND_ASSIGN(auto temp1, MakeArrayFromScalar(BooleanScalar(true), 64));
+  ASSERT_OK_AND_ASSIGN(auto temp2, MakeArrayFromScalar(BooleanScalar(false), 64));
+  auto temp3 = rand.ArrayOf(boolean(), len - 64 * 2, /*null_probability=*/0.01);
+  ASSERT_OK_AND_ASSIGN(auto concat, Concatenate({temp1, temp2, temp3}));
+  auto cond = std::static_pointer_cast<BooleanArray>(concat);
+
   auto left = std::static_pointer_cast<ArrayType>(
       rand.ArrayOf(type, len, /*null_probability=*/0.01));
   auto right = std::static_pointer_cast<ArrayType>(
diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt
index e26a17120cd..660fb2657b6 100644
--- a/cpp/src/arrow/util/CMakeLists.txt
+++ b/cpp/src/arrow/util/CMakeLists.txt
@@ -79,6 +79,7 @@ add_arrow_test(threading-utility-test
 
 add_arrow_benchmark(bit_block_counter_benchmark)
 add_arrow_benchmark(bit_util_benchmark)
+add_arrow_benchmark(bitmap_reader_benchmark)
 add_arrow_benchmark(cache_benchmark)
 add_arrow_benchmark(compression_benchmark)
 add_arrow_benchmark(decimal_benchmark)
diff --git a/cpp/src/arrow/util/bit_util.cc b/cpp/src/arrow/util/bit_util.cc
index 6e23678ddf9..ee4bcde7713 100644
--- a/cpp/src/arrow/util/bit_util.cc
+++ b/cpp/src/arrow/util/bit_util.cc
@@ -20,6 +20,8 @@
 #include <cstdint>
 #include <cstring>
 
+#include "arrow/util/logging.h"
+
 namespace arrow {
 namespace BitUtil {
 
@@ -67,5 +69,59 @@ void SetBitsTo(uint8_t* bits, int64_t start_offset, int64_t length, bool bits_ar
   bits[bytes_end - 1] |= static_cast<uint8_t>(fill_byte & ~last_byte_mask);
 }
 
+template <bool value>
+void SetBitmapImpl(uint8_t* data, int64_t offset, int64_t length) {
+  //                 offset  length
+  // data              |<------------->|
+  //   |--------|...|--------|...|--------|
+  //                   |<--->|   |<--->|
+  //                     pro       epi
+  if (ARROW_PREDICT_FALSE(length == 0)) {
+    return;
+  }
+
+  constexpr uint8_t set_byte = value ? UINT8_MAX : 0;
+
+  auto prologue = static_cast<int32_t>(BitUtil::RoundUp(offset, 8) - offset);
+  DCHECK_LT(prologue, 8);
+
+  if (length < prologue) {  // special case where a mask is required
+    //             offset length
+    // data             |<->|
+    //   |--------|...|--------|...
+    //         mask --> |111|
+    //                  |<---->|
+    //                     pro
+    uint8_t mask = BitUtil::kPrecedingBitmask[8 - prologue] ^
+                   BitUtil::kPrecedingBitmask[8 - prologue + length];
+    data[offset / 8] = value ? data[offset / 8] | mask : data[offset / 8] & ~mask;
+    return;
+  }
+
+  // align to a byte boundary
+  data[offset / 8] = BitUtil::SpliceWord(8 - prologue, data[offset / 8], set_byte);
+  offset += prologue;
+  length -= prologue;
+
+  // set values per byte
+  DCHECK_EQ(offset % 8, 0);
+  std::memset(data + offset / 8, set_byte, length / 8);
+  offset += BitUtil::RoundDown(length, 8);
+  length -= BitUtil::RoundDown(length, 8);
+
+  // clean up
+  DCHECK_LT(length, 8);
+  data[offset / 8] =
+      BitUtil::SpliceWord(static_cast<int32_t>(length), set_byte, data[offset / 8]);
+}
+
+void SetBitmap(uint8_t* data, int64_t offset, int64_t length) {
+  SetBitmapImpl<true>(data, offset, length);
+}
+
+void ClearBitmap(uint8_t* data, int64_t offset, int64_t length) {
+  SetBitmapImpl<false>(data, offset, length);
+}
+
 }  // namespace BitUtil
 }  // namespace arrow
diff --git a/cpp/src/arrow/util/bit_util.h b/cpp/src/arrow/util/bit_util.h
index 01845791faa..1e97e467610 100644
--- a/cpp/src/arrow/util/bit_util.h
+++ b/cpp/src/arrow/util/bit_util.h
@@ -316,5 +316,37 @@ static inline void SetBitTo(uint8_t* bits, int64_t i, bool bit_is_set) {
 ARROW_EXPORT
 void SetBitsTo(uint8_t* bits, int64_t start_offset, int64_t length, bool bits_are_set);
 
+/// \brief Sets all bits in the bitmap to true
+ARROW_EXPORT
+void SetBitmap(uint8_t* data, int64_t offset, int64_t length);
+
+/// \brief Clears all bits in the bitmap (set to false)
+ARROW_EXPORT
+void ClearBitmap(uint8_t* data, int64_t offset, int64_t length);
+
+/// Returns a mask with lower i bits set to 1. If i >= sizeof(Word)*8, all-ones will be
+/// returned
+/// ex:
+/// ref: https://stackoverflow.com/a/59523400
+template <typename Word>
+constexpr Word PrecedingWordBitmask(unsigned int const i) {
+  return (static_cast<Word>(i < sizeof(Word) * 8) << (i & (sizeof(Word) * 8 - 1))) - 1;
+}
+static_assert(PrecedingWordBitmask<uint8_t>(0) == 0x00, "");
+static_assert(PrecedingWordBitmask<uint8_t>(4) == 0x0f, "");
+static_assert(PrecedingWordBitmask<uint8_t>(8) == 0xff, "");
+static_assert(PrecedingWordBitmask<uint16_t>(8) == 0x00ff, "");
+
+/// \brief Create a word with low `n` bits from `low` and high `sizeof(Word)-n` bits
+/// from `high`.
+/// Word ret
+/// for (i = 0; i < sizeof(Word)*8; i++){
+///     ret[i]= i < n ? low[i]: high[i];
+/// }
+template <typename Word>
+constexpr Word SpliceWord(int n, Word low, Word high) {
+  return (high & ~PrecedingWordBitmask<Word>(n)) | (low & PrecedingWordBitmask<Word>(n));
+}
+
 }  // namespace BitUtil
 }  // namespace arrow
diff --git a/cpp/src/arrow/util/bit_util_test.cc b/cpp/src/arrow/util/bit_util_test.cc
index e5a5e4c39be..ded37398f95 100644
--- a/cpp/src/arrow/util/bit_util_test.cc
+++ b/cpp/src/arrow/util/bit_util_test.cc
@@ -1532,6 +1532,43 @@ TEST(BitUtilTests, TestSetBitsTo) {
   }
 }
 
+TEST(BitUtilTests, TestSetBitmap) {
+  using BitUtil::SetBitsTo;
+  for (const auto fill_byte_int : {0xff}) {
+    const uint8_t fill_byte = static_cast<uint8_t>(fill_byte_int);
+    {
+      // test set within a byte
+      uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
+      BitUtil::SetBitmap(bitmap, 2, 2);
+      BitUtil::ClearBitmap(bitmap, 4, 2);
+      ASSERT_BYTES_EQ(bitmap, {static_cast<uint8_t>((fill_byte & ~0x3C) | 0xC)});
+    }
+    {
+      // test straddling a single byte boundary
+      uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
+      BitUtil::SetBitmap(bitmap, 4, 7);
+      BitUtil::ClearBitmap(bitmap, 11, 7);
+      ASSERT_BYTES_EQ(bitmap, {static_cast<uint8_t>((fill_byte & 0xF) | 0xF0), 0x7,
+                               static_cast<uint8_t>(fill_byte & ~0x3)});
+    }
+    {
+      // test byte aligned end
+      uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
+      BitUtil::SetBitmap(bitmap, 4, 4);
+      BitUtil::ClearBitmap(bitmap, 8, 8);
+      ASSERT_BYTES_EQ(bitmap,
+                      {static_cast<uint8_t>((fill_byte & 0xF) | 0xF0), 0x00, fill_byte});
+    }
+    {
+      // test byte aligned end, multiple bytes
+      uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte};
+      BitUtil::ClearBitmap(bitmap, 0, 24);
+      uint8_t false_byte = static_cast<uint8_t>(0);
+      ASSERT_BYTES_EQ(bitmap, {false_byte, false_byte, false_byte, fill_byte});
+    }
+  }
+}
+
 TEST(BitUtilTests, TestCopyBitmap) {
   const int kBufferSize = 1000;
 
@@ -1975,6 +2012,34 @@ TEST(BitUtil, BitsetStack) {
   ASSERT_EQ(stack.TopSize(), 0);
 }
 
+TEST(SpliceWord, SpliceWord) {
+  static_assert(
+      BitUtil::PrecedingWordBitmask<uint8_t>(0) == BitUtil::kPrecedingBitmask[0], "");
+  static_assert(
+      BitUtil::PrecedingWordBitmask<uint8_t>(5) == BitUtil::kPrecedingBitmask[5], "");
+  static_assert(BitUtil::PrecedingWordBitmask<uint8_t>(8) == UINT8_MAX, "");
+
+  static_assert(BitUtil::PrecedingWordBitmask<uint64_t>(0) == uint64_t(0), "");
+  static_assert(BitUtil::PrecedingWordBitmask<uint64_t>(33) == 8589934591, "");
+  static_assert(BitUtil::PrecedingWordBitmask<uint64_t>(64) == UINT64_MAX, "");
+  static_assert(BitUtil::PrecedingWordBitmask<uint64_t>(65) == UINT64_MAX, "");
+
+  ASSERT_EQ(BitUtil::SpliceWord<uint8_t>(0, 0x12, 0xef), 0xef);
+  ASSERT_EQ(BitUtil::SpliceWord<uint8_t>(8, 0x12, 0xef), 0x12);
+  ASSERT_EQ(BitUtil::SpliceWord<uint8_t>(3, 0x12, 0xef), 0xea);
+
+  ASSERT_EQ(BitUtil::SpliceWord<uint32_t>(0, 0x12345678, 0xfedcba98), 0xfedcba98);
+  ASSERT_EQ(BitUtil::SpliceWord<uint32_t>(32, 0x12345678, 0xfedcba98), 0x12345678);
+  ASSERT_EQ(BitUtil::SpliceWord<uint32_t>(24, 0x12345678, 0xfedcba98), 0xfe345678);
+
+  ASSERT_EQ(BitUtil::SpliceWord<uint64_t>(0, 0x0123456789abcdef, 0xfedcba9876543210),
+            0xfedcba9876543210);
+  ASSERT_EQ(BitUtil::SpliceWord<uint64_t>(64, 0x0123456789abcdef, 0xfedcba9876543210),
+            0x0123456789abcdef);
+  ASSERT_EQ(BitUtil::SpliceWord<uint64_t>(48, 0x0123456789abcdef, 0xfedcba9876543210),
+            0xfedc456789abcdef);
+}
+
 // test the basic assumption of word level Bitmap::Visit
 TEST(Bitmap, ShiftingWordsOptimization) {
   // single word
@@ -2156,5 +2221,72 @@ TEST(Bitmap, VisitWordsAnd) {
   }
 }
 
+void DoBitmapVisitAndWrite(int64_t part, bool with_offset) {
+  int64_t bits = part * 4;
+
+  random::RandomArrayGenerator rand(/*seed=*/0);
+  auto arrow_data = rand.ArrayOf(boolean(), bits, 0);
+
+  std::shared_ptr<Buffer>& arrow_buffer = arrow_data->data()->buffers[1];
+
+  Bitmap bm0(arrow_buffer, 0, part);
+  Bitmap bm1(arrow_buffer, part * 1, part);
+  Bitmap bm2(arrow_buffer, part * 2, part);
+
+  std::array<Bitmap, 2> out_bms;
+  if (with_offset) {
+    ASSERT_OK_AND_ASSIGN(auto out, AllocateBitmap(part * 4));
+    out_bms[0] = Bitmap(out, part, part);
+    out_bms[1] = Bitmap(out, part * 2, part);
+  } else {
+    ASSERT_OK_AND_ASSIGN(auto out0, AllocateBitmap(part));
+    ASSERT_OK_AND_ASSIGN(auto out1, AllocateBitmap(part));
+    out_bms[0] = Bitmap(out0, 0, part);
+    out_bms[1] = Bitmap(out1, 0, part);
+  }
+
+  // out0 = bm0 & bm1, out1= bm0 | bm2
+  std::array<Bitmap, 3> in_bms{bm0, bm1, bm2};
+  Bitmap::VisitWordsAndWrite(
+      in_bms, &out_bms,
+      [](const std::array<uint64_t, 3>& in, std::array<uint64_t, 2>* out) {
+        out->at(0) = in[0] & in[1];
+        out->at(1) = in[0] | in[2];
+      });
+
+  auto pool = MemoryPool::CreateDefault();
+  ASSERT_OK_AND_ASSIGN(auto exp_0,
+                       BitmapAnd(pool.get(), bm0.buffer()->data(), bm0.offset(),
+                                 bm1.buffer()->data(), bm1.offset(), part, 0));
+  ASSERT_OK_AND_ASSIGN(auto exp_1,
+                       BitmapOr(pool.get(), bm0.buffer()->data(), bm0.offset(),
+                                bm2.buffer()->data(), bm2.offset(), part, 0));
+
+  ASSERT_TRUE(BitmapEquals(exp_0->data(), 0, out_bms[0].buffer()->data(),
+                           out_bms[0].offset(), part))
+      << "exp: " << Bitmap(exp_0->data(), 0, part).ToString() << std::endl
+      << "got: " << out_bms[0].ToString();
+
+  ASSERT_TRUE(BitmapEquals(exp_1->data(), 0, out_bms[1].buffer()->data(),
+                           out_bms[1].offset(), part))
+      << "exp: " << Bitmap(exp_1->data(), 0, part).ToString() << std::endl
+      << "got: " << out_bms[1].ToString();
+}
+
+class TestBitmapVisitAndWrite : public ::testing::TestWithParam<int32_t> {};
+
+INSTANTIATE_TEST_SUITE_P(VisitWriteGeneral, TestBitmapVisitAndWrite,
+                         testing::Values(199, 256, 1000));
+
+INSTANTIATE_TEST_SUITE_P(VisitWriteEdgeCases, TestBitmapVisitAndWrite,
+                         testing::Values(5, 13, 21, 29, 37, 41, 51, 59, 64, 97));
+
+INSTANTIATE_TEST_SUITE_P(VisitWriteEdgeCases2, TestBitmapVisitAndWrite,
+                         testing::Values(8, 16, 24, 32, 40, 48, 56, 64));
+
+TEST_P(TestBitmapVisitAndWrite, NoOffset) { DoBitmapVisitAndWrite(GetParam(), false); }
+
+TEST_P(TestBitmapVisitAndWrite, WithOffset) { DoBitmapVisitAndWrite(GetParam(), true); }
+
 }  // namespace internal
 }  // namespace arrow
diff --git a/cpp/src/arrow/util/bitmap.h b/cpp/src/arrow/util/bitmap.h
index 8562c55e3d5..141f863c0b8 100644
--- a/cpp/src/arrow/util/bitmap.h
+++ b/cpp/src/arrow/util/bitmap.h
@@ -29,6 +29,9 @@
 
 #include "arrow/buffer.h"
 #include "arrow/util/bit_util.h"
+#include "arrow/util/bitmap_ops.h"
+#include "arrow/util/bitmap_reader.h"
+#include "arrow/util/bitmap_writer.h"
 #include "arrow/util/compare.h"
 #include "arrow/util/endian.h"
 #include "arrow/util/functional.h"
@@ -109,6 +112,21 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable<Bitmap>,
     }
   }
 
+  /// \brief Visit bits from each bitmap as bitset<N>
+  ///
+  /// All bitmaps must have identical length.
+  template <size_t N, typename Visitor>
+  static void VisitBits(const std::array<Bitmap, N>& bitmaps, Visitor&& visitor) {
+    int64_t bit_length = BitLength(bitmaps);
+    std::bitset<N> bits;
+    for (int64_t bit_i = 0; bit_i < bit_length; ++bit_i) {
+      for (size_t i = 0; i < N; ++i) {
+        bits[i] = bitmaps[i].GetBit(bit_i);
+      }
+      visitor(bits);
+    }
+  }
+
   /// \brief Visit words of bits from each bitmap as array<Word, N>
   ///
   /// All bitmaps must have identical length. The first bit in a visited bitmap
@@ -225,6 +243,132 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable<Bitmap>,
     return min_offset;
   }
 
+  template <size_t N, size_t M, typename ReaderT, typename WriterT, typename Visitor,
+            typename Word = typename std::decay<
+                internal::call_traits::argument_type<0, Visitor&&>>::type::value_type>
+  static void RunVisitWordsAndWriteLoop(int64_t bit_length,
+                                        std::array<ReaderT, N>& readers,
+                                        std::array<WriterT, M>& writers,
+                                        Visitor&& visitor) {
+    constexpr int64_t kBitWidth = sizeof(Word) * 8;
+
+    std::array<Word, N> visited_words;
+    std::array<Word, M> output_words;
+
+    // every reader will have same number of words, since they are same length'ed
+    // TODO($JIRA) this will be inefficient in some cases. When there are offsets beyond
+    //  Word boundary, every Word would have to be created from 2 adjoining Words
+    auto n_words = readers[0].words();
+    bit_length -= n_words * kBitWidth;
+    while (n_words--) {
+      // first collect all words to visited_words array
+      for (size_t i = 0; i < N; i++) {
+        visited_words[i] = readers[i].NextWord();
+      }
+      visitor(visited_words, &output_words);
+      for (size_t i = 0; i < M; i++) {
+        writers[i].PutNextWord(output_words[i]);
+      }
+    }
+
+    // every reader will have same number of trailing bytes, because of the above reason
+    // tailing portion could be more than one word! (ref: BitmapWordReader constructor)
+    // remaining full/ partial words to write
+
+    if (bit_length) {
+      // convert the word visitor lambda to a byte_visitor
+      auto byte_visitor = [&](const std::array<uint8_t, N>& in,
+                              std::array<uint8_t, M>* out) {
+        std::array<Word, N> in_words;
+        std::array<Word, M> out_words;
+        std::copy(in.begin(), in.end(), in_words.begin());
+        visitor(in_words, &out_words);
+        for (size_t i = 0; i < M; i++) {
+          out->at(i) = static_cast<uint8_t>(out_words[i]);
+        }
+      };
+
+      std::array<uint8_t, N> visited_bytes;
+      std::array<uint8_t, M> output_bytes;
+      int n_bytes = readers[0].trailing_bytes();
+      while (n_bytes--) {
+        visited_bytes.fill(0);
+        output_bytes.fill(0);
+        int valid_bits;
+        for (size_t i = 0; i < N; i++) {
+          visited_bytes[i] = readers[i].NextTrailingByte(valid_bits);
+        }
+        byte_visitor(visited_bytes, &output_bytes);
+        for (size_t i = 0; i < M; i++) {
+          writers[i].PutNextTrailingByte(output_bytes[i], valid_bits);
+        }
+      }
+    }
+  }
+
+  /// \brief Visit words of bits from each input bitmap as array<Word, N> and collects
+  /// outputs to an array<Word, M>, to be written into the output bitmaps accordingly.
+  ///
+  /// All bitmaps must have identical length. The first bit in a visited bitmap
+  /// may be offset within the first visited word, but words will otherwise contain
+  /// densely packed bits loaded from the bitmap. That offset within the first word is
+  /// returned.
+  /// Visitor is expected to have the following signature
+  ///     [](const std::array<Word, N>& in_words, std::array<Word, M>* out_words){...}
+  ///
+  // NOTE: this function is efficient on 3+ sufficiently large bitmaps.
+  // It also has a large prolog / epilog overhead and should be used
+  // carefully in other cases.
+  // For 2 bitmaps or less, and/or smaller bitmaps, see also VisitTwoBitBlocksVoid
+  // and BitmapUInt64Reader.
+  template <size_t N, size_t M, typename Visitor,
+            typename Word = typename std::decay<
+                internal::call_traits::argument_type<0, Visitor&&>>::type::value_type>
+  static void VisitWordsAndWrite(const std::array<Bitmap, N>& bitmaps_arg,
+                                 std::array<Bitmap, M>* out_bitmaps_arg,
+                                 Visitor&& visitor) {
+    int64_t bit_length = BitLength(bitmaps_arg);
+    assert(bit_length == BitLength(*out_bitmaps_arg));
+
+    // if both input and output bitmaps have no byte offset, then use special template
+    if (std::all_of(bitmaps_arg.begin(), bitmaps_arg.end(),
+                    [](const Bitmap& b) { return b.offset_ % 8 == 0; }) &&
+        std::all_of(out_bitmaps_arg->begin(), out_bitmaps_arg->end(),
+                    [](const Bitmap& b) { return b.offset_ % 8 == 0; })) {
+      std::array<BitmapWordReader<Word, /*may_have_byte_offset=*/false>, N> readers;
+      for (size_t i = 0; i < N; ++i) {
+        const Bitmap& in_bitmap = bitmaps_arg[i];
+        readers[i] = BitmapWordReader<Word, /*may_have_byte_offset=*/false>(
+            in_bitmap.buffer_->data(), in_bitmap.offset_, in_bitmap.length_);
+      }
+
+      std::array<BitmapWordWriter<Word, /*may_have_byte_offset=*/false>, M> writers;
+      for (size_t i = 0; i < M; ++i) {
+        const Bitmap& out_bitmap = out_bitmaps_arg->at(i);
+        writers[i] = BitmapWordWriter<Word, /*may_have_byte_offset=*/false>(
+            out_bitmap.buffer_->mutable_data(), out_bitmap.offset_, out_bitmap.length_);
+      }
+
+      RunVisitWordsAndWriteLoop(bit_length, readers, writers, visitor);
+    } else {
+      std::array<BitmapWordReader<Word>, N> readers;
+      for (size_t i = 0; i < N; ++i) {
+        const Bitmap& in_bitmap = bitmaps_arg[i];
+        readers[i] = BitmapWordReader<Word>(in_bitmap.buffer_->data(), in_bitmap.offset_,
+                                            in_bitmap.length_);
+      }
+
+      std::array<BitmapWordWriter<Word>, M> writers;
+      for (size_t i = 0; i < M; ++i) {
+        const Bitmap& out_bitmap = out_bitmaps_arg->at(i);
+        writers[i] = BitmapWordWriter<Word>(out_bitmap.buffer_->mutable_data(),
+                                            out_bitmap.offset_, out_bitmap.length_);
+      }
+
+      RunVisitWordsAndWriteLoop(bit_length, readers, writers, visitor);
+    }
+  }
+
   const std::shared_ptr<Buffer>& buffer() const { return buffer_; }
 
   /// offset of first bit relative to buffer().data()
@@ -301,6 +445,14 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable<Bitmap>,
   /// assert bitmaps have identical length and return that length
   static int64_t BitLength(const Bitmap* bitmaps, size_t N);
 
+  template <size_t N>
+  static int64_t BitLength(const std::array<Bitmap, N>& bitmaps) {
+    for (size_t i = 1; i < N; ++i) {
+      assert(bitmaps[i].length() == bitmaps[0].length());
+    }
+    return bitmaps[0].length();
+  }
+
   std::shared_ptr<Buffer> buffer_;
   int64_t offset_ = 0, length_ = 0;
 };
diff --git a/cpp/src/arrow/util/bitmap_ops.cc b/cpp/src/arrow/util/bitmap_ops.cc
index a27a61cadf3..63c8b008f4a 100644
--- a/cpp/src/arrow/util/bitmap_ops.cc
+++ b/cpp/src/arrow/util/bitmap_ops.cc
@@ -28,9 +28,7 @@
 #include "arrow/util/bit_util.h"
 #include "arrow/util/bitmap_reader.h"
 #include "arrow/util/bitmap_writer.h"
-#include "arrow/util/endian.h"
 #include "arrow/util/logging.h"
-#include "arrow/util/ubsan.h"
 
 namespace arrow {
 namespace internal {
@@ -85,222 +83,6 @@ int64_t CountSetBits(const uint8_t* data, int64_t bit_offset, int64_t length) {
   return count;
 }
 
-namespace {
-
-// BitmapWordReader here is faster than BitmapUInt64Reader (in bitmap_reader.h)
-// on sufficiently large inputs.  However, it has a larger prolog / epilog overhead
-// and should probably not be used for small bitmaps.
-
-template <typename Word>
-class BitmapWordReader {
- public:
-  BitmapWordReader(const uint8_t* bitmap, int64_t offset, int64_t length) {
-    bitmap_ = bitmap + offset / 8;
-    offset_ = offset % 8;
-    bitmap_end_ = bitmap_ + BitUtil::BytesForBits(offset_ + length);
-
-    // decrement word count by one as we may touch two adjacent words in one iteration
-    nwords_ = length / (sizeof(Word) * 8) - 1;
-    if (nwords_ < 0) {
-      nwords_ = 0;
-    }
-    trailing_bits_ = static_cast<int>(length - nwords_ * sizeof(Word) * 8);
-    trailing_bytes_ = static_cast<int>(BitUtil::BytesForBits(trailing_bits_));
-
-    if (nwords_ > 0) {
-      current_word_ = load<Word>(bitmap_);
-    } else if (length > 0) {
-      current_byte_ = load<uint8_t>(bitmap_);
-    }
-  }
-
-  Word NextWord() {
-    bitmap_ += sizeof(Word);
-    const Word next_word = load<Word>(bitmap_);
-    Word word = current_word_;
-    if (offset_) {
-      // combine two adjacent words into one word
-      // |<------ next ----->|<---- current ---->|
-      // +-------------+-----+-------------+-----+
-      // |     ---     |  A  |      B      | --- |
-      // +-------------+-----+-------------+-----+
-      //                  |         |       offset
-      //                  v         v
-      //               +-----+-------------+
-      //               |  A  |      B      |
-      //               +-----+-------------+
-      //               |<------ word ----->|
-      word >>= offset_;
-      word |= next_word << (sizeof(Word) * 8 - offset_);
-    }
-    current_word_ = next_word;
-    return word;
-  }
-
-  uint8_t NextTrailingByte(int& valid_bits) {
-    uint8_t byte;
-    DCHECK_GT(trailing_bits_, 0);
-
-    if (trailing_bits_ <= 8) {
-      // last byte
-      valid_bits = trailing_bits_;
-      trailing_bits_ = 0;
-      byte = 0;
-      internal::BitmapReader reader(bitmap_, offset_, valid_bits);
-      for (int i = 0; i < valid_bits; ++i) {
-        byte >>= 1;
-        if (reader.IsSet()) {
-          byte |= 0x80;
-        }
-        reader.Next();
-      }
-      byte >>= (8 - valid_bits);
-    } else {
-      ++bitmap_;
-      const uint8_t next_byte = load<uint8_t>(bitmap_);
-      byte = current_byte_;
-      if (offset_) {
-        byte >>= offset_;
-        byte |= next_byte << (8 - offset_);
-      }
-      current_byte_ = next_byte;
-      trailing_bits_ -= 8;
-      valid_bits = 8;
-    }
-    return byte;
-  }
-
-  int64_t words() const { return nwords_; }
-  int trailing_bytes() const { return trailing_bytes_; }
-
- private:
-  int64_t offset_;
-  const uint8_t* bitmap_;
-
-  const uint8_t* bitmap_end_;
-  int64_t nwords_;
-  int trailing_bits_;
-  int trailing_bytes_;
-  union {
-    Word current_word_;
-    struct {
-#if ARROW_LITTLE_ENDIAN == 0
-      uint8_t padding_bytes_[sizeof(Word) - 1];
-#endif
-      uint8_t current_byte_;
-    };
-  };
-
-  template <typename DType>
-  DType load(const uint8_t* bitmap) {
-    DCHECK_LE(bitmap + sizeof(DType), bitmap_end_);
-    return BitUtil::ToLittleEndian(util::SafeLoadAs<DType>(bitmap));
-  }
-};
-
-template <typename Word>
-class BitmapWordWriter {
- public:
-  BitmapWordWriter(uint8_t* bitmap, int64_t offset, int64_t length) {
-    bitmap_ = bitmap + offset / 8;
-    offset_ = offset % 8;
-    bitmap_end_ = bitmap_ + BitUtil::BytesForBits(offset_ + length);
-    mask_ = (1U << offset_) - 1;
-
-    if (offset_) {
-      if (length >= static_cast<int>(sizeof(Word) * 8)) {
-        current_word_ = load<Word>(bitmap_);
-      } else if (length > 0) {
-        current_byte_ = load<uint8_t>(bitmap_);
-      }
-    }
-  }
-
-  void PutNextWord(Word word) {
-    if (offset_) {
-      // split one word into two adjacent words, don't touch unused bits
-      //               |<------ word ----->|
-      //               +-----+-------------+
-      //               |  A  |      B      |
-      //               +-----+-------------+
-      //                  |         |
-      //                  v         v       offset
-      // +-------------+-----+-------------+-----+
-      // |     ---     |  A  |      B      | --- |
-      // +-------------+-----+-------------+-----+
-      // |<------ next ----->|<---- current ---->|
-      word = (word << offset_) | (word >> (sizeof(Word) * 8 - offset_));
-      Word next_word = load<Word>(bitmap_ + sizeof(Word));
-      current_word_ = (current_word_ & mask_) | (word & ~mask_);
-      next_word = (next_word & ~mask_) | (word & mask_);
-      store<Word>(bitmap_, current_word_);
-      store<Word>(bitmap_ + sizeof(Word), next_word);
-      current_word_ = next_word;
-    } else {
-      store<Word>(bitmap_, word);
-    }
-    bitmap_ += sizeof(Word);
-  }
-
-  void PutNextTrailingByte(uint8_t byte, int valid_bits) {
-    if (valid_bits == 8) {
-      if (offset_) {
-        byte = (byte << offset_) | (byte >> (8 - offset_));
-        uint8_t next_byte = load<uint8_t>(bitmap_ + 1);
-        current_byte_ = (current_byte_ & mask_) | (byte & ~mask_);
-        next_byte = (next_byte & ~mask_) | (byte & mask_);
-        store<uint8_t>(bitmap_, current_byte_);
-        store<uint8_t>(bitmap_ + 1, next_byte);
-        current_byte_ = next_byte;
-      } else {
-        store<uint8_t>(bitmap_, byte);
-      }
-      ++bitmap_;
-    } else {
-      DCHECK_GT(valid_bits, 0);
-      DCHECK_LT(valid_bits, 8);
-      DCHECK_LE(bitmap_ + BitUtil::BytesForBits(offset_ + valid_bits), bitmap_end_);
-      internal::BitmapWriter writer(bitmap_, offset_, valid_bits);
-      for (int i = 0; i < valid_bits; ++i) {
-        (byte & 0x01) ? writer.Set() : writer.Clear();
-        writer.Next();
-        byte >>= 1;
-      }
-      writer.Finish();
-    }
-  }
-
- private:
-  int64_t offset_;
-  uint8_t* bitmap_;
-
-  const uint8_t* bitmap_end_;
-  uint64_t mask_;
-  union {
-    Word current_word_;
-    struct {
-#if ARROW_LITTLE_ENDIAN == 0
-      uint8_t padding_bytes_[sizeof(Word) - 1];
-#endif
-      uint8_t current_byte_;
-    };
-  };
-
-  template <typename DType>
-  DType load(const uint8_t* bitmap) {
-    DCHECK_LE(bitmap + sizeof(DType), bitmap_end_);
-    return BitUtil::ToLittleEndian(util::SafeLoadAs<DType>(bitmap));
-  }
-
-  template <typename DType>
-  void store(uint8_t* bitmap, DType data) {
-    DCHECK_LE(bitmap + sizeof(DType), bitmap_end_);
-    util::SafeStore(bitmap, BitUtil::FromLittleEndian(data));
-  }
-};
-
-}  // namespace
-
 enum class TransferMode : bool { Copy, Invert };
 
 template <TransferMode mode>
diff --git a/cpp/src/arrow/util/bitmap_reader.h b/cpp/src/arrow/util/bitmap_reader.h
index cf4f5e7db8b..7c43747fafb 100644
--- a/cpp/src/arrow/util/bitmap_reader.h
+++ b/cpp/src/arrow/util/bitmap_reader.h
@@ -142,6 +142,118 @@ class BitmapUInt64Reader {
   uint64_t carry_bits_;
 };
 
+// BitmapWordReader here is faster than BitmapUInt64Reader (in bitmap_reader.h)
+// on sufficiently large inputs.  However, it has a larger prolog / epilog overhead
+// and should probably not be used for small bitmaps.
+
+template <typename Word, bool may_have_byte_offset = true>
+class BitmapWordReader {
+ public:
+  BitmapWordReader() = default;
+  BitmapWordReader(const uint8_t* bitmap, int64_t offset, int64_t length)
+      : offset_(static_cast<int64_t>(may_have_byte_offset) * (offset % 8)),
+        bitmap_(bitmap + offset / 8),
+        bitmap_end_(bitmap_ + BitUtil::BytesForBits(offset_ + length)) {
+    // decrement word count by one as we may touch two adjacent words in one iteration
+    nwords_ = length / (sizeof(Word) * 8) - 1;
+    if (nwords_ < 0) {
+      nwords_ = 0;
+    }
+    trailing_bits_ = static_cast<int>(length - nwords_ * sizeof(Word) * 8);
+    trailing_bytes_ = static_cast<int>(BitUtil::BytesForBits(trailing_bits_));
+
+    if (nwords_ > 0) {
+      current_word_ = load<Word>(bitmap_);
+    } else if (length > 0) {
+      current_byte_ = load<uint8_t>(bitmap_);
+    }
+  }
+
+  Word NextWord() {
+    bitmap_ += sizeof(Word);
+    const Word next_word = load<Word>(bitmap_);
+    Word word = current_word_;
+    if (may_have_byte_offset && offset_) {
+      // combine two adjacent words into one word
+      // |<------ next ----->|<---- current ---->|
+      // +-------------+-----+-------------+-----+
+      // |     ---     |  A  |      B      | --- |
+      // +-------------+-----+-------------+-----+
+      //                  |         |       offset
+      //                  v         v
+      //               +-----+-------------+
+      //               |  A  |      B      |
+      //               +-----+-------------+
+      //               |<------ word ----->|
+      word >>= offset_;
+      word |= next_word << (sizeof(Word) * 8 - offset_);
+    }
+    current_word_ = next_word;
+    return word;
+  }
+
+  uint8_t NextTrailingByte(int& valid_bits) {
+    uint8_t byte;
+    assert(trailing_bits_ > 0);
+
+    if (trailing_bits_ <= 8) {
+      // last byte
+      valid_bits = trailing_bits_;
+      trailing_bits_ = 0;
+      byte = 0;
+      internal::BitmapReader reader(bitmap_, offset_, valid_bits);
+      for (int i = 0; i < valid_bits; ++i) {
+        byte >>= 1;
+        if (reader.IsSet()) {
+          byte |= 0x80;
+        }
+        reader.Next();
+      }
+      byte >>= (8 - valid_bits);
+    } else {
+      ++bitmap_;
+      const uint8_t next_byte = load<uint8_t>(bitmap_);
+      byte = current_byte_;
+      if (may_have_byte_offset && offset_) {
+        byte >>= offset_;
+        byte |= next_byte << (8 - offset_);
+      }
+      current_byte_ = next_byte;
+      trailing_bits_ -= 8;
+      trailing_bytes_--;
+      valid_bits = 8;
+    }
+    return byte;
+  }
+
+  int64_t words() const { return nwords_; }
+  int trailing_bytes() const { return trailing_bytes_; }
+
+ private:
+  int64_t offset_;
+  const uint8_t* bitmap_;
+
+  const uint8_t* bitmap_end_;
+  int64_t nwords_;
+  int trailing_bits_;
+  int trailing_bytes_;
+  union {
+    Word current_word_;
+    struct {
+#if ARROW_LITTLE_ENDIAN == 0
+      uint8_t padding_bytes_[sizeof(Word) - 1];
+#endif
+      uint8_t current_byte_;
+    };
+  };
+
+  template <typename DType>
+  DType load(const uint8_t* bitmap) {
+    assert(bitmap + sizeof(DType) <= bitmap_end_);
+    return BitUtil::ToLittleEndian(util::SafeLoadAs<DType>(bitmap));
+  }
+};
+
 /// \brief Index into a possibly non-existent bitmap
 struct OptionalBitIndexer {
   const uint8_t* bitmap;
@@ -151,7 +263,7 @@ struct OptionalBitIndexer {
       : bitmap(buffer == NULLPTR ? NULLPTR : buffer->data()), offset(offset) {}
 
   bool operator[](int64_t i) const {
-    return bitmap == NULLPTR ? true : BitUtil::GetBit(bitmap, offset + i);
+    return bitmap == NULLPTR || BitUtil::GetBit(bitmap, offset + i);
   }
 };
 
diff --git a/cpp/src/arrow/util/bitmap_reader_benchmark.cc b/cpp/src/arrow/util/bitmap_reader_benchmark.cc
new file mode 100644
index 00000000000..359653c9644
--- /dev/null
+++ b/cpp/src/arrow/util/bitmap_reader_benchmark.cc
@@ -0,0 +1,113 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <bitset>
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <memory>
+#include <utility>
+
+#include "arrow/buffer.h"
+#include "arrow/testing/gtest_util.h"
+#include "arrow/testing/util.h"
+#include "arrow/util/bit_block_counter.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/bitmap_reader.h"
+#include "benchmark/benchmark.h"
+
+namespace arrow {
+namespace BitUtil {
+
+using internal::BitBlockCount;
+using internal::BitBlockCounter;
+using internal::BitmapWordReader;
+
+const int64_t kBufferSize = 1024 * (std::rand() % 25 + 1000);
+
+// const int seed = std::rand();
+
+static std::shared_ptr<Buffer> CreateRandomBuffer(int64_t nbytes) {
+  auto buffer = *AllocateBuffer(nbytes);
+  memset(buffer->mutable_data(), 0, nbytes);
+  random_bytes(nbytes, /*seed=*/0, buffer->mutable_data());
+  return std::move(buffer);
+}
+
+static void BitBlockCounterBench(benchmark::State& state) {
+  int64_t nbytes = state.range(0);
+  std::shared_ptr<Buffer> cond_buf = CreateRandomBuffer(nbytes);
+  for (auto _ : state) {
+    BitBlockCounter counter(cond_buf->data(), 0, nbytes * 8);
+
+    int64_t offset = 0;
+    uint64_t set_bits = 0;
+
+    while (offset < nbytes * 8) {
+      const BitBlockCount& word = counter.NextWord();
+      //      if (word.AllSet()) {
+      //        set_bits += word.length;
+      //      } else if (word.popcount) {
+      //        set_bits += word.popcount;
+      //      }
+      set_bits += word.popcount;
+      benchmark::DoNotOptimize(set_bits);
+      offset += word.length;
+    }
+    benchmark::ClobberMemory();
+  }
+
+  state.SetBytesProcessed(state.iterations() * nbytes);
+}
+
+static void BitmapWordReaderBench(benchmark::State& state) {
+  int64_t nbytes = state.range(0);
+  std::shared_ptr<Buffer> cond_buf = CreateRandomBuffer(nbytes);
+  for (auto _ : state) {
+    BitmapWordReader<uint64_t> counter(cond_buf->data(), 0, nbytes * 8);
+
+    int64_t set_bits = 0;
+
+    int64_t cnt = counter.words();
+    while (cnt--) {
+      const auto& word = counter.NextWord();
+      //      if (word == UINT64_MAX) {
+      //        set_bits += sizeof(uint64_t) * 8;
+      //      } else if (word) {
+      //        set_bits += PopCount(word);
+      //      }
+      set_bits += PopCount(word);
+      benchmark::DoNotOptimize(set_bits);
+    }
+
+    cnt = counter.trailing_bytes();
+    while (cnt--) {
+      int valid_bits;
+      const auto& byte = static_cast<uint32_t>(counter.NextTrailingByte(valid_bits));
+      set_bits += PopCount(kPrecedingBitmask[valid_bits] & byte);
+      benchmark::DoNotOptimize(set_bits);
+    }
+    benchmark::ClobberMemory();
+  }
+  state.SetBytesProcessed(state.iterations() * nbytes);
+}
+
+BENCHMARK(BitBlockCounterBench)->Arg(kBufferSize);
+BENCHMARK(BitmapWordReaderBench)->Arg(kBufferSize);
+
+}  // namespace BitUtil
+}  // namespace arrow
diff --git a/cpp/src/arrow/util/bitmap_writer.h b/cpp/src/arrow/util/bitmap_writer.h
index d4f02f37a41..d5c6d909df0 100644
--- a/cpp/src/arrow/util/bitmap_writer.h
+++ b/cpp/src/arrow/util/bitmap_writer.h
@@ -180,5 +180,106 @@ class FirstTimeBitmapWriter {
   int64_t byte_offset_;
 };
 
+template <typename Word, bool may_have_byte_offset = true>
+class BitmapWordWriter {
+ public:
+  BitmapWordWriter() = default;
+  BitmapWordWriter(uint8_t* bitmap, int64_t offset, int64_t length)
+      : offset_(static_cast<int64_t>(may_have_byte_offset) * (offset % 8)),
+        bitmap_(bitmap + offset / 8),
+        bitmap_end_(bitmap_ + BitUtil::BytesForBits(offset_ + length)),
+        mask_((1U << offset_) - 1) {
+    if (offset_) {
+      if (length >= static_cast<int>(sizeof(Word) * 8)) {
+        current_word_ = load<Word>(bitmap_);
+      } else if (length > 0) {
+        current_byte_ = load<uint8_t>(bitmap_);
+      }
+    }
+  }
+
+  void PutNextWord(Word word) {
+    if (may_have_byte_offset && offset_) {
+      // split one word into two adjacent words, don't touch unused bits
+      //               |<------ word ----->|
+      //               +-----+-------------+
+      //               |  A  |      B      |
+      //               +-----+-------------+
+      //                  |         |
+      //                  v         v       offset
+      // +-------------+-----+-------------+-----+
+      // |     ---     |  A  |      B      | --- |
+      // +-------------+-----+-------------+-----+
+      // |<------ next ----->|<---- current ---->|
+      word = (word << offset_) | (word >> (sizeof(Word) * 8 - offset_));
+      Word next_word = load<Word>(bitmap_ + sizeof(Word));
+      current_word_ = (current_word_ & mask_) | (word & ~mask_);
+      next_word = (next_word & ~mask_) | (word & mask_);
+      store<Word>(bitmap_, current_word_);
+      store<Word>(bitmap_ + sizeof(Word), next_word);
+      current_word_ = next_word;
+    } else {
+      store<Word>(bitmap_, word);
+    }
+    bitmap_ += sizeof(Word);
+  }
+
+  void PutNextTrailingByte(uint8_t byte, int valid_bits) {
+    if (valid_bits == 8) {
+      if (may_have_byte_offset && offset_) {
+        byte = (byte << offset_) | (byte >> (8 - offset_));
+        uint8_t next_byte = load<uint8_t>(bitmap_ + 1);
+        current_byte_ = (current_byte_ & mask_) | (byte & ~mask_);
+        next_byte = (next_byte & ~mask_) | (byte & mask_);
+        store<uint8_t>(bitmap_, current_byte_);
+        store<uint8_t>(bitmap_ + 1, next_byte);
+        current_byte_ = next_byte;
+      } else {
+        store<uint8_t>(bitmap_, byte);
+      }
+      ++bitmap_;
+    } else {
+      assert(valid_bits > 0);
+      assert(valid_bits < 8);
+      assert(bitmap_ + BitUtil::BytesForBits(offset_ + valid_bits) <= bitmap_end_);
+      internal::BitmapWriter writer(bitmap_, offset_, valid_bits);
+      for (int i = 0; i < valid_bits; ++i) {
+        (byte & 0x01) ? writer.Set() : writer.Clear();
+        writer.Next();
+        byte >>= 1;
+      }
+      writer.Finish();
+    }
+  }
+
+ private:
+  int64_t offset_;
+  uint8_t* bitmap_;
+
+  const uint8_t* bitmap_end_;
+  uint64_t mask_;
+  union {
+    Word current_word_;
+    struct {
+#if ARROW_LITTLE_ENDIAN == 0
+      uint8_t padding_bytes_[sizeof(Word) - 1];
+#endif
+      uint8_t current_byte_;
+    };
+  };
+
+  template <typename DType>
+  DType load(const uint8_t* bitmap) {
+    assert(bitmap + sizeof(DType) <= bitmap_end_);
+    return BitUtil::ToLittleEndian(util::SafeLoadAs<DType>(bitmap));
+  }
+
+  template <typename DType>
+  void store(uint8_t* bitmap, DType data) {
+    assert(bitmap + sizeof(DType) <= bitmap_end_);
+    util::SafeStore(bitmap, BitUtil::FromLittleEndian(data));
+  }
+};
+
 }  // namespace internal
 }  // namespace arrow

From 6bc94da86437646ddcf4902f806ebf36efe36a24 Mon Sep 17 00:00:00 2001
From: Steven Burns <royalstream@hotmail.com>
Date: Wed, 30 Jun 2021 09:58:23 -0500
Subject: [PATCH 482/719] MINOR: [C#] Fixing example to use WriteEndAsync
 instead of WriteFooterAsync

WriteFooterAsync is private, so the example doesn't compile.
This method was probably public in an earlier version of the library.
WriteEndAsync seems to be the proper replacement.

Closes #10399 from royalstream/patch-1

Authored-by: Steven Burns <royalstream@hotmail.com>
Signed-off-by: Eric Erhardt <eric.erhardt@microsoft.com>
---
 csharp/examples/FluentBuilderExample/Program.cs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/csharp/examples/FluentBuilderExample/Program.cs b/csharp/examples/FluentBuilderExample/Program.cs
index a55f8419518..6dbdc3d778e 100644
--- a/csharp/examples/FluentBuilderExample/Program.cs
+++ b/csharp/examples/FluentBuilderExample/Program.cs
@@ -51,7 +51,7 @@ public static async Task Main(string[] args)
             using (var writer = new ArrowFileWriter(stream, recordBatch.Schema))
             {
                 await writer.WriteRecordBatchAsync(recordBatch);
-                await writer.WriteFooterAsync();
+                await writer.WriteEndAsync();
             }
 
             Console.WriteLine("Done");

From 01f3338f7cf8dccf38602842a9ade3b0f840cc10 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Wed, 30 Jun 2021 18:30:37 +0200
Subject: [PATCH 483/719] ARROW-13095: [C++] Implement trig compute functions

Adds sin/cos/tan and their inverses. Checked variants check for what would be domain errors (this does not apply to atan/atan2).

Closes #10544 from lidavidm/arrow-13095

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/compute/api_scalar.cc           |   7 +
 cpp/src/arrow/compute/api_scalar.h            |  61 +++
 .../arrow/compute/kernels/codegen_internal.h  |  14 +-
 .../compute/kernels/scalar_arithmetic.cc      | 360 ++++++++++++++++--
 .../compute/kernels/scalar_arithmetic_test.cc | 106 ++++++
 .../arrow/compute/kernels/scalar_compare.cc   |  45 ++-
 cpp/src/arrow/compute/kernels/util_internal.h |  12 +
 docs/source/cpp/compute.rst                   |  34 ++
 docs/source/python/api/compute.rst            |  22 ++
 9 files changed, 607 insertions(+), 54 deletions(-)

diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc
index f005e70e348..20bba982a74 100644
--- a/cpp/src/arrow/compute/api_scalar.cc
+++ b/cpp/src/arrow/compute/api_scalar.cc
@@ -49,6 +49,12 @@ namespace compute {
 
 SCALAR_ARITHMETIC_UNARY(AbsoluteValue, "abs", "abs_checked")
 SCALAR_ARITHMETIC_UNARY(Negate, "negate", "negate_checked")
+SCALAR_ARITHMETIC_UNARY(Sin, "sin", "sin_checked")
+SCALAR_ARITHMETIC_UNARY(Cos, "cos", "cos_checked")
+SCALAR_ARITHMETIC_UNARY(Asin, "asin", "asin_checked")
+SCALAR_ARITHMETIC_UNARY(Acos, "acos", "acos_checked")
+SCALAR_ARITHMETIC_UNARY(Tan, "tan", "tan_checked")
+SCALAR_EAGER_UNARY(Atan, "atan")
 
 #define SCALAR_ARITHMETIC_BINARY(NAME, REGISTRY_NAME, REGISTRY_CHECKED_NAME)           \
   Result<Datum> NAME(const Datum& left, const Datum& right, ArithmeticOptions options, \
@@ -64,6 +70,7 @@ SCALAR_ARITHMETIC_BINARY(Divide, "divide", "divide_checked")
 SCALAR_ARITHMETIC_BINARY(Power, "power", "power_checked")
 SCALAR_ARITHMETIC_BINARY(ShiftLeft, "shift_left", "shift_left_checked")
 SCALAR_ARITHMETIC_BINARY(ShiftRight, "shift_right", "shift_right_checked")
+SCALAR_EAGER_BINARY(Atan2, "atan2")
 
 Result<Datum> MaxElementWise(const std::vector<Datum>& args,
                              ElementWiseAggregateOptions options, ExecContext* ctx) {
diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h
index b1013257401..2ec9c1d7653 100644
--- a/cpp/src/arrow/compute/api_scalar.h
+++ b/cpp/src/arrow/compute/api_scalar.h
@@ -335,6 +335,67 @@ Result<Datum> ShiftRight(const Datum& left, const Datum& right,
                          ArithmeticOptions options = ArithmeticOptions(),
                          ExecContext* ctx = NULLPTR);
 
+/// \brief Compute the sine of the array values.
+/// \param[in] arg The values to compute the sine for.
+/// \param[in] options arithmetic options (enable/disable overflow checking), optional
+/// \param[in] ctx the function execution context, optional
+/// \return the elementwise sine of the values
+ARROW_EXPORT
+Result<Datum> Sin(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(),
+                  ExecContext* ctx = NULLPTR);
+
+/// \brief Compute the cosine of the array values.
+/// \param[in] arg The values to compute the cosine for.
+/// \param[in] options arithmetic options (enable/disable overflow checking), optional
+/// \param[in] ctx the function execution context, optional
+/// \return the elementwise cosine of the values
+ARROW_EXPORT
+Result<Datum> Cos(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(),
+                  ExecContext* ctx = NULLPTR);
+
+/// \brief Compute the inverse sine (arcsine) of the array values.
+/// \param[in] arg The values to compute the inverse sine for.
+/// \param[in] options arithmetic options (enable/disable overflow checking), optional
+/// \param[in] ctx the function execution context, optional
+/// \return the elementwise inverse sine of the values
+ARROW_EXPORT
+Result<Datum> Asin(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(),
+                   ExecContext* ctx = NULLPTR);
+
+/// \brief Compute the inverse cosine (arccosine) of the array values.
+/// \param[in] arg The values to compute the inverse cosine for.
+/// \param[in] options arithmetic options (enable/disable overflow checking), optional
+/// \param[in] ctx the function execution context, optional
+/// \return the elementwise inverse cosine of the values
+ARROW_EXPORT
+Result<Datum> Acos(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(),
+                   ExecContext* ctx = NULLPTR);
+
+/// \brief Compute the tangent of the array values.
+/// \param[in] arg The values to compute the tangent for.
+/// \param[in] options arithmetic options (enable/disable overflow checking), optional
+/// \param[in] ctx the function execution context, optional
+/// \return the elementwise tangent of the values
+ARROW_EXPORT
+Result<Datum> Tan(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(),
+                  ExecContext* ctx = NULLPTR);
+
+/// \brief Compute the inverse tangent (arctangent) of the array values.
+/// \param[in] arg The values to compute the inverse tangent for.
+/// \param[in] ctx the function execution context, optional
+/// \return the elementwise inverse tangent of the values
+ARROW_EXPORT
+Result<Datum> Atan(const Datum& arg, ExecContext* ctx = NULLPTR);
+
+/// \brief Compute the inverse tangent (arctangent) of y/x, using the
+/// argument signs to determine the correct quadrant.
+/// \param[in] y The y-values to compute the inverse tangent for.
+/// \param[in] x The x-values to compute the inverse tangent for.
+/// \param[in] ctx the function execution context, optional
+/// \return the elementwise inverse tangent of the values
+ARROW_EXPORT
+Result<Datum> Atan2(const Datum& y, const Datum& x, ExecContext* ctx = NULLPTR);
+
 /// \brief Find the element-wise maximum of any number of arrays or scalars.
 /// Array values must be the same length.
 ///
diff --git a/cpp/src/arrow/compute/kernels/codegen_internal.h b/cpp/src/arrow/compute/kernels/codegen_internal.h
index 140f9fdc669..a68bb970b4a 100644
--- a/cpp/src/arrow/compute/kernels/codegen_internal.h
+++ b/cpp/src/arrow/compute/kernels/codegen_internal.h
@@ -826,7 +826,8 @@ struct ScalarBinary {
     ArrayIterator<Arg0Type> arg0_it(arg0);
     ArrayIterator<Arg1Type> arg1_it(arg1);
     RETURN_NOT_OK(OutputAdapter<OutType>::Write(ctx, out, [&]() -> OutValue {
-      return Op::template Call(ctx, arg0_it(), arg1_it(), &st);
+      return Op::template Call<OutValue, Arg0Value, Arg1Value>(ctx, arg0_it(), arg1_it(),
+                                                               &st);
     }));
     return st;
   }
@@ -837,7 +838,8 @@ struct ScalarBinary {
     ArrayIterator<Arg0Type> arg0_it(arg0);
     auto arg1_val = UnboxScalar<Arg1Type>::Unbox(arg1);
     RETURN_NOT_OK(OutputAdapter<OutType>::Write(ctx, out, [&]() -> OutValue {
-      return Op::template Call(ctx, arg0_it(), arg1_val, &st);
+      return Op::template Call<OutValue, Arg0Value, Arg1Value>(ctx, arg0_it(), arg1_val,
+                                                               &st);
     }));
     return st;
   }
@@ -848,7 +850,8 @@ struct ScalarBinary {
     auto arg0_val = UnboxScalar<Arg0Type>::Unbox(arg0);
     ArrayIterator<Arg1Type> arg1_it(arg1);
     RETURN_NOT_OK(OutputAdapter<OutType>::Write(ctx, out, [&]() -> OutValue {
-      return Op::template Call(ctx, arg0_val, arg1_it(), &st);
+      return Op::template Call<OutValue, Arg0Value, Arg1Value>(ctx, arg0_val, arg1_it(),
+                                                               &st);
     }));
     return st;
   }
@@ -859,8 +862,9 @@ struct ScalarBinary {
     if (out->scalar()->is_valid) {
       auto arg0_val = UnboxScalar<Arg0Type>::Unbox(arg0);
       auto arg1_val = UnboxScalar<Arg1Type>::Unbox(arg1);
-      BoxScalar<OutType>::Box(Op::template Call(ctx, arg0_val, arg1_val, &st),
-                              out->scalar().get());
+      BoxScalar<OutType>::Box(
+          Op::template Call<OutValue, Arg0Value, Arg1Value>(ctx, arg0_val, arg1_val, &st),
+          out->scalar().get());
     }
     return st;
   }
diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
index ef9ef78054a..da3a3095041 100644
--- a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
@@ -21,6 +21,7 @@
 #include <utility>
 
 #include "arrow/compute/kernels/common.h"
+#include "arrow/compute/kernels/util_internal.h"
 #include "arrow/type_traits.h"
 #include "arrow/util/decimal.h"
 #include "arrow/util/int_util_internal.h"
@@ -58,12 +59,12 @@ using enable_if_signed_integer = enable_if_t<is_signed_integer<T>::value, T>;
 template <typename T>
 using enable_if_unsigned_integer = enable_if_t<is_unsigned_integer<T>::value, T>;
 
-template <typename T>
+template <typename T, typename R = T>
 using enable_if_integer =
-    enable_if_t<is_signed_integer<T>::value || is_unsigned_integer<T>::value, T>;
+    enable_if_t<is_signed_integer<T>::value || is_unsigned_integer<T>::value, R>;
 
-template <typename T>
-using enable_if_floating_point = enable_if_t<std::is_floating_point<T>::value, T>;
+template <typename T, typename R = T>
+using enable_if_floating_point = enable_if_t<std::is_floating_point<T>::value, R>;
 
 template <typename T>
 using enable_if_decimal =
@@ -117,20 +118,20 @@ struct AbsoluteValueChecked {
 };
 
 struct Add {
-  template <typename T>
-  static constexpr enable_if_floating_point<T> Call(KernelContext*, T left, T right,
+  template <typename T, typename Arg0, typename Arg1>
+  static constexpr enable_if_floating_point<T> Call(KernelContext*, Arg0 left, Arg1 right,
                                                     Status*) {
     return left + right;
   }
 
-  template <typename T>
-  static constexpr enable_if_unsigned_integer<T> Call(KernelContext*, T left, T right,
-                                                      Status*) {
+  template <typename T, typename Arg0, typename Arg1>
+  static constexpr enable_if_unsigned_integer<T> Call(KernelContext*, Arg0 left,
+                                                      Arg1 right, Status*) {
     return left + right;
   }
 
-  template <typename T>
-  static constexpr enable_if_signed_integer<T> Call(KernelContext*, T left, T right,
+  template <typename T, typename Arg0, typename Arg1>
+  static constexpr enable_if_signed_integer<T> Call(KernelContext*, Arg0 left, Arg1 right,
                                                     Status*) {
     return arrow::internal::SafeSignedAdd(left, right);
   }
@@ -166,21 +167,24 @@ struct AddChecked {
 };
 
 struct Subtract {
-  template <typename T>
-  static constexpr enable_if_floating_point<T> Call(KernelContext*, T left, T right,
+  template <typename T, typename Arg0, typename Arg1>
+  static constexpr enable_if_floating_point<T> Call(KernelContext*, Arg0 left, Arg1 right,
                                                     Status*) {
+    static_assert(std::is_same<T, Arg0>::value && std::is_same<T, Arg1>::value, "");
     return left - right;
   }
 
-  template <typename T>
-  static constexpr enable_if_unsigned_integer<T> Call(KernelContext*, T left, T right,
-                                                      Status*) {
+  template <typename T, typename Arg0, typename Arg1>
+  static constexpr enable_if_unsigned_integer<T> Call(KernelContext*, Arg0 left,
+                                                      Arg1 right, Status*) {
+    static_assert(std::is_same<T, Arg0>::value && std::is_same<T, Arg1>::value, "");
     return left - right;
   }
 
-  template <typename T>
-  static constexpr enable_if_signed_integer<T> Call(KernelContext*, T left, T right,
+  template <typename T, typename Arg0, typename Arg1>
+  static constexpr enable_if_signed_integer<T> Call(KernelContext*, Arg0 left, Arg1 right,
                                                     Status*) {
+    static_assert(std::is_same<T, Arg0>::value && std::is_same<T, Arg1>::value, "");
     return arrow::internal::SafeSignedSubtract(left, right);
   }
 
@@ -224,21 +228,23 @@ struct Multiply {
   static_assert(std::is_same<decltype(int64_t() * int64_t()), int64_t>::value, "");
   static_assert(std::is_same<decltype(uint64_t() * uint64_t()), uint64_t>::value, "");
 
-  template <typename T>
+  template <typename T, typename Arg0, typename Arg1>
   static constexpr enable_if_floating_point<T> Call(KernelContext*, T left, T right,
                                                     Status*) {
     return left * right;
   }
 
-  template <typename T>
-  static constexpr enable_if_unsigned_integer<T> Call(KernelContext*, T left, T right,
-                                                      Status*) {
+  template <typename T, typename Arg0, typename Arg1>
+  static constexpr enable_if_t<
+      is_unsigned_integer<T>::value && !std::is_same<T, uint16_t>::value, T>
+  Call(KernelContext*, T left, T right, Status*) {
     return left * right;
   }
 
-  template <typename T>
-  static constexpr enable_if_signed_integer<T> Call(KernelContext*, T left, T right,
-                                                    Status*) {
+  template <typename T, typename Arg0, typename Arg1>
+  static constexpr enable_if_t<
+      is_signed_integer<T>::value && !std::is_same<T, int16_t>::value, T>
+  Call(KernelContext*, T left, T right, Status*) {
     return to_unsigned(left) * to_unsigned(right);
   }
 
@@ -246,12 +252,14 @@ struct Multiply {
   // integer. However, some inputs may nevertheless overflow (which triggers undefined
   // behaviour). Therefore we first cast to 32 bit unsigned integers where overflow is
   // well defined.
-  template <typename T = void>
-  static constexpr int16_t Call(KernelContext*, int16_t left, int16_t right, Status*) {
+  template <typename T, typename Arg0, typename Arg1>
+  static constexpr enable_if_same<T, int16_t, T> Call(KernelContext*, int16_t left,
+                                                      int16_t right, Status*) {
     return static_cast<uint32_t>(left) * static_cast<uint32_t>(right);
   }
-  template <typename T = void>
-  static constexpr uint16_t Call(KernelContext*, uint16_t left, uint16_t right, Status*) {
+  template <typename T, typename Arg0, typename Arg1>
+  static constexpr enable_if_same<T, uint16_t, T> Call(KernelContext*, uint16_t left,
+                                                       uint16_t right, Status*) {
     return static_cast<uint32_t>(left) * static_cast<uint32_t>(right);
   }
 
@@ -405,7 +413,7 @@ struct Power {
     return pow;
   }
 
-  template <typename T>
+  template <typename T, typename Arg0, typename Arg1>
   static enable_if_integer<T> Call(KernelContext*, T base, T exp, Status* st) {
     if (exp < 0) {
       *st = Status::Invalid("integers to negative integer powers are not allowed");
@@ -414,7 +422,7 @@ struct Power {
     return static_cast<T>(IntegerPower(base, exp));
   }
 
-  template <typename T>
+  template <typename T, typename Arg0, typename Arg1>
   static enable_if_floating_point<T> Call(KernelContext*, T base, T exp, Status*) {
     return std::pow(base, exp);
   }
@@ -554,6 +562,130 @@ struct ShiftRightChecked {
   }
 };
 
+struct Sin {
+  template <typename T, typename Arg0>
+  static enable_if_floating_point<Arg0, T> Call(KernelContext*, Arg0 val, Status*) {
+    static_assert(std::is_same<T, Arg0>::value, "");
+    return std::sin(val);
+  }
+};
+
+struct SinChecked {
+  template <typename T, typename Arg0>
+  static enable_if_floating_point<Arg0, T> Call(KernelContext*, Arg0 val, Status* st) {
+    static_assert(std::is_same<T, Arg0>::value, "");
+    if (ARROW_PREDICT_FALSE(std::isinf(val))) {
+      *st = Status::Invalid("domain error");
+      return val;
+    }
+    return std::sin(val);
+  }
+};
+
+struct Cos {
+  template <typename T, typename Arg0>
+  static enable_if_floating_point<Arg0, T> Call(KernelContext*, Arg0 val, Status*) {
+    static_assert(std::is_same<T, Arg0>::value, "");
+    return std::cos(val);
+  }
+};
+
+struct CosChecked {
+  template <typename T, typename Arg0>
+  static enable_if_floating_point<Arg0, T> Call(KernelContext*, Arg0 val, Status* st) {
+    static_assert(std::is_same<T, Arg0>::value, "");
+    if (ARROW_PREDICT_FALSE(std::isinf(val))) {
+      *st = Status::Invalid("domain error");
+      return val;
+    }
+    return std::cos(val);
+  }
+};
+
+struct Tan {
+  template <typename T, typename Arg0>
+  static enable_if_floating_point<Arg0, T> Call(KernelContext*, Arg0 val, Status*) {
+    static_assert(std::is_same<T, Arg0>::value, "");
+    return std::tan(val);
+  }
+};
+
+struct TanChecked {
+  template <typename T, typename Arg0>
+  static enable_if_floating_point<Arg0, T> Call(KernelContext*, Arg0 val, Status* st) {
+    static_assert(std::is_same<T, Arg0>::value, "");
+    if (ARROW_PREDICT_FALSE(std::isinf(val))) {
+      *st = Status::Invalid("domain error");
+      return val;
+    }
+    // Cannot raise range errors (overflow) since PI/2 is not exactly representable
+    return std::tan(val);
+  }
+};
+
+struct Asin {
+  template <typename T, typename Arg0>
+  static enable_if_floating_point<Arg0, T> Call(KernelContext*, Arg0 val, Status*) {
+    static_assert(std::is_same<T, Arg0>::value, "");
+    if (ARROW_PREDICT_FALSE(val < -1.0 || val > 1.0)) {
+      return std::numeric_limits<T>::quiet_NaN();
+    }
+    return std::asin(val);
+  }
+};
+
+struct AsinChecked {
+  template <typename T, typename Arg0>
+  static enable_if_floating_point<Arg0, T> Call(KernelContext*, Arg0 val, Status* st) {
+    static_assert(std::is_same<T, Arg0>::value, "");
+    if (ARROW_PREDICT_FALSE(val < -1.0 || val > 1.0)) {
+      *st = Status::Invalid("domain error");
+      return val;
+    }
+    return std::asin(val);
+  }
+};
+
+struct Acos {
+  template <typename T, typename Arg0>
+  static enable_if_floating_point<Arg0, T> Call(KernelContext*, Arg0 val, Status*) {
+    static_assert(std::is_same<T, Arg0>::value, "");
+    if (ARROW_PREDICT_FALSE((val < -1.0 || val > 1.0))) {
+      return std::numeric_limits<T>::quiet_NaN();
+    }
+    return std::acos(val);
+  }
+};
+
+struct AcosChecked {
+  template <typename T, typename Arg0>
+  static enable_if_floating_point<Arg0, T> Call(KernelContext*, Arg0 val, Status* st) {
+    static_assert(std::is_same<T, Arg0>::value, "");
+    if (ARROW_PREDICT_FALSE((val < -1.0 || val > 1.0))) {
+      *st = Status::Invalid("domain error");
+      return val;
+    }
+    return std::acos(val);
+  }
+};
+
+struct Atan {
+  template <typename T, typename Arg0>
+  static enable_if_floating_point<Arg0, T> Call(KernelContext*, Arg0 val, Status*) {
+    static_assert(std::is_same<T, Arg0>::value, "");
+    return std::atan(val);
+  }
+};
+
+struct Atan2 {
+  template <typename T, typename Arg0, typename Arg1>
+  static enable_if_floating_point<Arg0, T> Call(KernelContext*, Arg0 y, Arg1 x, Status*) {
+    static_assert(std::is_same<T, Arg0>::value, "");
+    static_assert(std::is_same<Arg0, Arg1>::value, "");
+    return std::atan2(y, x);
+  }
+};
+
 // Generate a kernel given an arithmetic functor
 template <template <typename... Args> class KernelGenerator, typename Op>
 ArrayKernelExec ArithmeticExecFromOp(detail::GetTypeId get_id) {
@@ -633,6 +765,19 @@ ArrayKernelExec ShiftExecFromOp(detail::GetTypeId get_id) {
   }
 }
 
+template <template <typename... Args> class KernelGenerator, typename Op>
+ArrayKernelExec GenerateArithmeticFloatingPoint(detail::GetTypeId get_id) {
+  switch (get_id.id) {
+    case Type::FLOAT:
+      return KernelGenerator<FloatType, FloatType, Op>::Exec;
+    case Type::DOUBLE:
+      return KernelGenerator<DoubleType, DoubleType, Op>::Exec;
+    default:
+      DCHECK(false);
+      return ExecFail;
+  }
+}
+
 Status CastBinaryDecimalArgs(const std::string& func_name,
                              std::vector<ValueDescr>* values) {
   auto& left_type = (*values)[0].type;
@@ -904,6 +1049,42 @@ std::shared_ptr<ScalarFunction> MakeShiftFunctionNotNull(std::string name,
   return func;
 }
 
+template <typename Op>
+std::shared_ptr<ScalarFunction> MakeUnaryArithmeticFunctionFloatingPoint(
+    std::string name, const FunctionDoc* doc) {
+  auto func = std::make_shared<ArithmeticFunction>(name, Arity::Unary(), doc);
+  for (const auto& ty : FloatingPointTypes()) {
+    auto output = is_integer(ty->id()) ? float64() : ty;
+    auto exec = GenerateArithmeticFloatingPoint<ScalarUnary, Op>(ty);
+    DCHECK_OK(func->AddKernel({ty}, output, exec));
+  }
+  return func;
+}
+
+template <typename Op>
+std::shared_ptr<ScalarFunction> MakeUnaryArithmeticFunctionFloatingPointNotNull(
+    std::string name, const FunctionDoc* doc) {
+  auto func = std::make_shared<ArithmeticFunction>(name, Arity::Unary(), doc);
+  for (const auto& ty : FloatingPointTypes()) {
+    auto output = is_integer(ty->id()) ? float64() : ty;
+    auto exec = GenerateArithmeticFloatingPoint<ScalarUnaryNotNull, Op>(ty);
+    DCHECK_OK(func->AddKernel({ty}, output, exec));
+  }
+  return func;
+}
+
+template <typename Op>
+std::shared_ptr<ScalarFunction> MakeArithmeticFunctionFloatingPoint(
+    std::string name, const FunctionDoc* doc) {
+  auto func = std::make_shared<ArithmeticFunction>(name, Arity::Binary(), doc);
+  for (const auto& ty : FloatingPointTypes()) {
+    auto output = is_integer(ty->id()) ? float64() : ty;
+    auto exec = GenerateArithmeticFloatingPoint<ScalarBinaryEqualTypes, Op>(ty);
+    DCHECK_OK(func->AddKernel({ty, ty}, output, exec));
+  }
+  return func;
+}
+
 const FunctionDoc absolute_value_doc{
     "Calculate the absolute value of the argument element-wise",
     ("Results will wrap around on integer overflow.\n"
@@ -1041,6 +1222,79 @@ const FunctionDoc shift_right_checked_doc{
      "See \"shift_right\" for a variant that doesn't fail for an invalid shift amount"),
     {"x", "y"}};
 
+const FunctionDoc sin_doc{"Compute the sine of the elements argument-wise",
+                          ("Integer arguments return double values. "
+                           "This function returns NaN on values outside its domain. "
+                           "To raise an error instead, see \"sin_checked\"."),
+                          {"x"}};
+
+const FunctionDoc sin_checked_doc{
+    "Compute the sine of the elements argument-wise",
+    ("Integer arguments return double values. "
+     "This function raises an error on values outside its domain. "
+     "To return NaN instead, see \"sin\"."),
+    {"x"}};
+
+const FunctionDoc cos_doc{"Compute the cosine of the elements argument-wise",
+                          ("Integer arguments return double values. "
+                           "This function returns NaN on values outside its domain. "
+                           "To raise an error instead, see \"cos_checked\"."),
+                          {"x"}};
+
+const FunctionDoc cos_checked_doc{
+    "Compute the cosine of the elements argument-wise",
+    ("Integer arguments return double values. "
+     "This function raises an error on values outside its domain. "
+     "To return NaN instead, see \"cos\"."),
+    {"x"}};
+
+const FunctionDoc tan_doc{"Compute the tangent of the elements argument-wise",
+                          ("Integer arguments return double values. "
+                           "This function returns NaN on values outside its domain. "
+                           "To raise an error instead, see \"tan_checked\"."),
+                          {"x"}};
+
+const FunctionDoc tan_checked_doc{
+    "Compute the tangent of the elements argument-wise",
+    ("Integer arguments return double values. "
+     "This function raises an error on values outside its domain. "
+     "To return NaN instead, see \"tan\"."),
+    {"x"}};
+
+const FunctionDoc asin_doc{"Compute the inverse sine of the elements argument-wise",
+                           ("Integer arguments return double values. "
+                            "This function returns NaN on values outside its domain. "
+                            "To raise an error instead, see \"asin_checked\"."),
+                           {"x"}};
+
+const FunctionDoc asin_checked_doc{
+    "Compute the inverse sine of the elements argument-wise",
+    ("Integer arguments return double values. "
+     "This function raises an error on values outside its domain. "
+     "To return NaN instead, see \"asin\"."),
+    {"x"}};
+
+const FunctionDoc acos_doc{"Compute the inverse cosine of the elements argument-wise",
+                           ("Integer arguments return double values. "
+                            "This function returns NaN on values outside its domain. "
+                            "To raise an error instead, see \"acos_checked\"."),
+                           {"x"}};
+
+const FunctionDoc acos_checked_doc{
+    "Compute the inverse cosine of the elements argument-wise",
+    ("Integer arguments return double values. "
+     "This function raises an error on values outside its domain. "
+     "To return NaN instead, see \"acos\"."),
+    {"x"}};
+
+const FunctionDoc atan_doc{"Compute the principal value of the inverse tangent",
+                           ("Integer arguments return double values."),
+                           {"x"}};
+
+const FunctionDoc atan2_doc{
+    "Compute the inverse tangent using argument signs to determine the quadrant",
+    ("Integer arguments return double values."),
+    {"y", "x"}};
 }  // namespace
 
 void RegisterScalarArithmetic(FunctionRegistry* registry) {
@@ -1126,6 +1380,7 @@ void RegisterScalarArithmetic(FunctionRegistry* registry) {
   DCHECK_OK(registry->AddFunction(std::move(power_checked)));
 
   // ----------------------------------------------------------------------
+  // Bitwise functions
   {
     auto bit_wise_not = std::make_shared<ArithmeticFunction>(
         "bit_wise_not", Arity::Unary(), &bit_wise_not_doc);
@@ -1162,6 +1417,49 @@ void RegisterScalarArithmetic(FunctionRegistry* registry) {
   auto shift_right_checked = MakeShiftFunctionNotNull<ShiftRightChecked>(
       "shift_right_checked", &shift_right_checked_doc);
   DCHECK_OK(registry->AddFunction(std::move(shift_right_checked)));
+
+  // ----------------------------------------------------------------------
+  // Trig functions
+  auto sin = MakeUnaryArithmeticFunctionFloatingPoint<Sin>("sin", &sin_doc);
+  DCHECK_OK(registry->AddFunction(std::move(sin)));
+
+  auto sin_checked = MakeUnaryArithmeticFunctionFloatingPointNotNull<SinChecked>(
+      "sin_checked", &sin_checked_doc);
+  DCHECK_OK(registry->AddFunction(std::move(sin_checked)));
+
+  auto cos = MakeUnaryArithmeticFunctionFloatingPoint<Cos>("cos", &cos_doc);
+  DCHECK_OK(registry->AddFunction(std::move(cos)));
+
+  auto cos_checked = MakeUnaryArithmeticFunctionFloatingPointNotNull<CosChecked>(
+      "cos_checked", &cos_checked_doc);
+  DCHECK_OK(registry->AddFunction(std::move(cos_checked)));
+
+  auto tan = MakeUnaryArithmeticFunctionFloatingPoint<Tan>("tan", &tan_doc);
+  DCHECK_OK(registry->AddFunction(std::move(tan)));
+
+  auto tan_checked = MakeUnaryArithmeticFunctionFloatingPointNotNull<TanChecked>(
+      "tan_checked", &tan_checked_doc);
+  DCHECK_OK(registry->AddFunction(std::move(tan_checked)));
+
+  auto asin = MakeUnaryArithmeticFunctionFloatingPoint<Asin>("asin", &asin_doc);
+  DCHECK_OK(registry->AddFunction(std::move(asin)));
+
+  auto asin_checked = MakeUnaryArithmeticFunctionFloatingPointNotNull<AsinChecked>(
+      "asin_checked", &asin_checked_doc);
+  DCHECK_OK(registry->AddFunction(std::move(asin_checked)));
+
+  auto acos = MakeUnaryArithmeticFunctionFloatingPoint<Acos>("acos", &acos_doc);
+  DCHECK_OK(registry->AddFunction(std::move(acos)));
+
+  auto acos_checked = MakeUnaryArithmeticFunctionFloatingPointNotNull<AcosChecked>(
+      "acos_checked", &acos_checked_doc);
+  DCHECK_OK(registry->AddFunction(std::move(acos_checked)));
+
+  auto atan = MakeUnaryArithmeticFunctionFloatingPoint<Atan>("atan", &atan_doc);
+  DCHECK_OK(registry->AddFunction(std::move(atan)));
+
+  auto atan2 = MakeArithmeticFunctionFloatingPoint<Atan2>("atan2", &atan2_doc);
+  DCHECK_OK(registry->AddFunction(std::move(atan2)));
 }
 
 }  // namespace internal
diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
index a94eabb1be0..ed24a44484f 100644
--- a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
@@ -16,6 +16,8 @@
 // under the License.
 
 #include <algorithm>
+#define _USE_MATH_DEFINES
+#include <cmath>
 #include <memory>
 #include <string>
 #include <type_traits>
@@ -90,6 +92,12 @@ class TestUnaryArithmetic : public TestBase {
   void AssertUnaryOp(UnaryFunction func, const std::shared_ptr<Array>& arg,
                      const std::string& expected_json) {
     const auto expected = ArrayFromJSON(type_singleton(), expected_json);
+    return AssertUnaryOp(func, arg, expected);
+  }
+
+  // (Array)
+  void AssertUnaryOp(UnaryFunction func, const std::shared_ptr<Array>& arg,
+                     const std::shared_ptr<Array>& expected) {
     ASSERT_OK_AND_ASSIGN(Datum actual, func(arg, options_, nullptr));
     ValidateAndAssertApproxEqual(actual.make_array(), expected);
 
@@ -108,6 +116,11 @@ class TestUnaryArithmetic : public TestBase {
     auto arg = ArrayFromJSON(type_singleton(), argument);
     EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, ::testing::HasSubstr(expected_msg),
                                     func(arg, options_, nullptr));
+    for (int64_t i = 0; i < arg->length(); i++) {
+      ASSERT_OK_AND_ASSIGN(auto scalar, arg->GetScalar(i));
+      EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, ::testing::HasSubstr(expected_msg),
+                                      func(scalar, options_, nullptr));
+    }
   }
 
   void AssertUnaryOpNotImplemented(UnaryFunction func, const std::string& argument) {
@@ -232,6 +245,12 @@ class TestBinaryArithmetic : public TestBase {
                    const std::shared_ptr<Array>& right,
                    const std::string& expected_json) {
     const auto expected = ArrayFromJSON(type_singleton(), expected_json);
+    AssertBinop(func, left, right, expected);
+  }
+
+  void AssertBinop(BinaryFunction func, const std::shared_ptr<Array>& left,
+                   const std::shared_ptr<Array>& right,
+                   const std::shared_ptr<Array>& expected) {
     ASSERT_OK_AND_ASSIGN(Datum actual, func(left, right, options_, nullptr));
     ValidateAndAssertApproxEqual(actual.make_array(), expected);
 
@@ -1715,5 +1734,92 @@ TYPED_TEST(TestBinaryArithmeticUnsigned, ShiftRightOverflowRaises) {
                           "shift amount must be >= 0 and less than precision of type");
 }
 
+TYPED_TEST(TestUnaryArithmeticFloating, TrigSin) {
+  this->SetNansEqual(true);
+  this->AssertUnaryOp(Sin, "[Inf, -Inf]", "[NaN, NaN]");
+  for (auto check_overflow : {false, true}) {
+    this->SetOverflowCheck(check_overflow);
+    this->AssertUnaryOp(Sin, "[]", "[]");
+    this->AssertUnaryOp(Sin, "[null, NaN]", "[null, NaN]");
+    this->AssertUnaryOp(Sin, MakeArray(0, M_PI_2, M_PI), "[0, 1, 0]");
+  }
+  this->AssertUnaryOpRaises(Sin, "[Inf, -Inf]", "domain error");
+}
+
+TYPED_TEST(TestUnaryArithmeticFloating, TrigCos) {
+  this->SetNansEqual(true);
+  this->AssertUnaryOp(Cos, "[Inf, -Inf]", "[NaN, NaN]");
+  for (auto check_overflow : {false, true}) {
+    this->SetOverflowCheck(check_overflow);
+    this->AssertUnaryOp(Cos, "[]", "[]");
+    this->AssertUnaryOp(Cos, "[null, NaN]", "[null, NaN]");
+    this->AssertUnaryOp(Cos, MakeArray(0, M_PI_2, M_PI), "[1, 0, -1]");
+  }
+  this->AssertUnaryOpRaises(Cos, "[Inf, -Inf]", "domain error");
+}
+
+TYPED_TEST(TestUnaryArithmeticFloating, TrigTan) {
+  this->SetNansEqual(true);
+  this->AssertUnaryOp(Tan, "[Inf, -Inf]", "[NaN, NaN]");
+  for (auto check_overflow : {false, true}) {
+    this->SetOverflowCheck(check_overflow);
+    this->AssertUnaryOp(Tan, "[]", "[]");
+    this->AssertUnaryOp(Tan, "[null, NaN]", "[null, NaN]");
+    // N.B. pi/2 isn't representable exactly -> there are no poles
+    // (i.e. tan(pi/2) is merely a large value and not +Inf)
+    this->AssertUnaryOp(Tan, MakeArray(0, M_PI), "[0, 0]");
+  }
+  this->AssertUnaryOpRaises(Tan, "[Inf, -Inf]", "domain error");
+}
+
+TYPED_TEST(TestUnaryArithmeticFloating, TrigAsin) {
+  this->SetNansEqual(true);
+  this->AssertUnaryOp(Asin, "[Inf, -Inf, -2, 2]", "[NaN, NaN, NaN, NaN]");
+  for (auto check_overflow : {false, true}) {
+    this->SetOverflowCheck(check_overflow);
+    this->AssertUnaryOp(Asin, "[]", "[]");
+    this->AssertUnaryOp(Asin, "[null, NaN]", "[null, NaN]");
+    this->AssertUnaryOp(Asin, "[0, 1, -1]", MakeArray(0, M_PI_2, -M_PI_2));
+  }
+  this->AssertUnaryOpRaises(Asin, "[Inf, -Inf, -2, 2]", "domain error");
+}
+
+TYPED_TEST(TestUnaryArithmeticFloating, TrigAcos) {
+  this->SetNansEqual(true);
+  this->AssertUnaryOp(Asin, "[Inf, -Inf, -2, 2]", "[NaN, NaN, NaN, NaN]");
+  for (auto check_overflow : {false, true}) {
+    this->SetOverflowCheck(check_overflow);
+    this->AssertUnaryOp(Acos, "[]", "[]");
+    this->AssertUnaryOp(Acos, "[null, NaN]", "[null, NaN]");
+    this->AssertUnaryOp(Acos, "[0, 1, -1]", MakeArray(M_PI_2, 0, M_PI));
+  }
+  this->AssertUnaryOpRaises(Acos, "[Inf, -Inf, -2, 2]", "domain error");
+}
+
+TYPED_TEST(TestUnaryArithmeticFloating, TrigAtan) {
+  this->SetNansEqual(true);
+  auto atan = [](const Datum& arg, ArithmeticOptions, ExecContext* ctx) {
+    return Atan(arg, ctx);
+  };
+  this->AssertUnaryOp(atan, "[]", "[]");
+  this->AssertUnaryOp(atan, "[null, NaN]", "[null, NaN]");
+  this->AssertUnaryOp(atan, "[0, 1, -1, Inf, -Inf]",
+                      MakeArray(0, M_PI_4, -M_PI_4, M_PI_2, -M_PI_2));
+}
+
+TYPED_TEST(TestBinaryArithmeticFloating, TrigAtan2) {
+  this->SetNansEqual(true);
+  auto atan2 = [](const Datum& y, const Datum& x, ArithmeticOptions, ExecContext* ctx) {
+    return Atan2(y, x, ctx);
+  };
+  this->AssertBinop(atan2, "[]", "[]", "[]");
+  this->AssertBinop(atan2, "[0, 0, null, NaN]", "[null, NaN, 0, 0]",
+                    "[null, NaN, null, NaN]");
+  this->AssertBinop(atan2, "[0, 0, -0.0, 0, -0.0, 0, 1, 0, -1, Inf, -Inf, 0, 0]",
+                    "[0, 0, 0, -0.0, -0.0, 1, 0, -1, 0, 0, 0, Inf, -Inf]",
+                    MakeArray(0, 0, -0.0, M_PI, -M_PI, 0, M_PI_2, M_PI, -M_PI_2, M_PI_2,
+                              -M_PI_2, 0, M_PI));
+}
+
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/scalar_compare.cc b/cpp/src/arrow/compute/kernels/scalar_compare.cc
index 041c6a282f9..4342d776c38 100644
--- a/cpp/src/arrow/compute/kernels/scalar_compare.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_compare.cc
@@ -34,29 +34,33 @@ namespace internal {
 namespace {
 
 struct Equal {
-  template <typename T>
-  static constexpr bool Call(KernelContext*, const T& left, const T& right, Status*) {
+  template <typename T, typename Arg0, typename Arg1>
+  static constexpr T Call(KernelContext*, const Arg0& left, const Arg1& right, Status*) {
+    static_assert(std::is_same<T, bool>::value && std::is_same<Arg0, Arg1>::value, "");
     return left == right;
   }
 };
 
 struct NotEqual {
-  template <typename T>
-  static constexpr bool Call(KernelContext*, const T& left, const T& right, Status*) {
+  template <typename T, typename Arg0, typename Arg1>
+  static constexpr T Call(KernelContext*, const Arg0& left, const Arg1& right, Status*) {
+    static_assert(std::is_same<T, bool>::value && std::is_same<Arg0, Arg1>::value, "");
     return left != right;
   }
 };
 
 struct Greater {
-  template <typename T>
-  static constexpr bool Call(KernelContext*, const T& left, const T& right, Status*) {
+  template <typename T, typename Arg0, typename Arg1>
+  static constexpr T Call(KernelContext*, const Arg0& left, const Arg1& right, Status*) {
+    static_assert(std::is_same<T, bool>::value && std::is_same<Arg0, Arg1>::value, "");
     return left > right;
   }
 };
 
 struct GreaterEqual {
-  template <typename T>
-  static constexpr bool Call(KernelContext*, const T& left, const T& right, Status*) {
+  template <typename T, typename Arg0, typename Arg1>
+  static constexpr T Call(KernelContext*, const Arg0& left, const Arg1& right, Status*) {
+    static_assert(std::is_same<T, bool>::value && std::is_same<Arg0, Arg1>::value, "");
     return left >= right;
   }
 };
@@ -77,13 +81,15 @@ template <typename T>
 using enable_if_floating_point = enable_if_t<std::is_floating_point<T>::value, T>;
 
 struct Minimum {
-  template <typename T>
-  static enable_if_floating_point<T> Call(T left, T right) {
+  template <typename T, typename Arg0, typename Arg1>
+  static enable_if_floating_point<T> Call(Arg0 left, Arg1 right) {
+    static_assert(std::is_same<T, Arg0>::value && std::is_same<Arg0, Arg1>::value, "");
     return std::fmin(left, right);
   }
 
-  template <typename T>
-  static enable_if_integer<T> Call(T left, T right) {
+  template <typename T, typename Arg0, typename Arg1>
+  static enable_if_integer<T> Call(Arg0 left, Arg1 right) {
+    static_assert(std::is_same<T, Arg0>::value && std::is_same<Arg0, Arg1>::value, "");
     return std::min(left, right);
   }
 
@@ -104,13 +110,15 @@ struct Minimum {
 };
 
 struct Maximum {
-  template <typename T>
-  static enable_if_floating_point<T> Call(T left, T right) {
+  template <typename T, typename Arg0, typename Arg1>
+  static enable_if_floating_point<T> Call(Arg0 left, Arg1 right) {
+    static_assert(std::is_same<T, Arg0>::value && std::is_same<Arg0, Arg1>::value, "");
     return std::fmax(left, right);
   }
 
-  template <typename T>
-  static enable_if_integer<T> Call(T left, T right) {
+  template <typename T, typename Arg0, typename Arg1>
+  static enable_if_integer<T> Call(Arg0 left, Arg1 right) {
+    static_assert(std::is_same<T, Arg0>::value && std::is_same<Arg0, Arg1>::value, "");
     return std::max(left, right);
   }
 
@@ -291,7 +299,8 @@ struct ScalarMinMax {
         value = UnboxScalar<OutType>::Unbox(scalar);
         valid = true;
       } else {
-        value = Op::Call(value, UnboxScalar<OutType>::Unbox(scalar));
+        value = Op::template Call<OutValue, OutValue, OutValue>(
+            value, UnboxScalar<OutType>::Unbox(scalar));
       }
     }
     out->is_valid = valid;
@@ -396,7 +405,7 @@ struct ScalarMinMax {
             auto u = out_it();
             if (!output->buffers[0] ||
                 BitUtil::GetBit(output->buffers[0]->data(), index)) {
-              writer.Write(Op::Call(u, value));
+              writer.Write(Op::template Call<OutValue, OutValue, OutValue>(u, value));
             } else {
               writer.Write(value);
             }
diff --git a/cpp/src/arrow/compute/kernels/util_internal.h b/cpp/src/arrow/compute/kernels/util_internal.h
index f230bfbbd6d..394e08da581 100644
--- a/cpp/src/arrow/compute/kernels/util_internal.h
+++ b/cpp/src/arrow/compute/kernels/util_internal.h
@@ -30,6 +30,18 @@ namespace arrow {
 namespace compute {
 namespace internal {
 
+// Used in some kernels and testing - not provided by default in MSVC
+// and _USE_MATH_DEFINES is not reliable with unity builds
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
+#ifndef M_PI_2
+#define M_PI_2 1.57079632679489661923
+#endif
+#ifndef M_PI_4
+#define M_PI_4 0.785398163397448309616
+#endif
+
 // An internal data structure for unpacking a primitive argument to pass to a
 // kernel implementation
 struct PrimitiveArg {
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index c4ca4d3416c..33c1b474452 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -339,6 +339,40 @@ Bit-wise functions
   out of bounds for the data type.  However, an overflow when shifting the
   first input is not error (truncated bits are silently discarded).
 
+Trigonometric functions
+~~~~~~~~~~~~~~~~~~~~~~~
+
+Trigonometric functions are also supported, and also offer ``_checked``
+variants that check for domain errors if needed.
+
++--------------------------+------------+--------------------+---------------------+
+| Function name            | Arity      | Input types        | Output type         |
++==========================+============+====================+=====================+
+| acos                     | Unary      | Float32/Float64    | Float32/Float64     |
++--------------------------+------------+--------------------+---------------------+
+| acos_checked             | Unary      | Float32/Float64    | Float32/Float64     |
++--------------------------+------------+--------------------+---------------------+
+| asin                     | Unary      | Float32/Float64    | Float32/Float64     |
++--------------------------+------------+--------------------+---------------------+
+| asin_checked             | Unary      | Float32/Float64    | Float32/Float64     |
++--------------------------+------------+--------------------+---------------------+
+| atan                     | Unary      | Float32/Float64    | Float32/Float64     |
++--------------------------+------------+--------------------+---------------------+
+| atan2                    | Binary     | Float32/Float64    | Float32/Float64     |
++--------------------------+------------+--------------------+---------------------+
+| cos                      | Unary      | Float32/Float64    | Float32/Float64     |
++--------------------------+------------+--------------------+---------------------+
+| cos_checked              | Unary      | Float32/Float64    | Float32/Float64     |
++--------------------------+------------+--------------------+---------------------+
+| sin                      | Unary      | Float32/Float64    | Float32/Float64     |
++--------------------------+------------+--------------------+---------------------+
+| sin_checked              | Unary      | Float32/Float64    | Float32/Float64     |
++--------------------------+------------+--------------------+---------------------+
+| tan                      | Unary      | Float32/Float64    | Float32/Float64     |
++--------------------------+------------+--------------------+---------------------+
+| tan_checked              | Unary      | Float32/Float64    | Float32/Float64     |
++--------------------------+------------+--------------------+---------------------+
+
 Comparisons
 ~~~~~~~~~~~
 
diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst
index 461803dc773..334a76e75d2 100644
--- a/docs/source/python/api/compute.rst
+++ b/docs/source/python/api/compute.rst
@@ -73,6 +73,28 @@ Bit-wise operations do not offer (or need) a checked variant.
    bit_wise_or
    bit_wise_xor
 
+Trigonometric Functions
+-----------------------
+
+Trigonometric functions are also supported, and also offer ``_checked``
+variants which detect domain errors where appropriate.
+
+.. autosummary::
+   :toctree: ../generated/
+
+   acos
+   acos_checked
+   asin
+   asin_checked
+   atan
+   atan2
+   cos
+   cos_checked
+   sin
+   sin_checked
+   tan
+   tan_checked
+
 Comparisons
 -----------
 

From 1430c93f68960e10a50d27f465eb174e76ac06b2 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Wed, 30 Jun 2021 14:23:23 -0400
Subject: [PATCH 484/719] ARROW-13025: [C++][Python] Add
 FunctionOptions::Equals/ToString/Serialize

This is a draft of adding more utility methods to FunctionOptions. It's not fully implemented (it needs rebasing + serialization isn't implemented for most options, plus there are various TODOs scattered). But before I proceed further, I wanted to get some feedback.

Some concerns I have:
- I don't like adding protected methods to a struct, and it's inconsistent with how equality is implemented for other structs (via a visitor or otherwise centralized in a single location). However ARROW-8891 will require that we be able to define kernels - and presumably their options - in a separate shared library, so I don't think we can do much better than this.
- But for (de)serialization, we'll still need some way to dynamically register the mapping between a type_name and the actual struct, so maybe this is a moot point.
- I've exposed the fact that serialization uses StructScalars to support Expression - but maybe this is too much to commit to in the API?

Closes #10511 from lidavidm/arrow-13025

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Benjamin Kietzman <bengilgit@gmail.com>
---
 cpp/src/arrow/CMakeLists.txt                  |   1 +
 cpp/src/arrow/array/array_test.cc             |  32 +
 cpp/src/arrow/array/builder_base.cc           | 159 +++++
 cpp/src/arrow/array/builder_base.h            |   5 +
 cpp/src/arrow/array/builder_binary.h          |  12 +
 cpp/src/arrow/array/builder_dict.h            |   1 +
 cpp/src/arrow/compute/api_aggregate.cc        | 110 +++
 cpp/src/arrow/compute/api_aggregate.h         |  52 +-
 cpp/src/arrow/compute/api_scalar.cc           | 266 +++++++-
 cpp/src/arrow/compute/api_scalar.h            | 142 ++--
 cpp/src/arrow/compute/api_vector.cc           | 116 ++++
 cpp/src/arrow/compute/api_vector.h            |  64 +-
 cpp/src/arrow/compute/cast.cc                 |  29 +-
 cpp/src/arrow/compute/cast.h                  |  12 +-
 cpp/src/arrow/compute/exec.h                  |   2 +-
 cpp/src/arrow/compute/exec/expression.cc      | 176 +----
 .../arrow/compute/exec/expression_internal.h  |  11 -
 cpp/src/arrow/compute/exec/expression_test.cc |  41 +-
 cpp/src/arrow/compute/exec/test_util.cc       |   2 +-
 cpp/src/arrow/compute/exec_test.cc            |  53 +-
 cpp/src/arrow/compute/function.cc             |  35 +
 cpp/src/arrow/compute/function.h              |  41 +-
 cpp/src/arrow/compute/function_internal.cc    | 109 +++
 cpp/src/arrow/compute/function_internal.h     | 626 ++++++++++++++++++
 cpp/src/arrow/compute/function_test.cc        |  97 +++
 cpp/src/arrow/compute/kernel.h                |   2 +-
 cpp/src/arrow/compute/registry.cc             |  42 ++
 cpp/src/arrow/compute/registry.h              |  10 +
 cpp/src/arrow/compute/registry_internal.h     |   6 +
 cpp/src/arrow/compute/type_fwd.h              |   4 +-
 cpp/src/arrow/testing/generator.cc            |  90 +--
 cpp/src/arrow/util/reflection_internal.h      |  17 +
 cpp/src/arrow/util/reflection_test.cc         |  27 +
 python/pyarrow/_compute.pxd                   |   3 +
 python/pyarrow/_compute.pyx                   | 313 +++------
 python/pyarrow/includes/libarrow.pxd          |  35 +-
 python/pyarrow/tests/test_compute.py          |  35 +
 37 files changed, 2174 insertions(+), 604 deletions(-)
 create mode 100644 cpp/src/arrow/compute/function_internal.cc
 create mode 100644 cpp/src/arrow/compute/function_internal.h

diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index 79b48461f9b..484c3e9e769 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -371,6 +371,7 @@ if(ARROW_COMPUTE)
        compute/exec/exec_plan.cc
        compute/exec/expression.cc
        compute/function.cc
+       compute/function_internal.cc
        compute/kernel.cc
        compute/registry.cc
        compute/kernels/aggregate_basic.cc
diff --git a/cpp/src/arrow/array/array_test.cc b/cpp/src/arrow/array/array_test.cc
index a97bf134604..682baab208d 100644
--- a/cpp/src/arrow/array/array_test.cc
+++ b/cpp/src/arrow/array/array_test.cc
@@ -397,6 +397,32 @@ TEST_F(TestArray, TestMakeArrayOfNullUnion) {
   }
 }
 
+void AssertAppendScalar(MemoryPool* pool, const std::shared_ptr<Scalar>& scalar) {
+  std::unique_ptr<arrow::ArrayBuilder> builder;
+  auto null_scalar = MakeNullScalar(scalar->type);
+  ASSERT_OK(MakeBuilder(pool, scalar->type, &builder));
+  ASSERT_OK(builder->AppendScalar(*scalar));
+  ASSERT_OK(builder->AppendScalar(*scalar));
+  ASSERT_OK(builder->AppendScalar(*null_scalar));
+  ASSERT_OK(builder->AppendScalars({scalar, null_scalar}));
+  ASSERT_OK(builder->AppendScalar(*scalar, /*n_repeats=*/2));
+  ASSERT_OK(builder->AppendScalar(*null_scalar, /*n_repeats=*/2));
+
+  std::shared_ptr<Array> out;
+  FinishAndCheckPadding(builder.get(), &out);
+  ASSERT_OK(out->ValidateFull());
+  ASSERT_EQ(out->length(), 9);
+  ASSERT_EQ(out->null_count(), 4);
+  for (const auto index : {0, 1, 3, 5, 6}) {
+    ASSERT_FALSE(out->IsNull(index));
+    ASSERT_OK_AND_ASSIGN(auto scalar_i, out->GetScalar(index));
+    AssertScalarsEqual(*scalar, *scalar_i, /*verbose=*/true);
+  }
+  for (const auto index : {2, 4, 7, 8}) {
+    ASSERT_TRUE(out->IsNull(index));
+  }
+}
+
 TEST_F(TestArray, TestMakeArrayFromScalar) {
   ASSERT_OK_AND_ASSIGN(auto null_array, MakeArrayFromScalar(NullScalar(), 5));
   ASSERT_OK(null_array->ValidateFull());
@@ -447,6 +473,10 @@ TEST_F(TestArray, TestMakeArrayFromScalar) {
       ASSERT_EQ(array->null_count(), 0);
     }
   }
+
+  for (auto scalar : scalars) {
+    AssertAppendScalar(pool_, scalar);
+  }
 }
 
 TEST_F(TestArray, TestMakeArrayFromDictionaryScalar) {
@@ -481,6 +511,8 @@ TEST_F(TestArray, TestMakeArrayFromMapScalar) {
     ASSERT_OK_AND_ASSIGN(auto item, array->GetScalar(i));
     ASSERT_TRUE(item->Equals(scalar));
   }
+
+  AssertAppendScalar(pool_, std::make_shared<MapScalar>(scalar));
 }
 
 TEST_F(TestArray, ValidateBuffersPrimitive) {
diff --git a/cpp/src/arrow/array/builder_base.cc b/cpp/src/arrow/array/builder_base.cc
index b92cc285894..c892e3d664b 100644
--- a/cpp/src/arrow/array/builder_base.cc
+++ b/cpp/src/arrow/array/builder_base.cc
@@ -24,8 +24,11 @@
 #include "arrow/array/data.h"
 #include "arrow/array/util.h"
 #include "arrow/buffer.h"
+#include "arrow/builder.h"
+#include "arrow/scalar.h"
 #include "arrow/status.h"
 #include "arrow/util/logging.h"
+#include "arrow/visitor_inline.h"
 
 namespace arrow {
 
@@ -92,6 +95,162 @@ Status ArrayBuilder::Advance(int64_t elements) {
   return null_bitmap_builder_.Advance(elements);
 }
 
+namespace {
+struct AppendScalarImpl {
+  template <typename T>
+  enable_if_t<has_c_type<T>::value || is_decimal_type<T>::value ||
+                  is_fixed_size_binary_type<T>::value,
+              Status>
+  Visit(const T&) {
+    auto builder = internal::checked_cast<typename TypeTraits<T>::BuilderType*>(builder_);
+    RETURN_NOT_OK(builder->Reserve(n_repeats_ * (scalars_end_ - scalars_begin_)));
+
+    for (int64_t i = 0; i < n_repeats_; i++) {
+      for (const std::shared_ptr<Scalar>* raw = scalars_begin_; raw != scalars_end_;
+           raw++) {
+        auto scalar =
+            internal::checked_cast<const typename TypeTraits<T>::ScalarType*>(raw->get());
+        if (scalar->is_valid) {
+          builder->UnsafeAppend(scalar->value);
+        } else {
+          builder->UnsafeAppendNull();
+        }
+      }
+    }
+    return Status::OK();
+  }
+
+  template <typename T>
+  enable_if_base_binary<T, Status> Visit(const T&) {
+    int64_t data_size = 0;
+    for (const std::shared_ptr<Scalar>* raw = scalars_begin_; raw != scalars_end_;
+         raw++) {
+      auto scalar =
+          internal::checked_cast<const typename TypeTraits<T>::ScalarType*>(raw->get());
+      if (scalar->is_valid) {
+        data_size += scalar->value->size();
+      }
+    }
+
+    auto builder = internal::checked_cast<typename TypeTraits<T>::BuilderType*>(builder_);
+    RETURN_NOT_OK(builder->Reserve(n_repeats_ * (scalars_end_ - scalars_begin_)));
+    RETURN_NOT_OK(builder->ReserveData(n_repeats_ * data_size));
+
+    for (int64_t i = 0; i < n_repeats_; i++) {
+      for (const std::shared_ptr<Scalar>* raw = scalars_begin_; raw != scalars_end_;
+           raw++) {
+        auto scalar =
+            internal::checked_cast<const typename TypeTraits<T>::ScalarType*>(raw->get());
+        if (scalar->is_valid) {
+          builder->UnsafeAppend(util::string_view{*scalar->value});
+        } else {
+          builder->UnsafeAppendNull();
+        }
+      }
+    }
+    return Status::OK();
+  }
+
+  template <typename T>
+  enable_if_list_like<T, Status> Visit(const T&) {
+    auto builder = internal::checked_cast<typename TypeTraits<T>::BuilderType*>(builder_);
+    int64_t num_children = 0;
+    for (const std::shared_ptr<Scalar>* scalar = scalars_begin_; scalar != scalars_end_;
+         scalar++) {
+      if (!(*scalar)->is_valid) continue;
+      num_children +=
+          internal::checked_cast<const BaseListScalar&>(**scalar).value->length();
+    }
+    RETURN_NOT_OK(builder->value_builder()->Reserve(num_children * n_repeats_));
+
+    for (int64_t i = 0; i < n_repeats_; i++) {
+      for (const std::shared_ptr<Scalar>* scalar = scalars_begin_; scalar != scalars_end_;
+           scalar++) {
+        if ((*scalar)->is_valid) {
+          RETURN_NOT_OK(builder->Append());
+          const Array& list =
+              *internal::checked_cast<const BaseListScalar&>(**scalar).value;
+          for (int64_t i = 0; i < list.length(); i++) {
+            ARROW_ASSIGN_OR_RAISE(auto scalar, list.GetScalar(i));
+            RETURN_NOT_OK(builder->value_builder()->AppendScalar(*scalar));
+          }
+        } else {
+          RETURN_NOT_OK(builder_->AppendNull());
+        }
+      }
+    }
+    return Status::OK();
+  }
+
+  Status Visit(const StructType& type) {
+    auto* builder = internal::checked_cast<StructBuilder*>(builder_);
+    auto count = n_repeats_ * (scalars_end_ - scalars_begin_);
+    RETURN_NOT_OK(builder->Reserve(count));
+    for (int field_index = 0; field_index < type.num_fields(); ++field_index) {
+      RETURN_NOT_OK(builder->field_builder(field_index)->Reserve(count));
+    }
+    for (int64_t i = 0; i < n_repeats_; i++) {
+      for (const std::shared_ptr<Scalar>* s = scalars_begin_; s != scalars_end_; s++) {
+        const auto& scalar = internal::checked_cast<const StructScalar&>(**s);
+        for (int field_index = 0; field_index < type.num_fields(); ++field_index) {
+          if (!scalar.is_valid || !scalar.value[field_index]) {
+            RETURN_NOT_OK(builder->field_builder(field_index)->AppendNull());
+          } else {
+            RETURN_NOT_OK(builder->field_builder(field_index)
+                              ->AppendScalar(*scalar.value[field_index]));
+          }
+        }
+        RETURN_NOT_OK(builder->Append(scalar.is_valid));
+      }
+    }
+    return Status::OK();
+  }
+
+  Status Visit(const DataType& type) {
+    return Status::NotImplemented("AppendScalar for type ", type);
+  }
+
+  Status Convert() { return VisitTypeInline(*(*scalars_begin_)->type, this); }
+
+  const std::shared_ptr<Scalar>* scalars_begin_;
+  const std::shared_ptr<Scalar>* scalars_end_;
+  int64_t n_repeats_;
+  ArrayBuilder* builder_;
+};
+}  // namespace
+
+Status ArrayBuilder::AppendScalar(const Scalar& scalar) {
+  if (!scalar.type->Equals(type())) {
+    return Status::Invalid("Cannot append scalar of type ", scalar.type->ToString(),
+                           " to builder for type ", type()->ToString());
+  }
+  std::shared_ptr<Scalar> shared{const_cast<Scalar*>(&scalar), [](Scalar*) {}};
+  return AppendScalarImpl{&shared, &shared + 1, /*n_repeats=*/1, this}.Convert();
+}
+
+Status ArrayBuilder::AppendScalar(const Scalar& scalar, int64_t n_repeats) {
+  if (!scalar.type->Equals(type())) {
+    return Status::Invalid("Cannot append scalar of type ", scalar.type->ToString(),
+                           " to builder for type ", type()->ToString());
+  }
+  std::shared_ptr<Scalar> shared{const_cast<Scalar*>(&scalar), [](Scalar*) {}};
+  return AppendScalarImpl{&shared, &shared + 1, n_repeats, this}.Convert();
+}
+
+Status ArrayBuilder::AppendScalars(const ScalarVector& scalars) {
+  if (scalars.empty()) return Status::OK();
+  const auto ty = type();
+  for (const auto& scalar : scalars) {
+    if (!scalar->type->Equals(ty)) {
+      return Status::Invalid("Cannot append scalar of type ", scalar->type->ToString(),
+                             " to builder for type ", type()->ToString());
+    }
+  }
+  return AppendScalarImpl{scalars.data(), scalars.data() + scalars.size(),
+                          /*n_repeats=*/1, this}
+      .Convert();
+}
+
 Status ArrayBuilder::Finish(std::shared_ptr<Array>* out) {
   std::shared_ptr<ArrayData> internal_data;
   RETURN_NOT_OK(FinishInternal(&internal_data));
diff --git a/cpp/src/arrow/array/builder_base.h b/cpp/src/arrow/array/builder_base.h
index 15c726241b5..8e60c306796 100644
--- a/cpp/src/arrow/array/builder_base.h
+++ b/cpp/src/arrow/array/builder_base.h
@@ -116,6 +116,11 @@ class ARROW_EXPORT ArrayBuilder {
   /// This method is useful when appending null values to a parent nested type.
   virtual Status AppendEmptyValues(int64_t length) = 0;
 
+  /// \brief Append a value from a scalar
+  Status AppendScalar(const Scalar& scalar);
+  Status AppendScalar(const Scalar& scalar, int64_t n_repeats);
+  Status AppendScalars(const ScalarVector& scalars);
+
   /// For cases where raw data was memcpy'd into the internal buffers, allows us
   /// to advance the length of the builder. It is your responsibility to use
   /// this function responsibly.
diff --git a/cpp/src/arrow/array/builder_binary.h b/cpp/src/arrow/array/builder_binary.h
index c1c664a1249..7653eeca5c4 100644
--- a/cpp/src/arrow/array/builder_binary.h
+++ b/cpp/src/arrow/array/builder_binary.h
@@ -467,6 +467,14 @@ class ARROW_EXPORT FixedSizeBinaryBuilder : public ArrayBuilder {
     return Status::OK();
   }
 
+  Status Append(const Buffer& s) {
+    ARROW_RETURN_NOT_OK(Reserve(1));
+    UnsafeAppend(util::string_view(s));
+    return Status::OK();
+  }
+
+  Status Append(const std::shared_ptr<Buffer>& s) { return Append(*s); }
+
   template <size_t NBYTES>
   Status Append(const std::array<uint8_t, NBYTES>& value) {
     ARROW_RETURN_NOT_OK(Reserve(1));
@@ -502,6 +510,10 @@ class ARROW_EXPORT FixedSizeBinaryBuilder : public ArrayBuilder {
     UnsafeAppend(reinterpret_cast<const uint8_t*>(value.data()));
   }
 
+  void UnsafeAppend(const Buffer& s) { UnsafeAppend(util::string_view(s)); }
+
+  void UnsafeAppend(const std::shared_ptr<Buffer>& s) { UnsafeAppend(*s); }
+
   void UnsafeAppendNull() {
     UnsafeAppendToBitmap(false);
     byte_builder_.UnsafeAppend(/*num_copies=*/byte_width_, 0);
diff --git a/cpp/src/arrow/array/builder_dict.h b/cpp/src/arrow/array/builder_dict.h
index 40d6ce1ba9a..455cb3df7b1 100644
--- a/cpp/src/arrow/array/builder_dict.h
+++ b/cpp/src/arrow/array/builder_dict.h
@@ -29,6 +29,7 @@
 #include "arrow/array/builder_primitive.h"  // IWYU pragma: export
 #include "arrow/array/data.h"
 #include "arrow/array/util.h"
+#include "arrow/scalar.h"
 #include "arrow/status.h"
 #include "arrow/type.h"
 #include "arrow/type_traits.h"
diff --git a/cpp/src/arrow/compute/api_aggregate.cc b/cpp/src/arrow/compute/api_aggregate.cc
index efff4ac67df..be05c3c11d0 100644
--- a/cpp/src/arrow/compute/api_aggregate.cc
+++ b/cpp/src/arrow/compute/api_aggregate.cc
@@ -18,10 +18,120 @@
 #include "arrow/compute/api_aggregate.h"
 
 #include "arrow/compute/exec.h"
+#include "arrow/compute/function_internal.h"
+#include "arrow/compute/registry.h"
+#include "arrow/compute/util_internal.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/logging.h"
 
 namespace arrow {
+
+namespace internal {
+template <>
+struct EnumTraits<compute::QuantileOptions::Interpolation>
+    : BasicEnumTraits<compute::QuantileOptions::Interpolation,
+                      compute::QuantileOptions::LINEAR, compute::QuantileOptions::LOWER,
+                      compute::QuantileOptions::HIGHER, compute::QuantileOptions::NEAREST,
+                      compute::QuantileOptions::MIDPOINT> {
+  static std::string name() { return "QuantileOptions::Interpolation"; }
+  static std::string value_name(compute::QuantileOptions::Interpolation value) {
+    switch (value) {
+      case compute::QuantileOptions::LINEAR:
+        return "LINEAR";
+      case compute::QuantileOptions::LOWER:
+        return "LOWER";
+      case compute::QuantileOptions::HIGHER:
+        return "HIGHER";
+      case compute::QuantileOptions::NEAREST:
+        return "NEAREST";
+      case compute::QuantileOptions::MIDPOINT:
+        return "MIDPOINT";
+    }
+    return "<INVALID>";
+  }
+};
+}  // namespace internal
+
 namespace compute {
 
+// ----------------------------------------------------------------------
+// Function options
+
+using ::arrow::internal::checked_cast;
+
+namespace internal {
+namespace {
+using ::arrow::internal::DataMember;
+static auto kScalarAggregateOptionsType = GetFunctionOptionsType<ScalarAggregateOptions>(
+    DataMember("skip_nulls", &ScalarAggregateOptions::skip_nulls),
+    DataMember("min_count", &ScalarAggregateOptions::min_count));
+static auto kModeOptionsType =
+    GetFunctionOptionsType<ModeOptions>(DataMember("n", &ModeOptions::n));
+static auto kVarianceOptionsType =
+    GetFunctionOptionsType<VarianceOptions>(DataMember("ddof", &VarianceOptions::ddof));
+static auto kQuantileOptionsType = GetFunctionOptionsType<QuantileOptions>(
+    DataMember("q", &QuantileOptions::q),
+    DataMember("interpolation", &QuantileOptions::interpolation));
+static auto kTDigestOptionsType = GetFunctionOptionsType<TDigestOptions>(
+    DataMember("q", &TDigestOptions::q), DataMember("delta", &TDigestOptions::delta),
+    DataMember("buffer_size", &TDigestOptions::buffer_size));
+static auto kIndexOptionsType =
+    GetFunctionOptionsType<IndexOptions>(DataMember("value", &IndexOptions::value));
+}  // namespace
+}  // namespace internal
+
+ScalarAggregateOptions::ScalarAggregateOptions(bool skip_nulls, uint32_t min_count)
+    : FunctionOptions(internal::kScalarAggregateOptionsType),
+      skip_nulls(skip_nulls),
+      min_count(min_count) {}
+constexpr char ScalarAggregateOptions::kTypeName[];
+
+ModeOptions::ModeOptions(int64_t n) : FunctionOptions(internal::kModeOptionsType), n(n) {}
+constexpr char ModeOptions::kTypeName[];
+
+VarianceOptions::VarianceOptions(int ddof)
+    : FunctionOptions(internal::kVarianceOptionsType), ddof(ddof) {}
+constexpr char VarianceOptions::kTypeName[];
+
+QuantileOptions::QuantileOptions(double q, enum Interpolation interpolation)
+    : FunctionOptions(internal::kQuantileOptionsType),
+      q{q},
+      interpolation{interpolation} {}
+QuantileOptions::QuantileOptions(std::vector<double> q, enum Interpolation interpolation)
+    : FunctionOptions(internal::kQuantileOptionsType),
+      q{std::move(q)},
+      interpolation{interpolation} {}
+constexpr char QuantileOptions::kTypeName[];
+
+TDigestOptions::TDigestOptions(double q, uint32_t delta, uint32_t buffer_size)
+    : FunctionOptions(internal::kTDigestOptionsType),
+      q{q},
+      delta{delta},
+      buffer_size{buffer_size} {}
+TDigestOptions::TDigestOptions(std::vector<double> q, uint32_t delta,
+                               uint32_t buffer_size)
+    : FunctionOptions(internal::kTDigestOptionsType),
+      q{std::move(q)},
+      delta{delta},
+      buffer_size{buffer_size} {}
+constexpr char TDigestOptions::kTypeName[];
+
+IndexOptions::IndexOptions(std::shared_ptr<Scalar> value)
+    : FunctionOptions(internal::kIndexOptionsType), value{std::move(value)} {}
+IndexOptions::IndexOptions() : IndexOptions(std::make_shared<NullScalar>()) {}
+constexpr char IndexOptions::kTypeName[];
+
+namespace internal {
+void RegisterAggregateOptions(FunctionRegistry* registry) {
+  DCHECK_OK(registry->AddFunctionOptionsType(kScalarAggregateOptionsType));
+  DCHECK_OK(registry->AddFunctionOptionsType(kModeOptionsType));
+  DCHECK_OK(registry->AddFunctionOptionsType(kVarianceOptionsType));
+  DCHECK_OK(registry->AddFunctionOptionsType(kQuantileOptionsType));
+  DCHECK_OK(registry->AddFunctionOptionsType(kTDigestOptionsType));
+  DCHECK_OK(registry->AddFunctionOptionsType(kIndexOptionsType));
+}
+}  // namespace internal
+
 // ----------------------------------------------------------------------
 // Scalar aggregates
 
diff --git a/cpp/src/arrow/compute/api_aggregate.h b/cpp/src/arrow/compute/api_aggregate.h
index 121896f1c97..9be0b406aa4 100644
--- a/cpp/src/arrow/compute/api_aggregate.h
+++ b/cpp/src/arrow/compute/api_aggregate.h
@@ -43,10 +43,10 @@ class ExecContext;
 /// \brief Control general scalar aggregate kernel behavior
 ///
 /// By default, null values are ignored
-struct ARROW_EXPORT ScalarAggregateOptions : public FunctionOptions {
-  explicit ScalarAggregateOptions(bool skip_nulls = true, uint32_t min_count = 1)
-      : skip_nulls(skip_nulls), min_count(min_count) {}
-
+class ARROW_EXPORT ScalarAggregateOptions : public FunctionOptions {
+ public:
+  explicit ScalarAggregateOptions(bool skip_nulls = true, uint32_t min_count = 1);
+  constexpr static char const kTypeName[] = "scalar_aggregate";
   static ScalarAggregateOptions Defaults() { return ScalarAggregateOptions{}; }
 
   bool skip_nulls;
@@ -57,9 +57,10 @@ struct ARROW_EXPORT ScalarAggregateOptions : public FunctionOptions {
 ///
 /// Returns top-n common values and counts.
 /// By default, returns the most common value and count.
-struct ARROW_EXPORT ModeOptions : public FunctionOptions {
-  explicit ModeOptions(int64_t n = 1) : n(n) {}
-
+class ARROW_EXPORT ModeOptions : public FunctionOptions {
+ public:
+  explicit ModeOptions(int64_t n = 1);
+  constexpr static char const kTypeName[] = "mode";
   static ModeOptions Defaults() { return ModeOptions{}; }
 
   int64_t n = 1;
@@ -69,9 +70,10 @@ struct ARROW_EXPORT ModeOptions : public FunctionOptions {
 ///
 /// The divisor used in calculations is N - ddof, where N is the number of elements.
 /// By default, ddof is zero, and population variance or stddev is returned.
-struct ARROW_EXPORT VarianceOptions : public FunctionOptions {
-  explicit VarianceOptions(int ddof = 0) : ddof(ddof) {}
-
+class ARROW_EXPORT VarianceOptions : public FunctionOptions {
+ public:
+  explicit VarianceOptions(int ddof = 0);
+  constexpr static char const kTypeName[] = "variance";
   static VarianceOptions Defaults() { return VarianceOptions{}; }
 
   int ddof = 0;
@@ -80,7 +82,8 @@ struct ARROW_EXPORT VarianceOptions : public FunctionOptions {
 /// \brief Control Quantile kernel behavior
 ///
 /// By default, returns the median value.
-struct ARROW_EXPORT QuantileOptions : public FunctionOptions {
+class ARROW_EXPORT QuantileOptions : public FunctionOptions {
+ public:
   /// Interpolation method to use when quantile lies between two data points
   enum Interpolation {
     LINEAR = 0,
@@ -90,13 +93,12 @@ struct ARROW_EXPORT QuantileOptions : public FunctionOptions {
     MIDPOINT,
   };
 
-  explicit QuantileOptions(double q = 0.5, enum Interpolation interpolation = LINEAR)
-      : q{q}, interpolation{interpolation} {}
+  explicit QuantileOptions(double q = 0.5, enum Interpolation interpolation = LINEAR);
 
   explicit QuantileOptions(std::vector<double> q,
-                           enum Interpolation interpolation = LINEAR)
-      : q{std::move(q)}, interpolation{interpolation} {}
+                           enum Interpolation interpolation = LINEAR);
 
+  constexpr static char const kTypeName[] = "quantile";
   static QuantileOptions Defaults() { return QuantileOptions{}; }
 
   /// quantile must be between 0 and 1 inclusive
@@ -107,15 +109,13 @@ struct ARROW_EXPORT QuantileOptions : public FunctionOptions {
 /// \brief Control TDigest approximate quantile kernel behavior
 ///
 /// By default, returns the median value.
-struct ARROW_EXPORT TDigestOptions : public FunctionOptions {
+class ARROW_EXPORT TDigestOptions : public FunctionOptions {
+ public:
   explicit TDigestOptions(double q = 0.5, uint32_t delta = 100,
-                          uint32_t buffer_size = 500)
-      : q{q}, delta{delta}, buffer_size{buffer_size} {}
-
+                          uint32_t buffer_size = 500);
   explicit TDigestOptions(std::vector<double> q, uint32_t delta = 100,
-                          uint32_t buffer_size = 500)
-      : q{std::move(q)}, delta{delta}, buffer_size{buffer_size} {}
-
+                          uint32_t buffer_size = 500);
+  constexpr static char const kTypeName[] = "t_digest";
   static TDigestOptions Defaults() { return TDigestOptions{}; }
 
   /// quantile must be between 0 and 1 inclusive
@@ -127,8 +127,12 @@ struct ARROW_EXPORT TDigestOptions : public FunctionOptions {
 };
 
 /// \brief Control Index kernel behavior
-struct ARROW_EXPORT IndexOptions : public FunctionOptions {
-  explicit IndexOptions(std::shared_ptr<Scalar> value) : value{std::move(value)} {}
+class ARROW_EXPORT IndexOptions : public FunctionOptions {
+ public:
+  explicit IndexOptions(std::shared_ptr<Scalar> value);
+  // Default constructor for serialization
+  IndexOptions();
+  constexpr static char const kTypeName[] = "index";
 
   std::shared_ptr<Scalar> value;
 };
diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc
index 20bba982a74..11b5b45b7a0 100644
--- a/cpp/src/arrow/compute/api_scalar.cc
+++ b/cpp/src/arrow/compute/api_scalar.cc
@@ -21,13 +21,277 @@
 #include <sstream>
 #include <string>
 
+#include "arrow/array/array_base.h"
 #include "arrow/compute/exec.h"
+#include "arrow/compute/function_internal.h"
+#include "arrow/compute/registry.h"
+#include "arrow/compute/util_internal.h"
 #include "arrow/status.h"
 #include "arrow/type.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/logging.h"
 
 namespace arrow {
+
+namespace internal {
+template <>
+struct EnumTraits<compute::JoinOptions::NullHandlingBehavior>
+    : BasicEnumTraits<compute::JoinOptions::NullHandlingBehavior,
+                      compute::JoinOptions::NullHandlingBehavior::EMIT_NULL,
+                      compute::JoinOptions::NullHandlingBehavior::SKIP,
+                      compute::JoinOptions::NullHandlingBehavior::REPLACE> {
+  static std::string name() { return "JoinOptions::NullHandlingBehavior"; }
+  static std::string value_name(compute::JoinOptions::NullHandlingBehavior value) {
+    switch (value) {
+      case compute::JoinOptions::NullHandlingBehavior::EMIT_NULL:
+        return "EMIT_NULL";
+      case compute::JoinOptions::NullHandlingBehavior::SKIP:
+        return "SKIP";
+      case compute::JoinOptions::NullHandlingBehavior::REPLACE:
+        return "REPLACE";
+    }
+    return "<INVALID>";
+  }
+};
+template <>
+struct EnumTraits<TimeUnit::type>
+    : BasicEnumTraits<TimeUnit::type, TimeUnit::type::SECOND, TimeUnit::type::MILLI,
+                      TimeUnit::type::MICRO, TimeUnit::type::NANO> {
+  static std::string name() { return "TimeUnit::type"; }
+  static std::string value_name(TimeUnit::type value) {
+    switch (value) {
+      case TimeUnit::type::SECOND:
+        return "SECOND";
+      case TimeUnit::type::MILLI:
+        return "MILLI";
+      case TimeUnit::type::MICRO:
+        return "MICRO";
+      case TimeUnit::type::NANO:
+        return "NANO";
+    }
+    return "<INVALID>";
+  }
+};
+template <>
+struct EnumTraits<compute::CompareOperator>
+    : BasicEnumTraits<
+          compute::CompareOperator, compute::CompareOperator::EQUAL,
+          compute::CompareOperator::NOT_EQUAL, compute::CompareOperator::GREATER,
+          compute::CompareOperator::GREATER_EQUAL, compute::CompareOperator::LESS,
+          compute::CompareOperator::LESS_EQUAL> {
+  static std::string name() { return "compute::CompareOperator"; }
+  static std::string value_name(compute::CompareOperator value) {
+    switch (value) {
+      case compute::CompareOperator::EQUAL:
+        return "EQUAL";
+      case compute::CompareOperator::NOT_EQUAL:
+        return "NOT_EQUAL";
+      case compute::CompareOperator::GREATER:
+        return "GREATER";
+      case compute::CompareOperator::GREATER_EQUAL:
+        return "GREATER_EQUAL";
+      case compute::CompareOperator::LESS:
+        return "LESS";
+      case compute::CompareOperator::LESS_EQUAL:
+        return "LESS_EQUAL";
+    }
+    return "<INVALID>";
+  }
+};
+}  // namespace internal
+
 namespace compute {
 
+// ----------------------------------------------------------------------
+// Function options
+
+using ::arrow::internal::checked_cast;
+
+namespace internal {
+namespace {
+using ::arrow::internal::DataMember;
+static auto kElementWiseAggregateOptionsType =
+    GetFunctionOptionsType<ElementWiseAggregateOptions>(
+        DataMember("skip_nulls", &ElementWiseAggregateOptions::skip_nulls));
+static auto kJoinOptionsType = GetFunctionOptionsType<JoinOptions>(
+    DataMember("null_handling", &JoinOptions::null_handling),
+    DataMember("null_replacement", &JoinOptions::null_replacement));
+static auto kMatchSubstringOptionsType = GetFunctionOptionsType<MatchSubstringOptions>(
+    DataMember("pattern", &MatchSubstringOptions::pattern),
+    DataMember("ignore_case", &MatchSubstringOptions::ignore_case));
+static auto kSplitOptionsType = GetFunctionOptionsType<SplitOptions>(
+    DataMember("max_splits", &SplitOptions::max_splits),
+    DataMember("reverse", &SplitOptions::reverse));
+static auto kSplitPatternOptionsType = GetFunctionOptionsType<SplitPatternOptions>(
+    DataMember("pattern", &SplitPatternOptions::pattern),
+    DataMember("max_splits", &SplitPatternOptions::max_splits),
+    DataMember("reverse", &SplitPatternOptions::reverse));
+static auto kReplaceSliceOptionsType = GetFunctionOptionsType<ReplaceSliceOptions>(
+    DataMember("start", &ReplaceSliceOptions::start),
+    DataMember("stop", &ReplaceSliceOptions::stop),
+    DataMember("replacement", &ReplaceSliceOptions::replacement));
+static auto kReplaceSubstringOptionsType =
+    GetFunctionOptionsType<ReplaceSubstringOptions>(
+        DataMember("pattern", &ReplaceSubstringOptions::pattern),
+        DataMember("replacement", &ReplaceSubstringOptions::replacement),
+        DataMember("max_replacements", &ReplaceSubstringOptions::max_replacements));
+static auto kExtractRegexOptionsType = GetFunctionOptionsType<ExtractRegexOptions>(
+    DataMember("pattern", &ExtractRegexOptions::pattern));
+static auto kSetLookupOptionsType = GetFunctionOptionsType<SetLookupOptions>(
+    DataMember("value_set", &SetLookupOptions::value_set),
+    DataMember("skip_nulls", &SetLookupOptions::skip_nulls));
+static auto kStrptimeOptionsType = GetFunctionOptionsType<StrptimeOptions>(
+    DataMember("format", &StrptimeOptions::format),
+    DataMember("unit", &StrptimeOptions::unit));
+static auto kPadOptionsType = GetFunctionOptionsType<PadOptions>(
+    DataMember("width", &PadOptions::width), DataMember("padding", &PadOptions::padding));
+static auto kTrimOptionsType = GetFunctionOptionsType<TrimOptions>(
+    DataMember("characters", &TrimOptions::characters));
+static auto kSliceOptionsType = GetFunctionOptionsType<SliceOptions>(
+    DataMember("start", &SliceOptions::start), DataMember("stop", &SliceOptions::stop),
+    DataMember("step", &SliceOptions::step));
+static auto kCompareOptionsType =
+    GetFunctionOptionsType<CompareOptions>(DataMember("op", &CompareOptions::op));
+static auto kProjectOptionsType = GetFunctionOptionsType<ProjectOptions>(
+    DataMember("field_names", &ProjectOptions::field_names),
+    DataMember("field_nullability", &ProjectOptions::field_nullability),
+    DataMember("field_metadata", &ProjectOptions::field_metadata));
+}  // namespace
+}  // namespace internal
+
+ElementWiseAggregateOptions::ElementWiseAggregateOptions(bool skip_nulls)
+    : FunctionOptions(internal::kElementWiseAggregateOptionsType),
+      skip_nulls(skip_nulls) {}
+constexpr char ElementWiseAggregateOptions::kTypeName[];
+
+JoinOptions::JoinOptions(NullHandlingBehavior null_handling, std::string null_replacement)
+    : FunctionOptions(internal::kJoinOptionsType),
+      null_handling(null_handling),
+      null_replacement(std::move(null_replacement)) {}
+constexpr char JoinOptions::kTypeName[];
+
+MatchSubstringOptions::MatchSubstringOptions(std::string pattern, bool ignore_case)
+    : FunctionOptions(internal::kMatchSubstringOptionsType),
+      pattern(std::move(pattern)),
+      ignore_case(ignore_case) {}
+MatchSubstringOptions::MatchSubstringOptions() : MatchSubstringOptions("", false) {}
+constexpr char MatchSubstringOptions::kTypeName[];
+
+SplitOptions::SplitOptions(int64_t max_splits, bool reverse)
+    : FunctionOptions(internal::kSplitOptionsType),
+      max_splits(max_splits),
+      reverse(reverse) {}
+constexpr char SplitOptions::kTypeName[];
+
+SplitPatternOptions::SplitPatternOptions(std::string pattern, int64_t max_splits,
+                                         bool reverse)
+    : FunctionOptions(internal::kSplitPatternOptionsType),
+      pattern(std::move(pattern)),
+      max_splits(max_splits),
+      reverse(reverse) {}
+SplitPatternOptions::SplitPatternOptions() : SplitPatternOptions("", -1, false) {}
+constexpr char SplitPatternOptions::kTypeName[];
+
+ReplaceSliceOptions::ReplaceSliceOptions(int64_t start, int64_t stop,
+                                         std::string replacement)
+    : FunctionOptions(internal::kReplaceSliceOptionsType),
+      start(start),
+      stop(stop),
+      replacement(std::move(replacement)) {}
+ReplaceSliceOptions::ReplaceSliceOptions() : ReplaceSliceOptions(0, 0, "") {}
+constexpr char ReplaceSliceOptions::kTypeName[];
+
+ReplaceSubstringOptions::ReplaceSubstringOptions(std::string pattern,
+                                                 std::string replacement,
+                                                 int64_t max_replacements)
+    : FunctionOptions(internal::kReplaceSubstringOptionsType),
+      pattern(std::move(pattern)),
+      replacement(std::move(replacement)),
+      max_replacements(max_replacements) {}
+ReplaceSubstringOptions::ReplaceSubstringOptions()
+    : ReplaceSubstringOptions("", "", -1) {}
+constexpr char ReplaceSubstringOptions::kTypeName[];
+
+ExtractRegexOptions::ExtractRegexOptions(std::string pattern)
+    : FunctionOptions(internal::kExtractRegexOptionsType), pattern(std::move(pattern)) {}
+ExtractRegexOptions::ExtractRegexOptions() : ExtractRegexOptions("") {}
+constexpr char ExtractRegexOptions::kTypeName[];
+
+SetLookupOptions::SetLookupOptions(Datum value_set, bool skip_nulls)
+    : FunctionOptions(internal::kSetLookupOptionsType),
+      value_set(std::move(value_set)),
+      skip_nulls(skip_nulls) {}
+SetLookupOptions::SetLookupOptions() : SetLookupOptions({}, false) {}
+constexpr char SetLookupOptions::kTypeName[];
+
+StrptimeOptions::StrptimeOptions(std::string format, TimeUnit::type unit)
+    : FunctionOptions(internal::kStrptimeOptionsType),
+      format(std::move(format)),
+      unit(unit) {}
+StrptimeOptions::StrptimeOptions() : StrptimeOptions("", TimeUnit::SECOND) {}
+constexpr char StrptimeOptions::kTypeName[];
+
+PadOptions::PadOptions(int64_t width, std::string padding)
+    : FunctionOptions(internal::kPadOptionsType),
+      width(width),
+      padding(std::move(padding)) {}
+PadOptions::PadOptions() : PadOptions(0, " ") {}
+constexpr char PadOptions::kTypeName[];
+
+TrimOptions::TrimOptions(std::string characters)
+    : FunctionOptions(internal::kTrimOptionsType), characters(std::move(characters)) {}
+TrimOptions::TrimOptions() : TrimOptions("") {}
+constexpr char TrimOptions::kTypeName[];
+
+SliceOptions::SliceOptions(int64_t start, int64_t stop, int64_t step)
+    : FunctionOptions(internal::kSliceOptionsType),
+      start(start),
+      stop(stop),
+      step(step) {}
+SliceOptions::SliceOptions() : SliceOptions(0, 0, 1) {}
+constexpr char SliceOptions::kTypeName[];
+
+CompareOptions::CompareOptions(CompareOperator op)
+    : FunctionOptions(internal::kCompareOptionsType), op(op) {}
+CompareOptions::CompareOptions() : CompareOptions(CompareOperator::EQUAL) {}
+constexpr char CompareOptions::kTypeName[];
+
+ProjectOptions::ProjectOptions(std::vector<std::string> n, std::vector<bool> r,
+                               std::vector<std::shared_ptr<const KeyValueMetadata>> m)
+    : FunctionOptions(internal::kProjectOptionsType),
+      field_names(std::move(n)),
+      field_nullability(std::move(r)),
+      field_metadata(std::move(m)) {}
+
+ProjectOptions::ProjectOptions(std::vector<std::string> n)
+    : FunctionOptions(internal::kProjectOptionsType),
+      field_names(std::move(n)),
+      field_nullability(field_names.size(), true),
+      field_metadata(field_names.size(), NULLPTR) {}
+
+ProjectOptions::ProjectOptions() : ProjectOptions(std::vector<std::string>()) {}
+constexpr char ProjectOptions::kTypeName[];
+
+namespace internal {
+void RegisterScalarOptions(FunctionRegistry* registry) {
+  DCHECK_OK(registry->AddFunctionOptionsType(kElementWiseAggregateOptionsType));
+  DCHECK_OK(registry->AddFunctionOptionsType(kJoinOptionsType));
+  DCHECK_OK(registry->AddFunctionOptionsType(kMatchSubstringOptionsType));
+  DCHECK_OK(registry->AddFunctionOptionsType(kSplitOptionsType));
+  DCHECK_OK(registry->AddFunctionOptionsType(kSplitPatternOptionsType));
+  DCHECK_OK(registry->AddFunctionOptionsType(kReplaceSliceOptionsType));
+  DCHECK_OK(registry->AddFunctionOptionsType(kReplaceSubstringOptionsType));
+  DCHECK_OK(registry->AddFunctionOptionsType(kExtractRegexOptionsType));
+  DCHECK_OK(registry->AddFunctionOptionsType(kSetLookupOptionsType));
+  DCHECK_OK(registry->AddFunctionOptionsType(kStrptimeOptionsType));
+  DCHECK_OK(registry->AddFunctionOptionsType(kPadOptionsType));
+  DCHECK_OK(registry->AddFunctionOptionsType(kTrimOptionsType));
+  DCHECK_OK(registry->AddFunctionOptionsType(kSliceOptionsType));
+  DCHECK_OK(registry->AddFunctionOptionsType(kCompareOptionsType));
+  DCHECK_OK(registry->AddFunctionOptionsType(kProjectOptionsType));
+}
+}  // namespace internal
+
 #define SCALAR_EAGER_UNARY(NAME, REGISTRY_NAME)              \
   Result<Datum> NAME(const Datum& value, ExecContext* ctx) { \
     return CallFunction(REGISTRY_NAME, {value}, ctx);        \
@@ -162,7 +426,7 @@ Result<Datum> Compare(const Datum& left, const Datum& right, CompareOptions opti
       func_name = "less_equal";
       break;
   }
-  return CallFunction(func_name, {left, right}, &options, ctx);
+  return CallFunction(func_name, {left, right}, nullptr, ctx);
 }
 
 // ----------------------------------------------------------------------
diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h
index 2ec9c1d7653..bacb287d6bc 100644
--- a/cpp/src/arrow/compute/api_scalar.h
+++ b/cpp/src/arrow/compute/api_scalar.h
@@ -37,19 +37,25 @@ namespace compute {
 ///
 /// @{
 
-struct ArithmeticOptions : public FunctionOptions {
-  ArithmeticOptions() : check_overflow(false) {}
+struct ARROW_EXPORT ArithmeticOptions {
+ public:
+  explicit ArithmeticOptions(bool check_overflow = false)
+      : check_overflow(check_overflow) {}
   bool check_overflow;
 };
 
-struct ARROW_EXPORT ElementWiseAggregateOptions : public FunctionOptions {
-  explicit ElementWiseAggregateOptions(bool skip_nulls = true) : skip_nulls(skip_nulls) {}
+class ARROW_EXPORT ElementWiseAggregateOptions : public FunctionOptions {
+ public:
+  explicit ElementWiseAggregateOptions(bool skip_nulls = true);
+  constexpr static char const kTypeName[] = "element_wise_aggregate";
   static ElementWiseAggregateOptions Defaults() { return ElementWiseAggregateOptions{}; }
+
   bool skip_nulls;
 };
 
 /// Options for var_args_join.
-struct ARROW_EXPORT JoinOptions : public FunctionOptions {
+class ARROW_EXPORT JoinOptions : public FunctionOptions {
+ public:
   /// How to handle null values. (A null separator always results in a null output.)
   enum NullHandlingBehavior {
     /// A null in any input results in a null in the output.
@@ -60,16 +66,18 @@ struct ARROW_EXPORT JoinOptions : public FunctionOptions {
     REPLACE,
   };
   explicit JoinOptions(NullHandlingBehavior null_handling = EMIT_NULL,
-                       std::string null_replacement = "")
-      : null_handling(null_handling), null_replacement(std::move(null_replacement)) {}
+                       std::string null_replacement = "");
+  constexpr static char const kTypeName[] = "join";
   static JoinOptions Defaults() { return JoinOptions(); }
   NullHandlingBehavior null_handling;
   std::string null_replacement;
 };
 
-struct ARROW_EXPORT MatchSubstringOptions : public FunctionOptions {
-  explicit MatchSubstringOptions(std::string pattern, bool ignore_case = false)
-      : pattern(std::move(pattern)), ignore_case(ignore_case) {}
+class ARROW_EXPORT MatchSubstringOptions : public FunctionOptions {
+ public:
+  explicit MatchSubstringOptions(std::string pattern, bool ignore_case = false);
+  MatchSubstringOptions();
+  constexpr static char const kTypeName[] = "match_substring";
 
   /// The exact substring (or regex, depending on kernel) to look for inside input values.
   std::string pattern;
@@ -77,9 +85,10 @@ struct ARROW_EXPORT MatchSubstringOptions : public FunctionOptions {
   bool ignore_case = false;
 };
 
-struct ARROW_EXPORT SplitOptions : public FunctionOptions {
-  explicit SplitOptions(int64_t max_splits = -1, bool reverse = false)
-      : max_splits(max_splits), reverse(reverse) {}
+class ARROW_EXPORT SplitOptions : public FunctionOptions {
+ public:
+  explicit SplitOptions(int64_t max_splits = -1, bool reverse = false);
+  constexpr static char const kTypeName[] = "split";
 
   /// Maximum number of splits allowed, or unlimited when -1
   int64_t max_splits;
@@ -87,18 +96,26 @@ struct ARROW_EXPORT SplitOptions : public FunctionOptions {
   bool reverse;
 };
 
-struct ARROW_EXPORT SplitPatternOptions : public SplitOptions {
+class ARROW_EXPORT SplitPatternOptions : public FunctionOptions {
+ public:
   explicit SplitPatternOptions(std::string pattern, int64_t max_splits = -1,
-                               bool reverse = false)
-      : SplitOptions(max_splits, reverse), pattern(std::move(pattern)) {}
+                               bool reverse = false);
+  SplitPatternOptions();
+  constexpr static char const kTypeName[] = "split_pattern";
 
-  /// The exact substring to look for inside input values.
+  /// The exact substring to split on.
   std::string pattern;
+  /// Maximum number of splits allowed, or unlimited when -1
+  int64_t max_splits;
+  /// Start splitting from the end of the string (only relevant when max_splits != -1)
+  bool reverse;
 };
 
-struct ARROW_EXPORT ReplaceSliceOptions : public FunctionOptions {
-  explicit ReplaceSliceOptions(int64_t start, int64_t stop, std::string replacement)
-      : start(start), stop(stop), replacement(std::move(replacement)) {}
+class ARROW_EXPORT ReplaceSliceOptions : public FunctionOptions {
+ public:
+  explicit ReplaceSliceOptions(int64_t start, int64_t stop, std::string replacement);
+  ReplaceSliceOptions();
+  constexpr static char const kTypeName[] = "replace_slice";
 
   /// Index to start slicing at
   int64_t start;
@@ -108,12 +125,12 @@ struct ARROW_EXPORT ReplaceSliceOptions : public FunctionOptions {
   std::string replacement;
 };
 
-struct ARROW_EXPORT ReplaceSubstringOptions : public FunctionOptions {
+class ARROW_EXPORT ReplaceSubstringOptions : public FunctionOptions {
+ public:
   explicit ReplaceSubstringOptions(std::string pattern, std::string replacement,
-                                   int64_t max_replacements = -1)
-      : pattern(std::move(pattern)),
-        replacement(std::move(replacement)),
-        max_replacements(max_replacements) {}
+                                   int64_t max_replacements = -1);
+  ReplaceSubstringOptions();
+  constexpr static char const kTypeName[] = "replace_substring";
 
   /// Pattern to match, literal, or regular expression depending on which kernel is used
   std::string pattern;
@@ -123,17 +140,22 @@ struct ARROW_EXPORT ReplaceSubstringOptions : public FunctionOptions {
   int64_t max_replacements;
 };
 
-struct ARROW_EXPORT ExtractRegexOptions : public FunctionOptions {
-  explicit ExtractRegexOptions(std::string pattern) : pattern(std::move(pattern)) {}
+class ARROW_EXPORT ExtractRegexOptions : public FunctionOptions {
+ public:
+  explicit ExtractRegexOptions(std::string pattern);
+  ExtractRegexOptions();
+  constexpr static char const kTypeName[] = "extract_regex";
 
   /// Regular expression with named capture fields
   std::string pattern;
 };
 
 /// Options for IsIn and IndexIn functions
-struct ARROW_EXPORT SetLookupOptions : public FunctionOptions {
-  explicit SetLookupOptions(Datum value_set, bool skip_nulls = false)
-      : value_set(std::move(value_set)), skip_nulls(skip_nulls) {}
+class ARROW_EXPORT SetLookupOptions : public FunctionOptions {
+ public:
+  explicit SetLookupOptions(Datum value_set, bool skip_nulls = false);
+  SetLookupOptions();
+  constexpr static char const kTypeName[] = "set_lookup";
 
   /// The set of values to look up input values into.
   Datum value_set;
@@ -146,17 +168,21 @@ struct ARROW_EXPORT SetLookupOptions : public FunctionOptions {
   bool skip_nulls;
 };
 
-struct ARROW_EXPORT StrptimeOptions : public FunctionOptions {
-  explicit StrptimeOptions(std::string format, TimeUnit::type unit)
-      : format(std::move(format)), unit(unit) {}
+class ARROW_EXPORT StrptimeOptions : public FunctionOptions {
+ public:
+  explicit StrptimeOptions(std::string format, TimeUnit::type unit);
+  StrptimeOptions();
+  constexpr static char const kTypeName[] = "strptime";
 
   std::string format;
   TimeUnit::type unit;
 };
 
-struct ARROW_EXPORT PadOptions : public FunctionOptions {
-  explicit PadOptions(int64_t width, std::string padding = " ")
-      : width(width), padding(std::move(padding)) {}
+class ARROW_EXPORT PadOptions : public FunctionOptions {
+ public:
+  explicit PadOptions(int64_t width, std::string padding = " ");
+  PadOptions();
+  constexpr static char const kTypeName[] = "pad";
 
   /// The desired string length.
   int64_t width;
@@ -164,18 +190,22 @@ struct ARROW_EXPORT PadOptions : public FunctionOptions {
   std::string padding;
 };
 
-struct ARROW_EXPORT TrimOptions : public FunctionOptions {
-  explicit TrimOptions(std::string characters) : characters(std::move(characters)) {}
+class ARROW_EXPORT TrimOptions : public FunctionOptions {
+ public:
+  explicit TrimOptions(std::string characters);
+  TrimOptions();
+  constexpr static char const kTypeName[] = "trim";
 
   /// The individual characters that can be trimmed from the string.
   std::string characters;
 };
 
-struct ARROW_EXPORT SliceOptions : public FunctionOptions {
+class ARROW_EXPORT SliceOptions : public FunctionOptions {
+ public:
   explicit SliceOptions(int64_t start, int64_t stop = std::numeric_limits<int64_t>::max(),
-                        int64_t step = 1)
-      : start(start), stop(stop), step(step) {}
-
+                        int64_t step = 1);
+  SliceOptions();
+  constexpr static char const kTypeName[] = "slice";
   int64_t start, stop, step;
 };
 
@@ -188,23 +218,21 @@ enum CompareOperator : int8_t {
   LESS_EQUAL,
 };
 
-struct CompareOptions : public FunctionOptions {
-  explicit CompareOptions(CompareOperator op) : op(op) {}
-
+class ARROW_EXPORT CompareOptions : public FunctionOptions {
+ public:
+  explicit CompareOptions(CompareOperator op);
+  CompareOptions();
+  constexpr static char const kTypeName[] = "compare";
   enum CompareOperator op;
 };
 
-struct ARROW_EXPORT ProjectOptions : public FunctionOptions {
+class ARROW_EXPORT ProjectOptions : public FunctionOptions {
+ public:
   ProjectOptions(std::vector<std::string> n, std::vector<bool> r,
-                 std::vector<std::shared_ptr<const KeyValueMetadata>> m)
-      : field_names(std::move(n)),
-        field_nullability(std::move(r)),
-        field_metadata(std::move(m)) {}
-
-  explicit ProjectOptions(std::vector<std::string> n)
-      : field_names(std::move(n)),
-        field_nullability(field_names.size(), true),
-        field_metadata(field_names.size(), NULLPTR) {}
+                 std::vector<std::shared_ptr<const KeyValueMetadata>> m);
+  explicit ProjectOptions(std::vector<std::string> n);
+  ProjectOptions();
+  constexpr static char const kTypeName[] = "project";
 
   /// Names for wrapped columns
   std::vector<std::string> field_names;
@@ -436,8 +464,8 @@ Result<Datum> MinElementWise(
 /// \since 1.0.0
 /// \note API not yet finalized
 ARROW_EXPORT
-Result<Datum> Compare(const Datum& left, const Datum& right,
-                      struct CompareOptions options, ExecContext* ctx = NULLPTR);
+Result<Datum> Compare(const Datum& left, const Datum& right, CompareOptions options,
+                      ExecContext* ctx = NULLPTR);
 
 /// \brief Invert the values of a boolean datum
 /// \param[in] value datum to invert
diff --git a/cpp/src/arrow/compute/api_vector.cc b/cpp/src/arrow/compute/api_vector.cc
index 0082d48112d..9c1ef8533b4 100644
--- a/cpp/src/arrow/compute/api_vector.cc
+++ b/cpp/src/arrow/compute/api_vector.cc
@@ -18,23 +18,139 @@
 #include "arrow/compute/api_vector.h"
 
 #include <memory>
+#include <sstream>
 #include <utility>
 #include <vector>
 
 #include "arrow/array/array_nested.h"
 #include "arrow/array/builder_primitive.h"
 #include "arrow/compute/exec.h"
+#include "arrow/compute/function_internal.h"
+#include "arrow/compute/registry.h"
 #include "arrow/datum.h"
 #include "arrow/record_batch.h"
 #include "arrow/result.h"
 #include "arrow/util/checked_cast.h"
+#include "arrow/util/logging.h"
 
 namespace arrow {
 
+using internal::checked_cast;
 using internal::checked_pointer_cast;
 
+namespace internal {
+using compute::DictionaryEncodeOptions;
+using compute::FilterOptions;
+template <>
+struct EnumTraits<FilterOptions::NullSelectionBehavior>
+    : BasicEnumTraits<FilterOptions::NullSelectionBehavior, FilterOptions::DROP,
+                      FilterOptions::EMIT_NULL> {
+  static std::string name() { return "FilterOptions::NullSelectionBehavior"; }
+  static std::string value_name(FilterOptions::NullSelectionBehavior value) {
+    switch (value) {
+      case FilterOptions::DROP:
+        return "DROP";
+      case FilterOptions::EMIT_NULL:
+        return "EMIT_NULL";
+    }
+    return "<INVALID>";
+  }
+};
+template <>
+struct EnumTraits<DictionaryEncodeOptions::NullEncodingBehavior>
+    : BasicEnumTraits<DictionaryEncodeOptions::NullEncodingBehavior,
+                      DictionaryEncodeOptions::ENCODE, DictionaryEncodeOptions::MASK> {
+  static std::string name() { return "DictionaryEncodeOptions::NullEncodingBehavior"; }
+  static std::string value_name(DictionaryEncodeOptions::NullEncodingBehavior value) {
+    switch (value) {
+      case DictionaryEncodeOptions::ENCODE:
+        return "ENCODE";
+      case DictionaryEncodeOptions::MASK:
+        return "MASK";
+    }
+    return "<INVALID>";
+  }
+};
+}  // namespace internal
+
 namespace compute {
 
+// ----------------------------------------------------------------------
+// Function options
+
+bool SortKey::Equals(const SortKey& other) const {
+  return name == other.name && order == other.order;
+}
+std::string SortKey::ToString() const {
+  std::stringstream ss;
+  ss << name << ' ';
+  switch (order) {
+    case SortOrder::Ascending:
+      ss << "ASC";
+      break;
+    case SortOrder::Descending:
+      ss << "DESC";
+      break;
+  }
+  return ss.str();
+}
+
+namespace internal {
+namespace {
+using ::arrow::internal::DataMember;
+static auto kFilterOptionsType = GetFunctionOptionsType<FilterOptions>(
+    DataMember("null_selection_behavior", &FilterOptions::null_selection_behavior));
+static auto kTakeOptionsType = GetFunctionOptionsType<TakeOptions>(
+    DataMember("boundscheck", &TakeOptions::boundscheck));
+static auto kDictionaryEncodeOptionsType =
+    GetFunctionOptionsType<DictionaryEncodeOptions>(DataMember(
+        "null_encoding_behavior", &DictionaryEncodeOptions::null_encoding_behavior));
+static auto kArraySortOptionsType = GetFunctionOptionsType<ArraySortOptions>(
+    DataMember("order", &ArraySortOptions::order));
+static auto kSortOptionsType =
+    GetFunctionOptionsType<SortOptions>(DataMember("sort_keys", &SortOptions::sort_keys));
+static auto kPartitionNthOptionsType = GetFunctionOptionsType<PartitionNthOptions>(
+    DataMember("pivot", &PartitionNthOptions::pivot));
+}  // namespace
+}  // namespace internal
+
+FilterOptions::FilterOptions(NullSelectionBehavior null_selection)
+    : FunctionOptions(internal::kFilterOptionsType),
+      null_selection_behavior(null_selection) {}
+constexpr char FilterOptions::kTypeName[];
+
+TakeOptions::TakeOptions(bool boundscheck)
+    : FunctionOptions(internal::kTakeOptionsType), boundscheck(boundscheck) {}
+constexpr char TakeOptions::kTypeName[];
+
+DictionaryEncodeOptions::DictionaryEncodeOptions(NullEncodingBehavior null_encoding)
+    : FunctionOptions(internal::kDictionaryEncodeOptionsType),
+      null_encoding_behavior(null_encoding) {}
+constexpr char DictionaryEncodeOptions::kTypeName[];
+
+ArraySortOptions::ArraySortOptions(SortOrder order)
+    : FunctionOptions(internal::kArraySortOptionsType), order(order) {}
+constexpr char ArraySortOptions::kTypeName[];
+
+SortOptions::SortOptions(std::vector<SortKey> sort_keys)
+    : FunctionOptions(internal::kSortOptionsType), sort_keys(std::move(sort_keys)) {}
+constexpr char SortOptions::kTypeName[];
+
+PartitionNthOptions::PartitionNthOptions(int64_t pivot)
+    : FunctionOptions(internal::kPartitionNthOptionsType), pivot(pivot) {}
+constexpr char PartitionNthOptions::kTypeName[];
+
+namespace internal {
+void RegisterVectorOptions(FunctionRegistry* registry) {
+  DCHECK_OK(registry->AddFunctionOptionsType(kFilterOptionsType));
+  DCHECK_OK(registry->AddFunctionOptionsType(kTakeOptionsType));
+  DCHECK_OK(registry->AddFunctionOptionsType(kDictionaryEncodeOptionsType));
+  DCHECK_OK(registry->AddFunctionOptionsType(kArraySortOptionsType));
+  DCHECK_OK(registry->AddFunctionOptionsType(kSortOptionsType));
+  DCHECK_OK(registry->AddFunctionOptionsType(kPartitionNthOptionsType));
+}
+}  // namespace internal
+
 // ----------------------------------------------------------------------
 // Direct exec interface to kernels
 
diff --git a/cpp/src/arrow/compute/api_vector.h b/cpp/src/arrow/compute/api_vector.h
index d67568e1567..2282b0098f9 100644
--- a/cpp/src/arrow/compute/api_vector.h
+++ b/cpp/src/arrow/compute/api_vector.h
@@ -32,7 +32,8 @@ class ExecContext;
 /// \addtogroup compute-concrete-options
 /// @{
 
-struct FilterOptions : public FunctionOptions {
+class ARROW_EXPORT FilterOptions : public FunctionOptions {
+ public:
   /// Configure the action taken when a slot of the selection mask is null
   enum NullSelectionBehavior {
     /// the corresponding filtered value will be removed in the output
@@ -41,30 +42,27 @@ struct FilterOptions : public FunctionOptions {
     EMIT_NULL,
   };
 
-  explicit FilterOptions(NullSelectionBehavior null_selection = DROP)
-      : null_selection_behavior(null_selection) {}
-
+  explicit FilterOptions(NullSelectionBehavior null_selection = DROP);
+  constexpr static char const kTypeName[] = "filter";
   static FilterOptions Defaults() { return FilterOptions(); }
 
   NullSelectionBehavior null_selection_behavior = DROP;
 };
 
-struct ARROW_EXPORT TakeOptions : public FunctionOptions {
-  explicit TakeOptions(bool boundscheck = true) : boundscheck(boundscheck) {}
-
-  bool boundscheck = true;
+class ARROW_EXPORT TakeOptions : public FunctionOptions {
+ public:
+  explicit TakeOptions(bool boundscheck = true);
+  constexpr static char const kTypeName[] = "take";
   static TakeOptions BoundsCheck() { return TakeOptions(true); }
   static TakeOptions NoBoundsCheck() { return TakeOptions(false); }
   static TakeOptions Defaults() { return BoundsCheck(); }
-};
 
-enum class SortOrder {
-  Ascending,
-  Descending,
+  bool boundscheck = true;
 };
 
 /// \brief Options for the dictionary encode function
-struct DictionaryEncodeOptions : public FunctionOptions {
+class ARROW_EXPORT DictionaryEncodeOptions : public FunctionOptions {
+ public:
   /// Configure how null values will be encoded
   enum NullEncodingBehavior {
     /// the null value will be added to the dictionary with a proper index
@@ -73,44 +71,60 @@ struct DictionaryEncodeOptions : public FunctionOptions {
     MASK
   };
 
-  explicit DictionaryEncodeOptions(NullEncodingBehavior null_encoding = MASK)
-      : null_encoding_behavior(null_encoding) {}
-
+  explicit DictionaryEncodeOptions(NullEncodingBehavior null_encoding = MASK);
+  constexpr static char const kTypeName[] = "dictionary_encode";
   static DictionaryEncodeOptions Defaults() { return DictionaryEncodeOptions(); }
 
   NullEncodingBehavior null_encoding_behavior = MASK;
 };
 
+enum class SortOrder {
+  Ascending,
+  Descending,
+};
+
 /// \brief One sort key for PartitionNthIndices (TODO) and SortIndices
-struct ARROW_EXPORT SortKey {
+class ARROW_EXPORT SortKey : public util::EqualityComparable<SortKey> {
+ public:
   explicit SortKey(std::string name, SortOrder order = SortOrder::Ascending)
       : name(name), order(order) {}
 
+  using util::EqualityComparable<SortKey>::Equals;
+  using util::EqualityComparable<SortKey>::operator==;
+  using util::EqualityComparable<SortKey>::operator!=;
+  bool Equals(const SortKey& other) const;
+  std::string ToString() const;
+
   /// The name of the sort column.
   std::string name;
   /// How to order by this sort key.
   SortOrder order;
 };
 
-struct ARROW_EXPORT ArraySortOptions : public FunctionOptions {
-  explicit ArraySortOptions(SortOrder order = SortOrder::Ascending) : order(order) {}
-
+class ARROW_EXPORT ArraySortOptions : public FunctionOptions {
+ public:
+  explicit ArraySortOptions(SortOrder order = SortOrder::Ascending);
+  constexpr static char const kTypeName[] = "array_sort";
   static ArraySortOptions Defaults() { return ArraySortOptions{}; }
 
   SortOrder order;
 };
 
-struct ARROW_EXPORT SortOptions : public FunctionOptions {
-  explicit SortOptions(std::vector<SortKey> sort_keys = {}) : sort_keys(sort_keys) {}
-
+class ARROW_EXPORT SortOptions : public FunctionOptions {
+ public:
+  explicit SortOptions(std::vector<SortKey> sort_keys = {});
+  constexpr static char const kTypeName[] = "sort";
   static SortOptions Defaults() { return SortOptions{}; }
 
   std::vector<SortKey> sort_keys;
 };
 
 /// \brief Partitioning options for NthToIndices
-struct ARROW_EXPORT PartitionNthOptions : public FunctionOptions {
-  explicit PartitionNthOptions(int64_t pivot) : pivot(pivot) {}
+class ARROW_EXPORT PartitionNthOptions : public FunctionOptions {
+ public:
+  explicit PartitionNthOptions(int64_t pivot);
+  PartitionNthOptions() : PartitionNthOptions(0) {}
+  constexpr static char const kTypeName[] = "partition_nth";
 
   /// The index into the equivalent sorted array of the partition pivot element.
   int64_t pivot;
diff --git a/cpp/src/arrow/compute/cast.cc b/cpp/src/arrow/compute/cast.cc
index 8a091f2355d..521f217213d 100644
--- a/cpp/src/arrow/compute/cast.cc
+++ b/cpp/src/arrow/compute/cast.cc
@@ -18,6 +18,7 @@
 #include "arrow/compute/cast.h"
 
 #include <mutex>
+#include <sstream>
 #include <string>
 #include <unordered_map>
 #include <unordered_set>
@@ -26,10 +27,12 @@
 
 #include "arrow/compute/cast_internal.h"
 #include "arrow/compute/exec.h"
+#include "arrow/compute/function_internal.h"
 #include "arrow/compute/kernel.h"
 #include "arrow/compute/kernels/codegen_internal.h"
 #include "arrow/compute/registry.h"
 #include "arrow/util/logging.h"
+#include "arrow/util/reflection_internal.h"
 
 namespace arrow {
 
@@ -38,6 +41,9 @@ using internal::ToTypeName;
 namespace compute {
 namespace internal {
 
+// ----------------------------------------------------------------------
+// Function options
+
 namespace {
 
 std::unordered_map<int, std::shared_ptr<CastFunction>> g_cast_table;
@@ -116,14 +122,35 @@ class CastMetaFunction : public MetaFunction {
   }
 };
 
+static auto kCastOptionsType = GetFunctionOptionsType<CastOptions>(
+    arrow::internal::DataMember("to_type", &CastOptions::to_type),
+    arrow::internal::DataMember("allow_int_overflow", &CastOptions::allow_int_overflow),
+    arrow::internal::DataMember("allow_time_truncate", &CastOptions::allow_time_truncate),
+    arrow::internal::DataMember("allow_time_overflow", &CastOptions::allow_time_overflow),
+    arrow::internal::DataMember("allow_decimal_truncate",
+                                &CastOptions::allow_decimal_truncate),
+    arrow::internal::DataMember("allow_float_truncate",
+                                &CastOptions::allow_float_truncate),
+    arrow::internal::DataMember("allow_invalid_utf8", &CastOptions::allow_invalid_utf8));
 }  // namespace
 
 void RegisterScalarCast(FunctionRegistry* registry) {
   DCHECK_OK(registry->AddFunction(std::make_shared<CastMetaFunction>()));
+  DCHECK_OK(registry->AddFunctionOptionsType(kCastOptionsType));
 }
-
 }  // namespace internal
 
+CastOptions::CastOptions(bool safe)
+    : FunctionOptions(internal::kCastOptionsType),
+      allow_int_overflow(!safe),
+      allow_time_truncate(!safe),
+      allow_time_overflow(!safe),
+      allow_decimal_truncate(!safe),
+      allow_float_truncate(!safe),
+      allow_invalid_utf8(!safe) {}
+
+constexpr char CastOptions::kTypeName[];
+
 CastFunction::CastFunction(std::string name, Type::type out_type_id)
     : ScalarFunction(std::move(name), Arity::Unary(), /*doc=*/nullptr),
       out_type_id_(out_type_id) {}
diff --git a/cpp/src/arrow/compute/cast.h b/cpp/src/arrow/compute/cast.h
index 818f2ef9182..8abd2a71bca 100644
--- a/cpp/src/arrow/compute/cast.h
+++ b/cpp/src/arrow/compute/cast.h
@@ -41,15 +41,11 @@ class ExecContext;
 /// \addtogroup compute-concrete-options
 /// @{
 
-struct ARROW_EXPORT CastOptions : public FunctionOptions {
-  explicit CastOptions(bool safe = true)
-      : allow_int_overflow(!safe),
-        allow_time_truncate(!safe),
-        allow_time_overflow(!safe),
-        allow_decimal_truncate(!safe),
-        allow_float_truncate(!safe),
-        allow_invalid_utf8(!safe) {}
+class ARROW_EXPORT CastOptions : public FunctionOptions {
+ public:
+  explicit CastOptions(bool safe = true);
 
+  constexpr static char const kTypeName[] = "cast";
   static CastOptions Safe(std::shared_ptr<DataType> to_type = NULLPTR) {
     CastOptions safe(true);
     safe.to_type = std::move(to_type);
diff --git a/cpp/src/arrow/compute/exec.h b/cpp/src/arrow/compute/exec.h
index 7659442d8bf..cd95db2fd8c 100644
--- a/cpp/src/arrow/compute/exec.h
+++ b/cpp/src/arrow/compute/exec.h
@@ -44,7 +44,7 @@ class CpuInfo;
 
 namespace compute {
 
-struct FunctionOptions;
+class FunctionOptions;
 class FunctionRegistry;
 
 // It seems like 64K might be a good default chunksize to use for execution
diff --git a/cpp/src/arrow/compute/exec/expression.cc b/cpp/src/arrow/compute/exec/expression.cc
index 1c8c82de05e..aeabbf7bc5b 100644
--- a/cpp/src/arrow/compute/exec/expression.cc
+++ b/cpp/src/arrow/compute/exec/expression.cc
@@ -24,6 +24,7 @@
 #include "arrow/compute/api_vector.h"
 #include "arrow/compute/exec/expression_internal.h"
 #include "arrow/compute/exec_internal.h"
+#include "arrow/compute/function_internal.h"
 #include "arrow/io/memory.h"
 #include "arrow/ipc/reader.h"
 #include "arrow/ipc/writer.h"
@@ -167,41 +168,14 @@ std::string Expression::ToString() const {
     out += arg.ToString() + ", ";
   }
 
-  if (call->options == nullptr) {
+  if (call->options) {
+    out += call->options->ToString();
+    out.resize(out.size() + 1);
+  } else {
     out.resize(out.size() - 1);
-    out.back() = ')';
-    return out;
   }
-
-  if (auto options = GetSetLookupOptions(*call)) {
-    DCHECK_EQ(options->value_set.kind(), Datum::ARRAY);
-    out += "value_set=" + options->value_set.make_array()->ToString();
-    if (options->skip_nulls) {
-      out += ", skip_nulls";
-    }
-    return out + ")";
-  }
-
-  if (auto options = GetCastOptions(*call)) {
-    if (options->to_type == nullptr) {
-      return out + "to_type=<INVALID NOT PROVIDED>)";
-    }
-    out += "to_type=" + options->to_type->ToString();
-    if (options->allow_int_overflow) out += ", allow_int_overflow";
-    if (options->allow_time_truncate) out += ", allow_time_truncate";
-    if (options->allow_time_overflow) out += ", allow_time_overflow";
-    if (options->allow_decimal_truncate) out += ", allow_decimal_truncate";
-    if (options->allow_float_truncate) out += ", allow_float_truncate";
-    if (options->allow_invalid_utf8) out += ", allow_invalid_utf8";
-    return out + ")";
-  }
-
-  if (auto options = GetStrptimeOptions(*call)) {
-    return out + "format=" + options->format +
-           ", unit=" + arrow::internal::ToString(options->unit) + ")";
-  }
-
-  return out + "{NON-REPRESENTABLE OPTIONS})";
+  out.back() = ')';
+  return out;
 }
 
 void PrintTo(const Expression& expr, std::ostream* os) {
@@ -241,41 +215,9 @@ bool Expression::Equals(const Expression& other) const {
   }
 
   if (call->options == other_call->options) return true;
-
-  if (auto options = GetSetLookupOptions(*call)) {
-    auto other_options = GetSetLookupOptions(*other_call);
-    return options->value_set == other_options->value_set &&
-           options->skip_nulls == other_options->skip_nulls;
+  if (call->options && other_call->options) {
+    return call->options->Equals(other_call->options);
   }
-
-  if (auto options = GetCastOptions(*call)) {
-    auto other_options = GetCastOptions(*other_call);
-    for (auto safety_opt : {
-             &compute::CastOptions::allow_int_overflow,
-             &compute::CastOptions::allow_time_truncate,
-             &compute::CastOptions::allow_time_overflow,
-             &compute::CastOptions::allow_decimal_truncate,
-             &compute::CastOptions::allow_float_truncate,
-             &compute::CastOptions::allow_invalid_utf8,
-         }) {
-      if (options->*safety_opt != other_options->*safety_opt) return false;
-    }
-    return options->to_type->Equals(other_options->to_type);
-  }
-
-  if (auto options = GetProjectOptions(*call)) {
-    auto other_options = GetProjectOptions(*other_call);
-    return options->field_names == other_options->field_names;
-  }
-
-  if (auto options = GetStrptimeOptions(*call)) {
-    auto other_options = GetStrptimeOptions(*other_call);
-    return options->format == other_options->format &&
-           options->unit == other_options->unit;
-  }
-
-  ARROW_LOG(WARNING) << "comparing unknown FunctionOptions for function "
-                     << call->function_name;
   return false;
 }
 
@@ -992,92 +934,6 @@ Result<Expression> SimplifyWithGuarantee(Expression expr,
   return expr;
 }
 
-namespace {
-
-Result<std::shared_ptr<StructScalar>> FunctionOptionsToStructScalar(
-    const Expression::Call& call) {
-  if (call.options == nullptr) {
-    return nullptr;
-  }
-
-  if (auto options = GetSetLookupOptions(call)) {
-    if (!options->value_set.is_array()) {
-      return Status::NotImplemented("chunked value_set");
-    }
-    return StructScalar::Make(
-        {
-            std::make_shared<ListScalar>(options->value_set.make_array()),
-            MakeScalar(options->skip_nulls),
-        },
-        {"value_set", "skip_nulls"});
-  }
-
-  if (auto options = GetCastOptions(call)) {
-    return StructScalar::Make(
-        {
-            MakeNullScalar(options->to_type),
-            MakeScalar(options->allow_int_overflow),
-            MakeScalar(options->allow_time_truncate),
-            MakeScalar(options->allow_time_overflow),
-            MakeScalar(options->allow_decimal_truncate),
-            MakeScalar(options->allow_float_truncate),
-            MakeScalar(options->allow_invalid_utf8),
-        },
-        {
-            "to_type_holder",
-            "allow_int_overflow",
-            "allow_time_truncate",
-            "allow_time_overflow",
-            "allow_decimal_truncate",
-            "allow_float_truncate",
-            "allow_invalid_utf8",
-        });
-  }
-
-  return Status::NotImplemented("conversion of options for ", call.function_name);
-}
-
-Status FunctionOptionsFromStructScalar(const StructScalar* repr, Expression::Call* call) {
-  if (repr == nullptr) {
-    call->options = nullptr;
-    return Status::OK();
-  }
-
-  if (IsSetLookup(call->function_name)) {
-    ARROW_ASSIGN_OR_RAISE(auto value_set, repr->field("value_set"));
-    ARROW_ASSIGN_OR_RAISE(auto skip_nulls, repr->field("skip_nulls"));
-    call->options = std::make_shared<compute::SetLookupOptions>(
-        checked_cast<const ListScalar&>(*value_set).value,
-        checked_cast<const BooleanScalar&>(*skip_nulls).value);
-    return Status::OK();
-  }
-
-  if (call->function_name == "cast") {
-    auto options = std::make_shared<compute::CastOptions>();
-    ARROW_ASSIGN_OR_RAISE(auto to_type_holder, repr->field("to_type_holder"));
-    options->to_type = to_type_holder->type;
-
-    int i = 1;
-    for (bool* opt : {
-             &options->allow_int_overflow,
-             &options->allow_time_truncate,
-             &options->allow_time_overflow,
-             &options->allow_decimal_truncate,
-             &options->allow_float_truncate,
-             &options->allow_invalid_utf8,
-         }) {
-      *opt = checked_cast<const BooleanScalar&>(*repr->value[i++]).value;
-    }
-
-    call->options = std::move(options);
-    return Status::OK();
-  }
-
-  return Status::NotImplemented("conversion of options for ", call->function_name);
-}
-
-}  // namespace
-
 // Serialization is accomplished by converting expressions to KeyValueMetadata and storing
 // this in the schema of a RecordBatch. Embedded arrays and scalars are stored in its
 // columns. Finally, the RecordBatch is written to an IPC file.
@@ -1119,7 +975,8 @@ Result<std::shared_ptr<Buffer>> Serialize(const Expression& expr) {
       }
 
       if (call->options) {
-        ARROW_ASSIGN_OR_RAISE(auto options_scalar, FunctionOptionsToStructScalar(*call));
+        ARROW_ASSIGN_OR_RAISE(auto options_scalar,
+                              internal::FunctionOptionsToStructScalar(*call->options));
         ARROW_ASSIGN_OR_RAISE(auto value, AddScalar(*options_scalar));
         metadata_->Append("options", std::move(value));
       }
@@ -1204,10 +1061,13 @@ Result<Expression> Deserialize(std::shared_ptr<Buffer> buffer) {
       while (metadata().key(index_) != "end") {
         if (metadata().key(index_) == "options") {
           ARROW_ASSIGN_OR_RAISE(auto options_scalar, GetScalar(metadata().value(index_)));
-          auto expr = call(value, std::move(arguments));
-          RETURN_NOT_OK(FunctionOptionsFromStructScalar(
-              checked_cast<const StructScalar*>(options_scalar.get()),
-              const_cast<Expression::Call*>(expr.call())));
+          std::shared_ptr<compute::FunctionOptions> options;
+          if (options_scalar) {
+            ARROW_ASSIGN_OR_RAISE(
+                options, internal::FunctionOptionsFromStructScalar(
+                             checked_cast<const StructScalar&>(*options_scalar)));
+          }
+          auto expr = call(value, std::move(arguments), std::move(options));
           index_ += 2;
           return expr;
         }
diff --git a/cpp/src/arrow/compute/exec/expression_internal.h b/cpp/src/arrow/compute/exec/expression_internal.h
index 7b0cc758f57..b9165a5f0c2 100644
--- a/cpp/src/arrow/compute/exec/expression_internal.h
+++ b/cpp/src/arrow/compute/exec/expression_internal.h
@@ -216,22 +216,11 @@ inline bool IsSetLookup(const std::string& function) {
   return function == "is_in" || function == "index_in";
 }
 
-inline const compute::SetLookupOptions* GetSetLookupOptions(
-    const Expression::Call& call) {
-  if (!IsSetLookup(call.function_name)) return nullptr;
-  return checked_cast<const compute::SetLookupOptions*>(call.options.get());
-}
-
 inline const compute::ProjectOptions* GetProjectOptions(const Expression::Call& call) {
   if (call.function_name != "project") return nullptr;
   return checked_cast<const compute::ProjectOptions*>(call.options.get());
 }
 
-inline const compute::StrptimeOptions* GetStrptimeOptions(const Expression::Call& call) {
-  if (call.function_name != "strptime") return nullptr;
-  return checked_cast<const compute::StrptimeOptions*>(call.options.get());
-}
-
 /// A helper for unboxing an Expression composed of associative function calls.
 /// Such expressions can frequently be rearranged to a semantically equivalent
 /// expression for more optimal execution or more straightforward manipulation.
diff --git a/cpp/src/arrow/compute/exec/expression_test.cc b/cpp/src/arrow/compute/exec/expression_test.cc
index 66212bf99d6..908e8962e43 100644
--- a/cpp/src/arrow/compute/exec/expression_test.cc
+++ b/cpp/src/arrow/compute/exec/expression_test.cc
@@ -27,6 +27,7 @@
 #include <gtest/gtest.h>
 
 #include "arrow/compute/exec/expression_internal.h"
+#include "arrow/compute/function_internal.h"
 #include "arrow/compute/registry.h"
 #include "arrow/testing/gtest_util.h"
 
@@ -184,17 +185,43 @@ TEST(Expression, ToString) {
   auto in_12 = call("index_in", {field_ref("beta")},
                     compute::SetLookupOptions{ArrayFromJSON(int32(), "[1,2]")});
 
-  EXPECT_EQ(in_12.ToString(), "index_in(beta, value_set=[\n  1,\n  2\n])");
+  EXPECT_EQ(in_12.ToString(),
+            "index_in(beta, {value_set=int32:[\n  1,\n  2\n], skip_nulls=false})");
 
   EXPECT_EQ(and_(field_ref("a"), field_ref("b")).ToString(), "(a and b)");
   EXPECT_EQ(or_(field_ref("a"), field_ref("b")).ToString(), "(a or b)");
   EXPECT_EQ(not_(field_ref("a")).ToString(), "invert(a)");
 
-  EXPECT_EQ(cast(field_ref("a"), int32()).ToString(), "cast(a, to_type=int32)");
-  EXPECT_EQ(cast(field_ref("a"), nullptr).ToString(),
-            "cast(a, to_type=<INVALID NOT PROVIDED>)");
-
-  struct WidgetifyOptions : compute::FunctionOptions {
+  EXPECT_EQ(
+      cast(field_ref("a"), int32()).ToString(),
+      "cast(a, {to_type=int32, allow_int_overflow=false, allow_time_truncate=false, "
+      "allow_time_overflow=false, allow_decimal_truncate=false, "
+      "allow_float_truncate=false, allow_invalid_utf8=false})");
+  EXPECT_EQ(
+      cast(field_ref("a"), nullptr).ToString(),
+      "cast(a, {to_type=<NULLPTR>, allow_int_overflow=false, allow_time_truncate=false, "
+      "allow_time_overflow=false, allow_decimal_truncate=false, "
+      "allow_float_truncate=false, allow_invalid_utf8=false})");
+
+  class WidgetifyOptionsType : public FunctionOptionsType {
+   public:
+    static const FunctionOptionsType* GetInstance() {
+      static std::unique_ptr<FunctionOptionsType> instance(new WidgetifyOptionsType());
+      return instance.get();
+    }
+    const char* type_name() const override { return "widgetify"; }
+    std::string Stringify(const FunctionOptions& options) const override {
+      return type_name();
+    }
+    bool Compare(const FunctionOptions& options,
+                 const FunctionOptions& other) const override {
+      return true;
+    }
+  };
+  class WidgetifyOptions : public compute::FunctionOptions {
+   public:
+    explicit WidgetifyOptions(bool really = true)
+        : FunctionOptions(WidgetifyOptionsType::GetInstance()), really(really) {}
     bool really;
   };
 
@@ -202,7 +229,7 @@ TEST(Expression, ToString) {
   EXPECT_EQ(call("widgetify", {}).ToString(), "widgetif)");
   EXPECT_EQ(
       call("widgetify", {literal(1)}, std::make_shared<WidgetifyOptions>()).ToString(),
-      "widgetify(1, {NON-REPRESENTABLE OPTIONS})");
+      "widgetify(1, widgetify)");
 
   EXPECT_EQ(equal(field_ref("a"), literal(1)).ToString(), "(a == 1)");
   EXPECT_EQ(less(field_ref("a"), literal(2)).ToString(), "(a < 2)");
diff --git a/cpp/src/arrow/compute/exec/test_util.cc b/cpp/src/arrow/compute/exec/test_util.cc
index f2cd7d2a740..ae2c9446aa9 100644
--- a/cpp/src/arrow/compute/exec/test_util.cc
+++ b/cpp/src/arrow/compute/exec/test_util.cc
@@ -392,7 +392,7 @@ ExecNode* MakeDummyNode(ExecPlan* plan, std::string label, int num_inputs,
 
 RecordBatchCollectNode* MakeRecordBatchCollectNode(
     ExecPlan* plan, std::string label, const std::shared_ptr<Schema>& schema) {
-  return internal::checked_cast<RecordBatchCollectNode*>(
+  return arrow::internal::checked_cast<RecordBatchCollectNode*>(
       plan->EmplaceNode<RecordBatchCollectNodeImpl>(plan, std::move(label), schema));
 }
 
diff --git a/cpp/src/arrow/compute/exec_test.cc b/cpp/src/arrow/compute/exec_test.cc
index c56e6471c97..8ce7e52d252 100644
--- a/cpp/src/arrow/compute/exec_test.cc
+++ b/cpp/src/arrow/compute/exec_test.cc
@@ -31,6 +31,7 @@
 #include "arrow/compute/exec.h"
 #include "arrow/compute/exec_internal.h"
 #include "arrow/compute/function.h"
+#include "arrow/compute/function_internal.h"
 #include "arrow/compute/kernel.h"
 #include "arrow/compute/registry.h"
 #include "arrow/memory_pool.h"
@@ -50,6 +51,10 @@ using internal::checked_cast;
 namespace compute {
 namespace detail {
 
+using ::arrow::internal::BitmapEquals;
+using ::arrow::internal::CopyBitmap;
+using ::arrow::internal::CountSetBits;
+
 TEST(ExecContext, BasicWorkings) {
   {
     ExecContext ctx;
@@ -58,7 +63,7 @@ TEST(ExecContext, BasicWorkings) {
     ASSERT_EQ(std::numeric_limits<int64_t>::max(), ctx.exec_chunksize());
 
     ASSERT_TRUE(ctx.use_threads());
-    ASSERT_EQ(internal::CpuInfo::GetInstance(), ctx.cpu_info());
+    ASSERT_EQ(arrow::internal::CpuInfo::GetInstance(), ctx.cpu_info());
   }
 
   // Now, let's customize all the things
@@ -277,9 +282,9 @@ TEST_F(TestPropagateNulls, SingleValueWithNulls) {
 
     ASSERT_EQ(arr->Slice(offset)->null_count(), output.GetNullCount());
 
-    ASSERT_TRUE(internal::BitmapEquals(output.buffers[0]->data(), output.offset,
-                                       sliced->null_bitmap_data(), sliced->offset(),
-                                       output.length));
+    ASSERT_TRUE(BitmapEquals(output.buffers[0]->data(), output.offset,
+                             sliced->null_bitmap_data(), sliced->offset(),
+                             output.length));
     AssertValidityZeroExtraBits(output);
   };
 
@@ -372,8 +377,8 @@ TEST_F(TestPropagateNulls, IntersectsNulls) {
 
     const auto& out_buffer = *output.buffers[0];
 
-    ASSERT_TRUE(internal::BitmapEquals(out_buffer.data(), output_offset, ex_bitmap,
-                                       /*ex_offset=*/0, length));
+    ASSERT_TRUE(BitmapEquals(out_buffer.data(), output_offset, ex_bitmap,
+                             /*ex_offset=*/0, length));
 
     // Now check that the rest of the bits in out_buffer are still 0
     AssertValidityZeroExtraBits(output);
@@ -556,15 +561,14 @@ Status ExecComputedBitmap(KernelContext* ctx, const ExecBatch& batch, Datum* out
   const ArrayData& arg0 = *batch[0].array();
   ArrayData* out_arr = out->mutable_array();
 
-  if (internal::CountSetBits(arg0.buffers[0]->data(), arg0.offset, batch.length) > 0) {
+  if (CountSetBits(arg0.buffers[0]->data(), arg0.offset, batch.length) > 0) {
     // Check that the bitmap has not been already copied over
-    DCHECK(!internal::BitmapEquals(arg0.buffers[0]->data(), arg0.offset,
-                                   out_arr->buffers[0]->data(), out_arr->offset,
-                                   batch.length));
+    DCHECK(!BitmapEquals(arg0.buffers[0]->data(), arg0.offset,
+                         out_arr->buffers[0]->data(), out_arr->offset, batch.length));
   }
 
-  internal::CopyBitmap(arg0.buffers[0]->data(), arg0.offset, batch.length,
-                       out_arr->buffers[0]->mutable_data(), out_arr->offset);
+  CopyBitmap(arg0.buffers[0]->data(), arg0.offset, batch.length,
+             out_arr->buffers[0]->mutable_data(), out_arr->offset);
   return ExecCopy(ctx, batch, out);
 }
 
@@ -587,16 +591,33 @@ Status ExecNoPreallocatedAnything(KernelContext* ctx, const ExecBatch& batch,
   Status s = (ctx->AllocateBitmap(out_arr->length).Value(&out_arr->buffers[0]));
   DCHECK_OK(s);
   const ArrayData& arg0 = *batch[0].array();
-  internal::CopyBitmap(arg0.buffers[0]->data(), arg0.offset, batch.length,
-                       out_arr->buffers[0]->mutable_data(), /*offset=*/0);
+  CopyBitmap(arg0.buffers[0]->data(), arg0.offset, batch.length,
+             out_arr->buffers[0]->mutable_data(), /*offset=*/0);
 
   // Reuse the kernel that allocates the data
   return ExecNoPreallocatedData(ctx, batch, out);
 }
 
-struct ExampleOptions : public FunctionOptions {
+class ExampleOptionsType : public FunctionOptionsType {
+ public:
+  static const FunctionOptionsType* GetInstance() {
+    static std::unique_ptr<FunctionOptionsType> instance(new ExampleOptionsType());
+    return instance.get();
+  }
+  const char* type_name() const override { return "example"; }
+  std::string Stringify(const FunctionOptions& options) const override {
+    return type_name();
+  }
+  bool Compare(const FunctionOptions& options,
+               const FunctionOptions& other) const override {
+    return true;
+  }
+};
+class ExampleOptions : public FunctionOptions {
+ public:
+  explicit ExampleOptions(std::shared_ptr<Scalar> value)
+      : FunctionOptions(ExampleOptionsType::GetInstance()), value(std::move(value)) {}
   std::shared_ptr<Scalar> value;
-  explicit ExampleOptions(std::shared_ptr<Scalar> value) : value(std::move(value)) {}
 };
 
 struct ExampleState : public KernelState {
diff --git a/cpp/src/arrow/compute/function.cc b/cpp/src/arrow/compute/function.cc
index 0f94baaedfc..05d14d03b16 100644
--- a/cpp/src/arrow/compute/function.cc
+++ b/cpp/src/arrow/compute/function.cc
@@ -21,10 +21,13 @@
 #include <memory>
 #include <sstream>
 
+#include "arrow/compute/api_scalar.h"
 #include "arrow/compute/cast.h"
 #include "arrow/compute/exec.h"
 #include "arrow/compute/exec_internal.h"
+#include "arrow/compute/function_internal.h"
 #include "arrow/compute/kernels/common.h"
+#include "arrow/compute/registry.h"
 #include "arrow/datum.h"
 #include "arrow/util/cpu_info.h"
 
@@ -33,6 +36,38 @@ namespace arrow {
 using internal::checked_cast;
 
 namespace compute {
+Result<std::shared_ptr<Buffer>> FunctionOptionsType::Serialize(
+    const FunctionOptions&) const {
+  return Status::NotImplemented("Serialize for ", type_name());
+}
+
+Result<std::unique_ptr<FunctionOptions>> FunctionOptionsType::Deserialize(
+    const Buffer& buffer) const {
+  return Status::NotImplemented("Deserialize for ", type_name());
+}
+
+std::string FunctionOptions::ToString() const { return options_type()->Stringify(*this); }
+
+bool FunctionOptions::Equals(const FunctionOptions& other) const {
+  if (this == &other) return true;
+  if (options_type() != other.options_type()) return false;
+  return options_type()->Compare(*this, other);
+}
+
+Result<std::shared_ptr<Buffer>> FunctionOptions::Serialize() const {
+  return options_type()->Serialize(*this);
+}
+
+Result<std::unique_ptr<FunctionOptions>> FunctionOptions::Deserialize(
+    const std::string& type_name, const Buffer& buffer) {
+  ARROW_ASSIGN_OR_RAISE(auto options,
+                        GetFunctionRegistry()->GetFunctionOptionsType(type_name));
+  return options->Deserialize(buffer);
+}
+
+void PrintTo(const FunctionOptions& options, std::ostream* os) {
+  *os << options.ToString();
+}
 
 static const FunctionDoc kEmptyFunctionDoc{};
 
diff --git a/cpp/src/arrow/compute/function.h b/cpp/src/arrow/compute/function.h
index 9a3e1c1852f..bd854bbb28e 100644
--- a/cpp/src/arrow/compute/function.h
+++ b/cpp/src/arrow/compute/function.h
@@ -29,6 +29,7 @@
 #include "arrow/datum.h"
 #include "arrow/result.h"
 #include "arrow/status.h"
+#include "arrow/util/compare.h"
 #include "arrow/util/macros.h"
 #include "arrow/util/visibility.h"
 
@@ -39,12 +40,50 @@ namespace compute {
 ///
 /// @{
 
+/// \brief Extension point for defining options outside libarrow (but
+/// still within this project).
+class ARROW_EXPORT FunctionOptionsType {
+ public:
+  virtual ~FunctionOptionsType() = default;
+
+  virtual const char* type_name() const = 0;
+  virtual std::string Stringify(const FunctionOptions&) const = 0;
+  virtual bool Compare(const FunctionOptions&, const FunctionOptions&) const = 0;
+  virtual Result<std::shared_ptr<Buffer>> Serialize(const FunctionOptions&) const;
+  virtual Result<std::unique_ptr<FunctionOptions>> Deserialize(
+      const Buffer& buffer) const;
+};
+
 /// \brief Base class for specifying options configuring a function's behavior,
 /// such as error handling.
-struct ARROW_EXPORT FunctionOptions {
+class ARROW_EXPORT FunctionOptions : public util::EqualityComparable<FunctionOptions> {
+ public:
   virtual ~FunctionOptions() = default;
+
+  const FunctionOptionsType* options_type() const { return options_type_; }
+  const char* type_name() const { return options_type()->type_name(); }
+
+  bool Equals(const FunctionOptions& other) const;
+  using util::EqualityComparable<FunctionOptions>::Equals;
+  using util::EqualityComparable<FunctionOptions>::operator==;
+  using util::EqualityComparable<FunctionOptions>::operator!=;
+  std::string ToString() const;
+  /// \brief Serialize an options struct to a buffer.
+  Result<std::shared_ptr<Buffer>> Serialize() const;
+  /// \brief Deserialize an options struct from a buffer.
+  /// Note: this will only look for `type_name` in the default FunctionRegistry;
+  /// to use a custom FunctionRegistry, look up the FunctionOptionsType, then
+  /// call FunctionOptionsType::Deserialize().
+  static Result<std::unique_ptr<FunctionOptions>> Deserialize(
+      const std::string& type_name, const Buffer& buffer);
+
+ protected:
+  explicit FunctionOptions(const FunctionOptionsType* type) : options_type_(type) {}
+  const FunctionOptionsType* options_type_;
 };
 
+ARROW_EXPORT void PrintTo(const FunctionOptions&, std::ostream*);
+
 /// \brief Contains the number of required arguments for the function.
 ///
 /// Naming conventions taken from https://en.wikipedia.org/wiki/Arity.
diff --git a/cpp/src/arrow/compute/function_internal.cc b/cpp/src/arrow/compute/function_internal.cc
new file mode 100644
index 00000000000..5234a421a7e
--- /dev/null
+++ b/cpp/src/arrow/compute/function_internal.cc
@@ -0,0 +1,109 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/compute/function_internal.h"
+
+#include "arrow/array/util.h"
+#include "arrow/compute/function.h"
+#include "arrow/compute/registry.h"
+#include "arrow/io/memory.h"
+#include "arrow/ipc/reader.h"
+#include "arrow/ipc/writer.h"
+#include "arrow/record_batch.h"
+#include "arrow/scalar.h"
+#include "arrow/util/checked_cast.h"
+
+namespace arrow {
+namespace compute {
+namespace internal {
+using ::arrow::internal::checked_cast;
+
+constexpr char kTypeNameField[] = "_type_name";
+
+Result<std::shared_ptr<StructScalar>> FunctionOptionsToStructScalar(
+    const FunctionOptions& options) {
+  std::vector<std::string> field_names;
+  std::vector<std::shared_ptr<Scalar>> values;
+  const auto* options_type =
+      checked_cast<const GenericOptionsType*>(options.options_type());
+  RETURN_NOT_OK(options_type->ToStructScalar(options, &field_names, &values));
+  field_names.push_back(kTypeNameField);
+  const char* options_name = options.type_name();
+  values.emplace_back(
+      new BinaryScalar(Buffer::Wrap(options_name, std::strlen(options_name))));
+  return StructScalar::Make(std::move(values), std::move(field_names));
+}
+
+Result<std::unique_ptr<FunctionOptions>> FunctionOptionsFromStructScalar(
+    const StructScalar& scalar) {
+  ARROW_ASSIGN_OR_RAISE(auto type_name_holder, scalar.field(kTypeNameField));
+  const std::string type_name =
+      checked_cast<const BinaryScalar&>(*type_name_holder).value->ToString();
+  ARROW_ASSIGN_OR_RAISE(auto raw_options_type,
+                        GetFunctionRegistry()->GetFunctionOptionsType(type_name));
+  const auto* options_type = checked_cast<const GenericOptionsType*>(raw_options_type);
+  return options_type->FromStructScalar(scalar);
+}
+
+Result<std::shared_ptr<Buffer>> GenericOptionsType::Serialize(
+    const FunctionOptions& options) const {
+  ARROW_ASSIGN_OR_RAISE(auto scalar, FunctionOptionsToStructScalar(options));
+  ARROW_ASSIGN_OR_RAISE(auto array, MakeArrayFromScalar(*scalar, 1));
+  auto batch =
+      RecordBatch::Make(schema({field("", array->type())}), /*num_rows=*/1, {array});
+  ARROW_ASSIGN_OR_RAISE(auto stream, io::BufferOutputStream::Create());
+  ARROW_ASSIGN_OR_RAISE(auto writer, ipc::MakeFileWriter(stream, batch->schema()));
+  RETURN_NOT_OK(writer->WriteRecordBatch(*batch));
+  RETURN_NOT_OK(writer->Close());
+  return stream->Finish();
+}
+
+Result<std::unique_ptr<FunctionOptions>> GenericOptionsType::Deserialize(
+    const Buffer& buffer) const {
+  return DeserializeFunctionOptions(buffer);
+}
+
+Result<std::unique_ptr<FunctionOptions>> DeserializeFunctionOptions(
+    const Buffer& buffer) {
+  io::BufferReader stream(buffer);
+  ARROW_ASSIGN_OR_RAISE(auto reader, ipc::RecordBatchFileReader::Open(&stream));
+  ARROW_ASSIGN_OR_RAISE(auto batch, reader->ReadRecordBatch(0));
+  if (batch->num_rows() != 1) {
+    return Status::Invalid(
+        "serialized FunctionOptions's batch repr was not a single row - had ",
+        batch->num_rows());
+  }
+  if (batch->num_columns() != 1) {
+    return Status::Invalid(
+        "serialized FunctionOptions's batch repr was not a single column - had ",
+        batch->num_columns());
+  }
+  auto column = batch->column(0);
+  if (column->type()->id() != Type::STRUCT) {
+    return Status::Invalid(
+        "serialized FunctionOptions's batch repr was not a struct column - was ",
+        column->type()->ToString());
+  }
+  ARROW_ASSIGN_OR_RAISE(auto raw_scalar,
+                        checked_cast<const StructArray&>(*column).GetScalar(0));
+  auto scalar = checked_cast<const StructScalar&>(*raw_scalar);
+  return FunctionOptionsFromStructScalar(scalar);
+}
+
+}  // namespace internal
+}  // namespace compute
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/function_internal.h b/cpp/src/arrow/compute/function_internal.h
new file mode 100644
index 00000000000..fdd7f09ba1f
--- /dev/null
+++ b/cpp/src/arrow/compute/function_internal.h
@@ -0,0 +1,626 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "arrow/array/builder_base.h"
+#include "arrow/array/builder_binary.h"
+#include "arrow/array/builder_nested.h"
+#include "arrow/compute/api_vector.h"
+#include "arrow/compute/function.h"
+#include "arrow/compute/type_fwd.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/key_value_metadata.h"
+#include "arrow/util/reflection_internal.h"
+#include "arrow/util/string.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+struct Scalar;
+struct StructScalar;
+using ::arrow::internal::checked_cast;
+
+namespace internal {
+template <>
+struct EnumTraits<compute::SortOrder>
+    : BasicEnumTraits<compute::SortOrder, compute::SortOrder::Ascending,
+                      compute::SortOrder::Descending> {
+  static std::string name() { return "SortOrder"; }
+  static std::string value_name(compute::SortOrder value) {
+    switch (value) {
+      case compute::SortOrder::Ascending:
+        return "Ascending";
+      case compute::SortOrder::Descending:
+        return "Descending";
+    }
+    return "<INVALID>";
+  }
+};
+}  // namespace internal
+
+namespace compute {
+namespace internal {
+
+using arrow::internal::EnumTraits;
+using arrow::internal::has_enum_traits;
+
+template <typename Enum, typename CType = typename std::underlying_type<Enum>::type>
+Result<Enum> ValidateEnumValue(CType raw) {
+  for (auto valid : EnumTraits<Enum>::values()) {
+    if (raw == static_cast<CType>(valid)) {
+      return static_cast<Enum>(raw);
+    }
+  }
+  return Status::Invalid("Invalid value for ", EnumTraits<Enum>::name(), ": ", raw);
+}
+
+class GenericOptionsType : public FunctionOptionsType {
+ public:
+  Result<std::shared_ptr<Buffer>> Serialize(const FunctionOptions&) const override;
+  Result<std::unique_ptr<FunctionOptions>> Deserialize(
+      const Buffer& buffer) const override;
+  virtual Status ToStructScalar(const FunctionOptions& options,
+                                std::vector<std::string>* field_names,
+                                std::vector<std::shared_ptr<Scalar>>* values) const = 0;
+  virtual Result<std::unique_ptr<FunctionOptions>> FromStructScalar(
+      const StructScalar& scalar) const = 0;
+};
+
+ARROW_EXPORT
+Result<std::shared_ptr<StructScalar>> FunctionOptionsToStructScalar(
+    const FunctionOptions&);
+ARROW_EXPORT
+Result<std::unique_ptr<FunctionOptions>> FunctionOptionsFromStructScalar(
+    const StructScalar&);
+ARROW_EXPORT
+Result<std::unique_ptr<FunctionOptions>> DeserializeFunctionOptions(const Buffer& buffer);
+
+template <typename T>
+static inline enable_if_t<!has_enum_traits<T>::value, std::string> GenericToString(
+    const T& value) {
+  std::stringstream ss;
+  ss << value;
+  return ss.str();
+}
+
+static inline std::string GenericToString(bool value) { return value ? "true" : "false"; }
+
+static inline std::string GenericToString(const std::string& value) {
+  std::stringstream ss;
+  ss << '"' << value << '"';
+  return ss.str();
+}
+
+template <typename T>
+static inline enable_if_t<has_enum_traits<T>::value, std::string> GenericToString(
+    const T value) {
+  return EnumTraits<T>::value_name(value);
+}
+
+template <typename T>
+static inline std::string GenericToString(const std::shared_ptr<T>& value) {
+  std::stringstream ss;
+  return value ? value->ToString() : "<NULLPTR>";
+}
+
+static inline std::string GenericToString(const std::shared_ptr<Scalar>& value) {
+  std::stringstream ss;
+  ss << value->type->ToString() << ":" << value->ToString();
+  return ss.str();
+}
+
+static inline std::string GenericToString(
+    const std::shared_ptr<const KeyValueMetadata>& value) {
+  std::stringstream ss;
+  ss << "KeyValueMetadata{";
+  if (value) {
+    bool first = true;
+    for (const auto& pair : value->sorted_pairs()) {
+      if (!first) ss << ", ";
+      first = false;
+      ss << pair.first << ':' << pair.second;
+    }
+  }
+  ss << '}';
+  return ss.str();
+}
+
+static inline std::string GenericToString(const Datum& value) {
+  switch (value.kind()) {
+    case Datum::NONE:
+      return "<NULL DATUM>";
+    case Datum::SCALAR:
+      return GenericToString(value.scalar());
+    case Datum::ARRAY: {
+      std::stringstream ss;
+      ss << value.type()->ToString() << ':' << value.make_array()->ToString();
+      return ss.str();
+    }
+    case Datum::CHUNKED_ARRAY:
+    case Datum::RECORD_BATCH:
+    case Datum::TABLE:
+    case Datum::COLLECTION:
+      return value.ToString();
+  }
+  return value.ToString();
+}
+
+template <typename T>
+static inline std::string GenericToString(const std::vector<T>& value) {
+  std::stringstream ss;
+  ss << "[";
+  bool first = true;
+  // Don't use range-for with auto& to avoid Clang -Wrange-loop-analysis
+  for (auto it = value.begin(); it != value.end(); it++) {
+    if (!first) ss << ", ";
+    first = false;
+    ss << GenericToString(*it);
+  }
+  ss << ']';
+  return ss.str();
+}
+
+static inline std::string GenericToString(SortOrder value) {
+  switch (value) {
+    case SortOrder::Ascending:
+      return "Ascending";
+    case SortOrder::Descending:
+      return "Descending";
+  }
+  return "<INVALID SORT ORDER>";
+}
+
+static inline std::string GenericToString(const std::vector<SortKey>& value) {
+  std::stringstream ss;
+  ss << '[';
+  bool first = true;
+  for (const auto& key : value) {
+    if (!first) {
+      ss << ", ";
+    }
+    first = false;
+    ss << key.ToString();
+  }
+  ss << ']';
+  return ss.str();
+}
+
+template <typename T>
+static inline bool GenericEquals(const T& left, const T& right) {
+  return left == right;
+}
+
+template <typename T>
+static inline bool GenericEquals(const std::shared_ptr<T>& left,
+                                 const std::shared_ptr<T>& right) {
+  if (left && right) {
+    return left->Equals(*right);
+  }
+  return left == right;
+}
+
+static inline bool IsEmpty(const std::shared_ptr<const KeyValueMetadata>& meta) {
+  return !meta || meta->size() == 0;
+}
+
+static inline bool GenericEquals(const std::shared_ptr<const KeyValueMetadata>& left,
+                                 const std::shared_ptr<const KeyValueMetadata>& right) {
+  // Special case since null metadata is considered equivalent to empty
+  if (IsEmpty(left) || IsEmpty(right)) {
+    return IsEmpty(left) && IsEmpty(right);
+  }
+  return left->Equals(*right);
+}
+
+template <typename T>
+static inline bool GenericEquals(const std::vector<T>& left,
+                                 const std::vector<T>& right) {
+  if (left.size() != right.size()) return false;
+  for (size_t i = 0; i < left.size(); i++) {
+    if (!GenericEquals(left[i], right[i])) return false;
+  }
+  return true;
+}
+
+template <typename T>
+static inline decltype(TypeTraits<typename CTypeTraits<T>::ArrowType>::type_singleton())
+GenericTypeSingleton() {
+  return TypeTraits<typename CTypeTraits<T>::ArrowType>::type_singleton();
+}
+
+template <typename T>
+static inline enable_if_same<T, std::shared_ptr<const KeyValueMetadata>,
+                             std::shared_ptr<DataType>>
+GenericTypeSingleton() {
+  return map(binary(), binary());
+}
+
+template <typename T>
+static inline enable_if_t<has_enum_traits<T>::value, std::shared_ptr<DataType>>
+GenericTypeSingleton() {
+  return TypeTraits<typename EnumTraits<T>::Type>::type_singleton();
+}
+
+template <typename T>
+static inline enable_if_same<T, SortKey, std::shared_ptr<DataType>>
+GenericTypeSingleton() {
+  std::vector<std::shared_ptr<Field>> fields;
+  fields.emplace_back(new Field("name", GenericTypeSingleton<std::string>()));
+  fields.emplace_back(new Field("order", GenericTypeSingleton<SortOrder>()));
+  return std::make_shared<StructType>(std::move(fields));
+}
+
+// N.B. ordering of overloads is relatively fragile
+template <typename T>
+static inline Result<decltype(MakeScalar(std::declval<T>()))> GenericToScalar(
+    const T& value) {
+  return MakeScalar(value);
+}
+
+// For Clang/libc++: when iterating through vector<bool>, we can't
+// pass it by reference so the overload above doesn't apply
+static inline Result<std::shared_ptr<Scalar>> GenericToScalar(bool value) {
+  return MakeScalar(value);
+}
+
+template <typename T, typename Enable = enable_if_t<has_enum_traits<T>::value>>
+static inline Result<std::shared_ptr<Scalar>> GenericToScalar(const T value) {
+  using CType = typename EnumTraits<T>::CType;
+  return GenericToScalar(static_cast<CType>(value));
+}
+
+static inline Result<std::shared_ptr<Scalar>> GenericToScalar(const SortKey& value) {
+  ARROW_ASSIGN_OR_RAISE(auto name, GenericToScalar(value.name));
+  ARROW_ASSIGN_OR_RAISE(auto order, GenericToScalar(value.order));
+  return StructScalar::Make({name, order}, {"name", "order"});
+}
+
+static inline Result<std::shared_ptr<Scalar>> GenericToScalar(
+    const std::shared_ptr<const KeyValueMetadata>& value) {
+  auto ty = GenericTypeSingleton<std::shared_ptr<const KeyValueMetadata>>();
+  std::unique_ptr<ArrayBuilder> builder;
+  RETURN_NOT_OK(MakeBuilder(default_memory_pool(), ty, &builder));
+  auto* map_builder = checked_cast<MapBuilder*>(builder.get());
+  auto* key_builder = checked_cast<BinaryBuilder*>(map_builder->key_builder());
+  auto* item_builder = checked_cast<BinaryBuilder*>(map_builder->item_builder());
+  RETURN_NOT_OK(map_builder->Append());
+  if (value) {
+    RETURN_NOT_OK(key_builder->AppendValues(value->keys()));
+    RETURN_NOT_OK(item_builder->AppendValues(value->values()));
+  }
+  std::shared_ptr<Array> arr;
+  RETURN_NOT_OK(map_builder->Finish(&arr));
+  return arr->GetScalar(0);
+}
+
+template <typename T>
+static inline Result<std::shared_ptr<Scalar>> GenericToScalar(
+    const std::vector<T>& value) {
+  std::shared_ptr<DataType> type = GenericTypeSingleton<T>();
+  std::vector<std::shared_ptr<Scalar>> scalars;
+  scalars.reserve(value.size());
+  // Don't use range-for with auto& to avoid Clang -Wrange-loop-analysis
+  for (auto it = value.begin(); it != value.end(); it++) {
+    ARROW_ASSIGN_OR_RAISE(auto scalar, GenericToScalar(*it));
+    scalars.push_back(std::move(scalar));
+  }
+  std::unique_ptr<ArrayBuilder> builder;
+  RETURN_NOT_OK(
+      MakeBuilder(default_memory_pool(), type ? type : scalars[0]->type, &builder));
+  RETURN_NOT_OK(builder->AppendScalars(scalars));
+  std::shared_ptr<Array> out;
+  RETURN_NOT_OK(builder->Finish(&out));
+  return std::make_shared<ListScalar>(std::move(out));
+}
+
+static inline Result<std::shared_ptr<Scalar>> GenericToScalar(
+    const std::shared_ptr<DataType>& value) {
+  if (!value) {
+    return Status::Invalid("shared_ptr<DataType> is nullptr");
+  }
+  return MakeNullScalar(value);
+}
+
+static inline Result<std::shared_ptr<Scalar>> GenericToScalar(
+    const std::shared_ptr<Scalar>& value) {
+  return value;
+}
+
+static inline Result<std::shared_ptr<Scalar>> GenericToScalar(
+    const std::shared_ptr<Array>& value) {
+  return std::make_shared<ListScalar>(value);
+}
+
+static inline Result<std::shared_ptr<Scalar>> GenericToScalar(const Datum& value) {
+  // TODO(ARROW-9434): store in a union instead.
+  switch (value.kind()) {
+    case Datum::ARRAY:
+      return GenericToScalar(value.make_array());
+      break;
+    default:
+      return Status::NotImplemented("Cannot serialize Datum kind ", value.kind());
+  }
+}
+
+template <typename T>
+static inline enable_if_primitive_ctype<typename CTypeTraits<T>::ArrowType, Result<T>>
+GenericFromScalar(const std::shared_ptr<Scalar>& value) {
+  using ArrowType = typename CTypeTraits<T>::ArrowType;
+  using ScalarType = typename TypeTraits<ArrowType>::ScalarType;
+  if (value->type->id() != ArrowType::type_id) {
+    return Status::Invalid("Expected type ", ArrowType::type_id, " but got ",
+                           value->type->ToString());
+  }
+  const auto& holder = checked_cast<const ScalarType&>(*value);
+  if (!holder.is_valid) return Status::Invalid("Got null scalar");
+  return holder.value;
+}
+
+template <typename T>
+static inline enable_if_primitive_ctype<typename EnumTraits<T>::Type, Result<T>>
+GenericFromScalar(const std::shared_ptr<Scalar>& value) {
+  ARROW_ASSIGN_OR_RAISE(auto raw_val,
+                        GenericFromScalar<typename EnumTraits<T>::CType>(value));
+  return ValidateEnumValue<T>(raw_val);
+}
+
+template <typename T, typename U>
+using enable_if_same_result = enable_if_same<T, U, Result<T>>;
+
+template <typename T>
+static inline enable_if_same_result<T, std::string> GenericFromScalar(
+    const std::shared_ptr<Scalar>& value) {
+  if (!is_base_binary_like(value->type->id())) {
+    return Status::Invalid("Expected binary-like type but got ", value->type->ToString());
+  }
+  const auto& holder = checked_cast<const BaseBinaryScalar&>(*value);
+  if (!holder.is_valid) return Status::Invalid("Got null scalar");
+  return holder.value->ToString();
+}
+
+template <typename T>
+static inline enable_if_same_result<T, SortKey> GenericFromScalar(
+    const std::shared_ptr<Scalar>& value) {
+  if (value->type->id() != Type::STRUCT) {
+    return Status::Invalid("Expected type STRUCT but got ", value->type->id());
+  }
+  if (!value->is_valid) return Status::Invalid("Got null scalar");
+  const auto& holder = checked_cast<const StructScalar&>(*value);
+  ARROW_ASSIGN_OR_RAISE(auto name_holder, holder.field("name"));
+  ARROW_ASSIGN_OR_RAISE(auto order_holder, holder.field("order"));
+  ARROW_ASSIGN_OR_RAISE(auto name, GenericFromScalar<std::string>(name_holder));
+  ARROW_ASSIGN_OR_RAISE(auto order, GenericFromScalar<SortOrder>(order_holder));
+  return SortKey{std::move(name), order};
+}
+
+template <typename T>
+static inline enable_if_same_result<T, std::shared_ptr<DataType>> GenericFromScalar(
+    const std::shared_ptr<Scalar>& value) {
+  return value->type;
+}
+
+template <typename T>
+static inline enable_if_same_result<T, std::shared_ptr<Scalar>> GenericFromScalar(
+    const std::shared_ptr<Scalar>& value) {
+  return value;
+}
+
+template <typename T>
+static inline enable_if_same_result<T, std::shared_ptr<const KeyValueMetadata>>
+GenericFromScalar(const std::shared_ptr<Scalar>& value) {
+  auto ty = GenericTypeSingleton<std::shared_ptr<const KeyValueMetadata>>();
+  if (!value->type->Equals(ty)) {
+    return Status::Invalid("Expected ", ty->ToString(), " but got ",
+                           value->type->ToString());
+  }
+  const auto& holder = checked_cast<const MapScalar&>(*value);
+  std::vector<std::string> keys;
+  std::vector<std::string> values;
+  const auto& list = checked_cast<const StructArray&>(*holder.value);
+  const auto& key_arr = checked_cast<const BinaryArray&>(*list.field(0));
+  const auto& value_arr = checked_cast<const BinaryArray&>(*list.field(1));
+  for (int64_t i = 0; i < list.length(); i++) {
+    keys.push_back(key_arr.GetString(i));
+    values.push_back(value_arr.GetString(i));
+  }
+  return key_value_metadata(std::move(keys), std::move(values));
+}
+
+template <typename T>
+static inline enable_if_same_result<T, Datum> GenericFromScalar(
+    const std::shared_ptr<Scalar>& value) {
+  if (value->type->id() == Type::LIST) {
+    const auto& holder = checked_cast<const BaseListScalar&>(*value);
+    return holder.value;
+  }
+  // TODO(ARROW-9434): handle other possible datum kinds by looking for a union
+  return Status::Invalid("Cannot deserialize Datum from ", value->ToString());
+}
+
+template <typename T>
+static enable_if_same<typename CTypeTraits<T>::ArrowType, ListType, Result<T>>
+GenericFromScalar(const std::shared_ptr<Scalar>& value) {
+  using ValueType = typename T::value_type;
+  if (value->type->id() != Type::LIST) {
+    return Status::Invalid("Expected type LIST but got ", value->type->ToString());
+  }
+  const auto& holder = checked_cast<const BaseListScalar&>(*value);
+  if (!holder.is_valid) return Status::Invalid("Got null scalar");
+  std::vector<ValueType> result;
+  for (int i = 0; i < holder.value->length(); i++) {
+    ARROW_ASSIGN_OR_RAISE(auto scalar, holder.value->GetScalar(i));
+    ARROW_ASSIGN_OR_RAISE(auto v, GenericFromScalar<ValueType>(scalar));
+    result.push_back(std::move(v));
+  }
+  return result;
+}
+
+template <typename Options>
+struct StringifyImpl {
+  template <typename Tuple>
+  StringifyImpl(const Options& obj, const Tuple& props)
+      : obj_(obj), members_(props.size()) {
+    props.ForEach(*this);
+  }
+
+  template <typename Property>
+  void operator()(const Property& prop, size_t i) {
+    std::stringstream ss;
+    ss << prop.name() << '=' << GenericToString(prop.get(obj_));
+    members_[i] = ss.str();
+  }
+
+  std::string Finish() {
+    return "{" + arrow::internal::JoinStrings(members_, ", ") + "}";
+  }
+
+  const Options& obj_;
+  std::vector<std::string> members_;
+};
+
+template <typename Options>
+struct CompareImpl {
+  template <typename Tuple>
+  CompareImpl(const Options& l, const Options& r, const Tuple& props)
+      : left_(l), right_(r) {
+    props.ForEach(*this);
+  }
+
+  template <typename Property>
+  void operator()(const Property& prop, size_t) {
+    equal_ &= GenericEquals(prop.get(left_), prop.get(right_));
+  }
+
+  const Options& left_;
+  const Options& right_;
+  bool equal_ = true;
+};
+
+template <typename Options>
+struct ToStructScalarImpl {
+  template <typename Tuple>
+  ToStructScalarImpl(const Options& obj, const Tuple& props,
+                     std::vector<std::string>* field_names,
+                     std::vector<std::shared_ptr<Scalar>>* values)
+      : obj_(obj), field_names_(field_names), values_(values) {
+    props.ForEach(*this);
+  }
+
+  template <typename Property>
+  void operator()(const Property& prop, size_t) {
+    if (!status_.ok()) return;
+    auto result = GenericToScalar(prop.get(obj_));
+    if (!result.ok()) {
+      status_ = result.status().WithMessage("Could not serialize field ", prop.name(),
+                                            " of options type ", Options::kTypeName, ": ",
+                                            result.status().message());
+      return;
+    }
+    field_names_->emplace_back(prop.name());
+    values_->push_back(result.MoveValueUnsafe());
+  }
+
+  const Options& obj_;
+  Status status_;
+  std::vector<std::string>* field_names_;
+  std::vector<std::shared_ptr<Scalar>>* values_;
+};
+
+template <typename Options>
+struct FromStructScalarImpl {
+  template <typename Tuple>
+  FromStructScalarImpl(Options* obj, const StructScalar& scalar, const Tuple& props)
+      : obj_(obj), scalar_(scalar) {
+    props.ForEach(*this);
+  }
+
+  template <typename Property>
+  void operator()(const Property& prop, size_t) {
+    if (!status_.ok()) return;
+    auto maybe_holder = scalar_.field(std::string(prop.name()));
+    if (!maybe_holder.ok()) {
+      status_ = maybe_holder.status().WithMessage(
+          "Cannot deserialize field ", prop.name(), " of options type ",
+          Options::kTypeName, ": ", maybe_holder.status().message());
+      return;
+    }
+    auto holder = maybe_holder.MoveValueUnsafe();
+    auto result = GenericFromScalar<typename Property::Type>(holder);
+    if (!result.ok()) {
+      status_ = result.status().WithMessage("Cannot deserialize field ", prop.name(),
+                                            " of options type ", Options::kTypeName, ": ",
+                                            result.status().message());
+      return;
+    }
+    prop.set(obj_, result.MoveValueUnsafe());
+  }
+
+  Options* obj_;
+  Status status_;
+  const StructScalar& scalar_;
+};
+
+template <typename Options, typename... Properties>
+const FunctionOptionsType* GetFunctionOptionsType(const Properties&... properties) {
+  static const class OptionsType : public GenericOptionsType {
+   public:
+    explicit OptionsType(const arrow::internal::PropertyTuple<Properties...> properties)
+        : properties_(properties) {}
+
+    const char* type_name() const override { return Options::kTypeName; }
+
+    std::string Stringify(const FunctionOptions& options) const override {
+      const auto& self = checked_cast<const Options&>(options);
+      return StringifyImpl<Options>(self, properties_).Finish();
+    }
+    bool Compare(const FunctionOptions& options,
+                 const FunctionOptions& other) const override {
+      const auto& lhs = checked_cast<const Options&>(options);
+      const auto& rhs = checked_cast<const Options&>(other);
+      return CompareImpl<Options>(lhs, rhs, properties_).equal_;
+    }
+    Status ToStructScalar(const FunctionOptions& options,
+                          std::vector<std::string>* field_names,
+                          std::vector<std::shared_ptr<Scalar>>* values) const override {
+      const auto& self = checked_cast<const Options&>(options);
+      RETURN_NOT_OK(
+          ToStructScalarImpl<Options>(self, properties_, field_names, values).status_);
+      return Status::OK();
+    }
+    Result<std::unique_ptr<FunctionOptions>> FromStructScalar(
+        const StructScalar& scalar) const override {
+      auto options = std::unique_ptr<Options>(new Options());
+      RETURN_NOT_OK(
+          FromStructScalarImpl<Options>(options.get(), scalar, properties_).status_);
+      return std::move(options);
+    }
+
+   private:
+    const arrow::internal::PropertyTuple<Properties...> properties_;
+  } instance(arrow::internal::MakeProperties(properties...));
+  return &instance;
+}
+
+}  // namespace internal
+}  // namespace compute
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/function_test.cc b/cpp/src/arrow/compute/function_test.cc
index 581555e931f..4c42ce39600 100644
--- a/cpp/src/arrow/compute/function_test.cc
+++ b/cpp/src/arrow/compute/function_test.cc
@@ -21,16 +21,113 @@
 
 #include <gtest/gtest.h>
 
+#include "arrow/compute/api_aggregate.h"
+#include "arrow/compute/api_scalar.h"
+#include "arrow/compute/api_vector.h"
+#include "arrow/compute/cast.h"
 #include "arrow/compute/function.h"
 #include "arrow/compute/kernel.h"
 #include "arrow/datum.h"
 #include "arrow/status.h"
 #include "arrow/testing/gtest_util.h"
 #include "arrow/type.h"
+#include "arrow/util/key_value_metadata.h"
 
 namespace arrow {
 namespace compute {
 
+TEST(FunctionOptions, Equality) {
+  std::vector<std::shared_ptr<FunctionOptions>> options;
+  options.emplace_back(new ScalarAggregateOptions());
+  options.emplace_back(new ScalarAggregateOptions(/*skip_nulls=*/false, /*min_count=*/1));
+  options.emplace_back(new ModeOptions());
+  options.emplace_back(new ModeOptions(/*n=*/2));
+  options.emplace_back(new VarianceOptions());
+  options.emplace_back(new VarianceOptions(/*ddof=*/2));
+  options.emplace_back(new QuantileOptions());
+  options.emplace_back(
+      new QuantileOptions(/*q=*/0.75, QuantileOptions::Interpolation::MIDPOINT));
+  options.emplace_back(new TDigestOptions());
+  options.emplace_back(
+      new TDigestOptions(/*q=*/0.75, /*delta=*/50, /*buffer_size=*/1024));
+  options.emplace_back(new IndexOptions(ScalarFromJSON(int64(), "16")));
+  options.emplace_back(new IndexOptions(ScalarFromJSON(boolean(), "true")));
+  options.emplace_back(new IndexOptions(ScalarFromJSON(boolean(), "null")));
+  options.emplace_back(new ElementWiseAggregateOptions());
+  options.emplace_back(new ElementWiseAggregateOptions(/*skip_nulls=*/false));
+  options.emplace_back(new JoinOptions());
+  options.emplace_back(new JoinOptions(JoinOptions::REPLACE, "replacement"));
+  options.emplace_back(new MatchSubstringOptions("pattern"));
+  options.emplace_back(new MatchSubstringOptions("pattern", /*ignore_case=*/true));
+  options.emplace_back(new SplitOptions());
+  options.emplace_back(new SplitOptions(/*max_splits=*/2, /*reverse=*/true));
+  options.emplace_back(new SplitPatternOptions("pattern"));
+  options.emplace_back(
+      new SplitPatternOptions("pattern", /*max_splits=*/2, /*reverse=*/true));
+  options.emplace_back(new ReplaceSubstringOptions("pattern", "replacement"));
+  options.emplace_back(
+      new ReplaceSubstringOptions("pattern", "replacement", /*max_replacements=*/2));
+  options.emplace_back(new ReplaceSliceOptions(0, 1, "foo"));
+  options.emplace_back(new ReplaceSliceOptions(1, -1, "bar"));
+  options.emplace_back(new ExtractRegexOptions("pattern"));
+  options.emplace_back(new ExtractRegexOptions("pattern2"));
+  options.emplace_back(new SetLookupOptions(ArrayFromJSON(int64(), "[1, 2, 3, 4]")));
+  options.emplace_back(new SetLookupOptions(ArrayFromJSON(boolean(), "[true, false]")));
+  options.emplace_back(new StrptimeOptions("%Y", TimeUnit::type::MILLI));
+  options.emplace_back(new StrptimeOptions("%Y", TimeUnit::type::NANO));
+  options.emplace_back(new PadOptions(5, " "));
+  options.emplace_back(new PadOptions(10, "A"));
+  options.emplace_back(new TrimOptions(" "));
+  options.emplace_back(new TrimOptions("abc"));
+  options.emplace_back(new SliceOptions(/*start=*/1));
+  options.emplace_back(new SliceOptions(/*start=*/1, /*stop=*/-5, /*step=*/-2));
+  options.emplace_back(new CompareOptions(CompareOperator::EQUAL));
+  options.emplace_back(new CompareOptions(CompareOperator::LESS));
+  // N.B. we never actually use field_nullability or field_metadata in Arrow
+  options.emplace_back(new ProjectOptions({"col1"}, {true}, {}));
+  options.emplace_back(new ProjectOptions({"col1"}, {false}, {}));
+  options.emplace_back(
+      new ProjectOptions({"col1"}, {false}, {key_value_metadata({{"key", "val"}})}));
+  options.emplace_back(new CastOptions(CastOptions::Safe(boolean())));
+  options.emplace_back(new CastOptions(CastOptions::Unsafe(int64())));
+  options.emplace_back(new FilterOptions());
+  options.emplace_back(
+      new FilterOptions(FilterOptions::NullSelectionBehavior::EMIT_NULL));
+  options.emplace_back(new TakeOptions());
+  options.emplace_back(new TakeOptions(/*boundscheck=*/false));
+  options.emplace_back(new DictionaryEncodeOptions());
+  options.emplace_back(
+      new DictionaryEncodeOptions(DictionaryEncodeOptions::NullEncodingBehavior::ENCODE));
+  options.emplace_back(new ArraySortOptions());
+  options.emplace_back(new ArraySortOptions(SortOrder::Descending));
+  options.emplace_back(new SortOptions());
+  options.emplace_back(new SortOptions({SortKey("key", SortOrder::Ascending)}));
+  options.emplace_back(new SortOptions(
+      {SortKey("key", SortOrder::Descending), SortKey("value", SortOrder::Descending)}));
+  options.emplace_back(new PartitionNthOptions(/*pivot=*/0));
+  options.emplace_back(new PartitionNthOptions(/*pivot=*/42));
+
+  for (size_t i = 0; i < options.size(); i++) {
+    const size_t prev_i = i == 0 ? options.size() - 1 : i - 1;
+    const FunctionOptions& cur = *options[i];
+    const FunctionOptions& prev = *options[prev_i];
+    SCOPED_TRACE(cur.type_name());
+    SCOPED_TRACE(cur.ToString());
+    ASSERT_EQ(cur, cur);
+    ASSERT_NE(cur, prev);
+    ASSERT_NE(prev, cur);
+    ASSERT_NE("", cur.ToString());
+
+    ASSERT_OK_AND_ASSIGN(auto serialized, cur.Serialize());
+    const auto* type_name = cur.type_name();
+    ASSERT_OK_AND_ASSIGN(
+        auto deserialized,
+        FunctionOptions::Deserialize(std::string(type_name, std::strlen(type_name)),
+                                     *serialized));
+    ASSERT_TRUE(cur.Equals(*deserialized));
+  }
+}
+
 struct ExecBatch;
 
 TEST(Arity, Basics) {
diff --git a/cpp/src/arrow/compute/kernel.h b/cpp/src/arrow/compute/kernel.h
index f8d15952e73..c88c924817c 100644
--- a/cpp/src/arrow/compute/kernel.h
+++ b/cpp/src/arrow/compute/kernel.h
@@ -41,7 +41,7 @@
 namespace arrow {
 namespace compute {
 
-struct FunctionOptions;
+class FunctionOptions;
 
 /// \brief Base class for opaque kernel-specific state. For example, if there
 /// is some kind of initialization required.
diff --git a/cpp/src/arrow/compute/registry.cc b/cpp/src/arrow/compute/registry.cc
index 673802f99b0..8a0d9e62518 100644
--- a/cpp/src/arrow/compute/registry.cc
+++ b/cpp/src/arrow/compute/registry.cc
@@ -24,8 +24,10 @@
 #include <utility>
 
 #include "arrow/compute/function.h"
+#include "arrow/compute/function_internal.h"
 #include "arrow/compute/registry_internal.h"
 #include "arrow/status.h"
+#include "arrow/util/logging.h"
 
 namespace arrow {
 namespace compute {
@@ -57,6 +59,20 @@ class FunctionRegistry::FunctionRegistryImpl {
     return Status::OK();
   }
 
+  Status AddFunctionOptionsType(const FunctionOptionsType* options_type,
+                                bool allow_overwrite = false) {
+    std::lock_guard<std::mutex> mutation_guard(lock_);
+
+    const std::string name = options_type->type_name();
+    auto it = name_to_options_type_.find(name);
+    if (it != name_to_options_type_.end() && !allow_overwrite) {
+      return Status::KeyError(
+          "Already have a function options type registered with name: ", name);
+    }
+    name_to_options_type_[name] = options_type;
+    return Status::OK();
+  }
+
   Result<std::shared_ptr<Function>> GetFunction(const std::string& name) const {
     auto it = name_to_function_.find(name);
     if (it == name_to_function_.end()) {
@@ -74,11 +90,21 @@ class FunctionRegistry::FunctionRegistryImpl {
     return results;
   }
 
+  Result<const FunctionOptionsType*> GetFunctionOptionsType(
+      const std::string& name) const {
+    auto it = name_to_options_type_.find(name);
+    if (it == name_to_options_type_.end()) {
+      return Status::KeyError("No function options type registered with name: ", name);
+    }
+    return it->second;
+  }
+
   int num_functions() const { return static_cast<int>(name_to_function_.size()); }
 
  private:
   std::mutex lock_;
   std::unordered_map<std::string, std::shared_ptr<Function>> name_to_function_;
+  std::unordered_map<std::string, const FunctionOptionsType*> name_to_options_type_;
 };
 
 std::unique_ptr<FunctionRegistry> FunctionRegistry::Make() {
@@ -99,6 +125,11 @@ Status FunctionRegistry::AddAlias(const std::string& target_name,
   return impl_->AddAlias(target_name, source_name);
 }
 
+Status FunctionRegistry::AddFunctionOptionsType(const FunctionOptionsType* options_type,
+                                                bool allow_overwrite) {
+  return impl_->AddFunctionOptionsType(options_type, allow_overwrite);
+}
+
 Result<std::shared_ptr<Function>> FunctionRegistry::GetFunction(
     const std::string& name) const {
   return impl_->GetFunction(name);
@@ -108,6 +139,11 @@ std::vector<std::string> FunctionRegistry::GetFunctionNames() const {
   return impl_->GetFunctionNames();
 }
 
+Result<const FunctionOptionsType*> FunctionRegistry::GetFunctionOptionsType(
+    const std::string& name) const {
+  return impl_->GetFunctionOptionsType(name);
+}
+
 int FunctionRegistry::num_functions() const { return impl_->num_functions(); }
 
 namespace internal {
@@ -128,12 +164,16 @@ static std::unique_ptr<FunctionRegistry> CreateBuiltInRegistry() {
   RegisterScalarIfElse(registry.get());
   RegisterScalarTemporal(registry.get());
 
+  RegisterScalarOptions(registry.get());
+
   // Vector functions
   RegisterVectorHash(registry.get());
   RegisterVectorSelection(registry.get());
   RegisterVectorNested(registry.get());
   RegisterVectorSort(registry.get());
 
+  RegisterVectorOptions(registry.get());
+
   // Aggregate functions
   RegisterScalarAggregateBasic(registry.get());
   RegisterScalarAggregateMode(registry.get());
@@ -142,6 +182,8 @@ static std::unique_ptr<FunctionRegistry> CreateBuiltInRegistry() {
   RegisterScalarAggregateVariance(registry.get());
   RegisterHashAggregateBasic(registry.get());
 
+  RegisterAggregateOptions(registry.get());
+
   return registry;
 }
 
diff --git a/cpp/src/arrow/compute/registry.h b/cpp/src/arrow/compute/registry.h
index b4456dc5b6b..e83036db6ac 100644
--- a/cpp/src/arrow/compute/registry.h
+++ b/cpp/src/arrow/compute/registry.h
@@ -32,6 +32,7 @@ namespace arrow {
 namespace compute {
 
 class Function;
+class FunctionOptionsType;
 
 /// \brief A mutable central function registry for built-in functions as well
 /// as user-defined functions. Functions are implementations of
@@ -58,6 +59,11 @@ class ARROW_EXPORT FunctionRegistry {
   /// function with the given name is not registered
   Status AddAlias(const std::string& target_name, const std::string& source_name);
 
+  /// \brief Add a new function options type to the registry. Returns Status::KeyError if
+  /// a function options type with the same name is already registered
+  Status AddFunctionOptionsType(const FunctionOptionsType* options_type,
+                                bool allow_overwrite = false);
+
   /// \brief Retrieve a function by name from the registry
   Result<std::shared_ptr<Function>> GetFunction(const std::string& name) const;
 
@@ -65,6 +71,10 @@ class ARROW_EXPORT FunctionRegistry {
   /// displaying a manifest of available functions
   std::vector<std::string> GetFunctionNames() const;
 
+  /// \brief Retrieve a function options type by name from the registry
+  Result<const FunctionOptionsType*> GetFunctionOptionsType(
+      const std::string& name) const;
+
   /// \brief The number of currently registered functions
   int num_functions() const;
 
diff --git a/cpp/src/arrow/compute/registry_internal.h b/cpp/src/arrow/compute/registry_internal.h
index 68e0f2207f1..dd0271eb43d 100644
--- a/cpp/src/arrow/compute/registry_internal.h
+++ b/cpp/src/arrow/compute/registry_internal.h
@@ -37,12 +37,16 @@ void RegisterScalarFillNull(FunctionRegistry* registry);
 void RegisterScalarIfElse(FunctionRegistry* registry);
 void RegisterScalarTemporal(FunctionRegistry* registry);
 
+void RegisterScalarOptions(FunctionRegistry* registry);
+
 // Vector functions
 void RegisterVectorHash(FunctionRegistry* registry);
 void RegisterVectorSelection(FunctionRegistry* registry);
 void RegisterVectorNested(FunctionRegistry* registry);
 void RegisterVectorSort(FunctionRegistry* registry);
 
+void RegisterVectorOptions(FunctionRegistry* registry);
+
 // Aggregate functions
 void RegisterScalarAggregateBasic(FunctionRegistry* registry);
 void RegisterScalarAggregateMode(FunctionRegistry* registry);
@@ -51,6 +55,8 @@ void RegisterScalarAggregateTDigest(FunctionRegistry* registry);
 void RegisterScalarAggregateVariance(FunctionRegistry* registry);
 void RegisterHashAggregateBasic(FunctionRegistry* registry);
 
+void RegisterAggregateOptions(FunctionRegistry* registry);
+
 }  // namespace internal
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/compute/type_fwd.h b/cpp/src/arrow/compute/type_fwd.h
index 5370837f1b9..8a0d6de7f25 100644
--- a/cpp/src/arrow/compute/type_fwd.h
+++ b/cpp/src/arrow/compute/type_fwd.h
@@ -25,9 +25,9 @@ struct ValueDescr;
 namespace compute {
 
 class Function;
-struct FunctionOptions;
+class FunctionOptions;
 
-struct CastOptions;
+class CastOptions;
 
 struct ExecBatch;
 class ExecContext;
diff --git a/cpp/src/arrow/testing/generator.cc b/cpp/src/arrow/testing/generator.cc
index 71fad394d00..33371d55c6d 100644
--- a/cpp/src/arrow/testing/generator.cc
+++ b/cpp/src/arrow/testing/generator.cc
@@ -95,88 +95,16 @@ std::shared_ptr<arrow::Array> ConstantArrayGenerator::String(int64_t size,
   return ConstantArray<StringType>(size, value);
 }
 
-struct ScalarVectorToArrayImpl {
-  template <typename T, typename AppendScalar,
-            typename BuilderType = typename TypeTraits<T>::BuilderType,
-            typename ScalarType = typename TypeTraits<T>::ScalarType>
-  Status UseBuilder(const AppendScalar& append) {
-    BuilderType builder(type_, default_memory_pool());
-    for (const auto& s : scalars_) {
-      if (s->is_valid) {
-        RETURN_NOT_OK(append(internal::checked_cast<const ScalarType&>(*s), &builder));
-      } else {
-        RETURN_NOT_OK(builder.AppendNull());
-      }
-    }
-    return builder.FinishInternal(&data_);
-  }
-
-  struct AppendValue {
-    template <typename BuilderType, typename ScalarType>
-    Status operator()(const ScalarType& s, BuilderType* builder) const {
-      return builder->Append(s.value);
-    }
-  };
-
-  struct AppendBuffer {
-    template <typename BuilderType, typename ScalarType>
-    Status operator()(const ScalarType& s, BuilderType* builder) const {
-      const Buffer& buffer = *s.value;
-      return builder->Append(util::string_view{buffer});
-    }
-  };
-
-  template <typename T>
-  enable_if_primitive_ctype<T, Status> Visit(const T&) {
-    return UseBuilder<T>(AppendValue{});
-  }
-
-  template <typename T>
-  enable_if_has_string_view<T, Status> Visit(const T&) {
-    return UseBuilder<T>(AppendBuffer{});
-  }
-
-  Status Visit(const StructType& type) {
-    data_ = ArrayData::Make(type_, static_cast<int64_t>(scalars_.size()),
-                            {/*null_bitmap=*/nullptr});
-    data_->child_data.resize(type_->num_fields());
-
-    ScalarVector field_scalars(scalars_.size());
-
-    for (int field_index = 0; field_index < type.num_fields(); ++field_index) {
-      for (size_t i = 0; i < scalars_.size(); ++i) {
-        field_scalars[i] =
-            internal::checked_cast<StructScalar*>(scalars_[i].get())->value[field_index];
-      }
-
-      ARROW_ASSIGN_OR_RAISE(data_->child_data[field_index],
-                            ScalarVectorToArrayImpl{}.Convert(field_scalars));
-    }
-    return Status::OK();
-  }
-
-  Status Visit(const DataType& type) {
-    return Status::NotImplemented("ScalarVectorToArray for type ", type);
-  }
-
-  Result<std::shared_ptr<ArrayData>> Convert(const ScalarVector& scalars) && {
-    if (scalars.size() == 0) {
-      return Status::NotImplemented("ScalarVectorToArray with no scalars");
-    }
-    scalars_ = std::move(scalars);
-    type_ = scalars_[0]->type;
-    RETURN_NOT_OK(VisitTypeInline(*type_, this));
-    return std::move(data_);
-  }
-
-  std::shared_ptr<DataType> type_;
-  ScalarVector scalars_;
-  std::shared_ptr<ArrayData> data_;
-};
-
 Result<std::shared_ptr<Array>> ScalarVectorToArray(const ScalarVector& scalars) {
-  ARROW_ASSIGN_OR_RAISE(auto data, ScalarVectorToArrayImpl{}.Convert(scalars));
-  return MakeArray(std::move(data));
+  if (scalars.empty()) {
+    return Status::NotImplemented("ScalarVectorToArray with no scalars");
+  }
+  std::unique_ptr<arrow::ArrayBuilder> builder;
+  RETURN_NOT_OK(MakeBuilder(default_memory_pool(), scalars[0]->type, &builder));
+  RETURN_NOT_OK(builder->AppendScalars(scalars));
+  std::shared_ptr<Array> out;
+  RETURN_NOT_OK(builder->Finish(&out));
+  return out;
 }
 
 }  // namespace arrow
diff --git a/cpp/src/arrow/util/reflection_internal.h b/cpp/src/arrow/util/reflection_internal.h
index 522815dd2be..0440a2eb563 100644
--- a/cpp/src/arrow/util/reflection_internal.h
+++ b/cpp/src/arrow/util/reflection_internal.h
@@ -21,6 +21,7 @@
 #include <tuple>
 #include <utility>
 
+#include "arrow/type_traits.h"
 #include "arrow/util/string_view.h"
 
 namespace arrow {
@@ -112,5 +113,21 @@ PropertyTuple<Properties...> MakeProperties(Properties... props) {
   return {std::make_tuple(props...)};
 }
 
+template <typename Enum>
+struct EnumTraits {};
+
+template <typename Enum, Enum... Values>
+struct BasicEnumTraits {
+  using CType = typename std::underlying_type<Enum>::type;
+  using Type = typename CTypeTraits<CType>::ArrowType;
+  static std::array<Enum, sizeof...(Values)> values() { return {Values...}; }
+};
+
+template <typename T, typename Enable = void>
+struct has_enum_traits : std::false_type {};
+
+template <typename T>
+struct has_enum_traits<T, void_t<typename EnumTraits<T>::Type>> : std::true_type {};
+
 }  // namespace internal
 }  // namespace arrow
diff --git a/cpp/src/arrow/util/reflection_test.cc b/cpp/src/arrow/util/reflection_test.cc
index 4ffcf679ecc..fb3d3b8fb02 100644
--- a/cpp/src/arrow/util/reflection_test.cc
+++ b/cpp/src/arrow/util/reflection_test.cc
@@ -193,5 +193,32 @@ TEST(Reflection, FromStringToDataMembers) {
   EXPECT_EQ(PersonFromString("Person{age: 19, name: Genos}"), util::nullopt);
 }
 
+enum class PersonType : int8_t {
+  EMPLOYEE,
+  CONTRACTOR,
+};
+
+template <>
+struct EnumTraits<PersonType>
+    : BasicEnumTraits<PersonType, PersonType::EMPLOYEE, PersonType::CONTRACTOR> {
+  static std::string name() { return "PersonType"; }
+  static std::string value_name(PersonType value) {
+    switch (value) {
+      case PersonType::EMPLOYEE:
+        return "EMPLOYEE";
+      case PersonType::CONTRACTOR:
+        return "CONTRACTOR";
+    }
+    return "<INVALID>";
+  }
+};
+
+TEST(Reflection, EnumTraits) {
+  static_assert(!has_enum_traits<Person>::value, "");
+  static_assert(has_enum_traits<PersonType>::value, "");
+  static_assert(std::is_same<EnumTraits<PersonType>::CType, int8_t>::value, "");
+  static_assert(std::is_same<EnumTraits<PersonType>::Type, Int8Type>::value, "");
+}
+
 }  // namespace internal
 }  // namespace arrow
diff --git a/python/pyarrow/_compute.pxd b/python/pyarrow/_compute.pxd
index e187ed75b69..8358271efa7 100644
--- a/python/pyarrow/_compute.pxd
+++ b/python/pyarrow/_compute.pxd
@@ -23,5 +23,8 @@ from pyarrow.includes.libarrow cimport *
 
 
 cdef class FunctionOptions(_Weakrefable):
+    cdef:
+        unique_ptr[CFunctionOptions] wrapped
 
     cdef const CFunctionOptions* get_options(self) except NULL
+    cdef void init(self, unique_ptr[CFunctionOptions] options)
diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx
index ae08a5596f3..c8393103dc5 100644
--- a/python/pyarrow/_compute.pyx
+++ b/python/pyarrow/_compute.pyx
@@ -526,9 +526,70 @@ def call_function(name, args, options=None, memory_pool=None):
 
 
 cdef class FunctionOptions(_Weakrefable):
+    __slots__ = ()  # avoid mistakingly creating attributes
 
     cdef const CFunctionOptions* get_options(self) except NULL:
-        raise NotImplementedError("Unimplemented base options")
+        return self.wrapped.get()
+
+    cdef void init(self, unique_ptr[CFunctionOptions] options):
+        self.wrapped = move(options)
+
+    def serialize(self):
+        cdef:
+            CResult[shared_ptr[CBuffer]] res = self.get_options().Serialize()
+            shared_ptr[CBuffer] c_buf = GetResultValue(res)
+        return pyarrow_wrap_buffer(c_buf)
+
+    @staticmethod
+    def deserialize(buf):
+        cdef:
+            shared_ptr[CBuffer] c_buf = pyarrow_unwrap_buffer(buf)
+            CResult[unique_ptr[CFunctionOptions]] maybe_options = \
+                DeserializeFunctionOptions(deref(c_buf))
+            unique_ptr[CFunctionOptions] c_options
+        c_options = move(GetResultValue(move(maybe_options)))
+        type_name = frombytes(c_options.get().options_type().type_name())
+        mapping = {
+            "array_sort": ArraySortOptions,
+            "cast": CastOptions,
+            "dictionary_encode": DictionaryEncodeOptions,
+            "element_wise_aggregate": ElementWiseAggregateOptions,
+            "extract_regex": ExtractRegexOptions,
+            "filter": FilterOptions,
+            "index": IndexOptions,
+            "join": JoinOptions,
+            "match_substring": MatchSubstringOptions,
+            "mode": ModeOptions,
+            "pad": PadOptions,
+            "partition_nth": PartitionNthOptions,
+            "project": ProjectOptions,
+            "quantile": QuantileOptions,
+            "replace_slice": ReplaceSliceOptions,
+            "replace_substring": ReplaceSubstringOptions,
+            "set_lookup": SetLookupOptions,
+            "scalar_aggregate": ScalarAggregateOptions,
+            "slice": SliceOptions,
+            "sort": SortOptions,
+            "split": SplitOptions,
+            "split_pattern": SplitPatternOptions,
+            "strptime": StrptimeOptions,
+            "t_digest": TDigestOptions,
+            "take": TakeOptions,
+            "trim": TrimOptions,
+            "variance": VarianceOptions,
+        }
+        if type_name not in mapping:
+            raise ValueError(f"Cannot deserialize '{type_name}'")
+        klass = mapping[type_name]
+        options = klass.__new__(klass)
+        (<FunctionOptions> options).init(move(c_options))
+        return options
+
+    def __repr__(self):
+        return frombytes(self.get_options().ToString())
+
+    def __eq__(self, FunctionOptions other):
+        return self.get_options().Equals(deref(other.get_options()))
 
 
 # NOTE:
@@ -541,17 +602,16 @@ cdef class FunctionOptions(_Weakrefable):
 
 cdef class _CastOptions(FunctionOptions):
     cdef:
-        unique_ptr[CCastOptions] options
+        CCastOptions* options
 
-    __slots__ = ()  # avoid mistakingly creating attributes
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.options.get()
+    cdef void init(self, unique_ptr[CFunctionOptions] options):
+        FunctionOptions.init(self, move(options))
+        self.options = <CCastOptions*> self.wrapped.get()
 
     def _set_options(self, DataType target_type, allow_int_overflow,
                      allow_time_truncate, allow_time_overflow,
                      allow_float_truncate, allow_invalid_utf8):
-        self.options.reset(new CCastOptions())
+        self.init(unique_ptr[CFunctionOptions](new CCastOptions()))
         self._set_type(target_type)
         if allow_int_overflow is not None:
             self.allow_int_overflow = allow_int_overflow
@@ -571,10 +631,12 @@ cdef class _CastOptions(FunctionOptions):
             )
 
     def _set_safe(self):
-        self.options.reset(new CCastOptions(CCastOptions.Safe()))
+        self.init(unique_ptr[CFunctionOptions](
+            new CCastOptions(CCastOptions.Safe())))
 
     def _set_unsafe(self):
-        self.options.reset(new CCastOptions(CCastOptions.Unsafe()))
+        self.init(unique_ptr[CFunctionOptions](
+            new CCastOptions(CCastOptions.Unsafe())))
 
     def is_safe(self):
         return not (
@@ -651,15 +713,8 @@ class CastOptions(_CastOptions):
 
 
 cdef class _ElementWiseAggregateOptions(FunctionOptions):
-    cdef:
-        unique_ptr[CElementWiseAggregateOptions] element_wise_aggregate_options
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.element_wise_aggregate_options.get()
-
     def _set_options(self, bint skip_nulls):
-        self.element_wise_aggregate_options.reset(
-            new CElementWiseAggregateOptions(skip_nulls))
+        self.wrapped.reset(new CElementWiseAggregateOptions(skip_nulls))
 
 
 class ElementWiseAggregateOptions(_ElementWiseAggregateOptions):
@@ -668,12 +723,6 @@ class ElementWiseAggregateOptions(_ElementWiseAggregateOptions):
 
 
 cdef class _JoinOptions(FunctionOptions):
-    cdef:
-        unique_ptr[CJoinOptions] join_options
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.join_options.get()
-
     def _set_options(self, null_handling, null_replacement):
         cdef:
             CJoinNullHandlingBehavior c_null_handling = \
@@ -689,7 +738,7 @@ cdef class _JoinOptions(FunctionOptions):
             raise ValueError(
                 '"{}" is not a valid null_handling'
                 .format(null_handling))
-        self.join_options.reset(
+        self.wrapped.reset(
             new CJoinOptions(c_null_handling, c_null_replacement))
 
 
@@ -699,14 +748,8 @@ class JoinOptions(_JoinOptions):
 
 
 cdef class _MatchSubstringOptions(FunctionOptions):
-    cdef:
-        unique_ptr[CMatchSubstringOptions] match_substring_options
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.match_substring_options.get()
-
     def _set_options(self, pattern, bint ignore_case):
-        self.match_substring_options.reset(
+        self.wrapped.reset(
             new CMatchSubstringOptions(tobytes(pattern), ignore_case))
 
 
@@ -716,15 +759,8 @@ class MatchSubstringOptions(_MatchSubstringOptions):
 
 
 cdef class _PadOptions(FunctionOptions):
-    cdef:
-        unique_ptr[CPadOptions] pad_options
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.pad_options.get()
-
     def _set_options(self, width, padding):
-        self.pad_options.reset(
-            new CPadOptions(width, tobytes(padding)))
+        self.wrapped.reset(new CPadOptions(width, tobytes(padding)))
 
 
 class PadOptions(_PadOptions):
@@ -733,15 +769,8 @@ class PadOptions(_PadOptions):
 
 
 cdef class _TrimOptions(FunctionOptions):
-    cdef:
-        unique_ptr[CTrimOptions] trim_options
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.trim_options.get()
-
     def _set_options(self, characters):
-        self.trim_options.reset(
-            new CTrimOptions(tobytes(characters)))
+        self.wrapped.reset(new CTrimOptions(tobytes(characters)))
 
 
 class TrimOptions(_TrimOptions):
@@ -750,14 +779,8 @@ class TrimOptions(_TrimOptions):
 
 
 cdef class _ReplaceSliceOptions(FunctionOptions):
-    cdef:
-        unique_ptr[CReplaceSliceOptions] replace_slice_options
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.replace_slice_options.get()
-
     def _set_options(self, start, stop, replacement):
-        self.replace_slice_options.reset(
+        self.wrapped.reset(
             new CReplaceSliceOptions(start, stop, tobytes(replacement))
         )
 
@@ -768,14 +791,8 @@ class ReplaceSliceOptions(_ReplaceSliceOptions):
 
 
 cdef class _ReplaceSubstringOptions(FunctionOptions):
-    cdef:
-        unique_ptr[CReplaceSubstringOptions] replace_substring_options
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.replace_substring_options.get()
-
     def _set_options(self, pattern, replacement, max_replacements):
-        self.replace_substring_options.reset(
+        self.wrapped.reset(
             new CReplaceSubstringOptions(tobytes(pattern),
                                          tobytes(replacement),
                                          max_replacements)
@@ -788,14 +805,8 @@ class ReplaceSubstringOptions(_ReplaceSubstringOptions):
 
 
 cdef class _ExtractRegexOptions(FunctionOptions):
-    cdef:
-        unique_ptr[CExtractRegexOptions] extract_regex_options
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.extract_regex_options.get()
-
     def _set_options(self, pattern):
-        self.extract_regex_options.reset(
+        self.wrapped.reset(
             new CExtractRegexOptions(tobytes(pattern)))
 
 
@@ -805,15 +816,8 @@ class ExtractRegexOptions(_ExtractRegexOptions):
 
 
 cdef class _SliceOptions(FunctionOptions):
-    cdef:
-        unique_ptr[CSliceOptions] slice_options
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.slice_options.get()
-
     def _set_options(self, start, stop, step):
-        self.slice_options.reset(
-            new CSliceOptions(start, stop, step))
+        self.wrapped.reset(new CSliceOptions(start, stop, step))
 
 
 class SliceOptions(_SliceOptions):
@@ -822,18 +826,12 @@ class SliceOptions(_SliceOptions):
 
 
 cdef class _FilterOptions(FunctionOptions):
-    cdef:
-        unique_ptr[CFilterOptions] filter_options
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.filter_options.get()
-
     def _set_options(self, null_selection_behavior):
         if null_selection_behavior == 'drop':
-            self.filter_options.reset(
+            self.wrapped.reset(
                 new CFilterOptions(CFilterNullSelectionBehavior_DROP))
         elif null_selection_behavior == 'emit_null':
-            self.filter_options.reset(
+            self.wrapped.reset(
                 new CFilterOptions(CFilterNullSelectionBehavior_EMIT_NULL))
         else:
             raise ValueError(
@@ -847,19 +845,13 @@ class FilterOptions(_FilterOptions):
 
 
 cdef class _DictionaryEncodeOptions(FunctionOptions):
-    cdef:
-        unique_ptr[CDictionaryEncodeOptions] dictionary_encode_options
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.dictionary_encode_options.get()
-
     def _set_options(self, null_encoding_behavior):
         if null_encoding_behavior == 'encode':
-            self.dictionary_encode_options.reset(
+            self.wrapped.reset(
                 new CDictionaryEncodeOptions(
                     CDictionaryEncodeNullEncodingBehavior_ENCODE))
         elif null_encoding_behavior == 'mask':
-            self.dictionary_encode_options.reset(
+            self.wrapped.reset(
                 new CDictionaryEncodeOptions(
                     CDictionaryEncodeNullEncodingBehavior_MASK))
         else:
@@ -873,14 +865,8 @@ class DictionaryEncodeOptions(_DictionaryEncodeOptions):
 
 
 cdef class _TakeOptions(FunctionOptions):
-    cdef:
-        unique_ptr[CTakeOptions] take_options
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.take_options.get()
-
     def _set_options(self, boundscheck):
-        self.take_options.reset(new CTakeOptions(boundscheck))
+        self.wrapped.reset(new CTakeOptions(boundscheck))
 
 
 class TakeOptions(_TakeOptions):
@@ -889,14 +875,8 @@ class TakeOptions(_TakeOptions):
 
 
 cdef class _PartitionNthOptions(FunctionOptions):
-    cdef:
-        unique_ptr[CPartitionNthOptions] partition_nth_options
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.partition_nth_options.get()
-
     def _set_options(self, int64_t pivot):
-        self.partition_nth_options.reset(new CPartitionNthOptions(pivot))
+        self.wrapped.reset(new CPartitionNthOptions(pivot))
 
 
 class PartitionNthOptions(_PartitionNthOptions):
@@ -905,18 +885,12 @@ class PartitionNthOptions(_PartitionNthOptions):
 
 
 cdef class _ProjectOptions(FunctionOptions):
-    cdef:
-        unique_ptr[CProjectOptions] project_options
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.project_options.get()
-
     def _set_options(self, field_names):
         cdef:
             vector[c_string] c_field_names
         for n in field_names:
             c_field_names.push_back(tobytes(n))
-        self.project_options.reset(new CProjectOptions(field_names))
+        self.wrapped.reset(new CProjectOptions(field_names))
 
 
 class ProjectOptions(_ProjectOptions):
@@ -925,14 +899,8 @@ class ProjectOptions(_ProjectOptions):
 
 
 cdef class _ScalarAggregateOptions(FunctionOptions):
-    cdef:
-        unique_ptr[CScalarAggregateOptions] scalar_aggregate_options
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.scalar_aggregate_options.get()
-
     def _set_options(self, skip_nulls, min_count):
-        self.scalar_aggregate_options.reset(
+        self.wrapped.reset(
             new CScalarAggregateOptions(skip_nulls, min_count))
 
 
@@ -942,15 +910,8 @@ class ScalarAggregateOptions(_ScalarAggregateOptions):
 
 
 cdef class _IndexOptions(FunctionOptions):
-    cdef:
-        unique_ptr[CIndexOptions] index_options
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.index_options.get()
-
     def _set_options(self, Scalar scalar):
-        self.index_options.reset(
-            new CIndexOptions(pyarrow_unwrap_scalar(scalar)))
+        self.wrapped.reset(new CIndexOptions(pyarrow_unwrap_scalar(scalar)))
 
 
 class IndexOptions(_IndexOptions):
@@ -968,14 +929,8 @@ class IndexOptions(_IndexOptions):
 
 
 cdef class _ModeOptions(FunctionOptions):
-    cdef:
-        unique_ptr[CModeOptions] mode_options
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.mode_options.get()
-
     def _set_options(self, n):
-        self.mode_options.reset(new CModeOptions(n))
+        self.wrapped.reset(new CModeOptions(n))
 
 
 class ModeOptions(_ModeOptions):
@@ -985,12 +940,8 @@ class ModeOptions(_ModeOptions):
 
 cdef class _SetLookupOptions(FunctionOptions):
     cdef:
-        unique_ptr[CSetLookupOptions] set_lookup_options
         unique_ptr[CDatum] valset
 
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.set_lookup_options.get()
-
     def _set_options(self, value_set, c_bool skip_nulls):
         if isinstance(value_set, Array):
             self.valset.reset(new CDatum((<Array> value_set).sp_array))
@@ -1003,9 +954,8 @@ cdef class _SetLookupOptions(FunctionOptions):
         else:
             raise ValueError('"{}" is not a valid value_set'.format(value_set))
 
-        self.set_lookup_options.reset(
-            new CSetLookupOptions(deref(self.valset), skip_nulls)
-        )
+        self.wrapped.reset(
+            new CSetLookupOptions(deref(self.valset), skip_nulls))
 
 
 class SetLookupOptions(_SetLookupOptions):
@@ -1014,27 +964,20 @@ class SetLookupOptions(_SetLookupOptions):
 
 
 cdef class _StrptimeOptions(FunctionOptions):
-    cdef:
-        unique_ptr[CStrptimeOptions] strptime_options
-        TimeUnit time_unit
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.strptime_options.get()
-
     def _set_options(self, format, unit):
         if unit == 's':
-            self.time_unit = TimeUnit_SECOND
+            time_unit = TimeUnit_SECOND
         elif unit == 'ms':
-            self.time_unit = TimeUnit_MILLI
+            time_unit = TimeUnit_MILLI
         elif unit == 'us':
-            self.time_unit = TimeUnit_MICRO
+            time_unit = TimeUnit_MICRO
         elif unit == 'ns':
-            self.time_unit = TimeUnit_NANO
+            time_unit = TimeUnit_NANO
         else:
             raise ValueError('"{}" is not a valid time unit'.format(unit))
 
-        self.strptime_options.reset(
-            new CStrptimeOptions(tobytes(format), self.time_unit)
+        self.wrapped.reset(
+            new CStrptimeOptions(tobytes(format), time_unit)
         )
 
 
@@ -1044,14 +987,8 @@ class StrptimeOptions(_StrptimeOptions):
 
 
 cdef class _VarianceOptions(FunctionOptions):
-    cdef:
-        unique_ptr[CVarianceOptions] variance_options
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.variance_options.get()
-
     def _set_options(self, ddof):
-        self.variance_options.reset(new CVarianceOptions(ddof))
+        self.wrapped.reset(new CVarianceOptions(ddof))
 
 
 class VarianceOptions(_VarianceOptions):
@@ -1060,14 +997,8 @@ class VarianceOptions(_VarianceOptions):
 
 
 cdef class _SplitOptions(FunctionOptions):
-    cdef:
-        unique_ptr[CSplitOptions] split_options
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.split_options.get()
-
     def _set_options(self, max_splits, reverse):
-        self.split_options.reset(
+        self.wrapped.reset(
             new CSplitOptions(max_splits, reverse))
 
 
@@ -1077,14 +1008,8 @@ class SplitOptions(_SplitOptions):
 
 
 cdef class _SplitPatternOptions(FunctionOptions):
-    cdef:
-        unique_ptr[CSplitPatternOptions] split_pattern_options
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.split_pattern_options.get()
-
     def _set_options(self, pattern, max_splits, reverse):
-        self.split_pattern_options.reset(
+        self.wrapped.reset(
             new CSplitPatternOptions(tobytes(pattern), max_splits, reverse))
 
 
@@ -1094,19 +1019,11 @@ class SplitPatternOptions(_SplitPatternOptions):
 
 
 cdef class _ArraySortOptions(FunctionOptions):
-    cdef:
-        unique_ptr[CArraySortOptions] array_sort_options
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.array_sort_options.get()
-
     def _set_options(self, order):
         if order == "ascending":
-            self.array_sort_options.reset(
-                new CArraySortOptions(CSortOrder_Ascending))
+            self.wrapped.reset(new CArraySortOptions(CSortOrder_Ascending))
         elif order == "descending":
-            self.array_sort_options.reset(
-                new CArraySortOptions(CSortOrder_Descending))
+            self.wrapped.reset(new CArraySortOptions(CSortOrder_Descending))
         else:
             raise ValueError(
                 "{!r} is not a valid order".format(order)
@@ -1119,12 +1036,6 @@ class ArraySortOptions(_ArraySortOptions):
 
 
 cdef class _SortOptions(FunctionOptions):
-    cdef:
-        unique_ptr[CSortOptions] sort_options
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.sort_options.get()
-
     def _set_options(self, sort_keys):
         cdef:
             vector[CSortKey] c_sort_keys
@@ -1143,7 +1054,7 @@ cdef class _SortOptions(FunctionOptions):
             c_name = tobytes(name)
             c_sort_keys.push_back(CSortKey(c_name, c_order))
 
-        self.sort_options.reset(new CSortOptions(c_sort_keys))
+        self.wrapped.reset(new CSortOptions(c_sort_keys))
 
 
 class SortOptions(_SortOptions):
@@ -1154,12 +1065,6 @@ class SortOptions(_SortOptions):
 
 
 cdef class _QuantileOptions(FunctionOptions):
-    cdef:
-        unique_ptr[CQuantileOptions] quantile_options
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.quantile_options.get()
-
     def _set_options(self, quantiles, interp):
         interp_dict = {
             'linear': CQuantileInterp_LINEAR,
@@ -1172,7 +1077,7 @@ cdef class _QuantileOptions(FunctionOptions):
             raise ValueError(
                 '{!r} is not a valid interpolation'
                 .format(interp))
-        self.quantile_options.reset(
+        self.wrapped.reset(
             new CQuantileOptions(quantiles, interp_dict[interp]))
 
 
@@ -1184,14 +1089,8 @@ class QuantileOptions(_QuantileOptions):
 
 
 cdef class _TDigestOptions(FunctionOptions):
-    cdef:
-        unique_ptr[CTDigestOptions] tdigest_options
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.tdigest_options.get()
-
     def _set_options(self, quantiles, delta, buffer_size):
-        self.tdigest_options.reset(
+        self.wrapped.reset(
             new CTDigestOptions(quantiles, delta, buffer_size))
 
 
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 653a2b83781..07983b79f40 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -1751,8 +1751,18 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil:
         vector[c_string] arg_names
         c_string options_class
 
+    cdef cppclass CFunctionOptionsType" arrow::compute::FunctionOptionsType":
+        const char* type_name() const
+
     cdef cppclass CFunctionOptions" arrow::compute::FunctionOptions":
-        pass
+        const CFunctionOptionsType* options_type() const
+        c_bool Equals(const CFunctionOptions& other)
+        c_string ToString()
+        CResult[shared_ptr[CBuffer]] Serialize() const
+
+        @staticmethod
+        CResult[unique_ptr[CFunctionOptions]] Deserialize(
+            const c_string& type_name, const CBuffer&)
 
     cdef cppclass CFunction" arrow::compute::Function":
         const c_string& name() const
@@ -1843,9 +1853,11 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil:
         c_bool reverse
 
     cdef cppclass CSplitPatternOptions \
-            "arrow::compute::SplitPatternOptions"(CSplitOptions):
+            "arrow::compute::SplitPatternOptions"(CFunctionOptions):
         CSplitPatternOptions(c_string pattern, int64_t max_splits,
                              c_bool reverse)
+        int64_t max_splits
+        c_bool reverse
         c_string pattern
 
     cdef cppclass CReplaceSliceOptions \
@@ -2027,6 +2039,25 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil:
         c_bool skip_nulls
 
 
+cdef extern from * namespace "arrow::compute":
+    # inlined from compute/function_internal.h to avoid exposing
+    # implementation details
+    """
+    #include "arrow/compute/function.h"
+    namespace arrow {
+    namespace compute {
+    namespace internal {
+    Result<std::unique_ptr<FunctionOptions>> DeserializeFunctionOptions(
+        const Buffer& buffer);
+    } //  namespace internal
+    } //  namespace compute
+    } //  namespace arrow
+    """
+    CResult[unique_ptr[CFunctionOptions]] DeserializeFunctionOptions\
+        " arrow::compute::internal::DeserializeFunctionOptions"(
+            const CBuffer& buffer)
+
+
 cdef extern from "arrow/python/api.h" namespace "arrow::py":
     # Requires GIL
     CResult[shared_ptr[CDataType]] InferArrowType(
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index 3a10da0ca2b..264da5805e1 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -111,6 +111,41 @@ def test_exported_option_classes():
                                       param.VAR_KEYWORD)
 
 
+def test_option_class_equality():
+    options = [
+        pc.CastOptions.safe(pa.int8()),
+        pc.ExtractRegexOptions("pattern"),
+        pc.IndexOptions(pa.scalar(1)),
+        pc.MatchSubstringOptions("pattern"),
+        pc.PadOptions(5, " "),
+        pc.PartitionNthOptions(1),
+        pc.ProjectOptions([b"field", b"names"]),
+        pc.ReplaceSliceOptions(start=0, stop=1, replacement="a"),
+        pc.ReplaceSubstringOptions("a", "b"),
+        pc.SetLookupOptions(value_set=pa.array([1])),
+        pc.SliceOptions(start=0, stop=1, step=1),
+        pc.SplitPatternOptions(pattern="pattern"),
+        pc.StrptimeOptions("%Y", "s"),
+        pc.TrimOptions(" "),
+    ]
+    classes = {type(option) for option in options}
+    for cls in exported_option_classes:
+        if cls not in classes:
+            try:
+                options.append(cls())
+            except TypeError:
+                pytest.fail(f"Options class is not tested: {cls}")
+    for option in options:
+        assert option == option
+        assert repr(option)
+        buf = option.serialize()
+        deserialized = pc.FunctionOptions.deserialize(buf)
+        assert option == deserialized
+        assert repr(option) == repr(deserialized)
+    for option1, option2 in zip(options, options[1:]):
+        assert option1 != option2
+
+
 def test_list_functions():
     assert len(pc.list_functions()) > 10
     assert "add" in pc.list_functions()

From ab5747959d2defeda9ff159155fb4673e3f77e8d Mon Sep 17 00:00:00 2001
From: nullptr <3621629+0x0L@users.noreply.github.com>
Date: Wed, 30 Jun 2021 14:52:59 -0500
Subject: [PATCH 485/719] ARROW-13128: [C#] TimestampArray conversion logic for
 nano and micro is wrong

Closes #10561 from 0x0L/0x0L-patch-1

Authored-by: nullptr <3621629+0x0L@users.noreply.github.com>
Signed-off-by: Eric Erhardt <eric.erhardt@microsoft.com>
---
 .../src/Apache.Arrow/Arrays/TimestampArray.cs |  8 +++---
 .../Apache.Arrow.Tests/ArrayBuilderTests.cs   | 25 +++++++++++++++++--
 2 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/csharp/src/Apache.Arrow/Arrays/TimestampArray.cs b/csharp/src/Apache.Arrow/Arrays/TimestampArray.cs
index 8b5279a1069..0269768f490 100644
--- a/csharp/src/Apache.Arrow/Arrays/TimestampArray.cs
+++ b/csharp/src/Apache.Arrow/Arrays/TimestampArray.cs
@@ -76,9 +76,9 @@ protected override long ConvertTo(DateTimeOffset value)
                 switch (DataType.Unit)
                 {
                     case TimeUnit.Nanosecond:
-                        return ticks / 100;
+                        return ticks * 100;
                     case TimeUnit.Microsecond:
-                        return ticks / TimeSpan.TicksPerMillisecond / 1000;
+                        return ticks / 10;
                     case TimeUnit.Millisecond:
                         return ticks / TimeSpan.TicksPerMillisecond;
                     case TimeUnit.Second:
@@ -116,10 +116,10 @@ public DateTimeOffset GetTimestampUnchecked(int index)
             switch (type.Unit)
             {
                 case TimeUnit.Nanosecond:
-                    ticks = value * 100;
+                    ticks = value / 100;
                     break;
                 case TimeUnit.Microsecond:
-                    ticks = value * TimeSpan.TicksPerMillisecond * 1000;
+                    ticks = value * 10;
                     break;
                 case TimeUnit.Millisecond:
                     ticks = value * TimeSpan.TicksPerMillisecond;
diff --git a/csharp/test/Apache.Arrow.Tests/ArrayBuilderTests.cs b/csharp/test/Apache.Arrow.Tests/ArrayBuilderTests.cs
index 7c1fd6476d1..05a566b1cb4 100644
--- a/csharp/test/Apache.Arrow.Tests/ArrayBuilderTests.cs
+++ b/csharp/test/Apache.Arrow.Tests/ArrayBuilderTests.cs
@@ -170,8 +170,29 @@ public void ProducesExpectedArray()
                     .Build();
 
                 Assert.Equal(1, array.Length);
-                Assert.NotNull(array.GetTimestamp(0));
-                Assert.Equal(now.Truncate(TimeSpan.FromTicks(100)), array.GetTimestamp(0).Value);
+                var value = array.GetTimestamp(0);
+                Assert.NotNull(value);
+                Assert.Equal(now, value.Value);
+
+                timestampType = new TimestampType(TimeUnit.Microsecond, TimeZoneInfo.Local);
+                array = new TimestampArray.Builder(timestampType)
+                    .Append(now)
+                    .Build();
+
+                Assert.Equal(1, array.Length);
+                value = array.GetTimestamp(0);
+                Assert.NotNull(value);
+                Assert.Equal(now.Truncate(TimeSpan.FromTicks(10)), value.Value);
+
+                timestampType = new TimestampType(TimeUnit.Millisecond, TimeZoneInfo.Local);
+                array = new TimestampArray.Builder(timestampType)
+                    .Append(now)
+                    .Build();
+
+                Assert.Equal(1, array.Length);
+                value = array.GetTimestamp(0);
+                Assert.NotNull(value);
+                Assert.Equal(now.Truncate(TimeSpan.FromTicks(TimeSpan.TicksPerMillisecond)), value.Value);
             }
         }
 

From db79f37cb83575b710e034027756cc0acd854027 Mon Sep 17 00:00:00 2001
From: Benjamin Kietzman <bengilgit@gmail.com>
Date: Thu, 1 Jul 2021 07:52:31 -0400
Subject: [PATCH 486/719] ARROW-13226: [Python] Add a general purpose cython
 trampolining utility

Closes #10619 from bkietz/BindFunction-cython-utility

Authored-by: Benjamin Kietzman <bengilgit@gmail.com>
Signed-off-by: Benjamin Kietzman <bengilgit@gmail.com>
---
 cpp/src/arrow/python/common.h                 | 60 ++++++++++++++++
 dev/archery/archery/cli.py                    |  1 -
 python/pyarrow/includes/common.pxd            |  1 +
 .../tests/bound_function_visit_strings.pyx    | 68 +++++++++++++++++++
 python/pyarrow/tests/test_cython.py           | 49 +++++++++++--
 5 files changed, 172 insertions(+), 7 deletions(-)
 create mode 100644 python/pyarrow/tests/bound_function_visit_strings.pyx

diff --git a/cpp/src/arrow/python/common.h b/cpp/src/arrow/python/common.h
index 8560fa2d6f4..24dcb130a26 100644
--- a/cpp/src/arrow/python/common.h
+++ b/cpp/src/arrow/python/common.h
@@ -185,6 +185,66 @@ class ARROW_PYTHON_EXPORT OwnedRefNoGIL : public OwnedRef {
   }
 };
 
+template <typename Fn>
+struct BoundFunction;
+
+template <typename... Args>
+struct BoundFunction<void(PyObject*, Args...)> {
+  // We bind `cdef void fn(object, ...)` to get a `Status(...)`
+  // where the Status contains any Python error raised by `fn`
+  using Unbound = void(PyObject*, Args...);
+  using Bound = Status(Args...);
+
+  BoundFunction(Unbound* unbound, PyObject* bound_arg)
+      : bound_arg_(bound_arg), unbound_(unbound) {}
+
+  Status Invoke(Args... args) const {
+    PyAcquireGIL lock;
+    unbound_(bound_arg_.obj(), std::forward<Args>(args)...);
+    RETURN_IF_PYERROR();
+    return Status::OK();
+  }
+
+  Unbound* unbound_;
+  OwnedRefNoGIL bound_arg_;
+};
+
+template <typename Return, typename... Args>
+struct BoundFunction<Return(PyObject*, Args...)> {
+  // We bind `cdef Return fn(object, ...)` to get a `Result<Return>(...)`
+  // where the Result contains any Python error raised by `fn` or the
+  // return value from `fn`.
+  using Unbound = Return(PyObject*, Args...);
+  using Bound = Result<Return>(Args...);
+
+  BoundFunction(Unbound* unbound, PyObject* bound_arg)
+      : bound_arg_(bound_arg), unbound_(unbound) {}
+
+  Result<Return> Invoke(Args... args) const {
+    PyAcquireGIL lock;
+    Return ret = unbound_(bound_arg_.obj(), std::forward<Args>(args)...);
+    RETURN_IF_PYERROR();
+    return ret;
+  }
+
+  Unbound* unbound_;
+  OwnedRefNoGIL bound_arg_;
+};
+
+template <typename OutFn, typename Return, typename... Args>
+std::function<OutFn> BindFunction(Return (*unbound)(PyObject*, Args...),
+                                  PyObject* bound_arg) {
+  using Fn = BoundFunction<Return(PyObject*, Args...)>;
+
+  static_assert(std::is_same<typename Fn::Bound, OutFn>::value,
+                "requested bound function of unsupported type");
+
+  Py_XINCREF(bound_arg);
+  auto bound_fn = std::make_shared<Fn>(unbound, bound_arg);
+  return
+      [bound_fn](Args... args) { return bound_fn->Invoke(std::forward<Args>(args)...); };
+}
+
 // A temporary conversion of a Python object to a bytes area.
 struct PyBytesView {
   const char* bytes;
diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py
index fefd7b02ed4..7fef9edb4b9 100644
--- a/dev/archery/archery/cli.py
+++ b/dev/archery/archery/cli.py
@@ -115,7 +115,6 @@ def _apply_options(cmd, options):
               help="Specify Arrow source directory")
 # toolchain
 @cpp_toolchain_options
-@java_toolchain_options
 @click.option("--build-type", default=None, type=build_type,
               help="CMake's CMAKE_BUILD_TYPE")
 @click.option("--warn-level", default="production", type=warn_level_type,
diff --git a/python/pyarrow/includes/common.pxd b/python/pyarrow/includes/common.pxd
index 3f67a3256cc..902eaafbbbd 100644
--- a/python/pyarrow/includes/common.pxd
+++ b/python/pyarrow/includes/common.pxd
@@ -128,6 +128,7 @@ cdef extern from "arrow/result.h" namespace "arrow" nogil:
 
 cdef extern from "arrow/python/common.h" namespace "arrow::py" nogil:
     T GetResultValue[T](CResult[T]) except *
+    cdef function[F] BindFunction[F](void* unbound, object bound, ...)
 
 
 cdef inline object PyObject_to_object(PyObject* o):
diff --git a/python/pyarrow/tests/bound_function_visit_strings.pyx b/python/pyarrow/tests/bound_function_visit_strings.pyx
new file mode 100644
index 00000000000..90437be8cde
--- /dev/null
+++ b/python/pyarrow/tests/bound_function_visit_strings.pyx
@@ -0,0 +1,68 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# distutils: language=c++
+# cython: language_level = 3
+
+import pyarrow as pa
+from pyarrow.lib cimport *
+from pyarrow.lib import frombytes, tobytes
+
+# basic test to roundtrip through a BoundFunction
+
+ctypedef CStatus visit_string_cb(const c_string&)
+
+cdef extern from * namespace "arrow::py" nogil:
+    """
+    #include <functional>
+    #include <string>
+    #include <vector>
+
+    #include "arrow/status.h"
+
+    namespace arrow {
+    namespace py {
+
+    Status VisitStrings(const std::vector<std::string>& strs,
+                        std::function<Status(const std::string&)> cb) {
+      for (const std::string& str : strs) {
+        RETURN_NOT_OK(cb(str));
+      }
+      return Status::OK();
+    }
+
+    }  // namespace py
+    }  // namespace arrow
+    """
+    cdef CStatus CVisitStrings" arrow::py::VisitStrings"(
+        vector[c_string], function[visit_string_cb])
+
+
+cdef void _visit_strings_impl(py_cb, const c_string& s) except *:
+    py_cb(frombytes(s))
+
+
+def _visit_strings(strings, cb):
+    cdef:
+        function[visit_string_cb] c_cb
+        vector[c_string] c_strings
+
+    c_cb = BindFunction[visit_string_cb](&_visit_strings_impl, cb)
+    for s in strings:
+        c_strings.push_back(tobytes(s))
+
+    check_status(CVisitStrings(c_strings, c_cb))
diff --git a/python/pyarrow/tests/test_cython.py b/python/pyarrow/tests/test_cython.py
index b852981ba39..e202b417a18 100644
--- a/python/pyarrow/tests/test_cython.py
+++ b/python/pyarrow/tests/test_cython.py
@@ -27,6 +27,11 @@
 
 
 here = os.path.dirname(os.path.abspath(__file__))
+test_ld_path = os.environ.get('PYARROW_TEST_LD_PATH', '')
+if os.name == 'posix':
+    compiler_opts = ['-std=c++11']
+else:
+    compiler_opts = []
 
 
 setup_template = """if 1:
@@ -82,18 +87,12 @@ def test_cython_api(tmpdir):
     # Fail early if cython is not found
     import cython  # noqa
 
-    test_ld_path = os.environ.get('PYARROW_TEST_LD_PATH', '')
-
     with tmpdir.as_cwd():
         # Set up temporary workspace
         pyx_file = 'pyarrow_cython_example.pyx'
         shutil.copyfile(os.path.join(here, pyx_file),
                         os.path.join(str(tmpdir), pyx_file))
         # Create setup.py file
-        if os.name == 'posix':
-            compiler_opts = ['-std=c++11']
-        else:
-            compiler_opts = []
         setup_code = setup_template.format(pyx_file=pyx_file,
                                            compiler_opts=compiler_opts,
                                            test_ld_path=test_ld_path)
@@ -141,3 +140,41 @@ def test_cython_api(tmpdir):
         subprocess.check_call([sys.executable, '-c', code],
                               stdout=subprocess.PIPE,
                               env=subprocess_env)
+
+
+@pytest.mark.cython
+def test_visit_strings(tmpdir):
+    with tmpdir.as_cwd():
+        # Set up temporary workspace
+        pyx_file = 'bound_function_visit_strings.pyx'
+        shutil.copyfile(os.path.join(here, pyx_file),
+                        os.path.join(str(tmpdir), pyx_file))
+        # Create setup.py file
+        setup_code = setup_template.format(pyx_file=pyx_file,
+                                           compiler_opts=compiler_opts,
+                                           test_ld_path=test_ld_path)
+        with open('setup.py', 'w') as f:
+            f.write(setup_code)
+
+        subprocess_env = test_util.get_modified_env_with_pythonpath()
+
+        # Compile extension module
+        subprocess.check_call([sys.executable, 'setup.py',
+                               'build_ext', '--inplace'],
+                              env=subprocess_env)
+
+    sys.path.insert(0, str(tmpdir))
+    mod = __import__('bound_function_visit_strings')
+
+    strings = ['a', 'b', 'c']
+    visited = []
+    mod._visit_strings(strings, visited.append)
+
+    assert visited == strings
+
+    with pytest.raises(ValueError, match="wtf"):
+        def raise_on_b(s):
+            if s == 'b':
+                raise ValueError('wtf')
+
+        mod._visit_strings(strings, raise_on_b)

From 1ae979d21300062bb488a902703dab6f92eb679a Mon Sep 17 00:00:00 2001
From: Benjamin Kietzman <bengilgit@gmail.com>
Date: Thu, 1 Jul 2021 08:15:29 -0400
Subject: [PATCH 487/719] ARROW-11930: [C++][Dataset][Compute] Use an ExecPlan
 for dataset scans

So far this involved a lot of refactoring of Expressions to be compatible with ExecBatches. The next step is to add a ScanNode wrapping a ScannerBuilder

Closes #10397 from bkietz/11930-Refactor-Dataset-scans-to

Authored-by: Benjamin Kietzman <bengilgit@gmail.com>
Signed-off-by: Benjamin Kietzman <bengilgit@gmail.com>
---
 cpp/src/arrow/compute/exec.cc                 |  51 +-
 cpp/src/arrow/compute/exec.h                  |  13 +
 cpp/src/arrow/compute/exec/doc/exec_node.md   | 147 ++++++
 cpp/src/arrow/compute/exec/exec_plan.cc       | 481 +++++++++++++++---
 cpp/src/arrow/compute/exec/exec_plan.h        |  86 ++--
 cpp/src/arrow/compute/exec/expression.cc      | 214 ++++----
 cpp/src/arrow/compute/exec/expression.h       |  52 +-
 .../arrow/compute/exec/expression_internal.h  |   4 +
 cpp/src/arrow/compute/exec/expression_test.cc | 161 ++++--
 cpp/src/arrow/compute/exec/plan_test.cc       | 464 ++++++++---------
 cpp/src/arrow/compute/exec/test_util.cc       | 313 ++----------
 cpp/src/arrow/compute/exec/test_util.h        |  35 +-
 .../arrow/compute/kernels/codegen_internal.cc |   2 +-
 cpp/src/arrow/compute/type_fwd.h              |   2 +
 cpp/src/arrow/dataset/dataset.cc              |   7 +-
 cpp/src/arrow/dataset/dataset.h               |   1 -
 cpp/src/arrow/dataset/dataset_internal.h      |  30 ++
 cpp/src/arrow/dataset/file_csv.cc             |   4 +-
 cpp/src/arrow/dataset/file_ipc_test.cc        |   9 +-
 cpp/src/arrow/dataset/file_parquet_test.cc    |  27 +
 cpp/src/arrow/dataset/file_test.cc            |   8 +-
 cpp/src/arrow/dataset/partition.cc            |   3 +-
 cpp/src/arrow/dataset/scanner.cc              | 214 ++++----
 cpp/src/arrow/dataset/scanner.h               |  25 +-
 cpp/src/arrow/dataset/scanner_internal.h      |  52 +-
 cpp/src/arrow/dataset/scanner_test.cc         | 303 ++++++++++-
 cpp/src/arrow/dataset/test_util.h             |  57 +--
 cpp/src/arrow/pretty_print.cc                 |  88 +++-
 cpp/src/arrow/pretty_print.h                  |   6 +-
 cpp/src/arrow/result.h                        |  13 +-
 cpp/src/arrow/result_test.cc                  |  71 +++
 cpp/src/arrow/status.h                        |   7 +-
 cpp/src/arrow/status_test.cc                  |  82 +++
 cpp/src/arrow/testing/matchers.h              | 177 +++++++
 cpp/src/arrow/type.cc                         |   4 +
 cpp/src/arrow/util/async_generator.h          |  42 +-
 cpp/src/arrow/util/async_generator_test.cc    |  12 +-
 cpp/src/arrow/util/future.h                   |  27 +
 cpp/src/arrow/util/future_test.cc             |  41 ++
 cpp/src/arrow/util/thread_pool.cc             |   9 +-
 cpp/src/arrow/util/thread_pool.h              |   6 +
 cpp/src/arrow/util/thread_pool_test.cc        |  17 +
 cpp/src/arrow/util/vector.h                   |  42 +-
 python/pyarrow/_dataset.pyx                   |   2 +-
 python/pyarrow/includes/libarrow_dataset.pxd  |  22 +-
 r/src/dataset.cpp                             |   6 +-
 46 files changed, 2349 insertions(+), 1090 deletions(-)
 create mode 100644 cpp/src/arrow/compute/exec/doc/exec_node.md
 create mode 100644 cpp/src/arrow/testing/matchers.h

diff --git a/cpp/src/arrow/compute/exec.cc b/cpp/src/arrow/compute/exec.cc
index 73cb82ef026..78f3d753711 100644
--- a/cpp/src/arrow/compute/exec.cc
+++ b/cpp/src/arrow/compute/exec.cc
@@ -36,6 +36,7 @@
 #include "arrow/compute/registry.h"
 #include "arrow/compute/util_internal.h"
 #include "arrow/datum.h"
+#include "arrow/pretty_print.h"
 #include "arrow/record_batch.h"
 #include "arrow/scalar.h"
 #include "arrow/status.h"
@@ -69,6 +70,48 @@ ExecBatch::ExecBatch(const RecordBatch& batch)
   std::move(columns.begin(), columns.end(), values.begin());
 }
 
+bool ExecBatch::Equals(const ExecBatch& other) const {
+  return guarantee == other.guarantee && values == other.values;
+}
+
+void PrintTo(const ExecBatch& batch, std::ostream* os) {
+  *os << "ExecBatch\n";
+
+  static const std::string indent = "    ";
+
+  *os << indent << "# Rows: " << batch.length << "\n";
+  if (batch.guarantee != literal(true)) {
+    *os << indent << "Guarantee: " << batch.guarantee.ToString() << "\n";
+  }
+
+  int i = 0;
+  for (const Datum& value : batch.values) {
+    *os << indent << "" << i++ << ": ";
+
+    if (value.is_scalar()) {
+      *os << "Scalar[" << value.scalar()->ToString() << "]\n";
+      continue;
+    }
+
+    auto array = value.make_array();
+    PrettyPrintOptions options;
+    options.skip_new_lines = true;
+    *os << "Array";
+    ARROW_CHECK_OK(PrettyPrint(*array, options, os));
+    *os << "\n";
+  }
+}
+
+ExecBatch ExecBatch::Slice(int64_t offset, int64_t length) const {
+  ExecBatch out = *this;
+  for (auto& value : out.values) {
+    if (value.is_scalar()) continue;
+    value = value.array()->Slice(offset, length);
+  }
+  out.length = length;
+  return out;
+}
+
 Result<ExecBatch> ExecBatch::Make(std::vector<Datum> values) {
   if (values.empty()) {
     return Status::Invalid("Cannot infer ExecBatch length without at least one value");
@@ -77,9 +120,6 @@ Result<ExecBatch> ExecBatch::Make(std::vector<Datum> values) {
   int64_t length = -1;
   for (const auto& value : values) {
     if (value.is_scalar()) {
-      if (length == -1) {
-        length = 1;
-      }
       continue;
     }
 
@@ -94,8 +134,13 @@ Result<ExecBatch> ExecBatch::Make(std::vector<Datum> values) {
     }
   }
 
+  if (length == -1) {
+    length = 1;
+  }
+
   return ExecBatch(std::move(values), length);
 }
+
 namespace {
 
 Result<std::shared_ptr<Buffer>> AllocateDataBuffer(KernelContext* ctx, int64_t length,
diff --git a/cpp/src/arrow/compute/exec.h b/cpp/src/arrow/compute/exec.h
index cd95db2fd8c..e7015814d2a 100644
--- a/cpp/src/arrow/compute/exec.h
+++ b/cpp/src/arrow/compute/exec.h
@@ -28,6 +28,7 @@
 #include <vector>
 
 #include "arrow/array/data.h"
+#include "arrow/compute/exec/expression.h"
 #include "arrow/datum.h"
 #include "arrow/memory_pool.h"
 #include "arrow/result.h"
@@ -186,6 +187,9 @@ struct ARROW_EXPORT ExecBatch {
   /// ExecBatch::length is equal to the length of this array.
   std::shared_ptr<SelectionVector> selection_vector;
 
+  /// A predicate Expression guaranteed to evaluate to true for all rows in this batch.
+  Expression guarantee = literal(true);
+
   /// The semantic length of the ExecBatch. When the values are all scalars,
   /// the length should be set to 1, otherwise the length is taken from the
   /// array values, except when there is a selection vector. When there is a
@@ -203,9 +207,13 @@ struct ARROW_EXPORT ExecBatch {
     return values[i];
   }
 
+  bool Equals(const ExecBatch& other) const;
+
   /// \brief A convenience for the number of values / arguments.
   int num_values() const { return static_cast<int>(values.size()); }
 
+  ExecBatch Slice(int64_t offset, int64_t length) const;
+
   /// \brief A convenience for returning the ValueDescr objects (types and
   /// shapes) from the batch.
   std::vector<ValueDescr> GetDescriptors() const {
@@ -215,8 +223,13 @@ struct ARROW_EXPORT ExecBatch {
     }
     return result;
   }
+
+  ARROW_EXPORT friend void PrintTo(const ExecBatch&, std::ostream*);
 };
 
+inline bool operator==(const ExecBatch& l, const ExecBatch& r) { return l.Equals(r); }
+inline bool operator!=(const ExecBatch& l, const ExecBatch& r) { return !l.Equals(r); }
+
 /// \defgroup compute-call-function One-shot calls to compute functions
 ///
 /// @{
diff --git a/cpp/src/arrow/compute/exec/doc/exec_node.md b/cpp/src/arrow/compute/exec/doc/exec_node.md
new file mode 100644
index 00000000000..797cc87d90a
--- /dev/null
+++ b/cpp/src/arrow/compute/exec/doc/exec_node.md
@@ -0,0 +1,147 @@
+<!---
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+# ExecNodes and logical operators
+
+`ExecNode`s are intended to implement individual logical operators
+in a streaming execution graph. Each node receives batches from
+upstream nodes (inputs), processes them in some way, then pushes
+results to downstream nodes (outputs). `ExecNode`s are owned and
+(to an extent) coordinated by an `ExecPlan`.
+
+> Terminology: "operator" and "node" are mostly interchangable, like
+> "Interface" and "Abstract Base Class" in c++ space. The latter is
+> a formal and specific bit of code which implements the abstract
+> concept.
+
+## Types of logical operators
+
+Each of these will have at least one corresponding concrete
+`ExecNode`. Where possible, compatible implementations of a
+logical operator will *not* be exposed as independent subclasses
+of `ExecNode`. Instead we prefer that they be
+be encapsulated internally by a single subclass of `ExecNode`
+to permit switching between them during a query.
+
+- Scan: materializes in-memory batches from storage (e.g. Parquet
+  files, flight stream, ...)
+- Filter: evaluates an `Expression` on each input batch and outputs
+  a copy with any rows excluded for which the filter did not return
+  `true`.
+- Project: evaluates `Expression`s on each input batch to produce
+  the columns of an output batch.
+- Grouped Aggregate: identify groups based on one or more key columns
+  in each input batch, then update aggregates corresponding to those
+  groups. Node that this is a pipeline breaker; it will wait for its
+  inputs to complete before outputting any batches.
+- Union: merge two or more streams of batches into a single stream
+  of batches.
+- Write: write each batch to storage
+- ToTable: Collect batches into a `Table` with stable row ordering where
+  possible.
+
+#### Not in scope for Arrow 5.0:
+
+- Join: perform an inner, left, outer, semi, or anti join given some
+  join predicates.
+- Sort: accumulate all input batches into a single table, reorder its
+  rows by some sorting condition, then stream the sorted table out as
+  batches
+- Top-K: retrieve a limited subset of rows from a table as though it
+  were in sorted order.
+
+For example: a dataset scan with only a filter and a
+projection will correspond to a fairly trivial graph:
+
+```
+ScanNode -> FilterNode -> ProjectNode -> ToTableNode
+```
+
+A scan node loads batches from disk and pushes to a filter node.
+The filter node excludes some rows based on an `Expression` then
+pushes filtered batches to a project node. The project node
+materializes new columns based on `Expression`s then pushes those
+batches to a table collection node. The table collection node
+assembles these batches into a `Table` which is handed off as the
+result of the `ExecPlan`.
+
+## Parallelism, pipelines
+
+The execution graph is orthogonal to parallelism; any
+node may push to any other node from any thread. A scan node causes
+each batch to arrive on a thread after which it will pass through
+each node in the example graph above, never leaving that thread
+(memory/other resource pressure permitting).
+
+The example graph above happens to be simple enough that processing
+of any batch by any node is independent of other nodes and other
+batches; it is a pipeline. Note that there is no explicit `Pipeline`
+class- pipelined execution is an emergent property of some sub
+graphs.
+
+Nodes which do not share this property (pipeline breakers) are
+responsible for deciding when they have received sufficient input,
+when they can start emitting output, etc. For example a `GroupByNode`
+will wait for its input to be exhausted before it begins pushing
+batches to its own outputs.
+
+Parallelism is "seeded" by `ScanNode` (or other source nodes)- it
+owns a reference to the thread pool on which the graph is executing
+and fans out pushing to its outputs across that pool. A subsequent
+`ProjectNode` will process the batch immediately after it is handed
+off by the `ScanNode`- no explicit scheduling required.
+Eventually, individual nodes may internally
+parallelize processing of individual batches (for example, if a
+`FilterNode`'s filter expression is slow). This decision is also left
+up to each `ExecNode` implementation.
+
+# ExecNode interface and usage
+
+`ExecNode`s are constructed using one of the available factory
+functions, such as `arrow::compute::MakeFilterNode`
+or `arrow::dataset::MakeScanNode`. Any inputs to an `ExecNode`
+must be provided when the node is constructed, so the first
+nodes to be constructed are source nodes with no inputs
+such as `ScanNode`.
+
+The batches yielded by an `ExecNode` always conform precisely
+to its output schema. NB: no by-name field lookups or type
+checks are performed during execution. The output schema
+is usually derived from the output schemas of inputs. For
+example a `FilterNode`'s output schema is always identical to
+that of its input since batches are only modified by exclusion
+of some rows.
+
+An `ExecNode` will begin producing batches when
+`node->StartProducing()` is invoked and will proceed until stopped
+with `node->StopProducing()`. Started nodes may not be destroyed
+until stopped. `ExecNode`s are not currently restartable.
+An `ExecNode` pushes batches to its outputs by passing each batch
+to `output->InputReceived()`. It signals exhaustion by invoking
+`output->InputFinished()`.
+
+Error recovery is permitted within a node. For example, if evaluation
+of an `Expression` runs out of memory the governing node may
+try that evaluation again after some memory has been freed up.
+If a node experiences an error from which it cannot recover (for
+example an IO error while parsing a CSV file) then it reports this
+with `output->ErrorReceived()`. An error which escapes the scope of
+a single node should not be considered recoverable (no `FilterNode`
+should `try/catch` the IO error above).
+
diff --git a/cpp/src/arrow/compute/exec/exec_plan.cc b/cpp/src/arrow/compute/exec/exec_plan.cc
index f765ceccf0c..2dcbfb24724 100644
--- a/cpp/src/arrow/compute/exec/exec_plan.cc
+++ b/cpp/src/arrow/compute/exec/exec_plan.cc
@@ -17,10 +17,15 @@
 
 #include "arrow/compute/exec/exec_plan.h"
 
+#include <mutex>
 #include <unordered_set>
 
+#include "arrow/compute/api_vector.h"
+#include "arrow/compute/exec.h"
+#include "arrow/compute/exec/expression.h"
 #include "arrow/datum.h"
 #include "arrow/result.h"
+#include "arrow/util/async_generator.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/logging.h"
 #include "arrow/util/optional.h"
@@ -36,7 +41,11 @@ namespace {
 struct ExecPlanImpl : public ExecPlan {
   ExecPlanImpl() = default;
 
-  ~ExecPlanImpl() override = default;
+  ~ExecPlanImpl() override {
+    if (started_ && !stopped_) {
+      StopProducing();
+    }
+  }
 
   ExecNode* AddNode(std::unique_ptr<ExecNode> node) {
     if (node->num_inputs() == 0) {
@@ -60,79 +69,73 @@ struct ExecPlanImpl : public ExecPlan {
   }
 
   Status StartProducing() {
-    ARROW_ASSIGN_OR_RAISE(auto sorted_nodes, ReverseTopoSort());
-    Status st;
-    auto it = sorted_nodes.begin();
-    while (it != sorted_nodes.end() && st.ok()) {
-      st &= (*it++)->StartProducing();
+    if (started_) {
+      return Status::Invalid("restarted ExecPlan");
     }
-    if (!st.ok()) {
+    started_ = true;
+
+    // producers precede consumers
+    sorted_nodes_ = TopoSort();
+
+    for (size_t i = 0, rev_i = sorted_nodes_.size() - 1; i < sorted_nodes_.size();
+         ++i, --rev_i) {
+      auto st = sorted_nodes_[rev_i]->StartProducing();
+      if (st.ok()) continue;
+
       // Stop nodes that successfully started, in reverse order
-      // (`it` now points after the node that failed starting, so need to rewind)
-      --it;
-      while (it != sorted_nodes.begin()) {
-        (*--it)->StopProducing();
+      for (; rev_i < sorted_nodes_.size(); ++rev_i) {
+        sorted_nodes_[rev_i]->StopProducing();
       }
+      return st;
     }
-    return st;
+    return Status::OK();
   }
 
-  Result<NodeVector> ReverseTopoSort() {
-    struct TopoSort {
+  void StopProducing() {
+    DCHECK(started_) << "stopped an ExecPlan which never started";
+    stopped_ = true;
+
+    for (const auto& node : sorted_nodes_) {
+      node->StopProducing();
+    }
+  }
+
+  NodeVector TopoSort() {
+    struct Impl {
       const std::vector<std::unique_ptr<ExecNode>>& nodes;
       std::unordered_set<ExecNode*> visited;
-      std::unordered_set<ExecNode*> visiting;
       NodeVector sorted;
 
-      explicit TopoSort(const std::vector<std::unique_ptr<ExecNode>>& nodes)
-          : nodes(nodes) {
+      explicit Impl(const std::vector<std::unique_ptr<ExecNode>>& nodes) : nodes(nodes) {
         visited.reserve(nodes.size());
-        sorted.reserve(nodes.size());
-      }
+        sorted.resize(nodes.size());
 
-      Status Sort() {
         for (const auto& node : nodes) {
-          RETURN_NOT_OK(Visit(node.get()));
+          Visit(node.get());
         }
-        DCHECK_EQ(sorted.size(), nodes.size());
+
         DCHECK_EQ(visited.size(), nodes.size());
-        DCHECK_EQ(visiting.size(), 0);
-        return Status::OK();
       }
 
-      Status Visit(ExecNode* node) {
-        if (visited.count(node) != 0) {
-          return Status::OK();
-        }
-
-        auto it_success = visiting.insert(node);
-        if (!it_success.second) {
-          // Insertion failed => node is already being visited
-          return Status::Invalid("Cycle detected in execution plan");
-        }
+      void Visit(ExecNode* node) {
+        if (visited.count(node) != 0) return;
 
         for (auto input : node->inputs()) {
           // Ensure that producers are inserted before this consumer
-          RETURN_NOT_OK(Visit(input));
+          Visit(input);
         }
 
-        visiting.erase(it_success.first);
+        sorted[visited.size()] = node;
         visited.insert(node);
-        sorted.push_back(node);
-        return Status::OK();
-      }
-
-      NodeVector Reverse() {
-        std::reverse(sorted.begin(), sorted.end());
-        return std::move(sorted);
       }
-    } topo_sort(nodes_);
+    };
 
-    RETURN_NOT_OK(topo_sort.Sort());
-    return topo_sort.Reverse();
+    return std::move(Impl{nodes_}.sorted);
   }
 
+  bool started_ = false, stopped_ = false;
   std::vector<std::unique_ptr<ExecNode>> nodes_;
+  NodeVector sorted_nodes_;
   NodeVector sources_, sinks_;
 };
 
@@ -170,21 +173,26 @@ Status ExecPlan::Validate() { return ToDerived(this)->Validate(); }
 
 Status ExecPlan::StartProducing() { return ToDerived(this)->StartProducing(); }
 
-ExecNode::ExecNode(ExecPlan* plan, std::string label,
-                   std::vector<BatchDescr> input_descrs,
-                   std::vector<std::string> input_labels, BatchDescr output_descr,
-                   int num_outputs)
+void ExecPlan::StopProducing() { ToDerived(this)->StopProducing(); }
+
+ExecNode::ExecNode(ExecPlan* plan, std::string label, NodeVector inputs,
+                   std::vector<std::string> input_labels,
+                   std::shared_ptr<Schema> output_schema, int num_outputs)
     : plan_(plan),
       label_(std::move(label)),
-      input_descrs_(std::move(input_descrs)),
+      inputs_(std::move(inputs)),
       input_labels_(std::move(input_labels)),
-      output_descr_(std::move(output_descr)),
-      num_outputs_(num_outputs) {}
+      output_schema_(std::move(output_schema)),
+      num_outputs_(num_outputs) {
+  for (auto input : inputs_) {
+    input->outputs_.push_back(this);
+  }
+}
 
 Status ExecNode::Validate() const {
-  if (inputs_.size() != input_descrs_.size()) {
+  if (inputs_.size() != input_labels_.size()) {
     return Status::Invalid("Invalid number of inputs for '", label(), "' (expected ",
-                           num_inputs(), ", actual ", inputs_.size(), ")");
+                           num_inputs(), ", actual ", input_labels_.size(), ")");
   }
 
   if (static_cast<int>(outputs_.size()) != num_outputs_) {
@@ -192,26 +200,369 @@ Status ExecNode::Validate() const {
                            num_outputs(), ", actual ", outputs_.size(), ")");
   }
 
-  DCHECK_EQ(input_descrs_.size(), input_labels_.size());
-
   for (auto out : outputs_) {
     auto input_index = GetNodeIndex(out->inputs(), this);
     if (!input_index) {
       return Status::Invalid("Node '", label(), "' outputs to node '", out->label(),
                              "' but is not listed as an input.");
     }
+  }
+
+  return Status::OK();
+}
 
-    const auto& in_descr = out->input_descrs_[*input_index];
-    if (in_descr != output_descr_) {
-      return Status::Invalid(
-          "Node '", label(), "' (bound to input ", input_labels_[*input_index],
-          ") produces batches with type '", ValueDescr::ToString(output_descr_),
-          "' inconsistent with consumer '", out->label(), "' which accepts '",
-          ValueDescr::ToString(in_descr), "'");
+struct SourceNode : ExecNode {
+  SourceNode(ExecPlan* plan, std::string label, std::shared_ptr<Schema> output_schema,
+             AsyncGenerator<util::optional<ExecBatch>> generator)
+      : ExecNode(plan, std::move(label), {}, {}, std::move(output_schema),
+                 /*num_outputs=*/1),
+        generator_(std::move(generator)) {}
+
+  const char* kind_name() override { return "SourceNode"; }
+
+  static void NoInputs() { DCHECK(false) << "no inputs; this should never be called"; }
+  void InputReceived(ExecNode*, int, ExecBatch) override { NoInputs(); }
+  void ErrorReceived(ExecNode*, Status) override { NoInputs(); }
+  void InputFinished(ExecNode*, int) override { NoInputs(); }
+
+  Status StartProducing() override {
+    if (finished_) {
+      return Status::Invalid("Restarted SourceNode '", label(), "'");
     }
+
+    finished_fut_ =
+        Loop([this] {
+          std::unique_lock<std::mutex> lock(mutex_);
+          int seq = next_batch_index_++;
+          if (finished_) {
+            return Future<ControlFlow<int>>::MakeFinished(Break(seq));
+          }
+          lock.unlock();
+
+          return generator_().Then(
+              [=](const util::optional<ExecBatch>& batch) -> ControlFlow<int> {
+                std::unique_lock<std::mutex> lock(mutex_);
+                if (!batch || finished_) {
+                  finished_ = true;
+                  return Break(seq);
+                }
+                lock.unlock();
+
+                // TODO check if we are on the desired Executor and transfer if not.
+                // This can happen for in-memory scans where batches didn't require
+                // any CPU work to decode. Otherwise, parsing etc should have already
+                // been placed us on the thread pool
+                outputs_[0]->InputReceived(this, seq, *batch);
+                return Continue();
+              },
+              [=](const Status& error) -> ControlFlow<int> {
+                std::unique_lock<std::mutex> lock(mutex_);
+                if (!finished_) {
+                  finished_ = true;
+                  lock.unlock();
+                  // unless we were already finished, push the error to our output
+                  // XXX is this correct? Is it reasonable for a consumer to
+                  // ignore errors from a finished producer?
+                  outputs_[0]->ErrorReceived(this, error);
+                }
+                return Break(seq);
+              });
+        }).Then([&](int seq) {
+          /// XXX this is probably redundant: do we always call InputFinished after
+          /// ErrorReceived or will ErrorRecieved be sufficient?
+          outputs_[0]->InputFinished(this, seq);
+        });
+
+    return Status::OK();
   }
 
-  return Status::OK();
+  void PauseProducing(ExecNode* output) override {}
+
+  void ResumeProducing(ExecNode* output) override {}
+
+  void StopProducing(ExecNode* output) override {
+    DCHECK_EQ(output, outputs_[0]);
+    {
+      std::unique_lock<std::mutex> lock(mutex_);
+      finished_ = true;
+    }
+    finished_fut_.Wait();
+  }
+
+  void StopProducing() override { StopProducing(outputs_[0]); }
+
+ private:
+  std::mutex mutex_;
+  bool finished_{false};
+  int next_batch_index_{0};
+  Future<> finished_fut_ = Future<>::MakeFinished();
+  AsyncGenerator<util::optional<ExecBatch>> generator_;
+};
+
+ExecNode* MakeSourceNode(ExecPlan* plan, std::string label,
+                         std::shared_ptr<Schema> output_schema,
+                         AsyncGenerator<util::optional<ExecBatch>> generator) {
+  return plan->EmplaceNode<SourceNode>(plan, std::move(label), std::move(output_schema),
+                                       std::move(generator));
+}
+
+struct FilterNode : ExecNode {
+  FilterNode(ExecNode* input, std::string label, Expression filter)
+      : ExecNode(input->plan(), std::move(label), {input}, {"target"},
+                 /*output_schema=*/input->output_schema(),
+                 /*num_outputs=*/1),
+        filter_(std::move(filter)) {}
+
+  const char* kind_name() override { return "FilterNode"; }
+
+  Result<ExecBatch> DoFilter(const ExecBatch& target) {
+    ARROW_ASSIGN_OR_RAISE(Expression simplified_filter,
+                          SimplifyWithGuarantee(filter_, target.guarantee));
+
+    // XXX get a non-default exec context
+    ARROW_ASSIGN_OR_RAISE(Datum mask, ExecuteScalarExpression(simplified_filter, target));
+
+    if (mask.is_scalar()) {
+      const auto& mask_scalar = mask.scalar_as<BooleanScalar>();
+      if (mask_scalar.is_valid && mask_scalar.value) {
+        return target;
+      }
+
+      return target.Slice(0, 0);
+    }
+
+    auto values = target.values;
+    for (auto& value : values) {
+      if (value.is_scalar()) continue;
+      ARROW_ASSIGN_OR_RAISE(value, Filter(value, mask, FilterOptions::Defaults()));
+    }
+    return ExecBatch::Make(std::move(values));
+  }
+
+  void InputReceived(ExecNode* input, int seq, ExecBatch batch) override {
+    DCHECK_EQ(input, inputs_[0]);
+
+    auto maybe_filtered = DoFilter(std::move(batch));
+    if (!maybe_filtered.ok()) {
+      outputs_[0]->ErrorReceived(this, maybe_filtered.status());
+      inputs_[0]->StopProducing(this);
+      return;
+    }
+
+    maybe_filtered->guarantee = batch.guarantee;
+    outputs_[0]->InputReceived(this, seq, maybe_filtered.MoveValueUnsafe());
+  }
+
+  void ErrorReceived(ExecNode* input, Status error) override {
+    DCHECK_EQ(input, inputs_[0]);
+    outputs_[0]->ErrorReceived(this, std::move(error));
+    inputs_[0]->StopProducing(this);
+  }
+
+  void InputFinished(ExecNode* input, int seq) override {
+    DCHECK_EQ(input, inputs_[0]);
+    outputs_[0]->InputFinished(this, seq);
+  }
+
+  Status StartProducing() override { return Status::OK(); }
+
+  void PauseProducing(ExecNode* output) override {}
+
+  void ResumeProducing(ExecNode* output) override {}
+
+  void StopProducing(ExecNode* output) override {
+    DCHECK_EQ(output, outputs_[0]);
+    inputs_[0]->StopProducing(this);
+  }
+
+  void StopProducing() override { StopProducing(outputs_[0]); }
+
+ private:
+  Expression filter_;
+};
+
+Result<ExecNode*> MakeFilterNode(ExecNode* input, std::string label, Expression filter) {
+  if (!filter.IsBound()) {
+    ARROW_ASSIGN_OR_RAISE(filter, filter.Bind(*input->output_schema()));
+  }
+
+  if (filter.type()->id() != Type::BOOL) {
+    return Status::TypeError("Filter expression must evaluate to bool, but ",
+                             filter.ToString(), " evaluates to ",
+                             filter.type()->ToString());
+  }
+
+  return input->plan()->EmplaceNode<FilterNode>(input, std::move(label),
+                                                std::move(filter));
+}
+
+struct ProjectNode : ExecNode {
+  ProjectNode(ExecNode* input, std::string label, std::shared_ptr<Schema> output_schema,
+              std::vector<Expression> exprs)
+      : ExecNode(input->plan(), std::move(label), {input}, {"target"},
+                 /*output_schema=*/std::move(output_schema),
+                 /*num_outputs=*/1),
+        exprs_(std::move(exprs)) {}
+
+  const char* kind_name() override { return "ProjectNode"; }
+
+  Result<ExecBatch> DoProject(const ExecBatch& target) {
+    // XXX get a non-default exec context
+    std::vector<Datum> values{exprs_.size()};
+    for (size_t i = 0; i < exprs_.size(); ++i) {
+      ARROW_ASSIGN_OR_RAISE(Expression simplified_expr,
+                            SimplifyWithGuarantee(exprs_[i], target.guarantee));
+
+      ARROW_ASSIGN_OR_RAISE(values[i], ExecuteScalarExpression(simplified_expr, target));
+    }
+    return ExecBatch::Make(std::move(values));
+  }
+
+  void InputReceived(ExecNode* input, int seq, ExecBatch batch) override {
+    DCHECK_EQ(input, inputs_[0]);
+
+    auto maybe_projected = DoProject(std::move(batch));
+    if (!maybe_projected.ok()) {
+      outputs_[0]->ErrorReceived(this, maybe_projected.status());
+      inputs_[0]->StopProducing(this);
+      return;
+    }
+
+    maybe_projected->guarantee = batch.guarantee;
+    outputs_[0]->InputReceived(this, seq, maybe_projected.MoveValueUnsafe());
+  }
+
+  void ErrorReceived(ExecNode* input, Status error) override {
+    DCHECK_EQ(input, inputs_[0]);
+    outputs_[0]->ErrorReceived(this, std::move(error));
+    inputs_[0]->StopProducing(this);
+  }
+
+  void InputFinished(ExecNode* input, int seq) override {
+    DCHECK_EQ(input, inputs_[0]);
+    outputs_[0]->InputFinished(this, seq);
+  }
+
+  Status StartProducing() override { return Status::OK(); }
+
+  void PauseProducing(ExecNode* output) override {}
+
+  void ResumeProducing(ExecNode* output) override {}
+
+  void StopProducing(ExecNode* output) override {
+    DCHECK_EQ(output, outputs_[0]);
+    inputs_[0]->StopProducing(this);
+  }
+
+  void StopProducing() override { StopProducing(outputs_[0]); }
+
+ private:
+  std::vector<Expression> exprs_;
+};
+
+Result<ExecNode*> MakeProjectNode(ExecNode* input, std::string label,
+                                  std::vector<Expression> exprs) {
+  FieldVector fields(exprs.size());
+
+  int i = 0;
+  for (auto& expr : exprs) {
+    if (!expr.IsBound()) {
+      ARROW_ASSIGN_OR_RAISE(expr, expr.Bind(*input->output_schema()));
+    }
+    fields[i] = field(expr.ToString(), expr.type());
+    ++i;
+  }
+
+  return input->plan()->EmplaceNode<ProjectNode>(
+      input, std::move(label), schema(std::move(fields)), std::move(exprs));
+}
+
+struct SinkNode : ExecNode {
+  SinkNode(ExecNode* input, std::string label,
+           AsyncGenerator<util::optional<ExecBatch>>* generator)
+      : ExecNode(input->plan(), std::move(label), {input}, {"collected"}, {},
+                 /*num_outputs=*/0),
+        producer_(MakeProducer(generator)) {}
+
+  static PushGenerator<util::optional<ExecBatch>>::Producer MakeProducer(
+      AsyncGenerator<util::optional<ExecBatch>>* out_gen) {
+    PushGenerator<util::optional<ExecBatch>> gen;
+    auto out = gen.producer();
+    *out_gen = std::move(gen);
+    return out;
+  }
+
+  const char* kind_name() override { return "SinkNode"; }
+
+  Status StartProducing() override { return Status::OK(); }
+
+  // sink nodes have no outputs from which to feel backpressure
+  static void NoOutputs() { DCHECK(false) << "no outputs; this should never be called"; }
+  void ResumeProducing(ExecNode* output) override { NoOutputs(); }
+  void PauseProducing(ExecNode* output) override { NoOutputs(); }
+  void StopProducing(ExecNode* output) override { NoOutputs(); }
+
+  void StopProducing() override {
+    std::unique_lock<std::mutex> lock(mutex_);
+    InputFinishedUnlocked();
+  }
+
+  void InputReceived(ExecNode* input, int seq_num, ExecBatch batch) override {
+    DCHECK_EQ(input, inputs_[0]);
+
+    std::unique_lock<std::mutex> lock(mutex_);
+    if (stopped_) return;
+
+    ++num_received_;
+    if (num_received_ == emit_stop_) {
+      InputFinishedUnlocked();
+    }
+
+    if (emit_stop_ != -1) {
+      DCHECK_LE(seq_num, emit_stop_);
+    }
+    lock.unlock();
+
+    producer_.Push(std::move(batch));
+  }
+
+  void ErrorReceived(ExecNode* input, Status error) override {
+    DCHECK_EQ(input, inputs_[0]);
+    producer_.Push(std::move(error));
+    std::unique_lock<std::mutex> lock(mutex_);
+    InputFinishedUnlocked();
+  }
+
+  void InputFinished(ExecNode* input, int seq_stop) override {
+    std::unique_lock<std::mutex> lock(mutex_);
+    emit_stop_ = seq_stop;
+    if (emit_stop_ == num_received_) {
+      InputFinishedUnlocked();
+    }
+  }
+
+ private:
+  void InputFinishedUnlocked() {
+    if (!stopped_) {
+      stopped_ = true;
+      producer_.Close();
+    }
+  }
+
+  std::mutex mutex_;
+
+  int num_received_ = 0;
+  int emit_stop_ = -1;
+  bool stopped_ = false;
+
+  PushGenerator<util::optional<ExecBatch>>::Producer producer_;
+};
+
+AsyncGenerator<util::optional<ExecBatch>> MakeSinkNode(ExecNode* input,
+                                                       std::string label) {
+  AsyncGenerator<util::optional<ExecBatch>> out;
+  (void)input->plan()->EmplaceNode<SinkNode>(input, std::move(label), &out);
+  return out;
 }
 
 }  // namespace compute
diff --git a/cpp/src/arrow/compute/exec/exec_plan.h b/cpp/src/arrow/compute/exec/exec_plan.h
index 0d2faea0ddc..21a757af5a1 100644
--- a/cpp/src/arrow/compute/exec/exec_plan.h
+++ b/cpp/src/arrow/compute/exec/exec_plan.h
@@ -17,6 +17,7 @@
 
 #pragma once
 
+#include <functional>
 #include <memory>
 #include <string>
 #include <vector>
@@ -24,6 +25,7 @@
 #include "arrow/compute/type_fwd.h"
 #include "arrow/type_fwd.h"
 #include "arrow/util/macros.h"
+#include "arrow/util/optional.h"
 #include "arrow/util/visibility.h"
 
 // NOTES:
@@ -48,8 +50,11 @@ class ARROW_EXPORT ExecPlan : public std::enable_shared_from_this<ExecPlan> {
   ExecNode* AddNode(std::unique_ptr<ExecNode> node);
 
   template <typename Node, typename... Args>
-  ExecNode* EmplaceNode(Args&&... args) {
-    return AddNode(std::unique_ptr<Node>(new Node{std::forward<Args>(args)...}));
+  Node* EmplaceNode(Args&&... args) {
+    auto node = std::unique_ptr<Node>(new Node{std::forward<Args>(args)...});
+    auto out = node.get();
+    AddNode(std::move(node));
+    return out;
   }
 
   /// The initial inputs
@@ -58,15 +63,6 @@ class ARROW_EXPORT ExecPlan : public std::enable_shared_from_this<ExecPlan> {
   /// The final outputs
   const NodeVector& sinks() const;
 
-  // XXX API question:
-  // There are clearly two phases in the ExecPlan lifecycle:
-  // - one construction phase where AddNode() and ExecNode::AddInput() is called
-  //   (with optional validation at the end)
-  // - one execution phase where the nodes are topo-sorted and then started
-  //
-  // => Should we separate out those APIs? e.g. have a ExecPlanBuilder
-  // for the first phase.
-
   Status Validate();
 
   /// Start producing on all nodes
@@ -75,7 +71,7 @@ class ARROW_EXPORT ExecPlan : public std::enable_shared_from_this<ExecPlan> {
   /// is started before all of its inputs.
   Status StartProducing();
 
-  // XXX should we also have `void StopProducing()`?
+  void StopProducing();
 
  protected:
   ExecPlan() = default;
@@ -84,32 +80,26 @@ class ARROW_EXPORT ExecPlan : public std::enable_shared_from_this<ExecPlan> {
 class ARROW_EXPORT ExecNode {
  public:
   using NodeVector = std::vector<ExecNode*>;
-  using BatchDescr = std::vector<ValueDescr>;
 
   virtual ~ExecNode() = default;
 
   virtual const char* kind_name() = 0;
 
   // The number of inputs/outputs expected by this node
-  int num_inputs() const { return static_cast<int>(input_descrs_.size()); }
+  int num_inputs() const { return static_cast<int>(inputs_.size()); }
   int num_outputs() const { return num_outputs_; }
 
   /// This node's predecessors in the exec plan
   const NodeVector& inputs() const { return inputs_; }
 
-  /// The datatypes accepted by this node for each input
-  const std::vector<BatchDescr>& input_descrs() const { return input_descrs_; }
-
   /// \brief Labels identifying the function of each input.
-  ///
-  /// For example, FilterNode accepts "target" and "filter" inputs.
   const std::vector<std::string>& input_labels() const { return input_labels_; }
 
   /// This node's successors in the exec plan
   const NodeVector& outputs() const { return outputs_; }
 
   /// The datatypes for batches produced by this node
-  const BatchDescr& output_descr() const { return output_descr_; }
+  const std::shared_ptr<Schema>& output_schema() const { return output_schema_; }
 
   /// This node's exec plan
   ExecPlan* plan() { return plan_; }
@@ -119,11 +109,6 @@ class ARROW_EXPORT ExecNode {
   /// There is no guarantee that this value is non-empty or unique.
   const std::string& label() const { return label_; }
 
-  void AddInput(ExecNode* input) {
-    inputs_.push_back(input);
-    input->outputs_.push_back(this);
-  }
-
   Status Validate() const;
 
   /// Upstream API:
@@ -139,7 +124,7 @@ class ARROW_EXPORT ExecNode {
   ///   and StopProducing()
 
   /// Transfer input batch to ExecNode
-  virtual void InputReceived(ExecNode* input, int seq_num, compute::ExecBatch batch) = 0;
+  virtual void InputReceived(ExecNode* input, int seq_num, ExecBatch batch) = 0;
 
   /// Signal error to ExecNode
   virtual void ErrorReceived(ExecNode* input, Status error) = 0;
@@ -222,25 +207,62 @@ class ARROW_EXPORT ExecNode {
   virtual void StopProducing(ExecNode* output) = 0;
 
   /// \brief Stop producing definitively
+  ///
+  /// XXX maybe this should return a Future<>?
   virtual void StopProducing() = 0;
 
  protected:
-  ExecNode(ExecPlan* plan, std::string label, std::vector<BatchDescr> input_descrs,
-           std::vector<std::string> input_labels, BatchDescr output_descr,
+  ExecNode(ExecPlan* plan, std::string label, NodeVector inputs,
+           std::vector<std::string> input_labels, std::shared_ptr<Schema> output_schema,
            int num_outputs);
 
   ExecPlan* plan_;
-
   std::string label_;
 
-  std::vector<BatchDescr> input_descrs_;
-  std::vector<std::string> input_labels_;
   NodeVector inputs_;
+  std::vector<std::string> input_labels_;
 
-  BatchDescr output_descr_;
+  std::shared_ptr<Schema> output_schema_;
   int num_outputs_;
   NodeVector outputs_;
 };
 
+/// \brief Adapt an AsyncGenerator<ExecBatch> as a source node
+///
+/// TODO this should accept an Executor and explicitly handle batches
+/// as they are generated on each of the Executor's threads.
+ARROW_EXPORT
+ExecNode* MakeSourceNode(ExecPlan*, std::string label,
+                         std::shared_ptr<Schema> output_schema,
+                         std::function<Future<util::optional<ExecBatch>>()>);
+
+/// \brief Add a sink node which forwards to an AsyncGenerator<ExecBatch>
+///
+/// Emitted batches will not be ordered; instead they will be tagged with the `seq` at
+/// which they were received.
+ARROW_EXPORT
+std::function<Future<util::optional<ExecBatch>>()> MakeSinkNode(ExecNode* input,
+                                                                std::string label);
+
+/// \brief Make a node which excludes some rows from batches passed through it
+///
+/// The filter Expression will be evaluated against each batch which is pushed to
+/// this node. Any rows for which the filter does not evaluate to `true` will be excluded
+/// in the batch emitted by this node.
+///
+/// If the filter is not already bound, it will be bound against the input's schema.
+ARROW_EXPORT
+Result<ExecNode*> MakeFilterNode(ExecNode* input, std::string label, Expression filter);
+
+/// \brief Make a node which executes expressions on input batches, producing new batches.
+///
+/// Each expression will be evaluated against each batch which is pushed to
+/// this node to produce a corresponding output column.
+///
+/// If exprs are not already bound, they will be bound against the input's schema.
+ARROW_EXPORT
+Result<ExecNode*> MakeProjectNode(ExecNode* input, std::string label,
+                                  std::vector<Expression> exprs);
+
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/compute/exec/expression.cc b/cpp/src/arrow/compute/exec/expression.cc
index aeabbf7bc5b..022584d5b39 100644
--- a/cpp/src/arrow/compute/exec/expression.cc
+++ b/cpp/src/arrow/compute/exec/expression.cc
@@ -29,6 +29,7 @@
 #include "arrow/ipc/reader.h"
 #include "arrow/ipc/writer.h"
 #include "arrow/util/atomic_shared_ptr.h"
+#include "arrow/util/hash_util.h"
 #include "arrow/util/key_value_metadata.h"
 #include "arrow/util/logging.h"
 #include "arrow/util/optional.h"
@@ -42,7 +43,13 @@ using internal::checked_pointer_cast;
 
 namespace compute {
 
-Expression::Expression(Call call) : impl_(std::make_shared<Impl>(std::move(call))) {}
+Expression::Expression(Call call) {
+  call.hash = std::hash<std::string>{}(call.function_name);
+  for (const auto& arg : call.arguments) {
+    arrow::internal::hash_combine(call.hash, arg.hash());
+  }
+  impl_ = std::make_shared<Impl>(std::move(call));
+}
 
 Expression::Expression(Datum literal)
     : impl_(std::make_shared<Impl>(std::move(literal))) {}
@@ -53,7 +60,7 @@ Expression::Expression(Parameter parameter)
 Expression literal(Datum lit) { return Expression(std::move(lit)); }
 
 Expression field_ref(FieldRef ref) {
-  return Expression(Expression::Parameter{std::move(ref), {}});
+  return Expression(Expression::Parameter{std::move(ref), ValueDescr{}, -1});
 }
 
 Expression call(std::string function, std::vector<Expression> arguments,
@@ -67,8 +74,12 @@ Expression call(std::string function, std::vector<Expression> arguments,
 
 const Datum* Expression::literal() const { return util::get_if<Datum>(impl_.get()); }
 
+const Expression::Parameter* Expression::parameter() const {
+  return util::get_if<Parameter>(impl_.get());
+}
+
 const FieldRef* Expression::field_ref() const {
-  if (auto parameter = util::get_if<Parameter>(impl_.get())) {
+  if (auto parameter = this->parameter()) {
     return &parameter->ref;
   }
   return nullptr;
@@ -85,7 +96,7 @@ ValueDescr Expression::descr() const {
     return lit->descr();
   }
 
-  if (auto parameter = util::get_if<Parameter>(impl_.get())) {
+  if (auto parameter = this->parameter()) {
     return parameter->descr;
   }
 
@@ -235,21 +246,7 @@ size_t Expression::hash() const {
     return ref->hash();
   }
 
-  auto call = CallNotNull(*this);
-  if (call->hash != nullptr) {
-    return call->hash->load();
-  }
-
-  size_t out = std::hash<std::string>{}(call->function_name);
-  for (const auto& arg : call->arguments) {
-    out ^= arg.hash();
-  }
-
-  std::shared_ptr<std::atomic<size_t>> expected = nullptr;
-  ::arrow::internal::atomic_compare_exchange_strong(
-      &const_cast<Call*>(call)->hash, &expected,
-      std::make_shared<std::atomic<size_t>>(out));
-  return out;
+  return CallNotNull(*this)->hash;
 }
 
 bool Expression::IsBound() const {
@@ -383,76 +380,113 @@ Result<Expression> BindNonRecursive(Expression::Call call, bool insert_implicit_
   return Expression(std::move(call));
 }
 
-struct FieldPathGetDatumImpl {
-  template <typename T, typename = decltype(FieldPath{}.Get(std::declval<const T&>()))>
-  Result<Datum> operator()(const std::shared_ptr<T>& ptr) {
-    return path_.Get(*ptr).template As<Datum>();
-  }
-
-  template <typename T>
-  Result<Datum> operator()(const T&) {
-    return Status::NotImplemented("FieldPath::Get() into Datum ", datum_.ToString());
+template <typename TypeOrSchema>
+Result<Expression> BindImpl(Expression expr, const TypeOrSchema& in,
+                            ValueDescr::Shape shape, compute::ExecContext* exec_context) {
+  if (exec_context == nullptr) {
+    compute::ExecContext exec_context;
+    return BindImpl(std::move(expr), in, shape, &exec_context);
   }
 
-  const Datum& datum_;
-  const FieldPath& path_;
-};
+  if (expr.literal()) return expr;
 
-inline Result<Datum> GetDatumField(const FieldRef& ref, const Datum& input) {
-  Datum field;
+  if (auto ref = expr.field_ref()) {
+    if (ref->IsNested()) {
+      return Status::NotImplemented("nested field references");
+    }
 
-  FieldPath match;
-  if (auto type = input.type()) {
-    ARROW_ASSIGN_OR_RAISE(match, ref.FindOneOrNone(*type));
-  } else if (auto schema = input.schema()) {
-    ARROW_ASSIGN_OR_RAISE(match, ref.FindOneOrNone(*schema));
-  } else {
-    return Status::NotImplemented("retrieving fields from datum ", input.ToString());
-  }
+    ARROW_ASSIGN_OR_RAISE(auto path, ref->FindOne(in));
 
-  if (!match.empty()) {
-    ARROW_ASSIGN_OR_RAISE(field,
-                          util::visit(FieldPathGetDatumImpl{input, match}, input.value));
+    auto bound = *expr.parameter();
+    bound.index = path[0];
+    ARROW_ASSIGN_OR_RAISE(auto field, path.Get(in));
+    bound.descr.type = field->type();
+    bound.descr.shape = shape;
+    return Expression{std::move(bound)};
   }
 
-  if (field == Datum{}) {
-    return Datum(std::make_shared<NullScalar>());
+  auto call = *CallNotNull(expr);
+  for (auto& argument : call.arguments) {
+    ARROW_ASSIGN_OR_RAISE(argument,
+                          BindImpl(std::move(argument), in, shape, exec_context));
   }
-
-  return field;
+  return BindNonRecursive(std::move(call),
+                          /*insert_implicit_casts=*/true, exec_context);
 }
 
 }  // namespace
 
-Result<Expression> Expression::Bind(ValueDescr in,
+Result<Expression> Expression::Bind(const ValueDescr& in,
                                     compute::ExecContext* exec_context) const {
-  if (exec_context == nullptr) {
-    compute::ExecContext exec_context;
-    return Bind(std::move(in), &exec_context);
-  }
+  return BindImpl(*this, *in.type, in.shape, exec_context);
+}
 
-  if (literal()) return *this;
+Result<Expression> Expression::Bind(const Schema& in_schema,
+                                    compute::ExecContext* exec_context) const {
+  return BindImpl(*this, in_schema, ValueDescr::ARRAY, exec_context);
+}
 
-  if (auto ref = field_ref()) {
-    ARROW_ASSIGN_OR_RAISE(auto field, ref->GetOneOrNone(*in.type));
-    auto descr = field ? ValueDescr{field->type(), in.shape} : ValueDescr::Scalar(null());
-    return Expression{Parameter{*ref, std::move(descr)}};
+Result<ExecBatch> MakeExecBatch(const Schema& full_schema, const Datum& partial) {
+  ExecBatch out;
+
+  if (partial.kind() == Datum::RECORD_BATCH) {
+    const auto& partial_batch = *partial.record_batch();
+    out.length = partial_batch.num_rows();
+
+    for (const auto& field : full_schema.fields()) {
+      ARROW_ASSIGN_OR_RAISE(auto column,
+                            FieldRef(field->name()).GetOneOrNone(partial_batch));
+
+      if (column) {
+        if (!column->type()->Equals(field->type())) {
+          // Referenced field was present but didn't have the expected type.
+          // This *should* be handled by readers, and will just be an error in the future.
+          ARROW_ASSIGN_OR_RAISE(
+              auto converted,
+              compute::Cast(column, field->type(), compute::CastOptions::Safe()));
+          column = converted.make_array();
+        }
+        out.values.emplace_back(std::move(column));
+      } else {
+        out.values.emplace_back(MakeNullScalar(field->type()));
+      }
+    }
+    return out;
   }
 
-  auto call = *CallNotNull(*this);
-  for (auto& argument : call.arguments) {
-    ARROW_ASSIGN_OR_RAISE(argument, argument.Bind(in, exec_context));
+  // wasteful but useful for testing:
+  if (partial.type()->id() == Type::STRUCT) {
+    if (partial.is_array()) {
+      ARROW_ASSIGN_OR_RAISE(auto partial_batch,
+                            RecordBatch::FromStructArray(partial.make_array()));
+
+      return MakeExecBatch(full_schema, partial_batch);
+    }
+
+    if (partial.is_scalar()) {
+      ARROW_ASSIGN_OR_RAISE(auto partial_array,
+                            MakeArrayFromScalar(*partial.scalar(), 1));
+      ARROW_ASSIGN_OR_RAISE(auto out, MakeExecBatch(full_schema, partial_array));
+
+      for (Datum& value : out.values) {
+        if (value.is_scalar()) continue;
+        ARROW_ASSIGN_OR_RAISE(value, value.make_array()->GetScalar(0));
+      }
+      return out;
+    }
   }
-  return BindNonRecursive(std::move(call),
-                          /*insert_implicit_casts=*/true, exec_context);
+
+  return Status::NotImplemented("MakeExecBatch from ", PrintDatum(partial));
 }
 
-Result<Expression> Expression::Bind(const Schema& in_schema,
-                                    compute::ExecContext* exec_context) const {
-  return Bind(ValueDescr::Array(struct_(in_schema.fields())), exec_context);
+Result<Datum> ExecuteScalarExpression(const Expression& expr, const Schema& full_schema,
+                                      const Datum& partial_input,
+                                      compute::ExecContext* exec_context) {
+  ARROW_ASSIGN_OR_RAISE(auto input, MakeExecBatch(full_schema, partial_input));
+  return ExecuteScalarExpression(expr, input, exec_context);
 }
 
-Result<Datum> ExecuteScalarExpression(const Expression& expr, const Datum& input,
+Result<Datum> ExecuteScalarExpression(const Expression& expr, const ExecBatch& input,
                                       compute::ExecContext* exec_context) {
   if (exec_context == nullptr) {
     compute::ExecContext exec_context;
@@ -470,15 +504,16 @@ Result<Datum> ExecuteScalarExpression(const Expression& expr, const Datum& input
 
   if (auto lit = expr.literal()) return *lit;
 
-  if (auto ref = expr.field_ref()) {
-    ARROW_ASSIGN_OR_RAISE(Datum field, GetDatumField(*ref, input));
+  if (auto param = expr.parameter()) {
+    if (param->descr.type->id() == Type::NA) {
+      return MakeNullScalar(null());
+    }
 
-    if (field.descr() != expr.descr()) {
-      // Refernced field was present but didn't have the expected type.
-      // Should we just error here? For now, pay dispatch cost and just cast.
-      ARROW_ASSIGN_OR_RAISE(
-          field,
-          compute::Cast(field, expr.type(), compute::CastOptions::Safe(), exec_context));
+    const Datum& field = input[param->index];
+    if (!field.type()->Equals(param->descr.type)) {
+      return Status::Invalid("Referenced field ", expr.ToString(), " was ",
+                             field.type()->ToString(), " but should have been ",
+                             param->descr.type->ToString());
     }
 
     return field;
@@ -574,7 +609,7 @@ Result<Expression> FoldConstants(Expression expr) {
         if (std::all_of(call->arguments.begin(), call->arguments.end(),
                         [](const Expression& argument) { return argument.literal(); })) {
           // all arguments are literal; we can evaluate this subexpression *now*
-          static const Datum ignored_input = Datum{};
+          static const ExecBatch ignored_input = ExecBatch{};
           ARROW_ASSIGN_OR_RAISE(Datum constant,
                                 ExecuteScalarExpression(expr, ignored_input));
 
@@ -683,17 +718,16 @@ Status ExtractKnownFieldValuesImpl(
 
 }  // namespace
 
-Result<std::unordered_map<FieldRef, Datum, FieldRef::Hash>> ExtractKnownFieldValues(
+Result<KnownFieldValues> ExtractKnownFieldValues(
     const Expression& guaranteed_true_predicate) {
   auto conjunction_members = GuaranteeConjunctionMembers(guaranteed_true_predicate);
-  std::unordered_map<FieldRef, Datum, FieldRef::Hash> known_values;
-  RETURN_NOT_OK(ExtractKnownFieldValuesImpl(&conjunction_members, &known_values));
+  KnownFieldValues known_values;
+  RETURN_NOT_OK(ExtractKnownFieldValuesImpl(&conjunction_members, &known_values.map));
   return known_values;
 }
 
-Result<Expression> ReplaceFieldsWithKnownValues(
-    const std::unordered_map<FieldRef, Datum, FieldRef::Hash>& known_values,
-    Expression expr) {
+Result<Expression> ReplaceFieldsWithKnownValues(const KnownFieldValues& known_values,
+                                                Expression expr) {
   if (!expr.IsBound()) {
     return Status::Invalid(
         "ReplaceFieldsWithKnownValues called on an unbound Expression");
@@ -703,8 +737,8 @@ Result<Expression> ReplaceFieldsWithKnownValues(
       std::move(expr),
       [&known_values](Expression expr) -> Result<Expression> {
         if (auto ref = expr.field_ref()) {
-          auto it = known_values.find(*ref);
-          if (it != known_values.end()) {
+          auto it = known_values.map.find(*ref);
+          if (it != known_values.map.end()) {
             Datum lit = it->second;
             if (lit.descr() == expr.descr()) return literal(std::move(lit));
             // type mismatch, try casting the known value to the correct type
@@ -906,8 +940,8 @@ Result<Expression> SimplifyWithGuarantee(Expression expr,
                                          const Expression& guaranteed_true_predicate) {
   auto conjunction_members = GuaranteeConjunctionMembers(guaranteed_true_predicate);
 
-  std::unordered_map<FieldRef, Datum, FieldRef::Hash> known_values;
-  RETURN_NOT_OK(ExtractKnownFieldValuesImpl(&conjunction_members, &known_values));
+  KnownFieldValues known_values;
+  RETURN_NOT_OK(ExtractKnownFieldValuesImpl(&conjunction_members, &known_values.map));
 
   ARROW_ASSIGN_OR_RAISE(expr,
                         ReplaceFieldsWithKnownValues(known_values, std::move(expr)));
@@ -1144,13 +1178,5 @@ Expression or_(const std::vector<Expression>& operands) {
 
 Expression not_(Expression operand) { return call("invert", {std::move(operand)}); }
 
-Expression operator&&(Expression lhs, Expression rhs) {
-  return and_(std::move(lhs), std::move(rhs));
-}
-
-Expression operator||(Expression lhs, Expression rhs) {
-  return or_(std::move(lhs), std::move(rhs));
-}
-
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/compute/exec/expression.h b/cpp/src/arrow/compute/exec/expression.h
index f5ca2c2118d..d06a923bb32 100644
--- a/cpp/src/arrow/compute/exec/expression.h
+++ b/cpp/src/arrow/compute/exec/expression.h
@@ -19,10 +19,8 @@
 
 #pragma once
 
-#include <atomic>
 #include <memory>
 #include <string>
-#include <unordered_map>
 #include <utility>
 #include <vector>
 
@@ -44,13 +42,13 @@ class ARROW_EXPORT Expression {
   struct Call {
     std::string function_name;
     std::vector<Expression> arguments;
-    std::shared_ptr<compute::FunctionOptions> options;
-    std::shared_ptr<std::atomic<size_t>> hash;
+    std::shared_ptr<FunctionOptions> options;
+    size_t hash;
 
     // post-Bind properties:
-    std::shared_ptr<compute::Function> function;
-    const compute::Kernel* kernel = NULLPTR;
-    std::shared_ptr<compute::KernelState> kernel_state;
+    std::shared_ptr<Function> function;
+    const Kernel* kernel = NULLPTR;
+    std::shared_ptr<KernelState> kernel_state;
     ValueDescr descr;
   };
 
@@ -64,8 +62,8 @@ class ARROW_EXPORT Expression {
   /// Bind this expression to the given input type, looking up Kernels and field types.
   /// Some expression simplification may be performed and implicit casts will be inserted.
   /// Any state necessary for execution will be initialized and returned.
-  Result<Expression> Bind(ValueDescr in, compute::ExecContext* = NULLPTR) const;
-  Result<Expression> Bind(const Schema& in_schema, compute::ExecContext* = NULLPTR) const;
+  Result<Expression> Bind(const ValueDescr& in, ExecContext* = NULLPTR) const;
+  Result<Expression> Bind(const Schema& in_schema, ExecContext* = NULLPTR) const;
 
   // XXX someday
   // Clone all KernelState in this bound expression. If any function referenced by this
@@ -108,8 +106,12 @@ class ARROW_EXPORT Expression {
 
   struct Parameter {
     FieldRef ref;
+
+    // post-bind properties
     ValueDescr descr;
+    int index;
   };
+  const Parameter* parameter() const;
 
   Expression() = default;
   explicit Expression(Call call);
@@ -143,10 +145,10 @@ Expression field_ref(FieldRef ref);
 
 ARROW_EXPORT
 Expression call(std::string function, std::vector<Expression> arguments,
-                std::shared_ptr<compute::FunctionOptions> options = NULLPTR);
+                std::shared_ptr<FunctionOptions> options = NULLPTR);
 
-template <typename Options, typename = typename std::enable_if<std::is_base_of<
-                                compute::FunctionOptions, Options>::value>::type>
+template <typename Options, typename = typename std::enable_if<
+                                std::is_base_of<FunctionOptions, Options>::value>::type>
 Expression call(std::string function, std::vector<Expression> arguments,
                 Options options) {
   return call(std::move(function), std::move(arguments),
@@ -162,8 +164,9 @@ ARROW_EXPORT
 bool ExpressionHasFieldRefs(const Expression&);
 
 /// Assemble a mapping from field references to known values.
+struct ARROW_EXPORT KnownFieldValues;
 ARROW_EXPORT
-Result<std::unordered_map<FieldRef, Datum, FieldRef::Hash>> ExtractKnownFieldValues(
+Result<KnownFieldValues> ExtractKnownFieldValues(
     const Expression& guaranteed_true_predicate);
 
 /// \defgroup expression-passes Functions for modification of Expressions
@@ -182,7 +185,7 @@ Result<std::unordered_map<FieldRef, Datum, FieldRef::Hash>> ExtractKnownFieldVal
 /// equivalent Expressions may result in different canonicalized expressions.
 /// TODO this could be a strong canonicalization
 ARROW_EXPORT
-Result<Expression> Canonicalize(Expression, compute::ExecContext* = NULLPTR);
+Result<Expression> Canonicalize(Expression, ExecContext* = NULLPTR);
 
 /// Simplify Expressions based on literal arguments (for example, add(null, x) will always
 /// be null so replace the call with a null literal). Includes early evaluation of all
@@ -192,8 +195,8 @@ Result<Expression> FoldConstants(Expression);
 
 /// Simplify Expressions by replacing with known values of the fields which it references.
 ARROW_EXPORT
-Result<Expression> ReplaceFieldsWithKnownValues(
-    const std::unordered_map<FieldRef, Datum, FieldRef::Hash>& known_values, Expression);
+Result<Expression> ReplaceFieldsWithKnownValues(const KnownFieldValues& known_values,
+                                                Expression);
 
 /// Simplify an expression by replacing subexpressions based on a guarantee:
 /// a boolean expression which is guaranteed to evaluate to `true`. For example, this is
@@ -207,11 +210,22 @@ Result<Expression> SimplifyWithGuarantee(Expression,
 
 // Execution
 
-/// Execute a scalar expression against the provided state and input Datum. This
+/// Create an ExecBatch suitable for passing to ExecuteScalarExpression() from a
+/// RecordBatch which may have missing or incorrectly ordered columns.
+/// Missing fields will be replaced with null scalars.
+ARROW_EXPORT Result<ExecBatch> MakeExecBatch(const Schema& full_schema,
+                                             const Datum& partial);
+
+/// Execute a scalar expression against the provided state and input ExecBatch. This
 /// expression must be bound.
 ARROW_EXPORT
-Result<Datum> ExecuteScalarExpression(const Expression&, const Datum& input,
-                                      compute::ExecContext* = NULLPTR);
+Result<Datum> ExecuteScalarExpression(const Expression&, const ExecBatch& input,
+                                      ExecContext* = NULLPTR);
+
+/// Convenience function for invoking against a RecordBatch
+ARROW_EXPORT
+Result<Datum> ExecuteScalarExpression(const Expression&, const Schema& full_schema,
+                                      const Datum& partial_input, ExecContext* = NULLPTR);
 
 // Serialization
 
diff --git a/cpp/src/arrow/compute/exec/expression_internal.h b/cpp/src/arrow/compute/exec/expression_internal.h
index b9165a5f0c2..51d242e8d66 100644
--- a/cpp/src/arrow/compute/exec/expression_internal.h
+++ b/cpp/src/arrow/compute/exec/expression_internal.h
@@ -34,6 +34,10 @@ using internal::checked_cast;
 
 namespace compute {
 
+struct KnownFieldValues {
+  std::unordered_map<FieldRef, Datum, FieldRef::Hash> map;
+};
+
 inline const Expression::Call* CallNotNull(const Expression& expr) {
   auto call = expr.call();
   DCHECK_NE(call, nullptr);
diff --git a/cpp/src/arrow/compute/exec/expression_test.cc b/cpp/src/arrow/compute/exec/expression_test.cc
index 908e8962e43..86909f4eb64 100644
--- a/cpp/src/arrow/compute/exec/expression_test.cc
+++ b/cpp/src/arrow/compute/exec/expression_test.cc
@@ -166,6 +166,56 @@ TEST(ExpressionUtils, StripOrderPreservingCasts) {
   Expect(cast(field_ref("i32"), uint64()), no_change);
 }
 
+TEST(ExpressionUtils, MakeExecBatch) {
+  auto Expect = [](std::shared_ptr<RecordBatch> partial_batch) {
+    SCOPED_TRACE(partial_batch->ToString());
+    ASSERT_OK_AND_ASSIGN(auto batch, MakeExecBatch(*kBoringSchema, partial_batch));
+
+    ASSERT_EQ(batch.num_values(), kBoringSchema->num_fields());
+    for (int i = 0; i < kBoringSchema->num_fields(); ++i) {
+      const auto& field = *kBoringSchema->field(i);
+
+      SCOPED_TRACE("Field#" + std::to_string(i) + " " + field.ToString());
+
+      EXPECT_TRUE(batch[i].type()->Equals(field.type()))
+          << "Incorrect type " << batch[i].type()->ToString();
+
+      ASSERT_OK_AND_ASSIGN(auto col, FieldRef(field.name()).GetOneOrNone(*partial_batch));
+
+      if (batch[i].is_scalar()) {
+        EXPECT_FALSE(batch[i].scalar()->is_valid)
+            << "Non-null placeholder scalar was injected";
+
+        EXPECT_EQ(col, nullptr)
+            << "Placeholder scalar overwrote column " << col->ToString();
+      } else {
+        AssertDatumsEqual(col, batch[i]);
+      }
+    }
+  };
+
+  auto GetField = [](std::string name) { return kBoringSchema->GetFieldByName(name); };
+
+  constexpr int64_t kNumRows = 3;
+  auto i32 = ArrayFromJSON(int32(), "[1, 2, 3]");
+  auto f32 = ArrayFromJSON(float32(), "[1.5, 2.25, 3.125]");
+
+  // empty
+  Expect(RecordBatchFromJSON(kBoringSchema, "[]"));
+
+  // subset
+  Expect(RecordBatch::Make(schema({GetField("i32"), GetField("f32")}), kNumRows,
+                           {i32, f32}));
+
+  // flipped subset
+  Expect(RecordBatch::Make(schema({GetField("f32"), GetField("i32")}), kNumRows,
+                           {f32, i32}));
+
+  auto duplicated_names =
+      RecordBatch::Make(schema({GetField("i32"), GetField("i32")}), kNumRows, {i32, i32});
+  ASSERT_RAISES(Invalid, MakeExecBatch(*kBoringSchema, duplicated_names));
+}
+
 TEST(Expression, ToString) {
   EXPECT_EQ(field_ref("alpha").ToString(), "alpha");
 
@@ -445,21 +495,18 @@ TEST(Expression, BindFieldRef) {
   ExpectBindsTo(field_ref("i32"), no_change, &expr);
   EXPECT_EQ(expr.descr(), ValueDescr::Array(int32()));
 
-  // if the field is not found, a null scalar will be emitted
-  ExpectBindsTo(field_ref("no such field"), no_change, &expr);
-  EXPECT_EQ(expr.descr(), ValueDescr::Scalar(null()));
+  // if the field is not found, an error will be raised
+  ASSERT_RAISES(Invalid, field_ref("no such field").Bind(*kBoringSchema));
 
   // referencing a field by name is not supported if that name is not unique
   // in the input schema
   ASSERT_RAISES(Invalid, field_ref("alpha").Bind(Schema(
                              {field("alpha", int32()), field("alpha", float32())})));
 
-  // referencing nested fields is supported
-  ASSERT_OK_AND_ASSIGN(expr,
-                       field_ref(FieldRef("a", "b"))
-                           .Bind(Schema({field("a", struct_({field("b", int32())}))})));
-  EXPECT_TRUE(expr.IsBound());
-  EXPECT_EQ(expr.descr(), ValueDescr::Array(int32()));
+  // referencing nested fields is not supported
+  ASSERT_RAISES(NotImplemented,
+                field_ref(FieldRef("a", "b"))
+                    .Bind(Schema({field("a", struct_({field("b", int32())}))})));
 }
 
 TEST(Expression, BindCall) {
@@ -525,7 +572,8 @@ TEST(Expression, ExecuteFieldRef) {
     auto expr = field_ref(ref);
 
     ASSERT_OK_AND_ASSIGN(expr, expr.Bind(in.descr()));
-    ASSERT_OK_AND_ASSIGN(Datum actual, ExecuteScalarExpression(expr, in));
+    ASSERT_OK_AND_ASSIGN(Datum actual,
+                         ExecuteScalarExpression(expr, Schema(in.type()->fields()), in));
 
     AssertDatumsEqual(actual, expected, /*verbose=*/true);
   };
@@ -537,39 +585,45 @@ TEST(Expression, ExecuteFieldRef) {
   ])"),
               ArrayFromJSON(float64(), R"([6.125, 0.0, -1])"));
 
-  // more nested:
-  ExpectRefIs(FieldRef{"a", "a"},
-              ArrayFromJSON(struct_({field("a", struct_({field("a", float64())}))}), R"([
-    {"a": {"a": 6.125}},
-    {"a": {"a": 0.0}},
-    {"a": {"a": -1}}
+  ExpectRefIs("a",
+              ArrayFromJSON(struct_({
+                                field("a", float64()),
+                                field("b", float64()),
+                            }),
+                            R"([
+    {"a": 6.125, "b": 7.5},
+    {"a": 0.0,   "b": 2.125},
+    {"a": -1,    "b": 4.0}
   ])"),
               ArrayFromJSON(float64(), R"([6.125, 0.0, -1])"));
 
-  // absent fields are resolved as a null scalar:
-  ExpectRefIs(FieldRef{"b"}, ArrayFromJSON(struct_({field("a", float64())}), R"([
-    {"a": 6.125},
-    {"a": 0.0},
-    {"a": -1}
+  ExpectRefIs("b",
+              ArrayFromJSON(struct_({
+                                field("a", float64()),
+                                field("b", float64()),
+                            }),
+                            R"([
+    {"a": 6.125, "b": 7.5},
+    {"a": 0.0,   "b": 2.125},
+    {"a": -1,    "b": 4.0}
   ])"),
-              MakeNullScalar(null()));
-
-  // XXX this *should* fail in Bind but for now it will just error in
-  // ExecuteScalarExpression
-  ASSERT_OK_AND_ASSIGN(auto list_item, field_ref("item").Bind(list(int32())));
-  EXPECT_RAISES_WITH_MESSAGE_THAT(
-      NotImplemented, HasSubstr("non-struct array"),
-      ExecuteScalarExpression(list_item,
-                              ArrayFromJSON(list(int32()), "[[1,2], [], null, [5]]")));
+              ArrayFromJSON(float64(), R"([7.5, 2.125, 4.0])"));
 }
 
 Result<Datum> NaiveExecuteScalarExpression(const Expression& expr, const Datum& input) {
-  auto call = expr.call();
-  if (call == nullptr) {
-    // already tested execution of field_ref, execution of literal is trivial
-    return ExecuteScalarExpression(expr, input);
+  if (auto lit = expr.literal()) {
+    return *lit;
   }
 
+  if (auto ref = expr.field_ref()) {
+    if (input.type()) {
+      return ref->GetOneOrNone(*input.make_array());
+    }
+    return ref->GetOneOrNone(*input.record_batch());
+  }
+
+  auto call = CallNotNull(expr);
+
   std::vector<Datum> arguments(call->arguments.size());
   for (size_t i = 0; i < arguments.size(); ++i) {
     ARROW_ASSIGN_OR_RAISE(arguments[i],
@@ -587,13 +641,16 @@ Result<Datum> NaiveExecuteScalarExpression(const Expression& expr, const Datum&
 }
 
 void ExpectExecute(Expression expr, Datum in, Datum* actual_out = NULLPTR) {
+  std::shared_ptr<Schema> schm;
   if (in.is_value()) {
     ASSERT_OK_AND_ASSIGN(expr, expr.Bind(in.descr()));
+    schm = schema(in.type()->fields());
   } else {
     ASSERT_OK_AND_ASSIGN(expr, expr.Bind(*in.schema()));
+    schm = in.schema();
   }
 
-  ASSERT_OK_AND_ASSIGN(Datum actual, ExecuteScalarExpression(expr, in));
+  ASSERT_OK_AND_ASSIGN(Datum actual, ExecuteScalarExpression(expr, *schm, in));
 
   ASSERT_OK_AND_ASSIGN(Datum expected, NaiveExecuteScalarExpression(expr, in));
 
@@ -653,9 +710,9 @@ TEST(Expression, ExecuteDictionaryTransparent) {
   ASSERT_OK_AND_ASSIGN(
       expr, SimplifyWithGuarantee(expr, equal(field_ref("dict_str"), literal("eh"))));
 
-  ASSERT_OK_AND_ASSIGN(
-      auto res,
-      ExecuteScalarExpression(expr, ArrayFromJSON(struct_({field("i32", int32())}), R"([
+  ASSERT_OK_AND_ASSIGN(auto res, ExecuteScalarExpression(
+                                     expr, *kBoringSchema,
+                                     ArrayFromJSON(struct_({field("i32", int32())}), R"([
     {"i32": 0},
     {"i32": 1},
     {"i32": 2}
@@ -773,7 +830,7 @@ TEST(Expression, ExtractKnownFieldValues) {
     void operator()(Expression guarantee,
                     std::unordered_map<FieldRef, Datum, FieldRef::Hash> expected) {
       ASSERT_OK_AND_ASSIGN(auto actual, ExtractKnownFieldValues(guarantee));
-      EXPECT_THAT(actual, UnorderedElementsAreArray(expected))
+      EXPECT_THAT(actual.map, UnorderedElementsAreArray(expected))
           << "  guarantee: " << guarantee.ToString();
     }
   } ExpectKnown;
@@ -825,8 +882,8 @@ TEST(Expression, ReplaceFieldsWithKnownValues) {
          Expression unbound_expected) {
         ASSERT_OK_AND_ASSIGN(expr, expr.Bind(*kBoringSchema));
         ASSERT_OK_AND_ASSIGN(auto expected, unbound_expected.Bind(*kBoringSchema));
-        ASSERT_OK_AND_ASSIGN(auto replaced,
-                             ReplaceFieldsWithKnownValues(known_values, expr));
+        ASSERT_OK_AND_ASSIGN(auto replaced, ReplaceFieldsWithKnownValues(
+                                                KnownFieldValues{known_values}, expr));
 
         EXPECT_EQ(replaced, expected);
         ExpectIdenticalIfUnchanged(replaced, expr);
@@ -841,7 +898,7 @@ TEST(Expression, ReplaceFieldsWithKnownValues) {
   // NB: known_values will be cast
   ExpectReplacesTo(field_ref("i32"), {{"i32", Datum("3")}}, literal(3));
 
-  ExpectReplacesTo(field_ref("b"), i32_is_3, field_ref("b"));
+  ExpectReplacesTo(field_ref("f32"), i32_is_3, field_ref("f32"));
 
   ExpectReplacesTo(equal(field_ref("i32"), literal(1)), i32_is_3,
                    equal(literal(3), literal(1)));
@@ -886,13 +943,13 @@ TEST(Expression, ReplaceFieldsWithKnownValues) {
   Datum dict_i32{
       DictionaryScalar::Make(MakeScalar<int32_t>(0), ArrayFromJSON(int32(), R"([3])"))};
   // Unsupported cast dictionary(int32(), int32()) -> dictionary(int32(), utf8())
-  ASSERT_RAISES(NotImplemented,
-                ReplaceFieldsWithKnownValues({{"dict_str", dict_i32}}, expr));
+  ASSERT_RAISES(NotImplemented, ReplaceFieldsWithKnownValues(
+                                    KnownFieldValues{{{"dict_str", dict_i32}}}, expr));
   // Unsupported cast dictionary(int8(), utf8()) -> dictionary(int32(), utf8())
   dict_str = Datum{
       DictionaryScalar::Make(MakeScalar<int8_t>(0), ArrayFromJSON(utf8(), R"(["a"])"))};
-  ASSERT_RAISES(NotImplemented,
-                ReplaceFieldsWithKnownValues({{"dict_str", dict_str}}, expr));
+  ASSERT_RAISES(NotImplemented, ReplaceFieldsWithKnownValues(
+                                    KnownFieldValues{{{"dict_str", dict_str}}}, expr));
 }
 
 struct {
@@ -1082,7 +1139,8 @@ TEST(Expression, SingleComparisonGuarantees) {
                               {"i32"}));
 
         ASSERT_OK_AND_ASSIGN(filter, filter.Bind(*kBoringSchema));
-        ASSERT_OK_AND_ASSIGN(Datum evaluated, ExecuteScalarExpression(filter, input));
+        ASSERT_OK_AND_ASSIGN(Datum evaluated,
+                             ExecuteScalarExpression(filter, *kBoringSchema, input));
 
         // ensure that the simplified filter is as simplified as it could be
         // (this is always possible for single comparisons)
@@ -1193,7 +1251,8 @@ TEST(Expression, Filter) {
     auto expected_mask = batch->column(0);
 
     ASSERT_OK_AND_ASSIGN(filter, filter.Bind(*kBoringSchema));
-    ASSERT_OK_AND_ASSIGN(Datum mask, ExecuteScalarExpression(filter, batch));
+    ASSERT_OK_AND_ASSIGN(Datum mask,
+                         ExecuteScalarExpression(filter, *kBoringSchema, batch));
 
     AssertDatumsEqual(expected_mask, mask);
   };
@@ -1286,7 +1345,8 @@ TEST(Projection, AugmentWithNull) {
 
   auto ExpectProject = [&](Expression proj, Datum expected) {
     ASSERT_OK_AND_ASSIGN(proj, proj.Bind(*kBoringSchema));
-    ASSERT_OK_AND_ASSIGN(auto actual, ExecuteScalarExpression(proj, input));
+    ASSERT_OK_AND_ASSIGN(auto actual,
+                         ExecuteScalarExpression(proj, *kBoringSchema, input));
     AssertDatumsEqual(Datum(expected), actual);
   };
 
@@ -1316,7 +1376,8 @@ TEST(Projection, AugmentWithKnownValues) {
                                       Expression guarantee) {
     ASSERT_OK_AND_ASSIGN(proj, proj.Bind(*kBoringSchema));
     ASSERT_OK_AND_ASSIGN(proj, SimplifyWithGuarantee(proj, guarantee));
-    ASSERT_OK_AND_ASSIGN(auto actual, ExecuteScalarExpression(proj, input));
+    ASSERT_OK_AND_ASSIGN(auto actual,
+                         ExecuteScalarExpression(proj, *kBoringSchema, input));
     AssertDatumsEqual(Datum(expected), actual);
   };
 
diff --git a/cpp/src/arrow/compute/exec/plan_test.cc b/cpp/src/arrow/compute/exec/plan_test.cc
index 86f1879cbe9..75b71f97535 100644
--- a/cpp/src/arrow/compute/exec/plan_test.cc
+++ b/cpp/src/arrow/compute/exec/plan_test.cc
@@ -15,34 +15,33 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include <gmock/gmock-matchers.h>
-
 #include <functional>
 #include <memory>
 
+#include <gmock/gmock-matchers.h>
+
+#include "arrow/compute/exec.h"
 #include "arrow/compute/exec/exec_plan.h"
+#include "arrow/compute/exec/expression.h"
 #include "arrow/compute/exec/test_util.h"
 #include "arrow/record_batch.h"
 #include "arrow/testing/future_util.h"
 #include "arrow/testing/gtest_util.h"
+#include "arrow/testing/matchers.h"
 #include "arrow/testing/random.h"
+#include "arrow/util/async_generator.h"
 #include "arrow/util/logging.h"
 #include "arrow/util/thread_pool.h"
+#include "arrow/util/vector.h"
 
-namespace arrow {
+using testing::ElementsAre;
+using testing::HasSubstr;
+using testing::UnorderedElementsAreArray;
 
-using internal::Executor;
+namespace arrow {
 
 namespace compute {
 
-void AssertBatchesEqual(const RecordBatchVector& expected,
-                        const RecordBatchVector& actual) {
-  ASSERT_EQ(expected.size(), actual.size());
-  for (size_t i = 0; i < expected.size(); ++i) {
-    AssertBatchesEqual(*expected[i], *actual[i]);
-  }
-}
-
 TEST(ExecPlanConstruction, Empty) {
   ASSERT_OK_AND_ASSIGN(auto plan, ExecPlan::Make());
 
@@ -51,69 +50,49 @@ TEST(ExecPlanConstruction, Empty) {
 
 TEST(ExecPlanConstruction, SingleNode) {
   ASSERT_OK_AND_ASSIGN(auto plan, ExecPlan::Make());
-  auto node = MakeDummyNode(plan.get(), "dummy", /*num_inputs=*/0, /*num_outputs=*/0);
+  auto node = MakeDummyNode(plan.get(), "dummy", /*inputs=*/{}, /*num_outputs=*/0);
   ASSERT_OK(plan->Validate());
-  ASSERT_THAT(plan->sources(), ::testing::ElementsAre(node));
-  ASSERT_THAT(plan->sinks(), ::testing::ElementsAre(node));
-
-  ASSERT_OK_AND_ASSIGN(plan, ExecPlan::Make());
-  node = MakeDummyNode(plan.get(), "dummy", /*num_inputs=*/1, /*num_outputs=*/0);
-  // Input not bound
-  ASSERT_RAISES(Invalid, plan->Validate());
+  ASSERT_THAT(plan->sources(), ElementsAre(node));
+  ASSERT_THAT(plan->sinks(), ElementsAre(node));
 
   ASSERT_OK_AND_ASSIGN(plan, ExecPlan::Make());
-  node = MakeDummyNode(plan.get(), "dummy", /*num_inputs=*/0, /*num_outputs=*/1);
+  node = MakeDummyNode(plan.get(), "dummy", /*inputs=*/{}, /*num_outputs=*/1);
   // Output not bound
   ASSERT_RAISES(Invalid, plan->Validate());
 }
 
 TEST(ExecPlanConstruction, SourceSink) {
   ASSERT_OK_AND_ASSIGN(auto plan, ExecPlan::Make());
-  auto source = MakeDummyNode(plan.get(), "source", /*num_inputs=*/0, /*num_outputs=*/1);
-  auto sink = MakeDummyNode(plan.get(), "sink", /*num_inputs=*/1, /*num_outputs=*/0);
-  // Input / output not bound
-  ASSERT_RAISES(Invalid, plan->Validate());
+  auto source = MakeDummyNode(plan.get(), "source", /*inputs=*/{}, /*num_outputs=*/1);
+  auto sink = MakeDummyNode(plan.get(), "sink", /*inputs=*/{source}, /*num_outputs=*/0);
 
-  sink->AddInput(source);
   ASSERT_OK(plan->Validate());
-  EXPECT_THAT(plan->sources(), ::testing::ElementsAre(source));
-  EXPECT_THAT(plan->sinks(), ::testing::ElementsAre(sink));
+  EXPECT_THAT(plan->sources(), ElementsAre(source));
+  EXPECT_THAT(plan->sinks(), ElementsAre(sink));
 }
 
 TEST(ExecPlanConstruction, MultipleNode) {
   ASSERT_OK_AND_ASSIGN(auto plan, ExecPlan::Make());
 
-  auto source1 =
-      MakeDummyNode(plan.get(), "source1", /*num_inputs=*/0, /*num_outputs=*/2);
+  auto source1 = MakeDummyNode(plan.get(), "source1", /*inputs=*/{}, /*num_outputs=*/2);
 
-  auto source2 =
-      MakeDummyNode(plan.get(), "source2", /*num_inputs=*/0, /*num_outputs=*/1);
+  auto source2 = MakeDummyNode(plan.get(), "source2", /*inputs=*/{}, /*num_outputs=*/1);
 
   auto process1 =
-      MakeDummyNode(plan.get(), "process1", /*num_inputs=*/1, /*num_outputs=*/2);
+      MakeDummyNode(plan.get(), "process1", /*inputs=*/{source1}, /*num_outputs=*/2);
 
-  auto process2 =
-      MakeDummyNode(plan.get(), "process1", /*num_inputs=*/2, /*num_outputs=*/1);
+  auto process2 = MakeDummyNode(plan.get(), "process1", /*inputs=*/{source1, source2},
+                                /*num_outputs=*/1);
 
   auto process3 =
-      MakeDummyNode(plan.get(), "process3", /*num_inputs=*/3, /*num_outputs=*/1);
-
-  auto sink = MakeDummyNode(plan.get(), "sink", /*num_inputs=*/1, /*num_outputs=*/0);
-
-  sink->AddInput(process3);
-
-  process3->AddInput(process1);
-  process3->AddInput(process2);
-  process3->AddInput(process1);
-
-  process2->AddInput(source1);
-  process2->AddInput(source2);
+      MakeDummyNode(plan.get(), "process3", /*inputs=*/{process1, process2, process1},
+                    /*num_outputs=*/1);
 
-  process1->AddInput(source1);
+  auto sink = MakeDummyNode(plan.get(), "sink", /*inputs=*/{process3}, /*num_outputs=*/0);
 
   ASSERT_OK(plan->Validate());
-  ASSERT_THAT(plan->sources(), ::testing::ElementsAre(source1, source2));
-  ASSERT_THAT(plan->sinks(), ::testing::ElementsAre(sink));
+  ASSERT_THAT(plan->sources(), ElementsAre(source1, source2));
+  ASSERT_THAT(plan->sinks(), ElementsAre(sink));
 }
 
 struct StartStopTracker {
@@ -135,30 +114,27 @@ TEST(ExecPlan, DummyStartProducing) {
   StartStopTracker t;
 
   ASSERT_OK_AND_ASSIGN(auto plan, ExecPlan::Make());
-  auto source1 = MakeDummyNode(plan.get(), "source1", /*num_inputs=*/0, /*num_outputs=*/2,
+
+  auto source1 = MakeDummyNode(plan.get(), "source1", /*inputs=*/{}, /*num_outputs=*/2,
                                t.start_producing_func(), t.stop_producing_func());
-  auto source2 = MakeDummyNode(plan.get(), "source2", /*num_inputs=*/0, /*num_outputs=*/1,
+
+  auto source2 = MakeDummyNode(plan.get(), "source2", /*inputs=*/{}, /*num_outputs=*/1,
                                t.start_producing_func(), t.stop_producing_func());
+
   auto process1 =
-      MakeDummyNode(plan.get(), "process1", /*num_inputs=*/1, /*num_outputs=*/2,
+      MakeDummyNode(plan.get(), "process1", /*inputs=*/{source1}, /*num_outputs=*/2,
                     t.start_producing_func(), t.stop_producing_func());
+
   auto process2 =
-      MakeDummyNode(plan.get(), "process2", /*num_inputs=*/2, /*num_outputs=*/1,
-                    t.start_producing_func(), t.stop_producing_func());
-  auto process3 =
-      MakeDummyNode(plan.get(), "process3", /*num_inputs=*/3, /*num_outputs=*/1,
-                    t.start_producing_func(), t.stop_producing_func());
+      MakeDummyNode(plan.get(), "process2", /*inputs=*/{process1, source2},
+                    /*num_outputs=*/1, t.start_producing_func(), t.stop_producing_func());
 
-  auto sink = MakeDummyNode(plan.get(), "sink", /*num_inputs=*/1, /*num_outputs=*/0,
-                            t.start_producing_func(), t.stop_producing_func());
+  auto process3 =
+      MakeDummyNode(plan.get(), "process3", /*inputs=*/{process1, source1, process2},
+                    /*num_outputs=*/1, t.start_producing_func(), t.stop_producing_func());
 
-  process1->AddInput(source1);
-  process2->AddInput(process1);
-  process2->AddInput(source2);
-  process3->AddInput(process1);
-  process3->AddInput(source1);
-  process3->AddInput(process2);
-  sink->AddInput(process3);
+  MakeDummyNode(plan.get(), "sink", /*inputs=*/{process3}, /*num_outputs=*/0,
+                t.start_producing_func(), t.stop_producing_func());
 
   ASSERT_OK(plan->Validate());
   ASSERT_EQ(t.started.size(), 0);
@@ -166,68 +142,37 @@ TEST(ExecPlan, DummyStartProducing) {
 
   ASSERT_OK(plan->StartProducing());
   // Note that any correct reverse topological order may do
-  ASSERT_THAT(t.started, ::testing::ElementsAre("sink", "process3", "process2",
-                                                "process1", "source2", "source1"));
+  ASSERT_THAT(t.started, ElementsAre("sink", "process3", "process2", "process1",
+                                     "source2", "source1"));
   ASSERT_EQ(t.stopped.size(), 0);
 }
 
-TEST(ExecPlan, DummyStartProducingCycle) {
-  // A trivial cycle
-  ASSERT_OK_AND_ASSIGN(auto plan, ExecPlan::Make());
-  auto node = MakeDummyNode(plan.get(), "dummy", /*num_inputs=*/1, /*num_outputs=*/1);
-  node->AddInput(node);
-  ASSERT_OK(plan->Validate());
-  ASSERT_RAISES(Invalid, plan->StartProducing());
-
-  // A less trivial one
-  ASSERT_OK_AND_ASSIGN(plan, ExecPlan::Make());
-  auto source = MakeDummyNode(plan.get(), "source", /*num_inputs=*/0, /*num_outputs=*/1);
-  auto process1 =
-      MakeDummyNode(plan.get(), "process1", /*num_inputs=*/2, /*num_outputs=*/2);
-  auto process2 =
-      MakeDummyNode(plan.get(), "process2", /*num_inputs=*/1, /*num_outputs=*/1);
-  auto process3 =
-      MakeDummyNode(plan.get(), "process3", /*num_inputs=*/2, /*num_outputs=*/2);
-  auto sink = MakeDummyNode(plan.get(), "sink", /*num_inputs=*/1, /*num_outputs=*/0);
-
-  process1->AddInput(source);
-  process2->AddInput(process1);
-  process3->AddInput(process2);
-  process3->AddInput(process1);
-  process1->AddInput(process3);
-  sink->AddInput(process3);
-
-  ASSERT_OK(plan->Validate());
-  ASSERT_RAISES(Invalid, plan->StartProducing());
-}
-
 TEST(ExecPlan, DummyStartProducingError) {
   StartStopTracker t;
 
   ASSERT_OK_AND_ASSIGN(auto plan, ExecPlan::Make());
-  auto source1 = MakeDummyNode(plan.get(), "source1", /*num_inputs=*/0, /*num_outputs=*/2,
-                               t.start_producing_func(Status::NotImplemented("zzz")),
-                               t.stop_producing_func());
-  auto source2 = MakeDummyNode(plan.get(), "source2", /*num_inputs=*/0, /*num_outputs=*/1,
-                               t.start_producing_func(), t.stop_producing_func());
+  auto source1 = MakeDummyNode(
+      plan.get(), "source1", /*num_inputs=*/{}, /*num_outputs=*/2,
+      t.start_producing_func(Status::NotImplemented("zzz")), t.stop_producing_func());
+
+  auto source2 =
+      MakeDummyNode(plan.get(), "source2", /*num_inputs=*/{}, /*num_outputs=*/1,
+                    t.start_producing_func(), t.stop_producing_func());
+
   auto process1 = MakeDummyNode(
-      plan.get(), "process1", /*num_inputs=*/1, /*num_outputs=*/2,
+      plan.get(), "process1", /*num_inputs=*/{source1}, /*num_outputs=*/2,
       t.start_producing_func(Status::IOError("xxx")), t.stop_producing_func());
+
   auto process2 =
-      MakeDummyNode(plan.get(), "process2", /*num_inputs=*/2, /*num_outputs=*/1,
-                    t.start_producing_func(), t.stop_producing_func());
-  process1->AddInput(source1);
-  process2->AddInput(process1);
-  process2->AddInput(source2);
+      MakeDummyNode(plan.get(), "process2", /*num_inputs=*/{process1, source2},
+                    /*num_outputs=*/1, t.start_producing_func(), t.stop_producing_func());
+
   auto process3 =
-      MakeDummyNode(plan.get(), "process3", /*num_inputs=*/3, /*num_outputs=*/1,
-                    t.start_producing_func(), t.stop_producing_func());
-  process3->AddInput(process1);
-  process3->AddInput(source1);
-  process3->AddInput(process2);
-  auto sink = MakeDummyNode(plan.get(), "sink", /*num_inputs=*/1, /*num_outputs=*/0,
-                            t.start_producing_func(), t.stop_producing_func());
-  sink->AddInput(process3);
+      MakeDummyNode(plan.get(), "process3", /*num_inputs=*/{process1, source1, process2},
+                    /*num_outputs=*/1, t.start_producing_func(), t.stop_producing_func());
+
+  MakeDummyNode(plan.get(), "sink", /*num_inputs=*/{process3}, /*num_outputs=*/0,
+                t.start_producing_func(), t.stop_producing_func());
 
   ASSERT_OK(plan->Validate());
   ASSERT_EQ(t.started.size(), 0);
@@ -235,165 +180,206 @@ TEST(ExecPlan, DummyStartProducingError) {
 
   // `process1` raises IOError
   ASSERT_RAISES(IOError, plan->StartProducing());
-  ASSERT_THAT(t.started,
-              ::testing::ElementsAre("sink", "process3", "process2", "process1"));
+  ASSERT_THAT(t.started, ElementsAre("sink", "process3", "process2", "process1"));
   // Nodes that started successfully were stopped in reverse order
-  ASSERT_THAT(t.stopped, ::testing::ElementsAre("process2", "process3", "sink"));
+  ASSERT_THAT(t.stopped, ElementsAre("process2", "process3", "sink"));
 }
 
-// TODO move this to gtest_util.h?
+namespace {
 
-class SlowRecordBatchReader : public RecordBatchReader {
- public:
-  explicit SlowRecordBatchReader(std::shared_ptr<RecordBatchReader> reader)
-      : reader_(std::move(reader)) {}
+struct BatchesWithSchema {
+  std::vector<ExecBatch> batches;
+  std::shared_ptr<Schema> schema;
+};
+
+Result<ExecNode*> MakeTestSourceNode(ExecPlan* plan, std::string label,
+                                     BatchesWithSchema batches_with_schema, bool parallel,
+                                     bool slow) {
+  DCHECK_GT(batches_with_schema.batches.size(), 0);
+
+  auto opt_batches = internal::MapVector(
+      [](ExecBatch batch) { return util::make_optional(std::move(batch)); },
+      std::move(batches_with_schema.batches));
 
-  std::shared_ptr<Schema> schema() const override { return reader_->schema(); }
+  AsyncGenerator<util::optional<ExecBatch>> gen;
 
-  Status ReadNext(std::shared_ptr<RecordBatch>* batch) override {
-    SleepABit();
-    return reader_->ReadNext(batch);
+  if (parallel) {
+    // emulate batches completing initial decode-after-scan on a cpu thread
+    ARROW_ASSIGN_OR_RAISE(
+        gen, MakeBackgroundGenerator(MakeVectorIterator(std::move(opt_batches)),
+                                     internal::GetCpuThreadPool()));
+
+    // ensure that callbacks are not executed immediately on a background thread
+    gen = MakeTransferredGenerator(std::move(gen), internal::GetCpuThreadPool());
+  } else {
+    gen = MakeVectorGenerator(std::move(opt_batches));
   }
 
-  static Result<std::shared_ptr<RecordBatchReader>> Make(
-      RecordBatchVector batches, std::shared_ptr<Schema> schema = nullptr) {
-    ARROW_ASSIGN_OR_RAISE(auto reader,
-                          RecordBatchReader::Make(std::move(batches), std::move(schema)));
-    return std::make_shared<SlowRecordBatchReader>(std::move(reader));
+  if (slow) {
+    gen = MakeMappedGenerator(std::move(gen), [](const util::optional<ExecBatch>& batch) {
+      SleepABit();
+      return batch;
+    });
   }
 
- protected:
-  std::shared_ptr<RecordBatchReader> reader_;
-};
+  return MakeSourceNode(plan, label, std::move(batches_with_schema.schema),
+                        std::move(gen));
+}
 
-static Result<RecordBatchGenerator> MakeSlowRecordBatchGenerator(
-    RecordBatchVector batches, std::shared_ptr<Schema> schema) {
-  auto gen = MakeVectorGenerator(batches);
-  // TODO move this into testing/async_generator_util.h?
-  auto delayed_gen = MakeMappedGenerator<std::shared_ptr<RecordBatch>>(
-      std::move(gen), [](const std::shared_ptr<RecordBatch>& batch) {
-        auto fut = Future<std::shared_ptr<RecordBatch>>::Make();
-        SleepABitAsync().AddCallback(
-            [fut, batch](const Status& status) mutable { fut.MarkFinished(batch); });
-        return fut;
-      });
-  // Adding readahead implicitly adds parallelism by pulling reentrantly from
-  // the delayed generator
-  return MakeReadaheadGenerator(std::move(delayed_gen), /*max_readahead=*/64);
+Result<std::vector<ExecBatch>> StartAndCollect(
+    ExecPlan* plan, AsyncGenerator<util::optional<ExecBatch>> gen) {
+  RETURN_NOT_OK(plan->Validate());
+  RETURN_NOT_OK(plan->StartProducing());
+
+  auto maybe_collected = CollectAsyncGenerator(gen).result();
+  ARROW_ASSIGN_OR_RAISE(auto collected, maybe_collected);
+
+  plan->StopProducing();
+
+  return internal::MapVector(
+      [](util::optional<ExecBatch> batch) { return std::move(*batch); }, collected);
 }
 
-class TestExecPlanExecution : public ::testing::Test {
- public:
-  void SetUp() override {
-    ASSERT_OK_AND_ASSIGN(io_executor_, internal::ThreadPool::Make(8));
+BatchesWithSchema MakeBasicBatches() {
+  BatchesWithSchema out;
+  out.batches = {
+      ExecBatchFromJSON({int32(), boolean()}, "[[null, true], [4, false]]"),
+      ExecBatchFromJSON({int32(), boolean()}, "[[5, null], [6, false], [7, false]]")};
+  out.schema = schema({field("i32", int32()), field("bool", boolean())});
+  return out;
+}
+
+BatchesWithSchema MakeRandomBatches(const std::shared_ptr<Schema>& schema,
+                                    int num_batches = 10, int batch_size = 4) {
+  BatchesWithSchema out;
+
+  random::RandomArrayGenerator rng(42);
+  out.batches.resize(num_batches);
+
+  for (int i = 0; i < num_batches; ++i) {
+    out.batches[i] = ExecBatch(*rng.BatchOf(schema->fields(), batch_size));
+    // add a tag scalar to ensure the batches are unique
+    out.batches[i].values.emplace_back(i);
   }
+  return out;
+}
+}  // namespace
+
+TEST(ExecPlanExecution, SourceSink) {
+  for (bool slow : {false, true}) {
+    SCOPED_TRACE(slow ? "slowed" : "unslowed");
+
+    for (bool parallel : {false, true}) {
+      SCOPED_TRACE(parallel ? "parallel" : "single threaded");
+
+      ASSERT_OK_AND_ASSIGN(auto plan, ExecPlan::Make());
+
+      auto basic_data = MakeBasicBatches();
 
-  RecordBatchVector MakeRandomBatches(const std::shared_ptr<Schema>& schema,
-                                      int num_batches = 10, int batch_size = 4) {
-    random::RandomArrayGenerator rng(42);
-    RecordBatchVector batches;
-    batches.reserve(num_batches);
-    for (int i = 0; i < num_batches; ++i) {
-      batches.push_back(rng.BatchOf(schema->fields(), batch_size));
+      ASSERT_OK_AND_ASSIGN(auto source, MakeTestSourceNode(plan.get(), "source",
+                                                           basic_data, parallel, slow));
+
+      auto sink_gen = MakeSinkNode(source, "sink");
+
+      ASSERT_THAT(StartAndCollect(plan.get(), sink_gen),
+                  ResultWith(UnorderedElementsAreArray(basic_data.batches)));
     }
-    return batches;
   }
+}
 
-  struct CollectorPlan {
-    std::shared_ptr<ExecPlan> plan;
-    RecordBatchCollectNode* sink;
+TEST(ExecPlanExecution, SourceSinkError) {
+  ASSERT_OK_AND_ASSIGN(auto plan, ExecPlan::Make());
+
+  auto basic_data = MakeBasicBatches();
+  auto it = basic_data.batches.begin();
+  AsyncGenerator<util::optional<ExecBatch>> gen =
+      [&]() -> Result<util::optional<ExecBatch>> {
+    if (it == basic_data.batches.end()) {
+      return Status::Invalid("Artificial error");
+    }
+    return util::make_optional(*it++);
   };
 
-  Result<CollectorPlan> MakeSourceSink(std::shared_ptr<RecordBatchReader> reader,
-                                       const std::shared_ptr<Schema>& schema) {
-    ARROW_ASSIGN_OR_RAISE(auto plan, ExecPlan::Make());
-    auto source =
-        MakeRecordBatchReaderNode(plan.get(), "source", reader, io_executor_.get());
-    auto sink = MakeRecordBatchCollectNode(plan.get(), "sink", schema);
-    sink->AddInput(source);
-    return CollectorPlan{plan, sink};
-  }
+  auto source = MakeSourceNode(plan.get(), "source", {}, gen);
+  auto sink_gen = MakeSinkNode(source, "sink");
 
-  Result<CollectorPlan> MakeSourceSink(RecordBatchGenerator generator,
-                                       const std::shared_ptr<Schema>& schema) {
-    ARROW_ASSIGN_OR_RAISE(auto plan, ExecPlan::Make());
-    auto source = MakeRecordBatchReaderNode(plan.get(), "source", schema, generator,
-                                            io_executor_.get());
-    auto sink = MakeRecordBatchCollectNode(plan.get(), "sink", schema);
-    sink->AddInput(source);
-    return CollectorPlan{plan, sink};
-  }
+  ASSERT_THAT(StartAndCollect(plan.get(), sink_gen),
+              Raises(StatusCode::Invalid, HasSubstr("Artificial")));
+}
 
-  Result<CollectorPlan> MakeSourceSink(const RecordBatchVector& batches,
-                                       const std::shared_ptr<Schema>& schema) {
-    ARROW_ASSIGN_OR_RAISE(auto reader, RecordBatchReader::Make(batches, schema));
-    return MakeSourceSink(std::move(reader), schema);
-  }
+TEST(ExecPlanExecution, StressSourceSink) {
+  for (bool slow : {false, true}) {
+    SCOPED_TRACE(slow ? "slowed" : "unslowed");
 
-  Result<RecordBatchVector> StartAndCollect(ExecPlan* plan,
-                                            RecordBatchCollectNode* sink) {
-    RETURN_NOT_OK(plan->StartProducing());
-    auto fut = CollectAsyncGenerator(sink->generator());
-    return fut.result();
-  }
+    for (bool parallel : {false, true}) {
+      SCOPED_TRACE(parallel ? "parallel" : "single threaded");
 
-  template <typename RecordBatchReaderFactory>
-  void TestSourceSink(RecordBatchReaderFactory reader_factory) {
-    auto schema = ::arrow::schema({field("a", int32()), field("b", boolean())});
-    RecordBatchVector batches{
-        RecordBatchFromJSON(schema, R"([{"a": null, "b": true},
-                                        {"a": 4,    "b": false}])"),
-        RecordBatchFromJSON(schema, R"([{"a": 5,    "b": null},
-                                        {"a": 6,    "b": false},
-                                        {"a": 7,    "b": false}])"),
-    };
+      int num_batches = slow && !parallel ? 30 : 300;
 
-    ASSERT_OK_AND_ASSIGN(auto reader, reader_factory(batches, schema));
-    ASSERT_OK_AND_ASSIGN(auto cp, MakeSourceSink(reader, schema));
-    ASSERT_OK(cp.plan->Validate());
+      ASSERT_OK_AND_ASSIGN(auto plan, ExecPlan::Make());
 
-    ASSERT_OK_AND_ASSIGN(auto got_batches, StartAndCollect(cp.plan.get(), cp.sink));
-    AssertBatchesEqual(batches, got_batches);
-  }
+      auto random_data = MakeRandomBatches(
+          schema({field("a", int32()), field("b", boolean())}), num_batches);
 
-  template <typename RecordBatchReaderFactory>
-  void TestStressSourceSink(int num_batches, RecordBatchReaderFactory batch_factory) {
-    auto schema = ::arrow::schema({field("a", int32()), field("b", boolean())});
-    auto batches = MakeRandomBatches(schema, num_batches);
+      ASSERT_OK_AND_ASSIGN(auto source, MakeTestSourceNode(plan.get(), "source",
+                                                           random_data, parallel, slow));
 
-    ASSERT_OK_AND_ASSIGN(auto reader, batch_factory(batches, schema));
-    ASSERT_OK_AND_ASSIGN(auto cp, MakeSourceSink(reader, schema));
-    ASSERT_OK(cp.plan->Validate());
+      auto sink_gen = MakeSinkNode(source, "sink");
 
-    ASSERT_OK_AND_ASSIGN(auto got_batches, StartAndCollect(cp.plan.get(), cp.sink));
-    AssertBatchesEqual(batches, got_batches);
+      ASSERT_THAT(StartAndCollect(plan.get(), sink_gen),
+                  ResultWith(UnorderedElementsAreArray(random_data.batches)));
+    }
   }
+}
 
- protected:
-  std::shared_ptr<Executor> io_executor_;
-};
+TEST(ExecPlanExecution, SourceFilterSink) {
+  ASSERT_OK_AND_ASSIGN(auto plan, ExecPlan::Make());
 
-TEST_F(TestExecPlanExecution, SourceSink) { TestSourceSink(RecordBatchReader::Make); }
+  auto basic_data = MakeBasicBatches();
 
-TEST_F(TestExecPlanExecution, SlowSourceSink) {
-  TestSourceSink(SlowRecordBatchReader::Make);
-}
+  ASSERT_OK_AND_ASSIGN(auto source,
+                       MakeTestSourceNode(plan.get(), "source", basic_data,
+                                          /*parallel=*/false, /*slow=*/false));
 
-TEST_F(TestExecPlanExecution, SlowSourceSinkParallel) {
-  TestSourceSink(MakeSlowRecordBatchGenerator);
-}
+  ASSERT_OK_AND_ASSIGN(auto predicate,
+                       equal(field_ref("i32"), literal(6)).Bind(*basic_data.schema));
 
-TEST_F(TestExecPlanExecution, StressSourceSink) {
-  TestStressSourceSink(/*num_batches=*/200, RecordBatchReader::Make);
-}
+  ASSERT_OK_AND_ASSIGN(auto filter, MakeFilterNode(source, "filter", predicate));
+
+  auto sink_gen = MakeSinkNode(filter, "sink");
 
-TEST_F(TestExecPlanExecution, StressSlowSourceSink) {
-  // This doesn't create parallelism as the RecordBatchReader is iterated serially.
-  TestStressSourceSink(/*num_batches=*/30, SlowRecordBatchReader::Make);
+  ASSERT_THAT(StartAndCollect(plan.get(), sink_gen),
+              ResultWith(UnorderedElementsAreArray(
+                  {ExecBatchFromJSON({int32(), boolean()}, "[]"),
+                   ExecBatchFromJSON({int32(), boolean()}, "[[6, false]]")})));
 }
 
-TEST_F(TestExecPlanExecution, StressSlowSourceSinkParallel) {
-  TestStressSourceSink(/*num_batches=*/300, MakeSlowRecordBatchGenerator);
+TEST(ExecPlanExecution, SourceProjectSink) {
+  ASSERT_OK_AND_ASSIGN(auto plan, ExecPlan::Make());
+
+  auto basic_data = MakeBasicBatches();
+
+  ASSERT_OK_AND_ASSIGN(auto source,
+                       MakeTestSourceNode(plan.get(), "source", basic_data,
+                                          /*parallel=*/false, /*slow=*/false));
+
+  std::vector<Expression> exprs{
+      not_(field_ref("bool")),
+      call("add", {field_ref("i32"), literal(1)}),
+  };
+  for (auto& expr : exprs) {
+    ASSERT_OK_AND_ASSIGN(expr, expr.Bind(*basic_data.schema));
+  }
+
+  ASSERT_OK_AND_ASSIGN(auto projection, MakeProjectNode(source, "project", exprs));
+
+  auto sink_gen = MakeSinkNode(projection, "sink");
+
+  ASSERT_THAT(StartAndCollect(plan.get(), sink_gen),
+              ResultWith(UnorderedElementsAreArray(
+                  {ExecBatchFromJSON({boolean(), int32()}, "[[false, null], [true, 5]]"),
+                   ExecBatchFromJSON({boolean(), int32()},
+                                     "[[null, 6], [true, 7], [true, 8]]")})));
 }
 
 }  // namespace compute
diff --git a/cpp/src/arrow/compute/exec/test_util.cc b/cpp/src/arrow/compute/exec/test_util.cc
index ae2c9446aa9..6fbfa2a430c 100644
--- a/cpp/src/arrow/compute/exec/test_util.cc
+++ b/cpp/src/arrow/compute/exec/test_util.cc
@@ -33,11 +33,13 @@
 #include "arrow/compute/exec/exec_plan.h"
 #include "arrow/datum.h"
 #include "arrow/record_batch.h"
+#include "arrow/testing/gtest_util.h"
 #include "arrow/type.h"
 #include "arrow/util/async_generator.h"
 #include "arrow/util/iterator.h"
 #include "arrow/util/logging.h"
 #include "arrow/util/optional.h"
+#include "arrow/util/vector.h"
 
 namespace arrow {
 
@@ -46,31 +48,22 @@ using internal::Executor;
 namespace compute {
 namespace {
 
-// TODO expose this as `static ValueDescr::FromSchemaColumns`?
-std::vector<ValueDescr> DescrFromSchemaColumns(const Schema& schema) {
-  std::vector<ValueDescr> descr(schema.num_fields());
-  std::transform(schema.fields().begin(), schema.fields().end(), descr.begin(),
-                 [](const std::shared_ptr<Field>& field) {
-                   return ValueDescr::Array(field->type());
-                 });
-  return descr;
-}
-
 struct DummyNode : ExecNode {
-  DummyNode(ExecPlan* plan, std::string label, int num_inputs, int num_outputs,
+  DummyNode(ExecPlan* plan, std::string label, NodeVector inputs, int num_outputs,
             StartProducingFunc start_producing, StopProducingFunc stop_producing)
-      : ExecNode(plan, std::move(label), std::vector<BatchDescr>(num_inputs, descr()), {},
-                 descr(), num_outputs),
+      : ExecNode(plan, std::move(label), std::move(inputs), {}, dummy_schema(),
+                 num_outputs),
         start_producing_(std::move(start_producing)),
         stop_producing_(std::move(stop_producing)) {
-    for (int i = 0; i < num_inputs; ++i) {
-      input_labels_.push_back(std::to_string(i));
+    input_labels_.resize(inputs_.size());
+    for (size_t i = 0; i < input_labels_.size(); ++i) {
+      input_labels_[i] = std::to_string(i);
     }
   }
 
   const char* kind_name() override { return "Dummy"; }
 
-  void InputReceived(ExecNode* input, int seq_num, compute::ExecBatch batch) override {}
+  void InputReceived(ExecNode* input, int seq_num, ExecBatch batch) override {}
 
   void ErrorReceived(ExecNode* input, Status error) override {}
 
@@ -117,283 +110,45 @@ struct DummyNode : ExecNode {
     ASSERT_NE(std::find(outputs_.begin(), outputs_.end(), output), outputs_.end());
   }
 
-  BatchDescr descr() const { return std::vector<ValueDescr>{ValueDescr(null())}; }
+  std::shared_ptr<Schema> dummy_schema() const {
+    return schema({field("dummy", null())});
+  }
 
   StartProducingFunc start_producing_;
   StopProducingFunc stop_producing_;
   bool started_ = false;
 };
 
-struct RecordBatchReaderNode : ExecNode {
-  RecordBatchReaderNode(ExecPlan* plan, std::string label,
-                        std::shared_ptr<RecordBatchReader> reader, Executor* io_executor)
-      : ExecNode(plan, std::move(label), {}, {},
-                 DescrFromSchemaColumns(*reader->schema()), /*num_outputs=*/1),
-        schema_(reader->schema()),
-        reader_(std::move(reader)),
-        io_executor_(io_executor) {}
-
-  RecordBatchReaderNode(ExecPlan* plan, std::string label, std::shared_ptr<Schema> schema,
-                        RecordBatchGenerator generator, Executor* io_executor)
-      : ExecNode(plan, std::move(label), {}, {}, DescrFromSchemaColumns(*schema),
-                 /*num_outputs=*/1),
-        schema_(std::move(schema)),
-        generator_(std::move(generator)),
-        io_executor_(io_executor) {}
-
-  const char* kind_name() override { return "RecordBatchReader"; }
-
-  void InputReceived(ExecNode* input, int seq_num, compute::ExecBatch batch) override {}
-
-  void ErrorReceived(ExecNode* input, Status error) override {}
-
-  void InputFinished(ExecNode* input, int seq_stop) override {}
-
-  Status StartProducing() override {
-    next_batch_index_ = 0;
-    if (!generator_) {
-      auto it = MakeIteratorFromReader(reader_);
-      ARROW_ASSIGN_OR_RAISE(generator_,
-                            MakeBackgroundGenerator(std::move(it), io_executor_));
-    }
-    GenerateOne(std::unique_lock<std::mutex>{mutex_});
-    return Status::OK();
-  }
-
-  void PauseProducing(ExecNode* output) override {}
-
-  void ResumeProducing(ExecNode* output) override {}
-
-  void StopProducing(ExecNode* output) override {
-    ASSERT_EQ(output, outputs_[0]);
-    std::unique_lock<std::mutex> lock(mutex_);
-    generator_ = nullptr;  // null function
-  }
-
-  void StopProducing() override { StopProducing(outputs_[0]); }
-
- private:
-  void GenerateOne(std::unique_lock<std::mutex>&& lock) {
-    if (!generator_) {
-      // Stopped
-      return;
-    }
-    auto plan = this->plan()->shared_from_this();
-    auto fut = generator_();
-    const auto batch_index = next_batch_index_++;
-
-    lock.unlock();
-    // TODO we want to transfer always here
-    io_executor_->Transfer(std::move(fut))
-        .AddCallback(
-            [plan, batch_index, this](const Result<std::shared_ptr<RecordBatch>>& res) {
-              std::unique_lock<std::mutex> lock(mutex_);
-              if (!res.ok()) {
-                for (auto out : outputs_) {
-                  out->ErrorReceived(this, res.status());
-                }
-                return;
-              }
-              const auto& batch = *res;
-              if (IsIterationEnd(batch)) {
-                lock.unlock();
-                for (auto out : outputs_) {
-                  out->InputFinished(this, batch_index);
-                }
-              } else {
-                lock.unlock();
-                for (auto out : outputs_) {
-                  out->InputReceived(this, batch_index, compute::ExecBatch(*batch));
-                }
-                lock.lock();
-                GenerateOne(std::move(lock));
-              }
-            });
-  }
-
-  std::mutex mutex_;
-  const std::shared_ptr<Schema> schema_;
-  const std::shared_ptr<RecordBatchReader> reader_;
-  RecordBatchGenerator generator_;
-  int next_batch_index_;
-
-  Executor* const io_executor_;
-};
-
-struct RecordBatchCollectNodeImpl : public RecordBatchCollectNode {
-  RecordBatchCollectNodeImpl(ExecPlan* plan, std::string label,
-                             std::shared_ptr<Schema> schema)
-      : RecordBatchCollectNode(plan, std::move(label), {DescrFromSchemaColumns(*schema)},
-                               {"batches_to_collect"}, {}, 0),
-        schema_(std::move(schema)) {}
-
-  RecordBatchGenerator generator() override { return generator_; }
-
-  const char* kind_name() override { return "RecordBatchReader"; }
-
-  Status StartProducing() override {
-    num_received_ = 0;
-    num_emitted_ = 0;
-    emit_stop_ = -1;
-    stopped_ = false;
-    producer_.emplace(generator_.producer());
-    return Status::OK();
-  }
-
-  // sink nodes have no outputs from which to feel backpressure
-  void ResumeProducing(ExecNode* output) override {
-    FAIL() << "no outputs; this should never be called";
-  }
-  void PauseProducing(ExecNode* output) override {
-    FAIL() << "no outputs; this should never be called";
-  }
-  void StopProducing(ExecNode* output) override {
-    FAIL() << "no outputs; this should never be called";
-  }
-
-  void StopProducing() override {
-    std::unique_lock<std::mutex> lock(mutex_);
-    StopProducingUnlocked();
-  }
-
-  void InputReceived(ExecNode* input, int seq_num,
-                     compute::ExecBatch exec_batch) override {
-    std::unique_lock<std::mutex> lock(mutex_);
-    if (stopped_) {
-      return;
-    }
-    auto maybe_batch = MakeBatch(std::move(exec_batch));
-    if (!maybe_batch.ok()) {
-      lock.unlock();
-      producer_->Push(std::move(maybe_batch));
-      return;
-    }
-
-    // TODO would be nice to factor this out in a ReorderQueue
-    auto batch = *std::move(maybe_batch);
-    if (seq_num <= static_cast<int>(received_batches_.size())) {
-      received_batches_.resize(seq_num + 1, nullptr);
-    }
-    DCHECK_EQ(received_batches_[seq_num], nullptr);
-    received_batches_[seq_num] = std::move(batch);
-    ++num_received_;
-
-    if (seq_num != num_emitted_) {
-      // Cannot emit yet as there is a hole at `num_emitted_`
-      DCHECK_GT(seq_num, num_emitted_);
-      DCHECK_EQ(received_batches_[num_emitted_], nullptr);
-      return;
-    }
-    if (num_received_ == emit_stop_) {
-      StopProducingUnlocked();
-    }
-
-    // Emit batches in order as far as possible
-    // First collect these batches, then unlock before producing.
-    const auto seq_start = seq_num;
-    while (seq_num < static_cast<int>(received_batches_.size()) &&
-           received_batches_[seq_num] != nullptr) {
-      ++seq_num;
-    }
-    DCHECK_GT(seq_num, seq_start);
-    // By moving the values now, we make sure another thread won't emit the same values
-    // below
-    RecordBatchVector to_emit(
-        std::make_move_iterator(received_batches_.begin() + seq_start),
-        std::make_move_iterator(received_batches_.begin() + seq_num));
-
-    lock.unlock();
-    for (auto&& batch : to_emit) {
-      producer_->Push(std::move(batch));
-    }
-    lock.lock();
-
-    DCHECK_EQ(seq_start, num_emitted_);  // num_emitted_ wasn't bumped in the meantime
-    num_emitted_ = seq_num;
-  }
+}  // namespace
 
-  void ErrorReceived(ExecNode* input, Status error) override {
-    // XXX do we care about properly sequencing the error?
-    producer_->Push(std::move(error));
-    std::unique_lock<std::mutex> lock(mutex_);
-    StopProducingUnlocked();
-  }
+ExecNode* MakeDummyNode(ExecPlan* plan, std::string label, std::vector<ExecNode*> inputs,
+                        int num_outputs, StartProducingFunc start_producing,
+                        StopProducingFunc stop_producing) {
+  return plan->EmplaceNode<DummyNode>(plan, std::move(label), std::move(inputs),
+                                      num_outputs, std::move(start_producing),
+                                      std::move(stop_producing));
+}
 
-  void InputFinished(ExecNode* input, int seq_stop) override {
-    std::unique_lock<std::mutex> lock(mutex_);
-    DCHECK_GE(seq_stop, static_cast<int>(received_batches_.size()));
-    received_batches_.reserve(seq_stop);
-    emit_stop_ = seq_stop;
-    if (emit_stop_ == num_received_) {
-      DCHECK_EQ(emit_stop_, num_emitted_);
-      StopProducingUnlocked();
-    }
-  }
+ExecBatch ExecBatchFromJSON(const std::vector<ValueDescr>& descrs,
+                            util::string_view json) {
+  auto fields = internal::MapVector(
+      [](const ValueDescr& descr) { return field("", descr.type); }, descrs);
 
- private:
-  void StopProducingUnlocked() {
-    if (!stopped_) {
-      stopped_ = true;
-      producer_->Close();
-      inputs_[0]->StopProducing(this);
-    }
-  }
+  ExecBatch batch{*RecordBatchFromJSON(schema(std::move(fields)), json)};
 
-  // TODO factor this out as ExecBatch::ToRecordBatch()?
-  Result<std::shared_ptr<RecordBatch>> MakeBatch(compute::ExecBatch&& exec_batch) {
-    ArrayDataVector columns;
-    columns.reserve(exec_batch.values.size());
-    for (auto&& value : exec_batch.values) {
-      if (!value.is_array()) {
-        return Status::TypeError("Expected array input");
+  auto value_it = batch.values.begin();
+  for (const auto& descr : descrs) {
+    if (descr.shape == ValueDescr::SCALAR) {
+      if (batch.length == 0) {
+        *value_it = MakeNullScalar(value_it->type());
+      } else {
+        *value_it = value_it->make_array()->GetScalar(0).ValueOrDie();
       }
-      columns.push_back(std::move(value).array());
     }
-    return RecordBatch::Make(schema_, exec_batch.length, std::move(columns));
+    ++value_it;
   }
 
-  const std::shared_ptr<Schema> schema_;
-
-  std::mutex mutex_;
-  RecordBatchVector received_batches_;
-  int num_received_;
-  int num_emitted_;
-  int emit_stop_;
-  bool stopped_;
-
-  PushGenerator<std::shared_ptr<RecordBatch>> generator_;
-  util::optional<PushGenerator<std::shared_ptr<RecordBatch>>::Producer> producer_;
-};
-
-}  // namespace
-
-ExecNode* MakeRecordBatchReaderNode(ExecPlan* plan, std::string label,
-                                    std::shared_ptr<RecordBatchReader> reader,
-                                    Executor* io_executor) {
-  return plan->EmplaceNode<RecordBatchReaderNode>(plan, std::move(label),
-                                                  std::move(reader), io_executor);
-}
-
-ExecNode* MakeRecordBatchReaderNode(ExecPlan* plan, std::string label,
-                                    std::shared_ptr<Schema> schema,
-                                    RecordBatchGenerator generator,
-                                    ::arrow::internal::Executor* io_executor) {
-  return plan->EmplaceNode<RecordBatchReaderNode>(
-      plan, std::move(label), std::move(schema), std::move(generator), io_executor);
-}
-
-ExecNode* MakeDummyNode(ExecPlan* plan, std::string label, int num_inputs,
-                        int num_outputs, StartProducingFunc start_producing,
-                        StopProducingFunc stop_producing) {
-  return plan->EmplaceNode<DummyNode>(plan, std::move(label), num_inputs, num_outputs,
-                                      std::move(start_producing),
-                                      std::move(stop_producing));
-}
-
-RecordBatchCollectNode* MakeRecordBatchCollectNode(
-    ExecPlan* plan, std::string label, const std::shared_ptr<Schema>& schema) {
-  return arrow::internal::checked_cast<RecordBatchCollectNode*>(
-      plan->EmplaceNode<RecordBatchCollectNodeImpl>(plan, std::move(label), schema));
+  return batch;
 }
 
 }  // namespace compute
diff --git a/cpp/src/arrow/compute/exec/test_util.h b/cpp/src/arrow/compute/exec/test_util.h
index c2dc785a501..faa395bab78 100644
--- a/cpp/src/arrow/compute/exec/test_util.h
+++ b/cpp/src/arrow/compute/exec/test_util.h
@@ -18,15 +18,13 @@
 #pragma once
 
 #include <functional>
-#include <memory>
 #include <string>
 #include <vector>
 
+#include "arrow/compute/exec.h"
 #include "arrow/compute/exec/exec_plan.h"
-#include "arrow/record_batch.h"
 #include "arrow/testing/visibility.h"
-#include "arrow/util/async_generator.h"
-#include "arrow/util/type_fwd.h"
+#include "arrow/util/string_view.h"
 
 namespace arrow {
 namespace compute {
@@ -36,35 +34,12 @@ using StopProducingFunc = std::function<void(ExecNode*)>;
 
 // Make a dummy node that has no execution behaviour
 ARROW_TESTING_EXPORT
-ExecNode* MakeDummyNode(ExecPlan* plan, std::string label, int num_inputs,
+ExecNode* MakeDummyNode(ExecPlan* plan, std::string label, std::vector<ExecNode*> inputs,
                         int num_outputs, StartProducingFunc = {}, StopProducingFunc = {});
 
-using RecordBatchGenerator = AsyncGenerator<std::shared_ptr<RecordBatch>>;
-
-// Make a source node (no inputs) that produces record batches by reading in the
-// background from a RecordBatchReader.
-ARROW_TESTING_EXPORT
-ExecNode* MakeRecordBatchReaderNode(ExecPlan* plan, std::string label,
-                                    std::shared_ptr<RecordBatchReader> reader,
-                                    ::arrow::internal::Executor* io_executor);
-
-ARROW_TESTING_EXPORT
-ExecNode* MakeRecordBatchReaderNode(ExecPlan* plan, std::string label,
-                                    std::shared_ptr<Schema> schema,
-                                    RecordBatchGenerator generator,
-                                    ::arrow::internal::Executor* io_executor);
-
-class RecordBatchCollectNode : public ExecNode {
- public:
-  virtual RecordBatchGenerator generator() = 0;
-
- protected:
-  using ExecNode::ExecNode;
-};
-
 ARROW_TESTING_EXPORT
-RecordBatchCollectNode* MakeRecordBatchCollectNode(ExecPlan* plan, std::string label,
-                                                   const std::shared_ptr<Schema>& schema);
+ExecBatch ExecBatchFromJSON(const std::vector<ValueDescr>& descrs,
+                            util::string_view json);
 
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/codegen_internal.cc b/cpp/src/arrow/compute/kernels/codegen_internal.cc
index d6a1d4ccbc4..e723bd7838e 100644
--- a/cpp/src/arrow/compute/kernels/codegen_internal.cc
+++ b/cpp/src/arrow/compute/kernels/codegen_internal.cc
@@ -244,7 +244,7 @@ std::shared_ptr<DataType> CommonNumeric(const std::vector<ValueDescr>& descrs) {
 
   for (const auto& descr : descrs) {
     auto id = descr.type->id();
-    auto max_width = is_signed_integer(id) ? &max_width_signed : &max_width_unsigned;
+    auto max_width = &(is_signed_integer(id) ? max_width_signed : max_width_unsigned);
     *max_width = std::max(bit_width(id), *max_width);
   }
 
diff --git a/cpp/src/arrow/compute/type_fwd.h b/cpp/src/arrow/compute/type_fwd.h
index 8a0d6de7f25..eebc8c1b678 100644
--- a/cpp/src/arrow/compute/type_fwd.h
+++ b/cpp/src/arrow/compute/type_fwd.h
@@ -41,6 +41,8 @@ struct VectorKernel;
 struct KernelState;
 
 class Expression;
+class ExecNode;
+class ExecPlan;
 
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/dataset/dataset.cc b/cpp/src/arrow/dataset/dataset.cc
index 841b792ee34..fc6b38b37a9 100644
--- a/cpp/src/arrow/dataset/dataset.cc
+++ b/cpp/src/arrow/dataset/dataset.cc
@@ -165,13 +165,8 @@ Dataset::Dataset(std::shared_ptr<Schema> schema, compute::Expression partition_e
     : schema_(std::move(schema)),
       partition_expression_(std::move(partition_expression)) {}
 
-Result<std::shared_ptr<ScannerBuilder>> Dataset::NewScan(
-    std::shared_ptr<ScanOptions> options) {
-  return std::make_shared<ScannerBuilder>(this->shared_from_this(), options);
-}
-
 Result<std::shared_ptr<ScannerBuilder>> Dataset::NewScan() {
-  return NewScan(std::make_shared<ScanOptions>());
+  return std::make_shared<ScannerBuilder>(this->shared_from_this());
 }
 
 Result<FragmentIterator> Dataset::GetFragments() {
diff --git a/cpp/src/arrow/dataset/dataset.h b/cpp/src/arrow/dataset/dataset.h
index d2cba730252..11210fdc27b 100644
--- a/cpp/src/arrow/dataset/dataset.h
+++ b/cpp/src/arrow/dataset/dataset.h
@@ -155,7 +155,6 @@ class ARROW_DS_EXPORT InMemoryFragment : public Fragment {
 class ARROW_DS_EXPORT Dataset : public std::enable_shared_from_this<Dataset> {
  public:
   /// \brief Begin to build a new Scan operation against this Dataset
-  Result<std::shared_ptr<ScannerBuilder>> NewScan(std::shared_ptr<ScanOptions> options);
   Result<std::shared_ptr<ScannerBuilder>> NewScan();
 
   /// \brief GetFragments returns an iterator of Fragments given a predicate.
diff --git a/cpp/src/arrow/dataset/dataset_internal.h b/cpp/src/arrow/dataset/dataset_internal.h
index 4336f9c157e..952ad3e83ca 100644
--- a/cpp/src/arrow/dataset/dataset_internal.h
+++ b/cpp/src/arrow/dataset/dataset_internal.h
@@ -204,5 +204,35 @@ arrow::Result<std::shared_ptr<T>> GetFragmentScanOptions(
   return internal::checked_pointer_cast<T>(source);
 }
 
+class FragmentDataset : public Dataset {
+ public:
+  FragmentDataset(std::shared_ptr<Schema> schema, FragmentVector fragments)
+      : Dataset(std::move(schema)), fragments_(std::move(fragments)) {}
+
+  std::string type_name() const override { return "fragment"; }
+
+  Result<std::shared_ptr<Dataset>> ReplaceSchema(
+      std::shared_ptr<Schema> schema) const override {
+    return std::make_shared<FragmentDataset>(std::move(schema), fragments_);
+  }
+
+ protected:
+  Result<FragmentIterator> GetFragmentsImpl(compute::Expression predicate) override {
+    // TODO(ARROW-12891) Provide subtree pruning for any vector of fragments
+    FragmentVector fragments;
+    for (const auto& fragment : fragments_) {
+      ARROW_ASSIGN_OR_RAISE(
+          auto simplified_filter,
+          compute::SimplifyWithGuarantee(predicate, fragment->partition_expression()));
+
+      if (simplified_filter.IsSatisfiable()) {
+        fragments.push_back(fragment);
+      }
+    }
+    return MakeVectorIterator(std::move(fragments));
+  }
+  FragmentVector fragments_;
+};
+
 }  // namespace dataset
 }  // namespace arrow
diff --git a/cpp/src/arrow/dataset/file_csv.cc b/cpp/src/arrow/dataset/file_csv.cc
index fd96fe8f50e..3f42ab44a39 100644
--- a/cpp/src/arrow/dataset/file_csv.cc
+++ b/cpp/src/arrow/dataset/file_csv.cc
@@ -162,8 +162,8 @@ static inline Future<std::shared_ptr<csv::StreamingReader>> OpenReaderAsync(
       }));
   return reader_fut.Then(
       // Adds the filename to the error
-      [](const std::shared_ptr<csv::StreamingReader>& maybe_reader)
-          -> Result<std::shared_ptr<csv::StreamingReader>> { return maybe_reader; },
+      [](const std::shared_ptr<csv::StreamingReader>& reader)
+          -> Result<std::shared_ptr<csv::StreamingReader>> { return reader; },
       [source](const Status& err) -> Result<std::shared_ptr<csv::StreamingReader>> {
         return err.WithMessage("Could not open CSV input source '", source.path(),
                                "': ", err);
diff --git a/cpp/src/arrow/dataset/file_ipc_test.cc b/cpp/src/arrow/dataset/file_ipc_test.cc
index f0409abe85b..e6192523f53 100644
--- a/cpp/src/arrow/dataset/file_ipc_test.cc
+++ b/cpp/src/arrow/dataset/file_ipc_test.cc
@@ -100,13 +100,6 @@ class TestIpcFileSystemDataset : public testing::Test,
     format_ = ipc_format;
     SetWriteOptions(ipc_format->DefaultWriteOptions());
   }
-
-  std::shared_ptr<Scanner> MakeScanner(const std::shared_ptr<Dataset>& dataset,
-                                       const std::shared_ptr<ScanOptions>& scan_options) {
-    ScannerBuilder builder(dataset, scan_options);
-    EXPECT_OK_AND_ASSIGN(auto scanner, builder.Finish());
-    return scanner;
-  }
 };
 
 TEST_F(TestIpcFileSystemDataset, WriteWithIdenticalPartitioningSchema) {
@@ -132,7 +125,7 @@ TEST_F(TestIpcFileSystemDataset, WriteExceedsMaxPartitions) {
   // require that no batch be grouped into more than 2 written batches:
   write_options_.max_partitions = 2;
 
-  auto scanner = MakeScanner(dataset_, scan_options_);
+  EXPECT_OK_AND_ASSIGN(auto scanner, ScannerBuilder(dataset_, scan_options_).Finish());
   EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, testing::HasSubstr("This exceeds the maximum"),
                                   FileSystemDataset::Write(write_options_, scanner));
 }
diff --git a/cpp/src/arrow/dataset/file_parquet_test.cc b/cpp/src/arrow/dataset/file_parquet_test.cc
index 04c86b1f16f..ffa64e8ec10 100644
--- a/cpp/src/arrow/dataset/file_parquet_test.cc
+++ b/cpp/src/arrow/dataset/file_parquet_test.cc
@@ -25,6 +25,7 @@
 #include "arrow/dataset/scanner_internal.h"
 #include "arrow/dataset/test_util.h"
 #include "arrow/io/memory.h"
+#include "arrow/io/util_internal.h"
 #include "arrow/record_batch.h"
 #include "arrow/table.h"
 #include "arrow/testing/gtest_util.h"
@@ -283,6 +284,32 @@ TEST_F(TestParquetFileFormat, CountRowsPredicatePushdown) {
   }
 }
 
+TEST_F(TestParquetFileFormat, MultithreadedScan) {
+  constexpr int64_t kNumRowGroups = 16;
+
+  // See PredicatePushdown test below for a description of the generated data
+  auto reader = ArithmeticDatasetFixture::GetRecordBatchReader(kNumRowGroups);
+  auto source = GetFileSource(reader.get());
+  auto options = std::make_shared<ScanOptions>();
+
+  auto fragment = MakeFragment(*source);
+
+  FragmentDataset dataset(ArithmeticDatasetFixture::schema(), {fragment});
+  ScannerBuilder builder({&dataset, [](...) {}});
+
+  ASSERT_OK(builder.UseAsync(true));
+  ASSERT_OK(builder.UseThreads(true));
+  ASSERT_OK(builder.Project({call("add", {field_ref("i64"), literal(3)})}, {""}));
+  ASSERT_OK_AND_ASSIGN(auto scanner, builder.Finish());
+
+  ASSERT_OK_AND_ASSIGN(auto gen, scanner->ScanBatchesUnorderedAsync());
+
+  auto collect_fut = CollectAsyncGenerator(gen);
+  ASSERT_OK_AND_ASSIGN(auto batches, collect_fut.result());
+
+  ASSERT_EQ(batches.size(), kNumRowGroups);
+}
+
 class TestParquetFileSystemDataset : public WriteFileSystemDatasetMixin,
                                      public testing::Test {
  public:
diff --git a/cpp/src/arrow/dataset/file_test.cc b/cpp/src/arrow/dataset/file_test.cc
index b80d1bb57f0..5bf89330429 100644
--- a/cpp/src/arrow/dataset/file_test.cc
+++ b/cpp/src/arrow/dataset/file_test.cc
@@ -169,7 +169,8 @@ TEST_F(TestFileSystemDataset, ReplaceSchema) {
 
 TEST_F(TestFileSystemDataset, RootPartitionPruning) {
   auto root_partition = equal(field_ref("i32"), literal(5));
-  MakeDataset({fs::File("a"), fs::File("b")}, root_partition);
+  MakeDataset({fs::File("a"), fs::File("b")}, root_partition, {},
+              schema({field("i32", int32()), field("f32", float32())}));
 
   auto GetFragments = [&](compute::Expression filter) {
     return *dataset_->GetFragments(*filter.Bind(*dataset_->schema()));
@@ -191,8 +192,9 @@ TEST_F(TestFileSystemDataset, RootPartitionPruning) {
   AssertFragmentsAreFromPath(GetFragments(equal(field_ref("f32"), literal(3.F))),
                              {"a", "b"});
 
-  // No partition should match
-  MakeDataset({fs::File("a"), fs::File("b")});
+  // No root partition: don't prune any fragments
+  MakeDataset({fs::File("a"), fs::File("b")}, literal(true), {},
+              schema({field("i32", int32()), field("f32", float32())}));
   AssertFragmentsAreFromPath(GetFragments(equal(field_ref("f32"), literal(3.F))),
                              {"a", "b"});
 }
diff --git a/cpp/src/arrow/dataset/partition.cc b/cpp/src/arrow/dataset/partition.cc
index 5c390b6b487..1ec47e3cee1 100644
--- a/cpp/src/arrow/dataset/partition.cc
+++ b/cpp/src/arrow/dataset/partition.cc
@@ -30,6 +30,7 @@
 #include "arrow/compute/api_scalar.h"
 #include "arrow/compute/api_vector.h"
 #include "arrow/compute/cast.h"
+#include "arrow/compute/exec/expression_internal.h"
 #include "arrow/dataset/dataset_internal.h"
 #include "arrow/filesystem/path_util.h"
 #include "arrow/scalar.h"
@@ -252,7 +253,7 @@ Result<std::string> KeyValuePartitioning::Format(const compute::Expression& expr
   ScalarVector values{static_cast<size_t>(schema_->num_fields()), nullptr};
 
   ARROW_ASSIGN_OR_RAISE(auto known_values, ExtractKnownFieldValues(expr));
-  for (const auto& ref_value : known_values) {
+  for (const auto& ref_value : known_values.map) {
     if (!ref_value.second.is_scalar()) {
       return Status::Invalid("non-scalar partition key ", ref_value.second.ToString());
     }
diff --git a/cpp/src/arrow/dataset/scanner.cc b/cpp/src/arrow/dataset/scanner.cc
index 09e05cdbf75..58e96fdc113 100644
--- a/cpp/src/arrow/dataset/scanner.cc
+++ b/cpp/src/arrow/dataset/scanner.cc
@@ -27,6 +27,7 @@
 #include "arrow/compute/api_scalar.h"
 #include "arrow/compute/api_vector.h"
 #include "arrow/compute/cast.h"
+#include "arrow/compute/exec/exec_plan.h"
 #include "arrow/dataset/dataset.h"
 #include "arrow/dataset/dataset_internal.h"
 #include "arrow/dataset/scanner_internal.h"
@@ -317,10 +318,6 @@ class ARROW_DS_EXPORT SyncScanner : public Scanner {
   SyncScanner(std::shared_ptr<Dataset> dataset, std::shared_ptr<ScanOptions> scan_options)
       : Scanner(std::move(scan_options)), dataset_(std::move(dataset)) {}
 
-  SyncScanner(std::shared_ptr<Fragment> fragment,
-              std::shared_ptr<ScanOptions> scan_options)
-      : Scanner(std::move(scan_options)), fragment_(std::move(fragment)) {}
-
   Result<TaggedRecordBatchIterator> ScanBatches() override;
   Result<ScanTaskIterator> Scan() override;
   Status Scan(std::function<Status(TaggedRecordBatch)> visitor) override;
@@ -337,8 +334,6 @@ class ARROW_DS_EXPORT SyncScanner : public Scanner {
   Result<ScanTaskIterator> ScanInternal();
 
   std::shared_ptr<Dataset> dataset_;
-  // TODO(ARROW-8065) remove fragment_ after a Dataset is constuctible from fragments
-  std::shared_ptr<Fragment> fragment_;
 };
 
 Result<TaggedRecordBatchIterator> SyncScanner::ScanBatches() {
@@ -370,10 +365,6 @@ Result<EnumeratedRecordBatchGenerator> SyncScanner::ScanBatchesUnorderedAsync()
 }
 
 Result<FragmentIterator> SyncScanner::GetFragments() {
-  if (fragment_ != nullptr) {
-    return MakeVectorIterator(FragmentVector{fragment_});
-  }
-
   // Transform Datasets in a flat Iterator<Fragment>. This
   // iterator is lazily constructed, i.e. Dataset::GetFragments is
   // not invoked until a Fragment is requested.
@@ -411,18 +402,6 @@ Result<ScanTaskIterator> SyncScanner::ScanInternal() {
   return GetScanTaskIterator(std::move(fragment_it), scan_options_);
 }
 
-Result<ScanTaskIterator> ScanTaskIteratorFromRecordBatch(
-    std::vector<std::shared_ptr<RecordBatch>> batches,
-    std::shared_ptr<ScanOptions> options) {
-  if (batches.empty()) {
-    return MakeVectorIterator(ScanTaskVector());
-  }
-  auto schema = batches[0]->schema();
-  auto fragment =
-      std::make_shared<InMemoryFragment>(std::move(schema), std::move(batches));
-  return fragment->Scan(std::move(options));
-}
-
 class ARROW_DS_EXPORT AsyncScanner : public Scanner,
                                      public std::enable_shared_from_this<AsyncScanner> {
  public:
@@ -454,15 +433,17 @@ class ARROW_DS_EXPORT AsyncScanner : public Scanner,
 namespace {
 
 inline Result<EnumeratedRecordBatch> DoFilterAndProjectRecordBatchAsync(
-    const std::shared_ptr<Scanner>& scanner, const EnumeratedRecordBatch& in) {
-  ARROW_ASSIGN_OR_RAISE(compute::Expression simplified_filter,
-                        SimplifyWithGuarantee(scanner->options()->filter,
-                                              in.fragment.value->partition_expression()));
-
-  compute::ExecContext exec_context{scanner->options()->pool};
+    const std::shared_ptr<ScanOptions>& options, const EnumeratedRecordBatch& in) {
   ARROW_ASSIGN_OR_RAISE(
-      Datum mask, ExecuteScalarExpression(simplified_filter, Datum(in.record_batch.value),
-                                          &exec_context));
+      compute::Expression simplified_filter,
+      SimplifyWithGuarantee(options->filter, in.fragment.value->partition_expression()));
+
+  const auto& schema = *options->dataset_schema;
+
+  compute::ExecContext exec_context{options->pool};
+  ARROW_ASSIGN_OR_RAISE(Datum mask,
+                        ExecuteScalarExpression(simplified_filter, schema,
+                                                in.record_batch.value, &exec_context));
 
   Datum filtered;
   if (mask.is_scalar()) {
@@ -481,11 +462,12 @@ inline Result<EnumeratedRecordBatch> DoFilterAndProjectRecordBatchAsync(
   }
 
   ARROW_ASSIGN_OR_RAISE(compute::Expression simplified_projection,
-                        SimplifyWithGuarantee(scanner->options()->projection,
+                        SimplifyWithGuarantee(options->projection,
                                               in.fragment.value->partition_expression()));
+
   ARROW_ASSIGN_OR_RAISE(
       Datum projected,
-      ExecuteScalarExpression(simplified_projection, filtered, &exec_context));
+      ExecuteScalarExpression(simplified_projection, schema, filtered, &exec_context));
 
   DCHECK_EQ(projected.type()->id(), Type::STRUCT);
   if (projected.shape() == ValueDescr::SCALAR) {
@@ -493,7 +475,7 @@ inline Result<EnumeratedRecordBatch> DoFilterAndProjectRecordBatchAsync(
     ARROW_ASSIGN_OR_RAISE(
         projected,
         MakeArrayFromScalar(*projected.scalar(), filtered.record_batch()->num_rows(),
-                            scanner->options()->pool));
+                            options->pool));
   }
   ARROW_ASSIGN_OR_RAISE(auto out,
                         RecordBatch::FromStructArray(projected.array_as<StructArray>()));
@@ -506,17 +488,16 @@ inline Result<EnumeratedRecordBatch> DoFilterAndProjectRecordBatchAsync(
 }
 
 inline EnumeratedRecordBatchGenerator FilterAndProjectRecordBatchAsync(
-    const std::shared_ptr<Scanner>& scanner, EnumeratedRecordBatchGenerator rbs) {
-  auto mapper = [scanner](const EnumeratedRecordBatch& in) {
-    return DoFilterAndProjectRecordBatchAsync(scanner, in);
+    const std::shared_ptr<ScanOptions>& options, EnumeratedRecordBatchGenerator rbs) {
+  auto mapper = [options](const EnumeratedRecordBatch& in) {
+    return DoFilterAndProjectRecordBatchAsync(options, in);
   };
-  return MakeMappedGenerator<EnumeratedRecordBatch>(std::move(rbs), mapper);
+  return MakeMappedGenerator(std::move(rbs), mapper);
 }
 
 Result<EnumeratedRecordBatchGenerator> FragmentToBatches(
-    std::shared_ptr<AsyncScanner> scanner,
     const Enumerated<std::shared_ptr<Fragment>>& fragment,
-    const std::shared_ptr<ScanOptions>& options) {
+    const std::shared_ptr<ScanOptions>& options, bool filter_and_project = true) {
   ARROW_ASSIGN_OR_RAISE(auto batch_gen, fragment.value->ScanBatchesAsync(options));
   auto enumerated_batch_gen = MakeEnumeratedGenerator(std::move(batch_gen));
 
@@ -525,30 +506,37 @@ Result<EnumeratedRecordBatchGenerator> FragmentToBatches(
         return EnumeratedRecordBatch{record_batch, fragment};
       };
 
-  auto combined_gen = MakeMappedGenerator<EnumeratedRecordBatch>(enumerated_batch_gen,
-                                                                 std::move(combine_fn));
+  auto combined_gen = MakeMappedGenerator(enumerated_batch_gen, std::move(combine_fn));
 
-  return FilterAndProjectRecordBatchAsync(scanner, std::move(combined_gen));
+  if (filter_and_project) {
+    return FilterAndProjectRecordBatchAsync(options, std::move(combined_gen));
+  }
+  return combined_gen;
 }
 
 Result<AsyncGenerator<EnumeratedRecordBatchGenerator>> FragmentsToBatches(
-    std::shared_ptr<AsyncScanner> scanner, FragmentGenerator fragment_gen) {
+    FragmentGenerator fragment_gen, const std::shared_ptr<ScanOptions>& options,
+    bool filter_and_project = true) {
   auto enumerated_fragment_gen = MakeEnumeratedGenerator(std::move(fragment_gen));
-  return MakeMappedGenerator<EnumeratedRecordBatchGenerator>(
-      std::move(enumerated_fragment_gen),
-      [scanner](const Enumerated<std::shared_ptr<Fragment>>& fragment) {
-        return FragmentToBatches(scanner, fragment, scanner->options());
-      });
+  return MakeMappedGenerator(std::move(enumerated_fragment_gen),
+                             [=](const Enumerated<std::shared_ptr<Fragment>>& fragment) {
+                               return FragmentToBatches(fragment, options,
+                                                        filter_and_project);
+                             });
 }
 
 Result<AsyncGenerator<AsyncGenerator<util::optional<int64_t>>>> FragmentsToRowCount(
-    std::shared_ptr<AsyncScanner> scanner, FragmentGenerator fragment_gen) {
+    FragmentGenerator fragment_gen,
+    std::shared_ptr<ScanOptions> options_with_projection) {
   // Must use optional<int64_t> to avoid breaking the pipeline on empty batches
   auto enumerated_fragment_gen = MakeEnumeratedGenerator(std::move(fragment_gen));
-  auto options = std::make_shared<ScanOptions>(*scanner->options());
+
+  // Drop projection since we only need to count rows
+  auto options = std::make_shared<ScanOptions>(*options_with_projection);
   RETURN_NOT_OK(SetProjection(options.get(), std::vector<std::string>()));
+
   auto count_fragment_fn =
-      [scanner, options](const Enumerated<std::shared_ptr<Fragment>>& fragment)
+      [options](const Enumerated<std::shared_ptr<Fragment>>& fragment)
       -> Result<AsyncGenerator<util::optional<int64_t>>> {
     auto count_fut = fragment.value->CountRows(options->filter, options);
     return MakeFromFuture(
@@ -560,18 +548,29 @@ Result<AsyncGenerator<AsyncGenerator<util::optional<int64_t>>>> FragmentsToRowCo
                 Future<util::optional<int64_t>>::MakeFinished(val));
           }
           // Slow path
-          ARROW_ASSIGN_OR_RAISE(auto batch_gen,
-                                FragmentToBatches(scanner, fragment, options));
+          ARROW_ASSIGN_OR_RAISE(auto batch_gen, FragmentToBatches(fragment, options));
           auto count_fn =
               [](const EnumeratedRecordBatch& enumerated) -> util::optional<int64_t> {
             return enumerated.record_batch.value->num_rows();
           };
-          return MakeMappedGenerator<util::optional<int64_t>>(batch_gen,
-                                                              std::move(count_fn));
+          return MakeMappedGenerator(batch_gen, std::move(count_fn));
         }));
   };
-  return MakeMappedGenerator<AsyncGenerator<util::optional<int64_t>>>(
-      std::move(enumerated_fragment_gen), std::move(count_fragment_fn));
+  return MakeMappedGenerator(std::move(enumerated_fragment_gen),
+                             std::move(count_fragment_fn));
+}
+
+Result<EnumeratedRecordBatchGenerator> ScanBatchesUnorderedAsyncImpl(
+    const std::shared_ptr<ScanOptions>& options, FragmentGenerator fragment_gen,
+    internal::Executor* cpu_executor, bool filter_and_project = true) {
+  ARROW_ASSIGN_OR_RAISE(
+      auto batch_gen_gen,
+      FragmentsToBatches(std::move(fragment_gen), options, filter_and_project));
+  auto batch_gen_gen_readahead =
+      MakeSerialReadaheadGenerator(std::move(batch_gen_gen), options->fragment_readahead);
+  auto merged_batch_gen = MakeMergedGenerator(std::move(batch_gen_gen_readahead),
+                                              options->fragment_readahead);
+  return MakeReadaheadGenerator(std::move(merged_batch_gen), options->fragment_readahead);
 }
 
 }  // namespace
@@ -607,16 +606,9 @@ Result<EnumeratedRecordBatchGenerator> AsyncScanner::ScanBatchesUnorderedAsync()
 
 Result<EnumeratedRecordBatchGenerator> AsyncScanner::ScanBatchesUnorderedAsync(
     internal::Executor* cpu_executor) {
-  auto self = shared_from_this();
   ARROW_ASSIGN_OR_RAISE(auto fragment_gen, GetFragments());
-  ARROW_ASSIGN_OR_RAISE(auto batch_gen_gen,
-                        FragmentsToBatches(self, std::move(fragment_gen)));
-  auto batch_gen_gen_readahead = MakeSerialReadaheadGenerator(
-      std::move(batch_gen_gen), scan_options_->fragment_readahead);
-  auto merged_batch_gen = MakeMergedGenerator(std::move(batch_gen_gen_readahead),
-                                              scan_options_->fragment_readahead);
-  return MakeReadaheadGenerator(std::move(merged_batch_gen),
-                                scan_options_->fragment_readahead);
+  return ScanBatchesUnorderedAsyncImpl(scan_options_, std::move(fragment_gen),
+                                       cpu_executor);
 }
 
 Result<TaggedRecordBatchGenerator> AsyncScanner::ScanBatchesAsync() {
@@ -626,13 +618,17 @@ Result<TaggedRecordBatchGenerator> AsyncScanner::ScanBatchesAsync() {
 Result<TaggedRecordBatchGenerator> AsyncScanner::ScanBatchesAsync(
     internal::Executor* cpu_executor) {
   ARROW_ASSIGN_OR_RAISE(auto unordered, ScanBatchesUnorderedAsync(cpu_executor));
-  auto left_after_right = [](const EnumeratedRecordBatch& left,
-                             const EnumeratedRecordBatch& right) {
+  // We need an initial value sentinel, so we use one with fragment.index < 0
+  auto is_before_any = [](const EnumeratedRecordBatch& batch) {
+    return batch.fragment.index < 0;
+  };
+  auto left_after_right = [&is_before_any](const EnumeratedRecordBatch& left,
+                                           const EnumeratedRecordBatch& right) {
     // Before any comes first
-    if (left.fragment.value == nullptr) {
+    if (is_before_any(left)) {
       return false;
     }
-    if (right.fragment.value == nullptr) {
+    if (is_before_any(right)) {
       return true;
     }
     // Compare batches if fragment is the same
@@ -642,10 +638,10 @@ Result<TaggedRecordBatchGenerator> AsyncScanner::ScanBatchesAsync(
     // Otherwise compare fragment
     return left.fragment.index > right.fragment.index;
   };
-  auto is_next = [](const EnumeratedRecordBatch& prev,
-                    const EnumeratedRecordBatch& next) {
+  auto is_next = [is_before_any](const EnumeratedRecordBatch& prev,
+                                 const EnumeratedRecordBatch& next) {
     // Only true if next is the first batch
-    if (prev.fragment.value == nullptr) {
+    if (is_before_any(prev)) {
       return next.fragment.index == 0 && next.record_batch.index == 0;
     }
     // If same fragment, compare batch index
@@ -664,7 +660,7 @@ Result<TaggedRecordBatchGenerator> AsyncScanner::ScanBatchesAsync(
     return TaggedRecordBatch{enumerated_batch.record_batch.value,
                              enumerated_batch.fragment.value};
   };
-  return MakeMappedGenerator<TaggedRecordBatch>(std::move(sequenced), unenumerate_fn);
+  return MakeMappedGenerator(std::move(sequenced), unenumerate_fn);
 }
 
 struct AsyncTableAssemblyState {
@@ -725,8 +721,8 @@ Future<std::shared_ptr<Table>> AsyncScanner::ToTableAsync(
     return batch;
   };
 
-  auto table_building_gen = MakeMappedGenerator<EnumeratedRecordBatch>(
-      positioned_batch_gen, table_building_task);
+  auto table_building_gen =
+      MakeMappedGenerator(positioned_batch_gen, table_building_task);
 
   return DiscardAllFromAsyncGenerator(table_building_gen).Then([state, scan_options]() {
     return Table::FromRecordBatches(scan_options->projected_schema, state->Finish());
@@ -734,10 +730,9 @@ Future<std::shared_ptr<Table>> AsyncScanner::ToTableAsync(
 }
 
 Result<int64_t> AsyncScanner::CountRows() {
-  auto self = shared_from_this();
   ARROW_ASSIGN_OR_RAISE(auto fragment_gen, GetFragments());
   ARROW_ASSIGN_OR_RAISE(auto count_gen_gen,
-                        FragmentsToRowCount(self, std::move(fragment_gen)));
+                        FragmentsToRowCount(std::move(fragment_gen), scan_options_));
   auto count_gen = MakeConcatenatedGenerator(std::move(count_gen_gen));
   int64_t total = 0;
   auto sum_fn = [&total](util::optional<int64_t> count) -> Status {
@@ -755,9 +750,7 @@ ScannerBuilder::ScannerBuilder(std::shared_ptr<Dataset> dataset)
 
 ScannerBuilder::ScannerBuilder(std::shared_ptr<Dataset> dataset,
                                std::shared_ptr<ScanOptions> scan_options)
-    : dataset_(std::move(dataset)),
-      fragment_(nullptr),
-      scan_options_(std::move(scan_options)) {
+    : dataset_(std::move(dataset)), scan_options_(std::move(scan_options)) {
   scan_options_->dataset_schema = dataset_->schema();
   DCHECK_OK(Filter(scan_options_->filter));
 }
@@ -765,12 +758,9 @@ ScannerBuilder::ScannerBuilder(std::shared_ptr<Dataset> dataset,
 ScannerBuilder::ScannerBuilder(std::shared_ptr<Schema> schema,
                                std::shared_ptr<Fragment> fragment,
                                std::shared_ptr<ScanOptions> scan_options)
-    : dataset_(nullptr),
-      fragment_(std::move(fragment)),
-      scan_options_(std::move(scan_options)) {
-  scan_options_->dataset_schema = std::move(schema);
-  DCHECK_OK(Filter(scan_options_->filter));
-}
+    : ScannerBuilder(std::make_shared<FragmentDataset>(
+                         std::move(schema), FragmentVector{std::move(fragment)}),
+                     std::move(scan_options)) {}
 
 namespace {
 class OneShotScanTask : public ScanTask {
@@ -898,10 +888,6 @@ Result<std::shared_ptr<Scanner>> ScannerBuilder::Finish() {
     RETURN_NOT_OK(Project(scan_options_->dataset_schema->field_names()));
   }
 
-  if (dataset_ == nullptr) {
-    // AsyncScanner does not support this method of running.  It may in the future
-    return std::make_shared<SyncScanner>(fragment_, scan_options_);
-  }
   if (scan_options_->use_async) {
     return std::make_shared<AsyncScanner>(dataset_, scan_options_);
   } else {
@@ -1119,5 +1105,51 @@ Result<int64_t> SyncScanner::CountRows() {
   return count;
 }
 
+Result<compute::ExecNode*> MakeScanNode(compute::ExecPlan* plan,
+                                        std::shared_ptr<Dataset> dataset,
+                                        std::shared_ptr<ScanOptions> scan_options) {
+  if (!scan_options->use_async) {
+    return Status::NotImplemented("ScanNodes without asynchrony");
+  }
+
+  // using a generator for speculative forward compatibility with async fragment discovery
+  ARROW_ASSIGN_OR_RAISE(scan_options->filter,
+                        scan_options->filter.Bind(*dataset->schema()));
+  ARROW_ASSIGN_OR_RAISE(auto fragments_it, dataset->GetFragments(scan_options->filter));
+  ARROW_ASSIGN_OR_RAISE(auto fragments_vec, fragments_it.ToVector());
+  auto fragments_gen = MakeVectorGenerator(std::move(fragments_vec));
+
+  ARROW_ASSIGN_OR_RAISE(auto batch_gen,
+                        ScanBatchesUnorderedAsyncImpl(
+                            scan_options, std::move(fragments_gen),
+                            internal::GetCpuThreadPool(), /*filter_and_project=*/false));
+
+  auto gen = MakeMappedGenerator(
+      std::move(batch_gen),
+      [dataset](const EnumeratedRecordBatch& partial)
+          -> Result<util::optional<compute::ExecBatch>> {
+        ARROW_ASSIGN_OR_RAISE(
+            util::optional<compute::ExecBatch> batch,
+            compute::MakeExecBatch(*dataset->schema(), partial.record_batch.value));
+
+        // TODO fragments may be able to attach more guarantees to batches than this,
+        // for example parquet's row group stats.
+        batch->guarantee = partial.fragment.value->partition_expression();
+
+        // tag rows with fragment- and batch-of-origin
+        batch->values.emplace_back(partial.fragment.index);
+        batch->values.emplace_back(partial.record_batch.index);
+        batch->values.emplace_back(partial.record_batch.last);
+        return batch;
+      });
+
+  auto augmented_fields = dataset->schema()->fields();
+  augmented_fields.push_back(field("__fragment_index", int32()));
+  augmented_fields.push_back(field("__batch_index", int32()));
+  augmented_fields.push_back(field("__last_in_fragment", boolean()));
+  return compute::MakeSourceNode(plan, "dataset_scan",
+                                 schema(std::move(augmented_fields)), std::move(gen));
+}
+
 }  // namespace dataset
 }  // namespace arrow
diff --git a/cpp/src/arrow/dataset/scanner.h b/cpp/src/arrow/dataset/scanner.h
index 29fd5aad994..c803cde1978 100644
--- a/cpp/src/arrow/dataset/scanner.h
+++ b/cpp/src/arrow/dataset/scanner.h
@@ -26,6 +26,7 @@
 #include <vector>
 
 #include "arrow/compute/exec/expression.h"
+#include "arrow/compute/type_fwd.h"
 #include "arrow/dataset/dataset.h"
 #include "arrow/dataset/projector.h"
 #include "arrow/dataset/type_fwd.h"
@@ -194,20 +195,22 @@ using EnumeratedRecordBatchIterator = Iterator<EnumeratedRecordBatch>;
 template <>
 struct IterationTraits<dataset::TaggedRecordBatch> {
   static dataset::TaggedRecordBatch End() {
-    return dataset::TaggedRecordBatch{NULL, NULL};
+    return dataset::TaggedRecordBatch{NULLPTR, NULLPTR};
   }
   static bool IsEnd(const dataset::TaggedRecordBatch& val) {
-    return val.record_batch == NULL;
+    return val.record_batch == NULLPTR;
   }
 };
 
 template <>
 struct IterationTraits<dataset::EnumeratedRecordBatch> {
   static dataset::EnumeratedRecordBatch End() {
-    return dataset::EnumeratedRecordBatch{{NULL, -1, false}, {NULL, -1, false}};
+    return dataset::EnumeratedRecordBatch{
+        IterationEnd<Enumerated<std::shared_ptr<RecordBatch>>>(),
+        IterationEnd<Enumerated<std::shared_ptr<dataset::Fragment>>>()};
   }
   static bool IsEnd(const dataset::EnumeratedRecordBatch& val) {
-    return val.fragment.value == NULL;
+    return IsIterationEnd(val.fragment);
   }
 };
 
@@ -401,10 +404,16 @@ class ARROW_DS_EXPORT ScannerBuilder {
 
  private:
   std::shared_ptr<Dataset> dataset_;
-  std::shared_ptr<Fragment> fragment_;
-  std::shared_ptr<ScanOptions> scan_options_;
+  std::shared_ptr<ScanOptions> scan_options_ = std::make_shared<ScanOptions>();
 };
 
+/// \brief Construct a source ExecNode which yields batches from a dataset scan.
+///
+/// Does not construct associated filter or project nodes
+ARROW_DS_EXPORT Result<compute::ExecNode*> MakeScanNode(compute::ExecPlan*,
+                                                        std::shared_ptr<Dataset>,
+                                                        std::shared_ptr<ScanOptions>);
+
 /// @}
 
 /// \brief A trivial ScanTask that yields the RecordBatch of an array.
@@ -422,9 +431,5 @@ class ARROW_DS_EXPORT InMemoryScanTask : public ScanTask {
   std::vector<std::shared_ptr<RecordBatch>> record_batches_;
 };
 
-ARROW_DS_EXPORT Result<ScanTaskIterator> ScanTaskIteratorFromRecordBatch(
-    std::vector<std::shared_ptr<RecordBatch>> batches,
-    std::shared_ptr<ScanOptions> options);
-
 }  // namespace dataset
 }  // namespace arrow
diff --git a/cpp/src/arrow/dataset/scanner_internal.h b/cpp/src/arrow/dataset/scanner_internal.h
index 30fb4e07cef..27b32aa6f19 100644
--- a/cpp/src/arrow/dataset/scanner_internal.h
+++ b/cpp/src/arrow/dataset/scanner_internal.h
@@ -40,10 +40,11 @@ namespace dataset {
 
 inline Result<std::shared_ptr<RecordBatch>> FilterSingleBatch(
     const std::shared_ptr<RecordBatch>& in, const compute::Expression& filter,
-    MemoryPool* pool) {
-  compute::ExecContext exec_context{pool};
-  ARROW_ASSIGN_OR_RAISE(Datum mask,
-                        ExecuteScalarExpression(filter, Datum(in), &exec_context));
+    const std::shared_ptr<ScanOptions>& options) {
+  compute::ExecContext exec_context{options->pool};
+  ARROW_ASSIGN_OR_RAISE(
+      Datum mask,
+      ExecuteScalarExpression(filter, *options->dataset_schema, in, &exec_context));
 
   if (mask.is_scalar()) {
     const auto& mask_scalar = mask.scalar_as<BooleanScalar>();
@@ -59,28 +60,29 @@ inline Result<std::shared_ptr<RecordBatch>> FilterSingleBatch(
   return filtered.record_batch();
 }
 
-inline RecordBatchIterator FilterRecordBatch(RecordBatchIterator it,
-                                             compute::Expression filter,
-                                             MemoryPool* pool) {
+inline RecordBatchIterator FilterRecordBatch(
+    RecordBatchIterator it, compute::Expression filter,
+    const std::shared_ptr<ScanOptions>& options) {
   return MakeMaybeMapIterator(
       [=](std::shared_ptr<RecordBatch> in) -> Result<std::shared_ptr<RecordBatch>> {
-        return FilterSingleBatch(in, filter, pool);
+        return FilterSingleBatch(in, filter, options);
       },
       std::move(it));
 }
 
 inline Result<std::shared_ptr<RecordBatch>> ProjectSingleBatch(
     const std::shared_ptr<RecordBatch>& in, const compute::Expression& projection,
-    MemoryPool* pool) {
-  compute::ExecContext exec_context{pool};
-  ARROW_ASSIGN_OR_RAISE(Datum projected,
-                        ExecuteScalarExpression(projection, Datum(in), &exec_context));
+    const std::shared_ptr<ScanOptions>& options) {
+  compute::ExecContext exec_context{options->pool};
+  ARROW_ASSIGN_OR_RAISE(
+      Datum projected,
+      ExecuteScalarExpression(projection, *options->dataset_schema, in, &exec_context));
 
   DCHECK_EQ(projected.type()->id(), Type::STRUCT);
   if (projected.shape() == ValueDescr::SCALAR) {
     // Only virtual columns are projected. Broadcast to an array
-    ARROW_ASSIGN_OR_RAISE(projected,
-                          MakeArrayFromScalar(*projected.scalar(), in->num_rows(), pool));
+    ARROW_ASSIGN_OR_RAISE(projected, MakeArrayFromScalar(*projected.scalar(),
+                                                         in->num_rows(), options->pool));
   }
 
   ARROW_ASSIGN_OR_RAISE(auto out,
@@ -89,12 +91,12 @@ inline Result<std::shared_ptr<RecordBatch>> ProjectSingleBatch(
   return out->ReplaceSchemaMetadata(in->schema()->metadata());
 }
 
-inline RecordBatchIterator ProjectRecordBatch(RecordBatchIterator it,
-                                              compute::Expression projection,
-                                              MemoryPool* pool) {
+inline RecordBatchIterator ProjectRecordBatch(
+    RecordBatchIterator it, compute::Expression projection,
+    const std::shared_ptr<ScanOptions>& options) {
   return MakeMaybeMapIterator(
       [=](std::shared_ptr<RecordBatch> in) -> Result<std::shared_ptr<RecordBatch>> {
-        return ProjectSingleBatch(in, projection, pool);
+        return ProjectSingleBatch(in, projection, options);
       },
       std::move(it));
 }
@@ -117,10 +119,9 @@ class FilterAndProjectScanTask : public ScanTask {
                           SimplifyWithGuarantee(options()->projection, partition_));
 
     RecordBatchIterator filter_it =
-        FilterRecordBatch(std::move(it), simplified_filter, options_->pool);
+        FilterRecordBatch(std::move(it), simplified_filter, options_);
 
-    return ProjectRecordBatch(std::move(filter_it), simplified_projection,
-                              options_->pool);
+    return ProjectRecordBatch(std::move(filter_it), simplified_projection, options_);
   }
 
   Result<RecordBatchIterator> ToFilteredAndProjectedIterator(
@@ -133,10 +134,9 @@ class FilterAndProjectScanTask : public ScanTask {
                           SimplifyWithGuarantee(options()->projection, partition_));
 
     RecordBatchIterator filter_it =
-        FilterRecordBatch(std::move(it), simplified_filter, options_->pool);
+        FilterRecordBatch(std::move(it), simplified_filter, options_);
 
-    return ProjectRecordBatch(std::move(filter_it), simplified_projection,
-                              options_->pool);
+    return ProjectRecordBatch(std::move(filter_it), simplified_projection, options_);
   }
 
   Result<std::shared_ptr<RecordBatch>> FilterAndProjectBatch(
@@ -147,8 +147,8 @@ class FilterAndProjectScanTask : public ScanTask {
     ARROW_ASSIGN_OR_RAISE(compute::Expression simplified_projection,
                           SimplifyWithGuarantee(options()->projection, partition_));
     ARROW_ASSIGN_OR_RAISE(auto filtered,
-                          FilterSingleBatch(batch, simplified_filter, options_->pool));
-    return ProjectSingleBatch(filtered, simplified_projection, options_->pool);
+                          FilterSingleBatch(batch, simplified_filter, options_));
+    return ProjectSingleBatch(filtered, simplified_projection, options_);
   }
 
   inline Future<RecordBatchVector> SafeExecute(internal::Executor* executor) override {
diff --git a/cpp/src/arrow/dataset/scanner_test.cc b/cpp/src/arrow/dataset/scanner_test.cc
index 87fc2c902c3..bed276b1bff 100644
--- a/cpp/src/arrow/dataset/scanner_test.cc
+++ b/cpp/src/arrow/dataset/scanner_test.cc
@@ -18,6 +18,7 @@
 #include "arrow/dataset/scanner.h"
 
 #include <memory>
+#include <utility>
 
 #include <gmock/gmock.h>
 
@@ -25,6 +26,7 @@
 #include "arrow/compute/api_scalar.h"
 #include "arrow/compute/api_vector.h"
 #include "arrow/compute/cast.h"
+#include "arrow/compute/exec/exec_plan.h"
 #include "arrow/dataset/scanner_internal.h"
 #include "arrow/dataset/test_util.h"
 #include "arrow/record_batch.h"
@@ -32,11 +34,14 @@
 #include "arrow/testing/future_util.h"
 #include "arrow/testing/generator.h"
 #include "arrow/testing/gtest_util.h"
+#include "arrow/testing/matchers.h"
 #include "arrow/testing/util.h"
 #include "arrow/util/range.h"
+#include "arrow/util/vector.h"
 
 using testing::ElementsAre;
 using testing::IsEmpty;
+using testing::UnorderedElementsAreArray;
 
 namespace arrow {
 namespace dataset {
@@ -922,7 +927,7 @@ TEST_F(TestReordering, ScanBatchesUnordered) {
 
 struct BatchConsumer {
   explicit BatchConsumer(EnumeratedRecordBatchGenerator generator)
-      : generator(generator), next() {}
+      : generator(std::move(generator)), next() {}
 
   void AssertCanConsume() {
     if (!next.is_valid()) {
@@ -1087,5 +1092,301 @@ TEST(ScanOptions, TestMaterializedFields) {
   EXPECT_THAT(opts->MaterializedFields(), ElementsAre("i64", "i32"));
 }
 
+namespace {
+
+static Result<std::vector<compute::ExecBatch>> StartAndCollect(
+    compute::ExecPlan* plan, AsyncGenerator<util::optional<compute::ExecBatch>> gen) {
+  RETURN_NOT_OK(plan->Validate());
+  RETURN_NOT_OK(plan->StartProducing());
+
+  auto maybe_collected = CollectAsyncGenerator(gen).result();
+  ARROW_ASSIGN_OR_RAISE(auto collected, maybe_collected);
+
+  plan->StopProducing();
+
+  return internal::MapVector(
+      [](util::optional<compute::ExecBatch> batch) { return std::move(*batch); },
+      collected);
+}
+
+struct DatasetAndBatches {
+  std::shared_ptr<Dataset> dataset;
+  std::vector<compute::ExecBatch> batches;
+};
+
+DatasetAndBatches MakeBasicDataset() {
+  const auto dataset_schema = ::arrow::schema({
+      field("a", int32()),
+      field("b", boolean()),
+      field("c", int32()),
+  });
+
+  const auto physical_schema = SchemaFromColumnNames(dataset_schema, {"a", "b"});
+
+  RecordBatchVector record_batches{
+      RecordBatchFromJSON(physical_schema, R"([{"a": 1,    "b": null},
+                                               {"a": 2,    "b": true}])"),
+      RecordBatchFromJSON(physical_schema, R"([{"a": null, "b": true},
+                                               {"a": 3,    "b": false}])"),
+      RecordBatchFromJSON(physical_schema, R"([{"a": null, "b": true},
+                                               {"a": 4,    "b": false}])"),
+      RecordBatchFromJSON(physical_schema, R"([{"a": 5,    "b": null},
+                                               {"a": 6,    "b": false},
+                                               {"a": 7,    "b": false}])"),
+  };
+
+  auto dataset = std::make_shared<FragmentDataset>(
+      dataset_schema,
+      FragmentVector{
+          std::make_shared<InMemoryFragment>(
+              physical_schema, RecordBatchVector{record_batches[0], record_batches[1]},
+              equal(field_ref("c"), literal(23))),
+          std::make_shared<InMemoryFragment>(
+              physical_schema, RecordBatchVector{record_batches[2], record_batches[3]},
+              equal(field_ref("c"), literal(47))),
+      });
+
+  std::vector<compute::ExecBatch> batches;
+
+  auto batch_it = record_batches.begin();
+  for (int fragment_index = 0; fragment_index < 2; ++fragment_index) {
+    for (int batch_index = 0; batch_index < 2; ++batch_index) {
+      const auto& batch = *batch_it++;
+
+      // the scanned ExecBatches will begin with physical columns
+      batches.emplace_back(*batch);
+
+      // a placeholder will be inserted for partition field "c"
+      batches.back().values.emplace_back(std::make_shared<Int32Scalar>());
+
+      // scanned batches will be augmented with fragment and batch indices
+      batches.back().values.emplace_back(fragment_index);
+      batches.back().values.emplace_back(batch_index);
+
+      // ... and with the last-in-fragment flag
+      batches.back().values.emplace_back(batch_index == 1);
+
+      // each batch carries a guarantee inherited from its Fragment's partition expression
+      batches.back().guarantee =
+          equal(field_ref("c"), literal(fragment_index == 0 ? 23 : 47));
+    }
+  }
+
+  return {dataset, batches};
+}
+}  // namespace
+
+TEST(ScanNode, Schema) {
+  ASSERT_OK_AND_ASSIGN(auto plan, compute::ExecPlan::Make());
+
+  auto basic = MakeBasicDataset();
+
+  auto options = std::make_shared<ScanOptions>();
+  options->use_async = true;
+
+  ASSERT_OK_AND_ASSIGN(auto scan, MakeScanNode(plan.get(), basic.dataset, options));
+
+  auto fields = basic.dataset->schema()->fields();
+  fields.push_back(field("__fragment_index", int32()));
+  fields.push_back(field("__batch_index", int32()));
+  fields.push_back(field("__last_in_fragment", boolean()));
+  AssertSchemaEqual(Schema(fields), *scan->output_schema());
+}
+
+TEST(ScanNode, Trivial) {
+  ASSERT_OK_AND_ASSIGN(auto plan, compute::ExecPlan::Make());
+
+  auto basic = MakeBasicDataset();
+
+  auto options = std::make_shared<ScanOptions>();
+  options->use_async = true;
+
+  ASSERT_OK_AND_ASSIGN(auto scan, MakeScanNode(plan.get(), basic.dataset, options));
+  auto sink_gen = MakeSinkNode(scan, "sink");
+
+  // trivial scan: the batches are returned unmodified
+  auto expected = basic.batches;
+  ASSERT_THAT(StartAndCollect(plan.get(), sink_gen),
+              ResultWith(UnorderedElementsAreArray(expected)));
+}
+
+TEST(ScanNode, FilteredOnVirtualColumn) {
+  ASSERT_OK_AND_ASSIGN(auto plan, compute::ExecPlan::Make());
+
+  auto basic = MakeBasicDataset();
+
+  auto options = std::make_shared<ScanOptions>();
+  options->use_async = true;
+  options->filter = less(field_ref("c"), literal(30));
+
+  ASSERT_OK_AND_ASSIGN(auto scan, MakeScanNode(plan.get(), basic.dataset, options));
+
+  auto sink_gen = MakeSinkNode(scan, "sink");
+
+  auto expected = basic.batches;
+
+  // only the first fragment will make it past the filter
+  expected.pop_back();
+  expected.pop_back();
+
+  ASSERT_THAT(StartAndCollect(plan.get(), sink_gen),
+              ResultWith(UnorderedElementsAreArray(expected)));
+}
+
+TEST(ScanNode, DeferredFilterOnPhysicalColumn) {
+  ASSERT_OK_AND_ASSIGN(auto plan, compute::ExecPlan::Make());
+
+  auto basic = MakeBasicDataset();
+
+  auto options = std::make_shared<ScanOptions>();
+  options->use_async = true;
+  options->filter = greater(field_ref("a"), literal(4));
+
+  ASSERT_OK_AND_ASSIGN(auto scan, MakeScanNode(plan.get(), basic.dataset, options));
+
+  auto sink_gen = MakeSinkNode(scan, "sink");
+
+  // No post filtering is performed by ScanNode: all batches will be yielded whole.
+  // To filter out rows from individual batches, construct a FilterNode.
+  auto expected = basic.batches;
+
+  ASSERT_THAT(StartAndCollect(plan.get(), sink_gen),
+              ResultWith(UnorderedElementsAreArray(expected)));
+}
+
+TEST(ScanNode, ProjectionPushdown) {
+  // ensure non-projected columns are dropped
+}
+
+TEST(ScanNode, MaterializationOfVirtualColumn) {
+  ASSERT_OK_AND_ASSIGN(auto plan, compute::ExecPlan::Make());
+
+  auto basic = MakeBasicDataset();
+
+  auto options = std::make_shared<ScanOptions>();
+  options->use_async = true;
+
+  ASSERT_OK_AND_ASSIGN(auto scan, MakeScanNode(plan.get(), basic.dataset, options));
+
+  ASSERT_OK_AND_ASSIGN(
+      auto project,
+      compute::MakeProjectNode(
+          scan, "project",
+          {field_ref("a"), field_ref("b"), field_ref("c"), field_ref("__fragment_index"),
+           field_ref("__batch_index"), field_ref("__last_in_fragment")}));
+
+  auto sink_gen = MakeSinkNode(project, "sink");
+
+  auto expected = basic.batches;
+
+  for (auto& batch : expected) {
+    // ProjectNode overwrites "c" placeholder with non-null drawn from guarantee
+    const auto& value = *batch.guarantee.call()->arguments[1].literal();
+    batch.values[project->output_schema()->GetFieldIndex("c")] = value;
+  }
+
+  ASSERT_THAT(StartAndCollect(plan.get(), sink_gen),
+              ResultWith(UnorderedElementsAreArray(expected)));
+}
+
+TEST(ScanNode, CompareToScanner) {
+  ASSERT_OK_AND_ASSIGN(auto plan, compute::ExecPlan::Make());
+
+  auto basic = MakeBasicDataset();
+
+  ScannerBuilder builder(basic.dataset);
+  ASSERT_OK(builder.UseAsync(true));
+  ASSERT_OK(builder.UseThreads(true));
+  ASSERT_OK(builder.Filter(greater(field_ref("c"), literal(30))));
+  ASSERT_OK(builder.Project(
+      {field_ref("c"), call("multiply", {field_ref("a"), literal(2)})}, {"c", "a * 2"}));
+  ASSERT_OK_AND_ASSIGN(auto scanner, builder.Finish());
+
+  ASSERT_OK_AND_ASSIGN(auto fragments_it,
+                       basic.dataset->GetFragments(scanner->options()->filter));
+  ASSERT_OK_AND_ASSIGN(auto fragments, fragments_it.ToVector());
+
+  auto options = scanner->options();
+
+  ASSERT_OK_AND_ASSIGN(auto scan, MakeScanNode(plan.get(), basic.dataset, options));
+
+  ASSERT_OK_AND_ASSIGN(auto filter,
+                       compute::MakeFilterNode(scan, "filter", options->filter));
+
+  auto exprs = options->projection.call()->arguments;
+  exprs.push_back(compute::field_ref("__fragment_index"));
+  exprs.push_back(compute::field_ref("__batch_index"));
+  exprs.push_back(compute::field_ref("__last_in_fragment"));
+  ASSERT_OK_AND_ASSIGN(auto project, compute::MakeProjectNode(filter, "project", exprs));
+
+  AsyncGenerator<util::optional<compute::ExecBatch>> sink_gen =
+      compute::MakeSinkNode(project, "sink");
+
+  ASSERT_OK(plan->StartProducing());
+
+  auto from_plan =
+      CollectAsyncGenerator(
+          MakeMappedGenerator(
+              sink_gen,
+              [&](const util::optional<compute::ExecBatch>& batch)
+                  -> Result<EnumeratedRecordBatch> {
+                int num_fields = options->projected_schema->num_fields();
+
+                ArrayVector columns(num_fields);
+                for (size_t i = 0; i < columns.size(); ++i) {
+                  const Datum& value = batch->values[i];
+                  if (value.is_array()) {
+                    columns[i] = value.make_array();
+                    continue;
+                  }
+                  ARROW_ASSIGN_OR_RAISE(
+                      columns[i],
+                      MakeArrayFromScalar(*value.scalar(), batch->length, options->pool));
+                }
+
+                EnumeratedRecordBatch out;
+                out.fragment.index =
+                    batch->values[num_fields].scalar_as<Int32Scalar>().value;
+                out.fragment.value = fragments[out.fragment.index];
+                out.fragment.last = false;  // ignored during reordering
+
+                out.record_batch.index =
+                    batch->values[num_fields + 1].scalar_as<Int32Scalar>().value;
+                out.record_batch.value = RecordBatch::Make(
+                    options->projected_schema, batch->length, std::move(columns));
+                out.record_batch.last =
+                    batch->values[num_fields + 2].scalar_as<BooleanScalar>().value;
+
+                return out;
+              }))
+          .result();
+
+  ASSERT_OK_AND_ASSIGN(auto from_scanner_gen, scanner->ScanBatchesUnorderedAsync());
+  auto from_scanner = CollectAsyncGenerator(from_scanner_gen).result();
+
+  auto less = [](const EnumeratedRecordBatch& l, const EnumeratedRecordBatch& r) {
+    if (l.fragment.index < r.fragment.index) return true;
+    return l.record_batch.index < r.record_batch.index;
+  };
+
+  ASSERT_OK(from_plan);
+  std::sort(from_plan->begin(), from_plan->end(), less);
+
+  ASSERT_OK(from_scanner);
+  std::sort(from_scanner->begin(), from_scanner->end(), less);
+
+  ASSERT_EQ(from_plan->size(), from_scanner->size());
+  for (size_t i = 0; i < from_plan->size(); ++i) {
+    const auto& p = from_plan->at(i);
+    const auto& s = from_scanner->at(i);
+    SCOPED_TRACE(i);
+    ASSERT_EQ(p.fragment.index, s.fragment.index);
+    ASSERT_EQ(p.fragment.value, s.fragment.value);
+    ASSERT_EQ(p.record_batch.last, s.record_batch.last);
+    ASSERT_EQ(p.record_batch.index, s.record_batch.index);
+    AssertBatchesEqual(*p.record_batch.value, *s.record_batch.value);
+  }
+}
+
 }  // namespace dataset
 }  // namespace arrow
diff --git a/cpp/src/arrow/dataset/test_util.h b/cpp/src/arrow/dataset/test_util.h
index 42704fea9b5..201fc7e55b2 100644
--- a/cpp/src/arrow/dataset/test_util.h
+++ b/cpp/src/arrow/dataset/test_util.h
@@ -122,25 +122,6 @@ void EnsureRecordBatchReaderDrained(RecordBatchReader* reader) {
   EXPECT_EQ(batch, nullptr);
 }
 
-/// Test dataset that returns one or more fragments.
-class FragmentDataset : public Dataset {
- public:
-  FragmentDataset(std::shared_ptr<Schema> schema, FragmentVector fragments)
-      : Dataset(std::move(schema)), fragments_(std::move(fragments)) {}
-
-  std::string type_name() const override { return "fragment"; }
-
-  Result<std::shared_ptr<Dataset>> ReplaceSchema(std::shared_ptr<Schema>) const override {
-    return Status::NotImplemented("");
-  }
-
- protected:
-  Result<FragmentIterator> GetFragmentsImpl(compute::Expression predicate) override {
-    return MakeVectorIterator(fragments_);
-  }
-  FragmentVector fragments_;
-};
-
 class DatasetFixtureMixin : public ::testing::Test {
  public:
   /// \brief Ensure that record batches found in reader are equals to the
@@ -547,8 +528,8 @@ class FileFormatScanMixin : public FileFormatFixtureMixin<FormatHelper>,
 
   // Scan the fragment through the scanner.
   RecordBatchIterator Batches(std::shared_ptr<Fragment> fragment) {
-    EXPECT_OK_AND_ASSIGN(auto schema, fragment->ReadPhysicalSchema());
-    auto dataset = std::make_shared<FragmentDataset>(schema, FragmentVector{fragment});
+    auto dataset = std::make_shared<FragmentDataset>(opts_->dataset_schema,
+                                                     FragmentVector{fragment});
     ScannerBuilder builder(dataset, opts_);
     ARROW_EXPECT_OK(builder.UseAsync(GetParam().use_async));
     ARROW_EXPECT_OK(builder.UseThreads(GetParam().use_threads));
@@ -761,12 +742,11 @@ class JSONRecordBatchFileFormat : public FileFormat {
     ARROW_ASSIGN_OR_RAISE(auto file, fragment->source().Open());
     ARROW_ASSIGN_OR_RAISE(int64_t size, file->GetSize());
     ARROW_ASSIGN_OR_RAISE(auto buffer, file->Read(size));
-
-    util::string_view view{*buffer};
-
     ARROW_ASSIGN_OR_RAISE(auto schema, Inspect(fragment->source()));
-    std::shared_ptr<RecordBatch> batch = RecordBatchFromJSON(schema, view);
-    return ScanTaskIteratorFromRecordBatch({batch}, std::move(options));
+
+    RecordBatchVector batches{RecordBatchFromJSON(schema, util::string_view{*buffer})};
+    return std::make_shared<InMemoryFragment>(std::move(schema), std::move(batches))
+        ->Scan(std::move(options));
   }
 
   Result<std::shared_ptr<FileWriter>> MakeWriter(
@@ -910,13 +890,10 @@ struct ArithmeticDatasetFixture {
   static std::shared_ptr<Schema> schema() {
     return ::arrow::schema({
         field("i64", int64()),
-        // ARROW-1644: Parquet can't write complex level
-        // field("struct", struct_({
-        //                     // ARROW-2587: Parquet can't write struct with more
-        //                     // than one field.
-        //                     // field("i32", int32()),
-        //                     field("str", utf8()),
-        //                 })),
+        field("struct", struct_({
+                            field("i32", int32()),
+                            field("str", utf8()),
+                        })),
         field("u8", uint8()),
         field("list", list(int32())),
         field("bool", boolean()),
@@ -933,12 +910,12 @@ struct ArithmeticDatasetFixture {
 
     ss << "{";
     ss << "\"i64\": " << n << ", ";
-    // ss << "\"struct\": {";
-    // {
-    //   // ss << "\"i32\": " << n_i32 << ", ";
-    //   ss << "\"str\": \"" << std::to_string(n) << "\"";
-    // }
-    // ss << "}, ";
+    ss << "\"struct\": {";
+    {
+      ss << "\"i32\": " << n_i32 << ", ";
+      ss << R"("str": ")" << std::to_string(n) << "\"";
+    }
+    ss << "}, ";
     ss << "\"u8\": " << static_cast<int32_t>(n) << ", ";
     ss << "\"list\": [" << n_i32 << ", " << n_i32 << "], ";
     ss << "\"bool\": " << (static_cast<bool>(n % 2) ? "true" : "false");
@@ -1052,7 +1029,7 @@ class WriteFileSystemDatasetMixin : public MakeFileSystemDatasetMixin {
     ASSERT_OK_AND_ASSIGN(dataset_, factory->Finish());
 
     scan_options_ = std::make_shared<ScanOptions>();
-    scan_options_->dataset_schema = source_schema_;
+    scan_options_->dataset_schema = dataset_->schema();
     ASSERT_OK(SetProjection(scan_options_.get(), source_schema_->field_names()));
   }
 
diff --git a/cpp/src/arrow/pretty_print.cc b/cpp/src/arrow/pretty_print.cc
index 8c2ac376d1e..8d1c16e0ed6 100644
--- a/cpp/src/arrow/pretty_print.cc
+++ b/cpp/src/arrow/pretty_print.cc
@@ -69,10 +69,12 @@ class PrettyPrinter {
 };
 
 void PrettyPrinter::OpenArray(const Array& array) {
-  Indent();
+  if (!options_.skip_new_lines) {
+    Indent();
+  }
   (*sink_) << "[";
   if (array.length() > 0) {
-    (*sink_) << "\n";
+    Newline();
     indent_ += options_.indent_size;
   }
 }
@@ -103,7 +105,6 @@ void PrettyPrinter::Newline() {
     return;
   }
   (*sink_) << "\n";
-  Indent();
 }
 
 void PrettyPrinter::Indent() {
@@ -124,11 +125,15 @@ class ArrayPrinter : public PrettyPrinter {
       if (skip_comma) {
         skip_comma = false;
       } else {
-        (*sink_) << ",\n";
+        (*sink_) << ",";
+        Newline();
+      }
+      if (!options_.skip_new_lines) {
+        Indent();
       }
-      Indent();
       if ((i >= options_.window) && (i < (array.length() - options_.window))) {
-        (*sink_) << "...\n";
+        (*sink_) << "...";
+        Newline();
         i = array.length() - options_.window - 1;
         skip_comma = true;
       } else if (array.IsNull(i)) {
@@ -137,7 +142,7 @@ class ArrayPrinter : public PrettyPrinter {
         func(i);
       }
     }
-    (*sink_) << "\n";
+    Newline();
   }
 
   Status WriteDataValues(const BooleanArray& array) {
@@ -239,11 +244,13 @@ class ArrayPrinter : public PrettyPrinter {
       if (skip_comma) {
         skip_comma = false;
       } else {
-        (*sink_) << ",\n";
+        (*sink_) << ",";
+        Newline();
       }
       if ((i >= options_.window) && (i < (array.length() - options_.window))) {
         Indent();
-        (*sink_) << "...\n";
+        (*sink_) << "...";
+        Newline();
         i = array.length() - options_.window - 1;
         skip_comma = true;
       } else if (array.IsNull(i)) {
@@ -252,10 +259,11 @@ class ArrayPrinter : public PrettyPrinter {
       } else {
         std::shared_ptr<Array> slice =
             array.values()->Slice(array.value_offset(i), array.value_length(i));
-        RETURN_NOT_OK(PrettyPrint(*slice, {indent_, options_.window}, sink_));
+        RETURN_NOT_OK(
+            PrettyPrint(*slice, PrettyPrintOptions{indent_, options_.window}, sink_));
       }
     }
-    (*sink_) << "\n";
+    Newline();
     return Status::OK();
   }
 
@@ -265,28 +273,36 @@ class ArrayPrinter : public PrettyPrinter {
       if (skip_comma) {
         skip_comma = false;
       } else {
-        (*sink_) << ",\n";
+        (*sink_) << ",";
+        Newline();
       }
-      if ((i >= options_.window) && (i < (array.length() - options_.window))) {
+
+      if (!options_.skip_new_lines) {
         Indent();
-        (*sink_) << "...\n";
+      }
+
+      if ((i >= options_.window) && (i < (array.length() - options_.window))) {
+        (*sink_) << "...";
+        Newline();
         i = array.length() - options_.window - 1;
         skip_comma = true;
       } else if (array.IsNull(i)) {
-        Indent();
         (*sink_) << options_.null_rep;
       } else {
-        Indent();
-        (*sink_) << "keys:\n";
+        (*sink_) << "keys:";
+        Newline();
         auto keys_slice =
             array.keys()->Slice(array.value_offset(i), array.value_length(i));
-        RETURN_NOT_OK(PrettyPrint(*keys_slice, {indent_, options_.window}, sink_));
-        (*sink_) << "\n";
+        RETURN_NOT_OK(PrettyPrint(*keys_slice,
+                                  PrettyPrintOptions{indent_, options_.window}, sink_));
+        Newline();
         Indent();
-        (*sink_) << "values:\n";
+        (*sink_) << "values:";
+        Newline();
         auto values_slice =
             array.items()->Slice(array.value_offset(i), array.value_length(i));
-        RETURN_NOT_OK(PrettyPrint(*values_slice, {indent_, options_.window}, sink_));
+        RETURN_NOT_OK(PrettyPrint(*values_slice,
+                                  PrettyPrintOptions{indent_, options_.window}, sink_));
       }
     }
     (*sink_) << "\n";
@@ -325,6 +341,7 @@ class ArrayPrinter : public PrettyPrinter {
                        int64_t length) {
     for (size_t i = 0; i < fields.size(); ++i) {
       Newline();
+      Indent();
       std::stringstream ss;
       ss << "-- child " << i << " type: " << fields[i]->type()->ToString() << "\n";
       Write(ss.str());
@@ -352,12 +369,14 @@ class ArrayPrinter : public PrettyPrinter {
     RETURN_NOT_OK(WriteValidityBitmap(array));
 
     Newline();
+    Indent();
     Write("-- type_ids: ");
     UInt8Array type_codes(array.length(), array.type_codes(), nullptr, 0, array.offset());
     RETURN_NOT_OK(PrettyPrint(type_codes, indent_ + options_.indent_size, sink_));
 
     if (array.mode() == UnionMode::DENSE) {
       Newline();
+      Indent();
       Write("-- value_offsets: ");
       Int32Array value_offsets(
           array.length(), checked_cast<const DenseUnionArray&>(array).value_offsets(),
@@ -376,11 +395,13 @@ class ArrayPrinter : public PrettyPrinter {
 
   Status Visit(const DictionaryArray& array) {
     Newline();
+    Indent();
     Write("-- dictionary:\n");
     RETURN_NOT_OK(
         PrettyPrint(*array.dictionary(), indent_ + options_.indent_size, sink_));
 
     Newline();
+    Indent();
     Write("-- indices:\n");
     return PrettyPrint(*array.indices(), indent_ + options_.indent_size, sink_);
   }
@@ -431,6 +452,7 @@ Status ArrayPrinter::WriteValidityBitmap(const Array& array) {
 
   if (array.null_count() > 0) {
     Newline();
+    Indent();
     BooleanArray is_valid(array.length(), array.null_bitmap(), nullptr, 0,
                           array.offset());
     return PrettyPrint(is_valid, indent_ + options_.indent_size, sink_);
@@ -470,19 +492,28 @@ Status PrettyPrint(const ChunkedArray& chunked_arr, const PrettyPrintOptions& op
   for (int i = 0; i < indent; ++i) {
     (*sink) << " ";
   }
-  (*sink) << "[\n";
+  (*sink) << "[";
+  if (!options.skip_new_lines) {
+    *sink << "\n";
+  }
   bool skip_comma = true;
   for (int i = 0; i < num_chunks; ++i) {
     if (skip_comma) {
       skip_comma = false;
     } else {
-      (*sink) << ",\n";
+      (*sink) << ",";
+      if (!options.skip_new_lines) {
+        *sink << "\n";
+      }
     }
     if ((i >= window) && (i < (num_chunks - window))) {
       for (int i = 0; i < indent; ++i) {
         (*sink) << " ";
       }
-      (*sink) << "...\n";
+      (*sink) << "...";
+      if (!options.skip_new_lines) {
+        *sink << "\n";
+      }
       i = num_chunks - window - 1;
       skip_comma = true;
     } else {
@@ -492,7 +523,9 @@ Status PrettyPrint(const ChunkedArray& chunked_arr, const PrettyPrintOptions& op
       RETURN_NOT_OK(printer.Print(*chunked_arr.chunk(i)));
     }
   }
-  (*sink) << "\n";
+  if (!options.skip_new_lines) {
+    *sink << "\n";
+  }
 
   for (int i = 0; i < indent; ++i) {
     (*sink) << " ";
@@ -572,6 +605,7 @@ class SchemaPrinter : public PrettyPrinter {
   void PrintVerboseMetadata(const KeyValueMetadata& metadata) {
     for (int64_t i = 0; i < metadata.size(); ++i) {
       Newline();
+      Indent();
       Write(metadata.key(i) + ": '" + metadata.value(i) + "'");
     }
   }
@@ -579,6 +613,7 @@ class SchemaPrinter : public PrettyPrinter {
   void PrintTruncatedMetadata(const KeyValueMetadata& metadata) {
     for (int64_t i = 0; i < metadata.size(); ++i) {
       Newline();
+      Indent();
       size_t size = metadata.value(i).size();
       size_t truncated_size = std::max<size_t>(10, 70 - metadata.key(i).size() - indent_);
       if (size <= truncated_size) {
@@ -594,6 +629,7 @@ class SchemaPrinter : public PrettyPrinter {
   void PrintMetadata(const std::string& metadata_type, const KeyValueMetadata& metadata) {
     if (metadata.size() > 0) {
       Newline();
+      Indent();
       Write(metadata_type);
       if (options_.truncate_metadata) {
         PrintTruncatedMetadata(metadata);
@@ -607,6 +643,7 @@ class SchemaPrinter : public PrettyPrinter {
     for (int i = 0; i < schema_.num_fields(); ++i) {
       if (i > 0) {
         Newline();
+        Indent();
       } else {
         Indent();
       }
@@ -631,6 +668,7 @@ Status SchemaPrinter::PrintType(const DataType& type, bool nullable) {
   }
   for (int i = 0; i < type.num_fields(); ++i) {
     Newline();
+    Indent();
 
     std::stringstream ss;
     ss << "child " << i << ", ";
diff --git a/cpp/src/arrow/pretty_print.h b/cpp/src/arrow/pretty_print.h
index 9d2c72c7186..1bc086a6889 100644
--- a/cpp/src/arrow/pretty_print.h
+++ b/cpp/src/arrow/pretty_print.h
@@ -19,6 +19,7 @@
 
 #include <iosfwd>
 #include <string>
+#include <utility>
 
 #include "arrow/util/visibility.h"
 
@@ -34,13 +35,14 @@ class Table;
 struct PrettyPrintOptions {
   PrettyPrintOptions() = default;
 
-  PrettyPrintOptions(int indent_arg, int window_arg = 10, int indent_size_arg = 2,
+  PrettyPrintOptions(int indent_arg,  // NOLINT runtime/explicit
+                     int window_arg = 10, int indent_size_arg = 2,
                      std::string null_rep_arg = "null", bool skip_new_lines_arg = false,
                      bool truncate_metadata_arg = true)
       : indent(indent_arg),
         indent_size(indent_size_arg),
         window(window_arg),
-        null_rep(null_rep_arg),
+        null_rep(std::move(null_rep_arg)),
         skip_new_lines(skip_new_lines_arg),
         truncate_metadata(truncate_metadata_arg) {}
 
diff --git a/cpp/src/arrow/result.h b/cpp/src/arrow/result.h
index 0172a852434..cb7437cd242 100644
--- a/cpp/src/arrow/result.h
+++ b/cpp/src/arrow/result.h
@@ -478,6 +478,11 @@ class ARROW_MUST_USE_TYPE Result : public util::EqualityComparable<Result<T>> {
 ///
 /// WARNING: ARROW_ASSIGN_OR_RAISE `std::move`s its right operand. If you have
 /// an lvalue Result which you *don't* want to move out of cast appropriately.
+///
+/// WARNING: ARROW_ASSIGN_OR_RAISE is not a single expression; it will not
+/// maintain lifetimes of all temporaries in `rexpr` (e.g.
+/// `ARROW_ASSIGN_OR_RAISE(auto x, MakeTemp().GetResultRef());`
+/// will most likely segfault)!
 #define ARROW_ASSIGN_OR_RAISE(lhs, rexpr)                                              \
   ARROW_ASSIGN_OR_RAISE_IMPL(ARROW_ASSIGN_OR_RAISE_NAME(_error_or_value, __COUNTER__), \
                              lhs, rexpr);
@@ -485,7 +490,7 @@ class ARROW_MUST_USE_TYPE Result : public util::EqualityComparable<Result<T>> {
 namespace internal {
 
 template <typename T>
-inline Status GenericToStatus(const Result<T>& res) {
+inline const Status& GenericToStatus(const Result<T>& res) {
   return res.status();
 }
 
@@ -496,9 +501,9 @@ inline Status GenericToStatus(Result<T>&& res) {
 
 }  // namespace internal
 
-template <typename T>
-Result<T> ToResult(T t) {
-  return Result<T>(std::move(t));
+template <typename T, typename R = typename EnsureResult<T>::type>
+R ToResult(T t) {
+  return R(std::move(t));
 }
 
 template <typename T>
diff --git a/cpp/src/arrow/result_test.cc b/cpp/src/arrow/result_test.cc
index b71af9d8531..cb645bc7402 100644
--- a/cpp/src/arrow/result_test.cc
+++ b/cpp/src/arrow/result_test.cc
@@ -26,6 +26,8 @@
 #include <gtest/gtest.h>
 
 #include "arrow/testing/gtest_compat.h"
+#include "arrow/testing/gtest_util.h"
+#include "arrow/testing/matchers.h"
 
 namespace arrow {
 
@@ -724,5 +726,74 @@ TEST(ResultTest, ViewAsStatus) {
   EXPECT_EQ(ViewAsStatus(&err), &err.status());
 }
 
+TEST(ResultTest, MatcherExamples) {
+  EXPECT_THAT(Result<int>(Status::Invalid("arbitrary error")),
+              Raises(StatusCode::Invalid));
+
+  EXPECT_THAT(Result<int>(Status::Invalid("arbitrary error")),
+              Raises(StatusCode::Invalid, testing::HasSubstr("arbitrary")));
+
+  // message doesn't match, so no match
+  EXPECT_THAT(
+      Result<int>(Status::Invalid("arbitrary error")),
+      testing::Not(Raises(StatusCode::Invalid, testing::HasSubstr("reasonable"))));
+
+  // different error code, so no match
+  EXPECT_THAT(Result<int>(Status::TypeError("arbitrary error")),
+              testing::Not(Raises(StatusCode::Invalid)));
+
+  // not an error, so no match
+  EXPECT_THAT(Result<int>(333), testing::Not(Raises(StatusCode::Invalid)));
+
+  EXPECT_THAT(Result<std::string>("hello world"),
+              ResultWith(testing::HasSubstr("hello")));
+
+  EXPECT_THAT(Result<std::string>(Status::Invalid("XXX")),
+              testing::Not(ResultWith(testing::HasSubstr("hello"))));
+
+  // holds a value, but that value doesn't match the given pattern
+  EXPECT_THAT(Result<std::string>("foo bar"),
+              testing::Not(ResultWith(testing::HasSubstr("hello"))));
+}
+
+TEST(ResultTest, MatcherDescriptions) {
+  testing::Matcher<Result<std::string>> matcher = ResultWith(testing::HasSubstr("hello"));
+
+  {
+    std::stringstream ss;
+    matcher.DescribeTo(&ss);
+    EXPECT_THAT(ss.str(), testing::StrEq("value has substring \"hello\""));
+  }
+
+  {
+    std::stringstream ss;
+    matcher.DescribeNegationTo(&ss);
+    EXPECT_THAT(ss.str(), testing::StrEq("value has no substring \"hello\""));
+  }
+}
+
+TEST(ResultTest, MatcherExplanations) {
+  testing::Matcher<Result<std::string>> matcher = ResultWith(testing::HasSubstr("hello"));
+
+  {
+    testing::StringMatchResultListener listener;
+    EXPECT_TRUE(matcher.MatchAndExplain(Result<std::string>("hello world"), &listener));
+    EXPECT_THAT(listener.str(), testing::StrEq("whose value \"hello world\" matches"));
+  }
+
+  {
+    testing::StringMatchResultListener listener;
+    EXPECT_FALSE(matcher.MatchAndExplain(Result<std::string>("foo bar"), &listener));
+    EXPECT_THAT(listener.str(), testing::StrEq("whose value \"foo bar\" doesn't match"));
+  }
+
+  {
+    testing::StringMatchResultListener listener;
+    EXPECT_FALSE(matcher.MatchAndExplain(Status::TypeError("XXX"), &listener));
+    EXPECT_THAT(listener.str(),
+                testing::StrEq("whose error \"Type error: XXX\" doesn't match"));
+  }
+}
+
 }  // namespace
 }  // namespace arrow
diff --git a/cpp/src/arrow/status.h b/cpp/src/arrow/status.h
index 43879e6c6a3..056d60d6f32 100644
--- a/cpp/src/arrow/status.h
+++ b/cpp/src/arrow/status.h
@@ -312,7 +312,10 @@ class ARROW_MUST_USE_TYPE ARROW_EXPORT Status : public util::EqualityComparable<
   StatusCode code() const { return ok() ? StatusCode::OK : state_->code; }
 
   /// \brief Return the specific error message attached to this status.
-  std::string message() const { return ok() ? "" : state_->msg; }
+  const std::string& message() const {
+    static const std::string no_message = "";
+    return ok() ? no_message : state_->msg;
+  }
 
   /// \brief Return the status detail attached to this message.
   const std::shared_ptr<StatusDetail>& detail() const {
@@ -440,7 +443,7 @@ namespace internal {
 
 // Extract Status from Status or Result<T>
 // Useful for the status check macros such as RETURN_NOT_OK.
-inline Status GenericToStatus(const Status& st) { return st; }
+inline const Status& GenericToStatus(const Status& st) { return st; }
 inline Status GenericToStatus(Status&& st) { return std::move(st); }
 
 }  // namespace internal
diff --git a/cpp/src/arrow/status_test.cc b/cpp/src/arrow/status_test.cc
index fc5a7ec45cf..10a79d9b990 100644
--- a/cpp/src/arrow/status_test.cc
+++ b/cpp/src/arrow/status_test.cc
@@ -17,9 +17,12 @@
 
 #include <sstream>
 
+#include <gmock/gmock-matchers.h>
 #include <gtest/gtest.h>
 
 #include "arrow/status.h"
+#include "arrow/testing/gtest_util.h"
+#include "arrow/testing/matchers.h"
 
 namespace arrow {
 
@@ -114,6 +117,85 @@ TEST(StatusTest, TestEquality) {
   ASSERT_NE(Status::Invalid("error"), Status::Invalid("other error"));
 }
 
+TEST(StatusTest, MatcherExamples) {
+  EXPECT_THAT(Status::Invalid("arbitrary error"), Raises(StatusCode::Invalid));
+
+  EXPECT_THAT(Status::Invalid("arbitrary error"),
+              Raises(StatusCode::Invalid, testing::HasSubstr("arbitrary")));
+
+  // message doesn't match, so no match
+  EXPECT_THAT(
+      Status::Invalid("arbitrary error"),
+      testing::Not(Raises(StatusCode::Invalid, testing::HasSubstr("reasonable"))));
+
+  // different error code, so no match
+  EXPECT_THAT(Status::TypeError("arbitrary error"),
+              testing::Not(Raises(StatusCode::Invalid)));
+
+  // not an error, so no match
+  EXPECT_THAT(Status::OK(), testing::Not(Raises(StatusCode::Invalid)));
+}
+
+TEST(StatusTest, MatcherDescriptions) {
+  testing::Matcher<Status> matcher = Raises(StatusCode::Invalid);
+
+  {
+    std::stringstream ss;
+    matcher.DescribeTo(&ss);
+    EXPECT_THAT(ss.str(), testing::StrEq("raises StatusCode::Invalid"));
+  }
+
+  {
+    std::stringstream ss;
+    matcher.DescribeNegationTo(&ss);
+    EXPECT_THAT(ss.str(), testing::StrEq("does not raise StatusCode::Invalid"));
+  }
+}
+
+TEST(StatusTest, MessageMatcherDescriptions) {
+  testing::Matcher<Status> matcher =
+      Raises(StatusCode::Invalid, testing::HasSubstr("arbitrary"));
+
+  {
+    std::stringstream ss;
+    matcher.DescribeTo(&ss);
+    EXPECT_THAT(
+        ss.str(),
+        testing::StrEq(
+            "raises StatusCode::Invalid and message has substring \"arbitrary\""));
+  }
+
+  {
+    std::stringstream ss;
+    matcher.DescribeNegationTo(&ss);
+    EXPECT_THAT(ss.str(), testing::StrEq("does not raise StatusCode::Invalid or message "
+                                         "has no substring \"arbitrary\""));
+  }
+}
+
+TEST(StatusTest, MatcherExplanations) {
+  testing::Matcher<Status> matcher = Raises(StatusCode::Invalid);
+
+  {
+    testing::StringMatchResultListener listener;
+    EXPECT_TRUE(matcher.MatchAndExplain(Status::Invalid("XXX"), &listener));
+    EXPECT_THAT(listener.str(), testing::StrEq("whose value \"Invalid: XXX\" matches"));
+  }
+
+  {
+    testing::StringMatchResultListener listener;
+    EXPECT_FALSE(matcher.MatchAndExplain(Status::OK(), &listener));
+    EXPECT_THAT(listener.str(), testing::StrEq("whose value \"OK\" doesn't match"));
+  }
+
+  {
+    testing::StringMatchResultListener listener;
+    EXPECT_FALSE(matcher.MatchAndExplain(Status::TypeError("XXX"), &listener));
+    EXPECT_THAT(listener.str(),
+                testing::StrEq("whose value \"Type error: XXX\" doesn't match"));
+  }
+}
+
 TEST(StatusTest, TestDetailEquality) {
   const auto status_with_detail =
       arrow::Status(StatusCode::IOError, "", std::make_shared<TestStatusDetail>());
diff --git a/cpp/src/arrow/testing/matchers.h b/cpp/src/arrow/testing/matchers.h
new file mode 100644
index 00000000000..246f321e8fa
--- /dev/null
+++ b/cpp/src/arrow/testing/matchers.h
@@ -0,0 +1,177 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <gmock/gmock-matchers.h>
+
+#include "arrow/result.h"
+#include "arrow/status.h"
+
+namespace arrow {
+
+template <typename ValueMatcher>
+class ResultMatcher {
+ public:
+  explicit ResultMatcher(ValueMatcher value_matcher)
+      : value_matcher_(std::move(value_matcher)) {}
+
+  template <typename Res,
+            typename ValueType = typename std::decay<Res>::type::ValueType>
+  operator testing::Matcher<Res>() const {  // NOLINT runtime/explicit
+    struct Impl : testing::MatcherInterface<const Res&> {
+      explicit Impl(const ValueMatcher& value_matcher)
+          : value_matcher_(testing::MatcherCast<ValueType>(value_matcher)) {}
+
+      void DescribeTo(::std::ostream* os) const override {
+        *os << "value ";
+        value_matcher_.DescribeTo(os);
+      }
+
+      void DescribeNegationTo(::std::ostream* os) const override {
+        *os << "value ";
+        value_matcher_.DescribeNegationTo(os);
+      }
+
+      bool MatchAndExplain(const Res& maybe_value,
+                           testing::MatchResultListener* listener) const override {
+        if (!maybe_value.status().ok()) {
+          *listener << "whose error "
+                    << testing::PrintToString(maybe_value.status().ToString())
+                    << " doesn't match";
+          return false;
+        }
+        const ValueType& value = GetValue(maybe_value);
+        testing::StringMatchResultListener value_listener;
+        const bool match = value_matcher_.MatchAndExplain(value, &value_listener);
+        *listener << "whose value " << testing::PrintToString(value)
+                  << (match ? " matches" : " doesn't match");
+        testing::internal::PrintIfNotEmpty(value_listener.str(), listener->stream());
+        return match;
+      }
+
+      const testing::Matcher<ValueType> value_matcher_;
+    };
+
+    return testing::Matcher<Res>(new Impl(value_matcher_));
+  }
+
+ private:
+  template <typename T>
+  static const T& GetValue(const Result<T>& maybe_value) {
+    return maybe_value.ValueOrDie();
+  }
+
+  template <typename T>
+  static const T& GetValue(const Future<T>& value_fut) {
+    return GetValue(value_fut.result());
+  }
+
+  const ValueMatcher value_matcher_;
+};
+
+class StatusMatcher {
+ public:
+  explicit StatusMatcher(StatusCode code,
+                         util::optional<testing::Matcher<std::string>> message_matcher)
+      : code_(code), message_matcher_(std::move(message_matcher)) {}
+
+  template <typename Res>
+  operator testing::Matcher<Res>() const {  // NOLINT runtime/explicit
+    struct Impl : testing::MatcherInterface<const Res&> {
+      explicit Impl(StatusCode code,
+                    util::optional<testing::Matcher<std::string>> message_matcher)
+          : code_(code), message_matcher_(std::move(message_matcher)) {}
+
+      void DescribeTo(::std::ostream* os) const override {
+        *os << "raises StatusCode::" << Status::CodeAsString(code_);
+        if (message_matcher_) {
+          *os << " and message ";
+          message_matcher_->DescribeTo(os);
+        }
+      }
+
+      void DescribeNegationTo(::std::ostream* os) const override {
+        *os << "does not raise StatusCode::" << Status::CodeAsString(code_);
+        if (message_matcher_) {
+          *os << " or message ";
+          message_matcher_->DescribeNegationTo(os);
+        }
+      }
+
+      bool MatchAndExplain(const Res& maybe_value,
+                           testing::MatchResultListener* listener) const override {
+        const Status& status = GetStatus(maybe_value);
+        testing::StringMatchResultListener value_listener;
+
+        bool match = status.code() == code_;
+        if (message_matcher_) {
+          match = match &&
+                  message_matcher_->MatchAndExplain(status.message(), &value_listener);
+        }
+
+        *listener << "whose value " << testing::PrintToString(status.ToString())
+                  << (match ? " matches" : " doesn't match");
+        testing::internal::PrintIfNotEmpty(value_listener.str(), listener->stream());
+        return match;
+      }
+
+      const StatusCode code_;
+      const util::optional<testing::Matcher<std::string>> message_matcher_;
+    };
+
+    return testing::Matcher<Res>(new Impl(code_, message_matcher_));
+  }
+
+ private:
+  static const Status& GetStatus(const Status& status) { return status; }
+
+  template <typename T>
+  static const Status& GetStatus(const Result<T>& maybe_value) {
+    return maybe_value.status();
+  }
+
+  template <typename T>
+  static const Status& GetStatus(const Future<T>& value_fut) {
+    return value_fut.status();
+  }
+
+  const StatusCode code_;
+  const util::optional<testing::Matcher<std::string>> message_matcher_;
+};
+
+// Returns a matcher that matches the value of a successful Result<T> or Future<T>.
+// (Future<T> will be waited upon to acquire its result for matching.)
+template <typename ValueMatcher>
+ResultMatcher<ValueMatcher> ResultWith(const ValueMatcher& value_matcher) {
+  return ResultMatcher<ValueMatcher>(value_matcher);
+}
+
+// Returns a matcher that matches the StatusCode of a Status, Result<T>, or Future<T>.
+// (Future<T> will be waited upon to acquire its result for matching.)
+inline StatusMatcher Raises(StatusCode code) {
+  return StatusMatcher(code, util::nullopt);
+}
+
+// Returns a matcher that matches the StatusCode and message of a Status, Result<T>, or
+// Future<T>. (Future<T> will be waited upon to acquire its result for matching.)
+template <typename MessageMatcher>
+StatusMatcher Raises(StatusCode code, const MessageMatcher& message_matcher) {
+  return StatusMatcher(code, testing::MatcherCast<std::string>(message_matcher));
+}
+
+}  // namespace arrow
diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc
index 65c783ce847..41914f43663 100644
--- a/cpp/src/arrow/type.cc
+++ b/cpp/src/arrow/type.cc
@@ -1195,6 +1195,10 @@ std::string FieldRef::ToString() const {
 }
 
 std::vector<FieldPath> FieldRef::FindAll(const Schema& schema) const {
+  if (auto name = this->name()) {
+    return internal::MapVector([](int i) { return FieldPath{i}; },
+                               schema.GetAllFieldIndices(*name));
+  }
   return FindAll(schema.fields());
 }
 
diff --git a/cpp/src/arrow/util/async_generator.h b/cpp/src/arrow/util/async_generator.h
index 084720f9908..1ac10ad7ce8 100644
--- a/cpp/src/arrow/util/async_generator.h
+++ b/cpp/src/arrow/util/async_generator.h
@@ -259,43 +259,17 @@ class MappingGenerator {
 /// Note: Errors returned from the `map` function will be propagated
 ///
 /// If the source generator is async-reentrant then this generator will be also
-template <typename T, typename V>
-AsyncGenerator<V> MakeMappedGenerator(AsyncGenerator<T> source_generator,
-                                      std::function<Result<V>(const T&)> map) {
-  std::function<Future<V>(const T&)> future_map = [map](const T& val) -> Future<V> {
-    return Future<V>::MakeFinished(map(val));
-  };
-  return MappingGenerator<T, V>(std::move(source_generator), std::move(future_map));
-}
-template <typename T, typename V>
-AsyncGenerator<V> MakeMappedGenerator(AsyncGenerator<T> source_generator,
-                                      std::function<V(const T&)> map) {
-  std::function<Future<V>(const T&)> maybe_future_map = [map](const T& val) -> Future<V> {
-    return Future<V>::MakeFinished(map(val));
-  };
-  return MappingGenerator<T, V>(std::move(source_generator), std::move(maybe_future_map));
-}
-template <typename T, typename V>
-AsyncGenerator<V> MakeMappedGenerator(AsyncGenerator<T> source_generator,
-                                      std::function<Future<V>(const T&)> map) {
-  return MappingGenerator<T, V>(std::move(source_generator), std::move(map));
-}
-
-template <typename V, typename T, typename MapFunc>
-AsyncGenerator<V> MakeMappedGenerator(AsyncGenerator<T> source_generator, MapFunc map) {
+template <typename T, typename MapFn,
+          typename Mapped = detail::result_of_t<MapFn(const T&)>,
+          typename V = typename EnsureFuture<Mapped>::type::ValueType>
+AsyncGenerator<V> MakeMappedGenerator(AsyncGenerator<T> source_generator, MapFn map) {
   struct MapCallback {
-    MapFunc map;
+    MapFn map_;
 
-    Future<V> operator()(const T& val) { return EnsureFuture(map(val)); }
-
-    Future<V> EnsureFuture(Result<V> val) {
-      return Future<V>::MakeFinished(std::move(val));
-    }
-    Future<V> EnsureFuture(V val) { return Future<V>::MakeFinished(std::move(val)); }
-    Future<V> EnsureFuture(Future<V> val) { return val; }
+    Future<V> operator()(const T& val) { return ToFuture(map_(val)); }
   };
-  std::function<Future<V>(const T&)> map_fn = MapCallback{map};
-  return MappingGenerator<T, V>(std::move(source_generator), map_fn);
+
+  return MappingGenerator<T, V>(std::move(source_generator), MapCallback{std::move(map)});
 }
 
 /// \see MakeSequencingGenerator
diff --git a/cpp/src/arrow/util/async_generator_test.cc b/cpp/src/arrow/util/async_generator_test.cc
index 14b528ade5e..29c8d73ab6c 100644
--- a/cpp/src/arrow/util/async_generator_test.cc
+++ b/cpp/src/arrow/util/async_generator_test.cc
@@ -21,6 +21,7 @@
 #include <random>
 #include <thread>
 #include <unordered_set>
+#include <utility>
 
 #include "arrow/testing/future_util.h"
 #include "arrow/testing/gtest_util.h"
@@ -51,7 +52,7 @@ AsyncGenerator<T> FailsAt(AsyncGenerator<T> src, int failing_index) {
 
 template <typename T>
 AsyncGenerator<T> SlowdownABit(AsyncGenerator<T> source) {
-  return MakeMappedGenerator<T, T>(std::move(source), [](const T& res) -> Future<T> {
+  return MakeMappedGenerator(std::move(source), [](const T& res) {
     return SleepABitAsync().Then([res]() { return res; });
   });
 }
@@ -88,8 +89,7 @@ std::function<Future<TestInt>()> BackgroundAsyncVectorIt(
   auto slow_iterator = PossiblySlowVectorIt(v, sleep);
   EXPECT_OK_AND_ASSIGN(
       auto background,
-      MakeBackgroundGenerator<TestInt>(std::move(slow_iterator),
-                                       internal::GetCpuThreadPool(), max_q, q_restart));
+      MakeBackgroundGenerator<TestInt>(std::move(slow_iterator), pool, max_q, q_restart));
   return MakeTransferredGenerator(background, pool);
 }
 
@@ -106,8 +106,7 @@ std::function<Future<TestInt>()> NewBackgroundAsyncVectorIt(std::vector<TestInt>
       });
 
   EXPECT_OK_AND_ASSIGN(auto background,
-                       MakeBackgroundGenerator<TestInt>(std::move(slow_iterator),
-                                                        internal::GetCpuThreadPool()));
+                       MakeBackgroundGenerator<TestInt>(std::move(slow_iterator), pool));
   return MakeTransferredGenerator(background, pool);
 }
 
@@ -176,7 +175,8 @@ class ReentrantChecker {
 template <typename T>
 class ReentrantCheckerGuard {
  public:
-  explicit ReentrantCheckerGuard(ReentrantChecker<T> checker) : checker_(checker) {}
+  explicit ReentrantCheckerGuard(ReentrantChecker<T> checker)
+      : checker_(std::move(checker)) {}
 
   ARROW_DISALLOW_COPY_AND_ASSIGN(ReentrantCheckerGuard);
   ReentrantCheckerGuard(ReentrantCheckerGuard&& other) : checker_(other.checker_) {
diff --git a/cpp/src/arrow/util/future.h b/cpp/src/arrow/util/future.h
index d08c598a32b..c7c5ba802f9 100644
--- a/cpp/src/arrow/util/future.h
+++ b/cpp/src/arrow/util/future.h
@@ -36,6 +36,9 @@
 
 namespace arrow {
 
+template <typename>
+struct EnsureFuture;
+
 namespace detail {
 
 template <typename>
@@ -976,4 +979,28 @@ Future<BreakValueType> Loop(Iterate iterate) {
   return break_fut;
 }
 
+inline Future<> ToFuture(Status status) {
+  return Future<>::MakeFinished(std::move(status));
+}
+
+template <typename T>
+Future<T> ToFuture(T value) {
+  return Future<T>::MakeFinished(std::move(value));
+}
+
+template <typename T>
+Future<T> ToFuture(Result<T> maybe_value) {
+  return Future<T>::MakeFinished(std::move(maybe_value));
+}
+
+template <typename T>
+Future<T> ToFuture(Future<T> fut) {
+  return std::move(fut);
+}
+
+template <typename T>
+struct EnsureFuture {
+  using type = decltype(ToFuture(std::declval<T>()));
+};
+
 }  // namespace arrow
diff --git a/cpp/src/arrow/util/future_test.cc b/cpp/src/arrow/util/future_test.cc
index 33796a05bb1..b25d77c48cd 100644
--- a/cpp/src/arrow/util/future_test.cc
+++ b/cpp/src/arrow/util/future_test.cc
@@ -36,6 +36,7 @@
 #include "arrow/testing/executor_util.h"
 #include "arrow/testing/future_util.h"
 #include "arrow/testing/gtest_util.h"
+#include "arrow/testing/matchers.h"
 #include "arrow/util/logging.h"
 #include "arrow/util/thread_pool.h"
 
@@ -1704,5 +1705,45 @@ TEST(FnOnceTest, MoveOnlyDataType) {
   ASSERT_EQ(i0.moves, 0);
   ASSERT_EQ(i1.moves, 0);
 }
+
+TEST(FutureTest, MatcherExamples) {
+  EXPECT_THAT(Future<int>::MakeFinished(Status::Invalid("arbitrary error")),
+              Raises(StatusCode::Invalid));
+
+  EXPECT_THAT(Future<int>::MakeFinished(Status::Invalid("arbitrary error")),
+              Raises(StatusCode::Invalid, testing::HasSubstr("arbitrary")));
+
+  // message doesn't match, so no match
+  EXPECT_THAT(
+      Future<int>::MakeFinished(Status::Invalid("arbitrary error")),
+      testing::Not(Raises(StatusCode::Invalid, testing::HasSubstr("reasonable"))));
+
+  // different error code, so no match
+  EXPECT_THAT(Future<int>::MakeFinished(Status::TypeError("arbitrary error")),
+              testing::Not(Raises(StatusCode::Invalid)));
+
+  // not an error, so no match
+  EXPECT_THAT(Future<int>::MakeFinished(333), testing::Not(Raises(StatusCode::Invalid)));
+
+  EXPECT_THAT(Future<std::string>::MakeFinished("hello world"),
+              ResultWith(testing::HasSubstr("hello")));
+
+  // Matcher waits on Futures
+  auto string_fut = Future<std::string>::Make();
+  auto finisher = std::thread([&] {
+    SleepABit();
+    string_fut.MarkFinished("hello world");
+  });
+  EXPECT_THAT(string_fut, ResultWith(testing::HasSubstr("hello")));
+  finisher.join();
+
+  EXPECT_THAT(Future<std::string>::MakeFinished(Status::Invalid("XXX")),
+              testing::Not(ResultWith(testing::HasSubstr("hello"))));
+
+  // holds a value, but that value doesn't match the given pattern
+  EXPECT_THAT(Future<std::string>::MakeFinished("foo bar"),
+              testing::Not(ResultWith(testing::HasSubstr("hello"))));
+}
+
 }  // namespace internal
 }  // namespace arrow
diff --git a/cpp/src/arrow/util/thread_pool.cc b/cpp/src/arrow/util/thread_pool.cc
index 672839b67d5..758295d01ed 100644
--- a/cpp/src/arrow/util/thread_pool.cc
+++ b/cpp/src/arrow/util/thread_pool.cc
@@ -321,13 +321,20 @@ void ThreadPool::CollectFinishedWorkersUnlocked() {
   state_->finished_workers_.clear();
 }
 
+thread_local ThreadPool* current_thread_pool_ = nullptr;
+
+bool ThreadPool::OwnsThisThread() { return current_thread_pool_ == this; }
+
 void ThreadPool::LaunchWorkersUnlocked(int threads) {
   std::shared_ptr<State> state = sp_state_;
 
   for (int i = 0; i < threads; i++) {
     state_->workers_.emplace_back();
     auto it = --(state_->workers_.end());
-    *it = std::thread([state, it] { WorkerLoop(state, it); });
+    *it = std::thread([this, state, it] {
+      current_thread_pool_ = this;
+      WorkerLoop(state, it);
+    });
   }
 }
 
diff --git a/cpp/src/arrow/util/thread_pool.h b/cpp/src/arrow/util/thread_pool.h
index d012aa02010..febbc997852 100644
--- a/cpp/src/arrow/util/thread_pool.h
+++ b/cpp/src/arrow/util/thread_pool.h
@@ -179,6 +179,10 @@ class ARROW_EXPORT Executor {
   // concurrently).  This may be an approximate number.
   virtual int GetCapacity() = 0;
 
+  // Return true if the thread from which this function is called is owned by this
+  // Executor. Returns false if this Executor does not support this property.
+  virtual bool OwnsThisThread() { return false; }
+
  protected:
   ARROW_DISALLOW_COPY_AND_ASSIGN(Executor);
 
@@ -298,6 +302,8 @@ class ARROW_EXPORT ThreadPool : public Executor {
   // match this value.
   int GetCapacity() override;
 
+  bool OwnsThisThread() override;
+
   // Return the number of tasks either running or in the queue.
   int GetNumTasks();
 
diff --git a/cpp/src/arrow/util/thread_pool_test.cc b/cpp/src/arrow/util/thread_pool_test.cc
index 2cfb4c62613..399c755a8f9 100644
--- a/cpp/src/arrow/util/thread_pool_test.cc
+++ b/cpp/src/arrow/util/thread_pool_test.cc
@@ -395,6 +395,23 @@ TEST_F(TestThreadPool, StressSpawn) {
   SpawnAdds(pool.get(), 1000, task_add<int>);
 }
 
+TEST_F(TestThreadPool, OwnsCurrentThread) {
+  auto pool = this->MakeThreadPool(30);
+  std::atomic<bool> one_failed{false};
+
+  for (int i = 0; i < 1000; ++i) {
+    ASSERT_OK(pool->Spawn([&] {
+      if (pool->OwnsThisThread()) return;
+
+      one_failed = true;
+    }));
+  }
+
+  ASSERT_OK(pool->Shutdown());
+  ASSERT_FALSE(pool->OwnsThisThread());
+  ASSERT_FALSE(one_failed);
+}
+
 TEST_F(TestThreadPool, StressSpawnThreaded) {
   auto pool = this->MakeThreadPool(30);
   SpawnAddsThreaded(pool.get(), 20, 100, task_add<int>);
diff --git a/cpp/src/arrow/util/vector.h b/cpp/src/arrow/util/vector.h
index 3ef0074aa9d..041bdb424a7 100644
--- a/cpp/src/arrow/util/vector.h
+++ b/cpp/src/arrow/util/vector.h
@@ -84,27 +84,49 @@ std::vector<T> FilterVector(std::vector<T> values, Predicate&& predicate) {
   return values;
 }
 
-/// \brief Like MapVector, but where the function can fail.
-template <typename Fn, typename From = internal::call_traits::argument_type<0, Fn>,
-          typename To = typename internal::call_traits::return_type<Fn>::ValueType>
-Result<std::vector<To>> MaybeMapVector(Fn&& map, const std::vector<From>& src) {
+template <typename Fn, typename From,
+          typename To = decltype(std::declval<Fn>()(std::declval<From>()))>
+std::vector<To> MapVector(Fn&& map, const std::vector<From>& source) {
   std::vector<To> out;
-  out.reserve(src.size());
-  ARROW_RETURN_NOT_OK(MaybeTransform(src.begin(), src.end(), std::back_inserter(out),
-                                     std::forward<Fn>(map)));
-  return std::move(out);
+  out.reserve(source.size());
+  std::transform(source.begin(), source.end(), std::back_inserter(out),
+                 std::forward<Fn>(map));
+  return out;
 }
 
 template <typename Fn, typename From,
           typename To = decltype(std::declval<Fn>()(std::declval<From>()))>
-std::vector<To> MapVector(Fn&& map, const std::vector<From>& source) {
+std::vector<To> MapVector(Fn&& map, std::vector<From>&& source) {
   std::vector<To> out;
   out.reserve(source.size());
-  std::transform(source.begin(), source.end(), std::back_inserter(out),
+  std::transform(std::make_move_iterator(source.begin()),
+                 std::make_move_iterator(source.end()), std::back_inserter(out),
                  std::forward<Fn>(map));
   return out;
 }
 
+/// \brief Like MapVector, but where the function can fail.
+template <typename Fn, typename From = internal::call_traits::argument_type<0, Fn>,
+          typename To = typename internal::call_traits::return_type<Fn>::ValueType>
+Result<std::vector<To>> MaybeMapVector(Fn&& map, const std::vector<From>& source) {
+  std::vector<To> out;
+  out.reserve(source.size());
+  ARROW_RETURN_NOT_OK(MaybeTransform(source.begin(), source.end(),
+                                     std::back_inserter(out), std::forward<Fn>(map)));
+  return std::move(out);
+}
+
+template <typename Fn, typename From = internal::call_traits::argument_type<0, Fn>,
+          typename To = typename internal::call_traits::return_type<Fn>::ValueType>
+Result<std::vector<To>> MaybeMapVector(Fn&& map, std::vector<From>&& source) {
+  std::vector<To> out;
+  out.reserve(source.size());
+  ARROW_RETURN_NOT_OK(MaybeTransform(std::make_move_iterator(source.begin()),
+                                     std::make_move_iterator(source.end()),
+                                     std::back_inserter(out), std::forward<Fn>(map)));
+  return std::move(out);
+}
+
 template <typename T>
 std::vector<T> FlattenVectors(const std::vector<std::vector<T>>& vecs) {
   std::size_t sum = 0;
diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx
index bd93da9cb18..e7e8341c9d4 100644
--- a/python/pyarrow/_dataset.pyx
+++ b/python/pyarrow/_dataset.pyx
@@ -3001,7 +3001,7 @@ def _get_partition_keys(Expression partition_expression):
         pair[CFieldRef, CDatum] ref_val
 
     out = {}
-    for ref_val in GetResultValue(CExtractKnownFieldValues(expr)):
+    for ref_val in GetResultValue(CExtractKnownFieldValues(expr)).map:
         assert ref_val.first.name() != nullptr
         assert ref_val.second.kind() == DatumType_SCALAR
         val = pyarrow_wrap_scalar(ref_val.second.scalar())
diff --git a/python/pyarrow/includes/libarrow_dataset.pxd b/python/pyarrow/includes/libarrow_dataset.pxd
index 8cab5536647..f9349f3a642 100644
--- a/python/pyarrow/includes/libarrow_dataset.pxd
+++ b/python/pyarrow/includes/libarrow_dataset.pxd
@@ -32,6 +32,26 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
         pass
 
 
+cdef extern from * namespace "arrow::compute":
+    # inlined from expression_internal.h to avoid
+    # proliferation of #include <unordered_map>
+    """
+    #include <unordered_map>
+
+    #include "arrow/type.h"
+    #include "arrow/datum.h"
+
+    namespace arrow {
+    namespace compute {
+    struct KnownFieldValues {
+      std::unordered_map<FieldRef, Datum, FieldRef::Hash> map;
+    };
+    } //  namespace compute
+    } //  namespace arrow
+    """
+    cdef struct CKnownFieldValues "arrow::compute::KnownFieldValues":
+        unordered_map[CFieldRef, CDatum, CFieldRefHash] map
+
 cdef extern from "arrow/compute/exec/expression.h" \
         namespace "arrow::compute" nogil:
 
@@ -57,7 +77,7 @@ cdef extern from "arrow/compute/exec/expression.h" \
     cdef CResult[CExpression] CDeserializeExpression \
         "arrow::compute::Deserialize"(shared_ptr[CBuffer])
 
-    cdef CResult[unordered_map[CFieldRef, CDatum, CFieldRefHash]] \
+    cdef CResult[CKnownFieldValues] \
         CExtractKnownFieldValues "arrow::compute::ExtractKnownFieldValues"(
             const CExpression& partition_expression)
 
diff --git a/r/src/dataset.cpp b/r/src/dataset.cpp
index 24c1a1343ea..7bb1e639e05 100644
--- a/r/src/dataset.cpp
+++ b/r/src/dataset.cpp
@@ -70,9 +70,9 @@ const char* r6_class_name<ds::FileFormat>::get(
 // [[dataset::export]]
 std::shared_ptr<ds::ScannerBuilder> dataset___Dataset__NewScan(
     const std::shared_ptr<ds::Dataset>& ds) {
-  auto options = std::make_shared<ds::ScanOptions>();
-  options->pool = gc_memory_pool();
-  return ValueOrStop(ds->NewScan(std::move(options)));
+  auto builder = ValueOrStop(ds->NewScan());
+  StopIfNotOk(builder->Pool(gc_memory_pool()));
+  return builder;
 }
 
 // [[dataset::export]]

From 83651ac97de0d3129cb413092cb67f1de476e8a7 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Thu, 1 Jul 2021 16:24:44 +0200
Subject: [PATCH 488/719] ARROW-13223: [C++] Fix Thread Sanitizer test failures

Also ensure that the llvm-symbolizer path is correctly set, for useful tracebacks.

Closes #10632 from pitrou/ARROW-13223-tsan-failures

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 ci/scripts/cpp_build.sh                    |  5 ++++
 cpp/cmake_modules/SetupCxxFlags.cmake      |  7 +++--
 cpp/src/arrow/compute/exec/expression.cc   | 13 +++++----
 cpp/src/arrow/compute/exec/expression.h    |  3 +++
 cpp/src/arrow/util/async_generator.h       | 31 ++++++++++++++++++----
 cpp/src/arrow/util/async_generator_test.cc |  5 ++--
 dev/tasks/tasks.yml                        |  3 +++
 docker-compose.yml                         |  2 ++
 8 files changed, 55 insertions(+), 14 deletions(-)

diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh
index d47a6696e8f..46845d0e623 100755
--- a/ci/scripts/cpp_build.sh
+++ b/ci/scripts/cpp_build.sh
@@ -41,6 +41,11 @@ if [ "${ARROW_USE_CCACHE}" == "ON" ]; then
     ccache -s
 fi
 
+if [ "${ARROW_USE_TSAN}" == "ON" ] && [ ! -x "${ASAN_SYMBOLIZER_PATH}" ]; then
+    echo -e "Invalid value for \$ASAN_SYMBOLIZER_PATH: ${ASAN_SYMBOLIZER_PATH}"
+    exit 1
+fi
+
 mkdir -p ${build_dir}
 pushd ${build_dir}
 
diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake
index 354461cbd27..f12f071642b 100644
--- a/cpp/cmake_modules/SetupCxxFlags.cmake
+++ b/cpp/cmake_modules/SetupCxxFlags.cmake
@@ -260,7 +260,6 @@ if("${BUILD_WARNING_LEVEL}" STREQUAL "CHECKIN")
     set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wdocumentation")
     set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-missing-braces")
     set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-unused-parameter")
-    set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-unknown-warning-option")
     set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-constant-logical-operand")
   elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
     set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wall")
@@ -342,7 +341,11 @@ if(MSVC)
   # Disable "switch statement contains 'default' but no 'case' labels" warning
   # (required for protobuf, see https://github.com/protocolbuffers/protobuf/issues/6885)
   set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} /wd4065")
+
 elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+  # Avoid error when an unknown warning flag is passed
+  set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-unknown-warning-option")
+
   if(CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL "7.0" OR CMAKE_CXX_COMPILER_VERSION
                                                        VERSION_GREATER "7.0")
     # Without this, gcc >= 7 warns related to changes in C++17
@@ -383,7 +386,7 @@ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STRE
   set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Qunused-arguments")
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Qunused-arguments")
 
-  # Avoid clang error when an unknown warning flag is passed
+  # Avoid error when an unknown warning flag is passed
   set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-unknown-warning-option")
   # Add colors when paired with ninja
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fcolor-diagnostics")
diff --git a/cpp/src/arrow/compute/exec/expression.cc b/cpp/src/arrow/compute/exec/expression.cc
index 022584d5b39..bc9a9103f6d 100644
--- a/cpp/src/arrow/compute/exec/expression.cc
+++ b/cpp/src/arrow/compute/exec/expression.cc
@@ -28,7 +28,6 @@
 #include "arrow/io/memory.h"
 #include "arrow/ipc/reader.h"
 #include "arrow/ipc/writer.h"
-#include "arrow/util/atomic_shared_ptr.h"
 #include "arrow/util/hash_util.h"
 #include "arrow/util/key_value_metadata.h"
 #include "arrow/util/logging.h"
@@ -43,11 +42,15 @@ using internal::checked_pointer_cast;
 
 namespace compute {
 
-Expression::Expression(Call call) {
-  call.hash = std::hash<std::string>{}(call.function_name);
-  for (const auto& arg : call.arguments) {
-    arrow::internal::hash_combine(call.hash, arg.hash());
+void Expression::Call::ComputeHash() {
+  hash = std::hash<std::string>{}(function_name);
+  for (const auto& arg : arguments) {
+    arrow::internal::hash_combine(hash, arg.hash());
   }
+}
+
+Expression::Expression(Call call) {
+  call.ComputeHash();
   impl_ = std::make_shared<Impl>(std::move(call));
 }
 
diff --git a/cpp/src/arrow/compute/exec/expression.h b/cpp/src/arrow/compute/exec/expression.h
index d06a923bb32..3810accf70a 100644
--- a/cpp/src/arrow/compute/exec/expression.h
+++ b/cpp/src/arrow/compute/exec/expression.h
@@ -43,6 +43,7 @@ class ARROW_EXPORT Expression {
     std::string function_name;
     std::vector<Expression> arguments;
     std::shared_ptr<FunctionOptions> options;
+    // Cached hash value
     size_t hash;
 
     // post-Bind properties:
@@ -50,6 +51,8 @@ class ARROW_EXPORT Expression {
     const Kernel* kernel = NULLPTR;
     std::shared_ptr<KernelState> kernel_state;
     ValueDescr descr;
+
+    void ComputeHash();
   };
 
   std::string ToString() const;
diff --git a/cpp/src/arrow/util/async_generator.h b/cpp/src/arrow/util/async_generator.h
index 1ac10ad7ce8..8c130c66193 100644
--- a/cpp/src/arrow/util/async_generator.h
+++ b/cpp/src/arrow/util/async_generator.h
@@ -17,7 +17,9 @@
 
 #pragma once
 
+#include <atomic>
 #include <cassert>
+#include <cstring>
 #include <deque>
 #include <queue>
 #include <thread>
@@ -1253,7 +1255,9 @@ class BackgroundGenerator {
           it(std::move(it)),
           reading(false),
           finished(false),
-          should_shutdown(false) {}
+          should_shutdown(false) {
+      SetWorkerThreadId({});  // default-initialized thread id
+    }
 
     void ClearQueue() {
       while (!queue.empty()) {
@@ -1312,11 +1316,28 @@ class BackgroundGenerator {
       return next;
     }
 
+    void SetWorkerThreadId(const std::thread::id tid) {
+      uint64_t equiv{0};
+      // std::thread::id is trivially copyable as per C++ spec,
+      // so type punning as a uint64_t should work
+      static_assert(sizeof(std::thread::id) <= sizeof(uint64_t),
+                    "std::thread::id can't fit into uint64_t");
+      memcpy(&equiv, reinterpret_cast<const void*>(&tid), sizeof(tid));
+      worker_thread_id.store(equiv);
+    }
+
+    std::thread::id GetWorkerThreadId() {
+      const auto equiv = worker_thread_id.load();
+      std::thread::id tid;
+      memcpy(reinterpret_cast<void*>(&tid), &equiv, sizeof(tid));
+      return tid;
+    }
+
     internal::Executor* io_executor;
     const int max_q;
     const int q_restart;
     Iterator<T> it;
-    std::thread::id worker_thread_id;
+    std::atomic<uint64_t> worker_thread_id;
 
     // If true, the task is actively pumping items from the queue and does not need a
     // restart
@@ -1344,7 +1365,7 @@ class BackgroundGenerator {
       ///
       /// It's a deadlock if we enter cleanup from
       /// the worker thread but it can happen if the consumer doesn't transfer away
-      assert(state->worker_thread_id != std::this_thread::get_id());
+      assert(state->GetWorkerThreadId() != std::this_thread::get_id());
       Future<> finish_fut;
       {
         auto lock = state->mutex.Lock();
@@ -1365,7 +1386,7 @@ class BackgroundGenerator {
   static void WorkerTask(std::shared_ptr<State> state) {
     // We need to capture the state to read while outside the mutex
     bool reading = true;
-    state->worker_thread_id = std::this_thread::get_id();
+    state->SetWorkerThreadId(std::this_thread::get_id());
     while (reading) {
       auto next = state->it.Next();
       // Need to capture state->waiting_future inside the mutex to mark finished outside
@@ -1417,7 +1438,7 @@ class BackgroundGenerator {
       // reference it.  We can safely transition to idle now.
       task_finished = state->task_finished;
       state->task_finished = Future<>();
-      state->worker_thread_id = std::thread::id();
+      state->SetWorkerThreadId({});  // default-initialized thread id
     }
     task_finished.MarkFinished();
   }
diff --git a/cpp/src/arrow/util/async_generator_test.cc b/cpp/src/arrow/util/async_generator_test.cc
index 29c8d73ab6c..87c1737228e 100644
--- a/cpp/src/arrow/util/async_generator_test.cc
+++ b/cpp/src/arrow/util/async_generator_test.cc
@@ -15,6 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include <atomic>
 #include <chrono>
 #include <condition_variable>
 #include <mutex>
@@ -68,14 +69,14 @@ class TrackingGenerator {
     return state_->source();
   }
 
-  int num_read() { return state_->num_read; }
+  int num_read() { return state_->num_read.load(); }
 
  private:
   struct State {
     explicit State(AsyncGenerator<T> source) : source(std::move(source)), num_read(0) {}
 
     AsyncGenerator<T> source;
-    int num_read;
+    std::atomic<int> num_read;
   };
 
   std::shared_ptr<State> state_;
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index 34307bb2583..2df24e3aada 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -824,7 +824,10 @@ tasks:
     template: docker-tests/github.linux.yml
     params:
       env:
+        # clang-tools and llvm version need to be synchronized so as
+        # to have the right llvm-symbolizer version
         CLANG_TOOLS: 11
+        LLVM: 11
         UBUNTU: 20.04
       image: ubuntu-cpp-thread-sanitizer
 
diff --git a/docker-compose.yml b/docker-compose.yml
index 79618a1cfed..c872ad42af6 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -365,6 +365,7 @@ services:
       <<: *ccache
       CC: clang-${CLANG_TOOLS}
       CXX: clang++-${CLANG_TOOLS}
+      ARROW_BUILD_STATIC: "OFF"
       ARROW_ENABLE_TIMING_TESTS:  # inherit
       ARROW_FUZZING: "ON"  # Check fuzz regressions
       ARROW_JEMALLOC: "OFF"
@@ -399,6 +400,7 @@ services:
       <<: *ccache
       CC: clang-${CLANG_TOOLS}
       CXX: clang++-${CLANG_TOOLS}
+      ARROW_BUILD_STATIC: "OFF"
       ARROW_ENABLE_TIMING_TESTS:  # inherit
       ARROW_DATASET: "ON"
       ARROW_JEMALLOC: "OFF"

From 24bff1e302068541a28ea5a1c85395d5ede95b80 Mon Sep 17 00:00:00 2001
From: Weston Pace <weston.pace@gmail.com>
Date: Thu, 1 Jul 2021 17:35:53 +0200
Subject: [PATCH 489/719] ARROW-13228: [C++] S3 CreateBucket fails because AWS
 treats us-east-1 differently than other regions

Added special case for us-east-1 in CreateBucket.

Note: I'm not sure how to go about testing this.  I don't think minio is going to have the same quirk.

Closes #10637 from westonpace/bugfix/ARROW-13228--c-s3-createbucket-fails-because-aws-treats-us-

Authored-by: Weston Pace <weston.pace@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/filesystem/s3fs.cc | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc
index 39ce58ecaf6..cee05647dab 100644
--- a/cpp/src/arrow/filesystem/s3fs.cc
+++ b/cpp/src/arrow/filesystem/s3fs.cc
@@ -1492,9 +1492,14 @@ class S3FileSystem::Impl : public std::enable_shared_from_this<S3FileSystem::Imp
   Status CreateBucket(const std::string& bucket) {
     S3Model::CreateBucketConfiguration config;
     S3Model::CreateBucketRequest req;
-    config.SetLocationConstraint(
-        S3Model::BucketLocationConstraintMapper::GetBucketLocationConstraintForName(
-            ToAwsString(options().region)));
+    auto _region = region();
+    // AWS S3 treats the us-east-1 differently than other regions
+    // https://docs.aws.amazon.com/cli/latest/reference/s3api/create-bucket.html
+    if (_region != "us-east-1") {
+      config.SetLocationConstraint(
+          S3Model::BucketLocationConstraintMapper::GetBucketLocationConstraintForName(
+              ToAwsString(_region)));
+    }
     req.SetBucket(ToAwsString(bucket));
     req.SetCreateBucketConfiguration(config);
 

From 2a0654046d286614f770e1eed08adf7ea928b4ec Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Thu, 1 Jul 2021 17:37:33 +0200
Subject: [PATCH 490/719] ARROW-13234: [C++] Put extra padding spaces on the
 right

Closes #10639 from lidavidm/arrow-13234

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/compute/kernels/scalar_string.cc      | 12 ++++++------
 cpp/src/arrow/compute/kernels/scalar_string_test.cc |  4 ++--
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/scalar_string.cc b/cpp/src/arrow/compute/kernels/scalar_string.cc
index 7f596f22224..ab0a490eeb3 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string.cc
@@ -2845,9 +2845,9 @@ struct AsciiPadTransform : public StringTransformBase {
     int64_t left = 0;
     int64_t right = 0;
     if (PadLeft && PadRight) {
-      // If odd number of spaces, put the extra space on the left
-      right = spaces / 2;
-      left = spaces - right;
+      // If odd number of spaces, put the extra space on the right
+      left = spaces / 2;
+      right = spaces - left;
     } else if (PadLeft) {
       left = spaces;
     } else if (PadRight) {
@@ -2901,9 +2901,9 @@ struct Utf8PadTransform : public StringTransformBase {
     int64_t left = 0;
     int64_t right = 0;
     if (PadLeft && PadRight) {
-      // If odd number of spaces, put the extra space on the left
-      right = spaces / 2;
-      left = spaces - right;
+      // If odd number of spaces, put the extra space on the right
+      left = spaces / 2;
+      right = spaces - left;
     } else if (PadLeft) {
       left = spaces;
     } else if (PadRight) {
diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
index e88da14c288..67f1e02558b 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
@@ -1230,7 +1230,7 @@ TYPED_TEST(TestStringKernels, PadUTF8) {
   PadOptions options{/*width=*/5, "\xe2\x80\x88"};
   this->CheckUnary(
       "utf8_center", R"([null, "a", "bb", "b\u00E1r", "foobar"])", this->type(),
-      R"([null, "\u2008\u2008a\u2008\u2008", "\u2008\u2008bb\u2008", "\u2008b\u00E1r\u2008", "foobar"])",
+      R"([null, "\u2008\u2008a\u2008\u2008", "\u2008bb\u2008\u2008", "\u2008b\u00E1r\u2008", "foobar"])",
       &options);
   this->CheckUnary(
       "utf8_lpad", R"([null, "a", "bb", "b\u00E1r", "foobar"])", this->type(),
@@ -1401,7 +1401,7 @@ TYPED_TEST(TestStringKernels, SliceCodeunitsNegPos) {
 TYPED_TEST(TestStringKernels, PadAscii) {
   PadOptions options{/*width=*/5, " "};
   this->CheckUnary("ascii_center", R"([null, "a", "bb", "bar", "foobar"])", this->type(),
-                   R"([null, "  a  ", "  bb ", " bar ", "foobar"])", &options);
+                   R"([null, "  a  ", " bb  ", " bar ", "foobar"])", &options);
   this->CheckUnary("ascii_lpad", R"([null, "a", "bb", "bar", "foobar"])", this->type(),
                    R"([null, "    a", "   bb", "  bar", "foobar"])", &options);
   this->CheckUnary("ascii_rpad", R"([null, "a", "bb", "bar", "foobar"])", this->type(),

From 5600536843338f0761fefe95b2c1818ba40b8a3a Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Thu, 1 Jul 2021 17:51:59 +0200
Subject: [PATCH 491/719] ARROW-10316: [Python] Improve introspection of
 compute function options

Generate a signature for compute functions that better reflects the accepted arguments.

Example before:
```python
>>> pc.sum?
Signature: pc.sum(array, *, options=None, memory_pool=None, **kwargs)
Docstring:
Compute the sum of a numeric array.
[...]
```

Same example after:
```python
>>> ?pc.sum
Signature:
pc.sum(
    array,
    *,
    memory_pool=None,
    options=None,
    skip_nulls=True,
    min_count=1,
)
Docstring:
Compute the sum of a numeric array.
[...]
```

One caveat is that the individual options are not explicitly documented (yet):
```
Parameters
----------
array : Array-like
    Argument to compute function
memory_pool : pyarrow.MemoryPool, optional
    If not passed, will allocate memory from the default memory pool.
options : pyarrow.compute.ScalarAggregateOptions, optional
    Parameters altering compute function semantics
**kwargs : optional
    Parameters for ScalarAggregateOptions constructor. Either `options`
    or `**kwargs` can be passed, but not both at the same time.
```

Closes #10581 from pitrou/ARROW-10316-wrapped-compute-func

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 python/pyarrow/compute.py            | 78 ++++++++++++++--------------
 python/pyarrow/tests/test_compute.py | 20 ++++++-
 2 files changed, 58 insertions(+), 40 deletions(-)

diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py
index eadcdaa44a8..fbe9e2c5c0f 100644
--- a/python/pyarrow/compute.py
+++ b/python/pyarrow/compute.py
@@ -63,6 +63,7 @@
     list_functions,
 )
 
+import inspect
 from textwrap import dedent
 import warnings
 
@@ -70,20 +71,12 @@
 
 
 def _get_arg_names(func):
-    arg_names = func._doc.arg_names
-    if not arg_names:
-        if func.arity == 1:
-            arg_names = ["arg"]
-        elif func.arity == 2:
-            arg_names = ["left", "right"]
-        else:
-            raise NotImplementedError(
-                f"unsupported arity: {func.arity} (function: {func.name})")
-
-    return arg_names
+    return func._doc.arg_names
 
 
 def _decorate_compute_function(wrapper, exposed_name, func, option_class):
+    # Decorate the given compute function wrapper with useful metadata
+    # and documentation.
     wrapper.__arrow_compute_function__ = dict(name=func.name,
                                               arity=func.arity)
     wrapper.__name__ = exposed_name
@@ -174,41 +167,50 @@ def _handle_options(name, option_class, options, kwargs):
     return options
 
 
-_wrapper_template = dedent("""\
-    def make_wrapper(func, option_class):
-        def {func_name}({args_sig}{kwonly}, memory_pool=None):
-            return func.call([{args_sig}], None, memory_pool)
-        return {func_name}
-    """)
-
-_wrapper_options_template = dedent("""\
-    def make_wrapper(func, option_class):
-        def {func_name}({args_sig}{kwonly}, options=None, memory_pool=None,
-                        **kwargs):
-            options = _handle_options({func_name!r}, option_class, options,
+def _make_generic_wrapper(func_name, func, option_class):
+    if option_class is None:
+        def wrapper(*args, memory_pool=None):
+            return func.call(args, None, memory_pool)
+    else:
+        def wrapper(*args, memory_pool=None, options=None, **kwargs):
+            options = _handle_options(func_name, option_class, options,
                                       kwargs)
-            return func.call([{args_sig}], options, memory_pool)
-        return {func_name}
-    """)
+            return func.call(args, options, memory_pool)
+    return wrapper
+
+
+def _make_signature(arg_names, var_arg_names, option_class):
+    from inspect import Parameter
+    params = []
+    for name in arg_names:
+        params.append(Parameter(name, Parameter.POSITIONAL_OR_KEYWORD))
+    for name in var_arg_names:
+        params.append(Parameter(name, Parameter.VAR_POSITIONAL))
+    params.append(Parameter("memory_pool", Parameter.KEYWORD_ONLY,
+                            default=None))
+    if option_class is not None:
+        params.append(Parameter("options", Parameter.KEYWORD_ONLY,
+                                default=None))
+        options_sig = inspect.signature(option_class)
+        for p in options_sig.parameters.values():
+            # XXX for now, our generic wrappers don't allow positional
+            # option arguments
+            params.append(p.replace(kind=Parameter.KEYWORD_ONLY))
+    return inspect.Signature(params)
 
 
 def _wrap_function(name, func):
     option_class = _get_options_class(func)
     arg_names = _get_arg_names(func)
-    args_sig = ', '.join(arg_names)
-    kwonly = '' if arg_names[-1].startswith('*') else ', *'
-
-    # Generate templated wrapper, so that the signature matches
-    # the documented argument names.
-    ns = {}
-    if option_class is not None:
-        template = _wrapper_options_template
+    has_vararg = arg_names and arg_names[-1].startswith('*')
+    if has_vararg:
+        var_arg_names = [arg_names.pop().lstrip('*')]
     else:
-        template = _wrapper_template
-    exec(template.format(func_name=name, args_sig=args_sig, kwonly=kwonly),
-         globals(), ns)
-    wrapper = ns['make_wrapper'](func, option_class)
+        var_arg_names = []
 
+    wrapper = _make_generic_wrapper(name, func, option_class)
+    wrapper.__signature__ = _make_signature(arg_names, var_arg_names,
+                                            option_class)
     return _decorate_compute_function(wrapper, name, func, option_class)
 
 
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index 264da5805e1..6370a5d94e2 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -537,8 +537,8 @@ def test_min_max():
 
     # Missing argument
     with pytest.raises(
-            TypeError,
-            match=r"min_max\(\) missing 1 required positional argument"):
+            ValueError,
+            match=r"Function min_max accepts 1 argument"):
         s = pc.min_max()
 
 
@@ -616,6 +616,22 @@ def test_generated_docstrings():
         """)
 
 
+def test_generated_signatures():
+    # The self-documentation provided by signatures should show acceptable
+    # options and their default values.
+    sig = inspect.signature(pc.add)
+    assert str(sig) == "(x, y, *, memory_pool=None)"
+    sig = inspect.signature(pc.min_max)
+    assert str(sig) == ("(array, *, memory_pool=None, "
+                        "options=None, skip_nulls=True, min_count=1)")
+    sig = inspect.signature(pc.quantile)
+    assert str(sig) == ("(array, *, memory_pool=None, "
+                        "options=None, q=0.5, interpolation='linear')")
+    sig = inspect.signature(pc.binary_join_element_wise)
+    assert str(sig) == ("(*strings, memory_pool=None, options=None, "
+                        "null_handling='emit_null', null_replacement='')")
+
+
 # We use isprintable to find about codepoints that Python doesn't know, but
 # utf8proc does (or in a future version of Python the other way around).
 # These codepoints cannot be compared between Arrow and the Python

From 782d6546c41c0d58a03aff072331910ff6e69e28 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Thu, 1 Jul 2021 18:36:59 +0200
Subject: [PATCH 492/719] ARROW-13235: [C++][Python] Simplify mapping of
 function options

Also fixes ArithmeticOptions being unbound.

Closes #10640 from lidavidm/arrow-13235

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/compute/api_aggregate.h  | 12 ++++-----
 cpp/src/arrow/compute/api_scalar.cc    |  7 +++++
 cpp/src/arrow/compute/api_scalar.h     | 36 +++++++++++++-------------
 cpp/src/arrow/compute/api_vector.h     | 12 ++++-----
 cpp/src/arrow/compute/cast.h           |  2 +-
 cpp/src/arrow/compute/function_test.cc |  2 ++
 python/pyarrow/_compute.pyx            | 34 +++---------------------
 7 files changed, 43 insertions(+), 62 deletions(-)

diff --git a/cpp/src/arrow/compute/api_aggregate.h b/cpp/src/arrow/compute/api_aggregate.h
index 9be0b406aa4..7b6e2ef96de 100644
--- a/cpp/src/arrow/compute/api_aggregate.h
+++ b/cpp/src/arrow/compute/api_aggregate.h
@@ -46,7 +46,7 @@ class ExecContext;
 class ARROW_EXPORT ScalarAggregateOptions : public FunctionOptions {
  public:
   explicit ScalarAggregateOptions(bool skip_nulls = true, uint32_t min_count = 1);
-  constexpr static char const kTypeName[] = "scalar_aggregate";
+  constexpr static char const kTypeName[] = "ScalarAggregateOptions";
   static ScalarAggregateOptions Defaults() { return ScalarAggregateOptions{}; }
 
   bool skip_nulls;
@@ -60,7 +60,7 @@ class ARROW_EXPORT ScalarAggregateOptions : public FunctionOptions {
 class ARROW_EXPORT ModeOptions : public FunctionOptions {
  public:
   explicit ModeOptions(int64_t n = 1);
-  constexpr static char const kTypeName[] = "mode";
+  constexpr static char const kTypeName[] = "ModeOptions";
   static ModeOptions Defaults() { return ModeOptions{}; }
 
   int64_t n = 1;
@@ -73,7 +73,7 @@ class ARROW_EXPORT ModeOptions : public FunctionOptions {
 class ARROW_EXPORT VarianceOptions : public FunctionOptions {
  public:
   explicit VarianceOptions(int ddof = 0);
-  constexpr static char const kTypeName[] = "variance";
+  constexpr static char const kTypeName[] = "VarianceOptions";
   static VarianceOptions Defaults() { return VarianceOptions{}; }
 
   int ddof = 0;
@@ -98,7 +98,7 @@ class ARROW_EXPORT QuantileOptions : public FunctionOptions {
   explicit QuantileOptions(std::vector<double> q,
                            enum Interpolation interpolation = LINEAR);
 
-  constexpr static char const kTypeName[] = "quantile";
+  constexpr static char const kTypeName[] = "QuantileOptions";
   static QuantileOptions Defaults() { return QuantileOptions{}; }
 
   /// quantile must be between 0 and 1 inclusive
@@ -115,7 +115,7 @@ class ARROW_EXPORT TDigestOptions : public FunctionOptions {
                           uint32_t buffer_size = 500);
   explicit TDigestOptions(std::vector<double> q, uint32_t delta = 100,
                           uint32_t buffer_size = 500);
-  constexpr static char const kTypeName[] = "t_digest";
+  constexpr static char const kTypeName[] = "TDigestOptions";
   static TDigestOptions Defaults() { return TDigestOptions{}; }
 
   /// quantile must be between 0 and 1 inclusive
@@ -132,7 +132,7 @@ class ARROW_EXPORT IndexOptions : public FunctionOptions {
   explicit IndexOptions(std::shared_ptr<Scalar> value);
   // Default constructor for serialization
   IndexOptions();
-  constexpr static char const kTypeName[] = "index";
+  constexpr static char const kTypeName[] = "IndexOptions";
 
   std::shared_ptr<Scalar> value;
 };
diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc
index 11b5b45b7a0..2021c8a30c6 100644
--- a/cpp/src/arrow/compute/api_scalar.cc
+++ b/cpp/src/arrow/compute/api_scalar.cc
@@ -110,6 +110,8 @@ using ::arrow::internal::checked_cast;
 namespace internal {
 namespace {
 using ::arrow::internal::DataMember;
+static auto kArithmeticOptionsType = GetFunctionOptionsType<ArithmeticOptions>(
+    DataMember("check_overflow", &ArithmeticOptions::check_overflow));
 static auto kElementWiseAggregateOptionsType =
     GetFunctionOptionsType<ElementWiseAggregateOptions>(
         DataMember("skip_nulls", &ElementWiseAggregateOptions::skip_nulls));
@@ -159,6 +161,10 @@ static auto kProjectOptionsType = GetFunctionOptionsType<ProjectOptions>(
 }  // namespace
 }  // namespace internal
 
+ArithmeticOptions::ArithmeticOptions(bool check_overflow)
+    : FunctionOptions(internal::kArithmeticOptionsType), check_overflow(check_overflow) {}
+constexpr char ArithmeticOptions::kTypeName[];
+
 ElementWiseAggregateOptions::ElementWiseAggregateOptions(bool skip_nulls)
     : FunctionOptions(internal::kElementWiseAggregateOptionsType),
       skip_nulls(skip_nulls) {}
@@ -274,6 +280,7 @@ constexpr char ProjectOptions::kTypeName[];
 
 namespace internal {
 void RegisterScalarOptions(FunctionRegistry* registry) {
+  DCHECK_OK(registry->AddFunctionOptionsType(kArithmeticOptionsType));
   DCHECK_OK(registry->AddFunctionOptionsType(kElementWiseAggregateOptionsType));
   DCHECK_OK(registry->AddFunctionOptionsType(kJoinOptionsType));
   DCHECK_OK(registry->AddFunctionOptionsType(kMatchSubstringOptionsType));
diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h
index bacb287d6bc..89b4faca940 100644
--- a/cpp/src/arrow/compute/api_scalar.h
+++ b/cpp/src/arrow/compute/api_scalar.h
@@ -37,17 +37,17 @@ namespace compute {
 ///
 /// @{
 
-struct ARROW_EXPORT ArithmeticOptions {
+class ARROW_EXPORT ArithmeticOptions : public FunctionOptions {
  public:
-  explicit ArithmeticOptions(bool check_overflow = false)
-      : check_overflow(check_overflow) {}
+  explicit ArithmeticOptions(bool check_overflow = false);
+  constexpr static char const kTypeName[] = "ArithmeticOptions";
   bool check_overflow;
 };
 
 class ARROW_EXPORT ElementWiseAggregateOptions : public FunctionOptions {
  public:
   explicit ElementWiseAggregateOptions(bool skip_nulls = true);
-  constexpr static char const kTypeName[] = "element_wise_aggregate";
+  constexpr static char const kTypeName[] = "ElementWiseAggregateOptions";
   static ElementWiseAggregateOptions Defaults() { return ElementWiseAggregateOptions{}; }
 
   bool skip_nulls;
@@ -67,7 +67,7 @@ class ARROW_EXPORT JoinOptions : public FunctionOptions {
   };
   explicit JoinOptions(NullHandlingBehavior null_handling = EMIT_NULL,
                        std::string null_replacement = "");
-  constexpr static char const kTypeName[] = "join";
+  constexpr static char const kTypeName[] = "JoinOptions";
   static JoinOptions Defaults() { return JoinOptions(); }
   NullHandlingBehavior null_handling;
   std::string null_replacement;
@@ -77,7 +77,7 @@ class ARROW_EXPORT MatchSubstringOptions : public FunctionOptions {
  public:
   explicit MatchSubstringOptions(std::string pattern, bool ignore_case = false);
   MatchSubstringOptions();
-  constexpr static char const kTypeName[] = "match_substring";
+  constexpr static char const kTypeName[] = "MatchSubstringOptions";
 
   /// The exact substring (or regex, depending on kernel) to look for inside input values.
   std::string pattern;
@@ -88,7 +88,7 @@ class ARROW_EXPORT MatchSubstringOptions : public FunctionOptions {
 class ARROW_EXPORT SplitOptions : public FunctionOptions {
  public:
   explicit SplitOptions(int64_t max_splits = -1, bool reverse = false);
-  constexpr static char const kTypeName[] = "split";
+  constexpr static char const kTypeName[] = "SplitOptions";
 
   /// Maximum number of splits allowed, or unlimited when -1
   int64_t max_splits;
@@ -101,7 +101,7 @@ class ARROW_EXPORT SplitPatternOptions : public FunctionOptions {
   explicit SplitPatternOptions(std::string pattern, int64_t max_splits = -1,
                                bool reverse = false);
   SplitPatternOptions();
-  constexpr static char const kTypeName[] = "split_pattern";
+  constexpr static char const kTypeName[] = "SplitPatternOptions";
 
   /// The exact substring to split on.
   std::string pattern;
@@ -115,7 +115,7 @@ class ARROW_EXPORT ReplaceSliceOptions : public FunctionOptions {
  public:
   explicit ReplaceSliceOptions(int64_t start, int64_t stop, std::string replacement);
   ReplaceSliceOptions();
-  constexpr static char const kTypeName[] = "replace_slice";
+  constexpr static char const kTypeName[] = "ReplaceSliceOptions";
 
   /// Index to start slicing at
   int64_t start;
@@ -130,7 +130,7 @@ class ARROW_EXPORT ReplaceSubstringOptions : public FunctionOptions {
   explicit ReplaceSubstringOptions(std::string pattern, std::string replacement,
                                    int64_t max_replacements = -1);
   ReplaceSubstringOptions();
-  constexpr static char const kTypeName[] = "replace_substring";
+  constexpr static char const kTypeName[] = "ReplaceSubstringOptions";
 
   /// Pattern to match, literal, or regular expression depending on which kernel is used
   std::string pattern;
@@ -144,7 +144,7 @@ class ARROW_EXPORT ExtractRegexOptions : public FunctionOptions {
  public:
   explicit ExtractRegexOptions(std::string pattern);
   ExtractRegexOptions();
-  constexpr static char const kTypeName[] = "extract_regex";
+  constexpr static char const kTypeName[] = "ExtractRegexOptions";
 
   /// Regular expression with named capture fields
   std::string pattern;
@@ -155,7 +155,7 @@ class ARROW_EXPORT SetLookupOptions : public FunctionOptions {
  public:
   explicit SetLookupOptions(Datum value_set, bool skip_nulls = false);
   SetLookupOptions();
-  constexpr static char const kTypeName[] = "set_lookup";
+  constexpr static char const kTypeName[] = "SetLookupOptions";
 
   /// The set of values to look up input values into.
   Datum value_set;
@@ -172,7 +172,7 @@ class ARROW_EXPORT StrptimeOptions : public FunctionOptions {
  public:
   explicit StrptimeOptions(std::string format, TimeUnit::type unit);
   StrptimeOptions();
-  constexpr static char const kTypeName[] = "strptime";
+  constexpr static char const kTypeName[] = "StrptimeOptions";
 
   std::string format;
   TimeUnit::type unit;
@@ -182,7 +182,7 @@ class ARROW_EXPORT PadOptions : public FunctionOptions {
  public:
   explicit PadOptions(int64_t width, std::string padding = " ");
   PadOptions();
-  constexpr static char const kTypeName[] = "pad";
+  constexpr static char const kTypeName[] = "PadOptions";
 
   /// The desired string length.
   int64_t width;
@@ -194,7 +194,7 @@ class ARROW_EXPORT TrimOptions : public FunctionOptions {
  public:
   explicit TrimOptions(std::string characters);
   TrimOptions();
-  constexpr static char const kTypeName[] = "trim";
+  constexpr static char const kTypeName[] = "TrimOptions";
 
   /// The individual characters that can be trimmed from the string.
   std::string characters;
@@ -205,7 +205,7 @@ class ARROW_EXPORT SliceOptions : public FunctionOptions {
   explicit SliceOptions(int64_t start, int64_t stop = std::numeric_limits<int64_t>::max(),
                         int64_t step = 1);
   SliceOptions();
-  constexpr static char const kTypeName[] = "slice";
+  constexpr static char const kTypeName[] = "SliceOptions";
   int64_t start, stop, step;
 };
 
@@ -222,7 +222,7 @@ class ARROW_EXPORT CompareOptions : public FunctionOptions {
  public:
   explicit CompareOptions(CompareOperator op);
   CompareOptions();
-  constexpr static char const kTypeName[] = "compare";
+  constexpr static char const kTypeName[] = "CompareOptions";
   enum CompareOperator op;
 };
 
@@ -232,7 +232,7 @@ class ARROW_EXPORT ProjectOptions : public FunctionOptions {
                  std::vector<std::shared_ptr<const KeyValueMetadata>> m);
   explicit ProjectOptions(std::vector<std::string> n);
   ProjectOptions();
-  constexpr static char const kTypeName[] = "project";
+  constexpr static char const kTypeName[] = "ProjectOptions";
 
   /// Names for wrapped columns
   std::vector<std::string> field_names;
diff --git a/cpp/src/arrow/compute/api_vector.h b/cpp/src/arrow/compute/api_vector.h
index 2282b0098f9..6021492320e 100644
--- a/cpp/src/arrow/compute/api_vector.h
+++ b/cpp/src/arrow/compute/api_vector.h
@@ -43,7 +43,7 @@ class ARROW_EXPORT FilterOptions : public FunctionOptions {
   };
 
   explicit FilterOptions(NullSelectionBehavior null_selection = DROP);
-  constexpr static char const kTypeName[] = "filter";
+  constexpr static char const kTypeName[] = "FilterOptions";
   static FilterOptions Defaults() { return FilterOptions(); }
 
   NullSelectionBehavior null_selection_behavior = DROP;
@@ -52,7 +52,7 @@ class ARROW_EXPORT FilterOptions : public FunctionOptions {
 class ARROW_EXPORT TakeOptions : public FunctionOptions {
  public:
   explicit TakeOptions(bool boundscheck = true);
-  constexpr static char const kTypeName[] = "take";
+  constexpr static char const kTypeName[] = "TakeOptions";
   static TakeOptions BoundsCheck() { return TakeOptions(true); }
   static TakeOptions NoBoundsCheck() { return TakeOptions(false); }
   static TakeOptions Defaults() { return BoundsCheck(); }
@@ -72,7 +72,7 @@ class ARROW_EXPORT DictionaryEncodeOptions : public FunctionOptions {
   };
 
   explicit DictionaryEncodeOptions(NullEncodingBehavior null_encoding = MASK);
-  constexpr static char const kTypeName[] = "dictionary_encode";
+  constexpr static char const kTypeName[] = "DictionaryEncodeOptions";
   static DictionaryEncodeOptions Defaults() { return DictionaryEncodeOptions(); }
 
   NullEncodingBehavior null_encoding_behavior = MASK;
@@ -104,7 +104,7 @@ class ARROW_EXPORT SortKey : public util::EqualityComparable<SortKey> {
 class ARROW_EXPORT ArraySortOptions : public FunctionOptions {
  public:
   explicit ArraySortOptions(SortOrder order = SortOrder::Ascending);
-  constexpr static char const kTypeName[] = "array_sort";
+  constexpr static char const kTypeName[] = "ArraySortOptions";
   static ArraySortOptions Defaults() { return ArraySortOptions{}; }
 
   SortOrder order;
@@ -113,7 +113,7 @@ class ARROW_EXPORT ArraySortOptions : public FunctionOptions {
 class ARROW_EXPORT SortOptions : public FunctionOptions {
  public:
   explicit SortOptions(std::vector<SortKey> sort_keys = {});
-  constexpr static char const kTypeName[] = "sort";
+  constexpr static char const kTypeName[] = "SortOptions";
   static SortOptions Defaults() { return SortOptions{}; }
 
   std::vector<SortKey> sort_keys;
@@ -124,7 +124,7 @@ class ARROW_EXPORT PartitionNthOptions : public FunctionOptions {
  public:
   explicit PartitionNthOptions(int64_t pivot);
   PartitionNthOptions() : PartitionNthOptions(0) {}
-  constexpr static char const kTypeName[] = "partition_nth";
+  constexpr static char const kTypeName[] = "PartitionNthOptions";
 
   /// The index into the equivalent sorted array of the partition pivot element.
   int64_t pivot;
diff --git a/cpp/src/arrow/compute/cast.h b/cpp/src/arrow/compute/cast.h
index 8abd2a71bca..131f57f892f 100644
--- a/cpp/src/arrow/compute/cast.h
+++ b/cpp/src/arrow/compute/cast.h
@@ -45,7 +45,7 @@ class ARROW_EXPORT CastOptions : public FunctionOptions {
  public:
   explicit CastOptions(bool safe = true);
 
-  constexpr static char const kTypeName[] = "cast";
+  constexpr static char const kTypeName[] = "CastOptions";
   static CastOptions Safe(std::shared_ptr<DataType> to_type = NULLPTR) {
     CastOptions safe(true);
     safe.to_type = std::move(to_type);
diff --git a/cpp/src/arrow/compute/function_test.cc b/cpp/src/arrow/compute/function_test.cc
index 4c42ce39600..bbe514af09a 100644
--- a/cpp/src/arrow/compute/function_test.cc
+++ b/cpp/src/arrow/compute/function_test.cc
@@ -53,6 +53,8 @@ TEST(FunctionOptions, Equality) {
   options.emplace_back(new IndexOptions(ScalarFromJSON(int64(), "16")));
   options.emplace_back(new IndexOptions(ScalarFromJSON(boolean(), "true")));
   options.emplace_back(new IndexOptions(ScalarFromJSON(boolean(), "null")));
+  options.emplace_back(new ArithmeticOptions());
+  options.emplace_back(new ArithmeticOptions(/*check_overflow=*/true));
   options.emplace_back(new ElementWiseAggregateOptions());
   options.emplace_back(new ElementWiseAggregateOptions(/*skip_nulls=*/false));
   options.emplace_back(new JoinOptions());
diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx
index c8393103dc5..63e6fffc782 100644
--- a/python/pyarrow/_compute.pyx
+++ b/python/pyarrow/_compute.pyx
@@ -549,38 +549,10 @@ cdef class FunctionOptions(_Weakrefable):
             unique_ptr[CFunctionOptions] c_options
         c_options = move(GetResultValue(move(maybe_options)))
         type_name = frombytes(c_options.get().options_type().type_name())
-        mapping = {
-            "array_sort": ArraySortOptions,
-            "cast": CastOptions,
-            "dictionary_encode": DictionaryEncodeOptions,
-            "element_wise_aggregate": ElementWiseAggregateOptions,
-            "extract_regex": ExtractRegexOptions,
-            "filter": FilterOptions,
-            "index": IndexOptions,
-            "join": JoinOptions,
-            "match_substring": MatchSubstringOptions,
-            "mode": ModeOptions,
-            "pad": PadOptions,
-            "partition_nth": PartitionNthOptions,
-            "project": ProjectOptions,
-            "quantile": QuantileOptions,
-            "replace_slice": ReplaceSliceOptions,
-            "replace_substring": ReplaceSubstringOptions,
-            "set_lookup": SetLookupOptions,
-            "scalar_aggregate": ScalarAggregateOptions,
-            "slice": SliceOptions,
-            "sort": SortOptions,
-            "split": SplitOptions,
-            "split_pattern": SplitPatternOptions,
-            "strptime": StrptimeOptions,
-            "t_digest": TDigestOptions,
-            "take": TakeOptions,
-            "trim": TrimOptions,
-            "variance": VarianceOptions,
-        }
-        if type_name not in mapping:
+        module = globals()
+        if type_name not in module:
             raise ValueError(f"Cannot deserialize '{type_name}'")
-        klass = mapping[type_name]
+        klass = module[type_name]
         options = klass.__new__(klass)
         (<FunctionOptions> options).init(move(c_options))
         return options

From fec99a37233acdab4a885959b8497f7bead41e01 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Fri, 2 Jul 2021 12:29:38 +0200
Subject: [PATCH 493/719] ARROW-6513: [CI] Rename conda requirements files to
 have txt extension instead of yml
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #10572 from kszucs/ARROW-6513

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 .github/workflows/python.yml                              | 6 ------
 ci/appveyor-cpp-setup.bat                                 | 6 +++---
 ci/{conda_env_archery.yml => conda_env_archery.txt}       | 0
 ci/{conda_env_cpp.yml => conda_env_cpp.txt}               | 1 +
 ci/{conda_env_gandiva.yml => conda_env_gandiva.txt}       | 0
 ...onda_env_gandiva_win.yml => conda_env_gandiva_win.txt} | 0
 ci/{conda_env_python.yml => conda_env_python.txt}         | 0
 ci/{conda_env_r.yml => conda_env_r.txt}                   | 0
 ci/{conda_env_sphinx.yml => conda_env_sphinx.txt}         | 0
 ci/{conda_env_unix.yml => conda_env_unix.txt}             | 0
 ci/docker/conda-cpp.dockerfile                            | 8 ++++----
 ci/docker/conda-integration.dockerfile                    | 5 ++---
 ci/docker/conda-python.dockerfile                         | 4 ++--
 ci/docker/conda.dockerfile                                | 4 ++--
 14 files changed, 14 insertions(+), 20 deletions(-)
 rename ci/{conda_env_archery.yml => conda_env_archery.txt} (100%)
 rename ci/{conda_env_cpp.yml => conda_env_cpp.txt} (93%)
 rename ci/{conda_env_gandiva.yml => conda_env_gandiva.txt} (100%)
 rename ci/{conda_env_gandiva_win.yml => conda_env_gandiva_win.txt} (100%)
 rename ci/{conda_env_python.yml => conda_env_python.txt} (100%)
 rename ci/{conda_env_r.yml => conda_env_r.txt} (100%)
 rename ci/{conda_env_sphinx.yml => conda_env_sphinx.txt} (100%)
 rename ci/{conda_env_unix.yml => conda_env_unix.txt} (100%)

diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index c6781a4b149..59b14dc3287 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -140,12 +140,6 @@ jobs:
       - name: Fetch Submodules and Tags
         shell: bash
         run: ci/scripts/util_checkout.sh
-      - name: Show available RAM size
-        shell: bash
-        run: |
-          hwmemsize=$(sysctl -n hw.memsize)
-          ramsize=$(expr $hwmemsize / $((1024**3)))
-          echo "System Memory: ${ramsize} GB"
       - name: Install Dependencies
         shell: bash
         run: |
diff --git a/ci/appveyor-cpp-setup.bat b/ci/appveyor-cpp-setup.bat
index 261b1183f5b..4f790f05f00 100644
--- a/ci/appveyor-cpp-setup.bat
+++ b/ci/appveyor-cpp-setup.bat
@@ -50,16 +50,16 @@ set CONDA_PACKAGES=
 
 if "%ARROW_BUILD_GANDIVA%" == "ON" (
   @rem Install llvmdev in the toolchain if building gandiva.dll
-  set CONDA_PACKAGES=%CONDA_PACKAGES% --file=ci\conda_env_gandiva_win.yml
+  set CONDA_PACKAGES=%CONDA_PACKAGES% --file=ci\conda_env_gandiva_win.txt
 )
 if "%JOB%" == "Toolchain" (
   @rem Install pre-built "toolchain" packages for faster builds
-  set CONDA_PACKAGES=%CONDA_PACKAGES% --file=ci\conda_env_cpp.yml
+  set CONDA_PACKAGES=%CONDA_PACKAGES% --file=ci\conda_env_cpp.txt
 )
 if "%JOB%" NEQ "Build_Debug" (
   @rem Arrow conda environment is only required for the Build and Toolchain jobs
   conda create -n arrow -q -y -c conda-forge ^
-    --file=ci\conda_env_python.yml ^
+    --file=ci\conda_env_python.txt ^
     %CONDA_PACKAGES%  ^
     "cmake=3.17" ^
     "ninja" ^
diff --git a/ci/conda_env_archery.yml b/ci/conda_env_archery.txt
similarity index 100%
rename from ci/conda_env_archery.yml
rename to ci/conda_env_archery.txt
diff --git a/ci/conda_env_cpp.yml b/ci/conda_env_cpp.txt
similarity index 93%
rename from ci/conda_env_cpp.yml
rename to ci/conda_env_cpp.txt
index 1a8ae6d7edc..def40deb98a 100644
--- a/ci/conda_env_cpp.yml
+++ b/ci/conda_env_cpp.txt
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+# workaround for https://issues.apache.org/jira/browse/ARROW-13134
 aws-sdk-cpp<1.9
 benchmark=1.5.2
 boost-cpp>=1.68.0
diff --git a/ci/conda_env_gandiva.yml b/ci/conda_env_gandiva.txt
similarity index 100%
rename from ci/conda_env_gandiva.yml
rename to ci/conda_env_gandiva.txt
diff --git a/ci/conda_env_gandiva_win.yml b/ci/conda_env_gandiva_win.txt
similarity index 100%
rename from ci/conda_env_gandiva_win.yml
rename to ci/conda_env_gandiva_win.txt
diff --git a/ci/conda_env_python.yml b/ci/conda_env_python.txt
similarity index 100%
rename from ci/conda_env_python.yml
rename to ci/conda_env_python.txt
diff --git a/ci/conda_env_r.yml b/ci/conda_env_r.txt
similarity index 100%
rename from ci/conda_env_r.yml
rename to ci/conda_env_r.txt
diff --git a/ci/conda_env_sphinx.yml b/ci/conda_env_sphinx.txt
similarity index 100%
rename from ci/conda_env_sphinx.yml
rename to ci/conda_env_sphinx.txt
diff --git a/ci/conda_env_unix.yml b/ci/conda_env_unix.txt
similarity index 100%
rename from ci/conda_env_unix.yml
rename to ci/conda_env_unix.txt
diff --git a/ci/docker/conda-cpp.dockerfile b/ci/docker/conda-cpp.dockerfile
index 660ad821655..ff31930c06c 100644
--- a/ci/docker/conda-cpp.dockerfile
+++ b/ci/docker/conda-cpp.dockerfile
@@ -20,12 +20,12 @@ ARG arch
 FROM ${repo}:${arch}-conda
 
 # install the required conda packages into the test environment
-COPY ci/conda_env_cpp.yml \
-     ci/conda_env_gandiva.yml \
+COPY ci/conda_env_cpp.txt \
+     ci/conda_env_gandiva.txt \
      /arrow/ci/
 RUN conda install \
-        --file arrow/ci/conda_env_cpp.yml \
-        --file arrow/ci/conda_env_gandiva.yml \
+        --file arrow/ci/conda_env_cpp.txt \
+        --file arrow/ci/conda_env_gandiva.txt \
         compilers \
         doxygen \
         valgrind && \
diff --git a/ci/docker/conda-integration.dockerfile b/ci/docker/conda-integration.dockerfile
index 1f2c9ac5da2..8a7dd48b947 100644
--- a/ci/docker/conda-integration.dockerfile
+++ b/ci/docker/conda-integration.dockerfile
@@ -26,10 +26,9 @@ ARG jdk=8
 ARG go=1.15
 
 # Install Archery and integration dependencies
-COPY ci/conda_env_archery.yml /arrow/ci/
+COPY ci/conda_env_archery.txt /arrow/ci/
 RUN conda install -q \
-        --file arrow/ci/conda_env_cpp.yml \
-        --file arrow/ci/conda_env_archery.yml \
+        --file arrow/ci/conda_env_archery.txt \
         numpy \
         compilers \
         maven=${maven} \
diff --git a/ci/docker/conda-python.dockerfile b/ci/docker/conda-python.dockerfile
index a7e76974825..ab3f77be1b6 100644
--- a/ci/docker/conda-python.dockerfile
+++ b/ci/docker/conda-python.dockerfile
@@ -21,9 +21,9 @@ FROM ${repo}:${arch}-conda-cpp
 
 # install python specific packages
 ARG python=3.6
-COPY ci/conda_env_python.yml /arrow/ci/
+COPY ci/conda_env_python.txt /arrow/ci/
 RUN conda install -q \
-        --file arrow/ci/conda_env_python.yml \
+        --file arrow/ci/conda_env_python.txt \
         $([ "$python" == "3.6" -o "$python" == "3.7" ] && echo "pickle5") \
         python=${python} \
         nomkl && \
diff --git a/ci/docker/conda.dockerfile b/ci/docker/conda.dockerfile
index 3ea393d3fa9..2e773b5437e 100644
--- a/ci/docker/conda.dockerfile
+++ b/ci/docker/conda.dockerfile
@@ -38,8 +38,8 @@ RUN /arrow/ci/scripts/install_conda.sh ${arch} linux latest ${prefix}
 RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest ${prefix}
 
 # create a conda environment
-ADD ci/conda_env_unix.yml /arrow/ci/
-RUN conda create -n arrow --file arrow/ci/conda_env_unix.yml git && \
+ADD ci/conda_env_unix.txt /arrow/ci/
+RUN conda create -n arrow --file arrow/ci/conda_env_unix.txt git && \
     conda clean --all
 
 # activate the created environment by default

From cc0006711814e6eb1c486876b54591bd172ef06d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Fri, 2 Jul 2021 13:27:27 +0200
Subject: [PATCH 494/719] ARROW-13249: [Java][CI] Consistent timeout in the
 Java JNI build
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The JNI build gets stopped due to build timeout. Seems like the docker cache isn't valid anymore so it must build the docker image as well, but doesn't have the opportunity to push at the and of the build.

Closes #10631 from kszucs/jni-build-timeout

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 .github/workflows/java_jni.yml | 10 +---------
 docker-compose.yml             |  4 ++--
 2 files changed, 3 insertions(+), 11 deletions(-)

diff --git a/.github/workflows/java_jni.yml b/.github/workflows/java_jni.yml
index 7d7e26c75fe..48351f3c22a 100644
--- a/.github/workflows/java_jni.yml
+++ b/.github/workflows/java_jni.yml
@@ -50,15 +50,7 @@ jobs:
     name: AMD64 Debian 9 Java JNI (Gandiva, Plasma, ORC, Dataset)
     runs-on: ubuntu-latest
     if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
-    timeout-minutes: 40
-    strategy:
-      fail-fast: false
-      matrix:
-        jdk: [8]
-        maven: [3.5.2]
-    env:
-      JDK: ${{ matrix.jdk }}
-      MAVEN: ${{ matrix.maven }}
+    timeout-minutes: 90
     steps:
       - name: Checkout Arrow
         uses: actions/checkout@v2
diff --git a/docker-compose.yml b/docker-compose.yml
index c872ad42af6..f65609af7e1 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1223,12 +1223,12 @@ services:
     #   docker-compose build debian-java
     #   docker-compose build debian-java-jni
     #   docker-compose run debian-java-jni
-    image: ${REPO}:${ARCH}-debian-9-java-jni
+    image: ${REPO}:${ARCH}-debian-9-java-${JDK}-maven-${MAVEN}-jni
     build:
       context: .
       dockerfile: ci/docker/linux-apt-jni.dockerfile
       cache_from:
-        - ${REPO}:${ARCH}-debian-9-java-jni
+        - ${REPO}:${ARCH}-debian-9-java-${JDK}-maven-${MAVEN}-jni
       args:
         base: ${REPO}:${ARCH}-debian-9-java-${JDK}-maven-${MAVEN}
         llvm: ${LLVM}

From f85daeca7ff774ba455281d1ff3ecaced54347a4 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Fri, 2 Jul 2021 13:32:45 +0200
Subject: [PATCH 495/719] ARROW-13236: [Python] Include options class name in
 repr
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #10641 from lidavidm/arrow-13236

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 python/pyarrow/_compute.pyx          | 5 ++++-
 python/pyarrow/includes/libarrow.pxd | 1 +
 python/pyarrow/tests/test_compute.py | 5 ++++-
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx
index 63e6fffc782..02855ee78aa 100644
--- a/python/pyarrow/_compute.pyx
+++ b/python/pyarrow/_compute.pyx
@@ -558,7 +558,10 @@ cdef class FunctionOptions(_Weakrefable):
         return options
 
     def __repr__(self):
-        return frombytes(self.get_options().ToString())
+        type_name = self.__class__.__name__
+        # Remove {} so we can use our own braces
+        string_repr = frombytes(self.get_options().ToString())[1:-1]
+        return f"{type_name}({string_repr})"
 
     def __eq__(self, FunctionOptions other):
         return self.get_options().Equals(deref(other.get_options()))
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 07983b79f40..5057eadbb43 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -1756,6 +1756,7 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil:
 
     cdef cppclass CFunctionOptions" arrow::compute::FunctionOptions":
         const CFunctionOptionsType* options_type() const
+        const char* type_name() const
         c_bool Equals(const CFunctionOptions& other)
         c_string ToString()
         CResult[shared_ptr[CBuffer]] Serialize() const
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index 6370a5d94e2..35b37d82f95 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -137,7 +137,7 @@ def test_option_class_equality():
                 pytest.fail(f"Options class is not tested: {cls}")
     for option in options:
         assert option == option
-        assert repr(option)
+        assert repr(option).startswith(option.__class__.__name__)
         buf = option.serialize()
         deserialized = pc.FunctionOptions.deserialize(buf)
         assert option == deserialized
@@ -145,6 +145,9 @@ def test_option_class_equality():
     for option1, option2 in zip(options, options[1:]):
         assert option1 != option2
 
+    assert repr(pc.IndexOptions(pa.scalar(1))) == "IndexOptions(value=int64:1)"
+    assert repr(pc.ArraySortOptions()) == "ArraySortOptions(order=Ascending)"
+
 
 def test_list_functions():
     assert len(pc.list_functions()) > 10

From d7a8b468ab64d4318ae62ab90251830acbb9b88d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Fri, 2 Jul 2021 13:35:16 +0200
Subject: [PATCH 496/719] ARROW-13210: [Python][CI] Fix vcpkg caching mechanism
 for the macOS wheels
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With [configuration](https://github.com/ursacomputing/crossbow/blob/master/.github/workflows/cache_vcpkg.yml) on crossbow's main branch. Posting the results once the build are finished.

Closes #10635 from kszucs/gha-vcpkg-cache

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 ci/scripts/install_vcpkg.sh            | 39 ++++++++++++++++++++++
 dev/tasks/python-wheels/github.osx.yml | 46 +++++++++-----------------
 dev/tasks/tasks.yml                    |  1 +
 3 files changed, 55 insertions(+), 31 deletions(-)
 create mode 100755 ci/scripts/install_vcpkg.sh

diff --git a/ci/scripts/install_vcpkg.sh b/ci/scripts/install_vcpkg.sh
new file mode 100755
index 00000000000..fe99a7fea2f
--- /dev/null
+++ b/ci/scripts/install_vcpkg.sh
@@ -0,0 +1,39 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e
+
+if [ "$#" -ne 2 ]; then
+  echo "Usage: $0 <vcpkg version> <target directory>"
+  exit 1
+fi
+
+vcpkg_version=$1
+vcpkg_destination=$2
+vcpkg_patch=$(realpath $(dirname "${0}")/../vcpkg/ports.patch)
+
+git clone --depth 1 --branch ${vcpkg_version} https://github.com/microsoft/vcpkg ${vcpkg_destination}
+
+pushd ${vcpkg_destination}
+
+./bootstrap-vcpkg.sh -useSystemBinaries -disableMetrics
+git apply --ignore-whitespace ${vcpkg_patch}
+echo "Patch successfully applied!"
+
+popd
diff --git a/dev/tasks/python-wheels/github.osx.yml b/dev/tasks/python-wheels/github.osx.yml
index 84b094b84d1..a2e5c0af21b 100644
--- a/dev/tasks/python-wheels/github.osx.yml
+++ b/dev/tasks/python-wheels/github.osx.yml
@@ -30,7 +30,7 @@ env:
   VCPKG_DEFAULT_TRIPLET: x64-osx-static-release
   VCPKG_FEATURE_FLAGS: "-manifests"
   VCPKG_OVERLAY_TRIPLETS: {{ "${{ github.workspace }}/arrow/ci/vcpkg" }}
-  ARROW_VCPKG_PATCH_FILE: {{ "${{ github.workspace }}/arrow/ci/vcpkg/ports.patch" }}
+  VCPKG_ROOT: {{ "${{ github.workspace }}/vcpkg" }}
 
 jobs:
   build:
@@ -40,34 +40,20 @@ jobs:
       {{ macros.github_checkout_arrow()|indent }}
 
       - name: Install System Dependencies
-        run: brew install bison ninja
-
-      # Restore from cache the previously built ports.
-      # If cache-miss, download and build vcpkg (aka "bootstrap vcpkg").
-      - name: Restore from Cache and Install Vcpkg
-        # Download and build vcpkg, without installing any port.
-        # If content is cached already, it is a no-op.
-        uses: kszucs/run-vcpkg@main
+        run: brew install bison coreutils ninja cmake
+
+      - uses: actions/cache@v2
+        id: vcpkg-cache
         with:
-          # Required to prevent cache eviction on crossbow's main branch
-          # where we build pre-build the vcpkg packages
-          setupOnly: true
-          doNotSaveCache: true
-          appendedCacheKey: "-macos-{{ macos_deployment_target }}-2021-06-25"
-          vcpkgDirectory: {{ "${{ github.workspace }}/vcpkg" }}
-          vcpkgGitCommitId: "2021.04.30"
-
-      - name: Patch Vcpkg Ports
-        run: |
-          set -ex
-          cd $VCPKG_ROOT
-          if ! git apply --reverse --check --ignore-whitespace ${ARROW_VCPKG_PATCH_FILE}; then
-            git apply --ignore-whitespace ${ARROW_VCPKG_PATCH_FILE}
-            echo "Patch successfully applied!"
-          fi
-
-      # Now that vcpkg is installed, it is being used to run with the desired arguments.
-      - name: Install Vcpkg Dependencies
+          path: vcpkg
+          key: vcpkg-{{ macos_deployment_target }}-{{ vcpkg_version }}-{{ "${{ hashFiles('arrow/ci/vcpkg/**') }}" }}
+
+      - name: Install Vcpkg
+        if: steps.vcpkg-cache.outputs.cache-hit != 'true'
+        shell: bash
+        run: arrow/ci/scripts/install_vcpkg.sh $VCPKG_VERSION $VCPKG_ROOT
+
+      - name: Install Packages
         run: |
           $VCPKG_ROOT/vcpkg install \
             abseil \
@@ -94,9 +80,7 @@ jobs:
 
       {% if arrow_s3 == "ON" %}
       - name: Install AWS SDK C++
-        run: |
-          $VCPKG_ROOT/vcpkg install \
-            aws-sdk-cpp[config,cognito-identity,core,identity-management,s3,sts,transfer]
+        run: $VCPKG_ROOT/vcpkg install aws-sdk-cpp[config,cognito-identity,core,identity-management,s3,sts,transfer]
       {% endif %}
 
       - name: Setup Multibuild
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index 2df24e3aada..e908a632845 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -359,6 +359,7 @@ tasks:
     ci: github
     template: python-wheels/github.osx.yml
     params:
+      vcpkg_version: "2021.04.30"
       python_version: {{ python_version }}
       macos_deployment_target: {{ macos_version }}
       arrow_s3: {{ arrow_s3 }}

From 74af39d486b5a750a8099abc163935e5c8d911bf Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Mon, 5 Jul 2021 10:22:03 +0900
Subject: [PATCH 497/719] ARROW-6312: [C++] Add support for "pkg-config
 --static arrow"

Closes #10626 from kou/cpp-pc-libs-private

Authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 cpp/CMakeLists.txt                            | 10 ++-
 cpp/cmake_modules/ThirdpartyToolchain.cmake   | 74 +++++++++++++++----
 cpp/examples/minimal_build/minimal.dockerfile |  3 +-
 cpp/examples/minimal_build/run_static.sh      | 33 ++++++++-
 .../system_dependency.dockerfile              |  1 +
 cpp/src/arrow/CMakeLists.txt                  | 17 ++++-
 cpp/src/arrow/arrow.pc.in                     |  2 +
 .../cpp/{cmake.rst => build_system.rst}       | 72 +++++++++++++++++-
 docs/source/cpp/getting_started.rst           |  2 +-
 9 files changed, 189 insertions(+), 25 deletions(-)
 rename docs/source/cpp/{cmake.rst => build_system.rst} (57%)

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 91b3528bf70..8a358db8b95 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -507,6 +507,11 @@ endif()
 include(BuildUtils)
 enable_testing()
 
+# For arrow.pc. Requires.private and Libs.private are used when
+# "pkg-config --libs --static arrow" is used.
+set(ARROW_PC_REQUIRES_PRIVATE)
+set(ARROW_PC_LIBS_PRIVATE)
+
 include(ThirdpartyToolchain)
 
 # Add common flags
@@ -855,8 +860,9 @@ endif()
 
 set(ARROW_SYSTEM_LINK_LIBS)
 
-if(THREADS_FOUND)
-  list(APPEND ARROW_SYSTEM_LINK_LIBS Threads::Threads)
+list(APPEND ARROW_SYSTEM_LINK_LIBS Threads::Threads)
+if(CMAKE_THREAD_LIBS_INIT)
+  string(APPEND ARROW_PC_LIBS_PRIVATE " ${CMAKE_THREAD_LIBS_INIT}")
 endif()
 
 if(WIN32)
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index 40f73f92129..ab2dd168a09 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -196,6 +196,7 @@ endmacro()
 macro(resolve_dependency DEPENDENCY_NAME)
   set(options)
   set(one_value_args HAVE_ALT IS_RUNTIME_DEPENDENCY REQUIRED_VERSION USE_CONFIG)
+  set(multi_value_args PC_PACKAGE_NAMES)
   cmake_parse_arguments(ARG
                         "${options}"
                         "${one_value_args}"
@@ -236,6 +237,13 @@ macro(resolve_dependency DEPENDENCY_NAME)
   if(${DEPENDENCY_NAME}_SOURCE STREQUAL "SYSTEM" AND ARG_IS_RUNTIME_DEPENDENCY)
     provide_find_module(${PACKAGE_NAME})
     list(APPEND ARROW_SYSTEM_DEPENDENCIES ${PACKAGE_NAME})
+    find_package(PkgConfig QUIET)
+    foreach(ARG_PC_PACKAGE_NAME ${ARG_PC_PACKAGE_NAMES})
+      pkg_check_modules(${ARG_PC_PACKAGE_NAME}_PC ${ARG_PC_PACKAGE_NAME} QUIET)
+      if(${${ARG_PC_PACKAGE_NAME}_PC_FOUND})
+        string(APPEND ARROW_PC_REQUIRES_PRIVATE " ${ARG_PC_PACKAGE_NAME}")
+      endif()
+    endforeach()
   endif()
 endmacro()
 
@@ -933,7 +941,11 @@ macro(build_snappy)
 endmacro()
 
 if(ARROW_WITH_SNAPPY)
-  resolve_dependency(Snappy)
+  resolve_dependency(Snappy PC_PACKAGE_NAMES snappy)
+  if(${Snappy_SOURCE} STREQUAL "SYSTEM" AND NOT snappy_PC_FOUND)
+    get_target_property(SNAPPY_LIB Snappy::snappy IMPORTED_LOCATION)
+    string(APPEND ARROW_PC_LIBS_PRIVATE " ${SNAPPY_LIB}")
+  endif()
   # TODO: Don't use global includes but rather target_include_directories
   get_target_property(SNAPPY_INCLUDE_DIRS Snappy::snappy INTERFACE_INCLUDE_DIRECTORIES)
   include_directories(SYSTEM ${SNAPPY_INCLUDE_DIRS})
@@ -998,7 +1010,7 @@ macro(build_brotli)
 endmacro()
 
 if(ARROW_WITH_BROTLI)
-  resolve_dependency(Brotli)
+  resolve_dependency(Brotli PC_PACKAGE_NAMES libbrotlidec libbrotlienc)
   # TODO: Don't use global includes but rather target_include_directories
   get_target_property(BROTLI_INCLUDE_DIR Brotli::brotlicommon
                       INTERFACE_INCLUDE_DIRECTORIES)
@@ -1079,9 +1091,9 @@ macro(build_glog)
   )
   set(GLOG_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
   set(GLOG_CMAKE_C_FLAGS "${EP_C_FLAGS} -fPIC")
-  if(Threads::Threads)
-    set(GLOG_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -pthread")
-    set(GLOG_CMAKE_C_FLAGS "${EP_C_FLAGS} -fPIC -pthread")
+  if(CMAKE_THREAD_LIBS_INIT)
+    set(GLOG_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_THREAD_LIBS_INIT}")
+    set(GLOG_CMAKE_C_FLAGS "${EP_C_FLAGS} ${CMAKE_THREAD_LIBS_INIT}")
   endif()
 
   if(APPLE)
@@ -1117,7 +1129,7 @@ macro(build_glog)
 endmacro()
 
 if(ARROW_USE_GLOG)
-  resolve_dependency(GLOG)
+  resolve_dependency(GLOG PC_PACKAGE_NAMES libglog)
   # TODO: Don't use global includes but rather target_include_directories
   get_target_property(GLOG_INCLUDE_DIR glog::glog INTERFACE_INCLUDE_DIRECTORIES)
   include_directories(SYSTEM ${GLOG_INCLUDE_DIR})
@@ -1292,7 +1304,11 @@ if(ARROW_WITH_THRIFT)
   # to build Boost, so don't look again if already found.
   if(NOT Thrift_FOUND AND NOT THRIFT_FOUND)
     # Thrift c++ code generated by 0.13 requires 0.11 or greater
-    resolve_dependency(Thrift REQUIRED_VERSION 0.11.0)
+    resolve_dependency(Thrift
+                       REQUIRED_VERSION
+                       0.11.0
+                       PC_PACKAGE_NAMES
+                       thrift)
   endif()
   # TODO: Don't use global includes but rather target_include_directories
   include_directories(SYSTEM ${THRIFT_INCLUDE_DIR})
@@ -1392,7 +1408,11 @@ if(ARROW_WITH_PROTOBUF)
   else()
     set(ARROW_PROTOBUF_REQUIRED_VERSION "2.6.1")
   endif()
-  resolve_dependency(Protobuf REQUIRED_VERSION ${ARROW_PROTOBUF_REQUIRED_VERSION})
+  resolve_dependency(Protobuf
+                     REQUIRED_VERSION
+                     ${ARROW_PROTOBUF_REQUIRED_VERSION}
+                     PC_PACKAGE_NAMES
+                     protobuf)
 
   if(ARROW_PROTOBUF_USE_SHARED AND MSVC_TOOLCHAIN)
     add_definitions(-DPROTOBUF_USE_DLLS)
@@ -1825,7 +1845,11 @@ if(ARROW_BUILD_BENCHMARKS)
     # ci/conda_env_cpp.yml.
     set(BENCHMARK_REQUIRED_VERSION 0.0.0)
   endif()
-  resolve_dependency(benchmark REQUIRED_VERSION ${BENCHMARK_REQUIRED_VERSION})
+  resolve_dependency(benchmark
+                     REQUIRED_VERSION
+                     ${BENCHMARK_REQUIRED_VERSION}
+                     IS_RUNTIME_DEPENDENCY
+                     FALSE)
   # TODO: Don't use global includes but rather target_include_directories
   get_target_property(BENCHMARK_INCLUDE_DIR benchmark::benchmark
                       INTERFACE_INCLUDE_DIRECTORIES)
@@ -1940,7 +1964,7 @@ macro(build_zlib)
 endmacro()
 
 if(ARROW_WITH_ZLIB)
-  resolve_dependency(ZLIB)
+  resolve_dependency(ZLIB PC_PACKAGE_NAMES zlib)
 
   # TODO: Don't use global includes but rather target_include_directories
   get_target_property(ZLIB_INCLUDE_DIR ZLIB::ZLIB INTERFACE_INCLUDE_DIRECTORIES)
@@ -1996,7 +2020,7 @@ macro(build_lz4)
 endmacro()
 
 if(ARROW_WITH_LZ4)
-  resolve_dependency(Lz4)
+  resolve_dependency(Lz4 PC_PACKAGE_NAMES liblz4)
 
   # TODO: Don't use global includes but rather target_include_directories
   get_target_property(LZ4_INCLUDE_DIR LZ4::lz4 INTERFACE_INCLUDE_DIRECTORIES)
@@ -2060,7 +2084,7 @@ macro(build_zstd)
 endmacro()
 
 if(ARROW_WITH_ZSTD)
-  resolve_dependency(zstd)
+  resolve_dependency(zstd PC_PACKAGE_NAMES libzstd)
 
   if(TARGET zstd::libzstd)
     set(ARROW_ZSTD_LIBZSTD zstd::libzstd)
@@ -2120,7 +2144,14 @@ macro(build_re2)
 endmacro()
 
 if(ARROW_WITH_RE2)
+  # Don't specify "PC_PACKAGE_NAMES re2" here because re2.pc may
+  # include -std=c++11. It's not compatible with C source and C++
+  # source not uses C++ 11.
   resolve_dependency(re2 HAVE_ALT TRUE)
+  if(${re2_SOURCE} STREQUAL "SYSTEM")
+    get_target_property(RE2_LIB re2::re2 IMPORTED_LOCATION)
+    string(APPEND ARROW_PC_LIBS_PRIVATE " ${RE2_LIB}")
+  endif()
   add_definitions(-DARROW_WITH_RE2)
 
   # TODO: Don't use global includes but rather target_include_directories
@@ -2169,6 +2200,9 @@ endmacro()
 
 if(ARROW_WITH_BZ2)
   resolve_dependency(BZip2)
+  if(${BZip2_SOURCE} STREQUAL "SYSTEM")
+    string(APPEND ARROW_PC_LIBS_PRIVATE " ${BZIP2_LIBRARIES}")
+  endif()
 
   if(NOT TARGET BZip2::BZip2)
     add_library(BZip2::BZip2 UNKNOWN IMPORTED)
@@ -2219,7 +2253,11 @@ macro(build_utf8proc)
 endmacro()
 
 if(ARROW_WITH_UTF8PROC)
-  resolve_dependency(utf8proc REQUIRED_VERSION "2.2.0")
+  resolve_dependency(utf8proc
+                     REQUIRED_VERSION
+                     "2.2.0"
+                     PC_PACKAGE_NAMES
+                     libutf8proc)
 
   add_definitions(-DARROW_WITH_UTF8PROC)
 
@@ -2287,7 +2325,11 @@ endmacro()
 # Dependencies for Arrow Flight RPC
 
 macro(build_grpc)
-  resolve_dependency(c-ares HAVE_ALT TRUE)
+  resolve_dependency(c-ares
+                     HAVE_ALT
+                     TRUE
+                     PC_PACKAGE_NAMES
+                     libcares)
   # TODO: Don't use global includes but rather target_include_directories
   get_target_property(c-ares_INCLUDE_DIR c-ares::cares INTERFACE_INCLUDE_DIRECTORIES)
   include_directories(SYSTEM ${c-ares_INCLUDE_DIR})
@@ -2551,7 +2593,9 @@ if(ARROW_WITH_GRPC)
                      HAVE_ALT
                      TRUE
                      REQUIRED_VERSION
-                     ${ARROW_GRPC_REQUIRED_VERSION})
+                     ${ARROW_GRPC_REQUIRED_VERSION}
+                     PC_PACKAGE_NAMES
+                     grpc++)
 
   # TODO: Don't use global includes but rather target_include_directories
   get_target_property(GRPC_INCLUDE_DIR gRPC::grpc++ INTERFACE_INCLUDE_DIRECTORIES)
diff --git a/cpp/examples/minimal_build/minimal.dockerfile b/cpp/examples/minimal_build/minimal.dockerfile
index 95f73e9a549..9361fc5e81d 100644
--- a/cpp/examples/minimal_build/minimal.dockerfile
+++ b/cpp/examples/minimal_build/minimal.dockerfile
@@ -22,5 +22,6 @@ ENV DEBIAN_FRONTEND=noninteractive
 RUN apt-get update -y -q && \
     apt-get install -y -q --no-install-recommends \
       build-essential \
-      cmake && \
+      cmake \
+      pkg-config && \
     apt-get clean && rm -rf /var/lib/apt/lists*
diff --git a/cpp/examples/minimal_build/run_static.sh b/cpp/examples/minimal_build/run_static.sh
index 05804a0366c..ff3bb894570 100755
--- a/cpp/examples/minimal_build/run_static.sh
+++ b/cpp/examples/minimal_build/run_static.sh
@@ -67,10 +67,12 @@ popd
 
 echo
 echo "=="
+echo "== CMake:"
 echo "== Building example project using Arrow C++ library"
 echo "=="
 echo
 
+rm -rf $EXAMPLE_BUILD_DIR
 mkdir -p $EXAMPLE_BUILD_DIR
 pushd $EXAMPLE_BUILD_DIR
 
@@ -81,10 +83,39 @@ popd
 
 echo
 echo "=="
+echo "== CMake:"
 echo "== Running example project"
 echo "=="
 echo
 
 pushd $EXAMPLE_DIR
 
-${EXAMPLE_BUILD_DIR}/arrow_example
+$EXAMPLE_BUILD_DIR/arrow_example
+
+echo
+echo "=="
+echo "== pkg-config"
+echo "== Building example project using Arrow C++ library"
+echo "=="
+echo
+
+rm -rf $EXAMPLE_BUILD_DIR
+mkdir -p $EXAMPLE_BUILD_DIR
+${CXX:-c++} \
+  -o $EXAMPLE_BUILD_DIR/arrow_example \
+  $EXAMPLE_DIR/example.cc \
+  $(PKG_CONFIG_PATH=$ARROW_BUILD_DIR/lib/pkgconfig \
+     pkg-config --cflags --libs --static arrow)
+
+popd
+
+echo
+echo "=="
+echo "== pkg-config:"
+echo "== Running example project"
+echo "=="
+echo
+
+pushd $EXAMPLE_DIR
+
+$EXAMPLE_BUILD_DIR/arrow_example
diff --git a/cpp/examples/minimal_build/system_dependency.dockerfile b/cpp/examples/minimal_build/system_dependency.dockerfile
index f0b29cef990..926fcaf6f4b 100644
--- a/cpp/examples/minimal_build/system_dependency.dockerfile
+++ b/cpp/examples/minimal_build/system_dependency.dockerfile
@@ -37,6 +37,7 @@ RUN apt-get update -y -q && \
       libthrift-dev \
       libutf8proc-dev \
       libzstd-dev \
+      pkg-config \
       protobuf-compiler \
       rapidjson-dev \
       zlib1g-dev && \
diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index 484c3e9e769..56e734226b0 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -490,6 +490,21 @@ endif()
 
 set(ARROW_ALL_SRCS ${ARROW_SRCS} ${ARROW_C_SRCS})
 
+if(ARROW_BUILD_STATIC AND ARROW_BUNDLED_STATIC_LIBS)
+  set(ARROW_BUILD_BUNDLED_DEPENDENCIES TRUE)
+else()
+  set(ARROW_BUILD_BUNDLED_DEPENDENCIES FALSE)
+endif()
+
+if(ARROW_BUILD_BUNDLED_DEPENDENCIES)
+  string(APPEND ARROW_PC_LIBS_PRIVATE " -larrow_bundled_dependencies")
+endif()
+# Need -latomic on Raspbian.
+# See also: https://issues.apache.org/jira/browse/ARROW-12860
+if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux" AND ${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7")
+  string(APPEND ARROW_PC_LIBS_PRIVATE " -latomic")
+endif()
+
 add_arrow_lib(arrow
               CMAKE_PACKAGE_NAME
               Arrow
@@ -536,7 +551,7 @@ if(ARROW_WITH_BACKTRACE)
   endforeach()
 endif()
 
-if(ARROW_BUILD_STATIC AND ARROW_BUNDLED_STATIC_LIBS)
+if(ARROW_BUILD_BUNDLED_DEPENDENCIES)
   arrow_car(_FIRST_LIB ${ARROW_BUNDLED_STATIC_LIBS})
   arrow_cdr(_OTHER_LIBS ${ARROW_BUNDLED_STATIC_LIBS})
   create_merged_static_lib(arrow_bundled_dependencies
diff --git a/cpp/src/arrow/arrow.pc.in b/cpp/src/arrow/arrow.pc.in
index 947d534fdbf..ef995fdc3db 100644
--- a/cpp/src/arrow/arrow.pc.in
+++ b/cpp/src/arrow/arrow.pc.in
@@ -25,5 +25,7 @@ full_so_version=@ARROW_FULL_SO_VERSION@
 Name: Apache Arrow
 Description: Arrow is a set of technologies that enable big-data systems to process and move data fast.
 Version: @ARROW_VERSION@
+Requires.private:@ARROW_PC_REQUIRES_PRIVATE@
 Libs: -L${libdir} -larrow
+Libs.private:@ARROW_PC_LIBS_PRIVATE@
 Cflags: -I${includedir}
diff --git a/docs/source/cpp/cmake.rst b/docs/source/cpp/build_system.rst
similarity index 57%
rename from docs/source/cpp/cmake.rst
rename to docs/source/cpp/build_system.rst
index f192988fc0c..c0d05e9dab3 100644
--- a/docs/source/cpp/cmake.rst
+++ b/docs/source/cpp/build_system.rst
@@ -18,6 +18,7 @@
 .. default-domain:: cpp
 .. highlight:: cpp
 
+===================================
 Using Arrow C++ in your own project
 ===================================
 
@@ -25,10 +26,16 @@ This section assumes you already have the Arrow C++ libraries on your
 system, either after installing them using a package manager or after
 :ref:`building them yourself <building-arrow-cpp>`.
 
-The recommended way to integrate the Arrow C++ libraries in your own C++
-project is to use CMake's
-`find_package <https://cmake.org/cmake/help/latest/command/find_package.html>`_
-function for locating and integrating dependencies.
+The recommended way to integrate the Arrow C++ libraries in your own
+C++ project is to use CMake's `find_package
+<https://cmake.org/cmake/help/latest/command/find_package.html>`_
+function for locating and integrating dependencies. If you don't use
+CMake as a build system, you can use `pkg-config
+<https://www.freedesktop.org/wiki/Software/pkg-config/>`_ to find
+installed the Arrow C++ libraries.
+
+CMake
+=====
 
 Basic usage
 -----------
@@ -70,3 +77,60 @@ In most cases, it is recommended to use the Arrow shared libraries.
 
 .. seealso::
    A Docker-based :doc:`minimal build example <examples/cmake_minimal_build>`.
+
+pkg-config
+==========
+
+Basic usage
+-----------
+
+You can get suitable build flags by the following command line:
+
+.. code-block:: shell
+
+   pkg-config --cflags --libs arrow
+
+If you want to link the Arrow C++ static library, you need to add
+``--static`` option:
+
+.. code-block:: shell
+
+   pkg-config --cflags --libs --static arrow
+
+This minimal ``Makefile`` file compiles a ``my_example.cc`` source
+file into an executable linked with the Arrow C++ shared library:
+
+.. code-block:: makefile
+
+   my_example: my_example.cc
+   	$(CXX) -o $@ $(CXXFLAGS) $< $$(pkg-config --cflags --libs arrow)
+
+Many build systems support pkg-config. For example:
+
+  * `GNU Autotools <https://people.freedesktop.org/~dbn/pkg-config-guide.html#using>`_
+  * `CMake <https://cmake.org/cmake/help/latest/module/FindPkgConfig.html>`_
+    (But you should use ``find_package(Arrow)`` instead.)
+  * `Meson <https://mesonbuild.com/Reference-manual.html#dependency>`_
+
+Available packages
+------------------
+
+The Arrow C++ provides a pkg-config package for each module. Here are
+all available packages:
+
+  * ``arrow-csv``
+  * ``arrow-cuda``
+  * ``arrow-dataset``
+  * ``arrow-filesystem``
+  * ``arrow-flight-testing``
+  * ``arrow-flight``
+  * ``arrow-json``
+  * ``arrow-orc``
+  * ``arrow-python-flight``
+  * ``arrow-python``
+  * ``arrow-tensorflow``
+  * ``arrow-testing``
+  * ``arrow``
+  * ``gandiva``
+  * ``parquet``
+  * ``plasma``
diff --git a/docs/source/cpp/getting_started.rst b/docs/source/cpp/getting_started.rst
index d6cfb177044..3c7b7f94f01 100644
--- a/docs/source/cpp/getting_started.rst
+++ b/docs/source/cpp/getting_started.rst
@@ -25,7 +25,7 @@ User Guide
 
    overview
    conventions
-   cmake
+   build_system
    memory
    arrays
    datatypes

From f0e0cfb520ef30f2ef9240b59cecb5482c3f70c7 Mon Sep 17 00:00:00 2001
From: liyafan82 <fan_li_ya@foxmail.com>
Date: Mon, 5 Jul 2021 10:11:27 +0800
Subject: [PATCH 498/719] ARROW-13147: [Java] Respect the rounding policy when
 allocating vector buffers

According to the current implementation, the default "next power of two" rounding policy is assumed when allocating buffers for a vector.

In particular, for fixed width vectors, this policy is applied for the validity and data buffers, and for variable width vectors, this policy is applied for the validity and offset buffers.

However, this default policy is not always used for the allocator. When an alternative policy is in use, the buffers allocated assuming the default policy will have inappropriate capacities, which may lead to waste of memory spaces.

Closes #10576 from liyafan82/fly_0623_pol

Authored-by: liyafan82 <fan_li_ya@foxmail.com>
Signed-off-by: liyafan82 <fan_li_ya@foxmail.com>
---
 .../apache/arrow/memory/BaseAllocator.java    |  5 ++
 .../apache/arrow/memory/BufferAllocator.java  | 10 +++
 .../apache/arrow/vector/BaseValueVector.java  |  5 +-
 .../apache/arrow/vector/TestVectorAlloc.java  | 63 +++++++++++++++++++
 4 files changed, 80 insertions(+), 3 deletions(-)

diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BaseAllocator.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BaseAllocator.java
index 246b2212e26..8d21cef7aa3 100644
--- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BaseAllocator.java
+++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BaseAllocator.java
@@ -715,6 +715,11 @@ public static ImmutableConfig.Builder configBuilder() {
     return ImmutableConfig.builder();
   }
 
+  @Override
+  public RoundingPolicy getRoundingPolicy() {
+    return roundingPolicy;
+  }
+
   /**
    * Config class of {@link BaseAllocator}.
    */
diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferAllocator.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferAllocator.java
index 8fbf6f7b073..e59349c6498 100644
--- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferAllocator.java
+++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferAllocator.java
@@ -19,6 +19,9 @@
 
 import java.util.Collection;
 
+import org.apache.arrow.memory.rounding.DefaultRoundingPolicy;
+import org.apache.arrow.memory.rounding.RoundingPolicy;
+
 /**
  * Wrapper class to deal with byte buffer allocation. Ensures users only use designated methods.
  */
@@ -225,4 +228,11 @@ BufferAllocator newChildAllocator(
    * a no-op.
    */
   void assertOpen();
+
+  /**
+   * Gets the rounding policy of the allocator.
+   */
+  default RoundingPolicy getRoundingPolicy() {
+    return DefaultRoundingPolicy.DEFAULT_ROUNDING_POLICY;
+  }
 }
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java
index 0af9461c525..22fe4254ffd 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java
@@ -23,7 +23,6 @@
 import org.apache.arrow.memory.ArrowBuf;
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.memory.ReferenceManager;
-import org.apache.arrow.memory.util.CommonUtil;
 import org.apache.arrow.util.Preconditions;
 import org.apache.arrow.vector.util.DataSizeRoundingUtil;
 import org.apache.arrow.vector.util.TransferPair;
@@ -141,7 +140,7 @@ long computeCombinedBufferSize(int valueCount, int typeWidth) {
     } else {
       bufferSize += DataSizeRoundingUtil.roundUpTo8Multiple((long) valueCount * typeWidth);
     }
-    return CommonUtil.nextPowerOfTwo(bufferSize);
+    return allocator.getRoundingPolicy().getRoundedSize(bufferSize);
   }
 
   /**
@@ -174,7 +173,7 @@ DataAndValidityBuffers allocFixedDataAndValidityBufs(int valueCount, int typeWid
     if (typeWidth == 0) {
       validityBufferSize = dataBufferSize = bufferSize / 2;
     } else {
-      // Due to roundup to power-of-2 allocation, the bufferSize could be greater than the
+      // Due to the rounding policy, the bufferSize could be greater than the
       // requested size. Utilize the allocated buffer fully.;
       long actualCount = (long) ((bufferSize * 8.0) / (8 * typeWidth + 1));
       do {
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorAlloc.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorAlloc.java
index b9e7c8661a7..dfc75ec8e34 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorAlloc.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorAlloc.java
@@ -18,12 +18,16 @@
 package org.apache.arrow.vector;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
 
 import java.util.Arrays;
 import java.util.Collections;
 
+import org.apache.arrow.memory.AllocationListener;
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.memory.rounding.DefaultRoundingPolicy;
+import org.apache.arrow.memory.rounding.RoundingPolicy;
 import org.apache.arrow.vector.types.TimeUnit;
 import org.apache.arrow.vector.types.Types;
 import org.apache.arrow.vector.types.Types.MinorType;
@@ -42,14 +46,19 @@
 public class TestVectorAlloc {
   private BufferAllocator rootAllocator;
 
+  private BufferAllocator policyAllocator;
+
   @Before
   public void init() {
     rootAllocator = new RootAllocator(Long.MAX_VALUE);
+    policyAllocator =
+        new RootAllocator(AllocationListener.NOOP, Integer.MAX_VALUE, new CustomPolicy());
   }
 
   @After
   public void terminate() throws Exception {
     rootAllocator.close();
+    policyAllocator.close();
   }
 
   private static Field field(String name, ArrowType type) {
@@ -103,4 +112,58 @@ public void testVectorAllocWithField() {
       }
     }
   }
+
+  private static final int CUSTOM_SEGMENT_SIZE = 200;
+
+  /**
+   * A custom rounding policy that rounds the size to
+   * the next multiple of 200.
+   */
+  private static class CustomPolicy implements RoundingPolicy {
+
+    @Override
+    public long getRoundedSize(long requestSize) {
+      return (requestSize + CUSTOM_SEGMENT_SIZE - 1) / CUSTOM_SEGMENT_SIZE * CUSTOM_SEGMENT_SIZE;
+    }
+  }
+
+  @Test
+  public void testFixedWidthVectorAllocation() {
+    try (IntVector vec1 = new IntVector("vec", policyAllocator);
+        IntVector vec2 = new IntVector("vec", rootAllocator)) {
+      assertTrue(vec1.getAllocator().getRoundingPolicy() instanceof CustomPolicy);
+      vec1.allocateNew(50);
+      long totalCapacity = vec1.getValidityBuffer().capacity() + vec1.getDataBuffer().capacity();
+
+      // the total capacity must be a multiple of the segment size
+      assertTrue(totalCapacity % CUSTOM_SEGMENT_SIZE == 0);
+
+      assertTrue(vec2.getAllocator().getRoundingPolicy() instanceof DefaultRoundingPolicy);
+      vec2.allocateNew(50);
+      totalCapacity = vec2.getValidityBuffer().capacity() + vec2.getDataBuffer().capacity();
+
+      // the total capacity must be a power of two
+      assertEquals(totalCapacity & (totalCapacity - 1), 0);
+    }
+  }
+
+  @Test
+  public void testVariableWidthVectorAllocation() {
+    try (VarCharVector vec1 = new VarCharVector("vec", policyAllocator);
+         VarCharVector vec2 = new VarCharVector("vec", rootAllocator)) {
+      assertTrue(vec1.getAllocator().getRoundingPolicy() instanceof CustomPolicy);
+      vec1.allocateNew(50);
+      long totalCapacity = vec1.getValidityBuffer().capacity() + vec1.getOffsetBuffer().capacity();
+
+      // the total capacity must be a multiple of the segment size
+      assertTrue(totalCapacity % CUSTOM_SEGMENT_SIZE == 0);
+
+      assertTrue(vec2.getAllocator().getRoundingPolicy() instanceof DefaultRoundingPolicy);
+      vec2.allocateNew(50);
+      totalCapacity = vec2.getValidityBuffer().capacity() + vec2.getOffsetBuffer().capacity();
+
+      // the total capacity must be a power of two
+      assertEquals(totalCapacity & (totalCapacity - 1), 0);
+    }
+  }
 }

From dbb5b428ec980b03840c722a08b25e4b928a4d5b Mon Sep 17 00:00:00 2001
From: liyafan82 <fan_li_ya@foxmail.com>
Date: Mon, 5 Jul 2021 10:15:16 +0800
Subject: [PATCH 499/719] ARROW-13194: [Java][Document] Create prose document
 about Java algorithms

See https://issues.apache.org/jira/browse/ARROW-13194

Closes #10617 from liyafan82/fly_0628_doc

Authored-by: liyafan82 <fan_li_ya@foxmail.com>
Signed-off-by: liyafan82 <fan_li_ya@foxmail.com>
---
 docs/source/java/algorithm.rst | 93 ++++++++++++++++++++++++++++++++++
 docs/source/java/index.rst     |  1 +
 2 files changed, 94 insertions(+)
 create mode 100644 docs/source/java/algorithm.rst

diff --git a/docs/source/java/algorithm.rst b/docs/source/java/algorithm.rst
new file mode 100644
index 00000000000..b0a889459ac
--- /dev/null
+++ b/docs/source/java/algorithm.rst
@@ -0,0 +1,93 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements.  See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership.  The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License.  You may obtain a copy of the License at
+
+..   http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied.  See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+===========
+Java Algorithms
+===========
+
+Arrow's Java library provides algorithms for some commonly-used
+functionalities. The algorithms are provided in the ``org.apache.arrow.algorithm``
+package of the ``algorithm`` module. 
+
+Comparing Vector Elements
+=================
+
+Comparing vector elements is the basic for many algorithms. Vector 
+elements can be compared in one of the two ways:
+
+1. **Equality comparison**: there are two possible results for this type of comparisons: ``equal`` and ``unequal``.
+Currently, this type of comparison is supported through the ``org.apache.arrow.vector.compare.VectorValueEqualizer``
+interface.
+
+2. **Ordering comparison**: there are three possible results for this type of comparisons: ``less than``, ``equal to ``
+and ``greater than``. This comparison is supported by the abstract class ``org.apache.arrow.algorithm.sort.VectorValueComparator``.
+
+We provide default implementations to compare vector elements. However, users can also define ways
+for customized comparisons. 
+
+Vector Element Search
+====================
+
+A search algorithm tries to find a particular value in a vector. When successful, a vector index is 
+returned; otherwise, a ``-1`` is returned. The following search algorithms are provided:
+
+1. **Linear search**: this algorithm simply traverses the vector from the beginning, until a match is 
+found, or the end of the vector is reached. So it takes ``O(n)`` time, where ``n`` is the number of elements
+in the vector.  This algorithm is implemented in ``org.apache.arrow.algorithm.search.VectorSearcher#linearSearch``.
+
+2. **Binary search**: this represents a more efficient search algorithm, as it runs in ``O(log(n))`` time. 
+However, it is only applicable to sorted vectors. To get a sorted vector,
+one can use one of our sorting algorithms, which will be discussed in the next section. This algorithm
+is implemented in ``org.apache.arrow.algorithm.search.VectorSearcher#binarySearch``.
+
+3. **Parallel search**: when the vector is large, it takes a long time to traverse the elements to search
+for a value. To make this process faster, one can split the vector into multiple partitions, and perform the 
+search for each partition in parallel. This is supported by ``org.apache.arrow.algorithm.search.ParallelSearcher``.
+
+4. **Range search**: for many scenarios, there can be multiple matching values in the vector. 
+If the vector is sorted, the matching values reside in a contiguous region in the vector. The
+range search algorithm tries to find the upper/lower bound of the region in ``O(log(n))`` time. 
+An implementation is provided in ``org.apache.arrow.algorithm.search.VectorRangeSearcher``.
+
+Vector Sorting
+===================
+
+Given a vector, a sorting algorithm turns it into a sorted one. The sorting criteria must
+be specified by some ordering comparison operation. The sorting algorithms can be
+classified into the following categories:
+
+1. **In-place sorter**: an in-place sorter performs the sorting by manipulating the original
+vector, without creating any new vector. So it just returns the original vector after the sorting operations.
+Currently, we have ``org.apache.arrow.algorithm.sort.FixedWidthInPlaceVectorSorter`` for in-place
+sorting in ``O(nlog(n))`` time. As the name suggests, it only supports fixed width vectors. 
+
+2. **Out-of-place sorter**: an out-of-place sorter does not mutate the original vector. Instead,
+it copies vector elements to a new vector in sorted order, and returns the new vector.
+We have ``org.apache.arrow.algorithm.sort.FixedWidthInPlaceVectorSorter.FixedWidthOutOfPlaceVectorSorter`` 
+and ``org.apache.arrow.algorithm.sort.FixedWidthInPlaceVectorSorter.VariableWidthOutOfPlaceVectorSorter``
+for fixed width and variable width vectors, respectively. Both algorithms run in ``O(nlog(n))`` time. 
+
+3. **Index sorter**: this sorter does not actually sort the vector. Instead, it returns an integer
+vector, which correspond to indices of vector elements in sorted order. With the index vector, one can
+easily construct a sorted vector. In addition, some other tasks can be easily achieved, like finding the ``k``th
+smallest value in the vector. Index sorting is supported by ``org.apache.arrow.algorithm.sort.IndexSorter``, 
+which runs in ``O(nlog(n))`` time. It is applicable to vectors of any type. 
+
+Other Algorithms
+===================
+
+Other algorithms include vector deduplication, dictionary encoding, etc., in the ``algorithm`` module.
diff --git a/docs/source/java/index.rst b/docs/source/java/index.rst
index 5bb08571b43..64dd44f080b 100644
--- a/docs/source/java/index.rst
+++ b/docs/source/java/index.rst
@@ -27,4 +27,5 @@ on the Arrow format and other language bindings see the :doc:`parent documentati
    vector
    vector_schema_root
    ipc
+   algorithm
    Reference (javadoc) <https://arrow.apache.org/docs/java/reference/>

From 39681465cce8af2a9df667f42ba06348fd962c69 Mon Sep 17 00:00:00 2001
From: Projjal Chanda <iam@pchanda.com>
Date: Mon, 5 Jul 2021 08:59:16 +0000
Subject: [PATCH 500/719] ARROW-13032: [Java] Update guava version

Vulnerabilities in current version: [CVE-2018-10237](https://github.com/advisories/GHSA-mvr2-9pj6-7w5j) [CVE-2020-8908](https://github.com/advisories/GHSA-5mg8-w23w-74h3)
See https://github.com/apache/arrow/issues/10393#issue-900219398

Closes #10501 from projjal/updateguava

Authored-by: Projjal Chanda <iam@pchanda.com>
Signed-off-by: ishizaki <ishizaki@jp.ibm.com>
---
 .../src/test/java/org/apache/arrow/flight/perf/TestPerf.java    | 2 +-
 java/gandiva/pom.xml                                            | 2 --
 java/pom.xml                                                    | 2 +-
 3 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/perf/TestPerf.java b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/perf/TestPerf.java
index 5652e987d51..9e2d7cc544f 100644
--- a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/perf/TestPerf.java
+++ b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/perf/TestPerf.java
@@ -103,7 +103,7 @@ public void throughput() throws Exception {
             res.add(f.get());
           }
           return res;
-        }).get();
+        }, pool).get();
 
         double seconds = r.nanos * 1.0d / 1000 / 1000 / 1000;
         throughPuts[i] = (r.bytes * 1.0d / 1024 / 1024) / seconds;
diff --git a/java/gandiva/pom.xml b/java/gandiva/pom.xml
index 227172a59ca..66869ccc18a 100644
--- a/java/gandiva/pom.xml
+++ b/java/gandiva/pom.xml
@@ -26,7 +26,6 @@
         <maven.compiler.source>1.8</maven.compiler.source>
         <maven.compiler.target>1.8</maven.compiler.target>
         <protobuf.version>2.5.0</protobuf.version>
-        <dep.guava.version>18.0</dep.guava.version>
         <checkstyle.failOnViolation>true</checkstyle.failOnViolation>
         <arrow.cpp.build.dir>../../../cpp/release-build</arrow.cpp.build.dir>
     </properties>
@@ -56,7 +55,6 @@
         <dependency>
             <groupId>com.google.guava</groupId>
             <artifactId>guava</artifactId>
-            <version>23.0</version>
         </dependency>
         <dependency>
             <groupId>org.slf4j</groupId>
diff --git a/java/pom.xml b/java/pom.xml
index 9fa10bb5745..c9fc2c331e6 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -32,7 +32,7 @@
     <dep.junit.platform.version>1.4.0</dep.junit.platform.version>
     <dep.junit.jupiter.version>5.4.0</dep.junit.jupiter.version>
     <dep.slf4j.version>1.7.25</dep.slf4j.version>
-    <dep.guava.version>20.0</dep.guava.version>
+    <dep.guava.version>30.1.1-jre</dep.guava.version>
     <dep.netty.version>4.1.48.Final</dep.netty.version>
     <dep.jackson.version>2.11.4</dep.jackson.version>
     <dep.hadoop.version>2.7.1</dep.hadoop.version>

From 32679ddf0495a50b2158146709e7ecfd27a467d9 Mon Sep 17 00:00:00 2001
From: Alenka Frim <frim.alenka@gmail.com>
Date: Mon, 5 Jul 2021 13:02:27 +0200
Subject: [PATCH 501/719] ARROW-13137: [C++][Documentation] Make in-table
 references consistent

Updated `compute.rst` adding Notes column where needed and unifying raw syntax for in-table references.

Closes #10630 from AlenkaF/ARROW-13137

Authored-by: Alenka Frim <frim.alenka@gmail.com>
Signed-off-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
---
 docs/source/cpp/compute.rst | 342 ++++++++++++++++++------------------
 1 file changed, 171 insertions(+), 171 deletions(-)

diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index 33c1b474452..2d41c579747 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -183,33 +183,33 @@ recommend you try it out.  Unsupported input types return a ``TypeError``
 Aggregations
 ------------
 
-+--------------------------+------------+--------------------+-----------------------+--------------------------------------------+
-| Function name            | Arity      | Input types        | Output type           | Options class                              |
-+==========================+============+====================+=======================+============================================+
-| all                      | Unary      | Boolean            | Scalar Boolean        |                                            |
-+--------------------------+------------+--------------------+-----------------------+--------------------------------------------+
-| any                      | Unary      | Boolean            | Scalar Boolean        |                                            |
-+--------------------------+------------+--------------------+-----------------------+--------------------------------------------+
-| count                    | Unary      | Any                | Scalar Int64          | :struct:`ScalarAggregateOptions`           |
-+--------------------------+------------+--------------------+-----------------------+--------------------------------------------+
-| index                    | Unary      | Any                | Scalar Int64          | :struct:`IndexOptions`                     |
-+--------------------------+------------+--------------------+-----------------------+--------------------------------------------+
-| mean                     | Unary      | Numeric            | Scalar Float64        | :struct:`ScalarAggregateOptions`           |
-+--------------------------+------------+--------------------+-----------------------+--------------------------------------------+
-| min_max                  | Unary      | Numeric            | Scalar Struct  (1)    | :struct:`ScalarAggregateOptions`           |
-+--------------------------+------------+--------------------+-----------------------+--------------------------------------------+
-| mode                     | Unary      | Numeric            | Struct  (2)           | :struct:`ModeOptions`                      |
-+--------------------------+------------+--------------------+-----------------------+--------------------------------------------+
-| quantile                 | Unary      | Numeric            | Scalar Numeric (3)    | :struct:`QuantileOptions`                  |
-+--------------------------+------------+--------------------+-----------------------+--------------------------------------------+
-| stddev                   | Unary      | Numeric            | Scalar Float64        | :struct:`VarianceOptions`                  |
-+--------------------------+------------+--------------------+-----------------------+--------------------------------------------+
-| sum                      | Unary      | Numeric            | Scalar Numeric (4)    | :struct:`ScalarAggregateOptions`           |
-+--------------------------+------------+--------------------+-----------------------+--------------------------------------------+
-| tdigest                  | Unary      | Numeric            | Scalar Float64        | :struct:`TDigestOptions`                   |
-+--------------------------+------------+--------------------+-----------------------+--------------------------------------------+
-| variance                 | Unary      | Numeric            | Scalar Float64        | :struct:`VarianceOptions`                  |
-+--------------------------+------------+--------------------+-----------------------+--------------------------------------------+
++---------------+-------+-------------+----------------+----------------------------------+-------+
+| Function name | Arity | Input types | Output type    | Options class                    | Notes |
++===============+=======+=============+================+==================================+=======+
+| all           | Unary | Boolean     | Scalar Boolean |                                  |       |
++---------------+-------+-------------+----------------+----------------------------------+-------+
+| any           | Unary | Boolean     | Scalar Boolean |                                  |       |
++---------------+-------+-------------+----------------+----------------------------------+-------+
+| count         | Unary | Any         | Scalar Int64   | :struct:`ScalarAggregateOptions` |       |
++---------------+-------+-------------+----------------+----------------------------------+-------+
+| index         | Unary | Any         | Scalar Int64   | :struct:`IndexOptions`           |       |
++---------------+-------+-------------+----------------+----------------------------------+-------+
+| mean          | Unary | Numeric     | Scalar Float64 | :struct:`ScalarAggregateOptions` |       |
++---------------+-------+-------------+----------------+----------------------------------+-------+
+| min_max       | Unary | Numeric     | Scalar Struct  | :struct:`ScalarAggregateOptions` | \(1)  |
++---------------+-------+-------------+----------------+----------------------------------+-------+
+| mode          | Unary | Numeric     | Struct         | :struct:`ModeOptions`            | \(2)  |
++---------------+-------+-------------+----------------+----------------------------------+-------+
+| quantile      | Unary | Numeric     | Scalar Numeric | :struct:`QuantileOptions`        | \(3)  |
++---------------+-------+-------------+----------------+----------------------------------+-------+
+| stddev        | Unary | Numeric     | Scalar Float64 | :struct:`VarianceOptions`        |       |
++---------------+-------+-------------+----------------+----------------------------------+-------+
+| sum           | Unary | Numeric     | Scalar Numeric | :struct:`ScalarAggregateOptions` | \(4)  |
++---------------+-------+-------------+----------------+----------------------------------+-------+
+| tdigest       | Unary | Numeric     | Scalar Float64 | :struct:`TDigestOptions`         |       |
++---------------+-------+-------------+----------------+----------------------------------+-------+
+| variance      | Unary | Numeric     | Scalar Float64 | :struct:`VarianceOptions`        |       |
++---------------+-------+-------------+----------------+----------------------------------+-------+
 
 Notes:
 
@@ -259,37 +259,37 @@ then typically wraps around).  Each function is also available in an
 overflow-checking variant, suffixed ``_checked``, which returns
 an ``Invalid`` :class:`Status` when overflow is detected.
 
-+--------------------------+------------+--------------------+---------------------+
-| Function name            | Arity      | Input types        | Output type         |
-+==========================+============+====================+=====================+
-| abs                      | Unary      | Numeric            | Numeric             |
-+--------------------------+------------+--------------------+---------------------+
-| abs_checked              | Unary      | Numeric            | Numeric             |
-+--------------------------+------------+--------------------+---------------------+
-| add                      | Binary     | Numeric            | Numeric (1)         |
-+--------------------------+------------+--------------------+---------------------+
-| add_checked              | Binary     | Numeric            | Numeric (1)         |
-+--------------------------+------------+--------------------+---------------------+
-| divide                   | Binary     | Numeric            | Numeric (1)         |
-+--------------------------+------------+--------------------+---------------------+
-| divide_checked           | Binary     | Numeric            | Numeric (1)         |
-+--------------------------+------------+--------------------+---------------------+
-| multiply                 | Binary     | Numeric            | Numeric (1)         |
-+--------------------------+------------+--------------------+---------------------+
-| multiply_checked         | Binary     | Numeric            | Numeric (1)         |
-+--------------------------+------------+--------------------+---------------------+
-| negate                   | Unary      | Numeric            | Numeric             |
-+--------------------------+------------+--------------------+---------------------+
-| negate_checked           | Unary      | Signed Numeric     | Signed Numeric      |
-+--------------------------+------------+--------------------+---------------------+
-| power                    | Binary     | Numeric            | Numeric             |
-+--------------------------+------------+--------------------+---------------------+
-| power_checked            | Binary     | Numeric            | Numeric             |
-+--------------------------+------------+--------------------+---------------------+
-| subtract                 | Binary     | Numeric            | Numeric (1)         |
-+--------------------------+------------+--------------------+---------------------+
-| subtract_checked         | Binary     | Numeric            | Numeric (1)         |
-+--------------------------+------------+--------------------+---------------------+
++------------------+--------+----------------+----------------+-------+
+| Function name    | Arity  | Input types    | Output type    | Notes |
++==================+========+================+================+=======+
+| abs              | Unary  | Numeric        | Numeric        |       |
++------------------+--------+----------------+----------------+-------+
+| abs_checked      | Unary  | Numeric        | Numeric        |       |
++------------------+--------+----------------+----------------+-------+
+| add              | Binary | Numeric        | Numeric        | \(1)  |
++------------------+--------+----------------+----------------+-------+
+| add_checked      | Binary | Numeric        | Numeric        | \(1)  |
++------------------+--------+----------------+----------------+-------+
+| divide           | Binary | Numeric        | Numeric        | \(1)  |
++------------------+--------+----------------+----------------+-------+
+| divide_checked   | Binary | Numeric        | Numeric        | \(1)  |
++------------------+--------+----------------+----------------+-------+
+| multiply         | Binary | Numeric        | Numeric        | \(1)  |
++------------------+--------+----------------+----------------+-------+
+| multiply_checked | Binary | Numeric        | Numeric        | \(1)  |
++------------------+--------+----------------+----------------+-------+
+| negate           | Unary  | Numeric        | Numeric        |       |
++------------------+--------+----------------+----------------+-------+
+| negate_checked   | Unary  | Signed Numeric | Signed Numeric |       |
++------------------+--------+----------------+----------------+-------+
+| power            | Binary | Numeric        | Numeric        |       |
++------------------+--------+----------------+----------------+-------+
+| power_checked    | Binary | Numeric        | Numeric        |       |
++------------------+--------+----------------+----------------+-------+
+| subtract         | Binary | Numeric        | Numeric        | \(1)  |
++------------------+--------+----------------+----------------+-------+
+| subtract_checked | Binary | Numeric        | Numeric        | \(1)  |
++------------------+--------+----------------+----------------+-------+
 
 * \(1) Precision and scale of computed DECIMAL results
 
@@ -460,41 +460,41 @@ The first set of functions operates on a character-per-character basis,
 and emit true in the output if the input contains only characters of a
 given class:
 
-+--------------------------+------------+--------------------+----------------+----------------------------------+
-| Function name            | Arity      | Input types        | Output type    | Matched character class          |
-+==========================+============+====================+================+==================================+
-| ascii_is_alnum           | Unary      | String-like        | Boolean        | Alphanumeric ASCII               |
-+--------------------------+------------+--------------------+----------------+----------------------------------+
-| ascii_is_alpha           | Unary      | String-like        | Boolean        | Alphabetic ASCII                 |
-+--------------------------+------------+--------------------+----------------+----------------------------------+
-| ascii_is_decimal         | Unary      | String-like        | Boolean        | Decimal ASCII \(1)               |
-+--------------------------+------------+--------------------+----------------+----------------------------------+
-| ascii_is_lower           | Unary      | String-like        | Boolean        | Lowercase ASCII \(2)             |
-+--------------------------+------------+--------------------+----------------+----------------------------------+
-| ascii_is_printable       | Unary      | String-like        | Boolean        | Printable ASCII                  |
-+--------------------------+------------+--------------------+----------------+----------------------------------+
-| ascii_is_space           | Unary      | String-like        | Boolean        | Whitespace ASCII                 |
-+--------------------------+------------+--------------------+----------------+----------------------------------+
-| ascii_is_upper           | Unary      | String-like        | Boolean        | Uppercase ASCII \(2)             |
-+--------------------------+------------+--------------------+----------------+----------------------------------+
-| utf8_is_alnum            | Unary      | String-like        | Boolean        | Alphanumeric Unicode             |
-+--------------------------+------------+--------------------+----------------+----------------------------------+
-| utf8_is_alpha            | Unary      | String-like        | Boolean        | Alphabetic Unicode               |
-+--------------------------+------------+--------------------+----------------+----------------------------------+
-| utf8_is_decimal          | Unary      | String-like        | Boolean        | Decimal Unicode                  |
-+--------------------------+------------+--------------------+----------------+----------------------------------+
-| utf8_is_digit            | Unary      | String-like        | Boolean        | Unicode digit \(3)               |
-+--------------------------+------------+--------------------+----------------+----------------------------------+
-| utf8_is_lower            | Unary      | String-like        | Boolean        | Lowercase Unicode \(2)           |
-+--------------------------+------------+--------------------+----------------+----------------------------------+
-| utf8_is_numeric          | Unary      | String-like        | Boolean        | Numeric Unicode \(4)             |
-+--------------------------+------------+--------------------+----------------+----------------------------------+
-| utf8_is_printable        | Unary      | String-like        | Boolean        | Printable Unicode                |
-+--------------------------+------------+--------------------+----------------+----------------------------------+
-| utf8_is_space            | Unary      | String-like        | Boolean        | Whitespace Unicode               |
-+--------------------------+------------+--------------------+----------------+----------------------------------+
-| utf8_is_upper            | Unary      | String-like        | Boolean        | Uppercase Unicode \(2)           |
-+--------------------------+------------+--------------------+----------------+----------------------------------+
++--------------------+-------+-------------+-------------+-------------------------+-------+
+| Function name      | Arity | Input types | Output type | Matched character class | Notes |
++====================+=======+=============+=============+=========================+=======+
+| ascii_is_alnum     | Unary | String-like | Boolean     | Alphanumeric ASCII      |       |
++--------------------+-------+-------------+-------------+-------------------------+-------+
+| ascii_is_alpha     | Unary | String-like | Boolean     | Alphabetic ASCII        |       |
++--------------------+-------+-------------+-------------+-------------------------+-------+
+| ascii_is_decimal   | Unary | String-like | Boolean     | Decimal ASCII           | \(1)  |
++--------------------+-------+-------------+-------------+-------------------------+-------+
+| ascii_is_lower     | Unary | String-like | Boolean     | Lowercase ASCII         | \(2)  |
++--------------------+-------+-------------+-------------+-------------------------+-------+
+| ascii_is_printable | Unary | String-like | Boolean     | Printable ASCII         |       |
++--------------------+-------+-------------+-------------+-------------------------+-------+
+| ascii_is_space     | Unary | String-like | Boolean     | Whitespace ASCII        |       |
++--------------------+-------+-------------+-------------+-------------------------+-------+
+| ascii_is_upper     | Unary | String-like | Boolean     | Uppercase ASCII         | \(2)  |
++--------------------+-------+-------------+-------------+-------------------------+-------+
+| utf8_is_alnum      | Unary | String-like | Boolean     | Alphanumeric Unicode    |       |
++--------------------+-------+-------------+-------------+-------------------------+-------+
+| utf8_is_alpha      | Unary | String-like | Boolean     | Alphabetic Unicode      |       |
++--------------------+-------+-------------+-------------+-------------------------+-------+
+| utf8_is_decimal    | Unary | String-like | Boolean     | Decimal Unicode         |       |
++--------------------+-------+-------------+-------------+-------------------------+-------+
+| utf8_is_digit      | Unary | String-like | Boolean     | Unicode digit           | \(3)  |
++--------------------+-------+-------------+-------------+-------------------------+-------+
+| utf8_is_lower      | Unary | String-like | Boolean     | Lowercase Unicode       | \(2)  |
++--------------------+-------+-------------+-------------+-------------------------+-------+
+| utf8_is_numeric    | Unary | String-like | Boolean     | Numeric Unicode         | \(4)  |
++--------------------+-------+-------------+-------------+-------------------------+-------+
+| utf8_is_printable  | Unary | String-like | Boolean     | Printable Unicode       |       |
++--------------------+-------+-------------+-------------+-------------------------+-------+
+| utf8_is_space      | Unary | String-like | Boolean     | Whitespace Unicode      |       |
++--------------------+-------+-------------+-------------+-------------------------+-------+
+| utf8_is_upper      | Unary | String-like | Boolean     | Uppercase Unicode       | \(2)  |
++--------------------+-------+-------------+-------------+-------------------------+-------+
 
 * \(1) Also matches all numeric ASCII characters and all ASCII digits.
 
@@ -533,33 +533,33 @@ The third set of functions examines string elements on a byte-per-byte basis:
 String transforms
 ~~~~~~~~~~~~~~~~~
 
-+--------------------------+------------+-------------------------+------------------------+-------------------------------------------------+
-| Function name            | Arity      | Input types             | Output type            | Notes   | Options class                         |
-+==========================+============+=========================+========================+=========+=======================================+
-| ascii_lower              | Unary      | String-like             | String-like            | \(1)    |                                       |
-+--------------------------+------------+-------------------------+------------------------+---------+---------------------------------------+
-| ascii_reverse            | Unary      | String-like             | String-like            | \(2)    |                                       |
-+--------------------------+------------+-------------------------+------------------------+---------+---------------------------------------+
-| ascii_upper              | Unary      | String-like             | String-like            | \(1)    |                                       |
-+--------------------------+------------+-------------------------+------------------------+---------+---------------------------------------+
-| binary_length            | Unary      | Binary- or String-like  | Int32 or Int64         | \(3)    |                                       |
-+--------------------------+------------+-------------------------+------------------------+---------+---------------------------------------+
-| binary_replace_slice     | Unary      | String-like             | Binary- or String-like | \(4)    | :struct:`ReplaceSliceOptions`         |
-+--------------------------+------------+-------------------------+------------------------+---------+---------------------------------------+
-| replace_substring        | Unary      | String-like             | String-like            | \(5)    | :struct:`ReplaceSubstringOptions`     |
-+--------------------------+------------+-------------------------+------------------------+---------+---------------------------------------+
-| replace_substring_regex  | Unary      | String-like             | String-like            | \(6)    | :struct:`ReplaceSubstringOptions`     |
-+--------------------------+------------+-------------------------+------------------------+---------+---------------------------------------+
-| utf8_length              | Unary      | String-like             | Int32 or Int64         | \(7)    |                                       |
-+--------------------------+------------+-------------------------+------------------------+---------+---------------------------------------+
-| utf8_lower               | Unary      | String-like             | String-like            | \(8)    |                                       |
-+--------------------------+------------+-------------------------+------------------------+---------+---------------------------------------+
-| utf8_replace_slice       | Unary      | String-like             | String-like            | \(4)    | :struct:`ReplaceSliceOptions`         |
-+--------------------------+------------+-------------------------+------------------------+---------+---------------------------------------+
-| utf8_reverse             | Unary      | String-like             | String-like            | \(9)    |                                       |
-+--------------------------+------------+-------------------------+------------------------+---------+---------------------------------------+
-| utf8_upper               | Unary      | String-like             | String-like            | \(8)    |                                       |
-+--------------------------+------------+-------------------------+------------------------+---------+---------------------------------------+
++-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+
+| Function name           | Arity | Input types            | Output type            | Options class                     | Notes |
++=========================+=======+========================+========================+===================================+=======+
+| ascii_lower             | Unary | String-like            | String-like            |                                   | \(1)  |
++-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+
+| ascii_reverse           | Unary | String-like            | String-like            |                                   | \(2)  |
++-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+
+| ascii_upper             | Unary | String-like            | String-like            |                                   | \(1)  |
++-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+
+| binary_length           | Unary | Binary- or String-like | Int32 or Int64         |                                   | \(3)  |
++-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+
+| binary_replace_slice    | Unary | String-like            | Binary- or String-like | :struct:`ReplaceSliceOptions`     | \(4)  |
++-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+
+| replace_substring       | Unary | String-like            | String-like            | :struct:`ReplaceSubstringOptions` | \(5)  |
++-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+
+| replace_substring_regex | Unary | String-like            | String-like            | :struct:`ReplaceSubstringOptions` | \(6)  |
++-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+
+| utf8_length             | Unary | String-like            | Int32 or Int64         |                                   | \(7)  |
++-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+
+| utf8_lower              | Unary | String-like            | String-like            |                                   | \(8)  |
++-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+
+| utf8_replace_slice      | Unary | String-like            | String-like            | :struct:`ReplaceSliceOptions`     | \(4)  |
++-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+
+| utf8_reverse            | Unary | String-like            | String-like            |                                   | \(9)  |
++-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+
+| utf8_upper              | Unary | String-like            | String-like            |                                   | \(8)  |
++-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+
 
 
 * \(1) Each ASCII character in the input is converted to lowercase or
@@ -670,33 +670,33 @@ These functions trim off characters on both sides (trim), or the left (ltrim) or
 Containment tests
 ~~~~~~~~~~~~~~~~~
 
-+---------------------------+------------+------------------------------------+--------------------+----------------------------------------+
-| Function name             | Arity      | Input types                        | Output type        | Options class                          |
-+===========================+============+====================================+====================+========================================+
-| count_substring           | Unary      | String-like                        | Int32 or Int64 (1) | :struct:`MatchSubstringOptions`        |
-+---------------------------+------------+------------------------------------+--------------------+----------------------------------------+
-| count_substring_regex     | Unary      | String-like                        | Int32 or Int64 (1) | :struct:`MatchSubstringOptions`        |
-+---------------------------+------------+------------------------------------+--------------------+----------------------------------------+
-| ends_with                 | Unary      | String-like                        | Boolean (2)        | :struct:`MatchSubstringOptions`        |
-+---------------------------+------------+------------------------------------+--------------------+----------------------------------------+
-| find_substring            | Unary      | Binary- and String-like            | Int32 or Int64 (3) | :struct:`MatchSubstringOptions`        |
-+---------------------------+------------+------------------------------------+--------------------+----------------------------------------+
-| find_substring_regex      | Unary      | Binary- and String-like            | Int32 or Int64 (3) | :struct:`MatchSubstringOptions`        |
-+---------------------------+------------+------------------------------------+--------------------+----------------------------------------+
-| index_in                  | Unary      | Boolean, Null, Numeric, Temporal,  | Int32 (4)          | :struct:`SetLookupOptions`             |
-|                           |            | Binary- and String-like            |                    |                                        |
-+---------------------------+------------+------------------------------------+--------------------+----------------------------------------+
-| is_in                     | Unary      | Boolean, Null, Numeric, Temporal,  | Boolean (5)        | :struct:`SetLookupOptions`             |
-|                           |            | Binary- and String-like            |                    |                                        |
-+---------------------------+------------+------------------------------------+--------------------+----------------------------------------+
-| match_like                | Unary      | String-like                        | Boolean (6)        | :struct:`MatchSubstringOptions`        |
-+---------------------------+------------+------------------------------------+--------------------+----------------------------------------+
-| match_substring           | Unary      | String-like                        | Boolean (7)        | :struct:`MatchSubstringOptions`        |
-+---------------------------+------------+------------------------------------+--------------------+----------------------------------------+
-| match_substring_regex     | Unary      | String-like                        | Boolean (8)        | :struct:`MatchSubstringOptions`        |
-+---------------------------+------------+------------------------------------+--------------------+----------------------------------------+
-| starts_with               | Unary      | String-like                        | Boolean (2)        | :struct:`MatchSubstringOptions`        |
-+---------------------------+------------+------------------------------------+--------------------+----------------------------------------+
++-----------------------+-------+-----------------------------------+----------------+---------------------------------+-------+
+| Function name         | Arity | Input types                       | Output type    | Options class                   | Notes |
++=======================+=======+===================================+================+=================================+=======+
+| count_substring       | Unary | String-like                       | Int32 or Int64 | :struct:`MatchSubstringOptions` | \(1)  |
++-----------------------+-------+-----------------------------------+----------------+---------------------------------+-------+
+| count_substring_regex | Unary | String-like                       | Int32 or Int64 | :struct:`MatchSubstringOptions` | \(1)  |
++-----------------------+-------+-----------------------------------+----------------+---------------------------------+-------+
+| ends_with             | Unary | String-like                       | Boolean        | :struct:`MatchSubstringOptions` | \(2)  |
++-----------------------+-------+-----------------------------------+----------------+---------------------------------+-------+
+| find_substring        | Unary | Binary- and String-like           | Int32 or Int64 | :struct:`MatchSubstringOptions` | \(3)  |
++-----------------------+-------+-----------------------------------+----------------+---------------------------------+-------+
+| find_substring_regex  | Unary | Binary- and String-like           | Int32 or Int64 | :struct:`MatchSubstringOptions` | \(3)  |
++-----------------------+-------+-----------------------------------+----------------+---------------------------------+-------+
+| index_in              | Unary | Boolean, Null, Numeric, Temporal, | Int32          | :struct:`SetLookupOptions`      | \(4)  |
+|                       |       | Binary- and String-like           |                |                                 |       |
++-----------------------+-------+-----------------------------------+----------------+---------------------------------+-------+
+| is_in                 | Unary | Boolean, Null, Numeric, Temporal, | Boolean        | :struct:`SetLookupOptions`      | \(5)  |
+|                       |       | Binary- and String-like           |                |                                 |       |
++-----------------------+-------+-----------------------------------+----------------+---------------------------------+-------+
+| match_like            | Unary | String-like                       | Boolean        | :struct:`MatchSubstringOptions` | \(6)  |
++-----------------------+-------+-----------------------------------+----------------+---------------------------------+-------+
+| match_substring       | Unary | String-like                       | Boolean        | :struct:`MatchSubstringOptions` | \(7)  |
++-----------------------+-------+-----------------------------------+----------------+---------------------------------+-------+
+| match_substring_regex | Unary | String-like                       | Boolean        | :struct:`MatchSubstringOptions` | \(8)  |
++-----------------------+-------+-----------------------------------+----------------+---------------------------------+-------+
+| starts_with           | Unary | String-like                       | Boolean        | :struct:`MatchSubstringOptions` | \(2)  |
++-----------------------+-------+-----------------------------------+----------------+---------------------------------+-------+
 
 
 * \(1) Output is the number of occurrences of
@@ -770,11 +770,11 @@ when a positive ``max_splits`` is given.
 String component extraction
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-+--------------------+------------+----------------+---------------+----------------------------------------+
-| Function name      | Arity      | Input types    | Output type   | Options class                          |
-+====================+============+================+===============+========================================+
-| extract_regex      | Unary      | String-like    | Struct (1)    | :struct:`ExtractRegexOptions`          |
-+--------------------+------------+----------------+---------------+----------------------------------------+
++---------------+-------+-------------+-------------+-------------------------------+-------+
+| Function name | Arity | Input types | Output type | Options class                 | Notes |
++===============+=======+=============+=============+===============================+=======+
+| extract_regex | Unary | String-like | Struct      | :struct:`ExtractRegexOptions` | \(1)  |
++---------------+-------+-------------+-------------+-------------------------------+-------+
 
 * \(1) Extract substrings defined by a regular expression using the Google RE2
   library.  The output struct field names refer to the named capture groups,
@@ -1040,18 +1040,18 @@ Array-wise ("vector") functions
 Associative transforms
 ~~~~~~~~~~~~~~~~~~~~~~
 
-+--------------------------+------------+------------------------------------+----------------------------+
-| Function name            | Arity      | Input types                        | Output type                |
-+==========================+============+====================================+============================+
-| dictionary_encode        | Unary      | Boolean, Null, Numeric,            | Dictionary (1)             |
-|                          |            | Temporal, Binary- and String-like  |                            |
-+--------------------------+------------+------------------------------------+----------------------------+
-| unique                   | Unary      | Boolean, Null, Numeric,            | Input type (2)             |
-|                          |            | Temporal, Binary- and String-like  |                            |
-+--------------------------+------------+------------------------------------+----------------------------+
-| value_counts             | Unary      | Boolean, Null, Numeric,            | Input type (3)             |
-|                          |            | Temporal, Binary- and String-like  |                            |
-+--------------------------+------------+------------------------------------+----------------------------+
++-------------------+-------+-----------------------------------+-------------+-------+
+| Function name     | Arity | Input types                       | Output type | Notes |
++===================+=======+===================================+=============+=======+
+| dictionary_encode | Unary | Boolean, Null, Numeric,           | Dictionary  | \(1)  |
+|                   |       | Temporal, Binary- and String-like |             |       |
++-------------------+-------+-----------------------------------+-------------+-------+
+| unique            | Unary | Boolean, Null, Numeric,           | Input type  | \(2)  |
+|                   |       | Temporal, Binary- and String-like |             |       |
++-------------------+-------+-----------------------------------+-------------+-------+
+| value_counts      | Unary | Boolean, Null, Numeric,           | Input type  | \(3)  |
+|                   |       | Temporal, Binary- and String-like |             |       |
++-------------------+-------+-----------------------------------+-------------+-------+
 
 * \(1) Output is ``Dictionary(Int32, input type)``.
 
@@ -1067,13 +1067,13 @@ Selections
 
 These functions select a subset of the first input defined by the second input.
 
-+-----------------+------------+---------------+--------------+------------------+-------------------------+-------------+
-| Function name   | Arity      | Input type 1  | Input type 2 | Output type      | Options class           | Notes       |
-+=================+============+===============+==============+==================+=========================+=============+
-| filter          | Binary     | Any (1)       | Boolean      | Input type 1     | :struct:`FilterOptions` | \(2)        |
-+-----------------+------------+---------------+--------------+------------------+-------------------------+-------------+
-| take            | Binary     | Any (1)       | Integer      | Input type 1     | :struct:`TakeOptions`   | \(3)        |
-+-----------------+------------+---------------+--------------+------------------+-------------------------+-------------+
++---------------+--------+--------------+--------------+--------------+-------------------------+-----------+
+| Function name | Arity  | Input type 1 | Input type 2 | Output type  | Options class           | Notes     |
++===============+========+==============+==============+==============+=========================+===========+
+| filter        | Binary | Any          | Boolean      | Input type 1 | :struct:`FilterOptions` | \(1) \(2) |
++---------------+--------+--------------+--------------+--------------+-------------------------+-----------+
+| take          | Binary | Any          | Integer      | Input type 1 | :struct:`TakeOptions`   | \(1) \(3) |
++---------------+--------+--------------+--------------+--------------+-------------------------+-----------+
 
 * \(1) Unions are unsupported.
 

From 389587c566e0d0d59b635a76fcc8dbb89358d6ec Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Mon, 5 Jul 2021 08:35:40 -0400
Subject: [PATCH 502/719] ARROW-11980: [Python] Remove experimental status from
 Table.replace_schema_metadata

Closes #10653 from jorisvandenbossche/ARROW-11980

Authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/src/arrow/table.h    | 2 +-
 python/pyarrow/table.pxi | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/cpp/src/arrow/table.h b/cpp/src/arrow/table.h
index 901c839843a..f1e5f23eed8 100644
--- a/cpp/src/arrow/table.h
+++ b/cpp/src/arrow/table.h
@@ -151,7 +151,7 @@ class ARROW_EXPORT Table {
   /// \brief Return new table with specified columns
   Result<std::shared_ptr<Table>> SelectColumns(const std::vector<int>& indices) const;
 
-  /// \brief Replace schema key-value metadata with new metadata (EXPERIMENTAL)
+  /// \brief Replace schema key-value metadata with new metadata
   /// \since 0.5.0
   ///
   /// \param[in] metadata new KeyValueMetadata
diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index f9dcb2aa60b..65f1ba11dc9 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -664,7 +664,7 @@ cdef class RecordBatch(_PandasConvertible):
 
     def replace_schema_metadata(self, metadata=None):
         """
-        EXPERIMENTAL: Create shallow copy of record batch by replacing schema
+        Create shallow copy of record batch by replacing schema
         key-value metadata with the indicated new metadata (which may be None,
         which deletes any existing metadata
 
@@ -1331,7 +1331,7 @@ cdef class Table(_PandasConvertible):
 
     def replace_schema_metadata(self, metadata=None):
         """
-        EXPERIMENTAL: Create shallow copy of table by replacing schema
+        Create shallow copy of table by replacing schema
         key-value metadata with the indicated new metadata (which may be None),
         which deletes any existing metadata.
 

From 9891d9b1eacfee0f356531ba381a916380fde9f1 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Mon, 5 Jul 2021 15:14:54 +0200
Subject: [PATCH 503/719] ARROW-12988: [CI][Python] Revert skip of failing test
 in kartothek nightly integration build

Revert of #10466

Closes #10655 from jorisvandenbossche/ARROW-12988-revert

Authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Signed-off-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
---
 ci/scripts/integration_kartothek.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/scripts/integration_kartothek.sh b/ci/scripts/integration_kartothek.sh
index 9e0a6996a27..379569b9c99 100755
--- a/ci/scripts/integration_kartothek.sh
+++ b/ci/scripts/integration_kartothek.sh
@@ -28,4 +28,4 @@ python -c "import kartothek"
 
 pushd /kartothek
 # See ARROW-12314, test_load_dataframes_columns_raises_missing skipped because of changed error message
-pytest -n0 --ignore tests/cli/test_query.py -k "not test_load_dataframes_columns_raises_missing and not test_update_dataset_from_ddf_empty"
+pytest -n0 --ignore tests/cli/test_query.py -k "not test_load_dataframes_columns_raises_missing"

From 0ebed2b9c9b739aa134507d3a26ad2015e535ff9 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Mon, 5 Jul 2021 16:17:59 +0200
Subject: [PATCH 504/719] ARROW-12512: [C++][Python][Dataset] Create CSV writer
 class and add Datasets support

This refactors the CSV write support to expose an explicit CSV writer class, and adds Python bindings and Datasets support.

Closes #10230 from lidavidm/arrow-12512

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/csv/options.h                  |   4 +
 cpp/src/arrow/csv/writer.cc                  | 121 +++++++++++--------
 cpp/src/arrow/csv/writer.h                   |  30 ++++-
 cpp/src/arrow/csv/writer_test.cc             |  27 ++++-
 cpp/src/arrow/dataset/file_csv.cc            |  44 +++++++
 cpp/src/arrow/dataset/file_csv.h             |  37 +++++-
 cpp/src/arrow/dataset/file_csv_test.cc       |  42 ++++++-
 cpp/src/arrow/dataset/type_fwd.h             |   2 +
 python/pyarrow/_csv.pxd                      |   8 ++
 python/pyarrow/_csv.pyx                      |  68 ++++++++---
 python/pyarrow/_dataset.pyx                  |  47 +++++--
 python/pyarrow/csv.py                        |   2 +-
 python/pyarrow/includes/libarrow.pxd         |  12 +-
 python/pyarrow/includes/libarrow_dataset.pxd |   5 +
 python/pyarrow/tests/test_csv.py             |  21 +++-
 python/pyarrow/tests/test_dataset.py         |  26 ++++
 r/R/arrowExports.R                           |   4 +
 r/R/dataset-format.R                         |   7 +-
 r/R/dataset-write.R                          |   2 +-
 r/man/write_dataset.Rd                       |   2 +-
 r/src/arrowExports.cpp                       |  18 +++
 r/src/csv.cpp                                |   7 +-
 r/src/dataset.cpp                            |   7 ++
 r/tests/testthat/test-dataset.R              |  22 ++++
 24 files changed, 465 insertions(+), 100 deletions(-)

diff --git a/cpp/src/arrow/csv/options.h b/cpp/src/arrow/csv/options.h
index 1e423fd76db..5face6f32d8 100644
--- a/cpp/src/arrow/csv/options.h
+++ b/cpp/src/arrow/csv/options.h
@@ -24,6 +24,7 @@
 #include <vector>
 
 #include "arrow/csv/type_fwd.h"
+#include "arrow/io/interfaces.h"
 #include "arrow/status.h"
 #include "arrow/util/visibility.h"
 
@@ -174,6 +175,9 @@ struct ARROW_EXPORT WriteOptions {
   /// This number can impact performance.
   int32_t batch_size = 1024;
 
+  /// \brief IO context for writing.
+  io::IOContext io_context;
+
   /// Create write options with default values
   static WriteOptions Defaults();
 
diff --git a/cpp/src/arrow/csv/writer.cc b/cpp/src/arrow/csv/writer.cc
index e1c34a77ae9..1b782cae7dc 100644
--- a/cpp/src/arrow/csv/writer.cc
+++ b/cpp/src/arrow/csv/writer.cc
@@ -19,6 +19,7 @@
 #include "arrow/array.h"
 #include "arrow/compute/cast.h"
 #include "arrow/io/interfaces.h"
+#include "arrow/ipc/writer.h"
 #include "arrow/record_batch.h"
 #include "arrow/result.h"
 #include "arrow/result_internal.h"
@@ -282,65 +283,76 @@ Result<std::unique_ptr<ColumnPopulator>> MakePopulator(const Field& field, char
   return std::unique_ptr<ColumnPopulator>(factory.populator);
 }
 
-class CSVConverter {
+class CSVWriterImpl : public ipc::RecordBatchWriter {
  public:
-  static Result<std::unique_ptr<CSVConverter>> Make(std::shared_ptr<Schema> schema,
-                                                    MemoryPool* pool) {
+  static Result<std::shared_ptr<CSVWriterImpl>> Make(
+      io::OutputStream* sink, std::shared_ptr<io::OutputStream> owned_sink,
+      std::shared_ptr<Schema> schema, const WriteOptions& options) {
+    RETURN_NOT_OK(options.Validate());
     std::vector<std::unique_ptr<ColumnPopulator>> populators(schema->num_fields());
     for (int col = 0; col < schema->num_fields(); col++) {
       char end_char = col < schema->num_fields() - 1 ? ',' : '\n';
-      ASSIGN_OR_RAISE(populators[col],
-                      MakePopulator(*schema->field(col), end_char, pool));
+      ASSIGN_OR_RAISE(populators[col], MakePopulator(*schema->field(col), end_char,
+                                                     options.io_context.pool()));
     }
-    return std::unique_ptr<CSVConverter>(
-        new CSVConverter(std::move(schema), std::move(populators), pool));
+    auto writer = std::make_shared<CSVWriterImpl>(
+        sink, std::move(owned_sink), std::move(schema), std::move(populators), options);
+    RETURN_NOT_OK(writer->PrepareForContentsWrite());
+    if (options.include_header) {
+      RETURN_NOT_OK(writer->WriteHeader());
+    }
+    return writer;
   }
 
-  Status WriteCSV(const RecordBatch& batch, const WriteOptions& options,
-                  io::OutputStream* out) {
-    RETURN_NOT_OK(PrepareForContentsWrite(options, out));
-    RecordBatchIterator iterator = RecordBatchSliceIterator(batch, options.batch_size);
+  Status WriteRecordBatch(const RecordBatch& batch) override {
+    RecordBatchIterator iterator = RecordBatchSliceIterator(batch, options_.batch_size);
     for (auto maybe_slice : iterator) {
       ASSIGN_OR_RAISE(std::shared_ptr<RecordBatch> slice, maybe_slice);
       RETURN_NOT_OK(TranslateMinimalBatch(*slice));
-      RETURN_NOT_OK(out->Write(data_buffer_));
+      RETURN_NOT_OK(sink_->Write(data_buffer_));
+      stats_.num_record_batches++;
     }
     return Status::OK();
   }
 
-  Status WriteCSV(const Table& table, const WriteOptions& options,
-                  io::OutputStream* out) {
+  Status WriteTable(const Table& table, int64_t max_chunksize) override {
     TableBatchReader reader(table);
-    reader.set_chunksize(options.batch_size);
-    RETURN_NOT_OK(PrepareForContentsWrite(options, out));
+    reader.set_chunksize(max_chunksize > 0 ? max_chunksize : options_.batch_size);
     std::shared_ptr<RecordBatch> batch;
     RETURN_NOT_OK(reader.ReadNext(&batch));
     while (batch != nullptr) {
       RETURN_NOT_OK(TranslateMinimalBatch(*batch));
-      RETURN_NOT_OK(out->Write(data_buffer_));
+      RETURN_NOT_OK(sink_->Write(data_buffer_));
       RETURN_NOT_OK(reader.ReadNext(&batch));
+      stats_.num_record_batches++;
     }
 
     return Status::OK();
   }
 
- private:
-  CSVConverter(std::shared_ptr<Schema> schema,
-               std::vector<std::unique_ptr<ColumnPopulator>> populators, MemoryPool* pool)
-      : column_populators_(std::move(populators)),
-        offsets_(0, 0, ::arrow::stl::allocator<char*>(pool)),
+  Status Close() override { return Status::OK(); }
+
+  ipc::WriteStats stats() const override { return stats_; }
+
+  CSVWriterImpl(io::OutputStream* sink, std::shared_ptr<io::OutputStream> owned_sink,
+                std::shared_ptr<Schema> schema,
+                std::vector<std::unique_ptr<ColumnPopulator>> populators,
+                const WriteOptions& options)
+      : sink_(sink),
+        owned_sink_(std::move(owned_sink)),
+        column_populators_(std::move(populators)),
+        offsets_(0, 0, ::arrow::stl::allocator<char*>(options.io_context.pool())),
         schema_(std::move(schema)),
-        pool_(pool) {}
+        options_(options) {}
 
-  Status PrepareForContentsWrite(const WriteOptions& options, io::OutputStream* out) {
+ private:
+  Status PrepareForContentsWrite() {
+    // Only called once, as part of initialization
     if (data_buffer_ == nullptr) {
-      ASSIGN_OR_RAISE(
-          data_buffer_,
-          AllocateResizableBuffer(
-              options.batch_size * schema_->num_fields() * kColumnSizeGuess, pool_));
-    }
-    if (options.include_header) {
-      RETURN_NOT_OK(WriteHeader(out));
+      ASSIGN_OR_RAISE(data_buffer_,
+                      AllocateResizableBuffer(
+                          options_.batch_size * schema_->num_fields() * kColumnSizeGuess,
+                          options_.io_context.pool()));
     }
     return Status::OK();
   }
@@ -355,7 +367,8 @@ class CSVConverter {
     return header_length + (kQuoteDelimiterCount * schema_->num_fields());
   }
 
-  Status WriteHeader(io::OutputStream* out) {
+  Status WriteHeader() {
+    // Only called once, as part of initialization
     RETURN_NOT_OK(data_buffer_->Resize(CalculateHeaderSize(), /*shrink_to_fit=*/false));
     char* next =
         reinterpret_cast<char*>(data_buffer_->mutable_data() + data_buffer_->size() - 1);
@@ -367,7 +380,7 @@ class CSVConverter {
     }
     *(data_buffer_->mutable_data() + data_buffer_->size() - 1) = '\n';
     DCHECK_EQ(reinterpret_cast<uint8_t*>(next + 1), data_buffer_->data());
-    return out->Write(data_buffer_);
+    return sink_->Write(data_buffer_);
   }
 
   Status TranslateMinimalBatch(const RecordBatch& batch) {
@@ -403,36 +416,44 @@ class CSVConverter {
   }
 
   static constexpr int64_t kColumnSizeGuess = 8;
+  io::OutputStream* sink_;
+  std::shared_ptr<io::OutputStream> owned_sink_;
   std::vector<std::unique_ptr<ColumnPopulator>> column_populators_;
   std::vector<int32_t, arrow::stl::allocator<int32_t>> offsets_;
   std::shared_ptr<ResizableBuffer> data_buffer_;
   const std::shared_ptr<Schema> schema_;
-  MemoryPool* pool_;
+  const WriteOptions options_;
+  ipc::WriteStats stats_;
 };
 
 }  // namespace
 
-Status WriteCSV(const Table& table, const WriteOptions& options, MemoryPool* pool,
+Status WriteCSV(const Table& table, const WriteOptions& options,
                 arrow::io::OutputStream* output) {
-  RETURN_NOT_OK(options.Validate());
-  if (pool == nullptr) {
-    pool = default_memory_pool();
-  }
-  ASSIGN_OR_RAISE(std::unique_ptr<CSVConverter> converter,
-                  CSVConverter::Make(table.schema(), pool));
-  return converter->WriteCSV(table, options, output);
+  ASSIGN_OR_RAISE(auto writer, MakeCSVWriter(output, table.schema(), options));
+  RETURN_NOT_OK(writer->WriteTable(table));
+  return writer->Close();
 }
 
-Status WriteCSV(const RecordBatch& batch, const WriteOptions& options, MemoryPool* pool,
+Status WriteCSV(const RecordBatch& batch, const WriteOptions& options,
                 arrow::io::OutputStream* output) {
-  RETURN_NOT_OK(options.Validate());
-  if (pool == nullptr) {
-    pool = default_memory_pool();
-  }
+  ASSIGN_OR_RAISE(auto writer, MakeCSVWriter(output, batch.schema(), options));
+  RETURN_NOT_OK(writer->WriteRecordBatch(batch));
+  return writer->Close();
+}
+
+ARROW_EXPORT
+Result<std::shared_ptr<ipc::RecordBatchWriter>> MakeCSVWriter(
+    std::shared_ptr<io::OutputStream> sink, const std::shared_ptr<Schema>& schema,
+    const WriteOptions& options) {
+  return CSVWriterImpl::Make(sink.get(), sink, schema, options);
+}
 
-  ASSIGN_OR_RAISE(std::unique_ptr<CSVConverter> converter,
-                  CSVConverter::Make(batch.schema(), pool));
-  return converter->WriteCSV(batch, options, output);
+ARROW_EXPORT
+Result<std::shared_ptr<ipc::RecordBatchWriter>> MakeCSVWriter(
+    io::OutputStream* sink, const std::shared_ptr<Schema>& schema,
+    const WriteOptions& options) {
+  return CSVWriterImpl::Make(sink, nullptr, schema, options);
 }
 
 }  // namespace csv
diff --git a/cpp/src/arrow/csv/writer.h b/cpp/src/arrow/csv/writer.h
index c009d7849f4..2f1442ae0af 100644
--- a/cpp/src/arrow/csv/writer.h
+++ b/cpp/src/arrow/csv/writer.h
@@ -17,8 +17,11 @@
 
 #pragma once
 
+#include <memory>
+
 #include "arrow/csv/options.h"
 #include "arrow/io/interfaces.h"
+#include "arrow/ipc/type_fwd.h"
 #include "arrow/record_batch.h"
 #include "arrow/table.h"
 
@@ -37,11 +40,34 @@ namespace csv {
 /// \brief Converts table to a CSV and writes the results to output.
 /// Experimental
 ARROW_EXPORT Status WriteCSV(const Table& table, const WriteOptions& options,
-                             MemoryPool* pool, arrow::io::OutputStream* output);
+                             arrow::io::OutputStream* output);
 /// \brief Converts batch to CSV and writes the results to output.
 /// Experimental
 ARROW_EXPORT Status WriteCSV(const RecordBatch& batch, const WriteOptions& options,
-                             MemoryPool* pool, arrow::io::OutputStream* output);
+                             arrow::io::OutputStream* output);
+
+/// \brief Create a new CSV writer. User is responsible for closing the
+/// actual OutputStream.
+///
+/// \param[in] sink output stream to write to
+/// \param[in] schema the schema of the record batches to be written
+/// \param[in] options options for serialization
+/// \return Result<std::shared_ptr<RecordBatchWriter>>
+ARROW_EXPORT
+Result<std::shared_ptr<ipc::RecordBatchWriter>> MakeCSVWriter(
+    std::shared_ptr<io::OutputStream> sink, const std::shared_ptr<Schema>& schema,
+    const WriteOptions& options = WriteOptions::Defaults());
+
+/// \brief Create a new CSV writer.
+///
+/// \param[in] sink output stream to write to (does not take ownership)
+/// \param[in] schema the schema of the record batches to be written
+/// \param[in] options options for serialization
+/// \return Result<std::shared_ptr<RecordBatchWriter>>
+ARROW_EXPORT
+Result<std::shared_ptr<ipc::RecordBatchWriter>> MakeCSVWriter(
+    io::OutputStream* sink, const std::shared_ptr<Schema>& schema,
+    const WriteOptions& options = WriteOptions::Defaults());
 
 }  // namespace csv
 }  // namespace arrow
diff --git a/cpp/src/arrow/csv/writer_test.cc b/cpp/src/arrow/csv/writer_test.cc
index a49dbcd8268..0c7e3fdb0c5 100644
--- a/cpp/src/arrow/csv/writer_test.cc
+++ b/cpp/src/arrow/csv/writer_test.cc
@@ -23,6 +23,7 @@
 #include "arrow/buffer.h"
 #include "arrow/csv/writer.h"
 #include "arrow/io/memory.h"
+#include "arrow/ipc/writer.h"
 #include "arrow/record_batch.h"
 #include "arrow/result_internal.h"
 #include "arrow/testing/gtest_util.h"
@@ -87,7 +88,27 @@ class TestWriteCSV : public ::testing::TestWithParam<WriterTestParams> {
     std::shared_ptr<io::BufferOutputStream> out;
     ASSIGN_OR_RAISE(out, io::BufferOutputStream::Create());
 
-    RETURN_NOT_OK(WriteCSV(data, options, default_memory_pool(), out.get()));
+    RETURN_NOT_OK(WriteCSV(data, options, out.get()));
+    ASSIGN_OR_RAISE(std::shared_ptr<Buffer> buffer, out->Finish());
+    return std::string(reinterpret_cast<const char*>(buffer->data()), buffer->size());
+  }
+
+  Result<std::string> ToCsvStringUsingWriter(const Table& data,
+                                             const WriteOptions& options) {
+    std::shared_ptr<io::BufferOutputStream> out;
+    ASSIGN_OR_RAISE(out, io::BufferOutputStream::Create());
+    // Write row-by-row
+    ASSIGN_OR_RAISE(auto writer, MakeCSVWriter(out, data.schema(), options));
+    TableBatchReader reader(data);
+    reader.set_chunksize(1);
+    std::shared_ptr<RecordBatch> batch;
+    RETURN_NOT_OK(reader.ReadNext(&batch));
+    while (batch != nullptr) {
+      RETURN_NOT_OK(writer->WriteRecordBatch(*batch));
+      RETURN_NOT_OK(reader.ReadNext(&batch));
+    }
+    RETURN_NOT_OK(writer->Close());
+    EXPECT_EQ(data.num_rows(), writer->stats().num_record_batches);
     ASSIGN_OR_RAISE(std::shared_ptr<Buffer> buffer, out->Finish());
     return std::string(reinterpret_cast<const char*>(buffer->data()), buffer->size());
   }
@@ -112,6 +133,10 @@ TEST_P(TestWriteCSV, TestWrite) {
                        Table::FromRecordBatches({record_batch}));
   ASSERT_OK_AND_ASSIGN(csv, ToCsvString(*table, options));
   EXPECT_EQ(csv, GetParam().expected_output);
+
+  // The writer should work identically.
+  ASSERT_OK_AND_ASSIGN(csv, ToCsvStringUsingWriter(*table, options));
+  EXPECT_EQ(csv, GetParam().expected_output);
 }
 
 INSTANTIATE_TEST_SUITE_P(MultiColumnWriteCSVTest, TestWriteCSV,
diff --git a/cpp/src/arrow/dataset/file_csv.cc b/cpp/src/arrow/dataset/file_csv.cc
index 3f42ab44a39..1ef1d2907c2 100644
--- a/cpp/src/arrow/dataset/file_csv.cc
+++ b/cpp/src/arrow/dataset/file_csv.cc
@@ -26,12 +26,14 @@
 #include "arrow/csv/options.h"
 #include "arrow/csv/parser.h"
 #include "arrow/csv/reader.h"
+#include "arrow/csv/writer.h"
 #include "arrow/dataset/dataset_internal.h"
 #include "arrow/dataset/file_base.h"
 #include "arrow/dataset/type_fwd.h"
 #include "arrow/dataset/visibility.h"
 #include "arrow/io/buffered.h"
 #include "arrow/io/compressed.h"
+#include "arrow/ipc/writer.h"
 #include "arrow/result.h"
 #include "arrow/type.h"
 #include "arrow/util/async_generator.h"
@@ -283,5 +285,47 @@ Future<util::optional<int64_t>> CsvFileFormat::CountRows(
       .Then([](int64_t count) { return util::make_optional<int64_t>(count); });
 }
 
+//
+// CsvFileWriter, CsvFileWriteOptions
+//
+
+std::shared_ptr<FileWriteOptions> CsvFileFormat::DefaultWriteOptions() {
+  std::shared_ptr<CsvFileWriteOptions> csv_options(
+      new CsvFileWriteOptions(shared_from_this()));
+  csv_options->write_options =
+      std::make_shared<csv::WriteOptions>(csv::WriteOptions::Defaults());
+  return csv_options;
+}
+
+Result<std::shared_ptr<FileWriter>> CsvFileFormat::MakeWriter(
+    std::shared_ptr<io::OutputStream> destination, std::shared_ptr<Schema> schema,
+    std::shared_ptr<FileWriteOptions> options,
+    fs::FileLocator destination_locator) const {
+  if (!Equals(*options->format())) {
+    return Status::TypeError("Mismatching format/write options.");
+  }
+  auto csv_options = checked_pointer_cast<CsvFileWriteOptions>(options);
+  ARROW_ASSIGN_OR_RAISE(
+      auto writer, csv::MakeCSVWriter(destination, schema, *csv_options->write_options));
+  return std::shared_ptr<FileWriter>(
+      new CsvFileWriter(std::move(destination), std::move(writer), std::move(schema),
+                        std::move(csv_options), std::move(destination_locator)));
+}
+
+CsvFileWriter::CsvFileWriter(std::shared_ptr<io::OutputStream> destination,
+                             std::shared_ptr<ipc::RecordBatchWriter> writer,
+                             std::shared_ptr<Schema> schema,
+                             std::shared_ptr<CsvFileWriteOptions> options,
+                             fs::FileLocator destination_locator)
+    : FileWriter(std::move(schema), std::move(options), std::move(destination),
+                 std::move(destination_locator)),
+      batch_writer_(std::move(writer)) {}
+
+Status CsvFileWriter::Write(const std::shared_ptr<RecordBatch>& batch) {
+  return batch_writer_->WriteRecordBatch(*batch);
+}
+
+Status CsvFileWriter::FinishInternal() { return batch_writer_->Close(); }
+
 }  // namespace dataset
 }  // namespace arrow
diff --git a/cpp/src/arrow/dataset/file_csv.h b/cpp/src/arrow/dataset/file_csv.h
index a365f7eac2b..8d7391727c6 100644
--- a/cpp/src/arrow/dataset/file_csv.h
+++ b/cpp/src/arrow/dataset/file_csv.h
@@ -25,6 +25,7 @@
 #include "arrow/dataset/file_base.h"
 #include "arrow/dataset/type_fwd.h"
 #include "arrow/dataset/visibility.h"
+#include "arrow/ipc/type_fwd.h"
 #include "arrow/status.h"
 #include "arrow/util/compression.h"
 
@@ -68,11 +69,9 @@ class ARROW_DS_EXPORT CsvFileFormat : public FileFormat {
   Result<std::shared_ptr<FileWriter>> MakeWriter(
       std::shared_ptr<io::OutputStream> destination, std::shared_ptr<Schema> schema,
       std::shared_ptr<FileWriteOptions> options,
-      fs::FileLocator destination_locator) const override {
-    return Status::NotImplemented("writing fragment of CsvFileFormat");
-  }
+      fs::FileLocator destination_locator) const override;
 
-  std::shared_ptr<FileWriteOptions> DefaultWriteOptions() override { return NULLPTR; }
+  std::shared_ptr<FileWriteOptions> DefaultWriteOptions() override;
 };
 
 /// \brief Per-scan options for CSV fragments
@@ -88,6 +87,36 @@ struct ARROW_DS_EXPORT CsvFragmentScanOptions : public FragmentScanOptions {
   csv::ReadOptions read_options = csv::ReadOptions::Defaults();
 };
 
+class ARROW_DS_EXPORT CsvFileWriteOptions : public FileWriteOptions {
+ public:
+  /// Options passed to csv::MakeCSVWriter.
+  std::shared_ptr<csv::WriteOptions> write_options;
+
+ protected:
+  using FileWriteOptions::FileWriteOptions;
+
+  friend class CsvFileFormat;
+};
+
+class ARROW_DS_EXPORT CsvFileWriter : public FileWriter {
+ public:
+  Status Write(const std::shared_ptr<RecordBatch>& batch) override;
+
+ private:
+  CsvFileWriter(std::shared_ptr<io::OutputStream> destination,
+                std::shared_ptr<ipc::RecordBatchWriter> writer,
+                std::shared_ptr<Schema> schema,
+                std::shared_ptr<CsvFileWriteOptions> options,
+                fs::FileLocator destination_locator);
+
+  Status FinishInternal() override;
+
+  std::shared_ptr<io::OutputStream> destination_;
+  std::shared_ptr<ipc::RecordBatchWriter> batch_writer_;
+
+  friend class CsvFileFormat;
+};
+
 /// @}
 
 }  // namespace dataset
diff --git a/cpp/src/arrow/dataset/file_csv_test.cc b/cpp/src/arrow/dataset/file_csv_test.cc
index acb66d4c75b..b7b1d342e61 100644
--- a/cpp/src/arrow/dataset/file_csv_test.cc
+++ b/cpp/src/arrow/dataset/file_csv_test.cc
@@ -31,6 +31,7 @@
 #include "arrow/io/memory.h"
 #include "arrow/ipc/writer.h"
 #include "arrow/record_batch.h"
+#include "arrow/testing/generator.h"
 #include "arrow/testing/gtest_util.h"
 #include "arrow/testing/util.h"
 
@@ -45,7 +46,7 @@ class CsvFormatHelper {
     std::shared_ptr<Table> table;
     RETURN_NOT_OK(reader->ReadAll(&table));
     auto options = csv::WriteOptions::Defaults();
-    RETURN_NOT_OK(csv::WriteCSV(*table, options, default_memory_pool(), sink.get()));
+    RETURN_NOT_OK(csv::WriteCSV(*table, options, sink.get()));
     return sink->Finish();
   }
 
@@ -204,6 +205,30 @@ bar)");
   }
 }
 
+TEST_P(TestCsvFileFormat, CustomReadOptionsColumnNames) {
+  auto source = GetFileSource("1,1\n2,3");
+  SetSchema({field("ints_1", int64()), field("ints_2", int64())});
+  auto defaults = std::make_shared<CsvFragmentScanOptions>();
+  defaults->read_options.column_names = {"ints_1", "ints_2"};
+  format_->default_fragment_scan_options = defaults;
+  auto fragment = MakeFragment(*source);
+  ASSERT_OK_AND_ASSIGN(auto physical_schema, fragment->ReadPhysicalSchema());
+  AssertSchemaEqual(opts_->dataset_schema, physical_schema);
+  int64_t rows = 0;
+  for (auto maybe_batch : Batches(fragment.get())) {
+    ASSERT_OK_AND_ASSIGN(auto batch, maybe_batch);
+    rows += batch->num_rows();
+  }
+  ASSERT_EQ(rows, 2);
+
+  defaults->read_options.column_names = {"same", "same"};
+  format_->default_fragment_scan_options = defaults;
+  fragment = MakeFragment(*source);
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      Invalid, ::testing::HasSubstr("CSV file contained multiple columns named same"),
+      Batches(fragment.get()).Next());
+}
+
 TEST_P(TestCsvFileFormat, ScanRecordBatchReaderWithVirtualColumn) {
   auto source = GetFileSource(R"(f64
 1.0
@@ -321,8 +346,19 @@ N/A,bar
   ASSERT_OK(batch_it.Visit([](TaggedRecordBatch) { return Status::OK(); }));
 }
 
-TEST_P(TestCsvFileFormat, WriteRecordBatchReader) {
-  GTEST_SKIP() << "Write support not implemented for CSV";
+TEST_P(TestCsvFileFormat, WriteRecordBatchReader) { TestWrite(); }
+
+TEST_P(TestCsvFileFormat, WriteRecordBatchReaderCustomOptions) {
+  auto options =
+      checked_pointer_cast<CsvFileWriteOptions>(format_->DefaultWriteOptions());
+  options->write_options->include_header = false;
+  auto data_schema = schema({field("f64", float64())});
+  ASSERT_OK_AND_ASSIGN(auto sink, GetFileSink());
+  ASSERT_OK_AND_ASSIGN(auto writer, format_->MakeWriter(sink, data_schema, options, {}));
+  ASSERT_OK(writer->Write(ConstantArrayGenerator::Zeroes(5, data_schema)));
+  ASSERT_OK(writer->Finish());
+  ASSERT_OK_AND_ASSIGN(auto written, sink->Finish());
+  ASSERT_EQ("0\n0\n0\n0\n0\n", written->ToString());
 }
 
 TEST_P(TestCsvFileFormat, CountRows) { TestCountRows(); }
diff --git a/cpp/src/arrow/dataset/type_fwd.h b/cpp/src/arrow/dataset/type_fwd.h
index 019aaf4241b..ad1a2996af4 100644
--- a/cpp/src/arrow/dataset/type_fwd.h
+++ b/cpp/src/arrow/dataset/type_fwd.h
@@ -55,6 +55,8 @@ struct FileSystemDatasetWriteOptions;
 class InMemoryDataset;
 
 class CsvFileFormat;
+class CsvFileWriter;
+class CsvFileWriteOptions;
 struct CsvFragmentScanOptions;
 
 class IpcFileFormat;
diff --git a/python/pyarrow/_csv.pxd b/python/pyarrow/_csv.pxd
index 030c4610e5d..b2fe7d639ae 100644
--- a/python/pyarrow/_csv.pxd
+++ b/python/pyarrow/_csv.pxd
@@ -44,3 +44,11 @@ cdef class ReadOptions(_Weakrefable):
 
     @staticmethod
     cdef ReadOptions wrap(CCSVReadOptions options)
+
+
+cdef class WriteOptions(_Weakrefable):
+    cdef:
+        unique_ptr[CCSVWriteOptions] options
+
+    @staticmethod
+    cdef WriteOptions wrap(CCSVWriteOptions options)
diff --git a/python/pyarrow/_csv.pyx b/python/pyarrow/_csv.pyx
index 01cabc1d8b0..969fcbafee6 100644
--- a/python/pyarrow/_csv.pyx
+++ b/python/pyarrow/_csv.pyx
@@ -31,10 +31,11 @@ from pyarrow.lib cimport (check_status, Field, MemoryPool, Schema,
                           RecordBatchReader, ensure_type,
                           maybe_unbox_memory_pool, get_input_stream,
                           get_writer, native_transcoding_input_stream,
-                          pyarrow_unwrap_batch, pyarrow_unwrap_table,
-                          pyarrow_wrap_schema, pyarrow_wrap_table,
-                          pyarrow_wrap_data_type, pyarrow_unwrap_data_type,
-                          Table, RecordBatch, StopToken)
+                          pyarrow_unwrap_batch, pyarrow_unwrap_schema,
+                          pyarrow_unwrap_table, pyarrow_wrap_schema,
+                          pyarrow_wrap_table, pyarrow_wrap_data_type,
+                          pyarrow_unwrap_data_type, Table, RecordBatch,
+                          StopToken, _CRecordBatchWriter)
 from pyarrow.lib import frombytes, tobytes, SignalStopHandler
 from pyarrow.util import _stringify_path
 
@@ -937,14 +938,12 @@ cdef class WriteOptions(_Weakrefable):
         How many rows to process together when converting and writing
         CSV data
     """
-    cdef:
-        CCSVWriteOptions options
 
     # Avoid mistakingly creating attributes
     __slots__ = ()
 
     def __init__(self, *, include_header=None, batch_size=None):
-        self.options = CCSVWriteOptions.Defaults()
+        self.options.reset(new CCSVWriteOptions(CCSVWriteOptions.Defaults()))
         if include_header is not None:
             self.include_header = include_header
         if batch_size is not None:
@@ -955,11 +954,11 @@ cdef class WriteOptions(_Weakrefable):
         """
         Whether to write an initial header line with column names.
         """
-        return self.options.include_header
+        return deref(self.options).include_header
 
     @include_header.setter
     def include_header(self, value):
-        self.options.include_header = value
+        deref(self.options).include_header = value
 
     @property
     def batch_size(self):
@@ -967,21 +966,27 @@ cdef class WriteOptions(_Weakrefable):
         How many rows to process together when converting and writing
         CSV data.
         """
-        return self.options.batch_size
+        return deref(self.options).batch_size
 
     @batch_size.setter
     def batch_size(self, value):
-        self.options.batch_size = value
+        deref(self.options).batch_size = value
+
+    @staticmethod
+    cdef WriteOptions wrap(CCSVWriteOptions options):
+        out = WriteOptions()
+        out.options.reset(new CCSVWriteOptions(move(options)))
+        return out
 
     def validate(self):
-        check_status(self.options.Validate())
+        check_status(self.options.get().Validate())
 
 
 cdef _get_write_options(WriteOptions write_options, CCSVWriteOptions* out):
     if write_options is None:
         out[0] = CCSVWriteOptions.Defaults()
     else:
-        out[0] = write_options.options
+        out[0] = deref(write_options.options)
 
 
 def write_csv(data, output_file, write_options=None,
@@ -1010,15 +1015,44 @@ def write_csv(data, output_file, write_options=None,
 
     get_writer(output_file, &stream)
     c_memory_pool = maybe_unbox_memory_pool(memory_pool)
+    c_write_options.io_context = CIOContext(c_memory_pool)
     if isinstance(data, RecordBatch):
         batch = pyarrow_unwrap_batch(data).get()
         with nogil:
-            check_status(WriteCSV(deref(batch), c_write_options, c_memory_pool,
-                                  stream.get()))
+            check_status(WriteCSV(deref(batch), c_write_options, stream.get()))
     elif isinstance(data, Table):
         table = pyarrow_unwrap_table(data).get()
         with nogil:
-            check_status(WriteCSV(deref(table), c_write_options, c_memory_pool,
-                                  stream.get()))
+            check_status(WriteCSV(deref(table), c_write_options, stream.get()))
     else:
         raise TypeError(f"Expected Table or RecordBatch, got '{type(data)}'")
+
+
+cdef class CSVWriter(_CRecordBatchWriter):
+    """Writer to create a CSV file.
+
+    Parameters
+    ----------
+    sink: string, path, pyarrow.OutputStream or file-like object
+        The location where to write the CSV data.
+    schema: pyarrow.Schema
+        The schema of the data to be written.
+    write_options: pyarrow.csv.WriteOptions
+        Options to configure writing the CSV data.
+    memory_pool: MemoryPool, optional
+        Pool for temporary allocations.
+    """
+
+    def __init__(self, sink, Schema schema, *,
+                 WriteOptions write_options=None, MemoryPool memory_pool=None):
+        cdef:
+            shared_ptr[COutputStream] c_stream
+            shared_ptr[CSchema] c_schema = pyarrow_unwrap_schema(schema)
+            CCSVWriteOptions c_write_options
+            CMemoryPool* c_memory_pool = maybe_unbox_memory_pool(memory_pool)
+        _get_write_options(write_options, &c_write_options)
+        c_write_options.io_context = CIOContext(c_memory_pool)
+        get_writer(sink, &c_stream)
+        with nogil:
+            self.writer = GetResultValue(MakeCSVWriter(
+                c_stream, c_schema, c_write_options))
diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx
index e7e8341c9d4..9b20da06ca2 100644
--- a/python/pyarrow/_dataset.pyx
+++ b/python/pyarrow/_dataset.pyx
@@ -31,7 +31,8 @@ from pyarrow.lib cimport *
 from pyarrow.lib import ArrowTypeError, frombytes, tobytes
 from pyarrow.includes.libarrow_dataset cimport *
 from pyarrow._fs cimport FileSystem, FileInfo, FileSelector
-from pyarrow._csv cimport ConvertOptions, ParseOptions, ReadOptions
+from pyarrow._csv cimport (
+    ConvertOptions, ParseOptions, ReadOptions, WriteOptions)
 from pyarrow.util import _is_iterable, _is_path_like, _stringify_path
 
 from pyarrow._parquet cimport (
@@ -761,20 +762,21 @@ cdef class FileWriteOptions(_Weakrefable):
 
     cdef:
         shared_ptr[CFileWriteOptions] wrapped
-        CFileWriteOptions* options
+        CFileWriteOptions* c_options
 
     def __init__(self):
         _forbid_instantiation(self.__class__)
 
     cdef void init(self, const shared_ptr[CFileWriteOptions]& sp):
         self.wrapped = sp
-        self.options = sp.get()
+        self.c_options = sp.get()
 
     @staticmethod
     cdef wrap(const shared_ptr[CFileWriteOptions]& sp):
         type_name = frombytes(sp.get().type_name())
 
         classes = {
+            'csv': CsvFileWriteOptions,
             'ipc': IpcFileWriteOptions,
             'parquet': ParquetFileWriteOptions,
         }
@@ -789,7 +791,7 @@ cdef class FileWriteOptions(_Weakrefable):
 
     @property
     def format(self):
-        return FileFormat.wrap(self.options.format())
+        return FileFormat.wrap(self.c_options.format())
 
     cdef inline shared_ptr[CFileWriteOptions] unwrap(self):
         return self.wrapped
@@ -1752,8 +1754,11 @@ cdef class CsvFileFormat(FileFormat):
         FileFormat.init(self, sp)
         self.csv_format = <CCsvFileFormat*> sp.get()
 
-    def make_write_options(self):
-        raise NotImplemented("writing CSV datasets")
+    def make_write_options(self, **kwargs):
+        cdef CsvFileWriteOptions opts = \
+            <CsvFileWriteOptions> FileFormat.make_write_options(self)
+        opts.write_options = WriteOptions(**kwargs)
+        return opts
 
     @property
     def parse_options(self):
@@ -1832,6 +1837,28 @@ cdef class CsvFragmentScanOptions(FragmentScanOptions):
                                         self.read_options)
 
 
+cdef class CsvFileWriteOptions(FileWriteOptions):
+    cdef:
+        CCsvFileWriteOptions* csv_options
+        object _properties
+
+    def __init__(self):
+        _forbid_instantiation(self.__class__)
+
+    @property
+    def write_options(self):
+        return WriteOptions.wrap(deref(self.csv_options.write_options))
+
+    @write_options.setter
+    def write_options(self, WriteOptions write_options not None):
+        self.csv_options.write_options.reset(
+            new CCSVWriteOptions(deref(write_options.options)))
+
+    cdef void init(self, const shared_ptr[CFileWriteOptions]& sp):
+        FileWriteOptions.init(self, sp)
+        self.csv_options = <CCsvFileWriteOptions*> sp.get()
+
+
 cdef class Partitioning(_Weakrefable):
 
     cdef:
@@ -2018,8 +2045,8 @@ cdef class DirectoryPartitioning(Partitioning):
         if max_partition_dictionary_size in {-1, None}:
             infer_dictionary = True
         elif max_partition_dictionary_size != 0:
-            raise NotImplemented("max_partition_dictionary_size must be "
-                                 "0, -1, or None")
+            raise NotImplementedError("max_partition_dictionary_size must be "
+                                      "0, -1, or None")
 
         if infer_dictionary:
             c_options.infer_dictionary = True
@@ -2154,8 +2181,8 @@ cdef class HivePartitioning(Partitioning):
         if max_partition_dictionary_size in {-1, None}:
             infer_dictionary = True
         elif max_partition_dictionary_size != 0:
-            raise NotImplemented("max_partition_dictionary_size must be "
-                                 "0, -1, or None")
+            raise NotImplementedError("max_partition_dictionary_size must be "
+                                      "0, -1, or None")
 
         if infer_dictionary:
             c_options.infer_dictionary = True
diff --git a/python/pyarrow/csv.py b/python/pyarrow/csv.py
index fc1dcafba0b..e073252cbed 100644
--- a/python/pyarrow/csv.py
+++ b/python/pyarrow/csv.py
@@ -19,4 +19,4 @@
 from pyarrow._csv import (  # noqa
     ReadOptions, ParseOptions, ConvertOptions, ISO8601,
     open_csv, read_csv, CSVStreamingReader, write_csv,
-    WriteOptions)
+    WriteOptions, CSVWriter)
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 5057eadbb43..67a960fbacc 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -1637,6 +1637,10 @@ cdef extern from "arrow/csv/api.h" namespace "arrow::csv" nogil:
     cdef cppclass CCSVWriteOptions" arrow::csv::WriteOptions":
         c_bool include_header
         int32_t batch_size
+        CIOContext io_context
+
+        CCSVWriteOptions()
+        CCSVWriteOptions(CCSVWriteOptions&&)
 
         @staticmethod
         CCSVWriteOptions Defaults()
@@ -1658,10 +1662,12 @@ cdef extern from "arrow/csv/api.h" namespace "arrow::csv" nogil:
             CIOContext, shared_ptr[CInputStream],
             CCSVReadOptions, CCSVParseOptions, CCSVConvertOptions)
 
+    cdef CStatus WriteCSV(CTable&, CCSVWriteOptions& options, COutputStream*)
     cdef CStatus WriteCSV(
-        CTable&, CCSVWriteOptions& options, CMemoryPool*, COutputStream*)
-    cdef CStatus WriteCSV(
-        CRecordBatch&, CCSVWriteOptions& options, CMemoryPool*, COutputStream*)
+        CRecordBatch&, CCSVWriteOptions& options, COutputStream*)
+    cdef CResult[shared_ptr[CRecordBatchWriter]] MakeCSVWriter(
+        shared_ptr[COutputStream], shared_ptr[CSchema],
+        CCSVWriteOptions& options)
 
 
 cdef extern from "arrow/json/options.h" nogil:
diff --git a/python/pyarrow/includes/libarrow_dataset.pxd b/python/pyarrow/includes/libarrow_dataset.pxd
index f9349f3a642..303285905cd 100644
--- a/python/pyarrow/includes/libarrow_dataset.pxd
+++ b/python/pyarrow/includes/libarrow_dataset.pxd
@@ -311,6 +311,11 @@ cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil:
             CFileFormat):
         pass
 
+    cdef cppclass CCsvFileWriteOptions \
+            "arrow::dataset::CsvFileWriteOptions"(CFileWriteOptions):
+        shared_ptr[CCSVWriteOptions] write_options
+        CMemoryPool* pool
+
     cdef cppclass CCsvFileFormat "arrow::dataset::CsvFileFormat"(
             CFileFormat):
         CCSVParseOptions parse_options
diff --git a/python/pyarrow/tests/test_csv.py b/python/pyarrow/tests/test_csv.py
index 5faffd3e9f8..b3f0dea3a43 100644
--- a/python/pyarrow/tests/test_csv.py
+++ b/python/pyarrow/tests/test_csv.py
@@ -40,7 +40,7 @@
 import pyarrow as pa
 from pyarrow.csv import (
     open_csv, read_csv, ReadOptions, ParseOptions, ConvertOptions, ISO8601,
-    write_csv, WriteOptions)
+    write_csv, WriteOptions, CSVWriter)
 from pyarrow.tests import util
 
 
@@ -1619,6 +1619,25 @@ def test_write_read_round_trip():
         read_options = ReadOptions(column_names=t.column_names)
         assert t == read_csv(buf, read_options=read_options)
 
+    # Test with writer
+    for read_options, write_options in [
+            (None, WriteOptions(include_header=True)),
+            (ReadOptions(column_names=t.column_names),
+             WriteOptions(include_header=False)),
+    ]:
+        buf = io.BytesIO()
+        with CSVWriter(buf, t.schema, write_options=write_options) as writer:
+            writer.write_table(t)
+        buf.seek(0)
+        assert t == read_csv(buf, read_options=read_options)
+
+        buf = io.BytesIO()
+        with CSVWriter(buf, t.schema, write_options=write_options) as writer:
+            for batch in t.to_batches(max_chunksize=1):
+                writer.write_batch(batch)
+        buf.seek(0)
+        assert t == read_csv(buf, read_options=read_options)
+
 
 def test_read_csv_reference_cycle():
     # ARROW-13187
diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py
index 5e83657ebf2..2ac53864dcd 100644
--- a/python/pyarrow/tests/test_dataset.py
+++ b/python/pyarrow/tests/test_dataset.py
@@ -3289,6 +3289,32 @@ def test_write_dataset_parquet(tempdir):
         assert meta.format_version == version
 
 
+def test_write_dataset_csv(tempdir):
+    table = pa.table([
+        pa.array(range(20)), pa.array(np.random.randn(20)),
+        pa.array(np.repeat(['a', 'b'], 10))
+    ], names=["f1", "f2", "chr1"])
+
+    base_dir = tempdir / 'csv_dataset'
+    ds.write_dataset(table, base_dir, format="csv")
+    # check that all files are present
+    file_paths = list(base_dir.rglob("*"))
+    expected_paths = [base_dir / "part-0.csv"]
+    assert set(file_paths) == set(expected_paths)
+    # check Table roundtrip
+    result = ds.dataset(base_dir, format="csv").to_table()
+    assert result.equals(table)
+
+    # using custom options
+    format = ds.CsvFileFormat(read_options=pyarrow.csv.ReadOptions(
+        column_names=table.schema.names))
+    opts = format.make_write_options(include_header=False)
+    base_dir = tempdir / 'csv_dataset_noheader'
+    ds.write_dataset(table, base_dir, format=format, file_options=opts)
+    result = ds.dataset(base_dir, format=format).to_table()
+    assert result.equals(table)
+
+
 @pytest.mark.parquet
 @pytest.mark.pandas
 def test_write_dataset_arrow_schema_metadata(tempdir):
diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R
index 9257f5787b1..9c1e7dedad2 100644
--- a/r/R/arrowExports.R
+++ b/r/R/arrowExports.R
@@ -460,6 +460,10 @@ dataset___IpcFileWriteOptions__update1 <- function(ipc_options, use_legacy_forma
     invisible(.Call(`_arrow_dataset___IpcFileWriteOptions__update1`, ipc_options, use_legacy_format, metadata_version))
 }
 
+dataset___CsvFileWriteOptions__update <- function(csv_options, write_options){
+    invisible(.Call(`_arrow_dataset___CsvFileWriteOptions__update`, csv_options, write_options))
+}
+
 dataset___IpcFileFormat__Make <- function(){
     .Call(`_arrow_dataset___IpcFileFormat__Make`)
 }
diff --git a/r/R/dataset-format.R b/r/R/dataset-format.R
index 3259ff1077c..6aa7d33cc3a 100644
--- a/r/R/dataset-format.R
+++ b/r/R/dataset-format.R
@@ -298,10 +298,10 @@ ParquetFragmentScanOptions$create <- function(use_buffered_stream = FALSE,
 #' A `FileWriteOptions` holds write options specific to a `FileFormat`.
 FileWriteOptions <- R6Class("FileWriteOptions", inherit = ArrowObject,
   public = list(
-    update = function(...) {
+    update = function(table, ...) {
       if (self$type == "parquet") {
         dataset___ParquetFileWriteOptions__update(self,
-            ParquetWriterProperties$create(...),
+            ParquetWriterProperties$create(table, ...),
             ParquetArrowWriterProperties$create(...))
       } else if (self$type == "ipc") {
         args <- list(...)
@@ -315,6 +315,9 @@ FileWriteOptions <- R6Class("FileWriteOptions", inherit = ArrowObject,
               args$codec,
               get_ipc_metadata_version(args$metadata_version))
         }
+      } else if (self$type == "csv") {
+          dataset___CsvFileWriteOptions__update(self,
+              CsvWriteOptions$create(...))
       }
       invisible(self)
     }
diff --git a/r/R/dataset-write.R b/r/R/dataset-write.R
index 90413e9b9ed..2cc7201b839 100644
--- a/r/R/dataset-write.R
+++ b/r/R/dataset-write.R
@@ -57,7 +57,7 @@
 #' @export
 write_dataset <- function(dataset,
                           path,
-                          format = c("parquet", "feather", "arrow", "ipc"),
+                          format = c("parquet", "feather", "arrow", "ipc", "csv"),
                           partitioning = dplyr::group_vars(dataset),
                           basename_template = paste0("part-{i}.", as.character(format)),
                           hive_style = TRUE,
diff --git a/r/man/write_dataset.Rd b/r/man/write_dataset.Rd
index 225aab28dd2..f29a9ee2189 100644
--- a/r/man/write_dataset.Rd
+++ b/r/man/write_dataset.Rd
@@ -7,7 +7,7 @@
 write_dataset(
   dataset,
   path,
-  format = c("parquet", "feather", "arrow", "ipc"),
+  format = c("parquet", "feather", "arrow", "ipc", "csv"),
   partitioning = dplyr::group_vars(dataset),
   basename_template = paste0("part-{i}.", as.character(format)),
   hive_style = TRUE,
diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp
index 5f3febffcd3..b5bd751af02 100644
--- a/r/src/arrowExports.cpp
+++ b/r/src/arrowExports.cpp
@@ -1805,6 +1805,23 @@ extern "C" SEXP _arrow_dataset___IpcFileWriteOptions__update1(SEXP ipc_options_s
 }
 #endif
 
+// dataset.cpp
+#if defined(ARROW_R_WITH_DATASET)
+void dataset___CsvFileWriteOptions__update(const std::shared_ptr<ds::CsvFileWriteOptions>& csv_options, const std::shared_ptr<arrow::csv::WriteOptions>& write_options);
+extern "C" SEXP _arrow_dataset___CsvFileWriteOptions__update(SEXP csv_options_sexp, SEXP write_options_sexp){
+BEGIN_CPP11
+	arrow::r::Input<const std::shared_ptr<ds::CsvFileWriteOptions>&>::type csv_options(csv_options_sexp);
+	arrow::r::Input<const std::shared_ptr<arrow::csv::WriteOptions>&>::type write_options(write_options_sexp);
+	dataset___CsvFileWriteOptions__update(csv_options, write_options);
+	return R_NilValue;
+END_CPP11
+}
+#else
+extern "C" SEXP _arrow_dataset___CsvFileWriteOptions__update(SEXP csv_options_sexp, SEXP write_options_sexp){
+	Rf_error("Cannot call dataset___CsvFileWriteOptions__update(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+}
+#endif
+
 // dataset.cpp
 #if defined(ARROW_R_WITH_DATASET)
 std::shared_ptr<ds::IpcFileFormat> dataset___IpcFileFormat__Make();
@@ -7038,6 +7055,7 @@ static const R_CallMethodDef CallEntries[] = {
 		{ "_arrow_dataset___ParquetFileWriteOptions__update", (DL_FUNC) &_arrow_dataset___ParquetFileWriteOptions__update, 3}, 
 		{ "_arrow_dataset___IpcFileWriteOptions__update2", (DL_FUNC) &_arrow_dataset___IpcFileWriteOptions__update2, 4}, 
 		{ "_arrow_dataset___IpcFileWriteOptions__update1", (DL_FUNC) &_arrow_dataset___IpcFileWriteOptions__update1, 3}, 
+		{ "_arrow_dataset___CsvFileWriteOptions__update", (DL_FUNC) &_arrow_dataset___CsvFileWriteOptions__update, 2}, 
 		{ "_arrow_dataset___IpcFileFormat__Make", (DL_FUNC) &_arrow_dataset___IpcFileFormat__Make, 0}, 
 		{ "_arrow_dataset___CsvFileFormat__Make", (DL_FUNC) &_arrow_dataset___CsvFileFormat__Make, 3}, 
 		{ "_arrow_dataset___FragmentScanOptions__type_name", (DL_FUNC) &_arrow_dataset___FragmentScanOptions__type_name, 1}, 
diff --git a/r/src/csv.cpp b/r/src/csv.cpp
index 3e58f95c372..93d07d82ed4 100644
--- a/r/src/csv.cpp
+++ b/r/src/csv.cpp
@@ -31,6 +31,7 @@ std::shared_ptr<arrow::csv::WriteOptions> csv___WriteOptions__initialize(
       std::make_shared<arrow::csv::WriteOptions>(arrow::csv::WriteOptions::Defaults());
   res->include_header = cpp11::as_cpp<bool>(options["include_header"]);
   res->batch_size = cpp11::as_cpp<int>(options["batch_size"]);
+  res->io_context = arrow::io::IOContext(gc_memory_pool());
   return res;
 }
 
@@ -190,8 +191,7 @@ std::shared_ptr<arrow::TimestampParser> TimestampParser__MakeISO8601() {
 void csv___WriteCSV__Table(const std::shared_ptr<arrow::Table>& table,
                            const std::shared_ptr<arrow::csv::WriteOptions>& write_options,
                            const std::shared_ptr<arrow::io::OutputStream>& stream) {
-  StopIfNotOk(
-      arrow::csv::WriteCSV(*table, *write_options, gc_memory_pool(), stream.get()));
+  StopIfNotOk(arrow::csv::WriteCSV(*table, *write_options, stream.get()));
 }
 
 // [[arrow::export]]
@@ -199,8 +199,7 @@ void csv___WriteCSV__RecordBatch(
     const std::shared_ptr<arrow::RecordBatch>& record_batch,
     const std::shared_ptr<arrow::csv::WriteOptions>& write_options,
     const std::shared_ptr<arrow::io::OutputStream>& stream) {
-  StopIfNotOk(arrow::csv::WriteCSV(*record_batch, *write_options, gc_memory_pool(),
-                                   stream.get()));
+  StopIfNotOk(arrow::csv::WriteCSV(*record_batch, *write_options, stream.get()));
 }
 
 #endif
diff --git a/r/src/dataset.cpp b/r/src/dataset.cpp
index 7bb1e639e05..b92e3ad4276 100644
--- a/r/src/dataset.cpp
+++ b/r/src/dataset.cpp
@@ -279,6 +279,13 @@ void dataset___IpcFileWriteOptions__update1(
   ipc_options->options->metadata_version = metadata_version;
 }
 
+// [[dataset::export]]
+void dataset___CsvFileWriteOptions__update(
+    const std::shared_ptr<ds::CsvFileWriteOptions>& csv_options,
+    const std::shared_ptr<arrow::csv::WriteOptions>& write_options) {
+  *csv_options->write_options = *write_options;
+}
+
 // [[dataset::export]]
 std::shared_ptr<ds::IpcFileFormat> dataset___IpcFileFormat__Make() {
   return std::make_shared<ds::IpcFileFormat>();
diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R
index ad3e7c30f1f..a0b1bdae022 100644
--- a/r/tests/testthat/test-dataset.R
+++ b/r/tests/testthat/test-dataset.R
@@ -1580,6 +1580,28 @@ test_that("Writing a dataset: Parquet format options", {
   )
 })
 
+test_that("Writing a dataset: CSV format options", {
+  df <- tibble(
+    int = 1:10,
+    dbl = as.numeric(1:10),
+    lgl = rep(c(TRUE, FALSE, NA, TRUE, FALSE), 2),
+    chr = letters[1:10],
+  )
+
+  dst_dir <- make_temp_dir()
+  write_dataset(df, dst_dir, format = "csv")
+  expect_true(dir.exists(dst_dir))
+  new_ds <- open_dataset(dst_dir, format = "csv")
+  expect_equivalent(new_ds %>% collect(), df)
+
+  dst_dir <- make_temp_dir()
+  write_dataset(df, dst_dir, format = "csv", include_header = FALSE)
+  expect_true(dir.exists(dst_dir))
+  new_ds <- open_dataset(dst_dir, format = "csv",
+                         column_names = c("int", "dbl", "lgl", "chr"))
+  expect_equivalent(new_ds %>% collect(), df)
+})
+
 test_that("Dataset writing: unsupported features/input validation", {
   skip_if_not_available("parquet")
   expect_error(write_dataset(4), 'dataset must be a "Dataset"')

From 3a372d6e4af10298cf6219f9951e147ad45c3677 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Mon, 5 Jul 2021 12:16:03 -0400
Subject: [PATCH 505/719] ARROW-13258: [Python] Improve the repr of
 ParquetFileFragment

Closes #10654 from jorisvandenbossche/ARROW-13258

Authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 python/pyarrow/_dataset.pyx          | 17 ++++++++++++++
 python/pyarrow/tests/test_dataset.py | 34 ++++++++++++++++++++++++++++
 2 files changed, 51 insertions(+)

diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx
index 9b20da06ca2..562b7a5a3ad 100644
--- a/python/pyarrow/_dataset.pyx
+++ b/python/pyarrow/_dataset.pyx
@@ -1059,6 +1059,23 @@ cdef class FileFragment(Fragment):
         Fragment.init(self, sp)
         self.file_fragment = <CFileFragment*> sp.get()
 
+    def __repr__(self):
+        type_name = frombytes(self.fragment.type_name())
+        if type_name != "parquet":
+            typ = f" type={type_name}"
+        else:
+            # parquet has a subclass -> type embedded in class name
+            typ = ""
+        partition_dict = _get_partition_keys(self.partition_expression)
+        partition = ", ".join(
+            [f"{key}={val}" for key, val in partition_dict.items()]
+        )
+        if partition:
+            partition = f" partition=[{partition}]"
+        return "<pyarrow.dataset.{0}{1} path={2}{3}>".format(
+            self.__class__.__name__, typ, self.path, partition
+        )
+
     def __reduce__(self):
         buffer = self.buffer
         return self.format.make_fragment, (
diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py
index 2ac53864dcd..3c79d1281cd 100644
--- a/python/pyarrow/tests/test_dataset.py
+++ b/python/pyarrow/tests/test_dataset.py
@@ -27,6 +27,7 @@
 
 import pyarrow as pa
 import pyarrow.csv
+import pyarrow.feather
 import pyarrow.fs as fs
 from pyarrow.tests.util import change_cwd, _filesystem_uri, FSProtocolClass
 
@@ -1365,6 +1366,39 @@ def test_fragments_parquet_subset_invalid(tempdir):
         fragment.subset()
 
 
+@pytest.mark.pandas
+@pytest.mark.parquet
+def test_fragments_repr(tempdir, dataset):
+    # partitioned parquet dataset
+    fragment = list(dataset.get_fragments())[0]
+    assert (
+        repr(fragment) ==
+        "<pyarrow.dataset.ParquetFileFragment path=subdir/1/xxx/file0.parquet "
+        "partition=[key=xxx, group=1]>"
+    )
+
+    # single-file parquet dataset (no partition information in repr)
+    table, path = _create_single_file(tempdir)
+    dataset = ds.dataset(path, format="parquet")
+    fragment = list(dataset.get_fragments())[0]
+    assert (
+        repr(fragment) ==
+        "<pyarrow.dataset.ParquetFileFragment path={}>".format(
+            dataset.filesystem.normalize_path(str(path)))
+    )
+
+    # non-parquet format
+    path = tempdir / "data.feather"
+    pa.feather.write_feather(table, path)
+    dataset = ds.dataset(path, format="feather")
+    fragment = list(dataset.get_fragments())[0]
+    assert (
+        repr(fragment) ==
+        "<pyarrow.dataset.FileFragment type=ipc path={}>".format(
+            dataset.filesystem.normalize_path(str(path)))
+    )
+
+
 def test_partitioning_factory(mockfs):
     paths_or_selector = fs.FileSelector('subdir', recursive=True)
     format = ds.ParquetFileFormat()

From 835de65411caf95432736a4563d8cd4777bf9e27 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Mon, 5 Jul 2021 18:25:23 +0200
Subject: [PATCH 506/719] ARROW-13158: [Python] Fix StructScalar contains and
 repr with duplicate field names

This PR does several small things:

- Fix `__contains__` to not check the actual keys (and not rely on `__getitem__` since that also supports integers)
- Add tests for the case of StructScalar with duplicate field names, and ensure some behaviours that already worked (`__getitem__` with integers, `__iter__`, `keys()`)
- Changed `list(scalar)` (`__iter__`) to also return the keys for a "null" scalar (since we do allow `s["key"]` in that case, that seems most consistent?)
- Changed the `repr` to use a tuple instead of dict representation. I know this just postpones a bit the `as_py()` discussion (should it return dict vs tuple?), but at least this ensures that the `repr` doesn't fail on the short term, and so you can inspect the object.
- I added an explicit `items()` method (overriding the ones from Mapping), because the return value of this can in theory support duplicate fields, and this can be a way to get a list of tuples already. But, this doesn't fully follows the Mapping API as the return value is different (not a `dict_items` object).

Closes #10591 from jorisvandenbossche/ARROW-13158

Authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 python/pyarrow/scalar.pxi                    | 44 +++++++++++++------
 python/pyarrow/tests/test_convert_builtin.py |  2 +-
 python/pyarrow/tests/test_scalars.py         | 46 +++++++++++++++++++-
 3 files changed, 77 insertions(+), 15 deletions(-)

diff --git a/python/pyarrow/scalar.pxi b/python/pyarrow/scalar.pxi
index 9b250e499ea..5c0d3ca1157 100644
--- a/python/pyarrow/scalar.pxi
+++ b/python/pyarrow/scalar.pxi
@@ -614,17 +614,14 @@ cdef class StructScalar(Scalar, collections.abc.Mapping):
             CStructType* dtype = <CStructType*> sp.type.get()
             vector[shared_ptr[CField]] fields = dtype.fields()
 
-        if sp.is_valid:
-            for i in range(dtype.num_fields()):
-                yield frombytes(fields[i].get().name())
+        for i in range(dtype.num_fields()):
+            yield frombytes(fields[i].get().name())
+
+    def items(self):
+        return ((key, self[i]) for i, key in enumerate(self))
 
     def __contains__(self, key):
-        try:
-            self[key]
-        except (KeyError, IndexError):
-            return False
-        else:
-            return True
+        return key in list(self)
 
     def __getitem__(self, key):
         """
@@ -652,21 +649,42 @@ cdef class StructScalar(Scalar, collections.abc.Mapping):
 
         try:
             return Scalar.wrap(GetResultValue(sp.field(ref)))
-        except ArrowInvalid:
+        except ArrowInvalid as exc:
             if isinstance(key, int):
-                raise IndexError(key)
+                raise IndexError(key) from exc
             else:
-                raise KeyError(key)
+                raise KeyError(key) from exc
 
     def as_py(self):
         """
         Return this value as a Python dict.
         """
         if self.is_valid:
-            return {k: v.as_py() for k, v in self.items()}
+            try:
+                return {k: self[k].as_py() for k in self.keys()}
+            except KeyError:
+                raise ValueError(
+                    "Converting to Python dictionary is not supported when "
+                    "duplicate field names are present")
         else:
             return None
 
+    def _as_py_tuple(self):
+        # a version that returns a tuple instead of dict to support repr/str
+        # with the presence of duplicate field names
+        if self.is_valid:
+            return [(key, self[i].as_py()) for i, key in enumerate(self)]
+        else:
+            return None
+
+    def __repr__(self):
+        return '<pyarrow.{}: {!r}>'.format(
+            self.__class__.__name__, self._as_py_tuple()
+        )
+
+    def __str__(self):
+        return str(self._as_py_tuple())
+
 
 cdef class MapScalar(ListScalar):
     """
diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py
index 1a500b8523f..8c4a909e1aa 100644
--- a/python/pyarrow/tests/test_convert_builtin.py
+++ b/python/pyarrow/tests/test_convert_builtin.py
@@ -1726,7 +1726,7 @@ def test_struct_from_list_of_pairs():
         [('a', 6), ('a', 'bar'), ('b', False)],
     ]
     arr = pa.array(data, type=ty)
-    with pytest.raises(KeyError):
+    with pytest.raises(ValueError):
         # TODO(kszucs): ARROW-9997
         arr.to_pylist()
 
diff --git a/python/pyarrow/tests/test_scalars.py b/python/pyarrow/tests/test_scalars.py
index 176f1c12053..d20226135d7 100644
--- a/python/pyarrow/tests/test_scalars.py
+++ b/python/pyarrow/tests/test_scalars.py
@@ -500,6 +500,7 @@ def test_struct():
     assert 'x' in s
     assert 'y' in s
     assert 'z' not in s
+    assert 0 not in s
 
     assert s.as_py() == v
     assert repr(s) != repr(v)
@@ -514,7 +515,7 @@ def test_struct():
         s['non-existent']
 
     s = pa.scalar(None, type=ty)
-    assert list(s) == []
+    assert list(s) == list(s.keys()) == ['x', 'y']
     assert s.as_py() is None
     assert 'x' in s
     assert 'y' in s
@@ -526,6 +527,49 @@ def test_struct():
     assert s['y'].as_py() is None
 
 
+def test_struct_duplicate_fields():
+    ty = pa.struct([
+        pa.field('x', pa.int16()),
+        pa.field('y', pa.float32()),
+        pa.field('x', pa.int64()),
+    ])
+    s = pa.scalar([('x', 1), ('y', 2.0), ('x', 3)], type=ty)
+
+    assert list(s) == list(s.keys()) == ['x', 'y', 'x']
+    assert len(s) == 3
+    assert s == s
+    assert list(s.items()) == [
+        ('x', pa.scalar(1, pa.int16())),
+        ('y', pa.scalar(2.0, pa.float32())),
+        ('x', pa.scalar(3, pa.int64()))
+    ]
+
+    assert 'x' in s
+    assert 'y' in s
+    assert 'z' not in s
+    assert 0 not in s
+
+    # getitem with field names fails for duplicate fields, works for others
+    with pytest.raises(KeyError):
+        s['x']
+
+    assert isinstance(s['y'], pa.FloatScalar)
+    assert s['y'].as_py() == 2.0
+
+    # getitem with integer index works for all fields
+    assert isinstance(s[0], pa.Int16Scalar)
+    assert s[0].as_py() == 1
+    assert isinstance(s[1], pa.FloatScalar)
+    assert s[1].as_py() == 2.0
+    assert isinstance(s[2], pa.Int64Scalar)
+    assert s[2].as_py() == 3
+
+    assert "pyarrow.StructScalar" in repr(s)
+
+    with pytest.raises(ValueError, match="duplicate field names"):
+        s.as_py()
+
+
 def test_map():
     ty = pa.map_(pa.string(), pa.int8())
     v = [('a', 1), ('b', 2)]

From 905809cbfb780dc1a1be17657334937ae59b446e Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Mon, 5 Jul 2021 20:16:13 +0200
Subject: [PATCH 507/719] ARROW-13244: [C++] Add facility to get current thread
 id as uint64

Followup to https://github.com/apache/arrow/pull/10632

Closes #10644 from pitrou/ARROW-13244-current-tid

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/CMakeLists.txt         |  1 +
 cpp/src/arrow/io/stdio.cc            | 95 ++++++++++++++++++++++++++++
 cpp/src/arrow/io/stdio.h             | 82 ++++++++++++++++++++++++
 cpp/src/arrow/ipc/file_to_stream.cc  |  3 +-
 cpp/src/arrow/ipc/stream_to_file.cc  |  3 +-
 cpp/src/arrow/util/async_generator.h | 37 +++--------
 cpp/src/arrow/util/io_util.cc        | 87 +++++--------------------
 cpp/src/arrow/util/io_util.h         | 68 ++------------------
 8 files changed, 214 insertions(+), 162 deletions(-)
 create mode 100644 cpp/src/arrow/io/stdio.cc
 create mode 100644 cpp/src/arrow/io/stdio.h

diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index 56e734226b0..634d202623f 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -188,6 +188,7 @@ set(ARROW_SRCS
     io/interfaces.cc
     io/memory.cc
     io/slow.cc
+    io/stdio.cc
     io/transform.cc
     util/basic_decimal.cc
     util/bit_block_counter.cc
diff --git a/cpp/src/arrow/io/stdio.cc b/cpp/src/arrow/io/stdio.cc
new file mode 100644
index 00000000000..7ef4843a224
--- /dev/null
+++ b/cpp/src/arrow/io/stdio.cc
@@ -0,0 +1,95 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/io/stdio.h"
+
+#include <iostream>
+
+#include "arrow/buffer.h"
+#include "arrow/result.h"
+
+namespace arrow {
+namespace io {
+
+//
+// StdoutStream implementation
+//
+
+StdoutStream::StdoutStream() : pos_(0) { set_mode(FileMode::WRITE); }
+
+Status StdoutStream::Close() { return Status::OK(); }
+
+bool StdoutStream::closed() const { return false; }
+
+Result<int64_t> StdoutStream::Tell() const { return pos_; }
+
+Status StdoutStream::Write(const void* data, int64_t nbytes) {
+  pos_ += nbytes;
+  std::cout.write(reinterpret_cast<const char*>(data), nbytes);
+  return Status::OK();
+}
+
+//
+// StderrStream implementation
+//
+
+StderrStream::StderrStream() : pos_(0) { set_mode(FileMode::WRITE); }
+
+Status StderrStream::Close() { return Status::OK(); }
+
+bool StderrStream::closed() const { return false; }
+
+Result<int64_t> StderrStream::Tell() const { return pos_; }
+
+Status StderrStream::Write(const void* data, int64_t nbytes) {
+  pos_ += nbytes;
+  std::cerr.write(reinterpret_cast<const char*>(data), nbytes);
+  return Status::OK();
+}
+
+//
+// StdinStream implementation
+//
+
+StdinStream::StdinStream() : pos_(0) { set_mode(FileMode::READ); }
+
+Status StdinStream::Close() { return Status::OK(); }
+
+bool StdinStream::closed() const { return false; }
+
+Result<int64_t> StdinStream::Tell() const { return pos_; }
+
+Result<int64_t> StdinStream::Read(int64_t nbytes, void* out) {
+  std::cin.read(reinterpret_cast<char*>(out), nbytes);
+  if (std::cin) {
+    pos_ += nbytes;
+    return nbytes;
+  } else {
+    return 0;
+  }
+}
+
+Result<std::shared_ptr<Buffer>> StdinStream::Read(int64_t nbytes) {
+  ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateResizableBuffer(nbytes));
+  ARROW_ASSIGN_OR_RAISE(int64_t bytes_read, Read(nbytes, buffer->mutable_data()));
+  ARROW_RETURN_NOT_OK(buffer->Resize(bytes_read, false));
+  buffer->ZeroPadding();
+  return std::move(buffer);
+}
+
+}  // namespace io
+}  // namespace arrow
diff --git a/cpp/src/arrow/io/stdio.h b/cpp/src/arrow/io/stdio.h
new file mode 100644
index 00000000000..9484ac77124
--- /dev/null
+++ b/cpp/src/arrow/io/stdio.h
@@ -0,0 +1,82 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+
+#include "arrow/io/interfaces.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+namespace io {
+
+// Output stream that just writes to stdout.
+class ARROW_EXPORT StdoutStream : public OutputStream {
+ public:
+  StdoutStream();
+  ~StdoutStream() override {}
+
+  Status Close() override;
+  bool closed() const override;
+
+  Result<int64_t> Tell() const override;
+
+  Status Write(const void* data, int64_t nbytes) override;
+
+ private:
+  int64_t pos_;
+};
+
+// Output stream that just writes to stderr.
+class ARROW_EXPORT StderrStream : public OutputStream {
+ public:
+  StderrStream();
+  ~StderrStream() override {}
+
+  Status Close() override;
+  bool closed() const override;
+
+  Result<int64_t> Tell() const override;
+
+  Status Write(const void* data, int64_t nbytes) override;
+
+ private:
+  int64_t pos_;
+};
+
+// Input stream that just reads from stdin.
+class ARROW_EXPORT StdinStream : public InputStream {
+ public:
+  StdinStream();
+  ~StdinStream() override {}
+
+  Status Close() override;
+  bool closed() const override;
+
+  Result<int64_t> Tell() const override;
+
+  Result<int64_t> Read(int64_t nbytes, void* out) override;
+
+  Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) override;
+
+ private:
+  int64_t pos_;
+};
+
+}  // namespace io
+}  // namespace arrow
diff --git a/cpp/src/arrow/ipc/file_to_stream.cc b/cpp/src/arrow/ipc/file_to_stream.cc
index c15eb6de21f..6ae6a4fa0c8 100644
--- a/cpp/src/arrow/ipc/file_to_stream.cc
+++ b/cpp/src/arrow/ipc/file_to_stream.cc
@@ -20,13 +20,12 @@
 #include <string>
 
 #include "arrow/io/file.h"
+#include "arrow/io/stdio.h"
 #include "arrow/ipc/reader.h"
 #include "arrow/ipc/writer.h"
 #include "arrow/result.h"
 #include "arrow/status.h"
 
-#include "arrow/util/io_util.h"
-
 namespace arrow {
 
 class RecordBatch;
diff --git a/cpp/src/arrow/ipc/stream_to_file.cc b/cpp/src/arrow/ipc/stream_to_file.cc
index 3a2a7fb49fe..40288b687cf 100644
--- a/cpp/src/arrow/ipc/stream_to_file.cc
+++ b/cpp/src/arrow/ipc/stream_to_file.cc
@@ -19,13 +19,12 @@
 #include <memory>
 #include <string>
 
+#include "arrow/io/stdio.h"
 #include "arrow/ipc/reader.h"
 #include "arrow/ipc/writer.h"
 #include "arrow/record_batch.h"
 #include "arrow/status.h"
 
-#include "arrow/util/io_util.h"
-
 namespace arrow {
 namespace ipc {
 
diff --git a/cpp/src/arrow/util/async_generator.h b/cpp/src/arrow/util/async_generator.h
index 8c130c66193..c99bd865e45 100644
--- a/cpp/src/arrow/util/async_generator.h
+++ b/cpp/src/arrow/util/async_generator.h
@@ -21,11 +21,12 @@
 #include <cassert>
 #include <cstring>
 #include <deque>
+#include <limits>
 #include <queue>
-#include <thread>
 
 #include "arrow/util/functional.h"
 #include "arrow/util/future.h"
+#include "arrow/util/io_util.h"
 #include "arrow/util/iterator.h"
 #include "arrow/util/mutex.h"
 #include "arrow/util/optional.h"
@@ -1247,6 +1248,8 @@ class BackgroundGenerator {
   }
 
  protected:
+  static constexpr uint64_t kUnlikelyThreadId{std::numeric_limits<uint64_t>::max()};
+
   struct State {
     State(internal::Executor* io_executor, Iterator<T> it, int max_q, int q_restart)
         : io_executor(io_executor),
@@ -1255,9 +1258,7 @@ class BackgroundGenerator {
           it(std::move(it)),
           reading(false),
           finished(false),
-          should_shutdown(false) {
-      SetWorkerThreadId({});  // default-initialized thread id
-    }
+          should_shutdown(false) {}
 
     void ClearQueue() {
       while (!queue.empty()) {
@@ -1316,28 +1317,11 @@ class BackgroundGenerator {
       return next;
     }
 
-    void SetWorkerThreadId(const std::thread::id tid) {
-      uint64_t equiv{0};
-      // std::thread::id is trivially copyable as per C++ spec,
-      // so type punning as a uint64_t should work
-      static_assert(sizeof(std::thread::id) <= sizeof(uint64_t),
-                    "std::thread::id can't fit into uint64_t");
-      memcpy(&equiv, reinterpret_cast<const void*>(&tid), sizeof(tid));
-      worker_thread_id.store(equiv);
-    }
-
-    std::thread::id GetWorkerThreadId() {
-      const auto equiv = worker_thread_id.load();
-      std::thread::id tid;
-      memcpy(reinterpret_cast<void*>(&tid), &equiv, sizeof(tid));
-      return tid;
-    }
-
     internal::Executor* io_executor;
     const int max_q;
     const int q_restart;
     Iterator<T> it;
-    std::atomic<uint64_t> worker_thread_id;
+    std::atomic<uint64_t> worker_thread_id{kUnlikelyThreadId};
 
     // If true, the task is actively pumping items from the queue and does not need a
     // restart
@@ -1346,8 +1330,7 @@ class BackgroundGenerator {
     bool finished;
     // Signal to the background task to end early because consumers have given up on it
     bool should_shutdown;
-    // If the queue is empty then the consumer will create a waiting future and wait for
-    // it
+    // If the queue is empty, the consumer will create a waiting future and wait for it
     std::queue<Result<T>> queue;
     util::optional<Future<T>> waiting_future;
     // Every background task is given a future to complete when it is entirely finished
@@ -1365,7 +1348,7 @@ class BackgroundGenerator {
       ///
       /// It's a deadlock if we enter cleanup from
       /// the worker thread but it can happen if the consumer doesn't transfer away
-      assert(state->GetWorkerThreadId() != std::this_thread::get_id());
+      assert(state->worker_thread_id.load() != ::arrow::internal::GetThreadId());
       Future<> finish_fut;
       {
         auto lock = state->mutex.Lock();
@@ -1384,9 +1367,9 @@ class BackgroundGenerator {
   };
 
   static void WorkerTask(std::shared_ptr<State> state) {
+    state->worker_thread_id.store(::arrow::internal::GetThreadId());
     // We need to capture the state to read while outside the mutex
     bool reading = true;
-    state->SetWorkerThreadId(std::this_thread::get_id());
     while (reading) {
       auto next = state->it.Next();
       // Need to capture state->waiting_future inside the mutex to mark finished outside
@@ -1438,7 +1421,7 @@ class BackgroundGenerator {
       // reference it.  We can safely transition to idle now.
       task_finished = state->task_finished;
       state->task_finished = Future<>();
-      state->SetWorkerThreadId({});  // default-initialized thread id
+      state->worker_thread_id.store(kUnlikelyThreadId);
     }
     task_finished.MarkFinished();
   }
diff --git a/cpp/src/arrow/util/io_util.cc b/cpp/src/arrow/util/io_util.cc
index 552417e5a13..f6566ea7e36 100644
--- a/cpp/src/arrow/util/io_util.cc
+++ b/cpp/src/arrow/util/io_util.cc
@@ -41,6 +41,7 @@
 #include <random>
 #include <sstream>
 #include <string>
+#include <thread>
 #include <utility>
 #include <vector>
 
@@ -99,76 +100,6 @@ namespace arrow {
 
 using internal::checked_cast;
 
-namespace io {
-
-//
-// StdoutStream implementation
-//
-
-StdoutStream::StdoutStream() : pos_(0) { set_mode(FileMode::WRITE); }
-
-Status StdoutStream::Close() { return Status::OK(); }
-
-bool StdoutStream::closed() const { return false; }
-
-Result<int64_t> StdoutStream::Tell() const { return pos_; }
-
-Status StdoutStream::Write(const void* data, int64_t nbytes) {
-  pos_ += nbytes;
-  std::cout.write(reinterpret_cast<const char*>(data), nbytes);
-  return Status::OK();
-}
-
-//
-// StderrStream implementation
-//
-
-StderrStream::StderrStream() : pos_(0) { set_mode(FileMode::WRITE); }
-
-Status StderrStream::Close() { return Status::OK(); }
-
-bool StderrStream::closed() const { return false; }
-
-Result<int64_t> StderrStream::Tell() const { return pos_; }
-
-Status StderrStream::Write(const void* data, int64_t nbytes) {
-  pos_ += nbytes;
-  std::cerr.write(reinterpret_cast<const char*>(data), nbytes);
-  return Status::OK();
-}
-
-//
-// StdinStream implementation
-//
-
-StdinStream::StdinStream() : pos_(0) { set_mode(FileMode::READ); }
-
-Status StdinStream::Close() { return Status::OK(); }
-
-bool StdinStream::closed() const { return false; }
-
-Result<int64_t> StdinStream::Tell() const { return pos_; }
-
-Result<int64_t> StdinStream::Read(int64_t nbytes, void* out) {
-  std::cin.read(reinterpret_cast<char*>(out), nbytes);
-  if (std::cin) {
-    pos_ += nbytes;
-    return nbytes;
-  } else {
-    return 0;
-  }
-}
-
-Result<std::shared_ptr<Buffer>> StdinStream::Read(int64_t nbytes) {
-  ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateResizableBuffer(nbytes));
-  ARROW_ASSIGN_OR_RAISE(int64_t bytes_read, Read(nbytes, buffer->mutable_data()));
-  ARROW_RETURN_NOT_OK(buffer->Resize(bytes_read, false));
-  buffer->ZeroPadding();
-  return std::move(buffer);
-}
-
-}  // namespace io
-
 namespace internal {
 
 namespace {
@@ -1734,5 +1665,21 @@ int64_t GetRandomSeed() {
   return static_cast<int64_t>(seed_gen());
 }
 
+uint64_t GetThreadId() {
+  uint64_t equiv{0};
+  // std::thread::id is trivially copyable as per C++ spec,
+  // so type punning as a uint64_t should work
+  static_assert(sizeof(std::thread::id) <= sizeof(uint64_t),
+                "std::thread::id can't fit into uint64_t");
+  const auto tid = std::this_thread::get_id();
+  memcpy(&equiv, reinterpret_cast<const void*>(&tid), sizeof(tid));
+  return equiv;
+}
+
+uint64_t GetOptionalThreadId() {
+  auto tid = GetThreadId();
+  return (tid == 0) ? tid - 1 : tid;
+}
+
 }  // namespace internal
 }  // namespace arrow
diff --git a/cpp/src/arrow/util/io_util.h b/cpp/src/arrow/util/io_util.h
index 38bcdd4b41f..4255dd37105 100644
--- a/cpp/src/arrow/util/io_util.h
+++ b/cpp/src/arrow/util/io_util.h
@@ -30,73 +30,12 @@
 #include <signal.h>  // Needed for struct sigaction
 #endif
 
-#include "arrow/io/interfaces.h"
 #include "arrow/status.h"
 #include "arrow/type_fwd.h"
 #include "arrow/util/macros.h"
 #include "arrow/util/windows_fixup.h"
 
 namespace arrow {
-
-class Buffer;
-
-namespace io {
-
-// Output stream that just writes to stdout.
-class ARROW_EXPORT StdoutStream : public OutputStream {
- public:
-  StdoutStream();
-  ~StdoutStream() override {}
-
-  Status Close() override;
-  bool closed() const override;
-
-  Result<int64_t> Tell() const override;
-
-  Status Write(const void* data, int64_t nbytes) override;
-
- private:
-  int64_t pos_;
-};
-
-// Output stream that just writes to stderr.
-class ARROW_EXPORT StderrStream : public OutputStream {
- public:
-  StderrStream();
-  ~StderrStream() override {}
-
-  Status Close() override;
-  bool closed() const override;
-
-  Result<int64_t> Tell() const override;
-
-  Status Write(const void* data, int64_t nbytes) override;
-
- private:
-  int64_t pos_;
-};
-
-// Input stream that just reads from stdin.
-class ARROW_EXPORT StdinStream : public InputStream {
- public:
-  StdinStream();
-  ~StdinStream() override {}
-
-  Status Close() override;
-  bool closed() const override;
-
-  Result<int64_t> Tell() const override;
-
-  Result<int64_t> Read(int64_t nbytes, void* out) override;
-
-  Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) override;
-
- private:
-  int64_t pos_;
-};
-
-}  // namespace io
-
 namespace internal {
 
 // NOTE: 8-bit path strings on Windows are encoded using UTF-8.
@@ -399,5 +338,12 @@ Status SendSignalToThread(int signum, uint64_t thread_id);
 ARROW_EXPORT
 int64_t GetRandomSeed();
 
+/// \brief Get the current thread id
+///
+/// In addition to having the same properties as std::thread, the returned value
+/// is a regular integer value, which is more convenient than an opaque type.
+ARROW_EXPORT
+uint64_t GetThreadId();
+
 }  // namespace internal
 }  // namespace arrow

From 41c4143992905cc85eb61a417cf9460c6db6b4df Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Mon, 5 Jul 2021 14:58:49 -0500
Subject: [PATCH 508/719] ARROW-13199: [R] add ubuntu 21.04 to nightly builds

Closes #10611 from jonkeane/ARROW-13199-r-ubuntu-21.04

Authored-by: Jonathan Keane <jkeane@gmail.com>
Signed-off-by: Jonathan Keane <jkeane@gmail.com>
---
 .env                                    |   1 +
 ci/docker/linux-apt-r.dockerfile        |  10 ++
 ci/docker/ubuntu-21.04-cpp.dockerfile   | 160 ++++++++++++++++++++++++
 dev/tasks/docker-tests/github.linux.yml |   8 +-
 dev/tasks/tasks.yml                     |  22 ++++
 docker-compose.yml                      |  10 ++
 r/configure                             |   7 +-
 r/tools/nixlibs.R                       |   5 +
 8 files changed, 219 insertions(+), 4 deletions(-)
 create mode 100644 ci/docker/ubuntu-21.04-cpp.dockerfile

diff --git a/.env b/.env
index e06c6e57a3e..579895e2d61 100644
--- a/.env
+++ b/.env
@@ -61,6 +61,7 @@ SPARK=master
 DOTNET=3.1
 R=4.1
 ARROW_R_DEV=TRUE
+GCC_VERSION=""
 # These correspond to images on Docker Hub that contain R, e.g. rhub/ubuntu-gcc-release:latest
 R_ORG=rhub
 R_IMAGE=ubuntu-gcc-release
diff --git a/ci/docker/linux-apt-r.dockerfile b/ci/docker/linux-apt-r.dockerfile
index 7b78da9691a..36f4fb24aba 100644
--- a/ci/docker/linux-apt-r.dockerfile
+++ b/ci/docker/linux-apt-r.dockerfile
@@ -61,6 +61,16 @@ RUN apt-get update -y && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
+ARG gcc_version=""
+RUN if [ "${gcc_version}" != "" ]; then \
+      update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-${gcc_version} 100 && \
+      update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-${gcc_version} 100 && \
+      update-alternatives --install /usr/bin/cc cc /usr/bin/gcc 30 && \
+      update-alternatives --set cc /usr/bin/gcc && \
+      update-alternatives --install /usr/bin/c++ c++ /usr/bin/g++ 30 && \
+      update-alternatives --set c++ /usr/bin/g++; \
+    fi
+
 # Ensure parallel R package installation, set CRAN repo mirror,
 # and use pre-built binaries where possible
 COPY ci/etc/rprofile /arrow/ci/etc/
diff --git a/ci/docker/ubuntu-21.04-cpp.dockerfile b/ci/docker/ubuntu-21.04-cpp.dockerfile
new file mode 100644
index 00000000000..18c377811bc
--- /dev/null
+++ b/ci/docker/ubuntu-21.04-cpp.dockerfile
@@ -0,0 +1,160 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG base=amd64/ubuntu:20.04
+FROM ${base}
+ARG arch
+
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+
+RUN echo "debconf debconf/frontend select Noninteractive" | \
+        debconf-set-selections
+
+# Installs LLVM toolchain, for Gandiva and testing other compilers
+#
+# Note that this is installed before the base packages to improve iteration
+# while debugging package list with docker build.
+ARG clang_tools
+ARG llvm
+RUN if [ "${llvm}" -gt "10" ]; then \
+      apt-get update -y -q && \
+      apt-get install -y -q --no-install-recommends \
+          apt-transport-https \
+          ca-certificates \
+          gnupg \
+          wget && \
+      wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \
+      echo "deb https://apt.llvm.org/hirsute/ llvm-toolchain-hirsute-${llvm} main" > \
+         /etc/apt/sources.list.d/llvm.list && \
+      if [ "${clang_tools}" != "${llvm}" -a "${clang_tools}" -gt 10 ]; then \
+        echo "deb https://apt.llvm.org/hirsute/ llvm-toolchain-hirsute-${clang_tools} main" > \
+           /etc/apt/sources.list.d/clang-tools.list; \
+      fi \
+    fi && \
+    apt-get update -y -q && \
+    apt-get install -y -q --no-install-recommends \
+        clang-${clang_tools} \
+        clang-${llvm} \
+        clang-format-${clang_tools} \
+        clang-tidy-${clang_tools} \
+        llvm-${llvm}-dev && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists*
+
+# Installs C++ toolchain and dependencies
+RUN apt-get update -y -q && \
+    apt-get install -y -q --no-install-recommends \
+        autoconf \
+        ca-certificates \
+        ccache \
+        cmake \
+        gdb \
+        git \
+        libbenchmark-dev \
+        libboost-filesystem-dev \
+        libboost-system-dev \
+        libbrotli-dev \
+        libbz2-dev \
+        libc-ares-dev \
+        libcurl4-openssl-dev \
+        libgflags-dev \
+        libgoogle-glog-dev \
+        libgrpc++-dev \
+        liblz4-dev \
+        libprotobuf-dev \
+        libprotoc-dev \
+        libre2-dev \
+        libsnappy-dev \
+        libssl-dev \
+        libthrift-dev \
+        libutf8proc-dev \
+        libzstd-dev \
+        make \
+        ninja-build \
+        pkg-config \
+        protobuf-compiler \
+        protobuf-compiler-grpc \
+        rapidjson-dev \
+        tzdata \
+        wget && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists*
+
+COPY ci/scripts/install_minio.sh \
+     /arrow/ci/scripts/
+RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local
+
+# Prioritize system packages and local installation
+# The following dependencies will be downloaded due to missing/invalid packages
+# provided by the distribution:
+# - libc-ares-dev does not install CMake config files
+# - flatbuffer is not packaged
+# - libgtest-dev only provide sources
+# - libprotobuf-dev only provide sources
+ENV ARROW_BUILD_TESTS=ON \
+    ARROW_DEPENDENCY_SOURCE=SYSTEM \
+    ARROW_DATASET=ON \
+    ARROW_FLIGHT=OFF \
+    ARROW_GANDIVA=ON \
+    ARROW_HDFS=ON \
+    ARROW_HOME=/usr/local \
+    ARROW_INSTALL_NAME_RPATH=OFF \
+    ARROW_NO_DEPRECATED_API=ON \
+    ARROW_ORC=ON \
+    ARROW_PARQUET=ON \
+    ARROW_PLASMA=ON \
+    ARROW_S3=ON \
+    ARROW_USE_ASAN=OFF \
+    ARROW_USE_CCACHE=ON \
+    ARROW_USE_UBSAN=OFF \
+    ARROW_WITH_BROTLI=ON \
+    ARROW_WITH_BZ2=ON \
+    ARROW_WITH_LZ4=ON \
+    ARROW_WITH_SNAPPY=ON \
+    ARROW_WITH_ZLIB=ON \
+    ARROW_WITH_ZSTD=ON \
+    AWSSDK_SOURCE=BUNDLED \
+    GTest_SOURCE=BUNDLED \
+    ORC_SOURCE=BUNDLED \
+    PARQUET_BUILD_EXAMPLES=ON \
+    PARQUET_BUILD_EXECUTABLES=ON \
+    PATH=/usr/lib/ccache/:$PATH \
+    PYTHON=python3
+
+ARG gcc_version=""
+RUN if [ "${gcc_version}" = "" ]; then \
+      apt-get update -y -q && \
+      apt-get install -y -q --no-install-recommends \
+          g++ \
+          gcc; \
+    else \
+      if [ "${gcc_version}" -gt "10" ]; then \
+          apt-get update -y -q && \
+          apt-get install -y -q --no-install-recommends software-properties-common && \
+          add-apt-repository ppa:ubuntu-toolchain-r/volatile; \
+      fi; \
+      apt-get update -y -q && \
+      apt-get install -y -q --no-install-recommends \
+          g++-${gcc_version} \
+          gcc-${gcc_version} && \
+      update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-${gcc_version} 100 && \
+      update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-${gcc_version} 100 && \
+      update-alternatives --install /usr/bin/cc cc /usr/bin/gcc 100 && \
+      update-alternatives --set cc /usr/bin/gcc && \
+      update-alternatives --install /usr/bin/c++ c++ /usr/bin/g++ 100 && \
+      update-alternatives --set c++ /usr/bin/g++; \
+    fi
\ No newline at end of file
diff --git a/dev/tasks/docker-tests/github.linux.yml b/dev/tasks/docker-tests/github.linux.yml
index 06837a88ded..dd347b8306c 100644
--- a/dev/tasks/docker-tests/github.linux.yml
+++ b/dev/tasks/docker-tests/github.linux.yml
@@ -39,7 +39,13 @@ jobs:
 
       - name: Execute Docker Build
         shell: bash
-        run: archery docker run -e SETUPTOOLS_SCM_PRETEND_VERSION="{{ arrow.no_rc_version }}" {{ flags|default("") }} {{ image }}
+        run: archery docker run -e SETUPTOOLS_SCM_PRETEND_VERSION="{{ arrow.no_rc_version }}" {{ flags|default("") }} {{ image }} {{ command|default("") }}
+
+    {% if '-r-' in image %}
+      - name: Dump R install logs
+        run: cat arrow/r/check/arrow.Rcheck/00install.out
+        continue-on-error: true
+    {% endif %}
 
     {% if arrow.branch == 'master' %}
       {{ macros.github_login_dockerhub()|indent }}
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index e908a632845..7aea18fdd83 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -934,6 +934,28 @@ tasks:
       r_tag: {{ r_tag }}
 {% endfor %}
 
+  test-r-ubuntu-21.04:
+    ci: github
+    template: docker-tests/github.linux.yml
+    params:
+      env:
+        UBUNTU: 21.04
+        CLANG_TOOLS: 9 # can remove this when >=9 is the default
+      flags: '-e ARROW_SOURCE_HOME="/arrow" -e FORCE_BUNDLED_BUILD=TRUE -e LIBARROW_BUILD=TRUE'
+      image: ubuntu-r
+
+  test-r-gcc-11:
+    ci: github
+    template: docker-tests/github.linux.yml
+    params:
+      env:
+        UBUNTU: 21.04
+        CLANG_TOOLS: 9 # can remove this when >=9 is the default
+        GCC_VERSION: 11
+      # S3 support is not buildable with gcc11 right now
+      flags: '-e ARROW_SOURCE_HOME="/arrow" -e FORCE_BUNDLED_BUILD=TRUE -e LIBARROW_BUILD=TRUE -e ARROW_S3=OFF'
+      image: ubuntu-r-only-r
+
   test-r-rstudio-r-base-3.6-centos7-devtoolset-8:
     ci: azure
     template: r/azure.linux.yml
diff --git a/docker-compose.yml b/docker-compose.yml
index f65609af7e1..6b435e8da5d 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -110,6 +110,7 @@ x-hierarchy:
       - ubuntu-docs
     - ubuntu-python-sdist-test
     - ubuntu-r
+    - ubuntu-r-only-r
   - ubuntu-cuda-cpp:
     - ubuntu-cuda-python
   - ubuntu-csharp
@@ -300,6 +301,7 @@ services:
         base: "${ARCH}/ubuntu:${UBUNTU}"
         clang_tools: ${CLANG_TOOLS}
         llvm: ${LLVM}
+        gcc_version: ${GCC_VERSION}
     shm_size: *shm-size
     ulimits: *ulimits
     environment:
@@ -1008,12 +1010,14 @@ services:
         arch: ${ARCH}
         r: ${R}
         base: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-cpp
+        gcc_version: ${GCC_VERSION}
     shm_size: *shm-size
     environment:
       <<: *ccache
       ARROW_R_CXXFLAGS: '-Werror'
       LIBARROW_BUILD: 'false'
       NOT_CRAN: 'true'
+      ARROW_R_DEV: ${ARROW_R_DEV}
     volumes: *ubuntu-volumes
     command: >
       /bin/bash -c "
@@ -1021,6 +1025,12 @@ services:
         /arrow/ci/scripts/python_build.sh /arrow /build &&
         /arrow/ci/scripts/r_test.sh /arrow"
 
+  ubuntu-r-only-r:
+    extends: ubuntu-r
+    command: >
+      /bin/bash -c "
+        /arrow/ci/scripts/r_test.sh /arrow"
+
   r:
     # This lets you test building/installing the arrow R package
     # (including building the C++ library) on any Docker image that contains R
diff --git a/r/configure b/r/configure
index aa7e7a8d01b..22a34bc519f 100755
--- a/r/configure
+++ b/r/configure
@@ -69,7 +69,7 @@ if [ "$FORCE_AUTOBREW" = "true" ] || [ "$FORCE_BUNDLED_BUILD" = "true" ]; then
 fi
 
 # Note that cflags may be empty in case of success
-if [ "$ARROW_HOME" ]; then
+if [ "$ARROW_HOME" ] && [ "$FORCE_BUNDLED_BUILD" != "true" ]; then
   echo "*** Using ARROW_HOME as the source of libarrow"
   PKG_CFLAGS="-I$ARROW_HOME/include $PKG_CFLAGS"
   PKG_DIRS="-L$ARROW_HOME/lib"
@@ -88,7 +88,8 @@ else
   fi
 
   if [ "$PKGCONFIG_CFLAGS" ] && [ "$PKGCONFIG_LIBS" ]; then
-    echo "*** Arrow C++ libraries found via pkg-config"
+    FOUND_LIB_DIR=`echo $PKG_DIRS | sed -e 's/^-L//'`
+    echo "*** Arrow C++ libraries found via pkg-config at $FOUND_LIB_DIR"
     PKG_CFLAGS="$PKGCONFIG_CFLAGS"
     PKG_LIBS=${PKGCONFIG_LIBS}
     PKG_DIRS=${PKGCONFIG_DIRS}
@@ -244,7 +245,7 @@ if [ $? -eq 0 ] || [ "$UNAME" = "Darwin" ]; then
     fi
   fi
   # prepend PKG_DIRS and append BUNDLED_LIBS to PKG_LIBS
-  PKG_LIBS="$PKG_DIRS $PKG_LIBS $BUNDLED_LIBS"
+  PKG_LIBS="$PKG_DIRS $PKG_LIBS $BUNDLED_LIBS -fno-lto"
   echo "PKG_CFLAGS=$PKG_CFLAGS"
   echo "PKG_LIBS=$PKG_LIBS"
 else
diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R
index 97cc69fa62d..f638b7b8a9b 100644
--- a/r/tools/nixlibs.R
+++ b/r/tools/nixlibs.R
@@ -326,6 +326,7 @@ build_libarrow <- function(src_dir, dst_dir) {
     CC = R_CMD_config("CC"),
     CXX = paste(R_CMD_config("CXX11"), R_CMD_config("CXX11STD")),
     # CXXFLAGS = R_CMD_config("CXX11FLAGS"), # We don't want the same debug symbols
+    ARROW_R_CXXFLAGS = paste(Sys.getenv("ARROW_R_CXXFLAGS", ""), "-fno-lto"),
     LDFLAGS = R_CMD_config("LDFLAGS")
   )
   env_vars <- paste0(names(env_var_list), '="', env_var_list, '"', collapse = " ")
@@ -415,6 +416,10 @@ cmake_version <- function(cmd = "cmake") {
 
 with_s3_support <- function(env_vars) {
   arrow_s3 <- toupper(Sys.getenv("ARROW_S3")) == "ON" || tolower(Sys.getenv("LIBARROW_MINIMAL")) == "false"
+  # but if ARROW_S3=OFF explicitly, we are definitely off, so override
+  if (toupper(Sys.getenv("ARROW_S3")) == "OFF" ) {
+    arrow_s3 <- FALSE
+  }
   if (arrow_s3) {
     # User wants S3 support. If they're using gcc, let's make sure the version is >= 4.9
     # and make sure that we have curl and openssl system libs

From d9092ec7e11c2a626f9086fedead475846b52356 Mon Sep 17 00:00:00 2001
From: Weston Pace <weston.pace@gmail.com>
Date: Tue, 6 Jul 2021 09:09:16 +0800
Subject: [PATCH 509/719] ARROW-13173: [C++] TestAsyncUtil.ReadaheadFailed
 asserts occasionally

As @cyb70289 pointed out the test was dependent on timing and when running on a slow CI machine it could lead to failure.  I changed the test to use condition variables instead of sleeps so that it should be fully deterministic now.

Closes #10602 from westonpace/bugfix/ARROW-13173--c-testasyncutil-readaheadfailed-asserts-occasi

Authored-by: Weston Pace <weston.pace@gmail.com>
Signed-off-by: Yibo Cai <yibo.cai@arm.com>
---
 cpp/src/arrow/util/async_generator.h       | 52 ++++++++++++----------
 cpp/src/arrow/util/async_generator_test.cc | 43 ++++++++----------
 cpp/src/arrow/util/iterator.h              |  1 -
 3 files changed, 47 insertions(+), 49 deletions(-)

diff --git a/cpp/src/arrow/util/async_generator.h b/cpp/src/arrow/util/async_generator.h
index c99bd865e45..5a6321fd418 100644
--- a/cpp/src/arrow/util/async_generator.h
+++ b/cpp/src/arrow/util/async_generator.h
@@ -697,30 +697,38 @@ class ReadaheadGenerator {
   ReadaheadGenerator(AsyncGenerator<T> source_generator, int max_readahead)
       : state_(std::make_shared<State>(std::move(source_generator), max_readahead)) {}
 
-  Future<T> operator()() {
-    // Copy so we can capture into lambdas
+  Future<T> AddMarkFinishedContinuation(Future<T> fut) {
     auto state = state_;
-    if (state->readahead_queue.empty()) {
+    return fut.Then(
+        [state](const T& result) -> Result<T> {
+          state->MarkFinishedIfDone(result);
+          return result;
+        },
+        [state](const Status& err) -> Result<T> {
+          state->finished.store(true);
+          return err;
+        });
+  }
+
+  Future<T> operator()() {
+    if (state_->readahead_queue.empty()) {
       // This is the first request, let's pump the underlying queue
-      for (int i = 0; i < state->max_readahead; i++) {
-        auto next = state->source_generator();
-        auto state = state_;
-        next.AddCallback(
-            [state](const Result<T>& result) { state->MarkFinishedIfDone(result); });
-        state->readahead_queue.push(std::move(next));
+      for (int i = 0; i < state_->max_readahead; i++) {
+        auto next = state_->source_generator();
+        auto next_after_check = AddMarkFinishedContinuation(std::move(next));
+        state_->readahead_queue.push(std::move(next_after_check));
       }
     }
     // Pop one and add one
-    auto result = state->readahead_queue.front();
-    state->readahead_queue.pop();
-    if (state->finished.load()) {
-      state->readahead_queue.push(AsyncGeneratorEnd<T>());
+    auto result = state_->readahead_queue.front();
+    state_->readahead_queue.pop();
+    if (state_->finished.load()) {
+      state_->readahead_queue.push(AsyncGeneratorEnd<T>());
     } else {
-      auto back_of_queue = state->source_generator();
-      auto state = state_;
-      back_of_queue.AddCallback(
-          [state](const Result<T>& result) { state->MarkFinishedIfDone(result); });
-      state->readahead_queue.push(std::move(back_of_queue));
+      auto back_of_queue = state_->source_generator();
+      auto back_of_queue_after_check =
+          AddMarkFinishedContinuation(std::move(back_of_queue));
+      state_->readahead_queue.push(std::move(back_of_queue_after_check));
     }
     return result;
   }
@@ -732,13 +740,9 @@ class ReadaheadGenerator {
       finished.store(false);
     }
 
-    void MarkFinishedIfDone(const Result<T>& next_result) {
-      if (!next_result.ok()) {
+    void MarkFinishedIfDone(const T& next_result) {
+      if (IsIterationEnd(next_result)) {
         finished.store(true);
-      } else {
-        if (IsIterationEnd(*next_result)) {
-          finished.store(true);
-        }
       }
     }
 
diff --git a/cpp/src/arrow/util/async_generator_test.cc b/cpp/src/arrow/util/async_generator_test.cc
index 87c1737228e..361ce3eacf0 100644
--- a/cpp/src/arrow/util/async_generator_test.cc
+++ b/cpp/src/arrow/util/async_generator_test.cc
@@ -1094,42 +1094,37 @@ TEST(TestAsyncUtil, ReadaheadMove) {
 }
 
 TEST(TestAsyncUtil, ReadaheadFailed) {
-  ASSERT_OK_AND_ASSIGN(auto thread_pool, internal::ThreadPool::Make(4));
+  ASSERT_OK_AND_ASSIGN(auto thread_pool, internal::ThreadPool::Make(20));
   std::atomic<int32_t> counter(0);
+  auto gating_task = GatingTask::Make();
   // All tasks are a little slow.  The first task fails.
   // The readahead will have spawned 9 more tasks and they
   // should all pass
-  auto source = [thread_pool, &counter]() -> Future<TestInt> {
+  auto source = [&]() -> Future<TestInt> {
     auto count = counter++;
-    return *thread_pool->Submit([count]() -> Result<TestInt> {
+    return DeferNotOk(thread_pool->Submit([&, count]() -> Result<TestInt> {
+      gating_task->Task()();
       if (count == 0) {
         return Status::Invalid("X");
       }
       return TestInt(count);
-    });
+    }));
   };
   auto readahead = MakeReadaheadGenerator<TestInt>(source, 10);
-  ASSERT_FINISHES_AND_RAISES(Invalid, readahead());
-  SleepABit();
-
-  for (int i = 0; i < 9; i++) {
-    ASSERT_FINISHES_OK_AND_ASSIGN(auto next_val, readahead());
-    ASSERT_EQ(TestInt(i + 1), next_val);
+  auto should_be_invalid = readahead();
+  // Polling once should allow 10 additional calls to start
+  ASSERT_OK(gating_task->WaitForRunning(11));
+  ASSERT_OK(gating_task->Unlock());
+
+  // Once unlocked the error task should always be the first.  Some number of successful
+  // tasks may follow until the end.
+  ASSERT_FINISHES_AND_RAISES(Invalid, should_be_invalid);
+
+  ASSERT_FINISHES_OK_AND_ASSIGN(auto remaining_results, CollectAsyncGenerator(readahead));
+  // Don't need to know the exact number of successful tasks (and it may vary)
+  for (std::size_t i = 0; i < remaining_results.size(); i++) {
+    ASSERT_EQ(TestInt(static_cast<int>(i) + 1), remaining_results[i]);
   }
-  ASSERT_FINISHES_OK_AND_ASSIGN(auto after, readahead());
-
-  // It's possible that finished was set quickly and there
-  // are only 10 elements
-  if (IsIterationEnd(after)) {
-    return;
-  }
-
-  // It's also possible that finished was too slow and there
-  // ended up being 11 elements
-  ASSERT_EQ(TestInt(10), after);
-  // There can't be 12 elements because SleepABit will prevent it
-  ASSERT_FINISHES_OK_AND_ASSIGN(auto definitely_last, readahead());
-  ASSERT_TRUE(IsIterationEnd(definitely_last));
 }
 
 class EnumeratorTestFixture : public GeneratorTestFixture {
diff --git a/cpp/src/arrow/util/iterator.h b/cpp/src/arrow/util/iterator.h
index b82021e4b21..2f42803d26f 100644
--- a/cpp/src/arrow/util/iterator.h
+++ b/cpp/src/arrow/util/iterator.h
@@ -20,7 +20,6 @@
 #include <cassert>
 #include <functional>
 #include <memory>
-#include <queue>
 #include <tuple>
 #include <type_traits>
 #include <utility>

From 304f202f8be988fa96a4e85f005798f51602771b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Pedro?= <joaop@simbioseventures.com>
Date: Tue, 6 Jul 2021 12:16:55 +0530
Subject: [PATCH 510/719] ARROW-12556: [C++][Gandiva] Implement BYTESUBSTRING
 function on Gandiva
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implement BYTE_SUBSTR([string] giventext, [number] x, [number] y)

Produces the binary representation of a string y characters long derived by starting at position x in the string giventext. y may also be given by the expression LENGTH(giventext), which indicates that you wish to convert every remaining character in giventext.

Closes #10169 from jpedroantunes/feature/add-byte-substr-gandiva-functions and squashes the following commits:

9d7147c59 <João Pedro> Remove redundant empty line on the end of the file
f2dcd8bff <João Pedro> Add missing comments and validate cases for negative values greater thant offset in module
7feec46ef <João Pedro> Change signature of byte_substr method to consider arguments
de4e5b2e7 <João Pedro> Coorect identation for bytesubstr on projector test
47ae96dd1 <João Pedro> Correct BYTE_SUBSTR function to work for binary input and output values
7d29b1b34 <João Pedro> Add function registry for BYTE_SUBSTR string function
fb0234e32 <João Pedro> Correct BYTE_SUBSTR function to work for binary input and output values
40f8f2a21 <João Pedro> Add projector test for BYTE_SUBSTR string function
32021858c <João Pedro> Add function registry for BYTE_SUBSTR string function
995f1f92c <João Pedro> Add base implementation and tests for BYTE_SUBSTR function considering string input values

Authored-by: João Pedro <joaop@simbioseventures.com>
Signed-off-by: Praveen <praveen@dremio.com>
---
 cpp/src/gandiva/function_registry_string.cc   |  5 ++
 cpp/src/gandiva/precompiled/string_ops.cc     | 46 ++++++++++++++++++
 .../gandiva/precompiled/string_ops_test.cc    | 47 +++++++++++++++++++
 cpp/src/gandiva/precompiled/types.h           |  4 ++
 cpp/src/gandiva/tests/projector_test.cc       | 43 +++++++++++++++++
 5 files changed, 145 insertions(+)

diff --git a/cpp/src/gandiva/function_registry_string.cc b/cpp/src/gandiva/function_registry_string.cc
index 5218b1cbc15..90e22316773 100644
--- a/cpp/src/gandiva/function_registry_string.cc
+++ b/cpp/src/gandiva/function_registry_string.cc
@@ -245,6 +245,11 @@ std::vector<NativeFunction> GetStringFunctionRegistry() {
                      "concat_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8",
                      NativeFunction::kNeedsContext),
 
+      NativeFunction("byte_substr", {"bytesubstring"},
+                     DataTypeVector{binary(), int32(), int32()}, binary(),
+                     kResultNullIfNull, "byte_substr_binary_int32_int32",
+                     NativeFunction::kNeedsContext),
+
       NativeFunction("convert_fromUTF8", {"convert_fromutf8"}, DataTypeVector{binary()},
                      utf8(), kResultNullIfNull, "convert_fromUTF8_binary",
                      NativeFunction::kNeedsContext),
diff --git a/cpp/src/gandiva/precompiled/string_ops.cc b/cpp/src/gandiva/precompiled/string_ops.cc
index fe5fcf4293f..3b475bef4f8 100644
--- a/cpp/src/gandiva/precompiled/string_ops.cc
+++ b/cpp/src/gandiva/precompiled/string_ops.cc
@@ -1966,4 +1966,50 @@ const char* binary_string(gdv_int64 context, const char* text, gdv_int32 text_le
   *out_len = j;
   return ret;
 }
+
+// Produces the binary representation of a string y characters long derived by starting
+// at offset 'x' and considering the defined length 'y'. Notice that the offset index
+// may be a negative number (starting from the end of the string), or a positive number
+// starting on index 1. Cases:
+//     BYTE_SUBSTR("TestString", 1, 10) => "TestString"
+//     BYTE_SUBSTR("TestString", 5, 10) => "String"
+//     BYTE_SUBSTR("TestString", -6, 10) => "String"
+//     BYTE_SUBSTR("TestString", -600, 10) => "TestString"
+FORCE_INLINE
+const char* byte_substr_binary_int32_int32(gdv_int64 context, const char* text,
+                                           gdv_int32 text_len, gdv_int32 offset,
+                                           gdv_int32 length, gdv_int32* out_len) {
+  // the first offset position for a string is 1, so not consider offset == 0
+  // also, the length should be always a positive number
+  if (text_len == 0 || offset == 0 || length <= 0) {
+    *out_len = 0;
+    return "";
+  }
+
+  char* ret =
+      reinterpret_cast<gdv_binary>(gdv_fn_context_arena_malloc(context, text_len));
+
+  if (ret == nullptr) {
+    gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
+    *out_len = 0;
+    return "";
+  }
+
+  int32_t startPos = 0;
+  if (offset >= 0) {
+    startPos = offset - 1;
+  } else if (text_len + offset >= 0) {
+    startPos = text_len + offset;
+  }
+
+  // calculate end position from length and truncate to upper value bounds
+  if (startPos + length > text_len) {
+    *out_len = text_len - startPos;
+  } else {
+    *out_len = length;
+  }
+
+  memcpy(ret, text + startPos, *out_len);
+  return ret;
+}
 }  // extern "C"
diff --git a/cpp/src/gandiva/precompiled/string_ops_test.cc b/cpp/src/gandiva/precompiled/string_ops_test.cc
index 3763a61b6a7..e85e0ee3d53 100644
--- a/cpp/src/gandiva/precompiled/string_ops_test.cc
+++ b/cpp/src/gandiva/precompiled/string_ops_test.cc
@@ -1256,6 +1256,53 @@ TEST(TestStringOps, TestLocate) {
   ctx.Reset();
 }
 
+TEST(TestStringOps, TestByteSubstr) {
+  gandiva::ExecutionContext ctx;
+  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
+  gdv_int32 out_len = 0;
+
+  const char* out_str;
+  out_str = byte_substr_binary_int32_int32(ctx_ptr, "TestString", 10, 5, 10, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "String");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = byte_substr_binary_int32_int32(ctx_ptr, "TestString", 10, -6, 10, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "String");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = byte_substr_binary_int32_int32(ctx_ptr, "TestString", 10, 0, 10, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = byte_substr_binary_int32_int32(ctx_ptr, "TestString", 10, 0, -500, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = byte_substr_binary_int32_int32(ctx_ptr, "TestString", 10, 1, 10, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "TestString");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = byte_substr_binary_int32_int32(ctx_ptr, "TestString", 10, 1, 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "Test");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = byte_substr_binary_int32_int32(ctx_ptr, "TestString", 10, 1, 1000, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "TestString");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = byte_substr_binary_int32_int32(ctx_ptr, "TestString", 10, 5, 3, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "Str");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = byte_substr_binary_int32_int32(ctx_ptr, "TestString", 10, 5, 10, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "String");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = byte_substr_binary_int32_int32(ctx_ptr, "TestString", 10, -100, 10, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "TestString");
+  EXPECT_FALSE(ctx.has_error());
+}
+
 TEST(TestStringOps, TestReplace) {
   gandiva::ExecutionContext ctx;
   uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
diff --git a/cpp/src/gandiva/precompiled/types.h b/cpp/src/gandiva/precompiled/types.h
index 5bd2242195a..047586b34bf 100644
--- a/cpp/src/gandiva/precompiled/types.h
+++ b/cpp/src/gandiva/precompiled/types.h
@@ -498,6 +498,10 @@ const char* split_part(gdv_int64 context, const char* text, gdv_int32 text_len,
                        const char* splitter, gdv_int32 split_len, gdv_int32 index,
                        gdv_int32* out_len);
 
+const char* byte_substr_binary_int32_int32(gdv_int64 context, const char* text,
+                                           gdv_int32 text_len, gdv_int32 offset,
+                                           gdv_int32 length, gdv_int32* out_len);
+
 const char* castVARCHAR_bool_int64(gdv_int64 context, gdv_boolean value,
                                    gdv_int64 out_len, gdv_int32* out_length);
 
diff --git a/cpp/src/gandiva/tests/projector_test.cc b/cpp/src/gandiva/tests/projector_test.cc
index ebe3009c356..04fa7a648ed 100644
--- a/cpp/src/gandiva/tests/projector_test.cc
+++ b/cpp/src/gandiva/tests/projector_test.cc
@@ -933,6 +933,49 @@ TEST_F(TestProjector, TestOffset) {
   EXPECT_ARROW_ARRAY_EQUALS(exp_sum, outputs.at(0));
 }
 
+TEST_F(TestProjector, TestByteSubString) {
+  // schema for input fields
+  auto field0 = field("f0", arrow::binary());
+  auto field1 = field("f1", arrow::int32());
+  auto field2 = field("f2", arrow::int32());
+  auto schema = arrow::schema({field0, field1, field2});
+
+  // output fields
+  auto field_byte_substr = field("bytesubstring", arrow::binary());
+
+  // Build expression
+  auto byte_substr_expr = TreeExprBuilder::MakeExpression(
+      "bytesubstring", {field0, field1, field2}, field_byte_substr);
+
+  std::shared_ptr<Projector> projector;
+  auto status =
+      Projector::Make(schema, {byte_substr_expr}, TestConfiguration(), &projector);
+  EXPECT_TRUE(status.ok()) << status.message();
+
+  // Create a row-batch with some sample data
+  int num_records = 6;
+  auto array0 = MakeArrowArrayBinary({"ab", "", "ab", "invalid", "valid", "invalid"},
+                                     {true, true, true, true, true, true});
+  auto array1 =
+      MakeArrowArrayInt32({0, 1, 1, 1, 3, 3}, {true, true, true, true, true, true});
+  auto array2 =
+      MakeArrowArrayInt32({0, 1, 1, 2, 3, 3}, {true, true, true, true, true, true});
+  // expected output
+  auto exp_byte_substr = MakeArrowArrayBinary({"", "", "a", "in", "lid", "val"},
+                                              {true, true, true, true, true, true});
+
+  // prepare input record batch
+  auto in = arrow::RecordBatch::Make(schema, num_records, {array0, array1, array2});
+
+  // Evaluate expression
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in, pool_, &outputs);
+  EXPECT_TRUE(status.ok()) << status.message();
+
+  // Validate results
+  EXPECT_ARROW_ARRAY_EQUALS(exp_byte_substr, outputs.at(0));
+}
+
 // Test to ensure behaviour of cast functions when the validity is false for an input. The
 // function should not run for that input.
 TEST_F(TestProjector, TestCastFunction) {

From 0072c677fbbc85832fa7a90ab49daf7c1f99a373 Mon Sep 17 00:00:00 2001
From: frank400 <j.victorhuguenin2018@gmail.com>
Date: Tue, 6 Jul 2021 12:19:05 +0530
Subject: [PATCH 511/719] ARROW-12567: [C++][Gandiva] Implement ILIKE SQL
 function

Closes #10179 from jvictorhuguenin/feature/implement-sql-ilike and squashes the following commits:

f160880d2 <frank400> Optimize holder constructor call
97e6e2d83 <frank400> Remove unnecessary Make method
c2363b10f <frank400> Disable TryOptimize for ilike
a48414931 <frank400> Fix checkstyle on cmake file
c6a8372cd <frank400> Delete unnecessary holder
4be6cc611 <frank400> Fix redefined function
b78085a14 <frank400> Fix miss include
2efd43e2b <frank400> Implement ilike function

Authored-by: frank400 <j.victorhuguenin2018@gmail.com>
Signed-off-by: Praveen <praveen@dremio.com>
---
 cpp/src/gandiva/function_holder_registry.h  |  1 +
 cpp/src/gandiva/function_registry_string.cc |  4 ++
 cpp/src/gandiva/gdv_function_stubs.cc       | 17 +++++
 cpp/src/gandiva/gdv_function_stubs.h        |  3 +
 cpp/src/gandiva/like_holder.cc              | 21 ++++++
 cpp/src/gandiva/like_holder.h               |  6 ++
 cpp/src/gandiva/like_holder_test.cc         | 75 +++++++++++++++++++--
 7 files changed, 123 insertions(+), 4 deletions(-)

diff --git a/cpp/src/gandiva/function_holder_registry.h b/cpp/src/gandiva/function_holder_registry.h
index e1c5630e841..225c73207fc 100644
--- a/cpp/src/gandiva/function_holder_registry.h
+++ b/cpp/src/gandiva/function_holder_registry.h
@@ -62,6 +62,7 @@ class FunctionHolderRegistry {
   static map_type& makers() {
     static map_type maker_map = {
         {"like", LAMBDA_MAKER(LikeHolder)},
+        {"ilike", LAMBDA_MAKER(LikeHolder)},
         {"to_date", LAMBDA_MAKER(ToDateHolder)},
         {"random", LAMBDA_MAKER(RandomGeneratorHolder)},
         {"rand", LAMBDA_MAKER(RandomGeneratorHolder)},
diff --git a/cpp/src/gandiva/function_registry_string.cc b/cpp/src/gandiva/function_registry_string.cc
index 90e22316773..7491e4435a9 100644
--- a/cpp/src/gandiva/function_registry_string.cc
+++ b/cpp/src/gandiva/function_registry_string.cc
@@ -131,6 +131,10 @@ std::vector<NativeFunction> GetStringFunctionRegistry() {
                      kResultNullIfNull, "gdv_fn_like_utf8_utf8_utf8",
                      NativeFunction::kNeedsFunctionHolder),
 
+      NativeFunction("ilike", {}, DataTypeVector{utf8(), utf8()}, boolean(),
+                     kResultNullIfNull, "gdv_fn_ilike_utf8_utf8",
+                     NativeFunction::kNeedsFunctionHolder),
+
       NativeFunction("ltrim", {}, DataTypeVector{utf8(), utf8()}, utf8(),
                      kResultNullIfNull, "ltrim_utf8_utf8", NativeFunction::kNeedsContext),
 
diff --git a/cpp/src/gandiva/gdv_function_stubs.cc b/cpp/src/gandiva/gdv_function_stubs.cc
index 38c31a8c3f5..3c278049ed6 100644
--- a/cpp/src/gandiva/gdv_function_stubs.cc
+++ b/cpp/src/gandiva/gdv_function_stubs.cc
@@ -52,6 +52,12 @@ bool gdv_fn_like_utf8_utf8_utf8(int64_t ptr, const char* data, int data_len,
   return (*holder)(std::string(data, data_len));
 }
 
+bool gdv_fn_ilike_utf8_utf8(int64_t ptr, const char* data, int data_len,
+                            const char* pattern, int pattern_len) {
+  gandiva::LikeHolder* holder = reinterpret_cast<gandiva::LikeHolder*>(ptr);
+  return (*holder)(std::string(data, data_len));
+}
+
 double gdv_fn_random(int64_t ptr) {
   gandiva::RandomGeneratorHolder* holder =
       reinterpret_cast<gandiva::RandomGeneratorHolder*>(ptr);
@@ -807,6 +813,17 @@ void ExportedStubFunctions::AddMappings(Engine* engine) const {
                                   types->i1_type() /*return_type*/, args,
                                   reinterpret_cast<void*>(gdv_fn_like_utf8_utf8_utf8));
 
+  // gdv_fn_ilike_utf8_utf8
+  args = {types->i64_type(),     // int64_t ptr
+          types->i8_ptr_type(),  // const char* data
+          types->i32_type(),     // int data_len
+          types->i8_ptr_type(),  // const char* pattern
+          types->i32_type()};    // int pattern_len
+
+  engine->AddGlobalMappingForFunc("gdv_fn_ilike_utf8_utf8",
+                                  types->i1_type() /*return_type*/, args,
+                                  reinterpret_cast<void*>(gdv_fn_ilike_utf8_utf8));
+
   // gdv_fn_to_date_utf8_utf8
   args = {types->i64_type(),                   // int64_t execution_context
           types->i64_type(),                   // int64_t holder_ptr
diff --git a/cpp/src/gandiva/gdv_function_stubs.h b/cpp/src/gandiva/gdv_function_stubs.h
index ee22c3f4ece..043e94034ed 100644
--- a/cpp/src/gandiva/gdv_function_stubs.h
+++ b/cpp/src/gandiva/gdv_function_stubs.h
@@ -50,6 +50,9 @@ bool gdv_fn_like_utf8_utf8_utf8(int64_t ptr, const char* data, int data_len,
                                 const char* pattern, int pattern_len,
                                 const char* escape_char, int escape_char_len);
 
+bool gdv_fn_ilike_utf8_utf8(int64_t ptr, const char* data, int data_len,
+                            const char* pattern, int pattern_len);
+
 int64_t gdv_fn_to_date_utf8_utf8_int32(int64_t context, int64_t ptr, const char* data,
                                        int data_len, bool in1_validity,
                                        const char* pattern, int pattern_len,
diff --git a/cpp/src/gandiva/like_holder.cc b/cpp/src/gandiva/like_holder.cc
index 5a3510e3652..af9ac67d66a 100644
--- a/cpp/src/gandiva/like_holder.cc
+++ b/cpp/src/gandiva/like_holder.cc
@@ -80,6 +80,13 @@ Status LikeHolder::Make(const FunctionNode& node, std::shared_ptr<LikeHolder>* h
       !IsArrowStringLiteral(literal_type),
       Status::Invalid(
           "'like' function requires a string literal as the second parameter"));
+
+  RE2::Options regex_op;
+  if (node.descriptor()->name() == "ilike") {
+    regex_op.set_case_sensitive(false);  // set case-insensitive for ilike function.
+
+    return Make(arrow::util::get<std::string>(literal->holder()), holder, regex_op);
+  }
   if (node.children().size() == 2) {
     return Make(arrow::util::get<std::string>(literal->holder()), holder);
   } else {
@@ -132,4 +139,18 @@ Status LikeHolder::Make(const std::string& sql_pattern, const std::string& escap
   return Status::OK();
 }
 
+Status LikeHolder::Make(const std::string& sql_pattern,
+                        std::shared_ptr<LikeHolder>* holder, RE2::Options regex_op) {
+  std::string pcre_pattern;
+  ARROW_RETURN_NOT_OK(RegexUtil::SqlLikePatternToPcre(sql_pattern, pcre_pattern));
+
+  std::shared_ptr<LikeHolder> lholder;
+  lholder = std::shared_ptr<LikeHolder>(new LikeHolder(pcre_pattern, regex_op));
+
+  ARROW_RETURN_IF(!lholder->regex_.ok(),
+                  Status::Invalid("Building RE2 pattern '", pcre_pattern, "' failed"));
+
+  *holder = lholder;
+  return Status::OK();
+}
 }  // namespace gandiva
diff --git a/cpp/src/gandiva/like_holder.h b/cpp/src/gandiva/like_holder.h
index c7982e91437..73e58017de1 100644
--- a/cpp/src/gandiva/like_holder.h
+++ b/cpp/src/gandiva/like_holder.h
@@ -42,6 +42,9 @@ class GANDIVA_EXPORT LikeHolder : public FunctionHolder {
   static Status Make(const std::string& sql_pattern, const std::string& escape_char,
                      std::shared_ptr<LikeHolder>* holder);
 
+  static Status Make(const std::string& sql_pattern, std::shared_ptr<LikeHolder>* holder,
+                     RE2::Options regex_op);
+
   // Try and optimise a function node with a "like" pattern.
   static const FunctionNode TryOptimize(const FunctionNode& node);
 
@@ -51,6 +54,9 @@ class GANDIVA_EXPORT LikeHolder : public FunctionHolder {
  private:
   explicit LikeHolder(const std::string& pattern) : pattern_(pattern), regex_(pattern) {}
 
+  LikeHolder(const std::string& pattern, RE2::Options regex_op)
+      : pattern_(pattern), regex_(pattern, regex_op) {}
+
   std::string pattern_;  // posix pattern string, to help debugging
   RE2 regex_;            // compiled regex for the pattern
 
diff --git a/cpp/src/gandiva/like_holder_test.cc b/cpp/src/gandiva/like_holder_test.cc
index 18e585fc502..a52533a1138 100644
--- a/cpp/src/gandiva/like_holder_test.cc
+++ b/cpp/src/gandiva/like_holder_test.cc
@@ -27,6 +27,7 @@ namespace gandiva {
 
 class TestLikeHolder : public ::testing::Test {
  public:
+  RE2::Options regex_op;
   FunctionNode BuildLike(std::string pattern) {
     auto field = std::make_shared<FieldNode>(arrow::field("in", arrow::utf8()));
     auto pattern_node =
@@ -48,7 +49,7 @@ class TestLikeHolder : public ::testing::Test {
 TEST_F(TestLikeHolder, TestMatchAny) {
   std::shared_ptr<LikeHolder> like_holder;
 
-  auto status = LikeHolder::Make("ab%", &like_holder);
+  auto status = LikeHolder::Make("ab%", &like_holder, regex_op);
   EXPECT_EQ(status.ok(), true) << status.message();
 
   auto& like = *like_holder;
@@ -63,7 +64,7 @@ TEST_F(TestLikeHolder, TestMatchAny) {
 TEST_F(TestLikeHolder, TestMatchOne) {
   std::shared_ptr<LikeHolder> like_holder;
 
-  auto status = LikeHolder::Make("ab_", &like_holder);
+  auto status = LikeHolder::Make("ab_", &like_holder, regex_op);
   EXPECT_EQ(status.ok(), true) << status.message();
 
   auto& like = *like_holder;
@@ -78,7 +79,7 @@ TEST_F(TestLikeHolder, TestMatchOne) {
 TEST_F(TestLikeHolder, TestPcreSpecial) {
   std::shared_ptr<LikeHolder> like_holder;
 
-  auto status = LikeHolder::Make(".*ab_", &like_holder);
+  auto status = LikeHolder::Make(".*ab_", &like_holder, regex_op);
   EXPECT_EQ(status.ok(), true) << status.message();
 
   auto& like = *like_holder;
@@ -97,7 +98,7 @@ TEST_F(TestLikeHolder, TestRegexEscape) {
 TEST_F(TestLikeHolder, TestDot) {
   std::shared_ptr<LikeHolder> like_holder;
 
-  auto status = LikeHolder::Make("abc.", &like_holder);
+  auto status = LikeHolder::Make("abc.", &like_holder, regex_op);
   EXPECT_EQ(status.ok(), true) << status.message();
 
   auto& like = *like_holder;
@@ -211,4 +212,70 @@ TEST_F(TestLikeHolder, TestMultipleEscapeChar) {
   auto status = LikeHolder::Make("ab\\_", "\\\\", &like_holder);
   EXPECT_EQ(status.ok(), false) << status.message();
 }
+class TestILikeHolder : public ::testing::Test {
+ public:
+  RE2::Options regex_op;
+  FunctionNode BuildILike(std::string pattern) {
+    auto field = std::make_shared<FieldNode>(arrow::field("in", arrow::utf8()));
+    auto pattern_node =
+        std::make_shared<LiteralNode>(arrow::utf8(), LiteralHolder(pattern), false);
+    return FunctionNode("ilike", {field, pattern_node}, arrow::boolean());
+  }
+};
+
+TEST_F(TestILikeHolder, TestMatchAny) {
+  std::shared_ptr<LikeHolder> like_holder;
+
+  regex_op.set_case_sensitive(false);
+  auto status = LikeHolder::Make("ab%", &like_holder, regex_op);
+  EXPECT_EQ(status.ok(), true) << status.message();
+
+  auto& like = *like_holder;
+  EXPECT_TRUE(like("ab"));
+  EXPECT_TRUE(like("aBc"));
+  EXPECT_TRUE(like("ABCD"));
+
+  EXPECT_FALSE(like("a"));
+  EXPECT_FALSE(like("cab"));
+}
+
+TEST_F(TestILikeHolder, TestMatchOne) {
+  std::shared_ptr<LikeHolder> like_holder;
+
+  regex_op.set_case_sensitive(false);
+  auto status = LikeHolder::Make("Ab_", &like_holder, regex_op);
+  EXPECT_EQ(status.ok(), true) << status.message();
+
+  auto& like = *like_holder;
+  EXPECT_TRUE(like("abc"));
+  EXPECT_TRUE(like("aBd"));
+
+  EXPECT_FALSE(like("A"));
+  EXPECT_FALSE(like("Abcd"));
+  EXPECT_FALSE(like("DaBc"));
+}
+
+TEST_F(TestILikeHolder, TestPcreSpecial) {
+  std::shared_ptr<LikeHolder> like_holder;
+
+  regex_op.set_case_sensitive(false);
+  auto status = LikeHolder::Make(".*aB_", &like_holder, regex_op);
+  EXPECT_EQ(status.ok(), true) << status.message();
+
+  auto& like = *like_holder;
+  EXPECT_TRUE(like(".*Abc"));  // . and * aren't special in sql regex
+  EXPECT_FALSE(like("xxAbc"));
+}
+
+TEST_F(TestILikeHolder, TestDot) {
+  std::shared_ptr<LikeHolder> like_holder;
+
+  regex_op.set_case_sensitive(false);
+  auto status = LikeHolder::Make("aBc.", &like_holder, regex_op);
+  EXPECT_EQ(status.ok(), true) << status.message();
+
+  auto& like = *like_holder;
+  EXPECT_FALSE(like("abcd"));
+}
+
 }  // namespace gandiva

From 27be94f39e988e6461d6900ca9b7ae28cfc65ea9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Pedro?= <joaop@simbioseventures.com>
Date: Tue, 6 Jul 2021 12:21:15 +0530
Subject: [PATCH 512/719] ARROW-12856: [C++][Gandiva] Implement castBIT and
 castBOOLEAN functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implement castBIT and castBOOLEAN functions for UTF8 input values

Closes #10382 from jpedroantunes/feature/cast-bit and squashes the following commits:

9aea576e1 <João Pedro> Apply linter corrections
5ed3c64da <João Pedro> Add projector tests and registry for castbit function
51918f8ad <João Pedro> Add base files for castBIT and castBOOLEAN functions

Authored-by: João Pedro <joaop@simbioseventures.com>
Signed-off-by: Praveen <praveen@dremio.com>
---
 cpp/src/gandiva/function_registry_string.cc   |  3 +
 cpp/src/gandiva/precompiled/string_ops.cc     | 60 +++++++++++++++++++
 .../gandiva/precompiled/string_ops_test.cc    | 55 +++++++++++++++++
 cpp/src/gandiva/precompiled/types.h           |  2 +
 cpp/src/gandiva/tests/projector_test.cc       | 31 ++++++++++
 5 files changed, 151 insertions(+)

diff --git a/cpp/src/gandiva/function_registry_string.cc b/cpp/src/gandiva/function_registry_string.cc
index 7491e4435a9..9235a3e01a2 100644
--- a/cpp/src/gandiva/function_registry_string.cc
+++ b/cpp/src/gandiva/function_registry_string.cc
@@ -75,6 +75,9 @@ std::vector<NativeFunction> GetStringFunctionRegistry() {
                      "gdv_fn_initcap_utf8",
                      NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors),
 
+      NativeFunction("castBIT", {"castBOOLEAN"}, DataTypeVector{utf8()}, boolean(),
+                     kResultNullIfNull, "castBIT_utf8", NativeFunction::kNeedsContext),
+
       NativeFunction("castINT", {}, DataTypeVector{utf8()}, int32(), kResultNullIfNull,
                      "gdv_fn_castINT_utf8",
                      NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors),
diff --git a/cpp/src/gandiva/precompiled/string_ops.cc b/cpp/src/gandiva/precompiled/string_ops.cc
index 3b475bef4f8..0820114a0ea 100644
--- a/cpp/src/gandiva/precompiled/string_ops.cc
+++ b/cpp/src/gandiva/precompiled/string_ops.cc
@@ -480,6 +480,66 @@ const char* btrim_utf8_utf8(gdv_int64 context, const char* basetext,
   return basetext + start_ptr;
 }
 
+FORCE_INLINE
+gdv_boolean compare_lower_strings(const char* base_str, gdv_int32 base_str_len,
+                                  const char* str, gdv_int32 str_len) {
+  if (base_str_len != str_len) {
+    return false;
+  }
+  for (int i = 0; i < str_len; i++) {
+    // convert char to lower
+    char cur = str[i];
+    // 'A' - 'Z' : 0x41 - 0x5a
+    // 'a' - 'z' : 0x61 - 0x7a
+    if (cur >= 0x41 && cur <= 0x5a) {
+      cur = static_cast<char>(cur + 0x20);
+    }
+    // if the character does not match, break the flow
+    if (cur != base_str[i]) break;
+    // if the character matches and it is the last iteration, return true
+    if (i == str_len - 1) return true;
+  }
+  return false;
+}
+
+// Try to cast the received string ('0', '1', 'true', 'false'), ignoring leading
+// and trailing spaces, also ignoring lower and upper case.
+FORCE_INLINE
+gdv_boolean castBIT_utf8(gdv_int64 context, const char* data, gdv_int32 data_len) {
+  if (data_len <= 0) {
+    gdv_fn_context_set_error_msg(context, "Invalid value for boolean.");
+    return false;
+  }
+
+  // trim leading and trailing spaces
+  int32_t trimmed_len;
+  int32_t start = 0, end = data_len - 1;
+  while (start <= end && data[start] == ' ') {
+    ++start;
+  }
+  while (end >= start && data[end] == ' ') {
+    --end;
+  }
+  trimmed_len = end - start + 1;
+  const char* trimmed_data = data + start;
+
+  // compare received string with the valid bool string values '1', '0', 'true', 'false'
+  if (trimmed_len == 1) {
+    // case for '0' and '1' value
+    if (trimmed_data[0] == '1') return true;
+    if (trimmed_data[0] == '0') return false;
+  } else if (trimmed_len == 4) {
+    // case for matching 'true'
+    if (compare_lower_strings("true", 4, trimmed_data, trimmed_len)) return true;
+  } else if (trimmed_len == 5) {
+    // case for matching 'false'
+    if (compare_lower_strings("false", 5, trimmed_data, trimmed_len)) return false;
+  }
+  // if no 'true', 'false', '0' or '1' value is found, set an error
+  gdv_fn_context_set_error_msg(context, "Invalid value for boolean.");
+  return false;
+}
+
 FORCE_INLINE
 const char* castVARCHAR_bool_int64(gdv_int64 context, gdv_boolean value,
                                    gdv_int64 out_len, gdv_int32* out_length) {
diff --git a/cpp/src/gandiva/precompiled/string_ops_test.cc b/cpp/src/gandiva/precompiled/string_ops_test.cc
index e85e0ee3d53..c4854c52db1 100644
--- a/cpp/src/gandiva/precompiled/string_ops_test.cc
+++ b/cpp/src/gandiva/precompiled/string_ops_test.cc
@@ -221,6 +221,61 @@ TEST(TestStringOps, TestCastBoolToVarchar) {
   ctx.Reset();
 }
 
+TEST(TestStringOps, TestCastVarcharToBool) {
+  gandiva::ExecutionContext ctx;
+  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
+
+  EXPECT_EQ(castBIT_utf8(ctx_ptr, "true", 4), true);
+  EXPECT_FALSE(ctx.has_error());
+
+  EXPECT_EQ(castBIT_utf8(ctx_ptr, "     true     ", 14), true);
+  EXPECT_FALSE(ctx.has_error());
+
+  EXPECT_EQ(castBIT_utf8(ctx_ptr, "true     ", 9), true);
+  EXPECT_FALSE(ctx.has_error());
+
+  EXPECT_EQ(castBIT_utf8(ctx_ptr, "     true", 9), true);
+  EXPECT_FALSE(ctx.has_error());
+
+  EXPECT_EQ(castBIT_utf8(ctx_ptr, "TRUE", 4), true);
+  EXPECT_FALSE(ctx.has_error());
+
+  EXPECT_EQ(castBIT_utf8(ctx_ptr, "TrUe", 4), true);
+  EXPECT_FALSE(ctx.has_error());
+
+  EXPECT_EQ(castBIT_utf8(ctx_ptr, "1", 1), true);
+  EXPECT_FALSE(ctx.has_error());
+
+  EXPECT_EQ(castBIT_utf8(ctx_ptr, "  1", 3), true);
+  EXPECT_FALSE(ctx.has_error());
+
+  EXPECT_EQ(castBIT_utf8(ctx_ptr, "false", 5), false);
+  EXPECT_FALSE(ctx.has_error());
+
+  EXPECT_EQ(castBIT_utf8(ctx_ptr, "false     ", 10), false);
+  EXPECT_FALSE(ctx.has_error());
+
+  EXPECT_EQ(castBIT_utf8(ctx_ptr, "     false", 10), false);
+  EXPECT_FALSE(ctx.has_error());
+
+  EXPECT_EQ(castBIT_utf8(ctx_ptr, "0", 1), false);
+  EXPECT_FALSE(ctx.has_error());
+
+  EXPECT_EQ(castBIT_utf8(ctx_ptr, "0   ", 4), false);
+  EXPECT_FALSE(ctx.has_error());
+
+  EXPECT_EQ(castBIT_utf8(ctx_ptr, "FALSE", 5), false);
+  EXPECT_FALSE(ctx.has_error());
+
+  EXPECT_EQ(castBIT_utf8(ctx_ptr, "FaLsE", 5), false);
+  EXPECT_FALSE(ctx.has_error());
+
+  EXPECT_EQ(castBIT_utf8(ctx_ptr, "test", 4), false);
+  EXPECT_TRUE(ctx.has_error());
+  EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Invalid value for boolean"));
+  ctx.Reset();
+}
+
 TEST(TestStringOps, TestCastVarchar) {
   gandiva::ExecutionContext ctx;
   uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
diff --git a/cpp/src/gandiva/precompiled/types.h b/cpp/src/gandiva/precompiled/types.h
index 047586b34bf..543a00f0b2d 100644
--- a/cpp/src/gandiva/precompiled/types.h
+++ b/cpp/src/gandiva/precompiled/types.h
@@ -114,6 +114,8 @@ gdv_int64 date_sub_timestamp_int32(gdv_timestamp, gdv_int32);
 gdv_int64 subtract_timestamp_int32(gdv_timestamp, gdv_int32);
 gdv_int64 date_diff_timestamp_int64(gdv_timestamp, gdv_int64);
 
+gdv_boolean castBIT_utf8(gdv_int64 context, const char* data, gdv_int32 data_len);
+
 bool is_distinct_from_timestamp_timestamp(gdv_int64, bool, gdv_int64, bool);
 bool is_not_distinct_from_int32_int32(gdv_int32, bool, gdv_int32, bool);
 
diff --git a/cpp/src/gandiva/tests/projector_test.cc b/cpp/src/gandiva/tests/projector_test.cc
index 04fa7a648ed..dcdeeb4ee10 100644
--- a/cpp/src/gandiva/tests/projector_test.cc
+++ b/cpp/src/gandiva/tests/projector_test.cc
@@ -1029,6 +1029,37 @@ TEST_F(TestProjector, TestCastFunction) {
   EXPECT_ARROW_ARRAY_EQUALS(out_int8, outputs.at(3));
 }
 
+TEST_F(TestProjector, TestCastBitFunction) {
+  auto field0 = field("f0", arrow::utf8());
+  auto schema = arrow::schema({field0});
+
+  // output fields
+  auto res_bit = field("res_bit", arrow::boolean());
+
+  // Build expression
+  auto cast_bit = TreeExprBuilder::MakeExpression("castBIT", {field0}, res_bit);
+
+  std::shared_ptr<Projector> projector;
+
+  auto status = Projector::Make(schema, {cast_bit}, TestConfiguration(), &projector);
+  EXPECT_TRUE(status.ok());
+
+  // Create a row-batch with some sample data
+  int num_records = 4;
+  auto arr = MakeArrowArrayUtf8({"1", "true", "false", "0"}, {true, true, true, true});
+  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {arr});
+
+  auto out = MakeArrowArrayBool({true, true, false, false}, {true, true, true, true});
+
+  arrow::ArrayVector outputs;
+
+  // Evaluate expression
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  EXPECT_TRUE(status.ok());
+
+  EXPECT_ARROW_ARRAY_EQUALS(out, outputs.at(0));
+}
+
 TEST_F(TestProjector, TestToDate) {
   // schema for input fields
   auto field0 = field("f0", arrow::utf8());

From b69b3ed50424d0b39213d9a814044a94af2ab8e7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Tue, 6 Jul 2021 13:05:07 +0200
Subject: [PATCH 513/719] ARROW-6513: [CI] Rename conda requirements files to
 have txt extension instead of yml
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Missing renames from #10572

Closes #10656 from kszucs/conda-txt

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 cpp/cmake_modules/ThirdpartyToolchain.cmake  |  2 +-
 dev/release/verify-release-candidate.bat     |  4 ++--
 docs/source/developers/cpp/windows.rst       | 10 +++++-----
 docs/source/developers/documentation.rst     |  2 +-
 docs/source/developers/python.rst            | 14 +++++++-------
 python/examples/minimal_build/build_conda.sh |  6 +++---
 6 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index ab2dd168a09..efe054e3262 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -1842,7 +1842,7 @@ if(ARROW_BUILD_BENCHMARKS)
     # archive. So the benchmark package on conda-forge isn't report
     # the real version. We accept all the benchmark package with
     # conda. Conda users should install benchmark 1.5.2 or later by
-    # ci/conda_env_cpp.yml.
+    # ci/conda_env_cpp.txt.
     set(BENCHMARK_REQUIRED_VERSION 0.0.0)
   endif()
   resolve_dependency(benchmark
diff --git a/dev/release/verify-release-candidate.bat b/dev/release/verify-release-candidate.bat
index bef78fc920c..fee8c01bc63 100644
--- a/dev/release/verify-release-candidate.bat
+++ b/dev/release/verify-release-candidate.bat
@@ -42,8 +42,8 @@ set PYTHON=3.6
 @rem Using call with conda.bat seems necessary to avoid terminating the batch
 @rem script execution
 call conda create --no-shortcuts -c conda-forge -f -q -y -p %_VERIFICATION_CONDA_ENV% ^
-    --file=ci\conda_env_cpp.yml ^
-    --file=ci\conda_env_python.yml ^
+    --file=ci\conda_env_cpp.txt ^
+    --file=ci\conda_env_python.txt ^
     git ^
     python=%PYTHON% ^
     || exit /B 1
diff --git a/docs/source/developers/cpp/windows.rst b/docs/source/developers/cpp/windows.rst
index 3251dea2dfa..c06ce225d73 100644
--- a/docs/source/developers/cpp/windows.rst
+++ b/docs/source/developers/cpp/windows.rst
@@ -81,7 +81,7 @@ Arrow codebase):
 
 .. code-block:: shell
 
-   conda create -y -n arrow-dev --file=ci\conda_env_cpp.yml
+   conda create -y -n arrow-dev --file=ci\conda_env_cpp.txt
 
 Then "activate" this conda environment with:
 
@@ -138,7 +138,7 @@ of Arrow and run the command:
      --clean-after-build
 
 On Windows, vcpkg builds dynamic link libraries by default. Use the triplet
-``x64-windows-static`` to build static libraries. vcpkg downloads source 
+``x64-windows-static`` to build static libraries. vcpkg downloads source
 packages and compiles them locally, so installing dependencies with vcpkg is
 more time-consuming than with conda.
 
@@ -150,12 +150,12 @@ Then in your ``cmake`` command, to use dependencies installed by vcpkg, set:
 
 You can optionally set other variables to override the default CMake
 configurations for vcpkg, including:
-   
+
 * ``-DCMAKE_TOOLCHAIN_FILE``: by default, the CMake scripts automatically find
   the location of the vcpkg CMake toolchain file ``vcpkg.cmake``; use this to
   instead specify its location
 * ``-DVCPKG_TARGET_TRIPLET``: by default, the CMake scripts attempt to infer the
-  vcpkg 
+  vcpkg
   `triplet <https://github.com/microsoft/vcpkg/blob/master/docs/users/triplets.md>`_;
   use this to instead specify the triplet
 * ``-DARROW_DEPENDENCY_USE_SHARED``: default is ``ON``; set to ``OFF`` for
@@ -408,7 +408,7 @@ tests can be made with there individual make targets).
 
 .. code-block:: shell
 
-   conda install -c conda-forge --file .\ci\conda_env_cpp.yml
+   conda install -c conda-forge --file .\ci\conda_env_cpp.txt
    .\ci\appveyor-cpp-setup.bat
    @rem this might fail but at this point most unit tests should be buildable by there individual targets
    @rem see next line for example.
diff --git a/docs/source/developers/documentation.rst b/docs/source/developers/documentation.rst
index 4196a0cb3f9..813cc9cbdd2 100644
--- a/docs/source/developers/documentation.rst
+++ b/docs/source/developers/documentation.rst
@@ -30,7 +30,7 @@ If you're using Conda, the required software can be installed in a single line:
 
 .. code-block:: shell
 
-   conda install -c conda-forge --file=ci/conda_env_sphinx.yml
+   conda install -c conda-forge --file=ci/conda_env_sphinx.txt
 
 Otherwise, you'll first need to install `Doxygen <http://www.doxygen.nl/>`_
 yourself (for example from your distribution's official repositories, if
diff --git a/docs/source/developers/python.rst b/docs/source/developers/python.rst
index f2fbb927ac9..bcecda000e1 100644
--- a/docs/source/developers/python.rst
+++ b/docs/source/developers/python.rst
@@ -173,10 +173,10 @@ On Linux and macOS:
 .. code-block:: shell
 
     conda create -y -n pyarrow-dev -c conda-forge \
-        --file arrow/ci/conda_env_unix.yml \
-        --file arrow/ci/conda_env_cpp.yml \
-        --file arrow/ci/conda_env_python.yml \
-        --file arrow/ci/conda_env_gandiva.yml \
+        --file arrow/ci/conda_env_unix.txt \
+        --file arrow/ci/conda_env_cpp.txt \
+        --file arrow/ci/conda_env_python.txt \
+        --file arrow/ci/conda_env_gandiva.txt \
         compilers \
         python=3.7 \
         pandas
@@ -431,9 +431,9 @@ First, starting from fresh clones of Apache Arrow:
 .. code-block:: shell
 
    conda create -y -n pyarrow-dev -c conda-forge ^
-       --file arrow\ci\conda_env_cpp.yml ^
-       --file arrow\ci\conda_env_python.yml ^
-       --file arrow\ci\conda_env_gandiva.yml ^
+       --file arrow\ci\conda_env_cpp.txt ^
+       --file arrow\ci\conda_env_python.txt ^
+       --file arrow\ci\conda_env_gandiva.txt ^
        python=3.7
    conda activate pyarrow-dev
 
diff --git a/python/examples/minimal_build/build_conda.sh b/python/examples/minimal_build/build_conda.sh
index 6f93ebd5647..13df3d3a89c 100755
--- a/python/examples/minimal_build/build_conda.sh
+++ b/python/examples/minimal_build/build_conda.sh
@@ -50,9 +50,9 @@ function setup_miniconda() {
   conda config --add channels conda-forge
 
   conda create -y -n pyarrow-$PYTHON -c conda-forge \
-        --file arrow/ci/conda_env_unix.yml \
-        --file arrow/ci/conda_env_cpp.yml \
-        --file arrow/ci/conda_env_python.yml \
+        --file arrow/ci/conda_env_unix.txt \
+        --file arrow/ci/conda_env_cpp.txt \
+        --file arrow/ci/conda_env_python.txt \
         compilers \
         python=3.7 \
         pandas

From 780e95c512d63bbea1e040af0eb44a0bf63c4d72 Mon Sep 17 00:00:00 2001
From: Nate Clark <nate@neworld.us>
Date: Tue, 6 Jul 2021 08:28:24 -0400
Subject: [PATCH 514/719] ARROW-13246: [C++] Using CSV skip_rows_after_names
 can cause data to be discarded prematurely

When rows are skipped but there are still more rows to skip set partial to the remainder of buffer.

Closes #10649 from n3world/ARROW-13246-block_middle_of_skip

Authored-by: Nate Clark <nate@neworld.us>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/src/arrow/csv/reader.cc      | 10 ++++++----
 python/pyarrow/tests/test_csv.py | 12 ++++++++++++
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/cpp/src/arrow/csv/reader.cc b/cpp/src/arrow/csv/reader.cc
index f644b86f89f..d57a2f15667 100644
--- a/cpp/src/arrow/csv/reader.cc
+++ b/cpp/src/arrow/csv/reader.cc
@@ -231,14 +231,16 @@ class SerialBlockReader : public BlockReader {
       RETURN_NOT_OK(
           chunker_->ProcessSkip(partial_, buffer_, is_final, &skip_rows_, &buffer_));
       bytes_skipped += orig_size - buffer_->size();
-      partial_ = SliceBuffer(buffer_, 0, 0);
+      auto empty = std::make_shared<Buffer>(nullptr, 0);
       if (skip_rows_) {
         // Still have rows beyond this buffer to skip return empty block
+        partial_ = std::move(buffer_);
         buffer_ = next_buffer;
-        return TransformYield<CSVBlock>(CSVBlock{partial_, partial_, partial_,
-                                                 block_index_++, is_final, bytes_skipped,
+        return TransformYield<CSVBlock>(CSVBlock{empty, empty, empty, block_index_++,
+                                                 is_final, bytes_skipped,
                                                  [](int64_t) { return Status::OK(); }});
       }
+      partial_ = std::move(empty);
     }
 
     std::shared_ptr<Buffer> completion;
@@ -307,7 +309,7 @@ class ThreadedBlockReader : public BlockReader {
       RETURN_NOT_OK(chunker_->ProcessSkip(current_partial, current_buffer, is_final,
                                           &skip_rows_, &current_buffer));
       bytes_skipped += orig_size - current_buffer->size();
-      current_partial = SliceBuffer(current_buffer, 0, 0);
+      current_partial = std::make_shared<Buffer>(nullptr, 0);
       if (skip_rows_) {
         partial_ = std::move(current_buffer);
         buffer_ = std::move(next_buffer);
diff --git a/python/pyarrow/tests/test_csv.py b/python/pyarrow/tests/test_csv.py
index b3f0dea3a43..050342de747 100644
--- a/python/pyarrow/tests/test_csv.py
+++ b/python/pyarrow/tests/test_csv.py
@@ -451,6 +451,18 @@ def test_skip_rows_after_names(self):
             "cd": ["op"],
         }
 
+        # Can skip rows when block ends in middle of quoted value
+        opts.skip_rows_after_names = 2
+        opts.block_size = 26
+        table = self.read_bytes(rows, read_options=opts,
+                                parse_options=parse_opts)
+        self.check_names(table, ["ab", "cd"])
+        assert table.to_pydict() == {
+            "ab": ["mn"],
+            "cd": ["op"],
+        }
+        opts = ReadOptions()
+
         # Can skip rows that are beyond the first block without lexer
         rows, expected = make_random_csv(num_cols=5, num_rows=1000)
         opts.skip_rows_after_names = 900

From f247e3ab7a4d2c33bfca6165570fabd62c2fb6ea Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Tue, 6 Jul 2021 15:27:18 -0400
Subject: [PATCH 515/719] ARROW-12891: [C++] Move subtree pruning to compute

Closes #10484 from lidavidm/arrow-12891

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Benjamin Kietzman <bengilgit@gmail.com>
---
 cpp/src/arrow/compute/exec/CMakeLists.txt     |   7 +-
 .../exec}/forest_internal.h                   |  11 +-
 cpp/src/arrow/compute/exec/subtree_internal.h | 178 +++++++++
 cpp/src/arrow/compute/exec/subtree_test.cc    | 377 ++++++++++++++++++
 cpp/src/arrow/dataset/dataset_internal.h      | 117 ------
 cpp/src/arrow/dataset/file_base.cc            |  46 +--
 cpp/src/arrow/dataset/file_test.cc            | 290 --------------
 7 files changed, 581 insertions(+), 445 deletions(-)
 rename cpp/src/arrow/{dataset => compute/exec}/forest_internal.h (96%)
 create mode 100644 cpp/src/arrow/compute/exec/subtree_internal.h
 create mode 100644 cpp/src/arrow/compute/exec/subtree_test.cc

diff --git a/cpp/src/arrow/compute/exec/CMakeLists.txt b/cpp/src/arrow/compute/exec/CMakeLists.txt
index ac6ddc51dff..2ed8b1c9480 100644
--- a/cpp/src/arrow/compute/exec/CMakeLists.txt
+++ b/cpp/src/arrow/compute/exec/CMakeLists.txt
@@ -17,7 +17,12 @@
 
 arrow_install_all_headers("arrow/compute/exec")
 
-add_arrow_compute_test(expression_test PREFIX "arrow-compute")
+add_arrow_compute_test(expression_test
+                       PREFIX
+                       "arrow-compute"
+                       SOURCES
+                       expression_test.cc
+                       subtree_test.cc)
 
 add_arrow_compute_test(plan_test PREFIX "arrow-compute")
 
diff --git a/cpp/src/arrow/dataset/forest_internal.h b/cpp/src/arrow/compute/exec/forest_internal.h
similarity index 96%
rename from cpp/src/arrow/dataset/forest_internal.h
rename to cpp/src/arrow/compute/exec/forest_internal.h
index 1a7b874065e..7b55a0aabf3 100644
--- a/cpp/src/arrow/dataset/forest_internal.h
+++ b/cpp/src/arrow/compute/exec/forest_internal.h
@@ -21,15 +21,16 @@
 #include <utility>
 #include <vector>
 
-#include "arrow/dataset/visibility.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
 
 namespace arrow {
-namespace dataset {
+namespace compute {
 
 /// A Forest is a view of a sorted range which carries an ancestry relation in addition
 /// to an ordering relation: each element's descendants appear directly after it.
 /// This can be used to efficiently skip subtrees when iterating through the range.
-class ARROW_DS_EXPORT Forest {
+class Forest {
  public:
   Forest() = default;
 
@@ -69,7 +70,7 @@ class ARROW_DS_EXPORT Forest {
            std::equal(it, it + size_, other.descendant_counts_->begin());
   }
 
-  struct ARROW_DS_EXPORT Ref {
+  struct Ref {
     int num_descendants() const { return forest->descendant_counts_->at(i); }
 
     bool IsAncestorOf(const Ref& ref) const {
@@ -120,5 +121,5 @@ class ARROW_DS_EXPORT Forest {
   std::shared_ptr<std::vector<int>> descendant_counts_;
 };
 
-}  // namespace dataset
+}  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/compute/exec/subtree_internal.h b/cpp/src/arrow/compute/exec/subtree_internal.h
new file mode 100644
index 00000000000..72d419df225
--- /dev/null
+++ b/cpp/src/arrow/compute/exec/subtree_internal.h
@@ -0,0 +1,178 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <stdint.h>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#include "arrow/compute/exec/expression.h"
+#include "arrow/util/optional.h"
+
+namespace arrow {
+namespace compute {
+// Helper class for efficiently detecting subtrees given expressions.
+//
+// Using fragment partition expressions as an example:
+// Partition expressions are broken into conjunction members and each member dictionary
+// encoded to impose a sortable ordering. In addition, subtrees are generated which span
+// groups of fragments and nested subtrees. After encoding each fragment is guaranteed to
+// be a descendant of at least one subtree. For example, given fragments in a
+// HivePartitioning with paths:
+//
+//   /num=0/al=eh/dat.par
+//   /num=0/al=be/dat.par
+//   /num=1/al=eh/dat.par
+//   /num=1/al=be/dat.par
+//
+// The following subtrees will be introduced:
+//
+//   /num=0/
+//   /num=0/al=eh/
+//   /num=0/al=eh/dat.par
+//   /num=0/al=be/
+//   /num=0/al=be/dat.par
+//   /num=1/
+//   /num=1/al=eh/
+//   /num=1/al=eh/dat.par
+//   /num=1/al=be/
+//   /num=1/al=be/dat.par
+struct SubtreeImpl {
+  // Each unique conjunction member is mapped to an integer.
+  using expression_code = char32_t;
+  // Partition expressions are mapped to strings of codes; strings give us lexicographic
+  // ordering (and potentially useful optimizations).
+  using expression_codes = std::basic_string<expression_code>;
+  // An encoded guarantee (if index is set) or subtree.
+  struct Encoded {
+    // An external index identifying the corresponding object (e.g. a Fragment) of the
+    // guarantee.
+    util::optional<int> index;
+    // An encoded expression representing a guarantee.
+    expression_codes guarantee;
+  };
+
+  std::unordered_map<compute::Expression, expression_code, compute::Expression::Hash>
+      expr_to_code_;
+  std::vector<compute::Expression> code_to_expr_;
+  std::unordered_set<expression_codes> subtree_exprs_;
+
+  // Encode a subexpression (returning the existing code if possible).
+  expression_code GetOrInsert(const compute::Expression& expr) {
+    auto next_code = static_cast<int>(expr_to_code_.size());
+    auto it_success = expr_to_code_.emplace(expr, next_code);
+
+    if (it_success.second) {
+      code_to_expr_.push_back(expr);
+    }
+    return it_success.first->second;
+  }
+
+  // Encode an expression (recursively breaking up conjunction members if possible).
+  void EncodeConjunctionMembers(const compute::Expression& expr,
+                                expression_codes* codes) {
+    if (auto call = expr.call()) {
+      if (call->function_name == "and_kleene") {
+        // expr is a conjunction, encode its arguments
+        EncodeConjunctionMembers(call->arguments[0], codes);
+        EncodeConjunctionMembers(call->arguments[1], codes);
+        return;
+      }
+    }
+    // expr is not a conjunction, encode it whole
+    codes->push_back(GetOrInsert(expr));
+  }
+
+  // Convert an encoded subtree or guarantee back into an expression.
+  compute::Expression GetSubtreeExpression(const Encoded& encoded_subtree) {
+    // Filters will already be simplified by all of a subtree's ancestors, so
+    // we only need to simplify the filter by the trailing conjunction member
+    // of each subtree.
+    return code_to_expr_[encoded_subtree.guarantee.back()];
+  }
+
+  // Insert subtrees for each component of an encoded partition expression.
+  void GenerateSubtrees(expression_codes guarantee, std::vector<Encoded>* encoded) {
+    while (!guarantee.empty()) {
+      if (subtree_exprs_.insert(guarantee).second) {
+        Encoded encoded_subtree{/*index=*/util::nullopt, guarantee};
+        encoded->push_back(std::move(encoded_subtree));
+      }
+      guarantee.resize(guarantee.size() - 1);
+    }
+  }
+
+  // Encode a guarantee, and generate subtrees for it as well.
+  void EncodeOneGuarantee(int index, const Expression& guarantee,
+                          std::vector<Encoded>* encoded) {
+    Encoded encoded_guarantee{index, {}};
+    EncodeConjunctionMembers(guarantee, &encoded_guarantee.guarantee);
+    GenerateSubtrees(encoded_guarantee.guarantee, encoded);
+    encoded->push_back(std::move(encoded_guarantee));
+  }
+
+  template <typename GetGuarantee>
+  std::vector<Encoded> EncodeGuarantees(const GetGuarantee& get, int count) {
+    std::vector<Encoded> encoded;
+    for (int i = 0; i < count; ++i) {
+      EncodeOneGuarantee(i, get(i), &encoded);
+    }
+    return encoded;
+  }
+
+  // Comparator for sort
+  struct ByGuarantee {
+    bool operator()(const Encoded& l, const Encoded& r) {
+      const auto cmp = l.guarantee.compare(r.guarantee);
+      if (cmp != 0) {
+        return cmp < 0;
+      }
+      // Equal guarantees; sort encodings with indices after encodings without
+      return (l.index ? 1 : 0) < (r.index ? 1 : 0);
+    }
+  };
+
+  // Comparator for building a Forest
+  struct IsAncestor {
+    const std::vector<Encoded> encoded;
+
+    bool operator()(int l, int r) const {
+      if (encoded[l].index) {
+        // Leaf-level object (e.g. a Fragment): not an ancestor.
+        return false;
+      }
+
+      const auto& ancestor = encoded[l].guarantee;
+      const auto& descendant = encoded[r].guarantee;
+
+      if (descendant.size() >= ancestor.size()) {
+        return std::equal(ancestor.begin(), ancestor.end(), descendant.begin());
+      }
+      return false;
+    }
+  };
+};
+
+inline bool operator==(const SubtreeImpl::Encoded& l, const SubtreeImpl::Encoded& r) {
+  return l.index == r.index && l.guarantee == r.guarantee;
+}
+
+}  // namespace compute
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/exec/subtree_test.cc b/cpp/src/arrow/compute/exec/subtree_test.cc
new file mode 100644
index 00000000000..97213104454
--- /dev/null
+++ b/cpp/src/arrow/compute/exec/subtree_test.cc
@@ -0,0 +1,377 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+#include "arrow/compute/exec/forest_internal.h"
+#include "arrow/compute/exec/subtree_internal.h"
+#include "arrow/testing/gtest_util.h"
+#include "arrow/util/string_view.h"
+
+namespace arrow {
+namespace compute {
+
+using testing::ContainerEq;
+
+// Tests of subtree pruning
+
+// Don't depend on FileSystem - port just enough to be useful here
+struct FileInfo {
+  bool is_dir;
+  std::string path;
+
+  bool operator==(const FileInfo& other) const {
+    return is_dir == other.is_dir && path == other.path;
+  }
+
+  static FileInfo Dir(std::string path) { return FileInfo{true, std::move(path)}; }
+
+  static FileInfo File(std::string path) { return FileInfo{false, std::move(path)}; }
+
+  static bool ByPath(const FileInfo& l, const FileInfo& r) { return l.path < r.path; }
+};
+
+struct TestPathTree {
+  FileInfo info;
+  std::vector<TestPathTree> subtrees;
+
+  explicit TestPathTree(std::string file_path)
+      : info(FileInfo::File(std::move(file_path))) {}
+
+  TestPathTree(std::string dir_path, std::vector<TestPathTree> subtrees)
+      : info(FileInfo::Dir(std::move(dir_path))), subtrees(std::move(subtrees)) {}
+
+  TestPathTree(Forest::Ref ref, const std::vector<FileInfo>& infos) : info(infos[ref.i]) {
+    const Forest& forest = *ref.forest;
+
+    int begin = ref.i + 1;
+    int end = begin + ref.num_descendants();
+
+    for (int i = begin; i < end; ++i) {
+      subtrees.emplace_back(forest[i], infos);
+      i += forest[i].num_descendants();
+    }
+  }
+
+  bool operator==(const TestPathTree& other) const {
+    return info == other.info && subtrees == other.subtrees;
+  }
+
+  std::string ToString() const {
+    auto out = "\n" + info.path;
+    if (info.is_dir) out += "/";
+
+    for (const auto& subtree : subtrees) {
+      out += subtree.ToString();
+    }
+    return out;
+  }
+
+  friend std::ostream& operator<<(std::ostream& os, const TestPathTree& tree) {
+    return os << tree.ToString();
+  }
+};
+
+using PT = TestPathTree;
+
+util::string_view RemoveTrailingSlash(util::string_view key) {
+  while (!key.empty() && key.back() == '/') {
+    key.remove_suffix(1);
+  }
+  return key;
+}
+bool IsAncestorOf(util::string_view ancestor, util::string_view descendant) {
+  // See filesystem/path_util.h
+  ancestor = RemoveTrailingSlash(ancestor);
+  if (ancestor == "") return true;
+  descendant = RemoveTrailingSlash(descendant);
+  if (!descendant.starts_with(ancestor)) return false;
+  descendant.remove_prefix(ancestor.size());
+  if (descendant.empty()) return true;
+  return descendant.front() == '/';
+}
+
+Forest MakeForest(std::vector<FileInfo>* infos) {
+  std::sort(infos->begin(), infos->end(), FileInfo::ByPath);
+
+  return Forest(static_cast<int>(infos->size()), [&](int i, int j) {
+    return IsAncestorOf(infos->at(i).path, infos->at(j).path);
+  });
+}
+
+void ExpectForestIs(std::vector<FileInfo> infos, std::vector<PT> expected_roots) {
+  auto forest = MakeForest(&infos);
+
+  std::vector<PT> actual_roots;
+  ASSERT_OK(forest.Visit(
+      [&](Forest::Ref ref) -> Result<bool> {
+        actual_roots.emplace_back(ref, infos);
+        return false;  // only vist roots
+      },
+      [](Forest::Ref) {}));
+
+  // visit expected and assert equality
+  EXPECT_THAT(actual_roots, ContainerEq(expected_roots));
+}
+
+TEST(Forest, Basic) {
+  ExpectForestIs({}, {});
+
+  ExpectForestIs({FileInfo::File("aa")}, {PT("aa")});
+  ExpectForestIs({FileInfo::Dir("AA")}, {PT("AA", {})});
+  ExpectForestIs({FileInfo::Dir("AA"), FileInfo::File("AA/aa")},
+                 {PT("AA", {PT("AA/aa")})});
+  ExpectForestIs({FileInfo::Dir("AA"), FileInfo::Dir("AA/BB"), FileInfo::File("AA/BB/0")},
+                 {PT("AA", {PT("AA/BB", {PT("AA/BB/0")})})});
+
+  // Missing parent can still find ancestor.
+  ExpectForestIs({FileInfo::Dir("AA"), FileInfo::File("AA/BB/bb")},
+                 {PT("AA", {PT("AA/BB/bb")})});
+
+  // Ancestors should link to parent regardless of ordering.
+  ExpectForestIs({FileInfo::File("AA/aa"), FileInfo::Dir("AA")},
+                 {PT("AA", {PT("AA/aa")})});
+
+  // Multiple roots are supported.
+  ExpectForestIs({FileInfo::File("aa"), FileInfo::File("bb")}, {PT("aa"), PT("bb")});
+  ExpectForestIs({FileInfo::File("00"), FileInfo::Dir("AA"), FileInfo::File("AA/aa"),
+                  FileInfo::File("BB/bb")},
+                 {PT("00"), PT("AA", {PT("AA/aa")}), PT("BB/bb")});
+  ExpectForestIs({FileInfo::Dir("AA"), FileInfo::Dir("AA/BB"), FileInfo::File("AA/BB/0"),
+                  FileInfo::Dir("CC"), FileInfo::Dir("CC/BB"), FileInfo::File("CC/BB/0")},
+                 {PT("AA", {PT("AA/BB", {PT("AA/BB/0")})}),
+                  PT("CC", {PT("CC/BB", {PT("CC/BB/0")})})});
+}
+
+TEST(Forest, HourlyETL) {
+  // This test mimics a scenario where an ETL dumps hourly files in a structure
+  // `$year/$month/$day/$hour/*.parquet`.
+  constexpr int64_t kYears = 3;
+  constexpr int64_t kMonthsPerYear = 12;
+  constexpr int64_t kDaysPerMonth = 31;
+  constexpr int64_t kHoursPerDay = 24;
+  constexpr int64_t kFilesPerHour = 2;
+
+  // Avoid constructing strings
+  std::vector<std::string> numbers{kDaysPerMonth + 1};
+  for (size_t i = 0; i < numbers.size(); i++) {
+    numbers[i] = std::to_string(i);
+    if (numbers[i].size() == 1) {
+      numbers[i] = "0" + numbers[i];
+    }
+  }
+
+  auto join = [](const std::vector<std::string>& path) {
+    if (path.empty()) return std::string("");
+    std::string result = path[0];
+    for (const auto& part : path) {
+      result += '/';
+      result += part;
+    }
+    return result;
+  };
+
+  std::vector<FileInfo> infos;
+
+  std::vector<PT> forest;
+  for (int64_t year = 0; year < kYears; year++) {
+    auto year_str = std::to_string(year + 2000);
+    auto year_dir = FileInfo::Dir(year_str);
+    infos.push_back(year_dir);
+
+    std::vector<PT> months;
+    for (int64_t month = 0; month < kMonthsPerYear; month++) {
+      auto month_str = join({year_str, numbers[month + 1]});
+      auto month_dir = FileInfo::Dir(month_str);
+      infos.push_back(month_dir);
+
+      std::vector<PT> days;
+      for (int64_t day = 0; day < kDaysPerMonth; day++) {
+        auto day_str = join({month_str, numbers[day + 1]});
+        auto day_dir = FileInfo::Dir(day_str);
+        infos.push_back(day_dir);
+
+        std::vector<PT> hours;
+        for (int64_t hour = 0; hour < kHoursPerDay; hour++) {
+          auto hour_str = join({day_str, numbers[hour]});
+          auto hour_dir = FileInfo::Dir(hour_str);
+          infos.push_back(hour_dir);
+
+          std::vector<PT> files;
+          for (int64_t file = 0; file < kFilesPerHour; file++) {
+            auto file_str = join({hour_str, numbers[file] + ".parquet"});
+            auto file_fd = FileInfo::File(file_str);
+            infos.push_back(file_fd);
+            files.emplace_back(file_str);
+          }
+
+          auto hour_pt = PT(hour_str, std::move(files));
+          hours.push_back(hour_pt);
+        }
+
+        auto day_pt = PT(day_str, std::move(hours));
+        days.push_back(day_pt);
+      }
+
+      auto month_pt = PT(month_str, std::move(days));
+      months.push_back(month_pt);
+    }
+
+    auto year_pt = PT(year_str, std::move(months));
+    forest.push_back(year_pt);
+  }
+
+  ExpectForestIs(infos, forest);
+}
+
+TEST(Forest, Visit) {
+  using Infos = std::vector<FileInfo>;
+
+  for (auto infos :
+       {Infos{}, Infos{FileInfo::Dir("A"), FileInfo::File("A/a")},
+        Infos{FileInfo::Dir("AA"), FileInfo::Dir("AA/BB"), FileInfo::File("AA/BB/0"),
+              FileInfo::Dir("CC"), FileInfo::Dir("CC/BB"), FileInfo::File("CC/BB/0")}}) {
+    ASSERT_TRUE(std::is_sorted(infos.begin(), infos.end(), FileInfo::ByPath));
+
+    auto forest = MakeForest(&infos);
+
+    auto ignore_post = [](Forest::Ref) {};
+
+    // noop is fine
+    ASSERT_OK(
+        forest.Visit([](Forest::Ref) -> Result<bool> { return false; }, ignore_post));
+
+    // Should propagate failure
+    if (forest.size() != 0) {
+      ASSERT_RAISES(
+          Invalid,
+          forest.Visit([](Forest::Ref) -> Result<bool> { return Status::Invalid(""); },
+                       ignore_post));
+    }
+
+    // Ensure basic visit of all nodes
+    int i = 0;
+    ASSERT_OK(forest.Visit(
+        [&](Forest::Ref ref) -> Result<bool> {
+          EXPECT_EQ(ref.i, i);
+          ++i;
+          return true;
+        },
+        ignore_post));
+
+    // Visit only directories
+    Infos actual_dirs;
+    ASSERT_OK(forest.Visit(
+        [&](Forest::Ref ref) -> Result<bool> {
+          if (!infos[ref.i].is_dir) {
+            return false;
+          }
+          actual_dirs.push_back(infos[ref.i]);
+          return true;
+        },
+        ignore_post));
+
+    Infos expected_dirs;
+    for (const auto& info : infos) {
+      if (info.is_dir) {
+        expected_dirs.push_back(info);
+      }
+    }
+    EXPECT_THAT(actual_dirs, ContainerEq(expected_dirs));
+  }
+}
+
+TEST(Subtree, EncodeExpression) {
+  SubtreeImpl tree;
+  ASSERT_EQ(0, tree.GetOrInsert(equal(field_ref("a"), literal("1"))));
+  // Should be idempotent
+  ASSERT_EQ(0, tree.GetOrInsert(equal(field_ref("a"), literal("1"))));
+  ASSERT_EQ(equal(field_ref("a"), literal("1")), tree.code_to_expr_[0]);
+
+  SubtreeImpl::expression_codes codes;
+  auto conj =
+      and_(equal(field_ref("a"), literal("1")), equal(field_ref("b"), literal("2")));
+  tree.EncodeConjunctionMembers(conj, &codes);
+  ASSERT_EQ(SubtreeImpl::expression_codes({0, 1}), codes);
+
+  codes.clear();
+  conj = or_(equal(field_ref("a"), literal("1")), equal(field_ref("b"), literal("2")));
+  tree.EncodeConjunctionMembers(conj, &codes);
+  ASSERT_EQ(SubtreeImpl::expression_codes({2}), codes);
+}
+
+TEST(Subtree, GetSubtreeExpression) {
+  SubtreeImpl tree;
+  const auto expr_a = equal(field_ref("a"), literal("1"));
+  const auto expr_b = equal(field_ref("b"), literal("2"));
+  const auto code_a = tree.GetOrInsert(expr_a);
+  const auto code_b = tree.GetOrInsert(expr_b);
+  ASSERT_EQ(expr_a,
+            tree.GetSubtreeExpression(SubtreeImpl::Encoded{util::nullopt, {code_a}}));
+  ASSERT_EQ(expr_b, tree.GetSubtreeExpression(
+                        SubtreeImpl::Encoded{util::nullopt, {code_a, code_b}}));
+}
+
+class FakeFragment {
+ public:
+  explicit FakeFragment(Expression partition_expression)
+      : partition_expression_(partition_expression) {}
+  const Expression& partition_expression() const { return partition_expression_; }
+
+ private:
+  Expression partition_expression_;
+};
+
+TEST(Subtree, EncodeFragments) {
+  const auto expr_a =
+      and_(equal(field_ref("a"), literal("1")), equal(field_ref("b"), literal("2")));
+  const auto expr_b =
+      and_(equal(field_ref("a"), literal("2")), equal(field_ref("b"), literal("3")));
+  std::vector<std::shared_ptr<FakeFragment>> fragments;
+  fragments.push_back(std::make_shared<FakeFragment>(expr_a));
+  fragments.push_back(std::make_shared<FakeFragment>(expr_b));
+
+  SubtreeImpl tree;
+  auto encoded = tree.EncodeGuarantees(
+      [&](int index) { return fragments[index]->partition_expression(); },
+      static_cast<int>(fragments.size()));
+  EXPECT_THAT(
+      tree.code_to_expr_,
+      ContainerEq(std::vector<compute::Expression>{
+          equal(field_ref("a"), literal("1")), equal(field_ref("b"), literal("2")),
+          equal(field_ref("a"), literal("2")), equal(field_ref("b"), literal("3"))}));
+  EXPECT_THAT(
+      encoded,
+      testing::UnorderedElementsAreArray({
+          SubtreeImpl::Encoded{util::make_optional<int>(0),
+                               SubtreeImpl::expression_codes({0, 1})},
+          SubtreeImpl::Encoded{util::make_optional<int>(1),
+                               SubtreeImpl::expression_codes({2, 3})},
+          SubtreeImpl::Encoded{util::nullopt, SubtreeImpl::expression_codes({0})},
+          SubtreeImpl::Encoded{util::nullopt, SubtreeImpl::expression_codes({2})},
+          SubtreeImpl::Encoded{util::nullopt, SubtreeImpl::expression_codes({0, 1})},
+          SubtreeImpl::Encoded{util::nullopt, SubtreeImpl::expression_codes({2, 3})},
+      }));
+}
+}  // namespace compute
+}  // namespace arrow
diff --git a/cpp/src/arrow/dataset/dataset_internal.h b/cpp/src/arrow/dataset/dataset_internal.h
index 952ad3e83ca..a1245b7e2a0 100644
--- a/cpp/src/arrow/dataset/dataset_internal.h
+++ b/cpp/src/arrow/dataset/dataset_internal.h
@@ -19,7 +19,6 @@
 
 #include <memory>
 #include <string>
-#include <unordered_set>
 #include <utility>
 #include <vector>
 
@@ -30,7 +29,6 @@
 #include "arrow/scalar.h"
 #include "arrow/type.h"
 #include "arrow/util/iterator.h"
-#include "arrow/util/optional.h"
 
 namespace arrow {
 namespace dataset {
@@ -67,121 +65,6 @@ inline std::shared_ptr<Schema> SchemaFromColumnNames(
   return schema(std::move(columns))->WithMetadata(input->metadata());
 }
 
-// Helper class for efficiently detecting subtrees given fragment partition expressions.
-// Partition expressions are broken into conjunction members and each member dictionary
-// encoded to impose a sortable ordering. In addition, subtrees are generated which span
-// groups of fragments and nested subtrees. After encoding each fragment is guaranteed to
-// be a descendant of at least one subtree. For example, given fragments in a
-// HivePartitioning with paths:
-//
-//   /num=0/al=eh/dat.par
-//   /num=0/al=be/dat.par
-//   /num=1/al=eh/dat.par
-//   /num=1/al=be/dat.par
-//
-// The following subtrees will be introduced:
-//
-//   /num=0/
-//   /num=0/al=eh/
-//   /num=0/al=eh/dat.par
-//   /num=0/al=be/
-//   /num=0/al=be/dat.par
-//   /num=1/
-//   /num=1/al=eh/
-//   /num=1/al=eh/dat.par
-//   /num=1/al=be/
-//   /num=1/al=be/dat.par
-struct SubtreeImpl {
-  // Each unique conjunction member is mapped to an integer.
-  using expression_code = char32_t;
-  // Partition expressions are mapped to strings of codes; strings give us lexicographic
-  // ordering (and potentially useful optimizations).
-  using expression_codes = std::basic_string<expression_code>;
-  // An encoded fragment (if fragment_index is set) or subtree.
-  struct Encoded {
-    util::optional<int> fragment_index;
-    expression_codes partition_expression;
-  };
-
-  std::unordered_map<compute::Expression, expression_code, compute::Expression::Hash>
-      expr_to_code_;
-  std::vector<compute::Expression> code_to_expr_;
-  std::unordered_set<expression_codes> subtree_exprs_;
-
-  // Encode a subexpression (returning the existing code if possible).
-  expression_code GetOrInsert(const compute::Expression& expr) {
-    auto next_code = static_cast<int>(expr_to_code_.size());
-    auto it_success = expr_to_code_.emplace(expr, next_code);
-
-    if (it_success.second) {
-      code_to_expr_.push_back(expr);
-    }
-    return it_success.first->second;
-  }
-
-  // Encode an expression (recursively breaking up conjunction members if possible).
-  void EncodeConjunctionMembers(const compute::Expression& expr,
-                                expression_codes* codes) {
-    if (auto call = expr.call()) {
-      if (call->function_name == "and_kleene") {
-        // expr is a conjunction, encode its arguments
-        EncodeConjunctionMembers(call->arguments[0], codes);
-        EncodeConjunctionMembers(call->arguments[1], codes);
-        return;
-      }
-    }
-    // expr is not a conjunction, encode it whole
-    codes->push_back(GetOrInsert(expr));
-  }
-
-  // Convert an encoded subtree or fragment back into an expression.
-  compute::Expression GetSubtreeExpression(const Encoded& encoded_subtree) {
-    // Filters will already be simplified by all of a subtree's ancestors, so
-    // we only need to simplify the filter by the trailing conjunction member
-    // of each subtree.
-    return code_to_expr_[encoded_subtree.partition_expression.back()];
-  }
-
-  // Insert subtrees for each component of an encoded partition expression.
-  void GenerateSubtrees(expression_codes partition_expression,
-                        std::vector<Encoded>* encoded) {
-    while (!partition_expression.empty()) {
-      if (subtree_exprs_.insert(partition_expression).second) {
-        Encoded encoded_subtree{/*fragment_index=*/util::nullopt, partition_expression};
-        encoded->push_back(std::move(encoded_subtree));
-      }
-      partition_expression.resize(partition_expression.size() - 1);
-    }
-  }
-
-  // Encode the fragment's partition expression and generate subtrees for it as well.
-  void EncodeOneFragment(int fragment_index, const Fragment& fragment,
-                         std::vector<Encoded>* encoded) {
-    Encoded encoded_fragment{fragment_index, {}};
-
-    EncodeConjunctionMembers(fragment.partition_expression(),
-                             &encoded_fragment.partition_expression);
-
-    GenerateSubtrees(encoded_fragment.partition_expression, encoded);
-
-    encoded->push_back(std::move(encoded_fragment));
-  }
-
-  template <typename Fragments>
-  std::vector<Encoded> EncodeFragments(const Fragments& fragments) {
-    std::vector<Encoded> encoded;
-    for (size_t i = 0; i < fragments.size(); ++i) {
-      EncodeOneFragment(static_cast<int>(i), *fragments[i], &encoded);
-    }
-    return encoded;
-  }
-};
-
-inline bool operator==(const SubtreeImpl::Encoded& l, const SubtreeImpl::Encoded& r) {
-  return l.fragment_index == r.fragment_index &&
-         l.partition_expression == r.partition_expression;
-}
-
 /// Get fragment scan options of the expected type.
 /// \return Fragment scan options if provided on the scan options, else the default
 ///     options if set, else a default-constructed value. If options are provided
diff --git a/cpp/src/arrow/dataset/file_base.cc b/cpp/src/arrow/dataset/file_base.cc
index 741071d1703..3a67ea48378 100644
--- a/cpp/src/arrow/dataset/file_base.cc
+++ b/cpp/src/arrow/dataset/file_base.cc
@@ -23,8 +23,9 @@
 #include <unordered_set>
 #include <vector>
 
+#include "arrow/compute/exec/forest_internal.h"
+#include "arrow/compute/exec/subtree_internal.h"
 #include "arrow/dataset/dataset_internal.h"
-#include "arrow/dataset/forest_internal.h"
 #include "arrow/dataset/scanner.h"
 #include "arrow/dataset/scanner_internal.h"
 #include "arrow/filesystem/filesystem.h"
@@ -188,7 +189,7 @@ Future<util::optional<int64_t>> FileFragment::CountRows(
 
 struct FileSystemDataset::FragmentSubtrees {
   // Forest for skipping fragments based on extracted subtree expressions
-  Forest forest;
+  compute::Forest forest;
   // fragment indices and subtree expressions in forest order
   std::vector<util::Variant<int, compute::Expression>> fragments_and_subtrees;
 };
@@ -243,43 +244,24 @@ std::string FileSystemDataset::ToString() const {
 
 void FileSystemDataset::SetupSubtreePruning() {
   subtrees_ = std::make_shared<FragmentSubtrees>();
-  SubtreeImpl impl;
+  compute::SubtreeImpl impl;
 
-  auto encoded = impl.EncodeFragments(fragments_);
+  auto encoded = impl.EncodeGuarantees(
+      [&](int index) { return fragments_[index]->partition_expression(); },
+      static_cast<int>(fragments_.size()));
 
-  std::sort(encoded.begin(), encoded.end(),
-            [](const SubtreeImpl::Encoded& l, const SubtreeImpl::Encoded& r) {
-              const auto cmp = l.partition_expression.compare(r.partition_expression);
-              if (cmp != 0) {
-                return cmp < 0;
-              }
-              // Equal partition expressions; sort encodings with fragment indices after
-              // encodings without
-              return (l.fragment_index ? 1 : 0) < (r.fragment_index ? 1 : 0);
-            });
+  std::sort(encoded.begin(), encoded.end(), compute::SubtreeImpl::ByGuarantee());
 
   for (const auto& e : encoded) {
-    if (e.fragment_index) {
-      subtrees_->fragments_and_subtrees.emplace_back(*e.fragment_index);
+    if (e.index) {
+      subtrees_->fragments_and_subtrees.emplace_back(*e.index);
     } else {
       subtrees_->fragments_and_subtrees.emplace_back(impl.GetSubtreeExpression(e));
     }
   }
 
-  subtrees_->forest = Forest(static_cast<int>(encoded.size()), [&](int l, int r) {
-    if (encoded[l].fragment_index) {
-      // Fragment: not an ancestor.
-      return false;
-    }
-
-    const auto& ancestor = encoded[l].partition_expression;
-    const auto& descendant = encoded[r].partition_expression;
-
-    if (descendant.size() >= ancestor.size()) {
-      return std::equal(ancestor.begin(), ancestor.end(), descendant.begin());
-    }
-    return false;
-  });
+  subtrees_->forest = compute::Forest(static_cast<int>(encoded.size()),
+                                      compute::SubtreeImpl::IsAncestor{encoded});
 }
 
 Result<FragmentIterator> FileSystemDataset::GetFragmentsImpl(
@@ -293,7 +275,7 @@ Result<FragmentIterator> FileSystemDataset::GetFragmentsImpl(
 
   std::vector<compute::Expression> predicates{predicate};
   RETURN_NOT_OK(subtrees_->forest.Visit(
-      [&](Forest::Ref ref) -> Result<bool> {
+      [&](compute::Forest::Ref ref) -> Result<bool> {
         if (auto fragment_index =
                 util::get_if<int>(&subtrees_->fragments_and_subtrees[ref.i])) {
           fragment_indices.push_back(*fragment_index);
@@ -312,7 +294,7 @@ Result<FragmentIterator> FileSystemDataset::GetFragmentsImpl(
         predicates.push_back(std::move(simplified));
         return true;
       },
-      [&](Forest::Ref ref) { predicates.pop_back(); }));
+      [&](compute::Forest::Ref ref) { predicates.pop_back(); }));
 
   std::sort(fragment_indices.begin(), fragment_indices.end());
 
diff --git a/cpp/src/arrow/dataset/file_test.cc b/cpp/src/arrow/dataset/file_test.cc
index 5bf89330429..9e6ba8c925a 100644
--- a/cpp/src/arrow/dataset/file_test.cc
+++ b/cpp/src/arrow/dataset/file_test.cc
@@ -24,7 +24,6 @@
 #include <gtest/gtest.h>
 
 #include "arrow/dataset/api.h"
-#include "arrow/dataset/forest_internal.h"
 #include "arrow/dataset/partition.h"
 #include "arrow/dataset/test_util.h"
 #include "arrow/filesystem/path_util.h"
@@ -340,294 +339,5 @@ TEST_F(TestFileSystemDataset, WriteProjected) {
     }
   }
 }
-
-// Tests of subtree pruning
-
-struct TestPathTree {
-  fs::FileInfo info;
-  std::vector<TestPathTree> subtrees;
-
-  explicit TestPathTree(std::string file_path) : info(fs::File(std::move(file_path))) {}
-
-  TestPathTree(std::string dir_path, std::vector<TestPathTree> subtrees)
-      : info(fs::Dir(std::move(dir_path))), subtrees(std::move(subtrees)) {}
-
-  TestPathTree(Forest::Ref ref, const std::vector<fs::FileInfo>& infos)
-      : info(infos[ref.i]) {
-    const Forest& forest = *ref.forest;
-
-    int begin = ref.i + 1;
-    int end = begin + ref.num_descendants();
-
-    for (int i = begin; i < end; ++i) {
-      subtrees.emplace_back(forest[i], infos);
-      i += forest[i].num_descendants();
-    }
-  }
-
-  bool operator==(const TestPathTree& other) const {
-    return info == other.info && subtrees == other.subtrees;
-  }
-
-  std::string ToString() const {
-    auto out = "\n" + info.path();
-    if (info.IsDirectory()) out += "/";
-
-    for (const auto& subtree : subtrees) {
-      out += subtree.ToString();
-    }
-    return out;
-  }
-
-  friend std::ostream& operator<<(std::ostream& os, const TestPathTree& tree) {
-    return os << tree.ToString();
-  }
-};
-
-using PT = TestPathTree;
-
-Forest MakeForest(std::vector<fs::FileInfo>* infos) {
-  std::sort(infos->begin(), infos->end(), fs::FileInfo::ByPath{});
-
-  return Forest(static_cast<int>(infos->size()), [&](int i, int j) {
-    return fs::internal::IsAncestorOf(infos->at(i).path(), infos->at(j).path());
-  });
-}
-
-void ExpectForestIs(std::vector<fs::FileInfo> infos, std::vector<PT> expected_roots) {
-  auto forest = MakeForest(&infos);
-
-  std::vector<PT> actual_roots;
-  ASSERT_OK(forest.Visit(
-      [&](Forest::Ref ref) -> Result<bool> {
-        actual_roots.emplace_back(ref, infos);
-        return false;  // only vist roots
-      },
-      [](Forest::Ref) {}));
-
-  // visit expected and assert equality
-  EXPECT_THAT(actual_roots, ContainerEq(expected_roots));
-}
-
-TEST(Forest, Basic) {
-  ExpectForestIs({}, {});
-
-  ExpectForestIs({fs::File("aa")}, {PT("aa")});
-  ExpectForestIs({fs::Dir("AA")}, {PT("AA", {})});
-  ExpectForestIs({fs::Dir("AA"), fs::File("AA/aa")}, {PT("AA", {PT("AA/aa")})});
-  ExpectForestIs({fs::Dir("AA"), fs::Dir("AA/BB"), fs::File("AA/BB/0")},
-                 {PT("AA", {PT("AA/BB", {PT("AA/BB/0")})})});
-
-  // Missing parent can still find ancestor.
-  ExpectForestIs({fs::Dir("AA"), fs::File("AA/BB/bb")}, {PT("AA", {PT("AA/BB/bb")})});
-
-  // Ancestors should link to parent regardless of ordering.
-  ExpectForestIs({fs::File("AA/aa"), fs::Dir("AA")}, {PT("AA", {PT("AA/aa")})});
-
-  // Multiple roots are supported.
-  ExpectForestIs({fs::File("aa"), fs::File("bb")}, {PT("aa"), PT("bb")});
-  ExpectForestIs({fs::File("00"), fs::Dir("AA"), fs::File("AA/aa"), fs::File("BB/bb")},
-                 {PT("00"), PT("AA", {PT("AA/aa")}), PT("BB/bb")});
-  ExpectForestIs({fs::Dir("AA"), fs::Dir("AA/BB"), fs::File("AA/BB/0"), fs::Dir("CC"),
-                  fs::Dir("CC/BB"), fs::File("CC/BB/0")},
-                 {PT("AA", {PT("AA/BB", {PT("AA/BB/0")})}),
-                  PT("CC", {PT("CC/BB", {PT("CC/BB/0")})})});
-}
-
-TEST(Forest, HourlyETL) {
-  // This test mimics a scenario where an ETL dumps hourly files in a structure
-  // `$year/$month/$day/$hour/*.parquet`.
-  constexpr int64_t kYears = 3;
-  constexpr int64_t kMonthsPerYear = 12;
-  constexpr int64_t kDaysPerMonth = 31;
-  constexpr int64_t kHoursPerDay = 24;
-  constexpr int64_t kFilesPerHour = 2;
-
-  // Avoid constructing strings
-  std::vector<std::string> numbers{kDaysPerMonth + 1};
-  for (size_t i = 0; i < numbers.size(); i++) {
-    numbers[i] = std::to_string(i);
-    if (numbers[i].size() == 1) {
-      numbers[i] = "0" + numbers[i];
-    }
-  }
-
-  auto join = [](const std::vector<std::string>& path) {
-    return fs::internal::JoinAbstractPath(path);
-  };
-
-  std::vector<fs::FileInfo> infos;
-
-  std::vector<PT> forest;
-  for (int64_t year = 0; year < kYears; year++) {
-    auto year_str = std::to_string(year + 2000);
-    auto year_dir = fs::Dir(year_str);
-    infos.push_back(year_dir);
-
-    std::vector<PT> months;
-    for (int64_t month = 0; month < kMonthsPerYear; month++) {
-      auto month_str = join({year_str, numbers[month + 1]});
-      auto month_dir = fs::Dir(month_str);
-      infos.push_back(month_dir);
-
-      std::vector<PT> days;
-      for (int64_t day = 0; day < kDaysPerMonth; day++) {
-        auto day_str = join({month_str, numbers[day + 1]});
-        auto day_dir = fs::Dir(day_str);
-        infos.push_back(day_dir);
-
-        std::vector<PT> hours;
-        for (int64_t hour = 0; hour < kHoursPerDay; hour++) {
-          auto hour_str = join({day_str, numbers[hour]});
-          auto hour_dir = fs::Dir(hour_str);
-          infos.push_back(hour_dir);
-
-          std::vector<PT> files;
-          for (int64_t file = 0; file < kFilesPerHour; file++) {
-            auto file_str = join({hour_str, numbers[file] + ".parquet"});
-            auto file_fd = fs::File(file_str);
-            infos.push_back(file_fd);
-            files.emplace_back(file_str);
-          }
-
-          auto hour_pt = PT(hour_str, std::move(files));
-          hours.push_back(hour_pt);
-        }
-
-        auto day_pt = PT(day_str, std::move(hours));
-        days.push_back(day_pt);
-      }
-
-      auto month_pt = PT(month_str, std::move(days));
-      months.push_back(month_pt);
-    }
-
-    auto year_pt = PT(year_str, std::move(months));
-    forest.push_back(year_pt);
-  }
-
-  ExpectForestIs(infos, forest);
-}
-
-TEST(Forest, Visit) {
-  using Infos = std::vector<fs::FileInfo>;
-
-  for (auto infos : {Infos{}, Infos{fs::Dir("A"), fs::File("A/a")},
-                     Infos{fs::Dir("AA"), fs::Dir("AA/BB"), fs::File("AA/BB/0"),
-                           fs::Dir("CC"), fs::Dir("CC/BB"), fs::File("CC/BB/0")}}) {
-    ASSERT_TRUE(std::is_sorted(infos.begin(), infos.end(), fs::FileInfo::ByPath{}));
-
-    auto forest = MakeForest(&infos);
-
-    auto ignore_post = [](Forest::Ref) {};
-
-    // noop is fine
-    ASSERT_OK(
-        forest.Visit([](Forest::Ref) -> Result<bool> { return false; }, ignore_post));
-
-    // Should propagate failure
-    if (forest.size() != 0) {
-      ASSERT_RAISES(
-          Invalid,
-          forest.Visit([](Forest::Ref) -> Result<bool> { return Status::Invalid(""); },
-                       ignore_post));
-    }
-
-    // Ensure basic visit of all nodes
-    int i = 0;
-    ASSERT_OK(forest.Visit(
-        [&](Forest::Ref ref) -> Result<bool> {
-          EXPECT_EQ(ref.i, i);
-          ++i;
-          return true;
-        },
-        ignore_post));
-
-    // Visit only directories
-    Infos actual_dirs;
-    ASSERT_OK(forest.Visit(
-        [&](Forest::Ref ref) -> Result<bool> {
-          if (!infos[ref.i].IsDirectory()) {
-            return false;
-          }
-          actual_dirs.push_back(infos[ref.i]);
-          return true;
-        },
-        ignore_post));
-
-    Infos expected_dirs;
-    for (const auto& info : infos) {
-      if (info.IsDirectory()) {
-        expected_dirs.push_back(info);
-      }
-    }
-    EXPECT_THAT(actual_dirs, ContainerEq(expected_dirs));
-  }
-}
-
-TEST(Subtree, EncodeExpression) {
-  SubtreeImpl tree;
-  ASSERT_EQ(0, tree.GetOrInsert(equal(field_ref("a"), literal("1"))));
-  // Should be idempotent
-  ASSERT_EQ(0, tree.GetOrInsert(equal(field_ref("a"), literal("1"))));
-  ASSERT_EQ(equal(field_ref("a"), literal("1")), tree.code_to_expr_[0]);
-
-  SubtreeImpl::expression_codes codes;
-  auto conj =
-      and_(equal(field_ref("a"), literal("1")), equal(field_ref("b"), literal("2")));
-  tree.EncodeConjunctionMembers(conj, &codes);
-  ASSERT_EQ(SubtreeImpl::expression_codes({0, 1}), codes);
-
-  codes.clear();
-  conj = or_(equal(field_ref("a"), literal("1")), equal(field_ref("b"), literal("2")));
-  tree.EncodeConjunctionMembers(conj, &codes);
-  ASSERT_EQ(SubtreeImpl::expression_codes({2}), codes);
-}
-
-TEST(Subtree, GetSubtreeExpression) {
-  SubtreeImpl tree;
-  const auto expr_a = equal(field_ref("a"), literal("1"));
-  const auto expr_b = equal(field_ref("b"), literal("2"));
-  const auto code_a = tree.GetOrInsert(expr_a);
-  const auto code_b = tree.GetOrInsert(expr_b);
-  ASSERT_EQ(expr_a,
-            tree.GetSubtreeExpression(SubtreeImpl::Encoded{util::nullopt, {code_a}}));
-  ASSERT_EQ(expr_b, tree.GetSubtreeExpression(
-                        SubtreeImpl::Encoded{util::nullopt, {code_a, code_b}}));
-}
-
-TEST(Subtree, EncodeFragments) {
-  auto fragment_schema = schema({});
-  const auto expr_a =
-      and_(equal(field_ref("a"), literal("1")), equal(field_ref("b"), literal("2")));
-  const auto expr_b =
-      and_(equal(field_ref("a"), literal("2")), equal(field_ref("b"), literal("3")));
-  std::vector<std::shared_ptr<InMemoryFragment>> fragments;
-  fragments.push_back(std::make_shared<InMemoryFragment>(
-      fragment_schema, arrow::RecordBatchVector(), expr_a));
-  fragments.push_back(std::make_shared<InMemoryFragment>(
-      fragment_schema, arrow::RecordBatchVector(), expr_b));
-
-  SubtreeImpl tree;
-  auto encoded = tree.EncodeFragments(fragments);
-  EXPECT_THAT(
-      tree.code_to_expr_,
-      ContainerEq(std::vector<compute::Expression>{
-          equal(field_ref("a"), literal("1")), equal(field_ref("b"), literal("2")),
-          equal(field_ref("a"), literal("2")), equal(field_ref("b"), literal("3"))}));
-  EXPECT_THAT(
-      encoded,
-      testing::UnorderedElementsAreArray({
-          SubtreeImpl::Encoded{util::make_optional<int>(0),
-                               SubtreeImpl::expression_codes({0, 1})},
-          SubtreeImpl::Encoded{util::make_optional<int>(1),
-                               SubtreeImpl::expression_codes({2, 3})},
-          SubtreeImpl::Encoded{util::nullopt, SubtreeImpl::expression_codes({0})},
-          SubtreeImpl::Encoded{util::nullopt, SubtreeImpl::expression_codes({2})},
-          SubtreeImpl::Encoded{util::nullopt, SubtreeImpl::expression_codes({0, 1})},
-          SubtreeImpl::Encoded{util::nullopt, SubtreeImpl::expression_codes({2, 3})},
-      }));
-}
-
 }  // namespace dataset
 }  // namespace arrow

From 3ce67ebe6750da22d04e73eab85e484fd29f8264 Mon Sep 17 00:00:00 2001
From: Ian Cook <ianmcook@gmail.com>
Date: Tue, 6 Jul 2021 17:42:49 -0400
Subject: [PATCH 516/719] ARROW-13216: [R] Type checks test fails with rtools35

Closes #10666 from ianmcook/ARROW-13216

Authored-by: Ian Cook <ianmcook@gmail.com>
Signed-off-by: Ian Cook <ianmcook@gmail.com>
---
 r/tests/testthat/test-dplyr.R | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/r/tests/testthat/test-dplyr.R b/r/tests/testthat/test-dplyr.R
index 6740b3ee75f..459c5ebc441 100644
--- a/r/tests/testthat/test-dplyr.R
+++ b/r/tests/testthat/test-dplyr.R
@@ -816,7 +816,19 @@ test_that("type checks on expressions", {
         b = is.integer(as.character(int)),
         c = is.integer(int + int),
         d = is.double(int + dbl),
-        e = is.logical(grepl("[def]", chr))
+        e = is.logical(dbl > pi)
+      ) %>%
+      collect(),
+    tbl
+  )
+  
+  # the code in the expectation below depends on RE2
+  skip_if_not_available("re2")
+
+  expect_dplyr_equal(
+    input %>%
+      transmute(
+        a = is.logical(grepl("[def]", chr))
       ) %>%
       collect(),
     tbl

From afea938e9db889ccc1565b0ad079b56e5192afd3 Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Wed, 7 Jul 2021 10:22:29 +0900
Subject: [PATCH 517/719] ARROW-13273: [C++] Don't use .pc only in CMake paths
 for Requires.private

Because they can't be found by raw pkg-config usage.

Closes #10668 from kou/cpp-pc-in-cmake-path

Authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 cpp/cmake_modules/ThirdpartyToolchain.cmake | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index efe054e3262..1cb4ceb4eea 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -239,7 +239,11 @@ macro(resolve_dependency DEPENDENCY_NAME)
     list(APPEND ARROW_SYSTEM_DEPENDENCIES ${PACKAGE_NAME})
     find_package(PkgConfig QUIET)
     foreach(ARG_PC_PACKAGE_NAME ${ARG_PC_PACKAGE_NAMES})
-      pkg_check_modules(${ARG_PC_PACKAGE_NAME}_PC ${ARG_PC_PACKAGE_NAME} QUIET)
+      pkg_check_modules(${ARG_PC_PACKAGE_NAME}_PC
+                        ${ARG_PC_PACKAGE_NAME}
+                        NO_CMAKE_PATH
+                        NO_CMAKE_EINVIRONMENT_PATH
+                        QUIET)
       if(${${ARG_PC_PACKAGE_NAME}_PC_FOUND})
         string(APPEND ARROW_PC_REQUIRES_PRIVATE " ${ARG_PC_PACKAGE_NAME}")
       endif()

From bc86814d6cd4865c1250319cbd0bf5431938ac80 Mon Sep 17 00:00:00 2001
From: Dominik Moritz <domoritz@gmail.com>
Date: Tue, 6 Jul 2021 19:12:42 -0700
Subject: [PATCH 518/719] ARROW-13275 [JS]: Fix perf tests

We recently split `DataFrame`s from `Table`s so we should run tests on the former.

Closes #10670 from domoritz/dataframe-benchmarks

Authored-by: Dominik Moritz <domoritz@gmail.com>
Signed-off-by: Dominik Moritz <domoritz@gmail.com>
---
 js/perf/config.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/js/perf/config.ts b/js/perf/config.ts
index f9915c440c0..08ea9ecc1d5 100644
--- a/js/perf/config.ts
+++ b/js/perf/config.ts
@@ -57,7 +57,7 @@ const batches = Array.from({length: NUM_BATCHES}).map(() => {
     });
 });
 
-const tracks = new Arrow.Table(batches[0].schema, batches);
+const tracks = new Arrow.DataFrame(batches[0].schema, batches);
 
 console.timeEnd('Prepare Data');
 

From dfb0928e91c0d3bd89cb0497a3948ed8fea7fc78 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Wed, 7 Jul 2021 02:46:04 +0000
Subject: [PATCH 519/719] ARROW-13096: [C++] Implement logarithm compute
 functions

Adds ln, log10, and log2. We could add a log1e and/or a logN if useful (probably not?)

Has some code from/will conflict with #10544.

Closes #10567 from lidavidm/arrow-13096

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Yibo Cai <yibo.cai@arm.com>
---
 cpp/src/arrow/compute/api_scalar.cc           |   4 +
 cpp/src/arrow/compute/api_scalar.h            |  59 ++++-
 .../compute/kernels/scalar_arithmetic.cc      | 204 +++++++++++++++++-
 .../compute/kernels/scalar_arithmetic_test.cc |  60 ++++++
 docs/source/cpp/compute.rst                   |  26 +++
 docs/source/python/api/compute.rst            |  18 ++
 6 files changed, 363 insertions(+), 8 deletions(-)

diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc
index 2021c8a30c6..719fbed78d5 100644
--- a/cpp/src/arrow/compute/api_scalar.cc
+++ b/cpp/src/arrow/compute/api_scalar.cc
@@ -326,6 +326,10 @@ SCALAR_ARITHMETIC_UNARY(Asin, "asin", "asin_checked")
 SCALAR_ARITHMETIC_UNARY(Acos, "acos", "acos_checked")
 SCALAR_ARITHMETIC_UNARY(Tan, "tan", "tan_checked")
 SCALAR_EAGER_UNARY(Atan, "atan")
+SCALAR_ARITHMETIC_UNARY(Ln, "ln", "ln_checked")
+SCALAR_ARITHMETIC_UNARY(Log10, "log10", "log10_checked")
+SCALAR_ARITHMETIC_UNARY(Log2, "log2", "log2_checked")
+SCALAR_ARITHMETIC_UNARY(Log1p, "log1p", "log1p_checked")
 
 #define SCALAR_ARITHMETIC_BINARY(NAME, REGISTRY_NAME, REGISTRY_CHECKED_NAME)           \
   Result<Datum> NAME(const Datum& left, const Datum& right, ArithmeticOptions options, \
diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h
index 89b4faca940..8417d77b9de 100644
--- a/cpp/src/arrow/compute/api_scalar.h
+++ b/cpp/src/arrow/compute/api_scalar.h
@@ -246,8 +246,9 @@ class ARROW_EXPORT ProjectOptions : public FunctionOptions {
 
 /// @}
 
-/// \brief Get the absolute value of a value. Array values can be of arbitrary
-/// length. If argument is null the result will be null.
+/// \brief Get the absolute value of a value.
+///
+/// If argument is null the result will be null.
 ///
 /// \param[in] arg the value transformed
 /// \param[in] options arithmetic options (overflow handling), optional
@@ -311,8 +312,9 @@ Result<Datum> Divide(const Datum& left, const Datum& right,
                      ArithmeticOptions options = ArithmeticOptions(),
                      ExecContext* ctx = NULLPTR);
 
-/// \brief Negate a value. Array values can be of arbitrary length. If argument
-/// is null the result will be null.
+/// \brief Negate values.
+///
+/// If argument is null the result will be null.
 ///
 /// \param[in] arg the value negated
 /// \param[in] options arithmetic options (overflow handling), optional
@@ -424,6 +426,55 @@ Result<Datum> Atan(const Datum& arg, ExecContext* ctx = NULLPTR);
 ARROW_EXPORT
 Result<Datum> Atan2(const Datum& y, const Datum& x, ExecContext* ctx = NULLPTR);
 
+/// \brief Get the natural log of a value.
+///
+/// If argument is null the result will be null.
+///
+/// \param[in] arg The values to compute the logarithm for.
+/// \param[in] options arithmetic options (overflow handling), optional
+/// \param[in] ctx the function execution context, optional
+/// \return the elementwise natural log
+ARROW_EXPORT
+Result<Datum> Ln(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(),
+                 ExecContext* ctx = NULLPTR);
+
+/// \brief Get the log base 10 of a value.
+///
+/// If argument is null the result will be null.
+///
+/// \param[in] arg The values to compute the logarithm for.
+/// \param[in] options arithmetic options (overflow handling), optional
+/// \param[in] ctx the function execution context, optional
+/// \return the elementwise log base 10
+ARROW_EXPORT
+Result<Datum> Log10(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(),
+                    ExecContext* ctx = NULLPTR);
+
+/// \brief Get the log base 2 of a value.
+///
+/// If argument is null the result will be null.
+///
+/// \param[in] arg The values to compute the logarithm for.
+/// \param[in] options arithmetic options (overflow handling), optional
+/// \param[in] ctx the function execution context, optional
+/// \return the elementwise log base 2
+ARROW_EXPORT
+Result<Datum> Log2(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(),
+                   ExecContext* ctx = NULLPTR);
+
+/// \brief Get the natural log of (1 + value).
+///
+/// If argument is null the result will be null.
+/// This function may be more accurate than Log(1 + value) for values close to zero.
+///
+/// \param[in] arg The values to compute the logarithm for.
+/// \param[in] options arithmetic options (overflow handling), optional
+/// \param[in] ctx the function execution context, optional
+/// \return the elementwise natural log
+ARROW_EXPORT
+Result<Datum> Log1p(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(),
+                    ExecContext* ctx = NULLPTR);
+
 /// \brief Find the element-wise maximum of any number of arrays or scalars.
 /// Array values must be the same length.
 ///
diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
index da3a3095041..f0eabf1b40e 100644
--- a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
@@ -53,11 +53,11 @@ template <typename T>
 using is_signed_integer =
     std::integral_constant<bool, std::is_integral<T>::value && std::is_signed<T>::value>;
 
-template <typename T>
-using enable_if_signed_integer = enable_if_t<is_signed_integer<T>::value, T>;
+template <typename T, typename R = T>
+using enable_if_signed_integer = enable_if_t<is_signed_integer<T>::value, R>;
 
-template <typename T>
-using enable_if_unsigned_integer = enable_if_t<is_unsigned_integer<T>::value, T>;
+template <typename T, typename R = T>
+using enable_if_unsigned_integer = enable_if_t<is_unsigned_integer<T>::value, R>;
 
 template <typename T, typename R = T>
 using enable_if_integer =
@@ -686,6 +686,118 @@ struct Atan2 {
   }
 };
 
+struct LogNatural {
+  template <typename T, typename Arg>
+  static enable_if_floating_point<Arg, T> Call(KernelContext*, Arg arg, Status*) {
+    static_assert(std::is_same<T, Arg>::value, "");
+    if (arg == 0.0) {
+      return -std::numeric_limits<T>::infinity();
+    } else if (arg < 0.0) {
+      return std::numeric_limits<T>::quiet_NaN();
+    }
+    return std::log(arg);
+  }
+};
+
+struct LogNaturalChecked {
+  template <typename T, typename Arg>
+  static enable_if_floating_point<Arg, T> Call(KernelContext*, Arg arg, Status* st) {
+    static_assert(std::is_same<T, Arg>::value, "");
+    if (arg == 0.0) {
+      *st = Status::Invalid("logarithm of zero");
+      return arg;
+    } else if (arg < 0.0) {
+      *st = Status::Invalid("logarithm of negative number");
+      return arg;
+    }
+    return std::log(arg);
+  }
+};
+
+struct Log10 {
+  template <typename T, typename Arg>
+  static enable_if_floating_point<Arg, T> Call(KernelContext*, Arg arg, Status*) {
+    static_assert(std::is_same<T, Arg>::value, "");
+    if (arg == 0.0) {
+      return -std::numeric_limits<T>::infinity();
+    } else if (arg < 0.0) {
+      return std::numeric_limits<T>::quiet_NaN();
+    }
+    return std::log10(arg);
+  }
+};
+
+struct Log10Checked {
+  template <typename T, typename Arg>
+  static enable_if_floating_point<Arg, T> Call(KernelContext*, Arg arg, Status* st) {
+    static_assert(std::is_same<T, Arg>::value, "");
+    if (arg == 0) {
+      *st = Status::Invalid("logarithm of zero");
+      return arg;
+    } else if (arg < 0) {
+      *st = Status::Invalid("logarithm of negative number");
+      return arg;
+    }
+    return std::log10(arg);
+  }
+};
+
+struct Log2 {
+  template <typename T, typename Arg>
+  static enable_if_floating_point<Arg, T> Call(KernelContext*, Arg arg, Status*) {
+    static_assert(std::is_same<T, Arg>::value, "");
+    if (arg == 0.0) {
+      return -std::numeric_limits<T>::infinity();
+    } else if (arg < 0.0) {
+      return std::numeric_limits<T>::quiet_NaN();
+    }
+    return std::log2(arg);
+  }
+};
+
+struct Log2Checked {
+  template <typename T, typename Arg>
+  static enable_if_floating_point<Arg, T> Call(KernelContext*, Arg arg, Status* st) {
+    static_assert(std::is_same<T, Arg>::value, "");
+    if (arg == 0.0) {
+      *st = Status::Invalid("logarithm of zero");
+      return arg;
+    } else if (arg < 0.0) {
+      *st = Status::Invalid("logarithm of negative number");
+      return arg;
+    }
+    return std::log2(arg);
+  }
+};
+
+struct Log1p {
+  template <typename T, typename Arg>
+  static enable_if_floating_point<Arg, T> Call(KernelContext*, Arg arg, Status*) {
+    static_assert(std::is_same<T, Arg>::value, "");
+    if (arg == -1) {
+      return -std::numeric_limits<T>::infinity();
+    } else if (arg < -1) {
+      return std::numeric_limits<T>::quiet_NaN();
+    }
+    return std::log1p(arg);
+  }
+};
+
+struct Log1pChecked {
+  template <typename T, typename Arg>
+  static enable_if_floating_point<Arg, T> Call(KernelContext*, Arg arg, Status* st) {
+    static_assert(std::is_same<T, Arg>::value, "");
+    if (arg == -1) {
+      *st = Status::Invalid("logarithm of zero");
+      return arg;
+    } else if (arg < -1) {
+      *st = Status::Invalid("logarithm of negative number");
+      return arg;
+    }
+    return std::log1p(arg);
+  }
+};
+
 // Generate a kernel given an arithmetic functor
 template <template <typename... Args> class KernelGenerator, typename Op>
 ArrayKernelExec ArithmeticExecFromOp(detail::GetTypeId get_id) {
@@ -1295,6 +1407,60 @@ const FunctionDoc atan2_doc{
     "Compute the inverse tangent using argument signs to determine the quadrant",
     ("Integer arguments return double values."),
     {"y", "x"}};
+
+const FunctionDoc ln_doc{
+    "Compute natural log of arguments element-wise",
+    ("Non-positive values return -inf or NaN. Null values return null.\n"
+     "Use function \"ln_checked\" if you want non-positive values to raise an error."),
+    {"x"}};
+
+const FunctionDoc ln_checked_doc{
+    "Compute natural log of arguments element-wise",
+    ("Non-positive values return -inf or NaN. Null values return null.\n"
+     "Use function \"ln\" if you want non-positive values to return "
+     "-inf or NaN."),
+    {"x"}};
+
+const FunctionDoc log10_doc{
+    "Compute log base 10 of arguments element-wise",
+    ("Non-positive values return -inf or NaN. Null values return null.\n"
+     "Use function \"log10_checked\" if you want non-positive values to raise an error."),
+    {"x"}};
+
+const FunctionDoc log10_checked_doc{
+    "Compute log base 10 of arguments element-wise",
+    ("Non-positive values return -inf or NaN. Null values return null.\n"
+     "Use function \"log10\" if you want non-positive values to return "
+     "-inf or NaN."),
+    {"x"}};
+
+const FunctionDoc log2_doc{
+    "Compute log base 2 of arguments element-wise",
+    ("Non-positive values return -inf or NaN. Null values return null.\n"
+     "Use function \"log2_checked\" if you want non-positive values to raise an error."),
+    {"x"}};
+
+const FunctionDoc log2_checked_doc{
+    "Compute log base 2 of arguments element-wise",
+    ("Non-positive values return -inf or NaN. Null values return null.\n"
+     "Use function \"log2\" if you want non-positive values to return "
+     "-inf or NaN."),
+    {"x"}};
+
+const FunctionDoc log1p_doc{
+    "Compute natural log of (1+x) element-wise",
+    ("Values <= -1 return -inf or NaN. Null values return null.\n"
+     "This function may be more precise than log(1 + x) for x close to zero."
+     "Use function \"log1p_checked\" if you want non-positive values to raise an error."),
+    {"x"}};
+
+const FunctionDoc log1p_checked_doc{
+    "Compute natural log of (1+x) element-wise",
+    ("Values <= -1 return -inf or NaN. Null values return null.\n"
+     "This function may be more precise than log(1 + x) for x close to zero."
+     "Use function \"log1p\" if you want non-positive values to return "
+     "-inf or NaN."),
+    {"x"}};
 }  // namespace
 
 void RegisterScalarArithmetic(FunctionRegistry* registry) {
@@ -1460,6 +1626,36 @@ void RegisterScalarArithmetic(FunctionRegistry* registry) {
 
   auto atan2 = MakeArithmeticFunctionFloatingPoint<Atan2>("atan2", &atan2_doc);
   DCHECK_OK(registry->AddFunction(std::move(atan2)));
+
+  // ----------------------------------------------------------------------
+  // Logarithms
+  auto ln = MakeUnaryArithmeticFunctionFloatingPoint<LogNatural>("ln", &ln_doc);
+  DCHECK_OK(registry->AddFunction(std::move(ln)));
+
+  auto ln_checked = MakeUnaryArithmeticFunctionFloatingPointNotNull<LogNaturalChecked>(
+      "ln_checked", &ln_checked_doc);
+  DCHECK_OK(registry->AddFunction(std::move(ln_checked)));
+
+  auto log10 = MakeUnaryArithmeticFunctionFloatingPoint<Log10>("log10", &log10_doc);
+  DCHECK_OK(registry->AddFunction(std::move(log10)));
+
+  auto log10_checked = MakeUnaryArithmeticFunctionFloatingPointNotNull<Log10Checked>(
+      "log10_checked", &log10_checked_doc);
+  DCHECK_OK(registry->AddFunction(std::move(log10_checked)));
+
+  auto log2 = MakeUnaryArithmeticFunctionFloatingPoint<Log2>("log2", &log2_doc);
+  DCHECK_OK(registry->AddFunction(std::move(log2)));
+
+  auto log2_checked = MakeUnaryArithmeticFunctionFloatingPointNotNull<Log2Checked>(
+      "log2_checked", &log2_checked_doc);
+  DCHECK_OK(registry->AddFunction(std::move(log2_checked)));
+
+  auto log1p = MakeUnaryArithmeticFunctionFloatingPoint<Log1p>("log1p", &log1p_doc);
+  DCHECK_OK(registry->AddFunction(std::move(log1p)));
+
+  auto log1p_checked = MakeUnaryArithmeticFunctionFloatingPointNotNull<Log1pChecked>(
+      "log1p_checked", &log1p_checked_doc);
+  DCHECK_OK(registry->AddFunction(std::move(log1p_checked)));
 }
 
 }  // namespace internal
diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
index ed24a44484f..877b6f31160 100644
--- a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
@@ -1821,5 +1821,65 @@ TYPED_TEST(TestBinaryArithmeticFloating, TrigAtan2) {
                               -M_PI_2, 0, M_PI));
 }
 
+TYPED_TEST(TestUnaryArithmeticFloating, Log) {
+  using CType = typename TestFixture::CType;
+  auto ty = this->type_singleton();
+  this->SetNansEqual(true);
+  auto min_val = std::numeric_limits<CType>::min();
+  auto max_val = std::numeric_limits<CType>::max();
+  for (auto check_overflow : {false, true}) {
+    this->SetOverflowCheck(check_overflow);
+    this->AssertUnaryOp(Ln, "[1, 2.718281828459045, null, NaN, Inf]",
+                        "[0, 1, null, NaN, Inf]");
+    // N.B. min() for float types is smallest normal number > 0
+    this->AssertUnaryOp(Ln, min_val, std::log(min_val));
+    this->AssertUnaryOp(Ln, max_val, std::log(max_val));
+    this->AssertUnaryOp(Log10, "[1, 10, null, NaN, Inf]", "[0, 1, null, NaN, Inf]");
+    this->AssertUnaryOp(Log10, min_val, std::log10(min_val));
+    this->AssertUnaryOp(Log10, max_val, std::log10(max_val));
+    this->AssertUnaryOp(Log2, "[1, 2, null, NaN, Inf]", "[0, 1, null, NaN, Inf]");
+    this->AssertUnaryOp(Log2, min_val, std::log2(min_val));
+    this->AssertUnaryOp(Log2, max_val, std::log2(max_val));
+    this->AssertUnaryOp(Log1p, "[0, 1.718281828459045, null, NaN, Inf]",
+                        "[0, 1, null, NaN, Inf]");
+    this->AssertUnaryOp(Log1p, min_val, std::log1p(min_val));
+    this->AssertUnaryOp(Log1p, max_val, std::log1p(max_val));
+  }
+  this->SetOverflowCheck(false);
+  this->AssertUnaryOp(Ln, "[-Inf, -1, 0, Inf]", "[NaN, NaN, -Inf, Inf]");
+  this->AssertUnaryOp(Log10, "[-Inf, -1, 0, Inf]", "[NaN, NaN, -Inf, Inf]");
+  this->AssertUnaryOp(Log2, "[-Inf, -1, 0, Inf]", "[NaN, NaN, -Inf, Inf]");
+  this->AssertUnaryOp(Log1p, "[-Inf, -2, -1, Inf]", "[NaN, NaN, -Inf, Inf]");
+  this->SetOverflowCheck(true);
+  this->AssertUnaryOpRaises(Ln, "[0]", "logarithm of zero");
+  this->AssertUnaryOpRaises(Ln, "[-1]", "logarithm of negative number");
+  this->AssertUnaryOpRaises(Ln, "[-Inf]", "logarithm of negative number");
+
+  auto lowest_val = MakeScalar(std::numeric_limits<CType>::lowest());
+  // N.B. RapidJSON on some platforms raises "Number too big to be stored in double" so
+  // don't bounce through JSON
+  EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
+                                  ::testing::HasSubstr("logarithm of negative number"),
+                                  Ln(lowest_val, this->options_));
+  this->AssertUnaryOpRaises(Log10, "[0]", "logarithm of zero");
+  this->AssertUnaryOpRaises(Log10, "[-1]", "logarithm of negative number");
+  this->AssertUnaryOpRaises(Log10, "[-Inf]", "logarithm of negative number");
+  EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
+                                  ::testing::HasSubstr("logarithm of negative number"),
+                                  Log10(lowest_val, this->options_));
+  this->AssertUnaryOpRaises(Log2, "[0]", "logarithm of zero");
+  this->AssertUnaryOpRaises(Log2, "[-1]", "logarithm of negative number");
+  this->AssertUnaryOpRaises(Log2, "[-Inf]", "logarithm of negative number");
+  EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
+                                  ::testing::HasSubstr("logarithm of negative number"),
+                                  Log2(lowest_val, this->options_));
+  this->AssertUnaryOpRaises(Log1p, "[-1]", "logarithm of zero");
+  this->AssertUnaryOpRaises(Log1p, "[-2]", "logarithm of negative number");
+  this->AssertUnaryOpRaises(Log1p, "[-Inf]", "logarithm of negative number");
+  EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
+                                  ::testing::HasSubstr("logarithm of negative number"),
+                                  Log1p(lowest_val, this->options_));
+}
+
 }  // namespace compute
 }  // namespace arrow
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index 2d41c579747..b2bbe686c8e 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -339,6 +339,32 @@ Bit-wise functions
   out of bounds for the data type.  However, an overflow when shifting the
   first input is not error (truncated bits are silently discarded).
 
+Logarithmic functions
+~~~~~~~~~~~~~~~~~~~~~
+
+Logarithmic functions are also supported, and also offer ``_checked``
+variants that check for domain errors if needed.
+
++--------------------------+------------+--------------------+---------------------+
+| Function name            | Arity      | Input types        | Output type         |
++==========================+============+====================+=====================+
+| ln                       | Unary      | Float32/Float64    | Float32/Float64     |
++--------------------------+------------+--------------------+---------------------+
+| ln_checked               | Unary      | Float32/Float64    | Float32/Float64     |
++--------------------------+------------+--------------------+---------------------+
+| log10                    | Unary      | Float32/Float64    | Float32/Float64     |
++--------------------------+------------+--------------------+---------------------+
+| log10_checked            | Unary      | Float32/Float64    | Float32/Float64     |
++--------------------------+------------+--------------------+---------------------+
+| log1p                    | Unary      | Float32/Float64    | Float32/Float64     |
++--------------------------+------------+--------------------+---------------------+
+| log1p_checked            | Unary      | Float32/Float64    | Float32/Float64     |
++--------------------------+------------+--------------------+---------------------+
+| log2                     | Unary      | Float32/Float64    | Float32/Float64     |
++--------------------------+------------+--------------------+---------------------+
+| log2_checked             | Unary      | Float32/Float64    | Float32/Float64     |
++--------------------------+------------+--------------------+---------------------+
+
 Trigonometric functions
 ~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst
index 334a76e75d2..a611d2a2384 100644
--- a/docs/source/python/api/compute.rst
+++ b/docs/source/python/api/compute.rst
@@ -73,6 +73,24 @@ Bit-wise operations do not offer (or need) a checked variant.
    bit_wise_or
    bit_wise_xor
 
+Logarithmic Functions
+---------------------
+
+Logarithmic functions are also supported, and also offer ``_checked``
+variants which detect domain errors.
+
+.. autosummary::
+   :toctree: ../generated/
+
+   ln
+   ln_checked
+   log10
+   log10_checked
+   log1p
+   log1p_checked
+   log2
+   log2_checked
+
 Trigonometric Functions
 -----------------------
 

From fdd7d32bcbc4086242e6a3517ef49e4f4468bd56 Mon Sep 17 00:00:00 2001
From: Rok <rok@mihevc.org>
Date: Wed, 7 Jul 2021 09:27:07 +0200
Subject: [PATCH 520/719] ARROW-13054: [C++] Add option to specify the first
 day of the week for the "day_of_week" temporal kernel

This is to resolve [ARROW-13054](https://issues.apache.org/jira/browse/ARROW-13054).
This will be needed for casting timezone-naive timestamps [ARROW-13033](https://issues.apache.org/jira/browse/ARROW-13033) and defining [starting day of the week](https://github.com/apache/arrow/pull/10507#pullrequestreview-681491088).

Closes #10598 from rok/ARROW-13054

Lead-authored-by: Rok <rok@mihevc.org>
Co-authored-by: Rok Mihevc <rok@mihevc.org>
Signed-off-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
---
 cpp/src/arrow/compute/api_scalar.cc           |  15 +-
 cpp/src/arrow/compute/api_scalar.h            |  23 +-
 cpp/src/arrow/compute/function_test.cc        |   1 +
 .../arrow/compute/kernels/scalar_temporal.cc  |  97 +++++++-
 .../compute/kernels/scalar_temporal_test.cc   | 215 +++++++++++-------
 docs/source/cpp/compute.rst                   |  78 +++----
 python/pyarrow/_compute.pyx                   |  12 +
 python/pyarrow/compute.py                     |   1 +
 python/pyarrow/includes/libarrow.pxd          |   6 +
 python/pyarrow/tests/test_compute.py          |  75 ++++++
 r/R/dplyr-functions.R                         |  12 +-
 r/src/compute.cpp                             |  10 +
 12 files changed, 396 insertions(+), 149 deletions(-)

diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc
index 719fbed78d5..be6498a74c6 100644
--- a/cpp/src/arrow/compute/api_scalar.cc
+++ b/cpp/src/arrow/compute/api_scalar.cc
@@ -158,6 +158,9 @@ static auto kProjectOptionsType = GetFunctionOptionsType<ProjectOptions>(
     DataMember("field_names", &ProjectOptions::field_names),
     DataMember("field_nullability", &ProjectOptions::field_nullability),
     DataMember("field_metadata", &ProjectOptions::field_metadata));
+static auto kDayOfWeekOptionsType = GetFunctionOptionsType<DayOfWeekOptions>(
+    DataMember("one_based_numbering", &DayOfWeekOptions::one_based_numbering),
+    DataMember("week_start", &DayOfWeekOptions::week_start));
 }  // namespace
 }  // namespace internal
 
@@ -278,6 +281,12 @@ ProjectOptions::ProjectOptions(std::vector<std::string> n)
 ProjectOptions::ProjectOptions() : ProjectOptions(std::vector<std::string>()) {}
 constexpr char ProjectOptions::kTypeName[];
 
+DayOfWeekOptions::DayOfWeekOptions(bool one_based_numbering, uint32_t week_start)
+    : FunctionOptions(internal::kDayOfWeekOptionsType),
+      one_based_numbering(one_based_numbering),
+      week_start(week_start) {}
+constexpr char DayOfWeekOptions::kTypeName[];
+
 namespace internal {
 void RegisterScalarOptions(FunctionRegistry* registry) {
   DCHECK_OK(registry->AddFunctionOptionsType(kArithmeticOptionsType));
@@ -296,6 +305,7 @@ void RegisterScalarOptions(FunctionRegistry* registry) {
   DCHECK_OK(registry->AddFunctionOptionsType(kSliceOptionsType));
   DCHECK_OK(registry->AddFunctionOptionsType(kCompareOptionsType));
   DCHECK_OK(registry->AddFunctionOptionsType(kProjectOptionsType));
+  DCHECK_OK(registry->AddFunctionOptionsType(kDayOfWeekOptionsType));
 }
 }  // namespace internal
 
@@ -462,7 +472,6 @@ Result<Datum> IfElse(const Datum& cond, const Datum& if_true, const Datum& if_fa
 SCALAR_EAGER_UNARY(Year, "year")
 SCALAR_EAGER_UNARY(Month, "month")
 SCALAR_EAGER_UNARY(Day, "day")
-SCALAR_EAGER_UNARY(DayOfWeek, "day_of_week")
 SCALAR_EAGER_UNARY(DayOfYear, "day_of_year")
 SCALAR_EAGER_UNARY(ISOYear, "iso_year")
 SCALAR_EAGER_UNARY(ISOWeek, "iso_week")
@@ -476,5 +485,9 @@ SCALAR_EAGER_UNARY(Microsecond, "microsecond")
 SCALAR_EAGER_UNARY(Nanosecond, "nanosecond")
 SCALAR_EAGER_UNARY(Subsecond, "subsecond")
 
+Result<Datum> DayOfWeek(const Datum& arg, DayOfWeekOptions options, ExecContext* ctx) {
+  return CallFunction("day_of_week", {arg}, &options, ctx);
+}
+
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h
index 8417d77b9de..f0aebc8e032 100644
--- a/cpp/src/arrow/compute/api_scalar.h
+++ b/cpp/src/arrow/compute/api_scalar.h
@@ -244,6 +244,18 @@ class ARROW_EXPORT ProjectOptions : public FunctionOptions {
   std::vector<std::shared_ptr<const KeyValueMetadata>> field_metadata;
 };
 
+struct ARROW_EXPORT DayOfWeekOptions : public FunctionOptions {
+ public:
+  explicit DayOfWeekOptions(bool one_based_numbering = false, uint32_t week_start = 1);
+  constexpr static char const kTypeName[] = "DayOfWeekOptions";
+  static DayOfWeekOptions Defaults() { return DayOfWeekOptions{}; }
+
+  /// Number days from 1 if true and from 0 if false
+  bool one_based_numbering;
+  /// What day does the week start with (Monday=1, Sunday=7)
+  uint32_t week_start;
+};
+
 /// @}
 
 /// \brief Get the absolute value of a value.
@@ -764,15 +776,22 @@ ARROW_EXPORT
 Result<Datum> Day(const Datum& values, ExecContext* ctx = NULLPTR);
 
 /// \brief DayOfWeek returns number of the day of the week value for each element of
-/// `values`. Week starts on Monday denoted by 0 and ends on Sunday denoted by 6.
+/// `values`.
+///
+/// By default week starts on Monday denoted by 0 and ends on Sunday denoted
+/// by 6. Start day of the week (Monday=1, Sunday=7) and numbering base (0 or 1) can be
+/// set using DayOfWeekOptions
 ///
 /// \param[in] values input to extract number of the day of the week from
+/// \param[in] options for setting start of the week and day numbering
 /// \param[in] ctx the function execution context, optional
 /// \return the resulting datum
 ///
 /// \since 5.0.0
 /// \note API not yet finalized
-ARROW_EXPORT Result<Datum> DayOfWeek(const Datum& values, ExecContext* ctx = NULLPTR);
+ARROW_EXPORT Result<Datum> DayOfWeek(const Datum& values,
+                                     DayOfWeekOptions options = DayOfWeekOptions(),
+                                     ExecContext* ctx = NULLPTR);
 
 /// \brief DayOfYear returns number of day of the year for each element of `values`.
 /// January 1st maps to day number 1, February 1st to 32, etc.
diff --git a/cpp/src/arrow/compute/function_test.cc b/cpp/src/arrow/compute/function_test.cc
index bbe514af09a..752ade284b7 100644
--- a/cpp/src/arrow/compute/function_test.cc
+++ b/cpp/src/arrow/compute/function_test.cc
@@ -90,6 +90,7 @@ TEST(FunctionOptions, Equality) {
   options.emplace_back(new ProjectOptions({"col1"}, {false}, {}));
   options.emplace_back(
       new ProjectOptions({"col1"}, {false}, {key_value_metadata({{"key", "val"}})}));
+  options.emplace_back(new DayOfWeekOptions(false, 1));
   options.emplace_back(new CastOptions(CastOptions::Safe(boolean())));
   options.emplace_back(new CastOptions(CastOptions::Unsafe(int64())));
   options.emplace_back(new FilterOptions());
diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal.cc b/cpp/src/arrow/compute/kernels/scalar_temporal.cc
index 1694d22ffae..f0257772d4a 100644
--- a/cpp/src/arrow/compute/kernels/scalar_temporal.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_temporal.cc
@@ -16,6 +16,7 @@
 // under the License.
 
 #include "arrow/builder.h"
+#include "arrow/compute/api_scalar.h"
 #include "arrow/compute/kernels/common.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/time.h"
@@ -48,6 +49,8 @@ using arrow_vendored::date::literals::thu;
 using internal::applicator::ScalarUnaryNotNull;
 using internal::applicator::SimpleUnary;
 
+using DayOfWeekState = OptionsWrapper<DayOfWeekOptions>;
+
 const std::string& GetInputTimezone(const Datum& datum) {
   return checked_cast<const TimestampType&>(*datum.type()).timezone();
 }
@@ -80,6 +83,25 @@ struct TemporalComponentExtract {
   }
 };
 
+template <typename Op, typename OutType>
+struct DayOfWeekExec {
+  using OutValue = typename internal::GetOutputType<OutType>::T;
+
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    const DayOfWeekOptions& options = DayOfWeekState::Get(ctx);
+    if (options.week_start < 1 || 7 < options.week_start) {
+      return Status::Invalid(
+          "week_start must follow ISO convention (Monday=1, Sunday=7). Got week_start=",
+          options.week_start);
+    }
+
+    RETURN_NOT_OK(TemporalComponentExtractCheckTimezone(batch.values[0]));
+    applicator::ScalarUnaryNotNullStateful<OutType, TimestampType, Op> kernel{
+        Op(options)};
+    return kernel.Exec(ctx, batch, out);
+  }
+};
+
 // ----------------------------------------------------------------------
 // Extract year from timestamp
 
@@ -118,16 +140,30 @@ struct Day {
 
 // ----------------------------------------------------------------------
 // Extract day of week from timestamp
+//
+// By default week starts on Monday represented by 0 and ends on Sunday represented
+// by 6. Start day of the week (Monday=1, Sunday=7) and numbering start (0 or 1) can be
+// set using DayOfWeekOptions
 
 template <typename Duration>
 struct DayOfWeek {
+  explicit DayOfWeek(const DayOfWeekOptions& options) {
+    for (int i = 0; i < 7; i++) {
+      lookup_table[i] = i + 8 - options.week_start;
+      lookup_table[i] = (lookup_table[i] > 6) ? lookup_table[i] - 7 : lookup_table[i];
+      lookup_table[i] += options.one_based_numbering;
+    }
+  }
+
   template <typename T, typename Arg0>
-  static T Call(KernelContext*, Arg0 arg, Status*) {
-    return static_cast<T>(
-        weekday(year_month_day(floor<days>(sys_time<Duration>(Duration{arg}))))
-            .iso_encoding() -
-        1);
+  T Call(KernelContext*, Arg0 arg, Status*) const {
+    const auto wd = arrow_vendored::date::year_month_weekday(
+                        floor<days>(sys_time<Duration>(Duration{arg})))
+                        .weekday()
+                        .iso_encoding();
+    return lookup_table[wd - 1];
   }
+  std::array<int64_t, 7> lookup_table;
 };
 
 // ----------------------------------------------------------------------
@@ -398,6 +434,42 @@ std::shared_ptr<ScalarFunction> MakeTemporal(std::string name, const FunctionDoc
   return func;
 }
 
+template <template <typename...> class Op, typename OutType>
+std::shared_ptr<ScalarFunction> MakeTemporalWithOptions(
+    std::string name, const FunctionDoc* doc, const DayOfWeekOptions& default_options,
+    KernelInit init) {
+  const auto& out_type = TypeTraits<OutType>::type_singleton();
+  auto func =
+      std::make_shared<ScalarFunction>(name, Arity::Unary(), doc, &default_options);
+
+  for (auto unit : internal::AllTimeUnits()) {
+    InputType in_type{match::TimestampTypeUnit(unit)};
+    switch (unit) {
+      case TimeUnit::SECOND: {
+        auto exec = DayOfWeekExec<Op<std::chrono::seconds>, OutType>::Exec;
+        DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec), init));
+        break;
+      }
+      case TimeUnit::MILLI: {
+        auto exec = DayOfWeekExec<Op<std::chrono::milliseconds>, OutType>::Exec;
+        DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec), init));
+        break;
+      }
+      case TimeUnit::MICRO: {
+        auto exec = DayOfWeekExec<Op<std::chrono::microseconds>, OutType>::Exec;
+        DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec), init));
+        break;
+      }
+      case TimeUnit::NANO: {
+        auto exec = DayOfWeekExec<Op<std::chrono::nanoseconds>, OutType>::Exec;
+        DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec), init));
+        break;
+      }
+    }
+  }
+  return func;
+}
+
 template <template <typename...> class Op>
 std::shared_ptr<ScalarFunction> MakeStructTemporal(std::string name,
                                                    const FunctionDoc* doc) {
@@ -451,9 +523,14 @@ const FunctionDoc day_doc{
 
 const FunctionDoc day_of_week_doc{
     "Extract day of the week number",
-    ("Week starts on Monday denoted by 0 and ends on Sunday denoted by 6.\n"
+    ("By default, the week starts on Monday represented by 0 and ends on Sunday "
+     "represented by 6.\n"
+     "DayOfWeekOptions.week_start can be used to set another starting day using ISO "
+     "convention (Monday=1, Sunday=7). Day numbering can start with 0 or 1 using "
+     "DayOfWeekOptions.one_based_numbering parameter.\n"
      "Returns an error if timestamp has a defined timezone. Null values return null."),
-    {"values"}};
+    {"values"},
+    "DayOfWeekOptions"};
 
 const FunctionDoc day_of_year_doc{
     "Extract number of day of year",
@@ -537,7 +614,9 @@ void RegisterScalarTemporal(FunctionRegistry* registry) {
   auto day = MakeTemporal<Day, Int64Type>("day", &year_doc);
   DCHECK_OK(registry->AddFunction(std::move(day)));
 
-  auto day_of_week = MakeTemporal<DayOfWeek, Int64Type>("day_of_week", &day_of_week_doc);
+  static auto default_day_of_week_options = DayOfWeekOptions::Defaults();
+  auto day_of_week = MakeTemporalWithOptions<DayOfWeek, Int64Type>(
+      "day_of_week", &day_of_week_doc, default_day_of_week_options, DayOfWeekState::Init);
   DCHECK_OK(registry->AddFunction(std::move(day_of_week)));
 
   auto day_of_year = MakeTemporal<DayOfYear, Int64Type>("day_of_year", &day_of_year_doc);
@@ -561,7 +640,7 @@ void RegisterScalarTemporal(FunctionRegistry* registry) {
   auto minute = MakeTemporal<Minute, Int64Type>("minute", &minute_doc);
   DCHECK_OK(registry->AddFunction(std::move(minute)));
 
-  auto second = MakeTemporal<Second, DoubleType>("second", &second_doc);
+  auto second = MakeTemporal<Second, Int64Type>("second", &second_doc);
   DCHECK_OK(registry->AddFunction(std::move(second)));
 
   auto millisecond =
diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc
index cc01d25de7c..f2e9c12a050 100644
--- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc
@@ -26,11 +26,8 @@ namespace arrow {
 
 using internal::StringFormatter;
 
-class ScalarTemporalTest : public ::testing::Test {};
-
-namespace compute {
-
-TEST(ScalarTemporalTest, TestTemporalComponentExtraction) {
+class ScalarTemporalTest : public ::testing::Test {
+ public:
   const char* times =
       R"(["1970-01-01T00:00:59.123456789","2000-02-29T23:23:23.999999999",
           "1899-01-01T00:59:20.001001001","2033-05-18T03:33:20.000000000",
@@ -39,52 +36,70 @@ TEST(ScalarTemporalTest, TestTemporalComponentExtraction) {
           "2010-01-01T05:25:25.005321", "2010-01-03T06:30:30.006163",
           "2010-01-04T07:35:35", "2006-01-01T08:40:40", "2005-12-31T09:45:45",
           "2008-12-28", "2008-12-29", "2012-01-01 01:02:03"])";
-  auto unit = timestamp(TimeUnit::NANO);
-  auto iso_calendar_type =
+  const char* times_seconds_precision =
+      R"(["1970-01-01T00:00:59","2000-02-29T23:23:23",
+          "1899-01-01T00:59:20","2033-05-18T03:33:20",
+          null, "2020-01-01T01:05:05", "2019-12-31T02:10:10",
+          "2019-12-30T03:15:15", "2009-12-31T04:20:20",
+          "2010-01-01T05:25:25", "2010-01-03T06:30:30",
+          "2010-01-04T07:35:35", "2006-01-01T08:40:40", "2005-12-31T09:45:45",
+          "2008-12-28", "2008-12-29", "2012-01-01 01:02:03"])";
+  std::shared_ptr<arrow::DataType> iso_calendar_type =
       struct_({field("iso_year", int64()), field("iso_week", int64()),
                field("iso_day_of_week", int64())});
-
-  auto year =
+  std::shared_ptr<arrow::Array> iso_calendar =
+      ArrayFromJSON(iso_calendar_type,
+                    R"([{"iso_year": 1970, "iso_week": 1, "iso_day_of_week": 4},
+                          {"iso_year": 2000, "iso_week": 9, "iso_day_of_week": 2},
+                          {"iso_year": 1898, "iso_week": 52, "iso_day_of_week": 7},
+                          {"iso_year": 2033, "iso_week": 20, "iso_day_of_week": 3},
+                          null,
+                          {"iso_year": 2020, "iso_week": 1, "iso_day_of_week": 3},
+                          {"iso_year": 2020, "iso_week": 1, "iso_day_of_week": 2},
+                          {"iso_year": 2020, "iso_week": 1, "iso_day_of_week": 1},
+                          {"iso_year": 2009, "iso_week": 53, "iso_day_of_week": 4},
+                          {"iso_year": 2009, "iso_week": 53, "iso_day_of_week": 5},
+                          {"iso_year": 2009, "iso_week": 53, "iso_day_of_week": 7},
+                          {"iso_year": 2010, "iso_week": 1, "iso_day_of_week": 1},
+                          {"iso_year": 2005, "iso_week": 52, "iso_day_of_week": 7},
+                          {"iso_year": 2005, "iso_week": 52, "iso_day_of_week": 6},
+                          {"iso_year": 2008, "iso_week": 52, "iso_day_of_week": 7},
+                          {"iso_year": 2009, "iso_week": 1, "iso_day_of_week": 1},
+                          {"iso_year": 2011, "iso_week": 52, "iso_day_of_week": 7}])");
+  std::string year =
       "[1970, 2000, 1899, 2033, null, 2020, 2019, 2019, 2009, 2010, 2010, 2010, 2006, "
       "2005, 2008, 2008, 2012]";
-  auto month = "[1, 2, 1, 5, null, 1, 12, 12, 12, 1, 1, 1, 1, 12, 12, 12, 1]";
-  auto day = "[1, 29, 1, 18, null, 1, 31, 30, 31, 1, 3, 4, 1, 31, 28, 29, 1]";
-  auto day_of_week = "[3, 1, 6, 2, null, 2, 1, 0, 3, 4, 6, 0, 6, 5, 6, 0, 6]";
-  auto day_of_year =
+  std::string month = "[1, 2, 1, 5, null, 1, 12, 12, 12, 1, 1, 1, 1, 12, 12, 12, 1]";
+  std::string day = "[1, 29, 1, 18, null, 1, 31, 30, 31, 1, 3, 4, 1, 31, 28, 29, 1]";
+  std::string day_of_week = "[3, 1, 6, 2, null, 2, 1, 0, 3, 4, 6, 0, 6, 5, 6, 0, 6]";
+  std::string day_of_year =
       "[1, 60, 1, 138, null, 1, 365, 364, 365, 1, 3, 4, 1, 365, 363, 364, 1]";
-  auto iso_year =
+  std::string iso_year =
       "[1970, 2000, 1898, 2033, null, 2020, 2020, 2020, 2009, 2009, 2009, 2010, 2005, "
       "2005, 2008, 2009, 2011]";
-  auto iso_week = "[1, 9, 52, 20, null, 1, 1, 1, 53, 53, 53, 1, 52, 52, 52, 1, 52]";
-  auto iso_calendar =
-      ArrayFromJSON(iso_calendar_type,
-                    R"([{"iso_year": 1970, "iso_week": 1, "iso_day_of_week": 4},
-                        {"iso_year": 2000, "iso_week": 9, "iso_day_of_week": 2},
-                        {"iso_year": 1898, "iso_week": 52, "iso_day_of_week": 7},
-                        {"iso_year": 2033, "iso_week": 20, "iso_day_of_week": 3},
-                        null,
-                        {"iso_year": 2020, "iso_week": 1, "iso_day_of_week": 3},
-                        {"iso_year": 2020, "iso_week": 1, "iso_day_of_week": 2},
-                        {"iso_year": 2020, "iso_week": 1, "iso_day_of_week": 1},
-                        {"iso_year": 2009, "iso_week": 53, "iso_day_of_week": 4},
-                        {"iso_year": 2009, "iso_week": 53, "iso_day_of_week": 5},
-                        {"iso_year": 2009, "iso_week": 53, "iso_day_of_week": 7},
-                        {"iso_year": 2010, "iso_week": 1, "iso_day_of_week": 1},
-                        {"iso_year": 2005, "iso_week": 52, "iso_day_of_week": 7},
-                        {"iso_year": 2005, "iso_week": 52, "iso_day_of_week": 6},
-                        {"iso_year": 2008, "iso_week": 52, "iso_day_of_week": 7},
-                        {"iso_year": 2009, "iso_week": 1, "iso_day_of_week": 1},
-                        {"iso_year": 2011, "iso_week": 52, "iso_day_of_week": 7}])");
-  auto quarter = "[1, 1, 1, 2, null, 1, 4, 4, 4, 1, 1, 1, 1, 4, 4, 4, 1]";
-  auto hour = "[0, 23, 0, 3, null, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 1]";
-  auto minute = "[0, 23, 59, 33, null, 5, 10, 15, 20, 25, 30, 35, 40, 45, 0, 0, 2]";
-  auto second = "[59, 23, 20, 20, null, 5, 10, 15, 20, 25, 30, 35, 40, 45, 0, 0, 3]";
-  auto millisecond = "[123, 999, 1, 0, null, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0, 0]";
-  auto microsecond = "[456, 999, 1, 0, null, 0, 0, 0, 132, 321, 163, 0, 0, 0, 0, 0, 0]";
-  auto nanosecond = "[789, 999, 1, 0, null, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]";
-  auto subsecond =
+  std::string iso_week =
+      "[1, 9, 52, 20, null, 1, 1, 1, 53, 53, 53, 1, 52, 52, 52, 1, 52]";
+
+  std::string quarter = "[1, 1, 1, 2, null, 1, 4, 4, 4, 1, 1, 1, 1, 4, 4, 4, 1]";
+  std::string hour = "[0, 23, 0, 3, null, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 1]";
+  std::string minute =
+      "[0, 23, 59, 33, null, 5, 10, 15, 20, 25, 30, 35, 40, 45, 0, 0, 2]";
+  std::string second =
+      "[59, 23, 20, 20, null, 5, 10, 15, 20, 25, 30, 35, 40, 45, 0, 0, 3]";
+  std::string millisecond = "[123, 999, 1, 0, null, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0, 0]";
+  std::string microsecond =
+      "[456, 999, 1, 0, null, 0, 0, 0, 132, 321, 163, 0, 0, 0, 0, 0, 0]";
+  std::string nanosecond = "[789, 999, 1, 0, null, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]";
+  std::string subsecond =
       "[0.123456789, 0.999999999, 0.001001001, 0, null, 0.001, 0.002, 0.003, 0.004132, "
       "0.005321, 0.006163, 0, 0, 0, 0, 0, 0]";
+  std::string zeros = "[0, 0, 0, 0, null, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]";
+};
+
+namespace compute {
+
+TEST_F(ScalarTemporalTest, TestTemporalComponentExtraction) {
+  auto unit = timestamp(TimeUnit::NANO);
 
   CheckScalarUnary("year", unit, times, int64(), year);
   CheckScalarUnary("month", unit, times, int64(), month);
@@ -97,67 +112,42 @@ TEST(ScalarTemporalTest, TestTemporalComponentExtraction) {
   CheckScalarUnary("quarter", unit, times, int64(), quarter);
   CheckScalarUnary("hour", unit, times, int64(), hour);
   CheckScalarUnary("minute", unit, times, int64(), minute);
-  CheckScalarUnary("second", unit, times, float64(), second);
+  CheckScalarUnary("second", unit, times, int64(), second);
   CheckScalarUnary("millisecond", unit, times, int64(), millisecond);
   CheckScalarUnary("microsecond", unit, times, int64(), microsecond);
   CheckScalarUnary("nanosecond", unit, times, int64(), nanosecond);
   CheckScalarUnary("subsecond", unit, times, float64(), subsecond);
 }
 
-TEST(ScalarTemporalTest, TestTemporalComponentExtractionWithDifferentUnits) {
-  auto iso_calendar_type =
-      struct_({field("iso_year", int64()), field("iso_week", int64()),
-               field("iso_day_of_week", int64())});
-  const char* times =
-      R"(["1970-01-01T00:00:59","2000-02-29T23:23:23",
-            "1899-01-01T00:59:20","2033-05-18T03:33:20", null])";
-  auto year = "[1970, 2000, 1899, 2033, null]";
-  auto month = "[1, 2, 1, 5, null]";
-  auto day = "[1, 29, 1, 18, null]";
-  auto day_of_week = "[3, 1, 6, 2, null]";
-  auto day_of_year = "[1, 60, 1, 138, null]";
-  auto iso_year = "[1970, 2000, 1898, 2033, null]";
-  auto iso_week = "[1, 9, 52, 20, null]";
-  auto iso_calendar =
-      ArrayFromJSON(iso_calendar_type,
-                    R"([{"iso_year": 1970, "iso_week": 1, "iso_day_of_week": 4},
-                          {"iso_year": 2000, "iso_week": 9, "iso_day_of_week": 2},
-                          {"iso_year": 1898, "iso_week": 52, "iso_day_of_week": 7},
-                          {"iso_year": 2033, "iso_week": 20, "iso_day_of_week": 3}, null])");
-  auto quarter = "[1, 1, 1, 2, null]";
-  auto hour = "[0, 23, 0, 3, null]";
-  auto minute = "[0, 23, 59, 33, null]";
-  auto second = "[59, 23, 20, 20, null]";
-  auto zeros = "[0, 0, 0, 0, null]";
-
+TEST_F(ScalarTemporalTest, TestTemporalComponentExtractionWithDifferentUnits) {
   for (auto u : internal::AllTimeUnits()) {
     auto unit = timestamp(u);
-    CheckScalarUnary("year", unit, times, int64(), year);
-    CheckScalarUnary("month", unit, times, int64(), month);
-    CheckScalarUnary("day", unit, times, int64(), day);
-    CheckScalarUnary("day_of_week", unit, times, int64(), day_of_week);
-    CheckScalarUnary("day_of_year", unit, times, int64(), day_of_year);
-    CheckScalarUnary("iso_year", unit, times, int64(), iso_year);
-    CheckScalarUnary("iso_week", unit, times, int64(), iso_week);
-    CheckScalarUnary("iso_calendar", ArrayFromJSON(unit, times), iso_calendar);
-    CheckScalarUnary("quarter", unit, times, int64(), quarter);
-    CheckScalarUnary("hour", unit, times, int64(), hour);
-    CheckScalarUnary("minute", unit, times, int64(), minute);
-    CheckScalarUnary("second", unit, times, float64(), second);
-    CheckScalarUnary("millisecond", unit, times, int64(), zeros);
-    CheckScalarUnary("microsecond", unit, times, int64(), zeros);
-    CheckScalarUnary("nanosecond", unit, times, int64(), zeros);
-    CheckScalarUnary("subsecond", unit, times, float64(), zeros);
+    CheckScalarUnary("year", unit, times_seconds_precision, int64(), year);
+    CheckScalarUnary("month", unit, times_seconds_precision, int64(), month);
+    CheckScalarUnary("day", unit, times_seconds_precision, int64(), day);
+    CheckScalarUnary("day_of_week", unit, times_seconds_precision, int64(), day_of_week);
+    CheckScalarUnary("day_of_year", unit, times_seconds_precision, int64(), day_of_year);
+    CheckScalarUnary("iso_year", unit, times_seconds_precision, int64(), iso_year);
+    CheckScalarUnary("iso_week", unit, times_seconds_precision, int64(), iso_week);
+    CheckScalarUnary("iso_calendar", ArrayFromJSON(unit, times_seconds_precision),
+                     iso_calendar);
+    CheckScalarUnary("quarter", unit, times_seconds_precision, int64(), quarter);
+    CheckScalarUnary("hour", unit, times_seconds_precision, int64(), hour);
+    CheckScalarUnary("minute", unit, times_seconds_precision, int64(), minute);
+    CheckScalarUnary("second", unit, times_seconds_precision, int64(), second);
+    CheckScalarUnary("millisecond", unit, times_seconds_precision, int64(), zeros);
+    CheckScalarUnary("microsecond", unit, times_seconds_precision, int64(), zeros);
+    CheckScalarUnary("nanosecond", unit, times_seconds_precision, int64(), zeros);
+    CheckScalarUnary("subsecond", unit, times_seconds_precision, float64(), zeros);
   }
 }
 
-TEST(ScalarTemporalTest, TestZonedTemporalComponentExtraction) {
+TEST_F(ScalarTemporalTest, TestZonedTemporalComponentExtraction) {
   std::string timezone = "Asia/Kolkata";
-  const char* times = R"(["1970-01-01T00:00:59", null])";
 
   for (auto u : internal::AllTimeUnits()) {
     auto unit = timestamp(u, timezone);
-    auto timestamps = ArrayFromJSON(unit, times);
+    auto timestamps = ArrayFromJSON(unit, times_seconds_precision);
 
     ASSERT_RAISES(NotImplemented, Year(timestamps));
     ASSERT_RAISES(NotImplemented, Month(timestamps));
@@ -177,5 +167,54 @@ TEST(ScalarTemporalTest, TestZonedTemporalComponentExtraction) {
     ASSERT_RAISES(NotImplemented, Subsecond(timestamps));
   }
 }
+
+TEST_F(ScalarTemporalTest, DayOfWeek) {
+  auto unit = timestamp(TimeUnit::NANO);
+
+  auto timestamps = ArrayFromJSON(unit, times);
+  auto day_of_week_week_start_7_zero_based =
+      "[4, 2, 0, 3, null, 3, 2, 1, 4, 5, 0, 1, 0, 6, 0, 1, 0]";
+  auto day_of_week_week_start_2_zero_based =
+      "[2, 0, 5, 1, null, 1, 0, 6, 2, 3, 5, 6, 5, 4, 5, 6, 5]";
+  auto day_of_week_week_start_7_one_based =
+      "[5, 3, 1, 4, null, 4, 3, 2, 5, 6, 1, 2, 1, 7, 1, 2, 1]";
+  auto day_of_week_week_start_2_one_based =
+      "[3, 1, 6, 2, null, 2, 1, 7, 3, 4, 6, 7, 6, 5, 6, 7, 6]";
+
+  auto expected_70 = ArrayFromJSON(int64(), day_of_week_week_start_7_zero_based);
+  ASSERT_OK_AND_ASSIGN(
+      Datum result_70,
+      DayOfWeek(timestamps, DayOfWeekOptions(
+                                /*one_based_numbering=*/false, /*week_start=*/7)));
+  ASSERT_TRUE(result_70.Equals(expected_70));
+
+  auto expected_20 = ArrayFromJSON(int64(), day_of_week_week_start_2_zero_based);
+  ASSERT_OK_AND_ASSIGN(
+      Datum result_20,
+      DayOfWeek(timestamps, DayOfWeekOptions(
+                                /*one_based_numbering=*/false, /*week_start=*/2)));
+  ASSERT_TRUE(result_20.Equals(expected_20));
+
+  auto expected_71 = ArrayFromJSON(int64(), day_of_week_week_start_7_one_based);
+  ASSERT_OK_AND_ASSIGN(
+      Datum result_71,
+      DayOfWeek(timestamps, DayOfWeekOptions(
+                                /*one_based_numbering=*/true, /*week_start=*/7)));
+  ASSERT_TRUE(result_71.Equals(expected_71));
+
+  auto expected_21 = ArrayFromJSON(int64(), day_of_week_week_start_2_one_based);
+  ASSERT_OK_AND_ASSIGN(
+      Datum result_21,
+      DayOfWeek(timestamps, DayOfWeekOptions(
+                                /*one_based_numbering=*/true, /*week_start=*/2)));
+  ASSERT_TRUE(result_21.Equals(expected_21));
+
+  ASSERT_RAISES(Invalid,
+                DayOfWeek(timestamps, DayOfWeekOptions(/*one_based_numbering=*/true,
+                                                       /*week_start=*/0)));
+  ASSERT_RAISES(Invalid,
+                DayOfWeek(timestamps, DayOfWeekOptions(/*one_based_numbering=*/false,
+                                                       /*week_start=*/8)));
+}
 }  // namespace compute
 }  // namespace arrow
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index b2bbe686c8e..fc6c8b7c7e1 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -1014,44 +1014,46 @@ Temporal component extraction
 These functions extract datetime components (year, month, day, etc) from timestamp type.
 Note: this is currently not supported for timestamps with timezone information.
 
-+--------------------+------------+-------------------+---------------+--------+
-| Function name      | Arity      | Input types       | Output type   | Notes  |
-+====================+============+===================+===============+========+
-| year               | Unary      | Temporal          | Int64         |        |
-+--------------------+------------+-------------------+---------------+--------+
-| month              | Unary      | Temporal          | Int64         |        |
-+--------------------+------------+-------------------+---------------+--------+
-| day                | Unary      | Temporal          | Int64         |        |
-+--------------------+------------+-------------------+---------------+--------+
-| day_of_week        | Unary      | Temporal          | Int64         | \(1)   |
-+--------------------+------------+-------------------+---------------+--------+
-| day_of_year        | Unary      | Temporal          | Int64         |        |
-+--------------------+------------+-------------------+---------------+--------+
-| iso_year           | Unary      | Temporal          | Int64         | \(2)   |
-+--------------------+------------+-------------------+---------------+--------+
-| iso_week           | Unary      | Temporal          | Int64         | \(2)   |
-+--------------------+------------+-------------------+---------------+--------+
-| iso_calendar       | Unary      | Temporal          | Struct        | \(3)   |
-+--------------------+------------+-------------------+---------------+--------+
-| quarter            | Unary      | Temporal          | Int64         |        |
-+--------------------+------------+-------------------+---------------+--------+
-| hour               | Unary      | Temporal          | Int64         |        |
-+--------------------+------------+-------------------+---------------+--------+
-| minute             | Unary      | Temporal          | Int64         |        |
-+--------------------+------------+-------------------+---------------+--------+
-| second             | Unary      | Temporal          | Int64         |        |
-+--------------------+------------+-------------------+---------------+--------+
-| millisecond        | Unary      | Temporal          | Int64         |        |
-+--------------------+------------+-------------------+---------------+--------+
-| microsecond        | Unary      | Temporal          | Int64         |        |
-+--------------------+------------+-------------------+---------------+--------+
-| nanosecond         | Unary      | Temporal          | Int64         |        |
-+--------------------+------------+-------------------+---------------+--------+
-| subsecond          | Unary      | Temporal          | Double        |        |
-+--------------------+------------+-------------------+---------------+--------+
-
-* \(1) Outputs the number of the day of the week. Week begins on Monday and is denoted
-  by 0 and ends on Sunday denoted by 6.
++--------------------+------------+-------------------+---------------+----------------------------+-------+
+| Function name      | Arity      | Input types       | Output type   | Options class              | Notes |
++====================+============+===================+===============+============================+=======+
+| year               | Unary      | Temporal          | Int64         |                            |       |
++--------------------+------------+-------------------+---------------+----------------------------+-------+
+| month              | Unary      | Temporal          | Int64         |                            |       |
++--------------------+------------+-------------------+---------------+----------------------------+-------+
+| day                | Unary      | Temporal          | Int64         |                            |       |
++--------------------+------------+-------------------+---------------+----------------------------+-------+
+| day_of_week        | Unary      | Temporal          | Int64         | :struct:`DayOfWeekOptions` | \(1)  |
++--------------------+------------+-------------------+---------------+----------------------------+-------+
+| day_of_year        | Unary      | Temporal          | Int64         |                            |       |
++--------------------+------------+-------------------+---------------+----------------------------+-------+
+| iso_year           | Unary      | Temporal          | Int64         |                            | \(2)  |
++--------------------+------------+-------------------+---------------+----------------------------+-------+
+| iso_week           | Unary      | Temporal          | Int64         |                            | \(2)  |
++--------------------+------------+-------------------+---------------+----------------------------+-------+
+| iso_calendar       | Unary      | Temporal          | Struct        |                            | \(3)  |
++--------------------+------------+-------------------+---------------+----------------------------+-------+
+| quarter            | Unary      | Temporal          | Int64         |                            |       |
++--------------------+------------+-------------------+---------------+----------------------------+-------+
+| hour               | Unary      | Temporal          | Int64         |                            |       |
++--------------------+------------+-------------------+---------------+----------------------------+-------+
+| minute             | Unary      | Temporal          | Int64         |                            |       |
++--------------------+------------+-------------------+---------------+----------------------------+-------+
+| second             | Unary      | Temporal          | Int64         |                            |       |
++--------------------+------------+-------------------+---------------+----------------------------+-------+
+| millisecond        | Unary      | Temporal          | Int64         |                            |       |
++--------------------+------------+-------------------+---------------+----------------------------+-------+
+| microsecond        | Unary      | Temporal          | Int64         |                            |       |
++--------------------+------------+-------------------+---------------+----------------------------+-------+
+| nanosecond         | Unary      | Temporal          | Int64         |                            |       |
++--------------------+------------+-------------------+---------------+----------------------------+-------+
+| subsecond          | Unary      | Temporal          | Double        |                            |       |
++--------------------+------------+-------------------+---------------+----------------------------+-------+
+
+* \(1) Outputs the number of the day of the week. By default week begins on Monday
+  represented by 0 and ends on Sunday represented by 6. :member:`DayOfWeekOptions::week_start` can be used to set
+  the starting day of the week using ISO convention (Monday=1, Sunday=7). Day numbering can start with 0 or 1
+  using :member:`DayOfWeekOptions::one_based_numbering` parameter.
 * \(2) First ISO week has the majority (4 or more) of it's days in January. ISO year
   starts with the first ISO week.
   See `ISO 8601 week date definition`_ for more details.
diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx
index 02855ee78aa..1b66f74eb74 100644
--- a/python/pyarrow/_compute.pyx
+++ b/python/pyarrow/_compute.pyx
@@ -961,6 +961,18 @@ class StrptimeOptions(_StrptimeOptions):
         self._set_options(format, unit)
 
 
+cdef class _DayOfWeekOptions(FunctionOptions):
+    def _set_options(self, one_based_numbering, week_start):
+        self.wrapped.reset(
+            new CDayOfWeekOptions(one_based_numbering, week_start)
+        )
+
+
+class DayOfWeekOptions(_DayOfWeekOptions):
+    def __init__(self, one_based_numbering=False, week_start=1):
+        self._set_options(one_based_numbering, week_start)
+
+
 cdef class _VarianceOptions(FunctionOptions):
     def _set_options(self, ddof):
         self.wrapped.reset(new CVarianceOptions(ddof))
diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py
index fbe9e2c5c0f..15d1adcbafe 100644
--- a/python/pyarrow/compute.py
+++ b/python/pyarrow/compute.py
@@ -52,6 +52,7 @@
     SplitOptions,
     SplitPatternOptions,
     StrptimeOptions,
+    DayOfWeekOptions,
     TakeOptions,
     TDigestOptions,
     TrimOptions,
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 67a960fbacc..6977c26cac5 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -1940,6 +1940,12 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil:
             "arrow::compute::StrptimeOptions"(CFunctionOptions):
         CStrptimeOptions(c_string format, TimeUnit unit)
 
+    cdef cppclass CDayOfWeekOptions \
+            "arrow::compute::DayOfWeekOptions"(CFunctionOptions):
+        CDayOfWeekOptions(c_bool one_based_numbering, uint32_t week_start)
+        c_bool one_based_numbering
+        uint32_t week_start
+
     cdef cppclass CVarianceOptions \
             "arrow::compute::VarianceOptions"(CFunctionOptions):
         CVarianceOptions(int ddof)
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index 35b37d82f95..37040ec86b5 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -120,6 +120,7 @@ def test_option_class_equality():
         pc.PadOptions(5, " "),
         pc.PartitionNthOptions(1),
         pc.ProjectOptions([b"field", b"names"]),
+        pc.DayOfWeekOptions(False, 0),
         pc.ReplaceSliceOptions(start=0, stop=1, replacement="a"),
         pc.ReplaceSubstringOptions("a", "b"),
         pc.SetLookupOptions(value_set=pa.array([1])),
@@ -1346,6 +1347,80 @@ def test_strptime():
     assert got == expected
 
 
+def _check_datetime_components(timestamps, timezone=None):
+    from pyarrow.vendored.version import Version
+
+    ts = pd.to_datetime(timestamps).to_series()
+    tsa = pa.array(ts)
+
+    subseconds = ((ts.dt.microsecond * 10**3 +
+                   ts.dt.nanosecond) * 10**-9).round(9)
+    iso_calendar_fields = [
+        pa.field('iso_year', pa.int64()),
+        pa.field('iso_week', pa.int64()),
+        pa.field('iso_day_of_week', pa.int64())
+    ]
+
+    if Version(pd.__version__) < Version("1.1.0"):
+        # https://github.com/pandas-dev/pandas/issues/33206
+        iso_year = ts.map(lambda x: x.isocalendar()[0]).astype("Int64")
+        iso_week = ts.map(lambda x: x.isocalendar()[1]).astype("Int64")
+        iso_day = ts.map(lambda x: x.isocalendar()[2]).astype("Int64")
+    else:
+        # Casting is required because pandas isocalendar returns int32
+        # while arrow isocalendar returns int64.
+        iso_year = ts.dt.isocalendar()["year"].astype("Int64")
+        iso_week = ts.dt.isocalendar()["week"].astype("Int64")
+        iso_day = ts.dt.isocalendar()["day"].astype("Int64")
+
+    iso_calendar = pa.StructArray.from_arrays(
+        [iso_year, iso_week, iso_day],
+        fields=iso_calendar_fields)
+
+    assert pc.year(tsa).equals(pa.array(ts.dt.year))
+    assert pc.month(tsa).equals(pa.array(ts.dt.month))
+    assert pc.day(tsa).equals(pa.array(ts.dt.day))
+    assert pc.day_of_week(tsa).equals(pa.array(ts.dt.dayofweek))
+    assert pc.day_of_year(tsa).equals(pa.array(ts.dt.dayofyear))
+    assert pc.iso_year(tsa).equals(pa.array(iso_year))
+    assert pc.iso_week(tsa).equals(pa.array(iso_week))
+    assert pc.iso_calendar(tsa).equals(iso_calendar)
+    assert pc.quarter(tsa).equals(pa.array(ts.dt.quarter))
+    assert pc.hour(tsa).equals(pa.array(ts.dt.hour))
+    assert pc.minute(tsa).equals(pa.array(ts.dt.minute))
+    assert pc.second(tsa).equals(pa.array(ts.dt.second.values))
+    assert pc.millisecond(tsa).equals(pa.array(ts.dt.microsecond // 10**3))
+    assert pc.microsecond(tsa).equals(pa.array(ts.dt.microsecond % 10**3))
+    assert pc.nanosecond(tsa).equals(pa.array(ts.dt.nanosecond))
+    assert pc.subsecond(tsa).equals(pa.array(subseconds))
+
+    day_of_week_options = pc.DayOfWeekOptions(
+        one_based_numbering=True, week_start=1)
+    assert pc.day_of_week(tsa, options=day_of_week_options).equals(
+        pa.array(ts.dt.dayofweek+1))
+
+
+@pytest.mark.pandas
+def test_extract_datetime_components():
+    timestamps = ["1970-01-01T00:00:59.123456789",
+                  "2000-02-29T23:23:23.999999999",
+                  "2033-05-18T03:33:20.000000000",
+                  "2020-01-01T01:05:05.001",
+                  "2019-12-31T02:10:10.002",
+                  "2019-12-30T03:15:15.003",
+                  "2009-12-31T04:20:20.004132",
+                  "2010-01-01T05:25:25.005321",
+                  "2010-01-03T06:30:30.006163",
+                  "2010-01-04T07:35:35",
+                  "2006-01-01T08:40:40",
+                  "2005-12-31T09:45:45",
+                  "2008-12-28",
+                  "2008-12-29",
+                  "2012-01-01 01:02:03"]
+
+    _check_datetime_components(timestamps)
+
+
 def test_count():
     arr = pa.array([1, 2, 3, None, None])
     assert pc.count(arr).as_py() == 3
diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R
index 27d6e889199..055cff5472b 100644
--- a/r/R/dplyr-functions.R
+++ b/r/R/dplyr-functions.R
@@ -526,14 +526,6 @@ nse_funcs$second <- function(x) {
   Expression$create("add", Expression$create("second", x), Expression$create("subsecond", x))
 }
 
-# After ARROW-13054 is completed, we can refactor this for simplicity
-#
-# Arrow's `day_of_week` kernel counts from 0 (Monday) to 6 (Sunday), whereas
-# `lubridate::wday` counts from 1 to 7, and allows users to specify which day
-# of the week is first (Sunday by default).  This Expression converts the returned
-# day of the week back to the value that would be returned by lubridate by
-# providing offset values based on the specified week_start day, and adding 1
-# so the returned value is 1-indexed instead of 0-indexed.
 nse_funcs$wday <- function(x, label = FALSE, abbr = TRUE, week_start = getOption("lubridate.week.start", 7)) {
 
   # The "day_of_week" compute function returns numeric days of week and not locale-aware strftime
@@ -543,8 +535,6 @@ nse_funcs$wday <- function(x, label = FALSE, abbr = TRUE, week_start = getOption
     arrow_not_supported("Label argument")
   }
 
-  # overall formula to convert from arrow::wday to lubridate::wday is:
-  #  ((wday(day) - start + 8) %% 7) + 1
-  ((Expression$create("day_of_week", x) - Expression$scalar(week_start) + 8) %% 7) + 1
+  Expression$create("day_of_week", x, options = list(one_based_numbering = TRUE, week_start = week_start))
 
 }
diff --git a/r/src/compute.cpp b/r/src/compute.cpp
index 458e0e386e9..9be1cc3a83e 100644
--- a/r/src/compute.cpp
+++ b/r/src/compute.cpp
@@ -264,6 +264,16 @@ std::shared_ptr<arrow::compute::FunctionOptions> make_compute_options(
                                      max_replacements);
   }
 
+  if (func_name == "day_of_week") {
+    using Options = arrow::compute::DayOfWeekOptions;
+    bool one_based_numbering = true;
+    if (!Rf_isNull(options["one_based_numbering"])) {
+      one_based_numbering = cpp11::as_cpp<bool>(options["one_based_numbering"]);
+    }
+    return std::make_shared<Options>(one_based_numbering,
+                                     cpp11::as_cpp<uint32_t>(options["week_start"]));
+  }
+
   if (func_name == "strptime") {
     using Options = arrow::compute::StrptimeOptions;
     return std::make_shared<Options>(

From a8a81f6e8a93a3e6a08e70ba4e278c97aff944ef Mon Sep 17 00:00:00 2001
From: Dominik Moritz <domoritz@gmail.com>
Date: Wed, 7 Jul 2021 07:04:06 -0700
Subject: [PATCH 521/719] ARROW-13274: [JS] Remove Webpack

Closes #10669 from domoritz/rm-webpack

Authored-by: Dominik Moritz <domoritz@gmail.com>
Signed-off-by: Dominik Moritz <domoritz@gmail.com>
---
 js/package.json |   2 -
 js/yarn.lock    | 343 ++----------------------------------------------
 2 files changed, 9 insertions(+), 336 deletions(-)

diff --git a/js/package.json b/js/package.json
index 64d3a981395..972c2caf82f 100644
--- a/js/package.json
+++ b/js/package.json
@@ -92,14 +92,12 @@
     "npm-run-all": "4.1.5",
     "randomatic": "3.1.1",
     "rxjs": "5.5.11",
-    "source-map-loader": "3.0.0",
     "ts-jest": "27.0.0",
     "ts-node": "10.0.0",
     "typedoc": "0.20.36",
     "typescript": "4.0.2",
     "web-stream-tools": "0.0.1",
     "web-streams-polyfill": "3.0.3",
-    "webpack": "5.37.1",
     "xml2js": "0.4.23"
   },
   "engines": {
diff --git a/js/yarn.lock b/js/yarn.lock
index 23854eabdd9..7b54725509b 100644
--- a/js/yarn.lock
+++ b/js/yarn.lock
@@ -1506,32 +1506,6 @@
   dependencies:
     "@babel/types" "^7.3.0"
 
-"@types/eslint-scope@^3.7.0":
-  version "3.7.0"
-  resolved "https://registry.yarnpkg.com/@types/eslint-scope/-/eslint-scope-3.7.0.tgz#4792816e31119ebd506902a482caec4951fabd86"
-  integrity sha512-O/ql2+rrCUe2W2rs7wMR+GqPRcgB6UiqN5RhrR5xruFlY7l9YLMn0ZkDzjoHLeiFkR8MCQZVudUuuvQ2BLC9Qw==
-  dependencies:
-    "@types/eslint" "*"
-    "@types/estree" "*"
-
-"@types/eslint@*":
-  version "7.2.13"
-  resolved "https://registry.yarnpkg.com/@types/eslint/-/eslint-7.2.13.tgz#e0ca7219ba5ded402062ad6f926d491ebb29dd53"
-  integrity sha512-LKmQCWAlnVHvvXq4oasNUMTJJb2GwSyTY8+1C7OH5ILR8mPLaljv1jxL1bXW3xB3jFbQxTKxJAvI8PyjB09aBg==
-  dependencies:
-    "@types/estree" "*"
-    "@types/json-schema" "*"
-
-"@types/estree@*":
-  version "0.0.48"
-  resolved "https://registry.yarnpkg.com/@types/estree/-/estree-0.0.48.tgz#18dc8091b285df90db2f25aa7d906cfc394b7f74"
-  integrity sha512-LfZwXoGUDo0C3me81HXgkBg5CTQYb6xzEl+fNmbO4JdRiSKQ8A0GD1OBBvKAIsbCUgoyAty7m99GqqMQe784ew==
-
-"@types/estree@^0.0.47":
-  version "0.0.47"
-  resolved "https://registry.yarnpkg.com/@types/estree/-/estree-0.0.47.tgz#d7a51db20f0650efec24cd04994f523d93172ed4"
-  integrity sha512-c5ciR06jK8u9BstrmJyO97m+klJrrhCf9u3rLu3DEAJBirxRqSCvDQoYKmxuYwQI5SZChAWu+tq9oVlGRuzPAg==
-
 "@types/flatbuffers@^1.10.0":
   version "1.10.0"
   resolved "https://registry.yarnpkg.com/@types/flatbuffers/-/flatbuffers-1.10.0.tgz#aa74e30ffdc86445f2f060e1808fc9d56b5603ba"
@@ -1579,7 +1553,7 @@
     jest-diff "^26.0.0"
     pretty-format "^26.0.0"
 
-"@types/json-schema@*", "@types/json-schema@^7.0.3", "@types/json-schema@^7.0.6", "@types/json-schema@^7.0.7":
+"@types/json-schema@^7.0.3", "@types/json-schema@^7.0.7":
   version "7.0.7"
   resolved "https://registry.yarnpkg.com/@types/json-schema/-/json-schema-7.0.7.tgz#98a993516c859eb0d5c4c8f098317a9ea68db9ad"
   integrity sha512-cxWFQVseBm6O9Gbw1IWb8r6OS4OhSt3hPZLkFApLjM8TEXROBuQGLAH2i2gZpcXdLBIrpXuTDhH7Vbm1iXmNGA==
@@ -1766,137 +1740,6 @@
     "@typescript-eslint/types" "4.26.0"
     eslint-visitor-keys "^2.0.0"
 
-"@webassemblyjs/ast@1.11.0":
-  version "1.11.0"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/ast/-/ast-1.11.0.tgz#a5aa679efdc9e51707a4207139da57920555961f"
-  integrity sha512-kX2W49LWsbthrmIRMbQZuQDhGtjyqXfEmmHyEi4XWnSZtPmxY0+3anPIzsnRb45VH/J55zlOfWvZuY47aJZTJg==
-  dependencies:
-    "@webassemblyjs/helper-numbers" "1.11.0"
-    "@webassemblyjs/helper-wasm-bytecode" "1.11.0"
-
-"@webassemblyjs/floating-point-hex-parser@1.11.0":
-  version "1.11.0"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/floating-point-hex-parser/-/floating-point-hex-parser-1.11.0.tgz#34d62052f453cd43101d72eab4966a022587947c"
-  integrity sha512-Q/aVYs/VnPDVYvsCBL/gSgwmfjeCb4LW8+TMrO3cSzJImgv8lxxEPM2JA5jMrivE7LSz3V+PFqtMbls3m1exDA==
-
-"@webassemblyjs/helper-api-error@1.11.0":
-  version "1.11.0"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/helper-api-error/-/helper-api-error-1.11.0.tgz#aaea8fb3b923f4aaa9b512ff541b013ffb68d2d4"
-  integrity sha512-baT/va95eXiXb2QflSx95QGT5ClzWpGaa8L7JnJbgzoYeaA27FCvuBXU758l+KXWRndEmUXjP0Q5fibhavIn8w==
-
-"@webassemblyjs/helper-buffer@1.11.0":
-  version "1.11.0"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/helper-buffer/-/helper-buffer-1.11.0.tgz#d026c25d175e388a7dbda9694e91e743cbe9b642"
-  integrity sha512-u9HPBEl4DS+vA8qLQdEQ6N/eJQ7gT7aNvMIo8AAWvAl/xMrcOSiI2M0MAnMCy3jIFke7bEee/JwdX1nUpCtdyA==
-
-"@webassemblyjs/helper-numbers@1.11.0":
-  version "1.11.0"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/helper-numbers/-/helper-numbers-1.11.0.tgz#7ab04172d54e312cc6ea4286d7d9fa27c88cd4f9"
-  integrity sha512-DhRQKelIj01s5IgdsOJMKLppI+4zpmcMQ3XboFPLwCpSNH6Hqo1ritgHgD0nqHeSYqofA6aBN/NmXuGjM1jEfQ==
-  dependencies:
-    "@webassemblyjs/floating-point-hex-parser" "1.11.0"
-    "@webassemblyjs/helper-api-error" "1.11.0"
-    "@xtuc/long" "4.2.2"
-
-"@webassemblyjs/helper-wasm-bytecode@1.11.0":
-  version "1.11.0"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/helper-wasm-bytecode/-/helper-wasm-bytecode-1.11.0.tgz#85fdcda4129902fe86f81abf7e7236953ec5a4e1"
-  integrity sha512-MbmhvxXExm542tWREgSFnOVo07fDpsBJg3sIl6fSp9xuu75eGz5lz31q7wTLffwL3Za7XNRCMZy210+tnsUSEA==
-
-"@webassemblyjs/helper-wasm-section@1.11.0":
-  version "1.11.0"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/helper-wasm-section/-/helper-wasm-section-1.11.0.tgz#9ce2cc89300262509c801b4af113d1ca25c1a75b"
-  integrity sha512-3Eb88hcbfY/FCukrg6i3EH8H2UsD7x8Vy47iVJrP967A9JGqgBVL9aH71SETPx1JrGsOUVLo0c7vMCN22ytJew==
-  dependencies:
-    "@webassemblyjs/ast" "1.11.0"
-    "@webassemblyjs/helper-buffer" "1.11.0"
-    "@webassemblyjs/helper-wasm-bytecode" "1.11.0"
-    "@webassemblyjs/wasm-gen" "1.11.0"
-
-"@webassemblyjs/ieee754@1.11.0":
-  version "1.11.0"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/ieee754/-/ieee754-1.11.0.tgz#46975d583f9828f5d094ac210e219441c4e6f5cf"
-  integrity sha512-KXzOqpcYQwAfeQ6WbF6HXo+0udBNmw0iXDmEK5sFlmQdmND+tr773Ti8/5T/M6Tl/413ArSJErATd8In3B+WBA==
-  dependencies:
-    "@xtuc/ieee754" "^1.2.0"
-
-"@webassemblyjs/leb128@1.11.0":
-  version "1.11.0"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/leb128/-/leb128-1.11.0.tgz#f7353de1df38aa201cba9fb88b43f41f75ff403b"
-  integrity sha512-aqbsHa1mSQAbeeNcl38un6qVY++hh8OpCOzxhixSYgbRfNWcxJNJQwe2rezK9XEcssJbbWIkblaJRwGMS9zp+g==
-  dependencies:
-    "@xtuc/long" "4.2.2"
-
-"@webassemblyjs/utf8@1.11.0":
-  version "1.11.0"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/utf8/-/utf8-1.11.0.tgz#86e48f959cf49e0e5091f069a709b862f5a2cadf"
-  integrity sha512-A/lclGxH6SpSLSyFowMzO/+aDEPU4hvEiooCMXQPcQFPPJaYcPQNKGOCLUySJsYJ4trbpr+Fs08n4jelkVTGVw==
-
-"@webassemblyjs/wasm-edit@1.11.0":
-  version "1.11.0"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/wasm-edit/-/wasm-edit-1.11.0.tgz#ee4a5c9f677046a210542ae63897094c2027cb78"
-  integrity sha512-JHQ0damXy0G6J9ucyKVXO2j08JVJ2ntkdJlq1UTiUrIgfGMmA7Ik5VdC/L8hBK46kVJgujkBIoMtT8yVr+yVOQ==
-  dependencies:
-    "@webassemblyjs/ast" "1.11.0"
-    "@webassemblyjs/helper-buffer" "1.11.0"
-    "@webassemblyjs/helper-wasm-bytecode" "1.11.0"
-    "@webassemblyjs/helper-wasm-section" "1.11.0"
-    "@webassemblyjs/wasm-gen" "1.11.0"
-    "@webassemblyjs/wasm-opt" "1.11.0"
-    "@webassemblyjs/wasm-parser" "1.11.0"
-    "@webassemblyjs/wast-printer" "1.11.0"
-
-"@webassemblyjs/wasm-gen@1.11.0":
-  version "1.11.0"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/wasm-gen/-/wasm-gen-1.11.0.tgz#3cdb35e70082d42a35166988dda64f24ceb97abe"
-  integrity sha512-BEUv1aj0WptCZ9kIS30th5ILASUnAPEvE3tVMTrItnZRT9tXCLW2LEXT8ezLw59rqPP9klh9LPmpU+WmRQmCPQ==
-  dependencies:
-    "@webassemblyjs/ast" "1.11.0"
-    "@webassemblyjs/helper-wasm-bytecode" "1.11.0"
-    "@webassemblyjs/ieee754" "1.11.0"
-    "@webassemblyjs/leb128" "1.11.0"
-    "@webassemblyjs/utf8" "1.11.0"
-
-"@webassemblyjs/wasm-opt@1.11.0":
-  version "1.11.0"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/wasm-opt/-/wasm-opt-1.11.0.tgz#1638ae188137f4bb031f568a413cd24d32f92978"
-  integrity sha512-tHUSP5F4ywyh3hZ0+fDQuWxKx3mJiPeFufg+9gwTpYp324mPCQgnuVKwzLTZVqj0duRDovnPaZqDwoyhIO8kYg==
-  dependencies:
-    "@webassemblyjs/ast" "1.11.0"
-    "@webassemblyjs/helper-buffer" "1.11.0"
-    "@webassemblyjs/wasm-gen" "1.11.0"
-    "@webassemblyjs/wasm-parser" "1.11.0"
-
-"@webassemblyjs/wasm-parser@1.11.0":
-  version "1.11.0"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/wasm-parser/-/wasm-parser-1.11.0.tgz#3e680b8830d5b13d1ec86cc42f38f3d4a7700754"
-  integrity sha512-6L285Sgu9gphrcpDXINvm0M9BskznnzJTE7gYkjDbxET28shDqp27wpruyx3C2S/dvEwiigBwLA1cz7lNUi0kw==
-  dependencies:
-    "@webassemblyjs/ast" "1.11.0"
-    "@webassemblyjs/helper-api-error" "1.11.0"
-    "@webassemblyjs/helper-wasm-bytecode" "1.11.0"
-    "@webassemblyjs/ieee754" "1.11.0"
-    "@webassemblyjs/leb128" "1.11.0"
-    "@webassemblyjs/utf8" "1.11.0"
-
-"@webassemblyjs/wast-printer@1.11.0":
-  version "1.11.0"
-  resolved "https://registry.yarnpkg.com/@webassemblyjs/wast-printer/-/wast-printer-1.11.0.tgz#680d1f6a5365d6d401974a8e949e05474e1fab7e"
-  integrity sha512-Fg5OX46pRdTgB7rKIUojkh9vXaVN6sGYCnEiJN1GYkb0RPwShZXp6KTDqmoMdQPKhcroOXh3fEzmkWmCYaKYhQ==
-  dependencies:
-    "@webassemblyjs/ast" "1.11.0"
-    "@xtuc/long" "4.2.2"
-
-"@xtuc/ieee754@^1.2.0":
-  version "1.2.0"
-  resolved "https://registry.yarnpkg.com/@xtuc/ieee754/-/ieee754-1.2.0.tgz#eef014a3145ae477a1cbc00cd1e552336dceb790"
-  integrity sha512-DX8nKgqcGwsc0eJSqYt5lwP4DH5FlHnmuWWBRy7X0NcaGR0ZtuyeESgMwTYVEtxmsNGY+qit4QYT/MIYTOTPeA==
-
-"@xtuc/long@4.2.2":
-  version "4.2.2"
-  resolved "https://registry.yarnpkg.com/@xtuc/long/-/long-4.2.2.tgz#d291c6a4e97989b5c61d9acf396ae4fe133a718d"
-  integrity sha512-NuHqBY1PB/D8xU6s/thBgOAiAP7HOYDQ32+BFZILJ8ivkUkAHQnWfn6WhL79Owj1qmUnoN/YPhktdIoucipkAQ==
-
 JSONStream@^1.0.4:
   version "1.3.5"
   resolved "https://registry.yarnpkg.com/JSONStream/-/JSONStream-1.3.5.tgz#3208c1f08d3a4d99261ab64f92302bc15e111ca0"
@@ -1943,7 +1786,7 @@ acorn@^7.1.1, acorn@^7.4.0:
   resolved "https://registry.yarnpkg.com/acorn/-/acorn-7.4.1.tgz#feaed255973d2e77555b83dbc08851a6c63520fa"
   integrity sha512-nQyp0o1/mNdbTO1PO6kHkwSrmgZ0MT/jCCpNiwbUjGoRN4dlBhqJtoQuCnEOKzgTVwg0ZWiCoQy6SxMebQVh8A==
 
-acorn@^8.2.1, acorn@^8.2.4:
+acorn@^8.2.4:
   version "8.3.0"
   resolved "https://registry.yarnpkg.com/acorn/-/acorn-8.3.0.tgz#1193f9b96c4e8232f00b11a9edff81b2c8b98b88"
   integrity sha512-tqPKHZ5CaBJw0Xmy0ZZvLs1qTV+BNFSyvn77ASXkpBNfIRk8ev26fKrD9iLGwGA9zedPao52GSHzq8lyZG0NUw==
@@ -1977,12 +1820,7 @@ aggregate-error@^3.0.0:
     clean-stack "^2.0.0"
     indent-string "^4.0.0"
 
-ajv-keywords@^3.5.2:
-  version "3.5.2"
-  resolved "https://registry.yarnpkg.com/ajv-keywords/-/ajv-keywords-3.5.2.tgz#31f29da5ab6e00d1c2d329acf7b5929614d5014d"
-  integrity sha512-5p6WTN0DdTGVQk6VjcEju19IgaHudalcfabD7yhDGeA6bcQnmL+CpveLJq/3hvfwd1aof6L386Ougkx6RfyMIQ==
-
-ajv@^6.10.0, ajv@^6.12.3, ajv@^6.12.4, ajv@^6.12.5:
+ajv@^6.10.0, ajv@^6.12.3, ajv@^6.12.4:
   version "6.12.6"
   resolved "https://registry.yarnpkg.com/ajv/-/ajv-6.12.6.tgz#baf5a62e802b07d977034586f8c3baf5adf26df4"
   integrity sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==
@@ -2502,7 +2340,7 @@ browser-process-hrtime@^1.0.0:
   resolved "https://registry.yarnpkg.com/browser-process-hrtime/-/browser-process-hrtime-1.0.0.tgz#3c9b4b7d782c8121e56f10106d84c0d0ffc94626"
   integrity sha512-9o5UecI3GhkpM6DrXr69PblIuWxPKk9Y0jHBRhdocZ2y7YECBFCsHm79Pr3OyR2AvjhDkabFJaDJMYRazHgsow==
 
-browserslist@^4.14.5, browserslist@^4.16.6:
+browserslist@^4.16.6:
   version "4.16.6"
   resolved "https://registry.yarnpkg.com/browserslist/-/browserslist-4.16.6.tgz#d7901277a5a88e554ed305b183ec9b0c08f66fa2"
   integrity sha512-Wspk/PqO+4W9qp5iUTJsa1B/QrYn1keNCcEP5OvP7WBwT4KaDly0uONYmC6Xa3Z5IqnUgS0KcgLYu1l74x0ZXQ==
@@ -2711,11 +2549,6 @@ chownr@^2.0.0:
   resolved "https://registry.yarnpkg.com/chownr/-/chownr-2.0.0.tgz#15bfbe53d2eab4cf70f18a8cd68ebe5b3cb1dece"
   integrity sha512-bIomtDF5KGpdogkLd9VspvFzk9KfpyyGlS8YFVZl7TGPBHL5snIOnxeshwVgPteQ9b4Eydl+pVbIyE1DcvCWgQ==
 
-chrome-trace-event@^1.0.2:
-  version "1.0.3"
-  resolved "https://registry.yarnpkg.com/chrome-trace-event/-/chrome-trace-event-1.0.3.tgz#1015eced4741e15d06664a957dbbf50d041e26ac"
-  integrity sha512-p3KULyQg4S7NIHixdwbGX+nFHkoBiA4YQmyWtjb8XngSKV124nJmRysgAeujbUVb15vh+RvFUfCPqU7rXk+hZg==
-
 ci-info@^2.0.0:
   version "2.0.0"
   resolved "https://registry.yarnpkg.com/ci-info/-/ci-info-2.0.0.tgz#67a9e964be31a51e15e5010d58e6f12834002f46"
@@ -2927,11 +2760,6 @@ command-line-usage@6.1.1:
     table-layout "^1.0.1"
     typical "^5.2.0"
 
-commander@^2.20.0:
-  version "2.20.3"
-  resolved "https://registry.yarnpkg.com/commander/-/commander-2.20.3.tgz#fd485e84c03eb4881c20722ba48035e8531aeb33"
-  integrity sha512-GpVkmM8vF2vQUkj2LvZmD35JxeJOLCwJ9cUkugyk2nuhbv3+mJvpLYYt+0+USMxE+oj+ey/lJEnhZw75x/OMcQ==
-
 commander@^6.1.0:
   version "6.2.1"
   resolved "https://registry.yarnpkg.com/commander/-/commander-6.2.1.tgz#0792eb682dfbc325999bb2b84fddddba110ac73c"
@@ -3551,14 +3379,6 @@ end-of-stream@^1.0.0, end-of-stream@^1.1.0:
   dependencies:
     once "^1.4.0"
 
-enhanced-resolve@^5.8.0:
-  version "5.8.2"
-  resolved "https://registry.yarnpkg.com/enhanced-resolve/-/enhanced-resolve-5.8.2.tgz#15ddc779345cbb73e97c611cd00c01c1e7bf4d8b"
-  integrity sha512-F27oB3WuHDzvR2DOGNTaYy0D5o0cnrv8TeI482VM4kYgQd/FT9lUQwuNsJ0oOHtBUq7eiW5ytqzp7nBFknL+GA==
-  dependencies:
-    graceful-fs "^4.2.4"
-    tapable "^2.2.0"
-
 enquirer@^2.3.5:
   version "2.3.6"
   resolved "https://registry.yarnpkg.com/enquirer/-/enquirer-2.3.6.tgz#2a7fe5dd634a1e4125a975ec994ff5456dc3734d"
@@ -3610,11 +3430,6 @@ es-abstract@^1.18.0-next.2:
     string.prototype.trimstart "^1.0.4"
     unbox-primitive "^1.0.1"
 
-es-module-lexer@^0.4.0:
-  version "0.4.1"
-  resolved "https://registry.yarnpkg.com/es-module-lexer/-/es-module-lexer-0.4.1.tgz#dda8c6a14d8f340a24e34331e0fab0cb50438e0e"
-  integrity sha512-ooYciCUtfw6/d2w56UVeqHPcoCFAiJdz5XOkYpv/Txl1HMUozpXjz/2RIQgqwKdXNDPSF1W7mJCFse3G+HDyAA==
-
 es-to-primitive@^1.2.1:
   version "1.2.1"
   resolved "https://registry.yarnpkg.com/es-to-primitive/-/es-to-primitive-1.2.1.tgz#e55cd4c9cdc188bcefb03b366c736323fc5c898a"
@@ -3836,11 +3651,6 @@ eventemitter3@^4.0.4:
   resolved "https://registry.yarnpkg.com/eventemitter3/-/eventemitter3-4.0.7.tgz#2de9b68f6528d5644ef5c59526a1b4a07306169f"
   integrity sha512-8guHBZCwKnFhYdHr2ysuRWErTwhoN2X8XELRlrRwpmfeY2jjuUN4taQMsULKUVo1K4DvZl+0pgfyoysHxvmvEw==
 
-events@^3.2.0:
-  version "3.3.0"
-  resolved "https://registry.yarnpkg.com/events/-/events-3.3.0.tgz#31a95ad0a924e2d2c419a813aeb2c4e878ea7400"
-  integrity sha512-mQw+2fkQbALzQ7V0MY0IqdnXNOeTtP4r0lN9z7AAawCXgqea7bDii20AYrIBrFd/Hx0M2Ocz6S111CaFkUcb0Q==
-
 execa@^5.0.0:
   version "5.1.0"
   resolved "https://registry.yarnpkg.com/execa/-/execa-5.1.0.tgz#3ea50ee863d226bfa323528cce1684e7481dfe46"
@@ -4427,11 +4237,6 @@ glob-to-regexp@^0.3.0:
   resolved "https://registry.yarnpkg.com/glob-to-regexp/-/glob-to-regexp-0.3.0.tgz#8c5a1494d2066c570cc3bfe4496175acc4d502ab"
   integrity sha1-jFoUlNIGbFcMw7/kSWF1rMTVAqs=
 
-glob-to-regexp@^0.4.1:
-  version "0.4.1"
-  resolved "https://registry.yarnpkg.com/glob-to-regexp/-/glob-to-regexp-0.4.1.tgz#c75297087c851b9a578bd217dd59a92f59fe546e"
-  integrity sha512-lkX1HJXwyMcprw/5YUZc2s7DrpAiHB21/V+E1rHUrVNokkvB6bqMzT0VfV6/86ZNabt1k14YOIaT7nDvOX3Iiw==
-
 glob-watcher@^5.0.3:
   version "5.0.5"
   resolved "https://registry.yarnpkg.com/glob-watcher/-/glob-watcher-5.0.5.tgz#aa6bce648332924d9a8489be41e3e5c52d4186dc"
@@ -5887,7 +5692,7 @@ json-bignum@^0.0.3:
   resolved "https://registry.yarnpkg.com/json-bignum/-/json-bignum-0.0.3.tgz#41163b50436c773d82424dbc20ed70db7604b8d7"
   integrity sha1-QRY7UENsdz2CQk28IO1w23YEuNc=
 
-json-parse-better-errors@^1.0.1, json-parse-better-errors@^1.0.2:
+json-parse-better-errors@^1.0.1:
   version "1.0.2"
   resolved "https://registry.yarnpkg.com/json-parse-better-errors/-/json-parse-better-errors-1.0.2.tgz#bb867cfb3450e69107c131d1c514bab3dc8bcaa9"
   integrity sha512-mrqyZKfX5EhL7hvqcV6WG1yYjnjeuYDzDhhcAAUrq8Po85NBQBJP+ZDUT75qZQ98IkUoBqdkExkukOU7Ts2wrw==
@@ -6151,11 +5956,6 @@ load-json-file@^6.2.0:
     strip-bom "^4.0.0"
     type-fest "^0.6.0"
 
-loader-runner@^4.2.0:
-  version "4.2.0"
-  resolved "https://registry.yarnpkg.com/loader-runner/-/loader-runner-4.2.0.tgz#d7022380d66d14c5fb1d496b89864ebcfd478384"
-  integrity sha512-92+huvxMvYlMzMt0iIOukcwYBFpkYJdpl2xsZ7LrlayO7E8SOv+JJUEK17B/dJIHAOLMfh2dZZ/Y18WgmGtYNw==
-
 locate-path@^2.0.0:
   version "2.0.0"
   resolved "https://registry.yarnpkg.com/locate-path/-/locate-path-2.0.0.tgz#2b568b265eec944c6d9c0de9c3dbbbca0354cd8e"
@@ -6507,7 +6307,7 @@ mime-db@1.48.0:
   resolved "https://registry.yarnpkg.com/mime-db/-/mime-db-1.48.0.tgz#e35b31045dd7eada3aaad537ed88a33afbef2d1d"
   integrity sha512-FM3QwxV+TnZYQ2aRqhlKBMHxk10lTbMt3bBkMAp54ddrNeVSfcQYOOKuGuy3Ddrm38I04If834fOUSq1yzslJQ==
 
-mime-types@^2.1.12, mime-types@^2.1.27, mime-types@~2.1.19:
+mime-types@^2.1.12, mime-types@~2.1.19:
   version "2.1.31"
   resolved "https://registry.yarnpkg.com/mime-types/-/mime-types-2.1.31.tgz#a00d76b74317c61f9c2db2218b8e9f8e9c5c9e6b"
   integrity sha512-XGZnNzm3QvgKxa8dpzyhFTHmpP3l5YNusmne07VUOXxou9CqUqYa/HBy124RqtVh/O2pECas/MOcsDgpilPOPg==
@@ -6732,7 +6532,7 @@ negotiator@^0.6.2:
   resolved "https://registry.yarnpkg.com/negotiator/-/negotiator-0.6.2.tgz#feacf7ccf525a77ae9634436a64883ffeca346fb"
   integrity sha512-hZXc7K2e+PgeI1eDBe/10Ard4ekbfrrqG8Ep+8Jmf4JID2bNg7NvCPOZN+kfF574pFQI7mum2AUqDidoKqcTOw==
 
-neo-async@^2.6.0, neo-async@^2.6.2:
+neo-async@^2.6.0:
   version "2.6.2"
   resolved "https://registry.yarnpkg.com/neo-async/-/neo-async-2.6.2.tgz#b4aafb93e3aeb2d8174ca53cf163ab7d7308305f"
   integrity sha512-Yd3UES5mWCSqR+qNT93S3UoYUkqAZ9lLg8a7g9rimsWmYGK8cVToA4/sF3RrshdyV3sAGMXVUmpMYOw+dLpOuw==
@@ -7209,13 +7009,6 @@ p-limit@^2.2.0:
   dependencies:
     p-try "^2.0.0"
 
-p-limit@^3.1.0:
-  version "3.1.0"
-  resolved "https://registry.yarnpkg.com/p-limit/-/p-limit-3.1.0.tgz#e1daccbe78d0d1388ca18c64fea38e3e57e3706b"
-  integrity sha512-TYOanM3wGwNGsZN2cVTYPArw454xnXj5qmWF1bEoAc4+cU/ol7GVh7odevjp1FNHduHc3KZMcFduxU5Xc6uJRQ==
-  dependencies:
-    yocto-queue "^0.1.0"
-
 p-locate@^2.0.0:
   version "2.0.0"
   resolved "https://registry.yarnpkg.com/p-locate/-/p-locate-2.0.0.tgz#20a0103b222a70c8fd39cc2e580680f3dde5ec43"
@@ -7747,13 +7540,6 @@ randomatic@3.1.1:
     kind-of "^6.0.0"
     math-random "^1.0.1"
 
-randombytes@^2.1.0:
-  version "2.1.0"
-  resolved "https://registry.yarnpkg.com/randombytes/-/randombytes-2.1.0.tgz#df6f84372f0270dc65cdf6291349ab7a473d4f2a"
-  integrity sha512-vYl3iOX+4CKUWuxGi9Ukhie6fsqXqS9FE2Zaic4tNFD2N2QQaXOMFbuKK4QmDHC0JO6B1Zp41J0LpT0oR68amQ==
-  dependencies:
-    safe-buffer "^5.1.0"
-
 react-is@^17.0.1:
   version "17.0.2"
   resolved "https://registry.yarnpkg.com/react-is/-/react-is-17.0.2.tgz#e691d4a8e9c789365655539ab372762b0efb54f0"
@@ -8179,15 +7965,6 @@ saxes@^5.0.1:
   dependencies:
     xmlchars "^2.2.0"
 
-schema-utils@^3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/schema-utils/-/schema-utils-3.0.0.tgz#67502f6aa2b66a2d4032b4279a2944978a0913ef"
-  integrity sha512-6D82/xSzO094ajanoOSbe4YvXWMfn2A//8Y1+MUqFAJul5Bs+yn36xbK9OtNDcRVSBJ9jjeoXftM6CfztsjOAA==
-  dependencies:
-    "@types/json-schema" "^7.0.6"
-    ajv "^6.12.5"
-    ajv-keywords "^3.5.2"
-
 semver-greatest-satisfied-range@^1.1.0:
   version "1.1.0"
   resolved "https://registry.yarnpkg.com/semver-greatest-satisfied-range/-/semver-greatest-satisfied-range-1.1.0.tgz#13e8c2658ab9691cb0cd71093240280d36f77a5b"
@@ -8212,13 +7989,6 @@ semver@^6.0.0, semver@^6.3.0:
   resolved "https://registry.yarnpkg.com/semver/-/semver-6.3.0.tgz#ee0a64c8af5e8ceea67687b133761e1becbd1d3d"
   integrity sha512-b39TBaTSfV6yBrapU89p5fKekE2m/NwnDocOVruQFS1/veMgdzuPcnOM34M6CwxW8jH/lxEa5rBoDeUwu5HHTw==
 
-serialize-javascript@^5.0.1:
-  version "5.0.1"
-  resolved "https://registry.yarnpkg.com/serialize-javascript/-/serialize-javascript-5.0.1.tgz#7886ec848049a462467a97d3d918ebb2aaf934f4"
-  integrity sha512-SaaNal9imEO737H2c05Og0/8LUXG7EnsZyMa8MzkmuHoELfT6txuj0cMqRj6zfPKnmQ1yasR4PCJc8x+M4JSPA==
-  dependencies:
-    randombytes "^2.1.0"
-
 set-blocking@^2.0.0, set-blocking@~2.0.0:
   version "2.0.0"
   resolved "https://registry.yarnpkg.com/set-blocking/-/set-blocking-2.0.0.tgz#045f9782d011ae9a6803ddd382b24392b3d890f7"
@@ -8396,25 +8166,6 @@ sort-keys@^4.0.0:
   dependencies:
     is-plain-obj "^2.0.0"
 
-source-list-map@^2.0.1:
-  version "2.0.1"
-  resolved "https://registry.yarnpkg.com/source-list-map/-/source-list-map-2.0.1.tgz#3993bd873bfc48479cca9ea3a547835c7c154b34"
-  integrity sha512-qnQ7gVMxGNxsiL4lEuJwe/To8UnK7fAnmbGEEH8RpLouuKbeEm0lhbQVFIrNSuB+G7tVrAlVsZgETT5nljf+Iw==
-
-source-map-js@^0.6.2:
-  version "0.6.2"
-  resolved "https://registry.yarnpkg.com/source-map-js/-/source-map-js-0.6.2.tgz#0bb5de631b41cfbda6cfba8bd05a80efdfd2385e"
-  integrity sha512-/3GptzWzu0+0MBQFrDKzw/DvvMTUORvgY6k6jd/VS6iCR4RDTKWH6v6WPwQoUO8667uQEf9Oe38DxAYWY5F/Ug==
-
-source-map-loader@3.0.0:
-  version "3.0.0"
-  resolved "https://registry.yarnpkg.com/source-map-loader/-/source-map-loader-3.0.0.tgz#f2a04ee2808ad01c774dea6b7d2639839f3b3049"
-  integrity sha512-GKGWqWvYr04M7tn8dryIWvb0s8YM41z82iQv01yBtIylgxax0CwvSy6gc2Y02iuXwEfGWRlMicH0nvms9UZphw==
-  dependencies:
-    abab "^2.0.5"
-    iconv-lite "^0.6.2"
-    source-map-js "^0.6.2"
-
 source-map-resolve@^0.5.0:
   version "0.5.3"
   resolved "https://registry.yarnpkg.com/source-map-resolve/-/source-map-resolve-0.5.3.tgz#190866bece7553e1f8f267a2ee82c606b5509a1a"
@@ -8434,7 +8185,7 @@ source-map-resolve@^0.6.0:
     atob "^2.1.2"
     decode-uri-component "^0.2.0"
 
-source-map-support@^0.5.17, source-map-support@^0.5.6, source-map-support@~0.5.19:
+source-map-support@^0.5.17, source-map-support@^0.5.6:
   version "0.5.19"
   resolved "https://registry.yarnpkg.com/source-map-support/-/source-map-support-0.5.19.tgz#a98b62f86dcaf4f67399648c085291ab9e8fed61"
   integrity sha512-Wonm7zOCIJzBGQdB+thsPar0kYuCIzYvxZwlBa87yi/Mdjv7Tip2cyVbLj5o0cFPN4EVkuTwb3GDDyUx2DGnGw==
@@ -8457,7 +8208,7 @@ source-map@^0.6.0, source-map@^0.6.1, source-map@~0.6.1:
   resolved "https://registry.yarnpkg.com/source-map/-/source-map-0.6.1.tgz#74722af32e9614e9c287a8d0bbde48b5e2f1a263"
   integrity sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==
 
-source-map@^0.7.3, source-map@~0.7.2:
+source-map@^0.7.3:
   version "0.7.3"
   resolved "https://registry.yarnpkg.com/source-map/-/source-map-0.7.3.tgz#5302f8169031735226544092e64981f751750383"
   integrity sha512-CkCj6giN3S+n9qrYiBTX5gystlENnRW5jZeNLHpe6aue+SrHcG5VYwujhW9s4dY31mEGsxBDrHR6oI69fTXsaQ==
@@ -8811,11 +8562,6 @@ table@^6.0.9:
     string-width "^4.2.0"
     strip-ansi "^6.0.0"
 
-tapable@^2.1.1, tapable@^2.2.0:
-  version "2.2.0"
-  resolved "https://registry.yarnpkg.com/tapable/-/tapable-2.2.0.tgz#5c373d281d9c672848213d0e037d1c4165ab426b"
-  integrity sha512-FBk4IesMV1rBxX2tfiK8RAmogtWn53puLOQlvO8XuwlgxcYbP4mVPS9Ph4aeamSyyVjOl24aYWAuc8U5kCVwMw==
-
 tar@^4.4.12:
   version "4.4.13"
   resolved "https://registry.yarnpkg.com/tar/-/tar-4.4.13.tgz#43b364bc52888d555298637b10d60790254ab525"
@@ -8865,27 +8611,6 @@ terminal-link@^2.0.0:
     ansi-escapes "^4.2.1"
     supports-hyperlinks "^2.0.0"
 
-terser-webpack-plugin@^5.1.1:
-  version "5.1.3"
-  resolved "https://registry.yarnpkg.com/terser-webpack-plugin/-/terser-webpack-plugin-5.1.3.tgz#30033e955ca28b55664f1e4b30a1347e61aa23af"
-  integrity sha512-cxGbMqr6+A2hrIB5ehFIF+F/iST5ZOxvOmy9zih9ySbP1C2oEWQSOUS+2SNBTjzx5xLKO4xnod9eywdfq1Nb9A==
-  dependencies:
-    jest-worker "^27.0.2"
-    p-limit "^3.1.0"
-    schema-utils "^3.0.0"
-    serialize-javascript "^5.0.1"
-    source-map "^0.6.1"
-    terser "^5.7.0"
-
-terser@^5.7.0:
-  version "5.7.0"
-  resolved "https://registry.yarnpkg.com/terser/-/terser-5.7.0.tgz#a761eeec206bc87b605ab13029876ead938ae693"
-  integrity sha512-HP5/9hp2UaZt5fYkuhNBR8YyRcT8juw8+uFbAme53iN9hblvKnLUTKkmwJG6ocWpIKf8UK4DoeWG4ty0J6S6/g==
-  dependencies:
-    commander "^2.20.0"
-    source-map "~0.7.2"
-    source-map-support "~0.5.19"
-
 test-exclude@^6.0.0:
   version "6.0.0"
   resolved "https://registry.yarnpkg.com/test-exclude/-/test-exclude-6.0.0.tgz#04a8698661d805ea6fa293b6cb9e63ac044ef15e"
@@ -9517,14 +9242,6 @@ walker@^1.0.7:
   dependencies:
     makeerror "1.0.x"
 
-watchpack@^2.0.0:
-  version "2.2.0"
-  resolved "https://registry.yarnpkg.com/watchpack/-/watchpack-2.2.0.tgz#47d78f5415fe550ecd740f99fe2882323a58b1ce"
-  integrity sha512-up4YAn/XHgZHIxFBVCdlMiWDj6WaLKpwVeGQk2I5thdYxF/KmF0aaz6TfJZ/hfl1h/XlcDr7k1KH7ThDagpFaA==
-  dependencies:
-    glob-to-regexp "^0.4.1"
-    graceful-fs "^4.1.2"
-
 wcwidth@^1.0.0:
   version "1.0.1"
   resolved "https://registry.yarnpkg.com/wcwidth/-/wcwidth-1.0.1.tgz#f0b0dcf915bc5ff1528afadb2c0e17b532da2fe8"
@@ -9552,43 +9269,6 @@ webidl-conversions@^6.1.0:
   resolved "https://registry.yarnpkg.com/webidl-conversions/-/webidl-conversions-6.1.0.tgz#9111b4d7ea80acd40f5270d666621afa78b69514"
   integrity sha512-qBIvFLGiBpLjfwmYAaHPXsn+ho5xZnGvyGvsarywGNc8VyQJUMHJ8OBKGGrPER0okBeMDaan4mNBlgBROxuI8w==
 
-webpack-sources@^2.1.1:
-  version "2.3.0"
-  resolved "https://registry.yarnpkg.com/webpack-sources/-/webpack-sources-2.3.0.tgz#9ed2de69b25143a4c18847586ad9eccb19278cfa"
-  integrity sha512-WyOdtwSvOML1kbgtXbTDnEW0jkJ7hZr/bDByIwszhWd/4XX1A3XMkrbFMsuH4+/MfLlZCUzlAdg4r7jaGKEIgQ==
-  dependencies:
-    source-list-map "^2.0.1"
-    source-map "^0.6.1"
-
-webpack@5.37.1:
-  version "5.37.1"
-  resolved "https://registry.yarnpkg.com/webpack/-/webpack-5.37.1.tgz#2deb5acd350583c1ab9338471f323381b0b0c14b"
-  integrity sha512-btZjGy/hSjCAAVHw+cKG+L0M+rstlyxbO2C+BOTaQ5/XAnxkDrP5sVbqWhXgo4pL3X2dcOib6rqCP20Zr9PLow==
-  dependencies:
-    "@types/eslint-scope" "^3.7.0"
-    "@types/estree" "^0.0.47"
-    "@webassemblyjs/ast" "1.11.0"
-    "@webassemblyjs/wasm-edit" "1.11.0"
-    "@webassemblyjs/wasm-parser" "1.11.0"
-    acorn "^8.2.1"
-    browserslist "^4.14.5"
-    chrome-trace-event "^1.0.2"
-    enhanced-resolve "^5.8.0"
-    es-module-lexer "^0.4.0"
-    eslint-scope "^5.1.1"
-    events "^3.2.0"
-    glob-to-regexp "^0.4.1"
-    graceful-fs "^4.2.4"
-    json-parse-better-errors "^1.0.2"
-    loader-runner "^4.2.0"
-    mime-types "^2.1.27"
-    neo-async "^2.6.2"
-    schema-utils "^3.0.0"
-    tapable "^2.1.1"
-    terser-webpack-plugin "^5.1.1"
-    watchpack "^2.0.0"
-    webpack-sources "^2.1.1"
-
 whatwg-encoding@^1.0.5:
   version "1.0.5"
   resolved "https://registry.yarnpkg.com/whatwg-encoding/-/whatwg-encoding-1.0.5.tgz#5abacf777c32166a51d085d6b4f3e7d27113ddb0"
@@ -9868,8 +9548,3 @@ yn@3.1.1:
   version "3.1.1"
   resolved "https://registry.yarnpkg.com/yn/-/yn-3.1.1.tgz#1e87401a09d767c1d5eab26a6e4c185182d2eb50"
   integrity sha512-Ux4ygGWsu2c7isFWe8Yu1YluJmqVhxqK2cLXNQA5AcC3QfbGNpM7fu0Y8b/z16pXLnFxZYvWhd3fhBY9DLmC6Q==
-
-yocto-queue@^0.1.0:
-  version "0.1.0"
-  resolved "https://registry.yarnpkg.com/yocto-queue/-/yocto-queue-0.1.0.tgz#0294eb3dee05028d31ee1a5fa2c556a6aaf10a1b"
-  integrity sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==

From 40008951dc7551581084b2359ee5e81ea6ee7f49 Mon Sep 17 00:00:00 2001
From: Ian Cook <ianmcook@gmail.com>
Date: Wed, 7 Jul 2021 10:59:55 -0400
Subject: [PATCH 522/719] ARROW-13262: [R] transmute() fails after pulling data
 into R

This fixes a problem with arguments not being defused in our implementation of `transmute()`, and adds checks for unsupported `transmute()` arguments for consistency with dplyr's behavior on data frames.

Closes #10672 from ianmcook/ARROW-13262

Authored-by: Ian Cook <ianmcook@gmail.com>
Signed-off-by: Ian Cook <ianmcook@gmail.com>
---
 r/NEWS.md                            |  3 ++-
 r/R/dplyr-mutate.R                   | 22 +++++++++++++++--
 r/tests/testthat/test-dplyr-mutate.R | 35 ++++++++++++++++++++++++++++
 3 files changed, 57 insertions(+), 3 deletions(-)

diff --git a/r/NEWS.md b/r/NEWS.md
index 4cea5edf6b3..63be8b9df9b 100644
--- a/r/NEWS.md
+++ b/r/NEWS.md
@@ -24,7 +24,8 @@
 * `arrow_info()` now includes details on the C++ build, such as compiler version
 * `dplyr` queries on `Table` and `RecordBatch` now use the same expression internals as `Dataset` (via `InMemoryDataset`). Among other (mostly internal) benefits that come with this, the print method for `arrow_dplyr_query` now includes the expression and the resulting type of columns derived by `mutate()`.
 * Added bindings for the remainder of C data interface: Type, Field, and RecordBatchReader (from the experimental C stream interface). These also have `reticulate::py_to_r()` and `r_to_py()` methods. Along with the addition of the `Scanner$ToRecordBatchReader()` method, you can now build up a Dataset query in R and pass the resulting stream of batches to another tool in process.
-* `match_arrow` now converts `x` into an `Array` if it is not a `Scalar`, `Array` or `ChunkedArray` and no longer dispatches `base::match()`.
+* `match_arrow()` now converts `x` into an `Array` if it is not a `Scalar`, `Array` or `ChunkedArray` and no longer dispatches `base::match()`.
+* `transmute()` now errors if passed arguments `.keep`, `.before`, or `.after`, for consistency with the behavior of `dplyr` on `data.frame`s.
 
 # arrow 4.0.1
 
diff --git a/r/R/dplyr-mutate.R b/r/R/dplyr-mutate.R
index 8513a45f6e9..a961fbf056c 100644
--- a/r/R/dplyr-mutate.R
+++ b/r/R/dplyr-mutate.R
@@ -113,5 +113,23 @@ mutate.arrow_dplyr_query <- function(.data,
 }
 mutate.Dataset <- mutate.ArrowTabular <- mutate.arrow_dplyr_query
 
-transmute.arrow_dplyr_query <- function(.data, ...) dplyr::mutate(.data, ..., .keep = "none")
-transmute.Dataset <- transmute.ArrowTabular <- transmute.arrow_dplyr_query
\ No newline at end of file
+transmute.arrow_dplyr_query <- function(.data, ...) {
+  dots <- check_transmute_args(...)
+  dplyr::mutate(.data, !!!dots, .keep = "none")
+}
+transmute.Dataset <- transmute.ArrowTabular <- transmute.arrow_dplyr_query
+
+# This function is a copy of dplyr:::check_transmute_args at
+# https://github.com/tidyverse/dplyr/blob/master/R/mutate.R
+check_transmute_args <- function(..., .keep, .before, .after) {
+  if (!missing(.keep)) {
+    abort("`transmute()` does not support the `.keep` argument")
+  }
+  if (!missing(.before)) {
+    abort("`transmute()` does not support the `.before` argument")
+  }
+  if (!missing(.after)) {
+    abort("`transmute()` does not support the `.after` argument")
+  }
+  enquos(...)
+}
diff --git a/r/tests/testthat/test-dplyr-mutate.R b/r/tests/testthat/test-dplyr-mutate.R
index 908ada296be..66cb9ff305d 100644
--- a/r/tests/testthat/test-dplyr-mutate.R
+++ b/r/tests/testthat/test-dplyr-mutate.R
@@ -92,6 +92,41 @@ test_that("empty transmute()", {
   )
 })
 
+test_that("transmute() with unsupported arguments", {
+  expect_error(
+    tbl %>%
+      Table$create() %>%
+      transmute(int = int + 42L, .keep = "all"),
+    "`transmute()` does not support the `.keep` argument",
+    fixed = TRUE
+  )
+  expect_error(
+    tbl %>%
+      Table$create() %>%
+      transmute(int = int + 42L, .before = lgl),
+    "`transmute()` does not support the `.before` argument",
+    fixed = TRUE
+  )
+  expect_error(
+    tbl %>%
+      Table$create() %>%
+      transmute(int = int + 42L, .after = chr),
+    "`transmute()` does not support the `.after` argument",
+    fixed = TRUE
+  )
+})
+
+test_that("transmute() defuses dots arguments (ARROW-13262)", {
+  expect_warning(
+    tbl %>%
+      Table$create() %>%
+      transmute(stringr::str_c(chr, chr)) %>%
+      collect(),
+    "Expression stringr::str_c(chr, chr) not supported in Arrow; pulling data into R",
+    fixed = TRUE
+  )
+})
+
 test_that("mutate and refer to previous mutants", {
   expect_dplyr_equal(
     input %>%

From 903977061194786699d1824c4e6cb977184351d1 Mon Sep 17 00:00:00 2001
From: Diana Clarke <diana.joan.clarke@gmail.com>
Date: Wed, 7 Jul 2021 10:02:41 -0500
Subject: [PATCH 523/719] ARROW-13266: [JS] Improve benchmark names & include
 suite name in json
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Before:

```
Prepare Data: 635.29ms
Running "Parse "tracks"" suite...
Table.from 6,513 ops/s ±23%, 0.12 ms, 75 samples
readBatches 6,689 ops/s ±9.0%, 0.12 ms, 70 samples
serialize 325 ops/s ±11%, 2.7 ms, 69 samples
Running "Get "tracks" values by index" suite...
name: 'lat', length: 1,000,000, type: Float32 27.4 ops/s ±8.1%, 33 ms, 49 samples
name: 'lng', length: 1,000,000, type: Float32 28.5 ops/s ±3.8%, 33 ms, 50 samples
name: 'origin', length: 1,000,000, type: Dictionary<Int8, Utf8> 0.181 ops/s ±27%, 5,070 ms, 5 samples
name: 'destination', length: 1,000,000, type: Dictionary<Int8, Utf8> 0.175 ops/s ±28%, 5,619 ms, 5 samples
Running "Iterate "tracks" vectors" suite...
name: 'lat', length: 1,000,000, type: Float32 46 ops/s ±9.8%, 19 ms, 63 samples
name: 'lng', length: 1,000,000, type: Float32 52 ops/s ±1.6%, 19 ms, 67 samples
name: 'origin', length: 1,000,000, type: Dictionary<Int8, Utf8> 0.255 ops/s ±2.7%, 3,903 ms, 5 samples
name: 'destination', length: 1,000,000, type: Dictionary<Int8, Utf8> 0.225 ops/s ±14%, 4,328 ms, 5 samples
Running "Slice toArray "tracks" vectors" suite...
name: 'lat', length: 1,000,000, type: Float32 1,069 ops/s ±7.6%, 0.84 ms, 75 samples
name: 'lng', length: 1,000,000, type: Float32 892 ops/s ±7.3%, 1.1 ms, 61 samples
name: 'origin', length: 1,000,000, type: Dictionary<Int8, Utf8> 0.151 ops/s ±21%, 6,730 ms, 5 samples
name: 'destination', length: 1,000,000, type: Dictionary<Int8, Utf8> 0.189 ops/s ±24%, 5,173 ms, 5 samples
Running "Slice "tracks" vectors" suite...
name: 'lat', length: 1,000,000, type: Float32 1,815,763 ops/s ±4.9%, 0.001 ms, 86 samples
name: 'lng', length: 1,000,000, type: Float32 1,355,757 ops/s ±8.3%, 0.001 ms, 63 samples
name: 'origin', length: 1,000,000, type: Dictionary<Int8, Utf8> 1,678,921 ops/s ±15%, 0 ms, 71 samples
name: 'destination', length: 1,000,000, type: Dictionary<Int8, Utf8> 2,214,331 ops/s ±3.0%, 0 ms, 85 samples
Running "DataFrame Iterate "tracks"" suite...
length: 1,000,000 19.6 ops/s ±8.8%, 46 ms, 36 samples
Running "DataFrame Direct Count "tracks"" suite...
name: 'lat', length: 1,000,000, type: Float32, test: gt, value: 0 113 ops/s ±5.3%, 8.2 ms, 72 samples
name: 'lng', length: 1,000,000, type: Float32, test: gt, value: 0 89 ops/s ±13%, 9 ms, 59 samples
name: 'origin', length: 1,000,000, type: Dictionary<Int8, Utf8>, test: eq, value: Seattle 0.304 ops/s ±9.1%, 3,305 ms, 5 samples
```

After:

```
Prepare Data: 4.638ms
Running "Parse" suite...
dataset: tracks, function: Table.from 22,176 ops/s ±16%, 0.037 ms, 84 samples
dataset: tracks, function: readBatches 23,428 ops/s ±5.0%, 0.038 ms, 80 samples
dataset: tracks, function: serialize 14,132 ops/s ±5.8%, 0.063 ms, 74 samples
Running "Get values by index" suite...
dataset: tracks, column: lat, length: 200, type: Float32 450,042 ops/s ±5.9%, 0.002 ms, 91 samples
dataset: tracks, column: lng, length: 200, type: Float32 411,900 ops/s ±6.1%, 0.002 ms, 78 samples
dataset: tracks, column: origin, length: 200, type: Dictionary<Int8, Utf8> 991 ops/s ±11%, 0.96 ms, 60 samples
dataset: tracks, column: destination, length: 200, type: Dictionary<Int8, Utf8> 1,171 ops/s ±7.2%, 0.72 ms, 70 samples
Running "Iterate vectors" suite...
dataset: tracks, column: lat, length: 200, type: Float32 225,008 ops/s ±3.6%, 0.004 ms, 89 samples
dataset: tracks, column: lng, length: 200, type: Float32 240,921 ops/s ±0.51%, 0.004 ms, 97 samples
dataset: tracks, column: origin, length: 200, type: Dictionary<Int8, Utf8> 1,161 ops/s ±5.5%, 0.78 ms, 65 samples
dataset: tracks, column: destination, length: 200, type: Dictionary<Int8, Utf8> 1,088 ops/s ±5.6%, 0.96 ms, 42 samples
Running "Slice toArray vectors" suite...
dataset: tracks, column: lat, length: 200, type: Float32 651,617 ops/s ±3.8%, 0.002 ms, 83 samples
dataset: tracks, column: lng, length: 200, type: Float32 673,205 ops/s ±3.0%, 0.001 ms, 63 samples
dataset: tracks, column: origin, length: 200, type: Dictionary<Int8, Utf8> 1,145 ops/s ±5.7%, 0.77 ms, 69 samples
dataset: tracks, column: destination, length: 200, type: Dictionary<Int8, Utf8> 1,142 ops/s ±5.6%, 0.77 ms, 65 samples
Running "Slice vectors" suite...
dataset: tracks, column: lat, length: 200, type: Float32 5,699,749 ops/s ±1.8%, 0 ms, 93 samples
dataset: tracks, column: lng, length: 200, type: Float32 5,768,406 ops/s ±0.75%, 0 ms, 97 samples
dataset: tracks, column: origin, length: 200, type: Dictionary<Int8, Utf8> 6,112,340 ops/s ±0.49%, 0 ms, 96 samples
dataset: tracks, column: destination, length: 200, type: Dictionary<Int8, Utf8> 6,033,494 ops/s ±1.3%, 0 ms, 94 samples
Running "DataFrame Iterate" suite...
dataset: tracks, length: 200 106,752 ops/s ±3.1%, 0.009 ms, 86 samples
Running "DataFrame Direct Count" suite...
dataset: tracks, column: lat, length: 200, type: Float32, test: gt, value: 0 1,277,478 ops/s ±2.3%, 0.001 ms, 83 samples
dataset: tracks, column: lng, length: 200, type: Float32, test: gt, value: 0 1,163,103 ops/s ±3.9%, 0.001 ms, 74 samples
dataset: tracks, column: origin, length: 200, type: Dictionary<Int8, Utf8>, test: eq, value: Seattle 1,209 ops/s ±7.1%, 0.76 ms, 63 samples
```

Closes #10665 from dianaclarke/ARROW-13266

Authored-by: Diana Clarke <diana.joan.clarke@gmail.com>
Signed-off-by: Jonathan Keane <jkeane@gmail.com>
---
 js/perf/index.ts | 43 ++++++++++++++++++++++---------------------
 1 file changed, 22 insertions(+), 21 deletions(-)

diff --git a/js/perf/index.ts b/js/perf/index.ts
index 506f16b60f4..fc2cb8b3a13 100644
--- a/js/perf/index.ts
+++ b/js/perf/index.ts
@@ -46,6 +46,7 @@ const results: CaseResult[] = [];
 function cycle(result: CaseResult, _summary: Summary) {
     const duration = result.details.median * 1000;
     if (json) {
+        result.suite = _summary.name
         results.push(result);
     }
     console.log(
@@ -55,17 +56,17 @@ function cycle(result: CaseResult, _summary: Summary) {
 
 for (const { name, ipc, df } of config) {
     b.suite(
-        `Parse "${name}"`,
+        `Parse`,
 
-        b.add(`Table.from`, () => {
+        b.add(`dataset: ${name}, function: Table.from`, () => {
             Table.from(ipc);
         }),
 
-        b.add(`readBatches`, () => {
+        b.add(`dataset: ${name}, function: readBatches`, () => {
             for (const _recordBatch of RecordBatchReader.from(ipc)) {}
         }),
 
-        b.add(`serialize`, () => {
+        b.add(`dataset: ${name}, function: serialize`, () => {
             df.serialize();
         }),
 
@@ -75,30 +76,30 @@ for (const { name, ipc, df } of config) {
     const schema = df.schema;
 
     const suites = [{
-            name: `Get "${name}" values by index`,
+            suite_name: `Get values by index`,
             fn(vector: Arrow.Column<any>) {
                 for (let i = -1, n = vector.length; ++i < n;) {
                     vector.get(i);
                 }
             }
         }, {
-            name: `Iterate "${name}" vectors`,
+            suite_name: `Iterate vectors`,
             fn(vector: Arrow.Column<any>) { for (const _value of vector) {} }
         }, {
-            name: `Slice toArray "${name}" vectors`,
+            suite_name: `Slice toArray vectors`,
             fn(vector: Arrow.Column<any>) { vector.slice().toArray(); }
         }, {
-            name: `Slice "${name}" vectors`,
+            suite_name: `Slice vectors`,
             fn(vector: Arrow.Column<any>) { vector.slice(); }
         }];
 
-    for (const {name, fn} of suites) {
+    for (const {suite_name, fn} of suites) {
         b.suite(
-            name,
+            suite_name,
 
             ...schema.fields.map((f, i) => {
                 const vector = df.getColumnAt(i)!;
-                return b.add(`name: '${f.name}', length: ${formatNumber(vector.length)}, type: ${vector.type}`, () => {
+                return b.add(`dataset: ${name}, column: ${f.name}, length: ${formatNumber(vector.length)}, type: ${vector.type}`, () => {
                     fn(vector);
                 });
             }),
@@ -111,9 +112,9 @@ for (const { name, ipc, df } of config) {
 
 for (const { name, df, countBys, counts } of config) {
     b.suite(
-        `DataFrame Iterate "${name}"`,
+        `DataFrame Iterate`,
 
-        b.add(`length: ${formatNumber(df.length)}`, () => {
+        b.add(`dataset: ${name}, length: ${formatNumber(df.length)}`, () => {
             for (const _value of df) {}
         }),
 
@@ -121,10 +122,10 @@ for (const { name, df, countBys, counts } of config) {
     );
 
     b.suite(
-        `DataFrame Count By "${name}"`,
+        `DataFrame Count By`,
 
         ...countBys.map((column: string) => b.add(
-            `name: '${column}', length: ${formatNumber(df.length)}, type: ${df.schema.fields.find((c)=> c.name === column)!.type}`,
+            `dataset: ${name}, column: ${column}, length: ${formatNumber(df.length)}, type: ${df.schema.fields.find((c)=> c.name === column)!.type}`,
             () => df.countBy(column)
         )),
 
@@ -132,10 +133,10 @@ for (const { name, df, countBys, counts } of config) {
     );
 
     b.suite(
-        `DataFrame Filter-Scan Count "${name}"`,
+        `DataFrame Filter-Scan Count`,
 
         ...counts.map(({ column, test, value }: {column: string; test: 'gt' | 'eq'; value: number | string}) => b.add(
-            `name: '${column}', length: ${formatNumber(df.length)}, type: ${df.schema.fields.find((c)=> c.name === column)!.type}, test: ${test}, value: ${value}`,
+            `dataset: ${name}, column: ${column}, length: ${formatNumber(df.length)}, type: ${df.schema.fields.find((c)=> c.name === column)!.type}, test: ${test}, value: ${value}`,
             () => {
                 let filteredDf: Arrow.FilteredDataFrame;
                 if (test == 'gt') {
@@ -154,10 +155,10 @@ for (const { name, df, countBys, counts } of config) {
     );
 
     b.suite(
-        `DataFrame Filter-Iterate "${name}"`,
+        `DataFrame Filter-Iterate`,
 
         ...counts.map(({ column, test, value }: {column: string; test: 'gt' | 'eq'; value: number | string}) => b.add(
-            `name: '${column}', length: ${formatNumber(df.length)}, type: ${df.schema.fields.find((c)=> c.name === column)!.type}, test: ${test}, value: ${value}`,
+            `dataset: ${name}, column: ${column}, length: ${formatNumber(df.length)}, type: ${df.schema.fields.find((c)=> c.name === column)!.type}, test: ${test}, value: ${value}`,
             () => {
                 let filteredDf: Arrow.FilteredDataFrame;
                 if (test == 'gt') {
@@ -178,10 +179,10 @@ for (const { name, df, countBys, counts } of config) {
     );
 
     b.suite(
-        `DataFrame Direct Count "${name}"`,
+        `DataFrame Direct Count`,
 
         ...counts.map(({ column, test, value }: {column: string; test: 'gt' | 'eq'; value: number | string}) => b.add(
-            `name: '${column}', length: ${formatNumber(df.length)}, type: ${df.schema.fields.find((c)=> c.name === column)!.type}, test: ${test}, value: ${value}`,
+            `dataset: ${name}, column: ${column}, length: ${formatNumber(df.length)}, type: ${df.schema.fields.find((c)=> c.name === column)!.type}, test: ${test}, value: ${value}`,
             () => {
                 const colidx = df.schema.fields.findIndex((c)=> c.name === column);
 

From 6c8d30ea82222fd2750b999840872d3f6cbdc8f8 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Wed, 7 Jul 2021 12:20:11 -0400
Subject: [PATCH 524/719] ARROW-13230: [Docs][Python] Add CSV writer docs

Closes #10658 from lidavidm/arrow-13230

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 docs/source/cpp/api/formats.rst    | 15 +++++++--
 docs/source/cpp/csv.rst            | 54 +++++++++++++++++++++++++++---
 docs/source/python/api/formats.rst | 12 ++++---
 docs/source/python/csv.rst         | 50 +++++++++++++++++++++++----
 python/pyarrow/_csv.pyx            |  2 +-
 python/pyarrow/io.pxi              | 11 +++---
 python/pyarrow/tests/test_io.py    | 10 ++++++
 7 files changed, 131 insertions(+), 23 deletions(-)

diff --git a/docs/source/cpp/api/formats.rst b/docs/source/cpp/api/formats.rst
index a072f114475..2f6b24802f0 100644
--- a/docs/source/cpp/api/formats.rst
+++ b/docs/source/cpp/api/formats.rst
@@ -24,18 +24,29 @@ File Formats
 CSV
 ===
 
-.. doxygenstruct:: arrow::csv::ReadOptions
+.. doxygenstruct:: arrow::csv::ConvertOptions
    :members:
 
 .. doxygenstruct:: arrow::csv::ParseOptions
    :members:
 
-.. doxygenstruct:: arrow::csv::ConvertOptions
+.. doxygenstruct:: arrow::csv::ReadOptions
+   :members:
+
+.. doxygenstruct:: arrow::csv::WriteOptions
    :members:
 
 .. doxygenclass:: arrow::csv::TableReader
    :members:
 
+.. doxygenfunction:: arrow::csv::MakeCSVWriter(io::OutputStream *, const std::shared_ptr<Schema>&, const WriteOptions&)
+
+.. doxygenfunction:: arrow::csv::MakeCSVWriter(std::shared_ptr<io::OutputStream>, const std::shared_ptr<Schema>&, const WriteOptions&)
+
+.. doxygenfunction:: arrow::csv::WriteCSV(const RecordBatch&, const WriteOptions&, arrow::io::OutputStream *)
+
+.. doxygenfunction:: arrow::csv::WriteCSV(const Table&, const WriteOptions&, arrow::io::OutputStream *)
+
 .. _cpp-api-json:
 
 Line-separated JSON
diff --git a/docs/source/cpp/csv.rst b/docs/source/cpp/csv.rst
index 123bc8a467d..2f92708e79f 100644
--- a/docs/source/cpp/csv.rst
+++ b/docs/source/cpp/csv.rst
@@ -20,15 +20,15 @@
 
 .. cpp:namespace:: arrow::csv
 
-=================
-Reading CSV files
-=================
+=============================
+Reading and Writing CSV files
+=============================
 
 Arrow provides a fast CSV reader allowing ingestion of external data
 as Arrow tables.
 
 .. seealso::
-   :ref:`CSV reader API reference <cpp-api-csv>`.
+   :ref:`CSV reader/writer API reference <cpp-api-csv>`.
 
 Basic usage
 ===========
@@ -69,6 +69,46 @@ A CSV file is read from a :class:`~arrow::io::InputStream`.
       std::shared_ptr<arrow::Table> table = *maybe_table;
    }
 
+A CSV file is written to a :class:`~arrow::io::OutputStream`.
+
+.. code-block:: cpp
+
+   #include <arrow/csv/api.h>
+   {
+       // Oneshot write
+       // ...
+       std::shared_ptr<arrow::io::OutputStream> output = ...;
+       auto write_options = arrow::csv::WriteOptions::Defaults();
+       if (WriteCSV(table, options, output.get()).ok()) {
+           // Handle writer error...
+       }
+   }
+   {
+       // Write incrementally
+       // ...
+       std::shared_ptr<arrow::io::OutputStream> output = ...;
+       auto write_options = arrow::csv::WriteOptions::Defaults();
+       auto maybe_writer = arrow::csv::MakeCSVWriter(output, schema, options);
+       if (!maybe_writer.ok()) {
+           // Handle writer instantiation error...
+       }
+       std::shared_ptr<arrow::ipc::RecordBatchWriter> writer = *maybe_writer;
+
+       // Write batches...
+       if (!writer->WriteRecordBatch(*batch).ok()) {
+           // Handle write error...
+       }
+
+       if (!writer->Close().ok()) {
+           // Handle close error...
+       }
+       if (!output->Close().ok()) {
+           // Handle file close error...
+       }
+   }
+
+.. note:: The writer does not yet support all Arrow types.
+
 Column names
 ============
 
@@ -162,6 +202,12 @@ Character encoding
 CSV files are expected to be encoded in UTF8.  However, non-UTF8 data
 is accepted for Binary columns.
 
+Write Options
+=============
+
+The format of written CSV files can be customized via :class:`~arrow::csv::WriteOptions`.
+Currently few options are available; more will be added in future releases.
+
 Performance
 ===========
 
diff --git a/docs/source/python/api/formats.rst b/docs/source/python/api/formats.rst
index 28a30c934e5..fdc28040a71 100644
--- a/docs/source/python/api/formats.rst
+++ b/docs/source/python/api/formats.rst
@@ -28,12 +28,16 @@ CSV Files
 .. autosummary::
    :toctree: ../generated/
 
-   ReadOptions
-   ParseOptions
    ConvertOptions
-   read_csv
-   open_csv
    CSVStreamingReader
+   CSVWriter
+   ISO8601
+   ParseOptions
+   ReadOptions
+   WriteOptions
+   open_csv
+   read_csv
+   write_csv
 
 .. _api.feather:
 
diff --git a/docs/source/python/csv.rst b/docs/source/python/csv.rst
index 3c0b3993217..9c00027b041 100644
--- a/docs/source/python/csv.rst
+++ b/docs/source/python/csv.rst
@@ -18,10 +18,10 @@
 .. currentmodule:: pyarrow.csv
 .. _csv:
 
-Reading CSV files
-=================
+Reading and Writing CSV files
+=============================
 
-Arrow supports reading columnar data from CSV files.
+Arrow supports reading and writing columnar data from/to CSV files.
 The features currently offered are the following:
 
 * multi-threaded or single-threaded reading
@@ -33,13 +33,14 @@ The features currently offered are the following:
 * opportunistic dictionary encoding of ``string`` and ``binary`` columns
   (disabled by default)
 * detecting various spellings of null values such as ``NaN`` or ``#N/A``
+* writing CSV files with options to configure the exact output format
 
 Usage
 -----
 
-CSV reading functionality is available through the :mod:`pyarrow.csv` module.
-In many cases, you will simply call the :func:`read_csv` function
-with the file path you want to read from::
+CSV reading and writing functionality is available through the
+:mod:`pyarrow.csv` module.  In many cases, you will simply call the
+:func:`read_csv` function with the file path you want to read from::
 
    >>> from pyarrow import csv
    >>> fn = 'tips.csv.gz'
@@ -64,6 +65,18 @@ with the file path you want to read from::
    3       23.68  3.31    Male     No  Sun  Dinner     2
    4       24.59  3.61  Female     No  Sun  Dinner     4
 
+To write CSV files, just call :func:`write_csv` with a
+:class:`pyarrow.RecordBatch` or :class:`pyarrow.Table` and a path or
+file-like object::
+
+  >>> import pyarrow as pa
+  >>> import pyarrow.csv as csv
+  >>> csv.write_csv(table, "tips.csv")
+  >>> with pa.CompressedOutputStream("tips.csv.gz", "gzip") as out:
+  ...     csv.write_csv(table, out)
+
+.. note:: The writer does not yet support all Arrow types.
+
 Customized parsing
 ------------------
 
@@ -111,6 +124,31 @@ By default, CSV files are expected to be encoded in UTF8.  Non-UTF8 data
 is accepted for ``binary`` columns.  The encoding can be changed using
 the :class:`ReadOptions` class.
 
+Customized writing
+------------------
+
+To alter the default write settings in case of writing CSV files with
+different conventions, you can create a :class:`WriteOptions` instance and
+pass it to :func:`write_csv`::
+
+  >>> import pyarrow as pa
+  >>> import pyarrow.csv as csv
+  >>> # Omit the header row (include_header=True is the default)
+  >>> options = csv.WriteOptions(include_header=False)
+  >>> csv.write_csv(table, "data.csv", options)
+
+Incremental writing
+-------------------
+
+To write CSV files one batch at a time, create a :class:`CSVWriter`. This
+requires the output (a path or file-like object), the schema of the data to
+be written, and optionally write options as described above::
+
+  >>> import pyarrow as pa
+  >>> import pyarrow.csv as csv
+  >>> with csv.CSVWriter("data.csv", table.schema) as writer:
+  >>>     writer.write_table(table)
+
 Performance
 -----------
 
diff --git a/python/pyarrow/_csv.pyx b/python/pyarrow/_csv.pyx
index 969fcbafee6..1e896a2076a 100644
--- a/python/pyarrow/_csv.pyx
+++ b/python/pyarrow/_csv.pyx
@@ -998,7 +998,7 @@ def write_csv(data, output_file, write_options=None,
     ----------
     data: pyarrow.RecordBatch or pyarrow.Table
         The data to write.
-    output_file: string, path, pyarrow.OutputStream or file-like object
+    output_file: string, path, pyarrow.NativeFile, or file-like object
         The location where to write the CSV data.
     write_options: pyarrow.csv.WriteOptions
         Options to configure writing the CSV data.
diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi
index 494566437e0..b5da607950b 100644
--- a/python/pyarrow/io.pxi
+++ b/python/pyarrow/io.pxi
@@ -1282,21 +1282,20 @@ cdef class CompressedOutputStream(NativeFile):
 
     Parameters
     ----------
-    stream : pa.NativeFile
+    stream : string, path, pa.NativeFile, or file-like object
         Input stream object to wrap with the compression.
     compression : str
         The compression type ("bz2", "brotli", "gzip", "lz4" or "zstd").
     """
 
-    def __init__(self, NativeFile stream, str compression not None):
+    def __init__(self, object stream, str compression not None):
         cdef:
             Codec codec = Codec(compression)
+            shared_ptr[COutputStream] c_writer
             shared_ptr[CCompressedOutputStream] compressed_stream
+        get_writer(stream, &c_writer)
         compressed_stream = GetResultValue(
-            CCompressedOutputStream.Make(
-                codec.unwrap(),
-                stream.get_output_stream()
-            )
+            CCompressedOutputStream.Make(codec.unwrap(), c_writer)
         )
         self.set_output_stream(<shared_ptr[COutputStream]> compressed_stream)
         self.is_writable = True
diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py
index d637fc8b557..a085312bbc7 100644
--- a/python/pyarrow/tests/test_io.py
+++ b/python/pyarrow/tests/test_io.py
@@ -1267,6 +1267,16 @@ def test_compressed_output_bz2(tmpdir):
         assert got == data
 
 
+def test_output_stream_constructor(tmpdir):
+    if not Codec.is_available("gzip"):
+        pytest.skip("gzip support is not built")
+    with pa.CompressedOutputStream(tmpdir / "ctor.gz", "gzip") as stream:
+        stream.write(b"test")
+    with (tmpdir / "ctor2.gz").open("wb") as f:
+        with pa.CompressedOutputStream(f, "gzip") as stream:
+            stream.write(b"test")
+
+
 @pytest.mark.parametrize(("path", "expected_compression"), [
     ("file.bz2", "bz2"),
     ("file.lz4", "lz4"),

From cf6a7ff65f4e2920641d116a3ba1f578b2bd8a9e Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Thu, 8 Jul 2021 05:57:36 +0900
Subject: [PATCH 525/719] ARROW-13284: [C++] Fix wrong pkg_check_modules()
 option name

Closes #10678 from kou/cpp-pc-fix-typo

Authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 cpp/cmake_modules/ThirdpartyToolchain.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index 1cb4ceb4eea..39ccbbe72b0 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -242,7 +242,7 @@ macro(resolve_dependency DEPENDENCY_NAME)
       pkg_check_modules(${ARG_PC_PACKAGE_NAME}_PC
                         ${ARG_PC_PACKAGE_NAME}
                         NO_CMAKE_PATH
-                        NO_CMAKE_EINVIRONMENT_PATH
+                        NO_CMAKE_ENVIRONMENT_PATH
                         QUIET)
       if(${${ARG_PC_PACKAGE_NAME}_PC_FOUND})
         string(APPEND ARROW_PC_REQUIRES_PRIVATE " ${ARG_PC_PACKAGE_NAME}")

From 18a41b412392c653e03cfe06887530ac3d8bf601 Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Thu, 8 Jul 2021 12:46:55 +0900
Subject: [PATCH 526/719] ARROW-13276: [GLib][Ruby][Flight] Add support for
 ListFlights

Closes #10671 from kou/glib-flight-list-flights

Authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 .github/workflows/ruby.yml                    |    2 +-
 c_glib/arrow-flight-glib/client.cpp           |   64 +
 c_glib/arrow-flight-glib/client.h             |    7 +
 c_glib/arrow-flight-glib/client.hpp           |    3 +
 c_glib/arrow-flight-glib/common.cpp           | 1033 +++++++++++++++++
 c_glib/arrow-flight-glib/common.h             |  169 +++
 c_glib/arrow-flight-glib/common.hpp           |   23 +
 c_glib/arrow-flight-glib/server.cpp           |  140 ++-
 c_glib/arrow-flight-glib/server.h             |   24 +
 c_glib/test/flight/test-client.rb             |   13 +-
 c_glib/test/flight/test-command-descriptor.rb |   52 +
 c_glib/test/flight/test-criteria.rb           |   29 +
 c_glib/test/flight/test-endpoint.rb           |   67 ++
 c_glib/test/flight/test-info.rb               |   78 ++
 c_glib/test/flight/test-path-descriptor.rb    |   52 +
 c_glib/test/flight/test-ticket.rb             |   47 +
 c_glib/test/helper/data-type.rb               |   16 +
 c_glib/test/helper/flight-info-generator.rb   |   61 +
 c_glib/test/helper/flight-server.rb           |    8 +
 c_glib/test/helper/writable.rb                |   12 +-
 c_glib/test/run-test.rb                       |   11 +-
 .../lib/arrow-flight/loader.rb                |    6 +
 .../lib/arrow-flight/ticket.rb                |   32 +
 .../test/helper/info-generator.rb             |   53 +
 ruby/red-arrow-flight/test/helper/server.rb   |    8 +
 ruby/red-arrow-flight/test/test-client.rb     |   17 +-
 ruby/red-arrow-flight/test/test-ticket.rb     |   26 +
 ruby/red-arrow/lib/arrow/table.rb             |    4 +-
 ruby/red-arrow/red-arrow.gemspec              |    2 +-
 29 files changed, 2031 insertions(+), 28 deletions(-)
 create mode 100644 c_glib/test/flight/test-command-descriptor.rb
 create mode 100644 c_glib/test/flight/test-criteria.rb
 create mode 100644 c_glib/test/flight/test-endpoint.rb
 create mode 100644 c_glib/test/flight/test-info.rb
 create mode 100644 c_glib/test/flight/test-path-descriptor.rb
 create mode 100644 c_glib/test/flight/test-ticket.rb
 create mode 100644 c_glib/test/helper/flight-info-generator.rb
 create mode 100644 ruby/red-arrow-flight/lib/arrow-flight/ticket.rb
 create mode 100644 ruby/red-arrow-flight/test/helper/info-generator.rb
 create mode 100644 ruby/red-arrow-flight/test/test-ticket.rb

diff --git a/.github/workflows/ruby.yml b/.github/workflows/ruby.yml
index ec3e9946c0c..067b40aefe9 100644
--- a/.github/workflows/ruby.yml
+++ b/.github/workflows/ruby.yml
@@ -187,7 +187,7 @@ jobs:
         mingw-n-bits:
           - 64
         ruby-version:
-          - 2.6
+          - "3.0"
     env:
       ARROW_BUILD_SHARED: ON
       ARROW_BUILD_STATIC: OFF
diff --git a/c_glib/arrow-flight-glib/client.cpp b/c_glib/arrow-flight-glib/client.cpp
index 468993d798c..c25b8d4b550 100644
--- a/c_glib/arrow-flight-glib/client.cpp
+++ b/c_glib/arrow-flight-glib/client.cpp
@@ -248,10 +248,74 @@ gaflight_client_new(GAFlightLocation *location,
   }
 }
 
+/**
+ * gaflight_client_list_flights:
+ * @client: A #GAFlightClient.
+ * @options: (nullable): A #GAFlightCallOptions.
+ * @criteria: (nullable): A #GAFlightCriteria.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (nullable) (element-type GAFlightInfo) (transfer full):
+ *   The returned list of #GAFlightInfo, %NULL on error.
+ *
+ * Since: 5.0.0
+ */
+GList *
+gaflight_client_list_flights(GAFlightClient *client,
+                             GAFlightCallOptions *options,
+                             GAFlightCriteria *criteria,
+                             GError **error)
+{
+  auto flight_client = gaflight_client_get_raw(client);
+  arrow::flight::FlightCallOptions flight_default_options;
+  const auto *flight_options = &flight_default_options;
+  if (options) {
+    flight_options = gaflight_call_options_get_raw(options);
+  }
+  arrow::flight::Criteria flight_default_criteria;
+  const auto *flight_criteria = &flight_default_criteria;
+  if (criteria) {
+    flight_criteria = gaflight_criteria_get_raw(criteria);
+  }
+  std::unique_ptr<arrow::flight::FlightListing> flight_listing;
+  auto status = flight_client->ListFlights(*flight_options,
+                                           *flight_criteria,
+                                           &flight_listing);
+  if (!garrow::check(error,
+                     status,
+                     "[flight-client][list-flights]")) {
+    return NULL;
+  }
+  GList *listing = NULL;
+  std::unique_ptr<arrow::flight::FlightInfo> flight_info;
+  while (true) {
+    status = flight_listing->Next(&flight_info);
+    if (!garrow::check(error,
+                       status,
+                       "[flight-client][list-flights]")) {
+      g_list_free_full(listing, g_object_unref);
+      return NULL;
+    }
+    if (!flight_info) {
+      break;
+    }
+    auto info = gaflight_info_new_raw(flight_info.release());
+    listing = g_list_prepend(listing, info);
+  }
+  return g_list_reverse(listing);
+}
+
 
 G_END_DECLS
 
 
+arrow::flight::FlightCallOptions *
+gaflight_call_options_get_raw(GAFlightCallOptions *options)
+{
+  auto priv = GAFLIGHT_CALL_OPTIONS_GET_PRIVATE(options);
+  return &(priv->options);
+}
+
 arrow::flight::FlightClientOptions *
 gaflight_client_options_get_raw(GAFlightClientOptions *options)
 {
diff --git a/c_glib/arrow-flight-glib/client.h b/c_glib/arrow-flight-glib/client.h
index 92b43f7522a..ca75ae24135 100644
--- a/c_glib/arrow-flight-glib/client.h
+++ b/c_glib/arrow-flight-glib/client.h
@@ -73,5 +73,12 @@ gaflight_client_new(GAFlightLocation *location,
                     GAFlightClientOptions *options,
                     GError **error);
 
+GARROW_AVAILABLE_IN_5_0
+GList *
+gaflight_client_list_flights(GAFlightClient *client,
+                             GAFlightCallOptions *options,
+                             GAFlightCriteria *criteria,
+                             GError **error);
+
 
 G_END_DECLS
diff --git a/c_glib/arrow-flight-glib/client.hpp b/c_glib/arrow-flight-glib/client.hpp
index 32d3f77cdb3..c3fe7b8c790 100644
--- a/c_glib/arrow-flight-glib/client.hpp
+++ b/c_glib/arrow-flight-glib/client.hpp
@@ -24,6 +24,9 @@
 #include <arrow-flight-glib/client.h>
 
 
+arrow::flight::FlightCallOptions *
+gaflight_call_options_get_raw(GAFlightCallOptions *options);
+
 arrow::flight::FlightClientOptions *
 gaflight_client_options_get_raw(GAFlightClientOptions *options);
 
diff --git a/c_glib/arrow-flight-glib/common.cpp b/c_glib/arrow-flight-glib/common.cpp
index 4c14027167f..9f643177968 100644
--- a/c_glib/arrow-flight-glib/common.cpp
+++ b/c_glib/arrow-flight-glib/common.cpp
@@ -18,6 +18,8 @@
  */
 
 #include <arrow-glib/error.hpp>
+#include <arrow-glib/ipc-options.hpp>
+#include <arrow-glib/schema.hpp>
 
 #include <arrow-flight-glib/common.hpp>
 
@@ -29,10 +31,163 @@ G_BEGIN_DECLS
  * @title: Classes both for client and server
  * @include: arrow-flight-glib/arrow-flight-glib.h
  *
+ * #GAFlightCriteria is a class for criteria.
+ *
  * #GAFlightLocation is a class for location.
  *
+ * #GAFlightDescriptor is a base class for all descriptor classes such
+ * as #GArrowFlightPathDescriptor.
+ *
+ * #GAFlightPathDescriptor is a class for path descriptor.
+ *
+ * #GAFlightCommandDescriptor is a class for command descriptor.
+ *
+ * #GAFlightTicket is a class for ticket.
+ *
+ * #GAFlightEndpoint is a class for endpoint.
+ *
+ * #GAFlightInfo is a class for flight information.
+ *
+ * Since: 5.0.0
+ */
+
+typedef struct GAFlightCriteriaPrivate_ {
+  arrow::flight::Criteria criteria;
+  GBytes *expression;
+} GAFlightCriteriaPrivate;
+
+enum {
+  PROP_EXPRESSION = 1,
+};
+
+G_DEFINE_TYPE_WITH_PRIVATE(GAFlightCriteria,
+                           gaflight_criteria,
+                           G_TYPE_OBJECT)
+
+#define GAFLIGHT_CRITERIA_GET_PRIVATE(obj)            \
+  static_cast<GAFlightCriteriaPrivate *>(             \
+    gaflight_criteria_get_instance_private(           \
+      GAFLIGHT_CRITERIA(obj)))
+
+static void
+gaflight_criteria_dispose(GObject *object)
+{
+  auto priv = GAFLIGHT_CRITERIA_GET_PRIVATE(object);
+
+  if (priv->expression) {
+    g_bytes_unref(priv->expression);
+    priv->expression = NULL;
+  }
+
+  G_OBJECT_CLASS(gaflight_criteria_parent_class)->dispose(object);
+}
+
+static void
+gaflight_criteria_finalize(GObject *object)
+{
+  auto priv = GAFLIGHT_CRITERIA_GET_PRIVATE(object);
+
+  priv->criteria.~Criteria();
+
+  G_OBJECT_CLASS(gaflight_criteria_parent_class)->finalize(object);
+}
+
+static void
+gaflight_criteria_set_property(GObject *object,
+                               guint prop_id,
+                               const GValue *value,
+                               GParamSpec *pspec)
+{
+  auto priv = GAFLIGHT_CRITERIA_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_EXPRESSION:
+    if (priv->expression) {
+      g_bytes_unref(priv->expression);
+    }
+    priv->expression = static_cast<GBytes *>(g_value_dup_boxed(value));
+    {
+      gsize size;
+      auto data = g_bytes_get_data(priv->expression, &size);
+      priv->criteria.expression.assign(static_cast<const char *>(data),
+                                       size);
+    }
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+gaflight_criteria_get_property(GObject *object,
+                               guint prop_id,
+                               GValue *value,
+                               GParamSpec *pspec)
+{
+  auto priv = GAFLIGHT_CRITERIA_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_EXPRESSION:
+    g_value_set_boxed(value, priv->expression);
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+gaflight_criteria_init(GAFlightCriteria *object)
+{
+  auto priv = GAFLIGHT_CRITERIA_GET_PRIVATE(object);
+  new(&priv->criteria) arrow::flight::Criteria;
+}
+
+static void
+gaflight_criteria_class_init(GAFlightCriteriaClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->dispose = gaflight_criteria_dispose;
+  gobject_class->finalize = gaflight_criteria_finalize;
+  gobject_class->set_property = gaflight_criteria_set_property;
+  gobject_class->get_property = gaflight_criteria_get_property;
+
+  GParamSpec *spec;
+  /**
+   * GAFlightCriteria:expression:
+   *
+   * Opaque criteria expression, dependent on server implementation.
+   *
+   * Since: 5.0.0
+   */
+  spec = g_param_spec_boxed("expression",
+                            "Expression",
+                            "Opaque criteria expression, "
+                            "dependent on server implementation",
+                            G_TYPE_BYTES,
+                            static_cast<GParamFlags>(G_PARAM_READWRITE));
+  g_object_class_install_property(gobject_class, PROP_EXPRESSION, spec);
+}
+
+/**
+ * gaflight_criteria_new:
+ * @expression: A #GBytes.
+ *
+ * Returns: The newly created #GAFlightCriteria, %NULL on error.
+ *
  * Since: 5.0.0
  */
+GAFlightCriteria *
+gaflight_criteria_new(GBytes *expression)
+{
+  return GAFLIGHT_CRITERIA(
+    g_object_new(GAFLIGHT_TYPE_CRITERIA,
+                 "expression", expression,
+                 NULL));
+}
+
 
 typedef struct GAFlightLocationPrivate_ {
   arrow::flight::Location location;
@@ -150,12 +305,890 @@ gaflight_location_equal(GAFlightLocation *location,
 }
 
 
+typedef struct GAFlightDescriptorPrivate_ {
+  arrow::flight::FlightDescriptor descriptor;
+} GAFlightDescriptorPrivate;
+
+enum {
+  PROP_DESCRIPTOR = 1,
+};
+
+G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GAFlightDescriptor,
+                                    gaflight_descriptor,
+                                    G_TYPE_OBJECT)
+
+#define GAFLIGHT_DESCRIPTOR_GET_PRIVATE(obj)            \
+  static_cast<GAFlightDescriptorPrivate *>(             \
+    gaflight_descriptor_get_instance_private(           \
+      GAFLIGHT_DESCRIPTOR(obj)))
+
+static void
+gaflight_descriptor_finalize(GObject *object)
+{
+  auto priv = GAFLIGHT_DESCRIPTOR_GET_PRIVATE(object);
+
+  priv->descriptor.~FlightDescriptor();
+
+  G_OBJECT_CLASS(gaflight_descriptor_parent_class)->finalize(object);
+}
+
+static void
+gaflight_descriptor_set_property(GObject *object,
+                                 guint prop_id,
+                                 const GValue *value,
+                                 GParamSpec *pspec)
+{
+  auto priv = GAFLIGHT_DESCRIPTOR_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_DESCRIPTOR:
+    priv->descriptor = *static_cast<arrow::flight::FlightDescriptor *>(
+      g_value_get_pointer(value));
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+gaflight_descriptor_init(GAFlightDescriptor *object)
+{
+  auto priv = GAFLIGHT_DESCRIPTOR_GET_PRIVATE(object);
+  new(&priv->descriptor) arrow::flight::FlightDescriptor;
+}
+
+static void
+gaflight_descriptor_class_init(GAFlightDescriptorClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->finalize = gaflight_descriptor_finalize;
+  gobject_class->set_property = gaflight_descriptor_set_property;
+
+  GParamSpec *spec;
+  spec = g_param_spec_pointer("descriptor",
+                              "Descriptor",
+                              "The raw arrow::flight::FlightDescriptor",
+                              static_cast<GParamFlags>(G_PARAM_WRITABLE |
+                                                       G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_EXPRESSION, spec);
+}
+
+/**
+ * gaflight_descriptor_to_string:
+ * @descriptor: A #GAFlightDescriptor.
+ *
+ * Returns: A descriptor as a string.
+ *
+ *   It should be freed with g_free() when no longer needed.
+ *
+ * Since: 5.0.0
+ */
+gchar *
+gaflight_descriptor_to_string(GAFlightDescriptor *descriptor)
+{
+  auto flight_descriptor = gaflight_descriptor_get_raw(descriptor);
+  return g_strdup(flight_descriptor->ToString().c_str());
+}
+
+/**
+ * gaflight_descriptor_equal:
+ * @descriptor: A #GAFlightDescriptor.
+ * @other_descriptor: A #GAFlightDescriptor to be compared.
+ *
+ * Returns: %TRUE if both of them represents the same descriptor,
+ *   %FALSE otherwise.
+ *
+ * Since: 5.0.0
+ */
+gboolean
+gaflight_descriptor_equal(GAFlightDescriptor *descriptor,
+                          GAFlightDescriptor *other_descriptor)
+{
+  const auto flight_descriptor =
+    gaflight_descriptor_get_raw(descriptor);
+  const auto flight_other_descriptor =
+    gaflight_descriptor_get_raw(other_descriptor);
+  return flight_descriptor->Equals(*flight_other_descriptor);
+}
+
+
+G_DEFINE_TYPE(GAFlightPathDescriptor,
+              gaflight_path_descriptor,
+              GAFLIGHT_TYPE_DESCRIPTOR)
+
+static void
+gaflight_path_descriptor_init(GAFlightPathDescriptor *object)
+{
+}
+
+static void
+gaflight_path_descriptor_class_init(GAFlightPathDescriptorClass *klass)
+{
+}
+
+/**
+ * gaflight_path_descriptor_new:
+ * @paths: (array length=n_paths): List of paths identifying a
+ *   particular dataset.
+ * @n_paths: The number of @paths.
+ *
+ * Returns: The newly created #GAFlightPathDescriptor.
+ *
+ * Since: 5.0.0
+ */
+GAFlightPathDescriptor *
+gaflight_path_descriptor_new(const gchar **paths,
+                             gsize n_paths)
+{
+  std::vector<std::string> flight_paths;
+  for (gsize i = 0; i < n_paths; i++) {
+    flight_paths.push_back(paths[i]);
+  }
+  auto flight_descriptor = arrow::flight::FlightDescriptor::Path(flight_paths);
+  return GAFLIGHT_PATH_DESCRIPTOR(
+    gaflight_descriptor_new_raw(&flight_descriptor));
+}
+
+/**
+ * gaflight_path_descriptor_get_paths:
+ * @descriptor: A #GAFlightPathDescriptor.
+ *
+ * Returns: (nullable) (array zero-terminated=1) (transfer full):
+ *   The paths in this descriptor.
+ *
+ *   It must be freed with g_strfreev() when no longer needed.
+ *
+ * Since: 5.0.0
+ */
+gchar **
+gaflight_path_descriptor_get_paths(GAFlightPathDescriptor *descriptor)
+{
+  const auto flight_descriptor =
+    gaflight_descriptor_get_raw(GAFLIGHT_DESCRIPTOR(descriptor));
+  const auto &flight_paths = flight_descriptor->path;
+  if (flight_paths.empty()) {
+    return NULL;
+  } else {
+    auto paths = g_new(gchar *, flight_paths.size() + 1);
+    gsize i = 0;
+    for (const auto &flight_path : flight_paths) {
+      paths[i++] = g_strdup(flight_path.c_str());
+    }
+    paths[i] = NULL;
+    return paths;
+  }
+}
+
+
+G_DEFINE_TYPE(GAFlightCommandDescriptor,
+              gaflight_command_descriptor,
+              GAFLIGHT_TYPE_DESCRIPTOR)
+
+static void
+gaflight_command_descriptor_init(GAFlightCommandDescriptor *object)
+{
+}
+
+static void
+gaflight_command_descriptor_class_init(GAFlightCommandDescriptorClass *klass)
+{
+}
+
+/**
+ * gaflight_command_descriptor_new:
+ * @command: Opaque value used to express a command.
+ *
+ * Returns: The newly created #GAFlightCommandDescriptor.
+ *
+ * Since: 5.0.0
+ */
+GAFlightCommandDescriptor *
+gaflight_command_descriptor_new(const gchar *command)
+{
+  auto flight_descriptor = arrow::flight::FlightDescriptor::Command(command);
+  return GAFLIGHT_COMMAND_DESCRIPTOR(
+    gaflight_descriptor_new_raw(&flight_descriptor));
+}
+
+/**
+ * gaflight_command_descriptor_get_command:
+ * @descriptor: A #GAFlightCommandDescriptor.
+ *
+ * Returns: The opaque value used to express a command.
+ *
+ *   It should be freed with g_free() when no longer needed.
+ *
+ * Since: 5.0.0
+ */
+gchar *
+gaflight_command_descriptor_get_command(GAFlightCommandDescriptor *descriptor)
+{
+  const auto flight_descriptor =
+    gaflight_descriptor_get_raw(GAFLIGHT_DESCRIPTOR(descriptor));
+  const auto &flight_command = flight_descriptor->cmd;
+  return g_strdup(flight_command.c_str());
+}
+
+
+typedef struct GAFlightTicketPrivate_ {
+  arrow::flight::Ticket ticket;
+  GBytes *data;
+} GAFlightTicketPrivate;
+
+enum {
+  PROP_DATA = 1,
+};
+
+G_DEFINE_TYPE_WITH_PRIVATE(GAFlightTicket,
+                           gaflight_ticket,
+                           G_TYPE_OBJECT)
+
+#define GAFLIGHT_TICKET_GET_PRIVATE(obj)            \
+  static_cast<GAFlightTicketPrivate *>(             \
+    gaflight_ticket_get_instance_private(           \
+      GAFLIGHT_TICKET(obj)))
+
+static void
+gaflight_ticket_dispose(GObject *object)
+{
+  auto priv = GAFLIGHT_TICKET_GET_PRIVATE(object);
+
+  if (priv->data) {
+    g_bytes_unref(priv->data);
+    priv->data = NULL;
+  }
+
+  G_OBJECT_CLASS(gaflight_ticket_parent_class)->dispose(object);
+}
+
+static void
+gaflight_ticket_finalize(GObject *object)
+{
+  auto priv = GAFLIGHT_TICKET_GET_PRIVATE(object);
+
+  priv->ticket.~Ticket();
+
+  G_OBJECT_CLASS(gaflight_ticket_parent_class)->finalize(object);
+}
+
+static void
+gaflight_ticket_set_property(GObject *object,
+                             guint prop_id,
+                             const GValue *value,
+                             GParamSpec *pspec)
+{
+  auto priv = GAFLIGHT_TICKET_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_DATA:
+    if (priv->data) {
+      g_bytes_unref(priv->data);
+    }
+    priv->data = static_cast<GBytes *>(g_value_dup_boxed(value));
+    {
+      gsize size;
+      auto data = g_bytes_get_data(priv->data, &size);
+      priv->ticket.ticket.assign(static_cast<const char *>(data),
+                                 size);
+    }
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+gaflight_ticket_get_property(GObject *object,
+                             guint prop_id,
+                             GValue *value,
+                             GParamSpec *pspec)
+{
+  auto priv = GAFLIGHT_TICKET_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_DATA:
+    g_value_set_boxed(value, priv->data);
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+gaflight_ticket_init(GAFlightTicket *object)
+{
+  auto priv = GAFLIGHT_TICKET_GET_PRIVATE(object);
+  new(&priv->ticket) arrow::flight::Ticket;
+}
+
+static void
+gaflight_ticket_class_init(GAFlightTicketClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->dispose = gaflight_ticket_dispose;
+  gobject_class->finalize = gaflight_ticket_finalize;
+  gobject_class->set_property = gaflight_ticket_set_property;
+  gobject_class->get_property = gaflight_ticket_get_property;
+
+  GParamSpec *spec;
+  /**
+   * GAFlightTicket:data:
+   *
+   * Opaque identifier or credential to use when requesting a data
+   * stream with the DoGet RPC.
+   *
+   * Since: 5.0.0
+   */
+  spec = g_param_spec_boxed("data",
+                            "Data",
+                            "Opaque identifier or credential to use "
+                            "when requesting a data stream with the DoGet RPC",
+                            G_TYPE_BYTES,
+                            static_cast<GParamFlags>(G_PARAM_READWRITE));
+  g_object_class_install_property(gobject_class, PROP_DATA, spec);
+}
+
+/**
+ * gaflight_ticket_new:
+ * @data: A #GBytes.
+ *
+ * Returns: The newly created #GAFlightTicket, %NULL on error.
+ *
+ * Since: 5.0.0
+ */
+GAFlightTicket *
+gaflight_ticket_new(GBytes *data)
+{
+  return GAFLIGHT_TICKET(
+    g_object_new(GAFLIGHT_TYPE_TICKET,
+                 "data", data,
+                 NULL));
+}
+
+/**
+ * gaflight_ticket_equal:
+ * @ticket: A #GAFlightTicket.
+ * @other_ticket: A #GAFlightTicket to be compared.
+ *
+ * Returns: %TRUE if both of them represents the same ticket, %FALSE otherwise.
+ *
+ * Since: 5.0.0
+ */
+gboolean
+gaflight_ticket_equal(GAFlightTicket *ticket,
+                      GAFlightTicket *other_ticket)
+{
+  const auto flight_ticket = gaflight_ticket_get_raw(ticket);
+  const auto flight_other_ticket = gaflight_ticket_get_raw(other_ticket);
+  return flight_ticket->Equals(*flight_other_ticket);
+}
+
+
+typedef struct GAFlightEndpointPrivate_ {
+  arrow::flight::FlightEndpoint endpoint;
+  GAFlightTicket *ticket;
+  GList *locations;
+} GAFlightEndpointPrivate;
+
+enum {
+  PROP_TICKET = 1,
+};
+
+G_DEFINE_TYPE_WITH_PRIVATE(GAFlightEndpoint,
+                           gaflight_endpoint,
+                           G_TYPE_OBJECT)
+
+#define GAFLIGHT_ENDPOINT_GET_PRIVATE(obj)            \
+  static_cast<GAFlightEndpointPrivate *>(             \
+    gaflight_endpoint_get_instance_private(           \
+      GAFLIGHT_ENDPOINT(obj)))
+
+static void
+gaflight_endpoint_dispose(GObject *object)
+{
+  auto priv = GAFLIGHT_ENDPOINT_GET_PRIVATE(object);
+
+  if (priv->ticket) {
+    g_object_unref(priv->ticket);
+    priv->ticket = NULL;
+  }
+
+  if (priv->locations) {
+    g_list_free_full(priv->locations, g_object_unref);
+    priv->locations = NULL;
+  }
+
+  G_OBJECT_CLASS(gaflight_endpoint_parent_class)->dispose(object);
+}
+
+static void
+gaflight_endpoint_finalize(GObject *object)
+{
+  auto priv = GAFLIGHT_ENDPOINT_GET_PRIVATE(object);
+
+  priv->endpoint.~FlightEndpoint();
+
+  G_OBJECT_CLASS(gaflight_endpoint_parent_class)->finalize(object);
+}
+
+static void
+gaflight_endpoint_get_property(GObject *object,
+                               guint prop_id,
+                               GValue *value,
+                               GParamSpec *pspec)
+{
+  auto priv = GAFLIGHT_ENDPOINT_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_TICKET:
+    g_value_set_object(value, priv->ticket);
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+gaflight_endpoint_init(GAFlightEndpoint *object)
+{
+  auto priv = GAFLIGHT_ENDPOINT_GET_PRIVATE(object);
+  new(&priv->endpoint) arrow::flight::FlightEndpoint;
+}
+
+static void
+gaflight_endpoint_class_init(GAFlightEndpointClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->dispose = gaflight_endpoint_dispose;
+  gobject_class->finalize = gaflight_endpoint_finalize;
+  gobject_class->get_property = gaflight_endpoint_get_property;
+
+  GParamSpec *spec;
+  /**
+   * GAFlightEndpoint:ticket:
+   *
+   * Opaque ticket identify; use with DoGet RPC.
+   *
+   * Since: 5.0.0
+   */
+  spec = g_param_spec_object("ticket",
+                             "Ticket",
+                             "Opaque ticket identify; use with DoGet RPC",
+                             GAFLIGHT_TYPE_TICKET,
+                             static_cast<GParamFlags>(G_PARAM_READABLE));
+  g_object_class_install_property(gobject_class, PROP_TICKET, spec);
+}
+
+/**
+ * gaflight_endpoint_new:
+ * @ticket: A #GAFlightTicket.
+ * @locations: (element-type GAFlightLocation): A list of #GAFlightLocation.
+ *
+ * Returns: The newly created #GAFlightEndpoint, %NULL on error.
+ *
+ * Since: 5.0.0
+ */
+GAFlightEndpoint *
+gaflight_endpoint_new(GAFlightTicket *ticket,
+                      GList *locations)
+{
+  auto endpoint = gaflight_endpoint_new_raw(nullptr, ticket);
+  auto priv = GAFLIGHT_ENDPOINT_GET_PRIVATE(endpoint);
+  for (auto node = locations; node; node = node->next) {
+    auto location = GAFLIGHT_LOCATION(node->data);
+    priv->endpoint.locations.push_back(*gaflight_location_get_raw(location));
+  }
+  return endpoint;
+}
+
+/**
+ * gaflight_endpoint_equal:
+ * @endpoint: A #GAFlightEndpoint.
+ * @other_endpoint: A #GAFlightEndpoint to be compared.
+ *
+ * Returns: %TRUE if both of them represents the same endpoint,
+ *   %FALSE otherwise.
+ *
+ * Since: 5.0.0
+ */
+gboolean
+gaflight_endpoint_equal(GAFlightEndpoint *endpoint,
+                        GAFlightEndpoint *other_endpoint)
+{
+  const auto flight_endpoint = gaflight_endpoint_get_raw(endpoint);
+  const auto flight_other_endpoint = gaflight_endpoint_get_raw(other_endpoint);
+  return flight_endpoint->Equals(*flight_other_endpoint);
+}
+
+/**
+ * gaflight_endpoint_get_locations:
+ * @endpoint: A #GAFlightEndpoint.
+ *
+ * Returns: (nullable) (element-type GAFlightLocation) (transfer full):
+ *   The locations in this endpoint.
+ *
+ *   It must be freed with g_list_free() and g_object_unref() when no
+ *   longer needed. You can use `g_list_free_full(locations,
+ *   g_object_unref)`.
+ *
+ * Since: 5.0.0
+ */
+GList *
+gaflight_endpoint_get_locations(GAFlightEndpoint *endpoint)
+{
+  const auto flight_endpoint = gaflight_endpoint_get_raw(endpoint);
+  GList *locations = NULL;
+  for (const auto &flight_location : flight_endpoint->locations) {
+    auto location = gaflight_location_new(flight_location.ToString().c_str(),
+                                          nullptr);
+    locations = g_list_prepend(locations, location);
+  }
+  return g_list_reverse(locations);
+}
+
+
+typedef struct GAFlightInfoPrivate_ {
+  arrow::flight::FlightInfo info;
+} GAFlightInfoPrivate;
+
+enum {
+  PROP_INFO = 1,
+};
+
+G_DEFINE_TYPE_WITH_PRIVATE(GAFlightInfo,
+                           gaflight_info,
+                           G_TYPE_OBJECT)
+
+#define GAFLIGHT_INFO_GET_PRIVATE(obj)            \
+  static_cast<GAFlightInfoPrivate *>(             \
+    gaflight_info_get_instance_private(           \
+      GAFLIGHT_INFO(obj)))
+
+static void
+gaflight_info_finalize(GObject *object)
+{
+  auto priv = GAFLIGHT_INFO_GET_PRIVATE(object);
+
+  priv->info.~FlightInfo();
+
+  G_OBJECT_CLASS(gaflight_info_parent_class)->finalize(object);
+}
+
+static void
+gaflight_info_set_property(GObject *object,
+                           guint prop_id,
+                           const GValue *value,
+                           GParamSpec *pspec)
+{
+  auto priv = GAFLIGHT_INFO_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_INFO:
+    {
+      auto info =
+        static_cast<arrow::flight::FlightInfo *>(g_value_get_pointer(value));
+      new(&(priv->info)) arrow::flight::FlightInfo(*info);
+    }
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+gaflight_info_init(GAFlightInfo *object)
+{
+}
+
+static void
+gaflight_info_class_init(GAFlightInfoClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->finalize = gaflight_info_finalize;
+  gobject_class->set_property = gaflight_info_set_property;
+
+  GParamSpec *spec;
+  spec = g_param_spec_pointer("info",
+                              "Info",
+                              "The raw arrow::flight::FlightInfo *",
+                              static_cast<GParamFlags>(G_PARAM_WRITABLE |
+                                                       G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_INFO, spec);
+}
+
+/**
+ * gaflight_info_new:
+ * @schema: A #GArrowSchema.
+ * @descriptor: A #GAFlightDescriptor.
+ * @endpoints: (element-type GAFlightEndpoint): A list of #GAFlightEndpoint.
+ * @total_records: The number of total records.
+ * @total_bytes: The number of total bytes.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (nullable): The newly created #GAFlightInfo, %NULL on error.
+ *
+ * Since: 5.0.0
+ */
+GAFlightInfo *
+gaflight_info_new(GArrowSchema *schema,
+                  GAFlightDescriptor *descriptor,
+                  GList *endpoints,
+                  gint64 total_records,
+                  gint64 total_bytes,
+                  GError **error)
+{
+  auto arrow_schema = garrow_schema_get_raw(schema);
+  auto flight_descriptor = gaflight_descriptor_get_raw(descriptor);
+  std::vector<arrow::flight::FlightEndpoint> flight_endpoints;
+  for (auto node = endpoints; node; node = node->next) {
+    auto endpoint = GAFLIGHT_ENDPOINT(node->data);
+    flight_endpoints.push_back(*gaflight_endpoint_get_raw(endpoint));
+  }
+  auto flight_info_result =
+    arrow::flight::FlightInfo::Make(*arrow_schema,
+                                    *flight_descriptor,
+                                    flight_endpoints,
+                                    total_records,
+                                    total_bytes);
+  if (!garrow::check(error,
+                     flight_info_result,
+                     "[flight-info][new]")) {
+    return NULL;
+  }
+  return gaflight_info_new_raw(&(*flight_info_result));
+}
+
+/**
+ * gaflight_info_equal:
+ * @info: A #GAFlightInfo.
+ * @other_info: A #GAFlightInfo to be compared.
+ *
+ * Returns: %TRUE if both of them represents the same information,
+ *   %FALSE otherwise.
+ *
+ * Since: 5.0.0
+ */
+gboolean
+gaflight_info_equal(GAFlightInfo *info,
+                    GAFlightInfo *other_info)
+{
+  const auto flight_info = gaflight_info_get_raw(info);
+  const auto flight_other_info = gaflight_info_get_raw(other_info);
+  return
+    (flight_info->serialized_schema() ==
+     flight_other_info->serialized_schema()) &&
+    (flight_info->descriptor() ==
+     flight_other_info->descriptor()) &&
+    (flight_info->endpoints() ==
+     flight_other_info->endpoints()) &&
+    (flight_info->total_records() ==
+     flight_other_info->total_records()) &&
+    (flight_info->total_bytes() ==
+     flight_other_info->total_bytes());
+}
+
+/**
+ * gaflight_info_get_schema:
+ * @info: A #GAFlightInfo.
+ * @options: (nullable): A #GArrowReadOptions.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (transfer full): Deserialized #GArrowSchema, %NULL on error.
+ *
+ * Since: 5.0.0
+ */
+GArrowSchema *
+gaflight_info_get_schema(GAFlightInfo *info,
+                         GArrowReadOptions *options,
+                         GError **error)
+{
+  const auto flight_info = gaflight_info_get_raw(info);
+  arrow::Status status;
+  std::shared_ptr<arrow::Schema> arrow_schema;
+  if (options) {
+    auto arrow_memo = garrow_read_options_get_dictionary_memo_raw(options);
+    status = flight_info->GetSchema(arrow_memo, &arrow_schema);
+  } else {
+    arrow::ipc::DictionaryMemo arrow_memo;
+    status = flight_info->GetSchema(&arrow_memo, &arrow_schema);
+  }
+  if (garrow::check(error, status, "[flight-info][get-schema]")) {
+    return garrow_schema_new_raw(&arrow_schema);
+  } else {
+    return NULL;
+  }
+}
+
+/**
+ * gaflight_info_get_descriptor:
+ * @info: A #GAFlightInfo.
+ *
+ * Returns: (transfer full): The #GAFlightDescriptor of the information.
+ *
+ * Since: 5.0.0
+ */
+GAFlightDescriptor *
+gaflight_info_get_descriptor(GAFlightInfo *info)
+{
+  const auto flight_info = gaflight_info_get_raw(info);
+  return gaflight_descriptor_new_raw(&(flight_info->descriptor()));
+}
+
+/**
+ * gaflight_info_get_endpoints:
+ * @info: A #GAFlightInfo.
+ *
+ * Returns: (element-type GAFlightEndpoint) (transfer full):
+ *   The list of #GAFlightEndpoint of the information.
+ *
+ * Since: 5.0.0
+ */
+GList *
+gaflight_info_get_endpoints(GAFlightInfo *info)
+{
+  const auto flight_info = gaflight_info_get_raw(info);
+  GList *endpoints = NULL;
+  for (const auto &flight_endpoint : flight_info->endpoints()) {
+    auto endpoint = gaflight_endpoint_new_raw(&flight_endpoint, nullptr);
+    endpoints = g_list_prepend(endpoints, endpoint);
+  }
+  return g_list_reverse(endpoints);
+}
+
+/**
+ * gaflight_info_get_total_records:
+ * @info: A #GAFlightInfo.
+ *
+ * Returns: The number of total records of the information.
+ *
+ * Since: 5.0.0
+ */
+gint64
+gaflight_info_get_total_records(GAFlightInfo *info)
+{
+  const auto flight_info = gaflight_info_get_raw(info);
+  return flight_info->total_records();
+}
+
+/**
+ * gaflight_info_get_total_bytes:
+ * @info: A #GAFlightInfo.
+ *
+ * Returns: The number of total bytes of the information.
+ *
+ * Since: 5.0.0
+ */
+gint64
+gaflight_info_get_total_bytes(GAFlightInfo *info)
+{
+  const auto flight_info = gaflight_info_get_raw(info);
+  return flight_info->total_bytes();
+}
+
+
 G_END_DECLS
 
 
+arrow::flight::Criteria *
+gaflight_criteria_get_raw(GAFlightCriteria *criteria)
+{
+  auto priv = GAFLIGHT_CRITERIA_GET_PRIVATE(criteria);
+  return &(priv->criteria);
+}
+
 arrow::flight::Location *
 gaflight_location_get_raw(GAFlightLocation *location)
 {
   auto priv = GAFLIGHT_LOCATION_GET_PRIVATE(location);
   return &(priv->location);
 }
+
+GAFlightDescriptor *
+gaflight_descriptor_new_raw(
+  const arrow::flight::FlightDescriptor *flight_descriptor)
+{
+  GType gtype = GAFLIGHT_TYPE_DESCRIPTOR;
+  switch (flight_descriptor->type) {
+  case arrow::flight::FlightDescriptor::DescriptorType::PATH:
+    gtype = GAFLIGHT_TYPE_PATH_DESCRIPTOR;
+    break;
+  case arrow::flight::FlightDescriptor::DescriptorType::CMD:
+    gtype = GAFLIGHT_TYPE_COMMAND_DESCRIPTOR;
+    break;
+  default:
+    break;
+  }
+  return GAFLIGHT_DESCRIPTOR(g_object_new(gtype,
+                                          "descriptor", flight_descriptor,
+                                          NULL));
+}
+
+arrow::flight::FlightDescriptor *
+gaflight_descriptor_get_raw(GAFlightDescriptor *descriptor)
+{
+  auto priv = GAFLIGHT_DESCRIPTOR_GET_PRIVATE(descriptor);
+  return &(priv->descriptor);
+}
+
+arrow::flight::Ticket *
+gaflight_ticket_get_raw(GAFlightTicket *ticket)
+{
+  auto priv = GAFLIGHT_TICKET_GET_PRIVATE(ticket);
+  return &(priv->ticket);
+}
+
+GAFlightEndpoint *
+gaflight_endpoint_new_raw(const arrow::flight::FlightEndpoint *flight_endpoint,
+                          GAFlightTicket *ticket)
+{
+  auto endpoint = GAFLIGHT_ENDPOINT(g_object_new(GAFLIGHT_TYPE_ENDPOINT,
+                                                 NULL));
+  auto priv = GAFLIGHT_ENDPOINT_GET_PRIVATE(endpoint);
+  if (ticket) {
+    priv->ticket = ticket;
+    g_object_ref(priv->ticket);
+    priv->endpoint.ticket = *gaflight_ticket_get_raw(priv->ticket);
+  } else {
+    auto data = g_bytes_new(flight_endpoint->ticket.ticket.data(),
+                            flight_endpoint->ticket.ticket.length());
+    auto ticket = gaflight_ticket_new(data);
+    g_bytes_unref(data);
+    priv->ticket = ticket;
+    priv->endpoint.ticket.ticket = flight_endpoint->ticket.ticket;
+  }
+  if (flight_endpoint) {
+    priv->endpoint.locations = flight_endpoint->locations;
+  }
+  return endpoint;
+}
+
+arrow::flight::FlightEndpoint *
+gaflight_endpoint_get_raw(GAFlightEndpoint *endpoint)
+{
+  auto priv = GAFLIGHT_ENDPOINT_GET_PRIVATE(endpoint);
+  return &(priv->endpoint);
+}
+
+GAFlightInfo *
+gaflight_info_new_raw(arrow::flight::FlightInfo *flight_info)
+{
+  return GAFLIGHT_INFO(g_object_new(GAFLIGHT_TYPE_INFO,
+                                    "info", flight_info,
+                                    NULL));
+}
+
+arrow::flight::FlightInfo *
+gaflight_info_get_raw(GAFlightInfo *info)
+{
+  auto priv = GAFLIGHT_INFO_GET_PRIVATE(info);
+  return &(priv->info);
+}
diff --git a/c_glib/arrow-flight-glib/common.h b/c_glib/arrow-flight-glib/common.h
index f5a641b1e05..77f64e06123 100644
--- a/c_glib/arrow-flight-glib/common.h
+++ b/c_glib/arrow-flight-glib/common.h
@@ -24,6 +24,22 @@
 G_BEGIN_DECLS
 
 
+#define GAFLIGHT_TYPE_CRITERIA (gaflight_criteria_get_type())
+G_DECLARE_DERIVABLE_TYPE(GAFlightCriteria,
+                         gaflight_criteria,
+                         GAFLIGHT,
+                         CRITERIA,
+                         GObject)
+struct _GAFlightCriteriaClass
+{
+  GObjectClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GAFlightCriteria *
+gaflight_criteria_new(GBytes *expression);
+
+
 #define GAFLIGHT_TYPE_LOCATION (gaflight_location_get_type())
 G_DECLARE_DERIVABLE_TYPE(GAFlightLocation,
                          gaflight_location,
@@ -54,4 +70,157 @@ gaflight_location_equal(GAFlightLocation *location,
                         GAFlightLocation *other_location);
 
 
+#define GAFLIGHT_TYPE_DESCRIPTOR (gaflight_descriptor_get_type())
+G_DECLARE_DERIVABLE_TYPE(GAFlightDescriptor,
+                         gaflight_descriptor,
+                         GAFLIGHT,
+                         DESCRIPTOR,
+                         GObject)
+struct _GAFlightDescriptorClass
+{
+  GObjectClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+gchar *
+gaflight_descriptor_to_string(GAFlightDescriptor *descriptor);
+
+GARROW_AVAILABLE_IN_5_0
+gboolean
+gaflight_descriptor_equal(GAFlightDescriptor *descriptor,
+                          GAFlightDescriptor *other_descriptor);
+
+
+#define GAFLIGHT_TYPE_PATH_DESCRIPTOR (gaflight_path_descriptor_get_type())
+G_DECLARE_DERIVABLE_TYPE(GAFlightPathDescriptor,
+                         gaflight_path_descriptor,
+                         GAFLIGHT,
+                         PATH_DESCRIPTOR,
+                         GAFlightDescriptor)
+struct _GAFlightPathDescriptorClass
+{
+  GAFlightDescriptorClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GAFlightPathDescriptor *
+gaflight_path_descriptor_new(const gchar **paths,
+                             gsize n_paths);
+
+GARROW_AVAILABLE_IN_5_0
+gchar **
+gaflight_path_descriptor_get_paths(GAFlightPathDescriptor *descriptor);
+
+
+#define GAFLIGHT_TYPE_COMMAND_DESCRIPTOR (gaflight_command_descriptor_get_type())
+G_DECLARE_DERIVABLE_TYPE(GAFlightCommandDescriptor,
+                         gaflight_command_descriptor,
+                         GAFLIGHT,
+                         COMMAND_DESCRIPTOR,
+                         GAFlightDescriptor)
+struct _GAFlightCommandDescriptorClass
+{
+  GAFlightDescriptorClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GAFlightCommandDescriptor *
+gaflight_command_descriptor_new(const gchar *command);
+
+GARROW_AVAILABLE_IN_5_0
+gchar *
+gaflight_command_descriptor_get_command(GAFlightCommandDescriptor *descriptor);
+
+
+#define GAFLIGHT_TYPE_TICKET (gaflight_ticket_get_type())
+G_DECLARE_DERIVABLE_TYPE(GAFlightTicket,
+                         gaflight_ticket,
+                         GAFLIGHT,
+                         TICKET,
+                         GObject)
+struct _GAFlightTicketClass
+{
+  GObjectClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GAFlightTicket *
+gaflight_ticket_new(GBytes *data);
+
+GARROW_AVAILABLE_IN_5_0
+gboolean
+gaflight_ticket_equal(GAFlightTicket *ticket,
+                      GAFlightTicket *other_ticket);
+
+
+#define GAFLIGHT_TYPE_ENDPOINT (gaflight_endpoint_get_type())
+G_DECLARE_DERIVABLE_TYPE(GAFlightEndpoint,
+                         gaflight_endpoint,
+                         GAFLIGHT,
+                         ENDPOINT,
+                         GObject)
+struct _GAFlightEndpointClass
+{
+  GObjectClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GAFlightEndpoint *
+gaflight_endpoint_new(GAFlightTicket *ticket,
+                      GList *locations);
+
+GARROW_AVAILABLE_IN_5_0
+gboolean
+gaflight_endpoint_equal(GAFlightEndpoint *endpoint,
+                        GAFlightEndpoint *other_endpoint);
+
+GARROW_AVAILABLE_IN_5_0
+GList *
+gaflight_endpoint_get_locations(GAFlightEndpoint *endpoint);
+
+
+#define GAFLIGHT_TYPE_INFO (gaflight_info_get_type())
+G_DECLARE_DERIVABLE_TYPE(GAFlightInfo,
+                         gaflight_info,
+                         GAFLIGHT,
+                         INFO,
+                         GObject)
+struct _GAFlightInfoClass
+{
+  GObjectClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_5_0
+GAFlightInfo *
+gaflight_info_new(GArrowSchema *schema,
+                  GAFlightDescriptor *descriptor,
+                  GList *endpoints,
+                  gint64 total_records,
+                  gint64 total_bytes,
+                  GError **error);
+
+GARROW_AVAILABLE_IN_5_0
+gboolean
+gaflight_info_equal(GAFlightInfo *info,
+                    GAFlightInfo *other_info);
+
+GARROW_AVAILABLE_IN_5_0
+GArrowSchema *
+gaflight_info_get_schema(GAFlightInfo *info,
+                         GArrowReadOptions *options,
+                         GError **error);
+GARROW_AVAILABLE_IN_5_0
+GAFlightDescriptor *
+gaflight_info_get_descriptor(GAFlightInfo *info);
+GARROW_AVAILABLE_IN_5_0
+GList *
+gaflight_info_get_endpoints(GAFlightInfo *info);
+GARROW_AVAILABLE_IN_5_0
+gint64
+gaflight_info_get_total_records(GAFlightInfo *info);
+GARROW_AVAILABLE_IN_5_0
+gint64
+gaflight_info_get_total_bytes(GAFlightInfo *info);
+
+
 G_END_DECLS
diff --git a/c_glib/arrow-flight-glib/common.hpp b/c_glib/arrow-flight-glib/common.hpp
index 2ea06d9f39f..c84bfe7d15c 100644
--- a/c_glib/arrow-flight-glib/common.hpp
+++ b/c_glib/arrow-flight-glib/common.hpp
@@ -24,5 +24,28 @@
 #include <arrow-flight-glib/common.h>
 
 
+arrow::flight::Criteria *
+gaflight_criteria_get_raw(GAFlightCriteria *criteria);
+
 arrow::flight::Location *
 gaflight_location_get_raw(GAFlightLocation *location);
+
+GAFlightDescriptor *
+gaflight_descriptor_new_raw(
+  const arrow::flight::FlightDescriptor *flight_descriptor);
+arrow::flight::FlightDescriptor *
+gaflight_descriptor_get_raw(GAFlightDescriptor *descriptor);
+
+arrow::flight::Ticket *
+gaflight_ticket_get_raw(GAFlightTicket *ticket);
+
+GAFlightEndpoint *
+gaflight_endpoint_new_raw(const arrow::flight::FlightEndpoint *flight_endpoint,
+                          GAFlightTicket *ticket);
+arrow::flight::FlightEndpoint *
+gaflight_endpoint_get_raw(GAFlightEndpoint *endpoint);
+
+GAFlightInfo *
+gaflight_info_new_raw(arrow::flight::FlightInfo *flight_info);
+arrow::flight::FlightInfo *
+gaflight_info_get_raw(GAFlightInfo *info);
diff --git a/c_glib/arrow-flight-glib/server.cpp b/c_glib/arrow-flight-glib/server.cpp
index 1d2e3a5b10a..8fad34926b4 100644
--- a/c_glib/arrow-flight-glib/server.cpp
+++ b/c_glib/arrow-flight-glib/server.cpp
@@ -17,6 +17,8 @@
  * under the License.
  */
 
+#include <arrow/util/make_unique.h>
+
 #include <arrow-glib/error.hpp>
 
 #include <arrow-flight-glib/common.hpp>
@@ -162,14 +164,108 @@ gaflight_server_options_new(GAFlightLocation *location)
 }
 
 
-typedef struct GAFlightServerPrivate_ {
-  arrow::flight::FlightServerBase server;
-} GAFlightServerPrivate;
+typedef struct GAFlightServerCallContextPrivate_ {
+  arrow::flight::ServerCallContext *call_context;
+} GAFlightServerCallContextPrivate;
 
 enum {
-  PROP_SERVER = 1,
+  PROP_CALL_CONTEXT = 1,
 };
 
+G_DEFINE_TYPE_WITH_PRIVATE(GAFlightServerCallContext,
+                           gaflight_server_call_context,
+                           G_TYPE_OBJECT)
+
+#define GAFLIGHT_SERVER_CALL_CONTEXT_GET_PRIVATE(obj)   \
+  static_cast<GAFlightServerCallContextPrivate *>(      \
+    gaflight_server_call_context_get_instance_private(  \
+      GAFLIGHT_SERVER_CALL_CONTEXT(obj)))
+
+static void
+gaflight_server_call_context_set_property(GObject *object,
+                                          guint prop_id,
+                                          const GValue *value,
+                                          GParamSpec *pspec)
+{
+  auto priv = GAFLIGHT_SERVER_CALL_CONTEXT_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_CALL_CONTEXT:
+    priv->call_context =
+      static_cast<arrow::flight::ServerCallContext *>(
+        g_value_get_pointer(value));
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+gaflight_server_call_context_init(GAFlightServerCallContext *object)
+{
+}
+
+static void
+gaflight_server_call_context_class_init(GAFlightServerCallContextClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->set_property = gaflight_server_call_context_set_property;
+
+  GParamSpec *spec;
+  spec = g_param_spec_pointer("call-context",
+                              "Call context",
+                              "The raw arrow::flight::ServerCallContext",
+                              static_cast<GParamFlags>(G_PARAM_WRITABLE |
+                                                       G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_CALL_CONTEXT, spec);
+}
+
+
+G_END_DECLS
+namespace gaflight {
+  class Server : public arrow::flight::FlightServerBase {
+  public:
+    Server(GAFlightServer *gaserver) : gaserver_(gaserver) {
+    }
+
+    arrow::Status ListFlights(
+      const arrow::flight::ServerCallContext &context,
+      const arrow::flight::Criteria *criteria,
+      std::unique_ptr<arrow::flight::FlightListing> *listing) override {
+      GError *gerror = NULL;
+      auto gaflights = gaflight_server_list_flights(gaserver_,
+                                                    NULL,
+                                                    NULL,
+                                                    &gerror);
+      if (gerror) {
+        return garrow_error_to_status(gerror,
+                                      arrow::StatusCode::UnknownError,
+                                      "[flight-server][list-flights]");
+      }
+      std::vector<arrow::flight::FlightInfo> flights;
+      for (auto node = gaflights; node; node = node->next) {
+        auto gaflight = GAFLIGHT_INFO(node->data);
+        flights.push_back(*gaflight_info_get_raw(gaflight));
+        g_object_unref(gaflight);
+      }
+      g_list_free(gaflights);
+      *listing = arrow::internal::make_unique<
+        arrow::flight::SimpleFlightListing>(flights);
+      return arrow::Status::OK();
+    }
+
+  private:
+    GAFlightServer *gaserver_;
+  };
+};
+G_BEGIN_DECLS
+
+typedef struct GAFlightServerPrivate_ {
+  gaflight::Server server;
+} GAFlightServerPrivate;
+
 G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GAFlightServer,
                                     gaflight_server,
                                     G_TYPE_OBJECT)
@@ -184,7 +280,7 @@ gaflight_server_finalize(GObject *object)
 {
   auto priv = GAFLIGHT_SERVER_GET_PRIVATE(object);
 
-  priv->server.~FlightServerBase();
+  priv->server.~Server();
 
   G_OBJECT_CLASS(gaflight_server_parent_class)->finalize(object);
 }
@@ -193,7 +289,7 @@ static void
 gaflight_server_init(GAFlightServer *object)
 {
   auto priv = GAFLIGHT_SERVER_GET_PRIVATE(object);
-  new(&(priv->server)) arrow::flight::FlightServerBase;
+  new(&(priv->server)) gaflight::Server(object);
 }
 
 static void
@@ -263,6 +359,38 @@ gaflight_server_shutdown(GAFlightServer *server,
                        "[flight-server][shutdown]");
 }
 
+/**
+ * gaflight_server_list_flights:
+ * @server: A #GAFlightServer.
+ * @context: A #GAFlightServerCallContext.
+ * @criteria: (nullable): A #GAFlightCriteria.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Shuts down the serve. This function can be called from signal
+ * handler or another thread while gaflight_server_serve() blocks.
+ *
+ * Returns: (element-type GAFlightInfo) (transfer full):
+ *   #GList of #GAFlightInfo on success, %NULL on error.
+ *
+ * Since: 5.0.0
+ */
+GList *
+gaflight_server_list_flights(GAFlightServer *server,
+                             GAFlightServerCallContext *context,
+                             GAFlightCriteria *criteria,
+                             GError **error)
+{
+  auto klass = GAFLIGHT_SERVER_GET_CLASS(server);
+  if (!(klass && klass->list_flights)) {
+    g_set_error(error,
+                GARROW_ERROR,
+                GARROW_ERROR_NOT_IMPLEMENTED,
+                "not implemented");
+    return NULL;
+  }
+  return (*(klass->list_flights))(server, context, criteria, error);
+}
+
 
 G_END_DECLS
 
diff --git a/c_glib/arrow-flight-glib/server.h b/c_glib/arrow-flight-glib/server.h
index ef477b4b1d9..f7431e8da68 100644
--- a/c_glib/arrow-flight-glib/server.h
+++ b/c_glib/arrow-flight-glib/server.h
@@ -40,6 +40,19 @@ GAFlightServerOptions *
 gaflight_server_options_new(GAFlightLocation *location);
 
 
+#define GAFLIGHT_TYPE_SERVER_CALL_CONTEXT       \
+  (gaflight_server_call_context_get_type())
+G_DECLARE_DERIVABLE_TYPE(GAFlightServerCallContext,
+                         gaflight_server_call_context,
+                         GAFLIGHT,
+                         SERVER_CALL_CONTEXT,
+                         GObject)
+struct _GAFlightServerCallContextClass
+{
+  GObjectClass parent_class;
+};
+
+
 #define GAFLIGHT_TYPE_SERVER (gaflight_server_get_type())
 G_DECLARE_DERIVABLE_TYPE(GAFlightServer,
                          gaflight_server,
@@ -49,6 +62,11 @@ G_DECLARE_DERIVABLE_TYPE(GAFlightServer,
 struct _GAFlightServerClass
 {
   GObjectClass parent_class;
+
+  GList *(*list_flights)(GAFlightServer *server,
+                         GAFlightServerCallContext *context,
+                         GAFlightCriteria *criteria,
+                         GError **error);
 };
 
 GARROW_AVAILABLE_IN_5_0
@@ -68,5 +86,11 @@ gboolean
 gaflight_server_wait(GAFlightServer *server,
                      GError **error);
 
+GARROW_AVAILABLE_IN_5_0
+GList *
+gaflight_server_list_flights(GAFlightServer *server,
+                             GAFlightServerCallContext *context,
+                             GAFlightCriteria *criteria,
+                             GError **error);
 
 G_END_DECLS
diff --git a/c_glib/test/flight/test-client.rb b/c_glib/test/flight/test-client.rb
index 7f076f64c9d..79960e20dbd 100644
--- a/c_glib/test/flight/test-client.rb
+++ b/c_glib/test/flight/test-client.rb
@@ -19,6 +19,7 @@ class TestFlightClient < Test::Unit::TestCase
   def setup
     @server = nil
     omit("Arrow Flight is required") unless defined?(ArrowFlight)
+    omit("Unstable on Windows") if Gem.win_platform?
     @server = Helper::FlightServer.new
     host = "127.0.0.1"
     location = ArrowFlight::Location.new("grpc://#{host}:0")
@@ -27,15 +28,15 @@ def setup
     @location = ArrowFlight::Location.new("grpc://#{host}:#{@server.port}")
   end
 
-  def shutdown
+  def teardown
     return if @server.nil?
     @server.shutdown
   end
 
-  def test_connect
-    # TODO: Add tests that use other methods and remove this.
-    assert_nothing_raised do
-      ArrowFlight::Client.new(@location)
-    end
+  def test_list_flights
+    client = ArrowFlight::Client.new(@location)
+    generator = Helper::FlightInfoGenerator.new
+    assert_equal([generator.page_view],
+                 client.list_flights)
   end
 end
diff --git a/c_glib/test/flight/test-command-descriptor.rb b/c_glib/test/flight/test-command-descriptor.rb
new file mode 100644
index 00000000000..316973287f0
--- /dev/null
+++ b/c_glib/test/flight/test-command-descriptor.rb
@@ -0,0 +1,52 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestFlightCommandDescriptor < Test::Unit::TestCase
+  def setup
+    omit("Arrow Flight is required") unless defined?(ArrowFlight)
+  end
+
+  def test_to_s
+    descriptor = ArrowFlight::CommandDescriptor.new("command")
+    assert_equal("FlightDescriptor<cmd = 'command'>",
+                 descriptor.to_s)
+  end
+
+  def test_command
+    command = "command"
+    descriptor = ArrowFlight::CommandDescriptor.new(command)
+    assert_equal(command, descriptor.command)
+  end
+
+  sub_test_case("#==") do
+    def test_true
+      descriptor1 = ArrowFlight::CommandDescriptor.new("command")
+      descriptor2 = ArrowFlight::CommandDescriptor.new("command")
+      assert do
+        descriptor1 == descriptor2
+      end
+    end
+
+    def test_false
+      descriptor1 = ArrowFlight::CommandDescriptor.new("command1")
+      descriptor2 = ArrowFlight::CommandDescriptor.new("command2")
+      assert do
+        not (descriptor1 == descriptor2)
+      end
+    end
+  end
+end
diff --git a/c_glib/test/flight/test-criteria.rb b/c_glib/test/flight/test-criteria.rb
new file mode 100644
index 00000000000..d5f60a8953d
--- /dev/null
+++ b/c_glib/test/flight/test-criteria.rb
@@ -0,0 +1,29 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestFlightCriteria < Test::Unit::TestCase
+  def setup
+    omit("Arrow Flight is required") unless defined?(ArrowFlight)
+  end
+
+  def test_expression
+    expression = "expression"
+    criteria = ArrowFlight::Criteria.new(expression)
+    assert_equal(expression,
+                 criteria.expression.to_s)
+  end
+end
diff --git a/c_glib/test/flight/test-endpoint.rb b/c_glib/test/flight/test-endpoint.rb
new file mode 100644
index 00000000000..06cddf0019b
--- /dev/null
+++ b/c_glib/test/flight/test-endpoint.rb
@@ -0,0 +1,67 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestFlightEndpoint < Test::Unit::TestCase
+  def setup
+    omit("Arrow Flight is required") unless defined?(ArrowFlight)
+  end
+
+  def test_ticket
+    ticket = ArrowFlight::Ticket.new("data")
+    locations = [
+       ArrowFlight::Location.new("grpc://127.0.0.1:2929"),
+       ArrowFlight::Location.new("grpc+tcp://127.0.0.1:12929"),
+    ]
+    endpoint = ArrowFlight::Endpoint.new(ticket, locations)
+    assert_equal(ticket,
+                 endpoint.ticket)
+  end
+
+  def test_locations
+    ticket = ArrowFlight::Ticket.new("data")
+    locations = [
+       ArrowFlight::Location.new("grpc://127.0.0.1:2929"),
+       ArrowFlight::Location.new("grpc+tcp://127.0.0.1:12929"),
+    ]
+    endpoint = ArrowFlight::Endpoint.new(ticket, locations)
+    assert_equal(locations,
+                 endpoint.locations)
+  end
+
+  sub_test_case("#==") do
+    def test_true
+      ticket = ArrowFlight::Ticket.new("data")
+      location = ArrowFlight::Location.new("grpc://127.0.0.1:2929")
+      endpoint1 = ArrowFlight::Endpoint.new(ticket, [location])
+      endpoint2 = ArrowFlight::Endpoint.new(ticket, [location])
+      assert do
+        endpoint1 == endpoint2
+      end
+    end
+
+    def test_false
+      ticket = ArrowFlight::Ticket.new("data")
+      location1 = ArrowFlight::Location.new("grpc://127.0.0.1:2929")
+      location2 = ArrowFlight::Location.new("grpc://127.0.0.1:1129")
+      endpoint1 = ArrowFlight::Endpoint.new(ticket, [location1])
+      endpoint2 = ArrowFlight::Endpoint.new(ticket, [location2])
+      assert do
+        not (endpoint1 == endpoint2)
+      end
+    end
+  end
+end
diff --git a/c_glib/test/flight/test-info.rb b/c_glib/test/flight/test-info.rb
new file mode 100644
index 00000000000..5bf0fbfad88
--- /dev/null
+++ b/c_glib/test/flight/test-info.rb
@@ -0,0 +1,78 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestFlightInfo < Test::Unit::TestCase
+  include Helper::Writable
+
+  def setup
+    omit("Arrow Flight is required") unless defined?(ArrowFlight)
+    @generator = Helper::FlightInfoGenerator.new
+  end
+
+  sub_test_case("#get_schema") do
+    def test_with_options
+      info = @generator.page_view
+      table = @generator.page_view_table
+      options = Arrow::ReadOptions.new
+      assert_equal(table.schema,
+                   info.get_schema(options))
+    end
+
+    def test_without_options
+      info = @generator.page_view
+      table = @generator.page_view_table
+      assert_equal(table.schema,
+                   info.get_schema)
+    end
+  end
+
+  def test_descriptor
+    info = @generator.page_view
+    assert_equal(@generator.page_view_descriptor,
+                 info.descriptor)
+  end
+
+  def test_endpoints
+    info = @generator.page_view
+    assert_equal(@generator.page_view_endpoints,
+                 info.endpoints)
+  end
+
+  def test_total_records
+    info = @generator.page_view
+    table = @generator.page_view_table
+    assert_equal(table.n_rows,
+                 info.total_records)
+  end
+
+  def test_total_bytes
+    info = @generator.page_view
+    table = @generator.page_view_table
+    output = Arrow::ResizableBuffer.new(0)
+    write_table(table, output, type: :stream)
+    assert_equal(output.size,
+                 info.total_bytes)
+  end
+
+  def test_equal
+    info1 = @generator.page_view
+    info2 = @generator.page_view
+    assert do
+      info1 == info2
+    end
+  end
+end
diff --git a/c_glib/test/flight/test-path-descriptor.rb b/c_glib/test/flight/test-path-descriptor.rb
new file mode 100644
index 00000000000..441fc7bb043
--- /dev/null
+++ b/c_glib/test/flight/test-path-descriptor.rb
@@ -0,0 +1,52 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestFlightPathDescriptor < Test::Unit::TestCase
+  def setup
+    omit("Arrow Flight is required") unless defined?(ArrowFlight)
+  end
+
+  def test_to_s
+    descriptor = ArrowFlight::PathDescriptor.new(["a", "b", "c"])
+    assert_equal("FlightDescriptor<path = 'a/b/c'>",
+                 descriptor.to_s)
+  end
+
+  def test_paths
+    paths = ["a", "b", "c"]
+    descriptor = ArrowFlight::PathDescriptor.new(paths)
+    assert_equal(paths, descriptor.paths)
+  end
+
+  sub_test_case("#==") do
+    def test_true
+      descriptor1 = ArrowFlight::PathDescriptor.new(["a", "b", "c"])
+      descriptor2 = ArrowFlight::PathDescriptor.new(["a", "b", "c"])
+      assert do
+        descriptor1 == descriptor2
+      end
+    end
+
+    def test_false
+      descriptor1 = ArrowFlight::PathDescriptor.new(["a", "b", "c"])
+      descriptor2 = ArrowFlight::PathDescriptor.new(["A", "B", "C"])
+      assert do
+        not (descriptor1 == descriptor2)
+      end
+    end
+  end
+end
diff --git a/c_glib/test/flight/test-ticket.rb b/c_glib/test/flight/test-ticket.rb
new file mode 100644
index 00000000000..976089762f0
--- /dev/null
+++ b/c_glib/test/flight/test-ticket.rb
@@ -0,0 +1,47 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestFlightTicket < Test::Unit::TestCase
+  def setup
+    omit("Arrow Flight is required") unless defined?(ArrowFlight)
+  end
+
+  def test_data
+    data = "data"
+    ticket = ArrowFlight::Ticket.new(data)
+    assert_equal(data,
+                 ticket.data.to_s)
+  end
+
+  sub_test_case("#==") do
+    def test_true
+      ticket1 = ArrowFlight::Ticket.new("data")
+      ticket2 = ArrowFlight::Ticket.new("data")
+      assert do
+        ticket1 == ticket2
+      end
+    end
+
+    def test_false
+      ticket1 = ArrowFlight::Ticket.new("data1")
+      ticket2 = ArrowFlight::Ticket.new("data2")
+      assert do
+        not (ticket1 == ticket2)
+      end
+    end
+  end
+end
diff --git a/c_glib/test/helper/data-type.rb b/c_glib/test/helper/data-type.rb
index b8224409873..bbe6866f5b9 100644
--- a/c_glib/test/helper/data-type.rb
+++ b/c_glib/test/helper/data-type.rb
@@ -52,6 +52,22 @@ def int64_data_type
       Arrow::Int64DataType.new
     end
 
+    def uint8_data_type
+      Arrow::UInt8DataType.new
+    end
+
+    def uint16_data_type
+      Arrow::UInt16DataType.new
+    end
+
+    def uint32_data_type
+      Arrow::UInt32DataType.new
+    end
+
+    def uint64_data_type
+      Arrow::UInt64DataType.new
+    end
+
     def string_data_type
       Arrow::StringDataType.new
     end
diff --git a/c_glib/test/helper/flight-info-generator.rb b/c_glib/test/helper/flight-info-generator.rb
new file mode 100644
index 00000000000..ad48eda2b6f
--- /dev/null
+++ b/c_glib/test/helper/flight-info-generator.rb
@@ -0,0 +1,61 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+require_relative "buildable"
+require_relative "data-type"
+require_relative "writable"
+
+module Helper
+  class FlightInfoGenerator
+    include Buildable
+    include DataType
+    include Writable
+
+    def page_view_table
+      build_table("count" => build_uint64_array([1, 2, 3]),
+                  "private" => build_boolean_array([true, false, true]))
+    end
+
+    def page_view_descriptor
+      ArrowFlight::PathDescriptor.new(["page-view"])
+    end
+
+    def page_view_endpoints
+      locations = [
+        ArrowFlight::Location.new("grpc+tcp://127.0.0.1:10000"),
+        ArrowFlight::Location.new("grpc+tcp://127.0.0.1:10001"),
+      ]
+      [
+        ArrowFlight::Endpoint.new(ArrowFlight::Ticket.new("page-view"),
+                                  locations),
+      ]
+    end
+
+    def page_view
+      table = page_view_table
+      descriptor = page_view_descriptor
+      endpoints = page_view_endpoints
+      output = Arrow::ResizableBuffer.new(0)
+      write_table(table, output, type: :stream)
+      ArrowFlight::Info.new(table.schema,
+                            descriptor,
+                            endpoints,
+                            table.n_rows,
+                            output.size)
+    end
+  end
+end
diff --git a/c_glib/test/helper/flight-server.rb b/c_glib/test/helper/flight-server.rb
index e1bafb10846..a6bcd9ec233 100644
--- a/c_glib/test/helper/flight-server.rb
+++ b/c_glib/test/helper/flight-server.rb
@@ -15,8 +15,16 @@
 # specific language governing permissions and limitations
 # under the License.
 
+require_relative "flight-info-generator"
+
 module Helper
   class FlightServer < ArrowFlight::Server
     type_register
+
+    private
+    def virtual_do_list_flights(context, criteria)
+      generator = FlightInfoGenerator.new
+      [generator.page_view]
+    end
   end
 end
diff --git a/c_glib/test/helper/writable.rb b/c_glib/test/helper/writable.rb
index 0053e972f91..1c8db756c38 100644
--- a/c_glib/test/helper/writable.rb
+++ b/c_glib/test/helper/writable.rb
@@ -17,22 +17,26 @@
 
 module Helper
   module Writable
-    def write_table(table, path, type: :file)
-      output = Arrow::FileOutputStream.new(path, false)
+    def write_table(table, output, type: :file)
+      if output.is_a?(Arrow::Buffer)
+        output_stream = Arrow::BufferOutputStream.new(output)
+      else
+        output_stream = Arrow::FileOutputStream.new(output, false)
+      end
       begin
         if type == :file
           writer_class = Arrow::RecordBatchFileWriter
         else
           writer_class = Arrow::RecordBatchStreamWriter
         end
-        writer = writer_class.new(output, table.schema)
+        writer = writer_class.new(output_stream, table.schema)
         begin
           writer.write_table(table)
         ensure
           writer.close
         end
       ensure
-        output.close
+        output_stream.close
       end
     end
   end
diff --git a/c_glib/test/run-test.rb b/c_glib/test/run-test.rb
index 9c6af05224e..abae4e722c5 100755
--- a/c_glib/test/run-test.rb
+++ b/c_glib/test/run-test.rb
@@ -56,7 +56,15 @@ class BooleanScalar
 end
 
 begin
-  ArrowFlight = GI.load("ArrowFlight")
+  class ArrowFlightLoader < GI::Loader
+    def should_unlock_gvl?(info, klass)
+      true
+    end
+  end
+  flight_module = Module.new
+  ArrowFlightLoader.load("ArrowFlight", flight_module)
+  ArrowFlight = flight_module
+  GObjectIntrospection::Loader.start_callback_dispatch_thread
 rescue GObjectIntrospection::RepositoryError::TypelibNotFound
 end
 
@@ -84,6 +92,7 @@ class BooleanScalar
 require_relative "helper/data-type"
 require_relative "helper/fixture"
 if defined?(ArrowFlight)
+  require_relative "helper/flight-info-generator"
   require_relative "helper/flight-server"
 end
 require_relative "helper/omittable"
diff --git a/ruby/red-arrow-flight/lib/arrow-flight/loader.rb b/ruby/red-arrow-flight/lib/arrow-flight/loader.rb
index c89ff994784..2e8878d696e 100644
--- a/ruby/red-arrow-flight/lib/arrow-flight/loader.rb
+++ b/ruby/red-arrow-flight/lib/arrow-flight/loader.rb
@@ -26,6 +26,7 @@ def load
     private
     def post_load(repository, namespace)
       require_libraries
+      self.class.start_callback_dispatch_thread
     end
 
     def require_libraries
@@ -33,6 +34,11 @@ def require_libraries
       require "arrow-flight/client-options"
       require "arrow-flight/location"
       require "arrow-flight/server-options"
+      require "arrow-flight/ticket"
+    end
+
+    def should_unlock_gvl?(info, klass)
+      true
     end
   end
 end
diff --git a/ruby/red-arrow-flight/lib/arrow-flight/ticket.rb b/ruby/red-arrow-flight/lib/arrow-flight/ticket.rb
new file mode 100644
index 00000000000..92afad386e3
--- /dev/null
+++ b/ruby/red-arrow-flight/lib/arrow-flight/ticket.rb
@@ -0,0 +1,32 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module ArrowFlight
+  class Ticket
+    class << self
+      # @api private
+      def try_convert(value)
+        case value
+        when String
+          new(value)
+        else
+          nil
+        end
+      end
+    end
+  end
+end
diff --git a/ruby/red-arrow-flight/test/helper/info-generator.rb b/ruby/red-arrow-flight/test/helper/info-generator.rb
new file mode 100644
index 00000000000..ef931ebbab4
--- /dev/null
+++ b/ruby/red-arrow-flight/test/helper/info-generator.rb
@@ -0,0 +1,53 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+module Helper
+  class InfoGenerator
+    def page_view_table
+      Arrow::Table.new("count" => Arrow::UInt64Array.new([1, 2, 3]),
+                       "private" => Arrow::BooleanArray.new([true, false, true]))
+    end
+
+    def page_view_descriptor
+      ArrowFlight::PathDescriptor.new(["page-view"])
+    end
+
+    def page_view_endpoints
+      locations = [
+        ArrowFlight::Location.new("grpc+tcp://127.0.0.1:10000"),
+        ArrowFlight::Location.new("grpc+tcp://127.0.0.1:10001"),
+      ]
+      [
+        ArrowFlight::Endpoint.new("page-view", locations),
+      ]
+    end
+
+    def page_view
+      table = page_view_table
+      descriptor = page_view_descriptor
+      endpoints = page_view_endpoints
+      output = Arrow::ResizableBuffer.new(0)
+      table.save(output, format: :stream)
+      ArrowFlight::Info.new(table.schema,
+                            descriptor,
+                            endpoints,
+                            table.n_rows,
+                            output.size)
+    end
+  end
+end
diff --git a/ruby/red-arrow-flight/test/helper/server.rb b/ruby/red-arrow-flight/test/helper/server.rb
index 461fc92d12c..a884a527b48 100644
--- a/ruby/red-arrow-flight/test/helper/server.rb
+++ b/ruby/red-arrow-flight/test/helper/server.rb
@@ -15,8 +15,16 @@
 # specific language governing permissions and limitations
 # under the License.
 
+require_relative "info-generator"
+
 module Helper
   class Server < ArrowFlight::Server
     type_register
+
+    private
+    def virtual_do_list_flights(context, criteria)
+      generator = InfoGenerator.new
+      [generator.page_view]
+    end
   end
 end
diff --git a/ruby/red-arrow-flight/test/test-client.rb b/ruby/red-arrow-flight/test/test-client.rb
index 0514c46a3c1..50d5d57a63f 100644
--- a/ruby/red-arrow-flight/test/test-client.rb
+++ b/ruby/red-arrow-flight/test/test-client.rb
@@ -17,19 +17,22 @@
 
 class TestClient < Test::Unit::TestCase
   def setup
+    @server = nil
+    omit("Unstable on Windows") if Gem.win_platform?
     @server = Helper::Server.new
     @server.listen("grpc://127.0.0.1:0")
     @location = "grpc://127.0.0.1:#{@server.port}"
   end
 
-  def shutdown
-    @server.shutdow
+  def teardown
+    return if @server.nil?
+    @server.shutdown
   end
 
-  def test_connect
-    # TODO: Add tests that use other methods and remove this.
-    assert_nothing_raised do
-      ArrowFlight::Client.new(@location)
-    end
+  def test_list_flights
+    client = ArrowFlight::Client.new(@location)
+    generator = Helper::InfoGenerator.new
+    assert_equal([generator.page_view],
+                 client.list_flights)
   end
 end
diff --git a/ruby/red-arrow-flight/test/test-ticket.rb b/ruby/red-arrow-flight/test/test-ticket.rb
new file mode 100644
index 00000000000..d8668be74ad
--- /dev/null
+++ b/ruby/red-arrow-flight/test/test-ticket.rb
@@ -0,0 +1,26 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestTicket < Test::Unit::TestCase
+  sub_test_case(".try_convert") do
+    def test_string
+      ticket = ArrowFlight::Ticket.try_convert("data")
+      assert_equal("data",
+                   ticket.data.to_s)
+    end
+  end
+end
diff --git a/ruby/red-arrow/lib/arrow/table.rb b/ruby/red-arrow/lib/arrow/table.rb
index 98789d6d63a..e9bf3221304 100644
--- a/ruby/red-arrow/lib/arrow/table.rb
+++ b/ruby/red-arrow/lib/arrow/table.rb
@@ -442,8 +442,8 @@ def window(size: nil)
       RollingWindow.new(self, size)
     end
 
-    def save(path, options={})
-      saver = TableSaver.new(self, path, options)
+    def save(output, options={})
+      saver = TableSaver.new(self, output, options)
       saver.save
     end
 
diff --git a/ruby/red-arrow/red-arrow.gemspec b/ruby/red-arrow/red-arrow.gemspec
index e5a602a0862..562e474cda3 100644
--- a/ruby/red-arrow/red-arrow.gemspec
+++ b/ruby/red-arrow/red-arrow.gemspec
@@ -48,7 +48,7 @@ Gem::Specification.new do |spec|
 
   spec.add_runtime_dependency("bigdecimal", ">= 2.0.3")
   spec.add_runtime_dependency("extpp", ">= 0.0.7")
-  spec.add_runtime_dependency("gio2", ">= 3.3.6")
+  spec.add_runtime_dependency("gio2", ">= 3.4.5")
   spec.add_runtime_dependency("native-package-installer")
   spec.add_runtime_dependency("pkg-config")
 

From 21990c7d03f4910ade16be5469aaf19d3107e0b8 Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Thu, 8 Jul 2021 14:47:37 +0900
Subject: [PATCH 527/719] ARROW-13286: [CI] Require docker-compose 1.27.0 or
 later

We need it for "extends".

See also:

  * https://issues.apache.org/jira/browse/ARROW-13199
  * https://github.com/apache/arrow/pull/10611
  * https://github.com/docker/compose/pull/7588

Closes #10681 from kou/require-docker-compose-1.27.0-or-later

Authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 4cd546a29e7..6a279a2f87b 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -151,7 +151,7 @@ before_install:
 
 install:
   - sudo -H pip3 install --upgrade pip
-  - sudo -H pip3 install docker-compose
+  - sudo -H pip3 install 'docker-compose>=1.27.0'
   - sudo -H pip3 install -e dev/archery[docker]
 
 script:

From e4dc71ac966997a5d8a0fbd2cf83ceb3e9a5db51 Mon Sep 17 00:00:00 2001
From: Eduardo Ponce <edponce00@gmail.com>
Date: Thu, 8 Jul 2021 06:32:37 +0000
Subject: [PATCH 528/719] ARROW-13282: [C++] Remove obsolete generated files

This PR removes obsolete generated files from src/arrow/util/, namely bpacking_avx2_generated.h and bpacking_avx512_generated.h.

Closes #10682 from edponce/ARROW-13282-Remove-obsolete-generated-files

Authored-by: Eduardo Ponce <edponce00@gmail.com>
Signed-off-by: Yibo Cai <yibo.cai@arm.com>
---
 cpp/src/arrow/util/bpacking_avx2_generated.h  | 1819 -----------------
 .../arrow/util/bpacking_avx512_generated.h    | 1509 --------------
 2 files changed, 3328 deletions(-)
 delete mode 100644 cpp/src/arrow/util/bpacking_avx2_generated.h
 delete mode 100644 cpp/src/arrow/util/bpacking_avx512_generated.h

diff --git a/cpp/src/arrow/util/bpacking_avx2_generated.h b/cpp/src/arrow/util/bpacking_avx2_generated.h
deleted file mode 100644
index 2240143b16d..00000000000
--- a/cpp/src/arrow/util/bpacking_avx2_generated.h
+++ /dev/null
@@ -1,1819 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-//
-// Automatically generated file; DO NOT EDIT.
-
-#pragma once
-
-#include <stdint.h>
-#include <string.h>
-
-#ifdef _MSC_VER
-#include <intrin.h>
-#else
-#include <immintrin.h>
-#endif
-
-#include "arrow/util/ubsan.h"
-
-namespace arrow {
-namespace internal {
-
-inline static const uint32_t* unpack0_32_avx2(const uint32_t* in, uint32_t* out) {
-  memset(out, 0x0, 32 * sizeof(*out));
-  out += 32;
-
-  return in;
-}
-
-inline static const uint32_t* unpack1_32_avx2(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x1;
-  __m256i reg_shifts, reg_inls, reg_masks;
-  __m256i results;
-
-  reg_masks = _mm256_set1_epi32(mask);
-
-  // shift the first 8 outs
-  reg_shifts = _mm256_set_epi32(7, 6, 5, 4,
-                                3, 2, 1, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the second 8 outs
-  reg_shifts = _mm256_set_epi32(15, 14, 13, 12,
-                                11, 10, 9, 8);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the third 8 outs
-  reg_shifts = _mm256_set_epi32(23, 22, 21, 20,
-                                19, 18, 17, 16);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the last 8 outs
-  reg_shifts = _mm256_set_epi32(31, 30, 29, 28,
-                                27, 26, 25, 24);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  in += 1;
-
-  return in;
-}
-
-inline static const uint32_t* unpack2_32_avx2(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x3;
-  __m256i reg_shifts, reg_inls, reg_masks;
-  __m256i results;
-
-  reg_masks = _mm256_set1_epi32(mask);
-
-  // shift the first 8 outs
-  reg_shifts = _mm256_set_epi32(14, 12, 10, 8,
-                                6, 4, 2, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the second 8 outs
-  reg_shifts = _mm256_set_epi32(30, 28, 26, 24,
-                                22, 20, 18, 16);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the third 8 outs
-  reg_shifts = _mm256_set_epi32(14, 12, 10, 8,
-                                6, 4, 2, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 1), SafeLoad(in + 1),
-                              SafeLoad(in + 1), SafeLoad(in + 1),
-                              SafeLoad(in + 1), SafeLoad(in + 1),
-                              SafeLoad(in + 1), SafeLoad(in + 1));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the last 8 outs
-  reg_shifts = _mm256_set_epi32(30, 28, 26, 24,
-                                22, 20, 18, 16);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 1), SafeLoad(in + 1),
-                              SafeLoad(in + 1), SafeLoad(in + 1),
-                              SafeLoad(in + 1), SafeLoad(in + 1),
-                              SafeLoad(in + 1), SafeLoad(in + 1));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  in += 2;
-
-  return in;
-}
-
-inline static const uint32_t* unpack3_32_avx2(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x7;
-  __m256i reg_shifts, reg_inls, reg_masks;
-  __m256i results;
-
-  reg_masks = _mm256_set1_epi32(mask);
-
-  // shift the first 8 outs
-  reg_shifts = _mm256_set_epi32(21, 18, 15, 12,
-                                9, 6, 3, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the second 8 outs
-  reg_shifts = _mm256_set_epi32(13, 10, 7, 4,
-                                1, 0, 27, 24);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 1), SafeLoad(in + 1),
-                              SafeLoad(in + 1), SafeLoad(in + 1),
-                              SafeLoad(in + 1), SafeLoad(in + 0) >> 30 | SafeLoad(in + 1) << 2,
-                              SafeLoad(in + 0), SafeLoad(in + 0));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the third 8 outs
-  reg_shifts = _mm256_set_epi32(5, 2, 0, 28,
-                                25, 22, 19, 16);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 2), SafeLoad(in + 2),
-                              SafeLoad(in + 1) >> 31 | SafeLoad(in + 2) << 1, SafeLoad(in + 1),
-                              SafeLoad(in + 1), SafeLoad(in + 1),
-                              SafeLoad(in + 1), SafeLoad(in + 1));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the last 8 outs
-  reg_shifts = _mm256_set_epi32(29, 26, 23, 20,
-                                17, 14, 11, 8);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 2), SafeLoad(in + 2),
-                              SafeLoad(in + 2), SafeLoad(in + 2),
-                              SafeLoad(in + 2), SafeLoad(in + 2),
-                              SafeLoad(in + 2), SafeLoad(in + 2));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  in += 3;
-
-  return in;
-}
-
-inline static const uint32_t* unpack4_32_avx2(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0xf;
-  __m256i reg_shifts, reg_inls, reg_masks;
-  __m256i results;
-
-  reg_masks = _mm256_set1_epi32(mask);
-
-  // shift the first 8 outs
-  reg_shifts = _mm256_set_epi32(28, 24, 20, 16,
-                                12, 8, 4, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the second 8 outs
-  reg_shifts = _mm256_set_epi32(28, 24, 20, 16,
-                                12, 8, 4, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 1), SafeLoad(in + 1),
-                              SafeLoad(in + 1), SafeLoad(in + 1),
-                              SafeLoad(in + 1), SafeLoad(in + 1),
-                              SafeLoad(in + 1), SafeLoad(in + 1));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the third 8 outs
-  reg_shifts = _mm256_set_epi32(28, 24, 20, 16,
-                                12, 8, 4, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 2), SafeLoad(in + 2),
-                              SafeLoad(in + 2), SafeLoad(in + 2),
-                              SafeLoad(in + 2), SafeLoad(in + 2),
-                              SafeLoad(in + 2), SafeLoad(in + 2));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the last 8 outs
-  reg_shifts = _mm256_set_epi32(28, 24, 20, 16,
-                                12, 8, 4, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 3), SafeLoad(in + 3),
-                              SafeLoad(in + 3), SafeLoad(in + 3),
-                              SafeLoad(in + 3), SafeLoad(in + 3),
-                              SafeLoad(in + 3), SafeLoad(in + 3));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  in += 4;
-
-  return in;
-}
-
-inline static const uint32_t* unpack5_32_avx2(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x1f;
-  __m256i reg_shifts, reg_inls, reg_masks;
-  __m256i results;
-
-  reg_masks = _mm256_set1_epi32(mask);
-
-  // shift the first 8 outs
-  reg_shifts = _mm256_set_epi32(3, 0, 25, 20,
-                                15, 10, 5, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 1), SafeLoad(in + 0) >> 30 | SafeLoad(in + 1) << 2,
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the second 8 outs
-  reg_shifts = _mm256_set_epi32(11, 6, 1, 0,
-                                23, 18, 13, 8);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 2), SafeLoad(in + 2),
-                              SafeLoad(in + 2), SafeLoad(in + 1) >> 28 | SafeLoad(in + 2) << 4,
-                              SafeLoad(in + 1), SafeLoad(in + 1),
-                              SafeLoad(in + 1), SafeLoad(in + 1));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the third 8 outs
-  reg_shifts = _mm256_set_epi32(19, 14, 9, 4,
-                                0, 26, 21, 16);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 3), SafeLoad(in + 3),
-                              SafeLoad(in + 3), SafeLoad(in + 3),
-                              SafeLoad(in + 2) >> 31 | SafeLoad(in + 3) << 1, SafeLoad(in + 2),
-                              SafeLoad(in + 2), SafeLoad(in + 2));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the last 8 outs
-  reg_shifts = _mm256_set_epi32(27, 22, 17, 12,
-                                7, 2, 0, 24);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 4), SafeLoad(in + 4),
-                              SafeLoad(in + 4), SafeLoad(in + 4),
-                              SafeLoad(in + 4), SafeLoad(in + 4),
-                              SafeLoad(in + 3) >> 29 | SafeLoad(in + 4) << 3, SafeLoad(in + 3));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  in += 5;
-
-  return in;
-}
-
-inline static const uint32_t* unpack6_32_avx2(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x3f;
-  __m256i reg_shifts, reg_inls, reg_masks;
-  __m256i results;
-
-  reg_masks = _mm256_set1_epi32(mask);
-
-  // shift the first 8 outs
-  reg_shifts = _mm256_set_epi32(10, 4, 0, 24,
-                                18, 12, 6, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 1), SafeLoad(in + 1),
-                              SafeLoad(in + 0) >> 30 | SafeLoad(in + 1) << 2, SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the second 8 outs
-  reg_shifts = _mm256_set_epi32(26, 20, 14, 8,
-                                2, 0, 22, 16);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 2), SafeLoad(in + 2),
-                              SafeLoad(in + 2), SafeLoad(in + 2),
-                              SafeLoad(in + 2), SafeLoad(in + 1) >> 28 | SafeLoad(in + 2) << 4,
-                              SafeLoad(in + 1), SafeLoad(in + 1));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the third 8 outs
-  reg_shifts = _mm256_set_epi32(10, 4, 0, 24,
-                                18, 12, 6, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 4), SafeLoad(in + 4),
-                              SafeLoad(in + 3) >> 30 | SafeLoad(in + 4) << 2, SafeLoad(in + 3),
-                              SafeLoad(in + 3), SafeLoad(in + 3),
-                              SafeLoad(in + 3), SafeLoad(in + 3));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the last 8 outs
-  reg_shifts = _mm256_set_epi32(26, 20, 14, 8,
-                                2, 0, 22, 16);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 5), SafeLoad(in + 5),
-                              SafeLoad(in + 5), SafeLoad(in + 5),
-                              SafeLoad(in + 5), SafeLoad(in + 4) >> 28 | SafeLoad(in + 5) << 4,
-                              SafeLoad(in + 4), SafeLoad(in + 4));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  in += 6;
-
-  return in;
-}
-
-inline static const uint32_t* unpack7_32_avx2(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x7f;
-  __m256i reg_shifts, reg_inls, reg_masks;
-  __m256i results;
-
-  reg_masks = _mm256_set1_epi32(mask);
-
-  // shift the first 8 outs
-  reg_shifts = _mm256_set_epi32(17, 10, 3, 0,
-                                21, 14, 7, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 1), SafeLoad(in + 1),
-                              SafeLoad(in + 1), SafeLoad(in + 0) >> 28 | SafeLoad(in + 1) << 4,
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the second 8 outs
-  reg_shifts = _mm256_set_epi32(9, 2, 0, 20,
-                                13, 6, 0, 24);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 3), SafeLoad(in + 3),
-                              SafeLoad(in + 2) >> 27 | SafeLoad(in + 3) << 5, SafeLoad(in + 2),
-                              SafeLoad(in + 2), SafeLoad(in + 2),
-                              SafeLoad(in + 1) >> 31 | SafeLoad(in + 2) << 1, SafeLoad(in + 1));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the third 8 outs
-  reg_shifts = _mm256_set_epi32(1, 0, 19, 12,
-                                5, 0, 23, 16);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 5), SafeLoad(in + 4) >> 26 | SafeLoad(in + 5) << 6,
-                              SafeLoad(in + 4), SafeLoad(in + 4),
-                              SafeLoad(in + 4), SafeLoad(in + 3) >> 30 | SafeLoad(in + 4) << 2,
-                              SafeLoad(in + 3), SafeLoad(in + 3));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the last 8 outs
-  reg_shifts = _mm256_set_epi32(25, 18, 11, 4,
-                                0, 22, 15, 8);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 6), SafeLoad(in + 6),
-                              SafeLoad(in + 6), SafeLoad(in + 6),
-                              SafeLoad(in + 5) >> 29 | SafeLoad(in + 6) << 3, SafeLoad(in + 5),
-                              SafeLoad(in + 5), SafeLoad(in + 5));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  in += 7;
-
-  return in;
-}
-
-inline static const uint32_t* unpack8_32_avx2(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0xff;
-  __m256i reg_shifts, reg_inls, reg_masks;
-  __m256i results;
-
-  reg_masks = _mm256_set1_epi32(mask);
-
-  // shift the first 8 outs
-  reg_shifts = _mm256_set_epi32(24, 16, 8, 0,
-                                24, 16, 8, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 1), SafeLoad(in + 1),
-                              SafeLoad(in + 1), SafeLoad(in + 1),
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the second 8 outs
-  reg_shifts = _mm256_set_epi32(24, 16, 8, 0,
-                                24, 16, 8, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 3), SafeLoad(in + 3),
-                              SafeLoad(in + 3), SafeLoad(in + 3),
-                              SafeLoad(in + 2), SafeLoad(in + 2),
-                              SafeLoad(in + 2), SafeLoad(in + 2));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the third 8 outs
-  reg_shifts = _mm256_set_epi32(24, 16, 8, 0,
-                                24, 16, 8, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 5), SafeLoad(in + 5),
-                              SafeLoad(in + 5), SafeLoad(in + 5),
-                              SafeLoad(in + 4), SafeLoad(in + 4),
-                              SafeLoad(in + 4), SafeLoad(in + 4));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the last 8 outs
-  reg_shifts = _mm256_set_epi32(24, 16, 8, 0,
-                                24, 16, 8, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 7), SafeLoad(in + 7),
-                              SafeLoad(in + 7), SafeLoad(in + 7),
-                              SafeLoad(in + 6), SafeLoad(in + 6),
-                              SafeLoad(in + 6), SafeLoad(in + 6));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  in += 8;
-
-  return in;
-}
-
-inline static const uint32_t* unpack9_32_avx2(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x1ff;
-  __m256i reg_shifts, reg_inls, reg_masks;
-  __m256i results;
-
-  reg_masks = _mm256_set1_epi32(mask);
-
-  // shift the first 8 outs
-  reg_shifts = _mm256_set_epi32(0, 22, 13, 4,
-                                0, 18, 9, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 1) >> 31 | SafeLoad(in + 2) << 1, SafeLoad(in + 1),
-                              SafeLoad(in + 1), SafeLoad(in + 1),
-                              SafeLoad(in + 0) >> 27 | SafeLoad(in + 1) << 5, SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the second 8 outs
-  reg_shifts = _mm256_set_epi32(7, 0, 21, 12,
-                                3, 0, 17, 8);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 4), SafeLoad(in + 3) >> 30 | SafeLoad(in + 4) << 2,
-                              SafeLoad(in + 3), SafeLoad(in + 3),
-                              SafeLoad(in + 3), SafeLoad(in + 2) >> 26 | SafeLoad(in + 3) << 6,
-                              SafeLoad(in + 2), SafeLoad(in + 2));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the third 8 outs
-  reg_shifts = _mm256_set_epi32(15, 6, 0, 20,
-                                11, 2, 0, 16);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 6), SafeLoad(in + 6),
-                              SafeLoad(in + 5) >> 29 | SafeLoad(in + 6) << 3, SafeLoad(in + 5),
-                              SafeLoad(in + 5), SafeLoad(in + 5),
-                              SafeLoad(in + 4) >> 25 | SafeLoad(in + 5) << 7, SafeLoad(in + 4));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the last 8 outs
-  reg_shifts = _mm256_set_epi32(23, 14, 5, 0,
-                                19, 10, 1, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 8), SafeLoad(in + 8),
-                              SafeLoad(in + 8), SafeLoad(in + 7) >> 28 | SafeLoad(in + 8) << 4,
-                              SafeLoad(in + 7), SafeLoad(in + 7),
-                              SafeLoad(in + 7), SafeLoad(in + 6) >> 24 | SafeLoad(in + 7) << 8);
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  in += 9;
-
-  return in;
-}
-
-inline static const uint32_t* unpack10_32_avx2(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x3ff;
-  __m256i reg_shifts, reg_inls, reg_masks;
-  __m256i results;
-
-  reg_masks = _mm256_set1_epi32(mask);
-
-  // shift the first 8 outs
-  reg_shifts = _mm256_set_epi32(6, 0, 18, 8,
-                                0, 20, 10, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 2), SafeLoad(in + 1) >> 28 | SafeLoad(in + 2) << 4,
-                              SafeLoad(in + 1), SafeLoad(in + 1),
-                              SafeLoad(in + 0) >> 30 | SafeLoad(in + 1) << 2, SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the second 8 outs
-  reg_shifts = _mm256_set_epi32(22, 12, 2, 0,
-                                14, 4, 0, 16);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 4), SafeLoad(in + 4),
-                              SafeLoad(in + 4), SafeLoad(in + 3) >> 24 | SafeLoad(in + 4) << 8,
-                              SafeLoad(in + 3), SafeLoad(in + 3),
-                              SafeLoad(in + 2) >> 26 | SafeLoad(in + 3) << 6, SafeLoad(in + 2));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the third 8 outs
-  reg_shifts = _mm256_set_epi32(6, 0, 18, 8,
-                                0, 20, 10, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 7), SafeLoad(in + 6) >> 28 | SafeLoad(in + 7) << 4,
-                              SafeLoad(in + 6), SafeLoad(in + 6),
-                              SafeLoad(in + 5) >> 30 | SafeLoad(in + 6) << 2, SafeLoad(in + 5),
-                              SafeLoad(in + 5), SafeLoad(in + 5));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the last 8 outs
-  reg_shifts = _mm256_set_epi32(22, 12, 2, 0,
-                                14, 4, 0, 16);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 9), SafeLoad(in + 9),
-                              SafeLoad(in + 9), SafeLoad(in + 8) >> 24 | SafeLoad(in + 9) << 8,
-                              SafeLoad(in + 8), SafeLoad(in + 8),
-                              SafeLoad(in + 7) >> 26 | SafeLoad(in + 8) << 6, SafeLoad(in + 7));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  in += 10;
-
-  return in;
-}
-
-inline static const uint32_t* unpack11_32_avx2(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x7ff;
-  __m256i reg_shifts, reg_inls, reg_masks;
-  __m256i results;
-
-  reg_masks = _mm256_set1_epi32(mask);
-
-  // shift the first 8 outs
-  reg_shifts = _mm256_set_epi32(13, 2, 0, 12,
-                                1, 0, 11, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 2), SafeLoad(in + 2),
-                              SafeLoad(in + 1) >> 23 | SafeLoad(in + 2) << 9, SafeLoad(in + 1),
-                              SafeLoad(in + 1), SafeLoad(in + 0) >> 22 | SafeLoad(in + 1) << 10,
-                              SafeLoad(in + 0), SafeLoad(in + 0));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the second 8 outs
-  reg_shifts = _mm256_set_epi32(5, 0, 15, 4,
-                                0, 14, 3, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 5), SafeLoad(in + 4) >> 26 | SafeLoad(in + 5) << 6,
-                              SafeLoad(in + 4), SafeLoad(in + 4),
-                              SafeLoad(in + 3) >> 25 | SafeLoad(in + 4) << 7, SafeLoad(in + 3),
-                              SafeLoad(in + 3), SafeLoad(in + 2) >> 24 | SafeLoad(in + 3) << 8);
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the third 8 outs
-  reg_shifts = _mm256_set_epi32(0, 18, 7, 0,
-                                17, 6, 0, 16);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 7) >> 29 | SafeLoad(in + 8) << 3, SafeLoad(in + 7),
-                              SafeLoad(in + 7), SafeLoad(in + 6) >> 28 | SafeLoad(in + 7) << 4,
-                              SafeLoad(in + 6), SafeLoad(in + 6),
-                              SafeLoad(in + 5) >> 27 | SafeLoad(in + 6) << 5, SafeLoad(in + 5));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the last 8 outs
-  reg_shifts = _mm256_set_epi32(21, 10, 0, 20,
-                                9, 0, 19, 8);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 10), SafeLoad(in + 10),
-                              SafeLoad(in + 9) >> 31 | SafeLoad(in + 10) << 1, SafeLoad(in + 9),
-                              SafeLoad(in + 9), SafeLoad(in + 8) >> 30 | SafeLoad(in + 9) << 2,
-                              SafeLoad(in + 8), SafeLoad(in + 8));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  in += 11;
-
-  return in;
-}
-
-inline static const uint32_t* unpack12_32_avx2(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0xfff;
-  __m256i reg_shifts, reg_inls, reg_masks;
-  __m256i results;
-
-  reg_masks = _mm256_set1_epi32(mask);
-
-  // shift the first 8 outs
-  reg_shifts = _mm256_set_epi32(20, 8, 0, 16,
-                                4, 0, 12, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 2), SafeLoad(in + 2),
-                              SafeLoad(in + 1) >> 28 | SafeLoad(in + 2) << 4, SafeLoad(in + 1),
-                              SafeLoad(in + 1), SafeLoad(in + 0) >> 24 | SafeLoad(in + 1) << 8,
-                              SafeLoad(in + 0), SafeLoad(in + 0));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the second 8 outs
-  reg_shifts = _mm256_set_epi32(20, 8, 0, 16,
-                                4, 0, 12, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 5), SafeLoad(in + 5),
-                              SafeLoad(in + 4) >> 28 | SafeLoad(in + 5) << 4, SafeLoad(in + 4),
-                              SafeLoad(in + 4), SafeLoad(in + 3) >> 24 | SafeLoad(in + 4) << 8,
-                              SafeLoad(in + 3), SafeLoad(in + 3));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the third 8 outs
-  reg_shifts = _mm256_set_epi32(20, 8, 0, 16,
-                                4, 0, 12, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 8), SafeLoad(in + 8),
-                              SafeLoad(in + 7) >> 28 | SafeLoad(in + 8) << 4, SafeLoad(in + 7),
-                              SafeLoad(in + 7), SafeLoad(in + 6) >> 24 | SafeLoad(in + 7) << 8,
-                              SafeLoad(in + 6), SafeLoad(in + 6));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the last 8 outs
-  reg_shifts = _mm256_set_epi32(20, 8, 0, 16,
-                                4, 0, 12, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 11), SafeLoad(in + 11),
-                              SafeLoad(in + 10) >> 28 | SafeLoad(in + 11) << 4, SafeLoad(in + 10),
-                              SafeLoad(in + 10), SafeLoad(in + 9) >> 24 | SafeLoad(in + 10) << 8,
-                              SafeLoad(in + 9), SafeLoad(in + 9));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  in += 12;
-
-  return in;
-}
-
-inline static const uint32_t* unpack13_32_avx2(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x1fff;
-  __m256i reg_shifts, reg_inls, reg_masks;
-  __m256i results;
-
-  reg_masks = _mm256_set1_epi32(mask);
-
-  // shift the first 8 outs
-  reg_shifts = _mm256_set_epi32(0, 14, 1, 0,
-                                7, 0, 13, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 2) >> 27 | SafeLoad(in + 3) << 5, SafeLoad(in + 2),
-                              SafeLoad(in + 2), SafeLoad(in + 1) >> 20 | SafeLoad(in + 2) << 12,
-                              SafeLoad(in + 1), SafeLoad(in + 0) >> 26 | SafeLoad(in + 1) << 6,
-                              SafeLoad(in + 0), SafeLoad(in + 0));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the second 8 outs
-  reg_shifts = _mm256_set_epi32(3, 0, 9, 0,
-                                15, 2, 0, 8);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 6), SafeLoad(in + 5) >> 22 | SafeLoad(in + 6) << 10,
-                              SafeLoad(in + 5), SafeLoad(in + 4) >> 28 | SafeLoad(in + 5) << 4,
-                              SafeLoad(in + 4), SafeLoad(in + 4),
-                              SafeLoad(in + 3) >> 21 | SafeLoad(in + 4) << 11, SafeLoad(in + 3));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the third 8 outs
-  reg_shifts = _mm256_set_epi32(11, 0, 17, 4,
-                                0, 10, 0, 16);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 9), SafeLoad(in + 8) >> 30 | SafeLoad(in + 9) << 2,
-                              SafeLoad(in + 8), SafeLoad(in + 8),
-                              SafeLoad(in + 7) >> 23 | SafeLoad(in + 8) << 9, SafeLoad(in + 7),
-                              SafeLoad(in + 6) >> 29 | SafeLoad(in + 7) << 3, SafeLoad(in + 6));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the last 8 outs
-  reg_shifts = _mm256_set_epi32(19, 6, 0, 12,
-                                0, 18, 5, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 12), SafeLoad(in + 12),
-                              SafeLoad(in + 11) >> 25 | SafeLoad(in + 12) << 7, SafeLoad(in + 11),
-                              SafeLoad(in + 10) >> 31 | SafeLoad(in + 11) << 1, SafeLoad(in + 10),
-                              SafeLoad(in + 10), SafeLoad(in + 9) >> 24 | SafeLoad(in + 10) << 8);
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  in += 13;
-
-  return in;
-}
-
-inline static const uint32_t* unpack14_32_avx2(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x3fff;
-  __m256i reg_shifts, reg_inls, reg_masks;
-  __m256i results;
-
-  reg_masks = _mm256_set1_epi32(mask);
-
-  // shift the first 8 outs
-  reg_shifts = _mm256_set_epi32(2, 0, 6, 0,
-                                10, 0, 14, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 3), SafeLoad(in + 2) >> 20 | SafeLoad(in + 3) << 12,
-                              SafeLoad(in + 2), SafeLoad(in + 1) >> 24 | SafeLoad(in + 2) << 8,
-                              SafeLoad(in + 1), SafeLoad(in + 0) >> 28 | SafeLoad(in + 1) << 4,
-                              SafeLoad(in + 0), SafeLoad(in + 0));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the second 8 outs
-  reg_shifts = _mm256_set_epi32(18, 4, 0, 8,
-                                0, 12, 0, 16);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 6), SafeLoad(in + 6),
-                              SafeLoad(in + 5) >> 22 | SafeLoad(in + 6) << 10, SafeLoad(in + 5),
-                              SafeLoad(in + 4) >> 26 | SafeLoad(in + 5) << 6, SafeLoad(in + 4),
-                              SafeLoad(in + 3) >> 30 | SafeLoad(in + 4) << 2, SafeLoad(in + 3));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the third 8 outs
-  reg_shifts = _mm256_set_epi32(2, 0, 6, 0,
-                                10, 0, 14, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 10), SafeLoad(in + 9) >> 20 | SafeLoad(in + 10) << 12,
-                              SafeLoad(in + 9), SafeLoad(in + 8) >> 24 | SafeLoad(in + 9) << 8,
-                              SafeLoad(in + 8), SafeLoad(in + 7) >> 28 | SafeLoad(in + 8) << 4,
-                              SafeLoad(in + 7), SafeLoad(in + 7));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the last 8 outs
-  reg_shifts = _mm256_set_epi32(18, 4, 0, 8,
-                                0, 12, 0, 16);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 13), SafeLoad(in + 13),
-                              SafeLoad(in + 12) >> 22 | SafeLoad(in + 13) << 10, SafeLoad(in + 12),
-                              SafeLoad(in + 11) >> 26 | SafeLoad(in + 12) << 6, SafeLoad(in + 11),
-                              SafeLoad(in + 10) >> 30 | SafeLoad(in + 11) << 2, SafeLoad(in + 10));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  in += 14;
-
-  return in;
-}
-
-inline static const uint32_t* unpack15_32_avx2(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x7fff;
-  __m256i reg_shifts, reg_inls, reg_masks;
-  __m256i results;
-
-  reg_masks = _mm256_set1_epi32(mask);
-
-  // shift the first 8 outs
-  reg_shifts = _mm256_set_epi32(9, 0, 11, 0,
-                                13, 0, 15, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 3), SafeLoad(in + 2) >> 26 | SafeLoad(in + 3) << 6,
-                              SafeLoad(in + 2), SafeLoad(in + 1) >> 28 | SafeLoad(in + 2) << 4,
-                              SafeLoad(in + 1), SafeLoad(in + 0) >> 30 | SafeLoad(in + 1) << 2,
-                              SafeLoad(in + 0), SafeLoad(in + 0));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the second 8 outs
-  reg_shifts = _mm256_set_epi32(1, 0, 3, 0,
-                                5, 0, 7, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 7), SafeLoad(in + 6) >> 18 | SafeLoad(in + 7) << 14,
-                              SafeLoad(in + 6), SafeLoad(in + 5) >> 20 | SafeLoad(in + 6) << 12,
-                              SafeLoad(in + 5), SafeLoad(in + 4) >> 22 | SafeLoad(in + 5) << 10,
-                              SafeLoad(in + 4), SafeLoad(in + 3) >> 24 | SafeLoad(in + 4) << 8);
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the third 8 outs
-  reg_shifts = _mm256_set_epi32(0, 10, 0, 12,
-                                0, 14, 0, 16);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 10) >> 25 | SafeLoad(in + 11) << 7, SafeLoad(in + 10),
-                              SafeLoad(in + 9) >> 27 | SafeLoad(in + 10) << 5, SafeLoad(in + 9),
-                              SafeLoad(in + 8) >> 29 | SafeLoad(in + 9) << 3, SafeLoad(in + 8),
-                              SafeLoad(in + 7) >> 31 | SafeLoad(in + 8) << 1, SafeLoad(in + 7));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the last 8 outs
-  reg_shifts = _mm256_set_epi32(17, 2, 0, 4,
-                                0, 6, 0, 8);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 14), SafeLoad(in + 14),
-                              SafeLoad(in + 13) >> 19 | SafeLoad(in + 14) << 13, SafeLoad(in + 13),
-                              SafeLoad(in + 12) >> 21 | SafeLoad(in + 13) << 11, SafeLoad(in + 12),
-                              SafeLoad(in + 11) >> 23 | SafeLoad(in + 12) << 9, SafeLoad(in + 11));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  in += 15;
-
-  return in;
-}
-
-inline static const uint32_t* unpack16_32_avx2(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0xffff;
-  __m256i reg_shifts, reg_inls, reg_masks;
-  __m256i results;
-
-  reg_masks = _mm256_set1_epi32(mask);
-
-  // shift the first 8 outs
-  reg_shifts = _mm256_set_epi32(16, 0, 16, 0,
-                                16, 0, 16, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 3), SafeLoad(in + 3),
-                              SafeLoad(in + 2), SafeLoad(in + 2),
-                              SafeLoad(in + 1), SafeLoad(in + 1),
-                              SafeLoad(in + 0), SafeLoad(in + 0));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the second 8 outs
-  reg_shifts = _mm256_set_epi32(16, 0, 16, 0,
-                                16, 0, 16, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 7), SafeLoad(in + 7),
-                              SafeLoad(in + 6), SafeLoad(in + 6),
-                              SafeLoad(in + 5), SafeLoad(in + 5),
-                              SafeLoad(in + 4), SafeLoad(in + 4));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the third 8 outs
-  reg_shifts = _mm256_set_epi32(16, 0, 16, 0,
-                                16, 0, 16, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 11), SafeLoad(in + 11),
-                              SafeLoad(in + 10), SafeLoad(in + 10),
-                              SafeLoad(in + 9), SafeLoad(in + 9),
-                              SafeLoad(in + 8), SafeLoad(in + 8));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the last 8 outs
-  reg_shifts = _mm256_set_epi32(16, 0, 16, 0,
-                                16, 0, 16, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 15), SafeLoad(in + 15),
-                              SafeLoad(in + 14), SafeLoad(in + 14),
-                              SafeLoad(in + 13), SafeLoad(in + 13),
-                              SafeLoad(in + 12), SafeLoad(in + 12));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  in += 16;
-
-  return in;
-}
-
-inline static const uint32_t* unpack17_32_avx2(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x1ffff;
-  __m256i reg_shifts, reg_inls, reg_masks;
-  __m256i results;
-
-  reg_masks = _mm256_set1_epi32(mask);
-
-  // shift the first 8 outs
-  reg_shifts = _mm256_set_epi32(0, 6, 0, 4,
-                                0, 2, 0, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 3) >> 23 | SafeLoad(in + 4) << 9, SafeLoad(in + 3),
-                              SafeLoad(in + 2) >> 21 | SafeLoad(in + 3) << 11, SafeLoad(in + 2),
-                              SafeLoad(in + 1) >> 19 | SafeLoad(in + 2) << 13, SafeLoad(in + 1),
-                              SafeLoad(in + 0) >> 17 | SafeLoad(in + 1) << 15, SafeLoad(in + 0));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the second 8 outs
-  reg_shifts = _mm256_set_epi32(0, 14, 0, 12,
-                                0, 10, 0, 8);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 7) >> 31 | SafeLoad(in + 8) << 1, SafeLoad(in + 7),
-                              SafeLoad(in + 6) >> 29 | SafeLoad(in + 7) << 3, SafeLoad(in + 6),
-                              SafeLoad(in + 5) >> 27 | SafeLoad(in + 6) << 5, SafeLoad(in + 5),
-                              SafeLoad(in + 4) >> 25 | SafeLoad(in + 5) << 7, SafeLoad(in + 4));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the third 8 outs
-  reg_shifts = _mm256_set_epi32(7, 0, 5, 0,
-                                3, 0, 1, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 12), SafeLoad(in + 11) >> 22 | SafeLoad(in + 12) << 10,
-                              SafeLoad(in + 11), SafeLoad(in + 10) >> 20 | SafeLoad(in + 11) << 12,
-                              SafeLoad(in + 10), SafeLoad(in + 9) >> 18 | SafeLoad(in + 10) << 14,
-                              SafeLoad(in + 9), SafeLoad(in + 8) >> 16 | SafeLoad(in + 9) << 16);
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the last 8 outs
-  reg_shifts = _mm256_set_epi32(15, 0, 13, 0,
-                                11, 0, 9, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 16), SafeLoad(in + 15) >> 30 | SafeLoad(in + 16) << 2,
-                              SafeLoad(in + 15), SafeLoad(in + 14) >> 28 | SafeLoad(in + 15) << 4,
-                              SafeLoad(in + 14), SafeLoad(in + 13) >> 26 | SafeLoad(in + 14) << 6,
-                              SafeLoad(in + 13), SafeLoad(in + 12) >> 24 | SafeLoad(in + 13) << 8);
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  in += 17;
-
-  return in;
-}
-
-inline static const uint32_t* unpack18_32_avx2(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x3ffff;
-  __m256i reg_shifts, reg_inls, reg_masks;
-  __m256i results;
-
-  reg_masks = _mm256_set1_epi32(mask);
-
-  // shift the first 8 outs
-  reg_shifts = _mm256_set_epi32(0, 12, 0, 8,
-                                0, 4, 0, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 3) >> 30 | SafeLoad(in + 4) << 2, SafeLoad(in + 3),
-                              SafeLoad(in + 2) >> 26 | SafeLoad(in + 3) << 6, SafeLoad(in + 2),
-                              SafeLoad(in + 1) >> 22 | SafeLoad(in + 2) << 10, SafeLoad(in + 1),
-                              SafeLoad(in + 0) >> 18 | SafeLoad(in + 1) << 14, SafeLoad(in + 0));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the second 8 outs
-  reg_shifts = _mm256_set_epi32(14, 0, 10, 0,
-                                6, 0, 2, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 8), SafeLoad(in + 7) >> 28 | SafeLoad(in + 8) << 4,
-                              SafeLoad(in + 7), SafeLoad(in + 6) >> 24 | SafeLoad(in + 7) << 8,
-                              SafeLoad(in + 6), SafeLoad(in + 5) >> 20 | SafeLoad(in + 6) << 12,
-                              SafeLoad(in + 5), SafeLoad(in + 4) >> 16 | SafeLoad(in + 5) << 16);
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the third 8 outs
-  reg_shifts = _mm256_set_epi32(0, 12, 0, 8,
-                                0, 4, 0, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 12) >> 30 | SafeLoad(in + 13) << 2, SafeLoad(in + 12),
-                              SafeLoad(in + 11) >> 26 | SafeLoad(in + 12) << 6, SafeLoad(in + 11),
-                              SafeLoad(in + 10) >> 22 | SafeLoad(in + 11) << 10, SafeLoad(in + 10),
-                              SafeLoad(in + 9) >> 18 | SafeLoad(in + 10) << 14, SafeLoad(in + 9));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the last 8 outs
-  reg_shifts = _mm256_set_epi32(14, 0, 10, 0,
-                                6, 0, 2, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 17), SafeLoad(in + 16) >> 28 | SafeLoad(in + 17) << 4,
-                              SafeLoad(in + 16), SafeLoad(in + 15) >> 24 | SafeLoad(in + 16) << 8,
-                              SafeLoad(in + 15), SafeLoad(in + 14) >> 20 | SafeLoad(in + 15) << 12,
-                              SafeLoad(in + 14), SafeLoad(in + 13) >> 16 | SafeLoad(in + 14) << 16);
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  in += 18;
-
-  return in;
-}
-
-inline static const uint32_t* unpack19_32_avx2(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x7ffff;
-  __m256i reg_shifts, reg_inls, reg_masks;
-  __m256i results;
-
-  reg_masks = _mm256_set1_epi32(mask);
-
-  // shift the first 8 outs
-  reg_shifts = _mm256_set_epi32(5, 0, 0, 12,
-                                0, 6, 0, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 4), SafeLoad(in + 3) >> 18 | SafeLoad(in + 4) << 14,
-                              SafeLoad(in + 2) >> 31 | SafeLoad(in + 3) << 1, SafeLoad(in + 2),
-                              SafeLoad(in + 1) >> 25 | SafeLoad(in + 2) << 7, SafeLoad(in + 1),
-                              SafeLoad(in + 0) >> 19 | SafeLoad(in + 1) << 13, SafeLoad(in + 0));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the second 8 outs
-  reg_shifts = _mm256_set_epi32(0, 10, 0, 4,
-                                0, 0, 11, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 8) >> 29 | SafeLoad(in + 9) << 3, SafeLoad(in + 8),
-                              SafeLoad(in + 7) >> 23 | SafeLoad(in + 8) << 9, SafeLoad(in + 7),
-                              SafeLoad(in + 6) >> 17 | SafeLoad(in + 7) << 15, SafeLoad(in + 5) >> 30 | SafeLoad(in + 6) << 2,
-                              SafeLoad(in + 5), SafeLoad(in + 4) >> 24 | SafeLoad(in + 5) << 8);
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the third 8 outs
-  reg_shifts = _mm256_set_epi32(0, 2, 0, 0,
-                                9, 0, 3, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 13) >> 21 | SafeLoad(in + 14) << 11, SafeLoad(in + 13),
-                              SafeLoad(in + 12) >> 15 | SafeLoad(in + 13) << 17, SafeLoad(in + 11) >> 28 | SafeLoad(in + 12) << 4,
-                              SafeLoad(in + 11), SafeLoad(in + 10) >> 22 | SafeLoad(in + 11) << 10,
-                              SafeLoad(in + 10), SafeLoad(in + 9) >> 16 | SafeLoad(in + 10) << 16);
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the last 8 outs
-  reg_shifts = _mm256_set_epi32(13, 0, 7, 0,
-                                1, 0, 0, 8);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 18), SafeLoad(in + 17) >> 26 | SafeLoad(in + 18) << 6,
-                              SafeLoad(in + 17), SafeLoad(in + 16) >> 20 | SafeLoad(in + 17) << 12,
-                              SafeLoad(in + 16), SafeLoad(in + 15) >> 14 | SafeLoad(in + 16) << 18,
-                              SafeLoad(in + 14) >> 27 | SafeLoad(in + 15) << 5, SafeLoad(in + 14));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  in += 19;
-
-  return in;
-}
-
-inline static const uint32_t* unpack20_32_avx2(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0xfffff;
-  __m256i reg_shifts, reg_inls, reg_masks;
-  __m256i results;
-
-  reg_masks = _mm256_set1_epi32(mask);
-
-  // shift the first 8 outs
-  reg_shifts = _mm256_set_epi32(12, 0, 4, 0,
-                                0, 8, 0, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 4), SafeLoad(in + 3) >> 24 | SafeLoad(in + 4) << 8,
-                              SafeLoad(in + 3), SafeLoad(in + 2) >> 16 | SafeLoad(in + 3) << 16,
-                              SafeLoad(in + 1) >> 28 | SafeLoad(in + 2) << 4, SafeLoad(in + 1),
-                              SafeLoad(in + 0) >> 20 | SafeLoad(in + 1) << 12, SafeLoad(in + 0));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the second 8 outs
-  reg_shifts = _mm256_set_epi32(12, 0, 4, 0,
-                                0, 8, 0, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 9), SafeLoad(in + 8) >> 24 | SafeLoad(in + 9) << 8,
-                              SafeLoad(in + 8), SafeLoad(in + 7) >> 16 | SafeLoad(in + 8) << 16,
-                              SafeLoad(in + 6) >> 28 | SafeLoad(in + 7) << 4, SafeLoad(in + 6),
-                              SafeLoad(in + 5) >> 20 | SafeLoad(in + 6) << 12, SafeLoad(in + 5));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the third 8 outs
-  reg_shifts = _mm256_set_epi32(12, 0, 4, 0,
-                                0, 8, 0, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 14), SafeLoad(in + 13) >> 24 | SafeLoad(in + 14) << 8,
-                              SafeLoad(in + 13), SafeLoad(in + 12) >> 16 | SafeLoad(in + 13) << 16,
-                              SafeLoad(in + 11) >> 28 | SafeLoad(in + 12) << 4, SafeLoad(in + 11),
-                              SafeLoad(in + 10) >> 20 | SafeLoad(in + 11) << 12, SafeLoad(in + 10));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the last 8 outs
-  reg_shifts = _mm256_set_epi32(12, 0, 4, 0,
-                                0, 8, 0, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 19), SafeLoad(in + 18) >> 24 | SafeLoad(in + 19) << 8,
-                              SafeLoad(in + 18), SafeLoad(in + 17) >> 16 | SafeLoad(in + 18) << 16,
-                              SafeLoad(in + 16) >> 28 | SafeLoad(in + 17) << 4, SafeLoad(in + 16),
-                              SafeLoad(in + 15) >> 20 | SafeLoad(in + 16) << 12, SafeLoad(in + 15));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  in += 20;
-
-  return in;
-}
-
-inline static const uint32_t* unpack21_32_avx2(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x1fffff;
-  __m256i reg_shifts, reg_inls, reg_masks;
-  __m256i results;
-
-  reg_masks = _mm256_set1_epi32(mask);
-
-  // shift the first 8 outs
-  reg_shifts = _mm256_set_epi32(0, 0, 9, 0,
-                                0, 10, 0, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 4) >> 19 | SafeLoad(in + 5) << 13, SafeLoad(in + 3) >> 30 | SafeLoad(in + 4) << 2,
-                              SafeLoad(in + 3), SafeLoad(in + 2) >> 20 | SafeLoad(in + 3) << 12,
-                              SafeLoad(in + 1) >> 31 | SafeLoad(in + 2) << 1, SafeLoad(in + 1),
-                              SafeLoad(in + 0) >> 21 | SafeLoad(in + 1) << 11, SafeLoad(in + 0));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the second 8 outs
-  reg_shifts = _mm256_set_epi32(0, 6, 0, 0,
-                                7, 0, 0, 8);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 9) >> 27 | SafeLoad(in + 10) << 5, SafeLoad(in + 9),
-                              SafeLoad(in + 8) >> 17 | SafeLoad(in + 9) << 15, SafeLoad(in + 7) >> 28 | SafeLoad(in + 8) << 4,
-                              SafeLoad(in + 7), SafeLoad(in + 6) >> 18 | SafeLoad(in + 7) << 14,
-                              SafeLoad(in + 5) >> 29 | SafeLoad(in + 6) << 3, SafeLoad(in + 5));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the third 8 outs
-  reg_shifts = _mm256_set_epi32(3, 0, 0, 4,
-                                0, 0, 5, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 15), SafeLoad(in + 14) >> 14 | SafeLoad(in + 15) << 18,
-                              SafeLoad(in + 13) >> 25 | SafeLoad(in + 14) << 7, SafeLoad(in + 13),
-                              SafeLoad(in + 12) >> 15 | SafeLoad(in + 13) << 17, SafeLoad(in + 11) >> 26 | SafeLoad(in + 12) << 6,
-                              SafeLoad(in + 11), SafeLoad(in + 10) >> 16 | SafeLoad(in + 11) << 16);
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the last 8 outs
-  reg_shifts = _mm256_set_epi32(11, 0, 1, 0,
-                                0, 2, 0, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 20), SafeLoad(in + 19) >> 22 | SafeLoad(in + 20) << 10,
-                              SafeLoad(in + 19), SafeLoad(in + 18) >> 12 | SafeLoad(in + 19) << 20,
-                              SafeLoad(in + 17) >> 23 | SafeLoad(in + 18) << 9, SafeLoad(in + 17),
-                              SafeLoad(in + 16) >> 13 | SafeLoad(in + 17) << 19, SafeLoad(in + 15) >> 24 | SafeLoad(in + 16) << 8);
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  in += 21;
-
-  return in;
-}
-
-inline static const uint32_t* unpack22_32_avx2(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x3fffff;
-  __m256i reg_shifts, reg_inls, reg_masks;
-  __m256i results;
-
-  reg_masks = _mm256_set1_epi32(mask);
-
-  // shift the first 8 outs
-  reg_shifts = _mm256_set_epi32(0, 4, 0, 0,
-                                2, 0, 0, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 4) >> 26 | SafeLoad(in + 5) << 6, SafeLoad(in + 4),
-                              SafeLoad(in + 3) >> 14 | SafeLoad(in + 4) << 18, SafeLoad(in + 2) >> 24 | SafeLoad(in + 3) << 8,
-                              SafeLoad(in + 2), SafeLoad(in + 1) >> 12 | SafeLoad(in + 2) << 20,
-                              SafeLoad(in + 0) >> 22 | SafeLoad(in + 1) << 10, SafeLoad(in + 0));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the second 8 outs
-  reg_shifts = _mm256_set_epi32(10, 0, 0, 8,
-                                0, 0, 6, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 10), SafeLoad(in + 9) >> 20 | SafeLoad(in + 10) << 12,
-                              SafeLoad(in + 8) >> 30 | SafeLoad(in + 9) << 2, SafeLoad(in + 8),
-                              SafeLoad(in + 7) >> 18 | SafeLoad(in + 8) << 14, SafeLoad(in + 6) >> 28 | SafeLoad(in + 7) << 4,
-                              SafeLoad(in + 6), SafeLoad(in + 5) >> 16 | SafeLoad(in + 6) << 16);
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the third 8 outs
-  reg_shifts = _mm256_set_epi32(0, 4, 0, 0,
-                                2, 0, 0, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 15) >> 26 | SafeLoad(in + 16) << 6, SafeLoad(in + 15),
-                              SafeLoad(in + 14) >> 14 | SafeLoad(in + 15) << 18, SafeLoad(in + 13) >> 24 | SafeLoad(in + 14) << 8,
-                              SafeLoad(in + 13), SafeLoad(in + 12) >> 12 | SafeLoad(in + 13) << 20,
-                              SafeLoad(in + 11) >> 22 | SafeLoad(in + 12) << 10, SafeLoad(in + 11));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the last 8 outs
-  reg_shifts = _mm256_set_epi32(10, 0, 0, 8,
-                                0, 0, 6, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 21), SafeLoad(in + 20) >> 20 | SafeLoad(in + 21) << 12,
-                              SafeLoad(in + 19) >> 30 | SafeLoad(in + 20) << 2, SafeLoad(in + 19),
-                              SafeLoad(in + 18) >> 18 | SafeLoad(in + 19) << 14, SafeLoad(in + 17) >> 28 | SafeLoad(in + 18) << 4,
-                              SafeLoad(in + 17), SafeLoad(in + 16) >> 16 | SafeLoad(in + 17) << 16);
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  in += 22;
-
-  return in;
-}
-
-inline static const uint32_t* unpack23_32_avx2(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x7fffff;
-  __m256i reg_shifts, reg_inls, reg_masks;
-  __m256i results;
-
-  reg_masks = _mm256_set1_epi32(mask);
-
-  // shift the first 8 outs
-  reg_shifts = _mm256_set_epi32(1, 0, 0, 0,
-                                5, 0, 0, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 5), SafeLoad(in + 4) >> 10 | SafeLoad(in + 5) << 22,
-                              SafeLoad(in + 3) >> 19 | SafeLoad(in + 4) << 13, SafeLoad(in + 2) >> 28 | SafeLoad(in + 3) << 4,
-                              SafeLoad(in + 2), SafeLoad(in + 1) >> 14 | SafeLoad(in + 2) << 18,
-                              SafeLoad(in + 0) >> 23 | SafeLoad(in + 1) << 9, SafeLoad(in + 0));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the second 8 outs
-  reg_shifts = _mm256_set_epi32(0, 2, 0, 0,
-                                0, 6, 0, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 10) >> 25 | SafeLoad(in + 11) << 7, SafeLoad(in + 10),
-                              SafeLoad(in + 9) >> 11 | SafeLoad(in + 10) << 21, SafeLoad(in + 8) >> 20 | SafeLoad(in + 9) << 12,
-                              SafeLoad(in + 7) >> 29 | SafeLoad(in + 8) << 3, SafeLoad(in + 7),
-                              SafeLoad(in + 6) >> 15 | SafeLoad(in + 7) << 17, SafeLoad(in + 5) >> 24 | SafeLoad(in + 6) << 8);
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the third 8 outs
-  reg_shifts = _mm256_set_epi32(0, 0, 3, 0,
-                                0, 0, 7, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 16) >> 17 | SafeLoad(in + 17) << 15, SafeLoad(in + 15) >> 26 | SafeLoad(in + 16) << 6,
-                              SafeLoad(in + 15), SafeLoad(in + 14) >> 12 | SafeLoad(in + 15) << 20,
-                              SafeLoad(in + 13) >> 21 | SafeLoad(in + 14) << 11, SafeLoad(in + 12) >> 30 | SafeLoad(in + 13) << 2,
-                              SafeLoad(in + 12), SafeLoad(in + 11) >> 16 | SafeLoad(in + 12) << 16);
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the last 8 outs
-  reg_shifts = _mm256_set_epi32(9, 0, 0, 4,
-                                0, 0, 0, 8);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 22), SafeLoad(in + 21) >> 18 | SafeLoad(in + 22) << 14,
-                              SafeLoad(in + 20) >> 27 | SafeLoad(in + 21) << 5, SafeLoad(in + 20),
-                              SafeLoad(in + 19) >> 13 | SafeLoad(in + 20) << 19, SafeLoad(in + 18) >> 22 | SafeLoad(in + 19) << 10,
-                              SafeLoad(in + 17) >> 31 | SafeLoad(in + 18) << 1, SafeLoad(in + 17));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  in += 23;
-
-  return in;
-}
-
-inline static const uint32_t* unpack24_32_avx2(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0xffffff;
-  __m256i reg_shifts, reg_inls, reg_masks;
-  __m256i results;
-
-  reg_masks = _mm256_set1_epi32(mask);
-
-  // shift the first 8 outs
-  reg_shifts = _mm256_set_epi32(8, 0, 0, 0,
-                                8, 0, 0, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 5), SafeLoad(in + 4) >> 16 | SafeLoad(in + 5) << 16,
-                              SafeLoad(in + 3) >> 24 | SafeLoad(in + 4) << 8, SafeLoad(in + 3),
-                              SafeLoad(in + 2), SafeLoad(in + 1) >> 16 | SafeLoad(in + 2) << 16,
-                              SafeLoad(in + 0) >> 24 | SafeLoad(in + 1) << 8, SafeLoad(in + 0));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the second 8 outs
-  reg_shifts = _mm256_set_epi32(8, 0, 0, 0,
-                                8, 0, 0, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 11), SafeLoad(in + 10) >> 16 | SafeLoad(in + 11) << 16,
-                              SafeLoad(in + 9) >> 24 | SafeLoad(in + 10) << 8, SafeLoad(in + 9),
-                              SafeLoad(in + 8), SafeLoad(in + 7) >> 16 | SafeLoad(in + 8) << 16,
-                              SafeLoad(in + 6) >> 24 | SafeLoad(in + 7) << 8, SafeLoad(in + 6));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the third 8 outs
-  reg_shifts = _mm256_set_epi32(8, 0, 0, 0,
-                                8, 0, 0, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 17), SafeLoad(in + 16) >> 16 | SafeLoad(in + 17) << 16,
-                              SafeLoad(in + 15) >> 24 | SafeLoad(in + 16) << 8, SafeLoad(in + 15),
-                              SafeLoad(in + 14), SafeLoad(in + 13) >> 16 | SafeLoad(in + 14) << 16,
-                              SafeLoad(in + 12) >> 24 | SafeLoad(in + 13) << 8, SafeLoad(in + 12));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the last 8 outs
-  reg_shifts = _mm256_set_epi32(8, 0, 0, 0,
-                                8, 0, 0, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 23), SafeLoad(in + 22) >> 16 | SafeLoad(in + 23) << 16,
-                              SafeLoad(in + 21) >> 24 | SafeLoad(in + 22) << 8, SafeLoad(in + 21),
-                              SafeLoad(in + 20), SafeLoad(in + 19) >> 16 | SafeLoad(in + 20) << 16,
-                              SafeLoad(in + 18) >> 24 | SafeLoad(in + 19) << 8, SafeLoad(in + 18));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  in += 24;
-
-  return in;
-}
-
-inline static const uint32_t* unpack25_32_avx2(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x1ffffff;
-  __m256i reg_shifts, reg_inls, reg_masks;
-  __m256i results;
-
-  reg_masks = _mm256_set1_epi32(mask);
-
-  // shift the first 8 outs
-  reg_shifts = _mm256_set_epi32(0, 0, 0, 4,
-                                0, 0, 0, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 5) >> 15 | SafeLoad(in + 6) << 17, SafeLoad(in + 4) >> 22 | SafeLoad(in + 5) << 10,
-                              SafeLoad(in + 3) >> 29 | SafeLoad(in + 4) << 3, SafeLoad(in + 3),
-                              SafeLoad(in + 2) >> 11 | SafeLoad(in + 3) << 21, SafeLoad(in + 1) >> 18 | SafeLoad(in + 2) << 14,
-                              SafeLoad(in + 0) >> 25 | SafeLoad(in + 1) << 7, SafeLoad(in + 0));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the second 8 outs
-  reg_shifts = _mm256_set_epi32(0, 0, 5, 0,
-                                0, 0, 1, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 11) >> 23 | SafeLoad(in + 12) << 9, SafeLoad(in + 10) >> 30 | SafeLoad(in + 11) << 2,
-                              SafeLoad(in + 10), SafeLoad(in + 9) >> 12 | SafeLoad(in + 10) << 20,
-                              SafeLoad(in + 8) >> 19 | SafeLoad(in + 9) << 13, SafeLoad(in + 7) >> 26 | SafeLoad(in + 8) << 6,
-                              SafeLoad(in + 7), SafeLoad(in + 6) >> 8 | SafeLoad(in + 7) << 24);
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the third 8 outs
-  reg_shifts = _mm256_set_epi32(0, 6, 0, 0,
-                                0, 2, 0, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 17) >> 31 | SafeLoad(in + 18) << 1, SafeLoad(in + 17),
-                              SafeLoad(in + 16) >> 13 | SafeLoad(in + 17) << 19, SafeLoad(in + 15) >> 20 | SafeLoad(in + 16) << 12,
-                              SafeLoad(in + 14) >> 27 | SafeLoad(in + 15) << 5, SafeLoad(in + 14),
-                              SafeLoad(in + 13) >> 9 | SafeLoad(in + 14) << 23, SafeLoad(in + 12) >> 16 | SafeLoad(in + 13) << 16);
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the last 8 outs
-  reg_shifts = _mm256_set_epi32(7, 0, 0, 0,
-                                3, 0, 0, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 24), SafeLoad(in + 23) >> 14 | SafeLoad(in + 24) << 18,
-                              SafeLoad(in + 22) >> 21 | SafeLoad(in + 23) << 11, SafeLoad(in + 21) >> 28 | SafeLoad(in + 22) << 4,
-                              SafeLoad(in + 21), SafeLoad(in + 20) >> 10 | SafeLoad(in + 21) << 22,
-                              SafeLoad(in + 19) >> 17 | SafeLoad(in + 20) << 15, SafeLoad(in + 18) >> 24 | SafeLoad(in + 19) << 8);
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  in += 25;
-
-  return in;
-}
-
-inline static const uint32_t* unpack26_32_avx2(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x3ffffff;
-  __m256i reg_shifts, reg_inls, reg_masks;
-  __m256i results;
-
-  reg_masks = _mm256_set1_epi32(mask);
-
-  // shift the first 8 outs
-  reg_shifts = _mm256_set_epi32(0, 0, 2, 0,
-                                0, 0, 0, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 5) >> 22 | SafeLoad(in + 6) << 10, SafeLoad(in + 4) >> 28 | SafeLoad(in + 5) << 4,
-                              SafeLoad(in + 4), SafeLoad(in + 3) >> 8 | SafeLoad(in + 4) << 24,
-                              SafeLoad(in + 2) >> 14 | SafeLoad(in + 3) << 18, SafeLoad(in + 1) >> 20 | SafeLoad(in + 2) << 12,
-                              SafeLoad(in + 0) >> 26 | SafeLoad(in + 1) << 6, SafeLoad(in + 0));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the second 8 outs
-  reg_shifts = _mm256_set_epi32(6, 0, 0, 0,
-                                0, 4, 0, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 12), SafeLoad(in + 11) >> 12 | SafeLoad(in + 12) << 20,
-                              SafeLoad(in + 10) >> 18 | SafeLoad(in + 11) << 14, SafeLoad(in + 9) >> 24 | SafeLoad(in + 10) << 8,
-                              SafeLoad(in + 8) >> 30 | SafeLoad(in + 9) << 2, SafeLoad(in + 8),
-                              SafeLoad(in + 7) >> 10 | SafeLoad(in + 8) << 22, SafeLoad(in + 6) >> 16 | SafeLoad(in + 7) << 16);
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the third 8 outs
-  reg_shifts = _mm256_set_epi32(0, 0, 2, 0,
-                                0, 0, 0, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 18) >> 22 | SafeLoad(in + 19) << 10, SafeLoad(in + 17) >> 28 | SafeLoad(in + 18) << 4,
-                              SafeLoad(in + 17), SafeLoad(in + 16) >> 8 | SafeLoad(in + 17) << 24,
-                              SafeLoad(in + 15) >> 14 | SafeLoad(in + 16) << 18, SafeLoad(in + 14) >> 20 | SafeLoad(in + 15) << 12,
-                              SafeLoad(in + 13) >> 26 | SafeLoad(in + 14) << 6, SafeLoad(in + 13));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the last 8 outs
-  reg_shifts = _mm256_set_epi32(6, 0, 0, 0,
-                                0, 4, 0, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 25), SafeLoad(in + 24) >> 12 | SafeLoad(in + 25) << 20,
-                              SafeLoad(in + 23) >> 18 | SafeLoad(in + 24) << 14, SafeLoad(in + 22) >> 24 | SafeLoad(in + 23) << 8,
-                              SafeLoad(in + 21) >> 30 | SafeLoad(in + 22) << 2, SafeLoad(in + 21),
-                              SafeLoad(in + 20) >> 10 | SafeLoad(in + 21) << 22, SafeLoad(in + 19) >> 16 | SafeLoad(in + 20) << 16);
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  in += 26;
-
-  return in;
-}
-
-inline static const uint32_t* unpack27_32_avx2(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x7ffffff;
-  __m256i reg_shifts, reg_inls, reg_masks;
-  __m256i results;
-
-  reg_masks = _mm256_set1_epi32(mask);
-
-  // shift the first 8 outs
-  reg_shifts = _mm256_set_epi32(0, 2, 0, 0,
-                                0, 0, 0, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 5) >> 29 | SafeLoad(in + 6) << 3, SafeLoad(in + 5),
-                              SafeLoad(in + 4) >> 7 | SafeLoad(in + 5) << 25, SafeLoad(in + 3) >> 12 | SafeLoad(in + 4) << 20,
-                              SafeLoad(in + 2) >> 17 | SafeLoad(in + 3) << 15, SafeLoad(in + 1) >> 22 | SafeLoad(in + 2) << 10,
-                              SafeLoad(in + 0) >> 27 | SafeLoad(in + 1) << 5, SafeLoad(in + 0));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the second 8 outs
-  reg_shifts = _mm256_set_epi32(0, 0, 0, 4,
-                                0, 0, 0, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 12) >> 21 | SafeLoad(in + 13) << 11, SafeLoad(in + 11) >> 26 | SafeLoad(in + 12) << 6,
-                              SafeLoad(in + 10) >> 31 | SafeLoad(in + 11) << 1, SafeLoad(in + 10),
-                              SafeLoad(in + 9) >> 9 | SafeLoad(in + 10) << 23, SafeLoad(in + 8) >> 14 | SafeLoad(in + 9) << 18,
-                              SafeLoad(in + 7) >> 19 | SafeLoad(in + 8) << 13, SafeLoad(in + 6) >> 24 | SafeLoad(in + 7) << 8);
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the third 8 outs
-  reg_shifts = _mm256_set_epi32(0, 0, 0, 0,
-                                1, 0, 0, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 19) >> 13 | SafeLoad(in + 20) << 19, SafeLoad(in + 18) >> 18 | SafeLoad(in + 19) << 14,
-                              SafeLoad(in + 17) >> 23 | SafeLoad(in + 18) << 9, SafeLoad(in + 16) >> 28 | SafeLoad(in + 17) << 4,
-                              SafeLoad(in + 16), SafeLoad(in + 15) >> 6 | SafeLoad(in + 16) << 26,
-                              SafeLoad(in + 14) >> 11 | SafeLoad(in + 15) << 21, SafeLoad(in + 13) >> 16 | SafeLoad(in + 14) << 16);
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the last 8 outs
-  reg_shifts = _mm256_set_epi32(5, 0, 0, 0,
-                                0, 0, 3, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 26), SafeLoad(in + 25) >> 10 | SafeLoad(in + 26) << 22,
-                              SafeLoad(in + 24) >> 15 | SafeLoad(in + 25) << 17, SafeLoad(in + 23) >> 20 | SafeLoad(in + 24) << 12,
-                              SafeLoad(in + 22) >> 25 | SafeLoad(in + 23) << 7, SafeLoad(in + 21) >> 30 | SafeLoad(in + 22) << 2,
-                              SafeLoad(in + 21), SafeLoad(in + 20) >> 8 | SafeLoad(in + 21) << 24);
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  in += 27;
-
-  return in;
-}
-
-inline static const uint32_t* unpack28_32_avx2(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0xfffffff;
-  __m256i reg_shifts, reg_inls, reg_masks;
-  __m256i results;
-
-  reg_masks = _mm256_set1_epi32(mask);
-
-  // shift the first 8 outs
-  reg_shifts = _mm256_set_epi32(4, 0, 0, 0,
-                                0, 0, 0, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 6), SafeLoad(in + 5) >> 8 | SafeLoad(in + 6) << 24,
-                              SafeLoad(in + 4) >> 12 | SafeLoad(in + 5) << 20, SafeLoad(in + 3) >> 16 | SafeLoad(in + 4) << 16,
-                              SafeLoad(in + 2) >> 20 | SafeLoad(in + 3) << 12, SafeLoad(in + 1) >> 24 | SafeLoad(in + 2) << 8,
-                              SafeLoad(in + 0) >> 28 | SafeLoad(in + 1) << 4, SafeLoad(in + 0));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the second 8 outs
-  reg_shifts = _mm256_set_epi32(4, 0, 0, 0,
-                                0, 0, 0, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 13), SafeLoad(in + 12) >> 8 | SafeLoad(in + 13) << 24,
-                              SafeLoad(in + 11) >> 12 | SafeLoad(in + 12) << 20, SafeLoad(in + 10) >> 16 | SafeLoad(in + 11) << 16,
-                              SafeLoad(in + 9) >> 20 | SafeLoad(in + 10) << 12, SafeLoad(in + 8) >> 24 | SafeLoad(in + 9) << 8,
-                              SafeLoad(in + 7) >> 28 | SafeLoad(in + 8) << 4, SafeLoad(in + 7));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the third 8 outs
-  reg_shifts = _mm256_set_epi32(4, 0, 0, 0,
-                                0, 0, 0, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 20), SafeLoad(in + 19) >> 8 | SafeLoad(in + 20) << 24,
-                              SafeLoad(in + 18) >> 12 | SafeLoad(in + 19) << 20, SafeLoad(in + 17) >> 16 | SafeLoad(in + 18) << 16,
-                              SafeLoad(in + 16) >> 20 | SafeLoad(in + 17) << 12, SafeLoad(in + 15) >> 24 | SafeLoad(in + 16) << 8,
-                              SafeLoad(in + 14) >> 28 | SafeLoad(in + 15) << 4, SafeLoad(in + 14));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the last 8 outs
-  reg_shifts = _mm256_set_epi32(4, 0, 0, 0,
-                                0, 0, 0, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 27), SafeLoad(in + 26) >> 8 | SafeLoad(in + 27) << 24,
-                              SafeLoad(in + 25) >> 12 | SafeLoad(in + 26) << 20, SafeLoad(in + 24) >> 16 | SafeLoad(in + 25) << 16,
-                              SafeLoad(in + 23) >> 20 | SafeLoad(in + 24) << 12, SafeLoad(in + 22) >> 24 | SafeLoad(in + 23) << 8,
-                              SafeLoad(in + 21) >> 28 | SafeLoad(in + 22) << 4, SafeLoad(in + 21));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  in += 28;
-
-  return in;
-}
-
-inline static const uint32_t* unpack29_32_avx2(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x1fffffff;
-  __m256i reg_shifts, reg_inls, reg_masks;
-  __m256i results;
-
-  reg_masks = _mm256_set1_epi32(mask);
-
-  // shift the first 8 outs
-  reg_shifts = _mm256_set_epi32(0, 0, 0, 0,
-                                0, 0, 0, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 6) >> 11 | SafeLoad(in + 7) << 21, SafeLoad(in + 5) >> 14 | SafeLoad(in + 6) << 18,
-                              SafeLoad(in + 4) >> 17 | SafeLoad(in + 5) << 15, SafeLoad(in + 3) >> 20 | SafeLoad(in + 4) << 12,
-                              SafeLoad(in + 2) >> 23 | SafeLoad(in + 3) << 9, SafeLoad(in + 1) >> 26 | SafeLoad(in + 2) << 6,
-                              SafeLoad(in + 0) >> 29 | SafeLoad(in + 1) << 3, SafeLoad(in + 0));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the second 8 outs
-  reg_shifts = _mm256_set_epi32(0, 0, 0, 0,
-                                0, 2, 0, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 13) >> 19 | SafeLoad(in + 14) << 13, SafeLoad(in + 12) >> 22 | SafeLoad(in + 13) << 10,
-                              SafeLoad(in + 11) >> 25 | SafeLoad(in + 12) << 7, SafeLoad(in + 10) >> 28 | SafeLoad(in + 11) << 4,
-                              SafeLoad(in + 9) >> 31 | SafeLoad(in + 10) << 1, SafeLoad(in + 9),
-                              SafeLoad(in + 8) >> 5 | SafeLoad(in + 9) << 27, SafeLoad(in + 7) >> 8 | SafeLoad(in + 8) << 24);
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the third 8 outs
-  reg_shifts = _mm256_set_epi32(0, 0, 1, 0,
-                                0, 0, 0, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 20) >> 27 | SafeLoad(in + 21) << 5, SafeLoad(in + 19) >> 30 | SafeLoad(in + 20) << 2,
-                              SafeLoad(in + 19), SafeLoad(in + 18) >> 4 | SafeLoad(in + 19) << 28,
-                              SafeLoad(in + 17) >> 7 | SafeLoad(in + 18) << 25, SafeLoad(in + 16) >> 10 | SafeLoad(in + 17) << 22,
-                              SafeLoad(in + 15) >> 13 | SafeLoad(in + 16) << 19, SafeLoad(in + 14) >> 16 | SafeLoad(in + 15) << 16);
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the last 8 outs
-  reg_shifts = _mm256_set_epi32(3, 0, 0, 0,
-                                0, 0, 0, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 28), SafeLoad(in + 27) >> 6 | SafeLoad(in + 28) << 26,
-                              SafeLoad(in + 26) >> 9 | SafeLoad(in + 27) << 23, SafeLoad(in + 25) >> 12 | SafeLoad(in + 26) << 20,
-                              SafeLoad(in + 24) >> 15 | SafeLoad(in + 25) << 17, SafeLoad(in + 23) >> 18 | SafeLoad(in + 24) << 14,
-                              SafeLoad(in + 22) >> 21 | SafeLoad(in + 23) << 11, SafeLoad(in + 21) >> 24 | SafeLoad(in + 22) << 8);
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  in += 29;
-
-  return in;
-}
-
-inline static const uint32_t* unpack30_32_avx2(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x3fffffff;
-  __m256i reg_shifts, reg_inls, reg_masks;
-  __m256i results;
-
-  reg_masks = _mm256_set1_epi32(mask);
-
-  // shift the first 8 outs
-  reg_shifts = _mm256_set_epi32(0, 0, 0, 0,
-                                0, 0, 0, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 6) >> 18 | SafeLoad(in + 7) << 14, SafeLoad(in + 5) >> 20 | SafeLoad(in + 6) << 12,
-                              SafeLoad(in + 4) >> 22 | SafeLoad(in + 5) << 10, SafeLoad(in + 3) >> 24 | SafeLoad(in + 4) << 8,
-                              SafeLoad(in + 2) >> 26 | SafeLoad(in + 3) << 6, SafeLoad(in + 1) >> 28 | SafeLoad(in + 2) << 4,
-                              SafeLoad(in + 0) >> 30 | SafeLoad(in + 1) << 2, SafeLoad(in + 0));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the second 8 outs
-  reg_shifts = _mm256_set_epi32(2, 0, 0, 0,
-                                0, 0, 0, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 14), SafeLoad(in + 13) >> 4 | SafeLoad(in + 14) << 28,
-                              SafeLoad(in + 12) >> 6 | SafeLoad(in + 13) << 26, SafeLoad(in + 11) >> 8 | SafeLoad(in + 12) << 24,
-                              SafeLoad(in + 10) >> 10 | SafeLoad(in + 11) << 22, SafeLoad(in + 9) >> 12 | SafeLoad(in + 10) << 20,
-                              SafeLoad(in + 8) >> 14 | SafeLoad(in + 9) << 18, SafeLoad(in + 7) >> 16 | SafeLoad(in + 8) << 16);
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the third 8 outs
-  reg_shifts = _mm256_set_epi32(0, 0, 0, 0,
-                                0, 0, 0, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 21) >> 18 | SafeLoad(in + 22) << 14, SafeLoad(in + 20) >> 20 | SafeLoad(in + 21) << 12,
-                              SafeLoad(in + 19) >> 22 | SafeLoad(in + 20) << 10, SafeLoad(in + 18) >> 24 | SafeLoad(in + 19) << 8,
-                              SafeLoad(in + 17) >> 26 | SafeLoad(in + 18) << 6, SafeLoad(in + 16) >> 28 | SafeLoad(in + 17) << 4,
-                              SafeLoad(in + 15) >> 30 | SafeLoad(in + 16) << 2, SafeLoad(in + 15));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the last 8 outs
-  reg_shifts = _mm256_set_epi32(2, 0, 0, 0,
-                                0, 0, 0, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 29), SafeLoad(in + 28) >> 4 | SafeLoad(in + 29) << 28,
-                              SafeLoad(in + 27) >> 6 | SafeLoad(in + 28) << 26, SafeLoad(in + 26) >> 8 | SafeLoad(in + 27) << 24,
-                              SafeLoad(in + 25) >> 10 | SafeLoad(in + 26) << 22, SafeLoad(in + 24) >> 12 | SafeLoad(in + 25) << 20,
-                              SafeLoad(in + 23) >> 14 | SafeLoad(in + 24) << 18, SafeLoad(in + 22) >> 16 | SafeLoad(in + 23) << 16);
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  in += 30;
-
-  return in;
-}
-
-inline static const uint32_t* unpack31_32_avx2(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x7fffffff;
-  __m256i reg_shifts, reg_inls, reg_masks;
-  __m256i results;
-
-  reg_masks = _mm256_set1_epi32(mask);
-
-  // shift the first 8 outs
-  reg_shifts = _mm256_set_epi32(0, 0, 0, 0,
-                                0, 0, 0, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 6) >> 25 | SafeLoad(in + 7) << 7, SafeLoad(in + 5) >> 26 | SafeLoad(in + 6) << 6,
-                              SafeLoad(in + 4) >> 27 | SafeLoad(in + 5) << 5, SafeLoad(in + 3) >> 28 | SafeLoad(in + 4) << 4,
-                              SafeLoad(in + 2) >> 29 | SafeLoad(in + 3) << 3, SafeLoad(in + 1) >> 30 | SafeLoad(in + 2) << 2,
-                              SafeLoad(in + 0) >> 31 | SafeLoad(in + 1) << 1, SafeLoad(in + 0));
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the second 8 outs
-  reg_shifts = _mm256_set_epi32(0, 0, 0, 0,
-                                0, 0, 0, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 14) >> 17 | SafeLoad(in + 15) << 15, SafeLoad(in + 13) >> 18 | SafeLoad(in + 14) << 14,
-                              SafeLoad(in + 12) >> 19 | SafeLoad(in + 13) << 13, SafeLoad(in + 11) >> 20 | SafeLoad(in + 12) << 12,
-                              SafeLoad(in + 10) >> 21 | SafeLoad(in + 11) << 11, SafeLoad(in + 9) >> 22 | SafeLoad(in + 10) << 10,
-                              SafeLoad(in + 8) >> 23 | SafeLoad(in + 9) << 9, SafeLoad(in + 7) >> 24 | SafeLoad(in + 8) << 8);
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the third 8 outs
-  reg_shifts = _mm256_set_epi32(0, 0, 0, 0,
-                                0, 0, 0, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 22) >> 9 | SafeLoad(in + 23) << 23, SafeLoad(in + 21) >> 10 | SafeLoad(in + 22) << 22,
-                              SafeLoad(in + 20) >> 11 | SafeLoad(in + 21) << 21, SafeLoad(in + 19) >> 12 | SafeLoad(in + 20) << 20,
-                              SafeLoad(in + 18) >> 13 | SafeLoad(in + 19) << 19, SafeLoad(in + 17) >> 14 | SafeLoad(in + 18) << 18,
-                              SafeLoad(in + 16) >> 15 | SafeLoad(in + 17) << 17, SafeLoad(in + 15) >> 16 | SafeLoad(in + 16) << 16);
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  // shift the last 8 outs
-  reg_shifts = _mm256_set_epi32(1, 0, 0, 0,
-                                0, 0, 0, 0);
-  reg_inls = _mm256_set_epi32(SafeLoad(in + 30), SafeLoad(in + 29) >> 2 | SafeLoad(in + 30) << 30,
-                              SafeLoad(in + 28) >> 3 | SafeLoad(in + 29) << 29, SafeLoad(in + 27) >> 4 | SafeLoad(in + 28) << 28,
-                              SafeLoad(in + 26) >> 5 | SafeLoad(in + 27) << 27, SafeLoad(in + 25) >> 6 | SafeLoad(in + 26) << 26,
-                              SafeLoad(in + 24) >> 7 | SafeLoad(in + 25) << 25, SafeLoad(in + 23) >> 8 | SafeLoad(in + 24) << 24);
-  results = _mm256_and_si256(_mm256_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), results);
-  out += 8;
-
-  in += 31;
-
-  return in;
-}
-
-inline static const uint32_t* unpack32_32_avx2(const uint32_t* in, uint32_t* out) {
-  memcpy(out, in, 32 * sizeof(*out));
-  in += 32;
-  out += 32;
-
-  return in;
-}
-
-}  // namespace internal
-}  // namespace arrow
diff --git a/cpp/src/arrow/util/bpacking_avx512_generated.h b/cpp/src/arrow/util/bpacking_avx512_generated.h
deleted file mode 100644
index fd5db6ecce5..00000000000
--- a/cpp/src/arrow/util/bpacking_avx512_generated.h
+++ /dev/null
@@ -1,1509 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-//
-// Automatically generated file; DO NOT EDIT.
-
-#pragma once
-
-#include <stdint.h>
-#include <string.h>
-
-#ifdef _MSC_VER
-#include <intrin.h>
-#else
-#include <immintrin.h>
-#endif
-
-#include "arrow/util/ubsan.h"
-
-namespace arrow {
-namespace internal {
-
-inline static const uint32_t* unpack0_32_avx512(const uint32_t* in, uint32_t* out) {
-  memset(out, 0x0, 32 * sizeof(*out));
-  out += 32;
-
-  return in;
-}
-
-inline static const uint32_t* unpack1_32_avx512(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x1;
-  __m512i reg_shifts, reg_inls, reg_masks;
-  __m512i results;
-
-  reg_masks = _mm512_set1_epi32(mask);
-
-  // shift the first 16 outs
-  reg_shifts = _mm512_set_epi32(15, 14, 13, 12,
-                                11, 10, 9, 8,
-                                7, 6, 5, 4,
-                                3, 2, 1, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  // shift the second 16 outs
-  reg_shifts = _mm512_set_epi32(31, 30, 29, 28,
-                                27, 26, 25, 24,
-                                23, 22, 21, 20,
-                                19, 18, 17, 16);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  in += 1;
-
-  return in;
-}
-
-inline static const uint32_t* unpack2_32_avx512(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x3;
-  __m512i reg_shifts, reg_inls, reg_masks;
-  __m512i results;
-
-  reg_masks = _mm512_set1_epi32(mask);
-
-  // shift the first 16 outs
-  reg_shifts = _mm512_set_epi32(30, 28, 26, 24,
-                                22, 20, 18, 16,
-                                14, 12, 10, 8,
-                                6, 4, 2, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  // shift the second 16 outs
-  reg_shifts = _mm512_set_epi32(30, 28, 26, 24,
-                                22, 20, 18, 16,
-                                14, 12, 10, 8,
-                                6, 4, 2, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 1), SafeLoad(in + 1),
-                              SafeLoad(in + 1), SafeLoad(in + 1),
-                              SafeLoad(in + 1), SafeLoad(in + 1),
-                              SafeLoad(in + 1), SafeLoad(in + 1),
-                              SafeLoad(in + 1), SafeLoad(in + 1),
-                              SafeLoad(in + 1), SafeLoad(in + 1),
-                              SafeLoad(in + 1), SafeLoad(in + 1),
-                              SafeLoad(in + 1), SafeLoad(in + 1));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  in += 2;
-
-  return in;
-}
-
-inline static const uint32_t* unpack3_32_avx512(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x7;
-  __m512i reg_shifts, reg_inls, reg_masks;
-  __m512i results;
-
-  reg_masks = _mm512_set1_epi32(mask);
-
-  // shift the first 16 outs
-  reg_shifts = _mm512_set_epi32(13, 10, 7, 4,
-                                1, 0, 27, 24,
-                                21, 18, 15, 12,
-                                9, 6, 3, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 1), SafeLoad(in + 1),
-                              SafeLoad(in + 1), SafeLoad(in + 1),
-                              SafeLoad(in + 1), SafeLoad(in + 0) >> 30 | SafeLoad(in + 1) << 2,
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  // shift the second 16 outs
-  reg_shifts = _mm512_set_epi32(29, 26, 23, 20,
-                                17, 14, 11, 8,
-                                5, 2, 0, 28,
-                                25, 22, 19, 16);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 2), SafeLoad(in + 2),
-                              SafeLoad(in + 2), SafeLoad(in + 2),
-                              SafeLoad(in + 2), SafeLoad(in + 2),
-                              SafeLoad(in + 2), SafeLoad(in + 2),
-                              SafeLoad(in + 2), SafeLoad(in + 2),
-                              SafeLoad(in + 1) >> 31 | SafeLoad(in + 2) << 1, SafeLoad(in + 1),
-                              SafeLoad(in + 1), SafeLoad(in + 1),
-                              SafeLoad(in + 1), SafeLoad(in + 1));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  in += 3;
-
-  return in;
-}
-
-inline static const uint32_t* unpack4_32_avx512(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0xf;
-  __m512i reg_shifts, reg_inls, reg_masks;
-  __m512i results;
-
-  reg_masks = _mm512_set1_epi32(mask);
-
-  // shift the first 16 outs
-  reg_shifts = _mm512_set_epi32(28, 24, 20, 16,
-                                12, 8, 4, 0,
-                                28, 24, 20, 16,
-                                12, 8, 4, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 1), SafeLoad(in + 1),
-                              SafeLoad(in + 1), SafeLoad(in + 1),
-                              SafeLoad(in + 1), SafeLoad(in + 1),
-                              SafeLoad(in + 1), SafeLoad(in + 1),
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  // shift the second 16 outs
-  reg_shifts = _mm512_set_epi32(28, 24, 20, 16,
-                                12, 8, 4, 0,
-                                28, 24, 20, 16,
-                                12, 8, 4, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 3), SafeLoad(in + 3),
-                              SafeLoad(in + 3), SafeLoad(in + 3),
-                              SafeLoad(in + 3), SafeLoad(in + 3),
-                              SafeLoad(in + 3), SafeLoad(in + 3),
-                              SafeLoad(in + 2), SafeLoad(in + 2),
-                              SafeLoad(in + 2), SafeLoad(in + 2),
-                              SafeLoad(in + 2), SafeLoad(in + 2),
-                              SafeLoad(in + 2), SafeLoad(in + 2));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  in += 4;
-
-  return in;
-}
-
-inline static const uint32_t* unpack5_32_avx512(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x1f;
-  __m512i reg_shifts, reg_inls, reg_masks;
-  __m512i results;
-
-  reg_masks = _mm512_set1_epi32(mask);
-
-  // shift the first 16 outs
-  reg_shifts = _mm512_set_epi32(11, 6, 1, 0,
-                                23, 18, 13, 8,
-                                3, 0, 25, 20,
-                                15, 10, 5, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 2), SafeLoad(in + 2),
-                              SafeLoad(in + 2), SafeLoad(in + 1) >> 28 | SafeLoad(in + 2) << 4,
-                              SafeLoad(in + 1), SafeLoad(in + 1),
-                              SafeLoad(in + 1), SafeLoad(in + 1),
-                              SafeLoad(in + 1), SafeLoad(in + 0) >> 30 | SafeLoad(in + 1) << 2,
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  // shift the second 16 outs
-  reg_shifts = _mm512_set_epi32(27, 22, 17, 12,
-                                7, 2, 0, 24,
-                                19, 14, 9, 4,
-                                0, 26, 21, 16);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 4), SafeLoad(in + 4),
-                              SafeLoad(in + 4), SafeLoad(in + 4),
-                              SafeLoad(in + 4), SafeLoad(in + 4),
-                              SafeLoad(in + 3) >> 29 | SafeLoad(in + 4) << 3, SafeLoad(in + 3),
-                              SafeLoad(in + 3), SafeLoad(in + 3),
-                              SafeLoad(in + 3), SafeLoad(in + 3),
-                              SafeLoad(in + 2) >> 31 | SafeLoad(in + 3) << 1, SafeLoad(in + 2),
-                              SafeLoad(in + 2), SafeLoad(in + 2));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  in += 5;
-
-  return in;
-}
-
-inline static const uint32_t* unpack6_32_avx512(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x3f;
-  __m512i reg_shifts, reg_inls, reg_masks;
-  __m512i results;
-
-  reg_masks = _mm512_set1_epi32(mask);
-
-  // shift the first 16 outs
-  reg_shifts = _mm512_set_epi32(26, 20, 14, 8,
-                                2, 0, 22, 16,
-                                10, 4, 0, 24,
-                                18, 12, 6, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 2), SafeLoad(in + 2),
-                              SafeLoad(in + 2), SafeLoad(in + 2),
-                              SafeLoad(in + 2), SafeLoad(in + 1) >> 28 | SafeLoad(in + 2) << 4,
-                              SafeLoad(in + 1), SafeLoad(in + 1),
-                              SafeLoad(in + 1), SafeLoad(in + 1),
-                              SafeLoad(in + 0) >> 30 | SafeLoad(in + 1) << 2, SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  // shift the second 16 outs
-  reg_shifts = _mm512_set_epi32(26, 20, 14, 8,
-                                2, 0, 22, 16,
-                                10, 4, 0, 24,
-                                18, 12, 6, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 5), SafeLoad(in + 5),
-                              SafeLoad(in + 5), SafeLoad(in + 5),
-                              SafeLoad(in + 5), SafeLoad(in + 4) >> 28 | SafeLoad(in + 5) << 4,
-                              SafeLoad(in + 4), SafeLoad(in + 4),
-                              SafeLoad(in + 4), SafeLoad(in + 4),
-                              SafeLoad(in + 3) >> 30 | SafeLoad(in + 4) << 2, SafeLoad(in + 3),
-                              SafeLoad(in + 3), SafeLoad(in + 3),
-                              SafeLoad(in + 3), SafeLoad(in + 3));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  in += 6;
-
-  return in;
-}
-
-inline static const uint32_t* unpack7_32_avx512(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x7f;
-  __m512i reg_shifts, reg_inls, reg_masks;
-  __m512i results;
-
-  reg_masks = _mm512_set1_epi32(mask);
-
-  // shift the first 16 outs
-  reg_shifts = _mm512_set_epi32(9, 2, 0, 20,
-                                13, 6, 0, 24,
-                                17, 10, 3, 0,
-                                21, 14, 7, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 3), SafeLoad(in + 3),
-                              SafeLoad(in + 2) >> 27 | SafeLoad(in + 3) << 5, SafeLoad(in + 2),
-                              SafeLoad(in + 2), SafeLoad(in + 2),
-                              SafeLoad(in + 1) >> 31 | SafeLoad(in + 2) << 1, SafeLoad(in + 1),
-                              SafeLoad(in + 1), SafeLoad(in + 1),
-                              SafeLoad(in + 1), SafeLoad(in + 0) >> 28 | SafeLoad(in + 1) << 4,
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  // shift the second 16 outs
-  reg_shifts = _mm512_set_epi32(25, 18, 11, 4,
-                                0, 22, 15, 8,
-                                1, 0, 19, 12,
-                                5, 0, 23, 16);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 6), SafeLoad(in + 6),
-                              SafeLoad(in + 6), SafeLoad(in + 6),
-                              SafeLoad(in + 5) >> 29 | SafeLoad(in + 6) << 3, SafeLoad(in + 5),
-                              SafeLoad(in + 5), SafeLoad(in + 5),
-                              SafeLoad(in + 5), SafeLoad(in + 4) >> 26 | SafeLoad(in + 5) << 6,
-                              SafeLoad(in + 4), SafeLoad(in + 4),
-                              SafeLoad(in + 4), SafeLoad(in + 3) >> 30 | SafeLoad(in + 4) << 2,
-                              SafeLoad(in + 3), SafeLoad(in + 3));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  in += 7;
-
-  return in;
-}
-
-inline static const uint32_t* unpack8_32_avx512(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0xff;
-  __m512i reg_shifts, reg_inls, reg_masks;
-  __m512i results;
-
-  reg_masks = _mm512_set1_epi32(mask);
-
-  // shift the first 16 outs
-  reg_shifts = _mm512_set_epi32(24, 16, 8, 0,
-                                24, 16, 8, 0,
-                                24, 16, 8, 0,
-                                24, 16, 8, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 3), SafeLoad(in + 3),
-                              SafeLoad(in + 3), SafeLoad(in + 3),
-                              SafeLoad(in + 2), SafeLoad(in + 2),
-                              SafeLoad(in + 2), SafeLoad(in + 2),
-                              SafeLoad(in + 1), SafeLoad(in + 1),
-                              SafeLoad(in + 1), SafeLoad(in + 1),
-                              SafeLoad(in + 0), SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  // shift the second 16 outs
-  reg_shifts = _mm512_set_epi32(24, 16, 8, 0,
-                                24, 16, 8, 0,
-                                24, 16, 8, 0,
-                                24, 16, 8, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 7), SafeLoad(in + 7),
-                              SafeLoad(in + 7), SafeLoad(in + 7),
-                              SafeLoad(in + 6), SafeLoad(in + 6),
-                              SafeLoad(in + 6), SafeLoad(in + 6),
-                              SafeLoad(in + 5), SafeLoad(in + 5),
-                              SafeLoad(in + 5), SafeLoad(in + 5),
-                              SafeLoad(in + 4), SafeLoad(in + 4),
-                              SafeLoad(in + 4), SafeLoad(in + 4));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  in += 8;
-
-  return in;
-}
-
-inline static const uint32_t* unpack9_32_avx512(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x1ff;
-  __m512i reg_shifts, reg_inls, reg_masks;
-  __m512i results;
-
-  reg_masks = _mm512_set1_epi32(mask);
-
-  // shift the first 16 outs
-  reg_shifts = _mm512_set_epi32(7, 0, 21, 12,
-                                3, 0, 17, 8,
-                                0, 22, 13, 4,
-                                0, 18, 9, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 4), SafeLoad(in + 3) >> 30 | SafeLoad(in + 4) << 2,
-                              SafeLoad(in + 3), SafeLoad(in + 3),
-                              SafeLoad(in + 3), SafeLoad(in + 2) >> 26 | SafeLoad(in + 3) << 6,
-                              SafeLoad(in + 2), SafeLoad(in + 2),
-                              SafeLoad(in + 1) >> 31 | SafeLoad(in + 2) << 1, SafeLoad(in + 1),
-                              SafeLoad(in + 1), SafeLoad(in + 1),
-                              SafeLoad(in + 0) >> 27 | SafeLoad(in + 1) << 5, SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  // shift the second 16 outs
-  reg_shifts = _mm512_set_epi32(23, 14, 5, 0,
-                                19, 10, 1, 0,
-                                15, 6, 0, 20,
-                                11, 2, 0, 16);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 8), SafeLoad(in + 8),
-                              SafeLoad(in + 8), SafeLoad(in + 7) >> 28 | SafeLoad(in + 8) << 4,
-                              SafeLoad(in + 7), SafeLoad(in + 7),
-                              SafeLoad(in + 7), SafeLoad(in + 6) >> 24 | SafeLoad(in + 7) << 8,
-                              SafeLoad(in + 6), SafeLoad(in + 6),
-                              SafeLoad(in + 5) >> 29 | SafeLoad(in + 6) << 3, SafeLoad(in + 5),
-                              SafeLoad(in + 5), SafeLoad(in + 5),
-                              SafeLoad(in + 4) >> 25 | SafeLoad(in + 5) << 7, SafeLoad(in + 4));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  in += 9;
-
-  return in;
-}
-
-inline static const uint32_t* unpack10_32_avx512(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x3ff;
-  __m512i reg_shifts, reg_inls, reg_masks;
-  __m512i results;
-
-  reg_masks = _mm512_set1_epi32(mask);
-
-  // shift the first 16 outs
-  reg_shifts = _mm512_set_epi32(22, 12, 2, 0,
-                                14, 4, 0, 16,
-                                6, 0, 18, 8,
-                                0, 20, 10, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 4), SafeLoad(in + 4),
-                              SafeLoad(in + 4), SafeLoad(in + 3) >> 24 | SafeLoad(in + 4) << 8,
-                              SafeLoad(in + 3), SafeLoad(in + 3),
-                              SafeLoad(in + 2) >> 26 | SafeLoad(in + 3) << 6, SafeLoad(in + 2),
-                              SafeLoad(in + 2), SafeLoad(in + 1) >> 28 | SafeLoad(in + 2) << 4,
-                              SafeLoad(in + 1), SafeLoad(in + 1),
-                              SafeLoad(in + 0) >> 30 | SafeLoad(in + 1) << 2, SafeLoad(in + 0),
-                              SafeLoad(in + 0), SafeLoad(in + 0));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  // shift the second 16 outs
-  reg_shifts = _mm512_set_epi32(22, 12, 2, 0,
-                                14, 4, 0, 16,
-                                6, 0, 18, 8,
-                                0, 20, 10, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 9), SafeLoad(in + 9),
-                              SafeLoad(in + 9), SafeLoad(in + 8) >> 24 | SafeLoad(in + 9) << 8,
-                              SafeLoad(in + 8), SafeLoad(in + 8),
-                              SafeLoad(in + 7) >> 26 | SafeLoad(in + 8) << 6, SafeLoad(in + 7),
-                              SafeLoad(in + 7), SafeLoad(in + 6) >> 28 | SafeLoad(in + 7) << 4,
-                              SafeLoad(in + 6), SafeLoad(in + 6),
-                              SafeLoad(in + 5) >> 30 | SafeLoad(in + 6) << 2, SafeLoad(in + 5),
-                              SafeLoad(in + 5), SafeLoad(in + 5));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  in += 10;
-
-  return in;
-}
-
-inline static const uint32_t* unpack11_32_avx512(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x7ff;
-  __m512i reg_shifts, reg_inls, reg_masks;
-  __m512i results;
-
-  reg_masks = _mm512_set1_epi32(mask);
-
-  // shift the first 16 outs
-  reg_shifts = _mm512_set_epi32(5, 0, 15, 4,
-                                0, 14, 3, 0,
-                                13, 2, 0, 12,
-                                1, 0, 11, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 5), SafeLoad(in + 4) >> 26 | SafeLoad(in + 5) << 6,
-                              SafeLoad(in + 4), SafeLoad(in + 4),
-                              SafeLoad(in + 3) >> 25 | SafeLoad(in + 4) << 7, SafeLoad(in + 3),
-                              SafeLoad(in + 3), SafeLoad(in + 2) >> 24 | SafeLoad(in + 3) << 8,
-                              SafeLoad(in + 2), SafeLoad(in + 2),
-                              SafeLoad(in + 1) >> 23 | SafeLoad(in + 2) << 9, SafeLoad(in + 1),
-                              SafeLoad(in + 1), SafeLoad(in + 0) >> 22 | SafeLoad(in + 1) << 10,
-                              SafeLoad(in + 0), SafeLoad(in + 0));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  // shift the second 16 outs
-  reg_shifts = _mm512_set_epi32(21, 10, 0, 20,
-                                9, 0, 19, 8,
-                                0, 18, 7, 0,
-                                17, 6, 0, 16);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 10), SafeLoad(in + 10),
-                              SafeLoad(in + 9) >> 31 | SafeLoad(in + 10) << 1, SafeLoad(in + 9),
-                              SafeLoad(in + 9), SafeLoad(in + 8) >> 30 | SafeLoad(in + 9) << 2,
-                              SafeLoad(in + 8), SafeLoad(in + 8),
-                              SafeLoad(in + 7) >> 29 | SafeLoad(in + 8) << 3, SafeLoad(in + 7),
-                              SafeLoad(in + 7), SafeLoad(in + 6) >> 28 | SafeLoad(in + 7) << 4,
-                              SafeLoad(in + 6), SafeLoad(in + 6),
-                              SafeLoad(in + 5) >> 27 | SafeLoad(in + 6) << 5, SafeLoad(in + 5));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  in += 11;
-
-  return in;
-}
-
-inline static const uint32_t* unpack12_32_avx512(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0xfff;
-  __m512i reg_shifts, reg_inls, reg_masks;
-  __m512i results;
-
-  reg_masks = _mm512_set1_epi32(mask);
-
-  // shift the first 16 outs
-  reg_shifts = _mm512_set_epi32(20, 8, 0, 16,
-                                4, 0, 12, 0,
-                                20, 8, 0, 16,
-                                4, 0, 12, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 5), SafeLoad(in + 5),
-                              SafeLoad(in + 4) >> 28 | SafeLoad(in + 5) << 4, SafeLoad(in + 4),
-                              SafeLoad(in + 4), SafeLoad(in + 3) >> 24 | SafeLoad(in + 4) << 8,
-                              SafeLoad(in + 3), SafeLoad(in + 3),
-                              SafeLoad(in + 2), SafeLoad(in + 2),
-                              SafeLoad(in + 1) >> 28 | SafeLoad(in + 2) << 4, SafeLoad(in + 1),
-                              SafeLoad(in + 1), SafeLoad(in + 0) >> 24 | SafeLoad(in + 1) << 8,
-                              SafeLoad(in + 0), SafeLoad(in + 0));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  // shift the second 16 outs
-  reg_shifts = _mm512_set_epi32(20, 8, 0, 16,
-                                4, 0, 12, 0,
-                                20, 8, 0, 16,
-                                4, 0, 12, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 11), SafeLoad(in + 11),
-                              SafeLoad(in + 10) >> 28 | SafeLoad(in + 11) << 4, SafeLoad(in + 10),
-                              SafeLoad(in + 10), SafeLoad(in + 9) >> 24 | SafeLoad(in + 10) << 8,
-                              SafeLoad(in + 9), SafeLoad(in + 9),
-                              SafeLoad(in + 8), SafeLoad(in + 8),
-                              SafeLoad(in + 7) >> 28 | SafeLoad(in + 8) << 4, SafeLoad(in + 7),
-                              SafeLoad(in + 7), SafeLoad(in + 6) >> 24 | SafeLoad(in + 7) << 8,
-                              SafeLoad(in + 6), SafeLoad(in + 6));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  in += 12;
-
-  return in;
-}
-
-inline static const uint32_t* unpack13_32_avx512(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x1fff;
-  __m512i reg_shifts, reg_inls, reg_masks;
-  __m512i results;
-
-  reg_masks = _mm512_set1_epi32(mask);
-
-  // shift the first 16 outs
-  reg_shifts = _mm512_set_epi32(3, 0, 9, 0,
-                                15, 2, 0, 8,
-                                0, 14, 1, 0,
-                                7, 0, 13, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 6), SafeLoad(in + 5) >> 22 | SafeLoad(in + 6) << 10,
-                              SafeLoad(in + 5), SafeLoad(in + 4) >> 28 | SafeLoad(in + 5) << 4,
-                              SafeLoad(in + 4), SafeLoad(in + 4),
-                              SafeLoad(in + 3) >> 21 | SafeLoad(in + 4) << 11, SafeLoad(in + 3),
-                              SafeLoad(in + 2) >> 27 | SafeLoad(in + 3) << 5, SafeLoad(in + 2),
-                              SafeLoad(in + 2), SafeLoad(in + 1) >> 20 | SafeLoad(in + 2) << 12,
-                              SafeLoad(in + 1), SafeLoad(in + 0) >> 26 | SafeLoad(in + 1) << 6,
-                              SafeLoad(in + 0), SafeLoad(in + 0));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  // shift the second 16 outs
-  reg_shifts = _mm512_set_epi32(19, 6, 0, 12,
-                                0, 18, 5, 0,
-                                11, 0, 17, 4,
-                                0, 10, 0, 16);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 12), SafeLoad(in + 12),
-                              SafeLoad(in + 11) >> 25 | SafeLoad(in + 12) << 7, SafeLoad(in + 11),
-                              SafeLoad(in + 10) >> 31 | SafeLoad(in + 11) << 1, SafeLoad(in + 10),
-                              SafeLoad(in + 10), SafeLoad(in + 9) >> 24 | SafeLoad(in + 10) << 8,
-                              SafeLoad(in + 9), SafeLoad(in + 8) >> 30 | SafeLoad(in + 9) << 2,
-                              SafeLoad(in + 8), SafeLoad(in + 8),
-                              SafeLoad(in + 7) >> 23 | SafeLoad(in + 8) << 9, SafeLoad(in + 7),
-                              SafeLoad(in + 6) >> 29 | SafeLoad(in + 7) << 3, SafeLoad(in + 6));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  in += 13;
-
-  return in;
-}
-
-inline static const uint32_t* unpack14_32_avx512(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x3fff;
-  __m512i reg_shifts, reg_inls, reg_masks;
-  __m512i results;
-
-  reg_masks = _mm512_set1_epi32(mask);
-
-  // shift the first 16 outs
-  reg_shifts = _mm512_set_epi32(18, 4, 0, 8,
-                                0, 12, 0, 16,
-                                2, 0, 6, 0,
-                                10, 0, 14, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 6), SafeLoad(in + 6),
-                              SafeLoad(in + 5) >> 22 | SafeLoad(in + 6) << 10, SafeLoad(in + 5),
-                              SafeLoad(in + 4) >> 26 | SafeLoad(in + 5) << 6, SafeLoad(in + 4),
-                              SafeLoad(in + 3) >> 30 | SafeLoad(in + 4) << 2, SafeLoad(in + 3),
-                              SafeLoad(in + 3), SafeLoad(in + 2) >> 20 | SafeLoad(in + 3) << 12,
-                              SafeLoad(in + 2), SafeLoad(in + 1) >> 24 | SafeLoad(in + 2) << 8,
-                              SafeLoad(in + 1), SafeLoad(in + 0) >> 28 | SafeLoad(in + 1) << 4,
-                              SafeLoad(in + 0), SafeLoad(in + 0));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  // shift the second 16 outs
-  reg_shifts = _mm512_set_epi32(18, 4, 0, 8,
-                                0, 12, 0, 16,
-                                2, 0, 6, 0,
-                                10, 0, 14, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 13), SafeLoad(in + 13),
-                              SafeLoad(in + 12) >> 22 | SafeLoad(in + 13) << 10, SafeLoad(in + 12),
-                              SafeLoad(in + 11) >> 26 | SafeLoad(in + 12) << 6, SafeLoad(in + 11),
-                              SafeLoad(in + 10) >> 30 | SafeLoad(in + 11) << 2, SafeLoad(in + 10),
-                              SafeLoad(in + 10), SafeLoad(in + 9) >> 20 | SafeLoad(in + 10) << 12,
-                              SafeLoad(in + 9), SafeLoad(in + 8) >> 24 | SafeLoad(in + 9) << 8,
-                              SafeLoad(in + 8), SafeLoad(in + 7) >> 28 | SafeLoad(in + 8) << 4,
-                              SafeLoad(in + 7), SafeLoad(in + 7));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  in += 14;
-
-  return in;
-}
-
-inline static const uint32_t* unpack15_32_avx512(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x7fff;
-  __m512i reg_shifts, reg_inls, reg_masks;
-  __m512i results;
-
-  reg_masks = _mm512_set1_epi32(mask);
-
-  // shift the first 16 outs
-  reg_shifts = _mm512_set_epi32(1, 0, 3, 0,
-                                5, 0, 7, 0,
-                                9, 0, 11, 0,
-                                13, 0, 15, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 7), SafeLoad(in + 6) >> 18 | SafeLoad(in + 7) << 14,
-                              SafeLoad(in + 6), SafeLoad(in + 5) >> 20 | SafeLoad(in + 6) << 12,
-                              SafeLoad(in + 5), SafeLoad(in + 4) >> 22 | SafeLoad(in + 5) << 10,
-                              SafeLoad(in + 4), SafeLoad(in + 3) >> 24 | SafeLoad(in + 4) << 8,
-                              SafeLoad(in + 3), SafeLoad(in + 2) >> 26 | SafeLoad(in + 3) << 6,
-                              SafeLoad(in + 2), SafeLoad(in + 1) >> 28 | SafeLoad(in + 2) << 4,
-                              SafeLoad(in + 1), SafeLoad(in + 0) >> 30 | SafeLoad(in + 1) << 2,
-                              SafeLoad(in + 0), SafeLoad(in + 0));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  // shift the second 16 outs
-  reg_shifts = _mm512_set_epi32(17, 2, 0, 4,
-                                0, 6, 0, 8,
-                                0, 10, 0, 12,
-                                0, 14, 0, 16);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 14), SafeLoad(in + 14),
-                              SafeLoad(in + 13) >> 19 | SafeLoad(in + 14) << 13, SafeLoad(in + 13),
-                              SafeLoad(in + 12) >> 21 | SafeLoad(in + 13) << 11, SafeLoad(in + 12),
-                              SafeLoad(in + 11) >> 23 | SafeLoad(in + 12) << 9, SafeLoad(in + 11),
-                              SafeLoad(in + 10) >> 25 | SafeLoad(in + 11) << 7, SafeLoad(in + 10),
-                              SafeLoad(in + 9) >> 27 | SafeLoad(in + 10) << 5, SafeLoad(in + 9),
-                              SafeLoad(in + 8) >> 29 | SafeLoad(in + 9) << 3, SafeLoad(in + 8),
-                              SafeLoad(in + 7) >> 31 | SafeLoad(in + 8) << 1, SafeLoad(in + 7));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  in += 15;
-
-  return in;
-}
-
-inline static const uint32_t* unpack16_32_avx512(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0xffff;
-  __m512i reg_shifts, reg_inls, reg_masks;
-  __m512i results;
-
-  reg_masks = _mm512_set1_epi32(mask);
-
-  // shift the first 16 outs
-  reg_shifts = _mm512_set_epi32(16, 0, 16, 0,
-                                16, 0, 16, 0,
-                                16, 0, 16, 0,
-                                16, 0, 16, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 7), SafeLoad(in + 7),
-                              SafeLoad(in + 6), SafeLoad(in + 6),
-                              SafeLoad(in + 5), SafeLoad(in + 5),
-                              SafeLoad(in + 4), SafeLoad(in + 4),
-                              SafeLoad(in + 3), SafeLoad(in + 3),
-                              SafeLoad(in + 2), SafeLoad(in + 2),
-                              SafeLoad(in + 1), SafeLoad(in + 1),
-                              SafeLoad(in + 0), SafeLoad(in + 0));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  // shift the second 16 outs
-  reg_shifts = _mm512_set_epi32(16, 0, 16, 0,
-                                16, 0, 16, 0,
-                                16, 0, 16, 0,
-                                16, 0, 16, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 15), SafeLoad(in + 15),
-                              SafeLoad(in + 14), SafeLoad(in + 14),
-                              SafeLoad(in + 13), SafeLoad(in + 13),
-                              SafeLoad(in + 12), SafeLoad(in + 12),
-                              SafeLoad(in + 11), SafeLoad(in + 11),
-                              SafeLoad(in + 10), SafeLoad(in + 10),
-                              SafeLoad(in + 9), SafeLoad(in + 9),
-                              SafeLoad(in + 8), SafeLoad(in + 8));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  in += 16;
-
-  return in;
-}
-
-inline static const uint32_t* unpack17_32_avx512(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x1ffff;
-  __m512i reg_shifts, reg_inls, reg_masks;
-  __m512i results;
-
-  reg_masks = _mm512_set1_epi32(mask);
-
-  // shift the first 16 outs
-  reg_shifts = _mm512_set_epi32(0, 14, 0, 12,
-                                0, 10, 0, 8,
-                                0, 6, 0, 4,
-                                0, 2, 0, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 7) >> 31 | SafeLoad(in + 8) << 1, SafeLoad(in + 7),
-                              SafeLoad(in + 6) >> 29 | SafeLoad(in + 7) << 3, SafeLoad(in + 6),
-                              SafeLoad(in + 5) >> 27 | SafeLoad(in + 6) << 5, SafeLoad(in + 5),
-                              SafeLoad(in + 4) >> 25 | SafeLoad(in + 5) << 7, SafeLoad(in + 4),
-                              SafeLoad(in + 3) >> 23 | SafeLoad(in + 4) << 9, SafeLoad(in + 3),
-                              SafeLoad(in + 2) >> 21 | SafeLoad(in + 3) << 11, SafeLoad(in + 2),
-                              SafeLoad(in + 1) >> 19 | SafeLoad(in + 2) << 13, SafeLoad(in + 1),
-                              SafeLoad(in + 0) >> 17 | SafeLoad(in + 1) << 15, SafeLoad(in + 0));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  // shift the second 16 outs
-  reg_shifts = _mm512_set_epi32(15, 0, 13, 0,
-                                11, 0, 9, 0,
-                                7, 0, 5, 0,
-                                3, 0, 1, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 16), SafeLoad(in + 15) >> 30 | SafeLoad(in + 16) << 2,
-                              SafeLoad(in + 15), SafeLoad(in + 14) >> 28 | SafeLoad(in + 15) << 4,
-                              SafeLoad(in + 14), SafeLoad(in + 13) >> 26 | SafeLoad(in + 14) << 6,
-                              SafeLoad(in + 13), SafeLoad(in + 12) >> 24 | SafeLoad(in + 13) << 8,
-                              SafeLoad(in + 12), SafeLoad(in + 11) >> 22 | SafeLoad(in + 12) << 10,
-                              SafeLoad(in + 11), SafeLoad(in + 10) >> 20 | SafeLoad(in + 11) << 12,
-                              SafeLoad(in + 10), SafeLoad(in + 9) >> 18 | SafeLoad(in + 10) << 14,
-                              SafeLoad(in + 9), SafeLoad(in + 8) >> 16 | SafeLoad(in + 9) << 16);
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  in += 17;
-
-  return in;
-}
-
-inline static const uint32_t* unpack18_32_avx512(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x3ffff;
-  __m512i reg_shifts, reg_inls, reg_masks;
-  __m512i results;
-
-  reg_masks = _mm512_set1_epi32(mask);
-
-  // shift the first 16 outs
-  reg_shifts = _mm512_set_epi32(14, 0, 10, 0,
-                                6, 0, 2, 0,
-                                0, 12, 0, 8,
-                                0, 4, 0, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 8), SafeLoad(in + 7) >> 28 | SafeLoad(in + 8) << 4,
-                              SafeLoad(in + 7), SafeLoad(in + 6) >> 24 | SafeLoad(in + 7) << 8,
-                              SafeLoad(in + 6), SafeLoad(in + 5) >> 20 | SafeLoad(in + 6) << 12,
-                              SafeLoad(in + 5), SafeLoad(in + 4) >> 16 | SafeLoad(in + 5) << 16,
-                              SafeLoad(in + 3) >> 30 | SafeLoad(in + 4) << 2, SafeLoad(in + 3),
-                              SafeLoad(in + 2) >> 26 | SafeLoad(in + 3) << 6, SafeLoad(in + 2),
-                              SafeLoad(in + 1) >> 22 | SafeLoad(in + 2) << 10, SafeLoad(in + 1),
-                              SafeLoad(in + 0) >> 18 | SafeLoad(in + 1) << 14, SafeLoad(in + 0));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  // shift the second 16 outs
-  reg_shifts = _mm512_set_epi32(14, 0, 10, 0,
-                                6, 0, 2, 0,
-                                0, 12, 0, 8,
-                                0, 4, 0, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 17), SafeLoad(in + 16) >> 28 | SafeLoad(in + 17) << 4,
-                              SafeLoad(in + 16), SafeLoad(in + 15) >> 24 | SafeLoad(in + 16) << 8,
-                              SafeLoad(in + 15), SafeLoad(in + 14) >> 20 | SafeLoad(in + 15) << 12,
-                              SafeLoad(in + 14), SafeLoad(in + 13) >> 16 | SafeLoad(in + 14) << 16,
-                              SafeLoad(in + 12) >> 30 | SafeLoad(in + 13) << 2, SafeLoad(in + 12),
-                              SafeLoad(in + 11) >> 26 | SafeLoad(in + 12) << 6, SafeLoad(in + 11),
-                              SafeLoad(in + 10) >> 22 | SafeLoad(in + 11) << 10, SafeLoad(in + 10),
-                              SafeLoad(in + 9) >> 18 | SafeLoad(in + 10) << 14, SafeLoad(in + 9));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  in += 18;
-
-  return in;
-}
-
-inline static const uint32_t* unpack19_32_avx512(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x7ffff;
-  __m512i reg_shifts, reg_inls, reg_masks;
-  __m512i results;
-
-  reg_masks = _mm512_set1_epi32(mask);
-
-  // shift the first 16 outs
-  reg_shifts = _mm512_set_epi32(0, 10, 0, 4,
-                                0, 0, 11, 0,
-                                5, 0, 0, 12,
-                                0, 6, 0, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 8) >> 29 | SafeLoad(in + 9) << 3, SafeLoad(in + 8),
-                              SafeLoad(in + 7) >> 23 | SafeLoad(in + 8) << 9, SafeLoad(in + 7),
-                              SafeLoad(in + 6) >> 17 | SafeLoad(in + 7) << 15, SafeLoad(in + 5) >> 30 | SafeLoad(in + 6) << 2,
-                              SafeLoad(in + 5), SafeLoad(in + 4) >> 24 | SafeLoad(in + 5) << 8,
-                              SafeLoad(in + 4), SafeLoad(in + 3) >> 18 | SafeLoad(in + 4) << 14,
-                              SafeLoad(in + 2) >> 31 | SafeLoad(in + 3) << 1, SafeLoad(in + 2),
-                              SafeLoad(in + 1) >> 25 | SafeLoad(in + 2) << 7, SafeLoad(in + 1),
-                              SafeLoad(in + 0) >> 19 | SafeLoad(in + 1) << 13, SafeLoad(in + 0));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  // shift the second 16 outs
-  reg_shifts = _mm512_set_epi32(13, 0, 7, 0,
-                                1, 0, 0, 8,
-                                0, 2, 0, 0,
-                                9, 0, 3, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 18), SafeLoad(in + 17) >> 26 | SafeLoad(in + 18) << 6,
-                              SafeLoad(in + 17), SafeLoad(in + 16) >> 20 | SafeLoad(in + 17) << 12,
-                              SafeLoad(in + 16), SafeLoad(in + 15) >> 14 | SafeLoad(in + 16) << 18,
-                              SafeLoad(in + 14) >> 27 | SafeLoad(in + 15) << 5, SafeLoad(in + 14),
-                              SafeLoad(in + 13) >> 21 | SafeLoad(in + 14) << 11, SafeLoad(in + 13),
-                              SafeLoad(in + 12) >> 15 | SafeLoad(in + 13) << 17, SafeLoad(in + 11) >> 28 | SafeLoad(in + 12) << 4,
-                              SafeLoad(in + 11), SafeLoad(in + 10) >> 22 | SafeLoad(in + 11) << 10,
-                              SafeLoad(in + 10), SafeLoad(in + 9) >> 16 | SafeLoad(in + 10) << 16);
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  in += 19;
-
-  return in;
-}
-
-inline static const uint32_t* unpack20_32_avx512(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0xfffff;
-  __m512i reg_shifts, reg_inls, reg_masks;
-  __m512i results;
-
-  reg_masks = _mm512_set1_epi32(mask);
-
-  // shift the first 16 outs
-  reg_shifts = _mm512_set_epi32(12, 0, 4, 0,
-                                0, 8, 0, 0,
-                                12, 0, 4, 0,
-                                0, 8, 0, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 9), SafeLoad(in + 8) >> 24 | SafeLoad(in + 9) << 8,
-                              SafeLoad(in + 8), SafeLoad(in + 7) >> 16 | SafeLoad(in + 8) << 16,
-                              SafeLoad(in + 6) >> 28 | SafeLoad(in + 7) << 4, SafeLoad(in + 6),
-                              SafeLoad(in + 5) >> 20 | SafeLoad(in + 6) << 12, SafeLoad(in + 5),
-                              SafeLoad(in + 4), SafeLoad(in + 3) >> 24 | SafeLoad(in + 4) << 8,
-                              SafeLoad(in + 3), SafeLoad(in + 2) >> 16 | SafeLoad(in + 3) << 16,
-                              SafeLoad(in + 1) >> 28 | SafeLoad(in + 2) << 4, SafeLoad(in + 1),
-                              SafeLoad(in + 0) >> 20 | SafeLoad(in + 1) << 12, SafeLoad(in + 0));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  // shift the second 16 outs
-  reg_shifts = _mm512_set_epi32(12, 0, 4, 0,
-                                0, 8, 0, 0,
-                                12, 0, 4, 0,
-                                0, 8, 0, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 19), SafeLoad(in + 18) >> 24 | SafeLoad(in + 19) << 8,
-                              SafeLoad(in + 18), SafeLoad(in + 17) >> 16 | SafeLoad(in + 18) << 16,
-                              SafeLoad(in + 16) >> 28 | SafeLoad(in + 17) << 4, SafeLoad(in + 16),
-                              SafeLoad(in + 15) >> 20 | SafeLoad(in + 16) << 12, SafeLoad(in + 15),
-                              SafeLoad(in + 14), SafeLoad(in + 13) >> 24 | SafeLoad(in + 14) << 8,
-                              SafeLoad(in + 13), SafeLoad(in + 12) >> 16 | SafeLoad(in + 13) << 16,
-                              SafeLoad(in + 11) >> 28 | SafeLoad(in + 12) << 4, SafeLoad(in + 11),
-                              SafeLoad(in + 10) >> 20 | SafeLoad(in + 11) << 12, SafeLoad(in + 10));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  in += 20;
-
-  return in;
-}
-
-inline static const uint32_t* unpack21_32_avx512(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x1fffff;
-  __m512i reg_shifts, reg_inls, reg_masks;
-  __m512i results;
-
-  reg_masks = _mm512_set1_epi32(mask);
-
-  // shift the first 16 outs
-  reg_shifts = _mm512_set_epi32(0, 6, 0, 0,
-                                7, 0, 0, 8,
-                                0, 0, 9, 0,
-                                0, 10, 0, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 9) >> 27 | SafeLoad(in + 10) << 5, SafeLoad(in + 9),
-                              SafeLoad(in + 8) >> 17 | SafeLoad(in + 9) << 15, SafeLoad(in + 7) >> 28 | SafeLoad(in + 8) << 4,
-                              SafeLoad(in + 7), SafeLoad(in + 6) >> 18 | SafeLoad(in + 7) << 14,
-                              SafeLoad(in + 5) >> 29 | SafeLoad(in + 6) << 3, SafeLoad(in + 5),
-                              SafeLoad(in + 4) >> 19 | SafeLoad(in + 5) << 13, SafeLoad(in + 3) >> 30 | SafeLoad(in + 4) << 2,
-                              SafeLoad(in + 3), SafeLoad(in + 2) >> 20 | SafeLoad(in + 3) << 12,
-                              SafeLoad(in + 1) >> 31 | SafeLoad(in + 2) << 1, SafeLoad(in + 1),
-                              SafeLoad(in + 0) >> 21 | SafeLoad(in + 1) << 11, SafeLoad(in + 0));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  // shift the second 16 outs
-  reg_shifts = _mm512_set_epi32(11, 0, 1, 0,
-                                0, 2, 0, 0,
-                                3, 0, 0, 4,
-                                0, 0, 5, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 20), SafeLoad(in + 19) >> 22 | SafeLoad(in + 20) << 10,
-                              SafeLoad(in + 19), SafeLoad(in + 18) >> 12 | SafeLoad(in + 19) << 20,
-                              SafeLoad(in + 17) >> 23 | SafeLoad(in + 18) << 9, SafeLoad(in + 17),
-                              SafeLoad(in + 16) >> 13 | SafeLoad(in + 17) << 19, SafeLoad(in + 15) >> 24 | SafeLoad(in + 16) << 8,
-                              SafeLoad(in + 15), SafeLoad(in + 14) >> 14 | SafeLoad(in + 15) << 18,
-                              SafeLoad(in + 13) >> 25 | SafeLoad(in + 14) << 7, SafeLoad(in + 13),
-                              SafeLoad(in + 12) >> 15 | SafeLoad(in + 13) << 17, SafeLoad(in + 11) >> 26 | SafeLoad(in + 12) << 6,
-                              SafeLoad(in + 11), SafeLoad(in + 10) >> 16 | SafeLoad(in + 11) << 16);
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  in += 21;
-
-  return in;
-}
-
-inline static const uint32_t* unpack22_32_avx512(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x3fffff;
-  __m512i reg_shifts, reg_inls, reg_masks;
-  __m512i results;
-
-  reg_masks = _mm512_set1_epi32(mask);
-
-  // shift the first 16 outs
-  reg_shifts = _mm512_set_epi32(10, 0, 0, 8,
-                                0, 0, 6, 0,
-                                0, 4, 0, 0,
-                                2, 0, 0, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 10), SafeLoad(in + 9) >> 20 | SafeLoad(in + 10) << 12,
-                              SafeLoad(in + 8) >> 30 | SafeLoad(in + 9) << 2, SafeLoad(in + 8),
-                              SafeLoad(in + 7) >> 18 | SafeLoad(in + 8) << 14, SafeLoad(in + 6) >> 28 | SafeLoad(in + 7) << 4,
-                              SafeLoad(in + 6), SafeLoad(in + 5) >> 16 | SafeLoad(in + 6) << 16,
-                              SafeLoad(in + 4) >> 26 | SafeLoad(in + 5) << 6, SafeLoad(in + 4),
-                              SafeLoad(in + 3) >> 14 | SafeLoad(in + 4) << 18, SafeLoad(in + 2) >> 24 | SafeLoad(in + 3) << 8,
-                              SafeLoad(in + 2), SafeLoad(in + 1) >> 12 | SafeLoad(in + 2) << 20,
-                              SafeLoad(in + 0) >> 22 | SafeLoad(in + 1) << 10, SafeLoad(in + 0));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  // shift the second 16 outs
-  reg_shifts = _mm512_set_epi32(10, 0, 0, 8,
-                                0, 0, 6, 0,
-                                0, 4, 0, 0,
-                                2, 0, 0, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 21), SafeLoad(in + 20) >> 20 | SafeLoad(in + 21) << 12,
-                              SafeLoad(in + 19) >> 30 | SafeLoad(in + 20) << 2, SafeLoad(in + 19),
-                              SafeLoad(in + 18) >> 18 | SafeLoad(in + 19) << 14, SafeLoad(in + 17) >> 28 | SafeLoad(in + 18) << 4,
-                              SafeLoad(in + 17), SafeLoad(in + 16) >> 16 | SafeLoad(in + 17) << 16,
-                              SafeLoad(in + 15) >> 26 | SafeLoad(in + 16) << 6, SafeLoad(in + 15),
-                              SafeLoad(in + 14) >> 14 | SafeLoad(in + 15) << 18, SafeLoad(in + 13) >> 24 | SafeLoad(in + 14) << 8,
-                              SafeLoad(in + 13), SafeLoad(in + 12) >> 12 | SafeLoad(in + 13) << 20,
-                              SafeLoad(in + 11) >> 22 | SafeLoad(in + 12) << 10, SafeLoad(in + 11));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  in += 22;
-
-  return in;
-}
-
-inline static const uint32_t* unpack23_32_avx512(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x7fffff;
-  __m512i reg_shifts, reg_inls, reg_masks;
-  __m512i results;
-
-  reg_masks = _mm512_set1_epi32(mask);
-
-  // shift the first 16 outs
-  reg_shifts = _mm512_set_epi32(0, 2, 0, 0,
-                                0, 6, 0, 0,
-                                1, 0, 0, 0,
-                                5, 0, 0, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 10) >> 25 | SafeLoad(in + 11) << 7, SafeLoad(in + 10),
-                              SafeLoad(in + 9) >> 11 | SafeLoad(in + 10) << 21, SafeLoad(in + 8) >> 20 | SafeLoad(in + 9) << 12,
-                              SafeLoad(in + 7) >> 29 | SafeLoad(in + 8) << 3, SafeLoad(in + 7),
-                              SafeLoad(in + 6) >> 15 | SafeLoad(in + 7) << 17, SafeLoad(in + 5) >> 24 | SafeLoad(in + 6) << 8,
-                              SafeLoad(in + 5), SafeLoad(in + 4) >> 10 | SafeLoad(in + 5) << 22,
-                              SafeLoad(in + 3) >> 19 | SafeLoad(in + 4) << 13, SafeLoad(in + 2) >> 28 | SafeLoad(in + 3) << 4,
-                              SafeLoad(in + 2), SafeLoad(in + 1) >> 14 | SafeLoad(in + 2) << 18,
-                              SafeLoad(in + 0) >> 23 | SafeLoad(in + 1) << 9, SafeLoad(in + 0));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  // shift the second 16 outs
-  reg_shifts = _mm512_set_epi32(9, 0, 0, 4,
-                                0, 0, 0, 8,
-                                0, 0, 3, 0,
-                                0, 0, 7, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 22), SafeLoad(in + 21) >> 18 | SafeLoad(in + 22) << 14,
-                              SafeLoad(in + 20) >> 27 | SafeLoad(in + 21) << 5, SafeLoad(in + 20),
-                              SafeLoad(in + 19) >> 13 | SafeLoad(in + 20) << 19, SafeLoad(in + 18) >> 22 | SafeLoad(in + 19) << 10,
-                              SafeLoad(in + 17) >> 31 | SafeLoad(in + 18) << 1, SafeLoad(in + 17),
-                              SafeLoad(in + 16) >> 17 | SafeLoad(in + 17) << 15, SafeLoad(in + 15) >> 26 | SafeLoad(in + 16) << 6,
-                              SafeLoad(in + 15), SafeLoad(in + 14) >> 12 | SafeLoad(in + 15) << 20,
-                              SafeLoad(in + 13) >> 21 | SafeLoad(in + 14) << 11, SafeLoad(in + 12) >> 30 | SafeLoad(in + 13) << 2,
-                              SafeLoad(in + 12), SafeLoad(in + 11) >> 16 | SafeLoad(in + 12) << 16);
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  in += 23;
-
-  return in;
-}
-
-inline static const uint32_t* unpack24_32_avx512(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0xffffff;
-  __m512i reg_shifts, reg_inls, reg_masks;
-  __m512i results;
-
-  reg_masks = _mm512_set1_epi32(mask);
-
-  // shift the first 16 outs
-  reg_shifts = _mm512_set_epi32(8, 0, 0, 0,
-                                8, 0, 0, 0,
-                                8, 0, 0, 0,
-                                8, 0, 0, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 11), SafeLoad(in + 10) >> 16 | SafeLoad(in + 11) << 16,
-                              SafeLoad(in + 9) >> 24 | SafeLoad(in + 10) << 8, SafeLoad(in + 9),
-                              SafeLoad(in + 8), SafeLoad(in + 7) >> 16 | SafeLoad(in + 8) << 16,
-                              SafeLoad(in + 6) >> 24 | SafeLoad(in + 7) << 8, SafeLoad(in + 6),
-                              SafeLoad(in + 5), SafeLoad(in + 4) >> 16 | SafeLoad(in + 5) << 16,
-                              SafeLoad(in + 3) >> 24 | SafeLoad(in + 4) << 8, SafeLoad(in + 3),
-                              SafeLoad(in + 2), SafeLoad(in + 1) >> 16 | SafeLoad(in + 2) << 16,
-                              SafeLoad(in + 0) >> 24 | SafeLoad(in + 1) << 8, SafeLoad(in + 0));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  // shift the second 16 outs
-  reg_shifts = _mm512_set_epi32(8, 0, 0, 0,
-                                8, 0, 0, 0,
-                                8, 0, 0, 0,
-                                8, 0, 0, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 23), SafeLoad(in + 22) >> 16 | SafeLoad(in + 23) << 16,
-                              SafeLoad(in + 21) >> 24 | SafeLoad(in + 22) << 8, SafeLoad(in + 21),
-                              SafeLoad(in + 20), SafeLoad(in + 19) >> 16 | SafeLoad(in + 20) << 16,
-                              SafeLoad(in + 18) >> 24 | SafeLoad(in + 19) << 8, SafeLoad(in + 18),
-                              SafeLoad(in + 17), SafeLoad(in + 16) >> 16 | SafeLoad(in + 17) << 16,
-                              SafeLoad(in + 15) >> 24 | SafeLoad(in + 16) << 8, SafeLoad(in + 15),
-                              SafeLoad(in + 14), SafeLoad(in + 13) >> 16 | SafeLoad(in + 14) << 16,
-                              SafeLoad(in + 12) >> 24 | SafeLoad(in + 13) << 8, SafeLoad(in + 12));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  in += 24;
-
-  return in;
-}
-
-inline static const uint32_t* unpack25_32_avx512(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x1ffffff;
-  __m512i reg_shifts, reg_inls, reg_masks;
-  __m512i results;
-
-  reg_masks = _mm512_set1_epi32(mask);
-
-  // shift the first 16 outs
-  reg_shifts = _mm512_set_epi32(0, 0, 5, 0,
-                                0, 0, 1, 0,
-                                0, 0, 0, 4,
-                                0, 0, 0, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 11) >> 23 | SafeLoad(in + 12) << 9, SafeLoad(in + 10) >> 30 | SafeLoad(in + 11) << 2,
-                              SafeLoad(in + 10), SafeLoad(in + 9) >> 12 | SafeLoad(in + 10) << 20,
-                              SafeLoad(in + 8) >> 19 | SafeLoad(in + 9) << 13, SafeLoad(in + 7) >> 26 | SafeLoad(in + 8) << 6,
-                              SafeLoad(in + 7), SafeLoad(in + 6) >> 8 | SafeLoad(in + 7) << 24,
-                              SafeLoad(in + 5) >> 15 | SafeLoad(in + 6) << 17, SafeLoad(in + 4) >> 22 | SafeLoad(in + 5) << 10,
-                              SafeLoad(in + 3) >> 29 | SafeLoad(in + 4) << 3, SafeLoad(in + 3),
-                              SafeLoad(in + 2) >> 11 | SafeLoad(in + 3) << 21, SafeLoad(in + 1) >> 18 | SafeLoad(in + 2) << 14,
-                              SafeLoad(in + 0) >> 25 | SafeLoad(in + 1) << 7, SafeLoad(in + 0));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  // shift the second 16 outs
-  reg_shifts = _mm512_set_epi32(7, 0, 0, 0,
-                                3, 0, 0, 0,
-                                0, 6, 0, 0,
-                                0, 2, 0, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 24), SafeLoad(in + 23) >> 14 | SafeLoad(in + 24) << 18,
-                              SafeLoad(in + 22) >> 21 | SafeLoad(in + 23) << 11, SafeLoad(in + 21) >> 28 | SafeLoad(in + 22) << 4,
-                              SafeLoad(in + 21), SafeLoad(in + 20) >> 10 | SafeLoad(in + 21) << 22,
-                              SafeLoad(in + 19) >> 17 | SafeLoad(in + 20) << 15, SafeLoad(in + 18) >> 24 | SafeLoad(in + 19) << 8,
-                              SafeLoad(in + 17) >> 31 | SafeLoad(in + 18) << 1, SafeLoad(in + 17),
-                              SafeLoad(in + 16) >> 13 | SafeLoad(in + 17) << 19, SafeLoad(in + 15) >> 20 | SafeLoad(in + 16) << 12,
-                              SafeLoad(in + 14) >> 27 | SafeLoad(in + 15) << 5, SafeLoad(in + 14),
-                              SafeLoad(in + 13) >> 9 | SafeLoad(in + 14) << 23, SafeLoad(in + 12) >> 16 | SafeLoad(in + 13) << 16);
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  in += 25;
-
-  return in;
-}
-
-inline static const uint32_t* unpack26_32_avx512(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x3ffffff;
-  __m512i reg_shifts, reg_inls, reg_masks;
-  __m512i results;
-
-  reg_masks = _mm512_set1_epi32(mask);
-
-  // shift the first 16 outs
-  reg_shifts = _mm512_set_epi32(6, 0, 0, 0,
-                                0, 4, 0, 0,
-                                0, 0, 2, 0,
-                                0, 0, 0, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 12), SafeLoad(in + 11) >> 12 | SafeLoad(in + 12) << 20,
-                              SafeLoad(in + 10) >> 18 | SafeLoad(in + 11) << 14, SafeLoad(in + 9) >> 24 | SafeLoad(in + 10) << 8,
-                              SafeLoad(in + 8) >> 30 | SafeLoad(in + 9) << 2, SafeLoad(in + 8),
-                              SafeLoad(in + 7) >> 10 | SafeLoad(in + 8) << 22, SafeLoad(in + 6) >> 16 | SafeLoad(in + 7) << 16,
-                              SafeLoad(in + 5) >> 22 | SafeLoad(in + 6) << 10, SafeLoad(in + 4) >> 28 | SafeLoad(in + 5) << 4,
-                              SafeLoad(in + 4), SafeLoad(in + 3) >> 8 | SafeLoad(in + 4) << 24,
-                              SafeLoad(in + 2) >> 14 | SafeLoad(in + 3) << 18, SafeLoad(in + 1) >> 20 | SafeLoad(in + 2) << 12,
-                              SafeLoad(in + 0) >> 26 | SafeLoad(in + 1) << 6, SafeLoad(in + 0));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  // shift the second 16 outs
-  reg_shifts = _mm512_set_epi32(6, 0, 0, 0,
-                                0, 4, 0, 0,
-                                0, 0, 2, 0,
-                                0, 0, 0, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 25), SafeLoad(in + 24) >> 12 | SafeLoad(in + 25) << 20,
-                              SafeLoad(in + 23) >> 18 | SafeLoad(in + 24) << 14, SafeLoad(in + 22) >> 24 | SafeLoad(in + 23) << 8,
-                              SafeLoad(in + 21) >> 30 | SafeLoad(in + 22) << 2, SafeLoad(in + 21),
-                              SafeLoad(in + 20) >> 10 | SafeLoad(in + 21) << 22, SafeLoad(in + 19) >> 16 | SafeLoad(in + 20) << 16,
-                              SafeLoad(in + 18) >> 22 | SafeLoad(in + 19) << 10, SafeLoad(in + 17) >> 28 | SafeLoad(in + 18) << 4,
-                              SafeLoad(in + 17), SafeLoad(in + 16) >> 8 | SafeLoad(in + 17) << 24,
-                              SafeLoad(in + 15) >> 14 | SafeLoad(in + 16) << 18, SafeLoad(in + 14) >> 20 | SafeLoad(in + 15) << 12,
-                              SafeLoad(in + 13) >> 26 | SafeLoad(in + 14) << 6, SafeLoad(in + 13));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  in += 26;
-
-  return in;
-}
-
-inline static const uint32_t* unpack27_32_avx512(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x7ffffff;
-  __m512i reg_shifts, reg_inls, reg_masks;
-  __m512i results;
-
-  reg_masks = _mm512_set1_epi32(mask);
-
-  // shift the first 16 outs
-  reg_shifts = _mm512_set_epi32(0, 0, 0, 4,
-                                0, 0, 0, 0,
-                                0, 2, 0, 0,
-                                0, 0, 0, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 12) >> 21 | SafeLoad(in + 13) << 11, SafeLoad(in + 11) >> 26 | SafeLoad(in + 12) << 6,
-                              SafeLoad(in + 10) >> 31 | SafeLoad(in + 11) << 1, SafeLoad(in + 10),
-                              SafeLoad(in + 9) >> 9 | SafeLoad(in + 10) << 23, SafeLoad(in + 8) >> 14 | SafeLoad(in + 9) << 18,
-                              SafeLoad(in + 7) >> 19 | SafeLoad(in + 8) << 13, SafeLoad(in + 6) >> 24 | SafeLoad(in + 7) << 8,
-                              SafeLoad(in + 5) >> 29 | SafeLoad(in + 6) << 3, SafeLoad(in + 5),
-                              SafeLoad(in + 4) >> 7 | SafeLoad(in + 5) << 25, SafeLoad(in + 3) >> 12 | SafeLoad(in + 4) << 20,
-                              SafeLoad(in + 2) >> 17 | SafeLoad(in + 3) << 15, SafeLoad(in + 1) >> 22 | SafeLoad(in + 2) << 10,
-                              SafeLoad(in + 0) >> 27 | SafeLoad(in + 1) << 5, SafeLoad(in + 0));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  // shift the second 16 outs
-  reg_shifts = _mm512_set_epi32(5, 0, 0, 0,
-                                0, 0, 3, 0,
-                                0, 0, 0, 0,
-                                1, 0, 0, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 26), SafeLoad(in + 25) >> 10 | SafeLoad(in + 26) << 22,
-                              SafeLoad(in + 24) >> 15 | SafeLoad(in + 25) << 17, SafeLoad(in + 23) >> 20 | SafeLoad(in + 24) << 12,
-                              SafeLoad(in + 22) >> 25 | SafeLoad(in + 23) << 7, SafeLoad(in + 21) >> 30 | SafeLoad(in + 22) << 2,
-                              SafeLoad(in + 21), SafeLoad(in + 20) >> 8 | SafeLoad(in + 21) << 24,
-                              SafeLoad(in + 19) >> 13 | SafeLoad(in + 20) << 19, SafeLoad(in + 18) >> 18 | SafeLoad(in + 19) << 14,
-                              SafeLoad(in + 17) >> 23 | SafeLoad(in + 18) << 9, SafeLoad(in + 16) >> 28 | SafeLoad(in + 17) << 4,
-                              SafeLoad(in + 16), SafeLoad(in + 15) >> 6 | SafeLoad(in + 16) << 26,
-                              SafeLoad(in + 14) >> 11 | SafeLoad(in + 15) << 21, SafeLoad(in + 13) >> 16 | SafeLoad(in + 14) << 16);
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  in += 27;
-
-  return in;
-}
-
-inline static const uint32_t* unpack28_32_avx512(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0xfffffff;
-  __m512i reg_shifts, reg_inls, reg_masks;
-  __m512i results;
-
-  reg_masks = _mm512_set1_epi32(mask);
-
-  // shift the first 16 outs
-  reg_shifts = _mm512_set_epi32(4, 0, 0, 0,
-                                0, 0, 0, 0,
-                                4, 0, 0, 0,
-                                0, 0, 0, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 13), SafeLoad(in + 12) >> 8 | SafeLoad(in + 13) << 24,
-                              SafeLoad(in + 11) >> 12 | SafeLoad(in + 12) << 20, SafeLoad(in + 10) >> 16 | SafeLoad(in + 11) << 16,
-                              SafeLoad(in + 9) >> 20 | SafeLoad(in + 10) << 12, SafeLoad(in + 8) >> 24 | SafeLoad(in + 9) << 8,
-                              SafeLoad(in + 7) >> 28 | SafeLoad(in + 8) << 4, SafeLoad(in + 7),
-                              SafeLoad(in + 6), SafeLoad(in + 5) >> 8 | SafeLoad(in + 6) << 24,
-                              SafeLoad(in + 4) >> 12 | SafeLoad(in + 5) << 20, SafeLoad(in + 3) >> 16 | SafeLoad(in + 4) << 16,
-                              SafeLoad(in + 2) >> 20 | SafeLoad(in + 3) << 12, SafeLoad(in + 1) >> 24 | SafeLoad(in + 2) << 8,
-                              SafeLoad(in + 0) >> 28 | SafeLoad(in + 1) << 4, SafeLoad(in + 0));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  // shift the second 16 outs
-  reg_shifts = _mm512_set_epi32(4, 0, 0, 0,
-                                0, 0, 0, 0,
-                                4, 0, 0, 0,
-                                0, 0, 0, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 27), SafeLoad(in + 26) >> 8 | SafeLoad(in + 27) << 24,
-                              SafeLoad(in + 25) >> 12 | SafeLoad(in + 26) << 20, SafeLoad(in + 24) >> 16 | SafeLoad(in + 25) << 16,
-                              SafeLoad(in + 23) >> 20 | SafeLoad(in + 24) << 12, SafeLoad(in + 22) >> 24 | SafeLoad(in + 23) << 8,
-                              SafeLoad(in + 21) >> 28 | SafeLoad(in + 22) << 4, SafeLoad(in + 21),
-                              SafeLoad(in + 20), SafeLoad(in + 19) >> 8 | SafeLoad(in + 20) << 24,
-                              SafeLoad(in + 18) >> 12 | SafeLoad(in + 19) << 20, SafeLoad(in + 17) >> 16 | SafeLoad(in + 18) << 16,
-                              SafeLoad(in + 16) >> 20 | SafeLoad(in + 17) << 12, SafeLoad(in + 15) >> 24 | SafeLoad(in + 16) << 8,
-                              SafeLoad(in + 14) >> 28 | SafeLoad(in + 15) << 4, SafeLoad(in + 14));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  in += 28;
-
-  return in;
-}
-
-inline static const uint32_t* unpack29_32_avx512(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x1fffffff;
-  __m512i reg_shifts, reg_inls, reg_masks;
-  __m512i results;
-
-  reg_masks = _mm512_set1_epi32(mask);
-
-  // shift the first 16 outs
-  reg_shifts = _mm512_set_epi32(0, 0, 0, 0,
-                                0, 2, 0, 0,
-                                0, 0, 0, 0,
-                                0, 0, 0, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 13) >> 19 | SafeLoad(in + 14) << 13, SafeLoad(in + 12) >> 22 | SafeLoad(in + 13) << 10,
-                              SafeLoad(in + 11) >> 25 | SafeLoad(in + 12) << 7, SafeLoad(in + 10) >> 28 | SafeLoad(in + 11) << 4,
-                              SafeLoad(in + 9) >> 31 | SafeLoad(in + 10) << 1, SafeLoad(in + 9),
-                              SafeLoad(in + 8) >> 5 | SafeLoad(in + 9) << 27, SafeLoad(in + 7) >> 8 | SafeLoad(in + 8) << 24,
-                              SafeLoad(in + 6) >> 11 | SafeLoad(in + 7) << 21, SafeLoad(in + 5) >> 14 | SafeLoad(in + 6) << 18,
-                              SafeLoad(in + 4) >> 17 | SafeLoad(in + 5) << 15, SafeLoad(in + 3) >> 20 | SafeLoad(in + 4) << 12,
-                              SafeLoad(in + 2) >> 23 | SafeLoad(in + 3) << 9, SafeLoad(in + 1) >> 26 | SafeLoad(in + 2) << 6,
-                              SafeLoad(in + 0) >> 29 | SafeLoad(in + 1) << 3, SafeLoad(in + 0));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  // shift the second 16 outs
-  reg_shifts = _mm512_set_epi32(3, 0, 0, 0,
-                                0, 0, 0, 0,
-                                0, 0, 1, 0,
-                                0, 0, 0, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 28), SafeLoad(in + 27) >> 6 | SafeLoad(in + 28) << 26,
-                              SafeLoad(in + 26) >> 9 | SafeLoad(in + 27) << 23, SafeLoad(in + 25) >> 12 | SafeLoad(in + 26) << 20,
-                              SafeLoad(in + 24) >> 15 | SafeLoad(in + 25) << 17, SafeLoad(in + 23) >> 18 | SafeLoad(in + 24) << 14,
-                              SafeLoad(in + 22) >> 21 | SafeLoad(in + 23) << 11, SafeLoad(in + 21) >> 24 | SafeLoad(in + 22) << 8,
-                              SafeLoad(in + 20) >> 27 | SafeLoad(in + 21) << 5, SafeLoad(in + 19) >> 30 | SafeLoad(in + 20) << 2,
-                              SafeLoad(in + 19), SafeLoad(in + 18) >> 4 | SafeLoad(in + 19) << 28,
-                              SafeLoad(in + 17) >> 7 | SafeLoad(in + 18) << 25, SafeLoad(in + 16) >> 10 | SafeLoad(in + 17) << 22,
-                              SafeLoad(in + 15) >> 13 | SafeLoad(in + 16) << 19, SafeLoad(in + 14) >> 16 | SafeLoad(in + 15) << 16);
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  in += 29;
-
-  return in;
-}
-
-inline static const uint32_t* unpack30_32_avx512(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x3fffffff;
-  __m512i reg_shifts, reg_inls, reg_masks;
-  __m512i results;
-
-  reg_masks = _mm512_set1_epi32(mask);
-
-  // shift the first 16 outs
-  reg_shifts = _mm512_set_epi32(2, 0, 0, 0,
-                                0, 0, 0, 0,
-                                0, 0, 0, 0,
-                                0, 0, 0, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 14), SafeLoad(in + 13) >> 4 | SafeLoad(in + 14) << 28,
-                              SafeLoad(in + 12) >> 6 | SafeLoad(in + 13) << 26, SafeLoad(in + 11) >> 8 | SafeLoad(in + 12) << 24,
-                              SafeLoad(in + 10) >> 10 | SafeLoad(in + 11) << 22, SafeLoad(in + 9) >> 12 | SafeLoad(in + 10) << 20,
-                              SafeLoad(in + 8) >> 14 | SafeLoad(in + 9) << 18, SafeLoad(in + 7) >> 16 | SafeLoad(in + 8) << 16,
-                              SafeLoad(in + 6) >> 18 | SafeLoad(in + 7) << 14, SafeLoad(in + 5) >> 20 | SafeLoad(in + 6) << 12,
-                              SafeLoad(in + 4) >> 22 | SafeLoad(in + 5) << 10, SafeLoad(in + 3) >> 24 | SafeLoad(in + 4) << 8,
-                              SafeLoad(in + 2) >> 26 | SafeLoad(in + 3) << 6, SafeLoad(in + 1) >> 28 | SafeLoad(in + 2) << 4,
-                              SafeLoad(in + 0) >> 30 | SafeLoad(in + 1) << 2, SafeLoad(in + 0));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  // shift the second 16 outs
-  reg_shifts = _mm512_set_epi32(2, 0, 0, 0,
-                                0, 0, 0, 0,
-                                0, 0, 0, 0,
-                                0, 0, 0, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 29), SafeLoad(in + 28) >> 4 | SafeLoad(in + 29) << 28,
-                              SafeLoad(in + 27) >> 6 | SafeLoad(in + 28) << 26, SafeLoad(in + 26) >> 8 | SafeLoad(in + 27) << 24,
-                              SafeLoad(in + 25) >> 10 | SafeLoad(in + 26) << 22, SafeLoad(in + 24) >> 12 | SafeLoad(in + 25) << 20,
-                              SafeLoad(in + 23) >> 14 | SafeLoad(in + 24) << 18, SafeLoad(in + 22) >> 16 | SafeLoad(in + 23) << 16,
-                              SafeLoad(in + 21) >> 18 | SafeLoad(in + 22) << 14, SafeLoad(in + 20) >> 20 | SafeLoad(in + 21) << 12,
-                              SafeLoad(in + 19) >> 22 | SafeLoad(in + 20) << 10, SafeLoad(in + 18) >> 24 | SafeLoad(in + 19) << 8,
-                              SafeLoad(in + 17) >> 26 | SafeLoad(in + 18) << 6, SafeLoad(in + 16) >> 28 | SafeLoad(in + 17) << 4,
-                              SafeLoad(in + 15) >> 30 | SafeLoad(in + 16) << 2, SafeLoad(in + 15));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  in += 30;
-
-  return in;
-}
-
-inline static const uint32_t* unpack31_32_avx512(const uint32_t* in, uint32_t* out) {
-  using ::arrow::util::SafeLoad;
-  uint32_t mask = 0x7fffffff;
-  __m512i reg_shifts, reg_inls, reg_masks;
-  __m512i results;
-
-  reg_masks = _mm512_set1_epi32(mask);
-
-  // shift the first 16 outs
-  reg_shifts = _mm512_set_epi32(0, 0, 0, 0,
-                                0, 0, 0, 0,
-                                0, 0, 0, 0,
-                                0, 0, 0, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 14) >> 17 | SafeLoad(in + 15) << 15, SafeLoad(in + 13) >> 18 | SafeLoad(in + 14) << 14,
-                              SafeLoad(in + 12) >> 19 | SafeLoad(in + 13) << 13, SafeLoad(in + 11) >> 20 | SafeLoad(in + 12) << 12,
-                              SafeLoad(in + 10) >> 21 | SafeLoad(in + 11) << 11, SafeLoad(in + 9) >> 22 | SafeLoad(in + 10) << 10,
-                              SafeLoad(in + 8) >> 23 | SafeLoad(in + 9) << 9, SafeLoad(in + 7) >> 24 | SafeLoad(in + 8) << 8,
-                              SafeLoad(in + 6) >> 25 | SafeLoad(in + 7) << 7, SafeLoad(in + 5) >> 26 | SafeLoad(in + 6) << 6,
-                              SafeLoad(in + 4) >> 27 | SafeLoad(in + 5) << 5, SafeLoad(in + 3) >> 28 | SafeLoad(in + 4) << 4,
-                              SafeLoad(in + 2) >> 29 | SafeLoad(in + 3) << 3, SafeLoad(in + 1) >> 30 | SafeLoad(in + 2) << 2,
-                              SafeLoad(in + 0) >> 31 | SafeLoad(in + 1) << 1, SafeLoad(in + 0));
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  // shift the second 16 outs
-  reg_shifts = _mm512_set_epi32(1, 0, 0, 0,
-                                0, 0, 0, 0,
-                                0, 0, 0, 0,
-                                0, 0, 0, 0);
-  reg_inls = _mm512_set_epi32(SafeLoad(in + 30), SafeLoad(in + 29) >> 2 | SafeLoad(in + 30) << 30,
-                              SafeLoad(in + 28) >> 3 | SafeLoad(in + 29) << 29, SafeLoad(in + 27) >> 4 | SafeLoad(in + 28) << 28,
-                              SafeLoad(in + 26) >> 5 | SafeLoad(in + 27) << 27, SafeLoad(in + 25) >> 6 | SafeLoad(in + 26) << 26,
-                              SafeLoad(in + 24) >> 7 | SafeLoad(in + 25) << 25, SafeLoad(in + 23) >> 8 | SafeLoad(in + 24) << 24,
-                              SafeLoad(in + 22) >> 9 | SafeLoad(in + 23) << 23, SafeLoad(in + 21) >> 10 | SafeLoad(in + 22) << 22,
-                              SafeLoad(in + 20) >> 11 | SafeLoad(in + 21) << 21, SafeLoad(in + 19) >> 12 | SafeLoad(in + 20) << 20,
-                              SafeLoad(in + 18) >> 13 | SafeLoad(in + 19) << 19, SafeLoad(in + 17) >> 14 | SafeLoad(in + 18) << 18,
-                              SafeLoad(in + 16) >> 15 | SafeLoad(in + 17) << 17, SafeLoad(in + 15) >> 16 | SafeLoad(in + 16) << 16);
-  results = _mm512_and_epi32(_mm512_srlv_epi32(reg_inls, reg_shifts), reg_masks);
-  _mm512_storeu_si512(out, results);
-  out += 16;
-
-  in += 31;
-
-  return in;
-}
-
-inline static const uint32_t* unpack32_32_avx512(const uint32_t* in, uint32_t* out) {
-  memcpy(out, in, 32 * sizeof(*out));
-  in += 32;
-  out += 32;
-
-  return in;
-}
-
-}  // namespace internal
-}  // namespace arrow

From 7eea2f53a1002552bbb87db5611e75c15b88b504 Mon Sep 17 00:00:00 2001
From: Dominik Moritz <domoritz@gmail.com>
Date: Thu, 8 Jul 2021 00:46:42 -0700
Subject: [PATCH 529/719] MINOR: [JS] Revert rewrite since closure compiler is
 fixed now (#10680)

https://github.com/google/closure-compiler/issues/3810#issuecomment-875715849
---
 js/src/util/math.ts | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/js/src/util/math.ts b/js/src/util/math.ts
index c61f31c25dc..47678e1a961 100644
--- a/js/src/util/math.ts
+++ b/js/src/util/math.ts
@@ -28,8 +28,7 @@ const u32 = new Uint32Array(f64.buffer);
 export function uint16ToFloat64(h: number) {
     const expo = (h & 0x7C00) >> 10;
     const sigf = (h & 0x03FF) / 1024;
-    // use Math.pow to prevent closure compiler from creating incorrect js: https://github.com/google/closure-compiler/issues/3810
-    const sign = Math.pow(-1, (h & 0x8000) >> 15);
+    const sign = (-1) ** ((h & 0x8000) >> 15);
     switch (expo) {
         case 0x1F: return sign * (sigf ? NaN : 1 / 0);
         case 0x00: return sign * (sigf ? 6.103515625e-5 * sigf : 0);

From 0219e9a198b201df852b4219816752b36f116825 Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Thu, 8 Jul 2021 10:54:06 -0400
Subject: [PATCH 530/719] ARROW-13171: [R] Add binding for str_pad()

Closes #10638 from thisisnic/ARROW-13171_str_pad

Lead-authored-by: Nic Crane <thisisnic@gmail.com>
Co-authored-by: Nic <thisisnic@gmail.com>
Signed-off-by: Ian Cook <ianmcook@gmail.com>
---
 r/R/dplyr-functions.R                         | 21 ++++++++++
 r/R/expression.R                              |  1 +
 r/src/compute.cpp                             |  8 ++++
 .../testthat/test-dplyr-string-functions.R    | 41 +++++++++++++++++++
 4 files changed, 71 insertions(+)

diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R
index 055cff5472b..5ddd6968972 100644
--- a/r/R/dplyr-functions.R
+++ b/r/R/dplyr-functions.R
@@ -414,6 +414,27 @@ nse_funcs$pmax <- function(..., na.rm = FALSE) {
   )
 }
 
+nse_funcs$str_pad <- function(string, width, side = c("left", "right", "both"), pad = " ") {
+  
+  assert_that(is_integerish(width))
+  side <- match.arg(side)
+  assert_that(is.string(pad))
+  
+  if (side == "left") {
+    pad_func = "utf8_lpad"
+  } else if (side == "right") {
+    pad_func = "utf8_rpad"
+  } else if (side == "both") {
+    pad_func = "utf8_center"
+  }
+  
+  Expression$create(
+    pad_func,
+    string,
+    options = list(width = width, padding = pad)
+  )
+}
+
 # String function helpers
 
 # format `pattern` as needed for case insensitivity and literal matching by RE2
diff --git a/r/R/expression.R b/r/R/expression.R
index de140832374..9b4b79e458a 100644
--- a/r/R/expression.R
+++ b/r/R/expression.R
@@ -30,6 +30,7 @@
   "str_length" = "utf8_length",
   "str_to_lower" = "utf8_lower",
   "str_to_upper" = "utf8_upper",
+  # str_pad is defined in dplyr-functions.R
   "str_reverse" = "utf8_reverse",
   # str_trim is defined in dplyr-functions.R
   "year" = "year",
diff --git a/r/src/compute.cpp b/r/src/compute.cpp
index 9be1cc3a83e..cfa895ecb1e 100644
--- a/r/src/compute.cpp
+++ b/r/src/compute.cpp
@@ -295,6 +295,14 @@ std::shared_ptr<arrow::compute::FunctionOptions> make_compute_options(
                                      max_splits, reverse);
   }
 
+  if (func_name == "utf8_lpad" || func_name == "utf8_rpad" ||
+      func_name == "utf8_center" || func_name == "ascii_lpad" ||
+      func_name == "ascii_rpad" || func_name == "ascii_center") {
+    using Options = arrow::compute::PadOptions;
+    return std::make_shared<Options>(cpp11::as_cpp<int64_t>(options["width"]),
+                                     cpp11::as_cpp<std::string>(options["padding"]));
+  }
+
   if (func_name == "utf8_split_whitespace" || func_name == "ascii_split_whitespace") {
     using Options = arrow::compute::SplitOptions;
     int64_t max_splits = -1;
diff --git a/r/tests/testthat/test-dplyr-string-functions.R b/r/tests/testthat/test-dplyr-string-functions.R
index ecbe2f00f2d..438f1038e57 100644
--- a/r/tests/testthat/test-dplyr-string-functions.R
+++ b/r/tests/testthat/test-dplyr-string-functions.R
@@ -866,3 +866,44 @@ test_that("str_like", {
     df
   )
 })
+
+test_that("str_pad", {
+  
+  df <- tibble(x = c("Foo and bar", "baz and qux and quux"))
+  
+  expect_dplyr_equal(
+    input %>%
+      mutate(x = str_pad(x, width = 31)) %>%
+      collect(),
+    df
+  )
+  
+  expect_dplyr_equal(
+    input %>%
+      mutate(x = str_pad(x, width = 30, side = "right")) %>%
+      collect(),
+    df
+  )
+  
+  expect_dplyr_equal(
+    input %>%
+      mutate(x = str_pad(x, width = 31, side = "left", pad = "+")) %>%
+      collect(),
+    df
+  )
+  
+  expect_dplyr_equal(
+    input %>%
+      mutate(x = str_pad(x, width = 10, side = "left", pad = "+")) %>%
+      collect(),
+    df
+  )
+  
+  expect_dplyr_equal(
+    input %>%
+      mutate(x = str_pad(x, width = 31, side = "both")) %>%
+      collect(),
+    df
+  )
+  
+})

From d6f322646451555b141d07d63d89534c36d81a32 Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Fri, 9 Jul 2021 09:05:12 +0900
Subject: [PATCH 531/719] ARROW-13291: [GLib][CI] Require gobject-introspection
 3.4.5 or later

It's needed for Flight tests.

Closes #10687 from kou/glib-ci-gi-version

Authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 c_glib/Gemfile                    | 2 +-
 c_glib/test/flight/test-client.rb | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/c_glib/Gemfile b/c_glib/Gemfile
index 4b570902bcd..bd91b629c9e 100644
--- a/c_glib/Gemfile
+++ b/c_glib/Gemfile
@@ -20,4 +20,4 @@
 source "https://rubygems.org/"
 
 gem "test-unit"
-gem "gobject-introspection"
+gem "gobject-introspection", ">= 3.4.5"
diff --git a/c_glib/test/flight/test-client.rb b/c_glib/test/flight/test-client.rb
index 79960e20dbd..f3fca0116de 100644
--- a/c_glib/test/flight/test-client.rb
+++ b/c_glib/test/flight/test-client.rb
@@ -16,10 +16,13 @@
 # under the License.
 
 class TestFlightClient < Test::Unit::TestCase
+  include Helper::Omittable
+
   def setup
     @server = nil
     omit("Arrow Flight is required") unless defined?(ArrowFlight)
     omit("Unstable on Windows") if Gem.win_platform?
+    require_gi_bindings(3, 4, 5)
     @server = Helper::FlightServer.new
     host = "127.0.0.1"
     location = ArrowFlight::Location.new("grpc://#{host}:0")

From 9fead42f50e381d5f9b2cb133bd4693064bdb7f8 Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Thu, 8 Jul 2021 21:18:20 -0500
Subject: [PATCH 532/719] ARROW-13265: [R] cli valgrind errors in nightlies

Closes #10676 from jonkeane/ARROW-13265-cli-valgrind

Authored-by: Jonathan Keane <jkeane@gmail.com>
Signed-off-by: Jonathan Keane <jkeane@gmail.com>
---
 ci/scripts/r_valgrind.sh | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/ci/scripts/r_valgrind.sh b/ci/scripts/r_valgrind.sh
index 1db526eaf86..68c8dd57093 100755
--- a/ci/scripts/r_valgrind.sh
+++ b/ci/scripts/r_valgrind.sh
@@ -28,6 +28,12 @@ ${R_BIN} CMD INSTALL ${source_dir}
 pushd ${source_dir}/tests
 
 export TEST_R_WITH_ARROW=TRUE
+
+# Set cli to not use a separate thread. This thread isn't explicitly closed,
+# which triggers a valgrind possibly lost error. We can remove this when
+# https://github.com/r-lib/cli/issues/311 is resolved + released on cran.
+export CLI_NO_THREAD=1
+
 # to generate suppression files run:
 # ${R_BIN} --vanilla -d "valgrind --tool=memcheck --leak-check=full --track-origins=yes --gen-suppressions=all --log-file=memcheck.log" -f testtthat.supp
 ${R_BIN} --vanilla -d "valgrind --tool=memcheck --leak-check=full --track-origins=yes --suppressions=/${1}/ci/etc/valgrind-cran.supp" -f testthat.R |& tee testthat.out

From 140f6087b526991248a6e05bdcf16996fbc4421f Mon Sep 17 00:00:00 2001
From: Alex Baden <alex.baden@gmail.com>
Date: Fri, 9 Jul 2021 02:25:04 +0000
Subject: [PATCH 533/719] ARROW-13290: [C++] Add missing include

Noticed this issue compiling w/ clang-12 and gcc-11 on Arch linux (both using the PKGBUILD and building from source).

Closes #10685 from alexbaden/ARROW-13290

Authored-by: Alex Baden <alex.baden@gmail.com>
Signed-off-by: Yibo Cai <yibo.cai@arm.com>
---
 cpp/src/arrow/util/tdigest.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cpp/src/arrow/util/tdigest.cc b/cpp/src/arrow/util/tdigest.cc
index b23bca397ec..99b771ca0f2 100644
--- a/cpp/src/arrow/util/tdigest.cc
+++ b/cpp/src/arrow/util/tdigest.cc
@@ -20,6 +20,7 @@
 #include <algorithm>
 #include <cmath>
 #include <iostream>
+#include <limits>
 #include <queue>
 #include <tuple>
 #include <vector>

From 557a7c63d49aa04508564517c77c71f3657d19ff Mon Sep 17 00:00:00 2001
From: Michal Nowakiewicz <michal@ursacomputing.com>
Date: Fri, 9 Jul 2021 11:37:32 -0400
Subject: [PATCH 534/719] ARROW-13169: [C++][Compute] Fix array offset support
 in GrouperFastImpl

GrouperFastImpl was ignoring offset values in input key arrays, treating them as if it was always zero.
This change brings support for arbitrary offsets in input.

Closes #10688 from michalursa/ARROW-13169-array-offset-in-grouper

Lead-authored-by: Michal Nowakiewicz <michal@ursacomputing.com>
Co-authored-by: Benjamin Kietzman <bengilgit@gmail.com>
Co-authored-by: michalursa <michal@ursacomputing.com>
Signed-off-by: Benjamin Kietzman <bengilgit@gmail.com>
---
 cpp/src/arrow/array/data.cc                   |  22 +-
 cpp/src/arrow/array/data.h                    |  30 +-
 cpp/src/arrow/compute/exec/key_encode.cc      | 292 ++++++++++--------
 cpp/src/arrow/compute/exec/key_encode.h       |  14 +-
 cpp/src/arrow/compute/exec/util.cc            | 118 ++++---
 cpp/src/arrow/compute/exec/util.h             |  16 +-
 .../arrow/compute/kernels/hash_aggregate.cc   |   9 +-
 .../compute/kernels/hash_aggregate_test.cc    |  42 +++
 8 files changed, 329 insertions(+), 214 deletions(-)

diff --git a/cpp/src/arrow/array/data.cc b/cpp/src/arrow/array/data.cc
index e397a752cd8..5a214473972 100644
--- a/cpp/src/arrow/array/data.cc
+++ b/cpp/src/arrow/array/data.cc
@@ -56,41 +56,39 @@ static inline void AdjustNonNullable(Type::type type_id, int64_t length,
   }
 }
 
-std::shared_ptr<ArrayData> ArrayData::Make(const std::shared_ptr<DataType>& type,
-                                           int64_t length,
+std::shared_ptr<ArrayData> ArrayData::Make(std::shared_ptr<DataType> type, int64_t length,
                                            std::vector<std::shared_ptr<Buffer>> buffers,
                                            int64_t null_count, int64_t offset) {
   AdjustNonNullable(type->id(), length, &buffers, &null_count);
-  return std::make_shared<ArrayData>(type, length, std::move(buffers), null_count,
-                                     offset);
+  return std::make_shared<ArrayData>(std::move(type), length, std::move(buffers),
+                                     null_count, offset);
 }
 
 std::shared_ptr<ArrayData> ArrayData::Make(
-    const std::shared_ptr<DataType>& type, int64_t length,
+    std::shared_ptr<DataType> type, int64_t length,
     std::vector<std::shared_ptr<Buffer>> buffers,
     std::vector<std::shared_ptr<ArrayData>> child_data, int64_t null_count,
     int64_t offset) {
   AdjustNonNullable(type->id(), length, &buffers, &null_count);
-  return std::make_shared<ArrayData>(type, length, std::move(buffers),
+  return std::make_shared<ArrayData>(std::move(type), length, std::move(buffers),
                                      std::move(child_data), null_count, offset);
 }
 
 std::shared_ptr<ArrayData> ArrayData::Make(
-    const std::shared_ptr<DataType>& type, int64_t length,
+    std::shared_ptr<DataType> type, int64_t length,
     std::vector<std::shared_ptr<Buffer>> buffers,
     std::vector<std::shared_ptr<ArrayData>> child_data,
     std::shared_ptr<ArrayData> dictionary, int64_t null_count, int64_t offset) {
   AdjustNonNullable(type->id(), length, &buffers, &null_count);
-  auto data = std::make_shared<ArrayData>(type, length, std::move(buffers),
+  auto data = std::make_shared<ArrayData>(std::move(type), length, std::move(buffers),
                                           std::move(child_data), null_count, offset);
   data->dictionary = std::move(dictionary);
   return data;
 }
 
-std::shared_ptr<ArrayData> ArrayData::Make(const std::shared_ptr<DataType>& type,
-                                           int64_t length, int64_t null_count,
-                                           int64_t offset) {
-  return std::make_shared<ArrayData>(type, length, null_count, offset);
+std::shared_ptr<ArrayData> ArrayData::Make(std::shared_ptr<DataType> type, int64_t length,
+                                           int64_t null_count, int64_t offset) {
+  return std::make_shared<ArrayData>(std::move(type), length, null_count, offset);
 }
 
 std::shared_ptr<ArrayData> ArrayData::Slice(int64_t off, int64_t len) const {
diff --git a/cpp/src/arrow/array/data.h b/cpp/src/arrow/array/data.h
index 02a49949e1f..418d09def6b 100644
--- a/cpp/src/arrow/array/data.h
+++ b/cpp/src/arrow/array/data.h
@@ -71,49 +71,47 @@ constexpr int64_t kUnknownNullCount = -1;
 /// input array and replace them with newly-allocated data, changing the output
 /// data type as well.
 struct ARROW_EXPORT ArrayData {
-  ArrayData() : length(0), null_count(0), offset(0) {}
+  ArrayData() = default;
 
-  ArrayData(const std::shared_ptr<DataType>& type, int64_t length,
+  ArrayData(std::shared_ptr<DataType> type, int64_t length,
             int64_t null_count = kUnknownNullCount, int64_t offset = 0)
-      : type(type), length(length), null_count(null_count), offset(offset) {}
+      : type(std::move(type)), length(length), null_count(null_count), offset(offset) {}
 
-  ArrayData(const std::shared_ptr<DataType>& type, int64_t length,
+  ArrayData(std::shared_ptr<DataType> type, int64_t length,
             std::vector<std::shared_ptr<Buffer>> buffers,
             int64_t null_count = kUnknownNullCount, int64_t offset = 0)
-      : ArrayData(type, length, null_count, offset) {
+      : ArrayData(std::move(type), length, null_count, offset) {
     this->buffers = std::move(buffers);
   }
 
-  ArrayData(const std::shared_ptr<DataType>& type, int64_t length,
+  ArrayData(std::shared_ptr<DataType> type, int64_t length,
             std::vector<std::shared_ptr<Buffer>> buffers,
             std::vector<std::shared_ptr<ArrayData>> child_data,
             int64_t null_count = kUnknownNullCount, int64_t offset = 0)
-      : ArrayData(type, length, null_count, offset) {
+      : ArrayData(std::move(type), length, null_count, offset) {
     this->buffers = std::move(buffers);
     this->child_data = std::move(child_data);
   }
 
-  static std::shared_ptr<ArrayData> Make(const std::shared_ptr<DataType>& type,
-                                         int64_t length,
+  static std::shared_ptr<ArrayData> Make(std::shared_ptr<DataType> type, int64_t length,
                                          std::vector<std::shared_ptr<Buffer>> buffers,
                                          int64_t null_count = kUnknownNullCount,
                                          int64_t offset = 0);
 
   static std::shared_ptr<ArrayData> Make(
-      const std::shared_ptr<DataType>& type, int64_t length,
+      std::shared_ptr<DataType> type, int64_t length,
       std::vector<std::shared_ptr<Buffer>> buffers,
       std::vector<std::shared_ptr<ArrayData>> child_data,
       int64_t null_count = kUnknownNullCount, int64_t offset = 0);
 
   static std::shared_ptr<ArrayData> Make(
-      const std::shared_ptr<DataType>& type, int64_t length,
+      std::shared_ptr<DataType> type, int64_t length,
       std::vector<std::shared_ptr<Buffer>> buffers,
       std::vector<std::shared_ptr<ArrayData>> child_data,
       std::shared_ptr<ArrayData> dictionary, int64_t null_count = kUnknownNullCount,
       int64_t offset = 0);
 
-  static std::shared_ptr<ArrayData> Make(const std::shared_ptr<DataType>& type,
-                                         int64_t length,
+  static std::shared_ptr<ArrayData> Make(std::shared_ptr<DataType> type, int64_t length,
                                          int64_t null_count = kUnknownNullCount,
                                          int64_t offset = 0);
 
@@ -232,11 +230,11 @@ struct ARROW_EXPORT ArrayData {
   }
 
   std::shared_ptr<DataType> type;
-  int64_t length;
-  mutable std::atomic<int64_t> null_count;
+  int64_t length = 0;
+  mutable std::atomic<int64_t> null_count{0};
   // The logical start point into the physical buffers (in values, not bytes).
   // Note that, for child data, this must be *added* to the child data's own offset.
-  int64_t offset;
+  int64_t offset = 0;
   std::vector<std::shared_ptr<Buffer>> buffers;
   std::vector<std::shared_ptr<ArrayData>> child_data;
 
diff --git a/cpp/src/arrow/compute/exec/key_encode.cc b/cpp/src/arrow/compute/exec/key_encode.cc
index 0c5f27c51c1..de79558f2c2 100644
--- a/cpp/src/arrow/compute/exec/key_encode.cc
+++ b/cpp/src/arrow/compute/exec/key_encode.cc
@@ -35,7 +35,7 @@ Status KeyEncoder::KeyRowArray::Init(MemoryPool* pool, const KeyRowMetadata& met
   pool_ = pool;
   metadata_ = metadata;
 
-  ARROW_DCHECK(!null_masks_ && !offsets_ && !rows_);
+  DCHECK(!null_masks_ && !offsets_ && !rows_);
 
   constexpr int64_t rows_capacity = 8;
   constexpr int64_t bytes_capacity = 1024;
@@ -178,15 +178,14 @@ Status KeyEncoder::KeyRowArray::ResizeOptionalVaryingLengthBuffer(
 Status KeyEncoder::KeyRowArray::AppendSelectionFrom(const KeyRowArray& from,
                                                     uint32_t num_rows_to_append,
                                                     const uint16_t* source_row_ids) {
-  ARROW_DCHECK(metadata_.is_compatible(from.metadata()));
+  DCHECK(metadata_.is_compatible(from.metadata()));
 
   RETURN_NOT_OK(ResizeFixedLengthBuffers(num_rows_to_append));
 
   if (!metadata_.is_fixed_length) {
     // Varying-length rows
-    const uint32_t* from_offsets =
-        reinterpret_cast<const uint32_t*>(from.offsets_->data());
-    uint32_t* to_offsets = reinterpret_cast<uint32_t*>(offsets_->mutable_data());
+    auto from_offsets = reinterpret_cast<const uint32_t*>(from.offsets_->data());
+    auto to_offsets = reinterpret_cast<uint32_t*>(offsets_->mutable_data());
     uint32_t total_length = to_offsets[num_rows_];
     uint32_t total_length_to_append = 0;
     for (uint32_t i = 0; i < num_rows_to_append; ++i) {
@@ -203,9 +202,8 @@ Status KeyEncoder::KeyRowArray::AppendSelectionFrom(const KeyRowArray& from,
     for (uint32_t i = 0; i < num_rows_to_append; ++i) {
       uint16_t row_id = source_row_ids[i];
       uint32_t length = from_offsets[row_id + 1] - from_offsets[row_id];
-      const uint64_t* src64 =
-          reinterpret_cast<const uint64_t*>(src + from_offsets[row_id]);
-      uint64_t* dst64 = reinterpret_cast<uint64_t*>(dst);
+      auto src64 = reinterpret_cast<const uint64_t*>(src + from_offsets[row_id]);
+      auto dst64 = reinterpret_cast<uint64_t*>(dst);
       for (uint32_t j = 0; j < (length + 7) / 8; ++j) {
         dst64[j] = src64[j];
       }
@@ -218,8 +216,8 @@ Status KeyEncoder::KeyRowArray::AppendSelectionFrom(const KeyRowArray& from,
     for (uint32_t i = 0; i < num_rows_to_append; ++i) {
       uint16_t row_id = source_row_ids[i];
       uint32_t length = metadata_.fixed_length;
-      const uint64_t* src64 = reinterpret_cast<const uint64_t*>(src + length * row_id);
-      uint64_t* dst64 = reinterpret_cast<uint64_t*>(dst);
+      auto src64 = reinterpret_cast<const uint64_t*>(src + length * row_id);
+      auto dst64 = reinterpret_cast<uint64_t*>(dst);
       for (uint32_t j = 0; j < (length + 7) / 8; ++j) {
         dst64[j] = src64[j];
       }
@@ -285,51 +283,67 @@ KeyEncoder::KeyColumnArray::KeyColumnArray(const KeyColumnMetadata& metadata,
   }
   buffers_[buffer_id_to_replace] = right.buffers_[buffer_id_to_replace];
   mutable_buffers_[buffer_id_to_replace] = right.mutable_buffers_[buffer_id_to_replace];
+  bit_offset_[0] = left.bit_offset_[0];
+  bit_offset_[1] = left.bit_offset_[1];
+  if (buffer_id_to_replace < max_buffers_ - 1) {
+    bit_offset_[buffer_id_to_replace] = right.bit_offset_[buffer_id_to_replace];
+  }
 }
 
 KeyEncoder::KeyColumnArray::KeyColumnArray(const KeyColumnMetadata& metadata,
                                            int64_t length, const uint8_t* buffer0,
-                                           const uint8_t* buffer1,
-                                           const uint8_t* buffer2) {
+                                           const uint8_t* buffer1, const uint8_t* buffer2,
+                                           int bit_offset0, int bit_offset1) {
   metadata_ = metadata;
   length_ = length;
   buffers_[0] = buffer0;
   buffers_[1] = buffer1;
   buffers_[2] = buffer2;
   mutable_buffers_[0] = mutable_buffers_[1] = mutable_buffers_[2] = nullptr;
+  bit_offset_[0] = bit_offset0;
+  bit_offset_[1] = bit_offset1;
 }
 
 KeyEncoder::KeyColumnArray::KeyColumnArray(const KeyColumnMetadata& metadata,
                                            int64_t length, uint8_t* buffer0,
-                                           uint8_t* buffer1, uint8_t* buffer2) {
+                                           uint8_t* buffer1, uint8_t* buffer2,
+                                           int bit_offset0, int bit_offset1) {
   metadata_ = metadata;
   length_ = length;
   buffers_[0] = mutable_buffers_[0] = buffer0;
   buffers_[1] = mutable_buffers_[1] = buffer1;
   buffers_[2] = mutable_buffers_[2] = buffer2;
+  bit_offset_[0] = bit_offset0;
+  bit_offset_[1] = bit_offset1;
 }
 
 KeyEncoder::KeyColumnArray::KeyColumnArray(const KeyColumnArray& from, int64_t start,
                                            int64_t length) {
-  ARROW_DCHECK((start % 8) == 0);
   metadata_ = from.metadata_;
   length_ = length;
   uint32_t fixed_size =
       !metadata_.is_fixed_length ? sizeof(uint32_t) : metadata_.fixed_length;
 
-  buffers_[0] = from.buffers_[0] ? from.buffers_[0] + start / 8 : nullptr;
-  mutable_buffers_[0] =
-      from.mutable_buffers_[0] ? from.mutable_buffers_[0] + start / 8 : nullptr;
+  buffers_[0] =
+      from.buffers_[0] ? from.buffers_[0] + (from.bit_offset_[0] + start) / 8 : nullptr;
+  mutable_buffers_[0] = from.mutable_buffers_[0]
+                            ? from.mutable_buffers_[0] + (from.bit_offset_[0] + start) / 8
+                            : nullptr;
+  bit_offset_[0] = (from.bit_offset_[0] + start) % 8;
 
   if (fixed_size == 0) {
-    buffers_[1] = from.buffers_[1] ? from.buffers_[1] + start / 8 : nullptr;
-    mutable_buffers_[1] =
-        from.mutable_buffers_[1] ? from.mutable_buffers_[1] + start / 8 : nullptr;
+    buffers_[1] =
+        from.buffers_[1] ? from.buffers_[1] + (from.bit_offset_[1] + start) / 8 : nullptr;
+    mutable_buffers_[1] = from.mutable_buffers_[1] ? from.mutable_buffers_[1] +
+                                                         (from.bit_offset_[1] + start) / 8
+                                                   : nullptr;
+    bit_offset_[1] = (from.bit_offset_[1] + start) % 8;
   } else {
     buffers_[1] = from.buffers_[1] ? from.buffers_[1] + start * fixed_size : nullptr;
     mutable_buffers_[1] = from.mutable_buffers_[1]
                               ? from.mutable_buffers_[1] + start * fixed_size
                               : nullptr;
+    bit_offset_[1] = 0;
   }
 
   buffers_[2] = from.buffers_[2];
@@ -339,8 +353,8 @@ KeyEncoder::KeyColumnArray::KeyColumnArray(const KeyColumnArray& from, int64_t s
 KeyEncoder::KeyColumnArray KeyEncoder::TransformBoolean::ArrayReplace(
     const KeyColumnArray& column, const KeyColumnArray& temp) {
   // Make sure that the temp buffer is large enough
-  ARROW_DCHECK(temp.length() >= column.length() && temp.metadata().is_fixed_length &&
-               temp.metadata().fixed_length >= sizeof(uint8_t));
+  DCHECK(temp.length() >= column.length() && temp.metadata().is_fixed_length &&
+         temp.metadata().fixed_length >= sizeof(uint8_t));
   KeyColumnMetadata metadata;
   metadata.is_fixed_length = true;
   metadata.fixed_length = sizeof(uint8_t);
@@ -353,33 +367,31 @@ void KeyEncoder::TransformBoolean::PreEncode(const KeyColumnArray& input,
                                              KeyColumnArray* output,
                                              KeyEncoderContext* ctx) {
   // Make sure that metadata and lengths are compatible.
-  ARROW_DCHECK(output->metadata().is_fixed_length == input.metadata().is_fixed_length);
-  ARROW_DCHECK(output->metadata().fixed_length == 1 &&
-               input.metadata().fixed_length == 0);
-  ARROW_DCHECK(output->length() == input.length());
+  DCHECK(output->metadata().is_fixed_length == input.metadata().is_fixed_length);
+  DCHECK(output->metadata().fixed_length == 1 && input.metadata().fixed_length == 0);
+  DCHECK(output->length() == input.length());
   constexpr int buffer_index = 1;
-  ARROW_DCHECK(input.data(buffer_index) != nullptr);
-  ARROW_DCHECK(output->mutable_data(buffer_index) != nullptr);
-  util::BitUtil::bits_to_bytes(ctx->hardware_flags, static_cast<int>(input.length()),
-                               input.data(buffer_index),
-                               output->mutable_data(buffer_index));
+  DCHECK(input.data(buffer_index) != nullptr);
+  DCHECK(output->mutable_data(buffer_index) != nullptr);
+  util::BitUtil::bits_to_bytes(
+      ctx->hardware_flags, static_cast<int>(input.length()), input.data(buffer_index),
+      output->mutable_data(buffer_index), input.bit_offset(buffer_index));
 }
 
 void KeyEncoder::TransformBoolean::PostDecode(const KeyColumnArray& input,
                                               KeyColumnArray* output,
                                               KeyEncoderContext* ctx) {
   // Make sure that metadata and lengths are compatible.
-  ARROW_DCHECK(output->metadata().is_fixed_length == input.metadata().is_fixed_length);
-  ARROW_DCHECK(output->metadata().fixed_length == 0 &&
-               input.metadata().fixed_length == 1);
-  ARROW_DCHECK(output->length() == input.length());
+  DCHECK(output->metadata().is_fixed_length == input.metadata().is_fixed_length);
+  DCHECK(output->metadata().fixed_length == 0 && input.metadata().fixed_length == 1);
+  DCHECK(output->length() == input.length());
   constexpr int buffer_index = 1;
-  ARROW_DCHECK(input.data(buffer_index) != nullptr);
-  ARROW_DCHECK(output->mutable_data(buffer_index) != nullptr);
+  DCHECK(input.data(buffer_index) != nullptr);
+  DCHECK(output->mutable_data(buffer_index) != nullptr);
 
-  util::BitUtil::bytes_to_bits(ctx->hardware_flags, static_cast<int>(input.length()),
-                               input.data(buffer_index),
-                               output->mutable_data(buffer_index));
+  util::BitUtil::bytes_to_bits(
+      ctx->hardware_flags, static_cast<int>(input.length()), input.data(buffer_index),
+      output->mutable_data(buffer_index), output->bit_offset(buffer_index));
 }
 
 bool KeyEncoder::EncoderInteger::IsBoolean(const KeyColumnMetadata& metadata) {
@@ -425,12 +437,12 @@ void KeyEncoder::EncoderInteger::Encode(uint32_t offset_within_row, KeyRowArray*
     col_prep = col;
   }
 
-  uint32_t num_rows = static_cast<uint32_t>(col.length());
+  const auto num_rows = static_cast<uint32_t>(col.length());
 
   // When we have a single fixed length column we can just do memcpy
   if (rows->metadata().is_fixed_length &&
       rows->metadata().fixed_length == col.metadata().fixed_length) {
-    ARROW_DCHECK(offset_within_row == 0);
+    DCHECK_EQ(offset_within_row, 0);
     uint32_t row_size = col.metadata().fixed_length;
     memcpy(rows->mutable_data(1), col.data(1), num_rows * row_size);
   } else if (rows->metadata().is_fixed_length) {
@@ -462,7 +474,7 @@ void KeyEncoder::EncoderInteger::Encode(uint32_t offset_within_row, KeyRowArray*
         }
         break;
       default:
-        ARROW_DCHECK(false);
+        DCHECK(false);
     }
   } else {
     const uint32_t* row_offsets = rows->offsets();
@@ -493,7 +505,7 @@ void KeyEncoder::EncoderInteger::Encode(uint32_t offset_within_row, KeyRowArray*
         }
         break;
       default:
-        ARROW_DCHECK(false);
+        DCHECK(false);
     }
   }
 }
@@ -512,7 +524,7 @@ void KeyEncoder::EncoderInteger::Decode(uint32_t start_row, uint32_t num_rows,
   // When we have a single fixed length column we can just do memcpy
   if (rows.metadata().is_fixed_length &&
       col_prep.metadata().fixed_length == rows.metadata().fixed_length) {
-    ARROW_DCHECK(offset_within_row == 0);
+    DCHECK_EQ(offset_within_row, 0);
     uint32_t row_size = rows.metadata().fixed_length;
     memcpy(col_prep.mutable_data(1), rows.data(1) + start_row * row_size,
            num_rows * row_size);
@@ -546,7 +558,7 @@ void KeyEncoder::EncoderInteger::Decode(uint32_t start_row, uint32_t num_rows,
         }
         break;
       default:
-        ARROW_DCHECK(false);
+        DCHECK(false);
     }
   } else {
     const uint32_t* row_offsets = rows.offsets() + start_row;
@@ -578,7 +590,7 @@ void KeyEncoder::EncoderInteger::Decode(uint32_t start_row, uint32_t num_rows,
         }
         break;
       default:
-        ARROW_DCHECK(false);
+        DCHECK(false);
     }
   }
 
@@ -625,9 +637,9 @@ void KeyEncoder::EncoderBinary::Encode(uint32_t offset_within_row, KeyRowArray*
 #endif
   }
 
-  ARROW_DCHECK(temp->metadata().is_fixed_length);
-  ARROW_DCHECK(temp->length() * temp->metadata().fixed_length >=
-               col.length() * static_cast<int64_t>(sizeof(uint16_t)));
+  DCHECK(temp->metadata().is_fixed_length);
+  DCHECK(temp->length() * temp->metadata().fixed_length >=
+         col.length() * static_cast<int64_t>(sizeof(uint16_t)));
 
   KeyColumnArray temp16bit(KeyColumnMetadata(true, sizeof(uint16_t)), col.length(),
                            nullptr, temp->mutable_data(1), nullptr);
@@ -677,8 +689,8 @@ void KeyEncoder::EncoderBinary::EncodeImp(uint32_t offset_within_row, KeyRowArra
   EncodeDecodeHelper<is_row_fixed_length, true>(
       0, static_cast<uint32_t>(col.length()), offset_within_row, rows, rows, &col,
       nullptr, [](uint8_t* dst, const uint8_t* src, int64_t length) {
-        uint64_t* dst64 = reinterpret_cast<uint64_t*>(dst);
-        const uint64_t* src64 = reinterpret_cast<const uint64_t*>(src);
+        auto dst64 = reinterpret_cast<uint64_t*>(dst);
+        auto src64 = reinterpret_cast<const uint64_t*>(src);
         uint32_t istripe;
         for (istripe = 0; istripe < length / 8; ++istripe) {
           dst64[istripe] = util::SafeLoad(src64 + istripe);
@@ -699,8 +711,8 @@ void KeyEncoder::EncoderBinary::DecodeImp(uint32_t start_row, uint32_t num_rows,
       start_row, num_rows, offset_within_row, &rows, nullptr, col, col,
       [](uint8_t* dst, const uint8_t* src, int64_t length) {
         for (uint32_t istripe = 0; istripe < (length + 7) / 8; ++istripe) {
-          uint64_t* dst64 = reinterpret_cast<uint64_t*>(dst);
-          const uint64_t* src64 = reinterpret_cast<const uint64_t*>(src);
+          auto dst64 = reinterpret_cast<uint64_t*>(dst);
+          auto src64 = reinterpret_cast<const uint64_t*>(src);
           util::SafeStore(dst64 + istripe, src64[istripe]);
         }
       });
@@ -709,8 +721,8 @@ void KeyEncoder::EncoderBinary::DecodeImp(uint32_t start_row, uint32_t num_rows,
 void KeyEncoder::EncoderBinary::ColumnMemsetNulls(
     uint32_t offset_within_row, KeyRowArray* rows, const KeyColumnArray& col,
     KeyEncoderContext* ctx, KeyColumnArray* temp_vector_16bit, uint8_t byte_value) {
-  typedef void (*ColumnMemsetNullsImp_t)(uint32_t, KeyRowArray*, const KeyColumnArray&,
-                                         KeyEncoderContext*, KeyColumnArray*, uint8_t);
+  using ColumnMemsetNullsImp_t = void (*)(uint32_t, KeyRowArray*, const KeyColumnArray&,
+                                          KeyEncoderContext*, KeyColumnArray*, uint8_t);
   static const ColumnMemsetNullsImp_t ColumnMemsetNullsImp_fn[] = {
       ColumnMemsetNullsImp<false, 1>,  ColumnMemsetNullsImp<false, 2>,
       ColumnMemsetNullsImp<false, 4>,  ColumnMemsetNullsImp<false, 8>,
@@ -735,18 +747,19 @@ void KeyEncoder::EncoderBinary::ColumnMemsetNullsImp(
     return;
   }
 
-  uint32_t num_rows = static_cast<uint32_t>(col.length());
+  const auto num_rows = static_cast<uint32_t>(col.length());
 
   // Temp vector needs space for the required number of rows
-  ARROW_DCHECK(temp_vector_16bit->length() >= num_rows);
-  ARROW_DCHECK(temp_vector_16bit->metadata().is_fixed_length &&
-               temp_vector_16bit->metadata().fixed_length == sizeof(uint16_t));
-  uint16_t* temp_vector = reinterpret_cast<uint16_t*>(temp_vector_16bit->mutable_data(1));
+  DCHECK(temp_vector_16bit->length() >= num_rows);
+  DCHECK(temp_vector_16bit->metadata().is_fixed_length &&
+         temp_vector_16bit->metadata().fixed_length == sizeof(uint16_t));
+  auto temp_vector = reinterpret_cast<uint16_t*>(temp_vector_16bit->mutable_data(1));
 
   // Bit vector to index vector of null positions
   int num_selected;
   util::BitUtil::bits_to_indexes(0, ctx->hardware_flags, static_cast<int>(col.length()),
-                                 col.data(0), &num_selected, temp_vector);
+                                 col.data(0), &num_selected, temp_vector,
+                                 col.bit_offset(0));
 
   for (int i = 0; i < num_selected; ++i) {
     uint32_t row_id = temp_vector[i];
@@ -793,7 +806,7 @@ void KeyEncoder::EncoderBinaryPair::Encode(uint32_t offset_within_row, KeyRowArr
                                            const KeyColumnArray& col2,
                                            KeyEncoderContext* ctx, KeyColumnArray* temp1,
                                            KeyColumnArray* temp2) {
-  ARROW_DCHECK(CanProcessPair(col1.metadata(), col2.metadata()));
+  DCHECK(CanProcessPair(col1.metadata(), col2.metadata()));
 
   KeyColumnArray col_prep[2];
   if (EncoderInteger::UsesTransform(col1)) {
@@ -818,7 +831,7 @@ void KeyEncoder::EncoderBinaryPair::Encode(uint32_t offset_within_row, KeyRowArr
 
   bool is_row_fixed_length = rows->metadata().is_fixed_length;
 
-  uint32_t num_rows = static_cast<uint32_t>(col1.length());
+  const auto num_rows = static_cast<uint32_t>(col1.length());
   uint32_t num_processed = 0;
 #if defined(ARROW_HAVE_AVX2)
   if (ctx->has_avx2() && col_width1 == col_width2) {
@@ -862,7 +875,7 @@ void KeyEncoder::EncoderBinaryPair::EncodeImp(uint32_t num_rows_to_skip,
   const uint8_t* src_A = col1.data(1);
   const uint8_t* src_B = col2.data(1);
 
-  uint32_t num_rows = static_cast<uint32_t>(col1.length());
+  const auto num_rows = static_cast<uint32_t>(col1.length());
 
   uint32_t fixed_length = rows->metadata().fixed_length;
   const uint32_t* offsets;
@@ -901,7 +914,7 @@ void KeyEncoder::EncoderBinaryPair::Decode(uint32_t start_row, uint32_t num_rows
                                            const KeyRowArray& rows, KeyColumnArray* col1,
                                            KeyColumnArray* col2, KeyEncoderContext* ctx,
                                            KeyColumnArray* temp1, KeyColumnArray* temp2) {
-  ARROW_DCHECK(CanProcessPair(col1->metadata(), col2->metadata()));
+  DCHECK(CanProcessPair(col1->metadata(), col2->metadata()));
 
   KeyColumnArray col_prep[2];
   if (EncoderInteger::UsesTransform(*col1)) {
@@ -933,8 +946,8 @@ void KeyEncoder::EncoderBinaryPair::Decode(uint32_t start_row, uint32_t num_rows
   }
 #endif
   if (num_processed < num_rows) {
-    typedef void (*DecodeImp_t)(uint32_t, uint32_t, uint32_t, uint32_t,
-                                const KeyRowArray&, KeyColumnArray*, KeyColumnArray*);
+    using DecodeImp_t = void (*)(uint32_t, uint32_t, uint32_t, uint32_t,
+                                 const KeyRowArray&, KeyColumnArray*, KeyColumnArray*);
     static const DecodeImp_t DecodeImp_fn[] = {
         DecodeImp<false, uint8_t, uint8_t>,   DecodeImp<false, uint16_t, uint8_t>,
         DecodeImp<false, uint32_t, uint8_t>,  DecodeImp<false, uint64_t, uint8_t>,
@@ -973,8 +986,8 @@ void KeyEncoder::EncoderBinaryPair::DecodeImp(uint32_t num_rows_to_skip,
                                               const KeyRowArray& rows,
                                               KeyColumnArray* col1,
                                               KeyColumnArray* col2) {
-  ARROW_DCHECK(rows.length() >= start_row + num_rows);
-  ARROW_DCHECK(col1->length() == num_rows && col2->length() == num_rows);
+  DCHECK(rows.length() >= start_row + num_rows);
+  DCHECK(col1->length() == num_rows && col2->length() == num_rows);
 
   uint8_t* dst_A = col1->mutable_data(1);
   uint8_t* dst_B = col2->mutable_data(1);
@@ -1014,25 +1027,31 @@ void KeyEncoder::EncoderBinaryPair::DecodeImp(uint32_t num_rows_to_skip,
 void KeyEncoder::EncoderOffsets::Encode(KeyRowArray* rows,
                                         const std::vector<KeyColumnArray>& varbinary_cols,
                                         KeyEncoderContext* ctx) {
-  ARROW_DCHECK(!varbinary_cols.empty());
+  DCHECK(!varbinary_cols.empty());
 
   // Rows and columns must all be varying-length
-  ARROW_DCHECK(!rows->metadata().is_fixed_length);
-  for (size_t col = 0; col < varbinary_cols.size(); ++col) {
-    ARROW_DCHECK(!varbinary_cols[col].metadata().is_fixed_length);
+  DCHECK(!rows->metadata().is_fixed_length);
+  for (const auto& col : varbinary_cols) {
+    DCHECK(!col.metadata().is_fixed_length);
   }
 
-  uint32_t num_rows = static_cast<uint32_t>(varbinary_cols[0].length());
+  const auto num_rows = static_cast<uint32_t>(varbinary_cols[0].length());
+
+  uint32_t num_processed = 0;
+#if defined(ARROW_HAVE_AVX2)
+  // Whether any of the columns has non-zero starting bit offset for non-nulls bit vector
+  bool has_bit_offset = false;
 
   // The space in columns must be exactly equal to a space for offsets in rows
-  ARROW_DCHECK(rows->length() == num_rows);
-  for (size_t col = 0; col < varbinary_cols.size(); ++col) {
-    ARROW_DCHECK(varbinary_cols[col].length() == num_rows);
+  DCHECK(rows->length() == num_rows);
+  for (const auto& col : varbinary_cols) {
+    DCHECK(col.length() == num_rows);
+    if (col.bit_offset(0) != 0) {
+      has_bit_offset = true;
+    }
   }
 
-  uint32_t num_processed = 0;
-#if defined(ARROW_HAVE_AVX2)
-  if (ctx->has_avx2()) {
+  if (ctx->has_avx2() && !has_bit_offset) {
     // Create a temp vector sized based on the number of columns
     auto temp_buffer_holder = util::TempVectorHolder<uint32_t>(
         ctx->stack, static_cast<uint32_t>(varbinary_cols.size()) * 8);
@@ -1051,14 +1070,14 @@ void KeyEncoder::EncoderOffsets::Encode(KeyRowArray* rows,
 void KeyEncoder::EncoderOffsets::EncodeImp(
     uint32_t num_rows_already_processed, KeyRowArray* rows,
     const std::vector<KeyColumnArray>& varbinary_cols) {
-  ARROW_DCHECK(varbinary_cols.size() > 0);
+  DCHECK_GT(varbinary_cols.size(), 0);
 
   int row_alignment = rows->metadata().row_alignment;
   int string_alignment = rows->metadata().string_alignment;
 
   uint32_t* row_offsets = rows->mutable_offsets();
   uint8_t* row_values = rows->mutable_data(2);
-  uint32_t num_rows = static_cast<uint32_t>(varbinary_cols[0].length());
+  const auto num_rows = static_cast<uint32_t>(varbinary_cols[0].length());
 
   if (num_rows_already_processed == 0) {
     row_offsets[0] = 0;
@@ -1079,8 +1098,10 @@ void KeyEncoder::EncoderOffsets::EncodeImp(
       const uint32_t* col_offsets = varbinary_cols[col].offsets();
       uint32_t col_length = col_offsets[i + 1] - col_offsets[i];
 
+      const int bit_offset = varbinary_cols[col].bit_offset(0);
+
       const uint8_t* non_nulls = varbinary_cols[col].data(0);
-      if (non_nulls && BitUtil::GetBit(non_nulls, i) == 0) {
+      if (non_nulls && BitUtil::GetBit(non_nulls, bit_offset + i) == 0) {
         col_length = 0;
       }
 
@@ -1102,19 +1123,16 @@ void KeyEncoder::EncoderOffsets::Decode(
     uint32_t start_row, uint32_t num_rows, const KeyRowArray& rows,
     std::vector<KeyColumnArray>* varbinary_cols,
     const std::vector<uint32_t>& varbinary_cols_base_offset, KeyEncoderContext* ctx) {
-  ARROW_DCHECK(!varbinary_cols->empty());
-  ARROW_DCHECK(varbinary_cols->size() == varbinary_cols_base_offset.size());
+  DCHECK(!varbinary_cols->empty());
+  DCHECK(varbinary_cols->size() == varbinary_cols_base_offset.size());
 
-  // Rows and columns must all be varying-length
-  ARROW_DCHECK(!rows.metadata().is_fixed_length);
-  for (size_t col = 0; col < varbinary_cols->size(); ++col) {
-    ARROW_DCHECK(!(*varbinary_cols)[col].metadata().is_fixed_length);
-  }
-
-  // The space in columns must be exactly equal to a subset of rows selected
-  ARROW_DCHECK(rows.length() >= start_row + num_rows);
-  for (size_t col = 0; col < varbinary_cols->size(); ++col) {
-    ARROW_DCHECK((*varbinary_cols)[col].length() == num_rows);
+  DCHECK(!rows.metadata().is_fixed_length);
+  DCHECK(rows.length() >= start_row + num_rows);
+  for (const auto& col : *varbinary_cols) {
+    // Rows and columns must all be varying-length
+    DCHECK(!col.metadata().is_fixed_length);
+    // The space in columns must be exactly equal to a subset of rows selected
+    DCHECK(col.length() == num_rows);
   }
 
   // Offsets of varbinary columns data within each encoded row are stored
@@ -1197,8 +1215,8 @@ void KeyEncoder::EncoderVarBinary::EncodeImp(uint32_t varbinary_col_id, KeyRowAr
   EncodeDecodeHelper<first_varbinary_col, true>(
       0, static_cast<uint32_t>(col.length()), varbinary_col_id, rows, rows, &col, nullptr,
       [](uint8_t* dst, const uint8_t* src, int64_t length) {
-        uint64_t* dst64 = reinterpret_cast<uint64_t*>(dst);
-        const uint64_t* src64 = reinterpret_cast<const uint64_t*>(src);
+        auto dst64 = reinterpret_cast<uint64_t*>(dst);
+        auto src64 = reinterpret_cast<const uint64_t*>(src);
         uint32_t istripe;
         for (istripe = 0; istripe < length / 8; ++istripe) {
           dst64[istripe] = util::SafeLoad(src64 + istripe);
@@ -1220,8 +1238,8 @@ void KeyEncoder::EncoderVarBinary::DecodeImp(uint32_t start_row, uint32_t num_ro
       start_row, num_rows, varbinary_col_id, &rows, nullptr, col, col,
       [](uint8_t* dst, const uint8_t* src, int64_t length) {
         for (uint32_t istripe = 0; istripe < (length + 7) / 8; ++istripe) {
-          uint64_t* dst64 = reinterpret_cast<uint64_t*>(dst);
-          const uint64_t* src64 = reinterpret_cast<const uint64_t*>(src);
+          auto dst64 = reinterpret_cast<uint64_t*>(dst);
+          auto src64 = reinterpret_cast<const uint64_t*>(src);
           util::SafeStore(dst64 + istripe, src64[istripe]);
         }
       });
@@ -1231,19 +1249,19 @@ void KeyEncoder::EncoderNulls::Encode(KeyRowArray* rows,
                                       const std::vector<KeyColumnArray>& cols,
                                       KeyEncoderContext* ctx,
                                       KeyColumnArray* temp_vector_16bit) {
-  ARROW_DCHECK(cols.size() > 0);
-  uint32_t num_rows = static_cast<uint32_t>(rows->length());
+  DCHECK_GT(cols.size(), 0);
+  const auto num_rows = static_cast<uint32_t>(rows->length());
 
   // All input columns should have the same number of rows.
   // They may or may not have non-nulls bit-vectors allocated.
-  for (size_t col = 0; col < cols.size(); ++col) {
-    ARROW_DCHECK(cols[col].length() == num_rows);
+  for (const auto& col : cols) {
+    DCHECK(col.length() == num_rows);
   }
 
   // Temp vector needs space for the required number of rows
-  ARROW_DCHECK(temp_vector_16bit->length() >= num_rows);
-  ARROW_DCHECK(temp_vector_16bit->metadata().is_fixed_length &&
-               temp_vector_16bit->metadata().fixed_length == sizeof(uint16_t));
+  DCHECK(temp_vector_16bit->length() >= num_rows);
+  DCHECK(temp_vector_16bit->metadata().is_fixed_length &&
+         temp_vector_16bit->metadata().fixed_length == sizeof(uint16_t));
 
   uint8_t* null_masks = rows->null_masks();
   uint32_t null_masks_bytes_per_row = rows->metadata().null_masks_bytes_per_row;
@@ -1253,10 +1271,12 @@ void KeyEncoder::EncoderNulls::Encode(KeyRowArray* rows,
     if (!non_nulls) {
       continue;
     }
+    int bit_offset = cols[col].bit_offset(0);
+    DCHECK_LT(bit_offset, 8);
     int num_selected;
     util::BitUtil::bits_to_indexes(
         0, ctx->hardware_flags, num_rows, non_nulls, &num_selected,
-        reinterpret_cast<uint16_t*>(temp_vector_16bit->mutable_data(1)));
+        reinterpret_cast<uint16_t*>(temp_vector_16bit->mutable_data(1)), bit_offset);
     for (int i = 0; i < num_selected; ++i) {
       uint16_t row_id = reinterpret_cast<const uint16_t*>(temp_vector_16bit->data(1))[i];
       int64_t null_masks_bit_id = row_id * null_masks_bytes_per_row * 8 + col;
@@ -1270,23 +1290,29 @@ void KeyEncoder::EncoderNulls::Decode(uint32_t start_row, uint32_t num_rows,
                                       std::vector<KeyColumnArray>* cols) {
   // Every output column needs to have a space for exactly the required number
   // of rows. It also needs to have non-nulls bit-vector allocated and mutable.
-  ARROW_DCHECK(cols->size() > 0);
-  for (size_t col = 0; col < cols->size(); ++col) {
-    ARROW_DCHECK((*cols)[col].length() == num_rows);
-    ARROW_DCHECK((*cols)[col].mutable_data(0));
+  DCHECK_GT(cols->size(), 0);
+  for (auto& col : *cols) {
+    DCHECK(col.length() == num_rows);
+    DCHECK(col.mutable_data(0));
   }
 
   const uint8_t* null_masks = rows.null_masks();
   uint32_t null_masks_bytes_per_row = rows.metadata().null_masks_bytes_per_row;
   for (size_t col = 0; col < cols->size(); ++col) {
     uint8_t* non_nulls = (*cols)[col].mutable_data(0);
-    memset(non_nulls, 0xff, BitUtil::BytesForBits(num_rows));
+    const int bit_offset = (*cols)[col].bit_offset(0);
+    DCHECK_LT(bit_offset, 8);
+    non_nulls[0] |= 0xff << (bit_offset);
+    if (bit_offset + num_rows > 8) {
+      int bits_in_first_byte = 8 - bit_offset;
+      memset(non_nulls + 1, 0xff, BitUtil::BytesForBits(num_rows - bits_in_first_byte));
+    }
     for (uint32_t row = 0; row < num_rows; ++row) {
       uint32_t null_masks_bit_id =
           (start_row + row) * null_masks_bytes_per_row * 8 + static_cast<uint32_t>(col);
       bool is_set = BitUtil::GetBit(null_masks, null_masks_bit_id);
       if (is_set) {
-        BitUtil::ClearBit(non_nulls, row);
+        BitUtil::ClearBit(non_nulls, bit_offset + row);
       }
     }
   }
@@ -1294,8 +1320,8 @@ void KeyEncoder::EncoderNulls::Decode(uint32_t start_row, uint32_t num_rows,
 
 uint32_t KeyEncoder::KeyRowMetadata::num_varbinary_cols() const {
   uint32_t result = 0;
-  for (size_t i = 0; i < column_metadatas.size(); ++i) {
-    if (!column_metadatas[i].is_fixed_length) {
+  for (auto column_metadata : column_metadatas) {
+    if (!column_metadata.is_fixed_length) {
       ++result;
     }
   }
@@ -1330,7 +1356,7 @@ void KeyEncoder::KeyRowMetadata::FromColumnMetadataVector(
     column_metadatas[i] = cols[i];
   }
 
-  uint32_t num_cols = static_cast<uint32_t>(cols.size());
+  const auto num_cols = static_cast<uint32_t>(cols.size());
 
   // Sort columns.
   // Columns are sorted based on the size in bytes of their fixed-length part.
@@ -1389,8 +1415,8 @@ void KeyEncoder::KeyRowMetadata::FromColumnMetadataVector(
       if (num_varbinary_cols == 0) {
         varbinary_end_array_offset = offset_within_row;
       }
-      ARROW_DCHECK(column_offsets[i] - varbinary_end_array_offset ==
-                   num_varbinary_cols * sizeof(uint32_t));
+      DCHECK(column_offsets[i] - varbinary_end_array_offset ==
+             num_varbinary_cols * sizeof(uint32_t));
       ++num_varbinary_cols;
       offset_within_row += sizeof(uint32_t);
     } else {
@@ -1433,8 +1459,8 @@ void KeyEncoder::Init(const std::vector<KeyColumnMetadata>& cols, KeyEncoderCont
 
 void KeyEncoder::PrepareKeyColumnArrays(int64_t start_row, int64_t num_rows,
                                         const std::vector<KeyColumnArray>& cols_in) {
-  uint32_t num_cols = static_cast<uint32_t>(cols_in.size());
-  ARROW_DCHECK(batch_all_cols_.size() == num_cols);
+  const auto num_cols = static_cast<uint32_t>(cols_in.size());
+  DCHECK(batch_all_cols_.size() == num_cols);
 
   uint32_t num_varbinary_visited = 0;
   for (uint32_t i = 0; i < num_cols; ++i) {
@@ -1442,7 +1468,7 @@ void KeyEncoder::PrepareKeyColumnArrays(int64_t start_row, int64_t num_rows,
     KeyColumnArray col_window(col, start_row, num_rows);
     batch_all_cols_[i] = col_window;
     if (!col.metadata().is_fixed_length) {
-      ARROW_DCHECK(num_varbinary_visited < batch_varbinary_cols_.size());
+      DCHECK(num_varbinary_visited < batch_varbinary_cols_.size());
       // If start row is zero, then base offset of varbinary column is also zero.
       if (start_row == 0) {
         batch_varbinary_cols_base_offsets_[num_varbinary_visited] = 0;
@@ -1462,10 +1488,9 @@ Status KeyEncoder::PrepareOutputForEncode(int64_t start_row, int64_t num_rows,
 
   int64_t fixed_part = row_metadata_.fixed_length * num_rows;
   int64_t var_part = 0;
-  for (size_t i = 0; i < all_cols.size(); ++i) {
-    const KeyColumnArray& col = all_cols[i];
+  for (const auto& col : all_cols) {
     if (!col.metadata().is_fixed_length) {
-      ARROW_DCHECK(col.length() >= start_row + num_rows);
+      DCHECK(col.length() >= start_row + num_rows);
       const uint32_t* offsets = col.offsets();
       var_part += offsets[start_row + num_rows] - offsets[start_row];
       // Include maximum padding that can be added to align the start of varbinary fields.
@@ -1509,16 +1534,16 @@ void KeyEncoder::Encode(int64_t start_row, int64_t num_rows, KeyRowArray* rows,
     // - offsets for individual varbinary fields within each row
     EncoderOffsets::Encode(rows, batch_varbinary_cols_, ctx_);
 
-    uint32_t num_varbinary_cols = static_cast<uint32_t>(batch_varbinary_cols_.size());
-    for (uint32_t i = 0; i < num_varbinary_cols; ++i) {
+    for (size_t i = 0; i < batch_varbinary_cols_.size(); ++i) {
       // Memcpy varbinary fields into precomputed in the previous step
       // positions in the output row buffer.
-      EncoderVarBinary::Encode(i, rows, batch_varbinary_cols_[i], ctx_);
+      EncoderVarBinary::Encode(static_cast<uint32_t>(i), rows, batch_varbinary_cols_[i],
+                               ctx_);
     }
   }
 
   // Process fixed length columns
-  uint32_t num_cols = static_cast<uint32_t>(batch_all_cols_.size());
+  const auto num_cols = static_cast<uint32_t>(batch_all_cols_.size());
   for (uint32_t i = 0; i < num_cols;) {
     if (!batch_all_cols_[i].metadata().is_fixed_length) {
       i += 1;
@@ -1571,7 +1596,7 @@ void KeyEncoder::DecodeFixedLengthBuffers(int64_t start_row_input,
   }
 
   // Process fixed length columns
-  uint32_t num_cols = static_cast<uint32_t>(batch_all_cols_.size());
+  const auto num_cols = static_cast<uint32_t>(batch_all_cols_.size());
   for (uint32_t i = 0; i < num_cols;) {
     if (!batch_all_cols_[i].metadata().is_fixed_length) {
       i += 1;
@@ -1610,13 +1635,12 @@ void KeyEncoder::DecodeVaryingLengthBuffers(int64_t start_row_input,
 
   bool is_row_fixed_length = row_metadata_.is_fixed_length;
   if (!is_row_fixed_length) {
-    uint32_t num_varbinary_cols = static_cast<uint32_t>(batch_varbinary_cols_.size());
-    for (uint32_t i = 0; i < num_varbinary_cols; ++i) {
+    for (size_t i = 0; i < batch_varbinary_cols_.size(); ++i) {
       // Memcpy varbinary fields into precomputed in the previous step
       // positions in the output row buffer.
       EncoderVarBinary::Decode(static_cast<uint32_t>(start_row_input),
-                               static_cast<uint32_t>(num_rows), i, rows,
-                               &batch_varbinary_cols_[i], ctx_);
+                               static_cast<uint32_t>(num_rows), static_cast<uint32_t>(i),
+                               rows, &batch_varbinary_cols_[i], ctx_);
     }
   }
 }
diff --git a/cpp/src/arrow/compute/exec/key_encode.h b/cpp/src/arrow/compute/exec/key_encode.h
index 3f5ef365a08..e5397b9dfd4 100644
--- a/cpp/src/arrow/compute/exec/key_encode.h
+++ b/cpp/src/arrow/compute/exec/key_encode.h
@@ -247,11 +247,12 @@ class KeyEncoder {
                    const KeyColumnArray& right, int buffer_id_to_replace);
     /// Create for reading
     KeyColumnArray(const KeyColumnMetadata& metadata, int64_t length,
-                   const uint8_t* buffer0, const uint8_t* buffer1,
-                   const uint8_t* buffer2);
+                   const uint8_t* buffer0, const uint8_t* buffer1, const uint8_t* buffer2,
+                   int bit_offset0 = 0, int bit_offset1 = 0);
     /// Create for writing
     KeyColumnArray(const KeyColumnMetadata& metadata, int64_t length, uint8_t* buffer0,
-                   uint8_t* buffer1, uint8_t* buffer2);
+                   uint8_t* buffer1, uint8_t* buffer2, int bit_offset0 = 0,
+                   int bit_offset1 = 0);
     /// Create as a window view of original description that is offset
     /// by a given number of rows.
     /// The number of rows used in offset must be divisible by 8
@@ -269,6 +270,10 @@ class KeyEncoder {
     const uint32_t* offsets() const { return reinterpret_cast<const uint32_t*>(data(1)); }
     const KeyColumnMetadata& metadata() const { return metadata_; }
     int64_t length() const { return length_; }
+    int bit_offset(int i) const {
+      ARROW_DCHECK(i >= 0 && i < max_buffers_);
+      return bit_offset_[i];
+    }
 
    private:
     static constexpr int max_buffers_ = 3;
@@ -276,6 +281,9 @@ class KeyEncoder {
     uint8_t* mutable_buffers_[max_buffers_];
     KeyColumnMetadata metadata_;
     int64_t length_;
+    // Starting bit offset within the first byte (between 0 and 7)
+    // to be used when accessing buffers that store bit vectors.
+    int bit_offset_[max_buffers_ - 1];
   };
 
   void Init(const std::vector<KeyColumnMetadata>& cols, KeyEncoderContext* ctx,
diff --git a/cpp/src/arrow/compute/exec/util.cc b/cpp/src/arrow/compute/exec/util.cc
index 88303348645..a44676c2f0d 100644
--- a/cpp/src/arrow/compute/exec/util.cc
+++ b/cpp/src/arrow/compute/exec/util.cc
@@ -99,7 +99,26 @@ void BitUtil::bits_to_indexes_internal(int64_t hardware_flags, const int num_bit
 
 void BitUtil::bits_to_indexes(int bit_to_search, int64_t hardware_flags,
                               const int num_bits, const uint8_t* bits, int* num_indexes,
-                              uint16_t* indexes) {
+                              uint16_t* indexes, int bit_offset) {
+  bits += bit_offset / 8;
+  bit_offset %= 8;
+  if (bit_offset != 0) {
+    int num_indexes_head = 0;
+    uint64_t bits_head =
+        util::SafeLoad(reinterpret_cast<const uint64_t*>(bits)) >> bit_offset;
+    int bits_in_first_byte = std::min(num_bits, 8 - bit_offset);
+    bits_to_indexes(bit_to_search, hardware_flags, bits_in_first_byte,
+                    reinterpret_cast<const uint8_t*>(&bits_head), &num_indexes_head,
+                    indexes);
+    int num_indexes_tail = 0;
+    if (num_bits > bits_in_first_byte) {
+      bits_to_indexes(bit_to_search, hardware_flags, num_bits - bits_in_first_byte,
+                      bits + 1, &num_indexes_tail, indexes + num_indexes_head);
+    }
+    *num_indexes = num_indexes_head + num_indexes_tail;
+    return;
+  }
+
   if (bit_to_search == 0) {
     bits_to_indexes_internal<0, false>(hardware_flags, num_bits, bits, nullptr,
                                        num_indexes, indexes);
@@ -113,7 +132,27 @@ void BitUtil::bits_to_indexes(int bit_to_search, int64_t hardware_flags,
 void BitUtil::bits_filter_indexes(int bit_to_search, int64_t hardware_flags,
                                   const int num_bits, const uint8_t* bits,
                                   const uint16_t* input_indexes, int* num_indexes,
-                                  uint16_t* indexes) {
+                                  uint16_t* indexes, int bit_offset) {
+  bits += bit_offset / 8;
+  bit_offset %= 8;
+  if (bit_offset != 0) {
+    int num_indexes_head = 0;
+    uint64_t bits_head =
+        util::SafeLoad(reinterpret_cast<const uint64_t*>(bits)) >> bit_offset;
+    int bits_in_first_byte = std::min(num_bits, 8 - bit_offset);
+    bits_filter_indexes(bit_to_search, hardware_flags, bits_in_first_byte,
+                        reinterpret_cast<const uint8_t*>(&bits_head), input_indexes,
+                        &num_indexes_head, indexes);
+    int num_indexes_tail = 0;
+    if (num_bits > bits_in_first_byte) {
+      bits_filter_indexes(bit_to_search, hardware_flags, num_bits - bits_in_first_byte,
+                          bits + 1, input_indexes + bits_in_first_byte, &num_indexes_tail,
+                          indexes + num_indexes_head);
+    }
+    *num_indexes = num_indexes_head + num_indexes_tail;
+    return;
+  }
+
   if (bit_to_search == 0) {
     bits_to_indexes_internal<0, true>(hardware_flags, num_bits, bits, input_indexes,
                                       num_indexes, indexes);
@@ -126,46 +165,32 @@ void BitUtil::bits_filter_indexes(int bit_to_search, int64_t hardware_flags,
 
 void BitUtil::bits_split_indexes(int64_t hardware_flags, const int num_bits,
                                  const uint8_t* bits, int* num_indexes_bit0,
-                                 uint16_t* indexes_bit0, uint16_t* indexes_bit1) {
-  bits_to_indexes(0, hardware_flags, num_bits, bits, num_indexes_bit0, indexes_bit0);
+                                 uint16_t* indexes_bit0, uint16_t* indexes_bit1,
+                                 int bit_offset) {
+  bits_to_indexes(0, hardware_flags, num_bits, bits, num_indexes_bit0, indexes_bit0,
+                  bit_offset);
   int num_indexes_bit1;
-  bits_to_indexes(1, hardware_flags, num_bits, bits, &num_indexes_bit1, indexes_bit1);
+  bits_to_indexes(1, hardware_flags, num_bits, bits, &num_indexes_bit1, indexes_bit1,
+                  bit_offset);
 }
 
-void BitUtil::bits_to_bytes_internal(const int num_bits, const uint8_t* bits,
-                                     uint8_t* bytes) {
-  constexpr int unroll = 8;
-  // Processing 8 bits at a time
-  for (int i = 0; i < (num_bits + unroll - 1) / unroll; ++i) {
-    uint8_t bits_next = bits[i];
-    // Clear the lowest bit and then make 8 copies of remaining 7 bits, each 7 bits apart
-    // from the previous.
-    uint64_t unpacked = static_cast<uint64_t>(bits_next & 0xfe) *
-                        ((1ULL << 7) | (1ULL << 14) | (1ULL << 21) | (1ULL << 28) |
-                         (1ULL << 35) | (1ULL << 42) | (1ULL << 49));
-    unpacked |= (bits_next & 1);
-    unpacked &= 0x0101010101010101ULL;
-    unpacked *= 255;
-    util::SafeStore(&reinterpret_cast<uint64_t*>(bytes)[i], unpacked);
-  }
-}
-
-void BitUtil::bytes_to_bits_internal(const int num_bits, const uint8_t* bytes,
-                                     uint8_t* bits) {
-  constexpr int unroll = 8;
-  // Process 8 bits at a time
-  for (int i = 0; i < (num_bits + unroll - 1) / unroll; ++i) {
-    uint64_t bytes_next = util::SafeLoad(&reinterpret_cast<const uint64_t*>(bytes)[i]);
-    bytes_next &= 0x0101010101010101ULL;
-    bytes_next |= (bytes_next >> 7);  // Pairs of adjacent output bits in individual bytes
-    bytes_next |= (bytes_next >> 14);  // 4 adjacent output bits in individual bytes
-    bytes_next |= (bytes_next >> 28);  // All 8 output bits in the lowest byte
-    bits[i] = static_cast<uint8_t>(bytes_next & 0xff);
+void BitUtil::bits_to_bytes(int64_t hardware_flags, const int num_bits,
+                            const uint8_t* bits, uint8_t* bytes, int bit_offset) {
+  bits += bit_offset / 8;
+  bit_offset %= 8;
+  if (bit_offset != 0) {
+    uint64_t bits_head =
+        util::SafeLoad(reinterpret_cast<const uint64_t*>(bits)) >> bit_offset;
+    int bits_in_first_byte = std::min(num_bits, 8 - bit_offset);
+    bits_to_bytes(hardware_flags, bits_in_first_byte,
+                  reinterpret_cast<const uint8_t*>(&bits_head), bytes);
+    if (num_bits > bits_in_first_byte) {
+      bits_to_bytes(hardware_flags, num_bits - bits_in_first_byte, bits + 1,
+                    bytes + bits_in_first_byte);
+    }
+    return;
   }
-}
 
-void BitUtil::bits_to_bytes(int64_t hardware_flags, const int num_bits,
-                            const uint8_t* bits, uint8_t* bytes) {
   int num_processed = 0;
 #if defined(ARROW_HAVE_AVX2)
   if (hardware_flags & arrow::internal::CpuInfo::AVX2) {
@@ -191,7 +216,24 @@ void BitUtil::bits_to_bytes(int64_t hardware_flags, const int num_bits,
 }
 
 void BitUtil::bytes_to_bits(int64_t hardware_flags, const int num_bits,
-                            const uint8_t* bytes, uint8_t* bits) {
+                            const uint8_t* bytes, uint8_t* bits, int bit_offset) {
+  bits += bit_offset / 8;
+  bit_offset %= 8;
+  if (bit_offset != 0) {
+    uint64_t bits_head;
+    int bits_in_first_byte = std::min(num_bits, 8 - bit_offset);
+    bytes_to_bits(hardware_flags, bits_in_first_byte, bytes,
+                  reinterpret_cast<uint8_t*>(&bits_head));
+    uint8_t mask = (1 << bit_offset) - 1;
+    *bits = static_cast<uint8_t>((*bits & mask) | (bits_head << bit_offset));
+
+    if (num_bits > bits_in_first_byte) {
+      bytes_to_bits(hardware_flags, num_bits - bits_in_first_byte,
+                    bytes + bits_in_first_byte, bits + 1);
+    }
+    return;
+  }
+
   int num_processed = 0;
 #if defined(ARROW_HAVE_AVX2)
   if (hardware_flags & arrow::internal::CpuInfo::AVX2) {
diff --git a/cpp/src/arrow/compute/exec/util.h b/cpp/src/arrow/compute/exec/util.h
index d345bd3af0b..471cc332220 100644
--- a/cpp/src/arrow/compute/exec/util.h
+++ b/cpp/src/arrow/compute/exec/util.h
@@ -112,24 +112,26 @@ class BitUtil {
  public:
   static void bits_to_indexes(int bit_to_search, int64_t hardware_flags,
                               const int num_bits, const uint8_t* bits, int* num_indexes,
-                              uint16_t* indexes);
+                              uint16_t* indexes, int bit_offset = 0);
 
   static void bits_filter_indexes(int bit_to_search, int64_t hardware_flags,
                                   const int num_bits, const uint8_t* bits,
                                   const uint16_t* input_indexes, int* num_indexes,
-                                  uint16_t* indexes);
+                                  uint16_t* indexes, int bit_offset = 0);
 
   // Input and output indexes may be pointing to the same data (in-place filtering).
   static void bits_split_indexes(int64_t hardware_flags, const int num_bits,
                                  const uint8_t* bits, int* num_indexes_bit0,
-                                 uint16_t* indexes_bit0, uint16_t* indexes_bit1);
+                                 uint16_t* indexes_bit0, uint16_t* indexes_bit1,
+                                 int bit_offset = 0);
 
   // Bit 1 is replaced with byte 0xFF.
   static void bits_to_bytes(int64_t hardware_flags, const int num_bits,
-                            const uint8_t* bits, uint8_t* bytes);
+                            const uint8_t* bits, uint8_t* bytes, int bit_offset = 0);
+
   // Return highest bit of each byte.
   static void bytes_to_bits(int64_t hardware_flags, const int num_bits,
-                            const uint8_t* bytes, uint8_t* bits);
+                            const uint8_t* bytes, uint8_t* bits, int bit_offset = 0);
 
   static bool are_all_bytes_zero(int64_t hardware_flags, const uint8_t* bytes,
                                  uint32_t num_bytes);
@@ -144,10 +146,6 @@ class BitUtil {
   static void bits_to_indexes_internal(int64_t hardware_flags, const int num_bits,
                                        const uint8_t* bits, const uint16_t* input_indexes,
                                        int* num_indexes, uint16_t* indexes);
-  static void bits_to_bytes_internal(const int num_bits, const uint8_t* bits,
-                                     uint8_t* bytes);
-  static void bytes_to_bits_internal(const int num_bits, const uint8_t* bytes,
-                                     uint8_t* bits);
 
 #if defined(ARROW_HAVE_AVX2)
   static void bits_to_indexes_avx2(int bit_to_search, const int num_bits,
diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate.cc b/cpp/src/arrow/compute/kernels/hash_aggregate.cc
index e282035d82a..ed40a6b1b8c 100644
--- a/cpp/src/arrow/compute/kernels/hash_aggregate.cc
+++ b/cpp/src/arrow/compute/kernels/hash_aggregate.cc
@@ -564,8 +564,13 @@ struct GrouperFastImpl : Grouper {
         varlen = batch[icol].array()->buffers[2]->data();
       }
 
-      cols_[icol] = arrow::compute::KeyEncoder::KeyColumnArray(
-          col_metadata_[icol], num_rows, non_nulls, fixedlen, varlen);
+      int64_t offset = batch[icol].array()->offset;
+
+      auto col_base = arrow::compute::KeyEncoder::KeyColumnArray(
+          col_metadata_[icol], offset + num_rows, non_nulls, fixedlen, varlen);
+
+      cols_[icol] =
+          arrow::compute::KeyEncoder::KeyColumnArray(col_base, offset, num_rows);
     }
 
     // Split into smaller mini-batches
diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc b/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc
index a8f8c64663d..8c8a4b23932 100644
--- a/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc
+++ b/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc
@@ -844,5 +844,47 @@ TEST(GroupBy, MinMaxWithNewGroupsInChunkedArray) {
                     aggregated_and_grouped,
                     /*verbose=*/true);
 }
+
+ExecContext* small_chunksize_context() {
+  static ExecContext ctx;
+  ctx.set_exec_chunksize(2);
+  return &ctx;
+}
+
+TEST(GroupBy, SmallChunkSizeSumOnly) {
+  auto batch = RecordBatchFromJSON(
+      schema({field("argument", float64()), field("key", int64())}), R"([
+    [1.0,   1],
+    [null,  1],
+    [0.0,   2],
+    [null,  3],
+    [4.0,   null],
+    [3.25,  1],
+    [0.125, 2],
+    [-0.25, 2],
+    [0.75,  null],
+    [null,  3]
+  ])");
+  ASSERT_OK_AND_ASSIGN(Datum aggregated_and_grouped,
+                       internal::GroupBy({batch->GetColumnByName("argument")},
+                                         {batch->GetColumnByName("key")},
+                                         {
+                                             {"hash_sum", nullptr},
+                                         },
+                                         small_chunksize_context()));
+  AssertDatumsEqual(ArrayFromJSON(struct_({
+                                      field("hash_sum", float64()),
+                                      field("key_0", int64()),
+                                  }),
+                                  R"([
+    [4.25,   1],
+    [-0.125, 2],
+    [null,   3],
+    [4.75,   null]
+  ])"),
+                    aggregated_and_grouped,
+                    /*verbose=*/true);
+}
+
 }  // namespace compute
 }  // namespace arrow

From b172284f59e1bf75040e18ae1d1dabc95c001016 Mon Sep 17 00:00:00 2001
From: Benjamin Kietzman <bengilgit@gmail.com>
Date: Fri, 9 Jul 2021 15:38:49 -0400
Subject: [PATCH 535/719] ARROW-13238: [C++][Compute][Dataset] Use an ExecPlan
 for dataset scans

Replaces the body of AsyncScanner::ScanBatchesAsync with usage of an ExecPlan

Closes #10664 from bkietz/13238-Substitute-ExecPlan-impl-

Lead-authored-by: Benjamin Kietzman <bengilgit@gmail.com>
Co-authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Benjamin Kietzman <bengilgit@gmail.com>
---
 cpp/src/arrow/compute/exec.cc              |   5 +-
 cpp/src/arrow/compute/exec.h               |   6 +
 cpp/src/arrow/compute/exec/exec_plan.cc    | 242 ++++++-----
 cpp/src/arrow/compute/exec/exec_plan.h     |  41 +-
 cpp/src/arrow/compute/exec/plan_test.cc    |  77 +++-
 cpp/src/arrow/compute/exec/test_util.cc    |  11 +-
 cpp/src/arrow/compute/exec_test.cc         |   2 +-
 cpp/src/arrow/dataset/file_parquet_test.cc |  10 +-
 cpp/src/arrow/dataset/scanner.cc           | 467 +++++++++++----------
 cpp/src/arrow/dataset/scanner_test.cc      | 165 +++-----
 cpp/src/arrow/testing/future_util.h        |  34 +-
 cpp/src/arrow/testing/matchers.h           | 137 ++++--
 cpp/src/arrow/util/async_generator.h       |  17 +-
 cpp/src/arrow/util/future.cc               |   4 +-
 cpp/src/arrow/util/future.h                | 279 +++++-------
 cpp/src/arrow/util/future_test.cc          |  76 +++-
 cpp/src/arrow/util/thread_pool.h           |  24 +-
 dev/archery/archery/lang/cpp.py            |   5 +-
 18 files changed, 869 insertions(+), 733 deletions(-)

diff --git a/cpp/src/arrow/compute/exec.cc b/cpp/src/arrow/compute/exec.cc
index 78f3d753711..8a469e3fe12 100644
--- a/cpp/src/arrow/compute/exec.cc
+++ b/cpp/src/arrow/compute/exec.cc
@@ -987,8 +987,9 @@ std::unique_ptr<KernelExecutor> KernelExecutor::MakeScalarAggregate() {
 
 }  // namespace detail
 
-ExecContext::ExecContext(MemoryPool* pool, FunctionRegistry* func_registry)
-    : pool_(pool) {
+ExecContext::ExecContext(MemoryPool* pool, ::arrow::internal::Executor* executor,
+                         FunctionRegistry* func_registry)
+    : pool_(pool), executor_(executor) {
   this->func_registry_ = func_registry == nullptr ? GetFunctionRegistry() : func_registry;
 }
 
diff --git a/cpp/src/arrow/compute/exec.h b/cpp/src/arrow/compute/exec.h
index e7015814d2a..77d04b86ceb 100644
--- a/cpp/src/arrow/compute/exec.h
+++ b/cpp/src/arrow/compute/exec.h
@@ -34,6 +34,7 @@
 #include "arrow/result.h"
 #include "arrow/type_fwd.h"
 #include "arrow/util/macros.h"
+#include "arrow/util/type_fwd.h"
 #include "arrow/util/visibility.h"
 
 namespace arrow {
@@ -60,6 +61,7 @@ class ARROW_EXPORT ExecContext {
  public:
   // If no function registry passed, the default is used.
   explicit ExecContext(MemoryPool* pool = default_memory_pool(),
+                       ::arrow::internal::Executor* executor = NULLPTR,
                        FunctionRegistry* func_registry = NULLPTR);
 
   /// \brief The MemoryPool used for allocations, default is
@@ -68,6 +70,9 @@ class ARROW_EXPORT ExecContext {
 
   ::arrow::internal::CpuInfo* cpu_info() const;
 
+  /// \brief An Executor which may be used to parallelize execution.
+  ::arrow::internal::Executor* executor() const { return executor_; }
+
   /// \brief The FunctionRegistry for looking up functions by name and
   /// selecting kernels for execution. Defaults to the library-global function
   /// registry provided by GetFunctionRegistry.
@@ -114,6 +119,7 @@ class ARROW_EXPORT ExecContext {
 
  private:
   MemoryPool* pool_;
+  ::arrow::internal::Executor* executor_;
   FunctionRegistry* func_registry_;
   int64_t exec_chunksize_ = std::numeric_limits<int64_t>::max();
   bool preallocate_contiguous_ = true;
diff --git a/cpp/src/arrow/compute/exec/exec_plan.cc b/cpp/src/arrow/compute/exec/exec_plan.cc
index 2dcbfb24724..d0d50af1ac7 100644
--- a/cpp/src/arrow/compute/exec/exec_plan.cc
+++ b/cpp/src/arrow/compute/exec/exec_plan.cc
@@ -39,11 +39,13 @@ namespace compute {
 namespace {
 
 struct ExecPlanImpl : public ExecPlan {
-  ExecPlanImpl() = default;
+  explicit ExecPlanImpl(ExecContext* exec_context) : ExecPlan(exec_context) {}
 
   ~ExecPlanImpl() override {
-    if (started_ && !stopped_) {
+    if (started_ && !finished_.is_finished()) {
+      ARROW_LOG(WARNING) << "Plan was destroyed before finishing";
       StopProducing();
+      finished().Wait();
     }
   }
 
@@ -77,25 +79,40 @@ struct ExecPlanImpl : public ExecPlan {
     // producers precede consumers
     sorted_nodes_ = TopoSort();
 
-    for (size_t i = 0, rev_i = sorted_nodes_.size() - 1; i < sorted_nodes_.size();
-         ++i, --rev_i) {
-      auto st = sorted_nodes_[rev_i]->StartProducing();
-      if (st.ok()) continue;
+    std::vector<Future<>> futures;
 
-      // Stop nodes that successfully started, in reverse order
-      for (; rev_i < sorted_nodes_.size(); ++rev_i) {
-        sorted_nodes_[rev_i]->StopProducing();
+    Status st = Status::OK();
+
+    using rev_it = std::reverse_iterator<NodeVector::iterator>;
+    for (rev_it it(sorted_nodes_.end()), end(sorted_nodes_.begin()); it != end; ++it) {
+      auto node = *it;
+
+      st = node->StartProducing();
+      if (!st.ok()) {
+        // Stop nodes that successfully started, in reverse order
+        stopped_ = true;
+        StopProducingImpl(it.base(), sorted_nodes_.end());
+        break;
       }
-      return st;
+
+      futures.push_back(node->finished());
     }
-    return Status::OK();
+
+    finished_ = AllComplete(std::move(futures));
+    return st;
   }
 
   void StopProducing() {
     DCHECK(started_) << "stopped an ExecPlan which never started";
     stopped_ = true;
 
-    for (const auto& node : sorted_nodes_) {
+    StopProducingImpl(sorted_nodes_.begin(), sorted_nodes_.end());
+  }
+
+  template <typename It>
+  void StopProducingImpl(It begin, It end) {
+    for (auto it = begin; it != end; ++it) {
+      auto node = *it;
       node->StopProducing();
     }
   }
@@ -133,10 +150,11 @@ struct ExecPlanImpl : public ExecPlan {
     return std::move(Impl{nodes_}.sorted);
   }
 
+  Future<> finished_ = Future<>::MakeFinished();
   bool started_ = false, stopped_ = false;
   std::vector<std::unique_ptr<ExecNode>> nodes_;
-  NodeVector sorted_nodes_;
   NodeVector sources_, sinks_;
+  NodeVector sorted_nodes_;
 };
 
 ExecPlanImpl* ToDerived(ExecPlan* ptr) { return checked_cast<ExecPlanImpl*>(ptr); }
@@ -155,8 +173,8 @@ util::optional<int> GetNodeIndex(const std::vector<ExecNode*>& nodes,
 
 }  // namespace
 
-Result<std::shared_ptr<ExecPlan>> ExecPlan::Make() {
-  return std::make_shared<ExecPlanImpl>();
+Result<std::shared_ptr<ExecPlan>> ExecPlan::Make(ExecContext* ctx) {
+  return std::shared_ptr<ExecPlan>(new ExecPlanImpl{ctx});
 }
 
 ExecNode* ExecPlan::AddNode(std::unique_ptr<ExecNode> node) {
@@ -175,6 +193,8 @@ Status ExecPlan::StartProducing() { return ToDerived(this)->StartProducing(); }
 
 void ExecPlan::StopProducing() { ToDerived(this)->StopProducing(); }
 
+Future<> ExecPlan::finished() { return ToDerived(this)->finished_; }
+
 ExecNode::ExecNode(ExecPlan* plan, std::string label, NodeVector inputs,
                    std::vector<std::string> input_labels,
                    std::shared_ptr<Schema> output_schema, int num_outputs)
@@ -220,58 +240,61 @@ struct SourceNode : ExecNode {
 
   const char* kind_name() override { return "SourceNode"; }
 
-  static void NoInputs() { DCHECK(false) << "no inputs; this should never be called"; }
-  void InputReceived(ExecNode*, int, ExecBatch) override { NoInputs(); }
-  void ErrorReceived(ExecNode*, Status) override { NoInputs(); }
-  void InputFinished(ExecNode*, int) override { NoInputs(); }
+  [[noreturn]] static void NoInputs() {
+    DCHECK(false) << "no inputs; this should never be called";
+    std::abort();
+  }
+  [[noreturn]] void InputReceived(ExecNode*, int, ExecBatch) override { NoInputs(); }
+  [[noreturn]] void ErrorReceived(ExecNode*, Status) override { NoInputs(); }
+  [[noreturn]] void InputFinished(ExecNode*, int) override { NoInputs(); }
 
   Status StartProducing() override {
-    if (finished_) {
-      return Status::Invalid("Restarted SourceNode '", label(), "'");
+    DCHECK(!stop_requested_) << "Restarted SourceNode";
+
+    CallbackOptions options;
+    if (auto executor = plan()->exec_context()->executor()) {
+      // These options will transfer execution to the desired Executor if necessary.
+      // This can happen for in-memory scans where batches didn't require
+      // any CPU work to decode. Otherwise, parsing etc should have already
+      // been placed us on the desired Executor and no queues will be pushed to.
+      options.executor = executor;
+      options.should_schedule = ShouldSchedule::IfDifferentExecutor;
     }
 
-    finished_fut_ =
-        Loop([this] {
-          std::unique_lock<std::mutex> lock(mutex_);
-          int seq = next_batch_index_++;
-          if (finished_) {
-            return Future<ControlFlow<int>>::MakeFinished(Break(seq));
-          }
-          lock.unlock();
-
-          return generator_().Then(
-              [=](const util::optional<ExecBatch>& batch) -> ControlFlow<int> {
-                std::unique_lock<std::mutex> lock(mutex_);
-                if (!batch || finished_) {
-                  finished_ = true;
-                  return Break(seq);
-                }
-                lock.unlock();
-
-                // TODO check if we are on the desired Executor and transfer if not.
-                // This can happen for in-memory scans where batches didn't require
-                // any CPU work to decode. Otherwise, parsing etc should have already
-                // been placed us on the thread pool
-                outputs_[0]->InputReceived(this, seq, *batch);
-                return Continue();
-              },
-              [=](const Status& error) -> ControlFlow<int> {
-                std::unique_lock<std::mutex> lock(mutex_);
-                if (!finished_) {
-                  finished_ = true;
+    finished_ = Loop([this, options] {
+                  std::unique_lock<std::mutex> lock(mutex_);
+                  int seq = batch_count_++;
+                  if (stop_requested_) {
+                    return Future<ControlFlow<int>>::MakeFinished(Break(seq));
+                  }
                   lock.unlock();
-                  // unless we were already finished, push the error to our output
-                  // XXX is this correct? Is it reasonable for a consumer to
-                  // ignore errors from a finished producer?
-                  outputs_[0]->ErrorReceived(this, error);
-                }
-                return Break(seq);
-              });
-        }).Then([&](int seq) {
-          /// XXX this is probably redundant: do we always call InputFinished after
-          /// ErrorReceived or will ErrorRecieved be sufficient?
-          outputs_[0]->InputFinished(this, seq);
-        });
+
+                  return generator_().Then(
+                      [=](const util::optional<ExecBatch>& batch) -> ControlFlow<int> {
+                        std::unique_lock<std::mutex> lock(mutex_);
+                        if (IsIterationEnd(batch) || stop_requested_) {
+                          stop_requested_ = true;
+                          return Break(seq);
+                        }
+                        lock.unlock();
+
+                        outputs_[0]->InputReceived(this, seq, *batch);
+                        return Continue();
+                      },
+                      [=](const Status& error) -> ControlFlow<int> {
+                        // NB: ErrorReceived is independent of InputFinished, but
+                        // ErrorReceived will usually prompt StopProducing which will
+                        // prompt InputFinished. ErrorReceived may still be called from a
+                        // node which was requested to stop (indeed, the request to stop
+                        // may prompt an error).
+                        std::unique_lock<std::mutex> lock(mutex_);
+                        stop_requested_ = true;
+                        lock.unlock();
+                        outputs_[0]->ErrorReceived(this, error);
+                        return Break(seq);
+                      },
+                      options);
+                }).Then([&](int seq) { outputs_[0]->InputFinished(this, seq); });
 
     return Status::OK();
   }
@@ -282,20 +305,21 @@ struct SourceNode : ExecNode {
 
   void StopProducing(ExecNode* output) override {
     DCHECK_EQ(output, outputs_[0]);
-    {
-      std::unique_lock<std::mutex> lock(mutex_);
-      finished_ = true;
-    }
-    finished_fut_.Wait();
+    StopProducing();
   }
 
-  void StopProducing() override { StopProducing(outputs_[0]); }
+  void StopProducing() override {
+    std::unique_lock<std::mutex> lock(mutex_);
+    stop_requested_ = true;
+  }
+
+  Future<> finished() override { return finished_; }
 
  private:
   std::mutex mutex_;
-  bool finished_{false};
-  int next_batch_index_{0};
-  Future<> finished_fut_ = Future<>::MakeFinished();
+  bool stop_requested_{false};
+  int batch_count_{0};
+  Future<> finished_ = Future<>::MakeFinished();
   AsyncGenerator<util::optional<ExecBatch>> generator_;
 };
 
@@ -319,8 +343,8 @@ struct FilterNode : ExecNode {
     ARROW_ASSIGN_OR_RAISE(Expression simplified_filter,
                           SimplifyWithGuarantee(filter_, target.guarantee));
 
-    // XXX get a non-default exec context
-    ARROW_ASSIGN_OR_RAISE(Datum mask, ExecuteScalarExpression(simplified_filter, target));
+    ARROW_ASSIGN_OR_RAISE(Datum mask, ExecuteScalarExpression(simplified_filter, target,
+                                                              plan()->exec_context()));
 
     if (mask.is_scalar()) {
       const auto& mask_scalar = mask.scalar_as<BooleanScalar>();
@@ -331,6 +355,10 @@ struct FilterNode : ExecNode {
       return target.Slice(0, 0);
     }
 
+    // if the values are all scalar then the mask must also be
+    DCHECK(!std::all_of(target.values.begin(), target.values.end(),
+                        [](const Datum& value) { return value.is_scalar(); }));
+
     auto values = target.values;
     for (auto& value : values) {
       if (value.is_scalar()) continue;
@@ -345,7 +373,6 @@ struct FilterNode : ExecNode {
     auto maybe_filtered = DoFilter(std::move(batch));
     if (!maybe_filtered.ok()) {
       outputs_[0]->ErrorReceived(this, maybe_filtered.status());
-      inputs_[0]->StopProducing(this);
       return;
     }
 
@@ -356,7 +383,6 @@ struct FilterNode : ExecNode {
   void ErrorReceived(ExecNode* input, Status error) override {
     DCHECK_EQ(input, inputs_[0]);
     outputs_[0]->ErrorReceived(this, std::move(error));
-    inputs_[0]->StopProducing(this);
   }
 
   void InputFinished(ExecNode* input, int seq) override {
@@ -372,10 +398,12 @@ struct FilterNode : ExecNode {
 
   void StopProducing(ExecNode* output) override {
     DCHECK_EQ(output, outputs_[0]);
-    inputs_[0]->StopProducing(this);
+    StopProducing();
   }
 
-  void StopProducing() override { StopProducing(outputs_[0]); }
+  void StopProducing() override { inputs_[0]->StopProducing(this); }
+
+  Future<> finished() override { return inputs_[0]->finished(); }
 
  private:
   Expression filter_;
@@ -407,15 +435,15 @@ struct ProjectNode : ExecNode {
   const char* kind_name() override { return "ProjectNode"; }
 
   Result<ExecBatch> DoProject(const ExecBatch& target) {
-    // XXX get a non-default exec context
     std::vector<Datum> values{exprs_.size()};
     for (size_t i = 0; i < exprs_.size(); ++i) {
       ARROW_ASSIGN_OR_RAISE(Expression simplified_expr,
                             SimplifyWithGuarantee(exprs_[i], target.guarantee));
 
-      ARROW_ASSIGN_OR_RAISE(values[i], ExecuteScalarExpression(simplified_expr, target));
+      ARROW_ASSIGN_OR_RAISE(values[i], ExecuteScalarExpression(simplified_expr, target,
+                                                               plan()->exec_context()));
     }
-    return ExecBatch::Make(std::move(values));
+    return ExecBatch{std::move(values), target.length};
   }
 
   void InputReceived(ExecNode* input, int seq, ExecBatch batch) override {
@@ -424,7 +452,6 @@ struct ProjectNode : ExecNode {
     auto maybe_projected = DoProject(std::move(batch));
     if (!maybe_projected.ok()) {
       outputs_[0]->ErrorReceived(this, maybe_projected.status());
-      inputs_[0]->StopProducing(this);
       return;
     }
 
@@ -435,7 +462,6 @@ struct ProjectNode : ExecNode {
   void ErrorReceived(ExecNode* input, Status error) override {
     DCHECK_EQ(input, inputs_[0]);
     outputs_[0]->ErrorReceived(this, std::move(error));
-    inputs_[0]->StopProducing(this);
   }
 
   void InputFinished(ExecNode* input, int seq) override {
@@ -451,10 +477,12 @@ struct ProjectNode : ExecNode {
 
   void StopProducing(ExecNode* output) override {
     DCHECK_EQ(output, outputs_[0]);
-    inputs_[0]->StopProducing(this);
+    StopProducing();
   }
 
-  void StopProducing() override { StopProducing(outputs_[0]); }
+  void StopProducing() override { inputs_[0]->StopProducing(this); }
+
+  Future<> finished() override { return inputs_[0]->finished(); }
 
  private:
   std::vector<Expression> exprs_;
@@ -494,28 +522,38 @@ struct SinkNode : ExecNode {
 
   const char* kind_name() override { return "SinkNode"; }
 
-  Status StartProducing() override { return Status::OK(); }
+  Status StartProducing() override {
+    finished_ = Future<>::Make();
+    return Status::OK();
+  }
 
   // sink nodes have no outputs from which to feel backpressure
-  static void NoOutputs() { DCHECK(false) << "no outputs; this should never be called"; }
-  void ResumeProducing(ExecNode* output) override { NoOutputs(); }
-  void PauseProducing(ExecNode* output) override { NoOutputs(); }
-  void StopProducing(ExecNode* output) override { NoOutputs(); }
+  [[noreturn]] static void NoOutputs() {
+    DCHECK(false) << "no outputs; this should never be called";
+    std::abort();
+  }
+  [[noreturn]] void ResumeProducing(ExecNode* output) override { NoOutputs(); }
+  [[noreturn]] void PauseProducing(ExecNode* output) override { NoOutputs(); }
+  [[noreturn]] void StopProducing(ExecNode* output) override { NoOutputs(); }
 
   void StopProducing() override {
-    std::unique_lock<std::mutex> lock(mutex_);
-    InputFinishedUnlocked();
+    Finish();
+    inputs_[0]->StopProducing(this);
   }
 
+  Future<> finished() override { return finished_; }
+
   void InputReceived(ExecNode* input, int seq_num, ExecBatch batch) override {
     DCHECK_EQ(input, inputs_[0]);
 
     std::unique_lock<std::mutex> lock(mutex_);
-    if (stopped_) return;
+    if (finished_.is_finished()) return;
 
     ++num_received_;
     if (num_received_ == emit_stop_) {
-      InputFinishedUnlocked();
+      lock.unlock();
+      Finish();
+      lock.lock();
     }
 
     if (emit_stop_ != -1) {
@@ -529,23 +567,21 @@ struct SinkNode : ExecNode {
   void ErrorReceived(ExecNode* input, Status error) override {
     DCHECK_EQ(input, inputs_[0]);
     producer_.Push(std::move(error));
-    std::unique_lock<std::mutex> lock(mutex_);
-    InputFinishedUnlocked();
+    Finish();
+    inputs_[0]->StopProducing(this);
   }
 
   void InputFinished(ExecNode* input, int seq_stop) override {
     std::unique_lock<std::mutex> lock(mutex_);
     emit_stop_ = seq_stop;
-    if (emit_stop_ == num_received_) {
-      InputFinishedUnlocked();
-    }
+    lock.unlock();
+    Finish();
   }
 
  private:
-  void InputFinishedUnlocked() {
-    if (!stopped_) {
-      stopped_ = true;
-      producer_.Close();
+  void Finish() {
+    if (producer_.Close()) {
+      finished_.MarkFinished();
     }
   }
 
@@ -553,7 +589,7 @@ struct SinkNode : ExecNode {
 
   int num_received_ = 0;
   int emit_stop_ = -1;
-  bool stopped_ = false;
+  Future<> finished_ = Future<>::MakeFinished();
 
   PushGenerator<util::optional<ExecBatch>>::Producer producer_;
 };
diff --git a/cpp/src/arrow/compute/exec/exec_plan.h b/cpp/src/arrow/compute/exec/exec_plan.h
index 21a757af5a1..6c29ddfa7a6 100644
--- a/cpp/src/arrow/compute/exec/exec_plan.h
+++ b/cpp/src/arrow/compute/exec/exec_plan.h
@@ -22,36 +22,32 @@
 #include <string>
 #include <vector>
 
+#include "arrow/compute/exec.h"
 #include "arrow/compute/type_fwd.h"
 #include "arrow/type_fwd.h"
 #include "arrow/util/macros.h"
 #include "arrow/util/optional.h"
 #include "arrow/util/visibility.h"
 
-// NOTES:
-// - ExecBatches only have arrays or scalars
-// - data streams may be ordered, so add input number?
-// - node to combine input needs to reorder
-
 namespace arrow {
 namespace compute {
 
-class ExecNode;
-
 class ARROW_EXPORT ExecPlan : public std::enable_shared_from_this<ExecPlan> {
  public:
   using NodeVector = std::vector<ExecNode*>;
 
   virtual ~ExecPlan() = default;
 
+  ExecContext* exec_context() const { return exec_context_; }
+
   /// Make an empty exec plan
-  static Result<std::shared_ptr<ExecPlan>> Make();
+  static Result<std::shared_ptr<ExecPlan>> Make(ExecContext* = default_exec_context());
 
   ExecNode* AddNode(std::unique_ptr<ExecNode> node);
 
   template <typename Node, typename... Args>
   Node* EmplaceNode(Args&&... args) {
-    auto node = std::unique_ptr<Node>(new Node{std::forward<Args>(args)...});
+    std::unique_ptr<Node> node{new Node{std::forward<Args>(args)...}};
     auto out = node.get();
     AddNode(std::move(node));
     return out;
@@ -65,16 +61,24 @@ class ARROW_EXPORT ExecPlan : public std::enable_shared_from_this<ExecPlan> {
 
   Status Validate();
 
-  /// Start producing on all nodes
+  /// \brief Start producing on all nodes
   ///
   /// Nodes are started in reverse topological order, such that any node
   /// is started before all of its inputs.
   Status StartProducing();
 
+  /// \brief Stop producing on all nodes
+  ///
+  /// Nodes are stopped in topological order, such that any node
+  /// is stopped before all of its outputs.
   void StopProducing();
 
+  /// \brief A future which will be marked finished when all nodes have stopped producing.
+  Future<> finished();
+
  protected:
-  ExecPlan() = default;
+  ExecContext* exec_context_;
+  explicit ExecPlan(ExecContext* exec_context) : exec_context_(exec_context) {}
 };
 
 class ARROW_EXPORT ExecNode {
@@ -203,14 +207,15 @@ class ARROW_EXPORT ExecNode {
   /// \brief Stop producing definitively to a single output
   ///
   /// This call is a hint that an output node has completed and is not willing
-  /// to not receive any further data.
+  /// to receive any further data.
   virtual void StopProducing(ExecNode* output) = 0;
 
-  /// \brief Stop producing definitively
-  ///
-  /// XXX maybe this should return a Future<>?
+  /// \brief Stop producing definitively to all outputs
   virtual void StopProducing() = 0;
 
+  /// \brief A future which will be marked finished when this node has stopped producing.
+  virtual Future<> finished() = 0;
+
  protected:
   ExecNode(ExecPlan* plan, std::string label, NodeVector inputs,
            std::vector<std::string> input_labels, std::shared_ptr<Schema> output_schema,
@@ -229,10 +234,10 @@ class ARROW_EXPORT ExecNode {
 
 /// \brief Adapt an AsyncGenerator<ExecBatch> as a source node
 ///
-/// TODO this should accept an Executor and explicitly handle batches
-/// as they are generated on each of the Executor's threads.
+/// plan->exec_context()->executor() is used to parallelize pushing to
+/// outputs, if provided.
 ARROW_EXPORT
-ExecNode* MakeSourceNode(ExecPlan*, std::string label,
+ExecNode* MakeSourceNode(ExecPlan* plan, std::string label,
                          std::shared_ptr<Schema> output_schema,
                          std::function<Future<util::optional<ExecBatch>>()>);
 
diff --git a/cpp/src/arrow/compute/exec/plan_test.cc b/cpp/src/arrow/compute/exec/plan_test.cc
index 75b71f97535..9ebafc42668 100644
--- a/cpp/src/arrow/compute/exec/plan_test.cc
+++ b/cpp/src/arrow/compute/exec/plan_test.cc
@@ -36,6 +36,7 @@
 
 using testing::ElementsAre;
 using testing::HasSubstr;
+using testing::Optional;
 using testing::UnorderedElementsAreArray;
 
 namespace arrow {
@@ -45,7 +46,7 @@ namespace compute {
 TEST(ExecPlanConstruction, Empty) {
   ASSERT_OK_AND_ASSIGN(auto plan, ExecPlan::Make());
 
-  ASSERT_RAISES(Invalid, plan->Validate());
+  ASSERT_THAT(plan->Validate(), Raises(StatusCode::Invalid));
 }
 
 TEST(ExecPlanConstruction, SingleNode) {
@@ -58,7 +59,7 @@ TEST(ExecPlanConstruction, SingleNode) {
   ASSERT_OK_AND_ASSIGN(plan, ExecPlan::Make());
   node = MakeDummyNode(plan.get(), "dummy", /*inputs=*/{}, /*num_outputs=*/1);
   // Output not bound
-  ASSERT_RAISES(Invalid, plan->Validate());
+  ASSERT_THAT(plan->Validate(), Raises(StatusCode::Invalid));
 }
 
 TEST(ExecPlanConstruction, SourceSink) {
@@ -144,7 +145,15 @@ TEST(ExecPlan, DummyStartProducing) {
   // Note that any correct reverse topological order may do
   ASSERT_THAT(t.started, ElementsAre("sink", "process3", "process2", "process1",
                                      "source2", "source1"));
-  ASSERT_EQ(t.stopped.size(), 0);
+
+  plan->StopProducing();
+  ASSERT_THAT(plan->finished(), Finishes(Ok()));
+  // Note that any correct topological order may do
+  ASSERT_THAT(t.stopped, ElementsAre("source1", "source2", "process1", "process2",
+                                     "process3", "sink"));
+
+  ASSERT_THAT(plan->StartProducing(),
+              Raises(StatusCode::Invalid, HasSubstr("restarted")));
 }
 
 TEST(ExecPlan, DummyStartProducingError) {
@@ -179,7 +188,7 @@ TEST(ExecPlan, DummyStartProducingError) {
   ASSERT_EQ(t.stopped.size(), 0);
 
   // `process1` raises IOError
-  ASSERT_RAISES(IOError, plan->StartProducing());
+  ASSERT_THAT(plan->StartProducing(), Raises(StatusCode::IOError));
   ASSERT_THAT(t.started, ElementsAre("sink", "process3", "process2", "process1"));
   // Nodes that started successfully were stopped in reverse order
   ASSERT_THAT(t.stopped, ElementsAre("process2", "process3", "sink"));
@@ -226,18 +235,20 @@ Result<ExecNode*> MakeTestSourceNode(ExecPlan* plan, std::string label,
                         std::move(gen));
 }
 
-Result<std::vector<ExecBatch>> StartAndCollect(
+Future<std::vector<ExecBatch>> StartAndCollect(
     ExecPlan* plan, AsyncGenerator<util::optional<ExecBatch>> gen) {
   RETURN_NOT_OK(plan->Validate());
   RETURN_NOT_OK(plan->StartProducing());
 
-  auto maybe_collected = CollectAsyncGenerator(gen).result();
-  ARROW_ASSIGN_OR_RAISE(auto collected, maybe_collected);
+  auto collected_fut = CollectAsyncGenerator(gen);
 
-  plan->StopProducing();
-
-  return internal::MapVector(
-      [](util::optional<ExecBatch> batch) { return std::move(*batch); }, collected);
+  return AllComplete({plan->finished(), Future<>(collected_fut)})
+      .Then([collected_fut]() -> Result<std::vector<ExecBatch>> {
+        ARROW_ASSIGN_OR_RAISE(auto collected, collected_fut.result());
+        return internal::MapVector(
+            [](util::optional<ExecBatch> batch) { return std::move(*batch); },
+            std::move(collected));
+      });
 }
 
 BatchesWithSchema MakeBasicBatches() {
@@ -282,7 +293,7 @@ TEST(ExecPlanExecution, SourceSink) {
       auto sink_gen = MakeSinkNode(source, "sink");
 
       ASSERT_THAT(StartAndCollect(plan.get(), sink_gen),
-                  ResultWith(UnorderedElementsAreArray(basic_data.batches)));
+                  Finishes(ResultWith(UnorderedElementsAreArray(basic_data.batches))));
     }
   }
 }
@@ -304,7 +315,7 @@ TEST(ExecPlanExecution, SourceSinkError) {
   auto sink_gen = MakeSinkNode(source, "sink");
 
   ASSERT_THAT(StartAndCollect(plan.get(), sink_gen),
-              Raises(StatusCode::Invalid, HasSubstr("Artificial")));
+              Finishes(Raises(StatusCode::Invalid, HasSubstr("Artificial"))));
 }
 
 TEST(ExecPlanExecution, StressSourceSink) {
@@ -327,7 +338,37 @@ TEST(ExecPlanExecution, StressSourceSink) {
       auto sink_gen = MakeSinkNode(source, "sink");
 
       ASSERT_THAT(StartAndCollect(plan.get(), sink_gen),
-                  ResultWith(UnorderedElementsAreArray(random_data.batches)));
+                  Finishes(ResultWith(UnorderedElementsAreArray(random_data.batches))));
+    }
+  }
+}
+
+TEST(ExecPlanExecution, StressSourceSinkStopped) {
+  for (bool slow : {false, true}) {
+    SCOPED_TRACE(slow ? "slowed" : "unslowed");
+
+    for (bool parallel : {false, true}) {
+      SCOPED_TRACE(parallel ? "parallel" : "single threaded");
+
+      int num_batches = slow && !parallel ? 30 : 300;
+
+      ASSERT_OK_AND_ASSIGN(auto plan, ExecPlan::Make());
+
+      auto random_data = MakeRandomBatches(
+          schema({field("a", int32()), field("b", boolean())}), num_batches);
+
+      ASSERT_OK_AND_ASSIGN(auto source, MakeTestSourceNode(plan.get(), "source",
+                                                           random_data, parallel, slow));
+
+      auto sink_gen = MakeSinkNode(source, "sink");
+
+      ASSERT_OK(plan->Validate());
+      ASSERT_OK(plan->StartProducing());
+
+      EXPECT_THAT(sink_gen(), Finishes(ResultWith(Optional(random_data.batches[0]))));
+
+      plan->StopProducing();
+      ASSERT_THAT(plan->finished(), Finishes(Ok()));
     }
   }
 }
@@ -349,9 +390,9 @@ TEST(ExecPlanExecution, SourceFilterSink) {
   auto sink_gen = MakeSinkNode(filter, "sink");
 
   ASSERT_THAT(StartAndCollect(plan.get(), sink_gen),
-              ResultWith(UnorderedElementsAreArray(
+              Finishes(ResultWith(UnorderedElementsAreArray(
                   {ExecBatchFromJSON({int32(), boolean()}, "[]"),
-                   ExecBatchFromJSON({int32(), boolean()}, "[[6, false]]")})));
+                   ExecBatchFromJSON({int32(), boolean()}, "[[6, false]]")}))));
 }
 
 TEST(ExecPlanExecution, SourceProjectSink) {
@@ -376,10 +417,10 @@ TEST(ExecPlanExecution, SourceProjectSink) {
   auto sink_gen = MakeSinkNode(projection, "sink");
 
   ASSERT_THAT(StartAndCollect(plan.get(), sink_gen),
-              ResultWith(UnorderedElementsAreArray(
+              Finishes(ResultWith(UnorderedElementsAreArray(
                   {ExecBatchFromJSON({boolean(), int32()}, "[[false, null], [true, 5]]"),
                    ExecBatchFromJSON({boolean(), int32()},
-                                     "[[null, 6], [true, 7], [true, 8]]")})));
+                                     "[[null, 6], [true, 7], [true, 8]]")}))));
 }
 
 }  // namespace compute
diff --git a/cpp/src/arrow/compute/exec/test_util.cc b/cpp/src/arrow/compute/exec/test_util.cc
index 6fbfa2a430c..bd203b354f0 100644
--- a/cpp/src/arrow/compute/exec/test_util.cc
+++ b/cpp/src/arrow/compute/exec/test_util.cc
@@ -23,6 +23,7 @@
 #include <memory>
 #include <mutex>
 #include <string>
+#include <unordered_set>
 #include <utility>
 #include <vector>
 
@@ -88,14 +89,12 @@ struct DummyNode : ExecNode {
   }
 
   void StopProducing(ExecNode* output) override {
-    ASSERT_GE(num_outputs(), 0) << "Sink nodes should not experience backpressure";
+    EXPECT_GE(num_outputs(), 0) << "Sink nodes should not experience backpressure";
     AssertIsOutput(output);
-    StopProducing();
   }
 
   void StopProducing() override {
     if (started_) {
-      started_ = false;
       for (const auto& input : inputs_) {
         input->StopProducing(this);
       }
@@ -105,9 +104,12 @@ struct DummyNode : ExecNode {
     }
   }
 
+  Future<> finished() override { return Future<>::MakeFinished(); }
+
  private:
   void AssertIsOutput(ExecNode* output) {
-    ASSERT_NE(std::find(outputs_.begin(), outputs_.end(), output), outputs_.end());
+    auto it = std::find(outputs_.begin(), outputs_.end(), output);
+    ASSERT_NE(it, outputs_.end());
   }
 
   std::shared_ptr<Schema> dummy_schema() const {
@@ -116,6 +118,7 @@ struct DummyNode : ExecNode {
 
   StartProducingFunc start_producing_;
   StopProducingFunc stop_producing_;
+  std::unordered_set<ExecNode*> requested_stop_;
   bool started_ = false;
 };
 
diff --git a/cpp/src/arrow/compute/exec_test.cc b/cpp/src/arrow/compute/exec_test.cc
index 8ce7e52d252..2c145dadaeb 100644
--- a/cpp/src/arrow/compute/exec_test.cc
+++ b/cpp/src/arrow/compute/exec_test.cc
@@ -69,7 +69,7 @@ TEST(ExecContext, BasicWorkings) {
   // Now, let's customize all the things
   LoggingMemoryPool my_pool(default_memory_pool());
   std::unique_ptr<FunctionRegistry> custom_reg = FunctionRegistry::Make();
-  ExecContext ctx(&my_pool, custom_reg.get());
+  ExecContext ctx(&my_pool, /*executor=*/nullptr, custom_reg.get());
 
   ASSERT_EQ(custom_reg.get(), ctx.func_registry());
   ASSERT_EQ(&my_pool, ctx.memory_pool());
diff --git a/cpp/src/arrow/dataset/file_parquet_test.cc b/cpp/src/arrow/dataset/file_parquet_test.cc
index ffa64e8ec10..eab80010c76 100644
--- a/cpp/src/arrow/dataset/file_parquet_test.cc
+++ b/cpp/src/arrow/dataset/file_parquet_test.cc
@@ -491,9 +491,6 @@ TEST_P(TestParquetFileFormatScan, PredicatePushdownRowGroupFragments) {
   auto all_row_groups = internal::Iota(static_cast<int>(kNumRowGroups));
   CountRowGroupsInFragment(fragment, all_row_groups, literal(true));
 
-  // FIXME this is only meaningful if "not here" is a virtual column
-  // CountRowGroupsInFragment(fragment, all_row_groups, "not here"_ == 0);
-
   for (int i = 0; i < kNumRowGroups; ++i) {
     CountRowGroupsInFragment(fragment, {i}, equal(field_ref("i64"), literal(i + 1)));
   }
@@ -516,9 +513,10 @@ TEST_P(TestParquetFileFormatScan, PredicatePushdownRowGroupFragments) {
       fragment, {1, 3},
       or_(equal(field_ref("i64"), literal(2)), equal(field_ref("i64"), literal(4))));
 
-  // TODO(bkietz): better Assume support for InExpression
-  // auto set = ArrayFromJSON(int64(), "[2, 4]");
-  // CountRowGroupsInFragment(fragment, {1, 3}, field_ref("i64").In(set));
+  auto set = ArrayFromJSON(int64(), "[2, 4]");
+  CountRowGroupsInFragment(
+      fragment, {1, 3},
+      call("is_in", {field_ref("i64")}, compute::SetLookupOptions{set}));
 
   CountRowGroupsInFragment(fragment, {0, 1, 2, 3, 4}, less(field_ref("i64"), literal(6)));
 
diff --git a/cpp/src/arrow/dataset/scanner.cc b/cpp/src/arrow/dataset/scanner.cc
index 58e96fdc113..cc2e5bcda66 100644
--- a/cpp/src/arrow/dataset/scanner.cc
+++ b/cpp/src/arrow/dataset/scanner.cc
@@ -24,6 +24,7 @@
 #include <sstream>
 
 #include "arrow/array/array_primitive.h"
+#include "arrow/compute/api_aggregate.h"
 #include "arrow/compute/api_scalar.h"
 #include "arrow/compute/api_vector.h"
 #include "arrow/compute/cast.h"
@@ -432,72 +433,9 @@ class ARROW_DS_EXPORT AsyncScanner : public Scanner,
 
 namespace {
 
-inline Result<EnumeratedRecordBatch> DoFilterAndProjectRecordBatchAsync(
-    const std::shared_ptr<ScanOptions>& options, const EnumeratedRecordBatch& in) {
-  ARROW_ASSIGN_OR_RAISE(
-      compute::Expression simplified_filter,
-      SimplifyWithGuarantee(options->filter, in.fragment.value->partition_expression()));
-
-  const auto& schema = *options->dataset_schema;
-
-  compute::ExecContext exec_context{options->pool};
-  ARROW_ASSIGN_OR_RAISE(Datum mask,
-                        ExecuteScalarExpression(simplified_filter, schema,
-                                                in.record_batch.value, &exec_context));
-
-  Datum filtered;
-  if (mask.is_scalar()) {
-    const auto& mask_scalar = mask.scalar_as<BooleanScalar>();
-    if (mask_scalar.is_valid && mask_scalar.value) {
-      // filter matches entire table
-      filtered = in.record_batch.value;
-    } else {
-      // Filter matches nothing
-      filtered = in.record_batch.value->Slice(0, 0);
-    }
-  } else {
-    ARROW_ASSIGN_OR_RAISE(
-        filtered, compute::Filter(in.record_batch.value, mask,
-                                  compute::FilterOptions::Defaults(), &exec_context));
-  }
-
-  ARROW_ASSIGN_OR_RAISE(compute::Expression simplified_projection,
-                        SimplifyWithGuarantee(options->projection,
-                                              in.fragment.value->partition_expression()));
-
-  ARROW_ASSIGN_OR_RAISE(
-      Datum projected,
-      ExecuteScalarExpression(simplified_projection, schema, filtered, &exec_context));
-
-  DCHECK_EQ(projected.type()->id(), Type::STRUCT);
-  if (projected.shape() == ValueDescr::SCALAR) {
-    // Only virtual columns are projected. Broadcast to an array
-    ARROW_ASSIGN_OR_RAISE(
-        projected,
-        MakeArrayFromScalar(*projected.scalar(), filtered.record_batch()->num_rows(),
-                            options->pool));
-  }
-  ARROW_ASSIGN_OR_RAISE(auto out,
-                        RecordBatch::FromStructArray(projected.array_as<StructArray>()));
-  auto projected_batch =
-      out->ReplaceSchemaMetadata(in.record_batch.value->schema()->metadata());
-
-  return EnumeratedRecordBatch{
-      {std::move(projected_batch), in.record_batch.index, in.record_batch.last},
-      in.fragment};
-}
-
-inline EnumeratedRecordBatchGenerator FilterAndProjectRecordBatchAsync(
-    const std::shared_ptr<ScanOptions>& options, EnumeratedRecordBatchGenerator rbs) {
-  auto mapper = [options](const EnumeratedRecordBatch& in) {
-    return DoFilterAndProjectRecordBatchAsync(options, in);
-  };
-  return MakeMappedGenerator(std::move(rbs), mapper);
-}
-
 Result<EnumeratedRecordBatchGenerator> FragmentToBatches(
     const Enumerated<std::shared_ptr<Fragment>>& fragment,
-    const std::shared_ptr<ScanOptions>& options, bool filter_and_project = true) {
+    const std::shared_ptr<ScanOptions>& options) {
   ARROW_ASSIGN_OR_RAISE(auto batch_gen, fragment.value->ScanBatchesAsync(options));
   auto enumerated_batch_gen = MakeEnumeratedGenerator(std::move(batch_gen));
 
@@ -506,73 +444,116 @@ Result<EnumeratedRecordBatchGenerator> FragmentToBatches(
         return EnumeratedRecordBatch{record_batch, fragment};
       };
 
-  auto combined_gen = MakeMappedGenerator(enumerated_batch_gen, std::move(combine_fn));
-
-  if (filter_and_project) {
-    return FilterAndProjectRecordBatchAsync(options, std::move(combined_gen));
-  }
-  return combined_gen;
+  return MakeMappedGenerator(enumerated_batch_gen, std::move(combine_fn));
 }
 
 Result<AsyncGenerator<EnumeratedRecordBatchGenerator>> FragmentsToBatches(
-    FragmentGenerator fragment_gen, const std::shared_ptr<ScanOptions>& options,
-    bool filter_and_project = true) {
+    FragmentGenerator fragment_gen, const std::shared_ptr<ScanOptions>& options) {
   auto enumerated_fragment_gen = MakeEnumeratedGenerator(std::move(fragment_gen));
   return MakeMappedGenerator(std::move(enumerated_fragment_gen),
                              [=](const Enumerated<std::shared_ptr<Fragment>>& fragment) {
-                               return FragmentToBatches(fragment, options,
-                                                        filter_and_project);
+                               return FragmentToBatches(fragment, options);
                              });
 }
 
-Result<AsyncGenerator<AsyncGenerator<util::optional<int64_t>>>> FragmentsToRowCount(
-    FragmentGenerator fragment_gen,
-    std::shared_ptr<ScanOptions> options_with_projection) {
-  // Must use optional<int64_t> to avoid breaking the pipeline on empty batches
-  auto enumerated_fragment_gen = MakeEnumeratedGenerator(std::move(fragment_gen));
+Result<compute::ExecNode*> MakeScanNode(compute::ExecPlan* plan,
+                                        FragmentGenerator fragment_gen,
+                                        std::shared_ptr<ScanOptions> options) {
+  if (!options->use_async) {
+    return Status::NotImplemented("ScanNodes without asynchrony");
+  }
 
-  // Drop projection since we only need to count rows
-  auto options = std::make_shared<ScanOptions>(*options_with_projection);
-  RETURN_NOT_OK(SetProjection(options.get(), std::vector<std::string>()));
+  ARROW_ASSIGN_OR_RAISE(auto batch_gen_gen,
+                        FragmentsToBatches(std::move(fragment_gen), options));
 
-  auto count_fragment_fn =
-      [options](const Enumerated<std::shared_ptr<Fragment>>& fragment)
-      -> Result<AsyncGenerator<util::optional<int64_t>>> {
-    auto count_fut = fragment.value->CountRows(options->filter, options);
-    return MakeFromFuture(
-        count_fut.Then([=](util::optional<int64_t> val)
-                           -> Result<AsyncGenerator<util::optional<int64_t>>> {
-          // Fast path
-          if (val.has_value()) {
-            return MakeSingleFutureGenerator(
-                Future<util::optional<int64_t>>::MakeFinished(val));
-          }
-          // Slow path
-          ARROW_ASSIGN_OR_RAISE(auto batch_gen, FragmentToBatches(fragment, options));
-          auto count_fn =
-              [](const EnumeratedRecordBatch& enumerated) -> util::optional<int64_t> {
-            return enumerated.record_batch.value->num_rows();
-          };
-          return MakeMappedGenerator(batch_gen, std::move(count_fn));
-        }));
-  };
-  return MakeMappedGenerator(std::move(enumerated_fragment_gen),
-                             std::move(count_fragment_fn));
-}
+  auto merged_batch_gen =
+      MakeMergedGenerator(std::move(batch_gen_gen), options->fragment_readahead);
 
-Result<EnumeratedRecordBatchGenerator> ScanBatchesUnorderedAsyncImpl(
-    const std::shared_ptr<ScanOptions>& options, FragmentGenerator fragment_gen,
-    internal::Executor* cpu_executor, bool filter_and_project = true) {
-  ARROW_ASSIGN_OR_RAISE(
-      auto batch_gen_gen,
-      FragmentsToBatches(std::move(fragment_gen), options, filter_and_project));
-  auto batch_gen_gen_readahead =
-      MakeSerialReadaheadGenerator(std::move(batch_gen_gen), options->fragment_readahead);
-  auto merged_batch_gen = MakeMergedGenerator(std::move(batch_gen_gen_readahead),
-                                              options->fragment_readahead);
-  return MakeReadaheadGenerator(std::move(merged_batch_gen), options->fragment_readahead);
+  auto batch_gen =
+      MakeReadaheadGenerator(std::move(merged_batch_gen), options->fragment_readahead);
+
+  auto gen = MakeMappedGenerator(
+      std::move(batch_gen),
+      [options](const EnumeratedRecordBatch& partial)
+          -> Result<util::optional<compute::ExecBatch>> {
+        ARROW_ASSIGN_OR_RAISE(
+            util::optional<compute::ExecBatch> batch,
+            compute::MakeExecBatch(*options->dataset_schema, partial.record_batch.value));
+        // TODO(ARROW-13263) fragments may be able to attach more guarantees to batches
+        // than this, for example parquet's row group stats. Failing to do this leaves
+        // perf on the table because row group stats could be used to skip kernel execs in
+        // FilterNode.
+        //
+        // Additionally, if a fragment failed to perform projection pushdown there may be
+        // unnecessarily materialized columns in batch. We could drop them now instead of
+        // letting them coast through the rest of the plan.
+        batch->guarantee = partial.fragment.value->partition_expression();
+
+        // tag rows with fragment- and batch-of-origin
+        batch->values.emplace_back(partial.fragment.index);
+        batch->values.emplace_back(partial.record_batch.index);
+        batch->values.emplace_back(partial.record_batch.last);
+        return batch;
+      });
+
+  auto augmented_fields = options->dataset_schema->fields();
+  augmented_fields.push_back(field("__fragment_index", int32()));
+  augmented_fields.push_back(field("__batch_index", int32()));
+  augmented_fields.push_back(field("__last_in_fragment", boolean()));
+  return compute::MakeSourceNode(plan, "dataset_scan",
+                                 schema(std::move(augmented_fields)), std::move(gen));
 }
 
+class OneShotScanTask : public ScanTask {
+ public:
+  OneShotScanTask(RecordBatchIterator batch_it, std::shared_ptr<ScanOptions> options,
+                  std::shared_ptr<Fragment> fragment)
+      : ScanTask(std::move(options), std::move(fragment)),
+        batch_it_(std::move(batch_it)) {}
+  Result<RecordBatchIterator> Execute() override {
+    if (!batch_it_) return Status::Invalid("OneShotScanTask was already scanned");
+    return std::move(batch_it_);
+  }
+
+ private:
+  RecordBatchIterator batch_it_;
+};
+
+class OneShotFragment : public Fragment {
+ public:
+  OneShotFragment(std::shared_ptr<Schema> schema, RecordBatchIterator batch_it)
+      : Fragment(compute::literal(true), std::move(schema)),
+        batch_it_(std::move(batch_it)) {
+    DCHECK_NE(physical_schema_, nullptr);
+  }
+  Status CheckConsumed() {
+    if (!batch_it_) return Status::Invalid("OneShotFragment was already scanned");
+    return Status::OK();
+  }
+  Result<ScanTaskIterator> Scan(std::shared_ptr<ScanOptions> options) override {
+    RETURN_NOT_OK(CheckConsumed());
+    ScanTaskVector tasks{std::make_shared<OneShotScanTask>(
+        std::move(batch_it_), std::move(options), shared_from_this())};
+    return MakeVectorIterator(std::move(tasks));
+  }
+  Result<RecordBatchGenerator> ScanBatchesAsync(
+      const std::shared_ptr<ScanOptions>& options) override {
+    RETURN_NOT_OK(CheckConsumed());
+    ARROW_ASSIGN_OR_RAISE(
+        auto background_gen,
+        MakeBackgroundGenerator(std::move(batch_it_), options->io_context.executor()));
+    return MakeTransferredGenerator(std::move(background_gen),
+                                    internal::GetCpuThreadPool());
+  }
+  std::string type_name() const override { return "one-shot"; }
+
+ protected:
+  Result<std::shared_ptr<Schema>> ReadPhysicalSchemaImpl() override {
+    return physical_schema_;
+  }
+
+  RecordBatchIterator batch_it_;
+};
 }  // namespace
 
 Result<FragmentGenerator> AsyncScanner::GetFragments() const {
@@ -604,11 +585,88 @@ Result<EnumeratedRecordBatchGenerator> AsyncScanner::ScanBatchesUnorderedAsync()
   return ScanBatchesUnorderedAsync(internal::GetCpuThreadPool());
 }
 
+namespace {
+Result<EnumeratedRecordBatch> ToEnumeratedRecordBatch(
+    const util::optional<compute::ExecBatch>& batch, const ScanOptions& options,
+    const FragmentVector& fragments) {
+  int num_fields = options.projected_schema->num_fields();
+
+  ArrayVector columns(num_fields);
+  for (size_t i = 0; i < columns.size(); ++i) {
+    const Datum& value = batch->values[i];
+    if (value.is_array()) {
+      columns[i] = value.make_array();
+      continue;
+    }
+    ARROW_ASSIGN_OR_RAISE(
+        columns[i], MakeArrayFromScalar(*value.scalar(), batch->length, options.pool));
+  }
+
+  EnumeratedRecordBatch out;
+  out.fragment.index = batch->values[num_fields].scalar_as<Int32Scalar>().value;
+  out.fragment.value = fragments[out.fragment.index];
+  out.fragment.last = false;  // ignored during reordering
+
+  out.record_batch.index = batch->values[num_fields + 1].scalar_as<Int32Scalar>().value;
+  out.record_batch.value =
+      RecordBatch::Make(options.projected_schema, batch->length, std::move(columns));
+  out.record_batch.last = batch->values[num_fields + 2].scalar_as<BooleanScalar>().value;
+
+  return out;
+}
+}  // namespace
+
 Result<EnumeratedRecordBatchGenerator> AsyncScanner::ScanBatchesUnorderedAsync(
     internal::Executor* cpu_executor) {
-  ARROW_ASSIGN_OR_RAISE(auto fragment_gen, GetFragments());
-  return ScanBatchesUnorderedAsyncImpl(scan_options_, std::move(fragment_gen),
-                                       cpu_executor);
+  if (!scan_options_->use_threads) {
+    cpu_executor = nullptr;
+  }
+
+  auto exec_context =
+      std::make_shared<compute::ExecContext>(scan_options_->pool, cpu_executor);
+
+  ARROW_ASSIGN_OR_RAISE(auto plan, compute::ExecPlan::Make(exec_context.get()));
+
+  ARROW_ASSIGN_OR_RAISE(auto scan, MakeScanNode(plan.get(), dataset_, scan_options_));
+
+  ARROW_ASSIGN_OR_RAISE(auto filter,
+                        compute::MakeFilterNode(scan, "filter", scan_options_->filter));
+
+  auto exprs = scan_options_->projection.call()->arguments;
+  exprs.push_back(compute::field_ref("__fragment_index"));
+  exprs.push_back(compute::field_ref("__batch_index"));
+  exprs.push_back(compute::field_ref("__last_in_fragment"));
+  ARROW_ASSIGN_OR_RAISE(auto project,
+                        compute::MakeProjectNode(filter, "project", std::move(exprs)));
+
+  AsyncGenerator<util::optional<compute::ExecBatch>> sink_gen =
+      compute::MakeSinkNode(project, "sink");
+
+  RETURN_NOT_OK(plan->StartProducing());
+
+  auto options = scan_options_;
+  ARROW_ASSIGN_OR_RAISE(auto fragments_it, dataset_->GetFragments(scan_options_->filter));
+  ARROW_ASSIGN_OR_RAISE(auto fragments, fragments_it.ToVector());
+  auto shared_fragments = std::make_shared<FragmentVector>(std::move(fragments));
+
+  // If the generator is destroyed before being completely drained, inform plan
+  std::shared_ptr<void> stop_producing{
+      nullptr, [plan, exec_context](...) {
+        bool not_finished_yet = plan->finished().TryAddCallback(
+            [&plan, &exec_context] { return [plan, exec_context](const Status&) {}; });
+
+        if (not_finished_yet) {
+          plan->StopProducing();
+        }
+      }};
+
+  return MakeMappedGenerator(
+      std::move(sink_gen),
+      [sink_gen, options, stop_producing,
+       shared_fragments](const util::optional<compute::ExecBatch>& batch)
+          -> Future<EnumeratedRecordBatch> {
+        return ToEnumeratedRecordBatch(batch, *options, *shared_fragments);
+      });
 }
 
 Result<TaggedRecordBatchGenerator> AsyncScanner::ScanBatchesAsync() {
@@ -729,20 +787,75 @@ Future<std::shared_ptr<Table>> AsyncScanner::ToTableAsync(
   });
 }
 
+namespace {
+Result<int64_t> GetSelectionSize(const Datum& selection, int64_t length) {
+  if (length == 0) return 0;
+
+  if (selection.is_scalar()) {
+    if (!selection.scalar()->is_valid) return 0;
+    if (!selection.scalar_as<BooleanScalar>().value) return 0;
+    return length;
+  }
+
+  ARROW_ASSIGN_OR_RAISE(auto count, compute::Sum(selection));
+  return static_cast<int64_t>(count.scalar_as<UInt64Scalar>().value);
+}
+}  // namespace
+
 Result<int64_t> AsyncScanner::CountRows() {
   ARROW_ASSIGN_OR_RAISE(auto fragment_gen, GetFragments());
-  ARROW_ASSIGN_OR_RAISE(auto count_gen_gen,
-                        FragmentsToRowCount(std::move(fragment_gen), scan_options_));
-  auto count_gen = MakeConcatenatedGenerator(std::move(count_gen_gen));
-  int64_t total = 0;
-  auto sum_fn = [&total](util::optional<int64_t> count) -> Status {
-    if (count.has_value()) total += *count;
-    return Status::OK();
-  };
-  RETURN_NOT_OK(VisitAsyncGenerator<util::optional<int64_t>>(std::move(count_gen),
-                                                             std::move(sum_fn))
-                    .status());
-  return total;
+  ARROW_ASSIGN_OR_RAISE(auto plan, compute::ExecPlan::Make());
+  // Drop projection since we only need to count rows
+  auto options = std::make_shared<ScanOptions>(*scan_options_);
+  RETURN_NOT_OK(SetProjection(options.get(), std::vector<std::string>()));
+
+  std::atomic<int64_t> total{0};
+
+  fragment_gen = MakeMappedGenerator(
+      std::move(fragment_gen), [&](const std::shared_ptr<Fragment>& fragment) {
+        return fragment->CountRows(scan_options_->filter, scan_options_)
+            .Then([&, fragment](util::optional<int64_t> fast_count) mutable
+                  -> std::shared_ptr<Fragment> {
+              if (fast_count) {
+                // fast path: got row count directly; skip scanning this fragment
+                total += *fast_count;
+                return std::make_shared<OneShotFragment>(
+                    options->dataset_schema,
+                    MakeEmptyIterator<std::shared_ptr<RecordBatch>>());
+              }
+
+              // slow path: actually filter this fragment's batches
+              return std::move(fragment);
+            });
+      });
+
+  ARROW_ASSIGN_OR_RAISE(auto scan,
+                        MakeScanNode(plan.get(), std::move(fragment_gen), options));
+
+  ARROW_ASSIGN_OR_RAISE(
+      auto get_selection,
+      compute::MakeProjectNode(scan, "get_selection", {options->filter}));
+
+  AsyncGenerator<util::optional<compute::ExecBatch>> sink_gen =
+      compute::MakeSinkNode(get_selection, "sink");
+
+  RETURN_NOT_OK(plan->StartProducing());
+
+  RETURN_NOT_OK(
+      VisitAsyncGenerator(std::move(sink_gen),
+                          [&](const util::optional<compute::ExecBatch>& batch) {
+                            // TODO replace with scalar aggregation node
+                            ARROW_ASSIGN_OR_RAISE(
+                                int64_t slow_count,
+                                GetSelectionSize(batch->values[0], batch->length));
+                            total += slow_count;
+                            return Status::OK();
+                          })
+          .status());
+
+  plan->finished().Wait();
+
+  return total.load();
 }
 
 ScannerBuilder::ScannerBuilder(std::shared_ptr<Dataset> dataset)
@@ -762,59 +875,6 @@ ScannerBuilder::ScannerBuilder(std::shared_ptr<Schema> schema,
                          std::move(schema), FragmentVector{std::move(fragment)}),
                      std::move(scan_options)) {}
 
-namespace {
-class OneShotScanTask : public ScanTask {
- public:
-  OneShotScanTask(RecordBatchIterator batch_it, std::shared_ptr<ScanOptions> options,
-                  std::shared_ptr<Fragment> fragment)
-      : ScanTask(std::move(options), std::move(fragment)),
-        batch_it_(std::move(batch_it)) {}
-  Result<RecordBatchIterator> Execute() override {
-    if (!batch_it_) return Status::Invalid("OneShotScanTask was already scanned");
-    return std::move(batch_it_);
-  }
-
- private:
-  RecordBatchIterator batch_it_;
-};
-
-class OneShotFragment : public Fragment {
- public:
-  OneShotFragment(std::shared_ptr<Schema> schema, RecordBatchIterator batch_it)
-      : Fragment(compute::literal(true), std::move(schema)),
-        batch_it_(std::move(batch_it)) {
-    DCHECK_NE(physical_schema_, nullptr);
-  }
-  Status CheckConsumed() {
-    if (!batch_it_) return Status::Invalid("OneShotFragment was already scanned");
-    return Status::OK();
-  }
-  Result<ScanTaskIterator> Scan(std::shared_ptr<ScanOptions> options) override {
-    RETURN_NOT_OK(CheckConsumed());
-    ScanTaskVector tasks{std::make_shared<OneShotScanTask>(
-        std::move(batch_it_), std::move(options), shared_from_this())};
-    return MakeVectorIterator(std::move(tasks));
-  }
-  Result<RecordBatchGenerator> ScanBatchesAsync(
-      const std::shared_ptr<ScanOptions>& options) override {
-    RETURN_NOT_OK(CheckConsumed());
-    ARROW_ASSIGN_OR_RAISE(
-        auto background_gen,
-        MakeBackgroundGenerator(std::move(batch_it_), options->io_context.executor()));
-    return MakeTransferredGenerator(std::move(background_gen),
-                                    internal::GetCpuThreadPool());
-  }
-  std::string type_name() const override { return "one-shot"; }
-
- protected:
-  Result<std::shared_ptr<Schema>> ReadPhysicalSchemaImpl() override {
-    return physical_schema_;
-  }
-
-  RecordBatchIterator batch_it_;
-};
-}  // namespace
-
 std::shared_ptr<ScannerBuilder> ScannerBuilder::FromRecordBatchReader(
     std::shared_ptr<RecordBatchReader> reader) {
   auto batch_it = MakeIteratorFromReader(reader);
@@ -1108,47 +1168,12 @@ Result<int64_t> SyncScanner::CountRows() {
 Result<compute::ExecNode*> MakeScanNode(compute::ExecPlan* plan,
                                         std::shared_ptr<Dataset> dataset,
                                         std::shared_ptr<ScanOptions> scan_options) {
-  if (!scan_options->use_async) {
-    return Status::NotImplemented("ScanNodes without asynchrony");
-  }
-
   // using a generator for speculative forward compatibility with async fragment discovery
-  ARROW_ASSIGN_OR_RAISE(scan_options->filter,
-                        scan_options->filter.Bind(*dataset->schema()));
   ARROW_ASSIGN_OR_RAISE(auto fragments_it, dataset->GetFragments(scan_options->filter));
   ARROW_ASSIGN_OR_RAISE(auto fragments_vec, fragments_it.ToVector());
   auto fragments_gen = MakeVectorGenerator(std::move(fragments_vec));
 
-  ARROW_ASSIGN_OR_RAISE(auto batch_gen,
-                        ScanBatchesUnorderedAsyncImpl(
-                            scan_options, std::move(fragments_gen),
-                            internal::GetCpuThreadPool(), /*filter_and_project=*/false));
-
-  auto gen = MakeMappedGenerator(
-      std::move(batch_gen),
-      [dataset](const EnumeratedRecordBatch& partial)
-          -> Result<util::optional<compute::ExecBatch>> {
-        ARROW_ASSIGN_OR_RAISE(
-            util::optional<compute::ExecBatch> batch,
-            compute::MakeExecBatch(*dataset->schema(), partial.record_batch.value));
-
-        // TODO fragments may be able to attach more guarantees to batches than this,
-        // for example parquet's row group stats.
-        batch->guarantee = partial.fragment.value->partition_expression();
-
-        // tag rows with fragment- and batch-of-origin
-        batch->values.emplace_back(partial.fragment.index);
-        batch->values.emplace_back(partial.record_batch.index);
-        batch->values.emplace_back(partial.record_batch.last);
-        return batch;
-      });
-
-  auto augmented_fields = dataset->schema()->fields();
-  augmented_fields.push_back(field("__fragment_index", int32()));
-  augmented_fields.push_back(field("__batch_index", int32()));
-  augmented_fields.push_back(field("__last_in_fragment", boolean()));
-  return compute::MakeSourceNode(plan, "dataset_scan",
-                                 schema(std::move(augmented_fields)), std::move(gen));
+  return MakeScanNode(plan, std::move(fragments_gen), std::move(scan_options));
 }
 
 }  // namespace dataset
diff --git a/cpp/src/arrow/dataset/scanner_test.cc b/cpp/src/arrow/dataset/scanner_test.cc
index bed276b1bff..f567054bf91 100644
--- a/cpp/src/arrow/dataset/scanner_test.cc
+++ b/cpp/src/arrow/dataset/scanner_test.cc
@@ -1094,19 +1094,20 @@ TEST(ScanOptions, TestMaterializedFields) {
 
 namespace {
 
-static Result<std::vector<compute::ExecBatch>> StartAndCollect(
+Future<std::vector<compute::ExecBatch>> StartAndCollect(
     compute::ExecPlan* plan, AsyncGenerator<util::optional<compute::ExecBatch>> gen) {
   RETURN_NOT_OK(plan->Validate());
   RETURN_NOT_OK(plan->StartProducing());
 
-  auto maybe_collected = CollectAsyncGenerator(gen).result();
-  ARROW_ASSIGN_OR_RAISE(auto collected, maybe_collected);
+  auto collected_fut = CollectAsyncGenerator(gen);
 
-  plan->StopProducing();
-
-  return internal::MapVector(
-      [](util::optional<compute::ExecBatch> batch) { return std::move(*batch); },
-      collected);
+  return AllComplete({plan->finished(), Future<>(collected_fut)})
+      .Then([collected_fut]() -> Result<std::vector<compute::ExecBatch>> {
+        ARROW_ASSIGN_OR_RAISE(auto collected, collected_fut.result());
+        return internal::MapVector(
+            [](util::optional<compute::ExecBatch> batch) { return std::move(*batch); },
+            std::move(collected));
+      });
 }
 
 struct DatasetAndBatches {
@@ -1183,6 +1184,7 @@ TEST(ScanNode, Schema) {
 
   auto options = std::make_shared<ScanOptions>();
   options->use_async = true;
+  options->dataset_schema = basic.dataset->schema();
 
   ASSERT_OK_AND_ASSIGN(auto scan, MakeScanNode(plan.get(), basic.dataset, options));
 
@@ -1200,6 +1202,7 @@ TEST(ScanNode, Trivial) {
 
   auto options = std::make_shared<ScanOptions>();
   options->use_async = true;
+  options->dataset_schema = basic.dataset->schema();
 
   ASSERT_OK_AND_ASSIGN(auto scan, MakeScanNode(plan.get(), basic.dataset, options));
   auto sink_gen = MakeSinkNode(scan, "sink");
@@ -1207,7 +1210,7 @@ TEST(ScanNode, Trivial) {
   // trivial scan: the batches are returned unmodified
   auto expected = basic.batches;
   ASSERT_THAT(StartAndCollect(plan.get(), sink_gen),
-              ResultWith(UnorderedElementsAreArray(expected)));
+              Finishes(ResultWith(UnorderedElementsAreArray(expected))));
 }
 
 TEST(ScanNode, FilteredOnVirtualColumn) {
@@ -1217,7 +1220,9 @@ TEST(ScanNode, FilteredOnVirtualColumn) {
 
   auto options = std::make_shared<ScanOptions>();
   options->use_async = true;
-  options->filter = less(field_ref("c"), literal(30));
+  options->dataset_schema = basic.dataset->schema();
+  ASSERT_OK_AND_ASSIGN(options->filter,
+                       less(field_ref("c"), literal(30)).Bind(*basic.dataset->schema()));
 
   ASSERT_OK_AND_ASSIGN(auto scan, MakeScanNode(plan.get(), basic.dataset, options));
 
@@ -1230,7 +1235,7 @@ TEST(ScanNode, FilteredOnVirtualColumn) {
   expected.pop_back();
 
   ASSERT_THAT(StartAndCollect(plan.get(), sink_gen),
-              ResultWith(UnorderedElementsAreArray(expected)));
+              Finishes(ResultWith(UnorderedElementsAreArray(expected))));
 }
 
 TEST(ScanNode, DeferredFilterOnPhysicalColumn) {
@@ -1240,7 +1245,10 @@ TEST(ScanNode, DeferredFilterOnPhysicalColumn) {
 
   auto options = std::make_shared<ScanOptions>();
   options->use_async = true;
-  options->filter = greater(field_ref("a"), literal(4));
+  options->dataset_schema = basic.dataset->schema();
+  ASSERT_OK_AND_ASSIGN(
+      options->filter,
+      greater(field_ref("a"), literal(4)).Bind(*basic.dataset->schema()));
 
   ASSERT_OK_AND_ASSIGN(auto scan, MakeScanNode(plan.get(), basic.dataset, options));
 
@@ -1251,11 +1259,36 @@ TEST(ScanNode, DeferredFilterOnPhysicalColumn) {
   auto expected = basic.batches;
 
   ASSERT_THAT(StartAndCollect(plan.get(), sink_gen),
-              ResultWith(UnorderedElementsAreArray(expected)));
+              Finishes(ResultWith(UnorderedElementsAreArray(expected))));
 }
 
-TEST(ScanNode, ProjectionPushdown) {
-  // ensure non-projected columns are dropped
+TEST(ScanNode, DISABLED_ProjectionPushdown) {
+  // ARROW-13263
+  ASSERT_OK_AND_ASSIGN(auto plan, compute::ExecPlan::Make());
+
+  auto basic = MakeBasicDataset();
+
+  auto options = std::make_shared<ScanOptions>();
+  options->use_async = true;
+  options->dataset_schema = basic.dataset->schema();
+  ASSERT_OK(SetProjection(options.get(), {field_ref("b")}, {"b"}));
+
+  ASSERT_OK_AND_ASSIGN(auto scan, MakeScanNode(plan.get(), basic.dataset, options));
+
+  auto sink_gen = MakeSinkNode(scan, "sink");
+
+  auto expected = basic.batches;
+
+  int a_index = basic.dataset->schema()->GetFieldIndex("a");
+  int c_index = basic.dataset->schema()->GetFieldIndex("c");
+  for (auto& batch : expected) {
+    // "a", "c" were not projected or filtered so they are dropped eagerly
+    batch.values[a_index] = MakeNullScalar(batch.values[a_index].type());
+    batch.values[c_index] = MakeNullScalar(batch.values[c_index].type());
+  }
+
+  ASSERT_THAT(StartAndCollect(plan.get(), sink_gen),
+              Finishes(ResultWith(UnorderedElementsAreArray(expected))));
 }
 
 TEST(ScanNode, MaterializationOfVirtualColumn) {
@@ -1265,6 +1298,7 @@ TEST(ScanNode, MaterializationOfVirtualColumn) {
 
   auto options = std::make_shared<ScanOptions>();
   options->use_async = true;
+  options->dataset_schema = basic.dataset->schema();
 
   ASSERT_OK_AND_ASSIGN(auto scan, MakeScanNode(plan.get(), basic.dataset, options));
 
@@ -1286,106 +1320,7 @@ TEST(ScanNode, MaterializationOfVirtualColumn) {
   }
 
   ASSERT_THAT(StartAndCollect(plan.get(), sink_gen),
-              ResultWith(UnorderedElementsAreArray(expected)));
-}
-
-TEST(ScanNode, CompareToScanner) {
-  ASSERT_OK_AND_ASSIGN(auto plan, compute::ExecPlan::Make());
-
-  auto basic = MakeBasicDataset();
-
-  ScannerBuilder builder(basic.dataset);
-  ASSERT_OK(builder.UseAsync(true));
-  ASSERT_OK(builder.UseThreads(true));
-  ASSERT_OK(builder.Filter(greater(field_ref("c"), literal(30))));
-  ASSERT_OK(builder.Project(
-      {field_ref("c"), call("multiply", {field_ref("a"), literal(2)})}, {"c", "a * 2"}));
-  ASSERT_OK_AND_ASSIGN(auto scanner, builder.Finish());
-
-  ASSERT_OK_AND_ASSIGN(auto fragments_it,
-                       basic.dataset->GetFragments(scanner->options()->filter));
-  ASSERT_OK_AND_ASSIGN(auto fragments, fragments_it.ToVector());
-
-  auto options = scanner->options();
-
-  ASSERT_OK_AND_ASSIGN(auto scan, MakeScanNode(plan.get(), basic.dataset, options));
-
-  ASSERT_OK_AND_ASSIGN(auto filter,
-                       compute::MakeFilterNode(scan, "filter", options->filter));
-
-  auto exprs = options->projection.call()->arguments;
-  exprs.push_back(compute::field_ref("__fragment_index"));
-  exprs.push_back(compute::field_ref("__batch_index"));
-  exprs.push_back(compute::field_ref("__last_in_fragment"));
-  ASSERT_OK_AND_ASSIGN(auto project, compute::MakeProjectNode(filter, "project", exprs));
-
-  AsyncGenerator<util::optional<compute::ExecBatch>> sink_gen =
-      compute::MakeSinkNode(project, "sink");
-
-  ASSERT_OK(plan->StartProducing());
-
-  auto from_plan =
-      CollectAsyncGenerator(
-          MakeMappedGenerator(
-              sink_gen,
-              [&](const util::optional<compute::ExecBatch>& batch)
-                  -> Result<EnumeratedRecordBatch> {
-                int num_fields = options->projected_schema->num_fields();
-
-                ArrayVector columns(num_fields);
-                for (size_t i = 0; i < columns.size(); ++i) {
-                  const Datum& value = batch->values[i];
-                  if (value.is_array()) {
-                    columns[i] = value.make_array();
-                    continue;
-                  }
-                  ARROW_ASSIGN_OR_RAISE(
-                      columns[i],
-                      MakeArrayFromScalar(*value.scalar(), batch->length, options->pool));
-                }
-
-                EnumeratedRecordBatch out;
-                out.fragment.index =
-                    batch->values[num_fields].scalar_as<Int32Scalar>().value;
-                out.fragment.value = fragments[out.fragment.index];
-                out.fragment.last = false;  // ignored during reordering
-
-                out.record_batch.index =
-                    batch->values[num_fields + 1].scalar_as<Int32Scalar>().value;
-                out.record_batch.value = RecordBatch::Make(
-                    options->projected_schema, batch->length, std::move(columns));
-                out.record_batch.last =
-                    batch->values[num_fields + 2].scalar_as<BooleanScalar>().value;
-
-                return out;
-              }))
-          .result();
-
-  ASSERT_OK_AND_ASSIGN(auto from_scanner_gen, scanner->ScanBatchesUnorderedAsync());
-  auto from_scanner = CollectAsyncGenerator(from_scanner_gen).result();
-
-  auto less = [](const EnumeratedRecordBatch& l, const EnumeratedRecordBatch& r) {
-    if (l.fragment.index < r.fragment.index) return true;
-    return l.record_batch.index < r.record_batch.index;
-  };
-
-  ASSERT_OK(from_plan);
-  std::sort(from_plan->begin(), from_plan->end(), less);
-
-  ASSERT_OK(from_scanner);
-  std::sort(from_scanner->begin(), from_scanner->end(), less);
-
-  ASSERT_EQ(from_plan->size(), from_scanner->size());
-  for (size_t i = 0; i < from_plan->size(); ++i) {
-    const auto& p = from_plan->at(i);
-    const auto& s = from_scanner->at(i);
-    SCOPED_TRACE(i);
-    ASSERT_EQ(p.fragment.index, s.fragment.index);
-    ASSERT_EQ(p.fragment.value, s.fragment.value);
-    ASSERT_EQ(p.record_batch.last, s.record_batch.last);
-    ASSERT_EQ(p.record_batch.index, s.record_batch.index);
-    AssertBatchesEqual(*p.record_batch.value, *s.record_batch.value);
-  }
+              Finishes(ResultWith(UnorderedElementsAreArray(expected))));
 }
 
 }  // namespace dataset
diff --git a/cpp/src/arrow/testing/future_util.h b/cpp/src/arrow/testing/future_util.h
index 878840587ff..2ca70d05402 100644
--- a/cpp/src/arrow/testing/future_util.h
+++ b/cpp/src/arrow/testing/future_util.h
@@ -21,21 +21,21 @@
 #include "arrow/util/future.h"
 
 // This macro should be called by futures that are expected to
-// complete pretty quickly.  2 seconds is the default max wait
-// here.  Anything longer than that and it's a questionable
-// unit test anyways.
-#define ASSERT_FINISHES_IMPL(fut)                            \
-  do {                                                       \
-    ASSERT_TRUE(fut.Wait(300));                              \
-    if (!fut.is_finished()) {                                \
-      FAIL() << "Future did not finish in a timely fashion"; \
-    }                                                        \
+// complete pretty quickly.  arrow::kDefaultAssertFinishesWaitSeconds is the
+// default max wait here.  Anything longer than that and it's a questionable unit test
+// anyways.
+#define ASSERT_FINISHES_IMPL(fut)                                      \
+  do {                                                                 \
+    ASSERT_TRUE(fut.Wait(::arrow::kDefaultAssertFinishesWaitSeconds)); \
+    if (!fut.is_finished()) {                                          \
+      FAIL() << "Future did not finish in a timely fashion";           \
+    }                                                                  \
   } while (false)
 
 #define ASSERT_FINISHES_OK(expr)                                              \
   do {                                                                        \
     auto&& _fut = (expr);                                                     \
-    ASSERT_TRUE(_fut.Wait(300));                                              \
+    ASSERT_TRUE(_fut.Wait(::arrow::kDefaultAssertFinishesWaitSeconds));       \
     if (!_fut.is_finished()) {                                                \
       FAIL() << "Future did not finish in a timely fashion";                  \
     }                                                                         \
@@ -74,12 +74,12 @@
     ASSERT_EQ(expected, _actual);                        \
   } while (0)
 
-#define EXPECT_FINISHES_IMPL(fut)                                   \
-  do {                                                              \
-    EXPECT_TRUE(fut.Wait(300));                                     \
-    if (!fut.is_finished()) {                                       \
-      ADD_FAILURE() << "Future did not finish in a timely fashion"; \
-    }                                                               \
+#define EXPECT_FINISHES_IMPL(fut)                                      \
+  do {                                                                 \
+    EXPECT_TRUE(fut.Wait(::arrow::kDefaultAssertFinishesWaitSeconds)); \
+    if (!fut.is_finished()) {                                          \
+      ADD_FAILURE() << "Future did not finish in a timely fashion";    \
+    }                                                                  \
   } while (false)
 
 #define ON_FINISH_ASSIGN_OR_HANDLE_ERROR_IMPL(handle_error, future_name, lhs, rexpr) \
@@ -105,6 +105,8 @@
 
 namespace arrow {
 
+constexpr double kDefaultAssertFinishesWaitSeconds = 64;
+
 template <typename T>
 void AssertNotFinished(const Future<T>& fut) {
   ASSERT_FALSE(IsFutureFinished(fut.state()));
diff --git a/cpp/src/arrow/testing/matchers.h b/cpp/src/arrow/testing/matchers.h
index 246f321e8fa..f76c25dc096 100644
--- a/cpp/src/arrow/testing/matchers.h
+++ b/cpp/src/arrow/testing/matchers.h
@@ -21,9 +21,60 @@
 
 #include "arrow/result.h"
 #include "arrow/status.h"
+#include "arrow/testing/future_util.h"
+#include "arrow/testing/gtest_util.h"
+#include "arrow/util/future.h"
 
 namespace arrow {
 
+template <typename ResultMatcher>
+class FutureMatcher {
+ public:
+  explicit FutureMatcher(ResultMatcher result_matcher, double wait_seconds)
+      : result_matcher_(std::move(result_matcher)), wait_seconds_(wait_seconds) {}
+
+  template <typename Fut,
+            typename ValueType = typename std::decay<Fut>::type::ValueType>
+  operator testing::Matcher<Fut>() const {  // NOLINT runtime/explicit
+    struct Impl : testing::MatcherInterface<const Fut&> {
+      explicit Impl(const ResultMatcher& result_matcher, double wait_seconds)
+          : result_matcher_(testing::MatcherCast<Result<ValueType>>(result_matcher)),
+            wait_seconds_(wait_seconds) {}
+
+      void DescribeTo(::std::ostream* os) const override {
+        *os << "value ";
+        result_matcher_.DescribeTo(os);
+      }
+
+      void DescribeNegationTo(::std::ostream* os) const override {
+        *os << "value ";
+        result_matcher_.DescribeNegationTo(os);
+      }
+
+      bool MatchAndExplain(const Fut& fut,
+                           testing::MatchResultListener* listener) const override {
+        if (!fut.Wait(wait_seconds_)) {
+          *listener << "which didn't finish within " << wait_seconds_ << " seconds";
+          return false;
+        }
+
+        const Result<ValueType>& maybe_value = fut.result();
+        testing::StringMatchResultListener value_listener;
+        return result_matcher_.MatchAndExplain(maybe_value, &value_listener);
+      }
+
+      const testing::Matcher<Result<ValueType>> result_matcher_;
+      const double wait_seconds_;
+    };
+
+    return testing::Matcher<Fut>(new Impl(result_matcher_, wait_seconds_));
+  }
+
+ private:
+  const ResultMatcher result_matcher_;
+  const double wait_seconds_;
+};
+
 template <typename ValueMatcher>
 class ResultMatcher {
  public:
@@ -55,7 +106,7 @@ class ResultMatcher {
                     << " doesn't match";
           return false;
         }
-        const ValueType& value = GetValue(maybe_value);
+        const ValueType& value = maybe_value.ValueOrDie();
         testing::StringMatchResultListener value_listener;
         const bool match = value_matcher_.MatchAndExplain(value, &value_listener);
         *listener << "whose value " << testing::PrintToString(value)
@@ -71,23 +122,13 @@ class ResultMatcher {
   }
 
  private:
-  template <typename T>
-  static const T& GetValue(const Result<T>& maybe_value) {
-    return maybe_value.ValueOrDie();
-  }
-
-  template <typename T>
-  static const T& GetValue(const Future<T>& value_fut) {
-    return GetValue(value_fut.result());
-  }
-
   const ValueMatcher value_matcher_;
 };
 
-class StatusMatcher {
+class ErrorMatcher {
  public:
-  explicit StatusMatcher(StatusCode code,
-                         util::optional<testing::Matcher<std::string>> message_matcher)
+  explicit ErrorMatcher(StatusCode code,
+                        util::optional<testing::Matcher<std::string>> message_matcher)
       : code_(code), message_matcher_(std::move(message_matcher)) {}
 
   template <typename Res>
@@ -115,7 +156,7 @@ class StatusMatcher {
 
       bool MatchAndExplain(const Res& maybe_value,
                            testing::MatchResultListener* listener) const override {
-        const Status& status = GetStatus(maybe_value);
+        const Status& status = internal::GenericToStatus(maybe_value);
         testing::StringMatchResultListener value_listener;
 
         bool match = status.code() == code_;
@@ -138,40 +179,62 @@ class StatusMatcher {
   }
 
  private:
-  static const Status& GetStatus(const Status& status) { return status; }
+  const StatusCode code_;
+  const util::optional<testing::Matcher<std::string>> message_matcher_;
+};
 
-  template <typename T>
-  static const Status& GetStatus(const Result<T>& maybe_value) {
-    return maybe_value.status();
-  }
+class OkMatcher {
+ public:
+  template <typename Res>
+  operator testing::Matcher<Res>() const {  // NOLINT runtime/explicit
+    struct Impl : testing::MatcherInterface<const Res&> {
+      void DescribeTo(::std::ostream* os) const override { *os << "is ok"; }
 
-  template <typename T>
-  static const Status& GetStatus(const Future<T>& value_fut) {
-    return value_fut.status();
-  }
+      void DescribeNegationTo(::std::ostream* os) const override { *os << "is not ok"; }
 
-  const StatusCode code_;
-  const util::optional<testing::Matcher<std::string>> message_matcher_;
+      bool MatchAndExplain(const Res& maybe_value,
+                           testing::MatchResultListener* listener) const override {
+        const Status& status = internal::GenericToStatus(maybe_value);
+        testing::StringMatchResultListener value_listener;
+
+        const bool match = status.ok();
+        *listener << "whose value " << testing::PrintToString(status.ToString())
+                  << (match ? " matches" : " doesn't match");
+        testing::internal::PrintIfNotEmpty(value_listener.str(), listener->stream());
+        return match;
+      }
+    };
+
+    return testing::Matcher<Res>(new Impl());
+  }
 };
 
-// Returns a matcher that matches the value of a successful Result<T> or Future<T>.
-// (Future<T> will be waited upon to acquire its result for matching.)
+// Returns a matcher that waits on a Future (by default for 16 seconds)
+// then applies a matcher to the result.
+template <typename ResultMatcher>
+FutureMatcher<ResultMatcher> Finishes(
+    const ResultMatcher& result_matcher,
+    double wait_seconds = kDefaultAssertFinishesWaitSeconds) {
+  return FutureMatcher<ResultMatcher>(result_matcher, wait_seconds);
+}
+
+// Returns a matcher that matches the value of a successful Result<T>.
 template <typename ValueMatcher>
 ResultMatcher<ValueMatcher> ResultWith(const ValueMatcher& value_matcher) {
   return ResultMatcher<ValueMatcher>(value_matcher);
 }
 
-// Returns a matcher that matches the StatusCode of a Status, Result<T>, or Future<T>.
-// (Future<T> will be waited upon to acquire its result for matching.)
-inline StatusMatcher Raises(StatusCode code) {
-  return StatusMatcher(code, util::nullopt);
-}
+// Returns a matcher that matches an ok Status or Result<T>.
+inline OkMatcher Ok() { return {}; }
+
+// Returns a matcher that matches the StatusCode of a Status or Result<T>.
+// Do not use Raises(StatusCode::OK) to match a non error code.
+inline ErrorMatcher Raises(StatusCode code) { return ErrorMatcher(code, util::nullopt); }
 
-// Returns a matcher that matches the StatusCode and message of a Status, Result<T>, or
-// Future<T>. (Future<T> will be waited upon to acquire its result for matching.)
+// Returns a matcher that matches the StatusCode and message of a Status or Result<T>.
 template <typename MessageMatcher>
-StatusMatcher Raises(StatusCode code, const MessageMatcher& message_matcher) {
-  return StatusMatcher(code, testing::MatcherCast<std::string>(message_matcher));
+ErrorMatcher Raises(StatusCode code, const MessageMatcher& message_matcher) {
+  return ErrorMatcher(code, testing::MatcherCast<std::string>(message_matcher));
 }
 
 }  // namespace arrow
diff --git a/cpp/src/arrow/util/async_generator.h b/cpp/src/arrow/util/async_generator.h
index 5a6321fd418..18149884204 100644
--- a/cpp/src/arrow/util/async_generator.h
+++ b/cpp/src/arrow/util/async_generator.h
@@ -77,16 +77,15 @@ Future<T> AsyncGeneratorEnd() {
 }
 
 /// returning a future that completes when all have been visited
-template <typename T>
-Future<> VisitAsyncGenerator(AsyncGenerator<T> generator,
-                             std::function<Status(T)> visitor) {
+template <typename T, typename Visitor>
+Future<> VisitAsyncGenerator(AsyncGenerator<T> generator, Visitor visitor) {
   struct LoopBody {
     struct Callback {
-      Result<ControlFlow<>> operator()(const T& result) {
-        if (IsIterationEnd(result)) {
+      Result<ControlFlow<>> operator()(const T& next) {
+        if (IsIterationEnd(next)) {
           return Break();
         } else {
-          auto visited = visitor(result);
+          auto visited = visitor(next);
           if (visited.ok()) {
             return Continue();
           } else {
@@ -95,7 +94,7 @@ Future<> VisitAsyncGenerator(AsyncGenerator<T> generator,
         }
       }
 
-      std::function<Status(T)> visitor;
+      Visitor visitor;
     };
 
     Future<ControlFlow<>> operator()() {
@@ -105,7 +104,7 @@ Future<> VisitAsyncGenerator(AsyncGenerator<T> generator,
     }
 
     AsyncGenerator<T> generator;
-    std::function<Status(T)> visitor;
+    Visitor visitor;
   };
 
   return Loop(LoopBody{std::move(generator), std::move(visitor)});
@@ -775,7 +774,7 @@ class PushGenerator {
   /// Producer API for PushGenerator
   class Producer {
    public:
-    explicit Producer(const std::shared_ptr<State> state) : weak_state_(state) {}
+    explicit Producer(const std::shared_ptr<State>& state) : weak_state_(state) {}
 
     /// \brief Push a value on the queue
     ///
diff --git a/cpp/src/arrow/util/future.cc b/cpp/src/arrow/util/future.cc
index b329f99ed17..f288a15be3f 100644
--- a/cpp/src/arrow/util/future.cc
+++ b/cpp/src/arrow/util/future.cc
@@ -272,6 +272,8 @@ class ConcreteFutureImpl : public FutureImpl {
         return true;
       case ShouldSchedule::IfUnfinished:
         return !in_add_callback;
+      case ShouldSchedule::IfDifferentExecutor:
+        return !callback_record.options.executor->OwnsThisThread();
       default:
         DCHECK(false) << "Unrecognized ShouldSchedule option";
         return false;
@@ -309,7 +311,7 @@ class ConcreteFutureImpl : public FutureImpl {
     }
     cv_.notify_all();
 
-    // run callbacks, lock not needed since the future is finsihed by this
+    // run callbacks, lock not needed since the future is finished by this
     // point so nothing else can modify the callbacks list and it is safe
     // to iterate.
     //
diff --git a/cpp/src/arrow/util/future.h b/cpp/src/arrow/util/future.h
index c7c5ba802f9..d9e0a939f25 100644
--- a/cpp/src/arrow/util/future.h
+++ b/cpp/src/arrow/util/future.h
@@ -66,10 +66,9 @@ using first_arg_is_status =
     std::is_same<typename std::decay<internal::call_traits::argument_type<0, Fn>>::type,
                  Status>;
 
-template <typename Fn>
-struct has_no_args {
-  static constexpr bool value = internal::call_traits::argument_count<Fn>::value == 0;
-};
+template <typename Fn, typename Then, typename Else,
+          typename Count = internal::call_traits::argument_count<Fn>>
+using if_has_no_args = typename std::conditional<Count::value == 0, Then, Else>::type;
 
 /// Creates a callback that can be added to a future to mark a `dest` future finished
 template <typename Source, typename Dest, bool SourceEmpty = Source::is_empty,
@@ -169,6 +168,19 @@ struct ContinueFuture {
     MarkNextFinished<ContinueResult, NextFuture> callback{std::move(next)};
     signal_to_complete_next.AddCallback(std::move(callback));
   }
+
+  /// Helpers to conditionally ignore arguments to ContinueFunc
+  template <typename ContinueFunc, typename NextFuture, typename... Args>
+  void IgnoringArgsIf(std::true_type, NextFuture&& next, ContinueFunc&& f,
+                      Args&&...) const {
+    operator()(std::forward<NextFuture>(next), std::forward<ContinueFunc>(f));
+  }
+  template <typename ContinueFunc, typename NextFuture, typename... Args>
+  void IgnoringArgsIf(std::false_type, NextFuture&& next, ContinueFunc&& f,
+                      Args&&... a) const {
+    operator()(std::forward<NextFuture>(next), std::forward<ContinueFunc>(f),
+               std::forward<Args>(a)...);
+  }
 };
 
 /// Helper struct which tells us what kind of Future gets returned from `Then` based on
@@ -213,7 +225,10 @@ enum class ShouldSchedule {
   /// callback is added
   IfUnfinished = 1,
   /// Always schedule the callback as a new task
-  Always = 2
+  Always = 2,
+  /// Schedule a new task only if it would run on an executor other than
+  /// the specified executor.
+  IfDifferentExecutor = 3,
 };
 
 /// \brief Options that control how a continuation is run
@@ -222,9 +237,9 @@ struct CallbackOptions {
   ShouldSchedule should_schedule = ShouldSchedule::Never;
   /// If the callback is scheduled then this is the executor it should be scheduled
   /// on.  If this is NULL then should_schedule must be Never
-  internal::Executor* executor = NULL;
+  internal::Executor* executor = NULLPTR;
 
-  static CallbackOptions Defaults() { return CallbackOptions(); }
+  static CallbackOptions Defaults() { return {}; }
 };
 
 // Untyped private implementation
@@ -343,7 +358,7 @@ class ARROW_EXPORT FutureWaiter {
 /// to complete, or wait on multiple Futures at once (using WaitForAll,
 /// WaitForAny or AsCompletedIterator).
 template <typename T>
-class Future {
+class ARROW_MUST_USE_TYPE Future {
  public:
   using ValueType = T;
   using SyncType = typename detail::SyncType<T>::type;
@@ -464,6 +479,34 @@ class Future {
     return MakeFinished(E::ToResult(std::move(s)));
   }
 
+  struct WrapResultyOnComplete {
+    template <typename OnComplete>
+    struct Callback {
+      void operator()(const FutureImpl& impl) && {
+        std::move(on_complete)(*impl.CastResult<ValueType>());
+      }
+      OnComplete on_complete;
+    };
+  };
+
+  struct WrapStatusyOnComplete {
+    template <typename OnComplete>
+    struct Callback {
+      static_assert(std::is_same<internal::Empty, ValueType>::value,
+                    "Only callbacks for Future<> should accept Status and not Result");
+
+      void operator()(const FutureImpl& impl) && {
+        std::move(on_complete)(impl.CastResult<ValueType>()->status());
+      }
+      OnComplete on_complete;
+    };
+  };
+
+  template <typename OnComplete>
+  using WrapOnComplete = typename std::conditional<
+      detail::first_arg_is_status<OnComplete>::value, WrapStatusyOnComplete,
+      WrapResultyOnComplete>::type::template Callback<OnComplete>;
+
   /// \brief Consumer API: Register a callback to run when this future completes
   ///
   /// The callback should receive the result of the future (const Result<T>&)
@@ -485,35 +528,12 @@ class Future {
   ///
   /// In this example `fut` falls out of scope but is not destroyed because it holds a
   /// cyclic reference to itself through the callback.
-  template <typename OnComplete>
-  typename std::enable_if<!detail::first_arg_is_status<OnComplete>::value>::type
-  AddCallback(OnComplete on_complete,
-              CallbackOptions opts = CallbackOptions::Defaults()) const {
+  template <typename OnComplete, typename Callback = WrapOnComplete<OnComplete>>
+  void AddCallback(OnComplete on_complete,
+                   CallbackOptions opts = CallbackOptions::Defaults()) const {
     // We know impl_ will not be dangling when invoking callbacks because at least one
     // thread will be waiting for MarkFinished to return. Thus it's safe to keep a
     // weak reference to impl_ here
-    struct Callback {
-      void operator()(const FutureImpl& impl) && {
-        std::move(on_complete)(*impl.CastResult<ValueType>());
-      }
-      OnComplete on_complete;
-    };
-    impl_->AddCallback(Callback{std::move(on_complete)}, opts);
-  }
-
-  /// Overload for callbacks accepting a Status
-  template <typename OnComplete>
-  typename std::enable_if<detail::first_arg_is_status<OnComplete>::value>::type
-  AddCallback(OnComplete on_complete,
-              CallbackOptions opts = CallbackOptions::Defaults()) const {
-    static_assert(std::is_same<internal::Empty, ValueType>::value,
-                  "Callbacks for Future<> should accept Status and not Result");
-    struct Callback {
-      void operator()(const FutureImpl& impl) && {
-        std::move(on_complete)(impl.CastResult<ValueType>()->status());
-      }
-      OnComplete on_complete;
-    };
     impl_->AddCallback(Callback{std::move(on_complete)}, opts);
   }
 
@@ -531,36 +551,62 @@ class Future {
   /// Returns true if a callback was actually added and false if the callback failed
   /// to add because the future was marked complete.
   template <typename CallbackFactory,
-            typename OnComplete = detail::result_of_t<CallbackFactory()>>
-  typename std::enable_if<!detail::first_arg_is_status<OnComplete>::value, bool>::type
-  TryAddCallback(const CallbackFactory& callback_factory,
-                 CallbackOptions opts = CallbackOptions::Defaults()) const {
-    struct Callback {
-      void operator()(const FutureImpl& impl) && {
-        std::move(on_complete)(*static_cast<Result<ValueType>*>(impl.result_.get()));
-      }
-      OnComplete on_complete;
-    };
-    return impl_->TryAddCallback(
-        [&callback_factory]() { return Callback{callback_factory()}; }, opts);
+            typename OnComplete = detail::result_of_t<CallbackFactory()>,
+            typename Callback = WrapOnComplete<OnComplete>>
+  bool TryAddCallback(const CallbackFactory& callback_factory,
+                      CallbackOptions opts = CallbackOptions::Defaults()) const {
+    return impl_->TryAddCallback([&]() { return Callback{callback_factory()}; }, opts);
   }
 
-  template <typename CallbackFactory,
-            typename OnComplete = detail::result_of_t<CallbackFactory()>>
-  typename std::enable_if<detail::first_arg_is_status<OnComplete>::value, bool>::type
-  TryAddCallback(const CallbackFactory& callback_factory,
-                 CallbackOptions opts = CallbackOptions::Defaults()) const {
-    struct Callback {
-      void operator()(const FutureImpl& impl) && {
-        std::move(on_complete)(
-            static_cast<Result<ValueType>*>(impl.result_.get())->status());
-      }
-      OnComplete on_complete;
+  template <typename OnSuccess, typename OnFailure>
+  struct ThenOnComplete {
+    static constexpr bool has_no_args =
+        internal::call_traits::argument_count<OnSuccess>::value == 0;
+
+    using ContinuedFuture = detail::ContinueFuture::ForSignature<
+        detail::if_has_no_args<OnSuccess, OnSuccess && (), OnSuccess && (const T&)>>;
+
+    static_assert(
+        std::is_same<detail::ContinueFuture::ForSignature<OnFailure && (const Status&)>,
+                     ContinuedFuture>::value,
+        "OnSuccess and OnFailure must continue with the same future type");
+
+    struct DummyOnSuccess {
+      void operator()(const T&);
     };
+    using OnSuccessArg = typename std::decay<internal::call_traits::argument_type<
+        0, detail::if_has_no_args<OnSuccess, DummyOnSuccess, OnSuccess>>>::type;
 
-    return impl_->TryAddCallback(
-        [&callback_factory]() { return Callback{callback_factory()}; }, opts);
-  }
+    static_assert(
+        !std::is_same<OnSuccessArg, typename EnsureResult<OnSuccessArg>::type>::value,
+        "OnSuccess' argument should not be a Result");
+
+    void operator()(const Result<T>& result) && {
+      detail::ContinueFuture continue_future;
+      if (ARROW_PREDICT_TRUE(result.ok())) {
+        // move on_failure to a(n immediately destroyed) temporary to free its resources
+        ARROW_UNUSED(OnFailure(std::move(on_failure)));
+        continue_future.IgnoringArgsIf(
+            detail::if_has_no_args<OnSuccess, std::true_type, std::false_type>{},
+            std::move(next), std::move(on_success), result.ValueOrDie());
+      } else {
+        ARROW_UNUSED(OnSuccess(std::move(on_success)));
+        continue_future(std::move(next), std::move(on_failure), result.status());
+      }
+    }
+
+    OnSuccess on_success;
+    OnFailure on_failure;
+    ContinuedFuture next;
+  };
+
+  template <typename OnSuccess>
+  struct PassthruOnFailure {
+    using ContinuedFuture = detail::ContinueFuture::ForSignature<
+        detail::if_has_no_args<OnSuccess, OnSuccess && (), OnSuccess && (const T&)>>;
+
+    Result<typename ContinuedFuture::ValueType> operator()(const Status& s) { return s; }
+  };
 
   /// \brief Consumer API: Register a continuation to run when this future completes
   ///
@@ -573,6 +619,7 @@ class Future {
   /// - OnSuccess, called with the result (const ValueType&) on successul completion.
   ///              for an empty future this will be called with nothing ()
   /// - OnFailure, called with the error (const Status&) on failed completion.
+  ///              This callback is optional and defaults to a passthru of any errors.
   ///
   /// Then() returns a Future whose ValueType is derived from the return type of the
   /// callbacks. If a callback returns:
@@ -595,114 +642,18 @@ class Future {
   /// and the returned future may already be marked complete.
   ///
   /// See AddCallback for general considerations when writing callbacks.
-  template <typename OnSuccess, typename OnFailure,
-            typename ContinuedFuture =
-                detail::ContinueFuture::ForSignature<OnSuccess && (const T&)>>
-  ContinuedFuture Then(
-      OnSuccess on_success, OnFailure on_failure,
-      typename std::enable_if<!detail::has_no_args<OnSuccess>::value>::type* =
-          NULLPTR) const {
-    static_assert(
-        std::is_same<detail::ContinueFuture::ForSignature<OnFailure && (const Status&)>,
-                     ContinuedFuture>::value,
-        "OnSuccess and OnFailure must continue with the same future type");
-    using OnSuccessArg =
-        typename std::decay<internal::call_traits::argument_type<0, OnSuccess>>::type;
-    static_assert(
-        !std::is_same<OnSuccessArg, typename EnsureResult<OnSuccessArg>::type>::value,
-        "OnSuccess' argument should not be a Result");
-
-    auto next = ContinuedFuture::Make();
-
-    struct Callback {
-      void operator()(const Result<T>& result) && {
-        detail::ContinueFuture continue_future;
-        if (ARROW_PREDICT_TRUE(result.ok())) {
-          // move on_failure to a(n immediately destroyed) temporary to free its resources
-          ARROW_UNUSED(OnFailure(std::move(on_failure)));
-          continue_future(std::move(next), std::move(on_success), result.ValueOrDie());
-        } else {
-          ARROW_UNUSED(OnSuccess(std::move(on_success)));
-          continue_future(std::move(next), std::move(on_failure), result.status());
-        }
-      }
-
-      OnSuccess on_success;
-      OnFailure on_failure;
-      ContinuedFuture next;
-    };
-
-    AddCallback(Callback{std::forward<OnSuccess>(on_success),
-                         std::forward<OnFailure>(on_failure), next});
-
-    return next;
-  }
-
-  /// \brief Overload for callbacks which ignore the value
-  template <
-      typename OnSuccess, typename OnFailure,
-      typename ContinuedFuture = detail::ContinueFuture::ForSignature<OnSuccess && ()>>
-  ContinuedFuture Then(
-      OnSuccess on_success, OnFailure on_failure,
-      typename std::enable_if<detail::has_no_args<OnSuccess>::value>::type* =
-          NULLPTR) const {
-    static_assert(
-        std::is_same<detail::ContinueFuture::ForSignature<OnFailure && (const Status&)>,
-                     ContinuedFuture>::value,
-        "OnSuccess and OnFailure must continue with the same future type");
-
+  template <typename OnSuccess, typename OnFailure = PassthruOnFailure<OnSuccess>,
+            typename OnComplete = ThenOnComplete<OnSuccess, OnFailure>,
+            typename ContinuedFuture = typename OnComplete::ContinuedFuture>
+  ContinuedFuture Then(OnSuccess on_success, OnFailure on_failure = {},
+                       CallbackOptions options = CallbackOptions::Defaults()) const {
     auto next = ContinuedFuture::Make();
-
-    struct Callback {
-      void operator()(const Result<T>& result) && {
-        detail::ContinueFuture continue_future;
-        if (ARROW_PREDICT_TRUE(result.ok())) {
-          // move on_failure to a(n immediately destroyed) temporary to free its resources
-          ARROW_UNUSED(OnFailure(std::move(on_failure)));
-          continue_future(std::move(next), std::move(on_success));
-        } else {
-          ARROW_UNUSED(OnSuccess(std::move(on_success)));
-          continue_future(std::move(next), std::move(on_failure), result.status());
-        }
-      }
-
-      OnSuccess on_success;
-      OnFailure on_failure;
-      ContinuedFuture next;
-    };
-
-    AddCallback(Callback{std::forward<OnSuccess>(on_success),
-                         std::forward<OnFailure>(on_failure), next});
-
+    AddCallback(OnComplete{std::forward<OnSuccess>(on_success),
+                           std::forward<OnFailure>(on_failure), next},
+                options);
     return next;
   }
 
-  /// \brief Overload without OnFailure. Failures will be passed through unchanged.
-  template <typename OnSuccess,
-            typename ContinuedFuture =
-                detail::ContinueFuture::ForSignature<OnSuccess && (const T&)>,
-            typename E = ValueType>
-  typename std::enable_if<!detail::has_no_args<OnSuccess>::value, ContinuedFuture>::type
-  Then(OnSuccess&& on_success) const {
-    return Then(std::forward<OnSuccess>(on_success), [](const Status& s) {
-      return Result<typename ContinuedFuture::ValueType>(s);
-    });
-  }
-
-  /// \brief Statusy overload without OnFailure
-  template <
-      typename OnSuccess,
-      typename ContinuedFuture = detail::ContinueFuture::ForSignature<OnSuccess && ()>,
-      typename E = ValueType>
-  typename std::enable_if<detail::has_no_args<OnSuccess>::value, ContinuedFuture>::type
-  Then(OnSuccess&& on_success) const {
-    static_assert(std::is_same<internal::Empty, ValueType>::value,
-                  "Then callback OnSuccess must receive const T&");
-    return Then(std::forward<OnSuccess>(on_success), [](const Status& s) {
-      return Result<typename ContinuedFuture::ValueType>(s);
-    });
-  }
-
   /// \brief Implicit constructor to create a finished future from a value
   Future(ValueType val) : Future() {  // NOLINT runtime/explicit
     impl_ = FutureImpl::MakeFinished(FutureState::SUCCESS);
diff --git a/cpp/src/arrow/util/future_test.cc b/cpp/src/arrow/util/future_test.cc
index b25d77c48cd..0db355433e8 100644
--- a/cpp/src/arrow/util/future_test.cc
+++ b/cpp/src/arrow/util/future_test.cc
@@ -1052,6 +1052,59 @@ TEST_F(FutureSchedulingTest, ScheduleIfUnfinished) {
   }
 }
 
+TEST_F(FutureSchedulingTest, ScheduleIfDifferentExecutor) {
+  struct : internal::Executor {
+    int GetCapacity() override { return pool_->GetCapacity(); }
+
+    bool OwnsThisThread() override { return pool_->OwnsThisThread(); }
+
+    Status SpawnReal(internal::TaskHints hints, internal::FnOnce<void()> task,
+                     StopToken stop_token, StopCallback&& stop_callback) override {
+      ++spawn_count;
+      return pool_->Spawn(hints, std::move(task), std::move(stop_token),
+                          std::move(stop_callback));
+    }
+
+    std::atomic<int> spawn_count{0};
+    internal::Executor* pool_ = internal::GetCpuThreadPool();
+  } executor;
+
+  CallbackOptions options;
+  options.executor = &executor;
+  options.should_schedule = ShouldSchedule::IfDifferentExecutor;
+  auto pass_err = [](const Status& s) { return s; };
+
+  std::atomic<bool> fut0_on_executor{false};
+  std::atomic<bool> fut1_on_executor{false};
+
+  auto fut0 = Future<>::Make();
+  auto fut1 = Future<>::Make();
+
+  auto fut0_done = fut0.Then(
+      [&] {
+        // marked finished on main thread -> must be scheduled to executor
+        fut0_on_executor.store(executor.OwnsThisThread());
+
+        fut1.MarkFinished();
+      },
+      pass_err, options);
+
+  auto fut1_done = fut1.Then(
+      [&] {
+        // marked finished on executor -> no need to schedule
+        fut1_on_executor.store(executor.OwnsThisThread());
+      },
+      pass_err, options);
+
+  fut0.MarkFinished();
+
+  AllComplete({fut0_done, fut1_done}).Wait();
+
+  ASSERT_EQ(executor.spawn_count, 1);
+  ASSERT_TRUE(fut0_on_executor);
+  ASSERT_TRUE(fut1_on_executor);
+}
+
 TEST_F(FutureSchedulingTest, ScheduleAlwaysKeepsFutureAliveUntilCallback) {
   CallbackOptions options;
   options.should_schedule = ShouldSchedule::Always;
@@ -1708,25 +1761,26 @@ TEST(FnOnceTest, MoveOnlyDataType) {
 
 TEST(FutureTest, MatcherExamples) {
   EXPECT_THAT(Future<int>::MakeFinished(Status::Invalid("arbitrary error")),
-              Raises(StatusCode::Invalid));
+              Finishes(Raises(StatusCode::Invalid)));
 
   EXPECT_THAT(Future<int>::MakeFinished(Status::Invalid("arbitrary error")),
-              Raises(StatusCode::Invalid, testing::HasSubstr("arbitrary")));
+              Finishes(Raises(StatusCode::Invalid, testing::HasSubstr("arbitrary"))));
 
   // message doesn't match, so no match
-  EXPECT_THAT(
-      Future<int>::MakeFinished(Status::Invalid("arbitrary error")),
-      testing::Not(Raises(StatusCode::Invalid, testing::HasSubstr("reasonable"))));
+  EXPECT_THAT(Future<int>::MakeFinished(Status::Invalid("arbitrary error")),
+              Finishes(testing::Not(
+                  Raises(StatusCode::Invalid, testing::HasSubstr("reasonable")))));
 
   // different error code, so no match
   EXPECT_THAT(Future<int>::MakeFinished(Status::TypeError("arbitrary error")),
-              testing::Not(Raises(StatusCode::Invalid)));
+              Finishes(testing::Not(Raises(StatusCode::Invalid))));
 
   // not an error, so no match
-  EXPECT_THAT(Future<int>::MakeFinished(333), testing::Not(Raises(StatusCode::Invalid)));
+  EXPECT_THAT(Future<int>::MakeFinished(333),
+              Finishes(testing::Not(Raises(StatusCode::Invalid))));
 
   EXPECT_THAT(Future<std::string>::MakeFinished("hello world"),
-              ResultWith(testing::HasSubstr("hello")));
+              Finishes(ResultWith(testing::HasSubstr("hello"))));
 
   // Matcher waits on Futures
   auto string_fut = Future<std::string>::Make();
@@ -1734,15 +1788,15 @@ TEST(FutureTest, MatcherExamples) {
     SleepABit();
     string_fut.MarkFinished("hello world");
   });
-  EXPECT_THAT(string_fut, ResultWith(testing::HasSubstr("hello")));
+  EXPECT_THAT(string_fut, Finishes(ResultWith(testing::HasSubstr("hello"))));
   finisher.join();
 
   EXPECT_THAT(Future<std::string>::MakeFinished(Status::Invalid("XXX")),
-              testing::Not(ResultWith(testing::HasSubstr("hello"))));
+              Finishes(testing::Not(ResultWith(testing::HasSubstr("hello")))));
 
   // holds a value, but that value doesn't match the given pattern
   EXPECT_THAT(Future<std::string>::MakeFinished("foo bar"),
-              testing::Not(ResultWith(testing::HasSubstr("hello"))));
+              Finishes(testing::Not(ResultWith(testing::HasSubstr("hello")))));
 }
 
 }  // namespace internal
diff --git a/cpp/src/arrow/util/thread_pool.h b/cpp/src/arrow/util/thread_pool.h
index febbc997852..9ac8e36a3d8 100644
--- a/cpp/src/arrow/util/thread_pool.h
+++ b/cpp/src/arrow/util/thread_pool.h
@@ -82,17 +82,31 @@ class ARROW_EXPORT Executor {
 
   // Spawn a fire-and-forget task.
   template <typename Function>
-  Status Spawn(Function&& func, StopToken stop_token = StopToken::Unstoppable()) {
+  Status Spawn(Function&& func) {
+    return SpawnReal(TaskHints{}, std::forward<Function>(func), StopToken::Unstoppable(),
+                     StopCallback{});
+  }
+  template <typename Function>
+  Status Spawn(Function&& func, StopToken stop_token) {
     return SpawnReal(TaskHints{}, std::forward<Function>(func), std::move(stop_token),
                      StopCallback{});
   }
-
   template <typename Function>
-  Status Spawn(TaskHints hints, Function&& func,
-               StopToken stop_token = StopToken::Unstoppable()) {
+  Status Spawn(TaskHints hints, Function&& func) {
+    return SpawnReal(hints, std::forward<Function>(func), StopToken::Unstoppable(),
+                     StopCallback{});
+  }
+  template <typename Function>
+  Status Spawn(TaskHints hints, Function&& func, StopToken stop_token) {
     return SpawnReal(hints, std::forward<Function>(func), std::move(stop_token),
                      StopCallback{});
   }
+  template <typename Function>
+  Status Spawn(TaskHints hints, Function&& func, StopToken stop_token,
+               StopCallback stop_callback) {
+    return SpawnReal(hints, std::forward<Function>(func), std::move(stop_token),
+                     std::move(stop_callback));
+  }
 
   // Transfers a future to this executor.  Any continuations added to the
   // returned future will run in this executor.  Otherwise they would run
@@ -237,7 +251,7 @@ class ARROW_EXPORT SerialExecutor : public Executor {
   template <typename T = ::arrow::internal::Empty>
   using TopLevelTask = internal::FnOnce<Future<T>(Executor*)>;
 
-  ~SerialExecutor();
+  ~SerialExecutor() override;
 
   int GetCapacity() override { return 1; };
   Status SpawnReal(TaskHints hints, FnOnce<void()> task, StopToken,
diff --git a/dev/archery/archery/lang/cpp.py b/dev/archery/archery/lang/cpp.py
index 045d23b56b1..c2b1ca68001 100644
--- a/dev/archery/archery/lang/cpp.py
+++ b/dev/archery/archery/lang/cpp.py
@@ -42,7 +42,7 @@ def __init__(self,
                  cc=None, cxx=None, cxx_flags=None,
                  build_type=None, warn_level=None,
                  cpp_package_prefix=None, install_prefix=None, use_conda=None,
-                 build_static=False, build_shared=True,
+                 build_static=False, build_shared=True, build_unity=True,
                  # tests & examples
                  with_tests=None, with_benchmarks=None, with_examples=None,
                  with_integration=None,
@@ -76,6 +76,7 @@ def __init__(self,
         self._use_conda = use_conda
         self.build_static = build_static
         self.build_shared = build_shared
+        self.build_unity = build_unity
 
         self.with_tests = with_tests
         self.with_benchmarks = with_benchmarks
@@ -176,7 +177,6 @@ def _gen_defs(self):
 
         yield ("CMAKE_EXPORT_COMPILE_COMMANDS", truthifier(True))
         yield ("CMAKE_BUILD_TYPE", self.build_type)
-        yield ("CMAKE_UNITY_BUILD", True)
 
         if not self.with_lint_only:
             yield ("BUILD_WARNING_LEVEL",
@@ -195,6 +195,7 @@ def _gen_defs(self):
 
         yield ("ARROW_BUILD_STATIC", truthifier(self.build_static))
         yield ("ARROW_BUILD_SHARED", truthifier(self.build_shared))
+        yield ("CMAKE_UNITY_BUILD", truthifier(self.build_unity))
 
         # Tests and benchmarks
         yield ("ARROW_BUILD_TESTS", truthifier(self.with_tests))

From 176a703599643377e6e1a8507fcee87528c313f1 Mon Sep 17 00:00:00 2001
From: Matthew Topol <mtopol@factset.com>
Date: Fri, 9 Jul 2021 16:35:46 -0400
Subject: [PATCH 536/719] ARROW-13225: [Go][FlightRPC][Integration] Implement
 Flight Custom Middleware and Integration Tests for Go

Closes #10633 from zeroshade/flight-middleware

Authored-by: Matthew Topol <mtopol@factset.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 dev/archery/archery/integration/tester_go.py  |  52 ++
 docs/source/status.rst                        |   6 +-
 go/arrow/flight/basic_auth_flight_test.go     |   3 +-
 go/arrow/flight/client.go                     | 196 +++++++
 go/arrow/flight/flight_middleware_test.go     | 297 +++++++++++
 go/arrow/flight/flight_test.go                |   2 +-
 go/arrow/flight/record_batch_reader.go        |  11 +
 go/arrow/flight/record_batch_writer.go        |  26 +-
 go/arrow/flight/server.go                     |  75 +++
 go/arrow/flight/server_auth.go                |  30 +-
 go/arrow/go.mod                               |  30 +-
 go/arrow/go.sum                               | 110 +++-
 .../arrow-flight-integration-client/main.go   |  53 ++
 .../arrow-flight-integration-server/main.go   |  44 ++
 .../internal/flight_integration/middleware.go |  58 ++
 .../internal/flight_integration/scenario.go   | 497 ++++++++++++++++++
 16 files changed, 1437 insertions(+), 53 deletions(-)
 create mode 100644 go/arrow/flight/flight_middleware_test.go
 create mode 100644 go/arrow/internal/flight_integration/cmd/arrow-flight-integration-client/main.go
 create mode 100644 go/arrow/internal/flight_integration/cmd/arrow-flight-integration-server/main.go
 create mode 100644 go/arrow/internal/flight_integration/middleware.go
 create mode 100644 go/arrow/internal/flight_integration/scenario.go

diff --git a/dev/archery/archery/integration/tester_go.py b/dev/archery/archery/integration/tester_go.py
index ea799c5a1bd..eeba38fe501 100644
--- a/dev/archery/archery/integration/tester_go.py
+++ b/dev/archery/archery/integration/tester_go.py
@@ -15,7 +15,9 @@
 # specific language governing permissions and limitations
 # under the License.
 
+import contextlib
 import os
+import subprocess
 
 from .tester import Tester
 from .util import run_cmd, log
@@ -24,6 +26,8 @@
 class GoTester(Tester):
     PRODUCER = True
     CONSUMER = True
+    FLIGHT_SERVER = True
+    FLIGHT_CLIENT = True
 
     # FIXME(sbinet): revisit for Go modules
     HOME = os.getenv('HOME', '~')
@@ -34,6 +38,12 @@ class GoTester(Tester):
     STREAM_TO_FILE = os.path.join(GOBIN, 'arrow-stream-to-file')
     FILE_TO_STREAM = os.path.join(GOBIN, 'arrow-file-to-stream')
 
+    FLIGHT_SERVER_CMD = [
+        os.path.join(GOBIN, 'arrow-flight-integration-server')]
+    FLIGHT_CLIENT_CMD = [
+        os.path.join(GOBIN, 'arrow-flight-integration-client'),
+        '-host', 'localhost']
+
     name = 'Go'
 
     def _run(self, arrow_path=None, json_path=None, command='VALIDATE'):
@@ -65,3 +75,45 @@ def stream_to_file(self, stream_path, file_path):
     def file_to_stream(self, file_path, stream_path):
         cmd = [self.FILE_TO_STREAM, file_path, '>', stream_path]
         self.run_shell_command(cmd)
+
+    @contextlib.contextmanager
+    def flight_server(self, scenario_name=None):
+        cmd = self.FLIGHT_SERVER_CMD + ['-port=0']
+        if scenario_name:
+            cmd = cmd + ['-scenario', scenario_name]
+        if self.debug:
+            log(' '.join(cmd))
+        server = subprocess.Popen(cmd,
+                                  stdout=subprocess.PIPE,
+                                  stderr=subprocess.PIPE)
+
+        try:
+            output = server.stdout.readline().decode()
+            if not output.startswith("Server listening on localhost:"):
+                server.kill()
+                out, err = server.communicate()
+                raise RuntimeError(
+                    "Flight-Go server did not start properly, "
+                    "stdout: \n{}\n\nstderr:\n{}\n"
+                    .format(output + out.decode(), err.decode())
+                )
+            port = int(output.split(":")[1])
+            yield port
+        finally:
+            server.kill()
+            server.wait(5)
+
+    def flight_request(self, port, json_path=None, scenario_name=None):
+        cmd = self.FLIGHT_CLIENT_CMD + [
+            '-port=' + str(port),
+        ]
+        if json_path:
+            cmd.extend(('-path', json_path))
+        elif scenario_name:
+            cmd.extend(('-scenario', scenario_name))
+        else:
+            raise TypeError("Must provide one of json_path or scenario_name")
+
+        if self.debug:
+            log(' '.join(cmd))
+        run_cmd(cmd)
diff --git a/docs/source/status.rst b/docs/source/status.rst
index e4e838a393c..48084187ef9 100644
--- a/docs/source/status.rst
+++ b/docs/source/status.rst
@@ -156,13 +156,13 @@ Flight RPC
 +-----------------------------+-------+-------+-------+------------+-------+-------+-------+
 | gRPC + TLS transport        | ✓     | ✓     | ✓     |            | ✓     |       |       |
 +-----------------------------+-------+-------+-------+------------+-------+-------+-------+
-| RPC error codes             | ✓     | ✓     |       |            | ✓     |       |       |
+| RPC error codes             | ✓     | ✓     | ✓     |            | ✓     |       |       |
 +-----------------------------+-------+-------+-------+------------+-------+-------+-------+
 | Authentication handlers     | ✓     | ✓     | ✓     |            | ✓ (2) |       |       |
 +-----------------------------+-------+-------+-------+------------+-------+-------+-------+
-| Custom client middleware    | ✓     | ✓     |       |            |       |       |       |
+| Custom client middleware    | ✓     | ✓     | ✓     |            |       |       |       |
 +-----------------------------+-------+-------+-------+------------+-------+-------+-------+
-| Custom server middleware    | ✓     | ✓     |       |            |       |       |       |
+| Custom server middleware    | ✓     | ✓     | ✓     |            |       |       |       |
 +-----------------------------+-------+-------+-------+------------+-------+-------+-------+
 
 Notes:
diff --git a/go/arrow/flight/basic_auth_flight_test.go b/go/arrow/flight/basic_auth_flight_test.go
index 0eb39eedcac..c008566f894 100644
--- a/go/arrow/flight/basic_auth_flight_test.go
+++ b/go/arrow/flight/basic_auth_flight_test.go
@@ -147,8 +147,7 @@ func TestErrorAuths(t *testing.T) {
 }
 
 func TestBasicAuthHelpers(t *testing.T) {
-	unary, stream := flight.CreateServerBearerTokenAuthInterceptors(&validator{})
-	s := flight.NewFlightServer(nil, grpc.UnaryInterceptor(unary), grpc.StreamInterceptor(stream))
+	s := flight.NewServerWithMiddleware(nil, []flight.ServerMiddleware{flight.CreateServerBasicAuthMiddleware(&validator{})})
 	s.Init("localhost:0")
 	f := &HeaderAuthTestFlight{}
 	s.RegisterFlightService(&flight.FlightServiceService{
diff --git a/go/arrow/flight/client.go b/go/arrow/flight/client.go
index 262b41d0269..735c08bc1f0 100644
--- a/go/arrow/flight/client.go
+++ b/go/arrow/flight/client.go
@@ -20,7 +20,9 @@ import (
 	"context"
 	"encoding/base64"
 	"io"
+	"runtime"
 	"strings"
+	"sync/atomic"
 
 	"golang.org/x/xerrors"
 	"google.golang.org/grpc"
@@ -45,6 +47,168 @@ type Client interface {
 	FlightServiceClient
 }
 
+type CustomClientMiddleware interface {
+	StartCall(ctx context.Context) context.Context
+}
+
+type ClientPostCallMiddleware interface {
+	CallCompleted(ctx context.Context, err error)
+}
+
+type ClientHeadersMiddleware interface {
+	HeadersReceived(ctx context.Context, md metadata.MD)
+}
+
+func CreateClientMiddleware(middleware CustomClientMiddleware) ClientMiddleware {
+	return ClientMiddleware{
+		Unary: func(ctx context.Context, method string, req, reply interface{}, cc *grpc.ClientConn, invoker grpc.UnaryInvoker, opts ...grpc.CallOption) error {
+			nctx := middleware.StartCall(ctx)
+			if nctx != nil {
+				ctx = nctx
+			}
+
+			if hdrs, ok := middleware.(ClientHeadersMiddleware); ok {
+				hdrmd := make(metadata.MD)
+				trailermd := make(metadata.MD)
+				opts = append(opts, grpc.Header(&hdrmd), grpc.Trailer(&trailermd))
+				defer func() {
+					hdrs.HeadersReceived(ctx, metadata.Join(hdrmd, trailermd))
+				}()
+			}
+
+			err := invoker(ctx, method, req, reply, cc, opts...)
+			if post, ok := middleware.(ClientPostCallMiddleware); ok {
+				post.CallCompleted(ctx, err)
+			}
+			return err
+		},
+		Stream: func(ctx context.Context, desc *grpc.StreamDesc, cc *grpc.ClientConn, method string, streamer grpc.Streamer, opts ...grpc.CallOption) (grpc.ClientStream, error) {
+			nctx := middleware.StartCall(ctx)
+			if nctx != nil {
+				ctx = nctx
+			}
+
+			cs, err := streamer(ctx, desc, cc, method, opts...)
+			hdrs, isHdrs := middleware.(ClientHeadersMiddleware)
+			post, isPostcall := middleware.(ClientPostCallMiddleware)
+			if !isPostcall && !isHdrs {
+				return cs, err
+			}
+
+			if err != nil {
+				if isHdrs {
+					md, _ := cs.Header()
+					hdrs.HeadersReceived(ctx, metadata.Join(md, cs.Trailer()))
+				}
+				if isPostcall {
+					post.CallCompleted(ctx, err)
+				}
+				return cs, err
+			}
+
+			// Grab the client stream context because when the finish function or the goroutine below will be
+			// executed it's not guaranteed cs.Context() will be valid.
+			csCtx := cs.Context()
+			finishChan := make(chan struct{})
+			isFinished := new(int32)
+			*isFinished = 0
+			finishFunc := func(err error) {
+
+				// since there are multiple code paths that could call finishFunc
+				// we need some sort of synchronization to guard against multiple
+				// calls to finish
+				if !atomic.CompareAndSwapInt32(isFinished, 0, 1) {
+					return
+				}
+
+				close(finishChan)
+				if isPostcall {
+					post.CallCompleted(csCtx, err)
+				}
+				if isHdrs {
+					hdrmd, _ := cs.Header()
+					hdrs.HeadersReceived(csCtx, metadata.Join(hdrmd, cs.Trailer()))
+				}
+			}
+			go func() {
+				select {
+				case <-finishChan:
+					// finish is being called by something else, no action necessary
+				case <-csCtx.Done():
+					finishFunc(csCtx.Err())
+				}
+			}()
+
+			newCS := &clientStream{
+				ClientStream: cs,
+				desc:         desc,
+				finishFn:     finishFunc,
+			}
+			// The `ClientStream` interface allows one to omit calling `Recv` if it's
+			// known that the result will be `io.EOF`. See
+			// http://stackoverflow.com/q/42915337
+			// In such cases, there's nothing that triggers the span to finish. We,
+			// therefore, set a finalizer so that the span and the context goroutine will
+			// at least be cleaned up when the garbage collector is run.
+			runtime.SetFinalizer(newCS, func(newcs *clientStream) {
+				newcs.finishFn(nil)
+			})
+			return newCS, nil
+		},
+	}
+}
+
+type clientStream struct {
+	grpc.ClientStream
+	desc     *grpc.StreamDesc
+	finishFn func(error)
+}
+
+func (cs *clientStream) Header() (metadata.MD, error) {
+	md, err := cs.ClientStream.Header()
+	if err != nil {
+		cs.finishFn(err)
+	}
+	return md, err
+}
+
+func (cs *clientStream) SendMsg(m interface{}) error {
+	err := cs.ClientStream.SendMsg(m)
+	if err != nil {
+		cs.finishFn(err)
+	}
+	return err
+}
+
+func (cs *clientStream) RecvMsg(m interface{}) error {
+	err := cs.ClientStream.RecvMsg(m)
+	if err == io.EOF {
+		cs.finishFn(nil)
+		return err
+	} else if err != nil {
+		cs.finishFn(err)
+		return err
+	}
+
+	if !cs.desc.ServerStreams {
+		cs.finishFn(nil)
+	}
+	return err
+}
+
+func (cs *clientStream) CloseSend() error {
+	err := cs.ClientStream.CloseSend()
+	if err != nil {
+		cs.finishFn(err)
+	}
+	return err
+}
+
+type ClientMiddleware struct {
+	Stream grpc.StreamClientInterceptor
+	Unary  grpc.UnaryClientInterceptor
+}
+
 type client struct {
 	conn        *grpc.ClientConn
 	authHandler ClientAuthHandler
@@ -60,6 +224,8 @@ type client struct {
 // Alternatively, a grpc client can be constructed as normal without this helper as the
 // grpc generated client code is still exported. This exists to add utility and helpers
 // around the authentication and passing the token with requests.
+//
+// Deprecated: prefer to use NewClientWithMiddleware
 func NewFlightClient(addr string, auth ClientAuthHandler, opts ...grpc.DialOption) (Client, error) {
 	if auth != nil {
 		opts = append([]grpc.DialOption{
@@ -76,6 +242,36 @@ func NewFlightClient(addr string, auth ClientAuthHandler, opts ...grpc.DialOptio
 	return &client{conn: conn, FlightServiceClient: NewFlightServiceClient(conn), authHandler: auth}, nil
 }
 
+// NewClientWithMiddleware takes a slice of middlewares in addition to the auth and address which will be
+// used by grpc and chained, the first middleware will be the outer most with the last middleware
+// being the inner most wrapper around the actual call. It also passes along the dialoptions passed in such
+// as TLS certs and so on.
+func NewClientWithMiddleware(addr string, auth ClientAuthHandler, middleware []ClientMiddleware, opts ...grpc.DialOption) (Client, error) {
+	unary := make([]grpc.UnaryClientInterceptor, 0, len(middleware))
+	stream := make([]grpc.StreamClientInterceptor, 0, len(middleware))
+	if auth != nil {
+		unary = append(unary, createClientAuthUnaryInterceptor(auth))
+		stream = append(stream, createClientAuthStreamInterceptor(auth))
+	}
+	if len(middleware) > 0 {
+		for _, m := range middleware {
+			if m.Unary != nil {
+				unary = append(unary, m.Unary)
+			}
+			if m.Stream != nil {
+				stream = append(stream, m.Stream)
+			}
+		}
+	}
+	opts = append(opts, grpc.WithChainUnaryInterceptor(unary...), grpc.WithChainStreamInterceptor(stream...))
+	conn, err := grpc.Dial(addr, opts...)
+	if err != nil {
+		return nil, err
+	}
+
+	return &client{conn: conn, FlightServiceClient: NewFlightServiceClient(conn), authHandler: auth}, nil
+}
+
 func (c *client) AuthenticateBasicToken(ctx context.Context, username, password string, opts ...grpc.CallOption) (context.Context, error) {
 	authCtx := metadata.AppendToOutgoingContext(ctx, "Authorization", "Basic "+base64.RawStdEncoding.EncodeToString([]byte(strings.Join([]string{username, password}, ":"))))
 
diff --git a/go/arrow/flight/flight_middleware_test.go b/go/arrow/flight/flight_middleware_test.go
new file mode 100644
index 00000000000..4227fee8e7d
--- /dev/null
+++ b/go/arrow/flight/flight_middleware_test.go
@@ -0,0 +1,297 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package flight_test
+
+import (
+	"context"
+	"io"
+	"testing"
+
+	"github.com/apache/arrow/go/arrow/flight"
+	"github.com/apache/arrow/go/arrow/internal/arrdata"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+	"google.golang.org/grpc"
+	"google.golang.org/grpc/metadata"
+)
+
+type ServerMiddlewareAddHeader struct {
+	ctx context.Context
+}
+
+func (s *ServerMiddlewareAddHeader) StartCall(ctx context.Context) context.Context {
+	grpc.SetHeader(ctx, metadata.Pairs("foo", "bar"))
+	s.ctx = ctx
+
+	return nil
+}
+
+func (s *ServerMiddlewareAddHeader) CallCompleted(ctx context.Context, err error) {
+	if s.ctx != ctx {
+		panic("invalid context")
+	}
+
+	grpc.SetTrailer(ctx, metadata.Pairs("super", "duper"))
+
+	if err != nil {
+		panic("got error")
+	}
+}
+
+type ServerTraceMiddleware struct{}
+
+type tracetestKey struct{}
+
+func (s ServerTraceMiddleware) StartCall(ctx context.Context) context.Context {
+	return context.WithValue(ctx, tracetestKey{}, "foobar")
+}
+
+func (s ServerTraceMiddleware) CallCompleted(ctx context.Context, _ error) {
+	v := ctx.Value(tracetestKey{}).(string)
+	if v != "foobar" {
+		panic("missing value from context in middleware test")
+	}
+}
+
+type ServerExpectHeaderMiddleware struct{}
+
+func (s ServerExpectHeaderMiddleware) StartCall(ctx context.Context) context.Context {
+	md, ok := metadata.FromIncomingContext(ctx)
+	if !ok {
+		panic("missing metadata headers")
+	}
+
+	bar := md.Get("foo")
+	if len(bar) != 1 || bar[0] != "bar" {
+		panic("incorrect header received: " + bar[0])
+	}
+
+	return nil
+}
+
+func (s ServerExpectHeaderMiddleware) CallCompleted(context.Context, error) {}
+
+func TestServerStreamMiddleware(t *testing.T) {
+	s := flight.NewServerWithMiddleware(nil, []flight.ServerMiddleware{
+		flight.CreateServerMiddleware(&ServerMiddlewareAddHeader{}),
+		flight.CreateServerMiddleware(ServerTraceMiddleware{}),
+	})
+	s.Init("localhost:0")
+	f := &flightServer{}
+	s.RegisterFlightService(&flight.FlightServiceService{
+		ListFlights: f.ListFlights,
+	})
+
+	go s.Serve()
+	defer s.Shutdown()
+
+	client, err := flight.NewClientWithMiddleware(s.Addr().String(), nil, nil, grpc.WithInsecure())
+	require.NoError(t, err)
+	defer client.Close()
+
+	flightStream, err := client.ListFlights(context.Background(), &flight.Criteria{})
+	require.NoError(t, err)
+
+	md, err := flightStream.Header()
+	assert.NoError(t, err)
+	assert.Equal(t, []string{"bar"}, md.Get("foo"))
+
+	for {
+		info, err := flightStream.Recv()
+		if err != nil {
+			if err == io.EOF {
+				break
+			}
+			assert.NoError(t, err)
+		}
+
+		fname := info.GetFlightDescriptor().GetPath()[0]
+		recs, ok := arrdata.Records[fname]
+		assert.True(t, ok)
+
+		sc, err := flight.DeserializeSchema(info.GetSchema(), f.mem)
+		assert.NoError(t, err)
+
+		assert.True(t, recs[0].Schema().Equal(sc))
+	}
+
+	md = flightStream.Trailer()
+	assert.Equal(t, []string{"duper"}, md.Get("super"))
+}
+
+func TestServerUnaryMiddleware(t *testing.T) {
+	s := flight.NewServerWithMiddleware(nil, []flight.ServerMiddleware{
+		flight.CreateServerMiddleware(&ServerMiddlewareAddHeader{}),
+		flight.CreateServerMiddleware(ServerTraceMiddleware{}),
+	})
+	s.Init("localhost:0")
+	f := &flightServer{}
+	s.RegisterFlightService(&flight.FlightServiceService{
+		GetSchema: f.GetSchema,
+	})
+
+	go s.Serve()
+	defer s.Shutdown()
+
+	client, err := flight.NewClientWithMiddleware(s.Addr().String(), nil, nil, grpc.WithInsecure())
+	require.NoError(t, err)
+	defer client.Close()
+
+	for name, testrecs := range arrdata.Records {
+		t.Run("flight get schema: "+name, func(t *testing.T) {
+			var (
+				hdrMD     metadata.MD
+				trailerMD metadata.MD
+			)
+			res, err := client.GetSchema(context.Background(), &flight.FlightDescriptor{Path: []string{name}}, grpc.Header(&hdrMD), grpc.Trailer(&trailerMD))
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			schema, err := flight.DeserializeSchema(res.GetSchema(), f.getmem())
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			if !testrecs[0].Schema().Equal(schema) {
+				t.Fatalf("schema not match: \ngot = %#v\nwant = %#v\n", schema, testrecs[0].Schema())
+			}
+
+			assert.Equal(t, []string{"bar"}, hdrMD.Get("foo"))
+			assert.Equal(t, []string{"duper"}, trailerMD.Get("super"))
+		})
+	}
+}
+
+type ClientTestSendHeaderMiddleware struct {
+	ctx context.Context
+	md  metadata.MD
+}
+
+func (c *ClientTestSendHeaderMiddleware) StartCall(ctx context.Context) context.Context {
+	c.ctx = context.WithValue(metadata.AppendToOutgoingContext(ctx, "foo", "bar"), tracetestKey{}, "super")
+	return c.ctx
+}
+
+func (c *ClientTestSendHeaderMiddleware) CallCompleted(ctx context.Context, err error) {
+	val := ctx.Value(tracetestKey{}).(string)
+	if val != "super" {
+		panic("invalid context client middleware")
+	}
+}
+
+func (c *ClientTestSendHeaderMiddleware) HeadersReceived(ctx context.Context, md metadata.MD) {
+	val := ctx.Value(tracetestKey{}).(string)
+	if val != "super" {
+		panic("invalid context client middleware")
+	}
+
+	c.md = md
+}
+
+func TestClientStreamMiddleware(t *testing.T) {
+	s := flight.NewServerWithMiddleware(nil, []flight.ServerMiddleware{
+		flight.CreateServerMiddleware(&ServerExpectHeaderMiddleware{}),
+		flight.CreateServerMiddleware(&ServerMiddlewareAddHeader{}),
+	})
+	s.Init("localhost:0")
+	f := &flightServer{}
+	s.RegisterFlightService(&flight.FlightServiceService{
+		ListFlights: f.ListFlights,
+	})
+
+	go s.Serve()
+	defer s.Shutdown()
+
+	middleware := &ClientTestSendHeaderMiddleware{}
+	client, err := flight.NewClientWithMiddleware(s.Addr().String(), nil, []flight.ClientMiddleware{
+		flight.CreateClientMiddleware(middleware),
+	}, grpc.WithInsecure())
+	require.NoError(t, err)
+	defer client.Close()
+
+	flightStream, err := client.ListFlights(context.Background(), &flight.Criteria{})
+	require.NoError(t, err)
+
+	for {
+		info, err := flightStream.Recv()
+		if err != nil {
+			if err == io.EOF {
+				break
+			}
+			assert.NoError(t, err)
+		}
+
+		fname := info.GetFlightDescriptor().GetPath()[0]
+		recs, ok := arrdata.Records[fname]
+		assert.True(t, ok)
+
+		sc, err := flight.DeserializeSchema(info.GetSchema(), f.mem)
+		assert.NoError(t, err)
+
+		assert.True(t, recs[0].Schema().Equal(sc))
+	}
+
+	assert.Equal(t, []string{"bar"}, middleware.md.Get("foo"))
+	assert.Equal(t, []string{"duper"}, middleware.md.Get("super"))
+}
+
+func TestClientUnaryMiddleware(t *testing.T) {
+	s := flight.NewServerWithMiddleware(nil, []flight.ServerMiddleware{
+		flight.CreateServerMiddleware(&ServerMiddlewareAddHeader{}),
+		flight.CreateServerMiddleware(ServerExpectHeaderMiddleware{}),
+	})
+	s.Init("localhost:0")
+	f := &flightServer{}
+	s.RegisterFlightService(&flight.FlightServiceService{
+		GetSchema: f.GetSchema,
+	})
+
+	go s.Serve()
+	defer s.Shutdown()
+
+	middle := &ClientTestSendHeaderMiddleware{}
+	client, err := flight.NewClientWithMiddleware(s.Addr().String(), nil, []flight.ClientMiddleware{
+		flight.CreateClientMiddleware(middle),
+	}, grpc.WithInsecure())
+
+	require.NoError(t, err)
+	defer client.Close()
+
+	for name, testrecs := range arrdata.Records {
+		t.Run("flight get schema: "+name, func(t *testing.T) {
+			res, err := client.GetSchema(context.Background(), &flight.FlightDescriptor{Path: []string{name}})
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			schema, err := flight.DeserializeSchema(res.GetSchema(), f.getmem())
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			if !testrecs[0].Schema().Equal(schema) {
+				t.Fatalf("schema not match: \ngot = %#v\nwant = %#v\n", schema, testrecs[0].Schema())
+			}
+
+			assert.Equal(t, []string{"bar"}, middle.md.Get("foo"))
+			assert.Equal(t, []string{"duper"}, middle.md.Get("super"))
+
+			middle.md = metadata.MD{}
+		})
+	}
+}
diff --git a/go/arrow/flight/flight_test.go b/go/arrow/flight/flight_test.go
index 41d6ebe2c26..a6a59f198bf 100644
--- a/go/arrow/flight/flight_test.go
+++ b/go/arrow/flight/flight_test.go
@@ -375,7 +375,7 @@ func TestFlightWithAppMetadata(t *testing.T) {
 	}
 }
 
-type flightErrorReturn struct {}
+type flightErrorReturn struct{}
 
 func (f *flightErrorReturn) DoGet(_ *flight.Ticket, _ flight.FlightService_DoGetServer) error {
 	return status.Error(codes.NotFound, "nofound")
diff --git a/go/arrow/flight/record_batch_reader.go b/go/arrow/flight/record_batch_reader.go
index 19b70127f44..1af3a58023c 100644
--- a/go/arrow/flight/record_batch_reader.go
+++ b/go/arrow/flight/record_batch_reader.go
@@ -40,6 +40,7 @@ type dataMessageReader struct {
 	msg      *ipc.Message
 
 	lastAppMetadata []byte
+	descr           *FlightDescriptor
 }
 
 func (d *dataMessageReader) Message() (*ipc.Message, error) {
@@ -51,10 +52,12 @@ func (d *dataMessageReader) Message() (*ipc.Message, error) {
 			d.msg = nil
 		}
 		d.lastAppMetadata = nil
+		d.descr = nil
 		return nil, err
 	}
 
 	d.lastAppMetadata = fd.AppMetadata
+	d.descr = fd.FlightDescriptor
 	d.msg = ipc.NewMessage(memory.NewBufferBytes(fd.DataHeader), memory.NewBufferBytes(fd.DataBody))
 	return d.msg, nil
 }
@@ -106,6 +109,14 @@ func (r *Reader) LatestAppMetadata() []byte {
 	return r.dmr.lastAppMetadata
 }
 
+// LatestFlightDescriptor returns a pointer to the last FlightDescriptor object
+// that was received in the most recently read FlightData message that was
+// processed by calling the Next function. The descriptor returned would correspond
+// to the record retrieved by calling Record().
+func (r *Reader) LatestFlightDescriptor() *FlightDescriptor {
+	return r.dmr.descr
+}
+
 // NewRecordReader constructs an ipc reader using the flight data stream reader
 // as the source of the ipc messages, opts passed will be passed to the underlying
 // ipc.Reader such as ipc.WithSchema and ipc.WithAllocator
diff --git a/go/arrow/flight/record_batch_writer.go b/go/arrow/flight/record_batch_writer.go
index 3e4c4d805ac..857b125fd44 100644
--- a/go/arrow/flight/record_batch_writer.go
+++ b/go/arrow/flight/record_batch_writer.go
@@ -58,14 +58,36 @@ func (f *flightPayloadWriter) Close() error { return nil }
 // are written.
 type Writer struct {
 	*ipc.Writer
-	pw *flightPayloadWriter
+	pw   *flightPayloadWriter
+	desc *FlightDescriptor
+}
+
+// SetFlightDescriptor sets the flight descriptor into the next payload that will
+// be written by the flight writer. It will only be put into the very next payload
+// and afterwards the writer will no longer keep it's pointer to the descriptor.
+func (w *Writer) SetFlightDescriptor(descr *FlightDescriptor) {
+	w.desc = descr
+}
+
+// Write writes a recordbatch payload and returns any error, implementing the arrio.Writer interface
+func (w *Writer) Write(rec array.Record) error {
+	if w.desc != nil {
+		w.pw.fd.FlightDescriptor = w.desc
+		defer func() {
+			w.desc = nil
+			w.pw.fd.FlightDescriptor = nil
+		}()
+	}
+	return w.Writer.Write(rec)
 }
 
 // WriteWithAppMetadata will write this record with the supplied application
 // metadata attached in the flightData message.
 func (w *Writer) WriteWithAppMetadata(rec array.Record, appMeta []byte) error {
 	w.pw.fd.AppMetadata = appMeta
-	defer func() { w.pw.fd.AppMetadata = nil }()
+	defer func() {
+		w.pw.fd.AppMetadata = nil
+	}()
 	return w.Write(rec)
 }
 
diff --git a/go/arrow/flight/server.go b/go/arrow/flight/server.go
index 90492a7b3a3..779bcfd0c04 100644
--- a/go/arrow/flight/server.go
+++ b/go/arrow/flight/server.go
@@ -17,6 +17,7 @@
 package flight
 
 import (
+	context "context"
 	"net"
 	"os"
 	"os/signal"
@@ -47,6 +48,46 @@ type Server interface {
 	RegisterFlightService(*FlightServiceService)
 }
 
+type CustomServerMiddleware interface {
+	// StartCall will be called with the current context of the call, grpc.SetHeader can be used to add outgoing headers
+	// if the returned context is non-nil, then it will be used as the new context being passed through the calls
+	StartCall(ctx context.Context) context.Context
+	// CallCompleted is a callback which is called with the return from the handler
+	// it will be nil if everything was successful or will be the error about to be returned
+	// to grpc
+	CallCompleted(ctx context.Context, err error)
+}
+
+func CreateServerMiddleware(middleware CustomServerMiddleware) ServerMiddleware {
+	return ServerMiddleware{
+		Unary: func(ctx context.Context, req interface{}, _ *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (ret interface{}, err error) {
+			nctx := middleware.StartCall(ctx)
+			if nctx != nil {
+				ctx = nctx
+			}
+
+			ret, err = handler(ctx, req)
+			middleware.CallCompleted(ctx, err)
+			return
+		},
+		Stream: func(srv interface{}, stream grpc.ServerStream, info *grpc.StreamServerInfo, handler grpc.StreamHandler) error {
+			ctx := middleware.StartCall(stream.Context())
+			if ctx != nil {
+				stream = &wrappedStream{ServerStream: stream, ctx: ctx}
+			}
+
+			err := handler(srv, stream)
+			middleware.CallCompleted(stream.Context(), err)
+			return err
+		},
+	}
+}
+
+type ServerMiddleware struct {
+	Stream grpc.StreamServerInterceptor
+	Unary  grpc.UnaryServerInterceptor
+}
+
 type server struct {
 	lis        net.Listener
 	sigChannel <-chan os.Signal
@@ -63,6 +104,8 @@ type server struct {
 // Alternatively, a grpc server can be created normally without this helper as the
 // grpc server generated code is still being exported. This only exists to allow
 // the utility of the helpers
+//
+// Deprecated: prefer to use NewServerWithMiddleware
 func NewFlightServer(auth ServerAuthHandler, opt ...grpc.ServerOption) Server {
 	if auth != nil {
 		opt = append([]grpc.ServerOption{
@@ -77,6 +120,38 @@ func NewFlightServer(auth ServerAuthHandler, opt ...grpc.ServerOption) Server {
 	}
 }
 
+// NewServerWithMiddleware takes a slice of middleware which will be used
+// by grpc and chained, the first middleware will be the outer most with the last
+// middleware being the inner most wrapper around the actual call. It also takes
+// any grpc Server options desired, such as TLS certs and so on which will just
+// be passed through to the underlying grpc server.
+//
+// Alternatively, a grpc server can be created normally without this helper as the
+// grpc server generated code is still being exported. This only exists to allow
+// the utility of the helpers
+func NewServerWithMiddleware(auth ServerAuthHandler, middleware []ServerMiddleware, opts ...grpc.ServerOption) Server {
+	unary := make([]grpc.UnaryServerInterceptor, 0, len(middleware))
+	stream := make([]grpc.StreamServerInterceptor, 0, len(middleware))
+	if auth != nil {
+		unary = append(unary, createServerAuthUnaryInterceptor(auth))
+		stream = append(stream, createServerAuthStreamInterceptor(auth))
+	}
+
+	if len(middleware) > 0 {
+		for _, m := range middleware {
+			if m.Unary != nil {
+				unary = append(unary, m.Unary)
+			}
+			if m.Stream != nil {
+				stream = append(stream, m.Stream)
+			}
+		}
+	}
+	opts = append(opts, grpc.ChainUnaryInterceptor(unary...), grpc.ChainStreamInterceptor(stream...))
+
+	return &server{server: grpc.NewServer(opts...), authHandler: auth}
+}
+
 func (s *server) Init(addr string) (err error) {
 	s.lis, err = net.Listen("tcp", addr)
 	return
diff --git a/go/arrow/flight/server_auth.go b/go/arrow/flight/server_auth.go
index 861e8ac5c41..7bff59517ff 100644
--- a/go/arrow/flight/server_auth.go
+++ b/go/arrow/flight/server_auth.go
@@ -70,12 +70,12 @@ type ServerAuthHandler interface {
 
 type authCtxKey struct{}
 
-type authWrappedStream struct {
+type wrappedStream struct {
 	grpc.ServerStream
 	ctx context.Context
 }
 
-func (a *authWrappedStream) Context() context.Context { return a.ctx }
+func (a *wrappedStream) Context() context.Context { return a.ctx }
 
 // AuthFromContext will return back whatever object was returned from `IsValid` for a
 // given request context allowing handlers to retrieve identifying information
@@ -136,7 +136,7 @@ func createServerAuthStreamInterceptor(auth ServerAuthHandler) grpc.StreamServer
 			return status.Errorf(codes.Unauthenticated, "auth-error: %s", err)
 		}
 
-		stream = &authWrappedStream{ServerStream: stream, ctx: context.WithValue(stream.Context(), authCtxKey{}, peerIdentity)}
+		stream = &wrappedStream{ServerStream: stream, ctx: context.WithValue(stream.Context(), authCtxKey{}, peerIdentity)}
 		return handler(srv, stream)
 	}
 }
@@ -186,7 +186,7 @@ func createServerBearerTokenStreamInterceptor(validator BasicAuthValidator) grpc
 			}
 		}
 
-		if auth == nil || len(auth) == 0 {
+		if len(auth) == 0 {
 			return status.Error(codes.Unauthenticated, "must authenticate first")
 		}
 
@@ -214,12 +214,16 @@ func createServerBearerTokenStreamInterceptor(validator BasicAuthValidator) grpc
 			if err != nil {
 				return err
 			}
-			return handler(srv, &authWrappedStream{ServerStream: stream, ctx: context.WithValue(stream.Context(), authCtxKey{}, identity)})
+			return handler(srv, &wrappedStream{ServerStream: stream, ctx: context.WithValue(stream.Context(), authCtxKey{}, identity)})
 		}
 		return status.Errorf(codes.Unauthenticated, "Only bearer token auth implemented")
 	}
 }
 
+// CreateServerBearerTokenAuthInterceptors returns grpc interceptors for basic auth handling
+// via bearer tokens. validator cannot be nil
+//
+// Deprecated: use CreateServerBasicAuthMiddleware instead
 func CreateServerBearerTokenAuthInterceptors(validator BasicAuthValidator) (grpc.UnaryServerInterceptor, grpc.StreamServerInterceptor) {
 	if validator == nil {
 		panic("validator cannot be nil")
@@ -227,3 +231,19 @@ func CreateServerBearerTokenAuthInterceptors(validator BasicAuthValidator) (grpc
 
 	return createServerBearerTokenUnaryInterceptor(validator), createServerBearerTokenStreamInterceptor(validator)
 }
+
+// CreateServerBasicAuthMiddleware returns a ServerMiddleware that can be passed to NewServerWithMiddleware
+// in order to automatically add interceptors which will properly enforce auth validation
+// as per the passed in BasicAuthValidator.
+//
+// validator cannot be nil.
+func CreateServerBasicAuthMiddleware(validator BasicAuthValidator) ServerMiddleware {
+	if validator == nil {
+		panic("validator cannot be nil")
+	}
+
+	return ServerMiddleware{
+		Unary:  createServerBearerTokenUnaryInterceptor(validator),
+		Stream: createServerBearerTokenStreamInterceptor(validator),
+	}
+}
diff --git a/go/arrow/go.mod b/go/arrow/go.mod
index 229a0fbb459..20f2f427b53 100644
--- a/go/arrow/go.mod
+++ b/go/arrow/go.mod
@@ -16,22 +16,20 @@
 
 module github.com/apache/arrow/go/arrow
 
-go 1.12
+go 1.15
 
 require (
-	github.com/davecgh/go-spew v1.1.0 // indirect
-	github.com/golang/protobuf v1.4.2
-	github.com/google/flatbuffers v1.11.0
-	github.com/google/go-cmp v0.5.4 // indirect
-	github.com/klauspost/compress v1.11.13
-	github.com/pierrec/lz4/v4 v4.1.4
-	github.com/pmezard/go-difflib v1.0.0 // indirect
-	github.com/stretchr/testify v1.2.0
-	golang.org/x/net v0.0.0-20200904194848-62affa334b73 // indirect
-	golang.org/x/sys v0.0.0-20200909081042-eff7692f9009 // indirect
-	golang.org/x/text v0.3.3 // indirect
-	golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543
-	google.golang.org/genproto v0.0.0-20200911024640-645f7a48b24f // indirect
-	google.golang.org/grpc v1.32.0
-	google.golang.org/protobuf v1.25.0
+	github.com/davecgh/go-spew v1.1.1 // indirect
+	github.com/golang/protobuf v1.5.2
+	github.com/google/flatbuffers v2.0.0+incompatible
+	github.com/google/go-cmp v0.5.6 // indirect
+	github.com/klauspost/compress v1.13.1
+	github.com/pierrec/lz4/v4 v4.1.8
+	github.com/stretchr/testify v1.7.0
+	golang.org/x/net v0.0.0-20210614182718-04defd469f4e // indirect
+	golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c // indirect
+	golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1
+	google.golang.org/genproto v0.0.0-20210630183607-d20f26d13c79 // indirect
+	google.golang.org/grpc v1.39.0
+	google.golang.org/protobuf v1.27.1
 )
diff --git a/go/arrow/go.sum b/go/arrow/go.sum
index 33ae1ba599e..0ac57bae563 100644
--- a/go/arrow/go.sum
+++ b/go/arrow/go.sum
@@ -1,14 +1,23 @@
 cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
+cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
 github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
+github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY=
 github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
 github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
 github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc=
-github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
+github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk=
+github.com/cncf/xds/go v0.0.0-20210312221358-fbca930ec8ed/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs=
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
 github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
 github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=
+github.com/envoyproxy/go-control-plane v0.9.9-0.20201210154907-fd9021fe5dad/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk=
+github.com/envoyproxy/go-control-plane v0.9.9-0.20210217033140-668b12f5399d/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk=
+github.com/envoyproxy/go-control-plane v0.9.9-0.20210512163311-63b5d3c536b0/go.mod h1:hliV/p42l8fGbc6Y9bQ70uLwIvmJyVE5k4iMKlh8wCQ=
 github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
+github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
 github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
 github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
 github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
@@ -20,72 +29,117 @@ github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrU
 github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w=
 github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
 github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8=
-github.com/golang/protobuf v1.4.2 h1:+Z5KGCizgyZCbGh1KZqA0fcLLkwbsjIzS4aV2v7wJX0=
 github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
-github.com/google/flatbuffers v1.11.0 h1:O7CEyB8Cb3/DmtxODGtLHcEvpr81Jm5qLg/hsHnxA2A=
-github.com/google/flatbuffers v1.11.0/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8=
+github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
+github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
+github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw=
+github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
+github.com/golang/snappy v0.0.3 h1:fHPg5GQYlCeLIPB9BZqMVR5nR9A+IM5zcgeTdjMYmLA=
+github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
+github.com/google/flatbuffers v2.0.0+incompatible h1:dicJ2oXwypfwUGnB2/TYWYEKiuk9eYQlQO/AnOHl5mI=
+github.com/google/flatbuffers v2.0.0+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8=
 github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
 github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
 github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
 github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
 github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
-github.com/google/go-cmp v0.5.4 h1:L8R9j+yAqZuZjsqh/z+F1NCffTKKLShY6zXTItVIZ8M=
-github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
-github.com/klauspost/compress v1.11.13 h1:eSvu8Tmq6j2psUJqJrLcWH6K3w5Dwc+qipbaA6eVEN4=
-github.com/klauspost/compress v1.11.13/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
-github.com/pierrec/lz4/v4 v4.1.4 h1:PjkB+qEooc9nw4F6Pxe/e0xaRdWz3suItXWxWqAO1QE=
-github.com/pierrec/lz4/v4 v4.1.4/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
+github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/google/go-cmp v0.5.6 h1:BKbKCqvP6I+rmFHt06ZmyQtvB8xAkWdhFyr0ZUNZcxQ=
+github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw=
+github.com/klauspost/compress v1.13.1 h1:wXr2uRxZTJXHLly6qhJabee5JqIhTRoLBhDOA74hDEQ=
+github.com/klauspost/compress v1.13.1/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg=
+github.com/pierrec/lz4/v4 v4.1.8 h1:ieHkV+i2BRzngO4Wd/3HGowuZStgq6QkPsD1eolNAO4=
+github.com/pierrec/lz4/v4 v4.1.8/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
-github.com/stretchr/testify v1.2.0 h1:LThGCOvhuJic9Gyd1VBCkhyUXmO8vKaBFvBsJ2k03rg=
-github.com/stretchr/testify v1.2.0/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
+github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=
+github.com/stretchr/objx v0.1.0 h1:4G4v2dO3VZwixGIRoQ5Lfboy6nUhCyYzaqnIAPPhYs4=
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
+github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
+github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
+go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI=
 golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
 golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
 golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
 golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
 golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
 golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
+golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 h1:VLliZ0d+/avPrXXH+OakdXhpJuEoBZuwh1m2j7U6Iug=
+golang.org/x/lint v0.0.0-20210508222113-6edffad5e616/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=
+golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
+golang.org/x/mod v0.4.2 h1:Gz96sIWK3OalVv/I/qNygP42zyoKp3xptRVCWRFEBvo=
+golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
 golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
 golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
 golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
 golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
 golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
-golang.org/x/net v0.0.0-20200904194848-62affa334b73 h1:MXfv8rhZWmFeqX3GNZRsd6vOLoaCHjYEX3qkRo3YBUA=
-golang.org/x/net v0.0.0-20200904194848-62affa334b73/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
+golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
+golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM=
+golang.org/x/net v0.0.0-20210614182718-04defd469f4e h1:XpT3nA5TvE525Ne3hInMh6+GETgn27Zfm9dxsThnX2Q=
+golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
 golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
+golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
 golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20200909081042-eff7692f9009 h1:W0lCpv29Hv0UaM1LXb9QlBHLNP8UFfcKjblhVCWftOM=
-golang.org/x/sys v0.0.0-20200909081042-eff7692f9009/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c h1:F1jZWGFhYfh0Ci55sIpILtKKK8p3i2/krTr0H1rg74I=
+golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
 golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
-golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k=
 golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.6 h1:aRYxNxv6iGQlyVaZmk6ZgYEDa+Jg18DxebPSrd6bg1M=
+golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
 golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
 golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
 golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
-golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
+golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
+golang.org/x/tools v0.1.4 h1:cVngSRcfgyZCzys3KYOpCFa+4dqX/Oub9tAq00ttGVs=
+golang.org/x/tools v0.1.4/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
+golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE=
+golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
 google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
 google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
 google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
+google.golang.org/genproto v0.0.0-20200513103714-09dca8ec2884/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
 google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
-google.golang.org/genproto v0.0.0-20200911024640-645f7a48b24f h1:Yv4xsIx7HZOoyUGSJ2ksDyWE2qIBXROsZKt2ny3hCGM=
-google.golang.org/genproto v0.0.0-20200911024640-645f7a48b24f/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
+google.golang.org/genproto v0.0.0-20210630183607-d20f26d13c79 h1:s1jFTXJryg4a1mew7xv03VZD8N9XjxFhk1o4Js4WvPQ=
+google.golang.org/genproto v0.0.0-20210630183607-d20f26d13c79/go.mod h1:yiaVoXHpRzHGyxV3o4DktVWY4mSUErTKaeEOq6C3t3U=
 google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
 google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg=
 google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY=
 google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
-google.golang.org/grpc v1.32.0 h1:zWTV+LMdc3kaiJMSTOFz2UgSBgx8RNQoTGiZu3fR9S0=
-google.golang.org/grpc v1.32.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak=
+google.golang.org/grpc v1.33.1/go.mod h1:fr5YgcSWrqhRRxogOsw7RzIpsmvOZ6IcH4kBYTpR3n0=
+google.golang.org/grpc v1.36.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU=
+google.golang.org/grpc v1.38.0/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM=
+google.golang.org/grpc v1.39.0 h1:Klz8I9kdtkIN6EpHHUOMLCYhTn/2WAe5a0s1hcBkdTI=
+google.golang.org/grpc v1.39.0/go.mod h1:PImNr+rS9TWYb2O4/emRugxiyHZ5JyHW5F+RPnDzfrE=
 google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
 google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
 google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
@@ -94,8 +148,16 @@ google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzi
 google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
 google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
 google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
-google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4=
-google.golang.org/protobuf v1.25.0 h1:Ejskq+SyPohKW+1uil0JJMtmHCgJPJ/qWTxr8qp+R4c=
 google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
+google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
+google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
+google.golang.org/protobuf v1.27.1 h1:SnqbnDw1V7RiZcXPx5MEeqPv2s79L9i7BJUlG/+RurQ=
+google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo=
+gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
 honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
 honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
diff --git a/go/arrow/internal/flight_integration/cmd/arrow-flight-integration-client/main.go b/go/arrow/internal/flight_integration/cmd/arrow-flight-integration-client/main.go
new file mode 100644
index 00000000000..b0c0bfbd98f
--- /dev/null
+++ b/go/arrow/internal/flight_integration/cmd/arrow-flight-integration-client/main.go
@@ -0,0 +1,53 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Client for use with Arrow Flight Integration tests via archery
+package main
+
+import (
+	"flag"
+	"fmt"
+	"time"
+
+	"github.com/apache/arrow/go/arrow/internal/flight_integration"
+	"google.golang.org/grpc"
+)
+
+var (
+	host     = flag.String("host", "localhost", "Server host to connect to")
+	port     = flag.Int("port", 31337, "Server port to connect to")
+	path     = flag.String("path", "", "Resource path to request")
+	scenario = flag.String("scenario", "", "Integration test scenario to run")
+)
+
+const retries = 3
+
+func main() {
+	flag.Parse()
+
+	c := flight_integration.GetScenario(*scenario, *path)
+	var err error
+	for i := 0; i < retries; i++ {
+		err = c.RunClient(fmt.Sprintf("%s:%d", *host, *port), grpc.WithInsecure())
+		if err == nil {
+			break
+		}
+		time.Sleep(time.Duration(i+1) * 500 * time.Millisecond)
+	}
+	if err != nil {
+		panic(err)
+	}
+}
diff --git a/go/arrow/internal/flight_integration/cmd/arrow-flight-integration-server/main.go b/go/arrow/internal/flight_integration/cmd/arrow-flight-integration-server/main.go
new file mode 100644
index 00000000000..7384a74077f
--- /dev/null
+++ b/go/arrow/internal/flight_integration/cmd/arrow-flight-integration-server/main.go
@@ -0,0 +1,44 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+	"flag"
+	"fmt"
+	"net"
+	"os"
+	"syscall"
+
+	"github.com/apache/arrow/go/arrow/internal/flight_integration"
+)
+
+var (
+	port     = flag.Int("port", 31337, "Server port to listen on")
+	scenario = flag.String("scenario", "", "Integration test scenario to run")
+)
+
+func main() {
+	flag.Parse()
+
+	s := flight_integration.GetScenario(*scenario)
+	srv := s.MakeServer(*port)
+	srv.Init(fmt.Sprintf("0.0.0.0:%d", *port))
+	srv.SetShutdownOnSignals(syscall.SIGTERM, os.Interrupt)
+	_, p, _ := net.SplitHostPort(srv.Addr().String())
+	fmt.Printf("Server listening on localhost:%s\n", p)
+	srv.Serve()
+}
diff --git a/go/arrow/internal/flight_integration/middleware.go b/go/arrow/internal/flight_integration/middleware.go
new file mode 100644
index 00000000000..073b2843953
--- /dev/null
+++ b/go/arrow/internal/flight_integration/middleware.go
@@ -0,0 +1,58 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package flight_integration
+
+import (
+	"context"
+
+	"google.golang.org/grpc"
+	"google.golang.org/grpc/metadata"
+)
+
+type testServerMiddleware struct{}
+
+func (testServerMiddleware) StartCall(ctx context.Context) context.Context {
+	var val string
+
+	md, ok := metadata.FromIncomingContext(ctx)
+	if ok {
+		received := md.Get("x-middleware")
+		if len(received) > 0 {
+			val = received[0]
+		}
+	}
+
+	grpc.SetHeader(ctx, metadata.Pairs("x-middleware", val))
+	return nil
+}
+
+func (testServerMiddleware) CallCompleted(_ context.Context, _ error) {}
+
+type testClientMiddleware struct {
+	received string
+}
+
+func (tm *testClientMiddleware) StartCall(ctx context.Context) context.Context {
+	return metadata.AppendToOutgoingContext(ctx, "x-middleware", "expected value")
+}
+
+func (tm *testClientMiddleware) HeadersReceived(_ context.Context, md metadata.MD) {
+	received := md.Get("x-middleware")
+	if len(received) > 0 {
+		tm.received = received[0]
+	}
+}
diff --git a/go/arrow/internal/flight_integration/scenario.go b/go/arrow/internal/flight_integration/scenario.go
new file mode 100644
index 00000000000..edafe0564ab
--- /dev/null
+++ b/go/arrow/internal/flight_integration/scenario.go
@@ -0,0 +1,497 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package flight_integration
+
+import (
+	"bytes"
+	"context"
+	"fmt"
+	"io"
+	"os"
+	"strconv"
+
+	"github.com/apache/arrow/go/arrow"
+	"github.com/apache/arrow/go/arrow/array"
+	"github.com/apache/arrow/go/arrow/flight"
+	"github.com/apache/arrow/go/arrow/internal/arrjson"
+	"github.com/apache/arrow/go/arrow/internal/testing/types"
+	"github.com/apache/arrow/go/arrow/ipc"
+	"github.com/apache/arrow/go/arrow/memory"
+	"golang.org/x/xerrors"
+	"google.golang.org/grpc"
+	"google.golang.org/grpc/codes"
+	"google.golang.org/grpc/status"
+	"google.golang.org/protobuf/proto"
+)
+
+type Scenario interface {
+	MakeServer(port int) flight.Server
+	RunClient(addr string, opts ...grpc.DialOption) error
+}
+
+func GetScenario(name string, args ...string) Scenario {
+	switch name {
+	case "auth:basic_proto":
+		return &authBasicProtoTester{}
+	case "middleware":
+		return &middlewareScenarioTester{}
+	case "":
+		if len(args) > 0 {
+			return &defaultIntegrationTester{path: args[0]}
+		}
+		return &defaultIntegrationTester{}
+	}
+	panic(fmt.Errorf("scenario not found: %s", name))
+}
+
+type integrationDataSet struct {
+	schema *arrow.Schema
+	chunks []array.Record
+}
+
+func consumeFlightLocation(ctx context.Context, loc *flight.Location, tkt *flight.Ticket, orig []array.Record, opts ...grpc.DialOption) error {
+	client, err := flight.NewClientWithMiddleware(loc.GetUri(), nil, nil, opts...)
+	if err != nil {
+		return err
+	}
+	defer client.Close()
+
+	stream, err := client.DoGet(ctx, tkt)
+	if err != nil {
+		return err
+	}
+
+	rdr, err := flight.NewRecordReader(stream)
+	if err != nil {
+		return err
+	}
+	defer rdr.Release()
+
+	for i, chunk := range orig {
+		if !rdr.Next() {
+			return xerrors.Errorf("got fewer batches than expected, received so far: %d, expected: %d", i, len(orig))
+		}
+
+		if !array.RecordEqual(chunk, rdr.Record()) {
+			return xerrors.Errorf("batch %d doesn't match", i)
+		}
+
+		if string(rdr.LatestAppMetadata()) != strconv.Itoa(i) {
+			return xerrors.Errorf("expected metadata value: %s, but got: %s", strconv.Itoa(i), string(rdr.LatestAppMetadata()))
+		}
+	}
+
+	if rdr.Next() {
+		return xerrors.Errorf("got more batches than the expected: %d", len(orig))
+	}
+
+	return nil
+}
+
+type defaultIntegrationTester struct {
+	port           int
+	path           string
+	uploadedChunks map[string]integrationDataSet
+}
+
+func (s *defaultIntegrationTester) RunClient(addr string, opts ...grpc.DialOption) error {
+	client, err := flight.NewClientWithMiddleware(addr, nil, nil, opts...)
+	if err != nil {
+		return err
+	}
+	defer client.Close()
+
+	ctx := context.Background()
+
+	arrow.RegisterExtensionType(types.NewUUIDType())
+	defer arrow.UnregisterExtensionType("uuid")
+
+	descr := &flight.FlightDescriptor{
+		Type: flight.FlightDescriptor_PATH,
+		Path: []string{s.path},
+	}
+
+	fmt.Println("Opening JSON file '", s.path, "'")
+	r, err := os.Open(s.path)
+	if err != nil {
+		return xerrors.Errorf("could not open JSON file: %q: %w", s.path, err)
+	}
+
+	rdr, err := arrjson.NewReader(r)
+	if err != nil {
+		return xerrors.Errorf("could not create JSON file reader from file: %q: %w", s.path, err)
+	}
+
+	dataSet := integrationDataSet{
+		chunks: make([]array.Record, 0),
+		schema: rdr.Schema(),
+	}
+
+	for {
+		rec, err := rdr.Read()
+		if err != nil {
+			if err == io.EOF {
+				break
+			}
+			return err
+		}
+		defer rec.Release()
+		dataSet.chunks = append(dataSet.chunks, rec)
+	}
+
+	stream, err := client.DoPut(ctx)
+	if err != nil {
+		return err
+	}
+
+	wr := flight.NewRecordWriter(stream, ipc.WithSchema(dataSet.schema))
+	wr.SetFlightDescriptor(descr)
+
+	for i, rec := range dataSet.chunks {
+		metadata := []byte(strconv.Itoa(i))
+		if err := wr.WriteWithAppMetadata(rec, metadata); err != nil {
+			return err
+		}
+
+		pr, err := stream.Recv()
+		if err != nil {
+			return err
+		}
+
+		acked := pr.GetAppMetadata()
+		switch {
+		case len(acked) == 0:
+			return xerrors.Errorf("expected metadata value: %s, but got nothing.", string(metadata))
+		case !bytes.Equal(metadata, acked):
+			return xerrors.Errorf("expected metadata value: %s, but got: %s", string(metadata), string(acked))
+		}
+	}
+
+	if err := stream.CloseSend(); err != nil {
+		return err
+	}
+
+	info, err := client.GetFlightInfo(ctx, descr)
+	if err != nil {
+		return err
+	}
+
+	if len(info.Endpoint) == 0 {
+		fmt.Fprintln(os.Stderr, "no endpoints returned from flight server.")
+		return xerrors.Errorf("no endpoints returned from flight server")
+	}
+
+	for _, ep := range info.Endpoint {
+		if len(ep.Location) == 0 {
+			return xerrors.Errorf("no locations returned from flight server")
+		}
+
+		for _, loc := range ep.Location {
+			consumeFlightLocation(ctx, loc, ep.Ticket, dataSet.chunks, opts...)
+		}
+	}
+
+	return nil
+}
+
+func (s *defaultIntegrationTester) MakeServer(port int) flight.Server {
+	s.port = port
+	s.uploadedChunks = make(map[string]integrationDataSet)
+	srv := flight.NewServerWithMiddleware(nil, nil)
+	srv.RegisterFlightService(&flight.FlightServiceService{
+		GetFlightInfo: s.GetFlightInfo,
+		DoGet:         s.DoGet,
+		DoPut:         s.DoPut,
+	})
+	return srv
+}
+
+func (s *defaultIntegrationTester) GetFlightInfo(ctx context.Context, in *flight.FlightDescriptor) (*flight.FlightInfo, error) {
+	if in.Type == flight.FlightDescriptor_PATH {
+		if len(in.Path) == 0 {
+			return nil, status.Error(codes.InvalidArgument, "invalid path")
+		}
+
+		data, ok := s.uploadedChunks[in.Path[0]]
+		if !ok {
+			return nil, status.Errorf(codes.NotFound, "could not find flight: %s", in.Path[0])
+		}
+
+		flightData := &flight.FlightInfo{
+			Schema:           flight.SerializeSchema(data.schema, memory.DefaultAllocator),
+			FlightDescriptor: in,
+			Endpoint: []*flight.FlightEndpoint{{
+				Ticket:   &flight.Ticket{Ticket: []byte(in.Path[0])},
+				Location: []*flight.Location{{Uri: fmt.Sprintf("127.0.0.1:%d", s.port)}},
+			}},
+			TotalRecords: 0,
+			TotalBytes:   -1,
+		}
+		for _, r := range data.chunks {
+			flightData.TotalRecords += r.NumRows()
+		}
+		return flightData, nil
+	}
+	return nil, status.Error(codes.Unimplemented, in.Type.String())
+}
+
+func (s *defaultIntegrationTester) DoGet(tkt *flight.Ticket, stream flight.FlightService_DoGetServer) error {
+	data, ok := s.uploadedChunks[string(tkt.Ticket)]
+	if !ok {
+		return status.Errorf(codes.NotFound, "could not find flight: %s", string(tkt.Ticket))
+	}
+
+	wr := flight.NewRecordWriter(stream, ipc.WithSchema(data.schema))
+	defer wr.Close()
+	for i, rec := range data.chunks {
+		wr.WriteWithAppMetadata(rec, []byte(strconv.Itoa(i)))
+	}
+
+	return nil
+}
+
+func (s *defaultIntegrationTester) DoPut(stream flight.FlightService_DoPutServer) error {
+	rdr, err := flight.NewRecordReader(stream)
+	if err != nil {
+		return status.Error(codes.Internal, err.Error())
+	}
+
+	var (
+		key     string
+		dataset integrationDataSet
+	)
+
+	// creating the reader should have gotten the first message which would
+	// have the schema, which should have a populated flight descriptor
+	desc := rdr.LatestFlightDescriptor()
+	if desc.Type != flight.FlightDescriptor_PATH || len(desc.Path) < 1 {
+		return status.Error(codes.InvalidArgument, "must specify a path")
+	}
+
+	key = desc.Path[0]
+	dataset.schema = rdr.Schema()
+	dataset.chunks = make([]array.Record, 0)
+	for rdr.Next() {
+		rec := rdr.Record()
+		rec.Retain()
+
+		dataset.chunks = append(dataset.chunks, rec)
+		if len(rdr.LatestAppMetadata()) > 0 {
+			stream.Send(&flight.PutResult{AppMetadata: rdr.LatestAppMetadata()})
+		}
+	}
+	s.uploadedChunks[key] = dataset
+	return nil
+}
+
+func CheckActionResults(ctx context.Context, client flight.Client, action *flight.Action, results []string) error {
+	stream, err := client.DoAction(ctx, action)
+	if err != nil {
+		return err
+	}
+	defer stream.CloseSend()
+
+	for _, expected := range results {
+		res, err := stream.Recv()
+		if err != nil {
+			return err
+		}
+
+		actual := string(res.Body)
+		if expected != actual {
+			return xerrors.Errorf("got wrong result: expected: %s, got: %s", expected, actual)
+		}
+	}
+
+	res, err := stream.Recv()
+	if res != nil || err != io.EOF {
+		return xerrors.New("action result stream had too many entries")
+	}
+	return nil
+}
+
+const (
+	authUsername = "arrow"
+	authPassword = "flight"
+)
+
+type authBasicValidator struct {
+	auth flight.BasicAuth
+}
+
+func (a *authBasicValidator) Authenticate(conn flight.AuthConn) error {
+	token, err := conn.Read()
+	if err != nil {
+		return err
+	}
+
+	var incoming flight.BasicAuth
+	if err = proto.Unmarshal(token, &incoming); err != nil {
+		return err
+	}
+
+	if incoming.Username != a.auth.Username || incoming.Password != a.auth.Password {
+		return status.Error(codes.Unauthenticated, "invalid token")
+	}
+
+	return conn.Send([]byte(a.auth.Username))
+}
+
+func (a *authBasicValidator) IsValid(token string) (interface{}, error) {
+	if token != a.auth.Username {
+		return nil, status.Error(codes.Unauthenticated, "invalid token")
+	}
+	return token, nil
+}
+
+type clientAuthBasic struct {
+	auth  *flight.BasicAuth
+	token string
+}
+
+func (c *clientAuthBasic) Authenticate(_ context.Context, conn flight.AuthConn) error {
+	if c.auth != nil {
+		data, err := proto.Marshal(c.auth)
+		if err != nil {
+			return err
+		}
+		if err = conn.Send(data); err != nil {
+			return err
+		}
+
+		token, err := conn.Read()
+		c.token = string(token)
+		if err != io.EOF {
+			return err
+		}
+	}
+	return nil
+}
+
+func (c *clientAuthBasic) GetToken(context.Context) (string, error) {
+	return c.token, nil
+}
+
+type authBasicProtoTester struct{}
+
+func (s *authBasicProtoTester) RunClient(addr string, opts ...grpc.DialOption) error {
+	auth := &clientAuthBasic{}
+
+	client, err := flight.NewClientWithMiddleware(addr, auth, nil, opts...)
+	if err != nil {
+		return err
+	}
+
+	ctx := context.Background()
+	stream, err := client.DoAction(ctx, &flight.Action{})
+	if err != nil {
+		return err
+	}
+
+	// should fail unauthenticated
+	_, err = stream.Recv()
+	st, ok := status.FromError(err)
+	if !ok {
+		return err
+	}
+
+	if st.Code() != codes.Unauthenticated {
+		return xerrors.Errorf("expected Unauthenticated, got %s", st.Code())
+	}
+
+	auth.auth = &flight.BasicAuth{Username: authUsername, Password: authPassword}
+	if err := client.Authenticate(ctx); err != nil {
+		return err
+	}
+	return CheckActionResults(ctx, client, &flight.Action{}, []string{authUsername})
+}
+
+func (s *authBasicProtoTester) MakeServer(_ int) flight.Server {
+	srv := flight.NewServerWithMiddleware(&authBasicValidator{
+		auth: flight.BasicAuth{Username: authUsername, Password: authPassword}}, nil)
+	srv.RegisterFlightService(&flight.FlightServiceService{
+		DoAction: s.DoAction,
+	})
+	return srv
+}
+
+func (authBasicProtoTester) DoAction(_ *flight.Action, stream flight.FlightService_DoActionServer) error {
+	auth := flight.AuthFromContext(stream.Context())
+	stream.Send(&flight.Result{Body: []byte(auth.(string))})
+	return nil
+}
+
+type middlewareScenarioTester struct{}
+
+func (m *middlewareScenarioTester) RunClient(addr string, opts ...grpc.DialOption) error {
+	tm := &testClientMiddleware{}
+	client, err := flight.NewClientWithMiddleware(addr, nil, []flight.ClientMiddleware{
+		flight.CreateClientMiddleware(tm)}, opts...)
+	if err != nil {
+		return err
+	}
+
+	ctx := context.Background()
+	// this call is expected to fail
+	_, err = client.GetFlightInfo(ctx, &flight.FlightDescriptor{Type: flight.FlightDescriptor_CMD})
+	if err == nil {
+		return xerrors.New("expected call to fail")
+	}
+
+	if tm.received != "expected value" {
+		return xerrors.Errorf("expected to receive header 'x-middleware: expected value', but instead got %s", tm.received)
+	}
+
+	fmt.Fprintln(os.Stderr, "Headers received successfully on failing call.")
+	tm.received = ""
+	_, err = client.GetFlightInfo(ctx, &flight.FlightDescriptor{Type: flight.FlightDescriptor_CMD, Cmd: []byte("success")})
+	if err != nil {
+		return err
+	}
+
+	if tm.received != "expected value" {
+		return xerrors.Errorf("expected to receive header 'x-middleware: expected value', but instead got %s", tm.received)
+	}
+	fmt.Fprintln(os.Stderr, "Headers received successfully on passing call.")
+	return nil
+}
+
+func (m *middlewareScenarioTester) MakeServer(_ int) flight.Server {
+	srv := flight.NewServerWithMiddleware(nil, []flight.ServerMiddleware{
+		flight.CreateServerMiddleware(testServerMiddleware{})})
+	srv.RegisterFlightService(&flight.FlightServiceService{
+		GetFlightInfo: m.GetFlightInfo,
+	})
+	return srv
+}
+
+func (m *middlewareScenarioTester) GetFlightInfo(ctx context.Context, desc *flight.FlightDescriptor) (*flight.FlightInfo, error) {
+	if desc.Type != flight.FlightDescriptor_CMD || string(desc.Cmd) != "success" {
+		return nil, status.Error(codes.Unknown, "unknown")
+	}
+
+	return &flight.FlightInfo{
+		Schema:           flight.SerializeSchema(arrow.NewSchema([]arrow.Field{}, nil), memory.DefaultAllocator),
+		FlightDescriptor: desc,
+		Endpoint: []*flight.FlightEndpoint{{
+			Ticket:   &flight.Ticket{Ticket: []byte("foo")},
+			Location: []*flight.Location{{Uri: "localhost:10010"}},
+		}},
+		TotalRecords: -1,
+		TotalBytes:   -1,
+	}, nil
+}

From 91f261fa9a7841fd914c5ed1d8e747fb4e510a5b Mon Sep 17 00:00:00 2001
From: Matthijs Brobbel <m1brobbel@gmail.com>
Date: Sun, 11 Jul 2021 22:28:03 +0200
Subject: [PATCH 537/719] MINOR: [C++] Fix a typo (#10694)

---
 cpp/src/arrow/array/validate.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/src/arrow/array/validate.cc b/cpp/src/arrow/array/validate.cc
index 6ac885f8443..5cc3bacf282 100644
--- a/cpp/src/arrow/array/validate.cc
+++ b/cpp/src/arrow/array/validate.cc
@@ -555,7 +555,7 @@ struct ValidateArrayFullImpl {
       const ArrayData& field = *data.child_data[i];
       const Status field_valid = ValidateArrayFull(field);
       if (!field_valid.ok()) {
-        return Status::Invalid("Struct child array #", i,
+        return Status::Invalid("Union child array #", i,
                                " invalid: ", field_valid.ToString());
       }
     }

From 38b34045d1e3bce4c7517ad6718b4ecf03e88fc7 Mon Sep 17 00:00:00 2001
From: Romain Francois <romain@rstudio.com>
Date: Mon, 12 Jul 2021 09:28:04 -0500
Subject: [PATCH 538/719] ARROW-13113: [R] use RTasks to manage parallel in
 converting arrow to R

Closes #10593 from romainfrancois/ARROW-13113_RTasks

Authored-by: Romain Francois <romain@rstudio.com>
Signed-off-by: Jonathan Keane <jkeane@gmail.com>
---
 r/R/arrowExports.R             |   4 +-
 r/R/chunked-array.R            |  12 +-
 r/src/RTasks.cpp               |  74 ++++++
 r/src/altrep.cpp               |   3 +-
 r/src/array_to_vector.cpp      | 422 +++++++++++++++------------------
 r/src/arrowExports.cpp         |  11 +-
 r/src/arrow_types.h            |   2 -
 r/src/r_task_group.h           |  51 ++++
 r/src/r_to_arrow.cpp           |  88 +------
 r/tests/testthat/test-altrep.R |  14 ++
 10 files changed, 341 insertions(+), 340 deletions(-)
 create mode 100644 r/src/RTasks.cpp
 create mode 100644 r/src/r_task_group.h

diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R
index 9c1e7dedad2..a5187b497ce 100644
--- a/r/R/arrowExports.R
+++ b/r/R/arrowExports.R
@@ -148,8 +148,8 @@ Array__as_vector <- function(array){
     .Call(`_arrow_Array__as_vector`, array)
 }
 
-ChunkedArray__as_vector <- function(chunked_array){
-    .Call(`_arrow_ChunkedArray__as_vector`, chunked_array)
+ChunkedArray__as_vector <- function(chunked_array, use_threads){
+    .Call(`_arrow_ChunkedArray__as_vector`, chunked_array, use_threads)
 }
 
 RecordBatch__to_dataframe <- function(batch, use_threads){
diff --git a/r/R/chunked-array.R b/r/R/chunked-array.R
index c58e5ac94f9..9465147a8ce 100644
--- a/r/R/chunked-array.R
+++ b/r/R/chunked-array.R
@@ -61,18 +61,18 @@
 #' # Pass items into chunked_array as separate objects to create chunks
 #' class_scores <- chunked_array(c(87, 88, 89), c(94, 93, 92), c(71, 72, 73))
 #' class_scores$num_chunks
-#' 
+#'
 #' # When taking a Slice from a chunked_array, chunks are preserved
 #' class_scores$Slice(2, length = 5)
-#' 
-#' # You can combine Take and SortIndices to return a ChunkedArray with 1 chunk 
+#'
+#' # You can combine Take and SortIndices to return a ChunkedArray with 1 chunk
 #' # containing all values, ordered.
 #' class_scores$Take(class_scores$SortIndices(descending = TRUE))
-#' 
+#'
 #' # If you pass a list into chunked_array, you get a list of length 1
 #' list_scores <- chunked_array(list(c(9.9, 9.6, 9.5), c(8.2, 8.3, 8.4), c(10.0, 9.9, 9.8)))
 #' list_scores$num_chunks
-#' 
+#'
 #' # When constructing a ChunkedArray, the first chunk is used to infer type.
 #' doubles <- chunked_array(c(1, 2, 3), c(5L, 6L, 7L))
 #' doubles$type
@@ -82,7 +82,7 @@ ChunkedArray <- R6Class("ChunkedArray", inherit = ArrowDatum,
     length = function() ChunkedArray__length(self),
     type_id = function() ChunkedArray__type(self)$id,
     chunk = function(i) Array$create(ChunkedArray__chunk(self, i)),
-    as_vector = function() ChunkedArray__as_vector(self),
+    as_vector = function() ChunkedArray__as_vector(self, option_use_threads()),
     Slice = function(offset, length = NULL) {
       if (is.null(length)) {
         ChunkedArray__Slice1(self, offset)
diff --git a/r/src/RTasks.cpp b/r/src/RTasks.cpp
new file mode 100644
index 00000000000..25bd944cc62
--- /dev/null
+++ b/r/src/RTasks.cpp
@@ -0,0 +1,74 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "./r_task_group.h"
+
+#if defined(ARROW_R_WITH_ARROW)
+
+namespace arrow {
+namespace r {
+
+RTasks::RTasks(bool use_threads)
+    : use_threads_(use_threads),
+      stop_source_(),
+      parallel_tasks_(use_threads
+                          ? arrow::internal::TaskGroup::MakeThreaded(
+                                arrow::internal::GetCpuThreadPool(), stop_source_.token())
+                          : nullptr) {}
+
+Status RTasks::Finish() {
+  Status status = Status::OK();
+
+  // run the delayed tasks now
+  for (auto& task : delayed_serial_tasks_) {
+    status &= std::move(task)();
+    if (!status.ok()) {
+      stop_source_.RequestStop();
+      break;
+    }
+  }
+
+  // then wait for the parallel tasks to finish
+  if (use_threads_) {
+    status &= parallel_tasks_->Finish();
+  }
+
+  return status;
+}
+
+void RTasks::Append(bool parallel, RTasks::Task&& task) {
+  if (parallel && use_threads_) {
+    parallel_tasks_->Append(std::move(task));
+  } else {
+    delayed_serial_tasks_.push_back(std::move(task));
+  }
+}
+
+void RTasks::Reset() {
+  delayed_serial_tasks_.clear();
+
+  stop_source_.Reset();
+  if (use_threads_) {
+    parallel_tasks_ = arrow::internal::TaskGroup::MakeThreaded(
+        arrow::internal::GetCpuThreadPool(), stop_source_.token());
+  }
+}
+
+}  // namespace r
+}  // namespace arrow
+
+#endif
diff --git a/r/src/altrep.cpp b/r/src/altrep.cpp
index f5f499ab3f6..c5d309b66a6 100644
--- a/r/src/altrep.cpp
+++ b/r/src/altrep.cpp
@@ -15,12 +15,11 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include <cpp11/altrep.hpp>
-
 #include "./arrow_types.h"
 
 #if defined(ARROW_R_WITH_ARROW)
 
+#include <cpp11/altrep.hpp>
 #if defined(HAS_ALTREP)
 
 #include <R_ext/Altrep.h>
diff --git a/r/src/array_to_vector.cpp b/r/src/array_to_vector.cpp
index a8f7191bf18..d5a5425966f 100644
--- a/r/src/array_to_vector.cpp
+++ b/r/src/array_to_vector.cpp
@@ -1,7 +1,7 @@
 // Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
+// or more contributor license agreements.  See the NOTICE file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
@@ -25,12 +25,12 @@
 #include <arrow/util/bitmap_reader.h>
 #include <arrow/util/bitmap_writer.h>
 #include <arrow/util/int_util.h>
-#include <arrow/util/parallel.h>
-#include <arrow/util/task_group.h>
 
 #include <cpp11/altrep.hpp>
 #include <type_traits>
 
+#include "./r_task_group.h"
+
 namespace arrow {
 
 using internal::checked_cast;
@@ -40,7 +40,8 @@ namespace r {
 
 class Converter {
  public:
-  explicit Converter(ArrayVector arrays) : arrays_(std::move(arrays)) {}
+  explicit Converter(const std::shared_ptr<ChunkedArray>& chunked_array)
+      : chunked_array_(std::move(chunked_array)) {}
 
   virtual ~Converter() {}
 
@@ -59,55 +60,79 @@ class Converter {
                                    R_xlen_t start, R_xlen_t n,
                                    size_t chunk_index) const = 0;
 
-  // ingest one array
-  Status IngestOne(SEXP data, const std::shared_ptr<arrow::Array>& array, R_xlen_t start,
-                   R_xlen_t n, size_t chunk_index) const {
-    if (array->null_count() == n) {
-      return Ingest_all_nulls(data, start, n);
-    } else {
-      return Ingest_some_nulls(data, array, start, n, chunk_index);
-    }
-  }
-
   // can this run in parallel ?
   virtual bool Parallel() const { return true; }
 
-  // Ingest all the arrays serially
-  Status IngestSerial(SEXP data) {
-    R_xlen_t k = 0, i = 0;
-    for (const auto& array : arrays_) {
-      auto n_chunk = array->length();
-      RETURN_NOT_OK(IngestOne(data, array, k, n_chunk, i));
-      k += n_chunk;
-      i++;
+  // converter is passed as self to outlive the scope of Converter::Convert()
+  SEXP ScheduleConvertTasks(RTasks& tasks, std::shared_ptr<Converter> self) {
+#if defined(HAS_ALTREP)
+    // special case when there is only one array
+    if (chunked_array_->num_chunks() == 1) {
+      const auto& array = chunked_array_->chunk(0);
+      if (arrow::r::GetBoolOption("arrow.use_altrep", true) && array->length() > 0 &&
+          array->null_count() == 0) {
+        switch (array->type()->id()) {
+          case arrow::Type::DOUBLE:
+            return arrow::r::MakeDoubleArrayNoNull(array);
+          case arrow::Type::INT32:
+            return arrow::r::MakeInt32ArrayNoNull(array);
+          default:
+            break;
+        }
+      }
     }
-    return Status::OK();
-  }
+#endif
 
-  // ingest the arrays in parallel
-  //
-  // for each array, add a task to the task group
-  //
-  // The task group is Finish() in the caller
-  // The converter itself is passed as `self` so that if one of the parallel ops
-  // hits `stop()`, we don't bail before `tg` is destroyed, which would cause a crash
-  void IngestParallel(SEXP data, const std::shared_ptr<arrow::internal::TaskGroup>& tg,
-                      std::shared_ptr<Converter> self) {
+    // allocating the R vector upfront
+    SEXP out = PROTECT(Allocate(chunked_array_->length()));
+
+    // for each array, fill the relevant slice of `out`, potentially in parallel
     R_xlen_t k = 0, i = 0;
-    for (const auto& array : arrays_) {
+    for (const auto& array : chunked_array_->chunks()) {
       auto n_chunk = array->length();
-      tg->Append([=] { return self->IngestOne(data, array, k, n_chunk, i); });
+
+      tasks.Append(Parallel(), [=] {
+        if (array->null_count() == n_chunk) {
+          return self->Ingest_all_nulls(out, k, n_chunk);
+        } else {
+          return self->Ingest_some_nulls(out, array, k, n_chunk, i);
+        }
+      });
+
       k += n_chunk;
       i++;
     }
+
+    UNPROTECT(1);
+    return out;
   }
 
   // Converter factory
-  static std::shared_ptr<Converter> Make(const std::shared_ptr<DataType>& type,
-                                         ArrayVector arrays);
+  static std::shared_ptr<Converter> Make(
+      const std::shared_ptr<ChunkedArray>& chunked_array);
+
+  static SEXP LazyConvert(const std::shared_ptr<ChunkedArray>& chunked_array,
+                          RTasks& tasks) {
+    auto converter = Make(chunked_array);
+    return converter->ScheduleConvertTasks(tasks, converter);
+  }
+
+  static SEXP Convert(const std::shared_ptr<ChunkedArray>& chunked_array,
+                      bool use_threads) {
+    RTasks tasks(use_threads);
+    SEXP out = PROTECT(Converter::LazyConvert(chunked_array, tasks));
+    StopIfNotOk(tasks.Finish());
+
+    UNPROTECT(1);
+    return out;
+  }
+
+  static SEXP Convert(const std::shared_ptr<Array>& array) {
+    return Convert(std::make_shared<ChunkedArray>(array), false);
+  }
 
  protected:
-  ArrayVector arrays_;
+  std::shared_ptr<ChunkedArray> chunked_array_;
 };
 
 template <typename SetNonNull, typename SetNull>
@@ -141,40 +166,13 @@ Status IngestSome(const std::shared_ptr<arrow::Array>& array, R_xlen_t n,
   return IngestSome(array, n, std::forward<SetNonNull>(set_non_null), nothing);
 }
 
-// Allocate + Ingest
-SEXP ArrayVector__as_vector(R_xlen_t n, const std::shared_ptr<DataType>& type,
-                            const ArrayVector& arrays) {
-#if defined(HAS_ALTREP)
-  // special case when there is only one array
-  if (arrays.size() == 1) {
-    const auto& array = arrays[0];
-    if (arrow::r::GetBoolOption("arrow.use_altrep", true) && array->length() > 0 &&
-        array->null_count() == 0) {
-      switch (type->id()) {
-        case arrow::Type::DOUBLE:
-          return arrow::r::MakeDoubleArrayNoNull(array);
-        case arrow::Type::INT32:
-          return arrow::r::MakeInt32ArrayNoNull(array);
-        default:
-          break;
-      }
-    }
-  }
-#endif
-
-  auto converter = Converter::Make(type, arrays);
-  SEXP data = PROTECT(converter->Allocate(n));
-  StopIfNotOk(converter->IngestSerial(data));
-  UNPROTECT(1);
-  return data;
-}
-
 template <typename Type>
 class Converter_Int : public Converter {
   using value_type = typename TypeTraits<Type>::ArrayType::value_type;
 
  public:
-  explicit Converter_Int(const ArrayVector& arrays) : Converter(arrays) {}
+  explicit Converter_Int(const std::shared_ptr<ChunkedArray>& chunked_array)
+      : Converter(chunked_array) {}
 
   SEXP Allocate(R_xlen_t n) const { return Rf_allocVector(INTSXP, n); }
 
@@ -208,7 +206,8 @@ class Converter_Double : public Converter {
   using value_type = typename TypeTraits<Type>::ArrayType::value_type;
 
  public:
-  explicit Converter_Double(const ArrayVector& arrays) : Converter(arrays) {}
+  explicit Converter_Double(const std::shared_ptr<ChunkedArray>& chunked_array)
+      : Converter(chunked_array) {}
 
   SEXP Allocate(R_xlen_t n) const { return Rf_allocVector(REALSXP, n); }
 
@@ -239,7 +238,8 @@ class Converter_Double : public Converter {
 
 class Converter_Date32 : public Converter {
  public:
-  explicit Converter_Date32(const ArrayVector& arrays) : Converter(arrays) {}
+  explicit Converter_Date32(const std::shared_ptr<ChunkedArray>& chunked_array)
+      : Converter(chunked_array) {}
 
   SEXP Allocate(R_xlen_t n) const {
     SEXP data = PROTECT(Rf_allocVector(REALSXP, n));
@@ -276,7 +276,8 @@ class Converter_Date32 : public Converter {
 template <typename StringArrayType>
 struct Converter_String : public Converter {
  public:
-  explicit Converter_String(const ArrayVector& arrays) : Converter(arrays) {}
+  explicit Converter_String(const std::shared_ptr<ChunkedArray>& chunked_array)
+      : Converter(chunked_array) {}
 
   SEXP Allocate(R_xlen_t n) const { return Rf_allocVector(STRSXP, n); }
 
@@ -414,7 +415,8 @@ struct Converter_String : public Converter {
 
 class Converter_Boolean : public Converter {
  public:
-  explicit Converter_Boolean(const ArrayVector& arrays) : Converter(arrays) {}
+  explicit Converter_Boolean(const std::shared_ptr<ChunkedArray>& chunked_array)
+      : Converter(chunked_array) {}
 
   SEXP Allocate(R_xlen_t n) const { return Rf_allocVector(LGLSXP, n); }
 
@@ -452,7 +454,8 @@ template <typename ArrayType>
 class Converter_Binary : public Converter {
  public:
   using offset_type = typename ArrayType::offset_type;
-  explicit Converter_Binary(const ArrayVector& arrays) : Converter(arrays) {}
+  explicit Converter_Binary(const std::shared_ptr<ChunkedArray>& chunked_array)
+      : Converter(chunked_array) {}
 
   SEXP Allocate(R_xlen_t n) const {
     SEXP res = PROTECT(Rf_allocVector(VECSXP, n));
@@ -496,8 +499,9 @@ class Converter_Binary : public Converter {
 
 class Converter_FixedSizeBinary : public Converter {
  public:
-  explicit Converter_FixedSizeBinary(const ArrayVector& arrays, int byte_width)
-      : Converter(arrays), byte_width_(byte_width) {}
+  explicit Converter_FixedSizeBinary(const std::shared_ptr<ChunkedArray>& chunked_array,
+                                     int byte_width)
+      : Converter(chunked_array), byte_width_(byte_width) {}
 
   SEXP Allocate(R_xlen_t n) const {
     SEXP res = PROTECT(Rf_allocVector(VECSXP, n));
@@ -546,25 +550,27 @@ class Converter_Dictionary : public Converter {
   std::shared_ptr<Array> dictionary_;
 
  public:
-  explicit Converter_Dictionary(const ArrayVector& arrays)
-      : Converter(arrays), need_unification_(NeedUnification()) {
+  explicit Converter_Dictionary(const std::shared_ptr<ChunkedArray>& chunked_array)
+      : Converter(chunked_array), need_unification_(NeedUnification()) {
     if (need_unification_) {
-      const auto& arr_first = checked_cast<const DictionaryArray&>(*arrays[0]);
+      const auto& arr_first =
+          checked_cast<const DictionaryArray&>(*chunked_array->chunk(0));
       const auto& arr_type = checked_cast<const DictionaryType&>(*arr_first.type());
       unifier_ = ValueOrStop(DictionaryUnifier::Make(arr_type.value_type()));
 
-      size_t n_arrays = arrays.size();
+      size_t n_arrays = chunked_array->num_chunks();
       arrays_transpose_.resize(n_arrays);
 
       for (size_t i = 0; i < n_arrays; i++) {
         const auto& dict_i =
-            *checked_cast<const DictionaryArray&>(*arrays[i]).dictionary();
+            *checked_cast<const DictionaryArray&>(*chunked_array->chunk(i)).dictionary();
         StopIfNotOk(unifier_->Unify(dict_i, &arrays_transpose_[i]));
       }
 
       StopIfNotOk(unifier_->GetResult(&out_type_, &dictionary_));
     } else {
-      const auto& dict_array = checked_cast<const DictionaryArray&>(*arrays_[0]);
+      const auto& dict_array =
+          checked_cast<const DictionaryArray&>(*chunked_array->chunk(0));
 
       auto indices = dict_array.indices();
       switch (indices->type_id()) {
@@ -666,13 +672,14 @@ class Converter_Dictionary : public Converter {
   }
 
   bool NeedUnification() {
-    int n = arrays_.size();
+    int n = chunked_array_->num_chunks();
     if (n < 2) {
       return false;
     }
-    const auto& arr_first = checked_cast<const DictionaryArray&>(*arrays_[0]);
+    const auto& arr_first =
+        checked_cast<const DictionaryArray&>(*chunked_array_->chunk(0));
     for (int i = 1; i < n; i++) {
-      const auto& arr = checked_cast<const DictionaryArray&>(*arrays_[i]);
+      const auto& arr = checked_cast<const DictionaryArray&>(*chunked_array_->chunk(i));
       if (!(arr_first.dictionary()->Equals(arr.dictionary()))) {
         return true;
       }
@@ -681,7 +688,9 @@ class Converter_Dictionary : public Converter {
   }
 
   bool GetOrdered() const {
-    return checked_cast<const DictionaryArray&>(*arrays_[0]).dict_type()->ordered();
+    return checked_cast<const DictionaryArray&>(*chunked_array_->chunk(0))
+        .dict_type()
+        ->ordered();
   }
 
   SEXP GetLevels() const {
@@ -693,8 +702,7 @@ class Converter_Dictionary : public Converter {
       cpp11::warning("Coercing dictionary values to R character factor levels");
     }
 
-    SEXP vec = PROTECT(ArrayVector__as_vector(dictionary_->length(), dictionary_->type(),
-                                              {dictionary_}));
+    SEXP vec = PROTECT(Converter::Convert(dictionary_));
     SEXP strings_vec = PROTECT(Rf_coerceVector(vec, STRSXP));
     UNPROTECT(2);
     return strings_vec;
@@ -703,18 +711,21 @@ class Converter_Dictionary : public Converter {
 
 class Converter_Struct : public Converter {
  public:
-  explicit Converter_Struct(const ArrayVector& arrays) : Converter(arrays), converters() {
-    auto first_array = checked_cast<const arrow::StructArray*>(this->arrays_[0].get());
+  explicit Converter_Struct(const std::shared_ptr<ChunkedArray>& chunked_array)
+      : Converter(chunked_array), converters() {
+    auto first_array =
+        checked_cast<const arrow::StructArray*>(this->chunked_array_->chunk(0).get());
     int nf = first_array->num_fields();
     for (int i = 0; i < nf; i++) {
       converters.push_back(
-          Converter::Make(first_array->field(i)->type(), {first_array->field(i)}));
+          Converter::Make(std::make_shared<ChunkedArray>(first_array->field(i))));
     }
   }
 
   SEXP Allocate(R_xlen_t n) const {
     // allocate a data frame column to host each array
-    auto first_array = checked_cast<const arrow::StructArray*>(this->arrays_[0].get());
+    auto first_array =
+        checked_cast<const arrow::StructArray*>(this->chunked_array_->chunk(0).get());
     auto type = first_array->struct_type();
     auto out =
         arrow::r::to_r_list(converters, [n](const std::shared_ptr<Converter>& converter) {
@@ -769,7 +780,8 @@ double ms_to_seconds(int64_t ms) { return static_cast<double>(ms) / 1000; }
 
 class Converter_Date64 : public Converter {
  public:
-  explicit Converter_Date64(const ArrayVector& arrays) : Converter(arrays) {}
+  explicit Converter_Date64(const std::shared_ptr<ChunkedArray>& chunked_array)
+      : Converter(chunked_array) {}
 
   SEXP Allocate(R_xlen_t n) const {
     cpp11::writable::doubles data(n);
@@ -801,7 +813,8 @@ class Converter_Date64 : public Converter {
 template <typename value_type, typename unit_type = TimeType>
 class Converter_Time : public Converter {
  public:
-  explicit Converter_Time(const ArrayVector& arrays) : Converter(arrays) {}
+  explicit Converter_Time(const std::shared_ptr<ChunkedArray>& chunked_array)
+      : Converter(chunked_array) {}
 
   SEXP Allocate(R_xlen_t n) const {
     cpp11::writable::doubles data(n);
@@ -855,13 +868,14 @@ class Converter_Time : public Converter {
 template <typename value_type>
 class Converter_Timestamp : public Converter_Time<value_type, TimestampType> {
  public:
-  explicit Converter_Timestamp(const ArrayVector& arrays)
-      : Converter_Time<value_type, TimestampType>(arrays) {}
+  explicit Converter_Timestamp(const std::shared_ptr<ChunkedArray>& chunked_array)
+      : Converter_Time<value_type, TimestampType>(chunked_array) {}
 
   SEXP Allocate(R_xlen_t n) const {
     cpp11::writable::doubles data(n);
     Rf_classgets(data, arrow::r::data::classes_POSIXct);
-    auto array = checked_cast<const TimestampArray*>(this->arrays_[0].get());
+    auto array =
+        checked_cast<const TimestampArray*>(this->chunked_array_->chunk(0).get());
     auto array_type = checked_cast<const TimestampType*>(array->type().get());
     std::string tzone = array_type->timezone();
     if (tzone.size() > 0) {
@@ -873,7 +887,8 @@ class Converter_Timestamp : public Converter_Time<value_type, TimestampType> {
 
 class Converter_Decimal : public Converter {
  public:
-  explicit Converter_Decimal(const ArrayVector& arrays) : Converter(arrays) {}
+  explicit Converter_Decimal(const std::shared_ptr<ChunkedArray>& chunked_array)
+      : Converter(chunked_array) {}
 
   SEXP Allocate(R_xlen_t n) const { return Rf_allocVector(REALSXP, n); }
 
@@ -906,9 +921,9 @@ class Converter_List : public Converter {
   std::shared_ptr<arrow::DataType> value_type_;
 
  public:
-  explicit Converter_List(const ArrayVector& arrays,
+  explicit Converter_List(const std::shared_ptr<ChunkedArray>& chunked_array,
                           const std::shared_ptr<arrow::DataType>& value_type)
-      : Converter(arrays), value_type_(value_type) {}
+      : Converter(chunked_array), value_type_(value_type) {}
 
   SEXP Allocate(R_xlen_t n) const {
     cpp11::writable::list res(n);
@@ -924,7 +939,7 @@ class Converter_List : public Converter {
     StopIfNotOk(builder->Finish(&array));
 
     // convert to an R object to store as the list' ptype
-    res.attr(arrow::r::symbols::ptype) = Array__as_vector(array);
+    res.attr(arrow::r::symbols::ptype) = Converter::Convert(array);
 
     return res;
   }
@@ -941,7 +956,7 @@ class Converter_List : public Converter {
 
     auto ingest_one = [&](R_xlen_t i) {
       auto slice = list_array->value_slice(i);
-      SET_VECTOR_ELT(data, i + start, Array__as_vector(slice));
+      SET_VECTOR_ELT(data, i + start, Converter::Convert(slice));
       return Status::OK();
     };
 
@@ -957,10 +972,10 @@ class Converter_FixedSizeList : public Converter {
   int list_size_;
 
  public:
-  explicit Converter_FixedSizeList(const ArrayVector& arrays,
+  explicit Converter_FixedSizeList(const std::shared_ptr<ChunkedArray>& chunked_array,
                                    const std::shared_ptr<arrow::DataType>& value_type,
                                    int list_size)
-      : Converter(arrays), value_type_(value_type), list_size_(list_size) {}
+      : Converter(chunked_array), value_type_(value_type), list_size_(list_size) {}
 
   SEXP Allocate(R_xlen_t n) const {
     cpp11::writable::list res(n);
@@ -975,7 +990,7 @@ class Converter_FixedSizeList : public Converter {
     StopIfNotOk(builder->Finish(&array));
 
     // convert to an R object to store as the list' ptype
-    res.attr(arrow::r::symbols::ptype) = Array__as_vector(array);
+    res.attr(arrow::r::symbols::ptype) = Converter::Convert(array);
 
     return res;
   }
@@ -992,7 +1007,7 @@ class Converter_FixedSizeList : public Converter {
 
     auto ingest_one = [&](R_xlen_t i) {
       auto slice = fixed_size_list_array.value_slice(i);
-      SET_VECTOR_ELT(data, i + start, Array__as_vector(slice));
+      SET_VECTOR_ELT(data, i + start, Converter::Convert(slice));
       return Status::OK();
     };
     return IngestSome(array, n, ingest_one);
@@ -1003,7 +1018,8 @@ class Converter_FixedSizeList : public Converter {
 
 class Converter_Int64 : public Converter {
  public:
-  explicit Converter_Int64(const ArrayVector& arrays) : Converter(arrays) {}
+  explicit Converter_Int64(const std::shared_ptr<ChunkedArray>& chunked_array)
+      : Converter(chunked_array) {}
 
   SEXP Allocate(R_xlen_t n) const {
     cpp11::writable::doubles data(n);
@@ -1042,7 +1058,8 @@ class Converter_Int64 : public Converter {
 
 class Converter_Null : public Converter {
  public:
-  explicit Converter_Null(const ArrayVector& arrays) : Converter(arrays) {}
+  explicit Converter_Null(const std::shared_ptr<ChunkedArray>& chunked_array)
+      : Converter(chunked_array) {}
 
   SEXP Allocate(R_xlen_t n) const {
     SEXP data = PROTECT(Rf_allocVector(LGLSXP, n));
@@ -1084,147 +1101,135 @@ bool GetBoolOption(const std::string& name, bool default_) {
   }
 }
 
-std::shared_ptr<Converter> Converter::Make(const std::shared_ptr<DataType>& type,
-                                           ArrayVector arrays) {
-  if (arrays.empty()) {
-    // slight hack for the 0-row case since the converters expect at least one
-    // chunk to process.
-    arrays.push_back(ValueOrStop(arrow::MakeArrayOfNull(type, 0)));
-  }
-
+std::shared_ptr<Converter> Converter::Make(
+    const std::shared_ptr<ChunkedArray>& chunked_array) {
+  const auto& type = chunked_array->type();
   switch (type->id()) {
     // direct support
     case Type::INT32:
-      return std::make_shared<arrow::r::Converter_Int<arrow::Int32Type>>(
-          std::move(arrays));
+      return std::make_shared<arrow::r::Converter_Int<arrow::Int32Type>>(chunked_array);
 
     case Type::DOUBLE:
       return std::make_shared<arrow::r::Converter_Double<arrow::DoubleType>>(
-          std::move(arrays));
+          chunked_array);
 
       // need to handle 1-bit case
     case Type::BOOL:
-      return std::make_shared<arrow::r::Converter_Boolean>(std::move(arrays));
+      return std::make_shared<arrow::r::Converter_Boolean>(chunked_array);
 
     case Type::BINARY:
       return std::make_shared<arrow::r::Converter_Binary<arrow::BinaryArray>>(
-          std::move(arrays));
+          chunked_array);
 
     case Type::LARGE_BINARY:
       return std::make_shared<arrow::r::Converter_Binary<arrow::LargeBinaryArray>>(
-          std::move(arrays));
+          chunked_array);
 
     case Type::FIXED_SIZE_BINARY:
       return std::make_shared<arrow::r::Converter_FixedSizeBinary>(
-          std::move(arrays),
-          checked_cast<const FixedSizeBinaryType&>(*type).byte_width());
+          chunked_array, checked_cast<const FixedSizeBinaryType&>(*type).byte_width());
 
       // handle memory dense strings
     case Type::STRING:
       return std::make_shared<arrow::r::Converter_String<arrow::StringArray>>(
-          std::move(arrays));
+          chunked_array);
 
     case Type::LARGE_STRING:
       return std::make_shared<arrow::r::Converter_String<arrow::LargeStringArray>>(
-          std::move(arrays));
+          chunked_array);
 
     case Type::DICTIONARY:
-      return std::make_shared<arrow::r::Converter_Dictionary>(std::move(arrays));
+      return std::make_shared<arrow::r::Converter_Dictionary>(chunked_array);
 
     case Type::DATE32:
-      return std::make_shared<arrow::r::Converter_Date32>(std::move(arrays));
+      return std::make_shared<arrow::r::Converter_Date32>(chunked_array);
 
     case Type::DATE64:
-      return std::make_shared<arrow::r::Converter_Date64>(std::move(arrays));
+      return std::make_shared<arrow::r::Converter_Date64>(chunked_array);
 
       // promotions to integer vector
     case Type::INT8:
-      return std::make_shared<arrow::r::Converter_Int<arrow::Int8Type>>(
-          std::move(arrays));
+      return std::make_shared<arrow::r::Converter_Int<arrow::Int8Type>>(chunked_array);
 
     case Type::UINT8:
-      return std::make_shared<arrow::r::Converter_Int<arrow::UInt8Type>>(
-          std::move(arrays));
+      return std::make_shared<arrow::r::Converter_Int<arrow::UInt8Type>>(chunked_array);
 
     case Type::INT16:
-      return std::make_shared<arrow::r::Converter_Int<arrow::Int16Type>>(
-          std::move(arrays));
+      return std::make_shared<arrow::r::Converter_Int<arrow::Int16Type>>(chunked_array);
 
     case Type::UINT16:
-      return std::make_shared<arrow::r::Converter_Int<arrow::UInt16Type>>(
-          std::move(arrays));
+      return std::make_shared<arrow::r::Converter_Int<arrow::UInt16Type>>(chunked_array);
 
       // promotions to numeric vector, if they don't fit into int32
     case Type::UINT32:
-      if (ArraysCanFitInteger(arrays)) {
+      if (ArraysCanFitInteger(chunked_array->chunks())) {
         return std::make_shared<arrow::r::Converter_Int<arrow::UInt32Type>>(
-            std::move(arrays));
+            chunked_array);
       } else {
         return std::make_shared<arrow::r::Converter_Double<arrow::UInt32Type>>(
-            std::move(arrays));
+            chunked_array);
       }
 
     case Type::UINT64:
-      if (ArraysCanFitInteger(arrays)) {
+      if (ArraysCanFitInteger(chunked_array->chunks())) {
         return std::make_shared<arrow::r::Converter_Int<arrow::UInt64Type>>(
-            std::move(arrays));
+            chunked_array);
       } else {
         return std::make_shared<arrow::r::Converter_Double<arrow::UInt64Type>>(
-            std::move(arrays));
+            chunked_array);
       }
 
     case Type::HALF_FLOAT:
       return std::make_shared<arrow::r::Converter_Double<arrow::HalfFloatType>>(
-          std::move(arrays));
+          chunked_array);
 
     case Type::FLOAT:
       return std::make_shared<arrow::r::Converter_Double<arrow::FloatType>>(
-          std::move(arrays));
+          chunked_array);
 
       // time32 and time64
     case Type::TIME32:
-      return std::make_shared<arrow::r::Converter_Time<int32_t>>(std::move(arrays));
+      return std::make_shared<arrow::r::Converter_Time<int32_t>>(chunked_array);
 
     case Type::TIME64:
-      return std::make_shared<arrow::r::Converter_Time<int64_t>>(std::move(arrays));
+      return std::make_shared<arrow::r::Converter_Time<int64_t>>(chunked_array);
 
     case Type::TIMESTAMP:
-      return std::make_shared<arrow::r::Converter_Timestamp<int64_t>>(std::move(arrays));
+      return std::make_shared<arrow::r::Converter_Timestamp<int64_t>>(chunked_array);
 
     case Type::INT64:
       // Prefer integer if it fits, unless option arrow.int64_downcast is `false`
-      if (GetBoolOption("arrow.int64_downcast", true) && ArraysCanFitInteger(arrays)) {
-        return std::make_shared<arrow::r::Converter_Int<arrow::Int64Type>>(
-            std::move(arrays));
+      if (GetBoolOption("arrow.int64_downcast", true) &&
+          ArraysCanFitInteger(chunked_array->chunks())) {
+        return std::make_shared<arrow::r::Converter_Int<arrow::Int64Type>>(chunked_array);
       } else {
-        return std::make_shared<arrow::r::Converter_Int64>(std::move(arrays));
+        return std::make_shared<arrow::r::Converter_Int64>(chunked_array);
       }
 
     case Type::DECIMAL:
-      return std::make_shared<arrow::r::Converter_Decimal>(std::move(arrays));
+      return std::make_shared<arrow::r::Converter_Decimal>(chunked_array);
 
       // nested
     case Type::STRUCT:
-      return std::make_shared<arrow::r::Converter_Struct>(std::move(arrays));
+      return std::make_shared<arrow::r::Converter_Struct>(chunked_array);
 
     case Type::LIST:
       return std::make_shared<arrow::r::Converter_List<arrow::ListArray>>(
-          std::move(arrays),
-          checked_cast<const arrow::ListType*>(type.get())->value_type());
+          chunked_array, checked_cast<const arrow::ListType*>(type.get())->value_type());
 
     case Type::LARGE_LIST:
       return std::make_shared<arrow::r::Converter_List<arrow::LargeListArray>>(
-          std::move(arrays),
+          chunked_array,
           checked_cast<const arrow::LargeListType*>(type.get())->value_type());
 
     case Type::FIXED_SIZE_LIST:
       return std::make_shared<arrow::r::Converter_FixedSizeList>(
-          std::move(arrays),
+          chunked_array,
           checked_cast<const arrow::FixedSizeListType&>(*type).value_type(),
           checked_cast<const arrow::FixedSizeListType&>(*type).list_size());
 
     case Type::NA:
-      return std::make_shared<arrow::r::Converter_Null>(std::move(arrays));
+      return std::make_shared<arrow::r::Converter_Null>(chunked_array);
 
     default:
       break;
@@ -1233,54 +1238,32 @@ std::shared_ptr<Converter> Converter::Make(const std::shared_ptr<DataType>& type
   cpp11::stop("cannot handle Array of type ", type->name().c_str());
 }
 
-cpp11::writable::list to_dataframe_serial(
-    int64_t nr, int64_t nc, const cpp11::writable::strings& names,
-    const std::vector<std::shared_ptr<Converter>>& converters) {
-  cpp11::writable::list tbl(nc);
-  for (int i = 0; i < nc; i++) {
-    SEXP column = tbl[i] = converters[i]->Allocate(nr);
-    StopIfNotOk(converters[i]->IngestSerial(column));
-  }
-  tbl.attr(R_NamesSymbol) = names;
-  tbl.attr(R_ClassSymbol) = arrow::r::data::classes_tbl_df;
-  tbl.attr(R_RowNamesSymbol) = arrow::r::short_row_names(nr);
-  return tbl;
+std::shared_ptr<ChunkedArray> to_chunks(const std::shared_ptr<Array>& array) {
+  return std::make_shared<ChunkedArray>(array);
 }
 
-cpp11::writable::list to_dataframe_parallel(
-    int64_t nr, int64_t nc, const cpp11::writable::strings& names,
-    const std::vector<std::shared_ptr<Converter>>& converters) {
-  cpp11::writable::list tbl(nc);
+std::shared_ptr<ChunkedArray> to_chunks(
+    const std::shared_ptr<ChunkedArray>& chunked_array) {
+  return chunked_array;
+}
 
-  // task group to ingest data in parallel
-  auto tg = arrow::internal::TaskGroup::MakeThreaded(arrow::internal::GetCpuThreadPool());
+template <typename Rectangle>
+cpp11::writable::list to_data_frame(const std::shared_ptr<Rectangle>& data,
+                                    bool use_threads) {
+  int64_t nc = data->num_columns();
+  int64_t nr = data->num_rows();
+  cpp11::writable::strings names(nc);
 
-  // allocate and start ingesting immediately the columns that
-  // can be ingested in parallel, i.e. when ingestion no longer
-  // need to happen on the main thread
-  for (int i = 0; i < nc; i++) {
-    // allocate data for column i
-    SEXP column = tbl[i] = converters[i]->Allocate(nr);
+  arrow::r::RTasks tasks(use_threads);
 
-    // add a task to ingest data of that column if that can be done in parallel
-    if (converters[i]->Parallel()) {
-      converters[i]->IngestParallel(column, tg, converters[i]);
-    }
-  }
-
-  arrow::Status status = arrow::Status::OK();
+  cpp11::writable::list tbl(nc);
 
-  // ingest the columns that cannot be dealt with in parallel
   for (int i = 0; i < nc; i++) {
-    if (!converters[i]->Parallel()) {
-      status &= converters[i]->IngestSerial(tbl[i]);
-    }
+    names[i] = data->schema()->field(i)->name();
+    tbl[i] = Converter::LazyConvert(to_chunks(data->column(i)), tasks);
   }
 
-  // wait for the ingestion to be finished
-  status &= tg->Finish();
-
-  StopIfNotOk(status);
+  StopIfNotOk(tasks.Finish());
 
   tbl.attr(R_NamesSymbol) = names;
   tbl.attr(R_ClassSymbol) = arrow::r::data::classes_tbl_df;
@@ -1294,60 +1277,25 @@ cpp11::writable::list to_dataframe_parallel(
 
 // [[arrow::export]]
 SEXP Array__as_vector(const std::shared_ptr<arrow::Array>& array) {
-  return arrow::r::ArrayVector__as_vector(array->length(), array->type(), {array});
+  return arrow::r::Converter::Convert(array);
 }
 
 // [[arrow::export]]
-SEXP ChunkedArray__as_vector(const std::shared_ptr<arrow::ChunkedArray>& chunked_array) {
-  if (chunked_array->num_chunks() == 1) {
-    return Array__as_vector(chunked_array->chunk(0));
-  }
-
-  return arrow::r::ArrayVector__as_vector(chunked_array->length(), chunked_array->type(),
-                                          chunked_array->chunks());
+SEXP ChunkedArray__as_vector(const std::shared_ptr<arrow::ChunkedArray>& chunked_array,
+                             bool use_threads = false) {
+  return arrow::r::Converter::Convert(chunked_array, use_threads);
 }
 
 // [[arrow::export]]
 cpp11::writable::list RecordBatch__to_dataframe(
     const std::shared_ptr<arrow::RecordBatch>& batch, bool use_threads) {
-  int64_t nc = batch->num_columns();
-  int64_t nr = batch->num_rows();
-  cpp11::writable::strings names(nc);
-  std::vector<arrow::ArrayVector> arrays(nc);
-  std::vector<std::shared_ptr<arrow::r::Converter>> converters(nc);
-
-  for (R_xlen_t i = 0; i < nc; i++) {
-    names[i] = batch->column_name(i);
-    arrays[i] = {batch->column(i)};
-    converters[i] = arrow::r::Converter::Make(batch->column(i)->type(), arrays[i]);
-  }
-
-  if (use_threads) {
-    return arrow::r::to_dataframe_parallel(nr, nc, names, converters);
-  } else {
-    return arrow::r::to_dataframe_serial(nr, nc, names, converters);
-  }
+  return arrow::r::to_data_frame(batch, use_threads);
 }
 
 // [[arrow::export]]
 cpp11::writable::list Table__to_dataframe(const std::shared_ptr<arrow::Table>& table,
                                           bool use_threads) {
-  int64_t nc = table->num_columns();
-  int64_t nr = table->num_rows();
-  cpp11::writable::strings names(nc);
-  std::vector<std::shared_ptr<arrow::r::Converter>> converters(nc);
-
-  for (R_xlen_t i = 0; i < nc; i++) {
-    converters[i] =
-        arrow::r::Converter::Make(table->column(i)->type(), table->column(i)->chunks());
-    names[i] = table->field(i)->name();
-  }
-
-  if (use_threads) {
-    return arrow::r::to_dataframe_parallel(nr, nc, names, converters);
-  } else {
-    return arrow::r::to_dataframe_serial(nr, nc, names, converters);
-  }
+  return arrow::r::to_data_frame(table, use_threads);
 }
 
 #endif
diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp
index b5bd751af02..19095a4cbde 100644
--- a/r/src/arrowExports.cpp
+++ b/r/src/arrowExports.cpp
@@ -583,15 +583,16 @@ extern "C" SEXP _arrow_Array__as_vector(SEXP array_sexp){
 
 // array_to_vector.cpp
 #if defined(ARROW_R_WITH_ARROW)
-SEXP ChunkedArray__as_vector(const std::shared_ptr<arrow::ChunkedArray>& chunked_array);
-extern "C" SEXP _arrow_ChunkedArray__as_vector(SEXP chunked_array_sexp){
+SEXP ChunkedArray__as_vector(const std::shared_ptr<arrow::ChunkedArray>& chunked_array, bool use_threads);
+extern "C" SEXP _arrow_ChunkedArray__as_vector(SEXP chunked_array_sexp, SEXP use_threads_sexp){
 BEGIN_CPP11
 	arrow::r::Input<const std::shared_ptr<arrow::ChunkedArray>&>::type chunked_array(chunked_array_sexp);
-	return cpp11::as_sexp(ChunkedArray__as_vector(chunked_array));
+	arrow::r::Input<bool>::type use_threads(use_threads_sexp);
+	return cpp11::as_sexp(ChunkedArray__as_vector(chunked_array, use_threads));
 END_CPP11
 }
 #else
-extern "C" SEXP _arrow_ChunkedArray__as_vector(SEXP chunked_array_sexp){
+extern "C" SEXP _arrow_ChunkedArray__as_vector(SEXP chunked_array_sexp, SEXP use_threads_sexp){
 	Rf_error("Cannot call ChunkedArray__as_vector(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
 }
 #endif
@@ -6977,7 +6978,7 @@ static const R_CallMethodDef CallEntries[] = {
 		{ "_arrow_ListArray__raw_value_offsets", (DL_FUNC) &_arrow_ListArray__raw_value_offsets, 1}, 
 		{ "_arrow_LargeListArray__raw_value_offsets", (DL_FUNC) &_arrow_LargeListArray__raw_value_offsets, 1}, 
 		{ "_arrow_Array__as_vector", (DL_FUNC) &_arrow_Array__as_vector, 1}, 
-		{ "_arrow_ChunkedArray__as_vector", (DL_FUNC) &_arrow_ChunkedArray__as_vector, 1}, 
+		{ "_arrow_ChunkedArray__as_vector", (DL_FUNC) &_arrow_ChunkedArray__as_vector, 2}, 
 		{ "_arrow_RecordBatch__to_dataframe", (DL_FUNC) &_arrow_RecordBatch__to_dataframe, 2}, 
 		{ "_arrow_Table__to_dataframe", (DL_FUNC) &_arrow_Table__to_dataframe, 2}, 
 		{ "_arrow_ArrayData__get_type", (DL_FUNC) &_arrow_ArrayData__get_type, 1}, 
diff --git a/r/src/arrow_types.h b/r/src/arrow_types.h
index 68e1c8659c4..b5a8914d432 100644
--- a/r/src/arrow_types.h
+++ b/r/src/arrow_types.h
@@ -58,8 +58,6 @@ namespace ds = ::arrow::dataset;
 namespace compute = ::arrow::compute;
 namespace fs = ::arrow::fs;
 
-SEXP ChunkedArray__as_vector(const std::shared_ptr<arrow::ChunkedArray>& chunked_array);
-SEXP Array__as_vector(const std::shared_ptr<arrow::Array>& array);
 std::shared_ptr<arrow::RecordBatch> RecordBatch__from_arrays(SEXP, SEXP);
 arrow::MemoryPool* gc_memory_pool();
 
diff --git a/r/src/r_task_group.h b/r/src/r_task_group.h
new file mode 100644
index 00000000000..e1c298b27fc
--- /dev/null
+++ b/r/src/r_task_group.h
@@ -0,0 +1,51 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <arrow/util/parallel.h>
+#include <arrow/util/task_group.h>
+
+namespace arrow {
+namespace r {
+
+class RTasks {
+ public:
+  using Task = internal::FnOnce<Status()>;
+
+  explicit RTasks(bool use_threads);
+
+  // This Finish() method must never be called from a thread pool thread
+  // as this would deadlock.
+  //
+  // Usage is to :
+  // - create an RTasks instance on the main thread
+  // - add some tasks with .Append()
+  // - and then call .Finish() so that the parallel tasks are finished
+  Status Finish();
+  void Append(bool parallel, Task&& task);
+
+  void Reset();
+
+  bool use_threads_;
+  StopSource stop_source_;
+  std::shared_ptr<arrow::internal::TaskGroup> parallel_tasks_;
+  std::vector<Task> delayed_serial_tasks_;
+};
+
+}  // namespace r
+}  // namespace arrow
diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp
index 683e8f278e8..b4c16211fb5 100644
--- a/r/src/r_to_arrow.cpp
+++ b/r/src/r_to_arrow.cpp
@@ -31,8 +31,8 @@
 #include <arrow/util/checked_cast.h>
 #include <arrow/util/converter.h>
 #include <arrow/util/logging.h>
-#include <arrow/util/parallel.h>
-#include <arrow/util/task_group.h>
+
+#include "./r_task_group.h"
 
 namespace arrow {
 
@@ -50,90 +50,6 @@ using internal::MakeConverter;
 
 namespace r {
 
-class RTasks {
- public:
-  using Task = internal::FnOnce<Status()>;
-
-  explicit RTasks(bool use_threads)
-      : use_threads_(use_threads),
-        stop_source_(),
-        parallel_tasks_(
-            use_threads ? arrow::internal::TaskGroup::MakeThreaded(
-                              arrow::internal::GetCpuThreadPool(), stop_source_.token())
-                        : nullptr) {}
-
-  // This Finish() method must never be called from a thread pool thread
-  // as this would deadlock.
-  //
-  // Usage is to :
-  // - create an RTasks instance on the main thread
-  // - add some tasks with .Append()
-  // - and then call .Finish() so that the parallel tasks are finished
-  Status Finish() {
-    Status status = Status::OK();
-
-    // run the delayed tasks now
-    for (auto& task : delayed_serial_tasks_) {
-      status &= std::move(task)();
-    }
-
-    // then wait for the parallel tasks to finish
-    if (use_threads_) {
-      status &= parallel_tasks_->Finish();
-    }
-
-    return status;
-  }
-
-  void Append(bool parallel, Task&& task) {
-    StoppingTask stopping_task(stop_source_, std::move(task));
-    if (parallel && use_threads_) {
-      parallel_tasks_->Append(std::move(stopping_task));
-    } else {
-      delayed_serial_tasks_.push_back(std::move(stopping_task));
-    }
-  }
-
-  void Reset() {
-    delayed_serial_tasks_.clear();
-
-    stop_source_.Reset();
-    if (use_threads_) {
-      parallel_tasks_ = arrow::internal::TaskGroup::MakeThreaded(
-          arrow::internal::GetCpuThreadPool(), stop_source_.token());
-    }
-  }
-
-  bool use_threads_;
-  StopSource stop_source_;
-  std::shared_ptr<arrow::internal::TaskGroup> parallel_tasks_;
-  std::vector<Task> delayed_serial_tasks_;
-
- private:
-  class StoppingTask {
-   public:
-    StoppingTask(StopSource stop_source, Task&& task) : task_(std::move(task)) {}
-
-    Status operator()() {
-      Status status;
-      StopToken token = stop_source_.token();
-      if (token.IsStopRequested()) {
-        status &= token.Poll();
-      } else {
-        Status status = std::move(task_)();
-        if (!status.ok()) {
-          stop_source_.RequestStop();
-        }
-      }
-      return status;
-    }
-
-   private:
-    StopSource stop_source_;
-    Task task_;
-  };
-};
-
 struct RConversionOptions {
   RConversionOptions() = default;
 
diff --git a/r/tests/testthat/test-altrep.R b/r/tests/testthat/test-altrep.R
index ec1c671b12e..42784b61442 100644
--- a/r/tests/testthat/test-altrep.R
+++ b/r/tests/testthat/test-altrep.R
@@ -94,3 +94,17 @@ test_that("empty vectors are not altrep", {
   expect_false(is_altrep_int_nonull(as.vector(v_int)))
   expect_false(is_altrep_dbl_nonull(as.vector(v_dbl)))
 })
+
+test_that("as.data.frame(<Table>, <RecordBatch>) can create altrep vectors", {
+  withr::local_options(list(arrow.use_altrep = TRUE))
+
+  table <- Table$create(int = c(1L, 2L, 3L), dbl = c(1, 2, 3))
+  df_table <- as.data.frame(table)
+  expect_true(is_altrep_int_nonull(df_table$int))
+  expect_true(is_altrep_dbl_nonull(df_table$dbl))
+
+  batch <- RecordBatch$create(int = c(1L, 2L, 3L), dbl = c(1, 2, 3))
+  df_batch <- as.data.frame(batch)
+  expect_true(is_altrep_int_nonull(df_batch$int))
+  expect_true(is_altrep_dbl_nonull(df_batch$dbl))
+})

From 9c6d4179fefdf995fd0b940a292b81947fe68035 Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Mon, 12 Jul 2021 10:13:11 -0500
Subject: [PATCH 539/719] ARROW-13243: [R] altrep function call in R 3.5

This uses the workaround that @romainfrancois blogged about (https://purrple.cat/blog/2018/10/14/altrep-and-cpp/) on versions less than 3.6.0.

Closes #10702 from jonkeane/ARROW-13243-altrep-gate

Authored-by: Jonathan Keane <jkeane@gmail.com>
Signed-off-by: Jonathan Keane <jkeane@gmail.com>
---
 r/src/altrep.cpp | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/r/src/altrep.cpp b/r/src/altrep.cpp
index c5d309b66a6..b07cbe70ed3 100644
--- a/r/src/altrep.cpp
+++ b/r/src/altrep.cpp
@@ -22,7 +22,27 @@
 #include <cpp11/altrep.hpp>
 #if defined(HAS_ALTREP)
 
+#if R_VERSION < R_Version(3, 6, 0)
+
+// workaround because R's <R_ext/Altrep.h> not so conveniently uses `class`
+// as a variable name, and C++ is not happy about that
+//
+// SEXP R_new_altrep(R_altrep_class_t class, SEXP data1, SEXP data2);
+//
+#define class klass
+
+// Because functions declared in <R_ext/Altrep.h> have C linkage
+extern "C" {
 #include <R_ext/Altrep.h>
+}
+
+// undo the workaround
+#undef class
+
+#else
+#include <R_ext/Altrep.h>
+#endif
+
 #include <arrow/array.h>
 
 namespace arrow {

From 975f4597992d0b863a87094a592fe23f94c9f378 Mon Sep 17 00:00:00 2001
From: Benjamin Kietzman <bengilgit@gmail.com>
Date: Mon, 12 Jul 2021 15:04:45 -0400
Subject: [PATCH 540/719] ARROW-13296: [C++] Provide a reflection compatible
 enum replacement

Provides an enum replacement with minimal reflection capabilities. These can be declared by inheriting from a helper with CRTP, including a static string literal data member containing the enum's values:

```c++
struct Color : EnumType<Color> {
  using EnumType::EnumType;
  static constexpr char* kValues = "red green blue";
};
```

Values of enumerations declared in this way can be constructed from their string representations at compile time, and can be converted to their string representation for easier debugging/logging/and less repetitive boilerplate in the bindings and elsewhere mapping to/from user provided string values.

For example:
```c++
int get_hex_value() {
    std::string input;
    std::cin >> input;
    switch (*Color(repr)) {
      case *Color("red"):
        return 0xff0000;
      case *Color("green"):
        return 0x00ff00;
      case *Color("blue"):
        return 0x0000ff;
      default:
        std::cout << "Don't know that one; input hex value\n";
        std::cin >> std::hex >> input;
        return std::stoi(input);
    }
}
```

Closes #10691 from bkietz/13296-Provide-reflection-compat

Authored-by: Benjamin Kietzman <bengilgit@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/src/arrow/util/enum.h             | 162 ++++++++++++++++++++++++++
 cpp/src/arrow/util/reflection_test.cc |  66 +++++++++++
 2 files changed, 228 insertions(+)
 create mode 100644 cpp/src/arrow/util/enum.h

diff --git a/cpp/src/arrow/util/enum.h b/cpp/src/arrow/util/enum.h
new file mode 100644
index 00000000000..7a0404d5c76
--- /dev/null
+++ b/cpp/src/arrow/util/enum.h
@@ -0,0 +1,162 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <string>
+#include <type_traits>
+#include <vector>
+
+#include "arrow/util/string_view.h"
+
+namespace arrow {
+namespace internal {
+
+constexpr bool IsSpace(char c) { return c == ' ' || c == '\n' || c == '\r'; }
+
+constexpr char ToLower(char c) { return c >= 'A' && c <= 'Z' ? c - 'A' + 'a' : c; }
+
+constexpr bool CaseInsensitiveEquals(const char* l, const char* r,
+                                     size_t limit = util::string_view::npos) {
+  return limit == 0
+             ? true
+             : ToLower(l[0]) != ToLower(r[0])
+                   ? false
+                   : l[0] == '\0' ? true : CaseInsensitiveEquals(l + 1, r + 1, limit - 1);
+}
+
+constexpr bool CaseInsensitiveEquals(util::string_view l, util::string_view r) {
+  return l.size() == r.size() && CaseInsensitiveEquals(l.data(), r.data(), l.size());
+}
+
+constexpr const char* SkipWhitespace(const char* raw) {
+  return *raw == '\0' || !IsSpace(*raw) ? raw : SkipWhitespace(raw + 1);
+}
+
+constexpr const char* SkipNonWhitespace(const char* raw) {
+  return *raw == '\0' || IsSpace(*raw) ? raw : SkipNonWhitespace(raw + 1);
+}
+
+constexpr size_t TokenSize(const char* token_start) {
+  return SkipNonWhitespace(token_start) - token_start;
+}
+
+constexpr size_t NextTokenStart(const char* raw, size_t token_start) {
+  return SkipWhitespace(SkipNonWhitespace(raw + token_start)) - raw;
+}
+
+template <typename Raw, size_t... Offsets>
+struct EnumTypeImpl {
+  static constexpr int kSize = sizeof...(Offsets);
+
+  static constexpr util::string_view kValueStrs[sizeof...(Offsets)] = {
+      {Raw::kValues + Offsets, TokenSize(Raw::kValues + Offsets)}...};
+
+  static constexpr int GetIndex(util::string_view repr, int i = 0) {
+    return i == kSize
+               ? -1
+               : CaseInsensitiveEquals(kValueStrs[i], repr) ? i : GetIndex(repr, i + 1);
+  }
+};
+
+template <typename Raw, size_t... Offsets>
+constexpr util::string_view const
+    EnumTypeImpl<Raw, Offsets...>::kValueStrs[sizeof...(Offsets)];
+
+/// \cond false
+template <typename Raw, bool IsEnd = false,
+          size_t MaxOffset = SkipWhitespace(Raw::kValues) - Raw::kValues,
+          size_t... Offsets>
+struct EnumTypeBuilder
+    : EnumTypeBuilder<Raw, Raw::kValues[NextTokenStart(Raw::kValues, MaxOffset)] == '\0',
+                      NextTokenStart(Raw::kValues, MaxOffset), Offsets..., MaxOffset> {};
+
+template <typename Raw, size_t TerminalNullOffset, size_t... Offsets>
+struct EnumTypeBuilder<Raw, /*IsEnd=*/true, TerminalNullOffset, Offsets...> {
+  using ImplType = EnumTypeImpl<Raw, Offsets...>;
+};
+
+// reuse struct as an alias for typename EnumTypeBuilder<Raw>::ImplType
+template <typename Raw>
+struct EnumTypeImpl<Raw> : EnumTypeBuilder<Raw>::ImplType {};
+/// \endcond
+
+struct EnumTypeTag {};
+
+/// \brief An enum replacement with minimal reflection capabilities.
+///
+/// Declare an enum by inheriting from this helper with CRTP, including a
+/// static string literal data member containing the enum's values:
+///
+///     struct Color : EnumType<Color> {
+///       using EnumType::EnumType;
+///       static constexpr char* kValues = "red green blue";
+///     };
+///
+/// Ensure the doccomment includes a description of each enum value.
+///
+/// Values of enumerations declared in this way can be constructed from their string
+/// representations at compile time, and can be converted to their string representation
+/// for easier debugging/logging/...
+template <typename Raw>
+struct EnumType : EnumTypeTag {
+  constexpr EnumType() = default;
+
+  constexpr explicit EnumType(int index)
+      : index{index >= 0 && index < EnumTypeImpl<Raw>::kSize ? index : -1} {}
+
+  constexpr explicit EnumType(util::string_view repr)
+      : index{EnumTypeImpl<Raw>::GetIndex(repr)} {}
+
+  constexpr bool operator==(EnumType other) const { return index == other.index; }
+  constexpr bool operator!=(EnumType other) const { return index != other.index; }
+
+  /// Return the string representation of this enum value.
+  std::string ToString() const {
+    return EnumTypeImpl<Raw>::kValueStrs[index].to_string();
+  }
+
+  /// \brief Valid enum values will be truthy.
+  ///
+  /// Invalid enums are constructed with indices outside the range [0, size), with strings
+  /// not present in EnumType::value_strings(), or by default construction.
+  constexpr explicit operator bool() const { return index != -1; }
+
+  /// Convert this enum value to its integer index.
+  constexpr int operator*() const { return index; }
+
+  /// The number of values in this enumeration.
+  static constexpr int size() { return EnumTypeImpl<Raw>::kSize; }
+
+  /// String representations of each value in this enumeration.
+  static std::vector<util::string_view> value_strings() {
+    const util::string_view* begin = EnumTypeImpl<Raw>::kValueStrs;
+    return {begin, begin + size()};
+  }
+
+  int index = -1;
+
+  friend inline void PrintTo(const EnumType& e, std::ostream* os) {
+    PrintTo(e.ToString(), os);
+  }
+};
+
+template <typename T>
+using is_reflection_enum = std::is_base_of<EnumTypeTag, T>;
+
+}  // namespace internal
+}  // namespace arrow
diff --git a/cpp/src/arrow/util/reflection_test.cc b/cpp/src/arrow/util/reflection_test.cc
index fb3d3b8fb02..02a40467a80 100644
--- a/cpp/src/arrow/util/reflection_test.cc
+++ b/cpp/src/arrow/util/reflection_test.cc
@@ -19,6 +19,7 @@
 
 #include <gtest/gtest.h>
 
+#include "arrow/util/enum.h"
 #include "arrow/util/reflection_internal.h"
 #include "arrow/util/string.h"
 
@@ -220,5 +221,70 @@ TEST(Reflection, EnumTraits) {
   static_assert(std::is_same<EnumTraits<PersonType>::Type, Int8Type>::value, "");
 }
 
+TEST(Reflection, CompileTimeStringOps) {
+  static_assert(CaseInsensitiveEquals("a", "a"), "");
+  static_assert(CaseInsensitiveEquals("Ab", "ab"), "");
+  static_assert(CaseInsensitiveEquals("Ab ", "ab", 2), "");
+  static_assert(CaseInsensitiveEquals(util::string_view{"Ab ", 2}, "ab"), "");
+
+  static_assert(CaseInsensitiveEquals(SkipWhitespace("  a"), "a"), "");
+  static_assert(CaseInsensitiveEquals(SkipWhitespace("a  b"), "a  b"), "");
+
+  static_assert(CaseInsensitiveEquals(SkipNonWhitespace("  a"), "  a"), "");
+  static_assert(CaseInsensitiveEquals(SkipNonWhitespace("a  b"), "  b"), "");
+
+  static_assert(TokenSize("aba ddf") == 3, "");
+
+  static_assert(NextTokenStart("aba ddf dfas", 4) == 8, "");
+}
+
+/// \brief Enumeration of primary colors.
+///
+/// - red:   Hex value 0xff0000
+/// - green: Hex value 0x00ff00
+/// - blue:  Hex value 0x0000ff
+struct Color : EnumType<Color> {
+  using EnumType<Color>::EnumType;
+  static constexpr const char* kValues = "red green blue";
+};
+
+TEST(Reflection, EnumType) {
+  static_assert(Color::size() == 3, "");
+  EXPECT_EQ(Color::value_strings(),
+            std::vector<util::string_view>({"red", "green", "blue"}));
+
+  static_assert(Color("red").index == 0, "");
+  static_assert(*Color("GREEN") == 1, "");
+  static_assert(Color("Blue") == Color(2), "");
+
+  EXPECT_EQ(Color("red").ToString(), "red");
+  EXPECT_EQ(Color("GREEN").ToString(), "green");
+  EXPECT_EQ(Color("Blue").ToString(), "blue");
+
+  static_assert(Color("GREEN") == Color("Green"), "");
+  static_assert(Color("GREEN") == Color(1), "");
+  static_assert(Color("GREEN") != Color(), "");
+
+  static_assert(!Color("chartreuse"), "");
+  static_assert(Color("violet") == Color(), "");
+  static_assert(Color(-1) == Color(), "");
+  static_assert(Color(-29) == Color(), "");
+  static_assert(Color(12334) == Color(), "");
+
+  for (util::string_view repr : {"Red", "orange", "BLUE"}) {
+    switch (*Color(repr)) {
+      case* Color("blue"):
+        EXPECT_EQ(repr, "BLUE");
+        break;
+      case* Color("red"):
+        EXPECT_EQ(repr, "Red");
+        break;
+      default:
+        EXPECT_EQ(repr, "orange");
+        break;
+    }
+  }
+}
+
 }  // namespace internal
 }  // namespace arrow

From ba009fbd97cb32aa0edd2025d983dbe11de83b9b Mon Sep 17 00:00:00 2001
From: Dominik Moritz <domoritz@gmail.com>
Date: Mon, 12 Jul 2021 12:06:43 -0700
Subject: [PATCH 541/719] ARROW-13277: [JS] Add declaration maps for TypeScript
 and refactor testing infrastructure

With declaration maps, developers can jump to the implementation rather than a declaration file.

Closes #10673 from domoritz/dom/declaration-maps

Lead-authored-by: Dominik Moritz <domoritz@gmail.com>
Co-authored-by: ptaylor <paul.e.taylor@me.com>
Signed-off-by: Dominik Moritz <domoritz@gmail.com>
---
 js/.eslintignore                              |   4 +
 js/.npmrc                                     |   5 +
 js/.vscode/extensions.json                    |   6 +
 js/.vscode/launch.json                        | 418 ++++++++++--------
 js/DEVELOP.md                                 |   2 +-
 js/gulp/arrow-task.js                         |   4 +-
 js/gulp/package-task.js                       |   2 +
 js/gulp/test-task.js                          |  27 +-
 js/gulp/typescript-task.js                    |   8 +-
 js/gulp/util.js                               |  14 +-
 js/gulpfile.js                                |  11 +-
 js/jest.config.js                             |  66 +--
 js/jestconfigs/jest.apache-arrow.config.js    |  31 ++
 js/{ => jestconfigs}/jest.coverage.config.js  |  22 +-
 js/jestconfigs/jest.es2015.cjs.config.js      |  31 ++
 js/jestconfigs/jest.es2015.esm.config.js      |  33 ++
 js/jestconfigs/jest.es2015.umd.config.js      |  31 ++
 js/jestconfigs/jest.es5.cjs.config.js         |  31 ++
 js/jestconfigs/jest.es5.esm.config.js         |  33 ++
 js/jestconfigs/jest.es5.umd.config.js         |  31 ++
 js/jestconfigs/jest.esnext.cjs.config.js      |  31 ++
 js/jestconfigs/jest.esnext.esm.config.js      |  33 ++
 js/jestconfigs/jest.esnext.umd.config.js      |  31 ++
 js/jestconfigs/jest.src.config.js             |  28 ++
 js/jestconfigs/jest.ts.config.js              |  32 ++
 js/package.json                               |  19 +-
 js/src/Arrow.dom.ts                           |   1 +
 js/src/Arrow.ts                               |   2 +
 js/test/Arrow.ts                              |  46 +-
 js/test/generate-test-data.ts                 |   4 +-
 js/test/inference/column.ts                   |  10 +-
 js/test/inference/nested.ts                   |  10 +-
 js/test/inference/visitor/get.ts              |   2 +-
 js/test/tsconfig.json                         |  16 +-
 js/test/tsconfig/tsconfig.apache-arrow.json   |   8 +
 js/test/tsconfig/tsconfig.base.json           |  26 ++
 js/test/{ => tsconfig}/tsconfig.coverage.json |   2 +-
 js/test/tsconfig/tsconfig.es2015.cjs.json     |   8 +
 js/test/tsconfig/tsconfig.es2015.esm.json     |   8 +
 js/test/tsconfig/tsconfig.es2015.umd.json     |  11 +
 js/test/tsconfig/tsconfig.es5.cjs.json        |   9 +
 js/test/tsconfig/tsconfig.es5.esm.json        |   9 +
 js/test/tsconfig/tsconfig.es5.umd.json        |  12 +
 js/test/tsconfig/tsconfig.esnext.cjs.json     |   8 +
 js/test/tsconfig/tsconfig.esnext.esm.json     |   8 +
 js/test/tsconfig/tsconfig.esnext.umd.json     |  11 +
 js/test/tsconfig/tsconfig.src.json            |   8 +
 js/test/tsconfig/tsconfig.ts.json             |   8 +
 js/test/unit/bit-tests.ts                     |   2 +-
 js/test/unit/builders/builder-tests.ts        |   8 +-
 js/test/unit/builders/date-tests.ts           |   2 +-
 js/test/unit/builders/dictionary-tests.ts     |   2 +-
 js/test/unit/builders/int64-tests.ts          |   2 +-
 js/test/unit/builders/primitive-tests.ts      |   2 +-
 js/test/unit/builders/uint64-tests.ts         |   2 +-
 js/test/unit/builders/utf8-tests.ts           |   2 +-
 js/test/unit/builders/utils.ts                |   8 +-
 js/test/unit/dataframe-tests.ts               |  13 +-
 js/test/unit/generated-data-validators.ts     |   2 +-
 js/test/unit/int-tests.ts                     |   2 +-
 js/test/unit/ipc/helpers.ts                   |   5 +-
 js/test/unit/ipc/message-reader-tests.ts      |   2 +-
 js/test/unit/ipc/reader/file-reader-tests.ts  |   2 +-
 .../unit/ipc/reader/from-inference-tests.ts   |   6 +-
 js/test/unit/ipc/reader/json-reader-tests.ts  |   6 +-
 .../unit/ipc/reader/stream-reader-tests.ts    |   2 +-
 js/test/unit/ipc/reader/streams-dom-tests.ts  |  13 +-
 js/test/unit/ipc/reader/streams-node-tests.ts |  26 +-
 js/test/unit/ipc/validate.ts                  |   2 +-
 js/test/unit/ipc/writer/file-writer-tests.ts  |   2 +-
 js/test/unit/ipc/writer/json-writer-tests.ts  |   6 +-
 .../unit/ipc/writer/stream-writer-tests.ts    |   6 +-
 js/test/unit/ipc/writer/streams-dom-tests.ts  |   6 +-
 js/test/unit/ipc/writer/streams-node-tests.ts |   6 +-
 js/test/unit/math-tests.ts                    |   2 +-
 .../unit/recordbatch/record-batch-tests.ts    |   2 +-
 js/test/unit/table-tests.ts                   |   2 +-
 js/test/unit/table/assign-tests.ts            |   2 +-
 js/test/unit/table/serialize-tests.ts         |   2 +-
 js/test/unit/utils-tests.ts                   |   3 +-
 js/test/unit/vector/bool-vector-tests.ts      |   2 +-
 js/test/unit/vector/date-vector-tests.ts      |   2 +-
 js/test/unit/vector/numeric-vector-tests.ts   |   8 +-
 js/test/unit/vector/vector-tests.ts           |   2 +-
 js/test/unit/visitor-tests.ts                 |   6 +-
 js/tsconfig.json                              |  15 +-
 js/tsconfig/tsconfig.base.json                |   7 +-
 js/tsconfig/tsconfig.bin.cjs.json             |  17 +-
 js/tsconfig/tsconfig.docs.json                |  10 +-
 js/tsconfig/tsconfig.es2015.cls.json          |   1 +
 js/tsconfig/tsconfig.es5.cls.json             |   1 +
 js/tsconfig/tsconfig.esnext.cls.json          |   1 +
 js/yarn.lock                                  |  53 ++-
 93 files changed, 1029 insertions(+), 457 deletions(-)
 create mode 100644 js/.vscode/extensions.json
 create mode 100644 js/jestconfigs/jest.apache-arrow.config.js
 rename js/{ => jestconfigs}/jest.coverage.config.js (76%)
 create mode 100644 js/jestconfigs/jest.es2015.cjs.config.js
 create mode 100644 js/jestconfigs/jest.es2015.esm.config.js
 create mode 100644 js/jestconfigs/jest.es2015.umd.config.js
 create mode 100644 js/jestconfigs/jest.es5.cjs.config.js
 create mode 100644 js/jestconfigs/jest.es5.esm.config.js
 create mode 100644 js/jestconfigs/jest.es5.umd.config.js
 create mode 100644 js/jestconfigs/jest.esnext.cjs.config.js
 create mode 100644 js/jestconfigs/jest.esnext.esm.config.js
 create mode 100644 js/jestconfigs/jest.esnext.umd.config.js
 create mode 100644 js/jestconfigs/jest.src.config.js
 create mode 100644 js/jestconfigs/jest.ts.config.js
 create mode 100644 js/test/tsconfig/tsconfig.apache-arrow.json
 create mode 100644 js/test/tsconfig/tsconfig.base.json
 rename js/test/{ => tsconfig}/tsconfig.coverage.json (73%)
 create mode 100644 js/test/tsconfig/tsconfig.es2015.cjs.json
 create mode 100644 js/test/tsconfig/tsconfig.es2015.esm.json
 create mode 100644 js/test/tsconfig/tsconfig.es2015.umd.json
 create mode 100644 js/test/tsconfig/tsconfig.es5.cjs.json
 create mode 100644 js/test/tsconfig/tsconfig.es5.esm.json
 create mode 100644 js/test/tsconfig/tsconfig.es5.umd.json
 create mode 100644 js/test/tsconfig/tsconfig.esnext.cjs.json
 create mode 100644 js/test/tsconfig/tsconfig.esnext.esm.json
 create mode 100644 js/test/tsconfig/tsconfig.esnext.umd.json
 create mode 100644 js/test/tsconfig/tsconfig.src.json
 create mode 100644 js/test/tsconfig/tsconfig.ts.json

diff --git a/js/.eslintignore b/js/.eslintignore
index a9ba028ceea..94ef668a61c 100644
--- a/js/.eslintignore
+++ b/js/.eslintignore
@@ -1 +1,5 @@
 .eslintrc.js
+gulp
+jest.config.js
+jestconfigs
+targets
diff --git a/js/.npmrc b/js/.npmrc
index 5536efc09ce..e55040abad7 100644
--- a/js/.npmrc
+++ b/js/.npmrc
@@ -1,2 +1,7 @@
+fund=false
+audit=false
 save-prefix=
+save-exact=true
 engine-strict=true
+update-notifier=false
+registry=https://registry.npmjs.org/
diff --git a/js/.vscode/extensions.json b/js/.vscode/extensions.json
new file mode 100644
index 00000000000..1cb01b6b9fe
--- /dev/null
+++ b/js/.vscode/extensions.json
@@ -0,0 +1,6 @@
+{
+    "recommendations": [
+        "dbaeumer.vscode-eslint",
+        "augustocdias.tasks-shell-input",
+    ]
+}
diff --git a/js/.vscode/launch.json b/js/.vscode/launch.json
index 43851ba5358..ae72e1f4850 100644
--- a/js/.vscode/launch.json
+++ b/js/.vscode/launch.json
@@ -1,182 +1,240 @@
 {
-    // Use IntelliSense to learn about possible attributes.
-    // Hover to view descriptions of existing attributes.
-    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
-    "version": "0.2.0",
-    "configurations": [
-        {
-            "type": "node",
-            "request": "launch",
-            "name": "Debug Gulp Build",
-            "program": "${workspaceFolder}/node_modules/gulp/bin/gulp.js",
-            "args": [
-                "build",
-                // Specify we want to debug the "src" target, which won't clean or build -- essentially a "dry-run" of the gulp build
-                "--target", "src"
-            ]
-        },
-        {
-            "type": "node",
-            "request": "launch",
-            "name": "Debug Unit Tests",
-            "cwd": "${workspaceRoot}",
-            "program": "${workspaceFolder}/node_modules/.bin/jest",
-            "skipFiles": [
-                "<node_internals>/**/*.js",
-                "${workspaceFolder}/node_modules/**/*.js"
-            ],
-            "env": {
-                "NODE_NO_WARNINGS": "1",
-                "READABLE_STREAM": "disable",
-                "TEST_DOM_STREAMS": "true",
-                "TEST_NODE_STREAMS": "true",
-                // Modify these environment variables to run tests on a specific compilation target + module format combo
-                "TEST_TS_SOURCE": "true",
-                // "TEST_TS_SOURCE": "false",
-                // "TEST_TARGET": "es5",
-                // "TEST_MODULE": "umd"
-            },
-            "args": [
-                "-i",
-                "test/unit/",
-                // "test/unit/builders/",
-
-                // Uncomment any of these to run individual test suites
-                // "test/unit/builders/builder-tests.ts",
-                // "test/unit/builders/int64-tests.ts",
-                // "test/unit/builders/uint64-tests.ts",
-                // "test/unit/builders/date-tests.ts",
-                // "test/unit/builders/primitive-tests.ts",
-                // "test/unit/builders/dictionary-tests.ts",
-                // "test/unit/builders/utf8-tests.ts",
-
-                // "test/unit/int-tests.ts",
-                // "test/unit/math-tests.ts",
-                // "test/unit/table-tests.ts",
-                // "test/unit/generated-data-tests.ts",
-
-                // "test/unit/table/assign-tests.ts",
-                // "test/unit/table/serialize-tests.ts",
-                // "test/unit/recordbatch/record-batch-tests.ts",
-
-                // "test/unit/vector/vector-tests.ts",
-                // "test/unit/vector/bool-vector-tests.ts",
-                // "test/unit/vector/date-vector-tests.ts",
-                // "test/unit/vector/numeric-vector-tests.ts",
-
-                // "test/unit/visitor-tests.ts",
-
-                // "test/unit/ipc/message-reader-tests.ts",
-                // "test/unit/ipc/reader/file-reader-tests.ts",
-                // "test/unit/ipc/reader/json-reader-tests.ts",
-                // "test/unit/ipc/reader/from-inference-tests.ts",
-                // "test/unit/ipc/reader/stream-reader-tests.ts",
-                // "test/unit/ipc/reader/streams-dom-tests.ts",
-                // "test/unit/ipc/reader/streams-node-tests.ts",
-                // "test/unit/ipc/writer/file-writer-tests.ts",
-                // "test/unit/ipc/writer/json-writer-tests.ts",
-                // "test/unit/ipc/writer/stream-writer-tests.ts",
-                // "test/unit/ipc/writer/streams-dom-tests.ts",
-                // "test/unit/ipc/writer/streams-node-tests.ts",
-            ]
-        },
-        {
-            "type": "node",
-            "request": "launch",
-            "name": "Debug Integration Tests",
-            "cwd": "${workspaceRoot}",
-            "program": "${workspaceFolder}/bin/integration.js",
-            "skipFiles": [
-                "<node_internals>/**/*.js",
-                "${workspaceFolder}/node_modules/**/*.js"
-            ],
-            "env": {
-                "NODE_NO_WARNINGS": "1",
-                "READABLE_STREAM": "disable"
-            },
-            "args": [
-                "--mode", "VALIDATE"
-            ]
-        },
-        {
-            "type": "node",
-            "request": "launch",
-            "name": "Debug bin/arrow2csv",
-            "env": { "ARROW_JS_DEBUG": "src", "TS_NODE_CACHE": "false" },
-            "runtimeArgs": ["-r", "ts-node/register"],
-            "console": "integratedTerminal",
-            "skipFiles": [
-                "<node_internals>/**/*.js",
-                "${workspaceFolder}/node_modules/**/*.js"
-            ],
-            "args": [
-                "${workspaceFolder}/src/bin/arrow2csv.ts",
-                "-f", "./test/data/cpp/stream/simple.arrow"
-            ]
-        },
-        {
-            "type": "node",
-            "request": "launch",
-            "name": "Debug bin/file-to-stream",
-            "env": { "ARROW_JS_DEBUG": "src", "TS_NODE_CACHE": "false" },
-            "runtimeArgs": ["-r", "ts-node/register"],
-            "skipFiles": [
-                "<node_internals>/**/*.js",
-                "${workspaceFolder}/node_modules/**/*.js"
-            ],
-            "args": [
-                "${workspaceFolder}/bin/file-to-stream.js",
-                "./test/data/cpp/file/struct_example.arrow",
-                "./struct_example-stream-out.arrow",
-            ]
-        },
-        {
-            "type": "node",
-            "request": "launch",
-            "name": "Debug bin/stream-to-file",
-            "env": { "ARROW_JS_DEBUG": "src", "TS_NODE_CACHE": "false" },
-            "runtimeArgs": ["-r", "ts-node/register"],
-            "skipFiles": [
-                "<node_internals>/**/*.js",
-                "${workspaceFolder}/node_modules/**/*.js"
-            ],
-            "args": [
-                "${workspaceFolder}/bin/stream-to-file.js",
-                "./test/data/cpp/stream/struct_example.arrow",
-                "./struct_example-file-out.arrow",
-            ]
-        },
-        {
-            "type": "node",
-            "request": "launch",
-            "name": "Debug bin/json-to-arrow",
-            "env": { "ARROW_JS_DEBUG": "src", "TS_NODE_CACHE": "false" },
-            "runtimeArgs": ["-r", "ts-node/register"],
-            "skipFiles": [
-                "<node_internals>/**/*.js",
-                "${workspaceFolder}/node_modules/**/*.js"
-            ],
-            "args": [
-                "${workspaceFolder}/bin/json-to-arrow.js",
-                "-j", "./test/data/json/struct_example.json",
-                "-a", "./struct_example-stream-out.arrow",
-                "-f", "stream"
-            ]
-        },
-        {
-            "type": "node",
-            "request": "launch",
-            "name": "Debug bin/print-buffer-alignment",
-            "env": { "ARROW_JS_DEBUG": "src", "TS_NODE_CACHE": "false" },
-            "runtimeArgs": ["-r", "ts-node/register"],
-            "skipFiles": [
-                "<node_internals>/**/*.js",
-                "${workspaceFolder}/node_modules/**/*.js"
-            ],
-            "args": [
-                "${workspaceFolder}/bin/print-buffer-alignment.js",
-                "./test/data/cpp/stream/struct_example.arrow"
-            ]
-        }
-    ]
+  // Use IntelliSense to learn about possible attributes.
+  // Hover to view descriptions of existing attributes.
+  // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+  "version": "0.2.0",
+  "inputs": [
+    {
+      "type": "pickString",
+      "default": "src",
+      "id": "TEST_TARGET",
+      "options": [
+        "src",
+        "apache-arrow",
+        "ts",
+        "es5.cjs",
+        "es5.esm",
+        "es5.umd",
+        "es2015.cjs",
+        "es2015.esm",
+        "es2015.umd",
+        "esnext.cjs",
+        "esnext.esm",
+        "esnext.umd",
+      ],
+      "description": "The JS version + Module format combination to test (or src to test source files)",
+    },
+    {
+      "type": "command",
+      "id": "TEST_FILE",
+      "command": "shellCommand.execute",
+      "args": {
+        "cwd": "${workspaceFolder}",
+        "description": "Select a file to debug",
+        "command": "./node_modules/.bin/jest --listTests | sed -r \"s@$PWD/test/@@g\"",
+      }
+    },
+    {
+      "type": "command",
+      "id": "TEST_RUNTIME_ARGS",
+      "command": "shellCommand.execute",
+      "args": {
+        "useSingleResult": "true",
+        "command": "case \"${input:TEST_TARGET}\" in *cjs | *umd | apache-arrow) echo '';; *) echo '--experimental-vm-modules';; esac"
+      }
+    },
+  ],
+  "configurations": [
+    {
+      "type": "node",
+      "request": "launch",
+      "name": "Debug Gulp Build",
+      "program": "${workspaceFolder}/node_modules/gulp/bin/gulp.js",
+      "args": [
+        "build",
+        // Specify we want to debug the "src" target, which won't clean or build -- essentially a "dry-run" of the gulp build
+        "--target",
+        "src"
+      ]
+    },
+    {
+      "type": "node",
+      "request": "launch",
+      "name": "Debug Unit Tests",
+      "cwd": "${workspaceRoot}",
+      "console": "integratedTerminal",
+      "program": "${workspaceFolder}/node_modules/.bin/jest",
+      "skipFiles": [
+        "<node_internals>/**/*.js",
+        "${workspaceFolder}/node_modules/**/*.js"
+      ],
+      "env": {
+        "NODE_NO_WARNINGS": "1",
+        "NODE_ENV": "production",
+        "TEST_DOM_STREAMS": "true",
+        "TEST_NODE_STREAMS": "true",
+      },
+      "runtimeArgs": ["${input:TEST_RUNTIME_ARGS}"],
+      "args": [
+        "--verbose",
+        "--runInBand",
+        "-c", "jestconfigs/jest.${input:TEST_TARGET}.config.js",
+        "${input:TEST_FILE}"
+      ]
+    },
+    {
+      "type": "node",
+      "request": "launch",
+      "name": "Debug Integration Tests",
+      "cwd": "${workspaceRoot}",
+      "program": "${workspaceFolder}/bin/integration.js",
+      "skipFiles": [
+        "<node_internals>/**/*.js",
+        "${workspaceFolder}/node_modules/**/*.js"
+      ],
+      "env": {
+        "NODE_NO_WARNINGS": "1",
+      },
+      "args": [
+        "--mode",
+        "VALIDATE"
+      ]
+    },
+    {
+      "type": "node",
+      "request": "launch",
+      "name": "Debug bin/arrow2csv",
+      "env": {
+        "ARROW_JS_DEBUG": "src",
+        "TS_NODE_CACHE": "false"
+      },
+      "runtimeArgs": [
+        "-r",
+        "ts-node/register"
+      ],
+      "console": "integratedTerminal",
+      "skipFiles": [
+        "<node_internals>/**/*.js",
+        "${workspaceFolder}/node_modules/**/*.js"
+      ],
+      "args": [
+        "${workspaceFolder}/src/bin/arrow2csv.ts",
+        "-f",
+        "./test/data/cpp/stream/simple.arrow"
+      ]
+    },
+    {
+      "type": "node",
+      "request": "launch",
+      "name": "Debug bin/file-to-stream",
+      "env": {
+        "ARROW_JS_DEBUG": "src",
+        "TS_NODE_CACHE": "false"
+      },
+      "runtimeArgs": [
+        "-r",
+        "ts-node/register"
+      ],
+      "skipFiles": [
+        "<node_internals>/**/*.js",
+        "${workspaceFolder}/node_modules/**/*.js"
+      ],
+      "args": [
+        "${workspaceFolder}/bin/file-to-stream.js",
+        "./test/data/cpp/file/struct_example.arrow",
+        "./struct_example-stream-out.arrow",
+      ]
+    },
+    {
+      "type": "node",
+      "request": "launch",
+      "name": "Debug bin/stream-to-file",
+      "env": {
+        "ARROW_JS_DEBUG": "src",
+        "TS_NODE_CACHE": "false"
+      },
+      "runtimeArgs": [
+        "-r",
+        "ts-node/register"
+      ],
+      "skipFiles": [
+        "<node_internals>/**/*.js",
+        "${workspaceFolder}/node_modules/**/*.js"
+      ],
+      "args": [
+        "${workspaceFolder}/bin/stream-to-file.js",
+        "./test/data/cpp/stream/struct_example.arrow",
+        "./struct_example-file-out.arrow",
+      ]
+    },
+    {
+      "type": "node",
+      "request": "launch",
+      "name": "Debug bin/json-to-arrow",
+      "env": {
+        "ARROW_JS_DEBUG": "src",
+        "TS_NODE_CACHE": "false"
+      },
+      "runtimeArgs": [
+        "-r",
+        "ts-node/register"
+      ],
+      "skipFiles": [
+        "<node_internals>/**/*.js",
+        "${workspaceFolder}/node_modules/**/*.js"
+      ],
+      "args": [
+        "${workspaceFolder}/bin/json-to-arrow.js",
+        "-j",
+        "./test/data/json/struct_example.json",
+        "-a",
+        "./struct_example-stream-out.arrow",
+        "-f",
+        "stream"
+      ]
+    },
+    {
+      "type": "node",
+      "request": "launch",
+      "name": "Debug bin/print-buffer-alignment",
+      "env": {
+        "ARROW_JS_DEBUG": "src",
+        "TS_NODE_CACHE": "false"
+      },
+      "runtimeArgs": [
+        "-r",
+        "ts-node/register"
+      ],
+      "skipFiles": [
+        "<node_internals>/**/*.js",
+        "${workspaceFolder}/node_modules/**/*.js"
+      ],
+      "args": [
+        "${workspaceFolder}/bin/print-buffer-alignment.js",
+        "./test/data/cpp/stream/struct_example.arrow"
+      ]
+    },{
+      "type": "node",
+      "name": "vscode-jest-tests",
+      "request": "launch",
+      "console": "integratedTerminal",
+      "internalConsoleOptions": "neverOpen",
+      "disableOptimisticBPs": true,
+      "cwd": "${workspaceFolder}",
+      "program": "${workspaceFolder}/node_modules/.bin/jest",
+      "runtimeArgs": [
+        "--experimental-vm-modules"
+      ],
+      "args": [
+        "--runInBand",
+        "--watchAll=false"
+      ],
+      "env": {
+        "NODE_NO_WARNINGS": "1",
+        "TEST_DOM_STREAMS": "true",
+        "TEST_NODE_STREAMS": "true",
+        "TEST_TS_SOURCE": "true"
+      },
+    }
+  ]
 }
diff --git a/js/DEVELOP.md b/js/DEVELOP.md
index cba4faf3aa5..66cefb08435 100644
--- a/js/DEVELOP.md
+++ b/js/DEVELOP.md
@@ -50,7 +50,7 @@ We use [yarn](https://yarnpkg.com/) to install dependencies and run scrips.
 
 These scripts accept argument lists of targets × modules:
 
-* Available `targets` are `es5`, `es2015`, `esnext`, and `all` (default: `all`)
+* Available `targets` are `es5`, `es2015`, `esnext`, `ts`, and `all` (default: `all`)
 * Available `modules` are `cjs`, `esm`, `umd`, and `all` (default: `all`)
 
 Examples:
diff --git a/js/gulp/arrow-task.js b/js/gulp/arrow-task.js
index 93e9475e936..277ee745f9e 100644
--- a/js/gulp/arrow-task.js
+++ b/js/gulp/arrow-task.js
@@ -57,8 +57,8 @@ const arrowTSTask = ((cache) => memoizeTask(cache, async function copyTS(target,
     await pipeline(gulp.src(`src/**/*`), gulp.dest(out));
     await del(`${out}/**/*.js`);
 }))({});
-  
-  
+
+
 module.exports = arrowTask;
 module.exports.arrowTask = arrowTask;
 module.exports.arrowTSTask = arrowTSTask;
diff --git a/js/gulp/package-task.js b/js/gulp/package-task.js
index cb1d97c82dd..c320c908d3d 100644
--- a/js/gulp/package-task.js
+++ b/js/gulp/package-task.js
@@ -85,6 +85,8 @@ const createScopedPackageJSON = (target, format) => (({ name, ...orig }) =>
             browser:  format === 'umd' ? `${mainExport}.js` : `${mainExport}.dom.js`,
             // set "main" to "Arrow" if building scoped UMD target, otherwise "Arrow.node"
             main:     format === 'umd' ? `${mainExport}.js` : `${mainExport}.node`,
+            // set "type" to `module` or `commonjs` (https://nodejs.org/api/packages.html#packages_type)
+            type:     format === 'esm' ? `module` : `commonjs`,
             // set "module" (for https://www.npmjs.com/package/@pika/pack) if building scoped ESM target
             module:   format === 'esm' ? `${mainExport}.dom.js` : undefined,
             // set "sideEffects" to false as a hint to Webpack that it's safe to tree-shake the ESM target
diff --git a/js/gulp/test-task.js b/js/gulp/test-task.js
index 699070a8ff4..7a2cc0441ce 100644
--- a/js/gulp/test-task.js
+++ b/js/gulp/test-task.js
@@ -28,12 +28,17 @@ const readFile = promisify(require('fs').readFile);
 const asyncDone = promisify(require('async-done'));
 const exec = promisify(require('child_process').exec);
 const parseXML = promisify(require('xml2js').parseString);
+const { targetAndModuleCombinations } = require('./util');
 
 const jestArgv = [`--reporters=jest-silent-reporter`];
-argv.verbose && jestArgv.push(`--verbose`);
-argv.coverage
-    ? jestArgv.push(`-c`, `jest.coverage.config.js`, `--coverage`, `-i`)
-    : jestArgv.push(`-c`, `jest.config.js`, `-i`)
+
+if (argv.verbose) {
+    jestArgv.push(`--verbose`);
+}
+
+if (targetAndModuleCombinations.length > 1) {
+    jestArgv.push(`--detectOpenHandles`);
+}
 
 const jest = path.join(path.parse(require.resolve(`jest`)).dir, `../bin/jest.js`);
 const testOptions = {
@@ -42,15 +47,21 @@ const testOptions = {
         ...process.env,
         // hide fs.promises/stream[Symbol.asyncIterator] warnings
         NODE_NO_WARNINGS: `1`,
-        // prevent the user-land `readable-stream` module from
-        // patching node's streams -- they're better now
-        READABLE_STREAM: `disable`
     },
 };
 
 const testTask = ((cache, execArgv, testOptions) => memoizeTask(cache, function test(target, format) {
     const opts = { ...testOptions };
-    const args = [...execArgv, `test/unit/`];
+    const args = [...execArgv];
+    if (format === 'esm' || target === 'ts' || target === 'src') {
+        args.unshift(`--experimental-vm-modules`);
+    }
+    if (argv.coverage) {
+        args.push(`-c`, `jestconfigs/jest.coverage.config.js`);
+    } else {
+        const cfgname = [target, format].filter(Boolean).join('.');
+        args.push(`-c`, `jestconfigs/jest.${cfgname}.config.js`, `test/unit/`);
+    }
     opts.env = {
         ...opts.env,
         TEST_TARGET: target,
diff --git a/js/gulp/typescript-task.js b/js/gulp/typescript-task.js
index a56de42d381..928e473141b 100644
--- a/js/gulp/typescript-task.js
+++ b/js/gulp/typescript-task.js
@@ -31,7 +31,6 @@ const { memoizeTask } = require('./memoize-task');
 const { Observable, ReplaySubject } = require('rxjs');
 
 const typescriptTask = ((cache) => memoizeTask(cache, function typescript(target, format) {
-
     if (shouldRunInChildProcess(target, format)) {
         return spawnGulpCommandInChildProcess('compile', target, format);
     }
@@ -55,10 +54,11 @@ function compileTypescript(out, tsconfigPath, tsconfigOverrides) {
       tsProject.src(), sourcemaps.init(),
       tsProject(ts.reporter.defaultReporter())
     );
-    const writeDTypes = observableFromStreams(dts, gulp.dest(out));
+    const writeSources = observableFromStreams(tsProject.src(), gulp.dest(out));
+    const writeDTypes = observableFromStreams(dts, sourcemaps.write('./', { includeContent: false }), gulp.dest(out));
     const mapFile = tsProject.options.module === 5 ? esmMapFile : cjsMapFile;
-    const writeJS = observableFromStreams(js, sourcemaps.write('./', { mapFile }), gulp.dest(out));
-    return Observable.forkJoin(writeDTypes, writeJS);
+    const writeJS = observableFromStreams(js, sourcemaps.write('./', { mapFile, includeContent: false }), gulp.dest(out));
+    return Observable.forkJoin(writeSources, writeDTypes, writeJS);
 }
 
 function cjsMapFile(mapFilePath) { return mapFilePath; }
diff --git a/js/gulp/util.js b/js/gulp/util.js
index cf53d0ef18b..b86bb656e06 100644
--- a/js/gulp/util.js
+++ b/js/gulp/util.js
@@ -121,12 +121,12 @@ function* combinations(_targets, _modules) {
     const targets = known(knownTargets, _targets || [`all`]);
     const modules = known(knownModules, _modules || [`all`]);
 
-    if (_targets.indexOf(`src`) > -1) {
+    if (_targets.includes(`src`)) {
         yield [`src`, ``];
         return;
     }
 
-    if (_targets.indexOf(`all`) > -1 && _modules.indexOf(`all`) > -1) {
+    if (_targets.includes(`all`) && _modules.includes(`all`)) {
         yield [`ts`, ``];
         yield [`src`, ``];
         yield [npmPkgName, ``];
@@ -139,11 +139,11 @@ function* combinations(_targets, _modules) {
     }
 
     function known(known, values) {
-        return ~values.indexOf(`all`) ? known
-            :  ~values.indexOf(`src`) ? [`src`]
+        return values.includes(`all`) ? known
+            :  values.includes(`src`) ? [`src`]
             : Object.keys(
                 values.reduce((map, arg) => ((
-                    (known.indexOf(arg) !== -1) &&
+                    (known.includes(arg)) &&
                     (map[arg.toLowerCase()] = true)
                     || true) && map
                 ), {})
@@ -181,5 +181,7 @@ module.exports = {
     knownTargets, knownModules, tasksToSkipPerTargetOrFormat, gCCLanguageNames,
 
     taskName, packageName, tsconfigName, targetDir, combinations, observableFromStreams,
-    publicModulePaths, esmRequire, shouldRunInChildProcess, spawnGulpCommandInChildProcess
+    publicModulePaths, esmRequire, shouldRunInChildProcess, spawnGulpCommandInChildProcess,
+
+    targetAndModuleCombinations: [...combinations(targets, modules)]
 };
diff --git a/js/gulpfile.js b/js/gulpfile.js
index 019f5b0e056..bd860e16340 100644
--- a/js/gulpfile.js
+++ b/js/gulpfile.js
@@ -18,15 +18,16 @@
 const del = require('del');
 const gulp = require('gulp');
 const { Observable } = require('rxjs');
+const { targets } = require('./gulp/argv');
 const cleanTask = require('./gulp/clean-task');
 const compileTask = require('./gulp/compile-task');
 const packageTask = require('./gulp/package-task');
-const { targets, modules } = require('./gulp/argv');
 const { testTask, createTestData, cleanTestData } = require('./gulp/test-task');
 const {
     taskName, combinations,
     targetDir, knownTargets,
-    npmPkgName, tasksToSkipPerTargetOrFormat
+    npmPkgName, tasksToSkipPerTargetOrFormat,
+    targetAndModuleCombinations
 } = require('./gulp/util');
 
 for (const [target, format] of combinations([`all`], [`all`])) {
@@ -90,9 +91,9 @@ function gulpConcurrent(tasks) {
 
 function getTasks(name) {
     const tasks = [];
-    if (targets.indexOf(`ts`) !== -1) tasks.push(`${name}:ts`);
-    if (targets.indexOf(npmPkgName) !== -1) tasks.push(`${name}:${npmPkgName}`);
-    for (const [target, format] of combinations(targets, modules)) {
+    if (targets.includes(`ts`)) tasks.push(`${name}:ts`);
+    if (targets.includes(npmPkgName)) tasks.push(`${name}:${npmPkgName}`);
+    for (const [target, format] of targetAndModuleCombinations) {
         if (tasksToSkipPerTargetOrFormat[target] && tasksToSkipPerTargetOrFormat[target][name]) continue;
         if (tasksToSkipPerTargetOrFormat[format] && tasksToSkipPerTargetOrFormat[format][name]) continue;
         tasks.push(`${name}:${taskName(target, format)}`);
diff --git a/js/jest.config.js b/js/jest.config.js
index 9ebf291f701..e4795e654a9 100644
--- a/js/jest.config.js
+++ b/js/jest.config.js
@@ -16,38 +16,38 @@
 // under the License.
 
 module.exports = {
-    "verbose": false,
-    "testEnvironment": "node",
-    "globals": {
-      "ts-jest": {
-        "diagnostics": false,
-        "tsconfig": "test/tsconfig.json"
-      }
+  verbose: false,
+  testEnvironment: "node",
+  globals: {
+    "ts-jest": {
+      diagnostics: false,
+      tsconfig: "test/tsconfig.json",
+      useESM: true,
     },
-    "roots": [
-      "<rootDir>/test/"
-    ],
-    "moduleFileExtensions": [
-      "js",
-      "ts",
-      "tsx"
-    ],
-    "coverageReporters": [
-      "lcov"
-    ],
-    "coveragePathIgnorePatterns": [
-      "fb\\/(File|Message|Schema|Tensor)\\.(js|ts)$",
-      "test\\/.*\\.(ts|tsx|js)$",
-      "/node_modules/"
-    ],
-    "transform": {
-      "^.+\\.jsx?$": "ts-jest",
-      "^.+\\.tsx?$": "ts-jest"
-    },
-    "transformIgnorePatterns": [
-      "/node_modules/(?!web-stream-tools).+\\.js$"
-    ],
-    "testRegex": "(.*(-|\\.)(test|spec)s?)\\.(ts|tsx|js)$",
-    "preset": "ts-jest",
-    "testMatch": null
+  },
+  rootDir: ".",
+  roots: ["<rootDir>/test/"],
+  preset: "ts-jest/presets/default-esm",
+  moduleFileExtensions: ["js", "ts"],
+  coverageReporters: ["lcov", "json"],
+  coveragePathIgnorePatterns: [
+    "fb\\/(File|Message|Schema|Tensor)\\.(js|ts)$",
+    "test\\/.*\\.(ts|js)$",
+    "/node_modules/",
+  ],
+  transform: {
+    "^.+\\.js$": "ts-jest",
+    "^.+\\.ts$": "ts-jest",
+  },
+  transformIgnorePatterns: [
+    "/targets/(es5|es2015|esnext|apache-arrow)/",
+    "/node_modules/(?!@openpgp/web-stream-tools)/",
+  ],
+  testRegex: "(.*(-|\\.)(test|spec)s?)\\.(ts|js)$",
+  testMatch: null,
+  moduleNameMapper: {
+    "^apache-arrow$": "<rootDir>/src/Arrow.node",
+    "^apache-arrow(.*)": "<rootDir>/src$1",
+    flatbuffers: "flatbuffers/js/flatbuffers.mjs",
+  },
 };
diff --git a/js/jestconfigs/jest.apache-arrow.config.js b/js/jestconfigs/jest.apache-arrow.config.js
new file mode 100644
index 00000000000..9bd011c7aec
--- /dev/null
+++ b/js/jestconfigs/jest.apache-arrow.config.js
@@ -0,0 +1,31 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+module.exports = {
+  ...require("../jest.config"),
+  rootDir: "../",
+  preset: "ts-jest",
+  globals: {
+    "ts-jest": {
+      diagnostics: false,
+      tsconfig: "<rootDir>/test/tsconfig/tsconfig.apache-arrow.json",
+    },
+  },
+  moduleNameMapper: {
+    "^apache-arrow(.*)": "<rootDir>/targets/apache-arrow$1",
+  },
+};
diff --git a/js/jest.coverage.config.js b/js/jestconfigs/jest.coverage.config.js
similarity index 76%
rename from js/jest.coverage.config.js
rename to js/jestconfigs/jest.coverage.config.js
index 37917720367..3b0b6a1c6c4 100644
--- a/js/jest.coverage.config.js
+++ b/js/jestconfigs/jest.coverage.config.js
@@ -16,15 +16,15 @@
 // under the License.
 
 module.exports = {
-    ...require('./jest.config'),
-    reporters: undefined,
-    coverageReporters: [
-        lcov, 'json'
-    ],
-    globals: {
-        'ts-jest': {
-            diagnostics: false,
-            tsconfig: 'test/tsconfig.coverage.json'
-        }
-    }
+  ...require("../jest.config"),
+  rootDir: "../",
+  collectCoverage: true,
+  reporters: undefined,
+  globals: {
+    "ts-jest": {
+      diagnostics: false,
+      tsconfig: "<rootDir>/test/tsconfig/tsconfig.coverage.json",
+      useESM: true,
+    },
+  },
 };
diff --git a/js/jestconfigs/jest.es2015.cjs.config.js b/js/jestconfigs/jest.es2015.cjs.config.js
new file mode 100644
index 00000000000..a07bf8418a8
--- /dev/null
+++ b/js/jestconfigs/jest.es2015.cjs.config.js
@@ -0,0 +1,31 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+module.exports = {
+  ...require("../jest.config"),
+  rootDir: "../",
+  preset: "ts-jest",
+  globals: {
+    "ts-jest": {
+      diagnostics: false,
+      tsconfig: "<rootDir>/test/tsconfig/tsconfig.es2015.cjs.json",
+    },
+  },
+  moduleNameMapper: {
+    "^apache-arrow(.*)": "<rootDir>/targets/es2015/cjs$1",
+  },
+};
diff --git a/js/jestconfigs/jest.es2015.esm.config.js b/js/jestconfigs/jest.es2015.esm.config.js
new file mode 100644
index 00000000000..faf59792cdd
--- /dev/null
+++ b/js/jestconfigs/jest.es2015.esm.config.js
@@ -0,0 +1,33 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+module.exports = {
+  ...require("../jest.config"),
+  rootDir: "../",
+  globals: {
+    "ts-jest": {
+      diagnostics: false,
+      tsconfig: "<rootDir>/test/tsconfig/tsconfig.es2015.esm.json",
+      useESM: true,
+    },
+  },
+  moduleNameMapper: {
+    "^apache-arrow(.*)": "<rootDir>/targets/es2015/esm$1",
+    tslib: "tslib/tslib.es6.js",
+    flatbuffers: "flatbuffers/js/flatbuffers.mjs",
+  },
+};
diff --git a/js/jestconfigs/jest.es2015.umd.config.js b/js/jestconfigs/jest.es2015.umd.config.js
new file mode 100644
index 00000000000..1e861e0eedf
--- /dev/null
+++ b/js/jestconfigs/jest.es2015.umd.config.js
@@ -0,0 +1,31 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+module.exports = {
+  ...require("../jest.config"),
+  rootDir: "../",
+  preset: "ts-jest",
+  globals: {
+    "ts-jest": {
+      diagnostics: false,
+      tsconfig: "<rootDir>/test/tsconfig/tsconfig.es2015.umd.json",
+    },
+  },
+  moduleNameMapper: {
+    "^apache-arrow(.*)": "<rootDir>/targets/es2015/umd/Arrow.js",
+  },
+};
diff --git a/js/jestconfigs/jest.es5.cjs.config.js b/js/jestconfigs/jest.es5.cjs.config.js
new file mode 100644
index 00000000000..c65c71b2ca3
--- /dev/null
+++ b/js/jestconfigs/jest.es5.cjs.config.js
@@ -0,0 +1,31 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+module.exports = {
+  ...require("../jest.config"),
+  rootDir: "../",
+  preset: "ts-jest",
+  globals: {
+    "ts-jest": {
+      diagnostics: false,
+      tsconfig: "<rootDir>/test/tsconfig/tsconfig.es5.cjs.json",
+    },
+  },
+  moduleNameMapper: {
+    "^apache-arrow(.*)": "<rootDir>/targets/es5/cjs$1",
+  },
+};
diff --git a/js/jestconfigs/jest.es5.esm.config.js b/js/jestconfigs/jest.es5.esm.config.js
new file mode 100644
index 00000000000..ca46e9162f7
--- /dev/null
+++ b/js/jestconfigs/jest.es5.esm.config.js
@@ -0,0 +1,33 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+module.exports = {
+  ...require("../jest.config"),
+  rootDir: "../",
+  globals: {
+    "ts-jest": {
+      diagnostics: false,
+      tsconfig: "<rootDir>/test/tsconfig/tsconfig.es5.esm.json",
+      useESM: true,
+    },
+  },
+  moduleNameMapper: {
+    "^apache-arrow(.*)": "<rootDir>/targets/es5/esm$1",
+    tslib: "tslib/tslib.es6.js",
+    flatbuffers: "flatbuffers/js/flatbuffers.mjs",
+  },
+};
diff --git a/js/jestconfigs/jest.es5.umd.config.js b/js/jestconfigs/jest.es5.umd.config.js
new file mode 100644
index 00000000000..893a46149d2
--- /dev/null
+++ b/js/jestconfigs/jest.es5.umd.config.js
@@ -0,0 +1,31 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+module.exports = {
+  ...require("../jest.config"),
+  rootDir: "../",
+  preset: "ts-jest",
+  globals: {
+    "ts-jest": {
+      diagnostics: false,
+      tsconfig: "<rootDir>/test/tsconfig/tsconfig.es5.umd.json",
+    },
+  },
+  moduleNameMapper: {
+    "^apache-arrow(.*)": "<rootDir>/targets/es5/umd/Arrow.js",
+  },
+};
diff --git a/js/jestconfigs/jest.esnext.cjs.config.js b/js/jestconfigs/jest.esnext.cjs.config.js
new file mode 100644
index 00000000000..26cb9c60634
--- /dev/null
+++ b/js/jestconfigs/jest.esnext.cjs.config.js
@@ -0,0 +1,31 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+module.exports = {
+  ...require("../jest.config"),
+  rootDir: "../",
+  preset: "ts-jest",
+  globals: {
+    "ts-jest": {
+      diagnostics: false,
+      tsconfig: "<rootDir>/test/tsconfig/tsconfig.esnext.cjs.json",
+    },
+  },
+  moduleNameMapper: {
+    "^apache-arrow(.*)": "<rootDir>/targets/esnext/cjs$1",
+  },
+};
diff --git a/js/jestconfigs/jest.esnext.esm.config.js b/js/jestconfigs/jest.esnext.esm.config.js
new file mode 100644
index 00000000000..26393694908
--- /dev/null
+++ b/js/jestconfigs/jest.esnext.esm.config.js
@@ -0,0 +1,33 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+module.exports = {
+  ...require("../jest.config"),
+  rootDir: "../",
+  globals: {
+    "ts-jest": {
+      diagnostics: false,
+      tsconfig: "<rootDir>/test/tsconfig/tsconfig.esnext.esm.json",
+      useESM: true,
+    },
+  },
+  moduleNameMapper: {
+    "^apache-arrow(.*)": "<rootDir>/targets/esnext/esm$1",
+    tslib: "tslib/tslib.es6.js",
+    flatbuffers: "flatbuffers/js/flatbuffers.mjs",
+  },
+};
diff --git a/js/jestconfigs/jest.esnext.umd.config.js b/js/jestconfigs/jest.esnext.umd.config.js
new file mode 100644
index 00000000000..eeee01fc993
--- /dev/null
+++ b/js/jestconfigs/jest.esnext.umd.config.js
@@ -0,0 +1,31 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+module.exports = {
+  ...require("../jest.config"),
+  rootDir: "../",
+  preset: "ts-jest",
+  globals: {
+    "ts-jest": {
+      diagnostics: false,
+      tsconfig: "<rootDir>/test/tsconfig/tsconfig.esnext.umd.json",
+    },
+  },
+  moduleNameMapper: {
+    "^apache-arrow(.*)": "<rootDir>/targets/esnext/umd/Arrow.js",
+  },
+};
diff --git a/js/jestconfigs/jest.src.config.js b/js/jestconfigs/jest.src.config.js
new file mode 100644
index 00000000000..08ccad061ba
--- /dev/null
+++ b/js/jestconfigs/jest.src.config.js
@@ -0,0 +1,28 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+module.exports = {
+  ...require("../jest.config"),
+  rootDir: "../",
+  globals: {
+    "ts-jest": {
+      diagnostics: false,
+      tsconfig: "<rootDir>/test/tsconfig/tsconfig.src.json",
+      useESM: true,
+    },
+  },
+};
diff --git a/js/jestconfigs/jest.ts.config.js b/js/jestconfigs/jest.ts.config.js
new file mode 100644
index 00000000000..7e9ac9b603d
--- /dev/null
+++ b/js/jestconfigs/jest.ts.config.js
@@ -0,0 +1,32 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+module.exports = {
+  ...require("../jest.config"),
+  rootDir: "../",
+  globals: {
+    "ts-jest": {
+      diagnostics: false,
+      tsconfig: "<rootDir>/test/tsconfig/tsconfig.ts.json",
+      useESM: true,
+    },
+  },
+  moduleNameMapper: {
+    "^apache-arrow(.*)": "<rootDir>/targets/ts$1",
+    flatbuffers: "flatbuffers/js/flatbuffers.mjs",
+  },
+};
diff --git a/js/package.json b/js/package.json
index 972c2caf82f..040bb4ff5b6 100644
--- a/js/package.json
+++ b/js/package.json
@@ -21,8 +21,7 @@
     "doc": "del-cli ./doc && typedoc --options typedoc.js",
     "lint": "eslint src test --fix",
     "lint:ci": "eslint src test",
-    "prepublishOnly": "echo \"Error: do 'yarn release' instead of 'npm publish'\" && exit 1",
-    "version": "yarn && yarn clean:all"
+    "prepublishOnly": "echo \"Error: do 'yarn release' instead of 'npm publish'\" && exit 1"
   },
   "repository": {
     "type": "git",
@@ -42,14 +41,14 @@
     "bin",
     "src",
     "gulp",
+    "jestconfigs",
     "test",
     "*.json",
-    "tsconfig",
+    "tsconfigs",
     "README.md",
     "gulpfile.js",
     "npm-release.sh",
-    "jest.config.js",
-    "jest.coverage.config.js"
+    "jest.config.js"
   ],
   "dependencies": {
     "@types/flatbuffers": "^1.10.0",
@@ -59,12 +58,13 @@
     "flatbuffers": "1.12.0",
     "json-bignum": "^0.0.3",
     "pad-left": "^2.1.0",
-    "tslib": "^2.2.0"
+    "tslib": "^2.3.0"
   },
   "devDependencies": {
+    "@openpgp/web-stream-tools": "0.0.5",
     "@types/glob": "7.1.3",
     "@types/jest": "26.0.23",
-    "@types/multistream": "2.1.1",
+    "@types/randomatic": "3.1.2",
     "@typescript-eslint/eslint-plugin": "4.25.0",
     "@typescript-eslint/parser": "4.25.0",
     "async-done": "1.3.2",
@@ -92,16 +92,15 @@
     "npm-run-all": "4.1.5",
     "randomatic": "3.1.1",
     "rxjs": "5.5.11",
-    "ts-jest": "27.0.0",
+    "ts-jest": "27.0.3",
     "ts-node": "10.0.0",
     "typedoc": "0.20.36",
     "typescript": "4.0.2",
-    "web-stream-tools": "0.0.1",
     "web-streams-polyfill": "3.0.3",
     "xml2js": "0.4.23"
   },
   "engines": {
-    "node": ">=11.12"
+    "node": ">=12.0"
   },
   "version": "5.0.0-SNAPSHOT"
 }
diff --git a/js/src/Arrow.dom.ts b/js/src/Arrow.dom.ts
index 38729797a3a..07f0c8b8e06 100644
--- a/js/src/Arrow.dom.ts
+++ b/js/src/Arrow.dom.ts
@@ -109,4 +109,5 @@ export {
     TimeBuilder, TimeSecondBuilder, TimeMillisecondBuilder, TimeMicrosecondBuilder, TimeNanosecondBuilder,
     UnionBuilder, DenseUnionBuilder, SparseUnionBuilder,
     Utf8Builder,
+    isTypedArray,
 } from './Arrow';
diff --git a/js/src/Arrow.ts b/js/src/Arrow.ts
index 41408c673ae..8bf29631039 100644
--- a/js/src/Arrow.ts
+++ b/js/src/Arrow.ts
@@ -132,3 +132,5 @@ export const util = {
     compareFields,
     compareTypes,
 };
+
+export { isTypedArray } from './util/args';
diff --git a/js/test/Arrow.ts b/js/test/Arrow.ts
index 8fe53b019d2..de2bc58c715 100644
--- a/js/test/Arrow.ts
+++ b/js/test/Arrow.ts
@@ -15,48 +15,6 @@
 // specific language governing permissions and limitations
 // under the License.
 
-// Dynamically load an Arrow target build based on command line arguments
+import 'web-streams-polyfill';
 
-import 'web-streams-polyfill/es6';
-
-// import this before assigning window global since it does a `typeof window` check
-require('web-stream-tools');
-
-(<any> global).window = (<any> global).window || global;
-
-// Fix for Jest in node v10.x
-Object.defineProperty(Object, Symbol.hasInstance, {
-    writable: true,
-    configurable: true,
-    value(inst: any) {
-        return inst?.constructor && inst.constructor.name === 'Object';
-    }
-});
-Object.defineProperty(ArrayBuffer, Symbol.hasInstance, {
-    writable: true,
-    configurable: true,
-    value(inst: any) {
-        return inst?.constructor && inst.constructor.name === 'ArrayBuffer';
-    }
-});
-
-// these are duplicated in the gulpfile :<
-const targets = [`es5`, `es2015`, `esnext`];
-const formats = [`cjs`, `esm`, `cls`, `umd`];
-
-const path = require('path');
-const target = process.env.TEST_TARGET!;
-const format = process.env.TEST_MODULE!;
-const useSrc = process.env.TEST_TS_SOURCE === `true` || (!~targets.indexOf(target) || !~formats.indexOf(format));
-
-let modulePath = ``;
-
-if (useSrc) modulePath = '../src';
-else if (target === `ts` || target === `apache-arrow`) modulePath = target;
-else modulePath = path.join(target, format);
-
-modulePath = path.resolve(`./targets`, modulePath);
-modulePath = path.join(modulePath, `Arrow${format === 'umd' ? '' : '.node'}`);
-const Arrow: typeof import('../src/Arrow') = require(modulePath);
-
-export = Arrow;
+export * from 'apache-arrow';
diff --git a/js/test/generate-test-data.ts b/js/test/generate-test-data.ts
index 3b83bd149f2..030176e629b 100644
--- a/js/test/generate-test-data.ts
+++ b/js/test/generate-test-data.ts
@@ -15,8 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
-const randomatic = require('randomatic');
-import { VectorType as V } from '../src/interfaces';
+import randomatic from 'randomatic';
+import { VectorType as V } from 'apache-arrow/interfaces';
 
 import {
     Data, Vector, Visitor, DataType,
diff --git a/js/test/inference/column.ts b/js/test/inference/column.ts
index 03837612425..440116b69c9 100644
--- a/js/test/inference/column.ts
+++ b/js/test/inference/column.ts
@@ -17,11 +17,11 @@
 
 /* eslint-disable jest/no-standalone-expect */
 
-import { Data } from '../../src/data';
-import { Field } from '../../src/schema';
-import { Column } from '../../src/column';
-import { Vector } from '../../src/vector';
-import { Bool, Int8, Utf8, List, Dictionary, Struct } from '../../src/type';
+import { Data } from 'apache-arrow/data';
+import { Field } from 'apache-arrow/schema';
+import { Column } from 'apache-arrow/column';
+import { Vector } from 'apache-arrow/vector';
+import { Bool, Int8, Utf8, List, Dictionary, Struct } from 'apache-arrow/type';
 
 const boolType = new Bool();
 const boolVector = Vector.new(Data.Bool(boolType, 0, 10, 0, null, new Uint8Array(2)));
diff --git a/js/test/inference/nested.ts b/js/test/inference/nested.ts
index 510da89e9f6..0e3dc95e3f4 100644
--- a/js/test/inference/nested.ts
+++ b/js/test/inference/nested.ts
@@ -15,11 +15,11 @@
 // specific language governing permissions and limitations
 // under the License.
 
-import { Data } from '../../src/data';
-import { Field } from '../../src/schema';
-import { DataType } from '../../src/type';
-import { Vector, BoolVector } from '../../src/vector/index';
-import { Bool, Int8, Utf8, List, Dictionary, Struct } from '../../src/type';
+import { Data } from 'apache-arrow/data';
+import { Field } from 'apache-arrow/schema';
+import { DataType } from 'apache-arrow/type';
+import { Vector, BoolVector } from 'apache-arrow/vector/index';
+import { Bool, Int8, Utf8, List, Dictionary, Struct } from 'apache-arrow/type';
 
 type NamedSchema = { a: Int8; b: Utf8; c: Dictionary<List<Bool>>; [idx: string]: DataType };
 type IndexSchema = { 0: Int8; 1: Utf8; 2: Dictionary<List<Bool>>; [idx: number]: DataType };
diff --git a/js/test/inference/visitor/get.ts b/js/test/inference/visitor/get.ts
index ad7605f7e21..a983d94d19f 100644
--- a/js/test/inference/visitor/get.ts
+++ b/js/test/inference/visitor/get.ts
@@ -20,7 +20,7 @@ import {
     Bool, List, Dictionary
 } from '../../Arrow';
 
-import { instance as getVisitor } from '../../../src/visitor/get';
+import { instance as getVisitor } from 'apache-arrow/visitor/get';
 
 const data_Bool = new Data(new Bool(), 0, 0);
 const data_List_Bool = new Data(new List<Bool>(null as any), 0, 0);
diff --git a/js/test/tsconfig.json b/js/test/tsconfig.json
index c4977d5d694..8cf2e7e7b66 100644
--- a/js/test/tsconfig.json
+++ b/js/test/tsconfig.json
@@ -1,18 +1,24 @@
 {
   "extends": "../tsconfig.json",
-  "include": ["./**/*.ts"],
+  "include": ["../src/**/*.ts", "../test/**/*.ts"],
   "compilerOptions": {
     "target": "esnext",
-    "module": "commonjs",
+    "module": "es2020",
     "allowJs": true,
     "declaration": false,
+    "declarationMap": false,
     "importHelpers": false,
+    "noEmit": true,
     "noEmitHelpers": false,
     "noEmitOnError": false,
-
-    "sourceMap": false,
+    "sourceMap": true,
     "inlineSources": false,
     "inlineSourceMap": false,
-    "downlevelIteration": false
+    "downlevelIteration": false,
+    "baseUrl": "../",
+    "paths": {
+      "apache-arrow": ["src/Arrow.node"],
+      "apache-arrow/*": ["src/*"]
+    }
   }
 }
diff --git a/js/test/tsconfig/tsconfig.apache-arrow.json b/js/test/tsconfig/tsconfig.apache-arrow.json
new file mode 100644
index 00000000000..161374e02e1
--- /dev/null
+++ b/js/test/tsconfig/tsconfig.apache-arrow.json
@@ -0,0 +1,8 @@
+// TypeScript configuration for the apache-arrow target's tests
+{
+  "extends": "./tsconfig.base.json",
+  "compilerOptions": {
+    "target": "esnext",
+    "module": "commonjs"
+  }
+}
diff --git a/js/test/tsconfig/tsconfig.base.json b/js/test/tsconfig/tsconfig.base.json
new file mode 100644
index 00000000000..fcae71fb45d
--- /dev/null
+++ b/js/test/tsconfig/tsconfig.base.json
@@ -0,0 +1,26 @@
+// Base TypeScript configuration for all targets' tests
+{
+  "extends": "../../tsconfig/tsconfig.base.json",
+  "compilerOptions": {
+    "target": "esnext",
+    "module": "commonjs",
+    "allowJs": true,
+    "declaration": false,
+    "importHelpers": false,
+    "noEmit": false,
+    "noEmitHelpers": false,
+    "noEmitOnError": false,
+    "sourceMap": true,
+    "inlineSources": false,
+    "inlineSourceMap": false,
+    "downlevelIteration": false,
+    "esModuleInterop": true,
+    "baseUrl": "../../",
+    "paths": {
+      "apache-arrow": ["src/Arrow.node"],
+      "apache-arrow/*": ["src/*"]
+    }
+  },
+  "exclude": ["../../node_modules"],
+  "include": ["../../src/**/*.ts"]
+}
diff --git a/js/test/tsconfig.coverage.json b/js/test/tsconfig/tsconfig.coverage.json
similarity index 73%
rename from js/test/tsconfig.coverage.json
rename to js/test/tsconfig/tsconfig.coverage.json
index 6830bfb9d66..e903aa1e5b7 100644
--- a/js/test/tsconfig.coverage.json
+++ b/js/test/tsconfig/tsconfig.coverage.json
@@ -1,6 +1,6 @@
 {
   "extends": "./tsconfig.json",
   "compilerOptions": {
-    "target": "es2015"
+    "target": "esnext"
   }
 }
diff --git a/js/test/tsconfig/tsconfig.es2015.cjs.json b/js/test/tsconfig/tsconfig.es2015.cjs.json
new file mode 100644
index 00000000000..ed600bc24d2
--- /dev/null
+++ b/js/test/tsconfig/tsconfig.es2015.cjs.json
@@ -0,0 +1,8 @@
+// TypeScript configuration for the ES2015 CommonJS target's tests
+{
+  "extends": "./tsconfig.base.json",
+  "compilerOptions": {
+    "target": "esnext",
+    "module": "commonjs"
+  }
+}
diff --git a/js/test/tsconfig/tsconfig.es2015.esm.json b/js/test/tsconfig/tsconfig.es2015.esm.json
new file mode 100644
index 00000000000..a030beba7c1
--- /dev/null
+++ b/js/test/tsconfig/tsconfig.es2015.esm.json
@@ -0,0 +1,8 @@
+// TypeScript configuration for the ES2015 ESModules target's tests
+{
+  "extends": "./tsconfig.base.json",
+  "compilerOptions": {
+    "target": "esnext",
+    "module": "es2020"
+  }
+}
diff --git a/js/test/tsconfig/tsconfig.es2015.umd.json b/js/test/tsconfig/tsconfig.es2015.umd.json
new file mode 100644
index 00000000000..3e4de6f3cb5
--- /dev/null
+++ b/js/test/tsconfig/tsconfig.es2015.umd.json
@@ -0,0 +1,11 @@
+// TypeScript configuration for the ES2015 Closure Compiler target's tests
+{
+  "extends": "./tsconfig.base.json",
+  "compilerOptions": {
+    "target": "esnext",
+    "module": "umd",
+    "declaration": false,
+    "noEmitHelpers": true,
+    "importHelpers": true
+  }
+}
diff --git a/js/test/tsconfig/tsconfig.es5.cjs.json b/js/test/tsconfig/tsconfig.es5.cjs.json
new file mode 100644
index 00000000000..edcd6977366
--- /dev/null
+++ b/js/test/tsconfig/tsconfig.es5.cjs.json
@@ -0,0 +1,9 @@
+// TypeScript configuration for the ES5 CommonJS target's tests
+{
+  "extends": "./tsconfig.base.json",
+  "compilerOptions": {
+    "target": "esnext",
+    "module": "commonjs",
+    "downlevelIteration": true
+  }
+}
diff --git a/js/test/tsconfig/tsconfig.es5.esm.json b/js/test/tsconfig/tsconfig.es5.esm.json
new file mode 100644
index 00000000000..01af8fabdfe
--- /dev/null
+++ b/js/test/tsconfig/tsconfig.es5.esm.json
@@ -0,0 +1,9 @@
+// TypeScript configuration for the ES5 ESModules target's tests
+{
+  "extends": "./tsconfig.base.json",
+  "compilerOptions": {
+    "target": "esnext",
+    "module": "es2020",
+    "downlevelIteration": true
+  }
+}
diff --git a/js/test/tsconfig/tsconfig.es5.umd.json b/js/test/tsconfig/tsconfig.es5.umd.json
new file mode 100644
index 00000000000..445ec8809b7
--- /dev/null
+++ b/js/test/tsconfig/tsconfig.es5.umd.json
@@ -0,0 +1,12 @@
+// TypeScript configuration for the ES5 Closure Compiler target's tests
+{
+  "extends": "./tsconfig.base.json",
+  "compilerOptions": {
+    "target": "esnext",
+    "module": "umd",
+    "declaration": false,
+    "noEmitHelpers": true,
+    "importHelpers": true,
+    "downlevelIteration": true
+  }
+}
diff --git a/js/test/tsconfig/tsconfig.esnext.cjs.json b/js/test/tsconfig/tsconfig.esnext.cjs.json
new file mode 100644
index 00000000000..6f21fd56c50
--- /dev/null
+++ b/js/test/tsconfig/tsconfig.esnext.cjs.json
@@ -0,0 +1,8 @@
+// TypeScript configuration for the ESNext CommonJS target's tests
+{
+  "extends": "./tsconfig.base.json",
+  "compilerOptions": {
+    "target": "esnext",
+    "module": "commonjs"
+  }
+}
diff --git a/js/test/tsconfig/tsconfig.esnext.esm.json b/js/test/tsconfig/tsconfig.esnext.esm.json
new file mode 100644
index 00000000000..3a9c277453e
--- /dev/null
+++ b/js/test/tsconfig/tsconfig.esnext.esm.json
@@ -0,0 +1,8 @@
+// TypeScript configuration for the ESNext ESModules target's tests
+{
+  "extends": "./tsconfig.base.json",
+  "compilerOptions": {
+    "target": "esnext",
+    "module": "es2020"
+  }
+}
diff --git a/js/test/tsconfig/tsconfig.esnext.umd.json b/js/test/tsconfig/tsconfig.esnext.umd.json
new file mode 100644
index 00000000000..baccc6994c1
--- /dev/null
+++ b/js/test/tsconfig/tsconfig.esnext.umd.json
@@ -0,0 +1,11 @@
+// TypeScript configuration for the ESNext Closure Compiler target's tests
+{
+  "extends": "./tsconfig.base.json",
+  "compilerOptions": {
+    "target": "esnext",
+    "module": "umd",
+    "declaration": false,
+    "noEmitHelpers": true,
+    "importHelpers": true
+  }
+}
diff --git a/js/test/tsconfig/tsconfig.src.json b/js/test/tsconfig/tsconfig.src.json
new file mode 100644
index 00000000000..5413898f79d
--- /dev/null
+++ b/js/test/tsconfig/tsconfig.src.json
@@ -0,0 +1,8 @@
+// TypeScript configuration for the source target's tests
+{
+  "extends": "./tsconfig.base.json",
+  "compilerOptions": {
+    "target": "esnext",
+    "module": "es2020"
+  }
+}
diff --git a/js/test/tsconfig/tsconfig.ts.json b/js/test/tsconfig/tsconfig.ts.json
new file mode 100644
index 00000000000..1e053698e99
--- /dev/null
+++ b/js/test/tsconfig/tsconfig.ts.json
@@ -0,0 +1,8 @@
+// TypeScript configuration for the TypeScript target's tests
+{
+  "extends": "./tsconfig.base.json",
+  "compilerOptions": {
+    "target": "esnext",
+    "module": "es2020"
+  }
+}
diff --git a/js/test/unit/bit-tests.ts b/js/test/unit/bit-tests.ts
index de9c5ee67a7..cdfb37c1681 100644
--- a/js/test/unit/bit-tests.ts
+++ b/js/test/unit/bit-tests.ts
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-import * as Arrow from '../Arrow';
+import * as Arrow from 'apache-arrow';
 const { BitIterator, getBool } = Arrow.util;
 
 describe('Bits', () => {
diff --git a/js/test/unit/builders/builder-tests.ts b/js/test/unit/builders/builder-tests.ts
index 6817999a034..87dbcabfcf6 100644
--- a/js/test/unit/builders/builder-tests.ts
+++ b/js/test/unit/builders/builder-tests.ts
@@ -19,7 +19,7 @@ import '../../jest-extensions';
 import { AsyncIterable } from 'ix';
 import { validateVector } from './utils';
 import * as generate from '../../generate-test-data';
-import { Type, DataType, Chunked, util, Builder, UnionVector } from '../../Arrow';
+import { Type, DataType, Chunked, util, Builder, UnionVector } from 'apache-arrow';
 
 const testDOMStreams = process.env.TEST_DOM_STREAMS === 'true';
 const testNodeStreams = process.env.TEST_NODE_STREAMS === 'true';
@@ -227,9 +227,9 @@ function fillNADefault(values: any[], nulls: any[]): any[] {
     });
 }
 
-type BuilderOptions<T extends DataType = any, TNull = any> = import('../../../src/builder').BuilderOptions<T, TNull>;
-type BuilderDuplexOptions<T extends DataType = any, TNull = any> = import('../../../src/io/node/builder').BuilderDuplexOptions<T, TNull>;
-type BuilderTransformOptions<T extends DataType = any, TNull = any> = import('../../../src/io/whatwg/builder').BuilderTransformOptions<T, TNull>;
+type BuilderOptions<T extends DataType = any, TNull = any> = import('apache-arrow/builder').BuilderOptions<T, TNull>;
+type BuilderDuplexOptions<T extends DataType = any, TNull = any> = import('apache-arrow/io/node/builder').BuilderDuplexOptions<T, TNull>;
+type BuilderTransformOptions<T extends DataType = any, TNull = any> = import('apache-arrow/io/whatwg/builder').BuilderTransformOptions<T, TNull>;
 
 async function encodeSingle<T extends DataType, TNull = any>(values: (T['TValue'] | TNull)[], options: BuilderOptions<T, TNull>) {
     const builder = Builder.new(options);
diff --git a/js/test/unit/builders/date-tests.ts b/js/test/unit/builders/date-tests.ts
index 812383494c1..5a9cc092b16 100644
--- a/js/test/unit/builders/date-tests.ts
+++ b/js/test/unit/builders/date-tests.ts
@@ -16,7 +16,7 @@
 // under the License.
 
 import { validateVector } from './utils';
-import { Vector, DateDay, DateMillisecond } from '../../Arrow';
+import { Vector, DateDay, DateMillisecond } from 'apache-arrow';
 import {
     encodeAll,
     encodeEach,
diff --git a/js/test/unit/builders/dictionary-tests.ts b/js/test/unit/builders/dictionary-tests.ts
index 9314aced047..19b3603bce1 100644
--- a/js/test/unit/builders/dictionary-tests.ts
+++ b/js/test/unit/builders/dictionary-tests.ts
@@ -16,7 +16,7 @@
 // under the License.
 
 import { validateVector } from './utils';
-import { Dictionary, Utf8, Int32, Vector } from '../../Arrow';
+import { Dictionary, Utf8, Int32, Vector } from 'apache-arrow';
 import {
     encodeAll,
     encodeEach,
diff --git a/js/test/unit/builders/int64-tests.ts b/js/test/unit/builders/int64-tests.ts
index 38e6cecd10e..876ce703028 100644
--- a/js/test/unit/builders/int64-tests.ts
+++ b/js/test/unit/builders/int64-tests.ts
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-import { util, Vector, DataType, Int64 } from '../../Arrow';
+import { util, Vector, DataType, Int64 } from 'apache-arrow';
 import {
     validateVector,
     encodeAll, encodeEach, encodeEachDOM, encodeEachNode,
diff --git a/js/test/unit/builders/primitive-tests.ts b/js/test/unit/builders/primitive-tests.ts
index 994d78ed052..3fd515bf406 100644
--- a/js/test/unit/builders/primitive-tests.ts
+++ b/js/test/unit/builders/primitive-tests.ts
@@ -18,7 +18,7 @@
 import {
     Vector, DataType,
     Bool, Int8, Int16, Int32, Uint8, Uint16, Uint32, Float16, Float32, Float64
-} from '../../Arrow';
+} from 'apache-arrow';
 
 import {
     validateVector,
diff --git a/js/test/unit/builders/uint64-tests.ts b/js/test/unit/builders/uint64-tests.ts
index 38802bca04d..e08e25b5c49 100644
--- a/js/test/unit/builders/uint64-tests.ts
+++ b/js/test/unit/builders/uint64-tests.ts
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-import { util, Vector, DataType, Uint64 } from '../../Arrow';
+import { util, Vector, DataType, Uint64 } from 'apache-arrow';
 import {
     validateVector,
     encodeAll, encodeEach, encodeEachDOM, encodeEachNode,
diff --git a/js/test/unit/builders/utf8-tests.ts b/js/test/unit/builders/utf8-tests.ts
index f6bac44e324..212879ab441 100644
--- a/js/test/unit/builders/utf8-tests.ts
+++ b/js/test/unit/builders/utf8-tests.ts
@@ -16,7 +16,7 @@
 // under the License.
 
 import { validateVector } from './utils';
-import { Vector, Utf8 } from '../../Arrow';
+import { Vector, Utf8 } from 'apache-arrow';
 import {
     encodeAll,
     encodeEach,
diff --git a/js/test/unit/builders/utils.ts b/js/test/unit/builders/utils.ts
index 7ec8ca714ab..a9162f64554 100644
--- a/js/test/unit/builders/utils.ts
+++ b/js/test/unit/builders/utils.ts
@@ -17,12 +17,12 @@
 
 import '../../jest-extensions';
 import { AsyncIterable } from 'ix';
-import { util } from '../../Arrow';
-import { Builder } from '../../Arrow';
-import { DataType, Vector, Chunked } from '../../Arrow';
+import { util } from 'apache-arrow';
+import { Builder } from 'apache-arrow';
+import { DataType, Vector, Chunked } from 'apache-arrow';
+import randstr from 'randomatic';
 
 const rand = Math.random.bind(Math);
-const randstr = require('randomatic');
 const randnulls = <T, TNull = null>(values: T[], n: TNull = <any> null) => values.map((x) => Math.random() > 0.25 ? x : n) as (T | TNull)[];
 
 export const randomBytes = (length: number) => fillRandom(Uint8Array, length);
diff --git a/js/test/unit/dataframe-tests.ts b/js/test/unit/dataframe-tests.ts
index 169cc6d1ae8..9e87e372d52 100644
--- a/js/test/unit/dataframe-tests.ts
+++ b/js/test/unit/dataframe-tests.ts
@@ -18,8 +18,9 @@
 import '../jest-extensions';
 import {
     predicate, DataFrame, RecordBatch
-} from '../Arrow';
+} from 'apache-arrow';
 import { test_data } from './table-tests';
+import { jest } from '@jest/globals';
 
 const { col, lit, custom, and, or, And, Or } = predicate;
 
@@ -236,16 +237,6 @@ describe(`DataFrame`, () => {
                     expect(row.dictionary).toEqual(expected_row[DICT]);
                 }
             });
-            // test(`table.toString()`, () => {
-            //     let selected = table.select('i32', 'dictionary');
-            //     let headers = [`"row_id"`, `"i32: Int32"`, `"dictionary: Dictionary<Int8, Utf8>"`];
-            //     let expected = [headers.join(' | '), ...values.map((row, idx) => {
-            //         return [`${idx}`, `${row[I32]}`, `"${row[DICT]}"`].map((str, col) => {
-            //             return leftPad(str, ' ', headers[col].length);
-            //         }).join(' | ');
-            //     })].join('\n') + '\n';
-            //     expect(selected.toString()).toEqual(expected);
-            // });
             test(`table.filter(..).count() on always false predicates returns 0`, () => {
                 expect(df.filter(col('i32').ge(100)).count()).toEqual(0);
                 expect(df.filter(col('dictionary').eq('z')).count()).toEqual(0);
diff --git a/js/test/unit/generated-data-validators.ts b/js/test/unit/generated-data-validators.ts
index 6bcc340e8ff..647932b415b 100644
--- a/js/test/unit/generated-data-validators.ts
+++ b/js/test/unit/generated-data-validators.ts
@@ -22,7 +22,7 @@ import {
     GeneratedVector
 } from '../generate-test-data';
 
-import { util } from '../Arrow';
+import { util } from 'apache-arrow';
 const { createElementComparator: compare } = util;
 
 type DeferredTest = { description: string; tests?: DeferredTest[]; run: (...args: any[]) => any };
diff --git a/js/test/unit/int-tests.ts b/js/test/unit/int-tests.ts
index 09c531e5432..15c75e1a11d 100644
--- a/js/test/unit/int-tests.ts
+++ b/js/test/unit/int-tests.ts
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-import * as Arrow from '../Arrow';
+import * as Arrow from 'apache-arrow';
 const { Int64, Uint64, Int128 } = Arrow.util;
 
 describe(`Uint64`, () => {
diff --git a/js/test/unit/ipc/helpers.ts b/js/test/unit/ipc/helpers.ts
index eebf56f70a0..9fccefec968 100644
--- a/js/test/unit/ipc/helpers.ts
+++ b/js/test/unit/ipc/helpers.ts
@@ -23,13 +23,12 @@ import {
     RecordBatchFileWriter,
     RecordBatchJSONWriter,
     RecordBatchStreamWriter,
-} from '../../Arrow';
+} from 'apache-arrow';
 
 import * as fs from 'fs';
 import { fs as memfs } from 'memfs';
 import { Readable, PassThrough } from 'stream';
-
-const randomatic = require('randomatic');
+import randomatic from 'randomatic';
 
 export abstract class ArrowIOTestHelper {
 
diff --git a/js/test/unit/ipc/message-reader-tests.ts b/js/test/unit/ipc/message-reader-tests.ts
index 7320d6ea7aa..c48aa2ce156 100644
--- a/js/test/unit/ipc/message-reader-tests.ts
+++ b/js/test/unit/ipc/message-reader-tests.ts
@@ -22,7 +22,7 @@ import {
 } from '../../data/tables';
 
 import { ArrowIOTestHelper } from './helpers';
-import { MessageReader, AsyncMessageReader } from '../../Arrow';
+import { MessageReader, AsyncMessageReader } from 'apache-arrow';
 
 for (const table of generateRandomTables([10, 20, 30])) {
 
diff --git a/js/test/unit/ipc/reader/file-reader-tests.ts b/js/test/unit/ipc/reader/file-reader-tests.ts
index 2d784d06589..a7ddfc940a6 100644
--- a/js/test/unit/ipc/reader/file-reader-tests.ts
+++ b/js/test/unit/ipc/reader/file-reader-tests.ts
@@ -31,7 +31,7 @@ import {
     RecordBatchReader,
     RecordBatchFileReader,
     AsyncRecordBatchFileReader
-} from '../../../Arrow';
+} from 'apache-arrow';
 
 for (const table of generateRandomTables([10, 20, 30])) {
 
diff --git a/js/test/unit/ipc/reader/from-inference-tests.ts b/js/test/unit/ipc/reader/from-inference-tests.ts
index 01d15fa8003..c444b78fcc8 100644
--- a/js/test/unit/ipc/reader/from-inference-tests.ts
+++ b/js/test/unit/ipc/reader/from-inference-tests.ts
@@ -27,9 +27,7 @@ import {
     RecordBatchStreamReader,
     AsyncRecordBatchFileReader,
     AsyncRecordBatchStreamReader
-} from '../../../Arrow';
-
-const { parse: bignumJSONParse } = require('json-bignum');
+} from 'apache-arrow';
 
 for (const table of generateRandomTables([10, 20, 30])) {
     const name = `[\n ${table.schema.fields.join(',\n ')}\n]`;
@@ -44,7 +42,7 @@ for (const table of generateRandomTables([10, 20, 30])) {
 function testFromJSON(io: ArrowIOTestHelper, name: string) {
     describe(`should return a RecordBatchJSONReader (${name})`, () => {
         test(`Uint8Array`, io.buffer((buffer) => {
-            const json = bignumJSONParse(`${Buffer.from(buffer)}`);
+            const json = JSON.parse(`${Buffer.from(buffer)}`);
             const reader = RecordBatchReader.from(json);
             expect(reader.isSync()).toEqual(true);
             expect(reader.isAsync()).toEqual(false);
diff --git a/js/test/unit/ipc/reader/json-reader-tests.ts b/js/test/unit/ipc/reader/json-reader-tests.ts
index b41106ecfb7..9bd1e346625 100644
--- a/js/test/unit/ipc/reader/json-reader-tests.ts
+++ b/js/test/unit/ipc/reader/json-reader-tests.ts
@@ -21,11 +21,9 @@ import {
 } from '../../../data/tables';
 
 import { ArrowIOTestHelper } from '../helpers';
-import { RecordBatchReader } from '../../../Arrow';
+import { RecordBatchReader } from 'apache-arrow';
 import { validateRecordBatchReader } from '../validate';
 
-const { parse: bignumJSONParse } = require('json-bignum');
-
 for (const table of generateRandomTables([10, 20, 30])) {
 
     const io = ArrowIOTestHelper.json(table);
@@ -34,7 +32,7 @@ for (const table of generateRandomTables([10, 20, 30])) {
     describe(`RecordBatchJSONReader (${name})`, () => {
         describe(`should read all RecordBatches`, () => {
             test(`Uint8Array`, io.buffer((buffer) => {
-                const json = bignumJSONParse(Buffer.from(buffer).toString());
+                const json = JSON.parse(Buffer.from(buffer).toString());
                 validateRecordBatchReader('json', 3, RecordBatchReader.from(json));
             }));
         });
diff --git a/js/test/unit/ipc/reader/stream-reader-tests.ts b/js/test/unit/ipc/reader/stream-reader-tests.ts
index ae7bbfbf98a..23879cf795e 100644
--- a/js/test/unit/ipc/reader/stream-reader-tests.ts
+++ b/js/test/unit/ipc/reader/stream-reader-tests.ts
@@ -26,7 +26,7 @@ import {
 } from '../validate';
 
 import { ArrowIOTestHelper } from '../helpers';
-import { RecordBatchReader } from '../../../Arrow';
+import { RecordBatchReader } from 'apache-arrow';
 
 for (const table of generateRandomTables([10, 20, 30])) {
 
diff --git a/js/test/unit/ipc/reader/streams-dom-tests.ts b/js/test/unit/ipc/reader/streams-dom-tests.ts
index a338ed77e55..a380e161932 100644
--- a/js/test/unit/ipc/reader/streams-dom-tests.ts
+++ b/js/test/unit/ipc/reader/streams-dom-tests.ts
@@ -24,7 +24,7 @@ import {
     Table,
     RecordBatchReader,
     RecordBatchStreamWriter
-} from '../../../Arrow';
+} from 'apache-arrow';
 
 import { validateRecordBatchAsyncIterator } from '../validate';
 import { ArrowIOTestHelper, readableDOMStreamToAsyncIterator } from '../helpers';
@@ -35,9 +35,6 @@ import { ArrowIOTestHelper, readableDOMStreamToAsyncIterator } from '../helpers'
         return test('not testing DOM streams because process.env.TEST_DOM_STREAMS !== "true"', () => {});
     }
 
-    const { parse: bignumJSONParse } = require('json-bignum');
-    const { concatStream } = require('web-stream-tools').default;
-
     for (const table of generateRandomTables([10, 20, 30])) {
 
         const file = ArrowIOTestHelper.file(table);
@@ -63,7 +60,7 @@ import { ArrowIOTestHelper, readableDOMStreamToAsyncIterator } from '../helpers'
         describe(`toDOMStream (${name})`, () => {
 
             describe(`RecordBatchJSONReader`, () => {
-                test('Uint8Array', json.buffer((source) => validate(bignumJSONParse(`${Buffer.from(source)}`))));
+                test('Uint8Array', json.buffer((source) => validate(JSON.parse(`${Buffer.from(source)}`))));
             });
 
             describe(`RecordBatchFileReader`, () => {
@@ -109,6 +106,8 @@ import { ArrowIOTestHelper, readableDOMStreamToAsyncIterator } from '../helpers'
     }
 
     it('readAll() should pipe to separate WhatWG WritableStreams', async () => {
+        // @ts-ignore
+        const { concatStream } = await import('@openpgp/web-stream-tools');
 
         expect.hasAssertions();
 
@@ -146,6 +145,8 @@ import { ArrowIOTestHelper, readableDOMStreamToAsyncIterator } from '../helpers'
     });
 
     it('should not close the underlying WhatWG ReadableStream when reading multiple tables to completion', async () => {
+        // @ts-ignore
+        const { concatStream } = await import('@openpgp/web-stream-tools');
 
         expect.hasAssertions();
 
@@ -177,6 +178,8 @@ import { ArrowIOTestHelper, readableDOMStreamToAsyncIterator } from '../helpers'
     });
 
     it('should close the underlying WhatWG ReadableStream when reading multiple tables and we break early', async () => {
+        // @ts-ignore
+        const { concatStream } = await import('@openpgp/web-stream-tools');
 
         expect.hasAssertions();
 
diff --git a/js/test/unit/ipc/reader/streams-node-tests.ts b/js/test/unit/ipc/reader/streams-node-tests.ts
index 080ebab73b9..822f9935020 100644
--- a/js/test/unit/ipc/reader/streams-node-tests.ts
+++ b/js/test/unit/ipc/reader/streams-node-tests.ts
@@ -23,7 +23,7 @@ import {
     Table,
     RecordBatchReader,
     RecordBatchStreamWriter
-} from '../../../Arrow';
+} from 'apache-arrow';
 
 import { ArrowIOTestHelper } from '../helpers';
 import { validateRecordBatchAsyncIterator } from '../validate';
@@ -34,12 +34,6 @@ import { validateRecordBatchAsyncIterator } from '../validate';
         return test('not testing node streams because process.env.TEST_NODE_STREAMS !== "true"', () => {});
     }
 
-    const { Readable, PassThrough } = require('stream');
-    const { parse: bignumJSONParse } = require('json-bignum');
-    const concatStream = ((MultiStream) => (...xs: any[]) =>
-        new Readable().wrap(new MultiStream(...xs))
-    )(require('multistream'));
-
     for (const table of generateRandomTables([10, 20, 30])) {
 
         const file = ArrowIOTestHelper.file(table);
@@ -65,7 +59,7 @@ import { validateRecordBatchAsyncIterator } from '../validate';
         describe(`toNodeStream (${name})`, () => {
 
             describe(`RecordBatchJSONReader`, () => {
-                test('Uint8Array', json.buffer((source) => validate(bignumJSONParse(`${Buffer.from(source)}`))));
+                test('Uint8Array', json.buffer((source) => validate(JSON.parse(`${Buffer.from(source)}`))));
             });
 
             describe(`RecordBatchFileReader`, () => {
@@ -110,12 +104,15 @@ import { validateRecordBatchAsyncIterator } from '../validate';
     }
 
     it('readAll() should pipe to separate NodeJS WritableStreams', async () => {
+        // @ts-ignore
+        const { default: MultiStream } = await import('multistream');
+        const { PassThrough } = await import('stream');
 
         expect.hasAssertions();
 
         const tables = [...generateRandomTables([10, 20, 30])];
 
-        const stream = concatStream(tables.map((table) =>
+        const stream = new MultiStream(tables.map((table) =>
             () => RecordBatchStreamWriter.writeAll(table).toNodeStream()
         )) as NodeJS.ReadableStream;
 
@@ -144,12 +141,14 @@ import { validateRecordBatchAsyncIterator } from '../validate';
     });
 
     it('should not close the underlying NodeJS ReadableStream when reading multiple tables to completion', async () => {
+        // @ts-ignore
+        const { default: MultiStream } = await import('multistream');
 
         expect.hasAssertions();
 
         const tables = [...generateRandomTables([10, 20, 30])];
 
-        const stream = concatStream(tables.map((table) =>
+        const stream = new MultiStream(tables.map((table) =>
             () => RecordBatchStreamWriter.writeAll(table).toNodeStream()
         )) as NodeJS.ReadableStream;
 
@@ -172,12 +171,14 @@ import { validateRecordBatchAsyncIterator } from '../validate';
     });
 
     it('should close the underlying NodeJS ReadableStream when reading multiple tables and we break early', async () => {
+        // @ts-ignore
+        const { default: MultiStream } = await import('multistream');
 
         expect.hasAssertions();
 
         const tables = [...generateRandomTables([10, 20, 30])];
 
-        const stream = concatStream(tables.map((table) =>
+        const stream = new MultiStream(tables.map((table) =>
             () => RecordBatchStreamWriter.writeAll(table).toNodeStream()
         )) as NodeJS.ReadableStream;
 
@@ -205,8 +206,7 @@ import { validateRecordBatchAsyncIterator } from '../validate';
             }
         }
 
-        // stream.readable should be false here
-        validateStreamState(reader, stream, true);
+        validateStreamState(reader, stream, true, true);
         expect(tableIndex).toBe(tables.length / 2 | 0);
     });
 })();
diff --git a/js/test/unit/ipc/validate.ts b/js/test/unit/ipc/validate.ts
index 27e1e03e69e..aedf87a2d09 100644
--- a/js/test/unit/ipc/validate.ts
+++ b/js/test/unit/ipc/validate.ts
@@ -23,7 +23,7 @@ import {
     RecordBatchReader,
     RecordBatchFileReader,
     RecordBatchStreamReader,
-} from '../../Arrow';
+} from 'apache-arrow';
 
 export function validateRecordBatchReader<T extends RecordBatchFileReader | RecordBatchStreamReader>(type: 'json' | 'file' | 'stream', numBatches: number, r: T) {
     const reader = r.open();
diff --git a/js/test/unit/ipc/writer/file-writer-tests.ts b/js/test/unit/ipc/writer/file-writer-tests.ts
index 81066462304..fa639e5f672 100644
--- a/js/test/unit/ipc/writer/file-writer-tests.ts
+++ b/js/test/unit/ipc/writer/file-writer-tests.ts
@@ -21,7 +21,7 @@ import {
 } from '../../../data/tables';
 
 import { validateRecordBatchIterator } from '../validate';
-import { Table, RecordBatchFileWriter } from '../../../Arrow';
+import { Table, RecordBatchFileWriter } from 'apache-arrow';
 
 describe('RecordBatchFileWriter', () => {
     for (const table of generateRandomTables([10, 20, 30])) {
diff --git a/js/test/unit/ipc/writer/json-writer-tests.ts b/js/test/unit/ipc/writer/json-writer-tests.ts
index b461d0f76ce..05be0e27272 100644
--- a/js/test/unit/ipc/writer/json-writer-tests.ts
+++ b/js/test/unit/ipc/writer/json-writer-tests.ts
@@ -21,9 +21,7 @@ import {
 } from '../../../data/tables';
 
 import { validateRecordBatchIterator } from '../validate';
-import { Table, RecordBatchJSONWriter } from '../../../Arrow';
-
-const { parse: bignumJSONParse } = require('json-bignum');
+import { Table, RecordBatchJSONWriter } from 'apache-arrow';
 
 describe('RecordBatchJSONWriter', () => {
     for (const table of generateRandomTables([10, 20, 30])) {
@@ -42,7 +40,7 @@ function testJSONWriter(table: Table, name: string) {
 
 async function validateTable(source: Table) {
     const writer = RecordBatchJSONWriter.writeAll(source);
-    const result = Table.from(bignumJSONParse(await writer.toString()));
+    const result = Table.from(JSON.parse(await writer.toString()));
     validateRecordBatchIterator(3, source.chunks);
     expect(result).toEqualTable(source);
 }
diff --git a/js/test/unit/ipc/writer/stream-writer-tests.ts b/js/test/unit/ipc/writer/stream-writer-tests.ts
index 3c5cd3c06fe..a83aa39da4c 100644
--- a/js/test/unit/ipc/writer/stream-writer-tests.ts
+++ b/js/test/unit/ipc/writer/stream-writer-tests.ts
@@ -22,9 +22,9 @@ import {
 
 import * as generate from '../../../generate-test-data';
 import { validateRecordBatchIterator } from '../validate';
-import { RecordBatchStreamWriterOptions } from '../../../../src/ipc/writer';
-import { DictionaryVector, Dictionary, Uint32, Int32 } from '../../../Arrow';
-import { Table, Schema, Field, Chunked, Builder, RecordBatch, RecordBatchReader, RecordBatchStreamWriter } from '../../../Arrow';
+import { RecordBatchStreamWriterOptions } from 'apache-arrow/ipc/writer';
+import { DictionaryVector, Dictionary, Uint32, Int32 } from 'apache-arrow';
+import { Table, Schema, Field, Chunked, Builder, RecordBatch, RecordBatchReader, RecordBatchStreamWriter } from 'apache-arrow';
 
 describe('RecordBatchStreamWriter', () => {
 
diff --git a/js/test/unit/ipc/writer/streams-dom-tests.ts b/js/test/unit/ipc/writer/streams-dom-tests.ts
index 8a8060b6fb3..18f161f7d73 100644
--- a/js/test/unit/ipc/writer/streams-dom-tests.ts
+++ b/js/test/unit/ipc/writer/streams-dom-tests.ts
@@ -29,7 +29,7 @@ import {
     RecordBatchFileWriter,
     RecordBatchJSONWriter,
     RecordBatchStreamWriter,
-} from '../../../Arrow';
+} from 'apache-arrow';
 
 import {
     ArrowIOTestHelper,
@@ -49,8 +49,6 @@ import {
         return test('not testing DOM streams because process.env.TEST_DOM_STREAMS !== "true"', () => {});
     }
 
-    const { parse: bignumJSONParse } = require('json-bignum');
-
     for (const table of generateRandomTables([10, 20, 30])) {
 
         const file = ArrowIOTestHelper.file(table);
@@ -105,7 +103,7 @@ import {
 
             describe(`RecordBatchJSONWriter`, () => {
 
-                const toJSON = (x: any): { schema: any } => bignumJSONParse(`${Buffer.from(x)}`);
+                const toJSON = (x: any): { schema: any } => JSON.parse(`${Buffer.from(x)}`);
 
                 test('Uint8Array', json.buffer((source) => validate(toJSON(source))));
                 test('Promise<Uint8Array>', json.buffer((source) => validate(Promise.resolve(toJSON(source)))));
diff --git a/js/test/unit/ipc/writer/streams-node-tests.ts b/js/test/unit/ipc/writer/streams-node-tests.ts
index d341e829496..52126f612c3 100644
--- a/js/test/unit/ipc/writer/streams-node-tests.ts
+++ b/js/test/unit/ipc/writer/streams-node-tests.ts
@@ -29,7 +29,7 @@ import {
     RecordBatchFileWriter,
     RecordBatchJSONWriter,
     RecordBatchStreamWriter,
-} from '../../../Arrow';
+} from 'apache-arrow';
 
 import {
     ArrowIOTestHelper,
@@ -48,8 +48,6 @@ import {
         return test('not testing node streams because process.env.TEST_NODE_STREAMS !== "true"', () => {});
     }
 
-    const { parse: bignumJSONParse } = require('json-bignum');
-
     for (const table of generateRandomTables([10, 20, 30])) {
 
         const file = ArrowIOTestHelper.file(table);
@@ -104,7 +102,7 @@ import {
 
             describe(`RecordBatchJSONWriter`, () => {
 
-                const toJSON = (x: any): { schema: any } => bignumJSONParse(`${Buffer.from(x)}`);
+                const toJSON = (x: any): { schema: any } => JSON.parse(`${Buffer.from(x)}`);
 
                 test('Uint8Array', json.buffer((source) => validate(toJSON(source))));
                 test('Promise<Uint8Array>', json.buffer((source) => validate(Promise.resolve(toJSON(source)))));
diff --git a/js/test/unit/math-tests.ts b/js/test/unit/math-tests.ts
index 2baaa034623..7e3ffcd8ff0 100644
--- a/js/test/unit/math-tests.ts
+++ b/js/test/unit/math-tests.ts
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-import * as Arrow from '../Arrow';
+import * as Arrow from 'apache-arrow';
 const { float64ToUint16, uint16ToFloat64 } = Arrow.util;
 
 describe('Float16', () => {
diff --git a/js/test/unit/recordbatch/record-batch-tests.ts b/js/test/unit/recordbatch/record-batch-tests.ts
index de3090a5af7..520c04f84ed 100644
--- a/js/test/unit/recordbatch/record-batch-tests.ts
+++ b/js/test/unit/recordbatch/record-batch-tests.ts
@@ -19,7 +19,7 @@ import '../../jest-extensions';
 import {
     Data, RecordBatch,
     Vector, Int32Vector, Float32Vector, Float32, Int32,
-} from '../../Arrow';
+} from 'apache-arrow';
 import { arange } from '../utils';
 
 function numsRecordBatch(i32Len: number, f32Len: number) {
diff --git a/js/test/unit/table-tests.ts b/js/test/unit/table-tests.ts
index c6b0bb5f75e..2f138182bbd 100644
--- a/js/test/unit/table-tests.ts
+++ b/js/test/unit/table-tests.ts
@@ -20,7 +20,7 @@ import {
     Data, Schema, Field, Table, RecordBatch, Column,
     Vector, Int32Vector, Float32Vector, Utf8Vector, DictionaryVector,
     Struct, Float32, Int32, Dictionary, Utf8, Int8
-} from '../Arrow';
+} from 'apache-arrow';
 import { arange } from './utils';
 
 const NAMES = ['f32', 'i32', 'dictionary'] as (keyof TestDataSchema)[];
diff --git a/js/test/unit/table/assign-tests.ts b/js/test/unit/table/assign-tests.ts
index a9f76dde190..fa1dacbc638 100644
--- a/js/test/unit/table/assign-tests.ts
+++ b/js/test/unit/table/assign-tests.ts
@@ -23,7 +23,7 @@ import * as generate from '../../generate-test-data';
 import { validateTable } from '../generated-data-validators';
 import {
     Schema, Field, DataType, Int32, Float32, Utf8
-} from '../../Arrow';
+} from 'apache-arrow';
 
 const toSchema = (...xs: [string, DataType][]) => new Schema(xs.map((x) => new Field(...x)));
 const schema1             = toSchema(['a', new Int32()], ['b', new Float32()], ['c', new Utf8()]);
diff --git a/js/test/unit/table/serialize-tests.ts b/js/test/unit/table/serialize-tests.ts
index 961f71476a2..5eb21176362 100644
--- a/js/test/unit/table/serialize-tests.ts
+++ b/js/test/unit/table/serialize-tests.ts
@@ -19,7 +19,7 @@ import '../../jest-extensions';
 import * as generate from '../../generate-test-data';
 import {
     Table, Schema, Field, DataType, Dictionary, Int32, Float32, Utf8, Null, Int32Vector
-} from '../../Arrow';
+} from 'apache-arrow';
 
 const toSchema = (...xs: [string, DataType][]) => new Schema(xs.map((x) => new Field(...x)));
 const schema1 = toSchema(['a', new Int32()], ['b', new Float32()], ['c', new Dictionary(new Utf8(), new Int32())]);
diff --git a/js/test/unit/utils-tests.ts b/js/test/unit/utils-tests.ts
index 00553c4f1b4..985bec7aab4 100644
--- a/js/test/unit/utils-tests.ts
+++ b/js/test/unit/utils-tests.ts
@@ -15,8 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-import { isTypedArray } from '../../src/util/args';
-
+import { isTypedArray } from 'apache-arrow';
 
 describe('isTypedArray', () => {
     test('works for typed arrays', () => {
diff --git a/js/test/unit/vector/bool-vector-tests.ts b/js/test/unit/vector/bool-vector-tests.ts
index 1d59a3c975c..41c53da6075 100644
--- a/js/test/unit/vector/bool-vector-tests.ts
+++ b/js/test/unit/vector/bool-vector-tests.ts
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-import { Data, Bool, Vector, BoolVector } from '../../Arrow';
+import { Data, Bool, Vector, BoolVector } from 'apache-arrow';
 
 const newBoolVector = (length: number, data: Uint8Array) => Vector.new(Data.Bool(new Bool(), 0, length, 0, null, data));
 
diff --git a/js/test/unit/vector/date-vector-tests.ts b/js/test/unit/vector/date-vector-tests.ts
index 4f41d4f8a05..4658633ba74 100644
--- a/js/test/unit/vector/date-vector-tests.ts
+++ b/js/test/unit/vector/date-vector-tests.ts
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-import { Table, DateDay, DateMillisecond } from '../../Arrow';
+import { Table, DateDay, DateMillisecond } from 'apache-arrow';
 
 describe(`DateVector`, () => {
     it('returns days since the epoch as correct JS Dates', () => {
diff --git a/js/test/unit/vector/numeric-vector-tests.ts b/js/test/unit/vector/numeric-vector-tests.ts
index 4c3ad3a46fe..41564716878 100644
--- a/js/test/unit/vector/numeric-vector-tests.ts
+++ b/js/test/unit/vector/numeric-vector-tests.ts
@@ -25,12 +25,12 @@ import {
     FloatVector, Float16Vector, Float32Vector, Float64Vector,
     IntVector, Int8Vector, Int16Vector, Int32Vector, Int64Vector,
     Uint8Vector, Uint16Vector, Uint32Vector, Uint64Vector,
-} from '../../Arrow';
+} from 'apache-arrow';
 
 const { float64ToUint16, uint16ToFloat64 } = util;
-import { VectorType as V } from '../../../src/interfaces';
-import { TypedArray, TypedArrayConstructor } from '../../../src/interfaces';
-import { BigIntArray, BigIntArrayConstructor } from '../../../src/interfaces';
+import { VectorType as V } from 'apache-arrow/interfaces';
+import { TypedArray, TypedArrayConstructor } from 'apache-arrow/interfaces';
+import { BigIntArray, BigIntArrayConstructor } from 'apache-arrow/interfaces';
 
 const { joinUint8Arrays, BN } = util;
 const uint16ToFloat64Array = (b: ArrayBuffer) => new Float64Array([...new Uint16Array(b)].map(uint16ToFloat64));
diff --git a/js/test/unit/vector/vector-tests.ts b/js/test/unit/vector/vector-tests.ts
index 91c402697f1..60bff94f8a1 100644
--- a/js/test/unit/vector/vector-tests.ts
+++ b/js/test/unit/vector/vector-tests.ts
@@ -18,7 +18,7 @@
 import {
     Int32, Dictionary, DateUnit, util,
     Data, Vector, Utf8Vector, DateVector, DictionaryVector,
-} from '../../Arrow';
+} from 'apache-arrow';
 
 describe(`DateVector`, () => {
     const extras = [
diff --git a/js/test/unit/visitor-tests.ts b/js/test/unit/visitor-tests.ts
index e3339b01f9a..22b3e5ced05 100644
--- a/js/test/unit/visitor-tests.ts
+++ b/js/test/unit/visitor-tests.ts
@@ -15,8 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
-import { Field } from '../Arrow';
-import { Visitor } from '../Arrow';
+import { Field } from 'apache-arrow';
+import { Visitor } from 'apache-arrow';
 import {
     DataType, Dictionary,
     Bool, Null, Utf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct,
@@ -27,7 +27,7 @@ import {
     Time, TimeSecond, TimeMillisecond, TimeMicrosecond, TimeNanosecond,
     Timestamp, TimestampSecond, TimestampMillisecond, TimestampMicrosecond, TimestampNanosecond,
     Union, DenseUnion, SparseUnion,
-} from '../Arrow';
+} from 'apache-arrow';
 
 class BasicVisitor extends Visitor {
     public type: DataType | undefined;
diff --git a/js/tsconfig.json b/js/tsconfig.json
index d61218686d6..72351f25971 100644
--- a/js/tsconfig.json
+++ b/js/tsconfig.json
@@ -1,14 +1,19 @@
 {
   "extends": "./tsconfig/tsconfig.base.json",
   "formatCodeOptions": {
-    "tabSize": 4,
-    "indentSize": 4
+    "tabSize": 2,
+    "indentSize": 2
   },
   "compilerOptions": {
-    "target": "ESNEXT",
-    "module": "commonjs",
+    "target": "esnext",
+    "module": "es2020",
     "noEmit": true,
-    "esModuleInterop": true
+    "esModuleInterop": true,
+    "baseUrl": "./",
+    "paths": {
+      "apache-arrow": ["src/Arrow.node"],
+      "apache-arrow/*": ["src/*"]
+    }
   },
   "include": ["src/**/*.ts", "test/**/*.ts", "perf/**/*.ts"]
 }
diff --git a/js/tsconfig/tsconfig.base.json b/js/tsconfig/tsconfig.base.json
index 1fbd3296bf6..8ee0d98f65b 100644
--- a/js/tsconfig/tsconfig.base.json
+++ b/js/tsconfig/tsconfig.base.json
@@ -3,6 +3,10 @@
   "include": ["../src/**/*.ts"],
   "compileOnSave": false,
   "compilerOptions": {
+    "baseUrl": "./",
+    "paths": {
+      "apache-arrow/*": ["src/*"]
+    },
 
     /* Basic stuff */
     "moduleResolution": "node",
@@ -10,6 +14,7 @@
 
     /* Control what is emitted */
     "declaration": true,
+    "declarationMap": true,
     "noEmitOnError": true,
     "removeComments": false,
     "noErrorTruncation": true,
@@ -40,6 +45,6 @@
     "allowUnreachableCode": false,
     "noStrictGenericChecks": false,
     "noFallthroughCasesInSwitch": true,
-    "forceConsistentCasingInFileNames": true,
+    "forceConsistentCasingInFileNames": true
   }
 }
diff --git a/js/tsconfig/tsconfig.bin.cjs.json b/js/tsconfig/tsconfig.bin.cjs.json
index 8a006490aef..e9671810a7b 100644
--- a/js/tsconfig/tsconfig.bin.cjs.json
+++ b/js/tsconfig/tsconfig.bin.cjs.json
@@ -1,11 +1,12 @@
 // Compiler configuration to build the ES5 CommonJS bin files
 {
-    "extends": "./tsconfig.base.json",
-    "exclude": ["../node_modules"],
-    "include": ["../src/bin/*.ts"],
-    "compilerOptions": {
-      "target": "esnext",
-      "module": "commonjs",
-      "declaration": false
-    }
+  "extends": "./tsconfig.base.json",
+  "exclude": ["../node_modules"],
+  "include": ["../src/bin/*.ts"],
+  "compilerOptions": {
+    "target": "esnext",
+    "module": "commonjs",
+    "declaration": false,
+    "declarationMap": false
   }
+}
diff --git a/js/tsconfig/tsconfig.docs.json b/js/tsconfig/tsconfig.docs.json
index c73c307d598..b7b990ed8ec 100644
--- a/js/tsconfig/tsconfig.docs.json
+++ b/js/tsconfig/tsconfig.docs.json
@@ -1,8 +1,8 @@
 // Compiler configuration to build the docs
 {
-    "extends": "./tsconfig.base.json",
-    "include": ["../src/**/*.ts"],
-    "compilerOptions": {
-      "target": "ESNEXT"
-    }
+  "extends": "./tsconfig.base.json",
+  "include": ["../src/**/*.ts"],
+  "compilerOptions": {
+    "target": "ESNEXT"
   }
+}
diff --git a/js/tsconfig/tsconfig.es2015.cls.json b/js/tsconfig/tsconfig.es2015.cls.json
index fe2f0b4022a..7cc364b3658 100644
--- a/js/tsconfig/tsconfig.es2015.cls.json
+++ b/js/tsconfig/tsconfig.es2015.cls.json
@@ -5,6 +5,7 @@
     "target": "esnext",
     "module": "es2015",
     "declaration": false,
+    "declarationMap": false,
     "noEmitHelpers": true,
     "importHelpers": false
   }
diff --git a/js/tsconfig/tsconfig.es5.cls.json b/js/tsconfig/tsconfig.es5.cls.json
index 2c379b84759..a03808d365c 100644
--- a/js/tsconfig/tsconfig.es5.cls.json
+++ b/js/tsconfig/tsconfig.es5.cls.json
@@ -5,6 +5,7 @@
     "target": "esnext",
     "module": "es2015",
     "declaration": false,
+    "declarationMap": false,
     "noEmitHelpers": true,
     "importHelpers": false
   }
diff --git a/js/tsconfig/tsconfig.esnext.cls.json b/js/tsconfig/tsconfig.esnext.cls.json
index 176a72ba614..dc35c3f8837 100644
--- a/js/tsconfig/tsconfig.esnext.cls.json
+++ b/js/tsconfig/tsconfig.esnext.cls.json
@@ -5,6 +5,7 @@
     "target": "esnext",
     "module": "es2015",
     "declaration": false,
+    "declarationMap": false,
     "noEmitHelpers": true,
     "importHelpers": false
   }
diff --git a/js/yarn.lock b/js/yarn.lock
index 7b54725509b..3d88ffd759a 100644
--- a/js/yarn.lock
+++ b/js/yarn.lock
@@ -1236,6 +1236,11 @@
     npmlog "^4.1.2"
     write-file-atomic "^3.0.3"
 
+"@mattiasbuelens/web-streams-adapter@0.1.0-alpha.5":
+  version "0.1.0-alpha.5"
+  resolved "https://registry.yarnpkg.com/@mattiasbuelens/web-streams-adapter/-/web-streams-adapter-0.1.0-alpha.5.tgz#091a6256fdada3d53dc0a70501bcc6f3a46add05"
+  integrity sha512-OIfunNt/fTjIgDtUqXhBYOKtgaxm30ZWkMWegI9iS3xUHy2/A3AXki6/k+z40+BywNMi+spON/jSE0FF9WmUKA==
+
 "@mrmlnc/readdir-enhanced@^2.2.1":
   version "2.2.1"
   resolved "https://registry.yarnpkg.com/@mrmlnc/readdir-enhanced/-/readdir-enhanced-2.2.1.tgz#524af240d1a360527b730475ecfa1344aa540dde"
@@ -1434,6 +1439,14 @@
   dependencies:
     "@octokit/openapi-types" "^7.2.3"
 
+"@openpgp/web-stream-tools@0.0.5":
+  version "0.0.5"
+  resolved "https://registry.yarnpkg.com/@openpgp/web-stream-tools/-/web-stream-tools-0.0.5.tgz#f78d73400be010dca940ec09642aaf8c35edf56d"
+  integrity sha512-tdUCdiMi5ogmZlAbR4cQXZDbK34QB8iEnJ434m9bj4P7sxvKg2KKKbEiB4EQb2AWhj/SNKcoNUHhT9WxTqKimQ==
+  dependencies:
+    "@mattiasbuelens/web-streams-adapter" "0.1.0-alpha.5"
+    web-streams-polyfill "~2.1.1"
+
 "@sinonjs/commons@^1.7.0":
   version "1.8.3"
   resolved "https://registry.yarnpkg.com/@sinonjs/commons/-/commons-1.8.3.tgz#3802ddd21a50a949b6721ddd72da36e67e7f1b2d"
@@ -1568,13 +1581,6 @@
   resolved "https://registry.yarnpkg.com/@types/minimist/-/minimist-1.2.1.tgz#283f669ff76d7b8260df8ab7a4262cc83d988256"
   integrity sha512-fZQQafSREFyuZcdWFAExYjBiCL7AUCdgsk80iO0q4yihYYdcIiH28CcuPTGFgLOCC8RlW49GSQxdHwZP+I7CNg==
 
-"@types/multistream@2.1.1":
-  version "2.1.1"
-  resolved "https://registry.yarnpkg.com/@types/multistream/-/multistream-2.1.1.tgz#4badd2440ee3570594ea552420fe2e29ebe512bd"
-  integrity sha512-PqavtNFnMyXRZS5vuW16wMOKeJUCD5PIGHdNBHzF5Urjncsij90hRQ82Wcy9+uSdnmrR2Gfao6xoJVq1wAWzbA==
-  dependencies:
-    "@types/node" "*"
-
 "@types/node@*", "@types/node@^15.6.1":
   version "15.9.0"
   resolved "https://registry.yarnpkg.com/@types/node/-/node-15.9.0.tgz#0b7f6c33ca5618fe329a9d832b478b4964d325a8"
@@ -1600,6 +1606,11 @@
   resolved "https://registry.yarnpkg.com/@types/prettier/-/prettier-2.2.3.tgz#ef65165aea2924c9359205bf748865b8881753c0"
   integrity sha512-PijRCG/K3s3w1We6ynUKdxEc5AcuuH3NBmMDP8uvKVp6X43UY7NQlTzczakXP3DJR0F4dfNQIGjU2cUeRYs2AA==
 
+"@types/randomatic@3.1.2":
+  version "3.1.2"
+  resolved "https://registry.yarnpkg.com/@types/randomatic/-/randomatic-3.1.2.tgz#3485f0e113bf47fe25fee62fc20ca27713642975"
+  integrity sha512-lLsR0U1lUTjJ8vy1r3VGWlgprGtB/QPVwxs+QVSe28b0MS/7sR5tUfvhDd9XLV/AWc50OmDADAhzdqujavdykg==
+
 "@types/stack-utils@^2.0.0":
   version "2.0.0"
   resolved "https://registry.yarnpkg.com/@types/stack-utils/-/stack-utils-2.0.0.tgz#7036640b4e21cc2f259ae826ce843d277dad8cff"
@@ -6484,7 +6495,7 @@ multimatch@^5.0.0:
 
 multistream@4.1.0:
   version "4.1.0"
-  resolved "https://registry.yarnpkg.com/multistream/-/multistream-4.1.0.tgz#7bf00dfd119556fbc153cff3de4c6d477909f5a8"
+  resolved "https://registry.npmjs.org/multistream/-/multistream-4.1.0.tgz#7bf00dfd119556fbc153cff3de4c6d477909f5a8"
   integrity sha512-J1XDiAmmNpRCBfIWJv+n0ymC4ABcf/Pl+5YvC5B/D2f/2+8PtHvCNxMPKiQcZyi922Hq69J2YOpb1pTywfifyw==
   dependencies:
     once "^1.4.0"
@@ -8787,10 +8798,10 @@ trim-off-newlines@^1.0.0:
   resolved "https://registry.yarnpkg.com/trim-off-newlines/-/trim-off-newlines-1.0.1.tgz#9f9ba9d9efa8764c387698bcbfeb2c848f11adb3"
   integrity sha1-n5up2e+odkw4dpi8v+sshI8RrbM=
 
-ts-jest@27.0.0:
-  version "27.0.0"
-  resolved "https://registry.yarnpkg.com/ts-jest/-/ts-jest-27.0.0.tgz#b94dbad8f39276b583edc7712e6b3c29e16c8863"
-  integrity sha512-YhuEjDZz9ZjxKbUlgT3XtJb9lyditEjctlo1nLcn983my3Xz4BE3c2ogHhonmGlAdUUiGlz/Dq2KOMXmf1WHfA==
+ts-jest@27.0.3:
+  version "27.0.3"
+  resolved "https://registry.npmjs.org/ts-jest/-/ts-jest-27.0.3.tgz#808492f022296cde19390bb6ad627c8126bf93f8"
+  integrity sha512-U5rdMjnYam9Ucw+h0QvtNDbc5+88nxt7tbIvqaZUhFrfG4+SkWhMXjejCLVGcpILTPuV+H3W/GZDZrnZFpPeXw==
   dependencies:
     bs-logger "0.x"
     buffer-from "1.x"
@@ -8824,10 +8835,10 @@ tslib@^1.8.1, tslib@^1.9.0, tslib@^1.9.3:
   resolved "https://registry.yarnpkg.com/tslib/-/tslib-1.14.1.tgz#cf2d38bdc34a134bcaf1091c41f6619e2f672d00"
   integrity sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg==
 
-tslib@^2.2.0:
-  version "2.2.0"
-  resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.2.0.tgz#fb2c475977e35e241311ede2693cee1ec6698f5c"
-  integrity sha512-gS9GVHRU+RGn5KQM2rllAlR3dU6m7AcpJKdtH8gFvQiC4Otgk98XnmMU+nZenHt/+VhnBPWwgrJsyrdcw6i23w==
+tslib@^2.3.0:
+  version "2.3.0"
+  resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.3.0.tgz#803b8cdab3e12ba581a4ca41c8839bbb0dacb09e"
+  integrity sha512-N82ooyxVNm6h1riLCoyS9e3fuJ3AMG2zIZs2Gd1ATcSFjSA23Q0fzjjZeh0jbJvWVDZ0cJT8yaNNaaXHzueNjg==
 
 tsutils@^3.17.1, tsutils@^3.21.0:
   version "3.21.0"
@@ -9249,16 +9260,16 @@ wcwidth@^1.0.0:
   dependencies:
     defaults "^1.0.3"
 
-web-stream-tools@0.0.1:
-  version "0.0.1"
-  resolved "https://registry.yarnpkg.com/web-stream-tools/-/web-stream-tools-0.0.1.tgz#6d2c06a6f5f46eab5e73d82285bae3c9b5ee71a0"
-  integrity sha512-MZUYhvTAMMy1u07OJL2pyp/tdrIu15fRJlGgnfvCQVXBS4cBNbIV1+6veYfVhTfnq0ZLispgx4nv17QxpuX+6w==
-
 web-streams-polyfill@3.0.3:
   version "3.0.3"
   resolved "https://registry.yarnpkg.com/web-streams-polyfill/-/web-streams-polyfill-3.0.3.tgz#f49e487eedeca47a207c1aee41ee5578f884b42f"
   integrity sha512-d2H/t0eqRNM4w2WvmTdoeIvzAUSpK7JmATB8Nr2lb7nQ9BTIJVjbQ/TRFVEh2gUH1HwclPdoPtfMoFfetXaZnA==
 
+web-streams-polyfill@~2.1.1:
+  version "2.1.1"
+  resolved "https://registry.yarnpkg.com/web-streams-polyfill/-/web-streams-polyfill-2.1.1.tgz#2c82b6193849ccb9efaa267772c28260ef68d6d2"
+  integrity sha512-dlNpL2aab3g8CKfGz6rl8FNmGaRWLLn2g/DtSc9IjB30mEdE6XxzPfPSig5BwGSzI+oLxHyETrQGKjrVVhbLCg==
+
 webidl-conversions@^5.0.0:
   version "5.0.0"
   resolved "https://registry.yarnpkg.com/webidl-conversions/-/webidl-conversions-5.0.0.tgz#ae59c8a00b121543a2acc65c0434f57b0fc11aff"

From 090e2cf3377e7975fa253664296b92ea873f08fb Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Mon, 12 Jul 2021 16:46:20 -0400
Subject: [PATCH 542/719] ARROW-13289: [C++] Accept integer args in trig/log
 functions via promotion to double

Instead of adding/generating separate kernels for integers, just promote the arguments instead.

Closes #10686 from lidavidm/arrow-13289

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 .../compute/kernels/scalar_arithmetic.cc      | 40 +++++++-
 .../compute/kernels/scalar_arithmetic_test.cc | 94 ++++++++++++++++++-
 2 files changed, 130 insertions(+), 4 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
index f0eabf1b40e..db73294e1fa 100644
--- a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
@@ -1076,6 +1076,37 @@ struct ArithmeticFunction : ScalarFunction {
   }
 };
 
+/// An ArithmeticFunction that promotes integer arguments to double.
+struct ArithmeticFloatingPointFunction : public ArithmeticFunction {
+  using ArithmeticFunction::ArithmeticFunction;
+
+  Result<const Kernel*> DispatchBest(std::vector<ValueDescr>* values) const override {
+    RETURN_NOT_OK(CheckArity(*values));
+    RETURN_NOT_OK(CheckDecimals(values));
+
+    using arrow::compute::detail::DispatchExactImpl;
+    if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
+
+    EnsureDictionaryDecoded(values);
+
+    if (values->size() == 2) {
+      ReplaceNullWithOtherType(values);
+    }
+
+    for (auto& descr : *values) {
+      if (is_integer(descr.type->id())) {
+        descr.type = float64();
+      }
+    }
+    if (auto type = CommonNumeric(*values)) {
+      ReplaceTypes(type, values);
+    }
+
+    if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
+    return arrow::compute::detail::NoMatchingKernel(this, *values);
+  }
+};
+
 template <typename Op>
 std::shared_ptr<ScalarFunction> MakeArithmeticFunction(std::string name,
                                                        const FunctionDoc* doc) {
@@ -1164,7 +1195,8 @@ std::shared_ptr<ScalarFunction> MakeShiftFunctionNotNull(std::string name,
 template <typename Op>
 std::shared_ptr<ScalarFunction> MakeUnaryArithmeticFunctionFloatingPoint(
     std::string name, const FunctionDoc* doc) {
-  auto func = std::make_shared<ArithmeticFunction>(name, Arity::Unary(), doc);
+  auto func =
+      std::make_shared<ArithmeticFloatingPointFunction>(name, Arity::Unary(), doc);
   for (const auto& ty : FloatingPointTypes()) {
     auto output = is_integer(ty->id()) ? float64() : ty;
     auto exec = GenerateArithmeticFloatingPoint<ScalarUnary, Op>(ty);
@@ -1176,7 +1208,8 @@ std::shared_ptr<ScalarFunction> MakeUnaryArithmeticFunctionFloatingPoint(
 template <typename Op>
 std::shared_ptr<ScalarFunction> MakeUnaryArithmeticFunctionFloatingPointNotNull(
     std::string name, const FunctionDoc* doc) {
-  auto func = std::make_shared<ArithmeticFunction>(name, Arity::Unary(), doc);
+  auto func =
+      std::make_shared<ArithmeticFloatingPointFunction>(name, Arity::Unary(), doc);
   for (const auto& ty : FloatingPointTypes()) {
     auto output = is_integer(ty->id()) ? float64() : ty;
     auto exec = GenerateArithmeticFloatingPoint<ScalarUnaryNotNull, Op>(ty);
@@ -1188,7 +1221,8 @@ std::shared_ptr<ScalarFunction> MakeUnaryArithmeticFunctionFloatingPointNotNull(
 template <typename Op>
 std::shared_ptr<ScalarFunction> MakeArithmeticFunctionFloatingPoint(
     std::string name, const FunctionDoc* doc) {
-  auto func = std::make_shared<ArithmeticFunction>(name, Arity::Binary(), doc);
+  auto func =
+      std::make_shared<ArithmeticFloatingPointFunction>(name, Arity::Binary(), doc);
   for (const auto& ty : FloatingPointTypes()) {
     auto output = is_integer(ty->id()) ? float64() : ty;
     auto exec = GenerateArithmeticFloatingPoint<ScalarBinaryEqualTypes, Op>(ty);
diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
index 877b6f31160..e37fb93fac2 100644
--- a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
@@ -1042,6 +1042,26 @@ TEST(TestUnaryArithmetic, DispatchBest) {
   for (std::string name : {"negate", "negate_checked", "abs", "abs_checked"}) {
     CheckDispatchFails(name, {null()});
   }
+
+  for (std::string name :
+       {"ln", "log2", "log10", "log1p", "sin", "cos", "tan", "asin", "acos"}) {
+    for (std::string suffix : {"", "_checked"}) {
+      name += suffix;
+
+      CheckDispatchBest(name, {int32()}, {float64()});
+      CheckDispatchBest(name, {uint8()}, {float64()});
+
+      CheckDispatchBest(name, {dictionary(int8(), int64())}, {float64()});
+    }
+  }
+
+  CheckDispatchBest("atan", {int32()}, {float64()});
+  CheckDispatchBest("atan2", {int32(), float64()}, {float64(), float64()});
+  CheckDispatchBest("atan2", {int32(), uint8()}, {float64(), float64()});
+  CheckDispatchBest("atan2", {int32(), null()}, {float64(), float64()});
+  CheckDispatchBest("atan2", {float32(), float64()}, {float64(), float64()});
+  // Integer always promotes to double
+  CheckDispatchBest("atan2", {float32(), int8()}, {float64(), float64()});
 }
 
 TYPED_TEST(TestUnaryArithmeticSigned, Negate) {
@@ -1821,9 +1841,41 @@ TYPED_TEST(TestBinaryArithmeticFloating, TrigAtan2) {
                               -M_PI_2, 0, M_PI));
 }
 
+TYPED_TEST(TestUnaryArithmeticIntegral, Trig) {
+  // Integer arguments promoted to double, sanity check here
+  auto ty = this->type_singleton();
+  auto atan = [](const Datum& arg, ArithmeticOptions, ExecContext* ctx) {
+    return Atan(arg, ctx);
+  };
+  for (auto check_overflow : {false, true}) {
+    this->SetOverflowCheck(check_overflow);
+    this->AssertUnaryOp(Sin, ArrayFromJSON(ty, "[0, 1]"),
+                        ArrayFromJSON(float64(), "[0, 0.8414709848078965]"));
+    this->AssertUnaryOp(Cos, ArrayFromJSON(ty, "[0, 1]"),
+                        ArrayFromJSON(float64(), "[1, 0.5403023058681398]"));
+    this->AssertUnaryOp(Tan, ArrayFromJSON(ty, "[0, 1]"),
+                        ArrayFromJSON(float64(), "[0, 1.5574077246549023]"));
+    this->AssertUnaryOp(Asin, ArrayFromJSON(ty, "[0, 1]"),
+                        ArrayFromJSON(float64(), MakeArray(0, M_PI_2)));
+    this->AssertUnaryOp(Acos, ArrayFromJSON(ty, "[0, 1]"),
+                        ArrayFromJSON(float64(), MakeArray(M_PI_2, 0)));
+    this->AssertUnaryOp(atan, ArrayFromJSON(ty, "[0, 1]"),
+                        ArrayFromJSON(float64(), MakeArray(0, M_PI_4)));
+  }
+}
+
+TYPED_TEST(TestBinaryArithmeticIntegral, Trig) {
+  // Integer arguments promoted to double, sanity check here
+  auto ty = this->type_singleton();
+  auto atan2 = [](const Datum& y, const Datum& x, ArithmeticOptions, ExecContext* ctx) {
+    return Atan2(y, x, ctx);
+  };
+  this->AssertBinop(atan2, ArrayFromJSON(ty, "[0, 1]"), ArrayFromJSON(ty, "[1, 0]"),
+                    ArrayFromJSON(float64(), MakeArray(0, M_PI_2)));
+}
+
 TYPED_TEST(TestUnaryArithmeticFloating, Log) {
   using CType = typename TestFixture::CType;
-  auto ty = this->type_singleton();
   this->SetNansEqual(true);
   auto min_val = std::numeric_limits<CType>::min();
   auto max_val = std::numeric_limits<CType>::max();
@@ -1881,5 +1933,45 @@ TYPED_TEST(TestUnaryArithmeticFloating, Log) {
                                   Log1p(lowest_val, this->options_));
 }
 
+TYPED_TEST(TestUnaryArithmeticIntegral, Log) {
+  // Integer arguments promoted to double, sanity check here
+  auto ty = this->type_singleton();
+  for (auto check_overflow : {false, true}) {
+    this->SetOverflowCheck(check_overflow);
+    this->AssertUnaryOp(Ln, ArrayFromJSON(ty, "[1, null]"),
+                        ArrayFromJSON(float64(), "[0, null]"));
+    this->AssertUnaryOp(Log10, ArrayFromJSON(ty, "[1, 10, null]"),
+                        ArrayFromJSON(float64(), "[0, 1, null]"));
+    this->AssertUnaryOp(Log2, ArrayFromJSON(ty, "[1, 2, null]"),
+                        ArrayFromJSON(float64(), "[0, 1, null]"));
+    this->AssertUnaryOp(Log1p, ArrayFromJSON(ty, "[0, null]"),
+                        ArrayFromJSON(float64(), "[0, null]"));
+  }
+}
+
+TYPED_TEST(TestUnaryArithmeticSigned, Log) {
+  // Integer arguments promoted to double, sanity check here
+  auto ty = this->type_singleton();
+  this->SetNansEqual(true);
+  this->SetOverflowCheck(false);
+  this->AssertUnaryOp(Ln, ArrayFromJSON(ty, "[-1, 0]"),
+                      ArrayFromJSON(float64(), "[NaN, -Inf]"));
+  this->AssertUnaryOp(Log10, ArrayFromJSON(ty, "[-1, 0]"),
+                      ArrayFromJSON(float64(), "[NaN, -Inf]"));
+  this->AssertUnaryOp(Log2, ArrayFromJSON(ty, "[-1, 0]"),
+                      ArrayFromJSON(float64(), "[NaN, -Inf]"));
+  this->AssertUnaryOp(Log1p, ArrayFromJSON(ty, "[-2, -1]"),
+                      ArrayFromJSON(float64(), "[NaN, -Inf]"));
+  this->SetOverflowCheck(true);
+  this->AssertUnaryOpRaises(Ln, "[0]", "logarithm of zero");
+  this->AssertUnaryOpRaises(Ln, "[-1]", "logarithm of negative number");
+  this->AssertUnaryOpRaises(Log10, "[0]", "logarithm of zero");
+  this->AssertUnaryOpRaises(Log10, "[-1]", "logarithm of negative number");
+  this->AssertUnaryOpRaises(Log2, "[0]", "logarithm of zero");
+  this->AssertUnaryOpRaises(Log2, "[-1]", "logarithm of negative number");
+  this->AssertUnaryOpRaises(Log1p, "[-1]", "logarithm of zero");
+  this->AssertUnaryOpRaises(Log1p, "[-2]", "logarithm of negative number");
+}
+
 }  // namespace compute
 }  // namespace arrow

From ca50cd1243a83bc784bdf26ff2c0b85bfe9dd916 Mon Sep 17 00:00:00 2001
From: Alessandro Molina <amol@turbogears.org>
Date: Tue, 13 Jul 2021 08:40:41 -0400
Subject: [PATCH 543/719] ARROW-12762: [Python] Preserve field name when
 pickling list types

Closes #10704 from amol-/ARROW-12762

Authored-by: Alessandro Molina <amol@turbogears.org>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 python/pyarrow/tests/test_schema.py | 4 +++-
 python/pyarrow/types.pxi            | 4 ++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/python/pyarrow/tests/test_schema.py b/python/pyarrow/tests/test_schema.py
index e87f620070d..7ae7dd8099e 100644
--- a/python/pyarrow/tests/test_schema.py
+++ b/python/pyarrow/tests/test_schema.py
@@ -604,7 +604,9 @@ def test_type_schema_pickling():
         pa.timestamp('ns'),
         pa.decimal128(12, 2),
         pa.decimal256(76, 38),
-        pa.field('a', 'string', metadata={b'foo': b'bar'})
+        pa.field('a', 'string', metadata={b'foo': b'bar'}),
+        pa.list_(pa.field("element", pa.int64())),
+        pa.large_list(pa.field("element", pa.int64()))
     ]
 
     for val in cases:
diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi
index 9cc49b3bfd5..9a4ebad2d00 100644
--- a/python/pyarrow/types.pxi
+++ b/python/pyarrow/types.pxi
@@ -277,7 +277,7 @@ cdef class ListType(DataType):
         self.list_type = <const CListType*> type.get()
 
     def __reduce__(self):
-        return list_, (self.value_type,)
+        return list_, (self.value_field,)
 
     @property
     def value_field(self):
@@ -302,7 +302,7 @@ cdef class LargeListType(DataType):
         self.list_type = <const CLargeListType*> type.get()
 
     def __reduce__(self):
-        return large_list, (self.value_type,)
+        return large_list, (self.value_field,)
 
     @property
     def value_field(self):

From 75a64751b70c6d8ea4211c25e3903faa214c442d Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Tue, 13 Jul 2021 10:33:45 -0400
Subject: [PATCH 544/719] ARROW-12994: [R] Fix tests that assume UTC local tz

Also fixes some test warnings. Still to-do: address the failing (now skipped) string reverse functions added in #10589.

Closes #10706 from nealrichardson/arrow-12994

Authored-by: Neal Richardson <neal.p.richardson@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 r/tests/testthat/helper-expectation.R         | 70 +++++++++++------
 r/tests/testthat/test-compute-aggregate.R     | 45 ++++++-----
 r/tests/testthat/test-dplyr-filter.R          | 44 +++++------
 r/tests/testthat/test-dplyr-lubridate.R       | 38 +++++-----
 r/tests/testthat/test-dplyr-mutate.R          | 75 ++++++++-----------
 .../testthat/test-dplyr-string-functions.R    | 55 ++++++++------
 6 files changed, 176 insertions(+), 151 deletions(-)

diff --git a/r/tests/testthat/helper-expectation.R b/r/tests/testthat/helper-expectation.R
index b815515a4fa..d173620398e 100644
--- a/r/tests/testthat/helper-expectation.R
+++ b/r/tests/testthat/helper-expectation.R
@@ -70,20 +70,39 @@ verify_output <- function(...) {
   testthat::verify_output(...)
 }
 
-expect_dplyr_equal <- function(expr, # A dplyr pipeline with `input` as its start
-                               tbl,  # A tbl/df as reference, will make RB/Table with
-                               skip_record_batch = NULL, # Msg, if should skip RB test
-                               skip_table = NULL,        # Msg, if should skip Table test
+#' @param expr A dplyr pipeline with `input` as its start
+#' @param tbl A tbl/df as reference, will make RB/Table with
+#' @param skip_record_batch string skip message, if should skip RB test
+#' @param skip_table string skip message, if should skip Table test
+#' @param warning string expected warning from the RecordBatch and Table paths,
+#'   passed to `expect_warning()`. Special values:
+#'     * `NA` (the default) for ensuring no warning message
+#'     * `TRUE` is a special case to mean to check for the
+#'      "not supported in Arrow; pulling data into R" message.
+#' @param ... additional arguments, passed to `expect_equivalent()`
+expect_dplyr_equal <- function(expr,
+                               tbl,
+                               skip_record_batch = NULL,
+                               skip_table = NULL,
+                               warning = NA,
                                ...) {
   expr <- rlang::enquo(expr)
   expected <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(input = tbl)))
 
+  if (isTRUE(warning)) {
+    # Special-case the simple warning:
+    warning <- "not supported in Arrow; pulling data into R"
+  }
+
   skip_msg <- NULL
 
   if (is.null(skip_record_batch)) {
-    via_batch <- rlang::eval_tidy(
-      expr,
-      rlang::new_data_mask(rlang::env(input = record_batch(tbl)))
+    expect_warning(
+      via_batch <- rlang::eval_tidy(
+        expr,
+        rlang::new_data_mask(rlang::env(input = record_batch(tbl)))
+      ),
+      warning
     )
     expect_equivalent(via_batch, expected, ...)
   } else {
@@ -91,9 +110,12 @@ expect_dplyr_equal <- function(expr, # A dplyr pipeline with `input` as its star
   }
 
   if (is.null(skip_table)) {
-    via_table <- rlang::eval_tidy(
-      expr,
-      rlang::new_data_mask(rlang::env(input = Table$create(tbl)))
+    expect_warning(
+      via_table <- rlang::eval_tidy(
+        expr,
+        rlang::new_data_mask(rlang::env(input = Table$create(tbl)))
+      ),
+      warning
     )
     expect_equivalent(via_table, expected, ...)
   } else {
@@ -110,7 +132,7 @@ expect_dplyr_error <- function(expr, # A dplyr pipeline with `input` as its star
                                ...) {
   # ensure we have supplied tbl
   force(tbl)
-  
+
   expr <- rlang::enquo(expr)
   msg <- tryCatch(
     rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(input = tbl))),
@@ -126,7 +148,7 @@ expect_dplyr_error <- function(expr, # A dplyr pipeline with `input` as its star
       # but what we really care about is the `x` block
       # so (temporarily) let's pull those blocks out when we find them
       pattern <- i18ize_error_messages()
-      
+
       if (grepl(pattern, msg)) {
         msg <- sub(paste0("^.*(", pattern, ").*$"), "\\1", msg)
       }
@@ -179,7 +201,7 @@ expect_vector_equal <- function(expr, # A vectorized R expression containing `in
   if (is.null(skip_chunked_array)) {
     # split input vector into two to exercise ChunkedArray with >1 chunk
     split_vector <- split_vector_as_list(vec)
-    
+
     via_chunked <- rlang::eval_tidy(
       expr,
       rlang::new_data_mask(rlang::env(input = ChunkedArray$create(split_vector[[1]], split_vector[[2]])))
@@ -199,29 +221,29 @@ expect_vector_error <- function(expr, # A vectorized R expression containing `in
                                 skip_array = NULL, # Msg, if should skip Array test
                                 skip_chunked_array = NULL, # Msg, if should skip ChunkedArray test
                                 ...) {
-  
+
   expr <- rlang::enquo(expr)
-  
+
   msg <- tryCatch(
     rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(input = vec))),
     error = function (e) {
       msg <- conditionMessage(e)
-      
+
       pattern <- i18ize_error_messages()
-      
+
       if (grepl(pattern, msg)) {
         msg <- sub(paste0("^.*(", pattern, ").*$"), "\\1", msg)
       }
       msg
     }
   )
-  
+
   expect_true(identical(typeof(msg), "character"), label = "vector errored")
-  
+
   skip_msg <- NULL
-  
+
   if (is.null(skip_array)) {
-    
+
     expect_error(
       rlang::eval_tidy(
         expr,
@@ -233,11 +255,11 @@ expect_vector_error <- function(expr, # A vectorized R expression containing `in
   } else {
     skip_msg <- c(skip_msg, skip_array)
   }
-  
+
   if (is.null(skip_chunked_array)) {
     # split input vector into two to exercise ChunkedArray with >1 chunk
     split_vector <- split_vector_as_list(vec)
-    
+
     expect_error(
       rlang::eval_tidy(
         expr,
@@ -249,7 +271,7 @@ expect_vector_error <- function(expr, # A vectorized R expression containing `in
   } else {
     skip_msg <- c(skip_msg, skip_chunked_array)
   }
-  
+
   if (!is.null(skip_msg)) {
     skip(paste(skip_msg, collpase = "\n"))
   }
diff --git a/r/tests/testthat/test-compute-aggregate.R b/r/tests/testthat/test-compute-aggregate.R
index 25bdddef689..41418014bea 100644
--- a/r/tests/testthat/test-compute-aggregate.R
+++ b/r/tests/testthat/test-compute-aggregate.R
@@ -209,8 +209,15 @@ test_that("Edge cases", {
   for (type in c(int32(), float64(), bool())) {
     expect_equal(as.vector(sum(a$cast(type), na.rm = TRUE)), sum(NA, na.rm = TRUE))
     expect_equal(as.vector(mean(a$cast(type), na.rm = TRUE)), mean(NA, na.rm = TRUE))
-    expect_equal(as.vector(min(a$cast(type), na.rm = TRUE)), min(NA, na.rm = TRUE))
-    expect_equal(as.vector(max(a$cast(type), na.rm = TRUE)), max(NA, na.rm = TRUE))
+    expect_equal(
+      as.vector(min(a$cast(type), na.rm = TRUE)),
+      # Suppress the base R warning about no non-missing arguments
+      suppressWarnings(min(NA, na.rm = TRUE))
+    )
+    expect_equal(
+      as.vector(max(a$cast(type), na.rm = TRUE)),
+      suppressWarnings(max(NA, na.rm = TRUE))
+    )
   }
 })
 
@@ -342,29 +349,29 @@ test_that("match_arrow", {
 
   ca <- ChunkedArray$create(c(1, 4, 3, 1, 1, 3, 4))
   expect_equal(match_arrow(ca, tab), ChunkedArray$create(c(3L, 0L, 1L, 3L, 3L, 1L, 0L)))
-  
+
   sc <- Scalar$create(3)
   expect_equal(match_arrow(sc, tab), Scalar$create(1L))
-  
+
   vec <-  c(1,2)
   expect_equal(match_arrow(vec, tab), Array$create(c(3L, 2L)))
-  
+
 })
 
 test_that("is_in", {
   a <- Array$create(c(9, 4, 3))
   tab <- c(4, 3, 2, 1)
   expect_equal(is_in(a, tab), Array$create(c(FALSE, TRUE, TRUE)))
-  
+
   ca <- ChunkedArray$create(c(9, 4, 3))
   expect_equal(is_in(ca, tab), ChunkedArray$create(c(FALSE, TRUE, TRUE)))
-  
+
   sc <- Scalar$create(3)
   expect_equal(is_in(sc, tab), Scalar$create(TRUE))
-  
+
   vec <-  c(1,9)
   expect_equal(is_in(vec, tab), Array$create(c(TRUE, FALSE)))
-  
+
 })
 
 test_that("value_counts", {
@@ -383,40 +390,40 @@ test_that("value_counts", {
 })
 
 test_that("any.Array and any.ChunkedArray", {
-  
+
   data <- c(1:10, NA, NA)
 
   expect_vector_equal(any(input > 5), data)
   expect_vector_equal(any(input < 1), data)
   expect_vector_equal(any(input < 1, na.rm = TRUE), data)
-  
+
   data_logical <- c(TRUE, FALSE, TRUE, NA, FALSE)
-  
+
   expect_vector_equal(any(input), data_logical)
   expect_vector_equal(any(input, na.rm = TRUE), data_logical)
-  
+
 })
 
 test_that("all.Array and all.ChunkedArray", {
 
   data <- c(1:10, NA, NA)
-  
+
   expect_vector_equal(all(input > 5), data)
   expect_vector_equal(all(input < 11), data)
   expect_vector_equal(all(input < 11, na.rm = TRUE), data)
-  
+
   data_logical <- c(TRUE, TRUE, NA)
-  
+
   expect_vector_equal(all(input), data_logical)
   expect_vector_equal(all(input, na.rm = TRUE), data_logical)
-  
+
 })
 
 test_that("variance", {
   data <- c(-37, 267, 88, -120, 9, 101, -65, -23, NA)
   arr <- Array$create(data)
   chunked_arr <- ChunkedArray$create(data)
-  
+
   expect_equal(call_function("variance", arr, options = list(ddof = 5)), Scalar$create(34596))
   expect_equal(call_function("variance", chunked_arr, options = list(ddof = 5)), Scalar$create(34596))
 })
@@ -425,7 +432,7 @@ test_that("stddev", {
   data <- c(-37, 267, 88, -120, 9, 101, -65, -23, NA)
   arr <- Array$create(data)
   chunked_arr <- ChunkedArray$create(data)
-  
+
   expect_equal(call_function("stddev", arr, options = list(ddof = 5)), Scalar$create(186))
   expect_equal(call_function("stddev", chunked_arr, options = list(ddof = 5)), Scalar$create(186))
 })
diff --git a/r/tests/testthat/test-dplyr-filter.R b/r/tests/testthat/test-dplyr-filter.R
index 6bba58a7e06..f070a0150e9 100644
--- a/r/tests/testthat/test-dplyr-filter.R
+++ b/r/tests/testthat/test-dplyr-filter.R
@@ -315,31 +315,25 @@ test_that("Filtering on a column that doesn't exist errors correctly", {
 })
 
 test_that("Filtering with unsupported functions", {
-  expect_warning(
-    expect_dplyr_equal(
-      input %>%
-        filter(int > 2, pnorm(dbl) > .99) %>%
-        collect(),
-      tbl
-    ),
-    'Expression pnorm(dbl) > 0.99 not supported in Arrow; pulling data into R',
-    fixed = TRUE
-  )
-  expect_warning(
-    expect_dplyr_equal(
-      input %>%
-        filter(
-          nchar(chr, type = "bytes", allowNA = TRUE) == 1, # bad, Arrow msg
-          int > 2,                                         # good
-          pnorm(dbl) > .99                                 # bad, opaque
-        ) %>%
-        collect(),
-      tbl
-    ),
-'* In nchar(chr, type = "bytes", allowNA = TRUE) == 1, allowNA = TRUE not supported by Arrow
-* Expression pnorm(dbl) > 0.99 not supported in Arrow
-pulling data into R',
-    fixed = TRUE
+  expect_dplyr_equal(
+    input %>%
+      filter(int > 2, pnorm(dbl) > .99) %>%
+      collect(),
+    tbl,
+    warning = 'Expression pnorm\\(dbl\\) > 0.99 not supported in Arrow; pulling data into R'
+  )
+  expect_dplyr_equal(
+    input %>%
+      filter(
+        nchar(chr, type = "bytes", allowNA = TRUE) == 1, # bad, Arrow msg
+        int > 2,                                         # good
+        pnorm(dbl) > .99                                 # bad, opaque
+      ) %>%
+      collect(),
+    tbl,
+    warning = '\\* In nchar\\(chr, type = "bytes", allowNA = TRUE\\) == 1, allowNA = TRUE not supported by Arrow
+\\* Expression pnorm\\(dbl\\) > 0.99 not supported in Arrow
+pulling data into R'
   )
 })
 
diff --git a/r/tests/testthat/test-dplyr-lubridate.R b/r/tests/testthat/test-dplyr-lubridate.R
index 47bee2c28e5..d01afc86fef 100644
--- a/r/tests/testthat/test-dplyr-lubridate.R
+++ b/r/tests/testthat/test-dplyr-lubridate.R
@@ -20,15 +20,21 @@ skip_if_not_available("dataset")
 library(lubridate)
 library(dplyr)
 
+# base::strptime() defaults to local timezone
+# but arrow's strptime defaults to UTC.
+# So that tests are consistent, set the local timezone to UTC
+# TODO: consider reevaluating this workaround after ARROW-12980
+withr::local_timezone("UTC")
+
 test_date <- as.POSIXct("2017-01-01 00:00:12.3456789", tz = "")
 test_df <- tibble::tibble(date = test_date)
 
 # We can support this feature after ARROW-12980 is merged
-test_that("timezone aware timestamps are not supported",{
-  
-  tz_aware_date <- as.POSIXct("2017-01-01 00:00:12.3456789", tz = "BST")
+test_that("timezone aware timestamps are not supported", {
+
+  tz_aware_date <- as.POSIXct("2017-01-01 00:00:12.3456789", tz = "Asia/Pyongyang")
   tz_aware_df <- tibble::tibble(date = tz_aware_date)
-  
+
   expect_error(
     Table$create(tz_aware_df) %>%
       mutate(x = wday(date)) %>%
@@ -39,10 +45,10 @@ test_that("timezone aware timestamps are not supported",{
 
 # We can support this feature when ARROW-13138 is resolved
 test_that("date32 objects are not supported",{
-  
+
   date <- ymd("2017-01-01")
   df <- tibble::tibble(date = date)
-  
+
   expect_error(
     Table$create(df) %>%
       mutate(x = year(date)) %>%
@@ -70,7 +76,7 @@ test_that("extract isoyear from date", {
     test_df
   )
 })
-  
+
 test_that("extract quarter from date", {
   expect_dplyr_equal(
     input %>%
@@ -106,7 +112,6 @@ test_that("extract day from date", {
     test_df
   )
 })
-  
 
 test_that("extract wday from date", {
  expect_dplyr_equal(
@@ -115,21 +120,21 @@ test_that("extract wday from date", {
       collect(),
     test_df
   )
-  
+
   expect_dplyr_equal(
     input %>%
       mutate(x = wday(date, week_start = 3)) %>%
       collect(),
     test_df
   )
-  
+
   expect_dplyr_equal(
     input %>%
       mutate(x = wday(date, week_start = 1)) %>%
       collect(),
     test_df
   )
-  
+
   # We should be able to support the label argument after this ticket is resolved:
   # https://issues.apache.org/jira/browse/ARROW-13133
   x <- Expression$field_ref("x")
@@ -137,9 +142,9 @@ test_that("extract wday from date", {
     nse_funcs$wday(x, label = TRUE),
     "Label argument not supported by Arrow"
   )
-  
+
 })
-  
+
 test_that("extract yday from date", {
   expect_dplyr_equal(
     input %>%
@@ -148,7 +153,7 @@ test_that("extract yday from date", {
     test_df
   )
 })
-  
+
 test_that("extract hour from date", {
   expect_dplyr_equal(
     input %>%
@@ -157,7 +162,7 @@ test_that("extract hour from date", {
     test_df
   )
 })
-  
+
 test_that("extract minute from date", {
    expect_dplyr_equal(
     input %>%
@@ -166,7 +171,7 @@ test_that("extract minute from date", {
     test_df
   )
 })
-  
+
 test_that("extract second from date", {
   expect_dplyr_equal(
     input %>%
@@ -177,4 +182,3 @@ test_that("extract second from date", {
     tolerance = 1e-6
   )
 })
-
diff --git a/r/tests/testthat/test-dplyr-mutate.R b/r/tests/testthat/test-dplyr-mutate.R
index 66cb9ff305d..3e64891cec5 100644
--- a/r/tests/testthat/test-dplyr-mutate.R
+++ b/r/tests/testthat/test-dplyr-mutate.R
@@ -154,20 +154,17 @@ test_that("nchar() arguments", {
     tbl
   )
   # This tests the whole abandon_ship() machinery
-  expect_warning(
-    expect_dplyr_equal(
-      input %>%
-        select(int, verses) %>%
-        mutate(
-          line_lengths = nchar(verses, type = "bytes", allowNA = TRUE),
-          longer = line_lengths * 10
-        ) %>%
-        filter(line_lengths > 15) %>%
-        collect(),
-      tbl
-    ),
-    'In nchar(verses, type = "bytes", allowNA = TRUE), allowNA = TRUE not supported by Arrow; pulling data into R',
-    fixed = TRUE
+  expect_dplyr_equal(
+    input %>%
+      select(int, verses) %>%
+      mutate(
+        line_lengths = nchar(verses, type = "bytes", allowNA = TRUE),
+        longer = line_lengths * 10
+      ) %>%
+      filter(line_lengths > 15) %>%
+      collect(),
+    tbl,
+    warning = 'In nchar\\(verses, type = "bytes", allowNA = TRUE\\), allowNA = TRUE not supported by Arrow; pulling data into R'
   )
 })
 
@@ -253,28 +250,24 @@ test_that("dplyr::mutate's examples", {
   # but warn that they're pulling data into R to do so
 
   # across + autosplicing: ARROW-11699
-  expect_warning(
-    expect_dplyr_equal(
-      input %>%
-        select(name, homeworld, species) %>%
-        mutate(across(!name, as.factor)) %>%
-        collect(),
-      starwars
-    ),
-    "Expression across.*not supported in Arrow"
+  expect_dplyr_equal(
+    input %>%
+      select(name, homeworld, species) %>%
+      mutate(across(!name, as.factor)) %>%
+      collect(),
+    starwars,
+    warning = "Expression across.*not supported in Arrow"
   )
 
   # group_by then mutate
-  expect_warning(
-    expect_dplyr_equal(
-      input %>%
-        select(name, mass, homeworld) %>%
-        group_by(homeworld) %>%
-        mutate(rank = min_rank(desc(mass))) %>%
-        collect(),
-      starwars
-    ),
-    "not supported in Arrow"
+  expect_dplyr_equal(
+    input %>%
+      select(name, mass, homeworld) %>%
+      group_by(homeworld) %>%
+      mutate(rank = min_rank(desc(mass))) %>%
+      collect(),
+    starwars,
+    warning = TRUE
   )
 
   # `.before` and `.after` experimental args: ARROW-11701
@@ -345,15 +338,13 @@ test_that("dplyr::mutate's examples", {
   # tibbles because the expressions are computed within groups.
   # The following normalises `mass` by the global average:
   # TODO(ARROW-11702)
-  expect_warning(
-    expect_dplyr_equal(
-      input %>%
-        select(name, mass, species) %>%
-        mutate(mass_norm = mass / mean(mass, na.rm = TRUE)) %>%
-        collect(),
-      starwars
-    ),
-    "not supported in Arrow"
+  expect_dplyr_equal(
+    input %>%
+      select(name, mass, species) %>%
+      mutate(mass_norm = mass / mean(mass, na.rm = TRUE)) %>%
+      collect(),
+    starwars,
+    warning = TRUE
   )
 })
 
diff --git a/r/tests/testthat/test-dplyr-string-functions.R b/r/tests/testthat/test-dplyr-string-functions.R
index 438f1038e57..e7b860eb99c 100644
--- a/r/tests/testthat/test-dplyr-string-functions.R
+++ b/r/tests/testthat/test-dplyr-string-functions.R
@@ -652,6 +652,11 @@ test_that("edge cases in string detection and replacement", {
 })
 
 test_that("strptime", {
+  # base::strptime() defaults to local timezone
+  # but arrow's strptime defaults to UTC.
+  # So that tests are consistent, set the local timezone to UTC
+  # TODO: consider reevaluating this workaround after ARROW-12980
+  withr::local_timezone("UTC")
 
   t_string <- tibble(x = c("2018-10-07 19:04:05", NA))
   t_stamp <- tibble(x = c(lubridate::ymd_hms("2018-10-07 19:04:05"), NA))
@@ -769,25 +774,28 @@ test_that("arrow_find_substring and arrow_find_substring_regex", {
 })
 
 test_that("stri_reverse and arrow_ascii_reverse functions", {
-  
+  # TODO: these actually aren't implemented (ARROW-12869)
+  # Fix them, then remove the `warning = TRUE` arguments
   df_ascii <- tibble(x = c("Foo\nand bar", "baz\tand qux and quux"))
-  
+
   df_utf8 <- tibble(x = c("Foo\u00A0\u0061nd\u00A0bar", "\u0062az\u00A0and\u00A0qux\u3000and\u00A0quux"))
-  
+
   expect_dplyr_equal(
     input %>%
       mutate(x = stri_reverse(x)) %>%
       collect(),
-    df_utf8
+    df_utf8,
+    warning = TRUE # Remove me
   )
-  
+
   expect_dplyr_equal(
     input %>%
       mutate(x = stri_reverse(x)) %>%
       collect(),
-    df_ascii
+    df_ascii,
+    warning = TRUE # Remove me
   )
-  
+
   expect_equivalent(
     df_ascii %>%
       Table$create() %>%
@@ -795,7 +803,7 @@ test_that("stri_reverse and arrow_ascii_reverse functions", {
       collect(),
     tibble(x = c("rab dna\nooF", "xuuq dna xuq dna\tzab"))
   )
-  
+
   expect_error(
     df_utf8 %>%
       Table$create() %>%
@@ -806,12 +814,12 @@ test_that("stri_reverse and arrow_ascii_reverse functions", {
 })
 
 test_that("str_like", {
-  
+
   df <- tibble(x = c("Foo and bar", "baz and qux and quux"))
-  
+
   # TODO: After new version of stringr with str_like has been released, update all
   # these tests to use expect_dplyr_equal
-  
+
   # No match - entire string
   expect_equivalent(
     df %>%
@@ -820,7 +828,7 @@ test_that("str_like", {
       collect(),
     tibble(x = c(FALSE, FALSE))
   )
-  
+
   # Match - entire string
   expect_equivalent(
     df %>%
@@ -829,7 +837,7 @@ test_that("str_like", {
       collect(),
     tibble(x = c(TRUE, FALSE))
   )
-  
+
   # Wildcard
   expect_equivalent(
     df %>%
@@ -838,7 +846,7 @@ test_that("str_like", {
       collect(),
     tibble(x = c(TRUE, FALSE))
   )
-  
+
   # Ignore case
   expect_equivalent(
     df %>%
@@ -847,7 +855,7 @@ test_that("str_like", {
       collect(),
     tibble(x = c(FALSE, FALSE))
   )
-  
+
   # Single character
   expect_equivalent(
     df %>%
@@ -856,9 +864,9 @@ test_that("str_like", {
       collect(),
     tibble(x = c(FALSE, TRUE))
   )
-  
+
   # This will give an error until a new version of stringr with str_like has been released
-  skip("Test will fail until stringr > 1.4.0 is release")
+  skip_if_not(packageVersion("stringr") > "1.4.0")
   expect_dplyr_equal(
     input %>%
       mutate(x = str_like(x, "%baz%")) %>%
@@ -868,42 +876,41 @@ test_that("str_like", {
 })
 
 test_that("str_pad", {
-  
   df <- tibble(x = c("Foo and bar", "baz and qux and quux"))
-  
+
   expect_dplyr_equal(
     input %>%
       mutate(x = str_pad(x, width = 31)) %>%
       collect(),
     df
   )
-  
+
   expect_dplyr_equal(
     input %>%
       mutate(x = str_pad(x, width = 30, side = "right")) %>%
       collect(),
     df
   )
-  
+
   expect_dplyr_equal(
     input %>%
       mutate(x = str_pad(x, width = 31, side = "left", pad = "+")) %>%
       collect(),
     df
   )
-  
+
   expect_dplyr_equal(
     input %>%
       mutate(x = str_pad(x, width = 10, side = "left", pad = "+")) %>%
       collect(),
     df
   )
-  
+
   expect_dplyr_equal(
     input %>%
       mutate(x = str_pad(x, width = 31, side = "both")) %>%
       collect(),
     df
   )
-  
+
 })

From afdb8da6c98da40db9fe7260ad631c165863844c Mon Sep 17 00:00:00 2001
From: Matthew Topol <mtopol@factset.com>
Date: Tue, 13 Jul 2021 08:48:04 -0700
Subject: [PATCH 545/719] ARROW-12851: [Go][Parquet] Add Golang Parquet
 encoding package

Adding the implementation of encoding types of data for Parquet including Plain, RLE, Dictionary, Delta Byte Array, Delta Packing types. It also includes hashing implementation for more efficient hash tables than using go's std map implementation as shown in the benchmarks included in the test files which do benchmark comparisons between a go-map based implementation and the hash table implementation that I ported from the C++

In addition, while adding some test cases I discovered that apparently the -force-vector-width=32 argument on the asm generation was causing segfaults on the encoding tests, so let's let LLVM make it's own choice about the vector width and interleaving.

Closes #10379 from zeroshade/parquet-encoding

Lead-authored-by: Matthew Topol <mtopol@factset.com>
Co-authored-by: Matt Topol <mtopol@factset.com>
Signed-off-by: Micah Kornfield <emkornfield@gmail.com>
---
 go/parquet/doc.go                             |    4 +-
 go/parquet/go.mod                             |    6 +-
 go/parquet/go.sum                             |   18 +-
 .../internal/encoding/boolean_decoder.go      |  101 +
 .../internal/encoding/boolean_encoder.go      |   84 +
 .../internal/encoding/byte_array_decoder.go   |   88 +
 .../internal/encoding/byte_array_encoder.go   |  123 +
 go/parquet/internal/encoding/decoder.go       |  186 +
 .../internal/encoding/delta_bit_packing.go    |  520 ++
 .../internal/encoding/delta_byte_array.go     |  216 +
 .../encoding/delta_length_byte_array.go       |  144 +
 go/parquet/internal/encoding/encoder.go       |  311 +
 .../encoding/fixed_len_byte_array_decoder.go  |   66 +
 .../encoding/fixed_len_byte_array_encoder.go  |  104 +
 .../internal/encoding/physical_types.tmpldata |   52 +
 .../encoding/plain_encoder_types.gen.go       |  639 ++
 .../encoding/plain_encoder_types.gen.go.tmpl  |  182 +
 .../internal/encoding/typed_encoder.gen.go    | 1443 ++++
 .../encoding/typed_encoder.gen.go.tmpl        |  341 +
 go/parquet/internal/encoding/types.go         |  497 ++
 go/parquet/internal/testutils/utils.go        |   42 +
 go/parquet/internal/utils/Makefile            |    4 +-
 .../internal/utils/_lib/bit_packing_avx2.s    |    2 +-
 go/parquet/internal/utils/_lib/min_max_avx2.s | 1519 +---
 go/parquet/internal/utils/_lib/min_max_sse4.s |    2 +-
 .../internal/utils/_lib/unpack_bool_avx2.s    | 6361 +--------------
 .../internal/utils/_lib/unpack_bool_sse4.s    |    2 +-
 go/parquet/internal/utils/bitmap_writer.go    |   33 +-
 .../internal/utils/min_max_avx2_amd64.s       | 1567 +---
 .../internal/utils/unpack_bool_avx2_amd64.s   | 7023 +----------------
 30 files changed, 5998 insertions(+), 15682 deletions(-)
 create mode 100644 go/parquet/internal/encoding/boolean_decoder.go
 create mode 100644 go/parquet/internal/encoding/boolean_encoder.go
 create mode 100644 go/parquet/internal/encoding/byte_array_decoder.go
 create mode 100644 go/parquet/internal/encoding/byte_array_encoder.go
 create mode 100644 go/parquet/internal/encoding/decoder.go
 create mode 100644 go/parquet/internal/encoding/delta_bit_packing.go
 create mode 100644 go/parquet/internal/encoding/delta_byte_array.go
 create mode 100644 go/parquet/internal/encoding/delta_length_byte_array.go
 create mode 100644 go/parquet/internal/encoding/encoder.go
 create mode 100644 go/parquet/internal/encoding/fixed_len_byte_array_decoder.go
 create mode 100644 go/parquet/internal/encoding/fixed_len_byte_array_encoder.go
 create mode 100644 go/parquet/internal/encoding/physical_types.tmpldata
 create mode 100644 go/parquet/internal/encoding/plain_encoder_types.gen.go
 create mode 100644 go/parquet/internal/encoding/plain_encoder_types.gen.go.tmpl
 create mode 100644 go/parquet/internal/encoding/typed_encoder.gen.go
 create mode 100644 go/parquet/internal/encoding/typed_encoder.gen.go.tmpl
 create mode 100644 go/parquet/internal/encoding/types.go
 create mode 100644 go/parquet/internal/testutils/utils.go

diff --git a/go/parquet/doc.go b/go/parquet/doc.go
index cf87b81826e..87a592836a9 100644
--- a/go/parquet/doc.go
+++ b/go/parquet/doc.go
@@ -29,8 +29,8 @@
 //   go get -u github.com/apache/arrow/go/parquet
 //
 // In addition, two cli utilities are provided:
-// 	go install github.factset.com/mtopol/parquet-go/cmd/parquet_reader
-// 	go install github.factset.com/mtopol/parquet-go/cmd/parquet_schema
+// 	go install github.com/apache/arrow/go/parquet/cmd/parquet_reader
+// 	go install github.com/apache/arrow/go/parquet/cmd/parquet_schema
 //
 // Modules
 //
diff --git a/go/parquet/go.mod b/go/parquet/go.mod
index 9c415931191..cf2be66aba0 100644
--- a/go/parquet/go.mod
+++ b/go/parquet/go.mod
@@ -19,15 +19,17 @@ module github.com/apache/arrow/go/parquet
 go 1.15
 
 require (
+	github.com/JohnCGriffin/overflow v0.0.0-20170615021017-4d914c927216
 	github.com/andybalholm/brotli v1.0.1
-	github.com/apache/arrow/go/arrow v0.0.0-20210310173904-5de02e3697aa
+	github.com/apache/arrow/go/arrow v0.0.0-20210520144409-d07f30ada677
 	github.com/apache/thrift/lib/go/thrift v0.0.0-20210120171102-e27e82c46ba4
 	github.com/golang/snappy v0.0.3
 	github.com/klauspost/asmfmt v1.2.3
-	github.com/klauspost/compress v1.11.12
+	github.com/klauspost/compress v1.12.2
 	github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8
 	github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3
 	github.com/stretchr/testify v1.7.0
+	github.com/zeebo/xxh3 v0.10.0
 	golang.org/x/exp v0.0.0-20210220032938-85be41e4509f
 	golang.org/x/sys v0.0.0-20210309074719-68d13333faf2
 	golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1
diff --git a/go/parquet/go.sum b/go/parquet/go.sum
index be02835cc89..bebc1ff48a1 100644
--- a/go/parquet/go.sum
+++ b/go/parquet/go.sum
@@ -2,11 +2,13 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMT
 dmitri.shuralyov.com/gpu/mtl v0.0.0-20201218220906-28db891af037/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
 github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
 github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
+github.com/JohnCGriffin/overflow v0.0.0-20170615021017-4d914c927216 h1:2ZboyJ8vl75fGesnG9NpMTD2DyQI3FzMXy4x752rGF0=
+github.com/JohnCGriffin/overflow v0.0.0-20170615021017-4d914c927216/go.mod h1:X0CRv0ky0k6m906ixxpzmDRLvX58TFUKS2eePweuyxk=
 github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af/go.mod h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw=
 github.com/andybalholm/brotli v1.0.1 h1:KqhlKozYbRtJvsPrrEeXcO+N2l6NYT5A2QAFmSULpEc=
 github.com/andybalholm/brotli v1.0.1/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y=
-github.com/apache/arrow/go/arrow v0.0.0-20210310173904-5de02e3697aa h1:0Bhiab9ep1wmbD1Lm17uqPkzgYhcBIZf1CsvrMhFMGI=
-github.com/apache/arrow/go/arrow v0.0.0-20210310173904-5de02e3697aa/go.mod h1:c9sxoIT3YgLxH4UhLOCKaBlEojuMhVYpk4Ntv3opUTQ=
+github.com/apache/arrow/go/arrow v0.0.0-20210520144409-d07f30ada677 h1:F7HiqIf4aBsF4YUBcLolXZ8duSEideNnZnr3lBGa2sA=
+github.com/apache/arrow/go/arrow v0.0.0-20210520144409-d07f30ada677/go.mod h1:R4hW3Ug0s+n4CUsWHKOj00Pu01ZqU4x/hSF5kXUcXKQ=
 github.com/apache/thrift/lib/go/thrift v0.0.0-20210120171102-e27e82c46ba4 h1:orNYqmQGnSjgOauLWjHEp9/qIDT98xv/0Aa4Zet3/Y8=
 github.com/apache/thrift/lib/go/thrift v0.0.0-20210120171102-e27e82c46ba4/go.mod h1:V/LzksIyqd3KZuQ2SunvReTG/UkArhII1dAWY5U1sCE=
 github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
@@ -42,15 +44,19 @@ github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMyw
 github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
 github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
 github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
 github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes=
 github.com/klauspost/asmfmt v1.2.3 h1:qEM7SLDo6DXXXz5yTpqUoxhsrtwH30nNR2riO2ZjznY=
 github.com/klauspost/asmfmt v1.2.3/go.mod h1:RAoUvqkWr2rUa2I19qKMEVZQe4BVtcHGTMCUOcCU2Lg=
-github.com/klauspost/compress v1.11.12 h1:famVnQVu7QwryBN4jNseQdUKES71ZAOnB6UQQJPZvqk=
-github.com/klauspost/compress v1.11.12/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
+github.com/klauspost/compress v1.11.13/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
+github.com/klauspost/compress v1.12.2 h1:2KCfW3I9M7nSc5wOqXAlW2v2U6v+w6cbjvbfp+OykW8=
+github.com/klauspost/compress v1.12.2/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg=
 github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 h1:AMFGa4R4MiIpspGNG7Z948v4n35fFGB3RR3G/ry4FWs=
 github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8/go.mod h1:mC1jAcsrzbxHt8iiaC+zU4b1ylILSosueou12R++wfY=
 github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 h1:+n/aFZefKZp7spd8DFdX7uMikMLXX4oubIzJF4kv/wI=
 github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3/go.mod h1:RagcQ7I8IeTMnF8JTXieKnO4Z6JCsikNEzj0DwauVzE=
+github.com/pierrec/lz4/v4 v4.1.4 h1:PjkB+qEooc9nw4F6Pxe/e0xaRdWz3suItXWxWqAO1QE=
+github.com/pierrec/lz4/v4 v4.1.4/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
@@ -58,6 +64,8 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+
 github.com/stretchr/testify v1.2.0/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
 github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
 github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/zeebo/xxh3 v0.10.0 h1:1+2Mov9zfxTNUeoDG9k9i13VfxTR0p1JQu8L0vikxB0=
+github.com/zeebo/xxh3 v0.10.0/go.mod h1:AQY73TOrhF3jNsdiM9zZOb8MThrYbZONHj7ryDBaLpg=
 golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
 golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
 golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
@@ -97,6 +105,7 @@ golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5h
 golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200727154430-2d971f7391a4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200909081042-eff7692f9009/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20210309074719-68d13333faf2 h1:46ULzRKLh1CwgRq2dC5SlBzEqqNCi8rreOZnNrbqcIY=
 golang.org/x/sys v0.0.0-20210309074719-68d13333faf2/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
@@ -135,7 +144,6 @@ google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyac
 google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY=
 google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
 google.golang.org/grpc v1.32.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak=
-google.golang.org/grpc/cmd/protoc-gen-go-grpc v0.0.0-20200910201057-6591123024b3/go.mod h1:6Kw0yEErY5E/yWrBtf03jp27GLLJujG4z/JK95pnjjw=
 google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
 google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
 google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
diff --git a/go/parquet/internal/encoding/boolean_decoder.go b/go/parquet/internal/encoding/boolean_decoder.go
new file mode 100644
index 00000000000..a33b21a3181
--- /dev/null
+++ b/go/parquet/internal/encoding/boolean_decoder.go
@@ -0,0 +1,101 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package encoding
+
+import (
+	"github.com/apache/arrow/go/arrow/bitutil"
+	"github.com/apache/arrow/go/parquet"
+	"github.com/apache/arrow/go/parquet/internal/utils"
+	"golang.org/x/xerrors"
+)
+
+// PlainBooleanDecoder is for the Plain Encoding type, there is no
+// dictionary decoding for bools.
+type PlainBooleanDecoder struct {
+	decoder
+
+	bitOffset int
+}
+
+// Type for the PlainBooleanDecoder is parquet.Types.Boolean
+func (PlainBooleanDecoder) Type() parquet.Type {
+	return parquet.Types.Boolean
+}
+
+// Decode fills out with bools decoded from the data at the current point
+// or until we reach the end of the data.
+//
+// Returns the number of values decoded
+func (dec *PlainBooleanDecoder) Decode(out []bool) (int, error) {
+	max := utils.MinInt(len(out), dec.nvals)
+
+	unalignedExtract := func(start, end, curBitOffset int) int {
+		i := start
+		for ; curBitOffset < end; i, curBitOffset = i+1, curBitOffset+1 {
+			out[i] = (dec.data[0] & byte(1<<curBitOffset)) != 0
+		}
+		return i // return the number of bits we extracted
+	}
+
+	// if we aren't at a byte boundary, then get bools until we hit
+	// a byte boundary with the bit offset.
+	i := 0
+	if dec.bitOffset != 0 {
+		i = unalignedExtract(0, 8, dec.bitOffset)
+		dec.bitOffset = 0
+	}
+
+	// determine the number of full bytes worth of bits we can decode
+	// given the number of values we want to decode.
+	bitsRemain := max - i
+	batch := bitsRemain / 8 * 8
+	if batch > 0 { // only go in here if there's at least one full byte to decode
+		if i > 0 { // skip our data forward if we decoded anything above
+			dec.data = dec.data[1:]
+			out = out[i:]
+		}
+		// determine the number of aligned bytes we can grab using SIMD optimized
+		// functions to improve performance.
+		alignedBytes := bitutil.BytesForBits(int64(batch))
+		utils.BytesToBools(dec.data[:alignedBytes], out)
+		dec.data = dec.data[alignedBytes:]
+		out = out[alignedBytes*8:]
+	}
+
+	// grab any trailing bits now that we've got our aligned bytes.
+	dec.bitOffset += unalignedExtract(dec.bitOffset, bitsRemain-batch, dec.bitOffset)
+
+	dec.nvals -= max
+	return max, nil
+}
+
+// DecodeSpaced is like Decode except it expands the values to leave spaces for null
+// as determined by the validBits bitmap.
+func (dec *PlainBooleanDecoder) DecodeSpaced(out []bool, nullCount int, validBits []byte, validBitsOffset int64) (int, error) {
+	if nullCount > 0 {
+		toRead := len(out) - nullCount
+		valuesRead, err := dec.Decode(out[:toRead])
+		if err != nil {
+			return 0, err
+		}
+		if valuesRead != toRead {
+			return valuesRead, xerrors.New("parquet: boolean decoder: number of values / definition levels read did not match")
+		}
+		return spacedExpand(out, nullCount, validBits, validBitsOffset), nil
+	}
+	return dec.Decode(out)
+}
diff --git a/go/parquet/internal/encoding/boolean_encoder.go b/go/parquet/internal/encoding/boolean_encoder.go
new file mode 100644
index 00000000000..fc9cd2728ac
--- /dev/null
+++ b/go/parquet/internal/encoding/boolean_encoder.go
@@ -0,0 +1,84 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package encoding
+
+import (
+	"github.com/apache/arrow/go/arrow/bitutil"
+	"github.com/apache/arrow/go/parquet"
+	"github.com/apache/arrow/go/parquet/internal/utils"
+)
+
+const (
+	boolBufSize = 1024
+	boolsInBuf  = boolBufSize * 8
+)
+
+// PlainBooleanEncoder encodes bools as a bitmap as per the Plain Encoding
+type PlainBooleanEncoder struct {
+	encoder
+	bitsBuffer []byte
+	wr         utils.BitmapWriter
+}
+
+// Type for the PlainBooleanEncoder is parquet.Types.Boolean
+func (PlainBooleanEncoder) Type() parquet.Type {
+	return parquet.Types.Boolean
+}
+
+// Put encodes the contents of in into the underlying data buffer.
+func (enc *PlainBooleanEncoder) Put(in []bool) {
+	if enc.bitsBuffer == nil {
+		enc.bitsBuffer = make([]byte, boolBufSize)
+	}
+	if enc.wr == nil {
+		enc.wr = utils.NewBitmapWriter(enc.bitsBuffer, 0, boolsInBuf)
+	}
+
+	n := enc.wr.AppendBools(in)
+	for n < len(in) {
+		enc.wr.Finish()
+		enc.append(enc.bitsBuffer)
+		enc.wr.Reset(0, boolsInBuf)
+		in = in[n:]
+		n = enc.wr.AppendBools(in)
+	}
+}
+
+// PutSpaced will use the validBits bitmap to determine which values are nulls
+// and can be left out from the slice, and the encoded without those nulls.
+func (enc *PlainBooleanEncoder) PutSpaced(in []bool, validBits []byte, validBitsOffset int64) {
+	bufferOut := make([]bool, len(in))
+	nvalid := spacedCompress(in, bufferOut, validBits, validBitsOffset)
+	enc.Put(bufferOut[:nvalid])
+}
+
+// EstimatedDataEncodedSize returns the current number of bytes that have
+// been buffered so far
+func (enc *PlainBooleanEncoder) EstimatedDataEncodedSize() int64 {
+	return int64(enc.sink.Len() + int(bitutil.BytesForBits(enc.wr.Pos())))
+}
+
+// FlushValues returns the buffered data, the responsibility is on the caller
+// to release the buffer memory
+func (enc *PlainBooleanEncoder) FlushValues() Buffer {
+	if enc.wr.Pos() > 0 {
+		toFlush := int(enc.wr.Pos())
+		enc.append(enc.bitsBuffer[:bitutil.BytesForBits(int64(toFlush))])
+	}
+
+	return enc.sink.Finish()
+}
diff --git a/go/parquet/internal/encoding/byte_array_decoder.go b/go/parquet/internal/encoding/byte_array_decoder.go
new file mode 100644
index 00000000000..fa8033b78fa
--- /dev/null
+++ b/go/parquet/internal/encoding/byte_array_decoder.go
@@ -0,0 +1,88 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package encoding
+
+import (
+	"encoding/binary"
+
+	"github.com/apache/arrow/go/parquet"
+	"github.com/apache/arrow/go/parquet/internal/utils"
+	"golang.org/x/xerrors"
+)
+
+// PlainByteArrayDecoder decodes a data chunk for bytearrays according to
+// the plain encoding. The byte arrays will use slices to reference the
+// data rather than copying it.
+//
+// The parquet spec defines Plain encoding for ByteArrays as a 4 byte little
+// endian integer containing the length of the bytearray followed by that many
+// bytes being the raw data of the byte array.
+type PlainByteArrayDecoder struct {
+	decoder
+}
+
+// Type returns parquet.Types.ByteArray for this decoder
+func (PlainByteArrayDecoder) Type() parquet.Type {
+	return parquet.Types.ByteArray
+}
+
+// Decode will populate the slice of bytearrays in full or until the number
+// of values is consumed.
+//
+// Returns the number of values that were decoded.
+func (pbad *PlainByteArrayDecoder) Decode(out []parquet.ByteArray) (int, error) {
+	max := utils.MinInt(len(out), pbad.nvals)
+
+	for i := 0; i < max; i++ {
+		// there should always be at least four bytes which is the length of the
+		// next value in the data.
+		if len(pbad.data) < 4 {
+			return i, xerrors.New("parquet: eof reading bytearray")
+		}
+
+		// the first 4 bytes are a little endian int32 length
+		byteLen := int32(binary.LittleEndian.Uint32(pbad.data[:4]))
+		if byteLen < 0 {
+			return i, xerrors.New("parquet: invalid BYTE_ARRAY value")
+		}
+
+		if int64(len(pbad.data)) < int64(byteLen)+4 {
+			return i, xerrors.New("parquet: eof reading bytearray")
+		}
+
+		out[i] = pbad.data[4 : byteLen+4 : byteLen+4]
+		pbad.data = pbad.data[byteLen+4:]
+	}
+
+	pbad.nvals -= max
+	return max, nil
+}
+
+// DecodeSpaced is like Decode, but expands the slice out to leave empty values
+// where the validBits bitmap has 0s
+func (pbad *PlainByteArrayDecoder) DecodeSpaced(out []parquet.ByteArray, nullCount int, validBits []byte, validBitsOffset int64) (int, error) {
+	toRead := len(out) - nullCount
+	valuesRead, err := pbad.Decode(out[:toRead])
+	if err != nil {
+		return valuesRead, err
+	}
+	if valuesRead != toRead {
+		return valuesRead, xerrors.New("parquet: number of values / definition levels read did not match")
+	}
+
+	return spacedExpand(out, nullCount, validBits, validBitsOffset), nil
+}
diff --git a/go/parquet/internal/encoding/byte_array_encoder.go b/go/parquet/internal/encoding/byte_array_encoder.go
new file mode 100644
index 00000000000..8d46c6f5a9b
--- /dev/null
+++ b/go/parquet/internal/encoding/byte_array_encoder.go
@@ -0,0 +1,123 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package encoding
+
+import (
+	"encoding/binary"
+	"unsafe"
+
+	"github.com/apache/arrow/go/arrow"
+	"github.com/apache/arrow/go/parquet"
+	"github.com/apache/arrow/go/parquet/internal/utils"
+)
+
+// PlainByteArrayEncoder encodes byte arrays according to the spec for Plain encoding
+// by encoding the length as a int32 followed by the bytes of the value.
+type PlainByteArrayEncoder struct {
+	encoder
+
+	bitSetReader utils.SetBitRunReader
+}
+
+// PutByteArray writes out the 4 bytes for the length followed by the data
+func (enc *PlainByteArrayEncoder) PutByteArray(val parquet.ByteArray) {
+	inc := val.Len() + arrow.Uint32SizeBytes
+	enc.sink.Reserve(inc)
+	vlen := toLEFunc(uint32(val.Len()))
+	enc.sink.UnsafeWrite((*(*[4]byte)(unsafe.Pointer(&vlen)))[:])
+	enc.sink.UnsafeWrite(val)
+}
+
+// Put writes out all of the values in this slice to the encoding sink
+func (enc *PlainByteArrayEncoder) Put(in []parquet.ByteArray) {
+	for _, val := range in {
+		enc.PutByteArray(val)
+	}
+}
+
+// PutSpaced uses the bitmap of validBits to leave out anything that is null according
+// to the bitmap.
+//
+// If validBits is nil, this is equivalent to calling Put
+func (enc *PlainByteArrayEncoder) PutSpaced(in []parquet.ByteArray, validBits []byte, validBitsOffset int64) {
+	if validBits != nil {
+		if enc.bitSetReader == nil {
+			enc.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(in)))
+		} else {
+			enc.bitSetReader.Reset(validBits, validBitsOffset, int64(len(in)))
+		}
+
+		for {
+			run := enc.bitSetReader.NextRun()
+			if run.Length == 0 {
+				break
+			}
+			enc.Put(in[int(run.Pos):int(run.Pos+run.Length)])
+		}
+	} else {
+		enc.Put(in)
+	}
+}
+
+// Type returns parquet.Types.ByteArray for the bytearray encoder
+func (PlainByteArrayEncoder) Type() parquet.Type {
+	return parquet.Types.ByteArray
+}
+
+// WriteDict writes the dictionary out to the provided slice, out should be
+// at least DictEncodedSize() bytes
+func (enc *DictByteArrayEncoder) WriteDict(out []byte) {
+	enc.memo.(BinaryMemoTable).VisitValues(0, func(v []byte) {
+		binary.LittleEndian.PutUint32(out, uint32(len(v)))
+		out = out[arrow.Uint32SizeBytes:]
+		copy(out, v)
+		out = out[len(v):]
+	})
+}
+
+// PutByteArray adds a single byte array to buffer, updating the dictionary
+// and encoded size if it's a new value
+func (enc *DictByteArrayEncoder) PutByteArray(in parquet.ByteArray) {
+	if in == nil {
+		in = empty[:]
+	}
+	memoIdx, found, err := enc.memo.GetOrInsert(in)
+	if err != nil {
+		panic(err)
+	}
+	if !found {
+		enc.dictEncodedSize += in.Len() + arrow.Uint32SizeBytes
+	}
+	enc.addIndex(memoIdx)
+}
+
+// Put takes a slice of ByteArrays to add and encode.
+func (enc *DictByteArrayEncoder) Put(in []parquet.ByteArray) {
+	for _, val := range in {
+		enc.PutByteArray(val)
+	}
+}
+
+// PutSpaced like with the non-dict encoder leaves out the values where the validBits bitmap is 0
+func (enc *DictByteArrayEncoder) PutSpaced(in []parquet.ByteArray, validBits []byte, validBitsOffset int64) {
+	utils.VisitSetBitRuns(validBits, validBitsOffset, int64(len(in)), func(pos, length int64) error {
+		for i := int64(0); i < length; i++ {
+			enc.PutByteArray(in[i+pos])
+		}
+		return nil
+	})
+}
diff --git a/go/parquet/internal/encoding/decoder.go b/go/parquet/internal/encoding/decoder.go
new file mode 100644
index 00000000000..6de61574ec5
--- /dev/null
+++ b/go/parquet/internal/encoding/decoder.go
@@ -0,0 +1,186 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package encoding
+
+import (
+	"bytes"
+	"reflect"
+
+	"github.com/apache/arrow/go/arrow/memory"
+	"github.com/apache/arrow/go/parquet"
+	"github.com/apache/arrow/go/parquet/internal/debug"
+	format "github.com/apache/arrow/go/parquet/internal/gen-go/parquet"
+	"github.com/apache/arrow/go/parquet/internal/utils"
+	"github.com/apache/arrow/go/parquet/schema"
+	"golang.org/x/xerrors"
+)
+
+// DecoderTraits provides an interface for more easily interacting with types
+// to generate decoders for specific types.
+type DecoderTraits interface {
+	Decoder(e parquet.Encoding, descr *schema.Column, useDict bool, mem memory.Allocator) TypedDecoder
+	BytesRequired(int) int
+}
+
+// NewDecoder constructs a decoder for a given type and encoding
+func NewDecoder(t parquet.Type, e parquet.Encoding, descr *schema.Column, mem memory.Allocator) TypedDecoder {
+	traits := getDecodingTraits(t)
+	if traits == nil {
+		return nil
+	}
+
+	return traits.Decoder(e, descr, false /* use dictionary */, mem)
+}
+
+// NewDictDecoder is like NewDecoder but for dictionary encodings, panics if type is bool.
+//
+// if mem is nil, memory.DefaultAllocator will be used
+func NewDictDecoder(t parquet.Type, descr *schema.Column, mem memory.Allocator) DictDecoder {
+	traits := getDecodingTraits(t)
+	if traits == nil {
+		return nil
+	}
+
+	if mem == nil {
+		mem = memory.DefaultAllocator
+	}
+
+	return traits.Decoder(parquet.Encodings.RLEDict, descr, true /* use dictionary */, mem).(DictDecoder)
+}
+
+type decoder struct {
+	descr    *schema.Column
+	encoding format.Encoding
+	nvals    int
+	data     []byte
+	typeLen  int
+}
+
+// newDecoderBase constructs the base decoding object that is embedded in the
+// type specific decoders.
+func newDecoderBase(e format.Encoding, descr *schema.Column) decoder {
+	typeLen := -1
+	if descr != nil && descr.PhysicalType() == parquet.Types.FixedLenByteArray {
+		typeLen = int(descr.TypeLength())
+	}
+
+	return decoder{
+		descr:    descr,
+		encoding: e,
+		typeLen:  typeLen,
+	}
+}
+
+// SetData sets the data for decoding into the decoder to update the available
+// data bytes and number of values available.
+func (d *decoder) SetData(nvals int, data []byte) error {
+	d.data = data
+	d.nvals = nvals
+	return nil
+}
+
+// ValuesLeft returns the number of remaining values that can be decoded
+func (d *decoder) ValuesLeft() int { return d.nvals }
+
+// Encoding returns the encoding type used by this decoder to decode the bytes.
+func (d *decoder) Encoding() parquet.Encoding { return parquet.Encoding(d.encoding) }
+
+type dictDecoder struct {
+	decoder
+	mem              memory.Allocator
+	dictValueDecoder utils.DictionaryConverter
+	idxDecoder       *utils.RleDecoder
+}
+
+// SetDict sets a decoder that can be used to decode the dictionary that is
+// used for this column in order to return the proper values.
+func (d *dictDecoder) SetDict(dict TypedDecoder) {
+	if dict.Type() != d.descr.PhysicalType() {
+		panic("parquet: mismatch dictionary and column data type")
+	}
+
+	d.dictValueDecoder = NewDictConverter(dict)
+}
+
+// SetData sets the index value data into the decoder.
+func (d *dictDecoder) SetData(nvals int, data []byte) error {
+	d.nvals = nvals
+	if len(data) == 0 {
+		// no data, bitwidth can safely be 0
+		d.idxDecoder = utils.NewRleDecoder(bytes.NewReader(data), 0 /* bitwidth */)
+		return nil
+	}
+
+	// grab the bit width from the first byte
+	width := uint8(data[0])
+	if width >= 64 {
+		return xerrors.New("parquet: invalid or corrupted bit width")
+	}
+
+	// pass the rest of the data, minus that first byte, to the decoder
+	d.idxDecoder = utils.NewRleDecoder(bytes.NewReader(data[1:]), int(width))
+	return nil
+}
+
+func (d *dictDecoder) decode(out interface{}) (int, error) {
+	return d.idxDecoder.GetBatchWithDict(d.dictValueDecoder, out)
+}
+
+func (d *dictDecoder) decodeSpaced(out interface{}, nullCount int, validBits []byte, validBitsOffset int64) (int, error) {
+	return d.idxDecoder.GetBatchWithDictSpaced(d.dictValueDecoder, out, nullCount, validBits, validBitsOffset)
+}
+
+var empty = [1]byte{0}
+
+// spacedExpand is used to take a slice of data and utilize the bitmap provided to fill in nulls into the
+// correct slots according to the bitmap in order to produce a fully expanded result slice with nulls
+// in the correct slots.
+func spacedExpand(buffer interface{}, nullCount int, validBits []byte, validBitsOffset int64) int {
+	bufferRef := reflect.ValueOf(buffer)
+	if bufferRef.Kind() != reflect.Slice {
+		panic("invalid spacedexpand type, not slice")
+	}
+
+	var (
+		numValues int = bufferRef.Len()
+	)
+
+	idxDecode := int64(numValues - nullCount)
+	if idxDecode == 0 { // if there's nothing to decode there's nothing to do.
+		return numValues
+	}
+
+	// read the bitmap in reverse grabbing runs of valid bits where possible.
+	rdr := utils.NewReverseSetBitRunReader(validBits, validBitsOffset, int64(numValues))
+	for {
+		run := rdr.NextRun()
+		if run.Length == 0 {
+			break
+		}
+
+		// copy data from the end of the slice to it's proper location in the slice after accounting for the nulls
+		// because we technically don't care what is in the null slots we don't actually have to clean
+		// up after ourselves because we're doing this in reverse to guarantee that we'll always simply
+		// overwrite any existing data with the correctly spaced data. Any data that happens to be left in the null
+		// slots is fine since it shouldn't matter and saves us work.
+		idxDecode -= run.Length
+		n := reflect.Copy(bufferRef.Slice(int(run.Pos), bufferRef.Len()), bufferRef.Slice(int(idxDecode), int(int64(idxDecode)+run.Length)))
+		debug.Assert(n == int(run.Length), "reflect.Copy copied incorrect number of elements in spacedExpand")
+	}
+
+	return numValues
+}
diff --git a/go/parquet/internal/encoding/delta_bit_packing.go b/go/parquet/internal/encoding/delta_bit_packing.go
new file mode 100644
index 00000000000..babd0b1fa97
--- /dev/null
+++ b/go/parquet/internal/encoding/delta_bit_packing.go
@@ -0,0 +1,520 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package encoding
+
+import (
+	"bytes"
+	"math"
+	"math/bits"
+	"reflect"
+
+	"github.com/apache/arrow/go/arrow"
+	"github.com/apache/arrow/go/arrow/memory"
+	"github.com/apache/arrow/go/parquet"
+	"github.com/apache/arrow/go/parquet/internal/utils"
+	"golang.org/x/xerrors"
+)
+
+// see the deltaBitPack encoder for a description of the encoding format that is
+// used for delta-bitpacking.
+type deltaBitPackDecoder struct {
+	decoder
+
+	mem memory.Allocator
+
+	usedFirst            bool
+	bitdecoder           *utils.BitReader
+	blockSize            uint64
+	currentBlockVals     uint32
+	miniBlocks           uint64
+	valsPerMini          uint32
+	currentMiniBlockVals uint32
+	minDelta             int64
+	miniBlockIdx         uint64
+
+	deltaBitWidths *memory.Buffer
+	deltaBitWidth  byte
+
+	lastVal int64
+}
+
+// returns the number of bytes read so far
+func (d *deltaBitPackDecoder) bytesRead() int64 {
+	return d.bitdecoder.CurOffset()
+}
+
+func (d *deltaBitPackDecoder) Allocator() memory.Allocator { return d.mem }
+
+// SetData sets the bytes and the expected number of values to decode
+// into the decoder, updating the decoder and allowing it to be reused.
+func (d *deltaBitPackDecoder) SetData(nvalues int, data []byte) error {
+	// set our data into the underlying decoder for the type
+	if err := d.decoder.SetData(nvalues, data); err != nil {
+		return err
+	}
+	// create a bit reader for our decoder's values
+	d.bitdecoder = utils.NewBitReader(bytes.NewReader(d.data))
+	d.currentBlockVals = 0
+	d.currentMiniBlockVals = 0
+	if d.deltaBitWidths == nil {
+		d.deltaBitWidths = memory.NewResizableBuffer(d.mem)
+	}
+
+	var ok bool
+	d.blockSize, ok = d.bitdecoder.GetVlqInt()
+	if !ok {
+		return xerrors.New("parquet: eof exception")
+	}
+
+	if d.miniBlocks, ok = d.bitdecoder.GetVlqInt(); !ok {
+		return xerrors.New("parquet: eof exception")
+	}
+
+	var totalValues uint64
+	if totalValues, ok = d.bitdecoder.GetVlqInt(); !ok {
+		return xerrors.New("parquet: eof exception")
+	}
+
+	if int(totalValues) != d.nvals {
+		return xerrors.New("parquet: mismatch between number of values and count in data header")
+	}
+
+	if d.lastVal, ok = d.bitdecoder.GetZigZagVlqInt(); !ok {
+		return xerrors.New("parquet: eof exception")
+	}
+
+	if d.miniBlocks != 0 {
+		d.valsPerMini = uint32(d.blockSize / d.miniBlocks)
+	}
+	return nil
+}
+
+// initialize a block to decode
+func (d *deltaBitPackDecoder) initBlock() error {
+	// first we grab the min delta value that we'll start from
+	var ok bool
+	if d.minDelta, ok = d.bitdecoder.GetZigZagVlqInt(); !ok {
+		return xerrors.New("parquet: eof exception")
+	}
+
+	// ensure we have enough space for our miniblocks to decode the widths
+	d.deltaBitWidths.Resize(int(d.miniBlocks))
+
+	var err error
+	for i := uint64(0); i < d.miniBlocks; i++ {
+		if d.deltaBitWidths.Bytes()[i], err = d.bitdecoder.ReadByte(); err != nil {
+			return err
+		}
+	}
+
+	d.miniBlockIdx = 0
+	d.deltaBitWidth = d.deltaBitWidths.Bytes()[0]
+	d.currentBlockVals = uint32(d.blockSize)
+	return nil
+}
+
+// DeltaBitPackInt32Decoder decodes Int32 values which are packed using the Delta BitPacking algorithm.
+type DeltaBitPackInt32Decoder struct {
+	*deltaBitPackDecoder
+
+	miniBlockValues []int32
+}
+
+func (d *DeltaBitPackInt32Decoder) unpackNextMini() error {
+	if d.miniBlockValues == nil {
+		d.miniBlockValues = make([]int32, 0, int(d.valsPerMini))
+	} else {
+		d.miniBlockValues = d.miniBlockValues[:0]
+	}
+	d.deltaBitWidth = d.deltaBitWidths.Bytes()[int(d.miniBlockIdx)]
+	d.currentMiniBlockVals = d.valsPerMini
+
+	for j := 0; j < int(d.valsPerMini); j++ {
+		delta, ok := d.bitdecoder.GetValue(int(d.deltaBitWidth))
+		if !ok {
+			return xerrors.New("parquet: eof exception")
+		}
+
+		d.lastVal += int64(delta) + int64(d.minDelta)
+		d.miniBlockValues = append(d.miniBlockValues, int32(d.lastVal))
+	}
+	d.miniBlockIdx++
+	return nil
+}
+
+// Decode retrieves min(remaining values, len(out)) values from the data and returns the number
+// of values actually decoded and any errors encountered.
+func (d *DeltaBitPackInt32Decoder) Decode(out []int32) (int, error) {
+	max := utils.MinInt(len(out), d.nvals)
+	if max == 0 {
+		return 0, nil
+	}
+
+	out = out[:max]
+	if !d.usedFirst { // starting value to calculate deltas against
+		out[0] = int32(d.lastVal)
+		out = out[1:]
+		d.usedFirst = true
+	}
+
+	var err error
+	for len(out) > 0 { // unpack mini blocks until we get all the values we need
+		if d.currentBlockVals == 0 {
+			err = d.initBlock()
+		}
+		if d.currentMiniBlockVals == 0 {
+			err = d.unpackNextMini()
+		}
+		if err != nil {
+			return 0, err
+		}
+
+		// copy as many values from our mini block as we can into out
+		start := int(d.valsPerMini - d.currentMiniBlockVals)
+		end := utils.MinInt(int(d.valsPerMini), len(out))
+		copy(out, d.miniBlockValues[start:end])
+
+		numCopied := end - start
+		out = out[numCopied:]
+		d.currentBlockVals -= uint32(numCopied)
+		d.currentMiniBlockVals -= uint32(numCopied)
+	}
+	return max, nil
+}
+
+// DecodeSpaced is like Decode, but the result is spaced out appropriately based on the passed in bitmap
+func (d *DeltaBitPackInt32Decoder) DecodeSpaced(out []int32, nullCount int, validBits []byte, validBitsOffset int64) (int, error) {
+	toread := len(out) - nullCount
+	values, err := d.Decode(out[:toread])
+	if err != nil {
+		return values, err
+	}
+	if values != toread {
+		return values, xerrors.New("parquet: number of values / definition levels read did not match")
+	}
+
+	return spacedExpand(out, nullCount, validBits, validBitsOffset), nil
+}
+
+// Type returns the physical parquet type that this decoder decodes, in this case Int32
+func (DeltaBitPackInt32Decoder) Type() parquet.Type {
+	return parquet.Types.Int32
+}
+
+// DeltaBitPackInt64Decoder decodes a delta bit packed int64 column of data.
+type DeltaBitPackInt64Decoder struct {
+	*deltaBitPackDecoder
+
+	miniBlockValues []int64
+}
+
+func (d *DeltaBitPackInt64Decoder) unpackNextMini() error {
+	if d.miniBlockValues == nil {
+		d.miniBlockValues = make([]int64, 0, int(d.valsPerMini))
+	} else {
+		d.miniBlockValues = d.miniBlockValues[:0]
+	}
+
+	d.deltaBitWidth = d.deltaBitWidths.Bytes()[int(d.miniBlockIdx)]
+	d.currentMiniBlockVals = d.valsPerMini
+
+	for j := 0; j < int(d.valsPerMini); j++ {
+		delta, ok := d.bitdecoder.GetValue(int(d.deltaBitWidth))
+		if !ok {
+			return xerrors.New("parquet: eof exception")
+		}
+
+		d.lastVal += int64(delta) + int64(d.minDelta)
+		d.miniBlockValues = append(d.miniBlockValues, d.lastVal)
+	}
+	d.miniBlockIdx++
+	return nil
+}
+
+// Decode retrieves min(remaining values, len(out)) values from the data and returns the number
+// of values actually decoded and any errors encountered.
+func (d *DeltaBitPackInt64Decoder) Decode(out []int64) (int, error) {
+	max := utils.MinInt(len(out), d.nvals)
+	if max == 0 {
+		return 0, nil
+	}
+
+	out = out[:max]
+	if !d.usedFirst {
+		out[0] = d.lastVal
+		out = out[1:]
+		d.usedFirst = true
+	}
+
+	var err error
+	for len(out) > 0 {
+		if d.currentBlockVals == 0 {
+			err = d.initBlock()
+		}
+		if d.currentMiniBlockVals == 0 {
+			err = d.unpackNextMini()
+		}
+
+		if err != nil {
+			return 0, err
+		}
+
+		start := int(d.valsPerMini - d.currentMiniBlockVals)
+		end := utils.MinInt(int(d.valsPerMini), len(out))
+		copy(out, d.miniBlockValues[start:end])
+
+		numCopied := end - start
+		out = out[numCopied:]
+		d.currentBlockVals -= uint32(numCopied)
+		d.currentMiniBlockVals -= uint32(numCopied)
+	}
+	return max, nil
+}
+
+// Type returns the physical parquet type that this decoder decodes, in this case Int64
+func (DeltaBitPackInt64Decoder) Type() parquet.Type {
+	return parquet.Types.Int64
+}
+
+// DecodeSpaced is like Decode, but the result is spaced out appropriately based on the passed in bitmap
+func (d DeltaBitPackInt64Decoder) DecodeSpaced(out []int64, nullCount int, validBits []byte, validBitsOffset int64) (int, error) {
+	toread := len(out) - nullCount
+	values, err := d.Decode(out[:toread])
+	if err != nil {
+		return values, err
+	}
+	if values != toread {
+		return values, xerrors.New("parquet: number of values / definition levels read did not match")
+	}
+
+	return spacedExpand(out, nullCount, validBits, validBitsOffset), nil
+}
+
+const (
+	// block size must be a multiple of 128
+	defaultBlockSize     = 128
+	defaultNumMiniBlocks = 4
+	// block size / number of mini blocks must result in a multiple of 32
+	defaultNumValuesPerMini = 32
+	// max size of the header for the delta blocks
+	maxHeaderWriterSize = 32
+)
+
+// deltaBitPackEncoder is an encoder for the DeltaBinary Packing format
+// as per the parquet spec.
+//
+// Consists of a header followed by blocks of delta encoded values binary packed.
+//
+//	Format
+// 		[header] [block 1] [block 2] ... [block N]
+//
+//	Header
+//		[block size] [number of mini blocks per block] [total value count] [first value]
+//
+//	Block
+//		[min delta] [list of bitwidths of the miniblocks] [miniblocks...]
+//
+// Sets aside bytes at the start of the internal buffer where the header will be written,
+// and only writes the header when FlushValues is called before returning it.
+type deltaBitPackEncoder struct {
+	encoder
+
+	bitWriter  *utils.BitWriter
+	totalVals  uint64
+	firstVal   int64
+	currentVal int64
+
+	blockSize     uint64
+	miniBlockSize uint64
+	numMiniBlocks uint64
+	deltas        []int64
+}
+
+// flushBlock flushes out a finished block for writing to the underlying encoder
+func (enc *deltaBitPackEncoder) flushBlock() {
+	if len(enc.deltas) == 0 {
+		return
+	}
+
+	// determine the minimum delta value
+	minDelta := int64(math.MaxInt64)
+	for _, delta := range enc.deltas {
+		if delta < minDelta {
+			minDelta = delta
+		}
+	}
+
+	enc.bitWriter.WriteZigZagVlqInt(minDelta)
+	// reserve enough bytes to write out our miniblock deltas
+	offset := enc.bitWriter.ReserveBytes(int(enc.numMiniBlocks))
+
+	valuesToWrite := int64(len(enc.deltas))
+	for i := 0; i < int(enc.numMiniBlocks); i++ {
+		n := utils.Min(int64(enc.miniBlockSize), valuesToWrite)
+		if n == 0 {
+			break
+		}
+
+		maxDelta := int64(math.MinInt64)
+		start := i * int(enc.miniBlockSize)
+		for _, val := range enc.deltas[start : start+int(n)] {
+			maxDelta = utils.Max(maxDelta, val)
+		}
+
+		// compute bit width to store (max_delta - min_delta)
+		width := uint(bits.Len64(uint64(maxDelta - minDelta)))
+		// write out the bit width we used into the bytes we reserved earlier
+		enc.bitWriter.WriteAt([]byte{byte(width)}, int64(offset+i))
+
+		// write out our deltas
+		for _, val := range enc.deltas[start : start+int(n)] {
+			enc.bitWriter.WriteValue(uint64(val-minDelta), width)
+		}
+
+		valuesToWrite -= n
+
+		// pad the last block if n < miniBlockSize
+		for ; n < int64(enc.miniBlockSize); n++ {
+			enc.bitWriter.WriteValue(0, width)
+		}
+	}
+	enc.deltas = enc.deltas[:0]
+}
+
+// putInternal is the implementation for actually writing data which must be
+// integral data as int, int8, int32, or int64.
+func (enc *deltaBitPackEncoder) putInternal(data interface{}) {
+	v := reflect.ValueOf(data)
+	if v.Len() == 0 {
+		return
+	}
+
+	idx := 0
+	if enc.totalVals == 0 {
+		enc.blockSize = defaultBlockSize
+		enc.numMiniBlocks = defaultNumMiniBlocks
+		enc.miniBlockSize = defaultNumValuesPerMini
+
+		enc.firstVal = v.Index(0).Int()
+		enc.currentVal = enc.firstVal
+		idx = 1
+
+		enc.bitWriter = utils.NewBitWriter(enc.sink)
+	}
+
+	enc.totalVals += uint64(v.Len())
+	for ; idx < v.Len(); idx++ {
+		val := v.Index(idx).Int()
+		enc.deltas = append(enc.deltas, val-enc.currentVal)
+		enc.currentVal = val
+		if len(enc.deltas) == int(enc.blockSize) {
+			enc.flushBlock()
+		}
+	}
+}
+
+// FlushValues flushes any remaining data and returns the finished encoded buffer
+func (enc *deltaBitPackEncoder) FlushValues() Buffer {
+	if enc.bitWriter != nil {
+		// write any remaining values
+		enc.flushBlock()
+		enc.bitWriter.Flush(true)
+	} else {
+		enc.blockSize = defaultBlockSize
+		enc.numMiniBlocks = defaultNumMiniBlocks
+		enc.miniBlockSize = defaultNumValuesPerMini
+	}
+
+	buffer := make([]byte, maxHeaderWriterSize)
+	headerWriter := utils.NewBitWriter(utils.NewWriterAtBuffer(buffer))
+
+	headerWriter.WriteVlqInt(uint64(enc.blockSize))
+	headerWriter.WriteVlqInt(uint64(enc.numMiniBlocks))
+	headerWriter.WriteVlqInt(uint64(enc.totalVals))
+	headerWriter.WriteZigZagVlqInt(int64(enc.firstVal))
+	headerWriter.Flush(false)
+
+	buffer = buffer[:headerWriter.Written()]
+	enc.totalVals = 0
+
+	if enc.bitWriter != nil {
+		flushed := enc.sink.Finish()
+		defer flushed.Release()
+
+		buffer = append(buffer, flushed.Buf()[:enc.bitWriter.Written()]...)
+	}
+	return poolBuffer{memory.NewBufferBytes(buffer)}
+}
+
+// EstimatedDataEncodedSize returns the current amount of data actually flushed out and written
+func (enc *deltaBitPackEncoder) EstimatedDataEncodedSize() int64 {
+	return int64(enc.bitWriter.Written())
+}
+
+// DeltaBitPackInt32Encoder is an encoder for the delta bitpacking encoding for int32 data.
+type DeltaBitPackInt32Encoder struct {
+	*deltaBitPackEncoder
+}
+
+// Put writes the values from the provided slice of int32 to the encoder
+func (enc DeltaBitPackInt32Encoder) Put(in []int32) {
+	enc.putInternal(in)
+}
+
+// PutSpaced takes a slice of int32 along with a bitmap that describes the nulls and an offset into the bitmap
+// in order to write spaced data to the encoder.
+func (enc DeltaBitPackInt32Encoder) PutSpaced(in []int32, validBits []byte, validBitsOffset int64) {
+	buffer := memory.NewResizableBuffer(enc.mem)
+	buffer.Reserve(arrow.Int32Traits.BytesRequired(len(in)))
+	defer buffer.Release()
+
+	data := arrow.Int32Traits.CastFromBytes(buffer.Buf())
+	nvalid := spacedCompress(in, data, validBits, validBitsOffset)
+	enc.Put(data[:nvalid])
+}
+
+// Type returns the underlying physical type this encoder works with, in this case Int32
+func (DeltaBitPackInt32Encoder) Type() parquet.Type {
+	return parquet.Types.Int32
+}
+
+// DeltaBitPackInt32Encoder is an encoder for the delta bitpacking encoding for int32 data.
+type DeltaBitPackInt64Encoder struct {
+	*deltaBitPackEncoder
+}
+
+// Put writes the values from the provided slice of int64 to the encoder
+func (enc DeltaBitPackInt64Encoder) Put(in []int64) {
+	enc.putInternal(in)
+}
+
+// PutSpaced takes a slice of int64 along with a bitmap that describes the nulls and an offset into the bitmap
+// in order to write spaced data to the encoder.
+func (enc DeltaBitPackInt64Encoder) PutSpaced(in []int64, validBits []byte, validBitsOffset int64) {
+	buffer := memory.NewResizableBuffer(enc.mem)
+	buffer.Reserve(arrow.Int64Traits.BytesRequired(len(in)))
+	defer buffer.Release()
+
+	data := arrow.Int64Traits.CastFromBytes(buffer.Buf())
+	nvalid := spacedCompress(in, data, validBits, validBitsOffset)
+	enc.Put(data[:nvalid])
+}
+
+// Type returns the underlying physical type this encoder works with, in this case Int64
+func (DeltaBitPackInt64Encoder) Type() parquet.Type {
+	return parquet.Types.Int64
+}
diff --git a/go/parquet/internal/encoding/delta_byte_array.go b/go/parquet/internal/encoding/delta_byte_array.go
new file mode 100644
index 00000000000..d11413ea236
--- /dev/null
+++ b/go/parquet/internal/encoding/delta_byte_array.go
@@ -0,0 +1,216 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package encoding
+
+import (
+	"github.com/apache/arrow/go/arrow/memory"
+	"github.com/apache/arrow/go/parquet"
+	"github.com/apache/arrow/go/parquet/internal/utils"
+	"golang.org/x/xerrors"
+)
+
+// DeltaByteArrayEncoder is an encoder for writing bytearrays which are delta encoded
+// this is also known as incremental encoding or front compression. For each element
+// in a sequence of strings, we store the prefix length of the previous entry plus the suffix
+// see https://en.wikipedia.org/wiki/Incremental_encoding for a longer description.
+//
+// This is stored as a sequence of delta-encoded prefix lengths followed by the suffixes
+// encoded as delta length byte arrays.
+type DeltaByteArrayEncoder struct {
+	encoder
+
+	prefixEncoder *DeltaBitPackInt32Encoder
+	suffixEncoder *DeltaLengthByteArrayEncoder
+
+	lastVal parquet.ByteArray
+}
+
+func (enc *DeltaByteArrayEncoder) initEncoders() {
+	enc.prefixEncoder = &DeltaBitPackInt32Encoder{
+		deltaBitPackEncoder: &deltaBitPackEncoder{encoder: newEncoderBase(enc.encoding, nil, enc.mem)}}
+	enc.suffixEncoder = &DeltaLengthByteArrayEncoder{
+		newEncoderBase(enc.encoding, nil, enc.mem),
+		&DeltaBitPackInt32Encoder{
+			deltaBitPackEncoder: &deltaBitPackEncoder{encoder: newEncoderBase(enc.encoding, nil, enc.mem)}}}
+}
+
+// Type returns the underlying physical type this operates on, in this case ByteArrays only
+func (DeltaByteArrayEncoder) Type() parquet.Type { return parquet.Types.ByteArray }
+
+// Put writes a slice of ByteArrays to the encoder
+func (enc *DeltaByteArrayEncoder) Put(in []parquet.ByteArray) {
+	if len(in) == 0 {
+		return
+	}
+
+	var suf parquet.ByteArray
+	if enc.prefixEncoder == nil { // initialize our encoders if we haven't yet
+		enc.initEncoders()
+		enc.prefixEncoder.Put([]int32{0})
+		suf = in[0]
+		enc.lastVal = in[0]
+		enc.suffixEncoder.Put([]parquet.ByteArray{suf})
+		in = in[1:]
+	}
+
+	// for each value, figure out the common prefix with the previous value
+	// and then write the prefix length and the suffix.
+	for _, val := range in {
+		l1 := enc.lastVal.Len()
+		l2 := val.Len()
+		j := 0
+		for j < l1 && j < l2 {
+			if enc.lastVal[j] != val[j] {
+				break
+			}
+			j++
+		}
+		enc.prefixEncoder.Put([]int32{int32(j)})
+		suf = val[j:]
+		enc.suffixEncoder.Put([]parquet.ByteArray{suf})
+		enc.lastVal = val
+	}
+
+	// do the memcpy after the loops to keep a copy of the lastVal
+	// we do a copy here so that we only copy and keep a reference
+	// to the suffix, and aren't forcing the *entire* value to stay
+	// in memory while we have this reference to just the suffix.
+	enc.lastVal = append([]byte{}, enc.lastVal...)
+}
+
+// PutSpaced is like Put, but assumes the data is already spaced for nulls and uses the bitmap provided and offset
+// to compress the data before writing it without the null slots.
+func (enc *DeltaByteArrayEncoder) PutSpaced(in []parquet.ByteArray, validBits []byte, validBitsOffset int64) {
+	if validBits != nil {
+		data := make([]parquet.ByteArray, len(in))
+		nvalid := spacedCompress(in, data, validBits, validBitsOffset)
+		enc.Put(data[:nvalid])
+	} else {
+		enc.Put(in)
+	}
+}
+
+// Flush flushes any remaining data out and returns the finished encoded buffer.
+func (enc *DeltaByteArrayEncoder) FlushValues() Buffer {
+	if enc.prefixEncoder == nil {
+		enc.initEncoders()
+	}
+	prefixBuf := enc.prefixEncoder.FlushValues()
+	defer prefixBuf.Release()
+
+	suffixBuf := enc.suffixEncoder.FlushValues()
+	defer suffixBuf.Release()
+
+	ret := bufferPool.Get().(*memory.Buffer)
+	ret.ResizeNoShrink(prefixBuf.Len() + suffixBuf.Len())
+	copy(ret.Bytes(), prefixBuf.Bytes())
+	copy(ret.Bytes()[prefixBuf.Len():], suffixBuf.Bytes())
+	return poolBuffer{ret}
+}
+
+// DeltaByteArrayDecoder is a decoder for a column of data encoded using incremental or prefix encoding.
+type DeltaByteArrayDecoder struct {
+	*DeltaLengthByteArrayDecoder
+
+	prefixLengths []int32
+	lastVal       parquet.ByteArray
+}
+
+// Type returns the underlying physical type this decoder operates on, in this case ByteArrays only
+func (DeltaByteArrayDecoder) Type() parquet.Type {
+	return parquet.Types.ByteArray
+}
+
+func (d *DeltaByteArrayDecoder) Allocator() memory.Allocator { return d.mem }
+
+// SetData expects the data passed in to be the prefix lengths, followed by the
+// blocks of suffix data in order to initialize the decoder.
+func (d *DeltaByteArrayDecoder) SetData(nvalues int, data []byte) error {
+	prefixLenDec := DeltaBitPackInt32Decoder{
+		deltaBitPackDecoder: &deltaBitPackDecoder{
+			decoder: newDecoderBase(d.encoding, d.descr),
+			mem:     d.mem}}
+
+	if err := prefixLenDec.SetData(nvalues, data); err != nil {
+		return err
+	}
+
+	d.prefixLengths = make([]int32, nvalues)
+	// decode all the prefix lengths first so we know how many bytes it took to get the
+	// prefix lengths for nvalues
+	prefixLenDec.Decode(d.prefixLengths)
+
+	// now that we know how many bytes we needed for the prefix lengths, the rest are the
+	// delta length byte array encoding.
+	return d.DeltaLengthByteArrayDecoder.SetData(nvalues, data[int(prefixLenDec.bytesRead()):])
+}
+
+// Decode decodes byte arrays into the slice provided and returns the number of values actually decoded
+func (d *DeltaByteArrayDecoder) Decode(out []parquet.ByteArray) (int, error) {
+	max := utils.MinInt(len(out), d.nvals)
+	if max == 0 {
+		return 0, nil
+	}
+	out = out[:max]
+
+	var err error
+	if d.lastVal == nil {
+		_, err = d.DeltaLengthByteArrayDecoder.Decode(out[:1])
+		if err != nil {
+			return 0, err
+		}
+		d.lastVal = out[0]
+		out = out[1:]
+		d.prefixLengths = d.prefixLengths[1:]
+	}
+
+	var prefixLen int32
+	suffixHolder := make([]parquet.ByteArray, 1)
+	for len(out) > 0 {
+		prefixLen, d.prefixLengths = d.prefixLengths[0], d.prefixLengths[1:]
+
+		prefix := d.lastVal[:prefixLen:prefixLen]
+		_, err = d.DeltaLengthByteArrayDecoder.Decode(suffixHolder)
+		if err != nil {
+			return 0, err
+		}
+
+		if len(suffixHolder[0]) == 0 {
+			d.lastVal = prefix
+		} else {
+			d.lastVal = make([]byte, int(prefixLen)+len(suffixHolder[0]))
+			copy(d.lastVal, prefix)
+			copy(d.lastVal[prefixLen:], suffixHolder[0])
+		}
+		out[0], out = d.lastVal, out[1:]
+	}
+	return max, nil
+}
+
+// DecodeSpaced is like decode, but the result is spaced out based on the bitmap provided.
+func (d *DeltaByteArrayDecoder) DecodeSpaced(out []parquet.ByteArray, nullCount int, validBits []byte, validBitsOffset int64) (int, error) {
+	toread := len(out) - nullCount
+	values, err := d.Decode(out[:toread])
+	if err != nil {
+		return values, err
+	}
+	if values != toread {
+		return values, xerrors.New("parquet: number of values / definition levels read did not match")
+	}
+
+	return spacedExpand(out, nullCount, validBits, validBitsOffset), nil
+}
diff --git a/go/parquet/internal/encoding/delta_length_byte_array.go b/go/parquet/internal/encoding/delta_length_byte_array.go
new file mode 100644
index 00000000000..3563ccec461
--- /dev/null
+++ b/go/parquet/internal/encoding/delta_length_byte_array.go
@@ -0,0 +1,144 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package encoding
+
+import (
+	"github.com/apache/arrow/go/arrow/memory"
+	"github.com/apache/arrow/go/parquet"
+	"github.com/apache/arrow/go/parquet/internal/utils"
+	"golang.org/x/xerrors"
+)
+
+// DeltaLengthByteArrayEncoder encodes data using by taking all of the byte array lengths
+// and encoding them in front using delta encoding, followed by all of the binary data
+// concatenated back to back. The expected savings is from the cost of encoding the lengths
+// and possibly better compression in the data which will no longer be interleaved with the lengths.
+//
+// This encoding is always preferred over PLAIN for byte array columns where possible.
+//
+// For example, if the data was "Hello", "World", "Foobar", "ABCDEF" the encoded data would be:
+// DeltaEncoding(5, 5, 6, 6) "HelloWorldFoobarABCDEF"
+type DeltaLengthByteArrayEncoder struct {
+	encoder
+
+	lengthEncoder *DeltaBitPackInt32Encoder
+}
+
+// Put writes the provided slice of byte arrays to the encoder
+func (enc *DeltaLengthByteArrayEncoder) Put(in []parquet.ByteArray) {
+	lengths := make([]int32, len(in))
+	totalLen := int(0)
+	for idx, val := range in {
+		lengths[idx] = int32(val.Len())
+		totalLen += val.Len()
+	}
+
+	enc.lengthEncoder.Put(lengths)
+	enc.sink.Reserve(totalLen)
+	for _, val := range in {
+		enc.sink.UnsafeWrite(val)
+	}
+}
+
+// PutSpaced is like Put, but the data is spaced out according to the bitmap provided and is compressed
+// accordingly before it is written to drop the null data from the write.
+func (enc *DeltaLengthByteArrayEncoder) PutSpaced(in []parquet.ByteArray, validBits []byte, validBitsOffset int64) {
+	if validBits != nil {
+		data := make([]parquet.ByteArray, len(in))
+		nvalid := spacedCompress(in, data, validBits, validBitsOffset)
+		enc.Put(data[:nvalid])
+	} else {
+		enc.Put(in)
+	}
+}
+
+// Type returns the underlying type which is handled by this encoder, ByteArrays only.
+func (DeltaLengthByteArrayEncoder) Type() parquet.Type {
+	return parquet.Types.ByteArray
+}
+
+// FlushValues flushes any remaining data and returns the final encoded buffer of data.
+func (enc *DeltaLengthByteArrayEncoder) FlushValues() Buffer {
+	ret := enc.lengthEncoder.FlushValues()
+	defer ret.Release()
+
+	data := enc.sink.Finish()
+	defer data.Release()
+
+	output := bufferPool.Get().(*memory.Buffer)
+	output.ResizeNoShrink(ret.Len() + data.Len())
+	copy(output.Bytes(), ret.Bytes())
+	copy(output.Bytes()[ret.Len():], data.Bytes())
+	return poolBuffer{output}
+}
+
+// DeltaLengthByteArrayDecoder is a decoder for handling data produced by the corresponding
+// encoder which expects delta packed lengths followed by the bytes of data.
+type DeltaLengthByteArrayDecoder struct {
+	decoder
+
+	mem     memory.Allocator
+	lengths []int32
+}
+
+// Type returns the underlying type which is handled by this encoder, ByteArrays only.
+func (DeltaLengthByteArrayDecoder) Type() parquet.Type {
+	return parquet.Types.ByteArray
+}
+
+func (d *DeltaLengthByteArrayDecoder) Allocator() memory.Allocator { return d.mem }
+
+// SetData sets in the expected data to the decoder which should be nvalues delta packed lengths
+// followed by the rest of the byte array data immediately after.
+func (d *DeltaLengthByteArrayDecoder) SetData(nvalues int, data []byte) error {
+	dec := DeltaBitPackInt32Decoder{
+		deltaBitPackDecoder: &deltaBitPackDecoder{
+			decoder: newDecoderBase(d.encoding, d.descr),
+			mem:     d.mem}}
+
+	if err := dec.SetData(nvalues, data); err != nil {
+		return err
+	}
+	d.lengths = make([]int32, nvalues)
+	dec.Decode(d.lengths)
+
+	return d.decoder.SetData(nvalues, data[int(dec.bytesRead()):])
+}
+
+// Decode populates the passed in slice with data decoded until it hits the length of out
+// or runs out of values in the column to decode, then returns the number of values actually decoded.
+func (d *DeltaLengthByteArrayDecoder) Decode(out []parquet.ByteArray) (int, error) {
+	max := utils.MinInt(len(out), d.nvals)
+	for i := 0; i < max; i++ {
+		out[i] = d.data[:d.lengths[i]:d.lengths[i]]
+		d.data = d.data[d.lengths[i]:]
+	}
+	d.nvals -= max
+	d.lengths = d.lengths[max:]
+	return max, nil
+}
+
+// DecodeSpaced is like Decode, but for spaced data using the provided bitmap to determine where the nulls should be inserted.
+func (d *DeltaLengthByteArrayDecoder) DecodeSpaced(out []parquet.ByteArray, nullCount int, validBits []byte, validBitsOffset int64) (int, error) {
+	toread := len(out) - nullCount
+	values, _ := d.Decode(out[:toread])
+	if values != toread {
+		return values, xerrors.New("parquet: number of values / definition levels read did not match")
+	}
+
+	return spacedExpand(out, nullCount, validBits, validBitsOffset), nil
+}
diff --git a/go/parquet/internal/encoding/encoder.go b/go/parquet/internal/encoding/encoder.go
new file mode 100644
index 00000000000..49072c8e151
--- /dev/null
+++ b/go/parquet/internal/encoding/encoder.go
@@ -0,0 +1,311 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package encoding
+
+import (
+	"math/bits"
+	"reflect"
+
+	"github.com/apache/arrow/go/arrow"
+	"github.com/apache/arrow/go/arrow/bitutil"
+	"github.com/apache/arrow/go/arrow/endian"
+	"github.com/apache/arrow/go/arrow/memory"
+	"github.com/apache/arrow/go/parquet"
+	format "github.com/apache/arrow/go/parquet/internal/gen-go/parquet"
+	"github.com/apache/arrow/go/parquet/internal/utils"
+	"github.com/apache/arrow/go/parquet/schema"
+)
+
+var toLEFunc func(uint32) uint32
+
+func init() {
+	if endian.IsBigEndian {
+		toLEFunc = bits.ReverseBytes32
+	} else {
+		toLEFunc = func(in uint32) uint32 { return in }
+	}
+}
+
+//go:generate go run ../../../arrow/_tools/tmpl/main.go -i -data=physical_types.tmpldata plain_encoder_types.gen.go.tmpl typed_encoder.gen.go.tmpl
+
+// EncoderTraits is an interface for the different types to make it more
+// convenient to construct encoders for specific types.
+type EncoderTraits interface {
+	Encoder(format.Encoding, bool, *schema.Column, memory.Allocator) TypedEncoder
+}
+
+// NewEncoder will return the appropriately typed encoder for the requested physical type
+// and encoding.
+//
+// If mem is nil, memory.DefaultAllocator will be used.
+func NewEncoder(t parquet.Type, e parquet.Encoding, useDict bool, descr *schema.Column, mem memory.Allocator) TypedEncoder {
+	traits := getEncodingTraits(t)
+	if traits == nil {
+		return nil
+	}
+
+	if mem == nil {
+		mem = memory.DefaultAllocator
+	}
+	return traits.Encoder(format.Encoding(e), useDict, descr, mem)
+}
+
+type encoder struct {
+	descr    *schema.Column
+	encoding format.Encoding
+	typeLen  int
+	mem      memory.Allocator
+
+	sink *PooledBufferWriter
+}
+
+// newEncoderBase constructs a new base encoder for embedding on the typed encoders
+// encapsulating the common functionality.
+func newEncoderBase(e format.Encoding, descr *schema.Column, mem memory.Allocator) encoder {
+	typelen := -1
+	if descr != nil && descr.PhysicalType() == parquet.Types.FixedLenByteArray {
+		typelen = int(descr.TypeLength())
+	}
+	return encoder{
+		descr:    descr,
+		encoding: e,
+		mem:      mem,
+		typeLen:  typelen,
+		sink:     NewPooledBufferWriter(1024),
+	}
+}
+
+// ReserveForWrite allocates n bytes so that the next n bytes written do not require new allocations.
+func (e *encoder) ReserveForWrite(n int)           { e.sink.Reserve(n) }
+func (e *encoder) EstimatedDataEncodedSize() int64 { return int64(e.sink.Len()) }
+func (e *encoder) Encoding() parquet.Encoding      { return parquet.Encoding(e.encoding) }
+func (e *encoder) Allocator() memory.Allocator     { return e.mem }
+func (e *encoder) append(data []byte)              { e.sink.Write(data) }
+
+// FlushValues flushes any unwritten data to the buffer and returns the finished encoded buffer of data.
+// This also clears the encoder, ownership of the data belongs to whomever called FlushValues, Release
+// should be called on the resulting Buffer when done.
+func (e *encoder) FlushValues() Buffer { return e.sink.Finish() }
+
+// Bytes returns the current bytes that have been written to the encoder's buffer but doesn't transfer ownership.
+func (e *encoder) Bytes() []byte { return e.sink.Bytes() }
+
+// Reset drops the data currently in the encoder and resets for new use.
+func (e *encoder) Reset() { e.sink.Reset(0) }
+
+type dictEncoder struct {
+	encoder
+
+	dictEncodedSize int
+	idxBuffer       *memory.Buffer
+	idxValues       []int32
+	memo            MemoTable
+}
+
+// newDictEncoderBase constructs and returns a dictionary encoder for the appropriate type using the passed
+// in memo table for constructing the index.
+func newDictEncoderBase(descr *schema.Column, memo MemoTable, mem memory.Allocator) dictEncoder {
+	return dictEncoder{
+		encoder:   newEncoderBase(format.Encoding_PLAIN_DICTIONARY, descr, mem),
+		idxBuffer: memory.NewResizableBuffer(mem),
+		memo:      memo,
+	}
+}
+
+// Reset drops all the currently encoded values from the index and indexes from the data to allow
+// restarting the encoding process.
+func (d *dictEncoder) Reset() {
+	d.encoder.Reset()
+	d.dictEncodedSize = 0
+	d.idxValues = d.idxValues[:0]
+	d.idxBuffer.ResizeNoShrink(0)
+	d.memo.Reset()
+}
+
+// append the passed index to the indexbuffer
+func (d *dictEncoder) addIndex(idx int) {
+	if len(d.idxValues) == cap(d.idxValues) {
+		curLen := len(d.idxValues)
+		d.idxBuffer.ResizeNoShrink(arrow.Int32Traits.BytesRequired(bitutil.NextPowerOf2(curLen + 1)))
+		d.idxValues = arrow.Int32Traits.CastFromBytes(d.idxBuffer.Buf())[: curLen : d.idxBuffer.Len()/arrow.Int32SizeBytes]
+	}
+	d.idxValues = append(d.idxValues, int32(idx))
+}
+
+// FlushValues dumps all the currently buffered indexes that would become the data page to a buffer and
+// returns it.
+func (d *dictEncoder) FlushValues() Buffer {
+	buf := bufferPool.Get().(*memory.Buffer)
+	buf.Reserve(int(d.EstimatedDataEncodedSize()))
+	size := d.WriteIndices(buf.Buf())
+	buf.ResizeNoShrink(size)
+	return poolBuffer{buf}
+}
+
+// EstimatedDataEncodedSize returns the maximum number of bytes needed to store the RLE encoded indexes, not including the
+// dictionary index in the computation.
+func (d *dictEncoder) EstimatedDataEncodedSize() int64 {
+	return 1 + int64(utils.MaxBufferSize(d.BitWidth(), len(d.idxValues))+utils.MinBufferSize(d.BitWidth()))
+}
+
+// NumEntries returns the number of entires in the dictionary index for this encoder.
+func (d *dictEncoder) NumEntries() int {
+	return d.memo.Size()
+}
+
+// BitWidth returns the max bitwidth that would be necessary for encoding the index values currently
+// in the dictionary based on the size of the dictionary index.
+func (d *dictEncoder) BitWidth() int {
+	switch d.NumEntries() {
+	case 0:
+		return 0
+	case 1:
+		return 1
+	default:
+		return bits.Len32(uint32(d.NumEntries() - 1))
+	}
+}
+
+// WriteDict writes the dictionary index to the given byte slice.
+func (d *dictEncoder) WriteDict(out []byte) {
+	d.memo.CopyValues(out)
+}
+
+// WriteIndices performs Run Length encoding on the indexes and the writes the encoded
+// index value data to the provided byte slice, returning the number of bytes actually written.
+func (d *dictEncoder) WriteIndices(out []byte) int {
+	out[0] = byte(d.BitWidth())
+
+	enc := utils.NewRleEncoder(utils.NewWriterAtBuffer(out[1:]), d.BitWidth())
+	for _, idx := range d.idxValues {
+		if !enc.Put(uint64(idx)) {
+			return -1
+		}
+	}
+	nbytes := enc.Flush()
+
+	d.idxValues = d.idxValues[:0]
+	return nbytes + 1
+}
+
+// Put adds a value to the dictionary data column, inserting the value if it
+// didn't already exist in the dictionary.
+func (d *dictEncoder) Put(v interface{}) {
+	memoIdx, found, err := d.memo.GetOrInsert(v)
+	if err != nil {
+		panic(err)
+	}
+	if !found {
+		d.dictEncodedSize += int(reflect.TypeOf(v).Size())
+	}
+	d.addIndex(memoIdx)
+}
+
+// DictEncodedSize returns the current size of the encoded dictionary
+func (d *dictEncoder) DictEncodedSize() int {
+	return d.dictEncodedSize
+}
+
+// spacedCompress is a helper function for encoders to remove the slots in the slices passed in according
+// to the bitmap which are null into an output slice that is no longer spaced out with slots for nulls.
+func spacedCompress(src, out interface{}, validBits []byte, validBitsOffset int64) int {
+	nvalid := 0
+
+	// for efficiency we use a type switch because the copy runs significantly faster when typed
+	// than calling reflect.Copy
+	switch s := src.(type) {
+	case []int32:
+		o := out.([]int32)
+		reader := utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(s)))
+		for {
+			run := reader.NextRun()
+			if run.Length == 0 {
+				break
+			}
+			copy(o[nvalid:], s[int(run.Pos):int(run.Pos+run.Length)])
+			nvalid += int(run.Length)
+		}
+	case []int64:
+		o := out.([]int64)
+		reader := utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(s)))
+		for {
+			run := reader.NextRun()
+			if run.Length == 0 {
+				break
+			}
+			copy(o[nvalid:], s[int(run.Pos):int(run.Pos+run.Length)])
+			nvalid += int(run.Length)
+		}
+	case []float32:
+		o := out.([]float32)
+		reader := utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(s)))
+		for {
+			run := reader.NextRun()
+			if run.Length == 0 {
+				break
+			}
+			copy(o[nvalid:], s[int(run.Pos):int(run.Pos+run.Length)])
+			nvalid += int(run.Length)
+		}
+	case []float64:
+		o := out.([]float64)
+		reader := utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(s)))
+		for {
+			run := reader.NextRun()
+			if run.Length == 0 {
+				break
+			}
+			copy(o[nvalid:], s[int(run.Pos):int(run.Pos+run.Length)])
+			nvalid += int(run.Length)
+		}
+	case []parquet.ByteArray:
+		o := out.([]parquet.ByteArray)
+		reader := utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(s)))
+		for {
+			run := reader.NextRun()
+			if run.Length == 0 {
+				break
+			}
+			copy(o[nvalid:], s[int(run.Pos):int(run.Pos+run.Length)])
+			nvalid += int(run.Length)
+		}
+	case []parquet.FixedLenByteArray:
+		o := out.([]parquet.FixedLenByteArray)
+		reader := utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(s)))
+		for {
+			run := reader.NextRun()
+			if run.Length == 0 {
+				break
+			}
+			copy(o[nvalid:], s[int(run.Pos):int(run.Pos+run.Length)])
+			nvalid += int(run.Length)
+		}
+	case []bool:
+		o := out.([]bool)
+		reader := utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(s)))
+		for {
+			run := reader.NextRun()
+			if run.Length == 0 {
+				break
+			}
+			copy(o[nvalid:], s[int(run.Pos):int(run.Pos+run.Length)])
+			nvalid += int(run.Length)
+		}
+	}
+
+	return nvalid
+}
diff --git a/go/parquet/internal/encoding/fixed_len_byte_array_decoder.go b/go/parquet/internal/encoding/fixed_len_byte_array_decoder.go
new file mode 100644
index 00000000000..a23489290c8
--- /dev/null
+++ b/go/parquet/internal/encoding/fixed_len_byte_array_decoder.go
@@ -0,0 +1,66 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package encoding
+
+import (
+	"math"
+
+	"github.com/apache/arrow/go/parquet"
+	"github.com/apache/arrow/go/parquet/internal/utils"
+	"golang.org/x/xerrors"
+)
+
+// PlainFixedLenByteArrayDecoder is a plain encoding decoder for Fixed Length Byte Arrays
+type PlainFixedLenByteArrayDecoder struct {
+	decoder
+}
+
+// Type returns the physical type this decoder operates on, FixedLength Byte Arrays
+func (PlainFixedLenByteArrayDecoder) Type() parquet.Type {
+	return parquet.Types.FixedLenByteArray
+}
+
+// Decode populates out with fixed length byte array values until either there are no more
+// values to decode or the length of out has been filled. Then returns the total number of values
+// that were decoded.
+func (pflba *PlainFixedLenByteArrayDecoder) Decode(out []parquet.FixedLenByteArray) (int, error) {
+	max := utils.MinInt(len(out), pflba.nvals)
+	numBytesNeeded := max * pflba.typeLen
+	if numBytesNeeded > len(pflba.data) || numBytesNeeded > math.MaxInt32 {
+		return 0, xerrors.New("parquet: eof exception")
+	}
+
+	for idx := range out[:max] {
+		out[idx] = pflba.data[:pflba.typeLen]
+		pflba.data = pflba.data[pflba.typeLen:]
+	}
+	return max, nil
+}
+
+// DecodeSpaced does the same as Decode but spaces out the resulting slice according to the bitmap leaving space for null values
+func (pflba *PlainFixedLenByteArrayDecoder) DecodeSpaced(out []parquet.FixedLenByteArray, nullCount int, validBits []byte, validBitsOffset int64) (int, error) {
+	toRead := len(out) - nullCount
+	valuesRead, err := pflba.Decode(out[:toRead])
+	if err != nil {
+		return valuesRead, err
+	}
+	if valuesRead != toRead {
+		return valuesRead, xerrors.New("parquet: number of values / definitions levels read did not match")
+	}
+
+	return spacedExpand(out, nullCount, validBits, validBitsOffset), nil
+}
diff --git a/go/parquet/internal/encoding/fixed_len_byte_array_encoder.go b/go/parquet/internal/encoding/fixed_len_byte_array_encoder.go
new file mode 100644
index 00000000000..7eda0d38b0b
--- /dev/null
+++ b/go/parquet/internal/encoding/fixed_len_byte_array_encoder.go
@@ -0,0 +1,104 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package encoding
+
+import (
+	"github.com/apache/arrow/go/parquet"
+	"github.com/apache/arrow/go/parquet/internal/utils"
+)
+
+// PlainFixedLenByteArrayEncoder writes the raw bytes of the byte array
+// always writing typeLength bytes for each value.
+type PlainFixedLenByteArrayEncoder struct {
+	encoder
+
+	bitSetReader utils.SetBitRunReader
+}
+
+// Put writes the provided values to the encoder
+func (enc *PlainFixedLenByteArrayEncoder) Put(in []parquet.FixedLenByteArray) {
+	typeLen := enc.descr.TypeLength()
+	if typeLen == 0 {
+		return
+	}
+
+	bytesNeeded := len(in) * typeLen
+	enc.sink.Reserve(bytesNeeded)
+	for _, val := range in {
+		if val == nil {
+			panic("value cannot be nil")
+		}
+		enc.sink.UnsafeWrite(val[:typeLen])
+	}
+}
+
+// PutSpaced is like Put but works with data that is spaced out according to the passed in bitmap
+func (enc *PlainFixedLenByteArrayEncoder) PutSpaced(in []parquet.FixedLenByteArray, validBits []byte, validBitsOffset int64) {
+	if validBits != nil {
+		if enc.bitSetReader == nil {
+			enc.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(in)))
+		} else {
+			enc.bitSetReader.Reset(validBits, validBitsOffset, int64(len(in)))
+		}
+
+		for {
+			run := enc.bitSetReader.NextRun()
+			if run.Length == 0 {
+				break
+			}
+			enc.Put(in[int(run.Pos):int(run.Pos+run.Length)])
+		}
+	} else {
+		enc.Put(in)
+	}
+}
+
+// Type returns the underlying physical type this encoder works with, Fixed Length byte arrays.
+func (PlainFixedLenByteArrayEncoder) Type() parquet.Type {
+	return parquet.Types.FixedLenByteArray
+}
+
+// WriteDict overrides the embedded WriteDict function to call a specialized function
+// for copying out the Fixed length values from the dictionary more efficiently.
+func (enc *DictFixedLenByteArrayEncoder) WriteDict(out []byte) {
+	enc.memo.(BinaryMemoTable).CopyFixedWidthValues(0, enc.typeLen, out)
+}
+
+// Put writes fixed length values to a dictionary encoded column
+func (enc *DictFixedLenByteArrayEncoder) Put(in []parquet.FixedLenByteArray) {
+	for _, v := range in {
+		if v == nil {
+			v = empty[:]
+		}
+		memoIdx, found, err := enc.memo.GetOrInsert(v)
+		if err != nil {
+			panic(err)
+		}
+		if !found {
+			enc.dictEncodedSize += enc.typeLen
+		}
+		enc.addIndex(memoIdx)
+	}
+}
+
+// PutSpaced is like Put but leaves space for nulls
+func (enc *DictFixedLenByteArrayEncoder) PutSpaced(in []parquet.FixedLenByteArray, validBits []byte, validBitsOffset int64) {
+	utils.VisitSetBitRuns(validBits, validBitsOffset, int64(len(in)), func(pos, length int64) error {
+		enc.Put(in[pos : pos+length])
+		return nil
+	})
+}
diff --git a/go/parquet/internal/encoding/physical_types.tmpldata b/go/parquet/internal/encoding/physical_types.tmpldata
new file mode 100644
index 00000000000..0adeb9955bf
--- /dev/null
+++ b/go/parquet/internal/encoding/physical_types.tmpldata
@@ -0,0 +1,52 @@
+[
+  {
+    "Name": "Int32",
+    "name": "int32",
+    "lower": "int32",
+    "prefix": "arrow"
+  },
+  {
+    "Name": "Int64",
+    "name": "int64",
+    "lower": "int64",
+    "prefix": "arrow"
+  },
+  {
+    "Name": "Int96",
+    "name": "parquet.Int96",
+    "lower": "int96",
+    "prefix": "parquet"
+  },
+  {
+    "Name": "Float32",
+    "name": "float32",
+    "lower": "float32",
+    "prefix": "arrow",
+    "physical": "Float"
+  },
+  {
+    "Name": "Float64",
+    "name": "float64",
+    "lower": "float64",
+    "prefix": "arrow",
+    "physical": "Double"
+  },
+  {
+    "Name": "Boolean",
+    "name": "bool",
+    "lower": "bool",
+    "prefix": "arrow"
+  },
+  {
+    "Name": "ByteArray",
+    "name": "parquet.ByteArray",
+    "lower": "byteArray",
+    "prefix": "parquet"
+  },
+  {
+    "Name": "FixedLenByteArray",
+    "name": "parquet.FixedLenByteArray",
+    "lower": "fixedLenByteArray",
+    "prefix": "parquet"
+  }
+]
diff --git a/go/parquet/internal/encoding/plain_encoder_types.gen.go b/go/parquet/internal/encoding/plain_encoder_types.gen.go
new file mode 100644
index 00000000000..a3826339dfa
--- /dev/null
+++ b/go/parquet/internal/encoding/plain_encoder_types.gen.go
@@ -0,0 +1,639 @@
+// Code generated by plain_encoder_types.gen.go.tmpl. DO NOT EDIT.
+
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package encoding
+
+import (
+	"bytes"
+	"encoding/binary"
+	"math"
+
+	"github.com/apache/arrow/go/arrow"
+	"github.com/apache/arrow/go/arrow/endian"
+	"github.com/apache/arrow/go/parquet"
+	"github.com/apache/arrow/go/parquet/internal/utils"
+	"golang.org/x/xerrors"
+)
+
+var (
+	writeInt32LE      func(*encoder, []int32)
+	copyFromInt32LE   func(dst []int32, src []byte)
+	writeInt64LE      func(*encoder, []int64)
+	copyFromInt64LE   func(dst []int64, src []byte)
+	writeInt96LE      func(*encoder, []parquet.Int96)
+	copyFromInt96LE   func(dst []parquet.Int96, src []byte)
+	writeFloat32LE    func(*encoder, []float32)
+	copyFromFloat32LE func(dst []float32, src []byte)
+	writeFloat64LE    func(*encoder, []float64)
+	copyFromFloat64LE func(dst []float64, src []byte)
+)
+
+func init() {
+	// int96 is already internally represented as little endian data
+	// no need to have special behavior on big endian architectures
+	// for read/write, consumers will need to be aware of the fact
+	// that it is internally 12 bytes little endian when attempting
+	// to utilize it.
+	writeInt96LE = func(e *encoder, in []parquet.Int96) {
+		e.append(parquet.Int96Traits.CastToBytes(in))
+	}
+	copyFromInt96LE = func(dst []parquet.Int96, src []byte) {
+		copy(parquet.Int96Traits.CastToBytes(dst), src)
+	}
+
+	if endian.IsBigEndian {
+		writeInt32LE = func(e *encoder, in []int32) {
+			binary.Write(e.sink, binary.LittleEndian, in)
+		}
+		copyFromInt32LE = func(dst []int32, src []byte) {
+			r := bytes.NewReader(src)
+			binary.Read(r, binary.LittleEndian, &dst)
+		}
+		writeInt64LE = func(e *encoder, in []int64) {
+			binary.Write(e.sink, binary.LittleEndian, in)
+		}
+		copyFromInt64LE = func(dst []int64, src []byte) {
+			r := bytes.NewReader(src)
+			binary.Read(r, binary.LittleEndian, &dst)
+		}
+		writeFloat32LE = func(e *encoder, in []float32) {
+			binary.Write(e.sink, binary.LittleEndian, in)
+		}
+		copyFromFloat32LE = func(dst []float32, src []byte) {
+			r := bytes.NewReader(src)
+			binary.Read(r, binary.LittleEndian, &dst)
+		}
+		writeFloat64LE = func(e *encoder, in []float64) {
+			binary.Write(e.sink, binary.LittleEndian, in)
+		}
+		copyFromFloat64LE = func(dst []float64, src []byte) {
+			r := bytes.NewReader(src)
+			binary.Read(r, binary.LittleEndian, &dst)
+		}
+	} else {
+		writeInt32LE = func(e *encoder, in []int32) {
+			e.append(arrow.Int32Traits.CastToBytes(in))
+		}
+		copyFromInt32LE = func(dst []int32, src []byte) {
+			copy(arrow.Int32Traits.CastToBytes(dst), src)
+		}
+		writeInt64LE = func(e *encoder, in []int64) {
+			e.append(arrow.Int64Traits.CastToBytes(in))
+		}
+		copyFromInt64LE = func(dst []int64, src []byte) {
+			copy(arrow.Int64Traits.CastToBytes(dst), src)
+		}
+		writeFloat32LE = func(e *encoder, in []float32) {
+			e.append(arrow.Float32Traits.CastToBytes(in))
+		}
+		copyFromFloat32LE = func(dst []float32, src []byte) {
+			copy(arrow.Float32Traits.CastToBytes(dst), src)
+		}
+		writeFloat64LE = func(e *encoder, in []float64) {
+			e.append(arrow.Float64Traits.CastToBytes(in))
+		}
+		copyFromFloat64LE = func(dst []float64, src []byte) {
+			copy(arrow.Float64Traits.CastToBytes(dst), src)
+		}
+	}
+}
+
+// PlainInt32Encoder is an encoder for int32 values using Plain Encoding
+// which in general is just storing the values as raw bytes of the appropriate size
+type PlainInt32Encoder struct {
+	encoder
+
+	bitSetReader utils.SetBitRunReader
+}
+
+// Put encodes a slice of values into the underlying buffer
+func (enc *PlainInt32Encoder) Put(in []int32) {
+	writeInt32LE(&enc.encoder, in)
+}
+
+// PutSpaced encodes a slice of values into the underlying buffer which are spaced out
+// including null values defined by the validBits bitmap starting at a given bit offset.
+// the values are first compressed by having the null slots removed before writing to the buffer
+func (enc *PlainInt32Encoder) PutSpaced(in []int32, validBits []byte, validBitsOffset int64) {
+	nbytes := arrow.Int32Traits.BytesRequired(len(in))
+	enc.ReserveForWrite(nbytes)
+
+	if enc.bitSetReader == nil {
+		enc.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(in)))
+	} else {
+		enc.bitSetReader.Reset(validBits, validBitsOffset, int64(len(in)))
+	}
+
+	for {
+		run := enc.bitSetReader.NextRun()
+		if run.Length == 0 {
+			break
+		}
+		enc.Put(in[int(run.Pos):int(run.Pos+run.Length)])
+	}
+}
+
+// Type returns the underlying physical type this encoder is able to encode
+func (PlainInt32Encoder) Type() parquet.Type {
+	return parquet.Types.Int32
+}
+
+// PlainInt32Decoder is a decoder specifically for decoding Plain Encoding data
+// of int32 type.
+type PlainInt32Decoder struct {
+	decoder
+
+	bitSetReader utils.SetBitRunReader
+}
+
+// Type returns the physical type this decoder is able to decode for
+func (PlainInt32Decoder) Type() parquet.Type {
+	return parquet.Types.Int32
+}
+
+// Decode populates the given slice with values from the data to be decoded,
+// decoding the min(len(out), remaining values).
+// It returns the number of values actually decoded and any error encountered.
+func (dec *PlainInt32Decoder) Decode(out []int32) (int, error) {
+	max := utils.MinInt(len(out), dec.nvals)
+	nbytes := int64(max) * int64(arrow.Int32SizeBytes)
+	if nbytes > int64(len(dec.data)) || nbytes > math.MaxInt32 {
+		return 0, xerrors.Errorf("parquet: eof exception decode plain Int32, nvals: %d, nbytes: %d, datalen: %d", dec.nvals, nbytes, len(dec.data))
+	}
+
+	copyFromInt32LE(out, dec.data[:nbytes])
+	dec.data = dec.data[nbytes:]
+	dec.nvals -= max
+	return max, nil
+}
+
+// DecodeSpaced is the same as decode, except it expands the data out to leave spaces for null values
+// as defined by the bitmap provided.
+func (dec *PlainInt32Decoder) DecodeSpaced(out []int32, nullCount int, validBits []byte, validBitsOffset int64) (int, error) {
+	toread := len(out) - nullCount
+	values, err := dec.Decode(out[:toread])
+	if err != nil {
+		return 0, err
+	}
+	if values != toread {
+		return 0, xerrors.New("parquet: number of values / definition levels read did not match")
+	}
+
+	nvalues := len(out)
+	if nullCount == 0 {
+		return nvalues, nil
+	}
+
+	idxDecode := nvalues - nullCount
+	if dec.bitSetReader == nil {
+		dec.bitSetReader = utils.NewReverseSetBitRunReader(validBits, validBitsOffset, int64(nvalues))
+	} else {
+		dec.bitSetReader.Reset(validBits, validBitsOffset, int64(nvalues))
+	}
+
+	for {
+		run := dec.bitSetReader.NextRun()
+		if run.Length == 0 {
+			break
+		}
+
+		idxDecode -= int(run.Length)
+		copy(out[int(run.Pos):], out[idxDecode:idxDecode+int(run.Length)])
+	}
+	return nvalues, nil
+}
+
+// PlainInt64Encoder is an encoder for int64 values using Plain Encoding
+// which in general is just storing the values as raw bytes of the appropriate size
+type PlainInt64Encoder struct {
+	encoder
+
+	bitSetReader utils.SetBitRunReader
+}
+
+// Put encodes a slice of values into the underlying buffer
+func (enc *PlainInt64Encoder) Put(in []int64) {
+	writeInt64LE(&enc.encoder, in)
+}
+
+// PutSpaced encodes a slice of values into the underlying buffer which are spaced out
+// including null values defined by the validBits bitmap starting at a given bit offset.
+// the values are first compressed by having the null slots removed before writing to the buffer
+func (enc *PlainInt64Encoder) PutSpaced(in []int64, validBits []byte, validBitsOffset int64) {
+	nbytes := arrow.Int64Traits.BytesRequired(len(in))
+	enc.ReserveForWrite(nbytes)
+
+	if enc.bitSetReader == nil {
+		enc.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(in)))
+	} else {
+		enc.bitSetReader.Reset(validBits, validBitsOffset, int64(len(in)))
+	}
+
+	for {
+		run := enc.bitSetReader.NextRun()
+		if run.Length == 0 {
+			break
+		}
+		enc.Put(in[int(run.Pos):int(run.Pos+run.Length)])
+	}
+}
+
+// Type returns the underlying physical type this encoder is able to encode
+func (PlainInt64Encoder) Type() parquet.Type {
+	return parquet.Types.Int64
+}
+
+// PlainInt64Decoder is a decoder specifically for decoding Plain Encoding data
+// of int64 type.
+type PlainInt64Decoder struct {
+	decoder
+
+	bitSetReader utils.SetBitRunReader
+}
+
+// Type returns the physical type this decoder is able to decode for
+func (PlainInt64Decoder) Type() parquet.Type {
+	return parquet.Types.Int64
+}
+
+// Decode populates the given slice with values from the data to be decoded,
+// decoding the min(len(out), remaining values).
+// It returns the number of values actually decoded and any error encountered.
+func (dec *PlainInt64Decoder) Decode(out []int64) (int, error) {
+	max := utils.MinInt(len(out), dec.nvals)
+	nbytes := int64(max) * int64(arrow.Int64SizeBytes)
+	if nbytes > int64(len(dec.data)) || nbytes > math.MaxInt32 {
+		return 0, xerrors.Errorf("parquet: eof exception decode plain Int64, nvals: %d, nbytes: %d, datalen: %d", dec.nvals, nbytes, len(dec.data))
+	}
+
+	copyFromInt64LE(out, dec.data[:nbytes])
+	dec.data = dec.data[nbytes:]
+	dec.nvals -= max
+	return max, nil
+}
+
+// DecodeSpaced is the same as decode, except it expands the data out to leave spaces for null values
+// as defined by the bitmap provided.
+func (dec *PlainInt64Decoder) DecodeSpaced(out []int64, nullCount int, validBits []byte, validBitsOffset int64) (int, error) {
+	toread := len(out) - nullCount
+	values, err := dec.Decode(out[:toread])
+	if err != nil {
+		return 0, err
+	}
+	if values != toread {
+		return 0, xerrors.New("parquet: number of values / definition levels read did not match")
+	}
+
+	nvalues := len(out)
+	if nullCount == 0 {
+		return nvalues, nil
+	}
+
+	idxDecode := nvalues - nullCount
+	if dec.bitSetReader == nil {
+		dec.bitSetReader = utils.NewReverseSetBitRunReader(validBits, validBitsOffset, int64(nvalues))
+	} else {
+		dec.bitSetReader.Reset(validBits, validBitsOffset, int64(nvalues))
+	}
+
+	for {
+		run := dec.bitSetReader.NextRun()
+		if run.Length == 0 {
+			break
+		}
+
+		idxDecode -= int(run.Length)
+		copy(out[int(run.Pos):], out[idxDecode:idxDecode+int(run.Length)])
+	}
+	return nvalues, nil
+}
+
+// PlainInt96Encoder is an encoder for parquet.Int96 values using Plain Encoding
+// which in general is just storing the values as raw bytes of the appropriate size
+type PlainInt96Encoder struct {
+	encoder
+
+	bitSetReader utils.SetBitRunReader
+}
+
+// Put encodes a slice of values into the underlying buffer
+func (enc *PlainInt96Encoder) Put(in []parquet.Int96) {
+	writeInt96LE(&enc.encoder, in)
+}
+
+// PutSpaced encodes a slice of values into the underlying buffer which are spaced out
+// including null values defined by the validBits bitmap starting at a given bit offset.
+// the values are first compressed by having the null slots removed before writing to the buffer
+func (enc *PlainInt96Encoder) PutSpaced(in []parquet.Int96, validBits []byte, validBitsOffset int64) {
+	nbytes := parquet.Int96Traits.BytesRequired(len(in))
+	enc.ReserveForWrite(nbytes)
+
+	if enc.bitSetReader == nil {
+		enc.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(in)))
+	} else {
+		enc.bitSetReader.Reset(validBits, validBitsOffset, int64(len(in)))
+	}
+
+	for {
+		run := enc.bitSetReader.NextRun()
+		if run.Length == 0 {
+			break
+		}
+		enc.Put(in[int(run.Pos):int(run.Pos+run.Length)])
+	}
+}
+
+// Type returns the underlying physical type this encoder is able to encode
+func (PlainInt96Encoder) Type() parquet.Type {
+	return parquet.Types.Int96
+}
+
+// PlainInt96Decoder is a decoder specifically for decoding Plain Encoding data
+// of parquet.Int96 type.
+type PlainInt96Decoder struct {
+	decoder
+
+	bitSetReader utils.SetBitRunReader
+}
+
+// Type returns the physical type this decoder is able to decode for
+func (PlainInt96Decoder) Type() parquet.Type {
+	return parquet.Types.Int96
+}
+
+// Decode populates the given slice with values from the data to be decoded,
+// decoding the min(len(out), remaining values).
+// It returns the number of values actually decoded and any error encountered.
+func (dec *PlainInt96Decoder) Decode(out []parquet.Int96) (int, error) {
+	max := utils.MinInt(len(out), dec.nvals)
+	nbytes := int64(max) * int64(parquet.Int96SizeBytes)
+	if nbytes > int64(len(dec.data)) || nbytes > math.MaxInt32 {
+		return 0, xerrors.Errorf("parquet: eof exception decode plain Int96, nvals: %d, nbytes: %d, datalen: %d", dec.nvals, nbytes, len(dec.data))
+	}
+
+	copyFromInt96LE(out, dec.data[:nbytes])
+	dec.data = dec.data[nbytes:]
+	dec.nvals -= max
+	return max, nil
+}
+
+// DecodeSpaced is the same as decode, except it expands the data out to leave spaces for null values
+// as defined by the bitmap provided.
+func (dec *PlainInt96Decoder) DecodeSpaced(out []parquet.Int96, nullCount int, validBits []byte, validBitsOffset int64) (int, error) {
+	toread := len(out) - nullCount
+	values, err := dec.Decode(out[:toread])
+	if err != nil {
+		return 0, err
+	}
+	if values != toread {
+		return 0, xerrors.New("parquet: number of values / definition levels read did not match")
+	}
+
+	nvalues := len(out)
+	if nullCount == 0 {
+		return nvalues, nil
+	}
+
+	idxDecode := nvalues - nullCount
+	if dec.bitSetReader == nil {
+		dec.bitSetReader = utils.NewReverseSetBitRunReader(validBits, validBitsOffset, int64(nvalues))
+	} else {
+		dec.bitSetReader.Reset(validBits, validBitsOffset, int64(nvalues))
+	}
+
+	for {
+		run := dec.bitSetReader.NextRun()
+		if run.Length == 0 {
+			break
+		}
+
+		idxDecode -= int(run.Length)
+		copy(out[int(run.Pos):], out[idxDecode:idxDecode+int(run.Length)])
+	}
+	return nvalues, nil
+}
+
+// PlainFloat32Encoder is an encoder for float32 values using Plain Encoding
+// which in general is just storing the values as raw bytes of the appropriate size
+type PlainFloat32Encoder struct {
+	encoder
+
+	bitSetReader utils.SetBitRunReader
+}
+
+// Put encodes a slice of values into the underlying buffer
+func (enc *PlainFloat32Encoder) Put(in []float32) {
+	writeFloat32LE(&enc.encoder, in)
+}
+
+// PutSpaced encodes a slice of values into the underlying buffer which are spaced out
+// including null values defined by the validBits bitmap starting at a given bit offset.
+// the values are first compressed by having the null slots removed before writing to the buffer
+func (enc *PlainFloat32Encoder) PutSpaced(in []float32, validBits []byte, validBitsOffset int64) {
+	nbytes := arrow.Float32Traits.BytesRequired(len(in))
+	enc.ReserveForWrite(nbytes)
+
+	if enc.bitSetReader == nil {
+		enc.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(in)))
+	} else {
+		enc.bitSetReader.Reset(validBits, validBitsOffset, int64(len(in)))
+	}
+
+	for {
+		run := enc.bitSetReader.NextRun()
+		if run.Length == 0 {
+			break
+		}
+		enc.Put(in[int(run.Pos):int(run.Pos+run.Length)])
+	}
+}
+
+// Type returns the underlying physical type this encoder is able to encode
+func (PlainFloat32Encoder) Type() parquet.Type {
+	return parquet.Types.Float
+}
+
+// PlainFloat32Decoder is a decoder specifically for decoding Plain Encoding data
+// of float32 type.
+type PlainFloat32Decoder struct {
+	decoder
+
+	bitSetReader utils.SetBitRunReader
+}
+
+// Type returns the physical type this decoder is able to decode for
+func (PlainFloat32Decoder) Type() parquet.Type {
+	return parquet.Types.Float
+}
+
+// Decode populates the given slice with values from the data to be decoded,
+// decoding the min(len(out), remaining values).
+// It returns the number of values actually decoded and any error encountered.
+func (dec *PlainFloat32Decoder) Decode(out []float32) (int, error) {
+	max := utils.MinInt(len(out), dec.nvals)
+	nbytes := int64(max) * int64(arrow.Float32SizeBytes)
+	if nbytes > int64(len(dec.data)) || nbytes > math.MaxInt32 {
+		return 0, xerrors.Errorf("parquet: eof exception decode plain Float32, nvals: %d, nbytes: %d, datalen: %d", dec.nvals, nbytes, len(dec.data))
+	}
+
+	copyFromFloat32LE(out, dec.data[:nbytes])
+	dec.data = dec.data[nbytes:]
+	dec.nvals -= max
+	return max, nil
+}
+
+// DecodeSpaced is the same as decode, except it expands the data out to leave spaces for null values
+// as defined by the bitmap provided.
+func (dec *PlainFloat32Decoder) DecodeSpaced(out []float32, nullCount int, validBits []byte, validBitsOffset int64) (int, error) {
+	toread := len(out) - nullCount
+	values, err := dec.Decode(out[:toread])
+	if err != nil {
+		return 0, err
+	}
+	if values != toread {
+		return 0, xerrors.New("parquet: number of values / definition levels read did not match")
+	}
+
+	nvalues := len(out)
+	if nullCount == 0 {
+		return nvalues, nil
+	}
+
+	idxDecode := nvalues - nullCount
+	if dec.bitSetReader == nil {
+		dec.bitSetReader = utils.NewReverseSetBitRunReader(validBits, validBitsOffset, int64(nvalues))
+	} else {
+		dec.bitSetReader.Reset(validBits, validBitsOffset, int64(nvalues))
+	}
+
+	for {
+		run := dec.bitSetReader.NextRun()
+		if run.Length == 0 {
+			break
+		}
+
+		idxDecode -= int(run.Length)
+		copy(out[int(run.Pos):], out[idxDecode:idxDecode+int(run.Length)])
+	}
+	return nvalues, nil
+}
+
+// PlainFloat64Encoder is an encoder for float64 values using Plain Encoding
+// which in general is just storing the values as raw bytes of the appropriate size
+type PlainFloat64Encoder struct {
+	encoder
+
+	bitSetReader utils.SetBitRunReader
+}
+
+// Put encodes a slice of values into the underlying buffer
+func (enc *PlainFloat64Encoder) Put(in []float64) {
+	writeFloat64LE(&enc.encoder, in)
+}
+
+// PutSpaced encodes a slice of values into the underlying buffer which are spaced out
+// including null values defined by the validBits bitmap starting at a given bit offset.
+// the values are first compressed by having the null slots removed before writing to the buffer
+func (enc *PlainFloat64Encoder) PutSpaced(in []float64, validBits []byte, validBitsOffset int64) {
+	nbytes := arrow.Float64Traits.BytesRequired(len(in))
+	enc.ReserveForWrite(nbytes)
+
+	if enc.bitSetReader == nil {
+		enc.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(in)))
+	} else {
+		enc.bitSetReader.Reset(validBits, validBitsOffset, int64(len(in)))
+	}
+
+	for {
+		run := enc.bitSetReader.NextRun()
+		if run.Length == 0 {
+			break
+		}
+		enc.Put(in[int(run.Pos):int(run.Pos+run.Length)])
+	}
+}
+
+// Type returns the underlying physical type this encoder is able to encode
+func (PlainFloat64Encoder) Type() parquet.Type {
+	return parquet.Types.Double
+}
+
+// PlainFloat64Decoder is a decoder specifically for decoding Plain Encoding data
+// of float64 type.
+type PlainFloat64Decoder struct {
+	decoder
+
+	bitSetReader utils.SetBitRunReader
+}
+
+// Type returns the physical type this decoder is able to decode for
+func (PlainFloat64Decoder) Type() parquet.Type {
+	return parquet.Types.Double
+}
+
+// Decode populates the given slice with values from the data to be decoded,
+// decoding the min(len(out), remaining values).
+// It returns the number of values actually decoded and any error encountered.
+func (dec *PlainFloat64Decoder) Decode(out []float64) (int, error) {
+	max := utils.MinInt(len(out), dec.nvals)
+	nbytes := int64(max) * int64(arrow.Float64SizeBytes)
+	if nbytes > int64(len(dec.data)) || nbytes > math.MaxInt32 {
+		return 0, xerrors.Errorf("parquet: eof exception decode plain Float64, nvals: %d, nbytes: %d, datalen: %d", dec.nvals, nbytes, len(dec.data))
+	}
+
+	copyFromFloat64LE(out, dec.data[:nbytes])
+	dec.data = dec.data[nbytes:]
+	dec.nvals -= max
+	return max, nil
+}
+
+// DecodeSpaced is the same as decode, except it expands the data out to leave spaces for null values
+// as defined by the bitmap provided.
+func (dec *PlainFloat64Decoder) DecodeSpaced(out []float64, nullCount int, validBits []byte, validBitsOffset int64) (int, error) {
+	toread := len(out) - nullCount
+	values, err := dec.Decode(out[:toread])
+	if err != nil {
+		return 0, err
+	}
+	if values != toread {
+		return 0, xerrors.New("parquet: number of values / definition levels read did not match")
+	}
+
+	nvalues := len(out)
+	if nullCount == 0 {
+		return nvalues, nil
+	}
+
+	idxDecode := nvalues - nullCount
+	if dec.bitSetReader == nil {
+		dec.bitSetReader = utils.NewReverseSetBitRunReader(validBits, validBitsOffset, int64(nvalues))
+	} else {
+		dec.bitSetReader.Reset(validBits, validBitsOffset, int64(nvalues))
+	}
+
+	for {
+		run := dec.bitSetReader.NextRun()
+		if run.Length == 0 {
+			break
+		}
+
+		idxDecode -= int(run.Length)
+		copy(out[int(run.Pos):], out[idxDecode:idxDecode+int(run.Length)])
+	}
+	return nvalues, nil
+}
diff --git a/go/parquet/internal/encoding/plain_encoder_types.gen.go.tmpl b/go/parquet/internal/encoding/plain_encoder_types.gen.go.tmpl
new file mode 100644
index 00000000000..1b72497444c
--- /dev/null
+++ b/go/parquet/internal/encoding/plain_encoder_types.gen.go.tmpl
@@ -0,0 +1,182 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package encoding
+
+import (
+  "encoding/binary"
+
+  "github.com/apache/arrow/go/arrow"
+  "github.com/apache/arrow/go/parquet"
+  "github.com/apache/arrow/go/parquet/internal/utils"
+)
+
+var (
+{{range .In}}
+{{if and (ne .Name "Boolean") (ne .Name "ByteArray") (ne .Name "FixedLenByteArray") -}}
+	write{{.Name}}LE   func(*encoder, []{{.name}})
+  copyFrom{{.Name}}LE  func(dst []{{.name}}, src []byte)
+{{- end}}
+{{- end}}
+)
+
+func init() {
+  // int96 is already internally represented as little endian data
+  // no need to have special behavior on big endian architectures
+  // for read/write, consumers will need to be aware of the fact
+  // that it is internally 12 bytes little endian when attempting
+  // to utilize it.
+  writeInt96LE = func(e *encoder, in []parquet.Int96) {
+    e.append(parquet.Int96Traits.CastToBytes(in))
+  }
+  copyFromInt96LE = func(dst []parquet.Int96, src []byte) {
+    copy(parquet.Int96Traits.CastToBytes(dst), src)
+  }
+
+	if endian.IsBigEndian {
+{{- range .In}}
+{{- if and (ne .Name "Boolean") (ne .Name "ByteArray") (ne .Name "FixedLenByteArray") (ne .Name "Int96")}}
+    write{{.Name}}LE = func(e *encoder, in []{{.name}}) {
+      binary.Write(e.sink, binary.LittleEndian, in)
+    }
+    copyFrom{{.Name}}LE = func(dst []{{.name}}, src []byte) {
+      r := bytes.NewReader(src)
+      binary.Read(r, binary.LittleEndian, &dst)
+    }
+{{- end -}}
+{{- end}}
+	} else {
+{{- range .In}}
+{{- if and (ne .Name "Boolean") (ne .Name "ByteArray") (ne .Name "FixedLenByteArray") (ne .Name "Int96")}}
+    write{{.Name}}LE = func(e *encoder, in []{{.name}}) {
+      e.append({{.prefix}}.{{.Name}}Traits.CastToBytes(in))
+    }
+    copyFrom{{.Name}}LE = func(dst []{{.name}}, src []byte) {
+      copy({{.prefix}}.{{.Name}}Traits.CastToBytes(dst), src)
+    }
+{{- end -}}
+{{- end}}
+	}
+}
+
+{{range .In}}
+{{if and (ne .Name "Boolean") (ne .Name "ByteArray") (ne .Name "FixedLenByteArray")}}
+// Plain{{.Name}}Encoder is an encoder for {{.name}} values using Plain Encoding
+// which in general is just storing the values as raw bytes of the appropriate size
+type Plain{{.Name}}Encoder struct {
+  encoder
+
+  bitSetReader utils.SetBitRunReader
+}
+
+// Put encodes a slice of values into the underlying buffer
+func (enc *Plain{{.Name}}Encoder) Put(in []{{.name}}) {
+  write{{.Name}}LE(&enc.encoder, in)
+}
+
+// PutSpaced encodes a slice of values into the underlying buffer which are spaced out
+// including null values defined by the validBits bitmap starting at a given bit offset.
+// the values are first compressed by having the null slots removed before writing to the buffer
+func (enc *Plain{{.Name}}Encoder) PutSpaced(in []{{.name}}, validBits []byte, validBitsOffset int64) {
+  nbytes := {{.prefix}}.{{.Name}}Traits.BytesRequired(len(in))
+  enc.ReserveForWrite(nbytes)
+
+  if enc.bitSetReader == nil {
+    enc.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(in)))
+  } else {
+    enc.bitSetReader.Reset(validBits, validBitsOffset, int64(len(in)))
+  }
+
+  for {
+    run := enc.bitSetReader.NextRun()
+    if run.Length == 0 {
+      break
+    }
+    enc.Put(in[int(run.Pos):int(run.Pos+run.Length)])
+  }
+}
+
+// Type returns the underlying physical type this encoder is able to encode
+func (Plain{{.Name}}Encoder) Type() parquet.Type {
+  return parquet.Types.{{if .physical}}{{.physical}}{{else}}{{.Name}}{{end}}
+}
+
+// Plain{{.Name}}Decoder is a decoder specifically for decoding Plain Encoding data
+// of {{.name}} type.
+type Plain{{.Name}}Decoder struct {
+  decoder
+
+  bitSetReader utils.SetBitRunReader
+}
+
+// Type returns the physical type this decoder is able to decode for
+func (Plain{{.Name}}Decoder) Type() parquet.Type {
+  return parquet.Types.{{if .physical}}{{.physical}}{{else}}{{.Name}}{{end}}
+}
+
+// Decode populates the given slice with values from the data to be decoded,
+// decoding the min(len(out), remaining values).
+// It returns the number of values actually decoded and any error encountered.
+func (dec *Plain{{.Name}}Decoder) Decode(out []{{.name}}) (int, error) {
+  max := utils.MinInt(len(out), dec.nvals)
+  nbytes := int64(max) * int64({{.prefix}}.{{.Name}}SizeBytes)
+  if nbytes > int64(len(dec.data)) || nbytes > math.MaxInt32 {
+    return 0, xerrors.Errorf("parquet: eof exception decode plain {{.Name}}, nvals: %d, nbytes: %d, datalen: %d", dec.nvals, nbytes, len(dec.data))
+  }
+
+  copyFrom{{.Name}}LE(out, dec.data[:nbytes])
+  dec.data = dec.data[nbytes:]
+  dec.nvals -= max
+  return max, nil
+}
+
+// DecodeSpaced is the same as decode, except it expands the data out to leave spaces for null values
+// as defined by the bitmap provided.
+func (dec *Plain{{.Name}}Decoder) DecodeSpaced(out []{{.name}}, nullCount int, validBits []byte, validBitsOffset int64) (int, error) {
+  toread := len(out) - nullCount
+  values, err := dec.Decode(out[:toread])
+  if err != nil {
+    return 0, err
+  }
+  if values != toread {
+    return 0, xerrors.New("parquet: number of values / definition levels read did not match")
+  }
+
+  nvalues := len(out)
+  if nullCount == 0 {
+    return nvalues, nil
+  }
+
+  idxDecode := nvalues - nullCount
+  if dec.bitSetReader == nil {
+    dec.bitSetReader = utils.NewReverseSetBitRunReader(validBits, validBitsOffset, int64(nvalues))
+  } else {
+    dec.bitSetReader.Reset(validBits, validBitsOffset, int64(nvalues))
+  }
+
+  for {
+    run := dec.bitSetReader.NextRun()
+    if run.Length == 0 {
+      break
+    }
+
+    idxDecode -= int(run.Length)
+    copy(out[int(run.Pos):], out[idxDecode:idxDecode+int(run.Length)])
+  }
+  return nvalues, nil
+}
+{{end}}
+{{end}}
diff --git a/go/parquet/internal/encoding/typed_encoder.gen.go b/go/parquet/internal/encoding/typed_encoder.gen.go
new file mode 100644
index 00000000000..abcfd95142e
--- /dev/null
+++ b/go/parquet/internal/encoding/typed_encoder.gen.go
@@ -0,0 +1,1443 @@
+// Code generated by typed_encoder.gen.go.tmpl. DO NOT EDIT.
+
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package encoding
+
+import (
+	"unsafe"
+
+	"github.com/apache/arrow/go/arrow"
+	"github.com/apache/arrow/go/arrow/memory"
+	"github.com/apache/arrow/go/parquet"
+	format "github.com/apache/arrow/go/parquet/internal/gen-go/parquet"
+	"github.com/apache/arrow/go/parquet/internal/utils"
+	"github.com/apache/arrow/go/parquet/schema"
+	"golang.org/x/xerrors"
+)
+
+// fully typed encoder interfaces to enable writing against encoder/decoders
+// without having to care about what encoding type is actually being used.
+
+var (
+	Int32EncoderTraits             int32EncoderTraits
+	Int32DecoderTraits             int32DecoderTraits
+	Int64EncoderTraits             int64EncoderTraits
+	Int64DecoderTraits             int64DecoderTraits
+	Int96EncoderTraits             int96EncoderTraits
+	Int96DecoderTraits             int96DecoderTraits
+	Float32EncoderTraits           float32EncoderTraits
+	Float32DecoderTraits           float32DecoderTraits
+	Float64EncoderTraits           float64EncoderTraits
+	Float64DecoderTraits           float64DecoderTraits
+	BooleanEncoderTraits           boolEncoderTraits
+	BooleanDecoderTraits           boolDecoderTraits
+	ByteArrayEncoderTraits         byteArrayEncoderTraits
+	ByteArrayDecoderTraits         byteArrayDecoderTraits
+	FixedLenByteArrayEncoderTraits fixedLenByteArrayEncoderTraits
+	FixedLenByteArrayDecoderTraits fixedLenByteArrayDecoderTraits
+)
+
+// Int32Encoder is the interface for all encoding types that implement encoding
+// int32 values.
+type Int32Encoder interface {
+	TypedEncoder
+	Put([]int32)
+	PutSpaced([]int32, []byte, int64)
+}
+
+// Int32Decoder is the interface for all encoding types that implement decoding
+// int32 values.
+type Int32Decoder interface {
+	TypedDecoder
+	Decode([]int32) (int, error)
+	DecodeSpaced([]int32, int, []byte, int64) (int, error)
+}
+
+// the int32EncoderTraits struct is used to make it easy to create encoders and decoders based on type
+type int32EncoderTraits struct{}
+
+// Encoder returns an encoder for int32 type data, using the specified encoding type and whether or not
+// it should be dictionary encoded.
+func (int32EncoderTraits) Encoder(e format.Encoding, useDict bool, descr *schema.Column, mem memory.Allocator) TypedEncoder {
+
+	switch e {
+	case format.Encoding_PLAIN:
+		return &PlainInt32Encoder{encoder: newEncoderBase(e, descr, mem)}
+	case format.Encoding_DELTA_BINARY_PACKED:
+		return DeltaBitPackInt32Encoder{&deltaBitPackEncoder{
+			encoder: newEncoderBase(e, descr, mem)}}
+	default:
+		panic("unimplemented encoding type")
+	}
+}
+
+// int32DecoderTraits is a helper struct for providing information regardless of the type
+// and used as a generic way to create a Decoder or Dictionary Decoder for int32 values
+type int32DecoderTraits struct{}
+
+// BytesRequired returns the number of bytes required to store n int32 values.
+func (int32DecoderTraits) BytesRequired(n int) int {
+	return arrow.Int32Traits.BytesRequired(n)
+}
+
+// Decoder returns a decoder for int32 typed data of the requested encoding type if available
+func (int32DecoderTraits) Decoder(e parquet.Encoding, descr *schema.Column, useDict bool, mem memory.Allocator) TypedDecoder {
+	if useDict {
+		return &DictInt32Decoder{dictDecoder{decoder: newDecoderBase(format.Encoding_RLE_DICTIONARY, descr), mem: mem}}
+	}
+
+	switch e {
+	case parquet.Encodings.Plain:
+		return &PlainInt32Decoder{decoder: newDecoderBase(format.Encoding(e), descr)}
+	case parquet.Encodings.DeltaBinaryPacked:
+		if mem == nil {
+			mem = memory.DefaultAllocator
+		}
+		return &DeltaBitPackInt32Decoder{
+			deltaBitPackDecoder: &deltaBitPackDecoder{
+				decoder: newDecoderBase(format.Encoding(e), descr),
+				mem:     mem,
+			}}
+	default:
+		panic("unimplemented encoding type")
+	}
+}
+
+// DictInt32Encoder is an encoder for int32 data using dictionary encoding
+type DictInt32Encoder struct {
+	dictEncoder
+}
+
+// Type returns the underlying physical type that can be encoded with this encoder
+func (enc *DictInt32Encoder) Type() parquet.Type {
+	return parquet.Types.Int32
+}
+
+// WriteDict populates the byte slice with the dictionary index
+func (enc *DictInt32Encoder) WriteDict(out []byte) {
+	enc.memo.CopyValues(arrow.Int32Traits.CastFromBytes(out))
+}
+
+// Put encodes the values passed in, adding to the index as needed.
+func (enc *DictInt32Encoder) Put(in []int32) {
+	for _, val := range in {
+		enc.dictEncoder.Put(val)
+	}
+}
+
+// PutSpaced is the same as Put but for when the data being encoded has slots open for
+// null values, using the bitmap provided to skip values as needed.
+func (enc *DictInt32Encoder) PutSpaced(in []int32, validBits []byte, validBitsOffset int64) {
+	utils.VisitSetBitRuns(validBits, validBitsOffset, int64(len(in)), func(pos, length int64) error {
+		for i := int64(0); i < length; i++ {
+			enc.dictEncoder.Put(in[i+pos])
+		}
+		return nil
+	})
+}
+
+// DictInt32Decoder is a decoder for decoding dictionary encoded data for int32 columns
+type DictInt32Decoder struct {
+	dictDecoder
+}
+
+// Type returns the underlying physical type that can be decoded with this decoder
+func (DictInt32Decoder) Type() parquet.Type {
+	return parquet.Types.Int32
+}
+
+// Decode populates the passed in slice with min(len(out), remaining values) values,
+// decoding using hte dictionary to get the actual values. Returns the number of values
+// actually decoded and any error encountered.
+func (d *DictInt32Decoder) Decode(out []int32) (int, error) {
+	vals := utils.MinInt(len(out), d.nvals)
+	decoded, err := d.decode(out[:vals])
+	if err != nil {
+		return decoded, err
+	}
+	if vals != decoded {
+		return decoded, xerrors.New("parquet: dict eof exception")
+	}
+	d.nvals -= vals
+	return vals, nil
+}
+
+// Decode spaced is like Decode but will space out the data leaving slots for null values
+// based on the provided bitmap.
+func (d *DictInt32Decoder) DecodeSpaced(out []int32, nullCount int, validBits []byte, validBitsOffset int64) (int, error) {
+	vals := utils.MinInt(len(out), d.nvals)
+	decoded, err := d.decodeSpaced(out[:vals], nullCount, validBits, validBitsOffset)
+	if err != nil {
+		return decoded, err
+	}
+	if vals != decoded {
+		return decoded, xerrors.New("parquet: dict spaced eof exception")
+	}
+	d.nvals -= vals
+	return vals, nil
+}
+
+// Int32DictConverter is a helper for dictionary handling which is used for converting
+// run length encoded indexes into the actual values that are stored in the dictionary index page.
+type Int32DictConverter struct {
+	valueDecoder Int32Decoder
+	dict         []int32
+	zeroVal      int32
+}
+
+// ensure validates that we've decoded dictionary values up to the index
+// provided so that we don't need to decode the entire dictionary at start.
+func (dc *Int32DictConverter) ensure(idx utils.IndexType) error {
+	if len(dc.dict) <= int(idx) {
+		if cap(dc.dict) <= int(idx) {
+			val := make([]int32, int(idx+1)-len(dc.dict))
+			n, err := dc.valueDecoder.Decode(val)
+			if err != nil {
+				return err
+			}
+			dc.dict = append(dc.dict, val[:n]...)
+		} else {
+			cur := len(dc.dict)
+			n, err := dc.valueDecoder.Decode(dc.dict[cur : idx+1])
+			if err != nil {
+				return err
+			}
+			dc.dict = dc.dict[:cur+n]
+		}
+	}
+	return nil
+}
+
+// IsValid verifies that the set of indexes passed in are all valid indexes
+// in the dictionary and if necessary decodes dictionary indexes up to the index
+// requested.
+func (dc *Int32DictConverter) IsValid(idxes ...utils.IndexType) bool {
+	min, max := utils.GetMinMaxInt32(*(*[]int32)(unsafe.Pointer(&idxes)))
+	dc.ensure(utils.IndexType(max))
+
+	return min >= 0 && int(min) < len(dc.dict) && int(max) >= 0 && int(max) < len(dc.dict)
+}
+
+// Fill populates the slice passed in entirely with the value at dictionary index indicated by val
+func (dc *Int32DictConverter) Fill(out interface{}, val utils.IndexType) error {
+	o := out.([]int32)
+	if err := dc.ensure(val); err != nil {
+		return err
+	}
+	o[0] = dc.dict[val]
+	for i := 1; i < len(o); i *= 2 {
+		copy(o[i:], o[:i])
+	}
+	return nil
+}
+
+// FillZero populates the entire slice of out with the zero value for int32
+func (dc *Int32DictConverter) FillZero(out interface{}) {
+	o := out.([]int32)
+	o[0] = dc.zeroVal
+	for i := 1; i < len(o); i *= 2 {
+		copy(o[i:], o[:i])
+	}
+}
+
+// Copy populates the slice provided with the values in the dictionary at the indexes
+// in the vals slice.
+func (dc *Int32DictConverter) Copy(out interface{}, vals []utils.IndexType) error {
+	o := out.([]int32)
+	for idx, val := range vals {
+		o[idx] = dc.dict[val]
+	}
+	return nil
+}
+
+// Int64Encoder is the interface for all encoding types that implement encoding
+// int64 values.
+type Int64Encoder interface {
+	TypedEncoder
+	Put([]int64)
+	PutSpaced([]int64, []byte, int64)
+}
+
+// Int64Decoder is the interface for all encoding types that implement decoding
+// int64 values.
+type Int64Decoder interface {
+	TypedDecoder
+	Decode([]int64) (int, error)
+	DecodeSpaced([]int64, int, []byte, int64) (int, error)
+}
+
+// the int64EncoderTraits struct is used to make it easy to create encoders and decoders based on type
+type int64EncoderTraits struct{}
+
+// Encoder returns an encoder for int64 type data, using the specified encoding type and whether or not
+// it should be dictionary encoded.
+func (int64EncoderTraits) Encoder(e format.Encoding, useDict bool, descr *schema.Column, mem memory.Allocator) TypedEncoder {
+
+	switch e {
+	case format.Encoding_PLAIN:
+		return &PlainInt64Encoder{encoder: newEncoderBase(e, descr, mem)}
+	case format.Encoding_DELTA_BINARY_PACKED:
+		return DeltaBitPackInt64Encoder{&deltaBitPackEncoder{
+			encoder: newEncoderBase(e, descr, mem)}}
+	default:
+		panic("unimplemented encoding type")
+	}
+}
+
+// int64DecoderTraits is a helper struct for providing information regardless of the type
+// and used as a generic way to create a Decoder or Dictionary Decoder for int64 values
+type int64DecoderTraits struct{}
+
+// BytesRequired returns the number of bytes required to store n int64 values.
+func (int64DecoderTraits) BytesRequired(n int) int {
+	return arrow.Int64Traits.BytesRequired(n)
+}
+
+// Decoder returns a decoder for int64 typed data of the requested encoding type if available
+func (int64DecoderTraits) Decoder(e parquet.Encoding, descr *schema.Column, useDict bool, mem memory.Allocator) TypedDecoder {
+	if useDict {
+		return &DictInt64Decoder{dictDecoder{decoder: newDecoderBase(format.Encoding_RLE_DICTIONARY, descr), mem: mem}}
+	}
+
+	switch e {
+	case parquet.Encodings.Plain:
+		return &PlainInt64Decoder{decoder: newDecoderBase(format.Encoding(e), descr)}
+	case parquet.Encodings.DeltaBinaryPacked:
+		if mem == nil {
+			mem = memory.DefaultAllocator
+		}
+		return &DeltaBitPackInt64Decoder{
+			deltaBitPackDecoder: &deltaBitPackDecoder{
+				decoder: newDecoderBase(format.Encoding(e), descr),
+				mem:     mem,
+			}}
+	default:
+		panic("unimplemented encoding type")
+	}
+}
+
+// DictInt64Encoder is an encoder for int64 data using dictionary encoding
+type DictInt64Encoder struct {
+	dictEncoder
+}
+
+// Type returns the underlying physical type that can be encoded with this encoder
+func (enc *DictInt64Encoder) Type() parquet.Type {
+	return parquet.Types.Int64
+}
+
+// WriteDict populates the byte slice with the dictionary index
+func (enc *DictInt64Encoder) WriteDict(out []byte) {
+	enc.memo.CopyValues(arrow.Int64Traits.CastFromBytes(out))
+}
+
+// Put encodes the values passed in, adding to the index as needed.
+func (enc *DictInt64Encoder) Put(in []int64) {
+	for _, val := range in {
+		enc.dictEncoder.Put(val)
+	}
+}
+
+// PutSpaced is the same as Put but for when the data being encoded has slots open for
+// null values, using the bitmap provided to skip values as needed.
+func (enc *DictInt64Encoder) PutSpaced(in []int64, validBits []byte, validBitsOffset int64) {
+	utils.VisitSetBitRuns(validBits, validBitsOffset, int64(len(in)), func(pos, length int64) error {
+		for i := int64(0); i < length; i++ {
+			enc.dictEncoder.Put(in[i+pos])
+		}
+		return nil
+	})
+}
+
+// DictInt64Decoder is a decoder for decoding dictionary encoded data for int64 columns
+type DictInt64Decoder struct {
+	dictDecoder
+}
+
+// Type returns the underlying physical type that can be decoded with this decoder
+func (DictInt64Decoder) Type() parquet.Type {
+	return parquet.Types.Int64
+}
+
+// Decode populates the passed in slice with min(len(out), remaining values) values,
+// decoding using hte dictionary to get the actual values. Returns the number of values
+// actually decoded and any error encountered.
+func (d *DictInt64Decoder) Decode(out []int64) (int, error) {
+	vals := utils.MinInt(len(out), d.nvals)
+	decoded, err := d.decode(out[:vals])
+	if err != nil {
+		return decoded, err
+	}
+	if vals != decoded {
+		return decoded, xerrors.New("parquet: dict eof exception")
+	}
+	d.nvals -= vals
+	return vals, nil
+}
+
+// Decode spaced is like Decode but will space out the data leaving slots for null values
+// based on the provided bitmap.
+func (d *DictInt64Decoder) DecodeSpaced(out []int64, nullCount int, validBits []byte, validBitsOffset int64) (int, error) {
+	vals := utils.MinInt(len(out), d.nvals)
+	decoded, err := d.decodeSpaced(out[:vals], nullCount, validBits, validBitsOffset)
+	if err != nil {
+		return decoded, err
+	}
+	if vals != decoded {
+		return decoded, xerrors.New("parquet: dict spaced eof exception")
+	}
+	d.nvals -= vals
+	return vals, nil
+}
+
+// Int64DictConverter is a helper for dictionary handling which is used for converting
+// run length encoded indexes into the actual values that are stored in the dictionary index page.
+type Int64DictConverter struct {
+	valueDecoder Int64Decoder
+	dict         []int64
+	zeroVal      int64
+}
+
+// ensure validates that we've decoded dictionary values up to the index
+// provided so that we don't need to decode the entire dictionary at start.
+func (dc *Int64DictConverter) ensure(idx utils.IndexType) error {
+	if len(dc.dict) <= int(idx) {
+		if cap(dc.dict) <= int(idx) {
+			val := make([]int64, int(idx+1)-len(dc.dict))
+			n, err := dc.valueDecoder.Decode(val)
+			if err != nil {
+				return err
+			}
+			dc.dict = append(dc.dict, val[:n]...)
+		} else {
+			cur := len(dc.dict)
+			n, err := dc.valueDecoder.Decode(dc.dict[cur : idx+1])
+			if err != nil {
+				return err
+			}
+			dc.dict = dc.dict[:cur+n]
+		}
+	}
+	return nil
+}
+
+// IsValid verifies that the set of indexes passed in are all valid indexes
+// in the dictionary and if necessary decodes dictionary indexes up to the index
+// requested.
+func (dc *Int64DictConverter) IsValid(idxes ...utils.IndexType) bool {
+	min, max := utils.GetMinMaxInt32(*(*[]int32)(unsafe.Pointer(&idxes)))
+	dc.ensure(utils.IndexType(max))
+
+	return min >= 0 && int(min) < len(dc.dict) && int(max) >= 0 && int(max) < len(dc.dict)
+}
+
+// Fill populates the slice passed in entirely with the value at dictionary index indicated by val
+func (dc *Int64DictConverter) Fill(out interface{}, val utils.IndexType) error {
+	o := out.([]int64)
+	if err := dc.ensure(val); err != nil {
+		return err
+	}
+	o[0] = dc.dict[val]
+	for i := 1; i < len(o); i *= 2 {
+		copy(o[i:], o[:i])
+	}
+	return nil
+}
+
+// FillZero populates the entire slice of out with the zero value for int64
+func (dc *Int64DictConverter) FillZero(out interface{}) {
+	o := out.([]int64)
+	o[0] = dc.zeroVal
+	for i := 1; i < len(o); i *= 2 {
+		copy(o[i:], o[:i])
+	}
+}
+
+// Copy populates the slice provided with the values in the dictionary at the indexes
+// in the vals slice.
+func (dc *Int64DictConverter) Copy(out interface{}, vals []utils.IndexType) error {
+	o := out.([]int64)
+	for idx, val := range vals {
+		o[idx] = dc.dict[val]
+	}
+	return nil
+}
+
+// Int96Encoder is the interface for all encoding types that implement encoding
+// parquet.Int96 values.
+type Int96Encoder interface {
+	TypedEncoder
+	Put([]parquet.Int96)
+	PutSpaced([]parquet.Int96, []byte, int64)
+}
+
+// Int96Decoder is the interface for all encoding types that implement decoding
+// parquet.Int96 values.
+type Int96Decoder interface {
+	TypedDecoder
+	Decode([]parquet.Int96) (int, error)
+	DecodeSpaced([]parquet.Int96, int, []byte, int64) (int, error)
+}
+
+// the int96EncoderTraits struct is used to make it easy to create encoders and decoders based on type
+type int96EncoderTraits struct{}
+
+// Encoder returns an encoder for int96 type data, using the specified encoding type and whether or not
+// it should be dictionary encoded.
+// dictionary encoding does not exist for this type and Encoder will panic if useDict is true
+func (int96EncoderTraits) Encoder(e format.Encoding, useDict bool, descr *schema.Column, mem memory.Allocator) TypedEncoder {
+
+	switch e {
+	case format.Encoding_PLAIN:
+		return &PlainInt96Encoder{encoder: newEncoderBase(e, descr, mem)}
+	default:
+		panic("unimplemented encoding type")
+	}
+}
+
+// int96DecoderTraits is a helper struct for providing information regardless of the type
+// and used as a generic way to create a Decoder or Dictionary Decoder for int96 values
+type int96DecoderTraits struct{}
+
+// BytesRequired returns the number of bytes required to store n int96 values.
+func (int96DecoderTraits) BytesRequired(n int) int {
+	return parquet.Int96Traits.BytesRequired(n)
+}
+
+// Decoder returns a decoder for int96 typed data of the requested encoding type if available
+func (int96DecoderTraits) Decoder(e parquet.Encoding, descr *schema.Column, useDict bool, mem memory.Allocator) TypedDecoder {
+	if useDict {
+		panic("dictionary decoding unimplemented for int96")
+	}
+
+	switch e {
+	case parquet.Encodings.Plain:
+		return &PlainInt96Decoder{decoder: newDecoderBase(format.Encoding(e), descr)}
+	default:
+		panic("unimplemented encoding type")
+	}
+}
+
+// Float32Encoder is the interface for all encoding types that implement encoding
+// float32 values.
+type Float32Encoder interface {
+	TypedEncoder
+	Put([]float32)
+	PutSpaced([]float32, []byte, int64)
+}
+
+// Float32Decoder is the interface for all encoding types that implement decoding
+// float32 values.
+type Float32Decoder interface {
+	TypedDecoder
+	Decode([]float32) (int, error)
+	DecodeSpaced([]float32, int, []byte, int64) (int, error)
+}
+
+// the float32EncoderTraits struct is used to make it easy to create encoders and decoders based on type
+type float32EncoderTraits struct{}
+
+// Encoder returns an encoder for float32 type data, using the specified encoding type and whether or not
+// it should be dictionary encoded.
+func (float32EncoderTraits) Encoder(e format.Encoding, useDict bool, descr *schema.Column, mem memory.Allocator) TypedEncoder {
+
+	switch e {
+	case format.Encoding_PLAIN:
+		return &PlainFloat32Encoder{encoder: newEncoderBase(e, descr, mem)}
+	default:
+		panic("unimplemented encoding type")
+	}
+}
+
+// float32DecoderTraits is a helper struct for providing information regardless of the type
+// and used as a generic way to create a Decoder or Dictionary Decoder for float32 values
+type float32DecoderTraits struct{}
+
+// BytesRequired returns the number of bytes required to store n float32 values.
+func (float32DecoderTraits) BytesRequired(n int) int {
+	return arrow.Float32Traits.BytesRequired(n)
+}
+
+// Decoder returns a decoder for float32 typed data of the requested encoding type if available
+func (float32DecoderTraits) Decoder(e parquet.Encoding, descr *schema.Column, useDict bool, mem memory.Allocator) TypedDecoder {
+	if useDict {
+		return &DictFloat32Decoder{dictDecoder{decoder: newDecoderBase(format.Encoding_RLE_DICTIONARY, descr), mem: mem}}
+	}
+
+	switch e {
+	case parquet.Encodings.Plain:
+		return &PlainFloat32Decoder{decoder: newDecoderBase(format.Encoding(e), descr)}
+	default:
+		panic("unimplemented encoding type")
+	}
+}
+
+// DictFloat32Encoder is an encoder for float32 data using dictionary encoding
+type DictFloat32Encoder struct {
+	dictEncoder
+}
+
+// Type returns the underlying physical type that can be encoded with this encoder
+func (enc *DictFloat32Encoder) Type() parquet.Type {
+	return parquet.Types.Float
+}
+
+// WriteDict populates the byte slice with the dictionary index
+func (enc *DictFloat32Encoder) WriteDict(out []byte) {
+	enc.memo.CopyValues(arrow.Float32Traits.CastFromBytes(out))
+}
+
+// Put encodes the values passed in, adding to the index as needed.
+func (enc *DictFloat32Encoder) Put(in []float32) {
+	for _, val := range in {
+		enc.dictEncoder.Put(val)
+	}
+}
+
+// PutSpaced is the same as Put but for when the data being encoded has slots open for
+// null values, using the bitmap provided to skip values as needed.
+func (enc *DictFloat32Encoder) PutSpaced(in []float32, validBits []byte, validBitsOffset int64) {
+	utils.VisitSetBitRuns(validBits, validBitsOffset, int64(len(in)), func(pos, length int64) error {
+		for i := int64(0); i < length; i++ {
+			enc.dictEncoder.Put(in[i+pos])
+		}
+		return nil
+	})
+}
+
+// DictFloat32Decoder is a decoder for decoding dictionary encoded data for float32 columns
+type DictFloat32Decoder struct {
+	dictDecoder
+}
+
+// Type returns the underlying physical type that can be decoded with this decoder
+func (DictFloat32Decoder) Type() parquet.Type {
+	return parquet.Types.Float
+}
+
+// Decode populates the passed in slice with min(len(out), remaining values) values,
+// decoding using hte dictionary to get the actual values. Returns the number of values
+// actually decoded and any error encountered.
+func (d *DictFloat32Decoder) Decode(out []float32) (int, error) {
+	vals := utils.MinInt(len(out), d.nvals)
+	decoded, err := d.decode(out[:vals])
+	if err != nil {
+		return decoded, err
+	}
+	if vals != decoded {
+		return decoded, xerrors.New("parquet: dict eof exception")
+	}
+	d.nvals -= vals
+	return vals, nil
+}
+
+// Decode spaced is like Decode but will space out the data leaving slots for null values
+// based on the provided bitmap.
+func (d *DictFloat32Decoder) DecodeSpaced(out []float32, nullCount int, validBits []byte, validBitsOffset int64) (int, error) {
+	vals := utils.MinInt(len(out), d.nvals)
+	decoded, err := d.decodeSpaced(out[:vals], nullCount, validBits, validBitsOffset)
+	if err != nil {
+		return decoded, err
+	}
+	if vals != decoded {
+		return decoded, xerrors.New("parquet: dict spaced eof exception")
+	}
+	d.nvals -= vals
+	return vals, nil
+}
+
+// Float32DictConverter is a helper for dictionary handling which is used for converting
+// run length encoded indexes into the actual values that are stored in the dictionary index page.
+type Float32DictConverter struct {
+	valueDecoder Float32Decoder
+	dict         []float32
+	zeroVal      float32
+}
+
+// ensure validates that we've decoded dictionary values up to the index
+// provided so that we don't need to decode the entire dictionary at start.
+func (dc *Float32DictConverter) ensure(idx utils.IndexType) error {
+	if len(dc.dict) <= int(idx) {
+		if cap(dc.dict) <= int(idx) {
+			val := make([]float32, int(idx+1)-len(dc.dict))
+			n, err := dc.valueDecoder.Decode(val)
+			if err != nil {
+				return err
+			}
+			dc.dict = append(dc.dict, val[:n]...)
+		} else {
+			cur := len(dc.dict)
+			n, err := dc.valueDecoder.Decode(dc.dict[cur : idx+1])
+			if err != nil {
+				return err
+			}
+			dc.dict = dc.dict[:cur+n]
+		}
+	}
+	return nil
+}
+
+// IsValid verifies that the set of indexes passed in are all valid indexes
+// in the dictionary and if necessary decodes dictionary indexes up to the index
+// requested.
+func (dc *Float32DictConverter) IsValid(idxes ...utils.IndexType) bool {
+	min, max := utils.GetMinMaxInt32(*(*[]int32)(unsafe.Pointer(&idxes)))
+	dc.ensure(utils.IndexType(max))
+
+	return min >= 0 && int(min) < len(dc.dict) && int(max) >= 0 && int(max) < len(dc.dict)
+}
+
+// Fill populates the slice passed in entirely with the value at dictionary index indicated by val
+func (dc *Float32DictConverter) Fill(out interface{}, val utils.IndexType) error {
+	o := out.([]float32)
+	if err := dc.ensure(val); err != nil {
+		return err
+	}
+	o[0] = dc.dict[val]
+	for i := 1; i < len(o); i *= 2 {
+		copy(o[i:], o[:i])
+	}
+	return nil
+}
+
+// FillZero populates the entire slice of out with the zero value for float32
+func (dc *Float32DictConverter) FillZero(out interface{}) {
+	o := out.([]float32)
+	o[0] = dc.zeroVal
+	for i := 1; i < len(o); i *= 2 {
+		copy(o[i:], o[:i])
+	}
+}
+
+// Copy populates the slice provided with the values in the dictionary at the indexes
+// in the vals slice.
+func (dc *Float32DictConverter) Copy(out interface{}, vals []utils.IndexType) error {
+	o := out.([]float32)
+	for idx, val := range vals {
+		o[idx] = dc.dict[val]
+	}
+	return nil
+}
+
+// Float64Encoder is the interface for all encoding types that implement encoding
+// float64 values.
+type Float64Encoder interface {
+	TypedEncoder
+	Put([]float64)
+	PutSpaced([]float64, []byte, int64)
+}
+
+// Float64Decoder is the interface for all encoding types that implement decoding
+// float64 values.
+type Float64Decoder interface {
+	TypedDecoder
+	Decode([]float64) (int, error)
+	DecodeSpaced([]float64, int, []byte, int64) (int, error)
+}
+
+// the float64EncoderTraits struct is used to make it easy to create encoders and decoders based on type
+type float64EncoderTraits struct{}
+
+// Encoder returns an encoder for float64 type data, using the specified encoding type and whether or not
+// it should be dictionary encoded.
+func (float64EncoderTraits) Encoder(e format.Encoding, useDict bool, descr *schema.Column, mem memory.Allocator) TypedEncoder {
+
+	switch e {
+	case format.Encoding_PLAIN:
+		return &PlainFloat64Encoder{encoder: newEncoderBase(e, descr, mem)}
+	default:
+		panic("unimplemented encoding type")
+	}
+}
+
+// float64DecoderTraits is a helper struct for providing information regardless of the type
+// and used as a generic way to create a Decoder or Dictionary Decoder for float64 values
+type float64DecoderTraits struct{}
+
+// BytesRequired returns the number of bytes required to store n float64 values.
+func (float64DecoderTraits) BytesRequired(n int) int {
+	return arrow.Float64Traits.BytesRequired(n)
+}
+
+// Decoder returns a decoder for float64 typed data of the requested encoding type if available
+func (float64DecoderTraits) Decoder(e parquet.Encoding, descr *schema.Column, useDict bool, mem memory.Allocator) TypedDecoder {
+	if useDict {
+		return &DictFloat64Decoder{dictDecoder{decoder: newDecoderBase(format.Encoding_RLE_DICTIONARY, descr), mem: mem}}
+	}
+
+	switch e {
+	case parquet.Encodings.Plain:
+		return &PlainFloat64Decoder{decoder: newDecoderBase(format.Encoding(e), descr)}
+	default:
+		panic("unimplemented encoding type")
+	}
+}
+
+// DictFloat64Encoder is an encoder for float64 data using dictionary encoding
+type DictFloat64Encoder struct {
+	dictEncoder
+}
+
+// Type returns the underlying physical type that can be encoded with this encoder
+func (enc *DictFloat64Encoder) Type() parquet.Type {
+	return parquet.Types.Double
+}
+
+// WriteDict populates the byte slice with the dictionary index
+func (enc *DictFloat64Encoder) WriteDict(out []byte) {
+	enc.memo.CopyValues(arrow.Float64Traits.CastFromBytes(out))
+}
+
+// Put encodes the values passed in, adding to the index as needed.
+func (enc *DictFloat64Encoder) Put(in []float64) {
+	for _, val := range in {
+		enc.dictEncoder.Put(val)
+	}
+}
+
+// PutSpaced is the same as Put but for when the data being encoded has slots open for
+// null values, using the bitmap provided to skip values as needed.
+func (enc *DictFloat64Encoder) PutSpaced(in []float64, validBits []byte, validBitsOffset int64) {
+	utils.VisitSetBitRuns(validBits, validBitsOffset, int64(len(in)), func(pos, length int64) error {
+		for i := int64(0); i < length; i++ {
+			enc.dictEncoder.Put(in[i+pos])
+		}
+		return nil
+	})
+}
+
+// DictFloat64Decoder is a decoder for decoding dictionary encoded data for float64 columns
+type DictFloat64Decoder struct {
+	dictDecoder
+}
+
+// Type returns the underlying physical type that can be decoded with this decoder
+func (DictFloat64Decoder) Type() parquet.Type {
+	return parquet.Types.Double
+}
+
+// Decode populates the passed in slice with min(len(out), remaining values) values,
+// decoding using hte dictionary to get the actual values. Returns the number of values
+// actually decoded and any error encountered.
+func (d *DictFloat64Decoder) Decode(out []float64) (int, error) {
+	vals := utils.MinInt(len(out), d.nvals)
+	decoded, err := d.decode(out[:vals])
+	if err != nil {
+		return decoded, err
+	}
+	if vals != decoded {
+		return decoded, xerrors.New("parquet: dict eof exception")
+	}
+	d.nvals -= vals
+	return vals, nil
+}
+
+// Decode spaced is like Decode but will space out the data leaving slots for null values
+// based on the provided bitmap.
+func (d *DictFloat64Decoder) DecodeSpaced(out []float64, nullCount int, validBits []byte, validBitsOffset int64) (int, error) {
+	vals := utils.MinInt(len(out), d.nvals)
+	decoded, err := d.decodeSpaced(out[:vals], nullCount, validBits, validBitsOffset)
+	if err != nil {
+		return decoded, err
+	}
+	if vals != decoded {
+		return decoded, xerrors.New("parquet: dict spaced eof exception")
+	}
+	d.nvals -= vals
+	return vals, nil
+}
+
+// Float64DictConverter is a helper for dictionary handling which is used for converting
+// run length encoded indexes into the actual values that are stored in the dictionary index page.
+type Float64DictConverter struct {
+	valueDecoder Float64Decoder
+	dict         []float64
+	zeroVal      float64
+}
+
+// ensure validates that we've decoded dictionary values up to the index
+// provided so that we don't need to decode the entire dictionary at start.
+func (dc *Float64DictConverter) ensure(idx utils.IndexType) error {
+	if len(dc.dict) <= int(idx) {
+		if cap(dc.dict) <= int(idx) {
+			val := make([]float64, int(idx+1)-len(dc.dict))
+			n, err := dc.valueDecoder.Decode(val)
+			if err != nil {
+				return err
+			}
+			dc.dict = append(dc.dict, val[:n]...)
+		} else {
+			cur := len(dc.dict)
+			n, err := dc.valueDecoder.Decode(dc.dict[cur : idx+1])
+			if err != nil {
+				return err
+			}
+			dc.dict = dc.dict[:cur+n]
+		}
+	}
+	return nil
+}
+
+// IsValid verifies that the set of indexes passed in are all valid indexes
+// in the dictionary and if necessary decodes dictionary indexes up to the index
+// requested.
+func (dc *Float64DictConverter) IsValid(idxes ...utils.IndexType) bool {
+	min, max := utils.GetMinMaxInt32(*(*[]int32)(unsafe.Pointer(&idxes)))
+	dc.ensure(utils.IndexType(max))
+
+	return min >= 0 && int(min) < len(dc.dict) && int(max) >= 0 && int(max) < len(dc.dict)
+}
+
+// Fill populates the slice passed in entirely with the value at dictionary index indicated by val
+func (dc *Float64DictConverter) Fill(out interface{}, val utils.IndexType) error {
+	o := out.([]float64)
+	if err := dc.ensure(val); err != nil {
+		return err
+	}
+	o[0] = dc.dict[val]
+	for i := 1; i < len(o); i *= 2 {
+		copy(o[i:], o[:i])
+	}
+	return nil
+}
+
+// FillZero populates the entire slice of out with the zero value for float64
+func (dc *Float64DictConverter) FillZero(out interface{}) {
+	o := out.([]float64)
+	o[0] = dc.zeroVal
+	for i := 1; i < len(o); i *= 2 {
+		copy(o[i:], o[:i])
+	}
+}
+
+// Copy populates the slice provided with the values in the dictionary at the indexes
+// in the vals slice.
+func (dc *Float64DictConverter) Copy(out interface{}, vals []utils.IndexType) error {
+	o := out.([]float64)
+	for idx, val := range vals {
+		o[idx] = dc.dict[val]
+	}
+	return nil
+}
+
+// BooleanEncoder is the interface for all encoding types that implement encoding
+// bool values.
+type BooleanEncoder interface {
+	TypedEncoder
+	Put([]bool)
+	PutSpaced([]bool, []byte, int64)
+}
+
+// BooleanDecoder is the interface for all encoding types that implement decoding
+// bool values.
+type BooleanDecoder interface {
+	TypedDecoder
+	Decode([]bool) (int, error)
+	DecodeSpaced([]bool, int, []byte, int64) (int, error)
+}
+
+// the boolEncoderTraits struct is used to make it easy to create encoders and decoders based on type
+type boolEncoderTraits struct{}
+
+// Encoder returns an encoder for bool type data, using the specified encoding type and whether or not
+// it should be dictionary encoded.
+// dictionary encoding does not exist for this type and Encoder will panic if useDict is true
+func (boolEncoderTraits) Encoder(e format.Encoding, useDict bool, descr *schema.Column, mem memory.Allocator) TypedEncoder {
+
+	switch e {
+	case format.Encoding_PLAIN:
+		return &PlainBooleanEncoder{encoder: newEncoderBase(e, descr, mem)}
+	default:
+		panic("unimplemented encoding type")
+	}
+}
+
+// boolDecoderTraits is a helper struct for providing information regardless of the type
+// and used as a generic way to create a Decoder or Dictionary Decoder for bool values
+type boolDecoderTraits struct{}
+
+// BytesRequired returns the number of bytes required to store n bool values.
+func (boolDecoderTraits) BytesRequired(n int) int {
+	return arrow.BooleanTraits.BytesRequired(n)
+}
+
+// Decoder returns a decoder for bool typed data of the requested encoding type if available
+func (boolDecoderTraits) Decoder(e parquet.Encoding, descr *schema.Column, useDict bool, mem memory.Allocator) TypedDecoder {
+	if useDict {
+		panic("dictionary decoding unimplemented for bool")
+	}
+
+	switch e {
+	case parquet.Encodings.Plain:
+		return &PlainBooleanDecoder{decoder: newDecoderBase(format.Encoding(e), descr)}
+	default:
+		panic("unimplemented encoding type")
+	}
+}
+
+// ByteArrayEncoder is the interface for all encoding types that implement encoding
+// parquet.ByteArray values.
+type ByteArrayEncoder interface {
+	TypedEncoder
+	Put([]parquet.ByteArray)
+	PutSpaced([]parquet.ByteArray, []byte, int64)
+}
+
+// ByteArrayDecoder is the interface for all encoding types that implement decoding
+// parquet.ByteArray values.
+type ByteArrayDecoder interface {
+	TypedDecoder
+	Decode([]parquet.ByteArray) (int, error)
+	DecodeSpaced([]parquet.ByteArray, int, []byte, int64) (int, error)
+}
+
+// the byteArrayEncoderTraits struct is used to make it easy to create encoders and decoders based on type
+type byteArrayEncoderTraits struct{}
+
+// Encoder returns an encoder for byteArray type data, using the specified encoding type and whether or not
+// it should be dictionary encoded.
+func (byteArrayEncoderTraits) Encoder(e format.Encoding, useDict bool, descr *schema.Column, mem memory.Allocator) TypedEncoder {
+
+	switch e {
+	case format.Encoding_PLAIN:
+		return &PlainByteArrayEncoder{encoder: newEncoderBase(e, descr, mem)}
+	case format.Encoding_DELTA_LENGTH_BYTE_ARRAY:
+		return &DeltaLengthByteArrayEncoder{
+			encoder: newEncoderBase(e, descr, mem),
+			lengthEncoder: &DeltaBitPackInt32Encoder{
+				&deltaBitPackEncoder{encoder: newEncoderBase(e, descr, mem)}},
+		}
+	case format.Encoding_DELTA_BYTE_ARRAY:
+		return &DeltaByteArrayEncoder{
+			encoder: newEncoderBase(e, descr, mem),
+		}
+	default:
+		panic("unimplemented encoding type")
+	}
+}
+
+// byteArrayDecoderTraits is a helper struct for providing information regardless of the type
+// and used as a generic way to create a Decoder or Dictionary Decoder for byteArray values
+type byteArrayDecoderTraits struct{}
+
+// BytesRequired returns the number of bytes required to store n byteArray values.
+func (byteArrayDecoderTraits) BytesRequired(n int) int {
+	return parquet.ByteArrayTraits.BytesRequired(n)
+}
+
+// Decoder returns a decoder for byteArray typed data of the requested encoding type if available
+func (byteArrayDecoderTraits) Decoder(e parquet.Encoding, descr *schema.Column, useDict bool, mem memory.Allocator) TypedDecoder {
+	if useDict {
+		return &DictByteArrayDecoder{dictDecoder{decoder: newDecoderBase(format.Encoding_RLE_DICTIONARY, descr), mem: mem}}
+	}
+
+	switch e {
+	case parquet.Encodings.Plain:
+		return &PlainByteArrayDecoder{decoder: newDecoderBase(format.Encoding(e), descr)}
+	case parquet.Encodings.DeltaLengthByteArray:
+		if mem == nil {
+			mem = memory.DefaultAllocator
+		}
+		return &DeltaLengthByteArrayDecoder{
+			decoder: newDecoderBase(format.Encoding(e), descr),
+			mem:     mem,
+		}
+	case parquet.Encodings.DeltaByteArray:
+		if mem == nil {
+			mem = memory.DefaultAllocator
+		}
+		return &DeltaByteArrayDecoder{
+			DeltaLengthByteArrayDecoder: &DeltaLengthByteArrayDecoder{
+				decoder: newDecoderBase(format.Encoding(e), descr),
+				mem:     mem,
+			}}
+	default:
+		panic("unimplemented encoding type")
+	}
+}
+
+// DictByteArrayEncoder is an encoder for parquet.ByteArray data using dictionary encoding
+type DictByteArrayEncoder struct {
+	dictEncoder
+}
+
+// Type returns the underlying physical type that can be encoded with this encoder
+func (enc *DictByteArrayEncoder) Type() parquet.Type {
+	return parquet.Types.ByteArray
+}
+
+// DictByteArrayDecoder is a decoder for decoding dictionary encoded data for parquet.ByteArray columns
+type DictByteArrayDecoder struct {
+	dictDecoder
+}
+
+// Type returns the underlying physical type that can be decoded with this decoder
+func (DictByteArrayDecoder) Type() parquet.Type {
+	return parquet.Types.ByteArray
+}
+
+// Decode populates the passed in slice with min(len(out), remaining values) values,
+// decoding using hte dictionary to get the actual values. Returns the number of values
+// actually decoded and any error encountered.
+func (d *DictByteArrayDecoder) Decode(out []parquet.ByteArray) (int, error) {
+	vals := utils.MinInt(len(out), d.nvals)
+	decoded, err := d.decode(out[:vals])
+	if err != nil {
+		return decoded, err
+	}
+	if vals != decoded {
+		return decoded, xerrors.New("parquet: dict eof exception")
+	}
+	d.nvals -= vals
+	return vals, nil
+}
+
+// Decode spaced is like Decode but will space out the data leaving slots for null values
+// based on the provided bitmap.
+func (d *DictByteArrayDecoder) DecodeSpaced(out []parquet.ByteArray, nullCount int, validBits []byte, validBitsOffset int64) (int, error) {
+	vals := utils.MinInt(len(out), d.nvals)
+	decoded, err := d.decodeSpaced(out[:vals], nullCount, validBits, validBitsOffset)
+	if err != nil {
+		return decoded, err
+	}
+	if vals != decoded {
+		return decoded, xerrors.New("parquet: dict spaced eof exception")
+	}
+	d.nvals -= vals
+	return vals, nil
+}
+
+// ByteArrayDictConverter is a helper for dictionary handling which is used for converting
+// run length encoded indexes into the actual values that are stored in the dictionary index page.
+type ByteArrayDictConverter struct {
+	valueDecoder ByteArrayDecoder
+	dict         []parquet.ByteArray
+	zeroVal      parquet.ByteArray
+}
+
+// ensure validates that we've decoded dictionary values up to the index
+// provided so that we don't need to decode the entire dictionary at start.
+func (dc *ByteArrayDictConverter) ensure(idx utils.IndexType) error {
+	if len(dc.dict) <= int(idx) {
+		if cap(dc.dict) <= int(idx) {
+			val := make([]parquet.ByteArray, int(idx+1)-len(dc.dict))
+			n, err := dc.valueDecoder.Decode(val)
+			if err != nil {
+				return err
+			}
+			dc.dict = append(dc.dict, val[:n]...)
+		} else {
+			cur := len(dc.dict)
+			n, err := dc.valueDecoder.Decode(dc.dict[cur : idx+1])
+			if err != nil {
+				return err
+			}
+			dc.dict = dc.dict[:cur+n]
+		}
+	}
+	return nil
+}
+
+// IsValid verifies that the set of indexes passed in are all valid indexes
+// in the dictionary and if necessary decodes dictionary indexes up to the index
+// requested.
+func (dc *ByteArrayDictConverter) IsValid(idxes ...utils.IndexType) bool {
+	min, max := utils.GetMinMaxInt32(*(*[]int32)(unsafe.Pointer(&idxes)))
+	dc.ensure(utils.IndexType(max))
+
+	return min >= 0 && int(min) < len(dc.dict) && int(max) >= 0 && int(max) < len(dc.dict)
+}
+
+// Fill populates the slice passed in entirely with the value at dictionary index indicated by val
+func (dc *ByteArrayDictConverter) Fill(out interface{}, val utils.IndexType) error {
+	o := out.([]parquet.ByteArray)
+	if err := dc.ensure(val); err != nil {
+		return err
+	}
+	o[0] = dc.dict[val]
+	for i := 1; i < len(o); i *= 2 {
+		copy(o[i:], o[:i])
+	}
+	return nil
+}
+
+// FillZero populates the entire slice of out with the zero value for parquet.ByteArray
+func (dc *ByteArrayDictConverter) FillZero(out interface{}) {
+	o := out.([]parquet.ByteArray)
+	o[0] = dc.zeroVal
+	for i := 1; i < len(o); i *= 2 {
+		copy(o[i:], o[:i])
+	}
+}
+
+// Copy populates the slice provided with the values in the dictionary at the indexes
+// in the vals slice.
+func (dc *ByteArrayDictConverter) Copy(out interface{}, vals []utils.IndexType) error {
+	o := out.([]parquet.ByteArray)
+	for idx, val := range vals {
+		o[idx] = dc.dict[val]
+	}
+	return nil
+}
+
+// FixedLenByteArrayEncoder is the interface for all encoding types that implement encoding
+// parquet.FixedLenByteArray values.
+type FixedLenByteArrayEncoder interface {
+	TypedEncoder
+	Put([]parquet.FixedLenByteArray)
+	PutSpaced([]parquet.FixedLenByteArray, []byte, int64)
+}
+
+// FixedLenByteArrayDecoder is the interface for all encoding types that implement decoding
+// parquet.FixedLenByteArray values.
+type FixedLenByteArrayDecoder interface {
+	TypedDecoder
+	Decode([]parquet.FixedLenByteArray) (int, error)
+	DecodeSpaced([]parquet.FixedLenByteArray, int, []byte, int64) (int, error)
+}
+
+// the fixedLenByteArrayEncoderTraits struct is used to make it easy to create encoders and decoders based on type
+type fixedLenByteArrayEncoderTraits struct{}
+
+// Encoder returns an encoder for fixedLenByteArray type data, using the specified encoding type and whether or not
+// it should be dictionary encoded.
+func (fixedLenByteArrayEncoderTraits) Encoder(e format.Encoding, useDict bool, descr *schema.Column, mem memory.Allocator) TypedEncoder {
+
+	switch e {
+	case format.Encoding_PLAIN:
+		return &PlainFixedLenByteArrayEncoder{encoder: newEncoderBase(e, descr, mem)}
+	default:
+		panic("unimplemented encoding type")
+	}
+}
+
+// fixedLenByteArrayDecoderTraits is a helper struct for providing information regardless of the type
+// and used as a generic way to create a Decoder or Dictionary Decoder for fixedLenByteArray values
+type fixedLenByteArrayDecoderTraits struct{}
+
+// BytesRequired returns the number of bytes required to store n fixedLenByteArray values.
+func (fixedLenByteArrayDecoderTraits) BytesRequired(n int) int {
+	return parquet.FixedLenByteArrayTraits.BytesRequired(n)
+}
+
+// Decoder returns a decoder for fixedLenByteArray typed data of the requested encoding type if available
+func (fixedLenByteArrayDecoderTraits) Decoder(e parquet.Encoding, descr *schema.Column, useDict bool, mem memory.Allocator) TypedDecoder {
+	if useDict {
+		return &DictFixedLenByteArrayDecoder{dictDecoder{decoder: newDecoderBase(format.Encoding_RLE_DICTIONARY, descr), mem: mem}}
+	}
+
+	switch e {
+	case parquet.Encodings.Plain:
+		return &PlainFixedLenByteArrayDecoder{decoder: newDecoderBase(format.Encoding(e), descr)}
+	default:
+		panic("unimplemented encoding type")
+	}
+}
+
+// DictFixedLenByteArrayEncoder is an encoder for parquet.FixedLenByteArray data using dictionary encoding
+type DictFixedLenByteArrayEncoder struct {
+	dictEncoder
+}
+
+// Type returns the underlying physical type that can be encoded with this encoder
+func (enc *DictFixedLenByteArrayEncoder) Type() parquet.Type {
+	return parquet.Types.FixedLenByteArray
+}
+
+// DictFixedLenByteArrayDecoder is a decoder for decoding dictionary encoded data for parquet.FixedLenByteArray columns
+type DictFixedLenByteArrayDecoder struct {
+	dictDecoder
+}
+
+// Type returns the underlying physical type that can be decoded with this decoder
+func (DictFixedLenByteArrayDecoder) Type() parquet.Type {
+	return parquet.Types.FixedLenByteArray
+}
+
+// Decode populates the passed in slice with min(len(out), remaining values) values,
+// decoding using hte dictionary to get the actual values. Returns the number of values
+// actually decoded and any error encountered.
+func (d *DictFixedLenByteArrayDecoder) Decode(out []parquet.FixedLenByteArray) (int, error) {
+	vals := utils.MinInt(len(out), d.nvals)
+	decoded, err := d.decode(out[:vals])
+	if err != nil {
+		return decoded, err
+	}
+	if vals != decoded {
+		return decoded, xerrors.New("parquet: dict eof exception")
+	}
+	d.nvals -= vals
+	return vals, nil
+}
+
+// Decode spaced is like Decode but will space out the data leaving slots for null values
+// based on the provided bitmap.
+func (d *DictFixedLenByteArrayDecoder) DecodeSpaced(out []parquet.FixedLenByteArray, nullCount int, validBits []byte, validBitsOffset int64) (int, error) {
+	vals := utils.MinInt(len(out), d.nvals)
+	decoded, err := d.decodeSpaced(out[:vals], nullCount, validBits, validBitsOffset)
+	if err != nil {
+		return decoded, err
+	}
+	if vals != decoded {
+		return decoded, xerrors.New("parquet: dict spaced eof exception")
+	}
+	d.nvals -= vals
+	return vals, nil
+}
+
+// FixedLenByteArrayDictConverter is a helper for dictionary handling which is used for converting
+// run length encoded indexes into the actual values that are stored in the dictionary index page.
+type FixedLenByteArrayDictConverter struct {
+	valueDecoder FixedLenByteArrayDecoder
+	dict         []parquet.FixedLenByteArray
+	zeroVal      parquet.FixedLenByteArray
+}
+
+// ensure validates that we've decoded dictionary values up to the index
+// provided so that we don't need to decode the entire dictionary at start.
+func (dc *FixedLenByteArrayDictConverter) ensure(idx utils.IndexType) error {
+	if len(dc.dict) <= int(idx) {
+		if cap(dc.dict) <= int(idx) {
+			val := make([]parquet.FixedLenByteArray, int(idx+1)-len(dc.dict))
+			n, err := dc.valueDecoder.Decode(val)
+			if err != nil {
+				return err
+			}
+			dc.dict = append(dc.dict, val[:n]...)
+		} else {
+			cur := len(dc.dict)
+			n, err := dc.valueDecoder.Decode(dc.dict[cur : idx+1])
+			if err != nil {
+				return err
+			}
+			dc.dict = dc.dict[:cur+n]
+		}
+	}
+	return nil
+}
+
+// IsValid verifies that the set of indexes passed in are all valid indexes
+// in the dictionary and if necessary decodes dictionary indexes up to the index
+// requested.
+func (dc *FixedLenByteArrayDictConverter) IsValid(idxes ...utils.IndexType) bool {
+	min, max := utils.GetMinMaxInt32(*(*[]int32)(unsafe.Pointer(&idxes)))
+	dc.ensure(utils.IndexType(max))
+
+	return min >= 0 && int(min) < len(dc.dict) && int(max) >= 0 && int(max) < len(dc.dict)
+}
+
+// Fill populates the slice passed in entirely with the value at dictionary index indicated by val
+func (dc *FixedLenByteArrayDictConverter) Fill(out interface{}, val utils.IndexType) error {
+	o := out.([]parquet.FixedLenByteArray)
+	if err := dc.ensure(val); err != nil {
+		return err
+	}
+	o[0] = dc.dict[val]
+	for i := 1; i < len(o); i *= 2 {
+		copy(o[i:], o[:i])
+	}
+	return nil
+}
+
+// FillZero populates the entire slice of out with the zero value for parquet.FixedLenByteArray
+func (dc *FixedLenByteArrayDictConverter) FillZero(out interface{}) {
+	o := out.([]parquet.FixedLenByteArray)
+	o[0] = dc.zeroVal
+	for i := 1; i < len(o); i *= 2 {
+		copy(o[i:], o[:i])
+	}
+}
+
+// Copy populates the slice provided with the values in the dictionary at the indexes
+// in the vals slice.
+func (dc *FixedLenByteArrayDictConverter) Copy(out interface{}, vals []utils.IndexType) error {
+	o := out.([]parquet.FixedLenByteArray)
+	for idx, val := range vals {
+		o[idx] = dc.dict[val]
+	}
+	return nil
+}
+
+// NewDictConverter creates a dict converter of the appropriate type, using the passed in
+// decoder as the decoder to decode the dictionary index.
+func NewDictConverter(dict TypedDecoder) utils.DictionaryConverter {
+	switch dict.Type() {
+	case parquet.Types.Int32:
+		return &Int32DictConverter{valueDecoder: dict.(Int32Decoder), dict: make([]int32, 0, dict.ValuesLeft())}
+	case parquet.Types.Int64:
+		return &Int64DictConverter{valueDecoder: dict.(Int64Decoder), dict: make([]int64, 0, dict.ValuesLeft())}
+	case parquet.Types.Float:
+		return &Float32DictConverter{valueDecoder: dict.(Float32Decoder), dict: make([]float32, 0, dict.ValuesLeft())}
+	case parquet.Types.Double:
+		return &Float64DictConverter{valueDecoder: dict.(Float64Decoder), dict: make([]float64, 0, dict.ValuesLeft())}
+	case parquet.Types.ByteArray:
+		return &ByteArrayDictConverter{valueDecoder: dict.(ByteArrayDecoder), dict: make([]parquet.ByteArray, 0, dict.ValuesLeft())}
+	case parquet.Types.FixedLenByteArray:
+		return &FixedLenByteArrayDictConverter{valueDecoder: dict.(FixedLenByteArrayDecoder), dict: make([]parquet.FixedLenByteArray, 0, dict.ValuesLeft())}
+	default:
+		return nil
+	}
+}
+
+// helper function to get encoding traits object for the physical type indicated
+func getEncodingTraits(t parquet.Type) EncoderTraits {
+	switch t {
+	case parquet.Types.Int32:
+		return Int32EncoderTraits
+	case parquet.Types.Int64:
+		return Int64EncoderTraits
+	case parquet.Types.Int96:
+		return Int96EncoderTraits
+	case parquet.Types.Float:
+		return Float32EncoderTraits
+	case parquet.Types.Double:
+		return Float64EncoderTraits
+	case parquet.Types.Boolean:
+		return BooleanEncoderTraits
+	case parquet.Types.ByteArray:
+		return ByteArrayEncoderTraits
+	case parquet.Types.FixedLenByteArray:
+		return FixedLenByteArrayEncoderTraits
+	default:
+		return nil
+	}
+}
+
+// helper function to get decoding traits object for the physical type indicated
+func getDecodingTraits(t parquet.Type) DecoderTraits {
+	switch t {
+	case parquet.Types.Int32:
+		return Int32DecoderTraits
+	case parquet.Types.Int64:
+		return Int64DecoderTraits
+	case parquet.Types.Int96:
+		return Int96DecoderTraits
+	case parquet.Types.Float:
+		return Float32DecoderTraits
+	case parquet.Types.Double:
+		return Float64DecoderTraits
+	case parquet.Types.Boolean:
+		return BooleanDecoderTraits
+	case parquet.Types.ByteArray:
+		return ByteArrayDecoderTraits
+	case parquet.Types.FixedLenByteArray:
+		return FixedLenByteArrayDecoderTraits
+	default:
+		return nil
+	}
+}
diff --git a/go/parquet/internal/encoding/typed_encoder.gen.go.tmpl b/go/parquet/internal/encoding/typed_encoder.gen.go.tmpl
new file mode 100644
index 00000000000..509266b6878
--- /dev/null
+++ b/go/parquet/internal/encoding/typed_encoder.gen.go.tmpl
@@ -0,0 +1,341 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package encoding
+
+import (
+  "github.com/apache/arrow/go/parquet"
+  "github.com/apache/arrow/go/parquet/schema"
+  format "github.com/apache/arrow/go/parquet/internal/gen-go/parquet"
+  "github.com/apache/arrow/go/arrow"
+  "github.com/apache/arrow/go/parquet/internal/utils"
+)
+
+// fully typed encoder interfaces to enable writing against encoder/decoders
+// without having to care about what encoding type is actually being used.
+
+var (
+{{range .In}}
+  {{.Name}}EncoderTraits {{.lower}}EncoderTraits
+  {{.Name}}DecoderTraits {{.lower}}DecoderTraits
+{{- end}}
+)
+
+{{range .In}}
+// {{.Name}}Encoder is the interface for all encoding types that implement encoding
+// {{.name}} values.
+type {{.Name}}Encoder interface {
+  TypedEncoder
+  Put([]{{.name}})
+  PutSpaced([]{{.name}}, []byte, int64)
+}
+
+// {{.Name}}Decoder is the interface for all encoding types that implement decoding
+// {{.name}} values.
+type {{.Name}}Decoder interface {
+  TypedDecoder
+  Decode([]{{.name}}) (int, error)
+  DecodeSpaced([]{{.name}}, int, []byte, int64) (int, error)
+}
+
+// the {{.lower}}EncoderTraits struct is used to make it easy to create encoders and decoders based on type
+type {{.lower}}EncoderTraits struct{}
+
+// Encoder returns an encoder for {{.lower}} type data, using the specified encoding type and whether or not
+// it should be dictionary encoded.
+{{- if or (eq .Name "Boolean") (eq .Name "Int96")}}
+// dictionary encoding does not exist for this type and Encoder will panic if useDict is true
+{{- end }}
+func ({{.lower}}EncoderTraits) Encoder(e format.Encoding, useDict bool, descr *schema.Column, mem memory.Allocator) TypedEncoder {
+  {{/* if useDict {
+{{- if or (eq .Name "Boolean") (eq .Name "Int96")}}
+    panic("parquet: no {{.name}} dictionary encoding")
+{{- else}}
+    return &Dict{{.Name}}Encoder{newDictEncoderBase(descr, New{{if and (ne .Name "ByteArray") (ne .Name "FixedLenByteArray")}}{{.Name}}Dictionary(){{else}}BinaryDictionary(mem){{end}}, mem)}
+{{- end}}
+  } */}}
+
+  switch e {
+  case format.Encoding_PLAIN:
+    return &Plain{{.Name}}Encoder{encoder: newEncoderBase(e, descr, mem)}
+{{- if or (eq .Name "Int32") (eq .Name "Int64")}}
+  case format.Encoding_DELTA_BINARY_PACKED:
+    return DeltaBitPack{{.Name}}Encoder{&deltaBitPackEncoder{
+      encoder: newEncoderBase(e, descr, mem)}}
+{{- end}}
+{{- if eq .Name "ByteArray"}}
+  case format.Encoding_DELTA_LENGTH_BYTE_ARRAY:
+    return &DeltaLengthByteArrayEncoder{
+      encoder: newEncoderBase(e, descr, mem),
+      lengthEncoder: &DeltaBitPackInt32Encoder{
+        &deltaBitPackEncoder{encoder: newEncoderBase(e, descr, mem)}},
+    }
+  case format.Encoding_DELTA_BYTE_ARRAY:
+    return &DeltaByteArrayEncoder{
+      encoder: newEncoderBase(e, descr, mem),
+    }
+{{- end}}
+  default:
+    panic("unimplemented encoding type")
+  }
+}
+
+// {{.lower}}DecoderTraits is a helper struct for providing information regardless of the type
+// and used as a generic way to create a Decoder or Dictionary Decoder for {{.lower}} values
+type {{.lower}}DecoderTraits struct{}
+
+// BytesRequired returns the number of bytes required to store n {{.lower}} values.
+func ({{.lower}}DecoderTraits) BytesRequired(n int) int {
+  return {{.prefix}}.{{.Name}}Traits.BytesRequired(n)
+}
+
+// Decoder returns a decoder for {{.lower}} typed data of the requested encoding type if available
+func ({{.lower}}DecoderTraits) Decoder(e parquet.Encoding, descr *schema.Column, useDict bool, mem memory.Allocator) TypedDecoder {
+  if useDict {
+{{- if and (ne .Name "Boolean") (ne .Name "Int96")}}
+    return &Dict{{.Name}}Decoder{dictDecoder{decoder: newDecoderBase(format.Encoding_RLE_DICTIONARY, descr), mem: mem}}
+{{- else}}
+    panic("dictionary decoding unimplemented for {{.lower}}")
+{{- end}}
+  }
+
+  switch e {
+  case parquet.Encodings.Plain:
+    return &Plain{{.Name}}Decoder{decoder: newDecoderBase(format.Encoding(e), descr)}
+{{- if or (eq .Name "Int32") (eq .Name "Int64")}}
+  case parquet.Encodings.DeltaBinaryPacked:
+    if mem == nil {
+      mem = memory.DefaultAllocator
+    }
+    return &DeltaBitPack{{.Name}}Decoder{
+      deltaBitPackDecoder: &deltaBitPackDecoder{
+        decoder: newDecoderBase(format.Encoding(e), descr),
+        mem:     mem,
+      }}
+{{- end}}
+{{- if eq .Name "ByteArray"}}
+  case parquet.Encodings.DeltaLengthByteArray:
+    if mem == nil {
+      mem = memory.DefaultAllocator
+    }
+    return &DeltaLengthByteArrayDecoder{
+      decoder: newDecoderBase(format.Encoding(e), descr),
+      mem: mem,
+    }
+  case parquet.Encodings.DeltaByteArray:
+    if mem == nil {
+      mem = memory.DefaultAllocator
+    }
+    return &DeltaByteArrayDecoder{
+      DeltaLengthByteArrayDecoder: &DeltaLengthByteArrayDecoder{
+        decoder: newDecoderBase(format.Encoding(e), descr),
+        mem: mem,
+      }}
+{{- end}}
+  default:
+    panic("unimplemented encoding type")
+  }
+}
+
+{{if and (ne .Name "Boolean") (ne .Name "Int96")}}
+// Dict{{.Name}}Encoder is an encoder for {{.name}} data using dictionary encoding
+type Dict{{.Name}}Encoder struct {
+  dictEncoder
+}
+
+// Type returns the underlying physical type that can be encoded with this encoder
+func (enc *Dict{{.Name}}Encoder) Type() parquet.Type {
+  return parquet.Types.{{if .physical}}{{.physical}}{{else}}{{.Name}}{{end}}
+}
+
+{{if and (ne .Name "ByteArray") (ne .Name "FixedLenByteArray")}}
+// WriteDict populates the byte slice with the dictionary index
+func (enc *Dict{{.Name}}Encoder) WriteDict(out []byte) {
+  enc.memo.CopyValues({{.prefix}}.{{.Name}}Traits.CastFromBytes(out))
+}
+
+// Put encodes the values passed in, adding to the index as needed.
+func (enc *Dict{{.Name}}Encoder) Put(in []{{.name}}) {
+  for _, val := range in {
+    enc.dictEncoder.Put(val)
+  }
+}
+
+// PutSpaced is the same as Put but for when the data being encoded has slots open for
+// null values, using the bitmap provided to skip values as needed.
+func (enc *Dict{{.Name}}Encoder) PutSpaced(in []{{.name}}, validBits []byte, validBitsOffset int64) {
+  utils.VisitSetBitRuns(validBits, validBitsOffset, int64(len(in)), func(pos, length int64) error {
+    for i := int64(0); i < length; i++ {
+      enc.dictEncoder.Put(in[i+pos])
+    }
+    return nil
+  })
+}
+{{end}}
+
+// Dict{{.Name}}Decoder is a decoder for decoding dictionary encoded data for {{.name}} columns
+type Dict{{.Name}}Decoder struct {
+  dictDecoder
+}
+
+// Type returns the underlying physical type that can be decoded with this decoder
+func (Dict{{.Name}}Decoder) Type() parquet.Type {
+  return parquet.Types.{{if .physical}}{{.physical}}{{else}}{{.Name}}{{end}}
+}
+
+// Decode populates the passed in slice with min(len(out), remaining values) values,
+// decoding using hte dictionary to get the actual values. Returns the number of values
+// actually decoded and any error encountered.
+func (d *Dict{{.Name}}Decoder) Decode(out []{{.name}}) (int, error) {
+  vals := utils.MinInt(len(out), d.nvals)
+  decoded, err := d.decode(out[:vals])
+  if err != nil {
+    return decoded, err
+  }
+  if vals != decoded {
+    return decoded, xerrors.New("parquet: dict eof exception")
+  }
+  d.nvals -= vals
+  return vals, nil
+}
+
+// Decode spaced is like Decode but will space out the data leaving slots for null values
+// based on the provided bitmap.
+func (d *Dict{{.Name}}Decoder) DecodeSpaced(out []{{.name}}, nullCount int, validBits []byte, validBitsOffset int64) (int, error) {
+  vals := utils.MinInt(len(out), d.nvals)
+  decoded, err := d.decodeSpaced(out[:vals], nullCount, validBits, validBitsOffset)
+  if err != nil {
+    return decoded, err
+  }
+  if vals != decoded {
+    return decoded, xerrors.New("parquet: dict spaced eof exception")
+  }
+  d.nvals -= vals
+  return vals, nil
+}
+
+// {{.Name}}DictConverter is a helper for dictionary handling which is used for converting
+// run length encoded indexes into the actual values that are stored in the dictionary index page.
+type {{.Name}}DictConverter struct {
+  valueDecoder {{.Name}}Decoder
+  dict []{{.name}}
+  zeroVal {{.name}}
+}
+
+// ensure validates that we've decoded dictionary values up to the index
+// provided so that we don't need to decode the entire dictionary at start.
+func (dc *{{.Name}}DictConverter) ensure(idx utils.IndexType) error {
+  if len(dc.dict) <= int(idx) {
+    if cap(dc.dict) <= int(idx) {
+      val := make([]{{.name}}, int(idx+1)-len(dc.dict))
+      n, err := dc.valueDecoder.Decode(val)
+      if err != nil {
+        return err
+      }
+      dc.dict = append(dc.dict, val[:n]...)
+    } else {
+      cur := len(dc.dict)
+      n, err := dc.valueDecoder.Decode(dc.dict[cur : idx+1])
+      if err != nil {
+        return err
+      }
+      dc.dict = dc.dict[:cur+n]
+    }
+  }
+  return nil
+}
+
+// IsValid verifies that the set of indexes passed in are all valid indexes
+// in the dictionary and if necessary decodes dictionary indexes up to the index
+// requested.
+func (dc *{{.Name}}DictConverter) IsValid(idxes ...utils.IndexType) bool {
+	min, max := utils.GetMinMaxInt32(*(*[]int32)(unsafe.Pointer(&idxes)))
+  dc.ensure(utils.IndexType(max))
+
+	return min >= 0 && int(min) < len(dc.dict) && int(max) >= 0 && int(max) < len(dc.dict)
+}
+
+// Fill populates the slice passed in entirely with the value at dictionary index indicated by val
+func (dc *{{.Name}}DictConverter) Fill(out interface{}, val utils.IndexType) error {
+	o := out.([]{{.name}})
+	if err := dc.ensure(val); err != nil {
+    return err
+  }
+	o[0] = dc.dict[val]
+	for i := 1; i < len(o); i *= 2 {
+		copy(o[i:], o[:i])
+	}
+  return nil
+}
+
+// FillZero populates the entire slice of out with the zero value for {{.name}}
+func (dc *{{.Name}}DictConverter) FillZero(out interface{}) {
+  o := out.([]{{.name}})
+  o[0] = dc.zeroVal
+  for i := 1; i < len(o); i *= 2 {
+    copy(o[i:], o[:i])
+  }
+}
+
+// Copy populates the slice provided with the values in the dictionary at the indexes
+// in the vals slice.
+func (dc *{{.Name}}DictConverter) Copy(out interface{}, vals []utils.IndexType) error {
+	o := out.([]{{.name}})
+	for idx, val := range vals {
+		o[idx] = dc.dict[val]
+	}
+  return nil
+}
+{{end}}
+
+{{end}}
+
+// NewDictConverter creates a dict converter of the appropriate type, using the passed in
+// decoder as the decoder to decode the dictionary index.
+func NewDictConverter(dict TypedDecoder) utils.DictionaryConverter {
+  switch dict.Type() {
+  {{ range .In }}{{ if and (ne .Name "Boolean") (ne .Name "Int96") -}}
+  case parquet.Types.{{if .physical }}{{.physical}}{{else}}{{.Name}}{{end}}:
+    return &{{.Name}}DictConverter{valueDecoder: dict.({{.Name}}Decoder), dict: make([]{{.name}}, 0, dict.ValuesLeft())}
+  {{ end }}{{ end -}}
+  default:
+    return nil
+  }
+}
+
+// helper function to get encoding traits object for the physical type indicated
+func getEncodingTraits(t parquet.Type) EncoderTraits {
+  switch t {
+  {{ range .In -}}
+  case parquet.Types.{{if .physical}}{{.physical}}{{else}}{{.Name}}{{end}}:
+    return {{.Name}}EncoderTraits
+  {{ end -}}
+  default:
+    return nil
+  }
+}
+
+// helper function to get decoding traits object for the physical type indicated
+func getDecodingTraits(t parquet.Type) DecoderTraits {
+  switch t {
+  {{ range .In -}}
+  case parquet.Types.{{if .physical}}{{.physical}}{{else}}{{.Name}}{{end}}:
+    return {{.Name}}DecoderTraits
+  {{ end -}}
+  default:
+    return nil
+  }
+}
diff --git a/go/parquet/internal/encoding/types.go b/go/parquet/internal/encoding/types.go
new file mode 100644
index 00000000000..fa3661e1119
--- /dev/null
+++ b/go/parquet/internal/encoding/types.go
@@ -0,0 +1,497 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package encoding
+
+import (
+	"io"
+	"sync"
+
+	"github.com/apache/arrow/go/arrow/bitutil"
+	"github.com/apache/arrow/go/arrow/memory"
+	"github.com/apache/arrow/go/parquet"
+	"github.com/apache/arrow/go/parquet/internal/utils"
+	"golang.org/x/xerrors"
+)
+
+// TypedDecoder is the general interface for all decoder types which can
+// then be type asserted to a specific Type Decoder
+type TypedDecoder interface {
+	// SetData updates the data in the decoder with the passed in byte slice and the
+	// stated number of values as expected to be decoded.
+	SetData(buffered int, buf []byte) error
+	// Encoding returns the encoding type that this decoder decodes data of
+	Encoding() parquet.Encoding
+	// ValuesLeft returns the number of remaining values to be decoded
+	ValuesLeft() int
+	// Type returns the physical type this can decode.
+	Type() parquet.Type
+}
+
+// DictDecoder is a special TypedDecoder which implements dictionary decoding
+type DictDecoder interface {
+	TypedDecoder
+	// SetDict takes in a decoder which can decode the dictionary index to be used
+	SetDict(TypedDecoder)
+}
+
+// TypedEncoder is the general interface for all encoding types which
+// can then be type asserted to a specific Type Encoder
+type TypedEncoder interface {
+	// Bytes returns the current slice of bytes that have been encoded but does not pass ownership
+	Bytes() []byte
+	// Reset resets the encoder and dumps all the data to let it be reused.
+	Reset()
+	// ReserveForWrite reserves n bytes in the buffer so that the next n bytes written will not
+	// cause a memory allocation.
+	ReserveForWrite(n int)
+	// EstimatedDataEncodedSize returns the estimated number of bytes in the buffer
+	// so far.
+	EstimatedDataEncodedSize() int64
+	// FlushValues finishes up any unwritten data and returns the buffer of data passing
+	// ownership to the caller, Release needs to be called on the Buffer to free the memory
+	FlushValues() Buffer
+	// Encoding returns the type of encoding that this encoder operates with
+	Encoding() parquet.Encoding
+	// Allocator returns the allocator that was used when creating this encoder
+	Allocator() memory.Allocator
+	// Type returns the underlying physical type this encodes.
+	Type() parquet.Type
+}
+
+// DictEncoder is a special kind of TypedEncoder which implements Dictionary
+// encoding.
+type DictEncoder interface {
+	TypedEncoder
+	// WriteIndices populates the byte slice with the final indexes of data and returns
+	// the number of bytes written
+	WriteIndices(out []byte) int
+	// DictEncodedSize returns the current size of the encoded dictionary index.
+	DictEncodedSize() int
+	// BitWidth returns the bitwidth needed to encode all of the index values based
+	// on the number of values in the dictionary index.
+	BitWidth() int
+	// WriteDict populates out with the dictionary index values, out should be sized to at least
+	// as many bytes as DictEncodedSize
+	WriteDict(out []byte)
+	// NumEntries returns the number of values currently in the dictionary index.
+	NumEntries() int
+}
+
+var bufferPool = sync.Pool{
+	New: func() interface{} {
+		return memory.NewResizableBuffer(memory.DefaultAllocator)
+	},
+}
+
+// Buffer is an interface used as a general interface for handling buffers
+// regardless of the underlying implementation.
+type Buffer interface {
+	Len() int
+	Buf() []byte
+	Bytes() []byte
+	Resize(int)
+	Release()
+}
+
+// poolBuffer is a buffer that will release the allocated buffer to a pool
+// of buffers when release is called in order to allow it to be reused to
+// cut down on the number of allocations.
+type poolBuffer struct {
+	buf *memory.Buffer
+}
+
+func (p poolBuffer) Resize(n int) { p.buf.ResizeNoShrink(n) }
+
+func (p poolBuffer) Len() int { return p.buf.Len() }
+
+func (p poolBuffer) Bytes() []byte { return p.buf.Bytes() }
+
+func (p poolBuffer) Buf() []byte { return p.buf.Buf() }
+
+func (p poolBuffer) Release() {
+	if p.buf.Mutable() {
+		memory.Set(p.buf.Buf(), 0)
+		p.buf.ResizeNoShrink(0)
+		bufferPool.Put(p.buf)
+		return
+	}
+
+	p.buf.Release()
+}
+
+// PooledBufferWriter uses buffers from the buffer pool to back it while
+// implementing io.Writer and io.WriterAt interfaces
+type PooledBufferWriter struct {
+	buf    *memory.Buffer
+	pos    int
+	offset int
+}
+
+// NewPooledBufferWriter returns a new buffer with 'initial' bytes reserved
+// and pre-allocated to guarantee that writing that many more bytes will not
+// require another allocation.
+func NewPooledBufferWriter(initial int) *PooledBufferWriter {
+	ret := &PooledBufferWriter{}
+	ret.Reserve(initial)
+	return ret
+}
+
+// SetOffset sets an offset in the buffer which will ensure that all references
+// to offsets and sizes in the buffer will be offset by this many bytes, allowing
+// the writer to reserve space in the buffer.
+func (b *PooledBufferWriter) SetOffset(offset int) {
+	b.pos -= b.offset
+	b.offset = offset
+	b.pos += offset
+}
+
+// Reserve pre-allocates nbytes to ensure that the next write of that many bytes
+// will not require another allocation.
+func (b *PooledBufferWriter) Reserve(nbytes int) {
+	if b.buf == nil {
+		b.buf = bufferPool.Get().(*memory.Buffer)
+	}
+
+	newCap := utils.MaxInt(b.buf.Cap()+b.offset, 256)
+	for newCap < b.pos+nbytes {
+		newCap = bitutil.NextPowerOf2(newCap)
+	}
+	b.buf.Reserve(newCap)
+}
+
+// Reset will release any current memory and initialize it with the new
+// allocated bytes.
+func (b *PooledBufferWriter) Reset(initial int) {
+	if b.buf != nil {
+		memory.Set(b.buf.Buf(), 0)
+		b.buf.ResizeNoShrink(0)
+		bufferPool.Put(b.buf)
+		b.buf = nil
+	}
+
+	b.pos = 0
+	b.offset = 0
+	b.Reserve(initial)
+}
+
+// Finish returns the current buffer, with the responsibility for releasing
+// the memory on the caller, resetting this writer to be re-used
+func (b *PooledBufferWriter) Finish() Buffer {
+	if b.buf.Len() < b.pos {
+		b.buf.ResizeNoShrink(b.pos)
+	}
+	buf := poolBuffer{b.buf}
+
+	b.buf = nil
+	b.Reset(0)
+	return buf
+}
+
+// WriteAt writes the bytes from p into this buffer starting at offset.
+//
+// Does not affect the internal position of the writer.
+func (b *PooledBufferWriter) WriteAt(p []byte, offset int64) (n int, err error) {
+	if len(p) == 0 {
+		return 0, nil
+	}
+	offset += int64(b.offset)
+	need := int(offset) + len(p)
+
+	if need >= b.buf.Cap() {
+		b.Reserve(need - b.pos)
+	}
+	n = copy(b.buf.Buf()[offset:], p)
+
+	if need > b.buf.Len() {
+		b.buf.ResizeNoShrink(need)
+	}
+	return
+}
+
+func (b *PooledBufferWriter) Write(buf []byte) (int, error) {
+	if len(buf) == 0 {
+		return 0, nil
+	}
+	b.Reserve(len(buf))
+	return b.UnsafeWrite(buf)
+}
+
+func (b *PooledBufferWriter) UnsafeWriteCopy(ncopies int, pattern []byte) (int, error) {
+	nbytes := len(pattern) * ncopies
+	slc := b.buf.Buf()[b.pos : b.pos+nbytes]
+	copy(slc, pattern)
+	for j := len(pattern); j < len(slc); j *= 2 {
+		copy(slc[j:], slc[:j])
+	}
+	b.pos += nbytes
+	return nbytes, nil
+}
+
+// UnsafeWrite does not check the capacity / length before writing.
+func (b *PooledBufferWriter) UnsafeWrite(buf []byte) (n int, err error) {
+	n = copy(b.buf.Buf()[b.pos:], buf)
+	b.pos += n
+	return
+}
+
+func (b *PooledBufferWriter) Tell() int64 {
+	return int64(b.pos)
+}
+
+// Bytes returns the current bytes slice of slice Len
+func (b *PooledBufferWriter) Bytes() []byte {
+	if b.buf.Len() < b.pos {
+		b.buf.ResizeNoShrink(b.pos)
+	}
+	return b.buf.Bytes()[b.offset:]
+}
+
+// Len provides the current Length of the byte slice
+func (b *PooledBufferWriter) Len() int {
+	if b.buf.Len() < b.pos {
+		b.buf.ResizeNoShrink(b.pos)
+	}
+	return b.buf.Len() - b.offset
+}
+
+// BufferWriter is a utility class for building and writing to a memory.Buffer
+// with a given allocator that fulfills the interfaces io.Write, io.WriteAt
+// and io.Seeker, while providing the ability to pre-allocate memory.
+type BufferWriter struct {
+	buffer *memory.Buffer
+	pos    int
+	mem    memory.Allocator
+
+	offset int
+}
+
+// NewBufferWriterFromBuffer wraps the provided buffer to allow it to fulfill these
+// interfaces.
+func NewBufferWriterFromBuffer(b *memory.Buffer, mem memory.Allocator) *BufferWriter {
+	return &BufferWriter{b, 0, mem, 0}
+}
+
+// NewBufferWriter constructs a buffer with initially reserved/allocated memory.
+func NewBufferWriter(initial int, mem memory.Allocator) *BufferWriter {
+	buf := memory.NewResizableBuffer(mem)
+	buf.Reserve(initial)
+	return &BufferWriter{buffer: buf, mem: mem}
+}
+
+func (b *BufferWriter) SetOffset(offset int) {
+	b.offset = offset
+}
+
+// Bytes returns the current bytes slice of slice Len
+func (b *BufferWriter) Bytes() []byte {
+	return b.buffer.Bytes()[b.offset:]
+}
+
+// Len provides the current Length of the byte slice
+func (b *BufferWriter) Len() int {
+	return b.buffer.Len() - b.offset
+}
+
+// Cap returns the current capacity of the underlying buffer
+func (b *BufferWriter) Cap() int {
+	return b.buffer.Cap() - b.offset
+}
+
+// Finish returns the current buffer, with the responsibility for releasing
+// the memory on the caller, resetting this writer to be re-used
+func (b *BufferWriter) Finish() *memory.Buffer {
+	buf := b.buffer
+	b.buffer = nil
+	b.Reset(0)
+	return buf
+}
+
+func (b *BufferWriter) Truncate() {
+	b.pos = 0
+	b.offset = 0
+
+	if b.buffer == nil {
+		b.Reserve(1024)
+	} else {
+		b.buffer.ResizeNoShrink(0)
+	}
+}
+
+// Reset will release any current memory and initialize it with the new
+// allocated bytes.
+func (b *BufferWriter) Reset(initial int) {
+	if b.buffer != nil {
+		b.buffer.Release()
+	}
+
+	b.pos = 0
+	b.offset = 0
+	b.Reserve(initial)
+}
+
+// Reserve ensures that there is at least enough capacity to write nbytes
+// without another allocation, may allocate more than that in order to
+// efficiently reduce allocations
+func (b *BufferWriter) Reserve(nbytes int) {
+	if b.buffer == nil {
+		b.buffer = memory.NewResizableBuffer(b.mem)
+	}
+	newCap := utils.MaxInt(b.buffer.Cap()+b.offset, 256)
+	for newCap < b.pos+nbytes+b.offset {
+		newCap = bitutil.NextPowerOf2(newCap)
+	}
+	b.buffer.Reserve(newCap)
+}
+
+// WriteAt writes the bytes from p into this buffer starting at offset.
+//
+// Does not affect the internal position of the writer.
+func (b *BufferWriter) WriteAt(p []byte, offset int64) (n int, err error) {
+	if len(p) == 0 {
+		return 0, nil
+	}
+	offset += int64(b.offset)
+	need := int(offset) + len(p)
+
+	if need >= b.buffer.Cap() {
+		b.Reserve(need - b.pos)
+	}
+	copy(b.buffer.Buf()[offset:], p)
+
+	if need > b.buffer.Len() {
+		b.buffer.ResizeNoShrink(need)
+	}
+	return len(p), nil
+}
+
+func (b *BufferWriter) Write(buf []byte) (int, error) {
+	if len(buf) == 0 {
+		return 0, nil
+	}
+	if b.buffer == nil {
+		b.Reserve(len(buf))
+	}
+
+	if b.pos+b.offset+len(buf) >= b.buffer.Cap() {
+		b.Reserve(len(buf))
+	}
+	return b.UnsafeWrite(buf)
+}
+
+func (b *BufferWriter) UnsafeWriteCopy(ncopies int, pattern []byte) (int, error) {
+	nbytes := len(pattern) * ncopies
+	slc := b.buffer.Buf()[b.pos : b.pos+nbytes]
+	copy(slc, pattern)
+	for j := len(pattern); j < len(slc); j *= 2 {
+		copy(slc[j:], slc[:j])
+	}
+	b.pos += nbytes
+	b.buffer.ResizeNoShrink(b.pos)
+	return nbytes, nil
+}
+
+// UnsafeWrite does not check the capacity / length before writing.
+func (b *BufferWriter) UnsafeWrite(buf []byte) (int, error) {
+	copy(b.buffer.Buf()[b.pos+b.offset:], buf)
+	b.pos += len(buf)
+	b.buffer.ResizeNoShrink(b.pos)
+	return len(buf), nil
+}
+
+// Seek fulfills the io.Seeker interface returning it's new position
+// whence must be io.SeekStart, io.SeekCurrent or io.SeekEnd or it will be ignored.
+func (b *BufferWriter) Seek(offset int64, whence int) (int64, error) {
+	newPos, offs := 0, int(offset)
+	offs += b.offset
+	switch whence {
+	case io.SeekStart:
+		newPos = offs
+	case io.SeekCurrent:
+		newPos = b.pos + offs
+	case io.SeekEnd:
+		newPos = b.buffer.Len() + offs
+	}
+	if newPos < 0 {
+		return 0, xerrors.New("negative result pos")
+	}
+	b.pos = newPos
+	return int64(newPos), nil
+}
+
+func (b *BufferWriter) Tell() int64 {
+	return int64(b.pos)
+}
+
+// MemoTable interface that can be used to swap out implementations of the hash table
+// used for handling dictionary encoding. Dictionary encoding is built against this interface
+// to make it easy for code generation and changing implementations.
+//
+// Values should remember the order they are inserted to generate a valid dictionary index
+type MemoTable interface {
+	// Reset drops everything in the table allowing it to be reused
+	Reset()
+	// Size returns the current number of unique values stored in the table
+	// including whether or not a null value has been passed in using GetOrInsertNull
+	Size() int
+	// CopyValues populates out with the values currently in the table, out must
+	// be a slice of the appropriate type for the table type.
+	CopyValues(out interface{})
+	// CopyValuesSubset is like CopyValues but only copies a subset of values starting
+	// at the indicated index.
+	CopyValuesSubset(start int, out interface{})
+	// Get returns the index of the table the specified value is, and a boolean indicating
+	// whether or not the value was found in the table. Will panic if val is not the appropriate
+	// type for the underlying table.
+	Get(val interface{}) (int, bool)
+	// GetOrInsert is the same as Get, except if the value is not currently in the table it will
+	// be inserted into the table.
+	GetOrInsert(val interface{}) (idx int, existed bool, err error)
+	// GetNull returns the index of the null value and whether or not it was found in the table
+	GetNull() (int, bool)
+	// GetOrInsertNull returns the index of the null value, if it didn't already exist in the table,
+	// it is inserted.
+	GetOrInsertNull() (idx int, existed bool)
+}
+
+// BinaryMemoTable is an extension of the MemoTable interface adding extra methods
+// for handling byte arrays/strings/fixed length byte arrays.
+type BinaryMemoTable interface {
+	MemoTable
+	// ValuesSize returns the total number of bytes needed to copy all of the values
+	// from this table.
+	ValuesSize() int
+	// CopyOffsets populates out with the start and end offsets of each value in the
+	// table data. Out should be sized to Size()+1 to accomodate all of the offsets.
+	CopyOffsets(out []int8)
+	// CopyOffsetsSubset is like CopyOffsets but only gets a subset of the offsets
+	// starting at the specified index.
+	CopyOffsetsSubset(start int, out []int8)
+	// CopyFixedWidthValues exists to cope with the fact that the table doesn't track
+	// the fixed width when inserting the null value into the databuffer populating
+	// a zero length byte slice for the null value (if found).
+	CopyFixedWidthValues(start int, width int, out []byte)
+	// VisitValues calls visitFn on each value in the table starting with the index specified
+	VisitValues(start int, visitFn func([]byte))
+	// Retain increases the reference count of the separately stored binary data that is
+	// kept alongside the table which contains all of the values in the table. This is
+	// safe to call simultaneously across multiple goroutines.
+	Retain()
+	// Release decreases the reference count by 1 of the separately stored binary data
+	// kept alongside the table containing the values. When the reference count goes to
+	// 0, the memory is freed. This is safe to call across multiple goroutines simultaneoulsy.
+	Release()
+}
diff --git a/go/parquet/internal/testutils/utils.go b/go/parquet/internal/testutils/utils.go
new file mode 100644
index 00000000000..503c60044ab
--- /dev/null
+++ b/go/parquet/internal/testutils/utils.go
@@ -0,0 +1,42 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package testutils
+
+import (
+	"reflect"
+
+	"github.com/apache/arrow/go/parquet"
+)
+
+var typeToParquetTypeMap = map[reflect.Type]parquet.Type{
+	reflect.TypeOf(true):                        parquet.Types.Boolean,
+	reflect.TypeOf(int32(0)):                    parquet.Types.Int32,
+	reflect.TypeOf(int64(0)):                    parquet.Types.Int64,
+	reflect.TypeOf(float32(0)):                  parquet.Types.Float,
+	reflect.TypeOf(float64(0)):                  parquet.Types.Double,
+	reflect.TypeOf(parquet.ByteArray{}):         parquet.Types.ByteArray,
+	reflect.TypeOf(parquet.Int96{}):             parquet.Types.Int96,
+	reflect.TypeOf(parquet.FixedLenByteArray{}): parquet.Types.FixedLenByteArray,
+}
+
+func TypeToParquetType(typ reflect.Type) parquet.Type {
+	ret, ok := typeToParquetTypeMap[typ]
+	if !ok {
+		panic("invalid type for parquet type")
+	}
+	return ret
+}
diff --git a/go/parquet/internal/utils/Makefile b/go/parquet/internal/utils/Makefile
index 1de4308dc55..41cc68df5bc 100644
--- a/go/parquet/internal/utils/Makefile
+++ b/go/parquet/internal/utils/Makefile
@@ -18,10 +18,10 @@
 PERL_FIXUP_ROTATE=perl -i -pe 's/(ro[rl]\s+\w{2,3})$$/\1, 1/'
 
 C2GOASM=c2goasm
-CC=clang
+CC=clang-11
 C_FLAGS=-target x86_64-unknown-none -masm=intel -mno-red-zone -mstackrealign -mllvm -inline-threshold=1000 \
 				-fno-asynchronous-unwind-tables -fno-exceptions -fno-rtti -O3 -fno-builtin -ffast-math -fno-jump-tables -I_lib
-ASM_FLAGS_AVX2=-mavx2 -mfma -mllvm -force-vector-width=32
+ASM_FLAGS_AVX2=-mavx2 -mfma
 ASM_FLAGS_SSE4=-msse4
 ASM_FLAGS_BMI2=-mbmi2
 ASM_FLAGS_POPCNT=-mpopcnt
diff --git a/go/parquet/internal/utils/_lib/bit_packing_avx2.s b/go/parquet/internal/utils/_lib/bit_packing_avx2.s
index 222bc3ce413..84a5cca2ea3 100644
--- a/go/parquet/internal/utils/_lib/bit_packing_avx2.s
+++ b/go/parquet/internal/utils/_lib/bit_packing_avx2.s
@@ -4007,6 +4007,6 @@ unpack32_avx2:                          # @unpack32_avx2
 .Lfunc_end0:
 	.size	unpack32_avx2, .Lfunc_end0-unpack32_avx2
                                         # -- End function
-	.ident	"Ubuntu clang version 11.1.0-++20210204121720+1fdec59bffc1-1~exp1~20210203232336.162"
+	.ident	"Debian clang version 11.1.0-++20210428103820+1fdec59bffc1-1~exp1~20210428204437.162"
 	.section	".note.GNU-stack","",@progbits
 	.addrsig
diff --git a/go/parquet/internal/utils/_lib/min_max_avx2.s b/go/parquet/internal/utils/_lib/min_max_avx2.s
index dbf9a895ae3..ec24a731d69 100644
--- a/go/parquet/internal/utils/_lib/min_max_avx2.s
+++ b/go/parquet/internal/utils/_lib/min_max_avx2.s
@@ -15,173 +15,89 @@ int32_max_min_avx2:                     # @int32_max_min_avx2
 # %bb.0:
 	push	rbp
 	mov	rbp, rsp
-	and	rsp, -32
-	sub	rsp, 64
+	and	rsp, -8
 	test	esi, esi
 	jle	.LBB0_1
 # %bb.2:
 	mov	r8d, esi
 	cmp	esi, 31
-	ja	.LBB0_6
+	ja	.LBB0_4
 # %bb.3:
-	mov	eax, -2147483648
-	mov	r9d, 2147483647
-	xor	r11d, r11d
-	jmp	.LBB0_4
+	mov	r10d, -2147483648
+	mov	eax, 2147483647
+	xor	r9d, r9d
+	jmp	.LBB0_7
 .LBB0_1:
-	mov	r9d, 2147483647
-	mov	eax, -2147483648
-	jmp	.LBB0_14
-.LBB0_6:
-	mov	r11d, r8d
-	and	r11d, -32
-	lea	rax, [r11 - 32]
-	mov	r10, rax
-	shr	r10, 5
-	add	r10, 1
-	mov	r9d, r10d
-	and	r9d, 3
-	cmp	rax, 96
-	jae	.LBB0_8
-# %bb.7:
-	vpbroadcastd	ymm0, dword ptr [rip + .LCPI0_0] # ymm0 = [2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648]
-	vpbroadcastd	ymm1, dword ptr [rip + .LCPI0_1] # ymm1 = [2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647]
-	xor	eax, eax
-	vmovdqa	ymm2, ymm1
-	vmovdqa	ymm4, ymm1
-	vmovdqa	ymm6, ymm1
-	vmovdqa	ymm3, ymm0
-	vmovdqa	ymm5, ymm0
-	vmovdqa	ymm7, ymm0
-	jmp	.LBB0_10
-.LBB0_8:
-	and	r10, -4
-	vpbroadcastd	ymm0, dword ptr [rip + .LCPI0_0] # ymm0 = [2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648]
-	neg	r10
-	vpbroadcastd	ymm1, dword ptr [rip + .LCPI0_1] # ymm1 = [2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647]
+	mov	eax, 2147483647
+	mov	esi, -2147483648
+	jmp	.LBB0_8
+.LBB0_4:
+	mov	r9d, r8d
+	vpbroadcastd	ymm4, dword ptr [rip + .LCPI0_0] # ymm4 = [2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648]
+	and	r9d, -32
+	vpbroadcastd	ymm0, dword ptr [rip + .LCPI0_1] # ymm0 = [2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647]
 	xor	eax, eax
-	vmovdqa	ymm2, ymm1
-	vmovdqa	ymm4, ymm1
-	vmovdqa	ymm6, ymm1
+	vmovdqa	ymm1, ymm0
+	vmovdqa	ymm2, ymm0
 	vmovdqa	ymm3, ymm0
-	vmovdqa	ymm5, ymm0
-	vmovdqa	ymm7, ymm0
+	vmovdqa	ymm5, ymm4
+	vmovdqa	ymm6, ymm4
+	vmovdqa	ymm7, ymm4
 	.p2align	4, 0x90
-.LBB0_9:                                # =>This Inner Loop Header: Depth=1
+.LBB0_5:                                # =>This Inner Loop Header: Depth=1
 	vmovdqu	ymm8, ymmword ptr [rdi + 4*rax]
 	vmovdqu	ymm9, ymmword ptr [rdi + 4*rax + 32]
 	vmovdqu	ymm10, ymmword ptr [rdi + 4*rax + 64]
 	vmovdqu	ymm11, ymmword ptr [rdi + 4*rax + 96]
-	vpminsd	ymm6, ymm6, ymm11
-	vpminsd	ymm4, ymm4, ymm10
-	vpminsd	ymm1, ymm1, ymm8
-	vpminsd	ymm2, ymm2, ymm9
+	vpminsd	ymm0, ymm0, ymm8
+	vpminsd	ymm1, ymm1, ymm9
+	vpminsd	ymm2, ymm2, ymm10
+	vpminsd	ymm3, ymm3, ymm11
+	vpmaxsd	ymm4, ymm4, ymm8
+	vpmaxsd	ymm5, ymm5, ymm9
+	vpmaxsd	ymm6, ymm6, ymm10
 	vpmaxsd	ymm7, ymm7, ymm11
-	vpmaxsd	ymm5, ymm5, ymm10
-	vpmaxsd	ymm0, ymm0, ymm8
-	vpmaxsd	ymm3, ymm3, ymm9
-	vmovdqu	ymm8, ymmword ptr [rdi + 4*rax + 224]
-	vmovdqu	ymm9, ymmword ptr [rdi + 4*rax + 192]
-	vmovdqu	ymm10, ymmword ptr [rdi + 4*rax + 128]
-	vmovdqu	ymm11, ymmword ptr [rdi + 4*rax + 160]
-	vmovdqu	ymm12, ymmword ptr [rdi + 4*rax + 256]
-	vmovdqu	ymm13, ymmword ptr [rdi + 4*rax + 320]
-	vmovdqu	ymm14, ymmword ptr [rdi + 4*rax + 352]
-	vpminsd	ymm15, ymm8, ymm14
-	vpminsd	ymm6, ymm6, ymm15
-	vmovdqa	ymmword ptr [rsp], ymm6         # 32-byte Spill
-	vpminsd	ymm15, ymm9, ymm13
-	vpminsd	ymm4, ymm4, ymm15
-	vpminsd	ymm15, ymm10, ymm12
-	vpminsd	ymm1, ymm1, ymm15
-	vmovdqu	ymm15, ymmword ptr [rdi + 4*rax + 288]
-	vpminsd	ymm6, ymm11, ymm15
-	vpminsd	ymm2, ymm2, ymm6
-	vpmaxsd	ymm6, ymm8, ymm14
-	vpmaxsd	ymm7, ymm7, ymm6
-	vpmaxsd	ymm6, ymm9, ymm13
-	vpmaxsd	ymm5, ymm5, ymm6
-	vpmaxsd	ymm6, ymm10, ymm12
-	vpmaxsd	ymm0, ymm0, ymm6
-	vpmaxsd	ymm6, ymm11, ymm15
-	vpmaxsd	ymm3, ymm3, ymm6
-	vmovdqu	ymm6, ymmword ptr [rdi + 4*rax + 416]
-	vpminsd	ymm2, ymm2, ymm6
-	vpmaxsd	ymm3, ymm3, ymm6
-	vmovdqu	ymm6, ymmword ptr [rdi + 4*rax + 384]
-	vpminsd	ymm1, ymm1, ymm6
-	vpmaxsd	ymm0, ymm0, ymm6
-	vmovdqu	ymm6, ymmword ptr [rdi + 4*rax + 448]
-	vpminsd	ymm4, ymm4, ymm6
-	vpmaxsd	ymm5, ymm5, ymm6
-	vmovdqu	ymm8, ymmword ptr [rdi + 4*rax + 480]
-	vpminsd	ymm6, ymm8, ymmword ptr [rsp]   # 32-byte Folded Reload
-	vpmaxsd	ymm7, ymm7, ymm8
-	sub	rax, -128
-	add	r10, 4
-	jne	.LBB0_9
-.LBB0_10:
-	test	r9, r9
-	je	.LBB0_13
-# %bb.11:
-	lea	rax, [rdi + 4*rax]
-	neg	r9
-	.p2align	4, 0x90
-.LBB0_12:                               # =>This Inner Loop Header: Depth=1
-	vmovdqu	ymm8, ymmword ptr [rax]
-	vmovdqu	ymm9, ymmword ptr [rax + 32]
-	vmovdqu	ymm10, ymmword ptr [rax + 64]
-	vmovdqu	ymm11, ymmword ptr [rax + 96]
-	vpminsd	ymm2, ymm2, ymm9
-	vpminsd	ymm1, ymm1, ymm8
-	vpminsd	ymm4, ymm4, ymm10
-	vpminsd	ymm6, ymm6, ymm11
-	vpmaxsd	ymm3, ymm3, ymm9
-	vpmaxsd	ymm0, ymm0, ymm8
-	vpmaxsd	ymm5, ymm5, ymm10
-	vpmaxsd	ymm7, ymm7, ymm11
-	sub	rax, -128
-	inc	r9
-	jne	.LBB0_12
-.LBB0_13:
-	vpminsd	ymm2, ymm2, ymm6
-	vpminsd	ymm1, ymm1, ymm4
-	vpminsd	ymm1, ymm1, ymm2
-	vpmaxsd	ymm2, ymm3, ymm7
-	vpmaxsd	ymm0, ymm0, ymm5
-	vpmaxsd	ymm0, ymm0, ymm2
-	vextracti128	xmm2, ymm0, 1
-	vpmaxsd	xmm0, xmm0, xmm2
-	vpshufd	xmm2, xmm0, 78                  # xmm2 = xmm0[2,3,0,1]
-	vpmaxsd	xmm0, xmm0, xmm2
-	vpshufd	xmm2, xmm0, 229                 # xmm2 = xmm0[1,1,2,3]
-	vpmaxsd	xmm0, xmm0, xmm2
-	vmovd	eax, xmm0
-	vextracti128	xmm0, ymm1, 1
-	vpminsd	xmm0, xmm1, xmm0
+	add	rax, 32
+	cmp	r9, rax
+	jne	.LBB0_5
+# %bb.6:
+	vpmaxsd	ymm4, ymm4, ymm5
+	vpmaxsd	ymm4, ymm4, ymm6
+	vpmaxsd	ymm4, ymm4, ymm7
+	vextracti128	xmm5, ymm4, 1
+	vpmaxsd	xmm4, xmm4, xmm5
+	vpshufd	xmm5, xmm4, 78                  # xmm5 = xmm4[2,3,0,1]
+	vpmaxsd	xmm4, xmm4, xmm5
+	vpshufd	xmm5, xmm4, 229                 # xmm5 = xmm4[1,1,2,3]
+	vpmaxsd	xmm4, xmm4, xmm5
+	vmovd	r10d, xmm4
+	vpminsd	ymm0, ymm0, ymm1
+	vpminsd	ymm0, ymm0, ymm2
+	vpminsd	ymm0, ymm0, ymm3
+	vextracti128	xmm1, ymm0, 1
+	vpminsd	xmm0, xmm0, xmm1
 	vpshufd	xmm1, xmm0, 78                  # xmm1 = xmm0[2,3,0,1]
 	vpminsd	xmm0, xmm0, xmm1
 	vpshufd	xmm1, xmm0, 229                 # xmm1 = xmm0[1,1,2,3]
 	vpminsd	xmm0, xmm0, xmm1
-	vmovd	r9d, xmm0
-	cmp	r11, r8
-	je	.LBB0_14
-.LBB0_4:
-	mov	esi, eax
+	vmovd	eax, xmm0
+	mov	esi, r10d
+	cmp	r9, r8
+	je	.LBB0_8
 	.p2align	4, 0x90
-.LBB0_5:                                # =>This Inner Loop Header: Depth=1
-	mov	eax, dword ptr [rdi + 4*r11]
-	cmp	r9d, eax
-	cmovg	r9d, eax
-	cmp	esi, eax
-	cmovge	eax, esi
-	add	r11, 1
-	mov	esi, eax
-	cmp	r8, r11
-	jne	.LBB0_5
-.LBB0_14:
-	mov	dword ptr [rcx], eax
-	mov	dword ptr [rdx], r9d
+.LBB0_7:                                # =>This Inner Loop Header: Depth=1
+	mov	esi, dword ptr [rdi + 4*r9]
+	cmp	eax, esi
+	cmovg	eax, esi
+	cmp	r10d, esi
+	cmovge	esi, r10d
+	add	r9, 1
+	mov	r10d, esi
+	cmp	r8, r9
+	jne	.LBB0_7
+.LBB0_8:
+	mov	dword ptr [rcx], esi
+	mov	dword ptr [rdx], eax
 	mov	rsp, rbp
 	pop	rbp
 	vzeroupper
@@ -196,173 +112,89 @@ uint32_max_min_avx2:                    # @uint32_max_min_avx2
 # %bb.0:
 	push	rbp
 	mov	rbp, rsp
-	and	rsp, -32
-	sub	rsp, 64
+	and	rsp, -8
 	test	esi, esi
 	jle	.LBB1_1
 # %bb.2:
 	mov	r8d, esi
 	cmp	esi, 31
-	ja	.LBB1_6
+	ja	.LBB1_4
 # %bb.3:
-	xor	r11d, r11d
-	mov	r9d, -1
-	xor	esi, esi
-	jmp	.LBB1_4
+	xor	r9d, r9d
+	mov	eax, -1
+	xor	r10d, r10d
+	jmp	.LBB1_7
 .LBB1_1:
-	mov	r9d, -1
+	mov	eax, -1
 	xor	esi, esi
-	jmp	.LBB1_14
-.LBB1_6:
-	mov	r11d, r8d
-	and	r11d, -32
-	lea	rax, [r11 - 32]
-	mov	r10, rax
-	shr	r10, 5
-	add	r10, 1
-	mov	r9d, r10d
-	and	r9d, 3
-	cmp	rax, 96
-	jae	.LBB1_8
-# %bb.7:
-	vpxor	xmm0, xmm0, xmm0
-	vpcmpeqd	ymm1, ymm1, ymm1
+	jmp	.LBB1_8
+.LBB1_4:
+	mov	r9d, r8d
+	and	r9d, -32
+	vpxor	xmm4, xmm4, xmm4
+	vpcmpeqd	ymm0, ymm0, ymm0
 	xor	eax, eax
-	vpcmpeqd	ymm2, ymm2, ymm2
-	vpcmpeqd	ymm4, ymm4, ymm4
-	vpcmpeqd	ymm6, ymm6, ymm6
-	vpxor	xmm3, xmm3, xmm3
-	vpxor	xmm5, xmm5, xmm5
-	vpxor	xmm7, xmm7, xmm7
-	jmp	.LBB1_10
-.LBB1_8:
-	and	r10, -4
-	neg	r10
-	vpxor	xmm0, xmm0, xmm0
 	vpcmpeqd	ymm1, ymm1, ymm1
-	xor	eax, eax
 	vpcmpeqd	ymm2, ymm2, ymm2
-	vpcmpeqd	ymm4, ymm4, ymm4
-	vpcmpeqd	ymm6, ymm6, ymm6
-	vpxor	xmm3, xmm3, xmm3
+	vpcmpeqd	ymm3, ymm3, ymm3
 	vpxor	xmm5, xmm5, xmm5
+	vpxor	xmm6, xmm6, xmm6
 	vpxor	xmm7, xmm7, xmm7
 	.p2align	4, 0x90
-.LBB1_9:                                # =>This Inner Loop Header: Depth=1
+.LBB1_5:                                # =>This Inner Loop Header: Depth=1
 	vmovdqu	ymm8, ymmword ptr [rdi + 4*rax]
 	vmovdqu	ymm9, ymmword ptr [rdi + 4*rax + 32]
 	vmovdqu	ymm10, ymmword ptr [rdi + 4*rax + 64]
 	vmovdqu	ymm11, ymmword ptr [rdi + 4*rax + 96]
-	vpminud	ymm6, ymm6, ymm11
-	vpminud	ymm4, ymm4, ymm10
-	vpminud	ymm1, ymm1, ymm8
-	vpminud	ymm2, ymm2, ymm9
+	vpminud	ymm0, ymm0, ymm8
+	vpminud	ymm1, ymm1, ymm9
+	vpminud	ymm2, ymm2, ymm10
+	vpminud	ymm3, ymm3, ymm11
+	vpmaxud	ymm4, ymm4, ymm8
+	vpmaxud	ymm5, ymm5, ymm9
+	vpmaxud	ymm6, ymm6, ymm10
 	vpmaxud	ymm7, ymm7, ymm11
-	vpmaxud	ymm5, ymm5, ymm10
-	vpmaxud	ymm0, ymm0, ymm8
-	vpmaxud	ymm3, ymm3, ymm9
-	vmovdqu	ymm8, ymmword ptr [rdi + 4*rax + 224]
-	vmovdqu	ymm9, ymmword ptr [rdi + 4*rax + 192]
-	vmovdqu	ymm10, ymmword ptr [rdi + 4*rax + 128]
-	vmovdqu	ymm11, ymmword ptr [rdi + 4*rax + 160]
-	vmovdqu	ymm12, ymmword ptr [rdi + 4*rax + 256]
-	vmovdqu	ymm13, ymmword ptr [rdi + 4*rax + 320]
-	vmovdqu	ymm14, ymmword ptr [rdi + 4*rax + 352]
-	vpminud	ymm15, ymm8, ymm14
-	vpminud	ymm6, ymm6, ymm15
-	vmovdqa	ymmword ptr [rsp], ymm6         # 32-byte Spill
-	vpminud	ymm15, ymm9, ymm13
-	vpminud	ymm4, ymm4, ymm15
-	vpminud	ymm15, ymm10, ymm12
-	vpminud	ymm1, ymm1, ymm15
-	vmovdqu	ymm15, ymmword ptr [rdi + 4*rax + 288]
-	vpminud	ymm6, ymm11, ymm15
-	vpminud	ymm2, ymm2, ymm6
-	vpmaxud	ymm6, ymm8, ymm14
-	vpmaxud	ymm7, ymm7, ymm6
-	vpmaxud	ymm6, ymm9, ymm13
-	vpmaxud	ymm5, ymm5, ymm6
-	vpmaxud	ymm6, ymm10, ymm12
-	vpmaxud	ymm0, ymm0, ymm6
-	vpmaxud	ymm6, ymm11, ymm15
-	vpmaxud	ymm3, ymm3, ymm6
-	vmovdqu	ymm6, ymmword ptr [rdi + 4*rax + 416]
-	vpminud	ymm2, ymm2, ymm6
-	vpmaxud	ymm3, ymm3, ymm6
-	vmovdqu	ymm6, ymmword ptr [rdi + 4*rax + 384]
-	vpminud	ymm1, ymm1, ymm6
-	vpmaxud	ymm0, ymm0, ymm6
-	vmovdqu	ymm6, ymmword ptr [rdi + 4*rax + 448]
-	vpminud	ymm4, ymm4, ymm6
-	vpmaxud	ymm5, ymm5, ymm6
-	vmovdqu	ymm8, ymmword ptr [rdi + 4*rax + 480]
-	vpminud	ymm6, ymm8, ymmword ptr [rsp]   # 32-byte Folded Reload
-	vpmaxud	ymm7, ymm7, ymm8
-	sub	rax, -128
-	add	r10, 4
-	jne	.LBB1_9
-.LBB1_10:
-	test	r9, r9
-	je	.LBB1_13
-# %bb.11:
-	lea	rax, [rdi + 4*rax]
-	neg	r9
-	.p2align	4, 0x90
-.LBB1_12:                               # =>This Inner Loop Header: Depth=1
-	vmovdqu	ymm8, ymmword ptr [rax]
-	vmovdqu	ymm9, ymmword ptr [rax + 32]
-	vmovdqu	ymm10, ymmword ptr [rax + 64]
-	vmovdqu	ymm11, ymmword ptr [rax + 96]
-	vpminud	ymm2, ymm2, ymm9
-	vpminud	ymm1, ymm1, ymm8
-	vpminud	ymm4, ymm4, ymm10
-	vpminud	ymm6, ymm6, ymm11
-	vpmaxud	ymm3, ymm3, ymm9
-	vpmaxud	ymm0, ymm0, ymm8
-	vpmaxud	ymm5, ymm5, ymm10
-	vpmaxud	ymm7, ymm7, ymm11
-	sub	rax, -128
-	inc	r9
-	jne	.LBB1_12
-.LBB1_13:
-	vpminud	ymm2, ymm2, ymm6
-	vpminud	ymm1, ymm1, ymm4
-	vpminud	ymm1, ymm1, ymm2
-	vpmaxud	ymm2, ymm3, ymm7
-	vpmaxud	ymm0, ymm0, ymm5
-	vpmaxud	ymm0, ymm0, ymm2
-	vextracti128	xmm2, ymm0, 1
-	vpmaxud	xmm0, xmm0, xmm2
-	vpshufd	xmm2, xmm0, 78                  # xmm2 = xmm0[2,3,0,1]
-	vpmaxud	xmm0, xmm0, xmm2
-	vpshufd	xmm2, xmm0, 229                 # xmm2 = xmm0[1,1,2,3]
-	vpmaxud	xmm0, xmm0, xmm2
-	vmovd	esi, xmm0
-	vextracti128	xmm0, ymm1, 1
-	vpminud	xmm0, xmm1, xmm0
+	add	rax, 32
+	cmp	r9, rax
+	jne	.LBB1_5
+# %bb.6:
+	vpmaxud	ymm4, ymm4, ymm5
+	vpmaxud	ymm4, ymm4, ymm6
+	vpmaxud	ymm4, ymm4, ymm7
+	vextracti128	xmm5, ymm4, 1
+	vpmaxud	xmm4, xmm4, xmm5
+	vpshufd	xmm5, xmm4, 78                  # xmm5 = xmm4[2,3,0,1]
+	vpmaxud	xmm4, xmm4, xmm5
+	vpshufd	xmm5, xmm4, 229                 # xmm5 = xmm4[1,1,2,3]
+	vpmaxud	xmm4, xmm4, xmm5
+	vmovd	r10d, xmm4
+	vpminud	ymm0, ymm0, ymm1
+	vpminud	ymm0, ymm0, ymm2
+	vpminud	ymm0, ymm0, ymm3
+	vextracti128	xmm1, ymm0, 1
+	vpminud	xmm0, xmm0, xmm1
 	vpshufd	xmm1, xmm0, 78                  # xmm1 = xmm0[2,3,0,1]
 	vpminud	xmm0, xmm0, xmm1
 	vpshufd	xmm1, xmm0, 229                 # xmm1 = xmm0[1,1,2,3]
 	vpminud	xmm0, xmm0, xmm1
-	vmovd	r9d, xmm0
-	cmp	r11, r8
-	je	.LBB1_14
-.LBB1_4:
-	mov	eax, esi
+	vmovd	eax, xmm0
+	mov	esi, r10d
+	cmp	r9, r8
+	je	.LBB1_8
 	.p2align	4, 0x90
-.LBB1_5:                                # =>This Inner Loop Header: Depth=1
-	mov	esi, dword ptr [rdi + 4*r11]
-	cmp	r9d, esi
-	cmovae	r9d, esi
+.LBB1_7:                                # =>This Inner Loop Header: Depth=1
+	mov	esi, dword ptr [rdi + 4*r9]
 	cmp	eax, esi
-	cmova	esi, eax
-	add	r11, 1
-	mov	eax, esi
-	cmp	r8, r11
-	jne	.LBB1_5
-.LBB1_14:
+	cmovae	eax, esi
+	cmp	r10d, esi
+	cmova	esi, r10d
+	add	r9, 1
+	mov	r10d, esi
+	cmp	r8, r9
+	jne	.LBB1_7
+.LBB1_8:
 	mov	dword ptr [rcx], esi
-	mov	dword ptr [rdx], r9d
+	mov	dword ptr [rdx], eax
 	mov	rsp, rbp
 	pop	rbp
 	vzeroupper
@@ -384,387 +216,102 @@ int64_max_min_avx2:                     # @int64_max_min_avx2
 # %bb.0:
 	push	rbp
 	mov	rbp, rsp
-	and	rsp, -32
-	sub	rsp, 224
-	movabs	r9, 9223372036854775807
+	and	rsp, -8
+	movabs	rax, 9223372036854775807
 	test	esi, esi
 	jle	.LBB2_1
 # %bb.2:
 	mov	r8d, esi
-	cmp	esi, 31
-	ja	.LBB2_6
+	cmp	esi, 15
+	ja	.LBB2_4
 # %bb.3:
-	lea	rsi, [r9 + 1]
-	xor	r11d, r11d
-	jmp	.LBB2_4
+	lea	r10, [rax + 1]
+	xor	r9d, r9d
+	jmp	.LBB2_7
 .LBB2_1:
-	lea	rsi, [r9 + 1]
-	jmp	.LBB2_14
-.LBB2_6:
-	mov	r11d, r8d
-	and	r11d, -32
-	lea	rax, [r11 - 32]
-	mov	r10, rax
-	shr	r10, 5
-	add	r10, 1
-	mov	r9d, r10d
-	and	r9d, 3
-	cmp	rax, 96
-	jae	.LBB2_8
-# %bb.7:
-	vpbroadcastq	ymm15, qword ptr [rip + .LCPI2_0] # ymm15 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-	vpbroadcastq	ymm11, qword ptr [rip + .LCPI2_1] # ymm11 = [9223372036854775807,9223372036854775807,9223372036854775807,9223372036854775807]
-	xor	eax, eax
-	vmovdqa	ymmword ptr [rsp + 32], ymm11   # 32-byte Spill
-	vmovdqa	ymm3, ymm11
-	vmovdqa	ymm9, ymm11
-	vmovdqa	ymm5, ymm11
-	vmovdqa	ymm4, ymm11
-	vmovdqa	ymm6, ymm11
-	vmovdqa	ymmword ptr [rsp + 96], ymm11   # 32-byte Spill
-	vmovdqa	ymmword ptr [rsp + 64], ymm15   # 32-byte Spill
-	vmovdqa	ymm2, ymm15
-	vmovdqa	ymm8, ymm15
-	vmovdqa	ymm12, ymm15
-	vmovdqa	ymm13, ymm15
-	vmovdqa	ymm14, ymm15
-	vmovdqa	ymmword ptr [rsp], ymm15        # 32-byte Spill
-	jmp	.LBB2_10
-.LBB2_8:
-	and	r10, -4
-	vpbroadcastq	ymm15, qword ptr [rip + .LCPI2_0] # ymm15 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-	neg	r10
-	vpbroadcastq	ymm11, qword ptr [rip + .LCPI2_1] # ymm11 = [9223372036854775807,9223372036854775807,9223372036854775807,9223372036854775807]
+	lea	rsi, [rax + 1]
+	jmp	.LBB2_8
+.LBB2_4:
+	mov	r9d, r8d
+	vpbroadcastq	ymm4, qword ptr [rip + .LCPI2_0] # ymm4 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+	and	r9d, -16
+	vpbroadcastq	ymm0, qword ptr [rip + .LCPI2_1] # ymm0 = [9223372036854775807,9223372036854775807,9223372036854775807,9223372036854775807]
 	xor	eax, eax
-	vmovdqa	ymmword ptr [rsp + 32], ymm11   # 32-byte Spill
-	vmovdqa	ymm3, ymm11
-	vmovdqa	ymm9, ymm11
-	vmovdqa	ymm5, ymm11
-	vmovdqa	ymm4, ymm11
-	vmovdqa	ymm6, ymm11
-	vmovdqa	ymmword ptr [rsp + 96], ymm11   # 32-byte Spill
-	vmovdqa	ymmword ptr [rsp + 64], ymm15   # 32-byte Spill
-	vmovdqa	ymm2, ymm15
-	vmovdqa	ymm8, ymm15
-	vmovdqa	ymm12, ymm15
-	vmovdqa	ymm13, ymm15
-	vmovdqa	ymm14, ymm15
-	vmovdqa	ymmword ptr [rsp], ymm15        # 32-byte Spill
-	.p2align	4, 0x90
-.LBB2_9:                                # =>This Inner Loop Header: Depth=1
-	vmovdqu	ymm0, ymmword ptr [rdi + 8*rax + 224]
-	vmovdqa	ymm10, ymm8
-	vmovdqa	ymm8, ymm2
-	vmovdqa	ymm2, ymm3
-	vmovdqa	ymm3, ymm9
-	vpcmpgtq	ymm9, ymm0, ymm11
-	vblendvpd	ymm1, ymm0, ymm11, ymm9
-	vmovapd	ymmword ptr [rsp + 160], ymm1   # 32-byte Spill
-	vpcmpgtq	ymm9, ymm15, ymm0
-	vblendvpd	ymm0, ymm0, ymm15, ymm9
-	vmovapd	ymmword ptr [rsp + 128], ymm0   # 32-byte Spill
-	vmovdqu	ymm0, ymmword ptr [rdi + 8*rax + 192]
-	vpcmpgtq	ymm9, ymm0, ymm6
-	vblendvpd	ymm7, ymm0, ymm6, ymm9
-	vpcmpgtq	ymm9, ymm14, ymm0
-	vblendvpd	ymm14, ymm0, ymm14, ymm9
-	vmovdqu	ymm0, ymmword ptr [rdi + 8*rax + 160]
-	vpcmpgtq	ymm9, ymm0, ymm4
-	vblendvpd	ymm6, ymm0, ymm4, ymm9
-	vpcmpgtq	ymm9, ymm13, ymm0
-	vblendvpd	ymm13, ymm0, ymm13, ymm9
-	vmovdqu	ymm9, ymmword ptr [rdi + 8*rax + 128]
-	vpcmpgtq	ymm0, ymm9, ymm5
-	vblendvpd	ymm1, ymm9, ymm5, ymm0
-	vpcmpgtq	ymm5, ymm12, ymm9
-	vblendvpd	ymm12, ymm9, ymm12, ymm5
-	vmovdqu	ymm5, ymmword ptr [rdi + 8*rax + 96]
-	vpcmpgtq	ymm9, ymm5, ymm3
-	vblendvpd	ymm9, ymm5, ymm3, ymm9
-	vpcmpgtq	ymm4, ymm10, ymm5
-	vblendvpd	ymm10, ymm5, ymm10, ymm4
-	vmovdqu	ymm4, ymmword ptr [rdi + 8*rax + 64]
-	vpcmpgtq	ymm5, ymm4, ymm2
-	vblendvpd	ymm5, ymm4, ymm2, ymm5
-	vpcmpgtq	ymm3, ymm8, ymm4
-	vblendvpd	ymm0, ymm4, ymm8, ymm3
-	vmovdqu	ymm2, ymmword ptr [rdi + 8*rax]
-	vmovdqa	ymm4, ymmword ptr [rsp + 96]    # 32-byte Reload
-	vpcmpgtq	ymm3, ymm2, ymm4
-	vblendvpd	ymm3, ymm2, ymm4, ymm3
-	vmovdqa	ymm11, ymmword ptr [rsp]        # 32-byte Reload
-	vpcmpgtq	ymm4, ymm11, ymm2
-	vblendvpd	ymm4, ymm2, ymm11, ymm4
-	vmovdqu	ymm2, ymmword ptr [rdi + 8*rax + 32]
-	vmovdqa	ymm15, ymmword ptr [rsp + 32]   # 32-byte Reload
-	vpcmpgtq	ymm11, ymm2, ymm15
-	vblendvpd	ymm11, ymm2, ymm15, ymm11
-	vmovdqa	ymm8, ymmword ptr [rsp + 64]    # 32-byte Reload
-	vpcmpgtq	ymm15, ymm8, ymm2
-	vblendvpd	ymm2, ymm2, ymm8, ymm15
-	vmovdqu	ymm8, ymmword ptr [rdi + 8*rax + 288]
-	vpcmpgtq	ymm15, ymm8, ymm11
-	vblendvpd	ymm11, ymm8, ymm11, ymm15
-	vmovapd	ymmword ptr [rsp + 32], ymm11   # 32-byte Spill
-	vpcmpgtq	ymm11, ymm2, ymm8
-	vblendvpd	ymm2, ymm8, ymm2, ymm11
-	vmovapd	ymmword ptr [rsp], ymm2         # 32-byte Spill
-	vmovdqu	ymm11, ymmword ptr [rdi + 8*rax + 256]
-	vpcmpgtq	ymm2, ymm11, ymm3
-	vblendvpd	ymm8, ymm11, ymm3, ymm2
-	vpcmpgtq	ymm3, ymm4, ymm11
-	vblendvpd	ymm3, ymm11, ymm4, ymm3
-	vmovdqu	ymm11, ymmword ptr [rdi + 8*rax + 320]
-	vpcmpgtq	ymm4, ymm11, ymm5
-	vblendvpd	ymm4, ymm11, ymm5, ymm4
-	vpcmpgtq	ymm5, ymm0, ymm11
-	vblendvpd	ymm5, ymm11, ymm0, ymm5
-	vmovdqu	ymm0, ymmword ptr [rdi + 8*rax + 352]
-	vpcmpgtq	ymm11, ymm0, ymm9
-	vblendvpd	ymm9, ymm0, ymm9, ymm11
-	vpcmpgtq	ymm11, ymm10, ymm0
-	vblendvpd	ymm10, ymm0, ymm10, ymm11
-	vmovdqu	ymm11, ymmword ptr [rdi + 8*rax + 384]
-	vpcmpgtq	ymm0, ymm11, ymm1
-	vblendvpd	ymm2, ymm11, ymm1, ymm0
-	vpcmpgtq	ymm1, ymm12, ymm11
-	vblendvpd	ymm12, ymm11, ymm12, ymm1
-	vmovdqu	ymm1, ymmword ptr [rdi + 8*rax + 416]
-	vpcmpgtq	ymm11, ymm1, ymm6
-	vblendvpd	ymm6, ymm1, ymm6, ymm11
-	vpcmpgtq	ymm11, ymm13, ymm1
-	vblendvpd	ymm1, ymm1, ymm13, ymm11
-	vmovdqu	ymm11, ymmword ptr [rdi + 8*rax + 448]
-	vpcmpgtq	ymm13, ymm11, ymm7
-	vblendvpd	ymm7, ymm11, ymm7, ymm13
-	vpcmpgtq	ymm13, ymm14, ymm11
-	vblendvpd	ymm13, ymm11, ymm14, ymm13
-	vmovdqu	ymm11, ymmword ptr [rdi + 8*rax + 480]
-	vmovdqa	ymm0, ymmword ptr [rsp + 160]   # 32-byte Reload
-	vpcmpgtq	ymm14, ymm11, ymm0
-	vblendvpd	ymm14, ymm11, ymm0, ymm14
-	vmovdqa	ymm0, ymmword ptr [rsp + 128]   # 32-byte Reload
-	vpcmpgtq	ymm15, ymm0, ymm11
-	vblendvpd	ymm15, ymm11, ymm0, ymm15
-	vmovdqu	ymm0, ymmword ptr [rdi + 8*rax + 736]
-	vpcmpgtq	ymm11, ymm0, ymm14
-	vblendvpd	ymm11, ymm0, ymm14, ymm11
-	vmovapd	ymmword ptr [rsp + 160], ymm11  # 32-byte Spill
-	vpcmpgtq	ymm14, ymm15, ymm0
-	vblendvpd	ymm0, ymm0, ymm15, ymm14
-	vmovapd	ymmword ptr [rsp + 128], ymm0   # 32-byte Spill
-	vmovdqu	ymm0, ymmword ptr [rdi + 8*rax + 704]
-	vpcmpgtq	ymm14, ymm0, ymm7
-	vblendvpd	ymm7, ymm0, ymm7, ymm14
-	vpcmpgtq	ymm14, ymm13, ymm0
-	vblendvpd	ymm14, ymm0, ymm13, ymm14
-	vmovdqu	ymm0, ymmword ptr [rdi + 8*rax + 672]
-	vpcmpgtq	ymm13, ymm0, ymm6
-	vblendvpd	ymm6, ymm0, ymm6, ymm13
-	vpcmpgtq	ymm13, ymm1, ymm0
-	vblendvpd	ymm13, ymm0, ymm1, ymm13
-	vmovdqu	ymm1, ymmword ptr [rdi + 8*rax + 640]
-	vpcmpgtq	ymm0, ymm1, ymm2
-	vblendvpd	ymm0, ymm1, ymm2, ymm0
-	vpcmpgtq	ymm2, ymm12, ymm1
-	vblendvpd	ymm12, ymm1, ymm12, ymm2
-	vmovdqu	ymm1, ymmword ptr [rdi + 8*rax + 608]
-	vpcmpgtq	ymm2, ymm1, ymm9
-	vblendvpd	ymm9, ymm1, ymm9, ymm2
-	vpcmpgtq	ymm2, ymm10, ymm1
-	vblendvpd	ymm10, ymm1, ymm10, ymm2
-	vmovdqu	ymm1, ymmword ptr [rdi + 8*rax + 576]
-	vpcmpgtq	ymm2, ymm1, ymm4
-	vblendvpd	ymm2, ymm1, ymm4, ymm2
-	vpcmpgtq	ymm4, ymm5, ymm1
-	vblendvpd	ymm1, ymm1, ymm5, ymm4
-	vmovdqu	ymm4, ymmword ptr [rdi + 8*rax + 512]
-	vpcmpgtq	ymm5, ymm4, ymm8
-	vblendvpd	ymm5, ymm4, ymm8, ymm5
-	vpcmpgtq	ymm8, ymm3, ymm4
-	vblendvpd	ymm3, ymm4, ymm3, ymm8
-	vmovdqu	ymm4, ymmword ptr [rdi + 8*rax + 544]
-	vmovdqa	ymm11, ymmword ptr [rsp + 32]   # 32-byte Reload
-	vpcmpgtq	ymm8, ymm4, ymm11
-	vblendvpd	ymm8, ymm4, ymm11, ymm8
-	vmovdqa	ymm15, ymmword ptr [rsp]        # 32-byte Reload
-	vpcmpgtq	ymm11, ymm15, ymm4
-	vblendvpd	ymm4, ymm4, ymm15, ymm11
-	vmovdqu	ymm11, ymmword ptr [rdi + 8*rax + 800]
-	vpcmpgtq	ymm15, ymm11, ymm8
-	vblendvpd	ymm8, ymm11, ymm8, ymm15
-	vmovapd	ymmword ptr [rsp + 32], ymm8    # 32-byte Spill
-	vpcmpgtq	ymm8, ymm4, ymm11
-	vblendvpd	ymm4, ymm11, ymm4, ymm8
-	vmovapd	ymmword ptr [rsp + 64], ymm4    # 32-byte Spill
-	vmovdqu	ymm4, ymmword ptr [rdi + 8*rax + 768]
-	vpcmpgtq	ymm11, ymm4, ymm5
-	vblendvpd	ymm5, ymm4, ymm5, ymm11
-	vmovapd	ymmword ptr [rsp + 96], ymm5    # 32-byte Spill
-	vpcmpgtq	ymm5, ymm3, ymm4
-	vblendvpd	ymm3, ymm4, ymm3, ymm5
-	vmovapd	ymmword ptr [rsp], ymm3         # 32-byte Spill
-	vmovdqu	ymm4, ymmword ptr [rdi + 8*rax + 832]
-	vpcmpgtq	ymm3, ymm4, ymm2
-	vblendvpd	ymm3, ymm4, ymm2, ymm3
-	vpcmpgtq	ymm2, ymm1, ymm4
-	vblendvpd	ymm2, ymm4, ymm1, ymm2
-	vmovdqu	ymm1, ymmword ptr [rdi + 8*rax + 864]
-	vpcmpgtq	ymm4, ymm1, ymm9
-	vblendvpd	ymm9, ymm1, ymm9, ymm4
-	vpcmpgtq	ymm5, ymm10, ymm1
-	vblendvpd	ymm8, ymm1, ymm10, ymm5
-	vmovdqu	ymm1, ymmword ptr [rdi + 8*rax + 896]
-	vpcmpgtq	ymm5, ymm1, ymm0
-	vblendvpd	ymm5, ymm1, ymm0, ymm5
-	vpcmpgtq	ymm0, ymm12, ymm1
-	vblendvpd	ymm12, ymm1, ymm12, ymm0
-	vmovdqu	ymm0, ymmword ptr [rdi + 8*rax + 928]
-	vpcmpgtq	ymm1, ymm0, ymm6
-	vblendvpd	ymm4, ymm0, ymm6, ymm1
-	vpcmpgtq	ymm1, ymm13, ymm0
-	vblendvpd	ymm13, ymm0, ymm13, ymm1
-	vmovdqu	ymm0, ymmword ptr [rdi + 8*rax + 960]
-	vpcmpgtq	ymm1, ymm0, ymm7
-	vblendvpd	ymm6, ymm0, ymm7, ymm1
-	vpcmpgtq	ymm1, ymm14, ymm0
-	vblendvpd	ymm14, ymm0, ymm14, ymm1
-	vmovdqu	ymm0, ymmword ptr [rdi + 8*rax + 992]
-	vmovdqa	ymm7, ymmword ptr [rsp + 160]   # 32-byte Reload
-	vpcmpgtq	ymm1, ymm0, ymm7
-	vblendvpd	ymm11, ymm0, ymm7, ymm1
-	vmovdqa	ymm7, ymmword ptr [rsp + 128]   # 32-byte Reload
-	vpcmpgtq	ymm1, ymm7, ymm0
-	vblendvpd	ymm15, ymm0, ymm7, ymm1
-	sub	rax, -128
-	add	r10, 4
-	jne	.LBB2_9
-.LBB2_10:
-	test	r9, r9
-	vmovdqa	ymm7, ymm5
-	vmovdqa	ymm5, ymm9
-	vmovdqa	ymm9, ymmword ptr [rsp + 96]    # 32-byte Reload
-	vmovdqa	ymm10, ymm3
-	je	.LBB2_13
-# %bb.11:
-	lea	rax, [rdi + 8*rax]
-	neg	r9
+	vmovdqa	ymm3, ymm0
+	vmovdqa	ymm2, ymm0
+	vmovdqa	ymm1, ymm0
+	vmovdqa	ymm7, ymm4
+	vmovdqa	ymm6, ymm4
+	vmovdqa	ymm5, ymm4
 	.p2align	4, 0x90
-.LBB2_12:                               # =>This Inner Loop Header: Depth=1
-	vmovdqu	ymm0, ymmword ptr [rax + 32]
-	vmovdqa	ymm3, ymmword ptr [rsp + 32]    # 32-byte Reload
-	vpcmpgtq	ymm1, ymm0, ymm3
-	vblendvpd	ymm3, ymm0, ymm3, ymm1
-	vmovapd	ymmword ptr [rsp + 32], ymm3    # 32-byte Spill
-	vmovdqa	ymm3, ymmword ptr [rsp + 64]    # 32-byte Reload
-	vpcmpgtq	ymm1, ymm3, ymm0
-	vblendvpd	ymm3, ymm0, ymm3, ymm1
-	vmovapd	ymmword ptr [rsp + 64], ymm3    # 32-byte Spill
-	vmovdqu	ymm0, ymmword ptr [rax]
-	vpcmpgtq	ymm1, ymm0, ymm9
-	vblendvpd	ymm9, ymm0, ymm9, ymm1
-	vmovdqa	ymm3, ymmword ptr [rsp]         # 32-byte Reload
-	vpcmpgtq	ymm1, ymm3, ymm0
-	vblendvpd	ymm3, ymm0, ymm3, ymm1
-	vmovapd	ymmword ptr [rsp], ymm3         # 32-byte Spill
-	vmovdqu	ymm0, ymmword ptr [rax + 64]
-	vpcmpgtq	ymm1, ymm0, ymm10
-	vblendvpd	ymm10, ymm0, ymm10, ymm1
-	vpcmpgtq	ymm1, ymm2, ymm0
-	vblendvpd	ymm2, ymm0, ymm2, ymm1
-	vmovdqu	ymm0, ymmword ptr [rax + 96]
-	vpcmpgtq	ymm1, ymm0, ymm5
-	vblendvpd	ymm5, ymm0, ymm5, ymm1
-	vpcmpgtq	ymm1, ymm8, ymm0
-	vblendvpd	ymm8, ymm0, ymm8, ymm1
-	vmovdqu	ymm0, ymmword ptr [rax + 128]
-	vpcmpgtq	ymm1, ymm0, ymm7
-	vblendvpd	ymm7, ymm0, ymm7, ymm1
-	vpcmpgtq	ymm1, ymm12, ymm0
-	vblendvpd	ymm12, ymm0, ymm12, ymm1
-	vmovdqu	ymm0, ymmword ptr [rax + 160]
-	vpcmpgtq	ymm1, ymm0, ymm4
-	vblendvpd	ymm4, ymm0, ymm4, ymm1
-	vpcmpgtq	ymm1, ymm13, ymm0
-	vblendvpd	ymm13, ymm0, ymm13, ymm1
-	vmovdqu	ymm0, ymmword ptr [rax + 192]
-	vpcmpgtq	ymm1, ymm0, ymm6
-	vblendvpd	ymm6, ymm0, ymm6, ymm1
-	vpcmpgtq	ymm1, ymm14, ymm0
-	vblendvpd	ymm14, ymm0, ymm14, ymm1
-	vmovdqu	ymm0, ymmword ptr [rax + 224]
-	vpcmpgtq	ymm1, ymm0, ymm11
-	vblendvpd	ymm11, ymm0, ymm11, ymm1
-	vpcmpgtq	ymm1, ymm15, ymm0
-	vblendvpd	ymm15, ymm0, ymm15, ymm1
-	add	rax, 256
-	inc	r9
-	jne	.LBB2_12
-.LBB2_13:
-	vmovdqa	ymm1, ymmword ptr [rsp + 64]    # 32-byte Reload
-	vpcmpgtq	ymm0, ymm1, ymm13
-	vblendvpd	ymm0, ymm13, ymm1, ymm0
-	vpcmpgtq	ymm1, ymm8, ymm15
-	vblendvpd	ymm1, ymm15, ymm8, ymm1
-	vmovdqa	ymm3, ymmword ptr [rsp]         # 32-byte Reload
-	vpcmpgtq	ymm8, ymm3, ymm12
-	vblendvpd	ymm8, ymm12, ymm3, ymm8
-	vmovdqa	ymm3, ymm9
-	vpcmpgtq	ymm9, ymm2, ymm14
-	vblendvpd	ymm2, ymm14, ymm2, ymm9
-	vpcmpgtq	ymm9, ymm8, ymm2
-	vblendvpd	ymm2, ymm2, ymm8, ymm9
-	vpcmpgtq	ymm8, ymm0, ymm1
-	vblendvpd	ymm0, ymm1, ymm0, ymm8
-	vpcmpgtq	ymm1, ymm2, ymm0
-	vblendvpd	ymm0, ymm0, ymm2, ymm1
+.LBB2_5:                                # =>This Inner Loop Header: Depth=1
+	vmovdqu	ymm8, ymmword ptr [rdi + 8*rax]
+	vpcmpgtq	ymm9, ymm8, ymm0
+	vblendvpd	ymm0, ymm8, ymm0, ymm9
+	vmovdqu	ymm9, ymmword ptr [rdi + 8*rax + 32]
+	vpcmpgtq	ymm10, ymm9, ymm3
+	vblendvpd	ymm3, ymm9, ymm3, ymm10
+	vmovdqu	ymm10, ymmword ptr [rdi + 8*rax + 64]
+	vpcmpgtq	ymm11, ymm10, ymm2
+	vblendvpd	ymm2, ymm10, ymm2, ymm11
+	vmovdqu	ymm11, ymmword ptr [rdi + 8*rax + 96]
+	vpcmpgtq	ymm12, ymm11, ymm1
+	vblendvpd	ymm1, ymm11, ymm1, ymm12
+	vpcmpgtq	ymm12, ymm4, ymm8
+	vblendvpd	ymm4, ymm8, ymm4, ymm12
+	vpcmpgtq	ymm8, ymm7, ymm9
+	vblendvpd	ymm7, ymm9, ymm7, ymm8
+	vpcmpgtq	ymm8, ymm6, ymm10
+	vblendvpd	ymm6, ymm10, ymm6, ymm8
+	vpcmpgtq	ymm8, ymm5, ymm11
+	vblendvpd	ymm5, ymm11, ymm5, ymm8
+	add	rax, 16
+	cmp	r9, rax
+	jne	.LBB2_5
+# %bb.6:
+	vpcmpgtq	ymm8, ymm4, ymm7
+	vblendvpd	ymm4, ymm7, ymm4, ymm8
+	vpcmpgtq	ymm7, ymm4, ymm6
+	vblendvpd	ymm4, ymm6, ymm4, ymm7
+	vpcmpgtq	ymm6, ymm4, ymm5
+	vblendvpd	ymm4, ymm5, ymm4, ymm6
+	vextractf128	xmm5, ymm4, 1
+	vpcmpgtq	xmm6, xmm4, xmm5
+	vblendvpd	xmm4, xmm5, xmm4, xmm6
+	vpermilps	xmm5, xmm4, 78          # xmm5 = xmm4[2,3,0,1]
+	vpcmpgtq	xmm6, xmm4, xmm5
+	vblendvpd	xmm4, xmm5, xmm4, xmm6
+	vmovq	r10, xmm4
+	vpcmpgtq	ymm4, ymm3, ymm0
+	vblendvpd	ymm0, ymm3, ymm0, ymm4
+	vpcmpgtq	ymm3, ymm2, ymm0
+	vblendvpd	ymm0, ymm2, ymm0, ymm3
+	vpcmpgtq	ymm2, ymm1, ymm0
+	vblendvpd	ymm0, ymm1, ymm0, ymm2
 	vextractf128	xmm1, ymm0, 1
-	vpcmpgtq	xmm2, xmm0, xmm1
+	vpcmpgtq	xmm2, xmm1, xmm0
 	vblendvpd	xmm0, xmm1, xmm0, xmm2
 	vpermilps	xmm1, xmm0, 78          # xmm1 = xmm0[2,3,0,1]
-	vpcmpgtq	xmm2, xmm0, xmm1
+	vpcmpgtq	xmm2, xmm1, xmm0
 	vblendvpd	xmm0, xmm1, xmm0, xmm2
-	vmovdqa	ymm2, ymmword ptr [rsp + 32]    # 32-byte Reload
-	vpcmpgtq	ymm1, ymm4, ymm2
-	vblendvpd	ymm1, ymm4, ymm2, ymm1
-	vpcmpgtq	ymm2, ymm11, ymm5
-	vblendvpd	ymm2, ymm11, ymm5, ymm2
-	vpcmpgtq	ymm4, ymm7, ymm3
-	vblendvpd	ymm4, ymm7, ymm3, ymm4
-	vpcmpgtq	ymm5, ymm6, ymm10
-	vblendvpd	ymm3, ymm6, ymm10, ymm5
-	vpcmpgtq	ymm5, ymm3, ymm4
-	vblendvpd	ymm3, ymm3, ymm4, ymm5
-	vpcmpgtq	ymm4, ymm2, ymm1
-	vblendvpd	ymm1, ymm2, ymm1, ymm4
-	vpcmpgtq	ymm2, ymm1, ymm3
-	vblendvpd	ymm1, ymm1, ymm3, ymm2
-	vextractf128	xmm2, ymm1, 1
-	vpcmpgtq	xmm3, xmm2, xmm1
-	vblendvpd	xmm1, xmm2, xmm1, xmm3
-	vpermilps	xmm2, xmm1, 78          # xmm2 = xmm1[2,3,0,1]
-	vpcmpgtq	xmm3, xmm2, xmm1
-	vblendvpd	xmm1, xmm2, xmm1, xmm3
-	vmovq	rsi, xmm0
-	vmovq	r9, xmm1
-	cmp	r11, r8
-	je	.LBB2_14
-.LBB2_4:
-	mov	rax, rsi
+	vmovq	rax, xmm0
+	mov	rsi, r10
+	cmp	r9, r8
+	je	.LBB2_8
 	.p2align	4, 0x90
-.LBB2_5:                                # =>This Inner Loop Header: Depth=1
-	mov	rsi, qword ptr [rdi + 8*r11]
-	cmp	r9, rsi
-	cmovg	r9, rsi
+.LBB2_7:                                # =>This Inner Loop Header: Depth=1
+	mov	rsi, qword ptr [rdi + 8*r9]
 	cmp	rax, rsi
-	cmovge	rsi, rax
-	add	r11, 1
-	mov	rax, rsi
-	cmp	r8, r11
-	jne	.LBB2_5
-.LBB2_14:
+	cmovg	rax, rsi
+	cmp	r10, rsi
+	cmovge	rsi, r10
+	add	r9, 1
+	mov	r10, rsi
+	cmp	r8, r9
+	jne	.LBB2_7
+.LBB2_8:
 	mov	qword ptr [rcx], rsi
-	mov	qword ptr [rdx], r9
+	mov	qword ptr [rdx], rax
 	mov	rsp, rbp
 	pop	rbp
 	vzeroupper
@@ -784,576 +331,136 @@ uint64_max_min_avx2:                    # @uint64_max_min_avx2
 # %bb.0:
 	push	rbp
 	mov	rbp, rsp
-	and	rsp, -32
-	sub	rsp, 288
+	and	rsp, -8
 	test	esi, esi
 	jle	.LBB3_1
 # %bb.2:
 	mov	r8d, esi
-	cmp	esi, 31
-	ja	.LBB3_6
+	cmp	esi, 15
+	ja	.LBB3_4
 # %bb.3:
-	mov	r9, -1
-	xor	r11d, r11d
-	xor	esi, esi
-	jmp	.LBB3_4
+	mov	rax, -1
+	xor	r9d, r9d
+	xor	r10d, r10d
+	jmp	.LBB3_7
 .LBB3_1:
-	mov	r9, -1
+	mov	rax, -1
 	xor	esi, esi
-	jmp	.LBB3_14
-.LBB3_6:
-	mov	r11d, r8d
-	and	r11d, -32
-	lea	rax, [r11 - 32]
-	mov	r10, rax
-	shr	r10, 5
-	add	r10, 1
-	mov	r9d, r10d
-	and	r9d, 3
-	cmp	rax, 96
-	jae	.LBB3_8
-# %bb.7:
-	vpxor	xmm4, xmm4, xmm4
-	vpcmpeqd	ymm0, ymm0, ymm0
-	vmovdqa	ymmword ptr [rsp + 64], ymm0    # 32-byte Spill
-	xor	eax, eax
-	vpcmpeqd	ymm0, ymm0, ymm0
-	vmovdqa	ymmword ptr [rsp + 96], ymm0    # 32-byte Spill
-	vpcmpeqd	ymm5, ymm5, ymm5
-	vpcmpeqd	ymm7, ymm7, ymm7
-	vpcmpeqd	ymm12, ymm12, ymm12
-	vpcmpeqd	ymm10, ymm10, ymm10
-	vpcmpeqd	ymm11, ymm11, ymm11
-	vpcmpeqd	ymm13, ymm13, ymm13
-	vpxor	xmm0, xmm0, xmm0
-	vmovdqa	ymmword ptr [rsp + 32], ymm0    # 32-byte Spill
-	vpxor	xmm0, xmm0, xmm0
-	vmovdqa	ymmword ptr [rsp], ymm0         # 32-byte Spill
-	vpxor	xmm3, xmm3, xmm3
-	vpxor	xmm9, xmm9, xmm9
-	vpxor	xmm8, xmm8, xmm8
-	vpxor	xmm15, xmm15, xmm15
-	vpxor	xmm0, xmm0, xmm0
-	jmp	.LBB3_10
-.LBB3_8:
-	and	r10, -4
-	neg	r10
-	vpxor	xmm4, xmm4, xmm4
-	vpcmpeqd	ymm0, ymm0, ymm0
-	vmovdqa	ymmword ptr [rsp + 64], ymm0    # 32-byte Spill
+	jmp	.LBB3_8
+.LBB3_4:
+	mov	r9d, r8d
+	and	r9d, -16
+	vpxor	xmm5, xmm5, xmm5
+	vpcmpeqd	ymm1, ymm1, ymm1
 	xor	eax, eax
-	vpbroadcastq	ymm14, qword ptr [rip + .LCPI3_0] # ymm14 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-	vpcmpeqd	ymm0, ymm0, ymm0
-	vmovdqa	ymmword ptr [rsp + 96], ymm0    # 32-byte Spill
-	vpcmpeqd	ymm5, ymm5, ymm5
-	vpcmpeqd	ymm7, ymm7, ymm7
-	vpcmpeqd	ymm12, ymm12, ymm12
-	vpcmpeqd	ymm10, ymm10, ymm10
-	vpcmpeqd	ymm11, ymm11, ymm11
-	vpcmpeqd	ymm13, ymm13, ymm13
-	vpxor	xmm0, xmm0, xmm0
-	vmovdqa	ymmword ptr [rsp + 32], ymm0    # 32-byte Spill
-	vpxor	xmm0, xmm0, xmm0
-	vmovdqa	ymmword ptr [rsp], ymm0         # 32-byte Spill
-	vpxor	xmm3, xmm3, xmm3
-	vpxor	xmm9, xmm9, xmm9
+	vpbroadcastq	ymm0, qword ptr [rip + .LCPI3_0] # ymm0 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+	vpcmpeqd	ymm4, ymm4, ymm4
+	vpcmpeqd	ymm3, ymm3, ymm3
+	vpcmpeqd	ymm2, ymm2, ymm2
 	vpxor	xmm8, xmm8, xmm8
-	vpxor	xmm15, xmm15, xmm15
-	vpxor	xmm0, xmm0, xmm0
+	vpxor	xmm7, xmm7, xmm7
+	vpxor	xmm6, xmm6, xmm6
 	.p2align	4, 0x90
-.LBB3_9:                                # =>This Inner Loop Header: Depth=1
-	vmovdqu	ymm1, ymmword ptr [rdi + 8*rax + 224]
-	vpxor	ymm2, ymm14, ymm1
-	vmovdqa	ymm6, ymm3
-	vpxor	ymm3, ymm13, ymm14
-	vpcmpgtq	ymm3, ymm2, ymm3
-	vblendvpd	ymm3, ymm1, ymm13, ymm3
-	vmovapd	ymmword ptr [rsp + 128], ymm3   # 32-byte Spill
-	vpxor	ymm3, ymm14, ymm0
-	vpcmpgtq	ymm2, ymm3, ymm2
-	vblendvpd	ymm0, ymm1, ymm0, ymm2
-	vmovapd	ymmword ptr [rsp + 224], ymm0   # 32-byte Spill
-	vmovdqu	ymm0, ymmword ptr [rdi + 8*rax + 192]
-	vpxor	ymm1, ymm14, ymm0
-	vpxor	ymm2, ymm11, ymm14
-	vpcmpgtq	ymm2, ymm1, ymm2
-	vblendvpd	ymm2, ymm0, ymm11, ymm2
-	vmovapd	ymmword ptr [rsp + 160], ymm2   # 32-byte Spill
-	vpxor	ymm2, ymm15, ymm14
-	vpcmpgtq	ymm1, ymm2, ymm1
-	vblendvpd	ymm0, ymm0, ymm15, ymm1
-	vmovapd	ymmword ptr [rsp + 192], ymm0   # 32-byte Spill
-	vmovdqu	ymm0, ymmword ptr [rdi + 8*rax + 160]
-	vpxor	ymm1, ymm14, ymm0
-	vpxor	ymm2, ymm10, ymm14
-	vpcmpgtq	ymm2, ymm1, ymm2
-	vmovdqa	ymm3, ymm8
-	vblendvpd	ymm8, ymm0, ymm10, ymm2
-	vpxor	ymm2, ymm14, ymm3
-	vpcmpgtq	ymm1, ymm2, ymm1
-	vblendvpd	ymm13, ymm0, ymm3, ymm1
-	vmovdqu	ymm0, ymmword ptr [rdi + 8*rax + 128]
-	vpxor	ymm2, ymm14, ymm0
-	vpxor	ymm1, ymm12, ymm14
-	vpcmpgtq	ymm1, ymm2, ymm1
-	vblendvpd	ymm1, ymm0, ymm12, ymm1
-	vpxor	ymm3, ymm9, ymm14
-	vpcmpgtq	ymm2, ymm3, ymm2
-	vblendvpd	ymm12, ymm0, ymm9, ymm2
-	vmovdqu	ymm2, ymmword ptr [rdi + 8*rax + 96]
-	vpxor	ymm0, ymm14, ymm7
-	vpxor	ymm3, ymm14, ymm2
-	vpcmpgtq	ymm0, ymm3, ymm0
-	vblendvpd	ymm0, ymm2, ymm7, ymm0
-	vmovdqa	ymm15, ymm4
-	vpxor	ymm4, ymm14, ymm6
-	vpcmpgtq	ymm3, ymm4, ymm3
-	vblendvpd	ymm10, ymm2, ymm6, ymm3
-	vmovdqu	ymm2, ymmword ptr [rdi + 8*rax + 64]
-	vpxor	ymm3, ymm14, ymm5
-	vpxor	ymm4, ymm14, ymm2
-	vpcmpgtq	ymm3, ymm4, ymm3
-	vblendvpd	ymm5, ymm2, ymm5, ymm3
-	vmovdqa	ymm6, ymmword ptr [rsp]         # 32-byte Reload
-	vpxor	ymm3, ymm14, ymm6
-	vpcmpgtq	ymm3, ymm3, ymm4
-	vblendvpd	ymm9, ymm2, ymm6, ymm3
-	vmovdqu	ymm2, ymmword ptr [rdi + 8*rax]
-	vmovdqa	ymm7, ymmword ptr [rsp + 64]    # 32-byte Reload
-	vpxor	ymm3, ymm14, ymm7
-	vpxor	ymm4, ymm14, ymm2
-	vpcmpgtq	ymm3, ymm4, ymm3
-	vblendvpd	ymm3, ymm2, ymm7, ymm3
-	vpxor	ymm11, ymm15, ymm14
-	vpcmpgtq	ymm4, ymm11, ymm4
-	vblendvpd	ymm4, ymm2, ymm15, ymm4
-	vmovdqu	ymm2, ymmword ptr [rdi + 8*rax + 32]
-	vmovdqa	ymm15, ymmword ptr [rsp + 96]   # 32-byte Reload
-	vpxor	ymm11, ymm15, ymm14
-	vpxor	ymm7, ymm14, ymm2
-	vpcmpgtq	ymm11, ymm7, ymm11
-	vblendvpd	ymm11, ymm2, ymm15, ymm11
-	vmovdqa	ymm6, ymmword ptr [rsp + 32]    # 32-byte Reload
-	vpxor	ymm15, ymm14, ymm6
-	vpcmpgtq	ymm7, ymm15, ymm7
-	vblendvpd	ymm2, ymm2, ymm6, ymm7
-	vmovdqu	ymm6, ymmword ptr [rdi + 8*rax + 288]
-	vxorpd	ymm7, ymm11, ymm14
-	vpxor	ymm15, ymm14, ymm6
-	vpcmpgtq	ymm7, ymm15, ymm7
-	vblendvpd	ymm7, ymm6, ymm11, ymm7
-	vmovapd	ymmword ptr [rsp + 96], ymm7    # 32-byte Spill
-	vxorpd	ymm7, ymm14, ymm2
-	vpcmpgtq	ymm7, ymm7, ymm15
-	vblendvpd	ymm2, ymm6, ymm2, ymm7
-	vmovapd	ymmword ptr [rsp + 64], ymm2    # 32-byte Spill
-	vmovdqu	ymm6, ymmword ptr [rdi + 8*rax + 256]
-	vxorpd	ymm7, ymm14, ymm3
-	vpxor	ymm11, ymm14, ymm6
-	vpcmpgtq	ymm7, ymm11, ymm7
-	vblendvpd	ymm2, ymm6, ymm3, ymm7
-	vmovapd	ymmword ptr [rsp], ymm2         # 32-byte Spill
-	vxorpd	ymm7, ymm14, ymm4
-	vpcmpgtq	ymm7, ymm7, ymm11
-	vblendvpd	ymm2, ymm6, ymm4, ymm7
-	vmovapd	ymmword ptr [rsp + 32], ymm2    # 32-byte Spill
-	vmovdqu	ymm6, ymmword ptr [rdi + 8*rax + 320]
-	vxorpd	ymm7, ymm14, ymm5
-	vpxor	ymm11, ymm14, ymm6
-	vpcmpgtq	ymm7, ymm11, ymm7
+.LBB3_5:                                # =>This Inner Loop Header: Depth=1
+	vmovdqu	ymm9, ymmword ptr [rdi + 8*rax]
+	vpxor	ymm10, ymm1, ymm0
+	vpxor	ymm11, ymm9, ymm0
+	vpcmpgtq	ymm10, ymm11, ymm10
+	vblendvpd	ymm1, ymm9, ymm1, ymm10
+	vpxor	ymm10, ymm5, ymm0
+	vpcmpgtq	ymm10, ymm10, ymm11
+	vblendvpd	ymm5, ymm9, ymm5, ymm10
+	vmovdqu	ymm9, ymmword ptr [rdi + 8*rax + 32]
+	vpxor	ymm10, ymm4, ymm0
+	vpxor	ymm11, ymm9, ymm0
+	vpcmpgtq	ymm10, ymm11, ymm10
+	vblendvpd	ymm4, ymm9, ymm4, ymm10
+	vpxor	ymm10, ymm8, ymm0
+	vpcmpgtq	ymm10, ymm10, ymm11
+	vmovdqu	ymm11, ymmword ptr [rdi + 8*rax + 64]
+	vblendvpd	ymm8, ymm9, ymm8, ymm10
+	vpxor	ymm9, ymm3, ymm0
+	vpxor	ymm10, ymm11, ymm0
+	vpcmpgtq	ymm9, ymm10, ymm9
+	vblendvpd	ymm3, ymm11, ymm3, ymm9
+	vpxor	ymm9, ymm7, ymm0
+	vpcmpgtq	ymm9, ymm9, ymm10
+	vblendvpd	ymm7, ymm11, ymm7, ymm9
+	vmovdqu	ymm9, ymmword ptr [rdi + 8*rax + 96]
+	vpxor	ymm10, ymm2, ymm0
+	vpxor	ymm11, ymm9, ymm0
+	vpcmpgtq	ymm10, ymm11, ymm10
+	vblendvpd	ymm2, ymm9, ymm2, ymm10
+	vpxor	ymm10, ymm6, ymm0
+	vpcmpgtq	ymm10, ymm10, ymm11
+	vblendvpd	ymm6, ymm9, ymm6, ymm10
+	add	rax, 16
+	cmp	r9, rax
+	jne	.LBB3_5
+# %bb.6:
+	vpxor	ymm9, ymm8, ymm0
+	vpxor	ymm10, ymm5, ymm0
+	vpcmpgtq	ymm9, ymm10, ymm9
+	vblendvpd	ymm5, ymm8, ymm5, ymm9
+	vxorpd	ymm8, ymm5, ymm0
+	vpxor	ymm9, ymm7, ymm0
+	vpcmpgtq	ymm8, ymm8, ymm9
+	vblendvpd	ymm5, ymm7, ymm5, ymm8
+	vxorpd	ymm7, ymm5, ymm0
+	vpxor	ymm8, ymm6, ymm0
+	vpcmpgtq	ymm7, ymm7, ymm8
 	vblendvpd	ymm5, ymm6, ymm5, ymm7
-	vxorpd	ymm7, ymm9, ymm14
-	vpcmpgtq	ymm7, ymm7, ymm11
-	vblendvpd	ymm7, ymm6, ymm9, ymm7
-	vmovdqu	ymm6, ymmword ptr [rdi + 8*rax + 352]
-	vxorpd	ymm9, ymm14, ymm0
-	vpxor	ymm11, ymm14, ymm6
-	vpcmpgtq	ymm9, ymm11, ymm9
-	vblendvpd	ymm9, ymm6, ymm0, ymm9
-	vxorpd	ymm0, ymm10, ymm14
-	vpcmpgtq	ymm0, ymm0, ymm11
-	vblendvpd	ymm10, ymm6, ymm10, ymm0
-	vmovdqu	ymm6, ymmword ptr [rdi + 8*rax + 384]
-	vxorpd	ymm0, ymm14, ymm1
-	vpxor	ymm11, ymm14, ymm6
-	vpcmpgtq	ymm0, ymm11, ymm0
-	vblendvpd	ymm4, ymm6, ymm1, ymm0
-	vxorpd	ymm1, ymm12, ymm14
-	vpcmpgtq	ymm1, ymm1, ymm11
-	vblendvpd	ymm3, ymm6, ymm12, ymm1
-	vmovdqu	ymm11, ymmword ptr [rdi + 8*rax + 416]
-	vxorpd	ymm6, ymm8, ymm14
-	vpxor	ymm12, ymm11, ymm14
-	vpcmpgtq	ymm6, ymm12, ymm6
-	vblendvpd	ymm6, ymm11, ymm8, ymm6
-	vxorpd	ymm8, ymm13, ymm14
-	vpcmpgtq	ymm8, ymm8, ymm12
-	vblendvpd	ymm12, ymm11, ymm13, ymm8
-	vmovdqu	ymm11, ymmword ptr [rdi + 8*rax + 448]
-	vmovdqa	ymm0, ymmword ptr [rsp + 160]   # 32-byte Reload
-	vpxor	ymm8, ymm14, ymm0
-	vpxor	ymm13, ymm11, ymm14
-	vpcmpgtq	ymm8, ymm13, ymm8
-	vblendvpd	ymm8, ymm11, ymm0, ymm8
-	vmovdqa	ymm0, ymmword ptr [rsp + 192]   # 32-byte Reload
-	vpxor	ymm15, ymm14, ymm0
-	vpcmpgtq	ymm13, ymm15, ymm13
-	vblendvpd	ymm13, ymm11, ymm0, ymm13
-	vmovdqu	ymm11, ymmword ptr [rdi + 8*rax + 480]
-	vmovdqa	ymm1, ymmword ptr [rsp + 128]   # 32-byte Reload
-	vpxor	ymm15, ymm14, ymm1
-	vpxor	ymm0, ymm11, ymm14
-	vpcmpgtq	ymm15, ymm0, ymm15
-	vblendvpd	ymm1, ymm11, ymm1, ymm15
-	vmovdqa	ymm2, ymmword ptr [rsp + 224]   # 32-byte Reload
-	vpxor	ymm15, ymm14, ymm2
-	vpcmpgtq	ymm0, ymm15, ymm0
-	vblendvpd	ymm15, ymm11, ymm2, ymm0
-	vmovdqu	ymm0, ymmword ptr [rdi + 8*rax + 736]
-	vxorpd	ymm11, ymm14, ymm1
-	vpxor	ymm2, ymm14, ymm0
-	vpcmpgtq	ymm11, ymm2, ymm11
-	vblendvpd	ymm1, ymm0, ymm1, ymm11
-	vmovapd	ymmword ptr [rsp + 128], ymm1   # 32-byte Spill
-	vxorpd	ymm1, ymm15, ymm14
-	vpcmpgtq	ymm1, ymm1, ymm2
-	vblendvpd	ymm0, ymm0, ymm15, ymm1
-	vmovapd	ymmword ptr [rsp + 224], ymm0   # 32-byte Spill
-	vmovdqu	ymm0, ymmword ptr [rdi + 8*rax + 704]
-	vxorpd	ymm1, ymm8, ymm14
-	vpxor	ymm2, ymm14, ymm0
-	vpcmpgtq	ymm1, ymm2, ymm1
-	vblendvpd	ymm1, ymm0, ymm8, ymm1
-	vmovapd	ymmword ptr [rsp + 160], ymm1   # 32-byte Spill
-	vxorpd	ymm1, ymm13, ymm14
-	vpcmpgtq	ymm1, ymm1, ymm2
-	vblendvpd	ymm0, ymm0, ymm13, ymm1
-	vmovapd	ymmword ptr [rsp + 192], ymm0   # 32-byte Spill
-	vmovdqu	ymm0, ymmword ptr [rdi + 8*rax + 672]
-	vxorpd	ymm1, ymm14, ymm6
-	vpxor	ymm2, ymm14, ymm0
-	vpcmpgtq	ymm1, ymm2, ymm1
-	vblendvpd	ymm15, ymm0, ymm6, ymm1
-	vxorpd	ymm1, ymm12, ymm14
-	vpcmpgtq	ymm1, ymm1, ymm2
-	vblendvpd	ymm13, ymm0, ymm12, ymm1
-	vmovdqu	ymm0, ymmword ptr [rdi + 8*rax + 640]
-	vxorpd	ymm1, ymm14, ymm4
-	vpxor	ymm2, ymm14, ymm0
-	vpcmpgtq	ymm1, ymm2, ymm1
-	vblendvpd	ymm12, ymm0, ymm4, ymm1
-	vxorpd	ymm1, ymm14, ymm3
-	vpcmpgtq	ymm1, ymm1, ymm2
-	vblendvpd	ymm8, ymm0, ymm3, ymm1
-	vmovdqu	ymm2, ymmword ptr [rdi + 8*rax + 608]
-	vxorpd	ymm1, ymm9, ymm14
-	vpxor	ymm3, ymm14, ymm2
-	vpcmpgtq	ymm1, ymm3, ymm1
-	vblendvpd	ymm1, ymm2, ymm9, ymm1
-	vxorpd	ymm4, ymm10, ymm14
-	vpcmpgtq	ymm3, ymm4, ymm3
-	vblendvpd	ymm10, ymm2, ymm10, ymm3
-	vmovdqu	ymm2, ymmword ptr [rdi + 8*rax + 576]
-	vxorpd	ymm3, ymm14, ymm5
-	vpxor	ymm4, ymm14, ymm2
-	vpcmpgtq	ymm3, ymm4, ymm3
-	vblendvpd	ymm5, ymm2, ymm5, ymm3
-	vxorpd	ymm3, ymm14, ymm7
-	vpcmpgtq	ymm3, ymm3, ymm4
-	vblendvpd	ymm9, ymm2, ymm7, ymm3
-	vmovdqu	ymm2, ymmword ptr [rdi + 8*rax + 512]
-	vmovdqa	ymm0, ymmword ptr [rsp]         # 32-byte Reload
-	vpxor	ymm3, ymm14, ymm0
-	vpxor	ymm4, ymm14, ymm2
-	vpcmpgtq	ymm3, ymm4, ymm3
-	vblendvpd	ymm3, ymm2, ymm0, ymm3
-	vmovdqa	ymm0, ymmword ptr [rsp + 32]    # 32-byte Reload
-	vpxor	ymm6, ymm14, ymm0
-	vpcmpgtq	ymm4, ymm6, ymm4
-	vblendvpd	ymm4, ymm2, ymm0, ymm4
-	vmovdqu	ymm2, ymmword ptr [rdi + 8*rax + 544]
-	vmovdqa	ymm0, ymmword ptr [rsp + 96]    # 32-byte Reload
-	vpxor	ymm6, ymm14, ymm0
-	vpxor	ymm7, ymm14, ymm2
+	vextractf128	xmm6, ymm5, 1
+	vxorpd	xmm8, xmm6, xmm0
+	vxorpd	xmm7, xmm5, xmm0
+	vpcmpgtq	xmm7, xmm7, xmm8
+	vblendvpd	xmm5, xmm6, xmm5, xmm7
+	vpermilps	xmm6, xmm5, 78          # xmm6 = xmm5[2,3,0,1]
+	vxorpd	xmm8, xmm5, xmm0
+	vxorpd	xmm7, xmm6, xmm0
+	vpcmpgtq	xmm7, xmm8, xmm7
+	vblendvpd	xmm5, xmm6, xmm5, xmm7
+	vpxor	ymm6, ymm1, ymm0
+	vpxor	ymm7, ymm4, ymm0
 	vpcmpgtq	ymm6, ymm7, ymm6
-	vblendvpd	ymm6, ymm2, ymm0, ymm6
-	vmovdqa	ymm0, ymmword ptr [rsp + 64]    # 32-byte Reload
-	vpxor	ymm11, ymm14, ymm0
-	vpcmpgtq	ymm7, ymm11, ymm7
-	vblendvpd	ymm2, ymm2, ymm0, ymm7
-	vmovdqu	ymm7, ymmword ptr [rdi + 8*rax + 800]
-	vxorpd	ymm11, ymm14, ymm6
-	vpxor	ymm0, ymm14, ymm7
-	vpcmpgtq	ymm11, ymm0, ymm11
-	vblendvpd	ymm6, ymm7, ymm6, ymm11
-	vmovapd	ymmword ptr [rsp + 96], ymm6    # 32-byte Spill
-	vxorpd	ymm6, ymm14, ymm2
-	vpcmpgtq	ymm0, ymm6, ymm0
-	vblendvpd	ymm0, ymm7, ymm2, ymm0
-	vmovapd	ymmword ptr [rsp + 32], ymm0    # 32-byte Spill
-	vmovdqu	ymm0, ymmword ptr [rdi + 8*rax + 768]
-	vxorpd	ymm2, ymm14, ymm3
-	vpxor	ymm7, ymm14, ymm0
-	vpcmpgtq	ymm2, ymm7, ymm2
-	vblendvpd	ymm2, ymm0, ymm3, ymm2
-	vmovapd	ymmword ptr [rsp + 64], ymm2    # 32-byte Spill
-	vxorpd	ymm2, ymm14, ymm4
-	vpcmpgtq	ymm2, ymm2, ymm7
-	vblendvpd	ymm4, ymm0, ymm4, ymm2
-	vmovdqu	ymm0, ymmword ptr [rdi + 8*rax + 832]
-	vxorpd	ymm2, ymm14, ymm5
-	vpxor	ymm3, ymm14, ymm0
-	vpcmpgtq	ymm2, ymm3, ymm2
-	vblendvpd	ymm5, ymm0, ymm5, ymm2
-	vxorpd	ymm2, ymm9, ymm14
-	vpcmpgtq	ymm2, ymm2, ymm3
-	vblendvpd	ymm0, ymm0, ymm9, ymm2
-	vmovapd	ymmword ptr [rsp], ymm0         # 32-byte Spill
-	vmovdqu	ymm0, ymmword ptr [rdi + 8*rax + 864]
-	vxorpd	ymm2, ymm14, ymm1
-	vpxor	ymm3, ymm14, ymm0
-	vpcmpgtq	ymm2, ymm3, ymm2
-	vblendvpd	ymm7, ymm0, ymm1, ymm2
-	vxorpd	ymm1, ymm10, ymm14
-	vpcmpgtq	ymm1, ymm1, ymm3
-	vblendvpd	ymm3, ymm0, ymm10, ymm1
-	vmovdqu	ymm0, ymmword ptr [rdi + 8*rax + 896]
-	vxorpd	ymm1, ymm12, ymm14
-	vpxor	ymm2, ymm14, ymm0
-	vpcmpgtq	ymm1, ymm2, ymm1
-	vblendvpd	ymm12, ymm0, ymm12, ymm1
-	vxorpd	ymm1, ymm8, ymm14
-	vpcmpgtq	ymm1, ymm1, ymm2
-	vblendvpd	ymm9, ymm0, ymm8, ymm1
-	vmovdqu	ymm0, ymmword ptr [rdi + 8*rax + 928]
-	vxorpd	ymm1, ymm15, ymm14
-	vpxor	ymm2, ymm14, ymm0
-	vpcmpgtq	ymm1, ymm2, ymm1
-	vblendvpd	ymm10, ymm0, ymm15, ymm1
-	vxorpd	ymm1, ymm13, ymm14
-	vpcmpgtq	ymm1, ymm1, ymm2
-	vblendvpd	ymm8, ymm0, ymm13, ymm1
-	vmovdqu	ymm0, ymmword ptr [rdi + 8*rax + 960]
-	vmovdqa	ymm6, ymmword ptr [rsp + 160]   # 32-byte Reload
-	vpxor	ymm1, ymm14, ymm6
-	vpxor	ymm2, ymm14, ymm0
-	vpcmpgtq	ymm1, ymm2, ymm1
-	vblendvpd	ymm11, ymm0, ymm6, ymm1
-	vmovdqa	ymm6, ymmword ptr [rsp + 192]   # 32-byte Reload
-	vpxor	ymm1, ymm14, ymm6
-	vpcmpgtq	ymm1, ymm1, ymm2
-	vblendvpd	ymm15, ymm0, ymm6, ymm1
-	vmovdqu	ymm0, ymmword ptr [rdi + 8*rax + 992]
-	vmovdqa	ymm6, ymmword ptr [rsp + 128]   # 32-byte Reload
-	vpxor	ymm1, ymm14, ymm6
-	vpxor	ymm2, ymm14, ymm0
-	vpcmpgtq	ymm1, ymm2, ymm1
-	vblendvpd	ymm13, ymm0, ymm6, ymm1
-	vmovdqa	ymm6, ymmword ptr [rsp + 224]   # 32-byte Reload
-	vpxor	ymm1, ymm14, ymm6
-	vpcmpgtq	ymm1, ymm1, ymm2
-	vblendvpd	ymm0, ymm0, ymm6, ymm1
-	sub	rax, -128
-	add	r10, 4
-	jne	.LBB3_9
-.LBB3_10:
-	vmovaps	ymmword ptr [rsp + 128], ymm10  # 32-byte Spill
-	test	r9, r9
-	vmovdqa	ymm10, ymm12
-	vmovdqa	ymm12, ymm3
-	je	.LBB3_13
-# %bb.11:
-	lea	rax, [rdi + 8*rax]
-	neg	r9
-	vpbroadcastq	ymm14, qword ptr [rip + .LCPI3_0] # ymm14 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-	.p2align	4, 0x90
-.LBB3_12:                               # =>This Inner Loop Header: Depth=1
-	vmovdqu	ymm1, ymmword ptr [rax + 32]
-	vmovdqa	ymm6, ymm7
-	vmovdqa	ymm7, ymm5
-	vmovdqa	ymm5, ymm4
-	vmovdqa	ymm4, ymmword ptr [rsp + 96]    # 32-byte Reload
-	vpxor	ymm2, ymm14, ymm4
-	vpxor	ymm3, ymm14, ymm1
-	vpcmpgtq	ymm2, ymm3, ymm2
-	vblendvpd	ymm4, ymm1, ymm4, ymm2
-	vmovapd	ymmword ptr [rsp + 96], ymm4    # 32-byte Spill
-	vmovdqa	ymm4, ymmword ptr [rsp + 32]    # 32-byte Reload
-	vpxor	ymm2, ymm14, ymm4
-	vpcmpgtq	ymm2, ymm2, ymm3
-	vblendvpd	ymm4, ymm1, ymm4, ymm2
-	vmovapd	ymmword ptr [rsp + 32], ymm4    # 32-byte Spill
-	vmovdqu	ymm1, ymmword ptr [rax]
-	vmovdqa	ymm4, ymmword ptr [rsp + 64]    # 32-byte Reload
-	vpxor	ymm2, ymm14, ymm4
-	vpxor	ymm3, ymm14, ymm1
-	vpcmpgtq	ymm2, ymm3, ymm2
-	vblendvpd	ymm4, ymm1, ymm4, ymm2
-	vmovapd	ymmword ptr [rsp + 64], ymm4    # 32-byte Spill
-	vmovdqa	ymm4, ymm5
-	vmovdqa	ymm5, ymm7
-	vmovdqa	ymm7, ymm6
-	vpxor	ymm2, ymm14, ymm4
-	vpcmpgtq	ymm2, ymm2, ymm3
-	vmovdqu	ymm3, ymmword ptr [rax + 64]
-	vblendvpd	ymm4, ymm1, ymm4, ymm2
-	vpxor	ymm1, ymm14, ymm3
-	vpxor	ymm2, ymm14, ymm5
-	vpcmpgtq	ymm2, ymm1, ymm2
-	vblendvpd	ymm5, ymm3, ymm5, ymm2
-	vmovdqa	ymm6, ymmword ptr [rsp]         # 32-byte Reload
-	vpxor	ymm2, ymm14, ymm6
-	vpcmpgtq	ymm1, ymm2, ymm1
-	vblendvpd	ymm6, ymm3, ymm6, ymm1
-	vmovapd	ymmword ptr [rsp], ymm6         # 32-byte Spill
-	vmovdqu	ymm1, ymmword ptr [rax + 96]
-	vpxor	ymm2, ymm14, ymm1
-	vpxor	ymm3, ymm14, ymm7
-	vpcmpgtq	ymm3, ymm2, ymm3
-	vblendvpd	ymm7, ymm1, ymm7, ymm3
-	vpxor	ymm3, ymm12, ymm14
-	vpcmpgtq	ymm2, ymm3, ymm2
-	vmovdqu	ymm3, ymmword ptr [rax + 128]
-	vblendvpd	ymm12, ymm1, ymm12, ymm2
-	vpxor	ymm1, ymm14, ymm3
-	vpxor	ymm2, ymm10, ymm14
-	vpcmpgtq	ymm2, ymm1, ymm2
-	vblendvpd	ymm10, ymm3, ymm10, ymm2
-	vpxor	ymm2, ymm9, ymm14
-	vpcmpgtq	ymm1, ymm2, ymm1
-	vblendvpd	ymm9, ymm3, ymm9, ymm1
-	vmovdqu	ymm1, ymmword ptr [rax + 160]
-	vpxor	ymm2, ymm14, ymm1
-	vmovdqa	ymm6, ymmword ptr [rsp + 128]   # 32-byte Reload
-	vpxor	ymm3, ymm14, ymm6
-	vpcmpgtq	ymm3, ymm2, ymm3
-	vblendvpd	ymm6, ymm1, ymm6, ymm3
-	vmovapd	ymmword ptr [rsp + 128], ymm6   # 32-byte Spill
-	vpxor	ymm3, ymm8, ymm14
-	vpcmpgtq	ymm2, ymm3, ymm2
-	vmovdqu	ymm3, ymmword ptr [rax + 192]
-	vblendvpd	ymm8, ymm1, ymm8, ymm2
-	vpxor	ymm1, ymm14, ymm3
-	vpxor	ymm2, ymm11, ymm14
-	vpcmpgtq	ymm2, ymm1, ymm2
-	vblendvpd	ymm11, ymm3, ymm11, ymm2
-	vpxor	ymm2, ymm15, ymm14
-	vpcmpgtq	ymm1, ymm2, ymm1
-	vblendvpd	ymm15, ymm3, ymm15, ymm1
-	vmovdqu	ymm1, ymmword ptr [rax + 224]
-	vpxor	ymm2, ymm14, ymm1
-	vpxor	ymm3, ymm13, ymm14
-	vpcmpgtq	ymm3, ymm2, ymm3
-	vblendvpd	ymm13, ymm1, ymm13, ymm3
-	vpxor	ymm3, ymm14, ymm0
-	vpcmpgtq	ymm2, ymm3, ymm2
-	vblendvpd	ymm0, ymm1, ymm0, ymm2
-	add	rax, 256
-	inc	r9
-	jne	.LBB3_12
-.LBB3_13:
-	vpbroadcastq	ymm14, qword ptr [rip + .LCPI3_0] # ymm14 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-	vmovdqa	ymm3, ymmword ptr [rsp]         # 32-byte Reload
-	vpxor	ymm1, ymm14, ymm3
-	vpxor	ymm2, ymm15, ymm14
-	vpcmpgtq	ymm1, ymm1, ymm2
-	vblendvpd	ymm1, ymm15, ymm3, ymm1
-	vpxor	ymm2, ymm14, ymm4
-	vpxor	ymm3, ymm9, ymm14
-	vpcmpgtq	ymm2, ymm2, ymm3
-	vblendvpd	ymm2, ymm9, ymm4, ymm2
-	vpxor	ymm3, ymm12, ymm14
-	vpxor	ymm9, ymm14, ymm0
-	vpcmpgtq	ymm3, ymm3, ymm9
-	vblendvpd	ymm0, ymm0, ymm12, ymm3
-	vmovdqa	ymm4, ymmword ptr [rsp + 32]    # 32-byte Reload
-	vpxor	ymm3, ymm14, ymm4
-	vpxor	ymm9, ymm8, ymm14
-	vpcmpgtq	ymm3, ymm3, ymm9
-	vblendvpd	ymm3, ymm8, ymm4, ymm3
-	vxorpd	ymm6, ymm14, ymm3
-	vxorpd	ymm9, ymm14, ymm0
-	vpcmpgtq	ymm6, ymm6, ymm9
-	vblendvpd	ymm0, ymm0, ymm3, ymm6
-	vxorpd	ymm3, ymm14, ymm2
-	vxorpd	ymm6, ymm14, ymm1
-	vpcmpgtq	ymm3, ymm3, ymm6
-	vblendvpd	ymm1, ymm1, ymm2, ymm3
-	vxorpd	ymm2, ymm14, ymm1
-	vxorpd	ymm3, ymm14, ymm0
-	vpcmpgtq	ymm2, ymm2, ymm3
-	vblendvpd	ymm0, ymm0, ymm1, ymm2
-	vextractf128	xmm1, ymm0, 1
-	vxorpd	xmm2, xmm14, xmm1
-	vxorpd	xmm3, xmm14, xmm0
-	vpcmpgtq	xmm2, xmm3, xmm2
-	vblendvpd	xmm0, xmm1, xmm0, xmm2
-	vpermilps	xmm1, xmm0, 78          # xmm1 = xmm0[2,3,0,1]
-	vxorpd	xmm2, xmm14, xmm0
-	vxorpd	xmm3, xmm14, xmm1
-	vpcmpgtq	xmm2, xmm2, xmm3
-	vblendvpd	xmm0, xmm1, xmm0, xmm2
-	vpxor	ymm1, ymm14, ymm5
-	vpxor	ymm2, ymm11, ymm14
-	vpcmpgtq	ymm1, ymm2, ymm1
-	vblendvpd	ymm1, ymm11, ymm5, ymm1
-	vmovdqa	ymm4, ymmword ptr [rsp + 64]    # 32-byte Reload
-	vpxor	ymm2, ymm14, ymm4
-	vpxor	ymm3, ymm10, ymm14
-	vpcmpgtq	ymm2, ymm3, ymm2
-	vblendvpd	ymm2, ymm10, ymm4, ymm2
-	vpxor	ymm3, ymm14, ymm7
-	vpxor	ymm5, ymm13, ymm14
-	vpcmpgtq	ymm3, ymm5, ymm3
-	vblendvpd	ymm3, ymm13, ymm7, ymm3
-	vmovdqa	ymm6, ymmword ptr [rsp + 96]    # 32-byte Reload
-	vpxor	ymm4, ymm14, ymm6
-	vmovdqa	ymm7, ymmword ptr [rsp + 128]   # 32-byte Reload
-	vpxor	ymm5, ymm14, ymm7
-	vpcmpgtq	ymm4, ymm5, ymm4
-	vblendvpd	ymm4, ymm7, ymm6, ymm4
-	vxorpd	ymm5, ymm14, ymm4
-	vxorpd	ymm6, ymm14, ymm3
-	vpcmpgtq	ymm5, ymm6, ymm5
-	vblendvpd	ymm3, ymm3, ymm4, ymm5
-	vxorpd	ymm4, ymm14, ymm2
-	vxorpd	ymm5, ymm14, ymm1
-	vpcmpgtq	ymm4, ymm5, ymm4
-	vblendvpd	ymm1, ymm1, ymm2, ymm4
-	vxorpd	ymm2, ymm14, ymm1
-	vxorpd	ymm4, ymm14, ymm3
-	vpcmpgtq	ymm2, ymm4, ymm2
-	vblendvpd	ymm1, ymm3, ymm1, ymm2
+	vblendvpd	ymm1, ymm4, ymm1, ymm6
+	vxorpd	ymm4, ymm1, ymm0
+	vpxor	ymm6, ymm3, ymm0
+	vpcmpgtq	ymm4, ymm6, ymm4
+	vblendvpd	ymm1, ymm3, ymm1, ymm4
+	vmovq	r10, xmm5
+	vxorpd	ymm3, ymm1, ymm0
+	vpxor	ymm4, ymm2, ymm0
+	vpcmpgtq	ymm3, ymm4, ymm3
+	vblendvpd	ymm1, ymm2, ymm1, ymm3
 	vextractf128	xmm2, ymm1, 1
-	vxorpd	xmm3, xmm14, xmm1
-	vxorpd	xmm4, xmm14, xmm2
+	vxorpd	xmm3, xmm1, xmm0
+	vxorpd	xmm4, xmm2, xmm0
 	vpcmpgtq	xmm3, xmm4, xmm3
 	vblendvpd	xmm1, xmm2, xmm1, xmm3
 	vpermilps	xmm2, xmm1, 78          # xmm2 = xmm1[2,3,0,1]
-	vxorpd	xmm3, xmm14, xmm1
-	vxorpd	xmm4, xmm14, xmm2
-	vpcmpgtq	xmm3, xmm4, xmm3
-	vblendvpd	xmm1, xmm2, xmm1, xmm3
-	vmovq	rsi, xmm0
-	vmovq	r9, xmm1
-	cmp	r11, r8
-	je	.LBB3_14
-.LBB3_4:
-	mov	rax, rsi
+	vxorpd	xmm3, xmm1, xmm0
+	vxorpd	xmm0, xmm2, xmm0
+	vpcmpgtq	xmm0, xmm0, xmm3
+	vblendvpd	xmm0, xmm2, xmm1, xmm0
+	vmovq	rax, xmm0
+	mov	rsi, r10
+	cmp	r9, r8
+	je	.LBB3_8
 	.p2align	4, 0x90
-.LBB3_5:                                # =>This Inner Loop Header: Depth=1
-	mov	rsi, qword ptr [rdi + 8*r11]
-	cmp	r9, rsi
-	cmovae	r9, rsi
+.LBB3_7:                                # =>This Inner Loop Header: Depth=1
+	mov	rsi, qword ptr [rdi + 8*r9]
 	cmp	rax, rsi
-	cmova	rsi, rax
-	add	r11, 1
-	mov	rax, rsi
-	cmp	r8, r11
-	jne	.LBB3_5
-.LBB3_14:
+	cmovae	rax, rsi
+	cmp	r10, rsi
+	cmova	rsi, r10
+	add	r9, 1
+	mov	r10, rsi
+	cmp	r8, r9
+	jne	.LBB3_7
+.LBB3_8:
 	mov	qword ptr [rcx], rsi
-	mov	qword ptr [rdx], r9
+	mov	qword ptr [rdx], rax
 	mov	rsp, rbp
 	pop	rbp
 	vzeroupper
@@ -1361,6 +468,6 @@ uint64_max_min_avx2:                    # @uint64_max_min_avx2
 .Lfunc_end3:
 	.size	uint64_max_min_avx2, .Lfunc_end3-uint64_max_min_avx2
                                         # -- End function
-	.ident	"Ubuntu clang version 11.1.0-++20210204121720+1fdec59bffc1-1~exp1~20210203232336.162"
+	.ident	"Debian clang version 11.1.0-++20210428103820+1fdec59bffc1-1~exp1~20210428204437.162"
 	.section	".note.GNU-stack","",@progbits
 	.addrsig
diff --git a/go/parquet/internal/utils/_lib/min_max_sse4.s b/go/parquet/internal/utils/_lib/min_max_sse4.s
index 98f30e3ed1d..893a0a73f02 100644
--- a/go/parquet/internal/utils/_lib/min_max_sse4.s
+++ b/go/parquet/internal/utils/_lib/min_max_sse4.s
@@ -608,6 +608,6 @@ uint64_max_min_sse4:                    # @uint64_max_min_sse4
 .Lfunc_end3:
 	.size	uint64_max_min_sse4, .Lfunc_end3-uint64_max_min_sse4
                                         # -- End function
-	.ident	"Ubuntu clang version 11.1.0-++20210204121720+1fdec59bffc1-1~exp1~20210203232336.162"
+	.ident	"Debian clang version 11.1.0-++20210428103820+1fdec59bffc1-1~exp1~20210428204437.162"
 	.section	".note.GNU-stack","",@progbits
 	.addrsig
diff --git a/go/parquet/internal/utils/_lib/unpack_bool_avx2.s b/go/parquet/internal/utils/_lib/unpack_bool_avx2.s
index 1bc1be53d4d..6ac34887c00 100644
--- a/go/parquet/internal/utils/_lib/unpack_bool_avx2.s
+++ b/go/parquet/internal/utils/_lib/unpack_bool_avx2.s
@@ -1,6293 +1,104 @@
 	.text
 	.intel_syntax noprefix
 	.file	"unpack_bool.c"
-	.section	.rodata.cst32,"aM",@progbits,32
-	.p2align	5                               # -- Begin function bytes_to_bools_avx2
-.LCPI0_0:
-	.long	24                              # 0x18
-	.long	25                              # 0x19
-	.long	26                              # 0x1a
-	.long	27                              # 0x1b
-	.long	28                              # 0x1c
-	.long	29                              # 0x1d
-	.long	30                              # 0x1e
-	.long	31                              # 0x1f
-.LCPI0_1:
-	.long	16                              # 0x10
-	.long	17                              # 0x11
-	.long	18                              # 0x12
-	.long	19                              # 0x13
-	.long	20                              # 0x14
-	.long	21                              # 0x15
-	.long	22                              # 0x16
-	.long	23                              # 0x17
-.LCPI0_2:
-	.long	8                               # 0x8
-	.long	9                               # 0x9
-	.long	10                              # 0xa
-	.long	11                              # 0xb
-	.long	12                              # 0xc
-	.long	13                              # 0xd
-	.long	14                              # 0xe
-	.long	15                              # 0xf
-.LCPI0_3:
-	.long	0                               # 0x0
-	.long	1                               # 0x1
-	.long	2                               # 0x2
-	.long	3                               # 0x3
-	.long	4                               # 0x4
-	.long	5                               # 0x5
-	.long	6                               # 0x6
-	.long	7                               # 0x7
-.LCPI0_4:
-	.zero	32,1
-	.section	.rodata.cst8,"aM",@progbits,8
-	.p2align	3
-.LCPI0_5:
-	.quad	1                               # 0x1
-.LCPI0_6:
-	.quad	2                               # 0x2
-.LCPI0_7:
-	.quad	3                               # 0x3
-.LCPI0_8:
-	.quad	4                               # 0x4
-.LCPI0_9:
-	.quad	5                               # 0x5
-.LCPI0_10:
-	.quad	6                               # 0x6
-.LCPI0_11:
-	.quad	7                               # 0x7
-	.section	.rodata.cst4,"aM",@progbits,4
-	.p2align	2
-.LCPI0_12:
-	.long	32                              # 0x20
-	.text
-	.globl	bytes_to_bools_avx2
+	.globl	bytes_to_bools_avx2             # -- Begin function bytes_to_bools_avx2
 	.p2align	4, 0x90
 	.type	bytes_to_bools_avx2,@function
 bytes_to_bools_avx2:                    # @bytes_to_bools_avx2
 # %bb.0:
 	push	rbp
 	mov	rbp, rsp
-	push	r15
-	push	r14
-	push	r13
-	push	r12
-	push	rbx
-	and	rsp, -32
-	sub	rsp, 960
+	and	rsp, -8
 	test	esi, esi
-	jle	.LBB0_1051
+	jle	.LBB0_5
 # %bb.1:
-	mov	r9d, ecx
-	mov	r8, rdx
-	mov	r10d, esi
-	cmp	esi, 32
-	jae	.LBB0_3
-.LBB0_2:
-	xor	r12d, r12d
-.LBB0_1055:
-	lea	ecx, [8*r12]
-	jmp	.LBB0_1057
-	.p2align	4, 0x90
-.LBB0_1056:                             #   in Loop: Header=BB0_1057 Depth=1
-	add	r12, 1
-	add	ecx, 8
-	cmp	r10, r12
-	je	.LBB0_1051
-.LBB0_1057:                             # =>This Inner Loop Header: Depth=1
-	mov	edx, ecx
-	mov	ecx, ecx
-	cmp	edx, r9d
-	jge	.LBB0_1056
-# %bb.1058:                             #   in Loop: Header=BB0_1057 Depth=1
-	movzx	edx, byte ptr [rdi + r12]
-	and	dl, 1
-	mov	byte ptr [r8 + rcx], dl
-	mov	rdx, rcx
-	or	rdx, 1
-	cmp	edx, r9d
-	jge	.LBB0_1056
-# %bb.1059:                             #   in Loop: Header=BB0_1057 Depth=1
-	movzx	ebx, byte ptr [rdi + r12]
-	shr	bl
-	and	bl, 1
-	mov	byte ptr [r8 + rdx], bl
-	mov	rdx, rcx
-	or	rdx, 2
-	cmp	edx, r9d
-	jge	.LBB0_1056
-# %bb.1060:                             #   in Loop: Header=BB0_1057 Depth=1
-	movzx	ebx, byte ptr [rdi + r12]
-	shr	bl, 2
-	and	bl, 1
-	mov	byte ptr [r8 + rdx], bl
-	mov	rdx, rcx
-	or	rdx, 3
-	cmp	edx, r9d
-	jge	.LBB0_1056
-# %bb.1061:                             #   in Loop: Header=BB0_1057 Depth=1
-	movzx	ebx, byte ptr [rdi + r12]
-	shr	bl, 3
-	and	bl, 1
-	mov	byte ptr [r8 + rdx], bl
-	mov	rdx, rcx
-	or	rdx, 4
-	cmp	edx, r9d
-	jge	.LBB0_1056
-# %bb.1062:                             #   in Loop: Header=BB0_1057 Depth=1
-	movzx	ebx, byte ptr [rdi + r12]
-	shr	bl, 4
-	and	bl, 1
-	mov	byte ptr [r8 + rdx], bl
-	mov	rdx, rcx
-	or	rdx, 5
-	cmp	edx, r9d
-	jge	.LBB0_1056
-# %bb.1063:                             #   in Loop: Header=BB0_1057 Depth=1
-	movzx	ebx, byte ptr [rdi + r12]
-	shr	bl, 5
-	and	bl, 1
-	mov	byte ptr [r8 + rdx], bl
-	mov	rdx, rcx
-	or	rdx, 6
-	cmp	edx, r9d
-	jge	.LBB0_1056
-# %bb.1064:                             #   in Loop: Header=BB0_1057 Depth=1
-	movzx	ebx, byte ptr [rdi + r12]
-	shr	bl, 6
-	and	bl, 1
-	mov	byte ptr [r8 + rdx], bl
-	mov	rdx, rcx
-	or	rdx, 7
-	cmp	edx, r9d
-	jge	.LBB0_1056
-# %bb.1065:                             #   in Loop: Header=BB0_1057 Depth=1
-	movzx	ebx, byte ptr [rdi + r12]
-	shr	bl, 7
-	mov	byte ptr [r8 + rdx], bl
-	jmp	.LBB0_1056
-.LBB0_3:
-	mov	dword ptr [rsp + 16], r9d       # 4-byte Spill
-	mov	qword ptr [rsp + 48], r10       # 8-byte Spill
-	lea	rsi, [r10 - 1]
-	mov	ecx, 8
-	mov	eax, esi
-	mul	ecx
-	seto	r14b
-	mov	rbx, rsi
-	shr	rbx, 32
-	lea	rcx, [r8 + 6]
-	mov	edx, 8
-	mov	rax, rsi
-	mul	rdx
-	seto	sil
-	add	rcx, rax
-	setb	dl
-	lea	rcx, [r8 + 7]
-	add	rcx, rax
-	setb	r13b
-	lea	rcx, [r8 + 5]
-	add	rcx, rax
-	setb	r9b
-	lea	rcx, [r8 + 4]
-	add	rcx, rax
-	setb	r15b
-	lea	rcx, [r8 + 3]
-	add	rcx, rax
-	setb	r11b
-	lea	rcx, [r8 + 2]
-	add	rcx, rax
-	setb	r10b
-	lea	rcx, [r8 + 1]
-	add	rcx, rax
-	setb	cl
-	add	rax, r8
-	setb	al
-	xor	r12d, r12d
-	test	rbx, rbx
-	jne	.LBB0_1052
-# %bb.4:
-	test	r14b, r14b
-	jne	.LBB0_1052
-# %bb.5:
-	test	dl, dl
-	jne	.LBB0_1052
-# %bb.6:
-	test	sil, sil
-	jne	.LBB0_1052
-# %bb.7:
-	test	r13b, r13b
-	jne	.LBB0_1052
-# %bb.8:
-	test	sil, sil
-	jne	.LBB0_1052
-# %bb.9:
-	test	r9b, r9b
-	jne	.LBB0_1052
-# %bb.10:
-	test	sil, sil
-	jne	.LBB0_1052
-# %bb.11:
-	test	r15b, r15b
-	jne	.LBB0_1052
-# %bb.12:
-	test	sil, sil
-	jne	.LBB0_1052
-# %bb.13:
-	test	r11b, r11b
-	jne	.LBB0_1052
-# %bb.14:
-	test	sil, sil
-	jne	.LBB0_1052
-# %bb.15:
-	test	r10b, r10b
-	jne	.LBB0_1052
-# %bb.16:
-	test	sil, sil
-	mov	r10, qword ptr [rsp + 48]       # 8-byte Reload
-	jne	.LBB0_1054
-# %bb.17:
-	test	cl, cl
-	jne	.LBB0_1054
-# %bb.18:
-	test	sil, sil
-	mov	r9d, dword ptr [rsp + 16]       # 4-byte Reload
-	jne	.LBB0_1055
-# %bb.19:
-	test	al, al
-	jne	.LBB0_1055
-# %bb.20:
-	test	sil, sil
-	jne	.LBB0_1055
-# %bb.21:
-	lea	rax, [r8 + 8*r10]
-	cmp	rax, rdi
-	jbe	.LBB0_24
-# %bb.22:
-	lea	rax, [rdi + r10]
-	cmp	rax, r8
-	ja	.LBB0_2
-.LBB0_24:
-	mov	r12d, r10d
-	and	r12d, -32
-	vmovd	xmm0, r9d
-	vpbroadcastd	ymm0, xmm0
-	vmovdqa	ymm9, ymmword ptr [rip + .LCPI0_0] # ymm9 = [24,25,26,27,28,29,30,31]
-	vmovdqa	ymm8, ymmword ptr [rip + .LCPI0_1] # ymm8 = [16,17,18,19,20,21,22,23]
-	vmovdqa	ymm3, ymmword ptr [rip + .LCPI0_2] # ymm3 = [8,9,10,11,12,13,14,15]
-	vmovdqa	ymm2, ymmword ptr [rip + .LCPI0_3] # ymm2 = [0,1,2,3,4,5,6,7]
-	xor	r11d, r11d
-	vbroadcastsd	ymm1, qword ptr [rip + .LCPI0_5] # ymm1 = [1,1,1,1]
-	vmovaps	ymmword ptr [rsp + 768], ymm1   # 32-byte Spill
-	vbroadcastsd	ymm1, qword ptr [rip + .LCPI0_6] # ymm1 = [2,2,2,2]
-	vmovaps	ymmword ptr [rsp + 736], ymm1   # 32-byte Spill
-	vbroadcastsd	ymm1, qword ptr [rip + .LCPI0_7] # ymm1 = [3,3,3,3]
-	vmovaps	ymmword ptr [rsp + 704], ymm1   # 32-byte Spill
-	vbroadcastsd	ymm1, qword ptr [rip + .LCPI0_8] # ymm1 = [4,4,4,4]
-	vmovaps	ymmword ptr [rsp + 672], ymm1   # 32-byte Spill
-	vbroadcastsd	ymm1, qword ptr [rip + .LCPI0_9] # ymm1 = [5,5,5,5]
-	vmovaps	ymmword ptr [rsp + 640], ymm1   # 32-byte Spill
-	vbroadcastsd	ymm1, qword ptr [rip + .LCPI0_10] # ymm1 = [6,6,6,6]
-	vmovaps	ymmword ptr [rsp + 608], ymm1   # 32-byte Spill
-	vbroadcastsd	ymm1, qword ptr [rip + .LCPI0_11] # ymm1 = [7,7,7,7]
-	vmovaps	ymmword ptr [rsp + 576], ymm1   # 32-byte Spill
-	vpbroadcastd	ymm1, dword ptr [rip + .LCPI0_12] # ymm1 = [32,32,32,32,32,32,32,32]
-	vmovdqa	ymmword ptr [rsp + 544], ymm1   # 32-byte Spill
-	jmp	.LBB0_26
-	.p2align	4, 0x90
-.LBB0_25:                               #   in Loop: Header=BB0_26 Depth=1
-	add	r11, 32
-	vmovdqa	ymm1, ymmword ptr [rsp + 544]   # 32-byte Reload
-	vpaddd	ymm2, ymm2, ymm1
-	vpaddd	ymm3, ymm3, ymm1
-	vpaddd	ymm8, ymm8, ymm1
-	vpaddd	ymm9, ymm9, ymm1
-	cmp	r11, r12
-	je	.LBB0_1050
-.LBB0_26:                               # =>This Inner Loop Header: Depth=1
-	vmovdqa	ymmword ptr [rsp + 800], ymm2   # 32-byte Spill
-	vpslld	ymm1, ymm2, 3
-	vpcmpgtd	xmm2, xmm0, xmm1
-	vmovd	ecx, xmm2
-                                        # implicit-def: $ymm4
-	test	cl, 1
-	je	.LBB0_28
-# %bb.27:                               #   in Loop: Header=BB0_26 Depth=1
-	vpbroadcastb	ymm4, byte ptr [rdi + r11]
-.LBB0_28:                               #   in Loop: Header=BB0_26 Depth=1
-	mov	r10, r11
-	or	r10, 1
-	vpcmpgtd	xmm2, xmm0, xmm1
-	vpackssdw	xmm2, xmm2, xmm2
-	vpacksswb	xmm2, xmm2, xmm2
-	vpextrb	ecx, xmm2, 1
-	test	cl, 1
-	je	.LBB0_30
-# %bb.29:                               #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm2, xmm4, byte ptr [rdi + r10], 1
-	vpblendd	ymm4, ymm4, ymm2, 15            # ymm4 = ymm2[0,1,2,3],ymm4[4,5,6,7]
-.LBB0_30:                               #   in Loop: Header=BB0_26 Depth=1
-	mov	r14, r11
-	or	r14, 2
-	vpcmpgtd	xmm2, xmm0, xmm1
-	vpackssdw	xmm2, xmm2, xmm2
-	vpacksswb	xmm2, xmm2, xmm2
-	vpextrb	ecx, xmm2, 2
-	test	cl, 1
-	je	.LBB0_32
-# %bb.31:                               #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm2, xmm4, byte ptr [rdi + r14], 2
-	vpblendd	ymm4, ymm4, ymm2, 15            # ymm4 = ymm2[0,1,2,3],ymm4[4,5,6,7]
-.LBB0_32:                               #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm13, ymm1, 1
-	mov	rdx, r11
-	or	rdx, 3
-	vpcmpgtd	xmm2, xmm0, xmm1
-	vpackssdw	xmm2, xmm2, xmm2
-	vpacksswb	xmm2, xmm2, xmm2
-	vpextrb	ecx, xmm2, 3
-	test	cl, 1
-	je	.LBB0_34
-# %bb.33:                               #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm2, xmm4, byte ptr [rdi + rdx], 3
-	vpblendd	ymm4, ymm4, ymm2, 15            # ymm4 = ymm2[0,1,2,3],ymm4[4,5,6,7]
-.LBB0_34:                               #   in Loop: Header=BB0_26 Depth=1
-	mov	rcx, r11
-	or	rcx, 4
-	vextracti128	xmm7, ymm0, 1
-	vpcmpgtd	xmm2, xmm7, xmm13
-	vpextrb	r9d, xmm2, 0
-	test	r9b, 1
-	mov	qword ptr [rsp + 272], rdx      # 8-byte Spill
-	mov	qword ptr [rsp + 264], rcx      # 8-byte Spill
-	je	.LBB0_36
-# %bb.35:                               #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm2, xmm4, byte ptr [rdi + rcx], 4
-	vpblendd	ymm4, ymm4, ymm2, 15            # ymm4 = ymm2[0,1,2,3],ymm4[4,5,6,7]
-.LBB0_36:                               #   in Loop: Header=BB0_26 Depth=1
-	mov	r15, r11
-	or	r15, 5
-	vpcmpgtd	ymm6, ymm0, ymm1
-	vpackssdw	ymm2, ymm6, ymm0
-	vextracti128	xmm2, ymm2, 1
-	vpbroadcastd	xmm2, xmm2
-	vpacksswb	xmm2, xmm2, xmm2
-	vpextrb	ecx, xmm2, 5
-	test	cl, 1
-	je	.LBB0_38
-# %bb.37:                               #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm2, xmm4, byte ptr [rdi + r15], 5
-	vpblendd	ymm4, ymm4, ymm2, 15            # ymm4 = ymm2[0,1,2,3],ymm4[4,5,6,7]
-.LBB0_38:                               #   in Loop: Header=BB0_26 Depth=1
-	mov	rbx, r11
-	or	rbx, 6
-	vpackssdw	ymm2, ymm6, ymm0
-	vpermq	ymm2, ymm2, 232                 # ymm2 = ymm2[0,2,2,3]
-	vpacksswb	xmm2, xmm2, xmm2
-	vpextrb	ecx, xmm2, 6
-	test	cl, 1
-	je	.LBB0_40
-# %bb.39:                               #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm2, xmm4, byte ptr [rdi + rbx], 6
-	vpblendd	ymm4, ymm4, ymm2, 15            # ymm4 = ymm2[0,1,2,3],ymm4[4,5,6,7]
-.LBB0_40:                               #   in Loop: Header=BB0_26 Depth=1
-	vpslld	ymm2, ymm3, 3
-	mov	rax, r11
-	or	rax, 7
-	vpackssdw	ymm5, ymm6, ymm0
-	vpermq	ymm5, ymm5, 232                 # ymm5 = ymm5[0,2,2,3]
-	vpacksswb	xmm5, xmm5, xmm5
-	vpextrb	ecx, xmm5, 7
-	test	cl, 1
-	je	.LBB0_42
-# %bb.41:                               #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm5, xmm4, byte ptr [rdi + rax], 7
-	vpblendd	ymm4, ymm4, ymm5, 15            # ymm4 = ymm5[0,1,2,3],ymm4[4,5,6,7]
-.LBB0_42:                               #   in Loop: Header=BB0_26 Depth=1
-	mov	rsi, r11
-	or	rsi, 8
-	vpcmpgtd	xmm5, xmm0, xmm2
-	vpextrb	ecx, xmm5, 0
-	test	cl, 1
-	je	.LBB0_44
-# %bb.43:                               #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm5, xmm4, byte ptr [rdi + rsi], 8
-	vpblendd	ymm4, ymm4, ymm5, 15            # ymm4 = ymm5[0,1,2,3],ymm4[4,5,6,7]
-.LBB0_44:                               #   in Loop: Header=BB0_26 Depth=1
-	mov	rdx, r11
-	or	rdx, 9
-	vpcmpgtd	xmm5, xmm0, xmm2
-	vpackssdw	xmm5, xmm5, xmm5
-	vpacksswb	xmm5, xmm5, xmm5
-	vpextrb	ecx, xmm5, 9
-	test	cl, 1
-	mov	qword ptr [rsp + 224], rdx      # 8-byte Spill
-	je	.LBB0_46
-# %bb.45:                               #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm5, xmm4, byte ptr [rdi + rdx], 9
-	vpblendd	ymm4, ymm4, ymm5, 15            # ymm4 = ymm5[0,1,2,3],ymm4[4,5,6,7]
-.LBB0_46:                               #   in Loop: Header=BB0_26 Depth=1
-	mov	rdx, r11
-	or	rdx, 10
-	vpcmpgtd	xmm5, xmm0, xmm2
-	vpackssdw	xmm5, xmm5, xmm5
-	vpacksswb	xmm5, xmm5, xmm5
-	vpextrb	ecx, xmm5, 10
-	test	cl, 1
-	vmovdqa	ymmword ptr [rsp + 832], ymm3   # 32-byte Spill
-	mov	qword ptr [rsp + 96], rsi       # 8-byte Spill
-	je	.LBB0_48
-# %bb.47:                               #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm5, xmm4, byte ptr [rdi + rdx], 10
-	vpblendd	ymm4, ymm4, ymm5, 15            # ymm4 = ymm5[0,1,2,3],ymm4[4,5,6,7]
-.LBB0_48:                               #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm5, ymm2, 1
-	mov	rsi, r11
-	or	rsi, 11
-	vpcmpgtd	xmm3, xmm0, xmm2
-	vpackssdw	xmm3, xmm3, xmm3
-	vpacksswb	xmm3, xmm3, xmm3
-	vpextrb	ecx, xmm3, 11
-	test	cl, 1
-	mov	qword ptr [rsp + 152], r10      # 8-byte Spill
-	mov	qword ptr [rsp + 296], r14      # 8-byte Spill
-	mov	qword ptr [rsp + 104], r15      # 8-byte Spill
-	mov	qword ptr [rsp + 288], rbx      # 8-byte Spill
-	mov	qword ptr [rsp + 232], rax      # 8-byte Spill
-	je	.LBB0_50
-# %bb.49:                               #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm3, xmm4, byte ptr [rdi + rsi], 11
-	vpblendd	ymm4, ymm4, ymm3, 15            # ymm4 = ymm3[0,1,2,3],ymm4[4,5,6,7]
-.LBB0_50:                               #   in Loop: Header=BB0_26 Depth=1
-	mov	rcx, r11
-	or	rcx, 12
-	vpcmpgtd	xmm3, xmm7, xmm5
-	vpextrb	r14d, xmm3, 0
-	test	r14b, 1
-	mov	qword ptr [rsp + 256], rsi      # 8-byte Spill
-	mov	qword ptr [rsp + 248], rcx      # 8-byte Spill
-	je	.LBB0_52
-# %bb.51:                               #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm3, xmm4, byte ptr [rdi + rcx], 12
-	vpblendd	ymm4, ymm4, ymm3, 15            # ymm4 = ymm3[0,1,2,3],ymm4[4,5,6,7]
-.LBB0_52:                               #   in Loop: Header=BB0_26 Depth=1
-	mov	rax, r11
-	or	rax, 13
-	vpcmpgtd	ymm7, ymm0, ymm2
-	vpackssdw	ymm3, ymm7, ymm0
-	vextracti128	xmm3, ymm3, 1
-	vpbroadcastd	xmm3, xmm3
-	vpacksswb	xmm3, xmm3, xmm3
-	vpextrb	ecx, xmm3, 13
-	test	cl, 1
-	je	.LBB0_54
-# %bb.53:                               #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm3, xmm4, byte ptr [rdi + rax], 13
-	vpblendd	ymm4, ymm4, ymm3, 15            # ymm4 = ymm3[0,1,2,3],ymm4[4,5,6,7]
-.LBB0_54:                               #   in Loop: Header=BB0_26 Depth=1
-	mov	rbx, r11
-	or	rbx, 14
-	vpackssdw	ymm3, ymm7, ymm0
-	vpermq	ymm3, ymm3, 232                 # ymm3 = ymm3[0,2,2,3]
-	vpacksswb	xmm3, xmm3, xmm3
-	vpextrb	ecx, xmm3, 14
-	test	cl, 1
-	mov	qword ptr [rsp + 80], rbx       # 8-byte Spill
-	je	.LBB0_56
-# %bb.55:                               #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm3, xmm4, byte ptr [rdi + rbx], 14
-	vpblendd	ymm4, ymm4, ymm3, 15            # ymm4 = ymm3[0,1,2,3],ymm4[4,5,6,7]
-.LBB0_56:                               #   in Loop: Header=BB0_26 Depth=1
-	vpslld	ymm10, ymm8, 3
-	mov	rsi, r11
-	or	rsi, 15
-	vpackssdw	ymm3, ymm7, ymm0
-	vpermq	ymm3, ymm3, 232                 # ymm3 = ymm3[0,2,2,3]
-	vpacksswb	xmm3, xmm3, xmm3
-	vpextrb	ecx, xmm3, 15
-	test	cl, 1
-	je	.LBB0_58
-# %bb.57:                               #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm3, xmm4, byte ptr [rdi + rsi], 15
-	vpblendd	ymm4, ymm4, ymm3, 15            # ymm4 = ymm3[0,1,2,3],ymm4[4,5,6,7]
-.LBB0_58:                               #   in Loop: Header=BB0_26 Depth=1
-	mov	r15, r11
-	or	r15, 16
-	vpcmpgtd	xmm3, xmm0, xmm10
-	vmovd	ecx, xmm3
-	test	cl, 1
-	mov	qword ptr [rsp + 64], r15       # 8-byte Spill
-	mov	qword ptr [rsp + 72], rsi       # 8-byte Spill
-	je	.LBB0_60
-# %bb.59:                               #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm3, ymm4, 1
-	vpinsrb	xmm3, xmm3, byte ptr [rdi + r15], 0
-	vinserti128	ymm4, ymm4, xmm3, 1
-.LBB0_60:                               #   in Loop: Header=BB0_26 Depth=1
-	mov	rsi, r11
-	or	rsi, 17
-	vpcmpgtd	xmm3, xmm0, xmm10
-	vpackssdw	xmm3, xmm3, xmm3
-	vpermq	ymm3, ymm3, 212                 # ymm3 = ymm3[0,1,1,3]
-	vpacksswb	ymm3, ymm3, ymm0
-	vextracti128	xmm3, ymm3, 1
-	vpextrb	ecx, xmm3, 1
-	test	cl, 1
-	je	.LBB0_62
-# %bb.61:                               #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm3, ymm4, 1
-	vpinsrb	xmm3, xmm3, byte ptr [rdi + rsi], 1
-	vinserti128	ymm4, ymm4, xmm3, 1
-.LBB0_62:                               #   in Loop: Header=BB0_26 Depth=1
-	mov	rbx, r11
-	or	rbx, 18
-	vpcmpgtd	xmm3, xmm0, xmm10
-	vpackssdw	xmm3, xmm3, xmm3
-	vpermq	ymm3, ymm3, 212                 # ymm3 = ymm3[0,1,1,3]
-	vpacksswb	ymm3, ymm3, ymm0
-	vextracti128	xmm3, ymm3, 1
-	vpextrb	ecx, xmm3, 2
-	test	cl, 1
-	je	.LBB0_64
-# %bb.63:                               #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm3, ymm4, 1
-	vpinsrb	xmm3, xmm3, byte ptr [rdi + rbx], 2
-	vinserti128	ymm4, ymm4, xmm3, 1
-.LBB0_64:                               #   in Loop: Header=BB0_26 Depth=1
-	mov	r15, r11
-	or	r15, 19
-	vpcmpgtd	xmm3, xmm0, xmm10
-	vpackssdw	xmm3, xmm3, xmm3
-	vpermq	ymm3, ymm3, 212                 # ymm3 = ymm3[0,1,1,3]
-	vpacksswb	ymm3, ymm3, ymm0
-	vextracti128	xmm3, ymm3, 1
-	vpextrb	ecx, xmm3, 3
-	test	cl, 1
-	vmovdqa	ymmword ptr [rsp + 864], ymm8   # 32-byte Spill
-	je	.LBB0_66
-# %bb.65:                               #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm3, ymm4, 1
-	vpinsrb	xmm3, xmm3, byte ptr [rdi + r15], 3
-	vinserti128	ymm4, ymm4, xmm3, 1
-.LBB0_66:                               #   in Loop: Header=BB0_26 Depth=1
-	mov	r13, r11
-	or	r13, 20
-	vpcmpgtd	ymm8, ymm0, ymm10
-	vpackssdw	ymm3, ymm0, ymm8
-	vpacksswb	ymm3, ymm3, ymm0
-	vextracti128	xmm3, ymm3, 1
-	vpextrb	ecx, xmm3, 4
-	test	cl, 1
-	mov	qword ptr [rsp + 56], r13       # 8-byte Spill
-	je	.LBB0_68
-# %bb.67:                               #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm3, ymm4, 1
-	vpinsrb	xmm3, xmm3, byte ptr [rdi + r13], 4
-	vinserti128	ymm4, ymm4, xmm3, 1
-.LBB0_68:                               #   in Loop: Header=BB0_26 Depth=1
-	mov	r13, r11
-	or	r13, 21
-	vpackssdw	ymm3, ymm0, ymm8
-	vpacksswb	ymm3, ymm3, ymm0
-	vextracti128	xmm3, ymm3, 1
-	vpextrb	ecx, xmm3, 5
-	test	cl, 1
-	mov	qword ptr [rsp + 128], rbx      # 8-byte Spill
-	je	.LBB0_70
-# %bb.69:                               #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm3, ymm4, 1
-	vpinsrb	xmm3, xmm3, byte ptr [rdi + r13], 5
-	vinserti128	ymm4, ymm4, xmm3, 1
-.LBB0_70:                               #   in Loop: Header=BB0_26 Depth=1
-	mov	r10, r11
-	or	r10, 22
-	vpackssdw	ymm3, ymm0, ymm8
-	vpacksswb	ymm3, ymm3, ymm0
-	vextracti128	xmm3, ymm3, 1
-	vpextrb	ecx, xmm3, 6
-	test	cl, 1
-	je	.LBB0_72
-# %bb.71:                               #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm3, ymm4, 1
-	vpinsrb	xmm3, xmm3, byte ptr [rdi + r10], 6
-	vinserti128	ymm4, ymm4, xmm3, 1
-.LBB0_72:                               #   in Loop: Header=BB0_26 Depth=1
-	vpslld	ymm11, ymm9, 3
-	mov	rbx, r11
-	or	rbx, 23
-	vpackssdw	ymm3, ymm0, ymm8
-	vpacksswb	ymm3, ymm3, ymm0
-	vextracti128	xmm3, ymm3, 1
-	vpextrb	ecx, xmm3, 7
-	test	cl, 1
-	mov	qword ptr [rsp + 240], rbx      # 8-byte Spill
-	vmovdqa	ymmword ptr [rsp + 896], ymm9   # 32-byte Spill
-	je	.LBB0_74
-# %bb.73:                               #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm3, ymm4, 1
-	vpinsrb	xmm3, xmm3, byte ptr [rdi + rbx], 7
-	vinserti128	ymm4, ymm4, xmm3, 1
-.LBB0_74:                               #   in Loop: Header=BB0_26 Depth=1
-	mov	rbx, r11
-	or	rbx, 24
-	vpcmpgtd	ymm9, ymm0, ymm11
-	vpermq	ymm12, ymm9, 68                 # ymm12 = ymm9[0,1,0,1]
-	vpacksswb	ymm3, ymm0, ymm12
-	vextracti128	xmm3, ymm3, 1
-	vpextrb	ecx, xmm3, 8
-	test	cl, 1
-	mov	qword ptr [rsp + 216], rbx      # 8-byte Spill
-	je	.LBB0_76
-# %bb.75:                               #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm3, ymm4, 1
-	vpinsrb	xmm3, xmm3, byte ptr [rdi + rbx], 8
-	vinserti128	ymm4, ymm4, xmm3, 1
-.LBB0_76:                               #   in Loop: Header=BB0_26 Depth=1
-	mov	rbx, r11
-	or	rbx, 25
-	vpcmpgtd	xmm3, xmm0, xmm11
-	vpackssdw	xmm3, xmm3, xmm3
-	vpermq	ymm3, ymm3, 212                 # ymm3 = ymm3[0,1,1,3]
-	vpacksswb	ymm3, ymm0, ymm3
-	vextracti128	xmm3, ymm3, 1
-	vpextrb	ecx, xmm3, 9
-	test	cl, 1
-	mov	qword ptr [rsp + 208], rbx      # 8-byte Spill
-	je	.LBB0_78
-# %bb.77:                               #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm3, ymm4, 1
-	vpinsrb	xmm3, xmm3, byte ptr [rdi + rbx], 9
-	vinserti128	ymm4, ymm4, xmm3, 1
-.LBB0_78:                               #   in Loop: Header=BB0_26 Depth=1
-	mov	rbx, r11
-	or	rbx, 26
-	vpcmpgtd	xmm3, xmm0, xmm11
-	vpackssdw	xmm3, xmm3, xmm3
-	vpermq	ymm3, ymm3, 212                 # ymm3 = ymm3[0,1,1,3]
-	vpacksswb	ymm3, ymm0, ymm3
-	vextracti128	xmm3, ymm3, 1
-	vpextrb	ecx, xmm3, 10
-	test	cl, 1
-	mov	qword ptr [rsp + 200], rbx      # 8-byte Spill
-	je	.LBB0_80
-# %bb.79:                               #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm3, ymm4, 1
-	vpinsrb	xmm3, xmm3, byte ptr [rdi + rbx], 10
-	vinserti128	ymm4, ymm4, xmm3, 1
-.LBB0_80:                               #   in Loop: Header=BB0_26 Depth=1
-	mov	rbx, r11
-	or	rbx, 27
-	vpcmpgtd	xmm3, xmm0, xmm11
-	vpackssdw	xmm3, xmm3, xmm3
-	vpermq	ymm3, ymm3, 212                 # ymm3 = ymm3[0,1,1,3]
-	vpacksswb	ymm3, ymm0, ymm3
-	vextracti128	xmm3, ymm3, 1
-	vpextrb	ecx, xmm3, 11
-	test	cl, 1
-	mov	qword ptr [rsp + 192], rbx      # 8-byte Spill
-	mov	qword ptr [rsp + 144], rdx      # 8-byte Spill
-	mov	qword ptr [rsp + 88], rax       # 8-byte Spill
-	je	.LBB0_82
-# %bb.81:                               #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm3, ymm4, 1
-	vpinsrb	xmm3, xmm3, byte ptr [rdi + rbx], 11
-	vinserti128	ymm4, ymm4, xmm3, 1
-.LBB0_82:                               #   in Loop: Header=BB0_26 Depth=1
-	mov	rdx, r11
-	or	rdx, 28
-	vpackssdw	ymm3, ymm0, ymm9
-	vpacksswb	ymm3, ymm0, ymm3
-	vextracti128	xmm3, ymm3, 1
-	vpextrb	ecx, xmm3, 12
-	test	cl, 1
-	je	.LBB0_84
-# %bb.83:                               #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm3, ymm4, 1
-	vpinsrb	xmm3, xmm3, byte ptr [rdi + rdx], 12
-	vinserti128	ymm4, ymm4, xmm3, 1
-.LBB0_84:                               #   in Loop: Header=BB0_26 Depth=1
-	mov	rbx, r11
-	or	rbx, 29
-	vpackssdw	ymm3, ymm0, ymm9
-	vpacksswb	ymm3, ymm0, ymm3
-	vextracti128	xmm3, ymm3, 1
-	vpextrb	ecx, xmm3, 13
-	test	cl, 1
-	mov	qword ptr [rsp + 176], rbx      # 8-byte Spill
-	je	.LBB0_86
-# %bb.85:                               #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm3, ymm4, 1
-	vpinsrb	xmm3, xmm3, byte ptr [rdi + rbx], 13
-	vinserti128	ymm4, ymm4, xmm3, 1
-.LBB0_86:                               #   in Loop: Header=BB0_26 Depth=1
-	mov	rbx, r11
-	or	rbx, 30
-	vpackssdw	ymm3, ymm0, ymm9
-	vpacksswb	ymm3, ymm0, ymm3
-	vextracti128	xmm3, ymm3, 1
-	vpextrb	ecx, xmm3, 14
-	test	cl, 1
-	mov	qword ptr [rsp + 168], rbx      # 8-byte Spill
-	je	.LBB0_88
-# %bb.87:                               #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm3, ymm4, 1
-	vpinsrb	xmm3, xmm3, byte ptr [rdi + rbx], 14
-	vinserti128	ymm4, ymm4, xmm3, 1
-.LBB0_88:                               #   in Loop: Header=BB0_26 Depth=1
-	mov	rbx, r11
-	or	rbx, 31
-	vpackssdw	ymm3, ymm0, ymm9
-	vpacksswb	ymm3, ymm0, ymm3
-	vextracti128	xmm3, ymm3, 1
-	vpextrb	ecx, xmm3, 15
-	test	cl, 1
-	mov	qword ptr [rsp + 160], rbx      # 8-byte Spill
-	je	.LBB0_90
-# %bb.89:                               #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm3, ymm4, 1
-	vpinsrb	xmm3, xmm3, byte ptr [rdi + rbx], 15
-	vinserti128	ymm4, ymm4, xmm3, 1
-.LBB0_90:                               #   in Loop: Header=BB0_26 Depth=1
-	vpmovzxdq	ymm3, xmm1              # ymm3 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
-	vmovdqa	ymmword ptr [rsp + 512], ymm3   # 32-byte Spill
-	vpand	ymm15, ymm4, ymmword ptr [rip + .LCPI0_4]
-	vpcmpgtd	xmm3, xmm0, xmm1
-	vmovd	ecx, xmm3
-	test	cl, 1
-	je	.LBB0_92
-# %bb.91:                               #   in Loop: Header=BB0_26 Depth=1
-	vmovdqa	ymm3, ymmword ptr [rsp + 512]   # 32-byte Reload
-	vmovq	rcx, xmm3
-	vpextrb	byte ptr [r8 + rcx], xmm15, 0
-.LBB0_92:                               #   in Loop: Header=BB0_26 Depth=1
-	vpcmpgtd	xmm3, xmm0, xmm1
-	vpackssdw	xmm3, xmm3, xmm3
-	vpacksswb	xmm3, xmm3, xmm3
-	vpextrb	ecx, xmm3, 1
-	test	cl, 1
-	je	.LBB0_94
-# %bb.93:                               #   in Loop: Header=BB0_26 Depth=1
-	vmovdqa	ymm3, ymmword ptr [rsp + 512]   # 32-byte Reload
-	vpextrq	rcx, xmm3, 1
-	vpextrb	byte ptr [r8 + rcx], xmm15, 1
-.LBB0_94:                               #   in Loop: Header=BB0_26 Depth=1
-	vpcmpgtd	xmm3, xmm0, xmm1
-	vpackssdw	xmm3, xmm3, xmm3
-	vpacksswb	xmm3, xmm3, xmm3
-	vpextrb	ecx, xmm3, 2
-	test	cl, 1
-	je	.LBB0_96
-# %bb.95:                               #   in Loop: Header=BB0_26 Depth=1
-	vmovdqa	ymm3, ymmword ptr [rsp + 512]   # 32-byte Reload
-	vextracti128	xmm3, ymm3, 1
-	vmovq	rcx, xmm3
-	vpextrb	byte ptr [r8 + rcx], xmm15, 2
-.LBB0_96:                               #   in Loop: Header=BB0_26 Depth=1
-	vpcmpgtd	xmm1, xmm0, xmm1
-	vpackssdw	xmm1, xmm1, xmm1
-	vpacksswb	xmm1, xmm1, xmm1
-	vpextrb	ecx, xmm1, 3
-	test	cl, 1
-	je	.LBB0_98
-# %bb.97:                               #   in Loop: Header=BB0_26 Depth=1
-	vmovdqa	ymm1, ymmword ptr [rsp + 512]   # 32-byte Reload
-	vextracti128	xmm1, ymm1, 1
-	vpextrq	rcx, xmm1, 1
-	vpextrb	byte ptr [r8 + rcx], xmm15, 3
-.LBB0_98:                               #   in Loop: Header=BB0_26 Depth=1
-	vpmovzxdq	ymm1, xmm13             # ymm1 = xmm13[0],zero,xmm13[1],zero,xmm13[2],zero,xmm13[3],zero
-	vmovdqa	ymmword ptr [rsp + 480], ymm1   # 32-byte Spill
-	test	r9b, 1
-	je	.LBB0_100
-# %bb.99:                               #   in Loop: Header=BB0_26 Depth=1
-	vmovdqa	ymm1, ymmword ptr [rsp + 480]   # 32-byte Reload
-	vmovq	rcx, xmm1
-	vpextrb	byte ptr [r8 + rcx], xmm15, 4
-.LBB0_100:                              #   in Loop: Header=BB0_26 Depth=1
-	vpackssdw	ymm1, ymm6, ymm0
-	vextracti128	xmm1, ymm1, 1
-	vpbroadcastd	xmm1, xmm1
-	vpacksswb	xmm1, xmm1, xmm1
-	vpextrb	ecx, xmm1, 5
-	test	cl, 1
-	je	.LBB0_102
-# %bb.101:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovdqa	ymm1, ymmword ptr [rsp + 480]   # 32-byte Reload
-	vpextrq	rcx, xmm1, 1
-	vpextrb	byte ptr [r8 + rcx], xmm15, 5
-.LBB0_102:                              #   in Loop: Header=BB0_26 Depth=1
-	vpackssdw	ymm1, ymm6, ymm0
-	vpermq	ymm1, ymm1, 232                 # ymm1 = ymm1[0,2,2,3]
-	vpacksswb	xmm1, xmm1, xmm1
-	vpextrb	ecx, xmm1, 6
-	test	cl, 1
-	je	.LBB0_104
-# %bb.103:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovdqa	ymm1, ymmword ptr [rsp + 480]   # 32-byte Reload
-	vextracti128	xmm1, ymm1, 1
-	vmovq	rcx, xmm1
-	vpextrb	byte ptr [r8 + rcx], xmm15, 6
-.LBB0_104:                              #   in Loop: Header=BB0_26 Depth=1
-	vpackssdw	ymm1, ymm6, ymm0
-	vpermq	ymm1, ymm1, 232                 # ymm1 = ymm1[0,2,2,3]
-	vpacksswb	xmm1, xmm1, xmm1
-	vpextrb	ecx, xmm1, 7
-	test	cl, 1
-	je	.LBB0_106
-# %bb.105:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovdqa	ymm1, ymmword ptr [rsp + 480]   # 32-byte Reload
-	vextracti128	xmm1, ymm1, 1
-	vpextrq	rcx, xmm1, 1
-	vpextrb	byte ptr [r8 + rcx], xmm15, 7
-.LBB0_106:                              #   in Loop: Header=BB0_26 Depth=1
-	vpmovzxdq	ymm1, xmm2              # ymm1 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
-	vmovdqa	ymmword ptr [rsp + 448], ymm1   # 32-byte Spill
-	vpcmpgtd	xmm1, xmm0, xmm2
-	vpextrb	ecx, xmm1, 0
-	test	cl, 1
-	je	.LBB0_108
-# %bb.107:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovdqa	ymm1, ymmword ptr [rsp + 448]   # 32-byte Reload
-	vmovq	rcx, xmm1
-	vpextrb	byte ptr [r8 + rcx], xmm15, 8
-.LBB0_108:                              #   in Loop: Header=BB0_26 Depth=1
-	vpcmpgtd	xmm1, xmm0, xmm2
-	vpackssdw	xmm1, xmm1, xmm1
-	vpacksswb	xmm1, xmm1, xmm1
-	vpextrb	ecx, xmm1, 9
-	test	cl, 1
-	je	.LBB0_110
-# %bb.109:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovdqa	ymm1, ymmword ptr [rsp + 448]   # 32-byte Reload
-	vpextrq	rcx, xmm1, 1
-	vpextrb	byte ptr [r8 + rcx], xmm15, 9
-.LBB0_110:                              #   in Loop: Header=BB0_26 Depth=1
-	vpcmpgtd	xmm1, xmm0, xmm2
-	vpackssdw	xmm1, xmm1, xmm1
-	vpacksswb	xmm1, xmm1, xmm1
-	vpextrb	ecx, xmm1, 10
-	test	cl, 1
-	je	.LBB0_112
-# %bb.111:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovdqa	ymm1, ymmword ptr [rsp + 448]   # 32-byte Reload
-	vextracti128	xmm1, ymm1, 1
-	vmovq	rcx, xmm1
-	vpextrb	byte ptr [r8 + rcx], xmm15, 10
-.LBB0_112:                              #   in Loop: Header=BB0_26 Depth=1
-	vpcmpgtd	xmm1, xmm0, xmm2
-	vpackssdw	xmm1, xmm1, xmm1
-	vpacksswb	xmm1, xmm1, xmm1
-	vpextrb	ecx, xmm1, 11
-	test	cl, 1
-	je	.LBB0_114
-# %bb.113:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovdqa	ymm1, ymmword ptr [rsp + 448]   # 32-byte Reload
-	vextracti128	xmm1, ymm1, 1
-	vpextrq	rcx, xmm1, 1
-	vpextrb	byte ptr [r8 + rcx], xmm15, 11
-.LBB0_114:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	qword ptr [rsp + 136], rsi      # 8-byte Spill
-	vpmovzxdq	ymm1, xmm5              # ymm1 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero
-	vmovdqa	ymmword ptr [rsp + 416], ymm1   # 32-byte Spill
-	test	r14b, 1
-	je	.LBB0_116
-# %bb.115:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovdqa	ymm1, ymmword ptr [rsp + 416]   # 32-byte Reload
-	vmovq	rcx, xmm1
-	vpextrb	byte ptr [r8 + rcx], xmm15, 12
-.LBB0_116:                              #   in Loop: Header=BB0_26 Depth=1
-	vpackssdw	ymm1, ymm7, ymm0
-	vextracti128	xmm1, ymm1, 1
-	vpbroadcastd	xmm1, xmm1
-	vpacksswb	xmm1, xmm1, xmm1
-	vpextrb	ecx, xmm1, 13
-	test	cl, 1
-	mov	r9, qword ptr [rsp + 152]       # 8-byte Reload
-	mov	rsi, qword ptr [rsp + 296]      # 8-byte Reload
-	mov	r14, qword ptr [rsp + 104]      # 8-byte Reload
-	mov	rax, qword ptr [rsp + 288]      # 8-byte Reload
-	je	.LBB0_118
-# %bb.117:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovdqa	ymm1, ymmword ptr [rsp + 416]   # 32-byte Reload
-	vpextrq	rcx, xmm1, 1
-	vpextrb	byte ptr [r8 + rcx], xmm15, 13
-.LBB0_118:                              #   in Loop: Header=BB0_26 Depth=1
-	vpackssdw	ymm1, ymm7, ymm0
-	vpermq	ymm1, ymm1, 232                 # ymm1 = ymm1[0,2,2,3]
-	vpacksswb	xmm1, xmm1, xmm1
-	vpextrb	ecx, xmm1, 14
-	test	cl, 1
-	je	.LBB0_120
-# %bb.119:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovdqa	ymm1, ymmword ptr [rsp + 416]   # 32-byte Reload
-	vextracti128	xmm1, ymm1, 1
-	vmovq	rcx, xmm1
-	vpextrb	byte ptr [r8 + rcx], xmm15, 14
-.LBB0_120:                              #   in Loop: Header=BB0_26 Depth=1
-	vpackssdw	ymm1, ymm7, ymm0
-	vpermq	ymm1, ymm1, 232                 # ymm1 = ymm1[0,2,2,3]
-	vpacksswb	xmm1, xmm1, xmm1
-	vpextrb	ecx, xmm1, 15
-	test	cl, 1
-	je	.LBB0_122
-# %bb.121:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovdqa	ymm1, ymmword ptr [rsp + 416]   # 32-byte Reload
-	vextracti128	xmm1, ymm1, 1
-	vpextrq	rcx, xmm1, 1
-	vpextrb	byte ptr [r8 + rcx], xmm15, 15
-.LBB0_122:                              #   in Loop: Header=BB0_26 Depth=1
-	vpmovzxdq	ymm1, xmm10             # ymm1 = xmm10[0],zero,xmm10[1],zero,xmm10[2],zero,xmm10[3],zero
-	vmovdqa	ymmword ptr [rsp + 384], ymm1   # 32-byte Spill
-	vpcmpgtd	xmm1, xmm0, xmm10
-	vmovd	ecx, xmm1
-	test	cl, 1
-	je	.LBB0_124
-# %bb.123:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovdqa	ymm1, ymmword ptr [rsp + 384]   # 32-byte Reload
-	vmovq	rcx, xmm1
-	vextracti128	xmm1, ymm15, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 0
-.LBB0_124:                              #   in Loop: Header=BB0_26 Depth=1
-	vpcmpgtd	xmm1, xmm0, xmm10
-	vpackssdw	xmm1, xmm1, xmm1
-	vpermq	ymm1, ymm1, 212                 # ymm1 = ymm1[0,1,1,3]
-	vpacksswb	ymm1, ymm1, ymm0
-	vextracti128	xmm1, ymm1, 1
-	vpextrb	ecx, xmm1, 1
-	test	cl, 1
-	je	.LBB0_126
-# %bb.125:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovdqa	ymm1, ymmword ptr [rsp + 384]   # 32-byte Reload
-	vpextrq	rcx, xmm1, 1
-	vextracti128	xmm1, ymm15, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 1
-.LBB0_126:                              #   in Loop: Header=BB0_26 Depth=1
-	vpcmpgtd	xmm1, xmm0, xmm10
-	vpackssdw	xmm1, xmm1, xmm1
-	vpermq	ymm1, ymm1, 212                 # ymm1 = ymm1[0,1,1,3]
-	vpacksswb	ymm1, ymm1, ymm0
-	vextracti128	xmm1, ymm1, 1
-	vpextrb	ecx, xmm1, 2
-	test	cl, 1
-	je	.LBB0_128
-# %bb.127:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovdqa	ymm1, ymmword ptr [rsp + 384]   # 32-byte Reload
-	vextracti128	xmm1, ymm1, 1
-	vmovq	rcx, xmm1
-	vextracti128	xmm1, ymm15, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 2
-.LBB0_128:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm10, 1
-	vpcmpgtd	xmm2, xmm0, xmm10
-	vpackssdw	xmm2, xmm2, xmm2
-	vpermq	ymm2, ymm2, 212                 # ymm2 = ymm2[0,1,1,3]
-	vpacksswb	ymm2, ymm2, ymm0
-	vextracti128	xmm2, ymm2, 1
-	vpextrb	ecx, xmm2, 3
-	test	cl, 1
-	je	.LBB0_130
-# %bb.129:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovdqa	ymm2, ymmword ptr [rsp + 384]   # 32-byte Reload
-	vextracti128	xmm2, ymm2, 1
-	vpextrq	rcx, xmm2, 1
-	vextracti128	xmm2, ymm15, 1
-	vpextrb	byte ptr [r8 + rcx], xmm2, 3
-.LBB0_130:                              #   in Loop: Header=BB0_26 Depth=1
-	vpmovzxdq	ymm1, xmm1              # ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
-	vmovdqa	ymmword ptr [rsp + 352], ymm1   # 32-byte Spill
-	vpackssdw	ymm1, ymm0, ymm8
-	vpacksswb	ymm1, ymm1, ymm0
-	vextracti128	xmm1, ymm1, 1
-	vpextrb	ecx, xmm1, 4
-	test	cl, 1
-	je	.LBB0_132
-# %bb.131:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovdqa	ymm1, ymmword ptr [rsp + 352]   # 32-byte Reload
-	vmovq	rcx, xmm1
-	vextracti128	xmm1, ymm15, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 4
-.LBB0_132:                              #   in Loop: Header=BB0_26 Depth=1
-	vpackssdw	ymm1, ymm0, ymm8
-	vpacksswb	ymm1, ymm1, ymm0
-	vextracti128	xmm1, ymm1, 1
-	vpextrb	ecx, xmm1, 5
-	test	cl, 1
-	je	.LBB0_134
-# %bb.133:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovdqa	ymm1, ymmword ptr [rsp + 352]   # 32-byte Reload
-	vpextrq	rcx, xmm1, 1
-	vextracti128	xmm1, ymm15, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 5
-.LBB0_134:                              #   in Loop: Header=BB0_26 Depth=1
-	vpackssdw	ymm1, ymm0, ymm8
-	vpacksswb	ymm1, ymm1, ymm0
-	vextracti128	xmm1, ymm1, 1
-	vpextrb	ecx, xmm1, 6
-	test	cl, 1
-	je	.LBB0_136
-# %bb.135:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovdqa	ymm1, ymmword ptr [rsp + 352]   # 32-byte Reload
-	vextracti128	xmm1, ymm1, 1
-	vmovq	rcx, xmm1
-	vextracti128	xmm1, ymm15, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 6
-.LBB0_136:                              #   in Loop: Header=BB0_26 Depth=1
-	vpackssdw	ymm1, ymm0, ymm8
-	vpacksswb	ymm1, ymm1, ymm0
-	vextracti128	xmm1, ymm1, 1
-	vpextrb	ecx, xmm1, 7
-	test	cl, 1
-	je	.LBB0_138
-# %bb.137:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovdqa	ymm1, ymmword ptr [rsp + 352]   # 32-byte Reload
-	vextracti128	xmm1, ymm1, 1
-	vpextrq	rcx, xmm1, 1
-	vextracti128	xmm1, ymm15, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 7
-.LBB0_138:                              #   in Loop: Header=BB0_26 Depth=1
-	vpmovzxdq	ymm1, xmm11             # ymm1 = xmm11[0],zero,xmm11[1],zero,xmm11[2],zero,xmm11[3],zero
-	vmovdqa	ymmword ptr [rsp + 320], ymm1   # 32-byte Spill
-	vpacksswb	ymm1, ymm0, ymm12
-	vextracti128	xmm1, ymm1, 1
-	vpextrb	ecx, xmm1, 8
-	test	cl, 1
-	je	.LBB0_140
-# %bb.139:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovdqa	ymm1, ymmword ptr [rsp + 320]   # 32-byte Reload
-	vmovq	rcx, xmm1
-	vextracti128	xmm1, ymm15, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 8
-.LBB0_140:                              #   in Loop: Header=BB0_26 Depth=1
-	vpcmpgtd	xmm1, xmm0, xmm11
-	vpackssdw	xmm1, xmm1, xmm1
-	vpermq	ymm1, ymm1, 212                 # ymm1 = ymm1[0,1,1,3]
-	vpacksswb	ymm1, ymm0, ymm1
-	vextracti128	xmm1, ymm1, 1
-	vpextrb	ecx, xmm1, 9
-	test	cl, 1
-	je	.LBB0_142
-# %bb.141:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovdqa	ymm1, ymmword ptr [rsp + 320]   # 32-byte Reload
-	vpextrq	rcx, xmm1, 1
-	vextracti128	xmm1, ymm15, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 9
-.LBB0_142:                              #   in Loop: Header=BB0_26 Depth=1
-	vpcmpgtd	xmm1, xmm0, xmm11
-	vpackssdw	xmm1, xmm1, xmm1
-	vpermq	ymm1, ymm1, 212                 # ymm1 = ymm1[0,1,1,3]
-	vpacksswb	ymm1, ymm0, ymm1
-	vextracti128	xmm1, ymm1, 1
-	vpextrb	ecx, xmm1, 10
-	test	cl, 1
-	je	.LBB0_144
-# %bb.143:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovdqa	ymm1, ymmword ptr [rsp + 320]   # 32-byte Reload
-	vextracti128	xmm1, ymm1, 1
-	vmovq	rcx, xmm1
-	vextracti128	xmm1, ymm15, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 10
-.LBB0_144:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm11, 1
-	vpcmpgtd	xmm4, xmm0, xmm11
-	vpackssdw	xmm4, xmm4, xmm4
-	vpermq	ymm4, ymm4, 212                 # ymm4 = ymm4[0,1,1,3]
-	vpacksswb	ymm4, ymm0, ymm4
-	vextracti128	xmm4, ymm4, 1
-	vpextrb	ecx, xmm4, 11
-	test	cl, 1
-	je	.LBB0_146
-# %bb.145:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovdqa	ymm2, ymmword ptr [rsp + 320]   # 32-byte Reload
-	vextracti128	xmm4, ymm2, 1
-	vpextrq	rcx, xmm4, 1
-	vextracti128	xmm4, ymm15, 1
-	vpextrb	byte ptr [r8 + rcx], xmm4, 11
-.LBB0_146:                              #   in Loop: Header=BB0_26 Depth=1
-	vpmovzxdq	ymm4, xmm1              # ymm4 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
-	vpackssdw	ymm1, ymm0, ymm9
-	vpacksswb	ymm1, ymm0, ymm1
-	vextracti128	xmm1, ymm1, 1
-	vpextrb	ecx, xmm1, 12
-	test	cl, 1
-	je	.LBB0_148
-# %bb.147:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rcx, xmm4
-	vextracti128	xmm1, ymm15, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 12
-.LBB0_148:                              #   in Loop: Header=BB0_26 Depth=1
-	vpackssdw	ymm1, ymm0, ymm9
-	vpacksswb	ymm1, ymm0, ymm1
-	vextracti128	xmm1, ymm1, 1
-	vpextrb	ecx, xmm1, 13
-	test	cl, 1
-	je	.LBB0_150
-# %bb.149:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rcx, xmm4, 1
-	vextracti128	xmm1, ymm15, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 13
-.LBB0_150:                              #   in Loop: Header=BB0_26 Depth=1
-	vpackssdw	ymm1, ymm0, ymm9
-	vpacksswb	ymm1, ymm0, ymm1
-	vextracti128	xmm1, ymm1, 1
-	vpextrb	ecx, xmm1, 14
-	test	cl, 1
-	je	.LBB0_152
-# %bb.151:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm4, 1
-	vmovq	rcx, xmm1
-	vextracti128	xmm1, ymm15, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 14
-.LBB0_152:                              #   in Loop: Header=BB0_26 Depth=1
-	vpackssdw	ymm1, ymm0, ymm9
-	vpacksswb	ymm1, ymm0, ymm1
-	vextracti128	xmm1, ymm1, 1
-	vpextrb	ecx, xmm1, 15
-	test	cl, 1
-	je	.LBB0_154
-# %bb.153:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm4, 1
-	vpextrq	rcx, xmm1, 1
-	vextracti128	xmm1, ymm15, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 15
-.LBB0_154:                              #   in Loop: Header=BB0_26 Depth=1
-	vpackssdw	ymm1, ymm6, ymm8
-	vpermq	ymm1, ymm1, 216                 # ymm1 = ymm1[0,2,1,3]
-	vpackssdw	ymm5, ymm7, ymm9
-	vpermq	ymm5, ymm5, 216                 # ymm5 = ymm5[0,2,1,3]
-	vpacksswb	ymm1, ymm1, ymm5
-	vmovdqa	ymm2, ymmword ptr [rsp + 768]   # 32-byte Reload
-	vpor	ymm15, ymm2, ymmword ptr [rsp + 512] # 32-byte Folded Reload
-	vpor	ymm5, ymm2, ymmword ptr [rsp + 480] # 32-byte Folded Reload
-	vpor	ymm10, ymm2, ymmword ptr [rsp + 384] # 32-byte Folded Reload
-	vpor	ymm9, ymm2, ymmword ptr [rsp + 352] # 32-byte Folded Reload
-	vpor	ymm12, ymm2, ymmword ptr [rsp + 448] # 32-byte Folded Reload
-	vpor	ymm11, ymm2, ymmword ptr [rsp + 416] # 32-byte Folded Reload
-	vpor	ymm8, ymm2, ymmword ptr [rsp + 320] # 32-byte Folded Reload
-	vpor	ymm7, ymm4, ymm2
-	vperm2i128	ymm6, ymm8, ymm7, 49    # ymm6 = ymm8[2,3],ymm7[2,3]
-	vinserti128	ymm13, ymm8, xmm7, 1
-	vshufps	ymm6, ymm13, ymm6, 136          # ymm6 = ymm13[0,2],ymm6[0,2],ymm13[4,6],ymm6[4,6]
-	vperm2i128	ymm13, ymm12, ymm11, 49 # ymm13 = ymm12[2,3],ymm11[2,3]
-	vinserti128	ymm14, ymm12, xmm11, 1
-	vshufps	ymm13, ymm14, ymm13, 136        # ymm13 = ymm14[0,2],ymm13[0,2],ymm14[4,6],ymm13[4,6]
-	vperm2i128	ymm14, ymm10, ymm9, 49  # ymm14 = ymm10[2,3],ymm9[2,3]
-	vinserti128	ymm2, ymm10, xmm9, 1
-	vshufps	ymm2, ymm2, ymm14, 136          # ymm2 = ymm2[0,2],ymm14[0,2],ymm2[4,6],ymm14[4,6]
-	vperm2i128	ymm14, ymm15, ymm5, 49  # ymm14 = ymm15[2,3],ymm5[2,3]
-	vinserti128	ymm3, ymm15, xmm5, 1
-	vshufps	ymm3, ymm3, ymm14, 136          # ymm3 = ymm3[0,2],ymm14[0,2],ymm3[4,6],ymm14[4,6]
-	vpcmpgtd	ymm3, ymm0, ymm3
-	vpcmpgtd	ymm2, ymm0, ymm2
-	vpackssdw	ymm2, ymm3, ymm2
-	vpcmpgtd	ymm3, ymm0, ymm13
-	vpcmpgtd	ymm6, ymm0, ymm6
-	vpackssdw	ymm3, ymm3, ymm6
-	vpermq	ymm2, ymm2, 216                 # ymm2 = ymm2[0,2,1,3]
-	vpermq	ymm3, ymm3, 216                 # ymm3 = ymm3[0,2,1,3]
-	vpacksswb	ymm2, ymm2, ymm3
-	vpand	ymm6, ymm2, ymm1
-	vmovd	ecx, xmm6
-                                        # implicit-def: $ymm14
-	test	cl, 1
-	je	.LBB0_155
-# %bb.660:                              #   in Loop: Header=BB0_26 Depth=1
-	vpbroadcastb	ymm14, byte ptr [rdi + r11]
-	vpextrb	ecx, xmm6, 1
-	test	cl, 1
-	jne	.LBB0_661
-.LBB0_156:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rbx, qword ptr [rsp + 224]      # 8-byte Reload
-	vpextrb	ecx, xmm6, 2
-	test	cl, 1
-	je	.LBB0_157
-.LBB0_662:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rsi], 2
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 3
-	test	cl, 1
-	jne	.LBB0_663
-.LBB0_158:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 4
-	test	cl, 1
-	je	.LBB0_159
-.LBB0_664:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rcx, qword ptr [rsp + 264]      # 8-byte Reload
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rcx], 4
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 5
-	test	cl, 1
-	jne	.LBB0_665
-.LBB0_160:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rsi, qword ptr [rsp + 232]      # 8-byte Reload
-	vpextrb	ecx, xmm6, 6
-	test	cl, 1
-	je	.LBB0_161
-.LBB0_666:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rax], 6
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 7
-	test	cl, 1
-	jne	.LBB0_667
-.LBB0_162:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 8
-	test	cl, 1
-	je	.LBB0_163
-.LBB0_668:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rax, qword ptr [rsp + 96]       # 8-byte Reload
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rax], 8
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 9
-	test	cl, 1
-	jne	.LBB0_669
-.LBB0_164:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 10
-	test	cl, 1
-	je	.LBB0_165
-.LBB0_670:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rax, qword ptr [rsp + 144]      # 8-byte Reload
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rax], 10
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 11
-	test	cl, 1
-	jne	.LBB0_671
-.LBB0_166:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 12
-	test	cl, 1
-	je	.LBB0_167
-.LBB0_672:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rax, qword ptr [rsp + 248]      # 8-byte Reload
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rax], 12
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 13
-	test	cl, 1
-	jne	.LBB0_673
-.LBB0_168:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 14
-	test	cl, 1
-	je	.LBB0_169
-.LBB0_674:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rax, qword ptr [rsp + 80]       # 8-byte Reload
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rax], 14
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 15
-	test	cl, 1
-	jne	.LBB0_170
-	jmp	.LBB0_171
-	.p2align	4, 0x90
-.LBB0_155:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 1
-	test	cl, 1
-	je	.LBB0_156
-.LBB0_661:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + r9], 1
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	mov	rbx, qword ptr [rsp + 224]      # 8-byte Reload
-	vpextrb	ecx, xmm6, 2
-	test	cl, 1
-	jne	.LBB0_662
-.LBB0_157:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 3
-	test	cl, 1
-	je	.LBB0_158
-.LBB0_663:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rcx, qword ptr [rsp + 272]      # 8-byte Reload
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rcx], 3
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 4
-	test	cl, 1
-	jne	.LBB0_664
-.LBB0_159:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 5
-	test	cl, 1
-	je	.LBB0_160
-.LBB0_665:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + r14], 5
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	mov	rsi, qword ptr [rsp + 232]      # 8-byte Reload
-	vpextrb	ecx, xmm6, 6
-	test	cl, 1
-	jne	.LBB0_666
-.LBB0_161:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 7
-	test	cl, 1
-	je	.LBB0_162
-.LBB0_667:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rsi], 7
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 8
-	test	cl, 1
-	jne	.LBB0_668
-.LBB0_163:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 9
-	test	cl, 1
-	je	.LBB0_164
-.LBB0_669:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rbx], 9
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 10
-	test	cl, 1
-	jne	.LBB0_670
-.LBB0_165:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 11
-	test	cl, 1
-	je	.LBB0_166
-.LBB0_671:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rax, qword ptr [rsp + 256]      # 8-byte Reload
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rax], 11
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 12
-	test	cl, 1
-	jne	.LBB0_672
-.LBB0_167:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 13
-	test	cl, 1
-	je	.LBB0_168
-.LBB0_673:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rax, qword ptr [rsp + 88]       # 8-byte Reload
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rax], 13
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 14
-	test	cl, 1
-	jne	.LBB0_674
-.LBB0_169:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 15
-	test	cl, 1
-	je	.LBB0_171
-.LBB0_170:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rax, qword ptr [rsp + 72]       # 8-byte Reload
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rax], 15
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-.LBB0_171:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rcx, qword ptr [rsp + 208]      # 8-byte Reload
-	vextracti128	xmm13, ymm6, 1
-	vmovd	eax, xmm13
-	mov	dword ptr [rsp + 44], eax       # 4-byte Spill
-	test	al, 1
-	je	.LBB0_172
-# %bb.675:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm14, 1
-	mov	rax, qword ptr [rsp + 64]       # 8-byte Reload
-	vpinsrb	xmm1, xmm1, byte ptr [rdi + rax], 0
-	vinserti128	ymm14, ymm14, xmm1, 1
-	vpextrb	eax, xmm13, 1
-	mov	dword ptr [rsp + 40], eax       # 4-byte Spill
-	test	al, 1
-	jne	.LBB0_676
-.LBB0_173:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	eax, xmm13, 2
-	mov	dword ptr [rsp + 36], eax       # 4-byte Spill
-	test	al, 1
-	je	.LBB0_174
-.LBB0_677:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm14, 1
-	mov	rax, qword ptr [rsp + 128]      # 8-byte Reload
-	vpinsrb	xmm1, xmm1, byte ptr [rdi + rax], 2
-	vinserti128	ymm14, ymm14, xmm1, 1
-	vpextrb	eax, xmm13, 3
-	mov	dword ptr [rsp + 32], eax       # 4-byte Spill
-	test	al, 1
-	jne	.LBB0_678
-.LBB0_175:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	eax, xmm13, 4
-	mov	dword ptr [rsp + 28], eax       # 4-byte Spill
-	test	al, 1
-	je	.LBB0_176
-.LBB0_679:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm14, 1
-	mov	rax, qword ptr [rsp + 56]       # 8-byte Reload
-	vpinsrb	xmm1, xmm1, byte ptr [rdi + rax], 4
-	vinserti128	ymm14, ymm14, xmm1, 1
-	vpextrb	eax, xmm13, 5
-	mov	dword ptr [rsp + 24], eax       # 4-byte Spill
-	test	al, 1
-	jne	.LBB0_680
-.LBB0_177:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	eax, xmm13, 6
-	mov	dword ptr [rsp + 20], eax       # 4-byte Spill
-	test	al, 1
-	je	.LBB0_178
-.LBB0_681:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm14, 1
-	vpinsrb	xmm1, xmm1, byte ptr [rdi + r10], 6
-	vinserti128	ymm14, ymm14, xmm1, 1
-	vpextrb	eax, xmm13, 7
-	mov	dword ptr [rsp + 316], eax      # 4-byte Spill
-	test	al, 1
-	jne	.LBB0_682
-.LBB0_179:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rax, qword ptr [rsp + 216]      # 8-byte Reload
-	vpextrb	ebx, xmm13, 8
-	test	bl, 1
-	je	.LBB0_181
-.LBB0_180:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm14, 1
-	vpinsrb	xmm1, xmm1, byte ptr [rdi + rax], 8
-	vinserti128	ymm14, ymm14, xmm1, 1
-.LBB0_181:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	r9d, xmm13, 9
-	test	r9b, 1
-	mov	qword ptr [rsp + 280], r13      # 8-byte Spill
-	mov	qword ptr [rsp + 112], r10      # 8-byte Spill
-	mov	qword ptr [rsp + 184], rdx      # 8-byte Spill
-	je	.LBB0_183
-# %bb.182:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm14, 1
-	vpinsrb	xmm1, xmm1, byte ptr [rdi + rcx], 9
-	vinserti128	ymm14, ymm14, xmm1, 1
-.LBB0_183:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rax, qword ptr [rsp + 200]      # 8-byte Reload
-	mov	rcx, qword ptr [rsp + 192]      # 8-byte Reload
-	vpextrb	r13d, xmm13, 10
-	test	r13b, 1
-	je	.LBB0_184
-# %bb.683:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm14, 1
-	vpinsrb	xmm1, xmm1, byte ptr [rdi + rax], 10
-	vinserti128	ymm14, ymm14, xmm1, 1
-	vpextrb	eax, xmm13, 11
-	test	al, 1
-	mov	qword ptr [rsp + 120], r15      # 8-byte Spill
-	jne	.LBB0_684
-.LBB0_185:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	r15d, xmm13, 12
-	test	r15b, 1
-	mov	qword ptr [rsp + 304], r11      # 8-byte Spill
-	je	.LBB0_186
-.LBB0_685:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm14, 1
-	mov	rcx, qword ptr [rsp + 184]      # 8-byte Reload
-	vpinsrb	xmm1, xmm1, byte ptr [rdi + rcx], 12
-	vinserti128	ymm14, ymm14, xmm1, 1
-	vpextrb	edx, xmm13, 13
-	test	dl, 1
-	jne	.LBB0_686
-.LBB0_187:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	esi, xmm13, 14
-	test	sil, 1
-	je	.LBB0_188
-.LBB0_687:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm14, 1
-	mov	rcx, qword ptr [rsp + 168]      # 8-byte Reload
-	vpinsrb	xmm1, xmm1, byte ptr [rdi + rcx], 14
-	vinserti128	ymm14, ymm14, xmm1, 1
-	vpextrb	r14d, xmm13, 15
-	test	r14b, 1
-	jne	.LBB0_189
-	jmp	.LBB0_190
-	.p2align	4, 0x90
-.LBB0_172:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	eax, xmm13, 1
-	mov	dword ptr [rsp + 40], eax       # 4-byte Spill
-	test	al, 1
-	je	.LBB0_173
-.LBB0_676:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm14, 1
-	mov	rax, qword ptr [rsp + 136]      # 8-byte Reload
-	vpinsrb	xmm1, xmm1, byte ptr [rdi + rax], 1
-	vinserti128	ymm14, ymm14, xmm1, 1
-	vpextrb	eax, xmm13, 2
-	mov	dword ptr [rsp + 36], eax       # 4-byte Spill
-	test	al, 1
-	jne	.LBB0_677
-.LBB0_174:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	eax, xmm13, 3
-	mov	dword ptr [rsp + 32], eax       # 4-byte Spill
-	test	al, 1
-	je	.LBB0_175
-.LBB0_678:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm14, 1
-	vpinsrb	xmm1, xmm1, byte ptr [rdi + r15], 3
-	vinserti128	ymm14, ymm14, xmm1, 1
-	vpextrb	eax, xmm13, 4
-	mov	dword ptr [rsp + 28], eax       # 4-byte Spill
-	test	al, 1
-	jne	.LBB0_679
-.LBB0_176:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	eax, xmm13, 5
-	mov	dword ptr [rsp + 24], eax       # 4-byte Spill
-	test	al, 1
-	je	.LBB0_177
-.LBB0_680:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm14, 1
-	vpinsrb	xmm1, xmm1, byte ptr [rdi + r13], 5
-	vinserti128	ymm14, ymm14, xmm1, 1
-	vpextrb	eax, xmm13, 6
-	mov	dword ptr [rsp + 20], eax       # 4-byte Spill
-	test	al, 1
-	jne	.LBB0_681
-.LBB0_178:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	eax, xmm13, 7
-	mov	dword ptr [rsp + 316], eax      # 4-byte Spill
-	test	al, 1
-	je	.LBB0_179
-.LBB0_682:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm14, 1
-	mov	rax, qword ptr [rsp + 240]      # 8-byte Reload
-	vpinsrb	xmm1, xmm1, byte ptr [rdi + rax], 7
-	vinserti128	ymm14, ymm14, xmm1, 1
-	mov	rax, qword ptr [rsp + 216]      # 8-byte Reload
-	vpextrb	ebx, xmm13, 8
-	test	bl, 1
-	jne	.LBB0_180
-	jmp	.LBB0_181
-	.p2align	4, 0x90
-.LBB0_184:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	eax, xmm13, 11
-	test	al, 1
-	mov	qword ptr [rsp + 120], r15      # 8-byte Spill
-	je	.LBB0_185
-.LBB0_684:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm14, 1
-	vpinsrb	xmm1, xmm1, byte ptr [rdi + rcx], 11
-	vinserti128	ymm14, ymm14, xmm1, 1
-	vpextrb	r15d, xmm13, 12
-	test	r15b, 1
-	mov	qword ptr [rsp + 304], r11      # 8-byte Spill
-	jne	.LBB0_685
-.LBB0_186:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	edx, xmm13, 13
-	test	dl, 1
-	je	.LBB0_187
-.LBB0_686:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm14, 1
-	mov	rcx, qword ptr [rsp + 176]      # 8-byte Reload
-	vpinsrb	xmm1, xmm1, byte ptr [rdi + rcx], 13
-	vinserti128	ymm14, ymm14, xmm1, 1
-	vpextrb	esi, xmm13, 14
-	test	sil, 1
-	jne	.LBB0_687
-.LBB0_188:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	r14d, xmm13, 15
-	test	r14b, 1
-	je	.LBB0_190
-.LBB0_189:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm14, 1
-	mov	rcx, qword ptr [rsp + 160]      # 8-byte Reload
-	vpinsrb	xmm1, xmm1, byte ptr [rdi + rcx], 15
-	vinserti128	ymm14, ymm14, xmm1, 1
-.LBB0_190:                              #   in Loop: Header=BB0_26 Depth=1
-	vpsrlw	ymm1, ymm14, 1
-	vpand	ymm14, ymm1, ymmword ptr [rip + .LCPI0_4]
-	vmovd	r10d, xmm6
-	test	r10b, 1
-	je	.LBB0_191
-# %bb.688:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rcx, xmm15
-	vpextrb	byte ptr [r8 + rcx], xmm14, 0
-	vpextrb	ecx, xmm6, 1
-	test	cl, 1
-	jne	.LBB0_689
-.LBB0_192:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 2
-	test	cl, 1
-	je	.LBB0_193
-.LBB0_690:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm15, 1
-	vmovq	rcx, xmm1
-	vpextrb	byte ptr [r8 + rcx], xmm14, 2
-	vpextrb	ecx, xmm6, 3
-	test	cl, 1
-	jne	.LBB0_691
-.LBB0_194:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 4
-	test	cl, 1
-	je	.LBB0_195
-.LBB0_692:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rcx, xmm5
-	vpextrb	byte ptr [r8 + rcx], xmm14, 4
-	vpextrb	ecx, xmm6, 5
-	test	cl, 1
-	jne	.LBB0_693
-.LBB0_196:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 6
-	test	cl, 1
-	je	.LBB0_197
-.LBB0_694:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm5, 1
-	vmovq	rcx, xmm1
-	vpextrb	byte ptr [r8 + rcx], xmm14, 6
-	vpextrb	ecx, xmm6, 7
-	test	cl, 1
-	jne	.LBB0_695
-.LBB0_198:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 8
-	test	cl, 1
-	je	.LBB0_199
-.LBB0_696:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rcx, xmm12
-	vpextrb	byte ptr [r8 + rcx], xmm14, 8
-	vpextrb	ecx, xmm6, 9
-	test	cl, 1
-	jne	.LBB0_697
-.LBB0_200:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 10
-	test	cl, 1
-	je	.LBB0_201
-.LBB0_698:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm12, 1
-	vmovq	rcx, xmm1
-	vpextrb	byte ptr [r8 + rcx], xmm14, 10
-	vpextrb	ecx, xmm6, 11
-	test	cl, 1
-	jne	.LBB0_699
-.LBB0_202:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 12
-	test	cl, 1
-	je	.LBB0_203
-.LBB0_700:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rcx, xmm11
-	vpextrb	byte ptr [r8 + rcx], xmm14, 12
-	vpextrb	ecx, xmm6, 13
-	test	cl, 1
-	jne	.LBB0_701
-.LBB0_204:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 14
-	test	cl, 1
-	je	.LBB0_205
-.LBB0_702:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm11, 1
-	vmovq	rcx, xmm1
-	vpextrb	byte ptr [r8 + rcx], xmm14, 14
-	vpextrb	ecx, xmm6, 15
-	test	cl, 1
-	jne	.LBB0_703
-.LBB0_206:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 44], 1          # 1-byte Folded Reload
-	je	.LBB0_207
-.LBB0_704:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rcx, xmm10
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 0
-	test	byte ptr [rsp + 40], 1          # 1-byte Folded Reload
-	jne	.LBB0_705
-.LBB0_208:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 36], 1          # 1-byte Folded Reload
-	je	.LBB0_209
-.LBB0_706:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm10, 1
-	vmovq	rcx, xmm1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 2
-	test	byte ptr [rsp + 32], 1          # 1-byte Folded Reload
-	jne	.LBB0_707
-.LBB0_210:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 28], 1          # 1-byte Folded Reload
-	je	.LBB0_211
-.LBB0_708:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rcx, xmm9
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 4
-	test	byte ptr [rsp + 24], 1          # 1-byte Folded Reload
-	jne	.LBB0_709
-.LBB0_212:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 20], 1          # 1-byte Folded Reload
-	je	.LBB0_213
-.LBB0_710:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm9, 1
-	vmovq	rcx, xmm1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 6
-	test	byte ptr [rsp + 316], 1         # 1-byte Folded Reload
-	jne	.LBB0_711
-.LBB0_214:                              #   in Loop: Header=BB0_26 Depth=1
-	test	bl, 1
-	je	.LBB0_215
-.LBB0_712:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rcx, xmm8
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 8
-	test	r9b, 1
-	mov	r10, qword ptr [rsp + 224]      # 8-byte Reload
-	mov	r11, qword ptr [rsp + 144]      # 8-byte Reload
-	jne	.LBB0_713
-.LBB0_216:                              #   in Loop: Header=BB0_26 Depth=1
-	test	r13b, 1
-	mov	rbx, qword ptr [rsp + 296]      # 8-byte Reload
-	je	.LBB0_217
-.LBB0_714:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm8, 1
-	vmovq	rcx, xmm1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 10
-	test	al, 1
-	mov	r9, qword ptr [rsp + 288]       # 8-byte Reload
-	mov	rax, qword ptr [rsp + 232]      # 8-byte Reload
-	jne	.LBB0_715
-.LBB0_218:                              #   in Loop: Header=BB0_26 Depth=1
-	test	r15b, 1
-	je	.LBB0_219
-.LBB0_716:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rcx, xmm7
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 12
-	test	dl, 1
-	mov	r13, qword ptr [rsp + 136]      # 8-byte Reload
-	mov	r15, qword ptr [rsp + 128]      # 8-byte Reload
-	jne	.LBB0_717
-.LBB0_220:                              #   in Loop: Header=BB0_26 Depth=1
-	test	sil, 1
-	mov	rdx, qword ptr [rsp + 304]      # 8-byte Reload
-	je	.LBB0_221
-.LBB0_718:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm7, 1
-	vmovq	rcx, xmm1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 14
-	test	r14b, 1
-	mov	rsi, qword ptr [rsp + 152]      # 8-byte Reload
-	jne	.LBB0_222
-	jmp	.LBB0_223
-	.p2align	4, 0x90
-.LBB0_191:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 1
-	test	cl, 1
-	je	.LBB0_192
-.LBB0_689:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rcx, xmm15, 1
-	vpextrb	byte ptr [r8 + rcx], xmm14, 1
-	vpextrb	ecx, xmm6, 2
-	test	cl, 1
-	jne	.LBB0_690
-.LBB0_193:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 3
-	test	cl, 1
-	je	.LBB0_194
-.LBB0_691:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm15, 1
-	vpextrq	rcx, xmm1, 1
-	vpextrb	byte ptr [r8 + rcx], xmm14, 3
-	vpextrb	ecx, xmm6, 4
-	test	cl, 1
-	jne	.LBB0_692
-.LBB0_195:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 5
-	test	cl, 1
-	je	.LBB0_196
-.LBB0_693:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rcx, xmm5, 1
-	vpextrb	byte ptr [r8 + rcx], xmm14, 5
-	vpextrb	ecx, xmm6, 6
-	test	cl, 1
-	jne	.LBB0_694
-.LBB0_197:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 7
-	test	cl, 1
-	je	.LBB0_198
-.LBB0_695:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm5, 1
-	vpextrq	rcx, xmm1, 1
-	vpextrb	byte ptr [r8 + rcx], xmm14, 7
-	vpextrb	ecx, xmm6, 8
-	test	cl, 1
-	jne	.LBB0_696
-.LBB0_199:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 9
-	test	cl, 1
-	je	.LBB0_200
-.LBB0_697:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rcx, xmm12, 1
-	vpextrb	byte ptr [r8 + rcx], xmm14, 9
-	vpextrb	ecx, xmm6, 10
-	test	cl, 1
-	jne	.LBB0_698
-.LBB0_201:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 11
-	test	cl, 1
-	je	.LBB0_202
-.LBB0_699:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm12, 1
-	vpextrq	rcx, xmm1, 1
-	vpextrb	byte ptr [r8 + rcx], xmm14, 11
-	vpextrb	ecx, xmm6, 12
-	test	cl, 1
-	jne	.LBB0_700
-.LBB0_203:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 13
-	test	cl, 1
-	je	.LBB0_204
-.LBB0_701:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rcx, xmm11, 1
-	vpextrb	byte ptr [r8 + rcx], xmm14, 13
-	vpextrb	ecx, xmm6, 14
-	test	cl, 1
-	jne	.LBB0_702
-.LBB0_205:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 15
-	test	cl, 1
-	je	.LBB0_206
-.LBB0_703:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm11, 1
-	vpextrq	rcx, xmm1, 1
-	vpextrb	byte ptr [r8 + rcx], xmm14, 15
-	test	byte ptr [rsp + 44], 1          # 1-byte Folded Reload
-	jne	.LBB0_704
-.LBB0_207:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 40], 1          # 1-byte Folded Reload
-	je	.LBB0_208
-.LBB0_705:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rcx, xmm10, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 1
-	test	byte ptr [rsp + 36], 1          # 1-byte Folded Reload
-	jne	.LBB0_706
-.LBB0_209:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 32], 1          # 1-byte Folded Reload
-	je	.LBB0_210
-.LBB0_707:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm10, 1
-	vpextrq	rcx, xmm1, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 3
-	test	byte ptr [rsp + 28], 1          # 1-byte Folded Reload
-	jne	.LBB0_708
-.LBB0_211:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 24], 1          # 1-byte Folded Reload
-	je	.LBB0_212
-.LBB0_709:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rcx, xmm9, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 5
-	test	byte ptr [rsp + 20], 1          # 1-byte Folded Reload
-	jne	.LBB0_710
-.LBB0_213:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 316], 1         # 1-byte Folded Reload
-	je	.LBB0_214
-.LBB0_711:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm9, 1
-	vpextrq	rcx, xmm1, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 7
-	test	bl, 1
-	jne	.LBB0_712
-.LBB0_215:                              #   in Loop: Header=BB0_26 Depth=1
-	test	r9b, 1
-	mov	r10, qword ptr [rsp + 224]      # 8-byte Reload
-	mov	r11, qword ptr [rsp + 144]      # 8-byte Reload
-	je	.LBB0_216
-.LBB0_713:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rcx, xmm8, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 9
-	test	r13b, 1
-	mov	rbx, qword ptr [rsp + 296]      # 8-byte Reload
-	jne	.LBB0_714
-.LBB0_217:                              #   in Loop: Header=BB0_26 Depth=1
-	test	al, 1
-	mov	r9, qword ptr [rsp + 288]       # 8-byte Reload
-	mov	rax, qword ptr [rsp + 232]      # 8-byte Reload
-	je	.LBB0_218
-.LBB0_715:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm8, 1
-	vpextrq	rcx, xmm1, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 11
-	test	r15b, 1
-	jne	.LBB0_716
-.LBB0_219:                              #   in Loop: Header=BB0_26 Depth=1
-	test	dl, 1
-	mov	r13, qword ptr [rsp + 136]      # 8-byte Reload
-	mov	r15, qword ptr [rsp + 128]      # 8-byte Reload
-	je	.LBB0_220
-.LBB0_717:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rcx, xmm7, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 13
-	test	sil, 1
-	mov	rdx, qword ptr [rsp + 304]      # 8-byte Reload
-	jne	.LBB0_718
-.LBB0_221:                              #   in Loop: Header=BB0_26 Depth=1
-	test	r14b, 1
-	mov	rsi, qword ptr [rsp + 152]      # 8-byte Reload
-	je	.LBB0_223
-.LBB0_222:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm7, 1
-	vpextrq	rcx, xmm1, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 15
-.LBB0_223:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovdqa	ymm1, ymmword ptr [rsp + 736]   # 32-byte Reload
-	vpor	ymm15, ymm1, ymmword ptr [rsp + 512] # 32-byte Folded Reload
-	vpor	ymm5, ymm1, ymmword ptr [rsp + 480] # 32-byte Folded Reload
-	vpor	ymm10, ymm1, ymmword ptr [rsp + 384] # 32-byte Folded Reload
-	vpor	ymm9, ymm1, ymmword ptr [rsp + 352] # 32-byte Folded Reload
-	vpor	ymm12, ymm1, ymmword ptr [rsp + 448] # 32-byte Folded Reload
-	vpor	ymm11, ymm1, ymmword ptr [rsp + 416] # 32-byte Folded Reload
-	vpor	ymm8, ymm1, ymmword ptr [rsp + 320] # 32-byte Folded Reload
-	vpor	ymm7, ymm4, ymm1
-	vperm2i128	ymm1, ymm8, ymm7, 49    # ymm1 = ymm8[2,3],ymm7[2,3]
-	vinserti128	ymm2, ymm8, xmm7, 1
-	vshufps	ymm1, ymm2, ymm1, 136           # ymm1 = ymm2[0,2],ymm1[0,2],ymm2[4,6],ymm1[4,6]
-	vperm2i128	ymm2, ymm12, ymm11, 49  # ymm2 = ymm12[2,3],ymm11[2,3]
-	vinserti128	ymm3, ymm12, xmm11, 1
-	vshufps	ymm2, ymm3, ymm2, 136           # ymm2 = ymm3[0,2],ymm2[0,2],ymm3[4,6],ymm2[4,6]
-	vperm2i128	ymm3, ymm10, ymm9, 49   # ymm3 = ymm10[2,3],ymm9[2,3]
-	vinserti128	ymm13, ymm10, xmm9, 1
-	vshufps	ymm3, ymm13, ymm3, 136          # ymm3 = ymm13[0,2],ymm3[0,2],ymm13[4,6],ymm3[4,6]
-	vperm2i128	ymm13, ymm15, ymm5, 49  # ymm13 = ymm15[2,3],ymm5[2,3]
-	vinserti128	ymm14, ymm15, xmm5, 1
-	vshufps	ymm13, ymm14, ymm13, 136        # ymm13 = ymm14[0,2],ymm13[0,2],ymm14[4,6],ymm13[4,6]
-	vpcmpgtd	ymm13, ymm0, ymm13
-	vpcmpgtd	ymm3, ymm0, ymm3
-	vpackssdw	ymm3, ymm13, ymm3
-	vpcmpgtd	ymm2, ymm0, ymm2
-	vpcmpgtd	ymm1, ymm0, ymm1
-	vpackssdw	ymm1, ymm2, ymm1
-	vpermq	ymm2, ymm3, 216                 # ymm2 = ymm3[0,2,1,3]
-	vpermq	ymm1, ymm1, 216                 # ymm1 = ymm1[0,2,1,3]
-	vpacksswb	ymm1, ymm2, ymm1
-	vpand	ymm6, ymm1, ymm6
-	vmovd	ecx, xmm6
-                                        # implicit-def: $ymm14
-	test	cl, 1
-	je	.LBB0_224
-# %bb.719:                              #   in Loop: Header=BB0_26 Depth=1
-	vpbroadcastb	ymm14, byte ptr [rdi + rdx]
-	vpextrb	ecx, xmm6, 1
-	test	cl, 1
-	jne	.LBB0_720
-.LBB0_225:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rdx, qword ptr [rsp + 104]      # 8-byte Reload
-	vpextrb	ecx, xmm6, 2
-	test	cl, 1
-	je	.LBB0_227
-.LBB0_226:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rbx], 2
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-.LBB0_227:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rsi, qword ptr [rsp + 96]       # 8-byte Reload
-	mov	rbx, qword ptr [rsp + 72]       # 8-byte Reload
-	vpextrb	ecx, xmm6, 3
-	test	cl, 1
-	je	.LBB0_228
-# %bb.721:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rcx, qword ptr [rsp + 272]      # 8-byte Reload
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rcx], 3
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 4
-	test	cl, 1
-	jne	.LBB0_722
-.LBB0_229:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 5
-	test	cl, 1
-	je	.LBB0_230
-.LBB0_723:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rdx], 5
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 6
-	test	cl, 1
-	jne	.LBB0_724
-.LBB0_231:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 7
-	test	cl, 1
-	je	.LBB0_232
-.LBB0_725:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rax], 7
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 8
-	test	cl, 1
-	jne	.LBB0_726
-.LBB0_233:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rdx, qword ptr [rsp + 88]       # 8-byte Reload
-	vpextrb	ecx, xmm6, 9
-	test	cl, 1
-	je	.LBB0_234
-.LBB0_727:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + r10], 9
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 10
-	test	cl, 1
-	jne	.LBB0_728
-.LBB0_235:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 11
-	test	cl, 1
-	je	.LBB0_236
-.LBB0_729:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rax, qword ptr [rsp + 256]      # 8-byte Reload
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rax], 11
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 12
-	test	cl, 1
-	jne	.LBB0_730
-.LBB0_237:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 13
-	test	cl, 1
-	je	.LBB0_239
-.LBB0_238:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rdx], 13
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-.LBB0_239:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rax, qword ptr [rsp + 80]       # 8-byte Reload
-	mov	rdx, qword ptr [rsp + 64]       # 8-byte Reload
-	vpextrb	ecx, xmm6, 14
-	test	cl, 1
-	je	.LBB0_241
-# %bb.240:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rax], 14
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-.LBB0_241:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 15
-	test	cl, 1
-	je	.LBB0_243
-# %bb.242:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rbx], 15
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-.LBB0_243:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm6, 1
-	vmovd	eax, xmm1
-	mov	dword ptr [rsp + 44], eax       # 4-byte Spill
-	test	al, 1
-	je	.LBB0_245
-# %bb.244:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rdx], 0
-	vinserti128	ymm14, ymm14, xmm2, 1
-.LBB0_245:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rcx, qword ptr [rsp + 56]       # 8-byte Reload
-	vpextrb	eax, xmm1, 1
-	mov	dword ptr [rsp + 40], eax       # 4-byte Spill
-	test	al, 1
-	je	.LBB0_247
-# %bb.246:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + r13], 1
-	vinserti128	ymm14, ymm14, xmm2, 1
-.LBB0_247:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rdx, qword ptr [rsp + 280]      # 8-byte Reload
-	mov	rsi, qword ptr [rsp + 112]      # 8-byte Reload
-	vpextrb	eax, xmm1, 2
-	mov	dword ptr [rsp + 36], eax       # 4-byte Spill
-	test	al, 1
-	je	.LBB0_249
-# %bb.248:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + r15], 2
-	vinserti128	ymm14, ymm14, xmm2, 1
-.LBB0_249:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rax, qword ptr [rsp + 120]      # 8-byte Reload
-	vpextrb	ebx, xmm1, 3
-	mov	dword ptr [rsp + 32], ebx       # 4-byte Spill
-	test	bl, 1
-	je	.LBB0_250
-# %bb.731:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rax], 3
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	eax, xmm1, 4
-	mov	dword ptr [rsp + 28], eax       # 4-byte Spill
-	test	al, 1
-	jne	.LBB0_732
-.LBB0_251:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	eax, xmm1, 5
-	mov	dword ptr [rsp + 24], eax       # 4-byte Spill
-	test	al, 1
-	je	.LBB0_252
-.LBB0_733:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rdx], 5
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	eax, xmm1, 6
-	mov	dword ptr [rsp + 20], eax       # 4-byte Spill
-	test	al, 1
-	jne	.LBB0_734
-.LBB0_253:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	r9d, xmm1, 7
-	test	r9b, 1
-	je	.LBB0_254
-.LBB0_735:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	mov	rax, qword ptr [rsp + 240]      # 8-byte Reload
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rax], 7
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	edx, xmm1, 8
-	test	dl, 1
-	jne	.LBB0_736
-.LBB0_255:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm1, 9
-	test	cl, 1
-	je	.LBB0_256
-.LBB0_737:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	mov	rax, qword ptr [rsp + 208]      # 8-byte Reload
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rax], 9
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	esi, xmm1, 10
-	test	sil, 1
-	jne	.LBB0_738
-.LBB0_257:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	eax, xmm1, 11
-	test	al, 1
-	je	.LBB0_258
-.LBB0_739:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	mov	rbx, qword ptr [rsp + 192]      # 8-byte Reload
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rbx], 11
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	r13d, xmm1, 12
-	test	r13b, 1
-	jne	.LBB0_740
-.LBB0_259:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	r10d, xmm1, 13
-	test	r10b, 1
-	je	.LBB0_260
-.LBB0_741:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	mov	rbx, qword ptr [rsp + 176]      # 8-byte Reload
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rbx], 13
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	r11d, xmm1, 14
-	test	r11b, 1
-	jne	.LBB0_742
-.LBB0_261:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	r14d, xmm1, 15
-	test	r14b, 1
-	je	.LBB0_263
-.LBB0_262:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm14, 1
-	mov	rbx, qword ptr [rsp + 160]      # 8-byte Reload
-	vpinsrb	xmm1, xmm1, byte ptr [rdi + rbx], 15
-	vinserti128	ymm14, ymm14, xmm1, 1
-.LBB0_263:                              #   in Loop: Header=BB0_26 Depth=1
-	vpsrlw	ymm1, ymm14, 2
-	vpand	ymm14, ymm1, ymmword ptr [rip + .LCPI0_4]
-	vmovd	r15d, xmm6
-	test	r15b, 1
-	je	.LBB0_264
-# %bb.743:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rbx, xmm15
-	vpextrb	byte ptr [r8 + rbx], xmm14, 0
-	vpextrb	ebx, xmm6, 1
-	test	bl, 1
-	jne	.LBB0_744
-.LBB0_265:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 2
-	test	bl, 1
-	mov	r15, qword ptr [rsp + 224]      # 8-byte Reload
-	je	.LBB0_266
-.LBB0_745:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm15, 1
-	vmovq	rbx, xmm1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 2
-	vpextrb	ebx, xmm6, 3
-	test	bl, 1
-	jne	.LBB0_746
-.LBB0_267:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 4
-	test	bl, 1
-	je	.LBB0_268
-.LBB0_747:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rbx, xmm5
-	vpextrb	byte ptr [r8 + rbx], xmm14, 4
-	vpextrb	ebx, xmm6, 5
-	test	bl, 1
-	jne	.LBB0_748
-.LBB0_269:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 6
-	test	bl, 1
-	je	.LBB0_270
-.LBB0_749:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm5, 1
-	vmovq	rbx, xmm1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 6
-	vpextrb	ebx, xmm6, 7
-	test	bl, 1
-	jne	.LBB0_750
-.LBB0_271:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 8
-	test	bl, 1
-	je	.LBB0_272
-.LBB0_751:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rbx, xmm12
-	vpextrb	byte ptr [r8 + rbx], xmm14, 8
-	vpextrb	ebx, xmm6, 9
-	test	bl, 1
-	jne	.LBB0_752
-.LBB0_273:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 10
-	test	bl, 1
-	je	.LBB0_274
-.LBB0_753:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm12, 1
-	vmovq	rbx, xmm1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 10
-	vpextrb	ebx, xmm6, 11
-	test	bl, 1
-	jne	.LBB0_754
-.LBB0_275:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 12
-	test	bl, 1
-	je	.LBB0_276
-.LBB0_755:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rbx, xmm11
-	vpextrb	byte ptr [r8 + rbx], xmm14, 12
-	vpextrb	ebx, xmm6, 13
-	test	bl, 1
-	jne	.LBB0_756
-.LBB0_277:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 14
-	test	bl, 1
-	je	.LBB0_278
-.LBB0_757:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm11, 1
-	vmovq	rbx, xmm1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 14
-	vpextrb	ebx, xmm6, 15
-	test	bl, 1
-	jne	.LBB0_758
-.LBB0_279:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 44], 1          # 1-byte Folded Reload
-	je	.LBB0_280
-.LBB0_759:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rbx, xmm10
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 0
-	test	byte ptr [rsp + 40], 1          # 1-byte Folded Reload
-	jne	.LBB0_760
-.LBB0_281:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 36], 1          # 1-byte Folded Reload
-	je	.LBB0_282
-.LBB0_761:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm10, 1
-	vmovq	rbx, xmm1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 2
-	test	byte ptr [rsp + 32], 1          # 1-byte Folded Reload
-	jne	.LBB0_762
-.LBB0_283:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 28], 1          # 1-byte Folded Reload
-	je	.LBB0_284
-.LBB0_763:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rbx, xmm9
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 4
-	test	byte ptr [rsp + 24], 1          # 1-byte Folded Reload
-	jne	.LBB0_764
-.LBB0_285:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 20], 1          # 1-byte Folded Reload
-	je	.LBB0_286
-.LBB0_765:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm9, 1
-	vmovq	rbx, xmm1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 6
-	test	r9b, 1
-	jne	.LBB0_766
-.LBB0_287:                              #   in Loop: Header=BB0_26 Depth=1
-	test	dl, 1
-	mov	rbx, qword ptr [rsp + 296]      # 8-byte Reload
-	je	.LBB0_288
-.LBB0_767:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rdx, xmm8
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rdx], xmm1, 8
-	test	cl, 1
-	jne	.LBB0_768
-.LBB0_289:                              #   in Loop: Header=BB0_26 Depth=1
-	test	sil, 1
-	mov	rdx, qword ptr [rsp + 304]      # 8-byte Reload
-	je	.LBB0_290
-.LBB0_769:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm8, 1
-	vmovq	rcx, xmm1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 10
-	test	al, 1
-	mov	rsi, qword ptr [rsp + 152]      # 8-byte Reload
-	jne	.LBB0_770
-.LBB0_291:                              #   in Loop: Header=BB0_26 Depth=1
-	test	r13b, 1
-	je	.LBB0_292
-.LBB0_771:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rcx, xmm7
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 12
-	test	r10b, 1
-	mov	r13, qword ptr [rsp + 280]      # 8-byte Reload
-	jne	.LBB0_772
-.LBB0_293:                              #   in Loop: Header=BB0_26 Depth=1
-	test	r11b, 1
-	je	.LBB0_294
-.LBB0_773:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm7, 1
-	vmovq	rcx, xmm1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 14
-	test	r14b, 1
-	mov	rax, qword ptr [rsp + 288]      # 8-byte Reload
-	mov	r9, qword ptr [rsp + 232]       # 8-byte Reload
-	jne	.LBB0_295
-	jmp	.LBB0_296
-	.p2align	4, 0x90
-.LBB0_224:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 1
-	test	cl, 1
-	je	.LBB0_225
-.LBB0_720:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rsi], 1
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	mov	rdx, qword ptr [rsp + 104]      # 8-byte Reload
-	vpextrb	ecx, xmm6, 2
-	test	cl, 1
-	jne	.LBB0_226
-	jmp	.LBB0_227
-	.p2align	4, 0x90
-.LBB0_228:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 4
-	test	cl, 1
-	je	.LBB0_229
-.LBB0_722:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rcx, qword ptr [rsp + 264]      # 8-byte Reload
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rcx], 4
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 5
-	test	cl, 1
-	jne	.LBB0_723
-.LBB0_230:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 6
-	test	cl, 1
-	je	.LBB0_231
-.LBB0_724:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + r9], 6
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 7
-	test	cl, 1
-	jne	.LBB0_725
-.LBB0_232:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 8
-	test	cl, 1
-	je	.LBB0_233
-.LBB0_726:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rsi], 8
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	mov	rdx, qword ptr [rsp + 88]       # 8-byte Reload
-	vpextrb	ecx, xmm6, 9
-	test	cl, 1
-	jne	.LBB0_727
-.LBB0_234:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 10
-	test	cl, 1
-	je	.LBB0_235
-.LBB0_728:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + r11], 10
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 11
-	test	cl, 1
-	jne	.LBB0_729
-.LBB0_236:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 12
-	test	cl, 1
-	je	.LBB0_237
-.LBB0_730:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rax, qword ptr [rsp + 248]      # 8-byte Reload
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rax], 12
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 13
-	test	cl, 1
-	jne	.LBB0_238
-	jmp	.LBB0_239
-	.p2align	4, 0x90
-.LBB0_250:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	eax, xmm1, 4
-	mov	dword ptr [rsp + 28], eax       # 4-byte Spill
-	test	al, 1
-	je	.LBB0_251
-.LBB0_732:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rcx], 4
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	eax, xmm1, 5
-	mov	dword ptr [rsp + 24], eax       # 4-byte Spill
-	test	al, 1
-	jne	.LBB0_733
-.LBB0_252:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	eax, xmm1, 6
-	mov	dword ptr [rsp + 20], eax       # 4-byte Spill
-	test	al, 1
-	je	.LBB0_253
-.LBB0_734:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rsi], 6
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	r9d, xmm1, 7
-	test	r9b, 1
-	jne	.LBB0_735
-.LBB0_254:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	edx, xmm1, 8
-	test	dl, 1
-	je	.LBB0_255
-.LBB0_736:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	mov	rax, qword ptr [rsp + 216]      # 8-byte Reload
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rax], 8
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	ecx, xmm1, 9
-	test	cl, 1
-	jne	.LBB0_737
-.LBB0_256:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	esi, xmm1, 10
-	test	sil, 1
-	je	.LBB0_257
-.LBB0_738:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	mov	rax, qword ptr [rsp + 200]      # 8-byte Reload
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rax], 10
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	eax, xmm1, 11
-	test	al, 1
-	jne	.LBB0_739
-.LBB0_258:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	r13d, xmm1, 12
-	test	r13b, 1
-	je	.LBB0_259
-.LBB0_740:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	mov	rbx, qword ptr [rsp + 184]      # 8-byte Reload
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rbx], 12
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	r10d, xmm1, 13
-	test	r10b, 1
-	jne	.LBB0_741
-.LBB0_260:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	r11d, xmm1, 14
-	test	r11b, 1
-	je	.LBB0_261
-.LBB0_742:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	mov	rbx, qword ptr [rsp + 168]      # 8-byte Reload
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rbx], 14
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	r14d, xmm1, 15
-	test	r14b, 1
-	jne	.LBB0_262
-	jmp	.LBB0_263
-	.p2align	4, 0x90
-.LBB0_264:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 1
-	test	bl, 1
-	je	.LBB0_265
-.LBB0_744:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rbx, xmm15, 1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 1
-	vpextrb	ebx, xmm6, 2
-	test	bl, 1
-	mov	r15, qword ptr [rsp + 224]      # 8-byte Reload
-	jne	.LBB0_745
-.LBB0_266:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 3
-	test	bl, 1
-	je	.LBB0_267
-.LBB0_746:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm15, 1
-	vpextrq	rbx, xmm1, 1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 3
-	vpextrb	ebx, xmm6, 4
-	test	bl, 1
-	jne	.LBB0_747
-.LBB0_268:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 5
-	test	bl, 1
-	je	.LBB0_269
-.LBB0_748:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rbx, xmm5, 1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 5
-	vpextrb	ebx, xmm6, 6
-	test	bl, 1
-	jne	.LBB0_749
-.LBB0_270:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 7
-	test	bl, 1
-	je	.LBB0_271
-.LBB0_750:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm5, 1
-	vpextrq	rbx, xmm1, 1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 7
-	vpextrb	ebx, xmm6, 8
-	test	bl, 1
-	jne	.LBB0_751
-.LBB0_272:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 9
-	test	bl, 1
-	je	.LBB0_273
-.LBB0_752:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rbx, xmm12, 1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 9
-	vpextrb	ebx, xmm6, 10
-	test	bl, 1
-	jne	.LBB0_753
-.LBB0_274:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 11
-	test	bl, 1
-	je	.LBB0_275
-.LBB0_754:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm12, 1
-	vpextrq	rbx, xmm1, 1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 11
-	vpextrb	ebx, xmm6, 12
-	test	bl, 1
-	jne	.LBB0_755
-.LBB0_276:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 13
-	test	bl, 1
-	je	.LBB0_277
-.LBB0_756:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rbx, xmm11, 1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 13
-	vpextrb	ebx, xmm6, 14
-	test	bl, 1
-	jne	.LBB0_757
-.LBB0_278:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 15
-	test	bl, 1
-	je	.LBB0_279
-.LBB0_758:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm11, 1
-	vpextrq	rbx, xmm1, 1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 15
-	test	byte ptr [rsp + 44], 1          # 1-byte Folded Reload
-	jne	.LBB0_759
-.LBB0_280:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 40], 1          # 1-byte Folded Reload
-	je	.LBB0_281
-.LBB0_760:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rbx, xmm10, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 1
-	test	byte ptr [rsp + 36], 1          # 1-byte Folded Reload
-	jne	.LBB0_761
-.LBB0_282:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 32], 1          # 1-byte Folded Reload
-	je	.LBB0_283
-.LBB0_762:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm10, 1
-	vpextrq	rbx, xmm1, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 3
-	test	byte ptr [rsp + 28], 1          # 1-byte Folded Reload
-	jne	.LBB0_763
-.LBB0_284:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 24], 1          # 1-byte Folded Reload
-	je	.LBB0_285
-.LBB0_764:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rbx, xmm9, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 5
-	test	byte ptr [rsp + 20], 1          # 1-byte Folded Reload
-	jne	.LBB0_765
-.LBB0_286:                              #   in Loop: Header=BB0_26 Depth=1
-	test	r9b, 1
-	je	.LBB0_287
-.LBB0_766:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm9, 1
-	vpextrq	rbx, xmm1, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 7
-	test	dl, 1
-	mov	rbx, qword ptr [rsp + 296]      # 8-byte Reload
-	jne	.LBB0_767
-.LBB0_288:                              #   in Loop: Header=BB0_26 Depth=1
-	test	cl, 1
-	je	.LBB0_289
-.LBB0_768:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rcx, xmm8, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 9
-	test	sil, 1
-	mov	rdx, qword ptr [rsp + 304]      # 8-byte Reload
-	jne	.LBB0_769
-.LBB0_290:                              #   in Loop: Header=BB0_26 Depth=1
-	test	al, 1
-	mov	rsi, qword ptr [rsp + 152]      # 8-byte Reload
-	je	.LBB0_291
-.LBB0_770:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm8, 1
-	vpextrq	rcx, xmm1, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 11
-	test	r13b, 1
-	jne	.LBB0_771
-.LBB0_292:                              #   in Loop: Header=BB0_26 Depth=1
-	test	r10b, 1
-	mov	r13, qword ptr [rsp + 280]      # 8-byte Reload
-	je	.LBB0_293
-.LBB0_772:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rcx, xmm7, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 13
-	test	r11b, 1
-	jne	.LBB0_773
-.LBB0_294:                              #   in Loop: Header=BB0_26 Depth=1
-	test	r14b, 1
-	mov	rax, qword ptr [rsp + 288]      # 8-byte Reload
-	mov	r9, qword ptr [rsp + 232]       # 8-byte Reload
-	je	.LBB0_296
-.LBB0_295:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm7, 1
-	vpextrq	rcx, xmm1, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 15
-.LBB0_296:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovdqa	ymm1, ymmword ptr [rsp + 704]   # 32-byte Reload
-	vpor	ymm15, ymm1, ymmword ptr [rsp + 512] # 32-byte Folded Reload
-	vpor	ymm5, ymm1, ymmword ptr [rsp + 480] # 32-byte Folded Reload
-	vpor	ymm10, ymm1, ymmword ptr [rsp + 384] # 32-byte Folded Reload
-	vpor	ymm9, ymm1, ymmword ptr [rsp + 352] # 32-byte Folded Reload
-	vpor	ymm12, ymm1, ymmword ptr [rsp + 448] # 32-byte Folded Reload
-	vpor	ymm11, ymm1, ymmword ptr [rsp + 416] # 32-byte Folded Reload
-	vpor	ymm8, ymm1, ymmword ptr [rsp + 320] # 32-byte Folded Reload
-	vpor	ymm7, ymm4, ymm1
-	vperm2i128	ymm1, ymm8, ymm7, 49    # ymm1 = ymm8[2,3],ymm7[2,3]
-	vinserti128	ymm2, ymm8, xmm7, 1
-	vshufps	ymm1, ymm2, ymm1, 136           # ymm1 = ymm2[0,2],ymm1[0,2],ymm2[4,6],ymm1[4,6]
-	vperm2i128	ymm2, ymm12, ymm11, 49  # ymm2 = ymm12[2,3],ymm11[2,3]
-	vinserti128	ymm3, ymm12, xmm11, 1
-	vshufps	ymm2, ymm3, ymm2, 136           # ymm2 = ymm3[0,2],ymm2[0,2],ymm3[4,6],ymm2[4,6]
-	vperm2i128	ymm3, ymm10, ymm9, 49   # ymm3 = ymm10[2,3],ymm9[2,3]
-	vinserti128	ymm13, ymm10, xmm9, 1
-	vshufps	ymm3, ymm13, ymm3, 136          # ymm3 = ymm13[0,2],ymm3[0,2],ymm13[4,6],ymm3[4,6]
-	vperm2i128	ymm13, ymm15, ymm5, 49  # ymm13 = ymm15[2,3],ymm5[2,3]
-	vinserti128	ymm14, ymm15, xmm5, 1
-	vshufps	ymm13, ymm14, ymm13, 136        # ymm13 = ymm14[0,2],ymm13[0,2],ymm14[4,6],ymm13[4,6]
-	vpcmpgtd	ymm13, ymm0, ymm13
-	vpcmpgtd	ymm3, ymm0, ymm3
-	vpackssdw	ymm3, ymm13, ymm3
-	vpcmpgtd	ymm2, ymm0, ymm2
-	vpcmpgtd	ymm1, ymm0, ymm1
-	vpackssdw	ymm1, ymm2, ymm1
-	vpermq	ymm2, ymm3, 216                 # ymm2 = ymm3[0,2,1,3]
-	vpermq	ymm1, ymm1, 216                 # ymm1 = ymm1[0,2,1,3]
-	vpacksswb	ymm1, ymm2, ymm1
-	vpand	ymm6, ymm1, ymm6
-	vmovd	ecx, xmm6
-                                        # implicit-def: $ymm14
-	test	cl, 1
-	je	.LBB0_297
-# %bb.774:                              #   in Loop: Header=BB0_26 Depth=1
-	vpbroadcastb	ymm14, byte ptr [rdi + rdx]
-	vpextrb	ecx, xmm6, 1
-	test	cl, 1
-	jne	.LBB0_775
-.LBB0_298:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rdx, qword ptr [rsp + 104]      # 8-byte Reload
-	vpextrb	ecx, xmm6, 2
-	test	cl, 1
-	je	.LBB0_300
-.LBB0_299:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rbx], 2
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-.LBB0_300:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rsi, qword ptr [rsp + 96]       # 8-byte Reload
-	mov	r10, qword ptr [rsp + 72]       # 8-byte Reload
-	vpextrb	ecx, xmm6, 3
-	test	cl, 1
-	je	.LBB0_301
-# %bb.776:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rcx, qword ptr [rsp + 272]      # 8-byte Reload
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rcx], 3
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 4
-	test	cl, 1
-	jne	.LBB0_777
-.LBB0_302:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 5
-	test	cl, 1
-	je	.LBB0_303
-.LBB0_778:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rdx], 5
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 6
-	test	cl, 1
-	jne	.LBB0_779
-.LBB0_304:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 7
-	test	cl, 1
-	je	.LBB0_305
-.LBB0_780:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + r9], 7
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 8
-	test	cl, 1
-	jne	.LBB0_781
-.LBB0_306:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rdx, qword ptr [rsp + 88]       # 8-byte Reload
-	vpextrb	ecx, xmm6, 9
-	test	cl, 1
-	je	.LBB0_308
-.LBB0_307:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + r15], 9
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-.LBB0_308:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rax, qword ptr [rsp + 144]      # 8-byte Reload
-	mov	rsi, qword ptr [rsp + 136]      # 8-byte Reload
-	mov	rbx, qword ptr [rsp + 128]      # 8-byte Reload
-	mov	r9, qword ptr [rsp + 120]       # 8-byte Reload
-	vpextrb	ecx, xmm6, 10
-	test	cl, 1
-	je	.LBB0_309
-# %bb.782:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rax], 10
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 11
-	test	cl, 1
-	jne	.LBB0_783
-.LBB0_310:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 12
-	test	cl, 1
-	je	.LBB0_311
-.LBB0_784:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rax, qword ptr [rsp + 248]      # 8-byte Reload
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rax], 12
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 13
-	test	cl, 1
-	jne	.LBB0_312
-	jmp	.LBB0_313
-	.p2align	4, 0x90
-.LBB0_297:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 1
-	test	cl, 1
-	je	.LBB0_298
-.LBB0_775:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rsi], 1
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	mov	rdx, qword ptr [rsp + 104]      # 8-byte Reload
-	vpextrb	ecx, xmm6, 2
-	test	cl, 1
-	jne	.LBB0_299
-	jmp	.LBB0_300
-	.p2align	4, 0x90
-.LBB0_301:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 4
-	test	cl, 1
-	je	.LBB0_302
-.LBB0_777:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rcx, qword ptr [rsp + 264]      # 8-byte Reload
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rcx], 4
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 5
-	test	cl, 1
-	jne	.LBB0_778
-.LBB0_303:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 6
-	test	cl, 1
-	je	.LBB0_304
-.LBB0_779:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rax], 6
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 7
-	test	cl, 1
-	jne	.LBB0_780
-.LBB0_305:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 8
-	test	cl, 1
-	je	.LBB0_306
-.LBB0_781:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rsi], 8
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	mov	rdx, qword ptr [rsp + 88]       # 8-byte Reload
-	vpextrb	ecx, xmm6, 9
-	test	cl, 1
-	jne	.LBB0_307
-	jmp	.LBB0_308
-	.p2align	4, 0x90
-.LBB0_309:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 11
-	test	cl, 1
-	je	.LBB0_310
-.LBB0_783:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rax, qword ptr [rsp + 256]      # 8-byte Reload
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rax], 11
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 12
-	test	cl, 1
-	jne	.LBB0_784
-.LBB0_311:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 13
-	test	cl, 1
-	je	.LBB0_313
-.LBB0_312:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rdx], 13
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-.LBB0_313:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rax, qword ptr [rsp + 80]       # 8-byte Reload
-	mov	rdx, qword ptr [rsp + 64]       # 8-byte Reload
-	vpextrb	ecx, xmm6, 14
-	test	cl, 1
-	je	.LBB0_315
-# %bb.314:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rax], 14
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-.LBB0_315:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 15
-	test	cl, 1
-	je	.LBB0_317
-# %bb.316:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + r10], 15
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-.LBB0_317:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm6, 1
-	vmovd	eax, xmm1
-	mov	dword ptr [rsp + 44], eax       # 4-byte Spill
-	test	al, 1
-	je	.LBB0_319
-# %bb.318:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rdx], 0
-	vinserti128	ymm14, ymm14, xmm2, 1
-.LBB0_319:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rax, qword ptr [rsp + 56]       # 8-byte Reload
-	vpextrb	ecx, xmm1, 1
-	mov	dword ptr [rsp + 40], ecx       # 4-byte Spill
-	test	cl, 1
-	je	.LBB0_320
-# %bb.785:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rsi], 1
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	ecx, xmm1, 2
-	mov	dword ptr [rsp + 36], ecx       # 4-byte Spill
-	test	cl, 1
-	jne	.LBB0_786
-.LBB0_321:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm1, 3
-	mov	dword ptr [rsp + 32], ecx       # 4-byte Spill
-	test	cl, 1
-	je	.LBB0_322
-.LBB0_787:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + r9], 3
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	ecx, xmm1, 4
-	mov	dword ptr [rsp + 28], ecx       # 4-byte Spill
-	test	cl, 1
-	jne	.LBB0_788
-.LBB0_323:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	eax, xmm1, 5
-	mov	dword ptr [rsp + 24], eax       # 4-byte Spill
-	test	al, 1
-	je	.LBB0_325
-.LBB0_324:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + r13], 5
-	vinserti128	ymm14, ymm14, xmm2, 1
-.LBB0_325:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rax, qword ptr [rsp + 112]      # 8-byte Reload
-	vpextrb	ecx, xmm1, 6
-	mov	dword ptr [rsp + 20], ecx       # 4-byte Spill
-	test	cl, 1
-	je	.LBB0_326
-# %bb.789:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rax], 6
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	r9d, xmm1, 7
-	test	r9b, 1
-	jne	.LBB0_790
-.LBB0_327:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	edx, xmm1, 8
-	test	dl, 1
-	je	.LBB0_328
-.LBB0_791:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	mov	rax, qword ptr [rsp + 216]      # 8-byte Reload
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rax], 8
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	ecx, xmm1, 9
-	test	cl, 1
-	jne	.LBB0_792
-.LBB0_329:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	esi, xmm1, 10
-	test	sil, 1
-	je	.LBB0_330
-.LBB0_793:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	mov	rax, qword ptr [rsp + 200]      # 8-byte Reload
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rax], 10
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	eax, xmm1, 11
-	test	al, 1
-	jne	.LBB0_794
-.LBB0_331:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	r13d, xmm1, 12
-	test	r13b, 1
-	je	.LBB0_332
-.LBB0_795:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	mov	rbx, qword ptr [rsp + 184]      # 8-byte Reload
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rbx], 12
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	r10d, xmm1, 13
-	test	r10b, 1
-	jne	.LBB0_796
-.LBB0_333:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	r11d, xmm1, 14
-	test	r11b, 1
-	je	.LBB0_334
-.LBB0_797:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	mov	rbx, qword ptr [rsp + 168]      # 8-byte Reload
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rbx], 14
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	r14d, xmm1, 15
-	test	r14b, 1
-	jne	.LBB0_335
-	jmp	.LBB0_336
-	.p2align	4, 0x90
-.LBB0_320:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm1, 2
-	mov	dword ptr [rsp + 36], ecx       # 4-byte Spill
-	test	cl, 1
-	je	.LBB0_321
-.LBB0_786:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rbx], 2
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	ecx, xmm1, 3
-	mov	dword ptr [rsp + 32], ecx       # 4-byte Spill
-	test	cl, 1
-	jne	.LBB0_787
-.LBB0_322:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm1, 4
-	mov	dword ptr [rsp + 28], ecx       # 4-byte Spill
-	test	cl, 1
-	je	.LBB0_323
-.LBB0_788:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rax], 4
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	eax, xmm1, 5
-	mov	dword ptr [rsp + 24], eax       # 4-byte Spill
-	test	al, 1
-	jne	.LBB0_324
-	jmp	.LBB0_325
-	.p2align	4, 0x90
-.LBB0_326:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	r9d, xmm1, 7
-	test	r9b, 1
-	je	.LBB0_327
-.LBB0_790:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	mov	rax, qword ptr [rsp + 240]      # 8-byte Reload
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rax], 7
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	edx, xmm1, 8
-	test	dl, 1
-	jne	.LBB0_791
-.LBB0_328:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm1, 9
-	test	cl, 1
-	je	.LBB0_329
-.LBB0_792:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	mov	rax, qword ptr [rsp + 208]      # 8-byte Reload
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rax], 9
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	esi, xmm1, 10
-	test	sil, 1
-	jne	.LBB0_793
-.LBB0_330:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	eax, xmm1, 11
-	test	al, 1
-	je	.LBB0_331
-.LBB0_794:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	mov	rbx, qword ptr [rsp + 192]      # 8-byte Reload
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rbx], 11
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	r13d, xmm1, 12
-	test	r13b, 1
-	jne	.LBB0_795
-.LBB0_332:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	r10d, xmm1, 13
-	test	r10b, 1
-	je	.LBB0_333
-.LBB0_796:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	mov	rbx, qword ptr [rsp + 176]      # 8-byte Reload
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rbx], 13
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	r11d, xmm1, 14
-	test	r11b, 1
-	jne	.LBB0_797
-.LBB0_334:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	r14d, xmm1, 15
-	test	r14b, 1
-	je	.LBB0_336
-.LBB0_335:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm14, 1
-	mov	rbx, qword ptr [rsp + 160]      # 8-byte Reload
-	vpinsrb	xmm1, xmm1, byte ptr [rdi + rbx], 15
-	vinserti128	ymm14, ymm14, xmm1, 1
-.LBB0_336:                              #   in Loop: Header=BB0_26 Depth=1
-	vpsrlw	ymm1, ymm14, 3
-	vpand	ymm14, ymm1, ymmword ptr [rip + .LCPI0_4]
-	vmovd	r15d, xmm6
-	test	r15b, 1
-	je	.LBB0_337
-# %bb.798:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rbx, xmm15
-	vpextrb	byte ptr [r8 + rbx], xmm14, 0
-	vpextrb	ebx, xmm6, 1
-	test	bl, 1
-	jne	.LBB0_799
-.LBB0_338:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 2
-	test	bl, 1
-	mov	r15, qword ptr [rsp + 224]      # 8-byte Reload
-	je	.LBB0_339
-.LBB0_800:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm15, 1
-	vmovq	rbx, xmm1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 2
-	vpextrb	ebx, xmm6, 3
-	test	bl, 1
-	jne	.LBB0_801
-.LBB0_340:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 4
-	test	bl, 1
-	je	.LBB0_341
-.LBB0_802:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rbx, xmm5
-	vpextrb	byte ptr [r8 + rbx], xmm14, 4
-	vpextrb	ebx, xmm6, 5
-	test	bl, 1
-	jne	.LBB0_803
-.LBB0_342:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 6
-	test	bl, 1
-	je	.LBB0_343
-.LBB0_804:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm5, 1
-	vmovq	rbx, xmm1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 6
-	vpextrb	ebx, xmm6, 7
-	test	bl, 1
-	jne	.LBB0_805
-.LBB0_344:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 8
-	test	bl, 1
-	je	.LBB0_345
-.LBB0_806:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rbx, xmm12
-	vpextrb	byte ptr [r8 + rbx], xmm14, 8
-	vpextrb	ebx, xmm6, 9
-	test	bl, 1
-	jne	.LBB0_807
-.LBB0_346:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 10
-	test	bl, 1
-	je	.LBB0_347
-.LBB0_808:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm12, 1
-	vmovq	rbx, xmm1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 10
-	vpextrb	ebx, xmm6, 11
-	test	bl, 1
-	jne	.LBB0_809
-.LBB0_348:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 12
-	test	bl, 1
-	je	.LBB0_349
-.LBB0_810:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rbx, xmm11
-	vpextrb	byte ptr [r8 + rbx], xmm14, 12
-	vpextrb	ebx, xmm6, 13
-	test	bl, 1
-	jne	.LBB0_811
-.LBB0_350:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 14
-	test	bl, 1
-	je	.LBB0_351
-.LBB0_812:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm11, 1
-	vmovq	rbx, xmm1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 14
-	vpextrb	ebx, xmm6, 15
-	test	bl, 1
-	jne	.LBB0_813
-.LBB0_352:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 44], 1          # 1-byte Folded Reload
-	je	.LBB0_353
-.LBB0_814:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rbx, xmm10
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 0
-	test	byte ptr [rsp + 40], 1          # 1-byte Folded Reload
-	jne	.LBB0_815
-.LBB0_354:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 36], 1          # 1-byte Folded Reload
-	je	.LBB0_355
-.LBB0_816:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm10, 1
-	vmovq	rbx, xmm1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 2
-	test	byte ptr [rsp + 32], 1          # 1-byte Folded Reload
-	jne	.LBB0_817
-.LBB0_356:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 28], 1          # 1-byte Folded Reload
-	je	.LBB0_357
-.LBB0_818:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rbx, xmm9
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 4
-	test	byte ptr [rsp + 24], 1          # 1-byte Folded Reload
-	jne	.LBB0_819
-.LBB0_358:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 20], 1          # 1-byte Folded Reload
-	je	.LBB0_359
-.LBB0_820:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm9, 1
-	vmovq	rbx, xmm1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 6
-	test	r9b, 1
-	jne	.LBB0_821
-.LBB0_360:                              #   in Loop: Header=BB0_26 Depth=1
-	test	dl, 1
-	mov	rbx, qword ptr [rsp + 296]      # 8-byte Reload
-	je	.LBB0_361
-.LBB0_822:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rdx, xmm8
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rdx], xmm1, 8
-	test	cl, 1
-	jne	.LBB0_823
-.LBB0_362:                              #   in Loop: Header=BB0_26 Depth=1
-	test	sil, 1
-	mov	rdx, qword ptr [rsp + 304]      # 8-byte Reload
-	je	.LBB0_363
-.LBB0_824:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm8, 1
-	vmovq	rcx, xmm1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 10
-	test	al, 1
-	mov	rsi, qword ptr [rsp + 152]      # 8-byte Reload
-	jne	.LBB0_825
-.LBB0_364:                              #   in Loop: Header=BB0_26 Depth=1
-	test	r13b, 1
-	je	.LBB0_365
-.LBB0_826:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rcx, xmm7
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 12
-	test	r10b, 1
-	mov	r13, qword ptr [rsp + 280]      # 8-byte Reload
-	jne	.LBB0_827
-.LBB0_366:                              #   in Loop: Header=BB0_26 Depth=1
-	test	r11b, 1
-	je	.LBB0_367
-.LBB0_828:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm7, 1
-	vmovq	rcx, xmm1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 14
-	test	r14b, 1
-	mov	rax, qword ptr [rsp + 288]      # 8-byte Reload
-	mov	r9, qword ptr [rsp + 232]       # 8-byte Reload
-	jne	.LBB0_368
-	jmp	.LBB0_369
-	.p2align	4, 0x90
-.LBB0_337:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 1
-	test	bl, 1
-	je	.LBB0_338
-.LBB0_799:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rbx, xmm15, 1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 1
-	vpextrb	ebx, xmm6, 2
-	test	bl, 1
-	mov	r15, qword ptr [rsp + 224]      # 8-byte Reload
-	jne	.LBB0_800
-.LBB0_339:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 3
-	test	bl, 1
-	je	.LBB0_340
-.LBB0_801:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm15, 1
-	vpextrq	rbx, xmm1, 1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 3
-	vpextrb	ebx, xmm6, 4
-	test	bl, 1
-	jne	.LBB0_802
-.LBB0_341:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 5
-	test	bl, 1
-	je	.LBB0_342
-.LBB0_803:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rbx, xmm5, 1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 5
-	vpextrb	ebx, xmm6, 6
-	test	bl, 1
-	jne	.LBB0_804
-.LBB0_343:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 7
-	test	bl, 1
-	je	.LBB0_344
-.LBB0_805:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm5, 1
-	vpextrq	rbx, xmm1, 1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 7
-	vpextrb	ebx, xmm6, 8
-	test	bl, 1
-	jne	.LBB0_806
-.LBB0_345:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 9
-	test	bl, 1
-	je	.LBB0_346
-.LBB0_807:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rbx, xmm12, 1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 9
-	vpextrb	ebx, xmm6, 10
-	test	bl, 1
-	jne	.LBB0_808
-.LBB0_347:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 11
-	test	bl, 1
-	je	.LBB0_348
-.LBB0_809:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm12, 1
-	vpextrq	rbx, xmm1, 1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 11
-	vpextrb	ebx, xmm6, 12
-	test	bl, 1
-	jne	.LBB0_810
-.LBB0_349:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 13
-	test	bl, 1
-	je	.LBB0_350
-.LBB0_811:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rbx, xmm11, 1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 13
-	vpextrb	ebx, xmm6, 14
-	test	bl, 1
-	jne	.LBB0_812
-.LBB0_351:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 15
-	test	bl, 1
-	je	.LBB0_352
-.LBB0_813:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm11, 1
-	vpextrq	rbx, xmm1, 1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 15
-	test	byte ptr [rsp + 44], 1          # 1-byte Folded Reload
-	jne	.LBB0_814
-.LBB0_353:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 40], 1          # 1-byte Folded Reload
-	je	.LBB0_354
-.LBB0_815:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rbx, xmm10, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 1
-	test	byte ptr [rsp + 36], 1          # 1-byte Folded Reload
-	jne	.LBB0_816
-.LBB0_355:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 32], 1          # 1-byte Folded Reload
-	je	.LBB0_356
-.LBB0_817:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm10, 1
-	vpextrq	rbx, xmm1, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 3
-	test	byte ptr [rsp + 28], 1          # 1-byte Folded Reload
-	jne	.LBB0_818
-.LBB0_357:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 24], 1          # 1-byte Folded Reload
-	je	.LBB0_358
-.LBB0_819:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rbx, xmm9, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 5
-	test	byte ptr [rsp + 20], 1          # 1-byte Folded Reload
-	jne	.LBB0_820
-.LBB0_359:                              #   in Loop: Header=BB0_26 Depth=1
-	test	r9b, 1
-	je	.LBB0_360
-.LBB0_821:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm9, 1
-	vpextrq	rbx, xmm1, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 7
-	test	dl, 1
-	mov	rbx, qword ptr [rsp + 296]      # 8-byte Reload
-	jne	.LBB0_822
-.LBB0_361:                              #   in Loop: Header=BB0_26 Depth=1
-	test	cl, 1
-	je	.LBB0_362
-.LBB0_823:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rcx, xmm8, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 9
-	test	sil, 1
-	mov	rdx, qword ptr [rsp + 304]      # 8-byte Reload
-	jne	.LBB0_824
-.LBB0_363:                              #   in Loop: Header=BB0_26 Depth=1
-	test	al, 1
-	mov	rsi, qword ptr [rsp + 152]      # 8-byte Reload
-	je	.LBB0_364
-.LBB0_825:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm8, 1
-	vpextrq	rcx, xmm1, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 11
-	test	r13b, 1
-	jne	.LBB0_826
-.LBB0_365:                              #   in Loop: Header=BB0_26 Depth=1
-	test	r10b, 1
-	mov	r13, qword ptr [rsp + 280]      # 8-byte Reload
-	je	.LBB0_366
-.LBB0_827:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rcx, xmm7, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 13
-	test	r11b, 1
-	jne	.LBB0_828
-.LBB0_367:                              #   in Loop: Header=BB0_26 Depth=1
-	test	r14b, 1
-	mov	rax, qword ptr [rsp + 288]      # 8-byte Reload
-	mov	r9, qword ptr [rsp + 232]       # 8-byte Reload
-	je	.LBB0_369
-.LBB0_368:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm7, 1
-	vpextrq	rcx, xmm1, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 15
-.LBB0_369:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovdqa	ymm1, ymmword ptr [rsp + 672]   # 32-byte Reload
-	vpor	ymm15, ymm1, ymmword ptr [rsp + 512] # 32-byte Folded Reload
-	vpor	ymm5, ymm1, ymmword ptr [rsp + 480] # 32-byte Folded Reload
-	vpor	ymm10, ymm1, ymmword ptr [rsp + 384] # 32-byte Folded Reload
-	vpor	ymm9, ymm1, ymmword ptr [rsp + 352] # 32-byte Folded Reload
-	vpor	ymm12, ymm1, ymmword ptr [rsp + 448] # 32-byte Folded Reload
-	vpor	ymm11, ymm1, ymmword ptr [rsp + 416] # 32-byte Folded Reload
-	vpor	ymm8, ymm1, ymmword ptr [rsp + 320] # 32-byte Folded Reload
-	vpor	ymm7, ymm4, ymm1
-	vperm2i128	ymm1, ymm8, ymm7, 49    # ymm1 = ymm8[2,3],ymm7[2,3]
-	vinserti128	ymm2, ymm8, xmm7, 1
-	vshufps	ymm1, ymm2, ymm1, 136           # ymm1 = ymm2[0,2],ymm1[0,2],ymm2[4,6],ymm1[4,6]
-	vperm2i128	ymm2, ymm12, ymm11, 49  # ymm2 = ymm12[2,3],ymm11[2,3]
-	vinserti128	ymm3, ymm12, xmm11, 1
-	vshufps	ymm2, ymm3, ymm2, 136           # ymm2 = ymm3[0,2],ymm2[0,2],ymm3[4,6],ymm2[4,6]
-	vperm2i128	ymm3, ymm10, ymm9, 49   # ymm3 = ymm10[2,3],ymm9[2,3]
-	vinserti128	ymm13, ymm10, xmm9, 1
-	vshufps	ymm3, ymm13, ymm3, 136          # ymm3 = ymm13[0,2],ymm3[0,2],ymm13[4,6],ymm3[4,6]
-	vperm2i128	ymm13, ymm15, ymm5, 49  # ymm13 = ymm15[2,3],ymm5[2,3]
-	vinserti128	ymm14, ymm15, xmm5, 1
-	vshufps	ymm13, ymm14, ymm13, 136        # ymm13 = ymm14[0,2],ymm13[0,2],ymm14[4,6],ymm13[4,6]
-	vpcmpgtd	ymm13, ymm0, ymm13
-	vpcmpgtd	ymm3, ymm0, ymm3
-	vpackssdw	ymm3, ymm13, ymm3
-	vpcmpgtd	ymm2, ymm0, ymm2
-	vpcmpgtd	ymm1, ymm0, ymm1
-	vpackssdw	ymm1, ymm2, ymm1
-	vpermq	ymm2, ymm3, 216                 # ymm2 = ymm3[0,2,1,3]
-	vpermq	ymm1, ymm1, 216                 # ymm1 = ymm1[0,2,1,3]
-	vpacksswb	ymm1, ymm2, ymm1
-	vpand	ymm6, ymm1, ymm6
-	vmovd	ecx, xmm6
-                                        # implicit-def: $ymm14
-	test	cl, 1
-	je	.LBB0_370
-# %bb.829:                              #   in Loop: Header=BB0_26 Depth=1
-	vpbroadcastb	ymm14, byte ptr [rdi + rdx]
-	vpextrb	ecx, xmm6, 1
-	test	cl, 1
-	jne	.LBB0_830
-.LBB0_371:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rdx, qword ptr [rsp + 104]      # 8-byte Reload
-	vpextrb	ecx, xmm6, 2
-	test	cl, 1
-	je	.LBB0_373
-.LBB0_372:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rbx], 2
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-.LBB0_373:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rsi, qword ptr [rsp + 96]       # 8-byte Reload
-	mov	r10, qword ptr [rsp + 72]       # 8-byte Reload
-	vpextrb	ecx, xmm6, 3
-	test	cl, 1
-	je	.LBB0_374
-# %bb.831:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rcx, qword ptr [rsp + 272]      # 8-byte Reload
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rcx], 3
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 4
-	test	cl, 1
-	jne	.LBB0_832
-.LBB0_375:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 5
-	test	cl, 1
-	je	.LBB0_376
-.LBB0_833:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rdx], 5
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 6
-	test	cl, 1
-	jne	.LBB0_834
-.LBB0_377:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 7
-	test	cl, 1
-	je	.LBB0_378
-.LBB0_835:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + r9], 7
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 8
-	test	cl, 1
-	jne	.LBB0_836
-.LBB0_379:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rdx, qword ptr [rsp + 88]       # 8-byte Reload
-	vpextrb	ecx, xmm6, 9
-	test	cl, 1
-	je	.LBB0_381
-.LBB0_380:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + r15], 9
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-.LBB0_381:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rax, qword ptr [rsp + 144]      # 8-byte Reload
-	mov	rsi, qword ptr [rsp + 136]      # 8-byte Reload
-	mov	rbx, qword ptr [rsp + 128]      # 8-byte Reload
-	mov	r9, qword ptr [rsp + 120]       # 8-byte Reload
-	vpextrb	ecx, xmm6, 10
-	test	cl, 1
-	je	.LBB0_382
-# %bb.837:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rax], 10
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 11
-	test	cl, 1
-	jne	.LBB0_838
-.LBB0_383:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 12
-	test	cl, 1
-	je	.LBB0_384
-.LBB0_839:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rax, qword ptr [rsp + 248]      # 8-byte Reload
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rax], 12
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 13
-	test	cl, 1
-	jne	.LBB0_385
-	jmp	.LBB0_386
-	.p2align	4, 0x90
-.LBB0_370:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 1
-	test	cl, 1
-	je	.LBB0_371
-.LBB0_830:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rsi], 1
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	mov	rdx, qword ptr [rsp + 104]      # 8-byte Reload
-	vpextrb	ecx, xmm6, 2
-	test	cl, 1
-	jne	.LBB0_372
-	jmp	.LBB0_373
-	.p2align	4, 0x90
-.LBB0_374:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 4
-	test	cl, 1
-	je	.LBB0_375
-.LBB0_832:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rcx, qword ptr [rsp + 264]      # 8-byte Reload
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rcx], 4
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 5
-	test	cl, 1
-	jne	.LBB0_833
-.LBB0_376:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 6
-	test	cl, 1
-	je	.LBB0_377
-.LBB0_834:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rax], 6
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 7
-	test	cl, 1
-	jne	.LBB0_835
-.LBB0_378:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 8
-	test	cl, 1
-	je	.LBB0_379
-.LBB0_836:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rsi], 8
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	mov	rdx, qword ptr [rsp + 88]       # 8-byte Reload
-	vpextrb	ecx, xmm6, 9
-	test	cl, 1
-	jne	.LBB0_380
-	jmp	.LBB0_381
-	.p2align	4, 0x90
-.LBB0_382:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 11
-	test	cl, 1
-	je	.LBB0_383
-.LBB0_838:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rax, qword ptr [rsp + 256]      # 8-byte Reload
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rax], 11
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 12
-	test	cl, 1
-	jne	.LBB0_839
-.LBB0_384:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 13
-	test	cl, 1
-	je	.LBB0_386
-.LBB0_385:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rdx], 13
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-.LBB0_386:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rax, qword ptr [rsp + 80]       # 8-byte Reload
-	mov	rdx, qword ptr [rsp + 64]       # 8-byte Reload
-	vpextrb	ecx, xmm6, 14
-	test	cl, 1
-	je	.LBB0_388
-# %bb.387:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rax], 14
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-.LBB0_388:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 15
-	test	cl, 1
-	je	.LBB0_390
-# %bb.389:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + r10], 15
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-.LBB0_390:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm6, 1
-	vmovd	eax, xmm1
-	mov	dword ptr [rsp + 44], eax       # 4-byte Spill
-	test	al, 1
-	je	.LBB0_392
-# %bb.391:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rdx], 0
-	vinserti128	ymm14, ymm14, xmm2, 1
-.LBB0_392:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rax, qword ptr [rsp + 56]       # 8-byte Reload
-	vpextrb	ecx, xmm1, 1
-	mov	dword ptr [rsp + 40], ecx       # 4-byte Spill
-	test	cl, 1
-	je	.LBB0_393
-# %bb.840:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rsi], 1
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	ecx, xmm1, 2
-	mov	dword ptr [rsp + 36], ecx       # 4-byte Spill
-	test	cl, 1
-	jne	.LBB0_841
-.LBB0_394:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm1, 3
-	mov	dword ptr [rsp + 32], ecx       # 4-byte Spill
-	test	cl, 1
-	je	.LBB0_395
-.LBB0_842:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + r9], 3
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	ecx, xmm1, 4
-	mov	dword ptr [rsp + 28], ecx       # 4-byte Spill
-	test	cl, 1
-	jne	.LBB0_843
-.LBB0_396:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	eax, xmm1, 5
-	mov	dword ptr [rsp + 24], eax       # 4-byte Spill
-	test	al, 1
-	je	.LBB0_398
-.LBB0_397:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + r13], 5
-	vinserti128	ymm14, ymm14, xmm2, 1
-.LBB0_398:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rax, qword ptr [rsp + 112]      # 8-byte Reload
-	vpextrb	ecx, xmm1, 6
-	mov	dword ptr [rsp + 20], ecx       # 4-byte Spill
-	test	cl, 1
-	je	.LBB0_399
-# %bb.844:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rax], 6
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	r9d, xmm1, 7
-	test	r9b, 1
-	jne	.LBB0_845
-.LBB0_400:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	edx, xmm1, 8
-	test	dl, 1
-	je	.LBB0_401
-.LBB0_846:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	mov	rax, qword ptr [rsp + 216]      # 8-byte Reload
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rax], 8
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	ecx, xmm1, 9
-	test	cl, 1
-	jne	.LBB0_847
-.LBB0_402:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	esi, xmm1, 10
-	test	sil, 1
-	je	.LBB0_403
-.LBB0_848:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	mov	rax, qword ptr [rsp + 200]      # 8-byte Reload
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rax], 10
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	eax, xmm1, 11
-	test	al, 1
-	jne	.LBB0_849
-.LBB0_404:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	r13d, xmm1, 12
-	test	r13b, 1
-	je	.LBB0_405
-.LBB0_850:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	mov	rbx, qword ptr [rsp + 184]      # 8-byte Reload
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rbx], 12
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	r10d, xmm1, 13
-	test	r10b, 1
-	jne	.LBB0_851
-.LBB0_406:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	r11d, xmm1, 14
-	test	r11b, 1
-	je	.LBB0_407
-.LBB0_852:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	mov	rbx, qword ptr [rsp + 168]      # 8-byte Reload
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rbx], 14
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	r14d, xmm1, 15
-	test	r14b, 1
-	jne	.LBB0_408
-	jmp	.LBB0_409
-	.p2align	4, 0x90
-.LBB0_393:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm1, 2
-	mov	dword ptr [rsp + 36], ecx       # 4-byte Spill
-	test	cl, 1
-	je	.LBB0_394
-.LBB0_841:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rbx], 2
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	ecx, xmm1, 3
-	mov	dword ptr [rsp + 32], ecx       # 4-byte Spill
-	test	cl, 1
-	jne	.LBB0_842
-.LBB0_395:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm1, 4
-	mov	dword ptr [rsp + 28], ecx       # 4-byte Spill
-	test	cl, 1
-	je	.LBB0_396
-.LBB0_843:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rax], 4
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	eax, xmm1, 5
-	mov	dword ptr [rsp + 24], eax       # 4-byte Spill
-	test	al, 1
-	jne	.LBB0_397
-	jmp	.LBB0_398
-	.p2align	4, 0x90
-.LBB0_399:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	r9d, xmm1, 7
-	test	r9b, 1
-	je	.LBB0_400
-.LBB0_845:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	mov	rax, qword ptr [rsp + 240]      # 8-byte Reload
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rax], 7
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	edx, xmm1, 8
-	test	dl, 1
-	jne	.LBB0_846
-.LBB0_401:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm1, 9
-	test	cl, 1
-	je	.LBB0_402
-.LBB0_847:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	mov	rax, qword ptr [rsp + 208]      # 8-byte Reload
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rax], 9
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	esi, xmm1, 10
-	test	sil, 1
-	jne	.LBB0_848
-.LBB0_403:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	eax, xmm1, 11
-	test	al, 1
-	je	.LBB0_404
-.LBB0_849:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	mov	rbx, qword ptr [rsp + 192]      # 8-byte Reload
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rbx], 11
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	r13d, xmm1, 12
-	test	r13b, 1
-	jne	.LBB0_850
-.LBB0_405:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	r10d, xmm1, 13
-	test	r10b, 1
-	je	.LBB0_406
-.LBB0_851:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	mov	rbx, qword ptr [rsp + 176]      # 8-byte Reload
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rbx], 13
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	r11d, xmm1, 14
-	test	r11b, 1
-	jne	.LBB0_852
-.LBB0_407:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	r14d, xmm1, 15
-	test	r14b, 1
-	je	.LBB0_409
-.LBB0_408:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm14, 1
-	mov	rbx, qword ptr [rsp + 160]      # 8-byte Reload
-	vpinsrb	xmm1, xmm1, byte ptr [rdi + rbx], 15
-	vinserti128	ymm14, ymm14, xmm1, 1
-.LBB0_409:                              #   in Loop: Header=BB0_26 Depth=1
-	vpsrlw	ymm1, ymm14, 4
-	vpand	ymm14, ymm1, ymmword ptr [rip + .LCPI0_4]
-	vmovd	r15d, xmm6
-	test	r15b, 1
-	je	.LBB0_410
-# %bb.853:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rbx, xmm15
-	vpextrb	byte ptr [r8 + rbx], xmm14, 0
-	vpextrb	ebx, xmm6, 1
-	test	bl, 1
-	jne	.LBB0_854
-.LBB0_411:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 2
-	test	bl, 1
-	mov	r15, qword ptr [rsp + 224]      # 8-byte Reload
-	je	.LBB0_412
-.LBB0_855:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm15, 1
-	vmovq	rbx, xmm1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 2
-	vpextrb	ebx, xmm6, 3
-	test	bl, 1
-	jne	.LBB0_856
-.LBB0_413:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 4
-	test	bl, 1
-	je	.LBB0_414
-.LBB0_857:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rbx, xmm5
-	vpextrb	byte ptr [r8 + rbx], xmm14, 4
-	vpextrb	ebx, xmm6, 5
-	test	bl, 1
-	jne	.LBB0_858
-.LBB0_415:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 6
-	test	bl, 1
-	je	.LBB0_416
-.LBB0_859:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm5, 1
-	vmovq	rbx, xmm1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 6
-	vpextrb	ebx, xmm6, 7
-	test	bl, 1
-	jne	.LBB0_860
-.LBB0_417:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 8
-	test	bl, 1
-	je	.LBB0_418
-.LBB0_861:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rbx, xmm12
-	vpextrb	byte ptr [r8 + rbx], xmm14, 8
-	vpextrb	ebx, xmm6, 9
-	test	bl, 1
-	jne	.LBB0_862
-.LBB0_419:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 10
-	test	bl, 1
-	je	.LBB0_420
-.LBB0_863:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm12, 1
-	vmovq	rbx, xmm1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 10
-	vpextrb	ebx, xmm6, 11
-	test	bl, 1
-	jne	.LBB0_864
-.LBB0_421:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 12
-	test	bl, 1
-	je	.LBB0_422
-.LBB0_865:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rbx, xmm11
-	vpextrb	byte ptr [r8 + rbx], xmm14, 12
-	vpextrb	ebx, xmm6, 13
-	test	bl, 1
-	jne	.LBB0_866
-.LBB0_423:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 14
-	test	bl, 1
-	je	.LBB0_424
-.LBB0_867:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm11, 1
-	vmovq	rbx, xmm1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 14
-	vpextrb	ebx, xmm6, 15
-	test	bl, 1
-	jne	.LBB0_868
-.LBB0_425:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 44], 1          # 1-byte Folded Reload
-	je	.LBB0_426
-.LBB0_869:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rbx, xmm10
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 0
-	test	byte ptr [rsp + 40], 1          # 1-byte Folded Reload
-	jne	.LBB0_870
-.LBB0_427:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 36], 1          # 1-byte Folded Reload
-	je	.LBB0_428
-.LBB0_871:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm10, 1
-	vmovq	rbx, xmm1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 2
-	test	byte ptr [rsp + 32], 1          # 1-byte Folded Reload
-	jne	.LBB0_872
-.LBB0_429:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 28], 1          # 1-byte Folded Reload
-	je	.LBB0_430
-.LBB0_873:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rbx, xmm9
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 4
-	test	byte ptr [rsp + 24], 1          # 1-byte Folded Reload
-	jne	.LBB0_874
-.LBB0_431:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 20], 1          # 1-byte Folded Reload
-	je	.LBB0_432
-.LBB0_875:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm9, 1
-	vmovq	rbx, xmm1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 6
-	test	r9b, 1
-	jne	.LBB0_876
-.LBB0_433:                              #   in Loop: Header=BB0_26 Depth=1
-	test	dl, 1
-	mov	rbx, qword ptr [rsp + 296]      # 8-byte Reload
-	je	.LBB0_434
-.LBB0_877:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rdx, xmm8
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rdx], xmm1, 8
-	test	cl, 1
-	jne	.LBB0_878
-.LBB0_435:                              #   in Loop: Header=BB0_26 Depth=1
-	test	sil, 1
-	mov	rdx, qword ptr [rsp + 304]      # 8-byte Reload
-	je	.LBB0_436
-.LBB0_879:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm8, 1
-	vmovq	rcx, xmm1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 10
-	test	al, 1
-	mov	rsi, qword ptr [rsp + 152]      # 8-byte Reload
-	jne	.LBB0_880
-.LBB0_437:                              #   in Loop: Header=BB0_26 Depth=1
-	test	r13b, 1
-	je	.LBB0_438
-.LBB0_881:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rcx, xmm7
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 12
-	test	r10b, 1
-	mov	r13, qword ptr [rsp + 280]      # 8-byte Reload
-	jne	.LBB0_882
-.LBB0_439:                              #   in Loop: Header=BB0_26 Depth=1
-	test	r11b, 1
-	je	.LBB0_440
-.LBB0_883:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm7, 1
-	vmovq	rcx, xmm1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 14
-	test	r14b, 1
-	mov	rax, qword ptr [rsp + 288]      # 8-byte Reload
-	mov	r9, qword ptr [rsp + 232]       # 8-byte Reload
-	jne	.LBB0_441
-	jmp	.LBB0_442
-	.p2align	4, 0x90
-.LBB0_410:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 1
-	test	bl, 1
-	je	.LBB0_411
-.LBB0_854:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rbx, xmm15, 1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 1
-	vpextrb	ebx, xmm6, 2
-	test	bl, 1
-	mov	r15, qword ptr [rsp + 224]      # 8-byte Reload
-	jne	.LBB0_855
-.LBB0_412:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 3
-	test	bl, 1
-	je	.LBB0_413
-.LBB0_856:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm15, 1
-	vpextrq	rbx, xmm1, 1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 3
-	vpextrb	ebx, xmm6, 4
-	test	bl, 1
-	jne	.LBB0_857
-.LBB0_414:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 5
-	test	bl, 1
-	je	.LBB0_415
-.LBB0_858:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rbx, xmm5, 1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 5
-	vpextrb	ebx, xmm6, 6
-	test	bl, 1
-	jne	.LBB0_859
-.LBB0_416:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 7
-	test	bl, 1
-	je	.LBB0_417
-.LBB0_860:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm5, 1
-	vpextrq	rbx, xmm1, 1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 7
-	vpextrb	ebx, xmm6, 8
-	test	bl, 1
-	jne	.LBB0_861
-.LBB0_418:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 9
-	test	bl, 1
-	je	.LBB0_419
-.LBB0_862:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rbx, xmm12, 1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 9
-	vpextrb	ebx, xmm6, 10
-	test	bl, 1
-	jne	.LBB0_863
-.LBB0_420:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 11
-	test	bl, 1
-	je	.LBB0_421
-.LBB0_864:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm12, 1
-	vpextrq	rbx, xmm1, 1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 11
-	vpextrb	ebx, xmm6, 12
-	test	bl, 1
-	jne	.LBB0_865
-.LBB0_422:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 13
-	test	bl, 1
-	je	.LBB0_423
-.LBB0_866:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rbx, xmm11, 1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 13
-	vpextrb	ebx, xmm6, 14
-	test	bl, 1
-	jne	.LBB0_867
-.LBB0_424:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 15
-	test	bl, 1
-	je	.LBB0_425
-.LBB0_868:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm11, 1
-	vpextrq	rbx, xmm1, 1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 15
-	test	byte ptr [rsp + 44], 1          # 1-byte Folded Reload
-	jne	.LBB0_869
-.LBB0_426:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 40], 1          # 1-byte Folded Reload
-	je	.LBB0_427
-.LBB0_870:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rbx, xmm10, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 1
-	test	byte ptr [rsp + 36], 1          # 1-byte Folded Reload
-	jne	.LBB0_871
-.LBB0_428:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 32], 1          # 1-byte Folded Reload
-	je	.LBB0_429
-.LBB0_872:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm10, 1
-	vpextrq	rbx, xmm1, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 3
-	test	byte ptr [rsp + 28], 1          # 1-byte Folded Reload
-	jne	.LBB0_873
-.LBB0_430:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 24], 1          # 1-byte Folded Reload
-	je	.LBB0_431
-.LBB0_874:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rbx, xmm9, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 5
-	test	byte ptr [rsp + 20], 1          # 1-byte Folded Reload
-	jne	.LBB0_875
-.LBB0_432:                              #   in Loop: Header=BB0_26 Depth=1
-	test	r9b, 1
-	je	.LBB0_433
-.LBB0_876:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm9, 1
-	vpextrq	rbx, xmm1, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 7
-	test	dl, 1
-	mov	rbx, qword ptr [rsp + 296]      # 8-byte Reload
-	jne	.LBB0_877
-.LBB0_434:                              #   in Loop: Header=BB0_26 Depth=1
-	test	cl, 1
-	je	.LBB0_435
-.LBB0_878:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rcx, xmm8, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 9
-	test	sil, 1
-	mov	rdx, qword ptr [rsp + 304]      # 8-byte Reload
-	jne	.LBB0_879
-.LBB0_436:                              #   in Loop: Header=BB0_26 Depth=1
-	test	al, 1
-	mov	rsi, qword ptr [rsp + 152]      # 8-byte Reload
-	je	.LBB0_437
-.LBB0_880:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm8, 1
-	vpextrq	rcx, xmm1, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 11
-	test	r13b, 1
-	jne	.LBB0_881
-.LBB0_438:                              #   in Loop: Header=BB0_26 Depth=1
-	test	r10b, 1
-	mov	r13, qword ptr [rsp + 280]      # 8-byte Reload
-	je	.LBB0_439
-.LBB0_882:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rcx, xmm7, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 13
-	test	r11b, 1
-	jne	.LBB0_883
-.LBB0_440:                              #   in Loop: Header=BB0_26 Depth=1
-	test	r14b, 1
-	mov	rax, qword ptr [rsp + 288]      # 8-byte Reload
-	mov	r9, qword ptr [rsp + 232]       # 8-byte Reload
-	je	.LBB0_442
-.LBB0_441:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm7, 1
-	vpextrq	rcx, xmm1, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 15
-.LBB0_442:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovdqa	ymm1, ymmword ptr [rsp + 640]   # 32-byte Reload
-	vpor	ymm15, ymm1, ymmword ptr [rsp + 512] # 32-byte Folded Reload
-	vpor	ymm5, ymm1, ymmword ptr [rsp + 480] # 32-byte Folded Reload
-	vpor	ymm10, ymm1, ymmword ptr [rsp + 384] # 32-byte Folded Reload
-	vpor	ymm9, ymm1, ymmword ptr [rsp + 352] # 32-byte Folded Reload
-	vpor	ymm12, ymm1, ymmword ptr [rsp + 448] # 32-byte Folded Reload
-	vpor	ymm11, ymm1, ymmword ptr [rsp + 416] # 32-byte Folded Reload
-	vpor	ymm8, ymm1, ymmword ptr [rsp + 320] # 32-byte Folded Reload
-	vpor	ymm7, ymm4, ymm1
-	vperm2i128	ymm1, ymm8, ymm7, 49    # ymm1 = ymm8[2,3],ymm7[2,3]
-	vinserti128	ymm2, ymm8, xmm7, 1
-	vshufps	ymm1, ymm2, ymm1, 136           # ymm1 = ymm2[0,2],ymm1[0,2],ymm2[4,6],ymm1[4,6]
-	vperm2i128	ymm2, ymm12, ymm11, 49  # ymm2 = ymm12[2,3],ymm11[2,3]
-	vinserti128	ymm3, ymm12, xmm11, 1
-	vshufps	ymm2, ymm3, ymm2, 136           # ymm2 = ymm3[0,2],ymm2[0,2],ymm3[4,6],ymm2[4,6]
-	vperm2i128	ymm3, ymm10, ymm9, 49   # ymm3 = ymm10[2,3],ymm9[2,3]
-	vinserti128	ymm13, ymm10, xmm9, 1
-	vshufps	ymm3, ymm13, ymm3, 136          # ymm3 = ymm13[0,2],ymm3[0,2],ymm13[4,6],ymm3[4,6]
-	vperm2i128	ymm13, ymm15, ymm5, 49  # ymm13 = ymm15[2,3],ymm5[2,3]
-	vinserti128	ymm14, ymm15, xmm5, 1
-	vshufps	ymm13, ymm14, ymm13, 136        # ymm13 = ymm14[0,2],ymm13[0,2],ymm14[4,6],ymm13[4,6]
-	vpcmpgtd	ymm13, ymm0, ymm13
-	vpcmpgtd	ymm3, ymm0, ymm3
-	vpackssdw	ymm3, ymm13, ymm3
-	vpcmpgtd	ymm2, ymm0, ymm2
-	vpcmpgtd	ymm1, ymm0, ymm1
-	vpackssdw	ymm1, ymm2, ymm1
-	vpermq	ymm2, ymm3, 216                 # ymm2 = ymm3[0,2,1,3]
-	vpermq	ymm1, ymm1, 216                 # ymm1 = ymm1[0,2,1,3]
-	vpacksswb	ymm1, ymm2, ymm1
-	vpand	ymm6, ymm1, ymm6
-	vmovd	ecx, xmm6
-                                        # implicit-def: $ymm14
-	test	cl, 1
-	je	.LBB0_443
-# %bb.884:                              #   in Loop: Header=BB0_26 Depth=1
-	vpbroadcastb	ymm14, byte ptr [rdi + rdx]
-	vpextrb	ecx, xmm6, 1
-	test	cl, 1
-	jne	.LBB0_885
-.LBB0_444:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rdx, qword ptr [rsp + 104]      # 8-byte Reload
-	vpextrb	ecx, xmm6, 2
-	test	cl, 1
-	je	.LBB0_446
-.LBB0_445:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rbx], 2
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-.LBB0_446:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rsi, qword ptr [rsp + 96]       # 8-byte Reload
-	mov	r10, qword ptr [rsp + 72]       # 8-byte Reload
-	vpextrb	ecx, xmm6, 3
-	test	cl, 1
-	je	.LBB0_447
-# %bb.886:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rcx, qword ptr [rsp + 272]      # 8-byte Reload
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rcx], 3
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 4
-	test	cl, 1
-	jne	.LBB0_887
-.LBB0_448:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 5
-	test	cl, 1
-	je	.LBB0_449
-.LBB0_888:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rdx], 5
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 6
-	test	cl, 1
-	jne	.LBB0_889
-.LBB0_450:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 7
-	test	cl, 1
-	je	.LBB0_451
-.LBB0_890:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + r9], 7
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 8
-	test	cl, 1
-	jne	.LBB0_891
-.LBB0_452:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rdx, qword ptr [rsp + 88]       # 8-byte Reload
-	vpextrb	ecx, xmm6, 9
-	test	cl, 1
-	je	.LBB0_454
-.LBB0_453:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + r15], 9
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-.LBB0_454:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rax, qword ptr [rsp + 144]      # 8-byte Reload
-	mov	rsi, qword ptr [rsp + 136]      # 8-byte Reload
-	mov	rbx, qword ptr [rsp + 128]      # 8-byte Reload
-	mov	r9, qword ptr [rsp + 120]       # 8-byte Reload
-	vpextrb	ecx, xmm6, 10
-	test	cl, 1
-	je	.LBB0_455
-# %bb.892:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rax], 10
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 11
-	test	cl, 1
-	jne	.LBB0_893
-.LBB0_456:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 12
-	test	cl, 1
-	je	.LBB0_457
-.LBB0_894:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rax, qword ptr [rsp + 248]      # 8-byte Reload
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rax], 12
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 13
-	test	cl, 1
-	jne	.LBB0_458
-	jmp	.LBB0_459
-	.p2align	4, 0x90
-.LBB0_443:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 1
-	test	cl, 1
-	je	.LBB0_444
-.LBB0_885:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rsi], 1
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	mov	rdx, qword ptr [rsp + 104]      # 8-byte Reload
-	vpextrb	ecx, xmm6, 2
-	test	cl, 1
-	jne	.LBB0_445
-	jmp	.LBB0_446
-	.p2align	4, 0x90
-.LBB0_447:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 4
-	test	cl, 1
-	je	.LBB0_448
-.LBB0_887:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rcx, qword ptr [rsp + 264]      # 8-byte Reload
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rcx], 4
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 5
-	test	cl, 1
-	jne	.LBB0_888
-.LBB0_449:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 6
-	test	cl, 1
-	je	.LBB0_450
-.LBB0_889:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rax], 6
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 7
-	test	cl, 1
-	jne	.LBB0_890
-.LBB0_451:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 8
-	test	cl, 1
-	je	.LBB0_452
-.LBB0_891:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rsi], 8
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	mov	rdx, qword ptr [rsp + 88]       # 8-byte Reload
-	vpextrb	ecx, xmm6, 9
-	test	cl, 1
-	jne	.LBB0_453
-	jmp	.LBB0_454
-	.p2align	4, 0x90
-.LBB0_455:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 11
-	test	cl, 1
-	je	.LBB0_456
-.LBB0_893:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rax, qword ptr [rsp + 256]      # 8-byte Reload
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rax], 11
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 12
-	test	cl, 1
-	jne	.LBB0_894
-.LBB0_457:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 13
-	test	cl, 1
-	je	.LBB0_459
-.LBB0_458:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rdx], 13
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-.LBB0_459:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rax, qword ptr [rsp + 80]       # 8-byte Reload
-	mov	rdx, qword ptr [rsp + 64]       # 8-byte Reload
-	vpextrb	ecx, xmm6, 14
-	test	cl, 1
-	je	.LBB0_461
-# %bb.460:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rax], 14
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-.LBB0_461:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 15
-	test	cl, 1
-	je	.LBB0_463
-# %bb.462:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + r10], 15
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-.LBB0_463:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm6, 1
-	vmovd	eax, xmm1
-	mov	dword ptr [rsp + 44], eax       # 4-byte Spill
-	test	al, 1
-	je	.LBB0_465
-# %bb.464:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rdx], 0
-	vinserti128	ymm14, ymm14, xmm2, 1
-.LBB0_465:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rax, qword ptr [rsp + 56]       # 8-byte Reload
-	vpextrb	ecx, xmm1, 1
-	mov	dword ptr [rsp + 40], ecx       # 4-byte Spill
-	test	cl, 1
-	je	.LBB0_466
-# %bb.895:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rsi], 1
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	ecx, xmm1, 2
-	mov	dword ptr [rsp + 36], ecx       # 4-byte Spill
-	test	cl, 1
-	jne	.LBB0_896
-.LBB0_467:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm1, 3
-	mov	dword ptr [rsp + 32], ecx       # 4-byte Spill
-	test	cl, 1
-	je	.LBB0_468
-.LBB0_897:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + r9], 3
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	ecx, xmm1, 4
-	mov	dword ptr [rsp + 28], ecx       # 4-byte Spill
-	test	cl, 1
-	jne	.LBB0_898
-.LBB0_469:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	eax, xmm1, 5
-	mov	dword ptr [rsp + 24], eax       # 4-byte Spill
-	test	al, 1
-	je	.LBB0_471
-.LBB0_470:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + r13], 5
-	vinserti128	ymm14, ymm14, xmm2, 1
-.LBB0_471:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rax, qword ptr [rsp + 112]      # 8-byte Reload
-	vpextrb	ecx, xmm1, 6
-	mov	dword ptr [rsp + 20], ecx       # 4-byte Spill
-	test	cl, 1
-	je	.LBB0_472
-# %bb.899:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rax], 6
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	r9d, xmm1, 7
-	test	r9b, 1
-	jne	.LBB0_900
-.LBB0_473:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	edx, xmm1, 8
-	test	dl, 1
-	je	.LBB0_474
-.LBB0_901:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	mov	rax, qword ptr [rsp + 216]      # 8-byte Reload
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rax], 8
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	ecx, xmm1, 9
-	test	cl, 1
-	jne	.LBB0_902
-.LBB0_475:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	esi, xmm1, 10
-	test	sil, 1
-	je	.LBB0_476
-.LBB0_903:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	mov	rax, qword ptr [rsp + 200]      # 8-byte Reload
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rax], 10
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	eax, xmm1, 11
-	test	al, 1
-	jne	.LBB0_904
-.LBB0_477:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	r13d, xmm1, 12
-	test	r13b, 1
-	je	.LBB0_478
-.LBB0_905:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	mov	rbx, qword ptr [rsp + 184]      # 8-byte Reload
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rbx], 12
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	r10d, xmm1, 13
-	test	r10b, 1
-	jne	.LBB0_906
-.LBB0_479:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	r11d, xmm1, 14
-	test	r11b, 1
-	je	.LBB0_480
-.LBB0_907:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	mov	rbx, qword ptr [rsp + 168]      # 8-byte Reload
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rbx], 14
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	r14d, xmm1, 15
-	test	r14b, 1
-	jne	.LBB0_481
-	jmp	.LBB0_482
-	.p2align	4, 0x90
-.LBB0_466:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm1, 2
-	mov	dword ptr [rsp + 36], ecx       # 4-byte Spill
-	test	cl, 1
-	je	.LBB0_467
-.LBB0_896:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rbx], 2
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	ecx, xmm1, 3
-	mov	dword ptr [rsp + 32], ecx       # 4-byte Spill
-	test	cl, 1
-	jne	.LBB0_897
-.LBB0_468:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm1, 4
-	mov	dword ptr [rsp + 28], ecx       # 4-byte Spill
-	test	cl, 1
-	je	.LBB0_469
-.LBB0_898:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rax], 4
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	eax, xmm1, 5
-	mov	dword ptr [rsp + 24], eax       # 4-byte Spill
-	test	al, 1
-	jne	.LBB0_470
-	jmp	.LBB0_471
-	.p2align	4, 0x90
-.LBB0_472:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	r9d, xmm1, 7
-	test	r9b, 1
-	je	.LBB0_473
-.LBB0_900:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	mov	rax, qword ptr [rsp + 240]      # 8-byte Reload
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rax], 7
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	edx, xmm1, 8
-	test	dl, 1
-	jne	.LBB0_901
-.LBB0_474:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm1, 9
-	test	cl, 1
-	je	.LBB0_475
-.LBB0_902:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	mov	rax, qword ptr [rsp + 208]      # 8-byte Reload
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rax], 9
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	esi, xmm1, 10
-	test	sil, 1
-	jne	.LBB0_903
-.LBB0_476:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	eax, xmm1, 11
-	test	al, 1
-	je	.LBB0_477
-.LBB0_904:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	mov	rbx, qword ptr [rsp + 192]      # 8-byte Reload
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rbx], 11
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	r13d, xmm1, 12
-	test	r13b, 1
-	jne	.LBB0_905
-.LBB0_478:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	r10d, xmm1, 13
-	test	r10b, 1
-	je	.LBB0_479
-.LBB0_906:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	mov	rbx, qword ptr [rsp + 176]      # 8-byte Reload
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rbx], 13
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	r11d, xmm1, 14
-	test	r11b, 1
-	jne	.LBB0_907
-.LBB0_480:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	r14d, xmm1, 15
-	test	r14b, 1
-	je	.LBB0_482
-.LBB0_481:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm14, 1
-	mov	rbx, qword ptr [rsp + 160]      # 8-byte Reload
-	vpinsrb	xmm1, xmm1, byte ptr [rdi + rbx], 15
-	vinserti128	ymm14, ymm14, xmm1, 1
-.LBB0_482:                              #   in Loop: Header=BB0_26 Depth=1
-	vpsrlw	ymm1, ymm14, 5
-	vpand	ymm14, ymm1, ymmword ptr [rip + .LCPI0_4]
-	vmovd	r15d, xmm6
-	test	r15b, 1
-	je	.LBB0_483
-# %bb.908:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rbx, xmm15
-	vpextrb	byte ptr [r8 + rbx], xmm14, 0
-	vpextrb	ebx, xmm6, 1
-	test	bl, 1
-	jne	.LBB0_909
-.LBB0_484:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 2
-	test	bl, 1
-	mov	r15, qword ptr [rsp + 224]      # 8-byte Reload
-	je	.LBB0_485
-.LBB0_910:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm15, 1
-	vmovq	rbx, xmm1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 2
-	vpextrb	ebx, xmm6, 3
-	test	bl, 1
-	jne	.LBB0_911
-.LBB0_486:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 4
-	test	bl, 1
-	je	.LBB0_487
-.LBB0_912:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rbx, xmm5
-	vpextrb	byte ptr [r8 + rbx], xmm14, 4
-	vpextrb	ebx, xmm6, 5
-	test	bl, 1
-	jne	.LBB0_913
-.LBB0_488:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 6
-	test	bl, 1
-	je	.LBB0_489
-.LBB0_914:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm5, 1
-	vmovq	rbx, xmm1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 6
-	vpextrb	ebx, xmm6, 7
-	test	bl, 1
-	jne	.LBB0_915
-.LBB0_490:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 8
-	test	bl, 1
-	je	.LBB0_491
-.LBB0_916:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rbx, xmm12
-	vpextrb	byte ptr [r8 + rbx], xmm14, 8
-	vpextrb	ebx, xmm6, 9
-	test	bl, 1
-	jne	.LBB0_917
-.LBB0_492:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 10
-	test	bl, 1
-	je	.LBB0_493
-.LBB0_918:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm12, 1
-	vmovq	rbx, xmm1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 10
-	vpextrb	ebx, xmm6, 11
-	test	bl, 1
-	jne	.LBB0_919
-.LBB0_494:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 12
-	test	bl, 1
-	je	.LBB0_495
-.LBB0_920:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rbx, xmm11
-	vpextrb	byte ptr [r8 + rbx], xmm14, 12
-	vpextrb	ebx, xmm6, 13
-	test	bl, 1
-	jne	.LBB0_921
-.LBB0_496:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 14
-	test	bl, 1
-	je	.LBB0_497
-.LBB0_922:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm11, 1
-	vmovq	rbx, xmm1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 14
-	vpextrb	ebx, xmm6, 15
-	test	bl, 1
-	jne	.LBB0_923
-.LBB0_498:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 44], 1          # 1-byte Folded Reload
-	je	.LBB0_499
-.LBB0_924:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rbx, xmm10
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 0
-	test	byte ptr [rsp + 40], 1          # 1-byte Folded Reload
-	jne	.LBB0_925
-.LBB0_500:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 36], 1          # 1-byte Folded Reload
-	je	.LBB0_501
-.LBB0_926:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm10, 1
-	vmovq	rbx, xmm1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 2
-	test	byte ptr [rsp + 32], 1          # 1-byte Folded Reload
-	jne	.LBB0_927
-.LBB0_502:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 28], 1          # 1-byte Folded Reload
-	je	.LBB0_503
-.LBB0_928:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rbx, xmm9
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 4
-	test	byte ptr [rsp + 24], 1          # 1-byte Folded Reload
-	jne	.LBB0_929
-.LBB0_504:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 20], 1          # 1-byte Folded Reload
-	je	.LBB0_505
-.LBB0_930:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm9, 1
-	vmovq	rbx, xmm1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 6
-	test	r9b, 1
-	jne	.LBB0_931
-.LBB0_506:                              #   in Loop: Header=BB0_26 Depth=1
-	test	dl, 1
-	mov	rbx, qword ptr [rsp + 296]      # 8-byte Reload
-	je	.LBB0_507
-.LBB0_932:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rdx, xmm8
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rdx], xmm1, 8
-	test	cl, 1
-	jne	.LBB0_933
-.LBB0_508:                              #   in Loop: Header=BB0_26 Depth=1
-	test	sil, 1
-	mov	rdx, qword ptr [rsp + 304]      # 8-byte Reload
-	je	.LBB0_509
-.LBB0_934:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm8, 1
-	vmovq	rcx, xmm1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 10
-	test	al, 1
-	mov	rsi, qword ptr [rsp + 152]      # 8-byte Reload
-	jne	.LBB0_935
-.LBB0_510:                              #   in Loop: Header=BB0_26 Depth=1
-	test	r13b, 1
-	je	.LBB0_511
-.LBB0_936:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rcx, xmm7
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 12
-	test	r10b, 1
-	mov	r13, qword ptr [rsp + 280]      # 8-byte Reload
-	jne	.LBB0_937
-.LBB0_512:                              #   in Loop: Header=BB0_26 Depth=1
-	test	r11b, 1
-	je	.LBB0_513
-.LBB0_938:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm7, 1
-	vmovq	rcx, xmm1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 14
-	test	r14b, 1
-	mov	rax, qword ptr [rsp + 288]      # 8-byte Reload
-	mov	r9, qword ptr [rsp + 232]       # 8-byte Reload
-	jne	.LBB0_514
-	jmp	.LBB0_515
-	.p2align	4, 0x90
-.LBB0_483:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 1
-	test	bl, 1
-	je	.LBB0_484
-.LBB0_909:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rbx, xmm15, 1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 1
-	vpextrb	ebx, xmm6, 2
-	test	bl, 1
-	mov	r15, qword ptr [rsp + 224]      # 8-byte Reload
-	jne	.LBB0_910
-.LBB0_485:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 3
-	test	bl, 1
-	je	.LBB0_486
-.LBB0_911:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm15, 1
-	vpextrq	rbx, xmm1, 1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 3
-	vpextrb	ebx, xmm6, 4
-	test	bl, 1
-	jne	.LBB0_912
-.LBB0_487:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 5
-	test	bl, 1
-	je	.LBB0_488
-.LBB0_913:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rbx, xmm5, 1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 5
-	vpextrb	ebx, xmm6, 6
-	test	bl, 1
-	jne	.LBB0_914
-.LBB0_489:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 7
-	test	bl, 1
-	je	.LBB0_490
-.LBB0_915:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm5, 1
-	vpextrq	rbx, xmm1, 1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 7
-	vpextrb	ebx, xmm6, 8
-	test	bl, 1
-	jne	.LBB0_916
-.LBB0_491:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 9
-	test	bl, 1
-	je	.LBB0_492
-.LBB0_917:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rbx, xmm12, 1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 9
-	vpextrb	ebx, xmm6, 10
-	test	bl, 1
-	jne	.LBB0_918
-.LBB0_493:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 11
-	test	bl, 1
-	je	.LBB0_494
-.LBB0_919:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm12, 1
-	vpextrq	rbx, xmm1, 1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 11
-	vpextrb	ebx, xmm6, 12
-	test	bl, 1
-	jne	.LBB0_920
-.LBB0_495:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 13
-	test	bl, 1
-	je	.LBB0_496
-.LBB0_921:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rbx, xmm11, 1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 13
-	vpextrb	ebx, xmm6, 14
-	test	bl, 1
-	jne	.LBB0_922
-.LBB0_497:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 15
-	test	bl, 1
-	je	.LBB0_498
-.LBB0_923:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm11, 1
-	vpextrq	rbx, xmm1, 1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 15
-	test	byte ptr [rsp + 44], 1          # 1-byte Folded Reload
-	jne	.LBB0_924
-.LBB0_499:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 40], 1          # 1-byte Folded Reload
-	je	.LBB0_500
-.LBB0_925:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rbx, xmm10, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 1
-	test	byte ptr [rsp + 36], 1          # 1-byte Folded Reload
-	jne	.LBB0_926
-.LBB0_501:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 32], 1          # 1-byte Folded Reload
-	je	.LBB0_502
-.LBB0_927:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm10, 1
-	vpextrq	rbx, xmm1, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 3
-	test	byte ptr [rsp + 28], 1          # 1-byte Folded Reload
-	jne	.LBB0_928
-.LBB0_503:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 24], 1          # 1-byte Folded Reload
-	je	.LBB0_504
-.LBB0_929:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rbx, xmm9, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 5
-	test	byte ptr [rsp + 20], 1          # 1-byte Folded Reload
-	jne	.LBB0_930
-.LBB0_505:                              #   in Loop: Header=BB0_26 Depth=1
-	test	r9b, 1
-	je	.LBB0_506
-.LBB0_931:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm9, 1
-	vpextrq	rbx, xmm1, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 7
-	test	dl, 1
-	mov	rbx, qword ptr [rsp + 296]      # 8-byte Reload
-	jne	.LBB0_932
-.LBB0_507:                              #   in Loop: Header=BB0_26 Depth=1
-	test	cl, 1
-	je	.LBB0_508
-.LBB0_933:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rcx, xmm8, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 9
-	test	sil, 1
-	mov	rdx, qword ptr [rsp + 304]      # 8-byte Reload
-	jne	.LBB0_934
-.LBB0_509:                              #   in Loop: Header=BB0_26 Depth=1
-	test	al, 1
-	mov	rsi, qword ptr [rsp + 152]      # 8-byte Reload
-	je	.LBB0_510
-.LBB0_935:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm8, 1
-	vpextrq	rcx, xmm1, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 11
-	test	r13b, 1
-	jne	.LBB0_936
-.LBB0_511:                              #   in Loop: Header=BB0_26 Depth=1
-	test	r10b, 1
-	mov	r13, qword ptr [rsp + 280]      # 8-byte Reload
-	je	.LBB0_512
-.LBB0_937:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rcx, xmm7, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 13
-	test	r11b, 1
-	jne	.LBB0_938
-.LBB0_513:                              #   in Loop: Header=BB0_26 Depth=1
-	test	r14b, 1
-	mov	rax, qword ptr [rsp + 288]      # 8-byte Reload
-	mov	r9, qword ptr [rsp + 232]       # 8-byte Reload
-	je	.LBB0_515
-.LBB0_514:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm7, 1
-	vpextrq	rcx, xmm1, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 15
-.LBB0_515:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovdqa	ymm1, ymmword ptr [rsp + 608]   # 32-byte Reload
-	vpor	ymm15, ymm1, ymmword ptr [rsp + 512] # 32-byte Folded Reload
-	vpor	ymm5, ymm1, ymmword ptr [rsp + 480] # 32-byte Folded Reload
-	vpor	ymm10, ymm1, ymmword ptr [rsp + 384] # 32-byte Folded Reload
-	vpor	ymm9, ymm1, ymmword ptr [rsp + 352] # 32-byte Folded Reload
-	vpor	ymm12, ymm1, ymmword ptr [rsp + 448] # 32-byte Folded Reload
-	vpor	ymm11, ymm1, ymmword ptr [rsp + 416] # 32-byte Folded Reload
-	vpor	ymm8, ymm1, ymmword ptr [rsp + 320] # 32-byte Folded Reload
-	vpor	ymm7, ymm4, ymm1
-	vperm2i128	ymm1, ymm8, ymm7, 49    # ymm1 = ymm8[2,3],ymm7[2,3]
-	vinserti128	ymm2, ymm8, xmm7, 1
-	vshufps	ymm1, ymm2, ymm1, 136           # ymm1 = ymm2[0,2],ymm1[0,2],ymm2[4,6],ymm1[4,6]
-	vperm2i128	ymm2, ymm12, ymm11, 49  # ymm2 = ymm12[2,3],ymm11[2,3]
-	vinserti128	ymm3, ymm12, xmm11, 1
-	vshufps	ymm2, ymm3, ymm2, 136           # ymm2 = ymm3[0,2],ymm2[0,2],ymm3[4,6],ymm2[4,6]
-	vperm2i128	ymm3, ymm10, ymm9, 49   # ymm3 = ymm10[2,3],ymm9[2,3]
-	vinserti128	ymm13, ymm10, xmm9, 1
-	vshufps	ymm3, ymm13, ymm3, 136          # ymm3 = ymm13[0,2],ymm3[0,2],ymm13[4,6],ymm3[4,6]
-	vperm2i128	ymm13, ymm15, ymm5, 49  # ymm13 = ymm15[2,3],ymm5[2,3]
-	vinserti128	ymm14, ymm15, xmm5, 1
-	vshufps	ymm13, ymm14, ymm13, 136        # ymm13 = ymm14[0,2],ymm13[0,2],ymm14[4,6],ymm13[4,6]
-	vpcmpgtd	ymm13, ymm0, ymm13
-	vpcmpgtd	ymm3, ymm0, ymm3
-	vpackssdw	ymm3, ymm13, ymm3
-	vpcmpgtd	ymm2, ymm0, ymm2
-	vpcmpgtd	ymm1, ymm0, ymm1
-	vpackssdw	ymm1, ymm2, ymm1
-	vpermq	ymm2, ymm3, 216                 # ymm2 = ymm3[0,2,1,3]
-	vpermq	ymm1, ymm1, 216                 # ymm1 = ymm1[0,2,1,3]
-	vpacksswb	ymm1, ymm2, ymm1
-	vpand	ymm6, ymm1, ymm6
-	vmovd	ecx, xmm6
-                                        # implicit-def: $ymm14
-	test	cl, 1
-	je	.LBB0_516
-# %bb.939:                              #   in Loop: Header=BB0_26 Depth=1
-	vpbroadcastb	ymm14, byte ptr [rdi + rdx]
-	vpextrb	ecx, xmm6, 1
-	test	cl, 1
-	jne	.LBB0_940
-.LBB0_517:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rdx, qword ptr [rsp + 104]      # 8-byte Reload
-	vpextrb	ecx, xmm6, 2
-	test	cl, 1
-	je	.LBB0_519
-.LBB0_518:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rbx], 2
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-.LBB0_519:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rsi, qword ptr [rsp + 96]       # 8-byte Reload
-	mov	r10, qword ptr [rsp + 72]       # 8-byte Reload
-	vpextrb	ecx, xmm6, 3
-	test	cl, 1
-	je	.LBB0_520
-# %bb.941:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rcx, qword ptr [rsp + 272]      # 8-byte Reload
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rcx], 3
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 4
-	test	cl, 1
-	jne	.LBB0_942
-.LBB0_521:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 5
-	test	cl, 1
-	je	.LBB0_522
-.LBB0_943:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rdx], 5
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 6
-	test	cl, 1
-	jne	.LBB0_944
-.LBB0_523:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 7
-	test	cl, 1
-	je	.LBB0_524
-.LBB0_945:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + r9], 7
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 8
-	test	cl, 1
-	jne	.LBB0_946
-.LBB0_525:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rdx, qword ptr [rsp + 88]       # 8-byte Reload
-	vpextrb	ecx, xmm6, 9
-	test	cl, 1
-	je	.LBB0_527
-.LBB0_526:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + r15], 9
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-.LBB0_527:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rax, qword ptr [rsp + 144]      # 8-byte Reload
-	mov	rsi, qword ptr [rsp + 136]      # 8-byte Reload
-	mov	rbx, qword ptr [rsp + 128]      # 8-byte Reload
-	mov	r9, qword ptr [rsp + 120]       # 8-byte Reload
-	vpextrb	ecx, xmm6, 10
-	test	cl, 1
-	je	.LBB0_528
-# %bb.947:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rax], 10
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 11
-	test	cl, 1
-	jne	.LBB0_948
-.LBB0_529:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 12
-	test	cl, 1
-	je	.LBB0_530
-.LBB0_949:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rax, qword ptr [rsp + 248]      # 8-byte Reload
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rax], 12
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 13
-	test	cl, 1
-	jne	.LBB0_531
-	jmp	.LBB0_532
-	.p2align	4, 0x90
-.LBB0_516:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 1
-	test	cl, 1
-	je	.LBB0_517
-.LBB0_940:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rsi], 1
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	mov	rdx, qword ptr [rsp + 104]      # 8-byte Reload
-	vpextrb	ecx, xmm6, 2
-	test	cl, 1
-	jne	.LBB0_518
-	jmp	.LBB0_519
-	.p2align	4, 0x90
-.LBB0_520:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 4
-	test	cl, 1
-	je	.LBB0_521
-.LBB0_942:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rcx, qword ptr [rsp + 264]      # 8-byte Reload
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rcx], 4
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 5
-	test	cl, 1
-	jne	.LBB0_943
-.LBB0_522:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 6
-	test	cl, 1
-	je	.LBB0_523
-.LBB0_944:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rax], 6
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 7
-	test	cl, 1
-	jne	.LBB0_945
-.LBB0_524:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 8
-	test	cl, 1
-	je	.LBB0_525
-.LBB0_946:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rsi], 8
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	mov	rdx, qword ptr [rsp + 88]       # 8-byte Reload
-	vpextrb	ecx, xmm6, 9
-	test	cl, 1
-	jne	.LBB0_526
-	jmp	.LBB0_527
-	.p2align	4, 0x90
-.LBB0_528:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 11
-	test	cl, 1
-	je	.LBB0_529
-.LBB0_948:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rax, qword ptr [rsp + 256]      # 8-byte Reload
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rax], 11
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-	vpextrb	ecx, xmm6, 12
-	test	cl, 1
-	jne	.LBB0_949
-.LBB0_530:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 13
-	test	cl, 1
-	je	.LBB0_532
-.LBB0_531:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rdx], 13
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-.LBB0_532:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rax, qword ptr [rsp + 80]       # 8-byte Reload
-	mov	rdx, qword ptr [rsp + 64]       # 8-byte Reload
-	vpextrb	ecx, xmm6, 14
-	test	cl, 1
-	je	.LBB0_534
-# %bb.533:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + rax], 14
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-.LBB0_534:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 15
-	test	cl, 1
-	je	.LBB0_536
-# %bb.535:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm1, xmm14, byte ptr [rdi + r10], 15
-	vpblendd	ymm14, ymm14, ymm1, 15          # ymm14 = ymm1[0,1,2,3],ymm14[4,5,6,7]
-.LBB0_536:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm6, 1
-	vmovd	eax, xmm1
-	mov	dword ptr [rsp + 44], eax       # 4-byte Spill
-	test	al, 1
-	je	.LBB0_538
-# %bb.537:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rdx], 0
-	vinserti128	ymm14, ymm14, xmm2, 1
-.LBB0_538:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rax, qword ptr [rsp + 56]       # 8-byte Reload
-	vpextrb	ecx, xmm1, 1
-	mov	dword ptr [rsp + 40], ecx       # 4-byte Spill
-	test	cl, 1
-	je	.LBB0_539
-# %bb.950:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rsi], 1
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	ecx, xmm1, 2
-	mov	dword ptr [rsp + 36], ecx       # 4-byte Spill
-	test	cl, 1
-	jne	.LBB0_951
-.LBB0_540:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm1, 3
-	mov	dword ptr [rsp + 32], ecx       # 4-byte Spill
-	test	cl, 1
-	je	.LBB0_541
-.LBB0_952:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + r9], 3
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	ecx, xmm1, 4
-	mov	dword ptr [rsp + 28], ecx       # 4-byte Spill
-	test	cl, 1
-	jne	.LBB0_953
-.LBB0_542:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	eax, xmm1, 5
-	mov	dword ptr [rsp + 24], eax       # 4-byte Spill
-	test	al, 1
-	je	.LBB0_544
-.LBB0_543:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + r13], 5
-	vinserti128	ymm14, ymm14, xmm2, 1
-.LBB0_544:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rax, qword ptr [rsp + 112]      # 8-byte Reload
-	vpextrb	ecx, xmm1, 6
-	mov	dword ptr [rsp + 20], ecx       # 4-byte Spill
-	test	cl, 1
-	je	.LBB0_545
-# %bb.954:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rax], 6
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	r9d, xmm1, 7
-	test	r9b, 1
-	jne	.LBB0_955
-.LBB0_546:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	edx, xmm1, 8
-	test	dl, 1
-	je	.LBB0_547
-.LBB0_956:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	mov	rax, qword ptr [rsp + 216]      # 8-byte Reload
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rax], 8
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	ecx, xmm1, 9
-	test	cl, 1
-	jne	.LBB0_957
-.LBB0_548:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	esi, xmm1, 10
-	test	sil, 1
-	je	.LBB0_549
-.LBB0_958:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	mov	rax, qword ptr [rsp + 200]      # 8-byte Reload
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rax], 10
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	eax, xmm1, 11
-	test	al, 1
-	jne	.LBB0_959
-.LBB0_550:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	r13d, xmm1, 12
-	test	r13b, 1
-	je	.LBB0_551
-.LBB0_960:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	mov	rbx, qword ptr [rsp + 184]      # 8-byte Reload
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rbx], 12
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	r10d, xmm1, 13
-	test	r10b, 1
-	jne	.LBB0_961
-.LBB0_552:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	r11d, xmm1, 14
-	test	r11b, 1
-	je	.LBB0_553
-.LBB0_962:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	mov	rbx, qword ptr [rsp + 168]      # 8-byte Reload
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rbx], 14
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	r14d, xmm1, 15
-	test	r14b, 1
-	jne	.LBB0_554
-	jmp	.LBB0_555
-	.p2align	4, 0x90
-.LBB0_539:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm1, 2
-	mov	dword ptr [rsp + 36], ecx       # 4-byte Spill
-	test	cl, 1
-	je	.LBB0_540
-.LBB0_951:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rbx], 2
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	ecx, xmm1, 3
-	mov	dword ptr [rsp + 32], ecx       # 4-byte Spill
-	test	cl, 1
-	jne	.LBB0_952
-.LBB0_541:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm1, 4
-	mov	dword ptr [rsp + 28], ecx       # 4-byte Spill
-	test	cl, 1
-	je	.LBB0_542
-.LBB0_953:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rax], 4
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	eax, xmm1, 5
-	mov	dword ptr [rsp + 24], eax       # 4-byte Spill
-	test	al, 1
-	jne	.LBB0_543
-	jmp	.LBB0_544
-	.p2align	4, 0x90
-.LBB0_545:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	r9d, xmm1, 7
-	test	r9b, 1
-	je	.LBB0_546
-.LBB0_955:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	mov	rax, qword ptr [rsp + 240]      # 8-byte Reload
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rax], 7
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	edx, xmm1, 8
-	test	dl, 1
-	jne	.LBB0_956
-.LBB0_547:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm1, 9
-	test	cl, 1
-	je	.LBB0_548
-.LBB0_957:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	mov	rax, qword ptr [rsp + 208]      # 8-byte Reload
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rax], 9
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	esi, xmm1, 10
-	test	sil, 1
-	jne	.LBB0_958
-.LBB0_549:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	eax, xmm1, 11
-	test	al, 1
-	je	.LBB0_550
-.LBB0_959:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	mov	rbx, qword ptr [rsp + 192]      # 8-byte Reload
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rbx], 11
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	r13d, xmm1, 12
-	test	r13b, 1
-	jne	.LBB0_960
-.LBB0_551:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	r10d, xmm1, 13
-	test	r10b, 1
-	je	.LBB0_552
-.LBB0_961:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm2, ymm14, 1
-	mov	rbx, qword ptr [rsp + 176]      # 8-byte Reload
-	vpinsrb	xmm2, xmm2, byte ptr [rdi + rbx], 13
-	vinserti128	ymm14, ymm14, xmm2, 1
-	vpextrb	r11d, xmm1, 14
-	test	r11b, 1
-	jne	.LBB0_962
-.LBB0_553:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	r14d, xmm1, 15
-	test	r14b, 1
-	je	.LBB0_555
-.LBB0_554:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm14, 1
-	mov	rbx, qword ptr [rsp + 160]      # 8-byte Reload
-	vpinsrb	xmm1, xmm1, byte ptr [rdi + rbx], 15
-	vinserti128	ymm14, ymm14, xmm1, 1
-.LBB0_555:                              #   in Loop: Header=BB0_26 Depth=1
-	vpsrlw	ymm1, ymm14, 6
-	vpand	ymm14, ymm1, ymmword ptr [rip + .LCPI0_4]
-	vmovd	r15d, xmm6
-	test	r15b, 1
-	je	.LBB0_556
-# %bb.963:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rbx, xmm15
-	vpextrb	byte ptr [r8 + rbx], xmm14, 0
-	vpextrb	ebx, xmm6, 1
-	test	bl, 1
-	jne	.LBB0_964
-.LBB0_557:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 2
-	test	bl, 1
-	mov	r15, qword ptr [rsp + 224]      # 8-byte Reload
-	je	.LBB0_558
-.LBB0_965:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm15, 1
-	vmovq	rbx, xmm1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 2
-	vpextrb	ebx, xmm6, 3
-	test	bl, 1
-	jne	.LBB0_966
-.LBB0_559:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 4
-	test	bl, 1
-	je	.LBB0_560
-.LBB0_967:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rbx, xmm5
-	vpextrb	byte ptr [r8 + rbx], xmm14, 4
-	vpextrb	ebx, xmm6, 5
-	test	bl, 1
-	jne	.LBB0_968
-.LBB0_561:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 6
-	test	bl, 1
-	je	.LBB0_562
-.LBB0_969:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm5, 1
-	vmovq	rbx, xmm1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 6
-	vpextrb	ebx, xmm6, 7
-	test	bl, 1
-	jne	.LBB0_970
-.LBB0_563:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 8
-	test	bl, 1
-	je	.LBB0_564
-.LBB0_971:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rbx, xmm12
-	vpextrb	byte ptr [r8 + rbx], xmm14, 8
-	vpextrb	ebx, xmm6, 9
-	test	bl, 1
-	jne	.LBB0_972
-.LBB0_565:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 10
-	test	bl, 1
-	je	.LBB0_566
-.LBB0_973:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm12, 1
-	vmovq	rbx, xmm1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 10
-	vpextrb	ebx, xmm6, 11
-	test	bl, 1
-	jne	.LBB0_974
-.LBB0_567:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 12
-	test	bl, 1
-	je	.LBB0_568
-.LBB0_975:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rbx, xmm11
-	vpextrb	byte ptr [r8 + rbx], xmm14, 12
-	vpextrb	ebx, xmm6, 13
-	test	bl, 1
-	jne	.LBB0_976
-.LBB0_569:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 14
-	test	bl, 1
-	je	.LBB0_570
-.LBB0_977:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm11, 1
-	vmovq	rbx, xmm1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 14
-	vpextrb	ebx, xmm6, 15
-	test	bl, 1
-	jne	.LBB0_978
-.LBB0_571:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 44], 1          # 1-byte Folded Reload
-	je	.LBB0_572
-.LBB0_979:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rbx, xmm10
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 0
-	test	byte ptr [rsp + 40], 1          # 1-byte Folded Reload
-	jne	.LBB0_980
-.LBB0_573:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 36], 1          # 1-byte Folded Reload
-	je	.LBB0_574
-.LBB0_981:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm10, 1
-	vmovq	rbx, xmm1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 2
-	test	byte ptr [rsp + 32], 1          # 1-byte Folded Reload
-	jne	.LBB0_982
-.LBB0_575:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 28], 1          # 1-byte Folded Reload
-	je	.LBB0_576
-.LBB0_983:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rbx, xmm9
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 4
-	test	byte ptr [rsp + 24], 1          # 1-byte Folded Reload
-	jne	.LBB0_984
-.LBB0_577:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 20], 1          # 1-byte Folded Reload
-	je	.LBB0_578
-.LBB0_985:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm9, 1
-	vmovq	rbx, xmm1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 6
-	test	r9b, 1
-	jne	.LBB0_986
-.LBB0_579:                              #   in Loop: Header=BB0_26 Depth=1
-	test	dl, 1
-	mov	rbx, qword ptr [rsp + 296]      # 8-byte Reload
-	je	.LBB0_580
-.LBB0_987:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rdx, xmm8
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rdx], xmm1, 8
-	test	cl, 1
-	jne	.LBB0_988
-.LBB0_581:                              #   in Loop: Header=BB0_26 Depth=1
-	test	sil, 1
-	mov	rdx, qword ptr [rsp + 304]      # 8-byte Reload
-	je	.LBB0_582
-.LBB0_989:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm8, 1
-	vmovq	rcx, xmm1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 10
-	test	al, 1
-	mov	rsi, qword ptr [rsp + 152]      # 8-byte Reload
-	jne	.LBB0_990
-.LBB0_583:                              #   in Loop: Header=BB0_26 Depth=1
-	test	r13b, 1
-	je	.LBB0_584
-.LBB0_991:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rcx, xmm7
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 12
-	test	r10b, 1
-	mov	r13, qword ptr [rsp + 280]      # 8-byte Reload
-	jne	.LBB0_992
-.LBB0_585:                              #   in Loop: Header=BB0_26 Depth=1
-	test	r11b, 1
-	je	.LBB0_586
-.LBB0_993:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm7, 1
-	vmovq	rcx, xmm1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 14
-	test	r14b, 1
-	mov	rax, qword ptr [rsp + 288]      # 8-byte Reload
-	mov	r9, qword ptr [rsp + 232]       # 8-byte Reload
-	jne	.LBB0_587
-	jmp	.LBB0_588
-	.p2align	4, 0x90
-.LBB0_556:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 1
-	test	bl, 1
-	je	.LBB0_557
-.LBB0_964:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rbx, xmm15, 1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 1
-	vpextrb	ebx, xmm6, 2
-	test	bl, 1
-	mov	r15, qword ptr [rsp + 224]      # 8-byte Reload
-	jne	.LBB0_965
-.LBB0_558:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 3
-	test	bl, 1
-	je	.LBB0_559
-.LBB0_966:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm15, 1
-	vpextrq	rbx, xmm1, 1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 3
-	vpextrb	ebx, xmm6, 4
-	test	bl, 1
-	jne	.LBB0_967
-.LBB0_560:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 5
-	test	bl, 1
-	je	.LBB0_561
-.LBB0_968:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rbx, xmm5, 1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 5
-	vpextrb	ebx, xmm6, 6
-	test	bl, 1
-	jne	.LBB0_969
-.LBB0_562:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 7
-	test	bl, 1
-	je	.LBB0_563
-.LBB0_970:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm5, 1
-	vpextrq	rbx, xmm1, 1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 7
-	vpextrb	ebx, xmm6, 8
-	test	bl, 1
-	jne	.LBB0_971
-.LBB0_564:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 9
-	test	bl, 1
-	je	.LBB0_565
-.LBB0_972:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rbx, xmm12, 1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 9
-	vpextrb	ebx, xmm6, 10
-	test	bl, 1
-	jne	.LBB0_973
-.LBB0_566:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 11
-	test	bl, 1
-	je	.LBB0_567
-.LBB0_974:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm12, 1
-	vpextrq	rbx, xmm1, 1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 11
-	vpextrb	ebx, xmm6, 12
-	test	bl, 1
-	jne	.LBB0_975
-.LBB0_568:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 13
-	test	bl, 1
-	je	.LBB0_569
-.LBB0_976:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rbx, xmm11, 1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 13
-	vpextrb	ebx, xmm6, 14
-	test	bl, 1
-	jne	.LBB0_977
-.LBB0_570:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm6, 15
-	test	bl, 1
-	je	.LBB0_571
-.LBB0_978:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm11, 1
-	vpextrq	rbx, xmm1, 1
-	vpextrb	byte ptr [r8 + rbx], xmm14, 15
-	test	byte ptr [rsp + 44], 1          # 1-byte Folded Reload
-	jne	.LBB0_979
-.LBB0_572:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 40], 1          # 1-byte Folded Reload
-	je	.LBB0_573
-.LBB0_980:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rbx, xmm10, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 1
-	test	byte ptr [rsp + 36], 1          # 1-byte Folded Reload
-	jne	.LBB0_981
-.LBB0_574:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 32], 1          # 1-byte Folded Reload
-	je	.LBB0_575
-.LBB0_982:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm10, 1
-	vpextrq	rbx, xmm1, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 3
-	test	byte ptr [rsp + 28], 1          # 1-byte Folded Reload
-	jne	.LBB0_983
-.LBB0_576:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 24], 1          # 1-byte Folded Reload
-	je	.LBB0_577
-.LBB0_984:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rbx, xmm9, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 5
-	test	byte ptr [rsp + 20], 1          # 1-byte Folded Reload
-	jne	.LBB0_985
-.LBB0_578:                              #   in Loop: Header=BB0_26 Depth=1
-	test	r9b, 1
-	je	.LBB0_579
-.LBB0_986:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm9, 1
-	vpextrq	rbx, xmm1, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 7
-	test	dl, 1
-	mov	rbx, qword ptr [rsp + 296]      # 8-byte Reload
-	jne	.LBB0_987
-.LBB0_580:                              #   in Loop: Header=BB0_26 Depth=1
-	test	cl, 1
-	je	.LBB0_581
-.LBB0_988:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rcx, xmm8, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 9
-	test	sil, 1
-	mov	rdx, qword ptr [rsp + 304]      # 8-byte Reload
-	jne	.LBB0_989
-.LBB0_582:                              #   in Loop: Header=BB0_26 Depth=1
-	test	al, 1
-	mov	rsi, qword ptr [rsp + 152]      # 8-byte Reload
-	je	.LBB0_583
-.LBB0_990:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm8, 1
-	vpextrq	rcx, xmm1, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 11
-	test	r13b, 1
-	jne	.LBB0_991
-.LBB0_584:                              #   in Loop: Header=BB0_26 Depth=1
-	test	r10b, 1
-	mov	r13, qword ptr [rsp + 280]      # 8-byte Reload
-	je	.LBB0_585
-.LBB0_992:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rcx, xmm7, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 13
-	test	r11b, 1
-	jne	.LBB0_993
-.LBB0_586:                              #   in Loop: Header=BB0_26 Depth=1
-	test	r14b, 1
-	mov	rax, qword ptr [rsp + 288]      # 8-byte Reload
-	mov	r9, qword ptr [rsp + 232]       # 8-byte Reload
-	je	.LBB0_588
-.LBB0_587:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm7, 1
-	vpextrq	rcx, xmm1, 1
-	vextracti128	xmm1, ymm14, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 15
-.LBB0_588:                              #   in Loop: Header=BB0_26 Depth=1
-	vmovdqa	ymm1, ymmword ptr [rsp + 576]   # 32-byte Reload
-	vpor	ymm11, ymm1, ymmword ptr [rsp + 512] # 32-byte Folded Reload
-	vpor	ymm10, ymm1, ymmword ptr [rsp + 480] # 32-byte Folded Reload
-	vpor	ymm8, ymm1, ymmword ptr [rsp + 384] # 32-byte Folded Reload
-	vpor	ymm7, ymm1, ymmword ptr [rsp + 352] # 32-byte Folded Reload
-	vpor	ymm9, ymm1, ymmword ptr [rsp + 448] # 32-byte Folded Reload
-	vpor	ymm5, ymm1, ymmword ptr [rsp + 416] # 32-byte Folded Reload
-	vpor	ymm2, ymm1, ymmword ptr [rsp + 320] # 32-byte Folded Reload
-	vpor	ymm15, ymm4, ymm1
-	vperm2i128	ymm3, ymm2, ymm15, 49   # ymm3 = ymm2[2,3],ymm15[2,3]
-	vinserti128	ymm4, ymm2, xmm15, 1
-	vshufps	ymm3, ymm4, ymm3, 136           # ymm3 = ymm4[0,2],ymm3[0,2],ymm4[4,6],ymm3[4,6]
-	vperm2i128	ymm4, ymm9, ymm5, 49    # ymm4 = ymm9[2,3],ymm5[2,3]
-	vinserti128	ymm12, ymm9, xmm5, 1
-	vshufps	ymm4, ymm12, ymm4, 136          # ymm4 = ymm12[0,2],ymm4[0,2],ymm12[4,6],ymm4[4,6]
-	vperm2i128	ymm12, ymm8, ymm7, 49   # ymm12 = ymm8[2,3],ymm7[2,3]
-	vinserti128	ymm13, ymm8, xmm7, 1
-	vshufps	ymm12, ymm13, ymm12, 136        # ymm12 = ymm13[0,2],ymm12[0,2],ymm13[4,6],ymm12[4,6]
-	vperm2i128	ymm13, ymm11, ymm10, 49 # ymm13 = ymm11[2,3],ymm10[2,3]
-	vinserti128	ymm14, ymm11, xmm10, 1
-	vshufps	ymm13, ymm14, ymm13, 136        # ymm13 = ymm14[0,2],ymm13[0,2],ymm14[4,6],ymm13[4,6]
-	vpcmpgtd	ymm13, ymm0, ymm13
-	vpcmpgtd	ymm12, ymm0, ymm12
-	vpackssdw	ymm12, ymm13, ymm12
-	vpermq	ymm12, ymm12, 216               # ymm12 = ymm12[0,2,1,3]
-	vpcmpgtd	ymm4, ymm0, ymm4
-	vpcmpgtd	ymm3, ymm0, ymm3
-	vpackssdw	ymm3, ymm4, ymm3
-	vpermq	ymm3, ymm3, 216                 # ymm3 = ymm3[0,2,1,3]
-	vpacksswb	ymm3, ymm12, ymm3
-	vpand	ymm3, ymm3, ymm6
-	vmovd	ecx, xmm3
-                                        # implicit-def: $ymm4
-	test	cl, 1
-	je	.LBB0_589
-# %bb.994:                              #   in Loop: Header=BB0_26 Depth=1
-	vpbroadcastb	ymm4, byte ptr [rdi + rdx]
-	vpextrb	ecx, xmm3, 1
-	test	cl, 1
-	jne	.LBB0_995
-.LBB0_590:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rdx, qword ptr [rsp + 104]      # 8-byte Reload
-	vpextrb	ecx, xmm3, 2
-	test	cl, 1
-	je	.LBB0_592
-.LBB0_591:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm6, xmm4, byte ptr [rdi + rbx], 2
-	vpblendd	ymm4, ymm4, ymm6, 15            # ymm4 = ymm6[0,1,2,3],ymm4[4,5,6,7]
-.LBB0_592:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rsi, qword ptr [rsp + 96]       # 8-byte Reload
-	mov	r10, qword ptr [rsp + 72]       # 8-byte Reload
-	vpextrb	ecx, xmm3, 3
-	test	cl, 1
-	je	.LBB0_593
-# %bb.996:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rcx, qword ptr [rsp + 272]      # 8-byte Reload
-	vpinsrb	xmm6, xmm4, byte ptr [rdi + rcx], 3
-	vpblendd	ymm4, ymm4, ymm6, 15            # ymm4 = ymm6[0,1,2,3],ymm4[4,5,6,7]
-	vpextrb	ecx, xmm3, 4
-	test	cl, 1
-	jne	.LBB0_997
-.LBB0_594:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm3, 5
-	test	cl, 1
-	je	.LBB0_595
-.LBB0_998:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm6, xmm4, byte ptr [rdi + rdx], 5
-	vpblendd	ymm4, ymm4, ymm6, 15            # ymm4 = ymm6[0,1,2,3],ymm4[4,5,6,7]
-	vpextrb	ecx, xmm3, 6
-	test	cl, 1
-	jne	.LBB0_999
-.LBB0_596:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm3, 7
-	test	cl, 1
-	je	.LBB0_597
-.LBB0_1000:                             #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm6, xmm4, byte ptr [rdi + r9], 7
-	vpblendd	ymm4, ymm4, ymm6, 15            # ymm4 = ymm6[0,1,2,3],ymm4[4,5,6,7]
-	vpextrb	ecx, xmm3, 8
-	test	cl, 1
-	jne	.LBB0_1001
-.LBB0_598:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rdx, qword ptr [rsp + 88]       # 8-byte Reload
-	vpextrb	ecx, xmm3, 9
-	test	cl, 1
-	je	.LBB0_600
-.LBB0_599:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm6, xmm4, byte ptr [rdi + r15], 9
-	vpblendd	ymm4, ymm4, ymm6, 15            # ymm4 = ymm6[0,1,2,3],ymm4[4,5,6,7]
-.LBB0_600:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rax, qword ptr [rsp + 144]      # 8-byte Reload
-	mov	rsi, qword ptr [rsp + 136]      # 8-byte Reload
-	mov	rbx, qword ptr [rsp + 128]      # 8-byte Reload
-	mov	r9, qword ptr [rsp + 120]       # 8-byte Reload
-	vpextrb	ecx, xmm3, 10
-	test	cl, 1
-	je	.LBB0_601
-# %bb.1002:                             #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm6, xmm4, byte ptr [rdi + rax], 10
-	vpblendd	ymm4, ymm4, ymm6, 15            # ymm4 = ymm6[0,1,2,3],ymm4[4,5,6,7]
-	vpextrb	ecx, xmm3, 11
-	test	cl, 1
-	jne	.LBB0_1003
-.LBB0_602:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm3, 12
-	test	cl, 1
-	je	.LBB0_603
-.LBB0_1004:                             #   in Loop: Header=BB0_26 Depth=1
-	mov	rax, qword ptr [rsp + 248]      # 8-byte Reload
-	vpinsrb	xmm6, xmm4, byte ptr [rdi + rax], 12
-	vpblendd	ymm4, ymm4, ymm6, 15            # ymm4 = ymm6[0,1,2,3],ymm4[4,5,6,7]
-	vpextrb	ecx, xmm3, 13
-	test	cl, 1
-	jne	.LBB0_604
-	jmp	.LBB0_605
-	.p2align	4, 0x90
-.LBB0_589:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm3, 1
-	test	cl, 1
-	je	.LBB0_590
-.LBB0_995:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm6, xmm4, byte ptr [rdi + rsi], 1
-	vpblendd	ymm4, ymm4, ymm6, 15            # ymm4 = ymm6[0,1,2,3],ymm4[4,5,6,7]
-	mov	rdx, qword ptr [rsp + 104]      # 8-byte Reload
-	vpextrb	ecx, xmm3, 2
-	test	cl, 1
-	jne	.LBB0_591
-	jmp	.LBB0_592
-	.p2align	4, 0x90
-.LBB0_593:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm3, 4
-	test	cl, 1
-	je	.LBB0_594
-.LBB0_997:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rcx, qword ptr [rsp + 264]      # 8-byte Reload
-	vpinsrb	xmm6, xmm4, byte ptr [rdi + rcx], 4
-	vpblendd	ymm4, ymm4, ymm6, 15            # ymm4 = ymm6[0,1,2,3],ymm4[4,5,6,7]
-	vpextrb	ecx, xmm3, 5
-	test	cl, 1
-	jne	.LBB0_998
-.LBB0_595:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm3, 6
-	test	cl, 1
-	je	.LBB0_596
-.LBB0_999:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm6, xmm4, byte ptr [rdi + rax], 6
-	vpblendd	ymm4, ymm4, ymm6, 15            # ymm4 = ymm6[0,1,2,3],ymm4[4,5,6,7]
-	vpextrb	ecx, xmm3, 7
-	test	cl, 1
-	jne	.LBB0_1000
-.LBB0_597:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm3, 8
-	test	cl, 1
-	je	.LBB0_598
-.LBB0_1001:                             #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm6, xmm4, byte ptr [rdi + rsi], 8
-	vpblendd	ymm4, ymm4, ymm6, 15            # ymm4 = ymm6[0,1,2,3],ymm4[4,5,6,7]
-	mov	rdx, qword ptr [rsp + 88]       # 8-byte Reload
-	vpextrb	ecx, xmm3, 9
-	test	cl, 1
-	jne	.LBB0_599
-	jmp	.LBB0_600
-	.p2align	4, 0x90
-.LBB0_601:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm3, 11
-	test	cl, 1
-	je	.LBB0_602
-.LBB0_1003:                             #   in Loop: Header=BB0_26 Depth=1
-	mov	rax, qword ptr [rsp + 256]      # 8-byte Reload
-	vpinsrb	xmm6, xmm4, byte ptr [rdi + rax], 11
-	vpblendd	ymm4, ymm4, ymm6, 15            # ymm4 = ymm6[0,1,2,3],ymm4[4,5,6,7]
-	vpextrb	ecx, xmm3, 12
-	test	cl, 1
-	jne	.LBB0_1004
-.LBB0_603:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm3, 13
-	test	cl, 1
-	je	.LBB0_605
-.LBB0_604:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm6, xmm4, byte ptr [rdi + rdx], 13
-	vpblendd	ymm4, ymm4, ymm6, 15            # ymm4 = ymm6[0,1,2,3],ymm4[4,5,6,7]
-.LBB0_605:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rax, qword ptr [rsp + 80]       # 8-byte Reload
-	mov	rdx, qword ptr [rsp + 64]       # 8-byte Reload
-	vpextrb	ecx, xmm3, 14
-	test	cl, 1
-	je	.LBB0_607
-# %bb.606:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm6, xmm4, byte ptr [rdi + rax], 14
-	vpblendd	ymm4, ymm4, ymm6, 15            # ymm4 = ymm6[0,1,2,3],ymm4[4,5,6,7]
-.LBB0_607:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm3, 15
-	test	cl, 1
-	je	.LBB0_609
-# %bb.608:                              #   in Loop: Header=BB0_26 Depth=1
-	vpinsrb	xmm6, xmm4, byte ptr [rdi + r10], 15
-	vpblendd	ymm4, ymm4, ymm6, 15            # ymm4 = ymm6[0,1,2,3],ymm4[4,5,6,7]
-.LBB0_609:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm6, ymm3, 1
-	vmovd	eax, xmm6
-	mov	dword ptr [rsp + 512], eax      # 4-byte Spill
-	test	al, 1
-	je	.LBB0_611
-# %bb.610:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm4, 1
-	vpinsrb	xmm1, xmm1, byte ptr [rdi + rdx], 0
-	vinserti128	ymm4, ymm4, xmm1, 1
-.LBB0_611:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rax, qword ptr [rsp + 56]       # 8-byte Reload
-	vpextrb	ecx, xmm6, 1
-	mov	dword ptr [rsp + 480], ecx      # 4-byte Spill
-	test	cl, 1
-	je	.LBB0_612
-# %bb.1005:                             #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm4, 1
-	vpinsrb	xmm1, xmm1, byte ptr [rdi + rsi], 1
-	vinserti128	ymm4, ymm4, xmm1, 1
-	vpextrb	ecx, xmm6, 2
-	mov	dword ptr [rsp + 448], ecx      # 4-byte Spill
-	test	cl, 1
-	jne	.LBB0_1006
-.LBB0_613:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 3
-	mov	dword ptr [rsp + 416], ecx      # 4-byte Spill
-	test	cl, 1
-	je	.LBB0_614
-.LBB0_1007:                             #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm4, 1
-	vpinsrb	xmm1, xmm1, byte ptr [rdi + r9], 3
-	vinserti128	ymm4, ymm4, xmm1, 1
-	vpextrb	ecx, xmm6, 4
-	mov	dword ptr [rsp + 384], ecx      # 4-byte Spill
-	test	cl, 1
-	jne	.LBB0_1008
-.LBB0_615:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	eax, xmm6, 5
-	mov	dword ptr [rsp + 352], eax      # 4-byte Spill
-	test	al, 1
-	je	.LBB0_617
-.LBB0_616:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm4, 1
-	vpinsrb	xmm1, xmm1, byte ptr [rdi + r13], 5
-	vinserti128	ymm4, ymm4, xmm1, 1
-.LBB0_617:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rax, qword ptr [rsp + 112]      # 8-byte Reload
-	mov	rbx, qword ptr [rsp + 184]      # 8-byte Reload
-	mov	rdx, qword ptr [rsp + 176]      # 8-byte Reload
-	vpextrb	ecx, xmm6, 6
-	mov	dword ptr [rsp + 320], ecx      # 4-byte Spill
-	test	cl, 1
-	je	.LBB0_618
-# %bb.1009:                             #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm4, 1
-	vpinsrb	xmm1, xmm1, byte ptr [rdi + rax], 6
-	vinserti128	ymm4, ymm4, xmm1, 1
-	vpextrb	eax, xmm6, 7
-	mov	dword ptr [rsp + 152], eax      # 4-byte Spill
-	test	al, 1
-	jne	.LBB0_1010
-.LBB0_619:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	r9d, xmm6, 8
-	test	r9b, 1
-	je	.LBB0_620
-.LBB0_1011:                             #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm4, 1
-	mov	rax, qword ptr [rsp + 216]      # 8-byte Reload
-	vpinsrb	xmm1, xmm1, byte ptr [rdi + rax], 8
-	vinserti128	ymm4, ymm4, xmm1, 1
-	vpextrb	ecx, xmm6, 9
-	test	cl, 1
-	jne	.LBB0_1012
-.LBB0_621:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	r11d, xmm6, 10
-	test	r11b, 1
-	je	.LBB0_622
-.LBB0_1013:                             #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm4, 1
-	mov	rax, qword ptr [rsp + 200]      # 8-byte Reload
-	vpinsrb	xmm1, xmm1, byte ptr [rdi + rax], 10
-	vinserti128	ymm4, ymm4, xmm1, 1
-	vpextrb	eax, xmm6, 11
-	test	al, 1
-	jne	.LBB0_1014
-.LBB0_623:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	esi, xmm6, 12
-	test	sil, 1
-	je	.LBB0_624
-.LBB0_1015:                             #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm4, 1
-	vpinsrb	xmm1, xmm1, byte ptr [rdi + rbx], 12
-	vinserti128	ymm4, ymm4, xmm1, 1
-	vpextrb	r10d, xmm6, 13
-	test	r10b, 1
-	jne	.LBB0_1016
-.LBB0_625:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rdx, qword ptr [rsp + 168]      # 8-byte Reload
-	vpextrb	r13d, xmm6, 14
-	test	r13b, 1
-	je	.LBB0_626
-.LBB0_1017:                             #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm4, 1
-	vpinsrb	xmm1, xmm1, byte ptr [rdi + rdx], 14
-	vinserti128	ymm4, ymm4, xmm1, 1
-	mov	rdx, qword ptr [rsp + 160]      # 8-byte Reload
-	vpextrb	r14d, xmm6, 15
-	test	r14b, 1
-	jne	.LBB0_627
-	jmp	.LBB0_628
-	.p2align	4, 0x90
-.LBB0_612:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 2
-	mov	dword ptr [rsp + 448], ecx      # 4-byte Spill
-	test	cl, 1
-	je	.LBB0_613
-.LBB0_1006:                             #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm4, 1
-	vpinsrb	xmm1, xmm1, byte ptr [rdi + rbx], 2
-	vinserti128	ymm4, ymm4, xmm1, 1
-	vpextrb	ecx, xmm6, 3
-	mov	dword ptr [rsp + 416], ecx      # 4-byte Spill
-	test	cl, 1
-	jne	.LBB0_1007
-.LBB0_614:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 4
-	mov	dword ptr [rsp + 384], ecx      # 4-byte Spill
-	test	cl, 1
-	je	.LBB0_615
-.LBB0_1008:                             #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm4, 1
-	vpinsrb	xmm1, xmm1, byte ptr [rdi + rax], 4
-	vinserti128	ymm4, ymm4, xmm1, 1
-	vpextrb	eax, xmm6, 5
-	mov	dword ptr [rsp + 352], eax      # 4-byte Spill
-	test	al, 1
-	jne	.LBB0_616
-	jmp	.LBB0_617
-	.p2align	4, 0x90
-.LBB0_618:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	eax, xmm6, 7
-	mov	dword ptr [rsp + 152], eax      # 4-byte Spill
-	test	al, 1
-	je	.LBB0_619
-.LBB0_1010:                             #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm4, 1
-	mov	rax, qword ptr [rsp + 240]      # 8-byte Reload
-	vpinsrb	xmm1, xmm1, byte ptr [rdi + rax], 7
-	vinserti128	ymm4, ymm4, xmm1, 1
-	vpextrb	r9d, xmm6, 8
-	test	r9b, 1
-	jne	.LBB0_1011
-.LBB0_620:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ecx, xmm6, 9
-	test	cl, 1
-	je	.LBB0_621
-.LBB0_1012:                             #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm4, 1
-	mov	rax, qword ptr [rsp + 208]      # 8-byte Reload
-	vpinsrb	xmm1, xmm1, byte ptr [rdi + rax], 9
-	vinserti128	ymm4, ymm4, xmm1, 1
-	vpextrb	r11d, xmm6, 10
-	test	r11b, 1
-	jne	.LBB0_1013
-.LBB0_622:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	eax, xmm6, 11
-	test	al, 1
-	je	.LBB0_623
-.LBB0_1014:                             #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm4, 1
-	mov	rsi, qword ptr [rsp + 192]      # 8-byte Reload
-	vpinsrb	xmm1, xmm1, byte ptr [rdi + rsi], 11
-	vinserti128	ymm4, ymm4, xmm1, 1
-	vpextrb	esi, xmm6, 12
-	test	sil, 1
-	jne	.LBB0_1015
-.LBB0_624:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	r10d, xmm6, 13
-	test	r10b, 1
-	je	.LBB0_625
-.LBB0_1016:                             #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm4, 1
-	vpinsrb	xmm1, xmm1, byte ptr [rdi + rdx], 13
-	vinserti128	ymm4, ymm4, xmm1, 1
-	mov	rdx, qword ptr [rsp + 168]      # 8-byte Reload
-	vpextrb	r13d, xmm6, 14
-	test	r13b, 1
-	jne	.LBB0_1017
-.LBB0_626:                              #   in Loop: Header=BB0_26 Depth=1
-	mov	rdx, qword ptr [rsp + 160]      # 8-byte Reload
-	vpextrb	r14d, xmm6, 15
-	test	r14b, 1
-	je	.LBB0_628
-.LBB0_627:                              #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm4, 1
-	vpinsrb	xmm1, xmm1, byte ptr [rdi + rdx], 15
-	vinserti128	ymm4, ymm4, xmm1, 1
-.LBB0_628:                              #   in Loop: Header=BB0_26 Depth=1
-	vpsrlw	ymm1, ymm4, 7
-	vpand	ymm4, ymm1, ymmword ptr [rip + .LCPI0_4]
-	vmovd	r15d, xmm3
-	test	r15b, 1
-	je	.LBB0_629
-# %bb.1018:                             #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rbx, xmm11
-	vpextrb	byte ptr [r8 + rbx], xmm4, 0
-	vpextrb	ebx, xmm3, 1
-	test	bl, 1
-	jne	.LBB0_1019
-.LBB0_630:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm3, 2
-	test	bl, 1
-	je	.LBB0_631
-.LBB0_1020:                             #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm11, 1
-	vmovq	rbx, xmm1
-	vpextrb	byte ptr [r8 + rbx], xmm4, 2
-	vpextrb	ebx, xmm3, 3
-	test	bl, 1
-	jne	.LBB0_1021
-.LBB0_632:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm3, 4
-	test	bl, 1
-	je	.LBB0_633
-.LBB0_1022:                             #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rbx, xmm10
-	vpextrb	byte ptr [r8 + rbx], xmm4, 4
-	vpextrb	ebx, xmm3, 5
-	test	bl, 1
-	jne	.LBB0_1023
-.LBB0_634:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm3, 6
-	test	bl, 1
-	je	.LBB0_635
-.LBB0_1024:                             #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm10, 1
-	vmovq	rbx, xmm1
-	vpextrb	byte ptr [r8 + rbx], xmm4, 6
-	vpextrb	ebx, xmm3, 7
-	test	bl, 1
-	jne	.LBB0_1025
-.LBB0_636:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm3, 8
-	test	bl, 1
-	je	.LBB0_637
-.LBB0_1026:                             #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rbx, xmm9
-	vpextrb	byte ptr [r8 + rbx], xmm4, 8
-	vpextrb	ebx, xmm3, 9
-	test	bl, 1
-	jne	.LBB0_1027
-.LBB0_638:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm3, 10
-	test	bl, 1
-	je	.LBB0_639
-.LBB0_1028:                             #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm9, 1
-	vmovq	rbx, xmm1
-	vpextrb	byte ptr [r8 + rbx], xmm4, 10
-	vpextrb	ebx, xmm3, 11
-	test	bl, 1
-	jne	.LBB0_1029
-.LBB0_640:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm3, 12
-	test	bl, 1
-	je	.LBB0_641
-.LBB0_1030:                             #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rbx, xmm5
-	vpextrb	byte ptr [r8 + rbx], xmm4, 12
-	vpextrb	ebx, xmm3, 13
-	test	bl, 1
-	vmovdqa	ymm9, ymmword ptr [rsp + 896]   # 32-byte Reload
-	jne	.LBB0_1031
-.LBB0_642:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm3, 14
-	test	bl, 1
-	je	.LBB0_643
-.LBB0_1032:                             #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm5, 1
-	vmovq	rbx, xmm1
-	vpextrb	byte ptr [r8 + rbx], xmm4, 14
-	vpextrb	ebx, xmm3, 15
-	test	bl, 1
-	jne	.LBB0_1033
-.LBB0_644:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 512], 1         # 1-byte Folded Reload
-	vmovdqa	ymm3, ymmword ptr [rsp + 832]   # 32-byte Reload
-	je	.LBB0_645
-.LBB0_1034:                             #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rbx, xmm8
-	vextracti128	xmm1, ymm4, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 0
-	test	byte ptr [rsp + 480], 1         # 1-byte Folded Reload
-	jne	.LBB0_1035
-.LBB0_646:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 448], 1         # 1-byte Folded Reload
-	je	.LBB0_647
-.LBB0_1036:                             #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm8, 1
-	vmovq	rbx, xmm1
-	vextracti128	xmm1, ymm4, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 2
-	test	byte ptr [rsp + 416], 1         # 1-byte Folded Reload
-	jne	.LBB0_1037
-.LBB0_648:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 384], 1         # 1-byte Folded Reload
-	je	.LBB0_649
-.LBB0_1038:                             #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rbx, xmm7
-	vextracti128	xmm1, ymm4, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 4
-	test	byte ptr [rsp + 352], 1         # 1-byte Folded Reload
-	vmovdqa	ymm8, ymmword ptr [rsp + 864]   # 32-byte Reload
-	jne	.LBB0_1039
-.LBB0_650:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 320], 1         # 1-byte Folded Reload
-	je	.LBB0_651
-.LBB0_1040:                             #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm7, 1
-	vmovq	rbx, xmm1
-	vextracti128	xmm1, ymm4, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 6
-	test	byte ptr [rsp + 152], 1         # 1-byte Folded Reload
-	jne	.LBB0_1041
-.LBB0_652:                              #   in Loop: Header=BB0_26 Depth=1
-	test	r9b, 1
-	mov	r9d, dword ptr [rsp + 16]       # 4-byte Reload
-	je	.LBB0_653
-.LBB0_1042:                             #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rdx, xmm2
-	vextracti128	xmm1, ymm4, 1
-	vpextrb	byte ptr [r8 + rdx], xmm1, 8
-	test	cl, 1
-	jne	.LBB0_1043
-.LBB0_654:                              #   in Loop: Header=BB0_26 Depth=1
-	test	r11b, 1
-	mov	r11, qword ptr [rsp + 304]      # 8-byte Reload
-	je	.LBB0_655
-.LBB0_1044:                             #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm2, 1
-	vmovq	rcx, xmm1
-	vextracti128	xmm1, ymm4, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 10
-	test	al, 1
-	jne	.LBB0_1045
-.LBB0_656:                              #   in Loop: Header=BB0_26 Depth=1
-	test	sil, 1
-	je	.LBB0_657
-.LBB0_1046:                             #   in Loop: Header=BB0_26 Depth=1
-	vmovq	rcx, xmm15
-	vextracti128	xmm1, ymm4, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 12
-	test	r10b, 1
-	vmovdqa	ymm2, ymmword ptr [rsp + 800]   # 32-byte Reload
-	jne	.LBB0_1047
-.LBB0_658:                              #   in Loop: Header=BB0_26 Depth=1
-	test	r13b, 1
-	mov	r10, qword ptr [rsp + 48]       # 8-byte Reload
-	je	.LBB0_659
-.LBB0_1048:                             #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm15, 1
-	vmovq	rcx, xmm1
-	vextracti128	xmm1, ymm4, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 14
-	test	r14b, 1
-	je	.LBB0_25
-	jmp	.LBB0_1049
-	.p2align	4, 0x90
-.LBB0_629:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm3, 1
-	test	bl, 1
-	je	.LBB0_630
-.LBB0_1019:                             #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rbx, xmm11, 1
-	vpextrb	byte ptr [r8 + rbx], xmm4, 1
-	vpextrb	ebx, xmm3, 2
-	test	bl, 1
-	jne	.LBB0_1020
-.LBB0_631:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm3, 3
-	test	bl, 1
-	je	.LBB0_632
-.LBB0_1021:                             #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm11, 1
-	vpextrq	rbx, xmm1, 1
-	vpextrb	byte ptr [r8 + rbx], xmm4, 3
-	vpextrb	ebx, xmm3, 4
-	test	bl, 1
-	jne	.LBB0_1022
-.LBB0_633:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm3, 5
-	test	bl, 1
-	je	.LBB0_634
-.LBB0_1023:                             #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rbx, xmm10, 1
-	vpextrb	byte ptr [r8 + rbx], xmm4, 5
-	vpextrb	ebx, xmm3, 6
-	test	bl, 1
-	jne	.LBB0_1024
-.LBB0_635:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm3, 7
-	test	bl, 1
-	je	.LBB0_636
-.LBB0_1025:                             #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm10, 1
-	vpextrq	rbx, xmm1, 1
-	vpextrb	byte ptr [r8 + rbx], xmm4, 7
-	vpextrb	ebx, xmm3, 8
-	test	bl, 1
-	jne	.LBB0_1026
-.LBB0_637:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm3, 9
-	test	bl, 1
-	je	.LBB0_638
-.LBB0_1027:                             #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rbx, xmm9, 1
-	vpextrb	byte ptr [r8 + rbx], xmm4, 9
-	vpextrb	ebx, xmm3, 10
-	test	bl, 1
-	jne	.LBB0_1028
-.LBB0_639:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm3, 11
-	test	bl, 1
-	je	.LBB0_640
-.LBB0_1029:                             #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm9, 1
-	vpextrq	rbx, xmm1, 1
-	vpextrb	byte ptr [r8 + rbx], xmm4, 11
-	vpextrb	ebx, xmm3, 12
-	test	bl, 1
-	jne	.LBB0_1030
-.LBB0_641:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm3, 13
-	test	bl, 1
-	vmovdqa	ymm9, ymmword ptr [rsp + 896]   # 32-byte Reload
-	je	.LBB0_642
-.LBB0_1031:                             #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rbx, xmm5, 1
-	vpextrb	byte ptr [r8 + rbx], xmm4, 13
-	vpextrb	ebx, xmm3, 14
-	test	bl, 1
-	jne	.LBB0_1032
-.LBB0_643:                              #   in Loop: Header=BB0_26 Depth=1
-	vpextrb	ebx, xmm3, 15
-	test	bl, 1
-	je	.LBB0_644
-.LBB0_1033:                             #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm5, 1
-	vpextrq	rbx, xmm1, 1
-	vpextrb	byte ptr [r8 + rbx], xmm4, 15
-	test	byte ptr [rsp + 512], 1         # 1-byte Folded Reload
-	vmovdqa	ymm3, ymmword ptr [rsp + 832]   # 32-byte Reload
-	jne	.LBB0_1034
-.LBB0_645:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 480], 1         # 1-byte Folded Reload
-	je	.LBB0_646
-.LBB0_1035:                             #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rbx, xmm8, 1
-	vextracti128	xmm1, ymm4, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 1
-	test	byte ptr [rsp + 448], 1         # 1-byte Folded Reload
-	jne	.LBB0_1036
-.LBB0_647:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 416], 1         # 1-byte Folded Reload
-	je	.LBB0_648
-.LBB0_1037:                             #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm8, 1
-	vpextrq	rbx, xmm1, 1
-	vextracti128	xmm1, ymm4, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 3
-	test	byte ptr [rsp + 384], 1         # 1-byte Folded Reload
-	jne	.LBB0_1038
-.LBB0_649:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 352], 1         # 1-byte Folded Reload
-	vmovdqa	ymm8, ymmword ptr [rsp + 864]   # 32-byte Reload
-	je	.LBB0_650
-.LBB0_1039:                             #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rbx, xmm7, 1
-	vextracti128	xmm1, ymm4, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 5
-	test	byte ptr [rsp + 320], 1         # 1-byte Folded Reload
-	jne	.LBB0_1040
-.LBB0_651:                              #   in Loop: Header=BB0_26 Depth=1
-	test	byte ptr [rsp + 152], 1         # 1-byte Folded Reload
-	je	.LBB0_652
-.LBB0_1041:                             #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm7, 1
-	vpextrq	rbx, xmm1, 1
-	vextracti128	xmm1, ymm4, 1
-	vpextrb	byte ptr [r8 + rbx], xmm1, 7
-	test	r9b, 1
-	mov	r9d, dword ptr [rsp + 16]       # 4-byte Reload
-	jne	.LBB0_1042
-.LBB0_653:                              #   in Loop: Header=BB0_26 Depth=1
-	test	cl, 1
-	je	.LBB0_654
-.LBB0_1043:                             #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rcx, xmm2, 1
-	vextracti128	xmm1, ymm4, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 9
-	test	r11b, 1
-	mov	r11, qword ptr [rsp + 304]      # 8-byte Reload
-	jne	.LBB0_1044
-.LBB0_655:                              #   in Loop: Header=BB0_26 Depth=1
-	test	al, 1
-	je	.LBB0_656
-.LBB0_1045:                             #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm2, 1
-	vpextrq	rcx, xmm1, 1
-	vextracti128	xmm1, ymm4, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 11
-	test	sil, 1
-	jne	.LBB0_1046
-.LBB0_657:                              #   in Loop: Header=BB0_26 Depth=1
-	test	r10b, 1
-	vmovdqa	ymm2, ymmword ptr [rsp + 800]   # 32-byte Reload
-	je	.LBB0_658
-.LBB0_1047:                             #   in Loop: Header=BB0_26 Depth=1
-	vpextrq	rcx, xmm15, 1
-	vextracti128	xmm1, ymm4, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 13
-	test	r13b, 1
-	mov	r10, qword ptr [rsp + 48]       # 8-byte Reload
-	jne	.LBB0_1048
-.LBB0_659:                              #   in Loop: Header=BB0_26 Depth=1
-	test	r14b, 1
-	je	.LBB0_25
-.LBB0_1049:                             #   in Loop: Header=BB0_26 Depth=1
-	vextracti128	xmm1, ymm15, 1
-	vpextrq	rcx, xmm1, 1
-	vextracti128	xmm1, ymm4, 1
-	vpextrb	byte ptr [r8 + rcx], xmm1, 15
-	jmp	.LBB0_25
-.LBB0_1050:
-	cmp	r12, r10
-	jne	.LBB0_1055
-.LBB0_1051:
-	lea	rsp, [rbp - 40]
-	pop	rbx
-	pop	r12
-	pop	r13
-	pop	r14
-	pop	r15
+	mov	r8d, esi
+	shl	r8, 3
+	xor	r10d, r10d
+	jmp	.LBB0_2
+	.p2align	4, 0x90
+.LBB0_4:                                #   in Loop: Header=BB0_2 Depth=1
+	add	r10, 8
+	add	rdi, 1
+	cmp	r8, r10
+	je	.LBB0_5
+.LBB0_2:                                # =>This Inner Loop Header: Depth=1
+	cmp	r10d, ecx
+	jge	.LBB0_4
+# %bb.3:                                #   in Loop: Header=BB0_2 Depth=1
+	mov	r9d, r10d
+	movzx	eax, byte ptr [rdi]
+	and	al, 1
+	mov	byte ptr [rdx + r9], al
+	mov	rsi, r9
+	or	rsi, 1
+	cmp	esi, ecx
+	jge	.LBB0_4
+# %bb.6:                                #   in Loop: Header=BB0_2 Depth=1
+	movzx	eax, byte ptr [rdi]
+	shr	al
+	and	al, 1
+	mov	byte ptr [rdx + rsi], al
+	mov	rsi, r9
+	or	rsi, 2
+	cmp	esi, ecx
+	jge	.LBB0_4
+# %bb.7:                                #   in Loop: Header=BB0_2 Depth=1
+	movzx	eax, byte ptr [rdi]
+	shr	al, 2
+	and	al, 1
+	mov	byte ptr [rdx + rsi], al
+	mov	rsi, r9
+	or	rsi, 3
+	cmp	esi, ecx
+	jge	.LBB0_4
+# %bb.8:                                #   in Loop: Header=BB0_2 Depth=1
+	movzx	eax, byte ptr [rdi]
+	shr	al, 3
+	and	al, 1
+	mov	byte ptr [rdx + rsi], al
+	mov	rsi, r9
+	or	rsi, 4
+	cmp	esi, ecx
+	jge	.LBB0_4
+# %bb.9:                                #   in Loop: Header=BB0_2 Depth=1
+	movzx	eax, byte ptr [rdi]
+	shr	al, 4
+	and	al, 1
+	mov	byte ptr [rdx + rsi], al
+	mov	rsi, r9
+	or	rsi, 5
+	cmp	esi, ecx
+	jge	.LBB0_4
+# %bb.10:                               #   in Loop: Header=BB0_2 Depth=1
+	movzx	eax, byte ptr [rdi]
+	shr	al, 5
+	and	al, 1
+	mov	byte ptr [rdx + rsi], al
+	mov	rsi, r9
+	or	rsi, 6
+	cmp	esi, ecx
+	jge	.LBB0_4
+# %bb.11:                               #   in Loop: Header=BB0_2 Depth=1
+	movzx	eax, byte ptr [rdi]
+	shr	al, 6
+	and	al, 1
+	mov	byte ptr [rdx + rsi], al
+	or	r9, 7
+	cmp	r9d, ecx
+	jge	.LBB0_4
+# %bb.12:                               #   in Loop: Header=BB0_2 Depth=1
+	movzx	eax, byte ptr [rdi]
+	shr	al, 7
+	mov	byte ptr [rdx + r9], al
+	jmp	.LBB0_4
+.LBB0_5:
+	mov	rsp, rbp
 	pop	rbp
-	vzeroupper
 	ret
-.LBB0_1052:
-	mov	r9d, dword ptr [rsp + 16]       # 4-byte Reload
-	mov	r10, qword ptr [rsp + 48]       # 8-byte Reload
-	jmp	.LBB0_1055
-.LBB0_1054:
-	mov	r9d, dword ptr [rsp + 16]       # 4-byte Reload
-	jmp	.LBB0_1055
 .Lfunc_end0:
 	.size	bytes_to_bools_avx2, .Lfunc_end0-bytes_to_bools_avx2
                                         # -- End function
-	.ident	"Ubuntu clang version 11.1.0-++20210204121720+1fdec59bffc1-1~exp1~20210203232336.162"
+	.ident	"Debian clang version 11.1.0-++20210428103820+1fdec59bffc1-1~exp1~20210428204437.162"
 	.section	".note.GNU-stack","",@progbits
 	.addrsig
diff --git a/go/parquet/internal/utils/_lib/unpack_bool_sse4.s b/go/parquet/internal/utils/_lib/unpack_bool_sse4.s
index 18caa0473df..6719771b865 100644
--- a/go/parquet/internal/utils/_lib/unpack_bool_sse4.s
+++ b/go/parquet/internal/utils/_lib/unpack_bool_sse4.s
@@ -99,6 +99,6 @@ bytes_to_bools_sse4:                    # @bytes_to_bools_sse4
 .Lfunc_end0:
 	.size	bytes_to_bools_sse4, .Lfunc_end0-bytes_to_bools_sse4
                                         # -- End function
-	.ident	"Ubuntu clang version 11.1.0-++20210204121720+1fdec59bffc1-1~exp1~20210203232336.162"
+	.ident	"Debian clang version 11.1.0-++20210428103820+1fdec59bffc1-1~exp1~20210428204437.162"
 	.section	".note.GNU-stack","",@progbits
 	.addrsig
diff --git a/go/parquet/internal/utils/bitmap_writer.go b/go/parquet/internal/utils/bitmap_writer.go
index eed9f867554..f7c1f7a57cd 100644
--- a/go/parquet/internal/utils/bitmap_writer.go
+++ b/go/parquet/internal/utils/bitmap_writer.go
@@ -96,6 +96,9 @@ type BitmapWriter interface {
 	Finish()
 	// AppendWord takes nbits from word which should be an LSB bitmap and appends them to the bitmap.
 	AppendWord(word uint64, nbits int64)
+	// AppendBools appends the bit representation of the bools slice, returning the number
+	// of bools that were able to fit in the remaining length of the bitmapwriter.
+	AppendBools(in []bool) int
 	// Pos is the current position that will be written next
 	Pos() int64
 	// Reset allows reusing the bitmapwriter by resetting Pos to start with length as
@@ -140,7 +143,7 @@ func (b *bitmapWriter) Reset(start, length int64) {
 
 func (b *bitmapWriter) Pos() int64 { return b.pos }
 func (b *bitmapWriter) Set()       { b.curByte |= b.bitMask }
-func (b *bitmapWriter) Clear()     { b.curByte &= b.bitMask ^ 0xFF }
+func (b *bitmapWriter) Clear()     { b.curByte &= ^b.bitMask }
 
 func (b *bitmapWriter) Next() {
 	b.bitMask = b.bitMask << 1
@@ -155,6 +158,30 @@ func (b *bitmapWriter) Next() {
 	}
 }
 
+func (b *bitmapWriter) AppendBools(in []bool) int {
+	space := Min(bitutil.BytesForBits(b.length-b.pos), int64(len(in)))
+
+	// location that the first byte needs to be written to for appending
+	appslice := b.buf[int(b.byteOffset):]
+	// update everything but curByte
+	bitOffset := bits.TrailingZeros32(uint32(b.bitMask))
+	appslice[0] = b.curByte
+	for i, b := range in[:space] {
+		if b {
+			bitutil.SetBit(appslice, i)
+		} else {
+			bitutil.ClearBit(appslice, i)
+		}
+	}
+
+	b.pos += space
+	b.bitMask = bitutil.BitMask[(int64(bitOffset)+space)%8]
+	b.byteOffset += (int64(bitOffset) + space) / 8
+	b.curByte = appslice[len(appslice)-1]
+
+	return int(space)
+}
+
 func (b *bitmapWriter) Finish() {
 	if b.length > 0 && (b.bitMask != 0x01 || b.pos < b.length) {
 		b.buf[int(b.byteOffset)] = b.curByte
@@ -267,6 +294,10 @@ func (bw *firstTimeBitmapWriter) Next() {
 	}
 }
 
+func (b *firstTimeBitmapWriter) AppendBools(in []bool) int {
+	panic("Append Bools not yet implemented for firstTimeBitmapWriter")
+}
+
 func (bw *firstTimeBitmapWriter) Finish() {
 	// store curByte into the bitmap
 	if bw.length > 0 && bw.bitMask != 0x01 || bw.pos < bw.length {
diff --git a/go/parquet/internal/utils/min_max_avx2_amd64.s b/go/parquet/internal/utils/min_max_avx2_amd64.s
index 6a1bb18fde6..a54758ba1ed 100644
--- a/go/parquet/internal/utils/min_max_avx2_amd64.s
+++ b/go/parquet/internal/utils/min_max_avx2_amd64.s
@@ -4,364 +4,188 @@
 DATA LCDATA1<>+0x000(SB)/8, $0x7fffffff80000000
 GLOBL LCDATA1<>(SB), 8, $8
 
-TEXT ·_int32_max_min_avx2(SB), $72-32
+TEXT ·_int32_max_min_avx2(SB), $0-32
 
 	MOVQ values+0(FP), DI
 	MOVQ length+8(FP), SI
 	MOVQ minout+16(FP), DX
 	MOVQ maxout+24(FP), CX
-	ADDQ $8, SP
 	LEAQ LCDATA1<>(SB), BP
 
 	WORD $0xf685                   // test    esi, esi
 	JLE  LBB0_1
 	WORD $0x8941; BYTE $0xf0       // mov    r8d, esi
 	WORD $0xfe83; BYTE $0x1f       // cmp    esi, 31
-	JA   LBB0_6
-	LONG $0x000000b8; BYTE $0x80   // mov    eax, -2147483648
-	LONG $0xffffb941; WORD $0x7fff // mov    r9d, 2147483647
-	WORD $0x3145; BYTE $0xdb       // xor    r11d, r11d
-	JMP  LBB0_4
+	JA   LBB0_4
+	LONG $0x0000ba41; WORD $0x8000 // mov    r10d, -2147483648
+	LONG $0xffffffb8; BYTE $0x7f   // mov    eax, 2147483647
+	WORD $0x3145; BYTE $0xc9       // xor    r9d, r9d
+	JMP  LBB0_7
 
 LBB0_1:
-	LONG $0xffffb941; WORD $0x7fff // mov    r9d, 2147483647
-	LONG $0x000000b8; BYTE $0x80   // mov    eax, -2147483648
-	JMP  LBB0_14
+	LONG $0xffffffb8; BYTE $0x7f // mov    eax, 2147483647
+	LONG $0x000000be; BYTE $0x80 // mov    esi, -2147483648
+	JMP  LBB0_8
 
-LBB0_6:
-	WORD $0x8945; BYTE $0xc3       // mov    r11d, r8d
-	LONG $0xe0e38341               // and    r11d, -32
-	LONG $0xe0438d49               // lea    rax, [r11 - 32]
-	WORD $0x8949; BYTE $0xc2       // mov    r10, rax
-	LONG $0x05eac149               // shr    r10, 5
-	LONG $0x01c28349               // add    r10, 1
-	WORD $0x8945; BYTE $0xd1       // mov    r9d, r10d
-	LONG $0x03e18341               // and    r9d, 3
-	LONG $0x60f88348               // cmp    rax, 96
-	JAE  LBB0_8
-	LONG $0x587de2c4; WORD $0x0045 // vpbroadcastd    ymm0, dword 0[rbp] /* [rip + .LCPI0_0] */
-	LONG $0x587de2c4; WORD $0x044d // vpbroadcastd    ymm1, dword 4[rbp] /* [rip + .LCPI0_1] */
-	WORD $0xc031                   // xor    eax, eax
-	LONG $0xd16ffdc5               // vmovdqa    ymm2, ymm1
-	LONG $0xe16ffdc5               // vmovdqa    ymm4, ymm1
-	LONG $0xf16ffdc5               // vmovdqa    ymm6, ymm1
-	LONG $0xd86ffdc5               // vmovdqa    ymm3, ymm0
-	LONG $0xe86ffdc5               // vmovdqa    ymm5, ymm0
-	LONG $0xf86ffdc5               // vmovdqa    ymm7, ymm0
-	JMP  LBB0_10
-
-LBB0_8:
-	LONG $0xfce28349               // and    r10, -4
-	LONG $0x587de2c4; WORD $0x0045 // vpbroadcastd    ymm0, dword 0[rbp] /* [rip + .LCPI0_0] */
-	WORD $0xf749; BYTE $0xda       // neg    r10
-	LONG $0x587de2c4; WORD $0x044d // vpbroadcastd    ymm1, dword 4[rbp] /* [rip + .LCPI0_1] */
+LBB0_4:
+	WORD $0x8945; BYTE $0xc1       // mov    r9d, r8d
+	LONG $0x587de2c4; WORD $0x0065 // vpbroadcastd    ymm4, dword 0[rbp] /* [rip + .LCPI0_0] */
+	LONG $0xe0e18341               // and    r9d, -32
+	LONG $0x587de2c4; WORD $0x0445 // vpbroadcastd    ymm0, dword 4[rbp] /* [rip + .LCPI0_1] */
 	WORD $0xc031                   // xor    eax, eax
-	LONG $0xd16ffdc5               // vmovdqa    ymm2, ymm1
-	LONG $0xe16ffdc5               // vmovdqa    ymm4, ymm1
-	LONG $0xf16ffdc5               // vmovdqa    ymm6, ymm1
+	LONG $0xc86ffdc5               // vmovdqa    ymm1, ymm0
+	LONG $0xd06ffdc5               // vmovdqa    ymm2, ymm0
 	LONG $0xd86ffdc5               // vmovdqa    ymm3, ymm0
-	LONG $0xe86ffdc5               // vmovdqa    ymm5, ymm0
-	LONG $0xf86ffdc5               // vmovdqa    ymm7, ymm0
+	LONG $0xec6ffdc5               // vmovdqa    ymm5, ymm4
+	LONG $0xf46ffdc5               // vmovdqa    ymm6, ymm4
+	LONG $0xfc6ffdc5               // vmovdqa    ymm7, ymm4
 
-LBB0_9:
-	LONG $0x046f7ec5; BYTE $0x87         // vmovdqu    ymm8, yword [rdi + 4*rax]
-	LONG $0x4c6f7ec5; WORD $0x2087       // vmovdqu    ymm9, yword [rdi + 4*rax + 32]
-	LONG $0x546f7ec5; WORD $0x4087       // vmovdqu    ymm10, yword [rdi + 4*rax + 64]
-	LONG $0x5c6f7ec5; WORD $0x6087       // vmovdqu    ymm11, yword [rdi + 4*rax + 96]
-	LONG $0x394dc2c4; BYTE $0xf3         // vpminsd    ymm6, ymm6, ymm11
-	LONG $0x395dc2c4; BYTE $0xe2         // vpminsd    ymm4, ymm4, ymm10
-	LONG $0x3975c2c4; BYTE $0xc8         // vpminsd    ymm1, ymm1, ymm8
-	LONG $0x396dc2c4; BYTE $0xd1         // vpminsd    ymm2, ymm2, ymm9
-	LONG $0x3d45c2c4; BYTE $0xfb         // vpmaxsd    ymm7, ymm7, ymm11
-	LONG $0x3d55c2c4; BYTE $0xea         // vpmaxsd    ymm5, ymm5, ymm10
-	LONG $0x3d7dc2c4; BYTE $0xc0         // vpmaxsd    ymm0, ymm0, ymm8
-	LONG $0x3d65c2c4; BYTE $0xd9         // vpmaxsd    ymm3, ymm3, ymm9
-	QUAD $0x0000e087846f7ec5; BYTE $0x00 // vmovdqu    ymm8, yword [rdi + 4*rax + 224]
-	QUAD $0x0000c0878c6f7ec5; BYTE $0x00 // vmovdqu    ymm9, yword [rdi + 4*rax + 192]
-	QUAD $0x00008087946f7ec5; BYTE $0x00 // vmovdqu    ymm10, yword [rdi + 4*rax + 128]
-	QUAD $0x0000a0879c6f7ec5; BYTE $0x00 // vmovdqu    ymm11, yword [rdi + 4*rax + 160]
-	QUAD $0x00010087a46f7ec5; BYTE $0x00 // vmovdqu    ymm12, yword [rdi + 4*rax + 256]
-	QUAD $0x00014087ac6f7ec5; BYTE $0x00 // vmovdqu    ymm13, yword [rdi + 4*rax + 320]
-	QUAD $0x00016087b46f7ec5; BYTE $0x00 // vmovdqu    ymm14, yword [rdi + 4*rax + 352]
-	LONG $0x393d42c4; BYTE $0xfe         // vpminsd    ymm15, ymm8, ymm14
-	LONG $0x394dc2c4; BYTE $0xf7         // vpminsd    ymm6, ymm6, ymm15
-	LONG $0x347ffdc5; BYTE $0x24         // vmovdqa    yword [rsp], ymm6
-	LONG $0x393542c4; BYTE $0xfd         // vpminsd    ymm15, ymm9, ymm13
-	LONG $0x395dc2c4; BYTE $0xe7         // vpminsd    ymm4, ymm4, ymm15
-	LONG $0x392d42c4; BYTE $0xfc         // vpminsd    ymm15, ymm10, ymm12
-	LONG $0x3975c2c4; BYTE $0xcf         // vpminsd    ymm1, ymm1, ymm15
-	QUAD $0x00012087bc6f7ec5; BYTE $0x00 // vmovdqu    ymm15, yword [rdi + 4*rax + 288]
-	LONG $0x3925c2c4; BYTE $0xf7         // vpminsd    ymm6, ymm11, ymm15
-	LONG $0x396de2c4; BYTE $0xd6         // vpminsd    ymm2, ymm2, ymm6
-	LONG $0x3d3dc2c4; BYTE $0xf6         // vpmaxsd    ymm6, ymm8, ymm14
-	LONG $0x3d45e2c4; BYTE $0xfe         // vpmaxsd    ymm7, ymm7, ymm6
-	LONG $0x3d35c2c4; BYTE $0xf5         // vpmaxsd    ymm6, ymm9, ymm13
-	LONG $0x3d55e2c4; BYTE $0xee         // vpmaxsd    ymm5, ymm5, ymm6
-	LONG $0x3d2dc2c4; BYTE $0xf4         // vpmaxsd    ymm6, ymm10, ymm12
-	LONG $0x3d7de2c4; BYTE $0xc6         // vpmaxsd    ymm0, ymm0, ymm6
-	LONG $0x3d25c2c4; BYTE $0xf7         // vpmaxsd    ymm6, ymm11, ymm15
-	LONG $0x3d65e2c4; BYTE $0xde         // vpmaxsd    ymm3, ymm3, ymm6
-	QUAD $0x0001a087b46ffec5; BYTE $0x00 // vmovdqu    ymm6, yword [rdi + 4*rax + 416]
-	LONG $0x396de2c4; BYTE $0xd6         // vpminsd    ymm2, ymm2, ymm6
-	LONG $0x3d65e2c4; BYTE $0xde         // vpmaxsd    ymm3, ymm3, ymm6
-	QUAD $0x00018087b46ffec5; BYTE $0x00 // vmovdqu    ymm6, yword [rdi + 4*rax + 384]
-	LONG $0x3975e2c4; BYTE $0xce         // vpminsd    ymm1, ymm1, ymm6
-	LONG $0x3d7de2c4; BYTE $0xc6         // vpmaxsd    ymm0, ymm0, ymm6
-	QUAD $0x0001c087b46ffec5; BYTE $0x00 // vmovdqu    ymm6, yword [rdi + 4*rax + 448]
-	LONG $0x395de2c4; BYTE $0xe6         // vpminsd    ymm4, ymm4, ymm6
-	LONG $0x3d55e2c4; BYTE $0xee         // vpmaxsd    ymm5, ymm5, ymm6
-	QUAD $0x0001e087846f7ec5; BYTE $0x00 // vmovdqu    ymm8, yword [rdi + 4*rax + 480]
-	LONG $0x393de2c4; WORD $0x2434       // vpminsd    ymm6, ymm8, yword [rsp]
-	LONG $0x3d45c2c4; BYTE $0xf8         // vpmaxsd    ymm7, ymm7, ymm8
-	LONG $0x80e88348                     // sub    rax, -128
-	LONG $0x04c28349                     // add    r10, 4
-	JNE  LBB0_9
-
-LBB0_10:
-	WORD $0x854d; BYTE $0xc9 // test    r9, r9
-	JE   LBB0_13
-	LONG $0x87048d48         // lea    rax, [rdi + 4*rax]
-	WORD $0xf749; BYTE $0xd9 // neg    r9
-
-LBB0_12:
-	LONG $0x006f7ec5             // vmovdqu    ymm8, yword [rax]
-	LONG $0x486f7ec5; BYTE $0x20 // vmovdqu    ymm9, yword [rax + 32]
-	LONG $0x506f7ec5; BYTE $0x40 // vmovdqu    ymm10, yword [rax + 64]
-	LONG $0x586f7ec5; BYTE $0x60 // vmovdqu    ymm11, yword [rax + 96]
-	LONG $0x396dc2c4; BYTE $0xd1 // vpminsd    ymm2, ymm2, ymm9
-	LONG $0x3975c2c4; BYTE $0xc8 // vpminsd    ymm1, ymm1, ymm8
-	LONG $0x395dc2c4; BYTE $0xe2 // vpminsd    ymm4, ymm4, ymm10
-	LONG $0x394dc2c4; BYTE $0xf3 // vpminsd    ymm6, ymm6, ymm11
-	LONG $0x3d65c2c4; BYTE $0xd9 // vpmaxsd    ymm3, ymm3, ymm9
-	LONG $0x3d7dc2c4; BYTE $0xc0 // vpmaxsd    ymm0, ymm0, ymm8
-	LONG $0x3d55c2c4; BYTE $0xea // vpmaxsd    ymm5, ymm5, ymm10
-	LONG $0x3d45c2c4; BYTE $0xfb // vpmaxsd    ymm7, ymm7, ymm11
-	LONG $0x80e88348             // sub    rax, -128
-	WORD $0xff49; BYTE $0xc1     // inc    r9
-	JNE  LBB0_12
-
-LBB0_13:
-	LONG $0x396de2c4; BYTE $0xd6   // vpminsd    ymm2, ymm2, ymm6
-	LONG $0x3975e2c4; BYTE $0xcc   // vpminsd    ymm1, ymm1, ymm4
-	LONG $0x3975e2c4; BYTE $0xca   // vpminsd    ymm1, ymm1, ymm2
-	LONG $0x3d65e2c4; BYTE $0xd7   // vpmaxsd    ymm2, ymm3, ymm7
-	LONG $0x3d7de2c4; BYTE $0xc5   // vpmaxsd    ymm0, ymm0, ymm5
-	LONG $0x3d7de2c4; BYTE $0xc2   // vpmaxsd    ymm0, ymm0, ymm2
-	LONG $0x397de3c4; WORD $0x01c2 // vextracti128    xmm2, ymm0, 1
-	LONG $0x3d79e2c4; BYTE $0xc2   // vpmaxsd    xmm0, xmm0, xmm2
-	LONG $0xd070f9c5; BYTE $0x4e   // vpshufd    xmm2, xmm0, 78
-	LONG $0x3d79e2c4; BYTE $0xc2   // vpmaxsd    xmm0, xmm0, xmm2
-	LONG $0xd070f9c5; BYTE $0xe5   // vpshufd    xmm2, xmm0, 229
-	LONG $0x3d79e2c4; BYTE $0xc2   // vpmaxsd    xmm0, xmm0, xmm2
-	LONG $0xc07ef9c5               // vmovd    eax, xmm0
-	LONG $0x397de3c4; WORD $0x01c8 // vextracti128    xmm0, ymm1, 1
-	LONG $0x3971e2c4; BYTE $0xc0   // vpminsd    xmm0, xmm1, xmm0
+LBB0_5:
+	LONG $0x046f7ec5; BYTE $0x87   // vmovdqu    ymm8, yword [rdi + 4*rax]
+	LONG $0x4c6f7ec5; WORD $0x2087 // vmovdqu    ymm9, yword [rdi + 4*rax + 32]
+	LONG $0x546f7ec5; WORD $0x4087 // vmovdqu    ymm10, yword [rdi + 4*rax + 64]
+	LONG $0x5c6f7ec5; WORD $0x6087 // vmovdqu    ymm11, yword [rdi + 4*rax + 96]
+	LONG $0x397dc2c4; BYTE $0xc0   // vpminsd    ymm0, ymm0, ymm8
+	LONG $0x3975c2c4; BYTE $0xc9   // vpminsd    ymm1, ymm1, ymm9
+	LONG $0x396dc2c4; BYTE $0xd2   // vpminsd    ymm2, ymm2, ymm10
+	LONG $0x3965c2c4; BYTE $0xdb   // vpminsd    ymm3, ymm3, ymm11
+	LONG $0x3d5dc2c4; BYTE $0xe0   // vpmaxsd    ymm4, ymm4, ymm8
+	LONG $0x3d55c2c4; BYTE $0xe9   // vpmaxsd    ymm5, ymm5, ymm9
+	LONG $0x3d4dc2c4; BYTE $0xf2   // vpmaxsd    ymm6, ymm6, ymm10
+	LONG $0x3d45c2c4; BYTE $0xfb   // vpmaxsd    ymm7, ymm7, ymm11
+	LONG $0x20c08348               // add    rax, 32
+	WORD $0x3949; BYTE $0xc1       // cmp    r9, rax
+	JNE  LBB0_5
+	LONG $0x3d5de2c4; BYTE $0xe5   // vpmaxsd    ymm4, ymm4, ymm5
+	LONG $0x3d5de2c4; BYTE $0xe6   // vpmaxsd    ymm4, ymm4, ymm6
+	LONG $0x3d5de2c4; BYTE $0xe7   // vpmaxsd    ymm4, ymm4, ymm7
+	LONG $0x397de3c4; WORD $0x01e5 // vextracti128    xmm5, ymm4, 1
+	LONG $0x3d59e2c4; BYTE $0xe5   // vpmaxsd    xmm4, xmm4, xmm5
+	LONG $0xec70f9c5; BYTE $0x4e   // vpshufd    xmm5, xmm4, 78
+	LONG $0x3d59e2c4; BYTE $0xe5   // vpmaxsd    xmm4, xmm4, xmm5
+	LONG $0xec70f9c5; BYTE $0xe5   // vpshufd    xmm5, xmm4, 229
+	LONG $0x3d59e2c4; BYTE $0xe5   // vpmaxsd    xmm4, xmm4, xmm5
+	LONG $0x7e79c1c4; BYTE $0xe2   // vmovd    r10d, xmm4
+	LONG $0x397de2c4; BYTE $0xc1   // vpminsd    ymm0, ymm0, ymm1
+	LONG $0x397de2c4; BYTE $0xc2   // vpminsd    ymm0, ymm0, ymm2
+	LONG $0x397de2c4; BYTE $0xc3   // vpminsd    ymm0, ymm0, ymm3
+	LONG $0x397de3c4; WORD $0x01c1 // vextracti128    xmm1, ymm0, 1
+	LONG $0x3979e2c4; BYTE $0xc1   // vpminsd    xmm0, xmm0, xmm1
 	LONG $0xc870f9c5; BYTE $0x4e   // vpshufd    xmm1, xmm0, 78
 	LONG $0x3979e2c4; BYTE $0xc1   // vpminsd    xmm0, xmm0, xmm1
 	LONG $0xc870f9c5; BYTE $0xe5   // vpshufd    xmm1, xmm0, 229
 	LONG $0x3979e2c4; BYTE $0xc1   // vpminsd    xmm0, xmm0, xmm1
-	LONG $0x7e79c1c4; BYTE $0xc1   // vmovd    r9d, xmm0
-	WORD $0x394d; BYTE $0xc3       // cmp    r11, r8
-	JE   LBB0_14
-
-LBB0_4:
-	WORD $0xc689 // mov    esi, eax
+	LONG $0xc07ef9c5               // vmovd    eax, xmm0
+	WORD $0x8944; BYTE $0xd6       // mov    esi, r10d
+	WORD $0x394d; BYTE $0xc1       // cmp    r9, r8
+	JE   LBB0_8
 
-LBB0_5:
-	LONG $0x9f048b42         // mov    eax, dword [rdi + 4*r11]
-	WORD $0x3941; BYTE $0xc1 // cmp    r9d, eax
-	LONG $0xc84f0f44         // cmovg    r9d, eax
-	WORD $0xc639             // cmp    esi, eax
-	WORD $0x4d0f; BYTE $0xc6 // cmovge    eax, esi
-	LONG $0x01c38349         // add    r11, 1
-	WORD $0xc689             // mov    esi, eax
-	WORD $0x394d; BYTE $0xd8 // cmp    r8, r11
-	JNE  LBB0_5
+LBB0_7:
+	LONG $0x8f348b42         // mov    esi, dword [rdi + 4*r9]
+	WORD $0xf039             // cmp    eax, esi
+	WORD $0x4f0f; BYTE $0xc6 // cmovg    eax, esi
+	WORD $0x3941; BYTE $0xf2 // cmp    r10d, esi
+	LONG $0xf24d0f41         // cmovge    esi, r10d
+	LONG $0x01c18349         // add    r9, 1
+	WORD $0x8941; BYTE $0xf2 // mov    r10d, esi
+	WORD $0x394d; BYTE $0xc8 // cmp    r8, r9
+	JNE  LBB0_7
 
-LBB0_14:
-	WORD $0x0189             // mov    dword [rcx], eax
-	WORD $0x8944; BYTE $0x0a // mov    dword [rdx], r9d
-	SUBQ $8, SP
+LBB0_8:
+	WORD $0x3189 // mov    dword [rcx], esi
+	WORD $0x0289 // mov    dword [rdx], eax
 	VZEROUPPER
 	RET
 
-TEXT ·_uint32_max_min_avx2(SB), $72-32
+TEXT ·_uint32_max_min_avx2(SB), $0-32
 
 	MOVQ values+0(FP), DI
 	MOVQ length+8(FP), SI
 	MOVQ minout+16(FP), DX
 	MOVQ maxout+24(FP), CX
-	ADDQ $8, SP
 
-	WORD $0xf685                   // test    esi, esi
+	WORD $0xf685                 // test    esi, esi
 	JLE  LBB1_1
-	WORD $0x8941; BYTE $0xf0       // mov    r8d, esi
-	WORD $0xfe83; BYTE $0x1f       // cmp    esi, 31
-	JA   LBB1_6
-	WORD $0x3145; BYTE $0xdb       // xor    r11d, r11d
-	LONG $0xffffb941; WORD $0xffff // mov    r9d, -1
-	WORD $0xf631                   // xor    esi, esi
-	JMP  LBB1_4
+	WORD $0x8941; BYTE $0xf0     // mov    r8d, esi
+	WORD $0xfe83; BYTE $0x1f     // cmp    esi, 31
+	JA   LBB1_4
+	WORD $0x3145; BYTE $0xc9     // xor    r9d, r9d
+	LONG $0xffffffb8; BYTE $0xff // mov    eax, -1
+	WORD $0x3145; BYTE $0xd2     // xor    r10d, r10d
+	JMP  LBB1_7
 
 LBB1_1:
-	LONG $0xffffb941; WORD $0xffff // mov    r9d, -1
-	WORD $0xf631                   // xor    esi, esi
-	JMP  LBB1_14
+	LONG $0xffffffb8; BYTE $0xff // mov    eax, -1
+	WORD $0xf631                 // xor    esi, esi
+	JMP  LBB1_8
 
-LBB1_6:
-	WORD $0x8945; BYTE $0xc3 // mov    r11d, r8d
-	LONG $0xe0e38341         // and    r11d, -32
-	LONG $0xe0438d49         // lea    rax, [r11 - 32]
-	WORD $0x8949; BYTE $0xc2 // mov    r10, rax
-	LONG $0x05eac149         // shr    r10, 5
-	LONG $0x01c28349         // add    r10, 1
-	WORD $0x8945; BYTE $0xd1 // mov    r9d, r10d
-	LONG $0x03e18341         // and    r9d, 3
-	LONG $0x60f88348         // cmp    rax, 96
-	JAE  LBB1_8
-	LONG $0xc0eff9c5         // vpxor    xmm0, xmm0, xmm0
-	LONG $0xc976f5c5         // vpcmpeqd    ymm1, ymm1, ymm1
+LBB1_4:
+	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
+	LONG $0xe0e18341         // and    r9d, -32
+	LONG $0xe4efd9c5         // vpxor    xmm4, xmm4, xmm4
+	LONG $0xc076fdc5         // vpcmpeqd    ymm0, ymm0, ymm0
 	WORD $0xc031             // xor    eax, eax
-	LONG $0xd276edc5         // vpcmpeqd    ymm2, ymm2, ymm2
-	LONG $0xe476ddc5         // vpcmpeqd    ymm4, ymm4, ymm4
-	LONG $0xf676cdc5         // vpcmpeqd    ymm6, ymm6, ymm6
-	LONG $0xdbefe1c5         // vpxor    xmm3, xmm3, xmm3
-	LONG $0xedefd1c5         // vpxor    xmm5, xmm5, xmm5
-	LONG $0xffefc1c5         // vpxor    xmm7, xmm7, xmm7
-	JMP  LBB1_10
-
-LBB1_8:
-	LONG $0xfce28349         // and    r10, -4
-	WORD $0xf749; BYTE $0xda // neg    r10
-	LONG $0xc0eff9c5         // vpxor    xmm0, xmm0, xmm0
 	LONG $0xc976f5c5         // vpcmpeqd    ymm1, ymm1, ymm1
-	WORD $0xc031             // xor    eax, eax
 	LONG $0xd276edc5         // vpcmpeqd    ymm2, ymm2, ymm2
-	LONG $0xe476ddc5         // vpcmpeqd    ymm4, ymm4, ymm4
-	LONG $0xf676cdc5         // vpcmpeqd    ymm6, ymm6, ymm6
-	LONG $0xdbefe1c5         // vpxor    xmm3, xmm3, xmm3
+	LONG $0xdb76e5c5         // vpcmpeqd    ymm3, ymm3, ymm3
 	LONG $0xedefd1c5         // vpxor    xmm5, xmm5, xmm5
+	LONG $0xf6efc9c5         // vpxor    xmm6, xmm6, xmm6
 	LONG $0xffefc1c5         // vpxor    xmm7, xmm7, xmm7
 
-LBB1_9:
-	LONG $0x046f7ec5; BYTE $0x87         // vmovdqu    ymm8, yword [rdi + 4*rax]
-	LONG $0x4c6f7ec5; WORD $0x2087       // vmovdqu    ymm9, yword [rdi + 4*rax + 32]
-	LONG $0x546f7ec5; WORD $0x4087       // vmovdqu    ymm10, yword [rdi + 4*rax + 64]
-	LONG $0x5c6f7ec5; WORD $0x6087       // vmovdqu    ymm11, yword [rdi + 4*rax + 96]
-	LONG $0x3b4dc2c4; BYTE $0xf3         // vpminud    ymm6, ymm6, ymm11
-	LONG $0x3b5dc2c4; BYTE $0xe2         // vpminud    ymm4, ymm4, ymm10
-	LONG $0x3b75c2c4; BYTE $0xc8         // vpminud    ymm1, ymm1, ymm8
-	LONG $0x3b6dc2c4; BYTE $0xd1         // vpminud    ymm2, ymm2, ymm9
-	LONG $0x3f45c2c4; BYTE $0xfb         // vpmaxud    ymm7, ymm7, ymm11
-	LONG $0x3f55c2c4; BYTE $0xea         // vpmaxud    ymm5, ymm5, ymm10
-	LONG $0x3f7dc2c4; BYTE $0xc0         // vpmaxud    ymm0, ymm0, ymm8
-	LONG $0x3f65c2c4; BYTE $0xd9         // vpmaxud    ymm3, ymm3, ymm9
-	QUAD $0x0000e087846f7ec5; BYTE $0x00 // vmovdqu    ymm8, yword [rdi + 4*rax + 224]
-	QUAD $0x0000c0878c6f7ec5; BYTE $0x00 // vmovdqu    ymm9, yword [rdi + 4*rax + 192]
-	QUAD $0x00008087946f7ec5; BYTE $0x00 // vmovdqu    ymm10, yword [rdi + 4*rax + 128]
-	QUAD $0x0000a0879c6f7ec5; BYTE $0x00 // vmovdqu    ymm11, yword [rdi + 4*rax + 160]
-	QUAD $0x00010087a46f7ec5; BYTE $0x00 // vmovdqu    ymm12, yword [rdi + 4*rax + 256]
-	QUAD $0x00014087ac6f7ec5; BYTE $0x00 // vmovdqu    ymm13, yword [rdi + 4*rax + 320]
-	QUAD $0x00016087b46f7ec5; BYTE $0x00 // vmovdqu    ymm14, yword [rdi + 4*rax + 352]
-	LONG $0x3b3d42c4; BYTE $0xfe         // vpminud    ymm15, ymm8, ymm14
-	LONG $0x3b4dc2c4; BYTE $0xf7         // vpminud    ymm6, ymm6, ymm15
-	LONG $0x347ffdc5; BYTE $0x24         // vmovdqa    yword [rsp], ymm6
-	LONG $0x3b3542c4; BYTE $0xfd         // vpminud    ymm15, ymm9, ymm13
-	LONG $0x3b5dc2c4; BYTE $0xe7         // vpminud    ymm4, ymm4, ymm15
-	LONG $0x3b2d42c4; BYTE $0xfc         // vpminud    ymm15, ymm10, ymm12
-	LONG $0x3b75c2c4; BYTE $0xcf         // vpminud    ymm1, ymm1, ymm15
-	QUAD $0x00012087bc6f7ec5; BYTE $0x00 // vmovdqu    ymm15, yword [rdi + 4*rax + 288]
-	LONG $0x3b25c2c4; BYTE $0xf7         // vpminud    ymm6, ymm11, ymm15
-	LONG $0x3b6de2c4; BYTE $0xd6         // vpminud    ymm2, ymm2, ymm6
-	LONG $0x3f3dc2c4; BYTE $0xf6         // vpmaxud    ymm6, ymm8, ymm14
-	LONG $0x3f45e2c4; BYTE $0xfe         // vpmaxud    ymm7, ymm7, ymm6
-	LONG $0x3f35c2c4; BYTE $0xf5         // vpmaxud    ymm6, ymm9, ymm13
-	LONG $0x3f55e2c4; BYTE $0xee         // vpmaxud    ymm5, ymm5, ymm6
-	LONG $0x3f2dc2c4; BYTE $0xf4         // vpmaxud    ymm6, ymm10, ymm12
-	LONG $0x3f7de2c4; BYTE $0xc6         // vpmaxud    ymm0, ymm0, ymm6
-	LONG $0x3f25c2c4; BYTE $0xf7         // vpmaxud    ymm6, ymm11, ymm15
-	LONG $0x3f65e2c4; BYTE $0xde         // vpmaxud    ymm3, ymm3, ymm6
-	QUAD $0x0001a087b46ffec5; BYTE $0x00 // vmovdqu    ymm6, yword [rdi + 4*rax + 416]
-	LONG $0x3b6de2c4; BYTE $0xd6         // vpminud    ymm2, ymm2, ymm6
-	LONG $0x3f65e2c4; BYTE $0xde         // vpmaxud    ymm3, ymm3, ymm6
-	QUAD $0x00018087b46ffec5; BYTE $0x00 // vmovdqu    ymm6, yword [rdi + 4*rax + 384]
-	LONG $0x3b75e2c4; BYTE $0xce         // vpminud    ymm1, ymm1, ymm6
-	LONG $0x3f7de2c4; BYTE $0xc6         // vpmaxud    ymm0, ymm0, ymm6
-	QUAD $0x0001c087b46ffec5; BYTE $0x00 // vmovdqu    ymm6, yword [rdi + 4*rax + 448]
-	LONG $0x3b5de2c4; BYTE $0xe6         // vpminud    ymm4, ymm4, ymm6
-	LONG $0x3f55e2c4; BYTE $0xee         // vpmaxud    ymm5, ymm5, ymm6
-	QUAD $0x0001e087846f7ec5; BYTE $0x00 // vmovdqu    ymm8, yword [rdi + 4*rax + 480]
-	LONG $0x3b3de2c4; WORD $0x2434       // vpminud    ymm6, ymm8, yword [rsp]
-	LONG $0x3f45c2c4; BYTE $0xf8         // vpmaxud    ymm7, ymm7, ymm8
-	LONG $0x80e88348                     // sub    rax, -128
-	LONG $0x04c28349                     // add    r10, 4
-	JNE  LBB1_9
-
-LBB1_10:
-	WORD $0x854d; BYTE $0xc9 // test    r9, r9
-	JE   LBB1_13
-	LONG $0x87048d48         // lea    rax, [rdi + 4*rax]
-	WORD $0xf749; BYTE $0xd9 // neg    r9
-
-LBB1_12:
-	LONG $0x006f7ec5             // vmovdqu    ymm8, yword [rax]
-	LONG $0x486f7ec5; BYTE $0x20 // vmovdqu    ymm9, yword [rax + 32]
-	LONG $0x506f7ec5; BYTE $0x40 // vmovdqu    ymm10, yword [rax + 64]
-	LONG $0x586f7ec5; BYTE $0x60 // vmovdqu    ymm11, yword [rax + 96]
-	LONG $0x3b6dc2c4; BYTE $0xd1 // vpminud    ymm2, ymm2, ymm9
-	LONG $0x3b75c2c4; BYTE $0xc8 // vpminud    ymm1, ymm1, ymm8
-	LONG $0x3b5dc2c4; BYTE $0xe2 // vpminud    ymm4, ymm4, ymm10
-	LONG $0x3b4dc2c4; BYTE $0xf3 // vpminud    ymm6, ymm6, ymm11
-	LONG $0x3f65c2c4; BYTE $0xd9 // vpmaxud    ymm3, ymm3, ymm9
-	LONG $0x3f7dc2c4; BYTE $0xc0 // vpmaxud    ymm0, ymm0, ymm8
-	LONG $0x3f55c2c4; BYTE $0xea // vpmaxud    ymm5, ymm5, ymm10
-	LONG $0x3f45c2c4; BYTE $0xfb // vpmaxud    ymm7, ymm7, ymm11
-	LONG $0x80e88348             // sub    rax, -128
-	WORD $0xff49; BYTE $0xc1     // inc    r9
-	JNE  LBB1_12
-
-LBB1_13:
-	LONG $0x3b6de2c4; BYTE $0xd6   // vpminud    ymm2, ymm2, ymm6
-	LONG $0x3b75e2c4; BYTE $0xcc   // vpminud    ymm1, ymm1, ymm4
-	LONG $0x3b75e2c4; BYTE $0xca   // vpminud    ymm1, ymm1, ymm2
-	LONG $0x3f65e2c4; BYTE $0xd7   // vpmaxud    ymm2, ymm3, ymm7
-	LONG $0x3f7de2c4; BYTE $0xc5   // vpmaxud    ymm0, ymm0, ymm5
-	LONG $0x3f7de2c4; BYTE $0xc2   // vpmaxud    ymm0, ymm0, ymm2
-	LONG $0x397de3c4; WORD $0x01c2 // vextracti128    xmm2, ymm0, 1
-	LONG $0x3f79e2c4; BYTE $0xc2   // vpmaxud    xmm0, xmm0, xmm2
-	LONG $0xd070f9c5; BYTE $0x4e   // vpshufd    xmm2, xmm0, 78
-	LONG $0x3f79e2c4; BYTE $0xc2   // vpmaxud    xmm0, xmm0, xmm2
-	LONG $0xd070f9c5; BYTE $0xe5   // vpshufd    xmm2, xmm0, 229
-	LONG $0x3f79e2c4; BYTE $0xc2   // vpmaxud    xmm0, xmm0, xmm2
-	LONG $0xc67ef9c5               // vmovd    esi, xmm0
-	LONG $0x397de3c4; WORD $0x01c8 // vextracti128    xmm0, ymm1, 1
-	LONG $0x3b71e2c4; BYTE $0xc0   // vpminud    xmm0, xmm1, xmm0
+LBB1_5:
+	LONG $0x046f7ec5; BYTE $0x87   // vmovdqu    ymm8, yword [rdi + 4*rax]
+	LONG $0x4c6f7ec5; WORD $0x2087 // vmovdqu    ymm9, yword [rdi + 4*rax + 32]
+	LONG $0x546f7ec5; WORD $0x4087 // vmovdqu    ymm10, yword [rdi + 4*rax + 64]
+	LONG $0x5c6f7ec5; WORD $0x6087 // vmovdqu    ymm11, yword [rdi + 4*rax + 96]
+	LONG $0x3b7dc2c4; BYTE $0xc0   // vpminud    ymm0, ymm0, ymm8
+	LONG $0x3b75c2c4; BYTE $0xc9   // vpminud    ymm1, ymm1, ymm9
+	LONG $0x3b6dc2c4; BYTE $0xd2   // vpminud    ymm2, ymm2, ymm10
+	LONG $0x3b65c2c4; BYTE $0xdb   // vpminud    ymm3, ymm3, ymm11
+	LONG $0x3f5dc2c4; BYTE $0xe0   // vpmaxud    ymm4, ymm4, ymm8
+	LONG $0x3f55c2c4; BYTE $0xe9   // vpmaxud    ymm5, ymm5, ymm9
+	LONG $0x3f4dc2c4; BYTE $0xf2   // vpmaxud    ymm6, ymm6, ymm10
+	LONG $0x3f45c2c4; BYTE $0xfb   // vpmaxud    ymm7, ymm7, ymm11
+	LONG $0x20c08348               // add    rax, 32
+	WORD $0x3949; BYTE $0xc1       // cmp    r9, rax
+	JNE  LBB1_5
+	LONG $0x3f5de2c4; BYTE $0xe5   // vpmaxud    ymm4, ymm4, ymm5
+	LONG $0x3f5de2c4; BYTE $0xe6   // vpmaxud    ymm4, ymm4, ymm6
+	LONG $0x3f5de2c4; BYTE $0xe7   // vpmaxud    ymm4, ymm4, ymm7
+	LONG $0x397de3c4; WORD $0x01e5 // vextracti128    xmm5, ymm4, 1
+	LONG $0x3f59e2c4; BYTE $0xe5   // vpmaxud    xmm4, xmm4, xmm5
+	LONG $0xec70f9c5; BYTE $0x4e   // vpshufd    xmm5, xmm4, 78
+	LONG $0x3f59e2c4; BYTE $0xe5   // vpmaxud    xmm4, xmm4, xmm5
+	LONG $0xec70f9c5; BYTE $0xe5   // vpshufd    xmm5, xmm4, 229
+	LONG $0x3f59e2c4; BYTE $0xe5   // vpmaxud    xmm4, xmm4, xmm5
+	LONG $0x7e79c1c4; BYTE $0xe2   // vmovd    r10d, xmm4
+	LONG $0x3b7de2c4; BYTE $0xc1   // vpminud    ymm0, ymm0, ymm1
+	LONG $0x3b7de2c4; BYTE $0xc2   // vpminud    ymm0, ymm0, ymm2
+	LONG $0x3b7de2c4; BYTE $0xc3   // vpminud    ymm0, ymm0, ymm3
+	LONG $0x397de3c4; WORD $0x01c1 // vextracti128    xmm1, ymm0, 1
+	LONG $0x3b79e2c4; BYTE $0xc1   // vpminud    xmm0, xmm0, xmm1
 	LONG $0xc870f9c5; BYTE $0x4e   // vpshufd    xmm1, xmm0, 78
 	LONG $0x3b79e2c4; BYTE $0xc1   // vpminud    xmm0, xmm0, xmm1
 	LONG $0xc870f9c5; BYTE $0xe5   // vpshufd    xmm1, xmm0, 229
 	LONG $0x3b79e2c4; BYTE $0xc1   // vpminud    xmm0, xmm0, xmm1
-	LONG $0x7e79c1c4; BYTE $0xc1   // vmovd    r9d, xmm0
-	WORD $0x394d; BYTE $0xc3       // cmp    r11, r8
-	JE   LBB1_14
-
-LBB1_4:
-	WORD $0xf089 // mov    eax, esi
+	LONG $0xc07ef9c5               // vmovd    eax, xmm0
+	WORD $0x8944; BYTE $0xd6       // mov    esi, r10d
+	WORD $0x394d; BYTE $0xc1       // cmp    r9, r8
+	JE   LBB1_8
 
-LBB1_5:
-	LONG $0x9f348b42         // mov    esi, dword [rdi + 4*r11]
-	WORD $0x3941; BYTE $0xf1 // cmp    r9d, esi
-	LONG $0xce430f44         // cmovae    r9d, esi
+LBB1_7:
+	LONG $0x8f348b42         // mov    esi, dword [rdi + 4*r9]
 	WORD $0xf039             // cmp    eax, esi
-	WORD $0x470f; BYTE $0xf0 // cmova    esi, eax
-	LONG $0x01c38349         // add    r11, 1
-	WORD $0xf089             // mov    eax, esi
-	WORD $0x394d; BYTE $0xd8 // cmp    r8, r11
-	JNE  LBB1_5
+	WORD $0x430f; BYTE $0xc6 // cmovae    eax, esi
+	WORD $0x3941; BYTE $0xf2 // cmp    r10d, esi
+	LONG $0xf2470f41         // cmova    esi, r10d
+	LONG $0x01c18349         // add    r9, 1
+	WORD $0x8941; BYTE $0xf2 // mov    r10d, esi
+	WORD $0x394d; BYTE $0xc8 // cmp    r8, r9
+	JNE  LBB1_7
 
-LBB1_14:
-	WORD $0x3189             // mov    dword [rcx], esi
-	WORD $0x8944; BYTE $0x0a // mov    dword [rdx], r9d
-	SUBQ $8, SP
+LBB1_8:
+	WORD $0x3189 // mov    dword [rcx], esi
+	WORD $0x0289 // mov    dword [rdx], eax
 	VZEROUPPER
 	RET
 
@@ -369,984 +193,251 @@ DATA LCDATA2<>+0x000(SB)/8, $0x8000000000000000
 DATA LCDATA2<>+0x008(SB)/8, $0x7fffffffffffffff
 GLOBL LCDATA2<>(SB), 8, $16
 
-TEXT ·_int64_max_min_avx2(SB), $232-32
+TEXT ·_int64_max_min_avx2(SB), $0-32
 
 	MOVQ values+0(FP), DI
 	MOVQ length+8(FP), SI
 	MOVQ minout+16(FP), DX
 	MOVQ maxout+24(FP), CX
-	ADDQ $8, SP
 	LEAQ LCDATA2<>(SB), BP
 
-	QUAD $0xffffffffffffb949; WORD $0x7fff // mov    r9, 9223372036854775807
+	QUAD $0xffffffffffffb848; WORD $0x7fff // mov    rax, 9223372036854775807
 	WORD $0xf685                           // test    esi, esi
 	JLE  LBB2_1
 	WORD $0x8941; BYTE $0xf0               // mov    r8d, esi
-	WORD $0xfe83; BYTE $0x1f               // cmp    esi, 31
-	JA   LBB2_6
-	LONG $0x01718d49                       // lea    rsi, [r9 + 1]
-	WORD $0x3145; BYTE $0xdb               // xor    r11d, r11d
-	JMP  LBB2_4
+	WORD $0xfe83; BYTE $0x0f               // cmp    esi, 15
+	JA   LBB2_4
+	LONG $0x01508d4c                       // lea    r10, [rax + 1]
+	WORD $0x3145; BYTE $0xc9               // xor    r9d, r9d
+	JMP  LBB2_7
 
 LBB2_1:
-	LONG $0x01718d49 // lea    rsi, [r9 + 1]
-	JMP  LBB2_14
+	LONG $0x01708d48 // lea    rsi, [rax + 1]
+	JMP  LBB2_8
 
-LBB2_6:
-	WORD $0x8945; BYTE $0xc3       // mov    r11d, r8d
-	LONG $0xe0e38341               // and    r11d, -32
-	LONG $0xe0438d49               // lea    rax, [r11 - 32]
-	WORD $0x8949; BYTE $0xc2       // mov    r10, rax
-	LONG $0x05eac149               // shr    r10, 5
-	LONG $0x01c28349               // add    r10, 1
-	WORD $0x8945; BYTE $0xd1       // mov    r9d, r10d
-	LONG $0x03e18341               // and    r9d, 3
-	LONG $0x60f88348               // cmp    rax, 96
-	JAE  LBB2_8
-	LONG $0x597d62c4; WORD $0x007d // vpbroadcastq    ymm15, qword 0[rbp] /* [rip + .LCPI2_0] */
-	LONG $0x597d62c4; WORD $0x085d // vpbroadcastq    ymm11, qword 8[rbp] /* [rip + .LCPI2_1] */
-	WORD $0xc031                   // xor    eax, eax
-	LONG $0x5c7f7dc5; WORD $0x2024 // vmovdqa    yword [rsp + 32], ymm11
-	LONG $0x6f7dc1c4; BYTE $0xdb   // vmovdqa    ymm3, ymm11
-	LONG $0x6f7d41c4; BYTE $0xcb   // vmovdqa    ymm9, ymm11
-	LONG $0x6f7dc1c4; BYTE $0xeb   // vmovdqa    ymm5, ymm11
-	LONG $0x6f7dc1c4; BYTE $0xe3   // vmovdqa    ymm4, ymm11
-	LONG $0x6f7dc1c4; BYTE $0xf3   // vmovdqa    ymm6, ymm11
-	LONG $0x5c7f7dc5; WORD $0x6024 // vmovdqa    yword [rsp + 96], ymm11
-	LONG $0x7c7f7dc5; WORD $0x4024 // vmovdqa    yword [rsp + 64], ymm15
-	LONG $0x6f7dc1c4; BYTE $0xd7   // vmovdqa    ymm2, ymm15
-	LONG $0x6f7d41c4; BYTE $0xc7   // vmovdqa    ymm8, ymm15
-	LONG $0x6f7d41c4; BYTE $0xe7   // vmovdqa    ymm12, ymm15
-	LONG $0x6f7d41c4; BYTE $0xef   // vmovdqa    ymm13, ymm15
-	LONG $0x6f7d41c4; BYTE $0xf7   // vmovdqa    ymm14, ymm15
-	LONG $0x3c7f7dc5; BYTE $0x24   // vmovdqa    yword [rsp], ymm15
-	JMP  LBB2_10
-
-LBB2_8:
-	LONG $0xfce28349               // and    r10, -4
-	LONG $0x597d62c4; WORD $0x007d // vpbroadcastq    ymm15, qword 0[rbp] /* [rip + .LCPI2_0] */
-	WORD $0xf749; BYTE $0xda       // neg    r10
-	LONG $0x597d62c4; WORD $0x085d // vpbroadcastq    ymm11, qword 8[rbp] /* [rip + .LCPI2_1] */
+LBB2_4:
+	WORD $0x8945; BYTE $0xc1       // mov    r9d, r8d
+	LONG $0x597de2c4; WORD $0x0065 // vpbroadcastq    ymm4, qword 0[rbp] /* [rip + .LCPI2_0] */
+	LONG $0xf0e18341               // and    r9d, -16
+	LONG $0x597de2c4; WORD $0x0845 // vpbroadcastq    ymm0, qword 8[rbp] /* [rip + .LCPI2_1] */
 	WORD $0xc031                   // xor    eax, eax
-	LONG $0x5c7f7dc5; WORD $0x2024 // vmovdqa    yword [rsp + 32], ymm11
-	LONG $0x6f7dc1c4; BYTE $0xdb   // vmovdqa    ymm3, ymm11
-	LONG $0x6f7d41c4; BYTE $0xcb   // vmovdqa    ymm9, ymm11
-	LONG $0x6f7dc1c4; BYTE $0xeb   // vmovdqa    ymm5, ymm11
-	LONG $0x6f7dc1c4; BYTE $0xe3   // vmovdqa    ymm4, ymm11
-	LONG $0x6f7dc1c4; BYTE $0xf3   // vmovdqa    ymm6, ymm11
-	LONG $0x5c7f7dc5; WORD $0x6024 // vmovdqa    yword [rsp + 96], ymm11
-	LONG $0x7c7f7dc5; WORD $0x4024 // vmovdqa    yword [rsp + 64], ymm15
-	LONG $0x6f7dc1c4; BYTE $0xd7   // vmovdqa    ymm2, ymm15
-	LONG $0x6f7d41c4; BYTE $0xc7   // vmovdqa    ymm8, ymm15
-	LONG $0x6f7d41c4; BYTE $0xe7   // vmovdqa    ymm12, ymm15
-	LONG $0x6f7d41c4; BYTE $0xef   // vmovdqa    ymm13, ymm15
-	LONG $0x6f7d41c4; BYTE $0xf7   // vmovdqa    ymm14, ymm15
-	LONG $0x3c7f7dc5; BYTE $0x24   // vmovdqa    yword [rsp], ymm15
-
-LBB2_9:
-	QUAD $0x0000e0c7846ffec5; BYTE $0x00 // vmovdqu    ymm0, yword [rdi + 8*rax + 224]
-	LONG $0x6f7d41c4; BYTE $0xd0         // vmovdqa    ymm10, ymm8
-	LONG $0xc26f7dc5                     // vmovdqa    ymm8, ymm2
-	LONG $0xd36ffdc5                     // vmovdqa    ymm2, ymm3
-	LONG $0x6f7dc1c4; BYTE $0xd9         // vmovdqa    ymm3, ymm9
-	LONG $0x377d42c4; BYTE $0xcb         // vpcmpgtq    ymm9, ymm0, ymm11
-	LONG $0x4b7dc3c4; WORD $0x90cb       // vblendvpd    ymm1, ymm0, ymm11, ymm9
-	QUAD $0x0000a0248c29fdc5; BYTE $0x00 // vmovapd    yword [rsp + 160], ymm1
-	LONG $0x370562c4; BYTE $0xc8         // vpcmpgtq    ymm9, ymm15, ymm0
-	LONG $0x4b7dc3c4; WORD $0x90c7       // vblendvpd    ymm0, ymm0, ymm15, ymm9
-	QUAD $0x000080248429fdc5; BYTE $0x00 // vmovapd    yword [rsp + 128], ymm0
-	QUAD $0x0000c0c7846ffec5; BYTE $0x00 // vmovdqu    ymm0, yword [rdi + 8*rax + 192]
-	LONG $0x377d62c4; BYTE $0xce         // vpcmpgtq    ymm9, ymm0, ymm6
-	LONG $0x4b7de3c4; WORD $0x90fe       // vblendvpd    ymm7, ymm0, ymm6, ymm9
-	LONG $0x370d62c4; BYTE $0xc8         // vpcmpgtq    ymm9, ymm14, ymm0
-	LONG $0x4b7d43c4; WORD $0x90f6       // vblendvpd    ymm14, ymm0, ymm14, ymm9
-	QUAD $0x0000a0c7846ffec5; BYTE $0x00 // vmovdqu    ymm0, yword [rdi + 8*rax + 160]
-	LONG $0x377d62c4; BYTE $0xcc         // vpcmpgtq    ymm9, ymm0, ymm4
-	LONG $0x4b7de3c4; WORD $0x90f4       // vblendvpd    ymm6, ymm0, ymm4, ymm9
-	LONG $0x371562c4; BYTE $0xc8         // vpcmpgtq    ymm9, ymm13, ymm0
-	LONG $0x4b7d43c4; WORD $0x90ed       // vblendvpd    ymm13, ymm0, ymm13, ymm9
-	QUAD $0x000080c78c6f7ec5; BYTE $0x00 // vmovdqu    ymm9, yword [rdi + 8*rax + 128]
-	LONG $0x3735e2c4; BYTE $0xc5         // vpcmpgtq    ymm0, ymm9, ymm5
-	LONG $0x4b35e3c4; WORD $0x00cd       // vblendvpd    ymm1, ymm9, ymm5, ymm0
-	LONG $0x371dc2c4; BYTE $0xe9         // vpcmpgtq    ymm5, ymm12, ymm9
-	LONG $0x4b3543c4; WORD $0x50e4       // vblendvpd    ymm12, ymm9, ymm12, ymm5
-	LONG $0x6c6ffec5; WORD $0x60c7       // vmovdqu    ymm5, yword [rdi + 8*rax + 96]
-	LONG $0x375562c4; BYTE $0xcb         // vpcmpgtq    ymm9, ymm5, ymm3
-	LONG $0x4b5563c4; WORD $0x90cb       // vblendvpd    ymm9, ymm5, ymm3, ymm9
-	LONG $0x372de2c4; BYTE $0xe5         // vpcmpgtq    ymm4, ymm10, ymm5
-	LONG $0x4b5543c4; WORD $0x40d2       // vblendvpd    ymm10, ymm5, ymm10, ymm4
-	LONG $0x646ffec5; WORD $0x40c7       // vmovdqu    ymm4, yword [rdi + 8*rax + 64]
-	LONG $0x375de2c4; BYTE $0xea         // vpcmpgtq    ymm5, ymm4, ymm2
-	LONG $0x4b5de3c4; WORD $0x50ea       // vblendvpd    ymm5, ymm4, ymm2, ymm5
-	LONG $0x373de2c4; BYTE $0xdc         // vpcmpgtq    ymm3, ymm8, ymm4
-	LONG $0x4b5dc3c4; WORD $0x30c0       // vblendvpd    ymm0, ymm4, ymm8, ymm3
-	LONG $0x146ffec5; BYTE $0xc7         // vmovdqu    ymm2, yword [rdi + 8*rax]
-	LONG $0x646ffdc5; WORD $0x6024       // vmovdqa    ymm4, yword [rsp + 96]
-	LONG $0x376de2c4; BYTE $0xdc         // vpcmpgtq    ymm3, ymm2, ymm4
-	LONG $0x4b6de3c4; WORD $0x30dc       // vblendvpd    ymm3, ymm2, ymm4, ymm3
-	LONG $0x1c6f7dc5; BYTE $0x24         // vmovdqa    ymm11, yword [rsp]
-	LONG $0x3725e2c4; BYTE $0xe2         // vpcmpgtq    ymm4, ymm11, ymm2
-	LONG $0x4b6dc3c4; WORD $0x40e3       // vblendvpd    ymm4, ymm2, ymm11, ymm4
-	LONG $0x546ffec5; WORD $0x20c7       // vmovdqu    ymm2, yword [rdi + 8*rax + 32]
-	LONG $0x7c6f7dc5; WORD $0x2024       // vmovdqa    ymm15, yword [rsp + 32]
-	LONG $0x376d42c4; BYTE $0xdf         // vpcmpgtq    ymm11, ymm2, ymm15
-	LONG $0x4b6d43c4; WORD $0xb0df       // vblendvpd    ymm11, ymm2, ymm15, ymm11
-	LONG $0x446f7dc5; WORD $0x4024       // vmovdqa    ymm8, yword [rsp + 64]
-	LONG $0x373d62c4; BYTE $0xfa         // vpcmpgtq    ymm15, ymm8, ymm2
-	LONG $0x4b6dc3c4; WORD $0xf0d0       // vblendvpd    ymm2, ymm2, ymm8, ymm15
-	QUAD $0x000120c7846f7ec5; BYTE $0x00 // vmovdqu    ymm8, yword [rdi + 8*rax + 288]
-	LONG $0x373d42c4; BYTE $0xfb         // vpcmpgtq    ymm15, ymm8, ymm11
-	LONG $0x4b3d43c4; WORD $0xf0db       // vblendvpd    ymm11, ymm8, ymm11, ymm15
-	LONG $0x5c297dc5; WORD $0x2024       // vmovapd    yword [rsp + 32], ymm11
-	LONG $0x376d42c4; BYTE $0xd8         // vpcmpgtq    ymm11, ymm2, ymm8
-	LONG $0x4b3de3c4; WORD $0xb0d2       // vblendvpd    ymm2, ymm8, ymm2, ymm11
-	LONG $0x1429fdc5; BYTE $0x24         // vmovapd    yword [rsp], ymm2
-	QUAD $0x000100c79c6f7ec5; BYTE $0x00 // vmovdqu    ymm11, yword [rdi + 8*rax + 256]
-	LONG $0x3725e2c4; BYTE $0xd3         // vpcmpgtq    ymm2, ymm11, ymm3
-	LONG $0x4b2563c4; WORD $0x20c3       // vblendvpd    ymm8, ymm11, ymm3, ymm2
-	LONG $0x375dc2c4; BYTE $0xdb         // vpcmpgtq    ymm3, ymm4, ymm11
-	LONG $0x4b25e3c4; WORD $0x30dc       // vblendvpd    ymm3, ymm11, ymm4, ymm3
-	QUAD $0x000140c79c6f7ec5; BYTE $0x00 // vmovdqu    ymm11, yword [rdi + 8*rax + 320]
-	LONG $0x3725e2c4; BYTE $0xe5         // vpcmpgtq    ymm4, ymm11, ymm5
-	LONG $0x4b25e3c4; WORD $0x40e5       // vblendvpd    ymm4, ymm11, ymm5, ymm4
-	LONG $0x377dc2c4; BYTE $0xeb         // vpcmpgtq    ymm5, ymm0, ymm11
-	LONG $0x4b25e3c4; WORD $0x50e8       // vblendvpd    ymm5, ymm11, ymm0, ymm5
-	QUAD $0x000160c7846ffec5; BYTE $0x00 // vmovdqu    ymm0, yword [rdi + 8*rax + 352]
-	LONG $0x377d42c4; BYTE $0xd9         // vpcmpgtq    ymm11, ymm0, ymm9
-	LONG $0x4b7d43c4; WORD $0xb0c9       // vblendvpd    ymm9, ymm0, ymm9, ymm11
-	LONG $0x372d62c4; BYTE $0xd8         // vpcmpgtq    ymm11, ymm10, ymm0
-	LONG $0x4b7d43c4; WORD $0xb0d2       // vblendvpd    ymm10, ymm0, ymm10, ymm11
-	QUAD $0x000180c79c6f7ec5; BYTE $0x00 // vmovdqu    ymm11, yword [rdi + 8*rax + 384]
-	LONG $0x3725e2c4; BYTE $0xc1         // vpcmpgtq    ymm0, ymm11, ymm1
-	LONG $0x4b25e3c4; WORD $0x00d1       // vblendvpd    ymm2, ymm11, ymm1, ymm0
-	LONG $0x371dc2c4; BYTE $0xcb         // vpcmpgtq    ymm1, ymm12, ymm11
-	LONG $0x4b2543c4; WORD $0x10e4       // vblendvpd    ymm12, ymm11, ymm12, ymm1
-	QUAD $0x0001a0c78c6ffec5; BYTE $0x00 // vmovdqu    ymm1, yword [rdi + 8*rax + 416]
-	LONG $0x377562c4; BYTE $0xde         // vpcmpgtq    ymm11, ymm1, ymm6
-	LONG $0x4b75e3c4; WORD $0xb0f6       // vblendvpd    ymm6, ymm1, ymm6, ymm11
-	LONG $0x371562c4; BYTE $0xd9         // vpcmpgtq    ymm11, ymm13, ymm1
-	LONG $0x4b75c3c4; WORD $0xb0cd       // vblendvpd    ymm1, ymm1, ymm13, ymm11
-	QUAD $0x0001c0c79c6f7ec5; BYTE $0x00 // vmovdqu    ymm11, yword [rdi + 8*rax + 448]
-	LONG $0x372562c4; BYTE $0xef         // vpcmpgtq    ymm13, ymm11, ymm7
-	LONG $0x4b25e3c4; WORD $0xd0ff       // vblendvpd    ymm7, ymm11, ymm7, ymm13
-	LONG $0x370d42c4; BYTE $0xeb         // vpcmpgtq    ymm13, ymm14, ymm11
-	LONG $0x4b2543c4; WORD $0xd0ee       // vblendvpd    ymm13, ymm11, ymm14, ymm13
-	QUAD $0x0001e0c79c6f7ec5; BYTE $0x00 // vmovdqu    ymm11, yword [rdi + 8*rax + 480]
-	QUAD $0x0000a024846ffdc5; BYTE $0x00 // vmovdqa    ymm0, yword [rsp + 160]
-	LONG $0x372562c4; BYTE $0xf0         // vpcmpgtq    ymm14, ymm11, ymm0
-	LONG $0x4b2563c4; WORD $0xe0f0       // vblendvpd    ymm14, ymm11, ymm0, ymm14
-	QUAD $0x00008024846ffdc5; BYTE $0x00 // vmovdqa    ymm0, yword [rsp + 128]
-	LONG $0x377d42c4; BYTE $0xfb         // vpcmpgtq    ymm15, ymm0, ymm11
-	LONG $0x4b2563c4; WORD $0xf0f8       // vblendvpd    ymm15, ymm11, ymm0, ymm15
-	QUAD $0x0002e0c7846ffec5; BYTE $0x00 // vmovdqu    ymm0, yword [rdi + 8*rax + 736]
-	LONG $0x377d42c4; BYTE $0xde         // vpcmpgtq    ymm11, ymm0, ymm14
-	LONG $0x4b7d43c4; WORD $0xb0de       // vblendvpd    ymm11, ymm0, ymm14, ymm11
-	QUAD $0x0000a0249c297dc5; BYTE $0x00 // vmovapd    yword [rsp + 160], ymm11
-	LONG $0x370562c4; BYTE $0xf0         // vpcmpgtq    ymm14, ymm15, ymm0
-	LONG $0x4b7dc3c4; WORD $0xe0c7       // vblendvpd    ymm0, ymm0, ymm15, ymm14
-	QUAD $0x000080248429fdc5; BYTE $0x00 // vmovapd    yword [rsp + 128], ymm0
-	QUAD $0x0002c0c7846ffec5; BYTE $0x00 // vmovdqu    ymm0, yword [rdi + 8*rax + 704]
-	LONG $0x377d62c4; BYTE $0xf7         // vpcmpgtq    ymm14, ymm0, ymm7
-	LONG $0x4b7de3c4; WORD $0xe0ff       // vblendvpd    ymm7, ymm0, ymm7, ymm14
-	LONG $0x371562c4; BYTE $0xf0         // vpcmpgtq    ymm14, ymm13, ymm0
-	LONG $0x4b7d43c4; WORD $0xe0f5       // vblendvpd    ymm14, ymm0, ymm13, ymm14
-	QUAD $0x0002a0c7846ffec5; BYTE $0x00 // vmovdqu    ymm0, yword [rdi + 8*rax + 672]
-	LONG $0x377d62c4; BYTE $0xee         // vpcmpgtq    ymm13, ymm0, ymm6
-	LONG $0x4b7de3c4; WORD $0xd0f6       // vblendvpd    ymm6, ymm0, ymm6, ymm13
-	LONG $0x377562c4; BYTE $0xe8         // vpcmpgtq    ymm13, ymm1, ymm0
-	LONG $0x4b7d63c4; WORD $0xd0e9       // vblendvpd    ymm13, ymm0, ymm1, ymm13
-	QUAD $0x000280c78c6ffec5; BYTE $0x00 // vmovdqu    ymm1, yword [rdi + 8*rax + 640]
-	LONG $0x3775e2c4; BYTE $0xc2         // vpcmpgtq    ymm0, ymm1, ymm2
-	LONG $0x4b75e3c4; WORD $0x00c2       // vblendvpd    ymm0, ymm1, ymm2, ymm0
-	LONG $0x371de2c4; BYTE $0xd1         // vpcmpgtq    ymm2, ymm12, ymm1
-	LONG $0x4b7543c4; WORD $0x20e4       // vblendvpd    ymm12, ymm1, ymm12, ymm2
-	QUAD $0x000260c78c6ffec5; BYTE $0x00 // vmovdqu    ymm1, yword [rdi + 8*rax + 608]
-	LONG $0x3775c2c4; BYTE $0xd1         // vpcmpgtq    ymm2, ymm1, ymm9
-	LONG $0x4b7543c4; WORD $0x20c9       // vblendvpd    ymm9, ymm1, ymm9, ymm2
-	LONG $0x372de2c4; BYTE $0xd1         // vpcmpgtq    ymm2, ymm10, ymm1
-	LONG $0x4b7543c4; WORD $0x20d2       // vblendvpd    ymm10, ymm1, ymm10, ymm2
-	QUAD $0x000240c78c6ffec5; BYTE $0x00 // vmovdqu    ymm1, yword [rdi + 8*rax + 576]
-	LONG $0x3775e2c4; BYTE $0xd4         // vpcmpgtq    ymm2, ymm1, ymm4
-	LONG $0x4b75e3c4; WORD $0x20d4       // vblendvpd    ymm2, ymm1, ymm4, ymm2
-	LONG $0x3755e2c4; BYTE $0xe1         // vpcmpgtq    ymm4, ymm5, ymm1
-	LONG $0x4b75e3c4; WORD $0x40cd       // vblendvpd    ymm1, ymm1, ymm5, ymm4
-	QUAD $0x000200c7a46ffec5; BYTE $0x00 // vmovdqu    ymm4, yword [rdi + 8*rax + 512]
-	LONG $0x375dc2c4; BYTE $0xe8         // vpcmpgtq    ymm5, ymm4, ymm8
-	LONG $0x4b5dc3c4; WORD $0x50e8       // vblendvpd    ymm5, ymm4, ymm8, ymm5
-	LONG $0x376562c4; BYTE $0xc4         // vpcmpgtq    ymm8, ymm3, ymm4
-	LONG $0x4b5de3c4; WORD $0x80db       // vblendvpd    ymm3, ymm4, ymm3, ymm8
-	QUAD $0x000220c7a46ffec5; BYTE $0x00 // vmovdqu    ymm4, yword [rdi + 8*rax + 544]
-	LONG $0x5c6f7dc5; WORD $0x2024       // vmovdqa    ymm11, yword [rsp + 32]
-	LONG $0x375d42c4; BYTE $0xc3         // vpcmpgtq    ymm8, ymm4, ymm11
-	LONG $0x4b5d43c4; WORD $0x80c3       // vblendvpd    ymm8, ymm4, ymm11, ymm8
-	LONG $0x3c6f7dc5; BYTE $0x24         // vmovdqa    ymm15, yword [rsp]
-	LONG $0x370562c4; BYTE $0xdc         // vpcmpgtq    ymm11, ymm15, ymm4
-	LONG $0x4b5dc3c4; WORD $0xb0e7       // vblendvpd    ymm4, ymm4, ymm15, ymm11
-	QUAD $0x000320c79c6f7ec5; BYTE $0x00 // vmovdqu    ymm11, yword [rdi + 8*rax + 800]
-	LONG $0x372542c4; BYTE $0xf8         // vpcmpgtq    ymm15, ymm11, ymm8
-	LONG $0x4b2543c4; WORD $0xf0c0       // vblendvpd    ymm8, ymm11, ymm8, ymm15
-	LONG $0x44297dc5; WORD $0x2024       // vmovapd    yword [rsp + 32], ymm8
-	LONG $0x375d42c4; BYTE $0xc3         // vpcmpgtq    ymm8, ymm4, ymm11
-	LONG $0x4b25e3c4; WORD $0x80e4       // vblendvpd    ymm4, ymm11, ymm4, ymm8
-	LONG $0x6429fdc5; WORD $0x4024       // vmovapd    yword [rsp + 64], ymm4
-	QUAD $0x000300c7a46ffec5; BYTE $0x00 // vmovdqu    ymm4, yword [rdi + 8*rax + 768]
-	LONG $0x375d62c4; BYTE $0xdd         // vpcmpgtq    ymm11, ymm4, ymm5
-	LONG $0x4b5de3c4; WORD $0xb0ed       // vblendvpd    ymm5, ymm4, ymm5, ymm11
-	LONG $0x6c29fdc5; WORD $0x6024       // vmovapd    yword [rsp + 96], ymm5
-	LONG $0x3765e2c4; BYTE $0xec         // vpcmpgtq    ymm5, ymm3, ymm4
-	LONG $0x4b5de3c4; WORD $0x50db       // vblendvpd    ymm3, ymm4, ymm3, ymm5
-	LONG $0x1c29fdc5; BYTE $0x24         // vmovapd    yword [rsp], ymm3
-	QUAD $0x000340c7a46ffec5; BYTE $0x00 // vmovdqu    ymm4, yword [rdi + 8*rax + 832]
-	LONG $0x375de2c4; BYTE $0xda         // vpcmpgtq    ymm3, ymm4, ymm2
-	LONG $0x4b5de3c4; WORD $0x30da       // vblendvpd    ymm3, ymm4, ymm2, ymm3
-	LONG $0x3775e2c4; BYTE $0xd4         // vpcmpgtq    ymm2, ymm1, ymm4
-	LONG $0x4b5de3c4; WORD $0x20d1       // vblendvpd    ymm2, ymm4, ymm1, ymm2
-	QUAD $0x000360c78c6ffec5; BYTE $0x00 // vmovdqu    ymm1, yword [rdi + 8*rax + 864]
-	LONG $0x3775c2c4; BYTE $0xe1         // vpcmpgtq    ymm4, ymm1, ymm9
-	LONG $0x4b7543c4; WORD $0x40c9       // vblendvpd    ymm9, ymm1, ymm9, ymm4
-	LONG $0x372de2c4; BYTE $0xe9         // vpcmpgtq    ymm5, ymm10, ymm1
-	LONG $0x4b7543c4; WORD $0x50c2       // vblendvpd    ymm8, ymm1, ymm10, ymm5
-	QUAD $0x000380c78c6ffec5; BYTE $0x00 // vmovdqu    ymm1, yword [rdi + 8*rax + 896]
-	LONG $0x3775e2c4; BYTE $0xe8         // vpcmpgtq    ymm5, ymm1, ymm0
-	LONG $0x4b75e3c4; WORD $0x50e8       // vblendvpd    ymm5, ymm1, ymm0, ymm5
-	LONG $0x371de2c4; BYTE $0xc1         // vpcmpgtq    ymm0, ymm12, ymm1
-	LONG $0x4b7543c4; WORD $0x00e4       // vblendvpd    ymm12, ymm1, ymm12, ymm0
-	QUAD $0x0003a0c7846ffec5; BYTE $0x00 // vmovdqu    ymm0, yword [rdi + 8*rax + 928]
-	LONG $0x377de2c4; BYTE $0xce         // vpcmpgtq    ymm1, ymm0, ymm6
-	LONG $0x4b7de3c4; WORD $0x10e6       // vblendvpd    ymm4, ymm0, ymm6, ymm1
-	LONG $0x3715e2c4; BYTE $0xc8         // vpcmpgtq    ymm1, ymm13, ymm0
-	LONG $0x4b7d43c4; WORD $0x10ed       // vblendvpd    ymm13, ymm0, ymm13, ymm1
-	QUAD $0x0003c0c7846ffec5; BYTE $0x00 // vmovdqu    ymm0, yword [rdi + 8*rax + 960]
-	LONG $0x377de2c4; BYTE $0xcf         // vpcmpgtq    ymm1, ymm0, ymm7
-	LONG $0x4b7de3c4; WORD $0x10f7       // vblendvpd    ymm6, ymm0, ymm7, ymm1
-	LONG $0x370de2c4; BYTE $0xc8         // vpcmpgtq    ymm1, ymm14, ymm0
-	LONG $0x4b7d43c4; WORD $0x10f6       // vblendvpd    ymm14, ymm0, ymm14, ymm1
-	QUAD $0x0003e0c7846ffec5; BYTE $0x00 // vmovdqu    ymm0, yword [rdi + 8*rax + 992]
-	QUAD $0x0000a024bc6ffdc5; BYTE $0x00 // vmovdqa    ymm7, yword [rsp + 160]
-	LONG $0x377de2c4; BYTE $0xcf         // vpcmpgtq    ymm1, ymm0, ymm7
-	LONG $0x4b7d63c4; WORD $0x10df       // vblendvpd    ymm11, ymm0, ymm7, ymm1
-	QUAD $0x00008024bc6ffdc5; BYTE $0x00 // vmovdqa    ymm7, yword [rsp + 128]
-	LONG $0x3745e2c4; BYTE $0xc8         // vpcmpgtq    ymm1, ymm7, ymm0
-	LONG $0x4b7d63c4; WORD $0x10ff       // vblendvpd    ymm15, ymm0, ymm7, ymm1
-	LONG $0x80e88348                     // sub    rax, -128
-	LONG $0x04c28349                     // add    r10, 4
-	JNE  LBB2_9
-
-LBB2_10:
-	WORD $0x854d; BYTE $0xc9       // test    r9, r9
-	LONG $0xfd6ffdc5               // vmovdqa    ymm7, ymm5
-	LONG $0x6f7dc1c4; BYTE $0xe9   // vmovdqa    ymm5, ymm9
-	LONG $0x4c6f7dc5; WORD $0x6024 // vmovdqa    ymm9, yword [rsp + 96]
-	LONG $0xd36f7dc5               // vmovdqa    ymm10, ymm3
-	JE   LBB2_13
-	LONG $0xc7048d48               // lea    rax, [rdi + 8*rax]
-	WORD $0xf749; BYTE $0xd9       // neg    r9
-
-LBB2_12:
-	LONG $0x406ffec5; BYTE $0x20   // vmovdqu    ymm0, yword [rax + 32]
-	LONG $0x5c6ffdc5; WORD $0x2024 // vmovdqa    ymm3, yword [rsp + 32]
-	LONG $0x377de2c4; BYTE $0xcb   // vpcmpgtq    ymm1, ymm0, ymm3
-	LONG $0x4b7de3c4; WORD $0x10db // vblendvpd    ymm3, ymm0, ymm3, ymm1
-	LONG $0x5c29fdc5; WORD $0x2024 // vmovapd    yword [rsp + 32], ymm3
-	LONG $0x5c6ffdc5; WORD $0x4024 // vmovdqa    ymm3, yword [rsp + 64]
-	LONG $0x3765e2c4; BYTE $0xc8   // vpcmpgtq    ymm1, ymm3, ymm0
-	LONG $0x4b7de3c4; WORD $0x10db // vblendvpd    ymm3, ymm0, ymm3, ymm1
-	LONG $0x5c29fdc5; WORD $0x4024 // vmovapd    yword [rsp + 64], ymm3
-	LONG $0x006ffec5               // vmovdqu    ymm0, yword [rax]
-	LONG $0x377dc2c4; BYTE $0xc9   // vpcmpgtq    ymm1, ymm0, ymm9
-	LONG $0x4b7d43c4; WORD $0x10c9 // vblendvpd    ymm9, ymm0, ymm9, ymm1
-	LONG $0x1c6ffdc5; BYTE $0x24   // vmovdqa    ymm3, yword [rsp]
-	LONG $0x3765e2c4; BYTE $0xc8   // vpcmpgtq    ymm1, ymm3, ymm0
-	LONG $0x4b7de3c4; WORD $0x10db // vblendvpd    ymm3, ymm0, ymm3, ymm1
-	LONG $0x1c29fdc5; BYTE $0x24   // vmovapd    yword [rsp], ymm3
-	LONG $0x406ffec5; BYTE $0x40   // vmovdqu    ymm0, yword [rax + 64]
-	LONG $0x377dc2c4; BYTE $0xca   // vpcmpgtq    ymm1, ymm0, ymm10
-	LONG $0x4b7d43c4; WORD $0x10d2 // vblendvpd    ymm10, ymm0, ymm10, ymm1
-	LONG $0x376de2c4; BYTE $0xc8   // vpcmpgtq    ymm1, ymm2, ymm0
-	LONG $0x4b7de3c4; WORD $0x10d2 // vblendvpd    ymm2, ymm0, ymm2, ymm1
-	LONG $0x406ffec5; BYTE $0x60   // vmovdqu    ymm0, yword [rax + 96]
-	LONG $0x377de2c4; BYTE $0xcd   // vpcmpgtq    ymm1, ymm0, ymm5
-	LONG $0x4b7de3c4; WORD $0x10ed // vblendvpd    ymm5, ymm0, ymm5, ymm1
-	LONG $0x373de2c4; BYTE $0xc8   // vpcmpgtq    ymm1, ymm8, ymm0
-	LONG $0x4b7d43c4; WORD $0x10c0 // vblendvpd    ymm8, ymm0, ymm8, ymm1
-	QUAD $0x00000080806ffec5       // vmovdqu    ymm0, yword [rax + 128]
-	LONG $0x377de2c4; BYTE $0xcf   // vpcmpgtq    ymm1, ymm0, ymm7
-	LONG $0x4b7de3c4; WORD $0x10ff // vblendvpd    ymm7, ymm0, ymm7, ymm1
-	LONG $0x371de2c4; BYTE $0xc8   // vpcmpgtq    ymm1, ymm12, ymm0
-	LONG $0x4b7d43c4; WORD $0x10e4 // vblendvpd    ymm12, ymm0, ymm12, ymm1
-	QUAD $0x000000a0806ffec5       // vmovdqu    ymm0, yword [rax + 160]
-	LONG $0x377de2c4; BYTE $0xcc   // vpcmpgtq    ymm1, ymm0, ymm4
-	LONG $0x4b7de3c4; WORD $0x10e4 // vblendvpd    ymm4, ymm0, ymm4, ymm1
-	LONG $0x3715e2c4; BYTE $0xc8   // vpcmpgtq    ymm1, ymm13, ymm0
-	LONG $0x4b7d43c4; WORD $0x10ed // vblendvpd    ymm13, ymm0, ymm13, ymm1
-	QUAD $0x000000c0806ffec5       // vmovdqu    ymm0, yword [rax + 192]
-	LONG $0x377de2c4; BYTE $0xce   // vpcmpgtq    ymm1, ymm0, ymm6
-	LONG $0x4b7de3c4; WORD $0x10f6 // vblendvpd    ymm6, ymm0, ymm6, ymm1
-	LONG $0x370de2c4; BYTE $0xc8   // vpcmpgtq    ymm1, ymm14, ymm0
-	LONG $0x4b7d43c4; WORD $0x10f6 // vblendvpd    ymm14, ymm0, ymm14, ymm1
-	QUAD $0x000000e0806ffec5       // vmovdqu    ymm0, yword [rax + 224]
-	LONG $0x377dc2c4; BYTE $0xcb   // vpcmpgtq    ymm1, ymm0, ymm11
-	LONG $0x4b7d43c4; WORD $0x10db // vblendvpd    ymm11, ymm0, ymm11, ymm1
-	LONG $0x3705e2c4; BYTE $0xc8   // vpcmpgtq    ymm1, ymm15, ymm0
-	LONG $0x4b7d43c4; WORD $0x10ff // vblendvpd    ymm15, ymm0, ymm15, ymm1
-	LONG $0x01000548; WORD $0x0000 // add    rax, 256
-	WORD $0xff49; BYTE $0xc1       // inc    r9
-	JNE  LBB2_12
+	LONG $0xd86ffdc5               // vmovdqa    ymm3, ymm0
+	LONG $0xd06ffdc5               // vmovdqa    ymm2, ymm0
+	LONG $0xc86ffdc5               // vmovdqa    ymm1, ymm0
+	LONG $0xfc6ffdc5               // vmovdqa    ymm7, ymm4
+	LONG $0xf46ffdc5               // vmovdqa    ymm6, ymm4
+	LONG $0xec6ffdc5               // vmovdqa    ymm5, ymm4
 
-LBB2_13:
-	LONG $0x4c6ffdc5; WORD $0x4024 // vmovdqa    ymm1, yword [rsp + 64]
-	LONG $0x3775c2c4; BYTE $0xc5   // vpcmpgtq    ymm0, ymm1, ymm13
-	LONG $0x4b15e3c4; WORD $0x00c1 // vblendvpd    ymm0, ymm13, ymm1, ymm0
-	LONG $0x373dc2c4; BYTE $0xcf   // vpcmpgtq    ymm1, ymm8, ymm15
-	LONG $0x4b05c3c4; WORD $0x10c8 // vblendvpd    ymm1, ymm15, ymm8, ymm1
-	LONG $0x1c6ffdc5; BYTE $0x24   // vmovdqa    ymm3, yword [rsp]
-	LONG $0x376542c4; BYTE $0xc4   // vpcmpgtq    ymm8, ymm3, ymm12
-	LONG $0x4b1d63c4; WORD $0x80c3 // vblendvpd    ymm8, ymm12, ymm3, ymm8
-	LONG $0x6f7dc1c4; BYTE $0xd9   // vmovdqa    ymm3, ymm9
-	LONG $0x376d42c4; BYTE $0xce   // vpcmpgtq    ymm9, ymm2, ymm14
-	LONG $0x4b0de3c4; WORD $0x90d2 // vblendvpd    ymm2, ymm14, ymm2, ymm9
-	LONG $0x373d62c4; BYTE $0xca   // vpcmpgtq    ymm9, ymm8, ymm2
-	LONG $0x4b6dc3c4; WORD $0x90d0 // vblendvpd    ymm2, ymm2, ymm8, ymm9
-	LONG $0x377d62c4; BYTE $0xc1   // vpcmpgtq    ymm8, ymm0, ymm1
-	LONG $0x4b75e3c4; WORD $0x80c0 // vblendvpd    ymm0, ymm1, ymm0, ymm8
-	LONG $0x376de2c4; BYTE $0xc8   // vpcmpgtq    ymm1, ymm2, ymm0
-	LONG $0x4b7de3c4; WORD $0x10c2 // vblendvpd    ymm0, ymm0, ymm2, ymm1
+LBB2_5:
+	LONG $0x046f7ec5; BYTE $0xc7   // vmovdqu    ymm8, yword [rdi + 8*rax]
+	LONG $0x373d62c4; BYTE $0xc8   // vpcmpgtq    ymm9, ymm8, ymm0
+	LONG $0x4b3de3c4; WORD $0x90c0 // vblendvpd    ymm0, ymm8, ymm0, ymm9
+	LONG $0x4c6f7ec5; WORD $0x20c7 // vmovdqu    ymm9, yword [rdi + 8*rax + 32]
+	LONG $0x373562c4; BYTE $0xd3   // vpcmpgtq    ymm10, ymm9, ymm3
+	LONG $0x4b35e3c4; WORD $0xa0db // vblendvpd    ymm3, ymm9, ymm3, ymm10
+	LONG $0x546f7ec5; WORD $0x40c7 // vmovdqu    ymm10, yword [rdi + 8*rax + 64]
+	LONG $0x372d62c4; BYTE $0xda   // vpcmpgtq    ymm11, ymm10, ymm2
+	LONG $0x4b2de3c4; WORD $0xb0d2 // vblendvpd    ymm2, ymm10, ymm2, ymm11
+	LONG $0x5c6f7ec5; WORD $0x60c7 // vmovdqu    ymm11, yword [rdi + 8*rax + 96]
+	LONG $0x372562c4; BYTE $0xe1   // vpcmpgtq    ymm12, ymm11, ymm1
+	LONG $0x4b25e3c4; WORD $0xc0c9 // vblendvpd    ymm1, ymm11, ymm1, ymm12
+	LONG $0x375d42c4; BYTE $0xe0   // vpcmpgtq    ymm12, ymm4, ymm8
+	LONG $0x4b3de3c4; WORD $0xc0e4 // vblendvpd    ymm4, ymm8, ymm4, ymm12
+	LONG $0x374542c4; BYTE $0xc1   // vpcmpgtq    ymm8, ymm7, ymm9
+	LONG $0x4b35e3c4; WORD $0x80ff // vblendvpd    ymm7, ymm9, ymm7, ymm8
+	LONG $0x374d42c4; BYTE $0xc2   // vpcmpgtq    ymm8, ymm6, ymm10
+	LONG $0x4b2de3c4; WORD $0x80f6 // vblendvpd    ymm6, ymm10, ymm6, ymm8
+	LONG $0x375542c4; BYTE $0xc3   // vpcmpgtq    ymm8, ymm5, ymm11
+	LONG $0x4b25e3c4; WORD $0x80ed // vblendvpd    ymm5, ymm11, ymm5, ymm8
+	LONG $0x10c08348               // add    rax, 16
+	WORD $0x3949; BYTE $0xc1       // cmp    r9, rax
+	JNE  LBB2_5
+	LONG $0x375d62c4; BYTE $0xc7   // vpcmpgtq    ymm8, ymm4, ymm7
+	LONG $0x4b45e3c4; WORD $0x80e4 // vblendvpd    ymm4, ymm7, ymm4, ymm8
+	LONG $0x375de2c4; BYTE $0xfe   // vpcmpgtq    ymm7, ymm4, ymm6
+	LONG $0x4b4de3c4; WORD $0x70e4 // vblendvpd    ymm4, ymm6, ymm4, ymm7
+	LONG $0x375de2c4; BYTE $0xf5   // vpcmpgtq    ymm6, ymm4, ymm5
+	LONG $0x4b55e3c4; WORD $0x60e4 // vblendvpd    ymm4, ymm5, ymm4, ymm6
+	LONG $0x197de3c4; WORD $0x01e5 // vextractf128    xmm5, ymm4, 1
+	LONG $0x3759e2c4; BYTE $0xf5   // vpcmpgtq    xmm6, xmm4, xmm5
+	LONG $0x4b51e3c4; WORD $0x60e4 // vblendvpd    xmm4, xmm5, xmm4, xmm6
+	LONG $0x0479e3c4; WORD $0x4eec // vpermilps    xmm5, xmm4, 78
+	LONG $0x3759e2c4; BYTE $0xf5   // vpcmpgtq    xmm6, xmm4, xmm5
+	LONG $0x4b51e3c4; WORD $0x60e4 // vblendvpd    xmm4, xmm5, xmm4, xmm6
+	LONG $0x7ef9c1c4; BYTE $0xe2   // vmovq    r10, xmm4
+	LONG $0x3765e2c4; BYTE $0xe0   // vpcmpgtq    ymm4, ymm3, ymm0
+	LONG $0x4b65e3c4; WORD $0x40c0 // vblendvpd    ymm0, ymm3, ymm0, ymm4
+	LONG $0x376de2c4; BYTE $0xd8   // vpcmpgtq    ymm3, ymm2, ymm0
+	LONG $0x4b6de3c4; WORD $0x30c0 // vblendvpd    ymm0, ymm2, ymm0, ymm3
+	LONG $0x3775e2c4; BYTE $0xd0   // vpcmpgtq    ymm2, ymm1, ymm0
+	LONG $0x4b75e3c4; WORD $0x20c0 // vblendvpd    ymm0, ymm1, ymm0, ymm2
 	LONG $0x197de3c4; WORD $0x01c1 // vextractf128    xmm1, ymm0, 1
-	LONG $0x3779e2c4; BYTE $0xd1   // vpcmpgtq    xmm2, xmm0, xmm1
+	LONG $0x3771e2c4; BYTE $0xd0   // vpcmpgtq    xmm2, xmm1, xmm0
 	LONG $0x4b71e3c4; WORD $0x20c0 // vblendvpd    xmm0, xmm1, xmm0, xmm2
 	LONG $0x0479e3c4; WORD $0x4ec8 // vpermilps    xmm1, xmm0, 78
-	LONG $0x3779e2c4; BYTE $0xd1   // vpcmpgtq    xmm2, xmm0, xmm1
+	LONG $0x3771e2c4; BYTE $0xd0   // vpcmpgtq    xmm2, xmm1, xmm0
 	LONG $0x4b71e3c4; WORD $0x20c0 // vblendvpd    xmm0, xmm1, xmm0, xmm2
-	LONG $0x546ffdc5; WORD $0x2024 // vmovdqa    ymm2, yword [rsp + 32]
-	LONG $0x375de2c4; BYTE $0xca   // vpcmpgtq    ymm1, ymm4, ymm2
-	LONG $0x4b5de3c4; WORD $0x10ca // vblendvpd    ymm1, ymm4, ymm2, ymm1
-	LONG $0x3725e2c4; BYTE $0xd5   // vpcmpgtq    ymm2, ymm11, ymm5
-	LONG $0x4b25e3c4; WORD $0x20d5 // vblendvpd    ymm2, ymm11, ymm5, ymm2
-	LONG $0x3745e2c4; BYTE $0xe3   // vpcmpgtq    ymm4, ymm7, ymm3
-	LONG $0x4b45e3c4; WORD $0x40e3 // vblendvpd    ymm4, ymm7, ymm3, ymm4
-	LONG $0x374dc2c4; BYTE $0xea   // vpcmpgtq    ymm5, ymm6, ymm10
-	LONG $0x4b4dc3c4; WORD $0x50da // vblendvpd    ymm3, ymm6, ymm10, ymm5
-	LONG $0x3765e2c4; BYTE $0xec   // vpcmpgtq    ymm5, ymm3, ymm4
-	LONG $0x4b65e3c4; WORD $0x50dc // vblendvpd    ymm3, ymm3, ymm4, ymm5
-	LONG $0x376de2c4; BYTE $0xe1   // vpcmpgtq    ymm4, ymm2, ymm1
-	LONG $0x4b6de3c4; WORD $0x40c9 // vblendvpd    ymm1, ymm2, ymm1, ymm4
-	LONG $0x3775e2c4; BYTE $0xd3   // vpcmpgtq    ymm2, ymm1, ymm3
-	LONG $0x4b75e3c4; WORD $0x20cb // vblendvpd    ymm1, ymm1, ymm3, ymm2
-	LONG $0x197de3c4; WORD $0x01ca // vextractf128    xmm2, ymm1, 1
-	LONG $0x3769e2c4; BYTE $0xd9   // vpcmpgtq    xmm3, xmm2, xmm1
-	LONG $0x4b69e3c4; WORD $0x30c9 // vblendvpd    xmm1, xmm2, xmm1, xmm3
-	LONG $0x0479e3c4; WORD $0x4ed1 // vpermilps    xmm2, xmm1, 78
-	LONG $0x3769e2c4; BYTE $0xd9   // vpcmpgtq    xmm3, xmm2, xmm1
-	LONG $0x4b69e3c4; WORD $0x30c9 // vblendvpd    xmm1, xmm2, xmm1, xmm3
-	LONG $0x7ef9e1c4; BYTE $0xc6   // vmovq    rsi, xmm0
-	LONG $0x7ef9c1c4; BYTE $0xc9   // vmovq    r9, xmm1
-	WORD $0x394d; BYTE $0xc3       // cmp    r11, r8
-	JE   LBB2_14
-
-LBB2_4:
-	WORD $0x8948; BYTE $0xf0 // mov    rax, rsi
+	LONG $0x7ef9e1c4; BYTE $0xc0   // vmovq    rax, xmm0
+	WORD $0x894c; BYTE $0xd6       // mov    rsi, r10
+	WORD $0x394d; BYTE $0xc1       // cmp    r9, r8
+	JE   LBB2_8
 
-LBB2_5:
-	LONG $0xdf348b4a         // mov    rsi, qword [rdi + 8*r11]
-	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
-	LONG $0xce4f0f4c         // cmovg    r9, rsi
+LBB2_7:
+	LONG $0xcf348b4a         // mov    rsi, qword [rdi + 8*r9]
 	WORD $0x3948; BYTE $0xf0 // cmp    rax, rsi
-	LONG $0xf04d0f48         // cmovge    rsi, rax
-	LONG $0x01c38349         // add    r11, 1
-	WORD $0x8948; BYTE $0xf0 // mov    rax, rsi
-	WORD $0x394d; BYTE $0xd8 // cmp    r8, r11
-	JNE  LBB2_5
+	LONG $0xc64f0f48         // cmovg    rax, rsi
+	WORD $0x3949; BYTE $0xf2 // cmp    r10, rsi
+	LONG $0xf24d0f49         // cmovge    rsi, r10
+	LONG $0x01c18349         // add    r9, 1
+	WORD $0x8949; BYTE $0xf2 // mov    r10, rsi
+	WORD $0x394d; BYTE $0xc8 // cmp    r8, r9
+	JNE  LBB2_7
 
-LBB2_14:
+LBB2_8:
 	WORD $0x8948; BYTE $0x31 // mov    qword [rcx], rsi
-	WORD $0x894c; BYTE $0x0a // mov    qword [rdx], r9
-	SUBQ $8, SP
+	WORD $0x8948; BYTE $0x02 // mov    qword [rdx], rax
 	VZEROUPPER
 	RET
 
 DATA LCDATA3<>+0x000(SB)/8, $0x8000000000000000
 GLOBL LCDATA3<>(SB), 8, $8
 
-TEXT ·_uint64_max_min_avx2(SB), $296-32
+TEXT ·_uint64_max_min_avx2(SB), $0-32
 
 	MOVQ values+0(FP), DI
 	MOVQ length+8(FP), SI
 	MOVQ minout+16(FP), DX
 	MOVQ maxout+24(FP), CX
-	ADDQ $8, SP
 	LEAQ LCDATA3<>(SB), BP
 
 	WORD $0xf685                               // test    esi, esi
 	JLE  LBB3_1
 	WORD $0x8941; BYTE $0xf0                   // mov    r8d, esi
-	WORD $0xfe83; BYTE $0x1f                   // cmp    esi, 31
-	JA   LBB3_6
-	LONG $0xffc1c749; WORD $0xffff; BYTE $0xff // mov    r9, -1
-	WORD $0x3145; BYTE $0xdb                   // xor    r11d, r11d
-	WORD $0xf631                               // xor    esi, esi
-	JMP  LBB3_4
+	WORD $0xfe83; BYTE $0x0f                   // cmp    esi, 15
+	JA   LBB3_4
+	LONG $0xffc0c748; WORD $0xffff; BYTE $0xff // mov    rax, -1
+	WORD $0x3145; BYTE $0xc9                   // xor    r9d, r9d
+	WORD $0x3145; BYTE $0xd2                   // xor    r10d, r10d
+	JMP  LBB3_7
 
 LBB3_1:
-	LONG $0xffc1c749; WORD $0xffff; BYTE $0xff // mov    r9, -1
+	LONG $0xffc0c748; WORD $0xffff; BYTE $0xff // mov    rax, -1
 	WORD $0xf631                               // xor    esi, esi
-	JMP  LBB3_14
-
-LBB3_6:
-	WORD $0x8945; BYTE $0xc3       // mov    r11d, r8d
-	LONG $0xe0e38341               // and    r11d, -32
-	LONG $0xe0438d49               // lea    rax, [r11 - 32]
-	WORD $0x8949; BYTE $0xc2       // mov    r10, rax
-	LONG $0x05eac149               // shr    r10, 5
-	LONG $0x01c28349               // add    r10, 1
-	WORD $0x8945; BYTE $0xd1       // mov    r9d, r10d
-	LONG $0x03e18341               // and    r9d, 3
-	LONG $0x60f88348               // cmp    rax, 96
-	JAE  LBB3_8
-	LONG $0xe4efd9c5               // vpxor    xmm4, xmm4, xmm4
-	LONG $0xc076fdc5               // vpcmpeqd    ymm0, ymm0, ymm0
-	LONG $0x447ffdc5; WORD $0x4024 // vmovdqa    yword [rsp + 64], ymm0
-	WORD $0xc031                   // xor    eax, eax
-	LONG $0xc076fdc5               // vpcmpeqd    ymm0, ymm0, ymm0
-	LONG $0x447ffdc5; WORD $0x6024 // vmovdqa    yword [rsp + 96], ymm0
-	LONG $0xed76d5c5               // vpcmpeqd    ymm5, ymm5, ymm5
-	LONG $0xff76c5c5               // vpcmpeqd    ymm7, ymm7, ymm7
-	LONG $0x761d41c4; BYTE $0xe4   // vpcmpeqd    ymm12, ymm12, ymm12
-	LONG $0x762d41c4; BYTE $0xd2   // vpcmpeqd    ymm10, ymm10, ymm10
-	LONG $0x762541c4; BYTE $0xdb   // vpcmpeqd    ymm11, ymm11, ymm11
-	LONG $0x761541c4; BYTE $0xed   // vpcmpeqd    ymm13, ymm13, ymm13
-	LONG $0xc0eff9c5               // vpxor    xmm0, xmm0, xmm0
-	LONG $0x447ffdc5; WORD $0x2024 // vmovdqa    yword [rsp + 32], ymm0
-	LONG $0xc0eff9c5               // vpxor    xmm0, xmm0, xmm0
-	LONG $0x047ffdc5; BYTE $0x24   // vmovdqa    yword [rsp], ymm0
-	LONG $0xdbefe1c5               // vpxor    xmm3, xmm3, xmm3
-	LONG $0xef3141c4; BYTE $0xc9   // vpxor    xmm9, xmm9, xmm9
-	LONG $0xef3941c4; BYTE $0xc0   // vpxor    xmm8, xmm8, xmm8
-	LONG $0xef0141c4; BYTE $0xff   // vpxor    xmm15, xmm15, xmm15
-	LONG $0xc0eff9c5               // vpxor    xmm0, xmm0, xmm0
-	JMP  LBB3_10
+	JMP  LBB3_8
 
-LBB3_8:
-	LONG $0xfce28349               // and    r10, -4
-	WORD $0xf749; BYTE $0xda       // neg    r10
-	LONG $0xe4efd9c5               // vpxor    xmm4, xmm4, xmm4
-	LONG $0xc076fdc5               // vpcmpeqd    ymm0, ymm0, ymm0
-	LONG $0x447ffdc5; WORD $0x4024 // vmovdqa    yword [rsp + 64], ymm0
+LBB3_4:
+	WORD $0x8945; BYTE $0xc1       // mov    r9d, r8d
+	LONG $0xf0e18341               // and    r9d, -16
+	LONG $0xedefd1c5               // vpxor    xmm5, xmm5, xmm5
+	LONG $0xc976f5c5               // vpcmpeqd    ymm1, ymm1, ymm1
 	WORD $0xc031                   // xor    eax, eax
-	LONG $0x597d62c4; WORD $0x0075 // vpbroadcastq    ymm14, qword 0[rbp] /* [rip + .LCPI3_0] */
-	LONG $0xc076fdc5               // vpcmpeqd    ymm0, ymm0, ymm0
-	LONG $0x447ffdc5; WORD $0x6024 // vmovdqa    yword [rsp + 96], ymm0
-	LONG $0xed76d5c5               // vpcmpeqd    ymm5, ymm5, ymm5
-	LONG $0xff76c5c5               // vpcmpeqd    ymm7, ymm7, ymm7
-	LONG $0x761d41c4; BYTE $0xe4   // vpcmpeqd    ymm12, ymm12, ymm12
-	LONG $0x762d41c4; BYTE $0xd2   // vpcmpeqd    ymm10, ymm10, ymm10
-	LONG $0x762541c4; BYTE $0xdb   // vpcmpeqd    ymm11, ymm11, ymm11
-	LONG $0x761541c4; BYTE $0xed   // vpcmpeqd    ymm13, ymm13, ymm13
-	LONG $0xc0eff9c5               // vpxor    xmm0, xmm0, xmm0
-	LONG $0x447ffdc5; WORD $0x2024 // vmovdqa    yword [rsp + 32], ymm0
-	LONG $0xc0eff9c5               // vpxor    xmm0, xmm0, xmm0
-	LONG $0x047ffdc5; BYTE $0x24   // vmovdqa    yword [rsp], ymm0
-	LONG $0xdbefe1c5               // vpxor    xmm3, xmm3, xmm3
-	LONG $0xef3141c4; BYTE $0xc9   // vpxor    xmm9, xmm9, xmm9
+	LONG $0x597de2c4; WORD $0x0045 // vpbroadcastq    ymm0, qword 0[rbp] /* [rip + .LCPI3_0] */
+	LONG $0xe476ddc5               // vpcmpeqd    ymm4, ymm4, ymm4
+	LONG $0xdb76e5c5               // vpcmpeqd    ymm3, ymm3, ymm3
+	LONG $0xd276edc5               // vpcmpeqd    ymm2, ymm2, ymm2
 	LONG $0xef3941c4; BYTE $0xc0   // vpxor    xmm8, xmm8, xmm8
-	LONG $0xef0141c4; BYTE $0xff   // vpxor    xmm15, xmm15, xmm15
-	LONG $0xc0eff9c5               // vpxor    xmm0, xmm0, xmm0
-
-LBB3_9:
-	QUAD $0x0000e0c78c6ffec5; BYTE $0x00 // vmovdqu    ymm1, yword [rdi + 8*rax + 224]
-	LONG $0xd1ef8dc5                     // vpxor    ymm2, ymm14, ymm1
-	LONG $0xf36ffdc5                     // vmovdqa    ymm6, ymm3
-	LONG $0xef15c1c4; BYTE $0xde         // vpxor    ymm3, ymm13, ymm14
-	LONG $0x376de2c4; BYTE $0xdb         // vpcmpgtq    ymm3, ymm2, ymm3
-	LONG $0x4b75c3c4; WORD $0x30dd       // vblendvpd    ymm3, ymm1, ymm13, ymm3
-	QUAD $0x000080249c29fdc5; BYTE $0x00 // vmovapd    yword [rsp + 128], ymm3
-	LONG $0xd8ef8dc5                     // vpxor    ymm3, ymm14, ymm0
-	LONG $0x3765e2c4; BYTE $0xd2         // vpcmpgtq    ymm2, ymm3, ymm2
-	LONG $0x4b75e3c4; WORD $0x20c0       // vblendvpd    ymm0, ymm1, ymm0, ymm2
-	QUAD $0x0000e0248429fdc5; BYTE $0x00 // vmovapd    yword [rsp + 224], ymm0
-	QUAD $0x0000c0c7846ffec5; BYTE $0x00 // vmovdqu    ymm0, yword [rdi + 8*rax + 192]
-	LONG $0xc8ef8dc5                     // vpxor    ymm1, ymm14, ymm0
-	LONG $0xef25c1c4; BYTE $0xd6         // vpxor    ymm2, ymm11, ymm14
-	LONG $0x3775e2c4; BYTE $0xd2         // vpcmpgtq    ymm2, ymm1, ymm2
-	LONG $0x4b7dc3c4; WORD $0x20d3       // vblendvpd    ymm2, ymm0, ymm11, ymm2
-	QUAD $0x0000a0249429fdc5; BYTE $0x00 // vmovapd    yword [rsp + 160], ymm2
-	LONG $0xef05c1c4; BYTE $0xd6         // vpxor    ymm2, ymm15, ymm14
-	LONG $0x376de2c4; BYTE $0xc9         // vpcmpgtq    ymm1, ymm2, ymm1
-	LONG $0x4b7dc3c4; WORD $0x10c7       // vblendvpd    ymm0, ymm0, ymm15, ymm1
-	QUAD $0x0000c0248429fdc5; BYTE $0x00 // vmovapd    yword [rsp + 192], ymm0
-	QUAD $0x0000a0c7846ffec5; BYTE $0x00 // vmovdqu    ymm0, yword [rdi + 8*rax + 160]
-	LONG $0xc8ef8dc5                     // vpxor    ymm1, ymm14, ymm0
-	LONG $0xef2dc1c4; BYTE $0xd6         // vpxor    ymm2, ymm10, ymm14
-	LONG $0x3775e2c4; BYTE $0xd2         // vpcmpgtq    ymm2, ymm1, ymm2
-	LONG $0x6f7dc1c4; BYTE $0xd8         // vmovdqa    ymm3, ymm8
-	LONG $0x4b7d43c4; WORD $0x20c2       // vblendvpd    ymm8, ymm0, ymm10, ymm2
-	LONG $0xd3ef8dc5                     // vpxor    ymm2, ymm14, ymm3
-	LONG $0x376de2c4; BYTE $0xc9         // vpcmpgtq    ymm1, ymm2, ymm1
-	LONG $0x4b7d63c4; WORD $0x10eb       // vblendvpd    ymm13, ymm0, ymm3, ymm1
-	QUAD $0x000080c7846ffec5; BYTE $0x00 // vmovdqu    ymm0, yword [rdi + 8*rax + 128]
-	LONG $0xd0ef8dc5                     // vpxor    ymm2, ymm14, ymm0
-	LONG $0xef1dc1c4; BYTE $0xce         // vpxor    ymm1, ymm12, ymm14
-	LONG $0x376de2c4; BYTE $0xc9         // vpcmpgtq    ymm1, ymm2, ymm1
-	LONG $0x4b7dc3c4; WORD $0x10cc       // vblendvpd    ymm1, ymm0, ymm12, ymm1
-	LONG $0xef35c1c4; BYTE $0xde         // vpxor    ymm3, ymm9, ymm14
-	LONG $0x3765e2c4; BYTE $0xd2         // vpcmpgtq    ymm2, ymm3, ymm2
-	LONG $0x4b7d43c4; WORD $0x20e1       // vblendvpd    ymm12, ymm0, ymm9, ymm2
-	LONG $0x546ffec5; WORD $0x60c7       // vmovdqu    ymm2, yword [rdi + 8*rax + 96]
-	LONG $0xc7ef8dc5                     // vpxor    ymm0, ymm14, ymm7
-	LONG $0xdaef8dc5                     // vpxor    ymm3, ymm14, ymm2
-	LONG $0x3765e2c4; BYTE $0xc0         // vpcmpgtq    ymm0, ymm3, ymm0
-	LONG $0x4b6de3c4; WORD $0x00c7       // vblendvpd    ymm0, ymm2, ymm7, ymm0
-	LONG $0xfc6f7dc5                     // vmovdqa    ymm15, ymm4
-	LONG $0xe6ef8dc5                     // vpxor    ymm4, ymm14, ymm6
-	LONG $0x375de2c4; BYTE $0xdb         // vpcmpgtq    ymm3, ymm4, ymm3
-	LONG $0x4b6d63c4; WORD $0x30d6       // vblendvpd    ymm10, ymm2, ymm6, ymm3
-	LONG $0x546ffec5; WORD $0x40c7       // vmovdqu    ymm2, yword [rdi + 8*rax + 64]
-	LONG $0xddef8dc5                     // vpxor    ymm3, ymm14, ymm5
-	LONG $0xe2ef8dc5                     // vpxor    ymm4, ymm14, ymm2
-	LONG $0x375de2c4; BYTE $0xdb         // vpcmpgtq    ymm3, ymm4, ymm3
-	LONG $0x4b6de3c4; WORD $0x30ed       // vblendvpd    ymm5, ymm2, ymm5, ymm3
-	LONG $0x346ffdc5; BYTE $0x24         // vmovdqa    ymm6, yword [rsp]
-	LONG $0xdeef8dc5                     // vpxor    ymm3, ymm14, ymm6
-	LONG $0x3765e2c4; BYTE $0xdc         // vpcmpgtq    ymm3, ymm3, ymm4
-	LONG $0x4b6d63c4; WORD $0x30ce       // vblendvpd    ymm9, ymm2, ymm6, ymm3
-	LONG $0x146ffec5; BYTE $0xc7         // vmovdqu    ymm2, yword [rdi + 8*rax]
-	LONG $0x7c6ffdc5; WORD $0x4024       // vmovdqa    ymm7, yword [rsp + 64]
-	LONG $0xdfef8dc5                     // vpxor    ymm3, ymm14, ymm7
-	LONG $0xe2ef8dc5                     // vpxor    ymm4, ymm14, ymm2
-	LONG $0x375de2c4; BYTE $0xdb         // vpcmpgtq    ymm3, ymm4, ymm3
-	LONG $0x4b6de3c4; WORD $0x30df       // vblendvpd    ymm3, ymm2, ymm7, ymm3
-	LONG $0xef0541c4; BYTE $0xde         // vpxor    ymm11, ymm15, ymm14
-	LONG $0x3725e2c4; BYTE $0xe4         // vpcmpgtq    ymm4, ymm11, ymm4
-	LONG $0x4b6dc3c4; WORD $0x40e7       // vblendvpd    ymm4, ymm2, ymm15, ymm4
-	LONG $0x546ffec5; WORD $0x20c7       // vmovdqu    ymm2, yword [rdi + 8*rax + 32]
-	LONG $0x7c6f7dc5; WORD $0x6024       // vmovdqa    ymm15, yword [rsp + 96]
-	LONG $0xef0541c4; BYTE $0xde         // vpxor    ymm11, ymm15, ymm14
-	LONG $0xfaef8dc5                     // vpxor    ymm7, ymm14, ymm2
-	LONG $0x374542c4; BYTE $0xdb         // vpcmpgtq    ymm11, ymm7, ymm11
-	LONG $0x4b6d43c4; WORD $0xb0df       // vblendvpd    ymm11, ymm2, ymm15, ymm11
-	LONG $0x746ffdc5; WORD $0x2024       // vmovdqa    ymm6, yword [rsp + 32]
-	LONG $0xfeef0dc5                     // vpxor    ymm15, ymm14, ymm6
-	LONG $0x3705e2c4; BYTE $0xff         // vpcmpgtq    ymm7, ymm15, ymm7
-	LONG $0x4b6de3c4; WORD $0x70d6       // vblendvpd    ymm2, ymm2, ymm6, ymm7
-	QUAD $0x000120c7b46ffec5; BYTE $0x00 // vmovdqu    ymm6, yword [rdi + 8*rax + 288]
-	LONG $0x5725c1c4; BYTE $0xfe         // vxorpd    ymm7, ymm11, ymm14
-	LONG $0xfeef0dc5                     // vpxor    ymm15, ymm14, ymm6
-	LONG $0x3705e2c4; BYTE $0xff         // vpcmpgtq    ymm7, ymm15, ymm7
-	LONG $0x4b4dc3c4; WORD $0x70fb       // vblendvpd    ymm7, ymm6, ymm11, ymm7
-	LONG $0x7c29fdc5; WORD $0x6024       // vmovapd    yword [rsp + 96], ymm7
-	LONG $0xfa578dc5                     // vxorpd    ymm7, ymm14, ymm2
-	LONG $0x3745c2c4; BYTE $0xff         // vpcmpgtq    ymm7, ymm7, ymm15
-	LONG $0x4b4de3c4; WORD $0x70d2       // vblendvpd    ymm2, ymm6, ymm2, ymm7
-	LONG $0x5429fdc5; WORD $0x4024       // vmovapd    yword [rsp + 64], ymm2
-	QUAD $0x000100c7b46ffec5; BYTE $0x00 // vmovdqu    ymm6, yword [rdi + 8*rax + 256]
-	LONG $0xfb578dc5                     // vxorpd    ymm7, ymm14, ymm3
-	LONG $0xdeef0dc5                     // vpxor    ymm11, ymm14, ymm6
-	LONG $0x3725e2c4; BYTE $0xff         // vpcmpgtq    ymm7, ymm11, ymm7
-	LONG $0x4b4de3c4; WORD $0x70d3       // vblendvpd    ymm2, ymm6, ymm3, ymm7
-	LONG $0x1429fdc5; BYTE $0x24         // vmovapd    yword [rsp], ymm2
-	LONG $0xfc578dc5                     // vxorpd    ymm7, ymm14, ymm4
-	LONG $0x3745c2c4; BYTE $0xfb         // vpcmpgtq    ymm7, ymm7, ymm11
-	LONG $0x4b4de3c4; WORD $0x70d4       // vblendvpd    ymm2, ymm6, ymm4, ymm7
-	LONG $0x5429fdc5; WORD $0x2024       // vmovapd    yword [rsp + 32], ymm2
-	QUAD $0x000140c7b46ffec5; BYTE $0x00 // vmovdqu    ymm6, yword [rdi + 8*rax + 320]
-	LONG $0xfd578dc5                     // vxorpd    ymm7, ymm14, ymm5
-	LONG $0xdeef0dc5                     // vpxor    ymm11, ymm14, ymm6
-	LONG $0x3725e2c4; BYTE $0xff         // vpcmpgtq    ymm7, ymm11, ymm7
-	LONG $0x4b4de3c4; WORD $0x70ed       // vblendvpd    ymm5, ymm6, ymm5, ymm7
-	LONG $0x5735c1c4; BYTE $0xfe         // vxorpd    ymm7, ymm9, ymm14
-	LONG $0x3745c2c4; BYTE $0xfb         // vpcmpgtq    ymm7, ymm7, ymm11
-	LONG $0x4b4dc3c4; WORD $0x70f9       // vblendvpd    ymm7, ymm6, ymm9, ymm7
-	QUAD $0x000160c7b46ffec5; BYTE $0x00 // vmovdqu    ymm6, yword [rdi + 8*rax + 352]
-	LONG $0xc8570dc5                     // vxorpd    ymm9, ymm14, ymm0
-	LONG $0xdeef0dc5                     // vpxor    ymm11, ymm14, ymm6
-	LONG $0x372542c4; BYTE $0xc9         // vpcmpgtq    ymm9, ymm11, ymm9
-	LONG $0x4b4d63c4; WORD $0x90c8       // vblendvpd    ymm9, ymm6, ymm0, ymm9
-	LONG $0x572dc1c4; BYTE $0xc6         // vxorpd    ymm0, ymm10, ymm14
-	LONG $0x377dc2c4; BYTE $0xc3         // vpcmpgtq    ymm0, ymm0, ymm11
-	LONG $0x4b4d43c4; WORD $0x00d2       // vblendvpd    ymm10, ymm6, ymm10, ymm0
-	QUAD $0x000180c7b46ffec5; BYTE $0x00 // vmovdqu    ymm6, yword [rdi + 8*rax + 384]
-	LONG $0xc1578dc5                     // vxorpd    ymm0, ymm14, ymm1
-	LONG $0xdeef0dc5                     // vpxor    ymm11, ymm14, ymm6
-	LONG $0x3725e2c4; BYTE $0xc0         // vpcmpgtq    ymm0, ymm11, ymm0
-	LONG $0x4b4de3c4; WORD $0x00e1       // vblendvpd    ymm4, ymm6, ymm1, ymm0
-	LONG $0x571dc1c4; BYTE $0xce         // vxorpd    ymm1, ymm12, ymm14
-	LONG $0x3775c2c4; BYTE $0xcb         // vpcmpgtq    ymm1, ymm1, ymm11
-	LONG $0x4b4dc3c4; WORD $0x10dc       // vblendvpd    ymm3, ymm6, ymm12, ymm1
-	QUAD $0x0001a0c79c6f7ec5; BYTE $0x00 // vmovdqu    ymm11, yword [rdi + 8*rax + 416]
-	LONG $0x573dc1c4; BYTE $0xf6         // vxorpd    ymm6, ymm8, ymm14
-	LONG $0xef2541c4; BYTE $0xe6         // vpxor    ymm12, ymm11, ymm14
-	LONG $0x371de2c4; BYTE $0xf6         // vpcmpgtq    ymm6, ymm12, ymm6
-	LONG $0x4b25c3c4; WORD $0x60f0       // vblendvpd    ymm6, ymm11, ymm8, ymm6
-	LONG $0x571541c4; BYTE $0xc6         // vxorpd    ymm8, ymm13, ymm14
-	LONG $0x373d42c4; BYTE $0xc4         // vpcmpgtq    ymm8, ymm8, ymm12
-	LONG $0x4b2543c4; WORD $0x80e5       // vblendvpd    ymm12, ymm11, ymm13, ymm8
-	QUAD $0x0001c0c79c6f7ec5; BYTE $0x00 // vmovdqu    ymm11, yword [rdi + 8*rax + 448]
-	QUAD $0x0000a024846ffdc5; BYTE $0x00 // vmovdqa    ymm0, yword [rsp + 160]
-	LONG $0xc0ef0dc5                     // vpxor    ymm8, ymm14, ymm0
-	LONG $0xef2541c4; BYTE $0xee         // vpxor    ymm13, ymm11, ymm14
-	LONG $0x371542c4; BYTE $0xc0         // vpcmpgtq    ymm8, ymm13, ymm8
-	LONG $0x4b2563c4; WORD $0x80c0       // vblendvpd    ymm8, ymm11, ymm0, ymm8
-	QUAD $0x0000c024846ffdc5; BYTE $0x00 // vmovdqa    ymm0, yword [rsp + 192]
-	LONG $0xf8ef0dc5                     // vpxor    ymm15, ymm14, ymm0
-	LONG $0x370542c4; BYTE $0xed         // vpcmpgtq    ymm13, ymm15, ymm13
-	LONG $0x4b2563c4; WORD $0xd0e8       // vblendvpd    ymm13, ymm11, ymm0, ymm13
-	QUAD $0x0001e0c79c6f7ec5; BYTE $0x00 // vmovdqu    ymm11, yword [rdi + 8*rax + 480]
-	QUAD $0x000080248c6ffdc5; BYTE $0x00 // vmovdqa    ymm1, yword [rsp + 128]
-	LONG $0xf9ef0dc5                     // vpxor    ymm15, ymm14, ymm1
-	LONG $0xef25c1c4; BYTE $0xc6         // vpxor    ymm0, ymm11, ymm14
-	LONG $0x377d42c4; BYTE $0xff         // vpcmpgtq    ymm15, ymm0, ymm15
-	LONG $0x4b25e3c4; WORD $0xf0c9       // vblendvpd    ymm1, ymm11, ymm1, ymm15
-	QUAD $0x0000e024946ffdc5; BYTE $0x00 // vmovdqa    ymm2, yword [rsp + 224]
-	LONG $0xfaef0dc5                     // vpxor    ymm15, ymm14, ymm2
-	LONG $0x3705e2c4; BYTE $0xc0         // vpcmpgtq    ymm0, ymm15, ymm0
-	LONG $0x4b2563c4; WORD $0x00fa       // vblendvpd    ymm15, ymm11, ymm2, ymm0
-	QUAD $0x0002e0c7846ffec5; BYTE $0x00 // vmovdqu    ymm0, yword [rdi + 8*rax + 736]
-	LONG $0xd9570dc5                     // vxorpd    ymm11, ymm14, ymm1
-	LONG $0xd0ef8dc5                     // vpxor    ymm2, ymm14, ymm0
-	LONG $0x376d42c4; BYTE $0xdb         // vpcmpgtq    ymm11, ymm2, ymm11
-	LONG $0x4b7de3c4; WORD $0xb0c9       // vblendvpd    ymm1, ymm0, ymm1, ymm11
-	QUAD $0x000080248c29fdc5; BYTE $0x00 // vmovapd    yword [rsp + 128], ymm1
-	LONG $0x5705c1c4; BYTE $0xce         // vxorpd    ymm1, ymm15, ymm14
-	LONG $0x3775e2c4; BYTE $0xca         // vpcmpgtq    ymm1, ymm1, ymm2
-	LONG $0x4b7dc3c4; WORD $0x10c7       // vblendvpd    ymm0, ymm0, ymm15, ymm1
-	QUAD $0x0000e0248429fdc5; BYTE $0x00 // vmovapd    yword [rsp + 224], ymm0
-	QUAD $0x0002c0c7846ffec5; BYTE $0x00 // vmovdqu    ymm0, yword [rdi + 8*rax + 704]
-	LONG $0x573dc1c4; BYTE $0xce         // vxorpd    ymm1, ymm8, ymm14
-	LONG $0xd0ef8dc5                     // vpxor    ymm2, ymm14, ymm0
-	LONG $0x376de2c4; BYTE $0xc9         // vpcmpgtq    ymm1, ymm2, ymm1
-	LONG $0x4b7dc3c4; WORD $0x10c8       // vblendvpd    ymm1, ymm0, ymm8, ymm1
-	QUAD $0x0000a0248c29fdc5; BYTE $0x00 // vmovapd    yword [rsp + 160], ymm1
-	LONG $0x5715c1c4; BYTE $0xce         // vxorpd    ymm1, ymm13, ymm14
-	LONG $0x3775e2c4; BYTE $0xca         // vpcmpgtq    ymm1, ymm1, ymm2
-	LONG $0x4b7dc3c4; WORD $0x10c5       // vblendvpd    ymm0, ymm0, ymm13, ymm1
-	QUAD $0x0000c0248429fdc5; BYTE $0x00 // vmovapd    yword [rsp + 192], ymm0
-	QUAD $0x0002a0c7846ffec5; BYTE $0x00 // vmovdqu    ymm0, yword [rdi + 8*rax + 672]
-	LONG $0xce578dc5                     // vxorpd    ymm1, ymm14, ymm6
-	LONG $0xd0ef8dc5                     // vpxor    ymm2, ymm14, ymm0
-	LONG $0x376de2c4; BYTE $0xc9         // vpcmpgtq    ymm1, ymm2, ymm1
-	LONG $0x4b7d63c4; WORD $0x10fe       // vblendvpd    ymm15, ymm0, ymm6, ymm1
-	LONG $0x571dc1c4; BYTE $0xce         // vxorpd    ymm1, ymm12, ymm14
-	LONG $0x3775e2c4; BYTE $0xca         // vpcmpgtq    ymm1, ymm1, ymm2
-	LONG $0x4b7d43c4; WORD $0x10ec       // vblendvpd    ymm13, ymm0, ymm12, ymm1
-	QUAD $0x000280c7846ffec5; BYTE $0x00 // vmovdqu    ymm0, yword [rdi + 8*rax + 640]
-	LONG $0xcc578dc5                     // vxorpd    ymm1, ymm14, ymm4
-	LONG $0xd0ef8dc5                     // vpxor    ymm2, ymm14, ymm0
-	LONG $0x376de2c4; BYTE $0xc9         // vpcmpgtq    ymm1, ymm2, ymm1
-	LONG $0x4b7d63c4; WORD $0x10e4       // vblendvpd    ymm12, ymm0, ymm4, ymm1
-	LONG $0xcb578dc5                     // vxorpd    ymm1, ymm14, ymm3
-	LONG $0x3775e2c4; BYTE $0xca         // vpcmpgtq    ymm1, ymm1, ymm2
-	LONG $0x4b7d63c4; WORD $0x10c3       // vblendvpd    ymm8, ymm0, ymm3, ymm1
-	QUAD $0x000260c7946ffec5; BYTE $0x00 // vmovdqu    ymm2, yword [rdi + 8*rax + 608]
-	LONG $0x5735c1c4; BYTE $0xce         // vxorpd    ymm1, ymm9, ymm14
-	LONG $0xdaef8dc5                     // vpxor    ymm3, ymm14, ymm2
-	LONG $0x3765e2c4; BYTE $0xc9         // vpcmpgtq    ymm1, ymm3, ymm1
-	LONG $0x4b6dc3c4; WORD $0x10c9       // vblendvpd    ymm1, ymm2, ymm9, ymm1
-	LONG $0x572dc1c4; BYTE $0xe6         // vxorpd    ymm4, ymm10, ymm14
-	LONG $0x375de2c4; BYTE $0xdb         // vpcmpgtq    ymm3, ymm4, ymm3
-	LONG $0x4b6d43c4; WORD $0x30d2       // vblendvpd    ymm10, ymm2, ymm10, ymm3
-	QUAD $0x000240c7946ffec5; BYTE $0x00 // vmovdqu    ymm2, yword [rdi + 8*rax + 576]
-	LONG $0xdd578dc5                     // vxorpd    ymm3, ymm14, ymm5
-	LONG $0xe2ef8dc5                     // vpxor    ymm4, ymm14, ymm2
-	LONG $0x375de2c4; BYTE $0xdb         // vpcmpgtq    ymm3, ymm4, ymm3
-	LONG $0x4b6de3c4; WORD $0x30ed       // vblendvpd    ymm5, ymm2, ymm5, ymm3
-	LONG $0xdf578dc5                     // vxorpd    ymm3, ymm14, ymm7
-	LONG $0x3765e2c4; BYTE $0xdc         // vpcmpgtq    ymm3, ymm3, ymm4
-	LONG $0x4b6d63c4; WORD $0x30cf       // vblendvpd    ymm9, ymm2, ymm7, ymm3
-	QUAD $0x000200c7946ffec5; BYTE $0x00 // vmovdqu    ymm2, yword [rdi + 8*rax + 512]
-	LONG $0x046ffdc5; BYTE $0x24         // vmovdqa    ymm0, yword [rsp]
-	LONG $0xd8ef8dc5                     // vpxor    ymm3, ymm14, ymm0
-	LONG $0xe2ef8dc5                     // vpxor    ymm4, ymm14, ymm2
-	LONG $0x375de2c4; BYTE $0xdb         // vpcmpgtq    ymm3, ymm4, ymm3
-	LONG $0x4b6de3c4; WORD $0x30d8       // vblendvpd    ymm3, ymm2, ymm0, ymm3
-	LONG $0x446ffdc5; WORD $0x2024       // vmovdqa    ymm0, yword [rsp + 32]
-	LONG $0xf0ef8dc5                     // vpxor    ymm6, ymm14, ymm0
-	LONG $0x374de2c4; BYTE $0xe4         // vpcmpgtq    ymm4, ymm6, ymm4
-	LONG $0x4b6de3c4; WORD $0x40e0       // vblendvpd    ymm4, ymm2, ymm0, ymm4
-	QUAD $0x000220c7946ffec5; BYTE $0x00 // vmovdqu    ymm2, yword [rdi + 8*rax + 544]
-	LONG $0x446ffdc5; WORD $0x6024       // vmovdqa    ymm0, yword [rsp + 96]
-	LONG $0xf0ef8dc5                     // vpxor    ymm6, ymm14, ymm0
-	LONG $0xfaef8dc5                     // vpxor    ymm7, ymm14, ymm2
-	LONG $0x3745e2c4; BYTE $0xf6         // vpcmpgtq    ymm6, ymm7, ymm6
-	LONG $0x4b6de3c4; WORD $0x60f0       // vblendvpd    ymm6, ymm2, ymm0, ymm6
-	LONG $0x446ffdc5; WORD $0x4024       // vmovdqa    ymm0, yword [rsp + 64]
-	LONG $0xd8ef0dc5                     // vpxor    ymm11, ymm14, ymm0
-	LONG $0x3725e2c4; BYTE $0xff         // vpcmpgtq    ymm7, ymm11, ymm7
-	LONG $0x4b6de3c4; WORD $0x70d0       // vblendvpd    ymm2, ymm2, ymm0, ymm7
-	QUAD $0x000320c7bc6ffec5; BYTE $0x00 // vmovdqu    ymm7, yword [rdi + 8*rax + 800]
-	LONG $0xde570dc5                     // vxorpd    ymm11, ymm14, ymm6
-	LONG $0xc7ef8dc5                     // vpxor    ymm0, ymm14, ymm7
-	LONG $0x377d42c4; BYTE $0xdb         // vpcmpgtq    ymm11, ymm0, ymm11
-	LONG $0x4b45e3c4; WORD $0xb0f6       // vblendvpd    ymm6, ymm7, ymm6, ymm11
-	LONG $0x7429fdc5; WORD $0x6024       // vmovapd    yword [rsp + 96], ymm6
-	LONG $0xf2578dc5                     // vxorpd    ymm6, ymm14, ymm2
-	LONG $0x374de2c4; BYTE $0xc0         // vpcmpgtq    ymm0, ymm6, ymm0
-	LONG $0x4b45e3c4; WORD $0x00c2       // vblendvpd    ymm0, ymm7, ymm2, ymm0
-	LONG $0x4429fdc5; WORD $0x2024       // vmovapd    yword [rsp + 32], ymm0
-	QUAD $0x000300c7846ffec5; BYTE $0x00 // vmovdqu    ymm0, yword [rdi + 8*rax + 768]
-	LONG $0xd3578dc5                     // vxorpd    ymm2, ymm14, ymm3
-	LONG $0xf8ef8dc5                     // vpxor    ymm7, ymm14, ymm0
-	LONG $0x3745e2c4; BYTE $0xd2         // vpcmpgtq    ymm2, ymm7, ymm2
-	LONG $0x4b7de3c4; WORD $0x20d3       // vblendvpd    ymm2, ymm0, ymm3, ymm2
-	LONG $0x5429fdc5; WORD $0x4024       // vmovapd    yword [rsp + 64], ymm2
-	LONG $0xd4578dc5                     // vxorpd    ymm2, ymm14, ymm4
-	LONG $0x376de2c4; BYTE $0xd7         // vpcmpgtq    ymm2, ymm2, ymm7
-	LONG $0x4b7de3c4; WORD $0x20e4       // vblendvpd    ymm4, ymm0, ymm4, ymm2
-	QUAD $0x000340c7846ffec5; BYTE $0x00 // vmovdqu    ymm0, yword [rdi + 8*rax + 832]
-	LONG $0xd5578dc5                     // vxorpd    ymm2, ymm14, ymm5
-	LONG $0xd8ef8dc5                     // vpxor    ymm3, ymm14, ymm0
-	LONG $0x3765e2c4; BYTE $0xd2         // vpcmpgtq    ymm2, ymm3, ymm2
-	LONG $0x4b7de3c4; WORD $0x20ed       // vblendvpd    ymm5, ymm0, ymm5, ymm2
-	LONG $0x5735c1c4; BYTE $0xd6         // vxorpd    ymm2, ymm9, ymm14
-	LONG $0x376de2c4; BYTE $0xd3         // vpcmpgtq    ymm2, ymm2, ymm3
-	LONG $0x4b7dc3c4; WORD $0x20c1       // vblendvpd    ymm0, ymm0, ymm9, ymm2
-	LONG $0x0429fdc5; BYTE $0x24         // vmovapd    yword [rsp], ymm0
-	QUAD $0x000360c7846ffec5; BYTE $0x00 // vmovdqu    ymm0, yword [rdi + 8*rax + 864]
-	LONG $0xd1578dc5                     // vxorpd    ymm2, ymm14, ymm1
-	LONG $0xd8ef8dc5                     // vpxor    ymm3, ymm14, ymm0
-	LONG $0x3765e2c4; BYTE $0xd2         // vpcmpgtq    ymm2, ymm3, ymm2
-	LONG $0x4b7de3c4; WORD $0x20f9       // vblendvpd    ymm7, ymm0, ymm1, ymm2
-	LONG $0x572dc1c4; BYTE $0xce         // vxorpd    ymm1, ymm10, ymm14
-	LONG $0x3775e2c4; BYTE $0xcb         // vpcmpgtq    ymm1, ymm1, ymm3
-	LONG $0x4b7dc3c4; WORD $0x10da       // vblendvpd    ymm3, ymm0, ymm10, ymm1
-	QUAD $0x000380c7846ffec5; BYTE $0x00 // vmovdqu    ymm0, yword [rdi + 8*rax + 896]
-	LONG $0x571dc1c4; BYTE $0xce         // vxorpd    ymm1, ymm12, ymm14
-	LONG $0xd0ef8dc5                     // vpxor    ymm2, ymm14, ymm0
-	LONG $0x376de2c4; BYTE $0xc9         // vpcmpgtq    ymm1, ymm2, ymm1
-	LONG $0x4b7d43c4; WORD $0x10e4       // vblendvpd    ymm12, ymm0, ymm12, ymm1
-	LONG $0x573dc1c4; BYTE $0xce         // vxorpd    ymm1, ymm8, ymm14
-	LONG $0x3775e2c4; BYTE $0xca         // vpcmpgtq    ymm1, ymm1, ymm2
-	LONG $0x4b7d43c4; WORD $0x10c8       // vblendvpd    ymm9, ymm0, ymm8, ymm1
-	QUAD $0x0003a0c7846ffec5; BYTE $0x00 // vmovdqu    ymm0, yword [rdi + 8*rax + 928]
-	LONG $0x5705c1c4; BYTE $0xce         // vxorpd    ymm1, ymm15, ymm14
-	LONG $0xd0ef8dc5                     // vpxor    ymm2, ymm14, ymm0
-	LONG $0x376de2c4; BYTE $0xc9         // vpcmpgtq    ymm1, ymm2, ymm1
-	LONG $0x4b7d43c4; WORD $0x10d7       // vblendvpd    ymm10, ymm0, ymm15, ymm1
-	LONG $0x5715c1c4; BYTE $0xce         // vxorpd    ymm1, ymm13, ymm14
-	LONG $0x3775e2c4; BYTE $0xca         // vpcmpgtq    ymm1, ymm1, ymm2
-	LONG $0x4b7d43c4; WORD $0x10c5       // vblendvpd    ymm8, ymm0, ymm13, ymm1
-	QUAD $0x0003c0c7846ffec5; BYTE $0x00 // vmovdqu    ymm0, yword [rdi + 8*rax + 960]
-	QUAD $0x0000a024b46ffdc5; BYTE $0x00 // vmovdqa    ymm6, yword [rsp + 160]
-	LONG $0xceef8dc5                     // vpxor    ymm1, ymm14, ymm6
-	LONG $0xd0ef8dc5                     // vpxor    ymm2, ymm14, ymm0
-	LONG $0x376de2c4; BYTE $0xc9         // vpcmpgtq    ymm1, ymm2, ymm1
-	LONG $0x4b7d63c4; WORD $0x10de       // vblendvpd    ymm11, ymm0, ymm6, ymm1
-	QUAD $0x0000c024b46ffdc5; BYTE $0x00 // vmovdqa    ymm6, yword [rsp + 192]
-	LONG $0xceef8dc5                     // vpxor    ymm1, ymm14, ymm6
-	LONG $0x3775e2c4; BYTE $0xca         // vpcmpgtq    ymm1, ymm1, ymm2
-	LONG $0x4b7d63c4; WORD $0x10fe       // vblendvpd    ymm15, ymm0, ymm6, ymm1
-	QUAD $0x0003e0c7846ffec5; BYTE $0x00 // vmovdqu    ymm0, yword [rdi + 8*rax + 992]
-	QUAD $0x00008024b46ffdc5; BYTE $0x00 // vmovdqa    ymm6, yword [rsp + 128]
-	LONG $0xceef8dc5                     // vpxor    ymm1, ymm14, ymm6
-	LONG $0xd0ef8dc5                     // vpxor    ymm2, ymm14, ymm0
-	LONG $0x376de2c4; BYTE $0xc9         // vpcmpgtq    ymm1, ymm2, ymm1
-	LONG $0x4b7d63c4; WORD $0x10ee       // vblendvpd    ymm13, ymm0, ymm6, ymm1
-	QUAD $0x0000e024b46ffdc5; BYTE $0x00 // vmovdqa    ymm6, yword [rsp + 224]
-	LONG $0xceef8dc5                     // vpxor    ymm1, ymm14, ymm6
-	LONG $0x3775e2c4; BYTE $0xca         // vpcmpgtq    ymm1, ymm1, ymm2
-	LONG $0x4b7de3c4; WORD $0x10c6       // vblendvpd    ymm0, ymm0, ymm6, ymm1
-	LONG $0x80e88348                     // sub    rax, -128
-	LONG $0x04c28349                     // add    r10, 4
-	JNE  LBB3_9
-
-LBB3_10:
-	QUAD $0x0000802494297cc5; BYTE $0x00 // vmovaps    yword [rsp + 128], ymm10
-	WORD $0x854d; BYTE $0xc9             // test    r9, r9
-	LONG $0x6f7d41c4; BYTE $0xd4         // vmovdqa    ymm10, ymm12
-	LONG $0xe36f7dc5                     // vmovdqa    ymm12, ymm3
-	JE   LBB3_13
-	LONG $0xc7048d48                     // lea    rax, [rdi + 8*rax]
-	WORD $0xf749; BYTE $0xd9             // neg    r9
-	LONG $0x597d62c4; WORD $0x0075       // vpbroadcastq    ymm14, qword 0[rbp] /* [rip + .LCPI3_0] */
-
-LBB3_12:
-	LONG $0x486ffec5; BYTE $0x20         // vmovdqu    ymm1, yword [rax + 32]
-	LONG $0xf76ffdc5                     // vmovdqa    ymm6, ymm7
-	LONG $0xfd6ffdc5                     // vmovdqa    ymm7, ymm5
-	LONG $0xec6ffdc5                     // vmovdqa    ymm5, ymm4
-	LONG $0x646ffdc5; WORD $0x6024       // vmovdqa    ymm4, yword [rsp + 96]
-	LONG $0xd4ef8dc5                     // vpxor    ymm2, ymm14, ymm4
-	LONG $0xd9ef8dc5                     // vpxor    ymm3, ymm14, ymm1
-	LONG $0x3765e2c4; BYTE $0xd2         // vpcmpgtq    ymm2, ymm3, ymm2
-	LONG $0x4b75e3c4; WORD $0x20e4       // vblendvpd    ymm4, ymm1, ymm4, ymm2
-	LONG $0x6429fdc5; WORD $0x6024       // vmovapd    yword [rsp + 96], ymm4
-	LONG $0x646ffdc5; WORD $0x2024       // vmovdqa    ymm4, yword [rsp + 32]
-	LONG $0xd4ef8dc5                     // vpxor    ymm2, ymm14, ymm4
-	LONG $0x376de2c4; BYTE $0xd3         // vpcmpgtq    ymm2, ymm2, ymm3
-	LONG $0x4b75e3c4; WORD $0x20e4       // vblendvpd    ymm4, ymm1, ymm4, ymm2
-	LONG $0x6429fdc5; WORD $0x2024       // vmovapd    yword [rsp + 32], ymm4
-	LONG $0x086ffec5                     // vmovdqu    ymm1, yword [rax]
-	LONG $0x646ffdc5; WORD $0x4024       // vmovdqa    ymm4, yword [rsp + 64]
-	LONG $0xd4ef8dc5                     // vpxor    ymm2, ymm14, ymm4
-	LONG $0xd9ef8dc5                     // vpxor    ymm3, ymm14, ymm1
-	LONG $0x3765e2c4; BYTE $0xd2         // vpcmpgtq    ymm2, ymm3, ymm2
-	LONG $0x4b75e3c4; WORD $0x20e4       // vblendvpd    ymm4, ymm1, ymm4, ymm2
-	LONG $0x6429fdc5; WORD $0x4024       // vmovapd    yword [rsp + 64], ymm4
-	LONG $0xe56ffdc5                     // vmovdqa    ymm4, ymm5
-	LONG $0xef6ffdc5                     // vmovdqa    ymm5, ymm7
-	LONG $0xfe6ffdc5                     // vmovdqa    ymm7, ymm6
-	LONG $0xd4ef8dc5                     // vpxor    ymm2, ymm14, ymm4
-	LONG $0x376de2c4; BYTE $0xd3         // vpcmpgtq    ymm2, ymm2, ymm3
-	LONG $0x586ffec5; BYTE $0x40         // vmovdqu    ymm3, yword [rax + 64]
-	LONG $0x4b75e3c4; WORD $0x20e4       // vblendvpd    ymm4, ymm1, ymm4, ymm2
-	LONG $0xcbef8dc5                     // vpxor    ymm1, ymm14, ymm3
-	LONG $0xd5ef8dc5                     // vpxor    ymm2, ymm14, ymm5
-	LONG $0x3775e2c4; BYTE $0xd2         // vpcmpgtq    ymm2, ymm1, ymm2
-	LONG $0x4b65e3c4; WORD $0x20ed       // vblendvpd    ymm5, ymm3, ymm5, ymm2
-	LONG $0x346ffdc5; BYTE $0x24         // vmovdqa    ymm6, yword [rsp]
-	LONG $0xd6ef8dc5                     // vpxor    ymm2, ymm14, ymm6
-	LONG $0x376de2c4; BYTE $0xc9         // vpcmpgtq    ymm1, ymm2, ymm1
-	LONG $0x4b65e3c4; WORD $0x10f6       // vblendvpd    ymm6, ymm3, ymm6, ymm1
-	LONG $0x3429fdc5; BYTE $0x24         // vmovapd    yword [rsp], ymm6
-	LONG $0x486ffec5; BYTE $0x60         // vmovdqu    ymm1, yword [rax + 96]
-	LONG $0xd1ef8dc5                     // vpxor    ymm2, ymm14, ymm1
-	LONG $0xdfef8dc5                     // vpxor    ymm3, ymm14, ymm7
-	LONG $0x376de2c4; BYTE $0xdb         // vpcmpgtq    ymm3, ymm2, ymm3
-	LONG $0x4b75e3c4; WORD $0x30ff       // vblendvpd    ymm7, ymm1, ymm7, ymm3
-	LONG $0xef1dc1c4; BYTE $0xde         // vpxor    ymm3, ymm12, ymm14
-	LONG $0x3765e2c4; BYTE $0xd2         // vpcmpgtq    ymm2, ymm3, ymm2
-	QUAD $0x00000080986ffec5             // vmovdqu    ymm3, yword [rax + 128]
-	LONG $0x4b7543c4; WORD $0x20e4       // vblendvpd    ymm12, ymm1, ymm12, ymm2
-	LONG $0xcbef8dc5                     // vpxor    ymm1, ymm14, ymm3
-	LONG $0xef2dc1c4; BYTE $0xd6         // vpxor    ymm2, ymm10, ymm14
-	LONG $0x3775e2c4; BYTE $0xd2         // vpcmpgtq    ymm2, ymm1, ymm2
-	LONG $0x4b6543c4; WORD $0x20d2       // vblendvpd    ymm10, ymm3, ymm10, ymm2
-	LONG $0xef35c1c4; BYTE $0xd6         // vpxor    ymm2, ymm9, ymm14
-	LONG $0x376de2c4; BYTE $0xc9         // vpcmpgtq    ymm1, ymm2, ymm1
-	LONG $0x4b6543c4; WORD $0x10c9       // vblendvpd    ymm9, ymm3, ymm9, ymm1
-	QUAD $0x000000a0886ffec5             // vmovdqu    ymm1, yword [rax + 160]
-	LONG $0xd1ef8dc5                     // vpxor    ymm2, ymm14, ymm1
-	QUAD $0x00008024b46ffdc5; BYTE $0x00 // vmovdqa    ymm6, yword [rsp + 128]
-	LONG $0xdeef8dc5                     // vpxor    ymm3, ymm14, ymm6
-	LONG $0x376de2c4; BYTE $0xdb         // vpcmpgtq    ymm3, ymm2, ymm3
-	LONG $0x4b75e3c4; WORD $0x30f6       // vblendvpd    ymm6, ymm1, ymm6, ymm3
-	QUAD $0x00008024b429fdc5; BYTE $0x00 // vmovapd    yword [rsp + 128], ymm6
-	LONG $0xef3dc1c4; BYTE $0xde         // vpxor    ymm3, ymm8, ymm14
-	LONG $0x3765e2c4; BYTE $0xd2         // vpcmpgtq    ymm2, ymm3, ymm2
-	QUAD $0x000000c0986ffec5             // vmovdqu    ymm3, yword [rax + 192]
-	LONG $0x4b7543c4; WORD $0x20c0       // vblendvpd    ymm8, ymm1, ymm8, ymm2
-	LONG $0xcbef8dc5                     // vpxor    ymm1, ymm14, ymm3
-	LONG $0xef25c1c4; BYTE $0xd6         // vpxor    ymm2, ymm11, ymm14
-	LONG $0x3775e2c4; BYTE $0xd2         // vpcmpgtq    ymm2, ymm1, ymm2
-	LONG $0x4b6543c4; WORD $0x20db       // vblendvpd    ymm11, ymm3, ymm11, ymm2
-	LONG $0xef05c1c4; BYTE $0xd6         // vpxor    ymm2, ymm15, ymm14
-	LONG $0x376de2c4; BYTE $0xc9         // vpcmpgtq    ymm1, ymm2, ymm1
-	LONG $0x4b6543c4; WORD $0x10ff       // vblendvpd    ymm15, ymm3, ymm15, ymm1
-	QUAD $0x000000e0886ffec5             // vmovdqu    ymm1, yword [rax + 224]
-	LONG $0xd1ef8dc5                     // vpxor    ymm2, ymm14, ymm1
-	LONG $0xef15c1c4; BYTE $0xde         // vpxor    ymm3, ymm13, ymm14
-	LONG $0x376de2c4; BYTE $0xdb         // vpcmpgtq    ymm3, ymm2, ymm3
-	LONG $0x4b7543c4; WORD $0x30ed       // vblendvpd    ymm13, ymm1, ymm13, ymm3
-	LONG $0xd8ef8dc5                     // vpxor    ymm3, ymm14, ymm0
-	LONG $0x3765e2c4; BYTE $0xd2         // vpcmpgtq    ymm2, ymm3, ymm2
-	LONG $0x4b75e3c4; WORD $0x20c0       // vblendvpd    ymm0, ymm1, ymm0, ymm2
-	LONG $0x01000548; WORD $0x0000       // add    rax, 256
-	WORD $0xff49; BYTE $0xc1             // inc    r9
-	JNE  LBB3_12
-
-LBB3_13:
-	LONG $0x597d62c4; WORD $0x0075       // vpbroadcastq    ymm14, qword 0[rbp] /* [rip + .LCPI3_0] */
-	LONG $0x1c6ffdc5; BYTE $0x24         // vmovdqa    ymm3, yword [rsp]
-	LONG $0xcbef8dc5                     // vpxor    ymm1, ymm14, ymm3
-	LONG $0xef05c1c4; BYTE $0xd6         // vpxor    ymm2, ymm15, ymm14
-	LONG $0x3775e2c4; BYTE $0xca         // vpcmpgtq    ymm1, ymm1, ymm2
-	LONG $0x4b05e3c4; WORD $0x10cb       // vblendvpd    ymm1, ymm15, ymm3, ymm1
-	LONG $0xd4ef8dc5                     // vpxor    ymm2, ymm14, ymm4
-	LONG $0xef35c1c4; BYTE $0xde         // vpxor    ymm3, ymm9, ymm14
-	LONG $0x376de2c4; BYTE $0xd3         // vpcmpgtq    ymm2, ymm2, ymm3
-	LONG $0x4b35e3c4; WORD $0x20d4       // vblendvpd    ymm2, ymm9, ymm4, ymm2
-	LONG $0xef1dc1c4; BYTE $0xde         // vpxor    ymm3, ymm12, ymm14
-	LONG $0xc8ef0dc5                     // vpxor    ymm9, ymm14, ymm0
-	LONG $0x3765c2c4; BYTE $0xd9         // vpcmpgtq    ymm3, ymm3, ymm9
-	LONG $0x4b7dc3c4; WORD $0x30c4       // vblendvpd    ymm0, ymm0, ymm12, ymm3
-	LONG $0x646ffdc5; WORD $0x2024       // vmovdqa    ymm4, yword [rsp + 32]
-	LONG $0xdcef8dc5                     // vpxor    ymm3, ymm14, ymm4
-	LONG $0xef3d41c4; BYTE $0xce         // vpxor    ymm9, ymm8, ymm14
-	LONG $0x3765c2c4; BYTE $0xd9         // vpcmpgtq    ymm3, ymm3, ymm9
-	LONG $0x4b3de3c4; WORD $0x30dc       // vblendvpd    ymm3, ymm8, ymm4, ymm3
-	LONG $0xf3578dc5                     // vxorpd    ymm6, ymm14, ymm3
-	LONG $0xc8570dc5                     // vxorpd    ymm9, ymm14, ymm0
-	LONG $0x374dc2c4; BYTE $0xf1         // vpcmpgtq    ymm6, ymm6, ymm9
-	LONG $0x4b7de3c4; WORD $0x60c3       // vblendvpd    ymm0, ymm0, ymm3, ymm6
-	LONG $0xda578dc5                     // vxorpd    ymm3, ymm14, ymm2
-	LONG $0xf1578dc5                     // vxorpd    ymm6, ymm14, ymm1
-	LONG $0x3765e2c4; BYTE $0xde         // vpcmpgtq    ymm3, ymm3, ymm6
-	LONG $0x4b75e3c4; WORD $0x30ca       // vblendvpd    ymm1, ymm1, ymm2, ymm3
-	LONG $0xd1578dc5                     // vxorpd    ymm2, ymm14, ymm1
-	LONG $0xd8578dc5                     // vxorpd    ymm3, ymm14, ymm0
-	LONG $0x376de2c4; BYTE $0xd3         // vpcmpgtq    ymm2, ymm2, ymm3
-	LONG $0x4b7de3c4; WORD $0x20c1       // vblendvpd    ymm0, ymm0, ymm1, ymm2
-	LONG $0x197de3c4; WORD $0x01c1       // vextractf128    xmm1, ymm0, 1
-	LONG $0xd15789c5                     // vxorpd    xmm2, xmm14, xmm1
-	LONG $0xd85789c5                     // vxorpd    xmm3, xmm14, xmm0
-	LONG $0x3761e2c4; BYTE $0xd2         // vpcmpgtq    xmm2, xmm3, xmm2
-	LONG $0x4b71e3c4; WORD $0x20c0       // vblendvpd    xmm0, xmm1, xmm0, xmm2
-	LONG $0x0479e3c4; WORD $0x4ec8       // vpermilps    xmm1, xmm0, 78
-	LONG $0xd05789c5                     // vxorpd    xmm2, xmm14, xmm0
-	LONG $0xd95789c5                     // vxorpd    xmm3, xmm14, xmm1
-	LONG $0x3769e2c4; BYTE $0xd3         // vpcmpgtq    xmm2, xmm2, xmm3
-	LONG $0x4b71e3c4; WORD $0x20c0       // vblendvpd    xmm0, xmm1, xmm0, xmm2
-	LONG $0xcdef8dc5                     // vpxor    ymm1, ymm14, ymm5
-	LONG $0xef25c1c4; BYTE $0xd6         // vpxor    ymm2, ymm11, ymm14
-	LONG $0x376de2c4; BYTE $0xc9         // vpcmpgtq    ymm1, ymm2, ymm1
-	LONG $0x4b25e3c4; WORD $0x10cd       // vblendvpd    ymm1, ymm11, ymm5, ymm1
-	LONG $0x646ffdc5; WORD $0x4024       // vmovdqa    ymm4, yword [rsp + 64]
-	LONG $0xd4ef8dc5                     // vpxor    ymm2, ymm14, ymm4
-	LONG $0xef2dc1c4; BYTE $0xde         // vpxor    ymm3, ymm10, ymm14
-	LONG $0x3765e2c4; BYTE $0xd2         // vpcmpgtq    ymm2, ymm3, ymm2
-	LONG $0x4b2de3c4; WORD $0x20d4       // vblendvpd    ymm2, ymm10, ymm4, ymm2
-	LONG $0xdfef8dc5                     // vpxor    ymm3, ymm14, ymm7
-	LONG $0xef15c1c4; BYTE $0xee         // vpxor    ymm5, ymm13, ymm14
-	LONG $0x3755e2c4; BYTE $0xdb         // vpcmpgtq    ymm3, ymm5, ymm3
-	LONG $0x4b15e3c4; WORD $0x30df       // vblendvpd    ymm3, ymm13, ymm7, ymm3
-	LONG $0x746ffdc5; WORD $0x6024       // vmovdqa    ymm6, yword [rsp + 96]
-	LONG $0xe6ef8dc5                     // vpxor    ymm4, ymm14, ymm6
-	QUAD $0x00008024bc6ffdc5; BYTE $0x00 // vmovdqa    ymm7, yword [rsp + 128]
-	LONG $0xefef8dc5                     // vpxor    ymm5, ymm14, ymm7
-	LONG $0x3755e2c4; BYTE $0xe4         // vpcmpgtq    ymm4, ymm5, ymm4
-	LONG $0x4b45e3c4; WORD $0x40e6       // vblendvpd    ymm4, ymm7, ymm6, ymm4
-	LONG $0xec578dc5                     // vxorpd    ymm5, ymm14, ymm4
-	LONG $0xf3578dc5                     // vxorpd    ymm6, ymm14, ymm3
-	LONG $0x374de2c4; BYTE $0xed         // vpcmpgtq    ymm5, ymm6, ymm5
-	LONG $0x4b65e3c4; WORD $0x50dc       // vblendvpd    ymm3, ymm3, ymm4, ymm5
-	LONG $0xe2578dc5                     // vxorpd    ymm4, ymm14, ymm2
-	LONG $0xe9578dc5                     // vxorpd    ymm5, ymm14, ymm1
-	LONG $0x3755e2c4; BYTE $0xe4         // vpcmpgtq    ymm4, ymm5, ymm4
-	LONG $0x4b75e3c4; WORD $0x40ca       // vblendvpd    ymm1, ymm1, ymm2, ymm4
-	LONG $0xd1578dc5                     // vxorpd    ymm2, ymm14, ymm1
-	LONG $0xe3578dc5                     // vxorpd    ymm4, ymm14, ymm3
-	LONG $0x375de2c4; BYTE $0xd2         // vpcmpgtq    ymm2, ymm4, ymm2
-	LONG $0x4b65e3c4; WORD $0x20c9       // vblendvpd    ymm1, ymm3, ymm1, ymm2
-	LONG $0x197de3c4; WORD $0x01ca       // vextractf128    xmm2, ymm1, 1
-	LONG $0xd95789c5                     // vxorpd    xmm3, xmm14, xmm1
-	LONG $0xe25789c5                     // vxorpd    xmm4, xmm14, xmm2
-	LONG $0x3759e2c4; BYTE $0xdb         // vpcmpgtq    xmm3, xmm4, xmm3
-	LONG $0x4b69e3c4; WORD $0x30c9       // vblendvpd    xmm1, xmm2, xmm1, xmm3
-	LONG $0x0479e3c4; WORD $0x4ed1       // vpermilps    xmm2, xmm1, 78
-	LONG $0xd95789c5                     // vxorpd    xmm3, xmm14, xmm1
-	LONG $0xe25789c5                     // vxorpd    xmm4, xmm14, xmm2
-	LONG $0x3759e2c4; BYTE $0xdb         // vpcmpgtq    xmm3, xmm4, xmm3
-	LONG $0x4b69e3c4; WORD $0x30c9       // vblendvpd    xmm1, xmm2, xmm1, xmm3
-	LONG $0x7ef9e1c4; BYTE $0xc6         // vmovq    rsi, xmm0
-	LONG $0x7ef9c1c4; BYTE $0xc9         // vmovq    r9, xmm1
-	WORD $0x394d; BYTE $0xc3             // cmp    r11, r8
-	JE   LBB3_14
-
-LBB3_4:
-	WORD $0x8948; BYTE $0xf0 // mov    rax, rsi
+	LONG $0xffefc1c5               // vpxor    xmm7, xmm7, xmm7
+	LONG $0xf6efc9c5               // vpxor    xmm6, xmm6, xmm6
 
 LBB3_5:
-	LONG $0xdf348b4a         // mov    rsi, qword [rdi + 8*r11]
-	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
-	LONG $0xce430f4c         // cmovae    r9, rsi
-	WORD $0x3948; BYTE $0xf0 // cmp    rax, rsi
-	LONG $0xf0470f48         // cmova    rsi, rax
-	LONG $0x01c38349         // add    r11, 1
-	WORD $0x8948; BYTE $0xf0 // mov    rax, rsi
-	WORD $0x394d; BYTE $0xd8 // cmp    r8, r11
+	LONG $0x0c6f7ec5; BYTE $0xc7   // vmovdqu    ymm9, yword [rdi + 8*rax]
+	LONG $0xd0ef75c5               // vpxor    ymm10, ymm1, ymm0
+	LONG $0xd8ef35c5               // vpxor    ymm11, ymm9, ymm0
+	LONG $0x372542c4; BYTE $0xd2   // vpcmpgtq    ymm10, ymm11, ymm10
+	LONG $0x4b35e3c4; WORD $0xa0c9 // vblendvpd    ymm1, ymm9, ymm1, ymm10
+	LONG $0xd0ef55c5               // vpxor    ymm10, ymm5, ymm0
+	LONG $0x372d42c4; BYTE $0xd3   // vpcmpgtq    ymm10, ymm10, ymm11
+	LONG $0x4b35e3c4; WORD $0xa0ed // vblendvpd    ymm5, ymm9, ymm5, ymm10
+	LONG $0x4c6f7ec5; WORD $0x20c7 // vmovdqu    ymm9, yword [rdi + 8*rax + 32]
+	LONG $0xd0ef5dc5               // vpxor    ymm10, ymm4, ymm0
+	LONG $0xd8ef35c5               // vpxor    ymm11, ymm9, ymm0
+	LONG $0x372542c4; BYTE $0xd2   // vpcmpgtq    ymm10, ymm11, ymm10
+	LONG $0x4b35e3c4; WORD $0xa0e4 // vblendvpd    ymm4, ymm9, ymm4, ymm10
+	LONG $0xd0ef3dc5               // vpxor    ymm10, ymm8, ymm0
+	LONG $0x372d42c4; BYTE $0xd3   // vpcmpgtq    ymm10, ymm10, ymm11
+	LONG $0x5c6f7ec5; WORD $0x40c7 // vmovdqu    ymm11, yword [rdi + 8*rax + 64]
+	LONG $0x4b3543c4; WORD $0xa0c0 // vblendvpd    ymm8, ymm9, ymm8, ymm10
+	LONG $0xc8ef65c5               // vpxor    ymm9, ymm3, ymm0
+	LONG $0xd0ef25c5               // vpxor    ymm10, ymm11, ymm0
+	LONG $0x372d42c4; BYTE $0xc9   // vpcmpgtq    ymm9, ymm10, ymm9
+	LONG $0x4b25e3c4; WORD $0x90db // vblendvpd    ymm3, ymm11, ymm3, ymm9
+	LONG $0xc8ef45c5               // vpxor    ymm9, ymm7, ymm0
+	LONG $0x373542c4; BYTE $0xca   // vpcmpgtq    ymm9, ymm9, ymm10
+	LONG $0x4b25e3c4; WORD $0x90ff // vblendvpd    ymm7, ymm11, ymm7, ymm9
+	LONG $0x4c6f7ec5; WORD $0x60c7 // vmovdqu    ymm9, yword [rdi + 8*rax + 96]
+	LONG $0xd0ef6dc5               // vpxor    ymm10, ymm2, ymm0
+	LONG $0xd8ef35c5               // vpxor    ymm11, ymm9, ymm0
+	LONG $0x372542c4; BYTE $0xd2   // vpcmpgtq    ymm10, ymm11, ymm10
+	LONG $0x4b35e3c4; WORD $0xa0d2 // vblendvpd    ymm2, ymm9, ymm2, ymm10
+	LONG $0xd0ef4dc5               // vpxor    ymm10, ymm6, ymm0
+	LONG $0x372d42c4; BYTE $0xd3   // vpcmpgtq    ymm10, ymm10, ymm11
+	LONG $0x4b35e3c4; WORD $0xa0f6 // vblendvpd    ymm6, ymm9, ymm6, ymm10
+	LONG $0x10c08348               // add    rax, 16
+	WORD $0x3949; BYTE $0xc1       // cmp    r9, rax
 	JNE  LBB3_5
+	LONG $0xc8ef3dc5               // vpxor    ymm9, ymm8, ymm0
+	LONG $0xd0ef55c5               // vpxor    ymm10, ymm5, ymm0
+	LONG $0x372d42c4; BYTE $0xc9   // vpcmpgtq    ymm9, ymm10, ymm9
+	LONG $0x4b3de3c4; WORD $0x90ed // vblendvpd    ymm5, ymm8, ymm5, ymm9
+	LONG $0xc05755c5               // vxorpd    ymm8, ymm5, ymm0
+	LONG $0xc8ef45c5               // vpxor    ymm9, ymm7, ymm0
+	LONG $0x373d42c4; BYTE $0xc1   // vpcmpgtq    ymm8, ymm8, ymm9
+	LONG $0x4b45e3c4; WORD $0x80ed // vblendvpd    ymm5, ymm7, ymm5, ymm8
+	LONG $0xf857d5c5               // vxorpd    ymm7, ymm5, ymm0
+	LONG $0xc0ef4dc5               // vpxor    ymm8, ymm6, ymm0
+	LONG $0x3745c2c4; BYTE $0xf8   // vpcmpgtq    ymm7, ymm7, ymm8
+	LONG $0x4b4de3c4; WORD $0x70ed // vblendvpd    ymm5, ymm6, ymm5, ymm7
+	LONG $0x197de3c4; WORD $0x01ee // vextractf128    xmm6, ymm5, 1
+	LONG $0xc05749c5               // vxorpd    xmm8, xmm6, xmm0
+	LONG $0xf857d1c5               // vxorpd    xmm7, xmm5, xmm0
+	LONG $0x3741c2c4; BYTE $0xf8   // vpcmpgtq    xmm7, xmm7, xmm8
+	LONG $0x4b49e3c4; WORD $0x70ed // vblendvpd    xmm5, xmm6, xmm5, xmm7
+	LONG $0x0479e3c4; WORD $0x4ef5 // vpermilps    xmm6, xmm5, 78
+	LONG $0xc05751c5               // vxorpd    xmm8, xmm5, xmm0
+	LONG $0xf857c9c5               // vxorpd    xmm7, xmm6, xmm0
+	LONG $0x3739e2c4; BYTE $0xff   // vpcmpgtq    xmm7, xmm8, xmm7
+	LONG $0x4b49e3c4; WORD $0x70ed // vblendvpd    xmm5, xmm6, xmm5, xmm7
+	LONG $0xf0eff5c5               // vpxor    ymm6, ymm1, ymm0
+	LONG $0xf8efddc5               // vpxor    ymm7, ymm4, ymm0
+	LONG $0x3745e2c4; BYTE $0xf6   // vpcmpgtq    ymm6, ymm7, ymm6
+	LONG $0x4b5de3c4; WORD $0x60c9 // vblendvpd    ymm1, ymm4, ymm1, ymm6
+	LONG $0xe057f5c5               // vxorpd    ymm4, ymm1, ymm0
+	LONG $0xf0efe5c5               // vpxor    ymm6, ymm3, ymm0
+	LONG $0x374de2c4; BYTE $0xe4   // vpcmpgtq    ymm4, ymm6, ymm4
+	LONG $0x4b65e3c4; WORD $0x40c9 // vblendvpd    ymm1, ymm3, ymm1, ymm4
+	LONG $0x7ef9c1c4; BYTE $0xea   // vmovq    r10, xmm5
+	LONG $0xd857f5c5               // vxorpd    ymm3, ymm1, ymm0
+	LONG $0xe0efedc5               // vpxor    ymm4, ymm2, ymm0
+	LONG $0x375de2c4; BYTE $0xdb   // vpcmpgtq    ymm3, ymm4, ymm3
+	LONG $0x4b6de3c4; WORD $0x30c9 // vblendvpd    ymm1, ymm2, ymm1, ymm3
+	LONG $0x197de3c4; WORD $0x01ca // vextractf128    xmm2, ymm1, 1
+	LONG $0xd857f1c5               // vxorpd    xmm3, xmm1, xmm0
+	LONG $0xe057e9c5               // vxorpd    xmm4, xmm2, xmm0
+	LONG $0x3759e2c4; BYTE $0xdb   // vpcmpgtq    xmm3, xmm4, xmm3
+	LONG $0x4b69e3c4; WORD $0x30c9 // vblendvpd    xmm1, xmm2, xmm1, xmm3
+	LONG $0x0479e3c4; WORD $0x4ed1 // vpermilps    xmm2, xmm1, 78
+	LONG $0xd857f1c5               // vxorpd    xmm3, xmm1, xmm0
+	LONG $0xc057e9c5               // vxorpd    xmm0, xmm2, xmm0
+	LONG $0x3779e2c4; BYTE $0xc3   // vpcmpgtq    xmm0, xmm0, xmm3
+	LONG $0x4b69e3c4; WORD $0x00c1 // vblendvpd    xmm0, xmm2, xmm1, xmm0
+	LONG $0x7ef9e1c4; BYTE $0xc0   // vmovq    rax, xmm0
+	WORD $0x894c; BYTE $0xd6       // mov    rsi, r10
+	WORD $0x394d; BYTE $0xc1       // cmp    r9, r8
+	JE   LBB3_8
+
+LBB3_7:
+	LONG $0xcf348b4a         // mov    rsi, qword [rdi + 8*r9]
+	WORD $0x3948; BYTE $0xf0 // cmp    rax, rsi
+	LONG $0xc6430f48         // cmovae    rax, rsi
+	WORD $0x3949; BYTE $0xf2 // cmp    r10, rsi
+	LONG $0xf2470f49         // cmova    rsi, r10
+	LONG $0x01c18349         // add    r9, 1
+	WORD $0x8949; BYTE $0xf2 // mov    r10, rsi
+	WORD $0x394d; BYTE $0xc8 // cmp    r8, r9
+	JNE  LBB3_7
 
-LBB3_14:
+LBB3_8:
 	WORD $0x8948; BYTE $0x31 // mov    qword [rcx], rsi
-	WORD $0x894c; BYTE $0x0a // mov    qword [rdx], r9
-	SUBQ $8, SP
+	WORD $0x8948; BYTE $0x02 // mov    qword [rdx], rax
 	VZEROUPPER
 	RET
diff --git a/go/parquet/internal/utils/unpack_bool_avx2_amd64.s b/go/parquet/internal/utils/unpack_bool_avx2_amd64.s
index 99c2cc88265..459ff78675d 100644
--- a/go/parquet/internal/utils/unpack_bool_avx2_amd64.s
+++ b/go/parquet/internal/utils/unpack_bool_avx2_amd64.s
@@ -1,6961 +1,88 @@
 //+build !noasm !appengine
 // AUTO-GENERATED BY C2GOASM -- DO NOT EDIT
 
-DATA LCDATA1<>+0x000(SB)/8, $0x0000001900000018
-DATA LCDATA1<>+0x008(SB)/8, $0x0000001b0000001a
-DATA LCDATA1<>+0x010(SB)/8, $0x0000001d0000001c
-DATA LCDATA1<>+0x018(SB)/8, $0x0000001f0000001e
-DATA LCDATA1<>+0x020(SB)/8, $0x0000001100000010
-DATA LCDATA1<>+0x028(SB)/8, $0x0000001300000012
-DATA LCDATA1<>+0x030(SB)/8, $0x0000001500000014
-DATA LCDATA1<>+0x038(SB)/8, $0x0000001700000016
-DATA LCDATA1<>+0x040(SB)/8, $0x0000000900000008
-DATA LCDATA1<>+0x048(SB)/8, $0x0000000b0000000a
-DATA LCDATA1<>+0x050(SB)/8, $0x0000000d0000000c
-DATA LCDATA1<>+0x058(SB)/8, $0x0000000f0000000e
-DATA LCDATA1<>+0x060(SB)/8, $0x0000000100000000
-DATA LCDATA1<>+0x068(SB)/8, $0x0000000300000002
-DATA LCDATA1<>+0x070(SB)/8, $0x0000000500000004
-DATA LCDATA1<>+0x078(SB)/8, $0x0000000700000006
-DATA LCDATA1<>+0x080(SB)/8, $0x0101010101010101
-DATA LCDATA1<>+0x088(SB)/8, $0x0101010101010101
-DATA LCDATA1<>+0x090(SB)/8, $0x0101010101010101
-DATA LCDATA1<>+0x098(SB)/8, $0x0101010101010101
-DATA LCDATA1<>+0x0a0(SB)/8, $0x0000000000000001
-DATA LCDATA1<>+0x0a8(SB)/8, $0x0000000000000002
-DATA LCDATA1<>+0x0b0(SB)/8, $0x0000000000000003
-DATA LCDATA1<>+0x0b8(SB)/8, $0x0000000000000004
-DATA LCDATA1<>+0x0c0(SB)/8, $0x0000000000000005
-DATA LCDATA1<>+0x0c8(SB)/8, $0x0000000000000006
-DATA LCDATA1<>+0x0d0(SB)/8, $0x0000000000000007
-DATA LCDATA1<>+0x0d8(SB)/8, $0x0000000000000020
-GLOBL LCDATA1<>(SB), 8, $224
-
-TEXT ·_bytes_to_bools_avx2(SB), $1000-32
+TEXT ·_bytes_to_bools_avx2(SB), $0-32
 
 	MOVQ in+0(FP), DI
 	MOVQ len+8(FP), SI
 	MOVQ out+16(FP), DX
 	MOVQ outlen+24(FP), CX
-	MOVQ SP, BP
-	ADDQ $32, SP
-	ANDQ $-32, SP
-	MOVQ BP, 960(SP)
-	LEAQ LCDATA1<>(SB), BP
 
 	WORD $0xf685             // test    esi, esi
-	JLE  LBB0_1051
-	WORD $0x8941; BYTE $0xc9 // mov    r9d, ecx
-	WORD $0x8949; BYTE $0xd0 // mov    r8, rdx
-	WORD $0x8941; BYTE $0xf2 // mov    r10d, esi
-	WORD $0xfe83; BYTE $0x20 // cmp    esi, 32
-	JAE  LBB0_3
+	JLE  LBB0_5
+	WORD $0x8941; BYTE $0xf0 // mov    r8d, esi
+	LONG $0x03e0c149         // shl    r8, 3
+	WORD $0x3145; BYTE $0xd2 // xor    r10d, r10d
+	JMP  LBB0_2
+
+LBB0_4:
+	LONG $0x08c28349         // add    r10, 8
+	LONG $0x01c78348         // add    rdi, 1
+	WORD $0x394d; BYTE $0xd0 // cmp    r8, r10
+	JE   LBB0_5
 
 LBB0_2:
-	WORD $0x3145; BYTE $0xe4 // xor    r12d, r12d
-
-LBB0_1055:
-	QUAD $0x00000000e50c8d42 // lea    ecx, [8*r12]
-	JMP  LBB0_1057
-
-LBB0_1056:
-	LONG $0x01c48349         // add    r12, 1
-	WORD $0xc183; BYTE $0x08 // add    ecx, 8
-	WORD $0x394d; BYTE $0xe2 // cmp    r10, r12
-	JE   LBB0_1051
-
-LBB0_1057:
-	WORD $0xca89                 // mov    edx, ecx
-	WORD $0xc989                 // mov    ecx, ecx
-	WORD $0x3944; BYTE $0xca     // cmp    edx, r9d
-	JGE  LBB0_1056
-	LONG $0x14b60f42; BYTE $0x27 // movzx    edx, byte [rdi + r12]
-	WORD $0xe280; BYTE $0x01     // and    dl, 1
-	LONG $0x08148841             // mov    byte [r8 + rcx], dl
-	WORD $0x8948; BYTE $0xca     // mov    rdx, rcx
-	LONG $0x01ca8348             // or    rdx, 1
-	WORD $0x3944; BYTE $0xca     // cmp    edx, r9d
-	JGE  LBB0_1056
-	LONG $0x1cb60f42; BYTE $0x27 // movzx    ebx, byte [rdi + r12]
-	WORD $0xebd0                 // shr    bl, 1
-	WORD $0xe380; BYTE $0x01     // and    bl, 1
-	LONG $0x101c8841             // mov    byte [r8 + rdx], bl
-	WORD $0x8948; BYTE $0xca     // mov    rdx, rcx
-	LONG $0x02ca8348             // or    rdx, 2
-	WORD $0x3944; BYTE $0xca     // cmp    edx, r9d
-	JGE  LBB0_1056
-	LONG $0x1cb60f42; BYTE $0x27 // movzx    ebx, byte [rdi + r12]
-	WORD $0xebc0; BYTE $0x02     // shr    bl, 2
-	WORD $0xe380; BYTE $0x01     // and    bl, 1
-	LONG $0x101c8841             // mov    byte [r8 + rdx], bl
-	WORD $0x8948; BYTE $0xca     // mov    rdx, rcx
-	LONG $0x03ca8348             // or    rdx, 3
-	WORD $0x3944; BYTE $0xca     // cmp    edx, r9d
-	JGE  LBB0_1056
-	LONG $0x1cb60f42; BYTE $0x27 // movzx    ebx, byte [rdi + r12]
-	WORD $0xebc0; BYTE $0x03     // shr    bl, 3
-	WORD $0xe380; BYTE $0x01     // and    bl, 1
-	LONG $0x101c8841             // mov    byte [r8 + rdx], bl
-	WORD $0x8948; BYTE $0xca     // mov    rdx, rcx
-	LONG $0x04ca8348             // or    rdx, 4
-	WORD $0x3944; BYTE $0xca     // cmp    edx, r9d
-	JGE  LBB0_1056
-	LONG $0x1cb60f42; BYTE $0x27 // movzx    ebx, byte [rdi + r12]
-	WORD $0xebc0; BYTE $0x04     // shr    bl, 4
-	WORD $0xe380; BYTE $0x01     // and    bl, 1
-	LONG $0x101c8841             // mov    byte [r8 + rdx], bl
-	WORD $0x8948; BYTE $0xca     // mov    rdx, rcx
-	LONG $0x05ca8348             // or    rdx, 5
-	WORD $0x3944; BYTE $0xca     // cmp    edx, r9d
-	JGE  LBB0_1056
-	LONG $0x1cb60f42; BYTE $0x27 // movzx    ebx, byte [rdi + r12]
-	WORD $0xebc0; BYTE $0x05     // shr    bl, 5
-	WORD $0xe380; BYTE $0x01     // and    bl, 1
-	LONG $0x101c8841             // mov    byte [r8 + rdx], bl
-	WORD $0x8948; BYTE $0xca     // mov    rdx, rcx
-	LONG $0x06ca8348             // or    rdx, 6
-	WORD $0x3944; BYTE $0xca     // cmp    edx, r9d
-	JGE  LBB0_1056
-	LONG $0x1cb60f42; BYTE $0x27 // movzx    ebx, byte [rdi + r12]
-	WORD $0xebc0; BYTE $0x06     // shr    bl, 6
-	WORD $0xe380; BYTE $0x01     // and    bl, 1
-	LONG $0x101c8841             // mov    byte [r8 + rdx], bl
-	WORD $0x8948; BYTE $0xca     // mov    rdx, rcx
-	LONG $0x07ca8348             // or    rdx, 7
-	WORD $0x3944; BYTE $0xca     // cmp    edx, r9d
-	JGE  LBB0_1056
-	LONG $0x1cb60f42; BYTE $0x27 // movzx    ebx, byte [rdi + r12]
-	WORD $0xebc0; BYTE $0x07     // shr    bl, 7
-	LONG $0x101c8841             // mov    byte [r8 + rdx], bl
-	JMP  LBB0_1056
-
-LBB0_3:
-	LONG $0x244c8944; BYTE $0x10 // mov    dword [rsp + 16], r9d
-	LONG $0x2454894c; BYTE $0x30 // mov    qword [rsp + 48], r10
-	LONG $0xff728d49             // lea    rsi, [r10 - 1]
-	LONG $0x000008b9; BYTE $0x00 // mov    ecx, 8
-	WORD $0xf089                 // mov    eax, esi
-	WORD $0xe1f7                 // mul    ecx
-	LONG $0xd6900f41             // seto    r14b
-	WORD $0x8948; BYTE $0xf3     // mov    rbx, rsi
-	LONG $0x20ebc148             // shr    rbx, 32
-	LONG $0x06488d49             // lea    rcx, [r8 + 6]
-	LONG $0x000008ba; BYTE $0x00 // mov    edx, 8
-	WORD $0x8948; BYTE $0xf0     // mov    rax, rsi
-	WORD $0xf748; BYTE $0xe2     // mul    rdx
-	LONG $0xd6900f40             // seto    sil
-	WORD $0x0148; BYTE $0xc1     // add    rcx, rax
-	WORD $0x920f; BYTE $0xd2     // setb    dl
-	LONG $0x07488d49             // lea    rcx, [r8 + 7]
-	WORD $0x0148; BYTE $0xc1     // add    rcx, rax
-	LONG $0xd5920f41             // setb    r13b
-	LONG $0x05488d49             // lea    rcx, [r8 + 5]
-	WORD $0x0148; BYTE $0xc1     // add    rcx, rax
-	LONG $0xd1920f41             // setb    r9b
-	LONG $0x04488d49             // lea    rcx, [r8 + 4]
-	WORD $0x0148; BYTE $0xc1     // add    rcx, rax
-	LONG $0xd7920f41             // setb    r15b
-	LONG $0x03488d49             // lea    rcx, [r8 + 3]
-	WORD $0x0148; BYTE $0xc1     // add    rcx, rax
-	LONG $0xd3920f41             // setb    r11b
-	LONG $0x02488d49             // lea    rcx, [r8 + 2]
-	WORD $0x0148; BYTE $0xc1     // add    rcx, rax
-	LONG $0xd2920f41             // setb    r10b
-	LONG $0x01488d49             // lea    rcx, [r8 + 1]
-	WORD $0x0148; BYTE $0xc1     // add    rcx, rax
-	WORD $0x920f; BYTE $0xd1     // setb    cl
-	WORD $0x014c; BYTE $0xc0     // add    rax, r8
-	WORD $0x920f; BYTE $0xd0     // setb    al
-	WORD $0x3145; BYTE $0xe4     // xor    r12d, r12d
-	WORD $0x8548; BYTE $0xdb     // test    rbx, rbx
-	JNE  LBB0_1052
-	WORD $0x8445; BYTE $0xf6     // test    r14b, r14b
-	JNE  LBB0_1052
-	WORD $0xd284                 // test    dl, dl
-	JNE  LBB0_1052
-	WORD $0x8440; BYTE $0xf6     // test    sil, sil
-	JNE  LBB0_1052
-	WORD $0x8445; BYTE $0xed     // test    r13b, r13b
-	JNE  LBB0_1052
-	WORD $0x8440; BYTE $0xf6     // test    sil, sil
-	JNE  LBB0_1052
-	WORD $0x8445; BYTE $0xc9     // test    r9b, r9b
-	JNE  LBB0_1052
-	WORD $0x8440; BYTE $0xf6     // test    sil, sil
-	JNE  LBB0_1052
-	WORD $0x8445; BYTE $0xff     // test    r15b, r15b
-	JNE  LBB0_1052
-	WORD $0x8440; BYTE $0xf6     // test    sil, sil
-	JNE  LBB0_1052
-	WORD $0x8445; BYTE $0xdb     // test    r11b, r11b
-	JNE  LBB0_1052
-	WORD $0x8440; BYTE $0xf6     // test    sil, sil
-	JNE  LBB0_1052
-	WORD $0x8445; BYTE $0xd2     // test    r10b, r10b
-	JNE  LBB0_1052
-	WORD $0x8440; BYTE $0xf6     // test    sil, sil
-	LONG $0x24548b4c; BYTE $0x30 // mov    r10, qword [rsp + 48]
-	JNE  LBB0_1054
-	WORD $0xc984                 // test    cl, cl
-	JNE  LBB0_1054
-	WORD $0x8440; BYTE $0xf6     // test    sil, sil
-	LONG $0x244c8b44; BYTE $0x10 // mov    r9d, dword [rsp + 16]
-	JNE  LBB0_1055
-	WORD $0xc084                 // test    al, al
-	JNE  LBB0_1055
-	WORD $0x8440; BYTE $0xf6     // test    sil, sil
-	JNE  LBB0_1055
-	LONG $0xd0048d4b             // lea    rax, [r8 + 8*r10]
-	WORD $0x3948; BYTE $0xf8     // cmp    rax, rdi
-	JBE  LBB0_24
-	LONG $0x17048d4a             // lea    rax, [rdi + r10]
-	WORD $0x394c; BYTE $0xc0     // cmp    rax, r8
-	JA   LBB0_2
-
-LBB0_24:
-	WORD $0x8945; BYTE $0xd4             // mov    r12d, r10d
-	LONG $0xe0e48341                     // and    r12d, -32
-	LONG $0x6e79c1c4; BYTE $0xc1         // vmovd    xmm0, r9d
-	LONG $0x587de2c4; BYTE $0xc0         // vpbroadcastd    ymm0, xmm0
-	LONG $0x4d6f7dc5; BYTE $0x00         // vmovdqa    ymm9, yword 0[rbp] /* [rip + .LCPI0_0] */
-	LONG $0x456f7dc5; BYTE $0x20         // vmovdqa    ymm8, yword 32[rbp] /* [rip + .LCPI0_1] */
-	LONG $0x5d6ffdc5; BYTE $0x40         // vmovdqa    ymm3, yword 64[rbp] /* [rip + .LCPI0_2] */
-	LONG $0x556ffdc5; BYTE $0x60         // vmovdqa    ymm2, yword 96[rbp] /* [rip + .LCPI0_3] */
-	WORD $0x3145; BYTE $0xdb             // xor    r11d, r11d
-	QUAD $0x0000a08d197de2c4; BYTE $0x00 // vbroadcastsd    ymm1, qword 160[rbp] /* [rip + .LCPI0_5] */
-	QUAD $0x000300248c29fcc5; BYTE $0x00 // vmovaps    yword [rsp + 768], ymm1
-	QUAD $0x0000a88d197de2c4; BYTE $0x00 // vbroadcastsd    ymm1, qword 168[rbp] /* [rip + .LCPI0_6] */
-	QUAD $0x0002e0248c29fcc5; BYTE $0x00 // vmovaps    yword [rsp + 736], ymm1
-	QUAD $0x0000b08d197de2c4; BYTE $0x00 // vbroadcastsd    ymm1, qword 176[rbp] /* [rip + .LCPI0_7] */
-	QUAD $0x0002c0248c29fcc5; BYTE $0x00 // vmovaps    yword [rsp + 704], ymm1
-	QUAD $0x0000b88d197de2c4; BYTE $0x00 // vbroadcastsd    ymm1, qword 184[rbp] /* [rip + .LCPI0_8] */
-	QUAD $0x0002a0248c29fcc5; BYTE $0x00 // vmovaps    yword [rsp + 672], ymm1
-	QUAD $0x0000c08d197de2c4; BYTE $0x00 // vbroadcastsd    ymm1, qword 192[rbp] /* [rip + .LCPI0_9] */
-	QUAD $0x000280248c29fcc5; BYTE $0x00 // vmovaps    yword [rsp + 640], ymm1
-	QUAD $0x0000c88d197de2c4; BYTE $0x00 // vbroadcastsd    ymm1, qword 200[rbp] /* [rip + .LCPI0_10] */
-	QUAD $0x000260248c29fcc5; BYTE $0x00 // vmovaps    yword [rsp + 608], ymm1
-	QUAD $0x0000d08d197de2c4; BYTE $0x00 // vbroadcastsd    ymm1, qword 208[rbp] /* [rip + .LCPI0_11] */
-	QUAD $0x000240248c29fcc5; BYTE $0x00 // vmovaps    yword [rsp + 576], ymm1
-	QUAD $0x0000d88d587de2c4; BYTE $0x00 // vpbroadcastd    ymm1, dword 216[rbp] /* [rip + .LCPI0_12] */
-	QUAD $0x000220248c7ffdc5; BYTE $0x00 // vmovdqa    yword [rsp + 544], ymm1
-	JMP  LBB0_26
-
-LBB0_25:
-	LONG $0x20c38349                     // add    r11, 32
-	QUAD $0x000220248c6ffdc5; BYTE $0x00 // vmovdqa    ymm1, yword [rsp + 544]
-	LONG $0xd1feedc5                     // vpaddd    ymm2, ymm2, ymm1
-	LONG $0xd9fee5c5                     // vpaddd    ymm3, ymm3, ymm1
-	LONG $0xc1fe3dc5                     // vpaddd    ymm8, ymm8, ymm1
-	LONG $0xc9fe35c5                     // vpaddd    ymm9, ymm9, ymm1
-	WORD $0x394d; BYTE $0xe3             // cmp    r11, r12
-	JE   LBB0_1050
-
-LBB0_26:
-	QUAD $0x00032024947ffdc5; BYTE $0x00 // vmovdqa    yword [rsp + 800], ymm2
-	LONG $0xf272f5c5; BYTE $0x03         // vpslld    ymm1, ymm2, 3
-	LONG $0xd166f9c5                     // vpcmpgtd    xmm2, xmm0, xmm1
-	LONG $0xd17ef9c5                     // vmovd    ecx, xmm2
-	WORD $0xc1f6; BYTE $0x01             // test    cl, 1
-	JE   LBB0_28
-	LONG $0x787da2c4; WORD $0x1f24       // vpbroadcastb    ymm4, byte [rdi + r11]
-
-LBB0_28:
-	WORD $0x894d; BYTE $0xda                   // mov    r10, r11
-	LONG $0x01ca8349                           // or    r10, 1
-	LONG $0xd166f9c5                           // vpcmpgtd    xmm2, xmm0, xmm1
-	LONG $0xd26be9c5                           // vpackssdw    xmm2, xmm2, xmm2
-	LONG $0xd263e9c5                           // vpacksswb    xmm2, xmm2, xmm2
-	LONG $0x1479e3c4; WORD $0x01d1             // vpextrb    ecx, xmm2, 1
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_30
-	LONG $0x2059a3c4; WORD $0x1714; BYTE $0x01 // vpinsrb    xmm2, xmm4, byte [rdi + r10], 1
-	LONG $0x025de3c4; WORD $0x0fe2             // vpblendd    ymm4, ymm4, ymm2, 15
-
-LBB0_30:
-	WORD $0x894d; BYTE $0xde                   // mov    r14, r11
-	LONG $0x02ce8349                           // or    r14, 2
-	LONG $0xd166f9c5                           // vpcmpgtd    xmm2, xmm0, xmm1
-	LONG $0xd26be9c5                           // vpackssdw    xmm2, xmm2, xmm2
-	LONG $0xd263e9c5                           // vpacksswb    xmm2, xmm2, xmm2
-	LONG $0x1479e3c4; WORD $0x02d1             // vpextrb    ecx, xmm2, 2
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_32
-	LONG $0x2059a3c4; WORD $0x3714; BYTE $0x02 // vpinsrb    xmm2, xmm4, byte [rdi + r14], 2
-	LONG $0x025de3c4; WORD $0x0fe2             // vpblendd    ymm4, ymm4, ymm2, 15
-
-LBB0_32:
-	LONG $0x397dc3c4; WORD $0x01cd             // vextracti128    xmm13, ymm1, 1
-	WORD $0x894c; BYTE $0xda                   // mov    rdx, r11
-	LONG $0x03ca8348                           // or    rdx, 3
-	LONG $0xd166f9c5                           // vpcmpgtd    xmm2, xmm0, xmm1
-	LONG $0xd26be9c5                           // vpackssdw    xmm2, xmm2, xmm2
-	LONG $0xd263e9c5                           // vpacksswb    xmm2, xmm2, xmm2
-	LONG $0x1479e3c4; WORD $0x03d1             // vpextrb    ecx, xmm2, 3
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_34
-	LONG $0x2059e3c4; WORD $0x1714; BYTE $0x03 // vpinsrb    xmm2, xmm4, byte [rdi + rdx], 3
-	LONG $0x025de3c4; WORD $0x0fe2             // vpblendd    ymm4, ymm4, ymm2, 15
-
-LBB0_34:
-	WORD $0x894c; BYTE $0xd9                   // mov    rcx, r11
-	LONG $0x04c98348                           // or    rcx, 4
-	LONG $0x397de3c4; WORD $0x01c7             // vextracti128    xmm7, ymm0, 1
-	LONG $0x6641c1c4; BYTE $0xd5               // vpcmpgtd    xmm2, xmm7, xmm13
-	LONG $0x1479c3c4; WORD $0x00d1             // vpextrb    r9d, xmm2, 0
-	LONG $0x01c1f641                           // test    r9b, 1
-	QUAD $0x0000011024948948                   // mov    qword [rsp + 272], rdx
-	QUAD $0x00000108248c8948                   // mov    qword [rsp + 264], rcx
-	JE   LBB0_36
-	LONG $0x2059e3c4; WORD $0x0f14; BYTE $0x04 // vpinsrb    xmm2, xmm4, byte [rdi + rcx], 4
-	LONG $0x025de3c4; WORD $0x0fe2             // vpblendd    ymm4, ymm4, ymm2, 15
-
-LBB0_36:
-	WORD $0x894d; BYTE $0xdf                   // mov    r15, r11
-	LONG $0x05cf8349                           // or    r15, 5
-	LONG $0xf166fdc5                           // vpcmpgtd    ymm6, ymm0, ymm1
-	LONG $0xd06bcdc5                           // vpackssdw    ymm2, ymm6, ymm0
-	LONG $0x397de3c4; WORD $0x01d2             // vextracti128    xmm2, ymm2, 1
-	LONG $0x5879e2c4; BYTE $0xd2               // vpbroadcastd    xmm2, xmm2
-	LONG $0xd263e9c5                           // vpacksswb    xmm2, xmm2, xmm2
-	LONG $0x1479e3c4; WORD $0x05d1             // vpextrb    ecx, xmm2, 5
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_38
-	LONG $0x2059a3c4; WORD $0x3f14; BYTE $0x05 // vpinsrb    xmm2, xmm4, byte [rdi + r15], 5
-	LONG $0x025de3c4; WORD $0x0fe2             // vpblendd    ymm4, ymm4, ymm2, 15
-
-LBB0_38:
-	WORD $0x894c; BYTE $0xdb                   // mov    rbx, r11
-	LONG $0x06cb8348                           // or    rbx, 6
-	LONG $0xd06bcdc5                           // vpackssdw    ymm2, ymm6, ymm0
-	LONG $0x00fde3c4; WORD $0xe8d2             // vpermq    ymm2, ymm2, 232
-	LONG $0xd263e9c5                           // vpacksswb    xmm2, xmm2, xmm2
-	LONG $0x1479e3c4; WORD $0x06d1             // vpextrb    ecx, xmm2, 6
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_40
-	LONG $0x2059e3c4; WORD $0x1f14; BYTE $0x06 // vpinsrb    xmm2, xmm4, byte [rdi + rbx], 6
-	LONG $0x025de3c4; WORD $0x0fe2             // vpblendd    ymm4, ymm4, ymm2, 15
-
-LBB0_40:
-	LONG $0xf372edc5; BYTE $0x03               // vpslld    ymm2, ymm3, 3
-	WORD $0x894c; BYTE $0xd8                   // mov    rax, r11
-	LONG $0x07c88348                           // or    rax, 7
-	LONG $0xe86bcdc5                           // vpackssdw    ymm5, ymm6, ymm0
-	LONG $0x00fde3c4; WORD $0xe8ed             // vpermq    ymm5, ymm5, 232
-	LONG $0xed63d1c5                           // vpacksswb    xmm5, xmm5, xmm5
-	LONG $0x1479e3c4; WORD $0x07e9             // vpextrb    ecx, xmm5, 7
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_42
-	LONG $0x2059e3c4; WORD $0x072c; BYTE $0x07 // vpinsrb    xmm5, xmm4, byte [rdi + rax], 7
-	LONG $0x025de3c4; WORD $0x0fe5             // vpblendd    ymm4, ymm4, ymm5, 15
-
-LBB0_42:
-	WORD $0x894c; BYTE $0xde                   // mov    rsi, r11
-	LONG $0x08ce8348                           // or    rsi, 8
-	LONG $0xea66f9c5                           // vpcmpgtd    xmm5, xmm0, xmm2
-	LONG $0x1479e3c4; WORD $0x00e9             // vpextrb    ecx, xmm5, 0
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_44
-	LONG $0x2059e3c4; WORD $0x372c; BYTE $0x08 // vpinsrb    xmm5, xmm4, byte [rdi + rsi], 8
-	LONG $0x025de3c4; WORD $0x0fe5             // vpblendd    ymm4, ymm4, ymm5, 15
-
-LBB0_44:
-	WORD $0x894c; BYTE $0xda                   // mov    rdx, r11
-	LONG $0x09ca8348                           // or    rdx, 9
-	LONG $0xea66f9c5                           // vpcmpgtd    xmm5, xmm0, xmm2
-	LONG $0xed6bd1c5                           // vpackssdw    xmm5, xmm5, xmm5
-	LONG $0xed63d1c5                           // vpacksswb    xmm5, xmm5, xmm5
-	LONG $0x1479e3c4; WORD $0x09e9             // vpextrb    ecx, xmm5, 9
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	QUAD $0x000000e024948948                   // mov    qword [rsp + 224], rdx
-	JE   LBB0_46
-	LONG $0x2059e3c4; WORD $0x172c; BYTE $0x09 // vpinsrb    xmm5, xmm4, byte [rdi + rdx], 9
-	LONG $0x025de3c4; WORD $0x0fe5             // vpblendd    ymm4, ymm4, ymm5, 15
-
-LBB0_46:
-	WORD $0x894c; BYTE $0xda                   // mov    rdx, r11
-	LONG $0x0aca8348                           // or    rdx, 10
-	LONG $0xea66f9c5                           // vpcmpgtd    xmm5, xmm0, xmm2
-	LONG $0xed6bd1c5                           // vpackssdw    xmm5, xmm5, xmm5
-	LONG $0xed63d1c5                           // vpacksswb    xmm5, xmm5, xmm5
-	LONG $0x1479e3c4; WORD $0x0ae9             // vpextrb    ecx, xmm5, 10
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	QUAD $0x000340249c7ffdc5; BYTE $0x00       // vmovdqa    yword [rsp + 832], ymm3
-	LONG $0x24748948; BYTE $0x60               // mov    qword [rsp + 96], rsi
-	JE   LBB0_48
-	LONG $0x2059e3c4; WORD $0x172c; BYTE $0x0a // vpinsrb    xmm5, xmm4, byte [rdi + rdx], 10
-	LONG $0x025de3c4; WORD $0x0fe5             // vpblendd    ymm4, ymm4, ymm5, 15
-
-LBB0_48:
-	LONG $0x397de3c4; WORD $0x01d5             // vextracti128    xmm5, ymm2, 1
-	WORD $0x894c; BYTE $0xde                   // mov    rsi, r11
-	LONG $0x0bce8348                           // or    rsi, 11
-	LONG $0xda66f9c5                           // vpcmpgtd    xmm3, xmm0, xmm2
-	LONG $0xdb6be1c5                           // vpackssdw    xmm3, xmm3, xmm3
-	LONG $0xdb63e1c5                           // vpacksswb    xmm3, xmm3, xmm3
-	LONG $0x1479e3c4; WORD $0x0bd9             // vpextrb    ecx, xmm3, 11
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	QUAD $0x000000982494894c                   // mov    qword [rsp + 152], r10
-	QUAD $0x0000012824b4894c                   // mov    qword [rsp + 296], r14
-	LONG $0x247c894c; BYTE $0x68               // mov    qword [rsp + 104], r15
-	QUAD $0x00000120249c8948                   // mov    qword [rsp + 288], rbx
-	QUAD $0x000000e824848948                   // mov    qword [rsp + 232], rax
-	JE   LBB0_50
-	LONG $0x2059e3c4; WORD $0x371c; BYTE $0x0b // vpinsrb    xmm3, xmm4, byte [rdi + rsi], 11
-	LONG $0x025de3c4; WORD $0x0fe3             // vpblendd    ymm4, ymm4, ymm3, 15
-
-LBB0_50:
-	WORD $0x894c; BYTE $0xd9                   // mov    rcx, r11
-	LONG $0x0cc98348                           // or    rcx, 12
-	LONG $0xdd66c1c5                           // vpcmpgtd    xmm3, xmm7, xmm5
-	LONG $0x1479c3c4; WORD $0x00de             // vpextrb    r14d, xmm3, 0
-	LONG $0x01c6f641                           // test    r14b, 1
-	QUAD $0x0000010024b48948                   // mov    qword [rsp + 256], rsi
-	QUAD $0x000000f8248c8948                   // mov    qword [rsp + 248], rcx
-	JE   LBB0_52
-	LONG $0x2059e3c4; WORD $0x0f1c; BYTE $0x0c // vpinsrb    xmm3, xmm4, byte [rdi + rcx], 12
-	LONG $0x025de3c4; WORD $0x0fe3             // vpblendd    ymm4, ymm4, ymm3, 15
-
-LBB0_52:
-	WORD $0x894c; BYTE $0xd8                   // mov    rax, r11
-	LONG $0x0dc88348                           // or    rax, 13
-	LONG $0xfa66fdc5                           // vpcmpgtd    ymm7, ymm0, ymm2
-	LONG $0xd86bc5c5                           // vpackssdw    ymm3, ymm7, ymm0
-	LONG $0x397de3c4; WORD $0x01db             // vextracti128    xmm3, ymm3, 1
-	LONG $0x5879e2c4; BYTE $0xdb               // vpbroadcastd    xmm3, xmm3
-	LONG $0xdb63e1c5                           // vpacksswb    xmm3, xmm3, xmm3
-	LONG $0x1479e3c4; WORD $0x0dd9             // vpextrb    ecx, xmm3, 13
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_54
-	LONG $0x2059e3c4; WORD $0x071c; BYTE $0x0d // vpinsrb    xmm3, xmm4, byte [rdi + rax], 13
-	LONG $0x025de3c4; WORD $0x0fe3             // vpblendd    ymm4, ymm4, ymm3, 15
-
-LBB0_54:
-	WORD $0x894c; BYTE $0xdb                   // mov    rbx, r11
-	LONG $0x0ecb8348                           // or    rbx, 14
-	LONG $0xd86bc5c5                           // vpackssdw    ymm3, ymm7, ymm0
-	LONG $0x00fde3c4; WORD $0xe8db             // vpermq    ymm3, ymm3, 232
-	LONG $0xdb63e1c5                           // vpacksswb    xmm3, xmm3, xmm3
-	LONG $0x1479e3c4; WORD $0x0ed9             // vpextrb    ecx, xmm3, 14
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	LONG $0x245c8948; BYTE $0x50               // mov    qword [rsp + 80], rbx
-	JE   LBB0_56
-	LONG $0x2059e3c4; WORD $0x1f1c; BYTE $0x0e // vpinsrb    xmm3, xmm4, byte [rdi + rbx], 14
-	LONG $0x025de3c4; WORD $0x0fe3             // vpblendd    ymm4, ymm4, ymm3, 15
-
-LBB0_56:
-	LONG $0x722dc1c4; WORD $0x03f0             // vpslld    ymm10, ymm8, 3
-	WORD $0x894c; BYTE $0xde                   // mov    rsi, r11
-	LONG $0x0fce8348                           // or    rsi, 15
-	LONG $0xd86bc5c5                           // vpackssdw    ymm3, ymm7, ymm0
-	LONG $0x00fde3c4; WORD $0xe8db             // vpermq    ymm3, ymm3, 232
-	LONG $0xdb63e1c5                           // vpacksswb    xmm3, xmm3, xmm3
-	LONG $0x1479e3c4; WORD $0x0fd9             // vpextrb    ecx, xmm3, 15
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_58
-	LONG $0x2059e3c4; WORD $0x371c; BYTE $0x0f // vpinsrb    xmm3, xmm4, byte [rdi + rsi], 15
-	LONG $0x025de3c4; WORD $0x0fe3             // vpblendd    ymm4, ymm4, ymm3, 15
-
-LBB0_58:
-	WORD $0x894d; BYTE $0xdf                   // mov    r15, r11
-	LONG $0x10cf8349                           // or    r15, 16
-	LONG $0x6679c1c4; BYTE $0xda               // vpcmpgtd    xmm3, xmm0, xmm10
-	LONG $0xd97ef9c5                           // vmovd    ecx, xmm3
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	LONG $0x247c894c; BYTE $0x40               // mov    qword [rsp + 64], r15
-	LONG $0x24748948; BYTE $0x48               // mov    qword [rsp + 72], rsi
-	JE   LBB0_60
-	LONG $0x397de3c4; WORD $0x01e3             // vextracti128    xmm3, ymm4, 1
-	LONG $0x2061a3c4; WORD $0x3f1c; BYTE $0x00 // vpinsrb    xmm3, xmm3, byte [rdi + r15], 0
-	LONG $0x385de3c4; WORD $0x01e3             // vinserti128    ymm4, ymm4, xmm3, 1
-
-LBB0_60:
-	WORD $0x894c; BYTE $0xde                   // mov    rsi, r11
-	LONG $0x11ce8348                           // or    rsi, 17
-	LONG $0x6679c1c4; BYTE $0xda               // vpcmpgtd    xmm3, xmm0, xmm10
-	LONG $0xdb6be1c5                           // vpackssdw    xmm3, xmm3, xmm3
-	LONG $0x00fde3c4; WORD $0xd4db             // vpermq    ymm3, ymm3, 212
-	LONG $0xd863e5c5                           // vpacksswb    ymm3, ymm3, ymm0
-	LONG $0x397de3c4; WORD $0x01db             // vextracti128    xmm3, ymm3, 1
-	LONG $0x1479e3c4; WORD $0x01d9             // vpextrb    ecx, xmm3, 1
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_62
-	LONG $0x397de3c4; WORD $0x01e3             // vextracti128    xmm3, ymm4, 1
-	LONG $0x2061e3c4; WORD $0x371c; BYTE $0x01 // vpinsrb    xmm3, xmm3, byte [rdi + rsi], 1
-	LONG $0x385de3c4; WORD $0x01e3             // vinserti128    ymm4, ymm4, xmm3, 1
-
-LBB0_62:
-	WORD $0x894c; BYTE $0xdb                   // mov    rbx, r11
-	LONG $0x12cb8348                           // or    rbx, 18
-	LONG $0x6679c1c4; BYTE $0xda               // vpcmpgtd    xmm3, xmm0, xmm10
-	LONG $0xdb6be1c5                           // vpackssdw    xmm3, xmm3, xmm3
-	LONG $0x00fde3c4; WORD $0xd4db             // vpermq    ymm3, ymm3, 212
-	LONG $0xd863e5c5                           // vpacksswb    ymm3, ymm3, ymm0
-	LONG $0x397de3c4; WORD $0x01db             // vextracti128    xmm3, ymm3, 1
-	LONG $0x1479e3c4; WORD $0x02d9             // vpextrb    ecx, xmm3, 2
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_64
-	LONG $0x397de3c4; WORD $0x01e3             // vextracti128    xmm3, ymm4, 1
-	LONG $0x2061e3c4; WORD $0x1f1c; BYTE $0x02 // vpinsrb    xmm3, xmm3, byte [rdi + rbx], 2
-	LONG $0x385de3c4; WORD $0x01e3             // vinserti128    ymm4, ymm4, xmm3, 1
-
-LBB0_64:
-	WORD $0x894d; BYTE $0xdf                   // mov    r15, r11
-	LONG $0x13cf8349                           // or    r15, 19
-	LONG $0x6679c1c4; BYTE $0xda               // vpcmpgtd    xmm3, xmm0, xmm10
-	LONG $0xdb6be1c5                           // vpackssdw    xmm3, xmm3, xmm3
-	LONG $0x00fde3c4; WORD $0xd4db             // vpermq    ymm3, ymm3, 212
-	LONG $0xd863e5c5                           // vpacksswb    ymm3, ymm3, ymm0
-	LONG $0x397de3c4; WORD $0x01db             // vextracti128    xmm3, ymm3, 1
-	LONG $0x1479e3c4; WORD $0x03d9             // vpextrb    ecx, xmm3, 3
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	QUAD $0x00036024847f7dc5; BYTE $0x00       // vmovdqa    yword [rsp + 864], ymm8
-	JE   LBB0_66
-	LONG $0x397de3c4; WORD $0x01e3             // vextracti128    xmm3, ymm4, 1
-	LONG $0x2061a3c4; WORD $0x3f1c; BYTE $0x03 // vpinsrb    xmm3, xmm3, byte [rdi + r15], 3
-	LONG $0x385de3c4; WORD $0x01e3             // vinserti128    ymm4, ymm4, xmm3, 1
-
-LBB0_66:
-	WORD $0x894d; BYTE $0xdd                   // mov    r13, r11
-	LONG $0x14cd8349                           // or    r13, 20
-	LONG $0x667d41c4; BYTE $0xc2               // vpcmpgtd    ymm8, ymm0, ymm10
-	LONG $0x6b7dc1c4; BYTE $0xd8               // vpackssdw    ymm3, ymm0, ymm8
-	LONG $0xd863e5c5                           // vpacksswb    ymm3, ymm3, ymm0
-	LONG $0x397de3c4; WORD $0x01db             // vextracti128    xmm3, ymm3, 1
-	LONG $0x1479e3c4; WORD $0x04d9             // vpextrb    ecx, xmm3, 4
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	LONG $0x246c894c; BYTE $0x38               // mov    qword [rsp + 56], r13
-	JE   LBB0_68
-	LONG $0x397de3c4; WORD $0x01e3             // vextracti128    xmm3, ymm4, 1
-	LONG $0x2061a3c4; WORD $0x2f1c; BYTE $0x04 // vpinsrb    xmm3, xmm3, byte [rdi + r13], 4
-	LONG $0x385de3c4; WORD $0x01e3             // vinserti128    ymm4, ymm4, xmm3, 1
-
-LBB0_68:
-	WORD $0x894d; BYTE $0xdd                   // mov    r13, r11
-	LONG $0x15cd8349                           // or    r13, 21
-	LONG $0x6b7dc1c4; BYTE $0xd8               // vpackssdw    ymm3, ymm0, ymm8
-	LONG $0xd863e5c5                           // vpacksswb    ymm3, ymm3, ymm0
-	LONG $0x397de3c4; WORD $0x01db             // vextracti128    xmm3, ymm3, 1
-	LONG $0x1479e3c4; WORD $0x05d9             // vpextrb    ecx, xmm3, 5
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	QUAD $0x00000080249c8948                   // mov    qword [rsp + 128], rbx
-	JE   LBB0_70
-	LONG $0x397de3c4; WORD $0x01e3             // vextracti128    xmm3, ymm4, 1
-	LONG $0x2061a3c4; WORD $0x2f1c; BYTE $0x05 // vpinsrb    xmm3, xmm3, byte [rdi + r13], 5
-	LONG $0x385de3c4; WORD $0x01e3             // vinserti128    ymm4, ymm4, xmm3, 1
-
-LBB0_70:
-	WORD $0x894d; BYTE $0xda                   // mov    r10, r11
-	LONG $0x16ca8349                           // or    r10, 22
-	LONG $0x6b7dc1c4; BYTE $0xd8               // vpackssdw    ymm3, ymm0, ymm8
-	LONG $0xd863e5c5                           // vpacksswb    ymm3, ymm3, ymm0
-	LONG $0x397de3c4; WORD $0x01db             // vextracti128    xmm3, ymm3, 1
-	LONG $0x1479e3c4; WORD $0x06d9             // vpextrb    ecx, xmm3, 6
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_72
-	LONG $0x397de3c4; WORD $0x01e3             // vextracti128    xmm3, ymm4, 1
-	LONG $0x2061a3c4; WORD $0x171c; BYTE $0x06 // vpinsrb    xmm3, xmm3, byte [rdi + r10], 6
-	LONG $0x385de3c4; WORD $0x01e3             // vinserti128    ymm4, ymm4, xmm3, 1
-
-LBB0_72:
-	LONG $0x7225c1c4; WORD $0x03f1             // vpslld    ymm11, ymm9, 3
-	WORD $0x894c; BYTE $0xdb                   // mov    rbx, r11
-	LONG $0x17cb8348                           // or    rbx, 23
-	LONG $0x6b7dc1c4; BYTE $0xd8               // vpackssdw    ymm3, ymm0, ymm8
-	LONG $0xd863e5c5                           // vpacksswb    ymm3, ymm3, ymm0
-	LONG $0x397de3c4; WORD $0x01db             // vextracti128    xmm3, ymm3, 1
-	LONG $0x1479e3c4; WORD $0x07d9             // vpextrb    ecx, xmm3, 7
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	QUAD $0x000000f0249c8948                   // mov    qword [rsp + 240], rbx
-	QUAD $0x000380248c7f7dc5; BYTE $0x00       // vmovdqa    yword [rsp + 896], ymm9
-	JE   LBB0_74
-	LONG $0x397de3c4; WORD $0x01e3             // vextracti128    xmm3, ymm4, 1
-	LONG $0x2061e3c4; WORD $0x1f1c; BYTE $0x07 // vpinsrb    xmm3, xmm3, byte [rdi + rbx], 7
-	LONG $0x385de3c4; WORD $0x01e3             // vinserti128    ymm4, ymm4, xmm3, 1
-
-LBB0_74:
-	WORD $0x894c; BYTE $0xdb                   // mov    rbx, r11
-	LONG $0x18cb8348                           // or    rbx, 24
-	LONG $0x667d41c4; BYTE $0xcb               // vpcmpgtd    ymm9, ymm0, ymm11
-	LONG $0x00fd43c4; WORD $0x44e1             // vpermq    ymm12, ymm9, 68
-	LONG $0x637dc1c4; BYTE $0xdc               // vpacksswb    ymm3, ymm0, ymm12
-	LONG $0x397de3c4; WORD $0x01db             // vextracti128    xmm3, ymm3, 1
-	LONG $0x1479e3c4; WORD $0x08d9             // vpextrb    ecx, xmm3, 8
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	QUAD $0x000000d8249c8948                   // mov    qword [rsp + 216], rbx
-	JE   LBB0_76
-	LONG $0x397de3c4; WORD $0x01e3             // vextracti128    xmm3, ymm4, 1
-	LONG $0x2061e3c4; WORD $0x1f1c; BYTE $0x08 // vpinsrb    xmm3, xmm3, byte [rdi + rbx], 8
-	LONG $0x385de3c4; WORD $0x01e3             // vinserti128    ymm4, ymm4, xmm3, 1
-
-LBB0_76:
-	WORD $0x894c; BYTE $0xdb                   // mov    rbx, r11
-	LONG $0x19cb8348                           // or    rbx, 25
-	LONG $0x6679c1c4; BYTE $0xdb               // vpcmpgtd    xmm3, xmm0, xmm11
-	LONG $0xdb6be1c5                           // vpackssdw    xmm3, xmm3, xmm3
-	LONG $0x00fde3c4; WORD $0xd4db             // vpermq    ymm3, ymm3, 212
-	LONG $0xdb63fdc5                           // vpacksswb    ymm3, ymm0, ymm3
-	LONG $0x397de3c4; WORD $0x01db             // vextracti128    xmm3, ymm3, 1
-	LONG $0x1479e3c4; WORD $0x09d9             // vpextrb    ecx, xmm3, 9
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	QUAD $0x000000d0249c8948                   // mov    qword [rsp + 208], rbx
-	JE   LBB0_78
-	LONG $0x397de3c4; WORD $0x01e3             // vextracti128    xmm3, ymm4, 1
-	LONG $0x2061e3c4; WORD $0x1f1c; BYTE $0x09 // vpinsrb    xmm3, xmm3, byte [rdi + rbx], 9
-	LONG $0x385de3c4; WORD $0x01e3             // vinserti128    ymm4, ymm4, xmm3, 1
-
-LBB0_78:
-	WORD $0x894c; BYTE $0xdb                   // mov    rbx, r11
-	LONG $0x1acb8348                           // or    rbx, 26
-	LONG $0x6679c1c4; BYTE $0xdb               // vpcmpgtd    xmm3, xmm0, xmm11
-	LONG $0xdb6be1c5                           // vpackssdw    xmm3, xmm3, xmm3
-	LONG $0x00fde3c4; WORD $0xd4db             // vpermq    ymm3, ymm3, 212
-	LONG $0xdb63fdc5                           // vpacksswb    ymm3, ymm0, ymm3
-	LONG $0x397de3c4; WORD $0x01db             // vextracti128    xmm3, ymm3, 1
-	LONG $0x1479e3c4; WORD $0x0ad9             // vpextrb    ecx, xmm3, 10
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	QUAD $0x000000c8249c8948                   // mov    qword [rsp + 200], rbx
-	JE   LBB0_80
-	LONG $0x397de3c4; WORD $0x01e3             // vextracti128    xmm3, ymm4, 1
-	LONG $0x2061e3c4; WORD $0x1f1c; BYTE $0x0a // vpinsrb    xmm3, xmm3, byte [rdi + rbx], 10
-	LONG $0x385de3c4; WORD $0x01e3             // vinserti128    ymm4, ymm4, xmm3, 1
-
-LBB0_80:
-	WORD $0x894c; BYTE $0xdb                   // mov    rbx, r11
-	LONG $0x1bcb8348                           // or    rbx, 27
-	LONG $0x6679c1c4; BYTE $0xdb               // vpcmpgtd    xmm3, xmm0, xmm11
-	LONG $0xdb6be1c5                           // vpackssdw    xmm3, xmm3, xmm3
-	LONG $0x00fde3c4; WORD $0xd4db             // vpermq    ymm3, ymm3, 212
-	LONG $0xdb63fdc5                           // vpacksswb    ymm3, ymm0, ymm3
-	LONG $0x397de3c4; WORD $0x01db             // vextracti128    xmm3, ymm3, 1
-	LONG $0x1479e3c4; WORD $0x0bd9             // vpextrb    ecx, xmm3, 11
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	QUAD $0x000000c0249c8948                   // mov    qword [rsp + 192], rbx
-	QUAD $0x0000009024948948                   // mov    qword [rsp + 144], rdx
-	LONG $0x24448948; BYTE $0x58               // mov    qword [rsp + 88], rax
-	JE   LBB0_82
-	LONG $0x397de3c4; WORD $0x01e3             // vextracti128    xmm3, ymm4, 1
-	LONG $0x2061e3c4; WORD $0x1f1c; BYTE $0x0b // vpinsrb    xmm3, xmm3, byte [rdi + rbx], 11
-	LONG $0x385de3c4; WORD $0x01e3             // vinserti128    ymm4, ymm4, xmm3, 1
-
-LBB0_82:
-	WORD $0x894c; BYTE $0xda                   // mov    rdx, r11
-	LONG $0x1cca8348                           // or    rdx, 28
-	LONG $0x6b7dc1c4; BYTE $0xd9               // vpackssdw    ymm3, ymm0, ymm9
-	LONG $0xdb63fdc5                           // vpacksswb    ymm3, ymm0, ymm3
-	LONG $0x397de3c4; WORD $0x01db             // vextracti128    xmm3, ymm3, 1
-	LONG $0x1479e3c4; WORD $0x0cd9             // vpextrb    ecx, xmm3, 12
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_84
-	LONG $0x397de3c4; WORD $0x01e3             // vextracti128    xmm3, ymm4, 1
-	LONG $0x2061e3c4; WORD $0x171c; BYTE $0x0c // vpinsrb    xmm3, xmm3, byte [rdi + rdx], 12
-	LONG $0x385de3c4; WORD $0x01e3             // vinserti128    ymm4, ymm4, xmm3, 1
-
-LBB0_84:
-	WORD $0x894c; BYTE $0xdb                   // mov    rbx, r11
-	LONG $0x1dcb8348                           // or    rbx, 29
-	LONG $0x6b7dc1c4; BYTE $0xd9               // vpackssdw    ymm3, ymm0, ymm9
-	LONG $0xdb63fdc5                           // vpacksswb    ymm3, ymm0, ymm3
-	LONG $0x397de3c4; WORD $0x01db             // vextracti128    xmm3, ymm3, 1
-	LONG $0x1479e3c4; WORD $0x0dd9             // vpextrb    ecx, xmm3, 13
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	QUAD $0x000000b0249c8948                   // mov    qword [rsp + 176], rbx
-	JE   LBB0_86
-	LONG $0x397de3c4; WORD $0x01e3             // vextracti128    xmm3, ymm4, 1
-	LONG $0x2061e3c4; WORD $0x1f1c; BYTE $0x0d // vpinsrb    xmm3, xmm3, byte [rdi + rbx], 13
-	LONG $0x385de3c4; WORD $0x01e3             // vinserti128    ymm4, ymm4, xmm3, 1
-
-LBB0_86:
-	WORD $0x894c; BYTE $0xdb                   // mov    rbx, r11
-	LONG $0x1ecb8348                           // or    rbx, 30
-	LONG $0x6b7dc1c4; BYTE $0xd9               // vpackssdw    ymm3, ymm0, ymm9
-	LONG $0xdb63fdc5                           // vpacksswb    ymm3, ymm0, ymm3
-	LONG $0x397de3c4; WORD $0x01db             // vextracti128    xmm3, ymm3, 1
-	LONG $0x1479e3c4; WORD $0x0ed9             // vpextrb    ecx, xmm3, 14
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	QUAD $0x000000a8249c8948                   // mov    qword [rsp + 168], rbx
-	JE   LBB0_88
-	LONG $0x397de3c4; WORD $0x01e3             // vextracti128    xmm3, ymm4, 1
-	LONG $0x2061e3c4; WORD $0x1f1c; BYTE $0x0e // vpinsrb    xmm3, xmm3, byte [rdi + rbx], 14
-	LONG $0x385de3c4; WORD $0x01e3             // vinserti128    ymm4, ymm4, xmm3, 1
-
-LBB0_88:
-	WORD $0x894c; BYTE $0xdb                   // mov    rbx, r11
-	LONG $0x1fcb8348                           // or    rbx, 31
-	LONG $0x6b7dc1c4; BYTE $0xd9               // vpackssdw    ymm3, ymm0, ymm9
-	LONG $0xdb63fdc5                           // vpacksswb    ymm3, ymm0, ymm3
-	LONG $0x397de3c4; WORD $0x01db             // vextracti128    xmm3, ymm3, 1
-	LONG $0x1479e3c4; WORD $0x0fd9             // vpextrb    ecx, xmm3, 15
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	QUAD $0x000000a0249c8948                   // mov    qword [rsp + 160], rbx
-	JE   LBB0_90
-	LONG $0x397de3c4; WORD $0x01e3             // vextracti128    xmm3, ymm4, 1
-	LONG $0x2061e3c4; WORD $0x1f1c; BYTE $0x0f // vpinsrb    xmm3, xmm3, byte [rdi + rbx], 15
-	LONG $0x385de3c4; WORD $0x01e3             // vinserti128    ymm4, ymm4, xmm3, 1
-
-LBB0_90:
-	LONG $0x357de2c4; BYTE $0xd9               // vpmovzxdq    ymm3, xmm1
-	QUAD $0x000200249c7ffdc5; BYTE $0x00       // vmovdqa    yword [rsp + 512], ymm3
-	QUAD $0x00000080bddb5dc5                   // vpand    ymm15, ymm4, yword 128[rbp] /* [rip + .LCPI0_4] */
-	LONG $0xd966f9c5                           // vpcmpgtd    xmm3, xmm0, xmm1
-	LONG $0xd97ef9c5                           // vmovd    ecx, xmm3
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_92
-	QUAD $0x000200249c6ffdc5; BYTE $0x00       // vmovdqa    ymm3, yword [rsp + 512]
-	LONG $0x7ef9e1c4; BYTE $0xd9               // vmovq    rcx, xmm3
-	LONG $0x147943c4; WORD $0x083c; BYTE $0x00 // vpextrb    byte [r8 + rcx], xmm15, 0
-
-LBB0_92:
-	LONG $0xd966f9c5                           // vpcmpgtd    xmm3, xmm0, xmm1
-	LONG $0xdb6be1c5                           // vpackssdw    xmm3, xmm3, xmm3
-	LONG $0xdb63e1c5                           // vpacksswb    xmm3, xmm3, xmm3
-	LONG $0x1479e3c4; WORD $0x01d9             // vpextrb    ecx, xmm3, 1
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_94
-	QUAD $0x000200249c6ffdc5; BYTE $0x00       // vmovdqa    ymm3, yword [rsp + 512]
-	LONG $0x16f9e3c4; WORD $0x01d9             // vpextrq    rcx, xmm3, 1
-	LONG $0x147943c4; WORD $0x083c; BYTE $0x01 // vpextrb    byte [r8 + rcx], xmm15, 1
-
-LBB0_94:
-	LONG $0xd966f9c5                           // vpcmpgtd    xmm3, xmm0, xmm1
-	LONG $0xdb6be1c5                           // vpackssdw    xmm3, xmm3, xmm3
-	LONG $0xdb63e1c5                           // vpacksswb    xmm3, xmm3, xmm3
-	LONG $0x1479e3c4; WORD $0x02d9             // vpextrb    ecx, xmm3, 2
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_96
-	QUAD $0x000200249c6ffdc5; BYTE $0x00       // vmovdqa    ymm3, yword [rsp + 512]
-	LONG $0x397de3c4; WORD $0x01db             // vextracti128    xmm3, ymm3, 1
-	LONG $0x7ef9e1c4; BYTE $0xd9               // vmovq    rcx, xmm3
-	LONG $0x147943c4; WORD $0x083c; BYTE $0x02 // vpextrb    byte [r8 + rcx], xmm15, 2
-
-LBB0_96:
-	LONG $0xc966f9c5                           // vpcmpgtd    xmm1, xmm0, xmm1
-	LONG $0xc96bf1c5                           // vpackssdw    xmm1, xmm1, xmm1
-	LONG $0xc963f1c5                           // vpacksswb    xmm1, xmm1, xmm1
-	LONG $0x1479e3c4; WORD $0x03c9             // vpextrb    ecx, xmm1, 3
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_98
-	QUAD $0x000200248c6ffdc5; BYTE $0x00       // vmovdqa    ymm1, yword [rsp + 512]
-	LONG $0x397de3c4; WORD $0x01c9             // vextracti128    xmm1, ymm1, 1
-	LONG $0x16f9e3c4; WORD $0x01c9             // vpextrq    rcx, xmm1, 1
-	LONG $0x147943c4; WORD $0x083c; BYTE $0x03 // vpextrb    byte [r8 + rcx], xmm15, 3
-
-LBB0_98:
-	LONG $0x357dc2c4; BYTE $0xcd               // vpmovzxdq    ymm1, xmm13
-	QUAD $0x0001e0248c7ffdc5; BYTE $0x00       // vmovdqa    yword [rsp + 480], ymm1
-	LONG $0x01c1f641                           // test    r9b, 1
-	JE   LBB0_100
-	QUAD $0x0001e0248c6ffdc5; BYTE $0x00       // vmovdqa    ymm1, yword [rsp + 480]
-	LONG $0x7ef9e1c4; BYTE $0xc9               // vmovq    rcx, xmm1
-	LONG $0x147943c4; WORD $0x083c; BYTE $0x04 // vpextrb    byte [r8 + rcx], xmm15, 4
-
-LBB0_100:
-	LONG $0xc86bcdc5                           // vpackssdw    ymm1, ymm6, ymm0
-	LONG $0x397de3c4; WORD $0x01c9             // vextracti128    xmm1, ymm1, 1
-	LONG $0x5879e2c4; BYTE $0xc9               // vpbroadcastd    xmm1, xmm1
-	LONG $0xc963f1c5                           // vpacksswb    xmm1, xmm1, xmm1
-	LONG $0x1479e3c4; WORD $0x05c9             // vpextrb    ecx, xmm1, 5
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_102
-	QUAD $0x0001e0248c6ffdc5; BYTE $0x00       // vmovdqa    ymm1, yword [rsp + 480]
-	LONG $0x16f9e3c4; WORD $0x01c9             // vpextrq    rcx, xmm1, 1
-	LONG $0x147943c4; WORD $0x083c; BYTE $0x05 // vpextrb    byte [r8 + rcx], xmm15, 5
-
-LBB0_102:
-	LONG $0xc86bcdc5                           // vpackssdw    ymm1, ymm6, ymm0
-	LONG $0x00fde3c4; WORD $0xe8c9             // vpermq    ymm1, ymm1, 232
-	LONG $0xc963f1c5                           // vpacksswb    xmm1, xmm1, xmm1
-	LONG $0x1479e3c4; WORD $0x06c9             // vpextrb    ecx, xmm1, 6
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_104
-	QUAD $0x0001e0248c6ffdc5; BYTE $0x00       // vmovdqa    ymm1, yword [rsp + 480]
-	LONG $0x397de3c4; WORD $0x01c9             // vextracti128    xmm1, ymm1, 1
-	LONG $0x7ef9e1c4; BYTE $0xc9               // vmovq    rcx, xmm1
-	LONG $0x147943c4; WORD $0x083c; BYTE $0x06 // vpextrb    byte [r8 + rcx], xmm15, 6
-
-LBB0_104:
-	LONG $0xc86bcdc5                           // vpackssdw    ymm1, ymm6, ymm0
-	LONG $0x00fde3c4; WORD $0xe8c9             // vpermq    ymm1, ymm1, 232
-	LONG $0xc963f1c5                           // vpacksswb    xmm1, xmm1, xmm1
-	LONG $0x1479e3c4; WORD $0x07c9             // vpextrb    ecx, xmm1, 7
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_106
-	QUAD $0x0001e0248c6ffdc5; BYTE $0x00       // vmovdqa    ymm1, yword [rsp + 480]
-	LONG $0x397de3c4; WORD $0x01c9             // vextracti128    xmm1, ymm1, 1
-	LONG $0x16f9e3c4; WORD $0x01c9             // vpextrq    rcx, xmm1, 1
-	LONG $0x147943c4; WORD $0x083c; BYTE $0x07 // vpextrb    byte [r8 + rcx], xmm15, 7
-
-LBB0_106:
-	LONG $0x357de2c4; BYTE $0xca               // vpmovzxdq    ymm1, xmm2
-	QUAD $0x0001c0248c7ffdc5; BYTE $0x00       // vmovdqa    yword [rsp + 448], ymm1
-	LONG $0xca66f9c5                           // vpcmpgtd    xmm1, xmm0, xmm2
-	LONG $0x1479e3c4; WORD $0x00c9             // vpextrb    ecx, xmm1, 0
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_108
-	QUAD $0x0001c0248c6ffdc5; BYTE $0x00       // vmovdqa    ymm1, yword [rsp + 448]
-	LONG $0x7ef9e1c4; BYTE $0xc9               // vmovq    rcx, xmm1
-	LONG $0x147943c4; WORD $0x083c; BYTE $0x08 // vpextrb    byte [r8 + rcx], xmm15, 8
-
-LBB0_108:
-	LONG $0xca66f9c5                           // vpcmpgtd    xmm1, xmm0, xmm2
-	LONG $0xc96bf1c5                           // vpackssdw    xmm1, xmm1, xmm1
-	LONG $0xc963f1c5                           // vpacksswb    xmm1, xmm1, xmm1
-	LONG $0x1479e3c4; WORD $0x09c9             // vpextrb    ecx, xmm1, 9
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_110
-	QUAD $0x0001c0248c6ffdc5; BYTE $0x00       // vmovdqa    ymm1, yword [rsp + 448]
-	LONG $0x16f9e3c4; WORD $0x01c9             // vpextrq    rcx, xmm1, 1
-	LONG $0x147943c4; WORD $0x083c; BYTE $0x09 // vpextrb    byte [r8 + rcx], xmm15, 9
-
-LBB0_110:
-	LONG $0xca66f9c5                           // vpcmpgtd    xmm1, xmm0, xmm2
-	LONG $0xc96bf1c5                           // vpackssdw    xmm1, xmm1, xmm1
-	LONG $0xc963f1c5                           // vpacksswb    xmm1, xmm1, xmm1
-	LONG $0x1479e3c4; WORD $0x0ac9             // vpextrb    ecx, xmm1, 10
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_112
-	QUAD $0x0001c0248c6ffdc5; BYTE $0x00       // vmovdqa    ymm1, yword [rsp + 448]
-	LONG $0x397de3c4; WORD $0x01c9             // vextracti128    xmm1, ymm1, 1
-	LONG $0x7ef9e1c4; BYTE $0xc9               // vmovq    rcx, xmm1
-	LONG $0x147943c4; WORD $0x083c; BYTE $0x0a // vpextrb    byte [r8 + rcx], xmm15, 10
-
-LBB0_112:
-	LONG $0xca66f9c5                           // vpcmpgtd    xmm1, xmm0, xmm2
-	LONG $0xc96bf1c5                           // vpackssdw    xmm1, xmm1, xmm1
-	LONG $0xc963f1c5                           // vpacksswb    xmm1, xmm1, xmm1
-	LONG $0x1479e3c4; WORD $0x0bc9             // vpextrb    ecx, xmm1, 11
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_114
-	QUAD $0x0001c0248c6ffdc5; BYTE $0x00       // vmovdqa    ymm1, yword [rsp + 448]
-	LONG $0x397de3c4; WORD $0x01c9             // vextracti128    xmm1, ymm1, 1
-	LONG $0x16f9e3c4; WORD $0x01c9             // vpextrq    rcx, xmm1, 1
-	LONG $0x147943c4; WORD $0x083c; BYTE $0x0b // vpextrb    byte [r8 + rcx], xmm15, 11
-
-LBB0_114:
-	QUAD $0x0000008824b48948                   // mov    qword [rsp + 136], rsi
-	LONG $0x357de2c4; BYTE $0xcd               // vpmovzxdq    ymm1, xmm5
-	QUAD $0x0001a0248c7ffdc5; BYTE $0x00       // vmovdqa    yword [rsp + 416], ymm1
-	LONG $0x01c6f641                           // test    r14b, 1
-	JE   LBB0_116
-	QUAD $0x0001a0248c6ffdc5; BYTE $0x00       // vmovdqa    ymm1, yword [rsp + 416]
-	LONG $0x7ef9e1c4; BYTE $0xc9               // vmovq    rcx, xmm1
-	LONG $0x147943c4; WORD $0x083c; BYTE $0x0c // vpextrb    byte [r8 + rcx], xmm15, 12
-
-LBB0_116:
-	LONG $0xc86bc5c5                           // vpackssdw    ymm1, ymm7, ymm0
-	LONG $0x397de3c4; WORD $0x01c9             // vextracti128    xmm1, ymm1, 1
-	LONG $0x5879e2c4; BYTE $0xc9               // vpbroadcastd    xmm1, xmm1
-	LONG $0xc963f1c5                           // vpacksswb    xmm1, xmm1, xmm1
-	LONG $0x1479e3c4; WORD $0x0dc9             // vpextrb    ecx, xmm1, 13
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	QUAD $0x00000098248c8b4c                   // mov    r9, qword [rsp + 152]
-	QUAD $0x0000012824b48b48                   // mov    rsi, qword [rsp + 296]
-	LONG $0x24748b4c; BYTE $0x68               // mov    r14, qword [rsp + 104]
-	QUAD $0x0000012024848b48                   // mov    rax, qword [rsp + 288]
-	JE   LBB0_118
-	QUAD $0x0001a0248c6ffdc5; BYTE $0x00       // vmovdqa    ymm1, yword [rsp + 416]
-	LONG $0x16f9e3c4; WORD $0x01c9             // vpextrq    rcx, xmm1, 1
-	LONG $0x147943c4; WORD $0x083c; BYTE $0x0d // vpextrb    byte [r8 + rcx], xmm15, 13
-
-LBB0_118:
-	LONG $0xc86bc5c5                           // vpackssdw    ymm1, ymm7, ymm0
-	LONG $0x00fde3c4; WORD $0xe8c9             // vpermq    ymm1, ymm1, 232
-	LONG $0xc963f1c5                           // vpacksswb    xmm1, xmm1, xmm1
-	LONG $0x1479e3c4; WORD $0x0ec9             // vpextrb    ecx, xmm1, 14
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_120
-	QUAD $0x0001a0248c6ffdc5; BYTE $0x00       // vmovdqa    ymm1, yword [rsp + 416]
-	LONG $0x397de3c4; WORD $0x01c9             // vextracti128    xmm1, ymm1, 1
-	LONG $0x7ef9e1c4; BYTE $0xc9               // vmovq    rcx, xmm1
-	LONG $0x147943c4; WORD $0x083c; BYTE $0x0e // vpextrb    byte [r8 + rcx], xmm15, 14
-
-LBB0_120:
-	LONG $0xc86bc5c5                           // vpackssdw    ymm1, ymm7, ymm0
-	LONG $0x00fde3c4; WORD $0xe8c9             // vpermq    ymm1, ymm1, 232
-	LONG $0xc963f1c5                           // vpacksswb    xmm1, xmm1, xmm1
-	LONG $0x1479e3c4; WORD $0x0fc9             // vpextrb    ecx, xmm1, 15
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_122
-	QUAD $0x0001a0248c6ffdc5; BYTE $0x00       // vmovdqa    ymm1, yword [rsp + 416]
-	LONG $0x397de3c4; WORD $0x01c9             // vextracti128    xmm1, ymm1, 1
-	LONG $0x16f9e3c4; WORD $0x01c9             // vpextrq    rcx, xmm1, 1
-	LONG $0x147943c4; WORD $0x083c; BYTE $0x0f // vpextrb    byte [r8 + rcx], xmm15, 15
-
-LBB0_122:
-	LONG $0x357dc2c4; BYTE $0xca               // vpmovzxdq    ymm1, xmm10
-	QUAD $0x000180248c7ffdc5; BYTE $0x00       // vmovdqa    yword [rsp + 384], ymm1
-	LONG $0x6679c1c4; BYTE $0xca               // vpcmpgtd    xmm1, xmm0, xmm10
-	LONG $0xc97ef9c5                           // vmovd    ecx, xmm1
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_124
-	QUAD $0x000180248c6ffdc5; BYTE $0x00       // vmovdqa    ymm1, yword [rsp + 384]
-	LONG $0x7ef9e1c4; BYTE $0xc9               // vmovq    rcx, xmm1
-	LONG $0x397d63c4; WORD $0x01f9             // vextracti128    xmm1, ymm15, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x00 // vpextrb    byte [r8 + rcx], xmm1, 0
-
-LBB0_124:
-	LONG $0x6679c1c4; BYTE $0xca               // vpcmpgtd    xmm1, xmm0, xmm10
-	LONG $0xc96bf1c5                           // vpackssdw    xmm1, xmm1, xmm1
-	LONG $0x00fde3c4; WORD $0xd4c9             // vpermq    ymm1, ymm1, 212
-	LONG $0xc863f5c5                           // vpacksswb    ymm1, ymm1, ymm0
-	LONG $0x397de3c4; WORD $0x01c9             // vextracti128    xmm1, ymm1, 1
-	LONG $0x1479e3c4; WORD $0x01c9             // vpextrb    ecx, xmm1, 1
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_126
-	QUAD $0x000180248c6ffdc5; BYTE $0x00       // vmovdqa    ymm1, yword [rsp + 384]
-	LONG $0x16f9e3c4; WORD $0x01c9             // vpextrq    rcx, xmm1, 1
-	LONG $0x397d63c4; WORD $0x01f9             // vextracti128    xmm1, ymm15, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x01 // vpextrb    byte [r8 + rcx], xmm1, 1
-
-LBB0_126:
-	LONG $0x6679c1c4; BYTE $0xca               // vpcmpgtd    xmm1, xmm0, xmm10
-	LONG $0xc96bf1c5                           // vpackssdw    xmm1, xmm1, xmm1
-	LONG $0x00fde3c4; WORD $0xd4c9             // vpermq    ymm1, ymm1, 212
-	LONG $0xc863f5c5                           // vpacksswb    ymm1, ymm1, ymm0
-	LONG $0x397de3c4; WORD $0x01c9             // vextracti128    xmm1, ymm1, 1
-	LONG $0x1479e3c4; WORD $0x02c9             // vpextrb    ecx, xmm1, 2
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_128
-	QUAD $0x000180248c6ffdc5; BYTE $0x00       // vmovdqa    ymm1, yword [rsp + 384]
-	LONG $0x397de3c4; WORD $0x01c9             // vextracti128    xmm1, ymm1, 1
-	LONG $0x7ef9e1c4; BYTE $0xc9               // vmovq    rcx, xmm1
-	LONG $0x397d63c4; WORD $0x01f9             // vextracti128    xmm1, ymm15, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x02 // vpextrb    byte [r8 + rcx], xmm1, 2
-
-LBB0_128:
-	LONG $0x397d63c4; WORD $0x01d1             // vextracti128    xmm1, ymm10, 1
-	LONG $0x6679c1c4; BYTE $0xd2               // vpcmpgtd    xmm2, xmm0, xmm10
-	LONG $0xd26be9c5                           // vpackssdw    xmm2, xmm2, xmm2
-	LONG $0x00fde3c4; WORD $0xd4d2             // vpermq    ymm2, ymm2, 212
-	LONG $0xd063edc5                           // vpacksswb    ymm2, ymm2, ymm0
-	LONG $0x397de3c4; WORD $0x01d2             // vextracti128    xmm2, ymm2, 1
-	LONG $0x1479e3c4; WORD $0x03d1             // vpextrb    ecx, xmm2, 3
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_130
-	QUAD $0x00018024946ffdc5; BYTE $0x00       // vmovdqa    ymm2, yword [rsp + 384]
-	LONG $0x397de3c4; WORD $0x01d2             // vextracti128    xmm2, ymm2, 1
-	LONG $0x16f9e3c4; WORD $0x01d1             // vpextrq    rcx, xmm2, 1
-	LONG $0x397d63c4; WORD $0x01fa             // vextracti128    xmm2, ymm15, 1
-	LONG $0x1479c3c4; WORD $0x0814; BYTE $0x03 // vpextrb    byte [r8 + rcx], xmm2, 3
-
-LBB0_130:
-	LONG $0x357de2c4; BYTE $0xc9               // vpmovzxdq    ymm1, xmm1
-	QUAD $0x000160248c7ffdc5; BYTE $0x00       // vmovdqa    yword [rsp + 352], ymm1
-	LONG $0x6b7dc1c4; BYTE $0xc8               // vpackssdw    ymm1, ymm0, ymm8
-	LONG $0xc863f5c5                           // vpacksswb    ymm1, ymm1, ymm0
-	LONG $0x397de3c4; WORD $0x01c9             // vextracti128    xmm1, ymm1, 1
-	LONG $0x1479e3c4; WORD $0x04c9             // vpextrb    ecx, xmm1, 4
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_132
-	QUAD $0x000160248c6ffdc5; BYTE $0x00       // vmovdqa    ymm1, yword [rsp + 352]
-	LONG $0x7ef9e1c4; BYTE $0xc9               // vmovq    rcx, xmm1
-	LONG $0x397d63c4; WORD $0x01f9             // vextracti128    xmm1, ymm15, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x04 // vpextrb    byte [r8 + rcx], xmm1, 4
-
-LBB0_132:
-	LONG $0x6b7dc1c4; BYTE $0xc8               // vpackssdw    ymm1, ymm0, ymm8
-	LONG $0xc863f5c5                           // vpacksswb    ymm1, ymm1, ymm0
-	LONG $0x397de3c4; WORD $0x01c9             // vextracti128    xmm1, ymm1, 1
-	LONG $0x1479e3c4; WORD $0x05c9             // vpextrb    ecx, xmm1, 5
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_134
-	QUAD $0x000160248c6ffdc5; BYTE $0x00       // vmovdqa    ymm1, yword [rsp + 352]
-	LONG $0x16f9e3c4; WORD $0x01c9             // vpextrq    rcx, xmm1, 1
-	LONG $0x397d63c4; WORD $0x01f9             // vextracti128    xmm1, ymm15, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x05 // vpextrb    byte [r8 + rcx], xmm1, 5
-
-LBB0_134:
-	LONG $0x6b7dc1c4; BYTE $0xc8               // vpackssdw    ymm1, ymm0, ymm8
-	LONG $0xc863f5c5                           // vpacksswb    ymm1, ymm1, ymm0
-	LONG $0x397de3c4; WORD $0x01c9             // vextracti128    xmm1, ymm1, 1
-	LONG $0x1479e3c4; WORD $0x06c9             // vpextrb    ecx, xmm1, 6
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_136
-	QUAD $0x000160248c6ffdc5; BYTE $0x00       // vmovdqa    ymm1, yword [rsp + 352]
-	LONG $0x397de3c4; WORD $0x01c9             // vextracti128    xmm1, ymm1, 1
-	LONG $0x7ef9e1c4; BYTE $0xc9               // vmovq    rcx, xmm1
-	LONG $0x397d63c4; WORD $0x01f9             // vextracti128    xmm1, ymm15, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x06 // vpextrb    byte [r8 + rcx], xmm1, 6
-
-LBB0_136:
-	LONG $0x6b7dc1c4; BYTE $0xc8               // vpackssdw    ymm1, ymm0, ymm8
-	LONG $0xc863f5c5                           // vpacksswb    ymm1, ymm1, ymm0
-	LONG $0x397de3c4; WORD $0x01c9             // vextracti128    xmm1, ymm1, 1
-	LONG $0x1479e3c4; WORD $0x07c9             // vpextrb    ecx, xmm1, 7
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_138
-	QUAD $0x000160248c6ffdc5; BYTE $0x00       // vmovdqa    ymm1, yword [rsp + 352]
-	LONG $0x397de3c4; WORD $0x01c9             // vextracti128    xmm1, ymm1, 1
-	LONG $0x16f9e3c4; WORD $0x01c9             // vpextrq    rcx, xmm1, 1
-	LONG $0x397d63c4; WORD $0x01f9             // vextracti128    xmm1, ymm15, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x07 // vpextrb    byte [r8 + rcx], xmm1, 7
-
-LBB0_138:
-	LONG $0x357dc2c4; BYTE $0xcb               // vpmovzxdq    ymm1, xmm11
-	QUAD $0x000140248c7ffdc5; BYTE $0x00       // vmovdqa    yword [rsp + 320], ymm1
-	LONG $0x637dc1c4; BYTE $0xcc               // vpacksswb    ymm1, ymm0, ymm12
-	LONG $0x397de3c4; WORD $0x01c9             // vextracti128    xmm1, ymm1, 1
-	LONG $0x1479e3c4; WORD $0x08c9             // vpextrb    ecx, xmm1, 8
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_140
-	QUAD $0x000140248c6ffdc5; BYTE $0x00       // vmovdqa    ymm1, yword [rsp + 320]
-	LONG $0x7ef9e1c4; BYTE $0xc9               // vmovq    rcx, xmm1
-	LONG $0x397d63c4; WORD $0x01f9             // vextracti128    xmm1, ymm15, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x08 // vpextrb    byte [r8 + rcx], xmm1, 8
-
-LBB0_140:
-	LONG $0x6679c1c4; BYTE $0xcb               // vpcmpgtd    xmm1, xmm0, xmm11
-	LONG $0xc96bf1c5                           // vpackssdw    xmm1, xmm1, xmm1
-	LONG $0x00fde3c4; WORD $0xd4c9             // vpermq    ymm1, ymm1, 212
-	LONG $0xc963fdc5                           // vpacksswb    ymm1, ymm0, ymm1
-	LONG $0x397de3c4; WORD $0x01c9             // vextracti128    xmm1, ymm1, 1
-	LONG $0x1479e3c4; WORD $0x09c9             // vpextrb    ecx, xmm1, 9
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_142
-	QUAD $0x000140248c6ffdc5; BYTE $0x00       // vmovdqa    ymm1, yword [rsp + 320]
-	LONG $0x16f9e3c4; WORD $0x01c9             // vpextrq    rcx, xmm1, 1
-	LONG $0x397d63c4; WORD $0x01f9             // vextracti128    xmm1, ymm15, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x09 // vpextrb    byte [r8 + rcx], xmm1, 9
-
-LBB0_142:
-	LONG $0x6679c1c4; BYTE $0xcb               // vpcmpgtd    xmm1, xmm0, xmm11
-	LONG $0xc96bf1c5                           // vpackssdw    xmm1, xmm1, xmm1
-	LONG $0x00fde3c4; WORD $0xd4c9             // vpermq    ymm1, ymm1, 212
-	LONG $0xc963fdc5                           // vpacksswb    ymm1, ymm0, ymm1
-	LONG $0x397de3c4; WORD $0x01c9             // vextracti128    xmm1, ymm1, 1
-	LONG $0x1479e3c4; WORD $0x0ac9             // vpextrb    ecx, xmm1, 10
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_144
-	QUAD $0x000140248c6ffdc5; BYTE $0x00       // vmovdqa    ymm1, yword [rsp + 320]
-	LONG $0x397de3c4; WORD $0x01c9             // vextracti128    xmm1, ymm1, 1
-	LONG $0x7ef9e1c4; BYTE $0xc9               // vmovq    rcx, xmm1
-	LONG $0x397d63c4; WORD $0x01f9             // vextracti128    xmm1, ymm15, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0a // vpextrb    byte [r8 + rcx], xmm1, 10
-
-LBB0_144:
-	LONG $0x397d63c4; WORD $0x01d9             // vextracti128    xmm1, ymm11, 1
-	LONG $0x6679c1c4; BYTE $0xe3               // vpcmpgtd    xmm4, xmm0, xmm11
-	LONG $0xe46bd9c5                           // vpackssdw    xmm4, xmm4, xmm4
-	LONG $0x00fde3c4; WORD $0xd4e4             // vpermq    ymm4, ymm4, 212
-	LONG $0xe463fdc5                           // vpacksswb    ymm4, ymm0, ymm4
-	LONG $0x397de3c4; WORD $0x01e4             // vextracti128    xmm4, ymm4, 1
-	LONG $0x1479e3c4; WORD $0x0be1             // vpextrb    ecx, xmm4, 11
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_146
-	QUAD $0x00014024946ffdc5; BYTE $0x00       // vmovdqa    ymm2, yword [rsp + 320]
-	LONG $0x397de3c4; WORD $0x01d4             // vextracti128    xmm4, ymm2, 1
-	LONG $0x16f9e3c4; WORD $0x01e1             // vpextrq    rcx, xmm4, 1
-	LONG $0x397d63c4; WORD $0x01fc             // vextracti128    xmm4, ymm15, 1
-	LONG $0x1479c3c4; WORD $0x0824; BYTE $0x0b // vpextrb    byte [r8 + rcx], xmm4, 11
-
-LBB0_146:
-	LONG $0x357de2c4; BYTE $0xe1               // vpmovzxdq    ymm4, xmm1
-	LONG $0x6b7dc1c4; BYTE $0xc9               // vpackssdw    ymm1, ymm0, ymm9
-	LONG $0xc963fdc5                           // vpacksswb    ymm1, ymm0, ymm1
-	LONG $0x397de3c4; WORD $0x01c9             // vextracti128    xmm1, ymm1, 1
-	LONG $0x1479e3c4; WORD $0x0cc9             // vpextrb    ecx, xmm1, 12
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_148
-	LONG $0x7ef9e1c4; BYTE $0xe1               // vmovq    rcx, xmm4
-	LONG $0x397d63c4; WORD $0x01f9             // vextracti128    xmm1, ymm15, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0c // vpextrb    byte [r8 + rcx], xmm1, 12
-
-LBB0_148:
-	LONG $0x6b7dc1c4; BYTE $0xc9               // vpackssdw    ymm1, ymm0, ymm9
-	LONG $0xc963fdc5                           // vpacksswb    ymm1, ymm0, ymm1
-	LONG $0x397de3c4; WORD $0x01c9             // vextracti128    xmm1, ymm1, 1
-	LONG $0x1479e3c4; WORD $0x0dc9             // vpextrb    ecx, xmm1, 13
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_150
-	LONG $0x16f9e3c4; WORD $0x01e1             // vpextrq    rcx, xmm4, 1
-	LONG $0x397d63c4; WORD $0x01f9             // vextracti128    xmm1, ymm15, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0d // vpextrb    byte [r8 + rcx], xmm1, 13
-
-LBB0_150:
-	LONG $0x6b7dc1c4; BYTE $0xc9               // vpackssdw    ymm1, ymm0, ymm9
-	LONG $0xc963fdc5                           // vpacksswb    ymm1, ymm0, ymm1
-	LONG $0x397de3c4; WORD $0x01c9             // vextracti128    xmm1, ymm1, 1
-	LONG $0x1479e3c4; WORD $0x0ec9             // vpextrb    ecx, xmm1, 14
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_152
-	LONG $0x397de3c4; WORD $0x01e1             // vextracti128    xmm1, ymm4, 1
-	LONG $0x7ef9e1c4; BYTE $0xc9               // vmovq    rcx, xmm1
-	LONG $0x397d63c4; WORD $0x01f9             // vextracti128    xmm1, ymm15, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0e // vpextrb    byte [r8 + rcx], xmm1, 14
-
-LBB0_152:
-	LONG $0x6b7dc1c4; BYTE $0xc9               // vpackssdw    ymm1, ymm0, ymm9
-	LONG $0xc963fdc5                           // vpacksswb    ymm1, ymm0, ymm1
-	LONG $0x397de3c4; WORD $0x01c9             // vextracti128    xmm1, ymm1, 1
-	LONG $0x1479e3c4; WORD $0x0fc9             // vpextrb    ecx, xmm1, 15
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_154
-	LONG $0x397de3c4; WORD $0x01e1             // vextracti128    xmm1, ymm4, 1
-	LONG $0x16f9e3c4; WORD $0x01c9             // vpextrq    rcx, xmm1, 1
-	LONG $0x397d63c4; WORD $0x01f9             // vextracti128    xmm1, ymm15, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0f // vpextrb    byte [r8 + rcx], xmm1, 15
-
-LBB0_154:
-	LONG $0x6b4dc1c4; BYTE $0xc8         // vpackssdw    ymm1, ymm6, ymm8
-	LONG $0x00fde3c4; WORD $0xd8c9       // vpermq    ymm1, ymm1, 216
-	LONG $0x6b45c1c4; BYTE $0xe9         // vpackssdw    ymm5, ymm7, ymm9
-	LONG $0x00fde3c4; WORD $0xd8ed       // vpermq    ymm5, ymm5, 216
-	LONG $0xcd63f5c5                     // vpacksswb    ymm1, ymm1, ymm5
-	QUAD $0x00030024946ffdc5; BYTE $0x00 // vmovdqa    ymm2, yword [rsp + 768]
-	QUAD $0x00020024bceb6dc5; BYTE $0x00 // vpor    ymm15, ymm2, yword [rsp + 512]
-	QUAD $0x0001e024acebedc5; BYTE $0x00 // vpor    ymm5, ymm2, yword [rsp + 480]
-	QUAD $0x0001802494eb6dc5; BYTE $0x00 // vpor    ymm10, ymm2, yword [rsp + 384]
-	QUAD $0x000160248ceb6dc5; BYTE $0x00 // vpor    ymm9, ymm2, yword [rsp + 352]
-	QUAD $0x0001c024a4eb6dc5; BYTE $0x00 // vpor    ymm12, ymm2, yword [rsp + 448]
-	QUAD $0x0001a0249ceb6dc5; BYTE $0x00 // vpor    ymm11, ymm2, yword [rsp + 416]
-	QUAD $0x0001402484eb6dc5; BYTE $0x00 // vpor    ymm8, ymm2, yword [rsp + 320]
-	LONG $0xfaebddc5                     // vpor    ymm7, ymm4, ymm2
-	LONG $0x463de3c4; WORD $0x31f7       // vperm2i128    ymm6, ymm8, ymm7, 49
-	LONG $0x383d63c4; WORD $0x01ef       // vinserti128    ymm13, ymm8, xmm7, 1
-	LONG $0xf6c694c5; BYTE $0x88         // vshufps    ymm6, ymm13, ymm6, 136
-	LONG $0x461d43c4; WORD $0x31eb       // vperm2i128    ymm13, ymm12, ymm11, 49
-	LONG $0x381d43c4; WORD $0x01f3       // vinserti128    ymm14, ymm12, xmm11, 1
-	LONG $0xc60c41c4; WORD $0x88ed       // vshufps    ymm13, ymm14, ymm13, 136
-	LONG $0x462d43c4; WORD $0x31f1       // vperm2i128    ymm14, ymm10, ymm9, 49
-	LONG $0x382dc3c4; WORD $0x01d1       // vinserti128    ymm2, ymm10, xmm9, 1
-	LONG $0xc66cc1c4; WORD $0x88d6       // vshufps    ymm2, ymm2, ymm14, 136
-	LONG $0x460563c4; WORD $0x31f5       // vperm2i128    ymm14, ymm15, ymm5, 49
-	LONG $0x3805e3c4; WORD $0x01dd       // vinserti128    ymm3, ymm15, xmm5, 1
-	LONG $0xc664c1c4; WORD $0x88de       // vshufps    ymm3, ymm3, ymm14, 136
-	LONG $0xdb66fdc5                     // vpcmpgtd    ymm3, ymm0, ymm3
-	LONG $0xd266fdc5                     // vpcmpgtd    ymm2, ymm0, ymm2
-	LONG $0xd26be5c5                     // vpackssdw    ymm2, ymm3, ymm2
-	LONG $0x667dc1c4; BYTE $0xdd         // vpcmpgtd    ymm3, ymm0, ymm13
-	LONG $0xf666fdc5                     // vpcmpgtd    ymm6, ymm0, ymm6
-	LONG $0xde6be5c5                     // vpackssdw    ymm3, ymm3, ymm6
-	LONG $0x00fde3c4; WORD $0xd8d2       // vpermq    ymm2, ymm2, 216
-	LONG $0x00fde3c4; WORD $0xd8db       // vpermq    ymm3, ymm3, 216
-	LONG $0xd363edc5                     // vpacksswb    ymm2, ymm2, ymm3
-	LONG $0xf1dbedc5                     // vpand    ymm6, ymm2, ymm1
-	LONG $0xf17ef9c5                     // vmovd    ecx, xmm6
-	WORD $0xc1f6; BYTE $0x01             // test    cl, 1
-	JE   LBB0_155
-	LONG $0x787d22c4; WORD $0x1f34       // vpbroadcastb    ymm14, byte [rdi + r11]
-	LONG $0x1479e3c4; WORD $0x01f1       // vpextrb    ecx, xmm6, 1
-	WORD $0xc1f6; BYTE $0x01             // test    cl, 1
-	JNE  LBB0_661
-
-LBB0_156:
-	QUAD $0x000000e0249c8b48       // mov    rbx, qword [rsp + 224]
-	LONG $0x1479e3c4; WORD $0x02f1 // vpextrb    ecx, xmm6, 2
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_157
-
-LBB0_662:
-	LONG $0x2009e3c4; WORD $0x370c; BYTE $0x02 // vpinsrb    xmm1, xmm14, byte [rdi + rsi], 2
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x03f1             // vpextrb    ecx, xmm6, 3
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_663
-
-LBB0_158:
-	LONG $0x1479e3c4; WORD $0x04f1 // vpextrb    ecx, xmm6, 4
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_159
-
-LBB0_664:
-	QUAD $0x00000108248c8b48                   // mov    rcx, qword [rsp + 264]
-	LONG $0x2009e3c4; WORD $0x0f0c; BYTE $0x04 // vpinsrb    xmm1, xmm14, byte [rdi + rcx], 4
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x05f1             // vpextrb    ecx, xmm6, 5
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_665
-
-LBB0_160:
-	QUAD $0x000000e824b48b48       // mov    rsi, qword [rsp + 232]
-	LONG $0x1479e3c4; WORD $0x06f1 // vpextrb    ecx, xmm6, 6
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_161
-
-LBB0_666:
-	LONG $0x2009e3c4; WORD $0x070c; BYTE $0x06 // vpinsrb    xmm1, xmm14, byte [rdi + rax], 6
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x07f1             // vpextrb    ecx, xmm6, 7
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_667
-
-LBB0_162:
-	LONG $0x1479e3c4; WORD $0x08f1 // vpextrb    ecx, xmm6, 8
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_163
-
-LBB0_668:
-	LONG $0x24448b48; BYTE $0x60               // mov    rax, qword [rsp + 96]
-	LONG $0x2009e3c4; WORD $0x070c; BYTE $0x08 // vpinsrb    xmm1, xmm14, byte [rdi + rax], 8
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x09f1             // vpextrb    ecx, xmm6, 9
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_669
-
-LBB0_164:
-	LONG $0x1479e3c4; WORD $0x0af1 // vpextrb    ecx, xmm6, 10
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_165
-
-LBB0_670:
-	QUAD $0x0000009024848b48                   // mov    rax, qword [rsp + 144]
-	LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0a // vpinsrb    xmm1, xmm14, byte [rdi + rax], 10
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x0bf1             // vpextrb    ecx, xmm6, 11
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_671
-
-LBB0_166:
-	LONG $0x1479e3c4; WORD $0x0cf1 // vpextrb    ecx, xmm6, 12
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_167
-
-LBB0_672:
-	QUAD $0x000000f824848b48                   // mov    rax, qword [rsp + 248]
-	LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0c // vpinsrb    xmm1, xmm14, byte [rdi + rax], 12
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x0df1             // vpextrb    ecx, xmm6, 13
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_673
-
-LBB0_168:
-	LONG $0x1479e3c4; WORD $0x0ef1 // vpextrb    ecx, xmm6, 14
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_169
-
-LBB0_674:
-	LONG $0x24448b48; BYTE $0x50               // mov    rax, qword [rsp + 80]
-	LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0e // vpinsrb    xmm1, xmm14, byte [rdi + rax], 14
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x0ff1             // vpextrb    ecx, xmm6, 15
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_170
-	JMP  LBB0_171
-
-LBB0_155:
-	LONG $0x1479e3c4; WORD $0x01f1 // vpextrb    ecx, xmm6, 1
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_156
-
-LBB0_661:
-	LONG $0x2009a3c4; WORD $0x0f0c; BYTE $0x01 // vpinsrb    xmm1, xmm14, byte [rdi + r9], 1
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	QUAD $0x000000e0249c8b48                   // mov    rbx, qword [rsp + 224]
-	LONG $0x1479e3c4; WORD $0x02f1             // vpextrb    ecx, xmm6, 2
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_662
-
-LBB0_157:
-	LONG $0x1479e3c4; WORD $0x03f1 // vpextrb    ecx, xmm6, 3
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_158
-
-LBB0_663:
-	QUAD $0x00000110248c8b48                   // mov    rcx, qword [rsp + 272]
-	LONG $0x2009e3c4; WORD $0x0f0c; BYTE $0x03 // vpinsrb    xmm1, xmm14, byte [rdi + rcx], 3
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x04f1             // vpextrb    ecx, xmm6, 4
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_664
-
-LBB0_159:
-	LONG $0x1479e3c4; WORD $0x05f1 // vpextrb    ecx, xmm6, 5
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_160
-
-LBB0_665:
-	LONG $0x2009a3c4; WORD $0x370c; BYTE $0x05 // vpinsrb    xmm1, xmm14, byte [rdi + r14], 5
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	QUAD $0x000000e824b48b48                   // mov    rsi, qword [rsp + 232]
-	LONG $0x1479e3c4; WORD $0x06f1             // vpextrb    ecx, xmm6, 6
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_666
-
-LBB0_161:
-	LONG $0x1479e3c4; WORD $0x07f1 // vpextrb    ecx, xmm6, 7
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_162
-
-LBB0_667:
-	LONG $0x2009e3c4; WORD $0x370c; BYTE $0x07 // vpinsrb    xmm1, xmm14, byte [rdi + rsi], 7
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x08f1             // vpextrb    ecx, xmm6, 8
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_668
-
-LBB0_163:
-	LONG $0x1479e3c4; WORD $0x09f1 // vpextrb    ecx, xmm6, 9
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_164
-
-LBB0_669:
-	LONG $0x2009e3c4; WORD $0x1f0c; BYTE $0x09 // vpinsrb    xmm1, xmm14, byte [rdi + rbx], 9
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x0af1             // vpextrb    ecx, xmm6, 10
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_670
-
-LBB0_165:
-	LONG $0x1479e3c4; WORD $0x0bf1 // vpextrb    ecx, xmm6, 11
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_166
-
-LBB0_671:
-	QUAD $0x0000010024848b48                   // mov    rax, qword [rsp + 256]
-	LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0b // vpinsrb    xmm1, xmm14, byte [rdi + rax], 11
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x0cf1             // vpextrb    ecx, xmm6, 12
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_672
-
-LBB0_167:
-	LONG $0x1479e3c4; WORD $0x0df1 // vpextrb    ecx, xmm6, 13
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_168
-
-LBB0_673:
-	LONG $0x24448b48; BYTE $0x58               // mov    rax, qword [rsp + 88]
-	LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0d // vpinsrb    xmm1, xmm14, byte [rdi + rax], 13
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x0ef1             // vpextrb    ecx, xmm6, 14
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_674
-
-LBB0_169:
-	LONG $0x1479e3c4; WORD $0x0ff1 // vpextrb    ecx, xmm6, 15
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_171
-
-LBB0_170:
-	LONG $0x24448b48; BYTE $0x48               // mov    rax, qword [rsp + 72]
-	LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0f // vpinsrb    xmm1, xmm14, byte [rdi + rax], 15
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-
-LBB0_171:
-	QUAD $0x000000d0248c8b48                   // mov    rcx, qword [rsp + 208]
-	LONG $0x397dc3c4; WORD $0x01f5             // vextracti128    xmm13, ymm6, 1
-	LONG $0xe87e79c5                           // vmovd    eax, xmm13
-	LONG $0x2c244489                           // mov    dword [rsp + 44], eax
-	WORD $0x01a8                               // test    al, 1
-	JE   LBB0_172
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x24448b48; BYTE $0x40               // mov    rax, qword [rsp + 64]
-	LONG $0x2071e3c4; WORD $0x070c; BYTE $0x00 // vpinsrb    xmm1, xmm1, byte [rdi + rax], 0
-	LONG $0x380d63c4; WORD $0x01f1             // vinserti128    ymm14, ymm14, xmm1, 1
-	LONG $0x147963c4; WORD $0x01e8             // vpextrb    eax, xmm13, 1
-	LONG $0x28244489                           // mov    dword [rsp + 40], eax
-	WORD $0x01a8                               // test    al, 1
-	JNE  LBB0_676
-
-LBB0_173:
-	LONG $0x147963c4; WORD $0x02e8 // vpextrb    eax, xmm13, 2
-	LONG $0x24244489               // mov    dword [rsp + 36], eax
-	WORD $0x01a8                   // test    al, 1
-	JE   LBB0_174
-
-LBB0_677:
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	QUAD $0x0000008024848b48                   // mov    rax, qword [rsp + 128]
-	LONG $0x2071e3c4; WORD $0x070c; BYTE $0x02 // vpinsrb    xmm1, xmm1, byte [rdi + rax], 2
-	LONG $0x380d63c4; WORD $0x01f1             // vinserti128    ymm14, ymm14, xmm1, 1
-	LONG $0x147963c4; WORD $0x03e8             // vpextrb    eax, xmm13, 3
-	LONG $0x20244489                           // mov    dword [rsp + 32], eax
-	WORD $0x01a8                               // test    al, 1
-	JNE  LBB0_678
-
-LBB0_175:
-	LONG $0x147963c4; WORD $0x04e8 // vpextrb    eax, xmm13, 4
-	LONG $0x1c244489               // mov    dword [rsp + 28], eax
-	WORD $0x01a8                   // test    al, 1
-	JE   LBB0_176
-
-LBB0_679:
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x24448b48; BYTE $0x38               // mov    rax, qword [rsp + 56]
-	LONG $0x2071e3c4; WORD $0x070c; BYTE $0x04 // vpinsrb    xmm1, xmm1, byte [rdi + rax], 4
-	LONG $0x380d63c4; WORD $0x01f1             // vinserti128    ymm14, ymm14, xmm1, 1
-	LONG $0x147963c4; WORD $0x05e8             // vpextrb    eax, xmm13, 5
-	LONG $0x18244489                           // mov    dword [rsp + 24], eax
-	WORD $0x01a8                               // test    al, 1
-	JNE  LBB0_680
-
-LBB0_177:
-	LONG $0x147963c4; WORD $0x06e8 // vpextrb    eax, xmm13, 6
-	LONG $0x14244489               // mov    dword [rsp + 20], eax
-	WORD $0x01a8                   // test    al, 1
-	JE   LBB0_178
-
-LBB0_681:
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x2071a3c4; WORD $0x170c; BYTE $0x06 // vpinsrb    xmm1, xmm1, byte [rdi + r10], 6
-	LONG $0x380d63c4; WORD $0x01f1             // vinserti128    ymm14, ymm14, xmm1, 1
-	LONG $0x147963c4; WORD $0x07e8             // vpextrb    eax, xmm13, 7
-	LONG $0x3c248489; WORD $0x0001; BYTE $0x00 // mov    dword [rsp + 316], eax
-	WORD $0x01a8                               // test    al, 1
-	JNE  LBB0_682
-
-LBB0_179:
-	QUAD $0x000000d824848b48       // mov    rax, qword [rsp + 216]
-	LONG $0x147963c4; WORD $0x08eb // vpextrb    ebx, xmm13, 8
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_181
-
-LBB0_180:
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x2071e3c4; WORD $0x070c; BYTE $0x08 // vpinsrb    xmm1, xmm1, byte [rdi + rax], 8
-	LONG $0x380d63c4; WORD $0x01f1             // vinserti128    ymm14, ymm14, xmm1, 1
-
-LBB0_181:
-	LONG $0x147943c4; WORD $0x09e9             // vpextrb    r9d, xmm13, 9
-	LONG $0x01c1f641                           // test    r9b, 1
-	QUAD $0x0000011824ac894c                   // mov    qword [rsp + 280], r13
-	LONG $0x2454894c; BYTE $0x70               // mov    qword [rsp + 112], r10
-	QUAD $0x000000b824948948                   // mov    qword [rsp + 184], rdx
-	JE   LBB0_183
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x2071e3c4; WORD $0x0f0c; BYTE $0x09 // vpinsrb    xmm1, xmm1, byte [rdi + rcx], 9
-	LONG $0x380d63c4; WORD $0x01f1             // vinserti128    ymm14, ymm14, xmm1, 1
-
-LBB0_183:
-	QUAD $0x000000c824848b48                   // mov    rax, qword [rsp + 200]
-	QUAD $0x000000c0248c8b48                   // mov    rcx, qword [rsp + 192]
-	LONG $0x147943c4; WORD $0x0aed             // vpextrb    r13d, xmm13, 10
-	LONG $0x01c5f641                           // test    r13b, 1
-	JE   LBB0_184
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x2071e3c4; WORD $0x070c; BYTE $0x0a // vpinsrb    xmm1, xmm1, byte [rdi + rax], 10
-	LONG $0x380d63c4; WORD $0x01f1             // vinserti128    ymm14, ymm14, xmm1, 1
-	LONG $0x147963c4; WORD $0x0be8             // vpextrb    eax, xmm13, 11
-	WORD $0x01a8                               // test    al, 1
-	LONG $0x247c894c; BYTE $0x78               // mov    qword [rsp + 120], r15
-	JNE  LBB0_684
-
-LBB0_185:
-	LONG $0x147943c4; WORD $0x0cef // vpextrb    r15d, xmm13, 12
-	LONG $0x01c7f641               // test    r15b, 1
-	QUAD $0x00000130249c894c       // mov    qword [rsp + 304], r11
-	JE   LBB0_186
-
-LBB0_685:
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	QUAD $0x000000b8248c8b48                   // mov    rcx, qword [rsp + 184]
-	LONG $0x2071e3c4; WORD $0x0f0c; BYTE $0x0c // vpinsrb    xmm1, xmm1, byte [rdi + rcx], 12
-	LONG $0x380d63c4; WORD $0x01f1             // vinserti128    ymm14, ymm14, xmm1, 1
-	LONG $0x147963c4; WORD $0x0dea             // vpextrb    edx, xmm13, 13
-	WORD $0xc2f6; BYTE $0x01                   // test    dl, 1
-	JNE  LBB0_686
-
-LBB0_187:
-	LONG $0x147963c4; WORD $0x0eee // vpextrb    esi, xmm13, 14
-	LONG $0x01c6f640               // test    sil, 1
-	JE   LBB0_188
-
-LBB0_687:
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	QUAD $0x000000a8248c8b48                   // mov    rcx, qword [rsp + 168]
-	LONG $0x2071e3c4; WORD $0x0f0c; BYTE $0x0e // vpinsrb    xmm1, xmm1, byte [rdi + rcx], 14
-	LONG $0x380d63c4; WORD $0x01f1             // vinserti128    ymm14, ymm14, xmm1, 1
-	LONG $0x147943c4; WORD $0x0fee             // vpextrb    r14d, xmm13, 15
-	LONG $0x01c6f641                           // test    r14b, 1
-	JNE  LBB0_189
-	JMP  LBB0_190
-
-LBB0_172:
-	LONG $0x147963c4; WORD $0x01e8 // vpextrb    eax, xmm13, 1
-	LONG $0x28244489               // mov    dword [rsp + 40], eax
-	WORD $0x01a8                   // test    al, 1
-	JE   LBB0_173
-
-LBB0_676:
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	QUAD $0x0000008824848b48                   // mov    rax, qword [rsp + 136]
-	LONG $0x2071e3c4; WORD $0x070c; BYTE $0x01 // vpinsrb    xmm1, xmm1, byte [rdi + rax], 1
-	LONG $0x380d63c4; WORD $0x01f1             // vinserti128    ymm14, ymm14, xmm1, 1
-	LONG $0x147963c4; WORD $0x02e8             // vpextrb    eax, xmm13, 2
-	LONG $0x24244489                           // mov    dword [rsp + 36], eax
-	WORD $0x01a8                               // test    al, 1
-	JNE  LBB0_677
-
-LBB0_174:
-	LONG $0x147963c4; WORD $0x03e8 // vpextrb    eax, xmm13, 3
-	LONG $0x20244489               // mov    dword [rsp + 32], eax
-	WORD $0x01a8                   // test    al, 1
-	JE   LBB0_175
-
-LBB0_678:
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x2071a3c4; WORD $0x3f0c; BYTE $0x03 // vpinsrb    xmm1, xmm1, byte [rdi + r15], 3
-	LONG $0x380d63c4; WORD $0x01f1             // vinserti128    ymm14, ymm14, xmm1, 1
-	LONG $0x147963c4; WORD $0x04e8             // vpextrb    eax, xmm13, 4
-	LONG $0x1c244489                           // mov    dword [rsp + 28], eax
-	WORD $0x01a8                               // test    al, 1
-	JNE  LBB0_679
-
-LBB0_176:
-	LONG $0x147963c4; WORD $0x05e8 // vpextrb    eax, xmm13, 5
-	LONG $0x18244489               // mov    dword [rsp + 24], eax
-	WORD $0x01a8                   // test    al, 1
-	JE   LBB0_177
-
-LBB0_680:
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x2071a3c4; WORD $0x2f0c; BYTE $0x05 // vpinsrb    xmm1, xmm1, byte [rdi + r13], 5
-	LONG $0x380d63c4; WORD $0x01f1             // vinserti128    ymm14, ymm14, xmm1, 1
-	LONG $0x147963c4; WORD $0x06e8             // vpextrb    eax, xmm13, 6
-	LONG $0x14244489                           // mov    dword [rsp + 20], eax
-	WORD $0x01a8                               // test    al, 1
-	JNE  LBB0_681
-
-LBB0_178:
-	LONG $0x147963c4; WORD $0x07e8             // vpextrb    eax, xmm13, 7
-	LONG $0x3c248489; WORD $0x0001; BYTE $0x00 // mov    dword [rsp + 316], eax
-	WORD $0x01a8                               // test    al, 1
-	JE   LBB0_179
-
-LBB0_682:
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	QUAD $0x000000f024848b48                   // mov    rax, qword [rsp + 240]
-	LONG $0x2071e3c4; WORD $0x070c; BYTE $0x07 // vpinsrb    xmm1, xmm1, byte [rdi + rax], 7
-	LONG $0x380d63c4; WORD $0x01f1             // vinserti128    ymm14, ymm14, xmm1, 1
-	QUAD $0x000000d824848b48                   // mov    rax, qword [rsp + 216]
-	LONG $0x147963c4; WORD $0x08eb             // vpextrb    ebx, xmm13, 8
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_180
-	JMP  LBB0_181
-
-LBB0_184:
-	LONG $0x147963c4; WORD $0x0be8 // vpextrb    eax, xmm13, 11
-	WORD $0x01a8                   // test    al, 1
-	LONG $0x247c894c; BYTE $0x78   // mov    qword [rsp + 120], r15
-	JE   LBB0_185
-
-LBB0_684:
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x2071e3c4; WORD $0x0f0c; BYTE $0x0b // vpinsrb    xmm1, xmm1, byte [rdi + rcx], 11
-	LONG $0x380d63c4; WORD $0x01f1             // vinserti128    ymm14, ymm14, xmm1, 1
-	LONG $0x147943c4; WORD $0x0cef             // vpextrb    r15d, xmm13, 12
-	LONG $0x01c7f641                           // test    r15b, 1
-	QUAD $0x00000130249c894c                   // mov    qword [rsp + 304], r11
-	JNE  LBB0_685
-
-LBB0_186:
-	LONG $0x147963c4; WORD $0x0dea // vpextrb    edx, xmm13, 13
-	WORD $0xc2f6; BYTE $0x01       // test    dl, 1
-	JE   LBB0_187
-
-LBB0_686:
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	QUAD $0x000000b0248c8b48                   // mov    rcx, qword [rsp + 176]
-	LONG $0x2071e3c4; WORD $0x0f0c; BYTE $0x0d // vpinsrb    xmm1, xmm1, byte [rdi + rcx], 13
-	LONG $0x380d63c4; WORD $0x01f1             // vinserti128    ymm14, ymm14, xmm1, 1
-	LONG $0x147963c4; WORD $0x0eee             // vpextrb    esi, xmm13, 14
-	LONG $0x01c6f640                           // test    sil, 1
-	JNE  LBB0_687
-
-LBB0_188:
-	LONG $0x147943c4; WORD $0x0fee // vpextrb    r14d, xmm13, 15
-	LONG $0x01c6f641               // test    r14b, 1
-	JE   LBB0_190
-
-LBB0_189:
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	QUAD $0x000000a0248c8b48                   // mov    rcx, qword [rsp + 160]
-	LONG $0x2071e3c4; WORD $0x0f0c; BYTE $0x0f // vpinsrb    xmm1, xmm1, byte [rdi + rcx], 15
-	LONG $0x380d63c4; WORD $0x01f1             // vinserti128    ymm14, ymm14, xmm1, 1
-
-LBB0_190:
-	LONG $0x7175c1c4; WORD $0x01d6             // vpsrlw    ymm1, ymm14, 1
-	QUAD $0x00000080b5db75c5                   // vpand    ymm14, ymm1, yword 128[rbp] /* [rip + .LCPI0_4] */
-	LONG $0x7e79c1c4; BYTE $0xf2               // vmovd    r10d, xmm6
-	LONG $0x01c2f641                           // test    r10b, 1
-	JE   LBB0_191
-	LONG $0x7ef961c4; BYTE $0xf9               // vmovq    rcx, xmm15
-	LONG $0x147943c4; WORD $0x0834; BYTE $0x00 // vpextrb    byte [r8 + rcx], xmm14, 0
-	LONG $0x1479e3c4; WORD $0x01f1             // vpextrb    ecx, xmm6, 1
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_689
-
-LBB0_192:
-	LONG $0x1479e3c4; WORD $0x02f1 // vpextrb    ecx, xmm6, 2
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_193
-
-LBB0_690:
-	LONG $0x397d63c4; WORD $0x01f9             // vextracti128    xmm1, ymm15, 1
-	LONG $0x7ef9e1c4; BYTE $0xc9               // vmovq    rcx, xmm1
-	LONG $0x147943c4; WORD $0x0834; BYTE $0x02 // vpextrb    byte [r8 + rcx], xmm14, 2
-	LONG $0x1479e3c4; WORD $0x03f1             // vpextrb    ecx, xmm6, 3
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_691
-
-LBB0_194:
-	LONG $0x1479e3c4; WORD $0x04f1 // vpextrb    ecx, xmm6, 4
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_195
-
-LBB0_692:
-	LONG $0x7ef9e1c4; BYTE $0xe9               // vmovq    rcx, xmm5
-	LONG $0x147943c4; WORD $0x0834; BYTE $0x04 // vpextrb    byte [r8 + rcx], xmm14, 4
-	LONG $0x1479e3c4; WORD $0x05f1             // vpextrb    ecx, xmm6, 5
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_693
-
-LBB0_196:
-	LONG $0x1479e3c4; WORD $0x06f1 // vpextrb    ecx, xmm6, 6
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_197
-
-LBB0_694:
-	LONG $0x397de3c4; WORD $0x01e9             // vextracti128    xmm1, ymm5, 1
-	LONG $0x7ef9e1c4; BYTE $0xc9               // vmovq    rcx, xmm1
-	LONG $0x147943c4; WORD $0x0834; BYTE $0x06 // vpextrb    byte [r8 + rcx], xmm14, 6
-	LONG $0x1479e3c4; WORD $0x07f1             // vpextrb    ecx, xmm6, 7
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_695
-
-LBB0_198:
-	LONG $0x1479e3c4; WORD $0x08f1 // vpextrb    ecx, xmm6, 8
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_199
-
-LBB0_696:
-	LONG $0x7ef961c4; BYTE $0xe1               // vmovq    rcx, xmm12
-	LONG $0x147943c4; WORD $0x0834; BYTE $0x08 // vpextrb    byte [r8 + rcx], xmm14, 8
-	LONG $0x1479e3c4; WORD $0x09f1             // vpextrb    ecx, xmm6, 9
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_697
-
-LBB0_200:
-	LONG $0x1479e3c4; WORD $0x0af1 // vpextrb    ecx, xmm6, 10
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_201
-
-LBB0_698:
-	LONG $0x397d63c4; WORD $0x01e1             // vextracti128    xmm1, ymm12, 1
-	LONG $0x7ef9e1c4; BYTE $0xc9               // vmovq    rcx, xmm1
-	LONG $0x147943c4; WORD $0x0834; BYTE $0x0a // vpextrb    byte [r8 + rcx], xmm14, 10
-	LONG $0x1479e3c4; WORD $0x0bf1             // vpextrb    ecx, xmm6, 11
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_699
-
-LBB0_202:
-	LONG $0x1479e3c4; WORD $0x0cf1 // vpextrb    ecx, xmm6, 12
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_203
-
-LBB0_700:
-	LONG $0x7ef961c4; BYTE $0xd9               // vmovq    rcx, xmm11
-	LONG $0x147943c4; WORD $0x0834; BYTE $0x0c // vpextrb    byte [r8 + rcx], xmm14, 12
-	LONG $0x1479e3c4; WORD $0x0df1             // vpextrb    ecx, xmm6, 13
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_701
-
-LBB0_204:
-	LONG $0x1479e3c4; WORD $0x0ef1 // vpextrb    ecx, xmm6, 14
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_205
-
-LBB0_702:
-	LONG $0x397d63c4; WORD $0x01d9             // vextracti128    xmm1, ymm11, 1
-	LONG $0x7ef9e1c4; BYTE $0xc9               // vmovq    rcx, xmm1
-	LONG $0x147943c4; WORD $0x0834; BYTE $0x0e // vpextrb    byte [r8 + rcx], xmm14, 14
-	LONG $0x1479e3c4; WORD $0x0ff1             // vpextrb    ecx, xmm6, 15
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_703
-
-LBB0_206:
-	LONG $0x2c2444f6; BYTE $0x01 // test    byte [rsp + 44], 1
-	JE   LBB0_207
-
-LBB0_704:
-	LONG $0x7ef961c4; BYTE $0xd1               // vmovq    rcx, xmm10
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x00 // vpextrb    byte [r8 + rcx], xmm1, 0
-	LONG $0x282444f6; BYTE $0x01               // test    byte [rsp + 40], 1
-	JNE  LBB0_705
-
-LBB0_208:
-	LONG $0x242444f6; BYTE $0x01 // test    byte [rsp + 36], 1
-	JE   LBB0_209
-
-LBB0_706:
-	LONG $0x397d63c4; WORD $0x01d1             // vextracti128    xmm1, ymm10, 1
-	LONG $0x7ef9e1c4; BYTE $0xc9               // vmovq    rcx, xmm1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x02 // vpextrb    byte [r8 + rcx], xmm1, 2
-	LONG $0x202444f6; BYTE $0x01               // test    byte [rsp + 32], 1
-	JNE  LBB0_707
-
-LBB0_210:
-	LONG $0x1c2444f6; BYTE $0x01 // test    byte [rsp + 28], 1
-	JE   LBB0_211
-
-LBB0_708:
-	LONG $0x7ef961c4; BYTE $0xc9               // vmovq    rcx, xmm9
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x04 // vpextrb    byte [r8 + rcx], xmm1, 4
-	LONG $0x182444f6; BYTE $0x01               // test    byte [rsp + 24], 1
-	JNE  LBB0_709
-
-LBB0_212:
-	LONG $0x142444f6; BYTE $0x01 // test    byte [rsp + 20], 1
-	JE   LBB0_213
-
-LBB0_710:
-	LONG $0x397d63c4; WORD $0x01c9             // vextracti128    xmm1, ymm9, 1
-	LONG $0x7ef9e1c4; BYTE $0xc9               // vmovq    rcx, xmm1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x06 // vpextrb    byte [r8 + rcx], xmm1, 6
-	QUAD $0x010000013c2484f6                   // test    byte [rsp + 316], 1
-	JNE  LBB0_711
-
-LBB0_214:
-	WORD $0xc3f6; BYTE $0x01 // test    bl, 1
-	JE   LBB0_215
-
-LBB0_712:
-	LONG $0x7ef961c4; BYTE $0xc1               // vmovq    rcx, xmm8
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x08 // vpextrb    byte [r8 + rcx], xmm1, 8
-	LONG $0x01c1f641                           // test    r9b, 1
-	QUAD $0x000000e024948b4c                   // mov    r10, qword [rsp + 224]
-	QUAD $0x00000090249c8b4c                   // mov    r11, qword [rsp + 144]
-	JNE  LBB0_713
-
-LBB0_216:
-	LONG $0x01c5f641         // test    r13b, 1
-	QUAD $0x00000128249c8b48 // mov    rbx, qword [rsp + 296]
-	JE   LBB0_217
-
-LBB0_714:
-	LONG $0x397d63c4; WORD $0x01c1             // vextracti128    xmm1, ymm8, 1
-	LONG $0x7ef9e1c4; BYTE $0xc9               // vmovq    rcx, xmm1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0a // vpextrb    byte [r8 + rcx], xmm1, 10
-	WORD $0x01a8                               // test    al, 1
-	QUAD $0x00000120248c8b4c                   // mov    r9, qword [rsp + 288]
-	QUAD $0x000000e824848b48                   // mov    rax, qword [rsp + 232]
-	JNE  LBB0_715
-
-LBB0_218:
-	LONG $0x01c7f641 // test    r15b, 1
-	JE   LBB0_219
-
-LBB0_716:
-	LONG $0x7ef9e1c4; BYTE $0xf9               // vmovq    rcx, xmm7
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0c // vpextrb    byte [r8 + rcx], xmm1, 12
-	WORD $0xc2f6; BYTE $0x01                   // test    dl, 1
-	QUAD $0x0000008824ac8b4c                   // mov    r13, qword [rsp + 136]
-	QUAD $0x0000008024bc8b4c                   // mov    r15, qword [rsp + 128]
-	JNE  LBB0_717
-
-LBB0_220:
-	LONG $0x01c6f640         // test    sil, 1
-	QUAD $0x0000013024948b48 // mov    rdx, qword [rsp + 304]
-	JE   LBB0_221
-
-LBB0_718:
-	LONG $0x397de3c4; WORD $0x01f9             // vextracti128    xmm1, ymm7, 1
-	LONG $0x7ef9e1c4; BYTE $0xc9               // vmovq    rcx, xmm1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0e // vpextrb    byte [r8 + rcx], xmm1, 14
-	LONG $0x01c6f641                           // test    r14b, 1
-	QUAD $0x0000009824b48b48                   // mov    rsi, qword [rsp + 152]
-	JNE  LBB0_222
-	JMP  LBB0_223
-
-LBB0_191:
-	LONG $0x1479e3c4; WORD $0x01f1 // vpextrb    ecx, xmm6, 1
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_192
-
-LBB0_689:
-	LONG $0x16f963c4; WORD $0x01f9             // vpextrq    rcx, xmm15, 1
-	LONG $0x147943c4; WORD $0x0834; BYTE $0x01 // vpextrb    byte [r8 + rcx], xmm14, 1
-	LONG $0x1479e3c4; WORD $0x02f1             // vpextrb    ecx, xmm6, 2
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_690
-
-LBB0_193:
-	LONG $0x1479e3c4; WORD $0x03f1 // vpextrb    ecx, xmm6, 3
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_194
-
-LBB0_691:
-	LONG $0x397d63c4; WORD $0x01f9             // vextracti128    xmm1, ymm15, 1
-	LONG $0x16f9e3c4; WORD $0x01c9             // vpextrq    rcx, xmm1, 1
-	LONG $0x147943c4; WORD $0x0834; BYTE $0x03 // vpextrb    byte [r8 + rcx], xmm14, 3
-	LONG $0x1479e3c4; WORD $0x04f1             // vpextrb    ecx, xmm6, 4
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_692
-
-LBB0_195:
-	LONG $0x1479e3c4; WORD $0x05f1 // vpextrb    ecx, xmm6, 5
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_196
-
-LBB0_693:
-	LONG $0x16f9e3c4; WORD $0x01e9             // vpextrq    rcx, xmm5, 1
-	LONG $0x147943c4; WORD $0x0834; BYTE $0x05 // vpextrb    byte [r8 + rcx], xmm14, 5
-	LONG $0x1479e3c4; WORD $0x06f1             // vpextrb    ecx, xmm6, 6
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_694
-
-LBB0_197:
-	LONG $0x1479e3c4; WORD $0x07f1 // vpextrb    ecx, xmm6, 7
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_198
-
-LBB0_695:
-	LONG $0x397de3c4; WORD $0x01e9             // vextracti128    xmm1, ymm5, 1
-	LONG $0x16f9e3c4; WORD $0x01c9             // vpextrq    rcx, xmm1, 1
-	LONG $0x147943c4; WORD $0x0834; BYTE $0x07 // vpextrb    byte [r8 + rcx], xmm14, 7
-	LONG $0x1479e3c4; WORD $0x08f1             // vpextrb    ecx, xmm6, 8
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_696
-
-LBB0_199:
-	LONG $0x1479e3c4; WORD $0x09f1 // vpextrb    ecx, xmm6, 9
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_200
-
-LBB0_697:
-	LONG $0x16f963c4; WORD $0x01e1             // vpextrq    rcx, xmm12, 1
-	LONG $0x147943c4; WORD $0x0834; BYTE $0x09 // vpextrb    byte [r8 + rcx], xmm14, 9
-	LONG $0x1479e3c4; WORD $0x0af1             // vpextrb    ecx, xmm6, 10
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_698
-
-LBB0_201:
-	LONG $0x1479e3c4; WORD $0x0bf1 // vpextrb    ecx, xmm6, 11
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_202
-
-LBB0_699:
-	LONG $0x397d63c4; WORD $0x01e1             // vextracti128    xmm1, ymm12, 1
-	LONG $0x16f9e3c4; WORD $0x01c9             // vpextrq    rcx, xmm1, 1
-	LONG $0x147943c4; WORD $0x0834; BYTE $0x0b // vpextrb    byte [r8 + rcx], xmm14, 11
-	LONG $0x1479e3c4; WORD $0x0cf1             // vpextrb    ecx, xmm6, 12
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_700
-
-LBB0_203:
-	LONG $0x1479e3c4; WORD $0x0df1 // vpextrb    ecx, xmm6, 13
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_204
-
-LBB0_701:
-	LONG $0x16f963c4; WORD $0x01d9             // vpextrq    rcx, xmm11, 1
-	LONG $0x147943c4; WORD $0x0834; BYTE $0x0d // vpextrb    byte [r8 + rcx], xmm14, 13
-	LONG $0x1479e3c4; WORD $0x0ef1             // vpextrb    ecx, xmm6, 14
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_702
-
-LBB0_205:
-	LONG $0x1479e3c4; WORD $0x0ff1 // vpextrb    ecx, xmm6, 15
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_206
-
-LBB0_703:
-	LONG $0x397d63c4; WORD $0x01d9             // vextracti128    xmm1, ymm11, 1
-	LONG $0x16f9e3c4; WORD $0x01c9             // vpextrq    rcx, xmm1, 1
-	LONG $0x147943c4; WORD $0x0834; BYTE $0x0f // vpextrb    byte [r8 + rcx], xmm14, 15
-	LONG $0x2c2444f6; BYTE $0x01               // test    byte [rsp + 44], 1
-	JNE  LBB0_704
-
-LBB0_207:
-	LONG $0x282444f6; BYTE $0x01 // test    byte [rsp + 40], 1
-	JE   LBB0_208
-
-LBB0_705:
-	LONG $0x16f963c4; WORD $0x01d1             // vpextrq    rcx, xmm10, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x01 // vpextrb    byte [r8 + rcx], xmm1, 1
-	LONG $0x242444f6; BYTE $0x01               // test    byte [rsp + 36], 1
-	JNE  LBB0_706
-
-LBB0_209:
-	LONG $0x202444f6; BYTE $0x01 // test    byte [rsp + 32], 1
-	JE   LBB0_210
-
-LBB0_707:
-	LONG $0x397d63c4; WORD $0x01d1             // vextracti128    xmm1, ymm10, 1
-	LONG $0x16f9e3c4; WORD $0x01c9             // vpextrq    rcx, xmm1, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x03 // vpextrb    byte [r8 + rcx], xmm1, 3
-	LONG $0x1c2444f6; BYTE $0x01               // test    byte [rsp + 28], 1
-	JNE  LBB0_708
-
-LBB0_211:
-	LONG $0x182444f6; BYTE $0x01 // test    byte [rsp + 24], 1
-	JE   LBB0_212
-
-LBB0_709:
-	LONG $0x16f963c4; WORD $0x01c9             // vpextrq    rcx, xmm9, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x05 // vpextrb    byte [r8 + rcx], xmm1, 5
-	LONG $0x142444f6; BYTE $0x01               // test    byte [rsp + 20], 1
-	JNE  LBB0_710
-
-LBB0_213:
-	QUAD $0x010000013c2484f6 // test    byte [rsp + 316], 1
-	JE   LBB0_214
-
-LBB0_711:
-	LONG $0x397d63c4; WORD $0x01c9             // vextracti128    xmm1, ymm9, 1
-	LONG $0x16f9e3c4; WORD $0x01c9             // vpextrq    rcx, xmm1, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x07 // vpextrb    byte [r8 + rcx], xmm1, 7
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_712
-
-LBB0_215:
-	LONG $0x01c1f641         // test    r9b, 1
-	QUAD $0x000000e024948b4c // mov    r10, qword [rsp + 224]
-	QUAD $0x00000090249c8b4c // mov    r11, qword [rsp + 144]
-	JE   LBB0_216
-
-LBB0_713:
-	LONG $0x16f963c4; WORD $0x01c1             // vpextrq    rcx, xmm8, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x09 // vpextrb    byte [r8 + rcx], xmm1, 9
-	LONG $0x01c5f641                           // test    r13b, 1
-	QUAD $0x00000128249c8b48                   // mov    rbx, qword [rsp + 296]
-	JNE  LBB0_714
-
-LBB0_217:
-	WORD $0x01a8             // test    al, 1
-	QUAD $0x00000120248c8b4c // mov    r9, qword [rsp + 288]
-	QUAD $0x000000e824848b48 // mov    rax, qword [rsp + 232]
-	JE   LBB0_218
-
-LBB0_715:
-	LONG $0x397d63c4; WORD $0x01c1             // vextracti128    xmm1, ymm8, 1
-	LONG $0x16f9e3c4; WORD $0x01c9             // vpextrq    rcx, xmm1, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0b // vpextrb    byte [r8 + rcx], xmm1, 11
-	LONG $0x01c7f641                           // test    r15b, 1
-	JNE  LBB0_716
-
-LBB0_219:
-	WORD $0xc2f6; BYTE $0x01 // test    dl, 1
-	QUAD $0x0000008824ac8b4c // mov    r13, qword [rsp + 136]
-	QUAD $0x0000008024bc8b4c // mov    r15, qword [rsp + 128]
-	JE   LBB0_220
-
-LBB0_717:
-	LONG $0x16f9e3c4; WORD $0x01f9             // vpextrq    rcx, xmm7, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0d // vpextrb    byte [r8 + rcx], xmm1, 13
-	LONG $0x01c6f640                           // test    sil, 1
-	QUAD $0x0000013024948b48                   // mov    rdx, qword [rsp + 304]
-	JNE  LBB0_718
-
-LBB0_221:
-	LONG $0x01c6f641         // test    r14b, 1
-	QUAD $0x0000009824b48b48 // mov    rsi, qword [rsp + 152]
-	JE   LBB0_223
-
-LBB0_222:
-	LONG $0x397de3c4; WORD $0x01f9             // vextracti128    xmm1, ymm7, 1
-	LONG $0x16f9e3c4; WORD $0x01c9             // vpextrq    rcx, xmm1, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0f // vpextrb    byte [r8 + rcx], xmm1, 15
-
-LBB0_223:
-	QUAD $0x0002e0248c6ffdc5; BYTE $0x00 // vmovdqa    ymm1, yword [rsp + 736]
-	QUAD $0x00020024bceb75c5; BYTE $0x00 // vpor    ymm15, ymm1, yword [rsp + 512]
-	QUAD $0x0001e024acebf5c5; BYTE $0x00 // vpor    ymm5, ymm1, yword [rsp + 480]
-	QUAD $0x0001802494eb75c5; BYTE $0x00 // vpor    ymm10, ymm1, yword [rsp + 384]
-	QUAD $0x000160248ceb75c5; BYTE $0x00 // vpor    ymm9, ymm1, yword [rsp + 352]
-	QUAD $0x0001c024a4eb75c5; BYTE $0x00 // vpor    ymm12, ymm1, yword [rsp + 448]
-	QUAD $0x0001a0249ceb75c5; BYTE $0x00 // vpor    ymm11, ymm1, yword [rsp + 416]
-	QUAD $0x0001402484eb75c5; BYTE $0x00 // vpor    ymm8, ymm1, yword [rsp + 320]
-	LONG $0xf9ebddc5                     // vpor    ymm7, ymm4, ymm1
-	LONG $0x463de3c4; WORD $0x31cf       // vperm2i128    ymm1, ymm8, ymm7, 49
-	LONG $0x383de3c4; WORD $0x01d7       // vinserti128    ymm2, ymm8, xmm7, 1
-	LONG $0xc9c6ecc5; BYTE $0x88         // vshufps    ymm1, ymm2, ymm1, 136
-	LONG $0x461dc3c4; WORD $0x31d3       // vperm2i128    ymm2, ymm12, ymm11, 49
-	LONG $0x381dc3c4; WORD $0x01db       // vinserti128    ymm3, ymm12, xmm11, 1
-	LONG $0xd2c6e4c5; BYTE $0x88         // vshufps    ymm2, ymm3, ymm2, 136
-	LONG $0x462dc3c4; WORD $0x31d9       // vperm2i128    ymm3, ymm10, ymm9, 49
-	LONG $0x382d43c4; WORD $0x01e9       // vinserti128    ymm13, ymm10, xmm9, 1
-	LONG $0xdbc694c5; BYTE $0x88         // vshufps    ymm3, ymm13, ymm3, 136
-	LONG $0x460563c4; WORD $0x31ed       // vperm2i128    ymm13, ymm15, ymm5, 49
-	LONG $0x380563c4; WORD $0x01f5       // vinserti128    ymm14, ymm15, xmm5, 1
-	LONG $0xc60c41c4; WORD $0x88ed       // vshufps    ymm13, ymm14, ymm13, 136
-	LONG $0x667d41c4; BYTE $0xed         // vpcmpgtd    ymm13, ymm0, ymm13
-	LONG $0xdb66fdc5                     // vpcmpgtd    ymm3, ymm0, ymm3
-	LONG $0xdb6b95c5                     // vpackssdw    ymm3, ymm13, ymm3
-	LONG $0xd266fdc5                     // vpcmpgtd    ymm2, ymm0, ymm2
-	LONG $0xc966fdc5                     // vpcmpgtd    ymm1, ymm0, ymm1
-	LONG $0xc96bedc5                     // vpackssdw    ymm1, ymm2, ymm1
-	LONG $0x00fde3c4; WORD $0xd8d3       // vpermq    ymm2, ymm3, 216
-	LONG $0x00fde3c4; WORD $0xd8c9       // vpermq    ymm1, ymm1, 216
-	LONG $0xc963edc5                     // vpacksswb    ymm1, ymm2, ymm1
-	LONG $0xf6dbf5c5                     // vpand    ymm6, ymm1, ymm6
-	LONG $0xf17ef9c5                     // vmovd    ecx, xmm6
-	WORD $0xc1f6; BYTE $0x01             // test    cl, 1
-	JE   LBB0_224
-	LONG $0x787d62c4; WORD $0x1734       // vpbroadcastb    ymm14, byte [rdi + rdx]
-	LONG $0x1479e3c4; WORD $0x01f1       // vpextrb    ecx, xmm6, 1
-	WORD $0xc1f6; BYTE $0x01             // test    cl, 1
-	JNE  LBB0_720
-
-LBB0_225:
-	LONG $0x24548b48; BYTE $0x68   // mov    rdx, qword [rsp + 104]
-	LONG $0x1479e3c4; WORD $0x02f1 // vpextrb    ecx, xmm6, 2
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_227
-
-LBB0_226:
-	LONG $0x2009e3c4; WORD $0x1f0c; BYTE $0x02 // vpinsrb    xmm1, xmm14, byte [rdi + rbx], 2
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-
-LBB0_227:
-	LONG $0x24748b48; BYTE $0x60               // mov    rsi, qword [rsp + 96]
-	LONG $0x245c8b48; BYTE $0x48               // mov    rbx, qword [rsp + 72]
-	LONG $0x1479e3c4; WORD $0x03f1             // vpextrb    ecx, xmm6, 3
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_228
-	QUAD $0x00000110248c8b48                   // mov    rcx, qword [rsp + 272]
-	LONG $0x2009e3c4; WORD $0x0f0c; BYTE $0x03 // vpinsrb    xmm1, xmm14, byte [rdi + rcx], 3
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x04f1             // vpextrb    ecx, xmm6, 4
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_722
-
-LBB0_229:
-	LONG $0x1479e3c4; WORD $0x05f1 // vpextrb    ecx, xmm6, 5
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_230
-
-LBB0_723:
-	LONG $0x2009e3c4; WORD $0x170c; BYTE $0x05 // vpinsrb    xmm1, xmm14, byte [rdi + rdx], 5
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x06f1             // vpextrb    ecx, xmm6, 6
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_724
-
-LBB0_231:
-	LONG $0x1479e3c4; WORD $0x07f1 // vpextrb    ecx, xmm6, 7
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_232
-
-LBB0_725:
-	LONG $0x2009e3c4; WORD $0x070c; BYTE $0x07 // vpinsrb    xmm1, xmm14, byte [rdi + rax], 7
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x08f1             // vpextrb    ecx, xmm6, 8
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_726
-
-LBB0_233:
-	LONG $0x24548b48; BYTE $0x58   // mov    rdx, qword [rsp + 88]
-	LONG $0x1479e3c4; WORD $0x09f1 // vpextrb    ecx, xmm6, 9
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_234
-
-LBB0_727:
-	LONG $0x2009a3c4; WORD $0x170c; BYTE $0x09 // vpinsrb    xmm1, xmm14, byte [rdi + r10], 9
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x0af1             // vpextrb    ecx, xmm6, 10
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_728
-
-LBB0_235:
-	LONG $0x1479e3c4; WORD $0x0bf1 // vpextrb    ecx, xmm6, 11
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_236
-
-LBB0_729:
-	QUAD $0x0000010024848b48                   // mov    rax, qword [rsp + 256]
-	LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0b // vpinsrb    xmm1, xmm14, byte [rdi + rax], 11
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x0cf1             // vpextrb    ecx, xmm6, 12
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_730
-
-LBB0_237:
-	LONG $0x1479e3c4; WORD $0x0df1 // vpextrb    ecx, xmm6, 13
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_239
-
-LBB0_238:
-	LONG $0x2009e3c4; WORD $0x170c; BYTE $0x0d // vpinsrb    xmm1, xmm14, byte [rdi + rdx], 13
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-
-LBB0_239:
-	LONG $0x24448b48; BYTE $0x50               // mov    rax, qword [rsp + 80]
-	LONG $0x24548b48; BYTE $0x40               // mov    rdx, qword [rsp + 64]
-	LONG $0x1479e3c4; WORD $0x0ef1             // vpextrb    ecx, xmm6, 14
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_241
-	LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0e // vpinsrb    xmm1, xmm14, byte [rdi + rax], 14
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-
-LBB0_241:
-	LONG $0x1479e3c4; WORD $0x0ff1             // vpextrb    ecx, xmm6, 15
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_243
-	LONG $0x2009e3c4; WORD $0x1f0c; BYTE $0x0f // vpinsrb    xmm1, xmm14, byte [rdi + rbx], 15
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-
-LBB0_243:
-	LONG $0x397de3c4; WORD $0x01f1             // vextracti128    xmm1, ymm6, 1
-	LONG $0xc87ef9c5                           // vmovd    eax, xmm1
-	LONG $0x2c244489                           // mov    dword [rsp + 44], eax
-	WORD $0x01a8                               // test    al, 1
-	JE   LBB0_245
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	LONG $0x2069e3c4; WORD $0x1714; BYTE $0x00 // vpinsrb    xmm2, xmm2, byte [rdi + rdx], 0
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-
-LBB0_245:
-	LONG $0x244c8b48; BYTE $0x38               // mov    rcx, qword [rsp + 56]
-	LONG $0x1479e3c4; WORD $0x01c8             // vpextrb    eax, xmm1, 1
-	LONG $0x28244489                           // mov    dword [rsp + 40], eax
-	WORD $0x01a8                               // test    al, 1
-	JE   LBB0_247
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	LONG $0x2069a3c4; WORD $0x2f14; BYTE $0x01 // vpinsrb    xmm2, xmm2, byte [rdi + r13], 1
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-
-LBB0_247:
-	QUAD $0x0000011824948b48                   // mov    rdx, qword [rsp + 280]
-	LONG $0x24748b48; BYTE $0x70               // mov    rsi, qword [rsp + 112]
-	LONG $0x1479e3c4; WORD $0x02c8             // vpextrb    eax, xmm1, 2
-	LONG $0x24244489                           // mov    dword [rsp + 36], eax
-	WORD $0x01a8                               // test    al, 1
-	JE   LBB0_249
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	LONG $0x2069a3c4; WORD $0x3f14; BYTE $0x02 // vpinsrb    xmm2, xmm2, byte [rdi + r15], 2
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-
-LBB0_249:
-	LONG $0x24448b48; BYTE $0x78               // mov    rax, qword [rsp + 120]
-	LONG $0x1479e3c4; WORD $0x03cb             // vpextrb    ebx, xmm1, 3
-	LONG $0x20245c89                           // mov    dword [rsp + 32], ebx
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JE   LBB0_250
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	LONG $0x2069e3c4; WORD $0x0714; BYTE $0x03 // vpinsrb    xmm2, xmm2, byte [rdi + rax], 3
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479e3c4; WORD $0x04c8             // vpextrb    eax, xmm1, 4
-	LONG $0x1c244489                           // mov    dword [rsp + 28], eax
-	WORD $0x01a8                               // test    al, 1
-	JNE  LBB0_732
-
-LBB0_251:
-	LONG $0x1479e3c4; WORD $0x05c8 // vpextrb    eax, xmm1, 5
-	LONG $0x18244489               // mov    dword [rsp + 24], eax
-	WORD $0x01a8                   // test    al, 1
-	JE   LBB0_252
-
-LBB0_733:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	LONG $0x2069e3c4; WORD $0x1714; BYTE $0x05 // vpinsrb    xmm2, xmm2, byte [rdi + rdx], 5
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479e3c4; WORD $0x06c8             // vpextrb    eax, xmm1, 6
-	LONG $0x14244489                           // mov    dword [rsp + 20], eax
-	WORD $0x01a8                               // test    al, 1
-	JNE  LBB0_734
-
-LBB0_253:
-	LONG $0x1479c3c4; WORD $0x07c9 // vpextrb    r9d, xmm1, 7
-	LONG $0x01c1f641               // test    r9b, 1
-	JE   LBB0_254
-
-LBB0_735:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	QUAD $0x000000f024848b48                   // mov    rax, qword [rsp + 240]
-	LONG $0x2069e3c4; WORD $0x0714; BYTE $0x07 // vpinsrb    xmm2, xmm2, byte [rdi + rax], 7
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479e3c4; WORD $0x08ca             // vpextrb    edx, xmm1, 8
-	WORD $0xc2f6; BYTE $0x01                   // test    dl, 1
-	JNE  LBB0_736
-
-LBB0_255:
-	LONG $0x1479e3c4; WORD $0x09c9 // vpextrb    ecx, xmm1, 9
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_256
-
-LBB0_737:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	QUAD $0x000000d024848b48                   // mov    rax, qword [rsp + 208]
-	LONG $0x2069e3c4; WORD $0x0714; BYTE $0x09 // vpinsrb    xmm2, xmm2, byte [rdi + rax], 9
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479e3c4; WORD $0x0ace             // vpextrb    esi, xmm1, 10
-	LONG $0x01c6f640                           // test    sil, 1
-	JNE  LBB0_738
-
-LBB0_257:
-	LONG $0x1479e3c4; WORD $0x0bc8 // vpextrb    eax, xmm1, 11
-	WORD $0x01a8                   // test    al, 1
-	JE   LBB0_258
-
-LBB0_739:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	QUAD $0x000000c0249c8b48                   // mov    rbx, qword [rsp + 192]
-	LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x0b // vpinsrb    xmm2, xmm2, byte [rdi + rbx], 11
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479c3c4; WORD $0x0ccd             // vpextrb    r13d, xmm1, 12
-	LONG $0x01c5f641                           // test    r13b, 1
-	JNE  LBB0_740
-
-LBB0_259:
-	LONG $0x1479c3c4; WORD $0x0dca // vpextrb    r10d, xmm1, 13
-	LONG $0x01c2f641               // test    r10b, 1
-	JE   LBB0_260
-
-LBB0_741:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	QUAD $0x000000b0249c8b48                   // mov    rbx, qword [rsp + 176]
-	LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x0d // vpinsrb    xmm2, xmm2, byte [rdi + rbx], 13
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479c3c4; WORD $0x0ecb             // vpextrb    r11d, xmm1, 14
-	LONG $0x01c3f641                           // test    r11b, 1
-	JNE  LBB0_742
-
-LBB0_261:
-	LONG $0x1479c3c4; WORD $0x0fce // vpextrb    r14d, xmm1, 15
-	LONG $0x01c6f641               // test    r14b, 1
-	JE   LBB0_263
-
-LBB0_262:
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	QUAD $0x000000a0249c8b48                   // mov    rbx, qword [rsp + 160]
-	LONG $0x2071e3c4; WORD $0x1f0c; BYTE $0x0f // vpinsrb    xmm1, xmm1, byte [rdi + rbx], 15
-	LONG $0x380d63c4; WORD $0x01f1             // vinserti128    ymm14, ymm14, xmm1, 1
-
-LBB0_263:
-	LONG $0x7175c1c4; WORD $0x02d6             // vpsrlw    ymm1, ymm14, 2
-	QUAD $0x00000080b5db75c5                   // vpand    ymm14, ymm1, yword 128[rbp] /* [rip + .LCPI0_4] */
-	LONG $0x7e79c1c4; BYTE $0xf7               // vmovd    r15d, xmm6
-	LONG $0x01c7f641                           // test    r15b, 1
-	JE   LBB0_264
-	LONG $0x7ef961c4; BYTE $0xfb               // vmovq    rbx, xmm15
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x00 // vpextrb    byte [r8 + rbx], xmm14, 0
-	LONG $0x1479e3c4; WORD $0x01f3             // vpextrb    ebx, xmm6, 1
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_744
-
-LBB0_265:
-	LONG $0x1479e3c4; WORD $0x02f3 // vpextrb    ebx, xmm6, 2
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	QUAD $0x000000e024bc8b4c       // mov    r15, qword [rsp + 224]
-	JE   LBB0_266
-
-LBB0_745:
-	LONG $0x397d63c4; WORD $0x01f9             // vextracti128    xmm1, ymm15, 1
-	LONG $0x7ef9e1c4; BYTE $0xcb               // vmovq    rbx, xmm1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x02 // vpextrb    byte [r8 + rbx], xmm14, 2
-	LONG $0x1479e3c4; WORD $0x03f3             // vpextrb    ebx, xmm6, 3
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_746
-
-LBB0_267:
-	LONG $0x1479e3c4; WORD $0x04f3 // vpextrb    ebx, xmm6, 4
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_268
-
-LBB0_747:
-	LONG $0x7ef9e1c4; BYTE $0xeb               // vmovq    rbx, xmm5
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x04 // vpextrb    byte [r8 + rbx], xmm14, 4
-	LONG $0x1479e3c4; WORD $0x05f3             // vpextrb    ebx, xmm6, 5
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_748
-
-LBB0_269:
-	LONG $0x1479e3c4; WORD $0x06f3 // vpextrb    ebx, xmm6, 6
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_270
-
-LBB0_749:
-	LONG $0x397de3c4; WORD $0x01e9             // vextracti128    xmm1, ymm5, 1
-	LONG $0x7ef9e1c4; BYTE $0xcb               // vmovq    rbx, xmm1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x06 // vpextrb    byte [r8 + rbx], xmm14, 6
-	LONG $0x1479e3c4; WORD $0x07f3             // vpextrb    ebx, xmm6, 7
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_750
-
-LBB0_271:
-	LONG $0x1479e3c4; WORD $0x08f3 // vpextrb    ebx, xmm6, 8
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_272
-
-LBB0_751:
-	LONG $0x7ef961c4; BYTE $0xe3               // vmovq    rbx, xmm12
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x08 // vpextrb    byte [r8 + rbx], xmm14, 8
-	LONG $0x1479e3c4; WORD $0x09f3             // vpextrb    ebx, xmm6, 9
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_752
-
-LBB0_273:
-	LONG $0x1479e3c4; WORD $0x0af3 // vpextrb    ebx, xmm6, 10
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_274
-
-LBB0_753:
-	LONG $0x397d63c4; WORD $0x01e1             // vextracti128    xmm1, ymm12, 1
-	LONG $0x7ef9e1c4; BYTE $0xcb               // vmovq    rbx, xmm1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x0a // vpextrb    byte [r8 + rbx], xmm14, 10
-	LONG $0x1479e3c4; WORD $0x0bf3             // vpextrb    ebx, xmm6, 11
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_754
-
-LBB0_275:
-	LONG $0x1479e3c4; WORD $0x0cf3 // vpextrb    ebx, xmm6, 12
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_276
-
-LBB0_755:
-	LONG $0x7ef961c4; BYTE $0xdb               // vmovq    rbx, xmm11
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x0c // vpextrb    byte [r8 + rbx], xmm14, 12
-	LONG $0x1479e3c4; WORD $0x0df3             // vpextrb    ebx, xmm6, 13
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_756
-
-LBB0_277:
-	LONG $0x1479e3c4; WORD $0x0ef3 // vpextrb    ebx, xmm6, 14
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_278
-
-LBB0_757:
-	LONG $0x397d63c4; WORD $0x01d9             // vextracti128    xmm1, ymm11, 1
-	LONG $0x7ef9e1c4; BYTE $0xcb               // vmovq    rbx, xmm1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x0e // vpextrb    byte [r8 + rbx], xmm14, 14
-	LONG $0x1479e3c4; WORD $0x0ff3             // vpextrb    ebx, xmm6, 15
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_758
-
-LBB0_279:
-	LONG $0x2c2444f6; BYTE $0x01 // test    byte [rsp + 44], 1
-	JE   LBB0_280
-
-LBB0_759:
-	LONG $0x7ef961c4; BYTE $0xd3               // vmovq    rbx, xmm10
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x00 // vpextrb    byte [r8 + rbx], xmm1, 0
-	LONG $0x282444f6; BYTE $0x01               // test    byte [rsp + 40], 1
-	JNE  LBB0_760
-
-LBB0_281:
-	LONG $0x242444f6; BYTE $0x01 // test    byte [rsp + 36], 1
-	JE   LBB0_282
-
-LBB0_761:
-	LONG $0x397d63c4; WORD $0x01d1             // vextracti128    xmm1, ymm10, 1
-	LONG $0x7ef9e1c4; BYTE $0xcb               // vmovq    rbx, xmm1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x02 // vpextrb    byte [r8 + rbx], xmm1, 2
-	LONG $0x202444f6; BYTE $0x01               // test    byte [rsp + 32], 1
-	JNE  LBB0_762
-
-LBB0_283:
-	LONG $0x1c2444f6; BYTE $0x01 // test    byte [rsp + 28], 1
-	JE   LBB0_284
-
-LBB0_763:
-	LONG $0x7ef961c4; BYTE $0xcb               // vmovq    rbx, xmm9
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x04 // vpextrb    byte [r8 + rbx], xmm1, 4
-	LONG $0x182444f6; BYTE $0x01               // test    byte [rsp + 24], 1
-	JNE  LBB0_764
-
-LBB0_285:
-	LONG $0x142444f6; BYTE $0x01 // test    byte [rsp + 20], 1
-	JE   LBB0_286
-
-LBB0_765:
-	LONG $0x397d63c4; WORD $0x01c9             // vextracti128    xmm1, ymm9, 1
-	LONG $0x7ef9e1c4; BYTE $0xcb               // vmovq    rbx, xmm1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x06 // vpextrb    byte [r8 + rbx], xmm1, 6
-	LONG $0x01c1f641                           // test    r9b, 1
-	JNE  LBB0_766
-
-LBB0_287:
-	WORD $0xc2f6; BYTE $0x01 // test    dl, 1
-	QUAD $0x00000128249c8b48 // mov    rbx, qword [rsp + 296]
-	JE   LBB0_288
-
-LBB0_767:
-	LONG $0x7ef961c4; BYTE $0xc2               // vmovq    rdx, xmm8
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x100c; BYTE $0x08 // vpextrb    byte [r8 + rdx], xmm1, 8
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_768
-
-LBB0_289:
-	LONG $0x01c6f640         // test    sil, 1
-	QUAD $0x0000013024948b48 // mov    rdx, qword [rsp + 304]
-	JE   LBB0_290
-
-LBB0_769:
-	LONG $0x397d63c4; WORD $0x01c1             // vextracti128    xmm1, ymm8, 1
-	LONG $0x7ef9e1c4; BYTE $0xc9               // vmovq    rcx, xmm1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0a // vpextrb    byte [r8 + rcx], xmm1, 10
-	WORD $0x01a8                               // test    al, 1
-	QUAD $0x0000009824b48b48                   // mov    rsi, qword [rsp + 152]
-	JNE  LBB0_770
-
-LBB0_291:
-	LONG $0x01c5f641 // test    r13b, 1
-	JE   LBB0_292
-
-LBB0_771:
-	LONG $0x7ef9e1c4; BYTE $0xf9               // vmovq    rcx, xmm7
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0c // vpextrb    byte [r8 + rcx], xmm1, 12
-	LONG $0x01c2f641                           // test    r10b, 1
-	QUAD $0x0000011824ac8b4c                   // mov    r13, qword [rsp + 280]
-	JNE  LBB0_772
-
-LBB0_293:
-	LONG $0x01c3f641 // test    r11b, 1
-	JE   LBB0_294
-
-LBB0_773:
-	LONG $0x397de3c4; WORD $0x01f9             // vextracti128    xmm1, ymm7, 1
-	LONG $0x7ef9e1c4; BYTE $0xc9               // vmovq    rcx, xmm1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0e // vpextrb    byte [r8 + rcx], xmm1, 14
-	LONG $0x01c6f641                           // test    r14b, 1
-	QUAD $0x0000012024848b48                   // mov    rax, qword [rsp + 288]
-	QUAD $0x000000e8248c8b4c                   // mov    r9, qword [rsp + 232]
-	JNE  LBB0_295
-	JMP  LBB0_296
-
-LBB0_224:
-	LONG $0x1479e3c4; WORD $0x01f1 // vpextrb    ecx, xmm6, 1
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_225
-
-LBB0_720:
-	LONG $0x2009e3c4; WORD $0x370c; BYTE $0x01 // vpinsrb    xmm1, xmm14, byte [rdi + rsi], 1
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x24548b48; BYTE $0x68               // mov    rdx, qword [rsp + 104]
-	LONG $0x1479e3c4; WORD $0x02f1             // vpextrb    ecx, xmm6, 2
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_226
-	JMP  LBB0_227
-
-LBB0_228:
-	LONG $0x1479e3c4; WORD $0x04f1 // vpextrb    ecx, xmm6, 4
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_229
-
-LBB0_722:
-	QUAD $0x00000108248c8b48                   // mov    rcx, qword [rsp + 264]
-	LONG $0x2009e3c4; WORD $0x0f0c; BYTE $0x04 // vpinsrb    xmm1, xmm14, byte [rdi + rcx], 4
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x05f1             // vpextrb    ecx, xmm6, 5
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_723
-
-LBB0_230:
-	LONG $0x1479e3c4; WORD $0x06f1 // vpextrb    ecx, xmm6, 6
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_231
-
-LBB0_724:
-	LONG $0x2009a3c4; WORD $0x0f0c; BYTE $0x06 // vpinsrb    xmm1, xmm14, byte [rdi + r9], 6
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x07f1             // vpextrb    ecx, xmm6, 7
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_725
-
-LBB0_232:
-	LONG $0x1479e3c4; WORD $0x08f1 // vpextrb    ecx, xmm6, 8
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_233
-
-LBB0_726:
-	LONG $0x2009e3c4; WORD $0x370c; BYTE $0x08 // vpinsrb    xmm1, xmm14, byte [rdi + rsi], 8
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x24548b48; BYTE $0x58               // mov    rdx, qword [rsp + 88]
-	LONG $0x1479e3c4; WORD $0x09f1             // vpextrb    ecx, xmm6, 9
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_727
-
-LBB0_234:
-	LONG $0x1479e3c4; WORD $0x0af1 // vpextrb    ecx, xmm6, 10
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_235
-
-LBB0_728:
-	LONG $0x2009a3c4; WORD $0x1f0c; BYTE $0x0a // vpinsrb    xmm1, xmm14, byte [rdi + r11], 10
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x0bf1             // vpextrb    ecx, xmm6, 11
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_729
-
-LBB0_236:
-	LONG $0x1479e3c4; WORD $0x0cf1 // vpextrb    ecx, xmm6, 12
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_237
-
-LBB0_730:
-	QUAD $0x000000f824848b48                   // mov    rax, qword [rsp + 248]
-	LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0c // vpinsrb    xmm1, xmm14, byte [rdi + rax], 12
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x0df1             // vpextrb    ecx, xmm6, 13
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_238
-	JMP  LBB0_239
-
-LBB0_250:
-	LONG $0x1479e3c4; WORD $0x04c8 // vpextrb    eax, xmm1, 4
-	LONG $0x1c244489               // mov    dword [rsp + 28], eax
-	WORD $0x01a8                   // test    al, 1
-	JE   LBB0_251
-
-LBB0_732:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	LONG $0x2069e3c4; WORD $0x0f14; BYTE $0x04 // vpinsrb    xmm2, xmm2, byte [rdi + rcx], 4
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479e3c4; WORD $0x05c8             // vpextrb    eax, xmm1, 5
-	LONG $0x18244489                           // mov    dword [rsp + 24], eax
-	WORD $0x01a8                               // test    al, 1
-	JNE  LBB0_733
-
-LBB0_252:
-	LONG $0x1479e3c4; WORD $0x06c8 // vpextrb    eax, xmm1, 6
-	LONG $0x14244489               // mov    dword [rsp + 20], eax
-	WORD $0x01a8                   // test    al, 1
-	JE   LBB0_253
-
-LBB0_734:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	LONG $0x2069e3c4; WORD $0x3714; BYTE $0x06 // vpinsrb    xmm2, xmm2, byte [rdi + rsi], 6
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479c3c4; WORD $0x07c9             // vpextrb    r9d, xmm1, 7
-	LONG $0x01c1f641                           // test    r9b, 1
-	JNE  LBB0_735
-
-LBB0_254:
-	LONG $0x1479e3c4; WORD $0x08ca // vpextrb    edx, xmm1, 8
-	WORD $0xc2f6; BYTE $0x01       // test    dl, 1
-	JE   LBB0_255
-
-LBB0_736:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	QUAD $0x000000d824848b48                   // mov    rax, qword [rsp + 216]
-	LONG $0x2069e3c4; WORD $0x0714; BYTE $0x08 // vpinsrb    xmm2, xmm2, byte [rdi + rax], 8
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479e3c4; WORD $0x09c9             // vpextrb    ecx, xmm1, 9
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_737
-
-LBB0_256:
-	LONG $0x1479e3c4; WORD $0x0ace // vpextrb    esi, xmm1, 10
-	LONG $0x01c6f640               // test    sil, 1
-	JE   LBB0_257
-
-LBB0_738:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	QUAD $0x000000c824848b48                   // mov    rax, qword [rsp + 200]
-	LONG $0x2069e3c4; WORD $0x0714; BYTE $0x0a // vpinsrb    xmm2, xmm2, byte [rdi + rax], 10
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479e3c4; WORD $0x0bc8             // vpextrb    eax, xmm1, 11
-	WORD $0x01a8                               // test    al, 1
-	JNE  LBB0_739
-
-LBB0_258:
-	LONG $0x1479c3c4; WORD $0x0ccd // vpextrb    r13d, xmm1, 12
-	LONG $0x01c5f641               // test    r13b, 1
-	JE   LBB0_259
-
-LBB0_740:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	QUAD $0x000000b8249c8b48                   // mov    rbx, qword [rsp + 184]
-	LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x0c // vpinsrb    xmm2, xmm2, byte [rdi + rbx], 12
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479c3c4; WORD $0x0dca             // vpextrb    r10d, xmm1, 13
-	LONG $0x01c2f641                           // test    r10b, 1
-	JNE  LBB0_741
-
-LBB0_260:
-	LONG $0x1479c3c4; WORD $0x0ecb // vpextrb    r11d, xmm1, 14
-	LONG $0x01c3f641               // test    r11b, 1
-	JE   LBB0_261
-
-LBB0_742:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	QUAD $0x000000a8249c8b48                   // mov    rbx, qword [rsp + 168]
-	LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x0e // vpinsrb    xmm2, xmm2, byte [rdi + rbx], 14
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479c3c4; WORD $0x0fce             // vpextrb    r14d, xmm1, 15
-	LONG $0x01c6f641                           // test    r14b, 1
-	JNE  LBB0_262
-	JMP  LBB0_263
-
-LBB0_264:
-	LONG $0x1479e3c4; WORD $0x01f3 // vpextrb    ebx, xmm6, 1
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_265
-
-LBB0_744:
-	LONG $0x16f963c4; WORD $0x01fb             // vpextrq    rbx, xmm15, 1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x01 // vpextrb    byte [r8 + rbx], xmm14, 1
-	LONG $0x1479e3c4; WORD $0x02f3             // vpextrb    ebx, xmm6, 2
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	QUAD $0x000000e024bc8b4c                   // mov    r15, qword [rsp + 224]
-	JNE  LBB0_745
-
-LBB0_266:
-	LONG $0x1479e3c4; WORD $0x03f3 // vpextrb    ebx, xmm6, 3
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_267
-
-LBB0_746:
-	LONG $0x397d63c4; WORD $0x01f9             // vextracti128    xmm1, ymm15, 1
-	LONG $0x16f9e3c4; WORD $0x01cb             // vpextrq    rbx, xmm1, 1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x03 // vpextrb    byte [r8 + rbx], xmm14, 3
-	LONG $0x1479e3c4; WORD $0x04f3             // vpextrb    ebx, xmm6, 4
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_747
-
-LBB0_268:
-	LONG $0x1479e3c4; WORD $0x05f3 // vpextrb    ebx, xmm6, 5
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_269
-
-LBB0_748:
-	LONG $0x16f9e3c4; WORD $0x01eb             // vpextrq    rbx, xmm5, 1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x05 // vpextrb    byte [r8 + rbx], xmm14, 5
-	LONG $0x1479e3c4; WORD $0x06f3             // vpextrb    ebx, xmm6, 6
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_749
-
-LBB0_270:
-	LONG $0x1479e3c4; WORD $0x07f3 // vpextrb    ebx, xmm6, 7
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_271
-
-LBB0_750:
-	LONG $0x397de3c4; WORD $0x01e9             // vextracti128    xmm1, ymm5, 1
-	LONG $0x16f9e3c4; WORD $0x01cb             // vpextrq    rbx, xmm1, 1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x07 // vpextrb    byte [r8 + rbx], xmm14, 7
-	LONG $0x1479e3c4; WORD $0x08f3             // vpextrb    ebx, xmm6, 8
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_751
-
-LBB0_272:
-	LONG $0x1479e3c4; WORD $0x09f3 // vpextrb    ebx, xmm6, 9
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_273
-
-LBB0_752:
-	LONG $0x16f963c4; WORD $0x01e3             // vpextrq    rbx, xmm12, 1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x09 // vpextrb    byte [r8 + rbx], xmm14, 9
-	LONG $0x1479e3c4; WORD $0x0af3             // vpextrb    ebx, xmm6, 10
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_753
-
-LBB0_274:
-	LONG $0x1479e3c4; WORD $0x0bf3 // vpextrb    ebx, xmm6, 11
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_275
-
-LBB0_754:
-	LONG $0x397d63c4; WORD $0x01e1             // vextracti128    xmm1, ymm12, 1
-	LONG $0x16f9e3c4; WORD $0x01cb             // vpextrq    rbx, xmm1, 1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x0b // vpextrb    byte [r8 + rbx], xmm14, 11
-	LONG $0x1479e3c4; WORD $0x0cf3             // vpextrb    ebx, xmm6, 12
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_755
-
-LBB0_276:
-	LONG $0x1479e3c4; WORD $0x0df3 // vpextrb    ebx, xmm6, 13
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_277
-
-LBB0_756:
-	LONG $0x16f963c4; WORD $0x01db             // vpextrq    rbx, xmm11, 1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x0d // vpextrb    byte [r8 + rbx], xmm14, 13
-	LONG $0x1479e3c4; WORD $0x0ef3             // vpextrb    ebx, xmm6, 14
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_757
-
-LBB0_278:
-	LONG $0x1479e3c4; WORD $0x0ff3 // vpextrb    ebx, xmm6, 15
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_279
-
-LBB0_758:
-	LONG $0x397d63c4; WORD $0x01d9             // vextracti128    xmm1, ymm11, 1
-	LONG $0x16f9e3c4; WORD $0x01cb             // vpextrq    rbx, xmm1, 1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x0f // vpextrb    byte [r8 + rbx], xmm14, 15
-	LONG $0x2c2444f6; BYTE $0x01               // test    byte [rsp + 44], 1
-	JNE  LBB0_759
-
-LBB0_280:
-	LONG $0x282444f6; BYTE $0x01 // test    byte [rsp + 40], 1
-	JE   LBB0_281
-
-LBB0_760:
-	LONG $0x16f963c4; WORD $0x01d3             // vpextrq    rbx, xmm10, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x01 // vpextrb    byte [r8 + rbx], xmm1, 1
-	LONG $0x242444f6; BYTE $0x01               // test    byte [rsp + 36], 1
-	JNE  LBB0_761
-
-LBB0_282:
-	LONG $0x202444f6; BYTE $0x01 // test    byte [rsp + 32], 1
-	JE   LBB0_283
-
-LBB0_762:
-	LONG $0x397d63c4; WORD $0x01d1             // vextracti128    xmm1, ymm10, 1
-	LONG $0x16f9e3c4; WORD $0x01cb             // vpextrq    rbx, xmm1, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x03 // vpextrb    byte [r8 + rbx], xmm1, 3
-	LONG $0x1c2444f6; BYTE $0x01               // test    byte [rsp + 28], 1
-	JNE  LBB0_763
-
-LBB0_284:
-	LONG $0x182444f6; BYTE $0x01 // test    byte [rsp + 24], 1
-	JE   LBB0_285
-
-LBB0_764:
-	LONG $0x16f963c4; WORD $0x01cb             // vpextrq    rbx, xmm9, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x05 // vpextrb    byte [r8 + rbx], xmm1, 5
-	LONG $0x142444f6; BYTE $0x01               // test    byte [rsp + 20], 1
-	JNE  LBB0_765
-
-LBB0_286:
-	LONG $0x01c1f641 // test    r9b, 1
-	JE   LBB0_287
-
-LBB0_766:
-	LONG $0x397d63c4; WORD $0x01c9             // vextracti128    xmm1, ymm9, 1
-	LONG $0x16f9e3c4; WORD $0x01cb             // vpextrq    rbx, xmm1, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x07 // vpextrb    byte [r8 + rbx], xmm1, 7
-	WORD $0xc2f6; BYTE $0x01                   // test    dl, 1
-	QUAD $0x00000128249c8b48                   // mov    rbx, qword [rsp + 296]
-	JNE  LBB0_767
-
-LBB0_288:
-	WORD $0xc1f6; BYTE $0x01 // test    cl, 1
-	JE   LBB0_289
-
-LBB0_768:
-	LONG $0x16f963c4; WORD $0x01c1             // vpextrq    rcx, xmm8, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x09 // vpextrb    byte [r8 + rcx], xmm1, 9
-	LONG $0x01c6f640                           // test    sil, 1
-	QUAD $0x0000013024948b48                   // mov    rdx, qword [rsp + 304]
-	JNE  LBB0_769
-
-LBB0_290:
-	WORD $0x01a8             // test    al, 1
-	QUAD $0x0000009824b48b48 // mov    rsi, qword [rsp + 152]
-	JE   LBB0_291
-
-LBB0_770:
-	LONG $0x397d63c4; WORD $0x01c1             // vextracti128    xmm1, ymm8, 1
-	LONG $0x16f9e3c4; WORD $0x01c9             // vpextrq    rcx, xmm1, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0b // vpextrb    byte [r8 + rcx], xmm1, 11
-	LONG $0x01c5f641                           // test    r13b, 1
-	JNE  LBB0_771
-
-LBB0_292:
-	LONG $0x01c2f641         // test    r10b, 1
-	QUAD $0x0000011824ac8b4c // mov    r13, qword [rsp + 280]
-	JE   LBB0_293
-
-LBB0_772:
-	LONG $0x16f9e3c4; WORD $0x01f9             // vpextrq    rcx, xmm7, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0d // vpextrb    byte [r8 + rcx], xmm1, 13
-	LONG $0x01c3f641                           // test    r11b, 1
-	JNE  LBB0_773
-
-LBB0_294:
-	LONG $0x01c6f641         // test    r14b, 1
-	QUAD $0x0000012024848b48 // mov    rax, qword [rsp + 288]
-	QUAD $0x000000e8248c8b4c // mov    r9, qword [rsp + 232]
-	JE   LBB0_296
-
-LBB0_295:
-	LONG $0x397de3c4; WORD $0x01f9             // vextracti128    xmm1, ymm7, 1
-	LONG $0x16f9e3c4; WORD $0x01c9             // vpextrq    rcx, xmm1, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0f // vpextrb    byte [r8 + rcx], xmm1, 15
-
-LBB0_296:
-	QUAD $0x0002c0248c6ffdc5; BYTE $0x00 // vmovdqa    ymm1, yword [rsp + 704]
-	QUAD $0x00020024bceb75c5; BYTE $0x00 // vpor    ymm15, ymm1, yword [rsp + 512]
-	QUAD $0x0001e024acebf5c5; BYTE $0x00 // vpor    ymm5, ymm1, yword [rsp + 480]
-	QUAD $0x0001802494eb75c5; BYTE $0x00 // vpor    ymm10, ymm1, yword [rsp + 384]
-	QUAD $0x000160248ceb75c5; BYTE $0x00 // vpor    ymm9, ymm1, yword [rsp + 352]
-	QUAD $0x0001c024a4eb75c5; BYTE $0x00 // vpor    ymm12, ymm1, yword [rsp + 448]
-	QUAD $0x0001a0249ceb75c5; BYTE $0x00 // vpor    ymm11, ymm1, yword [rsp + 416]
-	QUAD $0x0001402484eb75c5; BYTE $0x00 // vpor    ymm8, ymm1, yword [rsp + 320]
-	LONG $0xf9ebddc5                     // vpor    ymm7, ymm4, ymm1
-	LONG $0x463de3c4; WORD $0x31cf       // vperm2i128    ymm1, ymm8, ymm7, 49
-	LONG $0x383de3c4; WORD $0x01d7       // vinserti128    ymm2, ymm8, xmm7, 1
-	LONG $0xc9c6ecc5; BYTE $0x88         // vshufps    ymm1, ymm2, ymm1, 136
-	LONG $0x461dc3c4; WORD $0x31d3       // vperm2i128    ymm2, ymm12, ymm11, 49
-	LONG $0x381dc3c4; WORD $0x01db       // vinserti128    ymm3, ymm12, xmm11, 1
-	LONG $0xd2c6e4c5; BYTE $0x88         // vshufps    ymm2, ymm3, ymm2, 136
-	LONG $0x462dc3c4; WORD $0x31d9       // vperm2i128    ymm3, ymm10, ymm9, 49
-	LONG $0x382d43c4; WORD $0x01e9       // vinserti128    ymm13, ymm10, xmm9, 1
-	LONG $0xdbc694c5; BYTE $0x88         // vshufps    ymm3, ymm13, ymm3, 136
-	LONG $0x460563c4; WORD $0x31ed       // vperm2i128    ymm13, ymm15, ymm5, 49
-	LONG $0x380563c4; WORD $0x01f5       // vinserti128    ymm14, ymm15, xmm5, 1
-	LONG $0xc60c41c4; WORD $0x88ed       // vshufps    ymm13, ymm14, ymm13, 136
-	LONG $0x667d41c4; BYTE $0xed         // vpcmpgtd    ymm13, ymm0, ymm13
-	LONG $0xdb66fdc5                     // vpcmpgtd    ymm3, ymm0, ymm3
-	LONG $0xdb6b95c5                     // vpackssdw    ymm3, ymm13, ymm3
-	LONG $0xd266fdc5                     // vpcmpgtd    ymm2, ymm0, ymm2
-	LONG $0xc966fdc5                     // vpcmpgtd    ymm1, ymm0, ymm1
-	LONG $0xc96bedc5                     // vpackssdw    ymm1, ymm2, ymm1
-	LONG $0x00fde3c4; WORD $0xd8d3       // vpermq    ymm2, ymm3, 216
-	LONG $0x00fde3c4; WORD $0xd8c9       // vpermq    ymm1, ymm1, 216
-	LONG $0xc963edc5                     // vpacksswb    ymm1, ymm2, ymm1
-	LONG $0xf6dbf5c5                     // vpand    ymm6, ymm1, ymm6
-	LONG $0xf17ef9c5                     // vmovd    ecx, xmm6
-	WORD $0xc1f6; BYTE $0x01             // test    cl, 1
-	JE   LBB0_297
-	LONG $0x787d62c4; WORD $0x1734       // vpbroadcastb    ymm14, byte [rdi + rdx]
-	LONG $0x1479e3c4; WORD $0x01f1       // vpextrb    ecx, xmm6, 1
-	WORD $0xc1f6; BYTE $0x01             // test    cl, 1
-	JNE  LBB0_775
-
-LBB0_298:
-	LONG $0x24548b48; BYTE $0x68   // mov    rdx, qword [rsp + 104]
-	LONG $0x1479e3c4; WORD $0x02f1 // vpextrb    ecx, xmm6, 2
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_300
-
-LBB0_299:
-	LONG $0x2009e3c4; WORD $0x1f0c; BYTE $0x02 // vpinsrb    xmm1, xmm14, byte [rdi + rbx], 2
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-
-LBB0_300:
-	LONG $0x24748b48; BYTE $0x60               // mov    rsi, qword [rsp + 96]
-	LONG $0x24548b4c; BYTE $0x48               // mov    r10, qword [rsp + 72]
-	LONG $0x1479e3c4; WORD $0x03f1             // vpextrb    ecx, xmm6, 3
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_301
-	QUAD $0x00000110248c8b48                   // mov    rcx, qword [rsp + 272]
-	LONG $0x2009e3c4; WORD $0x0f0c; BYTE $0x03 // vpinsrb    xmm1, xmm14, byte [rdi + rcx], 3
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x04f1             // vpextrb    ecx, xmm6, 4
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_777
-
-LBB0_302:
-	LONG $0x1479e3c4; WORD $0x05f1 // vpextrb    ecx, xmm6, 5
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_303
-
-LBB0_778:
-	LONG $0x2009e3c4; WORD $0x170c; BYTE $0x05 // vpinsrb    xmm1, xmm14, byte [rdi + rdx], 5
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x06f1             // vpextrb    ecx, xmm6, 6
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_779
-
-LBB0_304:
-	LONG $0x1479e3c4; WORD $0x07f1 // vpextrb    ecx, xmm6, 7
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_305
-
-LBB0_780:
-	LONG $0x2009a3c4; WORD $0x0f0c; BYTE $0x07 // vpinsrb    xmm1, xmm14, byte [rdi + r9], 7
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x08f1             // vpextrb    ecx, xmm6, 8
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_781
-
-LBB0_306:
-	LONG $0x24548b48; BYTE $0x58   // mov    rdx, qword [rsp + 88]
-	LONG $0x1479e3c4; WORD $0x09f1 // vpextrb    ecx, xmm6, 9
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_308
-
-LBB0_307:
-	LONG $0x2009a3c4; WORD $0x3f0c; BYTE $0x09 // vpinsrb    xmm1, xmm14, byte [rdi + r15], 9
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-
-LBB0_308:
-	QUAD $0x0000009024848b48                   // mov    rax, qword [rsp + 144]
-	QUAD $0x0000008824b48b48                   // mov    rsi, qword [rsp + 136]
-	QUAD $0x00000080249c8b48                   // mov    rbx, qword [rsp + 128]
-	LONG $0x244c8b4c; BYTE $0x78               // mov    r9, qword [rsp + 120]
-	LONG $0x1479e3c4; WORD $0x0af1             // vpextrb    ecx, xmm6, 10
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_309
-	LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0a // vpinsrb    xmm1, xmm14, byte [rdi + rax], 10
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x0bf1             // vpextrb    ecx, xmm6, 11
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_783
-
-LBB0_310:
-	LONG $0x1479e3c4; WORD $0x0cf1 // vpextrb    ecx, xmm6, 12
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_311
-
-LBB0_784:
-	QUAD $0x000000f824848b48                   // mov    rax, qword [rsp + 248]
-	LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0c // vpinsrb    xmm1, xmm14, byte [rdi + rax], 12
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x0df1             // vpextrb    ecx, xmm6, 13
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_312
-	JMP  LBB0_313
-
-LBB0_297:
-	LONG $0x1479e3c4; WORD $0x01f1 // vpextrb    ecx, xmm6, 1
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_298
-
-LBB0_775:
-	LONG $0x2009e3c4; WORD $0x370c; BYTE $0x01 // vpinsrb    xmm1, xmm14, byte [rdi + rsi], 1
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x24548b48; BYTE $0x68               // mov    rdx, qword [rsp + 104]
-	LONG $0x1479e3c4; WORD $0x02f1             // vpextrb    ecx, xmm6, 2
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_299
-	JMP  LBB0_300
-
-LBB0_301:
-	LONG $0x1479e3c4; WORD $0x04f1 // vpextrb    ecx, xmm6, 4
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_302
-
-LBB0_777:
-	QUAD $0x00000108248c8b48                   // mov    rcx, qword [rsp + 264]
-	LONG $0x2009e3c4; WORD $0x0f0c; BYTE $0x04 // vpinsrb    xmm1, xmm14, byte [rdi + rcx], 4
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x05f1             // vpextrb    ecx, xmm6, 5
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_778
-
-LBB0_303:
-	LONG $0x1479e3c4; WORD $0x06f1 // vpextrb    ecx, xmm6, 6
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_304
-
-LBB0_779:
-	LONG $0x2009e3c4; WORD $0x070c; BYTE $0x06 // vpinsrb    xmm1, xmm14, byte [rdi + rax], 6
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x07f1             // vpextrb    ecx, xmm6, 7
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_780
-
-LBB0_305:
-	LONG $0x1479e3c4; WORD $0x08f1 // vpextrb    ecx, xmm6, 8
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_306
-
-LBB0_781:
-	LONG $0x2009e3c4; WORD $0x370c; BYTE $0x08 // vpinsrb    xmm1, xmm14, byte [rdi + rsi], 8
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x24548b48; BYTE $0x58               // mov    rdx, qword [rsp + 88]
-	LONG $0x1479e3c4; WORD $0x09f1             // vpextrb    ecx, xmm6, 9
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_307
-	JMP  LBB0_308
-
-LBB0_309:
-	LONG $0x1479e3c4; WORD $0x0bf1 // vpextrb    ecx, xmm6, 11
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_310
-
-LBB0_783:
-	QUAD $0x0000010024848b48                   // mov    rax, qword [rsp + 256]
-	LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0b // vpinsrb    xmm1, xmm14, byte [rdi + rax], 11
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x0cf1             // vpextrb    ecx, xmm6, 12
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_784
-
-LBB0_311:
-	LONG $0x1479e3c4; WORD $0x0df1 // vpextrb    ecx, xmm6, 13
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_313
-
-LBB0_312:
-	LONG $0x2009e3c4; WORD $0x170c; BYTE $0x0d // vpinsrb    xmm1, xmm14, byte [rdi + rdx], 13
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-
-LBB0_313:
-	LONG $0x24448b48; BYTE $0x50               // mov    rax, qword [rsp + 80]
-	LONG $0x24548b48; BYTE $0x40               // mov    rdx, qword [rsp + 64]
-	LONG $0x1479e3c4; WORD $0x0ef1             // vpextrb    ecx, xmm6, 14
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_315
-	LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0e // vpinsrb    xmm1, xmm14, byte [rdi + rax], 14
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-
-LBB0_315:
-	LONG $0x1479e3c4; WORD $0x0ff1             // vpextrb    ecx, xmm6, 15
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_317
-	LONG $0x2009a3c4; WORD $0x170c; BYTE $0x0f // vpinsrb    xmm1, xmm14, byte [rdi + r10], 15
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-
-LBB0_317:
-	LONG $0x397de3c4; WORD $0x01f1             // vextracti128    xmm1, ymm6, 1
-	LONG $0xc87ef9c5                           // vmovd    eax, xmm1
-	LONG $0x2c244489                           // mov    dword [rsp + 44], eax
-	WORD $0x01a8                               // test    al, 1
-	JE   LBB0_319
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	LONG $0x2069e3c4; WORD $0x1714; BYTE $0x00 // vpinsrb    xmm2, xmm2, byte [rdi + rdx], 0
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-
-LBB0_319:
-	LONG $0x24448b48; BYTE $0x38               // mov    rax, qword [rsp + 56]
-	LONG $0x1479e3c4; WORD $0x01c9             // vpextrb    ecx, xmm1, 1
-	LONG $0x28244c89                           // mov    dword [rsp + 40], ecx
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_320
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	LONG $0x2069e3c4; WORD $0x3714; BYTE $0x01 // vpinsrb    xmm2, xmm2, byte [rdi + rsi], 1
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479e3c4; WORD $0x02c9             // vpextrb    ecx, xmm1, 2
-	LONG $0x24244c89                           // mov    dword [rsp + 36], ecx
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_786
-
-LBB0_321:
-	LONG $0x1479e3c4; WORD $0x03c9 // vpextrb    ecx, xmm1, 3
-	LONG $0x20244c89               // mov    dword [rsp + 32], ecx
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_322
-
-LBB0_787:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	LONG $0x2069a3c4; WORD $0x0f14; BYTE $0x03 // vpinsrb    xmm2, xmm2, byte [rdi + r9], 3
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479e3c4; WORD $0x04c9             // vpextrb    ecx, xmm1, 4
-	LONG $0x1c244c89                           // mov    dword [rsp + 28], ecx
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_788
-
-LBB0_323:
-	LONG $0x1479e3c4; WORD $0x05c8 // vpextrb    eax, xmm1, 5
-	LONG $0x18244489               // mov    dword [rsp + 24], eax
-	WORD $0x01a8                   // test    al, 1
-	JE   LBB0_325
-
-LBB0_324:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	LONG $0x2069a3c4; WORD $0x2f14; BYTE $0x05 // vpinsrb    xmm2, xmm2, byte [rdi + r13], 5
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-
-LBB0_325:
-	LONG $0x24448b48; BYTE $0x70               // mov    rax, qword [rsp + 112]
-	LONG $0x1479e3c4; WORD $0x06c9             // vpextrb    ecx, xmm1, 6
-	LONG $0x14244c89                           // mov    dword [rsp + 20], ecx
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_326
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	LONG $0x2069e3c4; WORD $0x0714; BYTE $0x06 // vpinsrb    xmm2, xmm2, byte [rdi + rax], 6
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479c3c4; WORD $0x07c9             // vpextrb    r9d, xmm1, 7
-	LONG $0x01c1f641                           // test    r9b, 1
-	JNE  LBB0_790
-
-LBB0_327:
-	LONG $0x1479e3c4; WORD $0x08ca // vpextrb    edx, xmm1, 8
-	WORD $0xc2f6; BYTE $0x01       // test    dl, 1
-	JE   LBB0_328
-
-LBB0_791:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	QUAD $0x000000d824848b48                   // mov    rax, qword [rsp + 216]
-	LONG $0x2069e3c4; WORD $0x0714; BYTE $0x08 // vpinsrb    xmm2, xmm2, byte [rdi + rax], 8
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479e3c4; WORD $0x09c9             // vpextrb    ecx, xmm1, 9
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_792
-
-LBB0_329:
-	LONG $0x1479e3c4; WORD $0x0ace // vpextrb    esi, xmm1, 10
-	LONG $0x01c6f640               // test    sil, 1
-	JE   LBB0_330
-
-LBB0_793:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	QUAD $0x000000c824848b48                   // mov    rax, qword [rsp + 200]
-	LONG $0x2069e3c4; WORD $0x0714; BYTE $0x0a // vpinsrb    xmm2, xmm2, byte [rdi + rax], 10
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479e3c4; WORD $0x0bc8             // vpextrb    eax, xmm1, 11
-	WORD $0x01a8                               // test    al, 1
-	JNE  LBB0_794
-
-LBB0_331:
-	LONG $0x1479c3c4; WORD $0x0ccd // vpextrb    r13d, xmm1, 12
-	LONG $0x01c5f641               // test    r13b, 1
-	JE   LBB0_332
-
-LBB0_795:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	QUAD $0x000000b8249c8b48                   // mov    rbx, qword [rsp + 184]
-	LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x0c // vpinsrb    xmm2, xmm2, byte [rdi + rbx], 12
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479c3c4; WORD $0x0dca             // vpextrb    r10d, xmm1, 13
-	LONG $0x01c2f641                           // test    r10b, 1
-	JNE  LBB0_796
-
-LBB0_333:
-	LONG $0x1479c3c4; WORD $0x0ecb // vpextrb    r11d, xmm1, 14
-	LONG $0x01c3f641               // test    r11b, 1
-	JE   LBB0_334
-
-LBB0_797:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	QUAD $0x000000a8249c8b48                   // mov    rbx, qword [rsp + 168]
-	LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x0e // vpinsrb    xmm2, xmm2, byte [rdi + rbx], 14
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479c3c4; WORD $0x0fce             // vpextrb    r14d, xmm1, 15
-	LONG $0x01c6f641                           // test    r14b, 1
-	JNE  LBB0_335
-	JMP  LBB0_336
-
-LBB0_320:
-	LONG $0x1479e3c4; WORD $0x02c9 // vpextrb    ecx, xmm1, 2
-	LONG $0x24244c89               // mov    dword [rsp + 36], ecx
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_321
-
-LBB0_786:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x02 // vpinsrb    xmm2, xmm2, byte [rdi + rbx], 2
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479e3c4; WORD $0x03c9             // vpextrb    ecx, xmm1, 3
-	LONG $0x20244c89                           // mov    dword [rsp + 32], ecx
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_787
-
-LBB0_322:
-	LONG $0x1479e3c4; WORD $0x04c9 // vpextrb    ecx, xmm1, 4
-	LONG $0x1c244c89               // mov    dword [rsp + 28], ecx
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_323
-
-LBB0_788:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	LONG $0x2069e3c4; WORD $0x0714; BYTE $0x04 // vpinsrb    xmm2, xmm2, byte [rdi + rax], 4
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479e3c4; WORD $0x05c8             // vpextrb    eax, xmm1, 5
-	LONG $0x18244489                           // mov    dword [rsp + 24], eax
-	WORD $0x01a8                               // test    al, 1
-	JNE  LBB0_324
-	JMP  LBB0_325
-
-LBB0_326:
-	LONG $0x1479c3c4; WORD $0x07c9 // vpextrb    r9d, xmm1, 7
-	LONG $0x01c1f641               // test    r9b, 1
-	JE   LBB0_327
-
-LBB0_790:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	QUAD $0x000000f024848b48                   // mov    rax, qword [rsp + 240]
-	LONG $0x2069e3c4; WORD $0x0714; BYTE $0x07 // vpinsrb    xmm2, xmm2, byte [rdi + rax], 7
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479e3c4; WORD $0x08ca             // vpextrb    edx, xmm1, 8
-	WORD $0xc2f6; BYTE $0x01                   // test    dl, 1
-	JNE  LBB0_791
-
-LBB0_328:
-	LONG $0x1479e3c4; WORD $0x09c9 // vpextrb    ecx, xmm1, 9
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_329
-
-LBB0_792:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	QUAD $0x000000d024848b48                   // mov    rax, qword [rsp + 208]
-	LONG $0x2069e3c4; WORD $0x0714; BYTE $0x09 // vpinsrb    xmm2, xmm2, byte [rdi + rax], 9
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479e3c4; WORD $0x0ace             // vpextrb    esi, xmm1, 10
-	LONG $0x01c6f640                           // test    sil, 1
-	JNE  LBB0_793
-
-LBB0_330:
-	LONG $0x1479e3c4; WORD $0x0bc8 // vpextrb    eax, xmm1, 11
-	WORD $0x01a8                   // test    al, 1
-	JE   LBB0_331
-
-LBB0_794:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	QUAD $0x000000c0249c8b48                   // mov    rbx, qword [rsp + 192]
-	LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x0b // vpinsrb    xmm2, xmm2, byte [rdi + rbx], 11
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479c3c4; WORD $0x0ccd             // vpextrb    r13d, xmm1, 12
-	LONG $0x01c5f641                           // test    r13b, 1
-	JNE  LBB0_795
-
-LBB0_332:
-	LONG $0x1479c3c4; WORD $0x0dca // vpextrb    r10d, xmm1, 13
-	LONG $0x01c2f641               // test    r10b, 1
-	JE   LBB0_333
-
-LBB0_796:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	QUAD $0x000000b0249c8b48                   // mov    rbx, qword [rsp + 176]
-	LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x0d // vpinsrb    xmm2, xmm2, byte [rdi + rbx], 13
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479c3c4; WORD $0x0ecb             // vpextrb    r11d, xmm1, 14
-	LONG $0x01c3f641                           // test    r11b, 1
-	JNE  LBB0_797
-
-LBB0_334:
-	LONG $0x1479c3c4; WORD $0x0fce // vpextrb    r14d, xmm1, 15
-	LONG $0x01c6f641               // test    r14b, 1
-	JE   LBB0_336
-
-LBB0_335:
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	QUAD $0x000000a0249c8b48                   // mov    rbx, qword [rsp + 160]
-	LONG $0x2071e3c4; WORD $0x1f0c; BYTE $0x0f // vpinsrb    xmm1, xmm1, byte [rdi + rbx], 15
-	LONG $0x380d63c4; WORD $0x01f1             // vinserti128    ymm14, ymm14, xmm1, 1
-
-LBB0_336:
-	LONG $0x7175c1c4; WORD $0x03d6             // vpsrlw    ymm1, ymm14, 3
-	QUAD $0x00000080b5db75c5                   // vpand    ymm14, ymm1, yword 128[rbp] /* [rip + .LCPI0_4] */
-	LONG $0x7e79c1c4; BYTE $0xf7               // vmovd    r15d, xmm6
-	LONG $0x01c7f641                           // test    r15b, 1
-	JE   LBB0_337
-	LONG $0x7ef961c4; BYTE $0xfb               // vmovq    rbx, xmm15
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x00 // vpextrb    byte [r8 + rbx], xmm14, 0
-	LONG $0x1479e3c4; WORD $0x01f3             // vpextrb    ebx, xmm6, 1
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_799
-
-LBB0_338:
-	LONG $0x1479e3c4; WORD $0x02f3 // vpextrb    ebx, xmm6, 2
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	QUAD $0x000000e024bc8b4c       // mov    r15, qword [rsp + 224]
-	JE   LBB0_339
-
-LBB0_800:
-	LONG $0x397d63c4; WORD $0x01f9             // vextracti128    xmm1, ymm15, 1
-	LONG $0x7ef9e1c4; BYTE $0xcb               // vmovq    rbx, xmm1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x02 // vpextrb    byte [r8 + rbx], xmm14, 2
-	LONG $0x1479e3c4; WORD $0x03f3             // vpextrb    ebx, xmm6, 3
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_801
-
-LBB0_340:
-	LONG $0x1479e3c4; WORD $0x04f3 // vpextrb    ebx, xmm6, 4
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_341
-
-LBB0_802:
-	LONG $0x7ef9e1c4; BYTE $0xeb               // vmovq    rbx, xmm5
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x04 // vpextrb    byte [r8 + rbx], xmm14, 4
-	LONG $0x1479e3c4; WORD $0x05f3             // vpextrb    ebx, xmm6, 5
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_803
-
-LBB0_342:
-	LONG $0x1479e3c4; WORD $0x06f3 // vpextrb    ebx, xmm6, 6
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_343
-
-LBB0_804:
-	LONG $0x397de3c4; WORD $0x01e9             // vextracti128    xmm1, ymm5, 1
-	LONG $0x7ef9e1c4; BYTE $0xcb               // vmovq    rbx, xmm1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x06 // vpextrb    byte [r8 + rbx], xmm14, 6
-	LONG $0x1479e3c4; WORD $0x07f3             // vpextrb    ebx, xmm6, 7
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_805
-
-LBB0_344:
-	LONG $0x1479e3c4; WORD $0x08f3 // vpextrb    ebx, xmm6, 8
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_345
-
-LBB0_806:
-	LONG $0x7ef961c4; BYTE $0xe3               // vmovq    rbx, xmm12
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x08 // vpextrb    byte [r8 + rbx], xmm14, 8
-	LONG $0x1479e3c4; WORD $0x09f3             // vpextrb    ebx, xmm6, 9
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_807
-
-LBB0_346:
-	LONG $0x1479e3c4; WORD $0x0af3 // vpextrb    ebx, xmm6, 10
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_347
-
-LBB0_808:
-	LONG $0x397d63c4; WORD $0x01e1             // vextracti128    xmm1, ymm12, 1
-	LONG $0x7ef9e1c4; BYTE $0xcb               // vmovq    rbx, xmm1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x0a // vpextrb    byte [r8 + rbx], xmm14, 10
-	LONG $0x1479e3c4; WORD $0x0bf3             // vpextrb    ebx, xmm6, 11
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_809
-
-LBB0_348:
-	LONG $0x1479e3c4; WORD $0x0cf3 // vpextrb    ebx, xmm6, 12
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_349
-
-LBB0_810:
-	LONG $0x7ef961c4; BYTE $0xdb               // vmovq    rbx, xmm11
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x0c // vpextrb    byte [r8 + rbx], xmm14, 12
-	LONG $0x1479e3c4; WORD $0x0df3             // vpextrb    ebx, xmm6, 13
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_811
-
-LBB0_350:
-	LONG $0x1479e3c4; WORD $0x0ef3 // vpextrb    ebx, xmm6, 14
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_351
-
-LBB0_812:
-	LONG $0x397d63c4; WORD $0x01d9             // vextracti128    xmm1, ymm11, 1
-	LONG $0x7ef9e1c4; BYTE $0xcb               // vmovq    rbx, xmm1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x0e // vpextrb    byte [r8 + rbx], xmm14, 14
-	LONG $0x1479e3c4; WORD $0x0ff3             // vpextrb    ebx, xmm6, 15
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_813
-
-LBB0_352:
-	LONG $0x2c2444f6; BYTE $0x01 // test    byte [rsp + 44], 1
-	JE   LBB0_353
-
-LBB0_814:
-	LONG $0x7ef961c4; BYTE $0xd3               // vmovq    rbx, xmm10
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x00 // vpextrb    byte [r8 + rbx], xmm1, 0
-	LONG $0x282444f6; BYTE $0x01               // test    byte [rsp + 40], 1
-	JNE  LBB0_815
-
-LBB0_354:
-	LONG $0x242444f6; BYTE $0x01 // test    byte [rsp + 36], 1
-	JE   LBB0_355
-
-LBB0_816:
-	LONG $0x397d63c4; WORD $0x01d1             // vextracti128    xmm1, ymm10, 1
-	LONG $0x7ef9e1c4; BYTE $0xcb               // vmovq    rbx, xmm1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x02 // vpextrb    byte [r8 + rbx], xmm1, 2
-	LONG $0x202444f6; BYTE $0x01               // test    byte [rsp + 32], 1
-	JNE  LBB0_817
-
-LBB0_356:
-	LONG $0x1c2444f6; BYTE $0x01 // test    byte [rsp + 28], 1
-	JE   LBB0_357
-
-LBB0_818:
-	LONG $0x7ef961c4; BYTE $0xcb               // vmovq    rbx, xmm9
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x04 // vpextrb    byte [r8 + rbx], xmm1, 4
-	LONG $0x182444f6; BYTE $0x01               // test    byte [rsp + 24], 1
-	JNE  LBB0_819
-
-LBB0_358:
-	LONG $0x142444f6; BYTE $0x01 // test    byte [rsp + 20], 1
-	JE   LBB0_359
-
-LBB0_820:
-	LONG $0x397d63c4; WORD $0x01c9             // vextracti128    xmm1, ymm9, 1
-	LONG $0x7ef9e1c4; BYTE $0xcb               // vmovq    rbx, xmm1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x06 // vpextrb    byte [r8 + rbx], xmm1, 6
-	LONG $0x01c1f641                           // test    r9b, 1
-	JNE  LBB0_821
-
-LBB0_360:
-	WORD $0xc2f6; BYTE $0x01 // test    dl, 1
-	QUAD $0x00000128249c8b48 // mov    rbx, qword [rsp + 296]
-	JE   LBB0_361
-
-LBB0_822:
-	LONG $0x7ef961c4; BYTE $0xc2               // vmovq    rdx, xmm8
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x100c; BYTE $0x08 // vpextrb    byte [r8 + rdx], xmm1, 8
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_823
-
-LBB0_362:
-	LONG $0x01c6f640         // test    sil, 1
-	QUAD $0x0000013024948b48 // mov    rdx, qword [rsp + 304]
-	JE   LBB0_363
-
-LBB0_824:
-	LONG $0x397d63c4; WORD $0x01c1             // vextracti128    xmm1, ymm8, 1
-	LONG $0x7ef9e1c4; BYTE $0xc9               // vmovq    rcx, xmm1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0a // vpextrb    byte [r8 + rcx], xmm1, 10
-	WORD $0x01a8                               // test    al, 1
-	QUAD $0x0000009824b48b48                   // mov    rsi, qword [rsp + 152]
-	JNE  LBB0_825
-
-LBB0_364:
-	LONG $0x01c5f641 // test    r13b, 1
-	JE   LBB0_365
-
-LBB0_826:
-	LONG $0x7ef9e1c4; BYTE $0xf9               // vmovq    rcx, xmm7
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0c // vpextrb    byte [r8 + rcx], xmm1, 12
-	LONG $0x01c2f641                           // test    r10b, 1
-	QUAD $0x0000011824ac8b4c                   // mov    r13, qword [rsp + 280]
-	JNE  LBB0_827
-
-LBB0_366:
-	LONG $0x01c3f641 // test    r11b, 1
-	JE   LBB0_367
-
-LBB0_828:
-	LONG $0x397de3c4; WORD $0x01f9             // vextracti128    xmm1, ymm7, 1
-	LONG $0x7ef9e1c4; BYTE $0xc9               // vmovq    rcx, xmm1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0e // vpextrb    byte [r8 + rcx], xmm1, 14
-	LONG $0x01c6f641                           // test    r14b, 1
-	QUAD $0x0000012024848b48                   // mov    rax, qword [rsp + 288]
-	QUAD $0x000000e8248c8b4c                   // mov    r9, qword [rsp + 232]
-	JNE  LBB0_368
-	JMP  LBB0_369
-
-LBB0_337:
-	LONG $0x1479e3c4; WORD $0x01f3 // vpextrb    ebx, xmm6, 1
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_338
-
-LBB0_799:
-	LONG $0x16f963c4; WORD $0x01fb             // vpextrq    rbx, xmm15, 1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x01 // vpextrb    byte [r8 + rbx], xmm14, 1
-	LONG $0x1479e3c4; WORD $0x02f3             // vpextrb    ebx, xmm6, 2
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	QUAD $0x000000e024bc8b4c                   // mov    r15, qword [rsp + 224]
-	JNE  LBB0_800
-
-LBB0_339:
-	LONG $0x1479e3c4; WORD $0x03f3 // vpextrb    ebx, xmm6, 3
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_340
-
-LBB0_801:
-	LONG $0x397d63c4; WORD $0x01f9             // vextracti128    xmm1, ymm15, 1
-	LONG $0x16f9e3c4; WORD $0x01cb             // vpextrq    rbx, xmm1, 1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x03 // vpextrb    byte [r8 + rbx], xmm14, 3
-	LONG $0x1479e3c4; WORD $0x04f3             // vpextrb    ebx, xmm6, 4
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_802
-
-LBB0_341:
-	LONG $0x1479e3c4; WORD $0x05f3 // vpextrb    ebx, xmm6, 5
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_342
-
-LBB0_803:
-	LONG $0x16f9e3c4; WORD $0x01eb             // vpextrq    rbx, xmm5, 1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x05 // vpextrb    byte [r8 + rbx], xmm14, 5
-	LONG $0x1479e3c4; WORD $0x06f3             // vpextrb    ebx, xmm6, 6
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_804
-
-LBB0_343:
-	LONG $0x1479e3c4; WORD $0x07f3 // vpextrb    ebx, xmm6, 7
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_344
-
-LBB0_805:
-	LONG $0x397de3c4; WORD $0x01e9             // vextracti128    xmm1, ymm5, 1
-	LONG $0x16f9e3c4; WORD $0x01cb             // vpextrq    rbx, xmm1, 1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x07 // vpextrb    byte [r8 + rbx], xmm14, 7
-	LONG $0x1479e3c4; WORD $0x08f3             // vpextrb    ebx, xmm6, 8
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_806
-
-LBB0_345:
-	LONG $0x1479e3c4; WORD $0x09f3 // vpextrb    ebx, xmm6, 9
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_346
-
-LBB0_807:
-	LONG $0x16f963c4; WORD $0x01e3             // vpextrq    rbx, xmm12, 1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x09 // vpextrb    byte [r8 + rbx], xmm14, 9
-	LONG $0x1479e3c4; WORD $0x0af3             // vpextrb    ebx, xmm6, 10
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_808
-
-LBB0_347:
-	LONG $0x1479e3c4; WORD $0x0bf3 // vpextrb    ebx, xmm6, 11
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_348
-
-LBB0_809:
-	LONG $0x397d63c4; WORD $0x01e1             // vextracti128    xmm1, ymm12, 1
-	LONG $0x16f9e3c4; WORD $0x01cb             // vpextrq    rbx, xmm1, 1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x0b // vpextrb    byte [r8 + rbx], xmm14, 11
-	LONG $0x1479e3c4; WORD $0x0cf3             // vpextrb    ebx, xmm6, 12
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_810
-
-LBB0_349:
-	LONG $0x1479e3c4; WORD $0x0df3 // vpextrb    ebx, xmm6, 13
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_350
-
-LBB0_811:
-	LONG $0x16f963c4; WORD $0x01db             // vpextrq    rbx, xmm11, 1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x0d // vpextrb    byte [r8 + rbx], xmm14, 13
-	LONG $0x1479e3c4; WORD $0x0ef3             // vpextrb    ebx, xmm6, 14
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_812
-
-LBB0_351:
-	LONG $0x1479e3c4; WORD $0x0ff3 // vpextrb    ebx, xmm6, 15
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_352
-
-LBB0_813:
-	LONG $0x397d63c4; WORD $0x01d9             // vextracti128    xmm1, ymm11, 1
-	LONG $0x16f9e3c4; WORD $0x01cb             // vpextrq    rbx, xmm1, 1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x0f // vpextrb    byte [r8 + rbx], xmm14, 15
-	LONG $0x2c2444f6; BYTE $0x01               // test    byte [rsp + 44], 1
-	JNE  LBB0_814
-
-LBB0_353:
-	LONG $0x282444f6; BYTE $0x01 // test    byte [rsp + 40], 1
-	JE   LBB0_354
-
-LBB0_815:
-	LONG $0x16f963c4; WORD $0x01d3             // vpextrq    rbx, xmm10, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x01 // vpextrb    byte [r8 + rbx], xmm1, 1
-	LONG $0x242444f6; BYTE $0x01               // test    byte [rsp + 36], 1
-	JNE  LBB0_816
-
-LBB0_355:
-	LONG $0x202444f6; BYTE $0x01 // test    byte [rsp + 32], 1
-	JE   LBB0_356
-
-LBB0_817:
-	LONG $0x397d63c4; WORD $0x01d1             // vextracti128    xmm1, ymm10, 1
-	LONG $0x16f9e3c4; WORD $0x01cb             // vpextrq    rbx, xmm1, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x03 // vpextrb    byte [r8 + rbx], xmm1, 3
-	LONG $0x1c2444f6; BYTE $0x01               // test    byte [rsp + 28], 1
-	JNE  LBB0_818
-
-LBB0_357:
-	LONG $0x182444f6; BYTE $0x01 // test    byte [rsp + 24], 1
-	JE   LBB0_358
-
-LBB0_819:
-	LONG $0x16f963c4; WORD $0x01cb             // vpextrq    rbx, xmm9, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x05 // vpextrb    byte [r8 + rbx], xmm1, 5
-	LONG $0x142444f6; BYTE $0x01               // test    byte [rsp + 20], 1
-	JNE  LBB0_820
-
-LBB0_359:
-	LONG $0x01c1f641 // test    r9b, 1
-	JE   LBB0_360
-
-LBB0_821:
-	LONG $0x397d63c4; WORD $0x01c9             // vextracti128    xmm1, ymm9, 1
-	LONG $0x16f9e3c4; WORD $0x01cb             // vpextrq    rbx, xmm1, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x07 // vpextrb    byte [r8 + rbx], xmm1, 7
-	WORD $0xc2f6; BYTE $0x01                   // test    dl, 1
-	QUAD $0x00000128249c8b48                   // mov    rbx, qword [rsp + 296]
-	JNE  LBB0_822
-
-LBB0_361:
-	WORD $0xc1f6; BYTE $0x01 // test    cl, 1
-	JE   LBB0_362
-
-LBB0_823:
-	LONG $0x16f963c4; WORD $0x01c1             // vpextrq    rcx, xmm8, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x09 // vpextrb    byte [r8 + rcx], xmm1, 9
-	LONG $0x01c6f640                           // test    sil, 1
-	QUAD $0x0000013024948b48                   // mov    rdx, qword [rsp + 304]
-	JNE  LBB0_824
-
-LBB0_363:
-	WORD $0x01a8             // test    al, 1
-	QUAD $0x0000009824b48b48 // mov    rsi, qword [rsp + 152]
-	JE   LBB0_364
-
-LBB0_825:
-	LONG $0x397d63c4; WORD $0x01c1             // vextracti128    xmm1, ymm8, 1
-	LONG $0x16f9e3c4; WORD $0x01c9             // vpextrq    rcx, xmm1, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0b // vpextrb    byte [r8 + rcx], xmm1, 11
-	LONG $0x01c5f641                           // test    r13b, 1
-	JNE  LBB0_826
-
-LBB0_365:
-	LONG $0x01c2f641         // test    r10b, 1
-	QUAD $0x0000011824ac8b4c // mov    r13, qword [rsp + 280]
-	JE   LBB0_366
-
-LBB0_827:
-	LONG $0x16f9e3c4; WORD $0x01f9             // vpextrq    rcx, xmm7, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0d // vpextrb    byte [r8 + rcx], xmm1, 13
-	LONG $0x01c3f641                           // test    r11b, 1
-	JNE  LBB0_828
-
-LBB0_367:
-	LONG $0x01c6f641         // test    r14b, 1
-	QUAD $0x0000012024848b48 // mov    rax, qword [rsp + 288]
-	QUAD $0x000000e8248c8b4c // mov    r9, qword [rsp + 232]
-	JE   LBB0_369
-
-LBB0_368:
-	LONG $0x397de3c4; WORD $0x01f9             // vextracti128    xmm1, ymm7, 1
-	LONG $0x16f9e3c4; WORD $0x01c9             // vpextrq    rcx, xmm1, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0f // vpextrb    byte [r8 + rcx], xmm1, 15
-
-LBB0_369:
-	QUAD $0x0002a0248c6ffdc5; BYTE $0x00 // vmovdqa    ymm1, yword [rsp + 672]
-	QUAD $0x00020024bceb75c5; BYTE $0x00 // vpor    ymm15, ymm1, yword [rsp + 512]
-	QUAD $0x0001e024acebf5c5; BYTE $0x00 // vpor    ymm5, ymm1, yword [rsp + 480]
-	QUAD $0x0001802494eb75c5; BYTE $0x00 // vpor    ymm10, ymm1, yword [rsp + 384]
-	QUAD $0x000160248ceb75c5; BYTE $0x00 // vpor    ymm9, ymm1, yword [rsp + 352]
-	QUAD $0x0001c024a4eb75c5; BYTE $0x00 // vpor    ymm12, ymm1, yword [rsp + 448]
-	QUAD $0x0001a0249ceb75c5; BYTE $0x00 // vpor    ymm11, ymm1, yword [rsp + 416]
-	QUAD $0x0001402484eb75c5; BYTE $0x00 // vpor    ymm8, ymm1, yword [rsp + 320]
-	LONG $0xf9ebddc5                     // vpor    ymm7, ymm4, ymm1
-	LONG $0x463de3c4; WORD $0x31cf       // vperm2i128    ymm1, ymm8, ymm7, 49
-	LONG $0x383de3c4; WORD $0x01d7       // vinserti128    ymm2, ymm8, xmm7, 1
-	LONG $0xc9c6ecc5; BYTE $0x88         // vshufps    ymm1, ymm2, ymm1, 136
-	LONG $0x461dc3c4; WORD $0x31d3       // vperm2i128    ymm2, ymm12, ymm11, 49
-	LONG $0x381dc3c4; WORD $0x01db       // vinserti128    ymm3, ymm12, xmm11, 1
-	LONG $0xd2c6e4c5; BYTE $0x88         // vshufps    ymm2, ymm3, ymm2, 136
-	LONG $0x462dc3c4; WORD $0x31d9       // vperm2i128    ymm3, ymm10, ymm9, 49
-	LONG $0x382d43c4; WORD $0x01e9       // vinserti128    ymm13, ymm10, xmm9, 1
-	LONG $0xdbc694c5; BYTE $0x88         // vshufps    ymm3, ymm13, ymm3, 136
-	LONG $0x460563c4; WORD $0x31ed       // vperm2i128    ymm13, ymm15, ymm5, 49
-	LONG $0x380563c4; WORD $0x01f5       // vinserti128    ymm14, ymm15, xmm5, 1
-	LONG $0xc60c41c4; WORD $0x88ed       // vshufps    ymm13, ymm14, ymm13, 136
-	LONG $0x667d41c4; BYTE $0xed         // vpcmpgtd    ymm13, ymm0, ymm13
-	LONG $0xdb66fdc5                     // vpcmpgtd    ymm3, ymm0, ymm3
-	LONG $0xdb6b95c5                     // vpackssdw    ymm3, ymm13, ymm3
-	LONG $0xd266fdc5                     // vpcmpgtd    ymm2, ymm0, ymm2
-	LONG $0xc966fdc5                     // vpcmpgtd    ymm1, ymm0, ymm1
-	LONG $0xc96bedc5                     // vpackssdw    ymm1, ymm2, ymm1
-	LONG $0x00fde3c4; WORD $0xd8d3       // vpermq    ymm2, ymm3, 216
-	LONG $0x00fde3c4; WORD $0xd8c9       // vpermq    ymm1, ymm1, 216
-	LONG $0xc963edc5                     // vpacksswb    ymm1, ymm2, ymm1
-	LONG $0xf6dbf5c5                     // vpand    ymm6, ymm1, ymm6
-	LONG $0xf17ef9c5                     // vmovd    ecx, xmm6
-	WORD $0xc1f6; BYTE $0x01             // test    cl, 1
-	JE   LBB0_370
-	LONG $0x787d62c4; WORD $0x1734       // vpbroadcastb    ymm14, byte [rdi + rdx]
-	LONG $0x1479e3c4; WORD $0x01f1       // vpextrb    ecx, xmm6, 1
-	WORD $0xc1f6; BYTE $0x01             // test    cl, 1
-	JNE  LBB0_830
-
-LBB0_371:
-	LONG $0x24548b48; BYTE $0x68   // mov    rdx, qword [rsp + 104]
-	LONG $0x1479e3c4; WORD $0x02f1 // vpextrb    ecx, xmm6, 2
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_373
-
-LBB0_372:
-	LONG $0x2009e3c4; WORD $0x1f0c; BYTE $0x02 // vpinsrb    xmm1, xmm14, byte [rdi + rbx], 2
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-
-LBB0_373:
-	LONG $0x24748b48; BYTE $0x60               // mov    rsi, qword [rsp + 96]
-	LONG $0x24548b4c; BYTE $0x48               // mov    r10, qword [rsp + 72]
-	LONG $0x1479e3c4; WORD $0x03f1             // vpextrb    ecx, xmm6, 3
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_374
-	QUAD $0x00000110248c8b48                   // mov    rcx, qword [rsp + 272]
-	LONG $0x2009e3c4; WORD $0x0f0c; BYTE $0x03 // vpinsrb    xmm1, xmm14, byte [rdi + rcx], 3
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x04f1             // vpextrb    ecx, xmm6, 4
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_832
-
-LBB0_375:
-	LONG $0x1479e3c4; WORD $0x05f1 // vpextrb    ecx, xmm6, 5
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_376
-
-LBB0_833:
-	LONG $0x2009e3c4; WORD $0x170c; BYTE $0x05 // vpinsrb    xmm1, xmm14, byte [rdi + rdx], 5
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x06f1             // vpextrb    ecx, xmm6, 6
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_834
-
-LBB0_377:
-	LONG $0x1479e3c4; WORD $0x07f1 // vpextrb    ecx, xmm6, 7
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_378
-
-LBB0_835:
-	LONG $0x2009a3c4; WORD $0x0f0c; BYTE $0x07 // vpinsrb    xmm1, xmm14, byte [rdi + r9], 7
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x08f1             // vpextrb    ecx, xmm6, 8
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_836
-
-LBB0_379:
-	LONG $0x24548b48; BYTE $0x58   // mov    rdx, qword [rsp + 88]
-	LONG $0x1479e3c4; WORD $0x09f1 // vpextrb    ecx, xmm6, 9
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_381
-
-LBB0_380:
-	LONG $0x2009a3c4; WORD $0x3f0c; BYTE $0x09 // vpinsrb    xmm1, xmm14, byte [rdi + r15], 9
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-
-LBB0_381:
-	QUAD $0x0000009024848b48                   // mov    rax, qword [rsp + 144]
-	QUAD $0x0000008824b48b48                   // mov    rsi, qword [rsp + 136]
-	QUAD $0x00000080249c8b48                   // mov    rbx, qword [rsp + 128]
-	LONG $0x244c8b4c; BYTE $0x78               // mov    r9, qword [rsp + 120]
-	LONG $0x1479e3c4; WORD $0x0af1             // vpextrb    ecx, xmm6, 10
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_382
-	LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0a // vpinsrb    xmm1, xmm14, byte [rdi + rax], 10
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x0bf1             // vpextrb    ecx, xmm6, 11
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_838
-
-LBB0_383:
-	LONG $0x1479e3c4; WORD $0x0cf1 // vpextrb    ecx, xmm6, 12
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_384
-
-LBB0_839:
-	QUAD $0x000000f824848b48                   // mov    rax, qword [rsp + 248]
-	LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0c // vpinsrb    xmm1, xmm14, byte [rdi + rax], 12
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x0df1             // vpextrb    ecx, xmm6, 13
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_385
-	JMP  LBB0_386
-
-LBB0_370:
-	LONG $0x1479e3c4; WORD $0x01f1 // vpextrb    ecx, xmm6, 1
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_371
-
-LBB0_830:
-	LONG $0x2009e3c4; WORD $0x370c; BYTE $0x01 // vpinsrb    xmm1, xmm14, byte [rdi + rsi], 1
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x24548b48; BYTE $0x68               // mov    rdx, qword [rsp + 104]
-	LONG $0x1479e3c4; WORD $0x02f1             // vpextrb    ecx, xmm6, 2
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_372
-	JMP  LBB0_373
-
-LBB0_374:
-	LONG $0x1479e3c4; WORD $0x04f1 // vpextrb    ecx, xmm6, 4
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_375
-
-LBB0_832:
-	QUAD $0x00000108248c8b48                   // mov    rcx, qword [rsp + 264]
-	LONG $0x2009e3c4; WORD $0x0f0c; BYTE $0x04 // vpinsrb    xmm1, xmm14, byte [rdi + rcx], 4
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x05f1             // vpextrb    ecx, xmm6, 5
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_833
-
-LBB0_376:
-	LONG $0x1479e3c4; WORD $0x06f1 // vpextrb    ecx, xmm6, 6
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_377
-
-LBB0_834:
-	LONG $0x2009e3c4; WORD $0x070c; BYTE $0x06 // vpinsrb    xmm1, xmm14, byte [rdi + rax], 6
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x07f1             // vpextrb    ecx, xmm6, 7
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_835
-
-LBB0_378:
-	LONG $0x1479e3c4; WORD $0x08f1 // vpextrb    ecx, xmm6, 8
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_379
-
-LBB0_836:
-	LONG $0x2009e3c4; WORD $0x370c; BYTE $0x08 // vpinsrb    xmm1, xmm14, byte [rdi + rsi], 8
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x24548b48; BYTE $0x58               // mov    rdx, qword [rsp + 88]
-	LONG $0x1479e3c4; WORD $0x09f1             // vpextrb    ecx, xmm6, 9
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_380
-	JMP  LBB0_381
-
-LBB0_382:
-	LONG $0x1479e3c4; WORD $0x0bf1 // vpextrb    ecx, xmm6, 11
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_383
-
-LBB0_838:
-	QUAD $0x0000010024848b48                   // mov    rax, qword [rsp + 256]
-	LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0b // vpinsrb    xmm1, xmm14, byte [rdi + rax], 11
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x0cf1             // vpextrb    ecx, xmm6, 12
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_839
-
-LBB0_384:
-	LONG $0x1479e3c4; WORD $0x0df1 // vpextrb    ecx, xmm6, 13
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_386
-
-LBB0_385:
-	LONG $0x2009e3c4; WORD $0x170c; BYTE $0x0d // vpinsrb    xmm1, xmm14, byte [rdi + rdx], 13
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-
-LBB0_386:
-	LONG $0x24448b48; BYTE $0x50               // mov    rax, qword [rsp + 80]
-	LONG $0x24548b48; BYTE $0x40               // mov    rdx, qword [rsp + 64]
-	LONG $0x1479e3c4; WORD $0x0ef1             // vpextrb    ecx, xmm6, 14
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_388
-	LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0e // vpinsrb    xmm1, xmm14, byte [rdi + rax], 14
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-
-LBB0_388:
-	LONG $0x1479e3c4; WORD $0x0ff1             // vpextrb    ecx, xmm6, 15
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_390
-	LONG $0x2009a3c4; WORD $0x170c; BYTE $0x0f // vpinsrb    xmm1, xmm14, byte [rdi + r10], 15
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-
-LBB0_390:
-	LONG $0x397de3c4; WORD $0x01f1             // vextracti128    xmm1, ymm6, 1
-	LONG $0xc87ef9c5                           // vmovd    eax, xmm1
-	LONG $0x2c244489                           // mov    dword [rsp + 44], eax
-	WORD $0x01a8                               // test    al, 1
-	JE   LBB0_392
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	LONG $0x2069e3c4; WORD $0x1714; BYTE $0x00 // vpinsrb    xmm2, xmm2, byte [rdi + rdx], 0
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-
-LBB0_392:
-	LONG $0x24448b48; BYTE $0x38               // mov    rax, qword [rsp + 56]
-	LONG $0x1479e3c4; WORD $0x01c9             // vpextrb    ecx, xmm1, 1
-	LONG $0x28244c89                           // mov    dword [rsp + 40], ecx
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_393
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	LONG $0x2069e3c4; WORD $0x3714; BYTE $0x01 // vpinsrb    xmm2, xmm2, byte [rdi + rsi], 1
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479e3c4; WORD $0x02c9             // vpextrb    ecx, xmm1, 2
-	LONG $0x24244c89                           // mov    dword [rsp + 36], ecx
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_841
-
-LBB0_394:
-	LONG $0x1479e3c4; WORD $0x03c9 // vpextrb    ecx, xmm1, 3
-	LONG $0x20244c89               // mov    dword [rsp + 32], ecx
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_395
-
-LBB0_842:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	LONG $0x2069a3c4; WORD $0x0f14; BYTE $0x03 // vpinsrb    xmm2, xmm2, byte [rdi + r9], 3
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479e3c4; WORD $0x04c9             // vpextrb    ecx, xmm1, 4
-	LONG $0x1c244c89                           // mov    dword [rsp + 28], ecx
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_843
-
-LBB0_396:
-	LONG $0x1479e3c4; WORD $0x05c8 // vpextrb    eax, xmm1, 5
-	LONG $0x18244489               // mov    dword [rsp + 24], eax
-	WORD $0x01a8                   // test    al, 1
-	JE   LBB0_398
-
-LBB0_397:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	LONG $0x2069a3c4; WORD $0x2f14; BYTE $0x05 // vpinsrb    xmm2, xmm2, byte [rdi + r13], 5
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-
-LBB0_398:
-	LONG $0x24448b48; BYTE $0x70               // mov    rax, qword [rsp + 112]
-	LONG $0x1479e3c4; WORD $0x06c9             // vpextrb    ecx, xmm1, 6
-	LONG $0x14244c89                           // mov    dword [rsp + 20], ecx
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_399
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	LONG $0x2069e3c4; WORD $0x0714; BYTE $0x06 // vpinsrb    xmm2, xmm2, byte [rdi + rax], 6
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479c3c4; WORD $0x07c9             // vpextrb    r9d, xmm1, 7
-	LONG $0x01c1f641                           // test    r9b, 1
-	JNE  LBB0_845
-
-LBB0_400:
-	LONG $0x1479e3c4; WORD $0x08ca // vpextrb    edx, xmm1, 8
-	WORD $0xc2f6; BYTE $0x01       // test    dl, 1
-	JE   LBB0_401
-
-LBB0_846:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	QUAD $0x000000d824848b48                   // mov    rax, qword [rsp + 216]
-	LONG $0x2069e3c4; WORD $0x0714; BYTE $0x08 // vpinsrb    xmm2, xmm2, byte [rdi + rax], 8
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479e3c4; WORD $0x09c9             // vpextrb    ecx, xmm1, 9
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_847
-
-LBB0_402:
-	LONG $0x1479e3c4; WORD $0x0ace // vpextrb    esi, xmm1, 10
-	LONG $0x01c6f640               // test    sil, 1
-	JE   LBB0_403
-
-LBB0_848:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	QUAD $0x000000c824848b48                   // mov    rax, qword [rsp + 200]
-	LONG $0x2069e3c4; WORD $0x0714; BYTE $0x0a // vpinsrb    xmm2, xmm2, byte [rdi + rax], 10
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479e3c4; WORD $0x0bc8             // vpextrb    eax, xmm1, 11
-	WORD $0x01a8                               // test    al, 1
-	JNE  LBB0_849
-
-LBB0_404:
-	LONG $0x1479c3c4; WORD $0x0ccd // vpextrb    r13d, xmm1, 12
-	LONG $0x01c5f641               // test    r13b, 1
-	JE   LBB0_405
-
-LBB0_850:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	QUAD $0x000000b8249c8b48                   // mov    rbx, qword [rsp + 184]
-	LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x0c // vpinsrb    xmm2, xmm2, byte [rdi + rbx], 12
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479c3c4; WORD $0x0dca             // vpextrb    r10d, xmm1, 13
-	LONG $0x01c2f641                           // test    r10b, 1
-	JNE  LBB0_851
-
-LBB0_406:
-	LONG $0x1479c3c4; WORD $0x0ecb // vpextrb    r11d, xmm1, 14
-	LONG $0x01c3f641               // test    r11b, 1
-	JE   LBB0_407
-
-LBB0_852:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	QUAD $0x000000a8249c8b48                   // mov    rbx, qword [rsp + 168]
-	LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x0e // vpinsrb    xmm2, xmm2, byte [rdi + rbx], 14
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479c3c4; WORD $0x0fce             // vpextrb    r14d, xmm1, 15
-	LONG $0x01c6f641                           // test    r14b, 1
-	JNE  LBB0_408
-	JMP  LBB0_409
-
-LBB0_393:
-	LONG $0x1479e3c4; WORD $0x02c9 // vpextrb    ecx, xmm1, 2
-	LONG $0x24244c89               // mov    dword [rsp + 36], ecx
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_394
-
-LBB0_841:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x02 // vpinsrb    xmm2, xmm2, byte [rdi + rbx], 2
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479e3c4; WORD $0x03c9             // vpextrb    ecx, xmm1, 3
-	LONG $0x20244c89                           // mov    dword [rsp + 32], ecx
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_842
-
-LBB0_395:
-	LONG $0x1479e3c4; WORD $0x04c9 // vpextrb    ecx, xmm1, 4
-	LONG $0x1c244c89               // mov    dword [rsp + 28], ecx
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_396
-
-LBB0_843:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	LONG $0x2069e3c4; WORD $0x0714; BYTE $0x04 // vpinsrb    xmm2, xmm2, byte [rdi + rax], 4
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479e3c4; WORD $0x05c8             // vpextrb    eax, xmm1, 5
-	LONG $0x18244489                           // mov    dword [rsp + 24], eax
-	WORD $0x01a8                               // test    al, 1
-	JNE  LBB0_397
-	JMP  LBB0_398
-
-LBB0_399:
-	LONG $0x1479c3c4; WORD $0x07c9 // vpextrb    r9d, xmm1, 7
-	LONG $0x01c1f641               // test    r9b, 1
-	JE   LBB0_400
-
-LBB0_845:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	QUAD $0x000000f024848b48                   // mov    rax, qword [rsp + 240]
-	LONG $0x2069e3c4; WORD $0x0714; BYTE $0x07 // vpinsrb    xmm2, xmm2, byte [rdi + rax], 7
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479e3c4; WORD $0x08ca             // vpextrb    edx, xmm1, 8
-	WORD $0xc2f6; BYTE $0x01                   // test    dl, 1
-	JNE  LBB0_846
-
-LBB0_401:
-	LONG $0x1479e3c4; WORD $0x09c9 // vpextrb    ecx, xmm1, 9
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_402
-
-LBB0_847:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	QUAD $0x000000d024848b48                   // mov    rax, qword [rsp + 208]
-	LONG $0x2069e3c4; WORD $0x0714; BYTE $0x09 // vpinsrb    xmm2, xmm2, byte [rdi + rax], 9
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479e3c4; WORD $0x0ace             // vpextrb    esi, xmm1, 10
-	LONG $0x01c6f640                           // test    sil, 1
-	JNE  LBB0_848
-
-LBB0_403:
-	LONG $0x1479e3c4; WORD $0x0bc8 // vpextrb    eax, xmm1, 11
-	WORD $0x01a8                   // test    al, 1
-	JE   LBB0_404
-
-LBB0_849:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	QUAD $0x000000c0249c8b48                   // mov    rbx, qword [rsp + 192]
-	LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x0b // vpinsrb    xmm2, xmm2, byte [rdi + rbx], 11
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479c3c4; WORD $0x0ccd             // vpextrb    r13d, xmm1, 12
-	LONG $0x01c5f641                           // test    r13b, 1
-	JNE  LBB0_850
-
-LBB0_405:
-	LONG $0x1479c3c4; WORD $0x0dca // vpextrb    r10d, xmm1, 13
-	LONG $0x01c2f641               // test    r10b, 1
-	JE   LBB0_406
-
-LBB0_851:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	QUAD $0x000000b0249c8b48                   // mov    rbx, qword [rsp + 176]
-	LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x0d // vpinsrb    xmm2, xmm2, byte [rdi + rbx], 13
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479c3c4; WORD $0x0ecb             // vpextrb    r11d, xmm1, 14
-	LONG $0x01c3f641                           // test    r11b, 1
-	JNE  LBB0_852
-
-LBB0_407:
-	LONG $0x1479c3c4; WORD $0x0fce // vpextrb    r14d, xmm1, 15
-	LONG $0x01c6f641               // test    r14b, 1
-	JE   LBB0_409
-
-LBB0_408:
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	QUAD $0x000000a0249c8b48                   // mov    rbx, qword [rsp + 160]
-	LONG $0x2071e3c4; WORD $0x1f0c; BYTE $0x0f // vpinsrb    xmm1, xmm1, byte [rdi + rbx], 15
-	LONG $0x380d63c4; WORD $0x01f1             // vinserti128    ymm14, ymm14, xmm1, 1
-
-LBB0_409:
-	LONG $0x7175c1c4; WORD $0x04d6             // vpsrlw    ymm1, ymm14, 4
-	QUAD $0x00000080b5db75c5                   // vpand    ymm14, ymm1, yword 128[rbp] /* [rip + .LCPI0_4] */
-	LONG $0x7e79c1c4; BYTE $0xf7               // vmovd    r15d, xmm6
-	LONG $0x01c7f641                           // test    r15b, 1
-	JE   LBB0_410
-	LONG $0x7ef961c4; BYTE $0xfb               // vmovq    rbx, xmm15
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x00 // vpextrb    byte [r8 + rbx], xmm14, 0
-	LONG $0x1479e3c4; WORD $0x01f3             // vpextrb    ebx, xmm6, 1
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_854
-
-LBB0_411:
-	LONG $0x1479e3c4; WORD $0x02f3 // vpextrb    ebx, xmm6, 2
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	QUAD $0x000000e024bc8b4c       // mov    r15, qword [rsp + 224]
-	JE   LBB0_412
-
-LBB0_855:
-	LONG $0x397d63c4; WORD $0x01f9             // vextracti128    xmm1, ymm15, 1
-	LONG $0x7ef9e1c4; BYTE $0xcb               // vmovq    rbx, xmm1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x02 // vpextrb    byte [r8 + rbx], xmm14, 2
-	LONG $0x1479e3c4; WORD $0x03f3             // vpextrb    ebx, xmm6, 3
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_856
-
-LBB0_413:
-	LONG $0x1479e3c4; WORD $0x04f3 // vpextrb    ebx, xmm6, 4
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_414
-
-LBB0_857:
-	LONG $0x7ef9e1c4; BYTE $0xeb               // vmovq    rbx, xmm5
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x04 // vpextrb    byte [r8 + rbx], xmm14, 4
-	LONG $0x1479e3c4; WORD $0x05f3             // vpextrb    ebx, xmm6, 5
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_858
-
-LBB0_415:
-	LONG $0x1479e3c4; WORD $0x06f3 // vpextrb    ebx, xmm6, 6
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_416
-
-LBB0_859:
-	LONG $0x397de3c4; WORD $0x01e9             // vextracti128    xmm1, ymm5, 1
-	LONG $0x7ef9e1c4; BYTE $0xcb               // vmovq    rbx, xmm1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x06 // vpextrb    byte [r8 + rbx], xmm14, 6
-	LONG $0x1479e3c4; WORD $0x07f3             // vpextrb    ebx, xmm6, 7
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_860
-
-LBB0_417:
-	LONG $0x1479e3c4; WORD $0x08f3 // vpextrb    ebx, xmm6, 8
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_418
-
-LBB0_861:
-	LONG $0x7ef961c4; BYTE $0xe3               // vmovq    rbx, xmm12
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x08 // vpextrb    byte [r8 + rbx], xmm14, 8
-	LONG $0x1479e3c4; WORD $0x09f3             // vpextrb    ebx, xmm6, 9
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_862
-
-LBB0_419:
-	LONG $0x1479e3c4; WORD $0x0af3 // vpextrb    ebx, xmm6, 10
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_420
-
-LBB0_863:
-	LONG $0x397d63c4; WORD $0x01e1             // vextracti128    xmm1, ymm12, 1
-	LONG $0x7ef9e1c4; BYTE $0xcb               // vmovq    rbx, xmm1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x0a // vpextrb    byte [r8 + rbx], xmm14, 10
-	LONG $0x1479e3c4; WORD $0x0bf3             // vpextrb    ebx, xmm6, 11
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_864
-
-LBB0_421:
-	LONG $0x1479e3c4; WORD $0x0cf3 // vpextrb    ebx, xmm6, 12
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_422
-
-LBB0_865:
-	LONG $0x7ef961c4; BYTE $0xdb               // vmovq    rbx, xmm11
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x0c // vpextrb    byte [r8 + rbx], xmm14, 12
-	LONG $0x1479e3c4; WORD $0x0df3             // vpextrb    ebx, xmm6, 13
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_866
-
-LBB0_423:
-	LONG $0x1479e3c4; WORD $0x0ef3 // vpextrb    ebx, xmm6, 14
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_424
-
-LBB0_867:
-	LONG $0x397d63c4; WORD $0x01d9             // vextracti128    xmm1, ymm11, 1
-	LONG $0x7ef9e1c4; BYTE $0xcb               // vmovq    rbx, xmm1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x0e // vpextrb    byte [r8 + rbx], xmm14, 14
-	LONG $0x1479e3c4; WORD $0x0ff3             // vpextrb    ebx, xmm6, 15
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_868
-
-LBB0_425:
-	LONG $0x2c2444f6; BYTE $0x01 // test    byte [rsp + 44], 1
-	JE   LBB0_426
-
-LBB0_869:
-	LONG $0x7ef961c4; BYTE $0xd3               // vmovq    rbx, xmm10
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x00 // vpextrb    byte [r8 + rbx], xmm1, 0
-	LONG $0x282444f6; BYTE $0x01               // test    byte [rsp + 40], 1
-	JNE  LBB0_870
-
-LBB0_427:
-	LONG $0x242444f6; BYTE $0x01 // test    byte [rsp + 36], 1
-	JE   LBB0_428
-
-LBB0_871:
-	LONG $0x397d63c4; WORD $0x01d1             // vextracti128    xmm1, ymm10, 1
-	LONG $0x7ef9e1c4; BYTE $0xcb               // vmovq    rbx, xmm1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x02 // vpextrb    byte [r8 + rbx], xmm1, 2
-	LONG $0x202444f6; BYTE $0x01               // test    byte [rsp + 32], 1
-	JNE  LBB0_872
-
-LBB0_429:
-	LONG $0x1c2444f6; BYTE $0x01 // test    byte [rsp + 28], 1
-	JE   LBB0_430
-
-LBB0_873:
-	LONG $0x7ef961c4; BYTE $0xcb               // vmovq    rbx, xmm9
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x04 // vpextrb    byte [r8 + rbx], xmm1, 4
-	LONG $0x182444f6; BYTE $0x01               // test    byte [rsp + 24], 1
-	JNE  LBB0_874
-
-LBB0_431:
-	LONG $0x142444f6; BYTE $0x01 // test    byte [rsp + 20], 1
-	JE   LBB0_432
-
-LBB0_875:
-	LONG $0x397d63c4; WORD $0x01c9             // vextracti128    xmm1, ymm9, 1
-	LONG $0x7ef9e1c4; BYTE $0xcb               // vmovq    rbx, xmm1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x06 // vpextrb    byte [r8 + rbx], xmm1, 6
-	LONG $0x01c1f641                           // test    r9b, 1
-	JNE  LBB0_876
-
-LBB0_433:
-	WORD $0xc2f6; BYTE $0x01 // test    dl, 1
-	QUAD $0x00000128249c8b48 // mov    rbx, qword [rsp + 296]
-	JE   LBB0_434
-
-LBB0_877:
-	LONG $0x7ef961c4; BYTE $0xc2               // vmovq    rdx, xmm8
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x100c; BYTE $0x08 // vpextrb    byte [r8 + rdx], xmm1, 8
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_878
-
-LBB0_435:
-	LONG $0x01c6f640         // test    sil, 1
-	QUAD $0x0000013024948b48 // mov    rdx, qword [rsp + 304]
-	JE   LBB0_436
-
-LBB0_879:
-	LONG $0x397d63c4; WORD $0x01c1             // vextracti128    xmm1, ymm8, 1
-	LONG $0x7ef9e1c4; BYTE $0xc9               // vmovq    rcx, xmm1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0a // vpextrb    byte [r8 + rcx], xmm1, 10
-	WORD $0x01a8                               // test    al, 1
-	QUAD $0x0000009824b48b48                   // mov    rsi, qword [rsp + 152]
-	JNE  LBB0_880
-
-LBB0_437:
-	LONG $0x01c5f641 // test    r13b, 1
-	JE   LBB0_438
-
-LBB0_881:
-	LONG $0x7ef9e1c4; BYTE $0xf9               // vmovq    rcx, xmm7
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0c // vpextrb    byte [r8 + rcx], xmm1, 12
-	LONG $0x01c2f641                           // test    r10b, 1
-	QUAD $0x0000011824ac8b4c                   // mov    r13, qword [rsp + 280]
-	JNE  LBB0_882
-
-LBB0_439:
-	LONG $0x01c3f641 // test    r11b, 1
-	JE   LBB0_440
-
-LBB0_883:
-	LONG $0x397de3c4; WORD $0x01f9             // vextracti128    xmm1, ymm7, 1
-	LONG $0x7ef9e1c4; BYTE $0xc9               // vmovq    rcx, xmm1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0e // vpextrb    byte [r8 + rcx], xmm1, 14
-	LONG $0x01c6f641                           // test    r14b, 1
-	QUAD $0x0000012024848b48                   // mov    rax, qword [rsp + 288]
-	QUAD $0x000000e8248c8b4c                   // mov    r9, qword [rsp + 232]
-	JNE  LBB0_441
-	JMP  LBB0_442
-
-LBB0_410:
-	LONG $0x1479e3c4; WORD $0x01f3 // vpextrb    ebx, xmm6, 1
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_411
-
-LBB0_854:
-	LONG $0x16f963c4; WORD $0x01fb             // vpextrq    rbx, xmm15, 1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x01 // vpextrb    byte [r8 + rbx], xmm14, 1
-	LONG $0x1479e3c4; WORD $0x02f3             // vpextrb    ebx, xmm6, 2
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	QUAD $0x000000e024bc8b4c                   // mov    r15, qword [rsp + 224]
-	JNE  LBB0_855
-
-LBB0_412:
-	LONG $0x1479e3c4; WORD $0x03f3 // vpextrb    ebx, xmm6, 3
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_413
-
-LBB0_856:
-	LONG $0x397d63c4; WORD $0x01f9             // vextracti128    xmm1, ymm15, 1
-	LONG $0x16f9e3c4; WORD $0x01cb             // vpextrq    rbx, xmm1, 1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x03 // vpextrb    byte [r8 + rbx], xmm14, 3
-	LONG $0x1479e3c4; WORD $0x04f3             // vpextrb    ebx, xmm6, 4
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_857
-
-LBB0_414:
-	LONG $0x1479e3c4; WORD $0x05f3 // vpextrb    ebx, xmm6, 5
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_415
-
-LBB0_858:
-	LONG $0x16f9e3c4; WORD $0x01eb             // vpextrq    rbx, xmm5, 1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x05 // vpextrb    byte [r8 + rbx], xmm14, 5
-	LONG $0x1479e3c4; WORD $0x06f3             // vpextrb    ebx, xmm6, 6
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_859
-
-LBB0_416:
-	LONG $0x1479e3c4; WORD $0x07f3 // vpextrb    ebx, xmm6, 7
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_417
-
-LBB0_860:
-	LONG $0x397de3c4; WORD $0x01e9             // vextracti128    xmm1, ymm5, 1
-	LONG $0x16f9e3c4; WORD $0x01cb             // vpextrq    rbx, xmm1, 1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x07 // vpextrb    byte [r8 + rbx], xmm14, 7
-	LONG $0x1479e3c4; WORD $0x08f3             // vpextrb    ebx, xmm6, 8
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_861
-
-LBB0_418:
-	LONG $0x1479e3c4; WORD $0x09f3 // vpextrb    ebx, xmm6, 9
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_419
-
-LBB0_862:
-	LONG $0x16f963c4; WORD $0x01e3             // vpextrq    rbx, xmm12, 1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x09 // vpextrb    byte [r8 + rbx], xmm14, 9
-	LONG $0x1479e3c4; WORD $0x0af3             // vpextrb    ebx, xmm6, 10
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_863
-
-LBB0_420:
-	LONG $0x1479e3c4; WORD $0x0bf3 // vpextrb    ebx, xmm6, 11
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_421
-
-LBB0_864:
-	LONG $0x397d63c4; WORD $0x01e1             // vextracti128    xmm1, ymm12, 1
-	LONG $0x16f9e3c4; WORD $0x01cb             // vpextrq    rbx, xmm1, 1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x0b // vpextrb    byte [r8 + rbx], xmm14, 11
-	LONG $0x1479e3c4; WORD $0x0cf3             // vpextrb    ebx, xmm6, 12
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_865
-
-LBB0_422:
-	LONG $0x1479e3c4; WORD $0x0df3 // vpextrb    ebx, xmm6, 13
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_423
-
-LBB0_866:
-	LONG $0x16f963c4; WORD $0x01db             // vpextrq    rbx, xmm11, 1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x0d // vpextrb    byte [r8 + rbx], xmm14, 13
-	LONG $0x1479e3c4; WORD $0x0ef3             // vpextrb    ebx, xmm6, 14
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_867
-
-LBB0_424:
-	LONG $0x1479e3c4; WORD $0x0ff3 // vpextrb    ebx, xmm6, 15
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_425
-
-LBB0_868:
-	LONG $0x397d63c4; WORD $0x01d9             // vextracti128    xmm1, ymm11, 1
-	LONG $0x16f9e3c4; WORD $0x01cb             // vpextrq    rbx, xmm1, 1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x0f // vpextrb    byte [r8 + rbx], xmm14, 15
-	LONG $0x2c2444f6; BYTE $0x01               // test    byte [rsp + 44], 1
-	JNE  LBB0_869
-
-LBB0_426:
-	LONG $0x282444f6; BYTE $0x01 // test    byte [rsp + 40], 1
-	JE   LBB0_427
-
-LBB0_870:
-	LONG $0x16f963c4; WORD $0x01d3             // vpextrq    rbx, xmm10, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x01 // vpextrb    byte [r8 + rbx], xmm1, 1
-	LONG $0x242444f6; BYTE $0x01               // test    byte [rsp + 36], 1
-	JNE  LBB0_871
-
-LBB0_428:
-	LONG $0x202444f6; BYTE $0x01 // test    byte [rsp + 32], 1
-	JE   LBB0_429
-
-LBB0_872:
-	LONG $0x397d63c4; WORD $0x01d1             // vextracti128    xmm1, ymm10, 1
-	LONG $0x16f9e3c4; WORD $0x01cb             // vpextrq    rbx, xmm1, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x03 // vpextrb    byte [r8 + rbx], xmm1, 3
-	LONG $0x1c2444f6; BYTE $0x01               // test    byte [rsp + 28], 1
-	JNE  LBB0_873
-
-LBB0_430:
-	LONG $0x182444f6; BYTE $0x01 // test    byte [rsp + 24], 1
-	JE   LBB0_431
-
-LBB0_874:
-	LONG $0x16f963c4; WORD $0x01cb             // vpextrq    rbx, xmm9, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x05 // vpextrb    byte [r8 + rbx], xmm1, 5
-	LONG $0x142444f6; BYTE $0x01               // test    byte [rsp + 20], 1
-	JNE  LBB0_875
-
-LBB0_432:
-	LONG $0x01c1f641 // test    r9b, 1
-	JE   LBB0_433
-
-LBB0_876:
-	LONG $0x397d63c4; WORD $0x01c9             // vextracti128    xmm1, ymm9, 1
-	LONG $0x16f9e3c4; WORD $0x01cb             // vpextrq    rbx, xmm1, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x07 // vpextrb    byte [r8 + rbx], xmm1, 7
-	WORD $0xc2f6; BYTE $0x01                   // test    dl, 1
-	QUAD $0x00000128249c8b48                   // mov    rbx, qword [rsp + 296]
-	JNE  LBB0_877
-
-LBB0_434:
-	WORD $0xc1f6; BYTE $0x01 // test    cl, 1
-	JE   LBB0_435
-
-LBB0_878:
-	LONG $0x16f963c4; WORD $0x01c1             // vpextrq    rcx, xmm8, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x09 // vpextrb    byte [r8 + rcx], xmm1, 9
-	LONG $0x01c6f640                           // test    sil, 1
-	QUAD $0x0000013024948b48                   // mov    rdx, qword [rsp + 304]
-	JNE  LBB0_879
-
-LBB0_436:
-	WORD $0x01a8             // test    al, 1
-	QUAD $0x0000009824b48b48 // mov    rsi, qword [rsp + 152]
-	JE   LBB0_437
-
-LBB0_880:
-	LONG $0x397d63c4; WORD $0x01c1             // vextracti128    xmm1, ymm8, 1
-	LONG $0x16f9e3c4; WORD $0x01c9             // vpextrq    rcx, xmm1, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0b // vpextrb    byte [r8 + rcx], xmm1, 11
-	LONG $0x01c5f641                           // test    r13b, 1
-	JNE  LBB0_881
-
-LBB0_438:
-	LONG $0x01c2f641         // test    r10b, 1
-	QUAD $0x0000011824ac8b4c // mov    r13, qword [rsp + 280]
-	JE   LBB0_439
-
-LBB0_882:
-	LONG $0x16f9e3c4; WORD $0x01f9             // vpextrq    rcx, xmm7, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0d // vpextrb    byte [r8 + rcx], xmm1, 13
-	LONG $0x01c3f641                           // test    r11b, 1
-	JNE  LBB0_883
-
-LBB0_440:
-	LONG $0x01c6f641         // test    r14b, 1
-	QUAD $0x0000012024848b48 // mov    rax, qword [rsp + 288]
-	QUAD $0x000000e8248c8b4c // mov    r9, qword [rsp + 232]
-	JE   LBB0_442
-
-LBB0_441:
-	LONG $0x397de3c4; WORD $0x01f9             // vextracti128    xmm1, ymm7, 1
-	LONG $0x16f9e3c4; WORD $0x01c9             // vpextrq    rcx, xmm1, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0f // vpextrb    byte [r8 + rcx], xmm1, 15
-
-LBB0_442:
-	QUAD $0x000280248c6ffdc5; BYTE $0x00 // vmovdqa    ymm1, yword [rsp + 640]
-	QUAD $0x00020024bceb75c5; BYTE $0x00 // vpor    ymm15, ymm1, yword [rsp + 512]
-	QUAD $0x0001e024acebf5c5; BYTE $0x00 // vpor    ymm5, ymm1, yword [rsp + 480]
-	QUAD $0x0001802494eb75c5; BYTE $0x00 // vpor    ymm10, ymm1, yword [rsp + 384]
-	QUAD $0x000160248ceb75c5; BYTE $0x00 // vpor    ymm9, ymm1, yword [rsp + 352]
-	QUAD $0x0001c024a4eb75c5; BYTE $0x00 // vpor    ymm12, ymm1, yword [rsp + 448]
-	QUAD $0x0001a0249ceb75c5; BYTE $0x00 // vpor    ymm11, ymm1, yword [rsp + 416]
-	QUAD $0x0001402484eb75c5; BYTE $0x00 // vpor    ymm8, ymm1, yword [rsp + 320]
-	LONG $0xf9ebddc5                     // vpor    ymm7, ymm4, ymm1
-	LONG $0x463de3c4; WORD $0x31cf       // vperm2i128    ymm1, ymm8, ymm7, 49
-	LONG $0x383de3c4; WORD $0x01d7       // vinserti128    ymm2, ymm8, xmm7, 1
-	LONG $0xc9c6ecc5; BYTE $0x88         // vshufps    ymm1, ymm2, ymm1, 136
-	LONG $0x461dc3c4; WORD $0x31d3       // vperm2i128    ymm2, ymm12, ymm11, 49
-	LONG $0x381dc3c4; WORD $0x01db       // vinserti128    ymm3, ymm12, xmm11, 1
-	LONG $0xd2c6e4c5; BYTE $0x88         // vshufps    ymm2, ymm3, ymm2, 136
-	LONG $0x462dc3c4; WORD $0x31d9       // vperm2i128    ymm3, ymm10, ymm9, 49
-	LONG $0x382d43c4; WORD $0x01e9       // vinserti128    ymm13, ymm10, xmm9, 1
-	LONG $0xdbc694c5; BYTE $0x88         // vshufps    ymm3, ymm13, ymm3, 136
-	LONG $0x460563c4; WORD $0x31ed       // vperm2i128    ymm13, ymm15, ymm5, 49
-	LONG $0x380563c4; WORD $0x01f5       // vinserti128    ymm14, ymm15, xmm5, 1
-	LONG $0xc60c41c4; WORD $0x88ed       // vshufps    ymm13, ymm14, ymm13, 136
-	LONG $0x667d41c4; BYTE $0xed         // vpcmpgtd    ymm13, ymm0, ymm13
-	LONG $0xdb66fdc5                     // vpcmpgtd    ymm3, ymm0, ymm3
-	LONG $0xdb6b95c5                     // vpackssdw    ymm3, ymm13, ymm3
-	LONG $0xd266fdc5                     // vpcmpgtd    ymm2, ymm0, ymm2
-	LONG $0xc966fdc5                     // vpcmpgtd    ymm1, ymm0, ymm1
-	LONG $0xc96bedc5                     // vpackssdw    ymm1, ymm2, ymm1
-	LONG $0x00fde3c4; WORD $0xd8d3       // vpermq    ymm2, ymm3, 216
-	LONG $0x00fde3c4; WORD $0xd8c9       // vpermq    ymm1, ymm1, 216
-	LONG $0xc963edc5                     // vpacksswb    ymm1, ymm2, ymm1
-	LONG $0xf6dbf5c5                     // vpand    ymm6, ymm1, ymm6
-	LONG $0xf17ef9c5                     // vmovd    ecx, xmm6
-	WORD $0xc1f6; BYTE $0x01             // test    cl, 1
-	JE   LBB0_443
-	LONG $0x787d62c4; WORD $0x1734       // vpbroadcastb    ymm14, byte [rdi + rdx]
-	LONG $0x1479e3c4; WORD $0x01f1       // vpextrb    ecx, xmm6, 1
-	WORD $0xc1f6; BYTE $0x01             // test    cl, 1
-	JNE  LBB0_885
-
-LBB0_444:
-	LONG $0x24548b48; BYTE $0x68   // mov    rdx, qword [rsp + 104]
-	LONG $0x1479e3c4; WORD $0x02f1 // vpextrb    ecx, xmm6, 2
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_446
-
-LBB0_445:
-	LONG $0x2009e3c4; WORD $0x1f0c; BYTE $0x02 // vpinsrb    xmm1, xmm14, byte [rdi + rbx], 2
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-
-LBB0_446:
-	LONG $0x24748b48; BYTE $0x60               // mov    rsi, qword [rsp + 96]
-	LONG $0x24548b4c; BYTE $0x48               // mov    r10, qword [rsp + 72]
-	LONG $0x1479e3c4; WORD $0x03f1             // vpextrb    ecx, xmm6, 3
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_447
-	QUAD $0x00000110248c8b48                   // mov    rcx, qword [rsp + 272]
-	LONG $0x2009e3c4; WORD $0x0f0c; BYTE $0x03 // vpinsrb    xmm1, xmm14, byte [rdi + rcx], 3
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x04f1             // vpextrb    ecx, xmm6, 4
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_887
-
-LBB0_448:
-	LONG $0x1479e3c4; WORD $0x05f1 // vpextrb    ecx, xmm6, 5
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_449
-
-LBB0_888:
-	LONG $0x2009e3c4; WORD $0x170c; BYTE $0x05 // vpinsrb    xmm1, xmm14, byte [rdi + rdx], 5
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x06f1             // vpextrb    ecx, xmm6, 6
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_889
-
-LBB0_450:
-	LONG $0x1479e3c4; WORD $0x07f1 // vpextrb    ecx, xmm6, 7
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_451
-
-LBB0_890:
-	LONG $0x2009a3c4; WORD $0x0f0c; BYTE $0x07 // vpinsrb    xmm1, xmm14, byte [rdi + r9], 7
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x08f1             // vpextrb    ecx, xmm6, 8
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_891
-
-LBB0_452:
-	LONG $0x24548b48; BYTE $0x58   // mov    rdx, qword [rsp + 88]
-	LONG $0x1479e3c4; WORD $0x09f1 // vpextrb    ecx, xmm6, 9
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_454
-
-LBB0_453:
-	LONG $0x2009a3c4; WORD $0x3f0c; BYTE $0x09 // vpinsrb    xmm1, xmm14, byte [rdi + r15], 9
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-
-LBB0_454:
-	QUAD $0x0000009024848b48                   // mov    rax, qword [rsp + 144]
-	QUAD $0x0000008824b48b48                   // mov    rsi, qword [rsp + 136]
-	QUAD $0x00000080249c8b48                   // mov    rbx, qword [rsp + 128]
-	LONG $0x244c8b4c; BYTE $0x78               // mov    r9, qword [rsp + 120]
-	LONG $0x1479e3c4; WORD $0x0af1             // vpextrb    ecx, xmm6, 10
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_455
-	LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0a // vpinsrb    xmm1, xmm14, byte [rdi + rax], 10
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x0bf1             // vpextrb    ecx, xmm6, 11
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_893
-
-LBB0_456:
-	LONG $0x1479e3c4; WORD $0x0cf1 // vpextrb    ecx, xmm6, 12
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_457
-
-LBB0_894:
-	QUAD $0x000000f824848b48                   // mov    rax, qword [rsp + 248]
-	LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0c // vpinsrb    xmm1, xmm14, byte [rdi + rax], 12
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x0df1             // vpextrb    ecx, xmm6, 13
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_458
-	JMP  LBB0_459
-
-LBB0_443:
-	LONG $0x1479e3c4; WORD $0x01f1 // vpextrb    ecx, xmm6, 1
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_444
-
-LBB0_885:
-	LONG $0x2009e3c4; WORD $0x370c; BYTE $0x01 // vpinsrb    xmm1, xmm14, byte [rdi + rsi], 1
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x24548b48; BYTE $0x68               // mov    rdx, qword [rsp + 104]
-	LONG $0x1479e3c4; WORD $0x02f1             // vpextrb    ecx, xmm6, 2
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_445
-	JMP  LBB0_446
-
-LBB0_447:
-	LONG $0x1479e3c4; WORD $0x04f1 // vpextrb    ecx, xmm6, 4
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_448
-
-LBB0_887:
-	QUAD $0x00000108248c8b48                   // mov    rcx, qword [rsp + 264]
-	LONG $0x2009e3c4; WORD $0x0f0c; BYTE $0x04 // vpinsrb    xmm1, xmm14, byte [rdi + rcx], 4
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x05f1             // vpextrb    ecx, xmm6, 5
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_888
-
-LBB0_449:
-	LONG $0x1479e3c4; WORD $0x06f1 // vpextrb    ecx, xmm6, 6
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_450
-
-LBB0_889:
-	LONG $0x2009e3c4; WORD $0x070c; BYTE $0x06 // vpinsrb    xmm1, xmm14, byte [rdi + rax], 6
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x07f1             // vpextrb    ecx, xmm6, 7
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_890
-
-LBB0_451:
-	LONG $0x1479e3c4; WORD $0x08f1 // vpextrb    ecx, xmm6, 8
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_452
-
-LBB0_891:
-	LONG $0x2009e3c4; WORD $0x370c; BYTE $0x08 // vpinsrb    xmm1, xmm14, byte [rdi + rsi], 8
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x24548b48; BYTE $0x58               // mov    rdx, qword [rsp + 88]
-	LONG $0x1479e3c4; WORD $0x09f1             // vpextrb    ecx, xmm6, 9
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_453
-	JMP  LBB0_454
-
-LBB0_455:
-	LONG $0x1479e3c4; WORD $0x0bf1 // vpextrb    ecx, xmm6, 11
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_456
-
-LBB0_893:
-	QUAD $0x0000010024848b48                   // mov    rax, qword [rsp + 256]
-	LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0b // vpinsrb    xmm1, xmm14, byte [rdi + rax], 11
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x0cf1             // vpextrb    ecx, xmm6, 12
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_894
-
-LBB0_457:
-	LONG $0x1479e3c4; WORD $0x0df1 // vpextrb    ecx, xmm6, 13
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_459
-
-LBB0_458:
-	LONG $0x2009e3c4; WORD $0x170c; BYTE $0x0d // vpinsrb    xmm1, xmm14, byte [rdi + rdx], 13
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-
-LBB0_459:
-	LONG $0x24448b48; BYTE $0x50               // mov    rax, qword [rsp + 80]
-	LONG $0x24548b48; BYTE $0x40               // mov    rdx, qword [rsp + 64]
-	LONG $0x1479e3c4; WORD $0x0ef1             // vpextrb    ecx, xmm6, 14
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_461
-	LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0e // vpinsrb    xmm1, xmm14, byte [rdi + rax], 14
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-
-LBB0_461:
-	LONG $0x1479e3c4; WORD $0x0ff1             // vpextrb    ecx, xmm6, 15
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_463
-	LONG $0x2009a3c4; WORD $0x170c; BYTE $0x0f // vpinsrb    xmm1, xmm14, byte [rdi + r10], 15
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-
-LBB0_463:
-	LONG $0x397de3c4; WORD $0x01f1             // vextracti128    xmm1, ymm6, 1
-	LONG $0xc87ef9c5                           // vmovd    eax, xmm1
-	LONG $0x2c244489                           // mov    dword [rsp + 44], eax
-	WORD $0x01a8                               // test    al, 1
-	JE   LBB0_465
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	LONG $0x2069e3c4; WORD $0x1714; BYTE $0x00 // vpinsrb    xmm2, xmm2, byte [rdi + rdx], 0
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-
-LBB0_465:
-	LONG $0x24448b48; BYTE $0x38               // mov    rax, qword [rsp + 56]
-	LONG $0x1479e3c4; WORD $0x01c9             // vpextrb    ecx, xmm1, 1
-	LONG $0x28244c89                           // mov    dword [rsp + 40], ecx
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_466
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	LONG $0x2069e3c4; WORD $0x3714; BYTE $0x01 // vpinsrb    xmm2, xmm2, byte [rdi + rsi], 1
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479e3c4; WORD $0x02c9             // vpextrb    ecx, xmm1, 2
-	LONG $0x24244c89                           // mov    dword [rsp + 36], ecx
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_896
-
-LBB0_467:
-	LONG $0x1479e3c4; WORD $0x03c9 // vpextrb    ecx, xmm1, 3
-	LONG $0x20244c89               // mov    dword [rsp + 32], ecx
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_468
-
-LBB0_897:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	LONG $0x2069a3c4; WORD $0x0f14; BYTE $0x03 // vpinsrb    xmm2, xmm2, byte [rdi + r9], 3
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479e3c4; WORD $0x04c9             // vpextrb    ecx, xmm1, 4
-	LONG $0x1c244c89                           // mov    dword [rsp + 28], ecx
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_898
-
-LBB0_469:
-	LONG $0x1479e3c4; WORD $0x05c8 // vpextrb    eax, xmm1, 5
-	LONG $0x18244489               // mov    dword [rsp + 24], eax
-	WORD $0x01a8                   // test    al, 1
-	JE   LBB0_471
-
-LBB0_470:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	LONG $0x2069a3c4; WORD $0x2f14; BYTE $0x05 // vpinsrb    xmm2, xmm2, byte [rdi + r13], 5
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-
-LBB0_471:
-	LONG $0x24448b48; BYTE $0x70               // mov    rax, qword [rsp + 112]
-	LONG $0x1479e3c4; WORD $0x06c9             // vpextrb    ecx, xmm1, 6
-	LONG $0x14244c89                           // mov    dword [rsp + 20], ecx
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_472
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	LONG $0x2069e3c4; WORD $0x0714; BYTE $0x06 // vpinsrb    xmm2, xmm2, byte [rdi + rax], 6
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479c3c4; WORD $0x07c9             // vpextrb    r9d, xmm1, 7
-	LONG $0x01c1f641                           // test    r9b, 1
-	JNE  LBB0_900
-
-LBB0_473:
-	LONG $0x1479e3c4; WORD $0x08ca // vpextrb    edx, xmm1, 8
-	WORD $0xc2f6; BYTE $0x01       // test    dl, 1
-	JE   LBB0_474
-
-LBB0_901:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	QUAD $0x000000d824848b48                   // mov    rax, qword [rsp + 216]
-	LONG $0x2069e3c4; WORD $0x0714; BYTE $0x08 // vpinsrb    xmm2, xmm2, byte [rdi + rax], 8
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479e3c4; WORD $0x09c9             // vpextrb    ecx, xmm1, 9
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_902
-
-LBB0_475:
-	LONG $0x1479e3c4; WORD $0x0ace // vpextrb    esi, xmm1, 10
-	LONG $0x01c6f640               // test    sil, 1
-	JE   LBB0_476
-
-LBB0_903:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	QUAD $0x000000c824848b48                   // mov    rax, qword [rsp + 200]
-	LONG $0x2069e3c4; WORD $0x0714; BYTE $0x0a // vpinsrb    xmm2, xmm2, byte [rdi + rax], 10
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479e3c4; WORD $0x0bc8             // vpextrb    eax, xmm1, 11
-	WORD $0x01a8                               // test    al, 1
-	JNE  LBB0_904
-
-LBB0_477:
-	LONG $0x1479c3c4; WORD $0x0ccd // vpextrb    r13d, xmm1, 12
-	LONG $0x01c5f641               // test    r13b, 1
-	JE   LBB0_478
-
-LBB0_905:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	QUAD $0x000000b8249c8b48                   // mov    rbx, qword [rsp + 184]
-	LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x0c // vpinsrb    xmm2, xmm2, byte [rdi + rbx], 12
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479c3c4; WORD $0x0dca             // vpextrb    r10d, xmm1, 13
-	LONG $0x01c2f641                           // test    r10b, 1
-	JNE  LBB0_906
-
-LBB0_479:
-	LONG $0x1479c3c4; WORD $0x0ecb // vpextrb    r11d, xmm1, 14
-	LONG $0x01c3f641               // test    r11b, 1
-	JE   LBB0_480
-
-LBB0_907:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	QUAD $0x000000a8249c8b48                   // mov    rbx, qword [rsp + 168]
-	LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x0e // vpinsrb    xmm2, xmm2, byte [rdi + rbx], 14
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479c3c4; WORD $0x0fce             // vpextrb    r14d, xmm1, 15
-	LONG $0x01c6f641                           // test    r14b, 1
-	JNE  LBB0_481
-	JMP  LBB0_482
-
-LBB0_466:
-	LONG $0x1479e3c4; WORD $0x02c9 // vpextrb    ecx, xmm1, 2
-	LONG $0x24244c89               // mov    dword [rsp + 36], ecx
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_467
-
-LBB0_896:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x02 // vpinsrb    xmm2, xmm2, byte [rdi + rbx], 2
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479e3c4; WORD $0x03c9             // vpextrb    ecx, xmm1, 3
-	LONG $0x20244c89                           // mov    dword [rsp + 32], ecx
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_897
-
-LBB0_468:
-	LONG $0x1479e3c4; WORD $0x04c9 // vpextrb    ecx, xmm1, 4
-	LONG $0x1c244c89               // mov    dword [rsp + 28], ecx
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_469
-
-LBB0_898:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	LONG $0x2069e3c4; WORD $0x0714; BYTE $0x04 // vpinsrb    xmm2, xmm2, byte [rdi + rax], 4
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479e3c4; WORD $0x05c8             // vpextrb    eax, xmm1, 5
-	LONG $0x18244489                           // mov    dword [rsp + 24], eax
-	WORD $0x01a8                               // test    al, 1
-	JNE  LBB0_470
-	JMP  LBB0_471
-
-LBB0_472:
-	LONG $0x1479c3c4; WORD $0x07c9 // vpextrb    r9d, xmm1, 7
-	LONG $0x01c1f641               // test    r9b, 1
-	JE   LBB0_473
-
-LBB0_900:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	QUAD $0x000000f024848b48                   // mov    rax, qword [rsp + 240]
-	LONG $0x2069e3c4; WORD $0x0714; BYTE $0x07 // vpinsrb    xmm2, xmm2, byte [rdi + rax], 7
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479e3c4; WORD $0x08ca             // vpextrb    edx, xmm1, 8
-	WORD $0xc2f6; BYTE $0x01                   // test    dl, 1
-	JNE  LBB0_901
-
-LBB0_474:
-	LONG $0x1479e3c4; WORD $0x09c9 // vpextrb    ecx, xmm1, 9
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_475
-
-LBB0_902:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	QUAD $0x000000d024848b48                   // mov    rax, qword [rsp + 208]
-	LONG $0x2069e3c4; WORD $0x0714; BYTE $0x09 // vpinsrb    xmm2, xmm2, byte [rdi + rax], 9
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479e3c4; WORD $0x0ace             // vpextrb    esi, xmm1, 10
-	LONG $0x01c6f640                           // test    sil, 1
-	JNE  LBB0_903
-
-LBB0_476:
-	LONG $0x1479e3c4; WORD $0x0bc8 // vpextrb    eax, xmm1, 11
-	WORD $0x01a8                   // test    al, 1
-	JE   LBB0_477
-
-LBB0_904:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	QUAD $0x000000c0249c8b48                   // mov    rbx, qword [rsp + 192]
-	LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x0b // vpinsrb    xmm2, xmm2, byte [rdi + rbx], 11
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479c3c4; WORD $0x0ccd             // vpextrb    r13d, xmm1, 12
-	LONG $0x01c5f641                           // test    r13b, 1
-	JNE  LBB0_905
-
-LBB0_478:
-	LONG $0x1479c3c4; WORD $0x0dca // vpextrb    r10d, xmm1, 13
-	LONG $0x01c2f641               // test    r10b, 1
-	JE   LBB0_479
-
-LBB0_906:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	QUAD $0x000000b0249c8b48                   // mov    rbx, qword [rsp + 176]
-	LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x0d // vpinsrb    xmm2, xmm2, byte [rdi + rbx], 13
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479c3c4; WORD $0x0ecb             // vpextrb    r11d, xmm1, 14
-	LONG $0x01c3f641                           // test    r11b, 1
-	JNE  LBB0_907
-
-LBB0_480:
-	LONG $0x1479c3c4; WORD $0x0fce // vpextrb    r14d, xmm1, 15
-	LONG $0x01c6f641               // test    r14b, 1
-	JE   LBB0_482
-
-LBB0_481:
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	QUAD $0x000000a0249c8b48                   // mov    rbx, qword [rsp + 160]
-	LONG $0x2071e3c4; WORD $0x1f0c; BYTE $0x0f // vpinsrb    xmm1, xmm1, byte [rdi + rbx], 15
-	LONG $0x380d63c4; WORD $0x01f1             // vinserti128    ymm14, ymm14, xmm1, 1
-
-LBB0_482:
-	LONG $0x7175c1c4; WORD $0x05d6             // vpsrlw    ymm1, ymm14, 5
-	QUAD $0x00000080b5db75c5                   // vpand    ymm14, ymm1, yword 128[rbp] /* [rip + .LCPI0_4] */
-	LONG $0x7e79c1c4; BYTE $0xf7               // vmovd    r15d, xmm6
-	LONG $0x01c7f641                           // test    r15b, 1
-	JE   LBB0_483
-	LONG $0x7ef961c4; BYTE $0xfb               // vmovq    rbx, xmm15
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x00 // vpextrb    byte [r8 + rbx], xmm14, 0
-	LONG $0x1479e3c4; WORD $0x01f3             // vpextrb    ebx, xmm6, 1
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_909
-
-LBB0_484:
-	LONG $0x1479e3c4; WORD $0x02f3 // vpextrb    ebx, xmm6, 2
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	QUAD $0x000000e024bc8b4c       // mov    r15, qword [rsp + 224]
-	JE   LBB0_485
-
-LBB0_910:
-	LONG $0x397d63c4; WORD $0x01f9             // vextracti128    xmm1, ymm15, 1
-	LONG $0x7ef9e1c4; BYTE $0xcb               // vmovq    rbx, xmm1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x02 // vpextrb    byte [r8 + rbx], xmm14, 2
-	LONG $0x1479e3c4; WORD $0x03f3             // vpextrb    ebx, xmm6, 3
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_911
-
-LBB0_486:
-	LONG $0x1479e3c4; WORD $0x04f3 // vpextrb    ebx, xmm6, 4
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_487
-
-LBB0_912:
-	LONG $0x7ef9e1c4; BYTE $0xeb               // vmovq    rbx, xmm5
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x04 // vpextrb    byte [r8 + rbx], xmm14, 4
-	LONG $0x1479e3c4; WORD $0x05f3             // vpextrb    ebx, xmm6, 5
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_913
-
-LBB0_488:
-	LONG $0x1479e3c4; WORD $0x06f3 // vpextrb    ebx, xmm6, 6
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_489
-
-LBB0_914:
-	LONG $0x397de3c4; WORD $0x01e9             // vextracti128    xmm1, ymm5, 1
-	LONG $0x7ef9e1c4; BYTE $0xcb               // vmovq    rbx, xmm1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x06 // vpextrb    byte [r8 + rbx], xmm14, 6
-	LONG $0x1479e3c4; WORD $0x07f3             // vpextrb    ebx, xmm6, 7
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_915
-
-LBB0_490:
-	LONG $0x1479e3c4; WORD $0x08f3 // vpextrb    ebx, xmm6, 8
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_491
-
-LBB0_916:
-	LONG $0x7ef961c4; BYTE $0xe3               // vmovq    rbx, xmm12
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x08 // vpextrb    byte [r8 + rbx], xmm14, 8
-	LONG $0x1479e3c4; WORD $0x09f3             // vpextrb    ebx, xmm6, 9
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_917
-
-LBB0_492:
-	LONG $0x1479e3c4; WORD $0x0af3 // vpextrb    ebx, xmm6, 10
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_493
-
-LBB0_918:
-	LONG $0x397d63c4; WORD $0x01e1             // vextracti128    xmm1, ymm12, 1
-	LONG $0x7ef9e1c4; BYTE $0xcb               // vmovq    rbx, xmm1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x0a // vpextrb    byte [r8 + rbx], xmm14, 10
-	LONG $0x1479e3c4; WORD $0x0bf3             // vpextrb    ebx, xmm6, 11
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_919
-
-LBB0_494:
-	LONG $0x1479e3c4; WORD $0x0cf3 // vpextrb    ebx, xmm6, 12
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_495
-
-LBB0_920:
-	LONG $0x7ef961c4; BYTE $0xdb               // vmovq    rbx, xmm11
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x0c // vpextrb    byte [r8 + rbx], xmm14, 12
-	LONG $0x1479e3c4; WORD $0x0df3             // vpextrb    ebx, xmm6, 13
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_921
-
-LBB0_496:
-	LONG $0x1479e3c4; WORD $0x0ef3 // vpextrb    ebx, xmm6, 14
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_497
-
-LBB0_922:
-	LONG $0x397d63c4; WORD $0x01d9             // vextracti128    xmm1, ymm11, 1
-	LONG $0x7ef9e1c4; BYTE $0xcb               // vmovq    rbx, xmm1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x0e // vpextrb    byte [r8 + rbx], xmm14, 14
-	LONG $0x1479e3c4; WORD $0x0ff3             // vpextrb    ebx, xmm6, 15
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_923
-
-LBB0_498:
-	LONG $0x2c2444f6; BYTE $0x01 // test    byte [rsp + 44], 1
-	JE   LBB0_499
-
-LBB0_924:
-	LONG $0x7ef961c4; BYTE $0xd3               // vmovq    rbx, xmm10
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x00 // vpextrb    byte [r8 + rbx], xmm1, 0
-	LONG $0x282444f6; BYTE $0x01               // test    byte [rsp + 40], 1
-	JNE  LBB0_925
-
-LBB0_500:
-	LONG $0x242444f6; BYTE $0x01 // test    byte [rsp + 36], 1
-	JE   LBB0_501
-
-LBB0_926:
-	LONG $0x397d63c4; WORD $0x01d1             // vextracti128    xmm1, ymm10, 1
-	LONG $0x7ef9e1c4; BYTE $0xcb               // vmovq    rbx, xmm1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x02 // vpextrb    byte [r8 + rbx], xmm1, 2
-	LONG $0x202444f6; BYTE $0x01               // test    byte [rsp + 32], 1
-	JNE  LBB0_927
-
-LBB0_502:
-	LONG $0x1c2444f6; BYTE $0x01 // test    byte [rsp + 28], 1
-	JE   LBB0_503
-
-LBB0_928:
-	LONG $0x7ef961c4; BYTE $0xcb               // vmovq    rbx, xmm9
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x04 // vpextrb    byte [r8 + rbx], xmm1, 4
-	LONG $0x182444f6; BYTE $0x01               // test    byte [rsp + 24], 1
-	JNE  LBB0_929
-
-LBB0_504:
-	LONG $0x142444f6; BYTE $0x01 // test    byte [rsp + 20], 1
-	JE   LBB0_505
-
-LBB0_930:
-	LONG $0x397d63c4; WORD $0x01c9             // vextracti128    xmm1, ymm9, 1
-	LONG $0x7ef9e1c4; BYTE $0xcb               // vmovq    rbx, xmm1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x06 // vpextrb    byte [r8 + rbx], xmm1, 6
-	LONG $0x01c1f641                           // test    r9b, 1
-	JNE  LBB0_931
-
-LBB0_506:
-	WORD $0xc2f6; BYTE $0x01 // test    dl, 1
-	QUAD $0x00000128249c8b48 // mov    rbx, qword [rsp + 296]
-	JE   LBB0_507
-
-LBB0_932:
-	LONG $0x7ef961c4; BYTE $0xc2               // vmovq    rdx, xmm8
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x100c; BYTE $0x08 // vpextrb    byte [r8 + rdx], xmm1, 8
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_933
-
-LBB0_508:
-	LONG $0x01c6f640         // test    sil, 1
-	QUAD $0x0000013024948b48 // mov    rdx, qword [rsp + 304]
-	JE   LBB0_509
-
-LBB0_934:
-	LONG $0x397d63c4; WORD $0x01c1             // vextracti128    xmm1, ymm8, 1
-	LONG $0x7ef9e1c4; BYTE $0xc9               // vmovq    rcx, xmm1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0a // vpextrb    byte [r8 + rcx], xmm1, 10
-	WORD $0x01a8                               // test    al, 1
-	QUAD $0x0000009824b48b48                   // mov    rsi, qword [rsp + 152]
-	JNE  LBB0_935
-
-LBB0_510:
-	LONG $0x01c5f641 // test    r13b, 1
-	JE   LBB0_511
-
-LBB0_936:
-	LONG $0x7ef9e1c4; BYTE $0xf9               // vmovq    rcx, xmm7
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0c // vpextrb    byte [r8 + rcx], xmm1, 12
-	LONG $0x01c2f641                           // test    r10b, 1
-	QUAD $0x0000011824ac8b4c                   // mov    r13, qword [rsp + 280]
-	JNE  LBB0_937
-
-LBB0_512:
-	LONG $0x01c3f641 // test    r11b, 1
-	JE   LBB0_513
-
-LBB0_938:
-	LONG $0x397de3c4; WORD $0x01f9             // vextracti128    xmm1, ymm7, 1
-	LONG $0x7ef9e1c4; BYTE $0xc9               // vmovq    rcx, xmm1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0e // vpextrb    byte [r8 + rcx], xmm1, 14
-	LONG $0x01c6f641                           // test    r14b, 1
-	QUAD $0x0000012024848b48                   // mov    rax, qword [rsp + 288]
-	QUAD $0x000000e8248c8b4c                   // mov    r9, qword [rsp + 232]
-	JNE  LBB0_514
-	JMP  LBB0_515
-
-LBB0_483:
-	LONG $0x1479e3c4; WORD $0x01f3 // vpextrb    ebx, xmm6, 1
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_484
-
-LBB0_909:
-	LONG $0x16f963c4; WORD $0x01fb             // vpextrq    rbx, xmm15, 1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x01 // vpextrb    byte [r8 + rbx], xmm14, 1
-	LONG $0x1479e3c4; WORD $0x02f3             // vpextrb    ebx, xmm6, 2
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	QUAD $0x000000e024bc8b4c                   // mov    r15, qword [rsp + 224]
-	JNE  LBB0_910
-
-LBB0_485:
-	LONG $0x1479e3c4; WORD $0x03f3 // vpextrb    ebx, xmm6, 3
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_486
-
-LBB0_911:
-	LONG $0x397d63c4; WORD $0x01f9             // vextracti128    xmm1, ymm15, 1
-	LONG $0x16f9e3c4; WORD $0x01cb             // vpextrq    rbx, xmm1, 1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x03 // vpextrb    byte [r8 + rbx], xmm14, 3
-	LONG $0x1479e3c4; WORD $0x04f3             // vpextrb    ebx, xmm6, 4
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_912
-
-LBB0_487:
-	LONG $0x1479e3c4; WORD $0x05f3 // vpextrb    ebx, xmm6, 5
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_488
-
-LBB0_913:
-	LONG $0x16f9e3c4; WORD $0x01eb             // vpextrq    rbx, xmm5, 1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x05 // vpextrb    byte [r8 + rbx], xmm14, 5
-	LONG $0x1479e3c4; WORD $0x06f3             // vpextrb    ebx, xmm6, 6
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_914
-
-LBB0_489:
-	LONG $0x1479e3c4; WORD $0x07f3 // vpextrb    ebx, xmm6, 7
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_490
-
-LBB0_915:
-	LONG $0x397de3c4; WORD $0x01e9             // vextracti128    xmm1, ymm5, 1
-	LONG $0x16f9e3c4; WORD $0x01cb             // vpextrq    rbx, xmm1, 1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x07 // vpextrb    byte [r8 + rbx], xmm14, 7
-	LONG $0x1479e3c4; WORD $0x08f3             // vpextrb    ebx, xmm6, 8
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_916
-
-LBB0_491:
-	LONG $0x1479e3c4; WORD $0x09f3 // vpextrb    ebx, xmm6, 9
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_492
-
-LBB0_917:
-	LONG $0x16f963c4; WORD $0x01e3             // vpextrq    rbx, xmm12, 1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x09 // vpextrb    byte [r8 + rbx], xmm14, 9
-	LONG $0x1479e3c4; WORD $0x0af3             // vpextrb    ebx, xmm6, 10
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_918
-
-LBB0_493:
-	LONG $0x1479e3c4; WORD $0x0bf3 // vpextrb    ebx, xmm6, 11
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_494
-
-LBB0_919:
-	LONG $0x397d63c4; WORD $0x01e1             // vextracti128    xmm1, ymm12, 1
-	LONG $0x16f9e3c4; WORD $0x01cb             // vpextrq    rbx, xmm1, 1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x0b // vpextrb    byte [r8 + rbx], xmm14, 11
-	LONG $0x1479e3c4; WORD $0x0cf3             // vpextrb    ebx, xmm6, 12
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_920
-
-LBB0_495:
-	LONG $0x1479e3c4; WORD $0x0df3 // vpextrb    ebx, xmm6, 13
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_496
-
-LBB0_921:
-	LONG $0x16f963c4; WORD $0x01db             // vpextrq    rbx, xmm11, 1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x0d // vpextrb    byte [r8 + rbx], xmm14, 13
-	LONG $0x1479e3c4; WORD $0x0ef3             // vpextrb    ebx, xmm6, 14
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_922
-
-LBB0_497:
-	LONG $0x1479e3c4; WORD $0x0ff3 // vpextrb    ebx, xmm6, 15
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_498
-
-LBB0_923:
-	LONG $0x397d63c4; WORD $0x01d9             // vextracti128    xmm1, ymm11, 1
-	LONG $0x16f9e3c4; WORD $0x01cb             // vpextrq    rbx, xmm1, 1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x0f // vpextrb    byte [r8 + rbx], xmm14, 15
-	LONG $0x2c2444f6; BYTE $0x01               // test    byte [rsp + 44], 1
-	JNE  LBB0_924
-
-LBB0_499:
-	LONG $0x282444f6; BYTE $0x01 // test    byte [rsp + 40], 1
-	JE   LBB0_500
-
-LBB0_925:
-	LONG $0x16f963c4; WORD $0x01d3             // vpextrq    rbx, xmm10, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x01 // vpextrb    byte [r8 + rbx], xmm1, 1
-	LONG $0x242444f6; BYTE $0x01               // test    byte [rsp + 36], 1
-	JNE  LBB0_926
-
-LBB0_501:
-	LONG $0x202444f6; BYTE $0x01 // test    byte [rsp + 32], 1
-	JE   LBB0_502
-
-LBB0_927:
-	LONG $0x397d63c4; WORD $0x01d1             // vextracti128    xmm1, ymm10, 1
-	LONG $0x16f9e3c4; WORD $0x01cb             // vpextrq    rbx, xmm1, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x03 // vpextrb    byte [r8 + rbx], xmm1, 3
-	LONG $0x1c2444f6; BYTE $0x01               // test    byte [rsp + 28], 1
-	JNE  LBB0_928
-
-LBB0_503:
-	LONG $0x182444f6; BYTE $0x01 // test    byte [rsp + 24], 1
-	JE   LBB0_504
-
-LBB0_929:
-	LONG $0x16f963c4; WORD $0x01cb             // vpextrq    rbx, xmm9, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x05 // vpextrb    byte [r8 + rbx], xmm1, 5
-	LONG $0x142444f6; BYTE $0x01               // test    byte [rsp + 20], 1
-	JNE  LBB0_930
-
-LBB0_505:
-	LONG $0x01c1f641 // test    r9b, 1
-	JE   LBB0_506
-
-LBB0_931:
-	LONG $0x397d63c4; WORD $0x01c9             // vextracti128    xmm1, ymm9, 1
-	LONG $0x16f9e3c4; WORD $0x01cb             // vpextrq    rbx, xmm1, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x07 // vpextrb    byte [r8 + rbx], xmm1, 7
-	WORD $0xc2f6; BYTE $0x01                   // test    dl, 1
-	QUAD $0x00000128249c8b48                   // mov    rbx, qword [rsp + 296]
-	JNE  LBB0_932
-
-LBB0_507:
-	WORD $0xc1f6; BYTE $0x01 // test    cl, 1
-	JE   LBB0_508
-
-LBB0_933:
-	LONG $0x16f963c4; WORD $0x01c1             // vpextrq    rcx, xmm8, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x09 // vpextrb    byte [r8 + rcx], xmm1, 9
-	LONG $0x01c6f640                           // test    sil, 1
-	QUAD $0x0000013024948b48                   // mov    rdx, qword [rsp + 304]
-	JNE  LBB0_934
-
-LBB0_509:
-	WORD $0x01a8             // test    al, 1
-	QUAD $0x0000009824b48b48 // mov    rsi, qword [rsp + 152]
-	JE   LBB0_510
-
-LBB0_935:
-	LONG $0x397d63c4; WORD $0x01c1             // vextracti128    xmm1, ymm8, 1
-	LONG $0x16f9e3c4; WORD $0x01c9             // vpextrq    rcx, xmm1, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0b // vpextrb    byte [r8 + rcx], xmm1, 11
-	LONG $0x01c5f641                           // test    r13b, 1
-	JNE  LBB0_936
-
-LBB0_511:
-	LONG $0x01c2f641         // test    r10b, 1
-	QUAD $0x0000011824ac8b4c // mov    r13, qword [rsp + 280]
-	JE   LBB0_512
-
-LBB0_937:
-	LONG $0x16f9e3c4; WORD $0x01f9             // vpextrq    rcx, xmm7, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0d // vpextrb    byte [r8 + rcx], xmm1, 13
-	LONG $0x01c3f641                           // test    r11b, 1
-	JNE  LBB0_938
-
-LBB0_513:
-	LONG $0x01c6f641         // test    r14b, 1
-	QUAD $0x0000012024848b48 // mov    rax, qword [rsp + 288]
-	QUAD $0x000000e8248c8b4c // mov    r9, qword [rsp + 232]
-	JE   LBB0_515
-
-LBB0_514:
-	LONG $0x397de3c4; WORD $0x01f9             // vextracti128    xmm1, ymm7, 1
-	LONG $0x16f9e3c4; WORD $0x01c9             // vpextrq    rcx, xmm1, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0f // vpextrb    byte [r8 + rcx], xmm1, 15
-
-LBB0_515:
-	QUAD $0x000260248c6ffdc5; BYTE $0x00 // vmovdqa    ymm1, yword [rsp + 608]
-	QUAD $0x00020024bceb75c5; BYTE $0x00 // vpor    ymm15, ymm1, yword [rsp + 512]
-	QUAD $0x0001e024acebf5c5; BYTE $0x00 // vpor    ymm5, ymm1, yword [rsp + 480]
-	QUAD $0x0001802494eb75c5; BYTE $0x00 // vpor    ymm10, ymm1, yword [rsp + 384]
-	QUAD $0x000160248ceb75c5; BYTE $0x00 // vpor    ymm9, ymm1, yword [rsp + 352]
-	QUAD $0x0001c024a4eb75c5; BYTE $0x00 // vpor    ymm12, ymm1, yword [rsp + 448]
-	QUAD $0x0001a0249ceb75c5; BYTE $0x00 // vpor    ymm11, ymm1, yword [rsp + 416]
-	QUAD $0x0001402484eb75c5; BYTE $0x00 // vpor    ymm8, ymm1, yword [rsp + 320]
-	LONG $0xf9ebddc5                     // vpor    ymm7, ymm4, ymm1
-	LONG $0x463de3c4; WORD $0x31cf       // vperm2i128    ymm1, ymm8, ymm7, 49
-	LONG $0x383de3c4; WORD $0x01d7       // vinserti128    ymm2, ymm8, xmm7, 1
-	LONG $0xc9c6ecc5; BYTE $0x88         // vshufps    ymm1, ymm2, ymm1, 136
-	LONG $0x461dc3c4; WORD $0x31d3       // vperm2i128    ymm2, ymm12, ymm11, 49
-	LONG $0x381dc3c4; WORD $0x01db       // vinserti128    ymm3, ymm12, xmm11, 1
-	LONG $0xd2c6e4c5; BYTE $0x88         // vshufps    ymm2, ymm3, ymm2, 136
-	LONG $0x462dc3c4; WORD $0x31d9       // vperm2i128    ymm3, ymm10, ymm9, 49
-	LONG $0x382d43c4; WORD $0x01e9       // vinserti128    ymm13, ymm10, xmm9, 1
-	LONG $0xdbc694c5; BYTE $0x88         // vshufps    ymm3, ymm13, ymm3, 136
-	LONG $0x460563c4; WORD $0x31ed       // vperm2i128    ymm13, ymm15, ymm5, 49
-	LONG $0x380563c4; WORD $0x01f5       // vinserti128    ymm14, ymm15, xmm5, 1
-	LONG $0xc60c41c4; WORD $0x88ed       // vshufps    ymm13, ymm14, ymm13, 136
-	LONG $0x667d41c4; BYTE $0xed         // vpcmpgtd    ymm13, ymm0, ymm13
-	LONG $0xdb66fdc5                     // vpcmpgtd    ymm3, ymm0, ymm3
-	LONG $0xdb6b95c5                     // vpackssdw    ymm3, ymm13, ymm3
-	LONG $0xd266fdc5                     // vpcmpgtd    ymm2, ymm0, ymm2
-	LONG $0xc966fdc5                     // vpcmpgtd    ymm1, ymm0, ymm1
-	LONG $0xc96bedc5                     // vpackssdw    ymm1, ymm2, ymm1
-	LONG $0x00fde3c4; WORD $0xd8d3       // vpermq    ymm2, ymm3, 216
-	LONG $0x00fde3c4; WORD $0xd8c9       // vpermq    ymm1, ymm1, 216
-	LONG $0xc963edc5                     // vpacksswb    ymm1, ymm2, ymm1
-	LONG $0xf6dbf5c5                     // vpand    ymm6, ymm1, ymm6
-	LONG $0xf17ef9c5                     // vmovd    ecx, xmm6
-	WORD $0xc1f6; BYTE $0x01             // test    cl, 1
-	JE   LBB0_516
-	LONG $0x787d62c4; WORD $0x1734       // vpbroadcastb    ymm14, byte [rdi + rdx]
-	LONG $0x1479e3c4; WORD $0x01f1       // vpextrb    ecx, xmm6, 1
-	WORD $0xc1f6; BYTE $0x01             // test    cl, 1
-	JNE  LBB0_940
-
-LBB0_517:
-	LONG $0x24548b48; BYTE $0x68   // mov    rdx, qword [rsp + 104]
-	LONG $0x1479e3c4; WORD $0x02f1 // vpextrb    ecx, xmm6, 2
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_519
-
-LBB0_518:
-	LONG $0x2009e3c4; WORD $0x1f0c; BYTE $0x02 // vpinsrb    xmm1, xmm14, byte [rdi + rbx], 2
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-
-LBB0_519:
-	LONG $0x24748b48; BYTE $0x60               // mov    rsi, qword [rsp + 96]
-	LONG $0x24548b4c; BYTE $0x48               // mov    r10, qword [rsp + 72]
-	LONG $0x1479e3c4; WORD $0x03f1             // vpextrb    ecx, xmm6, 3
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_520
-	QUAD $0x00000110248c8b48                   // mov    rcx, qword [rsp + 272]
-	LONG $0x2009e3c4; WORD $0x0f0c; BYTE $0x03 // vpinsrb    xmm1, xmm14, byte [rdi + rcx], 3
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x04f1             // vpextrb    ecx, xmm6, 4
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_942
-
-LBB0_521:
-	LONG $0x1479e3c4; WORD $0x05f1 // vpextrb    ecx, xmm6, 5
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_522
-
-LBB0_943:
-	LONG $0x2009e3c4; WORD $0x170c; BYTE $0x05 // vpinsrb    xmm1, xmm14, byte [rdi + rdx], 5
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x06f1             // vpextrb    ecx, xmm6, 6
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_944
-
-LBB0_523:
-	LONG $0x1479e3c4; WORD $0x07f1 // vpextrb    ecx, xmm6, 7
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_524
-
-LBB0_945:
-	LONG $0x2009a3c4; WORD $0x0f0c; BYTE $0x07 // vpinsrb    xmm1, xmm14, byte [rdi + r9], 7
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x08f1             // vpextrb    ecx, xmm6, 8
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_946
-
-LBB0_525:
-	LONG $0x24548b48; BYTE $0x58   // mov    rdx, qword [rsp + 88]
-	LONG $0x1479e3c4; WORD $0x09f1 // vpextrb    ecx, xmm6, 9
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_527
-
-LBB0_526:
-	LONG $0x2009a3c4; WORD $0x3f0c; BYTE $0x09 // vpinsrb    xmm1, xmm14, byte [rdi + r15], 9
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-
-LBB0_527:
-	QUAD $0x0000009024848b48                   // mov    rax, qword [rsp + 144]
-	QUAD $0x0000008824b48b48                   // mov    rsi, qword [rsp + 136]
-	QUAD $0x00000080249c8b48                   // mov    rbx, qword [rsp + 128]
-	LONG $0x244c8b4c; BYTE $0x78               // mov    r9, qword [rsp + 120]
-	LONG $0x1479e3c4; WORD $0x0af1             // vpextrb    ecx, xmm6, 10
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_528
-	LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0a // vpinsrb    xmm1, xmm14, byte [rdi + rax], 10
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x0bf1             // vpextrb    ecx, xmm6, 11
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_948
-
-LBB0_529:
-	LONG $0x1479e3c4; WORD $0x0cf1 // vpextrb    ecx, xmm6, 12
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_530
-
-LBB0_949:
-	QUAD $0x000000f824848b48                   // mov    rax, qword [rsp + 248]
-	LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0c // vpinsrb    xmm1, xmm14, byte [rdi + rax], 12
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x0df1             // vpextrb    ecx, xmm6, 13
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_531
-	JMP  LBB0_532
-
-LBB0_516:
-	LONG $0x1479e3c4; WORD $0x01f1 // vpextrb    ecx, xmm6, 1
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_517
-
-LBB0_940:
-	LONG $0x2009e3c4; WORD $0x370c; BYTE $0x01 // vpinsrb    xmm1, xmm14, byte [rdi + rsi], 1
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x24548b48; BYTE $0x68               // mov    rdx, qword [rsp + 104]
-	LONG $0x1479e3c4; WORD $0x02f1             // vpextrb    ecx, xmm6, 2
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_518
-	JMP  LBB0_519
-
-LBB0_520:
-	LONG $0x1479e3c4; WORD $0x04f1 // vpextrb    ecx, xmm6, 4
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_521
-
-LBB0_942:
-	QUAD $0x00000108248c8b48                   // mov    rcx, qword [rsp + 264]
-	LONG $0x2009e3c4; WORD $0x0f0c; BYTE $0x04 // vpinsrb    xmm1, xmm14, byte [rdi + rcx], 4
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x05f1             // vpextrb    ecx, xmm6, 5
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_943
-
-LBB0_522:
-	LONG $0x1479e3c4; WORD $0x06f1 // vpextrb    ecx, xmm6, 6
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_523
-
-LBB0_944:
-	LONG $0x2009e3c4; WORD $0x070c; BYTE $0x06 // vpinsrb    xmm1, xmm14, byte [rdi + rax], 6
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x07f1             // vpextrb    ecx, xmm6, 7
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_945
-
-LBB0_524:
-	LONG $0x1479e3c4; WORD $0x08f1 // vpextrb    ecx, xmm6, 8
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_525
-
-LBB0_946:
-	LONG $0x2009e3c4; WORD $0x370c; BYTE $0x08 // vpinsrb    xmm1, xmm14, byte [rdi + rsi], 8
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x24548b48; BYTE $0x58               // mov    rdx, qword [rsp + 88]
-	LONG $0x1479e3c4; WORD $0x09f1             // vpextrb    ecx, xmm6, 9
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_526
-	JMP  LBB0_527
-
-LBB0_528:
-	LONG $0x1479e3c4; WORD $0x0bf1 // vpextrb    ecx, xmm6, 11
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_529
-
-LBB0_948:
-	QUAD $0x0000010024848b48                   // mov    rax, qword [rsp + 256]
-	LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0b // vpinsrb    xmm1, xmm14, byte [rdi + rax], 11
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-	LONG $0x1479e3c4; WORD $0x0cf1             // vpextrb    ecx, xmm6, 12
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_949
-
-LBB0_530:
-	LONG $0x1479e3c4; WORD $0x0df1 // vpextrb    ecx, xmm6, 13
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_532
-
-LBB0_531:
-	LONG $0x2009e3c4; WORD $0x170c; BYTE $0x0d // vpinsrb    xmm1, xmm14, byte [rdi + rdx], 13
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-
-LBB0_532:
-	LONG $0x24448b48; BYTE $0x50               // mov    rax, qword [rsp + 80]
-	LONG $0x24548b48; BYTE $0x40               // mov    rdx, qword [rsp + 64]
-	LONG $0x1479e3c4; WORD $0x0ef1             // vpextrb    ecx, xmm6, 14
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_534
-	LONG $0x2009e3c4; WORD $0x070c; BYTE $0x0e // vpinsrb    xmm1, xmm14, byte [rdi + rax], 14
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-
-LBB0_534:
-	LONG $0x1479e3c4; WORD $0x0ff1             // vpextrb    ecx, xmm6, 15
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_536
-	LONG $0x2009a3c4; WORD $0x170c; BYTE $0x0f // vpinsrb    xmm1, xmm14, byte [rdi + r10], 15
-	LONG $0x020d63c4; WORD $0x0ff1             // vpblendd    ymm14, ymm14, ymm1, 15
-
-LBB0_536:
-	LONG $0x397de3c4; WORD $0x01f1             // vextracti128    xmm1, ymm6, 1
-	LONG $0xc87ef9c5                           // vmovd    eax, xmm1
-	LONG $0x2c244489                           // mov    dword [rsp + 44], eax
-	WORD $0x01a8                               // test    al, 1
-	JE   LBB0_538
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	LONG $0x2069e3c4; WORD $0x1714; BYTE $0x00 // vpinsrb    xmm2, xmm2, byte [rdi + rdx], 0
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-
-LBB0_538:
-	LONG $0x24448b48; BYTE $0x38               // mov    rax, qword [rsp + 56]
-	LONG $0x1479e3c4; WORD $0x01c9             // vpextrb    ecx, xmm1, 1
-	LONG $0x28244c89                           // mov    dword [rsp + 40], ecx
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_539
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	LONG $0x2069e3c4; WORD $0x3714; BYTE $0x01 // vpinsrb    xmm2, xmm2, byte [rdi + rsi], 1
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479e3c4; WORD $0x02c9             // vpextrb    ecx, xmm1, 2
-	LONG $0x24244c89                           // mov    dword [rsp + 36], ecx
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_951
-
-LBB0_540:
-	LONG $0x1479e3c4; WORD $0x03c9 // vpextrb    ecx, xmm1, 3
-	LONG $0x20244c89               // mov    dword [rsp + 32], ecx
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_541
-
-LBB0_952:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	LONG $0x2069a3c4; WORD $0x0f14; BYTE $0x03 // vpinsrb    xmm2, xmm2, byte [rdi + r9], 3
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479e3c4; WORD $0x04c9             // vpextrb    ecx, xmm1, 4
-	LONG $0x1c244c89                           // mov    dword [rsp + 28], ecx
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_953
-
-LBB0_542:
-	LONG $0x1479e3c4; WORD $0x05c8 // vpextrb    eax, xmm1, 5
-	LONG $0x18244489               // mov    dword [rsp + 24], eax
-	WORD $0x01a8                   // test    al, 1
-	JE   LBB0_544
-
-LBB0_543:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	LONG $0x2069a3c4; WORD $0x2f14; BYTE $0x05 // vpinsrb    xmm2, xmm2, byte [rdi + r13], 5
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-
-LBB0_544:
-	LONG $0x24448b48; BYTE $0x70               // mov    rax, qword [rsp + 112]
-	LONG $0x1479e3c4; WORD $0x06c9             // vpextrb    ecx, xmm1, 6
-	LONG $0x14244c89                           // mov    dword [rsp + 20], ecx
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_545
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	LONG $0x2069e3c4; WORD $0x0714; BYTE $0x06 // vpinsrb    xmm2, xmm2, byte [rdi + rax], 6
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479c3c4; WORD $0x07c9             // vpextrb    r9d, xmm1, 7
-	LONG $0x01c1f641                           // test    r9b, 1
-	JNE  LBB0_955
-
-LBB0_546:
-	LONG $0x1479e3c4; WORD $0x08ca // vpextrb    edx, xmm1, 8
-	WORD $0xc2f6; BYTE $0x01       // test    dl, 1
-	JE   LBB0_547
-
-LBB0_956:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	QUAD $0x000000d824848b48                   // mov    rax, qword [rsp + 216]
-	LONG $0x2069e3c4; WORD $0x0714; BYTE $0x08 // vpinsrb    xmm2, xmm2, byte [rdi + rax], 8
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479e3c4; WORD $0x09c9             // vpextrb    ecx, xmm1, 9
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_957
-
-LBB0_548:
-	LONG $0x1479e3c4; WORD $0x0ace // vpextrb    esi, xmm1, 10
-	LONG $0x01c6f640               // test    sil, 1
-	JE   LBB0_549
-
-LBB0_958:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	QUAD $0x000000c824848b48                   // mov    rax, qword [rsp + 200]
-	LONG $0x2069e3c4; WORD $0x0714; BYTE $0x0a // vpinsrb    xmm2, xmm2, byte [rdi + rax], 10
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479e3c4; WORD $0x0bc8             // vpextrb    eax, xmm1, 11
-	WORD $0x01a8                               // test    al, 1
-	JNE  LBB0_959
-
-LBB0_550:
-	LONG $0x1479c3c4; WORD $0x0ccd // vpextrb    r13d, xmm1, 12
-	LONG $0x01c5f641               // test    r13b, 1
-	JE   LBB0_551
-
-LBB0_960:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	QUAD $0x000000b8249c8b48                   // mov    rbx, qword [rsp + 184]
-	LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x0c // vpinsrb    xmm2, xmm2, byte [rdi + rbx], 12
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479c3c4; WORD $0x0dca             // vpextrb    r10d, xmm1, 13
-	LONG $0x01c2f641                           // test    r10b, 1
-	JNE  LBB0_961
-
-LBB0_552:
-	LONG $0x1479c3c4; WORD $0x0ecb // vpextrb    r11d, xmm1, 14
-	LONG $0x01c3f641               // test    r11b, 1
-	JE   LBB0_553
-
-LBB0_962:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	QUAD $0x000000a8249c8b48                   // mov    rbx, qword [rsp + 168]
-	LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x0e // vpinsrb    xmm2, xmm2, byte [rdi + rbx], 14
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479c3c4; WORD $0x0fce             // vpextrb    r14d, xmm1, 15
-	LONG $0x01c6f641                           // test    r14b, 1
-	JNE  LBB0_554
-	JMP  LBB0_555
-
-LBB0_539:
-	LONG $0x1479e3c4; WORD $0x02c9 // vpextrb    ecx, xmm1, 2
-	LONG $0x24244c89               // mov    dword [rsp + 36], ecx
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_540
-
-LBB0_951:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x02 // vpinsrb    xmm2, xmm2, byte [rdi + rbx], 2
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479e3c4; WORD $0x03c9             // vpextrb    ecx, xmm1, 3
-	LONG $0x20244c89                           // mov    dword [rsp + 32], ecx
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_952
-
-LBB0_541:
-	LONG $0x1479e3c4; WORD $0x04c9 // vpextrb    ecx, xmm1, 4
-	LONG $0x1c244c89               // mov    dword [rsp + 28], ecx
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_542
-
-LBB0_953:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	LONG $0x2069e3c4; WORD $0x0714; BYTE $0x04 // vpinsrb    xmm2, xmm2, byte [rdi + rax], 4
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479e3c4; WORD $0x05c8             // vpextrb    eax, xmm1, 5
-	LONG $0x18244489                           // mov    dword [rsp + 24], eax
-	WORD $0x01a8                               // test    al, 1
-	JNE  LBB0_543
-	JMP  LBB0_544
-
-LBB0_545:
-	LONG $0x1479c3c4; WORD $0x07c9 // vpextrb    r9d, xmm1, 7
-	LONG $0x01c1f641               // test    r9b, 1
-	JE   LBB0_546
-
-LBB0_955:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	QUAD $0x000000f024848b48                   // mov    rax, qword [rsp + 240]
-	LONG $0x2069e3c4; WORD $0x0714; BYTE $0x07 // vpinsrb    xmm2, xmm2, byte [rdi + rax], 7
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479e3c4; WORD $0x08ca             // vpextrb    edx, xmm1, 8
-	WORD $0xc2f6; BYTE $0x01                   // test    dl, 1
-	JNE  LBB0_956
-
-LBB0_547:
-	LONG $0x1479e3c4; WORD $0x09c9 // vpextrb    ecx, xmm1, 9
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_548
-
-LBB0_957:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	QUAD $0x000000d024848b48                   // mov    rax, qword [rsp + 208]
-	LONG $0x2069e3c4; WORD $0x0714; BYTE $0x09 // vpinsrb    xmm2, xmm2, byte [rdi + rax], 9
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479e3c4; WORD $0x0ace             // vpextrb    esi, xmm1, 10
-	LONG $0x01c6f640                           // test    sil, 1
-	JNE  LBB0_958
-
-LBB0_549:
-	LONG $0x1479e3c4; WORD $0x0bc8 // vpextrb    eax, xmm1, 11
-	WORD $0x01a8                   // test    al, 1
-	JE   LBB0_550
-
-LBB0_959:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	QUAD $0x000000c0249c8b48                   // mov    rbx, qword [rsp + 192]
-	LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x0b // vpinsrb    xmm2, xmm2, byte [rdi + rbx], 11
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479c3c4; WORD $0x0ccd             // vpextrb    r13d, xmm1, 12
-	LONG $0x01c5f641                           // test    r13b, 1
-	JNE  LBB0_960
-
-LBB0_551:
-	LONG $0x1479c3c4; WORD $0x0dca // vpextrb    r10d, xmm1, 13
-	LONG $0x01c2f641               // test    r10b, 1
-	JE   LBB0_552
-
-LBB0_961:
-	LONG $0x397d63c4; WORD $0x01f2             // vextracti128    xmm2, ymm14, 1
-	QUAD $0x000000b0249c8b48                   // mov    rbx, qword [rsp + 176]
-	LONG $0x2069e3c4; WORD $0x1f14; BYTE $0x0d // vpinsrb    xmm2, xmm2, byte [rdi + rbx], 13
-	LONG $0x380d63c4; WORD $0x01f2             // vinserti128    ymm14, ymm14, xmm2, 1
-	LONG $0x1479c3c4; WORD $0x0ecb             // vpextrb    r11d, xmm1, 14
-	LONG $0x01c3f641                           // test    r11b, 1
-	JNE  LBB0_962
-
-LBB0_553:
-	LONG $0x1479c3c4; WORD $0x0fce // vpextrb    r14d, xmm1, 15
-	LONG $0x01c6f641               // test    r14b, 1
-	JE   LBB0_555
-
-LBB0_554:
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	QUAD $0x000000a0249c8b48                   // mov    rbx, qword [rsp + 160]
-	LONG $0x2071e3c4; WORD $0x1f0c; BYTE $0x0f // vpinsrb    xmm1, xmm1, byte [rdi + rbx], 15
-	LONG $0x380d63c4; WORD $0x01f1             // vinserti128    ymm14, ymm14, xmm1, 1
-
-LBB0_555:
-	LONG $0x7175c1c4; WORD $0x06d6             // vpsrlw    ymm1, ymm14, 6
-	QUAD $0x00000080b5db75c5                   // vpand    ymm14, ymm1, yword 128[rbp] /* [rip + .LCPI0_4] */
-	LONG $0x7e79c1c4; BYTE $0xf7               // vmovd    r15d, xmm6
-	LONG $0x01c7f641                           // test    r15b, 1
-	JE   LBB0_556
-	LONG $0x7ef961c4; BYTE $0xfb               // vmovq    rbx, xmm15
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x00 // vpextrb    byte [r8 + rbx], xmm14, 0
-	LONG $0x1479e3c4; WORD $0x01f3             // vpextrb    ebx, xmm6, 1
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_964
-
-LBB0_557:
-	LONG $0x1479e3c4; WORD $0x02f3 // vpextrb    ebx, xmm6, 2
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	QUAD $0x000000e024bc8b4c       // mov    r15, qword [rsp + 224]
-	JE   LBB0_558
-
-LBB0_965:
-	LONG $0x397d63c4; WORD $0x01f9             // vextracti128    xmm1, ymm15, 1
-	LONG $0x7ef9e1c4; BYTE $0xcb               // vmovq    rbx, xmm1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x02 // vpextrb    byte [r8 + rbx], xmm14, 2
-	LONG $0x1479e3c4; WORD $0x03f3             // vpextrb    ebx, xmm6, 3
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_966
-
-LBB0_559:
-	LONG $0x1479e3c4; WORD $0x04f3 // vpextrb    ebx, xmm6, 4
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_560
-
-LBB0_967:
-	LONG $0x7ef9e1c4; BYTE $0xeb               // vmovq    rbx, xmm5
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x04 // vpextrb    byte [r8 + rbx], xmm14, 4
-	LONG $0x1479e3c4; WORD $0x05f3             // vpextrb    ebx, xmm6, 5
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_968
-
-LBB0_561:
-	LONG $0x1479e3c4; WORD $0x06f3 // vpextrb    ebx, xmm6, 6
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_562
-
-LBB0_969:
-	LONG $0x397de3c4; WORD $0x01e9             // vextracti128    xmm1, ymm5, 1
-	LONG $0x7ef9e1c4; BYTE $0xcb               // vmovq    rbx, xmm1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x06 // vpextrb    byte [r8 + rbx], xmm14, 6
-	LONG $0x1479e3c4; WORD $0x07f3             // vpextrb    ebx, xmm6, 7
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_970
-
-LBB0_563:
-	LONG $0x1479e3c4; WORD $0x08f3 // vpextrb    ebx, xmm6, 8
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_564
-
-LBB0_971:
-	LONG $0x7ef961c4; BYTE $0xe3               // vmovq    rbx, xmm12
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x08 // vpextrb    byte [r8 + rbx], xmm14, 8
-	LONG $0x1479e3c4; WORD $0x09f3             // vpextrb    ebx, xmm6, 9
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_972
-
-LBB0_565:
-	LONG $0x1479e3c4; WORD $0x0af3 // vpextrb    ebx, xmm6, 10
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_566
-
-LBB0_973:
-	LONG $0x397d63c4; WORD $0x01e1             // vextracti128    xmm1, ymm12, 1
-	LONG $0x7ef9e1c4; BYTE $0xcb               // vmovq    rbx, xmm1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x0a // vpextrb    byte [r8 + rbx], xmm14, 10
-	LONG $0x1479e3c4; WORD $0x0bf3             // vpextrb    ebx, xmm6, 11
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_974
-
-LBB0_567:
-	LONG $0x1479e3c4; WORD $0x0cf3 // vpextrb    ebx, xmm6, 12
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_568
-
-LBB0_975:
-	LONG $0x7ef961c4; BYTE $0xdb               // vmovq    rbx, xmm11
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x0c // vpextrb    byte [r8 + rbx], xmm14, 12
-	LONG $0x1479e3c4; WORD $0x0df3             // vpextrb    ebx, xmm6, 13
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_976
-
-LBB0_569:
-	LONG $0x1479e3c4; WORD $0x0ef3 // vpextrb    ebx, xmm6, 14
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_570
-
-LBB0_977:
-	LONG $0x397d63c4; WORD $0x01d9             // vextracti128    xmm1, ymm11, 1
-	LONG $0x7ef9e1c4; BYTE $0xcb               // vmovq    rbx, xmm1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x0e // vpextrb    byte [r8 + rbx], xmm14, 14
-	LONG $0x1479e3c4; WORD $0x0ff3             // vpextrb    ebx, xmm6, 15
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_978
-
-LBB0_571:
-	LONG $0x2c2444f6; BYTE $0x01 // test    byte [rsp + 44], 1
-	JE   LBB0_572
-
-LBB0_979:
-	LONG $0x7ef961c4; BYTE $0xd3               // vmovq    rbx, xmm10
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x00 // vpextrb    byte [r8 + rbx], xmm1, 0
-	LONG $0x282444f6; BYTE $0x01               // test    byte [rsp + 40], 1
-	JNE  LBB0_980
-
-LBB0_573:
-	LONG $0x242444f6; BYTE $0x01 // test    byte [rsp + 36], 1
-	JE   LBB0_574
-
-LBB0_981:
-	LONG $0x397d63c4; WORD $0x01d1             // vextracti128    xmm1, ymm10, 1
-	LONG $0x7ef9e1c4; BYTE $0xcb               // vmovq    rbx, xmm1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x02 // vpextrb    byte [r8 + rbx], xmm1, 2
-	LONG $0x202444f6; BYTE $0x01               // test    byte [rsp + 32], 1
-	JNE  LBB0_982
-
-LBB0_575:
-	LONG $0x1c2444f6; BYTE $0x01 // test    byte [rsp + 28], 1
-	JE   LBB0_576
-
-LBB0_983:
-	LONG $0x7ef961c4; BYTE $0xcb               // vmovq    rbx, xmm9
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x04 // vpextrb    byte [r8 + rbx], xmm1, 4
-	LONG $0x182444f6; BYTE $0x01               // test    byte [rsp + 24], 1
-	JNE  LBB0_984
-
-LBB0_577:
-	LONG $0x142444f6; BYTE $0x01 // test    byte [rsp + 20], 1
-	JE   LBB0_578
-
-LBB0_985:
-	LONG $0x397d63c4; WORD $0x01c9             // vextracti128    xmm1, ymm9, 1
-	LONG $0x7ef9e1c4; BYTE $0xcb               // vmovq    rbx, xmm1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x06 // vpextrb    byte [r8 + rbx], xmm1, 6
-	LONG $0x01c1f641                           // test    r9b, 1
-	JNE  LBB0_986
-
-LBB0_579:
-	WORD $0xc2f6; BYTE $0x01 // test    dl, 1
-	QUAD $0x00000128249c8b48 // mov    rbx, qword [rsp + 296]
-	JE   LBB0_580
-
-LBB0_987:
-	LONG $0x7ef961c4; BYTE $0xc2               // vmovq    rdx, xmm8
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x100c; BYTE $0x08 // vpextrb    byte [r8 + rdx], xmm1, 8
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_988
-
-LBB0_581:
-	LONG $0x01c6f640         // test    sil, 1
-	QUAD $0x0000013024948b48 // mov    rdx, qword [rsp + 304]
-	JE   LBB0_582
-
-LBB0_989:
-	LONG $0x397d63c4; WORD $0x01c1             // vextracti128    xmm1, ymm8, 1
-	LONG $0x7ef9e1c4; BYTE $0xc9               // vmovq    rcx, xmm1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0a // vpextrb    byte [r8 + rcx], xmm1, 10
-	WORD $0x01a8                               // test    al, 1
-	QUAD $0x0000009824b48b48                   // mov    rsi, qword [rsp + 152]
-	JNE  LBB0_990
-
-LBB0_583:
-	LONG $0x01c5f641 // test    r13b, 1
-	JE   LBB0_584
-
-LBB0_991:
-	LONG $0x7ef9e1c4; BYTE $0xf9               // vmovq    rcx, xmm7
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0c // vpextrb    byte [r8 + rcx], xmm1, 12
-	LONG $0x01c2f641                           // test    r10b, 1
-	QUAD $0x0000011824ac8b4c                   // mov    r13, qword [rsp + 280]
-	JNE  LBB0_992
-
-LBB0_585:
-	LONG $0x01c3f641 // test    r11b, 1
-	JE   LBB0_586
-
-LBB0_993:
-	LONG $0x397de3c4; WORD $0x01f9             // vextracti128    xmm1, ymm7, 1
-	LONG $0x7ef9e1c4; BYTE $0xc9               // vmovq    rcx, xmm1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0e // vpextrb    byte [r8 + rcx], xmm1, 14
-	LONG $0x01c6f641                           // test    r14b, 1
-	QUAD $0x0000012024848b48                   // mov    rax, qword [rsp + 288]
-	QUAD $0x000000e8248c8b4c                   // mov    r9, qword [rsp + 232]
-	JNE  LBB0_587
-	JMP  LBB0_588
-
-LBB0_556:
-	LONG $0x1479e3c4; WORD $0x01f3 // vpextrb    ebx, xmm6, 1
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_557
-
-LBB0_964:
-	LONG $0x16f963c4; WORD $0x01fb             // vpextrq    rbx, xmm15, 1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x01 // vpextrb    byte [r8 + rbx], xmm14, 1
-	LONG $0x1479e3c4; WORD $0x02f3             // vpextrb    ebx, xmm6, 2
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	QUAD $0x000000e024bc8b4c                   // mov    r15, qword [rsp + 224]
-	JNE  LBB0_965
-
-LBB0_558:
-	LONG $0x1479e3c4; WORD $0x03f3 // vpextrb    ebx, xmm6, 3
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_559
-
-LBB0_966:
-	LONG $0x397d63c4; WORD $0x01f9             // vextracti128    xmm1, ymm15, 1
-	LONG $0x16f9e3c4; WORD $0x01cb             // vpextrq    rbx, xmm1, 1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x03 // vpextrb    byte [r8 + rbx], xmm14, 3
-	LONG $0x1479e3c4; WORD $0x04f3             // vpextrb    ebx, xmm6, 4
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_967
-
-LBB0_560:
-	LONG $0x1479e3c4; WORD $0x05f3 // vpextrb    ebx, xmm6, 5
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_561
-
-LBB0_968:
-	LONG $0x16f9e3c4; WORD $0x01eb             // vpextrq    rbx, xmm5, 1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x05 // vpextrb    byte [r8 + rbx], xmm14, 5
-	LONG $0x1479e3c4; WORD $0x06f3             // vpextrb    ebx, xmm6, 6
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_969
-
-LBB0_562:
-	LONG $0x1479e3c4; WORD $0x07f3 // vpextrb    ebx, xmm6, 7
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_563
-
-LBB0_970:
-	LONG $0x397de3c4; WORD $0x01e9             // vextracti128    xmm1, ymm5, 1
-	LONG $0x16f9e3c4; WORD $0x01cb             // vpextrq    rbx, xmm1, 1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x07 // vpextrb    byte [r8 + rbx], xmm14, 7
-	LONG $0x1479e3c4; WORD $0x08f3             // vpextrb    ebx, xmm6, 8
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_971
-
-LBB0_564:
-	LONG $0x1479e3c4; WORD $0x09f3 // vpextrb    ebx, xmm6, 9
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_565
-
-LBB0_972:
-	LONG $0x16f963c4; WORD $0x01e3             // vpextrq    rbx, xmm12, 1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x09 // vpextrb    byte [r8 + rbx], xmm14, 9
-	LONG $0x1479e3c4; WORD $0x0af3             // vpextrb    ebx, xmm6, 10
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_973
-
-LBB0_566:
-	LONG $0x1479e3c4; WORD $0x0bf3 // vpextrb    ebx, xmm6, 11
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_567
-
-LBB0_974:
-	LONG $0x397d63c4; WORD $0x01e1             // vextracti128    xmm1, ymm12, 1
-	LONG $0x16f9e3c4; WORD $0x01cb             // vpextrq    rbx, xmm1, 1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x0b // vpextrb    byte [r8 + rbx], xmm14, 11
-	LONG $0x1479e3c4; WORD $0x0cf3             // vpextrb    ebx, xmm6, 12
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_975
-
-LBB0_568:
-	LONG $0x1479e3c4; WORD $0x0df3 // vpextrb    ebx, xmm6, 13
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_569
-
-LBB0_976:
-	LONG $0x16f963c4; WORD $0x01db             // vpextrq    rbx, xmm11, 1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x0d // vpextrb    byte [r8 + rbx], xmm14, 13
-	LONG $0x1479e3c4; WORD $0x0ef3             // vpextrb    ebx, xmm6, 14
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_977
-
-LBB0_570:
-	LONG $0x1479e3c4; WORD $0x0ff3 // vpextrb    ebx, xmm6, 15
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_571
-
-LBB0_978:
-	LONG $0x397d63c4; WORD $0x01d9             // vextracti128    xmm1, ymm11, 1
-	LONG $0x16f9e3c4; WORD $0x01cb             // vpextrq    rbx, xmm1, 1
-	LONG $0x147943c4; WORD $0x1834; BYTE $0x0f // vpextrb    byte [r8 + rbx], xmm14, 15
-	LONG $0x2c2444f6; BYTE $0x01               // test    byte [rsp + 44], 1
-	JNE  LBB0_979
-
-LBB0_572:
-	LONG $0x282444f6; BYTE $0x01 // test    byte [rsp + 40], 1
-	JE   LBB0_573
-
-LBB0_980:
-	LONG $0x16f963c4; WORD $0x01d3             // vpextrq    rbx, xmm10, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x01 // vpextrb    byte [r8 + rbx], xmm1, 1
-	LONG $0x242444f6; BYTE $0x01               // test    byte [rsp + 36], 1
-	JNE  LBB0_981
-
-LBB0_574:
-	LONG $0x202444f6; BYTE $0x01 // test    byte [rsp + 32], 1
-	JE   LBB0_575
-
-LBB0_982:
-	LONG $0x397d63c4; WORD $0x01d1             // vextracti128    xmm1, ymm10, 1
-	LONG $0x16f9e3c4; WORD $0x01cb             // vpextrq    rbx, xmm1, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x03 // vpextrb    byte [r8 + rbx], xmm1, 3
-	LONG $0x1c2444f6; BYTE $0x01               // test    byte [rsp + 28], 1
-	JNE  LBB0_983
-
-LBB0_576:
-	LONG $0x182444f6; BYTE $0x01 // test    byte [rsp + 24], 1
-	JE   LBB0_577
-
-LBB0_984:
-	LONG $0x16f963c4; WORD $0x01cb             // vpextrq    rbx, xmm9, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x05 // vpextrb    byte [r8 + rbx], xmm1, 5
-	LONG $0x142444f6; BYTE $0x01               // test    byte [rsp + 20], 1
-	JNE  LBB0_985
-
-LBB0_578:
-	LONG $0x01c1f641 // test    r9b, 1
-	JE   LBB0_579
-
-LBB0_986:
-	LONG $0x397d63c4; WORD $0x01c9             // vextracti128    xmm1, ymm9, 1
-	LONG $0x16f9e3c4; WORD $0x01cb             // vpextrq    rbx, xmm1, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x07 // vpextrb    byte [r8 + rbx], xmm1, 7
-	WORD $0xc2f6; BYTE $0x01                   // test    dl, 1
-	QUAD $0x00000128249c8b48                   // mov    rbx, qword [rsp + 296]
-	JNE  LBB0_987
-
-LBB0_580:
-	WORD $0xc1f6; BYTE $0x01 // test    cl, 1
-	JE   LBB0_581
-
-LBB0_988:
-	LONG $0x16f963c4; WORD $0x01c1             // vpextrq    rcx, xmm8, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x09 // vpextrb    byte [r8 + rcx], xmm1, 9
-	LONG $0x01c6f640                           // test    sil, 1
-	QUAD $0x0000013024948b48                   // mov    rdx, qword [rsp + 304]
-	JNE  LBB0_989
-
-LBB0_582:
-	WORD $0x01a8             // test    al, 1
-	QUAD $0x0000009824b48b48 // mov    rsi, qword [rsp + 152]
-	JE   LBB0_583
-
-LBB0_990:
-	LONG $0x397d63c4; WORD $0x01c1             // vextracti128    xmm1, ymm8, 1
-	LONG $0x16f9e3c4; WORD $0x01c9             // vpextrq    rcx, xmm1, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0b // vpextrb    byte [r8 + rcx], xmm1, 11
-	LONG $0x01c5f641                           // test    r13b, 1
-	JNE  LBB0_991
-
-LBB0_584:
-	LONG $0x01c2f641         // test    r10b, 1
-	QUAD $0x0000011824ac8b4c // mov    r13, qword [rsp + 280]
-	JE   LBB0_585
-
-LBB0_992:
-	LONG $0x16f9e3c4; WORD $0x01f9             // vpextrq    rcx, xmm7, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0d // vpextrb    byte [r8 + rcx], xmm1, 13
-	LONG $0x01c3f641                           // test    r11b, 1
-	JNE  LBB0_993
-
-LBB0_586:
-	LONG $0x01c6f641         // test    r14b, 1
-	QUAD $0x0000012024848b48 // mov    rax, qword [rsp + 288]
-	QUAD $0x000000e8248c8b4c // mov    r9, qword [rsp + 232]
-	JE   LBB0_588
-
-LBB0_587:
-	LONG $0x397de3c4; WORD $0x01f9             // vextracti128    xmm1, ymm7, 1
-	LONG $0x16f9e3c4; WORD $0x01c9             // vpextrq    rcx, xmm1, 1
-	LONG $0x397d63c4; WORD $0x01f1             // vextracti128    xmm1, ymm14, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0f // vpextrb    byte [r8 + rcx], xmm1, 15
-
-LBB0_588:
-	QUAD $0x000240248c6ffdc5; BYTE $0x00 // vmovdqa    ymm1, yword [rsp + 576]
-	QUAD $0x000200249ceb75c5; BYTE $0x00 // vpor    ymm11, ymm1, yword [rsp + 512]
-	QUAD $0x0001e02494eb75c5; BYTE $0x00 // vpor    ymm10, ymm1, yword [rsp + 480]
-	QUAD $0x0001802484eb75c5; BYTE $0x00 // vpor    ymm8, ymm1, yword [rsp + 384]
-	QUAD $0x00016024bcebf5c5; BYTE $0x00 // vpor    ymm7, ymm1, yword [rsp + 352]
-	QUAD $0x0001c0248ceb75c5; BYTE $0x00 // vpor    ymm9, ymm1, yword [rsp + 448]
-	QUAD $0x0001a024acebf5c5; BYTE $0x00 // vpor    ymm5, ymm1, yword [rsp + 416]
-	QUAD $0x0001402494ebf5c5; BYTE $0x00 // vpor    ymm2, ymm1, yword [rsp + 320]
-	LONG $0xf9eb5dc5                     // vpor    ymm15, ymm4, ymm1
-	LONG $0x466dc3c4; WORD $0x31df       // vperm2i128    ymm3, ymm2, ymm15, 49
-	LONG $0x386dc3c4; WORD $0x01e7       // vinserti128    ymm4, ymm2, xmm15, 1
-	LONG $0xdbc6dcc5; BYTE $0x88         // vshufps    ymm3, ymm4, ymm3, 136
-	LONG $0x4635e3c4; WORD $0x31e5       // vperm2i128    ymm4, ymm9, ymm5, 49
-	LONG $0x383563c4; WORD $0x01e5       // vinserti128    ymm12, ymm9, xmm5, 1
-	LONG $0xe4c69cc5; BYTE $0x88         // vshufps    ymm4, ymm12, ymm4, 136
-	LONG $0x463d63c4; WORD $0x31e7       // vperm2i128    ymm12, ymm8, ymm7, 49
-	LONG $0x383d63c4; WORD $0x01ef       // vinserti128    ymm13, ymm8, xmm7, 1
-	LONG $0xc61441c4; WORD $0x88e4       // vshufps    ymm12, ymm13, ymm12, 136
-	LONG $0x462543c4; WORD $0x31ea       // vperm2i128    ymm13, ymm11, ymm10, 49
-	LONG $0x382543c4; WORD $0x01f2       // vinserti128    ymm14, ymm11, xmm10, 1
-	LONG $0xc60c41c4; WORD $0x88ed       // vshufps    ymm13, ymm14, ymm13, 136
-	LONG $0x667d41c4; BYTE $0xed         // vpcmpgtd    ymm13, ymm0, ymm13
-	LONG $0x667d41c4; BYTE $0xe4         // vpcmpgtd    ymm12, ymm0, ymm12
-	LONG $0x6b1541c4; BYTE $0xe4         // vpackssdw    ymm12, ymm13, ymm12
-	LONG $0x00fd43c4; WORD $0xd8e4       // vpermq    ymm12, ymm12, 216
-	LONG $0xe466fdc5                     // vpcmpgtd    ymm4, ymm0, ymm4
-	LONG $0xdb66fdc5                     // vpcmpgtd    ymm3, ymm0, ymm3
-	LONG $0xdb6bddc5                     // vpackssdw    ymm3, ymm4, ymm3
-	LONG $0x00fde3c4; WORD $0xd8db       // vpermq    ymm3, ymm3, 216
-	LONG $0xdb639dc5                     // vpacksswb    ymm3, ymm12, ymm3
-	LONG $0xdedbe5c5                     // vpand    ymm3, ymm3, ymm6
-	LONG $0xd97ef9c5                     // vmovd    ecx, xmm3
-	WORD $0xc1f6; BYTE $0x01             // test    cl, 1
-	JE   LBB0_589
-	LONG $0x787de2c4; WORD $0x1724       // vpbroadcastb    ymm4, byte [rdi + rdx]
-	LONG $0x1479e3c4; WORD $0x01d9       // vpextrb    ecx, xmm3, 1
-	WORD $0xc1f6; BYTE $0x01             // test    cl, 1
-	JNE  LBB0_995
-
-LBB0_590:
-	LONG $0x24548b48; BYTE $0x68   // mov    rdx, qword [rsp + 104]
-	LONG $0x1479e3c4; WORD $0x02d9 // vpextrb    ecx, xmm3, 2
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_592
-
-LBB0_591:
-	LONG $0x2059e3c4; WORD $0x1f34; BYTE $0x02 // vpinsrb    xmm6, xmm4, byte [rdi + rbx], 2
-	LONG $0x025de3c4; WORD $0x0fe6             // vpblendd    ymm4, ymm4, ymm6, 15
-
-LBB0_592:
-	LONG $0x24748b48; BYTE $0x60               // mov    rsi, qword [rsp + 96]
-	LONG $0x24548b4c; BYTE $0x48               // mov    r10, qword [rsp + 72]
-	LONG $0x1479e3c4; WORD $0x03d9             // vpextrb    ecx, xmm3, 3
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_593
-	QUAD $0x00000110248c8b48                   // mov    rcx, qword [rsp + 272]
-	LONG $0x2059e3c4; WORD $0x0f34; BYTE $0x03 // vpinsrb    xmm6, xmm4, byte [rdi + rcx], 3
-	LONG $0x025de3c4; WORD $0x0fe6             // vpblendd    ymm4, ymm4, ymm6, 15
-	LONG $0x1479e3c4; WORD $0x04d9             // vpextrb    ecx, xmm3, 4
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_997
-
-LBB0_594:
-	LONG $0x1479e3c4; WORD $0x05d9 // vpextrb    ecx, xmm3, 5
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_595
-
-LBB0_998:
-	LONG $0x2059e3c4; WORD $0x1734; BYTE $0x05 // vpinsrb    xmm6, xmm4, byte [rdi + rdx], 5
-	LONG $0x025de3c4; WORD $0x0fe6             // vpblendd    ymm4, ymm4, ymm6, 15
-	LONG $0x1479e3c4; WORD $0x06d9             // vpextrb    ecx, xmm3, 6
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_999
-
-LBB0_596:
-	LONG $0x1479e3c4; WORD $0x07d9 // vpextrb    ecx, xmm3, 7
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_597
-
-LBB0_1000:
-	LONG $0x2059a3c4; WORD $0x0f34; BYTE $0x07 // vpinsrb    xmm6, xmm4, byte [rdi + r9], 7
-	LONG $0x025de3c4; WORD $0x0fe6             // vpblendd    ymm4, ymm4, ymm6, 15
-	LONG $0x1479e3c4; WORD $0x08d9             // vpextrb    ecx, xmm3, 8
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_1001
-
-LBB0_598:
-	LONG $0x24548b48; BYTE $0x58   // mov    rdx, qword [rsp + 88]
-	LONG $0x1479e3c4; WORD $0x09d9 // vpextrb    ecx, xmm3, 9
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_600
-
-LBB0_599:
-	LONG $0x2059a3c4; WORD $0x3f34; BYTE $0x09 // vpinsrb    xmm6, xmm4, byte [rdi + r15], 9
-	LONG $0x025de3c4; WORD $0x0fe6             // vpblendd    ymm4, ymm4, ymm6, 15
-
-LBB0_600:
-	QUAD $0x0000009024848b48                   // mov    rax, qword [rsp + 144]
-	QUAD $0x0000008824b48b48                   // mov    rsi, qword [rsp + 136]
-	QUAD $0x00000080249c8b48                   // mov    rbx, qword [rsp + 128]
-	LONG $0x244c8b4c; BYTE $0x78               // mov    r9, qword [rsp + 120]
-	LONG $0x1479e3c4; WORD $0x0ad9             // vpextrb    ecx, xmm3, 10
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_601
-	LONG $0x2059e3c4; WORD $0x0734; BYTE $0x0a // vpinsrb    xmm6, xmm4, byte [rdi + rax], 10
-	LONG $0x025de3c4; WORD $0x0fe6             // vpblendd    ymm4, ymm4, ymm6, 15
-	LONG $0x1479e3c4; WORD $0x0bd9             // vpextrb    ecx, xmm3, 11
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_1003
-
-LBB0_602:
-	LONG $0x1479e3c4; WORD $0x0cd9 // vpextrb    ecx, xmm3, 12
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_603
-
-LBB0_1004:
-	QUAD $0x000000f824848b48                   // mov    rax, qword [rsp + 248]
-	LONG $0x2059e3c4; WORD $0x0734; BYTE $0x0c // vpinsrb    xmm6, xmm4, byte [rdi + rax], 12
-	LONG $0x025de3c4; WORD $0x0fe6             // vpblendd    ymm4, ymm4, ymm6, 15
-	LONG $0x1479e3c4; WORD $0x0dd9             // vpextrb    ecx, xmm3, 13
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_604
-	JMP  LBB0_605
-
-LBB0_589:
-	LONG $0x1479e3c4; WORD $0x01d9 // vpextrb    ecx, xmm3, 1
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_590
-
-LBB0_995:
-	LONG $0x2059e3c4; WORD $0x3734; BYTE $0x01 // vpinsrb    xmm6, xmm4, byte [rdi + rsi], 1
-	LONG $0x025de3c4; WORD $0x0fe6             // vpblendd    ymm4, ymm4, ymm6, 15
-	LONG $0x24548b48; BYTE $0x68               // mov    rdx, qword [rsp + 104]
-	LONG $0x1479e3c4; WORD $0x02d9             // vpextrb    ecx, xmm3, 2
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_591
-	JMP  LBB0_592
-
-LBB0_593:
-	LONG $0x1479e3c4; WORD $0x04d9 // vpextrb    ecx, xmm3, 4
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_594
-
-LBB0_997:
-	QUAD $0x00000108248c8b48                   // mov    rcx, qword [rsp + 264]
-	LONG $0x2059e3c4; WORD $0x0f34; BYTE $0x04 // vpinsrb    xmm6, xmm4, byte [rdi + rcx], 4
-	LONG $0x025de3c4; WORD $0x0fe6             // vpblendd    ymm4, ymm4, ymm6, 15
-	LONG $0x1479e3c4; WORD $0x05d9             // vpextrb    ecx, xmm3, 5
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_998
-
-LBB0_595:
-	LONG $0x1479e3c4; WORD $0x06d9 // vpextrb    ecx, xmm3, 6
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_596
-
-LBB0_999:
-	LONG $0x2059e3c4; WORD $0x0734; BYTE $0x06 // vpinsrb    xmm6, xmm4, byte [rdi + rax], 6
-	LONG $0x025de3c4; WORD $0x0fe6             // vpblendd    ymm4, ymm4, ymm6, 15
-	LONG $0x1479e3c4; WORD $0x07d9             // vpextrb    ecx, xmm3, 7
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_1000
-
-LBB0_597:
-	LONG $0x1479e3c4; WORD $0x08d9 // vpextrb    ecx, xmm3, 8
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_598
-
-LBB0_1001:
-	LONG $0x2059e3c4; WORD $0x3734; BYTE $0x08 // vpinsrb    xmm6, xmm4, byte [rdi + rsi], 8
-	LONG $0x025de3c4; WORD $0x0fe6             // vpblendd    ymm4, ymm4, ymm6, 15
-	LONG $0x24548b48; BYTE $0x58               // mov    rdx, qword [rsp + 88]
-	LONG $0x1479e3c4; WORD $0x09d9             // vpextrb    ecx, xmm3, 9
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_599
-	JMP  LBB0_600
-
-LBB0_601:
-	LONG $0x1479e3c4; WORD $0x0bd9 // vpextrb    ecx, xmm3, 11
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_602
-
-LBB0_1003:
-	QUAD $0x0000010024848b48                   // mov    rax, qword [rsp + 256]
-	LONG $0x2059e3c4; WORD $0x0734; BYTE $0x0b // vpinsrb    xmm6, xmm4, byte [rdi + rax], 11
-	LONG $0x025de3c4; WORD $0x0fe6             // vpblendd    ymm4, ymm4, ymm6, 15
-	LONG $0x1479e3c4; WORD $0x0cd9             // vpextrb    ecx, xmm3, 12
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_1004
-
-LBB0_603:
-	LONG $0x1479e3c4; WORD $0x0dd9 // vpextrb    ecx, xmm3, 13
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_605
-
-LBB0_604:
-	LONG $0x2059e3c4; WORD $0x1734; BYTE $0x0d // vpinsrb    xmm6, xmm4, byte [rdi + rdx], 13
-	LONG $0x025de3c4; WORD $0x0fe6             // vpblendd    ymm4, ymm4, ymm6, 15
-
-LBB0_605:
-	LONG $0x24448b48; BYTE $0x50               // mov    rax, qword [rsp + 80]
-	LONG $0x24548b48; BYTE $0x40               // mov    rdx, qword [rsp + 64]
-	LONG $0x1479e3c4; WORD $0x0ed9             // vpextrb    ecx, xmm3, 14
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_607
-	LONG $0x2059e3c4; WORD $0x0734; BYTE $0x0e // vpinsrb    xmm6, xmm4, byte [rdi + rax], 14
-	LONG $0x025de3c4; WORD $0x0fe6             // vpblendd    ymm4, ymm4, ymm6, 15
-
-LBB0_607:
-	LONG $0x1479e3c4; WORD $0x0fd9             // vpextrb    ecx, xmm3, 15
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_609
-	LONG $0x2059a3c4; WORD $0x1734; BYTE $0x0f // vpinsrb    xmm6, xmm4, byte [rdi + r10], 15
-	LONG $0x025de3c4; WORD $0x0fe6             // vpblendd    ymm4, ymm4, ymm6, 15
-
-LBB0_609:
-	LONG $0x397de3c4; WORD $0x01de             // vextracti128    xmm6, ymm3, 1
-	LONG $0xf07ef9c5                           // vmovd    eax, xmm6
-	LONG $0x00248489; WORD $0x0002; BYTE $0x00 // mov    dword [rsp + 512], eax
-	WORD $0x01a8                               // test    al, 1
-	JE   LBB0_611
-	LONG $0x397de3c4; WORD $0x01e1             // vextracti128    xmm1, ymm4, 1
-	LONG $0x2071e3c4; WORD $0x170c; BYTE $0x00 // vpinsrb    xmm1, xmm1, byte [rdi + rdx], 0
-	LONG $0x385de3c4; WORD $0x01e1             // vinserti128    ymm4, ymm4, xmm1, 1
-
-LBB0_611:
-	LONG $0x24448b48; BYTE $0x38               // mov    rax, qword [rsp + 56]
-	LONG $0x1479e3c4; WORD $0x01f1             // vpextrb    ecx, xmm6, 1
-	LONG $0xe0248c89; WORD $0x0001; BYTE $0x00 // mov    dword [rsp + 480], ecx
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_612
-	LONG $0x397de3c4; WORD $0x01e1             // vextracti128    xmm1, ymm4, 1
-	LONG $0x2071e3c4; WORD $0x370c; BYTE $0x01 // vpinsrb    xmm1, xmm1, byte [rdi + rsi], 1
-	LONG $0x385de3c4; WORD $0x01e1             // vinserti128    ymm4, ymm4, xmm1, 1
-	LONG $0x1479e3c4; WORD $0x02f1             // vpextrb    ecx, xmm6, 2
-	LONG $0xc0248c89; WORD $0x0001; BYTE $0x00 // mov    dword [rsp + 448], ecx
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_1006
-
-LBB0_613:
-	LONG $0x1479e3c4; WORD $0x03f1             // vpextrb    ecx, xmm6, 3
-	LONG $0xa0248c89; WORD $0x0001; BYTE $0x00 // mov    dword [rsp + 416], ecx
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_614
-
-LBB0_1007:
-	LONG $0x397de3c4; WORD $0x01e1             // vextracti128    xmm1, ymm4, 1
-	LONG $0x2071a3c4; WORD $0x0f0c; BYTE $0x03 // vpinsrb    xmm1, xmm1, byte [rdi + r9], 3
-	LONG $0x385de3c4; WORD $0x01e1             // vinserti128    ymm4, ymm4, xmm1, 1
-	LONG $0x1479e3c4; WORD $0x04f1             // vpextrb    ecx, xmm6, 4
-	LONG $0x80248c89; WORD $0x0001; BYTE $0x00 // mov    dword [rsp + 384], ecx
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_1008
-
-LBB0_615:
-	LONG $0x1479e3c4; WORD $0x05f0             // vpextrb    eax, xmm6, 5
-	LONG $0x60248489; WORD $0x0001; BYTE $0x00 // mov    dword [rsp + 352], eax
-	WORD $0x01a8                               // test    al, 1
-	JE   LBB0_617
-
-LBB0_616:
-	LONG $0x397de3c4; WORD $0x01e1             // vextracti128    xmm1, ymm4, 1
-	LONG $0x2071a3c4; WORD $0x2f0c; BYTE $0x05 // vpinsrb    xmm1, xmm1, byte [rdi + r13], 5
-	LONG $0x385de3c4; WORD $0x01e1             // vinserti128    ymm4, ymm4, xmm1, 1
-
-LBB0_617:
-	LONG $0x24448b48; BYTE $0x70               // mov    rax, qword [rsp + 112]
-	QUAD $0x000000b8249c8b48                   // mov    rbx, qword [rsp + 184]
-	QUAD $0x000000b024948b48                   // mov    rdx, qword [rsp + 176]
-	LONG $0x1479e3c4; WORD $0x06f1             // vpextrb    ecx, xmm6, 6
-	LONG $0x40248c89; WORD $0x0001; BYTE $0x00 // mov    dword [rsp + 320], ecx
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_618
-	LONG $0x397de3c4; WORD $0x01e1             // vextracti128    xmm1, ymm4, 1
-	LONG $0x2071e3c4; WORD $0x070c; BYTE $0x06 // vpinsrb    xmm1, xmm1, byte [rdi + rax], 6
-	LONG $0x385de3c4; WORD $0x01e1             // vinserti128    ymm4, ymm4, xmm1, 1
-	LONG $0x1479e3c4; WORD $0x07f0             // vpextrb    eax, xmm6, 7
-	LONG $0x98248489; WORD $0x0000; BYTE $0x00 // mov    dword [rsp + 152], eax
-	WORD $0x01a8                               // test    al, 1
-	JNE  LBB0_1010
-
-LBB0_619:
-	LONG $0x1479c3c4; WORD $0x08f1 // vpextrb    r9d, xmm6, 8
-	LONG $0x01c1f641               // test    r9b, 1
-	JE   LBB0_620
-
-LBB0_1011:
-	LONG $0x397de3c4; WORD $0x01e1             // vextracti128    xmm1, ymm4, 1
-	QUAD $0x000000d824848b48                   // mov    rax, qword [rsp + 216]
-	LONG $0x2071e3c4; WORD $0x070c; BYTE $0x08 // vpinsrb    xmm1, xmm1, byte [rdi + rax], 8
-	LONG $0x385de3c4; WORD $0x01e1             // vinserti128    ymm4, ymm4, xmm1, 1
-	LONG $0x1479e3c4; WORD $0x09f1             // vpextrb    ecx, xmm6, 9
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_1012
-
-LBB0_621:
-	LONG $0x1479c3c4; WORD $0x0af3 // vpextrb    r11d, xmm6, 10
-	LONG $0x01c3f641               // test    r11b, 1
-	JE   LBB0_622
-
-LBB0_1013:
-	LONG $0x397de3c4; WORD $0x01e1             // vextracti128    xmm1, ymm4, 1
-	QUAD $0x000000c824848b48                   // mov    rax, qword [rsp + 200]
-	LONG $0x2071e3c4; WORD $0x070c; BYTE $0x0a // vpinsrb    xmm1, xmm1, byte [rdi + rax], 10
-	LONG $0x385de3c4; WORD $0x01e1             // vinserti128    ymm4, ymm4, xmm1, 1
-	LONG $0x1479e3c4; WORD $0x0bf0             // vpextrb    eax, xmm6, 11
-	WORD $0x01a8                               // test    al, 1
-	JNE  LBB0_1014
-
-LBB0_623:
-	LONG $0x1479e3c4; WORD $0x0cf6 // vpextrb    esi, xmm6, 12
-	LONG $0x01c6f640               // test    sil, 1
-	JE   LBB0_624
-
-LBB0_1015:
-	LONG $0x397de3c4; WORD $0x01e1             // vextracti128    xmm1, ymm4, 1
-	LONG $0x2071e3c4; WORD $0x1f0c; BYTE $0x0c // vpinsrb    xmm1, xmm1, byte [rdi + rbx], 12
-	LONG $0x385de3c4; WORD $0x01e1             // vinserti128    ymm4, ymm4, xmm1, 1
-	LONG $0x1479c3c4; WORD $0x0df2             // vpextrb    r10d, xmm6, 13
-	LONG $0x01c2f641                           // test    r10b, 1
-	JNE  LBB0_1016
-
-LBB0_625:
-	QUAD $0x000000a824948b48       // mov    rdx, qword [rsp + 168]
-	LONG $0x1479c3c4; WORD $0x0ef5 // vpextrb    r13d, xmm6, 14
-	LONG $0x01c5f641               // test    r13b, 1
-	JE   LBB0_626
-
-LBB0_1017:
-	LONG $0x397de3c4; WORD $0x01e1             // vextracti128    xmm1, ymm4, 1
-	LONG $0x2071e3c4; WORD $0x170c; BYTE $0x0e // vpinsrb    xmm1, xmm1, byte [rdi + rdx], 14
-	LONG $0x385de3c4; WORD $0x01e1             // vinserti128    ymm4, ymm4, xmm1, 1
-	QUAD $0x000000a024948b48                   // mov    rdx, qword [rsp + 160]
-	LONG $0x1479c3c4; WORD $0x0ff6             // vpextrb    r14d, xmm6, 15
-	LONG $0x01c6f641                           // test    r14b, 1
-	JNE  LBB0_627
-	JMP  LBB0_628
-
-LBB0_612:
-	LONG $0x1479e3c4; WORD $0x02f1             // vpextrb    ecx, xmm6, 2
-	LONG $0xc0248c89; WORD $0x0001; BYTE $0x00 // mov    dword [rsp + 448], ecx
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_613
-
-LBB0_1006:
-	LONG $0x397de3c4; WORD $0x01e1             // vextracti128    xmm1, ymm4, 1
-	LONG $0x2071e3c4; WORD $0x1f0c; BYTE $0x02 // vpinsrb    xmm1, xmm1, byte [rdi + rbx], 2
-	LONG $0x385de3c4; WORD $0x01e1             // vinserti128    ymm4, ymm4, xmm1, 1
-	LONG $0x1479e3c4; WORD $0x03f1             // vpextrb    ecx, xmm6, 3
-	LONG $0xa0248c89; WORD $0x0001; BYTE $0x00 // mov    dword [rsp + 416], ecx
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_1007
-
-LBB0_614:
-	LONG $0x1479e3c4; WORD $0x04f1             // vpextrb    ecx, xmm6, 4
-	LONG $0x80248c89; WORD $0x0001; BYTE $0x00 // mov    dword [rsp + 384], ecx
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JE   LBB0_615
-
-LBB0_1008:
-	LONG $0x397de3c4; WORD $0x01e1             // vextracti128    xmm1, ymm4, 1
-	LONG $0x2071e3c4; WORD $0x070c; BYTE $0x04 // vpinsrb    xmm1, xmm1, byte [rdi + rax], 4
-	LONG $0x385de3c4; WORD $0x01e1             // vinserti128    ymm4, ymm4, xmm1, 1
-	LONG $0x1479e3c4; WORD $0x05f0             // vpextrb    eax, xmm6, 5
-	LONG $0x60248489; WORD $0x0001; BYTE $0x00 // mov    dword [rsp + 352], eax
-	WORD $0x01a8                               // test    al, 1
-	JNE  LBB0_616
-	JMP  LBB0_617
-
-LBB0_618:
-	LONG $0x1479e3c4; WORD $0x07f0             // vpextrb    eax, xmm6, 7
-	LONG $0x98248489; WORD $0x0000; BYTE $0x00 // mov    dword [rsp + 152], eax
-	WORD $0x01a8                               // test    al, 1
-	JE   LBB0_619
-
-LBB0_1010:
-	LONG $0x397de3c4; WORD $0x01e1             // vextracti128    xmm1, ymm4, 1
-	QUAD $0x000000f024848b48                   // mov    rax, qword [rsp + 240]
-	LONG $0x2071e3c4; WORD $0x070c; BYTE $0x07 // vpinsrb    xmm1, xmm1, byte [rdi + rax], 7
-	LONG $0x385de3c4; WORD $0x01e1             // vinserti128    ymm4, ymm4, xmm1, 1
-	LONG $0x1479c3c4; WORD $0x08f1             // vpextrb    r9d, xmm6, 8
-	LONG $0x01c1f641                           // test    r9b, 1
-	JNE  LBB0_1011
-
-LBB0_620:
-	LONG $0x1479e3c4; WORD $0x09f1 // vpextrb    ecx, xmm6, 9
-	WORD $0xc1f6; BYTE $0x01       // test    cl, 1
-	JE   LBB0_621
-
-LBB0_1012:
-	LONG $0x397de3c4; WORD $0x01e1             // vextracti128    xmm1, ymm4, 1
-	QUAD $0x000000d024848b48                   // mov    rax, qword [rsp + 208]
-	LONG $0x2071e3c4; WORD $0x070c; BYTE $0x09 // vpinsrb    xmm1, xmm1, byte [rdi + rax], 9
-	LONG $0x385de3c4; WORD $0x01e1             // vinserti128    ymm4, ymm4, xmm1, 1
-	LONG $0x1479c3c4; WORD $0x0af3             // vpextrb    r11d, xmm6, 10
-	LONG $0x01c3f641                           // test    r11b, 1
-	JNE  LBB0_1013
-
-LBB0_622:
-	LONG $0x1479e3c4; WORD $0x0bf0 // vpextrb    eax, xmm6, 11
-	WORD $0x01a8                   // test    al, 1
-	JE   LBB0_623
-
-LBB0_1014:
-	LONG $0x397de3c4; WORD $0x01e1             // vextracti128    xmm1, ymm4, 1
-	QUAD $0x000000c024b48b48                   // mov    rsi, qword [rsp + 192]
-	LONG $0x2071e3c4; WORD $0x370c; BYTE $0x0b // vpinsrb    xmm1, xmm1, byte [rdi + rsi], 11
-	LONG $0x385de3c4; WORD $0x01e1             // vinserti128    ymm4, ymm4, xmm1, 1
-	LONG $0x1479e3c4; WORD $0x0cf6             // vpextrb    esi, xmm6, 12
-	LONG $0x01c6f640                           // test    sil, 1
-	JNE  LBB0_1015
-
-LBB0_624:
-	LONG $0x1479c3c4; WORD $0x0df2 // vpextrb    r10d, xmm6, 13
-	LONG $0x01c2f641               // test    r10b, 1
-	JE   LBB0_625
-
-LBB0_1016:
-	LONG $0x397de3c4; WORD $0x01e1             // vextracti128    xmm1, ymm4, 1
-	LONG $0x2071e3c4; WORD $0x170c; BYTE $0x0d // vpinsrb    xmm1, xmm1, byte [rdi + rdx], 13
-	LONG $0x385de3c4; WORD $0x01e1             // vinserti128    ymm4, ymm4, xmm1, 1
-	QUAD $0x000000a824948b48                   // mov    rdx, qword [rsp + 168]
-	LONG $0x1479c3c4; WORD $0x0ef5             // vpextrb    r13d, xmm6, 14
-	LONG $0x01c5f641                           // test    r13b, 1
-	JNE  LBB0_1017
-
-LBB0_626:
-	QUAD $0x000000a024948b48       // mov    rdx, qword [rsp + 160]
-	LONG $0x1479c3c4; WORD $0x0ff6 // vpextrb    r14d, xmm6, 15
-	LONG $0x01c6f641               // test    r14b, 1
-	JE   LBB0_628
-
-LBB0_627:
-	LONG $0x397de3c4; WORD $0x01e1             // vextracti128    xmm1, ymm4, 1
-	LONG $0x2071e3c4; WORD $0x170c; BYTE $0x0f // vpinsrb    xmm1, xmm1, byte [rdi + rdx], 15
-	LONG $0x385de3c4; WORD $0x01e1             // vinserti128    ymm4, ymm4, xmm1, 1
-
-LBB0_628:
-	LONG $0xd471f5c5; BYTE $0x07               // vpsrlw    ymm1, ymm4, 7
-	QUAD $0x00000080a5dbf5c5                   // vpand    ymm4, ymm1, yword 128[rbp] /* [rip + .LCPI0_4] */
-	LONG $0x7e79c1c4; BYTE $0xdf               // vmovd    r15d, xmm3
-	LONG $0x01c7f641                           // test    r15b, 1
-	JE   LBB0_629
-	LONG $0x7ef961c4; BYTE $0xdb               // vmovq    rbx, xmm11
-	LONG $0x1479c3c4; WORD $0x1824; BYTE $0x00 // vpextrb    byte [r8 + rbx], xmm4, 0
-	LONG $0x1479e3c4; WORD $0x01db             // vpextrb    ebx, xmm3, 1
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_1019
-
-LBB0_630:
-	LONG $0x1479e3c4; WORD $0x02db // vpextrb    ebx, xmm3, 2
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_631
-
-LBB0_1020:
-	LONG $0x397d63c4; WORD $0x01d9             // vextracti128    xmm1, ymm11, 1
-	LONG $0x7ef9e1c4; BYTE $0xcb               // vmovq    rbx, xmm1
-	LONG $0x1479c3c4; WORD $0x1824; BYTE $0x02 // vpextrb    byte [r8 + rbx], xmm4, 2
-	LONG $0x1479e3c4; WORD $0x03db             // vpextrb    ebx, xmm3, 3
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_1021
-
-LBB0_632:
-	LONG $0x1479e3c4; WORD $0x04db // vpextrb    ebx, xmm3, 4
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_633
-
-LBB0_1022:
-	LONG $0x7ef961c4; BYTE $0xd3               // vmovq    rbx, xmm10
-	LONG $0x1479c3c4; WORD $0x1824; BYTE $0x04 // vpextrb    byte [r8 + rbx], xmm4, 4
-	LONG $0x1479e3c4; WORD $0x05db             // vpextrb    ebx, xmm3, 5
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_1023
-
-LBB0_634:
-	LONG $0x1479e3c4; WORD $0x06db // vpextrb    ebx, xmm3, 6
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_635
-
-LBB0_1024:
-	LONG $0x397d63c4; WORD $0x01d1             // vextracti128    xmm1, ymm10, 1
-	LONG $0x7ef9e1c4; BYTE $0xcb               // vmovq    rbx, xmm1
-	LONG $0x1479c3c4; WORD $0x1824; BYTE $0x06 // vpextrb    byte [r8 + rbx], xmm4, 6
-	LONG $0x1479e3c4; WORD $0x07db             // vpextrb    ebx, xmm3, 7
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_1025
-
-LBB0_636:
-	LONG $0x1479e3c4; WORD $0x08db // vpextrb    ebx, xmm3, 8
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_637
-
-LBB0_1026:
-	LONG $0x7ef961c4; BYTE $0xcb               // vmovq    rbx, xmm9
-	LONG $0x1479c3c4; WORD $0x1824; BYTE $0x08 // vpextrb    byte [r8 + rbx], xmm4, 8
-	LONG $0x1479e3c4; WORD $0x09db             // vpextrb    ebx, xmm3, 9
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_1027
-
-LBB0_638:
-	LONG $0x1479e3c4; WORD $0x0adb // vpextrb    ebx, xmm3, 10
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_639
-
-LBB0_1028:
-	LONG $0x397d63c4; WORD $0x01c9             // vextracti128    xmm1, ymm9, 1
-	LONG $0x7ef9e1c4; BYTE $0xcb               // vmovq    rbx, xmm1
-	LONG $0x1479c3c4; WORD $0x1824; BYTE $0x0a // vpextrb    byte [r8 + rbx], xmm4, 10
-	LONG $0x1479e3c4; WORD $0x0bdb             // vpextrb    ebx, xmm3, 11
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_1029
-
-LBB0_640:
-	LONG $0x1479e3c4; WORD $0x0cdb // vpextrb    ebx, xmm3, 12
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_641
-
-LBB0_1030:
-	LONG $0x7ef9e1c4; BYTE $0xeb               // vmovq    rbx, xmm5
-	LONG $0x1479c3c4; WORD $0x1824; BYTE $0x0c // vpextrb    byte [r8 + rbx], xmm4, 12
-	LONG $0x1479e3c4; WORD $0x0ddb             // vpextrb    ebx, xmm3, 13
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	QUAD $0x000380248c6f7dc5; BYTE $0x00       // vmovdqa    ymm9, yword [rsp + 896]
-	JNE  LBB0_1031
-
-LBB0_642:
-	LONG $0x1479e3c4; WORD $0x0edb // vpextrb    ebx, xmm3, 14
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_643
-
-LBB0_1032:
-	LONG $0x397de3c4; WORD $0x01e9             // vextracti128    xmm1, ymm5, 1
-	LONG $0x7ef9e1c4; BYTE $0xcb               // vmovq    rbx, xmm1
-	LONG $0x1479c3c4; WORD $0x1824; BYTE $0x0e // vpextrb    byte [r8 + rbx], xmm4, 14
-	LONG $0x1479e3c4; WORD $0x0fdb             // vpextrb    ebx, xmm3, 15
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_1033
-
-LBB0_644:
-	QUAD $0x01000002002484f6             // test    byte [rsp + 512], 1
-	QUAD $0x000340249c6ffdc5; BYTE $0x00 // vmovdqa    ymm3, yword [rsp + 832]
-	JE   LBB0_645
-
-LBB0_1034:
-	LONG $0x7ef961c4; BYTE $0xc3               // vmovq    rbx, xmm8
-	LONG $0x397de3c4; WORD $0x01e1             // vextracti128    xmm1, ymm4, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x00 // vpextrb    byte [r8 + rbx], xmm1, 0
-	QUAD $0x01000001e02484f6                   // test    byte [rsp + 480], 1
-	JNE  LBB0_1035
-
-LBB0_646:
-	QUAD $0x01000001c02484f6 // test    byte [rsp + 448], 1
-	JE   LBB0_647
-
-LBB0_1036:
-	LONG $0x397d63c4; WORD $0x01c1             // vextracti128    xmm1, ymm8, 1
-	LONG $0x7ef9e1c4; BYTE $0xcb               // vmovq    rbx, xmm1
-	LONG $0x397de3c4; WORD $0x01e1             // vextracti128    xmm1, ymm4, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x02 // vpextrb    byte [r8 + rbx], xmm1, 2
-	QUAD $0x01000001a02484f6                   // test    byte [rsp + 416], 1
-	JNE  LBB0_1037
-
-LBB0_648:
-	QUAD $0x01000001802484f6 // test    byte [rsp + 384], 1
-	JE   LBB0_649
-
-LBB0_1038:
-	LONG $0x7ef9e1c4; BYTE $0xfb               // vmovq    rbx, xmm7
-	LONG $0x397de3c4; WORD $0x01e1             // vextracti128    xmm1, ymm4, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x04 // vpextrb    byte [r8 + rbx], xmm1, 4
-	QUAD $0x01000001602484f6                   // test    byte [rsp + 352], 1
-	QUAD $0x00036024846f7dc5; BYTE $0x00       // vmovdqa    ymm8, yword [rsp + 864]
-	JNE  LBB0_1039
-
-LBB0_650:
-	QUAD $0x01000001402484f6 // test    byte [rsp + 320], 1
-	JE   LBB0_651
-
-LBB0_1040:
-	LONG $0x397de3c4; WORD $0x01f9             // vextracti128    xmm1, ymm7, 1
-	LONG $0x7ef9e1c4; BYTE $0xcb               // vmovq    rbx, xmm1
-	LONG $0x397de3c4; WORD $0x01e1             // vextracti128    xmm1, ymm4, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x06 // vpextrb    byte [r8 + rbx], xmm1, 6
-	QUAD $0x01000000982484f6                   // test    byte [rsp + 152], 1
-	JNE  LBB0_1041
-
-LBB0_652:
-	LONG $0x01c1f641             // test    r9b, 1
-	LONG $0x244c8b44; BYTE $0x10 // mov    r9d, dword [rsp + 16]
-	JE   LBB0_653
-
-LBB0_1042:
-	LONG $0x7ef9e1c4; BYTE $0xd2               // vmovq    rdx, xmm2
-	LONG $0x397de3c4; WORD $0x01e1             // vextracti128    xmm1, ymm4, 1
-	LONG $0x1479c3c4; WORD $0x100c; BYTE $0x08 // vpextrb    byte [r8 + rdx], xmm1, 8
-	WORD $0xc1f6; BYTE $0x01                   // test    cl, 1
-	JNE  LBB0_1043
-
-LBB0_654:
-	LONG $0x01c3f641         // test    r11b, 1
-	QUAD $0x00000130249c8b4c // mov    r11, qword [rsp + 304]
-	JE   LBB0_655
-
-LBB0_1044:
-	LONG $0x397de3c4; WORD $0x01d1             // vextracti128    xmm1, ymm2, 1
-	LONG $0x7ef9e1c4; BYTE $0xc9               // vmovq    rcx, xmm1
-	LONG $0x397de3c4; WORD $0x01e1             // vextracti128    xmm1, ymm4, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0a // vpextrb    byte [r8 + rcx], xmm1, 10
-	WORD $0x01a8                               // test    al, 1
-	JNE  LBB0_1045
-
-LBB0_656:
-	LONG $0x01c6f640 // test    sil, 1
-	JE   LBB0_657
-
-LBB0_1046:
-	LONG $0x7ef961c4; BYTE $0xf9               // vmovq    rcx, xmm15
-	LONG $0x397de3c4; WORD $0x01e1             // vextracti128    xmm1, ymm4, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0c // vpextrb    byte [r8 + rcx], xmm1, 12
-	LONG $0x01c2f641                           // test    r10b, 1
-	QUAD $0x00032024946ffdc5; BYTE $0x00       // vmovdqa    ymm2, yword [rsp + 800]
-	JNE  LBB0_1047
-
-LBB0_658:
-	LONG $0x01c5f641             // test    r13b, 1
-	LONG $0x24548b4c; BYTE $0x30 // mov    r10, qword [rsp + 48]
-	JE   LBB0_659
-
-LBB0_1048:
-	LONG $0x397d63c4; WORD $0x01f9             // vextracti128    xmm1, ymm15, 1
-	LONG $0x7ef9e1c4; BYTE $0xc9               // vmovq    rcx, xmm1
-	LONG $0x397de3c4; WORD $0x01e1             // vextracti128    xmm1, ymm4, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0e // vpextrb    byte [r8 + rcx], xmm1, 14
-	LONG $0x01c6f641                           // test    r14b, 1
-	JE   LBB0_25
-	JMP  LBB0_1049
-
-LBB0_629:
-	LONG $0x1479e3c4; WORD $0x01db // vpextrb    ebx, xmm3, 1
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_630
-
-LBB0_1019:
-	LONG $0x16f963c4; WORD $0x01db             // vpextrq    rbx, xmm11, 1
-	LONG $0x1479c3c4; WORD $0x1824; BYTE $0x01 // vpextrb    byte [r8 + rbx], xmm4, 1
-	LONG $0x1479e3c4; WORD $0x02db             // vpextrb    ebx, xmm3, 2
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_1020
-
-LBB0_631:
-	LONG $0x1479e3c4; WORD $0x03db // vpextrb    ebx, xmm3, 3
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_632
-
-LBB0_1021:
-	LONG $0x397d63c4; WORD $0x01d9             // vextracti128    xmm1, ymm11, 1
-	LONG $0x16f9e3c4; WORD $0x01cb             // vpextrq    rbx, xmm1, 1
-	LONG $0x1479c3c4; WORD $0x1824; BYTE $0x03 // vpextrb    byte [r8 + rbx], xmm4, 3
-	LONG $0x1479e3c4; WORD $0x04db             // vpextrb    ebx, xmm3, 4
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_1022
-
-LBB0_633:
-	LONG $0x1479e3c4; WORD $0x05db // vpextrb    ebx, xmm3, 5
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_634
-
-LBB0_1023:
-	LONG $0x16f963c4; WORD $0x01d3             // vpextrq    rbx, xmm10, 1
-	LONG $0x1479c3c4; WORD $0x1824; BYTE $0x05 // vpextrb    byte [r8 + rbx], xmm4, 5
-	LONG $0x1479e3c4; WORD $0x06db             // vpextrb    ebx, xmm3, 6
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_1024
-
-LBB0_635:
-	LONG $0x1479e3c4; WORD $0x07db // vpextrb    ebx, xmm3, 7
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_636
-
-LBB0_1025:
-	LONG $0x397d63c4; WORD $0x01d1             // vextracti128    xmm1, ymm10, 1
-	LONG $0x16f9e3c4; WORD $0x01cb             // vpextrq    rbx, xmm1, 1
-	LONG $0x1479c3c4; WORD $0x1824; BYTE $0x07 // vpextrb    byte [r8 + rbx], xmm4, 7
-	LONG $0x1479e3c4; WORD $0x08db             // vpextrb    ebx, xmm3, 8
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_1026
-
-LBB0_637:
-	LONG $0x1479e3c4; WORD $0x09db // vpextrb    ebx, xmm3, 9
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_638
-
-LBB0_1027:
-	LONG $0x16f963c4; WORD $0x01cb             // vpextrq    rbx, xmm9, 1
-	LONG $0x1479c3c4; WORD $0x1824; BYTE $0x09 // vpextrb    byte [r8 + rbx], xmm4, 9
-	LONG $0x1479e3c4; WORD $0x0adb             // vpextrb    ebx, xmm3, 10
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_1028
-
-LBB0_639:
-	LONG $0x1479e3c4; WORD $0x0bdb // vpextrb    ebx, xmm3, 11
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_640
-
-LBB0_1029:
-	LONG $0x397d63c4; WORD $0x01c9             // vextracti128    xmm1, ymm9, 1
-	LONG $0x16f9e3c4; WORD $0x01cb             // vpextrq    rbx, xmm1, 1
-	LONG $0x1479c3c4; WORD $0x1824; BYTE $0x0b // vpextrb    byte [r8 + rbx], xmm4, 11
-	LONG $0x1479e3c4; WORD $0x0cdb             // vpextrb    ebx, xmm3, 12
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_1030
-
-LBB0_641:
-	LONG $0x1479e3c4; WORD $0x0ddb       // vpextrb    ebx, xmm3, 13
-	WORD $0xc3f6; BYTE $0x01             // test    bl, 1
-	QUAD $0x000380248c6f7dc5; BYTE $0x00 // vmovdqa    ymm9, yword [rsp + 896]
-	JE   LBB0_642
-
-LBB0_1031:
-	LONG $0x16f9e3c4; WORD $0x01eb             // vpextrq    rbx, xmm5, 1
-	LONG $0x1479c3c4; WORD $0x1824; BYTE $0x0d // vpextrb    byte [r8 + rbx], xmm4, 13
-	LONG $0x1479e3c4; WORD $0x0edb             // vpextrb    ebx, xmm3, 14
-	WORD $0xc3f6; BYTE $0x01                   // test    bl, 1
-	JNE  LBB0_1032
-
-LBB0_643:
-	LONG $0x1479e3c4; WORD $0x0fdb // vpextrb    ebx, xmm3, 15
-	WORD $0xc3f6; BYTE $0x01       // test    bl, 1
-	JE   LBB0_644
-
-LBB0_1033:
-	LONG $0x397de3c4; WORD $0x01e9             // vextracti128    xmm1, ymm5, 1
-	LONG $0x16f9e3c4; WORD $0x01cb             // vpextrq    rbx, xmm1, 1
-	LONG $0x1479c3c4; WORD $0x1824; BYTE $0x0f // vpextrb    byte [r8 + rbx], xmm4, 15
-	QUAD $0x01000002002484f6                   // test    byte [rsp + 512], 1
-	QUAD $0x000340249c6ffdc5; BYTE $0x00       // vmovdqa    ymm3, yword [rsp + 832]
-	JNE  LBB0_1034
-
-LBB0_645:
-	QUAD $0x01000001e02484f6 // test    byte [rsp + 480], 1
-	JE   LBB0_646
-
-LBB0_1035:
-	LONG $0x16f963c4; WORD $0x01c3             // vpextrq    rbx, xmm8, 1
-	LONG $0x397de3c4; WORD $0x01e1             // vextracti128    xmm1, ymm4, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x01 // vpextrb    byte [r8 + rbx], xmm1, 1
-	QUAD $0x01000001c02484f6                   // test    byte [rsp + 448], 1
-	JNE  LBB0_1036
-
-LBB0_647:
-	QUAD $0x01000001a02484f6 // test    byte [rsp + 416], 1
-	JE   LBB0_648
-
-LBB0_1037:
-	LONG $0x397d63c4; WORD $0x01c1             // vextracti128    xmm1, ymm8, 1
-	LONG $0x16f9e3c4; WORD $0x01cb             // vpextrq    rbx, xmm1, 1
-	LONG $0x397de3c4; WORD $0x01e1             // vextracti128    xmm1, ymm4, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x03 // vpextrb    byte [r8 + rbx], xmm1, 3
-	QUAD $0x01000001802484f6                   // test    byte [rsp + 384], 1
-	JNE  LBB0_1038
-
-LBB0_649:
-	QUAD $0x01000001602484f6             // test    byte [rsp + 352], 1
-	QUAD $0x00036024846f7dc5; BYTE $0x00 // vmovdqa    ymm8, yword [rsp + 864]
-	JE   LBB0_650
-
-LBB0_1039:
-	LONG $0x16f9e3c4; WORD $0x01fb             // vpextrq    rbx, xmm7, 1
-	LONG $0x397de3c4; WORD $0x01e1             // vextracti128    xmm1, ymm4, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x05 // vpextrb    byte [r8 + rbx], xmm1, 5
-	QUAD $0x01000001402484f6                   // test    byte [rsp + 320], 1
-	JNE  LBB0_1040
-
-LBB0_651:
-	QUAD $0x01000000982484f6 // test    byte [rsp + 152], 1
-	JE   LBB0_652
-
-LBB0_1041:
-	LONG $0x397de3c4; WORD $0x01f9             // vextracti128    xmm1, ymm7, 1
-	LONG $0x16f9e3c4; WORD $0x01cb             // vpextrq    rbx, xmm1, 1
-	LONG $0x397de3c4; WORD $0x01e1             // vextracti128    xmm1, ymm4, 1
-	LONG $0x1479c3c4; WORD $0x180c; BYTE $0x07 // vpextrb    byte [r8 + rbx], xmm1, 7
-	LONG $0x01c1f641                           // test    r9b, 1
-	LONG $0x244c8b44; BYTE $0x10               // mov    r9d, dword [rsp + 16]
-	JNE  LBB0_1042
-
-LBB0_653:
-	WORD $0xc1f6; BYTE $0x01 // test    cl, 1
-	JE   LBB0_654
-
-LBB0_1043:
-	LONG $0x16f9e3c4; WORD $0x01d1             // vpextrq    rcx, xmm2, 1
-	LONG $0x397de3c4; WORD $0x01e1             // vextracti128    xmm1, ymm4, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x09 // vpextrb    byte [r8 + rcx], xmm1, 9
-	LONG $0x01c3f641                           // test    r11b, 1
-	QUAD $0x00000130249c8b4c                   // mov    r11, qword [rsp + 304]
-	JNE  LBB0_1044
-
-LBB0_655:
-	WORD $0x01a8  // test    al, 1
-	JE   LBB0_656
-
-LBB0_1045:
-	LONG $0x397de3c4; WORD $0x01d1             // vextracti128    xmm1, ymm2, 1
-	LONG $0x16f9e3c4; WORD $0x01c9             // vpextrq    rcx, xmm1, 1
-	LONG $0x397de3c4; WORD $0x01e1             // vextracti128    xmm1, ymm4, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0b // vpextrb    byte [r8 + rcx], xmm1, 11
-	LONG $0x01c6f640                           // test    sil, 1
-	JNE  LBB0_1046
-
-LBB0_657:
-	LONG $0x01c2f641                     // test    r10b, 1
-	QUAD $0x00032024946ffdc5; BYTE $0x00 // vmovdqa    ymm2, yword [rsp + 800]
-	JE   LBB0_658
-
-LBB0_1047:
-	LONG $0x16f963c4; WORD $0x01f9             // vpextrq    rcx, xmm15, 1
-	LONG $0x397de3c4; WORD $0x01e1             // vextracti128    xmm1, ymm4, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0d // vpextrb    byte [r8 + rcx], xmm1, 13
-	LONG $0x01c5f641                           // test    r13b, 1
-	LONG $0x24548b4c; BYTE $0x30               // mov    r10, qword [rsp + 48]
-	JNE  LBB0_1048
-
-LBB0_659:
-	LONG $0x01c6f641 // test    r14b, 1
-	JE   LBB0_25
-
-LBB0_1049:
-	LONG $0x397d63c4; WORD $0x01f9             // vextracti128    xmm1, ymm15, 1
-	LONG $0x16f9e3c4; WORD $0x01c9             // vpextrq    rcx, xmm1, 1
-	LONG $0x397de3c4; WORD $0x01e1             // vextracti128    xmm1, ymm4, 1
-	LONG $0x1479c3c4; WORD $0x080c; BYTE $0x0f // vpextrb    byte [r8 + rcx], xmm1, 15
-	JMP  LBB0_25
-
-LBB0_1050:
-	WORD $0x394d; BYTE $0xd4 // cmp    r12, r10
-	JNE  LBB0_1055
-
-LBB0_1051:
-	MOVQ 960(SP), SP
-	VZEROUPPER
+	WORD $0x3941; BYTE $0xca // cmp    r10d, ecx
+	JGE  LBB0_4
+	WORD $0x8945; BYTE $0xd1 // mov    r9d, r10d
+	WORD $0xb60f; BYTE $0x07 // movzx    eax, byte [rdi]
+	WORD $0x0124             // and    al, 1
+	LONG $0x0a048842         // mov    byte [rdx + r9], al
+	WORD $0x894c; BYTE $0xce // mov    rsi, r9
+	LONG $0x01ce8348         // or    rsi, 1
+	WORD $0xce39             // cmp    esi, ecx
+	JGE  LBB0_4
+	WORD $0xb60f; BYTE $0x07 // movzx    eax, byte [rdi]
+	WORD $0xe8d0             // shr    al, 1
+	WORD $0x0124             // and    al, 1
+	WORD $0x0488; BYTE $0x32 // mov    byte [rdx + rsi], al
+	WORD $0x894c; BYTE $0xce // mov    rsi, r9
+	LONG $0x02ce8348         // or    rsi, 2
+	WORD $0xce39             // cmp    esi, ecx
+	JGE  LBB0_4
+	WORD $0xb60f; BYTE $0x07 // movzx    eax, byte [rdi]
+	WORD $0xe8c0; BYTE $0x02 // shr    al, 2
+	WORD $0x0124             // and    al, 1
+	WORD $0x0488; BYTE $0x32 // mov    byte [rdx + rsi], al
+	WORD $0x894c; BYTE $0xce // mov    rsi, r9
+	LONG $0x03ce8348         // or    rsi, 3
+	WORD $0xce39             // cmp    esi, ecx
+	JGE  LBB0_4
+	WORD $0xb60f; BYTE $0x07 // movzx    eax, byte [rdi]
+	WORD $0xe8c0; BYTE $0x03 // shr    al, 3
+	WORD $0x0124             // and    al, 1
+	WORD $0x0488; BYTE $0x32 // mov    byte [rdx + rsi], al
+	WORD $0x894c; BYTE $0xce // mov    rsi, r9
+	LONG $0x04ce8348         // or    rsi, 4
+	WORD $0xce39             // cmp    esi, ecx
+	JGE  LBB0_4
+	WORD $0xb60f; BYTE $0x07 // movzx    eax, byte [rdi]
+	WORD $0xe8c0; BYTE $0x04 // shr    al, 4
+	WORD $0x0124             // and    al, 1
+	WORD $0x0488; BYTE $0x32 // mov    byte [rdx + rsi], al
+	WORD $0x894c; BYTE $0xce // mov    rsi, r9
+	LONG $0x05ce8348         // or    rsi, 5
+	WORD $0xce39             // cmp    esi, ecx
+	JGE  LBB0_4
+	WORD $0xb60f; BYTE $0x07 // movzx    eax, byte [rdi]
+	WORD $0xe8c0; BYTE $0x05 // shr    al, 5
+	WORD $0x0124             // and    al, 1
+	WORD $0x0488; BYTE $0x32 // mov    byte [rdx + rsi], al
+	WORD $0x894c; BYTE $0xce // mov    rsi, r9
+	LONG $0x06ce8348         // or    rsi, 6
+	WORD $0xce39             // cmp    esi, ecx
+	JGE  LBB0_4
+	WORD $0xb60f; BYTE $0x07 // movzx    eax, byte [rdi]
+	WORD $0xe8c0; BYTE $0x06 // shr    al, 6
+	WORD $0x0124             // and    al, 1
+	WORD $0x0488; BYTE $0x32 // mov    byte [rdx + rsi], al
+	LONG $0x07c98349         // or    r9, 7
+	WORD $0x3941; BYTE $0xc9 // cmp    r9d, ecx
+	JGE  LBB0_4
+	WORD $0xb60f; BYTE $0x07 // movzx    eax, byte [rdi]
+	WORD $0xe8c0; BYTE $0x07 // shr    al, 7
+	LONG $0x0a048842         // mov    byte [rdx + r9], al
+	JMP  LBB0_4
+
+LBB0_5:
 	RET
-
-LBB0_1052:
-	LONG $0x244c8b44; BYTE $0x10 // mov    r9d, dword [rsp + 16]
-	LONG $0x24548b4c; BYTE $0x30 // mov    r10, qword [rsp + 48]
-	JMP  LBB0_1055
-
-LBB0_1054:
-	LONG $0x244c8b44; BYTE $0x10 // mov    r9d, dword [rsp + 16]
-	JMP  LBB0_1055

From 1ae84fcc06e2315dd66c7d563b7ada9045e173fd Mon Sep 17 00:00:00 2001
From: Eduardo Ponce <edponce00@gmail.com>
Date: Tue, 13 Jul 2021 14:48:59 -0400
Subject: [PATCH 546/719] ARROW-13288: [Python] Missing default values of
 kernel options in PyArrow

This PR ensures that default values for kernel options in PyArrow are consistent with the C++ defaults.

Closes #10683 from edponce/ARROW-13288-Missing-default-values-of-kernel-options

Authored-by: Eduardo Ponce <edponce00@gmail.com>
Signed-off-by: Benjamin Kietzman <bengilgit@gmail.com>
---
 python/pyarrow/_compute.pyx | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx
index 1b66f74eb74..d3267dc02d7 100644
--- a/python/pyarrow/_compute.pyx
+++ b/python/pyarrow/_compute.pyx
@@ -17,6 +17,8 @@
 
 # cython: language_level = 3
 
+import sys
+
 from cython.operator cimport dereference as deref
 
 from collections import namedtuple
@@ -791,12 +793,13 @@ class ExtractRegexOptions(_ExtractRegexOptions):
 
 
 cdef class _SliceOptions(FunctionOptions):
-    def _set_options(self, start, stop, step):
+    def _set_options(self, int64_t start, int64_t stop, int64_t step):
         self.wrapped.reset(new CSliceOptions(start, stop, step))
 
 
 class SliceOptions(_SliceOptions):
-    def __init__(self, start, stop, step=1):
+    def __init__(self, int64_t start, int64_t stop=sys.maxsize,
+                 int64_t step=1):
         self._set_options(start, stop, step)
 
 

From 9e1a723fbb2d770775f873572a5d548ff3fc3f01 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Tue, 13 Jul 2021 14:58:42 -0400
Subject: [PATCH 547/719] ARROW-8655: [C++][Python] Preserve partitioning
 information for a discovered Dataset

Closes #10661 from jorisvandenbossche/ARROW-8655-dataset-partitioning-info

Authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Signed-off-by: Benjamin Kietzman <bengilgit@gmail.com>
---
 cpp/src/arrow/dataset/discovery.cc           |   3 +-
 cpp/src/arrow/dataset/file_base.cc           |   4 +-
 cpp/src/arrow/dataset/file_base.h            |  11 +-
 cpp/src/arrow/dataset/file_parquet.cc        |   3 +-
 cpp/src/arrow/dataset/partition.h            |   2 +
 python/pyarrow/_dataset.pyx                  |  61 +++++++++++
 python/pyarrow/includes/libarrow_dataset.pxd |   5 +
 python/pyarrow/tests/test_dataset.py         | 102 +++++++++++++++----
 8 files changed, 170 insertions(+), 21 deletions(-)

diff --git a/cpp/src/arrow/dataset/discovery.cc b/cpp/src/arrow/dataset/discovery.cc
index e124c7abd91..0f9d479b9d6 100644
--- a/cpp/src/arrow/dataset/discovery.cc
+++ b/cpp/src/arrow/dataset/discovery.cc
@@ -274,7 +274,8 @@ Result<std::shared_ptr<Dataset>> FileSystemDatasetFactory::Finish(FinishOptions
     fragments.push_back(fragment);
   }
 
-  return FileSystemDataset::Make(schema, root_partition_, format_, fs_, fragments);
+  return FileSystemDataset::Make(std::move(schema), root_partition_, format_, fs_,
+                                 std::move(fragments), std::move(partitioning));
 }
 
 }  // namespace dataset
diff --git a/cpp/src/arrow/dataset/file_base.cc b/cpp/src/arrow/dataset/file_base.cc
index 3a67ea48378..d4f7e1e28b8 100644
--- a/cpp/src/arrow/dataset/file_base.cc
+++ b/cpp/src/arrow/dataset/file_base.cc
@@ -197,12 +197,14 @@ struct FileSystemDataset::FragmentSubtrees {
 Result<std::shared_ptr<FileSystemDataset>> FileSystemDataset::Make(
     std::shared_ptr<Schema> schema, compute::Expression root_partition,
     std::shared_ptr<FileFormat> format, std::shared_ptr<fs::FileSystem> filesystem,
-    std::vector<std::shared_ptr<FileFragment>> fragments) {
+    std::vector<std::shared_ptr<FileFragment>> fragments,
+    std::shared_ptr<Partitioning> partitioning) {
   std::shared_ptr<FileSystemDataset> out(
       new FileSystemDataset(std::move(schema), std::move(root_partition)));
   out->format_ = std::move(format);
   out->filesystem_ = std::move(filesystem);
   out->fragments_ = std::move(fragments);
+  out->partitioning_ = std::move(partitioning);
   out->SetupSubtreePruning();
   return out;
 }
diff --git a/cpp/src/arrow/dataset/file_base.h b/cpp/src/arrow/dataset/file_base.h
index f074e0f81da..36f110cb44d 100644
--- a/cpp/src/arrow/dataset/file_base.h
+++ b/cpp/src/arrow/dataset/file_base.h
@@ -228,6 +228,9 @@ class ARROW_DS_EXPORT FileSystemDataset : public Dataset {
   /// \param[in] filesystem the filesystem of each FileFragment, or nullptr if the
   ///            fragments wrap buffers.
   /// \param[in] fragments list of fragments to create the dataset from.
+  /// \param[in] partitioning the Partitioning object in case the dataset is created
+  ///            with a known partitioning (e.g. from a discovered partitioning
+  ///            through a DatasetFactory), or nullptr if not known.
   ///
   /// Note that fragments wrapping files resident in differing filesystems are not
   /// permitted; to work with multiple filesystems use a UnionDataset.
@@ -236,7 +239,8 @@ class ARROW_DS_EXPORT FileSystemDataset : public Dataset {
   static Result<std::shared_ptr<FileSystemDataset>> Make(
       std::shared_ptr<Schema> schema, compute::Expression root_partition,
       std::shared_ptr<FileFormat> format, std::shared_ptr<fs::FileSystem> filesystem,
-      std::vector<std::shared_ptr<FileFragment>> fragments);
+      std::vector<std::shared_ptr<FileFragment>> fragments,
+      std::shared_ptr<Partitioning> partitioning = NULLPTR);
 
   /// \brief Write a dataset.
   static Status Write(const FileSystemDatasetWriteOptions& write_options,
@@ -258,6 +262,10 @@ class ARROW_DS_EXPORT FileSystemDataset : public Dataset {
   /// \brief Return the filesystem. May be nullptr if the fragments wrap buffers.
   const std::shared_ptr<fs::FileSystem>& filesystem() const { return filesystem_; }
 
+  /// \brief Return the partitioning. May be nullptr if the dataset was not constructed
+  /// with a partitioning.
+  const std::shared_ptr<Partitioning>& partitioning() const { return partitioning_; }
+
   std::string ToString() const;
 
  protected:
@@ -277,6 +285,7 @@ class ARROW_DS_EXPORT FileSystemDataset : public Dataset {
   std::shared_ptr<FileFormat> format_;
   std::shared_ptr<fs::FileSystem> filesystem_;
   std::vector<std::shared_ptr<FileFragment>> fragments_;
+  std::shared_ptr<Partitioning> partitioning_;
 
   std::shared_ptr<FragmentSubtrees> subtrees_;
 };
diff --git a/cpp/src/arrow/dataset/file_parquet.cc b/cpp/src/arrow/dataset/file_parquet.cc
index 9e29926e837..30ebc304749 100644
--- a/cpp/src/arrow/dataset/file_parquet.cc
+++ b/cpp/src/arrow/dataset/file_parquet.cc
@@ -952,7 +952,8 @@ Result<std::shared_ptr<Dataset>> ParquetDatasetFactory::Finish(FinishOptions opt
 
   ARROW_ASSIGN_OR_RAISE(auto fragments, CollectParquetFragments(*partitioning));
   return FileSystemDataset::Make(std::move(schema), compute::literal(true), format_,
-                                 filesystem_, std::move(fragments));
+                                 filesystem_, std::move(fragments),
+                                 std::move(partitioning));
 }
 
 }  // namespace dataset
diff --git a/cpp/src/arrow/dataset/partition.h b/cpp/src/arrow/dataset/partition.h
index db3008f1d67..c074010e8e9 100644
--- a/cpp/src/arrow/dataset/partition.h
+++ b/cpp/src/arrow/dataset/partition.h
@@ -172,6 +172,8 @@ class ARROW_DS_EXPORT KeyValuePartitioning : public Partitioning {
 
   Result<std::string> Format(const compute::Expression& expr) const override;
 
+  const ArrayVector& dictionaries() const { return dictionaries_; }
+
  protected:
   KeyValuePartitioning(std::shared_ptr<Schema> schema, ArrayVector dictionaries,
                        KeyValuePartitioningOptions options)
diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx
index 562b7a5a3ad..0a179bdbe15 100644
--- a/python/pyarrow/_dataset.pyx
+++ b/python/pyarrow/_dataset.pyx
@@ -669,6 +669,25 @@ cdef class FileSystemDataset(Dataset):
     def filesystem(self):
         return FileSystem.wrap(self.filesystem_dataset.filesystem())
 
+    @property
+    def partitioning(self):
+        """
+        The partitioning of the Dataset source, if discovered.
+
+        If the FileSystemDataset is created using the ``dataset()`` factory
+        function with a partitioning specified, this will return the
+        finalized Partitioning object from the dataset discovery. In all
+        other cases, this returns None.
+        """
+        c_partitioning = self.filesystem_dataset.partitioning()
+        if c_partitioning.get() == nullptr:
+            return None
+        try:
+            return Partitioning.wrap(c_partitioning)
+        except TypeError:
+            # e.g. type_name "default"
+            return None
+
     cdef void init(self, const shared_ptr[CDataset]& sp):
         Dataset.init(self, sp)
         self.filesystem_dataset = <CFileSystemDataset*> sp.get()
@@ -2083,6 +2102,27 @@ cdef class DirectoryPartitioning(Partitioning):
         return PartitioningFactory.wrap(
             CDirectoryPartitioning.MakeFactory(c_field_names, c_options))
 
+    @property
+    def dictionaries(self):
+        """
+        The unique values for each partition field, if available.
+
+        Those values are only available if the Partitioning object was
+        created through dataset discovery from a PartitioningFactory, or
+        if the dictionaries were manually specified in the constructor.
+        If not available, this returns None.
+        """
+        cdef vector[shared_ptr[CArray]] c_arrays
+        c_arrays = self.directory_partitioning.dictionaries()
+        res = []
+        for arr in c_arrays:
+            if arr.get() == nullptr:
+                # Partitioning object has not been created through
+                # inspected Factory
+                return None
+            res.append(pyarrow_wrap_array(arr))
+        return res
+
 
 cdef class HivePartitioning(Partitioning):
     """
@@ -2214,6 +2254,27 @@ cdef class HivePartitioning(Partitioning):
         return PartitioningFactory.wrap(
             CHivePartitioning.MakeFactory(c_options))
 
+    @property
+    def dictionaries(self):
+        """
+        The unique values for each partition field, if available.
+
+        Those values are only available if the Partitioning object was
+        created through dataset discovery from a PartitioningFactory, or
+        if the dictionaries were manually specified in the constructor.
+        If not available, this returns None.
+        """
+        cdef vector[shared_ptr[CArray]] c_arrays
+        c_arrays = self.hive_partitioning.dictionaries()
+        res = []
+        for arr in c_arrays:
+            if arr.get() == nullptr:
+                # Partitioning object has not been created through
+                # inspected Factory
+                return None
+            res.append(pyarrow_wrap_array(arr))
+        return res
+
 
 cdef class DatasetFactory(_Weakrefable):
     """
diff --git a/python/pyarrow/includes/libarrow_dataset.pxd b/python/pyarrow/includes/libarrow_dataset.pxd
index 303285905cd..ede5775425f 100644
--- a/python/pyarrow/includes/libarrow_dataset.pxd
+++ b/python/pyarrow/includes/libarrow_dataset.pxd
@@ -283,6 +283,7 @@ cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil:
         vector[c_string] files()
         const shared_ptr[CFileFormat]& format() const
         const shared_ptr[CFileSystem]& filesystem() const
+        const shared_ptr[CPartitioning]& partitioning() const
 
     cdef cppclass CParquetFileFormatReaderOptions \
             "arrow::dataset::ParquetFileFormat::ReaderOptions":
@@ -372,6 +373,8 @@ cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil:
         shared_ptr[CPartitioningFactory] MakeFactory(
             vector[c_string] field_names, CPartitioningFactoryOptions)
 
+        vector[shared_ptr[CArray]] dictionaries() const
+
     cdef cppclass CHivePartitioning \
             "arrow::dataset::HivePartitioning"(CPartitioning):
         CHivePartitioning(shared_ptr[CSchema] schema,
@@ -382,6 +385,8 @@ cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil:
         shared_ptr[CPartitioningFactory] MakeFactory(
             CHivePartitioningFactoryOptions)
 
+        vector[shared_ptr[CArray]] dictionaries() const
+
     cdef cppclass CPartitioningOrFactory \
             "arrow::dataset::PartitioningOrFactory":
         CPartitioningOrFactory(shared_ptr[CPartitioning])
diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py
index 3c79d1281cd..00832069e87 100644
--- a/python/pyarrow/tests/test_dataset.py
+++ b/python/pyarrow/tests/test_dataset.py
@@ -496,6 +496,7 @@ def test_partitioning():
             pa.field('key', pa.float64())
         ])
     )
+    assert partitioning.dictionaries is None
     expr = partitioning.parse('/3/3.14')
     assert isinstance(expr, ds.Expression)
 
@@ -516,6 +517,7 @@ def test_partitioning():
         ]),
         null_fallback='xyz'
     )
+    assert partitioning.dictionaries is None
     expr = partitioning.parse('/alpha=0/beta=3')
     expected = (
         (ds.field('alpha') == ds.scalar(0)) &
@@ -1650,6 +1652,7 @@ def test_directory_partitioning_dictionary_key(mockfs):
     dataset = ds.dataset(
         "subdir", format="parquet", filesystem=mockfs, partitioning=part
     )
+    assert dataset.partitioning.schema == schema
     table = dataset.to_table()
 
     assert table.column('group').type.equals(schema.types[0])
@@ -1669,6 +1672,7 @@ def test_hive_partitioning_dictionary_key(multisourcefs):
     dataset = ds.dataset(
         "hive", format="parquet", filesystem=multisourcefs, partitioning=part
     )
+    assert dataset.partitioning.schema == schema
     table = dataset.to_table()
 
     year_dictionary = list(range(2006, 2011))
@@ -1999,39 +2003,46 @@ def test_scan_iterator(use_threads, use_async):
             scanner.to_table()
 
 
-@pytest.mark.parquet
-def test_open_dataset_partitioned_directory(tempdir, dataset_reader):
+def _create_partitioned_dataset(basedir):
     import pyarrow.parquet as pq
     table = pa.table({'a': range(9), 'b': [0.] * 4 + [1.] * 5})
 
-    path = tempdir / "dataset"
+    path = basedir / "dataset-partitioned"
     path.mkdir()
 
-    for part in range(3):
-        part = path / "part={}".format(part)
+    for i in range(3):
+        part = path / "part={}".format(i)
         part.mkdir()
-        pq.write_table(table, part / "test.parquet")
+        pq.write_table(table.slice(3*i, 3), part / "test.parquet")
+
+    full_table = table.append_column(
+        "part", pa.array(np.repeat([0, 1, 2], 3), type=pa.int32()))
+
+    return full_table, path
+
+
+@pytest.mark.parquet
+def test_open_dataset_partitioned_directory(tempdir, dataset_reader):
+    full_table, path = _create_partitioned_dataset(tempdir)
 
     # no partitioning specified, just read all individual files
-    full_table = pa.concat_tables([table] * 3)
-    _check_dataset_from_path(path, full_table, dataset_reader)
+    table = full_table.select(['a', 'b'])
+    _check_dataset_from_path(path, table, dataset_reader)
 
     # specify partition scheme with discovery
     dataset = ds.dataset(
         str(path), partitioning=ds.partitioning(flavor="hive"))
-    expected_schema = table.schema.append(pa.field("part", pa.int32()))
-    assert dataset.schema.equals(expected_schema)
+    assert dataset.schema.equals(full_table.schema)
 
     # specify partition scheme with discovery and relative path
     with change_cwd(tempdir):
-        dataset = ds.dataset(
-            "dataset/", partitioning=ds.partitioning(flavor="hive"))
-        expected_schema = table.schema.append(pa.field("part", pa.int32()))
-        assert dataset.schema.equals(expected_schema)
+        dataset = ds.dataset("dataset-partitioned/",
+                             partitioning=ds.partitioning(flavor="hive"))
+        assert dataset.schema.equals(full_table.schema)
 
     # specify partition scheme with string short-cut
     dataset = ds.dataset(str(path), partitioning="hive")
-    assert dataset.schema.equals(expected_schema)
+    assert dataset.schema.equals(full_table.schema)
 
     # specify partition scheme with explicit scheme
     dataset = ds.dataset(
@@ -2042,8 +2053,8 @@ def test_open_dataset_partitioned_directory(tempdir, dataset_reader):
     assert dataset.schema.equals(expected_schema)
 
     result = dataset.to_table()
-    expected = full_table.append_column(
-        "part", pa.array(np.repeat([0, 1, 2], 9), type=pa.int8()))
+    expected = table.append_column(
+        "part", pa.array(np.repeat([0, 1, 2], 3), type=pa.int8()))
     assert result.equals(expected)
 
 
@@ -2960,6 +2971,63 @@ def test_dataset_project_columns(tempdir, dataset_reader):
         dataset_reader.to_table(dataset, columns={"A": "A"})
 
 
+@pytest.mark.pandas
+@pytest.mark.parquet
+def test_dataset_preserved_partitioning(tempdir):
+    # ARROW-8655
+
+    # through discovery, but without partitioning
+    _, path = _create_single_file(tempdir)
+    dataset = ds.dataset(path)
+    assert dataset.partitioning is None
+
+    # through discovery, with hive partitioning but not specified
+    full_table, path = _create_partitioned_dataset(tempdir)
+    dataset = ds.dataset(path)
+    assert dataset.partitioning is None
+
+    # through discovery, with hive partitioning (from a partitioning factory)
+    dataset = ds.dataset(path, partitioning="hive")
+    part = dataset.partitioning
+    assert part is not None
+    assert isinstance(part, ds.HivePartitioning)
+    assert part.schema == pa.schema([("part", pa.int32())])
+    assert len(part.dictionaries) == 1
+    assert part.dictionaries[0] == pa.array([0, 1, 2], pa.int32())
+
+    # through discovery, with hive partitioning (from a partitioning object)
+    part = ds.partitioning(pa.schema([("part", pa.int32())]), flavor="hive")
+    assert isinstance(part, ds.HivePartitioning)  # not a factory
+    assert part.dictionaries is None
+    dataset = ds.dataset(path, partitioning=part)
+    part = dataset.partitioning
+    assert isinstance(part, ds.HivePartitioning)
+    assert part.schema == pa.schema([("part", pa.int32())])
+    # TODO is this expected?
+    assert part.dictionaries is None
+
+    # through manual creation -> not available
+    dataset = ds.dataset(path, partitioning="hive")
+    dataset2 = ds.FileSystemDataset(
+        list(dataset.get_fragments()), schema=dataset.schema,
+        format=dataset.format, filesystem=dataset.filesystem
+    )
+    assert dataset2.partitioning is None
+
+    # through discovery with ParquetDatasetFactory
+    root_path = tempdir / "data-partitioned-metadata"
+    metadata_path, _ = _create_parquet_dataset_partitioned(root_path)
+    dataset = ds.parquet_dataset(metadata_path, partitioning="hive")
+    part = dataset.partitioning
+    assert part is not None
+    assert isinstance(part, ds.HivePartitioning)
+    assert part.schema == pa.schema([("part", pa.string())])
+    assert len(part.dictionaries) == 1
+    # will be fixed by ARROW-13153 (order is not preserved at the moment)
+    # assert part.dictionaries[0] == pa.array(["a", "b"], pa.string())
+    assert set(part.dictionaries[0].to_pylist()) == {"a", "b"}
+
+
 @pytest.mark.parquet
 @pytest.mark.pandas
 def test_write_to_dataset_given_null_just_works(tempdir):

From cba7f48126e02eddf1ceefb7e76b1b8bb2576cb3 Mon Sep 17 00:00:00 2001
From: niranda perera <niranda.perera@gmail.com>
Date: Tue, 13 Jul 2021 15:14:23 -0400
Subject: [PATCH 548/719] ARROW-13321 - [C++][Python] MakeArrayFromScalar
 doesn't work for FixedSizeBinaryType

This fixes ARROW-13321 and adds unboxing capability for FSB type

Closes #10708 from nirandaperera/ARROW-13321

Authored-by: niranda perera <niranda.perera@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/src/arrow/array/array_test.cc               |  6 ++++++
 cpp/src/arrow/array/util.cc                     | 17 +++++++++++------
 .../arrow/compute/kernels/codegen_internal.h    |  2 +-
 3 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/cpp/src/arrow/array/array_test.cc b/cpp/src/arrow/array/array_test.cc
index 682baab208d..9bc38f2f6ad 100644
--- a/cpp/src/arrow/array/array_test.cc
+++ b/cpp/src/arrow/array/array_test.cc
@@ -471,6 +471,12 @@ TEST_F(TestArray, TestMakeArrayFromScalar) {
       ASSERT_OK(array->ValidateFull());
       ASSERT_EQ(array->length(), length);
       ASSERT_EQ(array->null_count(), 0);
+
+      // test case for ARROW-13321
+      for (int64_t i : std::vector<int64_t>{0, length / 2, length - 1}) {
+        ASSERT_OK_AND_ASSIGN(auto s, array->GetScalar(i));
+        AssertScalarsEqual(*s, *scalar, /*verbose=*/true);
+      }
     }
   }
 
diff --git a/cpp/src/arrow/array/util.cc b/cpp/src/arrow/array/util.cc
index d4852234cd0..688cb20cb9a 100644
--- a/cpp/src/arrow/array/util.cc
+++ b/cpp/src/arrow/array/util.cc
@@ -510,16 +510,21 @@ class RepeatedArrayFactory {
   }
 
   template <typename T>
-  enable_if_t<is_number_type<T>::value || is_fixed_size_binary_type<T>::value ||
-                  is_temporal_type<T>::value,
-              Status>
-  Visit(const T&) {
+  enable_if_t<is_number_type<T>::value || is_temporal_type<T>::value, Status> Visit(
+      const T&) {
     auto value = checked_cast<const typename TypeTraits<T>::ScalarType&>(scalar_).value;
     return FinishFixedWidth(&value, sizeof(value));
   }
 
-  Status Visit(const Decimal128Type&) {
-    auto value = checked_cast<const Decimal128Scalar&>(scalar_).value.ToBytes();
+  Status Visit(const FixedSizeBinaryType& type) {
+    auto value = checked_cast<const FixedSizeBinaryScalar&>(scalar_).value;
+    return FinishFixedWidth(value->data(), type.byte_width());
+  }
+
+  template <typename T>
+  enable_if_decimal<T, Status> Visit(const T&) {
+    using ScalarType = typename TypeTraits<T>::ScalarType;
+    auto value = checked_cast<const ScalarType&>(scalar_).value.ToBytes();
     return FinishFixedWidth(value.data(), value.size());
   }
 
diff --git a/cpp/src/arrow/compute/kernels/codegen_internal.h b/cpp/src/arrow/compute/kernels/codegen_internal.h
index a68bb970b4a..33b7006491a 100644
--- a/cpp/src/arrow/compute/kernels/codegen_internal.h
+++ b/cpp/src/arrow/compute/kernels/codegen_internal.h
@@ -311,7 +311,7 @@ struct UnboxScalar<Type, enable_if_has_c_type<Type>> {
 };
 
 template <typename Type>
-struct UnboxScalar<Type, enable_if_base_binary<Type>> {
+struct UnboxScalar<Type, enable_if_has_string_view<Type>> {
   static util::string_view Unbox(const Scalar& val) {
     if (!val.is_valid) return util::string_view();
     return util::string_view(*checked_cast<const BaseBinaryScalar&>(val).value);

From 7114c4b6fdb639b3500d77cfd66649af8c5c5e6b Mon Sep 17 00:00:00 2001
From: Benjamin Kietzman <bengilgit@gmail.com>
Date: Tue, 13 Jul 2021 16:58:26 -0400
Subject: [PATCH 549/719] ARROW-13313: [C++][Compute] Add scalar aggregate node

This is a pretty trivial node but it's needed for completeness and will give bindings a pipeline breaker to experiment with until #10660 merges

Closes #10705 from bkietz/13313-Add-ScalarAggregateNode

Authored-by: Benjamin Kietzman <bengilgit@gmail.com>
Signed-off-by: Benjamin Kietzman <bengilgit@gmail.com>
---
 cpp/src/arrow/compute/exec.cc                 |  16 ++
 cpp/src/arrow/compute/exec.h                  |   3 +
 cpp/src/arrow/compute/exec/exec_plan.cc       | 229 +++++++++++++++++-
 cpp/src/arrow/compute/exec/exec_plan.h        |  20 +-
 cpp/src/arrow/compute/exec/plan_test.cc       |  34 ++-
 cpp/src/arrow/compute/exec/test_util.cc       |   2 +-
 cpp/src/arrow/compute/kernel.cc               |  20 ++
 cpp/src/arrow/compute/kernel.h                |  10 +
 .../kernels/aggregate_basic_internal.h        |   4 +-
 cpp/src/arrow/dataset/scanner.cc              | 113 +++++++--
 cpp/src/arrow/dataset/scanner.h               |  16 +-
 cpp/src/arrow/dataset/scanner_test.cc         |  81 +++++++
 cpp/src/arrow/dataset/test_util.h             |   2 +-
 cpp/src/arrow/testing/matchers.h              |   5 +-
 cpp/src/arrow/util/async_generator.h          |   2 +-
 cpp/src/arrow/util/async_generator_test.cc    |   2 +-
 16 files changed, 514 insertions(+), 45 deletions(-)

diff --git a/cpp/src/arrow/compute/exec.cc b/cpp/src/arrow/compute/exec.cc
index 8a469e3fe12..63f8d39f551 100644
--- a/cpp/src/arrow/compute/exec.cc
+++ b/cpp/src/arrow/compute/exec.cc
@@ -141,6 +141,22 @@ Result<ExecBatch> ExecBatch::Make(std::vector<Datum> values) {
   return ExecBatch(std::move(values), length);
 }
 
+Result<std::shared_ptr<RecordBatch>> ExecBatch::ToRecordBatch(
+    std::shared_ptr<Schema> schema, MemoryPool* pool) const {
+  ArrayVector columns(schema->num_fields());
+
+  for (size_t i = 0; i < columns.size(); ++i) {
+    const Datum& value = values[i];
+    if (value.is_array()) {
+      columns[i] = value.make_array();
+      continue;
+    }
+    ARROW_ASSIGN_OR_RAISE(columns[i], MakeArrayFromScalar(*value.scalar(), length, pool));
+  }
+
+  return RecordBatch::Make(std::move(schema), length, std::move(columns));
+}
+
 namespace {
 
 Result<std::shared_ptr<Buffer>> AllocateDataBuffer(KernelContext* ctx, int64_t length,
diff --git a/cpp/src/arrow/compute/exec.h b/cpp/src/arrow/compute/exec.h
index 77d04b86ceb..de1b695de48 100644
--- a/cpp/src/arrow/compute/exec.h
+++ b/cpp/src/arrow/compute/exec.h
@@ -182,6 +182,9 @@ struct ARROW_EXPORT ExecBatch {
 
   static Result<ExecBatch> Make(std::vector<Datum> values);
 
+  Result<std::shared_ptr<RecordBatch>> ToRecordBatch(
+      std::shared_ptr<Schema> schema, MemoryPool* pool = default_memory_pool()) const;
+
   /// The values representing positional arguments to be passed to a kernel's
   /// exec function for processing.
   std::vector<Datum> values;
diff --git a/cpp/src/arrow/compute/exec/exec_plan.cc b/cpp/src/arrow/compute/exec/exec_plan.cc
index d0d50af1ac7..35e4af3889a 100644
--- a/cpp/src/arrow/compute/exec/exec_plan.cc
+++ b/cpp/src/arrow/compute/exec/exec_plan.cc
@@ -18,12 +18,17 @@
 #include "arrow/compute/exec/exec_plan.h"
 
 #include <mutex>
+#include <thread>
+#include <unordered_map>
 #include <unordered_set>
 
+#include "arrow/array/util.h"
 #include "arrow/compute/api_vector.h"
 #include "arrow/compute/exec.h"
 #include "arrow/compute/exec/expression.h"
+#include "arrow/compute/registry.h"
 #include "arrow/datum.h"
+#include "arrow/record_batch.h"
 #include "arrow/result.h"
 #include "arrow/util/async_generator.h"
 #include "arrow/util/checked_cast.h"
@@ -33,6 +38,7 @@
 namespace arrow {
 
 using internal::checked_cast;
+using internal::checked_pointer_cast;
 
 namespace compute {
 
@@ -489,15 +495,23 @@ struct ProjectNode : ExecNode {
 };
 
 Result<ExecNode*> MakeProjectNode(ExecNode* input, std::string label,
-                                  std::vector<Expression> exprs) {
+                                  std::vector<Expression> exprs,
+                                  std::vector<std::string> names) {
   FieldVector fields(exprs.size());
 
+  if (names.size() == 0) {
+    names.resize(exprs.size());
+    for (size_t i = 0; i < exprs.size(); ++i) {
+      names[i] = exprs[i].ToString();
+    }
+  }
+
   int i = 0;
   for (auto& expr : exprs) {
     if (!expr.IsBound()) {
       ARROW_ASSIGN_OR_RAISE(expr, expr.Bind(*input->output_schema()));
     }
-    fields[i] = field(expr.ToString(), expr.type());
+    fields[i] = field(std::move(names[i]), expr.type());
     ++i;
   }
 
@@ -552,15 +566,16 @@ struct SinkNode : ExecNode {
     ++num_received_;
     if (num_received_ == emit_stop_) {
       lock.unlock();
+      producer_.Push(std::move(batch));
       Finish();
-      lock.lock();
+      return;
     }
 
     if (emit_stop_ != -1) {
       DCHECK_LE(seq_num, emit_stop_);
     }
-    lock.unlock();
 
+    lock.unlock();
     producer_.Push(std::move(batch));
   }
 
@@ -574,8 +589,10 @@ struct SinkNode : ExecNode {
   void InputFinished(ExecNode* input, int seq_stop) override {
     std::unique_lock<std::mutex> lock(mutex_);
     emit_stop_ = seq_stop;
-    lock.unlock();
-    Finish();
+    if (num_received_ == emit_stop_) {
+      lock.unlock();
+      Finish();
+    }
   }
 
  private:
@@ -601,5 +618,205 @@ AsyncGenerator<util::optional<ExecBatch>> MakeSinkNode(ExecNode* input,
   return out;
 }
 
+std::shared_ptr<RecordBatchReader> MakeGeneratorReader(
+    std::shared_ptr<Schema> schema,
+    std::function<Future<util::optional<ExecBatch>>()> gen, MemoryPool* pool) {
+  struct Impl : RecordBatchReader {
+    std::shared_ptr<Schema> schema() const override { return schema_; }
+
+    Status ReadNext(std::shared_ptr<RecordBatch>* record_batch) override {
+      ARROW_ASSIGN_OR_RAISE(auto batch, iterator_.Next());
+      if (batch) {
+        ARROW_ASSIGN_OR_RAISE(*record_batch, batch->ToRecordBatch(schema_, pool_));
+      } else {
+        *record_batch = IterationEnd<std::shared_ptr<RecordBatch>>();
+      }
+      return Status::OK();
+    }
+
+    MemoryPool* pool_;
+    std::shared_ptr<Schema> schema_;
+    Iterator<util::optional<ExecBatch>> iterator_;
+  };
+
+  auto out = std::make_shared<Impl>();
+  out->pool_ = pool;
+  out->schema_ = std::move(schema);
+  out->iterator_ = MakeGeneratorIterator(std::move(gen));
+  return out;
+}
+
+struct ScalarAggregateNode : ExecNode {
+  ScalarAggregateNode(ExecNode* input, std::string label,
+                      std::shared_ptr<Schema> output_schema,
+                      std::vector<const ScalarAggregateKernel*> kernels,
+                      std::vector<std::vector<std::unique_ptr<KernelState>>> states)
+      : ExecNode(input->plan(), std::move(label), {input}, {"target"},
+                 /*output_schema=*/std::move(output_schema),
+                 /*num_outputs=*/1),
+        kernels_(std::move(kernels)),
+        states_(std::move(states)) {}
+
+  const char* kind_name() override { return "ScalarAggregateNode"; }
+
+  Status DoConsume(const ExecBatch& batch, size_t thread_index) {
+    for (size_t i = 0; i < kernels_.size(); ++i) {
+      KernelContext batch_ctx{plan()->exec_context()};
+      batch_ctx.SetState(states_[i][thread_index].get());
+      ExecBatch single_column_batch{{batch.values[i]}, batch.length};
+      RETURN_NOT_OK(kernels_[i]->consume(&batch_ctx, single_column_batch));
+    }
+    return Status::OK();
+  }
+
+  void InputReceived(ExecNode* input, int seq, ExecBatch batch) override {
+    DCHECK_EQ(input, inputs_[0]);
+
+    std::unique_lock<std::mutex> lock(mutex_);
+    auto it =
+        thread_indices_.emplace(std::this_thread::get_id(), thread_indices_.size()).first;
+    ++num_received_;
+    auto thread_index = it->second;
+
+    lock.unlock();
+
+    Status st = DoConsume(std::move(batch), thread_index);
+    if (!st.ok()) {
+      outputs_[0]->ErrorReceived(this, std::move(st));
+      return;
+    }
+
+    lock.lock();
+    st = MaybeFinish(&lock);
+    if (!st.ok()) {
+      outputs_[0]->ErrorReceived(this, std::move(st));
+    }
+  }
+
+  void ErrorReceived(ExecNode* input, Status error) override {
+    DCHECK_EQ(input, inputs_[0]);
+    outputs_[0]->ErrorReceived(this, std::move(error));
+  }
+
+  void InputFinished(ExecNode* input, int seq) override {
+    DCHECK_EQ(input, inputs_[0]);
+    std::unique_lock<std::mutex> lock(mutex_);
+    num_total_ = seq;
+    Status st = MaybeFinish(&lock);
+
+    if (!st.ok()) {
+      outputs_[0]->ErrorReceived(this, std::move(st));
+    }
+  }
+
+  Status StartProducing() override {
+    finished_ = Future<>::Make();
+    // Scalar aggregates will only output a single batch
+    outputs_[0]->InputFinished(this, 1);
+    return Status::OK();
+  }
+
+  void PauseProducing(ExecNode* output) override {}
+
+  void ResumeProducing(ExecNode* output) override {}
+
+  void StopProducing(ExecNode* output) override {
+    DCHECK_EQ(output, outputs_[0]);
+    StopProducing();
+  }
+
+  void StopProducing() override {
+    inputs_[0]->StopProducing(this);
+    finished_.MarkFinished();
+  }
+
+  Future<> finished() override { return finished_; }
+
+ private:
+  Status MaybeFinish(std::unique_lock<std::mutex>* lock) {
+    if (num_received_ != num_total_) return Status::OK();
+
+    if (finished_.is_finished()) return Status::OK();
+
+    ExecBatch batch{{}, 1};
+    batch.values.resize(kernels_.size());
+
+    for (size_t i = 0; i < kernels_.size(); ++i) {
+      KernelContext ctx{plan()->exec_context()};
+      ARROW_ASSIGN_OR_RAISE(auto merged, ScalarAggregateKernel::MergeAll(
+                                             kernels_[i], &ctx, std::move(states_[i])));
+      RETURN_NOT_OK(kernels_[i]->finalize(&ctx, &batch.values[i]));
+    }
+    lock->unlock();
+
+    outputs_[0]->InputReceived(this, 0, batch);
+
+    finished_.MarkFinished();
+    return Status::OK();
+  }
+
+  Future<> finished_ = Future<>::MakeFinished();
+  std::vector<const ScalarAggregateKernel*> kernels_;
+  std::vector<std::vector<std::unique_ptr<KernelState>>> states_;
+  std::unordered_map<std::thread::id, size_t> thread_indices_;
+  std::mutex mutex_;
+  int num_received_ = 0, num_total_;
+};
+
+Result<ExecNode*> MakeScalarAggregateNode(ExecNode* input, std::string label,
+                                          std::vector<internal::Aggregate> aggregates) {
+  if (input->output_schema()->num_fields() != static_cast<int>(aggregates.size())) {
+    return Status::Invalid("Provided ", aggregates.size(),
+                           " aggregates, expected one for each field of ",
+                           input->output_schema()->ToString());
+  }
+
+  auto exec_ctx = input->plan()->exec_context();
+
+  std::vector<const ScalarAggregateKernel*> kernels(aggregates.size());
+  std::vector<std::vector<std::unique_ptr<KernelState>>> states(kernels.size());
+  FieldVector fields(kernels.size());
+
+  for (size_t i = 0; i < kernels.size(); ++i) {
+    ARROW_ASSIGN_OR_RAISE(auto function,
+                          exec_ctx->func_registry()->GetFunction(aggregates[i].function));
+
+    if (function->kind() != Function::SCALAR_AGGREGATE) {
+      return Status::Invalid("Provided non ScalarAggregateFunction ",
+                             aggregates[i].function);
+    }
+
+    auto in_type = ValueDescr::Array(input->output_schema()->fields()[i]->type());
+
+    ARROW_ASSIGN_OR_RAISE(const Kernel* kernel, function->DispatchExact({in_type}));
+    kernels[i] = static_cast<const ScalarAggregateKernel*>(kernel);
+
+    if (aggregates[i].options == nullptr) {
+      aggregates[i].options = function->default_options();
+    }
+
+    KernelContext kernel_ctx{exec_ctx};
+    states[i].resize(exec_ctx->executor() ? exec_ctx->executor()->GetCapacity() : 1);
+    RETURN_NOT_OK(Kernel::InitAll(&kernel_ctx,
+                                  KernelInitArgs{kernels[i],
+                                                 {
+                                                     in_type,
+                                                 },
+                                                 aggregates[i].options},
+                                  &states[i]));
+
+    // pick one to resolve the kernel signature
+    kernel_ctx.SetState(states[i][0].get());
+    ARROW_ASSIGN_OR_RAISE(
+        auto descr, kernels[i]->signature->out_type().Resolve(&kernel_ctx, {in_type}));
+
+    fields[i] = field(aggregates[i].function, std::move(descr.type));
+  }
+
+  return input->plan()->EmplaceNode<ScalarAggregateNode>(
+      input, std::move(label), schema(std::move(fields)), std::move(kernels),
+      std::move(states));
+}
+
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/compute/exec/exec_plan.h b/cpp/src/arrow/compute/exec/exec_plan.h
index 6c29ddfa7a6..c36c174af05 100644
--- a/cpp/src/arrow/compute/exec/exec_plan.h
+++ b/cpp/src/arrow/compute/exec/exec_plan.h
@@ -22,6 +22,7 @@
 #include <string>
 #include <vector>
 
+#include "arrow/compute/api_aggregate.h"
 #include "arrow/compute/exec.h"
 #include "arrow/compute/type_fwd.h"
 #include "arrow/type_fwd.h"
@@ -243,12 +244,19 @@ ExecNode* MakeSourceNode(ExecPlan* plan, std::string label,
 
 /// \brief Add a sink node which forwards to an AsyncGenerator<ExecBatch>
 ///
-/// Emitted batches will not be ordered; instead they will be tagged with the `seq` at
-/// which they were received.
+/// Emitted batches will not be ordered.
 ARROW_EXPORT
 std::function<Future<util::optional<ExecBatch>>()> MakeSinkNode(ExecNode* input,
                                                                 std::string label);
 
+/// \brief Wrap an ExecBatch generator in a RecordBatchReader.
+///
+/// The RecordBatchReader does not impose any ordering on emitted batches.
+ARROW_EXPORT
+std::shared_ptr<RecordBatchReader> MakeGeneratorReader(
+    std::shared_ptr<Schema>, std::function<Future<util::optional<ExecBatch>>()>,
+    MemoryPool*);
+
 /// \brief Make a node which excludes some rows from batches passed through it
 ///
 /// The filter Expression will be evaluated against each batch which is pushed to
@@ -265,9 +273,15 @@ Result<ExecNode*> MakeFilterNode(ExecNode* input, std::string label, Expression
 /// this node to produce a corresponding output column.
 ///
 /// If exprs are not already bound, they will be bound against the input's schema.
+/// If names are not provided, the string representations of exprs will be used.
 ARROW_EXPORT
 Result<ExecNode*> MakeProjectNode(ExecNode* input, std::string label,
-                                  std::vector<Expression> exprs);
+                                  std::vector<Expression> exprs,
+                                  std::vector<std::string> names = {});
+
+ARROW_EXPORT
+Result<ExecNode*> MakeScalarAggregateNode(ExecNode* input, std::string label,
+                                          std::vector<internal::Aggregate> aggregates);
 
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/compute/exec/plan_test.cc b/cpp/src/arrow/compute/exec/plan_test.cc
index 9ebafc42668..7d412e67c5c 100644
--- a/cpp/src/arrow/compute/exec/plan_test.cc
+++ b/cpp/src/arrow/compute/exec/plan_test.cc
@@ -206,7 +206,7 @@ Result<ExecNode*> MakeTestSourceNode(ExecPlan* plan, std::string label,
                                      bool slow) {
   DCHECK_GT(batches_with_schema.batches.size(), 0);
 
-  auto opt_batches = internal::MapVector(
+  auto opt_batches = ::arrow::internal::MapVector(
       [](ExecBatch batch) { return util::make_optional(std::move(batch)); },
       std::move(batches_with_schema.batches));
 
@@ -216,10 +216,10 @@ Result<ExecNode*> MakeTestSourceNode(ExecPlan* plan, std::string label,
     // emulate batches completing initial decode-after-scan on a cpu thread
     ARROW_ASSIGN_OR_RAISE(
         gen, MakeBackgroundGenerator(MakeVectorIterator(std::move(opt_batches)),
-                                     internal::GetCpuThreadPool()));
+                                     ::arrow::internal::GetCpuThreadPool()));
 
     // ensure that callbacks are not executed immediately on a background thread
-    gen = MakeTransferredGenerator(std::move(gen), internal::GetCpuThreadPool());
+    gen = MakeTransferredGenerator(std::move(gen), ::arrow::internal::GetCpuThreadPool());
   } else {
     gen = MakeVectorGenerator(std::move(opt_batches));
   }
@@ -245,7 +245,7 @@ Future<std::vector<ExecBatch>> StartAndCollect(
   return AllComplete({plan->finished(), Future<>(collected_fut)})
       .Then([collected_fut]() -> Result<std::vector<ExecBatch>> {
         ARROW_ASSIGN_OR_RAISE(auto collected, collected_fut.result());
-        return internal::MapVector(
+        return ::arrow::internal::MapVector(
             [](util::optional<ExecBatch> batch) { return std::move(*batch); },
             std::move(collected));
       });
@@ -412,7 +412,8 @@ TEST(ExecPlanExecution, SourceProjectSink) {
     ASSERT_OK_AND_ASSIGN(expr, expr.Bind(*basic_data.schema));
   }
 
-  ASSERT_OK_AND_ASSIGN(auto projection, MakeProjectNode(source, "project", exprs));
+  ASSERT_OK_AND_ASSIGN(auto projection,
+                       MakeProjectNode(source, "project", exprs, {"!bool", "i32 + 1"}));
 
   auto sink_gen = MakeSinkNode(projection, "sink");
 
@@ -423,5 +424,28 @@ TEST(ExecPlanExecution, SourceProjectSink) {
                                      "[[null, 6], [true, 7], [true, 8]]")}))));
 }
 
+TEST(ExecPlanExecution, SourceScalarAggSink) {
+  ASSERT_OK_AND_ASSIGN(auto plan, ExecPlan::Make());
+
+  auto basic_data = MakeBasicBatches();
+
+  ASSERT_OK_AND_ASSIGN(auto source,
+                       MakeTestSourceNode(plan.get(), "source", basic_data,
+                                          /*parallel=*/false, /*slow=*/false));
+
+  ASSERT_OK_AND_ASSIGN(auto scalar_agg,
+                       MakeScalarAggregateNode(source, "scalar_agg",
+                                               {{"sum", nullptr}, {"any", nullptr}}));
+
+  auto sink_gen = MakeSinkNode(scalar_agg, "sink");
+
+  ASSERT_THAT(
+      StartAndCollect(plan.get(), sink_gen),
+      Finishes(ResultWith(UnorderedElementsAreArray({
+          ExecBatchFromJSON({ValueDescr::Scalar(int64()), ValueDescr::Scalar(boolean())},
+                            "[[22, true]]"),
+      }))));
+}
+
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/compute/exec/test_util.cc b/cpp/src/arrow/compute/exec/test_util.cc
index bd203b354f0..8cc6200ea40 100644
--- a/cpp/src/arrow/compute/exec/test_util.cc
+++ b/cpp/src/arrow/compute/exec/test_util.cc
@@ -134,7 +134,7 @@ ExecNode* MakeDummyNode(ExecPlan* plan, std::string label, std::vector<ExecNode*
 
 ExecBatch ExecBatchFromJSON(const std::vector<ValueDescr>& descrs,
                             util::string_view json) {
-  auto fields = internal::MapVector(
+  auto fields = ::arrow::internal::MapVector(
       [](const ValueDescr& descr) { return field("", descr.type); }, descrs);
 
   ExecBatch batch{*RecordBatchFromJSON(schema(std::move(fields)), json)};
diff --git a/cpp/src/arrow/compute/kernel.cc b/cpp/src/arrow/compute/kernel.cc
index 8fa740ed247..6cdd17adcc9 100644
--- a/cpp/src/arrow/compute/kernel.cc
+++ b/cpp/src/arrow/compute/kernel.cc
@@ -59,6 +59,26 @@ Result<std::shared_ptr<ResizableBuffer>> KernelContext::AllocateBitmap(int64_t n
   return result;
 }
 
+Status Kernel::InitAll(KernelContext* ctx, const KernelInitArgs& args,
+                       std::vector<std::unique_ptr<KernelState>>* states) {
+  for (auto& state : *states) {
+    ARROW_ASSIGN_OR_RAISE(state, args.kernel->init(ctx, args));
+  }
+  return Status::OK();
+}
+
+Result<std::unique_ptr<KernelState>> ScalarAggregateKernel::MergeAll(
+    const ScalarAggregateKernel* kernel, KernelContext* ctx,
+    std::vector<std::unique_ptr<KernelState>> states) {
+  auto out = std::move(states.back());
+  states.pop_back();
+  ctx->SetState(out.get());
+  for (auto& state : states) {
+    RETURN_NOT_OK(kernel->merge(ctx, std::move(*state), out.get()));
+  }
+  return std::move(out);
+}
+
 // ----------------------------------------------------------------------
 // Some basic TypeMatcher implementations
 
diff --git a/cpp/src/arrow/compute/kernel.h b/cpp/src/arrow/compute/kernel.h
index c88c924817c..50b1dd8e55e 100644
--- a/cpp/src/arrow/compute/kernel.h
+++ b/cpp/src/arrow/compute/kernel.h
@@ -522,6 +522,10 @@ struct Kernel {
   /// set up any options or state relevant for execution.
   KernelInit init;
 
+  /// \brief Create a vector of new KernelState for invocations of this kernel.
+  static Status InitAll(KernelContext*, const KernelInitArgs&,
+                        std::vector<std::unique_ptr<KernelState>>*);
+
   /// \brief Indicates whether execution can benefit from parallelization
   /// (splitting large chunks into smaller chunks and using multiple
   /// threads). Some kernels may not support parallel execution at
@@ -673,6 +677,12 @@ struct ScalarAggregateKernel : public Kernel {
             KernelSignature::Make(std::move(in_types), std::move(out_type)),
             std::move(init), std::move(consume), std::move(merge), std::move(finalize)) {}
 
+  /// \brief Merge a vector of KernelStates into a single KernelState.
+  /// The merged state will be returned and will be set on the KernelContext.
+  static Result<std::unique_ptr<KernelState>> MergeAll(
+      const ScalarAggregateKernel* kernel, KernelContext* ctx,
+      std::vector<std::unique_ptr<KernelState>> states);
+
   ScalarAggregateConsume consume;
   ScalarAggregateMerge merge;
   ScalarAggregateFinalize finalize;
diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h b/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h
index 86e321ba522..e6755c05f5d 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h
+++ b/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h
@@ -62,9 +62,9 @@ struct SumImpl : public ScalarAggregator {
     const auto& data = batch[0].array();
     this->count = data->length - data->GetNullCount();
     if (is_boolean_type<ArrowType>::value) {
-      this->sum = static_cast<typename SumType::c_type>(BooleanArray(data).true_count());
+      this->sum += static_cast<typename SumType::c_type>(BooleanArray(data).true_count());
     } else {
-      this->sum =
+      this->sum +=
           arrow::compute::detail::SumArray<CType, typename SumType::c_type>(*data);
     }
     return Status::OK();
diff --git a/cpp/src/arrow/dataset/scanner.cc b/cpp/src/arrow/dataset/scanner.cc
index cc2e5bcda66..2f7a115bb4b 100644
--- a/cpp/src/arrow/dataset/scanner.cc
+++ b/cpp/src/arrow/dataset/scanner.cc
@@ -591,27 +591,15 @@ Result<EnumeratedRecordBatch> ToEnumeratedRecordBatch(
     const FragmentVector& fragments) {
   int num_fields = options.projected_schema->num_fields();
 
-  ArrayVector columns(num_fields);
-  for (size_t i = 0; i < columns.size(); ++i) {
-    const Datum& value = batch->values[i];
-    if (value.is_array()) {
-      columns[i] = value.make_array();
-      continue;
-    }
-    ARROW_ASSIGN_OR_RAISE(
-        columns[i], MakeArrayFromScalar(*value.scalar(), batch->length, options.pool));
-  }
-
   EnumeratedRecordBatch out;
   out.fragment.index = batch->values[num_fields].scalar_as<Int32Scalar>().value;
-  out.fragment.value = fragments[out.fragment.index];
   out.fragment.last = false;  // ignored during reordering
+  out.fragment.value = fragments[out.fragment.index];
 
   out.record_batch.index = batch->values[num_fields + 1].scalar_as<Int32Scalar>().value;
-  out.record_batch.value =
-      RecordBatch::Make(options.projected_schema, batch->length, std::move(columns));
   out.record_batch.last = batch->values[num_fields + 2].scalar_as<BooleanScalar>().value;
-
+  ARROW_ASSIGN_OR_RAISE(out.record_batch.value,
+                        batch->ToRecordBatch(options.projected_schema, options.pool));
   return out;
 }
 }  // namespace
@@ -633,11 +621,12 @@ Result<EnumeratedRecordBatchGenerator> AsyncScanner::ScanBatchesUnorderedAsync(
                         compute::MakeFilterNode(scan, "filter", scan_options_->filter));
 
   auto exprs = scan_options_->projection.call()->arguments;
-  exprs.push_back(compute::field_ref("__fragment_index"));
-  exprs.push_back(compute::field_ref("__batch_index"));
-  exprs.push_back(compute::field_ref("__last_in_fragment"));
-  ARROW_ASSIGN_OR_RAISE(auto project,
-                        compute::MakeProjectNode(filter, "project", std::move(exprs)));
+  auto names = checked_cast<const compute::ProjectOptions*>(
+                   scan_options_->projection.call()->options.get())
+                   ->field_names;
+  ARROW_ASSIGN_OR_RAISE(
+      auto project,
+      MakeAugmentedProjectNode(filter, "project", std::move(exprs), std::move(names)));
 
   AsyncGenerator<util::optional<compute::ExecBatch>> sink_gen =
       compute::MakeSinkNode(project, "sink");
@@ -1176,5 +1165,89 @@ Result<compute::ExecNode*> MakeScanNode(compute::ExecPlan* plan,
   return MakeScanNode(plan, std::move(fragments_gen), std::move(scan_options));
 }
 
+Result<compute::ExecNode*> MakeAugmentedProjectNode(
+    compute::ExecNode* input, std::string label, std::vector<compute::Expression> exprs,
+    std::vector<std::string> names) {
+  if (names.size() == 0) {
+    names.resize(exprs.size());
+    for (size_t i = 0; i < exprs.size(); ++i) {
+      names[i] = exprs[i].ToString();
+    }
+  }
+
+  for (auto aug_name : {"__fragment_index", "__batch_index", "__last_in_fragment"}) {
+    exprs.push_back(compute::field_ref(aug_name));
+    names.emplace_back(aug_name);
+  }
+  return compute::MakeProjectNode(input, std::move(label), std::move(exprs),
+                                  std::move(names));
+}
+
+Result<AsyncGenerator<util::optional<compute::ExecBatch>>> MakeOrderedSinkNode(
+    compute::ExecNode* input, std::string label) {
+  auto unordered = compute::MakeSinkNode(input, std::move(label));
+
+  const Schema& schema = *input->output_schema();
+  ARROW_ASSIGN_OR_RAISE(FieldPath match, FieldRef("__fragment_index").FindOne(schema));
+  int i = match[0];
+  auto fragment_index = [i](const compute::ExecBatch& batch) {
+    return batch.values[i].scalar_as<Int32Scalar>().value;
+  };
+  compute::ExecBatch before_any{{}, 0};
+  before_any.values.resize(i + 1);
+  before_any.values.back() = Datum(-1);
+
+  ARROW_ASSIGN_OR_RAISE(match, FieldRef("__batch_index").FindOne(schema));
+  i = match[0];
+  auto batch_index = [i](const compute::ExecBatch& batch) {
+    return batch.values[i].scalar_as<Int32Scalar>().value;
+  };
+
+  ARROW_ASSIGN_OR_RAISE(match, FieldRef("__last_in_fragment").FindOne(schema));
+  i = match[0];
+  auto last_in_fragment = [i](const compute::ExecBatch& batch) {
+    return batch.values[i].scalar_as<BooleanScalar>().value;
+  };
+
+  auto is_before_any = [=](const compute::ExecBatch& batch) {
+    return fragment_index(batch) < 0;
+  };
+
+  auto left_after_right = [=](const util::optional<compute::ExecBatch>& left,
+                              const util::optional<compute::ExecBatch>& right) {
+    // Before any comes first
+    if (is_before_any(*left)) {
+      return false;
+    }
+    if (is_before_any(*right)) {
+      return true;
+    }
+    // Compare batches if fragment is the same
+    if (fragment_index(*left) == fragment_index(*right)) {
+      return batch_index(*left) > batch_index(*right);
+    }
+    // Otherwise compare fragment
+    return fragment_index(*left) > fragment_index(*right);
+  };
+
+  auto is_next = [=](const util::optional<compute::ExecBatch>& prev,
+                     const util::optional<compute::ExecBatch>& next) {
+    // Only true if next is the first batch
+    if (is_before_any(*prev)) {
+      return fragment_index(*next) == 0 && batch_index(*next) == 0;
+    }
+    // If same fragment, compare batch index
+    if (fragment_index(*next) == fragment_index(*prev)) {
+      return batch_index(*next) == batch_index(*prev) + 1;
+    }
+    // Else only if next first batch of next fragment and prev is last batch of previous
+    return fragment_index(*next) == fragment_index(*prev) + 1 &&
+           last_in_fragment(*prev) && batch_index(*next) == 0;
+  };
+
+  return MakeSequencingGenerator(std::move(unordered), left_after_right, is_next,
+                                 util::make_optional(std::move(before_any)));
+}
+
 }  // namespace dataset
 }  // namespace arrow
diff --git a/cpp/src/arrow/dataset/scanner.h b/cpp/src/arrow/dataset/scanner.h
index c803cde1978..fc715206d7d 100644
--- a/cpp/src/arrow/dataset/scanner.h
+++ b/cpp/src/arrow/dataset/scanner.h
@@ -409,11 +409,25 @@ class ARROW_DS_EXPORT ScannerBuilder {
 
 /// \brief Construct a source ExecNode which yields batches from a dataset scan.
 ///
-/// Does not construct associated filter or project nodes
+/// Does not construct associated filter or project nodes.
+/// Yielded batches will be augmented with fragment/batch indices to enable stable
+/// ordering for simple ExecPlans.
 ARROW_DS_EXPORT Result<compute::ExecNode*> MakeScanNode(compute::ExecPlan*,
                                                         std::shared_ptr<Dataset>,
                                                         std::shared_ptr<ScanOptions>);
 
+/// \brief Construct a ProjectNode which preserves fragment/batch indices.
+ARROW_DS_EXPORT Result<compute::ExecNode*> MakeAugmentedProjectNode(
+    compute::ExecNode* input, std::string label, std::vector<compute::Expression> exprs,
+    std::vector<std::string> names = {});
+
+/// \brief Add a sink node which forwards to an AsyncGenerator<ExecBatch>
+///
+/// Emitted batches will be ordered by fragment and batch indices, or an error
+/// will be raised if those fields are not available in the input.
+ARROW_DS_EXPORT Result<AsyncGenerator<util::optional<compute::ExecBatch>>>
+MakeOrderedSinkNode(compute::ExecNode*, std::string label);
+
 /// @}
 
 /// \brief A trivial ScanTask that yields the RecordBatch of an array.
diff --git a/cpp/src/arrow/dataset/scanner_test.cc b/cpp/src/arrow/dataset/scanner_test.cc
index f567054bf91..74f558d1738 100644
--- a/cpp/src/arrow/dataset/scanner_test.cc
+++ b/cpp/src/arrow/dataset/scanner_test.cc
@@ -1323,5 +1323,86 @@ TEST(ScanNode, MaterializationOfVirtualColumn) {
               Finishes(ResultWith(UnorderedElementsAreArray(expected))));
 }
 
+TEST(ScanNode, MinimalEndToEnd) {
+  // NB: This test is here for didactic purposes
+
+  // Specify a MemoryPool and ThreadPool for the ExecPlan
+  compute::ExecContext exec_context(default_memory_pool(), internal::GetCpuThreadPool());
+
+  // A ScanNode is constructed from an ExecPlan (into which it is inserted),
+  // a Dataset (whose batches will be scanned), and ScanOptions (to specify a filter for
+  // predicate pushdown, a projection to skip materialization of unnecessary columns, ...)
+  ASSERT_OK_AND_ASSIGN(std::shared_ptr<compute::ExecPlan> plan,
+                       compute::ExecPlan::Make(&exec_context));
+
+  std::shared_ptr<Dataset> dataset = std::make_shared<InMemoryDataset>(
+      TableFromJSON(schema({field("a", int32()), field("b", boolean())}),
+                    {
+                        R"([{"a": 1,    "b": null},
+                            {"a": 2,    "b": true}])",
+                        R"([{"a": null, "b": true},
+                            {"a": 3,    "b": false}])",
+                        R"([{"a": null, "b": true},
+                            {"a": 4,    "b": false}])",
+                        R"([{"a": 5,    "b": null},
+                            {"a": 6,    "b": false},
+                            {"a": 7,    "b": false}])",
+                    }));
+
+  auto options = std::make_shared<ScanOptions>();
+  // sync scanning is not supported by ScanNode
+  options->use_async = true;
+  // for now, we must replicate the dataset schema here
+  options->dataset_schema = dataset->schema();
+  // specify the filter
+  compute::Expression b_is_true = field_ref("b");
+  ASSERT_OK_AND_ASSIGN(b_is_true, b_is_true.Bind(*dataset->schema()));
+  options->filter = b_is_true;
+  // for now, specify the projection as the full project expression (eventually this can
+  // just be a list of materialized field names)
+  compute::Expression a_times_2 = call("multiply", {field_ref("a"), literal(2)});
+  ASSERT_OK_AND_ASSIGN(a_times_2, a_times_2.Bind(*dataset->schema()));
+  options->projection = call("project", {a_times_2}, compute::ProjectOptions{{"a * 2"}});
+
+  // construct the scan node
+  ASSERT_OK_AND_ASSIGN(compute::ExecNode * scan,
+                       dataset::MakeScanNode(plan.get(), dataset, options));
+
+  // pipe the scan node into a filter node
+  ASSERT_OK_AND_ASSIGN(compute::ExecNode * filter,
+                       compute::MakeFilterNode(scan, "filter", b_is_true));
+
+  // pipe the filter node into a project node
+  // NB: we're using the project node factory which preserves fragment/batch index
+  // tagging, so we *can* reorder later if we choose. The tags will not appear in
+  // our output.
+  ASSERT_OK_AND_ASSIGN(compute::ExecNode * project,
+                       dataset::MakeAugmentedProjectNode(filter, "project", {a_times_2}));
+
+  // finally, pipe the project node into a sink node
+  // NB: if we don't need ordering, we could use compute::MakeSinkNode instead
+  ASSERT_OK_AND_ASSIGN(auto sink_gen, dataset::MakeOrderedSinkNode(project, "sink"));
+
+  // translate sink_gen (async) to sink_reader (sync)
+  std::shared_ptr<RecordBatchReader> sink_reader = compute::MakeGeneratorReader(
+      schema({field("a * 2", int32())}), std::move(sink_gen), exec_context.memory_pool());
+
+  // start the ExecPlan then wait 1s for completion
+  ASSERT_OK(plan->StartProducing());
+  ASSERT_TRUE(plan->finished().Wait(/*seconds=*/1)) << "ExecPlan didn't finish within 1s";
+
+  // collect sink_reader into a Table
+  ASSERT_OK_AND_ASSIGN(auto collected, Table::FromRecordBatchReader(sink_reader.get()));
+
+  auto expected = TableFromJSON(schema({field("a * 2", int32())}), {
+                                                                       R"([
+                                               {"a * 2": 4},
+                                               {"a * 2": null},
+                                               {"a * 2": null}
+                                          ])"});
+
+  AssertTablesEqual(*expected, *collected, /*same_chunk_layout=*/false);
+}
+
 }  // namespace dataset
 }  // namespace arrow
diff --git a/cpp/src/arrow/dataset/test_util.h b/cpp/src/arrow/dataset/test_util.h
index 201fc7e55b2..2ce99dc0791 100644
--- a/cpp/src/arrow/dataset/test_util.h
+++ b/cpp/src/arrow/dataset/test_util.h
@@ -544,7 +544,7 @@ class FileFormatScanMixin : public FileFormatFixtureMixin<FormatHelper>,
     opts_->use_threads = GetParam().use_threads;
     if (GetParam().use_async) {
       EXPECT_OK_AND_ASSIGN(auto batch_gen, fragment->ScanBatchesAsync(opts_));
-      EXPECT_OK_AND_ASSIGN(auto batch_it, MakeGeneratorIterator(std::move(batch_gen)));
+      auto batch_it = MakeGeneratorIterator(std::move(batch_gen));
       return batch_it;
     }
     EXPECT_OK_AND_ASSIGN(auto scan_task_it, fragment->Scan(opts_));
diff --git a/cpp/src/arrow/testing/matchers.h b/cpp/src/arrow/testing/matchers.h
index f76c25dc096..b64269ea7a1 100644
--- a/cpp/src/arrow/testing/matchers.h
+++ b/cpp/src/arrow/testing/matchers.h
@@ -57,10 +57,7 @@ class FutureMatcher {
           *listener << "which didn't finish within " << wait_seconds_ << " seconds";
           return false;
         }
-
-        const Result<ValueType>& maybe_value = fut.result();
-        testing::StringMatchResultListener value_listener;
-        return result_matcher_.MatchAndExplain(maybe_value, &value_listener);
+        return result_matcher_.MatchAndExplain(fut.result(), listener);
       }
 
       const testing::Matcher<Result<ValueType>> result_matcher_;
diff --git a/cpp/src/arrow/util/async_generator.h b/cpp/src/arrow/util/async_generator.h
index 18149884204..c2aad6cd680 100644
--- a/cpp/src/arrow/util/async_generator.h
+++ b/cpp/src/arrow/util/async_generator.h
@@ -1492,7 +1492,7 @@ class GeneratorIterator {
 /// \brief Converts an AsyncGenerator<T> to an Iterator<T> by blocking until each future
 /// is finished
 template <typename T>
-Result<Iterator<T>> MakeGeneratorIterator(AsyncGenerator<T> source) {
+Iterator<T> MakeGeneratorIterator(AsyncGenerator<T> source) {
   return Iterator<T>(GeneratorIterator<T>(std::move(source)));
 }
 
diff --git a/cpp/src/arrow/util/async_generator_test.cc b/cpp/src/arrow/util/async_generator_test.cc
index 361ce3eacf0..343eb9b6c4b 100644
--- a/cpp/src/arrow/util/async_generator_test.cc
+++ b/cpp/src/arrow/util/async_generator_test.cc
@@ -618,7 +618,7 @@ TEST(TestAsyncUtil, SynchronousFinish) {
 
 TEST(TestAsyncUtil, GeneratorIterator) {
   auto generator = BackgroundAsyncVectorIt({1, 2, 3});
-  ASSERT_OK_AND_ASSIGN(auto iterator, MakeGeneratorIterator(std::move(generator)));
+  auto iterator = MakeGeneratorIterator(std::move(generator));
   ASSERT_OK_AND_EQ(TestInt(1), iterator.Next());
   ASSERT_OK_AND_EQ(TestInt(2), iterator.Next());
   ASSERT_OK_AND_EQ(TestInt(3), iterator.Next());

From e2238582e2a2bf20a68a967145fe1a7b2337a997 Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Wed, 14 Jul 2021 09:50:31 -0400
Subject: [PATCH 550/719] ARROW-13324: [R] Typo in bindings for utf8_reverse
 and ascii_reverse

This PR fixes a typo in the binding for stri_reverse

Closes #10715 from thisisnic/ARROW-13324

Authored-by: Nic Crane <thisisnic@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 r/R/expression.R                               | 2 +-
 r/tests/testthat/test-dplyr-string-functions.R | 9 +++------
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/r/R/expression.R b/r/R/expression.R
index 9b4b79e458a..be80c9db969 100644
--- a/r/R/expression.R
+++ b/r/R/expression.R
@@ -31,7 +31,7 @@
   "str_to_lower" = "utf8_lower",
   "str_to_upper" = "utf8_upper",
   # str_pad is defined in dplyr-functions.R
-  "str_reverse" = "utf8_reverse",
+  "stri_reverse" = "utf8_reverse",
   # str_trim is defined in dplyr-functions.R
   "year" = "year",
   "isoyear" = "iso_year",
diff --git a/r/tests/testthat/test-dplyr-string-functions.R b/r/tests/testthat/test-dplyr-string-functions.R
index e7b860eb99c..cd01acf024d 100644
--- a/r/tests/testthat/test-dplyr-string-functions.R
+++ b/r/tests/testthat/test-dplyr-string-functions.R
@@ -774,8 +774,7 @@ test_that("arrow_find_substring and arrow_find_substring_regex", {
 })
 
 test_that("stri_reverse and arrow_ascii_reverse functions", {
-  # TODO: these actually aren't implemented (ARROW-12869)
-  # Fix them, then remove the `warning = TRUE` arguments
+  
   df_ascii <- tibble(x = c("Foo\nand bar", "baz\tand qux and quux"))
 
   df_utf8 <- tibble(x = c("Foo\u00A0\u0061nd\u00A0bar", "\u0062az\u00A0and\u00A0qux\u3000and\u00A0quux"))
@@ -784,16 +783,14 @@ test_that("stri_reverse and arrow_ascii_reverse functions", {
     input %>%
       mutate(x = stri_reverse(x)) %>%
       collect(),
-    df_utf8,
-    warning = TRUE # Remove me
+    df_utf8
   )
 
   expect_dplyr_equal(
     input %>%
       mutate(x = stri_reverse(x)) %>%
       collect(),
-    df_ascii,
-    warning = TRUE # Remove me
+    df_ascii
   )
 
   expect_equivalent(

From 6db88a9e946c98c59f179210a70bc05ef6a0a296 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Wed, 14 Jul 2021 11:42:00 -0400
Subject: [PATCH 551/719] ARROW-9430: [C++] Implement replace_with_mask kernel

This implements a kernel equivalent to NumPy's `arr[mask] = [values]`, i.e. given an array and an equal-length (or scalar) boolean mask, along with an array of replacement values passed via options, each array item for which the corresponding mask value is `true` is replaced with the next value from the replacement value array.

Closes #10412 from lidavidm/arrow-9430

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/src/arrow/CMakeLists.txt                  |   1 +
 cpp/src/arrow/array/array_binary.h            |   7 +
 cpp/src/arrow/array/util.cc                   |   5 +
 cpp/src/arrow/compute/api_vector.cc           |   5 +
 cpp/src/arrow/compute/api_vector.h            |  17 +
 cpp/src/arrow/compute/kernels/CMakeLists.txt  |   2 +
 .../arrow/compute/kernels/codegen_internal.h  |   2 +
 cpp/src/arrow/compute/kernels/test_util.h     |  22 +
 .../arrow/compute/kernels/vector_replace.cc   | 540 ++++++++++++++
 .../kernels/vector_replace_benchmark.cc       |  89 +++
 .../compute/kernels/vector_replace_test.cc    | 677 ++++++++++++++++++
 cpp/src/arrow/compute/registry.cc             |   1 +
 cpp/src/arrow/compute/registry_internal.h     |   1 +
 cpp/src/arrow/type_traits.h                   |   1 +
 docs/source/cpp/compute.rst                   |  22 +-
 docs/source/python/api/compute.rst            |   8 +
 16 files changed, 1399 insertions(+), 1 deletion(-)
 create mode 100644 cpp/src/arrow/compute/kernels/vector_replace.cc
 create mode 100644 cpp/src/arrow/compute/kernels/vector_replace_benchmark.cc
 create mode 100644 cpp/src/arrow/compute/kernels/vector_replace_test.cc

diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index 634d202623f..88a92d8c2c9 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -401,6 +401,7 @@ if(ARROW_COMPUTE)
        compute/kernels/util_internal.cc
        compute/kernels/vector_hash.cc
        compute/kernels/vector_nested.cc
+       compute/kernels/vector_replace.cc
        compute/kernels/vector_selection.cc
        compute/kernels/vector_sort.cc
        compute/exec/key_hash.cc
diff --git a/cpp/src/arrow/array/array_binary.h b/cpp/src/arrow/array/array_binary.h
index db3c640b9a4..f8e8c4f8a44 100644
--- a/cpp/src/arrow/array/array_binary.h
+++ b/cpp/src/arrow/array/array_binary.h
@@ -71,6 +71,13 @@ class BaseBinaryArray : public FlatArray {
                              raw_value_offsets_[i + 1] - pos);
   }
 
+  /// \brief Get binary value as a string_view
+  /// Provided for consistency with other arrays.
+  ///
+  /// \param i the value index
+  /// \return the view over the selected value
+  util::string_view Value(int64_t i) const { return GetView(i); }
+
   /// \brief Get binary value as a std::string
   ///
   /// \param i the value index
diff --git a/cpp/src/arrow/array/util.cc b/cpp/src/arrow/array/util.cc
index 688cb20cb9a..ed26ecff4e0 100644
--- a/cpp/src/arrow/array/util.cc
+++ b/cpp/src/arrow/array/util.cc
@@ -528,6 +528,11 @@ class RepeatedArrayFactory {
     return FinishFixedWidth(value.data(), value.size());
   }
 
+  Status Visit(const Decimal256Type&) {
+    auto value = checked_cast<const Decimal256Scalar&>(scalar_).value.ToBytes();
+    return FinishFixedWidth(value.data(), value.size());
+  }
+
   template <typename T>
   enable_if_base_binary<T, Status> Visit(const T&) {
     std::shared_ptr<Buffer> value =
diff --git a/cpp/src/arrow/compute/api_vector.cc b/cpp/src/arrow/compute/api_vector.cc
index 9c1ef8533b4..a68969b2ee5 100644
--- a/cpp/src/arrow/compute/api_vector.cc
+++ b/cpp/src/arrow/compute/api_vector.cc
@@ -162,6 +162,11 @@ Result<std::shared_ptr<Array>> NthToIndices(const Array& values, int64_t n,
   return result.make_array();
 }
 
+Result<Datum> ReplaceWithMask(const Datum& values, const Datum& mask,
+                              const Datum& replacements, ExecContext* ctx) {
+  return CallFunction("replace_with_mask", {values, mask, replacements}, ctx);
+}
+
 Result<std::shared_ptr<Array>> SortIndices(const Array& values, SortOrder order,
                                            ExecContext* ctx) {
   ArraySortOptions options(order);
diff --git a/cpp/src/arrow/compute/api_vector.h b/cpp/src/arrow/compute/api_vector.h
index 6021492320e..9d8d4271db8 100644
--- a/cpp/src/arrow/compute/api_vector.h
+++ b/cpp/src/arrow/compute/api_vector.h
@@ -171,6 +171,23 @@ Result<std::shared_ptr<ArrayData>> GetTakeIndices(
 
 }  // namespace internal
 
+/// \brief ReplaceWithMask replaces each value in the array corresponding
+/// to a true value in the mask with the next element from `replacements`.
+///
+/// \param[in] values Array input to replace
+/// \param[in] mask Array or Scalar of Boolean mask values
+/// \param[in] replacements The replacement values to draw from. There must
+/// be as many replacement values as true values in the mask.
+/// \param[in] ctx the function execution context, optional
+///
+/// \return the resulting datum
+///
+/// \since 5.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> ReplaceWithMask(const Datum& values, const Datum& mask,
+                              const Datum& replacements, ExecContext* ctx = NULLPTR);
+
 /// \brief Take from an array of values at indices in another array
 ///
 /// The output array will be of the same type as the input values
diff --git a/cpp/src/arrow/compute/kernels/CMakeLists.txt b/cpp/src/arrow/compute/kernels/CMakeLists.txt
index 3362d91cbe8..474ce1418fd 100644
--- a/cpp/src/arrow/compute/kernels/CMakeLists.txt
+++ b/cpp/src/arrow/compute/kernels/CMakeLists.txt
@@ -48,6 +48,7 @@ add_arrow_compute_test(vector_test
                        SOURCES
                        vector_hash_test.cc
                        vector_nested_test.cc
+                       vector_replace_test.cc
                        vector_selection_test.cc
                        vector_sort_test.cc
                        test_util.cc)
@@ -55,6 +56,7 @@ add_arrow_compute_test(vector_test
 add_arrow_benchmark(vector_hash_benchmark PREFIX "arrow-compute")
 add_arrow_benchmark(vector_sort_benchmark PREFIX "arrow-compute")
 add_arrow_benchmark(vector_partition_benchmark PREFIX "arrow-compute")
+add_arrow_benchmark(vector_replace_benchmark PREFIX "arrow-compute")
 add_arrow_benchmark(vector_selection_benchmark PREFIX "arrow-compute")
 
 # ----------------------------------------------------------------------
diff --git a/cpp/src/arrow/compute/kernels/codegen_internal.h b/cpp/src/arrow/compute/kernels/codegen_internal.h
index 33b7006491a..12e80423f7f 100644
--- a/cpp/src/arrow/compute/kernels/codegen_internal.h
+++ b/cpp/src/arrow/compute/kernels/codegen_internal.h
@@ -1240,6 +1240,7 @@ ArrayKernelExec GenerateTypeAgnosticPrimitive(detail::GetTypeId get_id) {
     case Type::FLOAT:
     case Type::DATE32:
     case Type::TIME32:
+    case Type::INTERVAL_MONTHS:
       return Generator<UInt32Type>::Exec;
     case Type::UINT64:
     case Type::INT64:
@@ -1248,6 +1249,7 @@ ArrayKernelExec GenerateTypeAgnosticPrimitive(detail::GetTypeId get_id) {
     case Type::TIMESTAMP:
     case Type::TIME64:
     case Type::DURATION:
+    case Type::INTERVAL_DAY_TIME:
       return Generator<UInt64Type>::Exec;
     default:
       DCHECK(false);
diff --git a/cpp/src/arrow/compute/kernels/test_util.h b/cpp/src/arrow/compute/kernels/test_util.h
index f4854087b51..c691a9f3be3 100644
--- a/cpp/src/arrow/compute/kernels/test_util.h
+++ b/cpp/src/arrow/compute/kernels/test_util.h
@@ -172,5 +172,27 @@ void CheckDispatchBest(std::string func_name, std::vector<ValueDescr> descrs,
 // Check that function fails to produce a Kernel for the set of ValueDescrs.
 void CheckDispatchFails(std::string func_name, std::vector<ValueDescr> descrs);
 
+// Helper to get a default instance of a type, including parameterized types
+template <typename T>
+enable_if_parameter_free<T, std::shared_ptr<DataType>> default_type_instance() {
+  return TypeTraits<T>::type_singleton();
+}
+template <typename T>
+enable_if_time<T, std::shared_ptr<DataType>> default_type_instance() {
+  // Time32 requires second/milli, Time64 requires nano/micro
+  if (bit_width(T::type_id) == 32) {
+    return std::make_shared<T>(TimeUnit::type::SECOND);
+  }
+  return std::make_shared<T>(TimeUnit::type::NANO);
+}
+template <typename T>
+enable_if_timestamp<T, std::shared_ptr<DataType>> default_type_instance() {
+  return std::make_shared<T>(TimeUnit::type::SECOND);
+}
+template <typename T>
+enable_if_decimal<T, std::shared_ptr<DataType>> default_type_instance() {
+  return std::make_shared<T>(5, 2);
+}
+
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/vector_replace.cc b/cpp/src/arrow/compute/kernels/vector_replace.cc
new file mode 100644
index 00000000000..644aec2a4e9
--- /dev/null
+++ b/cpp/src/arrow/compute/kernels/vector_replace.cc
@@ -0,0 +1,540 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/compute/api_scalar.h"
+#include "arrow/compute/kernels/common.h"
+#include "arrow/util/bitmap_ops.h"
+
+namespace arrow {
+namespace compute {
+namespace internal {
+
+namespace {
+
+Status ReplacementArrayTooShort(int64_t expected, int64_t actual) {
+  return Status::Invalid("Replacement array must be of appropriate length (expected ",
+                         expected, " items but got ", actual, " items)");
+}
+
+// Helper to implement replace_with kernel with scalar mask for fixed-width types,
+// using callbacks to handle both bool and byte-sized types
+template <typename Functor>
+Status ReplaceWithScalarMask(KernelContext* ctx, const ArrayData& array,
+                             const BooleanScalar& mask, const Datum& replacements,
+                             ArrayData* output) {
+  Datum source = array;
+  if (!mask.is_valid) {
+    // Output = null
+    source = MakeNullScalar(output->type);
+  } else if (mask.value) {
+    // Output = replacement
+    source = replacements;
+  }
+  uint8_t* out_bitmap = output->buffers[0]->mutable_data();
+  uint8_t* out_values = output->buffers[1]->mutable_data();
+  const int64_t out_offset = output->offset;
+  if (source.is_array()) {
+    const ArrayData& in_data = *source.array();
+    if (in_data.length < array.length) {
+      return ReplacementArrayTooShort(array.length, in_data.length);
+    }
+    Functor::CopyData(*array.type, out_values, out_offset, in_data, /*in_offset=*/0,
+                      array.length);
+    if (in_data.MayHaveNulls()) {
+      arrow::internal::CopyBitmap(in_data.buffers[0]->data(), in_data.offset,
+                                  array.length, out_bitmap, out_offset);
+    } else {
+      BitUtil::SetBitsTo(out_bitmap, out_offset, array.length, true);
+    }
+  } else {
+    const Scalar& in_data = *source.scalar();
+    Functor::CopyData(*array.type, out_values, out_offset, in_data, /*in_offset=*/0,
+                      array.length);
+    BitUtil::SetBitsTo(out_bitmap, out_offset, array.length, in_data.is_valid);
+  }
+  return Status::OK();
+}
+
+struct CopyArrayBitmap {
+  const uint8_t* in_bitmap;
+  int64_t in_offset;
+
+  void CopyBitmap(uint8_t* out_bitmap, int64_t out_offset, int64_t offset,
+                  int64_t length) const {
+    arrow::internal::CopyBitmap(in_bitmap, in_offset + offset, length, out_bitmap,
+                                out_offset);
+  }
+
+  void SetBit(uint8_t* out_bitmap, int64_t out_offset, int64_t offset) const {
+    BitUtil::SetBitTo(out_bitmap, out_offset,
+                      BitUtil::GetBit(in_bitmap, in_offset + offset));
+  }
+};
+
+struct CopyScalarBitmap {
+  const bool is_valid;
+
+  void CopyBitmap(uint8_t* out_bitmap, int64_t out_offset, int64_t offset,
+                  int64_t length) const {
+    BitUtil::SetBitsTo(out_bitmap, out_offset, length, is_valid);
+  }
+
+  void SetBit(uint8_t* out_bitmap, int64_t out_offset, int64_t offset) const {
+    BitUtil::SetBitTo(out_bitmap, out_offset, is_valid);
+  }
+};
+
+// Helper to implement replace_with kernel with array mask for fixed-width types,
+// using callbacks to handle both bool and byte-sized types and to handle
+// scalar and array replacements
+template <typename Functor, typename Data, typename CopyBitmap>
+void ReplaceWithArrayMaskImpl(const ArrayData& array, const ArrayData& mask,
+                              const Data& replacements, bool replacements_bitmap,
+                              const CopyBitmap& copy_bitmap, const uint8_t* mask_bitmap,
+                              const uint8_t* mask_values, uint8_t* out_bitmap,
+                              uint8_t* out_values, const int64_t out_offset) {
+  Functor::CopyData(*array.type, out_values, /*out_offset=*/0, array, /*in_offset=*/0,
+                    array.length);
+  arrow::internal::OptionalBinaryBitBlockCounter counter(
+      mask_values, mask.offset, mask_bitmap, mask.offset, mask.length);
+  int64_t write_offset = 0;
+  int64_t replacements_offset = 0;
+  while (write_offset < array.length) {
+    BitBlockCount block = counter.NextAndBlock();
+    if (block.AllSet()) {
+      // Copy from replacement array
+      Functor::CopyData(*array.type, out_values, out_offset + write_offset, replacements,
+                        replacements_offset, block.length);
+      if (replacements_bitmap) {
+        copy_bitmap.CopyBitmap(out_bitmap, out_offset + write_offset, replacements_offset,
+                               block.length);
+      } else if (!replacements_bitmap && out_bitmap) {
+        BitUtil::SetBitsTo(out_bitmap, out_offset + write_offset, block.length, true);
+      }
+      replacements_offset += block.length;
+    } else if (block.popcount) {
+      for (int64_t i = 0; i < block.length; ++i) {
+        if (BitUtil::GetBit(mask_values, write_offset + mask.offset + i) &&
+            (!mask_bitmap ||
+             BitUtil::GetBit(mask_bitmap, write_offset + mask.offset + i))) {
+          Functor::CopyData(*array.type, out_values, out_offset + write_offset + i,
+                            replacements, replacements_offset, /*length=*/1);
+          if (replacements_bitmap) {
+            copy_bitmap.SetBit(out_bitmap, out_offset + write_offset + i,
+                               replacements_offset);
+          }
+          replacements_offset++;
+        }
+      }
+    }
+    write_offset += block.length;
+  }
+}
+
+template <typename Functor>
+Status ReplaceWithArrayMask(KernelContext* ctx, const ArrayData& array,
+                            const ArrayData& mask, const Datum& replacements,
+                            ArrayData* output) {
+  const int64_t out_offset = output->offset;
+  uint8_t* out_bitmap = nullptr;
+  uint8_t* out_values = output->buffers[1]->mutable_data();
+  const uint8_t* mask_bitmap = mask.MayHaveNulls() ? mask.buffers[0]->data() : nullptr;
+  const uint8_t* mask_values = mask.buffers[1]->data();
+  const bool replacements_bitmap = replacements.is_array()
+                                       ? replacements.array()->MayHaveNulls()
+                                       : !replacements.scalar()->is_valid;
+  if (replacements.is_array()) {
+    // Check that we have enough replacement values
+    const int64_t replacements_length = replacements.array()->length;
+
+    BooleanArray mask_arr(mask.length, mask.buffers[1], mask.buffers[0], mask.null_count,
+                          mask.offset);
+    const int64_t count = mask_arr.true_count();
+    if (count > replacements_length) {
+      return ReplacementArrayTooShort(count, replacements_length);
+    }
+  }
+  if (array.MayHaveNulls() || mask.MayHaveNulls() || replacements_bitmap) {
+    out_bitmap = output->buffers[0]->mutable_data();
+    output->null_count = -1;
+    if (array.MayHaveNulls()) {
+      // Copy array's bitmap
+      arrow::internal::CopyBitmap(array.buffers[0]->data(), array.offset, array.length,
+                                  out_bitmap, out_offset);
+    } else {
+      // Array has no bitmap but mask/replacements do, generate an all-valid bitmap
+      BitUtil::SetBitsTo(out_bitmap, out_offset, array.length, true);
+    }
+  } else {
+    BitUtil::SetBitsTo(output->buffers[0]->mutable_data(), out_offset, array.length,
+                       true);
+    output->null_count = 0;
+  }
+
+  if (replacements.is_array()) {
+    const ArrayData& array_repl = *replacements.array();
+    ReplaceWithArrayMaskImpl<Functor>(
+        array, mask, array_repl, replacements_bitmap,
+        CopyArrayBitmap{replacements_bitmap ? array_repl.buffers[0]->data() : nullptr,
+                        array_repl.offset},
+        mask_bitmap, mask_values, out_bitmap, out_values, out_offset);
+  } else {
+    const Scalar& scalar_repl = *replacements.scalar();
+    ReplaceWithArrayMaskImpl<Functor>(array, mask, scalar_repl, replacements_bitmap,
+                                      CopyScalarBitmap{scalar_repl.is_valid}, mask_bitmap,
+                                      mask_values, out_bitmap, out_values, out_offset);
+  }
+
+  if (mask.MayHaveNulls()) {
+    arrow::internal::BitmapAnd(out_bitmap, out_offset, mask.buffers[0]->data(),
+                               mask.offset, array.length, out_offset, out_bitmap);
+  }
+  return Status::OK();
+}
+
+template <typename Type, typename Enable = void>
+struct ReplaceWithMask {};
+
+template <typename Type>
+struct ReplaceWithMask<Type, enable_if_number<Type>> {
+  using T = typename TypeTraits<Type>::CType;
+
+  static void CopyData(const DataType&, uint8_t* out, const int64_t out_offset,
+                       const ArrayData& in, const int64_t in_offset,
+                       const int64_t length) {
+    const auto in_arr = in.GetValues<uint8_t>(1, (in_offset + in.offset) * sizeof(T));
+    std::memcpy(out + (out_offset * sizeof(T)), in_arr, length * sizeof(T));
+  }
+
+  static void CopyData(const DataType&, uint8_t* out, const int64_t out_offset,
+                       const Scalar& in, const int64_t in_offset, const int64_t length) {
+    T* begin = reinterpret_cast<T*>(out + (out_offset * sizeof(T)));
+    T* end = begin + length;
+    std::fill(begin, end, UnboxScalar<Type>::Unbox(in));
+  }
+
+  static Status ExecScalarMask(KernelContext* ctx, const ArrayData& array,
+                               const BooleanScalar& mask, const Datum& replacements,
+                               ArrayData* output) {
+    return ReplaceWithScalarMask<ReplaceWithMask<Type>>(ctx, array, mask, replacements,
+                                                        output);
+  }
+
+  static Status ExecArrayMask(KernelContext* ctx, const ArrayData& array,
+                              const ArrayData& mask, const Datum& replacements,
+                              ArrayData* output) {
+    return ReplaceWithArrayMask<ReplaceWithMask<Type>>(ctx, array, mask, replacements,
+                                                       output);
+  }
+};
+
+template <typename Type>
+struct ReplaceWithMask<Type, enable_if_boolean<Type>> {
+  static void CopyData(const DataType&, uint8_t* out, const int64_t out_offset,
+                       const ArrayData& in, const int64_t in_offset,
+                       const int64_t length) {
+    const auto in_arr = in.GetValues<uint8_t>(1, /*absolute_offset=*/0);
+    arrow::internal::CopyBitmap(in_arr, in_offset + in.offset, length, out, out_offset);
+  }
+  static void CopyData(const DataType&, uint8_t* out, const int64_t out_offset,
+                       const Scalar& in, const int64_t in_offset, const int64_t length) {
+    BitUtil::SetBitsTo(out, out_offset, length, in.is_valid);
+  }
+
+  static Status ExecScalarMask(KernelContext* ctx, const ArrayData& array,
+                               const BooleanScalar& mask, const Datum& replacements,
+                               ArrayData* output) {
+    return ReplaceWithScalarMask<ReplaceWithMask<Type>>(ctx, array, mask, replacements,
+                                                        output);
+  }
+  static Status ExecArrayMask(KernelContext* ctx, const ArrayData& array,
+                              const ArrayData& mask, const Datum& replacements,
+                              ArrayData* output) {
+    return ReplaceWithArrayMask<ReplaceWithMask<Type>>(ctx, array, mask, replacements,
+                                                       output);
+  }
+};
+
+template <typename Type>
+struct ReplaceWithMask<Type, enable_if_same<Type, FixedSizeBinaryType>> {
+  static void CopyData(const DataType& ty, uint8_t* out, const int64_t out_offset,
+                       const ArrayData& in, const int64_t in_offset,
+                       const int64_t length) {
+    const int32_t width = checked_cast<const FixedSizeBinaryType&>(ty).byte_width();
+    uint8_t* begin = out + (out_offset * width);
+    const auto in_arr = in.GetValues<uint8_t>(1, (in_offset + in.offset) * width);
+    std::memcpy(begin, in_arr, length * width);
+  }
+  static void CopyData(const DataType& ty, uint8_t* out, const int64_t out_offset,
+                       const Scalar& in, const int64_t in_offset, const int64_t length) {
+    const int32_t width = checked_cast<const FixedSizeBinaryType&>(ty).byte_width();
+    uint8_t* begin = out + (out_offset * width);
+    const auto& scalar = checked_cast<const FixedSizeBinaryScalar&>(in);
+    // Null scalar may have null value buffer
+    if (!scalar.value) return;
+    const Buffer& buffer = *scalar.value;
+    const uint8_t* value = buffer.data();
+    DCHECK_GE(buffer.size(), width);
+    for (int i = 0; i < length; i++) {
+      std::memcpy(begin, value, width);
+      begin += width;
+    }
+  }
+
+  static Status ExecScalarMask(KernelContext* ctx, const ArrayData& array,
+                               const BooleanScalar& mask, const Datum& replacements,
+                               ArrayData* output) {
+    return ReplaceWithScalarMask<ReplaceWithMask<Type>>(ctx, array, mask, replacements,
+                                                        output);
+  }
+
+  static Status ExecArrayMask(KernelContext* ctx, const ArrayData& array,
+                              const ArrayData& mask, const Datum& replacements,
+                              ArrayData* output) {
+    return ReplaceWithArrayMask<ReplaceWithMask<Type>>(ctx, array, mask, replacements,
+                                                       output);
+  }
+};
+
+template <typename Type>
+struct ReplaceWithMask<Type, enable_if_decimal<Type>> {
+  using ScalarType = typename TypeTraits<Type>::ScalarType;
+  static void CopyData(const DataType& ty, uint8_t* out, const int64_t out_offset,
+                       const ArrayData& in, const int64_t in_offset,
+                       const int64_t length) {
+    const int32_t width = checked_cast<const FixedSizeBinaryType&>(ty).byte_width();
+    uint8_t* begin = out + (out_offset * width);
+    const auto in_arr = in.GetValues<uint8_t>(1, (in_offset + in.offset) * width);
+    std::memcpy(begin, in_arr, length * width);
+  }
+  static void CopyData(const DataType& ty, uint8_t* out, const int64_t out_offset,
+                       const Scalar& in, const int64_t in_offset, const int64_t length) {
+    const int32_t width = checked_cast<const FixedSizeBinaryType&>(ty).byte_width();
+    uint8_t* begin = out + (out_offset * width);
+    const auto& scalar = checked_cast<const ScalarType&>(in);
+    const auto value = scalar.value.ToBytes();
+    for (int i = 0; i < length; i++) {
+      std::memcpy(begin, value.data(), width);
+      begin += width;
+    }
+  }
+
+  static Status ExecScalarMask(KernelContext* ctx, const ArrayData& array,
+                               const BooleanScalar& mask, const Datum& replacements,
+                               ArrayData* output) {
+    return ReplaceWithScalarMask<ReplaceWithMask<Type>>(ctx, array, mask, replacements,
+                                                        output);
+  }
+
+  static Status ExecArrayMask(KernelContext* ctx, const ArrayData& array,
+                              const ArrayData& mask, const Datum& replacements,
+                              ArrayData* output) {
+    return ReplaceWithArrayMask<ReplaceWithMask<Type>>(ctx, array, mask, replacements,
+                                                       output);
+  }
+};
+
+template <typename Type>
+struct ReplaceWithMask<Type, enable_if_null<Type>> {
+  static Status ExecScalarMask(KernelContext* ctx, const ArrayData& array,
+                               const BooleanScalar& mask, const Datum& replacements,
+                               ArrayData* output) {
+    *output = array;
+    return Status::OK();
+  }
+  static Status ExecArrayMask(KernelContext* ctx, const ArrayData& array,
+                              const ArrayData& mask, const Datum& replacements,
+                              ArrayData* output) {
+    *output = array;
+    return Status::OK();
+  }
+};
+
+template <typename Type>
+struct ReplaceWithMask<Type, enable_if_base_binary<Type>> {
+  using offset_type = typename Type::offset_type;
+  using BuilderType = typename TypeTraits<Type>::BuilderType;
+
+  static Status ExecScalarMask(KernelContext* ctx, const ArrayData& array,
+                               const BooleanScalar& mask, const Datum& replacements,
+                               ArrayData* output) {
+    if (!mask.is_valid) {
+      // Output = null
+      ARROW_ASSIGN_OR_RAISE(
+          auto replacement_array,
+          MakeArrayOfNull(array.type, array.length, ctx->memory_pool()));
+      *output = *replacement_array->data();
+    } else if (mask.value) {
+      // Output = replacement
+      if (replacements.is_scalar()) {
+        ARROW_ASSIGN_OR_RAISE(auto replacement_array,
+                              MakeArrayFromScalar(*replacements.scalar(), array.length,
+                                                  ctx->memory_pool()));
+        *output = *replacement_array->data();
+      } else {
+        const ArrayData& replacement_array = *replacements.array();
+        if (replacement_array.length < array.length) {
+          return ReplacementArrayTooShort(array.length, replacement_array.length);
+        }
+        *output = replacement_array;
+        output->length = array.length;
+      }
+    } else {
+      // Output = input
+      *output = array;
+    }
+    return Status::OK();
+  }
+  static Status ExecArrayMask(KernelContext* ctx, const ArrayData& array,
+                              const ArrayData& mask, const Datum& replacements,
+                              ArrayData* output) {
+    BuilderType builder(array.type, ctx->memory_pool());
+    RETURN_NOT_OK(builder.Reserve(array.length));
+    RETURN_NOT_OK(builder.ReserveData(array.buffers[2]->size()));
+    int64_t source_offset = 0;
+    int64_t replacements_offset = 0;
+    RETURN_NOT_OK(VisitArrayDataInline<BooleanType>(
+        mask,
+        [&](bool replace) {
+          if (replace && replacements.is_scalar()) {
+            const Scalar& scalar = *replacements.scalar();
+            if (scalar.is_valid) {
+              RETURN_NOT_OK(builder.Append(UnboxScalar<Type>::Unbox(scalar)));
+            } else {
+              RETURN_NOT_OK(builder.AppendNull());
+            }
+          } else {
+            const ArrayData& source = replace ? *replacements.array() : array;
+            const int64_t offset = replace ? replacements_offset++ : source_offset;
+            if (!source.MayHaveNulls() ||
+                BitUtil::GetBit(source.buffers[0]->data(), source.offset + offset)) {
+              const uint8_t* data = source.buffers[2]->data();
+              const offset_type* offsets = source.GetValues<offset_type>(1);
+              const offset_type offset0 = offsets[offset];
+              const offset_type offset1 = offsets[offset + 1];
+              RETURN_NOT_OK(builder.Append(data + offset0, offset1 - offset0));
+            } else {
+              RETURN_NOT_OK(builder.AppendNull());
+            }
+          }
+          source_offset++;
+          return Status::OK();
+        },
+        [&]() {
+          RETURN_NOT_OK(builder.AppendNull());
+          source_offset++;
+          return Status::OK();
+        }));
+    std::shared_ptr<Array> temp_output;
+    RETURN_NOT_OK(builder.Finish(&temp_output));
+    *output = *temp_output->data();
+    // Builder type != logical type due to GenerateTypeAgnosticVarBinaryBase
+    output->type = array.type;
+    return Status::OK();
+  }
+};
+
+template <typename Type>
+struct ReplaceWithMaskFunctor {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    const ArrayData& array = *batch[0].array();
+    const Datum& replacements = batch[2];
+    ArrayData* output = out->array().get();
+    output->length = array.length;
+
+    // Needed for FixedSizeBinary/parameterized types
+    if (!array.type->Equals(*replacements.type(), /*check_metadata=*/false)) {
+      return Status::Invalid("Replacements must be of same type (expected ",
+                             array.type->ToString(), " but got ",
+                             replacements.type()->ToString(), ")");
+    }
+
+    if (!replacements.is_array() && !replacements.is_scalar()) {
+      return Status::Invalid("Replacements must be array or scalar");
+    }
+
+    if (batch[1].is_scalar()) {
+      return ReplaceWithMask<Type>::ExecScalarMask(
+          ctx, array, batch[1].scalar_as<BooleanScalar>(), replacements, output);
+    }
+    const ArrayData& mask = *batch[1].array();
+    if (array.length != mask.length) {
+      return Status::Invalid("Mask must be of same length as array (expected ",
+                             array.length, " items but got ", mask.length, " items)");
+    }
+    return ReplaceWithMask<Type>::ExecArrayMask(ctx, array, mask, replacements, output);
+  }
+};
+
+}  // namespace
+
+const FunctionDoc replace_with_mask_doc(
+    "Replace items using a mask and replacement values",
+    ("Given an array and a Boolean mask (either scalar or of equal length), "
+     "along with replacement values (either scalar or array), "
+     "each element of the array for which the corresponding mask element is "
+     "true will be replaced by the next value from the replacements, "
+     "or with null if the mask is null. "
+     "Hence, for replacement arrays, len(replacements) == sum(mask == true)."),
+    {"values", "mask", "replacements"});
+
+void RegisterVectorReplace(FunctionRegistry* registry) {
+  auto func = std::make_shared<VectorFunction>("replace_with_mask", Arity::Ternary(),
+                                               &replace_with_mask_doc);
+  auto add_kernel = [&](detail::GetTypeId get_id, ArrayKernelExec exec) {
+    VectorKernel kernel;
+    kernel.can_execute_chunkwise = false;
+    if (is_fixed_width(get_id.id)) {
+      kernel.null_handling = NullHandling::type::COMPUTED_PREALLOCATE;
+    } else {
+      kernel.can_write_into_slices = false;
+      kernel.null_handling = NullHandling::type::COMPUTED_NO_PREALLOCATE;
+    }
+    kernel.mem_allocation = MemAllocation::type::PREALLOCATE;
+    kernel.signature = KernelSignature::Make(
+        {InputType::Array(get_id.id), InputType(boolean()), InputType(get_id.id)},
+        OutputType(FirstType));
+    kernel.exec = std::move(exec);
+    DCHECK_OK(func->AddKernel(std::move(kernel)));
+  };
+  auto add_primitive_kernel = [&](detail::GetTypeId get_id) {
+    add_kernel(get_id, GenerateTypeAgnosticPrimitive<ReplaceWithMaskFunctor>(get_id));
+  };
+  for (const auto& ty : NumericTypes()) {
+    add_primitive_kernel(ty);
+  }
+  for (const auto& ty : TemporalTypes()) {
+    add_primitive_kernel(ty);
+  }
+  add_primitive_kernel(null());
+  add_primitive_kernel(boolean());
+  add_primitive_kernel(day_time_interval());
+  add_primitive_kernel(month_interval());
+  add_kernel(Type::FIXED_SIZE_BINARY, ReplaceWithMaskFunctor<FixedSizeBinaryType>::Exec);
+  add_kernel(Type::DECIMAL128, ReplaceWithMaskFunctor<Decimal128Type>::Exec);
+  add_kernel(Type::DECIMAL256, ReplaceWithMaskFunctor<Decimal256Type>::Exec);
+  for (const auto& ty : BaseBinaryTypes()) {
+    add_kernel(ty->id(), GenerateTypeAgnosticVarBinaryBase<ReplaceWithMaskFunctor>(*ty));
+  }
+  // TODO: list types
+  DCHECK_OK(registry->AddFunction(std::move(func)));
+
+  // TODO(ARROW-9431): "replace_with_indices"
+}
+}  // namespace internal
+}  // namespace compute
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/vector_replace_benchmark.cc b/cpp/src/arrow/compute/kernels/vector_replace_benchmark.cc
new file mode 100644
index 00000000000..719969d46ea
--- /dev/null
+++ b/cpp/src/arrow/compute/kernels/vector_replace_benchmark.cc
@@ -0,0 +1,89 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <benchmark/benchmark.h>
+
+#include "arrow/array.h"
+#include "arrow/testing/gtest_util.h"
+#include "arrow/testing/random.h"
+
+#include "arrow/compute/api_vector.h"
+
+namespace arrow {
+namespace compute {
+
+using ::arrow::internal::checked_pointer_cast;
+
+static constexpr random::SeedType kRandomSeed = 0xabcdef;
+static constexpr random::SeedType kLongLength = 16384;
+
+static std::shared_ptr<Array> MakeReplacements(random::RandomArrayGenerator* generator,
+                                               const BooleanArray& mask) {
+  int64_t count = 0;
+  for (int64_t i = 0; i < mask.length(); i++) {
+    count += mask.Value(i) && mask.IsValid(i);
+  }
+  return generator->Int64(count, /*min=*/-65536, /*max=*/65536, /*null_probability=*/0.1);
+}
+
+static void ReplaceWithMaskLowSelectivityBench(
+    benchmark::State& state) {  // NOLINT non-const reference
+  random::RandomArrayGenerator generator(kRandomSeed);
+  const int64_t len = state.range(0);
+  const int64_t offset = state.range(1);
+
+  auto values =
+      generator.Int64(len, /*min=*/-65536, /*max=*/65536, /*null_probability=*/0.1)
+          ->Slice(offset);
+  auto mask = checked_pointer_cast<BooleanArray>(
+      generator.Boolean(len, /*true_probability=*/0.1, /*null_probability=*/0.1)
+          ->Slice(offset));
+  auto replacements = MakeReplacements(&generator, *mask);
+
+  for (auto _ : state) {
+    ABORT_NOT_OK(ReplaceWithMask(values, mask, replacements));
+  }
+  state.SetBytesProcessed(state.iterations() * (len - offset) * 8);
+}
+
+static void ReplaceWithMaskHighSelectivityBench(
+    benchmark::State& state) {  // NOLINT non-const reference
+  random::RandomArrayGenerator generator(kRandomSeed);
+  const int64_t len = state.range(0);
+  const int64_t offset = state.range(1);
+
+  auto values =
+      generator.Int64(len, /*min=*/-65536, /*max=*/65536, /*null_probability=*/0.1)
+          ->Slice(offset);
+  auto mask = checked_pointer_cast<BooleanArray>(
+      generator.Boolean(len, /*true_probability=*/0.9, /*null_probability=*/0.1)
+          ->Slice(offset));
+  auto replacements = MakeReplacements(&generator, *mask);
+
+  for (auto _ : state) {
+    ABORT_NOT_OK(ReplaceWithMask(values, mask, replacements));
+  }
+  state.SetBytesProcessed(state.iterations() * (len - offset) * 8);
+}
+
+BENCHMARK(ReplaceWithMaskLowSelectivityBench)->Args({kLongLength, 0});
+BENCHMARK(ReplaceWithMaskLowSelectivityBench)->Args({kLongLength, 99});
+BENCHMARK(ReplaceWithMaskHighSelectivityBench)->Args({kLongLength, 0});
+BENCHMARK(ReplaceWithMaskHighSelectivityBench)->Args({kLongLength, 99});
+
+}  // namespace compute
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/vector_replace_test.cc b/cpp/src/arrow/compute/kernels/vector_replace_test.cc
new file mode 100644
index 00000000000..48f253e7ca9
--- /dev/null
+++ b/cpp/src/arrow/compute/kernels/vector_replace_test.cc
@@ -0,0 +1,677 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+#include "arrow/compute/api_vector.h"
+#include "arrow/compute/kernels/test_util.h"
+#include "arrow/testing/gtest_common.h"
+#include "arrow/testing/gtest_util.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/key_value_metadata.h"
+#include "arrow/util/make_unique.h"
+
+namespace arrow {
+namespace compute {
+
+using arrow::internal::checked_pointer_cast;
+
+template <typename T>
+class TestReplaceKernel : public ::testing::Test {
+ protected:
+  virtual std::shared_ptr<DataType> type() = 0;
+
+  using ReplaceFunction = std::function<Result<Datum>(const Datum&, const Datum&,
+                                                      const Datum&, ExecContext*)>;
+
+  void SetUp() override { equal_options_ = equal_options_.nans_equal(true); }
+
+  Datum mask_scalar(bool value) { return Datum(std::make_shared<BooleanScalar>(value)); }
+
+  Datum null_mask_scalar() {
+    auto scalar = std::make_shared<BooleanScalar>(true);
+    scalar->is_valid = false;
+    return Datum(std::move(scalar));
+  }
+
+  Datum scalar(const std::string& json) { return ScalarFromJSON(type(), json); }
+
+  std::shared_ptr<Array> array(const std::string& value) {
+    return ArrayFromJSON(type(), value);
+  }
+
+  std::shared_ptr<Array> mask(const std::string& value) {
+    return ArrayFromJSON(boolean(), value);
+  }
+
+  Status AssertRaises(ReplaceFunction func, const std::shared_ptr<Array>& array,
+                      const Datum& mask, const std::shared_ptr<Array>& replacements) {
+    auto result = func(array, mask, replacements, nullptr);
+    EXPECT_FALSE(result.ok());
+    return result.status();
+  }
+
+  void Assert(ReplaceFunction func, const std::shared_ptr<Array>& array,
+              const Datum& mask, Datum replacements,
+              const std::shared_ptr<Array>& expected) {
+    SCOPED_TRACE("Replacements: " + (replacements.is_array()
+                                         ? replacements.make_array()->ToString()
+                                         : replacements.scalar()->ToString()));
+    SCOPED_TRACE("Mask: " + (mask.is_array() ? mask.make_array()->ToString()
+                                             : mask.scalar()->ToString()));
+    SCOPED_TRACE("Array: " + array->ToString());
+
+    ASSERT_OK_AND_ASSIGN(auto actual, func(array, mask, replacements, nullptr));
+    ASSERT_TRUE(actual.is_array());
+    ASSERT_OK(actual.make_array()->ValidateFull());
+
+    AssertArraysApproxEqual(*expected, *actual.make_array(), /*verbose=*/true,
+                            equal_options_);
+  }
+
+  std::shared_ptr<Array> NaiveImpl(
+      const typename TypeTraits<T>::ArrayType& array, const BooleanArray& mask,
+      const typename TypeTraits<T>::ArrayType& replacements) {
+    auto length = array.length();
+    auto builder = arrow::internal::make_unique<typename TypeTraits<T>::BuilderType>(
+        default_type_instance<T>(), default_memory_pool());
+    int64_t replacement_offset = 0;
+    for (int64_t i = 0; i < length; ++i) {
+      if (mask.IsValid(i)) {
+        if (mask.Value(i)) {
+          if (replacements.IsValid(replacement_offset)) {
+            ARROW_EXPECT_OK(builder->Append(replacements.Value(replacement_offset++)));
+          } else {
+            ARROW_EXPECT_OK(builder->AppendNull());
+            replacement_offset++;
+          }
+        } else {
+          if (array.IsValid(i)) {
+            ARROW_EXPECT_OK(builder->Append(array.Value(i)));
+          } else {
+            ARROW_EXPECT_OK(builder->AppendNull());
+          }
+        }
+      } else {
+        ARROW_EXPECT_OK(builder->AppendNull());
+      }
+    }
+    EXPECT_OK_AND_ASSIGN(auto expected, builder->Finish());
+    return expected;
+  }
+
+  EqualOptions equal_options_ = EqualOptions::Defaults();
+};
+
+template <typename T>
+class TestReplaceNumeric : public TestReplaceKernel<T> {
+ protected:
+  std::shared_ptr<DataType> type() override { return default_type_instance<T>(); }
+};
+
+class TestReplaceBoolean : public TestReplaceKernel<BooleanType> {
+ protected:
+  std::shared_ptr<DataType> type() override {
+    return TypeTraits<BooleanType>::type_singleton();
+  }
+};
+
+class TestReplaceFixedSizeBinary : public TestReplaceKernel<FixedSizeBinaryType> {
+ protected:
+  std::shared_ptr<DataType> type() override { return fixed_size_binary(3); }
+};
+
+template <typename T>
+class TestReplaceDecimal : public TestReplaceKernel<T> {
+ protected:
+  std::shared_ptr<DataType> type() override { return default_type_instance<T>(); }
+};
+
+class TestReplaceDayTimeInterval : public TestReplaceKernel<DayTimeIntervalType> {
+ protected:
+  std::shared_ptr<DataType> type() override {
+    return TypeTraits<DayTimeIntervalType>::type_singleton();
+  }
+};
+
+template <typename T>
+class TestReplaceBinary : public TestReplaceKernel<T> {
+ protected:
+  std::shared_ptr<DataType> type() override { return default_type_instance<T>(); }
+};
+
+using NumericBasedTypes =
+    ::testing::Types<UInt8Type, UInt16Type, UInt32Type, UInt64Type, Int8Type, Int16Type,
+                     Int32Type, Int64Type, FloatType, DoubleType, Date32Type, Date64Type,
+                     Time32Type, Time64Type, TimestampType, MonthIntervalType>;
+
+TYPED_TEST_SUITE(TestReplaceNumeric, NumericBasedTypes);
+TYPED_TEST_SUITE(TestReplaceDecimal, DecimalArrowTypes);
+TYPED_TEST_SUITE(TestReplaceBinary, BinaryTypes);
+
+TYPED_TEST(TestReplaceNumeric, ReplaceWithMask) {
+  this->Assert(ReplaceWithMask, this->array("[]"), this->mask_scalar(false),
+               this->array("[]"), this->array("[]"));
+  this->Assert(ReplaceWithMask, this->array("[]"), this->mask_scalar(true),
+               this->array("[]"), this->array("[]"));
+  this->Assert(ReplaceWithMask, this->array("[]"), this->null_mask_scalar(),
+               this->array("[]"), this->array("[]"));
+
+  this->Assert(ReplaceWithMask, this->array("[1]"), this->mask_scalar(false),
+               this->array("[]"), this->array("[1]"));
+  this->Assert(ReplaceWithMask, this->array("[1]"), this->mask_scalar(true),
+               this->array("[0]"), this->array("[0]"));
+  this->Assert(ReplaceWithMask, this->array("[1]"), this->mask_scalar(true),
+               this->array("[2, 0]"), this->array("[2]"));
+  this->Assert(ReplaceWithMask, this->array("[1]"), this->null_mask_scalar(),
+               this->array("[]"), this->array("[null]"));
+
+  this->Assert(ReplaceWithMask, this->array("[0, 0]"), this->mask_scalar(false),
+               this->scalar("1"), this->array("[0, 0]"));
+  this->Assert(ReplaceWithMask, this->array("[0, 0]"), this->mask_scalar(true),
+               this->scalar("1"), this->array("[1, 1]"));
+  this->Assert(ReplaceWithMask, this->array("[0, 0]"), this->mask_scalar(true),
+               this->scalar("null"), this->array("[null, null]"));
+
+  this->Assert(ReplaceWithMask, this->array("[]"), this->mask("[]"), this->array("[]"),
+               this->array("[]"));
+  this->Assert(ReplaceWithMask, this->array("[0, 1, 2, 3]"),
+               this->mask("[false, false, false, false]"), this->array("[]"),
+               this->array("[0, 1, 2, 3]"));
+  this->Assert(ReplaceWithMask, this->array("[0, 1, 2, 3]"),
+               this->mask("[true, true, true, true]"), this->array("[10, 11, 12, 13]"),
+               this->array("[10, 11, 12, 13]"));
+  this->Assert(ReplaceWithMask, this->array("[0, 1, 2, 3]"),
+               this->mask("[null, null, null, null]"), this->array("[]"),
+               this->array("[null, null, null, null]"));
+  this->Assert(ReplaceWithMask, this->array("[0, 1, 2, null]"),
+               this->mask("[false, false, false, false]"), this->array("[]"),
+               this->array("[0, 1, 2, null]"));
+  this->Assert(ReplaceWithMask, this->array("[0, 1, 2, null]"),
+               this->mask("[true, true, true, true]"), this->array("[10, 11, 12, 13]"),
+               this->array("[10, 11, 12, 13]"));
+  this->Assert(ReplaceWithMask, this->array("[0, 1, 2, null]"),
+               this->mask("[null, null, null, null]"), this->array("[]"),
+               this->array("[null, null, null, null]"));
+  this->Assert(ReplaceWithMask, this->array("[0, 1, 2, 3, 4, 5]"),
+               this->mask("[true, true, false, false, null, null]"),
+               this->array("[10, null]"), this->array("[10, null, 2, 3, null, null]"));
+  this->Assert(ReplaceWithMask, this->array("[null, null, null, null, null, null]"),
+               this->mask("[true, true, false, false, null, null]"),
+               this->array("[10, null]"),
+               this->array("[10, null, null, null, null, null]"));
+
+  this->Assert(ReplaceWithMask, this->array("[]"), this->mask("[]"), this->scalar("1"),
+               this->array("[]"));
+  this->Assert(ReplaceWithMask, this->array("[0, 1]"), this->mask("[true, true]"),
+               this->scalar("10"), this->array("[10, 10]"));
+  this->Assert(ReplaceWithMask, this->array("[0, 1]"), this->mask("[true, true]"),
+               this->scalar("null"), this->array("[null, null]"));
+  this->Assert(ReplaceWithMask, this->array("[0, 1, 2]"),
+               this->mask("[true, false, null]"), this->scalar("10"),
+               this->array("[10, 1, null]"));
+}
+
+TYPED_TEST(TestReplaceNumeric, ReplaceWithMaskRandom) {
+  using ArrayType = typename TypeTraits<TypeParam>::ArrayType;
+  using CType = typename TypeTraits<TypeParam>::CType;
+  auto ty = this->type();
+
+  random::RandomArrayGenerator rand(/*seed=*/0);
+  const int64_t length = 1023;
+  std::vector<std::string> values = {"0.01", "0"};
+  // Clamp the range because date/time types don't print well with extreme values
+  values.push_back(std::to_string(static_cast<CType>(std::min<double>(
+      16384.0, static_cast<double>(std::numeric_limits<CType>::max())))));
+  auto options = key_value_metadata({"null_probability", "min", "max"}, values);
+  auto array =
+      checked_pointer_cast<ArrayType>(rand.ArrayOf(*field("a", ty, options), length));
+  auto mask = checked_pointer_cast<BooleanArray>(
+      rand.ArrayOf(boolean(), length, /*null_probability=*/0.01));
+  const int64_t num_replacements = std::count_if(
+      mask->begin(), mask->end(),
+      [](util::optional<bool> value) { return value.has_value() && *value; });
+  auto replacements = checked_pointer_cast<ArrayType>(
+      rand.ArrayOf(*field("a", ty, options), num_replacements));
+  auto expected = this->NaiveImpl(*array, *mask, *replacements);
+
+  this->Assert(ReplaceWithMask, array, mask, replacements, expected);
+  for (int64_t slice = 1; slice <= 16; slice++) {
+    auto sliced_array = checked_pointer_cast<ArrayType>(array->Slice(slice, 15));
+    auto sliced_mask = checked_pointer_cast<BooleanArray>(mask->Slice(slice, 15));
+    auto new_expected = this->NaiveImpl(*sliced_array, *sliced_mask, *replacements);
+    this->Assert(ReplaceWithMask, sliced_array, sliced_mask, replacements, new_expected);
+  }
+}
+
+TYPED_TEST(TestReplaceNumeric, ReplaceWithMaskErrors) {
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      Invalid,
+      ::testing::HasSubstr("Replacement array must be of appropriate length (expected 2 "
+                           "items but got 1 items)"),
+      this->AssertRaises(ReplaceWithMask, this->array("[1, 2]"),
+                         this->mask("[true, true]"), this->array("[0]")));
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      Invalid,
+      ::testing::HasSubstr("Replacement array must be of appropriate length (expected 1 "
+                           "items but got 0 items)"),
+      this->AssertRaises(ReplaceWithMask, this->array("[1, 2]"),
+                         this->mask("[true, null]"), this->array("[]")));
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      Invalid,
+      ::testing::HasSubstr("Mask must be of same length as array (expected 2 "
+                           "items but got 0 items)"),
+      this->AssertRaises(ReplaceWithMask, this->array("[1, 2]"), this->mask("[]"),
+                         this->array("[]")));
+}
+
+TEST_F(TestReplaceBoolean, ReplaceWithMask) {
+  this->Assert(ReplaceWithMask, this->array("[]"), this->mask_scalar(false),
+               this->array("[]"), this->array("[]"));
+  this->Assert(ReplaceWithMask, this->array("[]"), this->mask_scalar(true),
+               this->array("[]"), this->array("[]"));
+  this->Assert(ReplaceWithMask, this->array("[]"), this->null_mask_scalar(),
+               this->array("[]"), this->array("[]"));
+
+  this->Assert(ReplaceWithMask, this->array("[true]"), this->mask_scalar(false),
+               this->array("[]"), this->array("[true]"));
+  this->Assert(ReplaceWithMask, this->array("[true]"), this->mask_scalar(true),
+               this->array("[false]"), this->array("[false]"));
+  this->Assert(ReplaceWithMask, this->array("[true]"), this->null_mask_scalar(),
+               this->array("[]"), this->array("[null]"));
+
+  this->Assert(ReplaceWithMask, this->array("[false, false]"), this->mask_scalar(false),
+               this->scalar("true"), this->array("[false, false]"));
+  this->Assert(ReplaceWithMask, this->array("[false, false]"), this->mask_scalar(true),
+               this->scalar("true"), this->array("[true, true]"));
+  this->Assert(ReplaceWithMask, this->array("[false, false]"), this->mask_scalar(true),
+               this->scalar("null"), this->array("[null, null]"));
+
+  this->Assert(ReplaceWithMask, this->array("[]"), this->mask("[]"), this->array("[]"),
+               this->array("[]"));
+  this->Assert(ReplaceWithMask, this->array("[true, true, true, true]"),
+               this->mask("[false, false, false, false]"), this->array("[]"),
+               this->array("[true, true, true, true]"));
+  this->Assert(ReplaceWithMask, this->array("[true, true, true, true]"),
+               this->mask("[true, true, true, true]"),
+               this->array("[false, false, false, false]"),
+               this->array("[false, false, false, false]"));
+  this->Assert(ReplaceWithMask, this->array("[true, true, true, true]"),
+               this->mask("[null, null, null, null]"), this->array("[]"),
+               this->array("[null, null, null, null]"));
+  this->Assert(ReplaceWithMask, this->array("[true, true, true, null]"),
+               this->mask("[false, false, false, false]"), this->array("[]"),
+               this->array("[true, true, true, null]"));
+  this->Assert(ReplaceWithMask, this->array("[true, true, true, null]"),
+               this->mask("[true, true, true, true]"),
+               this->array("[false, false, false, false]"),
+               this->array("[false, false, false, false]"));
+  this->Assert(ReplaceWithMask, this->array("[true, true, true, null]"),
+               this->mask("[null, null, null, null]"), this->array("[]"),
+               this->array("[null, null, null, null]"));
+  this->Assert(ReplaceWithMask, this->array("[true, true, true, true, true, true]"),
+               this->mask("[true, true, false, false, null, null]"),
+               this->array("[false, null]"),
+               this->array("[false, null, true, true, null, null]"));
+  this->Assert(ReplaceWithMask, this->array("[null, null, null, null, null, null]"),
+               this->mask("[true, true, false, false, null, null]"),
+               this->array("[false, null]"),
+               this->array("[false, null, null, null, null, null]"));
+
+  this->Assert(ReplaceWithMask, this->array("[]"), this->mask("[]"), this->scalar("true"),
+               this->array("[]"));
+  this->Assert(ReplaceWithMask, this->array("[false, false]"), this->mask("[true, true]"),
+               this->scalar("true"), this->array("[true, true]"));
+  this->Assert(ReplaceWithMask, this->array("[false, false]"), this->mask("[true, true]"),
+               this->scalar("null"), this->array("[null, null]"));
+  this->Assert(ReplaceWithMask, this->array("[false, false, false]"),
+               this->mask("[true, false, null]"), this->scalar("true"),
+               this->array("[true, false, null]"));
+}
+
+TEST_F(TestReplaceBoolean, ReplaceWithMaskErrors) {
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      Invalid,
+      ::testing::HasSubstr("Replacement array must be of appropriate length (expected 2 "
+                           "items but got 1 items)"),
+      this->AssertRaises(ReplaceWithMask, this->array("[true, true]"),
+                         this->mask("[true, true]"), this->array("[false]")));
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      Invalid,
+      ::testing::HasSubstr("Replacement array must be of appropriate length (expected 1 "
+                           "items but got 0 items)"),
+      this->AssertRaises(ReplaceWithMask, this->array("[true, true]"),
+                         this->mask("[true, null]"), this->array("[]")));
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      Invalid,
+      ::testing::HasSubstr("Mask must be of same length as array (expected 2 "
+                           "items but got 0 items)"),
+      this->AssertRaises(ReplaceWithMask, this->array("[true, true]"), this->mask("[]"),
+                         this->array("[]")));
+}
+
+TEST_F(TestReplaceFixedSizeBinary, ReplaceWithMask) {
+  this->Assert(ReplaceWithMask, this->array("[]"), this->mask_scalar(false),
+               this->array("[]"), this->array("[]"));
+  this->Assert(ReplaceWithMask, this->array("[]"), this->mask_scalar(true),
+               this->array("[]"), this->array("[]"));
+  this->Assert(ReplaceWithMask, this->array("[]"), this->null_mask_scalar(),
+               this->array("[]"), this->array("[]"));
+
+  this->Assert(ReplaceWithMask, this->array(R"(["foo"])"), this->mask_scalar(false),
+               this->array("[]"), this->array(R"(["foo"])"));
+  this->Assert(ReplaceWithMask, this->array(R"(["foo"])"), this->mask_scalar(true),
+               this->array(R"(["bar"])"), this->array(R"(["bar"])"));
+  this->Assert(ReplaceWithMask, this->array(R"(["foo"])"), this->null_mask_scalar(),
+               this->array("[]"), this->array("[null]"));
+
+  this->Assert(ReplaceWithMask, this->array(R"(["foo", "bar"])"),
+               this->mask_scalar(false), this->scalar(R"("baz")"),
+               this->array(R"(["foo", "bar"])"));
+  this->Assert(ReplaceWithMask, this->array(R"(["foo", "bar"])"), this->mask_scalar(true),
+               this->scalar(R"("baz")"), this->array(R"(["baz", "baz"])"));
+  this->Assert(ReplaceWithMask, this->array(R"(["foo", "bar"])"), this->mask_scalar(true),
+               this->scalar("null"), this->array(R"([null, null])"));
+
+  this->Assert(ReplaceWithMask, this->array("[]"), this->mask("[]"), this->array("[]"),
+               this->array("[]"));
+  this->Assert(ReplaceWithMask, this->array(R"(["aaa", "bbb", "ccc", "ddd"])"),
+               this->mask("[false, false, false, false]"), this->array("[]"),
+               this->array(R"(["aaa", "bbb", "ccc", "ddd"])"));
+  this->Assert(ReplaceWithMask, this->array(R"(["aaa", "bbb", "ccc", "ddd"])"),
+               this->mask("[true, true, true, true]"),
+               this->array(R"(["eee", "fff", "ggg", "hhh"])"),
+               this->array(R"(["eee", "fff", "ggg", "hhh"])"));
+  this->Assert(ReplaceWithMask, this->array(R"(["aaa", "bbb", "ccc", "ddd"])"),
+               this->mask("[null, null, null, null]"), this->array("[]"),
+               this->array(R"([null, null, null, null])"));
+  this->Assert(ReplaceWithMask, this->array(R"(["aaa", "bbb", "ccc", null])"),
+               this->mask("[false, false, false, false]"), this->array("[]"),
+               this->array(R"(["aaa", "bbb", "ccc", null])"));
+  this->Assert(ReplaceWithMask, this->array(R"(["aaa", "bbb", "ccc", null])"),
+               this->mask("[true, true, true, true]"),
+               this->array(R"(["eee", "fff", "ggg", "hhh"])"),
+               this->array(R"(["eee", "fff", "ggg", "hhh"])"));
+  this->Assert(ReplaceWithMask, this->array(R"(["aaa", "bbb", "ccc", null])"),
+               this->mask("[null, null, null, null]"), this->array("[]"),
+               this->array(R"([null, null, null, null])"));
+  this->Assert(ReplaceWithMask,
+               this->array(R"(["aaa", "bbb", "ccc", "ddd", "eee", "fff"])"),
+               this->mask("[true, true, false, false, null, null]"),
+               this->array(R"(["ggg", null])"),
+               this->array(R"(["ggg", null, "ccc", "ddd", null, null])"));
+  this->Assert(ReplaceWithMask, this->array(R"([null, null, null, null, null, null])"),
+               this->mask("[true, true, false, false, null, null]"),
+               this->array(R"(["aaa", null])"),
+               this->array(R"(["aaa", null, null, null, null, null])"));
+
+  this->Assert(ReplaceWithMask, this->array("[]"), this->mask("[]"),
+               this->scalar(R"("zzz")"), this->array("[]"));
+  this->Assert(ReplaceWithMask, this->array(R"(["aaa", "bbb"])"),
+               this->mask("[true, true]"), this->scalar(R"("zzz")"),
+               this->array(R"(["zzz", "zzz"])"));
+  this->Assert(ReplaceWithMask, this->array(R"(["aaa", "bbb"])"),
+               this->mask("[true, true]"), this->scalar("null"),
+               this->array("[null, null]"));
+  this->Assert(ReplaceWithMask, this->array(R"(["aaa", "bbb", "ccc"])"),
+               this->mask("[true, false, null]"), this->scalar(R"("zzz")"),
+               this->array(R"(["zzz", "bbb", null])"));
+}
+
+TEST_F(TestReplaceFixedSizeBinary, ReplaceWithMaskErrors) {
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      Invalid,
+      ::testing::AllOf(
+          ::testing::HasSubstr("Replacements must be of same type (expected "),
+          ::testing::HasSubstr(this->type()->ToString()),
+          ::testing::HasSubstr("but got fixed_size_binary[2]")),
+      this->AssertRaises(ReplaceWithMask, this->array("[]"), this->mask_scalar(true),
+                         ArrayFromJSON(fixed_size_binary(2), "[]")));
+}
+
+TYPED_TEST(TestReplaceDecimal, ReplaceWithMask) {
+  this->Assert(ReplaceWithMask, this->array("[]"), this->mask_scalar(false),
+               this->array("[]"), this->array("[]"));
+  this->Assert(ReplaceWithMask, this->array("[]"), this->mask_scalar(true),
+               this->array("[]"), this->array("[]"));
+  this->Assert(ReplaceWithMask, this->array("[]"), this->null_mask_scalar(),
+               this->array("[]"), this->array("[]"));
+
+  this->Assert(ReplaceWithMask, this->array(R"(["1.00"])"), this->mask_scalar(false),
+               this->array("[]"), this->array(R"(["1.00"])"));
+  this->Assert(ReplaceWithMask, this->array(R"(["1.00"])"), this->mask_scalar(true),
+               this->array(R"(["0.00"])"), this->array(R"(["0.00"])"));
+  this->Assert(ReplaceWithMask, this->array(R"(["1.00"])"), this->null_mask_scalar(),
+               this->array("[]"), this->array("[null]"));
+
+  this->Assert(ReplaceWithMask, this->array(R"(["0.00", "0.00"])"),
+               this->mask_scalar(false), this->scalar(R"("1.00")"),
+               this->array(R"(["0.00", "0.00"])"));
+  this->Assert(ReplaceWithMask, this->array(R"(["0.00", "0.00"])"),
+               this->mask_scalar(true), this->scalar(R"("1.00")"),
+               this->array(R"(["1.00", "1.00"])"));
+  this->Assert(ReplaceWithMask, this->array(R"(["0.00", "0.00"])"),
+               this->mask_scalar(true), this->scalar("null"),
+               this->array("[null, null]"));
+
+  this->Assert(ReplaceWithMask, this->array("[]"), this->mask("[]"), this->array("[]"),
+               this->array("[]"));
+  this->Assert(ReplaceWithMask, this->array(R"(["0.00", "1.00", "2.00", "3.00"])"),
+               this->mask("[false, false, false, false]"), this->array("[]"),
+               this->array(R"(["0.00", "1.00", "2.00", "3.00"])"));
+  this->Assert(ReplaceWithMask, this->array(R"(["0.00", "1.00", "2.00", "3.00"])"),
+               this->mask("[true, true, true, true]"),
+               this->array(R"(["10.00", "11.00", "12.00", "13.00"])"),
+               this->array(R"(["10.00", "11.00", "12.00", "13.00"])"));
+  this->Assert(ReplaceWithMask, this->array(R"(["0.00", "1.00", "2.00", "3.00"])"),
+               this->mask("[null, null, null, null]"), this->array("[]"),
+               this->array("[null, null, null, null]"));
+  this->Assert(ReplaceWithMask, this->array(R"(["0.00", "1.00", "2.00", null])"),
+               this->mask("[false, false, false, false]"), this->array("[]"),
+               this->array(R"(["0.00", "1.00", "2.00", null])"));
+  this->Assert(ReplaceWithMask, this->array(R"(["0.00", "1.00", "2.00", null])"),
+               this->mask("[true, true, true, true]"),
+               this->array(R"(["10.00", "11.00", "12.00", "13.00"])"),
+               this->array(R"(["10.00", "11.00", "12.00", "13.00"])"));
+  this->Assert(ReplaceWithMask, this->array(R"(["0.00", "1.00", "2.00", null])"),
+               this->mask("[null, null, null, null]"), this->array("[]"),
+               this->array("[null, null, null, null]"));
+  this->Assert(ReplaceWithMask,
+               this->array(R"(["0.00", "1.00", "2.00", "3.00", "4.00", "5.00"])"),
+               this->mask("[true, true, false, false, null, null]"),
+               this->array(R"(["10.00", null])"),
+               this->array(R"(["10.00", null, "2.00", "3.00", null, null])"));
+  this->Assert(ReplaceWithMask, this->array("[null, null, null, null, null, null]"),
+               this->mask("[true, true, false, false, null, null]"),
+               this->array(R"(["10.00", null])"),
+               this->array(R"(["10.00", null, null, null, null, null])"));
+
+  this->Assert(ReplaceWithMask, this->array("[]"), this->mask("[]"),
+               this->scalar(R"("1.00")"), this->array("[]"));
+  this->Assert(ReplaceWithMask, this->array(R"(["0.00", "1.00"])"),
+               this->mask("[true, true]"), this->scalar(R"("10.00")"),
+               this->array(R"(["10.00", "10.00"])"));
+  this->Assert(ReplaceWithMask, this->array(R"(["0.00", "1.00"])"),
+               this->mask("[true, true]"), this->scalar("null"),
+               this->array("[null, null]"));
+  this->Assert(ReplaceWithMask, this->array(R"(["0.00", "1.00", "2.00"])"),
+               this->mask("[true, false, null]"), this->scalar(R"("10.00")"),
+               this->array(R"(["10.00", "1.00", null])"));
+}
+
+TEST_F(TestReplaceDayTimeInterval, ReplaceWithMask) {
+  this->Assert(ReplaceWithMask, this->array("[]"), this->mask_scalar(false),
+               this->array("[]"), this->array("[]"));
+  this->Assert(ReplaceWithMask, this->array("[]"), this->mask_scalar(true),
+               this->array("[]"), this->array("[]"));
+  this->Assert(ReplaceWithMask, this->array("[]"), this->null_mask_scalar(),
+               this->array("[]"), this->array("[]"));
+
+  this->Assert(ReplaceWithMask, this->array("[[1, 2]]"), this->mask_scalar(false),
+               this->array("[]"), this->array("[[1, 2]]"));
+  this->Assert(ReplaceWithMask, this->array("[[1, 2]]"), this->mask_scalar(true),
+               this->array("[[3, 4]]"), this->array("[[3, 4]]"));
+  this->Assert(ReplaceWithMask, this->array("[[1, 2]]"), this->null_mask_scalar(),
+               this->array("[]"), this->array("[null]"));
+
+  this->Assert(ReplaceWithMask, this->array("[[1, 2], [3, 4]]"), this->mask_scalar(false),
+               this->scalar("[7, 8]"), this->array("[[1, 2], [3, 4]]"));
+  this->Assert(ReplaceWithMask, this->array("[[1, 2], [3, 4]]"), this->mask_scalar(true),
+               this->scalar("[7, 8]"), this->array("[[7, 8], [7, 8]]"));
+  this->Assert(ReplaceWithMask, this->array("[[1, 2], [3, 4]]"), this->mask_scalar(true),
+               this->scalar("null"), this->array("[null, null]"));
+
+  this->Assert(ReplaceWithMask, this->array("[]"), this->mask("[]"), this->array("[]"),
+               this->array("[]"));
+  this->Assert(ReplaceWithMask, this->array("[[1, 2], [1, 2], [1, 2], [1, 2]]"),
+               this->mask("[false, false, false, false]"), this->array("[]"),
+               this->array("[[1, 2], [1, 2], [1, 2], [1, 2]]"));
+  this->Assert(ReplaceWithMask, this->array("[[1, 2], [1, 2], [1, 2], [1, 2]]"),
+               this->mask("[true, true, true, true]"),
+               this->array("[[3, 4], [3, 4], [3, 4], [3, 4]]"),
+               this->array("[[3, 4], [3, 4], [3, 4], [3, 4]]"));
+  this->Assert(ReplaceWithMask, this->array("[[1, 2], [1, 2], [1, 2], [1, 2]]"),
+               this->mask("[null, null, null, null]"), this->array("[]"),
+               this->array("[null, null, null, null]"));
+  this->Assert(ReplaceWithMask, this->array("[[1, 2], [1, 2], [1, 2], null]"),
+               this->mask("[false, false, false, false]"), this->array("[]"),
+               this->array("[[1, 2], [1, 2], [1, 2], null]"));
+  this->Assert(ReplaceWithMask, this->array("[[1, 2], [1, 2], [1, 2], null]"),
+               this->mask("[true, true, true, true]"),
+               this->array("[[3, 4], [3, 4], [3, 4], [3, 4]]"),
+               this->array("[[3, 4], [3, 4], [3, 4], [3, 4]]"));
+  this->Assert(ReplaceWithMask, this->array("[[1, 2], [1, 2], [1, 2], null]"),
+               this->mask("[null, null, null, null]"), this->array("[]"),
+               this->array("[null, null, null, null]"));
+  this->Assert(
+      ReplaceWithMask, this->array("[[1, 2], [1, 2], [1, 2], [1, 2], [1, 2], [1, 2]]"),
+      this->mask("[true, true, false, false, null, null]"), this->array("[[3, 4], null]"),
+      this->array("[[3, 4], null, [1, 2], [1, 2], null, null]"));
+  this->Assert(ReplaceWithMask, this->array("[null, null, null, null, null, null]"),
+               this->mask("[true, true, false, false, null, null]"),
+               this->array("[[3, 4], null]"),
+               this->array("[[3, 4], null, null, null, null, null]"));
+
+  this->Assert(ReplaceWithMask, this->array("[]"), this->mask("[]"),
+               this->scalar("[7, 8]"), this->array("[]"));
+  this->Assert(ReplaceWithMask, this->array("[[1, 2], [3, 4]]"),
+               this->mask("[true, true]"), this->scalar("[7, 8]"),
+               this->array("[[7, 8], [7, 8]]"));
+  this->Assert(ReplaceWithMask, this->array("[[1, 2], [3, 4]]"),
+               this->mask("[true, true]"), this->scalar("null"),
+               this->array("[null, null]"));
+  this->Assert(ReplaceWithMask, this->array("[[1, 2], [3, 4], [5, 6]]"),
+               this->mask("[true, false, null]"), this->scalar("[7, 8]"),
+               this->array("[[7, 8], [3, 4], null]"));
+}
+
+TYPED_TEST(TestReplaceBinary, ReplaceWithMask) {
+  this->Assert(ReplaceWithMask, this->array("[]"), this->mask_scalar(false),
+               this->array("[]"), this->array("[]"));
+  this->Assert(ReplaceWithMask, this->array("[]"), this->mask_scalar(true),
+               this->array("[]"), this->array("[]"));
+  this->Assert(ReplaceWithMask, this->array("[]"), this->null_mask_scalar(),
+               this->array("[]"), this->array("[]"));
+
+  this->Assert(ReplaceWithMask, this->array(R"(["foo"])"), this->mask_scalar(false),
+               this->array("[]"), this->array(R"(["foo"])"));
+  this->Assert(ReplaceWithMask, this->array(R"(["foo"])"), this->mask_scalar(true),
+               this->array(R"(["bar"])"), this->array(R"(["bar"])"));
+  this->Assert(ReplaceWithMask, this->array(R"(["foo"])"), this->null_mask_scalar(),
+               this->array("[]"), this->array("[null]"));
+
+  this->Assert(ReplaceWithMask, this->array(R"(["foo", "bar"])"),
+               this->mask_scalar(false), this->scalar(R"("baz")"),
+               this->array(R"(["foo", "bar"])"));
+  this->Assert(ReplaceWithMask, this->array(R"(["foo", "bar"])"), this->mask_scalar(true),
+               this->scalar(R"("baz")"), this->array(R"(["baz", "baz"])"));
+  this->Assert(ReplaceWithMask, this->array(R"(["foo", "bar"])"), this->mask_scalar(true),
+               this->scalar("null"), this->array(R"([null, null])"));
+
+  this->Assert(ReplaceWithMask, this->array("[]"), this->mask("[]"), this->array("[]"),
+               this->array("[]"));
+  this->Assert(ReplaceWithMask, this->array(R"(["a", "bb", "ccc", "dddd"])"),
+               this->mask("[false, false, false, false]"), this->array("[]"),
+               this->array(R"(["a", "bb", "ccc", "dddd"])"));
+  this->Assert(ReplaceWithMask, this->array(R"(["a", "bb", "ccc", "dddd"])"),
+               this->mask("[true, true, true, true]"),
+               this->array(R"(["eeeee", "f", "ggg", "hhh"])"),
+               this->array(R"(["eeeee", "f", "ggg", "hhh"])"));
+  this->Assert(ReplaceWithMask, this->array(R"(["a", "bb", "ccc", "dddd"])"),
+               this->mask("[null, null, null, null]"), this->array("[]"),
+               this->array(R"([null, null, null, null])"));
+  this->Assert(ReplaceWithMask, this->array(R"(["a", "bb", "ccc", null])"),
+               this->mask("[false, false, false, false]"), this->array("[]"),
+               this->array(R"(["a", "bb", "ccc", null])"));
+  this->Assert(ReplaceWithMask, this->array(R"(["a", "bb", "ccc", null])"),
+               this->mask("[true, true, true, true]"),
+               this->array(R"(["eeeee", "f", "ggg", "hhh"])"),
+               this->array(R"(["eeeee", "f", "ggg", "hhh"])"));
+  this->Assert(ReplaceWithMask, this->array(R"(["a", "bb", "ccc", null])"),
+               this->mask("[null, null, null, null]"), this->array("[]"),
+               this->array(R"([null, null, null, null])"));
+  this->Assert(ReplaceWithMask,
+               this->array(R"(["a", "bb", "ccc", "dddd", "eeeee", "f"])"),
+               this->mask("[true, true, false, false, null, null]"),
+               this->array(R"(["ggg", null])"),
+               this->array(R"(["ggg", null, "ccc", "dddd", null, null])"));
+  this->Assert(ReplaceWithMask, this->array(R"([null, null, null, null, null, null])"),
+               this->mask("[true, true, false, false, null, null]"),
+               this->array(R"(["a", null])"),
+               this->array(R"(["a", null, null, null, null, null])"));
+
+  this->Assert(ReplaceWithMask, this->array("[]"), this->mask("[]"),
+               this->scalar(R"("zzz")"), this->array("[]"));
+  this->Assert(ReplaceWithMask, this->array(R"(["a", "bb"])"), this->mask("[true, true]"),
+               this->scalar(R"("zzz")"), this->array(R"(["zzz", "zzz"])"));
+  this->Assert(ReplaceWithMask, this->array(R"(["a", "bb"])"), this->mask("[true, true]"),
+               this->scalar("null"), this->array("[null, null]"));
+  this->Assert(ReplaceWithMask, this->array(R"(["a", "bb", "ccc"])"),
+               this->mask("[true, false, null]"), this->scalar(R"("zzz")"),
+               this->array(R"(["zzz", "bb", null])"));
+}
+
+TYPED_TEST(TestReplaceBinary, ReplaceWithMaskRandom) {
+  using ArrayType = typename TypeTraits<TypeParam>::ArrayType;
+  auto ty = this->type();
+
+  random::RandomArrayGenerator rand(/*seed=*/0);
+  const int64_t length = 1023;
+  auto options = key_value_metadata({{"null_probability", "0.01"}, {"max_length", "5"}});
+  auto array =
+      checked_pointer_cast<ArrayType>(rand.ArrayOf(*field("a", ty, options), length));
+  auto mask = checked_pointer_cast<BooleanArray>(
+      rand.ArrayOf(boolean(), length, /*null_probability=*/0.01));
+  const int64_t num_replacements = std::count_if(
+      mask->begin(), mask->end(),
+      [](util::optional<bool> value) { return value.has_value() && *value; });
+  auto replacements = checked_pointer_cast<ArrayType>(
+      rand.ArrayOf(*field("a", ty, options), num_replacements));
+  auto expected = this->NaiveImpl(*array, *mask, *replacements);
+
+  this->Assert(ReplaceWithMask, array, mask, replacements, expected);
+  for (int64_t slice = 1; slice <= 16; slice++) {
+    auto sliced_array = checked_pointer_cast<ArrayType>(array->Slice(slice, 15));
+    auto sliced_mask = checked_pointer_cast<BooleanArray>(mask->Slice(slice, 15));
+    auto new_expected = this->NaiveImpl(*sliced_array, *sliced_mask, *replacements);
+    this->Assert(ReplaceWithMask, sliced_array, sliced_mask, replacements, new_expected);
+  }
+}
+
+}  // namespace compute
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/registry.cc b/cpp/src/arrow/compute/registry.cc
index 8a0d9e62518..ca7b6137306 100644
--- a/cpp/src/arrow/compute/registry.cc
+++ b/cpp/src/arrow/compute/registry.cc
@@ -168,6 +168,7 @@ static std::unique_ptr<FunctionRegistry> CreateBuiltInRegistry() {
 
   // Vector functions
   RegisterVectorHash(registry.get());
+  RegisterVectorReplace(registry.get());
   RegisterVectorSelection(registry.get());
   RegisterVectorNested(registry.get());
   RegisterVectorSort(registry.get());
diff --git a/cpp/src/arrow/compute/registry_internal.h b/cpp/src/arrow/compute/registry_internal.h
index dd0271eb43d..892b54341da 100644
--- a/cpp/src/arrow/compute/registry_internal.h
+++ b/cpp/src/arrow/compute/registry_internal.h
@@ -41,6 +41,7 @@ void RegisterScalarOptions(FunctionRegistry* registry);
 
 // Vector functions
 void RegisterVectorHash(FunctionRegistry* registry);
+void RegisterVectorReplace(FunctionRegistry* registry);
 void RegisterVectorSelection(FunctionRegistry* registry);
 void RegisterVectorNested(FunctionRegistry* registry);
 void RegisterVectorSort(FunctionRegistry* registry);
diff --git a/cpp/src/arrow/type_traits.h b/cpp/src/arrow/type_traits.h
index 86664bbb162..e4d809967f9 100644
--- a/cpp/src/arrow/type_traits.h
+++ b/cpp/src/arrow/type_traits.h
@@ -233,6 +233,7 @@ struct TypeTraits<MonthIntervalType> {
   using ArrayType = MonthIntervalArray;
   using BuilderType = MonthIntervalBuilder;
   using ScalarType = MonthIntervalScalar;
+  using CType = MonthIntervalType::c_type;
 
   static constexpr int64_t bytes_required(int64_t elements) {
     return elements * static_cast<int64_t>(sizeof(int32_t));
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index fc6c8b7c7e1..00391052b1e 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -850,6 +850,7 @@ in reverse order.
   as given by :struct:`SliceOptions` where ``start`` and ``stop`` are measured
   in codeunits. Null inputs emit null.
 
+.. _cpp-compute-scalar-structural-transforms:
 
 Structural transforms
 ~~~~~~~~~~~~~~~~~~~~~
@@ -861,7 +862,7 @@ Structural transforms
 +==========================+============+================================================+=====================+=========+
 | fill_null                | Binary     | Boolean, Null, Numeric, Temporal, String-like  | Input type          | \(1)    |
 +--------------------------+------------+------------------------------------------------+---------------------+---------+
-| if_else                  | Ternary    | Boolean, Null, Numeric, Temporal               | Input type          + \(2)    |
+| if_else                  | Ternary    | Boolean, Null, Numeric, Temporal               | Input type          | \(2)    |
 +--------------------------+------------+------------------------------------------------+---------------------+---------+
 | is_finite                | Unary      | Float, Double                                  | Boolean             | \(3)    |
 +--------------------------+------------+------------------------------------------------+---------------------+---------+
@@ -888,6 +889,8 @@ Structural transforms
   input. If the nulls present on the first input, they will be promoted to the
   output, otherwise nulls will be chosen based on the first input values.
 
+  Also see: :ref:`replace_with_mask <cpp-compute-vector-structural-transforms>`.
+
 * \(3) Output is true iff the corresponding input element is finite (not Infinity,
   -Infinity, or NaN).
 
@@ -1154,6 +1157,8 @@ value, but smaller than nulls.
   table. If the input is a record batch or table, one or more sort
   keys must be specified.
 
+.. _cpp-compute-vector-structural-transforms:
+
 Structural transforms
 ~~~~~~~~~~~~~~~~~~~~~
 
@@ -1172,3 +1177,18 @@ Structural transforms
 * \(2) For each value in the list child array, the index at which it is found
   in the list array is appended to the output.  Nulls in the parent list array
   are discarded.
+
+These functions create a copy of the first input with some elements
+replaced, based on the remaining inputs.
+
++--------------------------+------------+-----------------------+--------------+--------------+--------------+-------+
+| Function name            | Arity      | Input type 1          | Input type 2 | Input type 3 | Output type  | Notes |
++==========================+============+=======================+==============+==============+==============+=======+
+| replace_with_mask        | Ternary    | Fixed-width or binary | Boolean      | Input type 1 | Input type 1 | \(1)  |
++--------------------------+------------+-----------------------+--------------+--------------+--------------+-------+
+
+* \(1) Each element in input 1 for which the corresponding Boolean in input 2
+  is true is replaced with the next value from input 3. A null in input 2
+  results in a corresponding null in the output.
+
+  Also see: :ref:`if_else <cpp-compute-scalar-structural-transforms>`.
diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst
index a611d2a2384..09c67598193 100644
--- a/docs/source/python/api/compute.rst
+++ b/docs/source/python/api/compute.rst
@@ -292,6 +292,14 @@ Conversions
    cast
    strptime
 
+Replacements
+------------
+
+.. autosummary::
+   :toctree: ../generated/
+
+   replace_with_mask
+
 Selections
 ----------
 

From 1c002fcd9f081177cb1c197d5f73238630c9f519 Mon Sep 17 00:00:00 2001
From: Rok <rok@mihevc.org>
Date: Wed, 14 Jul 2021 10:55:35 -0500
Subject: [PATCH 552/719] ARROW-12499: [C++][Compute] Add
 ScalarAggregateOptions to Any and All kernels

This is to resolve [ARROW-12499](https://issues.apache.org/jira/browse/ARROW-12499).

Closes #10476 from rok/ARROW-12499

Lead-authored-by: Rok <rok@mihevc.org>
Co-authored-by: Rok Mihevc <rok@mihevc.org>
Signed-off-by: Jonathan Keane <jkeane@gmail.com>
---
 cpp/src/arrow/compute/api_aggregate.cc        |  10 +-
 cpp/src/arrow/compute/api_aggregate.h         |  22 ++-
 .../arrow/compute/kernels/aggregate_basic.cc  |  64 ++++++--
 .../arrow/compute/kernels/aggregate_test.cc   | 143 ++++++++++++------
 docs/source/cpp/compute.rst                   |  23 +--
 python/pyarrow/tests/test_compute.py          |  10 ++
 r/R/compute.R                                 |  23 +--
 r/src/compute.cpp                             |   2 +-
 r/tests/testthat/test-compute-aggregate.R     |   4 +
 9 files changed, 203 insertions(+), 98 deletions(-)

diff --git a/cpp/src/arrow/compute/api_aggregate.cc b/cpp/src/arrow/compute/api_aggregate.cc
index be05c3c11d0..1b00c366bfd 100644
--- a/cpp/src/arrow/compute/api_aggregate.cc
+++ b/cpp/src/arrow/compute/api_aggregate.cc
@@ -155,12 +155,14 @@ Result<Datum> MinMax(const Datum& value, const ScalarAggregateOptions& options,
   return CallFunction("min_max", {value}, &options, ctx);
 }
 
-Result<Datum> Any(const Datum& value, ExecContext* ctx) {
-  return CallFunction("any", {value}, ctx);
+Result<Datum> Any(const Datum& value, const ScalarAggregateOptions& options,
+                  ExecContext* ctx) {
+  return CallFunction("any", {value}, &options, ctx);
 }
 
-Result<Datum> All(const Datum& value, ExecContext* ctx) {
-  return CallFunction("all", {value}, ctx);
+Result<Datum> All(const Datum& value, const ScalarAggregateOptions& options,
+                  ExecContext* ctx) {
+  return CallFunction("all", {value}, &options, ctx);
 }
 
 Result<Datum> Mode(const Datum& value, const ModeOptions& options, ExecContext* ctx) {
diff --git a/cpp/src/arrow/compute/api_aggregate.h b/cpp/src/arrow/compute/api_aggregate.h
index 7b6e2ef96de..7a6c44bd923 100644
--- a/cpp/src/arrow/compute/api_aggregate.h
+++ b/cpp/src/arrow/compute/api_aggregate.h
@@ -205,30 +205,44 @@ Result<Datum> MinMax(
 /// \brief Test whether any element in a boolean array evaluates to true.
 ///
 /// This function returns true if any of the elements in the array evaluates
-/// to true and false otherwise. Null values are skipped.
+/// to true and false otherwise. Null values are ignored by default.
+/// If null values are taken into account by setting ScalarAggregateOptions
+/// parameter skip_nulls = false then Kleene logic is used.
+/// See KleeneOr for more details on Kleene logic.
 ///
 /// \param[in] value input datum, expecting a boolean array
+/// \param[in] options see ScalarAggregateOptions for more information
 /// \param[in] ctx the function execution context, optional
 /// \return resulting datum as a BooleanScalar
 ///
 /// \since 3.0.0
 /// \note API not yet finalized
 ARROW_EXPORT
-Result<Datum> Any(const Datum& value, ExecContext* ctx = NULLPTR);
+Result<Datum> Any(
+    const Datum& value,
+    const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults(),
+    ExecContext* ctx = NULLPTR);
 
 /// \brief Test whether all elements in a boolean array evaluate to true.
 ///
 /// This function returns true if all of the elements in the array evaluate
-/// to true and false otherwise. Null values are skipped.
+/// to true and false otherwise. Null values are ignored by default.
+/// If null values are taken into account by setting ScalarAggregateOptions
+/// parameter skip_nulls = false then Kleene logic is used.
+/// See KleeneAnd for more details on Kleene logic.
 ///
 /// \param[in] value input datum, expecting a boolean array
+/// \param[in] options see ScalarAggregateOptions for more information
 /// \param[in] ctx the function execution context, optional
 /// \return resulting datum as a BooleanScalar
 
 /// \since 3.0.0
 /// \note API not yet finalized
 ARROW_EXPORT
-Result<Datum> All(const Datum& value, ExecContext* ctx = NULLPTR);
+Result<Datum> All(
+    const Datum& value,
+    const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults(),
+    ExecContext* ctx = NULLPTR);
 
 /// \brief Calculate the modal (most common) value of a numeric array
 ///
diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic.cc b/cpp/src/arrow/compute/kernels/aggregate_basic.cc
index 6a844817686..5e0454c9c4d 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_basic.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_basic.cc
@@ -142,13 +142,15 @@ Result<std::unique_ptr<KernelState>> MinMaxInit(KernelContext* ctx,
 // Any implementation
 
 struct BooleanAnyImpl : public ScalarAggregator {
+  explicit BooleanAnyImpl(ScalarAggregateOptions options) : options(std::move(options)) {}
+
   Status Consume(KernelContext*, const ExecBatch& batch) override {
     // short-circuit if seen a True already
     if (this->any == true) {
       return Status::OK();
     }
-
     const auto& data = *batch[0].array();
+    this->has_nulls = data.GetNullCount() > 0;
     arrow::internal::OptionalBinaryBitBlockCounter counter(
         data.buffers[0], data.offset, data.buffers[1], data.offset, data.length);
     int64_t position = 0;
@@ -166,32 +168,48 @@ struct BooleanAnyImpl : public ScalarAggregator {
   Status MergeFrom(KernelContext*, KernelState&& src) override {
     const auto& other = checked_cast<const BooleanAnyImpl&>(src);
     this->any |= other.any;
+    this->has_nulls |= other.has_nulls;
     return Status::OK();
   }
 
-  Status Finalize(KernelContext*, Datum* out) override {
-    out->value = std::make_shared<BooleanScalar>(this->any);
+  Status Finalize(KernelContext* ctx, Datum* out) override {
+    if (!options.skip_nulls && !this->any && this->has_nulls) {
+      out->value = std::make_shared<BooleanScalar>();
+    } else {
+      out->value = std::make_shared<BooleanScalar>(this->any);
+    }
     return Status::OK();
   }
 
   bool any = false;
+  bool has_nulls = false;
+  ScalarAggregateOptions options;
 };
 
 Result<std::unique_ptr<KernelState>> AnyInit(KernelContext*, const KernelInitArgs& args) {
-  return ::arrow::internal::make_unique<BooleanAnyImpl>();
+  const ScalarAggregateOptions options =
+      static_cast<const ScalarAggregateOptions&>(*args.options);
+  return ::arrow::internal::make_unique<BooleanAnyImpl>(
+      static_cast<const ScalarAggregateOptions&>(*args.options));
 }
 
 // ----------------------------------------------------------------------
 // All implementation
 
 struct BooleanAllImpl : public ScalarAggregator {
+  explicit BooleanAllImpl(ScalarAggregateOptions options) : options(std::move(options)) {}
+
   Status Consume(KernelContext*, const ExecBatch& batch) override {
     // short-circuit if seen a false already
     if (this->all == false) {
       return Status::OK();
     }
-
+    // short-circuit if seen a null already
+    if (!options.skip_nulls && this->has_nulls) {
+      return Status::OK();
+    }
     const auto& data = *batch[0].array();
+    this->has_nulls = data.GetNullCount() > 0;
     arrow::internal::OptionalBinaryBitBlockCounter counter(
         data.buffers[1], data.offset, data.buffers[0], data.offset, data.length);
     int64_t position = 0;
@@ -210,19 +228,27 @@ struct BooleanAllImpl : public ScalarAggregator {
   Status MergeFrom(KernelContext*, KernelState&& src) override {
     const auto& other = checked_cast<const BooleanAllImpl&>(src);
     this->all &= other.all;
+    this->has_nulls |= other.has_nulls;
     return Status::OK();
   }
 
   Status Finalize(KernelContext*, Datum* out) override {
-    out->value = std::make_shared<BooleanScalar>(this->all);
+    if (!options.skip_nulls && this->all && this->has_nulls) {
+      out->value = std::make_shared<BooleanScalar>();
+    } else {
+      out->value = std::make_shared<BooleanScalar>(this->all);
+    }
     return Status::OK();
   }
 
   bool all = true;
+  bool has_nulls = false;
+  ScalarAggregateOptions options;
 };
 
 Result<std::unique_ptr<KernelState>> AllInit(KernelContext*, const KernelInitArgs& args) {
-  return ::arrow::internal::make_unique<BooleanAllImpl>();
+  return ::arrow::internal::make_unique<BooleanAllImpl>(
+      static_cast<const ScalarAggregateOptions&>(*args.options));
 }
 
 // ----------------------------------------------------------------------
@@ -407,12 +433,22 @@ const FunctionDoc min_max_doc{"Compute the minimum and maximum values of a numer
                               "ScalarAggregateOptions"};
 
 const FunctionDoc any_doc{"Test whether any element in a boolean array evaluates to true",
-                          ("Null values are ignored."),
-                          {"array"}};
+                          ("Null values are ignored by default.\n"
+                           "If null values are taken into account by setting "
+                           "ScalarAggregateOptions parameter skip_nulls = false then "
+                           "Kleene logic is used.\n"
+                           "See KleeneOr for more details on Kleene logic."),
+                          {"array"},
+                          "ScalarAggregateOptions"};
 
 const FunctionDoc all_doc{"Test whether all elements in a boolean array evaluate to true",
-                          ("Null values are ignored."),
-                          {"array"}};
+                          ("Null values are ignored by default.\n"
+                           "If null values are taken into account by setting "
+                           "ScalarAggregateOptions parameter skip_nulls = false then "
+                           "Kleene logic is used.\n"
+                           "See KleeneAnd for more details on Kleene logic."),
+                          {"array"},
+                          "ScalarAggregateOptions"};
 
 const FunctionDoc index_doc{"Find the index of the first occurrence of a given value",
                             ("The result is always computed as an int64_t, regardless\n"
@@ -496,12 +532,14 @@ void RegisterScalarAggregateBasic(FunctionRegistry* registry) {
   DCHECK_OK(registry->AddFunction(std::move(func)));
 
   // any
-  func = std::make_shared<ScalarAggregateFunction>("any", Arity::Unary(), &any_doc);
+  func = std::make_shared<ScalarAggregateFunction>("any", Arity::Unary(), &any_doc,
+                                                   &default_scalar_aggregate_options);
   aggregate::AddBasicAggKernels(aggregate::AnyInit, {boolean()}, boolean(), func.get());
   DCHECK_OK(registry->AddFunction(std::move(func)));
 
   // all
-  func = std::make_shared<ScalarAggregateFunction>("all", Arity::Unary(), &all_doc);
+  func = std::make_shared<ScalarAggregateFunction>("all", Arity::Unary(), &all_doc,
+                                                   &default_scalar_aggregate_options);
   aggregate::AddBasicAggKernels(aggregate::AllInit, {boolean()}, boolean(), func.get());
   DCHECK_OK(registry->AddFunction(std::move(func)));
 
diff --git a/cpp/src/arrow/compute/kernels/aggregate_test.cc b/cpp/src/arrow/compute/kernels/aggregate_test.cc
index 4bce02a990b..7318539df7f 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_test.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_test.cc
@@ -942,21 +942,26 @@ TYPED_TEST(TestRandomNumericMinMaxKernel, RandomArrayMinMax) {
 
 class TestPrimitiveAnyKernel : public ::testing::Test {
  public:
-  void AssertAnyIs(const Datum& array, bool expected) {
-    ASSERT_OK_AND_ASSIGN(Datum out, Any(array));
+  void AssertAnyIs(const Datum& array, const std::shared_ptr<BooleanScalar>& expected,
+                   const ScalarAggregateOptions& options) {
+    ASSERT_OK_AND_ASSIGN(Datum out, Any(array, options, nullptr));
     const BooleanScalar& out_any = out.scalar_as<BooleanScalar>();
-    const auto expected_any = static_cast<const BooleanScalar>(expected);
-    ASSERT_EQ(out_any, expected_any);
+    ASSERT_EQ(out_any, *expected);
   }
 
-  void AssertAnyIs(const std::string& json, bool expected) {
+  void AssertAnyIs(
+      const std::string& json, const std::shared_ptr<BooleanScalar>& expected,
+      const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults()) {
     auto array = ArrayFromJSON(type_singleton(), json);
-    AssertAnyIs(array, expected);
+    AssertAnyIs(array, expected, options);
   }
 
-  void AssertAnyIs(const std::vector<std::string>& json, bool expected) {
+  void AssertAnyIs(
+      const std::vector<std::string>& json,
+      const std::shared_ptr<BooleanScalar>& expected,
+      const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults()) {
     auto array = ChunkedArrayFromJSON(type_singleton(), json);
-    AssertAnyIs(array, expected);
+    AssertAnyIs(array, expected, options);
   }
 
   std::shared_ptr<DataType> type_singleton() {
@@ -967,26 +972,47 @@ class TestPrimitiveAnyKernel : public ::testing::Test {
 class TestAnyKernel : public TestPrimitiveAnyKernel {};
 
 TEST_F(TestAnyKernel, Basics) {
+  auto true_value = std::make_shared<BooleanScalar>(true);
+  auto false_value = std::make_shared<BooleanScalar>(false);
+  auto null_value = std::make_shared<BooleanScalar>();
+  null_value->is_valid = false;
+
   std::vector<std::string> chunked_input0 = {"[]", "[true]"};
   std::vector<std::string> chunked_input1 = {"[true, true, null]", "[true, null]"};
   std::vector<std::string> chunked_input2 = {"[false, false, false]", "[false]"};
   std::vector<std::string> chunked_input3 = {"[false, null]", "[null, false]"};
   std::vector<std::string> chunked_input4 = {"[true, null]", "[null, false]"};
 
-  this->AssertAnyIs("[]", false);
-  this->AssertAnyIs("[false]", false);
-  this->AssertAnyIs("[true, false]", true);
-  this->AssertAnyIs("[null, null, null]", false);
-  this->AssertAnyIs("[false, false, false]", false);
-  this->AssertAnyIs("[false, false, false, null]", false);
-  this->AssertAnyIs("[true, null, true, true]", true);
-  this->AssertAnyIs("[false, null, false, true]", true);
-  this->AssertAnyIs("[true, null, false, true]", true);
-  this->AssertAnyIs(chunked_input0, true);
-  this->AssertAnyIs(chunked_input1, true);
-  this->AssertAnyIs(chunked_input2, false);
-  this->AssertAnyIs(chunked_input3, false);
-  this->AssertAnyIs(chunked_input4, true);
+  this->AssertAnyIs("[]", false_value);
+  this->AssertAnyIs("[false]", false_value);
+  this->AssertAnyIs("[true, false]", true_value);
+  this->AssertAnyIs("[null, null, null]", false_value);
+  this->AssertAnyIs("[false, false, false]", false_value);
+  this->AssertAnyIs("[false, false, false, null]", false_value);
+  this->AssertAnyIs("[true, null, true, true]", true_value);
+  this->AssertAnyIs("[false, null, false, true]", true_value);
+  this->AssertAnyIs("[true, null, false, true]", true_value);
+  this->AssertAnyIs(chunked_input0, true_value);
+  this->AssertAnyIs(chunked_input1, true_value);
+  this->AssertAnyIs(chunked_input2, false_value);
+  this->AssertAnyIs(chunked_input3, false_value);
+  this->AssertAnyIs(chunked_input4, true_value);
+
+  const ScalarAggregateOptions& keep_nulls = ScalarAggregateOptions(/*skip_nulls=*/false);
+  this->AssertAnyIs("[]", false_value, keep_nulls);
+  this->AssertAnyIs("[false]", false_value, keep_nulls);
+  this->AssertAnyIs("[true, false]", true_value, keep_nulls);
+  this->AssertAnyIs("[null, null, null]", null_value, keep_nulls);
+  this->AssertAnyIs("[false, false, false]", false_value, keep_nulls);
+  this->AssertAnyIs("[false, false, false, null]", null_value, keep_nulls);
+  this->AssertAnyIs("[true, null, true, true]", true_value, keep_nulls);
+  this->AssertAnyIs("[false, null, false, true]", true_value, keep_nulls);
+  this->AssertAnyIs("[true, null, false, true]", true_value, keep_nulls);
+  this->AssertAnyIs(chunked_input0, true_value, keep_nulls);
+  this->AssertAnyIs(chunked_input1, true_value, keep_nulls);
+  this->AssertAnyIs(chunked_input2, false_value, keep_nulls);
+  this->AssertAnyIs(chunked_input3, null_value, keep_nulls);
+  this->AssertAnyIs(chunked_input4, true_value, keep_nulls);
 }
 
 //
@@ -995,21 +1021,26 @@ TEST_F(TestAnyKernel, Basics) {
 
 class TestPrimitiveAllKernel : public ::testing::Test {
  public:
-  void AssertAllIs(const Datum& array, bool expected) {
-    ASSERT_OK_AND_ASSIGN(Datum out, All(array));
+  void AssertAllIs(const Datum& array, const std::shared_ptr<BooleanScalar>& expected,
+                   const ScalarAggregateOptions& options) {
+    ASSERT_OK_AND_ASSIGN(Datum out, All(array, options, nullptr));
     const BooleanScalar& out_all = out.scalar_as<BooleanScalar>();
-    const auto expected_all = static_cast<const BooleanScalar>(expected);
-    ASSERT_EQ(out_all, expected_all);
+    ASSERT_EQ(out_all, *expected);
   }
 
-  void AssertAllIs(const std::string& json, bool expected) {
+  void AssertAllIs(
+      const std::string& json, const std::shared_ptr<BooleanScalar>& expected,
+      const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults()) {
     auto array = ArrayFromJSON(type_singleton(), json);
-    AssertAllIs(array, expected);
+    AssertAllIs(array, expected, options);
   }
 
-  void AssertAllIs(const std::vector<std::string>& json, bool expected) {
+  void AssertAllIs(
+      const std::vector<std::string>& json,
+      const std::shared_ptr<BooleanScalar>& expected,
+      const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults()) {
     auto array = ChunkedArrayFromJSON(type_singleton(), json);
-    AssertAllIs(array, expected);
+    AssertAllIs(array, expected, options);
   }
 
   std::shared_ptr<DataType> type_singleton() {
@@ -1020,6 +1051,11 @@ class TestPrimitiveAllKernel : public ::testing::Test {
 class TestAllKernel : public TestPrimitiveAllKernel {};
 
 TEST_F(TestAllKernel, Basics) {
+  auto true_value = std::make_shared<BooleanScalar>(true);
+  auto false_value = std::make_shared<BooleanScalar>(false);
+  auto null_value = std::make_shared<BooleanScalar>();
+  null_value->is_valid = false;
+
   std::vector<std::string> chunked_input0 = {"[]", "[true]"};
   std::vector<std::string> chunked_input1 = {"[true, true, null]", "[true, null]"};
   std::vector<std::string> chunked_input2 = {"[false, false, false]", "[false]"};
@@ -1027,21 +1063,38 @@ TEST_F(TestAllKernel, Basics) {
   std::vector<std::string> chunked_input4 = {"[true, null]", "[null, false]"};
   std::vector<std::string> chunked_input5 = {"[false, null]", "[null, true]"};
 
-  this->AssertAllIs("[]", true);
-  this->AssertAllIs("[false]", false);
-  this->AssertAllIs("[true, false]", false);
-  this->AssertAllIs("[null, null, null]", true);
-  this->AssertAllIs("[false, false, false]", false);
-  this->AssertAllIs("[false, false, false, null]", false);
-  this->AssertAllIs("[true, null, true, true]", true);
-  this->AssertAllIs("[false, null, false, true]", false);
-  this->AssertAllIs("[true, null, false, true]", false);
-  this->AssertAllIs(chunked_input0, true);
-  this->AssertAllIs(chunked_input1, true);
-  this->AssertAllIs(chunked_input2, false);
-  this->AssertAllIs(chunked_input3, false);
-  this->AssertAllIs(chunked_input4, false);
-  this->AssertAllIs(chunked_input5, false);
+  this->AssertAllIs("[]", true_value);
+  this->AssertAllIs("[false]", false_value);
+  this->AssertAllIs("[true, false]", false_value);
+  this->AssertAllIs("[null, null, null]", true_value);
+  this->AssertAllIs("[false, false, false]", false_value);
+  this->AssertAllIs("[false, false, false, null]", false_value);
+  this->AssertAllIs("[true, null, true, true]", true_value);
+  this->AssertAllIs("[false, null, false, true]", false_value);
+  this->AssertAllIs("[true, null, false, true]", false_value);
+  this->AssertAllIs(chunked_input0, true_value);
+  this->AssertAllIs(chunked_input1, true_value);
+  this->AssertAllIs(chunked_input2, false_value);
+  this->AssertAllIs(chunked_input3, false_value);
+  this->AssertAllIs(chunked_input4, false_value);
+  this->AssertAllIs(chunked_input5, false_value);
+
+  const ScalarAggregateOptions keep_nulls = ScalarAggregateOptions(/*skip_nulls=*/false);
+  this->AssertAllIs("[]", true_value, keep_nulls);
+  this->AssertAllIs("[false]", false_value, keep_nulls);
+  this->AssertAllIs("[true, false]", false_value, keep_nulls);
+  this->AssertAllIs("[null, null, null]", null_value, keep_nulls);
+  this->AssertAllIs("[false, false, false]", false_value, keep_nulls);
+  this->AssertAllIs("[false, false, false, null]", false_value, keep_nulls);
+  this->AssertAllIs("[true, null, true, true]", null_value, keep_nulls);
+  this->AssertAllIs("[false, null, false, true]", false_value, keep_nulls);
+  this->AssertAllIs("[true, null, false, true]", false_value, keep_nulls);
+  this->AssertAllIs(chunked_input0, true_value, keep_nulls);
+  this->AssertAllIs(chunked_input1, null_value, keep_nulls);
+  this->AssertAllIs(chunked_input2, false_value, keep_nulls);
+  this->AssertAllIs(chunked_input3, false_value, keep_nulls);
+  this->AssertAllIs(chunked_input4, false_value, keep_nulls);
+  this->AssertAllIs(chunked_input5, false_value, keep_nulls);
 }
 
 //
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index 00391052b1e..6ce808aba67 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -186,9 +186,9 @@ Aggregations
 +---------------+-------+-------------+----------------+----------------------------------+-------+
 | Function name | Arity | Input types | Output type    | Options class                    | Notes |
 +===============+=======+=============+================+==================================+=======+
-| all           | Unary | Boolean     | Scalar Boolean |                                  |       |
+| all           | Unary | Boolean     | Scalar Boolean | :struct:`ScalarAggregateOptions` | \(1)  |
 +---------------+-------+-------------+----------------+----------------------------------+-------+
-| any           | Unary | Boolean     | Scalar Boolean |                                  |       |
+| any           | Unary | Boolean     | Scalar Boolean | :struct:`ScalarAggregateOptions` | \(1)  |
 +---------------+-------+-------------+----------------+----------------------------------+-------+
 | count         | Unary | Any         | Scalar Int64   | :struct:`ScalarAggregateOptions` |       |
 +---------------+-------+-------------+----------------+----------------------------------+-------+
@@ -196,15 +196,15 @@ Aggregations
 +---------------+-------+-------------+----------------+----------------------------------+-------+
 | mean          | Unary | Numeric     | Scalar Float64 | :struct:`ScalarAggregateOptions` |       |
 +---------------+-------+-------------+----------------+----------------------------------+-------+
-| min_max       | Unary | Numeric     | Scalar Struct  | :struct:`ScalarAggregateOptions` | \(1)  |
+| min_max       | Unary | Numeric     | Scalar Struct  | :struct:`ScalarAggregateOptions` | \(2)  |
 +---------------+-------+-------------+----------------+----------------------------------+-------+
-| mode          | Unary | Numeric     | Struct         | :struct:`ModeOptions`            | \(2)  |
+| mode          | Unary | Numeric     | Struct         | :struct:`ModeOptions`            | \(3)  |
 +---------------+-------+-------------+----------------+----------------------------------+-------+
-| quantile      | Unary | Numeric     | Scalar Numeric | :struct:`QuantileOptions`        | \(3)  |
+| quantile      | Unary | Numeric     | Scalar Numeric | :struct:`QuantileOptions`        | \(4)  |
 +---------------+-------+-------------+----------------+----------------------------------+-------+
 | stddev        | Unary | Numeric     | Scalar Float64 | :struct:`VarianceOptions`        |       |
 +---------------+-------+-------------+----------------+----------------------------------+-------+
-| sum           | Unary | Numeric     | Scalar Numeric | :struct:`ScalarAggregateOptions` | \(4)  |
+| sum           | Unary | Numeric     | Scalar Numeric | :struct:`ScalarAggregateOptions` | \(5)  |
 +---------------+-------+-------------+----------------+----------------------------------+-------+
 | tdigest       | Unary | Numeric     | Scalar Float64 | :struct:`TDigestOptions`         |       |
 +---------------+-------+-------------+----------------+----------------------------------+-------+
@@ -213,18 +213,21 @@ Aggregations
 
 Notes:
 
-* \(1) Output is a ``{"min": input type, "max": input type}`` Struct.
+* \(1) If null values are taken into account by setting ScalarAggregateOptions
+  parameter skip_nulls = false then `Kleene logic`_ logic is applied.
 
-* \(2) Output is an array of ``{"mode": input type, "count": Int64}`` Struct.
+* \(2) Output is a ``{"min": input type, "max": input type}`` Struct.
+
+* \(3) Output is an array of ``{"mode": input type, "count": Int64}`` Struct.
   It contains the *N* most common elements in the input, in descending
   order, where *N* is given in :member:`ModeOptions::n`.
   If two values have the same count, the smallest one comes first.
   Note that the output can have less than *N* elements if the input has
   less than *N* distinct values.
 
-* \(3) Output is Float64 or input type, depending on QuantileOptions.
+* \(4) Output is Float64 or input type, depending on QuantileOptions.
 
-* \(4) Output is Int64, UInt64 or Float64, depending on the input type.
+* \(5) Output is Int64, UInt64 or Float64, depending on the input type.
 
 Element-wise ("scalar") functions
 ---------------------------------
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index 37040ec86b5..b65970745ec 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -548,30 +548,40 @@ def test_min_max():
 
 def test_any():
     # ARROW-1846
+
+    options = pc.ScalarAggregateOptions(skip_nulls=False)
     a = pa.array([False, None, True])
     assert pc.any(a).as_py() is True
+    assert pc.any(a, options=options).as_py() is True
 
     a = pa.array([False, None, False])
     assert pc.any(a).as_py() is False
+    assert pc.any(a, options=options).as_py() is None
 
 
 def test_all():
     # ARROW-10301
 
+    options = pc.ScalarAggregateOptions(skip_nulls=False)
     a = pa.array([], type='bool')
     assert pc.all(a).as_py() is True
+    assert pc.all(a, options=options).as_py() is True
 
     a = pa.array([False, True])
     assert pc.all(a).as_py() is False
+    assert pc.all(a, options=options).as_py() is False
 
     a = pa.array([True, None])
     assert pc.all(a).as_py() is True
+    assert pc.all(a, options=options).as_py() is None
 
     a = pa.chunked_array([[True], [True, None]])
     assert pc.all(a).as_py() is True
+    assert pc.all(a, options=options).as_py() is None
 
     a = pa.chunked_array([[True], [False]])
     assert pc.all(a).as_py() is False
+    assert pc.all(a, options=options).as_py() is False
 
 
 def test_is_valid():
diff --git a/r/R/compute.R b/r/R/compute.R
index 5a00e884980..4277ad8d6df 100644
--- a/r/R/compute.R
+++ b/r/R/compute.R
@@ -203,31 +203,12 @@ unique.ArrowDatum <- function(x, incomparables = FALSE, ...) {
 
 #' @export
 any.ArrowDatum <- function(..., na.rm = FALSE) {
-  
-  a <- collect_arrays_from_dots(list(...))
-  result <- call_function("any", a)
-
-  if (!na.rm && a$null_count > 0 && !as.vector(result)) {
-    # Three-valued logic: with na.rm = FALSE, any(c(TRUE, NA)) returns TRUE but any(c(FALSE, NA)) returns NA
-    # TODO: C++ library should take na.rm for any/all (like ARROW-9054)
-    Scalar$create(NA)
-  } else {
-    result
-  }
+  scalar_aggregate("any", ..., na.rm = na.rm)
 }
 
 #' @export
 all.ArrowDatum <- function(..., na.rm = FALSE) {
-  
-  a <- collect_arrays_from_dots(list(...))
-  result <- call_function("all", a)
-  
-  if (!na.rm && a$null_count > 0 && as.vector(result)) {
-    # See comment above in any() about three-valued logic
-    Scalar$create(NA)
-  } else {
-    result
-  }
+  scalar_aggregate("all", ..., na.rm = na.rm)
 }
 
 #' `match` and `%in%` for Arrow objects
diff --git a/r/src/compute.cpp b/r/src/compute.cpp
index cfa895ecb1e..6bb55cbe208 100644
--- a/r/src/compute.cpp
+++ b/r/src/compute.cpp
@@ -172,7 +172,7 @@ std::shared_ptr<arrow::compute::FunctionOptions> make_compute_options(
   }
 
   if (func_name == "min_max" || func_name == "sum" || func_name == "mean" ||
-      func_name == "count") {
+      func_name == "count" || func_name == "any" || func_name == "all") {
     using Options = arrow::compute::ScalarAggregateOptions;
     auto out = std::make_shared<Options>(Options::Defaults());
     out->min_count = cpp11::as_cpp<int>(options["na.min_count"]);
diff --git a/r/tests/testthat/test-compute-aggregate.R b/r/tests/testthat/test-compute-aggregate.R
index 41418014bea..428f799c97b 100644
--- a/r/tests/testthat/test-compute-aggregate.R
+++ b/r/tests/testthat/test-compute-aggregate.R
@@ -394,12 +394,14 @@ test_that("any.Array and any.ChunkedArray", {
   data <- c(1:10, NA, NA)
 
   expect_vector_equal(any(input > 5), data)
+  expect_vector_equal(any(input > 5, na.rm = TRUE), data)
   expect_vector_equal(any(input < 1), data)
   expect_vector_equal(any(input < 1, na.rm = TRUE), data)
 
   data_logical <- c(TRUE, FALSE, TRUE, NA, FALSE)
 
   expect_vector_equal(any(input), data_logical)
+  expect_vector_equal(any(input, na.rm = FALSE), data_logical)
   expect_vector_equal(any(input, na.rm = TRUE), data_logical)
 
 })
@@ -409,6 +411,8 @@ test_that("all.Array and all.ChunkedArray", {
   data <- c(1:10, NA, NA)
 
   expect_vector_equal(all(input > 5), data)
+  expect_vector_equal(all(input > 5, na.rm = TRUE), data)
+
   expect_vector_equal(all(input < 11), data)
   expect_vector_equal(all(input < 11, na.rm = TRUE), data)
 

From fdce12add5b104b30362e7af4d8fb6c832e266e1 Mon Sep 17 00:00:00 2001
From: Zimo Zhang <zmz@yanhuangdata.com>
Date: Wed, 14 Jul 2021 14:29:54 -0400
Subject: [PATCH 553/719] ARROW-13005: [C++] Add support for take
 implementation on dense union type

https://issues.apache.org/jira/browse/ARROW-13005

Closes #10606 from ZMZ91/feature/dense_union_take

Lead-authored-by: Zimo Zhang <zmz@yanhuangdata.com>
Co-authored-by: ZMZ <zmz@yanhuangdata.com>
Signed-off-by: Benjamin Kietzman <bengilgit@gmail.com>
---
 cpp/src/arrow/array/builder_base.h            |  1 +
 .../arrow/compute/kernels/vector_selection.cc | 77 +++++++++++++++++++
 .../compute/kernels/vector_selection_test.cc  | 65 ++++++++--------
 3 files changed, 110 insertions(+), 33 deletions(-)

diff --git a/cpp/src/arrow/array/builder_base.h b/cpp/src/arrow/array/builder_base.h
index 8e60c306796..905b3c1b491 100644
--- a/cpp/src/arrow/array/builder_base.h
+++ b/cpp/src/arrow/array/builder_base.h
@@ -51,6 +51,7 @@ class ARROW_EXPORT ArrayBuilder {
   explicit ArrayBuilder(MemoryPool* pool) : pool_(pool), null_bitmap_builder_(pool) {}
 
   virtual ~ArrayBuilder() = default;
+  ARROW_DEFAULT_MOVE_AND_ASSIGN(ArrayBuilder);
 
   /// For nested types. Since the objects are owned by this class instance, we
   /// skip shared pointers and just return a raw pointer
diff --git a/cpp/src/arrow/compute/kernels/vector_selection.cc b/cpp/src/arrow/compute/kernels/vector_selection.cc
index 6376ae10404..5845a7ee2d0 100644
--- a/cpp/src/arrow/compute/kernels/vector_selection.cc
+++ b/cpp/src/arrow/compute/kernels/vector_selection.cc
@@ -1668,6 +1668,81 @@ struct ListImpl : public Selection<ListImpl<Type>, Type> {
   }
 };
 
+struct DenseUnionImpl : public Selection<DenseUnionImpl, DenseUnionType> {
+  using Base = Selection<DenseUnionImpl, DenseUnionType>;
+  LIFT_BASE_MEMBERS();
+
+  TypedBufferBuilder<int32_t> value_offset_buffer_builder_;
+  TypedBufferBuilder<int8_t> child_id_buffer_builder_;
+  std::vector<int8_t> type_codes_;
+  std::vector<Int32Builder> child_indices_builders_;
+
+  DenseUnionImpl(KernelContext* ctx, const ExecBatch& batch, int64_t output_length,
+                 Datum* out)
+      : Base(ctx, batch, output_length, out),
+        value_offset_buffer_builder_(ctx->memory_pool()),
+        child_id_buffer_builder_(ctx->memory_pool()),
+        type_codes_(checked_cast<const UnionType&>(*this->values->type).type_codes()),
+        child_indices_builders_(type_codes_.size()) {
+    for (auto& child_indices_builder : child_indices_builders_) {
+      child_indices_builder = Int32Builder(ctx->memory_pool());
+    }
+  }
+
+  template <typename Adapter>
+  Status GenerateOutput() {
+    DenseUnionArray typed_values(this->values);
+    Adapter adapter(this);
+    RETURN_NOT_OK(adapter.Generate(
+        [&](int64_t index) {
+          int8_t child_id = typed_values.child_id(index);
+          child_id_buffer_builder_.UnsafeAppend(type_codes_[child_id]);
+          int32_t value_offset = typed_values.value_offset(index);
+          value_offset_buffer_builder_.UnsafeAppend(
+              static_cast<int32_t>(child_indices_builders_[child_id].length()));
+          RETURN_NOT_OK(child_indices_builders_[child_id].Reserve(1));
+          child_indices_builders_[child_id].UnsafeAppend(value_offset);
+          return Status::OK();
+        },
+        [&]() {
+          int8_t child_id = 0;
+          child_id_buffer_builder_.UnsafeAppend(type_codes_[child_id]);
+          value_offset_buffer_builder_.UnsafeAppend(
+              static_cast<int32_t>(child_indices_builders_[child_id].length()));
+          RETURN_NOT_OK(child_indices_builders_[child_id].Reserve(1));
+          child_indices_builders_[child_id].UnsafeAppendNull();
+          return Status::OK();
+        }));
+    return Status::OK();
+  }
+
+  Status Init() override {
+    RETURN_NOT_OK(child_id_buffer_builder_.Reserve(output_length));
+    RETURN_NOT_OK(value_offset_buffer_builder_.Reserve(output_length));
+    return Status::OK();
+  }
+
+  Status Finish() override {
+    ARROW_ASSIGN_OR_RAISE(auto child_ids_buffer, child_id_buffer_builder_.Finish());
+    ARROW_ASSIGN_OR_RAISE(auto value_offsets_buffer,
+                          value_offset_buffer_builder_.Finish());
+    DenseUnionArray typed_values(this->values);
+    auto num_fields = typed_values.num_fields();
+    auto num_rows = child_ids_buffer->size();
+    BufferVector buffers{nullptr, std::move(child_ids_buffer),
+                         std::move(value_offsets_buffer)};
+    *out = ArrayData(typed_values.type(), num_rows, std::move(buffers), 0);
+    for (auto i = 0; i < num_fields; i++) {
+      ARROW_ASSIGN_OR_RAISE(auto child_indices_array,
+                            child_indices_builders_[i].Finish());
+      ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Array> child_array,
+                            Take(*typed_values.field(i), *child_indices_array));
+      out->child_data.push_back(child_array->data());
+    }
+    return Status::OK();
+  }
+};
+
 struct FSLImpl : public Selection<FSLImpl, FixedSizeListType> {
   Int64Builder child_index_builder;
 
@@ -2141,6 +2216,7 @@ void RegisterVectorSelection(FunctionRegistry* registry) {
       {InputType::Array(Type::LIST), FilterExec<ListImpl<ListType>>},
       {InputType::Array(Type::LARGE_LIST), FilterExec<ListImpl<LargeListType>>},
       {InputType::Array(Type::FIXED_SIZE_LIST), FilterExec<FSLImpl>},
+      {InputType::Array(Type::DENSE_UNION), FilterExec<DenseUnionImpl>},
       {InputType::Array(Type::STRUCT), StructFilter},
       // TODO: Reuse ListType kernel for MAP
       {InputType::Array(Type::MAP), FilterExec<ListImpl<MapType>>},
@@ -2170,6 +2246,7 @@ void RegisterVectorSelection(FunctionRegistry* registry) {
       {InputType::Array(Type::LIST), TakeExec<ListImpl<ListType>>},
       {InputType::Array(Type::LARGE_LIST), TakeExec<ListImpl<LargeListType>>},
       {InputType::Array(Type::FIXED_SIZE_LIST), TakeExec<FSLImpl>},
+      {InputType::Array(Type::DENSE_UNION), TakeExec<DenseUnionImpl>},
       {InputType::Array(Type::STRUCT), TakeExec<StructImpl>},
       // TODO: Reuse ListType kernel for MAP
       {InputType::Array(Type::MAP), TakeExec<ListImpl<MapType>>},
diff --git a/cpp/src/arrow/compute/kernels/vector_selection_test.cc b/cpp/src/arrow/compute/kernels/vector_selection_test.cc
index f428da0fe35..e367d888d00 100644
--- a/cpp/src/arrow/compute/kernels/vector_selection_test.cc
+++ b/cpp/src/arrow/compute/kernels/vector_selection_test.cc
@@ -607,31 +607,31 @@ TEST_F(TestFilterKernelWithStruct, FilterStruct) {
 
 class TestFilterKernelWithUnion : public TestFilterKernel<UnionType> {};
 
-TEST_F(TestFilterKernelWithUnion, DISABLED_FilterUnion) {
-  for (auto union_ : UnionTypeFactories()) {
-    auto union_type = union_({field("a", int32()), field("b", utf8())}, {2, 5});
-    auto union_json = R"([
-      null,
+TEST_F(TestFilterKernelWithUnion, FilterUnion) {
+  auto union_type = dense_union({field("a", int32()), field("b", utf8())}, {2, 5});
+  auto union_json = R"([
+      [2, null],
       [2, 222],
       [5, "hello"],
       [5, "eh"],
-      null,
-      [2, 111]
+      [2, null],
+      [2, 111],
+      [5, null]
     ])";
-    this->AssertFilter(union_type, union_json, "[0, 0, 0, 0, 0, 0]", "[]");
-    this->AssertFilter(union_type, union_json, "[0, 1, 1, null, 0, 1]", R"([
+  this->AssertFilter(union_type, union_json, "[0, 0, 0, 0, 0, 0, 0]", "[]");
+  this->AssertFilter(union_type, union_json, "[0, 1, 1, null, 0, 1, 1]", R"([
       [2, 222],
       [5, "hello"],
-      null,
-      [2, 111]
+      [2, null],
+      [2, 111],
+      [5, null]
     ])");
-    this->AssertFilter(union_type, union_json, "[1, 0, 1, 0, 1, 0]", R"([
-      null,
+  this->AssertFilter(union_type, union_json, "[1, 0, 1, 0, 1, 0, 0]", R"([
+      [2, null],
       [5, "hello"],
-      null
+      [2, null]
     ])");
-    this->AssertFilter(union_type, union_json, "[1, 1, 1, 1, 1, 1]", union_json);
-  }
+  this->AssertFilter(union_type, union_json, "[1, 1, 1, 1, 1, 1, 1]", union_json);
 }
 
 class TestFilterKernelWithRecordBatch : public TestFilterKernel<RecordBatch> {
@@ -1281,34 +1281,34 @@ TEST_F(TestTakeKernelWithStruct, TakeStruct) {
 
 class TestTakeKernelWithUnion : public TestTakeKernelTyped<UnionType> {};
 
-// TODO: Restore Union take functionality
-TEST_F(TestTakeKernelWithUnion, DISABLED_TakeUnion) {
-  for (auto union_ : UnionTypeFactories()) {
-    auto union_type = union_({field("a", int32()), field("b", utf8())}, {2, 5});
-    auto union_json = R"([
-      null,
+TEST_F(TestTakeKernelWithUnion, TakeUnion) {
+  auto union_type = dense_union({field("a", int32()), field("b", utf8())}, {2, 5});
+  auto union_json = R"([
+      [2, null],
       [2, 222],
       [5, "hello"],
       [5, "eh"],
-      null,
-      [2, 111]
+      [2, null],
+      [2, 111],
+      [5, null]
     ])";
-    CheckTake(union_type, union_json, "[]", "[]");
-    CheckTake(union_type, union_json, "[3, 1, 3, 1, 3]", R"([
+  CheckTake(union_type, union_json, "[]", "[]");
+  CheckTake(union_type, union_json, "[3, 1, 3, 1, 3]", R"([
       [5, "eh"],
       [2, 222],
       [5, "eh"],
       [2, 222],
       [5, "eh"]
     ])");
-    CheckTake(union_type, union_json, "[4, 2, 1]", R"([
-      null,
+  CheckTake(union_type, union_json, "[4, 2, 1, 6]", R"([
+      [2, null],
       [5, "hello"],
-      [2, 222]
+      [2, 222],
+      [5, null]
     ])");
-    CheckTake(union_type, union_json, "[0, 1, 2, 3, 4, 5]", union_json);
-    CheckTake(union_type, union_json, "[0, 2, 2, 2, 2, 2, 2]", R"([
-      null,
+  CheckTake(union_type, union_json, "[0, 1, 2, 3, 4, 5, 6]", union_json);
+  CheckTake(union_type, union_json, "[0, 2, 2, 2, 2, 2, 2]", R"([
+      [2, null],
       [5, "hello"],
       [5, "hello"],
       [5, "hello"],
@@ -1316,7 +1316,6 @@ TEST_F(TestTakeKernelWithUnion, DISABLED_TakeUnion) {
       [5, "hello"],
       [5, "hello"]
     ])");
-  }
 }
 
 class TestPermutationsWithTake : public TestBase {

From a9fe9e34069c635617922e97ede2a71729f5ad07 Mon Sep 17 00:00:00 2001
From: Benjamin Kietzman <bengilgit@gmail.com>
Date: Wed, 14 Jul 2021 15:04:09 -0400
Subject: [PATCH 554/719] ARROW-13341: [C++][Compute] Fix race condition in
 ScalarAggregateNode

Multiple threads starting DoConsume would already have incremented `num_received_`, so if one were delayed another might erroneously begin to merge/finalize (leaving invalidated states)

Closes #10720 from bkietz/13341-Segfault-in-arrow-compute

Authored-by: Benjamin Kietzman <bengilgit@gmail.com>
Signed-off-by: Benjamin Kietzman <bengilgit@gmail.com>
---
 cpp/src/arrow/compute/exec/exec_plan.cc | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/cpp/src/arrow/compute/exec/exec_plan.cc b/cpp/src/arrow/compute/exec/exec_plan.cc
index 35e4af3889a..433e895c243 100644
--- a/cpp/src/arrow/compute/exec/exec_plan.cc
+++ b/cpp/src/arrow/compute/exec/exec_plan.cc
@@ -675,7 +675,6 @@ struct ScalarAggregateNode : ExecNode {
     std::unique_lock<std::mutex> lock(mutex_);
     auto it =
         thread_indices_.emplace(std::this_thread::get_id(), thread_indices_.size()).first;
-    ++num_received_;
     auto thread_index = it->second;
 
     lock.unlock();
@@ -687,6 +686,7 @@ struct ScalarAggregateNode : ExecNode {
     }
 
     lock.lock();
+    ++num_received_;
     st = MaybeFinish(&lock);
     if (!st.ok()) {
       outputs_[0]->ErrorReceived(this, std::move(st));
@@ -736,7 +736,7 @@ struct ScalarAggregateNode : ExecNode {
   Status MaybeFinish(std::unique_lock<std::mutex>* lock) {
     if (num_received_ != num_total_) return Status::OK();
 
-    if (finished_.is_finished()) return Status::OK();
+    if (states_.empty()) return Status::OK();
 
     ExecBatch batch{{}, 1};
     batch.values.resize(kernels_.size());
@@ -747,6 +747,7 @@ struct ScalarAggregateNode : ExecNode {
                                              kernels_[i], &ctx, std::move(states_[i])));
       RETURN_NOT_OK(kernels_[i]->finalize(&ctx, &batch.values[i]));
     }
+    states_.clear();
     lock->unlock();
 
     outputs_[0]->InputReceived(this, 0, batch);
@@ -760,7 +761,7 @@ struct ScalarAggregateNode : ExecNode {
   std::vector<std::vector<std::unique_ptr<KernelState>>> states_;
   std::unordered_map<std::thread::id, size_t> thread_indices_;
   std::mutex mutex_;
-  int num_received_ = 0, num_total_;
+  int num_received_ = 0, num_total_ = -1;
 };
 
 Result<ExecNode*> MakeScalarAggregateNode(ExecNode* input, std::string label,

From 7b66f97330215fe020ec536671ee50f41aa1af35 Mon Sep 17 00:00:00 2001
From: Weston Pace <weston.pace@gmail.com>
Date: Wed, 14 Jul 2021 15:55:11 -0400
Subject: [PATCH 555/719] ARROW-12364: [Python] [Dataset] Add
 metadata_collector option to ds.write_dataset()

Created writer_post_finish (similar to writer_pre_finish) to visit dataset-created files after Finish.  Added a similar file_visitor concept to pyarrow which maps to writer_post_finish.  Connected the legacy metadata_collector to the file_visitor so that parquet datasets created with use_legacy_dataset=True can support metadata_collector.

Closes #10628 from westonpace/feature/ARROW-12364--python-dataset-add-metadata_collector-option-t

Authored-by: Weston Pace <weston.pace@gmail.com>
Signed-off-by: Benjamin Kietzman <bengilgit@gmail.com>
---
 cpp/src/arrow/dataset/file_base.cc           |   3 +-
 cpp/src/arrow/dataset/file_base.h            |   6 +
 python/pyarrow/_dataset.pyx                  |  58 +++++++-
 python/pyarrow/dataset.py                    |  23 +++-
 python/pyarrow/includes/libarrow_dataset.pxd |  15 ++
 python/pyarrow/includes/libarrow_fs.pxd      |   4 +
 python/pyarrow/parquet.py                    |   7 +-
 python/pyarrow/tests/test_dataset.py         | 136 ++++++++++++++++++-
 8 files changed, 240 insertions(+), 12 deletions(-)

diff --git a/cpp/src/arrow/dataset/file_base.cc b/cpp/src/arrow/dataset/file_base.cc
index d4f7e1e28b8..68c309bea8f 100644
--- a/cpp/src/arrow/dataset/file_base.cc
+++ b/cpp/src/arrow/dataset/file_base.cc
@@ -546,7 +546,8 @@ Status FileSystemDataset::Write(const FileSystemDatasetWriteOptions& write_optio
   for (const auto& part_queue : state.queues) {
     task_group->Append([&] {
       RETURN_NOT_OK(write_options.writer_pre_finish(part_queue.second->writer().get()));
-      return part_queue.second->writer()->Finish();
+      RETURN_NOT_OK(part_queue.second->writer()->Finish());
+      return write_options.writer_post_finish(part_queue.second->writer().get());
     });
   }
   return task_group->Finish();
diff --git a/cpp/src/arrow/dataset/file_base.h b/cpp/src/arrow/dataset/file_base.h
index 36f110cb44d..438fccd9b07 100644
--- a/cpp/src/arrow/dataset/file_base.h
+++ b/cpp/src/arrow/dataset/file_base.h
@@ -369,6 +369,12 @@ struct ARROW_DS_EXPORT FileSystemDatasetWriteOptions {
     return Status::OK();
   };
 
+  /// Callback to be invoked against all FileWriters after they have
+  /// called FileWriter::Finish().
+  std::function<Status(FileWriter*)> writer_post_finish = [](FileWriter*) {
+    return Status::OK();
+  };
+
   const std::shared_ptr<FileFormat>& format() const {
     return file_write_options->format();
   }
diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx
index 0a179bdbe15..07684eff3b4 100644
--- a/python/pyarrow/_dataset.pyx
+++ b/python/pyarrow/_dataset.pyx
@@ -3099,7 +3099,7 @@ def _get_partition_keys(Expression partition_expression):
 
     For example, an expression of
     <pyarrow.dataset.Expression ((part == A:string) and (year == 2016:int32))>
-    is converted to {'part': 'a', 'year': 2016}
+    is converted to {'part': 'A', 'year': 2016}
     """
     cdef:
         CExpression expr = partition_expression.unwrap()
@@ -3114,6 +3114,53 @@ def _get_partition_keys(Expression partition_expression):
     return out
 
 
+ctypedef CParquetFileWriter* _CParquetFileWriterPtr
+
+cdef class WrittenFile(_Weakrefable):
+    """
+    Metadata information about files written as
+    part of a dataset write operation
+    """
+
+    """The full path to the created file"""
+    cdef public str path
+    """
+    If the file is a parquet file this will contain the parquet metadata.
+    This metadata will have the file path attribute set to the path of
+    the written file.
+    """
+    cdef public object metadata
+
+    def __init__(self, path, metadata):
+        self.path = path
+        self.metadata = metadata
+
+cdef void _filesystemdataset_write_visitor(
+        dict visit_args,
+        CFileWriter* file_writer):
+    cdef:
+        str path
+        str base_dir
+        WrittenFile written_file
+        FileMetaData parquet_metadata
+        CParquetFileWriter* parquet_file_writer
+
+    parquet_metadata = None
+    path = frombytes(deref(file_writer).destination().path)
+    if deref(deref(file_writer).format()).type_name() == b"parquet":
+        parquet_file_writer = dynamic_cast[_CParquetFileWriterPtr](file_writer)
+        with nogil:
+            metadata = deref(
+                deref(parquet_file_writer).parquet_writer()).metadata()
+        if metadata:
+            base_dir = frombytes(visit_args['base_dir'])
+            parquet_metadata = FileMetaData()
+            parquet_metadata.init(metadata)
+            parquet_metadata.set_file_path(os.path.relpath(path, base_dir))
+    written_file = WrittenFile(path, parquet_metadata)
+    visit_args['file_visitor'](written_file)
+
+
 def _filesystemdataset_write(
     Scanner data not None,
     object base_dir not None,
@@ -3122,6 +3169,7 @@ def _filesystemdataset_write(
     Partitioning partitioning not None,
     FileWriteOptions file_options not None,
     int max_partitions,
+    object file_visitor
 ):
     """
     CFileSystemDataset.Write wrapper
@@ -3130,6 +3178,7 @@ def _filesystemdataset_write(
         CFileSystemDatasetWriteOptions c_options
         shared_ptr[CScanner] c_scanner
         vector[shared_ptr[CRecordBatch]] c_batches
+        dict visit_args
 
     c_options.file_write_options = file_options.unwrap()
     c_options.filesystem = filesystem.unwrap()
@@ -3137,6 +3186,13 @@ def _filesystemdataset_write(
     c_options.partitioning = partitioning.unwrap()
     c_options.max_partitions = max_partitions
     c_options.basename_template = tobytes(basename_template)
+    if file_visitor is not None:
+        visit_args = {'base_dir': c_options.base_dir,
+                      'file_visitor': file_visitor}
+        # Need to use post_finish because parquet metadata is not available
+        # until after Finish has been called
+        c_options.writer_post_finish = BindFunction[cb_writer_finish_internal](
+            &_filesystemdataset_write_visitor, visit_args)
 
     c_scanner = data.unwrap()
     with nogil:
diff --git a/python/pyarrow/dataset.py b/python/pyarrow/dataset.py
index b93f492dd38..8b5799e6da2 100644
--- a/python/pyarrow/dataset.py
+++ b/python/pyarrow/dataset.py
@@ -690,7 +690,7 @@ def _ensure_write_partitioning(scheme):
 def write_dataset(data, base_dir, basename_template=None, format=None,
                   partitioning=None, schema=None,
                   filesystem=None, file_options=None, use_threads=True,
-                  use_async=False, max_partitions=None):
+                  use_async=False, max_partitions=None, file_visitor=None):
     """
     Write a dataset to a given format and partitioning.
 
@@ -731,6 +731,25 @@ def write_dataset(data, base_dir, basename_template=None, format=None,
         (e.g. S3)
     max_partitions : int, default 1024
         Maximum number of partitions any batch may be written into.
+    file_visitor : Function
+        If set, this function will be called with a WrittenFile instance
+        for each file created during the call.  This object will have both
+        a path attribute and a metadata attribute.
+
+        The path attribute will be a string containing the path to
+        the created file.
+
+        The metadata attribute will be the parquet metadata of the file.
+        This metadata will have the file path attribute set and can be used
+        to build a _metadata file.  The metadata attribute will be None if
+        the format is not parquet.
+
+        Example visitor which simple collects the filenames created::
+
+            visited_paths = []
+
+            def file_visitor(written_file):
+                visited_paths.append(written_file.path)
     """
     from pyarrow.fs import _resolve_filesystem_and_path
 
@@ -784,5 +803,5 @@ def write_dataset(data, base_dir, basename_template=None, format=None,
 
     _filesystemdataset_write(
         scanner, base_dir, basename_template, filesystem, partitioning,
-        file_options, max_partitions
+        file_options, max_partitions, file_visitor
     )
diff --git a/python/pyarrow/includes/libarrow_dataset.pxd b/python/pyarrow/includes/libarrow_dataset.pxd
index ede5775425f..85317afdef3 100644
--- a/python/pyarrow/includes/libarrow_dataset.pxd
+++ b/python/pyarrow/includes/libarrow_dataset.pxd
@@ -81,6 +81,8 @@ cdef extern from "arrow/compute/exec/expression.h" \
         CExtractKnownFieldValues "arrow::compute::ExtractKnownFieldValues"(
             const CExpression& partition_expression)
 
+ctypedef CStatus cb_writer_finish_internal(CFileWriter*)
+ctypedef void cb_writer_finish(dict, CFileWriter*)
 
 cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil:
 
@@ -223,6 +225,17 @@ cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil:
         const shared_ptr[CFileFormat]& format() const
         c_string type_name() const
 
+    cdef cppclass CFileWriter \
+            "arrow::dataset::FileWriter":
+        const shared_ptr[CFileFormat]& format() const
+        const shared_ptr[CSchema]& schema() const
+        const shared_ptr[CFileWriteOptions]& options() const
+        const CFileLocator& destination() const
+
+    cdef cppclass CParquetFileWriter \
+            "arrow::dataset::ParquetFileWriter"(CFileWriter):
+        const shared_ptr[FileWriter]& parquet_writer() const
+
     cdef cppclass CFileFormat "arrow::dataset::FileFormat":
         shared_ptr[CFragmentScanOptions] default_fragment_scan_options
         c_string type_name() const
@@ -263,6 +276,8 @@ cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil:
         shared_ptr[CPartitioning] partitioning
         int max_partitions
         c_string basename_template
+        function[cb_writer_finish_internal] writer_pre_finish
+        function[cb_writer_finish_internal] writer_post_finish
 
     cdef cppclass CFileSystemDataset \
             "arrow::dataset::FileSystemDataset"(CDataset):
diff --git a/python/pyarrow/includes/libarrow_fs.pxd b/python/pyarrow/includes/libarrow_fs.pxd
index 52ef97e5757..eef3757bff0 100644
--- a/python/pyarrow/includes/libarrow_fs.pxd
+++ b/python/pyarrow/includes/libarrow_fs.pxd
@@ -52,6 +52,10 @@ cdef extern from "arrow/filesystem/api.h" namespace "arrow::fs" nogil:
         c_bool allow_not_found
         c_bool recursive
 
+    cdef cppclass CFileLocator "arrow::fs::FileLocator":
+        shared_ptr[CFileSystem] filesystem
+        c_string path
+
     cdef cppclass CFileSystem "arrow::fs::FileSystem":
         shared_ptr[CFileSystem] shared_from_this()
         c_string type_name() const
diff --git a/python/pyarrow/parquet.py b/python/pyarrow/parquet.py
index 22763680cd1..c578661851f 100644
--- a/python/pyarrow/parquet.py
+++ b/python/pyarrow/parquet.py
@@ -1958,8 +1958,10 @@ def write_to_dataset(table, root_path, partition_cols=None,
             "implementation."
         )
         metadata_collector = kwargs.pop('metadata_collector', None)
+        file_visitor = None
         if metadata_collector is not None:
-            raise ValueError(msg.format("metadata_collector"))
+            def file_visitor(written_file):
+                metadata_collector.append(written_file.metadata)
         if partition_filename_cb is not None:
             raise ValueError(msg.format("partition_filename_cb"))
 
@@ -1979,7 +1981,8 @@ def write_to_dataset(table, root_path, partition_cols=None,
         ds.write_dataset(
             table, root_path, filesystem=filesystem,
             format=parquet_format, file_options=write_options, schema=schema,
-            partitioning=partitioning, use_threads=use_threads)
+            partitioning=partitioning, use_threads=use_threads,
+            file_visitor=file_visitor)
         return
 
     fs, root_path = legacyfs.resolve_filesystem_and_path(root_path, filesystem)
diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py
index 00832069e87..bf60b0f4b52 100644
--- a/python/pyarrow/tests/test_dataset.py
+++ b/python/pyarrow/tests/test_dataset.py
@@ -2718,6 +2718,9 @@ def test_feather_format(tempdir, dataset_reader):
 
 
 def _create_parquet_dataset_simple(root_path):
+    """
+    Creates a simple (flat files, no nested partitioning) Parquet dataset
+    """
     import pyarrow.parquet as pq
 
     metadata_collector = []
@@ -2749,6 +2752,36 @@ def test_parquet_dataset_factory(tempdir):
     assert result.num_rows == 40
 
 
+@pytest.mark.parquet
+@pytest.mark.pandas  # write_to_dataset currently requires pandas
+@pytest.mark.parametrize('use_legacy_dataset', [False, True])
+def test_parquet_dataset_factory_roundtrip(tempdir, use_legacy_dataset):
+    # Simple test to ensure we can roundtrip dataset to
+    # _metadata/common_metadata and back.  A more complex test
+    # using partitioning will have to wait for ARROW-13269.  The
+    # above test (test_parquet_dataset_factory) will not work
+    # when legacy is False as there is no "append" equivalent in
+    # the new dataset until ARROW-12358
+    import pyarrow.parquet as pq
+    root_path = tempdir / "test_parquet_dataset"
+    table = pa.table({'f1': [0] * 10, 'f2': np.random.randn(10)})
+    metadata_collector = []
+    pq.write_to_dataset(
+        table, str(root_path), metadata_collector=metadata_collector,
+        use_legacy_dataset=use_legacy_dataset
+    )
+    metadata_path = str(root_path / '_metadata')
+    # write _metadata file
+    pq.write_metadata(
+        table.schema, metadata_path,
+        metadata_collector=metadata_collector
+    )
+    dataset = ds.parquet_dataset(metadata_path)
+    assert dataset.schema.equals(table.schema)
+    result = dataset.to_table()
+    assert result.num_rows == 10
+
+
 @pytest.mark.parquet
 @pytest.mark.pandas
 def test_parquet_dataset_factory_invalid(tempdir):
@@ -3197,10 +3230,29 @@ def test_write_dataset_use_threads(tempdir):
         pa.schema([("part", pa.string())]), flavor="hive")
 
     target1 = tempdir / 'partitioned1'
+    paths_written = []
+
+    def file_visitor(written_file):
+        paths_written.append(written_file.path)
+
     ds.write_dataset(
         dataset, target1, format="feather", partitioning=partitioning,
-        use_threads=True
+        use_threads=True, file_visitor=file_visitor
     )
+
+    # Since it is a multi-threaded write there is no way to know which
+    # directory gets part-0 and which gets part-1
+    expected_paths_a = {
+        target1 / 'part=a' / 'part-0.feather',
+        target1 / 'part=b' / 'part-1.feather'
+    }
+    expected_paths_b = {
+        target1 / 'part=a' / 'part-1.feather',
+        target1 / 'part=b' / 'part-0.feather'
+    }
+    paths_written_set = set(map(pathlib.Path, paths_written))
+    assert paths_written_set in [expected_paths_a, expected_paths_b]
+
     target2 = tempdir / 'partitioned2'
     ds.write_dataset(
         dataset, target2, format="feather", partitioning=partitioning,
@@ -3232,19 +3284,28 @@ def test_write_table(tempdir):
 
     # with partitioning
     base_dir = tempdir / 'partitioned'
+    expected_paths = [
+        base_dir / "part=a", base_dir / "part=a" / "dat_0.arrow",
+        base_dir / "part=b", base_dir / "part=b" / "dat_1.arrow"
+    ]
+
+    visited_paths = []
+
+    def file_visitor(written_file):
+        visited_paths.append(written_file.path)
+
     partitioning = ds.partitioning(
         pa.schema([("part", pa.string())]), flavor="hive")
     ds.write_dataset(table, base_dir, format="feather",
                      basename_template='dat_{i}.arrow',
-                     partitioning=partitioning)
+                     partitioning=partitioning, file_visitor=file_visitor)
     file_paths = list(base_dir.rglob("*"))
-    expected_paths = [
-        base_dir / "part=a", base_dir / "part=a" / "dat_0.arrow",
-        base_dir / "part=b", base_dir / "part=b" / "dat_1.arrow"
-    ]
     assert set(file_paths) == set(expected_paths)
     result = ds.dataset(base_dir, format="ipc", partitioning=partitioning)
     assert result.to_table().equals(table)
+    assert len(visited_paths) == 2
+    for visited_path in visited_paths:
+        assert pathlib.Path(visited_path) in expected_paths
 
 
 def test_write_table_multiple_fragments(tempdir):
@@ -3417,6 +3478,69 @@ def test_write_dataset_csv(tempdir):
     assert result.equals(table)
 
 
+@pytest.mark.parquet
+def test_write_dataset_parquet_file_visitor(tempdir):
+    table = pa.table([
+        pa.array(range(20)), pa.array(np.random.randn(20)),
+        pa.array(np.repeat(['a', 'b'], 10))
+    ], names=["f1", "f2", "part"])
+
+    visitor_called = False
+
+    def file_visitor(written_file):
+        nonlocal visitor_called
+        if (written_file.metadata is not None and
+                written_file.metadata.num_columns == 3):
+            visitor_called = True
+
+    base_dir = tempdir / 'parquet_dataset'
+    ds.write_dataset(table, base_dir, format="parquet",
+                     file_visitor=file_visitor)
+
+    assert visitor_called
+
+
+def test_partition_dataset_parquet_file_visitor(tempdir):
+    f1_vals = [item for chunk in range(4) for item in [chunk] * 10]
+    f2_vals = [item*10 for chunk in range(4) for item in [chunk] * 10]
+    table = pa.table({'f1': f1_vals, 'f2': f2_vals,
+                      'part': np.repeat(['a', 'b'], 20)})
+
+    root_path = tempdir / 'partitioned'
+    partitioning = ds.partitioning(
+        pa.schema([("part", pa.string())]), flavor="hive")
+
+    paths_written = []
+
+    sample_metadata = None
+
+    def file_visitor(written_file):
+        nonlocal sample_metadata
+        if written_file.metadata:
+            sample_metadata = written_file.metadata
+        paths_written.append(written_file.path)
+
+    ds.write_dataset(
+        table, root_path, format="parquet", partitioning=partitioning,
+        use_threads=True, file_visitor=file_visitor
+    )
+
+    # Since it is a multi-threaded write there is no way to know which
+    # directory gets part-0 and which gets part-1
+    expected_paths_a = {
+        root_path / 'part=a' / 'part-0.parquet',
+        root_path / 'part=b' / 'part-1.parquet'
+    }
+    expected_paths_b = {
+        root_path / 'part=a' / 'part-1.parquet',
+        root_path / 'part=b' / 'part-0.parquet'
+    }
+    paths_written_set = set(map(pathlib.Path, paths_written))
+    assert paths_written_set in [expected_paths_a, expected_paths_b]
+    assert sample_metadata is not None
+    assert sample_metadata.num_columns == 2
+
+
 @pytest.mark.parquet
 @pytest.mark.pandas
 def test_write_dataset_arrow_schema_metadata(tempdir):

From dd9261ee67d1e81f036d3d1f9cb5dd544914f5a7 Mon Sep 17 00:00:00 2001
From: Weston Pace <weston.pace@gmail.com>
Date: Wed, 14 Jul 2021 15:59:16 -0400
Subject: [PATCH 556/719] ARROW-13218: [Doc] Document/clarify conventions for
 timestamp storage

I've made an attempt to refine the recent discussions into an updated comment describing the timestamp column.  Since the entire discussion has been around fine-grained semantic concepts I will appreciate even minor suggestions to improve the wording.  There are still votes ongoing so this shouldn't be merged until those resolve.

Closes #10629 from westonpace/feature/ARROW-13218

Authored-by: Weston Pace <weston.pace@gmail.com>
Signed-off-by: Benjamin Kietzman <bengilgit@gmail.com>
---
 format/Schema.fbs | 37 ++++++++++++++++++++++++++++++-------
 1 file changed, 30 insertions(+), 7 deletions(-)

diff --git a/format/Schema.fbs b/format/Schema.fbs
index 3b00dd4780d..2d447d30791 100644
--- a/format/Schema.fbs
+++ b/format/Schema.fbs
@@ -218,8 +218,33 @@ table Time {
 /// leap seconds, as a 64-bit integer. Note that UNIX time does not include
 /// leap seconds.
 ///
-/// The Timestamp metadata supports both "time zone naive" and "time zone
-/// aware" timestamps. Read about the timezone attribute for more detail
+/// Date & time libraries often have multiple different data types for temporal
+/// data.  In order to ease interoperability between different implementations the
+/// Arrow project has some recommendations for encoding these types into a Timestamp
+/// column.
+///
+/// An "instant" represents a single moment in time that has no meaningful time zone
+/// or the time zone is unknown.  A column of instants can also contain values from
+/// multiple time zones.  To encode an instant set the timezone string to "UTC".
+///
+/// A "zoned date-time" represents a single moment in time that has a meaningful
+/// reference time zone.  To encode a zoned date-time as a Timestamp set the timezone
+/// string to the name of the timezone.  There is some ambiguity between an instant
+/// and a zoned date-time with the UTC time zone.  Both of these are stored the same.
+/// Typically, this distinction does not matter.  If it does, then an application should
+/// use custom metadata or an extension type to distinguish between the two cases.
+///
+/// An "offset date-time" represents a single moment in time combined with a meaningful
+/// offset from UTC.  To encode an offset date-time as a Timestamp set the timezone string
+/// to the numeric time zone offset string (e.g. "+03:00").
+///
+/// A "local date-time" does not represent a single moment in time.  It represents a wall
+/// clock time combined with a date.  Because of daylight savings time there may multiple
+/// instants that correspond to a single local date-time in any given time zone.  A
+/// local date-time is often stored as a struct or a Date32/Time64 pair.  However, it can
+/// also be encoded into a Timestamp column.  To do so the value should be the the time
+/// elapsed from the Unix epoch so that a wall clock in UTC would display the desired time.
+/// The timezone string should be set to null or the empty string.
 table Timestamp {
   unit: TimeUnit;
 
@@ -232,11 +257,9 @@ table Timestamp {
   /// Whether a timezone string is present indicates different semantics about
   /// the data:
   ///
-  /// * If the time zone is null or equal to an empty string, the data is "time
-  ///   zone naive" and shall be displayed *as is* to the user, not localized
-  ///   to the locale of the user. This data can be though of as UTC but
-  ///   without having "UTC" as the time zone, it is not considered to be
-  ///   localized to any time zone
+  /// * If the time zone is null or an empty string, the data is a local date-time
+  ///   and does not represent a single moment in time.  Instead it represents a wall clock
+  ///   time and care should be taken to avoid interpreting it semantically as an instant.
   ///
   /// * If the time zone is set to a valid value, values can be displayed as
   ///   "localized" to that time zone, even though the underlying 64-bit

From 98e0b714dd7ceab05fffa851b494549590d110f3 Mon Sep 17 00:00:00 2001
From: Kevin Gurney <kgurney@mathworks.com>
Date: Thu, 15 Jul 2021 10:02:28 +0900
Subject: [PATCH 557/719] ARROW-13100: [MATLAB] Integrate GoogleTest with
 MATLAB Interface C++ Code

## Overview

This Pull Request:

1. **Adds additional options for integrating the Arrow C++ libraries with the MATLAB interface.**

    1.1. Users can supply a custom `ARROW_HOME` value to make use of their own, pre-built `libarrow` library.

    1.2. If no information is provided, then CMake will attempt to find a system-installed Arrow distribution (e.g. installed via a package manager).

    1.3. If no system Arrow libraries are found by `find_package`, and no additional information is provided, then the Arrow C++ libraries will automatically be built from source using `ExternalProject_Add`.

2. **Adds support for GoogleTest to the CMake build system for the MATLAB interface.**

    2.1.  A user can supply a custom [`GTEST_ROOT`](https://cmake.org/cmake/help/latest/module/FindGTest.html) value to CMake in order to make use of pre-built GoogleTest binaries.

    2.2. If no information is provided, then CMake will attempt to find a system-installed GoogleTest distribution (e.g. installed via a package manager).

    2.3. If no system GoogleTest libraries are found by `find_package`, and no additional information is provided, then the CMake build system will use the bundled GoogleTest binaries that are built as part of the Arrow C++ libraries ( `ARROW_BUILD_TESTS` is set to `ON` under the hood).

3. **Adds a `MATLAB_BUILD_TESTS` option to the CMake build system for the MATLAB interface.**

    3.1. This allows users to enable building of the  C++ tests for the MATLAB interface with `cmake .. -D MATLAB_BUILD_TESTS=ON`.

4. **Allows the MATLAB interface and the C++ tests to be built and run using a consistent set of commands on Windows, macOS, and Linux.**

```bash
$ cmake -S . -B build -D MATLAB_BUILD_TESTS=ON
$ cmake --build build --config Release
$ ctest --test-dir build
```

## Implementation

This implementation uses [`ExternalProject_Add`](https://cmake.org/cmake/help/latest/module/ExternalProject.html#command:externalproject_add) to automatically build the Arrow C++ libraries and bundled GoogleTest binaries from source.

We took heavy inspiration from the CMake patterns used to create imported library targets in [`ThirdpartyToolchain.cmake`](https://github.com/apache/arrow/blob/master/cpp/cmake_modules/ThirdpartyToolchain.cmake).

We followed the [same approach used in the CMake build system for the Arrow C++ libraries](https://github.com/apache/arrow/blob/e2238582e2a2bf20a68a967145fe1a7b2337a997/cpp/cmake_modules/ThirdpartyToolchain.cmake#L1701) and added an automatic step which copies the `gtest.dll` and `gtest_main.dll` runtime libraries to the same folder as the C++ tests for the MATLAB interface, so that they can be found by the [Windows load-time dynamic linker](https://docs.microsoft.com/en-us/windows/win32/dlls/load-time-dynamic-linking).

## Testing

1. These changes were qualified against the following platform/compiler configurations:
 1.1. Windows 10 with Visual Studio 2019
 1.2. macOS Big Sur (11.2.3) with GNU Make 3.81
 1.3. Debian 10 with GNU Make GNU 4.2.1

2. We ran the CMake build with a custom `GTEST_ROOT` value that points to GoogleTest binaries that were built via the CMake `googletest_ep` target from the Arrow C++ libraries. In other words, we built GoogleTest with `cmake -DARROW_BUILD_TESTS=ON  ..; make googletest_ep`.
3. We ran the CMake build without specifying a `GTEST_ROOT` value and the GoogleTest binaries that are bundled with the Arrow C++ libraries (when built with `-D ARROW_BUILD_TESTS=ON`) were automatically used.
4. We ran the CMake build with `MATLAB_BUILD_TESTS=ON` and verified that an Arrow installation found by `find_package` (e.g. when an explicit `ARROW_HOME` is specified) is used, instead of the Arrow library built from source, using `ldd` on Linux, `otool -l` and `otool -L` on macOS, and the [`/VERBOSE:LIB`](https://docs.microsoft.com/en-us/cpp/build/reference/verbose-print-progress-messages?view=msvc-160#remarks) linker flag on Windows. We also verified that deleting the `arrow.lib` import library from a custom `ARROW_HOME` resulted in a linker failure on Windows.
5. We built and ran the tests on Windows, macOS, and Linux using the following commands:

```
$ cmake -S . -B build -D MATLAB_BUILD_TESTS=ON
$ cmake --build build --config Release
$ ctest --test-dir build
```

## Future Directions

1. As follow up work to this PR, a major priority is to integrate the MATLAB interface with the Arrow CI ecosystem, so that both C++ and MATLAB tests will be automatically run with every pull request.
2. Pending acceptance of this pull request, the MATLAB interface will have C++ testing infrastructure in place. This will allow us to shift focus towards incremental delivery of C++ feature code, along with associated tests.
3. We plan to update the MATLAB interface documentation to clearly explain how to build and run the C++ tests with and without a custom `GTEST_ROOT` value. This work is captured in [ARROW-13204](https://issues.apache.org/jira/browse/ARROW-13204).
4. The current implementation requires the C++ tests for the Arrow C++ libraries to be built in order to use the bundled GoogleTest binaries. Building the Arrow C++ tests takes a nontrivial amount of time and isn't strictly necessary to validate the correctness of the MATLAB interface. In the future, we may want to consider fetching and building the GoogleTest binaries from GitHub separately. One approach to achieve this would be to factor out the [`build_gtest` macro](https://github.com/apache/arrow/blob/e2238582e2a2bf20a68a967145fe1a7b2337a997/cpp/cmake_modules/ThirdpartyToolchain.cmake#L1602) from `ThirdpartyToolchain.cmake` and make it reusable by the MATLAB interface.
5. Automatically copying `gtest.dll` and `gtest_main.dll` to the same folder as the C++ tests during the build process helps new developers get started with less friction on Windows. Copying `arrow.dll` to the same folder as the MEX files may similarly be useful. This would prevent users from having to manually add `arrow.dll` to the Windows `%PATH%`. This would also require us to keep all the MEX files in the build folder. There would be some trade-offs with this approach concerning MEX file organization and discoverability on the MATLAB Path, so this would require a bit more thought.
6. This PR doesn't support debug builds of GoogleTest when building the bundled GoogleTest binaries. We may want to support this in the future.
7. When specifying a custom `GTEST_ROOT`, `gtest.dll` and `gtest_main.dll` won't be automatically copied to the location of the C++ tests. Therefore, they won't be discoverable by the Windows load-time dynamic linker by default. It might be nice to support automatic copying for this use case in the future.

## Notes

1. Thank you to @lafiona for helping me with this pull request!

Closes #10614 from kevingurney/ARROW-13100

Authored-by: Kevin Gurney <kgurney@mathworks.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 matlab/CMakeLists.txt          | 212 ++++++++++++++++++++++++++++++++-
 matlab/src/placeholder_test.cc |  27 +++++
 2 files changed, 237 insertions(+), 2 deletions(-)
 create mode 100644 matlab/src/placeholder_test.cc

diff --git a/matlab/CMakeLists.txt b/matlab/CMakeLists.txt
index 5ee48a87c3a..18c1237a491 100644
--- a/matlab/CMakeLists.txt
+++ b/matlab/CMakeLists.txt
@@ -17,6 +17,170 @@
 
 cmake_minimum_required(VERSION 3.20)
 
+# Build the Arrow C++ libraries.
+function(build_arrow)
+  set(options BUILD_GTEST)
+  set(one_value_args)
+  set(multi_value_args)
+  cmake_parse_arguments(ARG
+                        "${options}"
+                        "${one_value_args}"
+                        "${multi_value_args}"
+                        ${ARGN})
+  if(ARG_UNPARSED_ARGUMENTS)
+    message(SEND_ERROR "Error: unrecognized arguments: ${ARG_UNPARSED_ARGUMENTS}")
+  endif()
+
+  if(WIN32)
+    set(ARROW_IMPORTED_TYPE IMPORTED_IMPLIB)
+    set(ARROW_LIBRARY_SUFFIX ${CMAKE_IMPORT_LIBRARY_SUFFIX})
+  else()
+    set(ARROW_IMPORTED_TYPE IMPORTED_LOCATION)
+    set(ARROW_LIBRARY_SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX})
+  endif()
+
+  set(ARROW_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/arrow_ep-prefix")
+  set(ARROW_INCLUDE_DIR "${ARROW_PREFIX}/include")
+  set(ARROW_LIBRARY_DIR "${ARROW_PREFIX}/lib")
+  set(ARROW_SHARED_LIB
+      "${ARROW_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}arrow${ARROW_LIBRARY_SUFFIX}")
+  set(ARROW_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/arrow_ep-build")
+  set(ARROW_CMAKE_ARGS "-DCMAKE_INSTALL_PREFIX=${ARROW_PREFIX}"
+                       "-DCMAKE_INSTALL_LIBDIR=lib" "-DARROW_BUILD_STATIC=OFF")
+  set(ARROW_BUILD_BYPRODUCTS "${ARROW_SHARED_LIB}")
+
+  # Building the Arrow C++ libraries and bundled GoogleTest binaries requires ExternalProject.
+  include(ExternalProject)
+
+  if(ARG_BUILD_GTEST)
+    enable_gtest()
+  endif()
+
+  externalproject_add(arrow_ep
+                      SOURCE_DIR "${CMAKE_SOURCE_DIR}/../cpp"
+                      BINARY_DIR "${ARROW_BINARY_DIR}"
+                      CMAKE_ARGS ${ARROW_CMAKE_ARGS}
+                      BUILD_BYPRODUCTS ${ARROW_BUILD_BYPRODUCTS})
+
+  set(ARROW_LIBRARY_TARGET arrow_shared)
+
+  # If find_package has already found a valid Arrow installation, then
+  # we don't want to link against the newly built arrow_shared library.
+  # However, we still need create a library target to trigger building
+  # of the arrow_ep target, which will ultimately build the bundled
+  # GoogleTest binaries.
+  if(Arrow_FOUND)
+    set(ARROW_LIBRARY_TARGET arrow_shared_for_gtest)
+  endif()
+
+  file(MAKE_DIRECTORY "${ARROW_INCLUDE_DIR}")
+  add_library(${ARROW_LIBRARY_TARGET} SHARED IMPORTED)
+  set_target_properties(${ARROW_LIBRARY_TARGET}
+                        PROPERTIES ${ARROW_IMPORTED_TYPE} ${ARROW_SHARED_LIB}
+                                   INTERFACE_INCLUDE_DIRECTORIES ${ARROW_INCLUDE_DIR})
+
+  add_dependencies(${ARROW_LIBRARY_TARGET} arrow_ep)
+
+  if(ARG_BUILD_GTEST)
+    build_gtest()
+  endif()
+
+endfunction()
+
+macro(enable_gtest)
+  if(WIN32)
+    set(ARROW_GTEST_IMPORTED_TYPE IMPORTED_IMPLIB)
+    set(ARROW_GTEST_MAIN_IMPORTED_TYPE IMPORTED_IMPLIB)
+
+    set(ARROW_GTEST_LIBRARY_SUFFIX ${CMAKE_IMPORT_LIBRARY_SUFFIX})
+    set(ARROW_GTEST_MAIN_LIBRARY_SUFFIX ${CMAKE_IMPORT_LIBRARY_SUFFIX})
+  else()
+    set(ARROW_GTEST_IMPORTED_TYPE IMPORTED_LOCATION)
+    set(ARROW_GTEST_MAIN_IMPORTED_TYPE IMPORTED_LOCATION)
+
+    set(ARROW_GTEST_LIBRARY_SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX})
+    set(ARROW_GTEST_MAIN_LIBRARY_SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX})
+  endif()
+
+  set(ARROW_GTEST_PREFIX "${ARROW_BINARY_DIR}/googletest_ep-prefix")
+  set(ARROW_GTEST_INCLUDE_DIR "${ARROW_GTEST_PREFIX}/include")
+  set(ARROW_GTEST_LIBRARY_DIR "${ARROW_GTEST_PREFIX}/lib")
+  set(ARROW_GTEST_SHARED_LIB
+      "${ARROW_GTEST_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}gtest${ARROW_GTEST_LIBRARY_SUFFIX}"
+  )
+
+  set(ARROW_GTEST_MAIN_PREFIX "${ARROW_BINARY_DIR}/googletest_ep-prefix")
+  set(ARROW_GTEST_MAIN_INCLUDE_DIR "${ARROW_GTEST_MAIN_PREFIX}/include")
+  set(ARROW_GTEST_MAIN_LIBRARY_DIR "${ARROW_GTEST_MAIN_PREFIX}/lib")
+  set(ARROW_GTEST_MAIN_SHARED_LIB
+      "${ARROW_GTEST_MAIN_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}gtest_main${ARROW_GTEST_MAIN_LIBRARY_SUFFIX}"
+  )
+
+  list(APPEND ARROW_CMAKE_ARGS "-DARROW_BUILD_TESTS=ON")
+  list(APPEND ARROW_BUILD_BYPRODUCTS "${ARROW_GTEST_SHARED_LIB}"
+       "${ARROW_GTEST_MAIN_SHARED_LIB}")
+endmacro()
+
+# Build the GoogleTest binaries that are bundled with the Arrow C++ libraries.
+macro(build_gtest)
+  set(ARROW_GTEST_INCLUDE_DIR "${ARROW_GTEST_PREFIX}/include")
+  set(ARROW_GTEST_MAIN_INCLUDE_DIR "${ARROW_GTEST_MAIN_PREFIX}/include")
+
+  file(MAKE_DIRECTORY "${ARROW_GTEST_INCLUDE_DIR}")
+
+  if(WIN32)
+    set(ARROW_GTEST_RUNTIME_DIR "${ARROW_GTEST_PREFIX}/bin")
+    set(ARROW_GTEST_MAIN_RUNTIME_DIR "${ARROW_GTEST_MAIN_PREFIX}/bin")
+    set(ARROW_GTEST_RUNTIME_SUFFIX "${CMAKE_SHARED_LIBRARY_SUFFIX}")
+    set(ARROW_GTEST_MAIN_RUNTIME_SUFFIX "${CMAKE_SHARED_LIBRARY_SUFFIX}")
+    set(ARROW_GTEST_RUNTIME_LIB
+        "${ARROW_GTEST_RUNTIME_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}gtest${ARROW_GTEST_RUNTIME_SUFFIX}"
+    )
+    set(ARROW_GTEST_MAIN_RUNTIME_LIB
+        "${ARROW_GTEST_MAIN_RUNTIME_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}gtest_main${ARROW_GTEST_MAIN_RUNTIME_SUFFIX}"
+    )
+
+    # Multi-Configuration generators (e.g. Visual Studio or XCode) place their build artifacts
+    # in a subdirectory named ${CMAKE_BUILD_TYPE} by default, where ${CMAKE_BUILD_TYPE} varies
+    # depending on the chosen build configuration (e.g. Release or Debug).
+    get_property(GENERATOR_IS_MULTI_CONFIG_VALUE GLOBAL
+                 PROPERTY GENERATOR_IS_MULTI_CONFIG)
+    if(GENERATOR_IS_MULTI_CONFIG_VALUE)
+      set(MATLAB_TESTS_DIR "${CMAKE_BINARY_DIR}/$<CONFIG>")
+    else()
+      set(MATLAB_TESTS_DIR "${CMAKE_BINARY_DIR}")
+    endif()
+
+    # We need to copy the gtest and gtest_main runtime DLLs into the directory where the
+    # MATLAB C++ tests reside, since Windows requires that runtime DLLs are in the same
+    # directory as the executables that depend on them (or on the %PATH%).
+    externalproject_add_step(arrow_ep copy
+                             COMMAND ${CMAKE_COMMAND} -E make_directory
+                                     ${MATLAB_TESTS_DIR}
+                             COMMAND ${CMAKE_COMMAND} -E copy ${ARROW_GTEST_RUNTIME_LIB}
+                                     ${MATLAB_TESTS_DIR}
+                             COMMAND ${CMAKE_COMMAND} -E copy
+                                     ${ARROW_GTEST_MAIN_RUNTIME_LIB} ${MATLAB_TESTS_DIR}
+                             DEPENDEES install)
+  endif()
+
+  add_library(GTest::gtest SHARED IMPORTED)
+  set_target_properties(GTest::gtest
+                        PROPERTIES ${ARROW_GTEST_IMPORTED_TYPE} ${ARROW_GTEST_SHARED_LIB}
+                                   INTERFACE_INCLUDE_DIRECTORIES
+                                   ${ARROW_GTEST_INCLUDE_DIR})
+
+  add_library(GTest::gtest_main SHARED IMPORTED)
+  set_target_properties(GTest::gtest_main
+                        PROPERTIES ${ARROW_GTEST_MAIN_IMPORTED_TYPE}
+                                   ${ARROW_GTEST_MAIN_SHARED_LIB}
+                                   INTERFACE_INCLUDE_DIRECTORIES
+                                   ${ARROW_GTEST_MAIN_INCLUDE_DIR})
+
+  add_dependencies(GTest::gtest arrow_ep)
+  add_dependencies(GTest::gtest_main arrow_ep)
+endmacro()
+
 set(CMAKE_CXX_STANDARD 11)
 
 set(MLARROW_VERSION "5.0.0-SNAPSHOT")
@@ -24,6 +188,8 @@ string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" MLARROW_BASE_VERSION "${MLARROW_V
 
 project(mlarrow VERSION "${MLARROW_BASE_VERSION}")
 
+option(MATLAB_BUILD_TESTS "Build the C++ tests for the MATLAB interface" OFF)
+
 # Grab CMAKE Modules from the CPP interface
 set(CPP_CMAKE_MODULES "${CMAKE_SOURCE_DIR}/../cpp/cmake_modules")
 if(EXISTS "${CPP_CMAKE_MODULES}")
@@ -32,8 +198,32 @@ endif()
 
 set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_SOURCE_DIR}/cmake_modules)
 
-# Arrow is Required
-find_package(Arrow REQUIRED)
+# Only build the MATLAB interface C++ tests if MATLAB_BUILD_TESTS=ON.
+if(MATLAB_BUILD_TESTS)
+  # find_package(GTest) supports custom GTEST_ROOT as well as package managers.
+  find_package(GTest)
+  if(NOT GTest_FOUND)
+    # find_package(Arrow) supports custom ARROW_HOME as well as package
+    # managers.
+    find_package(Arrow)
+    # Trigger an automatic build of the Arrow C++ libraries and bundled
+    # GoogleTest binaries. If a valid Arrow installation was not already
+    # found by find_package, then build_arrow will use the Arrow
+    # C++ libraries that are built from source.
+    build_arrow(BUILD_GTEST)
+  else()
+    find_package(Arrow)
+    if(NOT Arrow_FOUND)
+      # Trigger an automatic build of the Arrow C++ libraries.
+      build_arrow()
+    endif()
+  endif()
+else()
+  find_package(Arrow)
+  if(NOT Arrow_FOUND)
+    build_arrow()
+  endif()
+endif()
 
 # MATLAB is Required
 find_package(Matlab REQUIRED)
@@ -72,3 +262,21 @@ else()
   set_target_properties(featherwritemex PROPERTIES LIBRARY_OUTPUT_DIRECTORY
                                                    $<1:${CMAKE_SOURCE_DIR}/src>)
 endif()
+
+# ##############################################################################
+# C++ Tests
+# ##############################################################################
+# Only build the C++ tests if MATLAB_BUILD_TESTS=ON.
+if(MATLAB_BUILD_TESTS)
+  enable_testing()
+
+  # Define a test executable target. TODO: Remove the placeholder test. This is
+  # just for testing GoogleTest integration.
+  add_executable(placeholder_test ${CMAKE_SOURCE_DIR}/src/placeholder_test.cc)
+  # Declare a dependency on the GTest::gtest and GTest::gtest_main IMPORTED
+  # targets.
+  target_link_libraries(placeholder_test GTest::gtest GTest::gtest_main)
+
+  # Add a test target.
+  add_test(PlaceholderTestTarget placeholder_test)
+endif()
diff --git a/matlab/src/placeholder_test.cc b/matlab/src/placeholder_test.cc
new file mode 100644
index 00000000000..eef37e178f6
--- /dev/null
+++ b/matlab/src/placeholder_test.cc
@@ -0,0 +1,27 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+namespace arrow {
+namespace matlab {
+namespace test {
+// TODO: Remove this placeholder test.
+TEST(PlaceholderTestSuite, PlaceholderTestCase) { ASSERT_TRUE(true); }
+}  // namespace test
+}  // namespace matlab
+}  // namespace arrow

From 5e43cde120a39130d502741ca70da9650d1be13e Mon Sep 17 00:00:00 2001
From: Dominik Moritz <domoritz@gmail.com>
Date: Wed, 14 Jul 2021 20:19:16 -0700
Subject: [PATCH 558/719] ARROW-13299: [JS] Upgrade ix and rxjs

Merge after #10673, only the last commit is new

Closes #10695 from domoritz/dom/upgrades

Lead-authored-by: Dominik Moritz <domoritz@gmail.com>
Co-authored-by: ptaylor <paul.e.taylor@me.com>
Signed-off-by: Dominik Moritz <domoritz@gmail.com>
---
 js/gulp/arrow-task.js                         |   12 +-
 js/gulp/clean-task.js                         |   12 +-
 js/gulp/package-task.js                       |   15 +-
 js/gulp/typescript-task.js                    |   17 +-
 js/gulp/util.js                               |   31 +-
 js/gulpfile.js                                |   13 +-
 js/package.json                               |    8 +-
 js/test/unit/builders/builder-tests.ts        |   13 +-
 js/test/unit/builders/utils.ts                |   12 +-
 js/test/unit/ipc/writer/streams-dom-tests.ts  |   13 +-
 js/test/unit/ipc/writer/streams-node-tests.ts |   16 +-
 js/yarn.lock                                  | 1005 +++++++++--------
 12 files changed, 654 insertions(+), 513 deletions(-)

diff --git a/js/gulp/arrow-task.js b/js/gulp/arrow-task.js
index 277ee745f9e..4f597a0aefa 100644
--- a/js/gulp/arrow-task.js
+++ b/js/gulp/arrow-task.js
@@ -24,7 +24,13 @@ const gulp = require('gulp');
 const mkdirp = require('mkdirp');
 const gulpRename = require(`gulp-rename`);
 const { memoizeTask } = require('./memoize-task');
-const { Observable, ReplaySubject } = require('rxjs');
+const {
+    ReplaySubject,
+    forkJoin: ObservableForkJoin,
+} = require('rxjs');
+const {
+    share
+} = require('rxjs/operators');
 const pipeline = require('util').promisify(require('stream').pipeline);
 
 const arrowTask = ((cache) => memoizeTask(cache, function copyMain(target) {
@@ -38,7 +44,7 @@ const arrowTask = ((cache) => memoizeTask(cache, function copyMain(target) {
     const esmSourceMapsGlob = `${targetDir(`esnext`, `esm`)}/**/*.map`;
     const es2015UmdSourceMapsGlob = `${targetDir(`es2015`, `umd`)}/*.map`;
     const esnextUmdSourceMapsGlob = `${targetDir(`esnext`, `umd`)}/*.map`;
-    return Observable.forkJoin(
+    return ObservableForkJoin(
         observableFromStreams(gulp.src(dtsGlob),                 gulp.dest(out)), // copy d.ts files
         observableFromStreams(gulp.src(cjsGlob),                 gulp.dest(out)), // copy esnext cjs files
         observableFromStreams(gulp.src(cjsSourceMapsGlob),       gulp.dest(out)), // copy esnext cjs sourcemaps
@@ -48,7 +54,7 @@ const arrowTask = ((cache) => memoizeTask(cache, function copyMain(target) {
         observableFromStreams(gulp.src(esmGlob),       gulpRename((p) => { p.extname = '.mjs'; }),          gulp.dest(out)), // copy esnext esm files and rename to `.mjs`
         observableFromStreams(gulp.src(es2015UmdGlob), gulpRename((p) => { p.basename += `.es2015.min`; }), gulp.dest(out)), // copy es2015 umd files and add `.min`
         observableFromStreams(gulp.src(esnextUmdGlob), gulpRename((p) => { p.basename += `.esnext.min`; }), gulp.dest(out)), // copy esnext umd files and add `.esnext.min`
-    ).publish(new ReplaySubject()).refCount();
+    ).pipe(share({ connector: () => new ReplaySubject(), resetOnError: false, resetOnComplete: false, resetOnRefCountZero: false }));
 }))({});
 
 const arrowTSTask = ((cache) => memoizeTask(cache, async function copyTS(target, format) {
diff --git a/js/gulp/clean-task.js b/js/gulp/clean-task.js
index 551aeb41af7..0034f9a095d 100644
--- a/js/gulp/clean-task.js
+++ b/js/gulp/clean-task.js
@@ -16,15 +16,19 @@
 // under the License.
 
 const del = require('del');
-const { Observable } = require('rxjs');
 const { targetDir } = require('./util');
 const memoizeTask = require('./memoize-task');
+const { catchError } = require('rxjs/operators');
+const {
+    from: ObservableFrom,
+    EMPTY: ObservableEmpty,
+} = require('rxjs');
 
 const cleanTask = ((cache) => memoizeTask(cache, function clean(target, format) {
     const dir = targetDir(target, format);
-    return Observable.from(del(dir))
-        .catch((e) => Observable.empty());
+    return ObservableFrom(del(dir))
+        .pipe(catchError((e) => ObservableEmpty()));
 }))({});
 
 module.exports = cleanTask;
-module.exports.cleanTask = cleanTask;
+module.exports.cleanTask = cleanTask;
\ No newline at end of file
diff --git a/js/gulp/package-task.js b/js/gulp/package-task.js
index c320c908d3d..a3f21394685 100644
--- a/js/gulp/package-task.js
+++ b/js/gulp/package-task.js
@@ -23,20 +23,27 @@ const {
 
 const gulp = require('gulp');
 const { memoizeTask } = require('./memoize-task');
-const { Observable, ReplaySubject } = require('rxjs');
+const {
+    ReplaySubject,
+    EMPTY: ObservableEmpty,
+    forkJoin: ObservableForkJoin,
+} = require('rxjs');
+const {
+    share
+} = require('rxjs/operators');
 const gulpJsonTransform = require('gulp-json-transform');
 
 const packageTask = ((cache) => memoizeTask(cache, function bundle(target, format) {
-    if (target === `src`) return Observable.empty();
+    if (target === `src`) return ObservableEmpty();
     const out = targetDir(target, format);
     const jsonTransform = gulpJsonTransform(target === npmPkgName ? createMainPackageJson(target, format) :
                                             target === `ts`       ? createTypeScriptPackageJson(target, format)
                                                                   : createScopedPackageJSON(target, format),
                                             2);
-    return Observable.forkJoin(
+    return ObservableForkJoin([
       observableFromStreams(gulp.src(metadataFiles), gulp.dest(out)), // copy metadata files
       observableFromStreams(gulp.src(`package.json`), jsonTransform, gulp.dest(out)) // write packageJSONs
-    ).publish(new ReplaySubject()).refCount();
+    ]).pipe(share({ connector: () => new ReplaySubject(), resetOnError: false, resetOnComplete: false, resetOnRefCountZero: false }));
 }))({});
 
 module.exports = packageTask;
diff --git a/js/gulp/typescript-task.js b/js/gulp/typescript-task.js
index 928e473141b..7c672e70250 100644
--- a/js/gulp/typescript-task.js
+++ b/js/gulp/typescript-task.js
@@ -28,7 +28,15 @@ const path = require('path');
 const ts = require(`gulp-typescript`);
 const sourcemaps = require('gulp-sourcemaps');
 const { memoizeTask } = require('./memoize-task');
-const { Observable, ReplaySubject } = require('rxjs');
+const {
+    ReplaySubject,
+    forkJoin: ObservableForkJoin,
+} = require('rxjs');
+const {
+    mergeWith,
+    takeLast,
+    share
+} = require('rxjs/operators');
 
 const typescriptTask = ((cache) => memoizeTask(cache, function typescript(target, format) {
     if (shouldRunInChildProcess(target, format)) {
@@ -38,8 +46,9 @@ const typescriptTask = ((cache) => memoizeTask(cache, function typescript(target
     const out = targetDir(target, format);
     const tsconfigPath = path.join(`tsconfig`, `tsconfig.${tsconfigName(target, format)}.json`);
     return compileTypescript(out, tsconfigPath)
-        .merge(compileBinFiles(target, format)).takeLast(1)
-        .publish(new ReplaySubject()).refCount();
+        .pipe(mergeWith(compileBinFiles(target, format)))
+        .pipe(takeLast(1))
+        .pipe(share({ connector: () => new ReplaySubject(), resetOnError: false, resetOnComplete: false, resetOnRefCountZero: false }))
 }))({});
 
 function compileBinFiles(target, format) {
@@ -58,7 +67,7 @@ function compileTypescript(out, tsconfigPath, tsconfigOverrides) {
     const writeDTypes = observableFromStreams(dts, sourcemaps.write('./', { includeContent: false }), gulp.dest(out));
     const mapFile = tsProject.options.module === 5 ? esmMapFile : cjsMapFile;
     const writeJS = observableFromStreams(js, sourcemaps.write('./', { mapFile, includeContent: false }), gulp.dest(out));
-    return Observable.forkJoin(writeSources, writeDTypes, writeJS);
+    return ObservableForkJoin(writeSources, writeDTypes, writeJS);
 }
 
 function cjsMapFile(mapFilePath) { return mapFilePath; }
diff --git a/js/gulp/util.js b/js/gulp/util.js
index b86bb656e06..d8cde29e8fa 100644
--- a/js/gulp/util.js
+++ b/js/gulp/util.js
@@ -20,7 +20,19 @@ const path = require(`path`);
 const pump = require(`stream`).pipeline;
 const child_process = require(`child_process`);
 const { targets, modules } = require('./argv');
-const { Observable, ReplaySubject } = require('rxjs');
+const {
+    ReplaySubject,
+    empty: ObservableEmpty,
+    throwError: ObservableThrow,
+    fromEvent: ObservableFromEvent
+} = require('rxjs');
+const {
+    share,
+    flatMap,
+    takeUntil,
+    defaultIfEmpty,
+    mergeWith,
+} = require('rxjs/operators');
 const asyncDone = require('util').promisify(require('async-done'));
 
 const mainExport = `Arrow`;
@@ -102,16 +114,17 @@ function spawnGulpCommandInChildProcess(command, target, format) {
         .catch((e) => { throw `Error in "${command}:${taskName(target, format)}" task`; });
 }
 
-const logAndDie = (e) => { if (e) { process.exit(1); } };
+const logAndDie = (e) => { if (e) { process.exit(1) } };
 function observableFromStreams(...streams) {
-    if (streams.length <= 0) { return Observable.empty(); }
+    if (streams.length <= 0) { return ObservableEmpty(); }
     const pumped = streams.length <= 1 ? streams[0] : pump(...streams, logAndDie);
-    const fromEvent = Observable.fromEvent.bind(null, pumped);
-    const streamObs = fromEvent(`data`)
-               .merge(fromEvent(`error`).flatMap((e) => Observable.throw(e)))
-           .takeUntil(fromEvent(`end`).merge(fromEvent(`close`)))
-           .defaultIfEmpty(`empty stream`)
-           .multicast(new ReplaySubject()).refCount();
+    const fromEvent = ObservableFromEvent.bind(null, pumped);
+    const streamObs = fromEvent(`data`).pipe(
+        mergeWith(fromEvent(`error`).pipe(flatMap((e) => ObservableThrow(e)))),
+        takeUntil(fromEvent(`end`).pipe(mergeWith(fromEvent(`close`)))),
+        defaultIfEmpty(`empty stream`),
+        share({ connector: () => new ReplaySubject(), resetOnError: false, resetOnComplete: false, resetOnRefCountZero: false })
+    );
     streamObs.stream = pumped;
     streamObs.observable = streamObs;
     return streamObs;
diff --git a/js/gulpfile.js b/js/gulpfile.js
index bd860e16340..a2c0ce734f8 100644
--- a/js/gulpfile.js
+++ b/js/gulpfile.js
@@ -17,8 +17,12 @@
 
 const del = require('del');
 const gulp = require('gulp');
-const { Observable } = require('rxjs');
 const { targets } = require('./gulp/argv');
+const {
+    from: ObservableFrom,
+    bindNodeCallback: ObservableBindNodeCallback
+} = require('rxjs');
+const { flatMap } = require('rxjs/operators');
 const cleanTask = require('./gulp/clean-task');
 const compileTask = require('./gulp/compile-task');
 const packageTask = require('./gulp/package-task');
@@ -83,10 +87,9 @@ gulp.task(`compile`, gulpConcurrent(getTasks(`compile`)));
 gulp.task(`package`, gulpConcurrent(getTasks(`package`)));
 gulp.task(`default`,  gulp.series(`clean`, `build`, `test`));
 
-function gulpConcurrent(tasks) {
-    const numCPUs = Math.max(1, require('os').cpus().length * 0.75) | 0;
-    return () => Observable.from(tasks.map((task) => gulp.series(task)))
-        .flatMap((task) => Observable.bindNodeCallback(task)(), numCPUs);
+function gulpConcurrent(tasks, numCPUs = Math.max(1, require('os').cpus().length * 0.5) | 0) {
+    return () => ObservableFrom(tasks.map((task) => gulp.series(task)))
+        .pipe(flatMap((task) => ObservableBindNodeCallback(task)(), numCPUs || 1));
 }
 
 function getTasks(name) {
diff --git a/js/package.json b/js/package.json
index 040bb4ff5b6..7c3fc0ab856 100644
--- a/js/package.json
+++ b/js/package.json
@@ -71,7 +71,7 @@
     "benny": "3.6.15",
     "cpy": "8.1.2",
     "cross-env": "7.0.3",
-    "del-cli": "3.0.1",
+    "del-cli": "4.0.0",
     "eslint": "7.27.0",
     "eslint-plugin-jest": "24.3.6",
     "esm": "https://github.com/jsg2021/esm/releases/download/v3.x.x-pr883/esm-3.x.x-pr883.tgz",
@@ -82,8 +82,8 @@
     "gulp-rename": "2.0.0",
     "gulp-sourcemaps": "3.0.0",
     "gulp-typescript": "5.0.1",
-    "ix": "2.5.3",
-    "jest": "27.0.1",
+    "ix": "4.4.1",
+    "jest": "27.0.6",
     "jest-silent-reporter": "0.5.0",
     "lerna": "4.0.0",
     "memfs": "3.2.2",
@@ -91,7 +91,7 @@
     "multistream": "4.1.0",
     "npm-run-all": "4.1.5",
     "randomatic": "3.1.1",
-    "rxjs": "5.5.11",
+    "rxjs": "7.2.0",
     "ts-jest": "27.0.3",
     "ts-node": "10.0.0",
     "typedoc": "0.20.36",
diff --git a/js/test/unit/builders/builder-tests.ts b/js/test/unit/builders/builder-tests.ts
index 87dbcabfcf6..b6fa60271bc 100644
--- a/js/test/unit/builders/builder-tests.ts
+++ b/js/test/unit/builders/builder-tests.ts
@@ -16,7 +16,8 @@
 // under the License.
 
 import '../../jest-extensions';
-import { AsyncIterable } from 'ix';
+import { from, fromDOMStream, toArray } from 'ix/asynciterable';
+import { fromNodeStream } from 'ix/asynciterable/fromnodestream';
 import { validateVector } from './utils';
 import * as generate from '../../generate-test-data';
 import { Type, DataType, Chunked, util, Builder, UnionVector } from 'apache-arrow';
@@ -243,11 +244,10 @@ async function encodeChunks<T extends DataType, TNull = any>(values: (T['TValue'
 
 async function encodeChunksDOM<T extends DataType, TNull = any>(values: (T['TValue'] | TNull)[], options: BuilderTransformOptions<T, TNull>) {
 
-    const stream = AsyncIterable
-        .from(values).toDOMStream()
+    const stream = from(values).toDOMStream()
         .pipeThrough(Builder.throughDOM(options));
 
-    const chunks = await AsyncIterable.fromDOMStream(stream).toArray();
+    const chunks = await fromDOMStream(stream).pipe(toArray);
 
     return Chunked.concat(...chunks);
 }
@@ -258,12 +258,11 @@ async function encodeChunksNode<T extends DataType, TNull = any>(values: (T['TVa
         options.nullValues =  [...options.nullValues, undefined] as TNull[];
     }
 
-    const stream = AsyncIterable
-        .from(fillNA(values, [undefined]))
+    const stream = from(fillNA(values, [undefined]))
         .toNodeStream({ objectMode: true })
         .pipe(Builder.throughNode(options));
 
-    const chunks: any[] = await AsyncIterable.fromNodeStream(stream, options.highWaterMark).toArray();
+    const chunks: any[] = await fromNodeStream(stream, options.highWaterMark).pipe(toArray);
 
     return Chunked.concat(...chunks);
 }
diff --git a/js/test/unit/builders/utils.ts b/js/test/unit/builders/utils.ts
index a9162f64554..9bd16fff38c 100644
--- a/js/test/unit/builders/utils.ts
+++ b/js/test/unit/builders/utils.ts
@@ -16,7 +16,9 @@
 // under the License.
 
 import '../../jest-extensions';
-import { AsyncIterable } from 'ix';
+import { from, fromDOMStream, toArray } from 'ix/asynciterable';
+import { fromNodeStream } from 'ix/asynciterable/fromnodestream';
+import 'ix/Ix.node';
 import { util } from 'apache-arrow';
 import { Builder } from 'apache-arrow';
 import { DataType, Vector, Chunked } from 'apache-arrow';
@@ -147,9 +149,9 @@ export function encodeEachDOM<T extends DataType>(typeFactory: () => T, chunkLen
     return async function encodeEachDOM<TNull = any>(vals: (T['TValue'] | TNull)[], nullValues?: TNull[]) {
         const type = typeFactory();
         const strategy = { highWaterMark: chunkLen };
-        const source = AsyncIterable.from(vals).toDOMStream();
+        const source = from(vals).toDOMStream();
         const builder = Builder.throughDOM({ type, nullValues, readableStrategy: strategy, writableStrategy: strategy });
-        const chunks = await AsyncIterable.fromDOMStream(source.pipeThrough(builder)).toArray();
+        const chunks = await fromDOMStream(source.pipeThrough(builder)).pipe(toArray);
         return Chunked.concat(...chunks) as Chunked<T>;
     };
 }
@@ -158,10 +160,10 @@ export function encodeEachNode<T extends DataType>(typeFactory: () => T, chunkLe
     return async function encodeEachNode<TNull = any>(vals: (T['TValue'] | TNull)[], nullValues?: TNull[]) {
         const type = typeFactory();
         const vals_ = vals.map((x) => x === null ? undefined : x);
-        const source = AsyncIterable.from(vals_).toNodeStream({ objectMode: true });
+        const source = from(vals_).toNodeStream({ objectMode: true });
         const nulls_ = nullValues ? nullValues.map((x) => x === null ? undefined : x) : nullValues;
         const builder = Builder.throughNode({ type, nullValues: nulls_, highWaterMark: chunkLen });
-        const chunks: any[] = await AsyncIterable.fromNodeStream(source.pipe(builder), chunkLen).toArray();
+        const chunks: any[] = await fromNodeStream(source.pipe(builder), chunkLen).pipe(toArray);
         return Chunked.concat(...chunks) as Chunked<T>;
     };
 }
diff --git a/js/test/unit/ipc/writer/streams-dom-tests.ts b/js/test/unit/ipc/writer/streams-dom-tests.ts
index 18f161f7d73..a19ddcdd77b 100644
--- a/js/test/unit/ipc/writer/streams-dom-tests.ts
+++ b/js/test/unit/ipc/writer/streams-dom-tests.ts
@@ -20,7 +20,8 @@ import {
     // generateDictionaryTables
 } from '../../../data/tables';
 
-import { AsyncIterable } from 'ix';
+import { from, as } from 'ix/asynciterable';
+import { tap, flatMap } from 'ix/asynciterable/operators';
 
 import {
     Table,
@@ -232,9 +233,9 @@ import {
         it(`should write a stream of tables to the same output stream`, async () => {
 
             const tables = [] as Table[];
-            const stream = AsyncIterable.from(generateRandomTables([10, 20, 30]))
+            const stream: ReadableStream<any> = from(generateRandomTables([10, 20, 30]))
                 // insert some asynchrony
-                .tap({ async next(table: Table) { tables.push(table); await sleep(1); } })
+                .pipe(tap({ async next(table: Table) { tables.push(table); await sleep(1); } }))
                 .pipeThrough(RecordBatchStreamWriter.throughDOM(opts));
 
             for await (const reader of RecordBatchReader.readAll(stream)) {
@@ -250,11 +251,11 @@ import {
         it(`should write a stream of record batches to the same output stream`, async () => {
 
             const tables = [] as Table[];
-            const stream = AsyncIterable.from(generateRandomTables([10, 20, 30]))
+            const stream = from(generateRandomTables([10, 20, 30]))
                 // insert some asynchrony
-                .tap({ async next(table: Table) { tables.push(table); await sleep(1); } })
+                .pipe(tap({ async next(table: Table) { tables.push(table); await sleep(1); } }))
                 // flatMap from Table -> RecordBatches[]
-                .flatMap((table) => AsyncIterable.as(table.chunks))
+                .pipe(flatMap((table) => as(table.chunks)))
                 .pipeThrough(RecordBatchStreamWriter.throughDOM(opts));
 
             for await (const reader of RecordBatchReader.readAll(stream)) {
diff --git a/js/test/unit/ipc/writer/streams-node-tests.ts b/js/test/unit/ipc/writer/streams-node-tests.ts
index 52126f612c3..662129b1b6d 100644
--- a/js/test/unit/ipc/writer/streams-node-tests.ts
+++ b/js/test/unit/ipc/writer/streams-node-tests.ts
@@ -20,7 +20,9 @@ import {
     // generateDictionaryTables
 } from '../../../data/tables';
 
-import { AsyncIterable } from 'ix';
+import { from, as } from 'ix/asynciterable';
+import { tap, flatMap } from 'ix/asynciterable/operators';
+import 'ix/Ix.node';
 
 import {
     Table,
@@ -231,10 +233,9 @@ import {
 
             const tables = [] as Table[];
             const writer = RecordBatchStreamWriter.throughNode({ autoDestroy: false });
-            const stream = AsyncIterable
-                .from(generateRandomTables([10, 20, 30]))
+            const stream = from(generateRandomTables([10, 20, 30]))
                 // insert some asynchrony
-                .tap({ async next(table: Table) { tables.push(table); await sleep(1); } })
+                .pipe(tap({ async next(table: Table) { tables.push(table); await sleep(1); } }))
                 .pipe(writer);
 
             for await (const reader of RecordBatchReader.readAll(stream)) {
@@ -252,11 +253,10 @@ import {
 
             const tables = [] as Table[];
             const writer = RecordBatchStreamWriter.throughNode({ autoDestroy: false });
-            const stream = AsyncIterable
-                .from(generateRandomTables([10, 20, 30]))
+            const stream = from(generateRandomTables([10, 20, 30]))
                 // insert some asynchrony
-                .tap({ async next(table: Table) { tables.push(table); await sleep(1); } })
-                .flatMap((table) => AsyncIterable.as(table.chunks))
+                .pipe(tap({ async next(table: Table) { tables.push(table); await sleep(1); } }))
+                .pipe(flatMap((table) => as(table.chunks)))
                 .pipe(writer);
 
             for await (const reader of RecordBatchReader.readAll(stream)) {
diff --git a/js/yarn.lock b/js/yarn.lock
index 3d88ffd759a..fd24b340139 100644
--- a/js/yarn.lock
+++ b/js/yarn.lock
@@ -385,94 +385,94 @@
   resolved "https://registry.yarnpkg.com/@istanbuljs/schema/-/schema-0.1.3.tgz#e45e384e4b8ec16bce2fd903af78450f6bf7ec98"
   integrity sha512-ZXRY4jNvVgSVQ8DL3LTcakaAtXwTVUxE81hslsyD2AtoXW/wVob10HkOJ1X/pAlcI7D+2YoZKg5do8G/w6RYgA==
 
-"@jest/console@^27.0.2":
-  version "27.0.2"
-  resolved "https://registry.yarnpkg.com/@jest/console/-/console-27.0.2.tgz#b8eeff8f21ac51d224c851e1729d2630c18631e6"
-  integrity sha512-/zYigssuHLImGeMAACkjI4VLAiiJznHgAl3xnFT19iWyct2LhrH3KXOjHRmxBGTkiPLZKKAJAgaPpiU9EZ9K+w==
+"@jest/console@^27.0.6":
+  version "27.0.6"
+  resolved "https://registry.yarnpkg.com/@jest/console/-/console-27.0.6.tgz#3eb72ea80897495c3d73dd97aab7f26770e2260f"
+  integrity sha512-fMlIBocSHPZ3JxgWiDNW/KPj6s+YRd0hicb33IrmelCcjXo/pXPwvuiKFmZz+XuqI/1u7nbUK10zSsWL/1aegg==
   dependencies:
-    "@jest/types" "^27.0.2"
+    "@jest/types" "^27.0.6"
     "@types/node" "*"
     chalk "^4.0.0"
-    jest-message-util "^27.0.2"
-    jest-util "^27.0.2"
+    jest-message-util "^27.0.6"
+    jest-util "^27.0.6"
     slash "^3.0.0"
 
-"@jest/core@^27.0.1", "@jest/core@^27.0.3":
-  version "27.0.3"
-  resolved "https://registry.yarnpkg.com/@jest/core/-/core-27.0.3.tgz#b5a38675fa0466450a7fd465f4b226762cb592a2"
-  integrity sha512-rN8lr/OJ8iApcQUh4khnMaOCVX4oRnLwy2tPW3Vh70y62K8Da8fhkxMUq0xX9VPa4+yWUm0tGc/jUSJi+Jzuwg==
+"@jest/core@^27.0.6":
+  version "27.0.6"
+  resolved "https://registry.yarnpkg.com/@jest/core/-/core-27.0.6.tgz#c5f642727a0b3bf0f37c4b46c675372d0978d4a1"
+  integrity sha512-SsYBm3yhqOn5ZLJCtccaBcvD/ccTLCeuDv8U41WJH/V1MW5eKUkeMHT9U+Pw/v1m1AIWlnIW/eM2XzQr0rEmow==
   dependencies:
-    "@jest/console" "^27.0.2"
-    "@jest/reporters" "^27.0.2"
-    "@jest/test-result" "^27.0.2"
-    "@jest/transform" "^27.0.2"
-    "@jest/types" "^27.0.2"
+    "@jest/console" "^27.0.6"
+    "@jest/reporters" "^27.0.6"
+    "@jest/test-result" "^27.0.6"
+    "@jest/transform" "^27.0.6"
+    "@jest/types" "^27.0.6"
     "@types/node" "*"
     ansi-escapes "^4.2.1"
     chalk "^4.0.0"
     emittery "^0.8.1"
     exit "^0.1.2"
     graceful-fs "^4.2.4"
-    jest-changed-files "^27.0.2"
-    jest-config "^27.0.3"
-    jest-haste-map "^27.0.2"
-    jest-message-util "^27.0.2"
-    jest-regex-util "^27.0.1"
-    jest-resolve "^27.0.2"
-    jest-resolve-dependencies "^27.0.3"
-    jest-runner "^27.0.3"
-    jest-runtime "^27.0.3"
-    jest-snapshot "^27.0.2"
-    jest-util "^27.0.2"
-    jest-validate "^27.0.2"
-    jest-watcher "^27.0.2"
+    jest-changed-files "^27.0.6"
+    jest-config "^27.0.6"
+    jest-haste-map "^27.0.6"
+    jest-message-util "^27.0.6"
+    jest-regex-util "^27.0.6"
+    jest-resolve "^27.0.6"
+    jest-resolve-dependencies "^27.0.6"
+    jest-runner "^27.0.6"
+    jest-runtime "^27.0.6"
+    jest-snapshot "^27.0.6"
+    jest-util "^27.0.6"
+    jest-validate "^27.0.6"
+    jest-watcher "^27.0.6"
     micromatch "^4.0.4"
     p-each-series "^2.1.0"
     rimraf "^3.0.0"
     slash "^3.0.0"
     strip-ansi "^6.0.0"
 
-"@jest/environment@^27.0.3":
-  version "27.0.3"
-  resolved "https://registry.yarnpkg.com/@jest/environment/-/environment-27.0.3.tgz#68769b1dfdd213e3456169d64fbe9bd63a5fda92"
-  integrity sha512-pN9m7fbKsop5vc3FOfH8NF7CKKdRbEZzcxfIo1n2TT6ucKWLFq0P6gCJH0GpnQp036++yY9utHOxpeT1WnkWTA==
+"@jest/environment@^27.0.6":
+  version "27.0.6"
+  resolved "https://registry.yarnpkg.com/@jest/environment/-/environment-27.0.6.tgz#ee293fe996db01d7d663b8108fa0e1ff436219d2"
+  integrity sha512-4XywtdhwZwCpPJ/qfAkqExRsERW+UaoSRStSHCCiQTUpoYdLukj+YJbQSFrZjhlUDRZeNiU9SFH0u7iNimdiIg==
   dependencies:
-    "@jest/fake-timers" "^27.0.3"
-    "@jest/types" "^27.0.2"
+    "@jest/fake-timers" "^27.0.6"
+    "@jest/types" "^27.0.6"
     "@types/node" "*"
-    jest-mock "^27.0.3"
+    jest-mock "^27.0.6"
 
-"@jest/fake-timers@^27.0.3":
-  version "27.0.3"
-  resolved "https://registry.yarnpkg.com/@jest/fake-timers/-/fake-timers-27.0.3.tgz#9899ba6304cc636734c74478df502e18136461dd"
-  integrity sha512-fQ+UCKRIYKvTCEOyKPnaPnomLATIhMnHC/xPZ7yT1Uldp7yMgMxoYIFidDbpSTgB79+/U+FgfoD30c6wg3IUjA==
+"@jest/fake-timers@^27.0.6":
+  version "27.0.6"
+  resolved "https://registry.yarnpkg.com/@jest/fake-timers/-/fake-timers-27.0.6.tgz#cbad52f3fe6abe30e7acb8cd5fa3466b9588e3df"
+  integrity sha512-sqd+xTWtZ94l3yWDKnRTdvTeZ+A/V7SSKrxsrOKSqdyddb9CeNRF8fbhAU0D7ZJBpTTW2nbp6MftmKJDZfW2LQ==
   dependencies:
-    "@jest/types" "^27.0.2"
+    "@jest/types" "^27.0.6"
     "@sinonjs/fake-timers" "^7.0.2"
     "@types/node" "*"
-    jest-message-util "^27.0.2"
-    jest-mock "^27.0.3"
-    jest-util "^27.0.2"
+    jest-message-util "^27.0.6"
+    jest-mock "^27.0.6"
+    jest-util "^27.0.6"
 
-"@jest/globals@^27.0.3":
-  version "27.0.3"
-  resolved "https://registry.yarnpkg.com/@jest/globals/-/globals-27.0.3.tgz#1cf8933b7791bba0b99305cbf39fd4d2e3fe4060"
-  integrity sha512-OzsIuf7uf+QalqAGbjClyezzEcLQkdZ+7PejUrZgDs+okdAK8GwRCGcYCirHvhMBBQh60Jr3NlIGbn/KBPQLEQ==
+"@jest/globals@^27.0.6":
+  version "27.0.6"
+  resolved "https://registry.yarnpkg.com/@jest/globals/-/globals-27.0.6.tgz#48e3903f99a4650673d8657334d13c9caf0e8f82"
+  integrity sha512-DdTGCP606rh9bjkdQ7VvChV18iS7q0IMJVP1piwTWyWskol4iqcVwthZmoJEf7obE1nc34OpIyoVGPeqLC+ryw==
   dependencies:
-    "@jest/environment" "^27.0.3"
-    "@jest/types" "^27.0.2"
-    expect "^27.0.2"
+    "@jest/environment" "^27.0.6"
+    "@jest/types" "^27.0.6"
+    expect "^27.0.6"
 
-"@jest/reporters@^27.0.2":
-  version "27.0.2"
-  resolved "https://registry.yarnpkg.com/@jest/reporters/-/reporters-27.0.2.tgz#ad73835d1cd54da08b0998a70b14446405e8e0d9"
-  integrity sha512-SVQjew/kafNxSN1my4praGQP+VPVGHsU8zqiEDppLvq6j1lryIjdNb9P+bZSsKeifU4bIoaPnf9Ui0tK9WOpFA==
+"@jest/reporters@^27.0.6":
+  version "27.0.6"
+  resolved "https://registry.yarnpkg.com/@jest/reporters/-/reporters-27.0.6.tgz#91e7f2d98c002ad5df94d5b5167c1eb0b9fd5b00"
+  integrity sha512-TIkBt09Cb2gptji3yJXb3EE+eVltW6BjO7frO7NEfjI9vSIYoISi5R3aI3KpEDXlB1xwB+97NXIqz84qYeYsfA==
   dependencies:
     "@bcoe/v8-coverage" "^0.2.3"
-    "@jest/console" "^27.0.2"
-    "@jest/test-result" "^27.0.2"
-    "@jest/transform" "^27.0.2"
-    "@jest/types" "^27.0.2"
+    "@jest/console" "^27.0.6"
+    "@jest/test-result" "^27.0.6"
+    "@jest/transform" "^27.0.6"
+    "@jest/types" "^27.0.6"
     chalk "^4.0.0"
     collect-v8-coverage "^1.0.0"
     exit "^0.1.2"
@@ -483,60 +483,60 @@
     istanbul-lib-report "^3.0.0"
     istanbul-lib-source-maps "^4.0.0"
     istanbul-reports "^3.0.2"
-    jest-haste-map "^27.0.2"
-    jest-resolve "^27.0.2"
-    jest-util "^27.0.2"
-    jest-worker "^27.0.2"
+    jest-haste-map "^27.0.6"
+    jest-resolve "^27.0.6"
+    jest-util "^27.0.6"
+    jest-worker "^27.0.6"
     slash "^3.0.0"
     source-map "^0.6.0"
     string-length "^4.0.1"
     terminal-link "^2.0.0"
-    v8-to-istanbul "^7.0.0"
+    v8-to-istanbul "^8.0.0"
 
-"@jest/source-map@^27.0.1":
-  version "27.0.1"
-  resolved "https://registry.yarnpkg.com/@jest/source-map/-/source-map-27.0.1.tgz#2afbf73ddbaddcb920a8e62d0238a0a9e0a8d3e4"
-  integrity sha512-yMgkF0f+6WJtDMdDYNavmqvbHtiSpwRN2U/W+6uztgfqgkq/PXdKPqjBTUF1RD/feth4rH5N3NW0T5+wIuln1A==
+"@jest/source-map@^27.0.6":
+  version "27.0.6"
+  resolved "https://registry.yarnpkg.com/@jest/source-map/-/source-map-27.0.6.tgz#be9e9b93565d49b0548b86e232092491fb60551f"
+  integrity sha512-Fek4mi5KQrqmlY07T23JRi0e7Z9bXTOOD86V/uS0EIW4PClvPDqZOyFlLpNJheS6QI0FNX1CgmPjtJ4EA/2M+g==
   dependencies:
     callsites "^3.0.0"
     graceful-fs "^4.2.4"
     source-map "^0.6.0"
 
-"@jest/test-result@^27.0.2":
-  version "27.0.2"
-  resolved "https://registry.yarnpkg.com/@jest/test-result/-/test-result-27.0.2.tgz#0451049e32ceb609b636004ccc27c8fa22263f10"
-  integrity sha512-gcdWwL3yP5VaIadzwQtbZyZMgpmes8ryBAJp70tuxghiA8qL4imJyZex+i+USQH2H4jeLVVszhwntgdQ97fccA==
+"@jest/test-result@^27.0.6":
+  version "27.0.6"
+  resolved "https://registry.yarnpkg.com/@jest/test-result/-/test-result-27.0.6.tgz#3fa42015a14e4fdede6acd042ce98c7f36627051"
+  integrity sha512-ja/pBOMTufjX4JLEauLxE3LQBPaI2YjGFtXexRAjt1I/MbfNlMx0sytSX3tn5hSLzQsR3Qy2rd0hc1BWojtj9w==
   dependencies:
-    "@jest/console" "^27.0.2"
-    "@jest/types" "^27.0.2"
+    "@jest/console" "^27.0.6"
+    "@jest/types" "^27.0.6"
     "@types/istanbul-lib-coverage" "^2.0.0"
     collect-v8-coverage "^1.0.0"
 
-"@jest/test-sequencer@^27.0.3":
-  version "27.0.3"
-  resolved "https://registry.yarnpkg.com/@jest/test-sequencer/-/test-sequencer-27.0.3.tgz#2a8632b86a9a6f8900e514917cdab6a062e71049"
-  integrity sha512-DcLTzraZ8xLr5fcIl+CF14vKeBBpBrn55wFxI9Ju+dhEBdjRdJQ/Z/pLkMehkPZWIQ+rR23J8e+wFDkfjree0Q==
+"@jest/test-sequencer@^27.0.6":
+  version "27.0.6"
+  resolved "https://registry.yarnpkg.com/@jest/test-sequencer/-/test-sequencer-27.0.6.tgz#80a913ed7a1130545b1cd777ff2735dd3af5d34b"
+  integrity sha512-bISzNIApazYOlTHDum9PwW22NOyDa6VI31n6JucpjTVM0jD6JDgqEZ9+yn575nDdPF0+4csYDxNNW13NvFQGZA==
   dependencies:
-    "@jest/test-result" "^27.0.2"
+    "@jest/test-result" "^27.0.6"
     graceful-fs "^4.2.4"
-    jest-haste-map "^27.0.2"
-    jest-runtime "^27.0.3"
+    jest-haste-map "^27.0.6"
+    jest-runtime "^27.0.6"
 
-"@jest/transform@^27.0.2":
-  version "27.0.2"
-  resolved "https://registry.yarnpkg.com/@jest/transform/-/transform-27.0.2.tgz#b073b7c589e3f4b842102468875def2bb722d6b5"
-  integrity sha512-H8sqKlgtDfVog/s9I4GG2XMbi4Ar7RBxjsKQDUhn2XHAi3NG+GoQwWMER+YfantzExbjNqQvqBHzo/G2pfTiPw==
+"@jest/transform@^27.0.6":
+  version "27.0.6"
+  resolved "https://registry.yarnpkg.com/@jest/transform/-/transform-27.0.6.tgz#189ad7107413208f7600f4719f81dd2f7278cc95"
+  integrity sha512-rj5Dw+mtIcntAUnMlW/Vju5mr73u8yg+irnHwzgtgoeI6cCPOvUwQ0D1uQtc/APmWgvRweEb1g05pkUpxH3iCA==
   dependencies:
     "@babel/core" "^7.1.0"
-    "@jest/types" "^27.0.2"
+    "@jest/types" "^27.0.6"
     babel-plugin-istanbul "^6.0.0"
     chalk "^4.0.0"
     convert-source-map "^1.4.0"
     fast-json-stable-stringify "^2.0.0"
     graceful-fs "^4.2.4"
-    jest-haste-map "^27.0.2"
-    jest-regex-util "^27.0.1"
-    jest-util "^27.0.2"
+    jest-haste-map "^27.0.6"
+    jest-regex-util "^27.0.6"
+    jest-util "^27.0.6"
     micromatch "^4.0.4"
     pirates "^4.0.1"
     slash "^3.0.0"
@@ -565,6 +565,17 @@
     "@types/yargs" "^16.0.0"
     chalk "^4.0.0"
 
+"@jest/types@^27.0.6":
+  version "27.0.6"
+  resolved "https://registry.yarnpkg.com/@jest/types/-/types-27.0.6.tgz#9a992bc517e0c49f035938b8549719c2de40706b"
+  integrity sha512-aSquT1qa9Pik26JK5/3rvnYb4bGtm1VFNesHKmNTwmPIgOrixvhL2ghIvFRNEpzy3gU+rUgjIF/KodbkFAl++g==
+  dependencies:
+    "@types/istanbul-lib-coverage" "^2.0.0"
+    "@types/istanbul-reports" "^3.0.0"
+    "@types/node" "*"
+    "@types/yargs" "^16.0.0"
+    chalk "^4.0.0"
+
 "@lerna/add@4.0.0":
   version "4.0.0"
   resolved "https://registry.yarnpkg.com/@lerna/add/-/add-4.0.0.tgz#c36f57d132502a57b9e7058d1548b7a565ef183f"
@@ -1581,15 +1592,20 @@
   resolved "https://registry.yarnpkg.com/@types/minimist/-/minimist-1.2.1.tgz#283f669ff76d7b8260df8ab7a4262cc83d988256"
   integrity sha512-fZQQafSREFyuZcdWFAExYjBiCL7AUCdgsk80iO0q4yihYYdcIiH28CcuPTGFgLOCC8RlW49GSQxdHwZP+I7CNg==
 
+"@types/minimist@^1.2.1":
+  version "1.2.2"
+  resolved "https://registry.yarnpkg.com/@types/minimist/-/minimist-1.2.2.tgz#ee771e2ba4b3dc5b372935d549fd9617bf345b8c"
+  integrity sha512-jhuKLIRrhvCPLqwPcx6INqmKeiA5EWrsCOPhrlFSrbrmU4ZMPjj5Ul/oLCMDO98XRUIwVm78xICz4EPCektzeQ==
+
 "@types/node@*", "@types/node@^15.6.1":
   version "15.9.0"
   resolved "https://registry.yarnpkg.com/@types/node/-/node-15.9.0.tgz#0b7f6c33ca5618fe329a9d832b478b4964d325a8"
   integrity sha512-AR1Vq1Ei1GaA5FjKL5PBqblTZsL5M+monvGSZwe6sSIdGiuu7Xr/pNwWJY+0ZQuN8AapD/XMB5IzBAyYRFbocA==
 
-"@types/node@^11.11.6":
-  version "11.15.54"
-  resolved "https://registry.yarnpkg.com/@types/node/-/node-11.15.54.tgz#59ed60e7b0d56905a654292e8d73275034eb6283"
-  integrity sha512-1RWYiq+5UfozGsU6MwJyFX6BtktcT10XRjvcAQmskCtMcW3tPske88lM/nHv7BQG1w9KBXI1zPGuu5PnNCX14g==
+"@types/node@^13.7.4":
+  version "13.13.52"
+  resolved "https://registry.yarnpkg.com/@types/node/-/node-13.13.52.tgz#03c13be70b9031baaed79481c0c0cfb0045e53f7"
+  integrity sha512-s3nugnZumCC//n4moGGe6tkNMyYEdaDBitVjwPxXmR5lnMG5dHePinH2EdxkG3Rh1ghFHHixAG4NJhpJW1rthQ==
 
 "@types/normalize-package-data@^2.4.0":
   version "2.4.0"
@@ -2173,16 +2189,16 @@ aws4@^1.8.0:
   resolved "https://registry.yarnpkg.com/aws4/-/aws4-1.11.0.tgz#d61f46d83b2519250e2784daf5b09479a8b41c59"
   integrity sha512-xh1Rl34h6Fi1DC2WWKfxUTVqRsNnr6LsKz2+hfwDxQJWmrx8+c7ylaqBMcHfl1U1r2dsifOvKX3LQuLNZ+XSvA==
 
-babel-jest@^27.0.2:
-  version "27.0.2"
-  resolved "https://registry.yarnpkg.com/babel-jest/-/babel-jest-27.0.2.tgz#7dc18adb01322acce62c2af76ea2c7cd186ade37"
-  integrity sha512-9OThPl3/IQbo4Yul2vMz4FYwILPQak8XelX4YGowygfHaOl5R5gfjm4iVx4d8aUugkW683t8aq0A74E7b5DU1Q==
+babel-jest@^27.0.6:
+  version "27.0.6"
+  resolved "https://registry.yarnpkg.com/babel-jest/-/babel-jest-27.0.6.tgz#e99c6e0577da2655118e3608b68761a5a69bd0d8"
+  integrity sha512-iTJyYLNc4wRofASmofpOc5NK9QunwMk+TLFgGXsTFS8uEqmd8wdI7sga0FPe2oVH3b5Agt/EAK1QjPEuKL8VfA==
   dependencies:
-    "@jest/transform" "^27.0.2"
-    "@jest/types" "^27.0.2"
+    "@jest/transform" "^27.0.6"
+    "@jest/types" "^27.0.6"
     "@types/babel__core" "^7.1.14"
     babel-plugin-istanbul "^6.0.0"
-    babel-preset-jest "^27.0.1"
+    babel-preset-jest "^27.0.6"
     chalk "^4.0.0"
     graceful-fs "^4.2.4"
     slash "^3.0.0"
@@ -2198,10 +2214,10 @@ babel-plugin-istanbul@^6.0.0:
     istanbul-lib-instrument "^4.0.0"
     test-exclude "^6.0.0"
 
-babel-plugin-jest-hoist@^27.0.1:
-  version "27.0.1"
-  resolved "https://registry.yarnpkg.com/babel-plugin-jest-hoist/-/babel-plugin-jest-hoist-27.0.1.tgz#a6d10e484c93abff0f4e95f437dad26e5736ea11"
-  integrity sha512-sqBF0owAcCDBVEDtxqfYr2F36eSHdx7lAVGyYuOBRnKdD6gzcy0I0XrAYCZgOA3CRrLhmR+Uae9nogPzmAtOfQ==
+babel-plugin-jest-hoist@^27.0.6:
+  version "27.0.6"
+  resolved "https://registry.yarnpkg.com/babel-plugin-jest-hoist/-/babel-plugin-jest-hoist-27.0.6.tgz#f7c6b3d764af21cb4a2a1ab6870117dbde15b456"
+  integrity sha512-CewFeM9Vv2gM7Yr9n5eyyLVPRSiBnk6lKZRjgwYnGKSl9M14TMn2vkN02wTF04OGuSDLEzlWiMzvjXuW9mB6Gw==
   dependencies:
     "@babel/template" "^7.3.3"
     "@babel/types" "^7.3.3"
@@ -2226,12 +2242,12 @@ babel-preset-current-node-syntax@^1.0.0:
     "@babel/plugin-syntax-optional-chaining" "^7.8.3"
     "@babel/plugin-syntax-top-level-await" "^7.8.3"
 
-babel-preset-jest@^27.0.1:
-  version "27.0.1"
-  resolved "https://registry.yarnpkg.com/babel-preset-jest/-/babel-preset-jest-27.0.1.tgz#7a50c75d16647c23a2cf5158d5bb9eb206b10e20"
-  integrity sha512-nIBIqCEpuiyhvjQs2mVNwTxQQa2xk70p9Dd/0obQGBf8FBzbnI8QhQKzLsWMN2i6q+5B0OcWDtrboBX5gmOLyA==
+babel-preset-jest@^27.0.6:
+  version "27.0.6"
+  resolved "https://registry.yarnpkg.com/babel-preset-jest/-/babel-preset-jest-27.0.6.tgz#909ef08e9f24a4679768be2f60a3df0856843f9d"
+  integrity sha512-WObA0/Biw2LrVVwZkF/2GqbOdzhKD6Fkdwhoy9ASIrOWr/zodcSpQh72JOkEn6NWyjmnPDjNSqaGN4KnpKzhXw==
   dependencies:
-    babel-plugin-jest-hoist "^27.0.1"
+    babel-plugin-jest-hoist "^27.0.6"
     babel-preset-current-node-syntax "^1.0.0"
 
 bach@^1.0.0:
@@ -2484,7 +2500,7 @@ camelcase@^3.0.0:
   resolved "https://registry.yarnpkg.com/camelcase/-/camelcase-3.0.0.tgz#32fc4b9fcdaf845fcdf7e73bb97cac2261f0ab0a"
   integrity sha1-MvxLn82vhF/N9+c7uXysImHwqwo=
 
-camelcase@^5.0.0, camelcase@^5.3.1:
+camelcase@^5.3.1:
   version "5.3.1"
   resolved "https://registry.yarnpkg.com/camelcase/-/camelcase-5.3.1.tgz#e3c9b31569e106811df242f715725a1f4c494320"
   integrity sha512-L28STB170nwWS63UjtlEOE3dldQApaJXZkOI1uMFfzf3rRuPegHaHesyee+YxQ+W6SvRDQV6UrdOdRiR153wJg==
@@ -3115,11 +3131,16 @@ decamelize-keys@^1.1.0:
     decamelize "^1.1.0"
     map-obj "^1.0.0"
 
-decamelize@^1.1.0, decamelize@^1.1.1, decamelize@^1.1.2, decamelize@^1.2.0:
+decamelize@^1.1.0, decamelize@^1.1.1, decamelize@^1.1.2:
   version "1.2.0"
   resolved "https://registry.yarnpkg.com/decamelize/-/decamelize-1.2.0.tgz#f6534d15148269b20352e7bee26f501f9a191290"
   integrity sha1-9lNNFRSCabIDUue+4m9QH5oZEpA=
 
+decamelize@^5.0.0:
+  version "5.0.0"
+  resolved "https://registry.yarnpkg.com/decamelize/-/decamelize-5.0.0.tgz#88358157b010ef133febfd27c18994bd80c6215b"
+  integrity sha512-U75DcT5hrio3KNtvdULAWnLiAPbFUC4191ldxMmj4FA/mRuBnmDwU0boNfPyFRhnan+Jm+haLeSn3P0afcBn4w==
+
 decimal.js@^10.2.1:
   version "10.2.1"
   resolved "https://registry.yarnpkg.com/decimal.js/-/decimal.js-10.2.1.tgz#238ae7b0f0c793d3e3cea410108b35a2c01426a3"
@@ -3198,26 +3219,26 @@ define-property@^2.0.2:
     is-descriptor "^1.0.2"
     isobject "^3.0.1"
 
-del-cli@3.0.1:
-  version "3.0.1"
-  resolved "https://registry.yarnpkg.com/del-cli/-/del-cli-3.0.1.tgz#2d27ff260204b5104cadeda86f78f180a4ebe89a"
-  integrity sha512-BLHItGr82rUbHhjMu41d+vw9Md49i81jmZSV00HdTq4t+RTHywmEht/23mNFpUl2YeLYJZJyGz4rdlMAyOxNeg==
+del-cli@4.0.0:
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/del-cli/-/del-cli-4.0.0.tgz#a7b97c823705829ca22d993efcb21368119b55c0"
+  integrity sha512-G6FD38YZ28nkI34J+oxiYGbJg/t2hCkUgg9di9311gHZWWe9hY4CphewtU5l3RO1LTYxNMxla2D/we4CbBMHcA==
   dependencies:
-    del "^5.1.0"
-    meow "^6.1.1"
+    del "^6.0.0"
+    meow "^10.0.1"
 
-del@^5.1.0:
-  version "5.1.0"
-  resolved "https://registry.yarnpkg.com/del/-/del-5.1.0.tgz#d9487c94e367410e6eff2925ee58c0c84a75b3a7"
-  integrity sha512-wH9xOVHnczo9jN2IW68BabcecVPxacIA3g/7z6vhSU/4stOKQzeCRK0yD0A24WiAAUJmmVpWqrERcTxnLo3AnA==
+del@^6.0.0:
+  version "6.0.0"
+  resolved "https://registry.yarnpkg.com/del/-/del-6.0.0.tgz#0b40d0332cea743f1614f818be4feb717714c952"
+  integrity sha512-1shh9DQ23L16oXSZKB2JxpL7iMy2E0S9d517ptA1P8iw0alkPtQcrKH7ru31rYtKwF499HkTu+DRzq3TCKDFRQ==
   dependencies:
-    globby "^10.0.1"
-    graceful-fs "^4.2.2"
+    globby "^11.0.1"
+    graceful-fs "^4.2.4"
     is-glob "^4.0.1"
     is-path-cwd "^2.2.0"
-    is-path-inside "^3.0.1"
-    p-map "^3.0.0"
-    rimraf "^3.0.0"
+    is-path-inside "^3.0.2"
+    p-map "^4.0.0"
+    rimraf "^3.0.2"
     slash "^3.0.0"
 
 delayed-stream@~1.0.0:
@@ -3278,10 +3299,10 @@ diff-sequences@^26.6.2:
   resolved "https://registry.yarnpkg.com/diff-sequences/-/diff-sequences-26.6.2.tgz#48ba99157de1923412eed41db6b6d4aa9ca7c0b1"
   integrity sha512-Mv/TDa3nZ9sbc5soK+OoA74BsS3mL37yixCvUAQkiuA4Wz6YtwP/K47n2rv2ovzHZvoiQeA5FTQOschKkEwB0Q==
 
-diff-sequences@^27.0.1:
-  version "27.0.1"
-  resolved "https://registry.yarnpkg.com/diff-sequences/-/diff-sequences-27.0.1.tgz#9c9801d52ed5f576ff0a20e3022a13ee6e297e7c"
-  integrity sha512-XPLijkfJUh/PIBnfkcSHgvD6tlYixmcMAn3osTk6jt+H0v/mgURto1XUiD9DKuGX5NDoVS6dSlA23gd9FUaCFg==
+diff-sequences@^27.0.6:
+  version "27.0.6"
+  resolved "https://registry.yarnpkg.com/diff-sequences/-/diff-sequences-27.0.6.tgz#3305cb2e55a033924054695cc66019fd7f8e5723"
+  integrity sha512-ag6wfpBFyNXZ0p8pcuIDS//D8H062ZQJ3fzYxjpmeKjnz8W4pekL3AI8VohmyZmsWW2PWaHgjsmqR6L13101VQ==
 
 diff@^4.0.1:
   version "4.0.2"
@@ -3702,17 +3723,17 @@ expand-tilde@^2.0.0, expand-tilde@^2.0.2:
   dependencies:
     homedir-polyfill "^1.0.1"
 
-expect@^27.0.2:
-  version "27.0.2"
-  resolved "https://registry.yarnpkg.com/expect/-/expect-27.0.2.tgz#e66ca3a4c9592f1c019fa1d46459a9d2084f3422"
-  integrity sha512-YJFNJe2+P2DqH+ZrXy+ydRQYO87oxRUonZImpDodR1G7qo3NYd3pL+NQ9Keqpez3cehczYwZDBC3A7xk3n7M/w==
+expect@^27.0.6:
+  version "27.0.6"
+  resolved "https://registry.yarnpkg.com/expect/-/expect-27.0.6.tgz#a4d74fbe27222c718fff68ef49d78e26a8fd4c05"
+  integrity sha512-psNLt8j2kwg42jGBDSfAlU49CEZxejN1f1PlANWDZqIhBOVU/c2Pm888FcjWJzFewhIsNWfZJeLjUjtKGiPuSw==
   dependencies:
-    "@jest/types" "^27.0.2"
+    "@jest/types" "^27.0.6"
     ansi-styles "^5.0.0"
-    jest-get-type "^27.0.1"
-    jest-matcher-utils "^27.0.2"
-    jest-message-util "^27.0.2"
-    jest-regex-util "^27.0.1"
+    jest-get-type "^27.0.6"
+    jest-matcher-utils "^27.0.6"
+    jest-message-util "^27.0.6"
+    jest-regex-util "^27.0.6"
 
 ext@^1.1.2:
   version "1.4.0"
@@ -3801,7 +3822,7 @@ fast-glob@^2.2.6:
     merge2 "^1.2.3"
     micromatch "^3.1.10"
 
-fast-glob@^3.0.3, fast-glob@^3.1.1:
+fast-glob@^3.1.1:
   version "3.2.5"
   resolved "https://registry.yarnpkg.com/fast-glob/-/fast-glob-3.2.5.tgz#7939af2a656de79a4f1901903ee8adcaa7cb9661"
   integrity sha512-2DtFcgT68wiTTiwZ2hNdJfcHNke9XOfnwmBRWXhmeKM8rF0TGwmC/Qto3S7RoZKp5cilZbxzO5iTNTQsJ+EeDg==
@@ -3913,6 +3934,14 @@ find-up@^4.0.0, find-up@^4.1.0:
     locate-path "^5.0.0"
     path-exists "^4.0.0"
 
+find-up@^5.0.0:
+  version "5.0.0"
+  resolved "https://registry.yarnpkg.com/find-up/-/find-up-5.0.0.tgz#4c92819ecb7083561e4f4a240a86be5198f536fc"
+  integrity sha512-78/PXT1wlLLDgTzDs7sjq9hzz0vXD+zn+7wypEe4fXQxCmdmqfGsEPQxmiCSQI3ajFV91bVSsvNtrJRiW6nGng==
+  dependencies:
+    locate-path "^6.0.0"
+    path-exists "^4.0.0"
+
 findup-sync@^2.0.0:
   version "2.0.0"
   resolved "https://registry.yarnpkg.com/findup-sync/-/findup-sync-2.0.0.tgz#9326b1488c22d1a6088650a86901b2d9a90a2cbc"
@@ -4312,20 +4341,6 @@ globals@^13.6.0:
   dependencies:
     type-fest "^0.20.2"
 
-globby@^10.0.1:
-  version "10.0.2"
-  resolved "https://registry.yarnpkg.com/globby/-/globby-10.0.2.tgz#277593e745acaa4646c3ab411289ec47a0392543"
-  integrity sha512-7dUi7RvCoT/xast/o/dLN53oqND4yk0nsHkhRgn9w65C4PofCLOoJ39iSOg+qVDdWQPIEj+eszMHQ+aLVwwQSg==
-  dependencies:
-    "@types/glob" "^7.1.1"
-    array-union "^2.1.0"
-    dir-glob "^3.0.1"
-    fast-glob "^3.0.3"
-    glob "^7.1.3"
-    ignore "^5.1.1"
-    merge2 "^1.2.3"
-    slash "^3.0.0"
-
 globby@^11.0.1, globby@^11.0.2, globby@^11.0.3:
   version "11.0.3"
   resolved "https://registry.yarnpkg.com/globby/-/globby-11.0.3.tgz#9b1f0cb523e171dd1ad8c7b2a9fb4b644b9593cb"
@@ -4686,7 +4701,7 @@ ignore@^4.0.3, ignore@^4.0.6:
   resolved "https://registry.yarnpkg.com/ignore/-/ignore-4.0.6.tgz#750e3db5862087b4737ebac8207ffd1ef27b25fc"
   integrity sha512-cyFDKrqc/YdcWFniJhzI42+AzS+gNwmUzOSFcRCQYwySuBBBy/KjuxWLZ/FHEH6Moq1NizMOBWyTcv8O4OZIMg==
 
-ignore@^5.1.1, ignore@^5.1.4:
+ignore@^5.1.4:
   version "5.1.8"
   resolved "https://registry.yarnpkg.com/ignore/-/ignore-5.1.8.tgz#f150a8b50a34289b33e22f5889abd4d8016f0e57"
   integrity sha512-BMpfD7PpiETpBl/A6S498BaIJ6Y/ABT93ETbby2fP00v4EbvPBXWEoaR1UBPKs3iR53pJY7EtZk5KACI57i1Uw==
@@ -4724,6 +4739,11 @@ indent-string@^4.0.0:
   resolved "https://registry.yarnpkg.com/indent-string/-/indent-string-4.0.0.tgz#624f8f4497d619b2d9768531d58f4122854d7251"
   integrity sha512-EdDDZu4A2OyIK7Lr/2zG+w5jmbuk1DVBnEwREQvBzspBJkCEbRa8GxU1lghYcaGJCnRWibjDXlq779X1/y5xwg==
 
+indent-string@^5.0.0:
+  version "5.0.0"
+  resolved "https://registry.yarnpkg.com/indent-string/-/indent-string-5.0.0.tgz#4fd2980fccaf8622d14c64d694f4cf33c81951a5"
+  integrity sha512-m6FAo/spmsW2Ab2fU35JTYwtOKa2yAwXSwgjSv1TJzh4Mh7mC3lzAOVLBprb72XsTrgkEIsl7YrFNAiDiRhIGg==
+
 infer-owner@^1.0.4:
   version "1.0.4"
   resolved "https://registry.yarnpkg.com/infer-owner/-/infer-owner-1.0.4.tgz#c4cefcaa8e51051c2a40ba2ce8a3d27295af9467"
@@ -5014,7 +5034,7 @@ is-path-cwd@^2.2.0:
   resolved "https://registry.yarnpkg.com/is-path-cwd/-/is-path-cwd-2.2.0.tgz#67d43b82664a7b5191fd9119127eb300048a9fdb"
   integrity sha512-w942bTcih8fdJPJmQHFzkS76NEP8Kzzvmw92cXsazb8intwLqPibPPdXf4ANdKV3rYMuuQYGIWtvz9JilB3NFQ==
 
-is-path-inside@^3.0.1:
+is-path-inside@^3.0.2:
   version "3.0.3"
   resolved "https://registry.yarnpkg.com/is-path-inside/-/is-path-inside-3.0.3.tgz#d231362e53a07ff2b0e0ea7fed049161ffd16283"
   integrity sha512-Fd4gABb+ycGAmKou8eMftCupSir5lRxqf4aD/vd0cD2qc4HL07OjCeuHMr8Ro4CoMaeCKDB0/ECBOVWjTwUvPQ==
@@ -5192,92 +5212,92 @@ istanbul-reports@^3.0.2:
     html-escaper "^2.0.0"
     istanbul-lib-report "^3.0.0"
 
-ix@2.5.3:
-  version "2.5.3"
-  resolved "https://registry.yarnpkg.com/ix/-/ix-2.5.3.tgz#b62899723297b2805343d1c77756b8568ed8dbec"
-  integrity sha512-Syz3oYUx5+P0T9F3BZOE2DzvWYF31Mbha1VqDbJsr4e4A+twhxTAf2c1BO8TMlwon12oAvvxAtQ+NJm9P8ISYQ==
+ix@4.4.1:
+  version "4.4.1"
+  resolved "https://registry.yarnpkg.com/ix/-/ix-4.4.1.tgz#8ec5f4f420c504a9906ffc2e2234f50147b9488a"
+  integrity sha512-Jsl7cUf7CA1MkznzAuVy4K6V1Zsfx+EAh0ZgiGhGAADaEGKiMV+sJx8Qe4hx0CsyI475Yt3ppoRS8M8oOueqlA==
   dependencies:
-    "@types/node" "^11.11.6"
-    tslib "^1.9.3"
+    "@types/node" "^13.7.4"
+    tslib "^2.3.0"
 
-jest-changed-files@^27.0.2:
-  version "27.0.2"
-  resolved "https://registry.yarnpkg.com/jest-changed-files/-/jest-changed-files-27.0.2.tgz#997253042b4a032950fc5f56abf3c5d1f8560801"
-  integrity sha512-eMeb1Pn7w7x3wue5/vF73LPCJ7DKQuC9wQUR5ebP9hDPpk5hzcT/3Hmz3Q5BOFpR3tgbmaWhJcMTVgC8Z1NuMw==
+jest-changed-files@^27.0.6:
+  version "27.0.6"
+  resolved "https://registry.yarnpkg.com/jest-changed-files/-/jest-changed-files-27.0.6.tgz#bed6183fcdea8a285482e3b50a9a7712d49a7a8b"
+  integrity sha512-BuL/ZDauaq5dumYh5y20sn4IISnf1P9A0TDswTxUi84ORGtVa86ApuBHqICL0vepqAnZiY6a7xeSPWv2/yy4eA==
   dependencies:
-    "@jest/types" "^27.0.2"
+    "@jest/types" "^27.0.6"
     execa "^5.0.0"
     throat "^6.0.1"
 
-jest-circus@^27.0.3:
-  version "27.0.3"
-  resolved "https://registry.yarnpkg.com/jest-circus/-/jest-circus-27.0.3.tgz#32006967de484e03589da944064d72e172ce3261"
-  integrity sha512-tdMfzs7SgD5l7jRcI1iB3vtQi5fHwCgo4RlO8bzZnYc05PZ+tlAOMZeS8eGYkZ2tPaRY/aRLMFWQp/8zXBrolQ==
+jest-circus@^27.0.6:
+  version "27.0.6"
+  resolved "https://registry.yarnpkg.com/jest-circus/-/jest-circus-27.0.6.tgz#dd4df17c4697db6a2c232aaad4e9cec666926668"
+  integrity sha512-OJlsz6BBeX9qR+7O9lXefWoc2m9ZqcZ5Ohlzz0pTEAG4xMiZUJoacY8f4YDHxgk0oKYxj277AfOk9w6hZYvi1Q==
   dependencies:
-    "@jest/environment" "^27.0.3"
-    "@jest/test-result" "^27.0.2"
-    "@jest/types" "^27.0.2"
+    "@jest/environment" "^27.0.6"
+    "@jest/test-result" "^27.0.6"
+    "@jest/types" "^27.0.6"
     "@types/node" "*"
     chalk "^4.0.0"
     co "^4.6.0"
     dedent "^0.7.0"
-    expect "^27.0.2"
+    expect "^27.0.6"
     is-generator-fn "^2.0.0"
-    jest-each "^27.0.2"
-    jest-matcher-utils "^27.0.2"
-    jest-message-util "^27.0.2"
-    jest-runtime "^27.0.3"
-    jest-snapshot "^27.0.2"
-    jest-util "^27.0.2"
-    pretty-format "^27.0.2"
+    jest-each "^27.0.6"
+    jest-matcher-utils "^27.0.6"
+    jest-message-util "^27.0.6"
+    jest-runtime "^27.0.6"
+    jest-snapshot "^27.0.6"
+    jest-util "^27.0.6"
+    pretty-format "^27.0.6"
     slash "^3.0.0"
     stack-utils "^2.0.3"
     throat "^6.0.1"
 
-jest-cli@^27.0.1:
-  version "27.0.3"
-  resolved "https://registry.yarnpkg.com/jest-cli/-/jest-cli-27.0.3.tgz#b733871acb526054a0f8c971d0466595c5f8316d"
-  integrity sha512-7bt9Sgv4nWH5pUnyJfdLf8CHWfo4+7lSPxeBwQx4r0vBj9jweJam/piE2U91SXtQI+ckm+TIN97OVnqIYpVhSg==
+jest-cli@^27.0.6:
+  version "27.0.6"
+  resolved "https://registry.yarnpkg.com/jest-cli/-/jest-cli-27.0.6.tgz#d021e5f4d86d6a212450d4c7b86cb219f1e6864f"
+  integrity sha512-qUUVlGb9fdKir3RDE+B10ULI+LQrz+MCflEH2UJyoUjoHHCbxDrMxSzjQAPUMsic4SncI62ofYCcAvW6+6rhhg==
   dependencies:
-    "@jest/core" "^27.0.3"
-    "@jest/test-result" "^27.0.2"
-    "@jest/types" "^27.0.2"
+    "@jest/core" "^27.0.6"
+    "@jest/test-result" "^27.0.6"
+    "@jest/types" "^27.0.6"
     chalk "^4.0.0"
     exit "^0.1.2"
     graceful-fs "^4.2.4"
     import-local "^3.0.2"
-    jest-config "^27.0.3"
-    jest-util "^27.0.2"
-    jest-validate "^27.0.2"
+    jest-config "^27.0.6"
+    jest-util "^27.0.6"
+    jest-validate "^27.0.6"
     prompts "^2.0.1"
     yargs "^16.0.3"
 
-jest-config@^27.0.3:
-  version "27.0.3"
-  resolved "https://registry.yarnpkg.com/jest-config/-/jest-config-27.0.3.tgz#31871583573c6d669dcdb5bb2d1a8738f3b91c20"
-  integrity sha512-zgtI2YQo+ekKsmYNyDlXFY/7w7WWBSJFoj/WRe173WB88CDUrEYWr0sLdbLOQe+sRu6l1Y2S0MCS6BOJm5jkoA==
+jest-config@^27.0.6:
+  version "27.0.6"
+  resolved "https://registry.yarnpkg.com/jest-config/-/jest-config-27.0.6.tgz#119fb10f149ba63d9c50621baa4f1f179500277f"
+  integrity sha512-JZRR3I1Plr2YxPBhgqRspDE2S5zprbga3swYNrvY3HfQGu7p/GjyLOqwrYad97tX3U3mzT53TPHVmozacfP/3w==
   dependencies:
     "@babel/core" "^7.1.0"
-    "@jest/test-sequencer" "^27.0.3"
-    "@jest/types" "^27.0.2"
-    babel-jest "^27.0.2"
+    "@jest/test-sequencer" "^27.0.6"
+    "@jest/types" "^27.0.6"
+    babel-jest "^27.0.6"
     chalk "^4.0.0"
     deepmerge "^4.2.2"
     glob "^7.1.1"
     graceful-fs "^4.2.4"
     is-ci "^3.0.0"
-    jest-circus "^27.0.3"
-    jest-environment-jsdom "^27.0.3"
-    jest-environment-node "^27.0.3"
-    jest-get-type "^27.0.1"
-    jest-jasmine2 "^27.0.3"
-    jest-regex-util "^27.0.1"
-    jest-resolve "^27.0.2"
-    jest-runner "^27.0.3"
-    jest-util "^27.0.2"
-    jest-validate "^27.0.2"
+    jest-circus "^27.0.6"
+    jest-environment-jsdom "^27.0.6"
+    jest-environment-node "^27.0.6"
+    jest-get-type "^27.0.6"
+    jest-jasmine2 "^27.0.6"
+    jest-regex-util "^27.0.6"
+    jest-resolve "^27.0.6"
+    jest-runner "^27.0.6"
+    jest-util "^27.0.6"
+    jest-validate "^27.0.6"
     micromatch "^4.0.4"
-    pretty-format "^27.0.2"
+    pretty-format "^27.0.6"
 
 jest-diff@^26.0.0:
   version "26.6.2"
@@ -5289,152 +5309,152 @@ jest-diff@^26.0.0:
     jest-get-type "^26.3.0"
     pretty-format "^26.6.2"
 
-jest-diff@^27.0.2:
-  version "27.0.2"
-  resolved "https://registry.yarnpkg.com/jest-diff/-/jest-diff-27.0.2.tgz#f315b87cee5dc134cf42c2708ab27375cc3f5a7e"
-  integrity sha512-BFIdRb0LqfV1hBt8crQmw6gGQHVDhM87SpMIZ45FPYKReZYG5er1+5pIn2zKqvrJp6WNox0ylR8571Iwk2Dmgw==
+jest-diff@^27.0.6:
+  version "27.0.6"
+  resolved "https://registry.yarnpkg.com/jest-diff/-/jest-diff-27.0.6.tgz#4a7a19ee6f04ad70e0e3388f35829394a44c7b5e"
+  integrity sha512-Z1mqgkTCSYaFgwTlP/NUiRzdqgxmmhzHY1Tq17zL94morOHfHu3K4bgSgl+CR4GLhpV8VxkuOYuIWnQ9LnFqmg==
   dependencies:
     chalk "^4.0.0"
-    diff-sequences "^27.0.1"
-    jest-get-type "^27.0.1"
-    pretty-format "^27.0.2"
+    diff-sequences "^27.0.6"
+    jest-get-type "^27.0.6"
+    pretty-format "^27.0.6"
 
-jest-docblock@^27.0.1:
-  version "27.0.1"
-  resolved "https://registry.yarnpkg.com/jest-docblock/-/jest-docblock-27.0.1.tgz#bd9752819b49fa4fab1a50b73eb58c653b962e8b"
-  integrity sha512-TA4+21s3oebURc7VgFV4r7ltdIJ5rtBH1E3Tbovcg7AV+oLfD5DcJ2V2vJ5zFA9sL5CFd/d2D6IpsAeSheEdrA==
+jest-docblock@^27.0.6:
+  version "27.0.6"
+  resolved "https://registry.yarnpkg.com/jest-docblock/-/jest-docblock-27.0.6.tgz#cc78266acf7fe693ca462cbbda0ea4e639e4e5f3"
+  integrity sha512-Fid6dPcjwepTFraz0YxIMCi7dejjJ/KL9FBjPYhBp4Sv1Y9PdhImlKZqYU555BlN4TQKaTc+F2Av1z+anVyGkA==
   dependencies:
     detect-newline "^3.0.0"
 
-jest-each@^27.0.2:
-  version "27.0.2"
-  resolved "https://registry.yarnpkg.com/jest-each/-/jest-each-27.0.2.tgz#865ddb4367476ced752167926b656fa0dcecd8c7"
-  integrity sha512-OLMBZBZ6JkoXgUenDtseFRWA43wVl2BwmZYIWQws7eS7pqsIvePqj/jJmEnfq91ALk3LNphgwNK/PRFBYi7ITQ==
+jest-each@^27.0.6:
+  version "27.0.6"
+  resolved "https://registry.yarnpkg.com/jest-each/-/jest-each-27.0.6.tgz#cee117071b04060158dc8d9a66dc50ad40ef453b"
+  integrity sha512-m6yKcV3bkSWrUIjxkE9OC0mhBZZdhovIW5ergBYirqnkLXkyEn3oUUF/QZgyecA1cF1QFyTE8bRRl8Tfg1pfLA==
   dependencies:
-    "@jest/types" "^27.0.2"
+    "@jest/types" "^27.0.6"
     chalk "^4.0.0"
-    jest-get-type "^27.0.1"
-    jest-util "^27.0.2"
-    pretty-format "^27.0.2"
-
-jest-environment-jsdom@^27.0.3:
-  version "27.0.3"
-  resolved "https://registry.yarnpkg.com/jest-environment-jsdom/-/jest-environment-jsdom-27.0.3.tgz#ed73e913ddc03864eb9f934b5cbabf1b63504e2e"
-  integrity sha512-5KLmgv1bhiimpSA8oGTnZYk6g4fsNyZiA/6gI2tAZUgrufd7heRUSVh4gRokzZVEj8zlwAQYT0Zs6tuJSW/ECA==
-  dependencies:
-    "@jest/environment" "^27.0.3"
-    "@jest/fake-timers" "^27.0.3"
-    "@jest/types" "^27.0.2"
+    jest-get-type "^27.0.6"
+    jest-util "^27.0.6"
+    pretty-format "^27.0.6"
+
+jest-environment-jsdom@^27.0.6:
+  version "27.0.6"
+  resolved "https://registry.yarnpkg.com/jest-environment-jsdom/-/jest-environment-jsdom-27.0.6.tgz#f66426c4c9950807d0a9f209c590ce544f73291f"
+  integrity sha512-FvetXg7lnXL9+78H+xUAsra3IeZRTiegA3An01cWeXBspKXUhAwMM9ycIJ4yBaR0L7HkoMPaZsozCLHh4T8fuw==
+  dependencies:
+    "@jest/environment" "^27.0.6"
+    "@jest/fake-timers" "^27.0.6"
+    "@jest/types" "^27.0.6"
     "@types/node" "*"
-    jest-mock "^27.0.3"
-    jest-util "^27.0.2"
+    jest-mock "^27.0.6"
+    jest-util "^27.0.6"
     jsdom "^16.6.0"
 
-jest-environment-node@^27.0.3:
-  version "27.0.3"
-  resolved "https://registry.yarnpkg.com/jest-environment-node/-/jest-environment-node-27.0.3.tgz#b4acb3679d2552a4215732cab8b0ca7ec4398ee0"
-  integrity sha512-co2/IVnIFL3cItpFULCvXFg9us4gvWXgs7mutAMPCbFhcqh56QAOdKhNzC2+RycsC/k4mbMj1VF+9F/NzA0ROg==
+jest-environment-node@^27.0.6:
+  version "27.0.6"
+  resolved "https://registry.yarnpkg.com/jest-environment-node/-/jest-environment-node-27.0.6.tgz#a6699b7ceb52e8d68138b9808b0c404e505f3e07"
+  integrity sha512-+Vi6yLrPg/qC81jfXx3IBlVnDTI6kmRr08iVa2hFCWmJt4zha0XW7ucQltCAPhSR0FEKEoJ3i+W4E6T0s9is0w==
   dependencies:
-    "@jest/environment" "^27.0.3"
-    "@jest/fake-timers" "^27.0.3"
-    "@jest/types" "^27.0.2"
+    "@jest/environment" "^27.0.6"
+    "@jest/fake-timers" "^27.0.6"
+    "@jest/types" "^27.0.6"
     "@types/node" "*"
-    jest-mock "^27.0.3"
-    jest-util "^27.0.2"
+    jest-mock "^27.0.6"
+    jest-util "^27.0.6"
 
 jest-get-type@^26.3.0:
   version "26.3.0"
   resolved "https://registry.yarnpkg.com/jest-get-type/-/jest-get-type-26.3.0.tgz#e97dc3c3f53c2b406ca7afaed4493b1d099199e0"
   integrity sha512-TpfaviN1R2pQWkIihlfEanwOXK0zcxrKEE4MlU6Tn7keoXdN6/3gK/xl0yEh8DOunn5pOVGKf8hB4R9gVh04ig==
 
-jest-get-type@^27.0.1:
-  version "27.0.1"
-  resolved "https://registry.yarnpkg.com/jest-get-type/-/jest-get-type-27.0.1.tgz#34951e2b08c8801eb28559d7eb732b04bbcf7815"
-  integrity sha512-9Tggo9zZbu0sHKebiAijyt1NM77Z0uO4tuWOxUCujAiSeXv30Vb5D4xVF4UR4YWNapcftj+PbByU54lKD7/xMg==
+jest-get-type@^27.0.6:
+  version "27.0.6"
+  resolved "https://registry.yarnpkg.com/jest-get-type/-/jest-get-type-27.0.6.tgz#0eb5c7f755854279ce9b68a9f1a4122f69047cfe"
+  integrity sha512-XTkK5exIeUbbveehcSR8w0bhH+c0yloW/Wpl+9vZrjzztCPWrxhHwkIFpZzCt71oRBsgxmuUfxEqOYoZI2macg==
 
-jest-haste-map@^27.0.2:
-  version "27.0.2"
-  resolved "https://registry.yarnpkg.com/jest-haste-map/-/jest-haste-map-27.0.2.tgz#3f1819400c671237e48b4d4b76a80a0dbed7577f"
-  integrity sha512-37gYfrYjjhEfk37C4bCMWAC0oPBxDpG0qpl8lYg8BT//wf353YT/fzgA7+Dq0EtM7rPFS3JEcMsxdtDwNMi2cA==
+jest-haste-map@^27.0.6:
+  version "27.0.6"
+  resolved "https://registry.yarnpkg.com/jest-haste-map/-/jest-haste-map-27.0.6.tgz#4683a4e68f6ecaa74231679dca237279562c8dc7"
+  integrity sha512-4ldjPXX9h8doB2JlRzg9oAZ2p6/GpQUNAeiYXqcpmrKbP0Qev0wdZlxSMOmz8mPOEnt4h6qIzXFLDi8RScX/1w==
   dependencies:
-    "@jest/types" "^27.0.2"
+    "@jest/types" "^27.0.6"
     "@types/graceful-fs" "^4.1.2"
     "@types/node" "*"
     anymatch "^3.0.3"
     fb-watchman "^2.0.0"
     graceful-fs "^4.2.4"
-    jest-regex-util "^27.0.1"
-    jest-serializer "^27.0.1"
-    jest-util "^27.0.2"
-    jest-worker "^27.0.2"
+    jest-regex-util "^27.0.6"
+    jest-serializer "^27.0.6"
+    jest-util "^27.0.6"
+    jest-worker "^27.0.6"
     micromatch "^4.0.4"
     walker "^1.0.7"
   optionalDependencies:
     fsevents "^2.3.2"
 
-jest-jasmine2@^27.0.3:
-  version "27.0.3"
-  resolved "https://registry.yarnpkg.com/jest-jasmine2/-/jest-jasmine2-27.0.3.tgz#fa6f6499566ea1b01b68b3ad13f49d1592b02c85"
-  integrity sha512-odJ2ia8P5c+IsqOcWJPmku4AqbXIfTVLRjYTKHri3TEvbmTdLw0ghy13OAPIl/0v7cVH0TURK7+xFOHKDLvKIA==
+jest-jasmine2@^27.0.6:
+  version "27.0.6"
+  resolved "https://registry.yarnpkg.com/jest-jasmine2/-/jest-jasmine2-27.0.6.tgz#fd509a9ed3d92bd6edb68a779f4738b100655b37"
+  integrity sha512-cjpH2sBy+t6dvCeKBsHpW41mjHzXgsavaFMp+VWRf0eR4EW8xASk1acqmljFtK2DgyIECMv2yCdY41r2l1+4iA==
   dependencies:
     "@babel/traverse" "^7.1.0"
-    "@jest/environment" "^27.0.3"
-    "@jest/source-map" "^27.0.1"
-    "@jest/test-result" "^27.0.2"
-    "@jest/types" "^27.0.2"
+    "@jest/environment" "^27.0.6"
+    "@jest/source-map" "^27.0.6"
+    "@jest/test-result" "^27.0.6"
+    "@jest/types" "^27.0.6"
     "@types/node" "*"
     chalk "^4.0.0"
     co "^4.6.0"
-    expect "^27.0.2"
+    expect "^27.0.6"
     is-generator-fn "^2.0.0"
-    jest-each "^27.0.2"
-    jest-matcher-utils "^27.0.2"
-    jest-message-util "^27.0.2"
-    jest-runtime "^27.0.3"
-    jest-snapshot "^27.0.2"
-    jest-util "^27.0.2"
-    pretty-format "^27.0.2"
+    jest-each "^27.0.6"
+    jest-matcher-utils "^27.0.6"
+    jest-message-util "^27.0.6"
+    jest-runtime "^27.0.6"
+    jest-snapshot "^27.0.6"
+    jest-util "^27.0.6"
+    pretty-format "^27.0.6"
     throat "^6.0.1"
 
-jest-leak-detector@^27.0.2:
-  version "27.0.2"
-  resolved "https://registry.yarnpkg.com/jest-leak-detector/-/jest-leak-detector-27.0.2.tgz#ce19aa9dbcf7a72a9d58907a970427506f624e69"
-  integrity sha512-TZA3DmCOfe8YZFIMD1GxFqXUkQnIoOGQyy4hFCA2mlHtnAaf+FeOMxi0fZmfB41ZL+QbFG6BVaZF5IeFIVy53Q==
+jest-leak-detector@^27.0.6:
+  version "27.0.6"
+  resolved "https://registry.yarnpkg.com/jest-leak-detector/-/jest-leak-detector-27.0.6.tgz#545854275f85450d4ef4b8fe305ca2a26450450f"
+  integrity sha512-2/d6n2wlH5zEcdctX4zdbgX8oM61tb67PQt4Xh8JFAIy6LRKUnX528HulkaG6nD5qDl5vRV1NXejCe1XRCH5gQ==
   dependencies:
-    jest-get-type "^27.0.1"
-    pretty-format "^27.0.2"
+    jest-get-type "^27.0.6"
+    pretty-format "^27.0.6"
 
-jest-matcher-utils@^27.0.2:
-  version "27.0.2"
-  resolved "https://registry.yarnpkg.com/jest-matcher-utils/-/jest-matcher-utils-27.0.2.tgz#f14c060605a95a466cdc759acc546c6f4cbfc4f0"
-  integrity sha512-Qczi5xnTNjkhcIB0Yy75Txt+Ez51xdhOxsukN7awzq2auZQGPHcQrJ623PZj0ECDEMOk2soxWx05EXdXGd1CbA==
+jest-matcher-utils@^27.0.6:
+  version "27.0.6"
+  resolved "https://registry.yarnpkg.com/jest-matcher-utils/-/jest-matcher-utils-27.0.6.tgz#2a8da1e86c620b39459f4352eaa255f0d43e39a9"
+  integrity sha512-OFgF2VCQx9vdPSYTHWJ9MzFCehs20TsyFi6bIHbk5V1u52zJOnvF0Y/65z3GLZHKRuTgVPY4Z6LVePNahaQ+tA==
   dependencies:
     chalk "^4.0.0"
-    jest-diff "^27.0.2"
-    jest-get-type "^27.0.1"
-    pretty-format "^27.0.2"
+    jest-diff "^27.0.6"
+    jest-get-type "^27.0.6"
+    pretty-format "^27.0.6"
 
-jest-message-util@^27.0.2:
-  version "27.0.2"
-  resolved "https://registry.yarnpkg.com/jest-message-util/-/jest-message-util-27.0.2.tgz#181c9b67dff504d8f4ad15cba10d8b80f272048c"
-  integrity sha512-rTqWUX42ec2LdMkoUPOzrEd1Tcm+R1KfLOmFK+OVNo4MnLsEaxO5zPDb2BbdSmthdM/IfXxOZU60P/WbWF8BTw==
+jest-message-util@^27.0.6:
+  version "27.0.6"
+  resolved "https://registry.yarnpkg.com/jest-message-util/-/jest-message-util-27.0.6.tgz#158bcdf4785706492d164a39abca6a14da5ab8b5"
+  integrity sha512-rBxIs2XK7rGy+zGxgi+UJKP6WqQ+KrBbD1YMj517HYN3v2BG66t3Xan3FWqYHKZwjdB700KiAJ+iES9a0M+ixw==
   dependencies:
     "@babel/code-frame" "^7.12.13"
-    "@jest/types" "^27.0.2"
+    "@jest/types" "^27.0.6"
     "@types/stack-utils" "^2.0.0"
     chalk "^4.0.0"
     graceful-fs "^4.2.4"
     micromatch "^4.0.4"
-    pretty-format "^27.0.2"
+    pretty-format "^27.0.6"
     slash "^3.0.0"
     stack-utils "^2.0.3"
 
-jest-mock@^27.0.3:
-  version "27.0.3"
-  resolved "https://registry.yarnpkg.com/jest-mock/-/jest-mock-27.0.3.tgz#5591844f9192b3335c0dca38e8e45ed297d4d23d"
-  integrity sha512-O5FZn5XDzEp+Xg28mUz4ovVcdwBBPfAhW9+zJLO0Efn2qNbYcDaJvSlRiQ6BCZUCVOJjALicuJQI9mRFjv1o9Q==
+jest-mock@^27.0.6:
+  version "27.0.6"
+  resolved "https://registry.yarnpkg.com/jest-mock/-/jest-mock-27.0.6.tgz#0efdd40851398307ba16778728f6d34d583e3467"
+  integrity sha512-lzBETUoK8cSxts2NYXSBWT+EJNzmUVtVVwS1sU9GwE1DLCfGsngg+ZVSIe0yd0ZSm+y791esiuo+WSwpXJQ5Bw==
   dependencies:
-    "@jest/types" "^27.0.2"
+    "@jest/types" "^27.0.6"
     "@types/node" "*"
 
 jest-pnp-resolver@^1.2.2:
@@ -5442,74 +5462,76 @@ jest-pnp-resolver@^1.2.2:
   resolved "https://registry.yarnpkg.com/jest-pnp-resolver/-/jest-pnp-resolver-1.2.2.tgz#b704ac0ae028a89108a4d040b3f919dfddc8e33c"
   integrity sha512-olV41bKSMm8BdnuMsewT4jqlZ8+3TCARAXjZGT9jcoSnrfUnRCqnMoF9XEeoWjbzObpqF9dRhHQj0Xb9QdF6/w==
 
-jest-regex-util@^27.0.1:
-  version "27.0.1"
-  resolved "https://registry.yarnpkg.com/jest-regex-util/-/jest-regex-util-27.0.1.tgz#69d4b1bf5b690faa3490113c47486ed85dd45b68"
-  integrity sha512-6nY6QVcpTgEKQy1L41P4pr3aOddneK17kn3HJw6SdwGiKfgCGTvH02hVXL0GU8GEKtPH83eD2DIDgxHXOxVohQ==
+jest-regex-util@^27.0.6:
+  version "27.0.6"
+  resolved "https://registry.yarnpkg.com/jest-regex-util/-/jest-regex-util-27.0.6.tgz#02e112082935ae949ce5d13b2675db3d8c87d9c5"
+  integrity sha512-SUhPzBsGa1IKm8hx2F4NfTGGp+r7BXJ4CulsZ1k2kI+mGLG+lxGrs76veN2LF/aUdGosJBzKgXmNCw+BzFqBDQ==
 
-jest-resolve-dependencies@^27.0.3:
-  version "27.0.3"
-  resolved "https://registry.yarnpkg.com/jest-resolve-dependencies/-/jest-resolve-dependencies-27.0.3.tgz#7e258f7d0458bb910855f8a50f5c1e9d92c319dc"
-  integrity sha512-HdjWOvFAgT5CYChF2eiBN2rRKicjaTCCtA3EtH47REIdGzEHGUhYrWYgLahXsiOovvWN6edhcHL5WCa3gbc04A==
+jest-resolve-dependencies@^27.0.6:
+  version "27.0.6"
+  resolved "https://registry.yarnpkg.com/jest-resolve-dependencies/-/jest-resolve-dependencies-27.0.6.tgz#3e619e0ef391c3ecfcf6ef4056207a3d2be3269f"
+  integrity sha512-mg9x9DS3BPAREWKCAoyg3QucCr0n6S8HEEsqRCKSPjPcu9HzRILzhdzY3imsLoZWeosEbJZz6TKasveczzpJZA==
   dependencies:
-    "@jest/types" "^27.0.2"
-    jest-regex-util "^27.0.1"
-    jest-snapshot "^27.0.2"
+    "@jest/types" "^27.0.6"
+    jest-regex-util "^27.0.6"
+    jest-snapshot "^27.0.6"
 
-jest-resolve@^27.0.2:
-  version "27.0.2"
-  resolved "https://registry.yarnpkg.com/jest-resolve/-/jest-resolve-27.0.2.tgz#087a3ed17182722a3415f92bfacc99c49cf8a965"
-  integrity sha512-rmfLGyZhwAUR5z3EwPAW7LQTorWAuCYCcsQJoQxT2it+BOgX3zKxa67r1pfpK3ihy2k9TjYD3/lMp5rPm/CL1Q==
+jest-resolve@^27.0.6:
+  version "27.0.6"
+  resolved "https://registry.yarnpkg.com/jest-resolve/-/jest-resolve-27.0.6.tgz#e90f436dd4f8fbf53f58a91c42344864f8e55bff"
+  integrity sha512-yKmIgw2LgTh7uAJtzv8UFHGF7Dm7XfvOe/LQ3Txv101fLM8cx2h1QVwtSJ51Q/SCxpIiKfVn6G2jYYMDNHZteA==
   dependencies:
-    "@jest/types" "^27.0.2"
+    "@jest/types" "^27.0.6"
     chalk "^4.0.0"
     escalade "^3.1.1"
     graceful-fs "^4.2.4"
     jest-pnp-resolver "^1.2.2"
-    jest-util "^27.0.2"
-    jest-validate "^27.0.2"
+    jest-util "^27.0.6"
+    jest-validate "^27.0.6"
     resolve "^1.20.0"
     slash "^3.0.0"
 
-jest-runner@^27.0.3:
-  version "27.0.3"
-  resolved "https://registry.yarnpkg.com/jest-runner/-/jest-runner-27.0.3.tgz#d9747af3bee5a6ffaeb9e10b653263b780258b54"
-  integrity sha512-zH23uIIh1ro1JCD7XX1bQ0bQwXEsBzLX2UJVE/AVLsk4YJRmTfyXIzzRzBWRdnMHHg1NWkJ4fGs7eFP15IqZpQ==
+jest-runner@^27.0.6:
+  version "27.0.6"
+  resolved "https://registry.yarnpkg.com/jest-runner/-/jest-runner-27.0.6.tgz#1325f45055539222bbc7256a6976e993ad2f9520"
+  integrity sha512-W3Bz5qAgaSChuivLn+nKOgjqNxM7O/9JOJoKDCqThPIg2sH/d4A/lzyiaFgnb9V1/w29Le11NpzTJSzga1vyYQ==
   dependencies:
-    "@jest/console" "^27.0.2"
-    "@jest/environment" "^27.0.3"
-    "@jest/test-result" "^27.0.2"
-    "@jest/transform" "^27.0.2"
-    "@jest/types" "^27.0.2"
+    "@jest/console" "^27.0.6"
+    "@jest/environment" "^27.0.6"
+    "@jest/test-result" "^27.0.6"
+    "@jest/transform" "^27.0.6"
+    "@jest/types" "^27.0.6"
     "@types/node" "*"
     chalk "^4.0.0"
     emittery "^0.8.1"
     exit "^0.1.2"
     graceful-fs "^4.2.4"
-    jest-docblock "^27.0.1"
-    jest-haste-map "^27.0.2"
-    jest-leak-detector "^27.0.2"
-    jest-message-util "^27.0.2"
-    jest-resolve "^27.0.2"
-    jest-runtime "^27.0.3"
-    jest-util "^27.0.2"
-    jest-worker "^27.0.2"
+    jest-docblock "^27.0.6"
+    jest-environment-jsdom "^27.0.6"
+    jest-environment-node "^27.0.6"
+    jest-haste-map "^27.0.6"
+    jest-leak-detector "^27.0.6"
+    jest-message-util "^27.0.6"
+    jest-resolve "^27.0.6"
+    jest-runtime "^27.0.6"
+    jest-util "^27.0.6"
+    jest-worker "^27.0.6"
     source-map-support "^0.5.6"
     throat "^6.0.1"
 
-jest-runtime@^27.0.3:
-  version "27.0.3"
-  resolved "https://registry.yarnpkg.com/jest-runtime/-/jest-runtime-27.0.3.tgz#32499c1047e5d953cfbb67fe790ab0167a614d28"
-  integrity sha512-k1Hl2pWWHBkSXdCggX2lyLRuDnnnmMlnJd+DPLb8LmmAeHW87WgGC6TplD377VxY3KQu73sklkhGUIdwFgsRVQ==
-  dependencies:
-    "@jest/console" "^27.0.2"
-    "@jest/environment" "^27.0.3"
-    "@jest/fake-timers" "^27.0.3"
-    "@jest/globals" "^27.0.3"
-    "@jest/source-map" "^27.0.1"
-    "@jest/test-result" "^27.0.2"
-    "@jest/transform" "^27.0.2"
-    "@jest/types" "^27.0.2"
+jest-runtime@^27.0.6:
+  version "27.0.6"
+  resolved "https://registry.yarnpkg.com/jest-runtime/-/jest-runtime-27.0.6.tgz#45877cfcd386afdd4f317def551fc369794c27c9"
+  integrity sha512-BhvHLRVfKibYyqqEFkybsznKwhrsu7AWx2F3y9G9L95VSIN3/ZZ9vBpm/XCS2bS+BWz3sSeNGLzI3TVQ0uL85Q==
+  dependencies:
+    "@jest/console" "^27.0.6"
+    "@jest/environment" "^27.0.6"
+    "@jest/fake-timers" "^27.0.6"
+    "@jest/globals" "^27.0.6"
+    "@jest/source-map" "^27.0.6"
+    "@jest/test-result" "^27.0.6"
+    "@jest/transform" "^27.0.6"
+    "@jest/types" "^27.0.6"
     "@types/yargs" "^16.0.0"
     chalk "^4.0.0"
     cjs-module-lexer "^1.0.0"
@@ -5517,22 +5539,22 @@ jest-runtime@^27.0.3:
     exit "^0.1.2"
     glob "^7.1.3"
     graceful-fs "^4.2.4"
-    jest-haste-map "^27.0.2"
-    jest-message-util "^27.0.2"
-    jest-mock "^27.0.3"
-    jest-regex-util "^27.0.1"
-    jest-resolve "^27.0.2"
-    jest-snapshot "^27.0.2"
-    jest-util "^27.0.2"
-    jest-validate "^27.0.2"
+    jest-haste-map "^27.0.6"
+    jest-message-util "^27.0.6"
+    jest-mock "^27.0.6"
+    jest-regex-util "^27.0.6"
+    jest-resolve "^27.0.6"
+    jest-snapshot "^27.0.6"
+    jest-util "^27.0.6"
+    jest-validate "^27.0.6"
     slash "^3.0.0"
     strip-bom "^4.0.0"
     yargs "^16.0.3"
 
-jest-serializer@^27.0.1:
-  version "27.0.1"
-  resolved "https://registry.yarnpkg.com/jest-serializer/-/jest-serializer-27.0.1.tgz#2464d04dcc33fb71dc80b7c82e3c5e8a08cb1020"
-  integrity sha512-svy//5IH6bfQvAbkAEg1s7xhhgHTtXu0li0I2fdKHDsLP2P2MOiscPQIENQep8oU2g2B3jqLyxKKzotZOz4CwQ==
+jest-serializer@^27.0.6:
+  version "27.0.6"
+  resolved "https://registry.yarnpkg.com/jest-serializer/-/jest-serializer-27.0.6.tgz#93a6c74e0132b81a2d54623251c46c498bb5bec1"
+  integrity sha512-PtGdVK9EGC7dsaziskfqaAPib6wTViY3G8E5wz9tLVPhHyiDNTZn/xjZ4khAw+09QkoOVpn7vF5nPSN6dtBexA==
   dependencies:
     "@types/node" "*"
     graceful-fs "^4.2.4"
@@ -5545,10 +5567,10 @@ jest-silent-reporter@0.5.0:
     chalk "^4.0.0"
     jest-util "^26.0.0"
 
-jest-snapshot@^27.0.2:
-  version "27.0.2"
-  resolved "https://registry.yarnpkg.com/jest-snapshot/-/jest-snapshot-27.0.2.tgz#40c48dc6afd3cbc5d3d07c061f20fc10d94ca0cd"
-  integrity sha512-4RcgvZbPrrbEE/hT6XQ4hr+NVVLNrmsgUnYSnZRT6UAvW9Q2yzGMS+tfJh+xlQJAapnnkNJzsMn6vUa+yfiVHA==
+jest-snapshot@^27.0.6:
+  version "27.0.6"
+  resolved "https://registry.yarnpkg.com/jest-snapshot/-/jest-snapshot-27.0.6.tgz#f4e6b208bd2e92e888344d78f0f650bcff05a4bf"
+  integrity sha512-NTHaz8He+ATUagUgE7C/UtFcRoHqR2Gc+KDfhQIyx+VFgwbeEMjeP+ILpUTLosZn/ZtbNdCF5LkVnN/l+V751A==
   dependencies:
     "@babel/core" "^7.7.2"
     "@babel/generator" "^7.7.2"
@@ -5556,23 +5578,23 @@ jest-snapshot@^27.0.2:
     "@babel/plugin-syntax-typescript" "^7.7.2"
     "@babel/traverse" "^7.7.2"
     "@babel/types" "^7.0.0"
-    "@jest/transform" "^27.0.2"
-    "@jest/types" "^27.0.2"
+    "@jest/transform" "^27.0.6"
+    "@jest/types" "^27.0.6"
     "@types/babel__traverse" "^7.0.4"
     "@types/prettier" "^2.1.5"
     babel-preset-current-node-syntax "^1.0.0"
     chalk "^4.0.0"
-    expect "^27.0.2"
+    expect "^27.0.6"
     graceful-fs "^4.2.4"
-    jest-diff "^27.0.2"
-    jest-get-type "^27.0.1"
-    jest-haste-map "^27.0.2"
-    jest-matcher-utils "^27.0.2"
-    jest-message-util "^27.0.2"
-    jest-resolve "^27.0.2"
-    jest-util "^27.0.2"
+    jest-diff "^27.0.6"
+    jest-get-type "^27.0.6"
+    jest-haste-map "^27.0.6"
+    jest-matcher-utils "^27.0.6"
+    jest-message-util "^27.0.6"
+    jest-resolve "^27.0.6"
+    jest-util "^27.0.6"
     natural-compare "^1.4.0"
-    pretty-format "^27.0.2"
+    pretty-format "^27.0.6"
     semver "^7.3.2"
 
 jest-util@^26.0.0:
@@ -5587,7 +5609,7 @@ jest-util@^26.0.0:
     is-ci "^2.0.0"
     micromatch "^4.0.2"
 
-jest-util@^27.0.0, jest-util@^27.0.2:
+jest-util@^27.0.0:
   version "27.0.2"
   resolved "https://registry.yarnpkg.com/jest-util/-/jest-util-27.0.2.tgz#fc2c7ace3c75ae561cf1e5fdb643bf685a5be7c7"
   integrity sha512-1d9uH3a00OFGGWSibpNYr+jojZ6AckOMCXV2Z4K3YXDnzpkAaXQyIpY14FOJPiUmil7CD+A6Qs+lnnh6ctRbIA==
@@ -5599,48 +5621,60 @@ jest-util@^27.0.0, jest-util@^27.0.2:
     is-ci "^3.0.0"
     picomatch "^2.2.3"
 
-jest-validate@^27.0.2:
-  version "27.0.2"
-  resolved "https://registry.yarnpkg.com/jest-validate/-/jest-validate-27.0.2.tgz#7fe2c100089449cd5cbb47a5b0b6cb7cda5beee5"
-  integrity sha512-UgBF6/oVu1ofd1XbaSotXKihi8nZhg0Prm8twQ9uCuAfo59vlxCXMPI/RKmrZEVgi3Nd9dS0I8A0wzWU48pOvg==
+jest-util@^27.0.6:
+  version "27.0.6"
+  resolved "https://registry.yarnpkg.com/jest-util/-/jest-util-27.0.6.tgz#e8e04eec159de2f4d5f57f795df9cdc091e50297"
+  integrity sha512-1JjlaIh+C65H/F7D11GNkGDDZtDfMEM8EBXsvd+l/cxtgQ6QhxuloOaiayt89DxUvDarbVhqI98HhgrM1yliFQ==
   dependencies:
-    "@jest/types" "^27.0.2"
+    "@jest/types" "^27.0.6"
+    "@types/node" "*"
+    chalk "^4.0.0"
+    graceful-fs "^4.2.4"
+    is-ci "^3.0.0"
+    picomatch "^2.2.3"
+
+jest-validate@^27.0.6:
+  version "27.0.6"
+  resolved "https://registry.yarnpkg.com/jest-validate/-/jest-validate-27.0.6.tgz#930a527c7a951927df269f43b2dc23262457e2a6"
+  integrity sha512-yhZZOaMH3Zg6DC83n60pLmdU1DQE46DW+KLozPiPbSbPhlXXaiUTDlhHQhHFpaqIFRrInko1FHXjTRpjWRuWfA==
+  dependencies:
+    "@jest/types" "^27.0.6"
     camelcase "^6.2.0"
     chalk "^4.0.0"
-    jest-get-type "^27.0.1"
+    jest-get-type "^27.0.6"
     leven "^3.1.0"
-    pretty-format "^27.0.2"
+    pretty-format "^27.0.6"
 
-jest-watcher@^27.0.2:
-  version "27.0.2"
-  resolved "https://registry.yarnpkg.com/jest-watcher/-/jest-watcher-27.0.2.tgz#dab5f9443e2d7f52597186480731a8c6335c5deb"
-  integrity sha512-8nuf0PGuTxWj/Ytfw5fyvNn/R80iXY8QhIT0ofyImUvdnoaBdT6kob0GmhXR+wO+ALYVnh8bQxN4Tjfez0JgkA==
+jest-watcher@^27.0.6:
+  version "27.0.6"
+  resolved "https://registry.yarnpkg.com/jest-watcher/-/jest-watcher-27.0.6.tgz#89526f7f9edf1eac4e4be989bcb6dec6b8878d9c"
+  integrity sha512-/jIoKBhAP00/iMGnTwUBLgvxkn7vsOweDrOTSPzc7X9uOyUtJIDthQBTI1EXz90bdkrxorUZVhJwiB69gcHtYQ==
   dependencies:
-    "@jest/test-result" "^27.0.2"
-    "@jest/types" "^27.0.2"
+    "@jest/test-result" "^27.0.6"
+    "@jest/types" "^27.0.6"
     "@types/node" "*"
     ansi-escapes "^4.2.1"
     chalk "^4.0.0"
-    jest-util "^27.0.2"
+    jest-util "^27.0.6"
     string-length "^4.0.1"
 
-jest-worker@^27.0.2:
-  version "27.0.2"
-  resolved "https://registry.yarnpkg.com/jest-worker/-/jest-worker-27.0.2.tgz#4ebeb56cef48b3e7514552f80d0d80c0129f0b05"
-  integrity sha512-EoBdilOTTyOgmHXtw/cPc+ZrCA0KJMrkXzkrPGNwLmnvvlN1nj7MPrxpT7m+otSv2e1TLaVffzDnE/LB14zJMg==
+jest-worker@^27.0.6:
+  version "27.0.6"
+  resolved "https://registry.yarnpkg.com/jest-worker/-/jest-worker-27.0.6.tgz#a5fdb1e14ad34eb228cfe162d9f729cdbfa28aed"
+  integrity sha512-qupxcj/dRuA3xHPMUd40gr2EaAurFbkwzOh7wfPaeE9id7hyjURRQoqNfHifHK3XjJU6YJJUQKILGUnwGPEOCA==
   dependencies:
     "@types/node" "*"
     merge-stream "^2.0.0"
     supports-color "^8.0.0"
 
-jest@27.0.1:
-  version "27.0.1"
-  resolved "https://registry.yarnpkg.com/jest/-/jest-27.0.1.tgz#d3822f0904f3bbe884bea393cede2be2aa290d0e"
-  integrity sha512-lFEoUdXjbGAIxk/gZhcv98xOaH1hjqG5R/PQHs5GBfIK5iL3tnXCjHQf4HQLVZZ2rcXML3oeVg9+XrRZbooBdQ==
+jest@27.0.6:
+  version "27.0.6"
+  resolved "https://registry.yarnpkg.com/jest/-/jest-27.0.6.tgz#10517b2a628f0409087fbf473db44777d7a04505"
+  integrity sha512-EjV8aETrsD0wHl7CKMibKwQNQc3gIRBXlTikBmmHUeVMKaPFxdcUIBfoDqTSXDoGJIivAYGqCWVlzCSaVjPQsA==
   dependencies:
-    "@jest/core" "^27.0.1"
+    "@jest/core" "^27.0.6"
     import-local "^3.0.2"
-    jest-cli "^27.0.1"
+    jest-cli "^27.0.6"
 
 js-tokens@^4.0.0:
   version "4.0.0"
@@ -5982,6 +6016,13 @@ locate-path@^5.0.0:
   dependencies:
     p-locate "^4.1.0"
 
+locate-path@^6.0.0:
+  version "6.0.0"
+  resolved "https://registry.yarnpkg.com/locate-path/-/locate-path-6.0.0.tgz#55321eb309febbc59c4801d931a72452a681d286"
+  integrity sha512-iPZK6eYjbxRu3uB4/WZ3EsEIMJFMqAoopl3R+zuq0UjcAm/MO6KCweDgPfP3elTztoKP3KtnVHxTn2NHBSDVUw==
+  dependencies:
+    p-locate "^5.0.0"
+
 lodash._reinterpolate@^3.0.0:
   version "3.0.0"
   resolved "https://registry.yarnpkg.com/lodash._reinterpolate/-/lodash._reinterpolate-3.0.0.tgz#0ccf2d89166af03b3663c796538b75ac6e114d9d"
@@ -6226,6 +6267,24 @@ memorystream@^0.3.1:
   resolved "https://registry.yarnpkg.com/memorystream/-/memorystream-0.3.1.tgz#86d7090b30ce455d63fbae12dda51a47ddcaf9b2"
   integrity sha1-htcJCzDORV1j+64S3aUaR93K+bI=
 
+meow@^10.0.1:
+  version "10.0.1"
+  resolved "https://registry.yarnpkg.com/meow/-/meow-10.0.1.tgz#3252e728f4d8603ecae3a5b6460aaae4aea44ae0"
+  integrity sha512-65vCCdUI8wS5upK24fDFo25FcViNExdTGAR/vaWN4E6fXsWQ8fGdbkjCWp3nDTuJMlIYuEoAEMiB2/b81DBJjg==
+  dependencies:
+    "@types/minimist" "^1.2.1"
+    camelcase-keys "^6.2.2"
+    decamelize "^5.0.0"
+    decamelize-keys "^1.1.0"
+    hard-rejection "^2.1.0"
+    minimist-options "4.1.0"
+    normalize-package-data "^3.0.2"
+    read-pkg-up "^8.0.0"
+    redent "^4.0.0"
+    trim-newlines "^4.0.1"
+    type-fest "^1.0.2"
+    yargs-parser "^20.2.7"
+
 meow@^3.3.0:
   version "3.7.0"
   resolved "https://registry.yarnpkg.com/meow/-/meow-3.7.0.tgz#72cb668b425228290abbfa856892587308a801fb"
@@ -6242,23 +6301,6 @@ meow@^3.3.0:
     redent "^1.0.0"
     trim-newlines "^1.0.0"
 
-meow@^6.1.1:
-  version "6.1.1"
-  resolved "https://registry.yarnpkg.com/meow/-/meow-6.1.1.tgz#1ad64c4b76b2a24dfb2f635fddcadf320d251467"
-  integrity sha512-3YffViIt2QWgTy6Pale5QpopX/IvU3LPL03jOTqp6pGj3VjesdO/U8CuHMKpnQr4shCNCM5fd5XFFvIIl6JBHg==
-  dependencies:
-    "@types/minimist" "^1.2.0"
-    camelcase-keys "^6.2.2"
-    decamelize-keys "^1.1.0"
-    hard-rejection "^2.1.0"
-    minimist-options "^4.0.2"
-    normalize-package-data "^2.5.0"
-    read-pkg-up "^7.0.1"
-    redent "^3.0.0"
-    trim-newlines "^3.0.0"
-    type-fest "^0.13.1"
-    yargs-parser "^18.1.3"
-
 meow@^8.0.0:
   version "8.1.2"
   resolved "https://registry.yarnpkg.com/meow/-/meow-8.1.2.tgz#bcbe45bda0ee1729d350c03cffc8395a36c4e897"
@@ -6330,7 +6372,7 @@ mimic-fn@^2.1.0:
   resolved "https://registry.yarnpkg.com/mimic-fn/-/mimic-fn-2.1.0.tgz#7ed2c2ccccaf84d3ffcb7a69b57711fc2083401b"
   integrity sha512-OqbOk5oEQeAZ8WXWydlu9HJjz9WVdEIvamMCcXmuqUYjTknH/sqsWvhQ3vgwKFRR1HpjvNBKQ37nbJgYzGqGcg==
 
-min-indent@^1.0.0:
+min-indent@^1.0.0, min-indent@^1.0.1:
   version "1.0.1"
   resolved "https://registry.yarnpkg.com/min-indent/-/min-indent-1.0.1.tgz#a63f681673b30571fbe8bc25686ae746eefa9869"
   integrity sha512-I9jwMn07Sy/IwOj3zVkVik2JTvgpaykDZEigL6Rx6N9LbMywwUSMtxET+7lVoDLLd3O3IXwJwvuuns8UB/HeAg==
@@ -6342,7 +6384,7 @@ minimatch@^3.0.0, minimatch@^3.0.4:
   dependencies:
     brace-expansion "^1.1.7"
 
-minimist-options@4.1.0, minimist-options@^4.0.2:
+minimist-options@4.1.0:
   version "4.1.0"
   resolved "https://registry.yarnpkg.com/minimist-options/-/minimist-options-4.1.0.tgz#c0655713c53a8a2ebd77ffa247d342c40f010619"
   integrity sha512-Q4r8ghd80yhO/0j1O3B2BjweX3fiHg9cdOwjJd2J76Q135c+NDxGCqdYKQ1SKBuFfgWbAUzBfvYjPUEeNgqN1A==
@@ -7020,6 +7062,13 @@ p-limit@^2.2.0:
   dependencies:
     p-try "^2.0.0"
 
+p-limit@^3.0.2:
+  version "3.1.0"
+  resolved "https://registry.yarnpkg.com/p-limit/-/p-limit-3.1.0.tgz#e1daccbe78d0d1388ca18c64fea38e3e57e3706b"
+  integrity sha512-TYOanM3wGwNGsZN2cVTYPArw454xnXj5qmWF1bEoAc4+cU/ol7GVh7odevjp1FNHduHc3KZMcFduxU5Xc6uJRQ==
+  dependencies:
+    yocto-queue "^0.1.0"
+
 p-locate@^2.0.0:
   version "2.0.0"
   resolved "https://registry.yarnpkg.com/p-locate/-/p-locate-2.0.0.tgz#20a0103b222a70c8fd39cc2e580680f3dde5ec43"
@@ -7034,6 +7083,13 @@ p-locate@^4.1.0:
   dependencies:
     p-limit "^2.2.0"
 
+p-locate@^5.0.0:
+  version "5.0.0"
+  resolved "https://registry.yarnpkg.com/p-locate/-/p-locate-5.0.0.tgz#83c8315c6785005e3bd021839411c9e110e6d834"
+  integrity sha512-LaNjtRWUBY++zB5nE/NwcaoMylSPk+S+ZHNB1TzdbMJMny6dynpAGt7X/tl/QYq3TIeE6nxHppbo2LGymrG5Pw==
+  dependencies:
+    p-limit "^3.0.2"
+
 p-map-series@^2.1.0:
   version "2.1.0"
   resolved "https://registry.yarnpkg.com/p-map-series/-/p-map-series-2.1.0.tgz#7560d4c452d9da0c07e692fdbfe6e2c81a2a91f2"
@@ -7168,7 +7224,7 @@ parse-json@^4.0.0:
     error-ex "^1.3.1"
     json-parse-better-errors "^1.0.1"
 
-parse-json@^5.0.0:
+parse-json@^5.0.0, parse-json@^5.2.0:
   version "5.2.0"
   resolved "https://registry.yarnpkg.com/parse-json/-/parse-json-5.2.0.tgz#c76fc66dee54231c962b22bcc8a72cf2f99753cd"
   integrity sha512-ayCKvm/phCGxOkYRSCM82iDwct8/EonSEgCSxWxD7ve6jHggsFl4fZVQBPRNgQoKiuV/odhFrGzQXZwbifC8Rg==
@@ -7408,12 +7464,12 @@ pretty-format@^26.0.0, pretty-format@^26.6.2:
     ansi-styles "^4.0.0"
     react-is "^17.0.1"
 
-pretty-format@^27.0.2:
-  version "27.0.2"
-  resolved "https://registry.yarnpkg.com/pretty-format/-/pretty-format-27.0.2.tgz#9283ff8c4f581b186b2d4da461617143dca478a4"
-  integrity sha512-mXKbbBPnYTG7Yra9qFBtqj+IXcsvxsvOBco3QHxtxTl+hHKq6QdzMZ+q0CtL4ORHZgwGImRr2XZUX2EWzORxig==
+pretty-format@^27.0.6:
+  version "27.0.6"
+  resolved "https://registry.yarnpkg.com/pretty-format/-/pretty-format-27.0.6.tgz#ab770c47b2c6f893a21aefc57b75da63ef49a11f"
+  integrity sha512-8tGD7gBIENgzqA+UBzObyWqQ5B778VIFZA/S66cclyd5YkFLYs2Js7gxDKf0MXtTc9zcS7t1xhdfcElJ3YIvkQ==
   dependencies:
-    "@jest/types" "^27.0.2"
+    "@jest/types" "^27.0.6"
     ansi-regex "^5.0.0"
     ansi-styles "^5.0.0"
     react-is "^17.0.1"
@@ -7623,6 +7679,15 @@ read-pkg-up@^7.0.1:
     read-pkg "^5.2.0"
     type-fest "^0.8.1"
 
+read-pkg-up@^8.0.0:
+  version "8.0.0"
+  resolved "https://registry.yarnpkg.com/read-pkg-up/-/read-pkg-up-8.0.0.tgz#72f595b65e66110f43b052dd9af4de6b10534670"
+  integrity sha512-snVCqPczksT0HS2EC+SxUndvSzn6LRCwpfSvLrIfR5BKDQQZMaI6jPRC9dYvYFDRAuFEAnkwww8kBBNE/3VvzQ==
+  dependencies:
+    find-up "^5.0.0"
+    read-pkg "^6.0.0"
+    type-fest "^1.0.1"
+
 read-pkg@^1.0.0:
   version "1.1.0"
   resolved "https://registry.yarnpkg.com/read-pkg/-/read-pkg-1.1.0.tgz#f5ffaa5ecd29cb31c0474bca7d756b6bb29e3f28"
@@ -7651,6 +7716,16 @@ read-pkg@^5.2.0:
     parse-json "^5.0.0"
     type-fest "^0.6.0"
 
+read-pkg@^6.0.0:
+  version "6.0.0"
+  resolved "https://registry.yarnpkg.com/read-pkg/-/read-pkg-6.0.0.tgz#a67a7d6a1c2b0c3cd6aa2ea521f40c458a4a504c"
+  integrity sha512-X1Fu3dPuk/8ZLsMhEj5f4wFAF0DWoK7qhGJvgaijocXxBmSToKfbFtqbxMO7bVjNA1dmE5huAzjXj/ey86iw9Q==
+  dependencies:
+    "@types/normalize-package-data" "^2.4.0"
+    normalize-package-data "^3.0.2"
+    parse-json "^5.2.0"
+    type-fest "^1.0.1"
+
 read@1, read@~1.0.1:
   version "1.0.7"
   resolved "https://registry.yarnpkg.com/read/-/read-1.0.7.tgz#b3da19bd052431a97671d44a42634adf710b40c4"
@@ -7722,6 +7797,14 @@ redent@^3.0.0:
     indent-string "^4.0.0"
     strip-indent "^3.0.0"
 
+redent@^4.0.0:
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/redent/-/redent-4.0.0.tgz#0c0ba7caabb24257ab3bb7a4fd95dd1d5c5681f9"
+  integrity sha512-tYkDkVVtYkSVhuQ4zBgfvciymHaeuel+zFKXShfDnFP5SyVEP7qo70Rf1jTOTCx3vGNAbnEi/xFkcfQVMIBWag==
+  dependencies:
+    indent-string "^5.0.0"
+    strip-indent "^4.0.0"
+
 reduce-flatten@^2.0.0:
   version "2.0.0"
   resolved "https://registry.yarnpkg.com/reduce-flatten/-/reduce-flatten-2.0.0.tgz#734fd84e65f375d7ca4465c69798c25c9d10ae27"
@@ -7928,12 +8011,12 @@ run-parallel@^1.1.9:
   dependencies:
     queue-microtask "^1.2.2"
 
-rxjs@5.5.11:
-  version "5.5.11"
-  resolved "https://registry.yarnpkg.com/rxjs/-/rxjs-5.5.11.tgz#f733027ca43e3bec6b994473be4ab98ad43ced87"
-  integrity sha512-3bjO7UwWfA2CV7lmwYMBzj4fQ6Cq+ftHc2MvUe+WMS7wcdJ1LosDWmdjPQanYp2dBRj572p7PeU81JUxHKOcBA==
+rxjs@7.2.0:
+  version "7.2.0"
+  resolved "https://registry.yarnpkg.com/rxjs/-/rxjs-7.2.0.tgz#5cd12409639e9514a71c9f5f9192b2c4ae94de31"
+  integrity sha512-aX8w9OpKrQmiPKfT1bqETtUr9JygIz6GZ+gql8v7CijClsP0laoFUdKzxFAoWuRdSlOdU2+crss+cMf+cqMTnw==
   dependencies:
-    symbol-observable "1.0.1"
+    tslib "~2.1.0"
 
 rxjs@^6.6.0:
   version "6.6.7"
@@ -8483,6 +8566,13 @@ strip-indent@^3.0.0:
   dependencies:
     min-indent "^1.0.0"
 
+strip-indent@^4.0.0:
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/strip-indent/-/strip-indent-4.0.0.tgz#b41379433dd06f5eae805e21d631e07ee670d853"
+  integrity sha512-mnVSV2l+Zv6BLpSD/8V87CW/y9EmmbYzGCIavsnsI6/nwn26DwffM/yztm30Z/I2DY9wdS3vXVCMnHDgZaVNoA==
+  dependencies:
+    min-indent "^1.0.1"
+
 strip-json-comments@^3.1.0, strip-json-comments@^3.1.1:
   version "3.1.1"
   resolved "https://registry.yarnpkg.com/strip-json-comments/-/strip-json-comments-3.1.1.tgz#31f1281b3832630434831c310c01cccda8cbe006"
@@ -8541,11 +8631,6 @@ sver-compat@^1.5.0:
     es6-iterator "^2.0.1"
     es6-symbol "^3.1.1"
 
-symbol-observable@1.0.1:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/symbol-observable/-/symbol-observable-1.0.1.tgz#8340fc4702c3122df5d22288f88283f513d3fdd4"
-  integrity sha1-g0D8RwLDEi310iKI+IKD9RPT/dQ=
-
 symbol-tree@^3.2.4:
   version "3.2.4"
   resolved "https://registry.yarnpkg.com/symbol-tree/-/symbol-tree-3.2.4.tgz#430637d248ba77e078883951fb9aa0eed7c63fa2"
@@ -8793,6 +8878,11 @@ trim-newlines@^3.0.0:
   resolved "https://registry.yarnpkg.com/trim-newlines/-/trim-newlines-3.0.1.tgz#260a5d962d8b752425b32f3a7db0dcacd176c144"
   integrity sha512-c1PTsA3tYrIsLGkJkzHF+w9F2EyxfXGo4UyJc4pFL++FMjnq0HJS69T3M7d//gKrFKwy429bouPescbjecU+Zw==
 
+trim-newlines@^4.0.1:
+  version "4.0.2"
+  resolved "https://registry.yarnpkg.com/trim-newlines/-/trim-newlines-4.0.2.tgz#d6aaaf6a0df1b4b536d183879a6b939489808c7c"
+  integrity sha512-GJtWyq9InR/2HRiLZgpIKv+ufIKrVrvjQWEj7PxAXNc5dwbNJkqhAUoAGgzRmULAnoOM5EIpveYd3J2VeSAIew==
+
 trim-off-newlines@^1.0.0:
   version "1.0.1"
   resolved "https://registry.yarnpkg.com/trim-off-newlines/-/trim-off-newlines-1.0.1.tgz#9f9ba9d9efa8764c387698bcbfeb2c848f11adb3"
@@ -8830,7 +8920,7 @@ ts-node@10.0.0:
     source-map-support "^0.5.17"
     yn "3.1.1"
 
-tslib@^1.8.1, tslib@^1.9.0, tslib@^1.9.3:
+tslib@^1.8.1, tslib@^1.9.0:
   version "1.14.1"
   resolved "https://registry.yarnpkg.com/tslib/-/tslib-1.14.1.tgz#cf2d38bdc34a134bcaf1091c41f6619e2f672d00"
   integrity sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg==
@@ -8840,6 +8930,11 @@ tslib@^2.3.0:
   resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.3.0.tgz#803b8cdab3e12ba581a4ca41c8839bbb0dacb09e"
   integrity sha512-N82ooyxVNm6h1riLCoyS9e3fuJ3AMG2zIZs2Gd1ATcSFjSA23Q0fzjjZeh0jbJvWVDZ0cJT8yaNNaaXHzueNjg==
 
+tslib@~2.1.0:
+  version "2.1.0"
+  resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.1.0.tgz#da60860f1c2ecaa5703ab7d39bc05b6bf988b97a"
+  integrity sha512-hcVC3wYEziELGGmEEXue7D75zbwIIVUMWAVbHItGPx0ziyXxrOMQx4rQEVEV45Ut/1IotuEvwqPopzIOkDMf0A==
+
 tsutils@^3.17.1, tsutils@^3.21.0:
   version "3.21.0"
   resolved "https://registry.yarnpkg.com/tsutils/-/tsutils-3.21.0.tgz#b48717d394cea6c1e096983eed58e9d61715b623"
@@ -8878,11 +8973,6 @@ type-detect@4.0.8:
   resolved "https://registry.yarnpkg.com/type-detect/-/type-detect-4.0.8.tgz#7646fb5f18871cfbb7749e69bd39a6388eb7450c"
   integrity sha512-0fr/mIH1dlO+x7TlcMy+bIDqKPsw/70tVyeHW787goQjhmqaZe10uwLujubK9q9Lg6Fiho1KUKDYz0Z7k7g5/g==
 
-type-fest@^0.13.1:
-  version "0.13.1"
-  resolved "https://registry.yarnpkg.com/type-fest/-/type-fest-0.13.1.tgz#0172cb5bce80b0bd542ea348db50c7e21834d934"
-  integrity sha512-34R7HTnG0XIJcBSn5XhDd7nNFPRcXYRZrBB2O2jdKqYODldSzBAqzsWoZYYvduky73toYS/ESqxPvkDf/F0XMg==
-
 type-fest@^0.18.0:
   version "0.18.1"
   resolved "https://registry.yarnpkg.com/type-fest/-/type-fest-0.18.1.tgz#db4bc151a4a2cf4eebf9add5db75508db6cc841f"
@@ -8913,6 +9003,11 @@ type-fest@^0.8.1:
   resolved "https://registry.yarnpkg.com/type-fest/-/type-fest-0.8.1.tgz#09e249ebde851d3b1e48d27c105444667f17b83d"
   integrity sha512-4dbzIzqvjtgiM5rw1k5rEHtBANKmdudhGyBEajN01fEyhaAIhsoKNy6y7+IN93IfpFtwY9iqi7kD+xwKhQsNJA==
 
+type-fest@^1.0.1, type-fest@^1.0.2:
+  version "1.2.2"
+  resolved "https://registry.yarnpkg.com/type-fest/-/type-fest-1.2.2.tgz#1930bc36b2064f7ab4aa307a6d1b65965199c698"
+  integrity sha512-pfkPYCcuV0TJoo/jlsUeWNV8rk7uMU6ocnYNvca1Vu+pyKi8Rl8Zo2scPt9O72gCsXIm+dMxOOWuA3VFDSdzWA==
+
 type@^1.0.1:
   version "1.2.0"
   resolved "https://registry.yarnpkg.com/type/-/type-1.2.0.tgz#848dd7698dafa3e54a6c479e759c4bc3f18847a0"
@@ -9127,10 +9222,10 @@ v8-compile-cache@^2.0.3:
   resolved "https://registry.yarnpkg.com/v8-compile-cache/-/v8-compile-cache-2.3.0.tgz#2de19618c66dc247dcfb6f99338035d8245a2cee"
   integrity sha512-l8lCEmLcLYZh4nbunNZvQCJc5pv7+RCwa8q/LdUx8u7lsWvPDKmpodJAJNwkAhJC//dFY48KuIEmjtd4RViDrA==
 
-v8-to-istanbul@^7.0.0:
-  version "7.1.2"
-  resolved "https://registry.yarnpkg.com/v8-to-istanbul/-/v8-to-istanbul-7.1.2.tgz#30898d1a7fa0c84d225a2c1434fb958f290883c1"
-  integrity sha512-TxNb7YEUwkLXCQYeudi6lgQ/SZrzNO4kMdlqVxaZPUIUjCv6iSSypUQX70kNBSERpQ8fk48+d61FXk+tgqcWow==
+v8-to-istanbul@^8.0.0:
+  version "8.0.0"
+  resolved "https://registry.yarnpkg.com/v8-to-istanbul/-/v8-to-istanbul-8.0.0.tgz#4229f2a99e367f3f018fa1d5c2b8ec684667c69c"
+  integrity sha512-LkmXi8UUNxnCC+JlH7/fsfsKr5AU110l+SYGJimWNkWhxbN5EyeOtm1MJ0hhvqMMOhGwBj1Fp70Yv9i+hX0QAg==
   dependencies:
     "@types/istanbul-lib-coverage" "^2.0.1"
     convert-source-map "^1.6.0"
@@ -9507,13 +9602,10 @@ yargs-parser@20.x, yargs-parser@^20.2.2, yargs-parser@^20.2.3:
   resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-20.2.7.tgz#61df85c113edfb5a7a4e36eb8aa60ef423cbc90a"
   integrity sha512-FiNkvbeHzB/syOjIUxFDCnhSfzAL8R5vs40MgLFBorXACCOAEaWu0gRZl14vG8MR9AOJIZbmkjhusqBYZ3HTHw==
 
-yargs-parser@^18.1.3:
-  version "18.1.3"
-  resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-18.1.3.tgz#be68c4975c6b2abf469236b0c870362fab09a7b0"
-  integrity sha512-o50j0JeToy/4K6OZcaQmW6lyXXKhq7csREXcDwk2omFPJEwUNOVtJKvmDr9EI1fAJZUyZcRF7kxGBWmRXudrCQ==
-  dependencies:
-    camelcase "^5.0.0"
-    decamelize "^1.2.0"
+yargs-parser@^20.2.7:
+  version "20.2.9"
+  resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-20.2.9.tgz#2eb7dc3b0289718fc295f362753845c41a0c94ee"
+  integrity sha512-y11nGElTIV+CT3Zv9t7VKl+Q3hTQoT9a1Qzezhhl6Rp21gJ/IVTW7Z3y9EWXhuUBC2Shnf+DX0antecpAwSP8w==
 
 yargs-parser@^5.0.1:
   version "5.0.1"
@@ -9559,3 +9651,8 @@ yn@3.1.1:
   version "3.1.1"
   resolved "https://registry.yarnpkg.com/yn/-/yn-3.1.1.tgz#1e87401a09d767c1d5eab26a6e4c185182d2eb50"
   integrity sha512-Ux4ygGWsu2c7isFWe8Yu1YluJmqVhxqK2cLXNQA5AcC3QfbGNpM7fu0Y8b/z16pXLnFxZYvWhd3fhBY9DLmC6Q==
+
+yocto-queue@^0.1.0:
+  version "0.1.0"
+  resolved "https://registry.yarnpkg.com/yocto-queue/-/yocto-queue-0.1.0.tgz#0294eb3dee05028d31ee1a5fa2c556a6aaf10a1b"
+  integrity sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==

From e8f14ec6d48b7bd8b6bc85b4b683e1c7a758ebe6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Thu, 15 Jul 2021 14:20:44 +0200
Subject: [PATCH 559/719] ARROW-13323: [Archery] Validate docker compose
 configuration
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Will create a JIRA for it.

Closes #10692 from kszucs/compose-config-validateion

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 .github/workflows/archery.yml     |  2 +-
 dev/archery/archery/docker/cli.py | 10 ++++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/archery.yml b/.github/workflows/archery.yml
index 65ae59713a4..66cd04a37c9 100644
--- a/.github/workflows/archery.yml
+++ b/.github/workflows/archery.yml
@@ -62,7 +62,7 @@ jobs:
         working-directory: dev/archery
         run: pytest -v archery
       - name: Archery Docker Validation
-        run: archery docker
+        run: archery docker check-config
       - name: Crossbow Check Config
         working-directory: dev/tasks
         run: archery crossbow check-config
diff --git a/dev/archery/archery/docker/cli.py b/dev/archery/archery/docker/cli.py
index 8d3c64dd1de..e94493af48a 100644
--- a/dev/archery/archery/docker/cli.py
+++ b/dev/archery/archery/docker/cli.py
@@ -68,6 +68,16 @@ def docker(obj, src, dry_run):
     obj['compose'] = compose
 
 
+@docker.command("check-config")
+@click.pass_obj
+def check_config(obj):
+    """
+    Validate docker-compose configuration.
+    """
+    # executes the body of the docker function above which does the validation
+    # during the configuration loading
+
+
 @docker.command('build')
 @click.argument('image')
 @click.option('--force-pull/--no-pull', default=True,

From 5a86d5342e122724e0742466de6e5c3c748d95dd Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Thu, 15 Jul 2021 08:23:36 -0400
Subject: [PATCH 560/719] ARROW-13315: [R] Wrap r_task_group includes with
 ARROW_R_WITH_ARROW checking

Also adds a CI job that checks this on pull requests (only when r/src/* are changed). This additional job is quick (~4 min) and will hopefully catch these more quickly than our crossbow jobs do.

Closes #10707 from jonkeane/ARROW-13315-arrow-without-arrow

Authored-by: Jonathan Keane <jkeane@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 .github/workflows/r-without-arrow.yml | 92 +++++++++++++++++++++++++++
 r/src/r_task_group.h                  |  4 ++
 2 files changed, 96 insertions(+)
 create mode 100644 .github/workflows/r-without-arrow.yml

diff --git a/.github/workflows/r-without-arrow.yml b/.github/workflows/r-without-arrow.yml
new file mode 100644
index 00000000000..309c6ece5d0
--- /dev/null
+++ b/.github/workflows/r-without-arrow.yml
@@ -0,0 +1,92 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: R without Arrow
+
+on:
+  push:
+    paths:
+      - ".github/workflows/r-without-arrow.yml"
+      - "r/src/**"
+  pull_request:
+    paths:
+      - ".github/workflows/r-without-arrow.yml"
+      - "r/src/**"
+
+concurrency:
+  group: ${{ github.repository }}-${{ github.ref }}-${{ github.workflow }}
+  cancel-in-progress: true
+
+env:
+  DOCKER_VOLUME_PREFIX: ".docker/"
+  ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }}
+  ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }}
+
+jobs:
+  bundled:
+    name: "R package without arrow"
+    runs-on: ubuntu-latest
+    if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
+    timeout-minutes: 60
+    strategy:
+      fail-fast: false
+    env:
+      R_ORG: rhub
+      R_IMAGE: ubuntu-gcc-release
+      R_TAG: latest
+    steps:
+      - name: Checkout Arrow
+        uses: actions/checkout@v2
+        with:
+          fetch-depth: 0
+      - name: Fetch Submodules and Tags
+        run: ci/scripts/util_checkout.sh
+      - name: Free Up Disk Space
+        run: ci/scripts/util_cleanup.sh
+      - name: Cache Docker Volumes
+        uses: actions/cache@v2
+        with:
+          path: .docker
+          key: ubuntu-gcc-release-r-${{ hashFiles('cpp/**') }}
+          restore-keys: ubuntu-gcc-release-r-
+      - name: Setup Python
+        uses: actions/setup-python@v1
+        with:
+          python-version: 3.8
+      - name: Setup Archery
+        run: pip install -e dev/archery[docker]
+      - name: Execute Docker Build
+        run: |
+          sudo sysctl -w kernel.core_pattern="core.%e.%p"
+          ulimit -c unlimited
+          archery docker run -e LIBARROW_DOWNLOAD=FALSE -e LIBARROW_BUILD=FALSE -e TEST_R_WITH_ARROW=FALSE -e NOT_CRAN=FALSE r
+      - name: Dump install logs
+        run: cat r/check/arrow.Rcheck/00install.out
+        if: always()
+      - name: Dump test logs
+        run: cat r/check/arrow.Rcheck/tests/testthat.Rout*
+        if: always()
+      - name: Save the test output
+        if: always()
+        uses: actions/upload-artifact@v2
+        with:
+          name: test-output
+          path: r/check/arrow.Rcheck/tests/testthat.Rout*
+      - name: Docker Push
+        if: success() && github.event_name == 'push' && github.repository == 'apache/arrow'
+        continue-on-error: true
+        run: archery docker push r
diff --git a/r/src/r_task_group.h b/r/src/r_task_group.h
index e1c298b27fc..723251cd9db 100644
--- a/r/src/r_task_group.h
+++ b/r/src/r_task_group.h
@@ -17,6 +17,8 @@
 
 #pragma once
 
+#if defined(ARROW_R_WITH_ARROW)
+
 #include <arrow/util/parallel.h>
 #include <arrow/util/task_group.h>
 
@@ -49,3 +51,5 @@ class RTasks {
 
 }  // namespace r
 }  // namespace arrow
+
+#endif

From c56b6e39d241018f7a427bafbf8114294fa320d7 Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Thu, 15 Jul 2021 08:26:27 -0400
Subject: [PATCH 561/719] ARROW-13215: [R] [CI] Add ENV TZ to docker files

Closes #10703 from jonkeane/ARROW-13215-tz-in-docker

Authored-by: Jonathan Keane <jkeane@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 .env                                    | 1 +
 .github/workflows/r.yml                 | 9 +++++++--
 ci/docker/linux-apt-r.dockerfile        | 3 +++
 ci/docker/linux-r.dockerfile            | 3 +++
 docker-compose.yml                      | 5 +++++
 r/tests/testthat/test-Array.R           | 4 ++--
 r/tests/testthat/test-data-type.R       | 2 +-
 r/tests/testthat/test-dplyr-lubridate.R | 2 +-
 r/tests/testthat/test-python.R          | 2 +-
 9 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/.env b/.env
index 579895e2d61..0af36084bd7 100644
--- a/.env
+++ b/.env
@@ -66,6 +66,7 @@ GCC_VERSION=""
 R_ORG=rhub
 R_IMAGE=ubuntu-gcc-release
 R_TAG=latest
+TZ=UTC
 # -1 does not attempt to install a devtoolset version, any positive integer will install devtoolset-n
 DEVTOOLSET_VERSION=-1
 
diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml
index e1647807cef..d7e3b67bc4b 100644
--- a/.github/workflows/r.yml
+++ b/.github/workflows/r.yml
@@ -89,7 +89,9 @@ jobs:
         run: |
           sudo sysctl -w kernel.core_pattern="core.%e.%p"
           ulimit -c unlimited
-          archery docker run ubuntu-r
+          # Setting a non-default and non-probable Marquesas French Polynesia time
+          # it has both with a .45 offset and very very few people who live there.
+          archery docker run -e TZ=MART ubuntu-r
       - name: Dump install logs
         run: cat r/check/arrow.Rcheck/00install.out
         if: always()
@@ -147,7 +149,10 @@ jobs:
         run: |
           sudo sysctl -w kernel.core_pattern="core.%e.%p"
           ulimit -c unlimited
-          archery docker run r
+          # Don't set a TZ here to test that case. These builds will have the following warning in them:
+          #   System has not been booted with systemd as init system (PID 1). Can't operate.
+          #   Failed to connect to bus: Host is down
+          archery docker run -e TZ="" r
       - name: Dump install logs
         run: cat r/check/arrow.Rcheck/00install.out
         if: always()
diff --git a/ci/docker/linux-apt-r.dockerfile b/ci/docker/linux-apt-r.dockerfile
index 36f4fb24aba..97029ce62ad 100644
--- a/ci/docker/linux-apt-r.dockerfile
+++ b/ci/docker/linux-apt-r.dockerfile
@@ -19,6 +19,9 @@ ARG base
 FROM ${base}
 ARG arch
 
+ARG tz="UTC"
+ENV TZ=${tz}
+
 # Build R
 # [1] https://www.digitalocean.com/community/tutorials/how-to-install-r-on-ubuntu-18-04
 # [2] https://linuxize.com/post/how-to-install-r-on-ubuntu-18-04/#installing-r-packages-from-cran
diff --git a/ci/docker/linux-r.dockerfile b/ci/docker/linux-r.dockerfile
index ac414829d42..a501d69955c 100644
--- a/ci/docker/linux-r.dockerfile
+++ b/ci/docker/linux-r.dockerfile
@@ -30,6 +30,9 @@ ENV ARROW_R_DEV=${r_dev}
 ARG devtoolset_version=-1
 ENV DEVTOOLSET_VERSION=${devtoolset_version}
 
+ARG tz="UTC"
+ENV TZ=${tz}
+
 # Make sure R is on the path for the R-hub devel versions (where RPREFIX is set in its dockerfile)
 ENV PATH "${RPREFIX}/bin:${PATH}"
 
diff --git a/docker-compose.yml b/docker-compose.yml
index 6b435e8da5d..c842ee9f0ea 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1011,6 +1011,7 @@ services:
         r: ${R}
         base: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-cpp
         gcc_version: ${GCC_VERSION}
+        tz: ${TZ}
     shm_size: *shm-size
     environment:
       <<: *ccache
@@ -1048,6 +1049,7 @@ services:
         base: ${R_ORG}/${R_IMAGE}:${R_TAG}
         r_dev: ${ARROW_R_DEV}
         devtoolset_version: ${DEVTOOLSET_VERSION}
+        tz: ${TZ}
     shm_size: *shm-size
     environment:
       LIBARROW_DOWNLOAD: "false"
@@ -1078,6 +1080,7 @@ services:
       args:
         base: wch1/r-debug:latest
         r_bin: RDsan
+        tz: ${TZ}
     environment:
       <<: *ccache
     volumes: *ubuntu-volumes
@@ -1099,6 +1102,7 @@ services:
       args:
         base: wch1/r-debug:latest
         r_bin: RDvalgrind
+        tz: ${TZ}
     environment:
       <<: *ccache
       ARROW_R_DEV: ${ARROW_R_DEV}
@@ -1123,6 +1127,7 @@ services:
       args:
         base: rstudio/r-base:4.0-focal
         r_dev: ${ARROW_R_DEV}
+        tz: ${TZ}
     shm_size: *shm-size
     environment:
       LIBARROW_DOWNLOAD: "true"
diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R
index a86bdf0add4..63ac64eee5f 100644
--- a/r/tests/testthat/test-Array.R
+++ b/r/tests/testthat/test-Array.R
@@ -279,9 +279,9 @@ test_that("Timezone handling in Arrow roundtrip (ARROW-3543)", {
   # Write a feather file as that's what the initial bug report used
   df <- tibble::tibble(
     no_tz = lubridate::ymd_hms("2018-10-07 19:04:05") + 1:10,
-    yes_tz = lubridate::ymd_hms("2018-10-07 19:04:05", tz = "Asia/Pyongyang") + 1:10
+    yes_tz = lubridate::ymd_hms("2018-10-07 19:04:05", tz = "Pacific/Marquesas") + 1:10
   )
-  if (!identical(Sys.timezone(), "Asia/Pyongyang")) {
+  if (!identical(Sys.timezone(), "Pacific/Marquesas")) {
     # Confirming that the columns are in fact different
     expect_false(any(df$no_tz == df$yes_tz))
   }
diff --git a/r/tests/testthat/test-data-type.R b/r/tests/testthat/test-data-type.R
index 25c0dd5fc9f..84c75451eaa 100644
--- a/r/tests/testthat/test-data-type.R
+++ b/r/tests/testthat/test-data-type.R
@@ -413,7 +413,7 @@ test_that("FixedSizeBinary", {
 })
 
 test_that("DataType to C-interface", {
-  datatype <- timestamp("ms", timezone = "Asia/Pyongyang")
+  datatype <- timestamp("ms", timezone = "Pacific/Marquesas")
 
   # export the datatype via the C-interface
   ptr <- allocate_arrow_schema()
diff --git a/r/tests/testthat/test-dplyr-lubridate.R b/r/tests/testthat/test-dplyr-lubridate.R
index d01afc86fef..2d9cfe6ea52 100644
--- a/r/tests/testthat/test-dplyr-lubridate.R
+++ b/r/tests/testthat/test-dplyr-lubridate.R
@@ -32,7 +32,7 @@ test_df <- tibble::tibble(date = test_date)
 # We can support this feature after ARROW-12980 is merged
 test_that("timezone aware timestamps are not supported", {
 
-  tz_aware_date <- as.POSIXct("2017-01-01 00:00:12.3456789", tz = "Asia/Pyongyang")
+  tz_aware_date <- as.POSIXct("2017-01-01 00:00:12.3456789", tz = "Pacific/Marquesas")
   tz_aware_df <- tibble::tibble(date = tz_aware_date)
 
   expect_error(
diff --git a/r/tests/testthat/test-python.R b/r/tests/testthat/test-python.R
index c7bedc518ef..7efc2b28715 100644
--- a/r/tests/testthat/test-python.R
+++ b/r/tests/testthat/test-python.R
@@ -97,7 +97,7 @@ test_that("Table with metadata roundtrip", {
 })
 
 test_that("DataType roundtrip", {
-  r <- timestamp("ms", timezone = "Asia/Pyongyang")
+  r <- timestamp("ms", timezone = "Pacific/Marquesas")
   py <- reticulate::r_to_py(r)
   expect_s3_class(py, "pyarrow.lib.DataType")
   expect_equal(reticulate::py_to_r(py), r)

From b1f90c48bc39452c9c236a988b4bbd38fea96bae Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Thu, 15 Jul 2021 21:20:49 +0800
Subject: [PATCH 562/719] ARROW-13253: [FlightRPC][C++] Fix segfault with large
 messages

We can't report errors during serialization - gRPC will just trip an assert. Instead, move these checks into the layer above so we can report them to the client or server as appropriate.

Closes #10663 from lidavidm/arrow-13253

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Yibo Cai <yibo.cai@arm.com>
---
 cpp/src/arrow/flight/client.cc                | 14 ++--
 cpp/src/arrow/flight/flight_test.cc           | 53 ++++++++++++
 .../arrow/flight/serialization_internal.cc    | 81 ++++++++++---------
 cpp/src/arrow/flight/serialization_internal.h | 20 ++---
 cpp/src/arrow/flight/server.cc                | 20 ++---
 cpp/src/arrow/flight/test_util.cc             | 40 +++++++++
 cpp/src/arrow/flight/test_util.h              |  3 +
 cpp/src/arrow/flight/types.cc                 | 15 ++++
 cpp/src/arrow/flight/types.h                  |  3 +
 python/pyarrow/tests/test_flight.py           | 73 +++++++++++++++++
 10 files changed, 260 insertions(+), 62 deletions(-)

diff --git a/cpp/src/arrow/flight/client.cc b/cpp/src/arrow/flight/client.cc
index 84fc4a28e92..f9728f849ad 100644
--- a/cpp/src/arrow/flight/client.cc
+++ b/cpp/src/arrow/flight/client.cc
@@ -688,11 +688,12 @@ class GrpcStreamWriter : public FlightStreamWriter {
   Status WriteMetadata(std::shared_ptr<Buffer> app_metadata) override {
     FlightPayload payload{};
     payload.app_metadata = app_metadata;
-    if (!internal::WritePayload(payload, writer_->stream().get())) {
+    auto status = internal::WritePayload(payload, writer_->stream().get());
+    if (status.IsIOError()) {
       return writer_->Finish(MakeFlightError(FlightStatusCode::Internal,
                                              "Could not write metadata to stream"));
     }
-    return Status::OK();
+    return status;
   }
 
   Status WriteWithMetadata(const RecordBatch& batch,
@@ -808,11 +809,12 @@ class DoPutPayloadWriter : public ipc::internal::IpcPayloadWriter {
       }
     }
 
-    if (!internal::WritePayload(payload, writer_->stream().get())) {
+    auto status = internal::WritePayload(payload, writer_->stream().get());
+    if (status.IsIOError()) {
       return writer_->Finish(MakeFlightError(FlightStatusCode::Internal,
                                              "Could not write record batch to stream"));
     }
-    return Status::OK();
+    return status;
   }
 
   Status Close() override {
@@ -850,10 +852,12 @@ Status GrpcStreamWriter<ProtoReadT, FlightReadT>::Open(
     // calls Begin() to send data, we'll send a redundant descriptor.
     FlightPayload payload{};
     RETURN_NOT_OK(internal::ToPayload(descriptor, &payload.descriptor));
-    if (!internal::WritePayload(payload, instance->writer_->stream().get())) {
+    auto status = internal::WritePayload(payload, instance->writer_->stream().get());
+    if (status.IsIOError()) {
       return writer->Finish(MakeFlightError(FlightStatusCode::Internal,
                                             "Could not write descriptor to stream"));
     }
+    RETURN_NOT_OK(status);
   }
   *out = std::move(instance);
   return Status::OK();
diff --git a/cpp/src/arrow/flight/flight_test.cc b/cpp/src/arrow/flight/flight_test.cc
index 8264f3e2197..56ca468a043 100644
--- a/cpp/src/arrow/flight/flight_test.cc
+++ b/cpp/src/arrow/flight/flight_test.cc
@@ -1478,6 +1478,59 @@ TEST_F(TestFlightClient, DoGetLargeBatch) {
   CheckDoGet(ticket, expected_batches);
 }
 
+TEST_F(TestFlightClient, FlightDataOverflowServerBatch) {
+  // Regression test for ARROW-13253
+  // N.B. this is rather a slow and memory-hungry test
+  {
+    // DoGet: check for overflow on large batch
+    Ticket ticket{"ARROW-13253-DoGet-Batch"};
+    std::unique_ptr<FlightStreamReader> stream;
+    ASSERT_OK(client_->DoGet(ticket, &stream));
+    FlightStreamChunk chunk;
+    EXPECT_RAISES_WITH_MESSAGE_THAT(
+        Invalid, ::testing::HasSubstr("Cannot send record batches exceeding 2GiB yet"),
+        stream->Next(&chunk));
+  }
+  {
+    // DoExchange: check for overflow on large batch from server
+    auto descr = FlightDescriptor::Command("large_batch");
+    std::unique_ptr<FlightStreamReader> reader;
+    std::unique_ptr<FlightStreamWriter> writer;
+    ASSERT_OK(client_->DoExchange(descr, &writer, &reader));
+    BatchVector batches;
+    EXPECT_RAISES_WITH_MESSAGE_THAT(
+        Invalid, ::testing::HasSubstr("Cannot send record batches exceeding 2GiB yet"),
+        reader->ReadAll(&batches));
+  }
+}
+
+TEST_F(TestFlightClient, FlightDataOverflowClientBatch) {
+  ASSERT_OK_AND_ASSIGN(auto batch, VeryLargeBatch());
+  {
+    // DoPut: check for overflow on large batch
+    std::unique_ptr<FlightStreamWriter> stream;
+    std::unique_ptr<FlightMetadataReader> reader;
+    auto descr = FlightDescriptor::Path({""});
+    ASSERT_OK(client_->DoPut(descr, batch->schema(), &stream, &reader));
+    EXPECT_RAISES_WITH_MESSAGE_THAT(
+        Invalid, ::testing::HasSubstr("Cannot send record batches exceeding 2GiB yet"),
+        stream->WriteRecordBatch(*batch));
+    ASSERT_OK(stream->Close());
+  }
+  {
+    // DoExchange: check for overflow on large batch from client
+    auto descr = FlightDescriptor::Command("counter");
+    std::unique_ptr<FlightStreamReader> reader;
+    std::unique_ptr<FlightStreamWriter> writer;
+    ASSERT_OK(client_->DoExchange(descr, &writer, &reader));
+    ASSERT_OK(writer->Begin(batch->schema()));
+    EXPECT_RAISES_WITH_MESSAGE_THAT(
+        Invalid, ::testing::HasSubstr("Cannot send record batches exceeding 2GiB yet"),
+        writer->WriteRecordBatch(*batch));
+    ASSERT_OK(writer->Close());
+  }
+}
+
 TEST_F(TestFlightClient, DoExchange) {
   auto descr = FlightDescriptor::Command("counter");
   BatchVector batches;
diff --git a/cpp/src/arrow/flight/serialization_internal.cc b/cpp/src/arrow/flight/serialization_internal.cc
index 8c6b737c7e5..36c6cc9e623 100644
--- a/cpp/src/arrow/flight/serialization_internal.cc
+++ b/cpp/src/arrow/flight/serialization_internal.cc
@@ -164,26 +164,18 @@ static const uint8_t kPaddingBytes[8] = {0, 0, 0, 0, 0, 0, 0, 0};
 
 // Update the sizes of our Protobuf fields based on the given IPC payload.
 grpc::Status IpcMessageHeaderSize(const arrow::ipc::IpcPayload& ipc_msg, bool has_body,
-                                  size_t* body_size, size_t* header_size,
-                                  int32_t* metadata_size) {
-  DCHECK_LT(ipc_msg.metadata->size(), kInt32Max);
+                                  size_t* header_size, int32_t* metadata_size) {
+  DCHECK_LE(ipc_msg.metadata->size(), kInt32Max);
   *metadata_size = static_cast<int32_t>(ipc_msg.metadata->size());
 
   // 1 byte for metadata tag
   *header_size += 1 + WireFormatLite::LengthDelimitedSize(*metadata_size);
 
-  for (const auto& buffer : ipc_msg.body_buffers) {
-    // Buffer may be null when the row length is zero, or when all
-    // entries are invalid.
-    if (!buffer) continue;
-
-    *body_size += static_cast<size_t>(BitUtil::RoundUpToMultipleOf8(buffer->size()));
-  }
-
   // 2 bytes for body tag
   if (has_body) {
     // We write the body tag in the header but not the actual body data
-    *header_size += 2 + WireFormatLite::LengthDelimitedSize(*body_size) - *body_size;
+    *header_size += 2 + WireFormatLite::LengthDelimitedSize(ipc_msg.body_length) -
+                    ipc_msg.body_length;
   }
 
   return grpc::Status::OK;
@@ -201,9 +193,7 @@ grpc::Status FlightDataSerialize(const FlightPayload& msg, ByteBuffer* out,
   // Write the descriptor if present
   int32_t descriptor_size = 0;
   if (msg.descriptor != nullptr) {
-    if (msg.descriptor->size() > kInt32Max) {
-      return ToGrpcStatus(Status::CapacityError("Descriptor size overflow (>= 2**31)"));
-    }
+    DCHECK_LE(msg.descriptor->size(), kInt32Max);
     descriptor_size = static_cast<int32_t>(msg.descriptor->size());
     header_size += 1 + WireFormatLite::LengthDelimitedSize(descriptor_size);
   }
@@ -211,7 +201,7 @@ grpc::Status FlightDataSerialize(const FlightPayload& msg, ByteBuffer* out,
   // App metadata tag if appropriate
   int32_t app_metadata_size = 0;
   if (msg.app_metadata && msg.app_metadata->size() > 0) {
-    DCHECK_LT(msg.app_metadata->size(), kInt32Max);
+    DCHECK_LE(msg.app_metadata->size(), kInt32Max);
     app_metadata_size = static_cast<int32_t>(msg.app_metadata->size());
     header_size += 1 + WireFormatLite::LengthDelimitedSize(app_metadata_size);
   }
@@ -223,15 +213,14 @@ grpc::Status FlightDataSerialize(const FlightPayload& msg, ByteBuffer* out,
 
   if (has_ipc) {
     DCHECK(has_body || ipc_msg.body_length == 0);
-    GRPC_RETURN_NOT_GRPC_OK(IpcMessageHeaderSize(ipc_msg, has_body, &body_size,
-                                                 &header_size, &metadata_size));
+    GRPC_RETURN_NOT_GRPC_OK(
+        IpcMessageHeaderSize(ipc_msg, has_body, &header_size, &metadata_size));
+    body_size = static_cast<size_t>(ipc_msg.body_length);
   }
 
   // TODO(wesm): messages over 2GB unlikely to be yet supported
-  if (body_size > kInt32Max) {
-    return grpc::Status(grpc::StatusCode::INVALID_ARGUMENT,
-                        "Cannot send record batches exceeding 2GB yet");
-  }
+  // Validated in WritePayload since returning error here causes gRPC to fail an assertion
+  DCHECK_LE(body_size, kInt32Max);
 
   // Allocate and initialize slices
   std::vector<grpc::Slice> slices;
@@ -404,32 +393,48 @@ ::arrow::Result<std::unique_ptr<ipc::Message>> FlightData::OpenMessage() {
 // pointer argument whichever way we want, including cast it back to the original type.
 // (see customize_protobuf.h).
 
-bool WritePayload(const FlightPayload& payload,
-                  grpc::ClientReaderWriter<pb::FlightData, pb::PutResult>* writer) {
+Status WritePayload(const FlightPayload& payload,
+                    grpc::ClientReaderWriter<pb::FlightData, pb::PutResult>* writer) {
+  RETURN_NOT_OK(payload.Validate());
   // Pretend to be pb::FlightData and intercept in SerializationTraits
-  return writer->Write(*reinterpret_cast<const pb::FlightData*>(&payload),
-                       grpc::WriteOptions());
+  if (!writer->Write(*reinterpret_cast<const pb::FlightData*>(&payload),
+                     grpc::WriteOptions())) {
+    return Status::IOError("Could not write payload to stream");
+  }
+  return Status::OK();
 }
 
-bool WritePayload(const FlightPayload& payload,
-                  grpc::ClientReaderWriter<pb::FlightData, pb::FlightData>* writer) {
+Status WritePayload(const FlightPayload& payload,
+                    grpc::ClientReaderWriter<pb::FlightData, pb::FlightData>* writer) {
+  RETURN_NOT_OK(payload.Validate());
   // Pretend to be pb::FlightData and intercept in SerializationTraits
-  return writer->Write(*reinterpret_cast<const pb::FlightData*>(&payload),
-                       grpc::WriteOptions());
+  if (!writer->Write(*reinterpret_cast<const pb::FlightData*>(&payload),
+                     grpc::WriteOptions())) {
+    return Status::IOError("Could not write payload to stream");
+  }
+  return Status::OK();
 }
 
-bool WritePayload(const FlightPayload& payload,
-                  grpc::ServerReaderWriter<pb::FlightData, pb::FlightData>* writer) {
+Status WritePayload(const FlightPayload& payload,
+                    grpc::ServerReaderWriter<pb::FlightData, pb::FlightData>* writer) {
+  RETURN_NOT_OK(payload.Validate());
   // Pretend to be pb::FlightData and intercept in SerializationTraits
-  return writer->Write(*reinterpret_cast<const pb::FlightData*>(&payload),
-                       grpc::WriteOptions());
+  if (!writer->Write(*reinterpret_cast<const pb::FlightData*>(&payload),
+                     grpc::WriteOptions())) {
+    return Status::IOError("Could not write payload to stream");
+  }
+  return Status::OK();
 }
 
-bool WritePayload(const FlightPayload& payload,
-                  grpc::ServerWriter<pb::FlightData>* writer) {
+Status WritePayload(const FlightPayload& payload,
+                    grpc::ServerWriter<pb::FlightData>* writer) {
+  RETURN_NOT_OK(payload.Validate());
   // Pretend to be pb::FlightData and intercept in SerializationTraits
-  return writer->Write(*reinterpret_cast<const pb::FlightData*>(&payload),
-                       grpc::WriteOptions());
+  if (!writer->Write(*reinterpret_cast<const pb::FlightData*>(&payload),
+                     grpc::WriteOptions())) {
+    return Status::IOError("Could not write payload to stream");
+  }
+  return Status::OK();
 }
 
 bool ReadPayload(grpc::ClientReader<pb::FlightData>* reader, FlightData* data) {
diff --git a/cpp/src/arrow/flight/serialization_internal.h b/cpp/src/arrow/flight/serialization_internal.h
index 2a75d6947ee..5f7d0cc487c 100644
--- a/cpp/src/arrow/flight/serialization_internal.h
+++ b/cpp/src/arrow/flight/serialization_internal.h
@@ -54,15 +54,17 @@ struct FlightData {
 };
 
 /// Write Flight message on gRPC stream with zero-copy optimizations.
-/// True is returned on success, false if some error occurred (connection closed?).
-bool WritePayload(const FlightPayload& payload,
-                  grpc::ClientReaderWriter<pb::FlightData, pb::PutResult>* writer);
-bool WritePayload(const FlightPayload& payload,
-                  grpc::ClientReaderWriter<pb::FlightData, pb::FlightData>* writer);
-bool WritePayload(const FlightPayload& payload,
-                  grpc::ServerReaderWriter<pb::FlightData, pb::FlightData>* writer);
-bool WritePayload(const FlightPayload& payload,
-                  grpc::ServerWriter<pb::FlightData>* writer);
+// Returns Invalid if the payload is ill-formed
+// Returns IOError if gRPC did not write the message (note this is not
+// necessarily an error - the client may simply have gone away)
+Status WritePayload(const FlightPayload& payload,
+                    grpc::ClientReaderWriter<pb::FlightData, pb::PutResult>* writer);
+Status WritePayload(const FlightPayload& payload,
+                    grpc::ClientReaderWriter<pb::FlightData, pb::FlightData>* writer);
+Status WritePayload(const FlightPayload& payload,
+                    grpc::ServerReaderWriter<pb::FlightData, pb::FlightData>* writer);
+Status WritePayload(const FlightPayload& payload,
+                    grpc::ServerWriter<pb::FlightData>* writer);
 
 /// Read Flight message from gRPC stream with zero-copy optimizations.
 /// True is returned on success, false if stream ended.
diff --git a/cpp/src/arrow/flight/server.cc b/cpp/src/arrow/flight/server.cc
index 8ed76e78da8..b52c1624657 100644
--- a/cpp/src/arrow/flight/server.cc
+++ b/cpp/src/arrow/flight/server.cc
@@ -336,10 +336,7 @@ class DoExchangeMessageWriter : public FlightMessageWriter {
 
  private:
   Status WritePayload(const FlightPayload& payload) {
-    if (!internal::WritePayload(payload, stream_)) {
-      // gRPC doesn't give us any way to find what the error was (if any).
-      return Status::IOError("Could not write payload to stream");
-    }
+    RETURN_NOT_OK(internal::WritePayload(payload, stream_));
     ++stats_.num_messages;
     return Status::OK();
   }
@@ -658,21 +655,24 @@ class FlightServiceImpl : public FlightService::Service {
     // Write the schema as the first message in the stream
     FlightPayload schema_payload;
     SERVICE_RETURN_NOT_OK(flight_context, data_stream->GetSchemaPayload(&schema_payload));
-    if (!internal::WritePayload(schema_payload, writer)) {
+    auto status = internal::WritePayload(schema_payload, writer);
+    if (status.IsIOError()) {
       // gRPC doesn't give any way for us to know why the message
       // could not be written.
       RETURN_WITH_MIDDLEWARE(flight_context, grpc::Status::OK);
     }
+    SERVICE_RETURN_NOT_OK(flight_context, status);
 
     // Consume data stream and write out payloads
     while (true) {
       FlightPayload payload;
       SERVICE_RETURN_NOT_OK(flight_context, data_stream->Next(&payload));
-      if (payload.ipc_message.metadata == nullptr ||
-          !internal::WritePayload(payload, writer))
-        // No more messages to write, or connection terminated for some other
-        // reason
-        break;
+      // End of stream
+      if (payload.ipc_message.metadata == nullptr) break;
+      auto status = internal::WritePayload(payload, writer);
+      // Connection terminated
+      if (status.IsIOError()) break;
+      SERVICE_RETURN_NOT_OK(flight_context, status);
     }
     RETURN_WITH_MIDDLEWARE(flight_context, grpc::Status::OK);
   }
diff --git a/cpp/src/arrow/flight/test_util.cc b/cpp/src/arrow/flight/test_util.cc
index 6136b4367c0..4e387f34b6c 100644
--- a/cpp/src/arrow/flight/test_util.cc
+++ b/cpp/src/arrow/flight/test_util.cc
@@ -212,6 +212,14 @@ class FlightTestServer : public FlightServerBase {
     if (request.ticket == "ARROW-5095-success") {
       return Status::OK();
     }
+    if (request.ticket == "ARROW-13253-DoGet-Batch") {
+      // Make batch > 2GiB in size
+      ARROW_ASSIGN_OR_RAISE(auto batch, VeryLargeBatch());
+      ARROW_ASSIGN_OR_RAISE(auto reader, RecordBatchReader::Make({batch}));
+      *data_stream =
+          std::unique_ptr<FlightDataStream>(new RecordBatchStream(std::move(reader)));
+      return Status::OK();
+    }
 
     std::shared_ptr<RecordBatchReader> batch_reader;
     RETURN_NOT_OK(GetBatchForFlight(request, &batch_reader));
@@ -220,6 +228,12 @@ class FlightTestServer : public FlightServerBase {
     return Status::OK();
   }
 
+  Status DoPut(const ServerCallContext&, std::unique_ptr<FlightMessageReader> reader,
+               std::unique_ptr<FlightMetadataWriter> writer) override {
+    BatchVector batches;
+    return reader->ReadAll(&batches);
+  }
+
   Status DoExchange(const ServerCallContext& context,
                     std::unique_ptr<FlightMessageReader> reader,
                     std::unique_ptr<FlightMessageWriter> writer) override {
@@ -242,6 +256,8 @@ class FlightTestServer : public FlightServerBase {
       return RunExchangeTotal(std::move(reader), std::move(writer));
     } else if (cmd == "echo") {
       return RunExchangeEcho(std::move(reader), std::move(writer));
+    } else if (cmd == "large_batch") {
+      return RunExchangeLargeBatch(std::move(reader), std::move(writer));
     } else {
       return Status::NotImplemented("Scenario not implemented: ", cmd);
     }
@@ -401,6 +417,14 @@ class FlightTestServer : public FlightServerBase {
     return Status::OK();
   }
 
+  // Regression test for ARROW-13253
+  Status RunExchangeLargeBatch(std::unique_ptr<FlightMessageReader>,
+                               std::unique_ptr<FlightMessageWriter> writer) {
+    ARROW_ASSIGN_OR_RAISE(auto batch, VeryLargeBatch());
+    RETURN_NOT_OK(writer->Begin(batch->schema()));
+    return writer->WriteRecordBatch(*batch);
+  }
+
   Status RunAction1(const Action& action, std::unique_ptr<ResultStream>* out) {
     std::vector<Result> results;
     for (int i = 0; i < 3; ++i) {
@@ -616,6 +640,22 @@ Status ExampleLargeBatches(BatchVector* out) {
   return Status::OK();
 }
 
+arrow::Result<std::shared_ptr<RecordBatch>> VeryLargeBatch() {
+  // In CI, some platforms don't let us allocate one very large
+  // buffer, so allocate a smaller buffer and repeat it a few times
+  constexpr int64_t nbytes = (1ul << 27ul) + 8ul;
+  constexpr int64_t nrows = nbytes / 8;
+  constexpr int64_t ncols = 16;
+  ARROW_ASSIGN_OR_RAISE(auto values, AllocateBuffer(nbytes));
+  std::memset(values->mutable_data(), 0x00, values->capacity());
+  std::vector<std::shared_ptr<Buffer>> buffers = {nullptr, std::move(values)};
+  auto array = std::make_shared<ArrayData>(int64(), nrows, buffers,
+                                           /*null_count=*/0);
+  std::vector<std::shared_ptr<ArrayData>> arrays(ncols, array);
+  std::vector<std::shared_ptr<Field>> fields(ncols, field("a", int64()));
+  return RecordBatch::Make(schema(std::move(fields)), nrows, std::move(arrays));
+}
+
 std::vector<ActionType> ExampleActionTypes() {
   return {{"drop", "drop a dataset"}, {"cache", "cache a dataset"}};
 }
diff --git a/cpp/src/arrow/flight/test_util.h b/cpp/src/arrow/flight/test_util.h
index fff82df3d2c..c912c342afe 100644
--- a/cpp/src/arrow/flight/test_util.h
+++ b/cpp/src/arrow/flight/test_util.h
@@ -163,6 +163,9 @@ Status ExampleNestedBatches(BatchVector* out);
 ARROW_FLIGHT_EXPORT
 Status ExampleLargeBatches(BatchVector* out);
 
+ARROW_FLIGHT_EXPORT
+arrow::Result<std::shared_ptr<RecordBatch>> VeryLargeBatch();
+
 ARROW_FLIGHT_EXPORT
 std::vector<FlightInfo> ExampleFlightInfo();
 
diff --git a/cpp/src/arrow/flight/types.cc b/cpp/src/arrow/flight/types.cc
index 8139b213a82..313be122914 100644
--- a/cpp/src/arrow/flight/types.cc
+++ b/cpp/src/arrow/flight/types.cc
@@ -21,6 +21,7 @@
 #include <sstream>
 #include <utility>
 
+#include "arrow/buffer.h"
 #include "arrow/flight/serialization_internal.h"
 #include "arrow/io/memory.h"
 #include "arrow/ipc/dictionary.h"
@@ -126,6 +127,20 @@ std::string FlightDescriptor::ToString() const {
   return ss.str();
 }
 
+Status FlightPayload::Validate() const {
+  static constexpr int64_t kInt32Max = std::numeric_limits<int32_t>::max();
+  if (descriptor && descriptor->size() > kInt32Max) {
+    return Status::CapacityError("Descriptor size overflow (>= 2**31)");
+  }
+  if (app_metadata && app_metadata->size() > kInt32Max) {
+    return Status::CapacityError("app_metadata size overflow (>= 2**31)");
+  }
+  if (ipc_message.body_length > kInt32Max) {
+    return Status::Invalid("Cannot send record batches exceeding 2GiB yet");
+  }
+  return Status::OK();
+}
+
 Status SchemaResult::GetSchema(ipc::DictionaryMemo* dictionary_memo,
                                std::shared_ptr<Schema>* out) const {
   io::BufferReader schema_reader(raw_schema_);
diff --git a/cpp/src/arrow/flight/types.h b/cpp/src/arrow/flight/types.h
index cd37318ef1d..1e3051d5c29 100644
--- a/cpp/src/arrow/flight/types.h
+++ b/cpp/src/arrow/flight/types.h
@@ -341,6 +341,9 @@ struct ARROW_FLIGHT_EXPORT FlightPayload {
   std::shared_ptr<Buffer> descriptor;
   std::shared_ptr<Buffer> app_metadata;
   ipc::IpcPayload ipc_message;
+
+  /// \brief Check that the payload can be written to the wire.
+  Status Validate() const;
 };
 
 /// \brief Schema result returned after a schema request RPC
diff --git a/python/pyarrow/tests/test_flight.py b/python/pyarrow/tests/test_flight.py
index 36b6d6610d2..1b838f54d47 100644
--- a/python/pyarrow/tests/test_flight.py
+++ b/python/pyarrow/tests/test_flight.py
@@ -209,6 +209,10 @@ def do_put(self, context, descriptor, reader, writer):
             assert self.expected_schema == reader.schema
         self.last_message = reader.read_all()
 
+    def do_exchange(self, context, descriptor, reader, writer):
+        for chunk in reader:
+            pass
+
 
 class EchoStreamFlightServer(EchoFlightServer):
     """An echo server that streams individual record batches."""
@@ -816,6 +820,23 @@ def sending_headers(self):
         return MultiHeaderClientMiddleware.EXPECTED
 
 
+class LargeMetadataFlightServer(FlightServerBase):
+    """Regression test for ARROW-13253."""
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._metadata = b' ' * (2 ** 31 + 1)
+
+    def do_get(self, context, ticket):
+        schema = pa.schema([('a', pa.int64())])
+        return flight.GeneratorStream(schema, [
+            (pa.record_batch([[1]], schema=schema), self._metadata),
+        ])
+
+    def do_exchange(self, context, descriptor, reader, writer):
+        writer.write_metadata(self._metadata)
+
+
 def test_flight_server_location_argument():
     locations = [
         None,
@@ -1903,3 +1924,55 @@ def test_never_sends_data():
         # up to a certain extent
         table = client.do_get(flight.Ticket(b'yield_data')).read_all()
         assert table.num_rows == 5
+
+
+@pytest.mark.large_memory
+@pytest.mark.slow
+def test_large_descriptor():
+    # Regression test for ARROW-13253. Placed here with appropriate marks
+    # since some CI pipelines can't run the C++ equivalent
+    large_descriptor = flight.FlightDescriptor.for_command(
+        b' ' * (2 ** 31 + 1))
+    with FlightServerBase() as server:
+        client = flight.connect(('localhost', server.port))
+        with pytest.raises(OSError,
+                           match="Failed to serialize Flight descriptor"):
+            writer, _ = client.do_put(large_descriptor, pa.schema([]))
+            writer.close()
+        with pytest.raises(pa.ArrowException,
+                           match="Failed to serialize Flight descriptor"):
+            client.do_exchange(large_descriptor)
+
+
+@pytest.mark.large_memory
+@pytest.mark.slow
+def test_large_metadata_client():
+    # Regression test for ARROW-13253
+    descriptor = flight.FlightDescriptor.for_command(b'')
+    metadata = b' ' * (2 ** 31 + 1)
+    with EchoFlightServer() as server:
+        client = flight.connect(('localhost', server.port))
+        with pytest.raises(pa.ArrowCapacityError,
+                           match="app_metadata size overflow"):
+            writer, _ = client.do_put(descriptor, pa.schema([]))
+            with writer:
+                writer.write_metadata(metadata)
+                writer.close()
+        with pytest.raises(pa.ArrowCapacityError,
+                           match="app_metadata size overflow"):
+            writer, reader = client.do_exchange(descriptor)
+            with writer:
+                writer.write_metadata(metadata)
+
+    del metadata
+    with LargeMetadataFlightServer() as server:
+        client = flight.connect(('localhost', server.port))
+        with pytest.raises(flight.FlightServerError,
+                           match="app_metadata size overflow"):
+            reader = client.do_get(flight.Ticket(b''))
+            reader.read_all()
+        with pytest.raises(pa.ArrowException,
+                           match="app_metadata size overflow"):
+            writer, reader = client.do_exchange(descriptor)
+            with writer:
+                reader.read_all()

From 3e7af175851a551de38be7cb882680319f198c2b Mon Sep 17 00:00:00 2001
From: Alessandro Molina <amol@turbogears.org>
Date: Thu, 15 Jul 2021 16:24:36 +0200
Subject: [PATCH 563/719] ARROW-12667: [Python] Add a more complete test for
 strided numpy array conversion
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #10709 from amol-/ARROW-12667

Authored-by: Alessandro Molina <amol@turbogears.org>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 python/pyarrow/tests/test_array.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index 9f6ab678a95..57224ef4ebe 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -2229,6 +2229,23 @@ def test_array_from_strided_bool():
     assert result.equals(expected)
 
 
+def test_array_from_strided():
+    pydata = [
+        ([b"ab", b"cd", b"ef"], (pa.binary(), pa.binary(2))),
+        ([1, 2, 3], (pa.int8(), pa.int16(), pa.int32(), pa.int64())),
+        ([1.0, 2.0, 3.0], (pa.float32(), pa.float64())),
+        (["ab", "cd", "ef"], (pa.utf8(), ))
+    ]
+
+    for values, dtypes in pydata:
+        nparray = np.array(values)
+        for patype in dtypes:
+            for mask in (None, np.array([False, False])):
+                arrow_array = pa.array(nparray[::2], patype,
+                                       mask=mask)
+                assert values[::2] == arrow_array.to_pylist()
+
+
 def test_boolean_true_count_false_count():
     # ARROW-9145
     arr = pa.array([True, True, None, False, None, True] * 1000)

From 84d6ed7784000511782297ef37a64cf143175505 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Thu, 15 Jul 2021 16:30:13 +0200
Subject: [PATCH 564/719] ARROW-13283: [Archery][Dev] Support passing
 CPU/memory limits to Docker
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This adds a fairly simplistic way to pass CPU/memory limits to Docker, with default values emulating GitHub actions. Note you'll likely need `ARCHERY_DOCKER_BIN="sudo docker"` unless you have rootless Docker configured with cgroups v2 (else Docker will silently ignore the limits).

Closes #10690 from lidavidm/arrow-13283

Lead-authored-by: David Li <li.davidm96@gmail.com>
Co-authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 dev/archery/archery/docker/cli.py             |  16 +-
 dev/archery/archery/docker/core.py            |  18 +-
 .../archery/docker/tests/test_docker.py       |  19 ++
 .../archery/docker/tests/test_docker_cli.py   | 201 ++++++++++++++++++
 dev/archery/archery/tests/test_cli.py         | 143 +------------
 docker-compose.yml                            |  16 ++
 6 files changed, 274 insertions(+), 139 deletions(-)
 create mode 100644 dev/archery/archery/docker/tests/test_docker_cli.py

diff --git a/dev/archery/archery/docker/cli.py b/dev/archery/archery/docker/cli.py
index e94493af48a..01571b43c95 100644
--- a/dev/archery/archery/docker/cli.py
+++ b/dev/archery/archery/docker/cli.py
@@ -47,11 +47,12 @@ def _execute(self, *args, **kwargs):
 @click.option('--dry-run/--execute', default=False,
               help="Display the docker-compose commands instead of executing "
                    "them.")
-@click.pass_obj
-def docker(obj, src, dry_run):
+@click.pass_context
+def docker(ctx, src, dry_run):
     """
     Interact with docker-compose based builds.
     """
+    ctx.ensure_object(dict)
 
     config_path = src.path / 'docker-compose.yml'
     if not config_path.exists():
@@ -65,7 +66,7 @@ def docker(obj, src, dry_run):
     compose = DockerCompose(config_path, params=os.environ)
     if dry_run:
         _mock_compose_calls(compose)
-    obj['compose'] = compose
+    ctx.obj['compose'] = compose
 
 
 @docker.command("check-config")
@@ -155,12 +156,18 @@ def docker_build(obj, image, *, force_pull, using_docker_cli,
               help="Whether to use cache when building only the (leaf) image "
                    "passed as the argument. To disable caching for both the "
                    "image and its ancestors use --no-cache option.")
+@click.option('--resource-limit', default=None,
+              help="A CPU/memory limit preset to mimic CI environments like "
+                   "GitHub Actions. Implies --using-docker-cli. Note that "
+                   "exporting ARCHERY_DOCKER_BIN=\"sudo docker\" is likely "
+                   "required, unless Docker is configured with cgroups v2 "
+                   "(else Docker will silently ignore the limits).")
 @click.option('--volume', '-v', multiple=True,
               help="Set volume within the container")
 @click.pass_obj
 def docker_run(obj, image, command, *, env, user, force_pull, force_build,
                build_only, using_docker_cli, using_docker_buildx, use_cache,
-               use_leaf_cache, volume):
+               use_leaf_cache, resource_limit, volume):
     """
     Execute docker-compose builds.
 
@@ -213,6 +220,7 @@ def docker_run(obj, image, command, *, env, user, force_pull, force_build,
             env=env,
             user=user,
             using_docker=using_docker_cli,
+            resource_limit=resource_limit,
             volumes=volume
         )
     except UndefinedImage as e:
diff --git a/dev/archery/archery/docker/core.py b/dev/archery/archery/docker/core.py
index 6d15b21c788..2fe6e353ccc 100644
--- a/dev/archery/archery/docker/core.py
+++ b/dev/archery/archery/docker/core.py
@@ -105,6 +105,7 @@ def _read_config(self, config_path, compose_bin):
 
         services = config['services'].keys()
         self.hierarchy = dict(flatten(config.get('x-hierarchy', {})))
+        self.limit_presets = config.get('x-limit-presets', {})
         self.with_gpus = config.get('x-with-gpus', [])
         nodes = self.hierarchy.keys()
         errors = []
@@ -316,7 +317,7 @@ def _build(service, use_cache):
         _build(service, use_cache=use_cache and use_leaf_cache)
 
     def run(self, service_name, command=None, *, env=None, volumes=None,
-            user=None, using_docker=False):
+            user=None, using_docker=False, resource_limit=None):
         service = self.config.get(service_name)
 
         args = []
@@ -331,7 +332,7 @@ def run(self, service_name, command=None, *, env=None, volumes=None,
             for volume in volumes:
                 args.extend(['--volume', volume])
 
-        if using_docker or service['need_gpu']:
+        if using_docker or service['need_gpu'] or resource_limit:
             # use gpus, requires docker>=19.03
             if service['need_gpu']:
                 args.extend(['--gpus', 'all'])
@@ -354,6 +355,19 @@ def run(self, service_name, command=None, *, env=None, volumes=None,
             if command in ['cmd.exe', 'bash', 'sh', 'powershell']:
                 args.append('-it')
 
+            if resource_limit:
+                limits = self.config.limit_presets.get(resource_limit)
+                if not limits:
+                    raise ValueError(
+                        f"Unknown resource limit preset '{resource_limit}'")
+                cpuset = limits.get('cpuset_cpus', [])
+                if cpuset:
+                    args.append(f'--cpuset-cpus={",".join(map(str, cpuset))}')
+                memory = limits.get('memory')
+                if memory:
+                    args.append(f'--memory={memory}')
+                    args.append(f'--memory-swap={memory}')
+
             # get the actual docker image name instead of the compose service
             # name which we refer as image in general
             args.append(service['image'])
diff --git a/dev/archery/archery/docker/tests/test_docker.py b/dev/archery/archery/docker/tests/test_docker.py
index 09dcd27a713..982f3bfc189 100644
--- a/dev/archery/archery/docker/tests/test_docker.py
+++ b/dev/archery/archery/docker/tests/test_docker.py
@@ -128,6 +128,11 @@
       - ubuntu-ruby
   - ubuntu-cuda
 
+x-limit-presets:
+  github:
+    cpuset_cpus: [0, 1]
+    memory: 7g
+
 services:
   conda-cpp:
     image: org/conda-cpp
@@ -448,6 +453,20 @@ def test_compose_run(arrow_compose_path):
         compose.run('conda-python', volumes=volumes)
 
 
+def test_compose_run_with_resource_limits(arrow_compose_path):
+    expected_calls = [
+        format_run([
+            "--cpuset-cpus=0,1",
+            "--memory=7g",
+            "--memory-swap=7g",
+            "org/conda-cpp"
+        ]),
+    ]
+    compose = DockerCompose(arrow_compose_path)
+    with assert_docker_calls(compose, expected_calls):
+        compose.run('conda-cpp', resource_limit="github")
+
+
 def test_compose_push(arrow_compose_path):
     compose = DockerCompose(arrow_compose_path, params=dict(PYTHON='3.8'))
     expected_env = PartialEnv(PYTHON="3.8")
diff --git a/dev/archery/archery/docker/tests/test_docker_cli.py b/dev/archery/archery/docker/tests/test_docker_cli.py
new file mode 100644
index 00000000000..ab39c7b9dbb
--- /dev/null
+++ b/dev/archery/archery/docker/tests/test_docker_cli.py
@@ -0,0 +1,201 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from unittest.mock import patch
+
+from click.testing import CliRunner
+
+from archery.docker import DockerCompose
+from archery.docker.cli import docker
+
+
+@patch.object(DockerCompose, "pull")
+@patch.object(DockerCompose, "build")
+@patch.object(DockerCompose, "run")
+def test_docker_run_with_custom_command(run, build, pull):
+    # with custom command
+    args = ["run", "ubuntu-cpp", "bash"]
+    result = CliRunner().invoke(docker, args)
+
+    assert result.exit_code == 0
+    pull.assert_called_once_with(
+        "ubuntu-cpp", pull_leaf=True, using_docker=False
+    )
+    build.assert_called_once_with(
+        "ubuntu-cpp",
+        use_cache=True,
+        use_leaf_cache=True,
+        using_docker=False,
+        using_buildx=False
+    )
+    run.assert_called_once_with(
+        "ubuntu-cpp",
+        command="bash",
+        env={},
+        resource_limit=None,
+        user=None,
+        using_docker=False,
+        volumes=(),
+    )
+
+
+@patch.object(DockerCompose, "pull")
+@patch.object(DockerCompose, "build")
+@patch.object(DockerCompose, "run")
+def test_docker_run_options(run, build, pull):
+    # environment variables and volumes
+    args = [
+        "run",
+        "-e",
+        "ARROW_GANDIVA=OFF",
+        "-e",
+        "ARROW_FLIGHT=ON",
+        "--volume",
+        "./build:/build",
+        "-v",
+        "./ccache:/ccache:delegated",
+        "-u",
+        "root",
+        "ubuntu-cpp",
+    ]
+    result = CliRunner().invoke(docker, args)
+    assert result.exit_code == 0
+    pull.assert_called_once_with(
+        "ubuntu-cpp", pull_leaf=True, using_docker=False
+    )
+    build.assert_called_once_with(
+        "ubuntu-cpp",
+        use_cache=True,
+        use_leaf_cache=True,
+        using_docker=False,
+        using_buildx=False
+    )
+    run.assert_called_once_with(
+        "ubuntu-cpp",
+        command=None,
+        env={"ARROW_GANDIVA": "OFF", "ARROW_FLIGHT": "ON"},
+        resource_limit=None,
+        user="root",
+        using_docker=False,
+        volumes=(
+            "./build:/build",
+            "./ccache:/ccache:delegated",
+        ),
+    )
+
+
+@patch.object(DockerCompose, "run")
+def test_docker_limit_options(run):
+    # environment variables and volumes
+    args = [
+        "run",
+        "-e",
+        "ARROW_GANDIVA=OFF",
+        "-e",
+        "ARROW_FLIGHT=ON",
+        "--volume",
+        "./build:/build",
+        "-v",
+        "./ccache:/ccache:delegated",
+        "-u",
+        "root",
+        "--resource-limit=github",
+        "--no-build",
+        "--no-pull",
+        "ubuntu-cpp",
+    ]
+    result = CliRunner().invoke(docker, args)
+    assert result.exit_code == 0
+    run.assert_called_once_with(
+        "ubuntu-cpp",
+        command=None,
+        env={"ARROW_GANDIVA": "OFF", "ARROW_FLIGHT": "ON"},
+        resource_limit="github",
+        user="root",
+        using_docker=False,
+        volumes=(
+            "./build:/build",
+            "./ccache:/ccache:delegated",
+        ),
+    )
+
+
+@patch.object(DockerCompose, "run")
+def test_docker_run_without_pulling_or_building(run):
+    args = ["run", "--no-pull", "--no-build", "ubuntu-cpp"]
+    result = CliRunner().invoke(docker, args)
+    assert result.exit_code == 0
+    run.assert_called_once_with(
+        "ubuntu-cpp",
+        command=None,
+        env={},
+        resource_limit=None,
+        user=None,
+        using_docker=False,
+        volumes=(),
+    )
+
+
+@patch.object(DockerCompose, "pull")
+@patch.object(DockerCompose, "build")
+def test_docker_run_only_pulling_and_building(build, pull):
+    args = ["run", "ubuntu-cpp", "--build-only"]
+    result = CliRunner().invoke(docker, args)
+    assert result.exit_code == 0
+    pull.assert_called_once_with(
+        "ubuntu-cpp", pull_leaf=True, using_docker=False
+    )
+    build.assert_called_once_with(
+        "ubuntu-cpp",
+        use_cache=True,
+        use_leaf_cache=True,
+        using_docker=False,
+        using_buildx=False
+    )
+
+
+@patch.object(DockerCompose, "build")
+@patch.object(DockerCompose, "run")
+def test_docker_run_without_build_cache(run, build):
+    args = [
+        "run",
+        "--no-pull",
+        "--force-build",
+        "--user",
+        "me",
+        "--no-cache",
+        "--no-leaf-cache",
+        "ubuntu-cpp",
+    ]
+    result = CliRunner().invoke(docker, args)
+    assert result.exit_code == 0
+    build.assert_called_once_with(
+        "ubuntu-cpp",
+        use_cache=False,
+        use_leaf_cache=False,
+        using_docker=False,
+        using_buildx=False
+    )
+    run.assert_called_once_with(
+        "ubuntu-cpp",
+        command=None,
+        env={},
+        resource_limit=None,
+        user="me",
+        using_docker=False,
+        volumes=(),
+    )
diff --git a/dev/archery/archery/tests/test_cli.py b/dev/archery/archery/tests/test_cli.py
index b3199dfaf1f..3891a2c288d 100644
--- a/dev/archery/archery/tests/test_cli.py
+++ b/dev/archery/archery/tests/test_cli.py
@@ -15,148 +15,25 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from pathlib import Path
 from unittest.mock import patch
 
 from click.testing import CliRunner
 
 from archery.cli import archery
-from archery.docker import DockerCompose
 
 
-@patch.object(DockerCompose, "pull")
-@patch.object(DockerCompose, "build")
-@patch.object(DockerCompose, "run")
-def test_docker_run_with_custom_command(run, build, pull):
-    # with custom command
-    args = ["docker", "run", "ubuntu-cpp", "bash"]
-    result = CliRunner().invoke(archery, args)
-    assert result.exit_code == 0
-    pull.assert_called_once_with(
-        "ubuntu-cpp", pull_leaf=True, using_docker=False
-    )
-    build.assert_called_once_with(
-        "ubuntu-cpp",
-        use_cache=True,
-        use_leaf_cache=True,
-        using_docker=False,
-        using_buildx=False
-    )
-    run.assert_called_once_with(
-        "ubuntu-cpp",
-        command="bash",
-        env={},
-        user=None,
-        using_docker=False,
-        volumes=(),
-    )
-
-
-@patch.object(DockerCompose, "pull")
-@patch.object(DockerCompose, "build")
-@patch.object(DockerCompose, "run")
-def test_docker_run_options(run, build, pull):
-    # environment variables and volumes
-    args = [
-        "docker",
-        "run",
-        "-e",
-        "ARROW_GANDIVA=OFF",
-        "-e",
-        "ARROW_FLIGHT=ON",
-        "--volume",
-        "./build:/build",
-        "-v",
-        "./ccache:/ccache:delegated",
-        "-u",
-        "root",
-        "ubuntu-cpp",
-    ]
-    result = CliRunner().invoke(archery, args)
-    assert result.exit_code == 0
-    pull.assert_called_once_with(
-        "ubuntu-cpp", pull_leaf=True, using_docker=False
-    )
-    build.assert_called_once_with(
-        "ubuntu-cpp",
-        use_cache=True,
-        use_leaf_cache=True,
-        using_docker=False,
-        using_buildx=False
-    )
-    run.assert_called_once_with(
-        "ubuntu-cpp",
-        command=None,
-        env={"ARROW_GANDIVA": "OFF", "ARROW_FLIGHT": "ON"},
-        user="root",
-        using_docker=False,
-        volumes=(
-            "./build:/build",
-            "./ccache:/ccache:delegated",
-        ),
-    )
-
-
-@patch.object(DockerCompose, "run")
-def test_docker_run_without_pulling_or_building(run):
-    args = ["docker", "run", "--no-pull", "--no-build", "ubuntu-cpp"]
-    result = CliRunner().invoke(archery, args)
-    assert result.exit_code == 0
-    run.assert_called_once_with(
-        "ubuntu-cpp",
-        command=None,
-        env={},
-        user=None,
-        using_docker=False,
-        volumes=(),
-    )
-
-
-@patch.object(DockerCompose, "pull")
-@patch.object(DockerCompose, "build")
-def test_docker_run_only_pulling_and_building(build, pull):
-    args = ["docker", "run", "ubuntu-cpp", "--build-only"]
-    result = CliRunner().invoke(archery, args)
-    assert result.exit_code == 0
-    pull.assert_called_once_with(
-        "ubuntu-cpp", pull_leaf=True, using_docker=False
-    )
-    build.assert_called_once_with(
-        "ubuntu-cpp",
-        use_cache=True,
-        use_leaf_cache=True,
-        using_docker=False,
-        using_buildx=False
-    )
-
-
-@patch.object(DockerCompose, "build")
-@patch.object(DockerCompose, "run")
-def test_docker_run_without_build_cache(run, build):
+@patch("archery.linking.check_dynamic_library_dependencies")
+def test_linking_check_dependencies(fn):
     args = [
-        "docker",
-        "run",
-        "--no-pull",
-        "--force-build",
-        "--user",
-        "me",
-        "--no-cache",
-        "--no-leaf-cache",
-        "ubuntu-cpp",
+        "linking",
+        "check-dependencies",
+        "-a", "libarrow",
+        "-d", "libcurl",
+        "somelib.so"
     ]
     result = CliRunner().invoke(archery, args)
     assert result.exit_code == 0
-    build.assert_called_once_with(
-        "ubuntu-cpp",
-        use_cache=False,
-        use_leaf_cache=False,
-        using_docker=False,
-        using_buildx=False
-    )
-    run.assert_called_once_with(
-        "ubuntu-cpp",
-        command=None,
-        env={},
-        user="me",
-        using_docker=False,
-        volumes=(),
+    fn.assert_called_once_with(
+        Path('somelib.so'), allowed={'libarrow'}, disallowed={'libcurl'}
     )
diff --git a/docker-compose.yml b/docker-compose.yml
index c842ee9f0ea..93bba30772b 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -62,6 +62,22 @@ x-ccache: &ccache
   CCACHE_MAXSIZE: 500M
   CCACHE_DIR: /ccache
 
+# CPU/memory limit presets to pass to Docker.
+#
+# Usage: archery docker run --resource-limit=github <image>
+#
+# Note that exporting ARCHERY_DOCKER_BIN="sudo docker" is likely required,
+# unless Docker is configured with cgroups v2 (else Docker will silently
+# ignore the limits).
+x-limit-presets:
+  # These values emulate GitHub Actions:
+  # https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners
+  github:
+    # Note we use cpuset and not cpus since Ninja only detects and limits
+    # parallelism given the former
+    cpuset_cpus: [0, 1]
+    memory: 7g
+
 x-with-gpus:
   - ubuntu-cuda-cpp
   - ubuntu-cuda-python

From a628ee0bd3e5644cc9dc1fa33935c0abf8355c90 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Pedro?= <joaop@simbioseventures.com>
Date: Thu, 15 Jul 2021 21:26:55 +0530
Subject: [PATCH 565/719] ARROW-12986: [C++][Gandiva] Implement new cache
 eviction policy algorithm in Gandiva
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This PR replaces the LRU based cache for gandiva with a new cache which takes into account the LLVM build time along with the LRU factor.
Here is a description of the suggested algorithm:

```
// A particular cache based on the GreedyDual-Size cache which is a generalization of LRU
// which defines costs for each cache values.
// The algorithm associates a cost, C, with each cache value. Initially, when the value
// is brought into cache, C is set to be the cost related to the value (the cost is
// always non-negative). When a replacement needs to be made, the value with the lowest C
// cost is replaced, and then all values reduce their C costs by the minimum value of C
// over all the values already in the cache.
// If a value is accessed, its C value is restored to its initial cost. Thus, the C costs
// of recently accessed values retain a larger portion of the original cost than those of
// values that have not been accessed for a long time. The C costs are reduced as time
// goes and are restored when accessed.
```
More info [here](https://www.usenix.org/legacy/publications/library/proceedings/usits97/full_papers/cao/cao_html/node8.html)

Closes #10465 from jpedroantunes/feature/change-cache-policy and squashes the following commits:

02a799844 <João Pedro> Add todo for overflow handling for correctness
a62f2d433 <João Pedro> Add overflow handler for cache algorithm
540ac766c <João Pedro> Remove unused constructor for ValueCacheObject
58c64a9f9 <João Pedro> Apply linter corrections
213b74ae1 <João Pedro> Apply corrections on the greedydualsize cache definition
4ab1bf177 <João Pedro> Remove base cache header file not used anymore
e66bff99b <João Pedro> Remove lru cache and change to use the greedy dual by default
6136f7ce7 <João Pedro> Add string variations on cache tests
0d25678e7 <João Pedro> Correct linter errors
e008f8c8c <João Pedro> Add identation to PriorityItem class
e2c38a954 <João Pedro> Correct linter errors
08f1bd627 <João Pedro> Change cache implementation to handle special structure objects as values
d9ef0569c <João Pedro> Change cache main abstraction to consider the usage of a ValueCacheObject
8658b1edc <João Pedro> Remove unused getCacheType function
a45adb904 <João Pedro> Change BaseCache insert method to receive only key and value
9e46ce3cc <João Pedro> Remove unused operator< implementation from cache keys
dabea56f3 <João Pedro> Remove unused operator< implementation from cache keys
88af6868e <João Pedro> Add base logic for gd-size algorithm implementation on the new cache
3b3746e07 <João Pedro> Rename the created cache files to consider the new approach using greedy-dual-size algorithm
abed89599 <João Pedro> Apply corrections and optimization on new cache classes
a8b0bd831 <João Pedro> Change lvu cache for not using unnecessarily a pair as value
3c013c835 <João Pedro> Change cache logic to use unique_ptr
887101899 <João Pedro> Fix wrong use of u_long to use uint64 instead
ddd90fc7b <João Pedro> Correct missing identation on gandiva cache files
a04726a4e <João Pedro> Change for not using u_long and use u_int64 on cache
10a7016e1 <João Pedro> Fix lint problems found on new cache documents on CI builds
f4874f642 <João Pedro> Fix lint problems on all added files
bcec5fcb6 <João Pedro> Add logic for calculating the llvm build time to be considered on cache logic
253c82692 <João Pedro> Adapt lru cache test for using the new insertion method definition
cd49c23f8 <João Pedro> Remove unused method definition from base cache class
0bee1a5cd <João Pedro> Add inheritance definition as public on cache child classes
6763be6ad <João Pedro> Add cache method for defining order parameter
24ec647c8 <João Pedro> Change cache class to handle with BaseClass pointer
e31d922ee <João Pedro> Change base cache methods to be virtual
b8ff8b8ab <João Pedro> Add base cache file as attemp to generalize caches
c32f90e0c <João Pedro> Add operator< implementation necessary for filter and project cache keys for compatibility with the new cache
1ece64d74 <João Pedro> Add new cache unit test file to the project CMakeLists.txt
8ebf66710 <João Pedro> Add unit test for the new cache implementation
a08832a24 <João Pedro> Add implementation for cache based on a lower value policy

Authored-by: João Pedro <joaop@simbioseventures.com>
Signed-off-by: Praveen <praveen@dremio.com>
---
 cpp/src/gandiva/CMakeLists.txt                |   2 +-
 cpp/src/gandiva/cache.h                       |  13 +-
 cpp/src/gandiva/filter.cc                     |  10 +-
 cpp/src/gandiva/greedy_dual_size_cache.h      | 154 ++++++++++++++++++
 .../gandiva/greedy_dual_size_cache_test.cc    |  88 ++++++++++
 cpp/src/gandiva/lru_cache.h                   | 121 --------------
 cpp/src/gandiva/lru_cache_test.cc             |  64 --------
 cpp/src/gandiva/projector.cc                  |   9 +-
 8 files changed, 267 insertions(+), 194 deletions(-)
 create mode 100644 cpp/src/gandiva/greedy_dual_size_cache.h
 create mode 100644 cpp/src/gandiva/greedy_dual_size_cache_test.cc
 delete mode 100644 cpp/src/gandiva/lru_cache.h
 delete mode 100644 cpp/src/gandiva/lru_cache_test.cc

diff --git a/cpp/src/gandiva/CMakeLists.txt b/cpp/src/gandiva/CMakeLists.txt
index 83cec08e71c..08f31ad9554 100644
--- a/cpp/src/gandiva/CMakeLists.txt
+++ b/cpp/src/gandiva/CMakeLists.txt
@@ -229,7 +229,7 @@ add_gandiva_test(internals-test
                  expr_decomposer_test.cc
                  expression_registry_test.cc
                  selection_vector_test.cc
-                 lru_cache_test.cc
+                 greedy_dual_size_cache_test.cc
                  to_date_holder_test.cc
                  simple_arena_test.cc
                  like_holder_test.cc
diff --git a/cpp/src/gandiva/cache.h b/cpp/src/gandiva/cache.h
index 73a2fd14224..8d0f75ce36a 100644
--- a/cpp/src/gandiva/cache.h
+++ b/cpp/src/gandiva/cache.h
@@ -18,9 +18,10 @@
 #pragma once
 
 #include <cstdlib>
+#include <memory>
 #include <mutex>
 
-#include "gandiva/lru_cache.h"
+#include "gandiva/greedy_dual_size_cache.h"
 #include "gandiva/visibility.h"
 
 namespace gandiva {
@@ -39,21 +40,21 @@ class Cache {
   Cache() : Cache(GetCapacity()) {}
 
   ValueType GetModule(KeyType cache_key) {
-    arrow::util::optional<ValueType> result;
+    arrow::util::optional<ValueCacheObject<ValueType>> result;
     mtx_.lock();
     result = cache_.get(cache_key);
     mtx_.unlock();
-    return result != arrow::util::nullopt ? *result : nullptr;
+    return result != arrow::util::nullopt ? (*result).module : nullptr;
   }
 
-  void PutModule(KeyType cache_key, ValueType module) {
+  void PutModule(KeyType cache_key, ValueCacheObject<ValueType> valueCacheObject) {
     mtx_.lock();
-    cache_.insert(cache_key, module);
+    cache_.insert(cache_key, valueCacheObject);
     mtx_.unlock();
   }
 
  private:
-  LruCache<KeyType, ValueType> cache_;
+  GreedyDualSizeCache<KeyType, ValueType> cache_;
   std::mutex mtx_;
 };
 }  // namespace gandiva
diff --git a/cpp/src/gandiva/filter.cc b/cpp/src/gandiva/filter.cc
index 5546c097b06..875cc5447f4 100644
--- a/cpp/src/gandiva/filter.cc
+++ b/cpp/src/gandiva/filter.cc
@@ -118,11 +118,19 @@ Status Filter::Make(SchemaPtr schema, ConditionPtr condition,
   // Return if the expression is invalid since we will not be able to process further.
   ExprValidator expr_validator(llvm_gen->types(), schema);
   ARROW_RETURN_NOT_OK(expr_validator.Validate(condition));
+
+  // Start measuring build time
+  auto begin = std::chrono::high_resolution_clock::now();
   ARROW_RETURN_NOT_OK(llvm_gen->Build({condition}, SelectionVector::Mode::MODE_NONE));
+  // Stop measuring time and calculate the elapsed time
+  auto end = std::chrono::high_resolution_clock::now();
+  auto elapsed =
+      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin).count();
 
   // Instantiate the filter with the completely built llvm generator
   *filter = std::make_shared<Filter>(std::move(llvm_gen), schema, configuration);
-  cache.PutModule(cache_key, *filter);
+  ValueCacheObject<std::shared_ptr<Filter>> value_cache(*filter, elapsed);
+  cache.PutModule(cache_key, value_cache);
 
   return Status::OK();
 }
diff --git a/cpp/src/gandiva/greedy_dual_size_cache.h b/cpp/src/gandiva/greedy_dual_size_cache.h
new file mode 100644
index 00000000000..cb5c38e075c
--- /dev/null
+++ b/cpp/src/gandiva/greedy_dual_size_cache.h
@@ -0,0 +1,154 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <list>
+#include <queue>
+#include <set>
+#include <unordered_map>
+#include <utility>
+
+#include "arrow/util/optional.h"
+
+// modified cache to support evict policy using the GreedyDual-Size algorithm.
+namespace gandiva {
+// Defines a base value object supported on the cache that may contain properties
+template <typename ValueType>
+class ValueCacheObject {
+ public:
+  ValueCacheObject(ValueType module, uint64_t cost) : module(module), cost(cost) {}
+  ValueType module;
+  uint64_t cost;
+  bool operator<(const ValueCacheObject& other) const { return cost < other.cost; }
+};
+
+// A particular cache based on the GreedyDual-Size cache which is a generalization of LRU
+// which defines costs for each cache values.
+// The algorithm associates a cost, C, with each cache value. Initially, when the value
+// is brought into cache, C is set to be the cost related to the value (the cost is
+// always non-negative). When a replacement needs to be made, the value with the lowest C
+// cost is replaced, and then all values reduce their C costs by the minimum value of C
+// over all the values already in the cache.
+// If a value is accessed, its C value is restored to its initial cost. Thus, the C costs
+// of recently accessed values retain a larger portion of the original cost than those of
+// values that have not been accessed for a long time. The C costs are reduced as time
+// goes and are restored when accessed.
+
+template <class Key, class Value>
+class GreedyDualSizeCache {
+  // inner class to define the priority item
+  class PriorityItem {
+   public:
+    PriorityItem(uint64_t actual_priority, uint64_t original_priority, Key key)
+        : actual_priority(actual_priority),
+          original_priority(original_priority),
+          cache_key(key) {}
+    // this ensure that the items with low priority stays in the beginning of the queue,
+    // so it can be the one removed by evict operation
+    bool operator<(const PriorityItem& other) const {
+      return actual_priority < other.actual_priority;
+    }
+    uint64_t actual_priority;
+    uint64_t original_priority;
+    Key cache_key;
+  };
+
+ public:
+  struct hasher {
+    template <typename I>
+    std::size_t operator()(const I& i) const {
+      return i.Hash();
+    }
+  };
+  // a map from 'key' to a pair of Value and a pointer to the priority value
+  using map_type = std::unordered_map<
+      Key, std::pair<ValueCacheObject<Value>, typename std::set<PriorityItem>::iterator>,
+      hasher>;
+
+  explicit GreedyDualSizeCache(size_t capacity) : inflation_(0), capacity_(capacity) {}
+
+  ~GreedyDualSizeCache() = default;
+
+  size_t size() const { return map_.size(); }
+
+  size_t capacity() const { return capacity_; }
+
+  bool empty() const { return map_.empty(); }
+
+  bool contains(const Key& key) { return map_.find(key) != map_.end(); }
+
+  void insert(const Key& key, const ValueCacheObject<Value>& value) {
+    typename map_type::iterator i = map_.find(key);
+    // check if element is not in the cache to add it
+    if (i == map_.end()) {
+      // insert item into the cache, but first check if it is full, to evict an item
+      // if it is necessary
+      if (size() >= capacity_) {
+        evict();
+      }
+
+      // insert the new item
+      auto item =
+          priority_set_.insert(PriorityItem(value.cost + inflation_, value.cost, key));
+      // save on map the value and the priority item iterator position
+      map_.emplace(key, std::make_pair(value, item.first));
+    }
+  }
+
+  arrow::util::optional<ValueCacheObject<Value>> get(const Key& key) {
+    // lookup value in the cache
+    typename map_type::iterator value_for_key = map_.find(key);
+    if (value_for_key == map_.end()) {
+      // value not in cache
+      return arrow::util::nullopt;
+    }
+    PriorityItem item = *value_for_key->second.second;
+    // if the value was found on the cache, update its cost (original + inflation)
+    if (item.actual_priority != item.original_priority + inflation_) {
+      priority_set_.erase(value_for_key->second.second);
+      auto iter = priority_set_.insert(PriorityItem(
+          item.original_priority + inflation_, item.original_priority, item.cache_key));
+      value_for_key->second.second = iter.first;
+    }
+    return value_for_key->second.first;
+  }
+
+  void clear() {
+    map_.clear();
+    priority_set_.clear();
+  }
+
+ private:
+  void evict() {
+    // TODO: inflation overflow is unlikely to happen but needs to be handled
+    //  for correctness.
+    // evict item from the beginning of the set. This set is ordered from the
+    // lower priority value to the higher priority value.
+    typename std::set<PriorityItem>::iterator i = priority_set_.begin();
+    // update the inflation cost related to the evicted item
+    inflation_ = (*i).actual_priority;
+    map_.erase((*i).cache_key);
+    priority_set_.erase(i);
+  }
+
+  map_type map_;
+  std::set<PriorityItem> priority_set_;
+  uint64_t inflation_;
+  size_t capacity_;
+};
+}  // namespace gandiva
diff --git a/cpp/src/gandiva/greedy_dual_size_cache_test.cc b/cpp/src/gandiva/greedy_dual_size_cache_test.cc
new file mode 100644
index 00000000000..3c72eef7092
--- /dev/null
+++ b/cpp/src/gandiva/greedy_dual_size_cache_test.cc
@@ -0,0 +1,88 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "gandiva/greedy_dual_size_cache.h"
+
+#include <string>
+#include <typeinfo>
+
+#include <gtest/gtest.h>
+
+namespace gandiva {
+
+class GreedyDualSizeCacheKey {
+ public:
+  explicit GreedyDualSizeCacheKey(int tmp) : tmp_(tmp) {}
+  std::size_t Hash() const { return tmp_; }
+  bool operator==(const GreedyDualSizeCacheKey& other) const {
+    return tmp_ == other.tmp_;
+  }
+
+ private:
+  int tmp_;
+};
+
+class TestGreedyDualSizeCache : public ::testing::Test {
+ public:
+  TestGreedyDualSizeCache() : cache_(2) {}
+
+ protected:
+  GreedyDualSizeCache<GreedyDualSizeCacheKey, std::string> cache_;
+};
+
+TEST_F(TestGreedyDualSizeCache, TestEvict) {
+  // check if the cache is evicting the items with low priority on cache
+  cache_.insert(GreedyDualSizeCacheKey(1), ValueCacheObject<std::string>("1", 1));
+  cache_.insert(GreedyDualSizeCacheKey(2), ValueCacheObject<std::string>("2", 10));
+  cache_.insert(GreedyDualSizeCacheKey(3), ValueCacheObject<std::string>("3", 20));
+  cache_.insert(GreedyDualSizeCacheKey(4), ValueCacheObject<std::string>("4", 15));
+  cache_.insert(GreedyDualSizeCacheKey(1), ValueCacheObject<std::string>("5", 1));
+  ASSERT_EQ(2, cache_.size());
+  // we check initially the values that won't be on the cache, since the get operation
+  // may affect the entity costs, which is not the purpose of this test
+  ASSERT_EQ(cache_.get(GreedyDualSizeCacheKey(2)), arrow::util::nullopt);
+  ASSERT_EQ(cache_.get(GreedyDualSizeCacheKey(3)), arrow::util::nullopt);
+  ASSERT_EQ(cache_.get(GreedyDualSizeCacheKey(1))->module, "5");
+  ASSERT_EQ(cache_.get(GreedyDualSizeCacheKey(4))->module, "4");
+}
+
+TEST_F(TestGreedyDualSizeCache, TestGreedyDualSizeBehavior) {
+  // insert 1 and 3 evicting 2 (this eviction will increase the inflation cost by 20)
+  cache_.insert(GreedyDualSizeCacheKey(1), ValueCacheObject<std::string>("1", 40));
+  cache_.insert(GreedyDualSizeCacheKey(2), ValueCacheObject<std::string>("2", 20));
+  cache_.insert(GreedyDualSizeCacheKey(3), ValueCacheObject<std::string>("3", 30));
+
+  // when accessing key 3, its actual cost will be increased by the inflation, so in the
+  // next eviction, the key 1 will be evicted, since the key 1 actual cost (original(40))
+  // is smaller than key 3 actual increased cost (original(30) + inflation(20))
+  ASSERT_EQ(cache_.get(GreedyDualSizeCacheKey(3))->module, "3");
+
+  // try to insert key 2 and expect the eviction of key 1
+  cache_.insert(GreedyDualSizeCacheKey(2), ValueCacheObject<std::string>("2", 20));
+  ASSERT_EQ(cache_.get(GreedyDualSizeCacheKey(1)), arrow::util::nullopt);
+
+  // when accessing key 2, its original cost should be increased by inflation, so when
+  // inserting the key 1 again, now the key 3 should be evicted
+  ASSERT_EQ(cache_.get(GreedyDualSizeCacheKey(2))->module, "2");
+  cache_.insert(GreedyDualSizeCacheKey(1), ValueCacheObject<std::string>("1", 20));
+
+  ASSERT_EQ(cache_.get(GreedyDualSizeCacheKey(1))->module, "1");
+  ASSERT_EQ(cache_.get(GreedyDualSizeCacheKey(2))->module, "2");
+  ASSERT_EQ(cache_.get(GreedyDualSizeCacheKey(3)), arrow::util::nullopt);
+  ASSERT_EQ(2, cache_.size());
+}
+}  // namespace gandiva
diff --git a/cpp/src/gandiva/lru_cache.h b/cpp/src/gandiva/lru_cache.h
deleted file mode 100644
index 6602116b0a0..00000000000
--- a/cpp/src/gandiva/lru_cache.h
+++ /dev/null
@@ -1,121 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <list>
-#include <unordered_map>
-#include <utility>
-
-#include "arrow/util/optional.h"
-
-// modified from boost LRU cache -> the boost cache supported only an
-// ordered map.
-namespace gandiva {
-// a cache which evicts the least recently used item when it is full
-template <class Key, class Value>
-class LruCache {
- public:
-  using key_type = Key;
-  using value_type = Value;
-  using list_type = std::list<key_type>;
-  struct hasher {
-    template <typename I>
-    std::size_t operator()(const I& i) const {
-      return i.Hash();
-    }
-  };
-  using map_type =
-      std::unordered_map<key_type, std::pair<value_type, typename list_type::iterator>,
-                         hasher>;
-
-  explicit LruCache(size_t capacity) : cache_capacity_(capacity) {}
-
-  ~LruCache() {}
-
-  size_t size() const { return map_.size(); }
-
-  size_t capacity() const { return cache_capacity_; }
-
-  bool empty() const { return map_.empty(); }
-
-  bool contains(const key_type& key) { return map_.find(key) != map_.end(); }
-
-  void insert(const key_type& key, const value_type& value) {
-    typename map_type::iterator i = map_.find(key);
-    if (i == map_.end()) {
-      // insert item into the cache, but first check if it is full
-      if (size() >= cache_capacity_) {
-        // cache is full, evict the least recently used item
-        evict();
-      }
-
-      // insert the new item
-      lru_list_.push_front(key);
-      map_[key] = std::make_pair(value, lru_list_.begin());
-    }
-  }
-
-  arrow::util::optional<value_type> get(const key_type& key) {
-    // lookup value in the cache
-    typename map_type::iterator value_for_key = map_.find(key);
-    if (value_for_key == map_.end()) {
-      // value not in cache
-      return arrow::util::nullopt;
-    }
-
-    // return the value, but first update its place in the most
-    // recently used list
-    typename list_type::iterator position_in_lru_list = value_for_key->second.second;
-    if (position_in_lru_list != lru_list_.begin()) {
-      // move item to the front of the most recently used list
-      lru_list_.erase(position_in_lru_list);
-      lru_list_.push_front(key);
-
-      // update iterator in map
-      position_in_lru_list = lru_list_.begin();
-      const value_type& value = value_for_key->second.first;
-      map_[key] = std::make_pair(value, position_in_lru_list);
-
-      // return the value
-      return value;
-    } else {
-      // the item is already at the front of the most recently
-      // used list so just return it
-      return value_for_key->second.first;
-    }
-  }
-
-  void clear() {
-    map_.clear();
-    lru_list_.clear();
-  }
-
- private:
-  void evict() {
-    // evict item from the end of most recently used list
-    typename list_type::iterator i = --lru_list_.end();
-    map_.erase(*i);
-    lru_list_.erase(i);
-  }
-
- private:
-  map_type map_;
-  list_type lru_list_;
-  size_t cache_capacity_;
-};
-}  // namespace gandiva
diff --git a/cpp/src/gandiva/lru_cache_test.cc b/cpp/src/gandiva/lru_cache_test.cc
deleted file mode 100644
index 06c86d69032..00000000000
--- a/cpp/src/gandiva/lru_cache_test.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "gandiva/lru_cache.h"
-
-#include <map>
-#include <string>
-#include <typeinfo>
-
-#include <gtest/gtest.h>
-
-namespace gandiva {
-
-class TestCacheKey {
- public:
-  explicit TestCacheKey(int tmp) : tmp_(tmp) {}
-  std::size_t Hash() const { return tmp_; }
-  bool operator==(const TestCacheKey& other) const { return tmp_ == other.tmp_; }
-
- private:
-  int tmp_;
-};
-
-class TestLruCache : public ::testing::Test {
- public:
-  TestLruCache() : cache_(2) {}
-
- protected:
-  LruCache<TestCacheKey, std::string> cache_;
-};
-
-TEST_F(TestLruCache, TestEvict) {
-  cache_.insert(TestCacheKey(1), "hello");
-  cache_.insert(TestCacheKey(2), "hello");
-  cache_.insert(TestCacheKey(1), "hello");
-  cache_.insert(TestCacheKey(3), "hello");
-  // should have evicted key 1
-  ASSERT_EQ(2, cache_.size());
-  ASSERT_EQ(cache_.get(TestCacheKey(1)), arrow::util::nullopt);
-}
-
-TEST_F(TestLruCache, TestLruBehavior) {
-  cache_.insert(TestCacheKey(1), "hello");
-  cache_.insert(TestCacheKey(2), "hello");
-  cache_.get(TestCacheKey(1));
-  cache_.insert(TestCacheKey(3), "hello");
-  // should have evicted key 2.
-  ASSERT_EQ(*cache_.get(TestCacheKey(1)), "hello");
-}
-}  // namespace gandiva
diff --git a/cpp/src/gandiva/projector.cc b/cpp/src/gandiva/projector.cc
index 734720c64c9..ff167538f9c 100644
--- a/cpp/src/gandiva/projector.cc
+++ b/cpp/src/gandiva/projector.cc
@@ -174,7 +174,13 @@ Status Projector::Make(SchemaPtr schema, const ExpressionVector& exprs,
     ARROW_RETURN_NOT_OK(expr_validator.Validate(expr));
   }
 
+  // Start measuring build time
+  auto begin = std::chrono::high_resolution_clock::now();
   ARROW_RETURN_NOT_OK(llvm_gen->Build(exprs, selection_vector_mode));
+  // Stop measuring time and calculate the elapsed time
+  auto end = std::chrono::high_resolution_clock::now();
+  auto elapsed =
+      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin).count();
 
   // save the output field types. Used for validation at Evaluate() time.
   std::vector<FieldPtr> output_fields;
@@ -186,7 +192,8 @@ Status Projector::Make(SchemaPtr schema, const ExpressionVector& exprs,
   // Instantiate the projector with the completely built llvm generator
   *projector = std::shared_ptr<Projector>(
       new Projector(std::move(llvm_gen), schema, output_fields, configuration));
-  cache.PutModule(cache_key, *projector);
+  ValueCacheObject<std::shared_ptr<Projector>> value_cache(*projector, elapsed);
+  cache.PutModule(cache_key, value_cache);
 
   return Status::OK();
 }

From 0d06e34694b36dcbfb59040b6d6199720ae1c31a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Pedro?= <joaop@simbioseventures.com>
Date: Thu, 15 Jul 2021 21:28:41 +0530
Subject: [PATCH 566/719] ARROW-13006: [C++][Gandiva] Implement BASE64 and
 UNBASE64 Hive functions on Gandiva
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implement BASE64 and UNBASE64 Hive functions on Gandiva

Closes #10479 from jpedroantunes/feature/add-base64 and squashes the following commits:

6785507bf <João Pedro> Fix ci build errors
75cff2458 <João Pedro> Use string constructor on base64 decode considering in length
15ab89ef8 <João Pedro> Add projector tests for base64 and unbase64
405043f1b <João Pedro> Add gandiva export to created functions
a5617df99 <João Pedro> Add identation to encode base64 functions
e34439c92 <João Pedro> Correct linter errors
c957d9d4d <João Pedro> Register base64 and unbase64 string methods
90ba490cb <João Pedro> Add base logic for base64 encode and decode methods

Authored-by: João Pedro <joaop@simbioseventures.com>
Signed-off-by: Praveen <praveen@dremio.com>
---
 cpp/src/gandiva/function_registry_string.cc |  6 ++
 cpp/src/gandiva/gdv_function_stubs.cc       | 80 +++++++++++++++++++++
 cpp/src/gandiva/gdv_function_stubs.h        |  8 +++
 cpp/src/gandiva/gdv_function_stubs_test.cc  | 58 +++++++++++++++
 cpp/src/gandiva/tests/projector_test.cc     | 70 ++++++++++++++++++
 5 files changed, 222 insertions(+)

diff --git a/cpp/src/gandiva/function_registry_string.cc b/cpp/src/gandiva/function_registry_string.cc
index 9235a3e01a2..48c22657258 100644
--- a/cpp/src/gandiva/function_registry_string.cc
+++ b/cpp/src/gandiva/function_registry_string.cc
@@ -65,6 +65,12 @@ std::vector<NativeFunction> GetStringFunctionRegistry() {
       NativeFunction("ascii", {}, DataTypeVector{utf8()}, int32(), kResultNullIfNull,
                      "ascii_utf8"),
 
+      NativeFunction("base64", {}, DataTypeVector{binary()}, utf8(), kResultNullIfNull,
+                     "gdv_fn_base64_encode_binary", NativeFunction::kNeedsContext),
+
+      NativeFunction("unbase64", {}, DataTypeVector{utf8()}, binary(), kResultNullIfNull,
+                     "gdv_fn_base64_decode_utf8", NativeFunction::kNeedsContext),
+
       NativeFunction("upper", {}, DataTypeVector{utf8()}, utf8(), kResultNullIfNull,
                      "gdv_fn_upper_utf8", NativeFunction::kNeedsContext),
 
diff --git a/cpp/src/gandiva/gdv_function_stubs.cc b/cpp/src/gandiva/gdv_function_stubs.cc
index 3c278049ed6..99e60b015ec 100644
--- a/cpp/src/gandiva/gdv_function_stubs.cc
+++ b/cpp/src/gandiva/gdv_function_stubs.cc
@@ -22,6 +22,7 @@
 #include <string>
 #include <vector>
 
+#include "arrow/util/base64.h"
 #include "arrow/util/formatting.h"
 #include "arrow/util/utf8.h"
 #include "arrow/util/value_parsing.h"
@@ -308,6 +309,61 @@ char* gdv_fn_dec_to_string(int64_t context, int64_t x_high, uint64_t x_low,
   return ret;
 }
 
+GANDIVA_EXPORT
+const char* gdv_fn_base64_encode_binary(int64_t context, const char* in, int32_t in_len,
+                                        int32_t* out_len) {
+  if (in_len < 0) {
+    gdv_fn_context_set_error_msg(context, "Buffer length can not be negative");
+    *out_len = 0;
+    return "";
+  }
+  if (in_len == 0) {
+    *out_len = 0;
+    return "";
+  }
+  // use arrow method to encode base64 string
+  std::string encoded_str =
+      arrow::util::base64_encode(reinterpret_cast<const unsigned char*>(in), in_len);
+  *out_len = static_cast<int32_t>(encoded_str.length());
+  // allocate memory for response
+  char* ret = reinterpret_cast<char*>(
+      gdv_fn_context_arena_malloc(context, static_cast<int32_t>(*out_len)));
+  if (ret == nullptr) {
+    gdv_fn_context_set_error_msg(context, "Could not allocate memory");
+    *out_len = 0;
+    return "";
+  }
+  memcpy(ret, encoded_str.data(), *out_len);
+  return ret;
+}
+
+GANDIVA_EXPORT
+const char* gdv_fn_base64_decode_utf8(int64_t context, const char* in, int32_t in_len,
+                                      int32_t* out_len) {
+  if (in_len < 0) {
+    gdv_fn_context_set_error_msg(context, "Buffer length can not be negative");
+    *out_len = 0;
+    return "";
+  }
+  if (in_len == 0) {
+    *out_len = 0;
+    return "";
+  }
+  // use arrow method to decode base64 string
+  std::string decoded_str = arrow::util::base64_decode(std::string(in, in_len));
+  *out_len = static_cast<int32_t>(decoded_str.length());
+  // allocate memory for response
+  char* ret = reinterpret_cast<char*>(
+      gdv_fn_context_arena_malloc(context, static_cast<int32_t>(*out_len)));
+  if (ret == nullptr) {
+    gdv_fn_context_set_error_msg(context, "Could not allocate memory");
+    *out_len = 0;
+    return "";
+  }
+  memcpy(ret, decoded_str.data(), *out_len);
+  return ret;
+}
+
 #define CAST_NUMERIC_FROM_STRING(OUT_TYPE, ARROW_TYPE, TYPE_NAME)                    \
   GANDIVA_EXPORT                                                                     \
   OUT_TYPE gdv_fn_cast##TYPE_NAME##_utf8(int64_t context, const char* data,          \
@@ -1407,6 +1463,30 @@ void ExportedStubFunctions::AddMappings(Engine* engine) const {
                                   types->i8_ptr_type() /*return_type*/, args,
                                   reinterpret_cast<void*>(gdv_fn_sha256_decimal128));
 
+  // gdv_fn_base64_encode_utf8
+  args = {
+      types->i64_type(),      // context
+      types->i8_ptr_type(),   // in
+      types->i32_type(),      // in_len
+      types->i32_ptr_type(),  // out_len
+  };
+
+  engine->AddGlobalMappingForFunc("gdv_fn_base64_encode_binary",
+                                  types->i8_ptr_type() /*return_type*/, args,
+                                  reinterpret_cast<void*>(gdv_fn_base64_encode_binary));
+
+  // gdv_fn_base64_decode_utf8
+  args = {
+      types->i64_type(),      // context
+      types->i8_ptr_type(),   // in
+      types->i32_type(),      // in_len
+      types->i32_ptr_type(),  // out_len
+  };
+
+  engine->AddGlobalMappingForFunc("gdv_fn_base64_decode_utf8",
+                                  types->i8_ptr_type() /*return_type*/, args,
+                                  reinterpret_cast<void*>(gdv_fn_base64_decode_utf8));
+
   // gdv_fn_upper_utf8
   args = {
       types->i64_type(),      // context
diff --git a/cpp/src/gandiva/gdv_function_stubs.h b/cpp/src/gandiva/gdv_function_stubs.h
index 043e94034ed..9320bf6957d 100644
--- a/cpp/src/gandiva/gdv_function_stubs.h
+++ b/cpp/src/gandiva/gdv_function_stubs.h
@@ -74,6 +74,14 @@ bool in_expr_lookup_utf8(int64_t ptr, const char* data, int data_len, bool in_va
 int gdv_fn_time_with_zone(int* time_fields, const char* zone, int zone_len,
                           int64_t* ret_time);
 
+GANDIVA_EXPORT
+const char* gdv_fn_base64_encode_binary(int64_t context, const char* in, int32_t in_len,
+                                        int32_t* out_len);
+
+GANDIVA_EXPORT
+const char* gdv_fn_base64_decode_utf8(int64_t context, const char* in, int32_t in_len,
+                                      int32_t* out_len);
+
 GANDIVA_EXPORT
 const char* gdv_fn_castVARBINARY_int32_int64(int64_t context, gdv_int32 value,
                                              int64_t out_len, int32_t* out_length);
diff --git a/cpp/src/gandiva/gdv_function_stubs_test.cc b/cpp/src/gandiva/gdv_function_stubs_test.cc
index 354a8bb191d..08e021eb55f 100644
--- a/cpp/src/gandiva/gdv_function_stubs_test.cc
+++ b/cpp/src/gandiva/gdv_function_stubs_test.cc
@@ -81,6 +81,64 @@ TEST(TestGdvFnStubs, TestCastVarbinaryNumeric) {
   EXPECT_FALSE(ctx.has_error());
 }
 
+TEST(TestGdvFnStubs, TestBase64Encode) {
+  gandiva::ExecutionContext ctx;
+
+  auto ctx_ptr = reinterpret_cast<int64_t>(&ctx);
+  int32_t out_len = 0;
+
+  auto value = gdv_fn_base64_encode_binary(ctx_ptr, "hello", 5, &out_len);
+  std::string out_value = std::string(value, out_len);
+  EXPECT_EQ(out_value, "aGVsbG8=");
+
+  value = gdv_fn_base64_encode_binary(ctx_ptr, "test", 4, &out_len);
+  out_value = std::string(value, out_len);
+  EXPECT_EQ(out_value, "dGVzdA==");
+
+  value = gdv_fn_base64_encode_binary(ctx_ptr, "hive", 4, &out_len);
+  out_value = std::string(value, out_len);
+  EXPECT_EQ(out_value, "aGl2ZQ==");
+
+  value = gdv_fn_base64_encode_binary(ctx_ptr, "", 0, &out_len);
+  out_value = std::string(value, out_len);
+  EXPECT_EQ(out_value, "");
+
+  value = gdv_fn_base64_encode_binary(ctx_ptr, "test", -5, &out_len);
+  out_value = std::string(value, out_len);
+  EXPECT_EQ(out_value, "");
+  EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Buffer length can not be negative"));
+  ctx.Reset();
+}
+
+TEST(TestGdvFnStubs, TestBase64Decode) {
+  gandiva::ExecutionContext ctx;
+
+  auto ctx_ptr = reinterpret_cast<int64_t>(&ctx);
+  int32_t out_len = 0;
+
+  auto value = gdv_fn_base64_decode_utf8(ctx_ptr, "aGVsbG8=", 8, &out_len);
+  std::string out_value = std::string(value, out_len);
+  EXPECT_EQ(out_value, "hello");
+
+  value = gdv_fn_base64_decode_utf8(ctx_ptr, "dGVzdA==", 8, &out_len);
+  out_value = std::string(value, out_len);
+  EXPECT_EQ(out_value, "test");
+
+  value = gdv_fn_base64_decode_utf8(ctx_ptr, "aGl2ZQ==", 8, &out_len);
+  out_value = std::string(value, out_len);
+  EXPECT_EQ(out_value, "hive");
+
+  value = gdv_fn_base64_decode_utf8(ctx_ptr, "", 0, &out_len);
+  out_value = std::string(value, out_len);
+  EXPECT_EQ(out_value, "");
+
+  value = gdv_fn_base64_decode_utf8(ctx_ptr, "test", -5, &out_len);
+  out_value = std::string(value, out_len);
+  EXPECT_EQ(out_value, "");
+  EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Buffer length can not be negative"));
+  ctx.Reset();
+}
+
 TEST(TestGdvFnStubs, TestCastINT) {
   gandiva::ExecutionContext ctx;
 
diff --git a/cpp/src/gandiva/tests/projector_test.cc b/cpp/src/gandiva/tests/projector_test.cc
index dcdeeb4ee10..c3b50dc0fa9 100644
--- a/cpp/src/gandiva/tests/projector_test.cc
+++ b/cpp/src/gandiva/tests/projector_test.cc
@@ -818,6 +818,76 @@ TEST_F(TestProjector, TestConcat) {
   EXPECT_ARROW_ARRAY_EQUALS(exp_concat, outputs.at(0));
 }
 
+TEST_F(TestProjector, TestBase64) {
+  // schema for input fields
+  auto field0 = field("f0", arrow::binary());
+  auto schema = arrow::schema({field0});
+
+  // output fields
+  auto field_base = field("base64", arrow::utf8());
+
+  // Build expression
+  auto base_expr = TreeExprBuilder::MakeExpression("base64", {field0}, field_base);
+
+  std::shared_ptr<Projector> projector;
+  auto status = Projector::Make(schema, {base_expr}, TestConfiguration(), &projector);
+  EXPECT_TRUE(status.ok()) << status.message();
+
+  // Create a row-batch with some sample data
+  int num_records = 4;
+  auto array0 =
+      MakeArrowArrayBinary({"hello", "", "test", "hive"}, {true, true, true, true});
+  // expected output
+  auto exp_base = MakeArrowArrayUtf8({"aGVsbG8=", "", "dGVzdA==", "aGl2ZQ=="},
+                                     {true, true, true, true});
+
+  // prepare input record batch
+  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0});
+
+  // Evaluate expression
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  EXPECT_TRUE(status.ok()) << status.message();
+
+  // Validate results
+  EXPECT_ARROW_ARRAY_EQUALS(exp_base, outputs.at(0));
+}
+
+TEST_F(TestProjector, TestUnbase64) {
+  // schema for input fields
+  auto field0 = field("f0", arrow::utf8());
+  auto schema = arrow::schema({field0});
+
+  // output fields
+  auto field_base = field("base64", arrow::binary());
+
+  // Build expression
+  auto base_expr = TreeExprBuilder::MakeExpression("unbase64", {field0}, field_base);
+
+  std::shared_ptr<Projector> projector;
+  auto status = Projector::Make(schema, {base_expr}, TestConfiguration(), &projector);
+  EXPECT_TRUE(status.ok()) << status.message();
+
+  // Create a row-batch with some sample data
+  int num_records = 4;
+  auto array0 = MakeArrowArrayUtf8({"aGVsbG8=", "", "dGVzdA==", "aGl2ZQ=="},
+                                   {true, true, true, true});
+  // expected output
+  auto exp_unbase =
+      MakeArrowArrayBinary({"hello", "", "test", "hive"}, {true, true, true, true});
+
+  // prepare input record batch
+  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0});
+
+  // Evaluate expression
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  EXPECT_TRUE(status.ok()) << status.message();
+
+  // Validate results
+  EXPECT_ARROW_ARRAY_EQUALS(exp_unbase, outputs.at(0));
+}
+
 TEST_F(TestProjector, TestLeftString) {
   // schema for input fields
   auto field0 = field("f0", arrow::utf8());

From 9c0debf8eaf6111500f273f7b513da7fdc734c6c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Pedro?= <joaop@simbioseventures.com>
Date: Thu, 15 Jul 2021 21:30:23 +0530
Subject: [PATCH 567/719] ARROW-13049: [C++][Gandiva] Implement BIN Hive
 function on Gandiva
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implement BIN Hive function on Gandiva

Closes #10516 from jpedroantunes/feature/add-bin-function and squashes the following commits:

0bb20b7ca <João Pedro> Add handler for input 0 case on bin function
94f775644 <João Pedro> Try static cast on for loop to work on mingw
a3fb89a99 <João Pedro> Add missing identation for tests
147dc37f3 <João Pedro> Fix lint errors on tests
03b04708e <João Pedro> Change behavior for int32 bin representation to use only 32 chars
67aeb81fa <João Pedro> Add undef to bin_integer macro function
f958b4182 <João Pedro> Fix lint identation error
cd022ea80 <João Pedro> Correct linter errors
3c639aadc <João Pedro> Add projector test for bin function
250333893 <João Pedro> Change base implementation for bin hive function to extended math ops
08f9f4633 <João Pedro> Add base implementation for bin hive function

Authored-by: João Pedro <joaop@simbioseventures.com>
Signed-off-by: Praveen <praveen@dremio.com>
---
 .../gandiva/function_registry_arithmetic.cc   |  6 +-
 .../gandiva/precompiled/extended_math_ops.cc  | 40 ++++++++++
 .../precompiled/extended_math_ops_test.cc     | 73 +++++++++++++++++++
 cpp/src/gandiva/precompiled/types.h           |  3 +
 cpp/src/gandiva/tests/projector_test.cc       | 37 ++++++++++
 5 files changed, 158 insertions(+), 1 deletion(-)

diff --git a/cpp/src/gandiva/function_registry_arithmetic.cc b/cpp/src/gandiva/function_registry_arithmetic.cc
index 2c3049a8b74..6613c1f12c6 100644
--- a/cpp/src/gandiva/function_registry_arithmetic.cc
+++ b/cpp/src/gandiva/function_registry_arithmetic.cc
@@ -103,7 +103,11 @@ std::vector<NativeFunction> GetArithmeticFunctionRegistry() {
       BINARY_RELATIONAL_BOOL_DATE_FN(less_than, {}),
       BINARY_RELATIONAL_BOOL_DATE_FN(less_than_or_equal_to, {}),
       BINARY_RELATIONAL_BOOL_DATE_FN(greater_than, {}),
-      BINARY_RELATIONAL_BOOL_DATE_FN(greater_than_or_equal_to, {})};
+      BINARY_RELATIONAL_BOOL_DATE_FN(greater_than_or_equal_to, {}),
+
+      // binary representation of integer values
+      UNARY_UNSAFE_NULL_IF_NULL(bin, {}, int32, utf8),
+      UNARY_UNSAFE_NULL_IF_NULL(bin, {}, int64, utf8)};
 
   return arithmetic_fn_registry_;
 }
diff --git a/cpp/src/gandiva/precompiled/extended_math_ops.cc b/cpp/src/gandiva/precompiled/extended_math_ops.cc
index b2d62daac7f..365b08a6da9 100644
--- a/cpp/src/gandiva/precompiled/extended_math_ops.cc
+++ b/cpp/src/gandiva/precompiled/extended_math_ops.cc
@@ -367,4 +367,44 @@ gdv_float64 get_scale_multiplier(gdv_int32 scale) {
   return power_float64_float64(10.0, scale);
 }
 
+// returns the binary representation of a given integer (e.g. 928 -> 1110100000)
+#define BIN_INTEGER(IN_TYPE)                                                          \
+  FORCE_INLINE                                                                        \
+  const char* bin_##IN_TYPE(int64_t context, gdv_##IN_TYPE value, int32_t* out_len) { \
+    *out_len = 0;                                                                     \
+    int32_t len = 8 * sizeof(value);                                                  \
+    char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, len));   \
+    if (ret == nullptr) {                                                             \
+      gdv_fn_context_set_error_msg(context, "Could not allocate memory for output");  \
+      return "";                                                                      \
+    }                                                                                 \
+    /* handle case when value is zero */                                              \
+    if (value == 0) {                                                                 \
+      *out_len = 1;                                                                   \
+      ret[0] = '0';                                                                   \
+      return ret;                                                                     \
+    }                                                                                 \
+    /* generate binary representation iteratively */                                  \
+    gdv_u##IN_TYPE i;                                                                 \
+    int8_t count = 0;                                                                 \
+    bool first = false; /* flag for not printing left zeros in positive numbers */    \
+    for (i = static_cast<gdv_u##IN_TYPE>(1) << (len - 1); i > 0; i = i / 2) {         \
+      if ((value & i) != 0) {                                                         \
+        ret[count] = '1';                                                             \
+        if (!first) first = true;                                                     \
+      } else {                                                                        \
+        if (!first) continue;                                                         \
+        ret[count] = '0';                                                             \
+      }                                                                               \
+      count += 1;                                                                     \
+    }                                                                                 \
+    *out_len = count;                                                                 \
+    return ret;                                                                       \
+  }
+
+BIN_INTEGER(int32)
+BIN_INTEGER(int64)
+
+#undef BIN_INTEGER
+
 }  // extern "C"
diff --git a/cpp/src/gandiva/precompiled/extended_math_ops_test.cc b/cpp/src/gandiva/precompiled/extended_math_ops_test.cc
index 6e59f684f62..147b4035c7d 100644
--- a/cpp/src/gandiva/precompiled/extended_math_ops_test.cc
+++ b/cpp/src/gandiva/precompiled/extended_math_ops_test.cc
@@ -273,4 +273,77 @@ TEST(TestExtendedMathOps, TestTrigonometricFunctions) {
   VerifyFuzzyEquals(cot_float64(M_PI / 2), tan(M_PI / 2 - M_PI / 2));
 }
 
+TEST(TestExtendedMathOps, TestBinRepresentation) {
+  gandiva::ExecutionContext ctx;
+  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
+  gdv_int32 out_len = 0;
+
+  const char* out_str = bin_int32(ctx_ptr, 7, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "111");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = bin_int32(ctx_ptr, 0, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "0");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = bin_int32(ctx_ptr, 28550, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "110111110000110");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = bin_int32(ctx_ptr, -28550, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "11111111111111111001000001111010");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = bin_int32(ctx_ptr, 58117, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "1110001100000101");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = bin_int32(ctx_ptr, -58117, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "11111111111111110001110011111011");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = bin_int32(ctx_ptr, INT32_MAX, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "1111111111111111111111111111111");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = bin_int32(ctx_ptr, INT32_MIN, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "10000000000000000000000000000000");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = bin_int64(ctx_ptr, 7, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "111");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = bin_int64(ctx_ptr, 0, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "0");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = bin_int64(ctx_ptr, 28550, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "110111110000110");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = bin_int64(ctx_ptr, -28550, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len),
+            "1111111111111111111111111111111111111111111111111001000001111010");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = bin_int64(ctx_ptr, 58117, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "1110001100000101");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = bin_int64(ctx_ptr, -58117, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len),
+            "1111111111111111111111111111111111111111111111110001110011111011");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = bin_int64(ctx_ptr, INT64_MAX, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len),
+            "111111111111111111111111111111111111111111111111111111111111111");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = bin_int64(ctx_ptr, INT64_MIN, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len),
+            "1000000000000000000000000000000000000000000000000000000000000000");
+  EXPECT_FALSE(ctx.has_error());
+}
 }  // namespace gandiva
diff --git a/cpp/src/gandiva/precompiled/types.h b/cpp/src/gandiva/precompiled/types.h
index 543a00f0b2d..775421afd2c 100644
--- a/cpp/src/gandiva/precompiled/types.h
+++ b/cpp/src/gandiva/precompiled/types.h
@@ -156,6 +156,9 @@ gdv_int32 round_int32(gdv_int32);
 gdv_int64 round_int64(gdv_int64);
 gdv_int64 get_power_of_10(gdv_int32);
 
+const char* bin_int32(int64_t context, gdv_int32 value, int32_t* out_len);
+const char* bin_int64(int64_t context, gdv_int64 value, int32_t* out_len);
+
 gdv_float64 cbrt_int32(gdv_int32);
 gdv_float64 cbrt_int64(gdv_int64);
 gdv_float64 cbrt_float32(gdv_float32);
diff --git a/cpp/src/gandiva/tests/projector_test.cc b/cpp/src/gandiva/tests/projector_test.cc
index c3b50dc0fa9..572fb3103ec 100644
--- a/cpp/src/gandiva/tests/projector_test.cc
+++ b/cpp/src/gandiva/tests/projector_test.cc
@@ -1316,4 +1316,41 @@ TEST_F(TestProjector, TestRpad) {
   EXPECT_ARROW_ARRAY_EQUALS(exp_rpad, outputs.at(0));
 }
 
+TEST_F(TestProjector, TestBinRepresentation) {
+  // schema for input fields
+  auto field0 = field("f0", arrow::int64());
+  auto schema = arrow::schema({field0});
+
+  // output fields
+  auto field_result = field("bin", arrow::utf8());
+
+  // Build expression
+  auto myexpr = TreeExprBuilder::MakeExpression("bin", {field0}, field_result);
+
+  // Build a projector for the expressions.
+  std::shared_ptr<Projector> projector;
+  auto status = Projector::Make(schema, {myexpr}, TestConfiguration(), &projector);
+  EXPECT_TRUE(status.ok());
+
+  // Create a row-batch with some sample data
+  int num_records = 3;
+  auto array0 = MakeArrowArrayInt64({7, -28550, 58117}, {true, true, true});
+  // expected output
+  auto exp = MakeArrowArrayUtf8(
+      {"111", "1111111111111111111111111111111111111111111111111001000001111010",
+       "1110001100000101"},
+      {true, true, true});
+
+  // prepare input record batch
+  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0});
+
+  // Evaluate expression
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  EXPECT_TRUE(status.ok());
+
+  // Validate results
+  EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
+}
+
 }  // namespace gandiva

From b8bcf76e4e9063aeb297a20b0e200cc1687c0857 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Thu, 15 Jul 2021 12:01:29 -0400
Subject: [PATCH 568/719] ARROW-13074: [Python] Deprecate ParquetDataset custom
 properties (eg pieces, partitions)

The idea is that before fully deprecating/removing the legacy implementation of `ParquetDataset`, we can already deprecate its custom attributes (which is AFAIK the main/only reason to still use the legacy version).

Closes #10549 from jorisvandenbossche/ARROW-13074-parquetdataset-deprecate-properties

Authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 python/pyarrow/parquet.py                     | 149 +++++++++++++++---
 python/pyarrow/tests/parquet/test_dataset.py  |  45 +++++-
 python/pyarrow/tests/parquet/test_metadata.py |   4 +-
 3 files changed, 166 insertions(+), 32 deletions(-)

diff --git a/python/pyarrow/parquet.py b/python/pyarrow/parquet.py
index c578661851f..0c43936c33b 100644
--- a/python/pyarrow/parquet.py
+++ b/python/pyarrow/parquet.py
@@ -27,6 +27,7 @@
 import re
 import operator
 import urllib.parse
+import warnings
 
 import pyarrow as pa
 import pyarrow.lib as lib
@@ -705,11 +706,17 @@ def _get_pandas_index_columns(keyvalues):
 
 class ParquetDatasetPiece:
     """
-    A single chunk of a potentially larger Parquet dataset to read.
+    DEPRECATED: A single chunk of a potentially larger Parquet dataset to read.
 
     The arguments will indicate to read either a single row group or all row
     groups, and whether to add partition keys to the resulting pyarrow.Table.
 
+    .. deprecated:: 5.0
+        Directly constructing a ``ParquetDatasetPiece`` is deprecated, as well
+        as accessing the pieces of a ``ParquetDataset`` object. Specify
+        ``use_legacy_dataset=False`` when constructing the ``ParquetDataset``
+        and use the ``ParquetDataset.fragments`` attribute instead.
+
     Parameters
     ----------
     path : str or pathlib.Path
@@ -724,6 +731,23 @@ class ParquetDatasetPiece:
 
     def __init__(self, path, open_file_func=partial(open, mode='rb'),
                  file_options=None, row_group=None, partition_keys=None):
+        warnings.warn(
+            "ParquetDatasetPiece is deprecated as of pyarrow 5.0.0 and will "
+            "be removed in a future version.",
+            DeprecationWarning, stacklevel=2)
+        self._init(
+            path, open_file_func, file_options, row_group, partition_keys)
+
+    @staticmethod
+    def _create(path, open_file_func=partial(open, mode='rb'),
+                file_options=None, row_group=None, partition_keys=None):
+        self = ParquetDatasetPiece.__new__(ParquetDatasetPiece)
+        self._init(
+            path, open_file_func, file_options, row_group, partition_keys)
+        return self
+
+    def _init(self, path, open_file_func, file_options, row_group,
+              partition_keys):
         self.path = _stringify_path(path)
         self.open_file_func = open_file_func
         self.row_group = row_group
@@ -1106,8 +1130,8 @@ def _parse_partition(self, dirname):
 
     def _push_pieces(self, files, part_keys):
         self.pieces.extend([
-            ParquetDatasetPiece(path, partition_keys=part_keys,
-                                open_file_func=self.open_file_func)
+            ParquetDatasetPiece._create(path, partition_keys=part_keys,
+                                        open_file_func=self.open_file_func)
             for path in files
         ])
 
@@ -1153,6 +1177,12 @@ def _open_dataset_file(dataset, path, meta=None):
     )
 
 
+_DEPR_MSG = (
+    "'{}' attribute is deprecated as of pyarrow 5.0.0 and will be removed "
+    "in a future version.{}"
+)
+
+
 _read_docstring_common = """\
 read_dictionary : list, default None
     List of names or column paths (for nested types) to read directly
@@ -1279,16 +1309,16 @@ def __init__(self, path_or_paths, filesystem=None, schema=None,
         self._metadata.memory_map = memory_map
         self._metadata.buffer_size = buffer_size
 
-        (self.pieces,
-         self.partitions,
+        (self._pieces,
+         self._partitions,
          self.common_metadata_path,
          self.metadata_path) = _make_manifest(
-             path_or_paths, self.fs, metadata_nthreads=metadata_nthreads,
+             path_or_paths, self._fs, metadata_nthreads=metadata_nthreads,
              open_file_func=partial(_open_dataset_file, self._metadata)
         )
 
         if self.common_metadata_path is not None:
-            with self.fs.open(self.common_metadata_path) as f:
+            with self._fs.open(self.common_metadata_path) as f:
                 self._metadata.common_metadata = read_metadata(
                     f,
                     memory_map=memory_map
@@ -1297,7 +1327,7 @@ def __init__(self, path_or_paths, filesystem=None, schema=None,
             self._metadata.common_metadata = None
 
         if metadata is None and self.metadata_path is not None:
-            with self.fs.open(self.metadata_path) as f:
+            with self._fs.open(self.metadata_path) as f:
                 self.metadata = read_metadata(f, memory_map=memory_map)
         else:
             self.metadata = metadata
@@ -1320,14 +1350,17 @@ def equals(self, other):
         if not isinstance(other, ParquetDataset):
             raise TypeError('`other` must be an instance of ParquetDataset')
 
-        if self.fs.__class__ != other.fs.__class__:
+        if self._fs.__class__ != other._fs.__class__:
             return False
-        for prop in ('paths', 'memory_map', 'pieces', 'partitions',
+        for prop in ('paths', '_pieces', '_partitions',
                      'common_metadata_path', 'metadata_path',
                      'common_metadata', 'metadata', 'schema',
-                     'buffer_size', 'split_row_groups'):
+                     'split_row_groups'):
             if getattr(self, prop) != getattr(other, prop):
                 return False
+        for prop in ('memory_map', 'buffer_size'):
+            if getattr(self._metadata, prop) != getattr(other._metadata, prop):
+                return False
 
         return True
 
@@ -1342,7 +1375,7 @@ def validate_schemas(self):
             if self.common_metadata is not None:
                 self.schema = self.common_metadata.schema
             else:
-                self.schema = self.pieces[0].get_metadata().schema
+                self.schema = self._pieces[0].get_metadata().schema
         elif self.schema is None:
             self.schema = self.metadata.schema
 
@@ -1350,13 +1383,13 @@ def validate_schemas(self):
         dataset_schema = self.schema.to_arrow_schema()
         # Exclude the partition columns from the schema, they are provided
         # by the path, not the DatasetPiece
-        if self.partitions is not None:
-            for partition_name in self.partitions.partition_names:
+        if self._partitions is not None:
+            for partition_name in self._partitions.partition_names:
                 if dataset_schema.get_field_index(partition_name) != -1:
                     field_idx = dataset_schema.get_field_index(partition_name)
                     dataset_schema = dataset_schema.remove(field_idx)
 
-        for piece in self.pieces:
+        for piece in self._pieces:
             file_metadata = piece.get_metadata()
             file_schema = file_metadata.schema.to_arrow_schema()
             if not dataset_schema.equals(file_schema, check_metadata=False):
@@ -1384,9 +1417,9 @@ def read(self, columns=None, use_threads=True, use_pandas_metadata=False):
             Content of the file as a table (of columns).
         """
         tables = []
-        for piece in self.pieces:
+        for piece in self._pieces:
             table = piece.read(columns=columns, use_threads=use_threads,
-                               partitions=self.partitions,
+                               partitions=self._partitions,
                                use_pandas_metadata=use_pandas_metadata)
             tables.append(table)
 
@@ -1425,7 +1458,7 @@ def _get_common_pandas_metadata(self):
         return keyvalues.get(b'pandas', None)
 
     def _filter(self, filters):
-        accepts_filter = self.partitions.filter_accepts_partition
+        accepts_filter = self._partitions.filter_accepts_partition
 
         def one_filter_accepts(piece, filter):
             return all(accepts_filter(part_key, filter, level)
@@ -1435,17 +1468,65 @@ def all_filters_accept(piece):
             return any(all(one_filter_accepts(piece, f) for f in conjunction)
                        for conjunction in filters)
 
-        self.pieces = [p for p in self.pieces if all_filters_accept(p)]
+        self._pieces = [p for p in self._pieces if all_filters_accept(p)]
+
+    @property
+    def pieces(self):
+        warnings.warn(
+            _DEPR_MSG.format(
+                "ParquetDataset.pieces",
+                " Specify 'use_legacy_dataset=False' while constructing the "
+                "ParquetDataset, and then use the '.fragments' attribute "
+                "instead."),
+            DeprecationWarning, stacklevel=2)
+        return self._pieces
+
+    @property
+    def partitions(self):
+        warnings.warn(
+            _DEPR_MSG.format("ParquetDataset.partitions", ""),
+            DeprecationWarning, stacklevel=2)
+        return self._partitions
+
+    @property
+    def memory_map(self):
+        warnings.warn(
+            _DEPR_MSG.format("ParquetDataset.memory_map", ""),
+            DeprecationWarning, stacklevel=2)
+        return self._metadata.memory_map
+
+    @property
+    def read_dictionary(self):
+        warnings.warn(
+            _DEPR_MSG.format("ParquetDataset.read_dictionary", ""),
+            DeprecationWarning, stacklevel=2)
+        return self._metadata.read_dictionary
 
-    fs = property(operator.attrgetter('_metadata.fs'))
-    memory_map = property(operator.attrgetter('_metadata.memory_map'))
-    read_dictionary = property(
-        operator.attrgetter('_metadata.read_dictionary')
+    @property
+    def buffer_size(self):
+        warnings.warn(
+            _DEPR_MSG.format("ParquetDataset.buffer_size", ""),
+            DeprecationWarning, stacklevel=2)
+        return self._metadata.buffer_size
+
+    _fs = property(
+        operator.attrgetter('_metadata.fs')
     )
+
+    @property
+    def fs(self):
+        warnings.warn(
+            _DEPR_MSG.format(
+                "ParquetDataset.fs",
+                " Specify 'use_legacy_dataset=False' while constructing the "
+                "ParquetDataset, and then use the '.filesystem' attribute "
+                "instead."),
+            DeprecationWarning, stacklevel=2)
+        return self._metadata.fs
+
     common_metadata = property(
         operator.attrgetter('_metadata.common_metadata')
     )
-    buffer_size = property(operator.attrgetter('_metadata.buffer_size'))
 
 
 def _make_manifest(path_or_paths, fs, pathsep='/', metadata_nthreads=1,
@@ -1480,7 +1561,8 @@ def _make_manifest(path_or_paths, fs, pathsep='/', metadata_nthreads=1,
             if not fs.isfile(path):
                 raise OSError('Passed non-file path: {}'
                               .format(path))
-            piece = ParquetDatasetPiece(path, open_file_func=open_file_func)
+            piece = ParquetDatasetPiece._create(
+                path, open_file_func=open_file_func)
             pieces.append(piece)
 
     return pieces, partitions, common_metadata_path, metadata_path
@@ -1663,9 +1745,24 @@ def read_pandas(self, **kwargs):
 
     @property
     def pieces(self):
-        # TODO raise deprecation warning
+        warnings.warn(
+            _DEPR_MSG.format("ParquetDataset.pieces",
+                             " Use the '.fragments' attribute instead"),
+            DeprecationWarning, stacklevel=2)
         return list(self._dataset.get_fragments())
 
+    @property
+    def fragments(self):
+        return list(self._dataset.get_fragments())
+
+    @property
+    def files(self):
+        return self._dataset.files
+
+    @property
+    def filesystem(self):
+        return self._dataset.filesystem
+
 
 _read_table_docstring = """
 {0}
diff --git a/python/pyarrow/tests/parquet/test_dataset.py b/python/pyarrow/tests/parquet/test_dataset.py
index 8cff6954cf2..81e3cdd7468 100644
--- a/python/pyarrow/tests/parquet/test_dataset.py
+++ b/python/pyarrow/tests/parquet/test_dataset.py
@@ -57,7 +57,8 @@ def test_parquet_piece_read(tempdir):
     path = tempdir / 'parquet_piece_read.parquet'
     _write_table(table, path, version='2.0')
 
-    piece1 = pq.ParquetDatasetPiece(path)
+    with pytest.warns(DeprecationWarning):
+        piece1 = pq.ParquetDatasetPiece(path)
 
     result = piece1.read()
     assert result.equals(table)
@@ -71,7 +72,8 @@ def test_parquet_piece_open_and_get_metadata(tempdir):
     path = tempdir / 'parquet_piece_read.parquet'
     _write_table(table, path, version='2.0')
 
-    piece = pq.ParquetDatasetPiece(path)
+    with pytest.warns(DeprecationWarning):
+        piece = pq.ParquetDatasetPiece(path)
     table1 = piece.read()
     assert isinstance(table1, pa.Table)
     meta1 = piece.get_metadata()
@@ -80,6 +82,7 @@ def test_parquet_piece_open_and_get_metadata(tempdir):
     assert table.equals(table1)
 
 
+@pytest.mark.filterwarnings("ignore:ParquetDatasetPiece:DeprecationWarning")
 def test_parquet_piece_basics():
     path = '/baz.parq'
 
@@ -139,6 +142,7 @@ def test_read_partitioned_directory(tempdir, use_legacy_dataset):
     _partition_test_for_filesystem(fs, tempdir, use_legacy_dataset)
 
 
+@pytest.mark.filterwarnings("ignore:'ParquetDataset:DeprecationWarning")
 @pytest.mark.pandas
 def test_create_parquet_dataset_multi_threaded(tempdir):
     fs = LocalFileSystem._get_instance()
@@ -979,6 +983,7 @@ def test_dataset_read_pandas(tempdir, use_legacy_dataset):
     tm.assert_frame_equal(result.reindex(columns=expected.columns), expected)
 
 
+@pytest.mark.filterwarnings("ignore:'ParquetDataset:DeprecationWarning")
 @pytest.mark.pandas
 @parametrize_legacy_dataset
 def test_dataset_memory_map(tempdir, use_legacy_dataset):
@@ -1056,7 +1061,7 @@ def _make_example_multifile_dataset(base_path, nfiles=10, file_nrows=5):
 
 def _assert_dataset_paths(dataset, paths, use_legacy_dataset):
     if use_legacy_dataset:
-        assert set(map(str, paths)) == {x.path for x in dataset.pieces}
+        assert set(map(str, paths)) == {x.path for x in dataset._pieces}
     else:
         paths = [str(path.as_posix()) for path in paths]
         assert set(paths) == set(dataset._dataset.files)
@@ -1368,6 +1373,7 @@ def test_write_to_dataset_no_partitions_s3fs(
         path, use_legacy_dataset, filesystem=fs)
 
 
+@pytest.mark.filterwarnings("ignore:'ParquetDataset:DeprecationWarning")
 @pytest.mark.pandas
 @parametrize_legacy_dataset_not_supported
 def test_write_to_dataset_with_partitions_and_custom_filenames(
@@ -1456,7 +1462,7 @@ def is_pickleable(obj):
     for column in dataset.metadata.schema:
         assert is_pickleable(column)
 
-    for piece in dataset.pieces:
+    for piece in dataset._pieces:
         assert is_pickleable(piece)
         metadata = piece.get_metadata()
         assert metadata.num_row_groups
@@ -1594,6 +1600,7 @@ def test_parquet_dataset_new_filesystem(tempdir):
     assert result.equals(table)
 
 
+@pytest.mark.filterwarnings("ignore:'ParquetDataset:DeprecationWarning")
 def test_parquet_dataset_partitions_piece_path_with_fsspec(tempdir):
     # ARROW-10462 ensure that on Windows we properly use posix-style paths
     # as used by fsspec
@@ -1608,3 +1615,33 @@ def test_parquet_dataset_partitions_piece_path_with_fsspec(tempdir):
     # ensure the piece path is also posix-style
     expected = path + "/data.parquet"
     assert dataset.pieces[0].path == expected
+
+
+def test_parquet_dataset_deprecated_properties(tempdir):
+    table = pa.table({'a': [1, 2, 3]})
+    path = tempdir / 'data.parquet'
+    pq.write_table(table, path)
+    dataset = pq.ParquetDataset(path)
+
+    with pytest.warns(DeprecationWarning, match="'ParquetDataset.pieces"):
+        dataset.pieces
+
+    with pytest.warns(DeprecationWarning, match="'ParquetDataset.partitions"):
+        dataset.partitions
+
+    with pytest.warns(DeprecationWarning, match="'ParquetDataset.memory_map"):
+        dataset.memory_map
+
+    with pytest.warns(DeprecationWarning, match="'ParquetDataset.read_dictio"):
+        dataset.read_dictionary
+
+    with pytest.warns(DeprecationWarning, match="'ParquetDataset.buffer_size"):
+        dataset.buffer_size
+
+    with pytest.warns(DeprecationWarning, match="'ParquetDataset.fs"):
+        dataset.fs
+
+    dataset2 = pq.ParquetDataset(path, use_legacy_dataset=False)
+
+    with pytest.warns(DeprecationWarning, match="'ParquetDataset.pieces"):
+        dataset2.pieces
diff --git a/python/pyarrow/tests/parquet/test_metadata.py b/python/pyarrow/tests/parquet/test_metadata.py
index 4c310661fe9..3ba8a467c40 100644
--- a/python/pyarrow/tests/parquet/test_metadata.py
+++ b/python/pyarrow/tests/parquet/test_metadata.py
@@ -138,8 +138,8 @@ def test_parquet_metadata_lifetime(tempdir):
     # ARROW-6642 - ensure that chained access keeps parent objects alive
     table = pa.table({'a': [1, 2, 3]})
     pq.write_table(table, tempdir / 'test_metadata_segfault.parquet')
-    dataset = pq.ParquetDataset(tempdir / 'test_metadata_segfault.parquet')
-    dataset.pieces[0].get_metadata().row_group(0).column(0).statistics
+    parquet_file = pq.ParquetFile(tempdir / 'test_metadata_segfault.parquet')
+    parquet_file.metadata.row_group(0).column(0).statistics
 
 
 @pytest.mark.pandas

From 5fcd4d5cdc1314ba2f0b2b717026591903e64bcf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Pedro?= <joaop@simbioseventures.com>
Date: Thu, 15 Jul 2021 21:32:54 +0530
Subject: [PATCH 569/719] ARROW-13050: [C++][Gandiva] Implement SPACE Hive
 function on Gandiva
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implement SPACE Hive function on Gandiva

Closes #10517 from jpedroantunes/feature/add-space-function and squashes the following commits:

73281ba01 <João Pedro> Fix ci build errors
09d8c59b6 <João Pedro> Add space function implementation

Authored-by: João Pedro <joaop@simbioseventures.com>
Signed-off-by: Praveen <praveen@dremio.com>
---
 cpp/src/gandiva/function_registry_string.cc   |  2 ++
 cpp/src/gandiva/precompiled/string_ops.cc     | 25 ++++++++++++++
 .../gandiva/precompiled/string_ops_test.cc    | 25 ++++++++++++++
 cpp/src/gandiva/precompiled/types.h           |  2 ++
 cpp/src/gandiva/tests/utf8_test.cc            | 33 +++++++++++++++++++
 5 files changed, 87 insertions(+)

diff --git a/cpp/src/gandiva/function_registry_string.cc b/cpp/src/gandiva/function_registry_string.cc
index 48c22657258..8f979b7f17e 100644
--- a/cpp/src/gandiva/function_registry_string.cc
+++ b/cpp/src/gandiva/function_registry_string.cc
@@ -58,6 +58,8 @@ std::vector<NativeFunction> GetStringFunctionRegistry() {
       UNARY_UNSAFE_NULL_IF_NULL(ltrim, {}, utf8, utf8),
       UNARY_UNSAFE_NULL_IF_NULL(rtrim, {}, utf8, utf8),
       UNARY_UNSAFE_NULL_IF_NULL(btrim, {}, utf8, utf8),
+      UNARY_UNSAFE_NULL_IF_NULL(space, {}, int32, utf8),
+      UNARY_UNSAFE_NULL_IF_NULL(space, {}, int64, utf8),
 
       UNARY_SAFE_NULL_NEVER_BOOL_FN(isnull, {}),
       UNARY_SAFE_NULL_NEVER_BOOL_FN(isnotnull, {}),
diff --git a/cpp/src/gandiva/precompiled/string_ops.cc b/cpp/src/gandiva/precompiled/string_ops.cc
index 0820114a0ea..b98a415c0ff 100644
--- a/cpp/src/gandiva/precompiled/string_ops.cc
+++ b/cpp/src/gandiva/precompiled/string_ops.cc
@@ -243,6 +243,31 @@ UTF8_LENGTH(char_length, utf8)
 UTF8_LENGTH(length, utf8)
 UTF8_LENGTH(lengthUtf8, binary)
 
+// Returns a string of 'n' spaces.
+#define SPACE_STR(IN_TYPE)                                                              \
+  GANDIVA_EXPORT                                                                        \
+  const char* space_##IN_TYPE(gdv_int64 ctx, gdv_##IN_TYPE n, int32_t* out_len) {       \
+    gdv_int32 n_times = static_cast<gdv_int32>(n);                                      \
+    if (n_times <= 0) {                                                                 \
+      *out_len = 0;                                                                     \
+      return "";                                                                        \
+    }                                                                                   \
+    char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(ctx, n_times));     \
+    if (ret == nullptr) {                                                               \
+      gdv_fn_context_set_error_msg(ctx, "Could not allocate memory for output string"); \
+      *out_len = 0;                                                                     \
+      return "";                                                                        \
+    }                                                                                   \
+    for (int i = 0; i < n_times; i++) {                                                 \
+      ret[i] = ' ';                                                                     \
+    }                                                                                   \
+    *out_len = n_times;                                                                 \
+    return ret;                                                                         \
+  }
+
+SPACE_STR(int32)
+SPACE_STR(int64)
+
 // Reverse a utf8 sequence
 FORCE_INLINE
 const char* reverse_utf8(gdv_int64 context, const char* data, gdv_int32 data_len,
diff --git a/cpp/src/gandiva/precompiled/string_ops_test.cc b/cpp/src/gandiva/precompiled/string_ops_test.cc
index c4854c52db1..067f3dbae72 100644
--- a/cpp/src/gandiva/precompiled/string_ops_test.cc
+++ b/cpp/src/gandiva/precompiled/string_ops_test.cc
@@ -69,6 +69,31 @@ TEST(TestStringOps, TestBeginsEnds) {
   EXPECT_FALSE(ends_with_utf8_utf8("hello", 5, "sir", 3));
 }
 
+TEST(TestStringOps, TestSpace) {
+  // Space - returns a string with 'n' spaces
+  gandiva::ExecutionContext ctx;
+  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
+  int32_t out_len = 0;
+
+  auto out = space_int32(ctx_ptr, 1, &out_len);
+  EXPECT_EQ(std::string(out, out_len), " ");
+  out = space_int32(ctx_ptr, 10, &out_len);
+  EXPECT_EQ(std::string(out, out_len), "          ");
+  out = space_int32(ctx_ptr, 5, &out_len);
+  EXPECT_EQ(std::string(out, out_len), "     ");
+  out = space_int32(ctx_ptr, -5, &out_len);
+  EXPECT_EQ(std::string(out, out_len), "");
+
+  out = space_int64(ctx_ptr, 2, &out_len);
+  EXPECT_EQ(std::string(out, out_len), "  ");
+  out = space_int64(ctx_ptr, 9, &out_len);
+  EXPECT_EQ(std::string(out, out_len), "         ");
+  out = space_int64(ctx_ptr, 4, &out_len);
+  EXPECT_EQ(std::string(out, out_len), "    ");
+  out = space_int64(ctx_ptr, -5, &out_len);
+  EXPECT_EQ(std::string(out, out_len), "");
+}
+
 TEST(TestStringOps, TestIsSubstr) {
   EXPECT_TRUE(is_substr_utf8_utf8("hello world", 11, "world", 5));
   EXPECT_TRUE(is_substr_utf8_utf8("hello world", 11, "lo wo", 5));
diff --git a/cpp/src/gandiva/precompiled/types.h b/cpp/src/gandiva/precompiled/types.h
index 775421afd2c..e8a3062d0bd 100644
--- a/cpp/src/gandiva/precompiled/types.h
+++ b/cpp/src/gandiva/precompiled/types.h
@@ -293,6 +293,8 @@ const char* concat_utf8_utf8_utf8_utf8(gdv_int64 context, const char* in1,
                                        gdv_int32 in3_len, bool in3_validity,
                                        const char* in4, gdv_int32 in4_len,
                                        bool in4_validity, gdv_int32* out_len);
+const char* space_int32(gdv_int64 ctx, gdv_int32 n, int32_t* out_len);
+const char* space_int64(gdv_int64 ctx, gdv_int64 n, int32_t* out_len);
 const char* concat_utf8_utf8_utf8_utf8_utf8(
     gdv_int64 context, const char* in1, gdv_int32 in1_len, bool in1_validity,
     const char* in2, gdv_int32 in2_len, bool in2_validity, const char* in3,
diff --git a/cpp/src/gandiva/tests/utf8_test.cc b/cpp/src/gandiva/tests/utf8_test.cc
index 146af2010c6..e19d6712d57 100644
--- a/cpp/src/gandiva/tests/utf8_test.cc
+++ b/cpp/src/gandiva/tests/utf8_test.cc
@@ -715,4 +715,37 @@ TEST_F(TestUtf8, TestAscii) {
   EXPECT_ARROW_ARRAY_EQUALS(exp_asc, outputs.at(0));
 }
 
+TEST_F(TestUtf8, TestSpace) {
+  // schema for input fields
+  auto field0 = field("f0", arrow::int64());
+  auto schema = arrow::schema({field0});
+
+  // output fields
+  auto field_space = field("space", arrow::utf8());
+
+  // Build expression
+  auto space_expr = TreeExprBuilder::MakeExpression("space", {field0}, field_space);
+
+  std::shared_ptr<Projector> projector;
+  auto status = Projector::Make(schema, {space_expr}, TestConfiguration(), &projector);
+  EXPECT_TRUE(status.ok()) << status.message();
+
+  // Create a row-batch with some sample data
+  int num_records = 4;
+  auto array0 = MakeArrowArrayInt64({1, 0, -5, 2}, {true, true, true, true});
+  // expected output
+  auto exp_space = MakeArrowArrayUtf8({" ", "", "", "  "}, {true, true, true, true});
+
+  // prepare input record batch
+  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0});
+
+  // Evaluate expression
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  EXPECT_TRUE(status.ok()) << status.message();
+
+  // Validate results
+  EXPECT_ARROW_ARRAY_EQUALS(exp_space, outputs.at(0));
+}
+
 }  // namespace gandiva

From a3f778cd17cd82d50563127c00673b047e37a41a Mon Sep 17 00:00:00 2001
From: Anthony Louis <anthony@simbioseventures.com>
Date: Thu, 15 Jul 2021 21:34:49 +0530
Subject: [PATCH 570/719] ARROW-13190: [C++] [Gandiva] Change behavior of
 INITCAP function

The current behavior of the INITCAP function is to turn the first character of each word uppercase and remains the other as is.

The desired behavior is to turn the first letter uppercase and the other lowercase. Any character except the [lowercase letters](https://www.compart.com/en/unicode/category/Ll), [uppercase letters](https://www.compart.com/en/unicode/category/Lu) and [decimal numbers](https://www.compart.com/en/unicode/category/Nd) ones should be considered as a word separator.

That behavior is based on these database systems:
    - [Oracle](https://docs.oracle.com/cd/B19306_01/server.102/b14200/functions065.htm)
    - [Postgres](https://w3resource.com/PostgreSQL/initcap-function.php)
    - [Redshift](https://docs.aws.amazon.com/redshift/latest/dg/r_INITCAP.html)
    - [Splice Machine](https://doc.splicemachine.com/sqlref_builtinfcns_initcap.html)

Closes #10604 from anthonylouisbsb/fixbug/fix-initcap-behavior and squashes the following commits:

68a439925 <Anthony Louis> Change call to get_char_len
8e05abed2 <Anthony Louis> Add force inline option for MSVC compiler
9146c0146 <Anthony Louis> Remove GANDIVA_EXPORT for helper functions
ca0b0d018 <Anthony Louis> Add FORCE_INLINE in functions
1f4cfc7a9 <Anthony Louis> Add tests to modified letters
4a1a58463 <Anthony Louis> Add more tests for other characters groups
32a2c2dd0 <Anthony Louis> Fix java tests for function
4445e51f0 <Anthony Louis> Fix tests after changes in function
faa2169c0 <Anthony Louis> Change comments for is space
c98db7af6 <Anthony Louis> Change initcap function behavior

Authored-by: Anthony Louis <anthony@simbioseventures.com>
Signed-off-by: Praveen <praveen@dremio.com>
---
 cpp/src/gandiva/gdv_function_stubs.cc         | 66 ++++++++++---------
 cpp/src/gandiva/gdv_function_stubs.h          | 17 +++--
 cpp/src/gandiva/gdv_function_stubs_test.cc    | 36 ++++++++--
 .../gandiva/evaluator/ProjectorTest.java      |  8 +--
 4 files changed, 80 insertions(+), 47 deletions(-)

diff --git a/cpp/src/gandiva/gdv_function_stubs.cc b/cpp/src/gandiva/gdv_function_stubs.cc
index 99e60b015ec..5bf8da7e718 100644
--- a/cpp/src/gandiva/gdv_function_stubs.cc
+++ b/cpp/src/gandiva/gdv_function_stubs.cc
@@ -482,7 +482,7 @@ CAST_VARLEN_TYPE_FROM_NUMERIC(VARBINARY)
 #undef GDV_FN_CAST_VARCHAR_INTEGER
 #undef GDV_FN_CAST_VARCHAR_REAL
 
-GANDIVA_EXPORT
+GDV_FORCE_INLINE
 int32_t gdv_fn_utf8_char_length(char c) {
   if ((signed char)c >= 0) {  // 1-byte char (0x00 ~ 0x7F)
     return 1;
@@ -497,7 +497,7 @@ int32_t gdv_fn_utf8_char_length(char c) {
   return 0;
 }
 
-GANDIVA_EXPORT
+GDV_FORCE_INLINE
 void gdv_fn_set_error_for_invalid_utf8(int64_t execution_context, char val) {
   char const* fmt = "unexpected byte \\%02hhx encountered while decoding utf8 string";
   int size = static_cast<int>(strlen(fmt)) + 64;
@@ -651,24 +651,27 @@ const char* gdv_fn_lower_utf8(int64_t context, const char* data, int32_t data_le
   return out;
 }
 
-// Checks if the character is a whitespace by its code point. To check the list
-// of the existent whitespaces characters in UTF8, take a look at this link
-// https://en.wikipedia.org/wiki/Whitespace_character#Unicode
+// Any codepoint, except the ones for lowercase letters, uppercase letters,
+// titlecase letters, decimal digits and letter numbers categories will be
+// considered as word separators.
 //
 // The Unicode characters also are divided between categories. This link
-// https://en.wikipedia.org/wiki/Unicode_character_property#General_Category shows
+// https://www.compart.com/en/unicode/category shows
 // more information about characters categories.
-GANDIVA_EXPORT
+GDV_FORCE_INLINE
 bool gdv_fn_is_codepoint_for_space(uint32_t val) {
   auto category = utf8proc_category(val);
 
-  return category == utf8proc_category_t::UTF8PROC_CATEGORY_ZS ||
-         category == utf8proc_category_t::UTF8PROC_CATEGORY_ZL ||
-         category == utf8proc_category_t::UTF8PROC_CATEGORY_ZP;
+  return category != utf8proc_category_t::UTF8PROC_CATEGORY_LU &&
+         category != utf8proc_category_t::UTF8PROC_CATEGORY_LL &&
+         category != utf8proc_category_t::UTF8PROC_CATEGORY_LT &&
+         category != utf8proc_category_t::UTF8PROC_CATEGORY_NL &&
+         category != utf8proc_category_t ::UTF8PROC_CATEGORY_ND;
 }
 
-// For a given text, initialize the first letter of each word, e.g:
-//     - "it is a text str" -> "It Is A Text Str"
+// For a given text, initialize the first letter after a word-separator and lowercase
+// the others e.g:
+//     - "IT is a tEXt str" -> "It Is A Text Str"
 GANDIVA_EXPORT
 const char* gdv_fn_initcap_utf8(int64_t context, const char* data, int32_t data_len,
                                 int32_t* out_len) {
@@ -691,35 +694,38 @@ const char* gdv_fn_initcap_utf8(int64_t context, const char* data, int32_t data_
   int32_t out_char_len = 0;
   int32_t out_idx = 0;
   uint32_t char_codepoint;
+
+  // Any character is considered as space, except if it is alphanumeric
   bool last_char_was_space = true;
 
   for (int32_t i = 0; i < data_len; i += char_len) {
-    char_len = gdv_fn_utf8_char_length(data[i]);
-    // For single byte characters:
-    // If it is a lowercase ASCII character, set the output to its corresponding uppercase
-    // character; else, set the output to the read character
-    if (char_len == 1) {
+    // An optimization for single byte characters:
+    if (static_cast<signed char>(data[i]) >= 0) {  // 1-byte char (0x00 ~ 0x7F)
+      char_len = 1;
       char cur = data[i];
 
       if (cur >= 0x61 && cur <= 0x7a && last_char_was_space) {
-        // 'A' - 'Z' : 0x41 - 0x5a
-        // 'a' - 'z' : 0x61 - 0x7a
+        // Check if the character is the first one of the word and it is
+        // lowercase -> 'a' - 'z' : 0x61 - 0x7a.
+        // Then turn it into uppercase -> 'A' - 'Z' : 0x41 - 0x5a
         out[out_idx++] = static_cast<char>(cur - 0x20);
         last_char_was_space = false;
+      } else if (cur >= 0x41 && cur <= 0x5a && !last_char_was_space) {
+        out[out_idx++] = static_cast<char>(cur + 0x20);
       } else {
-        // Check if the ASCII character is one of these:
-        // - space : 0x20
-        // - character tabulation : 0x9
-        // - line feed : 0xA
-        // - line tabulation : 0xB
-        // - form feed : 0xC
-        // - carriage return : 0xD
-        last_char_was_space = cur <= 0x20;
+        // Check if the ASCII character is not an alphanumeric character:
+        // '0' - '9': 0x30 - 0x39
+        // 'a' - 'z' : 0x61 - 0x7a
+        // 'A' - 'Z' : 0x41 - 0x5a
+        last_char_was_space = (cur < 0x30) || (cur > 0x39 && cur < 0x41) ||
+                              (cur > 0x5a && cur < 0x61) || (cur > 0x7a);
         out[out_idx++] = cur;
       }
       continue;
     }
 
+    char_len = gdv_fn_utf8_char_length(data[i]);
+
     // Control reaches here when we encounter a multibyte character
     const auto* in_char = (const uint8_t*)(data + i);
 
@@ -738,18 +744,16 @@ const char* gdv_fn_initcap_utf8(int64_t context, const char* data, int32_t data_
 
     int32_t formatted_codepoint;
     if (last_char_was_space && !is_char_space) {
-      // Convert the encoded codepoint to its uppercase codepoint
       formatted_codepoint = utf8proc_toupper(char_codepoint);
     } else {
-      // Leave the codepoint as is
-      formatted_codepoint = char_codepoint;
+      formatted_codepoint = utf8proc_tolower(char_codepoint);
     }
 
     // UTF8Encode advances the pointer by the number of bytes present in the character
     auto* out_char = (uint8_t*)(out + out_idx);
     uint8_t* out_char_start = out_char;
 
-    // Encode the uppercase character
+    // Encode the character
     out_char = arrow::util::UTF8Encode(out_char, formatted_codepoint);
 
     out_char_len = static_cast<int32_t>(out_char - out_char_start);
diff --git a/cpp/src/gandiva/gdv_function_stubs.h b/cpp/src/gandiva/gdv_function_stubs.h
index 9320bf6957d..1d95c82e3ca 100644
--- a/cpp/src/gandiva/gdv_function_stubs.h
+++ b/cpp/src/gandiva/gdv_function_stubs.h
@@ -43,6 +43,17 @@ using gdv_utf8 = char*;
 using gdv_binary = char*;
 using gdv_day_time_interval = int64_t;
 
+#ifdef GANDIVA_UNIT_TEST
+// unit tests may be compiled without O2, so inlining may not happen.
+#define GDV_FORCE_INLINE
+#else
+#ifdef _MSC_VER
+#define GDV_FORCE_INLINE __forceinline
+#else
+#define GDV_FORCE_INLINE inline __attribute__((always_inline))
+#endif
+#endif
+
 bool gdv_fn_like_utf8_utf8(int64_t ptr, const char* data, int data_len,
                            const char* pattern, int pattern_len);
 
@@ -135,9 +146,6 @@ const char* gdv_fn_castVARCHAR_float64_int64(int64_t context, double value, int6
 GANDIVA_EXPORT
 int32_t gdv_fn_utf8_char_length(char c);
 
-GANDIVA_EXPORT
-void gdv_fn_set_error_for_invalid_utf8(int64_t execution_context, char val);
-
 GANDIVA_EXPORT
 const char* gdv_fn_upper_utf8(int64_t context, const char* data, int32_t data_len,
                               int32_t* out_len);
@@ -146,9 +154,6 @@ GANDIVA_EXPORT
 const char* gdv_fn_lower_utf8(int64_t context, const char* data, int32_t data_len,
                               int32_t* out_len);
 
-GANDIVA_EXPORT
-bool gdv_fn_is_codepoint_for_space(uint32_t val);
-
 GANDIVA_EXPORT
 const char* gdv_fn_initcap_utf8(int64_t context, const char* data, int32_t data_len,
                                 int32_t* out_len);
diff --git a/cpp/src/gandiva/gdv_function_stubs_test.cc b/cpp/src/gandiva/gdv_function_stubs_test.cc
index 08e021eb55f..80e6379edab 100644
--- a/cpp/src/gandiva/gdv_function_stubs_test.cc
+++ b/cpp/src/gandiva/gdv_function_stubs_test.cc
@@ -546,20 +546,20 @@ TEST(TestGdvFnStubs, TestInitCap) {
   EXPECT_EQ(std::string(out_str, out_len), "Asdfj\nHlqf");
   EXPECT_FALSE(ctx.has_error());
 
-  out_str = gdv_fn_initcap_utf8(ctx_ptr, "s;DCgs,Jo!L", 11, &out_len);
-  EXPECT_EQ(std::string(out_str, out_len), "S;DCgs,Jo!L");
+  out_str = gdv_fn_initcap_utf8(ctx_ptr, "s;DCgs,Jo!l", 11, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "S;Dcgs,Jo!L");
   EXPECT_FALSE(ctx.has_error());
 
   out_str = gdv_fn_initcap_utf8(ctx_ptr, " mÜNCHEN", 9, &out_len);
-  EXPECT_EQ(std::string(out_str, out_len), " MÜNCHEN");
+  EXPECT_EQ(std::string(out_str, out_len), " München");
   EXPECT_FALSE(ctx.has_error());
 
   out_str = gdv_fn_initcap_utf8(ctx_ptr, "citroën CaR", 12, &out_len);
-  EXPECT_EQ(std::string(out_str, out_len), "Citroën CaR");
+  EXPECT_EQ(std::string(out_str, out_len), "Citroën Car");
   EXPECT_FALSE(ctx.has_error());
 
   out_str = gdv_fn_initcap_utf8(ctx_ptr, "ÂbĆDËFgh\néll", 16, &out_len);
-  EXPECT_EQ(std::string(out_str, out_len), "ÂbĆDËFgh\nÉll");
+  EXPECT_EQ(std::string(out_str, out_len), "Âbćdëfgh\nÉll");
   EXPECT_FALSE(ctx.has_error());
 
   out_str = gdv_fn_initcap_utf8(ctx_ptr, "  øhpqršvñ  \n\n", 17, &out_len);
@@ -572,7 +572,31 @@ TEST(TestGdvFnStubs, TestInitCap) {
   EXPECT_FALSE(ctx.has_error());
 
   out_str = gdv_fn_initcap_utf8(ctx_ptr, "{ÕHP,pqśv}Ń+", 15, &out_len);
-  EXPECT_EQ(std::string(out_str, out_len), "{ÕHP,pqśv}Ń+");
+  EXPECT_EQ(std::string(out_str, out_len), "{Õhp,Pqśv}Ń+");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_initcap_utf8(ctx_ptr, "sɦasasdsɦsd\"sdsdɦ", 19, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "Sɦasasdsɦsd\"Sdsdɦ");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_initcap_utf8(ctx_ptr, "mysuperscipt@number²isfine", 27, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "Mysuperscipt@Number²Isfine");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_initcap_utf8(ctx_ptr, "Ő<tŵas̓老ƕɱ¢vIYwށ", 25, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "Ő<Tŵas̓老Ƕɱ¢Viywށ");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_initcap_utf8(ctx_ptr, "ↆcheckↆnumberisspace", 24, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "ↆcheckↆnumberisspace");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_initcap_utf8(ctx_ptr, "testing ᾌTitleᾌcase", 23, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "Testing ᾌtitleᾄcase");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = gdv_fn_initcap_utf8(ctx_ptr, "ʳTesting mʳodified", 20, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "ʳTesting MʳOdified");
   EXPECT_FALSE(ctx.has_error());
 
   out_str = gdv_fn_initcap_utf8(ctx_ptr, "", 0, &out_len);
diff --git a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java
index 80d4281f4c2..7fa10a8fabd 100644
--- a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java
+++ b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java
@@ -2310,7 +2310,7 @@ public void testInitCap() throws Exception {
     byte[] validity = new byte[]{(byte) 15, 0};
     String[] valuesX = new String[]{
         "  øhpqršvñ  \n\n",
-        "möbelträgerfüße   \nmöbelträgerfüße",
+        "möbelträger1füße   \nmöbelträge'rfüße",
         "ÂbĆDËFgh\néll",
         "citroën CaR",
         "kjk"
@@ -2318,9 +2318,9 @@ public void testInitCap() throws Exception {
 
     String[] expected = new String[]{
         "  Øhpqršvñ  \n\n",
-        "Möbelträgerfüße   \nMöbelträgerfüße",
-        "ÂbĆDËFgh\nÉll",
-        "Citroën CaR",
+        "Möbelträger1füße   \nMöbelträge'Rfüße",
+        "Âbćdëfgh\nÉll",
+        "Citroën Car",
         null
     };
 

From 9dd00dfb356fac140efc6f5dd2062f1b9094e671 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Pedro?= <joaop@simbioseventures.com>
Date: Thu, 15 Jul 2021 21:36:46 +0530
Subject: [PATCH 571/719] ARROW-13162: [C++][Gandiva] Add new alias for extract
 date functions in registry
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add new alias for extract date functions in registry to be compatible with some Hive SQL functions

Closes #10594 from jpedroantunes/feature/add-year-month-day-hour-minute-second and squashes the following commits:

8aea83bca <João Pedro> Fix lint errors
4297c3771 <João Pedro> Add new alias for extract date functions in registry

Authored-by: João Pedro <joaop@simbioseventures.com>
Signed-off-by: Praveen <praveen@dremio.com>
---
 cpp/src/gandiva/function_registry_datetime.cc | 24 ++++++++++---------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/cpp/src/gandiva/function_registry_datetime.cc b/cpp/src/gandiva/function_registry_datetime.cc
index 56c10bd706d..6e7a703aa61 100644
--- a/cpp/src/gandiva/function_registry_datetime.cc
+++ b/cpp/src/gandiva/function_registry_datetime.cc
@@ -21,13 +21,15 @@
 
 namespace gandiva {
 
-#define DATE_EXTRACTION_TRUNCATION_FNS(INNER, name)                              \
-  DATE_TYPES(INNER, name##Millennium, {}), DATE_TYPES(INNER, name##Century, {}), \
-      DATE_TYPES(INNER, name##Decade, {}), DATE_TYPES(INNER, name##Year, {}),    \
-      DATE_TYPES(INNER, name##Quarter, {}), DATE_TYPES(INNER, name##Month, {}),  \
-      DATE_TYPES(INNER, name##Week, {}), DATE_TYPES(INNER, name##Day, {}),       \
-      DATE_TYPES(INNER, name##Hour, {}), DATE_TYPES(INNER, name##Minute, {}),    \
-      DATE_TYPES(INNER, name##Second, {})
+#define DATE_EXTRACTION_TRUNCATION_FNS(INNER, name)                                    \
+  DATE_TYPES(INNER, name##Millennium, {}), DATE_TYPES(INNER, name##Century, {}),       \
+      DATE_TYPES(INNER, name##Decade, {}), DATE_TYPES(INNER, name##Year, {"year"}),    \
+      DATE_TYPES(INNER, name##Quarter, {}), DATE_TYPES(INNER, name##Month, {"month"}), \
+      DATE_TYPES(INNER, name##Week, ({"weekofyear", "yearweek"})),                     \
+      DATE_TYPES(INNER, name##Day, ({"day", "dayofmonth"})),                           \
+      DATE_TYPES(INNER, name##Hour, {"hour"}),                                         \
+      DATE_TYPES(INNER, name##Minute, {"minute"}),                                     \
+      DATE_TYPES(INNER, name##Second, {"second"})
 
 #define TO_TIMESTAMP_SAFE_NULL_IF_NULL(NAME, ALIASES, TYPE)                       \
   NativeFunction(#NAME, std::vector<std::string> ALIASES, DataTypeVector{TYPE()}, \
@@ -37,10 +39,10 @@ namespace gandiva {
   NativeFunction(#NAME, std::vector<std::string> ALIASES, DataTypeVector{TYPE()}, \
                  time32(), kResultNullIfNull, ARROW_STRINGIFY(NAME##_##TYPE))
 
-#define TIME_EXTRACTION_FNS(name)                              \
-  TIME_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Hour, {}),       \
-      TIME_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Minute, {}), \
-      TIME_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Second, {})
+#define TIME_EXTRACTION_FNS(name)                                      \
+  TIME_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Hour, {"hour"}),         \
+      TIME_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Minute, {"minute"}), \
+      TIME_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Second, {"second"})
 
 std::vector<NativeFunction> GetDateTimeFunctionRegistry() {
   static std::vector<NativeFunction> date_time_fn_registry_ = {

From 8dc24573b0b936151db697e5f8e67559d355b391 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Pedro?= <joaop@simbioseventures.com>
Date: Thu, 15 Jul 2021 21:39:01 +0530
Subject: [PATCH 572/719] ARROW-13281: [C++][Gandiva] Correct error on
 timestampDiffMonth function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The TIMESTAMPDIFF function appears to return incorrect values when a negative number should be returned.

Example:
- For the inputs TIMESTAMPDIFFMONTH("2019-06-30", "2019-03-31") it should return **-3**, but it actually returns **-1**
- For the inputs TIMESTAMPDIFFMONTH("2019-06-30", "2019-05-31") it should return **-1**, but it actually returns **1**

Closes #10674 from jpedroantunes/bugfix/timestamp-diff-month and squashes the following commits:

576be4dbf <João Pedro> Change add order on timestamp diff function
77aa72a8d <João Pedro> Fix bug on timestampdiff month function and add unit tests

Authored-by: João Pedro <joaop@simbioseventures.com>
Signed-off-by: Praveen <praveen@dremio.com>
---
 cpp/src/gandiva/precompiled/time_test.cc      | 30 +++++++++++++++++++
 .../precompiled/timestamp_arithmetic.cc       |  4 +--
 cpp/src/gandiva/precompiled/types.h           |  2 ++
 3 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/cpp/src/gandiva/precompiled/time_test.cc b/cpp/src/gandiva/precompiled/time_test.cc
index 4a5ba5b1627..8d3cdccd6ff 100644
--- a/cpp/src/gandiva/precompiled/time_test.cc
+++ b/cpp/src/gandiva/precompiled/time_test.cc
@@ -181,6 +181,36 @@ TEST(TestTime, TestExtractTime) {
   EXPECT_EQ(extractSecond_time32(time_as_millis_in_day), 33);
 }
 
+TEST(TestTime, TestTimestampDiffMonth) {
+  gdv_timestamp ts1 = StringToTimestamp("2019-06-30 00:00:00");
+  gdv_timestamp ts2 = StringToTimestamp("2019-05-31 00:00:00");
+  EXPECT_EQ(timestampdiffMonth_timestamp_timestamp(ts1, ts2), -1);
+
+  ts1 = StringToTimestamp("2019-06-30 00:00:00");
+  ts2 = StringToTimestamp("2019-02-28 00:00:00");
+  EXPECT_EQ(timestampdiffMonth_timestamp_timestamp(ts1, ts2), -4);
+
+  ts1 = StringToTimestamp("2019-06-30 00:00:00");
+  ts2 = StringToTimestamp("2019-03-31 00:00:00");
+  EXPECT_EQ(timestampdiffMonth_timestamp_timestamp(ts1, ts2), -3);
+
+  ts1 = StringToTimestamp("2019-06-30 00:00:00");
+  ts2 = StringToTimestamp("2019-06-30 00:00:00");
+  EXPECT_EQ(timestampdiffMonth_timestamp_timestamp(ts1, ts2), 0);
+
+  ts1 = StringToTimestamp("2019-06-30 00:00:00");
+  ts2 = StringToTimestamp("2019-07-31 00:00:00");
+  EXPECT_EQ(timestampdiffMonth_timestamp_timestamp(ts1, ts2), 1);
+
+  ts1 = StringToTimestamp("2019-06-30 00:00:00");
+  ts2 = StringToTimestamp("2019-07-30 00:00:00");
+  EXPECT_EQ(timestampdiffMonth_timestamp_timestamp(ts1, ts2), 1);
+
+  ts1 = StringToTimestamp("2019-06-30 00:00:00");
+  ts2 = StringToTimestamp("2019-07-29 00:00:00");
+  EXPECT_EQ(timestampdiffMonth_timestamp_timestamp(ts1, ts2), 0);
+}
+
 TEST(TestTime, TestExtractTimestamp) {
   gdv_timestamp ts = StringToTimestamp("1970-05-02 10:20:33");
 
diff --git a/cpp/src/gandiva/precompiled/timestamp_arithmetic.cc b/cpp/src/gandiva/precompiled/timestamp_arithmetic.cc
index cdf9139fe2e..c17c04cc065 100644
--- a/cpp/src/gandiva/precompiled/timestamp_arithmetic.cc
+++ b/cpp/src/gandiva/precompiled/timestamp_arithmetic.cc
@@ -95,9 +95,9 @@ extern "C" {
     }                                                                                 \
     if (end_tm.TmMday() < start_tm.TmMday()) {                                        \
       /* case b */                                                                    \
+      months_diff += (is_last_day_of_month(end_tm) ? 1 : 0);                          \
       diff = MONTHS_TO_TIMEUNIT(months_diff - 1, N_MONTHS);                           \
-      return SIGN_ADJUST_DIFF(is_positive, diff) +                                    \
-             (is_last_day_of_month(end_tm) ? 1 : 0);                                  \
+      return SIGN_ADJUST_DIFF(is_positive, diff);                                     \
     }                                                                                 \
     gdv_int32 end_day_millis =                                                        \
         static_cast<gdv_int32>(end_tm.TmHour() * MILLIS_IN_HOUR +                     \
diff --git a/cpp/src/gandiva/precompiled/types.h b/cpp/src/gandiva/precompiled/types.h
index e8a3062d0bd..4e913aaac67 100644
--- a/cpp/src/gandiva/precompiled/types.h
+++ b/cpp/src/gandiva/precompiled/types.h
@@ -76,6 +76,8 @@ gdv_int32 hash32_buf(const gdv_uint8* buf, int len, gdv_int32 seed);
 gdv_int64 hash64(double val, gdv_int64 seed);
 gdv_int64 hash64_buf(const gdv_uint8* buf, int len, gdv_int64 seed);
 
+gdv_int32 timestampdiffMonth_timestamp_timestamp(gdv_timestamp, gdv_timestamp);
+
 gdv_int64 timestampaddSecond_int32_timestamp(gdv_int32, gdv_timestamp);
 gdv_int64 timestampaddMinute_int32_timestamp(gdv_int32, gdv_timestamp);
 gdv_int64 timestampaddHour_int32_timestamp(gdv_int32, gdv_timestamp);

From e42b7ed3e4e851bf48970e7c4631854e8d5fc79d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Pedro?= <joaop@simbioseventures.com>
Date: Thu, 15 Jul 2021 21:40:03 +0530
Subject: [PATCH 573/719] ARROW-13217: [C++][Gandiva] Correct error on convert
 replace function for initial invalid bytes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The CONVERT_REPLACE Gandiva function is not working properly for invalid chars on the beginning of the string (e.g. "\xa0\xa1-valid" should be  "-valid" considering an empty replacement char. But it is not replacing correctly).

Closes #10625 from jpedroantunes/bugfix/convert-replace-initial-invalid-chars and squashes the following commits:

940fbf095 <João Pedro> Correct error on convert replace function for initial invalid bytes

Authored-by: João Pedro <joaop@simbioseventures.com>
Signed-off-by: Praveen <praveen@dremio.com>
---
 cpp/src/gandiva/precompiled/string_ops.cc      |  2 +-
 cpp/src/gandiva/precompiled/string_ops_test.cc | 16 ++++++++++++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/cpp/src/gandiva/precompiled/string_ops.cc b/cpp/src/gandiva/precompiled/string_ops.cc
index b98a415c0ff..751b29d49c6 100644
--- a/cpp/src/gandiva/precompiled/string_ops.cc
+++ b/cpp/src/gandiva/precompiled/string_ops.cc
@@ -1397,7 +1397,7 @@ const char* convert_replace_invalid_fromUTF8_binary(int64_t context, const char*
     valid_bytes_to_cpy += char_len;
   }
   // if invalid chars were not found, return the original string
-  if (out_byte_counter == 0) return text_in;
+  if (out_byte_counter == 0 && in_byte_counter == 0) return text_in;
   // if there are still valid bytes to copy, do it
   if (valid_bytes_to_cpy != 0) {
     memcpy(ret + out_byte_counter, text_in + in_byte_counter, valid_bytes_to_cpy);
diff --git a/cpp/src/gandiva/precompiled/string_ops_test.cc b/cpp/src/gandiva/precompiled/string_ops_test.cc
index 067f3dbae72..020e380b584 100644
--- a/cpp/src/gandiva/precompiled/string_ops_test.cc
+++ b/cpp/src/gandiva/precompiled/string_ops_test.cc
@@ -217,6 +217,22 @@ TEST(TestStringOps, TestConvertReplaceInvalidUtf8Char) {
   EXPECT_EQ(std::string(g_str, g_in_out_len), "-ok--valid-");
   EXPECT_FALSE(ctx.has_error());
   ctx.Reset();
+
+  std::string h("\xa0\xa1-valid");
+  auto h_in_out_len = static_cast<int>(h.length());
+  const char* h_str = convert_replace_invalid_fromUTF8_binary(
+      ctx_ptr, h.data(), h_in_out_len, "", 0, &h_in_out_len);
+  EXPECT_EQ(std::string(h_str, h_in_out_len), "-valid");
+  EXPECT_FALSE(ctx.has_error());
+  ctx.Reset();
+
+  std::string i("\xa0\xa1-valid-\xa0\xa1-valid-\xa0\xa1");
+  auto i_in_out_len = static_cast<int>(i.length());
+  const char* i_str = convert_replace_invalid_fromUTF8_binary(
+      ctx_ptr, i.data(), i_in_out_len, "", 0, &i_in_out_len);
+  EXPECT_EQ(std::string(i_str, i_in_out_len), "-valid--valid-");
+  EXPECT_FALSE(ctx.has_error());
+  ctx.Reset();
 }
 
 TEST(TestStringOps, TestCastBoolToVarchar) {

From c8444618bce148230fa8709cdcfd746263259aaf Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Thu, 15 Jul 2021 14:06:43 -0400
Subject: [PATCH 574/719] ARROW-13280: [R] Bindings for log and trig functions

Closes #10689 from thisisnic/ARROW-13280-log

Lead-authored-by: Nic Crane <thisisnic@gmail.com>
Co-authored-by: Neal Richardson <neal.p.richardson@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 r/R/dplyr-functions.R                 |  19 +++++
 r/R/expression.R                      |  10 ++-
 r/tests/testthat/helper-expectation.R |   6 +-
 r/tests/testthat/test-dplyr.R         | 109 ++++++++++++++++++++++++++
 4 files changed, 140 insertions(+), 4 deletions(-)

diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R
index 5ddd6968972..61046d5e829 100644
--- a/r/R/dplyr-functions.R
+++ b/r/R/dplyr-functions.R
@@ -559,3 +559,22 @@ nse_funcs$wday <- function(x, label = FALSE, abbr = TRUE, week_start = getOption
   Expression$create("day_of_week", x, options = list(one_based_numbering = TRUE, week_start = week_start))
 
 }
+
+nse_funcs$log <- function(x, base = exp(1)) {
+  
+  if (base == exp(1)) {
+    return(Expression$create("ln_checked", x))
+  }
+  
+  if (base == 2) {
+    return(Expression$create("log2_checked", x))
+  }
+  
+  if (base == 10) {
+    return(Expression$create("log10_checked", x))
+  } 
+  # ARROW-13345
+  stop("`base` values other than exp(1), 2 and 10 not supported in Arrow", call. = FALSE)
+}
+
+nse_funcs$logb <- nse_funcs$log
diff --git a/r/R/expression.R b/r/R/expression.R
index be80c9db969..16915427292 100644
--- a/r/R/expression.R
+++ b/r/R/expression.R
@@ -43,7 +43,15 @@
   "yday" = "day_of_year",
   "hour" = "hour",
   # second is defined in dplyr-functions.R
-  "minute" = "minute"
+  "minute" = "minute",
+  "log10" = "log10_checked",
+  "log2" = "log2_checked",
+  "log1p" = "log1p_checked",
+  "sin" = "sin_checked",
+  "cos" = "cos_checked",
+  "tan" = "tan_checked",
+  "asin" = "asin_checked",
+  "acos" = "acos_checked"
 )
 
 .binary_function_map <- list(
diff --git a/r/tests/testthat/helper-expectation.R b/r/tests/testthat/helper-expectation.R
index d173620398e..359e31ef57d 100644
--- a/r/tests/testthat/helper-expectation.R
+++ b/r/tests/testthat/helper-expectation.R
@@ -123,7 +123,7 @@ expect_dplyr_equal <- function(expr,
   }
 
   if (!is.null(skip_msg)) {
-    skip(paste(skip_msg, collpase = "\n"))
+    skip(paste(skip_msg, collapse = "\n"))
   }
 }
 
@@ -212,7 +212,7 @@ expect_vector_equal <- function(expr, # A vectorized R expression containing `in
   }
 
   if (!is.null(skip_msg)) {
-    skip(paste(skip_msg, collpase = "\n"))
+    skip(paste(skip_msg, collapse = "\n"))
   }
 }
 
@@ -273,7 +273,7 @@ expect_vector_error <- function(expr, # A vectorized R expression containing `in
   }
 
   if (!is.null(skip_msg)) {
-    skip(paste(skip_msg, collpase = "\n"))
+    skip(paste(skip_msg, collapse = "\n"))
   }
 }
 
diff --git a/r/tests/testthat/test-dplyr.R b/r/tests/testthat/test-dplyr.R
index 459c5ebc441..63d0433fc23 100644
--- a/r/tests/testthat/test-dplyr.R
+++ b/r/tests/testthat/test-dplyr.R
@@ -945,3 +945,112 @@ test_that("abs()", {
     df
   )
 })
+
+test_that("log functions", {
+  
+  df <- tibble(x = c(1:10, NA, NA))
+  
+  expect_dplyr_equal(
+    input %>%
+      mutate(y = log(x)) %>%
+      collect(),
+    df
+  )
+  
+  expect_dplyr_equal(
+    input %>%
+      mutate(y = log(x, base = exp(1))) %>%
+      collect(),
+    df
+  )
+  
+  expect_dplyr_equal(
+    input %>%
+      mutate(y = log(x, base = 2)) %>%
+      collect(),
+    df
+  )
+  
+  expect_dplyr_equal(
+    input %>%
+      mutate(y = log(x, base = 10)) %>%
+      collect(),
+    df
+  )
+  
+  expect_error(
+    nse_funcs$log(Expression$scalar(x), base = 5),
+    "`base` values other than exp(1), 2 and 10 not supported in Arrow",
+    fixed = TRUE
+  )
+  
+  expect_dplyr_equal(
+    input %>%
+      mutate(y = logb(x)) %>%
+      collect(),
+    df
+  )
+  
+  expect_dplyr_equal(
+    input %>%
+      mutate(y = log1p(x)) %>%
+      collect(),
+    df
+  )
+  
+  expect_dplyr_equal(
+    input %>%
+      mutate(y = log2(x)) %>%
+      collect(),
+    df
+  )
+  
+  expect_dplyr_equal(
+    input %>%
+      mutate(y = log10(x)) %>%
+      collect(),
+    df
+  )
+
+})
+  
+test_that("trig functions", {
+  
+  df <- tibble(x = c(seq(from = 0, to = 1, by = 0.1), NA))
+  
+  expect_dplyr_equal(
+    input %>%
+      mutate(y = sin(x)) %>%
+      collect(),
+    df
+  )
+  
+  expect_dplyr_equal(
+    input %>%
+      mutate(y = cos(x)) %>%
+      collect(),
+    df
+  )
+  
+  expect_dplyr_equal(
+    input %>%
+      mutate(y = tan(x)) %>%
+      collect(),
+    df
+  )
+  
+  expect_dplyr_equal(
+    input %>%
+      mutate(y = asin(x)) %>%
+      collect(),
+    df
+  )
+  
+  expect_dplyr_equal(
+    input %>%
+      mutate(y = acos(x)) %>%
+      collect(),
+    df
+  )
+
+})
\ No newline at end of file

From d55383dc9653c928bb7a8465c03867cc2c4ee62f Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Thu, 15 Jul 2021 14:08:35 -0400
Subject: [PATCH 575/719] ARROW-12992:  [R] bindings for substr(), substring(),
 str_sub()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #10624 from pachadotdev/arrow12992_str_sub

Lead-authored-by: Nic Crane <thisisnic@gmail.com>
Co-authored-by: Mauricio Vargas <mavargas11@uc.cl>
Co-authored-by: Nic <thisisnic@gmail.com>
Co-authored-by: Pachá <mvargas@dcc.uchile.cl>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 r/R/dplyr-functions.R                         | 103 ++++++++--
 r/R/expression.R                              |   3 +
 r/src/compute.cpp                             |  17 ++
 .../testthat/test-dplyr-string-functions.R    | 192 ++++++++++++++++--
 4 files changed, 283 insertions(+), 32 deletions(-)

diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R
index 61046d5e829..35db573550d 100644
--- a/r/R/dplyr-functions.R
+++ b/r/R/dplyr-functions.R
@@ -22,7 +22,7 @@ NULL
 # This environment is an internal cache for things including data mask functions
 # We'll populate it at package load time.
 .cache <- NULL
-init_env <- function () {
+init_env <- function() {
   .cache <<- new.env(hash = TRUE)
 }
 init_env()
@@ -150,16 +150,20 @@ nse_funcs$is.character <- function(x) {
   x$type_id() %in% Type[c("STRING", "LARGE_STRING")]
 }
 nse_funcs$is.numeric <- function(x) {
-  x$type_id() %in% Type[c("UINT8", "INT8", "UINT16", "INT16", "UINT32", "INT32",
-                          "UINT64", "INT64", "HALF_FLOAT", "FLOAT", "DOUBLE",
-                          "DECIMAL", "DECIMAL256")]
+  x$type_id() %in% Type[c(
+    "UINT8", "INT8", "UINT16", "INT16", "UINT32", "INT32",
+    "UINT64", "INT64", "HALF_FLOAT", "FLOAT", "DOUBLE",
+    "DECIMAL", "DECIMAL256"
+  )]
 }
 nse_funcs$is.double <- function(x) {
   x$type_id() == Type["DOUBLE"]
 }
 nse_funcs$is.integer <- function(x) {
-  x$type_id() %in% Type[c("UINT8", "INT8", "UINT16", "INT16", "UINT32", "INT32",
-                          "UINT64", "INT64")]
+  x$type_id() %in% Type[c(
+    "UINT8", "INT8", "UINT16", "INT16", "UINT32", "INT32",
+    "UINT64", "INT64"
+  )]
 }
 nse_funcs$is.integer64 <- function(x) {
   x$type_id() == Type["INT64"]
@@ -280,6 +284,78 @@ nse_funcs$str_trim <- function(string, side = c("both", "left", "right")) {
   Expression$create(trim_fun, string)
 }
 
+nse_funcs$substr <- function(x, start, stop) {
+  assert_that(
+    length(start) == 1,
+    msg = "`start` must be length 1 - other lengths are not supported in Arrow"
+  )
+  assert_that(
+    length(stop) == 1,
+    msg = "`stop` must be length 1 - other lengths are not supported in Arrow"
+  )
+
+  # substr treats values as if they're on a continous number line, so values 
+  # 0 are effectively blank characters - set `start` to 1 here so Arrow mimics 
+  # this behavior
+  if (start <= 0) {
+    start <- 1
+  }
+
+  # if `stop` is lower than `start`, this is invalid, so set `stop` to
+  # 0 so that an empty string will be returned (consistent with base::substr())
+  if (stop < start) {
+    stop <- 0
+  }
+
+  Expression$create(
+    "utf8_slice_codeunits",
+    x,
+    # we don't need to subtract 1 from `stop` as C++ counts exclusively 
+    # which effectively cancels out the difference in indexing between R & C++
+    options = list(start = start - 1L, stop = stop)
+  )
+}
+
+nse_funcs$substring <- function(text, first, last){
+  nse_funcs$substr(x = text, start = first, stop = last)
+}
+
+nse_funcs$str_sub <- function(string, start = 1L, end = -1L) {
+  assert_that(
+    length(start) == 1,
+    msg = "`start` must be length 1 - other lengths are not supported in Arrow"
+  )
+  assert_that(
+    length(end) == 1,
+    msg = "`end` must be length 1 - other lengths are not supported in Arrow"
+  )
+
+  # In stringr::str_sub, an `end` value of -1 means the end of the string, so
+  # set it to the maximum integer to match this behavior
+  if (end == -1) {
+    end <- .Machine$integer.max
+  }
+
+  # An end value lower than a start value returns an empty string in 
+  # stringr::str_sub so set end to 0 here to match this behavior
+  if (end < start) {
+    end <- 0
+  }
+
+  # subtract 1 from `start` because C++ is 0-based and R is 1-based
+  # str_sub treats a `start` value of 0 or 1 as the same thing so don't subtract 1 when `start` == 0 
+  # when `start` < 0, both str_sub and utf8_slice_codeunits count backwards from the end
+  if (start > 0) {
+    start <- start - 1L
+  }
+
+  Expression$create(
+    "utf8_slice_codeunits",
+    string,
+    options = list(start = start, stop = end)
+  )
+}
+
 nse_funcs$grepl <- function(pattern, x, ignore.case = FALSE, fixed = FALSE) {
   arrow_fun <- ifelse(fixed, "match_substring", "match_substring_regex")
   Expression$create(
@@ -390,8 +466,7 @@ nse_funcs$str_split <- function(string, pattern, n = Inf, simplify = FALSE) {
     arrow_fun,
     string,
     options = list(
-      pattern =
-      opts$pattern,
+      pattern = opts$pattern,
       reverse = FALSE,
       max_splits = n - 1L
     )
@@ -415,19 +490,18 @@ nse_funcs$pmax <- function(..., na.rm = FALSE) {
 }
 
 nse_funcs$str_pad <- function(string, width, side = c("left", "right", "both"), pad = " ") {
-  
   assert_that(is_integerish(width))
   side <- match.arg(side)
   assert_that(is.string(pad))
-  
+
   if (side == "left") {
-    pad_func = "utf8_lpad"
+    pad_func <- "utf8_lpad"
   } else if (side == "right") {
-    pad_func = "utf8_rpad"
+    pad_func <- "utf8_rpad"
   } else if (side == "both") {
-    pad_func = "utf8_center"
+    pad_func <- "utf8_center"
   }
-  
+
   Expression$create(
     pad_func,
     string,
@@ -557,7 +631,6 @@ nse_funcs$wday <- function(x, label = FALSE, abbr = TRUE, week_start = getOption
   }
 
   Expression$create("day_of_week", x, options = list(one_based_numbering = TRUE, week_start = week_start))
-
 }
 
 nse_funcs$log <- function(x, base = exp(1)) {
diff --git a/r/R/expression.R b/r/R/expression.R
index 16915427292..b3fc9fe20c7 100644
--- a/r/R/expression.R
+++ b/r/R/expression.R
@@ -33,6 +33,9 @@
   # str_pad is defined in dplyr-functions.R
   "stri_reverse" = "utf8_reverse",
   # str_trim is defined in dplyr-functions.R
+  # str_sub is defined in dplyr-functions.R
+  # substr is defined in dplyr-functions.R
+  # substring is defined in dplyr-functions.R
   "year" = "year",
   "isoyear" = "iso_year",
   "quarter" = "quarter",
diff --git a/r/src/compute.cpp b/r/src/compute.cpp
index 6bb55cbe208..2c5ee77c8d0 100644
--- a/r/src/compute.cpp
+++ b/r/src/compute.cpp
@@ -316,6 +316,23 @@ std::shared_ptr<arrow::compute::FunctionOptions> make_compute_options(
     return std::make_shared<Options>(max_splits, reverse);
   }
 
+  if (func_name == "utf8_slice_codeunits") {
+    using Options = arrow::compute::SliceOptions;
+
+    int64_t step = 1;
+    if (!Rf_isNull(options["step"])) {
+      step = cpp11::as_cpp<int64_t>(options["step"]);
+    }
+
+    int64_t stop = std::numeric_limits<int32_t>::max();
+    if (!Rf_isNull(options["stop"])) {
+      stop = cpp11::as_cpp<int64_t>(options["stop"]);
+    }
+
+    return std::make_shared<Options>(cpp11::as_cpp<int64_t>(options["start"]), stop,
+                                     step);
+  }
+
   if (func_name == "variance" || func_name == "stddev") {
     using Options = arrow::compute::VarianceOptions;
     return std::make_shared<Options>(cpp11::as_cpp<int64_t>(options["ddof"]));
diff --git a/r/tests/testthat/test-dplyr-string-functions.R b/r/tests/testthat/test-dplyr-string-functions.R
index cd01acf024d..b6b8f5a714a 100644
--- a/r/tests/testthat/test-dplyr-string-functions.R
+++ b/r/tests/testthat/test-dplyr-string-functions.R
@@ -211,7 +211,6 @@ test_that("grepl", {
   df <- tibble(x = c("Foo", "bar"))
 
   for (fixed in c(TRUE, FALSE)) {
-
     expect_dplyr_equal(
       input %>%
         filter(grepl("Foo", x, fixed = fixed)) %>%
@@ -230,9 +229,7 @@ test_that("grepl", {
         collect(),
       df
     )
-
   }
-
 })
 
 test_that("grepl with ignore.case = TRUE and fixed = TRUE", {
@@ -254,7 +251,6 @@ test_that("grepl with ignore.case = TRUE and fixed = TRUE", {
       collect(),
     tibble(x = character(0))
   )
-
 })
 
 test_that("str_detect", {
@@ -302,14 +298,12 @@ test_that("str_detect", {
       collect(),
     df
   )
-
 })
 
 test_that("sub and gsub", {
   df <- tibble(x = c("Foo", "bar"))
 
   for (fixed in c(TRUE, FALSE)) {
-
     expect_dplyr_equal(
       input %>%
         transmute(x = sub("Foo", "baz", x, fixed = fixed)) %>%
@@ -328,7 +322,6 @@ test_that("sub and gsub", {
         collect(),
       df
     )
-
   }
 })
 
@@ -358,7 +351,6 @@ test_that("sub and gsub with ignore.case = TRUE and fixed = TRUE", {
       collect(),
     df # unchanged
   )
-
 })
 
 test_that("str_replace and str_replace_all", {
@@ -409,11 +401,9 @@ test_that("str_replace and str_replace_all", {
       collect(),
     df
   )
-
 })
 
 test_that("strsplit and str_split", {
-
   df <- tibble(x = c("Foo and bar", "baz and qux and quux"))
 
   expect_dplyr_equal(
@@ -565,7 +555,6 @@ test_that("errors and warnings in string detection and replacement", {
     nse_funcs$str_replace_all(x, regex("o", multiline = TRUE), "u"),
     "Ignoring pattern modifier argument not supported in Arrow: \"multiline\""
   )
-
 })
 
 test_that("backreferences in pattern in string detection", {
@@ -589,8 +578,7 @@ test_that("backreferences (substitutions) in string replacement", {
         "(?:https?|ftp)://([^/\r\n]+)(/[^\r\n]*)?",
         "path `\\2` on server `\\1`",
         url
-        )
-      ) %>%
+      )) %>%
       collect(),
     tibble(url = "https://arrow.apache.org/docs/r/")
   )
@@ -652,6 +640,7 @@ test_that("edge cases in string detection and replacement", {
 })
 
 test_that("strptime", {
+
   # base::strptime() defaults to local timezone
   # but arrow's strptime defaults to UTC.
   # So that tests are consistent, set the local timezone to UTC
@@ -726,12 +715,11 @@ test_that("errors in strptime", {
   x <- Expression$field_ref("x")
   expect_error(
     nse_funcs$strptime(x, tz = "PDT"),
-    'Time zone argument not supported by Arrow'
+    "Time zone argument not supported by Arrow"
   )
 })
 
 test_that("arrow_find_substring and arrow_find_substring_regex", {
-
   df <- tibble(x = c("Foo and Bar", "baz and qux and quux"))
 
   expect_equivalent(
@@ -774,7 +762,6 @@ test_that("arrow_find_substring and arrow_find_substring_regex", {
 })
 
 test_that("stri_reverse and arrow_ascii_reverse functions", {
-  
   df_ascii <- tibble(x = c("Foo\nand bar", "baz\tand qux and quux"))
 
   df_utf8 <- tibble(x = c("Foo\u00A0\u0061nd\u00A0bar", "\u0062az\u00A0and\u00A0qux\u3000and\u00A0quux"))
@@ -811,7 +798,6 @@ test_that("stri_reverse and arrow_ascii_reverse functions", {
 })
 
 test_that("str_like", {
-
   df <- tibble(x = c("Foo and bar", "baz and qux and quux"))
 
   # TODO: After new version of stringr with str_like has been released, update all
@@ -909,5 +895,177 @@ test_that("str_pad", {
       collect(),
     df
   )
+})
+
+test_that("substr", {
+  df <- tibble(x = "Apache Arrow")
 
+  expect_dplyr_equal(
+    input %>%
+      mutate(y = substr(x, 1, 6)) %>%
+      collect(),
+    df
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(y = substr(x, 0, 6)) %>%
+      collect(),
+    df
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(y = substr(x, -1, 6)) %>%
+      collect(),
+    df
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(y = substr(x, 6, 1)) %>%
+      collect(),
+    df
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(y = substr(x, -1, -2)) %>%
+      collect(),
+    df
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(y = substr(x, 9, 6)) %>%
+      collect(),
+    df
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(y = substr(x, 1, 6)) %>%
+      collect(),
+    df
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(y = substr(x, 8, 12)) %>%
+      collect(),
+    df
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(y = substr(x, -5, -1)) %>%
+      collect(),
+    df
+  )
+
+  expect_error(
+    nse_funcs$substr("Apache Arrow", c(1, 2), 3),
+    "`start` must be length 1 - other lengths are not supported in Arrow"
+  )
+
+  expect_error(
+    nse_funcs$substr("Apache Arrow", 1, c(2, 3)),
+    "`stop` must be length 1 - other lengths are not supported in Arrow"
+  )
+})
+
+test_that("substring", {
+  # nse_funcs$substring just calls nse_funcs$substr, tested extensively above
+  df <- tibble(x = "Apache Arrow")
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(y = substring(x, 1, 6)) %>%
+      collect(),
+    df
+  )
+})
+
+test_that("str_sub", {
+  df <- tibble(x = "Apache Arrow")
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(y = str_sub(x, 1, 6)) %>%
+      collect(),
+    df
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(y = str_sub(x, 0, 6)) %>%
+      collect(),
+    df
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(y = str_sub(x, -1, 6)) %>%
+      collect(),
+    df
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(y = str_sub(x, 6, 1)) %>%
+      collect(),
+    df
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(y = str_sub(x, -1, -2)) %>%
+      collect(),
+    df
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(y = str_sub(x, -1, 3)) %>%
+      collect(),
+    df
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(y = str_sub(x, 9, 6)) %>%
+      collect(),
+    df
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(y = str_sub(x, 1, 6)) %>%
+      collect(),
+    df
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(y = str_sub(x, 8, 12)) %>%
+      collect(),
+    df
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(y = str_sub(x, -5, -1)) %>%
+      collect(),
+    df
+  )
+
+  expect_error(
+    nse_funcs$str_sub("Apache Arrow", c(1, 2), 3),
+    "`start` must be length 1 - other lengths are not supported in Arrow"
+  )
+
+  expect_error(
+    nse_funcs$str_sub("Apache Arrow", 1, c(2, 3)),
+    "`end` must be length 1 - other lengths are not supported in Arrow"
+  )
 })

From dbeed527dff79f3fa4576722a821fb7af3abd878 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Thu, 15 Jul 2021 15:05:55 -0400
Subject: [PATCH 576/719] ARROW-13064: [C++] Implement select ('case when')
 function for fixed-width types

This doesn't support variable-width types (e.g. strings) as the implementation here is columnwise. I will work on those separately (they require a rowwise implementation).

Also fixes a small bug in the CommonNumericType implementation (I noticed uint8 was getting promoted to int8).

Closes #10557 from lidavidm/arrow-13064

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Benjamin Kietzman <bengilgit@gmail.com>
---
 cpp/src/arrow/compute/api_scalar.cc           |   8 +
 cpp/src/arrow/compute/api_scalar.h            |  17 +
 cpp/src/arrow/compute/kernel.cc               |  23 +-
 cpp/src/arrow/compute/kernel.h                |   6 +-
 cpp/src/arrow/compute/kernel_test.cc          |  31 +-
 .../arrow/compute/kernels/codegen_internal.cc |  20 +-
 .../arrow/compute/kernels/codegen_internal.h  |   3 +
 .../arrow/compute/kernels/scalar_if_else.cc   | 401 +++++++++++++++++-
 .../kernels/scalar_if_else_benchmark.cc       |  99 ++++-
 .../compute/kernels/scalar_if_else_test.cc    | 381 ++++++++++++++++-
 cpp/src/arrow/compute/kernels/test_util.cc    |  33 +-
 cpp/src/arrow/compute/kernels/test_util.h     |   3 +-
 docs/source/cpp/compute.rst                   |  73 ++--
 docs/source/python/api/compute.rst            |   1 +
 14 files changed, 992 insertions(+), 107 deletions(-)

diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc
index be6498a74c6..68df5f98b10 100644
--- a/cpp/src/arrow/compute/api_scalar.cc
+++ b/cpp/src/arrow/compute/api_scalar.cc
@@ -466,6 +466,14 @@ Result<Datum> IfElse(const Datum& cond, const Datum& if_true, const Datum& if_fa
   return CallFunction("if_else", {cond, if_true, if_false}, ctx);
 }
 
+Result<Datum> CaseWhen(const Datum& cond, const std::vector<Datum>& cases,
+                       ExecContext* ctx) {
+  std::vector<Datum> args = {cond};
+  args.reserve(cases.size() + 1);
+  args.insert(args.end(), cases.begin(), cases.end());
+  return CallFunction("case_when", args, ctx);
+}
+
 // ----------------------------------------------------------------------
 // Temporal functions
 
diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h
index f0aebc8e032..bbaa4d13a21 100644
--- a/cpp/src/arrow/compute/api_scalar.h
+++ b/cpp/src/arrow/compute/api_scalar.h
@@ -741,6 +741,23 @@ ARROW_EXPORT
 Result<Datum> IfElse(const Datum& cond, const Datum& left, const Datum& right,
                      ExecContext* ctx = NULLPTR);
 
+/// \brief CaseWhen behaves like a switch/case or if-else if-else statement: for
+/// each row, select the first value for which the corresponding condition is
+/// true, or (if given) select the 'else' value, else emit null. Note that a
+/// null condition is the same as false.
+///
+/// \param[in] cond Conditions (Boolean)
+/// \param[in] cases Values (any type), along with an optional 'else' value.
+/// \param[in] ctx the function execution context, optional
+///
+/// \return the resulting datum
+///
+/// \since 5.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> CaseWhen(const Datum& cond, const std::vector<Datum>& cases,
+                       ExecContext* ctx = NULLPTR);
+
 /// \brief Year returns year for each element of `values`
 ///
 /// \param[in] values input to extract year from
diff --git a/cpp/src/arrow/compute/kernel.cc b/cpp/src/arrow/compute/kernel.cc
index 6cdd17adcc9..f131f524d2e 100644
--- a/cpp/src/arrow/compute/kernel.cc
+++ b/cpp/src/arrow/compute/kernel.cc
@@ -402,8 +402,7 @@ KernelSignature::KernelSignature(std::vector<InputType> in_types, OutputType out
       out_type_(std::move(out_type)),
       is_varargs_(is_varargs),
       hash_code_(0) {
-  // VarArgs sigs must have only a single input type to use for argument validation
-  DCHECK(!is_varargs || (is_varargs && (in_types_.size() == 1)));
+  DCHECK(!is_varargs || (is_varargs && (in_types_.size() >= 1)));
 }
 
 std::shared_ptr<KernelSignature> KernelSignature::Make(std::vector<InputType> in_types,
@@ -430,8 +429,8 @@ bool KernelSignature::Equals(const KernelSignature& other) const {
 
 bool KernelSignature::MatchesInputs(const std::vector<ValueDescr>& args) const {
   if (is_varargs_) {
-    for (const auto& arg : args) {
-      if (!in_types_[0].Matches(arg)) {
+    for (size_t i = 0; i < args.size(); ++i) {
+      if (!in_types_[std::min(i, in_types_.size() - 1)].Matches(args[i])) {
         return false;
       }
     }
@@ -464,15 +463,19 @@ std::string KernelSignature::ToString() const {
   std::stringstream ss;
 
   if (is_varargs_) {
-    ss << "varargs[" << in_types_[0].ToString() << "]";
+    ss << "varargs[";
   } else {
     ss << "(";
-    for (size_t i = 0; i < in_types_.size(); ++i) {
-      if (i > 0) {
-        ss << ", ";
-      }
-      ss << in_types_[i].ToString();
+  }
+  for (size_t i = 0; i < in_types_.size(); ++i) {
+    if (i > 0) {
+      ss << ", ";
     }
+    ss << in_types_[i].ToString();
+  }
+  if (is_varargs_) {
+    ss << "]";
+  } else {
     ss << ")";
   }
   ss << " -> " << out_type_.ToString();
diff --git a/cpp/src/arrow/compute/kernel.h b/cpp/src/arrow/compute/kernel.h
index 50b1dd8e55e..36d20c7289e 100644
--- a/cpp/src/arrow/compute/kernel.h
+++ b/cpp/src/arrow/compute/kernel.h
@@ -366,8 +366,10 @@ class ARROW_EXPORT OutputType {
 
 /// \brief Holds the input types and output type of the kernel.
 ///
-/// VarArgs functions should pass a single input type to be used to validate
-/// the input types of a function invocation.
+/// VarArgs functions with minimum N arguments should pass up to N input types to be
+/// used to validate the input types of a function invocation. The first N-1 types
+/// will be matched against the first N-1 arguments, and the last type will be
+/// matched against the remaining arguments.
 class ARROW_EXPORT KernelSignature {
  public:
   KernelSignature(std::vector<InputType> in_types, OutputType out_type,
diff --git a/cpp/src/arrow/compute/kernel_test.cc b/cpp/src/arrow/compute/kernel_test.cc
index a5ef9d44e18..a63c42d4fde 100644
--- a/cpp/src/arrow/compute/kernel_test.cc
+++ b/cpp/src/arrow/compute/kernel_test.cc
@@ -468,15 +468,28 @@ TEST(KernelSignature, MatchesInputs) {
 }
 
 TEST(KernelSignature, VarArgsMatchesInputs) {
-  KernelSignature sig({int8()}, utf8(), /*is_varargs=*/true);
-
-  std::vector<ValueDescr> args = {int8()};
-  ASSERT_TRUE(sig.MatchesInputs(args));
-  args.push_back(ValueDescr::Scalar(int8()));
-  args.push_back(ValueDescr::Array(int8()));
-  ASSERT_TRUE(sig.MatchesInputs(args));
-  args.push_back(int32());
-  ASSERT_FALSE(sig.MatchesInputs(args));
+  {
+    KernelSignature sig({int8()}, utf8(), /*is_varargs=*/true);
+
+    std::vector<ValueDescr> args = {int8()};
+    ASSERT_TRUE(sig.MatchesInputs(args));
+    args.push_back(ValueDescr::Scalar(int8()));
+    args.push_back(ValueDescr::Array(int8()));
+    ASSERT_TRUE(sig.MatchesInputs(args));
+    args.push_back(int32());
+    ASSERT_FALSE(sig.MatchesInputs(args));
+  }
+  {
+    KernelSignature sig({int8(), utf8()}, utf8(), /*is_varargs=*/true);
+
+    std::vector<ValueDescr> args = {int8()};
+    ASSERT_TRUE(sig.MatchesInputs(args));
+    args.push_back(ValueDescr::Scalar(utf8()));
+    args.push_back(ValueDescr::Array(utf8()));
+    ASSERT_TRUE(sig.MatchesInputs(args));
+    args.push_back(int32());
+    ASSERT_FALSE(sig.MatchesInputs(args));
+  }
 }
 
 TEST(KernelSignature, ToString) {
diff --git a/cpp/src/arrow/compute/kernels/codegen_internal.cc b/cpp/src/arrow/compute/kernels/codegen_internal.cc
index e723bd7838e..673db088eae 100644
--- a/cpp/src/arrow/compute/kernels/codegen_internal.cc
+++ b/cpp/src/arrow/compute/kernels/codegen_internal.cc
@@ -218,9 +218,14 @@ void ReplaceTypes(const std::shared_ptr<DataType>& type,
 }
 
 std::shared_ptr<DataType> CommonNumeric(const std::vector<ValueDescr>& descrs) {
-  DCHECK(!descrs.empty()) << "tried to find CommonNumeric type of an empty set";
+  return CommonNumeric(descrs.data(), descrs.size());
+}
 
-  for (const auto& descr : descrs) {
+std::shared_ptr<DataType> CommonNumeric(const ValueDescr* begin, size_t count) {
+  DCHECK_GT(count, 0) << "tried to find CommonNumeric type of an empty set";
+
+  for (size_t i = 0; i < count; i++) {
+    const auto& descr = *(begin + i);
     auto id = descr.type->id();
     if (!is_floating(id) && !is_integer(id)) {
       // a common numeric type is only possible if all types are numeric
@@ -232,17 +237,20 @@ std::shared_ptr<DataType> CommonNumeric(const std::vector<ValueDescr>& descrs) {
     }
   }
 
-  for (const auto& descr : descrs) {
+  for (size_t i = 0; i < count; i++) {
+    const auto& descr = *(begin + i);
     if (descr.type->id() == Type::DOUBLE) return float64();
   }
 
-  for (const auto& descr : descrs) {
+  for (size_t i = 0; i < count; i++) {
+    const auto& descr = *(begin + i);
     if (descr.type->id() == Type::FLOAT) return float32();
   }
 
   int max_width_signed = 0, max_width_unsigned = 0;
 
-  for (const auto& descr : descrs) {
+  for (size_t i = 0; i < count; i++) {
+    const auto& descr = *(begin + i);
     auto id = descr.type->id();
     auto max_width = &(is_signed_integer(id) ? max_width_signed : max_width_unsigned);
     *max_width = std::max(bit_width(id), *max_width);
@@ -253,7 +261,7 @@ std::shared_ptr<DataType> CommonNumeric(const std::vector<ValueDescr>& descrs) {
     if (max_width_unsigned == 32) return uint32();
     if (max_width_unsigned == 16) return uint16();
     DCHECK_EQ(max_width_unsigned, 8);
-    return int8();
+    return uint8();
   }
 
   if (max_width_signed <= max_width_unsigned) {
diff --git a/cpp/src/arrow/compute/kernels/codegen_internal.h b/cpp/src/arrow/compute/kernels/codegen_internal.h
index 12e80423f7f..d28ede4f77a 100644
--- a/cpp/src/arrow/compute/kernels/codegen_internal.h
+++ b/cpp/src/arrow/compute/kernels/codegen_internal.h
@@ -1367,6 +1367,9 @@ void ReplaceTypes(const std::shared_ptr<DataType>&, std::vector<ValueDescr>* des
 ARROW_EXPORT
 std::shared_ptr<DataType> CommonNumeric(const std::vector<ValueDescr>& descrs);
 
+ARROW_EXPORT
+std::shared_ptr<DataType> CommonNumeric(const ValueDescr* begin, size_t count);
+
 ARROW_EXPORT
 std::shared_ptr<DataType> CommonTimestamp(const std::vector<ValueDescr>& descrs);
 
diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else.cc b/cpp/src/arrow/compute/kernels/scalar_if_else.cc
index 54e0725fce7..32307542d97 100644
--- a/cpp/src/arrow/compute/kernels/scalar_if_else.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_if_else.cc
@@ -30,6 +30,7 @@ using internal::Bitmap;
 using internal::BitmapWordReader;
 
 namespace compute {
+namespace internal {
 
 namespace {
 
@@ -676,7 +677,353 @@ void AddPrimitiveIfElseKernels(const std::shared_ptr<ScalarFunction>& scalar_fun
   }
 }
 
-}  // namespace
+// Helper to copy or broadcast fixed-width values between buffers.
+template <typename Type, typename Enable = void>
+struct CopyFixedWidth {};
+template <>
+struct CopyFixedWidth<BooleanType> {
+  static void CopyScalar(const Scalar& scalar, const int64_t length,
+                         uint8_t* raw_out_values, const int64_t out_offset) {
+    const bool value = UnboxScalar<BooleanType>::Unbox(scalar);
+    BitUtil::SetBitsTo(raw_out_values, out_offset, length, value);
+  }
+  static void CopyArray(const DataType&, const uint8_t* in_values,
+                        const int64_t in_offset, const int64_t length,
+                        uint8_t* raw_out_values, const int64_t out_offset) {
+    arrow::internal::CopyBitmap(in_values, in_offset, length, raw_out_values, out_offset);
+  }
+};
+template <typename Type>
+struct CopyFixedWidth<Type, enable_if_number<Type>> {
+  using CType = typename TypeTraits<Type>::CType;
+  static void CopyScalar(const Scalar& scalar, const int64_t length,
+                         uint8_t* raw_out_values, const int64_t out_offset) {
+    CType* out_values = reinterpret_cast<CType*>(raw_out_values);
+    const CType value = UnboxScalar<Type>::Unbox(scalar);
+    std::fill(out_values + out_offset, out_values + out_offset + length, value);
+  }
+  static void CopyArray(const DataType&, const uint8_t* in_values,
+                        const int64_t in_offset, const int64_t length,
+                        uint8_t* raw_out_values, const int64_t out_offset) {
+    std::memcpy(raw_out_values + out_offset * sizeof(CType),
+                in_values + in_offset * sizeof(CType), length * sizeof(CType));
+  }
+};
+template <typename Type>
+struct CopyFixedWidth<Type, enable_if_same<Type, FixedSizeBinaryType>> {
+  static void CopyScalar(const Scalar& values, const int64_t length,
+                         uint8_t* raw_out_values, const int64_t out_offset) {
+    const int32_t width =
+        checked_cast<const FixedSizeBinaryType&>(*values.type).byte_width();
+    uint8_t* next = raw_out_values + (width * out_offset);
+    const auto& scalar = checked_cast<const FixedSizeBinaryScalar&>(values);
+    // Scalar may have null value buffer
+    if (!scalar.value) return;
+    DCHECK_EQ(scalar.value->size(), width);
+    for (int i = 0; i < length; i++) {
+      std::memcpy(next, scalar.value->data(), width);
+      next += width;
+    }
+  }
+  static void CopyArray(const DataType& type, const uint8_t* in_values,
+                        const int64_t in_offset, const int64_t length,
+                        uint8_t* raw_out_values, const int64_t out_offset) {
+    const int32_t width = checked_cast<const FixedSizeBinaryType&>(type).byte_width();
+    uint8_t* next = raw_out_values + (width * out_offset);
+    std::memcpy(next, in_values + in_offset * width, length * width);
+  }
+};
+template <typename Type>
+struct CopyFixedWidth<Type, enable_if_decimal<Type>> {
+  using ScalarType = typename TypeTraits<Type>::ScalarType;
+  static void CopyScalar(const Scalar& values, const int64_t length,
+                         uint8_t* raw_out_values, const int64_t out_offset) {
+    const int32_t width =
+        checked_cast<const FixedSizeBinaryType&>(*values.type).byte_width();
+    uint8_t* next = raw_out_values + (width * out_offset);
+    const auto& scalar = checked_cast<const ScalarType&>(values);
+    const auto value = scalar.value.ToBytes();
+    for (int i = 0; i < length; i++) {
+      std::memcpy(next, value.data(), width);
+      next += width;
+    }
+  }
+  static void CopyArray(const DataType& type, const uint8_t* in_values,
+                        const int64_t in_offset, const int64_t length,
+                        uint8_t* raw_out_values, const int64_t out_offset) {
+    const int32_t width = checked_cast<const FixedSizeBinaryType&>(type).byte_width();
+    uint8_t* next = raw_out_values + (width * out_offset);
+    std::memcpy(next, in_values + in_offset * width, length * width);
+  }
+};
+// Copy fixed-width values from a scalar/array datum into an output values buffer
+template <typename Type>
+void CopyValues(const Datum& in_values, const int64_t in_offset, const int64_t length,
+                uint8_t* out_valid, uint8_t* out_values, const int64_t out_offset) {
+  if (in_values.is_scalar()) {
+    const auto& scalar = *in_values.scalar();
+    if (out_valid) {
+      BitUtil::SetBitsTo(out_valid, out_offset, length, scalar.is_valid);
+    }
+    CopyFixedWidth<Type>::CopyScalar(scalar, length, out_values, out_offset);
+  } else {
+    const ArrayData& array = *in_values.array();
+    if (out_valid) {
+      if (array.MayHaveNulls()) {
+        if (length == 1) {
+          // CopyBitmap is slow for short runs
+          BitUtil::SetBitTo(
+              out_valid, out_offset,
+              BitUtil::GetBit(array.buffers[0]->data(), array.offset + in_offset));
+        } else {
+          arrow::internal::CopyBitmap(array.buffers[0]->data(), array.offset + in_offset,
+                                      length, out_valid, out_offset);
+        }
+      } else {
+        BitUtil::SetBitsTo(out_valid, out_offset, length, true);
+      }
+    }
+    CopyFixedWidth<Type>::CopyArray(*array.type, array.buffers[1]->data(),
+                                    array.offset + in_offset, length, out_values,
+                                    out_offset);
+  }
+}
+
+struct CaseWhenFunction : ScalarFunction {
+  using ScalarFunction::ScalarFunction;
+
+  Result<const Kernel*> DispatchBest(std::vector<ValueDescr>* values) const override {
+    // The first function is a struct of booleans, where the number of fields in the
+    // struct is either equal to the number of other arguments or is one less.
+    RETURN_NOT_OK(CheckArity(*values));
+    EnsureDictionaryDecoded(values);
+    auto first_type = (*values)[0].type;
+    if (first_type->id() != Type::STRUCT) {
+      return Status::TypeError("case_when: first argument must be STRUCT, not ",
+                               *first_type);
+    }
+    auto num_fields = static_cast<size_t>(first_type->num_fields());
+    if (num_fields < values->size() - 2 || num_fields >= values->size()) {
+      return Status::Invalid(
+          "case_when: number of struct fields must be equal to or one less than count of "
+          "remaining arguments (",
+          values->size() - 1, "), got: ", first_type->num_fields());
+    }
+    for (const auto& field : first_type->fields()) {
+      if (field->type()->id() != Type::BOOL) {
+        return Status::TypeError(
+            "case_when: all fields of first argument must be BOOL, but ", field->name(),
+            " was of type: ", *field->type());
+      }
+    }
+
+    if (auto type = CommonNumeric(values->data() + 1, values->size() - 1)) {
+      for (auto it = values->begin() + 1; it != values->end(); it++) {
+        it->type = type;
+      }
+    }
+    if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
+    return arrow::compute::detail::NoMatchingKernel(this, *values);
+  }
+};
+
+// Implement a 'case when' (SQL)/'select' (NumPy) function for any scalar conditions
+template <typename Type>
+Status ExecScalarCaseWhen(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  const auto& conds = checked_cast<const StructScalar&>(*batch.values[0].scalar());
+  if (!conds.is_valid) {
+    return Status::Invalid("cond struct must not be null");
+  }
+  Datum result;
+  for (size_t i = 0; i < batch.values.size() - 1; i++) {
+    if (i < conds.value.size()) {
+      const Scalar& cond = *conds.value[i];
+      if (cond.is_valid && internal::UnboxScalar<BooleanType>::Unbox(cond)) {
+        result = batch[i + 1];
+        break;
+      }
+    } else {
+      // ELSE clause
+      result = batch[i + 1];
+      break;
+    }
+  }
+  if (out->is_scalar()) {
+    *out = result.is_scalar() ? result.scalar() : MakeNullScalar(out->type());
+    return Status::OK();
+  }
+  ArrayData* output = out->mutable_array();
+  if (!result.is_value()) {
+    // All conditions false, no 'else' argument
+    result = MakeNullScalar(out->type());
+  }
+  CopyValues<Type>(result, /*in_offset=*/0, batch.length,
+                   output->GetMutableValues<uint8_t>(0, 0),
+                   output->GetMutableValues<uint8_t>(1, 0), output->offset);
+  return Status::OK();
+}
+
+// Implement 'case when' for any mix of scalar/array arguments for any fixed-width type,
+// given helper functions to copy data from a source array to a target array
+template <typename Type>
+Status ExecArrayCaseWhen(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  const auto& conds_array = *batch.values[0].array();
+  if (conds_array.GetNullCount() > 0) {
+    return Status::Invalid("cond struct must not have top-level nulls");
+  }
+  ArrayData* output = out->mutable_array();
+  const int64_t out_offset = output->offset;
+  const auto num_value_args = batch.values.size() - 1;
+  const bool have_else_arg =
+      static_cast<size_t>(conds_array.type->num_fields()) < num_value_args;
+  uint8_t* out_valid = output->buffers[0]->mutable_data();
+  uint8_t* out_values = output->buffers[1]->mutable_data();
+  if (have_else_arg) {
+    // Copy 'else' value into output
+    CopyValues<Type>(batch.values.back(), /*in_offset=*/0, batch.length, out_valid,
+                     out_values, out_offset);
+  } else {
+    // There's no 'else' argument, so we should have an all-null validity bitmap
+    BitUtil::SetBitsTo(out_valid, out_offset, batch.length, false);
+  }
+
+  // Allocate a temporary bitmap to determine which elements still need setting.
+  ARROW_ASSIGN_OR_RAISE(auto mask_buffer, ctx->AllocateBitmap(batch.length));
+  uint8_t* mask = mask_buffer->mutable_data();
+  std::memset(mask, 0xFF, mask_buffer->size());
+
+  // Then iterate through each argument in turn and set elements.
+  for (size_t i = 0; i < batch.values.size() - (have_else_arg ? 2 : 1); i++) {
+    const ArrayData& cond_array = *conds_array.child_data[i];
+    const int64_t cond_offset = conds_array.offset + cond_array.offset;
+    const uint8_t* cond_values = cond_array.buffers[1]->data();
+    const Datum& values_datum = batch[i + 1];
+    int64_t offset = 0;
+
+    if (cond_array.GetNullCount() == 0) {
+      // If no valid buffer, visit mask & cond bitmap simultaneously
+      BinaryBitBlockCounter counter(mask, /*start_offset=*/0, cond_values, cond_offset,
+                                    batch.length);
+      while (offset < batch.length) {
+        const auto block = counter.NextAndWord();
+        if (block.AllSet()) {
+          CopyValues<Type>(values_datum, offset, block.length, out_valid, out_values,
+                           out_offset + offset);
+          BitUtil::SetBitsTo(mask, offset, block.length, false);
+        } else if (block.popcount) {
+          for (int64_t j = 0; j < block.length; ++j) {
+            if (BitUtil::GetBit(mask, offset + j) &&
+                BitUtil::GetBit(cond_values, cond_offset + offset + j)) {
+              CopyValues<Type>(values_datum, offset + j, /*length=*/1, out_valid,
+                               out_values, out_offset + offset + j);
+              BitUtil::SetBitTo(mask, offset + j, false);
+            }
+          }
+        }
+        offset += block.length;
+      }
+    } else {
+      // Visit mask & cond bitmap & cond validity
+      const uint8_t* cond_valid = cond_array.buffers[0]->data();
+      Bitmap bitmaps[3] = {{mask, /*offset=*/0, batch.length},
+                           {cond_values, cond_offset, batch.length},
+                           {cond_valid, cond_offset, batch.length}};
+      Bitmap::VisitWords(bitmaps, [&](std::array<uint64_t, 3> words) {
+        const uint64_t word = words[0] & words[1] & words[2];
+        const int64_t block_length = std::min<int64_t>(64, batch.length - offset);
+        if (word == std::numeric_limits<uint64_t>::max()) {
+          CopyValues<Type>(values_datum, offset, block_length, out_valid, out_values,
+                           out_offset + offset);
+          BitUtil::SetBitsTo(mask, offset, block_length, false);
+        } else if (word) {
+          for (int64_t j = 0; j < block_length; ++j) {
+            if (BitUtil::GetBit(mask, offset + j) &&
+                BitUtil::GetBit(cond_valid, cond_offset + offset + j) &&
+                BitUtil::GetBit(cond_values, cond_offset + offset + j)) {
+              CopyValues<Type>(values_datum, offset + j, /*length=*/1, out_valid,
+                               out_values, out_offset + offset + j);
+              BitUtil::SetBitTo(mask, offset + j, false);
+            }
+          }
+        }
+      });
+    }
+  }
+  if (!have_else_arg) {
+    // Need to initialize any remaining null slots (uninitialized memory)
+    BitBlockCounter counter(mask, /*offset=*/0, batch.length);
+    int64_t offset = 0;
+    auto bit_width = checked_cast<const FixedWidthType&>(*out->type()).bit_width();
+    auto byte_width = BitUtil::BytesForBits(bit_width);
+    while (offset < batch.length) {
+      const auto block = counter.NextWord();
+      if (block.AllSet()) {
+        if (bit_width == 1) {
+          BitUtil::SetBitsTo(out_values, out_offset + offset, block.length, false);
+        } else {
+          std::memset(out_values + (out_offset + offset) * byte_width, 0x00,
+                      byte_width * block.length);
+        }
+      } else if (!block.NoneSet()) {
+        for (int64_t j = 0; j < block.length; ++j) {
+          if (BitUtil::GetBit(out_valid, out_offset + offset + j)) continue;
+          if (bit_width == 1) {
+            BitUtil::ClearBit(out_values, out_offset + offset + j);
+          } else {
+            std::memset(out_values + (out_offset + offset + j) * byte_width, 0x00,
+                        byte_width);
+          }
+        }
+      }
+      offset += block.length;
+    }
+  }
+  return Status::OK();
+}
+
+template <typename Type, typename Enable = void>
+struct CaseWhenFunctor {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    if (batch.values[0].is_array()) {
+      return ExecArrayCaseWhen<Type>(ctx, batch, out);
+    }
+    return ExecScalarCaseWhen<Type>(ctx, batch, out);
+  }
+};
+
+template <>
+struct CaseWhenFunctor<NullType> {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    return Status::OK();
+  }
+};
+
+Result<ValueDescr> LastType(KernelContext*, const std::vector<ValueDescr>& descrs) {
+  ValueDescr result = descrs.back();
+  result.shape = GetBroadcastShape(descrs);
+  return result;
+}
+
+void AddCaseWhenKernel(const std::shared_ptr<CaseWhenFunction>& scalar_function,
+                       detail::GetTypeId get_id, ArrayKernelExec exec) {
+  ScalarKernel kernel(
+      KernelSignature::Make({InputType(Type::STRUCT), InputType(get_id.id)},
+                            OutputType(LastType),
+                            /*is_varargs=*/true),
+      exec);
+  kernel.null_handling = NullHandling::COMPUTED_PREALLOCATE;
+  kernel.mem_allocation = MemAllocation::PREALLOCATE;
+  kernel.can_write_into_slices = is_fixed_width(get_id.id);
+  DCHECK_OK(scalar_function->AddKernel(std::move(kernel)));
+}
+
+void AddPrimitiveCaseWhenKernels(const std::shared_ptr<CaseWhenFunction>& scalar_function,
+                                 const std::vector<std::shared_ptr<DataType>>& types) {
+  for (auto&& type : types) {
+    auto exec = GenerateTypeAgnosticPrimitive<CaseWhenFunctor>(*type);
+    AddCaseWhenKernel(scalar_function, type, std::move(exec));
+  }
+}
 
 const FunctionDoc if_else_doc{"Choose values based on a condition",
                               ("`cond` must be a Boolean scalar/ array. \n`left` or "
@@ -685,22 +1032,46 @@ const FunctionDoc if_else_doc{"Choose values based on a condition",
                                " output."),
                               {"cond", "left", "right"}};
 
-namespace internal {
+const FunctionDoc case_when_doc{
+    "Choose values based on multiple conditions",
+    ("`cond` must be a struct of Boolean values. `cases` can be a mix "
+     "of scalar and array arguments (of any type, but all must be the "
+     "same type or castable to a common type), with either exactly one "
+     "datum per child of `cond`, or one more `cases` than children of "
+     "`cond` (in which case we have an \"else\" value).\n"
+     "Each row of the output will be the corresponding value of the "
+     "first datum in `cases` for which the corresponding child of `cond` "
+     "is true, or otherwise the \"else\" value (if given), or null. "
+     "Essentially, this implements a switch-case or if-else, if-else... "
+     "statement."),
+    {"cond", "*cases"}};
+}  // namespace
 
 void RegisterScalarIfElse(FunctionRegistry* registry) {
-  ScalarKernel scalar_kernel;
-  scalar_kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
-  scalar_kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
-
-  auto func = std::make_shared<IfElseFunction>("if_else", Arity::Ternary(), &if_else_doc);
-
-  AddPrimitiveIfElseKernels(func, NumericTypes());
-  AddPrimitiveIfElseKernels(func, TemporalTypes());
-  AddPrimitiveIfElseKernels(func, {boolean()});
-  AddNullIfElseKernel(func);
-  // todo add binary kernels
-
-  DCHECK_OK(registry->AddFunction(std::move(func)));
+  {
+    auto func =
+        std::make_shared<IfElseFunction>("if_else", Arity::Ternary(), &if_else_doc);
+
+    AddPrimitiveIfElseKernels(func, NumericTypes());
+    AddPrimitiveIfElseKernels(func, TemporalTypes());
+    AddPrimitiveIfElseKernels(func, {boolean(), day_time_interval(), month_interval()});
+    AddNullIfElseKernel(func);
+    // todo add binary kernels
+    DCHECK_OK(registry->AddFunction(std::move(func)));
+  }
+  {
+    auto func = std::make_shared<CaseWhenFunction>(
+        "case_when", Arity::VarArgs(/*min_args=*/1), &case_when_doc);
+    AddPrimitiveCaseWhenKernels(func, NumericTypes());
+    AddPrimitiveCaseWhenKernels(func, TemporalTypes());
+    AddPrimitiveCaseWhenKernels(
+        func, {boolean(), null(), day_time_interval(), month_interval()});
+    AddCaseWhenKernel(func, Type::FIXED_SIZE_BINARY,
+                      CaseWhenFunctor<FixedSizeBinaryType>::Exec);
+    AddCaseWhenKernel(func, Type::DECIMAL128, CaseWhenFunctor<Decimal128Type>::Exec);
+    AddCaseWhenKernel(func, Type::DECIMAL256, CaseWhenFunctor<Decimal256Type>::Exec);
+    DCHECK_OK(registry->AddFunction(std::move(func)));
+  }
 }
 
 }  // namespace internal
diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc
index 98fb675da40..9192cf54ebb 100644
--- a/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc
@@ -15,12 +15,14 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include <arrow/array/concatenate.h>
-#include <arrow/compute/api_scalar.h>
-#include <arrow/testing/gtest_util.h>
-#include <arrow/testing/random.h>
 #include <benchmark/benchmark.h>
 
+#include "arrow/array/concatenate.h"
+#include "arrow/compute/api_scalar.h"
+#include "arrow/testing/gtest_util.h"
+#include "arrow/testing/random.h"
+#include "arrow/util/key_value_metadata.h"
+
 namespace arrow {
 namespace compute {
 
@@ -97,6 +99,89 @@ static void IfElseBench32Contiguous(benchmark::State& state) {
   return IfElseBenchContiguous<UInt32Type>(state);
 }
 
+template <typename Type>
+static void CaseWhenBench(benchmark::State& state) {
+  using CType = typename Type::c_type;
+  auto type = TypeTraits<Type>::type_singleton();
+  using ArrayType = typename TypeTraits<Type>::ArrayType;
+
+  int64_t len = state.range(0);
+  int64_t offset = state.range(1);
+
+  random::RandomArrayGenerator rand(/*seed=*/0);
+
+  auto cond1 = std::static_pointer_cast<BooleanArray>(
+      rand.ArrayOf(boolean(), len, /*null_probability=*/0.01));
+  auto cond2 = std::static_pointer_cast<BooleanArray>(
+      rand.ArrayOf(boolean(), len, /*null_probability=*/0.01));
+  auto cond3 = std::static_pointer_cast<BooleanArray>(
+      rand.ArrayOf(boolean(), len, /*null_probability=*/0.01));
+  auto cond_field =
+      field("cond", boolean(), key_value_metadata({{"null_probability", "0.01"}}));
+  auto cond = rand.ArrayOf(*field("", struct_({cond_field, cond_field, cond_field}),
+                                  key_value_metadata({{"null_probability", "0.0"}})),
+                           len);
+  auto val1 = std::static_pointer_cast<ArrayType>(
+      rand.ArrayOf(type, len, /*null_probability=*/0.01));
+  auto val2 = std::static_pointer_cast<ArrayType>(
+      rand.ArrayOf(type, len, /*null_probability=*/0.01));
+  auto val3 = std::static_pointer_cast<ArrayType>(
+      rand.ArrayOf(type, len, /*null_probability=*/0.01));
+  auto val4 = std::static_pointer_cast<ArrayType>(
+      rand.ArrayOf(type, len, /*null_probability=*/0.01));
+  for (auto _ : state) {
+    ABORT_NOT_OK(
+        CaseWhen(cond->Slice(offset), {val1->Slice(offset), val2->Slice(offset),
+                                       val3->Slice(offset), val4->Slice(offset)}));
+  }
+
+  state.SetBytesProcessed(state.iterations() * (len - offset) * sizeof(CType));
+}
+
+template <typename Type>
+static void CaseWhenBenchContiguous(benchmark::State& state) {
+  using CType = typename Type::c_type;
+  auto type = TypeTraits<Type>::type_singleton();
+  using ArrayType = typename TypeTraits<Type>::ArrayType;
+
+  int64_t len = state.range(0);
+  int64_t offset = state.range(1);
+
+  ASSERT_OK_AND_ASSIGN(auto trues, MakeArrayFromScalar(BooleanScalar(true), len / 3));
+  ASSERT_OK_AND_ASSIGN(auto falses, MakeArrayFromScalar(BooleanScalar(false), len / 3));
+  ASSERT_OK_AND_ASSIGN(auto nulls, MakeArrayOfNull(boolean(), len - 2 * (len / 3)));
+  ASSERT_OK_AND_ASSIGN(auto concat, Concatenate({trues, falses, nulls}));
+  auto cond1 = std::static_pointer_cast<BooleanArray>(concat);
+
+  random::RandomArrayGenerator rand(/*seed=*/0);
+  auto cond2 = std::static_pointer_cast<BooleanArray>(
+      rand.ArrayOf(boolean(), len, /*null_probability=*/0.01));
+  auto val1 = std::static_pointer_cast<ArrayType>(
+      rand.ArrayOf(type, len, /*null_probability=*/0.01));
+  auto val2 = std::static_pointer_cast<ArrayType>(
+      rand.ArrayOf(type, len, /*null_probability=*/0.01));
+  auto val3 = std::static_pointer_cast<ArrayType>(
+      rand.ArrayOf(type, len, /*null_probability=*/0.01));
+  ASSERT_OK_AND_ASSIGN(
+      auto cond, StructArray::Make({cond1, cond2}, std::vector<std::string>{"a", "b"},
+                                   nullptr, /*null_count=*/0));
+
+  for (auto _ : state) {
+    ABORT_NOT_OK(CaseWhen(cond->Slice(offset), {val1->Slice(offset), val2->Slice(offset),
+                                                val3->Slice(offset)}));
+  }
+
+  state.SetBytesProcessed(state.iterations() * (len - offset) * sizeof(CType));
+}
+
+static void CaseWhenBench64(benchmark::State& state) {
+  return CaseWhenBench<UInt64Type>(state);
+}
+
+static void CaseWhenBench64Contiguous(benchmark::State& state) {
+  return CaseWhenBenchContiguous<UInt64Type>(state);
+}
+
 BENCHMARK(IfElseBench32)->Args({elems, 0});
 BENCHMARK(IfElseBench64)->Args({elems, 0});
 
@@ -109,5 +194,11 @@ BENCHMARK(IfElseBench64Contiguous)->Args({elems, 0});
 BENCHMARK(IfElseBench32Contiguous)->Args({elems, 99});
 BENCHMARK(IfElseBench64Contiguous)->Args({elems, 99});
 
+BENCHMARK(CaseWhenBench64)->Args({elems, 0});
+BENCHMARK(CaseWhenBench64)->Args({elems, 99});
+
+BENCHMARK(CaseWhenBench64Contiguous)->Args({elems, 0});
+BENCHMARK(CaseWhenBench64Contiguous)->Args({elems, 99});
+
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
index 670a2d42a3a..cd2d04a13e0 100644
--- a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
@@ -15,12 +15,13 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include <arrow/array.h>
-#include <arrow/array/concatenate.h>
-#include <arrow/compute/api_scalar.h>
-#include <arrow/compute/kernels/test_util.h>
-#include <arrow/testing/gtest_util.h>
 #include <gtest/gtest.h>
+#include "arrow/array.h"
+#include "arrow/array/concatenate.h"
+#include "arrow/compute/api_scalar.h"
+#include "arrow/compute/kernels/test_util.h"
+#include "arrow/compute/registry.h"
+#include "arrow/testing/gtest_util.h"
 
 namespace arrow {
 namespace compute {
@@ -45,15 +46,16 @@ class TestIfElseKernel : public ::testing::Test {};
 template <typename Type>
 class TestIfElsePrimitive : public ::testing::Test {};
 
-using PrimitiveTypes = ::testing::Types<Int8Type, UInt8Type, Int16Type, UInt16Type,
-                                        Int32Type, UInt32Type, Int64Type, UInt64Type,
-                                        FloatType, DoubleType, Date32Type, Date64Type>;
+using NumericBasedTypes =
+    ::testing::Types<UInt8Type, UInt16Type, UInt32Type, UInt64Type, Int8Type, Int16Type,
+                     Int32Type, Int64Type, FloatType, DoubleType, Date32Type, Date64Type,
+                     Time32Type, Time64Type, TimestampType, MonthIntervalType>;
 
-TYPED_TEST_SUITE(TestIfElsePrimitive, PrimitiveTypes);
+TYPED_TEST_SUITE(TestIfElsePrimitive, NumericBasedTypes);
 
 TYPED_TEST(TestIfElsePrimitive, IfElseFixedSizeRand) {
   using ArrayType = typename TypeTraits<TypeParam>::ArrayType;
-  auto type = TypeTraits<TypeParam>::type_singleton();
+  auto type = default_type_instance<TypeParam>();
 
   random::RandomArrayGenerator rand(/*seed=*/0);
   int64_t len = 1000;
@@ -71,7 +73,7 @@ TYPED_TEST(TestIfElsePrimitive, IfElseFixedSizeRand) {
   auto right = std::static_pointer_cast<ArrayType>(
       rand.ArrayOf(type, len, /*null_probability=*/0.01));
 
-  typename TypeTraits<TypeParam>::BuilderType builder;
+  typename TypeTraits<TypeParam>::BuilderType builder(type, default_memory_pool());
 
   for (int64_t i = 0; i < len; ++i) {
     if (!cond->IsValid(i) || (cond->Value(i) && !left->IsValid(i)) ||
@@ -155,7 +157,7 @@ void CheckWithDifferentShapes(const std::shared_ptr<Array>& cond,
 }
 
 TYPED_TEST(TestIfElsePrimitive, IfElseFixedSize) {
-  auto type = TypeTraits<TypeParam>::type_singleton();
+  auto type = default_type_instance<TypeParam>();
 
   CheckWithDifferentShapes(ArrayFromJSON(boolean(), "[true, true, true, false]"),
                            ArrayFromJSON(type, "[1, 2, 3, 4]"),
@@ -316,5 +318,360 @@ TEST_F(TestIfElseKernel, IfElseDispatchBest) {
   CheckDispatchBest(name, {null(), uint8(), int8()}, {boolean(), int16(), int16()});
 }
 
+template <typename Type>
+class TestCaseWhenNumeric : public ::testing::Test {};
+
+TYPED_TEST_SUITE(TestCaseWhenNumeric, NumericBasedTypes);
+
+Datum MakeStruct(const std::vector<Datum>& conds) {
+  ProjectOptions options;
+  options.field_names.resize(conds.size());
+  options.field_metadata.resize(conds.size());
+  for (const auto& datum : conds) {
+    options.field_nullability.push_back(datum.null_count() > 0);
+  }
+  EXPECT_OK_AND_ASSIGN(auto result, CallFunction("project", conds, &options));
+  return result;
+}
+
+TYPED_TEST(TestCaseWhenNumeric, FixedSize) {
+  auto type = default_type_instance<TypeParam>();
+  auto cond_true = ScalarFromJSON(boolean(), "true");
+  auto cond_false = ScalarFromJSON(boolean(), "false");
+  auto cond_null = ScalarFromJSON(boolean(), "null");
+  auto cond1 = ArrayFromJSON(boolean(), "[true, true, null, null]");
+  auto cond2 = ArrayFromJSON(boolean(), "[true, false, true, null]");
+  auto scalar_null = ScalarFromJSON(type, "null");
+  auto scalar1 = ScalarFromJSON(type, "1");
+  auto scalar2 = ScalarFromJSON(type, "2");
+  auto values_null = ArrayFromJSON(type, "[null, null, null, null]");
+  auto values1 = ArrayFromJSON(type, "[3, null, 5, 6]");
+  auto values2 = ArrayFromJSON(type, "[7, 8, null, 10]");
+
+  CheckScalar("case_when", {MakeStruct({}), values1}, values1);
+  CheckScalar("case_when", {MakeStruct({}), values_null}, values_null);
+
+  CheckScalar("case_when", {MakeStruct({cond_true}), values1}, values1);
+  CheckScalar("case_when", {MakeStruct({cond_false}), values1}, values_null);
+  CheckScalar("case_when", {MakeStruct({cond_null}), values1}, values_null);
+  CheckScalar("case_when", {MakeStruct({cond_true}), values1, values2}, values1);
+  CheckScalar("case_when", {MakeStruct({cond_false}), values1, values2}, values2);
+  CheckScalar("case_when", {MakeStruct({cond_null}), values1, values2}, values2);
+
+  CheckScalar("case_when", {MakeStruct({cond_true, cond_true}), values1, values2},
+              values1);
+  CheckScalar("case_when", {MakeStruct({cond_false, cond_false}), values1, values2},
+              values_null);
+  CheckScalar("case_when", {MakeStruct({cond_true, cond_false}), values1, values2},
+              values1);
+  CheckScalar("case_when", {MakeStruct({cond_false, cond_true}), values1, values2},
+              values2);
+  CheckScalar("case_when", {MakeStruct({cond_null, cond_true}), values1, values2},
+              values2);
+  CheckScalar("case_when",
+              {MakeStruct({cond_false, cond_false}), values1, values2, values2}, values2);
+
+  CheckScalar("case_when", {MakeStruct({cond1, cond2}), scalar1, scalar2},
+              ArrayFromJSON(type, "[1, 1, 2, null]"));
+  CheckScalar("case_when", {MakeStruct({cond1}), scalar_null}, values_null);
+  CheckScalar("case_when", {MakeStruct({cond1}), scalar_null, scalar1},
+              ArrayFromJSON(type, "[null, null, 1, 1]"));
+  CheckScalar("case_when", {MakeStruct({cond1, cond2}), scalar1, scalar2, scalar1},
+              ArrayFromJSON(type, "[1, 1, 2, 1]"));
+
+  CheckScalar("case_when", {MakeStruct({cond1, cond2}), values1, values2},
+              ArrayFromJSON(type, "[3, null, null, null]"));
+  CheckScalar("case_when", {MakeStruct({cond1, cond2}), values1, values2, values1},
+              ArrayFromJSON(type, "[3, null, null, 6]"));
+  CheckScalar("case_when", {MakeStruct({cond1, cond2}), values_null, values2, values1},
+              ArrayFromJSON(type, "[null, null, null, 6]"));
+
+  CheckScalar(
+      "case_when",
+      {MakeStruct(
+           {ArrayFromJSON(boolean(),
+                          "[true, true, true, false, false, false, null, null, null]"),
+            ArrayFromJSON(boolean(),
+                          "[true, false, null, true, false, null, true, false, null]")}),
+       ArrayFromJSON(type, "[10, 11, 12, 13, 14, 15, 16, 17, 18]"),
+       ArrayFromJSON(type, "[20, 21, 22, 23, 24, 25, 26, 27, 28]")},
+      ArrayFromJSON(type, "[10, 11, 12, 23, null, null, 26, null, null]"));
+  CheckScalar(
+      "case_when",
+      {MakeStruct(
+           {ArrayFromJSON(boolean(),
+                          "[true, true, true, false, false, false, null, null, null]"),
+            ArrayFromJSON(boolean(),
+                          "[true, false, null, true, false, null, true, false, null]")}),
+       ArrayFromJSON(type, "[10, 11, 12, 13, 14, 15, 16, 17, 18]"),
+
+       ArrayFromJSON(type, "[20, 21, 22, 23, 24, 25, 26, 27, 28]"),
+       ArrayFromJSON(type, "[30, 31, 32, 33, 34, null, 36, 37, null]")},
+      ArrayFromJSON(type, "[10, 11, 12, 23, 34, null, 26, 37, null]"));
+
+  // Error cases
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      Invalid, ::testing::HasSubstr("cond struct must not be null"),
+      CallFunction(
+          "case_when",
+          {Datum(std::make_shared<StructScalar>(struct_({field("", boolean())}))),
+           Datum(scalar1)}));
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      Invalid, ::testing::HasSubstr("cond struct must not have top-level nulls"),
+      CallFunction(
+          "case_when",
+          {Datum(*MakeArrayOfNull(struct_({field("", boolean())}), 4)), Datum(values1)}));
+}
+
+TEST(TestCaseWhen, Null) {
+  auto cond_true = ScalarFromJSON(boolean(), "true");
+  auto cond_false = ScalarFromJSON(boolean(), "false");
+  auto cond_arr = ArrayFromJSON(boolean(), "[true, true, false, null]");
+  auto scalar = ScalarFromJSON(null(), "null");
+  auto array = ArrayFromJSON(null(), "[null, null, null, null]");
+  CheckScalar("case_when", {MakeStruct({}), array}, array);
+  CheckScalar("case_when", {MakeStruct({cond_false}), array}, array);
+  CheckScalar("case_when", {MakeStruct({cond_true}), array, array}, array);
+  CheckScalar("case_when", {MakeStruct({cond_arr, cond_true}), array, array}, array);
+}
+
+TEST(TestCaseWhen, Boolean) {
+  auto type = boolean();
+  auto cond_true = ScalarFromJSON(boolean(), "true");
+  auto cond_false = ScalarFromJSON(boolean(), "false");
+  auto cond_null = ScalarFromJSON(boolean(), "null");
+  auto cond1 = ArrayFromJSON(boolean(), "[true, true, null, null]");
+  auto cond2 = ArrayFromJSON(boolean(), "[true, false, true, null]");
+  auto scalar_null = ScalarFromJSON(type, "null");
+  auto scalar1 = ScalarFromJSON(type, "true");
+  auto scalar2 = ScalarFromJSON(type, "false");
+  auto values_null = ArrayFromJSON(type, "[null, null, null, null]");
+  auto values1 = ArrayFromJSON(type, "[true, null, true, true]");
+  auto values2 = ArrayFromJSON(type, "[false, false, null, false]");
+
+  CheckScalar("case_when", {MakeStruct({}), values1}, values1);
+  CheckScalar("case_when", {MakeStruct({}), values_null}, values_null);
+
+  CheckScalar("case_when", {MakeStruct({cond_true}), values1}, values1);
+  CheckScalar("case_when", {MakeStruct({cond_false}), values1}, values_null);
+  CheckScalar("case_when", {MakeStruct({cond_null}), values1}, values_null);
+  CheckScalar("case_when", {MakeStruct({cond_true}), values1, values2}, values1);
+  CheckScalar("case_when", {MakeStruct({cond_false}), values1, values2}, values2);
+  CheckScalar("case_when", {MakeStruct({cond_null}), values1, values2}, values2);
+
+  CheckScalar("case_when", {MakeStruct({cond_true, cond_true}), values1, values2},
+              values1);
+  CheckScalar("case_when", {MakeStruct({cond_false, cond_false}), values1, values2},
+              values_null);
+  CheckScalar("case_when", {MakeStruct({cond_true, cond_false}), values1, values2},
+              values1);
+  CheckScalar("case_when", {MakeStruct({cond_false, cond_true}), values1, values2},
+              values2);
+  CheckScalar("case_when", {MakeStruct({cond_null, cond_true}), values1, values2},
+              values2);
+  CheckScalar("case_when",
+              {MakeStruct({cond_false, cond_false}), values1, values2, values2}, values2);
+
+  CheckScalar("case_when", {MakeStruct({cond1, cond2}), scalar1, scalar2},
+              ArrayFromJSON(type, "[true, true, false, null]"));
+  CheckScalar("case_when", {MakeStruct({cond1}), scalar_null}, values_null);
+  CheckScalar("case_when", {MakeStruct({cond1}), scalar_null, scalar1},
+              ArrayFromJSON(type, "[null, null, true, true]"));
+  CheckScalar("case_when", {MakeStruct({cond1, cond2}), scalar1, scalar2, scalar1},
+              ArrayFromJSON(type, "[true, true, false, true]"));
+
+  CheckScalar("case_when", {MakeStruct({cond1, cond2}), values1, values2},
+              ArrayFromJSON(type, "[true, null, null, null]"));
+  CheckScalar("case_when", {MakeStruct({cond1, cond2}), values1, values2, values1},
+              ArrayFromJSON(type, "[true, null, null, true]"));
+  CheckScalar("case_when", {MakeStruct({cond1, cond2}), values_null, values2, values1},
+              ArrayFromJSON(type, "[null, null, null, true]"));
+}
+
+TEST(TestCaseWhen, DayTimeInterval) {
+  auto type = day_time_interval();
+  auto cond_true = ScalarFromJSON(boolean(), "true");
+  auto cond_false = ScalarFromJSON(boolean(), "false");
+  auto cond_null = ScalarFromJSON(boolean(), "null");
+  auto cond1 = ArrayFromJSON(boolean(), "[true, true, null, null]");
+  auto cond2 = ArrayFromJSON(boolean(), "[true, false, true, null]");
+  auto scalar_null = ScalarFromJSON(type, "null");
+  auto scalar1 = ScalarFromJSON(type, "[1, 1]");
+  auto scalar2 = ScalarFromJSON(type, "[2, 2]");
+  auto values_null = ArrayFromJSON(type, "[null, null, null, null]");
+  auto values1 = ArrayFromJSON(type, "[[3, 3], null, [5, 5], [6, 6]]");
+  auto values2 = ArrayFromJSON(type, "[[7, 7], [8, 8], null, [10, 10]]");
+
+  CheckScalar("case_when", {MakeStruct({}), values1}, values1);
+  CheckScalar("case_when", {MakeStruct({}), values_null}, values_null);
+
+  CheckScalar("case_when", {MakeStruct({cond_true}), values1}, values1);
+  CheckScalar("case_when", {MakeStruct({cond_false}), values1}, values_null);
+  CheckScalar("case_when", {MakeStruct({cond_null}), values1}, values_null);
+  CheckScalar("case_when", {MakeStruct({cond_true}), values1, values2}, values1);
+  CheckScalar("case_when", {MakeStruct({cond_false}), values1, values2}, values2);
+  CheckScalar("case_when", {MakeStruct({cond_null}), values1, values2}, values2);
+
+  CheckScalar("case_when", {MakeStruct({cond_true, cond_true}), values1, values2},
+              values1);
+  CheckScalar("case_when", {MakeStruct({cond_false, cond_false}), values1, values2},
+              values_null);
+  CheckScalar("case_when", {MakeStruct({cond_true, cond_false}), values1, values2},
+              values1);
+  CheckScalar("case_when", {MakeStruct({cond_false, cond_true}), values1, values2},
+              values2);
+  CheckScalar("case_when", {MakeStruct({cond_null, cond_true}), values1, values2},
+              values2);
+  CheckScalar("case_when",
+              {MakeStruct({cond_false, cond_false}), values1, values2, values2}, values2);
+
+  CheckScalar("case_when", {MakeStruct({cond1, cond2}), scalar1, scalar2},
+              ArrayFromJSON(type, "[[1, 1], [1, 1], [2, 2], null]"));
+  CheckScalar("case_when", {MakeStruct({cond1}), scalar_null}, values_null);
+  CheckScalar("case_when", {MakeStruct({cond1}), scalar_null, scalar1},
+              ArrayFromJSON(type, "[null, null, [1, 1], [1, 1]]"));
+  CheckScalar("case_when", {MakeStruct({cond1, cond2}), scalar1, scalar2, scalar1},
+              ArrayFromJSON(type, "[[1, 1], [1, 1], [2, 2], [1, 1]]"));
+
+  CheckScalar("case_when", {MakeStruct({cond1, cond2}), values1, values2},
+              ArrayFromJSON(type, "[[3, 3], null, null, null]"));
+  CheckScalar("case_when", {MakeStruct({cond1, cond2}), values1, values2, values1},
+              ArrayFromJSON(type, "[[3, 3], null, null, [6, 6]]"));
+  CheckScalar("case_when", {MakeStruct({cond1, cond2}), values_null, values2, values1},
+              ArrayFromJSON(type, "[null, null, null, [6, 6]]"));
+}
+
+TEST(TestCaseWhen, Decimal) {
+  for (const auto& type :
+       std::vector<std::shared_ptr<DataType>>{decimal128(3, 2), decimal256(3, 2)}) {
+    auto cond_true = ScalarFromJSON(boolean(), "true");
+    auto cond_false = ScalarFromJSON(boolean(), "false");
+    auto cond_null = ScalarFromJSON(boolean(), "null");
+    auto cond1 = ArrayFromJSON(boolean(), "[true, true, null, null]");
+    auto cond2 = ArrayFromJSON(boolean(), "[true, false, true, null]");
+    auto scalar_null = ScalarFromJSON(type, "null");
+    auto scalar1 = ScalarFromJSON(type, R"("1.23")");
+    auto scalar2 = ScalarFromJSON(type, R"("2.34")");
+    auto values_null = ArrayFromJSON(type, "[null, null, null, null]");
+    auto values1 = ArrayFromJSON(type, R"(["3.45", null, "5.67", "6.78"])");
+    auto values2 = ArrayFromJSON(type, R"(["7.89", "8.90", null, "1.01"])");
+
+    CheckScalar("case_when", {MakeStruct({}), values1}, values1);
+    CheckScalar("case_when", {MakeStruct({}), values_null}, values_null);
+
+    CheckScalar("case_when", {MakeStruct({cond_true}), values1}, values1);
+    CheckScalar("case_when", {MakeStruct({cond_false}), values1}, values_null);
+    CheckScalar("case_when", {MakeStruct({cond_null}), values1}, values_null);
+    CheckScalar("case_when", {MakeStruct({cond_true}), values1, values2}, values1);
+    CheckScalar("case_when", {MakeStruct({cond_false}), values1, values2}, values2);
+    CheckScalar("case_when", {MakeStruct({cond_null}), values1, values2}, values2);
+
+    CheckScalar("case_when", {MakeStruct({cond_true, cond_true}), values1, values2},
+                values1);
+    CheckScalar("case_when", {MakeStruct({cond_false, cond_false}), values1, values2},
+                values_null);
+    CheckScalar("case_when", {MakeStruct({cond_true, cond_false}), values1, values2},
+                values1);
+    CheckScalar("case_when", {MakeStruct({cond_false, cond_true}), values1, values2},
+                values2);
+    CheckScalar("case_when", {MakeStruct({cond_null, cond_true}), values1, values2},
+                values2);
+    CheckScalar("case_when",
+                {MakeStruct({cond_false, cond_false}), values1, values2, values2},
+                values2);
+
+    CheckScalar("case_when", {MakeStruct({cond1, cond2}), scalar1, scalar2},
+                ArrayFromJSON(type, R"(["1.23", "1.23", "2.34", null])"));
+    CheckScalar("case_when", {MakeStruct({cond1}), scalar_null}, values_null);
+    CheckScalar("case_when", {MakeStruct({cond1}), scalar_null, scalar1},
+                ArrayFromJSON(type, R"([null, null, "1.23", "1.23"])"));
+    CheckScalar("case_when", {MakeStruct({cond1, cond2}), scalar1, scalar2, scalar1},
+                ArrayFromJSON(type, R"(["1.23", "1.23", "2.34", "1.23"])"));
+
+    CheckScalar("case_when", {MakeStruct({cond1, cond2}), values1, values2},
+                ArrayFromJSON(type, R"(["3.45", null, null, null])"));
+    CheckScalar("case_when", {MakeStruct({cond1, cond2}), values1, values2, values1},
+                ArrayFromJSON(type, R"(["3.45", null, null, "6.78"])"));
+    CheckScalar("case_when", {MakeStruct({cond1, cond2}), values_null, values2, values1},
+                ArrayFromJSON(type, R"([null, null, null, "6.78"])"));
+  }
+}
+
+TEST(TestCaseWhen, FixedSizeBinary) {
+  auto type = fixed_size_binary(3);
+  auto cond_true = ScalarFromJSON(boolean(), "true");
+  auto cond_false = ScalarFromJSON(boolean(), "false");
+  auto cond_null = ScalarFromJSON(boolean(), "null");
+  auto cond1 = ArrayFromJSON(boolean(), "[true, true, null, null]");
+  auto cond2 = ArrayFromJSON(boolean(), "[true, false, true, null]");
+  auto scalar_null = ScalarFromJSON(type, "null");
+  auto scalar1 = ScalarFromJSON(type, R"("abc")");
+  auto scalar2 = ScalarFromJSON(type, R"("bcd")");
+  auto values_null = ArrayFromJSON(type, "[null, null, null, null]");
+  auto values1 = ArrayFromJSON(type, R"(["cde", null, "def", "efg"])");
+  auto values2 = ArrayFromJSON(type, R"(["fgh", "ghi", null, "hij"])");
+
+  CheckScalar("case_when", {MakeStruct({}), values1}, values1);
+  CheckScalar("case_when", {MakeStruct({}), values_null}, values_null);
+
+  CheckScalar("case_when", {MakeStruct({cond_true}), values1}, values1);
+  CheckScalar("case_when", {MakeStruct({cond_false}), values1}, values_null);
+  CheckScalar("case_when", {MakeStruct({cond_null}), values1}, values_null);
+  CheckScalar("case_when", {MakeStruct({cond_true}), values1, values2}, values1);
+  CheckScalar("case_when", {MakeStruct({cond_false}), values1, values2}, values2);
+  CheckScalar("case_when", {MakeStruct({cond_null}), values1, values2}, values2);
+
+  CheckScalar("case_when", {MakeStruct({cond_true, cond_true}), values1, values2},
+              values1);
+  CheckScalar("case_when", {MakeStruct({cond_false, cond_false}), values1, values2},
+              values_null);
+  CheckScalar("case_when", {MakeStruct({cond_true, cond_false}), values1, values2},
+              values1);
+  CheckScalar("case_when", {MakeStruct({cond_false, cond_true}), values1, values2},
+              values2);
+  CheckScalar("case_when", {MakeStruct({cond_null, cond_true}), values1, values2},
+              values2);
+  CheckScalar("case_when",
+              {MakeStruct({cond_false, cond_false}), values1, values2, values2}, values2);
+
+  CheckScalar("case_when", {MakeStruct({cond1, cond2}), scalar1, scalar2},
+              ArrayFromJSON(type, R"(["abc", "abc", "bcd", null])"));
+  CheckScalar("case_when", {MakeStruct({cond1}), scalar_null}, values_null);
+  CheckScalar("case_when", {MakeStruct({cond1}), scalar_null, scalar1},
+              ArrayFromJSON(type, R"([null, null, "abc", "abc"])"));
+  CheckScalar("case_when", {MakeStruct({cond1, cond2}), scalar1, scalar2, scalar1},
+              ArrayFromJSON(type, R"(["abc", "abc", "bcd", "abc"])"));
+
+  CheckScalar("case_when", {MakeStruct({cond1, cond2}), values1, values2},
+              ArrayFromJSON(type, R"(["cde", null, null, null])"));
+  CheckScalar("case_when", {MakeStruct({cond1, cond2}), values1, values2, values1},
+              ArrayFromJSON(type, R"(["cde", null, null, "efg"])"));
+  CheckScalar("case_when", {MakeStruct({cond1, cond2}), values_null, values2, values1},
+              ArrayFromJSON(type, R"([null, null, null, "efg"])"));
+}
+
+TEST(TestCaseWhen, DispatchBest) {
+  CheckDispatchBest("case_when", {struct_({field("", boolean())}), int64(), int32()},
+                    {struct_({field("", boolean())}), int64(), int64()});
+
+  ASSERT_RAISES(Invalid, CallFunction("case_when", {}));
+  // Too many/too few conditions
+  ASSERT_RAISES(
+      Invalid, CallFunction("case_when", {MakeStruct({ArrayFromJSON(boolean(), "[]")})}));
+  ASSERT_RAISES(Invalid,
+                CallFunction("case_when", {MakeStruct({}), ArrayFromJSON(int64(), "[]"),
+                                           ArrayFromJSON(int64(), "[]")}));
+  // Conditions must be struct of boolean
+  ASSERT_RAISES(TypeError,
+                CallFunction("case_when", {MakeStruct({ArrayFromJSON(int64(), "[]")}),
+                                           ArrayFromJSON(int64(), "[]")}));
+  ASSERT_RAISES(TypeError, CallFunction("case_when", {ArrayFromJSON(boolean(), "[true]"),
+                                                      ArrayFromJSON(int32(), "[0]")}));
+  // Values must have compatible types
+  ASSERT_RAISES(NotImplemented,
+                CallFunction("case_when", {MakeStruct({ArrayFromJSON(boolean(), "[]")}),
+                                           ArrayFromJSON(int64(), "[]"),
+                                           ArrayFromJSON(utf8(), "[]")}));
+}
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/test_util.cc b/cpp/src/arrow/compute/kernels/test_util.cc
index a1151717d8b..ce8d42e34c2 100644
--- a/cpp/src/arrow/compute/kernels/test_util.cc
+++ b/cpp/src/arrow/compute/kernels/test_util.cc
@@ -47,12 +47,10 @@ DatumVector GetDatums(const std::vector<T>& inputs) {
 }
 
 void CheckScalarNonRecursive(const std::string& func_name, const DatumVector& inputs,
-                             const std::shared_ptr<Array>& expected,
-                             const FunctionOptions* options) {
+                             const Datum& expected, const FunctionOptions* options) {
   ASSERT_OK_AND_ASSIGN(Datum out, CallFunction(func_name, inputs, options));
-  std::shared_ptr<Array> actual = std::move(out).make_array();
-  ValidateOutput(*actual);
-  AssertArraysEqual(*expected, *actual, /*verbose=*/true);
+  ValidateOutput(out);
+  AssertDatumsEqual(expected, out, /*verbose=*/true);
 }
 
 template <typename... SliceArgs>
@@ -103,35 +101,38 @@ void CheckScalar(std::string func_name, const ScalarVector& inputs,
   }
 }
 
-void CheckScalar(std::string func_name, const DatumVector& inputs,
-                 std::shared_ptr<Array> expected, const FunctionOptions* options) {
-  CheckScalarNonRecursive(func_name, inputs, expected, options);
+void CheckScalar(std::string func_name, const DatumVector& inputs, Datum expected_datum,
+                 const FunctionOptions* options) {
+  CheckScalarNonRecursive(func_name, inputs, expected_datum, options);
+
+  if (expected_datum.is_scalar()) return;
+  ASSERT_TRUE(expected_datum.is_array())
+      << "CheckScalar is only implemented for scalar/array expected values";
+  auto expected = expected_datum.make_array();
 
   // check for at least 1 array, and make sure the others are of equal length
-  std::shared_ptr<Array> array;
+  bool has_array = false;
   for (const auto& input : inputs) {
     if (input.is_array()) {
-      if (!array) {
-        array = input.make_array();
-      } else {
-        ASSERT_EQ(input.array()->length, array->length());
-      }
+      ASSERT_EQ(input.array()->length, expected->length());
+      has_array = true;
     }
   }
+  ASSERT_TRUE(has_array) << "Must have at least 1 array input to have an array output";
 
   // Check all the input scalars, if scalars are implemented
   if (std::none_of(inputs.begin(), inputs.end(), [](const Datum& datum) {
         return datum.type()->id() == Type::EXTENSION;
       })) {
     // Check all the input scalars
-    for (int64_t i = 0; i < array->length(); ++i) {
+    for (int64_t i = 0; i < expected->length(); ++i) {
       CheckScalar(func_name, GetScalars(inputs, i), *expected->GetScalar(i), options);
     }
   }
 
   // Since it's a scalar function, calling it on sliced inputs should
   // result in the sliced expected output.
-  const auto slice_length = array->length() / 3;
+  const auto slice_length = expected->length() / 3;
   if (slice_length > 0) {
     CheckScalarNonRecursive(func_name, SliceArrays(inputs, 0, slice_length),
                             expected->Slice(0, slice_length), options);
diff --git a/cpp/src/arrow/compute/kernels/test_util.h b/cpp/src/arrow/compute/kernels/test_util.h
index c691a9f3be3..a3fb9308f58 100644
--- a/cpp/src/arrow/compute/kernels/test_util.h
+++ b/cpp/src/arrow/compute/kernels/test_util.h
@@ -95,8 +95,7 @@ void CheckScalar(std::string func_name, const ScalarVector& inputs,
                  std::shared_ptr<Scalar> expected,
                  const FunctionOptions* options = nullptr);
 
-void CheckScalar(std::string func_name, const DatumVector& inputs,
-                 std::shared_ptr<Array> expected,
+void CheckScalar(std::string func_name, const DatumVector& inputs, Datum expected,
                  const FunctionOptions* options = nullptr);
 
 void CheckScalarUnary(std::string func_name, std::shared_ptr<DataType> in_ty,
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index 6ce808aba67..ed97faead74 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -860,33 +860,44 @@ Structural transforms
 
 .. XXX (this category is a bit of a hodgepodge)
 
-+--------------------------+------------+------------------------------------------------+---------------------+---------+
-| Function name            | Arity      | Input types                                    | Output type         | Notes   |
-+==========================+============+================================================+=====================+=========+
-| fill_null                | Binary     | Boolean, Null, Numeric, Temporal, String-like  | Input type          | \(1)    |
-+--------------------------+------------+------------------------------------------------+---------------------+---------+
-| if_else                  | Ternary    | Boolean, Null, Numeric, Temporal               | Input type          | \(2)    |
-+--------------------------+------------+------------------------------------------------+---------------------+---------+
-| is_finite                | Unary      | Float, Double                                  | Boolean             | \(3)    |
-+--------------------------+------------+------------------------------------------------+---------------------+---------+
-| is_inf                   | Unary      | Float, Double                                  | Boolean             | \(4)    |
-+--------------------------+------------+------------------------------------------------+---------------------+---------+
-| is_nan                   | Unary      | Float, Double                                  | Boolean             | \(5)    |
-+--------------------------+------------+------------------------------------------------+---------------------+---------+
-| is_null                  | Unary      | Any                                            | Boolean             | \(6)    |
-+--------------------------+------------+------------------------------------------------+---------------------+---------+
-| is_valid                 | Unary      | Any                                            | Boolean             | \(7)    |
-+--------------------------+------------+------------------------------------------------+---------------------+---------+
-| list_value_length        | Unary      | List-like                                      | Int32 or Int64      | \(8)    |
-+--------------------------+------------+------------------------------------------------+---------------------+---------+
-| project                  | Varargs    | Any                                            | Struct              | \(9)    |
-+--------------------------+------------+------------------------------------------------+---------------------+---------+
-
-* \(1) First input must be an array, second input a scalar of the same type.
++--------------------------+------------+---------------------------------------------------+---------------------+---------+
+| Function name            | Arity      | Input types                                       | Output type         | Notes   |
++==========================+============+===================================================+=====================+=========+
+| case_when                | Varargs    | Struct of Boolean (Arg 0), Any fixed-width (rest) | Input type          | \(1)   |
++--------------------------+------------+---------------------------------------------------+---------------------+---------+
+| fill_null                | Binary     | Boolean, Null, Numeric, Temporal, String-like     | Input type          | \(2)    |
++--------------------------+------------+---------------------------------------------------+---------------------+---------+
+| if_else                  | Ternary    | Boolean, Null, Numeric, Temporal                  | Input type          | \(3)    |
++--------------------------+------------+---------------------------------------------------+---------------------+---------+
+| is_finite                | Unary      | Float, Double                                     | Boolean             | \(4)    |
++--------------------------+------------+---------------------------------------------------+---------------------+---------+
+| is_inf                   | Unary      | Float, Double                                     | Boolean             | \(5)    |
++--------------------------+------------+---------------------------------------------------+---------------------+---------+
+| is_nan                   | Unary      | Float, Double                                     | Boolean             | \(6)    |
++--------------------------+------------+---------------------------------------------------+---------------------+---------+
+| is_null                  | Unary      | Any                                               | Boolean             | \(7)    |
++--------------------------+------------+---------------------------------------------------+---------------------+---------+
+| is_valid                 | Unary      | Any                                               | Boolean             | \(8)    |
++--------------------------+------------+---------------------------------------------------+---------------------+---------+
+| list_value_length        | Unary      | List-like                                         | Int32 or Int64      | \(9)    |
++--------------------------+------------+---------------------------------------------------+---------------------+---------+
+| project                  | Varargs    | Any                                               | Struct              | \(10)   |
++--------------------------+------------+---------------------------------------------------+---------------------+---------+
+
+* \(1) This function acts like a SQL 'case when' statement or switch-case. The
+  input is a "condition" value, which is a struct of Booleans, followed by the
+  values for each "branch". There must be either exactly one value argument for
+  each child of the condition struct, or one more value argument than children
+  (in which case we have an 'else' or 'default' value). The output is of the
+  same type as the value inputs; each row will be the corresponding value from
+  the first value datum for which the corresponding Boolean is true, or the
+  corresponding value from the 'default' input, or null otherwise.
+
+* \(2) First input must be an array, second input a scalar of the same type.
   Output is an array of the same type as the inputs, and with the same values
   as the first input, except for nulls replaced with the second input value.
 
-* \(2) First input must be a Boolean scalar or array. Second and third inputs
+* \(3) First input must be a Boolean scalar or array. Second and third inputs
   could be scalars or arrays and must be of the same type. Output is an array
   (or scalar if all inputs are scalar) of the same type as the second/ third
   input. If the nulls present on the first input, they will be promoted to the
@@ -894,21 +905,21 @@ Structural transforms
 
   Also see: :ref:`replace_with_mask <cpp-compute-vector-structural-transforms>`.
 
-* \(3) Output is true iff the corresponding input element is finite (not Infinity,
+* \(4) Output is true iff the corresponding input element is finite (not Infinity,
   -Infinity, or NaN).
 
-* \(4) Output is true iff the corresponding input element is Infinity/-Infinity.
+* \(5) Output is true iff the corresponding input element is Infinity/-Infinity.
 
-* \(5) Output is true iff the corresponding input element is NaN.
+* \(6) Output is true iff the corresponding input element is NaN.
 
-* \(6) Output is true iff the corresponding input element is null.
+* \(7) Output is true iff the corresponding input element is null.
 
-* \(7) Output is true iff the corresponding input element is non-null.
+* \(8) Output is true iff the corresponding input element is non-null.
 
-* \(8) Each output element is the length of the corresponding input element
+* \(9) Each output element is the length of the corresponding input element
   (null if input is null).  Output type is Int32 for List, Int64 for LargeList.
 
-* \(9) The output struct's field types are the types of its arguments. The
+* \(10) The output struct's field types are the types of its arguments. The
   field names are specified using an instance of :struct:`ProjectOptions`.
   The output shape will be scalar if all inputs are scalar, otherwise any
   scalars will be broadcast to arrays.
diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst
index 09c67598193..c12f2f91b26 100644
--- a/docs/source/python/api/compute.rst
+++ b/docs/source/python/api/compute.rst
@@ -335,6 +335,7 @@ Structural Transforms
    :toctree: ../generated/
 
    binary_length
+   case_when
    fill_null
    if_else
    is_finite

From 5ccbca02bd7d9e3451c9b0f7d2c10eeeadb6b24d Mon Sep 17 00:00:00 2001
From: Benjamin Kietzman <bengilgit@gmail.com>
Date: Thu, 15 Jul 2021 17:02:30 -0400
Subject: [PATCH 577/719] ARROW-13346: [C++] Remove compile time parsing from
 EnumType

Requires a slightly more verbose declaration:

```c++
struct Color : EnumType<Color> {
  using EnumType::EnumType;
  static constexpr EnumStrings<3> values() { return {"red", "green", "blue"}; }
  static constexpr const char* name() { return "Color"; }
};
```

but doesn't do any string processing at compile time beyond computing the length of each string.

Closes #10726 from bkietz/13346-Remove-compile-time-parsi

Authored-by: Benjamin Kietzman <bengilgit@gmail.com>
Signed-off-by: Benjamin Kietzman <bengilgit@gmail.com>
---
 cpp/src/arrow/util/enum.h             | 116 +++++++++++++-------------
 cpp/src/arrow/util/reflection_test.cc |  34 +++++---
 2 files changed, 78 insertions(+), 72 deletions(-)

diff --git a/cpp/src/arrow/util/enum.h b/cpp/src/arrow/util/enum.h
index 7a0404d5c76..d0382bc604f 100644
--- a/cpp/src/arrow/util/enum.h
+++ b/cpp/src/arrow/util/enum.h
@@ -21,13 +21,12 @@
 #include <type_traits>
 #include <vector>
 
+#include "arrow/result.h"
 #include "arrow/util/string_view.h"
 
 namespace arrow {
 namespace internal {
 
-constexpr bool IsSpace(char c) { return c == ' ' || c == '\n' || c == '\r'; }
-
 constexpr char ToLower(char c) { return c >= 'A' && c <= 'Z' ? c - 'A' + 'a' : c; }
 
 constexpr bool CaseInsensitiveEquals(const char* l, const char* r,
@@ -43,68 +42,51 @@ constexpr bool CaseInsensitiveEquals(util::string_view l, util::string_view r) {
   return l.size() == r.size() && CaseInsensitiveEquals(l.data(), r.data(), l.size());
 }
 
-constexpr const char* SkipWhitespace(const char* raw) {
-  return *raw == '\0' || !IsSpace(*raw) ? raw : SkipWhitespace(raw + 1);
-}
+}  // namespace internal
 
-constexpr const char* SkipNonWhitespace(const char* raw) {
-  return *raw == '\0' || IsSpace(*raw) ? raw : SkipNonWhitespace(raw + 1);
-}
+template <int N>
+struct EnumStrings {
+  template <int M>
+  static constexpr bool assert_count() {
+    static_assert(M == N, "Incorrect number of enum strings provided");
+    return false;
+  }
 
-constexpr size_t TokenSize(const char* token_start) {
-  return SkipNonWhitespace(token_start) - token_start;
-}
+  template <typename... Strs>
+  constexpr EnumStrings(const Strs&... strs)  // NOLINT runtime/explicit
+      : dummy_{assert_count<sizeof...(Strs)>()}, strings_{util::string_view(strs)...} {}
 
-constexpr size_t NextTokenStart(const char* raw, size_t token_start) {
-  return SkipWhitespace(SkipNonWhitespace(raw + token_start)) - raw;
-}
+  constexpr int GetIndex(util::string_view repr, int i = 0) const {
+    return i == N ? -1
+                  : internal::CaseInsensitiveEquals(strings_[i], repr)
+                        ? i
+                        : GetIndex(repr, i + 1);
+  }
 
-template <typename Raw, size_t... Offsets>
-struct EnumTypeImpl {
-  static constexpr int kSize = sizeof...(Offsets);
+  using value_type = util::string_view;
+  using const_iterator = const util::string_view*;
 
-  static constexpr util::string_view kValueStrs[sizeof...(Offsets)] = {
-      {Raw::kValues + Offsets, TokenSize(Raw::kValues + Offsets)}...};
+  constexpr int size() const { return N; }
+  constexpr const util::string_view* data() const { return strings_; }
+  constexpr const_iterator begin() const { return data(); }
+  constexpr const_iterator end() const { return begin() + size(); }
+  constexpr util::string_view operator[](int i) const { return strings_[i]; }
 
-  static constexpr int GetIndex(util::string_view repr, int i = 0) {
-    return i == kSize
-               ? -1
-               : CaseInsensitiveEquals(kValueStrs[i], repr) ? i : GetIndex(repr, i + 1);
-  }
+  bool dummy_;
+  util::string_view strings_[N];  // NOLINT modernize
 };
 
-template <typename Raw, size_t... Offsets>
-constexpr util::string_view const
-    EnumTypeImpl<Raw, Offsets...>::kValueStrs[sizeof...(Offsets)];
-
-/// \cond false
-template <typename Raw, bool IsEnd = false,
-          size_t MaxOffset = SkipWhitespace(Raw::kValues) - Raw::kValues,
-          size_t... Offsets>
-struct EnumTypeBuilder
-    : EnumTypeBuilder<Raw, Raw::kValues[NextTokenStart(Raw::kValues, MaxOffset)] == '\0',
-                      NextTokenStart(Raw::kValues, MaxOffset), Offsets..., MaxOffset> {};
-
-template <typename Raw, size_t TerminalNullOffset, size_t... Offsets>
-struct EnumTypeBuilder<Raw, /*IsEnd=*/true, TerminalNullOffset, Offsets...> {
-  using ImplType = EnumTypeImpl<Raw, Offsets...>;
-};
-
-// reuse struct as an alias for typename EnumTypeBuilder<Raw>::ImplType
-template <typename Raw>
-struct EnumTypeImpl<Raw> : EnumTypeBuilder<Raw>::ImplType {};
-/// \endcond
-
 struct EnumTypeTag {};
 
 /// \brief An enum replacement with minimal reflection capabilities.
 ///
 /// Declare an enum by inheriting from this helper with CRTP, including a
-/// static string literal data member containing the enum's values:
+/// static string literal member function returning the enum's values:
 ///
 ///     struct Color : EnumType<Color> {
 ///       using EnumType::EnumType;
-///       static constexpr char* kValues = "red green blue";
+///       static constexpr EnumStrings<3> values() { return {"red", "green", "blue"}; }
+///       static constexpr const char* name() { return "Color"; }
 ///     };
 ///
 /// Ensure the doccomment includes a description of each enum value.
@@ -117,18 +99,16 @@ struct EnumType : EnumTypeTag {
   constexpr EnumType() = default;
 
   constexpr explicit EnumType(int index)
-      : index{index >= 0 && index < EnumTypeImpl<Raw>::kSize ? index : -1} {}
+      : index{index >= 0 && index < Raw::values().size() ? index : -1} {}
 
   constexpr explicit EnumType(util::string_view repr)
-      : index{EnumTypeImpl<Raw>::GetIndex(repr)} {}
+      : index{Raw::values().GetIndex(repr)} {}
 
   constexpr bool operator==(EnumType other) const { return index == other.index; }
   constexpr bool operator!=(EnumType other) const { return index != other.index; }
 
   /// Return the string representation of this enum value.
-  std::string ToString() const {
-    return EnumTypeImpl<Raw>::kValueStrs[index].to_string();
-  }
+  std::string ToString() const { return Raw::values()[index].to_string(); }
 
   /// \brief Valid enum values will be truthy.
   ///
@@ -140,12 +120,31 @@ struct EnumType : EnumTypeTag {
   constexpr int operator*() const { return index; }
 
   /// The number of values in this enumeration.
-  static constexpr int size() { return EnumTypeImpl<Raw>::kSize; }
+  static constexpr int size() { return Raw::values().size(); }
+
+  /// Construct a valid enum from int or raise an error
+  static Result<Raw> Make(int index) {
+    if (auto valid = Raw(index)) return valid;
+    return Status::Invalid("index ", index, " for enum ", Raw::name(),
+                           "- index should be in range [0, ", Raw::values().size(), ")");
+  }
 
-  /// String representations of each value in this enumeration.
-  static std::vector<util::string_view> value_strings() {
-    const util::string_view* begin = EnumTypeImpl<Raw>::kValueStrs;
-    return {begin, begin + size()};
+  /// Construct a valid enum from repr or raise an error
+  static Result<Raw> Make(util::string_view repr) {
+    if (auto valid = Raw(repr)) return valid;
+
+    std::string values;
+    static std::string sep = ", ";
+    for (auto value : Raw::values()) {
+      values.append("'");
+      values.append(value.data(), value.size());
+      values.append("'");
+      values.append(sep);
+    }
+    values.resize(values.size() - sep.size());
+
+    return Status::Invalid("string '", repr, "' for enum ", Raw::name(),
+                           "- string should be one of {", values, "}");
   }
 
   int index = -1;
@@ -158,5 +157,4 @@ struct EnumType : EnumTypeTag {
 template <typename T>
 using is_reflection_enum = std::is_base_of<EnumTypeTag, T>;
 
-}  // namespace internal
 }  // namespace arrow
diff --git a/cpp/src/arrow/util/reflection_test.cc b/cpp/src/arrow/util/reflection_test.cc
index 02a40467a80..9e3afb00589 100644
--- a/cpp/src/arrow/util/reflection_test.cc
+++ b/cpp/src/arrow/util/reflection_test.cc
@@ -19,10 +19,15 @@
 
 #include <gtest/gtest.h>
 
+#include "arrow/testing/matchers.h"
 #include "arrow/util/enum.h"
 #include "arrow/util/reflection_internal.h"
 #include "arrow/util/string.h"
 
+using testing::ElementsAre;
+using testing::Eq;
+using testing::HasSubstr;
+
 namespace arrow {
 namespace internal {
 
@@ -226,16 +231,6 @@ TEST(Reflection, CompileTimeStringOps) {
   static_assert(CaseInsensitiveEquals("Ab", "ab"), "");
   static_assert(CaseInsensitiveEquals("Ab ", "ab", 2), "");
   static_assert(CaseInsensitiveEquals(util::string_view{"Ab ", 2}, "ab"), "");
-
-  static_assert(CaseInsensitiveEquals(SkipWhitespace("  a"), "a"), "");
-  static_assert(CaseInsensitiveEquals(SkipWhitespace("a  b"), "a  b"), "");
-
-  static_assert(CaseInsensitiveEquals(SkipNonWhitespace("  a"), "  a"), "");
-  static_assert(CaseInsensitiveEquals(SkipNonWhitespace("a  b"), "  b"), "");
-
-  static_assert(TokenSize("aba ddf") == 3, "");
-
-  static_assert(NextTokenStart("aba ddf dfas", 4) == 8, "");
 }
 
 /// \brief Enumeration of primary colors.
@@ -245,13 +240,15 @@ TEST(Reflection, CompileTimeStringOps) {
 /// - blue:  Hex value 0x0000ff
 struct Color : EnumType<Color> {
   using EnumType<Color>::EnumType;
-  static constexpr const char* kValues = "red green blue";
+  static constexpr EnumStrings<3> values() { return {"red", "green", "blue"}; }
+  static constexpr const char* name() { return "Color"; }
 };
 
 TEST(Reflection, EnumType) {
   static_assert(Color::size() == 3, "");
-  EXPECT_EQ(Color::value_strings(),
-            std::vector<util::string_view>({"red", "green", "blue"}));
+  EXPECT_THAT(Color::values(),
+              ElementsAre(util::string_view{"red"}, util::string_view{"green"},
+                          util::string_view{"blue"}));
 
   static_assert(Color("red").index == 0, "");
   static_assert(*Color("GREEN") == 1, "");
@@ -284,6 +281,17 @@ TEST(Reflection, EnumType) {
         break;
     }
   }
+
+  EXPECT_THAT(Color::Make(0), ResultWith(Eq(Color(0))));
+  EXPECT_THAT(Color::Make(-33), Raises(StatusCode::Invalid,
+                                       HasSubstr("index -33 for enum Color- index should "
+                                                 "be in range [0, 3)")));
+
+  EXPECT_THAT(Color::Make("red"), ResultWith(Eq(Color("red"))));
+  EXPECT_THAT(Color::Make("mahogany"),
+              Raises(StatusCode::Invalid,
+                     HasSubstr("string 'mahogany' for enum Color- string should "
+                               "be one of {'red', 'green', 'blue'}")));
 }
 
 }  // namespace internal

From f345287a157530c6f134b829272a63a0e3b76822 Mon Sep 17 00:00:00 2001
From: Eduardo Ponce <edponce00@gmail.com>
Date: Thu, 15 Jul 2021 17:09:40 -0400
Subject: [PATCH 578/719] ARROW-12861: [C++][Compute] Add sign function kernels

This PR adds the sign function to the compute layer as a unary scalar function.
* Numeric inputs result in any of (-1,0,1)
* +/-0 input returns 0
* Infinity is treated as a signed number
* NaN input returns NaN

Closes #10395 from edponce/ARROW-12861-Compute-Add-sign-function-kernels

Authored-by: Eduardo Ponce <edponce00@gmail.com>
Signed-off-by: Benjamin Kietzman <bengilgit@gmail.com>
---
 cpp/src/arrow/compute/api_scalar.cc           |   1 +
 cpp/src/arrow/compute/api_scalar.h            |   9 ++
 .../compute/kernels/scalar_arithmetic.cc      |  77 +++++++++++++
 .../compute/kernels/scalar_arithmetic_test.cc | 102 +++++++++++++++---
 docs/source/cpp/compute.rst                   |  70 ++++++------
 docs/source/python/api/compute.rst            |   7 +-
 6 files changed, 218 insertions(+), 48 deletions(-)

diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc
index 68df5f98b10..abf6f75acb8 100644
--- a/cpp/src/arrow/compute/api_scalar.cc
+++ b/cpp/src/arrow/compute/api_scalar.cc
@@ -330,6 +330,7 @@ void RegisterScalarOptions(FunctionRegistry* registry) {
 
 SCALAR_ARITHMETIC_UNARY(AbsoluteValue, "abs", "abs_checked")
 SCALAR_ARITHMETIC_UNARY(Negate, "negate", "negate_checked")
+SCALAR_EAGER_UNARY(Sign, "sign")
 SCALAR_ARITHMETIC_UNARY(Sin, "sin", "sin_checked")
 SCALAR_ARITHMETIC_UNARY(Cos, "cos", "cos_checked")
 SCALAR_ARITHMETIC_UNARY(Asin, "asin", "asin_checked")
diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h
index bbaa4d13a21..e8161ddb5f2 100644
--- a/cpp/src/arrow/compute/api_scalar.h
+++ b/cpp/src/arrow/compute/api_scalar.h
@@ -513,6 +513,15 @@ Result<Datum> MinElementWise(
     ElementWiseAggregateOptions options = ElementWiseAggregateOptions::Defaults(),
     ExecContext* ctx = NULLPTR);
 
+/// \brief Get the sign of a value. Array values can be of arbitrary length. If argument
+/// is null the result will be null.
+///
+/// \param[in] arg the value to extract sign from
+/// \param[in] ctx the function execution context, optional
+/// \return the elementwise sign function
+ARROW_EXPORT
+Result<Datum> Sign(const Datum& arg, ExecContext* ctx = NULLPTR);
+
 /// \brief Compare a numeric array with a scalar.
 ///
 /// \param[in] left datum to compare, must be an Array
diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
index db73294e1fa..28904bdbfa0 100644
--- a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
@@ -20,8 +20,10 @@
 #include <limits>
 #include <utility>
 
+#include "arrow/compute/kernels/codegen_internal.h"
 #include "arrow/compute/kernels/common.h"
 #include "arrow/compute/kernels/util_internal.h"
+#include "arrow/type.h"
 #include "arrow/type_traits.h"
 #include "arrow/util/decimal.h"
 #include "arrow/util/int_util_internal.h"
@@ -462,6 +464,23 @@ struct PowerChecked {
   }
 };
 
+struct Sign {
+  template <typename T, typename Arg>
+  static constexpr enable_if_floating_point<T> Call(KernelContext*, Arg arg, Status*) {
+    return std::isnan(arg) ? arg : ((arg == 0) ? 0 : (std::signbit(arg) ? -1 : 1));
+  }
+
+  template <typename T, typename Arg>
+  static constexpr enable_if_unsigned_integer<T> Call(KernelContext*, Arg arg, Status*) {
+    return arg > 0;
+  }
+
+  template <typename T, typename Arg>
+  static constexpr enable_if_signed_integer<T> Call(KernelContext*, Arg arg, Status*) {
+    return (arg > 0) ? 1 : ((arg == 0) ? 0 : -1);
+  }
+};
+
 // Bitwise operations
 
 struct BitWiseNot {
@@ -1033,6 +1052,37 @@ void AddDecimalBinaryKernels(const std::string& name,
   DCHECK_OK((*func)->AddKernel({in_type256, in_type256}, out_type, exec256));
 }
 
+// Generate a kernel given an arithmetic functor
+template <template <typename...> class KernelGenerator, typename OutType, typename Op>
+ArrayKernelExec GenerateArithmeticWithFixedIntOutType(detail::GetTypeId get_id) {
+  switch (get_id.id) {
+    case Type::INT8:
+      return KernelGenerator<OutType, Int8Type, Op>::Exec;
+    case Type::UINT8:
+      return KernelGenerator<OutType, UInt8Type, Op>::Exec;
+    case Type::INT16:
+      return KernelGenerator<OutType, Int16Type, Op>::Exec;
+    case Type::UINT16:
+      return KernelGenerator<OutType, UInt16Type, Op>::Exec;
+    case Type::INT32:
+      return KernelGenerator<OutType, Int32Type, Op>::Exec;
+    case Type::UINT32:
+      return KernelGenerator<OutType, UInt32Type, Op>::Exec;
+    case Type::INT64:
+    case Type::TIMESTAMP:
+      return KernelGenerator<OutType, Int64Type, Op>::Exec;
+    case Type::UINT64:
+      return KernelGenerator<OutType, UInt64Type, Op>::Exec;
+    case Type::FLOAT:
+      return KernelGenerator<FloatType, FloatType, Op>::Exec;
+    case Type::DOUBLE:
+      return KernelGenerator<DoubleType, DoubleType, Op>::Exec;
+    default:
+      DCHECK(false);
+      return ExecFail;
+  }
+}
+
 struct ArithmeticFunction : ScalarFunction {
   using ScalarFunction::ScalarFunction;
 
@@ -1142,6 +1192,21 @@ std::shared_ptr<ScalarFunction> MakeUnaryArithmeticFunction(std::string name,
   return func;
 }
 
+// Like MakeUnaryArithmeticFunction, but for unary arithmetic ops with a fixed
+// output type for integral inputs.
+template <typename Op, typename IntOutType>
+std::shared_ptr<ScalarFunction> MakeUnaryArithmeticFunctionWithFixedIntOutType(
+    std::string name, const FunctionDoc* doc) {
+  auto int_out_ty = TypeTraits<IntOutType>::type_singleton();
+  auto func = std::make_shared<ArithmeticFunction>(name, Arity::Unary(), doc);
+  for (const auto& ty : NumericTypes()) {
+    auto out_ty = arrow::is_floating(ty->id()) ? ty : int_out_ty;
+    auto exec = GenerateArithmeticWithFixedIntOutType<ScalarUnary, IntOutType, Op>(ty);
+    DCHECK_OK(func->AddKernel({ty}, out_ty, exec));
+  }
+  return func;
+}
+
 // Like MakeUnaryArithmeticFunction, but for arithmetic ops that need to run
 // only on non-null output.
 template <typename Op>
@@ -1318,6 +1383,13 @@ const FunctionDoc pow_checked_doc{
      "or integer overflow is encountered."),
     {"base", "exponent"}};
 
+const FunctionDoc sign_doc{
+    "Get the signedness of the arguments element-wise",
+    ("Output is any of (-1,1) for nonzero inputs and 0 for zero input.\n"
+     "NaN values return NaN.  Integral values return signedness as Int8 and\n"
+     "floating-point values return it with the same type as the input values."),
+    {"x"}};
+
 const FunctionDoc bit_wise_not_doc{
     "Bit-wise negate the arguments element-wise", ("Null values return null."), {"x"}};
 
@@ -1579,6 +1651,11 @@ void RegisterScalarArithmetic(FunctionRegistry* registry) {
       MakeArithmeticFunctionNotNull<PowerChecked>("power_checked", &pow_checked_doc);
   DCHECK_OK(registry->AddFunction(std::move(power_checked)));
 
+  // ----------------------------------------------------------------------
+  auto sign =
+      MakeUnaryArithmeticFunctionWithFixedIntOutType<Sign, Int8Type>("sign", &sign_doc);
+  DCHECK_OK(registry->AddFunction(std::move(sign)));
+
   // ----------------------------------------------------------------------
   // Bitwise functions
   {
diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
index e37fb93fac2..a495b00a171 100644
--- a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
@@ -66,7 +66,7 @@ class TestUnaryArithmetic : public TestBase {
     return *arrow::MakeScalar(type_singleton(), value);
   }
 
-  // (Scalar)
+  // (Scalar, Scalar)
   void AssertUnaryOp(UnaryFunction func, CType argument, CType expected) {
     auto arg = MakeScalar(argument);
     auto exp = MakeScalar(expected);
@@ -74,34 +74,42 @@ class TestUnaryArithmetic : public TestBase {
     AssertScalarsApproxEqual(*exp, *actual.scalar(), /*verbose=*/true);
   }
 
-  // (Scalar)
+  // (Scalar, Scalar)
   void AssertUnaryOp(UnaryFunction func, const std::shared_ptr<Scalar>& arg,
                      const std::shared_ptr<Scalar>& expected) {
     ASSERT_OK_AND_ASSIGN(auto actual, func(arg, options_, nullptr));
     AssertScalarsApproxEqual(*expected, *actual.scalar(), /*verbose=*/true);
   }
 
-  // (Array)
-  void AssertUnaryOp(UnaryFunction func, const std::string& argument,
-                     const std::string& expected) {
-    auto arg = ArrayFromJSON(type_singleton(), argument);
+  // (JSON, JSON)
+  void AssertUnaryOp(UnaryFunction func, const std::string& arg_json,
+                     const std::string& expected_json) {
+    auto arg = ArrayFromJSON(type_singleton(), arg_json);
+    auto expected = ArrayFromJSON(type_singleton(), expected_json);
     AssertUnaryOp(func, arg, expected);
   }
 
-  // (Array)
+  // (Array, JSON)
   void AssertUnaryOp(UnaryFunction func, const std::shared_ptr<Array>& arg,
                      const std::string& expected_json) {
     const auto expected = ArrayFromJSON(type_singleton(), expected_json);
-    return AssertUnaryOp(func, arg, expected);
+    AssertUnaryOp(func, arg, expected);
   }
 
-  // (Array)
+  // (JSON, Array)
+  void AssertUnaryOp(UnaryFunction func, const std::string& arg_json,
+                     const std::shared_ptr<Array>& expected) {
+    auto arg = ArrayFromJSON(type_singleton(), arg_json);
+    AssertUnaryOp(func, arg, expected);
+  }
+
+  // (Array, Array)
   void AssertUnaryOp(UnaryFunction func, const std::shared_ptr<Array>& arg,
                      const std::shared_ptr<Array>& expected) {
-    ASSERT_OK_AND_ASSIGN(Datum actual, func(arg, options_, nullptr));
+    ASSERT_OK_AND_ASSIGN(auto actual, func(arg, options_, nullptr));
     ValidateAndAssertApproxEqual(actual.make_array(), expected);
 
-    // Also check (Scalar) operations
+    // Also check (Scalar, Scalar) operations
     const int64_t length = expected->length();
     for (int64_t i = 0; i < length; ++i) {
       const auto expected_scalar = *expected->GetScalar(i);
@@ -1024,7 +1032,8 @@ TEST(TestBinaryArithmetic, AddWithImplicitCastsUint64EdgeCase) {
 }
 
 TEST(TestUnaryArithmetic, DispatchBest) {
-  for (std::string name : {"negate", "abs", "abs_checked"}) {
+  // All arithmetic
+  for (std::string name : {"negate", "abs", "abs_checked", "sign"}) {
     for (const auto& ty : {int8(), int16(), int32(), int64(), uint8(), uint16(), uint32(),
                            uint64(), float32(), float64()}) {
       CheckDispatchBest(name, {ty}, {ty});
@@ -1032,6 +1041,7 @@ TEST(TestUnaryArithmetic, DispatchBest) {
     }
   }
 
+  // Signed arithmetic
   for (std::string name : {"negate_checked"}) {
     for (const auto& ty : {int8(), int16(), int32(), int64(), float32(), float64()}) {
       CheckDispatchBest(name, {ty}, {ty});
@@ -1039,7 +1049,8 @@ TEST(TestUnaryArithmetic, DispatchBest) {
     }
   }
 
-  for (std::string name : {"negate", "negate_checked", "abs", "abs_checked"}) {
+  // Null input
+  for (std::string name : {"negate", "negate_checked", "abs", "abs_checked", "sign"}) {
     CheckDispatchFails(name, {null()});
   }
 
@@ -1973,5 +1984,70 @@ TYPED_TEST(TestUnaryArithmeticSigned, Log) {
   this->AssertUnaryOpRaises(Log1p, "[-2]", "logarithm of negative number");
 }
 
+TYPED_TEST(TestUnaryArithmeticSigned, Sign) {
+  using CType = typename TestFixture::CType;
+  auto min = std::numeric_limits<CType>::min();
+  auto max = std::numeric_limits<CType>::max();
+
+  // N.B. TestUnaryArithmetic expects a function with ArithmeticOptions as its
+  // second parameter
+  auto sign = [](const Datum& arg, ArithmeticOptions, ExecContext* ctx) {
+    return Sign(arg, ctx);
+  };
+
+  this->AssertUnaryOp(sign, "[]", ArrayFromJSON(int8(), "[]"));
+  this->AssertUnaryOp(sign, "[null]", ArrayFromJSON(int8(), "[null]"));
+  this->AssertUnaryOp(sign, "[1, null, -10]", ArrayFromJSON(int8(), "[1, null, -1]"));
+  this->AssertUnaryOp(sign, "[0]", ArrayFromJSON(int8(), "[0]"));
+  this->AssertUnaryOp(sign, "[1, 10, 127]", ArrayFromJSON(int8(), "[1, 1, 1]"));
+  this->AssertUnaryOp(sign, "[-1, -10, -127]", ArrayFromJSON(int8(), "[-1, -1, -1]"));
+  this->AssertUnaryOp(sign, this->MakeScalar(min), *arrow::MakeScalar(int8(), -1));
+  this->AssertUnaryOp(sign, this->MakeScalar(max), *arrow::MakeScalar(int8(), 1));
+}
+
+TYPED_TEST(TestUnaryArithmeticUnsigned, Sign) {
+  using CType = typename TestFixture::CType;
+  auto min = std::numeric_limits<CType>::min();
+  auto max = std::numeric_limits<CType>::max();
+
+  // N.B. TestUnaryArithmetic expects a function with ArithmeticOptions as its
+  // second parameter
+  auto sign = [](const Datum& arg, ArithmeticOptions, ExecContext* ctx) {
+    return Sign(arg, ctx);
+  };
+
+  this->AssertUnaryOp(sign, "[]", ArrayFromJSON(int8(), "[]"));
+  this->AssertUnaryOp(sign, "[null]", ArrayFromJSON(int8(), "[null]"));
+  this->AssertUnaryOp(sign, "[1, null, 10]", ArrayFromJSON(int8(), "[1, null, 1]"));
+  this->AssertUnaryOp(sign, "[0]", ArrayFromJSON(int8(), "[0]"));
+  this->AssertUnaryOp(sign, "[1, 10, 127]", ArrayFromJSON(int8(), "[1, 1, 1]"));
+  this->AssertUnaryOp(sign, this->MakeScalar(min), *arrow::MakeScalar(int8(), 0));
+  this->AssertUnaryOp(sign, this->MakeScalar(max), *arrow::MakeScalar(int8(), 1));
+}
+
+TYPED_TEST(TestUnaryArithmeticFloating, Sign) {
+  using CType = typename TestFixture::CType;
+  auto min = std::numeric_limits<CType>::lowest();
+  auto max = std::numeric_limits<CType>::max();
+
+  this->SetNansEqual(true);
+
+  // N.B. TestUnaryArithmetic expects a function with ArithmeticOptions as its
+  // second parameter
+  auto sign = [](const Datum& arg, ArithmeticOptions, ExecContext* ctx) {
+    return Sign(arg, ctx);
+  };
+
+  this->AssertUnaryOp(sign, "[]", "[]");
+  this->AssertUnaryOp(sign, "[null]", "[null]");
+  this->AssertUnaryOp(sign, "[1.3, null, -10.80]", "[1, null, -1]");
+  this->AssertUnaryOp(sign, "[0.0, -0.0]", "[0, 0]");
+  this->AssertUnaryOp(sign, "[1.3, 10.80, 12748.001]", "[1, 1, 1]");
+  this->AssertUnaryOp(sign, "[-1.3, -10.80, -12748.001]", "[-1, -1, -1]");
+  this->AssertUnaryOp(sign, "[Inf, -Inf]", "[1, -1]");
+  this->AssertUnaryOp(sign, "[NaN]", "[NaN]");
+  this->AssertUnaryOp(sign, this->MakeScalar(min), this->MakeScalar(-1));
+  this->AssertUnaryOp(sign, this->MakeScalar(max), this->MakeScalar(1));
+}
 }  // namespace compute
 }  // namespace arrow
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index ed97faead74..f7fd1fa3f5f 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -258,41 +258,43 @@ Input(s) will be cast to the :ref:`common numeric type <common-numeric-type>`
 (and dictionary decoded, if applicable) before the operation is applied.
 
 The default variant of these functions does not detect overflow (the result
-then typically wraps around).  Each function is also available in an
+then typically wraps around).  Most functions are also available in an
 overflow-checking variant, suffixed ``_checked``, which returns
 an ``Invalid`` :class:`Status` when overflow is detected.
 
-+------------------+--------+----------------+----------------+-------+
-| Function name    | Arity  | Input types    | Output type    | Notes |
-+==================+========+================+================+=======+
-| abs              | Unary  | Numeric        | Numeric        |       |
-+------------------+--------+----------------+----------------+-------+
-| abs_checked      | Unary  | Numeric        | Numeric        |       |
-+------------------+--------+----------------+----------------+-------+
-| add              | Binary | Numeric        | Numeric        | \(1)  |
-+------------------+--------+----------------+----------------+-------+
-| add_checked      | Binary | Numeric        | Numeric        | \(1)  |
-+------------------+--------+----------------+----------------+-------+
-| divide           | Binary | Numeric        | Numeric        | \(1)  |
-+------------------+--------+----------------+----------------+-------+
-| divide_checked   | Binary | Numeric        | Numeric        | \(1)  |
-+------------------+--------+----------------+----------------+-------+
-| multiply         | Binary | Numeric        | Numeric        | \(1)  |
-+------------------+--------+----------------+----------------+-------+
-| multiply_checked | Binary | Numeric        | Numeric        | \(1)  |
-+------------------+--------+----------------+----------------+-------+
-| negate           | Unary  | Numeric        | Numeric        |       |
-+------------------+--------+----------------+----------------+-------+
-| negate_checked   | Unary  | Signed Numeric | Signed Numeric |       |
-+------------------+--------+----------------+----------------+-------+
-| power            | Binary | Numeric        | Numeric        |       |
-+------------------+--------+----------------+----------------+-------+
-| power_checked    | Binary | Numeric        | Numeric        |       |
-+------------------+--------+----------------+----------------+-------+
-| subtract         | Binary | Numeric        | Numeric        | \(1)  |
-+------------------+--------+----------------+----------------+-------+
-| subtract_checked | Binary | Numeric        | Numeric        | \(1)  |
-+------------------+--------+----------------+----------------+-------+
++------------------+--------+----------------+----------------------+-------+
+| Function name    | Arity  | Input types    | Output type          | Notes |
++==================+========+================+======================+=======+
+| abs              | Unary  | Numeric        | Numeric              |       |
++------------------+--------+----------------+----------------------+-------+
+| abs_checked      | Unary  | Numeric        | Numeric              |       |
++------------------+--------+----------------+----------------------+-------+
+| add              | Binary | Numeric        | Numeric              | \(1)  |
++------------------+--------+----------------+----------------------+-------+
+| add_checked      | Binary | Numeric        | Numeric              | \(1)  |
++------------------+--------+----------------+----------------------+-------+
+| divide           | Binary | Numeric        | Numeric              | \(1)  |
++------------------+--------+----------------+----------------------+-------+
+| divide_checked   | Binary | Numeric        | Numeric              | \(1)  |
++------------------+--------+----------------+----------------------+-------+
+| multiply         | Binary | Numeric        | Numeric              | \(1)  |
++------------------+--------+----------------+----------------------+-------+
+| multiply_checked | Binary | Numeric        | Numeric              | \(1)  |
++------------------+--------+----------------+----------------------+-------+
+| negate           | Unary  | Numeric        | Numeric              |       |
++------------------+--------+----------------+----------------------+-------+
+| negate_checked   | Unary  | Signed Numeric | Signed Numeric       |       |
++------------------+--------+----------------+----------------------+-------+
+| power            | Binary | Numeric        | Numeric              |       |
++------------------+--------+----------------+----------------------+-------+
+| power_checked    | Binary | Numeric        | Numeric              |       |
++------------------+--------+----------------+----------------------+-------+
+| sign             | Unary  | Numeric        | Int8/Float32/Float64 | \(2)  |
++------------------+--------+----------------+----------------------+-------+
+| subtract         | Binary | Numeric        | Numeric              | \(1)  |
++------------------+--------+----------------+----------------------+-------+
+| subtract_checked | Binary | Numeric        | Numeric              | \(1)  |
++------------------+--------+----------------+----------------------+-------+
 
 * \(1) Precision and scale of computed DECIMAL results
 
@@ -315,6 +317,10 @@ an ``Invalid`` :class:`Status` when overflow is detected.
   enough scale kept. Error is returned if the result precision is beyond the
   decimal value range.
 
+* \(2) Output is any of (-1,1) for nonzero inputs and 0 for zero input.
+  NaN values return NaN.  Integral values return signedness as Int8 and
+  floating-point values return it with the same type as the input values.
+
 Bit-wise functions
 ~~~~~~~~~~~~~~~~~~
 
diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst
index c12f2f91b26..fd32d08f85c 100644
--- a/docs/source/python/api/compute.rst
+++ b/docs/source/python/api/compute.rst
@@ -39,7 +39,7 @@ Aggregations
 Arithmetic Functions
 --------------------
 
-By default these functions do not detect overflow. Each function is also
+By default these functions do not detect overflow. Most functions are also
 available in an overflow-checking variant, suffixed ``_checked``, which
 throws an ``ArrowInvalid`` exception when overflow is detected.
 
@@ -54,14 +54,15 @@ throws an ``ArrowInvalid`` exception when overflow is detected.
    divide_checked
    multiply
    multiply_checked
-   subtract
-   subtract_checked
    power
    power_checked
    shift_left
    shift_left_checked
    shift_right
    shift_right_checked
+   sign
+   subtract
+   subtract_checked
 
 Bit-wise operations do not offer (or need) a checked variant.
 

From 28d24bdc3f9ff4e474d945db6fc0f40aa8ced483 Mon Sep 17 00:00:00 2001
From: niranda perera <niranda.perera@gmail.com>
Date: Thu, 15 Jul 2021 21:22:15 -0400
Subject: [PATCH 579/719] ARROW-12955:  [C++] Add additional type support for
 if_else kernel

This PR adds fixed and variable binary type support

Closes #10538 from nirandaperera/ARROW-12955

Authored-by: niranda perera <niranda.perera@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 .../arrow/compute/kernels/codegen_internal.h  |  14 +-
 .../arrow/compute/kernels/scalar_if_else.cc   | 561 +++++++++++++++---
 .../kernels/scalar_if_else_benchmark.cc       |  79 ++-
 .../compute/kernels/scalar_if_else_test.cc    | 196 +++++-
 cpp/src/arrow/util/bit_util.h                 |   6 +-
 5 files changed, 735 insertions(+), 121 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/codegen_internal.h b/cpp/src/arrow/compute/kernels/codegen_internal.h
index d28ede4f77a..cb9b13bb3d7 100644
--- a/cpp/src/arrow/compute/kernels/codegen_internal.h
+++ b/cpp/src/arrow/compute/kernels/codegen_internal.h
@@ -1222,26 +1222,26 @@ ArrayKernelExec GenerateSignedInteger(detail::GetTypeId get_id) {
 // bits).
 //
 // See "Numeric" above for description of the generator functor
-template <template <typename...> class Generator>
+template <template <typename...> class Generator, typename... Args>
 ArrayKernelExec GenerateTypeAgnosticPrimitive(detail::GetTypeId get_id) {
   switch (get_id.id) {
     case Type::NA:
-      return Generator<NullType>::Exec;
+      return Generator<NullType, Args...>::Exec;
     case Type::BOOL:
-      return Generator<BooleanType>::Exec;
+      return Generator<BooleanType, Args...>::Exec;
     case Type::UINT8:
     case Type::INT8:
-      return Generator<UInt8Type>::Exec;
+      return Generator<UInt8Type, Args...>::Exec;
     case Type::UINT16:
     case Type::INT16:
-      return Generator<UInt16Type>::Exec;
+      return Generator<UInt16Type, Args...>::Exec;
     case Type::UINT32:
     case Type::INT32:
     case Type::FLOAT:
     case Type::DATE32:
     case Type::TIME32:
     case Type::INTERVAL_MONTHS:
-      return Generator<UInt32Type>::Exec;
+      return Generator<UInt32Type, Args...>::Exec;
     case Type::UINT64:
     case Type::INT64:
     case Type::DOUBLE:
@@ -1250,7 +1250,7 @@ ArrayKernelExec GenerateTypeAgnosticPrimitive(detail::GetTypeId get_id) {
     case Type::TIME64:
     case Type::DURATION:
     case Type::INTERVAL_DAY_TIME:
-      return Generator<UInt64Type>::Exec;
+      return Generator<UInt64Type, Args...>::Exec;
     default:
       DCHECK(false);
       return ExecFail;
diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else.cc b/cpp/src/arrow/compute/kernels/scalar_if_else.cc
index 32307542d97..e8578305bf6 100644
--- a/cpp/src/arrow/compute/kernels/scalar_if_else.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_if_else.cc
@@ -59,6 +59,7 @@ inline Bitmap GetBitmap(const Datum& datum, int i) {
 // if the condition is null then output is null otherwise we take validity from the
 // selected argument
 // ie. cond.valid & (cond.data & left.valid | ~cond.data & right.valid)
+template <typename AllocateNullBitmap>
 Status PromoteNullsVisitor(KernelContext* ctx, const Datum& cond_d, const Datum& left_d,
                            const Datum& right_d, ArrayData* output) {
   auto cond_const = GetConstantValidityWord(cond_d);
@@ -79,19 +80,37 @@ Status PromoteNullsVisitor(KernelContext* ctx, const Datum& cond_d, const Datum&
   // cond.valid & (cond.data & left.valid | ~cond.data & right.valid)
   // In the following cases, we dont need to allocate out_valid bitmap
 
-  // if cond & left & right all ones, then output is all valid. output validity buffer
-  // is already allocated, hence set all bits
+  // if cond & left & right all ones, then output is all valid.
+  // if output validity buffer is already allocated (NullHandling::
+  // COMPUTED_PREALLOCATE) -> set all bits
+  // else, return nullptr
   if (cond_const == kAllValid && left_const == kAllValid && right_const == kAllValid) {
-    BitUtil::SetBitmap(output->buffers[0]->mutable_data(), output->offset,
-                       output->length);
+    if (AllocateNullBitmap::value) {  // NullHandling::COMPUTED_NO_PREALLOCATE
+      output->buffers[0] = nullptr;
+    } else {  // NullHandling::COMPUTED_PREALLOCATE
+      BitUtil::SetBitmap(output->buffers[0]->mutable_data(), output->offset,
+                         output->length);
+    }
     return Status::OK();
   }
 
   if (left_const == kAllValid && right_const == kAllValid) {
     // if both left and right are valid, no need to calculate out_valid bitmap. Copy
     // cond validity buffer
-    arrow::internal::CopyBitmap(cond.buffers[0]->data(), cond.offset, cond.length,
-                                output->buffers[0]->mutable_data(), output->offset);
+    if (AllocateNullBitmap::value) {  // NullHandling::COMPUTED_NO_PREALLOCATE
+      // if there's an offset, copy bitmap (cannot slice a bitmap)
+      if (cond.offset) {
+        ARROW_ASSIGN_OR_RAISE(
+            output->buffers[0],
+            arrow::internal::CopyBitmap(ctx->memory_pool(), cond.buffers[0]->data(),
+                                        cond.offset, cond.length));
+      } else {  // just copy assign cond validity buffer
+        output->buffers[0] = cond.buffers[0];
+      }
+    } else {  // NullHandling::COMPUTED_PREALLOCATE
+      arrow::internal::CopyBitmap(cond.buffers[0]->data(), cond.offset, cond.length,
+                                  output->buffers[0]->mutable_data(), output->offset);
+    }
     return Status::OK();
   }
 
@@ -101,6 +120,12 @@ Status PromoteNullsVisitor(KernelContext* ctx, const Datum& cond_d, const Datum&
     return c_valid & ((c_data & l_valid) | (~c_data & r_valid));
   };
 
+  if (AllocateNullBitmap::value) {
+    // following cases requires a separate out_valid buffer. COMPUTED_NO_PREALLOCATE
+    // would not have allocated buffers for it.
+    ARROW_ASSIGN_OR_RAISE(output->buffers[0], ctx->AllocateBitmap(cond.length));
+  }
+
   std::array<Bitmap, 1> out_bitmaps{
       Bitmap{output->buffers[0], output->offset, output->length}};
 
@@ -202,41 +227,37 @@ static constexpr int64_t word_len = sizeof(Word) * 8;
 /// Runs the main if_else loop. Here, it is expected that the right data has already
 /// been copied to the output.
 /// If `invert` is meant to invert the cond.data. If is set to `true`, then the
-/// buffer will be inverted before calling the handle_bulk or handle_each functions.
+/// buffer will be inverted before calling the handle_block or handle_each functions.
 /// This is useful, when left is an array and right is scalar. Then rather than
 /// copying data from the right to output, we can copy left data to the output and
 /// invert the cond data to fill right values. Filling out with a scalar is presumed to
 /// be more efficient than filling with an array
-template <typename HandleBulk, typename HandleEach, bool invert = false>
-static void RunIfElseLoop(const ArrayData& cond, HandleBulk handle_bulk,
-                          HandleEach handle_each) {
+///
+/// `HandleBlock` has the signature:
+///     [](int64_t offset, int64_t length){...}
+/// It should copy `length` number of elements from source array to output array with
+/// `offset` offset in both arrays
+template <typename HandleBlock, bool invert = false>
+void RunIfElseLoop(const ArrayData& cond, const HandleBlock& handle_block) {
   int64_t data_offset = 0;
   int64_t bit_offset = cond.offset;
   const auto* cond_data = cond.buffers[1]->data();  // this is a BoolArray
 
   BitmapWordReader<Word> cond_reader(cond_data, cond.offset, cond.length);
 
+  constexpr Word pickAll = invert ? 0 : UINT64_MAX;
+  constexpr Word pickNone = ~pickAll;
+
   int64_t cnt = cond_reader.words();
   while (cnt--) {
     Word word = cond_reader.NextWord();
-    if (invert) {
-      if (word == 0) {
-        handle_bulk(data_offset, word_len);
-      } else if (word != UINT64_MAX) {
-        for (int64_t i = 0; i < word_len; ++i) {
-          if (!BitUtil::GetBit(cond_data, bit_offset + i)) {
-            handle_each(data_offset + i);
-          }
-        }
-      }
-    } else {
-      if (word == UINT64_MAX) {
-        handle_bulk(data_offset, word_len);
-      } else if (word) {
-        for (int64_t i = 0; i < word_len; ++i) {
-          if (BitUtil::GetBit(cond_data, bit_offset + i)) {
-            handle_each(data_offset + i);
-          }
+
+    if (word == pickAll) {
+      handle_block(data_offset, word_len);
+    } else if (word != pickNone) {
+      for (int64_t i = 0; i < word_len; ++i) {
+        if (BitUtil::GetBit(cond_data, bit_offset + i) != invert) {
+          handle_block(data_offset + i, 1);
         }
       }
     }
@@ -244,28 +265,21 @@ static void RunIfElseLoop(const ArrayData& cond, HandleBulk handle_bulk,
     bit_offset += word_len;
   }
 
+  constexpr uint8_t pickAllByte = invert ? 0 : UINT8_MAX;
+  // byte bit-wise inversion is int-wide. Hence XOR with 0xff
+  constexpr uint8_t pickNoneByte = pickAllByte ^ 0xff;
+
   cnt = cond_reader.trailing_bytes();
   while (cnt--) {
     int valid_bits;
     uint8_t byte = cond_reader.NextTrailingByte(valid_bits);
-    if (invert) {
-      if (byte == 0 && valid_bits == 8) {
-        handle_bulk(data_offset, 8);
-      } else if (byte != UINT8_MAX) {
-        for (int i = 0; i < valid_bits; ++i) {
-          if (!BitUtil::GetBit(cond_data, bit_offset + i)) {
-            handle_each(data_offset + i);
-          }
-        }
-      }
-    } else {
-      if (byte == UINT8_MAX && valid_bits == 8) {
-        handle_bulk(data_offset, 8);
-      } else if (byte) {
-        for (int i = 0; i < valid_bits; ++i) {
-          if (BitUtil::GetBit(cond_data, bit_offset + i)) {
-            handle_each(data_offset + i);
-          }
+
+    if (byte == pickAllByte && valid_bits == 8) {
+      handle_block(data_offset, 8);
+    } else if (byte != pickNoneByte) {
+      for (int i = 0; i < valid_bits; ++i) {
+        if (BitUtil::GetBit(cond_data, bit_offset + i) != invert) {
+          handle_block(data_offset + i, 1);
         }
       }
     }
@@ -274,19 +288,17 @@ static void RunIfElseLoop(const ArrayData& cond, HandleBulk handle_bulk,
   }
 }
 
-template <typename HandleBulk, typename HandleEach>
-static void RunIfElseLoopInverted(const ArrayData& cond, HandleBulk handle_bulk,
-                                  HandleEach handle_each) {
-  return RunIfElseLoop<HandleBulk, HandleEach, true>(cond, handle_bulk, handle_each);
+template <typename HandleBlock>
+void RunIfElseLoopInverted(const ArrayData& cond, const HandleBlock& handle_block) {
+  RunIfElseLoop<HandleBlock, true>(cond, handle_block);
 }
 
 /// Runs if-else when cond is a scalar. Two special functions are required,
 /// 1.CopyArrayData, 2. BroadcastScalar
 template <typename CopyArrayData, typename BroadcastScalar>
-static Status RunIfElseScalar(const BooleanScalar& cond, const Datum& left,
-                              const Datum& right, Datum* out,
-                              CopyArrayData copy_array_data,
-                              BroadcastScalar broadcast_scalar) {
+Status RunIfElseScalar(const BooleanScalar& cond, const Datum& left, const Datum& right,
+                       Datum* out, const CopyArrayData& copy_array_data,
+                       const BroadcastScalar& broadcast_scalar) {
   if (left.is_scalar() && right.is_scalar()) {  // output will be a scalar
     if (cond.is_valid) {
       *out = cond.value ? left.scalar() : right.scalar();
@@ -377,13 +389,10 @@ struct IfElseFunctor<Type, enable_if_number<Type>> {
     // selectively copy values from left data
     const T* left_data = left.GetValues<T>(1);
 
-    RunIfElseLoop(
-        cond,
-        [&](int64_t data_offset, int64_t num_elems) {
-          std::memcpy(out_values + data_offset, left_data + data_offset,
-                      num_elems * sizeof(T));
-        },
-        [&](int64_t data_offset) { out_values[data_offset] = left_data[data_offset]; });
+    RunIfElseLoop(cond, [&](int64_t data_offset, int64_t num_elems) {
+      std::memcpy(out_values + data_offset, left_data + data_offset,
+                  num_elems * sizeof(T));
+    });
 
     return Status::OK();
   }
@@ -400,13 +409,10 @@ struct IfElseFunctor<Type, enable_if_number<Type>> {
     // selectively copy values from left data
     T left_data = internal::UnboxScalar<Type>::Unbox(left);
 
-    RunIfElseLoop(
-        cond,
-        [&](int64_t data_offset, int64_t num_elems) {
-          std::fill(out_values + data_offset, out_values + data_offset + num_elems,
-                    left_data);
-        },
-        [&](int64_t data_offset) { out_values[data_offset] = left_data; });
+    RunIfElseLoop(cond, [&](int64_t data_offset, int64_t num_elems) {
+      std::fill(out_values + data_offset, out_values + data_offset + num_elems,
+                left_data);
+    });
 
     return Status::OK();
   }
@@ -422,13 +428,10 @@ struct IfElseFunctor<Type, enable_if_number<Type>> {
 
     T right_data = internal::UnboxScalar<Type>::Unbox(right);
 
-    RunIfElseLoopInverted(
-        cond,
-        [&](int64_t data_offset, int64_t num_elems) {
-          std::fill(out_values + data_offset, out_values + data_offset + num_elems,
-                    right_data);
-        },
-        [&](int64_t data_offset) { out_values[data_offset] = right_data; });
+    RunIfElseLoopInverted(cond, [&](int64_t data_offset, int64_t num_elems) {
+      std::fill(out_values + data_offset, out_values + data_offset + num_elems,
+                right_data);
+    });
 
     return Status::OK();
   }
@@ -444,13 +447,10 @@ struct IfElseFunctor<Type, enable_if_number<Type>> {
 
     // selectively copy values from left data
     T left_data = internal::UnboxScalar<Type>::Unbox(left);
-    RunIfElseLoop(
-        cond,
-        [&](int64_t data_offset, int64_t num_elems) {
-          std::fill(out_values + data_offset, out_values + data_offset + num_elems,
-                    left_data);
-        },
-        [&](int64_t data_offset) { out_values[data_offset] = left_data; });
+    RunIfElseLoop(cond, [&](int64_t data_offset, int64_t num_elems) {
+      std::fill(out_values + data_offset, out_values + data_offset + num_elems,
+                left_data);
+    });
 
     return Status::OK();
   }
@@ -576,6 +576,345 @@ struct IfElseFunctor<Type, enable_if_boolean<Type>> {
 };
 
 template <typename Type>
+struct IfElseFunctor<Type, enable_if_base_binary<Type>> {
+  using OffsetType = typename TypeTraits<Type>::OffsetType::c_type;
+  using ArrayType = typename TypeTraits<Type>::ArrayType;
+  using BuilderType = typename TypeTraits<Type>::BuilderType;
+
+  // A - Array, S - Scalar, X = Array/Scalar
+
+  // SXX
+  static Status Call(KernelContext* ctx, const BooleanScalar& cond, const Datum& left,
+                     const Datum& right, Datum* out) {
+    if (left.is_scalar() && right.is_scalar()) {
+      if (cond.is_valid) {
+        *out = cond.value ? left.scalar() : right.scalar();
+      } else {
+        *out = MakeNullScalar(left.type());
+      }
+      return Status::OK();
+    }
+    // either left or right is an array. Output is always an array
+    int64_t out_arr_len = std::max(left.length(), right.length());
+    if (!cond.is_valid) {
+      // cond is null; just create a null array
+      ARROW_ASSIGN_OR_RAISE(*out,
+                            MakeArrayOfNull(left.type(), out_arr_len, ctx->memory_pool()))
+      return Status::OK();
+    }
+
+    const auto& valid_data = cond.value ? left : right;
+    if (valid_data.is_array()) {
+      *out = valid_data;
+    } else {
+      // valid data is a scalar that needs to be broadcasted
+      ARROW_ASSIGN_OR_RAISE(*out, MakeArrayFromScalar(*valid_data.scalar(), out_arr_len,
+                                                      ctx->memory_pool()));
+    }
+    return Status::OK();
+  }
+
+  //  AAA
+  static Status Call(KernelContext* ctx, const ArrayData& cond, const ArrayData& left,
+                     const ArrayData& right, ArrayData* out) {
+    const auto* left_offsets = left.GetValues<OffsetType>(1);
+    const uint8_t* left_data = left.buffers[2]->data();
+    const auto* right_offsets = right.GetValues<OffsetType>(1);
+    const uint8_t* right_data = right.buffers[2]->data();
+
+    // allocate data buffer conservatively
+    int64_t data_buff_alloc = left_offsets[left.length] - left_offsets[0] +
+                              right_offsets[right.length] - right_offsets[0];
+
+    BuilderType builder(ctx->memory_pool());
+    ARROW_RETURN_NOT_OK(builder.Reserve(cond.length + 1));
+    ARROW_RETURN_NOT_OK(builder.ReserveData(data_buff_alloc));
+
+    RunLoop(
+        cond, *out,
+        [&](int64_t i) {
+          builder.UnsafeAppend(left_data + left_offsets[i],
+                               left_offsets[i + 1] - left_offsets[i]);
+        },
+        [&](int64_t i) {
+          builder.UnsafeAppend(right_data + right_offsets[i],
+                               right_offsets[i + 1] - right_offsets[i]);
+        },
+        [&]() { builder.UnsafeAppendNull(); });
+    ARROW_ASSIGN_OR_RAISE(auto out_arr, builder.Finish());
+
+    out->SetNullCount(out_arr->data()->null_count);
+    out->buffers[0] = std::move(out_arr->data()->buffers[0]);
+    out->buffers[1] = std::move(out_arr->data()->buffers[1]);
+    out->buffers[2] = std::move(out_arr->data()->buffers[2]);
+    return Status::OK();
+  }
+
+  // ASA
+  static Status Call(KernelContext* ctx, const ArrayData& cond, const Scalar& left,
+                     const ArrayData& right, ArrayData* out) {
+    util::string_view left_data = internal::UnboxScalar<Type>::Unbox(left);
+    auto left_size = static_cast<OffsetType>(left_data.size());
+
+    const auto* right_offsets = right.GetValues<OffsetType>(1);
+    const uint8_t* right_data = right.buffers[2]->data();
+
+    // allocate data buffer conservatively
+    int64_t data_buff_alloc =
+        left_size * cond.length + right_offsets[right.length] - right_offsets[0];
+
+    BuilderType builder(ctx->memory_pool());
+    ARROW_RETURN_NOT_OK(builder.Reserve(cond.length + 1));
+    ARROW_RETURN_NOT_OK(builder.ReserveData(data_buff_alloc));
+
+    RunLoop(
+        cond, *out, [&](int64_t i) { builder.UnsafeAppend(left_data.data(), left_size); },
+        [&](int64_t i) {
+          builder.UnsafeAppend(right_data + right_offsets[i],
+                               right_offsets[i + 1] - right_offsets[i]);
+        },
+        [&]() { builder.UnsafeAppendNull(); });
+    ARROW_ASSIGN_OR_RAISE(auto out_arr, builder.Finish());
+
+    out->SetNullCount(out_arr->data()->null_count);
+    out->buffers[0] = std::move(out_arr->data()->buffers[0]);
+    out->buffers[1] = std::move(out_arr->data()->buffers[1]);
+    out->buffers[2] = std::move(out_arr->data()->buffers[2]);
+    return Status::OK();
+  }
+
+  // AAS
+  static Status Call(KernelContext* ctx, const ArrayData& cond, const ArrayData& left,
+                     const Scalar& right, ArrayData* out) {
+    const auto* left_offsets = left.GetValues<OffsetType>(1);
+    const uint8_t* left_data = left.buffers[2]->data();
+
+    util::string_view right_data = internal::UnboxScalar<Type>::Unbox(right);
+    auto right_size = static_cast<OffsetType>(right_data.size());
+
+    // allocate data buffer conservatively
+    int64_t data_buff_alloc =
+        right_size * cond.length + left_offsets[left.length] - left_offsets[0];
+
+    BuilderType builder(ctx->memory_pool());
+    ARROW_RETURN_NOT_OK(builder.Reserve(cond.length + 1));
+    ARROW_RETURN_NOT_OK(builder.ReserveData(data_buff_alloc));
+
+    RunLoop(
+        cond, *out,
+        [&](int64_t i) {
+          builder.UnsafeAppend(left_data + left_offsets[i],
+                               left_offsets[i + 1] - left_offsets[i]);
+        },
+        [&](int64_t i) { builder.UnsafeAppend(right_data.data(), right_size); },
+        [&]() { builder.UnsafeAppendNull(); });
+    ARROW_ASSIGN_OR_RAISE(auto out_arr, builder.Finish());
+
+    out->SetNullCount(out_arr->data()->null_count);
+    out->buffers[0] = std::move(out_arr->data()->buffers[0]);
+    out->buffers[1] = std::move(out_arr->data()->buffers[1]);
+    out->buffers[2] = std::move(out_arr->data()->buffers[2]);
+    return Status::OK();
+  }
+
+  // ASS
+  static Status Call(KernelContext* ctx, const ArrayData& cond, const Scalar& left,
+                     const Scalar& right, ArrayData* out) {
+    util::string_view left_data = internal::UnboxScalar<Type>::Unbox(left);
+    auto left_size = static_cast<OffsetType>(left_data.size());
+
+    util::string_view right_data = internal::UnboxScalar<Type>::Unbox(right);
+    auto right_size = static_cast<OffsetType>(right_data.size());
+
+    // allocate data buffer conservatively
+    int64_t data_buff_alloc = std::max(right_size, left_size) * cond.length;
+    BuilderType builder(ctx->memory_pool());
+    ARROW_RETURN_NOT_OK(builder.Reserve(cond.length + 1));
+    ARROW_RETURN_NOT_OK(builder.ReserveData(data_buff_alloc));
+
+    RunLoop(
+        cond, *out, [&](int64_t i) { builder.UnsafeAppend(left_data.data(), left_size); },
+        [&](int64_t i) { builder.UnsafeAppend(right_data.data(), right_size); },
+        [&]() { builder.UnsafeAppendNull(); });
+    ARROW_ASSIGN_OR_RAISE(auto out_arr, builder.Finish());
+
+    out->SetNullCount(out_arr->data()->null_count);
+    out->buffers[0] = std::move(out_arr->data()->buffers[0]);
+    out->buffers[1] = std::move(out_arr->data()->buffers[1]);
+    out->buffers[2] = std::move(out_arr->data()->buffers[2]);
+    return Status::OK();
+  }
+
+  template <typename HandleLeft, typename HandleRight, typename HandleNull>
+  static void RunLoop(const ArrayData& cond, const ArrayData& output,
+                      HandleLeft&& handle_left, HandleRight&& handle_right,
+                      HandleNull&& handle_null) {
+    const auto* cond_data = cond.buffers[1]->data();
+
+    if (output.buffers[0]) {  // output may have nulls
+      // output validity buffer is allocated internally from the IfElseFunctor. Therefore
+      // it is cond.length'd with 0 offset.
+      const auto* out_valid = output.buffers[0]->data();
+
+      for (int64_t i = 0; i < cond.length; i++) {
+        if (BitUtil::GetBit(out_valid, i)) {
+          BitUtil::GetBit(cond_data, cond.offset + i) ? handle_left(i) : handle_right(i);
+        } else {
+          handle_null();
+        }
+      }
+    } else {  // output is all valid (no nulls)
+      for (int64_t i = 0; i < cond.length; i++) {
+        BitUtil::GetBit(cond_data, cond.offset + i) ? handle_left(i) : handle_right(i);
+      }
+    }
+  }
+};
+
+template <typename Type>
+struct IfElseFunctor<Type, enable_if_fixed_size_binary<Type>> {
+  // A - Array, S - Scalar, X = Array/Scalar
+
+  // SXX
+  static Status Call(KernelContext* ctx, const BooleanScalar& cond, const Datum& left,
+                     const Datum& right, Datum* out) {
+    ARROW_ASSIGN_OR_RAISE(auto byte_width, GetByteWidth(*left.type(), *right.type()));
+    return RunIfElseScalar(
+        cond, left, right, out,
+        /*CopyArrayData*/
+        [&](const ArrayData& valid_array, ArrayData* out_array) {
+          std::memcpy(
+              out_array->buffers[1]->mutable_data() + out_array->offset * byte_width,
+              valid_array.buffers[1]->data() + valid_array.offset * byte_width,
+              valid_array.length * byte_width);
+        },
+        /*BroadcastScalar*/
+        [&](const Scalar& scalar, ArrayData* out_array) {
+          const util::string_view& scalar_data =
+              internal::UnboxScalar<FixedSizeBinaryType>::Unbox(scalar);
+          uint8_t* start =
+              out_array->buffers[1]->mutable_data() + out_array->offset * byte_width;
+          for (int64_t i = 0; i < out_array->length; i++) {
+            std::memcpy(start + i * byte_width, scalar_data.data(), scalar_data.size());
+          }
+        });
+  }
+
+  //  AAA
+  static Status Call(KernelContext* ctx, const ArrayData& cond, const ArrayData& left,
+                     const ArrayData& right, ArrayData* out) {
+    ARROW_ASSIGN_OR_RAISE(auto byte_width, GetByteWidth(*left.type, *right.type));
+    auto* out_values = out->buffers[1]->mutable_data() + out->offset * byte_width;
+
+    // copy right data to out_buff
+    const uint8_t* right_data = right.buffers[1]->data() + right.offset * byte_width;
+    std::memcpy(out_values, right_data, right.length * byte_width);
+
+    // selectively copy values from left data
+    const uint8_t* left_data = left.buffers[1]->data() + left.offset * byte_width;
+
+    RunIfElseLoop(cond, [&](int64_t data_offset, int64_t num_elems) {
+      std::memcpy(out_values + data_offset * byte_width,
+                  left_data + data_offset * byte_width, num_elems * byte_width);
+    });
+
+    return Status::OK();
+  }
+
+  // ASA
+  static Status Call(KernelContext* ctx, const ArrayData& cond, const Scalar& left,
+                     const ArrayData& right, ArrayData* out) {
+    ARROW_ASSIGN_OR_RAISE(auto byte_width, GetByteWidth(*left.type, *right.type));
+    auto* out_values = out->buffers[1]->mutable_data() + out->offset * byte_width;
+
+    // copy right data to out_buff
+    const uint8_t* right_data = right.buffers[1]->data() + right.offset * byte_width;
+    std::memcpy(out_values, right_data, right.length * byte_width);
+
+    // selectively copy values from left data
+    const util::string_view& left_data =
+        internal::UnboxScalar<FixedSizeBinaryType>::Unbox(left);
+
+    RunIfElseLoop(cond, [&](int64_t data_offset, int64_t num_elems) {
+      if (left_data.data()) {
+        for (int64_t i = 0; i < num_elems; i++) {
+          std::memcpy(out_values + (data_offset + i) * byte_width, left_data.data(),
+                      left_data.size());
+        }
+      }
+    });
+
+    return Status::OK();
+  }
+
+  // AAS
+  static Status Call(KernelContext* ctx, const ArrayData& cond, const ArrayData& left,
+                     const Scalar& right, ArrayData* out) {
+    ARROW_ASSIGN_OR_RAISE(auto byte_width, GetByteWidth(*left.type, *right.type));
+    auto* out_values = out->buffers[1]->mutable_data() + out->offset * byte_width;
+
+    // copy left data to out_buff
+    const uint8_t* left_data = left.buffers[1]->data() + left.offset * byte_width;
+    std::memcpy(out_values, left_data, left.length * byte_width);
+
+    const util::string_view& right_data =
+        internal::UnboxScalar<FixedSizeBinaryType>::Unbox(right);
+
+    RunIfElseLoopInverted(cond, [&](int64_t data_offset, int64_t num_elems) {
+      if (right_data.data()) {
+        for (int64_t i = 0; i < num_elems; i++) {
+          std::memcpy(out_values + (data_offset + i) * byte_width, right_data.data(),
+                      right_data.size());
+        }
+      }
+    });
+
+    return Status::OK();
+  }
+
+  // ASS
+  static Status Call(KernelContext* ctx, const ArrayData& cond, const Scalar& left,
+                     const Scalar& right, ArrayData* out) {
+    ARROW_ASSIGN_OR_RAISE(auto byte_width, GetByteWidth(*left.type, *right.type));
+    auto* out_values = out->buffers[1]->mutable_data() + out->offset * byte_width;
+
+    // copy right data to out_buff
+    const util::string_view& right_data =
+        internal::UnboxScalar<FixedSizeBinaryType>::Unbox(right);
+    if (right_data.data()) {
+      for (int64_t i = 0; i < cond.length; i++) {
+        std::memcpy(out_values + i * byte_width, right_data.data(), right_data.size());
+      }
+    }
+
+    // selectively copy values from left data
+    const util::string_view& left_data =
+        internal::UnboxScalar<FixedSizeBinaryType>::Unbox(left);
+
+    RunIfElseLoop(cond, [&](int64_t data_offset, int64_t num_elems) {
+      if (left_data.data()) {
+        for (int64_t i = 0; i < num_elems; i++) {
+          std::memcpy(out_values + (data_offset + i) * byte_width, left_data.data(),
+                      left_data.size());
+        }
+      }
+    });
+
+    return Status::OK();
+  }
+
+  static Result<int32_t> GetByteWidth(const DataType& left_type,
+                                      const DataType& right_type) {
+    int width = checked_cast<const FixedSizeBinaryType&>(left_type).byte_width();
+    if (width == checked_cast<const FixedSizeBinaryType&>(right_type).byte_width()) {
+      return width;
+    } else {
+      return Status::Invalid("FixedSizeBinaryType byte_widths should be equal");
+    }
+  }
+};
+
+template <typename Type, typename AllocateMem>
 struct ResolveIfElseExec {
   static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     // cond is scalar
@@ -585,8 +924,8 @@ struct ResolveIfElseExec {
     }
 
     // cond is array. Use functors to sort things out
-    ARROW_RETURN_NOT_OK(
-        PromoteNullsVisitor(ctx, batch[0], batch[1], batch[2], out->mutable_array()));
+    ARROW_RETURN_NOT_OK(PromoteNullsVisitor<AllocateMem>(ctx, batch[0], batch[1],
+                                                         batch[2], out->mutable_array()));
 
     if (batch[1].kind() == Datum::ARRAY) {
       if (batch[2].kind() == Datum::ARRAY) {  // AAA
@@ -608,15 +947,15 @@ struct ResolveIfElseExec {
   }
 };
 
-template <>
-struct ResolveIfElseExec<NullType> {
+template <typename AllocateMem>
+struct ResolveIfElseExec<NullType, AllocateMem> {
   static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
-    if (batch[0].is_scalar()) {
+    // if all are scalars, return a null scalar
+    if (batch[0].is_scalar() && batch[1].is_scalar() && batch[2].is_scalar()) {
       *out = MakeNullScalar(null());
     } else {
-      const std::shared_ptr<ArrayData>& cond_array = batch[0].array();
-      ARROW_ASSIGN_OR_RAISE(
-          *out, MakeArrayOfNull(null(), cond_array->length, ctx->memory_pool()));
+      ARROW_ASSIGN_OR_RAISE(*out,
+                            MakeArrayOfNull(null(), batch.length, ctx->memory_pool()));
     }
     return Status::OK();
   }
@@ -655,7 +994,8 @@ struct IfElseFunction : ScalarFunction {
 
 void AddNullIfElseKernel(const std::shared_ptr<IfElseFunction>& scalar_function) {
   ScalarKernel kernel({boolean(), null(), null()}, null(),
-                      ResolveIfElseExec<NullType>::Exec);
+                      ResolveIfElseExec<NullType,
+                                        /*AllocateMem=*/std::true_type>::Exec);
   kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
   kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
   kernel.can_write_into_slices = false;
@@ -666,7 +1006,9 @@ void AddNullIfElseKernel(const std::shared_ptr<IfElseFunction>& scalar_function)
 void AddPrimitiveIfElseKernels(const std::shared_ptr<ScalarFunction>& scalar_function,
                                const std::vector<std::shared_ptr<DataType>>& types) {
   for (auto&& type : types) {
-    auto exec = internal::GenerateTypeAgnosticPrimitive<ResolveIfElseExec>(*type);
+    auto exec =
+        internal::GenerateTypeAgnosticPrimitive<ResolveIfElseExec,
+                                                /*AllocateMem=*/std::false_type>(*type);
     // cond array needs to be boolean always
     ScalarKernel kernel({boolean(), type, type}, type, exec);
     kernel.null_handling = NullHandling::COMPUTED_PREALLOCATE;
@@ -677,6 +1019,38 @@ void AddPrimitiveIfElseKernels(const std::shared_ptr<ScalarFunction>& scalar_fun
   }
 }
 
+void AddBinaryIfElseKernels(const std::shared_ptr<IfElseFunction>& scalar_function,
+                            const std::vector<std::shared_ptr<DataType>>& types) {
+  for (auto&& type : types) {
+    auto exec =
+        internal::GenerateTypeAgnosticVarBinaryBase<ResolveIfElseExec,
+                                                    /*AllocateMem=*/std::true_type>(
+            *type);
+    // cond array needs to be boolean always
+    ScalarKernel kernel({boolean(), type, type}, type, exec);
+    kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
+    kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
+    kernel.can_write_into_slices = false;
+
+    DCHECK_OK(scalar_function->AddKernel(std::move(kernel)));
+  }
+}
+
+void AddFSBinaryIfElseKernel(const std::shared_ptr<IfElseFunction>& scalar_function) {
+  // cond array needs to be boolean always
+  ScalarKernel kernel(
+      {boolean(), InputType(Type::FIXED_SIZE_BINARY), InputType(Type::FIXED_SIZE_BINARY)},
+      OutputType([](KernelContext*, const std::vector<ValueDescr>& descrs) {
+        return ValueDescr(descrs[1].type, ValueDescr::ANY);
+      }),
+      ResolveIfElseExec<FixedSizeBinaryType, /*AllocateMem=*/std::false_type>::Exec);
+  kernel.null_handling = NullHandling::COMPUTED_PREALLOCATE;
+  kernel.mem_allocation = MemAllocation::PREALLOCATE;
+  kernel.can_write_into_slices = true;
+
+  DCHECK_OK(scalar_function->AddKernel(std::move(kernel)));
+}
+
 // Helper to copy or broadcast fixed-width values between buffers.
 template <typename Type, typename Enable = void>
 struct CopyFixedWidth {};
@@ -1056,7 +1430,8 @@ void RegisterScalarIfElse(FunctionRegistry* registry) {
     AddPrimitiveIfElseKernels(func, TemporalTypes());
     AddPrimitiveIfElseKernels(func, {boolean(), day_time_interval(), month_interval()});
     AddNullIfElseKernel(func);
-    // todo add binary kernels
+    AddBinaryIfElseKernels(func, BaseBinaryTypes());
+    AddFSBinaryIfElseKernel(func);
     DCHECK_OK(registry->AddFunction(std::move(func)));
   }
   {
diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc
index 9192cf54ebb..3b8df47162d 100644
--- a/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc
@@ -28,9 +28,36 @@ namespace compute {
 
 const int64_t elems = 1024 * 1024;
 
+template <typename Type, typename Enable = void>
+struct SetBytesProcessed {};
+
+template <typename Type>
+struct SetBytesProcessed<Type, enable_if_number<Type>> {
+  static void Set(const std::shared_ptr<Array>& cond, const std::shared_ptr<Array>& left,
+                  const std::shared_ptr<Array>& right, benchmark::State* state) {
+    using CType = typename Type::c_type;
+    state->SetBytesProcessed(state->iterations() *
+                             (cond->length() / 8 + 2 * cond->length() * sizeof(CType)));
+  }
+};
+
+template <typename Type>
+struct SetBytesProcessed<Type, enable_if_base_binary<Type>> {
+  static void Set(const std::shared_ptr<Array>& cond, const std::shared_ptr<Array>& left,
+                  const std::shared_ptr<Array>& right, benchmark::State* state) {
+    using ArrayType = typename TypeTraits<Type>::ArrayType;
+    using OffsetType = typename TypeTraits<Type>::OffsetType::c_type;
+
+    state->SetBytesProcessed(
+        state->iterations() *
+        (cond->length() / 8 + 2 * cond->length() * sizeof(OffsetType) +
+         std::static_pointer_cast<ArrayType>(left)->total_values_length() +
+         std::static_pointer_cast<ArrayType>(right)->total_values_length()));
+  }
+};
+
 template <typename Type>
 static void IfElseBench(benchmark::State& state) {
-  using CType = typename Type::c_type;
   auto type = TypeTraits<Type>::type_singleton();
   using ArrayType = typename TypeTraits<Type>::ArrayType;
 
@@ -40,23 +67,24 @@ static void IfElseBench(benchmark::State& state) {
   random::RandomArrayGenerator rand(/*seed=*/0);
 
   auto cond = std::static_pointer_cast<BooleanArray>(
-      rand.ArrayOf(boolean(), len, /*null_probability=*/0.01));
+                  rand.ArrayOf(boolean(), len, /*null_probability=*/0.01))
+                  ->Slice(offset);
   auto left = std::static_pointer_cast<ArrayType>(
-      rand.ArrayOf(type, len, /*null_probability=*/0.01));
+                  rand.ArrayOf(type, len, /*null_probability=*/0.01))
+                  ->Slice(offset);
   auto right = std::static_pointer_cast<ArrayType>(
-      rand.ArrayOf(type, len, /*null_probability=*/0.01));
+                   rand.ArrayOf(type, len, /*null_probability=*/0.01))
+                   ->Slice(offset);
 
   for (auto _ : state) {
-    ABORT_NOT_OK(IfElse(cond->Slice(offset), left->Slice(offset), right->Slice(offset)));
+    ABORT_NOT_OK(IfElse(cond, left, right));
   }
 
-  state.SetBytesProcessed(state.iterations() *
-                          ((len - offset) / 8 + 2 * (len - offset) * sizeof(CType)));
+  SetBytesProcessed<Type>::Set(cond, left, right, &state);
 }
 
 template <typename Type>
 static void IfElseBenchContiguous(benchmark::State& state) {
-  using CType = typename Type::c_type;
   auto type = TypeTraits<Type>::type_singleton();
   using ArrayType = typename TypeTraits<Type>::ArrayType;
 
@@ -67,20 +95,21 @@ static void IfElseBenchContiguous(benchmark::State& state) {
   ASSERT_OK_AND_ASSIGN(auto temp2,
                        MakeArrayFromScalar(BooleanScalar(false), len - len / 2));
   ASSERT_OK_AND_ASSIGN(auto concat, Concatenate({temp1, temp2}));
-  auto cond = std::static_pointer_cast<BooleanArray>(concat);
+  auto cond = std::static_pointer_cast<BooleanArray>(concat)->Slice(offset);
 
   random::RandomArrayGenerator rand(/*seed=*/0);
   auto left = std::static_pointer_cast<ArrayType>(
-      rand.ArrayOf(type, len, /*null_probability=*/0.01));
+                  rand.ArrayOf(type, len, /*null_probability=*/0.01))
+                  ->Slice(offset);
   auto right = std::static_pointer_cast<ArrayType>(
-      rand.ArrayOf(type, len, /*null_probability=*/0.01));
+                   rand.ArrayOf(type, len, /*null_probability=*/0.01))
+                   ->Slice(offset);
 
   for (auto _ : state) {
-    ABORT_NOT_OK(IfElse(cond->Slice(offset), left->Slice(offset), right->Slice(offset)));
+    ABORT_NOT_OK(IfElse(cond, left, right));
   }
 
-  state.SetBytesProcessed(state.iterations() *
-                          ((len - offset) / 8 + 2 * (len - offset) * sizeof(CType)));
+  SetBytesProcessed<Type>::Set(cond, left, right, &state);
 }
 
 static void IfElseBench64(benchmark::State& state) {
@@ -91,6 +120,14 @@ static void IfElseBench32(benchmark::State& state) {
   return IfElseBench<UInt32Type>(state);
 }
 
+static void IfElseBenchString32(benchmark::State& state) {
+  return IfElseBench<StringType>(state);
+}
+
+static void IfElseBenchString64(benchmark::State& state) {
+  return IfElseBench<LargeStringType>(state);
+}
+
 static void IfElseBench64Contiguous(benchmark::State& state) {
   return IfElseBenchContiguous<UInt64Type>(state);
 }
@@ -99,6 +136,14 @@ static void IfElseBench32Contiguous(benchmark::State& state) {
   return IfElseBenchContiguous<UInt32Type>(state);
 }
 
+static void IfElseBenchString64Contiguous(benchmark::State& state) {
+  return IfElseBenchContiguous<UInt64Type>(state);
+}
+
+static void IfElseBenchString32Contiguous(benchmark::State& state) {
+  return IfElseBenchContiguous<UInt32Type>(state);
+}
+
 template <typename Type>
 static void CaseWhenBench(benchmark::State& state) {
   using CType = typename Type::c_type;
@@ -194,6 +239,12 @@ BENCHMARK(IfElseBench64Contiguous)->Args({elems, 0});
 BENCHMARK(IfElseBench32Contiguous)->Args({elems, 99});
 BENCHMARK(IfElseBench64Contiguous)->Args({elems, 99});
 
+BENCHMARK(IfElseBenchString32)->Args({elems, 0});
+BENCHMARK(IfElseBenchString64)->Args({elems, 0});
+
+BENCHMARK(IfElseBenchString32Contiguous)->Args({elems, 99});
+BENCHMARK(IfElseBenchString64Contiguous)->Args({elems, 99});
+
 BENCHMARK(CaseWhenBench64)->Args({elems, 0});
 BENCHMARK(CaseWhenBench64)->Args({elems, 99});
 
diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
index cd2d04a13e0..4ebed60b3a9 100644
--- a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
@@ -65,9 +65,9 @@ TYPED_TEST(TestIfElsePrimitive, IfElseFixedSizeRand) {
   ASSERT_OK_AND_ASSIGN(auto temp1, MakeArrayFromScalar(BooleanScalar(true), 64));
   ASSERT_OK_AND_ASSIGN(auto temp2, MakeArrayFromScalar(BooleanScalar(false), 64));
   auto temp3 = rand.ArrayOf(boolean(), len - 64 * 2, /*null_probability=*/0.01);
+
   ASSERT_OK_AND_ASSIGN(auto concat, Concatenate({temp1, temp2, temp3}));
   auto cond = std::static_pointer_cast<BooleanArray>(concat);
-
   auto left = std::static_pointer_cast<ArrayType>(
       rand.ArrayOf(type, len, /*null_probability=*/0.01));
   auto right = std::static_pointer_cast<ArrayType>(
@@ -275,10 +275,10 @@ TEST_F(TestIfElseKernel, IfElseBooleanRand) {
 }
 
 TEST_F(TestIfElseKernel, IfElseNull) {
-  CheckIfElseOutput(ArrayFromJSON(boolean(), "[null, null, null, null]"),
-                    ArrayFromJSON(null(), "[null, null, null, null]"),
-                    ArrayFromJSON(null(), "[null, null, null, null]"),
-                    ArrayFromJSON(null(), "[null, null, null, null]"));
+  CheckWithDifferentShapes(ArrayFromJSON(boolean(), "[null, null, null, null]"),
+                           ArrayFromJSON(null(), "[null, null, null, null]"),
+                           ArrayFromJSON(null(), "[null, null, null, null]"),
+                           ArrayFromJSON(null(), "[null, null, null, null]"));
 }
 
 TEST_F(TestIfElseKernel, IfElseMultiType) {
@@ -318,6 +318,192 @@ TEST_F(TestIfElseKernel, IfElseDispatchBest) {
   CheckDispatchBest(name, {null(), uint8(), int8()}, {boolean(), int16(), int16()});
 }
 
+template <typename Type>
+class TestIfElseBaseBinary : public ::testing::Test {};
+
+TYPED_TEST_SUITE(TestIfElseBaseBinary, BinaryTypes);
+
+TYPED_TEST(TestIfElseBaseBinary, IfElseBaseBinary) {
+  auto type = TypeTraits<TypeParam>::type_singleton();
+
+  CheckWithDifferentShapes(ArrayFromJSON(boolean(), "[true, true, true, false]"),
+                           ArrayFromJSON(type, R"(["a", "ab", "abc", "abcd"])"),
+                           ArrayFromJSON(type, R"(["lmno", "lmn", "lm", "l"])"),
+                           ArrayFromJSON(type, R"(["a", "ab", "abc", "l"])"));
+
+  CheckWithDifferentShapes(ArrayFromJSON(boolean(), R"([true, true, true, false])"),
+                           ArrayFromJSON(type, R"(["a", "ab", "abc", "abcd"])"),
+                           ArrayFromJSON(type, R"(["lmno", "lmn", "lm", null])"),
+                           ArrayFromJSON(type, R"(["a", "ab", "abc", null])"));
+
+  CheckWithDifferentShapes(ArrayFromJSON(boolean(), R"([true, true, true, false])"),
+                           ArrayFromJSON(type, R"(["a", "ab", null, "abcd"])"),
+                           ArrayFromJSON(type, R"(["lmno", "lmn", "lm", null])"),
+                           ArrayFromJSON(type, R"(["a", "ab", null, null])"));
+
+  CheckWithDifferentShapes(ArrayFromJSON(boolean(), R"([true, true, true, false])"),
+                           ArrayFromJSON(type, R"(["a", "ab", null, "abcd"])"),
+                           ArrayFromJSON(type, R"(["lmno", "lmn", "lm", "l"])"),
+                           ArrayFromJSON(type, R"(["a", "ab", null, "l"])"));
+
+  CheckWithDifferentShapes(ArrayFromJSON(boolean(), R"([null, true, true, false])"),
+                           ArrayFromJSON(type, R"(["a", "ab", null, "abcd"])"),
+                           ArrayFromJSON(type, R"(["lmno", "lmn", "lm", "l"])"),
+                           ArrayFromJSON(type, R"([null, "ab", null, "l"])"));
+
+  CheckWithDifferentShapes(ArrayFromJSON(boolean(), R"([null, true, true, false])"),
+                           ArrayFromJSON(type, R"(["a", "ab", null, "abcd"])"),
+                           ArrayFromJSON(type, R"(["lmno", "lmn", "lm", null])"),
+                           ArrayFromJSON(type, R"([null, "ab", null, null])"));
+
+  CheckWithDifferentShapes(ArrayFromJSON(boolean(), R"([null, true, true, false])"),
+                           ArrayFromJSON(type, R"(["a", "ab", "abc", "abcd"])"),
+                           ArrayFromJSON(type, R"(["lmno", "lmn", "lm", null])"),
+                           ArrayFromJSON(type, R"([null, "ab", "abc", null])"));
+
+  CheckWithDifferentShapes(ArrayFromJSON(boolean(), R"([null, true, true, false])"),
+                           ArrayFromJSON(type, R"(["a", "ab", "abc", "abcd"])"),
+                           ArrayFromJSON(type, R"(["lmno", "lmn", "lm", "l"])"),
+                           ArrayFromJSON(type, R"([null, "ab", "abc", "l"])"));
+}
+
+TYPED_TEST(TestIfElseBaseBinary, IfElseBaseBinaryRand) {
+  using ArrayType = typename TypeTraits<TypeParam>::ArrayType;
+  using OffsetType = typename TypeTraits<TypeParam>::OffsetType::c_type;
+  auto type = TypeTraits<TypeParam>::type_singleton();
+
+  random::RandomArrayGenerator rand(/*seed=*/0);
+  int64_t len = 1000;
+
+  //  this is to check the BitBlockCount::AllSet/ NoneSet code paths
+  ASSERT_OK_AND_ASSIGN(auto temp1, MakeArrayFromScalar(BooleanScalar(true), 64));
+  ASSERT_OK_AND_ASSIGN(auto temp2, MakeArrayFromScalar(BooleanScalar(false), 64));
+  auto temp3 = rand.ArrayOf(boolean(), len - 64 * 2, /*null_probability=*/0.01);
+
+  ASSERT_OK_AND_ASSIGN(auto concat, Concatenate({temp1, temp2, temp3}));
+  auto cond = std::static_pointer_cast<BooleanArray>(concat);
+
+  auto left = std::static_pointer_cast<ArrayType>(
+      rand.ArrayOf(type, len, /*null_probability=*/0.01));
+  auto right = std::static_pointer_cast<ArrayType>(
+      rand.ArrayOf(type, len, /*null_probability=*/0.01));
+
+  typename TypeTraits<TypeParam>::BuilderType builder;
+
+  for (int64_t i = 0; i < len; ++i) {
+    if (!cond->IsValid(i) || (cond->Value(i) && !left->IsValid(i)) ||
+        (!cond->Value(i) && !right->IsValid(i))) {
+      ASSERT_OK(builder.AppendNull());
+      continue;
+    }
+
+    OffsetType offset;
+    const uint8_t* val;
+    if (cond->Value(i)) {
+      val = left->GetValue(i, &offset);
+    } else {
+      val = right->GetValue(i, &offset);
+    }
+    ASSERT_OK(builder.Append(val, offset));
+  }
+  ASSERT_OK_AND_ASSIGN(auto expected_data, builder.Finish());
+
+  CheckIfElseOutput(cond, left, right, expected_data);
+}
+
+TEST_F(TestIfElseKernel, IfElseFSBinary) {
+  auto type = std::make_shared<FixedSizeBinaryType>(4);
+
+  CheckWithDifferentShapes(ArrayFromJSON(boolean(), "[true, true, true, false]"),
+                           ArrayFromJSON(type, R"(["aaaa", "abab", "abca", "abcd"])"),
+                           ArrayFromJSON(type, R"(["lmno", "lmnl", "lmlm", "llll"])"),
+                           ArrayFromJSON(type, R"(["aaaa", "abab", "abca", "llll"])"));
+
+  CheckWithDifferentShapes(ArrayFromJSON(boolean(), R"([true, true, true, false])"),
+                           ArrayFromJSON(type, R"(["aaaa", "abab", "abca", "abcd"])"),
+                           ArrayFromJSON(type, R"(["lmno", "lmnl", "lmlm", null])"),
+                           ArrayFromJSON(type, R"(["aaaa", "abab", "abca", null])"));
+
+  CheckWithDifferentShapes(ArrayFromJSON(boolean(), R"([true, true, true, false])"),
+                           ArrayFromJSON(type, R"(["aaaa", "abab", null, "abcd"])"),
+                           ArrayFromJSON(type, R"(["lmno", "lmnl", "lmlm", null])"),
+                           ArrayFromJSON(type, R"(["aaaa", "abab", null, null])"));
+
+  CheckWithDifferentShapes(ArrayFromJSON(boolean(), R"([true, true, true, false])"),
+                           ArrayFromJSON(type, R"(["aaaa", "abab", null, "abcd"])"),
+                           ArrayFromJSON(type, R"(["lmno", "lmnl", "lmlm", "llll"])"),
+                           ArrayFromJSON(type, R"(["aaaa", "abab", null, "llll"])"));
+
+  CheckWithDifferentShapes(ArrayFromJSON(boolean(), R"([null, true, true, false])"),
+                           ArrayFromJSON(type, R"(["aaaa", "abab", null, "abcd"])"),
+                           ArrayFromJSON(type, R"(["lmno", "lmnl", "lmlm", "llll"])"),
+                           ArrayFromJSON(type, R"([null, "abab", null, "llll"])"));
+
+  CheckWithDifferentShapes(ArrayFromJSON(boolean(), R"([null, true, true, false])"),
+                           ArrayFromJSON(type, R"(["aaaa", "abab", null, "abcd"])"),
+                           ArrayFromJSON(type, R"(["lmno", "lmnl", "lmlm", null])"),
+                           ArrayFromJSON(type, R"([null, "abab", null, null])"));
+
+  CheckWithDifferentShapes(ArrayFromJSON(boolean(), R"([null, true, true, false])"),
+                           ArrayFromJSON(type, R"(["aaaa", "abab", "abca", "abcd"])"),
+                           ArrayFromJSON(type, R"(["lmno", "lmnl", "lmlm", null])"),
+                           ArrayFromJSON(type, R"([null, "abab", "abca", null])"));
+
+  CheckWithDifferentShapes(ArrayFromJSON(boolean(), R"([null, true, true, false])"),
+                           ArrayFromJSON(type, R"(["aaaa", "abab", "abca", "abcd"])"),
+                           ArrayFromJSON(type, R"(["lmno", "lmnl", "lmlm", "llll"])"),
+                           ArrayFromJSON(type, R"([null, "abab", "abca", "llll"])"));
+
+  // should fails for non-equal byte_widths
+  auto type1 = std::make_shared<FixedSizeBinaryType>(5);
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      Invalid, ::testing::HasSubstr("FixedSizeBinaryType byte_widths should be equal"),
+      CallFunction("if_else", {ArrayFromJSON(boolean(), "[true]"),
+                               ArrayFromJSON(type, R"(["aaaa"])"),
+                               ArrayFromJSON(type1, R"(["aaaaa"])")}));
+}
+
+TEST_F(TestIfElseKernel, IfElseFSBinaryRand) {
+  auto type = std::make_shared<FixedSizeBinaryType>(5);
+
+  random::RandomArrayGenerator rand(/*seed=*/0);
+  int64_t len = 1000;
+
+  //  this is to check the BitBlockCount::AllSet/ NoneSet code paths
+  ASSERT_OK_AND_ASSIGN(auto temp1, MakeArrayFromScalar(BooleanScalar(true), 64));
+  ASSERT_OK_AND_ASSIGN(auto temp2, MakeArrayFromScalar(BooleanScalar(false), 64));
+  auto temp3 = rand.ArrayOf(boolean(), len - 64 * 2, /*null_probability=*/0.01);
+
+  ASSERT_OK_AND_ASSIGN(auto concat, Concatenate({temp1, temp2, temp3}));
+  auto cond = std::static_pointer_cast<BooleanArray>(concat);
+
+  auto left = std::static_pointer_cast<FixedSizeBinaryArray>(
+      rand.ArrayOf(type, len, /*null_probability=*/0.01));
+  auto right = std::static_pointer_cast<FixedSizeBinaryArray>(
+      rand.ArrayOf(type, len, /*null_probability=*/0.01));
+
+  FixedSizeBinaryBuilder builder(type);
+
+  for (int64_t i = 0; i < len; ++i) {
+    if (!cond->IsValid(i) || (cond->Value(i) && !left->IsValid(i)) ||
+        (!cond->Value(i) && !right->IsValid(i))) {
+      ASSERT_OK(builder.AppendNull());
+      continue;
+    }
+
+    const uint8_t* val;
+    if (cond->Value(i)) {
+      val = left->GetValue(i);
+    } else {
+      val = right->GetValue(i);
+    }
+    ASSERT_OK(builder.Append(val));
+  }
+  ASSERT_OK_AND_ASSIGN(auto expected_data, builder.Finish());
+
+  CheckIfElseOutput(cond, left, right, expected_data);
+}
+
 template <typename Type>
 class TestCaseWhenNumeric : public ::testing::Test {};
 
diff --git a/cpp/src/arrow/util/bit_util.h b/cpp/src/arrow/util/bit_util.h
index 1e97e467610..c306ce7821b 100644
--- a/cpp/src/arrow/util/bit_util.h
+++ b/cpp/src/arrow/util/bit_util.h
@@ -290,12 +290,14 @@ static constexpr uint8_t kPrecedingWrappingBitmask[] = {255, 1, 3, 7, 15, 31, 63
 // the bitwise complement version of kPrecedingBitmask
 static constexpr uint8_t kTrailingBitmask[] = {255, 254, 252, 248, 240, 224, 192, 128};
 
-static inline bool GetBit(const uint8_t* bits, uint64_t i) {
+static constexpr bool GetBit(const uint8_t* bits, uint64_t i) {
   return (bits[i >> 3] >> (i & 0x07)) & 1;
 }
 
 // Gets the i-th bit from a byte. Should only be used with i <= 7.
-static inline bool GetBitFromByte(uint8_t byte, uint8_t i) { return byte & kBitmask[i]; }
+static constexpr bool GetBitFromByte(uint8_t byte, uint8_t i) {
+  return byte & kBitmask[i];
+}
 
 static inline void ClearBit(uint8_t* bits, int64_t i) {
   bits[i / 8] &= kFlippedBitmask[i % 8];

From a4222a0f7a6188bde7b86b2f4a7350c2174e84ce Mon Sep 17 00:00:00 2001
From: Benjamin Kietzman <bengilgit@gmail.com>
Date: Fri, 16 Jul 2021 07:54:32 +0200
Subject: [PATCH 580/719] ARROW-11206: [C++][Compute][Python] Rename 'project'
 to 'make_struct'

Closes #10728 from bkietz/11206-Consider-hiding-renaming-

Authored-by: Benjamin Kietzman <bengilgit@gmail.com>
Signed-off-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
---
 cpp/src/arrow/compute/api_scalar.cc           | 25 +++----
 cpp/src/arrow/compute/api_scalar.h            | 12 ++--
 cpp/src/arrow/compute/exec/expression.cc      |  5 +-
 .../arrow/compute/exec/expression_internal.h  |  7 +-
 cpp/src/arrow/compute/function_test.cc        |  6 +-
 .../compute/kernels/scalar_if_else_test.cc    |  8 +--
 .../arrow/compute/kernels/scalar_nested.cc    | 57 +++++++++-------
 .../compute/kernels/scalar_nested_test.cc     | 65 +++++++++++--------
 cpp/src/arrow/dataset/scanner.cc              |  2 +-
 cpp/src/arrow/dataset/scanner_internal.h      |  4 +-
 cpp/src/arrow/dataset/scanner_test.cc         |  3 +-
 docs/source/cpp/compute.rst                   |  2 +-
 python/pyarrow/_compute.pyx                   |  6 +-
 python/pyarrow/compute.py                     |  2 +-
 python/pyarrow/includes/libarrow.pxd          |  6 +-
 python/pyarrow/tests/test_compute.py          | 28 +++++++-
 16 files changed, 141 insertions(+), 97 deletions(-)

diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc
index abf6f75acb8..07e56d5f3d1 100644
--- a/cpp/src/arrow/compute/api_scalar.cc
+++ b/cpp/src/arrow/compute/api_scalar.cc
@@ -154,10 +154,10 @@ static auto kSliceOptionsType = GetFunctionOptionsType<SliceOptions>(
     DataMember("step", &SliceOptions::step));
 static auto kCompareOptionsType =
     GetFunctionOptionsType<CompareOptions>(DataMember("op", &CompareOptions::op));
-static auto kProjectOptionsType = GetFunctionOptionsType<ProjectOptions>(
-    DataMember("field_names", &ProjectOptions::field_names),
-    DataMember("field_nullability", &ProjectOptions::field_nullability),
-    DataMember("field_metadata", &ProjectOptions::field_metadata));
+static auto kMakeStructOptionsType = GetFunctionOptionsType<MakeStructOptions>(
+    DataMember("field_names", &MakeStructOptions::field_names),
+    DataMember("field_nullability", &MakeStructOptions::field_nullability),
+    DataMember("field_metadata", &MakeStructOptions::field_metadata));
 static auto kDayOfWeekOptionsType = GetFunctionOptionsType<DayOfWeekOptions>(
     DataMember("one_based_numbering", &DayOfWeekOptions::one_based_numbering),
     DataMember("week_start", &DayOfWeekOptions::week_start));
@@ -265,21 +265,22 @@ CompareOptions::CompareOptions(CompareOperator op)
 CompareOptions::CompareOptions() : CompareOptions(CompareOperator::EQUAL) {}
 constexpr char CompareOptions::kTypeName[];
 
-ProjectOptions::ProjectOptions(std::vector<std::string> n, std::vector<bool> r,
-                               std::vector<std::shared_ptr<const KeyValueMetadata>> m)
-    : FunctionOptions(internal::kProjectOptionsType),
+MakeStructOptions::MakeStructOptions(
+    std::vector<std::string> n, std::vector<bool> r,
+    std::vector<std::shared_ptr<const KeyValueMetadata>> m)
+    : FunctionOptions(internal::kMakeStructOptionsType),
       field_names(std::move(n)),
       field_nullability(std::move(r)),
       field_metadata(std::move(m)) {}
 
-ProjectOptions::ProjectOptions(std::vector<std::string> n)
-    : FunctionOptions(internal::kProjectOptionsType),
+MakeStructOptions::MakeStructOptions(std::vector<std::string> n)
+    : FunctionOptions(internal::kMakeStructOptionsType),
       field_names(std::move(n)),
       field_nullability(field_names.size(), true),
       field_metadata(field_names.size(), NULLPTR) {}
 
-ProjectOptions::ProjectOptions() : ProjectOptions(std::vector<std::string>()) {}
-constexpr char ProjectOptions::kTypeName[];
+MakeStructOptions::MakeStructOptions() : MakeStructOptions(std::vector<std::string>()) {}
+constexpr char MakeStructOptions::kTypeName[];
 
 DayOfWeekOptions::DayOfWeekOptions(bool one_based_numbering, uint32_t week_start)
     : FunctionOptions(internal::kDayOfWeekOptionsType),
@@ -304,7 +305,7 @@ void RegisterScalarOptions(FunctionRegistry* registry) {
   DCHECK_OK(registry->AddFunctionOptionsType(kTrimOptionsType));
   DCHECK_OK(registry->AddFunctionOptionsType(kSliceOptionsType));
   DCHECK_OK(registry->AddFunctionOptionsType(kCompareOptionsType));
-  DCHECK_OK(registry->AddFunctionOptionsType(kProjectOptionsType));
+  DCHECK_OK(registry->AddFunctionOptionsType(kMakeStructOptionsType));
   DCHECK_OK(registry->AddFunctionOptionsType(kDayOfWeekOptionsType));
 }
 }  // namespace internal
diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h
index e8161ddb5f2..285e1eb4f51 100644
--- a/cpp/src/arrow/compute/api_scalar.h
+++ b/cpp/src/arrow/compute/api_scalar.h
@@ -226,13 +226,13 @@ class ARROW_EXPORT CompareOptions : public FunctionOptions {
   enum CompareOperator op;
 };
 
-class ARROW_EXPORT ProjectOptions : public FunctionOptions {
+class ARROW_EXPORT MakeStructOptions : public FunctionOptions {
  public:
-  ProjectOptions(std::vector<std::string> n, std::vector<bool> r,
-                 std::vector<std::shared_ptr<const KeyValueMetadata>> m);
-  explicit ProjectOptions(std::vector<std::string> n);
-  ProjectOptions();
-  constexpr static char const kTypeName[] = "ProjectOptions";
+  MakeStructOptions(std::vector<std::string> n, std::vector<bool> r,
+                    std::vector<std::shared_ptr<const KeyValueMetadata>> m);
+  explicit MakeStructOptions(std::vector<std::string> n);
+  MakeStructOptions();
+  constexpr static char const kTypeName[] = "MakeStructOptions";
 
   /// Names for wrapped columns
   std::vector<std::string> field_names;
diff --git a/cpp/src/arrow/compute/exec/expression.cc b/cpp/src/arrow/compute/exec/expression.cc
index bc9a9103f6d..4aab64a46a4 100644
--- a/cpp/src/arrow/compute/exec/expression.cc
+++ b/cpp/src/arrow/compute/exec/expression.cc
@@ -166,7 +166,7 @@ std::string Expression::ToString() const {
     return binary(std::move(op));
   }
 
-  if (auto options = GetProjectOptions(*call)) {
+  if (auto options = GetMakeStructOptions(*call)) {
     std::string out = "{";
     auto argument = call->arguments.begin();
     for (const auto& field_name : options->field_names) {
@@ -1122,7 +1122,8 @@ Result<Expression> Deserialize(std::shared_ptr<Buffer> buffer) {
 }
 
 Expression project(std::vector<Expression> values, std::vector<std::string> names) {
-  return call("project", std::move(values), compute::ProjectOptions{std::move(names)});
+  return call("make_struct", std::move(values),
+              compute::MakeStructOptions{std::move(names)});
 }
 
 Expression equal(Expression lhs, Expression rhs) {
diff --git a/cpp/src/arrow/compute/exec/expression_internal.h b/cpp/src/arrow/compute/exec/expression_internal.h
index 51d242e8d66..dc38924d932 100644
--- a/cpp/src/arrow/compute/exec/expression_internal.h
+++ b/cpp/src/arrow/compute/exec/expression_internal.h
@@ -220,9 +220,10 @@ inline bool IsSetLookup(const std::string& function) {
   return function == "is_in" || function == "index_in";
 }
 
-inline const compute::ProjectOptions* GetProjectOptions(const Expression::Call& call) {
-  if (call.function_name != "project") return nullptr;
-  return checked_cast<const compute::ProjectOptions*>(call.options.get());
+inline const compute::MakeStructOptions* GetMakeStructOptions(
+    const Expression::Call& call) {
+  if (call.function_name != "make_struct") return nullptr;
+  return checked_cast<const compute::MakeStructOptions*>(call.options.get());
 }
 
 /// A helper for unboxing an Expression composed of associative function calls.
diff --git a/cpp/src/arrow/compute/function_test.cc b/cpp/src/arrow/compute/function_test.cc
index 752ade284b7..225f80736a6 100644
--- a/cpp/src/arrow/compute/function_test.cc
+++ b/cpp/src/arrow/compute/function_test.cc
@@ -86,10 +86,10 @@ TEST(FunctionOptions, Equality) {
   options.emplace_back(new CompareOptions(CompareOperator::EQUAL));
   options.emplace_back(new CompareOptions(CompareOperator::LESS));
   // N.B. we never actually use field_nullability or field_metadata in Arrow
-  options.emplace_back(new ProjectOptions({"col1"}, {true}, {}));
-  options.emplace_back(new ProjectOptions({"col1"}, {false}, {}));
+  options.emplace_back(new MakeStructOptions({"col1"}, {true}, {}));
+  options.emplace_back(new MakeStructOptions({"col1"}, {false}, {}));
   options.emplace_back(
-      new ProjectOptions({"col1"}, {false}, {key_value_metadata({{"key", "val"}})}));
+      new MakeStructOptions({"col1"}, {false}, {key_value_metadata({{"key", "val"}})}));
   options.emplace_back(new DayOfWeekOptions(false, 1));
   options.emplace_back(new CastOptions(CastOptions::Safe(boolean())));
   options.emplace_back(new CastOptions(CastOptions::Unsafe(int64())));
diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
index 4ebed60b3a9..8ff86f3ec29 100644
--- a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
@@ -510,13 +510,7 @@ class TestCaseWhenNumeric : public ::testing::Test {};
 TYPED_TEST_SUITE(TestCaseWhenNumeric, NumericBasedTypes);
 
 Datum MakeStruct(const std::vector<Datum>& conds) {
-  ProjectOptions options;
-  options.field_names.resize(conds.size());
-  options.field_metadata.resize(conds.size());
-  for (const auto& datum : conds) {
-    options.field_nullability.push_back(datum.null_count() > 0);
-  }
-  EXPECT_OK_AND_ASSIGN(auto result, CallFunction("project", conds, &options));
+  EXPECT_OK_AND_ASSIGN(auto result, CallFunction("make_struct", conds));
   return result;
 }
 
diff --git a/cpp/src/arrow/compute/kernels/scalar_nested.cc b/cpp/src/arrow/compute/kernels/scalar_nested.cc
index e4ab3f9b418..e9f0696c8fd 100644
--- a/cpp/src/arrow/compute/kernels/scalar_nested.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_nested.cc
@@ -62,15 +62,23 @@ const FunctionDoc list_value_length_doc{
      "Null values emit a null in the output."),
     {"lists"}};
 
-Result<ValueDescr> ProjectResolve(KernelContext* ctx,
-                                  const std::vector<ValueDescr>& descrs) {
-  const auto& names = OptionsWrapper<ProjectOptions>::Get(ctx).field_names;
-  const auto& nullable = OptionsWrapper<ProjectOptions>::Get(ctx).field_nullability;
-  const auto& metadata = OptionsWrapper<ProjectOptions>::Get(ctx).field_metadata;
-
-  if (names.size() != descrs.size() || nullable.size() != descrs.size() ||
-      metadata.size() != descrs.size()) {
-    return Status::Invalid("project() was passed ", descrs.size(), " arguments but ",
+Result<ValueDescr> MakeStructResolve(KernelContext* ctx,
+                                     const std::vector<ValueDescr>& descrs) {
+  auto names = OptionsWrapper<MakeStructOptions>::Get(ctx).field_names;
+  auto nullable = OptionsWrapper<MakeStructOptions>::Get(ctx).field_nullability;
+  auto metadata = OptionsWrapper<MakeStructOptions>::Get(ctx).field_metadata;
+
+  if (names.size() == 0) {
+    names.resize(descrs.size());
+    nullable.resize(descrs.size(), true);
+    metadata.resize(descrs.size(), nullptr);
+    int i = 0;
+    for (auto& name : names) {
+      name = std::to_string(i++);
+    }
+  } else if (names.size() != descrs.size() || nullable.size() != descrs.size() ||
+             metadata.size() != descrs.size()) {
+    return Status::Invalid("make_struct() was passed ", descrs.size(), " arguments but ",
                            names.size(), " field names, ", nullable.size(),
                            " nullability bits, and ", metadata.size(),
                            " metadata dictionaries.");
@@ -94,15 +102,16 @@ Result<ValueDescr> ProjectResolve(KernelContext* ctx,
       }
     }
 
-    fields[i] = field(names[i], descr.type, nullable[i], metadata[i]);
+    fields[i] =
+        field(std::move(names[i]), descr.type, nullable[i], std::move(metadata[i]));
     ++i;
   }
 
   return ValueDescr{struct_(std::move(fields)), shape};
 }
 
-Status ProjectExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
-  ARROW_ASSIGN_OR_RAISE(auto descr, ProjectResolve(ctx, batch.GetDescriptors()));
+Status MakeStructExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  ARROW_ASSIGN_OR_RAISE(auto descr, MakeStructResolve(ctx, batch.GetDescriptors()));
 
   for (int i = 0; i < batch.num_values(); ++i) {
     const auto& field = checked_cast<const StructType&>(*descr.type).field(i);
@@ -139,11 +148,11 @@ Status ProjectExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
   return Status::OK();
 }
 
-const FunctionDoc project_doc{"Wrap Arrays into a StructArray",
-                              ("Names of the StructArray's fields are\n"
-                               "specified through ProjectOptions."),
-                              {"*args"},
-                              "ProjectOptions"};
+const FunctionDoc make_struct_doc{"Wrap Arrays into a StructArray",
+                                  ("Names of the StructArray's fields are\n"
+                                   "specified through MakeStructOptions."),
+                                  {"*args"},
+                                  "MakeStructOptions"};
 
 }  // namespace
 
@@ -156,15 +165,17 @@ void RegisterScalarNested(FunctionRegistry* registry) {
                                          ListValueLength<LargeListType>));
   DCHECK_OK(registry->AddFunction(std::move(list_value_length)));
 
-  auto project_function =
-      std::make_shared<ScalarFunction>("project", Arity::VarArgs(), &project_doc);
-  ScalarKernel kernel{KernelSignature::Make({InputType{}}, OutputType{ProjectResolve},
+  static MakeStructOptions kDefaultMakeStructOptions;
+  auto make_struct_function = std::make_shared<ScalarFunction>(
+      "make_struct", Arity::VarArgs(), &make_struct_doc, &kDefaultMakeStructOptions);
+
+  ScalarKernel kernel{KernelSignature::Make({InputType{}}, OutputType{MakeStructResolve},
                                             /*is_varargs=*/true),
-                      ProjectExec, OptionsWrapper<ProjectOptions>::Init};
+                      MakeStructExec, OptionsWrapper<MakeStructOptions>::Init};
   kernel.null_handling = NullHandling::OUTPUT_NOT_NULL;
   kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
-  DCHECK_OK(project_function->AddKernel(std::move(kernel)));
-  DCHECK_OK(registry->AddFunction(std::move(project_function)));
+  DCHECK_OK(make_struct_function->AddKernel(std::move(kernel)));
+  DCHECK_OK(registry->AddFunction(std::move(make_struct_function)));
 }
 
 }  // namespace internal
diff --git a/cpp/src/arrow/compute/kernels/scalar_nested_test.cc b/cpp/src/arrow/compute/kernels/scalar_nested_test.cc
index 42de9bcdb50..ef489955fa6 100644
--- a/cpp/src/arrow/compute/kernels/scalar_nested_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_nested_test.cc
@@ -22,6 +22,7 @@
 #include "arrow/compute/kernels/test_util.h"
 #include "arrow/result.h"
 #include "arrow/testing/gtest_util.h"
+#include "arrow/testing/matchers.h"
 #include "arrow/util/key_value_metadata.h"
 
 namespace arrow {
@@ -39,48 +40,55 @@ TEST(TestScalarNested, ListValueLength) {
 }
 
 struct {
+  Result<Datum> operator()(std::vector<Datum> args) {
+    return CallFunction("make_struct", args);
+  }
+
   template <typename... Options>
   Result<Datum> operator()(std::vector<Datum> args, std::vector<std::string> field_names,
                            Options... options) {
-    ProjectOptions opts{field_names, options...};
-    return CallFunction("project", args, &opts);
+    MakeStructOptions opts{field_names, options...};
+    return CallFunction("make_struct", args, &opts);
   }
-} Project;
+} MakeStruct;
 
-TEST(Project, Scalar) {
+TEST(MakeStruct, Scalar) {
   auto i32 = MakeScalar(1);
   auto f64 = MakeScalar(2.5);
   auto str = MakeScalar("yo");
 
-  ASSERT_OK_AND_ASSIGN(auto expected,
-                       StructScalar::Make({i32, f64, str}, {"i", "f", "s"}));
-  ASSERT_OK_AND_EQ(Datum(expected), Project({i32, f64, str}, {"i", "f", "s"}));
+  EXPECT_THAT(MakeStruct({i32, f64, str}, {"i", "f", "s"}),
+              ResultWith(Datum(*StructScalar::Make({i32, f64, str}, {"i", "f", "s"}))));
 
-  // Three field names but one input value
-  ASSERT_RAISES(Invalid, Project({str}, {"i", "f", "s"}));
+  // Names default to field_index
+  EXPECT_THAT(MakeStruct({i32, f64, str}),
+              ResultWith(Datum(*StructScalar::Make({i32, f64, str}, {"0", "1", "2"}))));
 
   // No field names or input values is fine
-  expected.reset(new StructScalar{{}, struct_({})});
-  ASSERT_OK_AND_EQ(Datum(expected), Project(/*args=*/{}, /*field_names=*/{}));
+  EXPECT_THAT(MakeStruct({}), ResultWith(Datum(*StructScalar::Make({}, {}))));
+
+  // Three field names but one input value
+  EXPECT_THAT(MakeStruct({str}, {"i", "f", "s"}), Raises(StatusCode::Invalid));
 }
 
-TEST(Project, Array) {
+TEST(MakeStruct, Array) {
   std::vector<std::string> field_names{"i", "s"};
 
   auto i32 = ArrayFromJSON(int32(), "[42, 13, 7]");
   auto str = ArrayFromJSON(utf8(), R"(["aa", "aa", "aa"])");
-  ASSERT_OK_AND_ASSIGN(Datum expected, StructArray::Make({i32, str}, field_names));
 
-  ASSERT_OK_AND_EQ(expected, Project({i32, str}, field_names));
+  EXPECT_THAT(MakeStruct({i32, str}, {"i", "s"}),
+              ResultWith(Datum(*StructArray::Make({i32, str}, field_names))));
 
   // Scalars are broadcast to the length of the arrays
-  ASSERT_OK_AND_EQ(expected, Project({i32, MakeScalar("aa")}, field_names));
+  EXPECT_THAT(MakeStruct({i32, MakeScalar("aa")}, {"i", "s"}),
+              ResultWith(Datum(*StructArray::Make({i32, str}, field_names))));
 
   // Array length mismatch
-  ASSERT_RAISES(Invalid, Project({i32->Slice(1), str}, field_names));
+  EXPECT_THAT(MakeStruct({i32->Slice(1), str}, field_names), Raises(StatusCode::Invalid));
 }
 
-TEST(Project, NullableMetadataPassedThru) {
+TEST(MakeStruct, NullableMetadataPassedThru) {
   auto i32 = ArrayFromJSON(int32(), "[42, 13, 7]");
   auto str = ArrayFromJSON(utf8(), R"(["aa", "aa", "aa"])");
 
@@ -90,7 +98,7 @@ TEST(Project, NullableMetadataPassedThru) {
       key_value_metadata({"a", "b"}, {"ALPHA", "BRAVO"}), nullptr};
 
   ASSERT_OK_AND_ASSIGN(auto proj,
-                       Project({i32, str}, field_names, nullability, metadata));
+                       MakeStruct({i32, str}, field_names, nullability, metadata));
 
   AssertTypeEqual(*proj.type(), StructType({
                                     field("i", int32(), /*nullable=*/true, metadata[0]),
@@ -98,11 +106,12 @@ TEST(Project, NullableMetadataPassedThru) {
                                 }));
 
   // error: projecting an array containing nulls with nullable=false
-  str = ArrayFromJSON(utf8(), R"(["aa", null, "aa"])");
-  ASSERT_RAISES(Invalid, Project({i32, str}, field_names, nullability, metadata));
+  EXPECT_THAT(MakeStruct({i32, ArrayFromJSON(utf8(), R"(["aa", null, "aa"])")},
+                         field_names, nullability, metadata),
+              Raises(StatusCode::Invalid));
 }
 
-TEST(Project, ChunkedArray) {
+TEST(MakeStruct, ChunkedArray) {
   std::vector<std::string> field_names{"i", "s"};
 
   auto i32_0 = ArrayFromJSON(int32(), "[42, 13, 7]");
@@ -122,16 +131,16 @@ TEST(Project, ChunkedArray) {
   ASSERT_OK_AND_ASSIGN(Datum expected,
                        ChunkedArray::Make({expected_0, expected_1, expected_2}));
 
-  ASSERT_OK_AND_EQ(expected, Project({i32, str}, field_names));
+  ASSERT_OK_AND_EQ(expected, MakeStruct({i32, str}, field_names));
 
   // Scalars are broadcast to the length of the arrays
-  ASSERT_OK_AND_EQ(expected, Project({i32, MakeScalar("aa")}, field_names));
+  ASSERT_OK_AND_EQ(expected, MakeStruct({i32, MakeScalar("aa")}, field_names));
 
   // Array length mismatch
-  ASSERT_RAISES(Invalid, Project({i32->Slice(1), str}, field_names));
+  ASSERT_RAISES(Invalid, MakeStruct({i32->Slice(1), str}, field_names));
 }
 
-TEST(Project, ChunkedArrayDifferentChunking) {
+TEST(MakeStruct, ChunkedArrayDifferentChunking) {
   std::vector<std::string> field_names{"i", "s"};
 
   auto i32_0 = ArrayFromJSON(int32(), "[42, 13, 7]");
@@ -159,13 +168,13 @@ TEST(Project, ChunkedArrayDifferentChunking) {
 
   ASSERT_OK_AND_ASSIGN(Datum expected, ChunkedArray::Make(expected_chunks));
 
-  ASSERT_OK_AND_EQ(expected, Project({i32, str}, field_names));
+  ASSERT_OK_AND_EQ(expected, MakeStruct({i32, str}, field_names));
 
   // Scalars are broadcast to the length of the arrays
-  ASSERT_OK_AND_EQ(expected, Project({i32, MakeScalar("aa")}, field_names));
+  ASSERT_OK_AND_EQ(expected, MakeStruct({i32, MakeScalar("aa")}, field_names));
 
   // Array length mismatch
-  ASSERT_RAISES(Invalid, Project({i32->Slice(1), str}, field_names));
+  ASSERT_RAISES(Invalid, MakeStruct({i32->Slice(1), str}, field_names));
 }
 
 }  // namespace compute
diff --git a/cpp/src/arrow/dataset/scanner.cc b/cpp/src/arrow/dataset/scanner.cc
index 2f7a115bb4b..0a289985ca2 100644
--- a/cpp/src/arrow/dataset/scanner.cc
+++ b/cpp/src/arrow/dataset/scanner.cc
@@ -621,7 +621,7 @@ Result<EnumeratedRecordBatchGenerator> AsyncScanner::ScanBatchesUnorderedAsync(
                         compute::MakeFilterNode(scan, "filter", scan_options_->filter));
 
   auto exprs = scan_options_->projection.call()->arguments;
-  auto names = checked_cast<const compute::ProjectOptions*>(
+  auto names = checked_cast<const compute::MakeStructOptions*>(
                    scan_options_->projection.call()->options.get())
                    ->field_names;
   ARROW_ASSIGN_OR_RAISE(
diff --git a/cpp/src/arrow/dataset/scanner_internal.h b/cpp/src/arrow/dataset/scanner_internal.h
index 27b32aa6f19..a7ba070b2cf 100644
--- a/cpp/src/arrow/dataset/scanner_internal.h
+++ b/cpp/src/arrow/dataset/scanner_internal.h
@@ -225,7 +225,7 @@ inline Status SetProjection(ScanOptions* options, const compute::Expression& pro
 
 inline Status SetProjection(ScanOptions* options, std::vector<compute::Expression> exprs,
                             std::vector<std::string> names) {
-  compute::ProjectOptions project_options{std::move(names)};
+  compute::MakeStructOptions project_options{std::move(names)};
 
   for (size_t i = 0; i < exprs.size(); ++i) {
     if (auto ref = exprs[i].field_ref()) {
@@ -239,7 +239,7 @@ inline Status SetProjection(ScanOptions* options, std::vector<compute::Expressio
   }
 
   return SetProjection(options,
-                       call("project", std::move(exprs), std::move(project_options)));
+                       call("make_struct", std::move(exprs), std::move(project_options)));
 }
 
 inline Status SetProjection(ScanOptions* options, std::vector<std::string> names) {
diff --git a/cpp/src/arrow/dataset/scanner_test.cc b/cpp/src/arrow/dataset/scanner_test.cc
index 74f558d1738..5dc83c662de 100644
--- a/cpp/src/arrow/dataset/scanner_test.cc
+++ b/cpp/src/arrow/dataset/scanner_test.cc
@@ -1362,7 +1362,8 @@ TEST(ScanNode, MinimalEndToEnd) {
   // just be a list of materialized field names)
   compute::Expression a_times_2 = call("multiply", {field_ref("a"), literal(2)});
   ASSERT_OK_AND_ASSIGN(a_times_2, a_times_2.Bind(*dataset->schema()));
-  options->projection = call("project", {a_times_2}, compute::ProjectOptions{{"a * 2"}});
+  options->projection =
+      call("make_struct", {a_times_2}, compute::MakeStructOptions{{"a * 2"}});
 
   // construct the scan node
   ASSERT_OK_AND_ASSIGN(compute::ExecNode * scan,
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index f7fd1fa3f5f..35011a786a6 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -926,7 +926,7 @@ Structural transforms
   (null if input is null).  Output type is Int32 for List, Int64 for LargeList.
 
 * \(10) The output struct's field types are the types of its arguments. The
-  field names are specified using an instance of :struct:`ProjectOptions`.
+  field names are specified using an instance of :struct:`MakeStructOptions`.
   The output shape will be scalar if all inputs are scalar, otherwise any
   scalars will be broadcast to arrays.
 
diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx
index d3267dc02d7..46cfdc4e2ef 100644
--- a/python/pyarrow/_compute.pyx
+++ b/python/pyarrow/_compute.pyx
@@ -862,16 +862,16 @@ class PartitionNthOptions(_PartitionNthOptions):
         self._set_options(pivot)
 
 
-cdef class _ProjectOptions(FunctionOptions):
+cdef class _MakeStructOptions(FunctionOptions):
     def _set_options(self, field_names):
         cdef:
             vector[c_string] c_field_names
         for n in field_names:
             c_field_names.push_back(tobytes(n))
-        self.wrapped.reset(new CProjectOptions(field_names))
+        self.wrapped.reset(new CMakeStructOptions(c_field_names))
 
 
-class ProjectOptions(_ProjectOptions):
+class MakeStructOptions(_MakeStructOptions):
     def __init__(self, field_names):
         self._set_options(field_names)
 
diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py
index 15d1adcbafe..85f637fce5a 100644
--- a/python/pyarrow/compute.py
+++ b/python/pyarrow/compute.py
@@ -41,7 +41,7 @@
     ModeOptions,
     PadOptions,
     PartitionNthOptions,
-    ProjectOptions,
+    MakeStructOptions,
     QuantileOptions,
     ReplaceSliceOptions,
     ReplaceSubstringOptions,
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 6977c26cac5..bd3bdb251f3 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -1971,9 +1971,9 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil:
         CPartitionNthOptions(int64_t pivot)
         int64_t pivot
 
-    cdef cppclass CProjectOptions \
-            "arrow::compute::ProjectOptions"(CFunctionOptions):
-        CProjectOptions(vector[c_string] field_names)
+    cdef cppclass CMakeStructOptions \
+            "arrow::compute::MakeStructOptions"(CFunctionOptions):
+        CMakeStructOptions(vector[c_string] field_names)
         vector[c_string] field_names
 
     ctypedef enum CSortOrder" arrow::compute::SortOrder":
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index b65970745ec..c98b3a224b8 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -119,7 +119,7 @@ def test_option_class_equality():
         pc.MatchSubstringOptions("pattern"),
         pc.PadOptions(5, " "),
         pc.PartitionNthOptions(1),
-        pc.ProjectOptions([b"field", b"names"]),
+        pc.MakeStructOptions(["field", "names"]),
         pc.DayOfWeekOptions(False, 0),
         pc.ReplaceSliceOptions(start=0, stop=1, replacement="a"),
         pc.ReplaceSubstringOptions("a", "b"),
@@ -1645,3 +1645,29 @@ def test_min_max_element_wise():
     assert result == pa.array([2, 3, None])
     result = pc.min_element_wise(arr1, arr3, skip_nulls=False)
     assert result == pa.array([1, 2, None])
+
+
+def test_make_struct():
+    assert pc.make_struct(1, 'a').as_py() == {'0': 1, '1': 'a'}
+
+    assert pc.make_struct(1, 'a', field_names=['i', 's']).as_py() == {
+        'i': 1, 's': 'a'}
+
+    assert pc.make_struct([1, 2, 3],
+                          "a b c".split()) == pa.StructArray.from_arrays([
+                              [1, 2, 3],
+                              "a b c".split()], names='0 1'.split())
+
+    with pytest.raises(ValueError, match="Array arguments must all "
+                                         "be the same length"):
+        pc.make_struct([1, 2, 3, 4], "a b c".split())
+
+    with pytest.raises(ValueError, match="0 arguments but 2 field names"):
+        pc.make_struct(field_names=['one', 'two'])
+
+
+def test_case_when():
+    assert pc.case_when(pc.make_struct([True, False, None],
+                                       [False, True, None]),
+                        [1, 2, 3],
+                        [11, 12, 13]) == pa.array([1, 12, None])

From 93bdbf1df56ffd33af10de104c68cdcb85e54fa5 Mon Sep 17 00:00:00 2001
From: Dominik Moritz <domoritz@gmail.com>
Date: Thu, 15 Jul 2021 23:19:10 -0700
Subject: [PATCH 581/719] ARROW-13303: [JS] Revise bundles

Closes #10698 from domoritz/dom/bundles-2

Lead-authored-by: Dominik Moritz <domoritz@gmail.com>
Co-authored-by: ptaylor <paul.e.taylor@me.com>
Signed-off-by: Dominik Moritz <domoritz@gmail.com>
---
 js/gulp/arrow-task.js                      | 20 ++++++++--------
 js/gulp/package-task.js                    | 27 +++++++++++++++-------
 js/gulp/test-task.js                       |  4 ++--
 js/gulpfile.js                             |  8 +++----
 js/jest.config.js                          |  2 +-
 js/jestconfigs/jest.apache-arrow.config.js |  1 +
 js/jestconfigs/jest.es2015.cjs.config.js   |  1 +
 js/jestconfigs/jest.es2015.esm.config.js   |  3 +--
 js/jestconfigs/jest.es2015.umd.config.js   |  1 +
 js/jestconfigs/jest.es5.cjs.config.js      |  1 +
 js/jestconfigs/jest.es5.esm.config.js      |  3 +--
 js/jestconfigs/jest.es5.umd.config.js      |  1 +
 js/jestconfigs/jest.esnext.cjs.config.js   |  1 +
 js/jestconfigs/jest.esnext.esm.config.js   |  3 +--
 js/jestconfigs/jest.esnext.umd.config.js   |  1 +
 js/jestconfigs/jest.ts.config.js           |  3 +--
 js/package.json                            |  1 -
 js/tsconfig/tsconfig.docs.json             |  2 +-
 js/tsconfig/tsconfig.es2015.cjs.json       |  2 +-
 js/tsconfig/tsconfig.es2015.esm.json       |  2 +-
 js/tsconfig/tsconfig.es5.cjs.json          |  2 +-
 js/tsconfig/tsconfig.es5.esm.json          |  2 +-
 js/tsconfig/tsconfig.esnext.cjs.json       |  2 +-
 js/tsconfig/tsconfig.esnext.esm.json       |  2 +-
 24 files changed, 54 insertions(+), 41 deletions(-)

diff --git a/js/gulp/arrow-task.js b/js/gulp/arrow-task.js
index 4f597a0aefa..54a046a1434 100644
--- a/js/gulp/arrow-task.js
+++ b/js/gulp/arrow-task.js
@@ -35,24 +35,24 @@ const pipeline = require('util').promisify(require('stream').pipeline);
 
 const arrowTask = ((cache) => memoizeTask(cache, function copyMain(target) {
     const out = targetDir(target);
-    const dtsGlob = `${targetDir(`esnext`, `cjs`)}/**/*.ts`;
-    const cjsGlob = `${targetDir(`esnext`, `cjs`)}/**/*.js`;
-    const esmGlob = `${targetDir(`esnext`, `esm`)}/**/*.js`;
+    const dtsGlob = `${targetDir(`es2015`, `cjs`)}/**/*.ts`;
+    const cjsGlob = `${targetDir(`es2015`, `cjs`)}/**/*.js`;
+    const esmGlob = `${targetDir(`es2015`, `esm`)}/**/*.js`;
     const es2015UmdGlob = `${targetDir(`es2015`, `umd`)}/*.js`;
     const esnextUmdGlob = `${targetDir(`esnext`, `umd`)}/*.js`;
-    const cjsSourceMapsGlob = `${targetDir(`esnext`, `cjs`)}/**/*.map`;
-    const esmSourceMapsGlob = `${targetDir(`esnext`, `esm`)}/**/*.map`;
+    const cjsSourceMapsGlob = `${targetDir(`es2015`, `cjs`)}/**/*.map`;
+    const esmSourceMapsGlob = `${targetDir(`es2015`, `esm`)}/**/*.map`;
     const es2015UmdSourceMapsGlob = `${targetDir(`es2015`, `umd`)}/*.map`;
     const esnextUmdSourceMapsGlob = `${targetDir(`esnext`, `umd`)}/*.map`;
     return ObservableForkJoin(
         observableFromStreams(gulp.src(dtsGlob),                 gulp.dest(out)), // copy d.ts files
-        observableFromStreams(gulp.src(cjsGlob),                 gulp.dest(out)), // copy esnext cjs files
-        observableFromStreams(gulp.src(cjsSourceMapsGlob),       gulp.dest(out)), // copy esnext cjs sourcemaps
-        observableFromStreams(gulp.src(esmSourceMapsGlob),       gulp.dest(out)), // copy esnext esm sourcemaps
+        observableFromStreams(gulp.src(cjsGlob),                 gulp.dest(out)), // copy es2015 cjs files
+        observableFromStreams(gulp.src(cjsSourceMapsGlob),       gulp.dest(out)), // copy es2015 cjs sourcemaps
+        observableFromStreams(gulp.src(esmSourceMapsGlob),       gulp.dest(out)), // copy es2015 esm sourcemaps
         observableFromStreams(gulp.src(es2015UmdSourceMapsGlob), gulp.dest(out)), // copy es2015 umd sourcemap files, but don't rename
         observableFromStreams(gulp.src(esnextUmdSourceMapsGlob), gulp.dest(out)), // copy esnext umd sourcemap files, but don't rename
-        observableFromStreams(gulp.src(esmGlob),       gulpRename((p) => { p.extname = '.mjs'; }),          gulp.dest(out)), // copy esnext esm files and rename to `.mjs`
-        observableFromStreams(gulp.src(es2015UmdGlob), gulpRename((p) => { p.basename += `.es2015.min`; }), gulp.dest(out)), // copy es2015 umd files and add `.min`
+        observableFromStreams(gulp.src(esmGlob),       gulpRename((p) => { p.extname = '.mjs'; }),          gulp.dest(out)), // copy es2015 esm files and rename to `.mjs`
+        observableFromStreams(gulp.src(es2015UmdGlob), gulpRename((p) => { p.basename += `.es2015.min`; }), gulp.dest(out)), // copy es2015 umd files and add `.es2015.min`
         observableFromStreams(gulp.src(esnextUmdGlob), gulpRename((p) => { p.basename += `.esnext.min`; }), gulp.dest(out)), // copy esnext umd files and add `.esnext.min`
     ).pipe(share({ connector: () => new ReplaySubject(), resetOnError: false, resetOnComplete: false, resetOnRefCountZero: false }));
 }))({});
diff --git a/js/gulp/package-task.js b/js/gulp/package-task.js
index a3f21394685..94986dadbea 100644
--- a/js/gulp/package-task.js
+++ b/js/gulp/package-task.js
@@ -53,23 +53,34 @@ const createMainPackageJson = (target, format) => (orig) => ({
     ...createTypeScriptPackageJson(target, format)(orig),
     bin: orig.bin,
     name: npmPkgName,
-    main: `${mainExport}.node`,
-    browser: `${mainExport}.dom`,
-    module: `${mainExport}.dom.mjs`,
+    type: 'commonjs',
+    main: `${mainExport}.node.js`,
+    module: `${mainExport}.node.mjs`,
+    browser: {
+        [`${mainExport}.node.js`]: `${mainExport}.dom.js`,
+        [`${mainExport}.node.mjs`]: `${mainExport}.dom.mjs`
+    },
+    exports: {
+        import: `./${mainExport}.node.mjs`,
+        require: `./${mainExport}.node.js`,
+    },
     types: `${mainExport}.node.d.ts`,
     unpkg: `${mainExport}.es2015.min.js`,
     jsdelivr: `${mainExport}.es2015.min.js`,
     sideEffects: false,
-    esm: { mode: `all`, sourceMap: true },
+    esm: { mode: `all`, sourceMap: true }
 });
 
 const createTypeScriptPackageJson = (target, format) => (orig) => ({
     ...createScopedPackageJSON(target, format)(orig),
     bin: undefined,
-    module: undefined,
     main: `${mainExport}.node.ts`,
+    module: `${mainExport}.node.ts`,
     types: `${mainExport}.node.ts`,
     browser: `${mainExport}.dom.ts`,
+    type: "module",
+    sideEffects: false,
+    esm: { mode: `auto`, sourceMap: true },
     dependencies: {
         '@types/flatbuffers': '*',
         '@types/node': '*',
@@ -91,11 +102,11 @@ const createScopedPackageJSON = (target, format) => (({ name, ...orig }) =>
             // set "browser" if building scoped UMD target, otherwise "Arrow.dom"
             browser:  format === 'umd' ? `${mainExport}.js` : `${mainExport}.dom.js`,
             // set "main" to "Arrow" if building scoped UMD target, otherwise "Arrow.node"
-            main:     format === 'umd' ? `${mainExport}.js` : `${mainExport}.node`,
+            main:     format === 'umd' ? `${mainExport}.js` : `${mainExport}.node.js`,
             // set "type" to `module` or `commonjs` (https://nodejs.org/api/packages.html#packages_type)
             type:     format === 'esm' ? `module` : `commonjs`,
-            // set "module" (for https://www.npmjs.com/package/@pika/pack) if building scoped ESM target
-            module:   format === 'esm' ? `${mainExport}.dom.js` : undefined,
+            // set "module" if building scoped ESM target
+            module:   format === 'esm' ? `${mainExport}.node.js` : undefined,
             // set "sideEffects" to false as a hint to Webpack that it's safe to tree-shake the ESM target
             sideEffects: format === 'esm' ? false : undefined,
             // include "esm" settings for https://www.npmjs.com/package/esm if building scoped ESM target
diff --git a/js/gulp/test-task.js b/js/gulp/test-task.js
index 7a2cc0441ce..2012f742907 100644
--- a/js/gulp/test-task.js
+++ b/js/gulp/test-task.js
@@ -28,7 +28,7 @@ const readFile = promisify(require('fs').readFile);
 const asyncDone = promisify(require('async-done'));
 const exec = promisify(require('child_process').exec);
 const parseXML = promisify(require('xml2js').parseString);
-const { targetAndModuleCombinations } = require('./util');
+const { targetAndModuleCombinations, npmPkgName } = require('./util');
 
 const jestArgv = [`--reporters=jest-silent-reporter`];
 
@@ -53,7 +53,7 @@ const testOptions = {
 const testTask = ((cache, execArgv, testOptions) => memoizeTask(cache, function test(target, format) {
     const opts = { ...testOptions };
     const args = [...execArgv];
-    if (format === 'esm' || target === 'ts' || target === 'src') {
+    if (format === 'esm' || target === 'ts' || target === 'src' || target === npmPkgName) {
         args.unshift(`--experimental-vm-modules`);
     }
     if (argv.coverage) {
diff --git a/js/gulpfile.js b/js/gulpfile.js
index a2c0ce734f8..a257a2deff0 100644
--- a/js/gulpfile.js
+++ b/js/gulpfile.js
@@ -60,15 +60,15 @@ knownTargets.forEach((target) => {
     ));
 });
 
-// The main "apache-arrow" module builds the es2015/umd, esnext/cjs,
-// esnext/esm, and esnext/umd targets, then copies and renames the
+// The main "apache-arrow" module builds the es2015/umd, es2015/cjs,
+// es2015/esm, and esnext/umd targets, then copies and renames the
 // compiled output into the apache-arrow folder
 gulp.task(`build:${npmPkgName}`,
     gulp.series(
         gulp.parallel(
             `build:${taskName(`es2015`, `umd`)}`,
-            `build:${taskName(`esnext`, `cjs`)}`,
-            `build:${taskName(`esnext`, `esm`)}`,
+            `build:${taskName(`es2015`, `cjs`)}`,
+            `build:${taskName(`es2015`, `esm`)}`,
             `build:${taskName(`esnext`, `umd`)}`
         ),
         `clean:${npmPkgName}`,
diff --git a/js/jest.config.js b/js/jest.config.js
index e4795e654a9..fb3f97c4409 100644
--- a/js/jest.config.js
+++ b/js/jest.config.js
@@ -28,7 +28,7 @@ module.exports = {
   rootDir: ".",
   roots: ["<rootDir>/test/"],
   preset: "ts-jest/presets/default-esm",
-  moduleFileExtensions: ["js", "ts"],
+  moduleFileExtensions: ["mjs", "js", "ts"],
   coverageReporters: ["lcov", "json"],
   coveragePathIgnorePatterns: [
     "fb\\/(File|Message|Schema|Tensor)\\.(js|ts)$",
diff --git a/js/jestconfigs/jest.apache-arrow.config.js b/js/jestconfigs/jest.apache-arrow.config.js
index 9bd011c7aec..103dc5a92c2 100644
--- a/js/jestconfigs/jest.apache-arrow.config.js
+++ b/js/jestconfigs/jest.apache-arrow.config.js
@@ -19,6 +19,7 @@ module.exports = {
   ...require("../jest.config"),
   rootDir: "../",
   preset: "ts-jest",
+  moduleFileExtensions: ["js", "ts"],
   globals: {
     "ts-jest": {
       diagnostics: false,
diff --git a/js/jestconfigs/jest.es2015.cjs.config.js b/js/jestconfigs/jest.es2015.cjs.config.js
index a07bf8418a8..1d567676129 100644
--- a/js/jestconfigs/jest.es2015.cjs.config.js
+++ b/js/jestconfigs/jest.es2015.cjs.config.js
@@ -19,6 +19,7 @@ module.exports = {
   ...require("../jest.config"),
   rootDir: "../",
   preset: "ts-jest",
+  moduleFileExtensions: ["js", "ts"],
   globals: {
     "ts-jest": {
       diagnostics: false,
diff --git a/js/jestconfigs/jest.es2015.esm.config.js b/js/jestconfigs/jest.es2015.esm.config.js
index faf59792cdd..cf564fb234a 100644
--- a/js/jestconfigs/jest.es2015.esm.config.js
+++ b/js/jestconfigs/jest.es2015.esm.config.js
@@ -27,7 +27,6 @@ module.exports = {
   },
   moduleNameMapper: {
     "^apache-arrow(.*)": "<rootDir>/targets/es2015/esm$1",
-    tslib: "tslib/tslib.es6.js",
-    flatbuffers: "flatbuffers/js/flatbuffers.mjs",
+    tslib: "tslib/tslib.es6.js"
   },
 };
diff --git a/js/jestconfigs/jest.es2015.umd.config.js b/js/jestconfigs/jest.es2015.umd.config.js
index 1e861e0eedf..21f27872d91 100644
--- a/js/jestconfigs/jest.es2015.umd.config.js
+++ b/js/jestconfigs/jest.es2015.umd.config.js
@@ -19,6 +19,7 @@ module.exports = {
   ...require("../jest.config"),
   rootDir: "../",
   preset: "ts-jest",
+  moduleFileExtensions: ["js", "ts"],
   globals: {
     "ts-jest": {
       diagnostics: false,
diff --git a/js/jestconfigs/jest.es5.cjs.config.js b/js/jestconfigs/jest.es5.cjs.config.js
index c65c71b2ca3..ae3e9bb4230 100644
--- a/js/jestconfigs/jest.es5.cjs.config.js
+++ b/js/jestconfigs/jest.es5.cjs.config.js
@@ -19,6 +19,7 @@ module.exports = {
   ...require("../jest.config"),
   rootDir: "../",
   preset: "ts-jest",
+  moduleFileExtensions: ["js", "ts"],
   globals: {
     "ts-jest": {
       diagnostics: false,
diff --git a/js/jestconfigs/jest.es5.esm.config.js b/js/jestconfigs/jest.es5.esm.config.js
index ca46e9162f7..0a0a21b761c 100644
--- a/js/jestconfigs/jest.es5.esm.config.js
+++ b/js/jestconfigs/jest.es5.esm.config.js
@@ -27,7 +27,6 @@ module.exports = {
   },
   moduleNameMapper: {
     "^apache-arrow(.*)": "<rootDir>/targets/es5/esm$1",
-    tslib: "tslib/tslib.es6.js",
-    flatbuffers: "flatbuffers/js/flatbuffers.mjs",
+    tslib: "tslib/tslib.es6.js"
   },
 };
diff --git a/js/jestconfigs/jest.es5.umd.config.js b/js/jestconfigs/jest.es5.umd.config.js
index 893a46149d2..f52af07bc8a 100644
--- a/js/jestconfigs/jest.es5.umd.config.js
+++ b/js/jestconfigs/jest.es5.umd.config.js
@@ -19,6 +19,7 @@ module.exports = {
   ...require("../jest.config"),
   rootDir: "../",
   preset: "ts-jest",
+  moduleFileExtensions: ["js", "ts"],
   globals: {
     "ts-jest": {
       diagnostics: false,
diff --git a/js/jestconfigs/jest.esnext.cjs.config.js b/js/jestconfigs/jest.esnext.cjs.config.js
index 26cb9c60634..8be999e3d38 100644
--- a/js/jestconfigs/jest.esnext.cjs.config.js
+++ b/js/jestconfigs/jest.esnext.cjs.config.js
@@ -19,6 +19,7 @@ module.exports = {
   ...require("../jest.config"),
   rootDir: "../",
   preset: "ts-jest",
+  moduleFileExtensions: ["js", "ts"],
   globals: {
     "ts-jest": {
       diagnostics: false,
diff --git a/js/jestconfigs/jest.esnext.esm.config.js b/js/jestconfigs/jest.esnext.esm.config.js
index 26393694908..aca4c520805 100644
--- a/js/jestconfigs/jest.esnext.esm.config.js
+++ b/js/jestconfigs/jest.esnext.esm.config.js
@@ -27,7 +27,6 @@ module.exports = {
   },
   moduleNameMapper: {
     "^apache-arrow(.*)": "<rootDir>/targets/esnext/esm$1",
-    tslib: "tslib/tslib.es6.js",
-    flatbuffers: "flatbuffers/js/flatbuffers.mjs",
+    tslib: "tslib/tslib.es6.js"
   },
 };
diff --git a/js/jestconfigs/jest.esnext.umd.config.js b/js/jestconfigs/jest.esnext.umd.config.js
index eeee01fc993..5013d45e03a 100644
--- a/js/jestconfigs/jest.esnext.umd.config.js
+++ b/js/jestconfigs/jest.esnext.umd.config.js
@@ -19,6 +19,7 @@ module.exports = {
   ...require("../jest.config"),
   rootDir: "../",
   preset: "ts-jest",
+  moduleFileExtensions: ["js", "ts"],
   globals: {
     "ts-jest": {
       diagnostics: false,
diff --git a/js/jestconfigs/jest.ts.config.js b/js/jestconfigs/jest.ts.config.js
index 7e9ac9b603d..e56161b8b4c 100644
--- a/js/jestconfigs/jest.ts.config.js
+++ b/js/jestconfigs/jest.ts.config.js
@@ -26,7 +26,6 @@ module.exports = {
     },
   },
   moduleNameMapper: {
-    "^apache-arrow(.*)": "<rootDir>/targets/ts$1",
-    flatbuffers: "flatbuffers/js/flatbuffers.mjs",
+    "^apache-arrow(.*)": "<rootDir>/targets/ts$1"
   },
 };
diff --git a/js/package.json b/js/package.json
index 7c3fc0ab856..cf983b7aa51 100644
--- a/js/package.json
+++ b/js/package.json
@@ -1,7 +1,6 @@
 {
   "name": "apache-arrow",
   "description": "Apache Arrow columnar in-memory format",
-  "main": "./index",
   "bin": {
     "arrow2csv": "bin/arrow2csv.js"
   },
diff --git a/js/tsconfig/tsconfig.docs.json b/js/tsconfig/tsconfig.docs.json
index b7b990ed8ec..722838f5ba3 100644
--- a/js/tsconfig/tsconfig.docs.json
+++ b/js/tsconfig/tsconfig.docs.json
@@ -3,6 +3,6 @@
   "extends": "./tsconfig.base.json",
   "include": ["../src/**/*.ts"],
   "compilerOptions": {
-    "target": "ESNEXT"
+    "target": "esnext"
   }
 }
diff --git a/js/tsconfig/tsconfig.es2015.cjs.json b/js/tsconfig/tsconfig.es2015.cjs.json
index 6c7df20fb9e..92f05dd1ccd 100644
--- a/js/tsconfig/tsconfig.es2015.cjs.json
+++ b/js/tsconfig/tsconfig.es2015.cjs.json
@@ -2,7 +2,7 @@
 {
   "extends": "./tsconfig.base.json",
   "compilerOptions": {
-    "target": "ES2015",
+    "target": "es2015",
     "module": "commonjs"
   }
 }
diff --git a/js/tsconfig/tsconfig.es2015.esm.json b/js/tsconfig/tsconfig.es2015.esm.json
index 17636623d4c..c56b972633d 100644
--- a/js/tsconfig/tsconfig.es2015.esm.json
+++ b/js/tsconfig/tsconfig.es2015.esm.json
@@ -2,7 +2,7 @@
 {
   "extends": "./tsconfig.base.json",
   "compilerOptions": {
-    "target": "ES2015",
+    "target": "es2015",
     "module": "es2015"
   }
 }
diff --git a/js/tsconfig/tsconfig.es5.cjs.json b/js/tsconfig/tsconfig.es5.cjs.json
index c6f329a397f..7c149d39de2 100644
--- a/js/tsconfig/tsconfig.es5.cjs.json
+++ b/js/tsconfig/tsconfig.es5.cjs.json
@@ -2,7 +2,7 @@
 {
   "extends": "./tsconfig.base.json",
   "compilerOptions": {
-    "target": "ES5",
+    "target": "es5",
     "module": "commonjs"
   }
 }
diff --git a/js/tsconfig/tsconfig.es5.esm.json b/js/tsconfig/tsconfig.es5.esm.json
index 87a2455bef1..782c303e631 100644
--- a/js/tsconfig/tsconfig.es5.esm.json
+++ b/js/tsconfig/tsconfig.es5.esm.json
@@ -2,7 +2,7 @@
 {
   "extends": "./tsconfig.base.json",
   "compilerOptions": {
-    "target": "ES5",
+    "target": "es5",
     "module": "es2015"
   }
 }
diff --git a/js/tsconfig/tsconfig.esnext.cjs.json b/js/tsconfig/tsconfig.esnext.cjs.json
index 6b1fae47126..fb0d2eb1171 100644
--- a/js/tsconfig/tsconfig.esnext.cjs.json
+++ b/js/tsconfig/tsconfig.esnext.cjs.json
@@ -2,7 +2,7 @@
 {
   "extends": "./tsconfig.base.json",
   "compilerOptions": {
-    "target": "ESNEXT",
+    "target": "esnext",
     "module": "commonjs"
   }
 }
diff --git a/js/tsconfig/tsconfig.esnext.esm.json b/js/tsconfig/tsconfig.esnext.esm.json
index 4c70161f882..6701c8e13c1 100644
--- a/js/tsconfig/tsconfig.esnext.esm.json
+++ b/js/tsconfig/tsconfig.esnext.esm.json
@@ -2,7 +2,7 @@
 {
   "extends": "./tsconfig.base.json",
   "compilerOptions": {
-    "target": "ESNEXT",
+    "target": "esnext",
     "module": "es2015"
   }
 }

From 5b772798ad83349a473fe19ec5842118aa5166b8 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 16 Jul 2021 12:07:41 +0200
Subject: [PATCH 582/719] ARROW-13350: [Python][CI] Fix
 test_extract_datetime_components for pandas 0.24

Closes #10731 from jorisvandenbossche/ARROW-13350

Authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Signed-off-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
---
 python/pyarrow/tests/test_compute.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index c98b3a224b8..993742fe664 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -1373,15 +1373,15 @@ def _check_datetime_components(timestamps, timezone=None):
 
     if Version(pd.__version__) < Version("1.1.0"):
         # https://github.com/pandas-dev/pandas/issues/33206
-        iso_year = ts.map(lambda x: x.isocalendar()[0]).astype("Int64")
-        iso_week = ts.map(lambda x: x.isocalendar()[1]).astype("Int64")
-        iso_day = ts.map(lambda x: x.isocalendar()[2]).astype("Int64")
+        iso_year = ts.map(lambda x: x.isocalendar()[0]).astype("int64")
+        iso_week = ts.map(lambda x: x.isocalendar()[1]).astype("int64")
+        iso_day = ts.map(lambda x: x.isocalendar()[2]).astype("int64")
     else:
         # Casting is required because pandas isocalendar returns int32
         # while arrow isocalendar returns int64.
-        iso_year = ts.dt.isocalendar()["year"].astype("Int64")
-        iso_week = ts.dt.isocalendar()["week"].astype("Int64")
-        iso_day = ts.dt.isocalendar()["day"].astype("Int64")
+        iso_year = ts.dt.isocalendar()["year"].astype("int64")
+        iso_week = ts.dt.isocalendar()["week"].astype("int64")
+        iso_day = ts.dt.isocalendar()["day"].astype("int64")
 
     iso_calendar = pa.StructArray.from_arrays(
         [iso_year, iso_week, iso_day],

From 082b11e9ecb4acd60d2e36ba8b5b3bda8b2027ce Mon Sep 17 00:00:00 2001
From: Alessandro Molina <amol@turbogears.org>
Date: Fri, 16 Jul 2021 13:36:54 +0200
Subject: [PATCH 583/719] ARROW-13239: [Python] [Doc] Expose signatures in pyx
 modules

Closes #10684 from amol-/ARROW-13239

Authored-by: Alessandro Molina <amol@turbogears.org>
Signed-off-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
---
 cpp/cmake_modules/UseCython.cmake | 2 ++
 python/pyarrow/_csv.pyx           | 1 -
 python/pyarrow/_feather.pyx       | 1 -
 python/pyarrow/_flight.pyx        | 1 -
 python/pyarrow/_json.pyx          | 1 -
 python/pyarrow/_orc.pyx           | 1 -
 python/pyarrow/_parquet.pyx       | 1 -
 python/pyarrow/_plasma.pyx        | 1 -
 python/pyarrow/gandiva.pyx        | 1 -
 python/pyarrow/lib.pyx            | 1 -
 10 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/cpp/cmake_modules/UseCython.cmake b/cpp/cmake_modules/UseCython.cmake
index 0b65a7842f9..f2025efb4c9 100644
--- a/cpp/cmake_modules/UseCython.cmake
+++ b/cpp/cmake_modules/UseCython.cmake
@@ -145,6 +145,8 @@ function(compile_pyx
             ${no_docstrings_arg}
             ${cython_debug_arg}
             ${CYTHON_FLAGS}
+            # Necessary for autodoc of function arguments
+            --directive embedsignature=True
             # Necessary for Cython code coverage
             --working
             ${CMAKE_CURRENT_SOURCE_DIR}
diff --git a/python/pyarrow/_csv.pyx b/python/pyarrow/_csv.pyx
index 1e896a2076a..2b7a835b10a 100644
--- a/python/pyarrow/_csv.pyx
+++ b/python/pyarrow/_csv.pyx
@@ -17,7 +17,6 @@
 
 # cython: profile=False
 # distutils: language = c++
-# cython: embedsignature = True
 # cython: language_level = 3
 
 from cython.operator cimport dereference as deref
diff --git a/python/pyarrow/_feather.pyx b/python/pyarrow/_feather.pyx
index 2f46bb98724..8df7935aaf3 100644
--- a/python/pyarrow/_feather.pyx
+++ b/python/pyarrow/_feather.pyx
@@ -20,7 +20,6 @@
 
 # cython: profile=False
 # distutils: language = c++
-# cython: embedsignature = True
 # cython: language_level=3
 
 from cython.operator cimport dereference as deref
diff --git a/python/pyarrow/_flight.pyx b/python/pyarrow/_flight.pyx
index 618291c1a32..c3536a6edc9 100644
--- a/python/pyarrow/_flight.pyx
+++ b/python/pyarrow/_flight.pyx
@@ -16,7 +16,6 @@
 # under the License.
 
 # cython: language_level = 3
-# cython: embedsignature = True
 
 import collections
 import contextlib
diff --git a/python/pyarrow/_json.pyx b/python/pyarrow/_json.pyx
index ef2567a009a..183bd4fdd4d 100644
--- a/python/pyarrow/_json.pyx
+++ b/python/pyarrow/_json.pyx
@@ -17,7 +17,6 @@
 
 # cython: profile=False
 # distutils: language = c++
-# cython: embedsignature = True
 # cython: language_level = 3
 
 from pyarrow.includes.common cimport *
diff --git a/python/pyarrow/_orc.pyx b/python/pyarrow/_orc.pyx
index d0457203446..f58b52a0d37 100644
--- a/python/pyarrow/_orc.pyx
+++ b/python/pyarrow/_orc.pyx
@@ -17,7 +17,6 @@
 
 # cython: profile=False
 # distutils: language = c++
-# cython: embedsignature = True
 
 from cython.operator cimport dereference as deref
 from libcpp.vector cimport vector as std_vector
diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx
index 0b66ea0e0b0..f229a27b601 100644
--- a/python/pyarrow/_parquet.pyx
+++ b/python/pyarrow/_parquet.pyx
@@ -17,7 +17,6 @@
 
 # cython: profile=False
 # distutils: language = c++
-# cython: embedsignature = True
 
 import io
 from textwrap import indent
diff --git a/python/pyarrow/_plasma.pyx b/python/pyarrow/_plasma.pyx
index e2c53175ffc..e38c81f8026 100644
--- a/python/pyarrow/_plasma.pyx
+++ b/python/pyarrow/_plasma.pyx
@@ -17,7 +17,6 @@
 
 # cython: profile=False
 # distutils: language = c++
-# cython: embedsignature = True
 # cython: language_level = 3
 
 from libcpp cimport bool as c_bool, nullptr
diff --git a/python/pyarrow/gandiva.pyx b/python/pyarrow/gandiva.pyx
index 61e2587af2b..12d572b3307 100644
--- a/python/pyarrow/gandiva.pyx
+++ b/python/pyarrow/gandiva.pyx
@@ -18,7 +18,6 @@
 # cython: profile=False
 # distutils: language = c++
 # cython: language_level = 3
-# cython: embedsignature = True
 
 from libcpp cimport bool as c_bool, nullptr
 from libcpp.memory cimport shared_ptr, unique_ptr, make_shared
diff --git a/python/pyarrow/lib.pyx b/python/pyarrow/lib.pyx
index fdf16961de6..60e1f8c53bb 100644
--- a/python/pyarrow/lib.pyx
+++ b/python/pyarrow/lib.pyx
@@ -16,7 +16,6 @@
 # under the License.
 
 # cython: profile = False
-# cython: embedsignature = True
 # cython: nonecheck = True
 # distutils: language = c++
 

From af155c3b3a042e08440b7ab8ab0345192ffe5a52 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Fri, 16 Jul 2021 10:28:12 -0400
Subject: [PATCH 584/719] ARROW-9056: [C++] Support aggregations over scalars

This adds various trivial implementations to support scalar aggregations over scalar inputs, for all kernels except `index`.

Closes #10725 from lidavidm/arrow-9056

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 c_glib/test/test-struct-scalar.rb             |   2 +-
 cpp/src/arrow/compute/exec/plan_test.cc       |  33 +++
 .../arrow/compute/kernels/aggregate_basic.cc  |  81 +++++--
 .../kernels/aggregate_basic_internal.h        |  65 +++++-
 .../arrow/compute/kernels/aggregate_mode.cc   |  24 +-
 .../compute/kernels/aggregate_quantile.cc     |  37 ++-
 .../compute/kernels/aggregate_tdigest.cc      |  29 ++-
 .../arrow/compute/kernels/aggregate_test.cc   | 211 +++++++++++++++---
 .../compute/kernels/aggregate_var_std.cc      |  37 +++
 cpp/src/arrow/compute/kernels/test_util.h     |  33 ---
 cpp/src/arrow/scalar.cc                       |  14 ++
 r/R/compute.R                                 |   3 +-
 r/tests/testthat/test-compute-aggregate.R     |   4 +-
 13 files changed, 465 insertions(+), 108 deletions(-)

diff --git a/c_glib/test/test-struct-scalar.rb b/c_glib/test/test-struct-scalar.rb
index 917b0f4cc18..9774943ba09 100644
--- a/c_glib/test/test-struct-scalar.rb
+++ b/c_glib/test/test-struct-scalar.rb
@@ -46,7 +46,7 @@ def test_equal
   end
 
   def test_to_s
-    assert_equal("...", @scalar.to_s)
+    assert_equal("{score:int8 = -29, enabled:bool = true}", @scalar.to_s)
   end
 
   def test_value
diff --git a/cpp/src/arrow/compute/exec/plan_test.cc b/cpp/src/arrow/compute/exec/plan_test.cc
index 7d412e67c5c..bcb63c25b3a 100644
--- a/cpp/src/arrow/compute/exec/plan_test.cc
+++ b/cpp/src/arrow/compute/exec/plan_test.cc
@@ -447,5 +447,38 @@ TEST(ExecPlanExecution, SourceScalarAggSink) {
       }))));
 }
 
+TEST(ExecPlanExecution, ScalarSourceScalarAggSink) {
+  ASSERT_OK_AND_ASSIGN(auto plan, ExecPlan::Make());
+
+  BatchesWithSchema basic_data;
+  basic_data.batches = {
+      ExecBatchFromJSON({ValueDescr::Scalar(int32()), ValueDescr::Scalar(int32()),
+                         ValueDescr::Scalar(int32())},
+                        "[[5, 5, 5], [5, 5, 5], [5, 5, 5]]"),
+      ExecBatchFromJSON({int32(), int32(), int32()},
+                        "[[5, 5, 5], [6, 6, 6], [7, 7, 7]]")};
+  basic_data.schema =
+      schema({field("a", int32()), field("b", int32()), field("c", int32())});
+
+  ASSERT_OK_AND_ASSIGN(auto source,
+                       MakeTestSourceNode(plan.get(), "source", basic_data,
+                                          /*parallel=*/false, /*slow=*/false));
+
+  ASSERT_OK_AND_ASSIGN(
+      auto scalar_agg,
+      MakeScalarAggregateNode(source, "scalar_agg",
+                              {{"count", nullptr}, {"sum", nullptr}, {"mean", nullptr}}));
+
+  auto sink_gen = MakeSinkNode(scalar_agg, "sink");
+
+  ASSERT_THAT(
+      StartAndCollect(plan.get(), sink_gen),
+      Finishes(ResultWith(UnorderedElementsAreArray({
+          ExecBatchFromJSON({ValueDescr::Scalar(int64()), ValueDescr::Scalar(int64()),
+                             ValueDescr::Scalar(float64())},
+                            "[[6, 33, 5.5]]"),
+      }))));
+}
+
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic.cc b/cpp/src/arrow/compute/kernels/aggregate_basic.cc
index 5e0454c9c4d..a7df66695b2 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_basic.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_basic.cc
@@ -59,10 +59,16 @@ struct CountImpl : public ScalarAggregator {
   explicit CountImpl(ScalarAggregateOptions options) : options(std::move(options)) {}
 
   Status Consume(KernelContext*, const ExecBatch& batch) override {
-    const ArrayData& input = *batch[0].array();
-    const int64_t nulls = input.GetNullCount();
-    this->nulls += nulls;
-    this->non_nulls += input.length - nulls;
+    if (batch[0].is_array()) {
+      const ArrayData& input = *batch[0].array();
+      const int64_t nulls = input.GetNullCount();
+      this->nulls += nulls;
+      this->non_nulls += input.length - nulls;
+    } else {
+      const Scalar& input = *batch[0].scalar();
+      this->nulls += !input.is_valid * batch.length;
+      this->non_nulls += input.is_valid * batch.length;
+    }
     return Status::OK();
   }
 
@@ -149,6 +155,12 @@ struct BooleanAnyImpl : public ScalarAggregator {
     if (this->any == true) {
       return Status::OK();
     }
+    if (batch[0].is_scalar()) {
+      const auto& scalar = *batch[0].scalar();
+      this->has_nulls = !scalar.is_valid;
+      this->any = scalar.is_valid && checked_cast<const BooleanScalar&>(scalar).value;
+      return Status::OK();
+    }
     const auto& data = *batch[0].array();
     this->has_nulls = data.GetNullCount() > 0;
     arrow::internal::OptionalBinaryBitBlockCounter counter(
@@ -208,6 +220,12 @@ struct BooleanAllImpl : public ScalarAggregator {
     if (!options.skip_nulls && this->has_nulls) {
       return Status::OK();
     }
+    if (batch[0].is_scalar()) {
+      const auto& scalar = *batch[0].scalar();
+      this->has_nulls = !scalar.is_valid;
+      this->all = !scalar.is_valid || checked_cast<const BooleanScalar&>(scalar).value;
+      return Status::OK();
+    }
     const auto& data = *batch[0].array();
     this->has_nulls = data.GetNullCount() > 0;
     arrow::internal::OptionalBinaryBitBlockCounter counter(
@@ -387,13 +405,33 @@ void AddBasicAggKernels(KernelInit init,
   }
 }
 
+void AddScalarAggKernels(KernelInit init,
+                         const std::vector<std::shared_ptr<DataType>>& types,
+                         std::shared_ptr<DataType> out_ty,
+                         ScalarAggregateFunction* func) {
+  for (const auto& ty : types) {
+    // scalar[InT] -> scalar[OutT]
+    auto sig = KernelSignature::Make({InputType::Scalar(ty)}, ValueDescr::Scalar(out_ty));
+    AddAggKernel(std::move(sig), init, func, SimdLevel::NONE);
+  }
+}
+
+void AddArrayScalarAggKernels(KernelInit init,
+                              const std::vector<std::shared_ptr<DataType>>& types,
+                              std::shared_ptr<DataType> out_ty,
+                              ScalarAggregateFunction* func,
+                              SimdLevel::type simd_level = SimdLevel::NONE) {
+  AddBasicAggKernels(init, types, out_ty, func, simd_level);
+  AddScalarAggKernels(init, types, out_ty, func);
+}
+
 void AddMinMaxKernels(KernelInit init,
                       const std::vector<std::shared_ptr<DataType>>& types,
                       ScalarAggregateFunction* func, SimdLevel::type simd_level) {
   for (const auto& ty : types) {
-    // array[T] -> scalar[struct<min: T, max: T>]
+    // any[T] -> scalar[struct<min: T, max: T>]
     auto out_ty = struct_({field("min", ty), field("max", ty)});
-    auto sig = KernelSignature::Make({InputType::Array(ty)}, ValueDescr::Scalar(out_ty));
+    auto sig = KernelSignature::Make({InputType(ty)}, ValueDescr::Scalar(out_ty));
     AddAggKernel(std::move(sig), init, func, simd_level);
   }
 }
@@ -468,17 +506,21 @@ void RegisterScalarAggregateBasic(FunctionRegistry* registry) {
   InputType any_array(ValueDescr::ARRAY);
   AddAggKernel(KernelSignature::Make({any_array}, ValueDescr::Scalar(int64())),
                aggregate::CountInit, func.get());
+  AddAggKernel(
+      KernelSignature::Make({InputType(ValueDescr::SCALAR)}, ValueDescr::Scalar(int64())),
+      aggregate::CountInit, func.get());
   DCHECK_OK(registry->AddFunction(std::move(func)));
 
   func = std::make_shared<ScalarAggregateFunction>("sum", Arity::Unary(), &sum_doc,
                                                    &default_scalar_aggregate_options);
-  aggregate::AddBasicAggKernels(aggregate::SumInit, {boolean()}, int64(), func.get());
-  aggregate::AddBasicAggKernels(aggregate::SumInit, SignedIntTypes(), int64(),
-                                func.get());
-  aggregate::AddBasicAggKernels(aggregate::SumInit, UnsignedIntTypes(), uint64(),
-                                func.get());
-  aggregate::AddBasicAggKernels(aggregate::SumInit, FloatingPointTypes(), float64(),
-                                func.get());
+  aggregate::AddArrayScalarAggKernels(aggregate::SumInit, {boolean()}, int64(),
+                                      func.get());
+  aggregate::AddArrayScalarAggKernels(aggregate::SumInit, SignedIntTypes(), int64(),
+                                      func.get());
+  aggregate::AddArrayScalarAggKernels(aggregate::SumInit, UnsignedIntTypes(), uint64(),
+                                      func.get());
+  aggregate::AddArrayScalarAggKernels(aggregate::SumInit, FloatingPointTypes(), float64(),
+                                      func.get());
   // Add the SIMD variants for sum
 #if defined(ARROW_HAVE_RUNTIME_AVX2) || defined(ARROW_HAVE_RUNTIME_AVX512)
   auto cpu_info = arrow::internal::CpuInfo::GetInstance();
@@ -497,9 +539,10 @@ void RegisterScalarAggregateBasic(FunctionRegistry* registry) {
 
   func = std::make_shared<ScalarAggregateFunction>("mean", Arity::Unary(), &mean_doc,
                                                    &default_scalar_aggregate_options);
-  aggregate::AddBasicAggKernels(aggregate::MeanInit, {boolean()}, float64(), func.get());
-  aggregate::AddBasicAggKernels(aggregate::MeanInit, NumericTypes(), float64(),
-                                func.get());
+  aggregate::AddArrayScalarAggKernels(aggregate::MeanInit, {boolean()}, float64(),
+                                      func.get());
+  aggregate::AddArrayScalarAggKernels(aggregate::MeanInit, NumericTypes(), float64(),
+                                      func.get());
   // Add the SIMD variants for mean
 #if defined(ARROW_HAVE_RUNTIME_AVX2)
   if (cpu_info->IsSupported(arrow::internal::CpuInfo::AVX2)) {
@@ -534,13 +577,15 @@ void RegisterScalarAggregateBasic(FunctionRegistry* registry) {
   // any
   func = std::make_shared<ScalarAggregateFunction>("any", Arity::Unary(), &any_doc,
                                                    &default_scalar_aggregate_options);
-  aggregate::AddBasicAggKernels(aggregate::AnyInit, {boolean()}, boolean(), func.get());
+  aggregate::AddArrayScalarAggKernels(aggregate::AnyInit, {boolean()}, boolean(),
+                                      func.get());
   DCHECK_OK(registry->AddFunction(std::move(func)));
 
   // all
   func = std::make_shared<ScalarAggregateFunction>("all", Arity::Unary(), &all_doc,
                                                    &default_scalar_aggregate_options);
-  aggregate::AddBasicAggKernels(aggregate::AllInit, {boolean()}, boolean(), func.get());
+  aggregate::AddArrayScalarAggKernels(aggregate::AllInit, {boolean()}, boolean(),
+                                      func.get());
   DCHECK_OK(registry->AddFunction(std::move(func)));
 
   // index
diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h b/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h
index e6755c05f5d..bb1d53c02ac 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h
+++ b/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h
@@ -59,13 +59,22 @@ struct SumImpl : public ScalarAggregator {
   using OutputType = typename TypeTraits<SumType>::ScalarType;
 
   Status Consume(KernelContext*, const ExecBatch& batch) override {
-    const auto& data = batch[0].array();
-    this->count = data->length - data->GetNullCount();
-    if (is_boolean_type<ArrowType>::value) {
-      this->sum += static_cast<typename SumType::c_type>(BooleanArray(data).true_count());
+    if (batch[0].is_array()) {
+      const auto& data = batch[0].array();
+      this->count += data->length - data->GetNullCount();
+      if (is_boolean_type<ArrowType>::value) {
+        this->sum +=
+            static_cast<typename SumType::c_type>(BooleanArray(data).true_count());
+      } else {
+        this->sum +=
+            arrow::compute::detail::SumArray<CType, typename SumType::c_type>(*data);
+      }
     } else {
-      this->sum +=
-          arrow::compute::detail::SumArray<CType, typename SumType::c_type>(*data);
+      const auto& data = *batch[0].scalar();
+      this->count += data.is_valid * batch.length;
+      if (data.is_valid) {
+        this->sum += internal::UnboxScalar<ArrowType>::Unbox(data) * batch.length;
+      }
     }
     return Status::OK();
   }
@@ -228,9 +237,29 @@ struct MinMaxImpl : public ScalarAggregator {
       : out_type(out_type), options(options) {}
 
   Status Consume(KernelContext*, const ExecBatch& batch) override {
+    if (batch[0].is_array()) {
+      return ConsumeArray(ArrayType(batch[0].array()));
+    }
+    return ConsumeScalar(*batch[0].scalar());
+  }
+
+  Status ConsumeScalar(const Scalar& scalar) {
     StateType local;
+    local.has_nulls = !scalar.is_valid;
+    local.has_values = scalar.is_valid;
 
-    ArrayType arr(batch[0].array());
+    if (local.has_nulls && !options.skip_nulls) {
+      this->state = local;
+      return Status::OK();
+    }
+
+    local.MergeOne(internal::UnboxScalar<ArrowType>::Unbox(scalar));
+    this->state = local;
+    return Status::OK();
+  }
+
+  Status ConsumeArray(const ArrayType& arr) {
+    StateType local;
 
     const auto null_count = arr.null_count();
     local.has_nulls = null_count > 0;
@@ -344,6 +373,9 @@ struct BooleanMinMaxImpl : public MinMaxImpl<BooleanType, SimdLevel> {
   using MinMaxImpl<BooleanType, SimdLevel>::options;
 
   Status Consume(KernelContext*, const ExecBatch& batch) override {
+    if (ARROW_PREDICT_FALSE(batch[0].is_scalar())) {
+      return ConsumeScalar(checked_cast<const BooleanScalar&>(*batch[0].scalar()));
+    }
     StateType local;
     ArrayType arr(batch[0].array());
 
@@ -366,6 +398,25 @@ struct BooleanMinMaxImpl : public MinMaxImpl<BooleanType, SimdLevel> {
     this->state = local;
     return Status::OK();
   }
+
+  Status ConsumeScalar(const BooleanScalar& scalar) {
+    StateType local;
+
+    local.has_nulls = !scalar.is_valid;
+    local.has_values = scalar.is_valid;
+    if (local.has_nulls && !options.skip_nulls) {
+      this->state = local;
+      return Status::OK();
+    }
+
+    const int true_count = scalar.is_valid && scalar.value;
+    const int false_count = scalar.is_valid && !scalar.value;
+    local.max = true_count > 0;
+    local.min = false_count == 0;
+
+    this->state = local;
+    return Status::OK();
+  }
 };
 
 template <SimdLevel::type SimdLevel>
diff --git a/cpp/src/arrow/compute/kernels/aggregate_mode.cc b/cpp/src/arrow/compute/kernels/aggregate_mode.cc
index 95362335261..6ad0eeb6456 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_mode.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_mode.cc
@@ -302,6 +302,24 @@ struct Moder<InType, enable_if_t<is_floating_type<InType>::value>> {
   SortModer<InType> impl;
 };
 
+template <typename T>
+Status ScalarMode(KernelContext* ctx, const Scalar& scalar, Datum* out) {
+  using CType = typename T::c_type;
+  if (scalar.is_valid) {
+    bool called = false;
+    return Finalize<T>(ctx, out, [&]() {
+      if (!called) {
+        called = true;
+        return std::pair<CType, uint64_t>(UnboxScalar<T>::Unbox(scalar), 1);
+      }
+      return std::pair<CType, uint64_t>(static_cast<CType>(0), kCountEOF);
+    });
+  }
+  return Finalize<T>(ctx, out, []() {
+    return std::pair<CType, uint64_t>(static_cast<CType>(0), kCountEOF);
+  });
+}
+
 template <typename _, typename InType>
 struct ModeExecutor {
   static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
@@ -313,6 +331,10 @@ struct ModeExecutor {
       return Status::Invalid("ModeOption::n must be strictly positive");
     }
 
+    if (batch[0].is_scalar()) {
+      return ScalarMode<InType>(ctx, *batch[0].scalar(), out);
+    }
+
     return Moder<InType>().impl.Exec(ctx, batch, out);
   }
 };
@@ -325,7 +347,7 @@ VectorKernel NewModeKernel(const std::shared_ptr<DataType>& in_type) {
   auto out_type =
       struct_({field(kModeFieldName, in_type), field(kCountFieldName, int64())});
   kernel.signature =
-      KernelSignature::Make({InputType::Array(in_type)}, ValueDescr::Array(out_type));
+      KernelSignature::Make({InputType(in_type)}, ValueDescr::Array(out_type));
   return kernel;
 }
 
diff --git a/cpp/src/arrow/compute/kernels/aggregate_quantile.cc b/cpp/src/arrow/compute/kernels/aggregate_quantile.cc
index 0b7821273cc..7d2ffe0770c 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_quantile.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_quantile.cc
@@ -389,6 +389,36 @@ struct ExactQuantiler<InType, enable_if_t<is_floating_type<InType>::value>> {
   SortQuantiler<InType> impl;
 };
 
+template <typename T>
+Status ScalarQuantile(KernelContext* ctx, const QuantileOptions& options,
+                      const Scalar& scalar, Datum* out) {
+  using CType = typename T::c_type;
+  ArrayData* output = out->mutable_array();
+  if (!scalar.is_valid) {
+    output->length = 0;
+    output->null_count = 0;
+    return Status::OK();
+  }
+  auto out_type = IsDataPoint(options) ? scalar.type : float64();
+  output->length = options.q.size();
+  output->null_count = 0;
+  ARROW_ASSIGN_OR_RAISE(
+      output->buffers[1],
+      ctx->Allocate(output->length * BitUtil::BytesForBits(GetBitWidth(*out_type))));
+  if (IsDataPoint(options)) {
+    CType* out_buffer = output->template GetMutableValues<CType>(1);
+    for (int64_t i = 0; i < output->length; i++) {
+      out_buffer[i] = UnboxScalar<T>::Unbox(scalar);
+    }
+  } else {
+    double* out_buffer = output->template GetMutableValues<double>(1);
+    for (int64_t i = 0; i < output->length; i++) {
+      out_buffer[i] = static_cast<double>(UnboxScalar<T>::Unbox(scalar));
+    }
+  }
+  return Status::OK();
+}
+
 template <typename _, typename InType>
 struct QuantileExecutor {
   static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
@@ -406,6 +436,10 @@ struct QuantileExecutor {
       }
     }
 
+    if (batch[0].is_scalar()) {
+      return ScalarQuantile<InType>(ctx, options, *batch[0].scalar(), out);
+    }
+
     return ExactQuantiler<InType>().impl.Exec(ctx, batch, out);
   }
 };
@@ -427,8 +461,7 @@ void AddQuantileKernels(VectorFunction* func) {
   base.output_chunked = false;
 
   for (const auto& ty : NumericTypes()) {
-    base.signature =
-        KernelSignature::Make({InputType::Array(ty)}, OutputType(ResolveOutput));
+    base.signature = KernelSignature::Make({InputType(ty)}, OutputType(ResolveOutput));
     // output type is determined at runtime, set template argument to nulltype
     base.exec = GenerateNumeric<QuantileExecutor, NullType>(*ty);
     DCHECK_OK(func->AddKernel(base));
diff --git a/cpp/src/arrow/compute/kernels/aggregate_tdigest.cc b/cpp/src/arrow/compute/kernels/aggregate_tdigest.cc
index fb474a6b8b3..4c261604c85 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_tdigest.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_tdigest.cc
@@ -40,16 +40,23 @@ struct TDigestImpl : public ScalarAggregator {
       : q{options.q}, tdigest{options.delta, options.buffer_size} {}
 
   Status Consume(KernelContext*, const ExecBatch& batch) override {
-    const ArrayData& data = *batch[0].array();
-    const CType* values = data.GetValues<CType>(1);
-
-    if (data.length > data.GetNullCount()) {
-      VisitSetBitRunsVoid(data.buffers[0], data.offset, data.length,
-                          [&](int64_t pos, int64_t len) {
-                            for (int64_t i = 0; i < len; ++i) {
-                              this->tdigest.NanAdd(values[pos + i]);
-                            }
-                          });
+    if (batch[0].is_array()) {
+      const ArrayData& data = *batch[0].array();
+      const CType* values = data.GetValues<CType>(1);
+
+      if (data.length > data.GetNullCount()) {
+        VisitSetBitRunsVoid(data.buffers[0], data.offset, data.length,
+                            [&](int64_t pos, int64_t len) {
+                              for (int64_t i = 0; i < len; ++i) {
+                                this->tdigest.NanAdd(values[pos + i]);
+                              }
+                            });
+      }
+    } else {
+      const CType value = UnboxScalar<ArrowType>::Unbox(*batch[0].scalar());
+      if (batch[0].scalar()->is_valid) {
+        this->tdigest.NanAdd(value);
+      }
     }
     return Status::OK();
   }
@@ -125,7 +132,7 @@ void AddTDigestKernels(KernelInit init,
                        const std::vector<std::shared_ptr<DataType>>& types,
                        ScalarAggregateFunction* func) {
   for (const auto& ty : types) {
-    auto sig = KernelSignature::Make({InputType::Array(ty)}, float64());
+    auto sig = KernelSignature::Make({InputType(ty)}, float64());
     AddAggKernel(std::move(sig), init, func);
   }
 }
diff --git a/cpp/src/arrow/compute/kernels/aggregate_test.cc b/cpp/src/arrow/compute/kernels/aggregate_test.cc
index 7318539df7f..7d3c3d7a908 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_test.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_test.cc
@@ -41,6 +41,7 @@
 
 #include "arrow/testing/gtest_common.h"
 #include "arrow/testing/gtest_util.h"
+#include "arrow/testing/matchers.h"
 #include "arrow/testing/random.h"
 #include "arrow/util/logging.h"
 
@@ -103,23 +104,11 @@ static Datum NaiveSum(const Array& array) {
   return Datum(std::make_shared<SumScalarType>(result.first));
 }
 
-template <typename ArrowType>
 void ValidateSum(
-    const Array& input, Datum expected,
+    const Datum input, Datum expected,
     const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults()) {
-  using OutputType = typename FindAccumulatorType<ArrowType>::Type;
-
   ASSERT_OK_AND_ASSIGN(Datum result, Sum(input, options));
-  DatumEqual<OutputType>::EnsureEqual(result, expected);
-}
-
-template <typename ArrowType>
-void ValidateSum(const std::shared_ptr<ChunkedArray>& input, Datum expected,
-                 const ScalarAggregateOptions& options) {
-  using OutputType = typename FindAccumulatorType<ArrowType>::Type;
-
-  ASSERT_OK_AND_ASSIGN(Datum result, Sum(input, options));
-  DatumEqual<OutputType>::EnsureEqual(result, expected);
+  AssertDatumsApproxEqual(expected, result, /*verbose=*/true);
 }
 
 template <typename ArrowType>
@@ -127,7 +116,7 @@ void ValidateSum(
     const char* json, Datum expected,
     const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults()) {
   auto array = ArrayFromJSON(TypeTraits<ArrowType>::type_singleton(), json);
-  ValidateSum<ArrowType>(*array, expected, options);
+  ValidateSum(*array, expected, options);
 }
 
 template <typename ArrowType>
@@ -135,13 +124,13 @@ void ValidateSum(
     const std::vector<std::string>& json, Datum expected,
     const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults()) {
   auto array = ChunkedArrayFromJSON(TypeTraits<ArrowType>::type_singleton(), json);
-  ValidateSum<ArrowType>(array, expected, options);
+  ValidateSum(array, expected, options);
 }
 
 template <typename ArrowType>
 void ValidateSum(const Array& array, const ScalarAggregateOptions& options =
                                          ScalarAggregateOptions::Defaults()) {
-  ValidateSum<ArrowType>(array, NaiveSum<ArrowType>(array), options);
+  ValidateSum(array, NaiveSum<ArrowType>(array), options);
 }
 
 using UnaryOp = Result<Datum>(const Datum&, const ScalarAggregateOptions&, ExecContext*);
@@ -191,6 +180,13 @@ TEST(TestBooleanAggregation, Sum) {
                           ScalarAggregateOptions(/*skip_nulls=*/false, /*min_count=*/2));
   ValidateBooleanAgg<Sum>(json, std::make_shared<UInt64Scalar>(),
                           ScalarAggregateOptions(/*skip_nulls=*/false, /*min_count=*/3));
+
+  EXPECT_THAT(Sum(MakeScalar(true)),
+              ResultWith(Datum(std::make_shared<UInt64Scalar>(1))));
+  EXPECT_THAT(Sum(MakeScalar(false)),
+              ResultWith(Datum(std::make_shared<UInt64Scalar>(0))));
+  EXPECT_THAT(Sum(MakeNullScalar(boolean())),
+              ResultWith(Datum(MakeNullScalar(uint64()))));
 }
 
 TEST(TestBooleanAggregation, Mean) {
@@ -227,6 +223,11 @@ TEST(TestBooleanAggregation, Mean) {
                            ScalarAggregateOptions(/*skip_nulls=*/false, /*min_count=*/2));
   ValidateBooleanAgg<Mean>(json, std::make_shared<DoubleScalar>(),
                            ScalarAggregateOptions(/*skip_nulls=*/false, /*min_count=*/3));
+
+  EXPECT_THAT(Mean(MakeScalar(true)), ResultWith(Datum(MakeScalar(1.0))));
+  EXPECT_THAT(Mean(MakeScalar(false)), ResultWith(Datum(MakeScalar(0.0))));
+  EXPECT_THAT(Mean(MakeNullScalar(boolean())),
+              ResultWith(Datum(MakeNullScalar(float64()))));
 }
 
 template <typename ArrowType>
@@ -236,6 +237,7 @@ TYPED_TEST_SUITE(TestNumericSumKernel, NumericArrowTypes);
 TYPED_TEST(TestNumericSumKernel, SimpleSum) {
   using SumType = typename FindAccumulatorType<TypeParam>::Type;
   using ScalarType = typename TypeTraits<SumType>::ScalarType;
+  using InputScalarType = typename TypeTraits<TypeParam>::ScalarType;
   using T = typename TypeParam::c_type;
 
   ValidateSum<TypeParam>("[]", Datum(std::make_shared<ScalarType>()));
@@ -273,12 +275,17 @@ TYPED_TEST(TestNumericSumKernel, SimpleSum) {
   const T expected_result = static_cast<T>(14);
   ValidateSum<TypeParam>("[1, null, 3, null, 3, null, 7]",
                          Datum(std::make_shared<ScalarType>(expected_result)), options);
+
+  EXPECT_THAT(Sum(Datum(std::make_shared<InputScalarType>(static_cast<T>(5)))),
+              ResultWith(Datum(std::make_shared<ScalarType>(static_cast<T>(5)))));
+  EXPECT_THAT(Sum(MakeNullScalar(TypeTraits<TypeParam>::type_singleton())),
+              ResultWith(Datum(MakeNullScalar(TypeTraits<SumType>::type_singleton()))));
 }
 
-TYPED_TEST_SUITE(TestNumericSumKernel, NumericArrowTypes);
 TYPED_TEST(TestNumericSumKernel, ScalarAggregateOptions) {
   using SumType = typename FindAccumulatorType<TypeParam>::Type;
   using ScalarType = typename TypeTraits<SumType>::ScalarType;
+  using InputScalarType = typename TypeTraits<TypeParam>::ScalarType;
   using T = typename TypeParam::c_type;
 
   const T expected_result = static_cast<T>(14);
@@ -307,6 +314,16 @@ TYPED_TEST(TestNumericSumKernel, ScalarAggregateOptions) {
                          ScalarAggregateOptions(/*skip_nulls=*/false, /*min_count=*/4));
   ValidateSum<TypeParam>(json, null_result,
                          ScalarAggregateOptions(/*skip_nulls=*/false, /*min_count=*/5));
+
+  EXPECT_THAT(Sum(Datum(std::make_shared<InputScalarType>(static_cast<T>(5))),
+                  ScalarAggregateOptions(/*skip_nulls=*/false)),
+              ResultWith(Datum(std::make_shared<ScalarType>(static_cast<T>(5)))));
+  EXPECT_THAT(Sum(Datum(std::make_shared<InputScalarType>(static_cast<T>(5))),
+                  ScalarAggregateOptions(/*skip_nulls=*/true, /*min_count=*/2)),
+              ResultWith(Datum(MakeNullScalar(TypeTraits<SumType>::type_singleton()))));
+  EXPECT_THAT(Sum(MakeNullScalar(TypeTraits<TypeParam>::type_singleton()),
+                  ScalarAggregateOptions(/*skip_nulls=*/false)),
+              ResultWith(Datum(MakeNullScalar(TypeTraits<SumType>::type_singleton()))));
 }
 
 template <typename ArrowType>
@@ -442,6 +459,14 @@ TYPED_TEST(TestCountKernel, SimpleCount) {
   ValidateCount<TypeParam>("[1, null, 2]", {2, 1});
   ValidateCount<TypeParam>("[null, null, null]", {0, 3});
   ValidateCount<TypeParam>("[1, 2, 3, 4, 5, 6, 7, 8, 9]", {9, 0});
+
+  auto ty = TypeTraits<TypeParam>::type_singleton();
+  EXPECT_THAT(Count(MakeNullScalar(ty)), ResultWith(Datum(int64_t(0))));
+  EXPECT_THAT(Count(MakeNullScalar(ty), ScalarAggregateOptions(/*skip_nulls=*/false)),
+              ResultWith(Datum(int64_t(1))));
+  EXPECT_THAT(Count(*MakeScalar(ty, 1)), ResultWith(Datum(int64_t(1))));
+  EXPECT_THAT(Count(*MakeScalar(ty, 1), ScalarAggregateOptions(/*skip_nulls=*/false)),
+              ResultWith(Datum(int64_t(0))));
 }
 
 template <typename ArrowType>
@@ -481,15 +506,9 @@ static Datum NaiveMean(const Array& array) {
 template <typename ArrowType>
 void ValidateMean(const Array& input, Datum expected,
                   const ScalarAggregateOptions& options) {
-  using OutputType = typename FindAccumulatorType<DoubleType>::Type;
-
   ASSERT_OK_AND_ASSIGN(Datum result, Mean(input, options, nullptr));
-  using ScalarType = typename TypeTraits<OutputType>::ScalarType;
-  const auto& res = checked_pointer_cast<ScalarType>(result.scalar());
-  const auto& exp = checked_pointer_cast<ScalarType>(expected.scalar());
-  if (!(std::isnan(res->value) && std::isnan(exp->value))) {
-    DatumEqual<OutputType>::EnsureEqual(result, expected);
-  }
+  auto equal_options = EqualOptions::Defaults().nans_equal(true);
+  AssertDatumsApproxEqual(expected, result, /*verbose=*/true, equal_options);
 }
 
 template <typename ArrowType>
@@ -512,6 +531,8 @@ class TestMeanKernelNumeric : public ::testing::Test {};
 TYPED_TEST_SUITE(TestMeanKernelNumeric, NumericArrowTypes);
 TYPED_TEST(TestMeanKernelNumeric, SimpleMean) {
   using ScalarType = typename TypeTraits<DoubleType>::ScalarType;
+  using InputScalarType = typename TypeTraits<TypeParam>::ScalarType;
+  using T = typename TypeParam::c_type;
 
   const ScalarAggregateOptions& options =
       ScalarAggregateOptions(/*skip_nulls=*/true, /*min_count=*/0);
@@ -534,12 +555,18 @@ TYPED_TEST(TestMeanKernelNumeric, SimpleMean) {
 
   ValidateMean<TypeParam>("[1, 1, 1, 1, 1, 1, 1, 1]",
                           Datum(std::make_shared<ScalarType>(1.0)));
+
+  EXPECT_THAT(Mean(Datum(std::make_shared<InputScalarType>(static_cast<T>(5)))),
+              ResultWith(Datum(std::make_shared<ScalarType>(5.0))));
+  EXPECT_THAT(Mean(MakeNullScalar(TypeTraits<TypeParam>::type_singleton())),
+              ResultWith(Datum(MakeNullScalar(float64()))));
 }
 
-TYPED_TEST_SUITE(TestMeanKernelNumeric, NumericArrowTypes);
 TYPED_TEST(TestMeanKernelNumeric, ScalarAggregateOptions) {
   using ScalarType = typename TypeTraits<DoubleType>::ScalarType;
-  auto expected_result = Datum(std::make_shared<ScalarType>(2));
+  using InputScalarType = typename TypeTraits<TypeParam>::ScalarType;
+  using T = typename TypeParam::c_type;
+  auto expected_result = Datum(std::make_shared<ScalarType>(3));
   auto null_result = Datum(std::make_shared<ScalarType>());
   auto nan_result = Datum(std::make_shared<ScalarType>(NAN));
   const char* json = "[1, null, 2, 2, null, 7]";
@@ -577,6 +604,16 @@ TYPED_TEST(TestMeanKernelNumeric, ScalarAggregateOptions) {
                           ScalarAggregateOptions(/*skip_nulls=*/false, /*min_count=*/4));
   ValidateMean<TypeParam>(json, null_result,
                           ScalarAggregateOptions(/*skip_nulls=*/false, /*min_count=*/15));
+
+  EXPECT_THAT(Mean(Datum(std::make_shared<InputScalarType>(static_cast<T>(5))),
+                   ScalarAggregateOptions(/*skip_nulls=*/false)),
+              ResultWith(Datum(std::make_shared<ScalarType>(5.0))));
+  EXPECT_THAT(Mean(Datum(std::make_shared<InputScalarType>(static_cast<T>(5))),
+                   ScalarAggregateOptions(/*skip_nulls=*/true, /*min_count=*/2)),
+              ResultWith(Datum(MakeNullScalar(float64()))));
+  EXPECT_THAT(Mean(MakeNullScalar(TypeTraits<TypeParam>::type_singleton()),
+                   ScalarAggregateOptions(/*skip_nulls=*/false)),
+              ResultWith(Datum(MakeNullScalar(float64()))));
 }
 
 template <typename ArrowType>
@@ -696,6 +733,7 @@ TEST_F(TestBooleanMinMaxKernel, Basics) {
   std::vector<std::string> chunked_input1 = {"[true, true, null]", "[true, null]"};
   std::vector<std::string> chunked_input2 = {"[false, false, false]", "[false]"};
   std::vector<std::string> chunked_input3 = {"[true, null]", "[null, false]"};
+  auto ty = struct_({field("min", boolean()), field("max", boolean())});
 
   // SKIP nulls by default
   options = ScalarAggregateOptions(/*skip_nulls=*/true, /*min_count=*/1);
@@ -711,6 +749,13 @@ TEST_F(TestBooleanMinMaxKernel, Basics) {
   this->AssertMinMaxIs(chunked_input2, false, false, options);
   this->AssertMinMaxIs(chunked_input3, false, true, options);
 
+  Datum null_min_max = ScalarFromJSON(ty, "[null, null]");
+  Datum true_min_max = ScalarFromJSON(ty, "[true, true]");
+  Datum false_min_max = ScalarFromJSON(ty, "[false, false]");
+  EXPECT_THAT(MinMax(MakeNullScalar(boolean())), ResultWith(null_min_max));
+  EXPECT_THAT(MinMax(MakeScalar(true)), ResultWith(true_min_max));
+  EXPECT_THAT(MinMax(MakeScalar(false)), ResultWith(false_min_max));
+
   options = ScalarAggregateOptions(/*skip_nulls=*/false, /*min_count=*/1);
   this->AssertMinMaxIsNull("[]", options);
   this->AssertMinMaxIsNull("[null, null, null]", options);
@@ -724,6 +769,10 @@ TEST_F(TestBooleanMinMaxKernel, Basics) {
   this->AssertMinMaxIs(chunked_input2, false, false, options);
   this->AssertMinMaxIsNull(chunked_input3, options);
 
+  options = ScalarAggregateOptions(/*skip_nulls=*/false, /*min_count=*/2);
+  EXPECT_THAT(MinMax(MakeNullScalar(boolean()), options), ResultWith(null_min_max));
+  EXPECT_THAT(MinMax(MakeScalar(true), options), ResultWith(true_min_max));
+
   options = ScalarAggregateOptions(/*skip_nulls=*/true, /*min_count=*/0);
   this->AssertMinMaxIsNull("[]", options);
   this->AssertMinMaxIsNull("[null]", options);
@@ -735,6 +784,8 @@ TYPED_TEST(TestIntegerMinMaxKernel, Basics) {
   std::vector<std::string> chunked_input1 = {"[5, 1, 2, 3, 4]", "[9, 1, null, 3, 4]"};
   std::vector<std::string> chunked_input2 = {"[5, null, 2, 3, 4]", "[9, 1, 2, 3, 4]"};
   std::vector<std::string> chunked_input3 = {"[5, 1, 2, 3, null]", "[9, 1, null, 3, 4]"};
+  auto item_ty = TypeTraits<TypeParam>::type_singleton();
+  auto ty = struct_({field("min", item_ty), field("max", item_ty)});
 
   // SKIP nulls by default
   this->AssertMinMaxIsNull("[]", options);
@@ -745,6 +796,14 @@ TYPED_TEST(TestIntegerMinMaxKernel, Basics) {
   this->AssertMinMaxIs(chunked_input2, 1, 9, options);
   this->AssertMinMaxIs(chunked_input3, 1, 9, options);
 
+  Datum null_min_max(std::make_shared<StructScalar>(
+      ScalarVector{MakeNullScalar(item_ty), MakeNullScalar(item_ty)}, ty));
+  auto one_scalar = *MakeScalar(item_ty, static_cast<typename TypeParam::c_type>(1));
+  Datum one_min_max(
+      std::make_shared<StructScalar>(ScalarVector{one_scalar, one_scalar}, ty));
+  EXPECT_THAT(MinMax(MakeNullScalar(item_ty)), ResultWith(null_min_max));
+  EXPECT_THAT(MinMax(one_scalar), ResultWith(one_min_max));
+
   options = ScalarAggregateOptions(/*skip_nulls=*/false);
   this->AssertMinMaxIs("[5, 1, 2, 3, 4]", 1, 5, options);
   // output null
@@ -761,6 +820,8 @@ TYPED_TEST(TestFloatingMinMaxKernel, Floats) {
   std::vector<std::string> chunked_input1 = {"[5, 1, 2, 3, 4]", "[9, 1, null, 3, 4]"};
   std::vector<std::string> chunked_input2 = {"[5, null, 2, 3, 4]", "[9, 1, 2, 3, 4]"};
   std::vector<std::string> chunked_input3 = {"[5, 1, 2, 3, null]", "[9, 1, null, 3, 4]"};
+  auto item_ty = TypeTraits<TypeParam>::type_singleton();
+  auto ty = struct_({field("min", item_ty), field("max", item_ty)});
 
   this->AssertMinMaxIs("[5, 1, 2, 3, 4]", 1, 5, options);
   this->AssertMinMaxIs("[5, 1, 2, 3, 4]", 1, 5, options);
@@ -772,6 +833,14 @@ TYPED_TEST(TestFloatingMinMaxKernel, Floats) {
   this->AssertMinMaxIs(chunked_input2, 1, 9, options);
   this->AssertMinMaxIs(chunked_input3, 1, 9, options);
 
+  Datum null_min_max(std::make_shared<StructScalar>(
+      ScalarVector{MakeNullScalar(item_ty), MakeNullScalar(item_ty)}, ty));
+  auto one_scalar = *MakeScalar(item_ty, static_cast<typename TypeParam::c_type>(1));
+  Datum one_min_max(
+      std::make_shared<StructScalar>(ScalarVector{one_scalar, one_scalar}, ty));
+  EXPECT_THAT(MinMax(MakeNullScalar(item_ty)), ResultWith(null_min_max));
+  EXPECT_THAT(MinMax(one_scalar), ResultWith(one_min_max));
+
   options = ScalarAggregateOptions(/*skip_nulls=*/false);
   this->AssertMinMaxIs("[5, 1, 2, 3, 4]", 1, 5, options);
   this->AssertMinMaxIs("[5, -Inf, 2, 3, 4]", -INFINITY, 5, options);
@@ -998,6 +1067,10 @@ TEST_F(TestAnyKernel, Basics) {
   this->AssertAnyIs(chunked_input3, false_value);
   this->AssertAnyIs(chunked_input4, true_value);
 
+  EXPECT_THAT(Any(Datum(true)), ResultWith(Datum(true)));
+  EXPECT_THAT(Any(Datum(false)), ResultWith(Datum(false)));
+  EXPECT_THAT(Any(MakeNullScalar(boolean())), ResultWith(Datum(false)));
+
   const ScalarAggregateOptions& keep_nulls = ScalarAggregateOptions(/*skip_nulls=*/false);
   this->AssertAnyIs("[]", false_value, keep_nulls);
   this->AssertAnyIs("[false]", false_value, keep_nulls);
@@ -1013,6 +1086,11 @@ TEST_F(TestAnyKernel, Basics) {
   this->AssertAnyIs(chunked_input2, false_value, keep_nulls);
   this->AssertAnyIs(chunked_input3, null_value, keep_nulls);
   this->AssertAnyIs(chunked_input4, true_value, keep_nulls);
+
+  EXPECT_THAT(Any(Datum(true), keep_nulls), ResultWith(Datum(true)));
+  EXPECT_THAT(Any(Datum(false), keep_nulls), ResultWith(Datum(false)));
+  EXPECT_THAT(Any(MakeNullScalar(boolean()), keep_nulls),
+              ResultWith(Datum(MakeNullScalar(boolean()))));
 }
 
 //
@@ -1079,6 +1157,10 @@ TEST_F(TestAllKernel, Basics) {
   this->AssertAllIs(chunked_input4, false_value);
   this->AssertAllIs(chunked_input5, false_value);
 
+  EXPECT_THAT(All(Datum(true)), ResultWith(Datum(true)));
+  EXPECT_THAT(All(Datum(false)), ResultWith(Datum(false)));
+  EXPECT_THAT(All(MakeNullScalar(boolean())), ResultWith(Datum(true)));
+
   const ScalarAggregateOptions keep_nulls = ScalarAggregateOptions(/*skip_nulls=*/false);
   this->AssertAllIs("[]", true_value, keep_nulls);
   this->AssertAllIs("[false]", false_value, keep_nulls);
@@ -1095,6 +1177,11 @@ TEST_F(TestAllKernel, Basics) {
   this->AssertAllIs(chunked_input3, false_value, keep_nulls);
   this->AssertAllIs(chunked_input4, false_value, keep_nulls);
   this->AssertAllIs(chunked_input5, false_value, keep_nulls);
+
+  EXPECT_THAT(All(Datum(true), keep_nulls), ResultWith(Datum(true)));
+  EXPECT_THAT(All(Datum(false), keep_nulls), ResultWith(Datum(false)));
+  EXPECT_THAT(All(MakeNullScalar(boolean()), keep_nulls),
+              ResultWith(Datum(MakeNullScalar(boolean()))));
 }
 
 //
@@ -1358,6 +1445,14 @@ TEST_F(TestBooleanModeKernel, Basics) {
   this->AssertModesAre("[true, null, false, false, null, true, null, null, true]", 100,
                        {true, false}, {3, 2});
   this->AssertModesEmpty({"[null, null]", "[]", "[null]"}, 4);
+
+  auto ty = struct_({field("mode", boolean()), field("count", int64())});
+  Datum mode_true = ArrayFromJSON(ty, "[[true, 1]]");
+  Datum mode_false = ArrayFromJSON(ty, "[[false, 1]]");
+  Datum mode_empty = ArrayFromJSON(ty, "[]");
+  EXPECT_THAT(Mode(Datum(true)), ResultWith(mode_true));
+  EXPECT_THAT(Mode(Datum(false)), ResultWith(mode_false));
+  EXPECT_THAT(Mode(MakeNullScalar(boolean())), ResultWith(mode_empty));
 }
 
 TYPED_TEST_SUITE(TestIntegerModeKernel, IntegralArrowTypes);
@@ -1377,6 +1472,12 @@ TYPED_TEST(TestIntegerModeKernel, Basics) {
   this->AssertModesAre("[127, 0, 127, 127, 0, 1, 0, 127]", 2, {127, 0}, {4, 3});
   this->AssertModesAre("[null, null, 2, null, 1]", 3, {1, 2}, {1, 1});
   this->AssertModesEmpty("[null, null, null]", 10);
+
+  auto in_ty = this->type_singleton();
+  auto ty = struct_({field("mode", in_ty), field("count", int64())});
+  EXPECT_THAT(Mode(*MakeScalar(in_ty, 5)),
+              ResultWith(Datum(ArrayFromJSON(ty, "[[5, 1]]"))));
+  EXPECT_THAT(Mode(MakeNullScalar(in_ty)), ResultWith(Datum(ArrayFromJSON(ty, "[]"))));
 }
 
 TYPED_TEST_SUITE(TestFloatingModeKernel, RealArrowTypes);
@@ -1402,6 +1503,12 @@ TYPED_TEST(TestFloatingModeKernel, Floats) {
 
   this->AssertModesAre("[Inf, 100, Inf, 100, Inf]", 2, {INFINITY, 100}, {3, 2});
   this->AssertModesAre("[NaN, NaN, 1, null, 1, 2, 2]", 3, {1, 2, NAN}, {2, 2, 2});
+
+  auto in_ty = this->type_singleton();
+  auto ty = struct_({field("mode", in_ty), field("count", int64())});
+  EXPECT_THAT(Mode(*MakeScalar(in_ty, 5.0)),
+              ResultWith(Datum(ArrayFromJSON(ty, "[[5.0, 1]]"))));
+  EXPECT_THAT(Mode(MakeNullScalar(in_ty)), ResultWith(Datum(ArrayFromJSON(ty, "[]"))));
 }
 
 TEST_F(TestInt8ModeKernelValueRange, Basics) {
@@ -1616,6 +1723,16 @@ TYPED_TEST(TestNumericVarStdKernel, Basics) {
   this->AssertVarStdIsInvalid("[100, null, null]", options);
   chunks = {"[100]", "[null]", "[]"};
   this->AssertVarStdIsInvalid(chunks, options);
+
+  auto ty = this->type_singleton();
+  EXPECT_THAT(Stddev(*MakeScalar(ty, 5)), ResultWith(Datum(0.0)));
+  EXPECT_THAT(Variance(*MakeScalar(ty, 5)), ResultWith(Datum(0.0)));
+  EXPECT_THAT(Stddev(*MakeScalar(ty, 5), options),
+              ResultWith(Datum(MakeNullScalar(float64()))));
+  EXPECT_THAT(Variance(*MakeScalar(ty, 5), options),
+              ResultWith(Datum(MakeNullScalar(float64()))));
+  EXPECT_THAT(Stddev(MakeNullScalar(ty)), ResultWith(Datum(MakeNullScalar(float64()))));
+  EXPECT_THAT(Variance(MakeNullScalar(ty)), ResultWith(Datum(MakeNullScalar(float64()))));
 }
 
 // Test numerical stability
@@ -1934,6 +2051,19 @@ TYPED_TEST(TestIntegerQuantileKernel, Basics) {
   this->AssertQuantilesEmpty("[]", {0.5});
   this->AssertQuantilesEmpty("[null, null, null]", {0.1, 0.2});
   this->AssertQuantilesEmpty({"[null, null]", "[]", "[null]"}, {0.3, 0.4});
+
+  auto ty = this->type_singleton();
+  for (const auto interpolation : this->interpolations_) {
+    QuantileOptions options({0.0, 0.5, 1.0}, interpolation);
+    auto expected_ty = (interpolation == QuantileOptions::LINEAR ||
+                        interpolation == QuantileOptions::MIDPOINT)
+                           ? float64()
+                           : ty;
+    EXPECT_THAT(Quantile(*MakeScalar(ty, 1), options),
+                ResultWith(ArrayFromJSON(expected_ty, "[1, 1, 1]")));
+    EXPECT_THAT(Quantile(MakeNullScalar(ty), options),
+                ResultWith(ArrayFromJSON(expected_ty, "[]")));
+  }
 }
 
 template <typename ArrowType>
@@ -1967,6 +2097,19 @@ TYPED_TEST(TestFloatingQuantileKernel, Floats) {
   this->AssertQuantilesEmpty("[]", {0.5, 0.6});
   this->AssertQuantilesEmpty("[null, NaN, null]", {0.1});
   this->AssertQuantilesEmpty({"[NaN, NaN]", "[]", "[null]"}, {0.3, 0.4});
+
+  auto ty = this->type_singleton();
+  for (const auto interpolation : this->interpolations_) {
+    QuantileOptions options({0.0, 0.5, 1.0}, interpolation);
+    auto expected_ty = (interpolation == QuantileOptions::LINEAR ||
+                        interpolation == QuantileOptions::MIDPOINT)
+                           ? float64()
+                           : ty;
+    EXPECT_THAT(Quantile(*MakeScalar(ty, 1), options),
+                ResultWith(ArrayFromJSON(expected_ty, "[1, 1, 1]")));
+    EXPECT_THAT(Quantile(MakeNullScalar(ty), options),
+                ResultWith(ArrayFromJSON(expected_ty, "[]")));
+  }
 }
 
 class TestInt8QuantileKernel : public TestPrimitiveQuantileKernel<Int8Type> {};
@@ -2227,9 +2370,7 @@ TEST_F(TestRandomFloatQuantileKernel, Sliced) {
 }
 #endif
 
-class TestTDigestKernel : public ::testing::Test {};
-
-TEST_F(TestTDigestKernel, AllNullsOrNaNs) {
+TEST(TestTDigestKernel, AllNullsOrNaNs) {
   const std::vector<std::vector<std::string>> tests = {
       {"[]"},
       {"[null, null]", "[]", "[null]"},
@@ -2247,5 +2388,13 @@ TEST_F(TestTDigestKernel, AllNullsOrNaNs) {
   }
 }
 
+TEST(TestTDigestKernel, Scalar) {
+  for (const auto& ty : {float64(), int64(), uint64()}) {
+    TDigestOptions options(std::vector<double>{0.0, 0.5, 1.0});
+    EXPECT_THAT(TDigest(*MakeScalar(ty, 1), options),
+                ResultWith(ArrayFromJSON(float64(), "[1, 1, 1]")));
+  }
+}
+
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/aggregate_var_std.cc b/cpp/src/arrow/compute/kernels/aggregate_var_std.cc
index 29b2adce3bd..74d7b390c4f 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_var_std.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_var_std.cc
@@ -180,6 +180,34 @@ struct VarStdImpl : public ScalarAggregator {
   VarOrStd return_type;
 };
 
+struct ScalarVarStdImpl : public ScalarAggregator {
+  explicit ScalarVarStdImpl(const VarianceOptions& options)
+      : options(options), seen(false) {}
+
+  Status Consume(KernelContext*, const ExecBatch& batch) override {
+    seen = batch[0].scalar()->is_valid;
+    return Status::OK();
+  }
+
+  Status MergeFrom(KernelContext*, KernelState&& src) override {
+    const auto& other = checked_cast<const ScalarVarStdImpl&>(src);
+    seen = seen || other.seen;
+    return Status::OK();
+  }
+
+  Status Finalize(KernelContext*, Datum* out) override {
+    if (!seen || options.ddof > 0) {
+      out->value = std::make_shared<DoubleScalar>();
+    } else {
+      out->value = std::make_shared<DoubleScalar>(0.0);
+    }
+    return Status::OK();
+  }
+
+  const VarianceOptions options;
+  bool seen;
+};
+
 struct VarStdInitState {
   std::unique_ptr<KernelState> state;
   KernelContext* ctx;
@@ -233,12 +261,21 @@ Result<std::unique_ptr<KernelState>> VarianceInit(KernelContext* ctx,
   return visitor.Create();
 }
 
+Result<std::unique_ptr<KernelState>> ScalarVarStdInit(KernelContext* ctx,
+                                                      const KernelInitArgs& args) {
+  return arrow::internal::make_unique<ScalarVarStdImpl>(
+      static_cast<const VarianceOptions&>(*args.options));
+}
+
 void AddVarStdKernels(KernelInit init,
                       const std::vector<std::shared_ptr<DataType>>& types,
                       ScalarAggregateFunction* func) {
   for (const auto& ty : types) {
     auto sig = KernelSignature::Make({InputType::Array(ty)}, float64());
     AddAggKernel(std::move(sig), init, func);
+
+    sig = KernelSignature::Make({InputType::Scalar(ty)}, float64());
+    AddAggKernel(std::move(sig), ScalarVarStdInit, func);
   }
 }
 
diff --git a/cpp/src/arrow/compute/kernels/test_util.h b/cpp/src/arrow/compute/kernels/test_util.h
index a3fb9308f58..b10ede6f8f5 100644
--- a/cpp/src/arrow/compute/kernels/test_util.h
+++ b/cpp/src/arrow/compute/kernels/test_util.h
@@ -58,39 +58,6 @@ std::shared_ptr<Array> _MakeArray(const std::shared_ptr<DataType>& type,
   return result;
 }
 
-template <typename Type, typename Enable = void>
-struct DatumEqual {};
-
-template <typename Type>
-struct DatumEqual<Type, enable_if_floating_point<Type>> {
-  static constexpr double kArbitraryDoubleErrorBound = 1.0;
-  using ScalarType = typename TypeTraits<Type>::ScalarType;
-
-  static void EnsureEqual(const Datum& lhs, const Datum& rhs) {
-    ASSERT_EQ(lhs.kind(), rhs.kind());
-    if (lhs.kind() == Datum::SCALAR) {
-      auto left = checked_cast<const ScalarType*>(lhs.scalar().get());
-      auto right = checked_cast<const ScalarType*>(rhs.scalar().get());
-      ASSERT_EQ(left->is_valid, right->is_valid);
-      ASSERT_EQ(left->type->id(), right->type->id());
-      ASSERT_NEAR(left->value, right->value, kArbitraryDoubleErrorBound);
-    }
-  }
-};
-
-template <typename Type>
-struct DatumEqual<Type, enable_if_integer<Type>> {
-  using ScalarType = typename TypeTraits<Type>::ScalarType;
-  static void EnsureEqual(const Datum& lhs, const Datum& rhs) {
-    ASSERT_EQ(lhs.kind(), rhs.kind());
-    if (lhs.kind() == Datum::SCALAR) {
-      auto left = checked_cast<const ScalarType*>(lhs.scalar().get());
-      auto right = checked_cast<const ScalarType*>(rhs.scalar().get());
-      ASSERT_EQ(*left, *right);
-    }
-  }
-};
-
 void CheckScalar(std::string func_name, const ScalarVector& inputs,
                  std::shared_ptr<Scalar> expected,
                  const FunctionOptions* options = nullptr);
diff --git a/cpp/src/arrow/scalar.cc b/cpp/src/arrow/scalar.cc
index 56a36114e49..cb7755ba3f1 100644
--- a/cpp/src/arrow/scalar.cc
+++ b/cpp/src/arrow/scalar.cc
@@ -18,6 +18,7 @@
 #include "arrow/scalar.h"
 
 #include <memory>
+#include <sstream>
 #include <string>
 #include <utility>
 
@@ -562,6 +563,19 @@ Status CastImpl(const Decimal256Scalar& from, StringScalar* to) {
   return Status::OK();
 }
 
+Status CastImpl(const StructScalar& from, StringScalar* to) {
+  std::stringstream ss;
+  ss << '{';
+  for (int i = 0; static_cast<size_t>(i) < from.value.size(); i++) {
+    if (i > 0) ss << ", ";
+    ss << from.type->field(i)->name() << ':' << from.type->field(i)->type()->ToString()
+       << " = " << from.value[i]->ToString();
+  }
+  ss << '}';
+  to->value = Buffer::FromString(ss.str());
+  return Status::OK();
+}
+
 struct CastImplVisitor {
   Status NotImplemented() {
     return Status::NotImplemented("cast to ", *to_type_, " from ", *from_.type);
diff --git a/r/R/compute.R b/r/R/compute.R
index 4277ad8d6df..2544471aaf6 100644
--- a/r/R/compute.R
+++ b/r/R/compute.R
@@ -138,11 +138,12 @@ collect_arrays_from_dots <- function(dots) {
   # Given a list that may contain both Arrays and ChunkedArrays,
   # return a single ChunkedArray containing all of those chunks
   # (may return a regular Array if there is only one element in dots)
-  assert_that(all(map_lgl(dots, is.Array)))
+  # If there is only one element and it is a scalar, it returns the scalar
   if (length(dots) == 1) {
     return(dots[[1]])
   }
 
+  assert_that(all(map_lgl(dots, is.Array)))
   arrays <- unlist(lapply(dots, function(x) {
     if (inherits(x, "ChunkedArray")) {
       x$chunks
diff --git a/r/tests/testthat/test-compute-aggregate.R b/r/tests/testthat/test-compute-aggregate.R
index 428f799c97b..4dd929df0bf 100644
--- a/r/tests/testthat/test-compute-aggregate.R
+++ b/r/tests/testthat/test-compute-aggregate.R
@@ -65,7 +65,6 @@ test_that("sum dots", {
 })
 
 test_that("sum.Scalar", {
-  skip("No sum method in arrow for Scalar: ARROW-9056")
   s <- Scalar$create(4)
   expect_identical(as.numeric(s), as.numeric(sum(s)))
 })
@@ -104,9 +103,8 @@ test_that("mean.ChunkedArray", {
 })
 
 test_that("mean.Scalar", {
-  skip("No mean method in arrow for Scalar: ARROW-9056")
   s <- Scalar$create(4)
-  expect_identical(as.vector(s), mean(s))
+  expect_equal(s, mean(s))
 })
 
 test_that("Bad input handling of call_function", {

From f7cac0e5866a08e946a14976ef854e8c42ac281b Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Fri, 16 Jul 2021 10:44:52 -0400
Subject: [PATCH 585/719] ARROW-13189: [R] Disable row-level metadata
 application on datasets

Closes #10646 from jonkeane/ARROW-13189-ds-row-metadata

Authored-by: Jonathan Keane <jkeane@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 r/NAMESPACE                      |  1 +
 r/R/metadata.R                   | 45 +++++++++++++++++++++++++++-----
 r/tests/testthat/helper-arrow.R  |  6 +++++
 r/tests/testthat/test-dataset.R  |  6 -----
 r/tests/testthat/test-metadata.R | 42 +++++++++++++++++++++++++++++
 5 files changed, 88 insertions(+), 12 deletions(-)

diff --git a/r/NAMESPACE b/r/NAMESPACE
index ab45aa9985e..814868d8ade 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -327,6 +327,7 @@ importFrom(rlang,quos)
 importFrom(rlang,seq2)
 importFrom(rlang,set_names)
 importFrom(rlang,syms)
+importFrom(rlang,trace_back)
 importFrom(rlang,warn)
 importFrom(stats,median)
 importFrom(stats,na.exclude)
diff --git a/r/R/metadata.R b/r/R/metadata.R
index 408c2214a31..505d0653b4a 100644
--- a/r/R/metadata.R
+++ b/r/R/metadata.R
@@ -50,6 +50,7 @@
   })
 }
 
+#' @importFrom rlang trace_back
 apply_arrow_r_metadata <- function(x, r_metadata) {
   tryCatch({
     columns_metadata <- r_metadata$columns
@@ -60,9 +61,27 @@ apply_arrow_r_metadata <- function(x, r_metadata) {
         }
       }
     } else if (is.list(x) && !inherits(x, "POSIXlt") && !is.null(columns_metadata)) {
-      x <- map2(x, columns_metadata, function(.x, .y) {
-        apply_arrow_r_metadata(.x, .y)
-      })
+      # If we have a list and "columns_metadata" this applies row-level metadata
+      # inside of a column in a dataframe.
+
+      # However, if we are inside of a dplyr collection (including all datasets),
+      # we cannot apply this row-level metadata, since the order of the rows is
+      # not guaranteed to be the same, so don't even try, but warn what's going on
+      trace <- trace_back()
+      in_dplyr_collect <- any(map_lgl(trace$calls, function(x) {
+        grepl("collect.arrow_dplyr_query", x, fixed = TRUE)[[1]]
+      }))
+      if (in_dplyr_collect) {
+        warning(
+          "Row-level metadata is not compatible with this operation and has ",
+          "been ignored",
+          call. = FALSE
+        )
+      } else {
+        x <- map2(x, columns_metadata, function(.x, .y) {
+          apply_arrow_r_metadata(.x, .y)
+        })
+      }
       x
     }
 
@@ -116,9 +135,23 @@ arrow_attributes <- function(x, only_top_level = FALSE) {
 
   columns <- NULL
   if (is.list(x) && !inherits(x, "POSIXlt")) {
-    # for list columns, we also keep attributes of each
-    # element in columns
-    columns <- map(x, arrow_attributes)
+    # However, if we are inside of a dplyr collection (including all datasets),
+    # we cannot apply this row-level metadata, since the order of the rows is
+    # not guaranteed to be the same, so don't even try, but warn what's going on
+    trace <- trace_back()
+    in_dataset_write <- any(map_lgl(trace$calls, function(x) {
+      grepl("write_dataset", x, fixed = TRUE)[[1]]
+    }))
+    if (in_dataset_write) {
+      warning(
+        "Row-level metadata is not compatible with datasets and will be discarded",
+        call. = FALSE
+      )
+    } else {
+      # for list columns, we also keep attributes of each
+      # element in columns
+      columns <- map(x, arrow_attributes)
+    }
     if (all(map_lgl(columns, is.null))) {
       columns <- NULL
     }
diff --git a/r/tests/testthat/helper-arrow.R b/r/tests/testthat/helper-arrow.R
index 0abbfb6a13a..5f2dad841a1 100644
--- a/r/tests/testthat/helper-arrow.R
+++ b/r/tests/testthat/helper-arrow.R
@@ -67,3 +67,9 @@ test_that <- function(what, code) {
 r_only <- function(code) {
   withr::with_options(list(..skip.tests = FALSE), code)
 }
+
+make_temp_dir <- function() {
+  path <- tempfile()
+  dir.create(path)
+  normalizePath(path, winslash = "/")
+}
diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R
index a0b1bdae022..66493376e74 100644
--- a/r/tests/testthat/test-dataset.R
+++ b/r/tests/testthat/test-dataset.R
@@ -21,12 +21,6 @@ context("Dataset")
 
 library(dplyr)
 
-make_temp_dir <- function() {
-  path <- tempfile()
-  dir.create(path)
-  normalizePath(path, winslash = "/")
-}
-
 dataset_dir <- make_temp_dir()
 hive_dir <- make_temp_dir()
 ipc_dir <- make_temp_dir()
diff --git a/r/tests/testthat/test-metadata.R b/r/tests/testthat/test-metadata.R
index afce1c2244c..de3542b1c60 100644
--- a/r/tests/testthat/test-metadata.R
+++ b/r/tests/testthat/test-metadata.R
@@ -205,3 +205,45 @@ test_that("metadata of list elements (ARROW-10386)", {
   expect_identical(attr(as.data.frame(tab)$x[[1]], "foo"), "bar")
   expect_identical(attr(as.data.frame(tab)$x[[2]], "baz"), "qux")
 })
+
+
+test_that("metadata of list elements (ARROW-10386)", {
+  skip_if_not_available("dataset")
+  skip_if_not_available("parquet")
+
+  library(dplyr)
+
+  df <- tibble::tibble(
+    metadata = list(
+      structure(1, my_value_as_attr = 1),
+      structure(2, my_value_as_attr = 2),
+      structure(3, my_value_as_attr = 3),
+      structure(4, my_value_as_attr = 3)),
+    int = 1L:4L,
+    part = c(1, 3, 2, 1)
+  )
+
+  dst_dir <- make_temp_dir()
+  expect_warning(
+    write_dataset(df, dst_dir, partitioning = "part"),
+    "Row-level metadata is not compatible with datasets and will be discarded"
+  )
+
+  # but we need to write a dataset with row-level metadata to make sure when
+  # reading ones that have been written with them we warn appropriately
+  fake_func_name <- write_dataset
+  fake_func_name(df, dst_dir, partitioning = "part")
+
+  ds <- open_dataset(dst_dir)
+  expect_warning(
+    df_from_ds <- collect(ds),
+    "Row-level metadata is not compatible with this operation and has been ignored"
+  )
+  expect_equal(df_from_ds[c(1, 4, 3, 2), ], df, check.attributes = FALSE)
+
+  # however there is *no* warning if we don't select the metadata column
+  expect_warning(
+    df_from_ds <- ds %>% select(int) %>% collect(),
+    NA
+  )
+})

From 2663826267a5c21454e42f6fbe185b8e62afe5f9 Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Fri, 16 Jul 2021 12:45:42 -0400
Subject: [PATCH 586/719] ARROW-13357: [R] bindings for sign()

Closes #10735 from nealrichardson/r-sign

Authored-by: Neal Richardson <neal.p.richardson@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 r/R/expression.R              |  1 +
 r/tests/testthat/test-dplyr.R | 54 +++++++++++++++++++++--------------
 2 files changed, 33 insertions(+), 22 deletions(-)

diff --git a/r/R/expression.R b/r/R/expression.R
index b3fc9fe20c7..ab83c41757d 100644
--- a/r/R/expression.R
+++ b/r/R/expression.R
@@ -23,6 +23,7 @@
   "is.na" = "is_null",
   "is.nan" = "is_nan",
   "abs" = "abs_checked",
+  "sign" = "sign",
   # nchar is defined in dplyr-functions.R
   "tolower" = "utf8_lower",
   "toupper" = "utf8_upper",
diff --git a/r/tests/testthat/test-dplyr.R b/r/tests/testthat/test-dplyr.R
index 63d0433fc23..21ba019e498 100644
--- a/r/tests/testthat/test-dplyr.R
+++ b/r/tests/testthat/test-dplyr.R
@@ -67,7 +67,7 @@ chr: string
 See $.data for the source Arrow object',
   fixed = TRUE
   )
-  
+
 })
 
 test_that("summarize", {
@@ -821,7 +821,7 @@ test_that("type checks on expressions", {
       collect(),
     tbl
   )
-  
+
   # the code in the expectation below depends on RE2
   skip_if_not_available("re2")
 
@@ -939,72 +939,82 @@ test_that("abs()", {
 
   expect_dplyr_equal(
     input %>%
-      transmute(
-        abs = abs(x)
-      ) %>% collect(),
+      transmute(abs = abs(x)) %>%
+      collect(),
+    df
+  )
+})
+
+test_that("sign()", {
+  df <- tibble(x = c(-127, -10, -1, -0 , 0, 1, 10, 127, NA))
+
+  expect_dplyr_equal(
+    input %>%
+      transmute(sign = sign(x)) %>%
+      collect(),
     df
   )
 })
 
 test_that("log functions", {
-  
+
   df <- tibble(x = c(1:10, NA, NA))
-  
+
   expect_dplyr_equal(
     input %>%
       mutate(y = log(x)) %>%
       collect(),
     df
   )
-  
+
   expect_dplyr_equal(
     input %>%
       mutate(y = log(x, base = exp(1))) %>%
       collect(),
     df
   )
-  
+
   expect_dplyr_equal(
     input %>%
       mutate(y = log(x, base = 2)) %>%
       collect(),
     df
   )
-  
+
   expect_dplyr_equal(
     input %>%
       mutate(y = log(x, base = 10)) %>%
       collect(),
     df
   )
-  
+
   expect_error(
     nse_funcs$log(Expression$scalar(x), base = 5),
     "`base` values other than exp(1), 2 and 10 not supported in Arrow",
     fixed = TRUE
   )
-  
+
   expect_dplyr_equal(
     input %>%
       mutate(y = logb(x)) %>%
       collect(),
     df
   )
-  
+
   expect_dplyr_equal(
     input %>%
       mutate(y = log1p(x)) %>%
       collect(),
     df
   )
-  
+
   expect_dplyr_equal(
     input %>%
       mutate(y = log2(x)) %>%
       collect(),
     df
   )
-  
+
   expect_dplyr_equal(
     input %>%
       mutate(y = log10(x)) %>%
@@ -1013,39 +1023,39 @@ test_that("log functions", {
   )
 
 })
-  
+
 test_that("trig functions", {
-  
+
   df <- tibble(x = c(seq(from = 0, to = 1, by = 0.1), NA))
-  
+
   expect_dplyr_equal(
     input %>%
       mutate(y = sin(x)) %>%
       collect(),
     df
   )
-  
+
   expect_dplyr_equal(
     input %>%
       mutate(y = cos(x)) %>%
       collect(),
     df
   )
-  
+
   expect_dplyr_equal(
     input %>%
       mutate(y = tan(x)) %>%
       collect(),
     df
   )
-  
+
   expect_dplyr_equal(
     input %>%
       mutate(y = asin(x)) %>%
       collect(),
     df
   )
-  
+
   expect_dplyr_equal(
     input %>%
       mutate(y = acos(x)) %>%

From e61843aec106820afa85e22ceeb846cac833a071 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Fri, 16 Jul 2021 13:52:11 -0400
Subject: [PATCH 587/719] ARROW-13352: [C++] Make sure scalar case_when fully
 initializes output

Closes #10732 from lidavidm/arrow-13352

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/src/arrow/compute/kernels/scalar_if_else.cc | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else.cc b/cpp/src/arrow/compute/kernels/scalar_if_else.cc
index e8578305bf6..861fbf13e86 100644
--- a/cpp/src/arrow/compute/kernels/scalar_if_else.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_if_else.cc
@@ -1092,11 +1092,14 @@ struct CopyFixedWidth<Type, enable_if_same<Type, FixedSizeBinaryType>> {
     uint8_t* next = raw_out_values + (width * out_offset);
     const auto& scalar = checked_cast<const FixedSizeBinaryScalar&>(values);
     // Scalar may have null value buffer
-    if (!scalar.value) return;
-    DCHECK_EQ(scalar.value->size(), width);
-    for (int i = 0; i < length; i++) {
-      std::memcpy(next, scalar.value->data(), width);
-      next += width;
+    if (!scalar.value) {
+      std::memset(next, 0x00, width * length);
+    } else {
+      DCHECK_EQ(scalar.value->size(), width);
+      for (int i = 0; i < length; i++) {
+        std::memcpy(next, scalar.value->data(), width);
+        next += width;
+      }
     }
   }
   static void CopyArray(const DataType& type, const uint8_t* in_values,

From 8ce0c01c3eb39d41b070989c0702638636418c23 Mon Sep 17 00:00:00 2001
From: Eduardo Ponce <edponce00@gmail.com>
Date: Fri, 16 Jul 2021 15:25:58 -0400
Subject: [PATCH 588/719] ARROW-12745: [C++][Compute] Add floor, ceiling, and
 truncate kernels

This PR adds floor, ceiling, and truncate scalar kernels. For all integral inputs, output is a 64-bit floating-point value.

Closes #10727 from edponce/ARROW-12745-Compute-Add-floor-ceiling-and-truncate-k

Authored-by: Eduardo Ponce <edponce00@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/src/arrow/compute/api_scalar.cc           |   3 +
 cpp/src/arrow/compute/api_scalar.h            |  29 ++
 .../compute/kernels/scalar_arithmetic.cc      |  63 +++-
 .../compute/kernels/scalar_arithmetic_test.cc | 285 ++++++++++++++----
 docs/source/cpp/compute.rst                   |  16 +
 docs/source/python/api/compute.rst            |  13 +
 6 files changed, 349 insertions(+), 60 deletions(-)

diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc
index 07e56d5f3d1..9357fb5f557 100644
--- a/cpp/src/arrow/compute/api_scalar.cc
+++ b/cpp/src/arrow/compute/api_scalar.cc
@@ -358,6 +358,9 @@ SCALAR_ARITHMETIC_BINARY(Power, "power", "power_checked")
 SCALAR_ARITHMETIC_BINARY(ShiftLeft, "shift_left", "shift_left_checked")
 SCALAR_ARITHMETIC_BINARY(ShiftRight, "shift_right", "shift_right_checked")
 SCALAR_EAGER_BINARY(Atan2, "atan2")
+SCALAR_EAGER_UNARY(Floor, "floor")
+SCALAR_EAGER_UNARY(Ceil, "ceil")
+SCALAR_EAGER_UNARY(Trunc, "trunc")
 
 Result<Datum> MaxElementWise(const std::vector<Datum>& args,
                              ElementWiseAggregateOptions options, ExecContext* ctx) {
diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h
index 285e1eb4f51..c77994d89bb 100644
--- a/cpp/src/arrow/compute/api_scalar.h
+++ b/cpp/src/arrow/compute/api_scalar.h
@@ -487,6 +487,35 @@ ARROW_EXPORT
 Result<Datum> Log1p(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(),
                     ExecContext* ctx = NULLPTR);
 
+/// \brief Round to the nearest integer less than or equal in magnitude to the
+/// argument. Array values can be of arbitrary length. If argument is null the
+/// result will be null.
+///
+/// \param[in] arg the value to round
+/// \param[in] ctx the function execution context, optional
+/// \return the rounded value
+ARROW_EXPORT
+Result<Datum> Floor(const Datum& arg, ExecContext* ctx = NULLPTR);
+
+/// \brief Round to the nearest integer greater than or equal in magnitude to the
+/// argument. Array values can be of arbitrary length. If argument is null the
+/// result will be null.
+///
+/// \param[in] arg the value to round
+/// \param[in] ctx the function execution context, optional
+/// \return the rounded value
+ARROW_EXPORT
+Result<Datum> Ceil(const Datum& arg, ExecContext* ctx = NULLPTR);
+
+/// \brief Get the integral part without fractional digits. Array values can be
+/// of arbitrary length. If argument is null the result will be null.
+///
+/// \param[in] arg the value to truncate
+/// \param[in] ctx the function execution context, optional
+/// \return the truncated value
+ARROW_EXPORT
+Result<Datum> Trunc(const Datum& arg, ExecContext* ctx = NULLPTR);
+
 /// \brief Find the element-wise maximum of any number of arrays or scalars.
 /// Array values must be the same length.
 ///
diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
index 28904bdbfa0..a5d4a557740 100644
--- a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
@@ -817,6 +817,27 @@ struct Log1pChecked {
   }
 };
 
+struct Floor {
+  template <typename T, typename Arg>
+  static constexpr enable_if_floating_point<T> Call(KernelContext*, Arg arg, Status*) {
+    return std::floor(arg);
+  }
+};
+
+struct Ceil {
+  template <typename T, typename Arg>
+  static constexpr enable_if_floating_point<T> Call(KernelContext*, Arg arg, Status*) {
+    return std::ceil(arg);
+  }
+};
+
+struct Trunc {
+  template <typename T, typename Arg>
+  static constexpr enable_if_floating_point<T> Call(KernelContext*, Arg arg, Status*) {
+    return std::trunc(arg);
+  }
+};
+
 // Generate a kernel given an arithmetic functor
 template <template <typename... Args> class KernelGenerator, typename Op>
 ArrayKernelExec ArithmeticExecFromOp(detail::GetTypeId get_id) {
@@ -1391,16 +1412,16 @@ const FunctionDoc sign_doc{
     {"x"}};
 
 const FunctionDoc bit_wise_not_doc{
-    "Bit-wise negate the arguments element-wise", ("Null values return null."), {"x"}};
+    "Bit-wise negate the arguments element-wise", "Null values return null.", {"x"}};
 
 const FunctionDoc bit_wise_and_doc{
-    "Bit-wise AND the arguments element-wise", ("Null values return null."), {"x", "y"}};
+    "Bit-wise AND the arguments element-wise", "Null values return null.", {"x", "y"}};
 
 const FunctionDoc bit_wise_or_doc{
-    "Bit-wise OR the arguments element-wise", ("Null values return null."), {"x", "y"}};
+    "Bit-wise OR the arguments element-wise", "Null values return null.", {"x", "y"}};
 
 const FunctionDoc bit_wise_xor_doc{
-    "Bit-wise XOR the arguments element-wise", ("Null values return null."), {"x", "y"}};
+    "Bit-wise XOR the arguments element-wise", "Null values return null.", {"x", "y"}};
 
 const FunctionDoc shift_left_doc{
     "Left shift `x` by `y`",
@@ -1506,12 +1527,12 @@ const FunctionDoc acos_checked_doc{
     {"x"}};
 
 const FunctionDoc atan_doc{"Compute the principal value of the inverse tangent",
-                           ("Integer arguments return double values."),
+                           "Integer arguments return double values.",
                            {"x"}};
 
 const FunctionDoc atan2_doc{
     "Compute the inverse tangent using argument signs to determine the quadrant",
-    ("Integer arguments return double values."),
+    "Integer arguments return double values.",
     {"y", "x"}};
 
 const FunctionDoc ln_doc{
@@ -1567,6 +1588,24 @@ const FunctionDoc log1p_checked_doc{
      "Use function \"log1p\" if you want non-positive values to return "
      "-inf or NaN."),
     {"x"}};
+
+const FunctionDoc floor_doc{
+    "Round down to the nearest integer",
+    ("Calculate the nearest integer less than or equal in magnitude to the "
+     "argument element-wise"),
+    {"x"}};
+
+const FunctionDoc ceil_doc{
+    "Round up to the nearest integer",
+    ("Calculate the nearest integer greater than or equal in magnitude to the "
+     "argument element-wise"),
+    {"x"}};
+
+const FunctionDoc trunc_doc{
+    "Get the integral part without fractional digits",
+    ("Calculate the nearest integer not greater in magnitude than to the "
+     "argument element-wise."),
+    {"x"}};
 }  // namespace
 
 void RegisterScalarArithmetic(FunctionRegistry* registry) {
@@ -1592,7 +1631,6 @@ void RegisterScalarArithmetic(FunctionRegistry* registry) {
   DCHECK_OK(registry->AddFunction(std::move(add_checked)));
 
   // ----------------------------------------------------------------------
-  // subtract
   auto subtract = MakeArithmeticFunction<Subtract>("subtract", &sub_doc);
   AddDecimalBinaryKernels<Subtract>("subtract", &subtract);
 
@@ -1767,6 +1805,17 @@ void RegisterScalarArithmetic(FunctionRegistry* registry) {
   auto log1p_checked = MakeUnaryArithmeticFunctionFloatingPointNotNull<Log1pChecked>(
       "log1p_checked", &log1p_checked_doc);
   DCHECK_OK(registry->AddFunction(std::move(log1p_checked)));
+
+  // ----------------------------------------------------------------------
+  // Rounding functions
+  auto floor = MakeUnaryArithmeticFunctionFloatingPoint<Floor>("floor", &floor_doc);
+  DCHECK_OK(registry->AddFunction(std::move(floor)));
+
+  auto ceil = MakeUnaryArithmeticFunctionFloatingPoint<Ceil>("ceil", &ceil_doc);
+  DCHECK_OK(registry->AddFunction(std::move(ceil)));
+
+  auto trunc = MakeUnaryArithmeticFunctionFloatingPoint<Trunc>("trunc", &trunc_doc);
+  DCHECK_OK(registry->AddFunction(std::move(trunc)));
 }
 
 }  // namespace internal
diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
index a495b00a171..692d4579719 100644
--- a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
@@ -66,7 +66,7 @@ class TestUnaryArithmetic : public TestBase {
     return *arrow::MakeScalar(type_singleton(), value);
   }
 
-  // (Scalar, Scalar)
+  // (CScalar, CScalar)
   void AssertUnaryOp(UnaryFunction func, CType argument, CType expected) {
     auto arg = MakeScalar(argument);
     auto exp = MakeScalar(expected);
@@ -991,6 +991,13 @@ TEST(TestBinaryArithmetic, DispatchBest) {
                         {float64(), float64()});
     }
   }
+
+  CheckDispatchBest("atan2", {int32(), float64()}, {float64(), float64()});
+  CheckDispatchBest("atan2", {int32(), uint8()}, {float64(), float64()});
+  CheckDispatchBest("atan2", {int32(), null()}, {float64(), float64()});
+  CheckDispatchBest("atan2", {float32(), float64()}, {float64(), float64()});
+  // Integer always promotes to double
+  CheckDispatchBest("atan2", {float32(), int8()}, {float64(), float64()});
 }
 
 TEST(TestBinaryArithmetic, AddWithImplicitCasts) {
@@ -1032,8 +1039,20 @@ TEST(TestBinaryArithmetic, AddWithImplicitCastsUint64EdgeCase) {
 }
 
 TEST(TestUnaryArithmetic, DispatchBest) {
-  // All arithmetic
-  for (std::string name : {"negate", "abs", "abs_checked", "sign"}) {
+  // All types (with _checked variant)
+  for (std::string name : {"abs"}) {
+    for (std::string suffix : {"", "_checked"}) {
+      name += suffix;
+      for (const auto& ty : {int8(), int16(), int32(), int64(), uint8(), uint16(),
+                             uint32(), uint64(), float32(), float64()}) {
+        CheckDispatchBest(name, {ty}, {ty});
+        CheckDispatchBest(name, {dictionary(int8(), ty)}, {ty});
+      }
+    }
+  }
+
+  // All types
+  for (std::string name : {"negate", "sign"}) {
     for (const auto& ty : {int8(), int16(), int32(), int64(), uint8(), uint16(), uint32(),
                            uint64(), float32(), float64()}) {
       CheckDispatchBest(name, {ty}, {ty});
@@ -1041,7 +1060,21 @@ TEST(TestUnaryArithmetic, DispatchBest) {
     }
   }
 
-  // Signed arithmetic
+  // Fail on null type (with _checked variant)
+  for (std::string name : {"negate", "abs", "ln", "log2", "log10", "log1p", "sin", "cos",
+                           "tan", "asin", "acos"}) {
+    for (std::string suffix : {"", "_checked"}) {
+      name += suffix;
+      CheckDispatchFails(name, {null()});
+    }
+  }
+
+  // Fail on null type
+  for (std::string name : {"atan", "sign", "floor", "ceil", "trunc"}) {
+    CheckDispatchFails(name, {null()});
+  }
+
+  // Signed types
   for (std::string name : {"negate_checked"}) {
     for (const auto& ty : {int8(), int16(), int32(), int64(), float32(), float64()}) {
       CheckDispatchBest(name, {ty}, {ty});
@@ -1049,30 +1082,47 @@ TEST(TestUnaryArithmetic, DispatchBest) {
     }
   }
 
-  // Null input
-  for (std::string name : {"negate", "negate_checked", "abs", "abs_checked", "sign"}) {
-    CheckDispatchFails(name, {null()});
-  }
-
+  // Float types (with _checked variant)
   for (std::string name :
        {"ln", "log2", "log10", "log1p", "sin", "cos", "tan", "asin", "acos"}) {
     for (std::string suffix : {"", "_checked"}) {
       name += suffix;
+      for (const auto& ty : {float32(), float64()}) {
+        CheckDispatchBest(name, {ty}, {ty});
+        CheckDispatchBest(name, {dictionary(int8(), ty)}, {ty});
+      }
+    }
+  }
 
-      CheckDispatchBest(name, {int32()}, {float64()});
-      CheckDispatchBest(name, {uint8()}, {float64()});
+  // Float types
+  for (std::string name : {"atan", "floor", "ceil", "trunc"}) {
+    for (const auto& ty : {float32(), float64()}) {
+      CheckDispatchBest(name, {ty}, {ty});
+      CheckDispatchBest(name, {dictionary(int8(), ty)}, {ty});
+    }
+  }
 
-      CheckDispatchBest(name, {dictionary(int8(), int64())}, {float64()});
+  // Integer -> Float64 (with _checked variant)
+  for (std::string name :
+       {"ln", "log2", "log10", "log1p", "sin", "cos", "tan", "asin", "acos"}) {
+    for (std::string suffix : {"", "_checked"}) {
+      name += suffix;
+      for (const auto& ty :
+           {int8(), int16(), int32(), int64(), uint8(), uint16(), uint32(), uint64()}) {
+        CheckDispatchBest(name, {ty}, {float64()});
+        CheckDispatchBest(name, {dictionary(int8(), ty)}, {float64()});
+      }
     }
   }
 
-  CheckDispatchBest("atan", {int32()}, {float64()});
-  CheckDispatchBest("atan2", {int32(), float64()}, {float64(), float64()});
-  CheckDispatchBest("atan2", {int32(), uint8()}, {float64(), float64()});
-  CheckDispatchBest("atan2", {int32(), null()}, {float64(), float64()});
-  CheckDispatchBest("atan2", {float32(), float64()}, {float64(), float64()});
-  // Integer always promotes to double
-  CheckDispatchBest("atan2", {float32(), int8()}, {float64(), float64()});
+  // Integer -> Float64
+  for (std::string name : {"atan", "floor", "ceil", "trunc"}) {
+    for (const auto& ty :
+         {int8(), int16(), int32(), int64(), uint8(), uint16(), uint32(), uint64()}) {
+      CheckDispatchBest(name, {ty}, {float64()});
+      CheckDispatchBest(name, {dictionary(int8(), ty)}, {float64()});
+    }
+  }
 }
 
 TYPED_TEST(TestUnaryArithmeticSigned, Negate) {
@@ -1854,24 +1904,20 @@ TYPED_TEST(TestBinaryArithmeticFloating, TrigAtan2) {
 
 TYPED_TEST(TestUnaryArithmeticIntegral, Trig) {
   // Integer arguments promoted to double, sanity check here
-  auto ty = this->type_singleton();
   auto atan = [](const Datum& arg, ArithmeticOptions, ExecContext* ctx) {
     return Atan(arg, ctx);
   };
   for (auto check_overflow : {false, true}) {
     this->SetOverflowCheck(check_overflow);
-    this->AssertUnaryOp(Sin, ArrayFromJSON(ty, "[0, 1]"),
+    this->AssertUnaryOp(Sin, "[0, 1]",
                         ArrayFromJSON(float64(), "[0, 0.8414709848078965]"));
-    this->AssertUnaryOp(Cos, ArrayFromJSON(ty, "[0, 1]"),
+    this->AssertUnaryOp(Cos, "[0, 1]",
                         ArrayFromJSON(float64(), "[1, 0.5403023058681398]"));
-    this->AssertUnaryOp(Tan, ArrayFromJSON(ty, "[0, 1]"),
+    this->AssertUnaryOp(Tan, "[0, 1]",
                         ArrayFromJSON(float64(), "[0, 1.5574077246549023]"));
-    this->AssertUnaryOp(Asin, ArrayFromJSON(ty, "[0, 1]"),
-                        ArrayFromJSON(float64(), MakeArray(0, M_PI_2)));
-    this->AssertUnaryOp(Acos, ArrayFromJSON(ty, "[0, 1]"),
-                        ArrayFromJSON(float64(), MakeArray(M_PI_2, 0)));
-    this->AssertUnaryOp(atan, ArrayFromJSON(ty, "[0, 1]"),
-                        ArrayFromJSON(float64(), MakeArray(0, M_PI_4)));
+    this->AssertUnaryOp(Asin, "[0, 1]", ArrayFromJSON(float64(), MakeArray(0, M_PI_2)));
+    this->AssertUnaryOp(Acos, "[0, 1]", ArrayFromJSON(float64(), MakeArray(M_PI_2, 0)));
+    this->AssertUnaryOp(atan, "[0, 1]", ArrayFromJSON(float64(), MakeArray(0, M_PI_4)));
   }
 }
 
@@ -1946,33 +1992,23 @@ TYPED_TEST(TestUnaryArithmeticFloating, Log) {
 
 TYPED_TEST(TestUnaryArithmeticIntegral, Log) {
   // Integer arguments promoted to double, sanity check here
-  auto ty = this->type_singleton();
   for (auto check_overflow : {false, true}) {
     this->SetOverflowCheck(check_overflow);
-    this->AssertUnaryOp(Ln, ArrayFromJSON(ty, "[1, null]"),
-                        ArrayFromJSON(float64(), "[0, null]"));
-    this->AssertUnaryOp(Log10, ArrayFromJSON(ty, "[1, 10, null]"),
-                        ArrayFromJSON(float64(), "[0, 1, null]"));
-    this->AssertUnaryOp(Log2, ArrayFromJSON(ty, "[1, 2, null]"),
-                        ArrayFromJSON(float64(), "[0, 1, null]"));
-    this->AssertUnaryOp(Log1p, ArrayFromJSON(ty, "[0, null]"),
-                        ArrayFromJSON(float64(), "[0, null]"));
+    this->AssertUnaryOp(Ln, "[1, null]", ArrayFromJSON(float64(), "[0, null]"));
+    this->AssertUnaryOp(Log10, "[1, 10, null]", ArrayFromJSON(float64(), "[0, 1, null]"));
+    this->AssertUnaryOp(Log2, "[1, 2, null]", ArrayFromJSON(float64(), "[0, 1, null]"));
+    this->AssertUnaryOp(Log1p, "[0, null]", ArrayFromJSON(float64(), "[0, null]"));
   }
 }
 
 TYPED_TEST(TestUnaryArithmeticSigned, Log) {
   // Integer arguments promoted to double, sanity check here
-  auto ty = this->type_singleton();
   this->SetNansEqual(true);
   this->SetOverflowCheck(false);
-  this->AssertUnaryOp(Ln, ArrayFromJSON(ty, "[-1, 0]"),
-                      ArrayFromJSON(float64(), "[NaN, -Inf]"));
-  this->AssertUnaryOp(Log10, ArrayFromJSON(ty, "[-1, 0]"),
-                      ArrayFromJSON(float64(), "[NaN, -Inf]"));
-  this->AssertUnaryOp(Log2, ArrayFromJSON(ty, "[-1, 0]"),
-                      ArrayFromJSON(float64(), "[NaN, -Inf]"));
-  this->AssertUnaryOp(Log1p, ArrayFromJSON(ty, "[-2, -1]"),
-                      ArrayFromJSON(float64(), "[NaN, -Inf]"));
+  this->AssertUnaryOp(Ln, "[-1, 0]", ArrayFromJSON(float64(), "[NaN, -Inf]"));
+  this->AssertUnaryOp(Log10, "[-1, 0]", ArrayFromJSON(float64(), "[NaN, -Inf]"));
+  this->AssertUnaryOp(Log2, "[-1, 0]", ArrayFromJSON(float64(), "[NaN, -Inf]"));
+  this->AssertUnaryOp(Log1p, "[-2, -1]", ArrayFromJSON(float64(), "[NaN, -Inf]"));
   this->SetOverflowCheck(true);
   this->AssertUnaryOpRaises(Ln, "[0]", "logarithm of zero");
   this->AssertUnaryOpRaises(Ln, "[-1]", "logarithm of negative number");
@@ -1989,8 +2025,6 @@ TYPED_TEST(TestUnaryArithmeticSigned, Sign) {
   auto min = std::numeric_limits<CType>::min();
   auto max = std::numeric_limits<CType>::max();
 
-  // N.B. TestUnaryArithmetic expects a function with ArithmeticOptions as its
-  // second parameter
   auto sign = [](const Datum& arg, ArithmeticOptions, ExecContext* ctx) {
     return Sign(arg, ctx);
   };
@@ -2010,8 +2044,6 @@ TYPED_TEST(TestUnaryArithmeticUnsigned, Sign) {
   auto min = std::numeric_limits<CType>::min();
   auto max = std::numeric_limits<CType>::max();
 
-  // N.B. TestUnaryArithmetic expects a function with ArithmeticOptions as its
-  // second parameter
   auto sign = [](const Datum& arg, ArithmeticOptions, ExecContext* ctx) {
     return Sign(arg, ctx);
   };
@@ -2032,8 +2064,6 @@ TYPED_TEST(TestUnaryArithmeticFloating, Sign) {
 
   this->SetNansEqual(true);
 
-  // N.B. TestUnaryArithmetic expects a function with ArithmeticOptions as its
-  // second parameter
   auto sign = [](const Datum& arg, ArithmeticOptions, ExecContext* ctx) {
     return Sign(arg, ctx);
   };
@@ -2049,5 +2079,154 @@ TYPED_TEST(TestUnaryArithmeticFloating, Sign) {
   this->AssertUnaryOp(sign, this->MakeScalar(min), this->MakeScalar(-1));
   this->AssertUnaryOp(sign, this->MakeScalar(max), this->MakeScalar(1));
 }
+
+TYPED_TEST(TestUnaryArithmeticSigned, Floor) {
+  auto floor = [](const Datum& arg, ArithmeticOptions, ExecContext* ctx) {
+    return Floor(arg, ctx);
+  };
+
+  this->AssertUnaryOp(floor, "[]", ArrayFromJSON(float64(), "[]"));
+  this->AssertUnaryOp(floor, "[null]", ArrayFromJSON(float64(), "[null]"));
+  this->AssertUnaryOp(floor, "[1, null, -10]",
+                      ArrayFromJSON(float64(), "[1, null, -10]"));
+  this->AssertUnaryOp(floor, "[0]", ArrayFromJSON(float64(), "[0]"));
+  this->AssertUnaryOp(floor, "[1, 10, 127]", ArrayFromJSON(float64(), "[1, 10, 127]"));
+  this->AssertUnaryOp(floor, "[-1, -10, -127]",
+                      ArrayFromJSON(float64(), "[-1, -10, -127]"));
+}
+
+TYPED_TEST(TestUnaryArithmeticUnsigned, Floor) {
+  auto floor = [](const Datum& arg, ArithmeticOptions, ExecContext* ctx) {
+    return Floor(arg, ctx);
+  };
+
+  this->AssertUnaryOp(floor, "[]", ArrayFromJSON(float64(), "[]"));
+  this->AssertUnaryOp(floor, "[null]", ArrayFromJSON(float64(), "[null]"));
+  this->AssertUnaryOp(floor, "[1, null, 10]", ArrayFromJSON(float64(), "[1, null, 10]"));
+  this->AssertUnaryOp(floor, "[0]", ArrayFromJSON(float64(), "[0]"));
+  this->AssertUnaryOp(floor, "[1, 10, 127]", ArrayFromJSON(float64(), "[1, 10, 127]"));
+}
+
+TYPED_TEST(TestUnaryArithmeticFloating, Floor) {
+  using CType = typename TestFixture::CType;
+  auto min = std::numeric_limits<CType>::lowest();
+  auto max = std::numeric_limits<CType>::max();
+
+  this->SetNansEqual(true);
+
+  auto floor = [](const Datum& arg, ArithmeticOptions, ExecContext* ctx) {
+    return Floor(arg, ctx);
+  };
+
+  this->AssertUnaryOp(floor, "[]", "[]");
+  this->AssertUnaryOp(floor, "[null]", "[null]");
+  this->AssertUnaryOp(floor, "[1.3, null, -10.80]", "[1, null, -11]");
+  this->AssertUnaryOp(floor, "[0.0, -0.0]", "[0, 0]");
+  this->AssertUnaryOp(floor, "[1.3, 10.80, 12748.001]", "[1, 10, 12748]");
+  this->AssertUnaryOp(floor, "[-1.3, -10.80, -12748.001]", "[-2, -11, -12749]");
+  this->AssertUnaryOp(floor, "[Inf, -Inf]", "[Inf, -Inf]");
+  this->AssertUnaryOp(floor, "[NaN]", "[NaN]");
+  this->AssertUnaryOp(floor, this->MakeScalar(min), this->MakeScalar(min));
+  this->AssertUnaryOp(floor, this->MakeScalar(max), this->MakeScalar(max));
+}
+
+TYPED_TEST(TestUnaryArithmeticSigned, Ceil) {
+  auto ceil = [](const Datum& arg, ArithmeticOptions, ExecContext* ctx) {
+    return Ceil(arg, ctx);
+  };
+
+  this->AssertUnaryOp(ceil, "[]", ArrayFromJSON(float64(), "[]"));
+  this->AssertUnaryOp(ceil, "[null]", ArrayFromJSON(float64(), "[null]"));
+  this->AssertUnaryOp(ceil, "[1, null, -10]", ArrayFromJSON(float64(), "[1, null, -10]"));
+  this->AssertUnaryOp(ceil, "[0]", ArrayFromJSON(float64(), "[0]"));
+  this->AssertUnaryOp(ceil, "[1, 10, 127]", ArrayFromJSON(float64(), "[1, 10, 127]"));
+  this->AssertUnaryOp(ceil, "[-1, -10, -127]",
+                      ArrayFromJSON(float64(), "[-1, -10, -127]"));
+}
+
+TYPED_TEST(TestUnaryArithmeticUnsigned, Ceil) {
+  auto ceil = [](const Datum& arg, ArithmeticOptions, ExecContext* ctx) {
+    return Ceil(arg, ctx);
+  };
+
+  this->AssertUnaryOp(ceil, "[]", ArrayFromJSON(float64(), "[]"));
+  this->AssertUnaryOp(ceil, "[null]", ArrayFromJSON(float64(), "[null]"));
+  this->AssertUnaryOp(ceil, "[1, null, 10]", ArrayFromJSON(float64(), "[1, null, 10]"));
+  this->AssertUnaryOp(ceil, "[0]", ArrayFromJSON(float64(), "[0]"));
+  this->AssertUnaryOp(ceil, "[1, 10, 127]", ArrayFromJSON(float64(), "[1, 10, 127]"));
+}
+
+TYPED_TEST(TestUnaryArithmeticFloating, Ceil) {
+  using CType = typename TestFixture::CType;
+  auto min = std::numeric_limits<CType>::lowest();
+  auto max = std::numeric_limits<CType>::max();
+
+  this->SetNansEqual(true);
+
+  auto ceil = [](const Datum& arg, ArithmeticOptions, ExecContext* ctx) {
+    return Ceil(arg, ctx);
+  };
+
+  this->AssertUnaryOp(ceil, "[]", "[]");
+  this->AssertUnaryOp(ceil, "[null]", "[null]");
+  this->AssertUnaryOp(ceil, "[1.3, null, -10.80]", "[2, null, -10]");
+  this->AssertUnaryOp(ceil, "[0.0, -0.0]", "[0, 0]");
+  this->AssertUnaryOp(ceil, "[1.3, 10.80, 12748.001]", "[2, 11, 12749]");
+  this->AssertUnaryOp(ceil, "[-1.3, -10.80, -12748.001]", "[-1, -10, -12748]");
+  this->AssertUnaryOp(ceil, "[Inf, -Inf]", "[Inf, -Inf]");
+  this->AssertUnaryOp(ceil, "[NaN]", "[NaN]");
+  this->AssertUnaryOp(ceil, this->MakeScalar(min), this->MakeScalar(min));
+  this->AssertUnaryOp(ceil, this->MakeScalar(max), this->MakeScalar(max));
+}
+
+TYPED_TEST(TestUnaryArithmeticSigned, Trunc) {
+  auto trunc = [](const Datum& arg, ArithmeticOptions, ExecContext* ctx) {
+    return Trunc(arg, ctx);
+  };
+
+  this->AssertUnaryOp(trunc, "[]", ArrayFromJSON(float64(), "[]"));
+  this->AssertUnaryOp(trunc, "[null]", ArrayFromJSON(float64(), "[null]"));
+  this->AssertUnaryOp(trunc, "[1, null, -10]",
+                      ArrayFromJSON(float64(), "[1, null, -10]"));
+  this->AssertUnaryOp(trunc, "[0]", ArrayFromJSON(float64(), "[0]"));
+  this->AssertUnaryOp(trunc, "[1, 10, 127]", ArrayFromJSON(float64(), "[1, 10, 127]"));
+  this->AssertUnaryOp(trunc, "[-1, -10, -127]",
+                      ArrayFromJSON(float64(), "[-1, -10, -127]"));
+}
+
+TYPED_TEST(TestUnaryArithmeticUnsigned, Trunc) {
+  auto trunc = [](const Datum& arg, ArithmeticOptions, ExecContext* ctx) {
+    return Trunc(arg, ctx);
+  };
+
+  this->AssertUnaryOp(trunc, "[]", ArrayFromJSON(float64(), "[]"));
+  this->AssertUnaryOp(trunc, "[null]", ArrayFromJSON(float64(), "[null]"));
+  this->AssertUnaryOp(trunc, "[1, null, 10]", ArrayFromJSON(float64(), "[1, null, 10]"));
+  this->AssertUnaryOp(trunc, "[0]", ArrayFromJSON(float64(), "[0]"));
+  this->AssertUnaryOp(trunc, "[1, 10, 127]", ArrayFromJSON(float64(), "[1, 10, 127]"));
+}
+
+TYPED_TEST(TestUnaryArithmeticFloating, Trunc) {
+  using CType = typename TestFixture::CType;
+  auto min = std::numeric_limits<CType>::lowest();
+  auto max = std::numeric_limits<CType>::max();
+
+  this->SetNansEqual(true);
+
+  auto trunc = [](const Datum& arg, ArithmeticOptions, ExecContext* ctx) {
+    return Trunc(arg, ctx);
+  };
+
+  this->AssertUnaryOp(trunc, "[]", "[]");
+  this->AssertUnaryOp(trunc, "[null]", "[null]");
+  this->AssertUnaryOp(trunc, "[1.3, null, -10.80]", "[1, null, -10]");
+  this->AssertUnaryOp(trunc, "[0.0, -0.0]", "[0, 0]");
+  this->AssertUnaryOp(trunc, "[1.3, 10.80, 12748.001]", "[1, 10, 12748]");
+  this->AssertUnaryOp(trunc, "[-1.3, -10.80, -12748.001]", "[-1, -10, -12748]");
+  this->AssertUnaryOp(trunc, "[Inf, -Inf]", "[Inf, -Inf]");
+  this->AssertUnaryOp(trunc, "[NaN]", "[NaN]");
+  this->AssertUnaryOp(trunc, this->MakeScalar(min), this->MakeScalar(min));
+  this->AssertUnaryOp(trunc, this->MakeScalar(max), this->MakeScalar(max));
+}
 }  // namespace compute
 }  // namespace arrow
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index 35011a786a6..12d86f9969d 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -348,6 +348,22 @@ Bit-wise functions
   out of bounds for the data type.  However, an overflow when shifting the
   first input is not error (truncated bits are silently discarded).
 
+Rounding functions
+~~~~~~~~~~~~~~~~~~
+
+Rounding functions convert a numeric input into an approximate value with a
+simpler representation based on the rounding strategy.
+
++------------------+--------+----------------+-----------------+-------+
+| Function name    | Arity  | Input types    | Output type     | Notes |
++==================+========+================+=================+=======+
+| floor            | Unary  | Numeric        | Float32/Float64 |       |
++------------------+--------+----------------+-----------------+-------+
+| ceil             | Unary  | Numeric        | Float32/Float64 |       |
++------------------+--------+----------------+-----------------+-------+
+| trunc            | Unary  | Numeric        | Float32/Float64 |       |
++------------------+--------+----------------+-----------------+-------+
+
 Logarithmic functions
 ~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst
index fd32d08f85c..f12b50e7723 100644
--- a/docs/source/python/api/compute.rst
+++ b/docs/source/python/api/compute.rst
@@ -74,6 +74,19 @@ Bit-wise operations do not offer (or need) a checked variant.
    bit_wise_or
    bit_wise_xor
 
+Rounding Functions
+------------------
+
+Rounding functions convert a numeric input into an approximate value with a
+simpler representation based on the rounding strategy.
+
+.. autosummary::
+   :toctree: ../generated/
+
+   ceil
+   floor
+   trunc
+
 Logarithmic Functions
 ---------------------
 

From 43f80156c6d4bdd48c03381007f8f5c36224c8b3 Mon Sep 17 00:00:00 2001
From: Weston Pace <weston.pace@gmail.com>
Date: Fri, 16 Jul 2021 11:11:25 -1000
Subject: [PATCH 589/719] ARROW-13332: [C++] TSAN failure in
 TestAsyncUtil.ReadaheadFailed

Minor cleanup fix from my earlier fix of this test in #10602

Closes #10719 from westonpace/bugfix/ARROW-13332--c-tsan-failure-in-testasyncutil-readaheadfaile

Authored-by: Weston Pace <weston.pace@gmail.com>
Signed-off-by: Weston Pace <weston.pace@gmail.com>
---
 cpp/src/arrow/testing/gtest_util.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/src/arrow/testing/gtest_util.cc b/cpp/src/arrow/testing/gtest_util.cc
index ea6edb0258e..30cc59800f4 100644
--- a/cpp/src/arrow/testing/gtest_util.cc
+++ b/cpp/src/arrow/testing/gtest_util.cc
@@ -907,7 +907,7 @@ class GatingTask::Impl : public std::enable_shared_from_this<GatingTask::Impl> {
   double timeout_seconds_;
   Status status_;
   bool unlocked_;
-  int num_launched_ = 0;
+  std::atomic<int> num_launched_{0};
   int num_running_ = 0;
   int num_finished_ = 0;
   std::mutex mx_;

From 17e6f23cf261882ad8ebd573c99d8d5cc3179485 Mon Sep 17 00:00:00 2001
From: Weston Pace <weston.pace@gmail.com>
Date: Fri, 16 Jul 2021 11:50:15 -1000
Subject: [PATCH 590/719] ARROW-11889: [C++] Add parallelism to streaming CSV
 reader

This converts the parser & decoder into map functions and then creates the streaming CSV reader as an async generator.  Parallel readahead is then added on top of the parser/decoder to allow for parallel reads.

One thing that is lost at the moment is the ability to encounter a parsing error and then continue.  There was a python test that read in the first block, failed to convert the second block, and then successfully read in a third block.  I'm not sure if that restart behavior is important but if it is I can look into adding it.

Another thing that could be investigated in the future is combining the file readers and table readers more.  They already share some components but the parsing and decoding logic, while basically the same, is handled very differently.  The only real difference is that the table reader saves all the parsed blocks for re-parsing and the streaming reader does not.

Closes #10568 from westonpace/feature/ARROW-11889--c-add-parallelism-to-streaming-csv-reader

Authored-by: Weston Pace <weston.pace@gmail.com>
Signed-off-by: Weston Pace <weston.pace@gmail.com>
---
 cpp/src/arrow/csv/column_decoder.cc      | 227 +++-------
 cpp/src/arrow/csv/column_decoder.h       |  39 +-
 cpp/src/arrow/csv/column_decoder_test.cc | 234 +++++------
 cpp/src/arrow/csv/reader.cc              | 514 ++++++++++++-----------
 cpp/src/arrow/csv/reader_test.cc         |  86 +++-
 cpp/src/arrow/util/async_generator.h     |   2 +
 python/pyarrow/tests/test_csv.py         | 120 +++---
 7 files changed, 571 insertions(+), 651 deletions(-)

diff --git a/cpp/src/arrow/csv/column_decoder.cc b/cpp/src/arrow/csv/column_decoder.cc
index 1dd13bc9086..ff5d01d8c4d 100644
--- a/cpp/src/arrow/csv/column_decoder.cc
+++ b/cpp/src/arrow/csv/column_decoder.cc
@@ -45,91 +45,13 @@ using internal::TaskGroup;
 
 class ConcreteColumnDecoder : public ColumnDecoder {
  public:
-  explicit ConcreteColumnDecoder(MemoryPool* pool,
-                                 std::shared_ptr<internal::TaskGroup> task_group,
-                                 int32_t col_index = -1)
-      : ColumnDecoder(std::move(task_group)),
-        pool_(pool),
-        col_index_(col_index),
-        num_chunks_(-1),
-        next_chunk_(0) {}
-
-  void Append(const std::shared_ptr<BlockParser>& parser) override {
-    Insert(static_cast<int64_t>(chunks_.size()), parser);
-  }
-
-  void SetEOF(int64_t num_blocks) override {
-    std::lock_guard<std::mutex> lock(mutex_);
-
-    DCHECK_EQ(num_chunks_, -1) << "Cannot change EOF";
-    num_chunks_ = num_blocks;
-
-    // If further chunks have been requested in NextChunk(), arrange to return nullptr
-    for (int64_t i = num_chunks_; i < static_cast<int64_t>(chunks_.size()); ++i) {
-      auto* chunk = &chunks_[i];
-      if (chunk->is_valid()) {
-        DCHECK(!IsFutureFinished(chunk->state()));
-        chunk->MarkFinished(std::shared_ptr<Array>());
-      }
-    }
-  }
-
-  Result<std::shared_ptr<Array>> NextChunk() override {
-    std::unique_lock<std::mutex> lock(mutex_);
-
-    if (num_chunks_ > 0 && next_chunk_ >= num_chunks_) {
-      return nullptr;  // EOF
-    }
-    PrepareChunkUnlocked(next_chunk_);
-    auto chunk_index = next_chunk_++;
-    WaitForChunkUnlocked(chunk_index);
-    // Move Future to avoid keeping chunk alive
-    return chunks_[chunk_index].MoveResult();
-  }
+  explicit ConcreteColumnDecoder(MemoryPool* pool, int32_t col_index = -1)
+      : ColumnDecoder(), pool_(pool), col_index_(col_index) {}
 
  protected:
   // XXX useful?
   virtual std::shared_ptr<DataType> type() const = 0;
 
-  void WaitForChunkUnlocked(int64_t chunk_index) {
-    auto future = chunks_[chunk_index];  // Make copy because of resizes
-    mutex_.unlock();
-    future.Wait();
-    mutex_.lock();
-  }
-
-  void PrepareChunk(int64_t block_index) {
-    std::lock_guard<std::mutex> lock(mutex_);
-    PrepareChunkUnlocked(block_index);
-  }
-
-  void PrepareChunkUnlocked(int64_t block_index) {
-    size_t chunk_index = static_cast<size_t>(block_index);
-    if (chunks_.size() <= chunk_index) {
-      chunks_.resize(chunk_index + 1);
-    }
-    if (!chunks_[block_index].is_valid()) {
-      chunks_[block_index] = Future<std::shared_ptr<Array>>::Make();
-    }
-  }
-
-  void SetChunk(int64_t chunk_index, Result<std::shared_ptr<Array>> maybe_array) {
-    std::lock_guard<std::mutex> lock(mutex_);
-    SetChunkUnlocked(chunk_index, std::move(maybe_array));
-  }
-
-  void SetChunkUnlocked(int64_t chunk_index, Result<std::shared_ptr<Array>> maybe_array) {
-    auto* chunk = &chunks_[chunk_index];
-    DCHECK(chunk->is_valid());
-    DCHECK(!IsFutureFinished(chunk->state()));
-
-    if (maybe_array.ok()) {
-      chunk->MarkFinished(std::move(maybe_array));
-    } else {
-      chunk->MarkFinished(WrapConversionError(maybe_array.status()));
-    }
-  }
-
   Status WrapConversionError(const Status& st) {
     if (st.ok()) {
       return st;
@@ -142,12 +64,7 @@ class ConcreteColumnDecoder : public ColumnDecoder {
 
   MemoryPool* pool_;
   int32_t col_index_;
-
-  std::vector<Future<std::shared_ptr<Array>>> chunks_;
-  int64_t num_chunks_;
-  int64_t next_chunk_;
-
-  std::mutex mutex_;
+  internal::Executor* executor_;
 };
 
 //////////////////////////////////////////////////////////////////////////
@@ -155,11 +72,11 @@ class ConcreteColumnDecoder : public ColumnDecoder {
 
 class NullColumnDecoder : public ConcreteColumnDecoder {
  public:
-  explicit NullColumnDecoder(const std::shared_ptr<DataType>& type, MemoryPool* pool,
-                             const std::shared_ptr<internal::TaskGroup>& task_group)
-      : ConcreteColumnDecoder(pool, task_group), type_(type) {}
+  explicit NullColumnDecoder(const std::shared_ptr<DataType>& type, MemoryPool* pool)
+      : ConcreteColumnDecoder(pool), type_(type) {}
 
-  void Insert(int64_t block_index, const std::shared_ptr<BlockParser>& parser) override;
+  Future<std::shared_ptr<Array>> Decode(
+      const std::shared_ptr<BlockParser>& parser) override;
 
  protected:
   std::shared_ptr<DataType> type() const override { return type_; }
@@ -167,24 +84,10 @@ class NullColumnDecoder : public ConcreteColumnDecoder {
   std::shared_ptr<DataType> type_;
 };
 
-void NullColumnDecoder::Insert(int64_t block_index,
-                               const std::shared_ptr<BlockParser>& parser) {
-  PrepareChunk(block_index);
-
-  // Spawn a task that will build an array of nulls with the right DataType
-  const int32_t num_rows = parser->num_rows();
-  DCHECK_GE(num_rows, 0);
-
-  task_group_->Append([=]() -> Status {
-    std::unique_ptr<ArrayBuilder> builder;
-    RETURN_NOT_OK(MakeBuilder(pool_, type_, &builder));
-    std::shared_ptr<Array> array;
-    RETURN_NOT_OK(builder->AppendNulls(num_rows));
-    RETURN_NOT_OK(builder->Finish(&array));
-
-    SetChunk(block_index, array);
-    return Status::OK();
-  });
+Future<std::shared_ptr<Array>> NullColumnDecoder::Decode(
+    const std::shared_ptr<BlockParser>& parser) {
+  DCHECK_GE(parser->num_rows(), 0);
+  return MakeArrayOfNull(type_, parser->num_rows(), pool_);
 }
 
 //////////////////////////////////////////////////////////////////////////
@@ -193,15 +96,13 @@ void NullColumnDecoder::Insert(int64_t block_index,
 class TypedColumnDecoder : public ConcreteColumnDecoder {
  public:
   TypedColumnDecoder(const std::shared_ptr<DataType>& type, int32_t col_index,
-                     const ConvertOptions& options, MemoryPool* pool,
-                     const std::shared_ptr<internal::TaskGroup>& task_group)
-      : ConcreteColumnDecoder(pool, task_group, col_index),
-        type_(type),
-        options_(options) {}
+                     const ConvertOptions& options, MemoryPool* pool)
+      : ConcreteColumnDecoder(pool, col_index), type_(type), options_(options) {}
 
   Status Init();
 
-  void Insert(int64_t block_index, const std::shared_ptr<BlockParser>& parser) override;
+  Future<std::shared_ptr<Array>> Decode(
+      const std::shared_ptr<BlockParser>& parser) override;
 
  protected:
   std::shared_ptr<DataType> type() const override { return type_; }
@@ -219,17 +120,11 @@ Status TypedColumnDecoder::Init() {
   return Status::OK();
 }
 
-void TypedColumnDecoder::Insert(int64_t block_index,
-                                const std::shared_ptr<BlockParser>& parser) {
+Future<std::shared_ptr<Array>> TypedColumnDecoder::Decode(
+    const std::shared_ptr<BlockParser>& parser) {
   DCHECK_NE(converter_, nullptr);
-
-  PrepareChunk(block_index);
-
-  // We're careful that all references in the closure outlive the Append() call
-  task_group_->Append([=]() -> Status {
-    SetChunk(block_index, converter_->Convert(*parser, col_index_));
-    return Status::OK();
-  });
+  return Future<std::shared_ptr<Array>>::MakeFinished(
+      converter_->Convert(*parser, col_index_));
 }
 
 //////////////////////////////////////////////////////////////////////////
@@ -238,16 +133,19 @@ void TypedColumnDecoder::Insert(int64_t block_index,
 class InferringColumnDecoder : public ConcreteColumnDecoder {
  public:
   InferringColumnDecoder(int32_t col_index, const ConvertOptions& options,
-                         MemoryPool* pool,
-                         const std::shared_ptr<internal::TaskGroup>& task_group)
-      : ConcreteColumnDecoder(pool, task_group, col_index),
+                         MemoryPool* pool)
+      : ConcreteColumnDecoder(pool, col_index),
         options_(options),
         infer_status_(options),
-        type_frozen_(false) {}
+        type_frozen_(false) {
+    first_inference_run_ = Future<>::Make();
+    first_inferrer_ = 0;
+  }
 
   Status Init();
 
-  void Insert(int64_t block_index, const std::shared_ptr<BlockParser>& parser) override;
+  Future<std::shared_ptr<Array>> Decode(
+      const std::shared_ptr<BlockParser>& parser) override;
 
  protected:
   std::shared_ptr<DataType> type() const override {
@@ -265,10 +163,9 @@ class InferringColumnDecoder : public ConcreteColumnDecoder {
   // Current inference status
   InferStatus infer_status_;
   bool type_frozen_;
+  std::atomic<int> first_inferrer_;
+  Future<> first_inference_run_;
   std::shared_ptr<Converter> converter_;
-
-  // The parsers corresponding to each chunk (for reconverting)
-  std::vector<std::shared_ptr<BlockParser>> parsers_;
 };
 
 Status InferringColumnDecoder::Init() { return UpdateType(); }
@@ -283,55 +180,37 @@ Result<std::shared_ptr<Array>> InferringColumnDecoder::RunInference(
     // (no one else should be updating converter_ concurrently)
     auto maybe_array = converter_->Convert(*parser, col_index_);
 
-    std::unique_lock<std::mutex> lock(mutex_);
     if (maybe_array.ok() || !infer_status_.can_loosen_type()) {
       // Conversion succeeded, or failed definitively
+      DCHECK(!type_frozen_);
+      type_frozen_ = true;
       return maybe_array;
     }
     // Conversion failed temporarily, try another type
     infer_status_.LoosenType(maybe_array.status());
-    RETURN_NOT_OK(UpdateType());
+    auto update_status = UpdateType();
+    if (!update_status.ok()) {
+      return update_status;
+    }
   }
 }
 
-void InferringColumnDecoder::Insert(int64_t block_index,
-                                    const std::shared_ptr<BlockParser>& parser) {
-  PrepareChunk(block_index);
-
+Future<std::shared_ptr<Array>> InferringColumnDecoder::Decode(
+    const std::shared_ptr<BlockParser>& parser) {
+  bool already_taken = first_inferrer_.fetch_or(1);
   // First block: run inference
-  if (block_index == 0) {
-    task_group_->Append([=]() -> Status {
-      auto maybe_array = RunInference(parser);
-
-      std::unique_lock<std::mutex> lock(mutex_);
-      DCHECK(!type_frozen_);
-      type_frozen_ = true;
-      SetChunkUnlocked(block_index, std::move(maybe_array));
-      return Status::OK();
-    });
-    return;
+  if (!already_taken) {
+    auto maybe_array = RunInference(parser);
+    first_inference_run_.MarkFinished();
+    return Future<std::shared_ptr<Array>>::MakeFinished(std::move(maybe_array));
   }
 
   // Non-first block: wait for inference to finish on first block now,
   // without blocking a TaskGroup thread.
-  {
-    std::unique_lock<std::mutex> lock(mutex_);
-    PrepareChunkUnlocked(0);
-    WaitForChunkUnlocked(0);
-    if (!chunks_[0].status().ok()) {
-      // Failed converting first chunk: bail out by marking EOF,
-      // because we can't decide a type for the other chunks.
-      SetChunkUnlocked(block_index, std::shared_ptr<Array>());
-    }
+  return first_inference_run_.Then([this, parser] {
     DCHECK(type_frozen_);
-  }
-
-  // Then use the inferred type to convert this block.
-  task_group_->Append([=]() -> Status {
     auto maybe_array = converter_->Convert(*parser, col_index_);
-
-    SetChunk(block_index, std::move(maybe_array));
-    return Status::OK();
+    return converter_->Convert(*parser, col_index_);
   });
 }
 
@@ -339,28 +218,24 @@ void InferringColumnDecoder::Insert(int64_t block_index,
 // Factory functions
 
 Result<std::shared_ptr<ColumnDecoder>> ColumnDecoder::Make(
-    MemoryPool* pool, int32_t col_index, const ConvertOptions& options,
-    std::shared_ptr<TaskGroup> task_group) {
-  auto ptr = std::make_shared<InferringColumnDecoder>(col_index, options, pool,
-                                                      std::move(task_group));
+    MemoryPool* pool, int32_t col_index, const ConvertOptions& options) {
+  auto ptr = std::make_shared<InferringColumnDecoder>(col_index, options, pool);
   RETURN_NOT_OK(ptr->Init());
   return ptr;
 }
 
 Result<std::shared_ptr<ColumnDecoder>> ColumnDecoder::Make(
     MemoryPool* pool, std::shared_ptr<DataType> type, int32_t col_index,
-    const ConvertOptions& options, std::shared_ptr<TaskGroup> task_group) {
-  auto ptr = std::make_shared<TypedColumnDecoder>(std::move(type), col_index, options,
-                                                  pool, std::move(task_group));
+    const ConvertOptions& options) {
+  auto ptr =
+      std::make_shared<TypedColumnDecoder>(std::move(type), col_index, options, pool);
   RETURN_NOT_OK(ptr->Init());
   return ptr;
 }
 
 Result<std::shared_ptr<ColumnDecoder>> ColumnDecoder::MakeNull(
-    MemoryPool* pool, std::shared_ptr<DataType> type,
-    std::shared_ptr<internal::TaskGroup> task_group) {
-  return std::make_shared<NullColumnDecoder>(std::move(type), pool,
-                                             std::move(task_group));
+    MemoryPool* pool, std::shared_ptr<DataType> type) {
+  return std::make_shared<NullColumnDecoder>(std::move(type), pool);
 }
 
 }  // namespace csv
diff --git a/cpp/src/arrow/csv/column_decoder.h b/cpp/src/arrow/csv/column_decoder.h
index 92644e3769f..5fbbd5df58b 100644
--- a/cpp/src/arrow/csv/column_decoder.h
+++ b/cpp/src/arrow/csv/column_decoder.h
@@ -36,45 +36,28 @@ class ARROW_EXPORT ColumnDecoder {
  public:
   virtual ~ColumnDecoder() = default;
 
-  /// Spawn a task that will try to convert and append the given CSV block.
-  /// All calls to Append() should happen on the same thread, otherwise
-  /// call Insert() instead.
-  virtual void Append(const std::shared_ptr<BlockParser>& parser) = 0;
-
   /// Spawn a task that will try to convert and insert the given CSV block
-  virtual void Insert(int64_t block_index,
-                      const std::shared_ptr<BlockParser>& parser) = 0;
-
-  /// Set EOF at the given number of blocks.  Must only be called once.
-  virtual void SetEOF(int64_t num_blocks) = 0;
-
-  /// Fetch a chunk.
-  virtual Result<std::shared_ptr<Array>> NextChunk() = 0;
-
-  std::shared_ptr<internal::TaskGroup> task_group() { return task_group_; }
+  virtual Future<std::shared_ptr<Array>> Decode(
+      const std::shared_ptr<BlockParser>& parser) = 0;
 
   /// Construct a strictly-typed ColumnDecoder.
-  static Result<std::shared_ptr<ColumnDecoder>> Make(
-      MemoryPool* pool, std::shared_ptr<DataType> type, int32_t col_index,
-      const ConvertOptions& options, std::shared_ptr<internal::TaskGroup> task_group);
+  static Result<std::shared_ptr<ColumnDecoder>> Make(MemoryPool* pool,
+                                                     std::shared_ptr<DataType> type,
+                                                     int32_t col_index,
+                                                     const ConvertOptions& options);
 
   /// Construct a type-inferring ColumnDecoder.
   /// Inference will run only on the first block, the type will be frozen afterwards.
-  static Result<std::shared_ptr<ColumnDecoder>> Make(
-      MemoryPool* pool, int32_t col_index, const ConvertOptions& options,
-      std::shared_ptr<internal::TaskGroup> task_group);
+  static Result<std::shared_ptr<ColumnDecoder>> Make(MemoryPool* pool, int32_t col_index,
+                                                     const ConvertOptions& options);
 
   /// Construct a ColumnDecoder for a column of nulls
   /// (i.e. not present in the CSV file).
-  static Result<std::shared_ptr<ColumnDecoder>> MakeNull(
-      MemoryPool* pool, std::shared_ptr<DataType> type,
-      std::shared_ptr<internal::TaskGroup> task_group);
+  static Result<std::shared_ptr<ColumnDecoder>> MakeNull(MemoryPool* pool,
+                                                         std::shared_ptr<DataType> type);
 
  protected:
-  explicit ColumnDecoder(std::shared_ptr<internal::TaskGroup> task_group)
-      : task_group_(std::move(task_group)) {}
-
-  std::shared_ptr<internal::TaskGroup> task_group_;
+  ColumnDecoder() = default;
 };
 
 }  // namespace csv
diff --git a/cpp/src/arrow/csv/column_decoder_test.cc b/cpp/src/arrow/csv/column_decoder_test.cc
index 231ffb85e1b..c8b96e04696 100644
--- a/cpp/src/arrow/csv/column_decoder_test.cc
+++ b/cpp/src/arrow/csv/column_decoder_test.cc
@@ -27,11 +27,11 @@
 #include "arrow/csv/test_common.h"
 #include "arrow/memory_pool.h"
 #include "arrow/table.h"
+#include "arrow/testing/future_util.h"
 #include "arrow/testing/gtest_util.h"
 #include "arrow/testing/util.h"
 #include "arrow/type.h"
 #include "arrow/util/checked_cast.h"
-#include "arrow/util/task_group.h"
 #include "arrow/util/thread_pool.h"
 
 namespace arrow {
@@ -41,7 +41,6 @@ class BlockParser;
 
 using internal::checked_cast;
 using internal::GetCpuThreadPool;
-using internal::TaskGroup;
 
 using ChunkData = std::vector<std::vector<std::string>>;
 
@@ -65,58 +64,70 @@ ThreadJoiner RunThread(Func&& func) {
   return ThreadJoiner(std::make_shared<std::thread>(std::forward<Func>(func)));
 }
 
-struct SerialExecutor {
-  static std::shared_ptr<TaskGroup> task_group() { return TaskGroup::MakeSerial(); }
-};
-
-struct ParallelExecutor {
-  static std::shared_ptr<TaskGroup> task_group() {
-    return TaskGroup::MakeThreaded(GetCpuThreadPool());
+template <typename Func>
+void RunThreadsAndJoin(Func&& func, int iters) {
+  std::vector<ThreadJoiner> threads;
+  for (int i = 0; i < iters; i++) {
+    threads.emplace_back(std::make_shared<std::thread>([i, func] { func(i); }));
   }
-};
-
-using ExecutorTypes = ::testing::Types<SerialExecutor, ParallelExecutor>;
+}
 
 class ColumnDecoderTest : public ::testing::Test {
  public:
-  ColumnDecoderTest() : tg_(TaskGroup::MakeSerial()), num_chunks_(0) {}
+  ColumnDecoderTest() : num_chunks_(0), read_ptr_(0) {}
 
   void SetDecoder(std::shared_ptr<ColumnDecoder> decoder) {
     decoder_ = std::move(decoder);
+    decoded_chunks_.clear();
     num_chunks_ = 0;
+    read_ptr_ = 0;
   }
 
-  void InsertChunk(int64_t num_chunk, std::vector<std::string> chunk) {
+  void InsertChunk(std::vector<std::string> chunk) {
     std::shared_ptr<BlockParser> parser;
     MakeColumnParser(chunk, &parser);
-    decoder_->Insert(num_chunk, parser);
+    auto decoded = decoder_->Decode(parser);
+    decoded_chunks_.push_back(decoded);
+    ++num_chunks_;
   }
 
   void AppendChunks(const ChunkData& chunks) {
     for (const auto& chunk : chunks) {
-      std::shared_ptr<BlockParser> parser;
-      MakeColumnParser(chunk, &parser);
-      decoder_->Append(parser);
-      ++num_chunks_;
+      InsertChunk(chunk);
     }
   }
 
-  void SetEOF() { decoder_->SetEOF(num_chunks_); }
+  Result<std::shared_ptr<Array>> NextChunk() {
+    EXPECT_LT(read_ptr_, static_cast<int64_t>(decoded_chunks_.size()));
+    return decoded_chunks_[read_ptr_++].result();
+  }
+
+  void AssertChunk(std::vector<std::string> chunk, std::shared_ptr<Array> expected) {
+    std::shared_ptr<BlockParser> parser;
+    MakeColumnParser(chunk, &parser);
+    ASSERT_FINISHES_OK_AND_ASSIGN(auto decoded, decoder_->Decode(parser));
+    AssertArraysEqual(*expected, *decoded);
+  }
+
+  void AssertChunkInvalid(std::vector<std::string> chunk) {
+    std::shared_ptr<BlockParser> parser;
+    MakeColumnParser(chunk, &parser);
+    ASSERT_FINISHES_AND_RAISES(Invalid, decoder_->Decode(parser));
+  }
 
   void AssertFetch(std::shared_ptr<Array> expected_chunk) {
-    ASSERT_OK_AND_ASSIGN(auto chunk, decoder_->NextChunk());
+    ASSERT_OK_AND_ASSIGN(auto chunk, NextChunk());
     ASSERT_NE(chunk, nullptr);
     AssertArraysEqual(*expected_chunk, *chunk);
   }
 
-  void AssertFetchInvalid() { ASSERT_RAISES(Invalid, decoder_->NextChunk()); }
-
-  void AssertFetchEOF() { ASSERT_OK_AND_EQ(nullptr, decoder_->NextChunk()); }
+  void AssertFetchInvalid() { ASSERT_RAISES(Invalid, NextChunk()); }
 
  protected:
-  std::shared_ptr<TaskGroup> tg_;
   std::shared_ptr<ColumnDecoder> decoder_;
-  int64_t num_chunks_;
+  std::vector<Future<std::shared_ptr<Array>>> decoded_chunks_;
+  int64_t num_chunks_ = 0;
+  int64_t read_ptr_ = 0;
 
   ConvertOptions default_options = ConvertOptions::Defaults();
 };
@@ -124,14 +135,13 @@ class ColumnDecoderTest : public ::testing::Test {
 //////////////////////////////////////////////////////////////////////////
 // Tests for null column decoder
 
-template <typename ExecutorType>
 class NullColumnDecoderTest : public ColumnDecoderTest {
  public:
-  NullColumnDecoderTest() { tg_ = ExecutorType::task_group(); }
+  NullColumnDecoderTest() {}
 
   void MakeDecoder(std::shared_ptr<DataType> type) {
     ASSERT_OK_AND_ASSIGN(auto decoder,
-                         ColumnDecoder::MakeNull(default_memory_pool(), type, tg_));
+                         ColumnDecoder::MakeNull(default_memory_pool(), type));
     SetDecoder(decoder);
   }
 
@@ -141,10 +151,8 @@ class NullColumnDecoderTest : public ColumnDecoderTest {
     MakeDecoder(type);
 
     AppendChunks({{"1", "2", "3"}, {"4", "5"}});
-    SetEOF();
     AssertFetch(ArrayFromJSON(type, "[null, null, null]"));
     AssertFetch(ArrayFromJSON(type, "[null, null]"));
-    AssertFetchEOF();
 
     MakeDecoder(type);
 
@@ -153,8 +161,6 @@ class NullColumnDecoderTest : public ColumnDecoderTest {
     AppendChunks({{"7", "8"}});
     AssertFetch(ArrayFromJSON(type, "[null]"));
     AssertFetch(ArrayFromJSON(type, "[null, null]"));
-    SetEOF();
-    AssertFetchEOF();
   }
 
   void TestOtherType() {
@@ -163,57 +169,40 @@ class NullColumnDecoderTest : public ColumnDecoderTest {
     MakeDecoder(type);
 
     AppendChunks({{"1", "2", "3"}, {"4", "5"}});
-    SetEOF();
     AssertFetch(ArrayFromJSON(type, "[null, null, null]"));
     AssertFetch(ArrayFromJSON(type, "[null, null]"));
-    AssertFetchEOF();
-    AssertFetchEOF();
   }
 
   void TestThreaded() {
+    constexpr int NITERS = 10;
     auto type = int32();
-
     MakeDecoder(type);
 
-    auto joiner = RunThread([&]() {
-      InsertChunk(1, {"4", "5"});
-      InsertChunk(0, {"1", "2", "3"});
-      InsertChunk(3, {"6"});
-      InsertChunk(2, {});
-      decoder_->SetEOF(4);
-    });
-
-    AssertFetch(ArrayFromJSON(type, "[null, null, null]"));
-    AssertFetch(ArrayFromJSON(type, "[null, null]"));
-    AssertFetch(ArrayFromJSON(type, "[]"));
-    AssertFetch(ArrayFromJSON(type, "[null]"));
-    AssertFetchEOF();
-    AssertFetchEOF();
+    RunThreadsAndJoin(
+        [&](int thread_id) {
+          AssertChunk({"4", "5", std::to_string(thread_id)},
+                      ArrayFromJSON(type, "[null, null, null]"));
+        },
+        NITERS);
   }
-
- protected:
-  ExecutorType executor_;
 };
 
-TYPED_TEST_SUITE(NullColumnDecoderTest, ExecutorTypes);
-
-TYPED_TEST(NullColumnDecoderTest, NullType) { this->TestNullType(); }
+TEST_F(NullColumnDecoderTest, NullType) { this->TestNullType(); }
 
-TYPED_TEST(NullColumnDecoderTest, OtherType) { this->TestOtherType(); }
+TEST_F(NullColumnDecoderTest, OtherType) { this->TestOtherType(); }
 
-TYPED_TEST(NullColumnDecoderTest, Threaded) { this->TestThreaded(); }
+TEST_F(NullColumnDecoderTest, Threaded) { this->TestThreaded(); }
 
 //////////////////////////////////////////////////////////////////////////
 // Tests for fixed-type column decoder
 
-template <typename ExecutorType>
 class TypedColumnDecoderTest : public ColumnDecoderTest {
  public:
-  TypedColumnDecoderTest() { tg_ = ExecutorType::task_group(); }
+  TypedColumnDecoderTest() {}
 
   void MakeDecoder(const std::shared_ptr<DataType>& type, const ConvertOptions& options) {
-    ASSERT_OK_AND_ASSIGN(
-        auto decoder, ColumnDecoder::Make(default_memory_pool(), type, 0, options, tg_));
+    ASSERT_OK_AND_ASSIGN(auto decoder,
+                         ColumnDecoder::Make(default_memory_pool(), type, 0, options));
     SetDecoder(decoder);
   }
 
@@ -223,11 +212,8 @@ class TypedColumnDecoderTest : public ColumnDecoderTest {
     MakeDecoder(type, default_options);
 
     AppendChunks({{"123", "456", "-78"}, {"901", "N/A"}});
-    SetEOF();
     AssertFetch(ArrayFromJSON(type, "[123, 456, -78]"));
     AssertFetch(ArrayFromJSON(type, "[901, null]"));
-    AssertFetchEOF();
-    AssertFetchEOF();
 
     MakeDecoder(type, default_options);
 
@@ -236,9 +222,6 @@ class TypedColumnDecoderTest : public ColumnDecoderTest {
     AppendChunks({{"N/A", "N/A"}});
     AssertFetch(ArrayFromJSON(type, "[-987]"));
     AssertFetch(ArrayFromJSON(type, "[null, null]"));
-    SetEOF();
-    AssertFetchEOF();
-    AssertFetchEOF();
   }
 
   void TestOptions() {
@@ -247,10 +230,7 @@ class TypedColumnDecoderTest : public ColumnDecoderTest {
     MakeDecoder(type, default_options);
 
     AppendChunks({{"true", "false", "N/A"}});
-    SetEOF();
     AssertFetch(ArrayFromJSON(type, "[true, false, null]"));
-    AssertFetchEOF();
-    AssertFetchEOF();
 
     // With non-default options
     auto options = default_options;
@@ -260,10 +240,7 @@ class TypedColumnDecoderTest : public ColumnDecoderTest {
     MakeDecoder(type, options);
 
     AppendChunks({{"true", "false", "N/A"}});
-    SetEOF();
     AssertFetch(ArrayFromJSON(type, "[null, true, false]"));
-    AssertFetchEOF();
-    AssertFetchEOF();
   }
 
   void TestErrors() {
@@ -273,56 +250,46 @@ class TypedColumnDecoderTest : public ColumnDecoderTest {
 
     AppendChunks({{"123", "456", "N/A"}, {"-901"}});
     AppendChunks({{"N/A", "1000"}});
-    SetEOF();
     AssertFetch(ArrayFromJSON(type, "[123, 456, null]"));
     AssertFetchInvalid();
     AssertFetch(ArrayFromJSON(type, "[null, 1000]"));
-    AssertFetchEOF();
   }
 
   void TestThreaded() {
+    constexpr int NITERS = 10;
     auto type = uint32();
-
     MakeDecoder(type, default_options);
 
-    auto joiner = RunThread([&]() {
-      InsertChunk(1, {"4", "-5"});
-      InsertChunk(0, {"1", "2", "3"});
-      InsertChunk(3, {"6"});
-      InsertChunk(2, {});
-      decoder_->SetEOF(4);
-    });
-
-    AssertFetch(ArrayFromJSON(type, "[1, 2, 3]"));
-    AssertFetchInvalid();
-    AssertFetch(ArrayFromJSON(type, "[]"));
-    AssertFetch(ArrayFromJSON(type, "[6]"));
-    AssertFetchEOF();
-    AssertFetchEOF();
+    RunThreadsAndJoin(
+        [&](int thread_id) {
+          if (thread_id % 2 == 0) {
+            AssertChunkInvalid({"4", "-5"});
+          } else {
+            AssertChunk({"1", "2", "3"}, ArrayFromJSON(type, "[1, 2, 3]"));
+          }
+        },
+        NITERS);
   }
 };
 
-TYPED_TEST_SUITE(TypedColumnDecoderTest, ExecutorTypes);
+TEST_F(TypedColumnDecoderTest, Integers) { this->TestIntegers(); }
 
-TYPED_TEST(TypedColumnDecoderTest, Integers) { this->TestIntegers(); }
+TEST_F(TypedColumnDecoderTest, Options) { this->TestOptions(); }
 
-TYPED_TEST(TypedColumnDecoderTest, Options) { this->TestOptions(); }
+TEST_F(TypedColumnDecoderTest, Errors) { this->TestErrors(); }
 
-TYPED_TEST(TypedColumnDecoderTest, Errors) { this->TestErrors(); }
-
-TYPED_TEST(TypedColumnDecoderTest, Threaded) { this->TestThreaded(); }
+TEST_F(TypedColumnDecoderTest, Threaded) { this->TestThreaded(); }
 
 //////////////////////////////////////////////////////////////////////////
 // Tests for type-inferring column decoder
 
-template <typename ExecutorType>
 class InferringColumnDecoderTest : public ColumnDecoderTest {
  public:
-  InferringColumnDecoderTest() { tg_ = ExecutorType::task_group(); }
+  InferringColumnDecoderTest() {}
 
   void MakeDecoder(const ConvertOptions& options) {
     ASSERT_OK_AND_ASSIGN(auto decoder,
-                         ColumnDecoder::Make(default_memory_pool(), 0, options, tg_));
+                         ColumnDecoder::Make(default_memory_pool(), 0, options));
     SetDecoder(decoder);
   }
 
@@ -332,35 +299,37 @@ class InferringColumnDecoderTest : public ColumnDecoderTest {
     MakeDecoder(default_options);
 
     AppendChunks({{"123", "456", "-78"}, {"901", "N/A"}});
-    SetEOF();
     AssertFetch(ArrayFromJSON(type, "[123, 456, -78]"));
     AssertFetch(ArrayFromJSON(type, "[901, null]"));
-    AssertFetchEOF();
-    AssertFetchEOF();
   }
 
   void TestThreaded() {
+    constexpr int NITERS = 10;
     auto type = float64();
-
     MakeDecoder(default_options);
 
-    auto joiner = RunThread([&]() {
-      SleepFor(1e-3);
-      InsertChunk(0, {"1.5", "2", "3"});
-      InsertChunk(3, {"6"});
-      decoder_->SetEOF(4);
-    });
-
-    // These chunks will wait for inference to run on chunk 0
-    InsertChunk(1, {"4", "-5", "N/A"});
-    InsertChunk(2, {});
-
-    AssertFetch(ArrayFromJSON(type, "[1.5, 2, 3]"));
-    AssertFetch(ArrayFromJSON(type, "[4, -5, null]"));
-    AssertFetch(ArrayFromJSON(type, "[]"));
-    AssertFetch(ArrayFromJSON(type, "[6]"));
-    AssertFetchEOF();
-    AssertFetchEOF();
+    // One of these will do the inference so we need to make sure they all have floating
+    // point
+    RunThreadsAndJoin(
+        [&](int thread_id) {
+          if (thread_id % 2 == 0) {
+            AssertChunk({"6.3", "7.2"}, ArrayFromJSON(type, "[6.3, 7.2]"));
+          } else {
+            AssertChunk({"1.1", "2", "3"}, ArrayFromJSON(type, "[1.1, 2, 3]"));
+          }
+        },
+        NITERS);
+
+    // These will run after the inference
+    RunThreadsAndJoin(
+        [&](int thread_id) {
+          if (thread_id % 2 == 0) {
+            AssertChunk({"1", "2"}, ArrayFromJSON(type, "[1, 2]"));
+          } else {
+            AssertChunkInvalid({"xyz"});
+          }
+        },
+        NITERS);
   }
 
   void TestOptions() {
@@ -373,11 +342,8 @@ class InferringColumnDecoderTest : public ColumnDecoderTest {
     MakeDecoder(options);
 
     AppendChunks({{"true", "false", "N/A"}, {"true"}});
-    SetEOF();
     AssertFetch(ArrayFromJSON(type, "[null, true, false]"));
     AssertFetch(ArrayFromJSON(type, "[null]"));
-    AssertFetchEOF();
-    AssertFetchEOF();
   }
 
   void TestErrors() {
@@ -387,12 +353,9 @@ class InferringColumnDecoderTest : public ColumnDecoderTest {
 
     AppendChunks({{"123", "456", "-78"}, {"9.5", "N/A"}});
     AppendChunks({{"1000", "N/A"}});
-    SetEOF();
     AssertFetch(ArrayFromJSON(type, "[123, 456, -78]"));
     AssertFetchInvalid();
     AssertFetch(ArrayFromJSON(type, "[1000, null]"));
-    AssertFetchEOF();
-    AssertFetchEOF();
   }
 
   void TestEmpty() {
@@ -401,25 +364,20 @@ class InferringColumnDecoderTest : public ColumnDecoderTest {
     MakeDecoder(default_options);
 
     AppendChunks({{}, {}});
-    SetEOF();
     AssertFetch(ArrayFromJSON(type, "[]"));
     AssertFetch(ArrayFromJSON(type, "[]"));
-    AssertFetchEOF();
-    AssertFetchEOF();
   }
 };
 
-TYPED_TEST_SUITE(InferringColumnDecoderTest, ExecutorTypes);
-
-TYPED_TEST(InferringColumnDecoderTest, Integers) { this->TestIntegers(); }
+TEST_F(InferringColumnDecoderTest, Integers) { this->TestIntegers(); }
 
-TYPED_TEST(InferringColumnDecoderTest, Threaded) { this->TestThreaded(); }
+TEST_F(InferringColumnDecoderTest, Threaded) { this->TestThreaded(); }
 
-TYPED_TEST(InferringColumnDecoderTest, Options) { this->TestOptions(); }
+TEST_F(InferringColumnDecoderTest, Options) { this->TestOptions(); }
 
-TYPED_TEST(InferringColumnDecoderTest, Errors) { this->TestErrors(); }
+TEST_F(InferringColumnDecoderTest, Errors) { this->TestErrors(); }
 
-TYPED_TEST(InferringColumnDecoderTest, Empty) { this->TestEmpty(); }
+TEST_F(InferringColumnDecoderTest, Empty) { this->TestEmpty(); }
 
 // More inference tests are in InferringColumnBuilderTest
 
diff --git a/cpp/src/arrow/csv/reader.cc b/cpp/src/arrow/csv/reader.cc
index d57a2f15667..11437297b80 100644
--- a/cpp/src/arrow/csv/reader.cc
+++ b/cpp/src/arrow/csv/reader.cc
@@ -50,6 +50,7 @@
 #include "arrow/util/task_group.h"
 #include "arrow/util/thread_pool.h"
 #include "arrow/util/utf8.h"
+#include "arrow/util/vector.h"
 
 namespace arrow {
 namespace csv {
@@ -349,6 +350,186 @@ class ThreadedBlockReader : public BlockReader {
   }
 };
 
+struct ParsedBlock {
+  std::shared_ptr<BlockParser> parser;
+  int64_t block_index;
+  int64_t bytes_parsed_or_skipped;
+};
+
+struct DecodedBlock {
+  std::shared_ptr<RecordBatch> record_batch;
+  // Represents the number of input bytes represented by this batch
+  // This will include bytes skipped when skipping rows after the header
+  int64_t bytes_processed;
+};
+
+}  // namespace
+
+}  // namespace csv
+
+template <>
+struct IterationTraits<csv::ParsedBlock> {
+  static csv::ParsedBlock End() { return csv::ParsedBlock{nullptr, -1, -1}; }
+  static bool IsEnd(const csv::ParsedBlock& val) { return val.block_index < 0; }
+};
+
+template <>
+struct IterationTraits<csv::DecodedBlock> {
+  static csv::DecodedBlock End() { return csv::DecodedBlock{nullptr, -1}; }
+  static bool IsEnd(const csv::DecodedBlock& val) { return val.bytes_processed < 0; }
+};
+
+namespace csv {
+namespace {
+
+// A function object that takes in a buffer of CSV data and returns a parsed batch of CSV
+// data (CSVBlock -> ParsedBlock) for use with MakeMappedGenerator.
+// The parsed batch contains a list of offsets for each of the columns so that columns
+// can be individually scanned
+//
+// This operator is not re-entrant
+class BlockParsingOperator {
+ public:
+  BlockParsingOperator(io::IOContext io_context, ParseOptions parse_options,
+                       int num_csv_cols, bool count_rows)
+      : io_context_(io_context),
+        parse_options_(parse_options),
+        num_csv_cols_(num_csv_cols),
+        count_rows_(count_rows) {}
+
+  Result<ParsedBlock> operator()(const CSVBlock& block) {
+    constexpr int32_t max_num_rows = std::numeric_limits<int32_t>::max();
+    auto parser = std::make_shared<BlockParser>(
+        io_context_.pool(), parse_options_, num_csv_cols_, num_rows_seen_, max_num_rows);
+
+    std::shared_ptr<Buffer> straddling;
+    std::vector<util::string_view> views;
+    if (block.partial->size() != 0 || block.completion->size() != 0) {
+      if (block.partial->size() == 0) {
+        straddling = block.completion;
+      } else if (block.completion->size() == 0) {
+        straddling = block.partial;
+      } else {
+        ARROW_ASSIGN_OR_RAISE(
+            straddling,
+            ConcatenateBuffers({block.partial, block.completion}, io_context_.pool()));
+      }
+      views = {util::string_view(*straddling), util::string_view(*block.buffer)};
+    } else {
+      views = {util::string_view(*block.buffer)};
+    }
+    uint32_t parsed_size;
+    if (block.is_final) {
+      RETURN_NOT_OK(parser->ParseFinal(views, &parsed_size));
+    } else {
+      RETURN_NOT_OK(parser->Parse(views, &parsed_size));
+    }
+    if (count_rows_) {
+      num_rows_seen_ += parser->num_rows();
+    }
+    RETURN_NOT_OK(block.consume_bytes(parsed_size));
+    return ParsedBlock{std::move(parser), block.block_index,
+                       static_cast<int64_t>(parsed_size) + block.bytes_skipped};
+  }
+
+ private:
+  io::IOContext io_context_;
+  ParseOptions parse_options_;
+  int num_csv_cols_;
+  bool count_rows_;
+  int num_rows_seen_ = 0;
+};
+
+// A function object that takes in parsed batch of CSV data and decodes it to an arrow
+// record batch (ParsedBlock -> DecodedBlock) for use with MakeMappedGenerator.
+class BlockDecodingOperator {
+ public:
+  Future<DecodedBlock> operator()(const ParsedBlock& block) {
+    DCHECK(!state_->column_decoders.empty());
+    std::vector<Future<std::shared_ptr<Array>>> decoded_array_futs;
+    for (auto& decoder : state_->column_decoders) {
+      decoded_array_futs.push_back(decoder->Decode(block.parser));
+    }
+    auto bytes_parsed_or_skipped = block.bytes_parsed_or_skipped;
+    auto decoded_arrays_fut = All(std::move(decoded_array_futs));
+    auto state = state_;
+    return decoded_arrays_fut.Then(
+        [state, bytes_parsed_or_skipped](
+            const std::vector<Result<std::shared_ptr<Array>>>& maybe_decoded_arrays)
+            -> Result<DecodedBlock> {
+          ARROW_ASSIGN_OR_RAISE(auto decoded_arrays,
+                                internal::UnwrapOrRaise(maybe_decoded_arrays));
+
+          ARROW_ASSIGN_OR_RAISE(auto batch,
+                                state->DecodedArraysToBatch(std::move(decoded_arrays)));
+          return DecodedBlock{std::move(batch), bytes_parsed_or_skipped};
+        });
+  }
+
+  static Result<BlockDecodingOperator> Make(io::IOContext io_context,
+                                            ConvertOptions convert_options,
+                                            ConversionSchema conversion_schema) {
+    BlockDecodingOperator op(std::move(io_context), std::move(convert_options),
+                             std::move(conversion_schema));
+    RETURN_NOT_OK(op.state_->MakeColumnDecoders(io_context));
+    return op;
+  }
+
+ private:
+  BlockDecodingOperator(io::IOContext io_context, ConvertOptions convert_options,
+                        ConversionSchema conversion_schema)
+      : state_(std::make_shared<State>(std::move(io_context), std::move(convert_options),
+                                       std::move(conversion_schema))) {}
+
+  struct State {
+    State(io::IOContext io_context, ConvertOptions convert_options,
+          ConversionSchema conversion_schema)
+        : convert_options(std::move(convert_options)),
+          conversion_schema(std::move(conversion_schema)) {}
+
+    Result<std::shared_ptr<RecordBatch>> DecodedArraysToBatch(
+        std::vector<std::shared_ptr<Array>> arrays) {
+      if (schema == nullptr) {
+        FieldVector fields(arrays.size());
+        for (size_t i = 0; i < arrays.size(); ++i) {
+          fields[i] = field(conversion_schema.columns[i].name, arrays[i]->type());
+        }
+        schema = arrow::schema(std::move(fields));
+      }
+      const auto n_rows = arrays[0]->length();
+      return RecordBatch::Make(schema, n_rows, std::move(arrays));
+    }
+
+    // Make column decoders from conversion schema
+    Status MakeColumnDecoders(io::IOContext io_context) {
+      for (const auto& column : conversion_schema.columns) {
+        std::shared_ptr<ColumnDecoder> decoder;
+        if (column.is_missing) {
+          ARROW_ASSIGN_OR_RAISE(decoder,
+                                ColumnDecoder::MakeNull(io_context.pool(), column.type));
+        } else if (column.type != nullptr) {
+          ARROW_ASSIGN_OR_RAISE(
+              decoder, ColumnDecoder::Make(io_context.pool(), column.type, column.index,
+                                           convert_options));
+        } else {
+          ARROW_ASSIGN_OR_RAISE(
+              decoder,
+              ColumnDecoder::Make(io_context.pool(), column.index, convert_options));
+        }
+        column_decoders.push_back(std::move(decoder));
+      }
+      return Status::OK();
+    }
+
+    ConvertOptions convert_options;
+    ConversionSchema conversion_schema;
+    std::vector<std::shared_ptr<ColumnDecoder>> column_decoders;
+    std::shared_ptr<Schema> schema;
+  };
+
+  std::shared_ptr<State> state_;
+};
+
 /////////////////////////////////////////////////////////////////////////
 // Base class for common functionality
 
@@ -367,8 +548,9 @@ class ReaderMixin {
 
  protected:
   // Read header and column names from buffer, create column builders
-  Status ProcessHeader(const std::shared_ptr<Buffer>& buf,
-                       std::shared_ptr<Buffer>* rest) {
+  // Returns the # of bytes consumed
+  Result<int64_t> ProcessHeader(const std::shared_ptr<Buffer>& buf,
+                                std::shared_ptr<Buffer>* rest) {
     const uint8_t* data = buf->data();
     const auto data_end = data + buf->size();
     DCHECK_GT(data_end - data, 0);
@@ -430,12 +612,14 @@ class ReaderMixin {
       num_rows_seen_ += read_options_.skip_rows_after_names;
     }
 
-    *rest = SliceBuffer(buf, data - buf->data());
+    auto bytes_consumed = data - buf->data();
+    *rest = SliceBuffer(buf, bytes_consumed);
 
     num_csv_cols_ = static_cast<int32_t>(column_names_.size());
     DCHECK_GT(num_csv_cols_, 0);
 
-    return MakeConversionSchema();
+    RETURN_NOT_OK(MakeConversionSchema());
+    return bytes_consumed;
   }
 
   std::vector<std::string> GenerateColumnNames(int32_t num_cols) {
@@ -642,123 +826,18 @@ class BaseTableReader : public ReaderMixin, public csv::TableReader {
 /////////////////////////////////////////////////////////////////////////
 // Base class for streaming readers
 
-class BaseStreamingReader : public ReaderMixin, public csv::StreamingReader {
+class StreamingReaderImpl : public ReaderMixin,
+                            public csv::StreamingReader,
+                            public std::enable_shared_from_this<StreamingReaderImpl> {
  public:
-  BaseStreamingReader(io::IOContext io_context, Executor* cpu_executor,
-                      std::shared_ptr<io::InputStream> input,
+  StreamingReaderImpl(io::IOContext io_context, std::shared_ptr<io::InputStream> input,
                       const ReadOptions& read_options, const ParseOptions& parse_options,
                       const ConvertOptions& convert_options, bool count_rows)
       : ReaderMixin(io_context, std::move(input), read_options, parse_options,
                     convert_options, count_rows),
-        cpu_executor_(cpu_executor) {}
-
-  virtual Future<std::shared_ptr<csv::StreamingReader>> Init() = 0;
-
-  std::shared_ptr<Schema> schema() const override { return schema_; }
-
-  Status ReadNext(std::shared_ptr<RecordBatch>* batch) override {
-    auto next_fut = ReadNextAsync();
-    auto next_result = next_fut.result();
-    return std::move(next_result).Value(batch);
-  }
-
- protected:
-  // Make column decoders from conversion schema
-  Status MakeColumnDecoders() {
-    for (const auto& column : conversion_schema_.columns) {
-      std::shared_ptr<ColumnDecoder> decoder;
-      if (column.is_missing) {
-        ARROW_ASSIGN_OR_RAISE(decoder, ColumnDecoder::MakeNull(io_context_.pool(),
-                                                               column.type, task_group_));
-      } else if (column.type != nullptr) {
-        ARROW_ASSIGN_OR_RAISE(
-            decoder, ColumnDecoder::Make(io_context_.pool(), column.type, column.index,
-                                         convert_options_, task_group_));
-      } else {
-        ARROW_ASSIGN_OR_RAISE(decoder,
-                              ColumnDecoder::Make(io_context_.pool(), column.index,
-                                                  convert_options_, task_group_));
-      }
-      column_decoders_.push_back(std::move(decoder));
-    }
-    return Status::OK();
-  }
-
-  Result<int64_t> ParseAndInsert(const std::shared_ptr<Buffer>& partial,
-                                 const std::shared_ptr<Buffer>& completion,
-                                 const std::shared_ptr<Buffer>& block,
-                                 int64_t block_index, bool is_final) {
-    ARROW_ASSIGN_OR_RAISE(auto result,
-                          Parse(partial, completion, block, block_index, is_final));
-    RETURN_NOT_OK(ProcessData(result.parser, block_index));
-    return result.parsed_bytes;
-  }
-
-  // Trigger conversion of parsed block data
-  Status ProcessData(const std::shared_ptr<BlockParser>& parser, int64_t block_index) {
-    for (auto& decoder : column_decoders_) {
-      decoder->Insert(block_index, parser);
-    }
-    return Status::OK();
-  }
-
-  Result<std::shared_ptr<RecordBatch>> DecodeNextBatch() {
-    DCHECK(!column_decoders_.empty());
-    ArrayVector arrays;
-    arrays.reserve(column_decoders_.size());
-    Status st;
-    for (auto& decoder : column_decoders_) {
-      auto maybe_array = decoder->NextChunk();
-      if (!maybe_array.ok()) {
-        // If there's an error, still fetch results from other decoders to
-        // keep them in sync.
-        st &= maybe_array.status();
-      } else {
-        arrays.push_back(*std::move(maybe_array));
-      }
-    }
-    RETURN_NOT_OK(st);
-    DCHECK_EQ(arrays.size(), column_decoders_.size());
-    const bool is_null = (arrays[0] == nullptr);
-#ifndef NDEBUG
-    for (const auto& array : arrays) {
-      DCHECK_EQ(array == nullptr, is_null);
-    }
-#endif
-    if (is_null) {
-      eof_ = true;
-      return nullptr;
-    }
-
-    if (schema_ == nullptr) {
-      FieldVector fields(arrays.size());
-      for (size_t i = 0; i < arrays.size(); ++i) {
-        fields[i] = field(conversion_schema_.columns[i].name, arrays[i]->type());
-      }
-      schema_ = arrow::schema(std::move(fields));
-    }
-    const auto n_rows = arrays[0]->length();
-    return RecordBatch::Make(schema_, n_rows, std::move(arrays));
-  }
+        bytes_decoded_(std::make_shared<std::atomic<int64_t>>(0)) {}
 
-  // Column decoders (in ConversionSchema order)
-  std::vector<std::shared_ptr<ColumnDecoder>> column_decoders_;
-  std::shared_ptr<Schema> schema_;
-  std::shared_ptr<RecordBatch> pending_batch_;
-  AsyncGenerator<std::shared_ptr<Buffer>> buffer_generator_;
-  Executor* cpu_executor_;
-  bool eof_ = false;
-};
-
-/////////////////////////////////////////////////////////////////////////
-// Serial StreamingReader implementation
-
-class SerialStreamingReader : public BaseStreamingReader,
-                              public std::enable_shared_from_this<SerialStreamingReader> {
- public:
-  using BaseStreamingReader::BaseStreamingReader;
-
-  Future<std::shared_ptr<csv::StreamingReader>> Init() override {
+  Future<> Init(Executor* cpu_executor) {
     ARROW_ASSIGN_OR_RAISE(auto istream_it,
                           io::MakeInputStreamIterator(input_, read_options_.block_size));
 
@@ -766,139 +845,103 @@ class SerialStreamingReader : public BaseStreamingReader,
     ARROW_ASSIGN_OR_RAISE(auto bg_it, MakeBackgroundGenerator(std::move(istream_it),
                                                               io_context_.executor()));
 
-    auto transferred_it = MakeTransferredGenerator(bg_it, cpu_executor_);
+    auto transferred_it = MakeTransferredGenerator(bg_it, cpu_executor);
 
-    buffer_generator_ = CSVBufferIterator::MakeAsync(std::move(transferred_it));
-    task_group_ = internal::TaskGroup::MakeSerial(io_context_.stop_token());
+    auto buffer_generator = CSVBufferIterator::MakeAsync(std::move(transferred_it));
 
+    int max_readahead = cpu_executor->GetCapacity();
     auto self = shared_from_this();
-    // Read schema from first batch
-    return ReadNextAsync(true).Then(
-        [self](const std::shared_ptr<RecordBatch>& first_batch)
-            -> Result<std::shared_ptr<csv::StreamingReader>> {
-          self->pending_batch_ = first_batch;
-          DCHECK_NE(self->schema_, nullptr);
-          return self;
-        });
-  }
 
-  Result<std::shared_ptr<RecordBatch>> DecodeBatchAndUpdateSchema() {
-    auto maybe_batch = DecodeNextBatch();
-    if (schema_ == nullptr && maybe_batch.ok()) {
-      schema_ = (*maybe_batch)->schema();
-    }
-    return maybe_batch;
+    return buffer_generator().Then([self, buffer_generator, max_readahead](
+                                       const std::shared_ptr<Buffer>& first_buffer) {
+      return self->InitAfterFirstBuffer(first_buffer, buffer_generator, max_readahead);
+    });
   }
 
-  Future<std::shared_ptr<RecordBatch>> DoReadNext(
-      std::shared_ptr<SerialStreamingReader> self) {
-    auto batch = std::move(pending_batch_);
-    if (batch != nullptr) {
-      return Future<std::shared_ptr<RecordBatch>>::MakeFinished(batch);
-    }
+  std::shared_ptr<Schema> schema() const override { return schema_; }
 
-    if (!source_eof_) {
-      return block_generator_()
-          .Then([self](const CSVBlock& maybe_block) -> Status {
-            if (!IsIterationEnd(maybe_block)) {
-              self->bytes_parsed_ += maybe_block.bytes_skipped;
-              self->last_block_index_ = maybe_block.block_index;
-              auto maybe_parsed = self->ParseAndInsert(
-                  maybe_block.partial, maybe_block.completion, maybe_block.buffer,
-                  maybe_block.block_index, maybe_block.is_final);
-              if (!maybe_parsed.ok()) {
-                // Parse error => bail out
-                self->eof_ = true;
-                return maybe_parsed.status();
-              }
-              self->bytes_parsed_ += *maybe_parsed;
-              RETURN_NOT_OK(maybe_block.consume_bytes(*maybe_parsed));
-            } else {
-              self->source_eof_ = true;
-              for (auto& decoder : self->column_decoders_) {
-                decoder->SetEOF(self->last_block_index_ + 1);
-              }
-            }
-            return Status::OK();
-          })
-          .Then([self]() -> Result<std::shared_ptr<RecordBatch>> {
-            return self->DecodeBatchAndUpdateSchema();
-          });
-    }
-    return Future<std::shared_ptr<RecordBatch>>::MakeFinished(
-        DecodeBatchAndUpdateSchema());
-  }
+  int64_t bytes_read() const override { return bytes_decoded_->load(); }
 
-  Future<std::shared_ptr<RecordBatch>> ReadNextSkippingEmpty(
-      std::shared_ptr<SerialStreamingReader> self, bool internal_read) {
-    return DoReadNext(self).Then(
-        [self, internal_read](const std::shared_ptr<RecordBatch>& batch) {
-          if (batch != nullptr && batch->num_rows() == 0) {
-            return self->ReadNextSkippingEmpty(self, internal_read);
-          }
-          if (!internal_read) {
-            self->bytes_decoded_ += self->bytes_parsed_;
-            self->bytes_parsed_ = 0;
-          }
-          return Future<std::shared_ptr<RecordBatch>>::MakeFinished(batch);
-        });
+  Status ReadNext(std::shared_ptr<RecordBatch>* batch) override {
+    auto next_fut = ReadNextAsync();
+    auto next_result = next_fut.result();
+    return std::move(next_result).Value(batch);
   }
 
   Future<std::shared_ptr<RecordBatch>> ReadNextAsync() override {
-    return ReadNextAsync(false);
-  };
-
-  int64_t bytes_read() const override { return bytes_decoded_; }
+    return record_batch_gen_();
+  }
 
  protected:
-  Future<> SetupReader(std::shared_ptr<SerialStreamingReader> self) {
-    return buffer_generator_().Then([self](const std::shared_ptr<Buffer>& first_buffer) {
-      if (first_buffer == nullptr) {
-        return Status::Invalid("Empty CSV file");
-      }
-      auto own_first_buffer = first_buffer;
-      auto start = own_first_buffer->data();
-      RETURN_NOT_OK(self->ProcessHeader(own_first_buffer, &own_first_buffer));
-      self->bytes_decoded_ = own_first_buffer->data() - start;
-      RETURN_NOT_OK(self->MakeColumnDecoders());
-
-      self->block_generator_ = SerialBlockReader::MakeAsyncIterator(
-          std::move(self->buffer_generator_), MakeChunker(self->parse_options_),
-          std::move(own_first_buffer), self->read_options_.skip_rows_after_names);
-      return Status::OK();
+  Future<> InitAfterFirstBuffer(const std::shared_ptr<Buffer>& first_buffer,
+                                AsyncGenerator<std::shared_ptr<Buffer>> buffer_generator,
+                                int max_readahead) {
+    if (first_buffer == nullptr) {
+      return Status::Invalid("Empty CSV file");
+    }
+
+    std::shared_ptr<Buffer> after_header;
+    ARROW_ASSIGN_OR_RAISE(auto header_bytes_consumed,
+                          ProcessHeader(first_buffer, &after_header));
+    bytes_decoded_->fetch_add(header_bytes_consumed);
+
+    auto parser_op =
+        BlockParsingOperator(io_context_, parse_options_, num_csv_cols_, count_rows_);
+    ARROW_ASSIGN_OR_RAISE(
+        auto decoder_op,
+        BlockDecodingOperator::Make(io_context_, convert_options_, conversion_schema_));
+
+    auto block_gen = SerialBlockReader::MakeAsyncIterator(
+        std::move(buffer_generator), MakeChunker(parse_options_), std::move(after_header),
+        read_options_.skip_rows_after_names);
+    auto parsed_block_gen =
+        MakeMappedGenerator(std::move(block_gen), std::move(parser_op));
+    auto rb_gen = MakeMappedGenerator(std::move(parsed_block_gen), std::move(decoder_op));
+
+    auto self = shared_from_this();
+    return rb_gen().Then([self, rb_gen, max_readahead](const DecodedBlock& first_block) {
+      return self->InitAfterFirstBatch(first_block, std::move(rb_gen), max_readahead);
     });
   }
 
-  Future<std::shared_ptr<RecordBatch>> ReadNextAsync(bool internal_read) {
-    if (eof_) {
-      return Future<std::shared_ptr<RecordBatch>>::MakeFinished(nullptr);
-    }
-    if (io_context_.stop_token().IsStopRequested()) {
-      eof_ = true;
-      return io_context_.stop_token().Poll();
+  Status InitAfterFirstBatch(const DecodedBlock& first_block,
+                             AsyncGenerator<DecodedBlock> batch_gen, int max_readahead) {
+    schema_ = first_block.record_batch->schema();
+
+    AsyncGenerator<DecodedBlock> readahead_gen;
+    if (read_options_.use_threads) {
+      readahead_gen = MakeReadaheadGenerator(std::move(batch_gen), max_readahead);
+    } else {
+      readahead_gen = std::move(batch_gen);
     }
-    auto self = shared_from_this();
-    if (!block_generator_) {
-      return SetupReader(self).Then(
-          [self, internal_read]() -> Future<std::shared_ptr<RecordBatch>> {
-            return self->ReadNextSkippingEmpty(self, internal_read);
-          },
-          [self](const Status& err) -> Result<std::shared_ptr<RecordBatch>> {
-            self->eof_ = true;
-            return err;
-          });
+
+    AsyncGenerator<DecodedBlock> restarted_gen;
+    // Streaming reader should not emit empty record batches
+    if (first_block.record_batch->num_rows() > 0) {
+      restarted_gen = MakeGeneratorStartsWith({first_block}, std::move(readahead_gen));
     } else {
-      return self->ReadNextSkippingEmpty(self, internal_read);
+      restarted_gen = std::move(readahead_gen);
     }
+
+    auto bytes_decoded = bytes_decoded_;
+    auto unwrap_and_record_bytes =
+        [bytes_decoded](
+            const DecodedBlock& block) -> Result<std::shared_ptr<RecordBatch>> {
+      bytes_decoded->fetch_add(block.bytes_processed);
+      return block.record_batch;
+    };
+
+    auto unwrapped =
+        MakeMappedGenerator(std::move(restarted_gen), std::move(unwrap_and_record_bytes));
+
+    record_batch_gen_ = MakeCancellable(std::move(unwrapped), io_context_.stop_token());
+    return Status::OK();
   }
 
-  bool source_eof_ = false;
-  int64_t last_block_index_ = 0;
-  AsyncGenerator<CSVBlock> block_generator_;
-  // bytes of data parsed but not yet decoded
-  int64_t bytes_parsed_ = 0;
-  // bytes which have been decoded for caller
-  int64_t bytes_decoded_ = 0;
+  std::shared_ptr<Schema> schema_;
+  AsyncGenerator<std::shared_ptr<RecordBatch>> record_batch_gen_;
+  // bytes which have been decoded and asked for by the caller
+  std::shared_ptr<std::atomic<int64_t>> bytes_decoded_;
 };
 
 /////////////////////////////////////////////////////////////////////////
@@ -1089,11 +1132,13 @@ Future<std::shared_ptr<StreamingReader>> MakeStreamingReader(
   RETURN_NOT_OK(parse_options.Validate());
   RETURN_NOT_OK(read_options.Validate());
   RETURN_NOT_OK(convert_options.Validate());
-  std::shared_ptr<BaseStreamingReader> reader;
-  reader = std::make_shared<SerialStreamingReader>(
-      io_context, cpu_executor, input, read_options, parse_options, convert_options,
-      /*count_rows=*/true);
-  return reader->Init();
+  std::shared_ptr<StreamingReaderImpl> reader;
+  reader = std::make_shared<StreamingReaderImpl>(io_context, input, read_options,
+                                                 parse_options, convert_options,
+                                                 /*count_rows=*/true);
+  return reader->Init(cpu_executor).Then([reader] {
+    return std::dynamic_pointer_cast<StreamingReader>(reader);
+  });
 }
 
 /////////////////////////////////////////////////////////////////////////
@@ -1139,8 +1184,9 @@ class CSVRowCounter : public ReaderMixin,
   }
 
   Future<int64_t> DoCount(const std::shared_ptr<CSVRowCounter>& self) {
-    // We must return a value instead of Status/Future<> to work with MakeMappedGenerator,
-    // and we must use a type with a valid end value to work with IterationEnd.
+    // count_cb must return a value instead of Status/Future<> to work with
+    // MakeMappedGenerator, and it must use a type with a valid end value to work with
+    // IterationEnd.
     std::function<Result<util::optional<int64_t>>(const CSVBlock&)> count_cb =
         [self](const CSVBlock& maybe_block) -> Result<util::optional<int64_t>> {
       ARROW_ASSIGN_OR_RAISE(
diff --git a/cpp/src/arrow/csv/reader_test.cc b/cpp/src/arrow/csv/reader_test.cc
index 1ab49fa8664..88ead7677f3 100644
--- a/cpp/src/arrow/csv/reader_test.cc
+++ b/cpp/src/arrow/csv/reader_test.cc
@@ -67,6 +67,38 @@ class StreamingReaderAsTableReader : public TableReader {
 
 using TableReaderFactory =
     std::function<Result<std::shared_ptr<TableReader>>(std::shared_ptr<io::InputStream>)>;
+using StreamingReaderFactory = std::function<Result<std::shared_ptr<StreamingReader>>(
+    std::shared_ptr<io::InputStream>)>;
+
+void TestEmptyTable(TableReaderFactory reader_factory) {
+  auto empty_buffer = std::make_shared<Buffer>("");
+  auto empty_input = std::make_shared<io::BufferReader>(empty_buffer);
+  auto maybe_reader = reader_factory(empty_input);
+  // Streaming reader fails on open, table readers fail on first read
+  if (maybe_reader.ok()) {
+    ASSERT_FINISHES_AND_RAISES(Invalid, (*maybe_reader)->ReadAsync());
+  } else {
+    ASSERT_TRUE(maybe_reader.status().IsInvalid());
+  }
+}
+
+void TestHeaderOnly(TableReaderFactory reader_factory) {
+  auto header_only_buffer = std::make_shared<Buffer>("a,b,c\n");
+  auto input = std::make_shared<io::BufferReader>(header_only_buffer);
+  ASSERT_OK_AND_ASSIGN(auto reader, reader_factory(input));
+  ASSERT_FINISHES_OK_AND_ASSIGN(auto table, reader->ReadAsync());
+  ASSERT_EQ(table->schema()->num_fields(), 3);
+  ASSERT_EQ(table->num_rows(), 0);
+}
+
+void TestHeaderOnlyStreaming(StreamingReaderFactory reader_factory) {
+  auto header_only_buffer = std::make_shared<Buffer>("a,b,c\n");
+  auto input = std::make_shared<io::BufferReader>(header_only_buffer);
+  ASSERT_OK_AND_ASSIGN(auto reader, reader_factory(input));
+  std::shared_ptr<RecordBatch> next_batch;
+  ASSERT_OK(reader->ReadNext(&next_batch));
+  ASSERT_EQ(next_batch, nullptr);
+}
 
 void StressTableReader(TableReaderFactory reader_factory) {
 #ifdef ARROW_VALGRIND
@@ -151,6 +183,8 @@ TableReaderFactory MakeSerialFactory() {
   };
 }
 
+TEST(SerialReaderTests, Empty) { TestEmptyTable(MakeSerialFactory()); }
+TEST(SerialReaderTests, HeaderOnly) { TestHeaderOnly(MakeSerialFactory()); }
 TEST(SerialReaderTests, Stress) { StressTableReader(MakeSerialFactory()); }
 TEST(SerialReaderTests, StressInvalid) { StressInvalidTableReader(MakeSerialFactory()); }
 TEST(SerialReaderTests, NestedParallelism) {
@@ -175,6 +209,14 @@ Result<TableReaderFactory> MakeAsyncFactory(
   };
 }
 
+TEST(AsyncReaderTests, Empty) {
+  ASSERT_OK_AND_ASSIGN(auto table_factory, MakeAsyncFactory());
+  TestEmptyTable(table_factory);
+}
+TEST(AsyncReaderTests, HeaderOnly) {
+  ASSERT_OK_AND_ASSIGN(auto table_factory, MakeAsyncFactory());
+  TestHeaderOnly(table_factory);
+}
 TEST(AsyncReaderTests, Stress) {
   ASSERT_OK_AND_ASSIGN(auto table_factory, MakeAsyncFactory());
   StressTableReader(table_factory);
@@ -194,6 +236,7 @@ Result<TableReaderFactory> MakeStreamingFactory() {
              -> Result<std::shared_ptr<TableReader>> {
     auto read_options = ReadOptions::Defaults();
     read_options.block_size = 1 << 10;
+    read_options.use_threads = true;
     ARROW_ASSIGN_OR_RAISE(
         auto streaming_reader,
         StreamingReader::Make(io::default_io_context(), input_stream, read_options,
@@ -202,6 +245,25 @@ Result<TableReaderFactory> MakeStreamingFactory() {
   };
 }
 
+Result<StreamingReaderFactory> MakeStreamingReaderFactory() {
+  return [](std::shared_ptr<io::InputStream> input_stream)
+             -> Result<std::shared_ptr<StreamingReader>> {
+    auto read_options = ReadOptions::Defaults();
+    read_options.block_size = 1 << 10;
+    read_options.use_threads = true;
+    return StreamingReader::Make(io::default_io_context(), input_stream, read_options,
+                                 ParseOptions::Defaults(), ConvertOptions::Defaults());
+  };
+}
+
+TEST(StreamingReaderTests, Empty) {
+  ASSERT_OK_AND_ASSIGN(auto table_factory, MakeStreamingFactory());
+  TestEmptyTable(table_factory);
+}
+TEST(StreamingReaderTests, HeaderOnly) {
+  ASSERT_OK_AND_ASSIGN(auto table_factory, MakeStreamingReaderFactory());
+  TestHeaderOnlyStreaming(table_factory);
+}
 TEST(StreamingReaderTests, Stress) {
   ASSERT_OK_AND_ASSIGN(auto table_factory, MakeStreamingFactory());
   StressTableReader(table_factory);
@@ -227,18 +289,20 @@ TEST(StreamingReaderTest, BytesRead) {
 
     auto read_options = ReadOptions::Defaults();
     read_options.block_size = 20;
+    read_options.use_threads = false;
     ASSERT_OK_AND_ASSIGN(
         auto streaming_reader,
         StreamingReader::Make(io::default_io_context(), input, read_options,
                               ParseOptions::Defaults(), ConvertOptions::Defaults()));
     std::shared_ptr<RecordBatch> batch;
-    int64_t bytes = 6;  // Size of header
+    int64_t bytes = 6;  // Size of header (counted during StreamingReader::Make)
     do {
       ASSERT_EQ(bytes, streaming_reader->bytes_read());
       ASSERT_OK(streaming_reader->ReadNext(&batch));
       bytes += 12;  // Add size of each row
-    } while (batch);
+    } while (bytes <= 42);
     ASSERT_EQ(42, streaming_reader->bytes_read());
+    ASSERT_EQ(batch.get(), nullptr);
   }
 
   // Interaction of skip_rows and bytes_read()
@@ -246,13 +310,18 @@ TEST(StreamingReaderTest, BytesRead) {
     auto input = std::make_shared<io::BufferReader>(table_buffer);
 
     auto read_options = ReadOptions::Defaults();
-    read_options.skip_rows = 2;
+    read_options.skip_rows = 1;
+    read_options.block_size = 32;
     ASSERT_OK_AND_ASSIGN(
         auto streaming_reader,
         StreamingReader::Make(io::default_io_context(), input, read_options,
                               ParseOptions::Defaults(), ConvertOptions::Defaults()));
     std::shared_ptr<RecordBatch> batch;
-    // first two rows and third row as header
+    // The header (6 bytes) and first skipped row (12 bytes) are counted during
+    // StreamingReader::Make
+    ASSERT_EQ(18, streaming_reader->bytes_read());
+    ASSERT_OK(streaming_reader->ReadNext(&batch));
+    ASSERT_NE(batch.get(), nullptr);
     ASSERT_EQ(30, streaming_reader->bytes_read());
     ASSERT_OK(streaming_reader->ReadNext(&batch));
     ASSERT_NE(batch.get(), nullptr);
@@ -266,7 +335,8 @@ TEST(StreamingReaderTest, BytesRead) {
     auto input = std::make_shared<io::BufferReader>(table_buffer);
 
     auto read_options = ReadOptions::Defaults();
-    read_options.skip_rows_after_names = 2;
+    read_options.block_size = 32;
+    read_options.skip_rows_after_names = 1;
 
     ASSERT_OK_AND_ASSIGN(
         auto streaming_reader,
@@ -274,10 +344,14 @@ TEST(StreamingReaderTest, BytesRead) {
                               ParseOptions::Defaults(), ConvertOptions::Defaults()));
     std::shared_ptr<RecordBatch> batch;
 
-    // Just header
+    // The header is read as part of StreamingReader::Make
     ASSERT_EQ(6, streaming_reader->bytes_read());
     ASSERT_OK(streaming_reader->ReadNext(&batch));
     ASSERT_NE(batch.get(), nullptr);
+    // Next the skipped batch (12 bytes) and 1 row (12 bytes)
+    ASSERT_EQ(30, streaming_reader->bytes_read());
+    ASSERT_OK(streaming_reader->ReadNext(&batch));
+    ASSERT_NE(batch.get(), nullptr);
     ASSERT_EQ(42, streaming_reader->bytes_read());
     ASSERT_OK(streaming_reader->ReadNext(&batch));
     ASSERT_EQ(batch.get(), nullptr);
diff --git a/cpp/src/arrow/util/async_generator.h b/cpp/src/arrow/util/async_generator.h
index c2aad6cd680..8992e7bcac2 100644
--- a/cpp/src/arrow/util/async_generator.h
+++ b/cpp/src/arrow/util/async_generator.h
@@ -907,6 +907,8 @@ AsyncGenerator<T> MakeVectorGenerator(std::vector<T> vec) {
   return [state]() {
     auto idx = state->vec_idx.fetch_add(1);
     if (idx >= state->vec.size()) {
+      // Eagerly return memory
+      state->vec.clear();
       return AsyncGeneratorEnd<T>();
     }
     return Future<T>::MakeFinished(state->vec[idx]);
diff --git a/python/pyarrow/tests/test_csv.py b/python/pyarrow/tests/test_csv.py
index 050342de747..e4a4e3e5935 100644
--- a/python/pyarrow/tests/test_csv.py
+++ b/python/pyarrow/tests/test_csv.py
@@ -1250,10 +1250,16 @@ def read_csv(self, *args, validate_full=True, **kwargs):
         return table
 
 
-class BaseTestStreamingCSVRead:
+@pytest.mark.parametrize('use_threads', [False, True])
+class TestStreamingCSVRead:
 
-    def open_bytes(self, b, **kwargs):
-        return self.open_csv(pa.py_buffer(b), **kwargs)
+    def open_bytes(self, b, use_threads, **kwargs):
+        return self.open_csv(pa.py_buffer(b), use_threads, **kwargs)
+
+    def open_csv(self, b, use_threads, *args, **kwargs):
+        read_options = kwargs.setdefault('read_options', ReadOptions())
+        read_options.use_threads = use_threads
+        return open_csv(b, *args, **kwargs)
 
     def check_reader(self, reader, expected_schema, expected_data):
         assert reader.schema == expected_schema
@@ -1264,24 +1270,24 @@ def check_reader(self, reader, expected_schema, expected_data):
             assert batch.schema == expected_schema
             assert batch.to_pydict() == expected_batch
 
-    def test_file_object(self):
+    def test_file_object(self, use_threads):
         data = b"a,b\n1,2\n3,4\n"
         expected_data = {'a': [1, 3], 'b': [2, 4]}
         bio = io.BytesIO(data)
-        reader = self.open_csv(bio)
+        reader = self.open_csv(bio, use_threads)
         expected_schema = pa.schema([('a', pa.int64()),
                                      ('b', pa.int64())])
         self.check_reader(reader, expected_schema, [expected_data])
 
-    def test_header(self):
+    def test_header(self, use_threads):
         rows = b"abc,def,gh\n"
-        reader = self.open_bytes(rows)
+        reader = self.open_bytes(rows, use_threads)
         expected_schema = pa.schema([('abc', pa.null()),
                                      ('def', pa.null()),
                                      ('gh', pa.null())])
         self.check_reader(reader, expected_schema, [])
 
-    def test_inference(self):
+    def test_inference(self, use_threads):
         # Inference is done on first block
         rows = b"a,b\n123,456\nabc,de\xff\ngh,ij\n"
         expected_schema = pa.schema([('a', pa.string()),
@@ -1289,25 +1295,25 @@ def test_inference(self):
 
         read_options = ReadOptions()
         read_options.block_size = len(rows)
-        reader = self.open_bytes(rows, read_options=read_options)
+        reader = self.open_bytes(rows, use_threads, read_options=read_options)
         self.check_reader(reader, expected_schema,
                           [{'a': ['123', 'abc', 'gh'],
                             'b': [b'456', b'de\xff', b'ij']}])
 
         read_options.block_size = len(rows) - 1
-        reader = self.open_bytes(rows, read_options=read_options)
+        reader = self.open_bytes(rows, use_threads, read_options=read_options)
         self.check_reader(reader, expected_schema,
                           [{'a': ['123', 'abc'],
                             'b': [b'456', b'de\xff']},
                            {'a': ['gh'],
                             'b': [b'ij']}])
 
-    def test_inference_failure(self):
+    def test_inference_failure(self, use_threads):
         # Inference on first block, then conversion failure on second block
         rows = b"a,b\n123,456\nabc,de\xff\ngh,ij\n"
         read_options = ReadOptions()
         read_options.block_size = len(rows) - 7
-        reader = self.open_bytes(rows, read_options=read_options)
+        reader = self.open_bytes(rows, use_threads, read_options=read_options)
         expected_schema = pa.schema([('a', pa.int64()),
                                      ('b', pa.int64())])
         assert reader.schema == expected_schema
@@ -1322,38 +1328,20 @@ def test_inference_failure(self):
         with pytest.raises(StopIteration):
             reader.read_next_batch()
 
-        # Inference on first block, then conversion failure on second block,
-        # then success on third block
-        rows = b"a,b\n1,2\nabc,def\n45,67\n"
-        read_options.block_size = 8
-        reader = self.open_bytes(rows, read_options=read_options)
-        expected_schema = pa.schema([('a', pa.int64()),
-                                     ('b', pa.int64())])
-        assert reader.schema == expected_schema
-        assert reader.read_next_batch().to_pydict() == {'a': [1], 'b': [2]}
-        # Second block
-        with pytest.raises(ValueError,
-                           match="CSV conversion error to int64"):
-            reader.read_next_batch()
-        # Third block
-        assert reader.read_next_batch().to_pydict() == {'a': [45], 'b': [67]}
-        # EOF
-        with pytest.raises(StopIteration):
-            reader.read_next_batch()
-
-    def test_invalid_csv(self):
+    def test_invalid_csv(self, use_threads):
         # CSV errors on first block
         rows = b"a,b\n1,2,3\n4,5\n6,7\n"
         read_options = ReadOptions()
         read_options.block_size = 10
         with pytest.raises(pa.ArrowInvalid,
                            match="Expected 2 columns, got 3"):
-            reader = self.open_bytes(rows, read_options=read_options)
+            reader = self.open_bytes(
+                rows, use_threads, read_options=read_options)
 
         # CSV errors on second block
         rows = b"a,b\n1,2\n3,4,5\n6,7\n"
         read_options.block_size = 8
-        reader = self.open_bytes(rows, read_options=read_options)
+        reader = self.open_bytes(rows, use_threads, read_options=read_options)
         assert reader.read_next_batch().to_pydict() == {'a': [1], 'b': [2]}
         with pytest.raises(pa.ArrowInvalid,
                            match="Expected 2 columns, got 3"):
@@ -1362,9 +1350,9 @@ def test_invalid_csv(self):
         with pytest.raises(StopIteration):
             reader.read_next_batch()
 
-    def test_options_delimiter(self):
+    def test_options_delimiter(self, use_threads):
         rows = b"a;b,c\nde,fg;eh\n"
-        reader = self.open_bytes(rows)
+        reader = self.open_bytes(rows, use_threads)
         expected_schema = pa.schema([('a;b', pa.string()),
                                      ('c', pa.string())])
         self.check_reader(reader, expected_schema,
@@ -1372,17 +1360,17 @@ def test_options_delimiter(self):
                             'c': ['fg;eh']}])
 
         opts = ParseOptions(delimiter=';')
-        reader = self.open_bytes(rows, parse_options=opts)
+        reader = self.open_bytes(rows, use_threads, parse_options=opts)
         expected_schema = pa.schema([('a', pa.string()),
                                      ('b,c', pa.string())])
         self.check_reader(reader, expected_schema,
                           [{'a': ['de,fg'],
                             'b,c': ['eh']}])
 
-    def test_no_ending_newline(self):
+    def test_no_ending_newline(self, use_threads):
         # No \n after last line
         rows = b"a,b,c\n1,2,3\n4,5,6"
-        reader = self.open_bytes(rows)
+        reader = self.open_bytes(rows, use_threads)
         expected_schema = pa.schema([('a', pa.int64()),
                                      ('b', pa.int64()),
                                      ('c', pa.int64())])
@@ -1391,16 +1379,16 @@ def test_no_ending_newline(self):
                             'b': [2, 5],
                             'c': [3, 6]}])
 
-    def test_empty_file(self):
+    def test_empty_file(self, use_threads):
         with pytest.raises(ValueError, match="Empty CSV file"):
-            self.open_bytes(b"")
+            self.open_bytes(b"", use_threads)
 
-    def test_column_options(self):
+    def test_column_options(self, use_threads):
         # With column_names
         rows = b"1,2,3\n4,5,6"
         read_options = ReadOptions()
         read_options.column_names = ['d', 'e', 'f']
-        reader = self.open_bytes(rows, read_options=read_options)
+        reader = self.open_bytes(rows, use_threads, read_options=read_options)
         expected_schema = pa.schema([('d', pa.int64()),
                                      ('e', pa.int64()),
                                      ('f', pa.int64())])
@@ -1412,7 +1400,7 @@ def test_column_options(self):
         # With include_columns
         convert_options = ConvertOptions()
         convert_options.include_columns = ['f', 'e']
-        reader = self.open_bytes(rows, read_options=read_options,
+        reader = self.open_bytes(rows, use_threads, read_options=read_options,
                                  convert_options=convert_options)
         expected_schema = pa.schema([('f', pa.int64()),
                                      ('e', pa.int64())])
@@ -1422,7 +1410,7 @@ def test_column_options(self):
 
         # With column_types
         convert_options.column_types = {'e': pa.string()}
-        reader = self.open_bytes(rows, read_options=read_options,
+        reader = self.open_bytes(rows, use_threads, read_options=read_options,
                                  convert_options=convert_options)
         expected_schema = pa.schema([('f', pa.int64()),
                                      ('e', pa.string())])
@@ -1435,11 +1423,12 @@ def test_column_options(self):
         with pytest.raises(
                 KeyError,
                 match="Column 'g' in include_columns does not exist"):
-            reader = self.open_bytes(rows, read_options=read_options,
+            reader = self.open_bytes(rows, use_threads,
+                                     read_options=read_options,
                                      convert_options=convert_options)
 
         convert_options.include_missing_columns = True
-        reader = self.open_bytes(rows, read_options=read_options,
+        reader = self.open_bytes(rows, use_threads, read_options=read_options,
                                  convert_options=convert_options)
         expected_schema = pa.schema([('g', pa.null()),
                                      ('f', pa.int64()),
@@ -1450,7 +1439,7 @@ def test_column_options(self):
                             'f': [3, 6]}])
 
         convert_options.column_types = {'e': pa.string(), 'g': pa.float64()}
-        reader = self.open_bytes(rows, read_options=read_options,
+        reader = self.open_bytes(rows, use_threads, read_options=read_options,
                                  convert_options=convert_options)
         expected_schema = pa.schema([('g', pa.float64()),
                                      ('f', pa.int64()),
@@ -1460,11 +1449,11 @@ def test_column_options(self):
                             'e': ["2", "5"],
                             'f': [3, 6]}])
 
-    def test_encoding(self):
+    def test_encoding(self, use_threads):
         # latin-1 (invalid utf-8)
         rows = b"a,b\nun,\xe9l\xe9phant"
         read_options = ReadOptions()
-        reader = self.open_bytes(rows, read_options=read_options)
+        reader = self.open_bytes(rows, use_threads, read_options=read_options)
         expected_schema = pa.schema([('a', pa.string()),
                                      ('b', pa.binary())])
         self.check_reader(reader, expected_schema,
@@ -1472,7 +1461,7 @@ def test_encoding(self):
                             'b': [b"\xe9l\xe9phant"]}])
 
         read_options.encoding = 'latin1'
-        reader = self.open_bytes(rows, read_options=read_options)
+        reader = self.open_bytes(rows, use_threads, read_options=read_options)
         expected_schema = pa.schema([('a', pa.string()),
                                      ('b', pa.string())])
         self.check_reader(reader, expected_schema,
@@ -1483,22 +1472,22 @@ def test_encoding(self):
         rows = (b'\xff\xfea\x00,\x00b\x00\n\x00u\x00n\x00,'
                 b'\x00\xe9\x00l\x00\xe9\x00p\x00h\x00a\x00n\x00t\x00')
         read_options.encoding = 'utf16'
-        reader = self.open_bytes(rows, read_options=read_options)
+        reader = self.open_bytes(rows, use_threads, read_options=read_options)
         expected_schema = pa.schema([('a', pa.string()),
                                      ('b', pa.string())])
         self.check_reader(reader, expected_schema,
                           [{'a': ["un"],
                             'b': ["éléphant"]}])
 
-    def test_small_random_csv(self):
+    def test_small_random_csv(self, use_threads):
         csv, expected = make_random_csv(num_cols=2, num_rows=10)
-        reader = self.open_bytes(csv)
+        reader = self.open_bytes(csv, use_threads)
         table = reader.read_all()
         assert table.schema == expected.schema
         assert table.equals(expected)
         assert table.to_pydict() == expected.to_pydict()
 
-    def test_stress_block_sizes(self):
+    def test_stress_block_sizes(self, use_threads):
         # Test a number of small block sizes to stress block stitching
         csv_base, expected = make_random_csv(num_cols=2, num_rows=500)
         block_sizes = [19, 21, 23, 26, 37, 111]
@@ -1508,22 +1497,15 @@ def test_stress_block_sizes(self):
                 # Need at least two lines for type inference
                 assert csv[:block_size].count(b'\n') >= 2
                 read_options = ReadOptions(block_size=block_size)
-                reader = self.open_bytes(csv, read_options=read_options)
+                reader = self.open_bytes(
+                    csv, use_threads, read_options=read_options)
                 table = reader.read_all()
                 assert table.schema == expected.schema
                 if not table.equals(expected):
                     # Better error output
                     assert table.to_pydict() == expected.to_pydict()
 
-
-class TestSerialStreamingCSVRead(BaseTestStreamingCSVRead, unittest.TestCase):
-
-    def open_csv(self, *args, **kwargs):
-        read_options = kwargs.setdefault('read_options', ReadOptions())
-        read_options.use_threads = False
-        return open_csv(*args, **kwargs)
-
-    def test_batch_lifetime(self):
+    def test_batch_lifetime(self, use_threads):
         gc.collect()
         old_allocated = pa.total_allocated_bytes()
 
@@ -1536,15 +1518,15 @@ def check_one_batch(reader, expected):
         read_options = ReadOptions()
         read_options.column_names = ['a', 'b']
         read_options.block_size = 6
-        reader = self.open_bytes(rows, read_options=read_options)
+        reader = self.open_bytes(rows, use_threads, read_options=read_options)
         check_one_batch(reader, {'a': [10], 'b': [11]})
         allocated_after_first_batch = pa.total_allocated_bytes()
         check_one_batch(reader, {'a': [12], 'b': [13]})
-        assert pa.total_allocated_bytes() == allocated_after_first_batch
+        assert pa.total_allocated_bytes() <= allocated_after_first_batch
         check_one_batch(reader, {'a': [14], 'b': [15]})
-        assert pa.total_allocated_bytes() == allocated_after_first_batch
+        assert pa.total_allocated_bytes() <= allocated_after_first_batch
         check_one_batch(reader, {'a': [16], 'b': [17]})
-        assert pa.total_allocated_bytes() == allocated_after_first_batch
+        assert pa.total_allocated_bytes() <= allocated_after_first_batch
         with pytest.raises(StopIteration):
             reader.read_next_batch()
         assert pa.total_allocated_bytes() == old_allocated

From efb338d7ac45602b4c4a5663923c0f42224c3331 Mon Sep 17 00:00:00 2001
From: Weston Pace <weston.pace@gmail.com>
Date: Fri, 16 Jul 2021 16:35:03 -1000
Subject: [PATCH 591/719] ARROW-13153: [C++] `parquet_dataset` loses ordering
 of files in `_metadata`

The ParquetDatasetFactory now stores the paths in a list alongside the path-to-id map so that it can create the dataset with a properly ordered set of paths.  I added a test to test_dataset.py to confirm this.

Note: While this does fix the issue, writing a dataset is still non-deterministic.  This is probably inevitable if partitioning is present.  Grouping rows by a partition will destroy any ordering that previously existed.  Furthermore, since the current implementation writes with multiple threads, there is no predictable order in which output files are created.

Closes #10636 from westonpace/bugfix/ARROW-13153--parquet_dataset-loses-ordering-of-files-in-_met

Authored-by: Weston Pace <weston.pace@gmail.com>
Signed-off-by: Weston Pace <weston.pace@gmail.com>
---
 cpp/src/arrow/dataset/file_parquet.cc | 21 +++++++++++++--------
 cpp/src/arrow/dataset/file_parquet.h  |  6 +++---
 python/pyarrow/tests/test_dataset.py  | 22 ++++++++++++++++++++++
 3 files changed, 38 insertions(+), 11 deletions(-)

diff --git a/cpp/src/arrow/dataset/file_parquet.cc b/cpp/src/arrow/dataset/file_parquet.cc
index 30ebc304749..122894586c6 100644
--- a/cpp/src/arrow/dataset/file_parquet.cc
+++ b/cpp/src/arrow/dataset/file_parquet.cc
@@ -867,7 +867,8 @@ Result<std::shared_ptr<DatasetFactory>> ParquetDatasetFactory::Make(
   ARROW_ASSIGN_OR_RAISE(auto physical_schema, GetSchema(*metadata, properties));
   ARROW_ASSIGN_OR_RAISE(auto manifest, GetSchemaManifest(*metadata, properties));
 
-  std::unordered_map<std::string, std::vector<int>> path_to_row_group_ids;
+  std::vector<std::pair<std::string, std::vector<int>>> paths_with_row_group_ids;
+  std::unordered_map<std::string, int> paths_to_index;
 
   for (int i = 0; i < metadata->num_row_groups(); i++) {
     auto row_group = metadata->RowGroup(i);
@@ -877,22 +878,26 @@ Result<std::shared_ptr<DatasetFactory>> ParquetDatasetFactory::Make(
 
     // Insert the path, or increase the count of row groups. It will be assumed that the
     // RowGroup of a file are ordered exactly as in the metadata file.
-    auto row_groups = &path_to_row_group_ids.insert({std::move(path), {}}).first->second;
-    row_groups->emplace_back(i);
+    auto inserted_index = paths_to_index.emplace(
+        std::move(path), static_cast<int>(paths_with_row_group_ids.size()));
+    if (inserted_index.second) {
+      paths_with_row_group_ids.push_back({inserted_index.first->first, {}});
+    }
+    paths_with_row_group_ids[inserted_index.first->second].second.push_back(i);
   }
 
   return std::shared_ptr<DatasetFactory>(new ParquetDatasetFactory(
       std::move(filesystem), std::move(format), std::move(metadata), std::move(manifest),
       std::move(physical_schema), base_path, std::move(options),
-      std::move(path_to_row_group_ids)));
+      std::move(paths_with_row_group_ids)));
 }
 
 Result<std::vector<std::shared_ptr<FileFragment>>>
 ParquetDatasetFactory::CollectParquetFragments(const Partitioning& partitioning) {
-  std::vector<std::shared_ptr<FileFragment>> fragments(path_to_row_group_ids_.size());
+  std::vector<std::shared_ptr<FileFragment>> fragments(paths_with_row_group_ids_.size());
 
   size_t i = 0;
-  for (const auto& e : path_to_row_group_ids_) {
+  for (const auto& e : paths_with_row_group_ids_) {
     const auto& path = e.first;
     auto metadata_subset = metadata_->Subset(e.second);
 
@@ -921,10 +926,10 @@ Result<std::vector<std::shared_ptr<Schema>>> ParquetDatasetFactory::InspectSchem
 
   if (auto factory = options_.partitioning.factory()) {
     // Gather paths found in RowGroups' ColumnChunks.
-    std::vector<std::string> stripped(path_to_row_group_ids_.size());
+    std::vector<std::string> stripped(paths_with_row_group_ids_.size());
 
     size_t i = 0;
-    for (const auto& e : path_to_row_group_ids_) {
+    for (const auto& e : paths_with_row_group_ids_) {
       stripped[i++] = StripPrefixAndFilename(e.first, options_.partition_base_dir);
     }
     ARROW_ASSIGN_OR_RAISE(auto partition_schema, factory->Inspect(stripped));
diff --git a/cpp/src/arrow/dataset/file_parquet.h b/cpp/src/arrow/dataset/file_parquet.h
index da4fd58ebbe..d61730909e3 100644
--- a/cpp/src/arrow/dataset/file_parquet.h
+++ b/cpp/src/arrow/dataset/file_parquet.h
@@ -352,7 +352,7 @@ class ARROW_DS_EXPORT ParquetDatasetFactory : public DatasetFactory {
       std::shared_ptr<parquet::arrow::SchemaManifest> manifest,
       std::shared_ptr<Schema> physical_schema, std::string base_path,
       ParquetFactoryOptions options,
-      std::unordered_map<std::string, std::vector<int>> path_to_row_group_ids)
+      std::vector<std::pair<std::string, std::vector<int>>> paths_with_row_group_ids)
       : filesystem_(std::move(filesystem)),
         format_(std::move(format)),
         metadata_(std::move(metadata)),
@@ -360,7 +360,7 @@ class ARROW_DS_EXPORT ParquetDatasetFactory : public DatasetFactory {
         physical_schema_(std::move(physical_schema)),
         base_path_(std::move(base_path)),
         options_(std::move(options)),
-        path_to_row_group_ids_(std::move(path_to_row_group_ids)) {}
+        paths_with_row_group_ids_(std::move(paths_with_row_group_ids)) {}
 
   std::shared_ptr<fs::FileSystem> filesystem_;
   std::shared_ptr<ParquetFileFormat> format_;
@@ -369,7 +369,7 @@ class ARROW_DS_EXPORT ParquetDatasetFactory : public DatasetFactory {
   std::shared_ptr<Schema> physical_schema_;
   std::string base_path_;
   ParquetFactoryOptions options_;
-  std::unordered_map<std::string, std::vector<int>> path_to_row_group_ids_;
+  std::vector<std::pair<std::string, std::vector<int>>> paths_with_row_group_ids_;
 
  private:
   Result<std::vector<std::shared_ptr<FileFragment>>> CollectParquetFragments(
diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py
index bf60b0f4b52..6f9662471fc 100644
--- a/python/pyarrow/tests/test_dataset.py
+++ b/python/pyarrow/tests/test_dataset.py
@@ -2782,6 +2782,28 @@ def test_parquet_dataset_factory_roundtrip(tempdir, use_legacy_dataset):
     assert result.num_rows == 10
 
 
+def test_parquet_dataset_factory_order(tempdir):
+    # The order of the fragments in the dataset should match the order of the
+    # row groups in the _metadata file.
+    import pyarrow.parquet as pq
+    metadatas = []
+    # Create a dataset where f1 is incrementing from 0 to 100 spread across
+    # 10 files.  Put the row groups in the correct order in _metadata
+    for i in range(10):
+        table = pa.table(
+            {'f1': list(range(i*10, (i+1)*10))})
+        table_path = tempdir / f'{i}.parquet'
+        pq.write_table(table, table_path, metadata_collector=metadatas)
+        metadatas[-1].set_file_path(f'{i}.parquet')
+    metadata_path = str(tempdir / '_metadata')
+    pq.write_metadata(table.schema, metadata_path, metadatas)
+    dataset = ds.parquet_dataset(metadata_path)
+    # Ensure the table contains values from 0-100 in the right order
+    scanned_table = dataset.to_table()
+    scanned_col = scanned_table.column('f1').to_pylist()
+    assert scanned_col == list(range(0, 100))
+
+
 @pytest.mark.parquet
 @pytest.mark.pandas
 def test_parquet_dataset_factory_invalid(tempdir):

From 6179fc5b8ebab2a59ac14379ad505d7ed2702489 Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Sat, 17 Jul 2021 10:02:52 -0500
Subject: [PATCH 592/719] ARROW-13355: [R] ensure that sf is installed in our
 revdep job

Closes #10734 from jonkeane/ARROW-13355-revdep-sf

Authored-by: Jonathan Keane <jkeane@gmail.com>
Signed-off-by: Jonathan Keane <jkeane@gmail.com>
---
 ci/scripts/r_revdepcheck.sh | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/ci/scripts/r_revdepcheck.sh b/ci/scripts/r_revdepcheck.sh
index e2605911f93..79ace9ca09d 100755
--- a/ci/scripts/r_revdepcheck.sh
+++ b/ci/scripts/r_revdepcheck.sh
@@ -30,13 +30,17 @@ apt install -y libxml2-dev \
   libfontconfig1-dev \
   libcairo2-dev \
   libglpk-dev \
-  libmariadb-dev \
+  libmysqlclient-dev \
   unixodbc-dev \
   libpq-dev \
   coinor-libsymphony-dev \
   coinor-libcgl-dev \
   coinor-symphony \
-  libzmq3-dev
+  libzmq3-dev \
+  libudunits2-dev \
+  libgdal-dev \
+  libgeos-dev \
+  libproj-dev
 
 pushd ${source_dir}
 

From 45a2ae97ae515fd85a9292b7baff4767b2f8ab21 Mon Sep 17 00:00:00 2001
From: Ian Cook <ianmcook@gmail.com>
Date: Sat, 17 Jul 2021 14:13:33 -0400
Subject: [PATCH 593/719] ARROW-12964: [R] Add bindings for ifelse() and
 if_else()

This also makes the behavior of `is.na()` and `is.nan()` more consistent with base R

Closes #10724 from thisisnic/ARROW-12964_ifelse

Lead-authored-by: Ian Cook <ianmcook@gmail.com>
Co-authored-by: Jonathan Keane <jkeane@gmail.com>
Co-authored-by: Nic Crane <thisisnic@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 r/NEWS.md                             |   2 +
 r/R/arrow-datum.R                     |  20 +++-
 r/R/dplyr-functions.R                 |  88 +++++++++++---
 r/R/enums.R                           |   2 +
 r/R/expression.R                      |  14 ++-
 r/tests/testthat/helper-expectation.R |   3 +-
 r/tests/testthat/test-Array.R         |  17 +++
 r/tests/testthat/test-compute-sort.R  |   1 -
 r/tests/testthat/test-dplyr.R         | 165 +++++++++++++++++++++++++-
 9 files changed, 284 insertions(+), 28 deletions(-)

diff --git a/r/NEWS.md b/r/NEWS.md
index 63be8b9df9b..792b2f4081c 100644
--- a/r/NEWS.md
+++ b/r/NEWS.md
@@ -26,6 +26,8 @@
 * Added bindings for the remainder of C data interface: Type, Field, and RecordBatchReader (from the experimental C stream interface). These also have `reticulate::py_to_r()` and `r_to_py()` methods. Along with the addition of the `Scanner$ToRecordBatchReader()` method, you can now build up a Dataset query in R and pass the resulting stream of batches to another tool in process.
 * `match_arrow()` now converts `x` into an `Array` if it is not a `Scalar`, `Array` or `ChunkedArray` and no longer dispatches `base::match()`.
 * `transmute()` now errors if passed arguments `.keep`, `.before`, or `.after`, for consistency with the behavior of `dplyr` on `data.frame`s.
+* `is.na()` now evaluates to `TRUE` on `NaN` values in floating point number fields, for consistency with base R.
+* `is.nan()` now evaluates to `FALSE` on `NA` values in floating point number fields and `FALSE` on all values in non-floating point fields, for consistency with base R.
 
 # arrow 4.0.1
 
diff --git a/r/R/arrow-datum.R b/r/R/arrow-datum.R
index 8becc37daf2..4734d44c7ea 100644
--- a/r/R/arrow-datum.R
+++ b/r/R/arrow-datum.R
@@ -47,10 +47,26 @@ is.infinite.ArrowDatum <- function(x) {
 }
 
 #' @export
-is.na.ArrowDatum <- function(x) call_function("is_null", x)
+is.na.ArrowDatum <- function(x) {
+  # TODO: if an option is added to the is_null kernel to treat NaN as NA,
+  # use that to simplify the code here (ARROW-13367)
+  if (x$type_id() %in% TYPES_WITH_NAN) {
+    call_function("is_nan", x) | call_function("is_null", x)
+  } else {
+    call_function("is_null", x)
+  }
+}
 
 #' @export
-is.nan.ArrowDatum <- function(x) call_function("is_nan", x)
+is.nan.ArrowDatum <- function(x) {
+  if (x$type_id() %in% TYPES_WITH_NAN) {
+    # TODO: if an option is added to the is_nan kernel to treat NA as NaN,
+    # use that to simplify the code here (ARROW-13366)
+    call_function("is_nan", x) & call_function("is_valid", x)
+  } else {
+    Scalar$create(FALSE)$as_array(length(x))
+  }
+}
 
 #' @export
 as.vector.ArrowDatum <- function(x, mode) {
diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R
index 35db573550d..d118eefaa85 100644
--- a/r/R/dplyr-functions.R
+++ b/r/R/dplyr-functions.R
@@ -57,6 +57,28 @@ nse_funcs$cast <- function(x, target_type, safe = TRUE, ...) {
   Expression$create("cast", x, options = opts)
 }
 
+nse_funcs$is.na <- function(x) {
+  # TODO: if an option is added to the is_null kernel to treat NaN as NA,
+  # use that to simplify the code here (ARROW-13367)
+  if (is.double(x) || (inherits(x, "Expression") &&
+      x$type_id() %in% TYPES_WITH_NAN)) {
+    build_expr("is_nan", x) | build_expr("is_null", x)
+  } else {
+    build_expr("is_null", x)
+  }
+}
+
+nse_funcs$is.nan <- function(x) {
+  if (is.double(x) || (inherits(x, "Expression") &&
+      x$type_id() %in% TYPES_WITH_NAN)) {
+    # TODO: if an option is added to the is_nan kernel to treat NA as NaN,
+    # use that to simplify the code here (ARROW-13366)
+    build_expr("is_nan", x) & build_expr("is_valid", x)
+  } else {
+    Expression$scalar(FALSE)
+  }
+}
+
 nse_funcs$is <- function(object, class2) {
   if (is.string(class2)) {
     switch(class2,
@@ -147,35 +169,38 @@ nse_funcs$as.numeric <- function(x) {
 
 # is.* type functions
 nse_funcs$is.character <- function(x) {
-  x$type_id() %in% Type[c("STRING", "LARGE_STRING")]
+  is.character(x) || (inherits(x, "Expression") &&
+    x$type_id() %in% Type[c("STRING", "LARGE_STRING")])
 }
 nse_funcs$is.numeric <- function(x) {
-  x$type_id() %in% Type[c(
+  is.numeric(x) || (inherits(x, "Expression") && x$type_id() %in% Type[c(
     "UINT8", "INT8", "UINT16", "INT16", "UINT32", "INT32",
     "UINT64", "INT64", "HALF_FLOAT", "FLOAT", "DOUBLE",
     "DECIMAL", "DECIMAL256"
-  )]
+  )])
 }
 nse_funcs$is.double <- function(x) {
-  x$type_id() == Type["DOUBLE"]
+  is.double(x) || (inherits(x, "Expression") && x$type_id() == Type["DOUBLE"])
 }
 nse_funcs$is.integer <- function(x) {
-  x$type_id() %in% Type[c(
+  is.integer(x) || (inherits(x, "Expression") && x$type_id() %in% Type[c(
     "UINT8", "INT8", "UINT16", "INT16", "UINT32", "INT32",
     "UINT64", "INT64"
-  )]
+  )])
 }
 nse_funcs$is.integer64 <- function(x) {
-  x$type_id() == Type["INT64"]
+  is.integer64(x) || (inherits(x, "Expression") && x$type_id() == Type["INT64"])
 }
 nse_funcs$is.logical <- function(x) {
-  x$type_id() == Type["BOOL"]
+  is.logical(x) || (inherits(x, "Expression") && x$type_id() == Type["BOOL"])
 }
 nse_funcs$is.factor <- function(x) {
-  x$type_id() == Type["DICTIONARY"]
+  is.factor(x) || (inherits(x, "Expression") && x$type_id() == Type["DICTIONARY"])
 }
 nse_funcs$is.list <- function(x) {
-  x$type_id() %in% Type[c("LIST", "FIXED_SIZE_LIST", "LARGE_LIST")]
+  is.list(x) || (inherits(x, "Expression") && x$type_id() %in% Type[c(
+    "LIST", "FIXED_SIZE_LIST", "LARGE_LIST"
+  )])
 }
 
 # rlang::is_* type functions
@@ -294,8 +319,8 @@ nse_funcs$substr <- function(x, start, stop) {
     msg = "`stop` must be length 1 - other lengths are not supported in Arrow"
   )
 
-  # substr treats values as if they're on a continous number line, so values 
-  # 0 are effectively blank characters - set `start` to 1 here so Arrow mimics 
+  # substr treats values as if they're on a continous number line, so values
+  # 0 are effectively blank characters - set `start` to 1 here so Arrow mimics
   # this behavior
   if (start <= 0) {
     start <- 1
@@ -310,7 +335,7 @@ nse_funcs$substr <- function(x, start, stop) {
   Expression$create(
     "utf8_slice_codeunits",
     x,
-    # we don't need to subtract 1 from `stop` as C++ counts exclusively 
+    # we don't need to subtract 1 from `stop` as C++ counts exclusively
     # which effectively cancels out the difference in indexing between R & C++
     options = list(start = start - 1L, stop = stop)
   )
@@ -336,14 +361,14 @@ nse_funcs$str_sub <- function(string, start = 1L, end = -1L) {
     end <- .Machine$integer.max
   }
 
-  # An end value lower than a start value returns an empty string in 
+  # An end value lower than a start value returns an empty string in
   # stringr::str_sub so set end to 0 here to match this behavior
   if (end < start) {
     end <- 0
   }
 
   # subtract 1 from `start` because C++ is 0-based and R is 1-based
-  # str_sub treats a `start` value of 0 or 1 as the same thing so don't subtract 1 when `start` == 0 
+  # str_sub treats a `start` value of 0 or 1 as the same thing so don't subtract 1 when `start` == 0
   # when `start` < 0, both str_sub and utf8_slice_codeunits count backwards from the end
   if (start > 0) {
     start <- start - 1L
@@ -634,20 +659,45 @@ nse_funcs$wday <- function(x, label = FALSE, abbr = TRUE, week_start = getOption
 }
 
 nse_funcs$log <- function(x, base = exp(1)) {
-  
+
   if (base == exp(1)) {
     return(Expression$create("ln_checked", x))
   }
-  
+
   if (base == 2) {
     return(Expression$create("log2_checked", x))
   }
-  
+
   if (base == 10) {
     return(Expression$create("log10_checked", x))
-  } 
+  }
   # ARROW-13345
   stop("`base` values other than exp(1), 2 and 10 not supported in Arrow", call. = FALSE)
 }
 
 nse_funcs$logb <- nse_funcs$log
+
+nse_funcs$if_else <- function(condition, true, false, missing = NULL){
+  if (!is.null(missing)) {
+    return(nse_funcs$if_else(
+      nse_funcs$is.na(condition),
+      missing,
+      nse_funcs$if_else(condition, true, false)
+    ))
+  }
+
+  # if_else doesn't yet support factors/dictionaries
+  # TODO: remove this after ARROW-13358 is merged
+  warn_types <- nse_funcs$is.factor(true) | nse_funcs$is.factor(false)
+  if (warn_types) {
+    warning("Dictionaries (in R: factors) are currently converted to strings (characters) in if_else and ifelse", call. = FALSE)
+  }
+
+  build_expr("if_else", condition, true, false)
+}
+
+# Although base R ifelse allows `yes` and `no` to be different classes
+#
+nse_funcs$ifelse <- function(test, yes, no){
+  nse_funcs$if_else(condition = test, true = yes, false = no)
+}
diff --git a/r/R/enums.R b/r/R/enums.R
index 8a5bf7366a9..019ebc7a337 100644
--- a/r/R/enums.R
+++ b/r/R/enums.R
@@ -81,6 +81,8 @@ Type <- enum("Type::type",
   LARGE_LIST = 36L
 )
 
+TYPES_WITH_NAN <- Type[c("HALF_FLOAT", "FLOAT", "DOUBLE")]
+
 #' @rdname enums
 #' @export
 StatusCode <- enum("StatusCode",
diff --git a/r/R/expression.R b/r/R/expression.R
index ab83c41757d..49752ebcb76 100644
--- a/r/R/expression.R
+++ b/r/R/expression.R
@@ -20,8 +20,8 @@
 .unary_function_map <- list(
   "!" = "invert",
   "as.factor" = "dictionary_encode",
-  "is.na" = "is_null",
-  "is.nan" = "is_nan",
+  # is.na is defined in dplyr-functions.R
+  # is.nan is defined in dplyr-functions.R
   "abs" = "abs_checked",
   "sign" = "sign",
   # nchar is defined in dplyr-functions.R
@@ -207,4 +207,12 @@ Ops.Expression <- function(e1, e2) {
 }
 
 #' @export
-is.na.Expression <- function(x) Expression$create("is_null", x)
+is.na.Expression <- function(x) {
+  if (!is.null(x$schema) && x$type_id() %in% TYPES_WITH_NAN) {
+    # TODO: if an option is added to the is_null kernel to treat NaN as NA,
+    # use that to simplify the code here (ARROW-13367)
+    Expression$create("is_nan", x) | build_expr("is_null", x)
+  } else {
+    Expression$create("is_null", x)
+  }
+}
diff --git a/r/tests/testthat/helper-expectation.R b/r/tests/testthat/helper-expectation.R
index 359e31ef57d..c4dab9ace45 100644
--- a/r/tests/testthat/helper-expectation.R
+++ b/r/tests/testthat/helper-expectation.R
@@ -91,7 +91,8 @@ expect_dplyr_equal <- function(expr,
 
   if (isTRUE(warning)) {
     # Special-case the simple warning:
-    warning <- "not supported in Arrow; pulling data into R"
+    # TODO: ARROW-13362 pick one of in or by and use it everywhere
+    warning <- "not supported (in|by) Arrow; pulling data into R"
   }
 
   skip_msg <- NULL
diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R
index 63ac64eee5f..305f5a34634 100644
--- a/r/tests/testthat/test-Array.R
+++ b/r/tests/testthat/test-Array.R
@@ -317,6 +317,23 @@ test_that("support for NaN (ARROW-3615)", {
   expect_equal(y$null_count, 1L)
 })
 
+test_that("is.nan() evalutes to FALSE on NA (for consistency with base R)", {
+  x <- c(1.0, NA, NaN, -1.0)
+  expect_vector_equal(is.nan(input), x)
+})
+
+test_that("is.nan() evalutes to FALSE on non-floats (for consistency with base R)", {
+  x <- c(1L, 2L, 3L)
+  y <- c("foo", "bar")
+  expect_vector_equal(is.nan(input), x)
+  expect_vector_equal(is.nan(input), y)
+})
+
+test_that("is.na() evalutes to TRUE on NaN (for consistency with base R)", {
+  x <- c(1, NA, NaN, -1)
+  expect_vector_equal(is.na(input), x)
+})
+
 test_that("integer types casts (ARROW-3741)", {
   # Defining some type groups for use here and in the following tests
   int_types <- c(int8(), int16(), int32(), int64())
diff --git a/r/tests/testthat/test-compute-sort.R b/r/tests/testthat/test-compute-sort.R
index 63977b55414..373237ff9a1 100644
--- a/r/tests/testthat/test-compute-sort.R
+++ b/r/tests/testthat/test-compute-sort.R
@@ -118,7 +118,6 @@ test_that("sort(vector), sort(Array), sort(ChunkedArray) give equivalent results
     sort(input, decreasing = FALSE, na.last = TRUE),
     tbl$dbl
   )
-  skip("is.na() evaluates to FALSE on Arrow NaN values (ARROW-12055)")
   expect_vector_equal(
     sort(input, decreasing = TRUE, na.last = NA),
     tbl$dbl
diff --git a/r/tests/testthat/test-dplyr.R b/r/tests/testthat/test-dplyr.R
index 21ba019e498..e99f743690b 100644
--- a/r/tests/testthat/test-dplyr.R
+++ b/r/tests/testthat/test-dplyr.R
@@ -524,7 +524,7 @@ test_that("is.finite(), is.infinite(), is.nan()", {
       ) %>% collect(),
     df
   )
-  skip("is.nan() evaluates to NA on NA values (ARROW-12850)")
+  # is.nan() evaluates to FALSE on NA_real_ (ARROW-12850)
   expect_dplyr_equal(
     input %>%
       transmute(
@@ -534,6 +534,17 @@ test_that("is.finite(), is.infinite(), is.nan()", {
   )
 })
 
+test_that("is.na() evaluates to TRUE on NaN (ARROW-12055)", {
+  df <- tibble(x = c(1.1, 2.2, NA_real_, 4.4, NaN, 6.6, 7.7))
+  expect_dplyr_equal(
+    input %>%
+      transmute(
+        is_na = is.na(x)
+      ) %>% collect(),
+    df
+  )
+})
+
 test_that("type checks with is() giving Arrow types", {
   # with class2=DataType
   expect_equal(
@@ -835,6 +846,31 @@ test_that("type checks on expressions", {
   )
 })
 
+test_that("type checks on R scalar literals", {
+  expect_dplyr_equal(
+    input %>%
+      transmute(
+        chr_is_chr = is.character("foo"),
+        int_is_chr = is.character(42L),
+        int_is_int = is.integer(42L),
+        chr_is_int = is.integer("foo"),
+        dbl_is_num = is.numeric(3.14159),
+        int_is_num = is.numeric(42L),
+        chr_is_num = is.numeric("foo"),
+        dbl_is_dbl = is.double(3.14159),
+        chr_is_dbl = is.double("foo"),
+        lgl_is_lgl = is.logical(TRUE),
+        chr_is_lgl = is.logical("foo"),
+        fct_is_fct = is.factor(factor("foo", levels = c("foo", "bar", "baz"))),
+        chr_is_fct = is.factor("foo"),
+        lst_is_lst = is.list(list(c(a = "foo", b = "bar"))),
+        chr_is_lst = is.list("foo")
+      ) %>%
+      collect(),
+    tbl
+  )
+})
+
 test_that("as.factor()/dictionary_encode()", {
   skip("ARROW-12632: ExecuteScalarExpression cannot Execute non-scalar expression {x=dictionary_encode(x, {NON-REPRESENTABLE OPTIONS})}")
   df1 <- tibble(x = c("C", "D", "B", NA, "D", "B", "S", "A", "B", "Z", "B"))
@@ -1063,4 +1099,129 @@ test_that("trig functions", {
     df
   )
 
-})
\ No newline at end of file
+})
+
+test_that("if_else and ifelse", {
+  tbl <- example_data
+  tbl$another_chr <- tail(letters, 10)
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(
+        y = if_else(int > 5, 1, 0)
+      ) %>% collect(),
+    tbl
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(
+        y = if_else(int > 5, int, 0L)
+      ) %>% collect(),
+    tbl
+  )
+
+  expect_error(
+    Table$create(tbl) %>%
+      mutate(
+        y = if_else(int > 5, 1, FALSE)
+      ) %>% collect(),
+    'NotImplemented: Function if_else has no kernel matching input types'
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(
+        y = if_else(int > 5, 1, NA_real_)
+      ) %>% collect(),
+    tbl
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(
+        y = ifelse(int > 5, 1, 0)
+      ) %>% collect(),
+    tbl
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(
+        y = if_else(dbl > 5, TRUE, FALSE)
+      ) %>% collect(),
+    tbl
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(
+        y = if_else(chr %in% letters[1:3], 1L, 3L)
+      ) %>% collect(),
+    tbl
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(
+        y = if_else(int > 5, "one", "zero")
+      ) %>% collect(),
+    tbl
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(
+        y = if_else(int > 5, chr, another_chr)
+      ) %>% collect(),
+    tbl
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(
+        y = if_else(int > 5, "true", chr, missing = "MISSING")
+      ) %>% collect(),
+    tbl
+  )
+
+  # TODO: remove the mutate + warning after ARROW-13358 is merged and Arrow
+  # supports factors in if(_)else
+  expect_dplyr_equal(
+    input %>%
+      mutate(
+        y = if_else(int > 5, fct, factor("a"))
+      ) %>% collect() %>%
+      # This is a no-op on the Arrow side, but necesary to make the results equal
+      mutate(y = as.character(y)),
+    tbl,
+    warning = "Dictionaries .* are currently converted to strings .* in if_else and ifelse"
+  )
+
+  # detecting NA and NaN works just fine
+  expect_dplyr_equal(
+    input %>%
+      mutate(
+        y = if_else(is.na(dbl), chr, "false", missing = "MISSING")
+      ) %>% collect(),
+    example_data_for_sorting
+  )
+
+  # However, currently comparisons with NaNs return false and not NaNs or NAs
+  skip("ARROW-13364")
+  expect_dplyr_equal(
+    input %>%
+      mutate(
+        y = if_else(dbl > 5, chr, another_chr, missing = "MISSING")
+      ) %>% collect(),
+    example_data_for_sorting
+  )
+
+  skip("TODO: could? should? we support the autocasting in ifelse")
+  expect_dplyr_equal(
+    input %>%
+      mutate(y = ifelse(int > 5, 1, FALSE)) %>%
+      collect(),
+    tbl
+  )
+})

From 7cee59017e4ab4f86cdd00f520d8176c6a502e89 Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Sat, 17 Jul 2021 14:16:26 -0400
Subject: [PATCH 594/719] ARROW-13343: [R] Update NEWS.md for 5.0

Closes #10723 from nealrichardson/news-5.0

Lead-authored-by: Neal Richardson <neal.p.richardson@gmail.com>
Co-authored-by: Jonathan Keane <jkeane@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 r/DESCRIPTION |  1 +
 r/NEWS.md     | 36 ++++++++++++++++++++++++++++++------
 2 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/r/DESCRIPTION b/r/DESCRIPTION
index 3ad9472a209..f710712ed50 100644
--- a/r/DESCRIPTION
+++ b/r/DESCRIPTION
@@ -4,6 +4,7 @@ Version: 4.0.1.9000
 Authors@R: c(
     person("Neal", "Richardson", email = "neal@ursalabs.org", role = c("aut", "cre")),
     person("Ian", "Cook", email = "ianmcook@gmail.com", role = c("aut")),
+    person("Nic", "Crane", email = "thisisnic@gmail.com", role = c("aut")),
     person("Jonathan", "Keane", email = "jkeane@gmail.com", role = c("aut")),
     person("Romain", "Fran\u00e7ois", email = "romain@rstudio.com", role = c("aut"), comment = c(ORCID = "0000-0002-2444-4226")),
     person("Jeroen", "Ooms", email = "jeroen@berkeley.edu", role = c("aut")),
diff --git a/r/NEWS.md b/r/NEWS.md
index 792b2f4081c..a1cd67a2ec3 100644
--- a/r/NEWS.md
+++ b/r/NEWS.md
@@ -19,15 +19,39 @@
 
 # arrow 4.0.1.9000
 
-* `write_csv_arrow()` to write Arrow data to CSV
-* Bindings and support for more Arrow C++ Compute functions: `strsplit()` and `str_split()`, `na.omit()` et al., `any()`/`all()`,
-* `arrow_info()` now includes details on the C++ build, such as compiler version
-* `dplyr` queries on `Table` and `RecordBatch` now use the same expression internals as `Dataset` (via `InMemoryDataset`). Among other (mostly internal) benefits that come with this, the print method for `arrow_dplyr_query` now includes the expression and the resulting type of columns derived by `mutate()`.
-* Added bindings for the remainder of C data interface: Type, Field, and RecordBatchReader (from the experimental C stream interface). These also have `reticulate::py_to_r()` and `r_to_py()` methods. Along with the addition of the `Scanner$ToRecordBatchReader()` method, you can now build up a Dataset query in R and pass the resulting stream of batches to another tool in process.
-* `match_arrow()` now converts `x` into an `Array` if it is not a `Scalar`, `Array` or `ChunkedArray` and no longer dispatches `base::match()`.
+## More dplyr
+
+* There are now more than 250 compute functions available for use in `dplyr::filter()`, `mutate()`, etc. Additions in this release include:
+
+  * String operations: `strsplit()` and `str_split()`; `strptime()`; `paste()`, `paste0()`, and `str_c()`; `substr()` and `str_sub()`; `str_like()`; `str_pad()`; `stri_reverse()`
+  * Date/time operations: `lubridate` methods such as `year()`, `month()`, `wday()`, and so on
+  * Math: `log()`, trigonometry (`sin()`, `cos()`, et al.), `abs()`, `sign()`, `pmin()`/`pmax()`
+  * `is.*` functions are supported and can be used inside `relocate()`
+
+* The print method for `arrow_dplyr_query` now includes the expression and the resulting type of columns derived by `mutate()`.
 * `transmute()` now errors if passed arguments `.keep`, `.before`, or `.after`, for consistency with the behavior of `dplyr` on `data.frame`s.
+
+## CSV writing
+
+* `write_csv_arrow()` to use Arrow to write a data.frame to a single CSV file
+* `write_dataset(format = "csv", ...)` to write a Dataset to CSVs, including with partitioning
+
+## C interface
+
+* Added bindings for the remainder of C data interface: Type, Field, and RecordBatchReader (from the experimental C stream interface). These also have `reticulate::py_to_r()` and `r_to_py()` methods. Along with the addition of the `Scanner$ToRecordBatchReader()` method, you can now build up a Dataset query in R and pass the resulting stream of batches to another tool in process.
+* C interface methods are exposed on Arrow objects (e.g. `Array$export_to_c()`, `RecordBatch$import_from_c()`), similar to how they are in `pyarrow`. This facilitates their use in other packages. See the `py_to_r()` and `r_to_py()` methods for usage examples.
+
+## Other enhancements
+
+* Converting an R `data.frame` to an Arrow `Table` uses multithreading across columns
+* Some Arrow array types now use ALTREP when converting to R. To disable this, set `options(arrow.use_altrep = FALSE)`
 * `is.na()` now evaluates to `TRUE` on `NaN` values in floating point number fields, for consistency with base R.
 * `is.nan()` now evaluates to `FALSE` on `NA` values in floating point number fields and `FALSE` on all values in non-floating point fields, for consistency with base R.
+* Additional methods for `Array`, `ChunkedArray`, `RecordBatch`, and `Table`: `na.omit()` and friends, `any()`/`all()`
+* Scalar inputs to `RecordBatch$create()` and `Table$create()` are recycled
+* `arrow_info()` includes details on the C++ build, such as compiler version
+* `match_arrow()` now converts `x` into an `Array` if it is not a `Scalar`, `Array` or `ChunkedArray` and no longer dispatches `base::match()`.
+* Row-level metadata is now restricted to reading/writing single parquet or feather files. Row-level metadata with datasets is ignored (with a warning) if the dataset contains row-level metadata. Writing a dataset with row-level metadata will also be ignored (with a warning). We are working on a more robust implementation to support row-level metadata (and other complex types) --- stay tuned. For working with {sf} objects, [{sfarrow}](https://CRAN.R-project.org/package=sfarrow) is helpful for serializing sf columns and sharing them with geopandas.
 
 # arrow 4.0.1
 

From b32e0bf79e53d098813930335bb0bf683970b4eb Mon Sep 17 00:00:00 2001
From: Ian Cook <ianmcook@gmail.com>
Date: Sat, 17 Jul 2021 23:10:56 -0400
Subject: [PATCH 595/719] ARROW-13200: [R] Add binding for case_when()

Adds support for `case_when()` in dplyr verbs. I followed the example of `dbplyr::case_when()`, which is much simpler and more self-contained than `dplyr::case_when()`.

Closes #10737 from ianmcook/ARROW-13200

Lead-authored-by: Ian Cook <ianmcook@gmail.com>
Co-authored-by: Neal Richardson <neal.p.richardson@gmail.com>
Signed-off-by: Ian Cook <ianmcook@gmail.com>
---
 r/NEWS.md                     |   1 +
 r/R/dplyr-functions.R         |  35 +++++++++-
 r/src/compute.cpp             |   7 ++
 r/tests/testthat/test-dplyr.R | 119 ++++++++++++++++++++++++++++++++++
 4 files changed, 161 insertions(+), 1 deletion(-)

diff --git a/r/NEWS.md b/r/NEWS.md
index a1cd67a2ec3..9cd7542a012 100644
--- a/r/NEWS.md
+++ b/r/NEWS.md
@@ -26,6 +26,7 @@
   * String operations: `strsplit()` and `str_split()`; `strptime()`; `paste()`, `paste0()`, and `str_c()`; `substr()` and `str_sub()`; `str_like()`; `str_pad()`; `stri_reverse()`
   * Date/time operations: `lubridate` methods such as `year()`, `month()`, `wday()`, and so on
   * Math: `log()`, trigonometry (`sin()`, `cos()`, et al.), `abs()`, `sign()`, `pmin()`/`pmax()`
+  * Conditional: `ifelse()` and `if_else()` (fixed-precision decimal numbers do not yet work and factors/dictionaries are converted to character strings); `case_when()` (currently works with numeric data types but not character strings, factors/dictionaries, or lists/structs)
   * `is.*` functions are supported and can be used inside `relocate()`
 
 * The print method for `arrow_dplyr_query` now includes the expression and the resulting type of columns derived by `mutate()`.
diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R
index d118eefaa85..d42992006a5 100644
--- a/r/R/dplyr-functions.R
+++ b/r/R/dplyr-functions.R
@@ -698,6 +698,39 @@ nse_funcs$if_else <- function(condition, true, false, missing = NULL){
 
 # Although base R ifelse allows `yes` and `no` to be different classes
 #
-nse_funcs$ifelse <- function(test, yes, no){
+nse_funcs$ifelse <- function(test, yes, no) {
   nse_funcs$if_else(condition = test, true = yes, false = no)
 }
+
+nse_funcs$case_when <- function(...) {
+  formulas <- list2(...)
+  n <- length(formulas)
+  if (n == 0) {
+    abort("No cases provided in case_when()")
+  }
+  query <- vector("list", n)
+  value <- vector("list", n)
+  mask <- caller_env()
+  for (i in seq_len(n)) {
+    f <- formulas[[i]]
+    if (!inherits(f, "formula")) {
+      abort("Each argument to case_when() must be a two-sided formula")
+    }
+    query[[i]] <- arrow_eval(f[[2]], mask)
+    value[[i]] <- arrow_eval(f[[3]], mask)
+    if (!nse_funcs$is.logical(query[[i]])) {
+      abort("Left side of each formula in case_when() must be a logical expression")
+    }
+  }
+  build_expr(
+    "case_when",
+    args = c(
+      build_expr(
+        "make_struct",
+        args = query,
+        options = list(field_names = as.character(seq_along(query)))
+      ),
+      value
+    )
+  )
+}
diff --git a/r/src/compute.cpp b/r/src/compute.cpp
index 2c5ee77c8d0..30821137383 100644
--- a/r/src/compute.cpp
+++ b/r/src/compute.cpp
@@ -241,6 +241,13 @@ std::shared_ptr<arrow::compute::FunctionOptions> make_compute_options(
     return out;
   }
 
+  if (func_name == "make_struct") {
+    using Options = arrow::compute::MakeStructOptions;
+    // TODO (ARROW-13371): accept `field_nullability` and `field_metadata` options
+    return std::make_shared<Options>(
+        cpp11::as_cpp<std::vector<std::string>>(options["field_names"]));
+  }
+
   if (func_name == "match_substring" || func_name == "match_substring_regex" ||
       func_name == "find_substring" || func_name == "find_substring_regex" ||
       func_name == "match_like") {
diff --git a/r/tests/testthat/test-dplyr.R b/r/tests/testthat/test-dplyr.R
index e99f743690b..468ad8593bd 100644
--- a/r/tests/testthat/test-dplyr.R
+++ b/r/tests/testthat/test-dplyr.R
@@ -1225,3 +1225,122 @@ test_that("if_else and ifelse", {
     tbl
   )
 })
+
+test_that("case_when()", {
+  expect_dplyr_equal(
+    input %>%
+      transmute(cw = case_when(lgl ~ dbl, !false ~ dbl + dbl2)) %>%
+      collect(),
+    tbl
+  )
+  expect_dplyr_equal(
+    input %>%
+      mutate(cw = case_when(int > 5 ~ 1, TRUE ~ 0)) %>%
+      collect(),
+    tbl
+  )
+  expect_dplyr_equal(
+    input %>%
+      transmute(cw = case_when(chr %in% letters[1:3] ~ 1L) + 41L) %>%
+      collect(),
+    tbl
+  )
+  expect_dplyr_equal(
+    input %>%
+      filter(case_when(
+        dbl + int - 1.1 == dbl2 ~ TRUE,
+        NA ~ NA,
+        TRUE ~ FALSE
+      ) & !is.na(dbl2)) %>%
+      collect(),
+    tbl
+  )
+
+  # dplyr::case_when() errors if values on right side of formulas do not have
+  # exactly the same type, but the Arrow case_when kernel allows compatible types
+  expect_equal(
+    tbl %>%
+      mutate(i64 = as.integer64(1e10)) %>%
+      Table$create() %>%
+      transmute(cw = case_when(
+        is.na(fct) ~ int,
+        is.na(chr) ~ dbl,
+        TRUE ~ i64
+      )) %>%
+      collect(),
+    tbl %>%
+      transmute(
+        cw = ifelse(is.na(fct), int, ifelse(is.na(chr), dbl, 1e10))
+      )
+  )
+
+  # expected errors (which are caught by abandon_ship() and changed to warnings)
+  # TODO: Find a way to test these directly without abandon_ship() interfering
+  expect_error(
+    # no cases
+    expect_warning(
+      tbl %>%
+        Table$create() %>%
+        transmute(cw = case_when()),
+      "case_when"
+    )
+  )
+  expect_error(
+    # argument not a formula
+    expect_warning(
+      tbl %>%
+        Table$create() %>%
+        transmute(cw = case_when(TRUE ~ FALSE, TRUE)),
+      "case_when"
+    )
+  )
+  expect_error(
+    # non-logical R scalar on left side of formula
+    expect_warning(
+      tbl %>%
+        Table$create() %>%
+        transmute(cw = case_when(0L ~ FALSE, TRUE ~ FALSE)),
+      "case_when"
+    )
+  )
+  expect_error(
+    # non-logical Arrow column reference on left side of formula
+    expect_warning(
+      tbl %>%
+        Table$create() %>%
+        transmute(cw = case_when(int ~ FALSE)),
+      "case_when"
+    )
+  )
+  expect_error(
+    # non-logical Arrow expression on left side of formula
+    expect_warning(
+      tbl %>%
+        Table$create() %>%
+        transmute(cw = case_when(dbl + 3.14159 ~ TRUE)),
+      "case_when"
+    )
+  )
+
+  skip("case_when does not yet support with variable-width types (ARROW-13222)")
+  expect_dplyr_equal(
+    input %>%
+      transmute(cw = case_when(lgl ~ "abc")) %>%
+      collect(),
+    tbl
+  )
+  expect_dplyr_equal(
+    input %>%
+      transmute(cw = case_when(lgl ~ verses, !false ~ paste(chr, chr))) %>%
+      collect(),
+    tbl
+  )
+  expect_dplyr_equal(
+    input %>%
+      mutate(
+        cw = paste0(case_when(!(!(!(lgl))) ~ factor(chr), TRUE ~ fct), "!")
+      ) %>%
+      collect(),
+    tbl
+  )
+})

From 05665cc2b3b1a990d0975d5326e56e03b0cb3028 Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Mon, 19 Jul 2021 09:08:40 +0900
Subject: [PATCH 596/719] ARROW-13219: [C++][GLib] Demote/deprecate
 CompareOptions

CompareOptions is not properly a FunctionOptions but it has bindings
in GLib that expect it to be a FunctionOptions. After
https://issues.apache.org/jira/browse/ARROW-13025, we should make
CompareOptions a bare struct (instead of a FunctionOptions subclass)
and deprecate it in favor of just specifying the comparison operator
directly.

In C++, CompareOptions exists and is deprecated.

In GLib, GArrowCompareOptions and related APIs are removed.

Closes #10738 from kou/glib-compare-options

Authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 c_glib/arrow-glib/compute.cpp                 | 419 ------------------
 c_glib/arrow-glib/compute.h                   |  96 ----
 c_glib/arrow-glib/compute.hpp                 |   3 -
 c_glib/test/test-compare.rb                   |  69 ---
 cpp/src/arrow/compute/api_scalar.cc           |   8 -
 cpp/src/arrow/compute/api_scalar.h            |   9 +-
 cpp/src/arrow/compute/function_test.cc        |   2 -
 .../kernels/scalar_compare_benchmark.cc       |   5 +-
 .../compute/kernels/scalar_compare_test.cc    |   6 +-
 cpp/src/arrow/compute/kernels/test_util.h     |   8 +
 .../compute/kernels/vector_selection_test.cc  |  12 +-
 11 files changed, 26 insertions(+), 611 deletions(-)
 delete mode 100644 c_glib/test/test-compare.rb

diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp
index 3a67fbaad8e..e845b1d80cc 100644
--- a/c_glib/arrow-glib/compute.cpp
+++ b/c_glib/arrow-glib/compute.cpp
@@ -52,27 +52,6 @@ garrow_numeric_array_sum(GArrowArrayType array,
   }
 }
 
-template <typename GArrowArrayType, typename VALUE>
-GArrowBooleanArray *
-garrow_numeric_array_compare(GArrowArrayType array,
-                             VALUE value,
-                             GArrowCompareOptions *options,
-                             GError **error,
-                             const gchar *tag)
-{
-  auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array));
-  auto arrow_options = garrow_compare_options_get_raw(options);
-  auto arrow_compared_datum = arrow::compute::Compare(arrow_array,
-                                                      arrow::Datum(value),
-                                                      *arrow_options);
-  if (garrow::check(error, arrow_compared_datum, tag)) {
-    auto arrow_compared_array = (*arrow_compared_datum).make_array();
-    return GARROW_BOOLEAN_ARRAY(garrow_array_new_raw(&arrow_compared_array));
-  } else {
-    return NULL;
-  }
-}
-
 template <typename GArrowTypeNewRaw>
 auto
 garrow_take(arrow::Datum arrow_values,
@@ -140,9 +119,6 @@ G_BEGIN_DECLS
  * #GArrowTakeOptions is a class to customize the `take` function and
  * garrow_array_take() family.
  *
- * #GArrowCompareOptions is a class to customize the `equal` function
- * family and garrow_int8_array_compare() family.
- *
  * #GArrowArraySortOptions is a class to customize the
  * `array_sort_indices` function.
  *
@@ -989,133 +965,6 @@ garrow_take_options_new(void)
 }
 
 
-typedef struct GArrowCompareOptionsPrivate_ {
-  arrow::compute::CompareOptions options;
-} GArrowCompareOptionsPrivate;
-
-enum {
-  PROP_OPERATOR = 1,
-};
-
-static arrow::compute::FunctionOptions *
-garrow_compare_options_get_raw_function_options(GArrowFunctionOptions *options)
-{
-  return garrow_compare_options_get_raw(GARROW_COMPARE_OPTIONS(options));
-}
-
-static void
-garrow_compare_options_function_options_interface_init(
-  GArrowFunctionOptionsInterface *iface)
-{
-  iface->get_raw = garrow_compare_options_get_raw_function_options;
-}
-
-G_DEFINE_TYPE_WITH_CODE(GArrowCompareOptions,
-                        garrow_compare_options,
-                        G_TYPE_OBJECT,
-                        G_ADD_PRIVATE(GArrowCompareOptions)
-                        G_IMPLEMENT_INTERFACE(
-                          GARROW_TYPE_FUNCTION_OPTIONS,
-                          garrow_compare_options_function_options_interface_init))
-
-#define GARROW_COMPARE_OPTIONS_GET_PRIVATE(object)        \
-  static_cast<GArrowCompareOptionsPrivate *>(             \
-    garrow_compare_options_get_instance_private(          \
-      GARROW_COMPARE_OPTIONS(object)))
-
-static void
-garrow_compare_options_finalize(GObject *object)
-{
-  auto priv = GARROW_COMPARE_OPTIONS_GET_PRIVATE(object);
-  priv->options.~CompareOptions();
-  G_OBJECT_CLASS(garrow_compare_options_parent_class)->finalize(object);
-}
-
-static void
-garrow_compare_options_set_property(GObject *object,
-                                    guint prop_id,
-                                    const GValue *value,
-                                    GParamSpec *pspec)
-{
-  auto priv = GARROW_COMPARE_OPTIONS_GET_PRIVATE(object);
-
-  switch (prop_id) {
-  case PROP_OPERATOR:
-    priv->options.op =
-      static_cast<arrow::compute::CompareOperator>(g_value_get_enum(value));
-    break;
-  default:
-    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
-    break;
-  }
-}
-
-static void
-garrow_compare_options_get_property(GObject *object,
-                                    guint prop_id,
-                                    GValue *value,
-                                    GParamSpec *pspec)
-{
-  auto priv = GARROW_COMPARE_OPTIONS_GET_PRIVATE(object);
-
-  switch (prop_id) {
-  case PROP_OPERATOR:
-    g_value_set_enum(value, priv->options.op);
-    break;
-  default:
-    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
-    break;
-  }
-}
-
-static void
-garrow_compare_options_init(GArrowCompareOptions *object)
-{
-  auto priv = GARROW_COMPARE_OPTIONS_GET_PRIVATE(object);
-  new(&priv->options) arrow::compute::CompareOptions(arrow::compute::EQUAL);
-}
-
-static void
-garrow_compare_options_class_init(GArrowCompareOptionsClass *klass)
-{
-  auto gobject_class = G_OBJECT_CLASS(klass);
-
-  gobject_class->finalize     = garrow_compare_options_finalize;
-  gobject_class->set_property = garrow_compare_options_set_property;
-  gobject_class->get_property = garrow_compare_options_get_property;
-
-  GParamSpec *spec;
-  /**
-   * GArrowCompareOptions:operator:
-   *
-   * How to compare the value.
-   *
-   * Since: 0.14.0
-   */
-  spec = g_param_spec_enum("operator",
-                           "Operator",
-                           "How to compare the value",
-                           GARROW_TYPE_COMPARE_OPERATOR,
-                           0,
-                           static_cast<GParamFlags>(G_PARAM_READWRITE));
-  g_object_class_install_property(gobject_class, PROP_OPERATOR, spec);
-}
-
-/**
- * garrow_compare_options_new:
- *
- * Returns: A newly created #GArrowCompareOptions.
- *
- * Since: 0.14.0
- */
-GArrowCompareOptions *
-garrow_compare_options_new(void)
-{
-  auto compare_options = g_object_new(GARROW_TYPE_COMPARE_OPTIONS, NULL);
-  return GARROW_COMPARE_OPTIONS(compare_options);
-}
-
-
 typedef struct GArrowArraySortOptionsPrivate_ {
   arrow::compute::ArraySortOptions options;
 } GArrowArraySortOptionsPrivate;
@@ -2332,267 +2181,6 @@ garrow_record_batch_take(GArrowRecordBatch *record_batch,
     "[record-batch][take]");
 }
 
-
-/**
- * garrow_int8_array_compare:
- * @array: A #GArrowInt8Array.
- * @value: The value to compare.
- * @options: A #GArrowCompareOptions.
- * @error: (nullable): Return location for a #GError or %NULL.
- *
- * Returns: (nullable) (transfer full): The #GArrowBooleanArray as
- *   the result compared a numeric array with a scalar on success,
- *   %NULL on error.
- *
- * Since: 0.14.0
- */
-GArrowBooleanArray *
-garrow_int8_array_compare(GArrowInt8Array *array,
-                          gint8 value,
-                          GArrowCompareOptions *options,
-                          GError **error)
-{
-  return garrow_numeric_array_compare(array,
-                                      value,
-                                      options,
-                                      error,
-                                      "[int8-array][compare]");
-}
-
-/**
- * garrow_uint8_array_compare:
- * @array: A #GArrowUInt8Array.
- * @value: The value to compare.
- * @options: A #GArrowCompareOptions.
- * @error: (nullable): Return location for a #GError or %NULL.
- *
- * Returns: (nullable) (transfer full): The #GArrowBooleanArray as
- *   the result compared a numeric array with a scalar on success,
- *   %NULL on error.
- *
- * Since: 0.14.0
- */
-GArrowBooleanArray *
-garrow_uint8_array_compare(GArrowUInt8Array *array,
-                           guint8 value,
-                           GArrowCompareOptions *options,
-                           GError **error)
-{
-  return garrow_numeric_array_compare(array,
-                                      value,
-                                      options,
-                                      error,
-                                      "[uint8-array][compare]");
-}
-
-/**
- * garrow_int16_array_compare:
- * @array: A #GArrowInt16Array.
- * @value: The value to compare.
- * @options: A #GArrowCompareOptions.
- * @error: (nullable): Return location for a #GError or %NULL.
- *
- * Returns: (nullable) (transfer full): The #GArrowBooleanArray as
- *   the result compared a numeric array with a scalar on success,
- *   %NULL on error.
- *
- * Since: 0.14.0
- */
-GArrowBooleanArray *
-garrow_int16_array_compare(GArrowInt16Array *array,
-                           gint16 value,
-                           GArrowCompareOptions *options,
-                           GError **error)
-{
-  return garrow_numeric_array_compare(array,
-                                      value,
-                                      options,
-                                      error,
-                                      "[int16-array][compare]");
-}
-
-/**
- * garrow_uint16_array_compare:
- * @array: A #GArrowUInt16Array.
- * @value: The value to compare.
- * @options: A #GArrowCompareOptions.
- * @error: (nullable): Return location for a #GError or %NULL.
- *
- * Returns: (nullable) (transfer full): The #GArrowBooleanArray as
- *   the result compared a numeric array with a scalar on success,
- *   %NULL on error.
- *
- * Since: 0.14.0
- */
-GArrowBooleanArray *
-garrow_uint16_array_compare(GArrowUInt16Array *array,
-                            guint16 value,
-                            GArrowCompareOptions *options,
-                            GError **error)
-{
-  return garrow_numeric_array_compare(array,
-                                      value,
-                                      options,
-                                      error,
-                                      "[uint16-array][compare]");
-}
-
-/**
- * garrow_int32_array_compare:
- * @array: A #GArrowUInt32Array.
- * @value: The value to compare.
- * @options: A #GArrowCompareOptions.
- * @error: (nullable): Return location for a #GError or %NULL.
- *
- * Returns: (nullable) (transfer full): The #GArrowBooleanArray as
- *   the result compared a numeric array with a scalar on success,
- *   %NULL on error.
- *
- * Since: 0.14.0
- */
-GArrowBooleanArray *
-garrow_int32_array_compare(GArrowInt32Array *array,
-                           gint32 value,
-                           GArrowCompareOptions *options,
-                           GError **error)
-{
-  return garrow_numeric_array_compare(array,
-                                      value,
-                                      options,
-                                      error,
-                                      "[int32-array][compare]");
-}
-
-/**
- * garrow_uint32_array_compare:
- * @array: A #GArrowUInt32Array.
- * @value: The value to compare.
- * @options: A #GArrowCompareOptions.
- * @error: (nullable): Return location for a #GError or %NULL.
- *
- * Returns: (nullable) (transfer full): The #GArrowBooleanArray as
- *   the result compared a numeric array with a scalar on success,
- *   %NULL on error.
- *
- * Since: 0.14.0
- */
-GArrowBooleanArray *
-garrow_uint32_array_compare(GArrowUInt32Array *array,
-                            guint32 value,
-                            GArrowCompareOptions *options,
-                            GError **error)
-{
-  return garrow_numeric_array_compare(array,
-                                      value,
-                                      options,
-                                      error,
-                                      "[uint32-array][compare]");
-}
-
-/**
- * garrow_int64_array_compare:
- * @array: A #GArrowInt64Array.
- * @value: The value to compare.
- * @options: A #GArrowCompareOptions.
- * @error: (nullable): Return location for a #GError or %NULL.
- *
- * Returns: (nullable) (transfer full): The #GArrowBooleanArray as
- *   the result compared a numeric array with a scalar on success,
- *   %NULL on error.
- *
- * Since: 0.14.0
- */
-GArrowBooleanArray *
-garrow_int64_array_compare(GArrowInt64Array *array,
-                           gint64 value,
-                           GArrowCompareOptions *options,
-                           GError **error)
-{
-  return garrow_numeric_array_compare(array,
-                                      value,
-                                      options,
-                                      error,
-                                      "[int64-array][compare]");
-}
-
-/**
- * garrow_uint64_array_compare:
- * @array: A #GArrowUInt64Array.
- * @value: The value to compare.
- * @options: A #GArrowCompareOptions.
- * @error: (nullable): Return location for a #GError or %NULL.
- *
- * Returns: (nullable) (transfer full): The #GArrowBooleanArray as
- *   the result compared a numeric array with a scalar on success,
- *   %NULL on error.
- *
- * Since: 0.14.0
- */
-GArrowBooleanArray *
-garrow_uint64_array_compare(GArrowUInt64Array *array,
-                            guint64 value,
-                            GArrowCompareOptions *options,
-                            GError **error)
-{
-  return garrow_numeric_array_compare(array,
-                                      value,
-                                      options,
-                                      error,
-                                      "[uint64-array][compare]");
-}
-
-/**
- * garrow_float_array_compare:
- * @array: A #GArrowFloatArray.
- * @value: The value to compare.
- * @options: A #GArrowCompareOptions.
- * @error: (nullable): Return location for a #GError or %NULL.
- *
- * Returns: (nullable) (transfer full): The #GArrowBooleanArray as
- *   the result compared a numeric array with a scalar on success,
- *   %NULL on error.
- *
- * Since: 0.14.0
- */
-GArrowBooleanArray *
-garrow_float_array_compare(GArrowFloatArray *array,
-                           gfloat value,
-                           GArrowCompareOptions *options,
-                           GError **error)
-{
-  return garrow_numeric_array_compare(array,
-                                      value,
-                                      options,
-                                      error,
-                                      "[float-array][compare]");
-}
-
-/**
- * garrow_double_array_compare:
- * @array: A #GArrowDoubleArray.
- * @value: The value to compare.
- * @options: A #GArrowCompareOptions.
- * @error: (nullable): Return location for a #GError or %NULL.
- *
- * Returns: (nullable) (transfer full): The #GArrowBooleanArray as
- *   the result compared a numeric array with a scalar on success,
- *   %NULL on error.
- *
- * Since: 0.14.0
- */
-GArrowBooleanArray *
-garrow_double_array_compare(GArrowDoubleArray *array,
-                            gdouble value,
-                            GArrowCompareOptions *options,
-                            GError **error)
-{
-  return garrow_numeric_array_compare(array,
-                                      value,
-                                      options,
-                                      error,
-                                      "[double-array][compare]");
-}
-
 /**
  * garrow_array_filter:
  * @array: A #GArrowArray.
@@ -3120,13 +2708,6 @@ garrow_take_options_get_raw(GArrowTakeOptions *take_options)
   return &(priv->options);
 }
 
-arrow::compute::CompareOptions *
-garrow_compare_options_get_raw(GArrowCompareOptions *compare_options)
-{
-  auto priv = GARROW_COMPARE_OPTIONS_GET_PRIVATE(compare_options);
-  return &(priv->options);
-}
-
 arrow::compute::ArraySortOptions *
 garrow_array_sort_options_get_raw(GArrowArraySortOptions *array_sort_options)
 {
diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h
index a9e57945ba5..1163983644c 100644
--- a/c_glib/arrow-glib/compute.h
+++ b/c_glib/arrow-glib/compute.h
@@ -146,42 +146,6 @@ GArrowTakeOptions *
 garrow_take_options_new(void);
 
 
-/**
- * GArrowCompareOperator:
- * @GARROW_COMPARE_EQUAL: Equal operator.
- * @GARROW_COMPARE_NOT_EQUAL: Not equal operator.
- * @GARROW_COMPARE_GREATER: Greater operator.
- * @GARROW_COMPARE_GREATER_EQUAL: Greater equal operator.
- * @GARROW_COMPARE_LESS: Less operator.
- * @GARROW_COMPARE_LESS_EQUAL: Less equal operator.
- *
- * They are corresponding to `arrow::compute::CompareOperator` values.
- */
-typedef enum {
-  GARROW_COMPARE_EQUAL,
-  GARROW_COMPARE_NOT_EQUAL,
-  GARROW_COMPARE_GREATER,
-  GARROW_COMPARE_GREATER_EQUAL,
-  GARROW_COMPARE_LESS,
-  GARROW_COMPARE_LESS_EQUAL
-} GArrowCompareOperator;
-
-#define GARROW_TYPE_COMPARE_OPTIONS (garrow_compare_options_get_type())
-G_DECLARE_DERIVABLE_TYPE(GArrowCompareOptions,
-                         garrow_compare_options,
-                         GARROW,
-                         COMPARE_OPTIONS,
-                         GObject)
-struct _GArrowCompareOptionsClass
-{
-  GObjectClass parent_class;
-};
-
-GARROW_AVAILABLE_IN_0_14
-GArrowCompareOptions *
-garrow_compare_options_new(void);
-
-
 /**
  * GArrowSortOrder:
  * @GARROW_SORT_ORDER_ASCENDING: Sort in ascending order.
@@ -375,66 +339,6 @@ garrow_record_batch_take(GArrowRecordBatch *record_batch,
                          GArrowArray *indices,
                          GArrowTakeOptions *options,
                          GError **error);
-GARROW_AVAILABLE_IN_0_14
-GArrowBooleanArray *
-garrow_int8_array_compare(GArrowInt8Array *array,
-                          gint8 value,
-                          GArrowCompareOptions *options,
-                          GError **error);
-GARROW_AVAILABLE_IN_0_14
-GArrowBooleanArray *
-garrow_uint8_array_compare(GArrowUInt8Array *array,
-                           guint8 value,
-                           GArrowCompareOptions *options,
-                           GError **error);
-GARROW_AVAILABLE_IN_0_14
-GArrowBooleanArray *
-garrow_int16_array_compare(GArrowInt16Array *array,
-                           gint16 value,
-                           GArrowCompareOptions *options,
-                           GError **error);
-GARROW_AVAILABLE_IN_0_14
-GArrowBooleanArray *
-garrow_uint16_array_compare(GArrowUInt16Array *array,
-                            guint16 value,
-                            GArrowCompareOptions *options,
-                            GError **error);
-GARROW_AVAILABLE_IN_0_14
-GArrowBooleanArray *
-garrow_int32_array_compare(GArrowInt32Array *array,
-                           gint32 value,
-                           GArrowCompareOptions *options,
-                           GError **error);
-GARROW_AVAILABLE_IN_0_14
-GArrowBooleanArray *
-garrow_uint32_array_compare(GArrowUInt32Array *array,
-                            guint32 value,
-                            GArrowCompareOptions *options,
-                            GError **error);
-GARROW_AVAILABLE_IN_0_14
-GArrowBooleanArray *
-garrow_int64_array_compare(GArrowInt64Array *array,
-                           gint64 value,
-                           GArrowCompareOptions *options,
-                           GError **error);
-GARROW_AVAILABLE_IN_0_14
-GArrowBooleanArray *
-garrow_uint64_array_compare(GArrowUInt64Array *array,
-                            guint64 value,
-                            GArrowCompareOptions *options,
-                            GError **error);
-GARROW_AVAILABLE_IN_0_14
-GArrowBooleanArray *
-garrow_float_array_compare(GArrowFloatArray *array,
-                           gfloat value,
-                           GArrowCompareOptions *options,
-                           GError **error);
-GARROW_AVAILABLE_IN_0_14
-GArrowBooleanArray *
-garrow_double_array_compare(GArrowDoubleArray *array,
-                            gdouble value,
-                            GArrowCompareOptions *options,
-                            GError **error);
 GARROW_AVAILABLE_IN_0_15
 GArrowArray *
 garrow_array_filter(GArrowArray *array,
diff --git a/c_glib/arrow-glib/compute.hpp b/c_glib/arrow-glib/compute.hpp
index 289bcbe31af..8089a1d3364 100644
--- a/c_glib/arrow-glib/compute.hpp
+++ b/c_glib/arrow-glib/compute.hpp
@@ -59,9 +59,6 @@ garrow_filter_options_get_raw(GArrowFilterOptions *filter_options);
 arrow::compute::TakeOptions *
 garrow_take_options_get_raw(GArrowTakeOptions *take_options);
 
-arrow::compute::CompareOptions *
-garrow_compare_options_get_raw(GArrowCompareOptions *compare_options);
-
 arrow::compute::ArraySortOptions *
 garrow_array_sort_options_get_raw(GArrowArraySortOptions *array_sort_options);
 
diff --git a/c_glib/test/test-compare.rb b/c_glib/test/test-compare.rb
deleted file mode 100644
index 2ffe39839df..00000000000
--- a/c_glib/test/test-compare.rb
+++ /dev/null
@@ -1,69 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-class TestCompare < Test::Unit::TestCase
-  include Helper::Buildable
-
-  def setup
-    @options = Arrow::CompareOptions.new
-  end
-
-  sub_test_case("CompareOptions") do
-    def test_default_operator
-      assert_equal(Arrow::CompareOperator::EQUAL,
-                   @options.operator)
-    end
-  end
-
-  sub_test_case("operator") do
-    def test_equal
-      @options.operator = :equal
-      assert_equal(build_boolean_array([true, nil, false]),
-                   build_int32_array([1, nil, 3]).compare(1, @options))
-    end
-
-    def test_not_equal
-      @options.operator = :not_equal
-      assert_equal(build_boolean_array([false, nil, true]),
-                   build_int32_array([1, nil, 3]).compare(1, @options))
-    end
-
-    def test_greater
-      @options.operator = :greater
-      assert_equal(build_boolean_array([false, nil, true]),
-                   build_int32_array([1, nil, 3]).compare(1, @options))
-    end
-
-    def test_greater_equal
-      @options.operator = :greater_equal
-      assert_equal(build_boolean_array([true, nil, true]),
-                   build_int32_array([1, nil, 3]).compare(1, @options))
-    end
-
-    def test_less
-      @options.operator = :less
-      assert_equal(build_boolean_array([false, nil, false]),
-                   build_int32_array([1, nil, 3]).compare(1, @options))
-    end
-
-    def test_less_equal
-      @options.operator = :less_equal
-      assert_equal(build_boolean_array([true, nil, false]),
-                   build_int32_array([1, nil, 3]).compare(1, @options))
-    end
-  end
-end
diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc
index 9357fb5f557..1feb4e7eee0 100644
--- a/cpp/src/arrow/compute/api_scalar.cc
+++ b/cpp/src/arrow/compute/api_scalar.cc
@@ -152,8 +152,6 @@ static auto kTrimOptionsType = GetFunctionOptionsType<TrimOptions>(
 static auto kSliceOptionsType = GetFunctionOptionsType<SliceOptions>(
     DataMember("start", &SliceOptions::start), DataMember("stop", &SliceOptions::stop),
     DataMember("step", &SliceOptions::step));
-static auto kCompareOptionsType =
-    GetFunctionOptionsType<CompareOptions>(DataMember("op", &CompareOptions::op));
 static auto kMakeStructOptionsType = GetFunctionOptionsType<MakeStructOptions>(
     DataMember("field_names", &MakeStructOptions::field_names),
     DataMember("field_nullability", &MakeStructOptions::field_nullability),
@@ -260,11 +258,6 @@ SliceOptions::SliceOptions(int64_t start, int64_t stop, int64_t step)
 SliceOptions::SliceOptions() : SliceOptions(0, 0, 1) {}
 constexpr char SliceOptions::kTypeName[];
 
-CompareOptions::CompareOptions(CompareOperator op)
-    : FunctionOptions(internal::kCompareOptionsType), op(op) {}
-CompareOptions::CompareOptions() : CompareOptions(CompareOperator::EQUAL) {}
-constexpr char CompareOptions::kTypeName[];
-
 MakeStructOptions::MakeStructOptions(
     std::vector<std::string> n, std::vector<bool> r,
     std::vector<std::shared_ptr<const KeyValueMetadata>> m)
@@ -304,7 +297,6 @@ void RegisterScalarOptions(FunctionRegistry* registry) {
   DCHECK_OK(registry->AddFunctionOptionsType(kPadOptionsType));
   DCHECK_OK(registry->AddFunctionOptionsType(kTrimOptionsType));
   DCHECK_OK(registry->AddFunctionOptionsType(kSliceOptionsType));
-  DCHECK_OK(registry->AddFunctionOptionsType(kCompareOptionsType));
   DCHECK_OK(registry->AddFunctionOptionsType(kMakeStructOptionsType));
   DCHECK_OK(registry->AddFunctionOptionsType(kDayOfWeekOptionsType));
 }
diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h
index c77994d89bb..e07e41569a1 100644
--- a/cpp/src/arrow/compute/api_scalar.h
+++ b/cpp/src/arrow/compute/api_scalar.h
@@ -218,11 +218,9 @@ enum CompareOperator : int8_t {
   LESS_EQUAL,
 };
 
-class ARROW_EXPORT CompareOptions : public FunctionOptions {
- public:
-  explicit CompareOptions(CompareOperator op);
-  CompareOptions();
-  constexpr static char const kTypeName[] = "CompareOptions";
+struct ARROW_EXPORT CompareOptions {
+  explicit CompareOptions(CompareOperator op) : op(op) {}
+  CompareOptions() : CompareOptions(CompareOperator::EQUAL) {}
   enum CompareOperator op;
 };
 
@@ -564,6 +562,7 @@ Result<Datum> Sign(const Datum& arg, ExecContext* ctx = NULLPTR);
 ///
 /// \since 1.0.0
 /// \note API not yet finalized
+ARROW_DEPRECATED("Deprecated in 5.0.0. Use each compare function directly")
 ARROW_EXPORT
 Result<Datum> Compare(const Datum& left, const Datum& right, CompareOptions options,
                       ExecContext* ctx = NULLPTR);
diff --git a/cpp/src/arrow/compute/function_test.cc b/cpp/src/arrow/compute/function_test.cc
index 225f80736a6..7aca10ef0fa 100644
--- a/cpp/src/arrow/compute/function_test.cc
+++ b/cpp/src/arrow/compute/function_test.cc
@@ -83,8 +83,6 @@ TEST(FunctionOptions, Equality) {
   options.emplace_back(new TrimOptions("abc"));
   options.emplace_back(new SliceOptions(/*start=*/1));
   options.emplace_back(new SliceOptions(/*start=*/1, /*stop=*/-5, /*step=*/-2));
-  options.emplace_back(new CompareOptions(CompareOperator::EQUAL));
-  options.emplace_back(new CompareOptions(CompareOperator::LESS));
   // N.B. we never actually use field_nullability or field_metadata in Arrow
   options.emplace_back(new MakeStructOptions({"col1"}, {true}, {}));
   options.emplace_back(new MakeStructOptions({"col1"}, {false}, {}));
diff --git a/cpp/src/arrow/compute/kernels/scalar_compare_benchmark.cc b/cpp/src/arrow/compute/kernels/scalar_compare_benchmark.cc
index ce18365fb5d..86be319a345 100644
--- a/cpp/src/arrow/compute/kernels/scalar_compare_benchmark.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_compare_benchmark.cc
@@ -38,7 +38,8 @@ static void CompareArrayScalar(benchmark::State& state) {
   auto array = rand.ArrayOf(ty, args.size, args.null_proportion);
   auto scalar = *rand.ArrayOf(ty, 1, 0)->GetScalar(0);
   for (auto _ : state) {
-    ABORT_NOT_OK(Compare(array, Datum(scalar), CompareOptions(op)).status());
+    ABORT_NOT_OK(
+        CallFunction(CompareOperatorToFunctionName(op), {array, Datum(scalar)}).status());
   }
 }
 
@@ -50,7 +51,7 @@ static void CompareArrayArray(benchmark::State& state) {
   auto lhs = rand.ArrayOf(ty, args.size, args.null_proportion);
   auto rhs = rand.ArrayOf(ty, args.size, args.null_proportion);
   for (auto _ : state) {
-    ABORT_NOT_OK(Compare(lhs, rhs, CompareOptions(op)).status());
+    ABORT_NOT_OK(CallFunction(CompareOperatorToFunctionName(op), {lhs, rhs}).status());
   }
 }
 
diff --git a/cpp/src/arrow/compute/kernels/scalar_compare_test.cc b/cpp/src/arrow/compute/kernels/scalar_compare_test.cc
index 87f3bd3fc23..37680945a3e 100644
--- a/cpp/src/arrow/compute/kernels/scalar_compare_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_compare_test.cc
@@ -46,7 +46,8 @@ using util::string_view;
 template <typename ArrowType>
 static void ValidateCompare(CompareOptions options, const Datum& lhs, const Datum& rhs,
                             const Datum& expected) {
-  ASSERT_OK_AND_ASSIGN(Datum result, Compare(lhs, rhs, options));
+  ASSERT_OK_AND_ASSIGN(
+      Datum result, CallFunction(CompareOperatorToFunctionName(options.op), {lhs, rhs}));
   AssertArraysEqual(*expected.make_array(), *result.make_array(),
                     /*verbose=*/true);
 }
@@ -430,7 +431,8 @@ TEST(TestCompareTimestamps, Basics) {
     auto lhs = ArrayFromJSON(type, example1_json);
     auto rhs = ArrayFromJSON(type, example2_json);
     auto expected = ArrayFromJSON(boolean(), expected_json);
-    ASSERT_OK_AND_ASSIGN(Datum result, Compare(lhs, rhs, CompareOptions(op)));
+    ASSERT_OK_AND_ASSIGN(Datum result,
+                         CallFunction(CompareOperatorToFunctionName(op), {lhs, rhs}));
     AssertArraysEqual(*expected, *result.make_array(), /*verbose=*/true);
   };
 
diff --git a/cpp/src/arrow/compute/kernels/test_util.h b/cpp/src/arrow/compute/kernels/test_util.h
index b10ede6f8f5..c366b99a71d 100644
--- a/cpp/src/arrow/compute/kernels/test_util.h
+++ b/cpp/src/arrow/compute/kernels/test_util.h
@@ -26,6 +26,7 @@
 #include <vector>
 
 #include "arrow/array.h"
+#include "arrow/compute/api_scalar.h"
 #include "arrow/compute/kernel.h"
 #include "arrow/datum.h"
 #include "arrow/memory_pool.h"
@@ -58,6 +59,13 @@ std::shared_ptr<Array> _MakeArray(const std::shared_ptr<DataType>& type,
   return result;
 }
 
+inline std::string CompareOperatorToFunctionName(CompareOperator op) {
+  static std::string function_names[] = {
+      "equal", "not_equal", "greater", "greater_equal", "less", "less_equal",
+  };
+  return function_names[op];
+}
+
 void CheckScalar(std::string func_name, const ScalarVector& inputs,
                  std::shared_ptr<Scalar> expected,
                  const FunctionOptions* options = nullptr);
diff --git a/cpp/src/arrow/compute/kernels/vector_selection_test.cc b/cpp/src/arrow/compute/kernels/vector_selection_test.cc
index e367d888d00..2c8830e88c0 100644
--- a/cpp/src/arrow/compute/kernels/vector_selection_test.cc
+++ b/cpp/src/arrow/compute/kernels/vector_selection_test.cc
@@ -380,8 +380,9 @@ TYPED_TEST(TestFilterKernelWithNumeric, CompareScalarAndFilterRandomNumeric) {
     CType c_fifty = 50;
     auto fifty = std::make_shared<ScalarType>(c_fifty);
     for (auto op : {EQUAL, NOT_EQUAL, GREATER, LESS_EQUAL}) {
-      ASSERT_OK_AND_ASSIGN(Datum selection,
-                           Compare(array, Datum(fifty), CompareOptions(op)));
+      ASSERT_OK_AND_ASSIGN(
+          Datum selection,
+          CallFunction(CompareOperatorToFunctionName(op), {array, Datum(fifty)}));
       ASSERT_OK_AND_ASSIGN(Datum filtered, Filter(array, selection));
       auto filtered_array = filtered.make_array();
       ValidateOutput(*filtered_array);
@@ -403,7 +404,8 @@ TYPED_TEST(TestFilterKernelWithNumeric, CompareArrayAndFilterRandomNumeric) {
     auto rhs = checked_pointer_cast<ArrayType>(
         rand.Numeric<TypeParam>(length, 0, 100, /*null_probability=*/0.0));
     for (auto op : {EQUAL, NOT_EQUAL, GREATER, LESS_EQUAL}) {
-      ASSERT_OK_AND_ASSIGN(Datum selection, Compare(lhs, rhs, CompareOptions(op)));
+      ASSERT_OK_AND_ASSIGN(Datum selection,
+                           CallFunction(CompareOperatorToFunctionName(op), {lhs, rhs}));
       ASSERT_OK_AND_ASSIGN(Datum filtered, Filter(lhs, selection));
       auto filtered_array = filtered.make_array();
       ValidateOutput(*filtered_array);
@@ -428,9 +430,9 @@ TYPED_TEST(TestFilterKernelWithNumeric, ScalarInRangeAndFilterRandomNumeric) {
     auto fifty = std::make_shared<ScalarType>(c_fifty);
     auto hundred = std::make_shared<ScalarType>(c_hundred);
     ASSERT_OK_AND_ASSIGN(Datum greater_than_fifty,
-                         Compare(array, Datum(fifty), CompareOptions(GREATER)));
+                         CallFunction("greater", {array, Datum(fifty)}));
     ASSERT_OK_AND_ASSIGN(Datum less_than_hundred,
-                         Compare(array, Datum(hundred), CompareOptions(LESS)));
+                         CallFunction("less", {array, Datum(hundred)}));
     ASSERT_OK_AND_ASSIGN(Datum selection, And(greater_than_fifty, less_than_hundred));
     ASSERT_OK_AND_ASSIGN(Datum filtered, Filter(array, selection));
     auto filtered_array = filtered.make_array();

From ea42b9e0aa000238fff22fd48f06f3aa516b9f3f Mon Sep 17 00:00:00 2001
From: Jiangtao Peng <pengjiangtao@yanhuangdata.com>
Date: Mon, 19 Jul 2021 17:38:30 +0800
Subject: [PATCH 597/719] ARROW-13306: [Java][JDBC] use
 ResultSetMetaData.getColumnLabel instead of ResultSetMetaData.getColumnName

quick fixed using `getColumnLabel` instead of `getColumnName`

Closes #10700 from NinaPeng/bugfix/use_jdbc_metadata_column_label_instead_of_column_name

Authored-by: Jiangtao Peng <pengjiangtao@yanhuangdata.com>
Signed-off-by: liyafan82 <fan_li_ya@foxmail.com>
---
 .../adapter/jdbc/ArrowVectorIterator.java     |   2 +-
 .../arrow/adapter/jdbc/JdbcToArrowUtils.java  |   8 +-
 .../adapter/jdbc/JdbcToArrowTestHelper.java   |   2 +-
 .../adapter/jdbc/h2/JdbcAliasToArrowTest.java | 140 ++++++++++++++++++
 .../adapter/jdbc/h2/JdbcToArrowTest.java      |   2 +-
 5 files changed, 147 insertions(+), 7 deletions(-)
 create mode 100644 java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcAliasToArrowTest.java

diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/ArrowVectorIterator.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/ArrowVectorIterator.java
index b8796cda71e..e44505605a3 100644
--- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/ArrowVectorIterator.java
+++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/ArrowVectorIterator.java
@@ -139,7 +139,7 @@ private VectorSchemaRoot createVectorSchemaRoot() {
   private void load(VectorSchemaRoot root) throws SQLException {
 
     for (int i = 1; i <= consumers.length; i++) {
-      consumers[i - 1].resetValueVector(root.getVector(rsmd.getColumnName(i)));
+      consumers[i - 1].resetValueVector(root.getVector(rsmd.getColumnLabel(i)));
     }
 
     consumeData(root);
diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java
index 9f773e8f664..e05f21d48cf 100644
--- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java
+++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java
@@ -115,7 +115,7 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar
    * <ul>
    *  <li>{@link Constants#SQL_CATALOG_NAME_KEY} representing {@link ResultSetMetaData#getCatalogName(int)}</li>
    *  <li>{@link Constants#SQL_TABLE_NAME_KEY} representing {@link ResultSetMetaData#getTableName(int)}</li>
-   *  <li>{@link Constants#SQL_COLUMN_NAME_KEY} representing {@link ResultSetMetaData#getColumnName(int)}</li>
+   *  <li>{@link Constants#SQL_COLUMN_NAME_KEY} representing {@link ResultSetMetaData#getColumnLabel(int)}</li>
    *  <li>{@link Constants#SQL_TYPE_KEY} representing {@link ResultSetMetaData#getColumnTypeName(int)}</li>
    * </ul>
    * </p>
@@ -139,7 +139,7 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, JdbcToArrowConfig
     List<Field> fields = new ArrayList<>();
     int columnCount = rsmd.getColumnCount();
     for (int i = 1; i <= columnCount; i++) {
-      final String columnName = rsmd.getColumnName(i);
+      final String columnName = rsmd.getColumnLabel(i);
 
       final Map<String, String> metadata;
       if (config.shouldIncludeMetadata()) {
@@ -196,7 +196,7 @@ private static JdbcFieldInfo getJdbcFieldInfoForArraySubType(
 
     JdbcFieldInfo fieldInfo = config.getArraySubTypeByColumnIndex(arrayColumn);
     if (fieldInfo == null) {
-      fieldInfo = config.getArraySubTypeByColumnName(rsmd.getColumnName(arrayColumn));
+      fieldInfo = config.getArraySubTypeByColumnName(rsmd.getColumnLabel(arrayColumn));
     }
     return fieldInfo;
   }
@@ -246,7 +246,7 @@ public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, JdbcT
 
     JdbcConsumer[] consumers = new JdbcConsumer[columnCount];
     for (int i = 1; i <= columnCount; i++) {
-      FieldVector vector = root.getVector(rsmd.getColumnName(i));
+      FieldVector vector = root.getVector(rsmd.getColumnLabel(i));
       consumers[i - 1] = getConsumer(vector.getField().getType(), i, isColumnNullable(rs, i), vector, config);
     }
 
diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowTestHelper.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowTestHelper.java
index c194dfbc355..9fdb32d80e9 100644
--- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowTestHelper.java
+++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowTestHelper.java
@@ -263,7 +263,7 @@ public static void assertFieldMetadataMatchesResultSetMetadata(ResultSetMetaData
 
       assertEquals(rsmd.getCatalogName(i), metadata.get(Constants.SQL_CATALOG_NAME_KEY));
       assertEquals(rsmd.getTableName(i), metadata.get(Constants.SQL_TABLE_NAME_KEY));
-      assertEquals(rsmd.getColumnName(i), metadata.get(Constants.SQL_COLUMN_NAME_KEY));
+      assertEquals(rsmd.getColumnLabel(i), metadata.get(Constants.SQL_COLUMN_NAME_KEY));
       assertEquals(rsmd.getColumnTypeName(i), metadata.get(Constants.SQL_TYPE_KEY));
     }
   }
diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcAliasToArrowTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcAliasToArrowTest.java
new file mode 100644
index 00000000000..f44818a9f09
--- /dev/null
+++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcAliasToArrowTest.java
@@ -0,0 +1,140 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc.h2;
+
+import static org.junit.Assert.assertEquals;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.ResultSetMetaData;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.util.List;
+
+import org.apache.arrow.adapter.jdbc.JdbcToArrow;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class JdbcAliasToArrowTest {
+  private Connection conn = null;
+
+  private static final String CREATE_STATEMENT =
+      "CREATE TABLE example_table (id INTEGER);";
+  private static final String INSERT_STATEMENT =
+      "INSERT INTO example_table (id) VALUES (?);";
+  private static final String QUERY = "SELECT id as a, id as b FROM example_table;";
+  private static final String DROP_STATEMENT = "DROP TABLE example_table;";
+  private static final String ORIGINAL_COLUMN_NAME = "ID";
+  private static final String COLUMN_A = "A";
+  private static final String COLUMN_B = "B";
+
+  @Before
+  public void setUp() throws Exception {
+    String url = "jdbc:h2:mem:JdbcAliasToArrowTest";
+    String driver = "org.h2.Driver";
+    Class.forName(driver);
+    conn = DriverManager.getConnection(url);
+    try (Statement stmt = conn.createStatement()) {
+      stmt.executeUpdate(CREATE_STATEMENT);
+    }
+  }
+
+  /**
+   * Test h2 database query with alias for column name and column label.
+   * To vetify reading field alias from an H2 database works as expected.
+   * If this test fails, something is either wrong with the setup,
+   * or the H2 SQL behavior changed.
+   */
+  @Test
+  public void testReadH2Alias() throws Exception {
+    // insert rows
+    int rowCount = 4;
+    insertRows(rowCount);
+
+    try (ResultSet resultSet = conn.createStatement().executeQuery(QUERY)) {
+      ResultSetMetaData rsmd = resultSet.getMetaData();
+      assertEquals(2, rsmd.getColumnCount());
+
+      // check column name and column label
+      assertEquals(ORIGINAL_COLUMN_NAME, rsmd.getColumnName(1));
+      assertEquals(COLUMN_A, rsmd.getColumnLabel(1));
+      assertEquals(ORIGINAL_COLUMN_NAME, rsmd.getColumnName(2));
+      assertEquals(COLUMN_B, rsmd.getColumnLabel(2));
+
+      int rowNum = 0;
+
+      while (resultSet.next()) {
+        assertEquals(rowNum, resultSet.getInt(COLUMN_A));
+        assertEquals(rowNum, resultSet.getInt(COLUMN_B));
+        ++rowNum;
+      }
+
+      assertEquals(rowCount, rowNum);
+    }
+  }
+
+  /**
+   * Test jdbc query results with alias to arrow works expected.
+   * Arrow result schema name should be field alias name.
+   */
+  @Test
+  public void testJdbcAliasToArrow() throws Exception {
+    int rowCount = 4;
+    insertRows(rowCount);
+
+    try (ResultSet resultSet = conn.createStatement().executeQuery(QUERY)) {
+      final VectorSchemaRoot vector =
+          JdbcToArrow.sqlToArrow(resultSet, new RootAllocator(Integer.MAX_VALUE));
+
+      assertEquals(rowCount, vector.getRowCount());
+      Schema vectorSchema = vector.getSchema();
+      List<Field> vectorFields = vectorSchema.getFields();
+      assertEquals(vectorFields.get(0).getName(), COLUMN_A);
+      assertEquals(vectorFields.get(1).getName(), COLUMN_B);
+    }
+  }
+
+  @After
+  public void tearDown() throws SQLException {
+    try (Statement stmt = conn.createStatement()) {
+      stmt.executeUpdate(DROP_STATEMENT);
+    } finally {
+      if (conn != null) {
+        conn.close();
+        conn = null;
+      }
+    }
+  }
+
+  private void insertRows(int numRows) throws SQLException {
+    // Insert [numRows] Rows
+    try (PreparedStatement stmt = conn.prepareStatement(INSERT_STATEMENT)) {
+      for (int i = 0; i < numRows; ++i) {
+        stmt.setInt(1, i);
+        stmt.executeUpdate();
+      }
+    }
+  }
+}
diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java
index 85c42c0f40d..8c5a17c37f7 100644
--- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java
+++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java
@@ -487,7 +487,7 @@ public int getColumnDisplaySize(int column) throws SQLException {
 
         @Override
         public String getColumnLabel(int column) throws SQLException {
-          return null;
+          return getColumnName(column);
         }
 
         @Override

From bf6bd4f1884c81c1e14ca26d1ee1b963694af778 Mon Sep 17 00:00:00 2001
From: Ian Cook <ianmcook@gmail.com>
Date: Mon, 19 Jul 2021 09:28:05 -0400
Subject: [PATCH 598/719] ARROW-13365: [R] bindings for floor/ceiling/truncate

This also arranges the function mappings in `.unary_function_map` in alphabetical order within categories and adds some comments in `expression.R`

Closes #10745 from ianmcook/ARROW-13365

Authored-by: Ian Cook <ianmcook@gmail.com>
Signed-off-by: Ian Cook <ianmcook@gmail.com>
---
 r/NEWS.md                     |  2 +-
 r/R/dplyr-functions.R         |  9 +++--
 r/R/expression.R              | 73 +++++++++++++++++++++++------------
 r/tests/testthat/test-dplyr.R | 15 +++++++
 4 files changed, 70 insertions(+), 29 deletions(-)

diff --git a/r/NEWS.md b/r/NEWS.md
index 9cd7542a012..b05cc131902 100644
--- a/r/NEWS.md
+++ b/r/NEWS.md
@@ -25,7 +25,7 @@
 
   * String operations: `strsplit()` and `str_split()`; `strptime()`; `paste()`, `paste0()`, and `str_c()`; `substr()` and `str_sub()`; `str_like()`; `str_pad()`; `stri_reverse()`
   * Date/time operations: `lubridate` methods such as `year()`, `month()`, `wday()`, and so on
-  * Math: `log()`, trigonometry (`sin()`, `cos()`, et al.), `abs()`, `sign()`, `pmin()`/`pmax()`
+  * Math: logarithms (`log()` et al.); trigonometry (`sin()`, `cos()`, et al.); `abs()`; `sign()`; `pmin()` and `pmax()`; `ceiling()`, `floor()`, and `trunc()`
   * Conditional: `ifelse()` and `if_else()` (fixed-precision decimal numbers do not yet work and factors/dictionaries are converted to character strings); `case_when()` (currently works with numeric data types but not character strings, factors/dictionaries, or lists/structs)
   * `is.*` functions are supported and can be used inside `relocate()`
 
diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R
index d42992006a5..11efb7f26d2 100644
--- a/r/R/dplyr-functions.R
+++ b/r/R/dplyr-functions.R
@@ -646,6 +646,11 @@ nse_funcs$second <- function(x) {
   Expression$create("add", Expression$create("second", x), Expression$create("subsecond", x))
 }
 
+nse_funcs$trunc <- function(x, ...) {
+  # accepts and ignores ... for consistency with base::trunc()
+  build_expr("trunc", x)
+}
+
 nse_funcs$wday <- function(x, label = FALSE, abbr = TRUE, week_start = getOption("lubridate.week.start", 7)) {
 
   # The "day_of_week" compute function returns numeric days of week and not locale-aware strftime
@@ -658,7 +663,7 @@ nse_funcs$wday <- function(x, label = FALSE, abbr = TRUE, week_start = getOption
   Expression$create("day_of_week", x, options = list(one_based_numbering = TRUE, week_start = week_start))
 }
 
-nse_funcs$log <- function(x, base = exp(1)) {
+nse_funcs$log <- nse_funcs$logb <- function(x, base = exp(1)) {
 
   if (base == exp(1)) {
     return(Expression$create("ln_checked", x))
@@ -675,8 +680,6 @@ nse_funcs$log <- function(x, base = exp(1)) {
   stop("`base` values other than exp(1), 2 and 10 not supported in Arrow", call. = FALSE)
 }
 
-nse_funcs$logb <- nse_funcs$log
-
 nse_funcs$if_else <- function(condition, true, false, missing = NULL){
   if (!is.null(missing)) {
     return(nse_funcs$if_else(
diff --git a/r/R/expression.R b/r/R/expression.R
index 49752ebcb76..c4ce38b3ca1 100644
--- a/r/R/expression.R
+++ b/r/R/expression.R
@@ -18,47 +18,70 @@
 #' @include arrowExports.R
 
 .unary_function_map <- list(
-  "!" = "invert",
-  "as.factor" = "dictionary_encode",
-  # is.na is defined in dplyr-functions.R
-  # is.nan is defined in dplyr-functions.R
+  # NOTE: Each of the R functions mapped here takes exactly *one* argument, maps
+  # *directly* to an Arrow C++ compute kernel, and does not require any
+  # non-default options to be specified. More complex R function mappings are
+  # defined in dplyr-functions.R.
+
+  # functions are arranged alphabetically by name within categories
+
+  # arithmetic functions
   "abs" = "abs_checked",
+  "ceiling" = "ceil",
+  "floor" = "floor",
+  "log10" = "log10_checked",
+  "log1p" = "log1p_checked",
+  "log2" = "log2_checked",
   "sign" = "sign",
+  # trunc is defined in dplyr-functions.R
+
+  # trigonometric functions
+  "acos" = "acos_checked",
+  "asin" = "asin_checked",
+  "cos" = "cos_checked",
+  "sin" = "sin_checked",
+  "tan" = "tan_checked",
+
+  # logical functions
+  "!" = "invert",
+
+  # string functions
   # nchar is defined in dplyr-functions.R
-  "tolower" = "utf8_lower",
-  "toupper" = "utf8_upper",
-  # stringr spellings of those
   "str_length" = "utf8_length",
+  # str_pad is defined in dplyr-functions.R
+  # str_sub is defined in dplyr-functions.R
   "str_to_lower" = "utf8_lower",
   "str_to_upper" = "utf8_upper",
-  # str_pad is defined in dplyr-functions.R
-  "stri_reverse" = "utf8_reverse",
   # str_trim is defined in dplyr-functions.R
-  # str_sub is defined in dplyr-functions.R
+  "stri_reverse" = "utf8_reverse",
   # substr is defined in dplyr-functions.R
   # substring is defined in dplyr-functions.R
-  "year" = "year",
+  "tolower" = "utf8_lower",
+  "toupper" = "utf8_upper",
+
+  # date and time functions
+  "day" = "day",
+  "hour" = "hour",
+  "isoweek" = "iso_week",
   "isoyear" = "iso_year",
-  "quarter" = "quarter",
+  "minute" = "minute",
   "month" = "month",
-  "isoweek" = "iso_week",
-  "day" = "day",
+  "quarter" = "quarter",
+  # second is defined in dplyr-functions.R
   # wday is defined in dplyr-functions.R
   "yday" = "day_of_year",
-  "hour" = "hour",
-  # second is defined in dplyr-functions.R
-  "minute" = "minute",
-  "log10" = "log10_checked",
-  "log2" = "log2_checked",
-  "log1p" = "log1p_checked",
-  "sin" = "sin_checked",
-  "cos" = "cos_checked",
-  "tan" = "tan_checked",
-  "asin" = "asin_checked",
-  "acos" = "acos_checked"
+  "year" = "year",
+
+  # type conversion functions
+  "as.factor" = "dictionary_encode"
 )
 
 .binary_function_map <- list(
+  # NOTE: Each of the R functions/operators mapped here takes exactly *two*
+  # arguments. Most map *directly* to an Arrow C++ compute kernel and require no
+  # non-default options, but some are modified by build_expr(). More complex R
+  # function/operator mappings are defined in dplyr-functions.R.
+
   "==" = "equal",
   "!=" = "not_equal",
   ">" = "greater",
diff --git a/r/tests/testthat/test-dplyr.R b/r/tests/testthat/test-dplyr.R
index 468ad8593bd..ac36c5a1bc9 100644
--- a/r/tests/testthat/test-dplyr.R
+++ b/r/tests/testthat/test-dplyr.R
@@ -992,6 +992,21 @@ test_that("sign()", {
   )
 })
 
+test_that("ceiling(), floor(), trunc()", {
+  df <- tibble(x = c(-1, -0.55, -0.5, -0.1, 0, 0.1, 0.5, 0.55, 1, NA, NaN))
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(
+        c = ceiling(x),
+        f = floor(x),
+        t = trunc(x)
+      ) %>%
+      collect(),
+    df
+  )
+})
+
 test_that("log functions", {
 
   df <- tibble(x = c(1:10, NA, NA))

From 14a584eedaf65387bc91e928b86562a3bbb1a9a3 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Mon, 19 Jul 2021 15:54:36 +0200
Subject: [PATCH 599/719] ARROW-13353: [Docs] Pin breathe to avoid failure
 parsing template parameters
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Recent versions of Breathe, when combined with a recent version of Sphinx, will try to have Sphinx parse template parameters. Unfortunately, Sphinx can't handle things like `typename...` which causes the build to fail.

Closes #10733 from lidavidm/arrow-13353

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 ci/docker/linux-apt-docs.dockerfile | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/ci/docker/linux-apt-docs.dockerfile b/ci/docker/linux-apt-docs.dockerfile
index 95ccb4b94cc..8f124a77658 100644
--- a/ci/docker/linux-apt-docs.dockerfile
+++ b/ci/docker/linux-apt-docs.dockerfile
@@ -75,9 +75,11 @@ RUN wget -q -O - https://deb.nodesource.com/setup_${node}.x | bash - && \
     rm -rf /var/lib/apt/lists/* && \
     npm install -g yarn
 
+# ARROW-13353: breathe >= 4.29.1 tries to parse template arguments,
+# but Sphinx can't parse constructs like `typename...`.
 RUN pip install \
         meson \
-        breathe \
+        breathe==4.29.0 \
         ipython \
         sphinx \
         pydata-sphinx-theme

From c9b9fa4e9964926061ff7c80b09ed22eb0ef77a1 Mon Sep 17 00:00:00 2001
From: Weston Pace <weston.pace@gmail.com>
Date: Mon, 19 Jul 2021 16:23:50 +0200
Subject: [PATCH 600/719] ARROW-13091: [Python] Add compression_level argument
 to IpcWriteOptions constructor
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

 * Modified IpcWriteOptions so that it can take an instance of pyarrow.Codec instead of a str
 * Modified pyarrow.Codec to expose compression_level
 * Added helper methods to report the minimum/maximum/default values for the different codecs

Closes #10717 from westonpace/feature/ARROW-13091--python-add-compression_level-argument-to-ipcwrit

Authored-by: Weston Pace <weston.pace@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 cpp/src/arrow/util/compression.cc             | 30 ++++++
 cpp/src/arrow/util/compression.h              | 21 ++++
 cpp/src/arrow/util/compression_brotli.cc      |  5 +
 cpp/src/arrow/util/compression_bz2.cc         |  6 ++
 cpp/src/arrow/util/compression_lz4.cc         |  6 ++
 cpp/src/arrow/util/compression_snappy.cc      |  3 +
 cpp/src/arrow/util/compression_test.cc        | 26 +++++
 cpp/src/arrow/util/compression_zlib.cc        |  6 ++
 cpp/src/arrow/util/compression_zstd.cc        |  3 +
 docs/source/python/api/memory.rst             |  1 +
 python/pyarrow/includes/libarrow.pxd          | 18 ++++
 python/pyarrow/io.pxi                         | 83 +++++++++++++++-
 python/pyarrow/ipc.pxi                        | 15 ++-
 python/pyarrow/lib.pxd                        |  2 +-
 python/pyarrow/tests/conftest.py              | 13 +++
 .../parquet/test_compliant_nested_type.py     |  2 +
 .../tests/parquet/test_parquet_file.py        |  2 +
 .../tests/parquet/test_parquet_writer.py      |  2 +
 python/pyarrow/tests/test_feather.py          |  7 ++
 python/pyarrow/tests/test_fs.py               |  3 +
 python/pyarrow/tests/test_io.py               | 95 +++++++++++++++++++
 python/pyarrow/tests/test_ipc.py              | 67 ++++++++++---
 python/pyarrow/tests/test_tensor.py           |  1 +
 23 files changed, 395 insertions(+), 22 deletions(-)

diff --git a/cpp/src/arrow/util/compression.cc b/cpp/src/arrow/util/compression.cc
index f9c084f6c26..8db199b4e76 100644
--- a/cpp/src/arrow/util/compression.cc
+++ b/cpp/src/arrow/util/compression.cc
@@ -29,6 +29,18 @@
 namespace arrow {
 namespace util {
 
+namespace {
+
+Status CheckSupportsCompressionLevel(Compression::type type) {
+  if (!Codec::SupportsCompressionLevel(type)) {
+    return Status::Invalid(
+        "The specified codec does not support the compression level parameter");
+  }
+  return Status::OK();
+}
+
+}  // namespace
+
 int Codec::UseDefaultCompressionLevel() { return kUseDefaultCompressionLevel; }
 
 Status Codec::Init() { return Status::OK(); }
@@ -103,6 +115,24 @@ bool Codec::SupportsCompressionLevel(Compression::type codec) {
   }
 }
 
+Result<int> Codec::MaximumCompressionLevel(Compression::type codec_type) {
+  RETURN_NOT_OK(CheckSupportsCompressionLevel(codec_type));
+  ARROW_ASSIGN_OR_RAISE(auto codec, Codec::Create(codec_type));
+  return codec->maximum_compression_level();
+}
+
+Result<int> Codec::MinimumCompressionLevel(Compression::type codec_type) {
+  RETURN_NOT_OK(CheckSupportsCompressionLevel(codec_type));
+  ARROW_ASSIGN_OR_RAISE(auto codec, Codec::Create(codec_type));
+  return codec->minimum_compression_level();
+}
+
+Result<int> Codec::DefaultCompressionLevel(Compression::type codec_type) {
+  RETURN_NOT_OK(CheckSupportsCompressionLevel(codec_type));
+  ARROW_ASSIGN_OR_RAISE(auto codec, Codec::Create(codec_type));
+  return codec->default_compression_level();
+}
+
 Result<std::unique_ptr<Codec>> Codec::Create(Compression::type codec_type,
                                              int compression_level) {
   if (!IsAvailable(codec_type)) {
diff --git a/cpp/src/arrow/util/compression.h b/cpp/src/arrow/util/compression.h
index 6c9a74c6d21..0832e82a606 100644
--- a/cpp/src/arrow/util/compression.h
+++ b/cpp/src/arrow/util/compression.h
@@ -132,6 +132,27 @@ class ARROW_EXPORT Codec {
   /// \brief Return true if indicated codec supports setting a compression level
   static bool SupportsCompressionLevel(Compression::type codec);
 
+  /// \brief Return the smallest supported compression level for the codec
+  /// Note: This function creates a temporary Codec instance
+  static Result<int> MinimumCompressionLevel(Compression::type codec);
+
+  /// \brief Return the largest supported compression level for the codec
+  /// Note: This function creates a temporary Codec instance
+  static Result<int> MaximumCompressionLevel(Compression::type codec);
+
+  /// \brief Return the default compression level
+  /// Note: This function creates a temporary Codec instance
+  static Result<int> DefaultCompressionLevel(Compression::type codec);
+
+  /// \brief Return the smallest supported compression level
+  virtual int minimum_compression_level() const = 0;
+
+  /// \brief Return the largest supported compression level
+  virtual int maximum_compression_level() const = 0;
+
+  /// \brief Return the default compression level
+  virtual int default_compression_level() const = 0;
+
   /// \brief One-shot decompression function
   ///
   /// output_buffer_len must be correct and therefore be obtained in advance.
diff --git a/cpp/src/arrow/util/compression_brotli.cc b/cpp/src/arrow/util/compression_brotli.cc
index 4feabe23345..cb547c2c8cf 100644
--- a/cpp/src/arrow/util/compression_brotli.cc
+++ b/cpp/src/arrow/util/compression_brotli.cc
@@ -224,6 +224,11 @@ class BrotliCodec : public Codec {
   Compression::type compression_type() const override { return Compression::BROTLI; }
 
   int compression_level() const override { return compression_level_; }
+  int minimum_compression_level() const override { return BROTLI_MIN_QUALITY; }
+  int maximum_compression_level() const override { return BROTLI_MAX_QUALITY; }
+  int default_compression_level() const override {
+    return kBrotliDefaultCompressionLevel;
+  }
 
  private:
   const int compression_level_;
diff --git a/cpp/src/arrow/util/compression_bz2.cc b/cpp/src/arrow/util/compression_bz2.cc
index 8a8c1cb7a45..b367f2ff20c 100644
--- a/cpp/src/arrow/util/compression_bz2.cc
+++ b/cpp/src/arrow/util/compression_bz2.cc
@@ -40,6 +40,9 @@ namespace internal {
 
 namespace {
 
+constexpr int kBZ2MinCompressionLevel = 1;
+constexpr int kBZ2MaxCompressionLevel = 9;
+
 // Max number of bytes the bz2 APIs accept at a time
 constexpr auto kSizeLimit =
     static_cast<int64_t>(std::numeric_limits<unsigned int>::max());
@@ -265,6 +268,9 @@ class BZ2Codec : public Codec {
   Compression::type compression_type() const override { return Compression::BZ2; }
 
   int compression_level() const override { return compression_level_; }
+  int minimum_compression_level() const override { return kBZ2MinCompressionLevel; }
+  int maximum_compression_level() const override { return kBZ2MaxCompressionLevel; }
+  int default_compression_level() const override { return kBZ2DefaultCompressionLevel; }
 
  private:
   int compression_level_;
diff --git a/cpp/src/arrow/util/compression_lz4.cc b/cpp/src/arrow/util/compression_lz4.cc
index 9314dfd7faf..c783e405590 100644
--- a/cpp/src/arrow/util/compression_lz4.cc
+++ b/cpp/src/arrow/util/compression_lz4.cc
@@ -300,6 +300,9 @@ class Lz4FrameCodec : public Codec {
   }
 
   Compression::type compression_type() const override { return Compression::LZ4_FRAME; }
+  int minimum_compression_level() const override { return kUseDefaultCompressionLevel; }
+  int maximum_compression_level() const override { return kUseDefaultCompressionLevel; }
+  int default_compression_level() const override { return kUseDefaultCompressionLevel; }
 
  protected:
   const LZ4F_preferences_t prefs_;
@@ -350,6 +353,9 @@ class Lz4Codec : public Codec {
   }
 
   Compression::type compression_type() const override { return Compression::LZ4; }
+  int minimum_compression_level() const override { return kUseDefaultCompressionLevel; }
+  int maximum_compression_level() const override { return kUseDefaultCompressionLevel; }
+  int default_compression_level() const override { return kUseDefaultCompressionLevel; }
 };
 
 // ----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/compression_snappy.cc b/cpp/src/arrow/util/compression_snappy.cc
index 9b016874b56..3756f957d04 100644
--- a/cpp/src/arrow/util/compression_snappy.cc
+++ b/cpp/src/arrow/util/compression_snappy.cc
@@ -86,6 +86,9 @@ class SnappyCodec : public Codec {
   }
 
   Compression::type compression_type() const override { return Compression::SNAPPY; }
+  int minimum_compression_level() const override { return kUseDefaultCompressionLevel; }
+  int maximum_compression_level() const override { return kUseDefaultCompressionLevel; }
+  int default_compression_level() const override { return kUseDefaultCompressionLevel; }
 };
 
 }  // namespace
diff --git a/cpp/src/arrow/util/compression_test.cc b/cpp/src/arrow/util/compression_test.cc
index 2bd7a176234..795d5e31d65 100644
--- a/cpp/src/arrow/util/compression_test.cc
+++ b/cpp/src/arrow/util/compression_test.cc
@@ -399,6 +399,32 @@ TEST(TestCodecMisc, SpecifyCompressionLevel) {
   }
 }
 
+TEST_P(CodecTest, MinMaxCompressionLevel) {
+  auto type = GetCompression();
+  ASSERT_OK_AND_ASSIGN(auto codec, Codec::Create(type));
+
+  if (Codec::SupportsCompressionLevel(type)) {
+    ASSERT_OK_AND_ASSIGN(auto min_level, Codec::MinimumCompressionLevel(type));
+    ASSERT_OK_AND_ASSIGN(auto max_level, Codec::MaximumCompressionLevel(type));
+    ASSERT_OK_AND_ASSIGN(auto default_level, Codec::DefaultCompressionLevel(type));
+    ASSERT_NE(min_level, Codec::UseDefaultCompressionLevel());
+    ASSERT_NE(max_level, Codec::UseDefaultCompressionLevel());
+    ASSERT_NE(default_level, Codec::UseDefaultCompressionLevel());
+    ASSERT_LT(min_level, max_level);
+    ASSERT_EQ(min_level, codec->minimum_compression_level());
+    ASSERT_EQ(max_level, codec->maximum_compression_level());
+    ASSERT_GE(default_level, min_level);
+    ASSERT_LE(default_level, max_level);
+  } else {
+    ASSERT_RAISES(Invalid, Codec::MinimumCompressionLevel(type));
+    ASSERT_RAISES(Invalid, Codec::MaximumCompressionLevel(type));
+    ASSERT_RAISES(Invalid, Codec::DefaultCompressionLevel(type));
+    ASSERT_EQ(codec->minimum_compression_level(), Codec::UseDefaultCompressionLevel());
+    ASSERT_EQ(codec->maximum_compression_level(), Codec::UseDefaultCompressionLevel());
+    ASSERT_EQ(codec->default_compression_level(), Codec::UseDefaultCompressionLevel());
+  }
+}
+
 TEST_P(CodecTest, OutputBufferIsSmall) {
   auto type = GetCompression();
   if (type != Compression::SNAPPY) {
diff --git a/cpp/src/arrow/util/compression_zlib.cc b/cpp/src/arrow/util/compression_zlib.cc
index 520e9dcd383..e9cb2470ee2 100644
--- a/cpp/src/arrow/util/compression_zlib.cc
+++ b/cpp/src/arrow/util/compression_zlib.cc
@@ -52,6 +52,9 @@ constexpr int GZIP_CODEC = 16;
 // Determine if this is libz or gzip from header.
 constexpr int DETECT_CODEC = 32;
 
+constexpr int kGZipMinCompressionLevel = 1;
+constexpr int kGZipMaxCompressionLevel = 9;
+
 int CompressionWindowBitsForFormat(GZipFormat::type format) {
   int window_bits = WINDOW_BITS;
   switch (format) {
@@ -468,6 +471,9 @@ class GZipCodec : public Codec {
   Compression::type compression_type() const override { return Compression::GZIP; }
 
   int compression_level() const override { return compression_level_; }
+  int minimum_compression_level() const override { return kGZipMinCompressionLevel; }
+  int maximum_compression_level() const override { return kGZipMaxCompressionLevel; }
+  int default_compression_level() const override { return kGZipDefaultCompressionLevel; }
 
  private:
   // zlib is stateful and the z_stream state variable must be initialized
diff --git a/cpp/src/arrow/util/compression_zstd.cc b/cpp/src/arrow/util/compression_zstd.cc
index 382e0573b29..e15ecb4e1fe 100644
--- a/cpp/src/arrow/util/compression_zstd.cc
+++ b/cpp/src/arrow/util/compression_zstd.cc
@@ -228,6 +228,9 @@ class ZSTDCodec : public Codec {
   }
 
   Compression::type compression_type() const override { return Compression::ZSTD; }
+  int minimum_compression_level() const override { return ZSTD_minCLevel(); }
+  int maximum_compression_level() const override { return ZSTD_maxCLevel(); }
+  int default_compression_level() const override { return kZSTDDefaultCompressionLevel; }
 
   int compression_level() const override { return compression_level_; }
 
diff --git a/docs/source/python/api/memory.rst b/docs/source/python/api/memory.rst
index b7384748076..f4382ba23c9 100644
--- a/docs/source/python/api/memory.rst
+++ b/docs/source/python/api/memory.rst
@@ -50,6 +50,7 @@ Miscellaneous
 .. autosummary::
    :toctree: ../generated/
 
+   Codec
    compress
    decompress
 
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index bd3bdb251f3..171b3ede217 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -2352,6 +2352,23 @@ cdef extern from 'arrow/util/compression.h' namespace 'arrow' nogil:
         @staticmethod
         CResult[unique_ptr[CCodec]] Create(CCompressionType codec)
 
+        @staticmethod
+        CResult[unique_ptr[CCodec]] CreateWithLevel" Create"(
+            CCompressionType codec,
+            int compression_level)
+
+        @staticmethod
+        c_bool SupportsCompressionLevel(CCompressionType codec)
+
+        @staticmethod
+        CResult[int] MinimumCompressionLevel(CCompressionType codec)
+
+        @staticmethod
+        CResult[int] MaximumCompressionLevel(CCompressionType codec)
+
+        @staticmethod
+        CResult[int] DefaultCompressionLevel(CCompressionType codec)
+
         @staticmethod
         c_bool IsAvailable(CCompressionType codec)
 
@@ -2362,6 +2379,7 @@ cdef extern from 'arrow/util/compression.h' namespace 'arrow' nogil:
                                   int64_t output_buffer_len,
                                   uint8_t* output_buffer)
         c_string name() const
+        int compression_level() const
         int64_t MaxCompressedLen(int64_t input_len, const uint8_t* input)
 
 
diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi
index b5da607950b..7d7cb1afb00 100644
--- a/python/pyarrow/io.pxi
+++ b/python/pyarrow/io.pxi
@@ -1625,6 +1625,40 @@ cdef class Codec(_Weakrefable):
         Type of compression codec to initialize, valid values are: 'gzip',
         'bz2', 'brotli', 'lz4' (or 'lz4_frame'), 'lz4_raw', 'zstd' and
         'snappy'.
+    compression_level: int, None
+        Optional parameter specifying how aggressively to compress.  The
+        possible ranges and effect of this parameter depend on the specific
+        codec chosen.  Higher values compress more but typically use more
+        resources (CPU/RAM).  Some codecs support negative values.
+
+        gzip
+            The compression_level maps to the memlevel parameter of
+            deflateInit2.  Higher levels use more RAM but are faster
+            and should have higher compression ratios.
+
+        bz2
+            The compression level maps to the blockSize100k parameter of
+            the BZ2_bzCompressInit function.  Higher levels use more RAM
+            but are faster and should have higher compression ratios.
+
+        brotli
+            The compression level maps to the BROTLI_PARAM_QUALITY
+            parameter.  Higher values are slower and should have higher
+            compression ratios.
+
+        lz4/lz4_frame/lz4_raw
+            The compression level parameter is not supported and must
+            be None
+
+        zstd
+            The compression level maps to the compressionLevel parameter
+            of ZSTD_initCStream.  Negative values are supported.  Higher
+            values are slower and should have higher compression ratios.
+
+        snappy
+            The compression level parameter is not supported and must
+            be None
+
 
     Raises
     ------
@@ -1632,9 +1666,14 @@ cdef class Codec(_Weakrefable):
         If invalid compression value is passed.
     """
 
-    def __init__(self, str compression not None):
+    def __init__(self, str compression not None, compression_level=None):
         cdef CCompressionType typ = _ensure_compression(compression)
-        self.wrapped = move(GetResultValue(CCodec.Create(typ)))
+        if compression_level is not None:
+            self.wrapped = shared_ptr[CCodec](move(GetResultValue(
+                CCodec.CreateWithLevel(typ, compression_level))))
+        else:
+            self.wrapped = shared_ptr[CCodec](move(GetResultValue(
+                CCodec.Create(typ))))
 
     cdef inline CCodec* unwrap(self) nogil:
         return self.wrapped.get()
@@ -1680,10 +1719,50 @@ cdef class Codec(_Weakrefable):
         cdef CCompressionType typ = _ensure_compression(compression)
         return CCodec.IsAvailable(typ)
 
+    @staticmethod
+    def supports_compression_level(str compression not None):
+        """
+        Returns true if the compression level parameter is supported
+        for the given codec.
+        """
+        cdef CCompressionType typ = _ensure_compression(compression)
+        return CCodec.SupportsCompressionLevel(typ)
+
+    @staticmethod
+    def default_compression_level(str compression not None):
+        """
+        Returns the compression level that Arrow will use for the codec if
+        None is specified.
+        """
+        cdef CCompressionType typ = _ensure_compression(compression)
+        return GetResultValue(CCodec.DefaultCompressionLevel(typ))
+
+    @staticmethod
+    def minimum_compression_level(str compression not None):
+        """
+        Returns the smallest valid value for the compression level
+        """
+        cdef CCompressionType typ = _ensure_compression(compression)
+        return GetResultValue(CCodec.MinimumCompressionLevel(typ))
+
+    @staticmethod
+    def maximum_compression_level(str compression not None):
+        """
+        Returns the largest valid value for the compression level
+        """
+        cdef CCompressionType typ = _ensure_compression(compression)
+        return GetResultValue(CCodec.MaximumCompressionLevel(typ))
+
     @property
     def name(self):
+        """Returns the name of the codec"""
         return frombytes(self.unwrap().name())
 
+    @property
+    def compression_level(self):
+        """Returns the compression level parameter of the codec"""
+        return frombytes(self.unwrap().compression_level())
+
     def compress(self, object buf, asbytes=False, memory_pool=None):
         """
         Compress data from buffer-like object.
diff --git a/python/pyarrow/ipc.pxi b/python/pyarrow/ipc.pxi
index 93dd2eaef5e..4b22acc076f 100644
--- a/python/pyarrow/ipc.pxi
+++ b/python/pyarrow/ipc.pxi
@@ -96,9 +96,11 @@ cdef class IpcWriteOptions(_Weakrefable):
         If true, allow field lengths that don't fit in a signed 32-bit int.
     use_legacy_format : bool, default False
         Whether to use the pre-Arrow 0.15 IPC format.
-    compression: str or None
-        If not None, compression codec to use for record batch buffers.
-        May only be "lz4", "zstd" or None.
+    compression: str, Codec, or None
+        compression codec to use for record batch buffers.
+        If None then batch buffers will be uncompressed.
+        Must be "lz4", "zstd" or None.
+        To specify a compression_level use `pyarrow.Codec`
     use_threads: bool
         Whether to use the global CPU thread pool to parallelize any
         computational tasks like compression.
@@ -158,9 +160,14 @@ cdef class IpcWriteOptions(_Weakrefable):
     def compression(self, value):
         if value is None:
             self.c_options.codec.reset()
-        else:
+        elif isinstance(value, str):
             self.c_options.codec = shared_ptr[CCodec](GetResultValue(
                 CCodec.Create(_ensure_compression(value))).release())
+        elif isinstance(value, Codec):
+            self.c_options.codec = (<Codec>value).wrapped
+        else:
+            raise TypeError(
+                "Property `compression` must be None, str, or pyarrow.Codec")
 
     @property
     def use_threads(self):
diff --git a/python/pyarrow/lib.pxd b/python/pyarrow/lib.pxd
index 1959519c49d..414c7b5f26b 100644
--- a/python/pyarrow/lib.pxd
+++ b/python/pyarrow/lib.pxd
@@ -499,7 +499,7 @@ cdef class RecordBatchReader(_Weakrefable):
 
 cdef class Codec(_Weakrefable):
     cdef:
-        unique_ptr[CCodec] wrapped
+        shared_ptr[CCodec] wrapped
 
     cdef inline CCodec* unwrap(self) nogil
 
diff --git a/python/pyarrow/tests/conftest.py b/python/pyarrow/tests/conftest.py
index 3de07c4305f..8fb98d4a6e7 100644
--- a/python/pyarrow/tests/conftest.py
+++ b/python/pyarrow/tests/conftest.py
@@ -24,6 +24,7 @@
 import hypothesis as h
 
 from pyarrow.util import find_free_port
+from pyarrow import Codec
 
 
 # setup hypothesis profiles
@@ -44,13 +45,17 @@
 
 
 groups = [
+    'brotli',
+    'bz2',
     'cython',
     'dataset',
     'hypothesis',
     'fastparquet',
     'gandiva',
+    'gzip',
     'hdfs',
     'large_memory',
+    'lz4',
     'memory_leak',
     'nopandas',
     'orc',
@@ -58,20 +63,26 @@
     'parquet',
     'plasma',
     's3',
+    'snappy',
     'tensorflow',
     'flight',
     'slow',
     'requires_testing_data',
+    'zstd',
 ]
 
 defaults = {
+    'brotli': Codec.is_available('brotli'),
+    'bz2': Codec.is_available('bz2'),
     'cython': False,
     'dataset': False,
     'fastparquet': False,
     'hypothesis': False,
     'gandiva': False,
+    'gzip': Codec.is_available('gzip'),
     'hdfs': False,
     'large_memory': False,
+    'lz4': Codec.is_available('lz4'),
     'memory_leak': False,
     'orc': False,
     'nopandas': False,
@@ -79,10 +90,12 @@
     'parquet': False,
     'plasma': False,
     's3': False,
+    'snappy': Codec.is_available('snappy'),
     'tensorflow': False,
     'flight': False,
     'slow': False,
     'requires_testing_data': True,
+    'zstd': Codec.is_available('zstd'),
 }
 
 try:
diff --git a/python/pyarrow/tests/parquet/test_compliant_nested_type.py b/python/pyarrow/tests/parquet/test_compliant_nested_type.py
index 804f3738f12..5b10ed4c2f0 100644
--- a/python/pyarrow/tests/parquet/test_compliant_nested_type.py
+++ b/python/pyarrow/tests/parquet/test_compliant_nested_type.py
@@ -35,6 +35,8 @@
 except ImportError:
     pd = tm = None
 
+pytestmark = pytest.mark.parquet
+
 # Tests for ARROW-11497
 _test_data_simple = [
     {'items': [1, 2]},
diff --git a/python/pyarrow/tests/parquet/test_parquet_file.py b/python/pyarrow/tests/parquet/test_parquet_file.py
index dc9a3bb5274..43175b72ab7 100644
--- a/python/pyarrow/tests/parquet/test_parquet_file.py
+++ b/python/pyarrow/tests/parquet/test_parquet_file.py
@@ -36,6 +36,8 @@
 except ImportError:
     pd = tm = None
 
+pytestmark = pytest.mark.parquet
+
 
 @pytest.mark.pandas
 def test_pass_separate_metadata():
diff --git a/python/pyarrow/tests/parquet/test_parquet_writer.py b/python/pyarrow/tests/parquet/test_parquet_writer.py
index ec1d5256bfd..4218e83cead 100644
--- a/python/pyarrow/tests/parquet/test_parquet_writer.py
+++ b/python/pyarrow/tests/parquet/test_parquet_writer.py
@@ -36,6 +36,8 @@
 except ImportError:
     pd = tm = None
 
+pytestmark = pytest.mark.parquet
+
 
 @pytest.mark.pandas
 @parametrize_legacy_dataset
diff --git a/python/pyarrow/tests/test_feather.py b/python/pyarrow/tests/test_feather.py
index f01ac292ddf..3d0451ee33e 100644
--- a/python/pyarrow/tests/test_feather.py
+++ b/python/pyarrow/tests/test_feather.py
@@ -55,6 +55,9 @@ def version(request):
 
 @pytest.fixture(scope="module", params=[None, "uncompressed", "lz4", "zstd"])
 def compression(request):
+    if request.param in ['lz4', 'zstd'] and not pa.Codec.is_available(
+            request.param):
+        pytest.skip(f'{request.param} is not available')
     yield request.param
 
 
@@ -599,6 +602,9 @@ def test_v2_set_chunksize():
 
 
 @pytest.mark.pandas
+@pytest.mark.lz4
+@pytest.mark.snappy
+@pytest.mark.zstd
 def test_v2_compression_options():
     df = pd.DataFrame({'A': np.arange(1000)})
 
@@ -776,6 +782,7 @@ def test_roundtrip(table, compression):
     _check_arrow_roundtrip(table, compression=compression)
 
 
+@pytest.mark.lz4
 def test_feather_v017_experimental_compression_backward_compatibility(datadir):
     # ARROW-11163 - ensure newer pyarrow versions can read the old feather
     # files from version 0.17.0 with experimental compression support (before
diff --git a/python/pyarrow/tests/test_fs.py b/python/pyarrow/tests/test_fs.py
index 8faddc7b9e4..0e049e21778 100644
--- a/python/pyarrow/tests/test_fs.py
+++ b/python/pyarrow/tests/test_fs.py
@@ -855,6 +855,7 @@ def identity(v):
     return v
 
 
+@pytest.mark.gzip
 @pytest.mark.parametrize(
     ('compression', 'buffer_size', 'compressor'),
     [
@@ -892,6 +893,7 @@ def test_open_input_file(fs, pathfn):
     assert result == data[read_from:]
 
 
+@pytest.mark.gzip
 @pytest.mark.parametrize(
     ('compression', 'buffer_size', 'decompressor'),
     [
@@ -913,6 +915,7 @@ def test_open_output_stream(fs, pathfn, compression, buffer_size,
         assert f.read(len(data)) == data
 
 
+@pytest.mark.gzip
 @pytest.mark.parametrize(
     ('compression', 'buffer_size', 'compressor', 'decompressor'),
     [
diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py
index a085312bbc7..5119e162595 100644
--- a/python/pyarrow/tests/test_io.py
+++ b/python/pyarrow/tests/test_io.py
@@ -624,6 +624,89 @@ def test_compress_decompress(compression):
         pa.decompress(compressed_bytes, codec=compression)
 
 
+@pytest.mark.parametrize("compression", [
+    pytest.param(
+        "bz2", marks=pytest.mark.xfail(raises=pa.lib.ArrowNotImplementedError)
+    ),
+    "brotli",
+    "gzip",
+    "lz4",
+    "zstd",
+    "snappy"
+])
+def test_compression_level(compression):
+    if not Codec.is_available(compression):
+        pytest.skip("{} support is not built".format(compression))
+
+    # These codecs do not support a compression level
+    no_level = ['snappy', 'lz4']
+    if compression in no_level:
+        assert not Codec.supports_compression_level(compression)
+        with pytest.raises(ValueError):
+            Codec(compression, 0)
+        with pytest.raises(ValueError):
+            Codec.minimum_compression_level(compression)
+        with pytest.raises(ValueError):
+            Codec.maximum_compression_level(compression)
+        with pytest.raises(ValueError):
+            Codec.default_compression_level(compression)
+        return
+
+    INPUT_SIZE = 10000
+    test_data = (np.random.randint(0, 255, size=INPUT_SIZE)
+                 .astype(np.uint8)
+                 .tobytes())
+    test_buf = pa.py_buffer(test_data)
+
+    min_level = Codec.minimum_compression_level(compression)
+    max_level = Codec.maximum_compression_level(compression)
+    default_level = Codec.default_compression_level(compression)
+
+    assert min_level < max_level
+    assert default_level >= min_level
+    assert default_level <= max_level
+
+    for compression_level in range(min_level, max_level+1):
+        codec = Codec(compression, compression_level)
+        compressed_buf = codec.compress(test_buf)
+        compressed_bytes = codec.compress(test_data, asbytes=True)
+        assert isinstance(compressed_bytes, bytes)
+        decompressed_buf = codec.decompress(compressed_buf, INPUT_SIZE)
+        decompressed_bytes = codec.decompress(compressed_bytes, INPUT_SIZE,
+                                              asbytes=True)
+
+        assert isinstance(decompressed_bytes, bytes)
+
+        assert decompressed_buf.equals(test_buf)
+        assert decompressed_bytes == test_data
+
+        with pytest.raises(ValueError):
+            codec.decompress(compressed_bytes)
+
+    # The ability to set a seed this way is not present on older versions of
+    # numpy (currently in our python 3.6 CI build).  Some inputs might just
+    # happen to compress the same between the two levels so using seeded
+    # random numbers is neccesary to help get more reliable results
+    #
+    # The goal of this part is to ensure the compression_level is being
+    # passed down to the C++ layer, not to verify the compression algs
+    # themselves
+    if not hasattr(np.random, 'default_rng'):
+        pytest.skip('Requires newer version of numpy')
+    rng = np.random.default_rng(seed=42)
+    values = rng.integers(0, 100, 1000)
+    arr = pa.array(values)
+    hard_to_compress_buffer = arr.buffers()[1]
+
+    weak_codec = Codec(compression, min_level)
+    weakly_compressed_buf = weak_codec.compress(hard_to_compress_buffer)
+
+    strong_codec = Codec(compression, max_level)
+    strongly_compressed_buf = strong_codec.compress(hard_to_compress_buffer)
+
+    assert len(weakly_compressed_buf) > len(strongly_compressed_buf)
+
+
 def test_buffer_memoryview_is_immutable():
     val = b'some data'
 
@@ -1183,6 +1266,7 @@ def check_compressed_input(data, fn, compression):
         assert buf.to_pybytes() == data
 
 
+@pytest.mark.gzip
 def test_compressed_input_gzip(tmpdir):
     data = b"some test data\n" * 10 + b"eof\n"
     fn = str(tmpdir / "compressed_input_test.gz")
@@ -1209,6 +1293,7 @@ def check_compressed_concatenated(data, fn, compression):
         assert got == data
 
 
+@pytest.mark.gzip
 def test_compressed_concatenated_gzip(tmpdir):
     data = b"some test data\n" * 10 + b"eof\n"
     fn = str(tmpdir / "compressed_input_test2.gz")
@@ -1219,6 +1304,7 @@ def test_compressed_concatenated_gzip(tmpdir):
     check_compressed_concatenated(data, fn, "gzip")
 
 
+@pytest.mark.gzip
 def test_compressed_input_invalid():
     data = b"foo" * 10
     raw = pa.BufferReader(data)
@@ -1246,6 +1332,7 @@ def make_compressed_output(data, fn, compression):
         f.write(raw.getvalue())
 
 
+@pytest.mark.gzip
 def test_compressed_output_gzip(tmpdir):
     data = b"some test data\n" * 10 + b"eof\n"
     fn = str(tmpdir / "compressed_output_test.gz")
@@ -1433,6 +1520,7 @@ def test_transcoding_decoding_error(src_encoding, dest_encoding):
 # ----------------------------------------------------------------------
 # High-level API
 
+@pytest.mark.gzip
 def test_input_stream_buffer():
     data = b"some test data\n" * 10 + b"eof\n"
     for arg in [pa.py_buffer(data), memoryview(data)]:
@@ -1478,6 +1566,7 @@ def test_input_stream_file_path(tmpdir):
     assert stream.read() == data
 
 
+@pytest.mark.gzip
 def test_input_stream_file_path_compressed(tmpdir):
     data = b"some test data\n" * 10 + b"eof\n"
     gz_data = gzip.compress(data)
@@ -1524,6 +1613,7 @@ def test_input_stream_file_path_buffered(tmpdir):
         pa.input_stream(file_path, buffer_size='million')
 
 
+@pytest.mark.gzip
 def test_input_stream_file_path_compressed_and_buffered(tmpdir):
     data = b"some test data\n" * 100 + b"eof\n"
     gz_data = gzip.compress(data)
@@ -1539,6 +1629,7 @@ def test_input_stream_file_path_compressed_and_buffered(tmpdir):
     assert stream.read() == data
 
 
+@pytest.mark.gzip
 def test_input_stream_python_file(tmpdir):
     data = b"some test data\n" * 10 + b"eof\n"
     bio = BytesIO(data)
@@ -1562,6 +1653,7 @@ def test_input_stream_python_file(tmpdir):
         assert stream.read() == data
 
 
+@pytest.mark.gzip
 def test_input_stream_native_file():
     data = b"some test data\n" * 10 + b"eof\n"
     gz_data = gzip.compress(data)
@@ -1640,6 +1732,7 @@ def check_data(file_path, data):
     check_data(pathlib.Path(str(file_path)), data)
 
 
+@pytest.mark.gzip
 def test_output_stream_file_path_compressed(tmpdir):
     data = b"some test data\n" * 10 + b"eof\n"
     file_path = tmpdir / 'output_stream.gz'
@@ -1690,6 +1783,7 @@ def check_data(file_path, data, **kwargs):
     assert result == data
 
 
+@pytest.mark.gzip
 def test_output_stream_file_path_compressed_and_buffered(tmpdir):
     data = b"some test data\n" * 100 + b"eof\n"
     file_path = tmpdir / 'output_stream_compressed_and_buffered.gz'
@@ -1729,6 +1823,7 @@ def check_data(file_path, data, **kwargs):
     assert check_data(file_path, data, buffer_size=1024) == data
 
 
+@pytest.mark.gzip
 def test_output_stream_python_file(tmpdir):
     data = b"some test data\n" * 10 + b"eof\n"
 
diff --git a/python/pyarrow/tests/test_ipc.py b/python/pyarrow/tests/test_ipc.py
index a15960bce74..87944bcc066 100644
--- a/python/pyarrow/tests/test_ipc.py
+++ b/python/pyarrow/tests/test_ipc.py
@@ -329,6 +329,37 @@ def test_stream_simple_roundtrip(stream_fixture, use_legacy_ipc_format):
         reader.read_next_batch()
 
 
+@pytest.mark.zstd
+def test_compression_roundtrip():
+    sink = io.BytesIO()
+    values = np.random.randint(0, 10, 10000)
+    table = pa.Table.from_arrays([values], names=["values"])
+
+    options = pa.ipc.IpcWriteOptions(compression='zstd')
+    with pa.ipc.RecordBatchFileWriter(
+            sink, table.schema, options=options) as writer:
+        writer.write_table(table)
+    len1 = len(sink.getvalue())
+
+    sink2 = io.BytesIO()
+    codec = pa.Codec('zstd', compression_level=5)
+    options = pa.ipc.IpcWriteOptions(compression=codec)
+    with pa.ipc.RecordBatchFileWriter(
+            sink2, table.schema, options=options) as writer:
+        writer.write_table(table)
+    len2 = len(sink2.getvalue())
+
+    # In theory len2 should be less than len1 but for this test we just want
+    # to ensure compression_level is being correctly passed down to the C++
+    # layer so we don't really care if it makes it worse or better
+    assert len2 != len1
+
+    t1 = pa.ipc.open_file(sink).read_all()
+    t2 = pa.ipc.open_file(sink2).read_all()
+
+    assert t1 == t2
+
+
 def test_write_options():
     options = pa.ipc.IpcWriteOptions()
     assert options.allow_64bit is False
@@ -349,28 +380,33 @@ def test_write_options():
 
     assert options.compression is None
     for value in ['lz4', 'zstd']:
-        options.compression = value
-        assert options.compression == value
-        options.compression = value.upper()
-        assert options.compression == value
+        if pa.Codec.is_available(value):
+            options.compression = value
+            assert options.compression == value
+            options.compression = value.upper()
+            assert options.compression == value
     options.compression = None
     assert options.compression is None
 
+    with pytest.raises(TypeError):
+        options.compression = 0
+
     assert options.use_threads is True
     options.use_threads = False
     assert options.use_threads is False
 
-    options = pa.ipc.IpcWriteOptions(
-        metadata_version=pa.ipc.MetadataVersion.V4,
-        allow_64bit=True,
-        use_legacy_format=True,
-        compression='lz4',
-        use_threads=False)
-    assert options.metadata_version == pa.ipc.MetadataVersion.V4
-    assert options.allow_64bit is True
-    assert options.use_legacy_format is True
-    assert options.compression == 'lz4'
-    assert options.use_threads is False
+    if pa.Codec.is_available('lz4'):
+        options = pa.ipc.IpcWriteOptions(
+            metadata_version=pa.ipc.MetadataVersion.V4,
+            allow_64bit=True,
+            use_legacy_format=True,
+            compression='lz4',
+            use_threads=False)
+        assert options.metadata_version == pa.ipc.MetadataVersion.V4
+        assert options.allow_64bit is True
+        assert options.use_legacy_format is True
+        assert options.compression == 'lz4'
+        assert options.use_threads is False
 
 
 def test_write_options_legacy_exclusive(stream_fixture):
@@ -564,6 +600,7 @@ def test_message_serialize_read_message(example_messages):
         pa.ipc.read_message(reader)
 
 
+@pytest.mark.gzip
 def test_message_read_from_compressed(example_messages):
     # Part of ARROW-5910
     _, messages = example_messages
diff --git a/python/pyarrow/tests/test_tensor.py b/python/pyarrow/tests/test_tensor.py
index 07493144919..aee46bc9369 100644
--- a/python/pyarrow/tests/test_tensor.py
+++ b/python/pyarrow/tests/test_tensor.py
@@ -108,6 +108,7 @@ def test_tensor_ipc_roundtrip(tmpdir):
     assert result.equals(tensor)
 
 
+@pytest.mark.gzip
 def test_tensor_ipc_read_from_compressed(tempdir):
     # ARROW-5910
     data = np.random.randn(10, 4)

From c848f12122014aba9958a3910e2324661c3c2d7a Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Mon, 19 Jul 2021 13:45:07 -0400
Subject: [PATCH 601/719] ARROW-13136: [C++] Add coalesce function

Closes #10608 from lidavidm/arrow-13136

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Benjamin Kietzman <bengilgit@gmail.com>
---
 .../arrow/compute/kernels/codegen_internal.cc |   4 +-
 .../arrow/compute/kernels/scalar_if_else.cc   | 273 ++++++++++++++++++
 .../kernels/scalar_if_else_benchmark.cc       |  61 ++++
 .../compute/kernels/scalar_if_else_test.cc    | 187 ++++++++++++
 cpp/src/arrow/util/bit_block_counter.h        |  13 +
 docs/source/cpp/compute.rst                   |  43 +--
 docs/source/python/api/compute.rst            |   1 +
 7 files changed, 562 insertions(+), 20 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/codegen_internal.cc b/cpp/src/arrow/compute/kernels/codegen_internal.cc
index 673db088eae..bab8e7000cd 100644
--- a/cpp/src/arrow/compute/kernels/codegen_internal.cc
+++ b/cpp/src/arrow/compute/kernels/codegen_internal.cc
@@ -185,7 +185,9 @@ const std::vector<std::shared_ptr<DataType>>& ExampleParametricTypes() {
 // work above
 
 Result<ValueDescr> FirstType(KernelContext*, const std::vector<ValueDescr>& descrs) {
-  return descrs[0];
+  ValueDescr result = descrs.front();
+  result.shape = GetBroadcastShape(descrs);
+  return result;
 }
 
 void EnsureDictionaryDecoded(std::vector<ValueDescr>* descrs) {
diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else.cc b/cpp/src/arrow/compute/kernels/scalar_if_else.cc
index 861fbf13e86..ff308a673a3 100644
--- a/cpp/src/arrow/compute/kernels/scalar_if_else.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_if_else.cc
@@ -1166,6 +1166,22 @@ void CopyValues(const Datum& in_values, const int64_t in_offset, const int64_t l
   }
 }
 
+// Specialized helper to copy a single value from a source array. Allows avoiding
+// repeatedly calling MayHaveNulls and Buffer::data() which have internal checks that
+// add up when called in a loop.
+template <typename Type>
+void CopyOneArrayValue(const DataType& type, const uint8_t* in_valid,
+                       const uint8_t* in_values, const int64_t in_offset,
+                       uint8_t* out_valid, uint8_t* out_values,
+                       const int64_t out_offset) {
+  if (out_valid) {
+    BitUtil::SetBitTo(out_valid, out_offset,
+                      !in_valid || BitUtil::GetBit(in_valid, in_offset));
+  }
+  CopyFixedWidth<Type>::CopyArray(type, in_values, in_offset, /*length=*/1, out_values,
+                                  out_offset);
+}
+
 struct CaseWhenFunction : ScalarFunction {
   using ScalarFunction::ScalarFunction;
 
@@ -1375,6 +1391,221 @@ struct CaseWhenFunctor<NullType> {
   }
 };
 
+struct CoalesceFunction : ScalarFunction {
+  using ScalarFunction::ScalarFunction;
+
+  Result<const Kernel*> DispatchBest(std::vector<ValueDescr>* values) const override {
+    RETURN_NOT_OK(CheckArity(*values));
+    using arrow::compute::detail::DispatchExactImpl;
+    if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
+    EnsureDictionaryDecoded(values);
+    if (auto type = CommonNumeric(*values)) {
+      ReplaceTypes(type, values);
+    }
+    if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
+    return arrow::compute::detail::NoMatchingKernel(this, *values);
+  }
+};
+
+// Implement a 'coalesce' (SQL) operator for any number of scalar inputs
+Status ExecScalarCoalesce(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  for (const auto& datum : batch.values) {
+    if (datum.scalar()->is_valid) {
+      *out = datum;
+      break;
+    }
+  }
+  return Status::OK();
+}
+
+// Helper: copy from a source datum into all null slots of the output
+template <typename Type>
+void CopyValuesAllValid(Datum source, uint8_t* out_valid, uint8_t* out_values,
+                        const int64_t out_offset, const int64_t length) {
+  BitBlockCounter counter(out_valid, out_offset, length);
+  int64_t offset = 0;
+  while (offset < length) {
+    const auto block = counter.NextWord();
+    if (block.NoneSet()) {
+      CopyValues<Type>(source, offset, block.length, out_valid, out_values,
+                       out_offset + offset);
+    } else if (!block.AllSet()) {
+      for (int64_t j = 0; j < block.length; ++j) {
+        if (!BitUtil::GetBit(out_valid, out_offset + offset + j)) {
+          CopyValues<Type>(source, offset + j, 1, out_valid, out_values,
+                           out_offset + offset + j);
+        }
+      }
+    }
+    offset += block.length;
+  }
+}
+
+// Helper: zero the values buffer of the output wherever the slot is null
+void InitializeNullSlots(const DataType& type, uint8_t* out_valid, uint8_t* out_values,
+                         const int64_t out_offset, const int64_t length) {
+  BitBlockCounter counter(out_valid, out_offset, length);
+  int64_t offset = 0;
+  auto bit_width = checked_cast<const FixedWidthType&>(type).bit_width();
+  auto byte_width = BitUtil::BytesForBits(bit_width);
+  while (offset < length) {
+    const auto block = counter.NextWord();
+    if (block.NoneSet()) {
+      if (bit_width == 1) {
+        BitUtil::SetBitsTo(out_values, out_offset + offset, block.length, false);
+      } else {
+        std::memset(out_values + (out_offset + offset) * byte_width, 0x00,
+                    byte_width * block.length);
+      }
+    } else if (!block.AllSet()) {
+      for (int64_t j = 0; j < block.length; ++j) {
+        if (BitUtil::GetBit(out_valid, out_offset + offset + j)) continue;
+        if (bit_width == 1) {
+          BitUtil::ClearBit(out_values, out_offset + offset + j);
+        } else {
+          std::memset(out_values + (out_offset + offset + j) * byte_width, 0x00,
+                      byte_width);
+        }
+      }
+    }
+    offset += block.length;
+  }
+}
+
+// Implement 'coalesce' for any mix of scalar/array arguments for any fixed-width type
+template <typename Type>
+Status ExecArrayCoalesce(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  ArrayData* output = out->mutable_array();
+  const int64_t out_offset = output->offset;
+  // Use output validity buffer as mask to decide what values to copy
+  uint8_t* out_valid = output->buffers[0]->mutable_data();
+  // Clear output buffer - no values are set initially
+  BitUtil::SetBitsTo(out_valid, out_offset, batch.length, false);
+  uint8_t* out_values = output->buffers[1]->mutable_data();
+
+  for (const auto& datum : batch.values) {
+    if ((datum.is_scalar() && datum.scalar()->is_valid) ||
+        (datum.is_array() && !datum.array()->MayHaveNulls())) {
+      // Valid scalar, or all-valid array
+      CopyValuesAllValid<Type>(datum, out_valid, out_values, out_offset, batch.length);
+      break;
+    } else if (datum.is_array()) {
+      // Array with nulls
+      const ArrayData& arr = *datum.array();
+      const DataType& type = *datum.type();
+      const uint8_t* in_valid = arr.buffers[0]->data();
+      const uint8_t* in_values = arr.buffers[1]->data();
+      BinaryBitBlockCounter counter(in_valid, arr.offset, out_valid, out_offset,
+                                    batch.length);
+      int64_t offset = 0;
+      while (offset < batch.length) {
+        const auto block = counter.NextAndNotWord();
+        if (block.AllSet()) {
+          CopyValues<Type>(datum, offset, block.length, out_valid, out_values,
+                           out_offset + offset);
+        } else if (block.popcount) {
+          for (int64_t j = 0; j < block.length; ++j) {
+            if (!BitUtil::GetBit(out_valid, out_offset + offset + j) &&
+                BitUtil::GetBit(in_valid, arr.offset + offset + j)) {
+              // This version lets us avoid calling MayHaveNulls() on every iteration
+              // (which does an atomic load and can add up)
+              CopyOneArrayValue<Type>(type, in_valid, in_values, arr.offset + offset + j,
+                                      out_valid, out_values, out_offset + offset + j);
+            }
+          }
+        }
+        offset += block.length;
+      }
+    }
+  }
+
+  // Initialize any remaining null slots (uninitialized memory)
+  InitializeNullSlots(*out->type(), out_valid, out_values, out_offset, batch.length);
+  return Status::OK();
+}
+
+template <typename Type, typename Enable = void>
+struct CoalesceFunctor {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    for (const auto& datum : batch.values) {
+      if (datum.is_array()) {
+        return ExecArrayCoalesce<Type>(ctx, batch, out);
+      }
+    }
+    return ExecScalarCoalesce(ctx, batch, out);
+  }
+};
+
+template <>
+struct CoalesceFunctor<NullType> {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    return Status::OK();
+  }
+};
+
+template <typename Type>
+struct CoalesceFunctor<Type, enable_if_base_binary<Type>> {
+  using offset_type = typename Type::offset_type;
+  using BuilderType = typename TypeTraits<Type>::BuilderType;
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    for (const auto& datum : batch.values) {
+      if (datum.is_array()) {
+        return ExecArray(ctx, batch, out);
+      }
+    }
+    return ExecScalarCoalesce(ctx, batch, out);
+  }
+
+  static Status ExecArray(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    // Special case: grab any leading non-null scalar or array arguments
+    for (const auto& datum : batch.values) {
+      if (datum.is_scalar()) {
+        if (!datum.scalar()->is_valid) continue;
+        ARROW_ASSIGN_OR_RAISE(
+            *out, MakeArrayFromScalar(*datum.scalar(), batch.length, ctx->memory_pool()));
+        return Status::OK();
+      } else if (datum.is_array() && !datum.array()->MayHaveNulls()) {
+        *out = datum;
+        return Status::OK();
+      }
+      break;
+    }
+    ArrayData* output = out->mutable_array();
+    BuilderType builder(batch[0].type(), ctx->memory_pool());
+    RETURN_NOT_OK(builder.Reserve(batch.length));
+    for (int64_t i = 0; i < batch.length; i++) {
+      bool set = false;
+      for (const auto& datum : batch.values) {
+        if (datum.is_scalar()) {
+          if (datum.scalar()->is_valid) {
+            RETURN_NOT_OK(builder.Append(UnboxScalar<Type>::Unbox(*datum.scalar())));
+            set = true;
+            break;
+          }
+        } else {
+          const ArrayData& source = *datum.array();
+          if (!source.MayHaveNulls() ||
+              BitUtil::GetBit(source.buffers[0]->data(), source.offset + i)) {
+            const uint8_t* data = source.buffers[2]->data();
+            const offset_type* offsets = source.GetValues<offset_type>(1);
+            const offset_type offset0 = offsets[i];
+            const offset_type offset1 = offsets[i + 1];
+            RETURN_NOT_OK(builder.Append(data + offset0, offset1 - offset0));
+            set = true;
+            break;
+          }
+        }
+      }
+      if (!set) RETURN_NOT_OK(builder.AppendNull());
+    }
+    ARROW_ASSIGN_OR_RAISE(auto temp_output, builder.Finish());
+    *output = *temp_output->data();
+    // Builder type != logical type due to GenerateTypeAgnosticVarBinaryBase
+    output->type = batch[0].type();
+    return Status::OK();
+  }
+};
+
 Result<ValueDescr> LastType(KernelContext*, const std::vector<ValueDescr>& descrs) {
   ValueDescr result = descrs.back();
   result.shape = GetBroadcastShape(descrs);
@@ -1402,6 +1633,25 @@ void AddPrimitiveCaseWhenKernels(const std::shared_ptr<CaseWhenFunction>& scalar
   }
 }
 
+void AddCoalesceKernel(const std::shared_ptr<ScalarFunction>& scalar_function,
+                       detail::GetTypeId get_id, ArrayKernelExec exec) {
+  ScalarKernel kernel(KernelSignature::Make({InputType(get_id.id)}, OutputType(FirstType),
+                                            /*is_varargs=*/true),
+                      exec);
+  kernel.null_handling = NullHandling::COMPUTED_PREALLOCATE;
+  kernel.mem_allocation = MemAllocation::PREALLOCATE;
+  kernel.can_write_into_slices = is_fixed_width(get_id.id);
+  DCHECK_OK(scalar_function->AddKernel(std::move(kernel)));
+}
+
+void AddPrimitiveCoalesceKernels(const std::shared_ptr<ScalarFunction>& scalar_function,
+                                 const std::vector<std::shared_ptr<DataType>>& types) {
+  for (auto&& type : types) {
+    auto exec = GenerateTypeAgnosticPrimitive<CoalesceFunctor>(*type);
+    AddCoalesceKernel(scalar_function, type, std::move(exec));
+  }
+}
+
 const FunctionDoc if_else_doc{"Choose values based on a condition",
                               ("`cond` must be a Boolean scalar/ array. \n`left` or "
                                "`right` must be of the same type scalar/ array.\n"
@@ -1422,6 +1672,13 @@ const FunctionDoc case_when_doc{
      "Essentially, this implements a switch-case or if-else, if-else... "
      "statement."),
     {"cond", "*cases"}};
+
+const FunctionDoc coalesce_doc{
+    "Select the first non-null value in each slot",
+    ("Each row of the output will be the value from the first corresponding input "
+     "for which the value is not null. If all inputs are null in a row, the output "
+     "will be null."),
+    {"*values"}};
 }  // namespace
 
 void RegisterScalarIfElse(FunctionRegistry* registry) {
@@ -1450,6 +1707,22 @@ void RegisterScalarIfElse(FunctionRegistry* registry) {
     AddCaseWhenKernel(func, Type::DECIMAL256, CaseWhenFunctor<Decimal256Type>::Exec);
     DCHECK_OK(registry->AddFunction(std::move(func)));
   }
+  {
+    auto func = std::make_shared<CoalesceFunction>(
+        "coalesce", Arity::VarArgs(/*min_args=*/1), &coalesce_doc);
+    AddPrimitiveCoalesceKernels(func, NumericTypes());
+    AddPrimitiveCoalesceKernels(func, TemporalTypes());
+    AddPrimitiveCoalesceKernels(
+        func, {boolean(), null(), day_time_interval(), month_interval()});
+    AddCoalesceKernel(func, Type::FIXED_SIZE_BINARY,
+                      CoalesceFunctor<FixedSizeBinaryType>::Exec);
+    AddCoalesceKernel(func, Type::DECIMAL128, CoalesceFunctor<Decimal128Type>::Exec);
+    AddCoalesceKernel(func, Type::DECIMAL256, CoalesceFunctor<Decimal256Type>::Exec);
+    for (const auto& ty : BaseBinaryTypes()) {
+      AddCoalesceKernel(func, ty, GenerateTypeAgnosticVarBinaryBase<CoalesceFunctor>(ty));
+    }
+    DCHECK_OK(registry->AddFunction(std::move(func)));
+  }
 }
 
 }  // namespace internal
diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc
index 3b8df47162d..a63492987eb 100644
--- a/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc
@@ -227,6 +227,61 @@ static void CaseWhenBench64Contiguous(benchmark::State& state) {
   return CaseWhenBenchContiguous<UInt64Type>(state);
 }
 
+template <typename Type>
+static void CoalesceBench(benchmark::State& state) {
+  using CType = typename Type::c_type;
+  auto type = TypeTraits<Type>::type_singleton();
+
+  int64_t len = state.range(0);
+  int64_t offset = state.range(1);
+
+  random::RandomArrayGenerator rand(/*seed=*/0);
+
+  std::vector<Datum> arguments;
+  for (int i = 0; i < 4; i++) {
+    arguments.emplace_back(
+        rand.ArrayOf(type, len, /*null_probability=*/0.25)->Slice(offset));
+  }
+
+  for (auto _ : state) {
+    ABORT_NOT_OK(CallFunction("coalesce", arguments));
+  }
+
+  state.SetBytesProcessed(state.iterations() * arguments.size() * (len - offset) *
+                          sizeof(CType));
+}
+
+template <typename Type>
+static void CoalesceNonNullBench(benchmark::State& state) {
+  using CType = typename Type::c_type;
+  auto type = TypeTraits<Type>::type_singleton();
+
+  int64_t len = state.range(0);
+  int64_t offset = state.range(1);
+
+  random::RandomArrayGenerator rand(/*seed=*/0);
+
+  std::vector<Datum> arguments;
+  arguments.emplace_back(
+      rand.ArrayOf(type, len, /*null_probability=*/0.25)->Slice(offset));
+  arguments.emplace_back(rand.ArrayOf(type, len, /*null_probability=*/0)->Slice(offset));
+
+  for (auto _ : state) {
+    ABORT_NOT_OK(CallFunction("coalesce", arguments));
+  }
+
+  state.SetBytesProcessed(state.iterations() * arguments.size() * (len - offset) *
+                          sizeof(CType));
+}
+
+static void CoalesceBench64(benchmark::State& state) {
+  return CoalesceBench<Int64Type>(state);
+}
+
+static void CoalesceNonNullBench64(benchmark::State& state) {
+  return CoalesceBench<Int64Type>(state);
+}
+
 BENCHMARK(IfElseBench32)->Args({elems, 0});
 BENCHMARK(IfElseBench64)->Args({elems, 0});
 
@@ -251,5 +306,11 @@ BENCHMARK(CaseWhenBench64)->Args({elems, 99});
 BENCHMARK(CaseWhenBench64Contiguous)->Args({elems, 0});
 BENCHMARK(CaseWhenBench64Contiguous)->Args({elems, 99});
 
+BENCHMARK(CoalesceBench64)->Args({elems, 0});
+BENCHMARK(CoalesceBench64)->Args({elems, 99});
+
+BENCHMARK(CoalesceNonNullBench64)->Args({elems, 0});
+BENCHMARK(CoalesceNonNullBench64)->Args({elems, 99});
+
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
index 8ff86f3ec29..48b0cdb457d 100644
--- a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
@@ -853,5 +853,192 @@ TEST(TestCaseWhen, DispatchBest) {
                                            ArrayFromJSON(int64(), "[]"),
                                            ArrayFromJSON(utf8(), "[]")}));
 }
+
+template <typename Type>
+class TestCoalesceNumeric : public ::testing::Test {};
+template <typename Type>
+class TestCoalesceBinary : public ::testing::Test {};
+
+TYPED_TEST_SUITE(TestCoalesceNumeric, NumericBasedTypes);
+TYPED_TEST_SUITE(TestCoalesceBinary, BinaryTypes);
+
+TYPED_TEST(TestCoalesceNumeric, FixedSize) {
+  auto type = default_type_instance<TypeParam>();
+  auto scalar_null = ScalarFromJSON(type, "null");
+  auto scalar1 = ScalarFromJSON(type, "20");
+  auto values_null = ArrayFromJSON(type, "[null, null, null, null]");
+  auto values1 = ArrayFromJSON(type, "[null, 10, 11, 12]");
+  auto values2 = ArrayFromJSON(type, "[13, 14, 15, 16]");
+  auto values3 = ArrayFromJSON(type, "[17, 18, 19, null]");
+  // N.B. all-scalar cases are checked in CheckScalar
+  CheckScalar("coalesce", {values_null}, values_null);
+  CheckScalar("coalesce", {values_null, scalar1},
+              ArrayFromJSON(type, "[20, 20, 20, 20]"));
+  CheckScalar("coalesce", {values_null, values1}, values1);
+  CheckScalar("coalesce", {values_null, values2}, values2);
+  CheckScalar("coalesce", {values1, values_null}, values1);
+  CheckScalar("coalesce", {values2, values_null}, values2);
+  CheckScalar("coalesce", {scalar_null, values1}, values1);
+  CheckScalar("coalesce", {values1, scalar_null}, values1);
+  CheckScalar("coalesce", {values2, values1, values_null}, values2);
+  CheckScalar("coalesce", {values1, scalar1}, ArrayFromJSON(type, "[20, 10, 11, 12]"));
+  CheckScalar("coalesce", {values1, values2}, ArrayFromJSON(type, "[13, 10, 11, 12]"));
+  CheckScalar("coalesce", {values1, values2, values3},
+              ArrayFromJSON(type, "[13, 10, 11, 12]"));
+  CheckScalar("coalesce", {scalar1, values1}, ArrayFromJSON(type, "[20, 20, 20, 20]"));
+}
+
+TYPED_TEST(TestCoalesceBinary, Basics) {
+  auto type = default_type_instance<TypeParam>();
+  auto scalar_null = ScalarFromJSON(type, "null");
+  auto scalar1 = ScalarFromJSON(type, R"("a")");
+  auto values_null = ArrayFromJSON(type, R"([null, null, null, null])");
+  auto values1 = ArrayFromJSON(type, R"([null, "bc", "def", "ghij"])");
+  auto values2 = ArrayFromJSON(type, R"(["klmno", "p", "qr", "stu"])");
+  auto values3 = ArrayFromJSON(type, R"(["vwxy", "zabc", "d", null])");
+  // N.B. all-scalar cases are checked in CheckScalar
+  CheckScalar("coalesce", {values_null}, values_null);
+  CheckScalar("coalesce", {values_null, scalar1},
+              ArrayFromJSON(type, R"(["a", "a", "a", "a"])"));
+  CheckScalar("coalesce", {values_null, values1}, values1);
+  CheckScalar("coalesce", {values_null, values2}, values2);
+  CheckScalar("coalesce", {values1, values_null}, values1);
+  CheckScalar("coalesce", {values2, values_null}, values2);
+  CheckScalar("coalesce", {scalar_null, values1}, values1);
+  CheckScalar("coalesce", {values1, scalar_null}, values1);
+  CheckScalar("coalesce", {values2, values1, values_null}, values2);
+  CheckScalar("coalesce", {values1, scalar1},
+              ArrayFromJSON(type, R"(["a", "bc", "def", "ghij"])"));
+  CheckScalar("coalesce", {values1, values2},
+              ArrayFromJSON(type, R"(["klmno", "bc", "def", "ghij"])"));
+  CheckScalar("coalesce", {values1, values2, values3},
+              ArrayFromJSON(type, R"(["klmno", "bc", "def", "ghij"])"));
+  CheckScalar("coalesce", {scalar1, values1},
+              ArrayFromJSON(type, R"(["a", "a", "a", "a"])"));
+}
+
+TEST(TestCoalesce, Null) {
+  auto type = null();
+  auto scalar_null = ScalarFromJSON(type, "null");
+  auto values_null = ArrayFromJSON(type, "[null, null, null, null]");
+  CheckScalar("coalesce", {values_null}, values_null);
+  CheckScalar("coalesce", {values_null, scalar_null}, values_null);
+}
+
+TEST(TestCoalesce, Boolean) {
+  auto type = boolean();
+  auto scalar_null = ScalarFromJSON(type, "null");
+  auto scalar1 = ScalarFromJSON(type, "false");
+  auto values_null = ArrayFromJSON(type, "[null, null, null, null]");
+  auto values1 = ArrayFromJSON(type, "[null, true, false, true]");
+  auto values2 = ArrayFromJSON(type, "[true, false, true, false]");
+  auto values3 = ArrayFromJSON(type, "[false, true, false, null]");
+  CheckScalar("coalesce", {values_null}, values_null);
+  CheckScalar("coalesce", {values_null, scalar1},
+              ArrayFromJSON(type, "[false, false, false, false]"));
+  CheckScalar("coalesce", {values_null, values1}, values1);
+  CheckScalar("coalesce", {values_null, values2}, values2);
+  CheckScalar("coalesce", {values1, values_null}, values1);
+  CheckScalar("coalesce", {values2, values_null}, values2);
+  CheckScalar("coalesce", {scalar_null, values1}, values1);
+  CheckScalar("coalesce", {values1, scalar_null}, values1);
+  CheckScalar("coalesce", {values2, values1, values_null}, values2);
+  CheckScalar("coalesce", {values1, scalar1},
+              ArrayFromJSON(type, "[false, true, false, true]"));
+  CheckScalar("coalesce", {values1, values2},
+              ArrayFromJSON(type, "[true, true, false, true]"));
+  CheckScalar("coalesce", {values1, values2, values3},
+              ArrayFromJSON(type, "[true, true, false, true]"));
+  CheckScalar("coalesce", {scalar1, values1},
+              ArrayFromJSON(type, "[false, false, false, false]"));
+}
+
+TEST(TestCoalesce, DayTimeInterval) {
+  auto type = day_time_interval();
+  auto scalar_null = ScalarFromJSON(type, "null");
+  auto scalar1 = ScalarFromJSON(type, "[1, 2]");
+  auto values_null = ArrayFromJSON(type, "[null, null, null, null]");
+  auto values1 = ArrayFromJSON(type, "[null, [3, 4], [5, 6], [7, 8]]");
+  auto values2 = ArrayFromJSON(type, "[[9, 10], [11, 12], [13, 14], [15, 16]]");
+  auto values3 = ArrayFromJSON(type, "[[17, 18], [19, 20], [21, 22], null]");
+  // N.B. all-scalar cases are checked in CheckScalar
+  CheckScalar("coalesce", {values_null}, values_null);
+  CheckScalar("coalesce", {values_null, scalar1},
+              ArrayFromJSON(type, "[[1, 2], [1, 2], [1, 2], [1, 2]]"));
+  CheckScalar("coalesce", {values_null, values1}, values1);
+  CheckScalar("coalesce", {values_null, values2}, values2);
+  CheckScalar("coalesce", {values1, values_null}, values1);
+  CheckScalar("coalesce", {values2, values_null}, values2);
+  CheckScalar("coalesce", {scalar_null, values1}, values1);
+  CheckScalar("coalesce", {values1, scalar_null}, values1);
+  CheckScalar("coalesce", {values2, values1, values_null}, values2);
+  CheckScalar("coalesce", {values1, scalar1},
+              ArrayFromJSON(type, "[[1, 2], [3, 4], [5, 6], [7, 8]]"));
+  CheckScalar("coalesce", {values1, values2},
+              ArrayFromJSON(type, "[[9, 10], [3, 4], [5, 6], [7, 8]]"));
+  CheckScalar("coalesce", {values1, values2, values3},
+              ArrayFromJSON(type, "[[9, 10], [3, 4], [5, 6], [7, 8]]"));
+  CheckScalar("coalesce", {scalar1, values1},
+              ArrayFromJSON(type, "[[1, 2], [1, 2], [1, 2], [1, 2]]"));
+}
+
+TEST(TestCoalesce, Decimal) {
+  for (const auto& type :
+       std::vector<std::shared_ptr<DataType>>{decimal128(3, 2), decimal256(3, 2)}) {
+    auto scalar_null = ScalarFromJSON(type, "null");
+    auto scalar1 = ScalarFromJSON(type, R"("1.23")");
+    auto values_null = ArrayFromJSON(type, R"([null, null, null, null])");
+    auto values1 = ArrayFromJSON(type, R"([null, "4.56", "7.89", "1.34"])");
+    auto values2 = ArrayFromJSON(type, R"(["1.45", "2.34", "3.45", "4.56"])");
+    auto values3 = ArrayFromJSON(type, R"(["5.67", "6.78", "7.91", null])");
+    CheckScalar("coalesce", {values_null}, values_null);
+    CheckScalar("coalesce", {values_null, scalar1},
+                ArrayFromJSON(type, R"(["1.23", "1.23", "1.23", "1.23"])"));
+    CheckScalar("coalesce", {values_null, values1}, values1);
+    CheckScalar("coalesce", {values_null, values2}, values2);
+    CheckScalar("coalesce", {values1, values_null}, values1);
+    CheckScalar("coalesce", {values2, values_null}, values2);
+    CheckScalar("coalesce", {scalar_null, values1}, values1);
+    CheckScalar("coalesce", {values1, scalar_null}, values1);
+    CheckScalar("coalesce", {values2, values1, values_null}, values2);
+    CheckScalar("coalesce", {values1, scalar1},
+                ArrayFromJSON(type, R"(["1.23", "4.56", "7.89", "1.34"])"));
+    CheckScalar("coalesce", {values1, values2},
+                ArrayFromJSON(type, R"(["1.45", "4.56", "7.89", "1.34"])"));
+    CheckScalar("coalesce", {values1, values2, values3},
+                ArrayFromJSON(type, R"(["1.45", "4.56", "7.89", "1.34"])"));
+    CheckScalar("coalesce", {scalar1, values1},
+                ArrayFromJSON(type, R"(["1.23", "1.23", "1.23", "1.23"])"));
+  }
+}
+
+TEST(TestCoalesce, FixedSizeBinary) {
+  auto type = fixed_size_binary(3);
+  auto scalar_null = ScalarFromJSON(type, "null");
+  auto scalar1 = ScalarFromJSON(type, R"("abc")");
+  auto values_null = ArrayFromJSON(type, R"([null, null, null, null])");
+  auto values1 = ArrayFromJSON(type, R"([null, "def", "ghi", "jkl"])");
+  auto values2 = ArrayFromJSON(type, R"(["mno", "pqr", "stu", "vwx"])");
+  auto values3 = ArrayFromJSON(type, R"(["yza", "bcd", "efg", null])");
+  CheckScalar("coalesce", {values_null}, values_null);
+  CheckScalar("coalesce", {values_null, scalar1},
+              ArrayFromJSON(type, R"(["abc", "abc", "abc", "abc"])"));
+  CheckScalar("coalesce", {values_null, values1}, values1);
+  CheckScalar("coalesce", {values_null, values2}, values2);
+  CheckScalar("coalesce", {values1, values_null}, values1);
+  CheckScalar("coalesce", {values2, values_null}, values2);
+  CheckScalar("coalesce", {scalar_null, values1}, values1);
+  CheckScalar("coalesce", {values1, scalar_null}, values1);
+  CheckScalar("coalesce", {values2, values1, values_null}, values2);
+  CheckScalar("coalesce", {values1, scalar1},
+              ArrayFromJSON(type, R"(["abc", "def", "ghi", "jkl"])"));
+  CheckScalar("coalesce", {values1, values2},
+              ArrayFromJSON(type, R"(["mno", "def", "ghi", "jkl"])"));
+  CheckScalar("coalesce", {values1, values2, values3},
+              ArrayFromJSON(type, R"(["mno", "def", "ghi", "jkl"])"));
+  CheckScalar("coalesce", {scalar1, values1},
+              ArrayFromJSON(type, R"(["abc", "abc", "abc", "abc"])"));
+}
+
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/util/bit_block_counter.h b/cpp/src/arrow/util/bit_block_counter.h
index 803b825e1b2..63036af52a4 100644
--- a/cpp/src/arrow/util/bit_block_counter.h
+++ b/cpp/src/arrow/util/bit_block_counter.h
@@ -57,6 +57,16 @@ struct BitBlockAnd<bool> {
   static bool Call(bool left, bool right) { return left && right; }
 };
 
+template <typename T>
+struct BitBlockAndNot {
+  static T Call(T left, T right) { return left & ~right; }
+};
+
+template <>
+struct BitBlockAndNot<bool> {
+  static bool Call(bool left, bool right) { return left && !right; }
+};
+
 template <typename T>
 struct BitBlockOr {
   static T Call(T left, T right) { return left | right; }
@@ -266,6 +276,9 @@ class ARROW_EXPORT BinaryBitBlockCounter {
   /// blocks in subsequent invocations.
   BitBlockCount NextAndWord() { return NextWord<detail::BitBlockAnd>(); }
 
+  /// \brief Computes "x & ~y" block for each available run of bits.
+  BitBlockCount NextAndNotWord() { return NextWord<detail::BitBlockAndNot>(); }
+
   /// \brief Computes "x | y" block for each available run of bits.
   BitBlockCount NextOrWord() { return NextWord<detail::BitBlockOr>(); }
 
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index 12d86f9969d..4bcb568fdc2 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -885,25 +885,27 @@ Structural transforms
 +--------------------------+------------+---------------------------------------------------+---------------------+---------+
 | Function name            | Arity      | Input types                                       | Output type         | Notes   |
 +==========================+============+===================================================+=====================+=========+
-| case_when                | Varargs    | Struct of Boolean (Arg 0), Any fixed-width (rest) | Input type          | \(1)   |
+| case_when                | Varargs    | Struct of Boolean (Arg 0), Any fixed-width (rest) | Input type          | \(1)    |
 +--------------------------+------------+---------------------------------------------------+---------------------+---------+
-| fill_null                | Binary     | Boolean, Null, Numeric, Temporal, String-like     | Input type          | \(2)    |
+| coalesce                 | Varargs    | Any                                               | Input type          | \(2)    |
 +--------------------------+------------+---------------------------------------------------+---------------------+---------+
-| if_else                  | Ternary    | Boolean, Null, Numeric, Temporal                  | Input type          | \(3)    |
+| fill_null                | Binary     | Boolean, Null, Numeric, Temporal, String-like     | Input type          | \(3)    |
 +--------------------------+------------+---------------------------------------------------+---------------------+---------+
-| is_finite                | Unary      | Float, Double                                     | Boolean             | \(4)    |
+| if_else                  | Ternary    | Boolean, Null, Numeric, Temporal                  | Input type          | \(4)    |
 +--------------------------+------------+---------------------------------------------------+---------------------+---------+
-| is_inf                   | Unary      | Float, Double                                     | Boolean             | \(5)    |
+| is_finite                | Unary      | Float, Double                                     | Boolean             | \(5)    |
 +--------------------------+------------+---------------------------------------------------+---------------------+---------+
-| is_nan                   | Unary      | Float, Double                                     | Boolean             | \(6)    |
+| is_inf                   | Unary      | Float, Double                                     | Boolean             | \(6)    |
 +--------------------------+------------+---------------------------------------------------+---------------------+---------+
-| is_null                  | Unary      | Any                                               | Boolean             | \(7)    |
+| is_nan                   | Unary      | Float, Double                                     | Boolean             | \(7)    |
 +--------------------------+------------+---------------------------------------------------+---------------------+---------+
-| is_valid                 | Unary      | Any                                               | Boolean             | \(8)    |
+| is_null                  | Unary      | Any                                               | Boolean             | \(8)    |
 +--------------------------+------------+---------------------------------------------------+---------------------+---------+
-| list_value_length        | Unary      | List-like                                         | Int32 or Int64      | \(9)    |
+| is_valid                 | Unary      | Any                                               | Boolean             | \(9)    |
 +--------------------------+------------+---------------------------------------------------+---------------------+---------+
-| project                  | Varargs    | Any                                               | Struct              | \(10)   |
+| list_value_length        | Unary      | List-like                                         | Int32 or Int64      | \(10)   |
++--------------------------+------------+---------------------------------------------------+---------------------+---------+
+| make_struct              | Varargs    | Any                                               | Struct              | \(11)   |
 +--------------------------+------------+---------------------------------------------------+---------------------+---------+
 
 * \(1) This function acts like a SQL 'case when' statement or switch-case. The
@@ -915,11 +917,14 @@ Structural transforms
   the first value datum for which the corresponding Boolean is true, or the
   corresponding value from the 'default' input, or null otherwise.
 
-* \(2) First input must be an array, second input a scalar of the same type.
+* \(2) Each row of the output will be the corresponding value of the first
+  input which is non-null for that row, otherwise null.
+
+* \(3) First input must be an array, second input a scalar of the same type.
   Output is an array of the same type as the inputs, and with the same values
   as the first input, except for nulls replaced with the second input value.
 
-* \(3) First input must be a Boolean scalar or array. Second and third inputs
+* \(4) First input must be a Boolean scalar or array. Second and third inputs
   could be scalars or arrays and must be of the same type. Output is an array
   (or scalar if all inputs are scalar) of the same type as the second/ third
   input. If the nulls present on the first input, they will be promoted to the
@@ -927,21 +932,21 @@ Structural transforms
 
   Also see: :ref:`replace_with_mask <cpp-compute-vector-structural-transforms>`.
 
-* \(4) Output is true iff the corresponding input element is finite (not Infinity,
+* \(5) Output is true iff the corresponding input element is finite (not Infinity,
   -Infinity, or NaN).
 
-* \(5) Output is true iff the corresponding input element is Infinity/-Infinity.
+* \(6) Output is true iff the corresponding input element is Infinity/-Infinity.
 
-* \(6) Output is true iff the corresponding input element is NaN.
+* \(7) Output is true iff the corresponding input element is NaN.
 
-* \(7) Output is true iff the corresponding input element is null.
+* \(8) Output is true iff the corresponding input element is null.
 
-* \(8) Output is true iff the corresponding input element is non-null.
+* \(9) Output is true iff the corresponding input element is non-null.
 
-* \(9) Each output element is the length of the corresponding input element
+* \(10) Each output element is the length of the corresponding input element
   (null if input is null).  Output type is Int32 for List, Int64 for LargeList.
 
-* \(10) The output struct's field types are the types of its arguments. The
+* \(11) The output struct's field types are the types of its arguments. The
   field names are specified using an instance of :struct:`MakeStructOptions`.
   The output shape will be scalar if all inputs are scalar, otherwise any
   scalars will be broadcast to arrays.
diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst
index f12b50e7723..2fd0bad07e7 100644
--- a/docs/source/python/api/compute.rst
+++ b/docs/source/python/api/compute.rst
@@ -350,6 +350,7 @@ Structural Transforms
 
    binary_length
    case_when
+   coalesce
    fill_null
    if_else
    is_finite

From 5832c73a7065f59b2a042d544f55c7c27c57c8a4 Mon Sep 17 00:00:00 2001
From: Eduardo Ponce <edponce00@gmail.com>
Date: Mon, 19 Jul 2021 15:12:10 -0400
Subject: [PATCH 602/719] ARROW-13381: [C++] ArrayFromJSON doesn't work for
 float value dictionary type

This PR adds float/double cases for dictionary converter.

Closes #10746 from edponce/ARROW-13381-Flight-ArrayFromJSON-doesnt-work-for-flo

Authored-by: Eduardo Ponce <edponce00@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/src/arrow/ipc/json_simple.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/cpp/src/arrow/ipc/json_simple.cc b/cpp/src/arrow/ipc/json_simple.cc
index eb7a4f3a790..117b82df30d 100644
--- a/cpp/src/arrow/ipc/json_simple.cc
+++ b/cpp/src/arrow/ipc/json_simple.cc
@@ -785,6 +785,8 @@ Status GetDictConverter(const std::shared_ptr<DataType>& type,
     PARAM_CONVERTER_CASE(Type::UINT16, IntegerConverter, UInt16Type)
     PARAM_CONVERTER_CASE(Type::UINT32, IntegerConverter, UInt32Type)
     PARAM_CONVERTER_CASE(Type::UINT64, IntegerConverter, UInt64Type)
+    PARAM_CONVERTER_CASE(Type::FLOAT, FloatConverter, FloatType)
+    PARAM_CONVERTER_CASE(Type::DOUBLE, FloatConverter, DoubleType)
     PARAM_CONVERTER_CASE(Type::STRING, StringConverter, StringType)
     PARAM_CONVERTER_CASE(Type::BINARY, StringConverter, BinaryType)
     PARAM_CONVERTER_CASE(Type::LARGE_STRING, StringConverter, LargeStringType)

From ac050c6c583535c2f60063d8b341568b52170485 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Tue, 20 Jul 2021 00:10:43 +0200
Subject: [PATCH 603/719] ARROW-10658: [Python][Packaging] Wheel builds for
 Apple Silicon
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The following compiles the 3.9 wheel on apple silicon though I'm getting a segfault on import despite that all binaries have the right arch and linkage/bundling seems correct. I wasn't able to produce a coredump yet.

Closes #10659 from kszucs/m1

Lead-authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Co-authored-by: Ursa <ursa@ursa.org>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 .../python-wheel-manylinux-201x.dockerfile    |   4 +-
 .../python-wheel-windows-vs2017.dockerfile    |  15 +-
 ci/scripts/install_python.sh                  |  66 ++++++++
 ci/scripts/python_wheel_macos_build.sh        |  54 +++++--
 ci/scripts/python_wheel_macos_test.sh         |   7 +-
 ci/vcpkg/arm64-osx-static-debug.cmake         |  25 +++
 ci/vcpkg/arm64-osx-static-release.cmake       |  25 +++
 ci/vcpkg/universal2-osx-static-debug.cmake    |  25 +++
 ci/vcpkg/universal2-osx-static-release.cmake  |  25 +++
 cpp/cmake_modules/SetupCxxFlags.cmake         |  40 ++---
 cpp/cmake_modules/ThirdpartyToolchain.cmake   |   4 +-
 cpp/thirdparty/versions.txt                   |   2 +-
 dev/archery/archery/docker/cli.py             |   3 +-
 dev/archery/archery/docker/core.py            |   7 +-
 dev/archery/setup.py                          |   2 +
 dev/tasks/macros.jinja                        |   6 +-
 .../{github.osx.yml => github.osx.amd64.yml}  |  58 +++----
 dev/tasks/python-wheels/github.osx.arm64.yml  | 153 ++++++++++++++++++
 dev/tasks/tasks.yml                           |  30 +++-
 docker-compose.yml                            |  10 ++
 python/requirements-wheel-build.txt           |  11 +-
 python/requirements-wheel-test.txt            |  25 ++-
 22 files changed, 503 insertions(+), 94 deletions(-)
 create mode 100755 ci/scripts/install_python.sh
 create mode 100644 ci/vcpkg/arm64-osx-static-debug.cmake
 create mode 100644 ci/vcpkg/arm64-osx-static-release.cmake
 create mode 100644 ci/vcpkg/universal2-osx-static-debug.cmake
 create mode 100644 ci/vcpkg/universal2-osx-static-release.cmake
 rename dev/tasks/python-wheels/{github.osx.yml => github.osx.amd64.yml} (67%)
 create mode 100644 dev/tasks/python-wheels/github.osx.arm64.yml

diff --git a/ci/docker/python-wheel-manylinux-201x.dockerfile b/ci/docker/python-wheel-manylinux-201x.dockerfile
index 19246a46764..ae1b0a7767c 100644
--- a/ci/docker/python-wheel-manylinux-201x.dockerfile
+++ b/ci/docker/python-wheel-manylinux-201x.dockerfile
@@ -58,7 +58,9 @@ RUN git clone https://github.com/microsoft/vcpkg /opt/vcpkg && \
     ln -s /opt/vcpkg/vcpkg /usr/bin/vcpkg
 
 # Patch ports files as needed
-COPY ci/vcpkg arrow/ci/vcpkg
+COPY ci/vcpkg/*.patch \
+     ci/vcpkg/*linux*.cmake \
+     arrow/ci/vcpkg/
 RUN cd /opt/vcpkg && git apply --ignore-whitespace /arrow/ci/vcpkg/ports.patch
 
 ARG build_type=release
diff --git a/ci/docker/python-wheel-windows-vs2017.dockerfile b/ci/docker/python-wheel-windows-vs2017.dockerfile
index 0f66a20396e..ebf51d75d29 100644
--- a/ci/docker/python-wheel-windows-vs2017.dockerfile
+++ b/ci/docker/python-wheel-windows-vs2017.dockerfile
@@ -27,14 +27,19 @@ RUN choco install --no-progress -r -y cmake --installargs 'ADD_CMAKE_TO_PATH=Sys
 RUN setx path "%path%;C:\Program Files\Git\usr\bin"
 
 # Install vcpkg
+#
+# Compiling vcpkg itself from a git tag doesn't work anymore since vcpkg has
+# started to ship precompiled binaries for the vcpkg-tool.
 ARG vcpkg
 RUN git clone https://github.com/Microsoft/vcpkg && \
-    git -C vcpkg checkout %vcpkg% && \
-    vcpkg\bootstrap-vcpkg.bat -disableMetrics -win64 && \
-    setx PATH "%PATH%;C:\vcpkg"
+    vcpkg\bootstrap-vcpkg.bat -disableMetrics && \
+    setx PATH "%PATH%;C:\vcpkg" && \
+    git -C vcpkg checkout %vcpkg%
 
 # Patch ports files as needed
-COPY ci/vcpkg arrow/ci/vcpkg
+COPY ci/vcpkg/*.patch \
+     ci/vcpkg/*windows*.cmake \
+     arrow/ci/vcpkg/
 RUN cd vcpkg && git apply --ignore-whitespace C:/arrow/ci/vcpkg/ports.patch
 
 # Configure vcpkg and install dependencies
@@ -42,12 +47,12 @@ RUN cd vcpkg && git apply --ignore-whitespace C:/arrow/ci/vcpkg/ports.patch
 # statements but bash notation in ENV statements
 # VCPKG_FORCE_SYSTEM_BINARIES=1 spare around ~750MB of image size if the system
 # cmake's and ninja's versions are recent enough
-COPY ci/vcpkg arrow/ci/vcpkg
 ARG build_type=release
 ENV CMAKE_BUILD_TYPE=${build_type} \
     VCPKG_OVERLAY_TRIPLETS=C:\\arrow\\ci\\vcpkg \
     VCPKG_DEFAULT_TRIPLET=x64-windows-static-md-${build_type} \
     VCPKG_FEATURE_FLAGS=-manifests
+
 RUN vcpkg install --clean-after-build \
         abseil \
         aws-sdk-cpp[config,cognito-identity,core,identity-management,s3,sts,transfer] \
diff --git a/ci/scripts/install_python.sh b/ci/scripts/install_python.sh
new file mode 100755
index 00000000000..bede67216a2
--- /dev/null
+++ b/ci/scripts/install_python.sh
@@ -0,0 +1,66 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -eu
+
+declare -A platforms
+platforms=([windows]=Windows
+           [macos]=MacOSX
+           [linux]=Linux)
+
+declare -A versions
+versions=([3.6]=3.6.8
+          [3.7]=3.7.9
+          [3.8]=3.8.9
+          [3.9]=3.9.6)
+
+if [ "$#" -ne 2 ]; then
+  echo "Usage: $0 <platform> <version>"
+  exit 1
+elif [[ -z ${platforms[$1]} ]]; then
+  echo "Unexpected platform: ${1}"
+  exit 1
+fi
+
+platform=${platforms[$1]}
+version=$2
+full_version=${versions[$2]}
+
+if [ $platform = "MacOSX" ]; then
+    echo "Downloading Python installer..."
+    if [ "$(uname -m)" = "arm64" ]; then
+        fname="python-${full_version}-macos11.pkg"
+    else
+        fname="python-${full_version}-macosx10.9.pkg"
+    fi
+    wget "https://www.python.org/ftp/python/${full_version}/${fname}"
+
+    echo "Installing Python..."
+    installer -pkg $fname -target /
+    rm $fname
+
+    echo "Installing Pip..."
+    python="/Library/Frameworks/Python.framework/Versions/${version}/bin/python${version}"
+    pip="${python} -m pip"
+
+    $python -m ensurepip
+    $pip install -U pip setuptools virtualenv
+else
+    echo "Unsupported platform: $platform"
+fi
diff --git a/ci/scripts/python_wheel_macos_build.sh b/ci/scripts/python_wheel_macos_build.sh
index 93e4939af23..f9fe8f98a3e 100755
--- a/ci/scripts/python_wheel_macos_build.sh
+++ b/ci/scripts/python_wheel_macos_build.sh
@@ -19,8 +19,9 @@
 
 set -ex
 
-source_dir=${1}
-build_dir=${2}
+arch=${1}
+source_dir=${2}
+build_dir=${3}
 
 echo "=== (${PYTHON_VERSION}) Clear output directories and leftovers ==="
 # Clear output directories and leftovers
@@ -31,11 +32,32 @@ rm -rf ${source_dir}/python/repaired_wheels
 rm -rf ${source_dir}/python/pyarrow/*.so
 rm -rf ${source_dir}/python/pyarrow/*.so.*
 
-echo "=== (${PYTHON_VERSION}) Set OSX SDK and C flags ==="
-# Arrow is 64-bit-only at the moment
-export CFLAGS="-fPIC -arch x86_64 ${CFLAGS//-arch i386/}"
-export CXXFLAGS="-fPIC -arch x86_64 ${CXXFLAGS//-arch i386} -std=c++11"
-export SDKROOT="$(xcrun --show-sdk-path)"
+echo "=== (${PYTHON_VERSION}) Set SDK, C++ and Wheel flags ==="
+export _PYTHON_HOST_PLATFORM="macosx-${MACOSX_DEPLOYMENT_TARGET}-${arch}"
+export MACOSX_DEPLOYMENT_TARGET=${MACOSX_DEPLOYMENT_TARGET:-10.9}
+export SDKROOT=${SDKROOT:-$(xcrun --sdk macosx --show-sdk-path)}
+
+if [ $arch = "arm64" ]; then
+  export CMAKE_OSX_ARCHITECTURES="arm64"
+elif [ $arch = "x86_64" ]; then
+  export CMAKE_OSX_ARCHITECTURES="x86_64"
+elif [ $arch = "universal2" ]; then
+  export CMAKE_OSX_ARCHITECTURES="x86_64;arm64"
+else
+  echo "Unexpected architecture: $arch"
+  exit 1
+fi
+
+echo "=== (${PYTHON_VERSION}) Install Python build dependencies ==="
+export PIP_SITE_PACKAGES=$(python -c 'import site; print(site.getsitepackages()[0])')
+export PIP_TARGET_PLATFORM="macosx_${MACOSX_DEPLOYMENT_TARGET//./_}_${arch}"
+
+pip install \
+  --only-binary=:all: \
+  --target $PIP_SITE_PACKAGES \
+  --platform $PIP_TARGET_PLATFORM \
+  -r ${source_dir}/python/requirements-wheel-build.txt
+pip install "delocate>=0.9"
 
 echo "=== (${PYTHON_VERSION}) Building Arrow C++ libraries ==="
 : ${ARROW_DATASET:=ON}
@@ -48,6 +70,7 @@ echo "=== (${PYTHON_VERSION}) Building Arrow C++ libraries ==="
 : ${ARROW_PARQUET:=ON}
 : ${ARROW_PLASMA:=ON}
 : ${ARROW_S3:=ON}
+: ${ARROW_SIMD_LEVEL:="SSE4_2"}
 : ${ARROW_TENSORFLOW:=ON}
 : ${ARROW_WITH_BROTLI:=ON}
 : ${ARROW_WITH_BZ2:=ON}
@@ -57,19 +80,23 @@ echo "=== (${PYTHON_VERSION}) Building Arrow C++ libraries ==="
 : ${ARROW_WITH_ZSTD:=ON}
 : ${CMAKE_BUILD_TYPE:=release}
 : ${CMAKE_GENERATOR:=Ninja}
+: ${CMAKE_UNITY_BUILD:=ON}
 : ${VCPKG_FEATURE_FLAGS:=-manifests}
 : ${VCPKG_TARGET_TRIPLET:=${VCPKG_DEFAULT_TRIPLET:-x64-osx-static-${CMAKE_BUILD_TYPE}}}
 
 mkdir -p ${build_dir}/build
 pushd ${build_dir}/build
+
 cmake \
     -DARROW_BUILD_SHARED=ON \
+    -DCMAKE_APPLE_SILICON_PROCESSOR=arm64 \
+    -DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES} \
     -DARROW_BUILD_STATIC=OFF \
     -DARROW_BUILD_TESTS=OFF \
     -DARROW_DATASET=${ARROW_DATASET} \
     -DARROW_DEPENDENCY_SOURCE="VCPKG" \
     -DARROW_DEPENDENCY_USE_SHARED=OFF \
-    -DARROW_FLIGHT==${ARROW_FLIGHT} \
+    -DARROW_FLIGHT=${ARROW_FLIGHT} \
     -DARROW_GANDIVA=${ARROW_GANDIVA} \
     -DARROW_HDFS=${ARROW_HDFS} \
     -DARROW_JEMALLOC=${ARROW_JEMALLOC} \
@@ -81,6 +108,7 @@ cmake \
     -DARROW_PYTHON=ON \
     -DARROW_RPATH_ORIGIN=ON \
     -DARROW_S3=${ARROW_S3} \
+    -DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL} \
     -DARROW_TENSORFLOW=${ARROW_TENSORFLOW} \
     -DARROW_USE_CCACHE=ON \
     -DARROW_WITH_BROTLI=${ARROW_WITH_BROTLI} \
@@ -92,7 +120,7 @@ cmake \
     -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
     -DCMAKE_INSTALL_LIBDIR=lib \
     -DCMAKE_INSTALL_PREFIX=${build_dir}/install \
-    -DCMAKE_UNITY_BUILD=ON \
+    -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD} \
     -DOPENSSL_USE_STATIC_LIBS=ON \
     -DVCPKG_MANIFEST_MODE=OFF \
     -DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET} \
@@ -101,9 +129,6 @@ cmake \
 cmake --build . --target install
 popd
 
-# Check that we don't expose any unwanted symbols
-# check_arrow_visibility
-
 echo "=== (${PYTHON_VERSION}) Building wheel ==="
 export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE}
 export PYARROW_BUNDLE_ARROW_CPP=1
@@ -117,8 +142,11 @@ export PYARROW_WITH_ORC=${ARROW_ORC}
 export PYARROW_WITH_PARQUET=${ARROW_PARQUET}
 export PYARROW_WITH_PLASMA=${ARROW_PLASMA}
 export PYARROW_WITH_S3=${ARROW_S3}
+export PYARROW_CMAKE_OPTIONS="-DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES} -DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL}"
 # PyArrow build configuration
 export PKG_CONFIG_PATH=/usr/lib/pkgconfig:${build_dir}/install/lib/pkgconfig
+# Set PyArrow version explicitly
+export SETUPTOOLS_SCM_PRETEND_VERSION=${PYARROW_VERSION}
 
 pushd ${source_dir}/python
 python setup.py bdist_wheel
@@ -127,7 +155,7 @@ popd
 echo "=== (${PYTHON_VERSION}) Show dynamic libraries the wheel depend on ==="
 deps=$(delocate-listdeps ${source_dir}/python/dist/*.whl)
 
-if echo $deps | grep -v "^@rpath/lib\(arrow\|gandiva\|parquet\|plasma\)"; then
+if echo $deps | grep -v "^pyarrow/lib\(arrow\|gandiva\|parquet\|plasma\)"; then
   echo "There are non-bundled shared library dependencies."
   exit 1
 fi
diff --git a/ci/scripts/python_wheel_macos_test.sh b/ci/scripts/python_wheel_macos_test.sh
index 6ac8576d484..5dabf6e8c41 100755
--- a/ci/scripts/python_wheel_macos_test.sh
+++ b/ci/scripts/python_wheel_macos_test.sh
@@ -22,9 +22,11 @@ set -ex
 source_dir=${1}
 
 : ${ARROW_S3:=ON}
+: ${ARROW_FLIGHT:=ON}
 
 export PYARROW_TEST_CYTHON=OFF
 export PYARROW_TEST_DATASET=ON
+export PYARROW_TEST_FLIGHT=${ARROW_FLIGHT}
 export PYARROW_TEST_GANDIVA=OFF
 export PYARROW_TEST_HDFS=ON
 export PYARROW_TEST_ORC=ON
@@ -33,7 +35,6 @@ export PYARROW_TEST_PARQUET=ON
 export PYARROW_TEST_PLASMA=ON
 export PYARROW_TEST_S3=${ARROW_S3}
 export PYARROW_TEST_TENSORFLOW=ON
-export PYARROW_TEST_FLIGHT=ON
 
 export ARROW_TEST_DATA=${source_dir}/testing/data
 export PARQUET_TEST_DATA=${source_dir}/submodules/parquet-testing/data
@@ -47,7 +48,6 @@ import pyarrow
 import pyarrow._hdfs
 import pyarrow.csv
 import pyarrow.dataset
-import pyarrow.flight
 import pyarrow.fs
 import pyarrow.json
 import pyarrow.orc
@@ -58,6 +58,9 @@ import pyarrow.plasma
 if [ "${PYARROW_TEST_S3}" == "ON" ]; then
   python -c "import pyarrow._s3fs"
 fi
+if [ "${PYARROW_TEST_FLIGHT}" == "ON" ]; then
+  python -c "import pyarrow.flight"
+fi
 
 # Install testing dependencies
 pip install -r ${source_dir}/python/requirements-wheel-test.txt
diff --git a/ci/vcpkg/arm64-osx-static-debug.cmake b/ci/vcpkg/arm64-osx-static-debug.cmake
new file mode 100644
index 00000000000..6ed92b25b55
--- /dev/null
+++ b/ci/vcpkg/arm64-osx-static-debug.cmake
@@ -0,0 +1,25 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set(VCPKG_TARGET_ARCHITECTURE arm64)
+set(VCPKG_CRT_LINKAGE dynamic)
+set(VCPKG_LIBRARY_LINKAGE static)
+
+set(VCPKG_CMAKE_SYSTEM_NAME Darwin)
+set(VCPKG_OSX_ARCHITECTURES arm64)
+
+set(VCPKG_BUILD_TYPE debug)
diff --git a/ci/vcpkg/arm64-osx-static-release.cmake b/ci/vcpkg/arm64-osx-static-release.cmake
new file mode 100644
index 00000000000..0aa78121602
--- /dev/null
+++ b/ci/vcpkg/arm64-osx-static-release.cmake
@@ -0,0 +1,25 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set(VCPKG_TARGET_ARCHITECTURE arm64)
+set(VCPKG_CRT_LINKAGE dynamic)
+set(VCPKG_LIBRARY_LINKAGE static)
+
+set(VCPKG_CMAKE_SYSTEM_NAME Darwin)
+set(VCPKG_OSX_ARCHITECTURES arm64)
+
+set(VCPKG_BUILD_TYPE release)
diff --git a/ci/vcpkg/universal2-osx-static-debug.cmake b/ci/vcpkg/universal2-osx-static-debug.cmake
new file mode 100644
index 00000000000..7406ef3fe16
--- /dev/null
+++ b/ci/vcpkg/universal2-osx-static-debug.cmake
@@ -0,0 +1,25 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set(VCPKG_TARGET_ARCHITECTURE arm64)
+set(VCPKG_CRT_LINKAGE dynamic)
+set(VCPKG_LIBRARY_LINKAGE static)
+
+set(VCPKG_CMAKE_SYSTEM_NAME Darwin)
+set(VCPKG_OSX_ARCHITECTURES "x86_64\;arm64")
+
+set(VCPKG_BUILD_TYPE debug)
diff --git a/ci/vcpkg/universal2-osx-static-release.cmake b/ci/vcpkg/universal2-osx-static-release.cmake
new file mode 100644
index 00000000000..0388ce78d0e
--- /dev/null
+++ b/ci/vcpkg/universal2-osx-static-release.cmake
@@ -0,0 +1,25 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set(VCPKG_TARGET_ARCHITECTURE arm64)
+set(VCPKG_CRT_LINKAGE dynamic)
+set(VCPKG_LIBRARY_LINKAGE static)
+
+set(VCPKG_CMAKE_SYSTEM_NAME Darwin)
+set(VCPKG_OSX_ARCHITECTURES "x86_64\;arm64")
+
+set(VCPKG_BUILD_TYPE release)
diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake
index f12f071642b..aa8e5becab0 100644
--- a/cpp/cmake_modules/SetupCxxFlags.cmake
+++ b/cpp/cmake_modules/SetupCxxFlags.cmake
@@ -444,29 +444,31 @@ if(ARROW_CPU_FLAG STREQUAL "ppc")
 endif()
 
 if(ARROW_CPU_FLAG STREQUAL "armv8")
-  if(NOT CXX_SUPPORTS_ARMV8_ARCH)
-    message(FATAL_ERROR "Unsupported arch flag: ${ARROW_ARMV8_ARCH_FLAG}.")
-  endif()
-  if(ARROW_ARMV8_ARCH_FLAG MATCHES "native")
-    message(FATAL_ERROR "native arch not allowed, please specify arch explicitly.")
-  endif()
-  set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} ${ARROW_ARMV8_ARCH_FLAG}")
-
   if(NOT ARROW_SIMD_LEVEL STREQUAL "NONE")
     set(ARROW_HAVE_NEON ON)
-    add_definitions(-DARROW_HAVE_NEON)
-  endif()
 
-  if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS
-                                              "5.4")
-    message(WARNING "Disable Armv8 CRC and Crypto as compiler doesn't support them well.")
-  else()
-    if(ARROW_ARMV8_ARCH_FLAG MATCHES "\\+crypto")
-      add_definitions(-DARROW_HAVE_ARMV8_CRYPTO)
+    if(NOT CXX_SUPPORTS_ARMV8_ARCH)
+      message(FATAL_ERROR "Unsupported arch flag: ${ARROW_ARMV8_ARCH_FLAG}.")
+    endif()
+    if(ARROW_ARMV8_ARCH_FLAG MATCHES "native")
+      message(FATAL_ERROR "native arch not allowed, please specify arch explicitly.")
     endif()
-    # armv8.1+ implies crc support
-    if(ARROW_ARMV8_ARCH_FLAG MATCHES "armv8\\.[1-9]|\\+crc")
-      add_definitions(-DARROW_HAVE_ARMV8_CRC)
+    set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} ${ARROW_ARMV8_ARCH_FLAG}")
+
+    add_definitions(-DARROW_HAVE_NEON)
+
+    if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS
+                                                "5.4")
+      message(WARNING "Disable Armv8 CRC and Crypto as compiler doesn't support them well."
+      )
+    else()
+      if(ARROW_ARMV8_ARCH_FLAG MATCHES "\\+crypto")
+        add_definitions(-DARROW_HAVE_ARMV8_CRYPTO)
+      endif()
+      # armv8.1+ implies crc support
+      if(ARROW_ARMV8_ARCH_FLAG MATCHES "armv8\\.[1-9]|\\+crc")
+        add_definitions(-DARROW_HAVE_ARMV8_CRC)
+      endif()
     endif()
   endif()
 endif()
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index 39ccbbe72b0..3d3df156e04 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -1562,9 +1562,9 @@ if(ARROW_MIMALLOC)
   endif()
 
   set(MIMALLOC_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/mimalloc_ep/src/mimalloc_ep")
-  set(MIMALLOC_INCLUDE_DIR "${MIMALLOC_PREFIX}/lib/mimalloc-1.6/include")
+  set(MIMALLOC_INCLUDE_DIR "${MIMALLOC_PREFIX}/include/mimalloc-1.7")
   set(MIMALLOC_STATIC_LIB
-      "${MIMALLOC_PREFIX}/lib/mimalloc-1.6/${CMAKE_STATIC_LIBRARY_PREFIX}${MIMALLOC_LIB_BASE_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}"
+      "${MIMALLOC_PREFIX}/lib/mimalloc-1.7/${CMAKE_STATIC_LIBRARY_PREFIX}${MIMALLOC_LIB_BASE_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}"
   )
 
   set(MIMALLOC_CMAKE_ARGS
diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt
index a2f7f2c7213..e0cbb2a6a12 100644
--- a/cpp/thirdparty/versions.txt
+++ b/cpp/thirdparty/versions.txt
@@ -42,7 +42,7 @@ ARROW_JEMALLOC_BUILD_VERSION=5.2.1
 ARROW_LZ4_BUILD_VERSION=v1.9.3
 # mimalloc 1.6.7 didn't build on Visual Studio 2015
 # https://github.com/microsoft/mimalloc/issues/353
-ARROW_MIMALLOC_BUILD_VERSION=v1.6.4
+ARROW_MIMALLOC_BUILD_VERSION=v1.7.2
 ARROW_ORC_BUILD_VERSION=1.6.6
 ARROW_PROTOBUF_BUILD_VERSION=v3.14.0
 # Because of https://github.com/Tencent/rapidjson/pull/1323, we require
diff --git a/dev/archery/archery/docker/cli.py b/dev/archery/archery/docker/cli.py
index 01571b43c95..c6b4a6473b8 100644
--- a/dev/archery/archery/docker/cli.py
+++ b/dev/archery/archery/docker/cli.py
@@ -116,7 +116,8 @@ def docker_build(obj, image, *, force_pull, using_docker_cli,
         compose.build(image, use_cache=use_cache,
                       use_leaf_cache=use_leaf_cache,
                       using_docker=using_docker_cli,
-                      using_buildx=using_docker_buildx)
+                      using_buildx=using_docker_buildx,
+                      pull_parents=force_pull)
     except UndefinedImage as e:
         raise click.ClickException(
             "There is no service/image defined in docker-compose.yml with "
diff --git a/dev/archery/archery/docker/core.py b/dev/archery/archery/docker/core.py
index 2fe6e353ccc..aaf16bdfa6e 100644
--- a/dev/archery/archery/docker/core.py
+++ b/dev/archery/archery/docker/core.py
@@ -245,7 +245,7 @@ def _pull(service):
             _pull(service)
 
     def build(self, service_name, use_cache=True, use_leaf_cache=True,
-              using_docker=False, using_buildx=False):
+              using_docker=False, using_buildx=False, pull_parents=True):
         def _build(service, use_cache):
             if 'build' not in service:
                 # nothing to do
@@ -253,7 +253,7 @@ def _build(service, use_cache):
 
             args = []
             cache_from = list(service.get('build', {}).get('cache_from', []))
-            if use_cache:
+            if pull_parents:
                 for image in cache_from:
                     if image not in self.pull_memory:
                         try:
@@ -262,7 +262,8 @@ def _build(service, use_cache):
                             print(e)
                         finally:
                             self.pull_memory.add(image)
-            else:
+
+            if not use_cache:
                 args.append('--no-cache')
 
             # turn on inline build cache, this is a docker buildx feature
diff --git a/dev/archery/setup.py b/dev/archery/setup.py
index eb70551de52..66480737547 100755
--- a/dev/archery/setup.py
+++ b/dev/archery/setup.py
@@ -34,6 +34,8 @@
     'release': [jinja_req, 'jira', 'semver', 'gitpython'],
     'crossbow': ['github3.py', jinja_req, 'pygit2>=1.6.0', 'ruamel.yaml',
                  'setuptools_scm'],
+    'crossbow-upload': ['github3.py', jinja_req, 'ruamel.yaml',
+                        'setuptools_scm'],
 }
 extras['bot'] = extras['crossbow'] + ['pygithub', 'jira']
 extras['all'] = list(set(functools.reduce(operator.add, extras.values())))
diff --git a/dev/tasks/macros.jinja b/dev/tasks/macros.jinja
index 5f1056ca6a0..be265caa48a 100644
--- a/dev/tasks/macros.jinja
+++ b/dev/tasks/macros.jinja
@@ -69,7 +69,7 @@ on:
       python-version: 3.8
   - name: Setup Crossbow
     shell: bash
-    run: pip install -e arrow/dev/archery[crossbow]
+    run: pip install -e arrow/dev/archery[crossbow-upload]
   - name: Upload artifacts
     shell: bash
     run: |
@@ -116,7 +116,7 @@ on:
   - task: UsePythonVersion@0
     inputs:
       versionSpec: '3.8'
-  - script: pip install -e arrow/dev/archery[crossbow]
+  - script: pip install -e arrow/dev/archery[crossbow-upload]
     displayName: Install Crossbow
   - bash: |
       archery crossbow \
@@ -170,7 +170,7 @@ on:
 
 {%- macro travis_upload_releases(pattern) -%}
   - sudo -H pip3 install pygit2==1.0
-  - sudo -H pip3 install -e arrow/dev/archery[crossbow]
+  - sudo -H pip3 install -e arrow/dev/archery[crossbow-upload]
   - |
     archery crossbow \
       --queue-path $(pwd) \
diff --git a/dev/tasks/python-wheels/github.osx.yml b/dev/tasks/python-wheels/github.osx.amd64.yml
similarity index 67%
rename from dev/tasks/python-wheels/github.osx.yml
rename to dev/tasks/python-wheels/github.osx.amd64.yml
index a2e5c0af21b..863bd7fa9c1 100644
--- a/dev/tasks/python-wheels/github.osx.yml
+++ b/dev/tasks/python-wheels/github.osx.amd64.yml
@@ -20,37 +20,40 @@
 
 env:
   ARROW_S3: {{ arrow_s3 }}
-  MACOSX_DEPLOYMENT_TARGET: {{ macos_deployment_target }}
-  MB_PYTHON_VERSION: {{ python_version }}
-  PLAT: x86_64
+  CC: "clang"
+  CXX: "clang++"
+  MACOSX_DEPLOYMENT_TARGET: "{{ macos_deployment_target }}"
   PYARROW_BUILD_VERBOSE: 1
-  PYARROW_VERSION: {{ arrow.no_rc_version }}
-  PYTHON_VERSION: {{ python_version }}
-  SETUPTOOLS_SCM_PRETEND_VERSION: {{ arrow.no_rc_version }}
+  PYARROW_VERSION: "{{ arrow.no_rc_version }}"
+  PYTHON_VERSION: "{{ python_version }}"
+  PYTHON: "/Library/Frameworks/Python.framework/Versions/{{ python_version }}/bin/python{{ python_version }}"
   VCPKG_DEFAULT_TRIPLET: x64-osx-static-release
   VCPKG_FEATURE_FLAGS: "-manifests"
   VCPKG_OVERLAY_TRIPLETS: {{ "${{ github.workspace }}/arrow/ci/vcpkg" }}
   VCPKG_ROOT: {{ "${{ github.workspace }}/vcpkg" }}
+  VCPKG_VERSION: "{{ vcpkg_version }}"
 
 jobs:
   build:
     name: Build wheel for OS X
-    runs-on: macos-latest
+    runs-on: macos-10.15
     steps:
       {{ macros.github_checkout_arrow()|indent }}
 
       - name: Install System Dependencies
-        run: brew install bison coreutils ninja cmake
+        run: brew install bash bison coreutils ninja cmake
 
       - uses: actions/cache@v2
         id: vcpkg-cache
         with:
           path: vcpkg
-          key: vcpkg-{{ macos_deployment_target }}-{{ vcpkg_version }}-{{ "${{ hashFiles('arrow/ci/vcpkg/**') }}" }}
+          key: vcpkg-{{ macos_deployment_target }}-{{ vcpkg_version }}-{{ "${{ hashFiles('arrow/ci/vcpkg/*.patch', 'arrow/ci/vcpkg/*osx*.cmake') }}" }}
 
       - name: Install Vcpkg
         if: steps.vcpkg-cache.outputs.cache-hit != 'true'
         shell: bash
+        env:
+          MACOSX_DEPLOYMENT_TARGET: "10.15"
         run: arrow/ci/scripts/install_vcpkg.sh $VCPKG_VERSION $VCPKG_ROOT
 
       - name: Install Packages
@@ -83,35 +86,24 @@ jobs:
         run: $VCPKG_ROOT/vcpkg install aws-sdk-cpp[config,cognito-identity,core,identity-management,s3,sts,transfer]
       {% endif %}
 
-      - name: Setup Multibuild
-        run: |
-          git clone https://github.com/matthew-brett/multibuild
-          git -C multibuild checkout 03950c9a7feb09d215f82d6563c4ffd91274a1e1
+      - name: Install Python {{ python_version }}
+        shell: bash
+        run: sudo arrow/ci/scripts/install_python.sh macos {{ python_version }}
 
       - name: Build Wheel
-        env:
-          CONFIG_PATH: /dev/null
+        shell: bash
         run: |
-          # configure environment and install python
-          source multibuild/common_utils.sh
-          source multibuild/travis_osx_steps.sh
-          before_install
+          $PYTHON -m virtualenv build-env
+          source build-env/bin/activate
+          pip install --upgrade pip wheel
+          arrow/ci/scripts/python_wheel_macos_build.sh x86_64 $(pwd)/arrow $(pwd)/build
 
-          # install python dependencies
-          pip install -r arrow/python/requirements-wheel-build.txt delocate
-
-          # build the wheel
-          arrow/ci/scripts/python_wheel_macos_build.sh $(pwd)/arrow $(pwd)/build
-
-      - name: Setup Python for Testing
-        uses: actions/setup-python@v2
-        with:
-          python-version: "{{ python_version }}"
-
-      - name: Test the Wheel
+      - name: Test Wheel
+        shell: bash
         run: |
-          # TODO(kszucs): temporarily remove homebrew libs
-          unset MACOSX_DEPLOYMENT_TARGET
+          $PYTHON -m virtualenv test-env
+          source test-env/bin/activate
+          pip install --upgrade pip wheel
           arrow/ci/scripts/python_wheel_macos_test.sh $(pwd)/arrow
 
       {{ macros.github_upload_releases("arrow/python/dist/*.whl")|indent }}
diff --git a/dev/tasks/python-wheels/github.osx.arm64.yml b/dev/tasks/python-wheels/github.osx.arm64.yml
new file mode 100644
index 00000000000..4fa95bbefdc
--- /dev/null
+++ b/dev/tasks/python-wheels/github.osx.arm64.yml
@@ -0,0 +1,153 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Prerequisits on the host:
+# - brew install bash bison coreutils ninja cmake
+# - sudo arrow/ci/scripts/install_python.sh macos 3.9
+
+{% import 'macros.jinja' as macros with context %}
+
+{{ macros.github_header() }}
+
+env:
+  ARROW_FLIGHT: OFF
+  ARROW_JEMALLOC: OFF
+  ARROW_SIMD_LEVEL: NONE
+  CC: "clang"
+  CMAKE_BUILD_TYPE: release
+  CMAKE_CXX_COMPILER_LAUNCHER: "ccache"
+  CXX: "clang++"
+  MACOSX_DEPLOYMENT_TARGET: "{{ macos_deployment_target }}"
+  PYARROW_BUILD_VERBOSE: 1
+  PYARROW_VERSION: "{{ arrow.no_rc_version }}"
+  PYTHON_VERSION: "{{ python_version }}"
+  PYTHON: "/Library/Frameworks/Python.framework/Versions/{{ python_version }}/bin/python{{ python_version }}"
+  VCPKG_DEFAULT_TRIPLET: {{ arch }}-osx-static-release
+  VCPKG_FEATURE_FLAGS: "-manifests"
+  VCPKG_OVERLAY_TRIPLETS: {{ "${{ github.workspace }}/arrow/ci/vcpkg" }}
+  VCPKG_ROOT: {{ "${{ github.workspace }}/vcpkg" }}
+  VCPKG_VERSION: "{{ vcpkg_version }}"
+
+jobs:
+  build:
+    name: Build wheel for OS X
+    runs-on: self-hosted
+    steps:
+      - name: Cleanup
+        shell: bash
+        run: rm -rf arrow vcpkg build crossbow-env build-env test-*-env
+
+      {{ macros.github_checkout_arrow()|indent }}
+
+      - name: Add Brew's Bison to PATH
+        shell: bash
+        run: echo "/opt/homebrew/opt/bison/bin" >> $GITHUB_PATH
+
+      - name: Install Vcpkg
+        shell: bash
+        env:
+          MACOSX_DEPLOYMENT_TARGET: "11.0"
+        run: arch -arm64 arrow/ci/scripts/install_vcpkg.sh $VCPKG_VERSION $VCPKG_ROOT
+
+      - name: Install OpenSSL
+        shell: bash
+        run: arch -arm64 $VCPKG_ROOT/vcpkg install openssl
+
+      {% if arch == "universal2" %}
+      # OpenSSL doesn't provide an universal2 configuration yet, so vcpkg is
+      # unable to propagate the list of architectures from VCPKG_OSX_ARCHIETCTURES.
+      # In order to prevent link time warnings (which may turn out to be errors)
+      # we compile OpenSSL separately for the two architectures and merge the
+      # binaries into universal2 ones using `lipo`.
+      - name: Create universal binaries for OpenSSL
+        shell: bash
+        run: |
+          for arch in arm64 x64; do
+            VCPKG_DEFAULT_TRIPLET=${arch}-osx-static-release arch -arm64 $VCPKG_ROOT/vcpkg install openssl
+          done
+          for lib in libcrypto libssl; do
+            lipo -create $VCPKG_ROOT/installed/arm64-osx-static-release/lib/${lib}.a \
+                         $VCPKG_ROOT/installed/x64-osx-static-release/lib/${lib}.a \
+                 -output $VCPKG_ROOT/installed/universal2-osx-static-release/lib/${lib}.a
+          done
+      {% endif %}
+
+      - name: Install Packages
+        run: |
+          arch -arm64 $VCPKG_ROOT/vcpkg install \
+            aws-sdk-cpp[config,cognito-identity,core,identity-management,s3,sts,transfer] \
+            boost-filesystem \
+            brotli \
+            bzip2 \
+            c-ares \
+            curl \
+            flatbuffers \
+            gflags \
+            glog \
+            lz4 \
+            orc \
+            protobuf \
+            rapidjson \
+            re2 \
+            snappy \
+            thrift \
+            utf8proc \
+            zlib \
+            zstd
+
+      - name: Build Wheel
+        shell: bash
+        run: |
+          $PYTHON -m virtualenv build-env
+          source build-env/bin/activate
+          pip install --upgrade pip wheel
+          arch -arm64 arrow/ci/scripts/python_wheel_macos_build.sh {{ arch }} $(pwd)/arrow $(pwd)/build
+
+      - name: Test Wheel on ARM64
+        shell: bash
+        run: |
+          $PYTHON -m virtualenv test-arm64-env
+          source test-arm64-env/bin/activate
+          pip install --upgrade pip wheel
+          arch -arm64 arrow/ci/scripts/python_wheel_macos_test.sh $(pwd)/arrow
+
+      {% if arch == "universal2" %}
+      - name: Test Wheel on AMD64
+        shell: bash
+        run: |
+          $PYTHON -m virtualenv test-amd64-env
+          source test-amd64-env/bin/activate
+          pip install --upgrade pip wheel
+          arch -x86_64 arrow/ci/scripts/python_wheel_macos_test.sh $(pwd)/arrow
+      {% endif %}
+
+      - name: Upload artifacts
+        shell: bash
+        run: |
+          $PYTHON -m virtualenv crossbow-env
+          source crossbow-env/bin/activate
+          arch -arm64 pip install -e arrow/dev/archery[crossbow-upload]
+          arch -arm64 archery crossbow \
+          --queue-path $(pwd) \
+          --queue-remote {{ queue_remote_url }} \
+          upload-artifacts \
+          --sha {{ task.branch }} \
+          --tag {{ task.tag }} \
+          "arrow/python/dist/*.whl"
+        env:
+          CROSSBOW_GITHUB_TOKEN: {{ "${{ secrets.CROSSBOW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}" }}
+
+      {{ macros.github_upload_gemfury("arrow/python/dist/*.whl")|indent }}
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index 7aea18fdd83..ea0571f3989 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -355,9 +355,9 @@ tasks:
                                                    ("10.13", "high-sierra", "ON")] %}
   {% set platform_tag = "macosx_{}_x86_64".format(macos_version.replace('.', '_')) %}
 
-  wheel-osx-{{ macos_codename }}-{{ python_tag }}:
+  wheel-macos-{{ macos_codename }}-{{ python_tag }}-amd64:
     ci: github
-    template: python-wheels/github.osx.yml
+    template: python-wheels/github.osx.amd64.yml
     params:
       vcpkg_version: "2021.04.30"
       python_version: {{ python_version }}
@@ -370,7 +370,7 @@ tasks:
 
 {############################## Wheel Windows ################################}
 
-  wheel-windows-{{ python_tag }}:
+  wheel-windows-{{ python_tag }}-amd64:
     ci: github
     template: python-wheels/github.windows.yml
     params:
@@ -380,6 +380,30 @@ tasks:
 
 {% endfor %}
 
+{############################## Wheel OSX M1 #################################}
+
+  wheel-macos-big-sur-cp39-arm64:
+    ci: github
+    template: python-wheels/github.osx.arm64.yml
+    params:
+      arch: arm64
+      vcpkg_version: "2021.04.30"
+      python_version: "3.9"
+      macos_deployment_target: "11.0"
+    artifacts:
+      - pyarrow-{no_rc_version}-cp39-cp39-macosx_11_0_arm64.whl
+
+  wheel-macos-big-sur-cp39-universal2:
+    ci: github
+    template: python-wheels/github.osx.arm64.yml
+    params:
+      arch: universal2
+      vcpkg_version: "2021.04.30"
+      python_version: "3.9"
+      macos_deployment_target: "11.0"
+    artifacts:
+      - pyarrow-{no_rc_version}-cp39-cp39-macosx_11_0_universal2.whl
+
 {############################ Python sdist ####################################}
 
   python-sdist:
diff --git a/docker-compose.yml b/docker-compose.yml
index 93bba30772b..84ce4dac1ba 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -810,6 +810,16 @@ services:
     command: /arrow/ci/scripts/python_wheel_manylinux_test.sh unittests
 
   python-wheel-windows-vs2017:
+    # The windows images must be built locally and pushed to a remote registry:
+    # export REPO=ghcr.io/ursacomputing/arrow
+    # PYTHON=3.6 archery docker build --no-pull --using-docker-cli python-wheel-windows-vs2017
+    # PYTHON=3.7 archery docker build --no-pull --using-docker-cli python-wheel-windows-vs2017
+    # PYTHON=3.8 archery docker build --no-pull --using-docker-cli python-wheel-windows-vs2017
+    # PYTHON=3.9 archery docker build --no-pull --using-docker-cli python-wheel-windows-vs2017
+    # PYTHON=3.6 archery docker push python-wheel-windows-vs2017
+    # PYTHON=3.7 archery docker push python-wheel-windows-vs2017
+    # PYTHON=3.8 archery docker push python-wheel-windows-vs2017
+    # PYTHON=3.9 archery docker push python-wheel-windows-vs2017
     image: ${REPO}:python-${PYTHON}-wheel-windows-vs2017-vcpkg-${VCPKG}
     build:
       args:
diff --git a/python/requirements-wheel-build.txt b/python/requirements-wheel-build.txt
index 74a352e8a5b..b2878d2971c 100644
--- a/python/requirements-wheel-build.txt
+++ b/python/requirements-wheel-build.txt
@@ -1,6 +1,11 @@
 cython>=0.29.11
-numpy==1.16.6; python_version < "3.9" and platform_machine != "aarch64"
-numpy==1.19.4; python_version >= "3.9" or platform_machine == "aarch64"
-pandas<1.1.0; python_version < "3.8"
 setuptools_scm
 wheel
+numpy==1.19.4; platform_system == "Linux"   and platform_machine == "aarch64"
+numpy==1.16.6; platform_system == "Linux"   and platform_machine != "aarch64" and python_version <  "3.9"
+numpy==1.19.4; platform_system == "Linux"   and platform_machine != "aarch64" and python_version >= "3.9"
+numpy==1.21.0; platform_system == "Darwin"  and platform_machine == "arm64"
+numpy==1.16.6; platform_system == "Darwin"  and platform_machine != "arm64"   and python_version <  "3.8"
+numpy==1.19.4; platform_system == "Darwin"  and platform_machine != "arm64"   and python_version >= "3.8"
+numpy==1.16.6; platform_system == "Windows"                                   and python_version <  "3.9"
+numpy==1.19.4; platform_system == "Windows"                                   and python_version >= "3.9"
diff --git a/python/requirements-wheel-test.txt b/python/requirements-wheel-test.txt
index 7377f6b6d8f..723dbdff76c 100644
--- a/python/requirements-wheel-test.txt
+++ b/python/requirements-wheel-test.txt
@@ -1,11 +1,26 @@
 cffi
 cython
 hypothesis
-numpy==1.19.4; python_version != "3.9" or platform_machine != "aarch64"
-numpy==1.20.1; python_version == "3.9" and platform_machine == "aarch64"
-pandas<1.1.0; python_version < "3.8" and platform_machine != "aarch64"
-pandas; python_version >= "3.8" or platform_machine == "aarch64"
-pickle5; (python_version == "3.6" or python_version == "3.7") and sys_platform != "win32"
+pickle5; platform_system != "Windows" and python_version < "3.8"
 pytest
 pytest-lazy-fixture
 pytz
+
+numpy==1.19.5; platform_system == "Linux"   and platform_machine == "aarch64" and python_version <  "3.7"
+numpy==1.20.3; platform_system == "Linux"   and platform_machine == "aarch64" and python_version >= "3.7"
+numpy==1.19.5; platform_system == "Linux"   and platform_machine != "aarch64" and python_version <  "3.9"
+numpy==1.20.3; platform_system == "Linux"   and platform_machine != "aarch64" and python_version >= "3.9"
+numpy==1.21.0; platform_system == "Darwin"  and platform_machine == "arm64"
+numpy==1.19.5; platform_system == "Darwin"  and platform_machine != "arm64"   and python_version <  "3.9"
+numpy==1.20.3; platform_system == "Darwin"  and platform_machine != "arm64"   and python_version >= "3.9"
+numpy==1.19.5; platform_system == "Windows"                                   and python_version <  "3.9"
+numpy==1.20.3; platform_system == "Windows"                                   and python_version >= "3.9"
+
+pandas<1.1.0;  platform_system == "Linux"   and platform_machine != "aarch64" and python_version <  "3.8"
+pandas;        platform_system == "Linux"   and platform_machine != "aarch64" and python_version >= "3.8"
+pandas;        platform_system == "Linux"   and platform_machine == "aarch64"
+pandas<1.1.0;  platform_system == "Darwin"  and platform_machine != "arm64"   and python_version <  "3.8"
+pandas;        platform_system == "Darwin"  and platform_machine != "arm64"   and python_version >= "3.8"
+pandas;        platform_system == "Darwin"  and platform_machine == "arm64"
+pandas<1.1.0;  platform_system == "Windows"                                   and python_version <  "3.8"
+pandas;        platform_system == "Windows"                                   and python_version >= "3.8"

From 2215b5ecbe8cc0590b8aeef02019edb98f8ecbed Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Tue, 20 Jul 2021 10:24:51 +0900
Subject: [PATCH 604/719] ARROW-13384: [C++] Specify minimum required zstd
 version in cmake
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #10747 from nealrichardson/cmake-zstd-version

Lead-authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Co-authored-by: Neal Richardson <neal.p.richardson@gmail.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 ci/docker/debian-10-cpp.dockerfile                    |  6 +++---
 ci/docker/ubuntu-18.04-cpp.dockerfile                 |  8 ++++----
 cpp/cmake_modules/Findzstd.cmake                      |  2 +-
 cpp/cmake_modules/ThirdpartyToolchain.cmake           |  7 ++++++-
 cpp/thirdparty/versions.txt                           |  2 +-
 dev/tasks/linux-packages/apache-arrow/Rakefile        | 11 +++++++++++
 .../apache-arrow/apt/debian-buster/Dockerfile         |  1 -
 .../apache-arrow/apt/ubuntu-bionic/Dockerfile         |  1 -
 .../linux-packages/apache-arrow/debian/control.in     |  4 ++--
 .../apache-arrow/debian/libarrow-dev.install          |  3 +--
 .../apache-arrow/yum/amazon-linux-2/Dockerfile        |  1 -
 .../linux-packages/apache-arrow/yum/arrow.spec.in     |  9 +++++++++
 12 files changed, 38 insertions(+), 17 deletions(-)

diff --git a/ci/docker/debian-10-cpp.dockerfile b/ci/docker/debian-10-cpp.dockerfile
index 83f8ce529cb..d99a2c161bd 100644
--- a/ci/docker/debian-10-cpp.dockerfile
+++ b/ci/docker/debian-10-cpp.dockerfile
@@ -60,7 +60,6 @@ RUN apt-get update -y -q && \
         libssl-dev \
         libthrift-dev \
         libutf8proc-dev \
-        libzstd-dev \
         llvm-${llvm}-dev \
         make \
         ninja-build \
@@ -77,8 +76,8 @@ COPY ci/scripts/install_minio.sh \
 RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local
 
 ENV ARROW_BUILD_TESTS=ON \
-    ARROW_DEPENDENCY_SOURCE=SYSTEM \
     ARROW_DATASET=ON \
+    ARROW_DEPENDENCY_SOURCE=SYSTEM \
     ARROW_FLIGHT=ON \
     ARROW_GANDIVA=ON \
     ARROW_HOME=/usr/local \
@@ -101,4 +100,5 @@ ENV ARROW_BUILD_TESTS=ON \
     GTest_SOURCE=BUNDLED \
     ORC_SOURCE=BUNDLED \
     PATH=/usr/lib/ccache/:$PATH \
-    Protobuf_SOURCE=BUNDLED
+    Protobuf_SOURCE=BUNDLED \
+    zstd_SOURCE=BUNDLED
diff --git a/ci/docker/ubuntu-18.04-cpp.dockerfile b/ci/docker/ubuntu-18.04-cpp.dockerfile
index 065f8faf278..0c05ac4ee6b 100644
--- a/ci/docker/ubuntu-18.04-cpp.dockerfile
+++ b/ci/docker/ubuntu-18.04-cpp.dockerfile
@@ -79,7 +79,6 @@ RUN apt-get update -y -q && \
         libre2-dev \
         libsnappy-dev \
         libssl-dev \
-        libzstd-dev \
         ninja-build \
         pkg-config \
         protobuf-compiler \
@@ -99,8 +98,8 @@ RUN apt-get update -y -q && \
 # - utf8proc is too old(v2.1.0)
 # - s3 tests would require boost-asio that is included since Boost 1.66.0
 ENV ARROW_BUILD_TESTS=ON \
-    ARROW_DEPENDENCY_SOURCE=SYSTEM \
     ARROW_DATASET=ON \
+    ARROW_DEPENDENCY_SOURCE=SYSTEM \
     ARROW_FLIGHT=OFF \
     ARROW_GANDIVA=ON \
     ARROW_HDFS=ON \
@@ -123,8 +122,9 @@ ENV ARROW_BUILD_TESTS=ON \
     AWSSDK_SOURCE=BUNDLED \
     GTest_SOURCE=BUNDLED \
     ORC_SOURCE=BUNDLED \
-    PARQUET_BUILD_EXECUTABLES=ON \
     PARQUET_BUILD_EXAMPLES=ON \
+    PARQUET_BUILD_EXECUTABLES=ON \
     PATH=/usr/lib/ccache/:$PATH \
     Thrift_SOURCE=BUNDLED \
-    utf8proc_SOURCE=BUNDLED
\ No newline at end of file
+    utf8proc_SOURCE=BUNDLED \
+    zstd_SOURCE=BUNDLED
diff --git a/cpp/cmake_modules/Findzstd.cmake b/cpp/cmake_modules/Findzstd.cmake
index 73b7ab250fb..3fc14ec0d72 100644
--- a/cpp/cmake_modules/Findzstd.cmake
+++ b/cpp/cmake_modules/Findzstd.cmake
@@ -41,7 +41,7 @@ else()
       "${CMAKE_STATIC_LIBRARY_PREFIX}${ZSTD_LIB_NAME_BASE}${ZSTD_STATIC_LIB_SUFFIX}")
 endif()
 
-# First, find via if specified ZTD_ROOT
+# First, find via if specified ZSTD_ROOT
 if(ZSTD_ROOT)
   message(STATUS "Using ZSTD_ROOT: ${ZSTD_ROOT}")
   find_library(ZSTD_LIB
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index 3d3df156e04..a79d4fd7bb3 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -2088,7 +2088,12 @@ macro(build_zstd)
 endmacro()
 
 if(ARROW_WITH_ZSTD)
-  resolve_dependency(zstd PC_PACKAGE_NAMES libzstd)
+  # ARROW-13384: ZSTD_minCLevel was added in v1.4.0, required by ARROW-13091
+  resolve_dependency(zstd
+                     PC_PACKAGE_NAMES
+                     libzstd
+                     REQUIRED_VERSION
+                     1.4.0)
 
   if(TARGET zstd::libzstd)
     set(ARROW_ZSTD_LIBZSTD zstd::libzstd)
diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt
index e0cbb2a6a12..5b7528b9b64 100644
--- a/cpp/thirdparty/versions.txt
+++ b/cpp/thirdparty/versions.txt
@@ -56,7 +56,7 @@ ARROW_THRIFT_BUILD_MD5_CHECKSUM=38a27d391a2b03214b444cb13d5664f1
 ARROW_UTF8PROC_BUILD_VERSION=v2.6.1
 ARROW_XSIMD_BUILD_VERSION=e9234cd6e6f4428fc260073b2c34ffe86fda1f34
 ARROW_ZLIB_BUILD_VERSION=1.2.11
-ARROW_ZSTD_BUILD_VERSION=v1.4.8
+ARROW_ZSTD_BUILD_VERSION=v1.5.0
 
 # The first field is the name of the environment variable expected by cmake.
 # This _must_ match what is defined. The second field is the name of the
diff --git a/dev/tasks/linux-packages/apache-arrow/Rakefile b/dev/tasks/linux-packages/apache-arrow/Rakefile
index e8de53fce4b..f401a06d64c 100644
--- a/dev/tasks/linux-packages/apache-arrow/Rakefile
+++ b/dev/tasks/linux-packages/apache-arrow/Rakefile
@@ -126,12 +126,23 @@ class ApacheArrowPackageTask < PackageTask
     control.gsub(/@USE_SYSTEM_UTF8PROC@/, use_system_utf8proc)
   end
 
+  def apt_prepare_debian_control_zstd(control, target)
+    case target
+    when /\Adebian-buster/, /\Aubuntu-bionic/
+      use_system_zstd = "#"
+    else
+      use_system_zstd = ""
+    end
+    control.gsub(/@USE_SYSTEM_ZSTD@/, use_system_zstd)
+  end
+
   def apt_prepare_debian_control(control_in, target)
     control = control_in.dup
     control = apt_prepare_debian_control_cuda_architecture(control, target)
     control = apt_prepare_debian_control_grpc(control, target)
     control = apt_prepare_debian_control_c_ares(control, target)
     control = apt_prepare_debian_control_utf8proc(control, target)
+    control = apt_prepare_debian_control_zstd(control, target)
     control
   end
 end
diff --git a/dev/tasks/linux-packages/apache-arrow/apt/debian-buster/Dockerfile b/dev/tasks/linux-packages/apache-arrow/apt/debian-buster/Dockerfile
index 0c681ba7c2a..11a33a1300a 100644
--- a/dev/tasks/linux-packages/apache-arrow/apt/debian-buster/Dockerfile
+++ b/dev/tasks/linux-packages/apache-arrow/apt/debian-buster/Dockerfile
@@ -61,7 +61,6 @@ RUN \
     libssl-dev \
     libthrift-dev \
     libutf8proc-dev \
-    libzstd-dev \
     lsb-release \
     ninja-build \
     pkg-config \
diff --git a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-bionic/Dockerfile b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-bionic/Dockerfile
index b392079fbe4..ac0c6a58d62 100644
--- a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-bionic/Dockerfile
+++ b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-bionic/Dockerfile
@@ -52,7 +52,6 @@ RUN \
     libre2-dev \
     libsnappy-dev \
     libssl-dev \
-    libzstd-dev \
     llvm-10-dev \
     lsb-release \
     ninja-build \
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/control.in b/dev/tasks/linux-packages/apache-arrow/debian/control.in
index b20955f467e..2d56463139f 100644
--- a/dev/tasks/linux-packages/apache-arrow/debian/control.in
+++ b/dev/tasks/linux-packages/apache-arrow/debian/control.in
@@ -25,7 +25,7 @@ Build-Depends:
   libsnappy-dev,
   libssl-dev,
 @USE_SYSTEM_UTF8PROC@  libutf8proc-dev,
-  libzstd-dev,
+@USE_SYSTEM_ZSTD@  libzstd-dev,
   ninja-build,
   nvidia-cuda-toolkit [!arm64],
   pkg-config,
@@ -135,7 +135,7 @@ Depends:
   libsnappy-dev,
   libssl-dev,
 @USE_SYSTEM_UTF8PROC@  libutf8proc-dev,
-  libzstd-dev,
+@USE_SYSTEM_ZSTD@  libzstd-dev,
 @USE_SYSTEM_GRPC@  protobuf-compiler-grpc,
   zlib1g-dev
 Description: Apache Arrow is a data processing library for analysis
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dev.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dev.install
index 83ddad126f4..ccd0c4e5b06 100644
--- a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dev.install
+++ b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dev.install
@@ -6,8 +6,7 @@ usr/lib/*/cmake/arrow/Find*Alt.cmake
 usr/lib/*/cmake/arrow/FindArrow.cmake
 usr/lib/*/cmake/arrow/FindBrotli.cmake
 usr/lib/*/cmake/arrow/FindLz4.cmake
-usr/lib/*/cmake/arrow/FindSnappy.cmake
-usr/lib/*/cmake/arrow/Find[uz]*.cmake
+usr/lib/*/cmake/arrow/Find[Suz]*.cmake
 usr/lib/*/cmake/arrow/arrow-config.cmake
 usr/lib/*/libarrow.a
 usr/lib/*/libarrow.so
diff --git a/dev/tasks/linux-packages/apache-arrow/yum/amazon-linux-2/Dockerfile b/dev/tasks/linux-packages/apache-arrow/yum/amazon-linux-2/Dockerfile
index 40cf3861feb..6ada8972908 100644
--- a/dev/tasks/linux-packages/apache-arrow/yum/amazon-linux-2/Dockerfile
+++ b/dev/tasks/linux-packages/apache-arrow/yum/amazon-linux-2/Dockerfile
@@ -38,7 +38,6 @@ RUN \
     glog-devel \
     gobject-introspection-devel \
     gtk-doc \
-    libzstd-devel \
     lz4-devel \
     make \
     openssl-devel \
diff --git a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in
index f2c75e03f64..31b057f99d6 100644
--- a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in
+++ b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in
@@ -56,6 +56,7 @@
 # EPEL ships utf8proc but it's old.
 # %define have_utf8proc (%{rhel} == 7)
 %define have_utf8proc 0
+%define have_zstd (!%{is_amazon_linux})
 
 Name:		@PACKAGE@
 Version:	@VERSION@
@@ -86,7 +87,9 @@ BuildRequires:	gflags-devel
 %endif
 BuildRequires:	git
 BuildRequires:	glog-devel
+%if %{have_zstd}
 BuildRequires:	libzstd-devel
+%endif
 BuildRequires:	lz4-devel %{lz4_requirement}
 BuildRequires:	ninja-build
 BuildRequires:	openssl-devel
@@ -202,7 +205,9 @@ Requires:	brotli
 Requires:	gflags
 %endif
 Requires:	glog
+%if %{have_zstd}
 Requires:	libzstd
+%endif
 Requires:	lz4 %{lz4_requirement}
 %if %{have_re2}
 Requires:	re2
@@ -231,7 +236,9 @@ Requires:	bzip2-devel
 %if %{use_flight}
 Requires:	c-ares-devel
 %endif
+%if %{have_zstd}
 Requires:	libzstd-devel
+%endif
 Requires:	lz4-devel %{lz4_requirement}
 Requires:	openssl-devel
 %if %{have_rapidjson}
@@ -274,7 +281,9 @@ Libraries and header files for Apache Arrow C++.
 %if %{have_utf8proc}
 %{_libdir}/cmake/arrow/Findutf8proc.cmake
 %endif
+%if %{have_zstd}
 %{_libdir}/cmake/arrow/Findzstd.cmake
+%endif
 %{_libdir}/cmake/arrow/arrow-config.cmake
 %{_libdir}/libarrow.a
 %{_libdir}/libarrow.so

From 852933e71fa643bef7aef5f63776dd9831767bc7 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Tue, 20 Jul 2021 12:42:27 +0200
Subject: [PATCH 605/719] ARROW-13382: [C++] Avoid multiple definitions of same
 symbol
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #10750 from lidavidm/arrow-13382

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 .../compute/kernels/aggregate_basic_internal.h    |  3 ++-
 .../arrow/compute/kernels/aggregate_internal.h    | 15 +++++++++++----
 .../arrow/compute/kernels/aggregate_var_std.cc    |  5 +++--
 3 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h b/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h
index bb1d53c02ac..5163d3fd03d 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h
+++ b/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h
@@ -67,7 +67,8 @@ struct SumImpl : public ScalarAggregator {
             static_cast<typename SumType::c_type>(BooleanArray(data).true_count());
       } else {
         this->sum +=
-            arrow::compute::detail::SumArray<CType, typename SumType::c_type>(*data);
+            arrow::compute::detail::SumArray<CType, typename SumType::c_type, SimdLevel>(
+                *data);
       }
     } else {
       const auto& data = *batch[0].scalar();
diff --git a/cpp/src/arrow/compute/kernels/aggregate_internal.h b/cpp/src/arrow/compute/kernels/aggregate_internal.h
index d74881108ae..ed29f26f2c3 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_internal.h
+++ b/cpp/src/arrow/compute/kernels/aggregate_internal.h
@@ -63,9 +63,15 @@ namespace detail {
 
 using arrow::internal::VisitSetBitRunsVoid;
 
+// SumArray must be parameterized with the SIMD level since it's called both from
+// translation units with and without vectorization. Normally it gets inlined but
+// if not, without the parameter, we'll have multiple definitions of the same
+// symbol and we'll get unexpected results.
+
 // non-recursive pairwise summation for floating points
 // https://en.wikipedia.org/wiki/Pairwise_summation
-template <typename ValueType, typename SumType, typename ValueFunc>
+template <typename ValueType, typename SumType, SimdLevel::type SimdLevel,
+          typename ValueFunc>
 enable_if_t<std::is_floating_point<SumType>::value, SumType> SumArray(
     const ArrayData& data, ValueFunc&& func) {
   const int64_t data_size = data.length - data.GetNullCount();
@@ -139,7 +145,8 @@ enable_if_t<std::is_floating_point<SumType>::value, SumType> SumArray(
 }
 
 // naive summation for integers
-template <typename ValueType, typename SumType, typename ValueFunc>
+template <typename ValueType, typename SumType, SimdLevel::type SimdLevel,
+          typename ValueFunc>
 enable_if_t<!std::is_floating_point<SumType>::value, SumType> SumArray(
     const ArrayData& data, ValueFunc&& func) {
   SumType sum = 0;
@@ -153,9 +160,9 @@ enable_if_t<!std::is_floating_point<SumType>::value, SumType> SumArray(
   return sum;
 }
 
-template <typename ValueType, typename SumType>
+template <typename ValueType, typename SumType, SimdLevel::type SimdLevel>
 SumType SumArray(const ArrayData& data) {
-  return SumArray<ValueType, SumType>(
+  return SumArray<ValueType, SumType, SimdLevel>(
       data, [](ValueType v) { return static_cast<SumType>(v); });
 }
 
diff --git a/cpp/src/arrow/compute/kernels/aggregate_var_std.cc b/cpp/src/arrow/compute/kernels/aggregate_var_std.cc
index 74d7b390c4f..d6965fed4a3 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_var_std.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_var_std.cc
@@ -50,10 +50,11 @@ struct VarStdState {
 
     using SumType =
         typename std::conditional<is_floating_type<T>::value, double, int128_t>::type;
-    SumType sum = arrow::compute::detail::SumArray<CType, SumType>(*array.data());
+    SumType sum =
+        arrow::compute::detail::SumArray<CType, SumType, SimdLevel::NONE>(*array.data());
 
     const double mean = static_cast<double>(sum) / count;
-    const double m2 = arrow::compute::detail::SumArray<CType, double>(
+    const double m2 = arrow::compute::detail::SumArray<CType, double, SimdLevel::NONE>(
         *array.data(), [mean](CType value) {
           const double v = static_cast<double>(value);
           return (v - mean) * (v - mean);

From ec52951371336356b2fcfe9d9fa1b6edf4776837 Mon Sep 17 00:00:00 2001
From: Weston Pace <weston.pace@gmail.com>
Date: Tue, 20 Jul 2021 15:38:10 +0200
Subject: [PATCH 606/719] ARROW-13224: [Python][Doc] Documentation missing for
 pyarrow.dataset.write_dataset
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I've added various examples of writing datasets and cleaned up a few warnings

Closes #10693 from westonpace/feature/ARROW-13224--python-doc-documentation-missing-for-pyarrow-da

Lead-authored-by: Weston Pace <weston.pace@gmail.com>
Co-authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 cpp/src/arrow/dataset/file_base.h             |   1 +
 docs/source/conf.py                           |   5 +
 docs/source/developers/crossbow.rst           |  18 +-
 docs/source/developers/experimental_repos.rst |  12 +-
 docs/source/java/algorithm.rst                |  11 +-
 docs/source/python/api/dataset.rst            |   1 +
 docs/source/python/api/ipc.rst                |   1 +
 docs/source/python/dataset.rst                | 178 ++++++++++++++++--
 python/pyarrow/_hdfs.pyx                      |   8 +-
 python/pyarrow/array.pxi                      |   1 -
 10 files changed, 193 insertions(+), 43 deletions(-)

diff --git a/cpp/src/arrow/dataset/file_base.h b/cpp/src/arrow/dataset/file_base.h
index 438fccd9b07..49b700a7ef9 100644
--- a/cpp/src/arrow/dataset/file_base.h
+++ b/cpp/src/arrow/dataset/file_base.h
@@ -39,6 +39,7 @@ namespace arrow {
 
 namespace dataset {
 
+/// \defgroup dataset-file-formats File formats for reading and writing datasets
 /// \defgroup dataset-filesystem File system datasets
 ///
 /// @{
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 5e2de5207af..2f813c07268 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -34,6 +34,7 @@
 import datetime
 import os
 import sys
+import warnings
 from unittest import mock
 
 import pyarrow
@@ -45,6 +46,10 @@
 
 ])
 
+# Suppresses all warnings printed when sphinx is traversing the code (e.g.
+# deprecation warnings)
+warnings.filterwarnings("ignore", category=FutureWarning, message=".*pyarrow.*")
+
 # -- General configuration ------------------------------------------------
 
 # If your documentation needs a minimal Sphinx version, state it here.
diff --git a/docs/source/developers/crossbow.rst b/docs/source/developers/crossbow.rst
index 7d5a3caa683..05830323adc 100644
--- a/docs/source/developers/crossbow.rst
+++ b/docs/source/developers/crossbow.rst
@@ -62,18 +62,18 @@ configuration file to run the requested build (like ``.travis.yml``,
 Scheduler
 ~~~~~~~~~
 
-`Crossbow.py`_ handles version generation, task rendering and
+Crossbow handles version generation, task rendering and
 submission. The tasks are defined in ``tasks.yml``.
 
 Install
 -------
 
-   The following guide depends on GitHub, but theoretically any git
-   server can be used.
+The following guide depends on GitHub, but theoretically any git
+server can be used.
 
 1. `Create the queue repository`_
 
-2. Enable `TravisCI`_, `Appveyor`_, `Azure Pipelines_` and `CircleCI`_
+2. Enable `TravisCI`_, `Appveyor`_, `Azure Pipelines`_ and `CircleCI`_
    integrations on for the newly created queue repository.
 
    -  turn off Travis’ `auto cancellation`_ feature on branches
@@ -100,9 +100,7 @@ Install
 
       export CROSSBOW_GITHUB_TOKEN=<token>
 
-   ..
-
-      or pass as an argument to the CLI script ``--github-token``
+   or pass as an argument to the CLI script ``--github-token``
 
 6. Export the previously created GitHub token on both CI services:
 
@@ -164,10 +162,8 @@ The script does the following:
       git checkout ARROW-<ticket number>
       archery crossbow submit --dry-run conda-linux conda-osx
 
-   ..
-
-      Note that the arrow branch must be pushed beforehand, because the
-      script will clone the selected branch.
+   Note that the arrow branch must be pushed beforehand, because the
+   script will clone the selected branch.
 
 3. Reads and renders the required build configurations with the
    parameters substituted.
diff --git a/docs/source/developers/experimental_repos.rst b/docs/source/developers/experimental_repos.rst
index 6f800b5b9ca..f13adba2b1a 100644
--- a/docs/source/developers/experimental_repos.rst
+++ b/docs/source/developers/experimental_repos.rst
@@ -29,7 +29,7 @@ new repositories, as they offer many important tools to manage it (e.g. github
 issues, “watch”, “github stars” to measure overall interest).
 
 Process
--------
++++++++
 
 * A committer *may* initiate experimental work by creating a separate git
   repository within the Apache Arrow (e.g. via `selfserve <https://selfserve.apache.org/>`_)
@@ -44,21 +44,21 @@ Process
 * The committer decides when the repository is archived.
 
 Repository management
----------------------
++++++++++++++++++++++
 
-* The repository *must* be under `apache/`
-* The repository’s name *must* be prefixed by `arrow-experimental-`
+* The repository *must* be under ``apache/``
+* The repository’s name *must* be prefixed by ``arrow-experimental-``
 * The committer has full permissions over the repository (within possible in ASF)
 * Push / merge permissions *must only* be granted to Apache Arrow committers
 
 Development process
--------------------
++++++++++++++++++++
 
 * The repository must follow the ASF requirements about 3rd party code.
 * The committer decides how to manage issues, PRs, etc.
 
 Divergences
------------
++++++++++++
 
 * If any of the “must” above fails to materialize and no correction measure
   is taken by the committer upon request, the PMC *should* take ownership
diff --git a/docs/source/java/algorithm.rst b/docs/source/java/algorithm.rst
index b0a889459ac..f838398af88 100644
--- a/docs/source/java/algorithm.rst
+++ b/docs/source/java/algorithm.rst
@@ -15,16 +15,15 @@
 .. specific language governing permissions and limitations
 .. under the License.
 
-===========
 Java Algorithms
-===========
+===============
 
 Arrow's Java library provides algorithms for some commonly-used
 functionalities. The algorithms are provided in the ``org.apache.arrow.algorithm``
 package of the ``algorithm`` module. 
 
 Comparing Vector Elements
-=================
+-------------------------
 
 Comparing vector elements is the basic for many algorithms. Vector 
 elements can be compared in one of the two ways:
@@ -40,7 +39,7 @@ We provide default implementations to compare vector elements. However, users ca
 for customized comparisons. 
 
 Vector Element Search
-====================
+---------------------
 
 A search algorithm tries to find a particular value in a vector. When successful, a vector index is 
 returned; otherwise, a ``-1`` is returned. The following search algorithms are provided:
@@ -64,7 +63,7 @@ range search algorithm tries to find the upper/lower bound of the region in ``O(
 An implementation is provided in ``org.apache.arrow.algorithm.search.VectorRangeSearcher``.
 
 Vector Sorting
-===================
+--------------
 
 Given a vector, a sorting algorithm turns it into a sorted one. The sorting criteria must
 be specified by some ordering comparison operation. The sorting algorithms can be
@@ -88,6 +87,6 @@ smallest value in the vector. Index sorting is supported by ``org.apache.arrow.a
 which runs in ``O(nlog(n))`` time. It is applicable to vectors of any type. 
 
 Other Algorithms
-===================
+----------------
 
 Other algorithms include vector deduplication, dictionary encoding, etc., in the ``algorithm`` module.
diff --git a/docs/source/python/api/dataset.rst b/docs/source/python/api/dataset.rst
index 90d5bbf340a..e837839f907 100644
--- a/docs/source/python/api/dataset.rst
+++ b/docs/source/python/api/dataset.rst
@@ -38,6 +38,7 @@ Factory functions
    partitioning
    field
    scalar
+   write_dataset
 
 Classes
 -------
diff --git a/docs/source/python/api/ipc.rst b/docs/source/python/api/ipc.rst
index cc3ccfe40bc..83ff53de7de 100644
--- a/docs/source/python/api/ipc.rst
+++ b/docs/source/python/api/ipc.rst
@@ -38,6 +38,7 @@ Inter-Process Communication
    ipc.read_tensor
    ipc.write_tensor
    ipc.get_tensor_size
+   ipc.IpcWriteOptions
    ipc.Message
    ipc.MessageReader
    ipc.RecordBatchFileReader
diff --git a/docs/source/python/dataset.rst b/docs/source/python/dataset.rst
index f63be7e3932..aa22c6f2725 100644
--- a/docs/source/python/dataset.rst
+++ b/docs/source/python/dataset.rst
@@ -117,7 +117,7 @@ this can require a lot of memory, see below on filtering / iterative loading):
 Reading different file formats
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-The above examples use Parquet files as dataset source but the Dataset API
+The above examples use Parquet files as dataset sources but the Dataset API
 provides a consistent interface across multiple file formats and filesystems.
 Currently, Parquet, Feather / Arrow IPC, and CSV file formats are supported;
 more formats are planned in the future.
@@ -386,11 +386,11 @@ some specific methods exist for Parquet Datasets.
 
 Some processing frameworks such as Dask (optionally) use a ``_metadata`` file
 with partitioned datasets which includes information about the schema and the
-row group metadata of the full dataset. Using such file can give a more
+row group metadata of the full dataset. Using such a file can give a more
 efficient creation of a parquet Dataset, since it does not need to infer the
 schema and crawl the directories for all Parquet files (this is especially the
 case for filesystems where accessing files is expensive). The
-:func:`parquet_dataset` function allows to create a Dataset from a partitioned
+:func:`parquet_dataset` function allows us to create a Dataset from a partitioned
 dataset with a ``_metadata`` file:
 
 .. code-block:: python
@@ -456,20 +456,166 @@ is materialized as columns when reading the data and can be used for filtering:
     dataset.to_table().to_pandas()
     dataset.to_table(filter=ds.field('year') == 2019).to_pandas()
 
+Another benefit of manually listing the files is that the order of the files
+controls the order of the data.  When performing an ordered read (or a read to
+a table) then the rows returned will match the order of the files given.  This
+only applies when the dataset is constructed with a list of files.  There
+are no order guarantees given when the files are instead discovered by scanning
+a directory.
 
-Manual scheduling
------------------
+Iterative (out of core or streaming) reads
+------------------------------------------
 
-..
-    Possible content:
-    - fragments (get_fragments)
-    - scan / scan tasks / iterators of record batches
+The previous examples have demonstrated how to read the data into a table using :func:`~Dataset.to_table`.  This is
+useful if the dataset is small or there is only a small amount of data that needs to
+be read.  The dataset API contains additional methods to read and process large amounts
+of data in a streaming fashion.
 
-The :func:`~Dataset.to_table` method loads all selected data into memory
-at once resulting in a pyarrow Table. Alternatively, a dataset can also be
-scanned one RecordBatch at a time in an iterative manner using the
-:func:`~Dataset.scan` method::
+The easiest way to do this is to use the method :meth:`Dataset.to_batches`.  This
+method returns an iterator of record batches.  For example, we can use this method to
+calculate the average of a column without loading the entire column into memory:
 
-    for scan_task in dataset.scan(columns=[...], filter=...):
-        for record_batch in scan_task.execute():
-            # process the record batch
+.. ipython:: python
+
+    import pyarrow.compute as pc
+
+    col2_sum = 0
+    count = 0
+    for batch in dataset.to_batches(columns=["col2"], filter=~ds.field("col2").is_null()):
+        col2_sum += pc.sum(batch.column("col2")).as_py()
+        count += batch.num_rows
+    mean_a = col2_sum/count
+
+Customizing the batch size
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+An iterative read of a dataset is often called a "scan" of the dataset and pyarrow
+uses an object called a :class:`Scanner` to do this.  A Scanner is created for you
+automatically by the to_table and to_batches method of the dataset.  Any arguments
+you pass to these methods will be passed on to the Scanner constructor.
+
+One of those parameters is the ``batch_size``.  This controls the maximum size of the
+batches returned by the scanner.  Batches can still be smaller than the `batch_size`
+if the dataset consists of small files or those files themselves consist of small
+row groups.  For example, a parquet file with 10,000 rows per row group will yield
+batches with, at most, 10,000 rows unless the batch_size is set to a smaller value.
+
+The default batch size is one million rows and this is typically a good default but
+you may want to customize it if you are reading a large number of columns.
+
+Writing Datasets
+----------------
+
+The dataset API also simplifies writing data to a dataset using :func:`write_dataset` .  This can be useful when
+you want to partition your data or you need to write a large amount of data.  A
+basic dataset write is similar to writing a table except that you specify a directory
+instead of a filename.
+
+.. ipython:: python
+
+    base = pathlib.Path(tempfile.gettempdir())
+    dataset_root = base / "sample_dataset"
+    dataset_root.mkdir(exist_ok=True)
+
+    table = pa.table({"a": range(10), "b": np.random.randn(10), "c": [1, 2] * 5})
+    ds.write_dataset(table, dataset_root, format="parquet")
+
+The above example will create a single file named part-0.parquet in our sample_dataset
+directory.
+
+.. warning::
+
+    If you run the example again it will replace the existing part-0.parquet file.
+    Appending files to an existing dataset requires specifying a new
+    ``basename_template`` for each call to ``ds.write_dataset``
+    to avoid overwrite.
+
+Writing partitioned data
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+A partitioning object can be used to specify how your output data should be partitioned.
+This uses the same kind of partitioning objects we used for reading datasets.  To write
+our above data out to a partitioned directory we only need to specify how we want the
+dataset to be partitioned.  For example:
+
+.. ipython:: python
+
+    part = ds.partitioning(
+        pa.schema([("c", pa.int16())]), flavor="hive"
+    )
+    ds.write_dataset(table, dataset_root, format="parquet", partitioning=part)
+
+This will create two files.  Half our data will be in the dataset_root/c=1 directory and
+the other half will be in the dataset_root/c=2 directory.
+
+Writing large amounts of data
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The above examples wrote data from a table.  If you are writing a large amount of data
+you may not be able to load everything into a single in-memory table.  Fortunately, the
+write_dataset method also accepts an iterable of record batches.  This makes it really
+simple, for example, to repartition a large dataset without loading the entire dataset
+into memory:
+
+.. ipython:: python
+
+    old_part = ds.partitioning(
+        pa.schema([("c", pa.int16())]), flavor="hive"
+    )
+    new_part = ds.partitioning(
+        pa.schema([("c", pa.int16())]), flavor=None
+    )
+    input_dataset = ds.dataset(dataset_root, partitioning=old_part)
+    new_root = base / "repartitioned_dataset"
+    # A scanner can act as an iterator of record batches but you could also receive
+    # data from the network (e.g. via flight), from your own scanning, or from any
+    # other method that yields record batches.  In addition, you can pass a dataset
+    # into write_dataset directly but this method is useful if you want to customize
+    # the scanner (e.g. to filter the input dataset or set a maximum batch size)
+    scanner = input_dataset.scanner()
+
+    ds.write_dataset(scanner, new_root, format="parquet", partitioning=new_part)
+
+After the above example runs our data will be in dataset_root/1 and dataset_root/2
+directories.  In this simple example we are not changing the structure of the data
+(only the directory naming schema) but you could also use this mechnaism to change
+which columns are used to partition the dataset.  This is useful when you expect to
+query your data in specific ways and you can utilize partitioning to reduce the
+amount of data you need to read.
+
+.. To add when ARROW-12364 is merged
+    Customizing & inspecting written files
+    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+    By default the dataset API will create files named "part-i.format" where "i" is a integer
+    generated during the write and "format" is the file format specified in the write_dataset
+    call.  For simple datasets it may be possible to know which files will be created but for
+    larger or partitioned datasets it is not so easy.  The ``file_visitor`` keyword can be used 
+    to supply a visitor that will be called as each file is created:
+
+    .. ipython:: python
+
+        def file_visitor(written_file):
+            print(f"path={written_file.path}")
+            print(f"metadata={written_file.metadata}")
+        ds.write_dataset(table, dataset_root, format="parquet", partitioning=part,
+                        file_visitor=file_visitor)
+
+    This will allow you to collect the filenames that belong to the dataset and store them elsewhere
+    which can be useful when you want to avoid scanning directories the next time you need to read
+    the data.  It can also be used to generate the _metadata index file used by other tools such as
+    dask or spark to create an index of the dataset.
+
+Configuring format-specific parameters during a write
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+In addition to the common options shared by all formats there are also format specific options
+that are unique to a particular format.  For example, to allow truncated timestamps while writing
+Parquet files:
+
+.. ipython:: python
+
+    parquet_format = ds.ParquetFileFormat()
+    write_options = parquet_format.make_write_options(allow_truncated_timestamps=True)
+    ds.write_dataset(table, dataset_root, format="parquet", partitioning=part,
+                     file_options=write_options)
diff --git a/python/pyarrow/_hdfs.pyx b/python/pyarrow/_hdfs.pyx
index f5cf12fa109..5ede8f5159d 100644
--- a/python/pyarrow/_hdfs.pyx
+++ b/python/pyarrow/_hdfs.pyx
@@ -93,9 +93,11 @@ cdef class HadoopFileSystem(FileSystem):
         Instantiate HadoopFileSystem object from an URI string.
 
         The following two calls are equivalent
-        * HadoopFileSystem.from_uri('hdfs://localhost:8020/?user=test'
-                                    '&replication=1')
-        * HadoopFileSystem('localhost', port=8020, user='test', replication=1)
+
+        * ``HadoopFileSystem.from_uri('hdfs://localhost:8020/?user=test\
+&replication=1')``
+        * ``HadoopFileSystem('localhost', port=8020, user='test', \
+replication=1)``
 
         Parameters
         ----------
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index a6c3b3a382f..62523696c8b 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -1622,7 +1622,6 @@ cdef class ListArray(BaseListArray):
             3
           ]
         ]
-
         # nulls in the offsets array become null lists
         >>> offsets = pa.array([0, None, 2, 4])
         >>> pa.ListArray.from_arrays(offsets, values)

From de51528eef7a42eb1e28f2f646924deb39a02969 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Tue, 20 Jul 2021 16:39:17 +0200
Subject: [PATCH 607/719] ARROW-13385: [C++] Demonstrate registering compute
 functions out-of-tree
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Makes a small change to avoid crashing if we try to serialize an expression with an out-of-tree options.

Closes #10748 from lidavidm/arrow-13385

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 cpp/examples/arrow/CMakeLists.txt             |   4 +
 .../arrow/compute_register_example.cc         | 100 ++++++++++++++++++
 cpp/src/arrow/compute/function_internal.cc    |   6 +-
 3 files changed, 109 insertions(+), 1 deletion(-)
 create mode 100644 cpp/examples/arrow/compute_register_example.cc

diff --git a/cpp/examples/arrow/CMakeLists.txt b/cpp/examples/arrow/CMakeLists.txt
index aad68744fe6..0bcf5de0ad1 100644
--- a/cpp/examples/arrow/CMakeLists.txt
+++ b/cpp/examples/arrow/CMakeLists.txt
@@ -17,6 +17,10 @@
 
 ADD_ARROW_EXAMPLE(row_wise_conversion_example)
 
+if (ARROW_COMPUTE)
+  ADD_ARROW_EXAMPLE(compute_register_example)
+endif()
+
 if (ARROW_PARQUET AND ARROW_DATASET)
   if (ARROW_BUILD_SHARED)
     set(DATASET_EXAMPLES_LINK_LIBS arrow_dataset_shared)
diff --git a/cpp/examples/arrow/compute_register_example.cc b/cpp/examples/arrow/compute_register_example.cc
new file mode 100644
index 00000000000..d1a1372b82a
--- /dev/null
+++ b/cpp/examples/arrow/compute_register_example.cc
@@ -0,0 +1,100 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <arrow/api.h>
+#include <arrow/compute/api.h>
+#include <arrow/compute/exec/expression.h>
+
+#include <cstdlib>
+#include <iostream>
+#include <memory>
+
+// Demonstrate registering an Arrow compute function outside of the Arrow source tree
+
+namespace cp = ::arrow::compute;
+
+#define ABORT_ON_FAILURE(expr)                     \
+  do {                                             \
+    arrow::Status status_ = (expr);                \
+    if (!status_.ok()) {                           \
+      std::cerr << status_.message() << std::endl; \
+      abort();                                     \
+    }                                              \
+  } while (0);
+
+class ExampleFunctionOptionsType : public cp::FunctionOptionsType {
+  const char* type_name() const override { return "ExampleFunctionOptionsType"; }
+  std::string Stringify(const cp::FunctionOptions&) const override {
+    return "ExampleFunctionOptionsType";
+  }
+  bool Compare(const cp::FunctionOptions&, const cp::FunctionOptions&) const override {
+    return true;
+  }
+  // optional: support for serialization
+  // Result<std::shared_ptr<Buffer>> Serialize(const FunctionOptions&) const override;
+  // Result<std::unique_ptr<FunctionOptions>> Deserialize(const Buffer& buffer) const override;
+};
+
+cp::FunctionOptionsType* GetExampleFunctionOptionsType() {
+  static ExampleFunctionOptionsType options_type;
+  return &options_type;
+}
+
+class ExampleFunctionOptions : public cp::FunctionOptions {
+ public:
+  ExampleFunctionOptions() : cp::FunctionOptions(GetExampleFunctionOptionsType()) {}
+};
+
+arrow::Status ExampleFunctionImpl(cp::KernelContext* ctx, const cp::ExecBatch& batch,
+                                  arrow::Datum* out) {
+  *out->mutable_array() = *batch[0].array();
+  return arrow::Status::OK();
+}
+
+const cp::FunctionDoc func_doc{
+    "Example function to demonstrate registering an out-of-tree function",
+    "",
+    {"x"},
+    "ExampleFunctionOptions"};
+
+int main(int argc, char** argv) {
+  const std::string name = "compute_register_example";
+  auto func = std::make_shared<cp::ScalarFunction>(name, cp::Arity::Unary(), &func_doc);
+  func->AddKernel({cp::InputType::Array(arrow::int64())}, arrow::int64(),
+                  ExampleFunctionImpl);
+
+  auto registry = cp::GetFunctionRegistry();
+  ABORT_ON_FAILURE(registry->AddFunction(std::move(func)));
+
+  arrow::Int64Builder builder(arrow::default_memory_pool());
+  std::shared_ptr<arrow::Array> arr;
+  ABORT_ON_FAILURE(builder.Append(42));
+  ABORT_ON_FAILURE(builder.Finish(&arr));
+  auto options = std::make_shared<ExampleFunctionOptions>();
+  auto maybe_result = cp::CallFunction(name, {arr}, options.get());
+  ABORT_ON_FAILURE(maybe_result.status());
+
+  std::cout << maybe_result->make_array()->ToString() << std::endl;
+
+  // Expression serialization will raise NotImplemented if an expression includes FunctionOptions
+  // for which serialization is not supported.
+  auto expr = cp::call(name, {}, options);
+  auto maybe_serialized = cp::Serialize(expr);
+  std::cerr << maybe_serialized.status().ToString() << std::endl;
+
+  return EXIT_SUCCESS;
+}
diff --git a/cpp/src/arrow/compute/function_internal.cc b/cpp/src/arrow/compute/function_internal.cc
index 5234a421a7e..0a926e0a39c 100644
--- a/cpp/src/arrow/compute/function_internal.cc
+++ b/cpp/src/arrow/compute/function_internal.cc
@@ -39,7 +39,11 @@ Result<std::shared_ptr<StructScalar>> FunctionOptionsToStructScalar(
   std::vector<std::string> field_names;
   std::vector<std::shared_ptr<Scalar>> values;
   const auto* options_type =
-      checked_cast<const GenericOptionsType*>(options.options_type());
+      dynamic_cast<const GenericOptionsType*>(options.options_type());
+  if (!options_type) {
+    return Status::NotImplemented("serializing ", options.type_name(),
+                                  " to StructScalar");
+  }
   RETURN_NOT_OK(options_type->ToStructScalar(options, &field_names, &values));
   field_names.push_back(kTypeNameField);
   const char* options_name = options.type_name();

From a47deef69ea831abee1062df91202602c20d54e5 Mon Sep 17 00:00:00 2001
From: niranda perera <niranda.perera@gmail.com>
Date: Tue, 20 Jul 2021 18:40:24 +0200
Subject: [PATCH 608/719] ARROW-11673 - [C++] Casting dictionary type to use
 different index type
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This PR adds casting from one dictionary type to anther dictionary type for both scalars and arrays :
ex:
```
dictionary(int8(), int16()) --> dictionary(int32(), int64())
```

Closes #10721 from nirandaperera/ARROW-11673

Lead-authored-by: niranda perera <niranda.perera@gmail.com>
Co-authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 cpp/src/arrow/CMakeLists.txt                  |   1 +
 cpp/src/arrow/compute/cast.cc                 |   1 +
 cpp/src/arrow/compute/cast_internal.h         |   1 +
 cpp/src/arrow/compute/exec/expression_test.cc |  17 ++-
 .../compute/kernels/scalar_cast_dictionary.cc | 126 ++++++++++++++++++
 .../arrow/compute/kernels/scalar_cast_test.cc |  34 +++++
 python/pyarrow/tests/test_array.py            |  24 +++-
 7 files changed, 190 insertions(+), 14 deletions(-)
 create mode 100644 cpp/src/arrow/compute/kernels/scalar_cast_dictionary.cc

diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index 88a92d8c2c9..d2f80ce7213 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -385,6 +385,7 @@ if(ARROW_COMPUTE)
        compute/kernels/scalar_arithmetic.cc
        compute/kernels/scalar_boolean.cc
        compute/kernels/scalar_cast_boolean.cc
+       compute/kernels/scalar_cast_dictionary.cc
        compute/kernels/scalar_cast_internal.cc
        compute/kernels/scalar_cast_nested.cc
        compute/kernels/scalar_cast_numeric.cc
diff --git a/cpp/src/arrow/compute/cast.cc b/cpp/src/arrow/compute/cast.cc
index 521f217213d..4de68ba8d90 100644
--- a/cpp/src/arrow/compute/cast.cc
+++ b/cpp/src/arrow/compute/cast.cc
@@ -61,6 +61,7 @@ void InitCastTable() {
   AddCastFunctions(GetNestedCasts());
   AddCastFunctions(GetNumericCasts());
   AddCastFunctions(GetTemporalCasts());
+  AddCastFunctions(GetDictionaryCasts());
 }
 
 void EnsureInitCastTable() { std::call_once(cast_table_initialized, InitCastTable); }
diff --git a/cpp/src/arrow/compute/cast_internal.h b/cpp/src/arrow/compute/cast_internal.h
index c152d10bd86..0105d08a573 100644
--- a/cpp/src/arrow/compute/cast_internal.h
+++ b/cpp/src/arrow/compute/cast_internal.h
@@ -36,6 +36,7 @@ std::vector<std::shared_ptr<CastFunction>> GetNumericCasts();
 std::vector<std::shared_ptr<CastFunction>> GetTemporalCasts();
 std::vector<std::shared_ptr<CastFunction>> GetBinaryLikeCasts();
 std::vector<std::shared_ptr<CastFunction>> GetNestedCasts();
+std::vector<std::shared_ptr<CastFunction>> GetDictionaryCasts();
 
 }  // namespace internal
 }  // namespace compute
diff --git a/cpp/src/arrow/compute/exec/expression_test.cc b/cpp/src/arrow/compute/exec/expression_test.cc
index 86909f4eb64..b59f8762818 100644
--- a/cpp/src/arrow/compute/exec/expression_test.cc
+++ b/cpp/src/arrow/compute/exec/expression_test.cc
@@ -939,17 +939,16 @@ TEST(Expression, ReplaceFieldsWithKnownValues) {
   ExpectReplacesTo(is_valid(field_ref("str")), i32_valid_str_null,
                    is_valid(null_literal(utf8())));
 
-  ASSERT_OK_AND_ASSIGN(auto expr, field_ref("dict_str").Bind(*kBoringSchema));
   Datum dict_i32{
       DictionaryScalar::Make(MakeScalar<int32_t>(0), ArrayFromJSON(int32(), R"([3])"))};
-  // Unsupported cast dictionary(int32(), int32()) -> dictionary(int32(), utf8())
-  ASSERT_RAISES(NotImplemented, ReplaceFieldsWithKnownValues(
-                                    KnownFieldValues{{{"dict_str", dict_i32}}}, expr));
-  // Unsupported cast dictionary(int8(), utf8()) -> dictionary(int32(), utf8())
-  dict_str = Datum{
-      DictionaryScalar::Make(MakeScalar<int8_t>(0), ArrayFromJSON(utf8(), R"(["a"])"))};
-  ASSERT_RAISES(NotImplemented, ReplaceFieldsWithKnownValues(
-                                    KnownFieldValues{{{"dict_str", dict_str}}}, expr));
+  // cast dictionary(int32(), int32()) -> dictionary(int32(), utf8())
+  ExpectReplacesTo(field_ref("dict_str"), {{"dict_str", dict_i32}}, literal(dict_str));
+
+  // cast dictionary(int8(), utf8()) -> dictionary(int32(), utf8())
+  auto dict_int8_str = Datum{
+      DictionaryScalar::Make(MakeScalar<int8_t>(0), ArrayFromJSON(utf8(), R"(["3"])"))};
+  ExpectReplacesTo(field_ref("dict_str"), {{"dict_str", dict_int8_str}},
+                   literal(dict_str));
 }
 
 struct {
diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_dictionary.cc b/cpp/src/arrow/compute/kernels/scalar_cast_dictionary.cc
new file mode 100644
index 00000000000..b1e1164fd34
--- /dev/null
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_dictionary.cc
@@ -0,0 +1,126 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Implementation of casting to dictionary type
+
+#include <arrow/util/bitmap_ops.h>
+#include <arrow/util/checked_cast.h>
+
+#include "arrow/array/builder_primitive.h"
+#include "arrow/compute/cast_internal.h"
+#include "arrow/compute/kernels/scalar_cast_internal.h"
+#include "arrow/compute/kernels/util_internal.h"
+#include "arrow/util/int_util.h"
+
+namespace arrow {
+using internal::CopyBitmap;
+
+namespace compute {
+namespace internal {
+
+Status CastDictionary(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  const CastOptions& options = CastState::Get(ctx);
+  auto out_type = std::static_pointer_cast<DictionaryType>(out->type());
+
+  // if out type is same as in type, return input
+  if (out_type->Equals(batch[0].type())) {
+    *out = batch[0];
+    return Status::OK();
+  }
+
+  if (batch[0].is_scalar()) {  // if input is scalar
+    auto in_scalar = checked_cast<const DictionaryScalar&>(*batch[0].scalar());
+
+    // if invalid scalar, return null scalar
+    if (!in_scalar.is_valid) {
+      *out = MakeNullScalar(out_type);
+      return Status::OK();
+    }
+
+    Datum casted_index, casted_dict;
+    if (in_scalar.value.index->type->Equals(out_type->index_type())) {
+      casted_index = in_scalar.value.index;
+    } else {
+      ARROW_ASSIGN_OR_RAISE(casted_index,
+                            Cast(in_scalar.value.index, out_type->index_type(), options,
+                                 ctx->exec_context()));
+    }
+
+    if (in_scalar.value.dictionary->type()->Equals(out_type->value_type())) {
+      casted_dict = in_scalar.value.dictionary;
+    } else {
+      ARROW_ASSIGN_OR_RAISE(
+          casted_dict, Cast(in_scalar.value.dictionary, out_type->value_type(), options,
+                            ctx->exec_context()));
+    }
+
+    *out = std::static_pointer_cast<Scalar>(
+        DictionaryScalar::Make(casted_index.scalar(), casted_dict.make_array()));
+
+    return Status::OK();
+  }
+
+  // if input is array
+  const std::shared_ptr<ArrayData>& in_array = batch[0].array();
+  const auto& in_type = checked_cast<const DictionaryType&>(*in_array->type);
+
+  ArrayData* out_array = out->mutable_array();
+
+  if (in_type.index_type()->Equals(out_type->index_type())) {
+    out_array->buffers[0] = in_array->buffers[0];
+    out_array->buffers[1] = in_array->buffers[1];
+    out_array->null_count = in_array->GetNullCount();
+    out_array->offset = in_array->offset;
+  } else {
+    // for indices, create a dummy ArrayData with index_type()
+    const std::shared_ptr<ArrayData>& indices_arr =
+        ArrayData::Make(in_type.index_type(), in_array->length, in_array->buffers,
+                        in_array->GetNullCount(), in_array->offset);
+    ARROW_ASSIGN_OR_RAISE(auto casted_indices, Cast(indices_arr, out_type->index_type(),
+                                                    options, ctx->exec_context()));
+    out_array->buffers[0] = std::move(casted_indices.array()->buffers[0]);
+    out_array->buffers[1] = std::move(casted_indices.array()->buffers[1]);
+  }
+
+  // data (dict)
+  if (in_type.value_type()->Equals(out_type->value_type())) {
+    out_array->dictionary = in_array->dictionary;
+  } else {
+    const std::shared_ptr<Array>& dict_arr = MakeArray(in_array->dictionary);
+    ARROW_ASSIGN_OR_RAISE(auto casted_data, Cast(dict_arr, out_type->value_type(),
+                                                 options, ctx->exec_context()));
+    out_array->dictionary = casted_data.array();
+  }
+  return Status::OK();
+}
+
+std::vector<std::shared_ptr<CastFunction>> GetDictionaryCasts() {
+  auto func = std::make_shared<CastFunction>("cast_dictionary", Type::DICTIONARY);
+
+  AddCommonCasts(Type::DICTIONARY, kOutputTargetType, func.get());
+  ScalarKernel kernel({InputType(Type::DICTIONARY)}, kOutputTargetType, CastDictionary);
+  kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
+  kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
+
+  DCHECK_OK(func->AddKernel(Type::DICTIONARY, std::move(kernel)));
+
+  return {func};
+}
+
+}  // namespace internal
+}  // namespace compute
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
index 494b15dfbc8..fce8518dd3b 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
@@ -1911,5 +1911,39 @@ TEST(Cast, ExtensionTypeToIntDowncast) {
   }
 }
 
+TEST(Cast, DictTypeToAnotherDict) {
+  auto check_cast = [&](const std::shared_ptr<DataType>& in_type,
+                        const std::shared_ptr<DataType>& out_type,
+                        const std::string& json_str,
+                        const CastOptions& options = CastOptions()) {
+    auto arr = ArrayFromJSON(in_type, json_str);
+    auto exp = in_type->Equals(out_type) ? arr : ArrayFromJSON(out_type, json_str);
+    // this checks for scalars as well
+    CheckCast(arr, exp, options);
+  };
+
+  //    check same type passed on to casting
+  check_cast(dictionary(int8(), int16()), dictionary(int8(), int16()),
+             "[1, 2, 3, 1, null, 3]");
+  check_cast(dictionary(int8(), int16()), dictionary(int32(), int64()),
+             "[1, 2, 3, 1, null, 3]");
+  check_cast(dictionary(int8(), int16()), dictionary(int32(), float64()),
+             "[1, 2, 3, 1, null, 3]");
+  check_cast(dictionary(int32(), utf8()), dictionary(int8(), utf8()),
+             R"(["a", "b", "a", null])");
+
+  auto arr = ArrayFromJSON(dictionary(int32(), int32()), "[1, 1000]");
+  // check casting unsafe values (checking for unsafe indices is unnecessary, because it
+  // would create an invalid index array which results in a ValidateOutput failure)
+  ASSERT_OK_AND_ASSIGN(auto casted,
+                       Cast(arr, dictionary(int8(), int8()), CastOptions::Unsafe()));
+  ValidateOutput(casted);
+
+  // check safe casting values
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      Invalid, testing::HasSubstr("Integer value 1000 not in range"),
+      Cast(arr, dictionary(int8(), int8()), CastOptions::Safe()));
+}
+
 }  // namespace compute
 }  // namespace arrow
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index 57224ef4ebe..0f137383378 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -1524,12 +1524,26 @@ def test_cast_string_to_number_roundtrip():
 
 
 def test_cast_dictionary():
-    arr = pa.DictionaryArray.from_arrays(
-        pa.array([0, 1, None], type=pa.int32()),
-        pa.array(["foo", "bar"]))
-    assert arr.cast(pa.string()).equals(pa.array(["foo", "bar", None]))
+    # cast to the value type
+    arr = pa.array(
+        ["foo", "bar", None],
+        type=pa.dictionary(pa.int64(), pa.string())
+    )
+    expected = pa.array(["foo", "bar", None])
+    assert arr.type == pa.dictionary(pa.int64(), pa.string())
+    assert arr.cast(pa.string()) == expected
+
+    # cast to a different key type
+    for key_type in [pa.int8(), pa.int16(), pa.int32()]:
+        typ = pa.dictionary(key_type, pa.string())
+        expected = pa.array(
+            ["foo", "bar", None],
+            type=pa.dictionary(key_type, pa.string())
+        )
+        assert arr.cast(typ) == expected
+
+    # shouldn't crash (ARROW-7077)
     with pytest.raises(pa.ArrowInvalid):
-        # Shouldn't crash (ARROW-7077)
         arr.cast(pa.int32())
 
 

From 381afac9bd019943f67afbfe77104fc38c959850 Mon Sep 17 00:00:00 2001
From: Linan Qiu <linanqiu@gmail.com>
Date: Wed, 21 Jul 2021 06:37:14 +0900
Subject: [PATCH 609/719] ARROW-13360: [C++] Missing dependencies in cpp
 thirdparty offline dependencies versions.txt

Add missing offline package downloads for c_common, checksums, c_event_stream that cause build errors when building with LBARROW_MINIMAL=false in an airgapped offline environment

Closes #10736 from linanqiu/master

Lead-authored-by: Linan Qiu <linanqiu@gmail.com>
Co-authored-by: Sutou Kouhei <kou@cozmixng.org>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 cpp/thirdparty/versions.txt | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt
index 5b7528b9b64..4905d7567c1 100644
--- a/cpp/thirdparty/versions.txt
+++ b/cpp/thirdparty/versions.txt
@@ -65,6 +65,9 @@ ARROW_ZSTD_BUILD_VERSION=v1.5.0
 DEPENDENCIES=(
   "ARROW_ABSL_URL absl-${ARROW_ABSL_BUILD_VERSION}.tar.gz https://github.com/abseil/abseil-cpp/archive/${ARROW_ABSL_BUILD_VERSION}.tar.gz"
   "ARROW_AWSSDK_URL aws-sdk-cpp-${ARROW_AWSSDK_BUILD_VERSION}.tar.gz https://github.com/aws/aws-sdk-cpp/archive/${ARROW_AWSSDK_BUILD_VERSION}.tar.gz"
+  "ARROW_AWS_CHECKSUMS_URL aws-checksums-${ARROW_AWS_CHECKSUMS_BUILD_VERSION} https://github.com/awslabs/aws-checksums/archive/${ARROW_AWS_CHECKSUMS_BUILD_VERSION}.tar.gz"
+  "ARROW_AWS_C_COMMON_URL aws-c-common-${ARROW_AWS_C_COMMON_BUILD_VERSION}.tar.gz https://github.com/awslabs/aws-c-common/archive/${ARROW_AWS_C_COMMON_BUILD_VERSION}.tar.gz"
+  "ARROW_AWS_C_EVENT_STREAM_URL aws-c-event-stream-${ARROW_AWS_C_EVENT_STREAM_BUILD_VERSION} https://github.com/awslabs/aws-c-event-stream/archive/${ARROW_AWS_C_EVENT_STREAM_BUILD_VERSION}.tar.gz"
   "ARROW_BOOST_URL boost-${ARROW_BOOST_BUILD_VERSION}.tar.gz https://github.com/ursa-labs/thirdparty/releases/download/latest/boost_${ARROW_BOOST_BUILD_VERSION//./_}.tar.gz"
   "ARROW_BROTLI_URL brotli-${ARROW_BROTLI_BUILD_VERSION}.tar.gz https://github.com/google/brotli/archive/${ARROW_BROTLI_BUILD_VERSION}.tar.gz"
   "ARROW_BZIP2_URL bzip2-${ARROW_BZIP2_BUILD_VERSION}.tar.gz https://sourceware.org/pub/bzip2/bzip2-${ARROW_BZIP2_BUILD_VERSION}.tar.gz"

From d792eb147380e0b597799db3649457680b4e58c6 Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Tue, 20 Jul 2021 17:16:24 -0500
Subject: [PATCH 610/719] ARROW-13418: [R] typo in python.r

Closes #10760 from jonkeane/ARROW-13418-python-typo

Authored-by: Jonathan Keane <jkeane@gmail.com>
Signed-off-by: Jonathan Keane <jkeane@gmail.com>
---
 r/R/python.R                   |  2 +-
 r/tests/testthat/test-python.R | 11 +++++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/r/R/python.R b/r/R/python.R
index 9d1ecf6347a..0a0afcb06c1 100644
--- a/r/R/python.R
+++ b/r/R/python.R
@@ -171,7 +171,7 @@ py_to_r.pyarrow.lib.RecordBatchReader <- function(x, ...) {
   on.exit(delete_arrow_array_stream(stream_ptr))
 
   x$`_export_to_c`(stream_ptr)
-  RecordBatchFileReader$import_from_c(stream_ptr)
+  RecordBatchReader$import_from_c(stream_ptr)
 }
 
 r_to_py.RecordBatchReader <- function(x, convert = FALSE) {
diff --git a/r/tests/testthat/test-python.R b/r/tests/testthat/test-python.R
index 7efc2b28715..9e67219e19a 100644
--- a/r/tests/testthat/test-python.R
+++ b/r/tests/testthat/test-python.R
@@ -132,3 +132,14 @@ test_that("RecordBatchReader to python", {
       filter(int > 6)
   )
 })
+
+test_that("RecordBatchReader from python", {
+  tab <- Table$create(example_data)
+  scan <- Scanner$create(tab)
+  reader <- scan$ToRecordBatchReader()
+  pyreader <- reticulate::r_to_py(reader)
+  back_to_r <- reticulate::py_to_r(pyreader)
+  rt_table <- back_to_r$read_table()
+  expect_r6_class(rt_table, "Table")
+  expect_identical(as.data.frame(rt_table), example_data)
+})

From 77590a8fa35d580b4e572c85b62bf8c2e407d84a Mon Sep 17 00:00:00 2001
From: Ian Cook <ianmcook@gmail.com>
Date: Tue, 20 Jul 2021 18:22:45 -0400
Subject: [PATCH 611/719] ARROW-13201: [R] Add binding for coalesce()

Closes #10751 from ianmcook/ARROW-13201

Authored-by: Ian Cook <ianmcook@gmail.com>
Signed-off-by: Ian Cook <ianmcook@gmail.com>
---
 r/NEWS.md                     |   2 +-
 r/R/dplyr-functions.R         |  38 ++++++++++
 r/tests/testthat/test-dplyr.R | 130 +++++++++++++++++++++++++++++++++-
 3 files changed, 168 insertions(+), 2 deletions(-)

diff --git a/r/NEWS.md b/r/NEWS.md
index b05cc131902..5ec8492a9ca 100644
--- a/r/NEWS.md
+++ b/r/NEWS.md
@@ -26,7 +26,7 @@
   * String operations: `strsplit()` and `str_split()`; `strptime()`; `paste()`, `paste0()`, and `str_c()`; `substr()` and `str_sub()`; `str_like()`; `str_pad()`; `stri_reverse()`
   * Date/time operations: `lubridate` methods such as `year()`, `month()`, `wday()`, and so on
   * Math: logarithms (`log()` et al.); trigonometry (`sin()`, `cos()`, et al.); `abs()`; `sign()`; `pmin()` and `pmax()`; `ceiling()`, `floor()`, and `trunc()`
-  * Conditional: `ifelse()` and `if_else()` (fixed-precision decimal numbers do not yet work and factors/dictionaries are converted to character strings); `case_when()` (currently works with numeric data types but not character strings, factors/dictionaries, or lists/structs)
+  * Conditional functions, with some limitations on input type in this release: `ifelse()` and `if_else()` for all but `Decimal` types; `case_when()` for logical, numeric, and temporal types only; `coalesce()` for all but lists/structs. Note also that in this release, factors/dictionaries are converted to strings in these functions.
   * `is.*` functions are supported and can be used inside `relocate()`
 
 * The print method for `arrow_dplyr_query` now includes the expression and the resulting type of columns derived by `mutate()`.
diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R
index 11efb7f26d2..8406de1ba8f 100644
--- a/r/R/dplyr-functions.R
+++ b/r/R/dplyr-functions.R
@@ -57,6 +57,44 @@ nse_funcs$cast <- function(x, target_type, safe = TRUE, ...) {
   Expression$create("cast", x, options = opts)
 }
 
+nse_funcs$coalesce <- function(...) {
+  args <- list2(...)
+  if (length(args) < 1) {
+    abort("At least one argument must be supplied to coalesce()")
+  }
+
+  # Treat NaN like NA for consistency with dplyr::coalesce(), but if *all*
+  # the values are NaN, we should return NaN, not NA, so don't replace
+  # NaN with NA in the final (or only) argument
+  # TODO: if an option is added to the coalesce kernel to treat NaN as NA,
+  # use that to simplify the code here (ARROW-13389)
+  attr(args[[length(args)]], "last") <- TRUE
+  args <- lapply(args, function(arg) {
+    last_arg <- is.null(attr(arg, "last"))
+    attr(arg, "last") <- NULL
+
+    if (!inherits(arg, "Expression")) {
+      arg <- Expression$scalar(arg)
+    }
+
+    # coalesce doesn't yet support factors/dictionaries
+    # TODO: remove this after ARROW-13390 is merged
+    if (nse_funcs$is.factor(arg)) {
+      warning("Dictionaries (in R: factors) are currently converted to strings (characters) in coalesce", call. = FALSE)
+    }
+
+    if (last_arg && arg$type_id() %in% TYPES_WITH_NAN) {
+      # store the NA_real_ in the same type as arg to avoid avoid casting
+      # smaller float types to larger float types
+      NA_expr <- Expression$scalar(Scalar$create(NA_real_, type = arg$type()))
+      Expression$create("if_else", Expression$create("is_nan", arg), NA_expr, arg)
+    } else {
+      arg
+    }
+  })
+  Expression$create("coalesce", args = args)
+}
+
 nse_funcs$is.na <- function(x) {
   # TODO: if an option is added to the is_null kernel to treat NaN as NA,
   # use that to simplify the code here (ARROW-13367)
diff --git a/r/tests/testthat/test-dplyr.R b/r/tests/testthat/test-dplyr.R
index ac36c5a1bc9..0a7ea8da89b 100644
--- a/r/tests/testthat/test-dplyr.R
+++ b/r/tests/testthat/test-dplyr.R
@@ -1207,7 +1207,7 @@ test_that("if_else and ifelse", {
       mutate(
         y = if_else(int > 5, fct, factor("a"))
       ) %>% collect() %>%
-      # This is a no-op on the Arrow side, but necesary to make the results equal
+      # This is a no-op on the Arrow side, but necessary to make the results equal
       mutate(y = as.character(y)),
     tbl,
     warning = "Dictionaries .* are currently converted to strings .* in if_else and ifelse"
@@ -1359,3 +1359,131 @@ test_that("case_when()", {
     tbl
   )
 })
+
+test_that("coalesce()", {
+  # character
+  df <- tibble(
+    w = c(NA_character_, NA_character_, NA_character_),
+    x = c(NA_character_, NA_character_, "c"),
+    y = c(NA_character_, "b", "c"),
+    z = c("a", "b", "c")
+  )
+  expect_dplyr_equal(
+    input %>%
+      mutate(
+        cw = coalesce(w),
+        cz = coalesce(z),
+        cwx = coalesce(w, x),
+        cwxy = coalesce(w, x, y),
+        cwxyz = coalesce(w, x, y, z)
+      ) %>%
+      collect(),
+    df
+  )
+
+  # integer
+  df <- tibble(
+    w = c(NA_integer_, NA_integer_, NA_integer_),
+    x = c(NA_integer_, NA_integer_, 3L),
+    y = c(NA_integer_, 2L, 3L),
+    z = 1:3
+  )
+  expect_dplyr_equal(
+    input %>%
+      mutate(
+        cw = coalesce(w),
+        cz = coalesce(z),
+        cwx = coalesce(w, x),
+        cwxy = coalesce(w, x, y),
+        cwxyz = coalesce(w, x, y, z)
+      ) %>%
+      collect(),
+    df
+  )
+
+  # double with NaNs
+  df <- tibble(
+    w = c(NA_real_, NaN, NA_real_),
+    x = c(NA_real_, NaN, 3.3),
+    y = c(NA_real_, 2.2, 3.3),
+    z = c(1.1, 2.2, 3.3)
+  )
+  expect_dplyr_equal(
+    input %>%
+      mutate(
+        cw = coalesce(w),
+        cz = coalesce(z),
+        cwx = coalesce(w, x),
+        cwxy = coalesce(w, x, y),
+        cwxyz = coalesce(w, x, y, z)
+      ) %>%
+      collect(),
+    df
+  )
+  # NaNs stay NaN and are not converted to NA in the results
+  # (testing this requires expect_identical())
+  expect_identical(
+    df %>% Table$create() %>% mutate(cwx = coalesce(w, x)) %>% collect(),
+    df %>% mutate(cwx = coalesce(w, x))
+  )
+  expect_identical(
+    df %>% Table$create() %>% transmute(cw = coalesce(w)) %>% collect(),
+    df %>% transmute(cw = coalesce(w))
+  )
+  expect_identical(
+    df %>% Table$create() %>% transmute(cn = coalesce(NaN)) %>% collect(),
+    df %>% transmute(cn = coalesce(NaN))
+  )
+  # singles stay single
+  expect_equal(
+    (df %>%
+      Table$create(schema = schema(
+        w = float32(),
+        x = float32(),
+        y = float32(),
+        z = float32()
+      )) %>%
+      transmute(c = coalesce(w, x, y, z)) %>%
+      compute()
+    )$schema[[1]]$type,
+    float32()
+  )
+  # with R literal values
+  expect_dplyr_equal(
+    input %>%
+      mutate(
+        c1 = coalesce(4.4),
+        c2 = coalesce(NA_real_),
+        c3 = coalesce(NaN),
+        c4 = coalesce(w, x, y, 5.5),
+        c5 = coalesce(w, x, y, NA_real_),
+        c6 = coalesce(w, x, y, NaN)
+      ) %>%
+      collect(),
+    df
+  )
+
+  # factors
+  # TODO: remove the mutate + warning after ARROW-13390 is merged and Arrow
+  # supports factors in coalesce
+  df <- tibble(
+    x = factor("a", levels = c("a", "z")),
+    y = factor("b", levels = c("a", "b", "c"))
+  )
+  expect_dplyr_equal(
+    input %>%
+      mutate(c = coalesce(x, y)) %>%
+      collect() %>%
+      # This is a no-op on the Arrow side, but necessary to make the results equal
+      mutate(c = as.character(c)),
+    df,
+    warning = "Dictionaries .* are currently converted to strings .* in coalesce"
+  )
+
+  # no arguments
+  expect_error(
+    nse_funcs$coalesce(),
+    "At least one argument must be supplied to coalesce()",
+    fixed = TRUE
+  )
+})

From d9cec2304faf6eba8fd5a2a2157d834f86ea6df2 Mon Sep 17 00:00:00 2001
From: Weston Pace <weston.pace@gmail.com>
Date: Tue, 20 Jul 2021 13:50:19 -1000
Subject: [PATCH 612/719] ARROW-13417: [C++] The merged generator can sometimes
 pull from source sync-reentrant

A couple of things were at play here.

First, the parallel CSV reader added a cleanup step to the async vector generator which made it no longer sync-reentrant safe.  This served as a useful canary to detect this issue.

Second, there was a test in place to ensure that the merged generator did not pull from the individual subscriptions in a sync-reentrant manner.  However, this test was not guarding the source generator.

Finally, the bug itself was pretty straightforward.  The source could be pulled whenever a subscription finished and so if two subscriptions finished at the same time it would cause a race condition (or if a subscription finished very quickly and the initial pull was still adding subscriptions).

Closes #10758 from westonpace/ARROW-13417--c-the-merged-generator-can-sometimes-pull-from

Authored-by: Weston Pace <weston.pace@gmail.com>
Signed-off-by: Weston Pace <weston.pace@gmail.com>
---
 cpp/src/arrow/util/async_generator.h       | 11 +++++++++--
 cpp/src/arrow/util/async_generator_test.cc |  1 +
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/cpp/src/arrow/util/async_generator.h b/cpp/src/arrow/util/async_generator.h
index 8992e7bcac2..9d1021edff5 100644
--- a/cpp/src/arrow/util/async_generator.h
+++ b/cpp/src/arrow/util/async_generator.h
@@ -950,7 +950,7 @@ class MergedGenerator {
     if (state_->first) {
       state_->first = false;
       for (std::size_t i = 0; i < state_->active_subscriptions.size(); i++) {
-        state_->source().AddCallback(OuterCallback{state_, i});
+        state_->PullSource().AddCallback(OuterCallback{state_, i});
       }
     }
     return waiting_future;
@@ -979,6 +979,13 @@ class MergedGenerator {
           finished(false),
           num_active_subscriptions(max_subscriptions) {}
 
+    Future<AsyncGenerator<T>> PullSource() {
+      // Need to guard access to source() so we don't pull sync-reentrantly which
+      // is never valid.
+      auto lock = mutex.Lock();
+      return source();
+    }
+
     AsyncGenerator<AsyncGenerator<T>> source;
     // active_subscriptions and delivered_jobs will be bounded by max_subscriptions
     std::vector<AsyncGenerator<T>> active_subscriptions;
@@ -1014,7 +1021,7 @@ class MergedGenerator {
         }
       }
       if (sub_finished) {
-        state->source().AddCallback(OuterCallback{state, index});
+        state->PullSource().AddCallback(OuterCallback{state, index});
       } else if (sink.is_valid()) {
         sink.MarkFinished(maybe_next);
         if (maybe_next.ok()) {
diff --git a/cpp/src/arrow/util/async_generator_test.cc b/cpp/src/arrow/util/async_generator_test.cc
index 343eb9b6c4b..ccacc380392 100644
--- a/cpp/src/arrow/util/async_generator_test.cc
+++ b/cpp/src/arrow/util/async_generator_test.cc
@@ -570,6 +570,7 @@ TEST_P(MergedGeneratorTestFixture, MergedStress) {
       sources.push_back(source);
     }
     AsyncGenerator<AsyncGenerator<TestInt>> source_gen = AsyncVectorIt(sources);
+    auto outer_gaurd = ExpectNotAccessedReentrantly(&source_gen);
 
     auto merged = MakeMergedGenerator(source_gen, 4);
     ASSERT_FINISHES_OK_AND_ASSIGN(auto items, CollectAsyncGenerator(merged));

From 737492e074b816952bc011d18a996fec83b0b55f Mon Sep 17 00:00:00 2001
From: Weston Pace <weston.pace@gmail.com>
Date: Tue, 20 Jul 2021 13:58:02 -1000
Subject: [PATCH 613/719] ARROW-13386: [R][C++] CSV streaming changes break
 Rtools 35 32-bit build

The streaming CSV reader now creates more threads even when use_threads = false (it creates a CPU thread to do the worker tasks in an async way safe for nested parallelism).  This disturbs RTools 3.5 builds on Windows on 32 bit machines.  Per some discussion in Zulip I am disabling those tests as it is not clear the functionality is needed (i.e. it is not clear that there are many 32-bit consumers).

Closes #10757 from westonpace/bugfix/ARROW-13386--r-c-csv-streaming-changes-break-rtools-35-32-

Lead-authored-by: Weston Pace <weston.pace@gmail.com>
Co-authored-by: Neal Richardson <neal.p.richardson@gmail.com>
Signed-off-by: Weston Pace <weston.pace@gmail.com>
---
 .github/workflows/comment_bot.yml |  8 +++++---
 r/R/dataset-format.R              |  2 +-
 r/man/ChunkedArray.Rd             |  2 +-
 r/man/FileFormat.Rd               |  2 +-
 r/man/arrow-package.Rd            |  1 +
 r/tests/testthat/test-dataset.R   | 17 +++++++++++++++++
 6 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/comment_bot.yml b/.github/workflows/comment_bot.yml
index 7531876e71e..5847974ae9f 100644
--- a/.github/workflows/comment_bot.yml
+++ b/.github/workflows/comment_bot.yml
@@ -79,9 +79,9 @@ jobs:
             echo "CLANG_FORMAT_R=true" >> $GITHUB_ENV
           fi
       - name: Ensure clang-format has the appropriate versoin
-        if: env.CMAKE_FORMAT == 'true' || 
-          env.CLANG_FORMAT_CPP == 'true' || 
-          env.CLANG_FORMAT_R == 'true' || 
+        if: env.CMAKE_FORMAT == 'true' ||
+          env.CLANG_FORMAT_CPP == 'true' ||
+          env.CLANG_FORMAT_R == 'true' ||
           endsWith(github.event.comment.body, 'everything')
         run: |
           set -e
@@ -120,6 +120,8 @@ jobs:
         run: |
           source("ci/etc/rprofile")
           install.packages(c("remotes", "roxygen2"))
+          # We currently need dev roxygen2 (> 7.1.1) until they release
+          remotes::install_github("r-lib/roxygen2")
           remotes::install_deps("r")
           roxygen2::roxygenize("r")
       - name: Commit results
diff --git a/r/R/dataset-format.R b/r/R/dataset-format.R
index 6aa7d33cc3a..569033ff7f8 100644
--- a/r/R/dataset-format.R
+++ b/r/R/dataset-format.R
@@ -53,7 +53,7 @@
 #' It returns the appropriate subclass of `FileFormat` (e.g. `ParquetFileFormat`)
 #' @rdname FileFormat
 #' @name FileFormat
-#' @examplesIf arrow_with_dataset()
+#' @examplesIf arrow_with_dataset() && tolower(Sys.info()[["sysname"]]) != "windows"
 #' ## Semi-colon delimited files
 #' # Set up directory for examples
 #' tf <- tempfile()
diff --git a/r/man/ChunkedArray.Rd b/r/man/ChunkedArray.Rd
index eaae0b3d4b8..3a504f01466 100644
--- a/r/man/ChunkedArray.Rd
+++ b/r/man/ChunkedArray.Rd
@@ -62,7 +62,7 @@ class_scores$num_chunks
 # When taking a Slice from a chunked_array, chunks are preserved
 class_scores$Slice(2, length = 5)
 
-# You can combine Take and SortIndices to return a ChunkedArray with 1 chunk 
+# You can combine Take and SortIndices to return a ChunkedArray with 1 chunk
 # containing all values, ordered.
 class_scores$Take(class_scores$SortIndices(descending = TRUE))
 
diff --git a/r/man/FileFormat.Rd b/r/man/FileFormat.Rd
index 5bc9475b408..cabacc93755 100644
--- a/r/man/FileFormat.Rd
+++ b/r/man/FileFormat.Rd
@@ -52,7 +52,7 @@ It returns the appropriate subclass of \code{FileFormat} (e.g. \code{ParquetFile
 }
 
 \examples{
-\dontshow{if (arrow_with_dataset()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (arrow_with_dataset() && tolower(Sys.info()[["sysname"]]) != "windows") (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
 ## Semi-colon delimited files
 # Set up directory for examples
 tf <- tempfile()
diff --git a/r/man/arrow-package.Rd b/r/man/arrow-package.Rd
index ca6d32a895a..94620e9be1c 100644
--- a/r/man/arrow-package.Rd
+++ b/r/man/arrow-package.Rd
@@ -27,6 +27,7 @@ Useful links:
 Authors:
 \itemize{
   \item Ian Cook \email{ianmcook@gmail.com}
+  \item Nic Crane \email{thisisnic@gmail.com}
   \item Jonathan Keane \email{jkeane@gmail.com}
   \item Romain François \email{romain@rstudio.com} (\href{https://orcid.org/0000-0002-2444-4226}{ORCID})
   \item Jeroen Ooms \email{jeroen@berkeley.edu}
diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R
index 66493376e74..ba41bf2d921 100644
--- a/r/tests/testthat/test-dataset.R
+++ b/r/tests/testthat/test-dataset.R
@@ -27,6 +27,16 @@ ipc_dir <- make_temp_dir()
 csv_dir <- make_temp_dir()
 tsv_dir <- make_temp_dir()
 
+skip_if_multithreading_disabled <- function() {
+  is_32bit <- .Machine$sizeof.pointer < 8
+  is_old_r <- getRversion() < "4.0.0"
+  is_windows <- tolower(Sys.info()[["sysname"]]) == "windows"
+  if (is_32bit && is_old_r && is_windows) {
+    skip("Multithreading does not work properly on this system")
+  }
+}
+
+
 first_date <- lubridate::ymd_hms("2015-04-29 03:12:39")
 df1 <- tibble(
   int = 1:10,
@@ -342,6 +352,7 @@ test_that("IPC/Feather format data", {
 })
 
 test_that("CSV dataset", {
+  skip_if_multithreading_disabled()
   ds <- open_dataset(csv_dir, partitioning = "part", format = "csv")
   expect_r6_class(ds$format, "CsvFileFormat")
   expect_r6_class(ds$filesystem, "LocalFileSystem")
@@ -369,6 +380,7 @@ test_that("CSV dataset", {
 })
 
 test_that("CSV scan options", {
+  skip_if_multithreading_disabled()
   options <- FragmentScanOptions$create("text")
   expect_equal(options$type, "csv")
   options <- FragmentScanOptions$create("csv",
@@ -408,6 +420,7 @@ test_that("CSV scan options", {
 })
 
 test_that("compressed CSV dataset", {
+  skip_if_multithreading_disabled()
   skip_if_not_available("gzip")
   dst_dir <- make_temp_dir()
   dst_file <- file.path(dst_dir, "data.csv.gz")
@@ -431,6 +444,7 @@ test_that("compressed CSV dataset", {
 })
 
 test_that("CSV dataset options", {
+  skip_if_multithreading_disabled()
   dst_dir <- make_temp_dir()
   dst_file <- file.path(dst_dir, "data.csv")
   df <- tibble(chr = letters[1:10])
@@ -458,6 +472,7 @@ test_that("CSV dataset options", {
 })
 
 test_that("Other text delimited dataset", {
+  skip_if_multithreading_disabled()
   ds1 <- open_dataset(tsv_dir, partitioning = "part", format = "tsv")
   expect_equivalent(
     ds1 %>%
@@ -486,6 +501,7 @@ test_that("Other text delimited dataset", {
 })
 
 test_that("readr parse options", {
+  skip_if_multithreading_disabled()
   arrow_opts <- names(formals(CsvParseOptions$create))
   readr_opts <- names(formals(readr_to_csv_parse_options))
 
@@ -1575,6 +1591,7 @@ test_that("Writing a dataset: Parquet format options", {
 })
 
 test_that("Writing a dataset: CSV format options", {
+  skip_if_multithreading_disabled()
   df <- tibble(
     int = 1:10,
     dbl = as.numeric(1:10),

From 998f4723976c3caa48bedbd4b432b531b842b89b Mon Sep 17 00:00:00 2001
From: Hideaki Hayashi <hihayash@gmail.com>
Date: Wed, 21 Jul 2021 13:10:23 +0200
Subject: [PATCH 614/719] ARROW-12007: [C++] Loading parquet file returns
 "Invalid UTF8 payload" error

Judging from the comment "avoid spending time validating UTF8 data" with the setting of the false value to the cast_options.allow_invalid_utf8, it seems to me this was intended to be true rather than false.

Also, this resolved the error I was getting through the arrow R package, which seems to be ARROW-12007.

Closes #10759 from hideaki/cancel_unnecessary_utf8_check

Authored-by: Hideaki Hayashi <hihayash@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/parquet/arrow/reader_internal.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/src/parquet/arrow/reader_internal.cc b/cpp/src/parquet/arrow/reader_internal.cc
index 0ffa3e89970..f13687079d4 100644
--- a/cpp/src/parquet/arrow/reader_internal.cc
+++ b/cpp/src/parquet/arrow/reader_internal.cc
@@ -429,7 +429,7 @@ Status TransferBinary(RecordReader* reader, MemoryPool* pool,
   }
   ::arrow::compute::ExecContext ctx(pool);
   ::arrow::compute::CastOptions cast_options;
-  cast_options.allow_invalid_utf8 = false;  // avoid spending time validating UTF8 data
+  cast_options.allow_invalid_utf8 = true;  // avoid spending time validating UTF8 data
 
   auto binary_reader = dynamic_cast<BinaryRecordReader*>(reader);
   DCHECK(binary_reader);

From 6323c12f2273e4473108ecc3a270ab40651a76fd Mon Sep 17 00:00:00 2001
From: Karik Isichei <karik.isichei@gmail.com>
Date: Wed, 21 Jul 2021 13:28:43 +0200
Subject: [PATCH 615/719] ARROW-13086: [Python] Expose Parquet
 ArrowReaderProperties::coerce_int96_timestamp_unit_
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #10575 from isichei/ARROW-13086

Lead-authored-by: Karik Isichei <karik.isichei@gmail.com>
Co-authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Co-authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 cpp/src/arrow/dataset/file_parquet.cc         |  6 +-
 cpp/src/arrow/dataset/file_parquet.h          |  1 +
 python/pyarrow/_dataset.pyx                   | 69 ++++++++++++--
 python/pyarrow/_parquet.pxd                   |  2 +
 python/pyarrow/_parquet.pyx                   | 18 +++-
 python/pyarrow/includes/libarrow_dataset.pxd  |  1 +
 python/pyarrow/parquet.py                     | 92 ++++++++++++-------
 python/pyarrow/tests/parquet/test_datetime.py | 67 ++++++++++++++
 python/pyarrow/tests/test_dataset.py          | 16 ++++
 9 files changed, 231 insertions(+), 41 deletions(-)

diff --git a/cpp/src/arrow/dataset/file_parquet.cc b/cpp/src/arrow/dataset/file_parquet.cc
index 122894586c6..b20ca504db4 100644
--- a/cpp/src/arrow/dataset/file_parquet.cc
+++ b/cpp/src/arrow/dataset/file_parquet.cc
@@ -153,6 +153,8 @@ parquet::ArrowReaderProperties MakeArrowReaderProperties(
     auto column_index = metadata.schema()->ColumnIndex(name);
     properties.set_read_dictionary(column_index, true);
   }
+  properties.set_coerce_int96_timestamp_unit(
+      format.reader_options.coerce_int96_timestamp_unit);
   return properties;
 }
 
@@ -289,7 +291,9 @@ bool ParquetFileFormat::Equals(const FileFormat& other) const {
       checked_cast<const ParquetFileFormat&>(other).reader_options;
 
   // FIXME implement comparison for decryption options
-  return reader_options.dict_columns == other_reader_options.dict_columns;
+  return (reader_options.dict_columns == other_reader_options.dict_columns &&
+          reader_options.coerce_int96_timestamp_unit ==
+              other_reader_options.coerce_int96_timestamp_unit);
 }
 
 ParquetFileFormat::ParquetFileFormat(const parquet::ReaderProperties& reader_properties) {
diff --git a/cpp/src/arrow/dataset/file_parquet.h b/cpp/src/arrow/dataset/file_parquet.h
index d61730909e3..daf4bd92d59 100644
--- a/cpp/src/arrow/dataset/file_parquet.h
+++ b/cpp/src/arrow/dataset/file_parquet.h
@@ -86,6 +86,7 @@ class ARROW_DS_EXPORT ParquetFileFormat : public FileFormat {
     ///
     /// @{
     std::unordered_set<std::string> dict_columns;
+    arrow::TimeUnit::type coerce_int96_timestamp_unit = arrow::TimeUnit::NANO;
     /// @}
   } reader_options;
 
diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx
index 07684eff3b4..09734c21714 100644
--- a/python/pyarrow/_dataset.pyx
+++ b/python/pyarrow/_dataset.pyx
@@ -1397,17 +1397,56 @@ cdef class ParquetReadOptions(_Weakrefable):
     dictionary_columns : list of string, default None
         Names of columns which should be dictionary encoded as
         they are read.
+    coerce_int96_timestamp_unit : str, default None.
+        Cast timestamps that are stored in INT96 format to a particular
+        resolution (e.g. 'ms'). Setting to None is equivalent to 'ns'
+        and therefore INT96 timestamps will be infered as timestamps
+        in nanoseconds.
     """
 
     cdef public:
         set dictionary_columns
+        TimeUnit _coerce_int96_timestamp_unit
 
     # Also see _PARQUET_READ_OPTIONS
-    def __init__(self, dictionary_columns=None):
+    def __init__(self, dictionary_columns=None,
+                 coerce_int96_timestamp_unit=None):
         self.dictionary_columns = set(dictionary_columns or set())
+        self.coerce_int96_timestamp_unit = coerce_int96_timestamp_unit
+
+    @property
+    def coerce_int96_timestamp_unit(self):
+        unit = self._coerce_int96_timestamp_unit
+        if unit == TimeUnit_SECOND:
+            return "s"
+        elif unit == TimeUnit_MILLI:
+            return "ms"
+        elif unit == TimeUnit_MICRO:
+            return "us"
+        elif unit == TimeUnit_NANO:
+            return "ns"
+        else:
+            return None
+
+    @coerce_int96_timestamp_unit.setter
+    def coerce_int96_timestamp_unit(self, unit):
+        if unit is None or unit == "ns":
+            self._coerce_int96_timestamp_unit = TimeUnit_NANO
+        elif unit == "us":
+            self._coerce_int96_timestamp_unit = TimeUnit_MICRO
+        elif unit == "ms":
+            self._coerce_int96_timestamp_unit = TimeUnit_MILLI
+        elif unit == "s":
+            self._coerce_int96_timestamp_unit = TimeUnit_SECOND
+        else:
+            raise ValueError(
+                f"Invalid value for coerce_int96_timestamp_unit: {unit}"
+            )
 
     def equals(self, ParquetReadOptions other):
-        return self.dictionary_columns == other.dictionary_columns
+        return (self.dictionary_columns == other.dictionary_columns and
+                self.coerce_int96_timestamp_unit ==
+                other.coerce_int96_timestamp_unit)
 
     def __eq__(self, other):
         try:
@@ -1416,8 +1455,11 @@ cdef class ParquetReadOptions(_Weakrefable):
             return False
 
     def __repr__(self):
-        return (f"<ParquetReadOptions"
-                f" dictionary_columns={self.dictionary_columns}>")
+        return (
+            f"<ParquetReadOptions"
+            f" dictionary_columns={self.dictionary_columns}"
+            f" coerce_int96_timestamp_unit={self.coerce_int96_timestamp_unit}>"
+        )
 
 
 cdef class ParquetFileWriteOptions(FileWriteOptions):
@@ -1500,7 +1542,9 @@ cdef class ParquetFileWriteOptions(FileWriteOptions):
         self._set_arrow_properties()
 
 
-cdef set _PARQUET_READ_OPTIONS = {'dictionary_columns'}
+cdef set _PARQUET_READ_OPTIONS = {
+    'dictionary_columns', 'coerce_int96_timestamp_unit'
+}
 
 
 cdef class ParquetFileFormat(FileFormat):
@@ -1565,6 +1609,8 @@ cdef class ParquetFileFormat(FileFormat):
         if read_options.dictionary_columns is not None:
             for column in read_options.dictionary_columns:
                 options.dict_columns.insert(tobytes(column))
+        options.coerce_int96_timestamp_unit = \
+            read_options._coerce_int96_timestamp_unit
 
         self.init(<shared_ptr[CFileFormat]> wrapped)
         self.default_fragment_scan_options = default_fragment_scan_options
@@ -1577,10 +1623,15 @@ cdef class ParquetFileFormat(FileFormat):
     def read_options(self):
         cdef CParquetFileFormatReaderOptions* options
         options = &self.parquet_format.reader_options
-        return ParquetReadOptions(
+        parquet_read_options = ParquetReadOptions(
             dictionary_columns={frombytes(col)
                                 for col in options.dict_columns},
         )
+        # Read options getter/setter works with strings so setting
+        # the private property which uses the C Type
+        parquet_read_options._coerce_int96_timestamp_unit = \
+            options.coerce_int96_timestamp_unit
+        return parquet_read_options
 
     def make_write_options(self, **kwargs):
         opts = FileFormat.make_write_options(self)
@@ -1725,12 +1776,14 @@ cdef class ParquetFragmentScanOptions(FragmentScanOptions):
             self.buffer_size == other.buffer_size and
             self.pre_buffer == other.pre_buffer and
             self.enable_parallel_column_conversion ==
-            other.enable_parallel_column_conversion)
+            other.enable_parallel_column_conversion
+        )
 
     def __reduce__(self):
         return ParquetFragmentScanOptions, (
             self.use_buffered_stream, self.buffer_size, self.pre_buffer,
-            self.enable_parallel_column_conversion)
+            self.enable_parallel_column_conversion
+        )
 
 
 cdef class IpcFileWriteOptions(FileWriteOptions):
diff --git a/python/pyarrow/_parquet.pxd b/python/pyarrow/_parquet.pxd
index 7a5eb50aab1..6f3a495dffb 100644
--- a/python/pyarrow/_parquet.pxd
+++ b/python/pyarrow/_parquet.pxd
@@ -347,6 +347,8 @@ cdef extern from "parquet/api/reader.h" namespace "parquet" nogil:
         int64_t batch_size()
         void set_pre_buffer(c_bool pre_buffer)
         c_bool pre_buffer() const
+        void set_coerce_int96_timestamp_unit(TimeUnit unit)
+        TimeUnit coerce_int96_timestamp_unit() const
 
     ArrowReaderProperties default_arrow_reader_properties()
 
diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx
index f229a27b601..1d0f21852fc 100644
--- a/python/pyarrow/_parquet.pyx
+++ b/python/pyarrow/_parquet.pyx
@@ -928,7 +928,8 @@ cdef class ParquetReader(_Weakrefable):
 
     def open(self, object source not None, bint use_memory_map=True,
              read_dictionary=None, FileMetaData metadata=None,
-             int buffer_size=0, bint pre_buffer=False):
+             int buffer_size=0, bint pre_buffer=False,
+             coerce_int96_timestamp_unit=None):
         cdef:
             shared_ptr[CRandomAccessFile] rd_handle
             shared_ptr[CFileMetaData] c_metadata
@@ -951,6 +952,21 @@ cdef class ParquetReader(_Weakrefable):
 
         arrow_props.set_pre_buffer(pre_buffer)
 
+        if coerce_int96_timestamp_unit is None:
+            # use the default defined in default_arrow_reader_properties()
+            pass
+        elif coerce_int96_timestamp_unit == "ns":
+            arrow_props.set_coerce_int96_timestamp_unit(TimeUnit_NANO)
+        elif coerce_int96_timestamp_unit == "us":
+            arrow_props.set_coerce_int96_timestamp_unit(TimeUnit_MICRO)
+        elif coerce_int96_timestamp_unit == "ms":
+            arrow_props.set_coerce_int96_timestamp_unit(TimeUnit_MILLI)
+        elif coerce_int96_timestamp_unit == "s":
+            arrow_props.set_coerce_int96_timestamp_unit(TimeUnit_SECOND)
+        else:
+            raise ValueError(f"Invalid value for coerce_int96_timestamp_unit: "
+                             f"{coerce_int96_timestamp_unit}")
+
         self.source = source
 
         get_reader(source, use_memory_map, &rd_handle)
diff --git a/python/pyarrow/includes/libarrow_dataset.pxd b/python/pyarrow/includes/libarrow_dataset.pxd
index 85317afdef3..51ae9881f92 100644
--- a/python/pyarrow/includes/libarrow_dataset.pxd
+++ b/python/pyarrow/includes/libarrow_dataset.pxd
@@ -303,6 +303,7 @@ cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil:
     cdef cppclass CParquetFileFormatReaderOptions \
             "arrow::dataset::ParquetFileFormat::ReaderOptions":
         unordered_set[c_string] dict_columns
+        TimeUnit coerce_int96_timestamp_unit
 
     cdef cppclass CParquetFileFormat "arrow::dataset::ParquetFileFormat"(
             CFileFormat):
diff --git a/python/pyarrow/parquet.py b/python/pyarrow/parquet.py
index 0c43936c33b..6aaf9d9645c 100644
--- a/python/pyarrow/parquet.py
+++ b/python/pyarrow/parquet.py
@@ -214,15 +214,23 @@ class ParquetFile:
         Coalesce and issue file reads in parallel to improve performance on
         high-latency filesystems (e.g. S3). If True, Arrow will use a
         background I/O thread pool.
+    coerce_int96_timestamp_unit : str, default None.
+        Cast timestamps that are stored in INT96 format to a particular
+        resolution (e.g. 'ms'). Setting to None is equivalent to 'ns'
+        and therefore INT96 timestamps will be infered as timestamps
+        in nanoseconds.
     """
 
     def __init__(self, source, metadata=None, common_metadata=None,
                  read_dictionary=None, memory_map=False, buffer_size=0,
-                 pre_buffer=False):
+                 pre_buffer=False, coerce_int96_timestamp_unit=None):
         self.reader = ParquetReader()
-        self.reader.open(source, use_memory_map=memory_map,
-                         buffer_size=buffer_size, pre_buffer=pre_buffer,
-                         read_dictionary=read_dictionary, metadata=metadata)
+        self.reader.open(
+            source, use_memory_map=memory_map,
+            buffer_size=buffer_size, pre_buffer=pre_buffer,
+            read_dictionary=read_dictionary, metadata=metadata,
+            coerce_int96_timestamp_unit=coerce_int96_timestamp_unit
+        )
         self.common_metadata = common_metadata
         self._nested_paths_by_prefix = self._build_nested_paths()
 
@@ -1254,13 +1262,18 @@ class ParquetDataset:
     use_legacy_dataset=False. If using a filesystem layer that itself
     performs readahead (e.g. fsspec's S3FS), disable readahead for best
     results.
+coerce_int96_timestamp_unit : str, default None.
+    Cast timestamps that are stored in INT96 format to a particular resolution
+    (e.g. 'ms'). Setting to None is equivalent to 'ns' and therefore INT96
+    timestamps will be infered as timestamps in nanoseconds.
 """.format(_read_docstring_common, _DNF_filter_doc)
 
     def __new__(cls, path_or_paths=None, filesystem=None, schema=None,
                 metadata=None, split_row_groups=False, validate_schema=True,
                 filters=None, metadata_nthreads=1, read_dictionary=None,
                 memory_map=False, buffer_size=0, partitioning="hive",
-                use_legacy_dataset=None, pre_buffer=True):
+                use_legacy_dataset=None, pre_buffer=True,
+                coerce_int96_timestamp_unit=None):
         if use_legacy_dataset is None:
             # if a new filesystem is passed -> default to new implementation
             if isinstance(filesystem, FileSystem):
@@ -1270,18 +1283,21 @@ def __new__(cls, path_or_paths=None, filesystem=None, schema=None,
                 use_legacy_dataset = True
 
         if not use_legacy_dataset:
-            return _ParquetDatasetV2(path_or_paths, filesystem=filesystem,
-                                     filters=filters,
-                                     partitioning=partitioning,
-                                     read_dictionary=read_dictionary,
-                                     memory_map=memory_map,
-                                     buffer_size=buffer_size,
-                                     pre_buffer=pre_buffer,
-                                     # unsupported keywords
-                                     schema=schema, metadata=metadata,
-                                     split_row_groups=split_row_groups,
-                                     validate_schema=validate_schema,
-                                     metadata_nthreads=metadata_nthreads)
+            return _ParquetDatasetV2(
+                path_or_paths, filesystem=filesystem,
+                filters=filters,
+                partitioning=partitioning,
+                read_dictionary=read_dictionary,
+                memory_map=memory_map,
+                buffer_size=buffer_size,
+                pre_buffer=pre_buffer,
+                coerce_int96_timestamp_unit=coerce_int96_timestamp_unit,
+                # unsupported keywords
+                schema=schema, metadata=metadata,
+                split_row_groups=split_row_groups,
+                validate_schema=validate_schema,
+                metadata_nthreads=metadata_nthreads
+            )
         self = object.__new__(cls)
         return self
 
@@ -1289,7 +1305,8 @@ def __init__(self, path_or_paths, filesystem=None, schema=None,
                  metadata=None, split_row_groups=False, validate_schema=True,
                  filters=None, metadata_nthreads=1, read_dictionary=None,
                  memory_map=False, buffer_size=0, partitioning="hive",
-                 use_legacy_dataset=True, pre_buffer=True):
+                 use_legacy_dataset=True, pre_buffer=True,
+                 coerce_int96_timestamp_unit=None):
         if partitioning != "hive":
             raise ValueError(
                 'Only "hive" for hive-like partitioning is supported when '
@@ -1582,7 +1599,7 @@ class _ParquetDatasetV2:
     def __init__(self, path_or_paths, filesystem=None, filters=None,
                  partitioning="hive", read_dictionary=None, buffer_size=None,
                  memory_map=False, ignore_prefixes=None, pre_buffer=True,
-                 **kwargs):
+                 coerce_int96_timestamp_unit=None, **kwargs):
         import pyarrow.dataset as ds
 
         # Raise error for not supported keywords
@@ -1596,7 +1613,10 @@ def __init__(self, path_or_paths, filesystem=None, filters=None,
                     "Dataset API".format(keyword))
 
         # map format arguments
-        read_options = {"pre_buffer": pre_buffer}
+        read_options = {
+            "pre_buffer": pre_buffer,
+            "coerce_int96_timestamp_unit": coerce_int96_timestamp_unit
+        }
         if buffer_size:
             read_options.update(use_buffered_stream=True,
                                 buffer_size=buffer_size)
@@ -1825,7 +1845,8 @@ def read_table(source, columns=None, use_threads=True, metadata=None,
                use_pandas_metadata=False, memory_map=False,
                read_dictionary=None, filesystem=None, filters=None,
                buffer_size=0, partitioning="hive", use_legacy_dataset=False,
-               ignore_prefixes=None, pre_buffer=True):
+               ignore_prefixes=None, pre_buffer=True,
+               coerce_int96_timestamp_unit=None):
     if not use_legacy_dataset:
         if metadata is not None:
             raise ValueError(
@@ -1845,6 +1866,7 @@ def read_table(source, columns=None, use_threads=True, metadata=None,
                 filters=filters,
                 ignore_prefixes=ignore_prefixes,
                 pre_buffer=pre_buffer,
+                coerce_int96_timestamp_unit=coerce_int96_timestamp_unit
             )
         except ImportError:
             # fall back on ParquetFile for simple cases when pyarrow.dataset
@@ -1866,7 +1888,9 @@ def read_table(source, columns=None, use_threads=True, metadata=None,
             dataset = ParquetFile(
                 source, metadata=metadata, read_dictionary=read_dictionary,
                 memory_map=memory_map, buffer_size=buffer_size,
-                pre_buffer=pre_buffer)
+                pre_buffer=pre_buffer,
+                coerce_int96_timestamp_unit=coerce_int96_timestamp_unit
+            )
 
         return dataset.read(columns=columns, use_threads=use_threads,
                             use_pandas_metadata=use_pandas_metadata)
@@ -1877,16 +1901,22 @@ def read_table(source, columns=None, use_threads=True, metadata=None,
             "use_legacy_dataset=False")
 
     if _is_path_like(source):
-        pf = ParquetDataset(source, metadata=metadata, memory_map=memory_map,
-                            read_dictionary=read_dictionary,
-                            buffer_size=buffer_size,
-                            filesystem=filesystem, filters=filters,
-                            partitioning=partitioning)
+        pf = ParquetDataset(
+            source, metadata=metadata, memory_map=memory_map,
+            read_dictionary=read_dictionary,
+            buffer_size=buffer_size,
+            filesystem=filesystem, filters=filters,
+            partitioning=partitioning,
+            coerce_int96_timestamp_unit=coerce_int96_timestamp_unit
+        )
     else:
-        pf = ParquetFile(source, metadata=metadata,
-                         read_dictionary=read_dictionary,
-                         memory_map=memory_map,
-                         buffer_size=buffer_size)
+        pf = ParquetFile(
+            source, metadata=metadata,
+            read_dictionary=read_dictionary,
+            memory_map=memory_map,
+            buffer_size=buffer_size,
+            coerce_int96_timestamp_unit=coerce_int96_timestamp_unit
+        )
     return pf.read(columns=columns, use_threads=use_threads,
                    use_pandas_metadata=use_pandas_metadata)
 
diff --git a/python/pyarrow/tests/parquet/test_datetime.py b/python/pyarrow/tests/parquet/test_datetime.py
index fdbd2422800..5e665637704 100644
--- a/python/pyarrow/tests/parquet/test_datetime.py
+++ b/python/pyarrow/tests/parquet/test_datetime.py
@@ -262,6 +262,73 @@ def test_date_time_types(tempdir):
     assert read_table.equals(expected)
 
 
+@pytest.mark.pandas
+@pytest.mark.parametrize('unit', ['s', 'ms', 'us', 'ns'])
+def test_coerce_int96_timestamp_unit(unit):
+    i_s = pd.Timestamp('2010-01-01').value / 1000000000  # := 1262304000
+
+    d_s = np.arange(i_s, i_s + 10, 1, dtype='int64')
+    d_ms = d_s * 1000
+    d_us = d_ms * 1000
+    d_ns = d_us * 1000
+
+    a_s = pa.array(d_s, type=pa.timestamp('s'))
+    a_ms = pa.array(d_ms, type=pa.timestamp('ms'))
+    a_us = pa.array(d_us, type=pa.timestamp('us'))
+    a_ns = pa.array(d_ns, type=pa.timestamp('ns'))
+
+    arrays = {"s": a_s, "ms": a_ms, "us": a_us, "ns": a_ns}
+    names = ['ts_s', 'ts_ms', 'ts_us', 'ts_ns']
+    table = pa.Table.from_arrays([a_s, a_ms, a_us, a_ns], names)
+
+    # For either Parquet version, coercing to nanoseconds is allowed
+    # if Int96 storage is used
+    expected = pa.Table.from_arrays([arrays.get(unit)]*4, names)
+    read_table_kwargs = {"coerce_int96_timestamp_unit": unit}
+    _check_roundtrip(table, expected,
+                     read_table_kwargs=read_table_kwargs,
+                     use_deprecated_int96_timestamps=True)
+    _check_roundtrip(table, expected, version='2.0',
+                     read_table_kwargs=read_table_kwargs,
+                     use_deprecated_int96_timestamps=True)
+
+
+@pytest.mark.pandas
+@pytest.mark.parametrize('pq_reader_method', ['ParquetFile', 'read_table'])
+def test_coerce_int96_timestamp_overflow(pq_reader_method, tempdir):
+
+    def get_table(pq_reader_method, filename, **kwargs):
+        if pq_reader_method == "ParquetFile":
+            return pq.ParquetFile(filename, **kwargs).read()
+        elif pq_reader_method == "read_table":
+            return pq.read_table(filename, **kwargs)
+
+    # Recreating the initial JIRA issue referrenced in ARROW-12096
+    oob_dts = [
+        datetime.datetime(1000, 1, 1),
+        datetime.datetime(2000, 1, 1),
+        datetime.datetime(3000, 1, 1)
+    ]
+    df = pd.DataFrame({"a": oob_dts})
+    table = pa.table(df)
+
+    filename = tempdir / "test_round_trip_overflow.parquet"
+    pq.write_table(table, filename, use_deprecated_int96_timestamps=True,
+                   version="1.0")
+
+    # with the default resolution of ns, we get wrong values for INT96
+    # that are out of bounds for nanosecond range
+    tab_error = get_table(pq_reader_method, filename)
+    assert tab_error["a"].to_pylist() != oob_dts
+
+    # avoid this overflow by specifying the resolution to use for INT96 values
+    tab_correct = get_table(
+        pq_reader_method, filename, coerce_int96_timestamp_unit="s"
+    )
+    df_correct = tab_correct.to_pandas(timestamp_as_object=True)
+    tm.assert_frame_equal(df, df_correct)
+
+
 def test_timestamp_restore_timezone():
     # ARROW-5888, restore timezone from serialized metadata
     ty = pa.timestamp('ms', tz='America/New_York')
diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py
index 6f9662471fc..992da7f13a9 100644
--- a/python/pyarrow/tests/test_dataset.py
+++ b/python/pyarrow/tests/test_dataset.py
@@ -629,13 +629,29 @@ def test_partition_keys():
 def test_parquet_read_options():
     opts1 = ds.ParquetReadOptions()
     opts2 = ds.ParquetReadOptions(dictionary_columns=['a', 'b'])
+    opts3 = ds.ParquetReadOptions(coerce_int96_timestamp_unit="ms")
 
     assert opts1.dictionary_columns == set()
 
     assert opts2.dictionary_columns == {'a', 'b'}
 
+    assert opts1.coerce_int96_timestamp_unit == "ns"
+    assert opts3.coerce_int96_timestamp_unit == "ms"
+
     assert opts1 == opts1
     assert opts1 != opts2
+    assert opts1 != opts3
+
+
+def test_parquet_file_format_read_options():
+    pff1 = ds.ParquetFileFormat()
+    pff2 = ds.ParquetFileFormat(dictionary_columns={'a'})
+    pff3 = ds.ParquetFileFormat(coerce_int96_timestamp_unit="s")
+
+    assert pff1.read_options == ds.ParquetReadOptions()
+    assert pff2.read_options == ds.ParquetReadOptions(dictionary_columns=['a'])
+    assert pff3.read_options == ds.ParquetReadOptions(
+        coerce_int96_timestamp_unit="s")
 
 
 def test_parquet_scan_options():

From 8454bbf48ce5048ecc200767380e11687282ced4 Mon Sep 17 00:00:00 2001
From: Dominik Moritz <domoritz@gmail.com>
Date: Wed, 21 Jul 2021 04:29:39 -0700
Subject: [PATCH 616/719] MINOR: [JS] Include version in docs (#10763)

---
 js/typedoc.js | 1 +
 1 file changed, 1 insertion(+)

diff --git a/js/typedoc.js b/js/typedoc.js
index 1c647012823..3512c01f2d3 100644
--- a/js/typedoc.js
+++ b/js/typedoc.js
@@ -23,6 +23,7 @@ module.exports = {
     excludePrivate: true,
     excludeProtected: true,
     excludeExternals: true,
+    includeVersion: true,
     exclude: [
         'src/fb/*.ts',
         'src/bin/*.ts'

From 72a43e477e97a7bcc129b2dc9f7403d90334a994 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Wed, 21 Jul 2021 14:37:20 +0200
Subject: [PATCH 617/719] ARROW-13086: [Python] De-duplicate time unit
 conversion code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Followup to PR #10575

Closes #10766 from pitrou/ARROW-13086-refactor

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 python/pyarrow/_dataset.pyx        | 26 +++-----------------
 python/pyarrow/_parquet.pyx        | 16 ++++--------
 python/pyarrow/lib.pxd             |  3 +++
 python/pyarrow/tests/test_types.py |  8 +++---
 python/pyarrow/types.pxi           | 39 +++++++++++++-----------------
 5 files changed, 33 insertions(+), 59 deletions(-)

diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx
index 09734c21714..945475bd7f1 100644
--- a/python/pyarrow/_dataset.pyx
+++ b/python/pyarrow/_dataset.pyx
@@ -1416,32 +1416,14 @@ cdef class ParquetReadOptions(_Weakrefable):
 
     @property
     def coerce_int96_timestamp_unit(self):
-        unit = self._coerce_int96_timestamp_unit
-        if unit == TimeUnit_SECOND:
-            return "s"
-        elif unit == TimeUnit_MILLI:
-            return "ms"
-        elif unit == TimeUnit_MICRO:
-            return "us"
-        elif unit == TimeUnit_NANO:
-            return "ns"
-        else:
-            return None
+        return timeunit_to_string(self._coerce_int96_timestamp_unit)
 
     @coerce_int96_timestamp_unit.setter
     def coerce_int96_timestamp_unit(self, unit):
-        if unit is None or unit == "ns":
-            self._coerce_int96_timestamp_unit = TimeUnit_NANO
-        elif unit == "us":
-            self._coerce_int96_timestamp_unit = TimeUnit_MICRO
-        elif unit == "ms":
-            self._coerce_int96_timestamp_unit = TimeUnit_MILLI
-        elif unit == "s":
-            self._coerce_int96_timestamp_unit = TimeUnit_SECOND
+        if unit is not None:
+            self._coerce_int96_timestamp_unit = string_to_timeunit(unit)
         else:
-            raise ValueError(
-                f"Invalid value for coerce_int96_timestamp_unit: {unit}"
-            )
+            self._coerce_int96_timestamp_unit = TimeUnit_NANO
 
     def equals(self, ParquetReadOptions other):
         return (self.dictionary_columns == other.dictionary_columns and
diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx
index 1d0f21852fc..ef7b6f98d7b 100644
--- a/python/pyarrow/_parquet.pyx
+++ b/python/pyarrow/_parquet.pyx
@@ -36,7 +36,8 @@ from pyarrow.lib cimport (_Weakrefable, Buffer, Array, Schema,
                           pyarrow_wrap_table,
                           pyarrow_wrap_buffer,
                           pyarrow_wrap_batch,
-                          NativeFile, get_reader, get_writer)
+                          NativeFile, get_reader, get_writer,
+                          string_to_timeunit)
 
 from pyarrow.lib import (ArrowException, NativeFile, BufferOutputStream,
                          _stringify_path, _datetime_from_int,
@@ -938,6 +939,7 @@ cdef class ParquetReader(_Weakrefable):
                 default_arrow_reader_properties())
             c_string path
             FileReaderBuilder builder
+            TimeUnit int96_timestamp_unit_code
 
         if metadata is not None:
             c_metadata = metadata.sp_metadata
@@ -955,17 +957,9 @@ cdef class ParquetReader(_Weakrefable):
         if coerce_int96_timestamp_unit is None:
             # use the default defined in default_arrow_reader_properties()
             pass
-        elif coerce_int96_timestamp_unit == "ns":
-            arrow_props.set_coerce_int96_timestamp_unit(TimeUnit_NANO)
-        elif coerce_int96_timestamp_unit == "us":
-            arrow_props.set_coerce_int96_timestamp_unit(TimeUnit_MICRO)
-        elif coerce_int96_timestamp_unit == "ms":
-            arrow_props.set_coerce_int96_timestamp_unit(TimeUnit_MILLI)
-        elif coerce_int96_timestamp_unit == "s":
-            arrow_props.set_coerce_int96_timestamp_unit(TimeUnit_SECOND)
         else:
-            raise ValueError(f"Invalid value for coerce_int96_timestamp_unit: "
-                             f"{coerce_int96_timestamp_unit}")
+            arrow_props.set_coerce_int96_timestamp_unit(
+                string_to_timeunit(coerce_int96_timestamp_unit))
 
         self.source = source
 
diff --git a/python/pyarrow/lib.pxd b/python/pyarrow/lib.pxd
index 414c7b5f26b..592533e7015 100644
--- a/python/pyarrow/lib.pxd
+++ b/python/pyarrow/lib.pxd
@@ -526,6 +526,9 @@ cdef shared_ptr[CInputStream] native_transcoding_input_stream(
 # Default is allow_none=False
 cpdef DataType ensure_type(object type, bint allow_none=*)
 
+cdef timeunit_to_string(TimeUnit unit)
+cdef TimeUnit string_to_timeunit(unit) except *
+
 # Exceptions may be raised when converting dict values, so need to
 # check exception state on return
 cdef shared_ptr[const CKeyValueMetadata] pyarrow_unwrap_metadata(
diff --git a/python/pyarrow/tests/test_types.py b/python/pyarrow/tests/test_types.py
index 698ba8df0cc..cd6de936358 100644
--- a/python/pyarrow/tests/test_types.py
+++ b/python/pyarrow/tests/test_types.py
@@ -403,7 +403,7 @@ def test_timestamp():
             assert ty.tz == tz
 
     for invalid_unit in ('m', 'arbit', 'rary'):
-        with pytest.raises(ValueError, match='Invalid TimeUnit string'):
+        with pytest.raises(ValueError, match='Invalid time unit'):
             pa.timestamp(invalid_unit)
 
 
@@ -413,7 +413,7 @@ def test_time32_units():
         assert ty.unit == valid_unit
 
     for invalid_unit in ('m', 'us', 'ns'):
-        error_msg = 'Invalid TimeUnit for time32: {}'.format(invalid_unit)
+        error_msg = 'Invalid time unit for time32: {!r}'.format(invalid_unit)
         with pytest.raises(ValueError, match=error_msg):
             pa.time32(invalid_unit)
 
@@ -424,7 +424,7 @@ def test_time64_units():
         assert ty.unit == valid_unit
 
     for invalid_unit in ('m', 's', 'ms'):
-        error_msg = 'Invalid TimeUnit for time64: {}'.format(invalid_unit)
+        error_msg = 'Invalid time unit for time64: {!r}'.format(invalid_unit)
         with pytest.raises(ValueError, match=error_msg):
             pa.time64(invalid_unit)
 
@@ -435,7 +435,7 @@ def test_duration():
         assert ty.unit == unit
 
     for invalid_unit in ('m', 'arbit', 'rary'):
-        with pytest.raises(ValueError, match='Invalid TimeUnit string'):
+        with pytest.raises(ValueError, match='Invalid time unit'):
             pa.duration(invalid_unit)
 
 
diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi
index 9a4ebad2d00..451ff20a776 100644
--- a/python/pyarrow/types.pxi
+++ b/python/pyarrow/types.pxi
@@ -1871,6 +1871,19 @@ cdef timeunit_to_string(TimeUnit unit):
         return 'ns'
 
 
+cdef TimeUnit string_to_timeunit(unit) except *:
+    if unit == 's':
+        return TimeUnit_SECOND
+    elif unit == 'ms':
+        return TimeUnit_MILLI
+    elif unit == 'us':
+        return TimeUnit_MICRO
+    elif unit == 'ns':
+        return TimeUnit_NANO
+    else:
+        raise ValueError(f"Invalid time unit: {unit!r}")
+
+
 def tzinfo_to_string(tz):
     """
     Converts a time zone object into a string indicating the name of a time
@@ -1945,16 +1958,7 @@ def timestamp(unit, tz=None):
         TimeUnit unit_code
         c_string c_timezone
 
-    if unit == "s":
-        unit_code = TimeUnit_SECOND
-    elif unit == 'ms':
-        unit_code = TimeUnit_MILLI
-    elif unit == 'us':
-        unit_code = TimeUnit_MICRO
-    elif unit == 'ns':
-        unit_code = TimeUnit_NANO
-    else:
-        raise ValueError('Invalid TimeUnit string')
+    unit_code = string_to_timeunit(unit)
 
     cdef TimestampType out = TimestampType.__new__(TimestampType)
 
@@ -2003,7 +2007,7 @@ def time32(unit):
     elif unit == 'ms':
         unit_code = TimeUnit_MILLI
     else:
-        raise ValueError('Invalid TimeUnit for time32: {}'.format(unit))
+        raise ValueError(f"Invalid time unit for time32: {unit!r}")
 
     if unit_code in _time_type_cache:
         return _time_type_cache[unit_code]
@@ -2046,7 +2050,7 @@ def time64(unit):
     elif unit == 'ns':
         unit_code = TimeUnit_NANO
     else:
-        raise ValueError('Invalid TimeUnit for time64: {}'.format(unit))
+        raise ValueError(f"Invalid time unit for time64: {unit!r}")
 
     if unit_code in _time_type_cache:
         return _time_type_cache[unit_code]
@@ -2084,16 +2088,7 @@ def duration(unit):
     cdef:
         TimeUnit unit_code
 
-    if unit == "s":
-        unit_code = TimeUnit_SECOND
-    elif unit == 'ms':
-        unit_code = TimeUnit_MILLI
-    elif unit == 'us':
-        unit_code = TimeUnit_MICRO
-    elif unit == 'ns':
-        unit_code = TimeUnit_NANO
-    else:
-        raise ValueError('Invalid TimeUnit string')
+    unit_code = string_to_timeunit(unit)
 
     if unit_code in _duration_type_cache:
         return _duration_type_cache[unit_code]

From 1ce1f10a64b3974fbf0912e1e13d0af6184fabe7 Mon Sep 17 00:00:00 2001
From: niranda perera <niranda.perera@gmail.com>
Date: Wed, 21 Jul 2021 14:48:25 +0200
Subject: [PATCH 618/719] ARROW-12016  [C++] Implement array_sort_indices and
 sort_indices for BOOL type

Adding `array_sort_indices` and `partition_nth_indices` for `BooleanType` using existing sort and Nth-partition utils.
This may be rather inefficient, since the values are traversed bit-by-bit rather than working on a byte/word.

May be we could work on it as a separate improvement?

Closes #10585 from nirandaperera/ARROW-12016

Lead-authored-by: niranda perera <niranda.perera@gmail.com>
Co-authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/compute/kernels/vector_sort.cc  | 68 ++++++++++++++++
 .../compute/kernels/vector_sort_benchmark.cc  | 16 ++++
 .../arrow/compute/kernels/vector_sort_test.cc | 77 ++++++++++++++++++-
 docs/source/cpp/compute.rst                   | 30 ++++----
 r/tests/testthat/helper-data.R                |  2 +-
 r/tests/testthat/test-dplyr-arrange.R         |  1 -
 6 files changed, 176 insertions(+), 18 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/vector_sort.cc b/cpp/src/arrow/compute/kernels/vector_sort.cc
index 6c425d65550..7fa43e715d8 100644
--- a/cpp/src/arrow/compute/kernels/vector_sort.cc
+++ b/cpp/src/arrow/compute/kernels/vector_sort.cc
@@ -29,6 +29,8 @@
 #include "arrow/table.h"
 #include "arrow/type_traits.h"
 #include "arrow/util/bit_block_counter.h"
+#include "arrow/util/bitmap.h"
+#include "arrow/util/bitmap_ops.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/optional.h"
 #include "arrow/visitor_inline.h"
@@ -42,6 +44,7 @@ namespace internal {
 
 // Visit all physical types for which sorting is implemented.
 #define VISIT_PHYSICAL_TYPES(VISIT) \
+  VISIT(BooleanType)                \
   VISIT(Int8Type)                   \
   VISIT(Int16Type)                  \
   VISIT(Int32Type)                  \
@@ -370,6 +373,24 @@ inline void VisitRawValuesInline(const ArrayType& values,
       [&](int64_t i) { visitor_not_null(data[i]); }, [&]() { visitor_null(); });
 }
 
+template <typename VisitorNotNull, typename VisitorNull>
+inline void VisitRawValuesInline(const BooleanArray& values,
+                                 VisitorNotNull&& visitor_not_null,
+                                 VisitorNull&& visitor_null) {
+  if (values.null_count() != 0) {
+    const uint8_t* data = values.data()->GetValues<uint8_t>(1, 0);
+    VisitBitBlocksVoid(
+        values.null_bitmap(), values.offset(), values.length(),
+        [&](int64_t i) { visitor_not_null(BitUtil::GetBit(data, values.offset() + i)); },
+        [&]() { visitor_null(); });
+  } else {
+    // Can avoid GetBit() overhead in the no-nulls case
+    VisitBitBlocksVoid(
+        values.data()->buffers[1], values.offset(), values.length(),
+        [&](int64_t i) { visitor_not_null(true); }, [&]() { visitor_not_null(false); });
+  }
+}
+
 template <typename ArrowType>
 class ArrayCompareSorter {
   using ArrayType = typename TypeTraits<ArrowType>::ArrayType;
@@ -477,6 +498,42 @@ class ArrayCountSorter {
   }
 };
 
+using ::arrow::internal::Bitmap;
+
+template <>
+class ArrayCountSorter<BooleanType> {
+ public:
+  ArrayCountSorter() = default;
+
+  // Returns where null starts.
+  // `offset` is used when this is called on a chunk of a chunked array
+  uint64_t* Sort(uint64_t* indices_begin, uint64_t* indices_end,
+                 const BooleanArray& values, int64_t offset,
+                 const ArraySortOptions& options) {
+    std::array<int64_t, 2> counts{0, 0};
+
+    const int64_t nulls = values.null_count();
+    const int64_t ones = values.true_count();
+    const int64_t zeros = values.length() - ones - nulls;
+
+    int64_t null_position = values.length() - nulls;
+    int64_t index = offset;
+    const auto nulls_begin = indices_begin + null_position;
+
+    if (options.order == SortOrder::Ascending) {
+      // ones start after zeros
+      counts[1] = zeros;
+    } else {
+      // zeros start after ones
+      counts[0] = ones;
+    }
+    VisitRawValuesInline(
+        values, [&](bool v) { indices_begin[counts[v]++] = index++; },
+        [&]() { indices_begin[null_position++] = index++; });
+    return nulls_begin;
+  }
+};
+
 // Sort integers with counting sort or comparison based sorting algorithm
 // - Use O(n) counting sort if values are in a small range
 // - Use O(nlogn) std::stable_sort otherwise
@@ -527,6 +584,11 @@ class ArrayCountOrCompareSorter {
 template <typename Type, typename Enable = void>
 struct ArraySorter;
 
+template <>
+struct ArraySorter<BooleanType> {
+  ArrayCountSorter<BooleanType> impl;
+};
+
 template <>
 struct ArraySorter<UInt8Type> {
   ArrayCountSorter<UInt8Type> impl;
@@ -576,11 +638,17 @@ struct ArraySortIndices {
 
 // Sort indices kernels implemented for
 //
+// * Boolean type
 // * Number types
 // * Base binary types
 
 template <template <typename...> class ExecTemplate>
 void AddSortingKernels(VectorKernel base, VectorFunction* func) {
+  // bool type
+  base.signature = KernelSignature::Make({InputType::Array(boolean())}, uint64());
+  base.exec = ExecTemplate<UInt64Type, BooleanType>::Exec;
+  DCHECK_OK(func->AddKernel(base));
+
   for (const auto& ty : NumericTypes()) {
     auto physical_type = GetPhysicalType(ty);
     base.signature = KernelSignature::Make({InputType::Array(ty)}, uint64());
diff --git a/cpp/src/arrow/compute/kernels/vector_sort_benchmark.cc b/cpp/src/arrow/compute/kernels/vector_sort_benchmark.cc
index 820c51ba8ec..d8e3b9b8081 100644
--- a/cpp/src/arrow/compute/kernels/vector_sort_benchmark.cc
+++ b/cpp/src/arrow/compute/kernels/vector_sort_benchmark.cc
@@ -81,6 +81,16 @@ static void ArraySortIndicesInt64Wide(benchmark::State& state) {
   ArraySortIndicesInt64Benchmark(state, min, max);
 }
 
+static void ArraySortIndicesBool(benchmark::State& state) {
+  RegressionArgs args(state);
+
+  const int64_t array_size = args.size * 8;
+  auto rand = random::RandomArrayGenerator(kSeed);
+  auto values = rand.Boolean(array_size, 0.5, args.null_proportion);
+
+  ArraySortIndicesBenchmark(state, values);
+}
+
 static void ChunkedArraySortIndicesInt64Narrow(benchmark::State& state) {
   ChunkedArraySortIndicesInt64Benchmark(state, -100, 100);
 }
@@ -235,6 +245,12 @@ BENCHMARK(ArraySortIndicesInt64Wide)
     ->Args({1 << 23, 100})
     ->Unit(benchmark::TimeUnit::kNanosecond);
 
+BENCHMARK(ArraySortIndicesBool)
+    ->Apply(RegressionSetArgs)
+    ->Args({1 << 20, 100})
+    ->Args({1 << 23, 100})
+    ->Unit(benchmark::TimeUnit::kNanosecond);
+
 BENCHMARK(ChunkedArraySortIndicesInt64Narrow)
     ->Apply(RegressionSetArgs)
     ->Args({1 << 20, 100})
diff --git a/cpp/src/arrow/compute/kernels/vector_sort_test.cc b/cpp/src/arrow/compute/kernels/vector_sort_test.cc
index 2d76f0102f0..478f6ccac3a 100644
--- a/cpp/src/arrow/compute/kernels/vector_sort_test.cc
+++ b/cpp/src/arrow/compute/kernels/vector_sort_test.cc
@@ -180,6 +180,10 @@ template <typename ArrowType>
 class TestNthToIndicesForIntegral : public TestNthToIndices<ArrowType> {};
 TYPED_TEST_SUITE(TestNthToIndicesForIntegral, IntegralArrowTypes);
 
+template <typename ArrowType>
+class TestNthToIndicesForBool : public TestNthToIndices<ArrowType> {};
+TYPED_TEST_SUITE(TestNthToIndicesForBool, ::testing::Types<BooleanType>);
+
 template <typename ArrowType>
 class TestNthToIndicesForTemporal : public TestNthToIndices<ArrowType> {};
 TYPED_TEST_SUITE(TestNthToIndicesForTemporal, TemporalArrowTypes);
@@ -223,6 +227,13 @@ TYPED_TEST(TestNthToIndicesForIntegral, Integral) {
   this->AssertNthToIndicesJson("[null, 1, 3, null, 2, 5]", 6);
 }
 
+TYPED_TEST(TestNthToIndicesForBool, Bool) {
+  this->AssertNthToIndicesJson("[null, false, true, null, false, true]", 0);
+  this->AssertNthToIndicesJson("[null, false, true, null, false, true]", 2);
+  this->AssertNthToIndicesJson("[null, false, true, null, false, true]", 5);
+  this->AssertNthToIndicesJson("[null, false, true, null, false, true]", 6);
+}
+
 TYPED_TEST(TestNthToIndicesForTemporal, Temporal) {
   this->AssertNthToIndicesJson("[null, 1, 3, null, 2, 5]", 0);
   this->AssertNthToIndicesJson("[null, 1, 3, null, 2, 5]", 2);
@@ -402,6 +413,10 @@ template <typename ArrowType>
 class TestArraySortIndicesForReal : public TestArraySortIndices<ArrowType> {};
 TYPED_TEST_SUITE(TestArraySortIndicesForReal, RealArrowTypes);
 
+template <typename ArrowType>
+class TestArraySortIndicesForBool : public TestArraySortIndices<ArrowType> {};
+TYPED_TEST_SUITE(TestArraySortIndicesForBool, ::testing::Types<BooleanType>);
+
 template <typename ArrowType>
 class TestArraySortIndicesForIntegral : public TestArraySortIndices<ArrowType> {};
 TYPED_TEST_SUITE(TestArraySortIndicesForIntegral, IntegralArrowTypes);
@@ -464,6 +479,26 @@ TYPED_TEST(TestArraySortIndicesForIntegral, SortIntegral) {
                           "[5, 2, 4, 1, 0, 3]");
 }
 
+TYPED_TEST(TestArraySortIndicesForBool, SortBool) {
+  this->AssertSortIndices("[]", "[]");
+
+  this->AssertSortIndices("[true, true, false]", "[2, 0, 1]");
+  this->AssertSortIndices("[false, false,  false, true, true, true, true]",
+                          "[0, 1, 2, 3, 4, 5, 6]");
+  this->AssertSortIndices("[true, true, true, true, false, false, false]",
+                          "[4, 5, 6, 0, 1, 2, 3]");
+
+  this->AssertSortIndices("[false, true, false, true, true, false, false]",
+                          SortOrder::Ascending, "[0, 2, 5, 6, 1, 3, 4]");
+  this->AssertSortIndices("[false, true, false, true, true, false, false]",
+                          SortOrder::Descending, "[1, 3, 4, 0, 2, 5, 6]");
+
+  this->AssertSortIndices("[null, true, false, null, false, true]", SortOrder::Ascending,
+                          "[2, 4, 1, 5, 0, 3]");
+  this->AssertSortIndices("[null, true, false, null, false, true]", SortOrder::Descending,
+                          "[1, 5, 2, 4, 0, 3]");
+}
+
 TYPED_TEST(TestArraySortIndicesForTemporal, SortTemporal) {
   this->AssertSortIndices("[]", "[]");
 
@@ -546,7 +581,7 @@ class TestArraySortIndicesRandomCompare : public TestBase {};
 using SortIndicesableTypes =
     ::testing::Types<UInt8Type, UInt16Type, UInt32Type, UInt64Type, Int8Type, Int16Type,
                      Int32Type, Int64Type, FloatType, DoubleType, StringType,
-                     Decimal128Type>;
+                     Decimal128Type, BooleanType>;
 
 template <typename ArrayType>
 void ValidateSorted(const ArrayType& array, UInt64Array& offsets, SortOrder order) {
@@ -842,6 +877,27 @@ TEST_F(TestRecordBatchSortIndices, NaNAndNull) {
   AssertSortIndices(batch, options, "[7, 1, 2, 6, 5, 4, 0, 3]");
 }
 
+TEST_F(TestRecordBatchSortIndices, Boolean) {
+  auto schema = ::arrow::schema({
+      {field("a", boolean())},
+      {field("b", boolean())},
+  });
+  SortOptions options(
+      {SortKey("a", SortOrder::Ascending), SortKey("b", SortOrder::Descending)});
+
+  auto batch = RecordBatchFromJSON(schema,
+                                   R"([{"a": true,    "b": null},
+                                       {"a": false,   "b": null},
+                                       {"a": true,    "b": true},
+                                       {"a": false,   "b": true},
+                                       {"a": true,    "b": false},
+                                       {"a": null,    "b": false},
+                                       {"a": false,   "b": null},
+                                       {"a": null,    "b": true}
+                                       ])");
+  AssertSortIndices(batch, options, "[3, 1, 6, 2, 4, 0, 7, 5]");
+}
+
 TEST_F(TestRecordBatchSortIndices, MoreTypes) {
   auto schema = ::arrow::schema({
       {field("a", timestamp(TimeUnit::MICRO))},
@@ -980,6 +1036,25 @@ TEST_F(TestTableSortIndices, NaNAndNull) {
   AssertSortIndices(table, options, "[7, 1, 2, 6, 5, 4, 0, 3]");
 }
 
+TEST_F(TestTableSortIndices, Boolean) {
+  auto schema = ::arrow::schema({
+      {field("a", boolean())},
+      {field("b", boolean())},
+  });
+  SortOptions options(
+      {SortKey("a", SortOrder::Ascending), SortKey("b", SortOrder::Descending)});
+  auto table = TableFromJSON(schema, {R"([{"a": true,    "b": null},
+                                       {"a": false,   "b": null},
+                                       {"a": true,    "b": true},
+                                       {"a": false,   "b": true}])",
+                                      R"([{"a": true,    "b": false},
+                                       {"a": null,    "b": false},
+                                       {"a": false,   "b": null},
+                                       {"a": null,    "b": true}
+                                       ])"});
+  AssertSortIndices(table, options, "[3, 1, 6, 2, 4, 0, 7, 5]");
+}
+
 TEST_F(TestTableSortIndices, BinaryLike) {
   auto schema = ::arrow::schema({
       {field("a", large_utf8())},
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index 4bcb568fdc2..b389b43c02e 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -1163,21 +1163,21 @@ In these functions, nulls are considered greater than any other value
 Floating-point NaN values are considered greater than any other non-null
 value, but smaller than nulls.
 
-+-----------------------+------------+-------------------------+-------------------+--------------------------------+----------------+
-| Function name         | Arity      | Input types             | Output type       | Options class                  | Notes          |
-+=======================+============+=========================+===================+================================+================+
-| partition_nth_indices | Unary      | Binary- and String-like | UInt64            | :struct:`PartitionNthOptions`  | \(1) \(3)      |
-+-----------------------+------------+-------------------------+-------------------+--------------------------------+----------------+
-| partition_nth_indices | Unary      | Numeric                 | UInt64            | :struct:`PartitionNthOptions`  | \(1)           |
-+-----------------------+------------+-------------------------+-------------------+--------------------------------+----------------+
-| array_sort_indices    | Unary      | Binary- and String-like | UInt64            | :struct:`ArraySortOptions`     | \(2) \(3) \(4) |
-+-----------------------+------------+-------------------------+-------------------+--------------------------------+----------------+
-| array_sort_indices    | Unary      | Numeric                 | UInt64            | :struct:`ArraySortOptions`     | \(2) \(4)      |
-+-----------------------+------------+-------------------------+-------------------+--------------------------------+----------------+
-| sort_indices          | Unary      | Binary- and String-like | UInt64            | :struct:`SortOptions`          | \(2) \(3) \(5) |
-+-----------------------+------------+-------------------------+-------------------+--------------------------------+----------------+
-| sort_indices          | Unary      | Numeric                 | UInt64            | :struct:`SortOptions`          | \(2) \(5)      |
-+-----------------------+------------+-------------------------+-------------------+--------------------------------+----------------+
++-----------------------+------------+-----------------------------+-------------------+--------------------------------+----------------+
+| Function name         | Arity      | Input types                 | Output type       | Options class                  | Notes          |
++=======================+============+=============================+===================+================================+================+
+| partition_nth_indices | Unary      | Binary- and String-like     | UInt64            | :struct:`PartitionNthOptions`  | \(1) \(3)      |
++-----------------------+------------+-----------------------------+-------------------+--------------------------------+----------------+
+| partition_nth_indices | Unary      | Boolean, Numeric, Temporal  | UInt64            | :struct:`PartitionNthOptions`  | \(1)           |
++-----------------------+------------+-----------------------------+-------------------+--------------------------------+----------------+
+| array_sort_indices    | Unary      | Binary- and String-like     | UInt64            | :struct:`ArraySortOptions`     | \(2) \(3) \(4) |
++-----------------------+------------+-----------------------------+-------------------+--------------------------------+----------------+
+| array_sort_indices    | Unary      | Boolean, Numeric, Temporal  | UInt64            | :struct:`ArraySortOptions`     | \(2) \(4)      |
++-----------------------+------------+-----------------------------+-------------------+--------------------------------+----------------+
+| sort_indices          | Unary      | Binary- and String-like     | UInt64            | :struct:`SortOptions`          | \(2) \(3) \(5) |
++-----------------------+------------+-----------------------------+-------------------+--------------------------------+----------------+
+| sort_indices          | Unary      | Boolean, Numeric, Temporal  | UInt64            | :struct:`SortOptions`          | \(2) \(5)      |
++-----------------------+------------+-----------------------------+-------------------+--------------------------------+----------------+
 
 * \(1) The output is an array of indices into the input array, that define
   a partial non-stable sort such that the *N*'th index points to the *N*'th
diff --git a/r/tests/testthat/helper-data.R b/r/tests/testthat/helper-data.R
index 43b5bf0354f..b4b1bac4d7b 100644
--- a/r/tests/testthat/helper-data.R
+++ b/r/tests/testthat/helper-data.R
@@ -152,7 +152,7 @@ example_data_for_sorting <- tibble::tibble(
   int = c(-.Machine$integer.max, -101L, -100L, 0L, 0L, 1L, 100L, 1000L, .Machine$integer.max, NA_integer_),
   dbl = c(-Inf, -.Machine$double.xmax, -.Machine$double.xmin, 0, .Machine$double.xmin, pi, .Machine$double.xmax, Inf, NaN, NA_real_),
   chr = c("", "", "\"", "&", "ABC", "NULL", "a", "abc", "zzz", NA_character_),
-  lgl = c(rep(FALSE, 4L), rep(TRUE, 5L), NA), # bool is not supported (ARROW-12016)
+  lgl = c(rep(FALSE, 4L), rep(TRUE, 5L), NA),
   dttm = lubridate::ymd_hms(c(
     "0000-01-01 00:00:00",
     "1919-05-29 13:08:55",
diff --git a/r/tests/testthat/test-dplyr-arrange.R b/r/tests/testthat/test-dplyr-arrange.R
index 6e663d23ec6..5131653146b 100644
--- a/r/tests/testthat/test-dplyr-arrange.R
+++ b/r/tests/testthat/test-dplyr-arrange.R
@@ -159,7 +159,6 @@ test_that("arrange() on datetime columns", {
 })
 
 test_that("arrange() on logical columns", {
-  skip("Sorting by bool columns is not supported (ARROW-12016)")
   expect_dplyr_equal(
     input %>%
       arrange(lgl, int) %>%

From fae3aa24ee7ca57be201c4230035c1ceb9db14f8 Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Wed, 21 Jul 2021 07:54:10 -0500
Subject: [PATCH 619/719] MINOR: Rename setup options chunk

There is a typo in the "Arrow R Developer Guide" where there is a space in the name of a code chunk instead of a hyphen.  I'm doing some automated analyses of the different code chunks and can't parse this one due to the typo.  This PR just adds a hyphen.

Closes #10754 from thisisnic/rmd_label

Authored-by: Nic Crane <thisisnic@gmail.com>
Signed-off-by: Jonathan Keane <jkeane@gmail.com>
---
 r/vignettes/developing.Rmd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/r/vignettes/developing.Rmd b/r/vignettes/developing.Rmd
index 8de751b6947..d6e31392056 100644
--- a/r/vignettes/developing.Rmd
+++ b/r/vignettes/developing.Rmd
@@ -7,7 +7,7 @@ vignette: >
   %\VignetteEncoding{UTF-8}
 ---
 
-```{r setup options, include=FALSE}
+```{r setup-options, include=FALSE}
 knitr::opts_chunk$set(error = TRUE, eval = FALSE)
 
 # Get environment variables describing what to evaluate

From fd6c2e7817e7a2a68435ce4465732cbe9adce75a Mon Sep 17 00:00:00 2001
From: Nate Clark <nate@neworld.us>
Date: Wed, 21 Jul 2021 16:46:16 +0200
Subject: [PATCH 620/719] ARROW-12804: [C++] Return expected result for IsNull
 and IsValid for NullArray

If the null count is equal to the number of values in the array that means that ever value is null so IsNull should return true and IsValid should return false.

Closes #10756 from n3world/ARROW-12804-null_array

Authored-by: Nate Clark <nate@neworld.us>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/array/array_base.h  | 10 ++++++----
 cpp/src/arrow/array/array_test.cc |  6 ++++--
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/cpp/src/arrow/array/array_base.h b/cpp/src/arrow/array/array_base.h
index e29db00cfcf..2add572e7a4 100644
--- a/cpp/src/arrow/array/array_base.h
+++ b/cpp/src/arrow/array/array_base.h
@@ -56,15 +56,17 @@ class ARROW_EXPORT Array {
 
   /// \brief Return true if value at index is null. Does not boundscheck
   bool IsNull(int64_t i) const {
-    return null_bitmap_data_ != NULLPTR &&
-           !BitUtil::GetBit(null_bitmap_data_, i + data_->offset);
+    return null_bitmap_data_ != NULLPTR
+               ? !BitUtil::GetBit(null_bitmap_data_, i + data_->offset)
+               : data_->null_count == data_->length;
   }
 
   /// \brief Return true if value at index is valid (not null). Does not
   /// boundscheck
   bool IsValid(int64_t i) const {
-    return null_bitmap_data_ == NULLPTR ||
-           BitUtil::GetBit(null_bitmap_data_, i + data_->offset);
+    return null_bitmap_data_ != NULLPTR
+               ? BitUtil::GetBit(null_bitmap_data_, i + data_->offset)
+               : data_->null_count != data_->length;
   }
 
   /// \brief Return a Scalar containing the value of this array at i
diff --git a/cpp/src/arrow/array/array_test.cc b/cpp/src/arrow/array/array_test.cc
index 9bc38f2f6ad..15eca157a4d 100644
--- a/cpp/src/arrow/array/array_test.cc
+++ b/cpp/src/arrow/array/array_test.cc
@@ -322,8 +322,6 @@ TEST_F(TestArray, BuildLargeInMemoryArray) {
   ASSERT_EQ(length, result->length());
 }
 
-TEST_F(TestArray, TestCopy) {}
-
 TEST_F(TestArray, TestMakeArrayOfNull) {
   std::shared_ptr<DataType> types[] = {
       // clang-format off
@@ -356,6 +354,10 @@ TEST_F(TestArray, TestMakeArrayOfNull) {
       ASSERT_OK(array->ValidateFull());
       ASSERT_EQ(array->length(), length);
       ASSERT_EQ(array->null_count(), length);
+      for (int64_t i = 0; i < length; ++i) {
+        ASSERT_TRUE(array->IsNull(i));
+        ASSERT_FALSE(array->IsValid(i));
+      }
     }
   }
 }

From 5889ebb99bb1d13b0a108ddc993f33148651e425 Mon Sep 17 00:00:00 2001
From: Dominik Moritz <domoritz@gmail.com>
Date: Wed, 21 Jul 2021 08:09:27 -0700
Subject: [PATCH 621/719] ARROW-13419: [JS] Fix perf tests

Fixes https://github.com/ursacomputing/benchmarks/issues/35#issuecomment-883775690 found by @dianaclarke.

Closes #10761 from domoritz/dom/perf

Authored-by: Dominik Moritz <domoritz@gmail.com>
Signed-off-by: Dominik Moritz <domoritz@gmail.com>
---
 js/perf/index.ts | 2 +-
 js/tsconfig.json | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/js/perf/index.ts b/js/perf/index.ts
index fc2cb8b3a13..9f6cb8f79a0 100644
--- a/js/perf/index.ts
+++ b/js/perf/index.ts
@@ -46,7 +46,7 @@ const results: CaseResult[] = [];
 function cycle(result: CaseResult, _summary: Summary) {
     const duration = result.details.median * 1000;
     if (json) {
-        result.suite = _summary.name
+        result.suite = _summary.name;
         results.push(result);
     }
     console.log(
diff --git a/js/tsconfig.json b/js/tsconfig.json
index 72351f25971..c1e02ca0139 100644
--- a/js/tsconfig.json
+++ b/js/tsconfig.json
@@ -6,7 +6,7 @@
   },
   "compilerOptions": {
     "target": "esnext",
-    "module": "es2020",
+    "module": "commonjs",
     "noEmit": true,
     "esModuleInterop": true,
     "baseUrl": "./",

From 55891ed11fd3b8b9b2db940bd060c1bc1217dc10 Mon Sep 17 00:00:00 2001
From: Nate Clark <nate@neworld.us>
Date: Wed, 21 Jul 2021 18:23:11 +0200
Subject: [PATCH 622/719] ARROW-13391: [CSV] Correct row and column number to
 error messages with CSV streaming reader

Closes #10752 from n3world/ARROW-13391-csv_error_info

Authored-by: Nate Clark <nate@neworld.us>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/csv/column_builder.cc |   2 +-
 cpp/src/arrow/csv/column_decoder.cc |  19 +--
 cpp/src/arrow/csv/reader.cc         |  15 ++-
 python/pyarrow/tests/test_csv.py    | 201 ++++++++++++++++------------
 4 files changed, 135 insertions(+), 102 deletions(-)

diff --git a/cpp/src/arrow/csv/column_builder.cc b/cpp/src/arrow/csv/column_builder.cc
index 8178b260b4a..bc974428734 100644
--- a/cpp/src/arrow/csv/column_builder.cc
+++ b/cpp/src/arrow/csv/column_builder.cc
@@ -109,7 +109,7 @@ class ConcreteColumnBuilder : public ColumnBuilder {
   }
 
   Status WrapConversionError(const Status& st) {
-    if (st.ok()) {
+    if (ARROW_PREDICT_TRUE(st.ok())) {
       return st;
     } else {
       std::stringstream ss;
diff --git a/cpp/src/arrow/csv/column_decoder.cc b/cpp/src/arrow/csv/column_decoder.cc
index ff5d01d8c4d..436d703a9cc 100644
--- a/cpp/src/arrow/csv/column_decoder.cc
+++ b/cpp/src/arrow/csv/column_decoder.cc
@@ -15,18 +15,17 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include "arrow/csv/column_decoder.h"
+
 #include <cstddef>
 #include <cstdint>
 #include <memory>
-#include <mutex>
 #include <sstream>
 #include <string>
 #include <utility>
-#include <vector>
 
 #include "arrow/array.h"
 #include "arrow/array/builder_base.h"
-#include "arrow/csv/column_decoder.h"
 #include "arrow/csv/converter.h"
 #include "arrow/csv/inference_internal.h"
 #include "arrow/csv/options.h"
@@ -52,10 +51,12 @@ class ConcreteColumnDecoder : public ColumnDecoder {
   // XXX useful?
   virtual std::shared_ptr<DataType> type() const = 0;
 
-  Status WrapConversionError(const Status& st) {
-    if (st.ok()) {
-      return st;
+  Result<std::shared_ptr<Array>> WrapConversionError(
+      const Result<std::shared_ptr<Array>>& result) {
+    if (ARROW_PREDICT_TRUE(result.ok())) {
+      return result;
     } else {
+      const auto& st = result.status();
       std::stringstream ss;
       ss << "In CSV column #" << col_index_ << ": " << st.message();
       return st.WithMessage(ss.str());
@@ -87,7 +88,7 @@ class NullColumnDecoder : public ConcreteColumnDecoder {
 Future<std::shared_ptr<Array>> NullColumnDecoder::Decode(
     const std::shared_ptr<BlockParser>& parser) {
   DCHECK_GE(parser->num_rows(), 0);
-  return MakeArrayOfNull(type_, parser->num_rows(), pool_);
+  return WrapConversionError(MakeArrayOfNull(type_, parser->num_rows(), pool_));
 }
 
 //////////////////////////////////////////////////////////////////////////
@@ -124,7 +125,7 @@ Future<std::shared_ptr<Array>> TypedColumnDecoder::Decode(
     const std::shared_ptr<BlockParser>& parser) {
   DCHECK_NE(converter_, nullptr);
   return Future<std::shared_ptr<Array>>::MakeFinished(
-      converter_->Convert(*parser, col_index_));
+      WrapConversionError(converter_->Convert(*parser, col_index_)));
 }
 
 //////////////////////////////////////////////////////////////////////////
@@ -210,7 +211,7 @@ Future<std::shared_ptr<Array>> InferringColumnDecoder::Decode(
   return first_inference_run_.Then([this, parser] {
     DCHECK(type_frozen_);
     auto maybe_array = converter_->Convert(*parser, col_index_);
-    return converter_->Convert(*parser, col_index_);
+    return WrapConversionError(converter_->Convert(*parser, col_index_));
   });
 }
 
diff --git a/cpp/src/arrow/csv/reader.cc b/cpp/src/arrow/csv/reader.cc
index 11437297b80..1a7836561da 100644
--- a/cpp/src/arrow/csv/reader.cc
+++ b/cpp/src/arrow/csv/reader.cc
@@ -391,11 +391,12 @@ namespace {
 class BlockParsingOperator {
  public:
   BlockParsingOperator(io::IOContext io_context, ParseOptions parse_options,
-                       int num_csv_cols, bool count_rows)
+                       int num_csv_cols, int64_t first_row)
       : io_context_(io_context),
         parse_options_(parse_options),
         num_csv_cols_(num_csv_cols),
-        count_rows_(count_rows) {}
+        count_rows_(first_row >= 0),
+        num_rows_seen_(first_row) {}
 
   Result<ParsedBlock> operator()(const CSVBlock& block) {
     constexpr int32_t max_num_rows = std::numeric_limits<int32_t>::max();
@@ -437,7 +438,7 @@ class BlockParsingOperator {
   ParseOptions parse_options_;
   int num_csv_cols_;
   bool count_rows_;
-  int num_rows_seen_ = 0;
+  int64_t num_rows_seen_;
 };
 
 // A function object that takes in parsed batch of CSV data and decodes it to an arrow
@@ -886,7 +887,7 @@ class StreamingReaderImpl : public ReaderMixin,
     bytes_decoded_->fetch_add(header_bytes_consumed);
 
     auto parser_op =
-        BlockParsingOperator(io_context_, parse_options_, num_csv_cols_, count_rows_);
+        BlockParsingOperator(io_context_, parse_options_, num_csv_cols_, num_rows_seen_);
     ARROW_ASSIGN_OR_RAISE(
         auto decoder_op,
         BlockDecodingOperator::Make(io_context_, convert_options_, conversion_schema_));
@@ -1133,9 +1134,9 @@ Future<std::shared_ptr<StreamingReader>> MakeStreamingReader(
   RETURN_NOT_OK(read_options.Validate());
   RETURN_NOT_OK(convert_options.Validate());
   std::shared_ptr<StreamingReaderImpl> reader;
-  reader = std::make_shared<StreamingReaderImpl>(io_context, input, read_options,
-                                                 parse_options, convert_options,
-                                                 /*count_rows=*/true);
+  reader = std::make_shared<StreamingReaderImpl>(
+      io_context, input, read_options, parse_options, convert_options,
+      /*count_rows=*/!read_options.use_threads || cpu_executor->GetCapacity() == 1);
   return reader->Init(cpu_executor).Then([reader] {
     return std::dynamic_pointer_cast<StreamingReader>(reader);
   });
diff --git a/python/pyarrow/tests/test_csv.py b/python/pyarrow/tests/test_csv.py
index e4a4e3e5935..0c05c28290e 100644
--- a/python/pyarrow/tests/test_csv.py
+++ b/python/pyarrow/tests/test_csv.py
@@ -316,7 +316,110 @@ def test_write_options():
         opts.validate()
 
 
-class BaseTestCSVRead:
+class BaseTestCSV:
+    """Common tests which are shared by streaming and non streaming readers"""
+
+    def base_row_number_offset_in_errors(self, use_threads, read_bytes,
+                                         num_blocks=3):
+        """
+        num_blocks is a temporary work around because streaming reader does
+        not get schema from first non empty block
+        """
+
+        # Row numbers are only correctly counted in serial reads
+        def format_msg(msg_format, row, *args):
+            if use_threads:
+                row_info = ""
+            else:
+                row_info = "Row #{}: ".format(row)
+            return msg_format.format(row_info, *args)
+
+        csv, _ = make_random_csv(4, 100, write_names=True)
+
+        read_options = ReadOptions()
+        read_options.block_size = len(csv) / num_blocks
+        convert_options = ConvertOptions()
+        convert_options.column_types = {"a": pa.int32()}
+
+        # Test without skip_rows and column names in the csv
+        csv_bad_columns = csv + b"1,2\r\n"
+        message_columns = format_msg("{}Expected 4 columns, got 2", 102)
+        with pytest.raises(pa.ArrowInvalid, match=message_columns):
+            read_bytes(csv_bad_columns, read_options=read_options,
+                       convert_options=convert_options)
+
+        csv_bad_type = csv + b"a,b,c,d\r\n"
+        message_value = format_msg(
+            "In CSV column #0: {}"
+            "CSV conversion error to int32: invalid value 'a'",
+            102, csv)
+        with pytest.raises(pa.ArrowInvalid, match=message_value):
+            read_bytes(csv_bad_type, read_options=read_options,
+                       convert_options=convert_options)
+
+        long_row = (b"this is a long row" * 15) + b",3\r\n"
+        csv_bad_columns_long = csv + long_row
+        message_long = format_msg("{}Expected 4 columns, got 2: {} ...", 102,
+                                  long_row[0:96].decode("utf-8"))
+        with pytest.raises(pa.ArrowInvalid, match=message_long):
+            read_bytes(csv_bad_columns_long, read_options=read_options,
+                       convert_options=convert_options)
+
+        # Test skipping rows after the names
+        read_options.skip_rows_after_names = 47
+
+        with pytest.raises(pa.ArrowInvalid, match=message_columns):
+            read_bytes(csv_bad_columns, read_options=read_options,
+                       convert_options=convert_options)
+
+        with pytest.raises(pa.ArrowInvalid, match=message_value):
+            read_bytes(csv_bad_type, read_options=read_options,
+                       convert_options=convert_options)
+
+        with pytest.raises(pa.ArrowInvalid, match=message_long):
+            read_bytes(csv_bad_columns_long, read_options=read_options,
+                       convert_options=convert_options)
+
+        read_options.skip_rows_after_names = 0
+
+        # Test without skip_rows and column names not in the csv
+        csv, _ = make_random_csv(4, 100, write_names=False)
+        read_options.column_names = ["a", "b", "c", "d"]
+        csv_bad_columns = csv + b"1,2\r\n"
+        message_columns = format_msg("{}Expected 4 columns, got 2", 101)
+        with pytest.raises(pa.ArrowInvalid, match=message_columns):
+            read_bytes(csv_bad_columns, read_options=read_options,
+                       convert_options=convert_options)
+
+        csv_bad_columns_long = csv + long_row
+        message_long = format_msg("{}Expected 4 columns, got 2: {} ...", 101,
+                                  long_row[0:96].decode("utf-8"))
+        with pytest.raises(pa.ArrowInvalid, match=message_long):
+            read_bytes(csv_bad_columns_long, read_options=read_options,
+                       convert_options=convert_options)
+
+        csv_bad_type = csv + b"a,b,c,d\r\n"
+        message_value = format_msg(
+            "In CSV column #0: {}"
+            "CSV conversion error to int32: invalid value 'a'",
+            101)
+        message_value = message_value.format(len(csv))
+        with pytest.raises(pa.ArrowInvalid, match=message_value):
+            read_bytes(csv_bad_type, read_options=read_options,
+                       convert_options=convert_options)
+
+        # Test with skip_rows and column names not in the csv
+        read_options.skip_rows = 23
+        with pytest.raises(pa.ArrowInvalid, match=message_columns):
+            read_bytes(csv_bad_columns, read_options=read_options,
+                       convert_options=convert_options)
+
+        with pytest.raises(pa.ArrowInvalid, match=message_value):
+            read_bytes(csv_bad_type, read_options=read_options,
+                       convert_options=convert_options)
+
+
+class BaseTestCSVRead(BaseTestCSV):
 
     def read_bytes(self, b, **kwargs):
         return self.read_csv(pa.py_buffer(b), **kwargs)
@@ -1146,6 +1249,10 @@ def test_cancellation_disabled(self):
         t.start()
         t.join()
 
+    def test_row_number_offset_in_errors(self):
+        self.base_row_number_offset_in_errors(
+            isinstance(self, TestParallelCSVRead), self.read_bytes)
+
 
 class TestSerialCSVRead(BaseTestCSVRead, unittest.TestCase):
 
@@ -1156,89 +1263,6 @@ def read_csv(self, *args, validate_full=True, **kwargs):
         table.validate(full=validate_full)
         return table
 
-    def test_row_numbers_in_errors(self):
-        """ Row numbers are only correctly counted in serial reads """
-        csv, _ = make_random_csv(4, 100, write_names=True)
-
-        read_options = ReadOptions()
-        read_options.block_size = len(csv) / 3
-        convert_options = ConvertOptions()
-        convert_options.column_types = {"a": pa.int32(), "d": pa.int32()}
-
-        # Test without skip_rows and column names in the csv
-        csv_bad_columns = csv + b"1,2\r\n"
-        with pytest.raises(pa.ArrowInvalid,
-                           match="Row #102: Expected 4 columns, got 2"):
-            self.read_bytes(csv_bad_columns, read_options=read_options,
-                            convert_options=convert_options)
-
-        csv_bad_type = csv + b"a,b,c,d\r\n"
-        message_value = ("In CSV column #0: Row #102: " +
-                         "CSV conversion error to int32: invalid value 'a'")
-        with pytest.raises(pa.ArrowInvalid, match=message_value):
-            self.read_bytes(csv_bad_type, read_options=read_options,
-                            convert_options=convert_options)
-
-        long_row = (b"this is a long row" * 15) + b",3\r\n"
-        csv_bad_columns_long = csv + long_row
-        message_long = ("Row #102: Expected 4 columns, got 2: " +
-                        long_row[0:96].decode("utf-8") + " ...")
-        with pytest.raises(pa.ArrowInvalid, match=message_long):
-            self.read_bytes(csv_bad_columns_long, read_options=read_options,
-                            convert_options=convert_options)
-
-        # Test skipping rows after the names
-        read_options.skip_rows_after_names = 47
-
-        with pytest.raises(pa.ArrowInvalid,
-                           match="Row #102: Expected 4 columns, got 2"):
-            self.read_bytes(csv_bad_columns, read_options=read_options,
-                            convert_options=convert_options)
-
-        with pytest.raises(pa.ArrowInvalid, match=message_value):
-            self.read_bytes(csv_bad_type, read_options=read_options,
-                            convert_options=convert_options)
-
-        with pytest.raises(pa.ArrowInvalid, match=message_long):
-            self.read_bytes(csv_bad_columns_long, read_options=read_options,
-                            convert_options=convert_options)
-
-        read_options.skip_rows_after_names = 0
-
-        # Test without skip_rows and column names not in the csv
-        csv, _ = make_random_csv(4, 100, write_names=False)
-        read_options.column_names = ["a", "b", "c", "d"]
-        csv_bad_columns = csv + b"1,2\r\n"
-        with pytest.raises(pa.ArrowInvalid,
-                           match="Row #101: Expected 4 columns, got 2"):
-            self.read_bytes(csv_bad_columns, read_options=read_options,
-                            convert_options=convert_options)
-
-        csv_bad_columns_long = csv + long_row
-        message_long = ("Row #101: Expected 4 columns, got 2: " +
-                        long_row[0:96].decode("utf-8") + " ...")
-        with pytest.raises(pa.ArrowInvalid, match=message_long):
-            self.read_bytes(csv_bad_columns_long, read_options=read_options,
-                            convert_options=convert_options)
-
-        csv_bad_type = csv + b"a,b,c,d\r\n"
-        message_value = ("In CSV column #0: Row #101: " +
-                         "CSV conversion error to int32: invalid value 'a'")
-        with pytest.raises(pa.ArrowInvalid, match=message_value):
-            self.read_bytes(csv_bad_type, read_options=read_options,
-                            convert_options=convert_options)
-
-        # Test with skip_rows and column names not in the csv
-        read_options.skip_rows = 23
-        with pytest.raises(pa.ArrowInvalid,
-                           match="Row #101: Expected 4 columns, got 2"):
-            self.read_bytes(csv_bad_columns, read_options=read_options,
-                            convert_options=convert_options)
-
-        with pytest.raises(pa.ArrowInvalid, match=message_value):
-            self.read_bytes(csv_bad_type, read_options=read_options,
-                            convert_options=convert_options)
-
 
 class TestParallelCSVRead(BaseTestCSVRead, unittest.TestCase):
 
@@ -1251,7 +1275,7 @@ def read_csv(self, *args, validate_full=True, **kwargs):
 
 
 @pytest.mark.parametrize('use_threads', [False, True])
-class TestStreamingCSVRead:
+class TestStreamingCSVRead(BaseTestCSV):
 
     def open_bytes(self, b, use_threads, **kwargs):
         return self.open_csv(pa.py_buffer(b), use_threads, **kwargs)
@@ -1533,6 +1557,13 @@ def check_one_batch(reader, expected):
         reader = None
         assert pa.total_allocated_bytes() == old_allocated
 
+    def test_row_number_offset_in_errors(self, use_threads):
+        def read_bytes(b, **kwargs):
+            return self.open_bytes(b, use_threads, **kwargs).read_all()
+
+        self.base_row_number_offset_in_errors(use_threads, read_bytes,
+                                              num_blocks=1)
+
 
 class BaseTestCompressedCSVRead:
 

From 60f49f1ae269336eadb5a0a0f15da2da17dce2e9 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Wed, 21 Jul 2021 19:22:31 +0200
Subject: [PATCH 623/719] ARROW-13242: [C++] Improve random generation of
 decimal arrays

- Allow precisions larger than a single uint64
- Implement decimal256 generation
- Add validity tests

Closes #10643 from pitrou/ARROW-13242-dec-random-gen

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/array/builder_decimal.h |   2 +
 cpp/src/arrow/testing/random.cc       | 119 ++++++++++++++++++++------
 cpp/src/arrow/testing/random.h        |  49 +++++++----
 cpp/src/arrow/testing/random_test.cc  | 106 ++++++++++++++++++++---
 cpp/src/arrow/util/basic_decimal.h    |   1 +
 5 files changed, 223 insertions(+), 54 deletions(-)

diff --git a/cpp/src/arrow/array/builder_decimal.h b/cpp/src/arrow/array/builder_decimal.h
index 8c75e7dd674..f48392ed001 100644
--- a/cpp/src/arrow/array/builder_decimal.h
+++ b/cpp/src/arrow/array/builder_decimal.h
@@ -32,6 +32,7 @@ namespace arrow {
 class ARROW_EXPORT Decimal128Builder : public FixedSizeBinaryBuilder {
  public:
   using TypeClass = Decimal128Type;
+  using ValueType = Decimal128;
 
   explicit Decimal128Builder(const std::shared_ptr<DataType>& type,
                              MemoryPool* pool = default_memory_pool());
@@ -61,6 +62,7 @@ class ARROW_EXPORT Decimal128Builder : public FixedSizeBinaryBuilder {
 class ARROW_EXPORT Decimal256Builder : public FixedSizeBinaryBuilder {
  public:
   using TypeClass = Decimal256Type;
+  using ValueType = Decimal256;
 
   explicit Decimal256Builder(const std::shared_ptr<DataType>& type,
                              MemoryPool* pool = default_memory_pool());
diff --git a/cpp/src/arrow/testing/random.cc b/cpp/src/arrow/testing/random.cc
index ab0d4f33245..bf95ea5e051 100644
--- a/cpp/src/arrow/testing/random.cc
+++ b/cpp/src/arrow/testing/random.cc
@@ -20,6 +20,7 @@
 #include <gtest/gtest.h>
 
 #include <algorithm>
+#include <array>
 #include <cmath>
 #include <limits>
 #include <memory>
@@ -229,34 +230,95 @@ std::shared_ptr<Array> RandomArrayGenerator::Float64(int64_t size, double min, d
 #undef PRIMITIVE_RAND_INTEGER_IMPL
 #undef PRIMITIVE_RAND_IMPL
 
-std::shared_ptr<Array> RandomArrayGenerator::Decimal128(std::shared_ptr<DataType> type,
-                                                        int64_t size,
-                                                        double null_probability) {
-  const auto& decimal_type = checked_cast<const Decimal128Type&>(*type);
-  const auto digits = decimal_type.precision();
-  if (digits > 18) {
-    // More than 18 digits + sign don't fit in a int64_t
-    ABORT_NOT_OK(
-        Status::NotImplemented("random decimal128 generation with precision > 18"));
-  }
+namespace {
 
-  // Generate logical values as integers, then convert them
-  const auto max = static_cast<int64_t>(std::llround(std::pow(10.0, digits)) - 1);
-  const auto int_array =
-      checked_pointer_cast<Int64Array>(Int64(size, -max, max, null_probability));
+// A generic generator for random decimal arrays
+template <typename DecimalType>
+struct DecimalGenerator {
+  using DecimalBuilderType = typename TypeTraits<DecimalType>::BuilderType;
+  using DecimalValue = typename DecimalBuilderType::ValueType;
+
+  std::shared_ptr<DataType> type_;
+  RandomArrayGenerator* rng_;
+
+  static uint64_t MaxDecimalInteger(int32_t digits) {
+    // Need to decrement *after* the cast to uint64_t because, while
+    // 10**x is exactly representable in a double for x <= 19,
+    // 10**x - 1 is not.
+    return static_cast<uint64_t>(std::ceil(std::pow(10.0, digits))) - 1;
+  }
+
+  std::shared_ptr<Array> MakeRandomArray(int64_t size, double null_probability) {
+    // 10**19 fits in a 64-bit unsigned integer
+    static constexpr int32_t kMaxDigitsInInteger = 19;
+    static constexpr int kNumIntegers = DecimalType::kByteWidth / 8;
+
+    static_assert(
+        kNumIntegers ==
+            (DecimalType::kMaxPrecision + kMaxDigitsInInteger - 1) / kMaxDigitsInInteger,
+        "inconsistent decimal metadata: kMaxPrecision doesn't match kByteWidth");
+
+    // First generate separate random values for individual components:
+    // boolean sign (including null-ness), and uint64 "digits" in big endian order.
+    const auto& decimal_type = checked_cast<const DecimalType&>(*type_);
+
+    const auto sign_array = checked_pointer_cast<BooleanArray>(
+        rng_->Boolean(size, /*true_probability=*/0.5, null_probability));
+    std::array<std::shared_ptr<UInt64Array>, kNumIntegers> digit_arrays;
+
+    auto remaining_digits = decimal_type.precision();
+    for (int i = kNumIntegers - 1; i >= 0; --i) {
+      const auto digits = std::min(kMaxDigitsInInteger, remaining_digits);
+      digit_arrays[i] = checked_pointer_cast<UInt64Array>(
+          rng_->UInt64(size, 0, MaxDecimalInteger(digits)));
+      DCHECK_EQ(digit_arrays[i]->null_count(), 0);
+      remaining_digits -= digits;
+    }
 
-  Decimal128Builder builder(type);
-  ABORT_NOT_OK(builder.Reserve(size));
-  for (int64_t i = 0; i < size; ++i) {
-    if (int_array->IsValid(i)) {
-      builder.UnsafeAppend(::arrow::Decimal128(int_array->Value(i)));
-    } else {
-      builder.UnsafeAppendNull();
+    // Second compute decimal values from the individual components,
+    // building up a decimal array.
+    DecimalBuilderType builder(type_);
+    ABORT_NOT_OK(builder.Reserve(size));
+
+    const DecimalValue kDigitsMultiplier =
+        DecimalValue::GetScaleMultiplier(kMaxDigitsInInteger);
+
+    for (int64_t i = 0; i < size; ++i) {
+      if (sign_array->IsValid(i)) {
+        DecimalValue dec_value{0};
+        for (int j = 0; j < kNumIntegers; ++j) {
+          dec_value =
+              dec_value * kDigitsMultiplier + DecimalValue(digit_arrays[j]->Value(i));
+        }
+        if (sign_array->Value(i)) {
+          builder.UnsafeAppend(dec_value.Negate());
+        } else {
+          builder.UnsafeAppend(dec_value);
+        }
+      } else {
+        builder.UnsafeAppendNull();
+      }
     }
+    std::shared_ptr<Array> array;
+    ABORT_NOT_OK(builder.Finish(&array));
+    return array;
   }
-  std::shared_ptr<Array> array;
-  ABORT_NOT_OK(builder.Finish(&array));
-  return array;
+};
+
+}  // namespace
+
+std::shared_ptr<Array> RandomArrayGenerator::Decimal128(std::shared_ptr<DataType> type,
+                                                        int64_t size,
+                                                        double null_probability) {
+  DecimalGenerator<Decimal128Type> gen{type, this};
+  return gen.MakeRandomArray(size, null_probability);
+}
+
+std::shared_ptr<Array> RandomArrayGenerator::Decimal256(std::shared_ptr<DataType> type,
+                                                        int64_t size,
+                                                        double null_probability) {
+  DecimalGenerator<Decimal256Type> gen{type, this};
+  return gen.MakeRandomArray(size, null_probability);
 }
 
 template <typename TypeClass>
@@ -623,6 +685,11 @@ struct RandomArrayGeneratorOfImpl {
     return Status::OK();
   }
 
+  Status Visit(const Decimal256Type&) {
+    out_ = rag_->Decimal256(type_, size_, null_probability_);
+    return Status::OK();
+  }
+
   Status Visit(const Decimal128Type&) {
     out_ = rag_->Decimal128(type_, size_, null_probability_);
     return Status::OK();
@@ -779,7 +846,11 @@ std::shared_ptr<Array> RandomArrayGenerator::ArrayOf(const Field& field, int64_t
     }
 
     case Type::type::DECIMAL128:
+      return Decimal128(field.type(), length, null_probability);
+
     case Type::type::DECIMAL256:
+      return Decimal256(field.type(), length, null_probability);
+
     case Type::type::FIXED_SIZE_BINARY: {
       auto byte_width =
           internal::checked_pointer_cast<FixedSizeBinaryType>(field.type())->byte_width();
diff --git a/cpp/src/arrow/testing/random.h b/cpp/src/arrow/testing/random.h
index 9d7b4854679..e9b6e426fbc 100644
--- a/cpp/src/arrow/testing/random.h
+++ b/cpp/src/arrow/testing/random.h
@@ -55,7 +55,7 @@ class ARROW_TESTING_EXPORT RandomArrayGenerator {
   ///
   /// \param[in] size the size of the array to generate
   /// \param[in] true_probability the probability of a value being 1 / bit-set
-  /// \param[in] null_probability the probability of a row being null
+  /// \param[in] null_probability the probability of a value being null
   ///
   /// \return a generated Array
   std::shared_ptr<Array> Boolean(int64_t size, double true_probability,
@@ -66,7 +66,7 @@ class ARROW_TESTING_EXPORT RandomArrayGenerator {
   /// \param[in] size the size of the array to generate
   /// \param[in] min the lower bound of the uniform distribution
   /// \param[in] max the upper bound of the uniform distribution
-  /// \param[in] null_probability the probability of a row being null
+  /// \param[in] null_probability the probability of a value being null
   ///
   /// \return a generated Array
   std::shared_ptr<Array> UInt8(int64_t size, uint8_t min, uint8_t max,
@@ -77,7 +77,7 @@ class ARROW_TESTING_EXPORT RandomArrayGenerator {
   /// \param[in] size the size of the array to generate
   /// \param[in] min the lower bound of the uniform distribution
   /// \param[in] max the upper bound of the uniform distribution
-  /// \param[in] null_probability the probability of a row being null
+  /// \param[in] null_probability the probability of a value being null
   ///
   /// \return a generated Array
   std::shared_ptr<Array> Int8(int64_t size, int8_t min, int8_t max,
@@ -88,7 +88,7 @@ class ARROW_TESTING_EXPORT RandomArrayGenerator {
   /// \param[in] size the size of the array to generate
   /// \param[in] min the lower bound of the uniform distribution
   /// \param[in] max the upper bound of the uniform distribution
-  /// \param[in] null_probability the probability of a row being null
+  /// \param[in] null_probability the probability of a value being null
   ///
   /// \return a generated Array
   std::shared_ptr<Array> UInt16(int64_t size, uint16_t min, uint16_t max,
@@ -99,7 +99,7 @@ class ARROW_TESTING_EXPORT RandomArrayGenerator {
   /// \param[in] size the size of the array to generate
   /// \param[in] min the lower bound of the uniform distribution
   /// \param[in] max the upper bound of the uniform distribution
-  /// \param[in] null_probability the probability of a row being null
+  /// \param[in] null_probability the probability of a value being null
   ///
   /// \return a generated Array
   std::shared_ptr<Array> Int16(int64_t size, int16_t min, int16_t max,
@@ -110,7 +110,7 @@ class ARROW_TESTING_EXPORT RandomArrayGenerator {
   /// \param[in] size the size of the array to generate
   /// \param[in] min the lower bound of the uniform distribution
   /// \param[in] max the upper bound of the uniform distribution
-  /// \param[in] null_probability the probability of a row being null
+  /// \param[in] null_probability the probability of a value being null
   ///
   /// \return a generated Array
   std::shared_ptr<Array> UInt32(int64_t size, uint32_t min, uint32_t max,
@@ -121,7 +121,7 @@ class ARROW_TESTING_EXPORT RandomArrayGenerator {
   /// \param[in] size the size of the array to generate
   /// \param[in] min the lower bound of the uniform distribution
   /// \param[in] max the upper bound of the uniform distribution
-  /// \param[in] null_probability the probability of a row being null
+  /// \param[in] null_probability the probability of a value being null
   ///
   /// \return a generated Array
   std::shared_ptr<Array> Int32(int64_t size, int32_t min, int32_t max,
@@ -132,7 +132,7 @@ class ARROW_TESTING_EXPORT RandomArrayGenerator {
   /// \param[in] size the size of the array to generate
   /// \param[in] min the lower bound of the uniform distribution
   /// \param[in] max the upper bound of the uniform distribution
-  /// \param[in] null_probability the probability of a row being null
+  /// \param[in] null_probability the probability of a value being null
   ///
   /// \return a generated Array
   std::shared_ptr<Array> UInt64(int64_t size, uint64_t min, uint64_t max,
@@ -143,7 +143,7 @@ class ARROW_TESTING_EXPORT RandomArrayGenerator {
   /// \param[in] size the size of the array to generate
   /// \param[in] min the lower bound of the uniform distribution
   /// \param[in] max the upper bound of the uniform distribution
-  /// \param[in] null_probability the probability of a row being null
+  /// \param[in] null_probability the probability of a value being null
   ///
   /// \return a generated Array
   std::shared_ptr<Array> Int64(int64_t size, int64_t min, int64_t max,
@@ -154,7 +154,7 @@ class ARROW_TESTING_EXPORT RandomArrayGenerator {
   /// \param[in] size the size of the array to generate
   /// \param[in] min the lower bound of the distribution
   /// \param[in] max the upper bound of the distribution
-  /// \param[in] null_probability the probability of a row being null
+  /// \param[in] null_probability the probability of a value being null
   ///
   /// \return a generated Array
   std::shared_ptr<Array> Float16(int64_t size, int16_t min, int16_t max,
@@ -165,8 +165,8 @@ class ARROW_TESTING_EXPORT RandomArrayGenerator {
   /// \param[in] size the size of the array to generate
   /// \param[in] min the lower bound of the uniform distribution
   /// \param[in] max the upper bound of the uniform distribution
-  /// \param[in] null_probability the probability of a row being null
-  /// \param[in] nan_probability the probability of a row being NaN
+  /// \param[in] null_probability the probability of a value being null
+  /// \param[in] nan_probability the probability of a value being NaN
   ///
   /// \return a generated Array
   std::shared_ptr<Array> Float32(int64_t size, float min, float max,
@@ -177,8 +177,8 @@ class ARROW_TESTING_EXPORT RandomArrayGenerator {
   /// \param[in] size the size of the array to generate
   /// \param[in] min the lower bound of the uniform distribution
   /// \param[in] max the upper bound of the uniform distribution
-  /// \param[in] null_probability the probability of a row being null
-  /// \param[in] nan_probability the probability of a row being NaN
+  /// \param[in] null_probability the probability of a value being null
+  /// \param[in] nan_probability the probability of a value being NaN
   ///
   /// \return a generated Array
   std::shared_ptr<Array> Float64(int64_t size, double min, double max,
@@ -231,12 +231,23 @@ class ARROW_TESTING_EXPORT RandomArrayGenerator {
   /// \param[in] type the type of the array to generate
   ///            (must be an instance of Decimal128Type)
   /// \param[in] size the size of the array to generate
-  /// \param[in] null_probability the probability of a row being null
+  /// \param[in] null_probability the probability of a value being null
   ///
   /// \return a generated Array
   std::shared_ptr<Array> Decimal128(std::shared_ptr<DataType> type, int64_t size,
                                     double null_probability = 0);
 
+  /// \brief Generate a random Decimal256Array
+  ///
+  /// \param[in] type the type of the array to generate
+  ///            (must be an instance of Decimal256Type)
+  /// \param[in] size the size of the array to generate
+  /// \param[in] null_probability the probability of a value being null
+  ///
+  /// \return a generated Array
+  std::shared_ptr<Array> Decimal256(std::shared_ptr<DataType> type, int64_t size,
+                                    double null_probability = 0);
+
   /// \brief Generate an array of offsets (for use in e.g. ListArray::FromArrays)
   ///
   /// \param[in] size the size of the array to generate
@@ -261,7 +272,7 @@ class ARROW_TESTING_EXPORT RandomArrayGenerator {
   ///            determined by the uniform distribution
   /// \param[in] max_length the upper bound of the string length
   ///            determined by the uniform distribution
-  /// \param[in] null_probability the probability of a row being null
+  /// \param[in] null_probability the probability of a value being null
   ///
   /// \return a generated Array
   std::shared_ptr<Array> String(int64_t size, int32_t min_length, int32_t max_length,
@@ -274,7 +285,7 @@ class ARROW_TESTING_EXPORT RandomArrayGenerator {
   ///            determined by the uniform distribution
   /// \param[in] max_length the upper bound of the string length
   ///            determined by the uniform distribution
-  /// \param[in] null_probability the probability of a row being null
+  /// \param[in] null_probability the probability of a value being null
   ///
   /// \return a generated Array
   std::shared_ptr<Array> LargeString(int64_t size, int32_t min_length, int32_t max_length,
@@ -289,7 +300,7 @@ class ARROW_TESTING_EXPORT RandomArrayGenerator {
   ///            determined by the uniform distribution
   /// \param[in] max_length the upper bound of the string length
   ///            determined by the uniform distribution
-  /// \param[in] null_probability the probability of a row being null
+  /// \param[in] null_probability the probability of a value being null
   ///
   /// \return a generated Array
   std::shared_ptr<Array> StringWithRepeats(int64_t size, int64_t unique,
@@ -305,7 +316,7 @@ class ARROW_TESTING_EXPORT RandomArrayGenerator {
   ///
   /// \param[in] size the size of the array to generate
   /// \param[in] byte_width the byte width of fixed-size binary items
-  /// \param[in] null_probability the probability of a row being null
+  /// \param[in] null_probability the probability of a value being null
   ///
   /// \return a generated Array
   std::shared_ptr<Array> FixedSizeBinary(int64_t size, int32_t byte_width,
diff --git a/cpp/src/arrow/testing/random_test.cc b/cpp/src/arrow/testing/random_test.cc
index 851088a11b9..553028f8fb3 100644
--- a/cpp/src/arrow/testing/random_test.cc
+++ b/cpp/src/arrow/testing/random_test.cc
@@ -17,6 +17,7 @@
 #include <gtest/gtest.h>
 
 #include "arrow/array.h"
+#include "arrow/array/builder_decimal.h"
 #include "arrow/record_batch.h"
 #include "arrow/testing/gtest_util.h"
 #include "arrow/testing/random.h"
@@ -39,16 +40,6 @@ class RandomArrayTest : public ::testing::TestWithParam<std::shared_ptr<Field>>
   std::shared_ptr<Field> GetField() { return GetParam(); }
 };
 
-template <typename T>
-class RandomNumericArrayTest : public ::testing::Test {
- protected:
-  std::shared_ptr<Field> GetField() { return field("field0", std::make_shared<T>()); }
-
-  std::shared_ptr<NumericArray<T>> Downcast(std::shared_ptr<Array> array) {
-    return internal::checked_pointer_cast<NumericArray<T>>(array);
-  }
-};
-
 TEST_P(RandomArrayTest, GenerateArray) {
   auto field = GetField();
   auto array = GenerateArray(*field, kExpectedLength, 0xDEADBEEF);
@@ -109,7 +100,8 @@ auto values = ::testing::Values(
     field("int64", int64()), field("float16", float16()), field("float32", float32()),
     field("float64", float64()), field("string", utf8()), field("binary", binary()),
     field("fixed_size_binary", fixed_size_binary(8)),
-    field("decimal128", decimal128(8, 3)), field("decimal256", decimal256(16, 4)),
+    field("decimal128", decimal128(8, 3)), field("decimal128", decimal128(29, -5)),
+    field("decimal256", decimal256(16, 4)), field("decimal256", decimal256(57, -6)),
     field("date32", date32()), field("date64", date64()),
     field("timestampns", timestamp(TimeUnit::NANO)),
     field("timestamps", timestamp(TimeUnit::SECOND, "America/Phoenix")),
@@ -154,6 +146,16 @@ INSTANTIATE_TEST_SUITE_P(
       return std::to_string(info.index) + info.param->name();
     });
 
+template <typename T>
+class RandomNumericArrayTest : public ::testing::Test {
+ protected:
+  std::shared_ptr<Field> GetField() { return field("field0", std::make_shared<T>()); }
+
+  std::shared_ptr<NumericArray<T>> Downcast(std::shared_ptr<Array> array) {
+    return internal::checked_pointer_cast<NumericArray<T>>(array);
+  }
+};
+
 using NumericTypes =
     ::testing::Types<UInt8Type, Int8Type, UInt16Type, Int16Type, UInt32Type, Int32Type,
                      HalfFloatType, FloatType, DoubleType>;
@@ -173,6 +175,88 @@ TYPED_TEST(RandomNumericArrayTest, GenerateMinMax) {
   }
 }
 
+TYPED_TEST(RandomNumericArrayTest, EmptyRange) {
+  auto field =
+      this->GetField()->WithMetadata(key_value_metadata({{"min", "42"}, {"max", "42"}}));
+  auto batch = GenerateBatch({field}, kExpectedLength, 0xcafe);
+  ASSERT_OK(batch->ValidateFull());
+  AssertSchemaEqual(schema({field}), batch->schema());
+  auto array = this->Downcast(batch->column(0));
+  for (auto slot : *array) {
+    if (!slot.has_value()) continue;
+    ASSERT_EQ(slot, typename TypeParam::c_type(42));
+  }
+}
+
+template <typename DecimalType>
+class RandomDecimalArrayTest : public ::testing::Test {
+ protected:
+  using ArrayType = typename TypeTraits<DecimalType>::ArrayType;
+  using DecimalValue = typename TypeTraits<DecimalType>::BuilderType::ValueType;
+
+  constexpr static int32_t max_precision() { return DecimalType::kMaxPrecision; }
+
+  std::shared_ptr<DataType> type(int32_t precision, int32_t scale) {
+    return std::make_shared<DecimalType>(precision, scale);
+  }
+
+  void CheckArray(const Array& array) {
+    ASSERT_OK(array.ValidateFull());
+
+    const auto& type = checked_cast<const DecimalType&>(*array.type());
+    const auto& values = checked_cast<const ArrayType&>(array);
+
+    const DecimalValue limit = DecimalValue::GetScaleMultiplier(type.precision());
+    const DecimalValue neg_limit = DecimalValue(limit).Negate();
+    const DecimalValue half_limit = limit / DecimalValue(2);
+    const DecimalValue neg_half_limit = DecimalValue(half_limit).Negate();
+
+    // Check that random-generated values:
+    // - satisfy the requested precision
+    // - at least sometimes are close to the max allowable values for precision
+    // - sometimes are negative
+    int64_t non_nulls = 0;
+    int64_t over_half = 0;
+    int64_t negative = 0;
+
+    for (int64_t i = 0; i < values.length(); ++i) {
+      if (values.IsNull(i)) {
+        continue;
+      }
+      ++non_nulls;
+      const DecimalValue value(values.GetValue(i));
+      ASSERT_LT(value, limit);
+      ASSERT_GT(value, neg_limit);
+      if (value >= half_limit || value <= neg_half_limit) {
+        ++over_half;
+      }
+      if (value.Sign() < 0) {
+        ++negative;
+      }
+    }
+
+    ASSERT_GE(over_half, non_nulls * 0.3);
+    ASSERT_LE(over_half, non_nulls * 0.7);
+    ASSERT_GE(negative, non_nulls * 0.3);
+    ASSERT_LE(negative, non_nulls * 0.7);
+  }
+};
+
+using DecimalTypes = ::testing::Types<Decimal128Type, Decimal256Type>;
+TYPED_TEST_SUITE(RandomDecimalArrayTest, DecimalTypes);
+
+TYPED_TEST(RandomDecimalArrayTest, Basic) {
+  random::RandomArrayGenerator rng(42);
+
+  for (const int32_t precision :
+       {1, 2, 5, 9, 18, 19, 25, this->max_precision() - 1, this->max_precision()}) {
+    ARROW_SCOPED_TRACE("precision = ", precision);
+    const auto type = this->type(precision, 5);
+    auto array = rng.ArrayOf(type, /*size=*/1000, /*null_probability=*/0.2);
+    this->CheckArray(*array);
+  }
+}
+
 // Test all the supported options
 TEST(TypeSpecificTests, BoolTrueProbability) {
   auto field =
diff --git a/cpp/src/arrow/util/basic_decimal.h b/cpp/src/arrow/util/basic_decimal.h
index d2e37db0cc1..acc8ea4930f 100644
--- a/cpp/src/arrow/util/basic_decimal.h
+++ b/cpp/src/arrow/util/basic_decimal.h
@@ -293,6 +293,7 @@ class ARROW_EXPORT BasicDecimal256 {
   /// \param[out] remainder the remainder after the division
   DecimalStatus Divide(const BasicDecimal256& divisor, BasicDecimal256* result,
                        BasicDecimal256* remainder) const;
+
   /// \brief Shift left by the given number of bits.
   BasicDecimal256& operator<<=(uint32_t bits);
 

From dcf3a0bd9d0950e426f000a434e73bf036d1d51a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Thu, 22 Jul 2021 13:42:22 +0200
Subject: [PATCH 624/719] ARROW-13432: [Release] Fix ssh connection to the
 binary uploader container
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Out of the sudden the binary upload script has stopped working for me, it was just hanging infinitely.
After a lot of digging found that the `ssh_port` variable contained two identical ports, thus the port number [was passed to the ssh command as an additional argument](https://github.com/apache/arrow/blob/master/dev/release/utils-binary.sh#L45).

Closes #10770 from kszucs/binary-upload-hangs

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 dev/release/05-binary-upload.sh | 1 +
 dev/release/utils-binary.sh     | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/dev/release/05-binary-upload.sh b/dev/release/05-binary-upload.sh
index 6f318fc088f..c841fa748e7 100755
--- a/dev/release/05-binary-upload.sh
+++ b/dev/release/05-binary-upload.sh
@@ -129,6 +129,7 @@ docker_run \
   rake \
     "${rake_tasks[@]}" \
     APT_TARGETS=$(IFS=,; echo "${apt_targets[*]}") \
+    ARTIFACTORY_API_KEY="${ARTIFACTORY_API_KEY}" \
     ARTIFACTS_DIR="${tmp_dir}/artifacts" \
     RC=${rc} \
     VERSION=${version} \
diff --git a/dev/release/utils-binary.sh b/dev/release/utils-binary.sh
index 7c66e375f8d..31ebcd8e9bb 100644
--- a/dev/release/utils-binary.sh
+++ b/dev/release/utils-binary.sh
@@ -70,7 +70,7 @@ fi
 /usr/sbin/sshd -D
 "
   local container_id=$(cat ${container_id_file})
-  local ssh_port=$(docker port ${container_id} | grep -E -o '[0-9]+$')
+  local ssh_port=$(docker port ${container_id} | grep -E -o '[0-9]+$' | head -n 1)
   # Wait for sshd available
   while ! docker_gpg_ssh ${ssh_port} : > /dev/null 2>&1; do
     sleep 0.1

From 25f016f6a2ab9a6b5b9c3b0f95febda888d9b28b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Thu, 22 Jul 2021 14:14:50 +0200
Subject: [PATCH 625/719] ARROW-13431: [Release] Bump go version to 1.15; don't
 verify rust source anymore
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #10777 from kszucs/go-verification

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 dev/release/verify-release-candidate.sh | 41 ++-----------------------
 dev/tasks/tasks.yml                     |  3 +-
 2 files changed, 3 insertions(+), 41 deletions(-)

diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh
index 23239040f0a..5dc6d869147 100755
--- a/dev/release/verify-release-candidate.sh
+++ b/dev/release/verify-release-candidate.sh
@@ -24,7 +24,7 @@
 # - JDK >=7
 # - gcc >= 4.8
 # - Node.js >= 11.12 (best way is to use nvm)
-# - Go >= 1.11
+# - Go >= 1.15
 #
 # If using a non-system Boost, set BOOST_ROOT and add Boost libraries to
 # LD_LIBRARY_PATH.
@@ -449,7 +449,7 @@ test_ruby() {
 }
 
 test_go() {
-  local VERSION=1.14.1
+  local VERSION=1.15.14
   local ARCH=amd64
 
   if [ "$(uname)" == "Darwin" ]; then
@@ -478,39 +478,6 @@ test_go() {
   popd
 }
 
-test_rust() {
-  # install rust toolchain in a similar fashion like test-miniconda
-  export RUSTUP_HOME=$PWD/test-rustup
-  export CARGO_HOME=$PWD/test-rustup
-
-  curl https://sh.rustup.rs -sSf | sh -s -- -y --no-modify-path
-
-  export PATH=$RUSTUP_HOME/bin:$PATH
-  source $RUSTUP_HOME/env
-
-  # build and test rust
-  pushd rust
-
-  # raises on any formatting errors
-  rustup component add rustfmt --toolchain stable
-  cargo +stable fmt --all -- --check
-  rustup default stable
-
-  # use local modules because we don't publish modules to crates.io yet
-  sed \
-    -i.bak \
-    -E \
-    -e 's/^arrow = "([^"]*)"/arrow = { version = "\1", path = "..\/arrow" }/g' \
-    -e 's/^parquet = "([^"]*)"/parquet = { version = "\1", path = "..\/parquet" }/g' \
-    */Cargo.toml
-
-  # raises on any warnings
-  RUSTFLAGS="-D warnings" cargo build
-  cargo test
-
-  popd
-}
-
 # Run integration tests
 test_integration() {
   JAVA_DIR=$PWD/java
@@ -588,9 +555,6 @@ test_source_distribution() {
   if [ ${TEST_GO} -gt 0 ]; then
     test_go
   fi
-  if [ ${TEST_RUST} -gt 0 ]; then
-    test_rust
-  fi
   if [ ${TEST_INTEGRATION} -gt 0 ]; then
     test_integration
   fi
@@ -736,7 +700,6 @@ fi
 : ${TEST_PYTHON:=${TEST_DEFAULT}}
 : ${TEST_JS:=${TEST_DEFAULT}}
 : ${TEST_GO:=${TEST_DEFAULT}}
-: ${TEST_RUST:=${TEST_DEFAULT}}
 : ${TEST_INTEGRATION:=${TEST_DEFAULT}}
 if [ ${TEST_BINARY_DISTRIBUTIONS} -gt 0 ]; then
   TEST_BINARY_DISTRIBUTIONS_DEFAULT=${TEST_DEFAULT}
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index ea0571f3989..93bce879f22 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -745,8 +745,7 @@ tasks:
                     "java",
                     "js",
                     "python",
-                    "ruby",
-                    "rust"] %}
+                    "ruby"] %}
 
   verify-rc-source-{{ platform }}-{{ target }}:
     ci: github

From 7376eefa1e045ba4e3060545e504fbf23adcdbd4 Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Thu, 22 Jul 2021 14:29:07 +0200
Subject: [PATCH 626/719] ARROW-13428: [C++][Flight] Add missing -lssl with
 bundled gRPC and system shared OpenSSL
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If bundled gRPC uses system shared OpenSSL, libarrow_flight.so should
link system shared OpenSSL.

See also: https://github.com/apache/arrow/pull/10768#issuecomment-884726000

Closes #10773 from kou/cpp-bundled-grpc-and-system-openssl

Authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 cpp/cmake_modules/ThirdpartyToolchain.cmake | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index a79d4fd7bb3..e6852d9c210 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -2542,6 +2542,7 @@ macro(build_grpc)
       re2::re2
       c-ares::cares
       ZLIB::ZLIB
+      OpenSSL::SSL
       Threads::Threads)
   set_target_properties(gRPC::grpc
                         PROPERTIES IMPORTED_LOCATION "${GRPC_STATIC_LIBRARY_GRPC}"

From 169b249057cfe7dd2462efd43f6d1de7e23bb60e Mon Sep 17 00:00:00 2001
From: Yibo Cai <yibo.cai@arm.com>
Date: Thu, 22 Jul 2021 16:23:24 +0200
Subject: [PATCH 627/719] ARROW-11748: [C++] Ensure Decimal fields are in
 native endian order

Closes #10651 from cyb70289/11748-decimal-native-endian

Authored-by: Yibo Cai <yibo.cai@arm.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 .../arrow/compute/kernels/codegen_internal.h  |  94 +---
 .../compute/kernels/scalar_cast_numeric.cc    |   3 +-
 cpp/src/arrow/scalar.cc                       |   3 +-
 cpp/src/arrow/util/basic_decimal.cc           | 477 +++++++++---------
 cpp/src/arrow/util/basic_decimal.h            |  54 +-
 cpp/src/arrow/util/decimal.cc                 |  30 +-
 cpp/src/arrow/util/decimal_test.cc            |  33 +-
 cpp/src/arrow/util/endian.h                   |  64 +++
 8 files changed, 397 insertions(+), 361 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/codegen_internal.h b/cpp/src/arrow/compute/kernels/codegen_internal.h
index cb9b13bb3d7..f432c93daac 100644
--- a/cpp/src/arrow/compute/kernels/codegen_internal.h
+++ b/cpp/src/arrow/compute/kernels/codegen_internal.h
@@ -192,8 +192,8 @@ struct GetOutputType<Decimal256Type> {
 // Iteration / value access utilities
 
 template <typename T, typename R = void>
-using enable_if_has_c_type_not_boolean =
-    enable_if_t<has_c_type<T>::value && !is_boolean_type<T>::value, R>;
+using enable_if_c_number_or_decimal = enable_if_t<
+    (has_c_type<T>::value && !is_boolean_type<T>::value) || is_decimal_type<T>::value, R>;
 
 // Iterator over various input array types, yielding a GetViewType<Type>
 
@@ -201,8 +201,8 @@ template <typename Type, typename Enable = void>
 struct ArrayIterator;
 
 template <typename Type>
-struct ArrayIterator<Type, enable_if_has_c_type_not_boolean<Type>> {
-  using T = typename Type::c_type;
+struct ArrayIterator<Type, enable_if_c_number_or_decimal<Type>> {
+  using T = typename TypeTraits<Type>::ScalarType::ValueType;
   const T* values;
 
   explicit ArrayIterator(const ArrayData& data) : values(data.GetValues<T>(1)) {}
@@ -247,26 +247,14 @@ struct ArrayIterator<Type, enable_if_base_binary<Type>> {
   }
 };
 
-template <typename Type>
-struct ArrayIterator<Type, enable_if_decimal<Type>> {
-  using T = typename TypeTraits<Type>::ScalarType::ValueType;
-  using endian_agnostic = std::array<uint8_t, sizeof(T)>;
-  const endian_agnostic* values;
-
-  explicit ArrayIterator(const ArrayData& data)
-      : values(data.GetValues<endian_agnostic>(1)) {}
-
-  T operator()() { return T{values++->data()}; }
-};
-
 // Iterator over various output array types, taking a GetOutputType<Type>
 
 template <typename Type, typename Enable = void>
 struct OutputArrayWriter;
 
 template <typename Type>
-struct OutputArrayWriter<Type, enable_if_has_c_type_not_boolean<Type>> {
-  using T = typename Type::c_type;
+struct OutputArrayWriter<Type, enable_if_c_number_or_decimal<Type>> {
+  using T = typename TypeTraits<Type>::ScalarType::ValueType;
   T* values;
 
   explicit OutputArrayWriter(ArrayData* data) : values(data->GetMutableValues<T>(1)) {}
@@ -277,23 +265,9 @@ struct OutputArrayWriter<Type, enable_if_has_c_type_not_boolean<Type>> {
   // with Write / WriteNull calls
   void WriteNull() { *values++ = T{}; }
 
-  void WriteAllNull(int64_t length) { std::memset(values, 0, sizeof(T) * length); }
-};
-
-template <typename Type>
-struct OutputArrayWriter<Type, enable_if_decimal<Type>> {
-  using T = typename TypeTraits<Type>::ScalarType::ValueType;
-  using endian_agnostic = std::array<uint8_t, sizeof(T)>;
-  endian_agnostic* values;
-
-  explicit OutputArrayWriter(ArrayData* data)
-      : values(data->GetMutableValues<endian_agnostic>(1)) {}
-
-  void Write(T value) { value.ToBytes(values++->data()); }
-
-  void WriteNull() { T{}.ToBytes(values++->data()); }
-
-  void WriteAllNull(int64_t length) { std::memset(values, 0, sizeof(T) * length); }
+  void WriteAllNull(int64_t length) {
+    std::memset(static_cast<void*>(values), 0, sizeof(T) * length);
+  }
 };
 
 // (Un)box Scalar to / from C++ value
@@ -551,11 +525,13 @@ struct OutputAdapter<Type, enable_if_boolean<Type>> {
 };
 
 template <typename Type>
-struct OutputAdapter<Type, enable_if_has_c_type_not_boolean<Type>> {
+struct OutputAdapter<Type, enable_if_c_number_or_decimal<Type>> {
+  using T = typename TypeTraits<Type>::ScalarType::ValueType;
+
   template <typename Generator>
   static Status Write(KernelContext*, Datum* out, Generator&& generator) {
     ArrayData* out_arr = out->mutable_array();
-    auto out_data = out_arr->GetMutableValues<typename Type::c_type>(1);
+    auto out_data = out_arr->GetMutableValues<T>(1);
     // TODO: Is this as fast as a more explicitly inlined function?
     for (int64_t i = 0; i < out_arr->length; ++i) {
       *out_data++ = generator();
@@ -572,22 +548,6 @@ struct OutputAdapter<Type, enable_if_base_binary<Type>> {
   }
 };
 
-template <typename Type>
-struct OutputAdapter<Type, enable_if_decimal<Type>> {
-  using T = typename TypeTraits<Type>::ScalarType::ValueType;
-  using endian_agnostic = std::array<uint8_t, sizeof(T)>;
-
-  template <typename Generator>
-  static Status Write(KernelContext*, Datum* out, Generator&& generator) {
-    ArrayData* out_arr = out->mutable_array();
-    auto out_data = out_arr->GetMutableValues<endian_agnostic>(1);
-    for (int64_t i = 0; i < out_arr->length; ++i) {
-      generator().ToBytes(out_data++->data());
-    }
-    return Status::OK();
-  }
-};
-
 // A kernel exec generator for unary functions that addresses both array and
 // scalar inputs and dispatches input iteration and output writing to other
 // templates
@@ -667,8 +627,7 @@ struct ScalarUnaryNotNullStateful {
   };
 
   template <typename Type>
-  struct ArrayExec<
-      Type, enable_if_t<has_c_type<Type>::value && !is_boolean_type<Type>::value>> {
+  struct ArrayExec<Type, enable_if_c_number_or_decimal<Type>> {
     static Status Exec(const ThisType& functor, KernelContext* ctx, const ArrayData& arg0,
                        Datum* out) {
       Status st = Status::OK();
@@ -735,31 +694,6 @@ struct ScalarUnaryNotNullStateful {
     }
   };
 
-  template <typename Type>
-  struct ArrayExec<Type, enable_if_decimal<Type>> {
-    static Status Exec(const ThisType& functor, KernelContext* ctx, const ArrayData& arg0,
-                       Datum* out) {
-      Status st = Status::OK();
-      ArrayData* out_arr = out->mutable_array();
-      // Decimal128 data buffers are not safely reinterpret_cast-able on big-endian
-      using endian_agnostic =
-          std::array<uint8_t, sizeof(typename TypeTraits<Type>::ScalarType::ValueType)>;
-      auto out_data = out_arr->GetMutableValues<endian_agnostic>(1);
-      VisitArrayValuesInline<Arg0Type>(
-          arg0,
-          [&](Arg0Value v) {
-            functor.op.template Call<OutValue, Arg0Value>(ctx, v, &st)
-                .ToBytes(out_data++->data());
-          },
-          [&]() {
-            // null
-            std::memset(out_data, 0, sizeof(*out_data));
-            ++out_data;
-          });
-      return st;
-    }
-  };
-
   Status Scalar(KernelContext* ctx, const Scalar& arg0, Datum* out) {
     Status st = Status::OK();
     if (arg0.is_valid) {
diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc b/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc
index cc7b533f262..cd89a57ed77 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc
@@ -412,7 +412,8 @@ struct DecimalConversions<Decimal128, Decimal256> {
   // Scale then truncate
   static Decimal256 ConvertInput(Decimal256&& val) { return val; }
   static Decimal128 ConvertOutput(Decimal256&& val) {
-    return Decimal128(val.little_endian_array()[1], val.little_endian_array()[0]);
+    const auto array_le = BitUtil::LittleEndianArray::Make(val.native_endian_array());
+    return Decimal128(array_le[1], array_le[0]);
   }
 };
 
diff --git a/cpp/src/arrow/scalar.cc b/cpp/src/arrow/scalar.cc
index cb7755ba3f1..a118a00938b 100644
--- a/cpp/src/arrow/scalar.cc
+++ b/cpp/src/arrow/scalar.cc
@@ -76,7 +76,8 @@ struct ScalarHashImpl {
 
   Status Visit(const Decimal256Scalar& s) {
     Status status = Status::OK();
-    for (uint64_t elem : s.value.little_endian_array()) {
+    // endianness doesn't affect result
+    for (uint64_t elem : s.value.native_endian_array()) {
       status &= StdHash(elem);
     }
     return status;
diff --git a/cpp/src/arrow/util/basic_decimal.cc b/cpp/src/arrow/util/basic_decimal.cc
index 56809f28165..9d87cc94e2c 100644
--- a/cpp/src/arrow/util/basic_decimal.cc
+++ b/cpp/src/arrow/util/basic_decimal.cc
@@ -121,219 +121,242 @@ static const BasicDecimal128 ScaleMultipliersHalf[] = {
     BasicDecimal128(271050543121376108LL, 9257742014424809472ULL),
     BasicDecimal128(2710505431213761085LL, 343699775700336640ULL)};
 
+#define BasicDecimal256FromLE(v1, v2, v3, v4) \
+  BasicDecimal256(BitUtil::LittleEndianArray::ToNative<uint64_t, 4>(v1, v2, v3, v4))
+
 static const BasicDecimal256 ScaleMultipliersDecimal256[] = {
-    BasicDecimal256({1ULL, 0ULL, 0ULL, 0ULL}),
-    BasicDecimal256({10ULL, 0ULL, 0ULL, 0ULL}),
-    BasicDecimal256({100ULL, 0ULL, 0ULL, 0ULL}),
-    BasicDecimal256({1000ULL, 0ULL, 0ULL, 0ULL}),
-    BasicDecimal256({10000ULL, 0ULL, 0ULL, 0ULL}),
-    BasicDecimal256({100000ULL, 0ULL, 0ULL, 0ULL}),
-    BasicDecimal256({1000000ULL, 0ULL, 0ULL, 0ULL}),
-    BasicDecimal256({10000000ULL, 0ULL, 0ULL, 0ULL}),
-    BasicDecimal256({100000000ULL, 0ULL, 0ULL, 0ULL}),
-    BasicDecimal256({1000000000ULL, 0ULL, 0ULL, 0ULL}),
-    BasicDecimal256({10000000000ULL, 0ULL, 0ULL, 0ULL}),
-    BasicDecimal256({100000000000ULL, 0ULL, 0ULL, 0ULL}),
-    BasicDecimal256({1000000000000ULL, 0ULL, 0ULL, 0ULL}),
-    BasicDecimal256({10000000000000ULL, 0ULL, 0ULL, 0ULL}),
-    BasicDecimal256({100000000000000ULL, 0ULL, 0ULL, 0ULL}),
-    BasicDecimal256({1000000000000000ULL, 0ULL, 0ULL, 0ULL}),
-    BasicDecimal256({10000000000000000ULL, 0ULL, 0ULL, 0ULL}),
-    BasicDecimal256({100000000000000000ULL, 0ULL, 0ULL, 0ULL}),
-    BasicDecimal256({1000000000000000000ULL, 0ULL, 0ULL, 0ULL}),
-    BasicDecimal256({10000000000000000000ULL, 0ULL, 0ULL, 0ULL}),
-    BasicDecimal256({7766279631452241920ULL, 5ULL, 0ULL, 0ULL}),
-    BasicDecimal256({3875820019684212736ULL, 54ULL, 0ULL, 0ULL}),
-    BasicDecimal256({1864712049423024128ULL, 542ULL, 0ULL, 0ULL}),
-    BasicDecimal256({200376420520689664ULL, 5421ULL, 0ULL, 0ULL}),
-    BasicDecimal256({2003764205206896640ULL, 54210ULL, 0ULL, 0ULL}),
-    BasicDecimal256({1590897978359414784ULL, 542101ULL, 0ULL, 0ULL}),
-    BasicDecimal256({15908979783594147840ULL, 5421010ULL, 0ULL, 0ULL}),
-    BasicDecimal256({11515845246265065472ULL, 54210108ULL, 0ULL, 0ULL}),
-    BasicDecimal256({4477988020393345024ULL, 542101086ULL, 0ULL, 0ULL}),
-    BasicDecimal256({7886392056514347008ULL, 5421010862ULL, 0ULL, 0ULL}),
-    BasicDecimal256({5076944270305263616ULL, 54210108624ULL, 0ULL, 0ULL}),
-    BasicDecimal256({13875954555633532928ULL, 542101086242ULL, 0ULL, 0ULL}),
-    BasicDecimal256({9632337040368467968ULL, 5421010862427ULL, 0ULL, 0ULL}),
-    BasicDecimal256({4089650035136921600ULL, 54210108624275ULL, 0ULL, 0ULL}),
-    BasicDecimal256({4003012203950112768ULL, 542101086242752ULL, 0ULL, 0ULL}),
-    BasicDecimal256({3136633892082024448ULL, 5421010862427522ULL, 0ULL, 0ULL}),
-    BasicDecimal256({12919594847110692864ULL, 54210108624275221ULL, 0ULL, 0ULL}),
-    BasicDecimal256({68739955140067328ULL, 542101086242752217ULL, 0ULL, 0ULL}),
-    BasicDecimal256({687399551400673280ULL, 5421010862427522170ULL, 0ULL, 0ULL}),
-    BasicDecimal256({6873995514006732800ULL, 17316620476856118468ULL, 2ULL, 0ULL}),
-    BasicDecimal256({13399722918938673152ULL, 7145508105175220139ULL, 29ULL, 0ULL}),
-    BasicDecimal256({4870020673419870208ULL, 16114848830623546549ULL, 293ULL, 0ULL}),
-    BasicDecimal256({11806718586779598848ULL, 13574535716559052564ULL, 2938ULL, 0ULL}),
-    BasicDecimal256({7386721425538678784ULL, 6618148649623664334ULL, 29387ULL, 0ULL}),
-    BasicDecimal256({80237960548581376ULL, 10841254275107988496ULL, 293873ULL, 0ULL}),
-    BasicDecimal256({802379605485813760ULL, 16178822382532126880ULL, 2938735ULL, 0ULL}),
-    BasicDecimal256({8023796054858137600ULL, 14214271235644855872ULL, 29387358ULL, 0ULL}),
-    BasicDecimal256(
+    BasicDecimal256FromLE({1ULL, 0ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({10ULL, 0ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({100ULL, 0ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({1000ULL, 0ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({10000ULL, 0ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({100000ULL, 0ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({1000000ULL, 0ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({10000000ULL, 0ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({100000000ULL, 0ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({1000000000ULL, 0ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({10000000000ULL, 0ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({100000000000ULL, 0ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({1000000000000ULL, 0ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({10000000000000ULL, 0ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({100000000000000ULL, 0ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({1000000000000000ULL, 0ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({10000000000000000ULL, 0ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({100000000000000000ULL, 0ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({1000000000000000000ULL, 0ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({10000000000000000000ULL, 0ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({7766279631452241920ULL, 5ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({3875820019684212736ULL, 54ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({1864712049423024128ULL, 542ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({200376420520689664ULL, 5421ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({2003764205206896640ULL, 54210ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({1590897978359414784ULL, 542101ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({15908979783594147840ULL, 5421010ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({11515845246265065472ULL, 54210108ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({4477988020393345024ULL, 542101086ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({7886392056514347008ULL, 5421010862ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({5076944270305263616ULL, 54210108624ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({13875954555633532928ULL, 542101086242ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({9632337040368467968ULL, 5421010862427ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({4089650035136921600ULL, 54210108624275ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({4003012203950112768ULL, 542101086242752ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({3136633892082024448ULL, 5421010862427522ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({12919594847110692864ULL, 54210108624275221ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({68739955140067328ULL, 542101086242752217ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({687399551400673280ULL, 5421010862427522170ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({6873995514006732800ULL, 17316620476856118468ULL, 2ULL, 0ULL}),
+    BasicDecimal256FromLE({13399722918938673152ULL, 7145508105175220139ULL, 29ULL, 0ULL}),
+    BasicDecimal256FromLE(
+        {4870020673419870208ULL, 16114848830623546549ULL, 293ULL, 0ULL}),
+    BasicDecimal256FromLE(
+        {11806718586779598848ULL, 13574535716559052564ULL, 2938ULL, 0ULL}),
+    BasicDecimal256FromLE(
+        {7386721425538678784ULL, 6618148649623664334ULL, 29387ULL, 0ULL}),
+    BasicDecimal256FromLE(
+        {80237960548581376ULL, 10841254275107988496ULL, 293873ULL, 0ULL}),
+    BasicDecimal256FromLE(
+        {802379605485813760ULL, 16178822382532126880ULL, 2938735ULL, 0ULL}),
+    BasicDecimal256FromLE(
+        {8023796054858137600ULL, 14214271235644855872ULL, 29387358ULL, 0ULL}),
+    BasicDecimal256FromLE(
         {6450984253743169536ULL, 13015503840481697412ULL, 293873587ULL, 0ULL}),
-    BasicDecimal256(
+    BasicDecimal256FromLE(
         {9169610316303040512ULL, 1027829888850112811ULL, 2938735877ULL, 0ULL}),
-    BasicDecimal256(
+    BasicDecimal256FromLE(
         {17909126868192198656ULL, 10278298888501128114ULL, 29387358770ULL, 0ULL}),
-    BasicDecimal256(
+    BasicDecimal256FromLE(
         {13070572018536022016ULL, 10549268516463523069ULL, 293873587705ULL, 0ULL}),
-    BasicDecimal256(
+    BasicDecimal256FromLE(
         {1578511669393358848ULL, 13258964796087472617ULL, 2938735877055ULL, 0ULL}),
-    BasicDecimal256(
+    BasicDecimal256FromLE(
         {15785116693933588480ULL, 3462439444907864858ULL, 29387358770557ULL, 0ULL}),
-    BasicDecimal256(
+    BasicDecimal256FromLE(
         {10277214349659471872ULL, 16177650375369096972ULL, 293873587705571ULL, 0ULL}),
-    BasicDecimal256(
+    BasicDecimal256FromLE(
         {10538423128046960640ULL, 14202551164014556797ULL, 2938735877055718ULL, 0ULL}),
-    BasicDecimal256(
+    BasicDecimal256FromLE(
         {13150510911921848320ULL, 12898303124178706663ULL, 29387358770557187ULL, 0ULL}),
-    BasicDecimal256(
+    BasicDecimal256FromLE(
         {2377900603251621888ULL, 18302566799529756941ULL, 293873587705571876ULL, 0ULL}),
-    BasicDecimal256(
+    BasicDecimal256FromLE(
         {5332261958806667264ULL, 17004971331911604867ULL, 2938735877055718769ULL, 0ULL}),
-    BasicDecimal256(
+    BasicDecimal256FromLE(
         {16429131440647569408ULL, 4029016655730084128ULL, 10940614696847636083ULL, 1ULL}),
-    BasicDecimal256({16717361816799281152ULL, 3396678409881738056ULL,
-                     17172426599928602752ULL, 15ULL}),
-    BasicDecimal256({1152921504606846976ULL, 15520040025107828953ULL,
-                     5703569335900062977ULL, 159ULL}),
-    BasicDecimal256({11529215046068469760ULL, 7626447661401876602ULL,
-                     1695461137871974930ULL, 1593ULL}),
-    BasicDecimal256({4611686018427387904ULL, 2477500319180559562ULL,
-                     16954611378719749304ULL, 15930ULL}),
-    BasicDecimal256({9223372036854775808ULL, 6328259118096044006ULL,
-                     3525417123811528497ULL, 159309ULL}),
-    BasicDecimal256({0ULL, 7942358959831785217ULL, 16807427164405733357ULL, 1593091ULL}),
-    BasicDecimal256({0ULL, 5636613303479645706ULL, 2053574980671369030ULL, 15930919ULL}),
-    BasicDecimal256({0ULL, 1025900813667802212ULL, 2089005733004138687ULL, 159309191ULL}),
-    BasicDecimal256(
+    BasicDecimal256FromLE({16717361816799281152ULL, 3396678409881738056ULL,
+                           17172426599928602752ULL, 15ULL}),
+    BasicDecimal256FromLE({1152921504606846976ULL, 15520040025107828953ULL,
+                           5703569335900062977ULL, 159ULL}),
+    BasicDecimal256FromLE({11529215046068469760ULL, 7626447661401876602ULL,
+                           1695461137871974930ULL, 1593ULL}),
+    BasicDecimal256FromLE({4611686018427387904ULL, 2477500319180559562ULL,
+                           16954611378719749304ULL, 15930ULL}),
+    BasicDecimal256FromLE({9223372036854775808ULL, 6328259118096044006ULL,
+                           3525417123811528497ULL, 159309ULL}),
+    BasicDecimal256FromLE(
+        {0ULL, 7942358959831785217ULL, 16807427164405733357ULL, 1593091ULL}),
+    BasicDecimal256FromLE(
+        {0ULL, 5636613303479645706ULL, 2053574980671369030ULL, 15930919ULL}),
+    BasicDecimal256FromLE(
+        {0ULL, 1025900813667802212ULL, 2089005733004138687ULL, 159309191ULL}),
+    BasicDecimal256FromLE(
         {0ULL, 10259008136678022120ULL, 2443313256331835254ULL, 1593091911ULL}),
-    BasicDecimal256(
+    BasicDecimal256FromLE(
         {0ULL, 10356360998232463120ULL, 5986388489608800929ULL, 15930919111ULL}),
-    BasicDecimal256(
+    BasicDecimal256FromLE(
         {0ULL, 11329889613776873120ULL, 4523652674959354447ULL, 159309191113ULL}),
-    BasicDecimal256(
+    BasicDecimal256FromLE(
         {0ULL, 2618431695511421504ULL, 8343038602174441244ULL, 1593091911132ULL}),
-    BasicDecimal256(
+    BasicDecimal256FromLE(
         {0ULL, 7737572881404663424ULL, 9643409726906205977ULL, 15930919111324ULL}),
-    BasicDecimal256(
+    BasicDecimal256FromLE(
         {0ULL, 3588752519208427776ULL, 4200376900514301694ULL, 159309191113245ULL}),
-    BasicDecimal256(
+    BasicDecimal256FromLE(
         {0ULL, 17440781118374726144ULL, 5110280857723913709ULL, 1593091911132452ULL}),
-    BasicDecimal256(
+    BasicDecimal256FromLE(
         {0ULL, 8387114520361296896ULL, 14209320429820033867ULL, 15930919111324522ULL}),
-    BasicDecimal256(
+    BasicDecimal256FromLE(
         {0ULL, 10084168908774762496ULL, 12965995782233477362ULL, 159309191113245227ULL}),
-    BasicDecimal256(
+    BasicDecimal256FromLE(
         {0ULL, 8607968719199866880ULL, 532749306367912313ULL, 1593091911132452277ULL})};
 
 static const BasicDecimal256 ScaleMultipliersHalfDecimal256[] = {
-    BasicDecimal256({0ULL, 0ULL, 0ULL, 0ULL}),
-    BasicDecimal256({5ULL, 0ULL, 0ULL, 0ULL}),
-    BasicDecimal256({50ULL, 0ULL, 0ULL, 0ULL}),
-    BasicDecimal256({500ULL, 0ULL, 0ULL, 0ULL}),
-    BasicDecimal256({5000ULL, 0ULL, 0ULL, 0ULL}),
-    BasicDecimal256({50000ULL, 0ULL, 0ULL, 0ULL}),
-    BasicDecimal256({500000ULL, 0ULL, 0ULL, 0ULL}),
-    BasicDecimal256({5000000ULL, 0ULL, 0ULL, 0ULL}),
-    BasicDecimal256({50000000ULL, 0ULL, 0ULL, 0ULL}),
-    BasicDecimal256({500000000ULL, 0ULL, 0ULL, 0ULL}),
-    BasicDecimal256({5000000000ULL, 0ULL, 0ULL, 0ULL}),
-    BasicDecimal256({50000000000ULL, 0ULL, 0ULL, 0ULL}),
-    BasicDecimal256({500000000000ULL, 0ULL, 0ULL, 0ULL}),
-    BasicDecimal256({5000000000000ULL, 0ULL, 0ULL, 0ULL}),
-    BasicDecimal256({50000000000000ULL, 0ULL, 0ULL, 0ULL}),
-    BasicDecimal256({500000000000000ULL, 0ULL, 0ULL, 0ULL}),
-    BasicDecimal256({5000000000000000ULL, 0ULL, 0ULL, 0ULL}),
-    BasicDecimal256({50000000000000000ULL, 0ULL, 0ULL, 0ULL}),
-    BasicDecimal256({500000000000000000ULL, 0ULL, 0ULL, 0ULL}),
-    BasicDecimal256({5000000000000000000ULL, 0ULL, 0ULL, 0ULL}),
-    BasicDecimal256({13106511852580896768ULL, 2ULL, 0ULL, 0ULL}),
-    BasicDecimal256({1937910009842106368ULL, 27ULL, 0ULL, 0ULL}),
-    BasicDecimal256({932356024711512064ULL, 271ULL, 0ULL, 0ULL}),
-    BasicDecimal256({9323560247115120640ULL, 2710ULL, 0ULL, 0ULL}),
-    BasicDecimal256({1001882102603448320ULL, 27105ULL, 0ULL, 0ULL}),
-    BasicDecimal256({10018821026034483200ULL, 271050ULL, 0ULL, 0ULL}),
-    BasicDecimal256({7954489891797073920ULL, 2710505ULL, 0ULL, 0ULL}),
-    BasicDecimal256({5757922623132532736ULL, 27105054ULL, 0ULL, 0ULL}),
-    BasicDecimal256({2238994010196672512ULL, 271050543ULL, 0ULL, 0ULL}),
-    BasicDecimal256({3943196028257173504ULL, 2710505431ULL, 0ULL, 0ULL}),
-    BasicDecimal256({2538472135152631808ULL, 27105054312ULL, 0ULL, 0ULL}),
-    BasicDecimal256({6937977277816766464ULL, 271050543121ULL, 0ULL, 0ULL}),
-    BasicDecimal256({14039540557039009792ULL, 2710505431213ULL, 0ULL, 0ULL}),
-    BasicDecimal256({11268197054423236608ULL, 27105054312137ULL, 0ULL, 0ULL}),
-    BasicDecimal256({2001506101975056384ULL, 271050543121376ULL, 0ULL, 0ULL}),
-    BasicDecimal256({1568316946041012224ULL, 2710505431213761ULL, 0ULL, 0ULL}),
-    BasicDecimal256({15683169460410122240ULL, 27105054312137610ULL, 0ULL, 0ULL}),
-    BasicDecimal256({9257742014424809472ULL, 271050543121376108ULL, 0ULL, 0ULL}),
-    BasicDecimal256({343699775700336640ULL, 2710505431213761085ULL, 0ULL, 0ULL}),
-    BasicDecimal256({3436997757003366400ULL, 8658310238428059234ULL, 1ULL, 0ULL}),
-    BasicDecimal256({15923233496324112384ULL, 12796126089442385877ULL, 14ULL, 0ULL}),
-    BasicDecimal256({11658382373564710912ULL, 17280796452166549082ULL, 146ULL, 0ULL}),
-    BasicDecimal256({5903359293389799424ULL, 6787267858279526282ULL, 1469ULL, 0ULL}),
-    BasicDecimal256({3693360712769339392ULL, 12532446361666607975ULL, 14693ULL, 0ULL}),
-    BasicDecimal256({40118980274290688ULL, 14643999174408770056ULL, 146936ULL, 0ULL}),
-    BasicDecimal256({401189802742906880ULL, 17312783228120839248ULL, 1469367ULL, 0ULL}),
-    BasicDecimal256({4011898027429068800ULL, 7107135617822427936ULL, 14693679ULL, 0ULL}),
-    BasicDecimal256(
+    BasicDecimal256FromLE({0ULL, 0ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({5ULL, 0ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({50ULL, 0ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({500ULL, 0ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({5000ULL, 0ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({50000ULL, 0ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({500000ULL, 0ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({5000000ULL, 0ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({50000000ULL, 0ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({500000000ULL, 0ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({5000000000ULL, 0ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({50000000000ULL, 0ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({500000000000ULL, 0ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({5000000000000ULL, 0ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({50000000000000ULL, 0ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({500000000000000ULL, 0ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({5000000000000000ULL, 0ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({50000000000000000ULL, 0ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({500000000000000000ULL, 0ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({5000000000000000000ULL, 0ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({13106511852580896768ULL, 2ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({1937910009842106368ULL, 27ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({932356024711512064ULL, 271ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({9323560247115120640ULL, 2710ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({1001882102603448320ULL, 27105ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({10018821026034483200ULL, 271050ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({7954489891797073920ULL, 2710505ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({5757922623132532736ULL, 27105054ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({2238994010196672512ULL, 271050543ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({3943196028257173504ULL, 2710505431ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({2538472135152631808ULL, 27105054312ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({6937977277816766464ULL, 271050543121ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({14039540557039009792ULL, 2710505431213ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({11268197054423236608ULL, 27105054312137ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({2001506101975056384ULL, 271050543121376ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({1568316946041012224ULL, 2710505431213761ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({15683169460410122240ULL, 27105054312137610ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({9257742014424809472ULL, 271050543121376108ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({343699775700336640ULL, 2710505431213761085ULL, 0ULL, 0ULL}),
+    BasicDecimal256FromLE({3436997757003366400ULL, 8658310238428059234ULL, 1ULL, 0ULL}),
+    BasicDecimal256FromLE(
+        {15923233496324112384ULL, 12796126089442385877ULL, 14ULL, 0ULL}),
+    BasicDecimal256FromLE(
+        {11658382373564710912ULL, 17280796452166549082ULL, 146ULL, 0ULL}),
+    BasicDecimal256FromLE(
+        {5903359293389799424ULL, 6787267858279526282ULL, 1469ULL, 0ULL}),
+    BasicDecimal256FromLE(
+        {3693360712769339392ULL, 12532446361666607975ULL, 14693ULL, 0ULL}),
+    BasicDecimal256FromLE(
+        {40118980274290688ULL, 14643999174408770056ULL, 146936ULL, 0ULL}),
+    BasicDecimal256FromLE(
+        {401189802742906880ULL, 17312783228120839248ULL, 1469367ULL, 0ULL}),
+    BasicDecimal256FromLE(
+        {4011898027429068800ULL, 7107135617822427936ULL, 14693679ULL, 0ULL}),
+    BasicDecimal256FromLE(
         {3225492126871584768ULL, 15731123957095624514ULL, 146936793ULL, 0ULL}),
-    BasicDecimal256(
+    BasicDecimal256FromLE(
         {13808177195006296064ULL, 9737286981279832213ULL, 1469367938ULL, 0ULL}),
-    BasicDecimal256(
+    BasicDecimal256FromLE(
         {8954563434096099328ULL, 5139149444250564057ULL, 14693679385ULL, 0ULL}),
-    BasicDecimal256(
+    BasicDecimal256FromLE(
         {15758658046122786816ULL, 14498006295086537342ULL, 146936793852ULL, 0ULL}),
-    BasicDecimal256(
+    BasicDecimal256FromLE(
         {10012627871551455232ULL, 15852854434898512116ULL, 1469367938527ULL, 0ULL}),
-    BasicDecimal256(
+    BasicDecimal256FromLE(
         {7892558346966794240ULL, 10954591759308708237ULL, 14693679385278ULL, 0ULL}),
-    BasicDecimal256(
+    BasicDecimal256FromLE(
         {5138607174829735936ULL, 17312197224539324294ULL, 146936793852785ULL, 0ULL}),
-    BasicDecimal256(
+    BasicDecimal256FromLE(
         {14492583600878256128ULL, 7101275582007278398ULL, 1469367938527859ULL, 0ULL}),
-    BasicDecimal256(
+    BasicDecimal256FromLE(
         {15798627492815699968ULL, 15672523598944129139ULL, 14693679385278593ULL, 0ULL}),
-    BasicDecimal256(
+    BasicDecimal256FromLE(
         {10412322338480586752ULL, 9151283399764878470ULL, 146936793852785938ULL, 0ULL}),
-    BasicDecimal256(
+    BasicDecimal256FromLE(
         {11889503016258109440ULL, 17725857702810578241ULL, 1469367938527859384ULL, 0ULL}),
-    BasicDecimal256(
+    BasicDecimal256FromLE(
         {8214565720323784704ULL, 11237880364719817872ULL, 14693679385278593849ULL, 0ULL}),
-    BasicDecimal256(
+    BasicDecimal256FromLE(
         {8358680908399640576ULL, 1698339204940869028ULL, 17809585336819077184ULL, 7ULL}),
-    BasicDecimal256({9799832789158199296ULL, 16983392049408690284ULL,
-                     12075156704804807296ULL, 79ULL}),
-    BasicDecimal256({5764607523034234880ULL, 3813223830700938301ULL,
-                     10071102605790763273ULL, 796ULL}),
-    BasicDecimal256({2305843009213693952ULL, 1238750159590279781ULL,
-                     8477305689359874652ULL, 7965ULL}),
-    BasicDecimal256({4611686018427387904ULL, 12387501595902797811ULL,
-                     10986080598760540056ULL, 79654ULL}),
-    BasicDecimal256({9223372036854775808ULL, 13194551516770668416ULL,
-                     17627085619057642486ULL, 796545ULL}),
-    BasicDecimal256({0ULL, 2818306651739822853ULL, 10250159527190460323ULL, 7965459ULL}),
-    BasicDecimal256({0ULL, 9736322443688676914ULL, 10267874903356845151ULL, 79654595ULL}),
-    BasicDecimal256(
+    BasicDecimal256FromLE({9799832789158199296ULL, 16983392049408690284ULL,
+                           12075156704804807296ULL, 79ULL}),
+    BasicDecimal256FromLE({5764607523034234880ULL, 3813223830700938301ULL,
+                           10071102605790763273ULL, 796ULL}),
+    BasicDecimal256FromLE({2305843009213693952ULL, 1238750159590279781ULL,
+                           8477305689359874652ULL, 7965ULL}),
+    BasicDecimal256FromLE({4611686018427387904ULL, 12387501595902797811ULL,
+                           10986080598760540056ULL, 79654ULL}),
+    BasicDecimal256FromLE({9223372036854775808ULL, 13194551516770668416ULL,
+                           17627085619057642486ULL, 796545ULL}),
+    BasicDecimal256FromLE(
+        {0ULL, 2818306651739822853ULL, 10250159527190460323ULL, 7965459ULL}),
+    BasicDecimal256FromLE(
+        {0ULL, 9736322443688676914ULL, 10267874903356845151ULL, 79654595ULL}),
+    BasicDecimal256FromLE(
         {0ULL, 5129504068339011060ULL, 10445028665020693435ULL, 796545955ULL}),
-    BasicDecimal256(
+    BasicDecimal256FromLE(
         {0ULL, 14401552535971007368ULL, 12216566281659176272ULL, 7965459555ULL}),
-    BasicDecimal256(
+    BasicDecimal256FromLE(
         {0ULL, 14888316843743212368ULL, 11485198374334453031ULL, 79654595556ULL}),
-    BasicDecimal256(
+    BasicDecimal256FromLE(
         {0ULL, 1309215847755710752ULL, 4171519301087220622ULL, 796545955566ULL}),
-    BasicDecimal256(
+    BasicDecimal256FromLE(
         {0ULL, 13092158477557107520ULL, 4821704863453102988ULL, 7965459555662ULL}),
-    BasicDecimal256(
+    BasicDecimal256FromLE(
         {0ULL, 1794376259604213888ULL, 11323560487111926655ULL, 79654595556622ULL}),
-    BasicDecimal256(
+    BasicDecimal256FromLE(
         {0ULL, 17943762596042138880ULL, 2555140428861956854ULL, 796545955566226ULL}),
-    BasicDecimal256(
+    BasicDecimal256FromLE(
         {0ULL, 13416929297035424256ULL, 7104660214910016933ULL, 7965459555662261ULL}),
-    BasicDecimal256(
+    BasicDecimal256FromLE(
         {0ULL, 5042084454387381248ULL, 15706369927971514489ULL, 79654595556622613ULL}),
-    BasicDecimal256(
+    BasicDecimal256FromLE(
         {0ULL, 13527356396454709248ULL, 9489746690038731964ULL, 796545955566226138ULL})};
 
+#undef BasicDecimal256FromLE
+
 #ifdef ARROW_USE_NATIVE_INT128
 static constexpr uint64_t kInt64Mask = 0xFFFFFFFFFFFFFFFF;
 #else
@@ -572,20 +595,24 @@ struct uint128_t {
 
 // Multiplies two N * 64 bit unsigned integer types, represented by a uint64_t
 // array into a same sized output. Elements in the array should be in
-// little endian order, and output will be the same. Overflow in multiplication
+// native endian order, and output will be the same. Overflow in multiplication
 // will result in the lower N * 64 bits of the result being set.
 template <int N>
 inline void MultiplyUnsignedArray(const std::array<uint64_t, N>& lh,
                                   const std::array<uint64_t, N>& rh,
                                   std::array<uint64_t, N>* result) {
+  const auto lh_le = BitUtil::LittleEndianArray::Make(lh);
+  const auto rh_le = BitUtil::LittleEndianArray::Make(rh);
+  auto result_le = BitUtil::LittleEndianArray::Make(result);
+
   for (int j = 0; j < N; ++j) {
     uint64_t carry = 0;
     for (int i = 0; i < N - j; ++i) {
-      uint128_t tmp(lh[i]);
-      tmp *= uint128_t(rh[j]);
-      tmp += uint128_t((*result)[i + j]);
+      uint128_t tmp(lh_le[i]);
+      tmp *= uint128_t(rh_le[j]);
+      tmp += uint128_t(result_le[i + j]);
       tmp += uint128_t(carry);
-      (*result)[i + j] = tmp.lo();
+      result_le[i + j] = tmp.lo();
       carry = tmp.hi();
     }
   }
@@ -609,22 +636,23 @@ BasicDecimal128& BasicDecimal128::operator*=(const BasicDecimal128& right) {
   return *this;
 }
 
-/// Expands the given little endian array of uint64_t into a big endian array of
+/// Expands the given native endian array of uint64_t into a big endian array of
 /// uint32_t. The value of input array is expected to be non-negative. The result_array
 /// will remove leading zeros from the input array.
-/// \param value_array a little endian array to represent the value
+/// \param value_array a native endian array to represent the value
 /// \param result_array a big endian array of length N*2 to set with the value
 /// \result the output length of the array
 template <size_t N>
 static int64_t FillInArray(const std::array<uint64_t, N>& value_array,
                            uint32_t* result_array) {
+  const auto value_array_le = BitUtil::LittleEndianArray::Make(value_array);
   int64_t next_index = 0;
   // 1st loop to find out 1st non-negative value in input
   int64_t i = N - 1;
   for (; i >= 0; i--) {
-    if (value_array[i] != 0) {
-      if (value_array[i] <= std::numeric_limits<uint32_t>::max()) {
-        result_array[next_index++] = static_cast<uint32_t>(value_array[i]);
+    if (value_array_le[i] != 0) {
+      if (value_array_le[i] <= std::numeric_limits<uint32_t>::max()) {
+        result_array[next_index++] = static_cast<uint32_t>(value_array_le[i]);
         i--;
       }
       break;
@@ -632,8 +660,8 @@ static int64_t FillInArray(const std::array<uint64_t, N>& value_array,
   }
   // 2nd loop to fill in the rest of the array.
   for (int64_t j = i; j >= 0; j--) {
-    result_array[next_index++] = static_cast<uint32_t>(value_array[j] >> 32);
-    result_array[next_index++] = static_cast<uint32_t>(value_array[j]);
+    result_array[next_index++] = static_cast<uint32_t>(value_array_le[j] >> 32);
+    result_array[next_index++] = static_cast<uint32_t>(value_array_le[j]);
   }
   return next_index;
 }
@@ -699,7 +727,7 @@ static int64_t FillInArray(const BasicDecimal256& value, uint32_t* array,
     positive_value.Negate();
     was_negative = true;
   }
-  return FillInArray<4>(positive_value.little_endian_array(), array);
+  return FillInArray<4>(positive_value.native_endian_array(), array);
 }
 
 /// Shift the number in the array left by bits positions.
@@ -743,7 +771,7 @@ static inline void FixDivisionSigns(DecimalClass* result, DecimalClass* remainde
   }
 }
 
-/// \brief Build a little endian array of uint64_t from a big endian array of uint32_t.
+/// \brief Build a native endian array of uint64_t from a big endian array of uint32_t.
 template <size_t N>
 static DecimalStatus BuildFromArray(std::array<uint64_t, N>* result_array,
                                     const uint32_t* array, int64_t length) {
@@ -754,15 +782,16 @@ static DecimalStatus BuildFromArray(std::array<uint64_t, N>* result_array,
   }
   int64_t next_index = length - 1;
   size_t i = 0;
+  auto result_array_le = BitUtil::LittleEndianArray::Make(result_array);
   for (; i < N && next_index >= 0; i++) {
     uint64_t lower_bits = array[next_index--];
-    (*result_array)[i] =
+    result_array_le[i] =
         (next_index < 0)
             ? lower_bits
             : ((static_cast<uint64_t>(array[next_index--]) << 32) + lower_bits);
   }
   for (; i < N; i++) {
-    (*result_array)[i] = 0;
+    result_array_le[i] = 0;
   }
   return DecimalStatus::kSuccess;
 }
@@ -775,7 +804,8 @@ static DecimalStatus BuildFromArray(BasicDecimal128* value, const uint32_t* arra
   if (status != DecimalStatus::kSuccess) {
     return status;
   }
-  *value = {static_cast<int64_t>(result_array[1]), result_array[0]};
+  const auto result_array_le = BitUtil::LittleEndianArray::Make(result_array);
+  *value = {static_cast<int64_t>(result_array_le[1]), result_array_le[0]};
   return DecimalStatus::kSuccess;
 }
 
@@ -1117,25 +1147,17 @@ int32_t BasicDecimal128::CountLeadingBinaryZeros() const {
   }
 }
 
-#if ARROW_LITTLE_ENDIAN
-BasicDecimal256::BasicDecimal256(const uint8_t* bytes)
-    : little_endian_array_(
-          std::array<uint64_t, 4>({reinterpret_cast<const uint64_t*>(bytes)[0],
-                                   reinterpret_cast<const uint64_t*>(bytes)[1],
-                                   reinterpret_cast<const uint64_t*>(bytes)[2],
-                                   reinterpret_cast<const uint64_t*>(bytes)[3]})) {}
-#else
 BasicDecimal256::BasicDecimal256(const uint8_t* bytes)
-    : little_endian_array_(
-          std::array<uint64_t, 4>({reinterpret_cast<const uint64_t*>(bytes)[3],
-                                   reinterpret_cast<const uint64_t*>(bytes)[2],
-                                   reinterpret_cast<const uint64_t*>(bytes)[1],
-                                   reinterpret_cast<const uint64_t*>(bytes)[0]})) {}
-#endif
+    : array_({reinterpret_cast<const uint64_t*>(bytes)[0],
+              reinterpret_cast<const uint64_t*>(bytes)[1],
+              reinterpret_cast<const uint64_t*>(bytes)[2],
+              reinterpret_cast<const uint64_t*>(bytes)[3]}) {}
 
 BasicDecimal256& BasicDecimal256::Negate() {
+  auto array_le = BitUtil::LittleEndianArray::Make(&array_);
   uint64_t carry = 1;
-  for (uint64_t& elem : little_endian_array_) {
+  for (size_t i = 0; i < array_.size(); ++i) {
+    uint64_t& elem = array_le[i];
     elem = ~elem + carry;
     carry &= (elem == 0);
   }
@@ -1150,19 +1172,21 @@ BasicDecimal256 BasicDecimal256::Abs(const BasicDecimal256& in) {
 }
 
 BasicDecimal256& BasicDecimal256::operator+=(const BasicDecimal256& right) {
+  auto array_le = BitUtil::LittleEndianArray::Make(&array_);
+  const auto right_array_le = BitUtil::LittleEndianArray::Make(right.array_);
   uint64_t carry = 0;
-  for (size_t i = 0; i < little_endian_array_.size(); i++) {
-    const uint64_t right_value = right.little_endian_array_[i];
+  for (size_t i = 0; i < array_.size(); i++) {
+    const uint64_t right_value = right_array_le[i];
     uint64_t sum = right_value + carry;
     carry = 0;
     if (sum < right_value) {
       carry += 1;
     }
-    sum += little_endian_array_[i];
-    if (sum < little_endian_array_[i]) {
+    sum += array_le[i];
+    if (sum < array_le[i]) {
       carry += 1;
     }
-    little_endian_array_[i] = sum;
+    array_le[i] = sum;
   }
   return *this;
 }
@@ -1177,23 +1201,22 @@ BasicDecimal256& BasicDecimal256::operator<<=(uint32_t bits) {
     return *this;
   }
   int cross_word_shift = bits / 64;
-  if (static_cast<size_t>(cross_word_shift) >= little_endian_array_.size()) {
-    little_endian_array_ = {0, 0, 0, 0};
+  if (static_cast<size_t>(cross_word_shift) >= array_.size()) {
+    array_ = {0, 0, 0, 0};
     return *this;
   }
   uint32_t in_word_shift = bits % 64;
-  for (int i = static_cast<int>(little_endian_array_.size() - 1); i >= cross_word_shift;
-       i--) {
+  auto array_le = BitUtil::LittleEndianArray::Make(&array_);
+  for (int i = static_cast<int>(array_.size() - 1); i >= cross_word_shift; i--) {
     // Account for shifts larger then 64 bits
-    little_endian_array_[i] = little_endian_array_[i - cross_word_shift];
-    little_endian_array_[i] <<= in_word_shift;
+    array_le[i] = array_le[i - cross_word_shift];
+    array_le[i] <<= in_word_shift;
     if (in_word_shift != 0 && i >= cross_word_shift + 1) {
-      little_endian_array_[i] |=
-          little_endian_array_[i - (cross_word_shift + 1)] >> (64 - in_word_shift);
+      array_le[i] |= array_le[i - (cross_word_shift + 1)] >> (64 - in_word_shift);
     }
   }
   for (int i = cross_word_shift - 1; i >= 0; i--) {
-    little_endian_array_[i] = 0;
+    array_le[i] = 0;
   }
   return *this;
 }
@@ -1206,17 +1229,10 @@ std::array<uint8_t, 32> BasicDecimal256::ToBytes() const {
 
 void BasicDecimal256::ToBytes(uint8_t* out) const {
   DCHECK_NE(out, nullptr);
-#if ARROW_LITTLE_ENDIAN
-  reinterpret_cast<int64_t*>(out)[0] = little_endian_array_[0];
-  reinterpret_cast<int64_t*>(out)[1] = little_endian_array_[1];
-  reinterpret_cast<int64_t*>(out)[2] = little_endian_array_[2];
-  reinterpret_cast<int64_t*>(out)[3] = little_endian_array_[3];
-#else
-  reinterpret_cast<int64_t*>(out)[0] = little_endian_array_[3];
-  reinterpret_cast<int64_t*>(out)[1] = little_endian_array_[2];
-  reinterpret_cast<int64_t*>(out)[2] = little_endian_array_[1];
-  reinterpret_cast<int64_t*>(out)[3] = little_endian_array_[0];
-#endif
+  reinterpret_cast<uint64_t*>(out)[0] = array_[0];
+  reinterpret_cast<uint64_t*>(out)[1] = array_[1];
+  reinterpret_cast<uint64_t*>(out)[2] = array_[2];
+  reinterpret_cast<uint64_t*>(out)[3] = array_[3];
 }
 
 BasicDecimal256& BasicDecimal256::operator*=(const BasicDecimal256& right) {
@@ -1229,8 +1245,8 @@ BasicDecimal256& BasicDecimal256::operator*=(const BasicDecimal256& right) {
   uint128_t r_hi;
   uint128_t r_lo;
   std::array<uint64_t, 4> res{0, 0, 0, 0};
-  MultiplyUnsignedArray<4>(x.little_endian_array_, y.little_endian_array_, &res);
-  little_endian_array_ = res;
+  MultiplyUnsignedArray<4>(x.array_, y.array_, &res);
+  array_ = res;
   if (negate) {
     Negate();
   }
@@ -1301,12 +1317,13 @@ BasicDecimal256 operator*(const BasicDecimal256& left, const BasicDecimal256& ri
 }
 
 bool operator<(const BasicDecimal256& left, const BasicDecimal256& right) {
-  const std::array<uint64_t, 4>& lhs = left.little_endian_array();
-  const std::array<uint64_t, 4>& rhs = right.little_endian_array();
-  return lhs[3] != rhs[3]
-             ? static_cast<int64_t>(lhs[3]) < static_cast<int64_t>(rhs[3])
-             : lhs[2] != rhs[2] ? lhs[2] < rhs[2]
-                                : lhs[1] != rhs[1] ? lhs[1] < rhs[1] : lhs[0] < rhs[0];
+  const auto lhs_le = BitUtil::LittleEndianArray::Make(left.native_endian_array());
+  const auto rhs_le = BitUtil::LittleEndianArray::Make(right.native_endian_array());
+  return lhs_le[3] != rhs_le[3]
+             ? static_cast<int64_t>(lhs_le[3]) < static_cast<int64_t>(rhs_le[3])
+             : lhs_le[2] != rhs_le[2] ? lhs_le[2] < rhs_le[2]
+                                      : lhs_le[1] != rhs_le[1] ? lhs_le[1] < rhs_le[1]
+                                                               : lhs_le[0] < rhs_le[0];
 }
 
 BasicDecimal256 operator-(const BasicDecimal256& operand) {
@@ -1315,7 +1332,7 @@ BasicDecimal256 operator-(const BasicDecimal256& operand) {
 }
 
 BasicDecimal256 operator~(const BasicDecimal256& operand) {
-  const std::array<uint64_t, 4>& arr = operand.little_endian_array();
+  const std::array<uint64_t, 4>& arr = operand.native_endian_array();
   BasicDecimal256 result({~arr[0], ~arr[1], ~arr[2], ~arr[3]});
   return result;
 }
diff --git a/cpp/src/arrow/util/basic_decimal.h b/cpp/src/arrow/util/basic_decimal.h
index acc8ea4930f..a808396090a 100644
--- a/cpp/src/arrow/util/basic_decimal.h
+++ b/cpp/src/arrow/util/basic_decimal.h
@@ -23,6 +23,7 @@
 #include <string>
 #include <type_traits>
 
+#include "arrow/util/endian.h"
 #include "arrow/util/macros.h"
 #include "arrow/util/type_traits.h"
 #include "arrow/util/visibility.h"
@@ -45,8 +46,13 @@ class ARROW_EXPORT BasicDecimal128 {
   static constexpr int bit_width = 128;
 
   /// \brief Create a BasicDecimal128 from the two's complement representation.
+#if ARROW_LITTLE_ENDIAN
   constexpr BasicDecimal128(int64_t high, uint64_t low) noexcept
       : low_bits_(low), high_bits_(high) {}
+#else
+  constexpr BasicDecimal128(int64_t high, uint64_t low) noexcept
+      : high_bits_(high), low_bits_(low) {}
+#endif
 
   /// \brief Empty constructor creates a BasicDecimal128 with a value of 0.
   constexpr BasicDecimal128() noexcept : BasicDecimal128(0, 0) {}
@@ -156,8 +162,13 @@ class ARROW_EXPORT BasicDecimal128 {
   static const BasicDecimal128& GetMaxValue();
 
  private:
+#if ARROW_LITTLE_ENDIAN
   uint64_t low_bits_;
   int64_t high_bits_;
+#else
+  int64_t high_bits_;
+  uint64_t low_bits_;
+#endif
 };
 
 ARROW_EXPORT bool operator==(const BasicDecimal128& left, const BasicDecimal128& right);
@@ -193,23 +204,26 @@ class ARROW_EXPORT BasicDecimal256 {
   static constexpr int bit_width = 256;
 
   /// \brief Create a BasicDecimal256 from the two's complement representation.
-  constexpr BasicDecimal256(const std::array<uint64_t, 4>& little_endian_array) noexcept
-      : little_endian_array_(little_endian_array) {}
+  /// Input array is assumed to be in native endianness.
+  constexpr BasicDecimal256(const std::array<uint64_t, 4>& array) noexcept
+      : array_(array) {}
 
   /// \brief Empty constructor creates a BasicDecimal256 with a value of 0.
-  constexpr BasicDecimal256() noexcept : little_endian_array_({0, 0, 0, 0}) {}
+  constexpr BasicDecimal256() noexcept : array_({0, 0, 0, 0}) {}
 
   /// \brief Convert any integer value into a BasicDecimal256.
   template <typename T,
             typename = typename std::enable_if<
                 std::is_integral<T>::value && (sizeof(T) <= sizeof(uint64_t)), T>::type>
   constexpr BasicDecimal256(T value) noexcept
-      : little_endian_array_({static_cast<uint64_t>(value), extend(value), extend(value),
-                              extend(value)}) {}
+      : array_(BitUtil::LittleEndianArray::ToNative<uint64_t, 4>(
+            {static_cast<uint64_t>(value), extend(value), extend(value),
+             extend(value)})) {}
 
-  constexpr BasicDecimal256(const BasicDecimal128& value) noexcept
-      : little_endian_array_({value.low_bits(), static_cast<uint64_t>(value.high_bits()),
-                              extend(value.high_bits()), extend(value.high_bits())}) {}
+  explicit BasicDecimal256(const BasicDecimal128& value) noexcept
+      : array_(BitUtil::LittleEndianArray::ToNative<uint64_t, 4>(
+            {value.low_bits(), static_cast<uint64_t>(value.high_bits()),
+             extend(value.high_bits()), extend(value.high_bits())})) {}
 
   /// \brief Create a BasicDecimal256 from an array of bytes. Bytes are assumed to be in
   /// native-endian byte order.
@@ -231,17 +245,15 @@ class ARROW_EXPORT BasicDecimal256 {
   BasicDecimal256& operator-=(const BasicDecimal256& right);
 
   /// \brief Get the bits of the two's complement representation of the number. The 4
-  /// elements are in little endian order. The bits within each uint64_t element are in
-  /// native endian order. For example,
-  /// BasicDecimal256(123).little_endian_array() = {123, 0, 0, 0};
-  /// BasicDecimal256(-2).little_endian_array() = {0xFF...FE, 0xFF...FF, 0xFF...FF,
+  /// elements are in native endian order. The bits within each uint64_t element are in
+  /// native endian order. For example, on a little endian machine,
+  /// BasicDecimal256(123).native_endian_array() = {123, 0, 0, 0};
+  /// BasicDecimal256(-2).native_endian_array() = {0xFF...FE, 0xFF...FF, 0xFF...FF,
   /// 0xFF...FF}.
-  inline const std::array<uint64_t, 4>& little_endian_array() const {
-    return little_endian_array_;
-  }
+  inline const std::array<uint64_t, 4>& native_endian_array() const { return array_; }
 
   /// \brief Get the lowest bits of the two's complement representation of the number.
-  inline constexpr uint64_t low_bits() const { return little_endian_array_[0]; }
+  inline uint64_t low_bits() const { return BitUtil::LittleEndianArray::Make(array_)[0]; }
 
   /// \brief Return the raw bytes of the value in native-endian byte order.
   std::array<uint8_t, 32> ToBytes() const;
@@ -270,11 +282,11 @@ class ARROW_EXPORT BasicDecimal256 {
   bool FitsInPrecision(int32_t precision) const;
 
   inline int64_t Sign() const {
-    return 1 | (static_cast<int64_t>(little_endian_array_[3]) >> 63);
+    return 1 | (static_cast<int64_t>(BitUtil::LittleEndianArray::Make(array_)[3]) >> 63);
   }
 
   inline int64_t IsNegative() const {
-    return static_cast<int64_t>(little_endian_array_[3]) < 0;
+    return static_cast<int64_t>(BitUtil::LittleEndianArray::Make(array_)[3]) < 0;
   }
 
   /// \brief Multiply this number by another number. The result is truncated to 256 bits.
@@ -301,17 +313,17 @@ class ARROW_EXPORT BasicDecimal256 {
   BasicDecimal256& operator/=(const BasicDecimal256& right);
 
  private:
-  std::array<uint64_t, 4> little_endian_array_;
+  std::array<uint64_t, 4> array_;
 };
 
 ARROW_EXPORT inline bool operator==(const BasicDecimal256& left,
                                     const BasicDecimal256& right) {
-  return left.little_endian_array() == right.little_endian_array();
+  return left.native_endian_array() == right.native_endian_array();
 }
 
 ARROW_EXPORT inline bool operator!=(const BasicDecimal256& left,
                                     const BasicDecimal256& right) {
-  return left.little_endian_array() != right.little_endian_array();
+  return left.native_endian_array() != right.native_endian_array();
 }
 
 ARROW_EXPORT bool operator<(const BasicDecimal256& left, const BasicDecimal256& right);
diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc
index 7aefd1ab9cd..66deb97cc96 100644
--- a/cpp/src/arrow/util/decimal.cc
+++ b/cpp/src/arrow/util/decimal.cc
@@ -671,13 +671,15 @@ Decimal256::Decimal256(const std::string& str) : Decimal256() {
 
 std::string Decimal256::ToIntegerString() const {
   std::string result;
-  if (static_cast<int64_t>(little_endian_array()[3]) < 0) {
+  if (IsNegative()) {
     result.push_back('-');
     Decimal256 abs = *this;
     abs.Negate();
-    AppendLittleEndianArrayToString(abs.little_endian_array(), &result);
+    AppendLittleEndianArrayToString(
+        BitUtil::LittleEndianArray::FromNative(abs.native_endian_array()), &result);
   } else {
-    AppendLittleEndianArrayToString(little_endian_array(), &result);
+    AppendLittleEndianArrayToString(
+        BitUtil::LittleEndianArray::FromNative(native_endian_array()), &result);
   }
   return result;
 }
@@ -725,7 +727,7 @@ Status Decimal256::FromString(const util::string_view& s, Decimal256* out,
     ShiftAndAdd(dec.whole_digits, little_endian_array.data(), little_endian_array.size());
     ShiftAndAdd(dec.fractional_digits, little_endian_array.data(),
                 little_endian_array.size());
-    *out = Decimal256(little_endian_array);
+    *out = Decimal256(BitUtil::LittleEndianArray::ToNative(little_endian_array));
 
     if (dec.sign == '-') {
       out->Negate();
@@ -798,7 +800,7 @@ Result<Decimal256> Decimal256::FromBigEndian(const uint8_t* bytes, int32_t lengt
     length -= word_length;
   }
 
-  return Decimal256(little_endian_array);
+  return Decimal256(BitUtil::LittleEndianArray::ToNative(little_endian_array));
 }
 
 Status Decimal256::ToArrowStatus(DecimalStatus dstatus) const {
@@ -841,9 +843,9 @@ struct Decimal256RealConversion {
     DCHECK_LT(part1, 1.8446744073709552e+19);  // 2**64
     DCHECK_GE(part0, 0);
     DCHECK_LT(part0, 1.8446744073709552e+19);  // 2**64
-    return Decimal256(std::array<uint64_t, 4>{
-        static_cast<uint64_t>(part0), static_cast<uint64_t>(part1),
-        static_cast<uint64_t>(part2), static_cast<uint64_t>(part3)});
+    return Decimal256(BitUtil::LittleEndianArray::ToNative<uint64_t, 4>(
+        {static_cast<uint64_t>(part0), static_cast<uint64_t>(part1),
+         static_cast<uint64_t>(part2), static_cast<uint64_t>(part3)}));
   }
 
   static Result<Decimal256> FromReal(Real x, int32_t precision, int32_t scale) {
@@ -865,11 +867,11 @@ struct Decimal256RealConversion {
   static Real ToRealPositive(const Decimal256& decimal, int32_t scale) {
     DCHECK_GE(decimal, 0);
     Real x = 0;
-    const auto& parts = decimal.little_endian_array();
-    x += Derived::two_to_192(static_cast<Real>(parts[3]));
-    x += Derived::two_to_128(static_cast<Real>(parts[2]));
-    x += Derived::two_to_64(static_cast<Real>(parts[1]));
-    x += static_cast<Real>(parts[0]);
+    const auto parts_le = BitUtil::LittleEndianArray::Make(decimal.native_endian_array());
+    x += Derived::two_to_192(static_cast<Real>(parts_le[3]));
+    x += Derived::two_to_128(static_cast<Real>(parts_le[2]));
+    x += Derived::two_to_64(static_cast<Real>(parts_le[1]));
+    x += static_cast<Real>(parts_le[0]);
     if (scale >= -76 && scale <= 76) {
       x *= Derived::powers_of_ten()[-scale + 76];
     } else {
@@ -879,7 +881,7 @@ struct Decimal256RealConversion {
   }
 
   static Real ToReal(Decimal256 decimal, int32_t scale) {
-    if (decimal.little_endian_array()[3] & (1ULL << 63)) {
+    if (decimal.IsNegative()) {
       // Convert the absolute value to avoid precision loss
       decimal.Negate();
       return -ToRealPositive(decimal, scale);
diff --git a/cpp/src/arrow/util/decimal_test.cc b/cpp/src/arrow/util/decimal_test.cc
index fdcbd945e66..29964e0b9e2 100644
--- a/cpp/src/arrow/util/decimal_test.cc
+++ b/cpp/src/arrow/util/decimal_test.cc
@@ -1281,9 +1281,11 @@ TEST(Decimal256Test, TestComparators) {
   constexpr size_t num_values =
       sizeof(kSortedDecimal256Bits) / sizeof(kSortedDecimal256Bits[0]);
   for (size_t i = 0; i < num_values; ++i) {
-    Decimal256 left(kSortedDecimal256Bits[i]);
+    Decimal256 left(
+        ::arrow::BitUtil::LittleEndianArray::ToNative(kSortedDecimal256Bits[i]));
     for (size_t j = 0; j < num_values; ++j) {
-      Decimal256 right(kSortedDecimal256Bits[j]);
+      Decimal256 right(
+          ::arrow::BitUtil::LittleEndianArray::ToNative(kSortedDecimal256Bits[j]));
       EXPECT_EQ(i == j, left == right);
       EXPECT_EQ(i != j, left != right);
       EXPECT_EQ(i < j, left < right);
@@ -1296,7 +1298,7 @@ TEST(Decimal256Test, TestComparators) {
 
 TEST(Decimal256Test, TestToBytesRoundTrip) {
   for (const std::array<uint64_t, 4>& bits : kSortedDecimal256Bits) {
-    Decimal256 decimal(bits);
+    Decimal256 decimal(::arrow::BitUtil::LittleEndianArray::ToNative(bits));
     EXPECT_EQ(decimal, Decimal256(decimal.ToBytes().data()));
   }
 }
@@ -1318,18 +1320,21 @@ TYPED_TEST_SUITE(Decimal256Test, Decimal256Types);
 TYPED_TEST(Decimal256Test, ConstructibleFromAnyIntegerType) {
   using UInt64Array = std::array<uint64_t, 4>;
   Decimal256 value(TypeParam{42});
-  EXPECT_EQ(UInt64Array({42, 0, 0, 0}), value.little_endian_array());
+  EXPECT_EQ(UInt64Array({42, 0, 0, 0}),
+            ::arrow::BitUtil::LittleEndianArray::FromNative(value.native_endian_array()));
 
   TypeParam max = std::numeric_limits<TypeParam>::max();
   Decimal256 max_value(max);
-  EXPECT_EQ(UInt64Array({static_cast<uint64_t>(max), 0, 0, 0}),
-            max_value.little_endian_array());
+  EXPECT_EQ(
+      UInt64Array({static_cast<uint64_t>(max), 0, 0, 0}),
+      ::arrow::BitUtil::LittleEndianArray::FromNative(max_value.native_endian_array()));
 
   TypeParam min = std::numeric_limits<TypeParam>::min();
   Decimal256 min_value(min);
   uint64_t high_bits = std::is_signed<TypeParam>::value ? ~uint64_t{0} : uint64_t{0};
-  EXPECT_EQ(UInt64Array({static_cast<uint64_t>(min), high_bits, high_bits, high_bits}),
-            min_value.little_endian_array());
+  EXPECT_EQ(
+      UInt64Array({static_cast<uint64_t>(min), high_bits, high_bits, high_bits}),
+      ::arrow::BitUtil::LittleEndianArray::FromNative(min_value.native_endian_array()));
 }
 
 TEST(Decimal256Test, ConstructibleFromBool) {
@@ -1432,12 +1437,12 @@ TEST(Decimal256Test, Shift) {
     Decimal256 v("-12346789123456789123456789");
     v <<= 15;
     ASSERT_EQ(v, Decimal256("-404579585997432065997432061952"))
-        << std::hex << v.little_endian_array()[0] << " " << v.little_endian_array()[1]
-        << " " << v.little_endian_array()[2] << " " << v.little_endian_array()[3] << "\n"
-        << Decimal256("-404579585997432065997432061952").little_endian_array()[0] << " "
-        << Decimal256("-404579585997432065997432061952").little_endian_array()[1] << " "
-        << Decimal256("-404579585997432065997432061952").little_endian_array()[2] << " "
-        << Decimal256("-404579585997432065997432061952").little_endian_array()[3];
+        << std::hex << v.native_endian_array()[0] << " " << v.native_endian_array()[1]
+        << " " << v.native_endian_array()[2] << " " << v.native_endian_array()[3] << "\n"
+        << Decimal256("-404579585997432065997432061952").native_endian_array()[0] << " "
+        << Decimal256("-404579585997432065997432061952").native_endian_array()[1] << " "
+        << Decimal256("-404579585997432065997432061952").native_endian_array()[2] << " "
+        << Decimal256("-404579585997432065997432061952").native_endian_array()[3];
     v <<= 30;
     ASSERT_EQ(v, Decimal256("-434414022622047565860171081516421480448"));
     v <<= 66;
diff --git a/cpp/src/arrow/util/endian.h b/cpp/src/arrow/util/endian.h
index 0cb2e44d275..0fae454e0eb 100644
--- a/cpp/src/arrow/util/endian.h
+++ b/cpp/src/arrow/util/endian.h
@@ -52,6 +52,9 @@
 #define ARROW_BYTE_SWAP32 __builtin_bswap32
 #endif
 
+#include <algorithm>
+#include <array>
+
 #include "arrow/util/type_traits.h"
 #include "arrow/util/ubsan.h"
 
@@ -177,5 +180,66 @@ static inline T FromLittleEndian(T value) {
 }
 #endif
 
+// Handle endianness in *word* granuality (keep individual array element untouched)
+namespace LittleEndianArray {
+
+namespace detail {
+
+// Read a native endian array as little endian
+template <typename T, size_t N>
+struct Reader {
+  const std::array<T, N>& native_array;
+
+  explicit Reader(const std::array<T, N>& native_array) : native_array(native_array) {}
+
+  const T& operator[](size_t i) const {
+    return native_array[ARROW_LITTLE_ENDIAN ? i : N - 1 - i];
+  }
+};
+
+// Read/write a native endian array as little endian
+template <typename T, size_t N>
+struct Writer {
+  std::array<T, N>* native_array;
+
+  explicit Writer(std::array<T, N>* native_array) : native_array(native_array) {}
+
+  const T& operator[](size_t i) const {
+    return (*native_array)[ARROW_LITTLE_ENDIAN ? i : N - 1 - i];
+  }
+  T& operator[](size_t i) { return (*native_array)[ARROW_LITTLE_ENDIAN ? i : N - 1 - i]; }
+};
+
+}  // namespace detail
+
+// Construct array reader and try to deduce template augments
+template <typename T, size_t N>
+static inline detail::Reader<T, N> Make(const std::array<T, N>& native_array) {
+  return detail::Reader<T, N>(native_array);
+}
+
+// Construct array writer and try to deduce template augments
+template <typename T, size_t N>
+static inline detail::Writer<T, N> Make(std::array<T, N>* native_array) {
+  return detail::Writer<T, N>(native_array);
+}
+
+// Convert little endian array to native endian
+template <typename T, size_t N>
+static inline std::array<T, N> ToNative(std::array<T, N> array) {
+  if (!ARROW_LITTLE_ENDIAN) {
+    std::reverse(array.begin(), array.end());
+  }
+  return array;
+}
+
+// Convert native endian array to little endian
+template <typename T, size_t N>
+static inline std::array<T, N> FromNative(std::array<T, N> array) {
+  return ToNative(array);
+}
+
+}  // namespace LittleEndianArray
+
 }  // namespace BitUtil
 }  // namespace arrow

From 3f592a0a24432d3eee9f66081714e7cf4f9928f5 Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Thu, 22 Jul 2021 19:19:48 +0200
Subject: [PATCH 628/719] ARROW-13424: [C++] Remove needless workaround for
 conda and benchmark

https://github.com/google/benchmark/issues/1046 has been resolved.

Closes #10771 from kou/cpp-conda-remove-workaround-for-benchmark

Lead-authored-by: Sutou Kouhei <kou@clear-code.com>
Co-authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 ci/conda_env_cpp.txt                        |  2 +-
 cpp/cmake_modules/ThirdpartyToolchain.cmake | 12 ------------
 2 files changed, 1 insertion(+), 13 deletions(-)

diff --git a/ci/conda_env_cpp.txt b/ci/conda_env_cpp.txt
index def40deb98a..2b5fc32d711 100644
--- a/ci/conda_env_cpp.txt
+++ b/ci/conda_env_cpp.txt
@@ -17,7 +17,7 @@
 
 # workaround for https://issues.apache.org/jira/browse/ARROW-13134
 aws-sdk-cpp<1.9
-benchmark=1.5.2
+benchmark>=1.5.4
 boost-cpp>=1.68.0
 brotli
 bzip2
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index e6852d9c210..0631d277b08 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -1837,18 +1837,6 @@ endmacro()
 if(ARROW_BUILD_BENCHMARKS)
   # ArgsProduct() is available since 1.5.2
   set(BENCHMARK_REQUIRED_VERSION 1.5.2)
-  if("${ARROW_DEPENDENCY_SOURCE}" STREQUAL "CONDA" AND "${benchmark_SOURCE}" STREQUAL
-                                                       "SYSTEM")
-    # TODO: Remove this workaround once
-    # https://github.com/google/benchmark/issues/1046 is resolved.
-    #
-    # benchmark doesn't set suitable version when we use released
-    # archive. So the benchmark package on conda-forge isn't report
-    # the real version. We accept all the benchmark package with
-    # conda. Conda users should install benchmark 1.5.2 or later by
-    # ci/conda_env_cpp.txt.
-    set(BENCHMARK_REQUIRED_VERSION 0.0.0)
-  endif()
   resolve_dependency(benchmark
                      REQUIRED_VERSION
                      ${BENCHMARK_REQUIRED_VERSION}

From e0401123736c85283e527797a113a3c38c0915f2 Mon Sep 17 00:00:00 2001
From: michalursa <michal@ursacomputing.com>
Date: Fri, 23 Jul 2021 09:00:59 -0400
Subject: [PATCH 629/719] ARROW-12759: [C++][Compute] Add ExecNode for group by

Adding ExecNode for grouped aggregation.

Closes #10660 from michalursa/ARROW-12759-execnode-for-groupby

Lead-authored-by: michalursa <michal@ursacomputing.com>
Co-authored-by: Benjamin Kietzman <bengilgit@gmail.com>
Signed-off-by: Benjamin Kietzman <bengilgit@gmail.com>
---
 .../arrow/compute_register_example.cc         |  10 +-
 cpp/src/arrow/CMakeLists.txt                  |   1 +
 cpp/src/arrow/array/builder_base.h            |   3 +-
 cpp/src/arrow/buffer.h                        |   5 +-
 cpp/src/arrow/compute/api_aggregate.h         |   2 +-
 cpp/src/arrow/compute/api_vector.h            |   3 +-
 cpp/src/arrow/compute/exec.cc                 |   7 +
 cpp/src/arrow/compute/exec.h                  |   2 +
 cpp/src/arrow/compute/exec/exec_plan.cc       | 612 ++++++++++++++++--
 cpp/src/arrow/compute/exec/exec_plan.h        |  18 +
 cpp/src/arrow/compute/exec/plan_test.cc       | 110 +++-
 cpp/src/arrow/compute/exec/test_util.cc       |   6 +-
 cpp/src/arrow/compute/kernel.h                |  23 +-
 .../kernels/aggregate_basic_internal.h        |   6 +-
 .../arrow/compute/kernels/hash_aggregate.cc   | 600 +++++++++--------
 .../compute/kernels/hash_aggregate_test.cc    | 234 +++++--
 cpp/src/arrow/dataset/scanner.cc              |  90 +--
 cpp/src/arrow/dataset/scanner_test.cc         | 216 ++++++-
 cpp/src/arrow/util/future.cc                  |  23 +-
 cpp/src/arrow/util/unreachable.cc             |  29 +
 cpp/src/arrow/util/unreachable.h              |  22 +
 21 files changed, 1531 insertions(+), 491 deletions(-)
 create mode 100644 cpp/src/arrow/util/unreachable.cc
 create mode 100644 cpp/src/arrow/util/unreachable.h

diff --git a/cpp/examples/arrow/compute_register_example.cc b/cpp/examples/arrow/compute_register_example.cc
index d1a1372b82a..3c20a3d2a87 100644
--- a/cpp/examples/arrow/compute_register_example.cc
+++ b/cpp/examples/arrow/compute_register_example.cc
@@ -46,7 +46,7 @@ class ExampleFunctionOptionsType : public cp::FunctionOptionsType {
   }
   // optional: support for serialization
   // Result<std::shared_ptr<Buffer>> Serialize(const FunctionOptions&) const override;
-  // Result<std::unique_ptr<FunctionOptions>> Deserialize(const Buffer& buffer) const override;
+  // Result<std::unique_ptr<FunctionOptions>> Deserialize(const Buffer&) const override;
 };
 
 cp::FunctionOptionsType* GetExampleFunctionOptionsType() {
@@ -74,8 +74,8 @@ const cp::FunctionDoc func_doc{
 int main(int argc, char** argv) {
   const std::string name = "compute_register_example";
   auto func = std::make_shared<cp::ScalarFunction>(name, cp::Arity::Unary(), &func_doc);
-  func->AddKernel({cp::InputType::Array(arrow::int64())}, arrow::int64(),
-                  ExampleFunctionImpl);
+  ABORT_ON_FAILURE(func->AddKernel({cp::InputType::Array(arrow::int64())}, arrow::int64(),
+                                   ExampleFunctionImpl));
 
   auto registry = cp::GetFunctionRegistry();
   ABORT_ON_FAILURE(registry->AddFunction(std::move(func)));
@@ -90,8 +90,8 @@ int main(int argc, char** argv) {
 
   std::cout << maybe_result->make_array()->ToString() << std::endl;
 
-  // Expression serialization will raise NotImplemented if an expression includes FunctionOptions
-  // for which serialization is not supported.
+  // Expression serialization will raise NotImplemented if an expression includes
+  // FunctionOptions for which serialization is not supported.
   auto expr = cp::call(name, {}, options);
   auto maybe_serialized = cp::Serialize(expr);
   std::cerr << maybe_serialized.status().ToString() << std::endl;
diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index d2f80ce7213..cb6e91bd40e 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -218,6 +218,7 @@ set(ARROW_SRCS
     util/thread_pool.cc
     util/time.cc
     util/trie.cc
+    util/unreachable.cc
     util/uri.cc
     util/utf8.cc
     util/value_parsing.cc
diff --git a/cpp/src/arrow/array/builder_base.h b/cpp/src/arrow/array/builder_base.h
index 905b3c1b491..c2aba4e959f 100644
--- a/cpp/src/arrow/array/builder_base.h
+++ b/cpp/src/arrow/array/builder_base.h
@@ -50,9 +50,10 @@ class ARROW_EXPORT ArrayBuilder {
  public:
   explicit ArrayBuilder(MemoryPool* pool) : pool_(pool), null_bitmap_builder_(pool) {}
 
-  virtual ~ArrayBuilder() = default;
   ARROW_DEFAULT_MOVE_AND_ASSIGN(ArrayBuilder);
 
+  virtual ~ArrayBuilder() = default;
+
   /// For nested types. Since the objects are owned by this class instance, we
   /// skip shared pointers and just return a raw pointer
   ArrayBuilder* child(int i) { return children_[i].get(); }
diff --git a/cpp/src/arrow/buffer.h b/cpp/src/arrow/buffer.h
index 6c47a464b1d..cfd525ab2d6 100644
--- a/cpp/src/arrow/buffer.h
+++ b/cpp/src/arrow/buffer.h
@@ -416,7 +416,10 @@ class ARROW_EXPORT ResizableBuffer : public MutableBuffer {
   ///
   /// @param new_size The new size for the buffer.
   /// @param shrink_to_fit Whether to shrink the capacity if new size < current size
-  virtual Status Resize(const int64_t new_size, bool shrink_to_fit = true) = 0;
+  virtual Status Resize(const int64_t new_size, bool shrink_to_fit) = 0;
+  Status Resize(const int64_t new_size) {
+    return Resize(new_size, /*shrink_to_fit=*/true);
+  }
 
   /// Ensure that buffer has enough memory allocated to fit the indicated
   /// capacity (and meets the 64 byte padding requirement in Layout.md).
diff --git a/cpp/src/arrow/compute/api_aggregate.h b/cpp/src/arrow/compute/api_aggregate.h
index 7a6c44bd923..d66d4f1517c 100644
--- a/cpp/src/arrow/compute/api_aggregate.h
+++ b/cpp/src/arrow/compute/api_aggregate.h
@@ -425,7 +425,7 @@ struct ARROW_EXPORT Aggregate {
 /// This will be replaced by streaming execution operators.
 ARROW_EXPORT
 Result<Datum> GroupBy(const std::vector<Datum>& arguments, const std::vector<Datum>& keys,
-                      const std::vector<Aggregate>& aggregates,
+                      const std::vector<Aggregate>& aggregates, bool use_threads = false,
                       ExecContext* ctx = default_exec_context());
 
 }  // namespace internal
diff --git a/cpp/src/arrow/compute/api_vector.h b/cpp/src/arrow/compute/api_vector.h
index 9d8d4271db8..32439980f54 100644
--- a/cpp/src/arrow/compute/api_vector.h
+++ b/cpp/src/arrow/compute/api_vector.h
@@ -18,6 +18,7 @@
 #pragma once
 
 #include <memory>
+#include <utility>
 
 #include "arrow/compute/function.h"
 #include "arrow/datum.h"
@@ -87,7 +88,7 @@ enum class SortOrder {
 class ARROW_EXPORT SortKey : public util::EqualityComparable<SortKey> {
  public:
   explicit SortKey(std::string name, SortOrder order = SortOrder::Ascending)
-      : name(name), order(order) {}
+      : name(std::move(name)), order(order) {}
 
   using util::EqualityComparable<SortKey>::Equals;
   using util::EqualityComparable<SortKey>::operator==;
diff --git a/cpp/src/arrow/compute/exec.cc b/cpp/src/arrow/compute/exec.cc
index 63f8d39f551..2a32c96ed3b 100644
--- a/cpp/src/arrow/compute/exec.cc
+++ b/cpp/src/arrow/compute/exec.cc
@@ -21,6 +21,7 @@
 #include <cstddef>
 #include <cstdint>
 #include <memory>
+#include <sstream>
 #include <utility>
 #include <vector>
 
@@ -102,6 +103,12 @@ void PrintTo(const ExecBatch& batch, std::ostream* os) {
   }
 }
 
+std::string ExecBatch::ToString() const {
+  std::stringstream ss;
+  PrintTo(*this, &ss);
+  return ss.str();
+}
+
 ExecBatch ExecBatch::Slice(int64_t offset, int64_t length) const {
   ExecBatch out = *this;
   for (auto& value : out.values) {
diff --git a/cpp/src/arrow/compute/exec.h b/cpp/src/arrow/compute/exec.h
index de1b695de48..1b70ee244cb 100644
--- a/cpp/src/arrow/compute/exec.h
+++ b/cpp/src/arrow/compute/exec.h
@@ -233,6 +233,8 @@ struct ARROW_EXPORT ExecBatch {
     return result;
   }
 
+  std::string ToString() const;
+
   ARROW_EXPORT friend void PrintTo(const ExecBatch&, std::ostream*);
 };
 
diff --git a/cpp/src/arrow/compute/exec/exec_plan.cc b/cpp/src/arrow/compute/exec/exec_plan.cc
index 433e895c243..20c8c347cc1 100644
--- a/cpp/src/arrow/compute/exec/exec_plan.cc
+++ b/cpp/src/arrow/compute/exec/exec_plan.cc
@@ -22,21 +22,29 @@
 #include <unordered_map>
 #include <unordered_set>
 
+#include "arrow/array/concatenate.h"
 #include "arrow/array/util.h"
 #include "arrow/compute/api_vector.h"
 #include "arrow/compute/exec.h"
 #include "arrow/compute/exec/expression.h"
+#include "arrow/compute/exec_internal.h"
 #include "arrow/compute/registry.h"
 #include "arrow/datum.h"
 #include "arrow/record_batch.h"
 #include "arrow/result.h"
 #include "arrow/util/async_generator.h"
+#include "arrow/util/bit_util.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/logging.h"
 #include "arrow/util/optional.h"
+#include "arrow/util/task_group.h"
+#include "arrow/util/thread_pool.h"
+#include "arrow/util/unreachable.h"
+#include "arrow/util/vector.h"
 
 namespace arrow {
 
+using BitUtil::CountLeadingZeros;
 using internal::checked_cast;
 using internal::checked_pointer_cast;
 
@@ -237,6 +245,15 @@ Status ExecNode::Validate() const {
   return Status::OK();
 }
 
+bool ExecNode::ErrorIfNotOk(Status status) {
+  if (status.ok()) return false;
+
+  for (auto out : outputs_) {
+    out->ErrorReceived(this, out == outputs_.back() ? std::move(status) : status);
+  }
+  return true;
+}
+
 struct SourceNode : ExecNode {
   SourceNode(ExecPlan* plan, std::string label, std::shared_ptr<Schema> output_schema,
              AsyncGenerator<util::optional<ExecBatch>> generator)
@@ -247,8 +264,7 @@ struct SourceNode : ExecNode {
   const char* kind_name() override { return "SourceNode"; }
 
   [[noreturn]] static void NoInputs() {
-    DCHECK(false) << "no inputs; this should never be called";
-    std::abort();
+    Unreachable("no inputs; this should never be called");
   }
   [[noreturn]] void InputReceived(ExecNode*, int, ExecBatch) override { NoInputs(); }
   [[noreturn]] void ErrorReceived(ExecNode*, Status) override { NoInputs(); }
@@ -377,10 +393,7 @@ struct FilterNode : ExecNode {
     DCHECK_EQ(input, inputs_[0]);
 
     auto maybe_filtered = DoFilter(std::move(batch));
-    if (!maybe_filtered.ok()) {
-      outputs_[0]->ErrorReceived(this, maybe_filtered.status());
-      return;
-    }
+    if (ErrorIfNotOk(maybe_filtered.status())) return;
 
     maybe_filtered->guarantee = batch.guarantee;
     outputs_[0]->InputReceived(this, seq, maybe_filtered.MoveValueUnsafe());
@@ -456,10 +469,7 @@ struct ProjectNode : ExecNode {
     DCHECK_EQ(input, inputs_[0]);
 
     auto maybe_projected = DoProject(std::move(batch));
-    if (!maybe_projected.ok()) {
-      outputs_[0]->ErrorReceived(this, maybe_projected.status());
-      return;
-    }
+    if (ErrorIfNotOk(maybe_projected.status())) return;
 
     maybe_projected->guarantee = batch.guarantee;
     outputs_[0]->InputReceived(this, seq, maybe_projected.MoveValueUnsafe());
@@ -519,6 +529,47 @@ Result<ExecNode*> MakeProjectNode(ExecNode* input, std::string label,
       input, std::move(label), schema(std::move(fields)), std::move(exprs));
 }
 
+class AtomicCounter {
+ public:
+  AtomicCounter() = default;
+
+  int count() const { return count_.load(); }
+
+  util::optional<int> total() const {
+    int total = total_.load();
+    if (total == -1) return {};
+    return total;
+  }
+
+  // return true if the counter is complete
+  bool Increment() {
+    DCHECK_NE(count_.load(), total_.load());
+    int count = count_.fetch_add(1) + 1;
+    if (count != total_.load()) return false;
+    return DoneOnce();
+  }
+
+  // return true if the counter is complete
+  bool SetTotal(int total) {
+    total_.store(total);
+    if (count_.load() != total) return false;
+    return DoneOnce();
+  }
+
+  // return true if the counter has not already been completed
+  bool Cancel() { return DoneOnce(); }
+
+ private:
+  // ensure there is only one true return from Increment(), SetTotal(), or Cancel()
+  bool DoneOnce() {
+    bool expected = false;
+    return complete_.compare_exchange_strong(expected, true);
+  }
+
+  std::atomic<int> count_{0}, total_{-1};
+  std::atomic<bool> complete_{false};
+};
+
 struct SinkNode : ExecNode {
   SinkNode(ExecNode* input, std::string label,
            AsyncGenerator<util::optional<ExecBatch>>* generator)
@@ -543,8 +594,7 @@ struct SinkNode : ExecNode {
 
   // sink nodes have no outputs from which to feel backpressure
   [[noreturn]] static void NoOutputs() {
-    DCHECK(false) << "no outputs; this should never be called";
-    std::abort();
+    Unreachable("no outputs; this should never be called");
   }
   [[noreturn]] void ResumeProducing(ExecNode* output) override { NoOutputs(); }
   [[noreturn]] void PauseProducing(ExecNode* output) override { NoOutputs(); }
@@ -560,37 +610,31 @@ struct SinkNode : ExecNode {
   void InputReceived(ExecNode* input, int seq_num, ExecBatch batch) override {
     DCHECK_EQ(input, inputs_[0]);
 
-    std::unique_lock<std::mutex> lock(mutex_);
-    if (finished_.is_finished()) return;
+    bool did_push = producer_.Push(std::move(batch));
+    if (!did_push) return;  // producer_ was Closed already
 
-    ++num_received_;
-    if (num_received_ == emit_stop_) {
-      lock.unlock();
-      producer_.Push(std::move(batch));
-      Finish();
-      return;
+    if (auto total = input_counter_.total()) {
+      DCHECK_LE(seq_num, *total);
     }
 
-    if (emit_stop_ != -1) {
-      DCHECK_LE(seq_num, emit_stop_);
+    if (input_counter_.Increment()) {
+      Finish();
     }
-
-    lock.unlock();
-    producer_.Push(std::move(batch));
   }
 
   void ErrorReceived(ExecNode* input, Status error) override {
     DCHECK_EQ(input, inputs_[0]);
+
     producer_.Push(std::move(error));
-    Finish();
+
+    if (input_counter_.Cancel()) {
+      Finish();
+    }
     inputs_[0]->StopProducing(this);
   }
 
   void InputFinished(ExecNode* input, int seq_stop) override {
-    std::unique_lock<std::mutex> lock(mutex_);
-    emit_stop_ = seq_stop;
-    if (num_received_ == emit_stop_) {
-      lock.unlock();
+    if (input_counter_.SetTotal(seq_stop)) {
       Finish();
     }
   }
@@ -602,10 +646,7 @@ struct SinkNode : ExecNode {
     }
   }
 
-  std::mutex mutex_;
-
-  int num_received_ = 0;
-  int emit_stop_ = -1;
+  AtomicCounter input_counter_;
   Future<> finished_ = Future<>::MakeFinished();
 
   PushGenerator<util::optional<ExecBatch>>::Producer producer_;
@@ -646,6 +687,34 @@ std::shared_ptr<RecordBatchReader> MakeGeneratorReader(
   return out;
 }
 
+class ThreadIndexer {
+ public:
+  size_t operator()() {
+    auto id = std::this_thread::get_id();
+
+    std::unique_lock<std::mutex> lock(mutex_);
+    const auto& id_index = *id_to_index_.emplace(id, id_to_index_.size()).first;
+
+    return Check(id_index.second);
+  }
+
+  static size_t Capacity() {
+    static size_t max_size = arrow::internal::ThreadPool::DefaultCapacity();
+    return max_size;
+  }
+
+ private:
+  size_t Check(size_t thread_index) {
+    DCHECK_LT(thread_index, Capacity()) << "thread index " << thread_index
+                                        << " is out of range [0, " << Capacity() << ")";
+
+    return thread_index;
+  }
+
+  std::mutex mutex_;
+  std::unordered_map<std::thread::id, size_t> id_to_index_;
+};
+
 struct ScalarAggregateNode : ExecNode {
   ScalarAggregateNode(ExecNode* input, std::string label,
                       std::shared_ptr<Schema> output_schema,
@@ -663,6 +732,7 @@ struct ScalarAggregateNode : ExecNode {
     for (size_t i = 0; i < kernels_.size(); ++i) {
       KernelContext batch_ctx{plan()->exec_context()};
       batch_ctx.SetState(states_[i][thread_index].get());
+
       ExecBatch single_column_batch{{batch.values[i]}, batch.length};
       RETURN_NOT_OK(kernels_[i]->consume(&batch_ctx, single_column_batch));
     }
@@ -672,24 +742,12 @@ struct ScalarAggregateNode : ExecNode {
   void InputReceived(ExecNode* input, int seq, ExecBatch batch) override {
     DCHECK_EQ(input, inputs_[0]);
 
-    std::unique_lock<std::mutex> lock(mutex_);
-    auto it =
-        thread_indices_.emplace(std::this_thread::get_id(), thread_indices_.size()).first;
-    auto thread_index = it->second;
-
-    lock.unlock();
+    auto thread_index = get_thread_index_();
 
-    Status st = DoConsume(std::move(batch), thread_index);
-    if (!st.ok()) {
-      outputs_[0]->ErrorReceived(this, std::move(st));
-      return;
-    }
+    if (ErrorIfNotOk(DoConsume(std::move(batch), thread_index))) return;
 
-    lock.lock();
-    ++num_received_;
-    st = MaybeFinish(&lock);
-    if (!st.ok()) {
-      outputs_[0]->ErrorReceived(this, std::move(st));
+    if (input_counter_.Increment()) {
+      ErrorIfNotOk(Finish());
     }
   }
 
@@ -698,14 +756,11 @@ struct ScalarAggregateNode : ExecNode {
     outputs_[0]->ErrorReceived(this, std::move(error));
   }
 
-  void InputFinished(ExecNode* input, int seq) override {
+  void InputFinished(ExecNode* input, int num_total) override {
     DCHECK_EQ(input, inputs_[0]);
-    std::unique_lock<std::mutex> lock(mutex_);
-    num_total_ = seq;
-    Status st = MaybeFinish(&lock);
 
-    if (!st.ok()) {
-      outputs_[0]->ErrorReceived(this, std::move(st));
+    if (input_counter_.SetTotal(num_total)) {
+      ErrorIfNotOk(Finish());
     }
   }
 
@@ -726,18 +781,16 @@ struct ScalarAggregateNode : ExecNode {
   }
 
   void StopProducing() override {
+    if (input_counter_.Cancel()) {
+      finished_.MarkFinished();
+    }
     inputs_[0]->StopProducing(this);
-    finished_.MarkFinished();
   }
 
   Future<> finished() override { return finished_; }
 
  private:
-  Status MaybeFinish(std::unique_lock<std::mutex>* lock) {
-    if (num_received_ != num_total_) return Status::OK();
-
-    if (states_.empty()) return Status::OK();
-
+  Status Finish() {
     ExecBatch batch{{}, 1};
     batch.values.resize(kernels_.size());
 
@@ -747,21 +800,19 @@ struct ScalarAggregateNode : ExecNode {
                                              kernels_[i], &ctx, std::move(states_[i])));
       RETURN_NOT_OK(kernels_[i]->finalize(&ctx, &batch.values[i]));
     }
-    states_.clear();
-    lock->unlock();
-
-    outputs_[0]->InputReceived(this, 0, batch);
 
+    outputs_[0]->InputReceived(this, 0, std::move(batch));
     finished_.MarkFinished();
     return Status::OK();
   }
 
   Future<> finished_ = Future<>::MakeFinished();
   std::vector<const ScalarAggregateKernel*> kernels_;
+
   std::vector<std::vector<std::unique_ptr<KernelState>>> states_;
-  std::unordered_map<std::thread::id, size_t> thread_indices_;
-  std::mutex mutex_;
-  int num_received_ = 0, num_total_ = -1;
+
+  ThreadIndexer get_thread_index_;
+  AtomicCounter input_counter_;
 };
 
 Result<ExecNode*> MakeScalarAggregateNode(ExecNode* input, std::string label,
@@ -797,7 +848,7 @@ Result<ExecNode*> MakeScalarAggregateNode(ExecNode* input, std::string label,
     }
 
     KernelContext kernel_ctx{exec_ctx};
-    states[i].resize(exec_ctx->executor() ? exec_ctx->executor()->GetCapacity() : 1);
+    states[i].resize(ThreadIndexer::Capacity());
     RETURN_NOT_OK(Kernel::InitAll(&kernel_ctx,
                                   KernelInitArgs{kernels[i],
                                                  {
@@ -819,5 +870,426 @@ Result<ExecNode*> MakeScalarAggregateNode(ExecNode* input, std::string label,
       std::move(states));
 }
 
+namespace internal {
+
+Result<std::vector<const HashAggregateKernel*>> GetKernels(
+    ExecContext* ctx, const std::vector<internal::Aggregate>& aggregates,
+    const std::vector<ValueDescr>& in_descrs);
+
+Result<std::vector<std::unique_ptr<KernelState>>> InitKernels(
+    const std::vector<const HashAggregateKernel*>& kernels, ExecContext* ctx,
+    const std::vector<internal::Aggregate>& aggregates,
+    const std::vector<ValueDescr>& in_descrs);
+
+Result<FieldVector> ResolveKernels(
+    const std::vector<internal::Aggregate>& aggregates,
+    const std::vector<const HashAggregateKernel*>& kernels,
+    const std::vector<std::unique_ptr<KernelState>>& states, ExecContext* ctx,
+    const std::vector<ValueDescr>& descrs);
+
+}  // namespace internal
+
+struct GroupByNode : ExecNode {
+  GroupByNode(ExecNode* input, std::string label, std::shared_ptr<Schema> output_schema,
+              ExecContext* ctx, const std::vector<int>&& key_field_ids,
+              const std::vector<int>&& agg_src_field_ids,
+              const std::vector<internal::Aggregate>&& aggs,
+              const std::vector<const HashAggregateKernel*>&& agg_kernels)
+      : ExecNode(input->plan(), std::move(label), {input}, {"groupby"},
+                 std::move(output_schema), /*num_outputs=*/1),
+        ctx_(ctx),
+        key_field_ids_(std::move(key_field_ids)),
+        agg_src_field_ids_(std::move(agg_src_field_ids)),
+        aggs_(std::move(aggs)),
+        agg_kernels_(std::move(agg_kernels)) {}
+
+  const char* kind_name() override { return "GroupByNode"; }
+
+  Status Consume(ExecBatch batch) {
+    size_t thread_index = get_thread_index_();
+    if (thread_index >= local_states_.size()) {
+      return Status::IndexError("thread index ", thread_index, " is out of range [0, ",
+                                local_states_.size(), ")");
+    }
+
+    auto state = &local_states_[thread_index];
+    RETURN_NOT_OK(InitLocalStateIfNeeded(state));
+
+    // Create a batch with key columns
+    std::vector<Datum> keys(key_field_ids_.size());
+    for (size_t i = 0; i < key_field_ids_.size(); ++i) {
+      keys[i] = batch.values[key_field_ids_[i]];
+    }
+    ARROW_ASSIGN_OR_RAISE(ExecBatch key_batch, ExecBatch::Make(keys));
+
+    // Create a batch with group ids
+    ARROW_ASSIGN_OR_RAISE(Datum id_batch, state->grouper->Consume(key_batch));
+
+    // Execute aggregate kernels
+    for (size_t i = 0; i < agg_kernels_.size(); ++i) {
+      KernelContext kernel_ctx{ctx_};
+      kernel_ctx.SetState(state->agg_states[i].get());
+
+      ARROW_ASSIGN_OR_RAISE(
+          auto agg_batch,
+          ExecBatch::Make({batch.values[agg_src_field_ids_[i]], id_batch}));
+
+      RETURN_NOT_OK(agg_kernels_[i]->resize(&kernel_ctx, state->grouper->num_groups()));
+      RETURN_NOT_OK(agg_kernels_[i]->consume(&kernel_ctx, agg_batch));
+    }
+
+    return Status::OK();
+  }
+
+  Status Merge() {
+    ThreadLocalState* state0 = &local_states_[0];
+    for (size_t i = 1; i < local_states_.size(); ++i) {
+      ThreadLocalState* state = &local_states_[i];
+      if (!state->grouper) {
+        continue;
+      }
+
+      ARROW_ASSIGN_OR_RAISE(ExecBatch other_keys, state->grouper->GetUniques());
+      ARROW_ASSIGN_OR_RAISE(Datum transposition, state0->grouper->Consume(other_keys));
+      state->grouper.reset();
+
+      for (size_t i = 0; i < agg_kernels_.size(); ++i) {
+        KernelContext batch_ctx{ctx_};
+        DCHECK(state0->agg_states[i]);
+        batch_ctx.SetState(state0->agg_states[i].get());
+
+        RETURN_NOT_OK(agg_kernels_[i]->resize(&batch_ctx, state0->grouper->num_groups()));
+        RETURN_NOT_OK(agg_kernels_[i]->merge(&batch_ctx, std::move(*state->agg_states[i]),
+                                             *transposition.array()));
+        state->agg_states[i].reset();
+      }
+    }
+    return Status::OK();
+  }
+
+  Result<ExecBatch> Finalize() {
+    ThreadLocalState* state = &local_states_[0];
+
+    ExecBatch out_data{{}, state->grouper->num_groups()};
+    out_data.values.resize(agg_kernels_.size() + key_field_ids_.size());
+
+    // Aggregate fields come before key fields to match the behavior of GroupBy function
+    for (size_t i = 0; i < agg_kernels_.size(); ++i) {
+      KernelContext batch_ctx{ctx_};
+      batch_ctx.SetState(state->agg_states[i].get());
+      RETURN_NOT_OK(agg_kernels_[i]->finalize(&batch_ctx, &out_data.values[i]));
+      state->agg_states[i].reset();
+    }
+
+    ARROW_ASSIGN_OR_RAISE(ExecBatch out_keys, state->grouper->GetUniques());
+    std::move(out_keys.values.begin(), out_keys.values.end(),
+              out_data.values.begin() + agg_kernels_.size());
+    state->grouper.reset();
+
+    if (output_counter_.SetTotal(
+            static_cast<int>(BitUtil::CeilDiv(out_data.length, output_batch_size())))) {
+      // this will be hit if out_data.length == 0
+      finished_.MarkFinished();
+    }
+    return out_data;
+  }
+
+  void OutputNthBatch(int n) {
+    // bail if StopProducing was called
+    if (finished_.is_finished()) return;
+
+    int64_t batch_size = output_batch_size();
+    outputs_[0]->InputReceived(this, n, out_data_.Slice(batch_size * n, batch_size));
+
+    if (output_counter_.Increment()) {
+      finished_.MarkFinished();
+    }
+  }
+
+  Status OutputResult() {
+    RETURN_NOT_OK(Merge());
+    ARROW_ASSIGN_OR_RAISE(out_data_, Finalize());
+
+    int num_output_batches = *output_counter_.total();
+    outputs_[0]->InputFinished(this, num_output_batches);
+
+    auto executor = ctx_->executor();
+    for (int i = 0; i < num_output_batches; ++i) {
+      if (executor) {
+        // bail if StopProducing was called
+        if (finished_.is_finished()) break;
+
+        RETURN_NOT_OK(executor->Spawn([this, i] { OutputNthBatch(i); }));
+      } else {
+        OutputNthBatch(i);
+      }
+    }
+
+    return Status::OK();
+  }
+
+  void InputReceived(ExecNode* input, int seq, ExecBatch batch) override {
+    // bail if StopProducing was called
+    if (finished_.is_finished()) return;
+
+    DCHECK_EQ(input, inputs_[0]);
+
+    if (ErrorIfNotOk(Consume(std::move(batch)))) return;
+
+    if (input_counter_.Increment()) {
+      ErrorIfNotOk(OutputResult());
+    }
+  }
+
+  void ErrorReceived(ExecNode* input, Status error) override {
+    DCHECK_EQ(input, inputs_[0]);
+
+    outputs_[0]->ErrorReceived(this, std::move(error));
+  }
+
+  void InputFinished(ExecNode* input, int num_total) override {
+    // bail if StopProducing was called
+    if (finished_.is_finished()) return;
+
+    DCHECK_EQ(input, inputs_[0]);
+
+    if (input_counter_.SetTotal(num_total)) {
+      ErrorIfNotOk(OutputResult());
+    }
+  }
+
+  Status StartProducing() override {
+    finished_ = Future<>::Make();
+
+    local_states_.resize(ThreadIndexer::Capacity());
+    return Status::OK();
+  }
+
+  void PauseProducing(ExecNode* output) override {}
+
+  void ResumeProducing(ExecNode* output) override {}
+
+  void StopProducing(ExecNode* output) override {
+    DCHECK_EQ(output, outputs_[0]);
+
+    if (input_counter_.Cancel()) {
+      finished_.MarkFinished();
+    } else if (output_counter_.Cancel()) {
+      finished_.MarkFinished();
+    }
+    inputs_[0]->StopProducing(this);
+  }
+
+  void StopProducing() override { StopProducing(outputs_[0]); }
+
+  Future<> finished() override { return finished_; }
+
+ private:
+  struct ThreadLocalState {
+    std::unique_ptr<internal::Grouper> grouper;
+    std::vector<std::unique_ptr<KernelState>> agg_states;
+  };
+
+  ThreadLocalState* GetLocalState() {
+    size_t thread_index = get_thread_index_();
+    return &local_states_[thread_index];
+  }
+
+  Status InitLocalStateIfNeeded(ThreadLocalState* state) {
+    // Get input schema
+    auto input_schema = inputs_[0]->output_schema();
+
+    if (state->grouper != nullptr) return Status::OK();
+
+    // Build vector of key field data types
+    std::vector<ValueDescr> key_descrs(key_field_ids_.size());
+    for (size_t i = 0; i < key_field_ids_.size(); ++i) {
+      auto key_field_id = key_field_ids_[i];
+      key_descrs[i] = ValueDescr(input_schema->field(key_field_id)->type());
+    }
+
+    // Construct grouper
+    ARROW_ASSIGN_OR_RAISE(state->grouper, internal::Grouper::Make(key_descrs, ctx_));
+
+    // Build vector of aggregate source field data types
+    std::vector<ValueDescr> agg_src_descrs(agg_kernels_.size());
+    for (size_t i = 0; i < agg_kernels_.size(); ++i) {
+      auto agg_src_field_id = agg_src_field_ids_[i];
+      agg_src_descrs[i] =
+          ValueDescr(input_schema->field(agg_src_field_id)->type(), ValueDescr::ARRAY);
+    }
+
+    ARROW_ASSIGN_OR_RAISE(
+        state->agg_states,
+        internal::InitKernels(agg_kernels_, ctx_, aggs_, agg_src_descrs));
+
+    return Status::OK();
+  }
+
+  int output_batch_size() const {
+    int result = static_cast<int>(ctx_->exec_chunksize());
+    if (result < 0) {
+      result = 32 * 1024;
+    }
+    return result;
+  }
+
+  ExecContext* ctx_;
+  Future<> finished_ = Future<>::MakeFinished();
+
+  const std::vector<int> key_field_ids_;
+  const std::vector<int> agg_src_field_ids_;
+  const std::vector<internal::Aggregate> aggs_;
+  const std::vector<const HashAggregateKernel*> agg_kernels_;
+
+  ThreadIndexer get_thread_index_;
+  AtomicCounter input_counter_, output_counter_;
+
+  std::vector<ThreadLocalState> local_states_;
+  ExecBatch out_data_;
+};
+
+Result<ExecNode*> MakeGroupByNode(ExecNode* input, std::string label,
+                                  std::vector<std::string> keys,
+                                  std::vector<std::string> agg_srcs,
+                                  std::vector<internal::Aggregate> aggs) {
+  // Get input schema
+  auto input_schema = input->output_schema();
+
+  // Find input field indices for key fields
+  std::vector<int> key_field_ids(keys.size());
+  for (size_t i = 0; i < keys.size(); ++i) {
+    ARROW_ASSIGN_OR_RAISE(auto match, FieldRef(keys[i]).FindOne(*input_schema));
+    key_field_ids[i] = match[0];
+  }
+
+  // Find input field indices for aggregates
+  std::vector<int> agg_src_field_ids(aggs.size());
+  for (size_t i = 0; i < aggs.size(); ++i) {
+    ARROW_ASSIGN_OR_RAISE(auto match, FieldRef(agg_srcs[i]).FindOne(*input_schema));
+    agg_src_field_ids[i] = match[0];
+  }
+
+  // Build vector of aggregate source field data types
+  DCHECK_EQ(agg_srcs.size(), aggs.size());
+  std::vector<ValueDescr> agg_src_descrs(aggs.size());
+  for (size_t i = 0; i < aggs.size(); ++i) {
+    auto agg_src_field_id = agg_src_field_ids[i];
+    agg_src_descrs[i] =
+        ValueDescr(input_schema->field(agg_src_field_id)->type(), ValueDescr::ARRAY);
+  }
+
+  auto ctx = input->plan()->exec_context();
+
+  // Construct aggregates
+  ARROW_ASSIGN_OR_RAISE(auto agg_kernels,
+                        internal::GetKernels(ctx, aggs, agg_src_descrs));
+
+  ARROW_ASSIGN_OR_RAISE(auto agg_states,
+                        internal::InitKernels(agg_kernels, ctx, aggs, agg_src_descrs));
+
+  ARROW_ASSIGN_OR_RAISE(
+      FieldVector agg_result_fields,
+      internal::ResolveKernels(aggs, agg_kernels, agg_states, ctx, agg_src_descrs));
+
+  // Build field vector for output schema
+  FieldVector output_fields{keys.size() + aggs.size()};
+
+  // Aggregate fields come before key fields to match the behavior of GroupBy function
+  for (size_t i = 0; i < aggs.size(); ++i) {
+    output_fields[i] = agg_result_fields[i];
+  }
+  size_t base = aggs.size();
+  for (size_t i = 0; i < keys.size(); ++i) {
+    int key_field_id = key_field_ids[i];
+    output_fields[base + i] = input_schema->field(key_field_id);
+  }
+
+  auto aggs_copy = aggs;
+
+  return input->plan()->EmplaceNode<GroupByNode>(
+      input, std::move(label), schema(std::move(output_fields)), ctx,
+      std::move(key_field_ids), std::move(agg_src_field_ids), std::move(aggs),
+      std::move(agg_kernels));
+}
+
+Result<Datum> GroupByUsingExecPlan(const std::vector<Datum>& arguments,
+                                   const std::vector<Datum>& keys,
+                                   const std::vector<internal::Aggregate>& aggregates,
+                                   bool use_threads, ExecContext* ctx) {
+  using arrow::compute::detail::ExecBatchIterator;
+
+  FieldVector scan_fields(arguments.size() + keys.size());
+  std::vector<std::string> keys_str(keys.size());
+  std::vector<std::string> arguments_str(arguments.size());
+  for (size_t i = 0; i < arguments.size(); ++i) {
+    arguments_str[i] = std::string("agg_") + std::to_string(i);
+    scan_fields[i] = field(arguments_str[i], arguments[i].type());
+  }
+  for (size_t i = 0; i < keys.size(); ++i) {
+    keys_str[i] = std::string("key_") + std::to_string(i);
+    scan_fields[arguments.size() + i] = field(keys_str[i], keys[i].type());
+  }
+
+  std::vector<ExecBatch> scan_batches;
+  std::vector<Datum> inputs;
+  for (const auto& argument : arguments) {
+    inputs.push_back(argument);
+  }
+  for (const auto& key : keys) {
+    inputs.push_back(key);
+  }
+  ARROW_ASSIGN_OR_RAISE(auto batch_iterator,
+                        ExecBatchIterator::Make(inputs, ctx->exec_chunksize()));
+  ExecBatch batch;
+  while (batch_iterator->Next(&batch)) {
+    if (batch.length == 0) continue;
+    scan_batches.push_back(batch);
+  }
+
+  ARROW_ASSIGN_OR_RAISE(auto plan, ExecPlan::Make(ctx));
+  auto source = MakeSourceNode(
+      plan.get(), "source", schema(std::move(scan_fields)),
+      MakeVectorGenerator(arrow::internal::MapVector(
+          [](ExecBatch batch) { return util::make_optional(std::move(batch)); },
+          std::move(scan_batches))));
+
+  ARROW_ASSIGN_OR_RAISE(
+      auto gby, MakeGroupByNode(source, "gby", keys_str, arguments_str, aggregates));
+  auto sink_gen = MakeSinkNode(gby, "sink");
+
+  RETURN_NOT_OK(plan->Validate());
+  RETURN_NOT_OK(plan->StartProducing());
+
+  auto collected_fut = CollectAsyncGenerator(sink_gen);
+
+  auto start_and_collect =
+      AllComplete({plan->finished(), Future<>(collected_fut)})
+          .Then([collected_fut]() -> Result<std::vector<ExecBatch>> {
+            ARROW_ASSIGN_OR_RAISE(auto collected, collected_fut.result());
+            return ::arrow::internal::MapVector(
+                [](util::optional<ExecBatch> batch) { return std::move(*batch); },
+                std::move(collected));
+          });
+
+  std::vector<ExecBatch> output_batches =
+      start_and_collect.MoveResult().MoveValueUnsafe();
+
+  ArrayDataVector out_data(arguments.size() + keys.size());
+  for (size_t i = 0; i < arguments.size() + keys.size(); ++i) {
+    std::vector<std::shared_ptr<Array>> arrays(output_batches.size());
+    for (size_t j = 0; j < output_batches.size(); ++j) {
+      arrays[j] = output_batches[j].values[i].make_array();
+    }
+    ARROW_ASSIGN_OR_RAISE(auto concatenated_array, Concatenate(arrays));
+    out_data[i] = concatenated_array->data();
+  }
+
+  int64_t length = out_data[0]->length;
+  return ArrayData::Make(struct_(gby->output_schema()->fields()), length,
+                         {/*null_bitmap=*/nullptr}, std::move(out_data),
+                         /*null_count=*/0);
+}
+
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/compute/exec/exec_plan.h b/cpp/src/arrow/compute/exec/exec_plan.h
index c36c174af05..07bb365bbc7 100644
--- a/cpp/src/arrow/compute/exec/exec_plan.h
+++ b/cpp/src/arrow/compute/exec/exec_plan.h
@@ -222,6 +222,10 @@ class ARROW_EXPORT ExecNode {
            std::vector<std::string> input_labels, std::shared_ptr<Schema> output_schema,
            int num_outputs);
 
+  // A helper method to send an error status to all outputs.
+  // Returns true if the status was an error.
+  bool ErrorIfNotOk(Status status);
+
   ExecPlan* plan_;
   std::string label_;
 
@@ -283,5 +287,19 @@ ARROW_EXPORT
 Result<ExecNode*> MakeScalarAggregateNode(ExecNode* input, std::string label,
                                           std::vector<internal::Aggregate> aggregates);
 
+/// \brief Make a node which groups input rows based on key fields and computes
+/// aggregates for each group
+ARROW_EXPORT
+Result<ExecNode*> MakeGroupByNode(ExecNode* input, std::string label,
+                                  std::vector<std::string> keys,
+                                  std::vector<std::string> agg_srcs,
+                                  std::vector<internal::Aggregate> aggs);
+
+ARROW_EXPORT
+Result<Datum> GroupByUsingExecPlan(const std::vector<Datum>& arguments,
+                                   const std::vector<Datum>& keys,
+                                   const std::vector<internal::Aggregate>& aggregates,
+                                   bool use_threads, ExecContext* ctx);
+
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/compute/exec/plan_test.cc b/cpp/src/arrow/compute/exec/plan_test.cc
index bcb63c25b3a..aa807468bcb 100644
--- a/cpp/src/arrow/compute/exec/plan_test.cc
+++ b/cpp/src/arrow/compute/exec/plan_test.cc
@@ -15,11 +15,11 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include <gmock/gmock-matchers.h>
+
 #include <functional>
 #include <memory>
 
-#include <gmock/gmock-matchers.h>
-
 #include "arrow/compute/exec.h"
 #include "arrow/compute/exec/exec_plan.h"
 #include "arrow/compute/exec/expression.h"
@@ -382,10 +382,8 @@ TEST(ExecPlanExecution, SourceFilterSink) {
                        MakeTestSourceNode(plan.get(), "source", basic_data,
                                           /*parallel=*/false, /*slow=*/false));
 
-  ASSERT_OK_AND_ASSIGN(auto predicate,
-                       equal(field_ref("i32"), literal(6)).Bind(*basic_data.schema));
-
-  ASSERT_OK_AND_ASSIGN(auto filter, MakeFilterNode(source, "filter", predicate));
+  ASSERT_OK_AND_ASSIGN(
+      auto filter, MakeFilterNode(source, "filter", equal(field_ref("i32"), literal(6))));
 
   auto sink_gen = MakeSinkNode(filter, "sink");
 
@@ -424,6 +422,106 @@ TEST(ExecPlanExecution, SourceProjectSink) {
                                      "[[null, 6], [true, 7], [true, 8]]")}))));
 }
 
+namespace {
+
+BatchesWithSchema MakeGroupableBatches(int multiplicity = 1) {
+  BatchesWithSchema out;
+
+  out.batches = {ExecBatchFromJSON({int32(), utf8()}, R"([
+                   [12, "alfa"],
+                   [7,  "beta"],
+                   [3,  "alfa"]
+                 ])"),
+                 ExecBatchFromJSON({int32(), utf8()}, R"([
+                   [-2, "alfa"],
+                   [-1, "gama"],
+                   [3,  "alfa"]
+                 ])"),
+                 ExecBatchFromJSON({int32(), utf8()}, R"([
+                   [5,  "gama"],
+                   [3,  "beta"],
+                   [-8, "alfa"]
+                 ])")};
+
+  size_t batch_count = out.batches.size();
+  for (int repeat = 1; repeat < multiplicity; ++repeat) {
+    for (size_t i = 0; i < batch_count; ++i) {
+      out.batches.push_back(out.batches[i]);
+    }
+  }
+
+  out.schema = schema({field("i32", int32()), field("str", utf8())});
+
+  return out;
+}
+}  // namespace
+
+TEST(ExecPlanExecution, SourceGroupedSum) {
+  for (bool parallel : {false, true}) {
+    SCOPED_TRACE(parallel ? "parallel/merged" : "serial");
+
+    auto input = MakeGroupableBatches(/*multiplicity=*/parallel ? 100 : 1);
+
+    ASSERT_OK_AND_ASSIGN(auto plan, ExecPlan::Make());
+
+    ASSERT_OK_AND_ASSIGN(auto source,
+                         MakeTestSourceNode(plan.get(), "source", input,
+                                            /*parallel=*/parallel, /*slow=*/false));
+    ASSERT_OK_AND_ASSIGN(
+        auto gby, MakeGroupByNode(source, "gby", /*keys=*/{"str"}, /*targets=*/{"i32"},
+                                  {{"hash_sum", nullptr}}));
+    auto sink_gen = MakeSinkNode(gby, "sink");
+
+    ASSERT_THAT(StartAndCollect(plan.get(), sink_gen),
+                Finishes(ResultWith(UnorderedElementsAreArray({ExecBatchFromJSON(
+                    {int64(), utf8()},
+                    parallel ? R"([[800, "alfa"], [1000, "beta"], [400, "gama"]])"
+                             : R"([[8, "alfa"], [10, "beta"], [4, "gama"]])")}))));
+  }
+}
+
+TEST(ExecPlanExecution, SourceFilterProjectGroupedSumFilter) {
+  for (bool parallel : {false, true}) {
+    SCOPED_TRACE(parallel ? "parallel/merged" : "serial");
+
+    int batch_multiplicity = parallel ? 100 : 1;
+    auto input = MakeGroupableBatches(/*multiplicity=*/batch_multiplicity);
+
+    ASSERT_OK_AND_ASSIGN(auto plan, ExecPlan::Make());
+
+    ASSERT_OK_AND_ASSIGN(auto source,
+                         MakeTestSourceNode(plan.get(), "source", input,
+                                            /*parallel=*/parallel, /*slow=*/false));
+    ASSERT_OK_AND_ASSIGN(
+        auto filter,
+        MakeFilterNode(source, "filter", greater_equal(field_ref("i32"), literal(0))));
+
+    ASSERT_OK_AND_ASSIGN(
+        auto projection,
+        MakeProjectNode(filter, "project",
+                        {
+                            field_ref("str"),
+                            call("multiply", {field_ref("i32"), literal(2)}),
+                        }));
+
+    ASSERT_OK_AND_ASSIGN(auto gby, MakeGroupByNode(projection, "gby", /*keys=*/{"str"},
+                                                   /*targets=*/{"multiply(i32, 2)"},
+                                                   {{"hash_sum", nullptr}}));
+
+    ASSERT_OK_AND_ASSIGN(
+        auto having,
+        MakeFilterNode(gby, "having",
+                       greater(field_ref("hash_sum"), literal(10 * batch_multiplicity))));
+
+    auto sink_gen = MakeSinkNode(having, "sink");
+
+    ASSERT_THAT(StartAndCollect(plan.get(), sink_gen),
+                Finishes(ResultWith(UnorderedElementsAreArray({ExecBatchFromJSON(
+                    {int64(), utf8()}, parallel ? R"([[3600, "alfa"], [2000, "beta"]])"
+                                                : R"([[36, "alfa"], [20, "beta"]])")}))));
+  }
+}
+
 TEST(ExecPlanExecution, SourceScalarAggSink) {
   ASSERT_OK_AND_ASSIGN(auto plan, ExecPlan::Make());
 
diff --git a/cpp/src/arrow/compute/exec/test_util.cc b/cpp/src/arrow/compute/exec/test_util.cc
index 8cc6200ea40..b47d6087c0b 100644
--- a/cpp/src/arrow/compute/exec/test_util.cc
+++ b/cpp/src/arrow/compute/exec/test_util.cc
@@ -17,6 +17,9 @@
 
 #include "arrow/compute/exec/test_util.h"
 
+#include <gmock/gmock-matchers.h>
+#include <gtest/gtest.h>
+
 #include <algorithm>
 #include <functional>
 #include <iterator>
@@ -27,9 +30,6 @@
 #include <utility>
 #include <vector>
 
-#include <gmock/gmock-matchers.h>
-#include <gtest/gtest.h>
-
 #include "arrow/compute/exec.h"
 #include "arrow/compute/exec/exec_plan.h"
 #include "arrow/datum.h"
diff --git a/cpp/src/arrow/compute/kernel.h b/cpp/src/arrow/compute/kernel.h
index 36d20c7289e..099bd95bbf2 100644
--- a/cpp/src/arrow/compute/kernel.h
+++ b/cpp/src/arrow/compute/kernel.h
@@ -52,7 +52,7 @@ struct ARROW_EXPORT KernelState {
 /// \brief Context/state for the execution of a particular kernel.
 class ARROW_EXPORT KernelContext {
  public:
-  explicit KernelContext(ExecContext* exec_ctx) : exec_ctx_(exec_ctx), state_() {}
+  explicit KernelContext(ExecContext* exec_ctx) : exec_ctx_(exec_ctx) {}
 
   /// \brief Allocate buffer from the context's memory pool. The contents are
   /// not initialized.
@@ -80,7 +80,7 @@ class ARROW_EXPORT KernelContext {
 
  private:
   ExecContext* exec_ctx_;
-  KernelState* state_;
+  KernelState* state_ = NULLPTR;
 };
 
 /// \brief The standard kernel execution API that must be implemented for
@@ -693,10 +693,12 @@ struct ScalarAggregateKernel : public Kernel {
 // ----------------------------------------------------------------------
 // HashAggregateKernel (for HashAggregateFunction)
 
+using HashAggregateResize = std::function<Status(KernelContext*, int64_t)>;
+
 using HashAggregateConsume = std::function<Status(KernelContext*, const ExecBatch&)>;
 
 using HashAggregateMerge =
-    std::function<Status(KernelContext*, KernelState&&, KernelState*)>;
+    std::function<Status(KernelContext*, KernelState&&, const ArrayData&)>;
 
 // Finalize returns Datum to permit multiple return values
 using HashAggregateFinalize = std::function<Status(KernelContext*, Datum*)>;
@@ -706,6 +708,7 @@ using HashAggregateFinalize = std::function<Status(KernelContext*, Datum*)>;
 /// kernel are the init, consume, merge, and finalize functions.
 ///
 /// * init: creates a new KernelState for a kernel.
+/// * resize: ensure that the KernelState can accommodate the specified number of groups.
 /// * consume: processes an ExecBatch (which includes the argument as well
 ///   as an array of group identifiers) and updates the KernelState found in the
 ///   KernelContext.
@@ -716,20 +719,24 @@ struct HashAggregateKernel : public Kernel {
   HashAggregateKernel() = default;
 
   HashAggregateKernel(std::shared_ptr<KernelSignature> sig, KernelInit init,
-                      HashAggregateConsume consume, HashAggregateMerge merge,
-                      HashAggregateFinalize finalize)
+                      HashAggregateResize resize, HashAggregateConsume consume,
+                      HashAggregateMerge merge, HashAggregateFinalize finalize)
       : Kernel(std::move(sig), std::move(init)),
+        resize(std::move(resize)),
         consume(std::move(consume)),
         merge(std::move(merge)),
         finalize(std::move(finalize)) {}
 
   HashAggregateKernel(std::vector<InputType> in_types, OutputType out_type,
-                      KernelInit init, HashAggregateMerge merge,
-                      HashAggregateConsume consume, HashAggregateFinalize finalize)
+                      KernelInit init, HashAggregateConsume consume,
+                      HashAggregateResize resize, HashAggregateMerge merge,
+                      HashAggregateFinalize finalize)
       : HashAggregateKernel(
             KernelSignature::Make(std::move(in_types), std::move(out_type)),
-            std::move(init), std::move(consume), std::move(merge), std::move(finalize)) {}
+            std::move(init), std::move(resize), std::move(consume), std::move(merge),
+            std::move(finalize)) {}
 
+  HashAggregateResize resize;
   HashAggregateConsume consume;
   HashAggregateMerge merge;
   HashAggregateFinalize finalize;
diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h b/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h
index 5163d3fd03d..3d02b273066 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h
+++ b/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h
@@ -18,6 +18,7 @@
 #pragma once
 
 #include <cmath>
+#include <utility>
 
 #include "arrow/compute/api_aggregate.h"
 #include "arrow/compute/kernels/aggregate_internal.h"
@@ -233,9 +234,8 @@ struct MinMaxImpl : public ScalarAggregator {
   using ThisType = MinMaxImpl<ArrowType, SimdLevel>;
   using StateType = MinMaxState<ArrowType, SimdLevel>;
 
-  MinMaxImpl(const std::shared_ptr<DataType>& out_type,
-             const ScalarAggregateOptions& options)
-      : out_type(out_type), options(options) {}
+  MinMaxImpl(std::shared_ptr<DataType> out_type, ScalarAggregateOptions options)
+      : out_type(std::move(out_type)), options(std::move(options)) {}
 
   Status Consume(KernelContext*, const ExecBatch& batch) override {
     if (batch[0].is_array()) {
diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate.cc b/cpp/src/arrow/compute/kernels/hash_aggregate.cc
index ed40a6b1b8c..79213b93b37 100644
--- a/cpp/src/arrow/compute/kernels/hash_aggregate.cc
+++ b/cpp/src/arrow/compute/kernels/hash_aggregate.cc
@@ -17,7 +17,9 @@
 
 #include <functional>
 #include <memory>
+#include <mutex>
 #include <string>
+#include <thread>
 #include <unordered_map>
 #include <vector>
 
@@ -39,6 +41,8 @@
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/cpu_info.h"
 #include "arrow/util/make_unique.h"
+#include "arrow/util/task_group.h"
+#include "arrow/util/thread_pool.h"
 #include "arrow/visitor_inline.h"
 
 namespace arrow {
@@ -748,67 +752,128 @@ struct GrouperFastImpl : Grouper {
 /// Implementations should be default constructible and perform initialization in
 /// Init().
 struct GroupedAggregator : KernelState {
-  virtual Status Init(ExecContext*, const FunctionOptions*,
-                      const std::shared_ptr<DataType>&) = 0;
+  virtual Status Init(ExecContext*, const FunctionOptions*) = 0;
+
+  virtual Status Resize(int64_t new_num_groups) = 0;
 
   virtual Status Consume(const ExecBatch& batch) = 0;
 
-  virtual Result<Datum> Finalize() = 0;
+  virtual Status Merge(GroupedAggregator&& other, const ArrayData& group_id_mapping) = 0;
 
-  template <typename Reserve>
-  Status MaybeReserve(int64_t old_num_groups, const ExecBatch& batch,
-                      const Reserve& reserve) {
-    int64_t new_num_groups = batch[2].scalar_as<UInt32Scalar>().value;
-    if (new_num_groups <= old_num_groups) {
-      return Status::OK();
-    }
-    return reserve(new_num_groups - old_num_groups);
-  }
+  virtual Result<Datum> Finalize() = 0;
 
   virtual std::shared_ptr<DataType> out_type() const = 0;
 };
 
+template <typename Impl>
+Result<std::unique_ptr<KernelState>> HashAggregateInit(KernelContext* ctx,
+                                                       const KernelInitArgs& args) {
+  auto impl = ::arrow::internal::make_unique<Impl>();
+  RETURN_NOT_OK(impl->Init(ctx->exec_context(), args.options));
+  return std::move(impl);
+}
+
+HashAggregateKernel MakeKernel(InputType argument_type, KernelInit init) {
+  HashAggregateKernel kernel;
+
+  kernel.init = std::move(init);
+
+  kernel.signature = KernelSignature::Make(
+      {std::move(argument_type), InputType::Array(Type::UINT32)},
+      OutputType(
+          [](KernelContext* ctx, const std::vector<ValueDescr>&) -> Result<ValueDescr> {
+            return checked_cast<GroupedAggregator*>(ctx->state())->out_type();
+          }));
+
+  kernel.resize = [](KernelContext* ctx, int64_t num_groups) {
+    return checked_cast<GroupedAggregator*>(ctx->state())->Resize(num_groups);
+  };
+
+  kernel.consume = [](KernelContext* ctx, const ExecBatch& batch) {
+    return checked_cast<GroupedAggregator*>(ctx->state())->Consume(batch);
+  };
+
+  kernel.merge = [](KernelContext* ctx, KernelState&& other,
+                    const ArrayData& group_id_mapping) {
+    return checked_cast<GroupedAggregator*>(ctx->state())
+        ->Merge(checked_cast<GroupedAggregator&&>(other), group_id_mapping);
+  };
+
+  kernel.finalize = [](KernelContext* ctx, Datum* out) {
+    ARROW_ASSIGN_OR_RAISE(*out,
+                          checked_cast<GroupedAggregator*>(ctx->state())->Finalize());
+    return Status::OK();
+  };
+
+  return kernel;
+}
+
+Status AddHashAggKernels(
+    const std::vector<std::shared_ptr<DataType>>& types,
+    Result<HashAggregateKernel> make_kernel(const std::shared_ptr<DataType>&),
+    HashAggregateFunction* function) {
+  for (const auto& ty : types) {
+    ARROW_ASSIGN_OR_RAISE(auto kernel, make_kernel(ty));
+    RETURN_NOT_OK(function->AddKernel(std::move(kernel)));
+  }
+  return Status::OK();
+}
+
 // ----------------------------------------------------------------------
 // Count implementation
 
 struct GroupedCountImpl : public GroupedAggregator {
-  Status Init(ExecContext* ctx, const FunctionOptions* options,
-              const std::shared_ptr<DataType>&) override {
+  Status Init(ExecContext* ctx, const FunctionOptions* options) override {
     options_ = checked_cast<const ScalarAggregateOptions&>(*options);
     counts_ = BufferBuilder(ctx->memory_pool());
     return Status::OK();
   }
 
-  Status Consume(const ExecBatch& batch) override {
-    RETURN_NOT_OK(MaybeReserve(num_groups_, batch, [&](int64_t added_groups) {
-      num_groups_ += added_groups;
-      return counts_.Append(added_groups * sizeof(int64_t), 0);
-    }));
+  Status Resize(int64_t new_num_groups) override {
+    auto added_groups = new_num_groups - num_groups_;
+    num_groups_ = new_num_groups;
+    return counts_.Append(added_groups * sizeof(int64_t), 0);
+  }
+
+  Status Merge(GroupedAggregator&& raw_other,
+               const ArrayData& group_id_mapping) override {
+    auto other = checked_cast<GroupedCountImpl*>(&raw_other);
 
-    auto group_ids = batch[1].array()->GetValues<uint32_t>(1);
-    auto raw_counts = reinterpret_cast<int64_t*>(counts_.mutable_data());
+    auto counts = reinterpret_cast<int64_t*>(counts_.mutable_data());
+    auto other_counts = reinterpret_cast<const int64_t*>(other->counts_.mutable_data());
+
+    auto g = group_id_mapping.GetValues<uint32_t>(1);
+    for (int64_t other_g = 0; other_g < group_id_mapping.length; ++other_g, ++g) {
+      counts[*g] += other_counts[other_g];
+    }
+    return Status::OK();
+  }
+
+  Status Consume(const ExecBatch& batch) override {
+    auto counts = reinterpret_cast<int64_t*>(counts_.mutable_data());
 
     const auto& input = batch[0].array();
 
-    if (!options_.skip_nulls) {
-      if (input->GetNullCount() != 0) {
-        for (int64_t i = 0, input_i = input->offset; i < input->length; ++i, ++input_i) {
-          auto g = group_ids[i];
-          raw_counts[g] += !BitUtil::GetBit(input->buffers[0]->data(), input_i);
-        }
+    if (options_.skip_nulls) {
+      auto g_begin =
+          reinterpret_cast<const uint32_t*>(batch[1].array()->buffers[1]->data());
+
+      arrow::internal::VisitSetBitRunsVoid(input->buffers[0], input->offset,
+                                           input->length,
+                                           [&](int64_t offset, int64_t length) {
+                                             auto g = g_begin + offset;
+                                             for (int64_t i = 0; i < length; ++i, ++g) {
+                                               counts[*g] += 1;
+                                             }
+                                           });
+    } else if (input->MayHaveNulls()) {
+      auto g = batch[1].array()->GetValues<uint32_t>(1);
+
+      auto end = input->offset + input->length;
+      for (int64_t i = input->offset; i < end; ++i, ++g) {
+        counts[*g] += !BitUtil::GetBit(input->buffers[0]->data(), i);
       }
-      return Status::OK();
     }
-
-    arrow::internal::VisitSetBitRunsVoid(
-        input->buffers[0], input->offset, input->length,
-        [&](int64_t begin, int64_t length) {
-          for (int64_t input_i = begin, i = begin - input->offset;
-               input_i < begin + length; ++input_i, ++i) {
-            auto g = group_ids[i];
-            raw_counts[g] += 1;
-          }
-        });
     return Status::OK();
   }
 
@@ -827,72 +892,58 @@ struct GroupedCountImpl : public GroupedAggregator {
 // ----------------------------------------------------------------------
 // Sum implementation
 
+template <typename Type>
 struct GroupedSumImpl : public GroupedAggregator {
-  // NB: whether we are accumulating into double, int64_t, or uint64_t
-  // we always have 64 bits per group in the sums buffer.
-  static constexpr size_t kSumSize = sizeof(int64_t);
-
-  using ConsumeImpl = std::function<void(const std::shared_ptr<ArrayData>&,
-                                         const uint32_t*, void*, int64_t*)>;
-
-  struct GetConsumeImpl {
-    template <typename T, typename AccType = typename FindAccumulatorType<T>::Type>
-    Status Visit(const T&) {
-      consume_impl = [](const std::shared_ptr<ArrayData>& input, const uint32_t* group,
-                        void* boxed_sums, int64_t* counts) {
-        auto sums = reinterpret_cast<typename TypeTraits<AccType>::CType*>(boxed_sums);
-
-        VisitArrayDataInline<T>(
-            *input,
-            [&](typename TypeTraits<T>::CType value) {
-              sums[*group] += value;
-              counts[*group] += 1;
-              ++group;
-            },
-            [&] { ++group; });
-      };
-      out_type = TypeTraits<AccType>::type_singleton();
-      return Status::OK();
-    }
-
-    Status Visit(const HalfFloatType& type) {
-      return Status::NotImplemented("Summing data of type ", type);
-    }
+  using AccType = typename FindAccumulatorType<Type>::Type;
+  using SumType = typename TypeTraits<AccType>::CType;
 
-    Status Visit(const DataType& type) {
-      return Status::NotImplemented("Summing data of type ", type);
-    }
-
-    ConsumeImpl consume_impl;
-    std::shared_ptr<DataType> out_type;
-  };
-
-  Status Init(ExecContext* ctx, const FunctionOptions*,
-              const std::shared_ptr<DataType>& input_type) override {
+  Status Init(ExecContext* ctx, const FunctionOptions*) override {
     pool_ = ctx->memory_pool();
     sums_ = BufferBuilder(pool_);
     counts_ = BufferBuilder(pool_);
+    out_type_ = TypeTraits<AccType>::type_singleton();
+    return Status::OK();
+  }
 
-    GetConsumeImpl get_consume_impl;
-    RETURN_NOT_OK(VisitTypeInline(*input_type, &get_consume_impl));
-
-    consume_impl_ = std::move(get_consume_impl.consume_impl);
-    out_type_ = std::move(get_consume_impl.out_type);
-
+  Status Resize(int64_t new_num_groups) override {
+    auto added_groups = new_num_groups - num_groups_;
+    num_groups_ = new_num_groups;
+    RETURN_NOT_OK(sums_.Append(added_groups * sizeof(AccType), 0));
+    RETURN_NOT_OK(counts_.Append(added_groups * sizeof(int64_t), 0));
     return Status::OK();
   }
 
   Status Consume(const ExecBatch& batch) override {
-    RETURN_NOT_OK(MaybeReserve(num_groups_, batch, [&](int64_t added_groups) {
-      num_groups_ += added_groups;
-      RETURN_NOT_OK(sums_.Append(added_groups * kSumSize, 0));
-      RETURN_NOT_OK(counts_.Append(added_groups * sizeof(int64_t), 0));
-      return Status::OK();
-    }));
+    auto sums = reinterpret_cast<SumType*>(sums_.mutable_data());
+    auto counts = reinterpret_cast<int64_t*>(counts_.mutable_data());
+
+    auto g = batch[1].array()->GetValues<uint32_t>(1);
+    VisitArrayDataInline<Type>(
+        *batch[0].array(),
+        [&](typename TypeTraits<Type>::CType value) {
+          sums[*g] += value;
+          counts[*g] += 1;
+          ++g;
+        },
+        [&] { ++g; });
+    return Status::OK();
+  }
+
+  Status Merge(GroupedAggregator&& raw_other,
+               const ArrayData& group_id_mapping) override {
+    auto other = checked_cast<GroupedSumImpl*>(&raw_other);
+
+    auto counts = reinterpret_cast<int64_t*>(counts_.mutable_data());
+    auto sums = reinterpret_cast<SumType*>(sums_.mutable_data());
+
+    auto other_counts = reinterpret_cast<const int64_t*>(other->counts_.mutable_data());
+    auto other_sums = reinterpret_cast<const SumType*>(other->sums_.mutable_data());
 
-    auto group_ids = batch[1].array()->GetValues<uint32_t>(1);
-    consume_impl_(batch[0].array(), group_ids, sums_.mutable_data(),
-                  reinterpret_cast<int64_t*>(counts_.mutable_data()));
+    auto g = group_id_mapping.GetValues<uint32_t>(1);
+    for (int64_t other_g = 0; other_g < group_id_mapping.length; ++other_g, ++g) {
+      counts[*g] += other_counts[other_g];
+      sums[*g] += other_sums[other_g];
+    }
     return Status::OK();
   }
 
@@ -925,120 +976,118 @@ struct GroupedSumImpl : public GroupedAggregator {
   int64_t num_groups_ = 0;
   BufferBuilder sums_, counts_;
   std::shared_ptr<DataType> out_type_;
-  ConsumeImpl consume_impl_;
   MemoryPool* pool_;
 };
 
+struct GroupedSumFactory {
+  template <typename T, typename AccType = typename FindAccumulatorType<T>::Type>
+  Status Visit(const T&) {
+    kernel = MakeKernel(std::move(argument_type), HashAggregateInit<GroupedSumImpl<T>>);
+    return Status::OK();
+  }
+
+  Status Visit(const HalfFloatType& type) {
+    return Status::NotImplemented("Summing data of type ", type);
+  }
+
+  Status Visit(const DataType& type) {
+    return Status::NotImplemented("Summing data of type ", type);
+  }
+
+  static Result<HashAggregateKernel> Make(const std::shared_ptr<DataType>& type) {
+    GroupedSumFactory factory;
+    factory.argument_type = InputType::Array(type);
+    RETURN_NOT_OK(VisitTypeInline(*type, &factory));
+    return std::move(factory.kernel);
+  }
+
+  HashAggregateKernel kernel;
+  InputType argument_type;
+};
+
 // ----------------------------------------------------------------------
 // MinMax implementation
 
 template <typename CType>
-struct Extrema : std::numeric_limits<CType> {};
+struct AntiExtrema {
+  static constexpr CType anti_min() { return std::numeric_limits<CType>::max(); }
+  static constexpr CType anti_max() { return std::numeric_limits<CType>::min(); }
+};
 
 template <>
-struct Extrema<float> {
-  static constexpr float min() { return -std::numeric_limits<float>::infinity(); }
-  static constexpr float max() { return std::numeric_limits<float>::infinity(); }
+struct AntiExtrema<float> {
+  static constexpr float anti_min() { return std::numeric_limits<float>::infinity(); }
+  static constexpr float anti_max() { return -std::numeric_limits<float>::infinity(); }
 };
 
 template <>
-struct Extrema<double> {
-  static constexpr double min() { return -std::numeric_limits<double>::infinity(); }
-  static constexpr double max() { return std::numeric_limits<double>::infinity(); }
+struct AntiExtrema<double> {
+  static constexpr double anti_min() { return std::numeric_limits<double>::infinity(); }
+  static constexpr double anti_max() { return -std::numeric_limits<double>::infinity(); }
 };
 
+template <typename Type>
 struct GroupedMinMaxImpl : public GroupedAggregator {
-  using ConsumeImpl =
-      std::function<void(const std::shared_ptr<ArrayData>&, const uint32_t*, void*, void*,
-                         uint8_t*, uint8_t*)>;
-
-  using ResizeImpl = std::function<Status(BufferBuilder*, int64_t)>;
-
-  template <typename CType>
-  static ResizeImpl MakeResizeImpl(CType anti_extreme) {
-    // resize a min or max buffer, storing the correct anti extreme
-    return [anti_extreme](BufferBuilder* builder, int64_t added_groups) {
-      TypedBufferBuilder<CType> typed_builder(std::move(*builder));
-      RETURN_NOT_OK(typed_builder.Append(added_groups, anti_extreme));
-      *builder = std::move(*typed_builder.bytes_builder());
-      return Status::OK();
-    };
-  }
+  using CType = typename TypeTraits<Type>::CType;
 
-  struct GetImpl {
-    template <typename T, typename CType = typename TypeTraits<T>::CType>
-    enable_if_number<T, Status> Visit(const T&) {
-      consume_impl = [](const std::shared_ptr<ArrayData>& input, const uint32_t* group,
-                        void* mins, void* maxes, uint8_t* has_values,
-                        uint8_t* has_nulls) {
-        auto raw_mins = reinterpret_cast<CType*>(mins);
-        auto raw_maxes = reinterpret_cast<CType*>(maxes);
-
-        VisitArrayDataInline<T>(
-            *input,
-            [&](CType val) {
-              raw_maxes[*group] = std::max(raw_maxes[*group], val);
-              raw_mins[*group] = std::min(raw_mins[*group], val);
-              BitUtil::SetBit(has_values, *group++);
-            },
-            [&] { BitUtil::SetBit(has_nulls, *group++); });
-      };
-
-      resize_min_impl = MakeResizeImpl(Extrema<CType>::max());
-      resize_max_impl = MakeResizeImpl(Extrema<CType>::min());
-      return Status::OK();
-    }
-
-    Status Visit(const BooleanType& type) {
-      return Status::NotImplemented("Grouped MinMax data of type ", type);
-    }
-
-    Status Visit(const HalfFloatType& type) {
-      return Status::NotImplemented("Grouped MinMax data of type ", type);
-    }
-
-    Status Visit(const DataType& type) {
-      return Status::NotImplemented("Grouped MinMax data of type ", type);
-    }
-
-    ConsumeImpl consume_impl;
-    ResizeImpl resize_min_impl, resize_max_impl;
-  };
-
-  Status Init(ExecContext* ctx, const FunctionOptions* options,
-              const std::shared_ptr<DataType>& input_type) override {
+  Status Init(ExecContext* ctx, const FunctionOptions* options) override {
     options_ = *checked_cast<const ScalarAggregateOptions*>(options);
-    type_ = input_type;
-
-    mins_ = BufferBuilder(ctx->memory_pool());
-    maxes_ = BufferBuilder(ctx->memory_pool());
+    type_ = TypeTraits<Type>::type_singleton();
+    mins_ = TypedBufferBuilder<CType>(ctx->memory_pool());
+    maxes_ = TypedBufferBuilder<CType>(ctx->memory_pool());
     has_values_ = TypedBufferBuilder<bool>(ctx->memory_pool());
     has_nulls_ = TypedBufferBuilder<bool>(ctx->memory_pool());
+    return Status::OK();
+  }
 
-    GetImpl get_impl;
-    RETURN_NOT_OK(VisitTypeInline(*input_type, &get_impl));
-
-    consume_impl_ = std::move(get_impl.consume_impl);
-    resize_min_impl_ = std::move(get_impl.resize_min_impl);
-    resize_max_impl_ = std::move(get_impl.resize_max_impl);
-
+  Status Resize(int64_t new_num_groups) override {
+    auto added_groups = new_num_groups - num_groups_;
+    num_groups_ = new_num_groups;
+    RETURN_NOT_OK(mins_.Append(added_groups, AntiExtrema<CType>::anti_min()));
+    RETURN_NOT_OK(maxes_.Append(added_groups, AntiExtrema<CType>::anti_max()));
+    RETURN_NOT_OK(has_values_.Append(added_groups, false));
+    RETURN_NOT_OK(has_nulls_.Append(added_groups, false));
     return Status::OK();
   }
 
   Status Consume(const ExecBatch& batch) override {
-    RETURN_NOT_OK(MaybeReserve(num_groups_, batch, [&](int64_t added_groups) {
-      num_groups_ += added_groups;
-      RETURN_NOT_OK(resize_min_impl_(&mins_, added_groups));
-      RETURN_NOT_OK(resize_max_impl_(&maxes_, added_groups));
-      RETURN_NOT_OK(has_values_.Append(added_groups, false));
-      RETURN_NOT_OK(has_nulls_.Append(added_groups, false));
-      return Status::OK();
-    }));
+    auto g = batch[1].array()->GetValues<uint32_t>(1);
+    auto raw_mins = reinterpret_cast<CType*>(mins_.mutable_data());
+    auto raw_maxes = reinterpret_cast<CType*>(maxes_.mutable_data());
+
+    VisitArrayDataInline<Type>(
+        *batch[0].array(),
+        [&](CType val) {
+          raw_maxes[*g] = std::max(raw_maxes[*g], val);
+          raw_mins[*g] = std::min(raw_mins[*g], val);
+          BitUtil::SetBit(has_values_.mutable_data(), *g++);
+        },
+        [&] { BitUtil::SetBit(has_nulls_.mutable_data(), *g++); });
+    return Status::OK();
+  }
+
+  Status Merge(GroupedAggregator&& raw_other,
+               const ArrayData& group_id_mapping) override {
+    auto other = checked_cast<GroupedMinMaxImpl*>(&raw_other);
+
+    auto raw_mins = reinterpret_cast<CType*>(mins_.mutable_data());
+    auto raw_maxes = reinterpret_cast<CType*>(maxes_.mutable_data());
+
+    auto other_raw_mins = reinterpret_cast<const CType*>(other->mins_.mutable_data());
+    auto other_raw_maxes = reinterpret_cast<const CType*>(other->maxes_.mutable_data());
+
+    auto g = group_id_mapping.GetValues<uint32_t>(1);
+    for (int64_t other_g = 0; other_g < group_id_mapping.length; ++other_g, ++g) {
+      raw_mins[*g] = std::min(raw_mins[*g], other_raw_mins[other_g]);
+      raw_maxes[*g] = std::max(raw_maxes[*g], other_raw_maxes[other_g]);
 
-    auto group_ids = batch[1].array()->GetValues<uint32_t>(1);
-    consume_impl_(batch[0].array(), group_ids, mins_.mutable_data(),
-                  maxes_.mutable_data(), has_values_.mutable_data(),
-                  has_nulls_.mutable_data());
+      if (BitUtil::GetBit(other->has_values_.data(), other_g)) {
+        BitUtil::SetBit(has_values_.mutable_data(), *g);
+      }
+      if (BitUtil::GetBit(other->has_nulls_.data(), other_g)) {
+        BitUtil::SetBit(has_nulls_.mutable_data(), *g);
+      }
+    }
     return Status::OK();
   }
 
@@ -1067,52 +1116,40 @@ struct GroupedMinMaxImpl : public GroupedAggregator {
   }
 
   int64_t num_groups_;
-  BufferBuilder mins_, maxes_;
+  TypedBufferBuilder<CType> mins_, maxes_;
   TypedBufferBuilder<bool> has_values_, has_nulls_;
   std::shared_ptr<DataType> type_;
-  ConsumeImpl consume_impl_;
-  ResizeImpl resize_min_impl_, resize_max_impl_;
   ScalarAggregateOptions options_;
 };
 
-template <typename Impl>
-HashAggregateKernel MakeKernel(InputType argument_type) {
-  HashAggregateKernel kernel;
-
-  kernel.init = [](KernelContext* ctx,
-                   const KernelInitArgs& args) -> Result<std::unique_ptr<KernelState>> {
-    auto impl = ::arrow::internal::make_unique<Impl>();
-    // FIXME(bkietz) Init should not take a type. That should be an unboxed template arg
-    // for the Impl. Otherwise we're not exposing dispatch as well as we should.
-    RETURN_NOT_OK(impl->Init(ctx->exec_context(), args.options, args.inputs[0].type));
-    return std::move(impl);
-  };
+struct GroupedMinMaxFactory {
+  template <typename T>
+  enable_if_number<T, Status> Visit(const T&) {
+    kernel =
+        MakeKernel(std::move(argument_type), HashAggregateInit<GroupedMinMaxImpl<T>>);
+    return Status::OK();
+  }
 
-  kernel.signature = KernelSignature::Make(
-      {std::move(argument_type), InputType::Array(Type::UINT32),
-       InputType::Scalar(Type::UINT32)},
-      OutputType(
-          [](KernelContext* ctx, const std::vector<ValueDescr>&) -> Result<ValueDescr> {
-            return checked_cast<GroupedAggregator*>(ctx->state())->out_type();
-          }));
+  Status Visit(const HalfFloatType& type) {
+    return Status::NotImplemented("Summing data of type ", type);
+  }
 
-  kernel.consume = [](KernelContext* ctx, const ExecBatch& batch) {
-    return checked_cast<GroupedAggregator*>(ctx->state())->Consume(batch);
-  };
+  Status Visit(const DataType& type) {
+    return Status::NotImplemented("Summing data of type ", type);
+  }
 
-  kernel.merge = [](KernelContext* ctx, KernelState&&, KernelState*) {
-    // TODO(ARROW-11840) merge two hash tables
-    return Status::NotImplemented("Merge hashed aggregations");
-  };
+  static Result<HashAggregateKernel> Make(const std::shared_ptr<DataType>& type) {
+    GroupedMinMaxFactory factory;
+    factory.argument_type = InputType::Array(type);
+    RETURN_NOT_OK(VisitTypeInline(*type, &factory));
+    return std::move(factory.kernel);
+  }
 
-  kernel.finalize = [](KernelContext* ctx, Datum* out) {
-    ARROW_ASSIGN_OR_RAISE(*out,
-                          checked_cast<GroupedAggregator*>(ctx->state())->Finalize());
-    return Status::OK();
-  };
+  HashAggregateKernel kernel;
+  InputType argument_type;
+};
 
-  return kernel;
-}
+}  // namespace
 
 Result<std::vector<const HashAggregateKernel*>> GetKernels(
     ExecContext* ctx, const std::vector<Aggregate>& aggregates,
@@ -1129,8 +1166,7 @@ Result<std::vector<const HashAggregateKernel*>> GetKernels(
                           ctx->func_registry()->GetFunction(aggregates[i].function));
     ARROW_ASSIGN_OR_RAISE(
         const Kernel* kernel,
-        function->DispatchExact(
-            {in_descrs[i], ValueDescr::Array(uint32()), ValueDescr::Scalar(uint32())}));
+        function->DispatchExact({in_descrs[i], ValueDescr::Array(uint32())}));
     kernels[i] = static_cast<const HashAggregateKernel*>(kernel);
   }
   return kernels;
@@ -1154,13 +1190,13 @@ Result<std::vector<std::unique_ptr<KernelState>>> InitKernels(
 
     KernelContext kernel_ctx{ctx};
     ARROW_ASSIGN_OR_RAISE(
-        states[i], kernels[i]->init(&kernel_ctx, KernelInitArgs{kernels[i],
-                                                                {
-                                                                    in_descrs[i].type,
-                                                                    uint32(),
-                                                                    uint32(),
-                                                                },
-                                                                options}));
+        states[i],
+        kernels[i]->init(&kernel_ctx, KernelInitArgs{kernels[i],
+                                                     {
+                                                         in_descrs[i],
+                                                         ValueDescr::Array(uint32()),
+                                                     },
+                                                     options}));
   }
 
   return std::move(states);
@@ -1179,17 +1215,14 @@ Result<FieldVector> ResolveKernels(
 
     ARROW_ASSIGN_OR_RAISE(auto descr, kernels[i]->signature->out_type().Resolve(
                                           &kernel_ctx, {
-                                                           descrs[i].type,
-                                                           uint32(),
-                                                           uint32(),
+                                                           descrs[i],
+                                                           ValueDescr::Array(uint32()),
                                                        }));
     fields[i] = field(aggregates[i].function, std::move(descr.type));
   }
   return fields;
 }
 
-}  // namespace
-
 Result<std::unique_ptr<Grouper>> Grouper::Make(const std::vector<ValueDescr>& descrs,
                                                ExecContext* ctx) {
   if (GrouperFastImpl::CanUse(descrs)) {
@@ -1199,7 +1232,13 @@ Result<std::unique_ptr<Grouper>> Grouper::Make(const std::vector<ValueDescr>& de
 }
 
 Result<Datum> GroupBy(const std::vector<Datum>& arguments, const std::vector<Datum>& keys,
-                      const std::vector<Aggregate>& aggregates, ExecContext* ctx) {
+                      const std::vector<Aggregate>& aggregates, bool use_threads,
+                      ExecContext* ctx) {
+  auto task_group =
+      use_threads
+          ? arrow::internal::TaskGroup::MakeThreaded(arrow::internal::GetCpuThreadPool())
+          : arrow::internal::TaskGroup::MakeSerial();
+
   // Construct and initialize HashAggregateKernels
   ARROW_ASSIGN_OR_RAISE(auto argument_descrs,
                         ExecBatch::Make(arguments).Map(
@@ -1207,24 +1246,33 @@ Result<Datum> GroupBy(const std::vector<Datum>& arguments, const std::vector<Dat
 
   ARROW_ASSIGN_OR_RAISE(auto kernels, GetKernels(ctx, aggregates, argument_descrs));
 
-  ARROW_ASSIGN_OR_RAISE(auto states,
-                        InitKernels(kernels, ctx, aggregates, argument_descrs));
+  std::vector<std::vector<std::unique_ptr<KernelState>>> states(
+      task_group->parallelism());
+  for (auto& state : states) {
+    ARROW_ASSIGN_OR_RAISE(state, InitKernels(kernels, ctx, aggregates, argument_descrs));
+  }
 
   ARROW_ASSIGN_OR_RAISE(
       FieldVector out_fields,
-      ResolveKernels(aggregates, kernels, states, ctx, argument_descrs));
+      ResolveKernels(aggregates, kernels, states[0], ctx, argument_descrs));
 
   using arrow::compute::detail::ExecBatchIterator;
 
   ARROW_ASSIGN_OR_RAISE(auto argument_batch_iterator,
                         ExecBatchIterator::Make(arguments, ctx->exec_chunksize()));
 
-  // Construct Grouper
+  // Construct Groupers
   ARROW_ASSIGN_OR_RAISE(auto key_descrs, ExecBatch::Make(keys).Map([](ExecBatch batch) {
     return batch.GetDescriptors();
   }));
 
-  ARROW_ASSIGN_OR_RAISE(auto grouper, Grouper::Make(key_descrs, ctx));
+  std::vector<std::unique_ptr<Grouper>> groupers(task_group->parallelism());
+  for (auto& grouper : groupers) {
+    ARROW_ASSIGN_OR_RAISE(grouper, Grouper::Make(key_descrs, ctx));
+  }
+
+  std::mutex mutex;
+  std::unordered_map<std::thread::id, size_t> thread_ids;
 
   int i = 0;
   for (ValueDescr& key_descr : key_descrs) {
@@ -1240,16 +1288,49 @@ Result<Datum> GroupBy(const std::vector<Datum>& arguments, const std::vector<Dat
          key_batch_iterator->Next(&key_batch)) {
     if (key_batch.length == 0) continue;
 
-    // compute a batch of group ids
-    ARROW_ASSIGN_OR_RAISE(Datum id_batch, grouper->Consume(key_batch));
+    task_group->Append([&, key_batch, argument_batch] {
+      size_t thread_index;
+      {
+        std::unique_lock<std::mutex> lock(mutex);
+        auto it = thread_ids.emplace(std::this_thread::get_id(), thread_ids.size()).first;
+        thread_index = it->second;
+        DCHECK_LT(static_cast<int>(thread_index), task_group->parallelism());
+      }
+
+      auto grouper = groupers[thread_index].get();
+
+      // compute a batch of group ids
+      ARROW_ASSIGN_OR_RAISE(Datum id_batch, grouper->Consume(key_batch));
+
+      // consume group ids with HashAggregateKernels
+      for (size_t i = 0; i < kernels.size(); ++i) {
+        KernelContext batch_ctx{ctx};
+        batch_ctx.SetState(states[thread_index][i].get());
+        ARROW_ASSIGN_OR_RAISE(auto batch, ExecBatch::Make({argument_batch[i], id_batch}));
+        RETURN_NOT_OK(kernels[i]->resize(&batch_ctx, grouper->num_groups()));
+        RETURN_NOT_OK(kernels[i]->consume(&batch_ctx, batch));
+      }
+
+      return Status::OK();
+    });
+  }
+
+  RETURN_NOT_OK(task_group->Finish());
+
+  // Merge if necessary
+  for (size_t thread_index = 1; thread_index < thread_ids.size(); ++thread_index) {
+    ARROW_ASSIGN_OR_RAISE(ExecBatch other_keys, groupers[thread_index]->GetUniques());
+    ARROW_ASSIGN_OR_RAISE(Datum transposition, groupers[0]->Consume(other_keys));
+    groupers[thread_index].reset();
 
-    // consume group ids with HashAggregateKernels
     for (size_t i = 0; i < kernels.size(); ++i) {
       KernelContext batch_ctx{ctx};
-      batch_ctx.SetState(states[i].get());
-      ARROW_ASSIGN_OR_RAISE(auto batch, ExecBatch::Make({argument_batch[i], id_batch,
-                                                         Datum(grouper->num_groups())}));
-      RETURN_NOT_OK(kernels[i]->consume(&batch_ctx, batch));
+      batch_ctx.SetState(states[0][i].get());
+
+      RETURN_NOT_OK(kernels[i]->resize(&batch_ctx, groupers[0]->num_groups()));
+      RETURN_NOT_OK(kernels[i]->merge(&batch_ctx, std::move(*states[thread_index][i]),
+                                      *transposition.array()));
+      states[thread_index][i].reset();
     }
   }
 
@@ -1259,13 +1340,13 @@ Result<Datum> GroupBy(const std::vector<Datum>& arguments, const std::vector<Dat
 
   for (size_t i = 0; i < kernels.size(); ++i) {
     KernelContext batch_ctx{ctx};
-    batch_ctx.SetState(states[i].get());
+    batch_ctx.SetState(states[0][i].get());
     Datum out;
     RETURN_NOT_OK(kernels[i]->finalize(&batch_ctx, &out));
     *it++ = out.array();
   }
 
-  ARROW_ASSIGN_OR_RAISE(ExecBatch out_keys, grouper->GetUniques());
+  ARROW_ASSIGN_OR_RAISE(ExecBatch out_keys, groupers[0]->GetUniques());
   for (const auto& key : out_keys.values) {
     *it++ = key.array();
   }
@@ -1332,18 +1413,18 @@ namespace {
 const FunctionDoc hash_count_doc{"Count the number of null / non-null values",
                                  ("By default, non-null values are counted.\n"
                                   "This can be changed through ScalarAggregateOptions."),
-                                 {"array", "group_id_array", "group_count"},
+                                 {"array", "group_id_array"},
                                  "ScalarAggregateOptions"};
 
 const FunctionDoc hash_sum_doc{"Sum values of a numeric array",
                                ("Null values are ignored."),
-                               {"array", "group_id_array", "group_count"}};
+                               {"array", "group_id_array"}};
 
 const FunctionDoc hash_min_max_doc{
     "Compute the minimum and maximum values of a numeric array",
     ("Null values are ignored by default.\n"
      "This can be changed through ScalarAggregateOptions."),
-    {"array", "group_id_array", "group_count"},
+    {"array", "group_id_array"},
     "ScalarAggregateOptions"};
 }  // namespace
 
@@ -1351,25 +1432,32 @@ void RegisterHashAggregateBasic(FunctionRegistry* registry) {
   {
     static auto default_scalar_aggregate_options = ScalarAggregateOptions::Defaults();
     auto func = std::make_shared<HashAggregateFunction>(
-        "hash_count", Arity::Ternary(), &hash_count_doc,
+        "hash_count", Arity::Binary(), &hash_count_doc,
         &default_scalar_aggregate_options);
-    DCHECK_OK(func->AddKernel(MakeKernel<GroupedCountImpl>(ValueDescr::ARRAY)));
+
+    DCHECK_OK(func->AddKernel(
+        MakeKernel(ValueDescr::ARRAY, HashAggregateInit<GroupedCountImpl>)));
     DCHECK_OK(registry->AddFunction(std::move(func)));
   }
 
   {
-    auto func = std::make_shared<HashAggregateFunction>("hash_sum", Arity::Ternary(),
+    auto func = std::make_shared<HashAggregateFunction>("hash_sum", Arity::Binary(),
                                                         &hash_sum_doc);
-    DCHECK_OK(func->AddKernel(MakeKernel<GroupedSumImpl>(ValueDescr::ARRAY)));
+    DCHECK_OK(AddHashAggKernels({boolean()}, GroupedSumFactory::Make, func.get()));
+    DCHECK_OK(AddHashAggKernels(SignedIntTypes(), GroupedSumFactory::Make, func.get()));
+    DCHECK_OK(AddHashAggKernels(UnsignedIntTypes(), GroupedSumFactory::Make, func.get()));
+    DCHECK_OK(
+        AddHashAggKernels(FloatingPointTypes(), GroupedSumFactory::Make, func.get()));
     DCHECK_OK(registry->AddFunction(std::move(func)));
   }
 
   {
     static auto default_scalar_aggregate_options = ScalarAggregateOptions::Defaults();
     auto func = std::make_shared<HashAggregateFunction>(
-        "hash_min_max", Arity::Ternary(), &hash_min_max_doc,
+        "hash_min_max", Arity::Binary(), &hash_min_max_doc,
         &default_scalar_aggregate_options);
-    DCHECK_OK(func->AddKernel(MakeKernel<GroupedMinMaxImpl>(ValueDescr::ARRAY)));
+    DCHECK_OK(AddHashAggKernels({boolean()}, GroupedSumFactory::Make, func.get()));
+    DCHECK_OK(AddHashAggKernels(NumericTypes(), GroupedMinMaxFactory::Make, func.get()));
     DCHECK_OK(registry->AddFunction(std::move(func)));
   }
 }
diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc b/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc
index 8c8a4b23932..b0327c7aa81 100644
--- a/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc
+++ b/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc
@@ -30,6 +30,9 @@
 #include "arrow/compute/api_scalar.h"
 #include "arrow/compute/api_vector.h"
 #include "arrow/compute/cast.h"
+#include "arrow/compute/exec.h"
+#include "arrow/compute/exec/exec_plan.h"
+#include "arrow/compute/exec/test_util.h"
 #include "arrow/compute/kernels/aggregate_internal.h"
 #include "arrow/compute/kernels/codegen_internal.h"
 #include "arrow/compute/kernels/test_util.h"
@@ -38,6 +41,7 @@
 #include "arrow/testing/generator.h"
 #include "arrow/testing/gtest_common.h"
 #include "arrow/testing/gtest_util.h"
+#include "arrow/testing/matchers.h"
 #include "arrow/testing/random.h"
 #include "arrow/type.h"
 #include "arrow/type_traits.h"
@@ -46,6 +50,7 @@
 #include "arrow/util/int_util_internal.h"
 #include "arrow/util/key_value_metadata.h"
 #include "arrow/util/logging.h"
+#include "arrow/util/thread_pool.h"
 
 using testing::HasSubstr;
 
@@ -121,6 +126,27 @@ void ValidateGroupBy(const std::vector<internal::Aggregate>& aggregates,
   AssertDatumsEqual(expected, actual, /*verbose=*/true);
 }
 
+ExecContext* small_chunksize_context(bool use_threads = false) {
+  static ExecContext ctx,
+      ctx_with_threads{default_memory_pool(), arrow::internal::GetCpuThreadPool()};
+  ctx.set_exec_chunksize(2);
+  ctx_with_threads.set_exec_chunksize(2);
+  return use_threads ? &ctx_with_threads : &ctx;
+}
+
+Result<Datum> GroupByTest(
+    const std::vector<Datum>& arguments, const std::vector<Datum>& keys,
+    const std::vector<::arrow::compute::internal::Aggregate>& aggregates,
+    bool use_threads, bool use_exec_plan) {
+  if (use_exec_plan) {
+    return GroupByUsingExecPlan(arguments, keys, aggregates, use_threads,
+                                small_chunksize_context(use_threads));
+  } else {
+    return internal::GroupBy(arguments, keys, aggregates, use_threads,
+                             default_exec_context());
+  }
+}
+
 }  // namespace
 
 TEST(Grouper, SupportedKeys) {
@@ -175,12 +201,19 @@ struct TestGrouper {
   }
 
   void ExpectConsume(const std::string& key_json, const std::string& expected) {
-    ExpectConsume(ExecBatch(*RecordBatchFromJSON(key_schema_, key_json)),
+    ExpectConsume(ExecBatchFromJSON(descrs_, key_json),
                   ArrayFromJSON(uint32(), expected));
   }
 
-  void ExpectConsume(const std::vector<Datum>& key_batch, Datum expected) {
-    ExpectConsume(*ExecBatch::Make(key_batch), expected);
+  void ExpectConsume(const std::vector<Datum>& key_values, Datum expected) {
+    ASSERT_OK_AND_ASSIGN(auto key_batch, ExecBatch::Make(key_values));
+    ExpectConsume(key_batch, expected);
+  }
+
+  void ExpectConsume(const ExecBatch& key_batch, Datum expected) {
+    Datum ids;
+    ConsumeAndValidate(key_batch, &ids);
+    AssertEquivalentIds(expected, ids);
   }
 
   void AssertEquivalentIds(const Datum& expected, const Datum& actual) {
@@ -190,10 +223,8 @@ struct TestGrouper {
     int64_t num_ids = left->length();
     auto left_data = left->data();
     auto right_data = right->data();
-    const uint32_t* left_ids =
-        reinterpret_cast<const uint32_t*>(left_data->buffers[1]->data());
-    const uint32_t* right_ids =
-        reinterpret_cast<const uint32_t*>(right_data->buffers[1]->data());
+    auto left_ids = reinterpret_cast<const uint32_t*>(left_data->buffers[1]->data());
+    auto right_ids = reinterpret_cast<const uint32_t*>(right_data->buffers[1]->data());
     uint32_t max_left_id = 0;
     uint32_t max_right_id = 0;
     for (int64_t i = 0; i < num_ids; ++i) {
@@ -224,13 +255,6 @@ struct TestGrouper {
     }
   }
 
-  void ExpectConsume(const ExecBatch& key_batch, Datum expected) {
-    Datum ids;
-    ConsumeAndValidate(key_batch, &ids);
-    AssertEquivalentIds(expected, ids);
-    // AssertDatumsEqual(expected, ids, /*verbose=*/true);
-  }
-
   void ConsumeAndValidate(const ExecBatch& key_batch, Datum* ids = nullptr) {
     ASSERT_OK_AND_ASSIGN(Datum id_batch, grouper_->Consume(key_batch));
 
@@ -514,85 +538,171 @@ TEST(GroupBy, Errors) {
     [null,  3]
   ])");
 
-  EXPECT_RAISES_WITH_MESSAGE_THAT(
-      NotImplemented, HasSubstr("Direct execution of HASH_AGGREGATE functions"),
-      CallFunction("hash_sum", {batch->GetColumnByName("argument"),
-                                batch->GetColumnByName("group_id"), Datum(uint32_t(4))}));
+  EXPECT_THAT(CallFunction("hash_sum", {batch->GetColumnByName("argument"),
+                                        batch->GetColumnByName("group_id")}),
+              Raises(StatusCode::NotImplemented,
+                     HasSubstr("Direct execution of HASH_AGGREGATE functions")));
 }
 
-TEST(GroupBy, SumOnly) {
-  auto batch = RecordBatchFromJSON(
-      schema({field("argument", float64()), field("key", int64())}), R"([
+namespace {
+void SortBy(std::vector<std::string> names, Datum* aggregated_and_grouped) {
+  SortOptions options{{SortKey("key_0", SortOrder::Ascending)}};
+
+  ASSERT_OK_AND_ASSIGN(
+      auto batch, RecordBatch::FromStructArray(aggregated_and_grouped->make_array()));
+  ASSERT_OK_AND_ASSIGN(Datum sort_indices, SortIndices(batch, options));
+
+  ASSERT_OK_AND_ASSIGN(*aggregated_and_grouped,
+                       Take(*aggregated_and_grouped, sort_indices));
+}
+}  // namespace
+
+TEST(GroupBy, CountOnly) {
+  for (bool use_exec_plan : {false, true}) {
+    for (bool use_threads : {true, false}) {
+      SCOPED_TRACE(use_threads ? "parallel/merged" : "serial");
+
+      auto table = TableFromJSON(
+          schema({field("argument", float64()), field("key", int64())}), {R"([
     [1.0,   1],
-    [null,  1],
+    [null,  1]
+                        ])",
+                                                                          R"([
     [0.0,   2],
     [null,  3],
     [4.0,   null],
     [3.25,  1],
-    [0.125, 2],
+    [0.125, 2]
+                        ])",
+                                                                          R"([
     [-0.25, 2],
     [0.75,  null],
     [null,  3]
-  ])");
+                        ])"});
 
-  ASSERT_OK_AND_ASSIGN(Datum aggregated_and_grouped,
-                       internal::GroupBy({batch->GetColumnByName("argument")},
-                                         {batch->GetColumnByName("key")},
-                                         {
-                                             {"hash_sum", nullptr},
-                                         }));
+      ASSERT_OK_AND_ASSIGN(Datum aggregated_and_grouped,
+                           GroupByTest({table->GetColumnByName("argument")},
+                                       {table->GetColumnByName("key")},
+                                       {
+                                           {"hash_count", nullptr},
+                                       },
+                                       use_threads, use_exec_plan));
+      SortBy({"key_0"}, &aggregated_and_grouped);
+
+      AssertDatumsEqual(ArrayFromJSON(struct_({
+                                          field("hash_count", int64()),
+                                          field("key_0", int64()),
+                                      }),
+                                      R"([
+    [2,   1],
+    [3,   2],
+    [0,   3],
+    [2,   null]
+  ])"),
+                        aggregated_and_grouped,
+                        /*verbose=*/true);
+    }
+  }
+}
 
-  AssertDatumsEqual(ArrayFromJSON(struct_({
-                                      field("hash_sum", float64()),
-                                      field("key_0", int64()),
-                                  }),
-                                  R"([
+TEST(GroupBy, SumOnly) {
+  for (bool use_exec_plan : {false, true}) {
+    for (bool use_threads : {true, false}) {
+      SCOPED_TRACE(use_threads ? "parallel/merged" : "serial");
+
+      auto table = TableFromJSON(
+          schema({field("argument", float64()), field("key", int64())}), {R"([
+    [1.0,   1],
+    [null,  1]
+                        ])",
+                                                                          R"([
+    [0.0,   2],
+    [null,  3],
+    [4.0,   null],
+    [3.25,  1],
+    [0.125, 2]
+                        ])",
+                                                                          R"([
+    [-0.25, 2],
+    [0.75,  null],
+    [null,  3]
+                        ])"});
+
+      ASSERT_OK_AND_ASSIGN(Datum aggregated_and_grouped,
+                           GroupByTest({table->GetColumnByName("argument")},
+                                       {table->GetColumnByName("key")},
+                                       {
+                                           {"hash_sum", nullptr},
+                                       },
+                                       use_threads, use_exec_plan));
+      SortBy({"key_0"}, &aggregated_and_grouped);
+
+      AssertDatumsEqual(ArrayFromJSON(struct_({
+                                          field("hash_sum", float64()),
+                                          field("key_0", int64()),
+                                      }),
+                                      R"([
     [4.25,   1],
     [-0.125, 2],
     [null,   3],
     [4.75,   null]
   ])"),
-                    aggregated_and_grouped,
-                    /*verbose=*/true);
+                        aggregated_and_grouped,
+                        /*verbose=*/true);
+    }
+  }
 }
 
 TEST(GroupBy, MinMaxOnly) {
-  auto batch = RecordBatchFromJSON(
-      schema({field("argument", float64()), field("key", int64())}), R"([
+  for (bool use_exec_plan : {false, true}) {
+    for (bool use_threads : {true, false}) {
+      SCOPED_TRACE(use_threads ? "parallel/merged" : "serial");
+
+      auto table = TableFromJSON(
+          schema({field("argument", float64()), field("key", int64())}), {R"([
     [1.0,   1],
-    [null,  1],
+    [null,  1]
+                        ])",
+                                                                          R"([
     [0.0,   2],
     [null,  3],
     [4.0,   null],
     [3.25,  1],
-    [0.125, 2],
+    [0.125, 2]
+                        ])",
+                                                                          R"([
     [-0.25, 2],
     [0.75,  null],
     [null,  3]
-  ])");
-
-  ASSERT_OK_AND_ASSIGN(Datum aggregated_and_grouped,
-                       internal::GroupBy({batch->GetColumnByName("argument")},
-                                         {batch->GetColumnByName("key")},
-                                         {
-                                             {"hash_min_max", nullptr},
-                                         }));
+                        ])"});
 
-  AssertDatumsEqual(ArrayFromJSON(struct_({
-                                      field("hash_min_max", struct_({
-                                                                field("min", float64()),
-                                                                field("max", float64()),
-                                                            })),
-                                      field("key_0", int64()),
-                                  }),
-                                  R"([
+      ASSERT_OK_AND_ASSIGN(Datum aggregated_and_grouped,
+                           GroupByTest({table->GetColumnByName("argument")},
+                                       {table->GetColumnByName("key")},
+                                       {
+                                           {"hash_min_max", nullptr},
+                                       },
+                                       use_threads, use_exec_plan));
+      SortBy({"key_0"}, &aggregated_and_grouped);
+
+      AssertDatumsEqual(
+          ArrayFromJSON(struct_({
+                            field("hash_min_max", struct_({
+                                                      field("min", float64()),
+                                                      field("max", float64()),
+                                                  })),
+                            field("key_0", int64()),
+                        }),
+                        R"([
     [{"min": 1.0,   "max": 3.25},  1],
     [{"min": -0.25, "max": 0.125}, 2],
     [{"min": null,  "max": null},  3],
     [{"min": 0.75,  "max": 4.0},   null]
   ])"),
-                    aggregated_and_grouped,
-                    /*verbose=*/true);
+          aggregated_and_grouped,
+          /*verbose=*/true);
+    }
+  }
 }
 
 TEST(GroupBy, CountAndSum) {
@@ -845,12 +955,6 @@ TEST(GroupBy, MinMaxWithNewGroupsInChunkedArray) {
                     /*verbose=*/true);
 }
 
-ExecContext* small_chunksize_context() {
-  static ExecContext ctx;
-  ctx.set_exec_chunksize(2);
-  return &ctx;
-}
-
 TEST(GroupBy, SmallChunkSizeSumOnly) {
   auto batch = RecordBatchFromJSON(
       schema({field("argument", float64()), field("key", int64())}), R"([
diff --git a/cpp/src/arrow/dataset/scanner.cc b/cpp/src/arrow/dataset/scanner.cc
index 0a289985ca2..192f84f46df 100644
--- a/cpp/src/arrow/dataset/scanner.cc
+++ b/cpp/src/arrow/dataset/scanner.cc
@@ -456,6 +456,12 @@ Result<AsyncGenerator<EnumeratedRecordBatchGenerator>> FragmentsToBatches(
                              });
 }
 
+const FieldVector kAugmentedFields{
+    field("__fragment_index", int32()),
+    field("__batch_index", int32()),
+    field("__last_in_fragment", boolean()),
+};
+
 Result<compute::ExecNode*> MakeScanNode(compute::ExecPlan* plan,
                                         FragmentGenerator fragment_gen,
                                         std::shared_ptr<ScanOptions> options) {
@@ -496,12 +502,12 @@ Result<compute::ExecNode*> MakeScanNode(compute::ExecPlan* plan,
         return batch;
       });
 
-  auto augmented_fields = options->dataset_schema->fields();
-  augmented_fields.push_back(field("__fragment_index", int32()));
-  augmented_fields.push_back(field("__batch_index", int32()));
-  augmented_fields.push_back(field("__last_in_fragment", boolean()));
-  return compute::MakeSourceNode(plan, "dataset_scan",
-                                 schema(std::move(augmented_fields)), std::move(gen));
+  auto fields = options->dataset_schema->fields();
+  for (const auto& aug_field : kAugmentedFields) {
+    fields.push_back(aug_field);
+  }
+  return compute::MakeSourceNode(plan, "dataset_scan", schema(std::move(fields)),
+                                 std::move(gen));
 }
 
 class OneShotScanTask : public ScanTask {
@@ -776,24 +782,13 @@ Future<std::shared_ptr<Table>> AsyncScanner::ToTableAsync(
   });
 }
 
-namespace {
-Result<int64_t> GetSelectionSize(const Datum& selection, int64_t length) {
-  if (length == 0) return 0;
-
-  if (selection.is_scalar()) {
-    if (!selection.scalar()->is_valid) return 0;
-    if (!selection.scalar_as<BooleanScalar>().value) return 0;
-    return length;
-  }
-
-  ARROW_ASSIGN_OR_RAISE(auto count, compute::Sum(selection));
-  return static_cast<int64_t>(count.scalar_as<UInt64Scalar>().value);
-}
-}  // namespace
-
 Result<int64_t> AsyncScanner::CountRows() {
   ARROW_ASSIGN_OR_RAISE(auto fragment_gen, GetFragments());
-  ARROW_ASSIGN_OR_RAISE(auto plan, compute::ExecPlan::Make());
+
+  auto cpu_executor = scan_options_->use_threads ? internal::GetCpuThreadPool() : nullptr;
+  compute::ExecContext exec_context(scan_options_->pool, cpu_executor);
+
+  ARROW_ASSIGN_OR_RAISE(auto plan, compute::ExecPlan::Make(&exec_context));
   // Drop projection since we only need to count rows
   auto options = std::make_shared<ScanOptions>(*scan_options_);
   RETURN_NOT_OK(SetProjection(options.get(), std::vector<std::string>()));
@@ -802,7 +797,7 @@ Result<int64_t> AsyncScanner::CountRows() {
 
   fragment_gen = MakeMappedGenerator(
       std::move(fragment_gen), [&](const std::shared_ptr<Fragment>& fragment) {
-        return fragment->CountRows(scan_options_->filter, scan_options_)
+        return fragment->CountRows(options->filter, options)
             .Then([&, fragment](util::optional<int64_t> fast_count) mutable
                   -> std::shared_ptr<Fragment> {
               if (fast_count) {
@@ -825,25 +820,21 @@ Result<int64_t> AsyncScanner::CountRows() {
       auto get_selection,
       compute::MakeProjectNode(scan, "get_selection", {options->filter}));
 
+  ARROW_ASSIGN_OR_RAISE(
+      auto sum_selection,
+      compute::MakeScalarAggregateNode(get_selection, "sum_selection",
+                                       {compute::internal::Aggregate{"sum", nullptr}}));
+
   AsyncGenerator<util::optional<compute::ExecBatch>> sink_gen =
-      compute::MakeSinkNode(get_selection, "sink");
+      compute::MakeSinkNode(sum_selection, "sink");
 
   RETURN_NOT_OK(plan->StartProducing());
-
-  RETURN_NOT_OK(
-      VisitAsyncGenerator(std::move(sink_gen),
-                          [&](const util::optional<compute::ExecBatch>& batch) {
-                            // TODO replace with scalar aggregation node
-                            ARROW_ASSIGN_OR_RAISE(
-                                int64_t slow_count,
-                                GetSelectionSize(batch->values[0], batch->length));
-                            total += slow_count;
-                            return Status::OK();
-                          })
-          .status());
-
+  auto maybe_slow_count = sink_gen().result();
   plan->finished().Wait();
 
+  ARROW_ASSIGN_OR_RAISE(auto slow_count, maybe_slow_count);
+  total += slow_count->values[0].scalar_as<UInt64Scalar>().value;
+
   return total.load();
 }
 
@@ -1157,6 +1148,25 @@ Result<int64_t> SyncScanner::CountRows() {
 Result<compute::ExecNode*> MakeScanNode(compute::ExecPlan* plan,
                                         std::shared_ptr<Dataset> dataset,
                                         std::shared_ptr<ScanOptions> scan_options) {
+  if (scan_options->dataset_schema == nullptr) {
+    scan_options->dataset_schema = dataset->schema();
+  }
+
+  if (!scan_options->filter.IsBound()) {
+    ARROW_ASSIGN_OR_RAISE(scan_options->filter,
+                          scan_options->filter.Bind(*dataset->schema()));
+  }
+
+  if (!scan_options->projection.IsBound()) {
+    auto fields = dataset->schema()->fields();
+    for (const auto& aug_field : kAugmentedFields) {
+      fields.push_back(aug_field);
+    }
+
+    ARROW_ASSIGN_OR_RAISE(scan_options->projection,
+                          scan_options->projection.Bind(Schema(std::move(fields))));
+  }
+
   // using a generator for speculative forward compatibility with async fragment discovery
   ARROW_ASSIGN_OR_RAISE(auto fragments_it, dataset->GetFragments(scan_options->filter));
   ARROW_ASSIGN_OR_RAISE(auto fragments_vec, fragments_it.ToVector());
@@ -1175,9 +1185,9 @@ Result<compute::ExecNode*> MakeAugmentedProjectNode(
     }
   }
 
-  for (auto aug_name : {"__fragment_index", "__batch_index", "__last_in_fragment"}) {
-    exprs.push_back(compute::field_ref(aug_name));
-    names.emplace_back(aug_name);
+  for (const auto& aug_field : kAugmentedFields) {
+    exprs.push_back(compute::field_ref(aug_field->name()));
+    names.push_back(aug_field->name());
   }
   return compute::MakeProjectNode(input, std::move(label), std::move(exprs),
                                   std::move(names));
diff --git a/cpp/src/arrow/dataset/scanner_test.cc b/cpp/src/arrow/dataset/scanner_test.cc
index 5dc83c662de..de7f780183a 100644
--- a/cpp/src/arrow/dataset/scanner_test.cc
+++ b/cpp/src/arrow/dataset/scanner_test.cc
@@ -1175,6 +1175,22 @@ DatasetAndBatches MakeBasicDataset() {
 
   return {dataset, batches};
 }
+
+compute::Expression Materialize(std::vector<std::string> names,
+                                bool include_aug_fields = false) {
+  if (include_aug_fields) {
+    for (auto aug_name : {"__fragment_index", "__batch_index", "__last_in_fragment"}) {
+      names.emplace_back(aug_name);
+    }
+  }
+
+  std::vector<compute::Expression> exprs;
+  for (const auto& name : names) {
+    exprs.push_back(field_ref(name));
+  }
+
+  return project(exprs, names);
+}
 }  // namespace
 
 TEST(ScanNode, Schema) {
@@ -1184,7 +1200,7 @@ TEST(ScanNode, Schema) {
 
   auto options = std::make_shared<ScanOptions>();
   options->use_async = true;
-  options->dataset_schema = basic.dataset->schema();
+  options->projection = Materialize({});  // set an empty projection
 
   ASSERT_OK_AND_ASSIGN(auto scan, MakeScanNode(plan.get(), basic.dataset, options));
 
@@ -1192,6 +1208,8 @@ TEST(ScanNode, Schema) {
   fields.push_back(field("__fragment_index", int32()));
   fields.push_back(field("__batch_index", int32()));
   fields.push_back(field("__last_in_fragment", boolean()));
+  // output_schema is *always* the full augmented dataset schema, regardless of projection
+  // (but some columns *may* be placeholder null Scalars if not projected)
   AssertSchemaEqual(Schema(fields), *scan->output_schema());
 }
 
@@ -1202,7 +1220,8 @@ TEST(ScanNode, Trivial) {
 
   auto options = std::make_shared<ScanOptions>();
   options->use_async = true;
-  options->dataset_schema = basic.dataset->schema();
+  // ensure all fields are materialized
+  options->projection = Materialize({"a", "b", "c"}, /*include_aug_fields=*/true);
 
   ASSERT_OK_AND_ASSIGN(auto scan, MakeScanNode(plan.get(), basic.dataset, options));
   auto sink_gen = MakeSinkNode(scan, "sink");
@@ -1220,9 +1239,9 @@ TEST(ScanNode, FilteredOnVirtualColumn) {
 
   auto options = std::make_shared<ScanOptions>();
   options->use_async = true;
-  options->dataset_schema = basic.dataset->schema();
-  ASSERT_OK_AND_ASSIGN(options->filter,
-                       less(field_ref("c"), literal(30)).Bind(*basic.dataset->schema()));
+  options->filter = less(field_ref("c"), literal(30));
+  // ensure all fields are materialized
+  options->projection = Materialize({"a", "b", "c"}, /*include_aug_fields=*/true);
 
   ASSERT_OK_AND_ASSIGN(auto scan, MakeScanNode(plan.get(), basic.dataset, options));
 
@@ -1245,10 +1264,9 @@ TEST(ScanNode, DeferredFilterOnPhysicalColumn) {
 
   auto options = std::make_shared<ScanOptions>();
   options->use_async = true;
-  options->dataset_schema = basic.dataset->schema();
-  ASSERT_OK_AND_ASSIGN(
-      options->filter,
-      greater(field_ref("a"), literal(4)).Bind(*basic.dataset->schema()));
+  options->filter = greater(field_ref("a"), literal(4));
+  // ensure all fields are materialized
+  options->projection = Materialize({"a", "b", "c"}, /*include_aug_fields=*/true);
 
   ASSERT_OK_AND_ASSIGN(auto scan, MakeScanNode(plan.get(), basic.dataset, options));
 
@@ -1270,8 +1288,7 @@ TEST(ScanNode, DISABLED_ProjectionPushdown) {
 
   auto options = std::make_shared<ScanOptions>();
   options->use_async = true;
-  options->dataset_schema = basic.dataset->schema();
-  ASSERT_OK(SetProjection(options.get(), {field_ref("b")}, {"b"}));
+  options->projection = Materialize({"b"}, /*include_aug_fields=*/true);
 
   ASSERT_OK_AND_ASSIGN(auto scan, MakeScanNode(plan.get(), basic.dataset, options));
 
@@ -1298,16 +1315,14 @@ TEST(ScanNode, MaterializationOfVirtualColumn) {
 
   auto options = std::make_shared<ScanOptions>();
   options->use_async = true;
-  options->dataset_schema = basic.dataset->schema();
+  options->projection = Materialize({"a", "b", "c"}, /*include_aug_fields=*/true);
 
   ASSERT_OK_AND_ASSIGN(auto scan, MakeScanNode(plan.get(), basic.dataset, options));
 
   ASSERT_OK_AND_ASSIGN(
       auto project,
-      compute::MakeProjectNode(
-          scan, "project",
-          {field_ref("a"), field_ref("b"), field_ref("c"), field_ref("__fragment_index"),
-           field_ref("__batch_index"), field_ref("__last_in_fragment")}));
+      dataset::MakeAugmentedProjectNode(
+          scan, "project", {field_ref("a"), field_ref("b"), field_ref("c")}));
 
   auto sink_gen = MakeSinkNode(project, "sink");
 
@@ -1352,16 +1367,12 @@ TEST(ScanNode, MinimalEndToEnd) {
   auto options = std::make_shared<ScanOptions>();
   // sync scanning is not supported by ScanNode
   options->use_async = true;
-  // for now, we must replicate the dataset schema here
-  options->dataset_schema = dataset->schema();
   // specify the filter
   compute::Expression b_is_true = field_ref("b");
-  ASSERT_OK_AND_ASSIGN(b_is_true, b_is_true.Bind(*dataset->schema()));
   options->filter = b_is_true;
   // for now, specify the projection as the full project expression (eventually this can
   // just be a list of materialized field names)
   compute::Expression a_times_2 = call("multiply", {field_ref("a"), literal(2)});
-  ASSERT_OK_AND_ASSIGN(a_times_2, a_times_2.Bind(*dataset->schema()));
   options->projection =
       call("make_struct", {a_times_2}, compute::MakeStructOptions{{"a * 2"}});
 
@@ -1388,20 +1399,181 @@ TEST(ScanNode, MinimalEndToEnd) {
   std::shared_ptr<RecordBatchReader> sink_reader = compute::MakeGeneratorReader(
       schema({field("a * 2", int32())}), std::move(sink_gen), exec_context.memory_pool());
 
-  // start the ExecPlan then wait 1s for completion
+  // start the ExecPlan
   ASSERT_OK(plan->StartProducing());
-  ASSERT_TRUE(plan->finished().Wait(/*seconds=*/1)) << "ExecPlan didn't finish within 1s";
 
   // collect sink_reader into a Table
   ASSERT_OK_AND_ASSIGN(auto collected, Table::FromRecordBatchReader(sink_reader.get()));
 
+  // wait 1s for completion
+  ASSERT_TRUE(plan->finished().Wait(/*seconds=*/1)) << "ExecPlan didn't finish within 1s";
+
   auto expected = TableFromJSON(schema({field("a * 2", int32())}), {
                                                                        R"([
                                                {"a * 2": 4},
                                                {"a * 2": null},
                                                {"a * 2": null}
                                           ])"});
+  AssertTablesEqual(*expected, *collected, /*same_chunk_layout=*/false);
+}
+
+TEST(ScanNode, MinimalScalarAggEndToEnd) {
+  // NB: This test is here for didactic purposes
+
+  // Specify a MemoryPool and ThreadPool for the ExecPlan
+  compute::ExecContext exec_context(default_memory_pool(), internal::GetCpuThreadPool());
+
+  // A ScanNode is constructed from an ExecPlan (into which it is inserted),
+  // a Dataset (whose batches will be scanned), and ScanOptions (to specify a filter for
+  // predicate pushdown, a projection to skip materialization of unnecessary columns, ...)
+  ASSERT_OK_AND_ASSIGN(std::shared_ptr<compute::ExecPlan> plan,
+                       compute::ExecPlan::Make(&exec_context));
+
+  std::shared_ptr<Dataset> dataset = std::make_shared<InMemoryDataset>(
+      TableFromJSON(schema({field("a", int32()), field("b", boolean())}),
+                    {
+                        R"([{"a": 1,    "b": null},
+                            {"a": 2,    "b": true}])",
+                        R"([{"a": null, "b": true},
+                            {"a": 3,    "b": false}])",
+                        R"([{"a": null, "b": true},
+                            {"a": 4,    "b": false}])",
+                        R"([{"a": 5,    "b": null},
+                            {"a": 6,    "b": false},
+                            {"a": 7,    "b": false}])",
+                    }));
+
+  auto options = std::make_shared<ScanOptions>();
+  // sync scanning is not supported by ScanNode
+  options->use_async = true;
+  // specify the filter
+  compute::Expression b_is_true = field_ref("b");
+  options->filter = b_is_true;
+  // for now, specify the projection as the full project expression (eventually this can
+  // just be a list of materialized field names)
+  compute::Expression a_times_2 = call("multiply", {field_ref("a"), literal(2)});
+  options->projection =
+      call("make_struct", {a_times_2}, compute::MakeStructOptions{{"a * 2"}});
+
+  // construct the scan node
+  ASSERT_OK_AND_ASSIGN(compute::ExecNode * scan,
+                       dataset::MakeScanNode(plan.get(), dataset, options));
+
+  // pipe the scan node into a filter node
+  ASSERT_OK_AND_ASSIGN(compute::ExecNode * filter,
+                       compute::MakeFilterNode(scan, "filter", b_is_true));
+
+  // pipe the filter node into a project node
+  ASSERT_OK_AND_ASSIGN(compute::ExecNode * project,
+                       compute::MakeProjectNode(filter, "project", {a_times_2}));
+
+  // pipe the projection into a scalar aggregate node
+  ASSERT_OK_AND_ASSIGN(
+      compute::ExecNode * sum,
+      compute::MakeScalarAggregateNode(project, "scalar_agg",
+                                       {compute::internal::Aggregate{"sum", nullptr}}));
+
+  // finally, pipe the project node into a sink node
+  auto sink_gen = compute::MakeSinkNode(sum, "sink");
+
+  // translate sink_gen (async) to sink_reader (sync)
+  std::shared_ptr<RecordBatchReader> sink_reader = compute::MakeGeneratorReader(
+      schema({field("sum", int64())}), std::move(sink_gen), exec_context.memory_pool());
 
+  // start the ExecPlan
+  ASSERT_OK(plan->StartProducing());
+
+  // collect sink_reader into a Table
+  ASSERT_OK_AND_ASSIGN(auto collected, Table::FromRecordBatchReader(sink_reader.get()));
+
+  // wait 1s for completion
+  ASSERT_TRUE(plan->finished().Wait(/*seconds=*/1)) << "ExecPlan didn't finish within 1s";
+
+  auto expected = TableFromJSON(schema({field("sum", int64())}), {
+                                                                     R"([
+                                               {"sum": 4}
+                                          ])"});
+  AssertTablesEqual(*expected, *collected, /*same_chunk_layout=*/false);
+}
+
+TEST(ScanNode, MinimalGroupedAggEndToEnd) {
+  // NB: This test is here for didactic purposes
+
+  // Specify a MemoryPool and ThreadPool for the ExecPlan
+  compute::ExecContext exec_context(default_memory_pool(), internal::GetCpuThreadPool());
+
+  // A ScanNode is constructed from an ExecPlan (into which it is inserted),
+  // a Dataset (whose batches will be scanned), and ScanOptions (to specify a filter for
+  // predicate pushdown, a projection to skip materialization of unnecessary columns, ...)
+  ASSERT_OK_AND_ASSIGN(std::shared_ptr<compute::ExecPlan> plan,
+                       compute::ExecPlan::Make(&exec_context));
+
+  std::shared_ptr<Dataset> dataset = std::make_shared<InMemoryDataset>(
+      TableFromJSON(schema({field("a", int32()), field("b", boolean())}),
+                    {
+                        R"([{"a": 1,    "b": null},
+                            {"a": 2,    "b": true}])",
+                        R"([{"a": null, "b": true},
+                            {"a": 3,    "b": false}])",
+                        R"([{"a": null, "b": true},
+                            {"a": 4,    "b": false}])",
+                        R"([{"a": 5,    "b": null},
+                            {"a": 6,    "b": false},
+                            {"a": 7,    "b": false}])",
+                    }));
+
+  auto options = std::make_shared<ScanOptions>();
+  // sync scanning is not supported by ScanNode
+  options->use_async = true;
+  // specify the filter
+  compute::Expression b_is_true = field_ref("b");
+  options->filter = b_is_true;
+  // for now, specify the projection as the full project expression (eventually this can
+  // just be a list of materialized field names)
+  compute::Expression a_times_2 = call("multiply", {field_ref("a"), literal(2)});
+  compute::Expression b = field_ref("b");
+  options->projection =
+      call("make_struct", {a_times_2, b}, compute::MakeStructOptions{{"a * 2", "b"}});
+
+  // construct the scan node
+  ASSERT_OK_AND_ASSIGN(compute::ExecNode * scan,
+                       dataset::MakeScanNode(plan.get(), dataset, options));
+
+  // pipe the scan node into a project node
+  ASSERT_OK_AND_ASSIGN(
+      compute::ExecNode * project,
+      compute::MakeProjectNode(scan, "project", {a_times_2, b}, {"a * 2", "b"}));
+
+  // pipe the projection into a grouped aggregate node
+  ASSERT_OK_AND_ASSIGN(compute::ExecNode * sum,
+                       compute::MakeGroupByNode(
+                           project, "grouped_agg", /*keys=*/{"b"}, /*targets=*/{"a * 2"},
+                           {compute::internal::Aggregate{"hash_sum", nullptr}}));
+
+  // finally, pipe the project node into a sink node
+  auto sink_gen = compute::MakeSinkNode(sum, "sink");
+
+  // translate sink_gen (async) to sink_reader (sync)
+  std::shared_ptr<RecordBatchReader> sink_reader = compute::MakeGeneratorReader(
+      schema({field("hash_sum", int64()), field("b", boolean())}), std::move(sink_gen),
+      exec_context.memory_pool());
+
+  // start the ExecPlan
+  ASSERT_OK(plan->StartProducing());
+
+  // collect sink_reader into a Table
+  ASSERT_OK_AND_ASSIGN(auto collected, Table::FromRecordBatchReader(sink_reader.get()));
+
+  // wait 1s for completion
+  ASSERT_TRUE(plan->finished().Wait(/*seconds=*/1)) << "ExecPlan didn't finish within 1s";
+
+  auto expected =
+      TableFromJSON(schema({field("hash_sum", int64()), field("b", boolean())}), {
+                                                                                     R"([
+                                               {"hash_sum": 12, "b": null},
+                                               {"hash_sum": 4,  "b": true},
+                                               {"hash_sum": 40, "b": false}
+                                          ])"});
   AssertTablesEqual(*expected, *collected, /*same_chunk_layout=*/false);
 }
 
diff --git a/cpp/src/arrow/util/future.cc b/cpp/src/arrow/util/future.cc
index f288a15be3f..fc8022a95e4 100644
--- a/cpp/src/arrow/util/future.cc
+++ b/cpp/src/arrow/util/future.cc
@@ -245,7 +245,8 @@ class ConcreteFutureImpl : public FutureImpl {
     CallbackRecord callback_record{std::move(callback), opts};
     if (IsFutureFinished(state_)) {
       lock.unlock();
-      RunOrScheduleCallback(std::move(callback_record), /*in_add_callback=*/true);
+      RunOrScheduleCallback(shared_from_this(), std::move(callback_record),
+                            /*in_add_callback=*/true);
     } else {
       callbacks_.push_back(std::move(callback_record));
     }
@@ -263,8 +264,8 @@ class ConcreteFutureImpl : public FutureImpl {
     }
   }
 
-  bool ShouldScheduleCallback(const CallbackRecord& callback_record,
-                              bool in_add_callback) {
+  static bool ShouldScheduleCallback(const CallbackRecord& callback_record,
+                                     bool in_add_callback) {
     switch (callback_record.options.should_schedule) {
       case ShouldSchedule::Never:
         return false;
@@ -280,7 +281,9 @@ class ConcreteFutureImpl : public FutureImpl {
     }
   }
 
-  void RunOrScheduleCallback(CallbackRecord&& callback_record, bool in_add_callback) {
+  static void RunOrScheduleCallback(const std::shared_ptr<FutureImpl>& self,
+                                    CallbackRecord&& callback_record,
+                                    bool in_add_callback) {
     if (ShouldScheduleCallback(callback_record, in_add_callback)) {
       struct CallbackTask {
         void operator()() { std::move(callback)(*self); }
@@ -289,10 +292,10 @@ class ConcreteFutureImpl : public FutureImpl {
         std::shared_ptr<FutureImpl> self;
       };
       // Need to keep `this` alive until the callback has a chance to be scheduled.
-      CallbackTask task{std::move(callback_record.callback), shared_from_this()};
+      CallbackTask task{std::move(callback_record.callback), self};
       DCHECK_OK(callback_record.options.executor->Spawn(std::move(task)));
     } else {
-      std::move(callback_record.callback)(*this);
+      std::move(callback_record.callback)(*self);
     }
   }
 
@@ -311,16 +314,18 @@ class ConcreteFutureImpl : public FutureImpl {
     }
     cv_.notify_all();
 
+    auto callbacks = std::move(callbacks_);
+    auto self = shared_from_this();
+
     // run callbacks, lock not needed since the future is finished by this
     // point so nothing else can modify the callbacks list and it is safe
     // to iterate.
     //
     // In fact, it is important not to hold the locks because the callback
     // may be slow or do its own locking on other resources
-    for (auto& callback_record : callbacks_) {
-      RunOrScheduleCallback(std::move(callback_record), /*in_add_callback=*/false);
+    for (auto& callback_record : callbacks) {
+      RunOrScheduleCallback(self, std::move(callback_record), /*in_add_callback=*/false);
     }
-    callbacks_.clear();
   }
 
   void DoWait() {
diff --git a/cpp/src/arrow/util/unreachable.cc b/cpp/src/arrow/util/unreachable.cc
new file mode 100644
index 00000000000..4ffe3a8f787
--- /dev/null
+++ b/cpp/src/arrow/util/unreachable.cc
@@ -0,0 +1,29 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/util/unreachable.h"
+
+#include "arrow/util/logging.h"
+
+namespace arrow {
+
+[[noreturn]] void Unreachable(const char* message) {
+  DCHECK(false) << message;
+  std::abort();
+}
+
+}  // namespace arrow
diff --git a/cpp/src/arrow/util/unreachable.h b/cpp/src/arrow/util/unreachable.h
new file mode 100644
index 00000000000..027f76e84d2
--- /dev/null
+++ b/cpp/src/arrow/util/unreachable.h
@@ -0,0 +1,22 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+namespace arrow {
+
+[[noreturn]] void Unreachable(const char* message = "Unreachable");
+
+}  // namespace arrow

From 42e5726b543135f18db1560278e062b9a7941f5c Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Fri, 23 Jul 2021 11:16:22 -0500
Subject: [PATCH 630/719] ARROW-13433: [R] Remove CLI hack from Valgrind test

Closes #10779 from jonkeane/ARROW-13433

Authored-by: Jonathan Keane <jkeane@gmail.com>
Signed-off-by: Jonathan Keane <jkeane@gmail.com>
---
 ci/scripts/r_valgrind.sh | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/ci/scripts/r_valgrind.sh b/ci/scripts/r_valgrind.sh
index 68c8dd57093..ae61d076655 100755
--- a/ci/scripts/r_valgrind.sh
+++ b/ci/scripts/r_valgrind.sh
@@ -29,11 +29,6 @@ pushd ${source_dir}/tests
 
 export TEST_R_WITH_ARROW=TRUE
 
-# Set cli to not use a separate thread. This thread isn't explicitly closed,
-# which triggers a valgrind possibly lost error. We can remove this when
-# https://github.com/r-lib/cli/issues/311 is resolved + released on cran.
-export CLI_NO_THREAD=1
-
 # to generate suppression files run:
 # ${R_BIN} --vanilla -d "valgrind --tool=memcheck --leak-check=full --track-origins=yes --gen-suppressions=all --log-file=memcheck.log" -f testtthat.supp
 ${R_BIN} --vanilla -d "valgrind --tool=memcheck --leak-check=full --track-origins=yes --suppressions=/${1}/ci/etc/valgrind-cran.supp" -f testthat.R |& tee testthat.out

From 09e6bb2cdcbf1aa24ecff72808b2749e9c623673 Mon Sep 17 00:00:00 2001
From: Tahsin Hassan <thassan@ah-thassan-mac.dhcp.mathworks.com>
Date: Sat, 24 Jul 2021 12:06:58 +0900
Subject: [PATCH 631/719] ARROW-13056: [MATLAB] Add a matlab label for dev Pull
 Requests

Create a "lang-matlab" label for Pull Requests

https://issues.apache.org/jira/browse/ARROW-13056
Email : thread
https://lists.apache.org/thread.html/rb348bb8b3c998ba081b68fa24ab86ceb542ecc938bced92b95516cc3%40%3Cdev.arrow.apache.org%3E

Closes #10621 from tahsinH/arrow_13056

Authored-by: Tahsin Hassan <thassan@ah-thassan-mac.dhcp.mathworks.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 .github/workflows/dev_pr/labeler.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/workflows/dev_pr/labeler.yml b/.github/workflows/dev_pr/labeler.yml
index 235b5918902..8860d91f813 100644
--- a/.github/workflows/dev_pr/labeler.yml
+++ b/.github/workflows/dev_pr/labeler.yml
@@ -36,6 +36,9 @@ lang-js:
 lang-julia:
   - julia/**/*
 
+lang-matlab:
+  - matlab/**/*
+
 lang-python:
   - python/**/*
 

From 91b751b854b0aeea1c640372de4824ead9e4b1e1 Mon Sep 17 00:00:00 2001
From: Weston Pace <weston.pace@gmail.com>
Date: Fri, 23 Jul 2021 18:25:58 -1000
Subject: [PATCH 632/719] ARROW-13440: Added a basic mapping generator that
 does not queue incoming jobs.  This allows it to forward async-reentrant
 pressure to the source.  Fixed some issues in the CSV reader that were
 preventing it from running truly parallel.  Performance is now significantly
 better but still not quite the same as the threaded reader.  For the NY taxi
 dataset the streaming read time went from ~7 seconds to ~1.6 seconds. 
 However, the file reader is still at ~0.8 seconds.  I'll do more
 investigation later.

---
 cpp/src/arrow/csv/reader.cc                |  17 ++-
 cpp/src/arrow/testing/gtest_util.cc        |  27 +++-
 cpp/src/arrow/testing/gtest_util.h         |   3 +
 cpp/src/arrow/util/async_generator.h       |  90 ++++++++++--
 cpp/src/arrow/util/async_generator_test.cc | 157 ++++++++++++++++-----
 5 files changed, 235 insertions(+), 59 deletions(-)

diff --git a/cpp/src/arrow/csv/reader.cc b/cpp/src/arrow/csv/reader.cc
index 1a7836561da..1f8643ae6cb 100644
--- a/cpp/src/arrow/csv/reader.cc
+++ b/cpp/src/arrow/csv/reader.cc
@@ -428,7 +428,7 @@ class BlockParsingOperator {
     if (count_rows_) {
       num_rows_seen_ += parser->num_rows();
     }
-    RETURN_NOT_OK(block.consume_bytes(parsed_size));
+    DCHECK(!block.consume_bytes);
     return ParsedBlock{std::move(parser), block.block_index,
                        static_cast<int64_t>(parsed_size) + block.bytes_skipped};
   }
@@ -892,12 +892,21 @@ class StreamingReaderImpl : public ReaderMixin,
         auto decoder_op,
         BlockDecodingOperator::Make(io_context_, convert_options_, conversion_schema_));
 
-    auto block_gen = SerialBlockReader::MakeAsyncIterator(
+    auto block_gen = ThreadedBlockReader::MakeAsyncIterator(
         std::move(buffer_generator), MakeChunker(parse_options_), std::move(after_header),
         read_options_.skip_rows_after_names);
+    AsyncGenerator<CSVBlock> spawning_block_gen = [block_gen]() {
+      CallbackOptions callback_options;
+      callback_options.executor = internal::GetCpuThreadPool();
+      callback_options.should_schedule = ShouldSchedule::Always;
+      return block_gen().Then([](const CSVBlock& val) { return val; },
+                              [](const Status& err) -> Result<CSVBlock> { return err; },
+                              callback_options);
+    };
     auto parsed_block_gen =
-        MakeMappedGenerator(std::move(block_gen), std::move(parser_op));
-    auto rb_gen = MakeMappedGenerator(std::move(parsed_block_gen), std::move(decoder_op));
+        MakeMappedGenerator(std::move(spawning_block_gen), std::move(parser_op));
+    auto rb_gen = MakeMappedGenerator(std::move(parsed_block_gen), std::move(decoder_op),
+                                      /*queue_requests=*/false);
 
     auto self = shared_from_this();
     return rb_gen().Then([self, rb_gen, max_readahead](const DecodedBlock& first_block) {
diff --git a/cpp/src/arrow/testing/gtest_util.cc b/cpp/src/arrow/testing/gtest_util.cc
index 30cc59800f4..9faa6aeb724 100644
--- a/cpp/src/arrow/testing/gtest_util.cc
+++ b/cpp/src/arrow/testing/gtest_util.cc
@@ -849,7 +849,9 @@ ExtensionTypeGuard::~ExtensionTypeGuard() {
 class GatingTask::Impl : public std::enable_shared_from_this<GatingTask::Impl> {
  public:
   explicit Impl(double timeout_seconds)
-      : timeout_seconds_(timeout_seconds), status_(), unlocked_(false) {}
+      : timeout_seconds_(timeout_seconds), status_(), unlocked_(false) {
+    async_gate_ = Future<>::Make();
+  }
 
   ~Impl() {
     if (num_running_ != num_launched_) {
@@ -871,6 +873,15 @@ class GatingTask::Impl : public std::enable_shared_from_this<GatingTask::Impl> {
     return [self] { self->RunTask(); };
   }
 
+  Future<> AsyncTask() {
+    std::unique_lock<std::mutex> lk(mx_);
+    num_launched_++;
+    num_running_++;
+    num_finished_++;
+    running_cv_.notify_all();
+    return async_gate_;
+  }
+
   void RunTask() {
     std::unique_lock<std::mutex> lk(mx_);
     num_running_++;
@@ -883,7 +894,6 @@ class GatingTask::Impl : public std::enable_shared_from_this<GatingTask::Impl> {
                                  " seconds) waiting for the gating task to be unlocked");
     }
     num_finished_++;
-    finished_cv_.notify_all();
   }
 
   Status WaitForRunning(int count) {
@@ -897,9 +907,12 @@ class GatingTask::Impl : public std::enable_shared_from_this<GatingTask::Impl> {
   }
 
   Status Unlock() {
-    std::lock_guard<std::mutex> lk(mx_);
-    unlocked_ = true;
-    unlocked_cv_.notify_all();
+    {
+      std::lock_guard<std::mutex> lk(mx_);
+      unlocked_ = true;
+      unlocked_cv_.notify_all();
+    }
+    async_gate_.MarkFinished();
     return status_;
   }
 
@@ -913,7 +926,7 @@ class GatingTask::Impl : public std::enable_shared_from_this<GatingTask::Impl> {
   std::mutex mx_;
   std::condition_variable running_cv_;
   std::condition_variable unlocked_cv_;
-  std::condition_variable finished_cv_;
+  Future<> async_gate_;
 };
 
 GatingTask::GatingTask(double timeout_seconds) : impl_(new Impl(timeout_seconds)) {}
@@ -922,6 +935,8 @@ GatingTask::~GatingTask() {}
 
 std::function<void()> GatingTask::Task() { return impl_->Task(); }
 
+Future<> GatingTask::AsyncTask() { return impl_->AsyncTask(); }
+
 Status GatingTask::Unlock() { return impl_->Unlock(); }
 
 Status GatingTask::WaitForRunning(int count) { return impl_->WaitForRunning(count); }
diff --git a/cpp/src/arrow/testing/gtest_util.h b/cpp/src/arrow/testing/gtest_util.h
index 591745151da..a37ed7fd008 100644
--- a/cpp/src/arrow/testing/gtest_util.h
+++ b/cpp/src/arrow/testing/gtest_util.h
@@ -603,6 +603,9 @@ class ARROW_TESTING_EXPORT GatingTask {
   ///
   /// Note: The GatingTask must outlive any Task instances
   std::function<void()> Task();
+  /// \brief Creates a future that will complete when this is unlocked or timed out.
+  /// The creation of this future increments the "running" count
+  Future<> AsyncTask();
   /// \brief Waits until at least count tasks are running.
   Status WaitForRunning(int count);
   /// \brief Unlocks all waiting tasks.  Returns an invalid status if any waiting task has
diff --git a/cpp/src/arrow/util/async_generator.h b/cpp/src/arrow/util/async_generator.h
index 9d1021edff5..2f76f0eb8b8 100644
--- a/cpp/src/arrow/util/async_generator.h
+++ b/cpp/src/arrow/util/async_generator.h
@@ -141,10 +141,66 @@ Future<std::vector<T>> CollectAsyncGenerator(AsyncGenerator<T> generator) {
 }
 
 /// \see MakeMappedGenerator
+///
+/// Note: This version forwards async pressure on to the source
+template <typename T, typename V>
+class BasicMappingGenerator {
+ public:
+  BasicMappingGenerator(AsyncGenerator<T> source, std::function<Future<V>(const T&)> map)
+      : state_(std::make_shared<State>(std::move(source), std::move(map))) {}
+
+  Future<V> operator()() {
+    if (state_->finished.load()) {
+      return IterationEnd<V>();
+    }
+    auto state = state_;
+    return state_->source().Then(Callback{state_},
+                                 [state](const Status& err) -> Result<V> {
+                                   state->finished.store(true);
+                                   return err;
+                                 });
+  }
+
+ private:
+  struct State {
+    State(AsyncGenerator<T> source, std::function<Future<V>(const T&)> map)
+        : source(std::move(source)), map(std::move(map)) {}
+
+    std::atomic<bool> finished{false};
+    AsyncGenerator<T> source;
+    std::function<Future<V>(const T&)> map;
+  };
+
+  struct Callback {
+    Future<V> operator()(const T& maybe_next) {
+      if (IsIterationEnd(maybe_next) || state_->finished.load()) {
+        return IterationEnd<V>();
+      }
+      Future<V> mapped_fut = state_->map(maybe_next);
+      auto state = state_;
+      return mapped_fut.Then([](const V& result) { return result; },
+                             [state](const Status& err) -> Result<V> {
+                               state->finished.store(true);
+                               return err;
+                             });
+    }
+
+    std::shared_ptr<State> state_;
+  };
+
+  std::shared_ptr<State> state_;
+};
+
+/// \see MakeMappedGenerator
+///
+/// Note: This version will queue incoming requests to prevent source
+/// from being pulled async-reentrantly (even if this generator is pulled async
+/// reentrantly).  However, it will still run the map function in parallel.
 template <typename T, typename V>
-class MappingGenerator {
+class QueueingMappingGenerator {
  public:
-  MappingGenerator(AsyncGenerator<T> source, std::function<Future<V>(const T&)> map)
+  QueueingMappingGenerator(AsyncGenerator<T> source,
+                           std::function<Future<V>(const T&)> map)
       : state_(std::make_shared<State>(std::move(source), std::move(map))) {}
 
   Future<V> operator()() {
@@ -257,6 +313,16 @@ class MappingGenerator {
 /// \brief Creates a generator that will apply the map function to each element of
 /// source.  The map function is not called on the end token.
 ///
+/// This generator is always async-reentrant.  If queue_requests is true then any
+/// reentrant requests will be queued and this generator will not pull from
+/// source_generator in an async-reentrant fashion.  If queue_requests is false then
+/// requests will be forwarded directly to the source_generator and source_generator
+/// will also be pulled in an async-reentrant fashion.
+///
+/// If this generator is pulled async-reentrantly then the map function should be
+/// thread safe (that is, it should be safe to be running map(response N) at the
+/// same time as map(response N-1)) regardless of the value of queue_requests
+///
 /// Note: This function makes a copy of `map` for each item
 /// Note: Errors returned from the `map` function will be propagated
 ///
@@ -264,14 +330,21 @@ class MappingGenerator {
 template <typename T, typename MapFn,
           typename Mapped = detail::result_of_t<MapFn(const T&)>,
           typename V = typename EnsureFuture<Mapped>::type::ValueType>
-AsyncGenerator<V> MakeMappedGenerator(AsyncGenerator<T> source_generator, MapFn map) {
+AsyncGenerator<V> MakeMappedGenerator(AsyncGenerator<T> source_generator, MapFn map,
+                                      bool queue_requests = true) {
   struct MapCallback {
     MapFn map_;
 
     Future<V> operator()(const T& val) { return ToFuture(map_(val)); }
   };
 
-  return MappingGenerator<T, V>(std::move(source_generator), MapCallback{std::move(map)});
+  if (queue_requests) {
+    return QueueingMappingGenerator<T, V>(std::move(source_generator),
+                                          MapCallback{std::move(map)});
+  } else {
+    return BasicMappingGenerator<T, V>(std::move(source_generator),
+                                       MapCallback{std::move(map)});
+  }
 }
 
 /// \see MakeSequencingGenerator
@@ -735,9 +808,7 @@ class ReadaheadGenerator {
  private:
   struct State {
     State(AsyncGenerator<T> source_generator, int max_readahead)
-        : source_generator(std::move(source_generator)), max_readahead(max_readahead) {
-      finished.store(false);
-    }
+        : source_generator(std::move(source_generator)), max_readahead(max_readahead) {}
 
     void MarkFinishedIfDone(const T& next_result) {
       if (IsIterationEnd(next_result)) {
@@ -747,7 +818,7 @@ class ReadaheadGenerator {
 
     AsyncGenerator<T> source_generator;
     int max_readahead;
-    std::atomic<bool> finished;
+    std::atomic<bool> finished{false};
     std::queue<Future<T>> readahead_queue;
   };
 
@@ -975,7 +1046,6 @@ class MergedGenerator {
           waiting_jobs(),
           mutex(),
           first(true),
-          source_exhausted(false),
           finished(false),
           num_active_subscriptions(max_subscriptions) {}
 
@@ -995,7 +1065,6 @@ class MergedGenerator {
     std::deque<std::shared_ptr<Future<T>>> waiting_jobs;
     util::Mutex mutex;
     bool first;
-    bool source_exhausted;
     bool finished;
     int num_active_subscriptions;
   };
@@ -1041,7 +1110,6 @@ class MergedGenerator {
       {
         auto guard = state->mutex.Lock();
         if (!maybe_next.ok() || IsIterationEnd(*maybe_next)) {
-          state->source_exhausted = true;
           if (!maybe_next.ok() || --state->num_active_subscriptions == 0) {
             state->finished = true;
             should_purge = true;
diff --git a/cpp/src/arrow/util/async_generator_test.cc b/cpp/src/arrow/util/async_generator_test.cc
index ccacc380392..45f33c093e2 100644
--- a/cpp/src/arrow/util/async_generator_test.cc
+++ b/cpp/src/arrow/util/async_generator_test.cc
@@ -347,57 +347,69 @@ TEST(TestAsyncUtil, Collect) {
   ASSERT_EQ(expected, collected_val);
 }
 
-TEST(TestAsyncUtil, Map) {
-  std::vector<TestInt> input = {1, 2, 3};
-  auto generator = AsyncVectorIt(input);
-  std::function<TestStr(const TestInt&)> mapper = [](const TestInt& in) {
+class MapFixture : public GeneratorTestFixture {
+ public:
+  AsyncGenerator<TestInt> MakeSlowSource(std::atomic<uint64_t>* count,
+                                         std::shared_ptr<GatingTask> gating_task) {
+    // Slow source
+    return [count, gating_task]() -> Future<TestInt> {
+      int val = (*count)++;
+      return gating_task->AsyncTask().Then([val]() -> Result<TestInt> {
+        if (val < 2) {
+          return val;
+        }
+        return IterationEnd<TestInt>();
+      });
+    };
+  }
+};
+
+TEST_P(MapFixture, SyncMapFn) {
+  AsyncGenerator<TestInt> source = this->MakeSource({1, 2, 3});
+  std::function<TestStr(const TestInt&)> map_fn = [](const TestInt& in) {
     return std::to_string(in.value);
   };
-  auto mapped = MakeMappedGenerator(std::move(generator), mapper);
+  AsyncGenerator<TestStr> mapped =
+      MakeMappedGenerator(std::move(source), std::move(map_fn));
   std::vector<TestStr> expected{"1", "2", "3"};
   AssertAsyncGeneratorMatch(expected, mapped);
 }
 
-TEST(TestAsyncUtil, MapAsync) {
-  std::vector<TestInt> input = {1, 2, 3};
-  auto generator = AsyncVectorIt(input);
-  std::function<Future<TestStr>(const TestInt&)> mapper = [](const TestInt& in) {
+TEST_P(MapFixture, AsyncMapFn) {
+  AsyncGenerator<TestInt> source = this->MakeSource({1, 2, 3});
+  std::function<Future<TestStr>(const TestInt&)> map_fn = [](const TestInt& in) {
     return SleepAsync(1e-3).Then([in]() { return TestStr(std::to_string(in.value)); });
   };
-  auto mapped = MakeMappedGenerator(std::move(generator), mapper);
+  AsyncGenerator<TestStr> mapped =
+      MakeMappedGenerator(std::move(source), std::move(map_fn));
   std::vector<TestStr> expected{"1", "2", "3"};
   AssertAsyncGeneratorMatch(expected, mapped);
 }
 
-TEST(TestAsyncUtil, MapReentrant) {
-  std::vector<TestInt> input = {1, 2};
-  auto source = AsyncVectorIt(input);
+TEST_P(MapFixture, RunsAsyncFunctionInParallel) {
+  AsyncGenerator<TestInt> source = MakeSource({1, 2});
+  std::shared_ptr<GatingTask> gating_task = GatingTask::Make();
   TrackingGenerator<TestInt> tracker(std::move(source));
-  source = MakeTransferredGenerator(AsyncGenerator<TestInt>(tracker),
-                                    internal::GetCpuThreadPool());
+  source = AsyncGenerator<TestInt>(tracker);
 
-  std::atomic<int> map_tasks_running(0);
   // Mapper blocks until can_proceed is marked finished, should start multiple map tasks
-  Future<> can_proceed = Future<>::Make();
-  std::function<Future<TestStr>(const TestInt&)> mapper = [&](const TestInt& in) {
-    map_tasks_running.fetch_add(1);
-    return can_proceed.Then([in]() { return TestStr(std::to_string(in.value)); });
+  std::function<Future<TestStr>(const TestInt&)> map_fn = [&](const TestInt& in) {
+    return gating_task->AsyncTask().Then(
+        [in]() { return TestStr(std::to_string(in.value)); });
   };
-  auto mapped = MakeMappedGenerator(std::move(source), mapper);
+  auto mapped = MakeMappedGenerator(std::move(source), std::move(map_fn));
 
   EXPECT_EQ(0, tracker.num_read());
 
   auto one = mapped();
   auto two = mapped();
 
-  BusyWait(10, [&] { return map_tasks_running.load() == 2; });
-  EXPECT_EQ(2, map_tasks_running.load());
-  EXPECT_EQ(2, tracker.num_read());
+  ASSERT_OK(gating_task->WaitForRunning(2));
 
   auto end_one = mapped();
   auto end_two = mapped();
 
-  can_proceed.MarkFinished();
+  ASSERT_OK(gating_task->Unlock());
   ASSERT_FINISHES_OK_AND_ASSIGN(auto oneval, one);
   EXPECT_EQ("1", oneval.value);
   ASSERT_FINISHES_OK_AND_ASSIGN(auto twoval, two);
@@ -408,12 +420,64 @@ TEST(TestAsyncUtil, MapReentrant) {
   ASSERT_EQ(IterationTraits<TestStr>::End(), end);
 }
 
-TEST(TestAsyncUtil, MapParallelStress) {
+TEST_P(MapFixture, BlocksReentrantPressureByDefault) {
+  std::shared_ptr<GatingTask> gating_task = GatingTask::Make();
+  std::atomic<uint64_t> num_pulled_from_source(0);
+  AsyncGenerator<TestInt> source = MakeSlowSource(&num_pulled_from_source, gating_task);
+  std::function<TestStr(const TestInt&)> map_fn = [](const TestInt& in) {
+    return std::to_string(in.value);
+  };
+  auto guard = ExpectNotAccessedReentrantly(&source);
+  auto mapped = MakeMappedGenerator(std::move(source), std::move(map_fn));
+
+  auto zero = mapped();
+  auto one = mapped();
+  auto end = mapped();
+
+  // Even though we have pulled from the mapped generator twice it should not pull
+  // reentrantly from source and so should only have pulled 1 item.
+  ASSERT_EQ(1, num_pulled_from_source);
+
+  ASSERT_OK(gating_task->Unlock());
+
+  ASSERT_FINISHES_OK_AND_EQ(TestStr("0"), zero);
+  ASSERT_FINISHES_OK_AND_EQ(TestStr("1"), one);
+  ASSERT_FINISHES_OK_AND_EQ(IterationEnd<TestStr>(), end);
+  AssertGeneratorExhausted(mapped);
+}
+
+TEST_P(MapFixture, CanOptionallyForwardReentrantPressure) {
+  std::shared_ptr<GatingTask> gating_task = GatingTask::Make();
+  std::atomic<uint64_t> num_pulled_from_source(0);
+  AsyncGenerator<TestInt> source = MakeSlowSource(&num_pulled_from_source, gating_task);
+  std::function<TestStr(const TestInt&)> map_fn = [](const TestInt& in) {
+    return std::to_string(in.value);
+  };
+  auto mapped = MakeMappedGenerator(std::move(source), std::move(map_fn), false);
+
+  auto zero = mapped();
+  auto one = mapped();
+  auto end = mapped();
+
+  ASSERT_EQ(3, num_pulled_from_source);
+
+  AssertNotFinished(zero);
+  AssertNotFinished(one);
+  AssertNotFinished(end);
+
+  ASSERT_OK(gating_task->Unlock());
+
+  ASSERT_FINISHES_OK_AND_EQ(TestStr("0"), zero);
+  ASSERT_FINISHES_OK_AND_EQ(TestStr("1"), one);
+  ASSERT_FINISHES_OK_AND_EQ(IterationEnd<TestStr>(), end);
+  AssertGeneratorExhausted(mapped);
+}
+
+TEST_P(MapFixture, QueueingMapStress) {
   constexpr int NTASKS = 10;
   constexpr int NITEMS = 10;
   for (int i = 0; i < NTASKS; i++) {
-    auto gen = MakeVectorGenerator(RangeVector(NITEMS));
-    gen = SlowdownABit(std::move(gen));
+    auto gen = MakeSource(RangeVector(NITEMS));
     auto guard = ExpectNotAccessedReentrantly(&gen);
     std::function<TestStr(const TestInt&)> mapper = [](const TestInt& in) {
       SleepABit();
@@ -426,28 +490,43 @@ TEST(TestAsyncUtil, MapParallelStress) {
   }
 }
 
-TEST(TestAsyncUtil, MapTaskFail) {
-  std::vector<TestInt> input = {1, 2, 3};
-  auto generator = AsyncVectorIt(input);
-  std::function<Result<TestStr>(const TestInt&)> mapper =
+TEST_P(MapFixture, BasicMapStress) {
+  constexpr int NTASKS = 10;
+  constexpr int NITEMS = 10;
+  for (int i = 0; i < NTASKS; i++) {
+    auto gen = MakeSource(RangeVector(NITEMS));
+    std::function<TestStr(const TestInt&)> mapper = [](const TestInt& in) {
+      SleepABit();
+      return std::to_string(in.value);
+    };
+    auto mapped = MakeMappedGenerator(std::move(gen), mapper, false);
+    mapped = MakeReadaheadGenerator(mapped, 8);
+    ASSERT_FINISHES_OK_AND_ASSIGN(auto collected, CollectAsyncGenerator(mapped));
+    ASSERT_EQ(NITEMS, collected.size());
+  }
+}
+
+TEST_P(MapFixture, MapTaskFail) {
+  auto source = MakeSource({1, 2, 3});
+  std::function<Result<TestStr>(const TestInt&)> map_fn =
       [](const TestInt& in) -> Result<TestStr> {
     if (in.value == 2) {
       return Status::Invalid("XYZ");
     }
     return TestStr(std::to_string(in.value));
   };
-  auto mapped = MakeMappedGenerator(std::move(generator), mapper);
+  auto mapped = MakeMappedGenerator(std::move(source), std::move(map_fn));
   ASSERT_FINISHES_AND_RAISES(Invalid, CollectAsyncGenerator(mapped));
 }
 
-TEST(TestAsyncUtil, MapSourceFail) {
-  std::vector<TestInt> input = {1, 2, 3};
-  auto generator = FailsAt(AsyncVectorIt(input), 1);
-  std::function<Result<TestStr>(const TestInt&)> mapper =
+TEST_P(MapFixture, MapSourceFail) {
+  auto source = MakeSource({1, 2, 3});
+  auto failing_source = FailsAt(std::move(source), 1);
+  std::function<Result<TestStr>(const TestInt&)> map_fn =
       [](const TestInt& in) -> Result<TestStr> {
     return TestStr(std::to_string(in.value));
   };
-  auto mapped = MakeMappedGenerator(std::move(generator), mapper);
+  auto mapped = MakeMappedGenerator(std::move(failing_source), std::move(map_fn));
   ASSERT_FINISHES_AND_RAISES(Invalid, CollectAsyncGenerator(mapped));
 }
 
@@ -461,6 +540,8 @@ TEST(TestAsyncUtil, Concatenated) {
   AssertAsyncGeneratorMatch(expected, concat);
 }
 
+INSTANTIATE_TEST_SUITE_P(MapTests, MapFixture, ::testing::Values(false, true));
+
 class FromFutureFixture : public GeneratorTestFixture {};
 
 TEST_P(FromFutureFixture, Basic) {

From 03533fe89d6d29276ad3705a9f1fb56435d2c62c Mon Sep 17 00:00:00 2001
From: Weston Pace <weston.pace@gmail.com>
Date: Fri, 23 Jul 2021 19:02:34 -1000
Subject: [PATCH 633/719] Revert "ARROW-13440: Added a basic mapping generator
 that does not queue incoming jobs.  This allows it to forward async-reentrant
 pressure to the source.  Fixed some issues in the CSV reader that were
 preventing it from running truly parallel.  Performance is now significantly
 better but still not quite the same as the threaded reader.  For the NY taxi
 dataset the streaming read time went from ~7 seconds to ~1.6 seconds. 
 However, the file reader is still at ~0.8 seconds.  I'll do more
 investigation later."

This reverts commit 91b751b854b0aeea1c640372de4824ead9e4b1e1.
---
 cpp/src/arrow/csv/reader.cc                |  17 +--
 cpp/src/arrow/testing/gtest_util.cc        |  27 +---
 cpp/src/arrow/testing/gtest_util.h         |   3 -
 cpp/src/arrow/util/async_generator.h       |  90 ++----------
 cpp/src/arrow/util/async_generator_test.cc | 157 +++++----------------
 5 files changed, 59 insertions(+), 235 deletions(-)

diff --git a/cpp/src/arrow/csv/reader.cc b/cpp/src/arrow/csv/reader.cc
index 1f8643ae6cb..1a7836561da 100644
--- a/cpp/src/arrow/csv/reader.cc
+++ b/cpp/src/arrow/csv/reader.cc
@@ -428,7 +428,7 @@ class BlockParsingOperator {
     if (count_rows_) {
       num_rows_seen_ += parser->num_rows();
     }
-    DCHECK(!block.consume_bytes);
+    RETURN_NOT_OK(block.consume_bytes(parsed_size));
     return ParsedBlock{std::move(parser), block.block_index,
                        static_cast<int64_t>(parsed_size) + block.bytes_skipped};
   }
@@ -892,21 +892,12 @@ class StreamingReaderImpl : public ReaderMixin,
         auto decoder_op,
         BlockDecodingOperator::Make(io_context_, convert_options_, conversion_schema_));
 
-    auto block_gen = ThreadedBlockReader::MakeAsyncIterator(
+    auto block_gen = SerialBlockReader::MakeAsyncIterator(
         std::move(buffer_generator), MakeChunker(parse_options_), std::move(after_header),
         read_options_.skip_rows_after_names);
-    AsyncGenerator<CSVBlock> spawning_block_gen = [block_gen]() {
-      CallbackOptions callback_options;
-      callback_options.executor = internal::GetCpuThreadPool();
-      callback_options.should_schedule = ShouldSchedule::Always;
-      return block_gen().Then([](const CSVBlock& val) { return val; },
-                              [](const Status& err) -> Result<CSVBlock> { return err; },
-                              callback_options);
-    };
     auto parsed_block_gen =
-        MakeMappedGenerator(std::move(spawning_block_gen), std::move(parser_op));
-    auto rb_gen = MakeMappedGenerator(std::move(parsed_block_gen), std::move(decoder_op),
-                                      /*queue_requests=*/false);
+        MakeMappedGenerator(std::move(block_gen), std::move(parser_op));
+    auto rb_gen = MakeMappedGenerator(std::move(parsed_block_gen), std::move(decoder_op));
 
     auto self = shared_from_this();
     return rb_gen().Then([self, rb_gen, max_readahead](const DecodedBlock& first_block) {
diff --git a/cpp/src/arrow/testing/gtest_util.cc b/cpp/src/arrow/testing/gtest_util.cc
index 9faa6aeb724..30cc59800f4 100644
--- a/cpp/src/arrow/testing/gtest_util.cc
+++ b/cpp/src/arrow/testing/gtest_util.cc
@@ -849,9 +849,7 @@ ExtensionTypeGuard::~ExtensionTypeGuard() {
 class GatingTask::Impl : public std::enable_shared_from_this<GatingTask::Impl> {
  public:
   explicit Impl(double timeout_seconds)
-      : timeout_seconds_(timeout_seconds), status_(), unlocked_(false) {
-    async_gate_ = Future<>::Make();
-  }
+      : timeout_seconds_(timeout_seconds), status_(), unlocked_(false) {}
 
   ~Impl() {
     if (num_running_ != num_launched_) {
@@ -873,15 +871,6 @@ class GatingTask::Impl : public std::enable_shared_from_this<GatingTask::Impl> {
     return [self] { self->RunTask(); };
   }
 
-  Future<> AsyncTask() {
-    std::unique_lock<std::mutex> lk(mx_);
-    num_launched_++;
-    num_running_++;
-    num_finished_++;
-    running_cv_.notify_all();
-    return async_gate_;
-  }
-
   void RunTask() {
     std::unique_lock<std::mutex> lk(mx_);
     num_running_++;
@@ -894,6 +883,7 @@ class GatingTask::Impl : public std::enable_shared_from_this<GatingTask::Impl> {
                                  " seconds) waiting for the gating task to be unlocked");
     }
     num_finished_++;
+    finished_cv_.notify_all();
   }
 
   Status WaitForRunning(int count) {
@@ -907,12 +897,9 @@ class GatingTask::Impl : public std::enable_shared_from_this<GatingTask::Impl> {
   }
 
   Status Unlock() {
-    {
-      std::lock_guard<std::mutex> lk(mx_);
-      unlocked_ = true;
-      unlocked_cv_.notify_all();
-    }
-    async_gate_.MarkFinished();
+    std::lock_guard<std::mutex> lk(mx_);
+    unlocked_ = true;
+    unlocked_cv_.notify_all();
     return status_;
   }
 
@@ -926,7 +913,7 @@ class GatingTask::Impl : public std::enable_shared_from_this<GatingTask::Impl> {
   std::mutex mx_;
   std::condition_variable running_cv_;
   std::condition_variable unlocked_cv_;
-  Future<> async_gate_;
+  std::condition_variable finished_cv_;
 };
 
 GatingTask::GatingTask(double timeout_seconds) : impl_(new Impl(timeout_seconds)) {}
@@ -935,8 +922,6 @@ GatingTask::~GatingTask() {}
 
 std::function<void()> GatingTask::Task() { return impl_->Task(); }
 
-Future<> GatingTask::AsyncTask() { return impl_->AsyncTask(); }
-
 Status GatingTask::Unlock() { return impl_->Unlock(); }
 
 Status GatingTask::WaitForRunning(int count) { return impl_->WaitForRunning(count); }
diff --git a/cpp/src/arrow/testing/gtest_util.h b/cpp/src/arrow/testing/gtest_util.h
index a37ed7fd008..591745151da 100644
--- a/cpp/src/arrow/testing/gtest_util.h
+++ b/cpp/src/arrow/testing/gtest_util.h
@@ -603,9 +603,6 @@ class ARROW_TESTING_EXPORT GatingTask {
   ///
   /// Note: The GatingTask must outlive any Task instances
   std::function<void()> Task();
-  /// \brief Creates a future that will complete when this is unlocked or timed out.
-  /// The creation of this future increments the "running" count
-  Future<> AsyncTask();
   /// \brief Waits until at least count tasks are running.
   Status WaitForRunning(int count);
   /// \brief Unlocks all waiting tasks.  Returns an invalid status if any waiting task has
diff --git a/cpp/src/arrow/util/async_generator.h b/cpp/src/arrow/util/async_generator.h
index 2f76f0eb8b8..9d1021edff5 100644
--- a/cpp/src/arrow/util/async_generator.h
+++ b/cpp/src/arrow/util/async_generator.h
@@ -141,66 +141,10 @@ Future<std::vector<T>> CollectAsyncGenerator(AsyncGenerator<T> generator) {
 }
 
 /// \see MakeMappedGenerator
-///
-/// Note: This version forwards async pressure on to the source
-template <typename T, typename V>
-class BasicMappingGenerator {
- public:
-  BasicMappingGenerator(AsyncGenerator<T> source, std::function<Future<V>(const T&)> map)
-      : state_(std::make_shared<State>(std::move(source), std::move(map))) {}
-
-  Future<V> operator()() {
-    if (state_->finished.load()) {
-      return IterationEnd<V>();
-    }
-    auto state = state_;
-    return state_->source().Then(Callback{state_},
-                                 [state](const Status& err) -> Result<V> {
-                                   state->finished.store(true);
-                                   return err;
-                                 });
-  }
-
- private:
-  struct State {
-    State(AsyncGenerator<T> source, std::function<Future<V>(const T&)> map)
-        : source(std::move(source)), map(std::move(map)) {}
-
-    std::atomic<bool> finished{false};
-    AsyncGenerator<T> source;
-    std::function<Future<V>(const T&)> map;
-  };
-
-  struct Callback {
-    Future<V> operator()(const T& maybe_next) {
-      if (IsIterationEnd(maybe_next) || state_->finished.load()) {
-        return IterationEnd<V>();
-      }
-      Future<V> mapped_fut = state_->map(maybe_next);
-      auto state = state_;
-      return mapped_fut.Then([](const V& result) { return result; },
-                             [state](const Status& err) -> Result<V> {
-                               state->finished.store(true);
-                               return err;
-                             });
-    }
-
-    std::shared_ptr<State> state_;
-  };
-
-  std::shared_ptr<State> state_;
-};
-
-/// \see MakeMappedGenerator
-///
-/// Note: This version will queue incoming requests to prevent source
-/// from being pulled async-reentrantly (even if this generator is pulled async
-/// reentrantly).  However, it will still run the map function in parallel.
 template <typename T, typename V>
-class QueueingMappingGenerator {
+class MappingGenerator {
  public:
-  QueueingMappingGenerator(AsyncGenerator<T> source,
-                           std::function<Future<V>(const T&)> map)
+  MappingGenerator(AsyncGenerator<T> source, std::function<Future<V>(const T&)> map)
       : state_(std::make_shared<State>(std::move(source), std::move(map))) {}
 
   Future<V> operator()() {
@@ -313,16 +257,6 @@ class QueueingMappingGenerator {
 /// \brief Creates a generator that will apply the map function to each element of
 /// source.  The map function is not called on the end token.
 ///
-/// This generator is always async-reentrant.  If queue_requests is true then any
-/// reentrant requests will be queued and this generator will not pull from
-/// source_generator in an async-reentrant fashion.  If queue_requests is false then
-/// requests will be forwarded directly to the source_generator and source_generator
-/// will also be pulled in an async-reentrant fashion.
-///
-/// If this generator is pulled async-reentrantly then the map function should be
-/// thread safe (that is, it should be safe to be running map(response N) at the
-/// same time as map(response N-1)) regardless of the value of queue_requests
-///
 /// Note: This function makes a copy of `map` for each item
 /// Note: Errors returned from the `map` function will be propagated
 ///
@@ -330,21 +264,14 @@ class QueueingMappingGenerator {
 template <typename T, typename MapFn,
           typename Mapped = detail::result_of_t<MapFn(const T&)>,
           typename V = typename EnsureFuture<Mapped>::type::ValueType>
-AsyncGenerator<V> MakeMappedGenerator(AsyncGenerator<T> source_generator, MapFn map,
-                                      bool queue_requests = true) {
+AsyncGenerator<V> MakeMappedGenerator(AsyncGenerator<T> source_generator, MapFn map) {
   struct MapCallback {
     MapFn map_;
 
     Future<V> operator()(const T& val) { return ToFuture(map_(val)); }
   };
 
-  if (queue_requests) {
-    return QueueingMappingGenerator<T, V>(std::move(source_generator),
-                                          MapCallback{std::move(map)});
-  } else {
-    return BasicMappingGenerator<T, V>(std::move(source_generator),
-                                       MapCallback{std::move(map)});
-  }
+  return MappingGenerator<T, V>(std::move(source_generator), MapCallback{std::move(map)});
 }
 
 /// \see MakeSequencingGenerator
@@ -808,7 +735,9 @@ class ReadaheadGenerator {
  private:
   struct State {
     State(AsyncGenerator<T> source_generator, int max_readahead)
-        : source_generator(std::move(source_generator)), max_readahead(max_readahead) {}
+        : source_generator(std::move(source_generator)), max_readahead(max_readahead) {
+      finished.store(false);
+    }
 
     void MarkFinishedIfDone(const T& next_result) {
       if (IsIterationEnd(next_result)) {
@@ -818,7 +747,7 @@ class ReadaheadGenerator {
 
     AsyncGenerator<T> source_generator;
     int max_readahead;
-    std::atomic<bool> finished{false};
+    std::atomic<bool> finished;
     std::queue<Future<T>> readahead_queue;
   };
 
@@ -1046,6 +975,7 @@ class MergedGenerator {
           waiting_jobs(),
           mutex(),
           first(true),
+          source_exhausted(false),
           finished(false),
           num_active_subscriptions(max_subscriptions) {}
 
@@ -1065,6 +995,7 @@ class MergedGenerator {
     std::deque<std::shared_ptr<Future<T>>> waiting_jobs;
     util::Mutex mutex;
     bool first;
+    bool source_exhausted;
     bool finished;
     int num_active_subscriptions;
   };
@@ -1110,6 +1041,7 @@ class MergedGenerator {
       {
         auto guard = state->mutex.Lock();
         if (!maybe_next.ok() || IsIterationEnd(*maybe_next)) {
+          state->source_exhausted = true;
           if (!maybe_next.ok() || --state->num_active_subscriptions == 0) {
             state->finished = true;
             should_purge = true;
diff --git a/cpp/src/arrow/util/async_generator_test.cc b/cpp/src/arrow/util/async_generator_test.cc
index 45f33c093e2..ccacc380392 100644
--- a/cpp/src/arrow/util/async_generator_test.cc
+++ b/cpp/src/arrow/util/async_generator_test.cc
@@ -347,69 +347,57 @@ TEST(TestAsyncUtil, Collect) {
   ASSERT_EQ(expected, collected_val);
 }
 
-class MapFixture : public GeneratorTestFixture {
- public:
-  AsyncGenerator<TestInt> MakeSlowSource(std::atomic<uint64_t>* count,
-                                         std::shared_ptr<GatingTask> gating_task) {
-    // Slow source
-    return [count, gating_task]() -> Future<TestInt> {
-      int val = (*count)++;
-      return gating_task->AsyncTask().Then([val]() -> Result<TestInt> {
-        if (val < 2) {
-          return val;
-        }
-        return IterationEnd<TestInt>();
-      });
-    };
-  }
-};
-
-TEST_P(MapFixture, SyncMapFn) {
-  AsyncGenerator<TestInt> source = this->MakeSource({1, 2, 3});
-  std::function<TestStr(const TestInt&)> map_fn = [](const TestInt& in) {
+TEST(TestAsyncUtil, Map) {
+  std::vector<TestInt> input = {1, 2, 3};
+  auto generator = AsyncVectorIt(input);
+  std::function<TestStr(const TestInt&)> mapper = [](const TestInt& in) {
     return std::to_string(in.value);
   };
-  AsyncGenerator<TestStr> mapped =
-      MakeMappedGenerator(std::move(source), std::move(map_fn));
+  auto mapped = MakeMappedGenerator(std::move(generator), mapper);
   std::vector<TestStr> expected{"1", "2", "3"};
   AssertAsyncGeneratorMatch(expected, mapped);
 }
 
-TEST_P(MapFixture, AsyncMapFn) {
-  AsyncGenerator<TestInt> source = this->MakeSource({1, 2, 3});
-  std::function<Future<TestStr>(const TestInt&)> map_fn = [](const TestInt& in) {
+TEST(TestAsyncUtil, MapAsync) {
+  std::vector<TestInt> input = {1, 2, 3};
+  auto generator = AsyncVectorIt(input);
+  std::function<Future<TestStr>(const TestInt&)> mapper = [](const TestInt& in) {
     return SleepAsync(1e-3).Then([in]() { return TestStr(std::to_string(in.value)); });
   };
-  AsyncGenerator<TestStr> mapped =
-      MakeMappedGenerator(std::move(source), std::move(map_fn));
+  auto mapped = MakeMappedGenerator(std::move(generator), mapper);
   std::vector<TestStr> expected{"1", "2", "3"};
   AssertAsyncGeneratorMatch(expected, mapped);
 }
 
-TEST_P(MapFixture, RunsAsyncFunctionInParallel) {
-  AsyncGenerator<TestInt> source = MakeSource({1, 2});
-  std::shared_ptr<GatingTask> gating_task = GatingTask::Make();
+TEST(TestAsyncUtil, MapReentrant) {
+  std::vector<TestInt> input = {1, 2};
+  auto source = AsyncVectorIt(input);
   TrackingGenerator<TestInt> tracker(std::move(source));
-  source = AsyncGenerator<TestInt>(tracker);
+  source = MakeTransferredGenerator(AsyncGenerator<TestInt>(tracker),
+                                    internal::GetCpuThreadPool());
 
+  std::atomic<int> map_tasks_running(0);
   // Mapper blocks until can_proceed is marked finished, should start multiple map tasks
-  std::function<Future<TestStr>(const TestInt&)> map_fn = [&](const TestInt& in) {
-    return gating_task->AsyncTask().Then(
-        [in]() { return TestStr(std::to_string(in.value)); });
+  Future<> can_proceed = Future<>::Make();
+  std::function<Future<TestStr>(const TestInt&)> mapper = [&](const TestInt& in) {
+    map_tasks_running.fetch_add(1);
+    return can_proceed.Then([in]() { return TestStr(std::to_string(in.value)); });
   };
-  auto mapped = MakeMappedGenerator(std::move(source), std::move(map_fn));
+  auto mapped = MakeMappedGenerator(std::move(source), mapper);
 
   EXPECT_EQ(0, tracker.num_read());
 
   auto one = mapped();
   auto two = mapped();
 
-  ASSERT_OK(gating_task->WaitForRunning(2));
+  BusyWait(10, [&] { return map_tasks_running.load() == 2; });
+  EXPECT_EQ(2, map_tasks_running.load());
+  EXPECT_EQ(2, tracker.num_read());
 
   auto end_one = mapped();
   auto end_two = mapped();
 
-  ASSERT_OK(gating_task->Unlock());
+  can_proceed.MarkFinished();
   ASSERT_FINISHES_OK_AND_ASSIGN(auto oneval, one);
   EXPECT_EQ("1", oneval.value);
   ASSERT_FINISHES_OK_AND_ASSIGN(auto twoval, two);
@@ -420,64 +408,12 @@ TEST_P(MapFixture, RunsAsyncFunctionInParallel) {
   ASSERT_EQ(IterationTraits<TestStr>::End(), end);
 }
 
-TEST_P(MapFixture, BlocksReentrantPressureByDefault) {
-  std::shared_ptr<GatingTask> gating_task = GatingTask::Make();
-  std::atomic<uint64_t> num_pulled_from_source(0);
-  AsyncGenerator<TestInt> source = MakeSlowSource(&num_pulled_from_source, gating_task);
-  std::function<TestStr(const TestInt&)> map_fn = [](const TestInt& in) {
-    return std::to_string(in.value);
-  };
-  auto guard = ExpectNotAccessedReentrantly(&source);
-  auto mapped = MakeMappedGenerator(std::move(source), std::move(map_fn));
-
-  auto zero = mapped();
-  auto one = mapped();
-  auto end = mapped();
-
-  // Even though we have pulled from the mapped generator twice it should not pull
-  // reentrantly from source and so should only have pulled 1 item.
-  ASSERT_EQ(1, num_pulled_from_source);
-
-  ASSERT_OK(gating_task->Unlock());
-
-  ASSERT_FINISHES_OK_AND_EQ(TestStr("0"), zero);
-  ASSERT_FINISHES_OK_AND_EQ(TestStr("1"), one);
-  ASSERT_FINISHES_OK_AND_EQ(IterationEnd<TestStr>(), end);
-  AssertGeneratorExhausted(mapped);
-}
-
-TEST_P(MapFixture, CanOptionallyForwardReentrantPressure) {
-  std::shared_ptr<GatingTask> gating_task = GatingTask::Make();
-  std::atomic<uint64_t> num_pulled_from_source(0);
-  AsyncGenerator<TestInt> source = MakeSlowSource(&num_pulled_from_source, gating_task);
-  std::function<TestStr(const TestInt&)> map_fn = [](const TestInt& in) {
-    return std::to_string(in.value);
-  };
-  auto mapped = MakeMappedGenerator(std::move(source), std::move(map_fn), false);
-
-  auto zero = mapped();
-  auto one = mapped();
-  auto end = mapped();
-
-  ASSERT_EQ(3, num_pulled_from_source);
-
-  AssertNotFinished(zero);
-  AssertNotFinished(one);
-  AssertNotFinished(end);
-
-  ASSERT_OK(gating_task->Unlock());
-
-  ASSERT_FINISHES_OK_AND_EQ(TestStr("0"), zero);
-  ASSERT_FINISHES_OK_AND_EQ(TestStr("1"), one);
-  ASSERT_FINISHES_OK_AND_EQ(IterationEnd<TestStr>(), end);
-  AssertGeneratorExhausted(mapped);
-}
-
-TEST_P(MapFixture, QueueingMapStress) {
+TEST(TestAsyncUtil, MapParallelStress) {
   constexpr int NTASKS = 10;
   constexpr int NITEMS = 10;
   for (int i = 0; i < NTASKS; i++) {
-    auto gen = MakeSource(RangeVector(NITEMS));
+    auto gen = MakeVectorGenerator(RangeVector(NITEMS));
+    gen = SlowdownABit(std::move(gen));
     auto guard = ExpectNotAccessedReentrantly(&gen);
     std::function<TestStr(const TestInt&)> mapper = [](const TestInt& in) {
       SleepABit();
@@ -490,43 +426,28 @@ TEST_P(MapFixture, QueueingMapStress) {
   }
 }
 
-TEST_P(MapFixture, BasicMapStress) {
-  constexpr int NTASKS = 10;
-  constexpr int NITEMS = 10;
-  for (int i = 0; i < NTASKS; i++) {
-    auto gen = MakeSource(RangeVector(NITEMS));
-    std::function<TestStr(const TestInt&)> mapper = [](const TestInt& in) {
-      SleepABit();
-      return std::to_string(in.value);
-    };
-    auto mapped = MakeMappedGenerator(std::move(gen), mapper, false);
-    mapped = MakeReadaheadGenerator(mapped, 8);
-    ASSERT_FINISHES_OK_AND_ASSIGN(auto collected, CollectAsyncGenerator(mapped));
-    ASSERT_EQ(NITEMS, collected.size());
-  }
-}
-
-TEST_P(MapFixture, MapTaskFail) {
-  auto source = MakeSource({1, 2, 3});
-  std::function<Result<TestStr>(const TestInt&)> map_fn =
+TEST(TestAsyncUtil, MapTaskFail) {
+  std::vector<TestInt> input = {1, 2, 3};
+  auto generator = AsyncVectorIt(input);
+  std::function<Result<TestStr>(const TestInt&)> mapper =
       [](const TestInt& in) -> Result<TestStr> {
     if (in.value == 2) {
       return Status::Invalid("XYZ");
     }
     return TestStr(std::to_string(in.value));
   };
-  auto mapped = MakeMappedGenerator(std::move(source), std::move(map_fn));
+  auto mapped = MakeMappedGenerator(std::move(generator), mapper);
   ASSERT_FINISHES_AND_RAISES(Invalid, CollectAsyncGenerator(mapped));
 }
 
-TEST_P(MapFixture, MapSourceFail) {
-  auto source = MakeSource({1, 2, 3});
-  auto failing_source = FailsAt(std::move(source), 1);
-  std::function<Result<TestStr>(const TestInt&)> map_fn =
+TEST(TestAsyncUtil, MapSourceFail) {
+  std::vector<TestInt> input = {1, 2, 3};
+  auto generator = FailsAt(AsyncVectorIt(input), 1);
+  std::function<Result<TestStr>(const TestInt&)> mapper =
       [](const TestInt& in) -> Result<TestStr> {
     return TestStr(std::to_string(in.value));
   };
-  auto mapped = MakeMappedGenerator(std::move(failing_source), std::move(map_fn));
+  auto mapped = MakeMappedGenerator(std::move(generator), mapper);
   ASSERT_FINISHES_AND_RAISES(Invalid, CollectAsyncGenerator(mapped));
 }
 
@@ -540,8 +461,6 @@ TEST(TestAsyncUtil, Concatenated) {
   AssertAsyncGeneratorMatch(expected, concat);
 }
 
-INSTANTIATE_TEST_SUITE_P(MapTests, MapFixture, ::testing::Values(false, true));
-
 class FromFutureFixture : public GeneratorTestFixture {};
 
 TEST_P(FromFutureFixture, Basic) {

From 2d921dc8620a7edb13b953b75647473eadcd4f1c Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Sat, 24 Jul 2021 09:17:19 -0500
Subject: [PATCH 634/719] ARROW-13434: [R] group_by() with an unnammed
 expression

Closes #10785 from jonkeane/ARROW-13434-groupby-expr

Authored-by: Jonathan Keane <jkeane@gmail.com>
Signed-off-by: Jonathan Keane <jkeane@gmail.com>
---
 r/NAMESPACE                            |  2 ++
 r/R/arrow-package.R                    |  4 ++--
 r/R/dplyr-group-by.R                   | 17 +++++++++++++----
 r/tests/testthat/test-dplyr-group-by.R |  8 +++++++-
 4 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/r/NAMESPACE b/r/NAMESPACE
index 814868d8ade..b0f4b0bba75 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -289,6 +289,7 @@ importFrom(bit64,print.integer64)
 importFrom(bit64,str.integer64)
 importFrom(methods,as)
 importFrom(purrr,as_mapper)
+importFrom(purrr,imap_chr)
 importFrom(purrr,keep)
 importFrom(purrr,map)
 importFrom(purrr,map2)
@@ -322,6 +323,7 @@ importFrom(rlang,new_data_mask)
 importFrom(rlang,new_environment)
 importFrom(rlang,quo_get_expr)
 importFrom(rlang,quo_is_null)
+importFrom(rlang,quo_name)
 importFrom(rlang,quo_set_expr)
 importFrom(rlang,quos)
 importFrom(rlang,seq2)
diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R
index d2bf81cf5ee..3ebd68776bb 100644
--- a/r/R/arrow-package.R
+++ b/r/R/arrow-package.R
@@ -17,9 +17,9 @@
 
 #' @importFrom stats quantile median na.omit na.exclude na.pass na.fail
 #' @importFrom R6 R6Class
-#' @importFrom purrr as_mapper map map2 map_chr map2_chr map_dfr map_int map_lgl keep
+#' @importFrom purrr as_mapper map map2 map_chr map2_chr map_dfr map_int map_lgl keep imap_chr
 #' @importFrom assertthat assert_that is.string
-#' @importFrom rlang list2 %||% is_false abort dots_n warn enquo quo_is_null enquos is_integerish quos eval_tidy new_data_mask syms env new_environment env_bind as_label set_names exec is_bare_character quo_get_expr quo_set_expr .data seq2 is_quosure enexpr enexprs expr caller_env is_character
+#' @importFrom rlang list2 %||% is_false abort dots_n warn enquo quo_is_null enquos is_integerish quos eval_tidy new_data_mask syms env new_environment env_bind as_label set_names exec is_bare_character quo_get_expr quo_set_expr .data seq2 is_quosure enexpr enexprs expr caller_env is_character quo_name
 #' @importFrom tidyselect vars_pull vars_rename vars_select eval_select
 #' @useDynLib arrow, .registration = TRUE
 #' @keywords internal
diff --git a/r/R/dplyr-group-by.R b/r/R/dplyr-group-by.R
index d2cf79253a5..c426a66b218 100644
--- a/r/R/dplyr-group-by.R
+++ b/r/R/dplyr-group-by.R
@@ -24,11 +24,20 @@ group_by.arrow_dplyr_query <- function(.data,
                                        add = .add,
                                        .drop = dplyr::group_by_drop_default(.data)) {
   .data <- arrow_dplyr_query(.data)
-  # ... can contain expressions (i.e. can add (or rename?) columns)
-  # Check for those (they show up as named expressions)
   new_groups <- enquos(...)
-  new_groups <- new_groups[nzchar(names(new_groups))]
+  # ... can contain expressions (i.e. can add (or rename?) columns) and so we
+  # need to identify those and add them on to the query with mutate. Specifically,
+  # we want to mark as new:
+  #   * expressions (named or otherwise)
+  #   * variables that have new names
+  # All others (i.e. simple references to variables) should not be (re)-added
+  new_group_ind <- map_lgl(new_groups, ~!(quo_name(.x) %in% names(.data)))
+  named_group_ind <- map_lgl(names(new_groups), nzchar)
+  new_groups <- new_groups[new_group_ind | named_group_ind]
   if (length(new_groups)) {
+    # now either use the name that was given in ... or if that is "" then use the expr
+    names(new_groups) <- imap_chr(new_groups, ~ ifelse(.y == "", quo_name(.x), .y))
+
     # Add them to the data
     .data <- dplyr::mutate(.data, !!!new_groups)
   }
@@ -62,4 +71,4 @@ ungroup.arrow_dplyr_query <- function(x, ...) {
   x$drop_empty_groups <- NULL
   x
 }
-ungroup.Dataset <- ungroup.ArrowTabular <- force
\ No newline at end of file
+ungroup.Dataset <- ungroup.ArrowTabular <- force
diff --git a/r/tests/testthat/test-dplyr-group-by.R b/r/tests/testthat/test-dplyr-group-by.R
index 8583c2f9024..fe0394bc636 100644
--- a/r/tests/testthat/test-dplyr-group-by.R
+++ b/r/tests/testthat/test-dplyr-group-by.R
@@ -33,7 +33,7 @@ test_that("group_by groupings are recorded", {
   )
 })
 
-test_that("group_by doesn't yet support creating/renaming", {
+test_that("group_by supports creating/renaming", {
   expect_dplyr_equal(
     input %>%
       group_by(chr, numbers = int) %>%
@@ -46,6 +46,12 @@ test_that("group_by doesn't yet support creating/renaming", {
       collect(),
     tbl
   )
+  expect_dplyr_equal(
+    input %>%
+      group_by(int > 4, lgl, foo = int > 5) %>%
+      collect(),
+    tbl
+  )
 })
 
 test_that("ungroup", {

From 73f73d9bc12412b47dc18db2b6cab2beff8a6d1b Mon Sep 17 00:00:00 2001
From: Matthew Topol <mtopol@factset.com>
Date: Sun, 25 Jul 2021 11:48:50 -0600
Subject: [PATCH 635/719] ARROW-13430: [Go] fix handling of zero value for
 FromBigInt

@kszucs @emkornfield @sbinet

Closes #10796 from zeroshade/fix-decimal

Authored-by: Matthew Topol <mtopol@factset.com>
Signed-off-by: Micah Kornfield <emkornfield@gmail.com>
---
 go/arrow/decimal128/decimal128.go      | 9 +++++++--
 go/arrow/decimal128/decimal128_test.go | 1 +
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/go/arrow/decimal128/decimal128.go b/go/arrow/decimal128/decimal128.go
index a0921ce1007..b14b81df878 100644
--- a/go/arrow/decimal128/decimal128.go
+++ b/go/arrow/decimal128/decimal128.go
@@ -61,8 +61,13 @@ func FromI64(v int64) Num {
 // FromBigInt will convert a big.Int to a Num, if the value in v has a
 // BitLen > 128, this will panic.
 func FromBigInt(v *big.Int) (n Num) {
-	if v.BitLen() > 128 {
+	bitlen := v.BitLen()
+	if bitlen > 128 {
 		panic("arrow/decimal128: cannot represent value larger than 128bits")
+	} else if bitlen == 0 {
+		// if bitlen is 0, then the value is 0 so return the default zeroed
+		// out n
+		return
 	}
 
 	// if the value is negative, then get the high and low bytes from
@@ -73,7 +78,7 @@ func FromBigInt(v *big.Int) (n Num) {
 	b := v.Bits()
 	n.lo = uint64(b[0])
 	if len(b) > 1 {
-		n.hi = int64(v.Bits()[1])
+		n.hi = int64(b[1])
 	}
 	if v.Sign() < 0 {
 		return n.negated()
diff --git a/go/arrow/decimal128/decimal128_test.go b/go/arrow/decimal128/decimal128_test.go
index 5a4fa8ab6a4..dfdf5371a76 100644
--- a/go/arrow/decimal128/decimal128_test.go
+++ b/go/arrow/decimal128/decimal128_test.go
@@ -134,6 +134,7 @@ func TestDecimalToBigInt(t *testing.T) {
 	}{
 		{-18446744073709552, 7083549724304524577, "-340282366920938463463374607431711455"},
 		{1, 4611686018427387904, "23058430092136939520"},
+		{0, 0, "0"},
 	}
 	for _, tc := range tests {
 		t.Run("", func(t *testing.T) {

From 76ca3c919fe0aa604713717a201ff6ab6a99037c Mon Sep 17 00:00:00 2001
From: Dominik Moritz <domoritz@gmail.com>
Date: Mon, 26 Jul 2021 11:14:23 +0200
Subject: [PATCH 636/719] ARROW-13420: [JS] Update dependencies
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #10762 from domoritz/dom/updates

Authored-by: Dominik Moritz <domoritz@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 js/package.json                             |   24 +-
 js/test/unit/generated-data-validators.ts   |    2 +-
 js/test/unit/vector/numeric-vector-tests.ts |   56 +-
 js/yarn.lock                                | 1433 +++++++++----------
 4 files changed, 693 insertions(+), 822 deletions(-)

diff --git a/js/package.json b/js/package.json
index cf983b7aa51..45e6487b0d8 100644
--- a/js/package.json
+++ b/js/package.json
@@ -51,8 +51,8 @@
   ],
   "dependencies": {
     "@types/flatbuffers": "^1.10.0",
-    "@types/node": "^15.6.1",
-    "command-line-args": "5.1.1",
+    "@types/node": "^16.4.0",
+    "command-line-args": "5.1.3",
     "command-line-usage": "6.1.1",
     "flatbuffers": "1.12.0",
     "json-bignum": "^0.0.3",
@@ -60,19 +60,19 @@
     "tslib": "^2.3.0"
   },
   "devDependencies": {
-    "@openpgp/web-stream-tools": "0.0.5",
-    "@types/glob": "7.1.3",
-    "@types/jest": "26.0.23",
+    "@openpgp/web-stream-tools": "0.0.6",
+    "@types/glob": "7.1.4",
+    "@types/jest": "26.0.24",
     "@types/randomatic": "3.1.2",
-    "@typescript-eslint/eslint-plugin": "4.25.0",
-    "@typescript-eslint/parser": "4.25.0",
+    "@typescript-eslint/eslint-plugin": "4.28.4",
+    "@typescript-eslint/parser": "4.28.4",
     "async-done": "1.3.2",
     "benny": "3.6.15",
     "cpy": "8.1.2",
     "cross-env": "7.0.3",
-    "del-cli": "4.0.0",
-    "eslint": "7.27.0",
-    "eslint-plugin-jest": "24.3.6",
+    "del-cli": "4.0.1",
+    "eslint": "7.31.0",
+    "eslint-plugin-jest": "24.3.7",
     "esm": "https://github.com/jsg2021/esm/releases/download/v3.x.x-pr883/esm-3.x.x-pr883.tgz",
     "glob": "7.1.7",
     "google-closure-compiler": "20210601.0.0",
@@ -92,8 +92,8 @@
     "randomatic": "3.1.1",
     "rxjs": "7.2.0",
     "ts-jest": "27.0.3",
-    "ts-node": "10.0.0",
-    "typedoc": "0.20.36",
+    "ts-node": "10.1.0",
+    "typedoc": "0.21.4",
     "typescript": "4.0.2",
     "web-streams-polyfill": "3.0.3",
     "xml2js": "0.4.23"
diff --git a/js/test/unit/generated-data-validators.ts b/js/test/unit/generated-data-validators.ts
index 647932b415b..910386d4a0e 100644
--- a/js/test/unit/generated-data-validators.ts
+++ b/js/test/unit/generated-data-validators.ts
@@ -33,7 +33,7 @@ function deferTest(description: string, run: (...args: any[]) => any) {
 
 function deferDescribe(description: string, tests: DeferredTest | DeferredTest[]) {
     const t = (Array.isArray(tests) ? tests : [tests]).filter(Boolean);
-    return { description, tests: t, run: () => describe(description, () => t.forEach((x) => x.run())) };
+    return { description, tests: t, run: () => describe(description, () => { t.forEach((x) => x.run()); } ) };
 }
 
 export function validateTable({ keys, rows, cols, rowBatches, colBatches, keyBatches, table }: GeneratedTable) {
diff --git a/js/test/unit/vector/numeric-vector-tests.ts b/js/test/unit/vector/numeric-vector-tests.ts
index 41564716878..61418c431f3 100644
--- a/js/test/unit/vector/numeric-vector-tests.ts
+++ b/js/test/unit/vector/numeric-vector-tests.ts
@@ -361,21 +361,25 @@ function testIntVector<T extends Int>(DataType: new () => T, values?: Array<any>
     combos.forEach(([chunksType, vector]) => {
         describe(chunksType, () => {
             // test base case no slicing
-            describe(`base case no slicing`, () => testAndValidateVector(vector, typed, jsArray));
+            describe(`base case no slicing`, () => { testAndValidateVector(vector, typed, jsArray); });
             // test slicing without args
-            describe(`slicing without args`, () => testAndValidateVector(vector.slice(), typed.slice(), jsArray.slice()));
+            describe(`slicing without args`, () => { testAndValidateVector(vector.slice(), typed.slice(), jsArray.slice()); });
             // test slicing the middle half
-            describe(`slice the middle half`, () => testAndValidateVector(
-                vector.slice(vectorBegin, vectorEnd),
-                typed.slice(typedBegin, typedEnd),
-                jsArray.slice(jsArrayBegin, jsArrayEnd)
-            ));
+            describe(`slice the middle half`, () => {
+                testAndValidateVector(
+                    vector.slice(vectorBegin, vectorEnd),
+                    typed.slice(typedBegin, typedEnd),
+                    jsArray.slice(jsArrayBegin, jsArrayEnd)
+                );
+            });
             // test splicing out the middle half
-            describe(`splicing out the middle half`, () => testAndValidateVector(
-                vector.slice(0, vectorBegin).concat(vector.slice(vectorEnd)),
-                new ArrayType([...typed.slice(0, typedBegin), ...typed.slice(typedEnd)]),
-                [...jsArray.slice(0, jsArrayBegin), ...jsArray.slice(jsArrayEnd)]
-            ));
+            describe(`splicing out the middle half`, () => {
+                testAndValidateVector(
+                    vector.slice(0, vectorBegin).concat(vector.slice(vectorEnd)),
+                    new ArrayType([...typed.slice(0, typedBegin), ...typed.slice(typedEnd)]),
+                    [...jsArray.slice(0, jsArrayBegin), ...jsArray.slice(jsArrayEnd)]
+                );
+            });
         });
     });
 }
@@ -399,21 +403,25 @@ function testFloatVector<T extends Float>(DataType: new () => T, values?: Array<
     combos.forEach(([chunksType, vector]) => {
         describe(chunksType, () => {
             // test base case no slicing
-            describe(`base case no slicing`, () => testAndValidateVector(vector, typed, jsArray));
+            describe(`base case no slicing`, () => { testAndValidateVector(vector, typed, jsArray); });
             // test slicing without args
-            describe(`slicing without args`, () => testAndValidateVector(vector.slice(), typed.slice(), jsArray.slice()));
+            describe(`slicing without args`, () => { testAndValidateVector(vector.slice(), typed.slice(), jsArray.slice()); });
             // test slicing the middle half
-            describe(`slice the middle half`, () => testAndValidateVector(
-                vector.slice(begin, end),
-                typed.slice(begin, end),
-                jsArray.slice(begin, end)
-            ));
+            describe(`slice the middle half`, () => {
+                    testAndValidateVector(
+                    vector.slice(begin, end),
+                    typed.slice(begin, end),
+                    jsArray.slice(begin, end)
+                );
+            });
             // test splicing out the middle half
-            describe(`splicing out the middle half`, () => testAndValidateVector(
-                vector.slice(0, begin).concat(vector.slice(end)),
-                new ArrayType([...typed.slice(0, begin), ...typed.slice(end)]),
-                [...jsArray.slice(0, begin), ...jsArray.slice(end)]
-            ));
+            describe(`splicing out the middle half`, () => {
+                testAndValidateVector(
+                    vector.slice(0, begin).concat(vector.slice(end)),
+                    new ArrayType([...typed.slice(0, begin), ...typed.slice(end)]),
+                    [...jsArray.slice(0, begin), ...jsArray.slice(end)]
+                );
+            });
         });
     });
 }
diff --git a/js/yarn.lock b/js/yarn.lock
index fd24b340139..3c77aeed030 100644
--- a/js/yarn.lock
+++ b/js/yarn.lock
@@ -43,32 +43,32 @@
   dependencies:
     "@babel/highlight" "^7.10.4"
 
-"@babel/code-frame@^7.0.0", "@babel/code-frame@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/code-frame/-/code-frame-7.12.13.tgz#dcfc826beef65e75c50e21d3837d7d95798dd658"
-  integrity sha512-HV1Cm0Q3ZrpCR93tkWOYiuYIgLxZXZFVG2VgK+MBWjUqZTundupbfx2aXarXuw5Ko5aMcjtJgbSs4vUGBS5v6g==
+"@babel/code-frame@^7.0.0", "@babel/code-frame@^7.12.13", "@babel/code-frame@^7.14.5":
+  version "7.14.5"
+  resolved "https://registry.yarnpkg.com/@babel/code-frame/-/code-frame-7.14.5.tgz#23b08d740e83f49c5e59945fbf1b43e80bbf4edb"
+  integrity sha512-9pzDqyc6OLDaqe+zbACgFkb6fKMNG6CObKpnYXChRsvYGyEdc7CA2BaqeOM+vOtCS5ndmJicPJhKAwYRI6UfFw==
   dependencies:
-    "@babel/highlight" "^7.12.13"
+    "@babel/highlight" "^7.14.5"
 
-"@babel/compat-data@^7.14.4":
-  version "7.14.4"
-  resolved "https://registry.yarnpkg.com/@babel/compat-data/-/compat-data-7.14.4.tgz#45720fe0cecf3fd42019e1d12cc3d27fadc98d58"
-  integrity sha512-i2wXrWQNkH6JplJQGn3Rd2I4Pij8GdHkXwHMxm+zV5YG/Jci+bCNrWZEWC4o+umiDkRrRs4dVzH3X4GP7vyjQQ==
+"@babel/compat-data@^7.14.5":
+  version "7.14.7"
+  resolved "https://registry.yarnpkg.com/@babel/compat-data/-/compat-data-7.14.7.tgz#7b047d7a3a89a67d2258dc61f604f098f1bc7e08"
+  integrity sha512-nS6dZaISCXJ3+518CWiBfEr//gHyMO02uDxBkXTKZDN5POruCnOZ1N4YBRZDCabwF8nZMWBpRxIicmXtBs+fvw==
 
 "@babel/core@^7.1.0", "@babel/core@^7.7.2", "@babel/core@^7.7.5":
-  version "7.14.3"
-  resolved "https://registry.yarnpkg.com/@babel/core/-/core-7.14.3.tgz#5395e30405f0776067fbd9cf0884f15bfb770a38"
-  integrity sha512-jB5AmTKOCSJIZ72sd78ECEhuPiDMKlQdDI/4QRI6lzYATx5SSogS1oQA2AoPecRCknm30gHi2l+QVvNUu3wZAg==
-  dependencies:
-    "@babel/code-frame" "^7.12.13"
-    "@babel/generator" "^7.14.3"
-    "@babel/helper-compilation-targets" "^7.13.16"
-    "@babel/helper-module-transforms" "^7.14.2"
-    "@babel/helpers" "^7.14.0"
-    "@babel/parser" "^7.14.3"
-    "@babel/template" "^7.12.13"
-    "@babel/traverse" "^7.14.2"
-    "@babel/types" "^7.14.2"
+  version "7.14.8"
+  resolved "https://registry.yarnpkg.com/@babel/core/-/core-7.14.8.tgz#20cdf7c84b5d86d83fac8710a8bc605a7ba3f010"
+  integrity sha512-/AtaeEhT6ErpDhInbXmjHcUQXH0L0TEgscfcxk1qbOvLuKCa5aZT0SOOtDKFY96/CLROwbLSKyFor6idgNaU4Q==
+  dependencies:
+    "@babel/code-frame" "^7.14.5"
+    "@babel/generator" "^7.14.8"
+    "@babel/helper-compilation-targets" "^7.14.5"
+    "@babel/helper-module-transforms" "^7.14.8"
+    "@babel/helpers" "^7.14.8"
+    "@babel/parser" "^7.14.8"
+    "@babel/template" "^7.14.5"
+    "@babel/traverse" "^7.14.8"
+    "@babel/types" "^7.14.8"
     convert-source-map "^1.7.0"
     debug "^4.1.0"
     gensync "^1.0.0-beta.2"
@@ -76,137 +76,144 @@
     semver "^6.3.0"
     source-map "^0.5.0"
 
-"@babel/generator@^7.14.2", "@babel/generator@^7.14.3", "@babel/generator@^7.7.2":
-  version "7.14.3"
-  resolved "https://registry.yarnpkg.com/@babel/generator/-/generator-7.14.3.tgz#0c2652d91f7bddab7cccc6ba8157e4f40dcedb91"
-  integrity sha512-bn0S6flG/j0xtQdz3hsjJ624h3W0r3llttBMfyHX3YrZ/KtLYr15bjA0FXkgW7FpvrDuTuElXeVjiKlYRpnOFA==
+"@babel/generator@^7.14.8", "@babel/generator@^7.7.2":
+  version "7.14.8"
+  resolved "https://registry.yarnpkg.com/@babel/generator/-/generator-7.14.8.tgz#bf86fd6af96cf3b74395a8ca409515f89423e070"
+  integrity sha512-cYDUpvIzhBVnMzRoY1fkSEhK/HmwEVwlyULYgn/tMQYd6Obag3ylCjONle3gdErfXBW61SVTlR9QR7uWlgeIkg==
   dependencies:
-    "@babel/types" "^7.14.2"
+    "@babel/types" "^7.14.8"
     jsesc "^2.5.1"
     source-map "^0.5.0"
 
-"@babel/helper-compilation-targets@^7.13.16":
-  version "7.14.4"
-  resolved "https://registry.yarnpkg.com/@babel/helper-compilation-targets/-/helper-compilation-targets-7.14.4.tgz#33ebd0ffc34248051ee2089350a929ab02f2a516"
-  integrity sha512-JgdzOYZ/qGaKTVkn5qEDV/SXAh8KcyUVkCoSWGN8T3bwrgd6m+/dJa2kVGi6RJYJgEYPBdZ84BZp9dUjNWkBaA==
+"@babel/helper-compilation-targets@^7.14.5":
+  version "7.14.5"
+  resolved "https://registry.yarnpkg.com/@babel/helper-compilation-targets/-/helper-compilation-targets-7.14.5.tgz#7a99c5d0967911e972fe2c3411f7d5b498498ecf"
+  integrity sha512-v+QtZqXEiOnpO6EYvlImB6zCD2Lel06RzOPzmkz/D/XgQiUu3C/Jb1LOqSt/AIA34TYi/Q+KlT8vTQrgdxkbLw==
   dependencies:
-    "@babel/compat-data" "^7.14.4"
-    "@babel/helper-validator-option" "^7.12.17"
+    "@babel/compat-data" "^7.14.5"
+    "@babel/helper-validator-option" "^7.14.5"
     browserslist "^4.16.6"
     semver "^6.3.0"
 
-"@babel/helper-function-name@^7.14.2":
-  version "7.14.2"
-  resolved "https://registry.yarnpkg.com/@babel/helper-function-name/-/helper-function-name-7.14.2.tgz#397688b590760b6ef7725b5f0860c82427ebaac2"
-  integrity sha512-NYZlkZRydxw+YT56IlhIcS8PAhb+FEUiOzuhFTfqDyPmzAhRge6ua0dQYT/Uh0t/EDHq05/i+e5M2d4XvjgarQ==
-  dependencies:
-    "@babel/helper-get-function-arity" "^7.12.13"
-    "@babel/template" "^7.12.13"
-    "@babel/types" "^7.14.2"
-
-"@babel/helper-get-function-arity@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/helper-get-function-arity/-/helper-get-function-arity-7.12.13.tgz#bc63451d403a3b3082b97e1d8b3fe5bd4091e583"
-  integrity sha512-DjEVzQNz5LICkzN0REdpD5prGoidvbdYk1BVgRUOINaWJP2t6avB27X1guXK1kXNrX0WMfsrm1A/ZBthYuIMQg==
-  dependencies:
-    "@babel/types" "^7.12.13"
-
-"@babel/helper-member-expression-to-functions@^7.13.12":
-  version "7.13.12"
-  resolved "https://registry.yarnpkg.com/@babel/helper-member-expression-to-functions/-/helper-member-expression-to-functions-7.13.12.tgz#dfe368f26d426a07299d8d6513821768216e6d72"
-  integrity sha512-48ql1CLL59aKbU94Y88Xgb2VFy7a95ykGRbJJaaVv+LX5U8wFpLfiGXJJGUozsmA1oEh/o5Bp60Voq7ACyA/Sw==
-  dependencies:
-    "@babel/types" "^7.13.12"
-
-"@babel/helper-module-imports@^7.13.12":
-  version "7.13.12"
-  resolved "https://registry.yarnpkg.com/@babel/helper-module-imports/-/helper-module-imports-7.13.12.tgz#c6a369a6f3621cb25da014078684da9196b61977"
-  integrity sha512-4cVvR2/1B693IuOvSI20xqqa/+bl7lqAMR59R4iu39R9aOX8/JoYY1sFaNvUMyMBGnHdwvJgUrzNLoUZxXypxA==
-  dependencies:
-    "@babel/types" "^7.13.12"
-
-"@babel/helper-module-transforms@^7.14.2":
-  version "7.14.2"
-  resolved "https://registry.yarnpkg.com/@babel/helper-module-transforms/-/helper-module-transforms-7.14.2.tgz#ac1cc30ee47b945e3e0c4db12fa0c5389509dfe5"
-  integrity sha512-OznJUda/soKXv0XhpvzGWDnml4Qnwp16GN+D/kZIdLsWoHj05kyu8Rm5kXmMef+rVJZ0+4pSGLkeixdqNUATDA==
-  dependencies:
-    "@babel/helper-module-imports" "^7.13.12"
-    "@babel/helper-replace-supers" "^7.13.12"
-    "@babel/helper-simple-access" "^7.13.12"
-    "@babel/helper-split-export-declaration" "^7.12.13"
-    "@babel/helper-validator-identifier" "^7.14.0"
-    "@babel/template" "^7.12.13"
-    "@babel/traverse" "^7.14.2"
-    "@babel/types" "^7.14.2"
-
-"@babel/helper-optimise-call-expression@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/helper-optimise-call-expression/-/helper-optimise-call-expression-7.12.13.tgz#5c02d171b4c8615b1e7163f888c1c81c30a2aaea"
-  integrity sha512-BdWQhoVJkp6nVjB7nkFWcn43dkprYauqtk++Py2eaf/GRDFm5BxRqEIZCiHlZUGAVmtwKcsVL1dC68WmzeFmiA==
-  dependencies:
-    "@babel/types" "^7.12.13"
-
-"@babel/helper-plugin-utils@^7.0.0", "@babel/helper-plugin-utils@^7.10.4", "@babel/helper-plugin-utils@^7.12.13", "@babel/helper-plugin-utils@^7.8.0":
-  version "7.13.0"
-  resolved "https://registry.yarnpkg.com/@babel/helper-plugin-utils/-/helper-plugin-utils-7.13.0.tgz#806526ce125aed03373bc416a828321e3a6a33af"
-  integrity sha512-ZPafIPSwzUlAoWT8DKs1W2VyF2gOWthGd5NGFMsBcMMol+ZhK+EQY/e6V96poa6PA/Bh+C9plWN0hXO1uB8AfQ==
-
-"@babel/helper-replace-supers@^7.13.12":
-  version "7.14.4"
-  resolved "https://registry.yarnpkg.com/@babel/helper-replace-supers/-/helper-replace-supers-7.14.4.tgz#b2ab16875deecfff3ddfcd539bc315f72998d836"
-  integrity sha512-zZ7uHCWlxfEAAOVDYQpEf/uyi1dmeC7fX4nCf2iz9drnCwi1zvwXL3HwWWNXUQEJ1k23yVn3VbddiI9iJEXaTQ==
-  dependencies:
-    "@babel/helper-member-expression-to-functions" "^7.13.12"
-    "@babel/helper-optimise-call-expression" "^7.12.13"
-    "@babel/traverse" "^7.14.2"
-    "@babel/types" "^7.14.4"
-
-"@babel/helper-simple-access@^7.13.12":
-  version "7.13.12"
-  resolved "https://registry.yarnpkg.com/@babel/helper-simple-access/-/helper-simple-access-7.13.12.tgz#dd6c538afb61819d205a012c31792a39c7a5eaf6"
-  integrity sha512-7FEjbrx5SL9cWvXioDbnlYTppcZGuCY6ow3/D5vMggb2Ywgu4dMrpTJX0JdQAIcRRUElOIxF3yEooa9gUb9ZbA==
-  dependencies:
-    "@babel/types" "^7.13.12"
-
-"@babel/helper-split-export-declaration@^7.12.13":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/helper-split-export-declaration/-/helper-split-export-declaration-7.12.13.tgz#e9430be00baf3e88b0e13e6f9d4eaf2136372b05"
-  integrity sha512-tCJDltF83htUtXx5NLcaDqRmknv652ZWCHyoTETf1CXYJdPC7nohZohjUgieXhv0hTJdRf2FjDueFehdNucpzg==
-  dependencies:
-    "@babel/types" "^7.12.13"
-
-"@babel/helper-validator-identifier@^7.14.0":
-  version "7.14.0"
-  resolved "https://registry.yarnpkg.com/@babel/helper-validator-identifier/-/helper-validator-identifier-7.14.0.tgz#d26cad8a47c65286b15df1547319a5d0bcf27288"
-  integrity sha512-V3ts7zMSu5lfiwWDVWzRDGIN+lnCEUdaXgtVHJgLb1rGaA6jMrtB9EmE7L18foXJIE8Un/A/h6NJfGQp/e1J4A==
-
-"@babel/helper-validator-option@^7.12.17":
-  version "7.12.17"
-  resolved "https://registry.yarnpkg.com/@babel/helper-validator-option/-/helper-validator-option-7.12.17.tgz#d1fbf012e1a79b7eebbfdc6d270baaf8d9eb9831"
-  integrity sha512-TopkMDmLzq8ngChwRlyjR6raKD6gMSae4JdYDB8bByKreQgG0RBTuKe9LRxW3wFtUnjxOPRKBDwEH6Mg5KeDfw==
-
-"@babel/helpers@^7.14.0":
-  version "7.14.0"
-  resolved "https://registry.yarnpkg.com/@babel/helpers/-/helpers-7.14.0.tgz#ea9b6be9478a13d6f961dbb5f36bf75e2f3b8f62"
-  integrity sha512-+ufuXprtQ1D1iZTO/K9+EBRn+qPWMJjZSw/S0KlFrxCw4tkrzv9grgpDHkY9MeQTjTY8i2sp7Jep8DfU6tN9Mg==
-  dependencies:
-    "@babel/template" "^7.12.13"
-    "@babel/traverse" "^7.14.0"
-    "@babel/types" "^7.14.0"
-
-"@babel/highlight@^7.10.4", "@babel/highlight@^7.12.13":
-  version "7.14.0"
-  resolved "https://registry.yarnpkg.com/@babel/highlight/-/highlight-7.14.0.tgz#3197e375711ef6bf834e67d0daec88e4f46113cf"
-  integrity sha512-YSCOwxvTYEIMSGaBQb5kDDsCopDdiUGsqpatp3fOlI4+2HQSkTmEVWnVuySdAC5EWCqSWWTv0ib63RjR7dTBdg==
-  dependencies:
-    "@babel/helper-validator-identifier" "^7.14.0"
+"@babel/helper-function-name@^7.14.5":
+  version "7.14.5"
+  resolved "https://registry.yarnpkg.com/@babel/helper-function-name/-/helper-function-name-7.14.5.tgz#89e2c474972f15d8e233b52ee8c480e2cfcd50c4"
+  integrity sha512-Gjna0AsXWfFvrAuX+VKcN/aNNWonizBj39yGwUzVDVTlMYJMK2Wp6xdpy72mfArFq5uK+NOuexfzZlzI1z9+AQ==
+  dependencies:
+    "@babel/helper-get-function-arity" "^7.14.5"
+    "@babel/template" "^7.14.5"
+    "@babel/types" "^7.14.5"
+
+"@babel/helper-get-function-arity@^7.14.5":
+  version "7.14.5"
+  resolved "https://registry.yarnpkg.com/@babel/helper-get-function-arity/-/helper-get-function-arity-7.14.5.tgz#25fbfa579b0937eee1f3b805ece4ce398c431815"
+  integrity sha512-I1Db4Shst5lewOM4V+ZKJzQ0JGGaZ6VY1jYvMghRjqs6DWgxLCIyFt30GlnKkfUeFLpJt2vzbMVEXVSXlIFYUg==
+  dependencies:
+    "@babel/types" "^7.14.5"
+
+"@babel/helper-hoist-variables@^7.14.5":
+  version "7.14.5"
+  resolved "https://registry.yarnpkg.com/@babel/helper-hoist-variables/-/helper-hoist-variables-7.14.5.tgz#e0dd27c33a78e577d7c8884916a3e7ef1f7c7f8d"
+  integrity sha512-R1PXiz31Uc0Vxy4OEOm07x0oSjKAdPPCh3tPivn/Eo8cvz6gveAeuyUUPB21Hoiif0uoPQSSdhIPS3352nvdyQ==
+  dependencies:
+    "@babel/types" "^7.14.5"
+
+"@babel/helper-member-expression-to-functions@^7.14.5":
+  version "7.14.7"
+  resolved "https://registry.yarnpkg.com/@babel/helper-member-expression-to-functions/-/helper-member-expression-to-functions-7.14.7.tgz#97e56244beb94211fe277bd818e3a329c66f7970"
+  integrity sha512-TMUt4xKxJn6ccjcOW7c4hlwyJArizskAhoSTOCkA0uZ+KghIaci0Qg9R043kUMWI9mtQfgny+NQ5QATnZ+paaA==
+  dependencies:
+    "@babel/types" "^7.14.5"
+
+"@babel/helper-module-imports@^7.14.5":
+  version "7.14.5"
+  resolved "https://registry.yarnpkg.com/@babel/helper-module-imports/-/helper-module-imports-7.14.5.tgz#6d1a44df6a38c957aa7c312da076429f11b422f3"
+  integrity sha512-SwrNHu5QWS84XlHwGYPDtCxcA0hrSlL2yhWYLgeOc0w7ccOl2qv4s/nARI0aYZW+bSwAL5CukeXA47B/1NKcnQ==
+  dependencies:
+    "@babel/types" "^7.14.5"
+
+"@babel/helper-module-transforms@^7.14.8":
+  version "7.14.8"
+  resolved "https://registry.yarnpkg.com/@babel/helper-module-transforms/-/helper-module-transforms-7.14.8.tgz#d4279f7e3fd5f4d5d342d833af36d4dd87d7dc49"
+  integrity sha512-RyE+NFOjXn5A9YU1dkpeBaduagTlZ0+fccnIcAGbv1KGUlReBj7utF7oEth8IdIBQPcux0DDgW5MFBH2xu9KcA==
+  dependencies:
+    "@babel/helper-module-imports" "^7.14.5"
+    "@babel/helper-replace-supers" "^7.14.5"
+    "@babel/helper-simple-access" "^7.14.8"
+    "@babel/helper-split-export-declaration" "^7.14.5"
+    "@babel/helper-validator-identifier" "^7.14.8"
+    "@babel/template" "^7.14.5"
+    "@babel/traverse" "^7.14.8"
+    "@babel/types" "^7.14.8"
+
+"@babel/helper-optimise-call-expression@^7.14.5":
+  version "7.14.5"
+  resolved "https://registry.yarnpkg.com/@babel/helper-optimise-call-expression/-/helper-optimise-call-expression-7.14.5.tgz#f27395a8619e0665b3f0364cddb41c25d71b499c"
+  integrity sha512-IqiLIrODUOdnPU9/F8ib1Fx2ohlgDhxnIDU7OEVi+kAbEZcyiF7BLU8W6PfvPi9LzztjS7kcbzbmL7oG8kD6VA==
+  dependencies:
+    "@babel/types" "^7.14.5"
+
+"@babel/helper-plugin-utils@^7.0.0", "@babel/helper-plugin-utils@^7.10.4", "@babel/helper-plugin-utils@^7.12.13", "@babel/helper-plugin-utils@^7.14.5", "@babel/helper-plugin-utils@^7.8.0":
+  version "7.14.5"
+  resolved "https://registry.yarnpkg.com/@babel/helper-plugin-utils/-/helper-plugin-utils-7.14.5.tgz#5ac822ce97eec46741ab70a517971e443a70c5a9"
+  integrity sha512-/37qQCE3K0vvZKwoK4XU/irIJQdIfCJuhU5eKnNxpFDsOkgFaUAwbv+RYw6eYgsC0E4hS7r5KqGULUogqui0fQ==
+
+"@babel/helper-replace-supers@^7.14.5":
+  version "7.14.5"
+  resolved "https://registry.yarnpkg.com/@babel/helper-replace-supers/-/helper-replace-supers-7.14.5.tgz#0ecc0b03c41cd567b4024ea016134c28414abb94"
+  integrity sha512-3i1Qe9/8x/hCHINujn+iuHy+mMRLoc77b2nI9TB0zjH1hvn9qGlXjWlggdwUcju36PkPCy/lpM7LLUdcTyH4Ow==
+  dependencies:
+    "@babel/helper-member-expression-to-functions" "^7.14.5"
+    "@babel/helper-optimise-call-expression" "^7.14.5"
+    "@babel/traverse" "^7.14.5"
+    "@babel/types" "^7.14.5"
+
+"@babel/helper-simple-access@^7.14.8":
+  version "7.14.8"
+  resolved "https://registry.yarnpkg.com/@babel/helper-simple-access/-/helper-simple-access-7.14.8.tgz#82e1fec0644a7e775c74d305f212c39f8fe73924"
+  integrity sha512-TrFN4RHh9gnWEU+s7JloIho2T76GPwRHhdzOWLqTrMnlas8T9O7ec+oEDNsRXndOmru9ymH9DFrEOxpzPoSbdg==
+  dependencies:
+    "@babel/types" "^7.14.8"
+
+"@babel/helper-split-export-declaration@^7.14.5":
+  version "7.14.5"
+  resolved "https://registry.yarnpkg.com/@babel/helper-split-export-declaration/-/helper-split-export-declaration-7.14.5.tgz#22b23a54ef51c2b7605d851930c1976dd0bc693a"
+  integrity sha512-hprxVPu6e5Kdp2puZUmvOGjaLv9TCe58E/Fl6hRq4YiVQxIcNvuq6uTM2r1mT/oPskuS9CgR+I94sqAYv0NGKA==
+  dependencies:
+    "@babel/types" "^7.14.5"
+
+"@babel/helper-validator-identifier@^7.14.5", "@babel/helper-validator-identifier@^7.14.8":
+  version "7.14.8"
+  resolved "https://registry.yarnpkg.com/@babel/helper-validator-identifier/-/helper-validator-identifier-7.14.8.tgz#32be33a756f29e278a0d644fa08a2c9e0f88a34c"
+  integrity sha512-ZGy6/XQjllhYQrNw/3zfWRwZCTVSiBLZ9DHVZxn9n2gip/7ab8mv2TWlKPIBk26RwedCBoWdjLmn+t9na2Gcow==
+
+"@babel/helper-validator-option@^7.14.5":
+  version "7.14.5"
+  resolved "https://registry.yarnpkg.com/@babel/helper-validator-option/-/helper-validator-option-7.14.5.tgz#6e72a1fff18d5dfcb878e1e62f1a021c4b72d5a3"
+  integrity sha512-OX8D5eeX4XwcroVW45NMvoYaIuFI+GQpA2a8Gi+X/U/cDUIRsV37qQfF905F0htTRCREQIB4KqPeaveRJUl3Ow==
+
+"@babel/helpers@^7.14.8":
+  version "7.14.8"
+  resolved "https://registry.yarnpkg.com/@babel/helpers/-/helpers-7.14.8.tgz#839f88f463025886cff7f85a35297007e2da1b77"
+  integrity sha512-ZRDmI56pnV+p1dH6d+UN6GINGz7Krps3+270qqI9UJ4wxYThfAIcI5i7j5vXC4FJ3Wap+S9qcebxeYiqn87DZw==
+  dependencies:
+    "@babel/template" "^7.14.5"
+    "@babel/traverse" "^7.14.8"
+    "@babel/types" "^7.14.8"
+
+"@babel/highlight@^7.10.4", "@babel/highlight@^7.14.5":
+  version "7.14.5"
+  resolved "https://registry.yarnpkg.com/@babel/highlight/-/highlight-7.14.5.tgz#6861a52f03966405001f6aa534a01a24d99e8cd9"
+  integrity sha512-qf9u2WFWVV0MppaL877j2dBtQIDgmidgjGk5VIMw3OadXvYaXn66U1BFlH2t4+t3i+8PhedppRv+i40ABzd+gg==
+  dependencies:
+    "@babel/helper-validator-identifier" "^7.14.5"
     chalk "^2.0.0"
     js-tokens "^4.0.0"
 
-"@babel/parser@^7.1.0", "@babel/parser@^7.12.13", "@babel/parser@^7.14.2", "@babel/parser@^7.14.3", "@babel/parser@^7.7.2":
-  version "7.14.4"
-  resolved "https://registry.yarnpkg.com/@babel/parser/-/parser-7.14.4.tgz#a5c560d6db6cd8e6ed342368dea8039232cbab18"
-  integrity sha512-ArliyUsWDUqEGfWcmzpGUzNfLxTdTp6WU4IuP6QFSp9gGfWS6boxFCkJSJ/L4+RG8z/FnIU3WxCk6hPL9SSWeA==
+"@babel/parser@^7.1.0", "@babel/parser@^7.14.5", "@babel/parser@^7.14.8", "@babel/parser@^7.7.2":
+  version "7.14.8"
+  resolved "https://registry.yarnpkg.com/@babel/parser/-/parser-7.14.8.tgz#66fd41666b2d7b840bd5ace7f7416d5ac60208d4"
+  integrity sha512-syoCQFOoo/fzkWDeM0dLEZi5xqurb5vuyzwIMNZRNun+N/9A4cUZeQaE7dTrB8jGaKuJRBtEOajtnmw0I5hvvA==
 
 "@babel/plugin-syntax-async-generators@^7.8.4":
   version "7.8.4"
@@ -286,48 +293,49 @@
     "@babel/helper-plugin-utils" "^7.8.0"
 
 "@babel/plugin-syntax-top-level-await@^7.8.3":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-top-level-await/-/plugin-syntax-top-level-await-7.12.13.tgz#c5f0fa6e249f5b739727f923540cf7a806130178"
-  integrity sha512-A81F9pDwyS7yM//KwbCSDqy3Uj4NMIurtplxphWxoYtNPov7cJsDkAFNNyVlIZ3jwGycVsurZ+LtOA8gZ376iQ==
+  version "7.14.5"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-top-level-await/-/plugin-syntax-top-level-await-7.14.5.tgz#c1cfdadc35a646240001f06138247b741c34d94c"
+  integrity sha512-hx++upLv5U1rgYfwe1xBQUhRmU41NEvpUvrp8jkrSCdvGSnM5/qdRMtylJ6PG5OFkBaHkbTAKTnd3/YyESRHFw==
   dependencies:
-    "@babel/helper-plugin-utils" "^7.12.13"
+    "@babel/helper-plugin-utils" "^7.14.5"
 
 "@babel/plugin-syntax-typescript@^7.7.2":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-typescript/-/plugin-syntax-typescript-7.12.13.tgz#9dff111ca64154cef0f4dc52cf843d9f12ce4474"
-  integrity sha512-cHP3u1JiUiG2LFDKbXnwVad81GvfyIOmCD6HIEId6ojrY0Drfy2q1jw7BwN7dE84+kTnBjLkXoL3IEy/3JPu2w==
-  dependencies:
-    "@babel/helper-plugin-utils" "^7.12.13"
-
-"@babel/template@^7.12.13", "@babel/template@^7.3.3":
-  version "7.12.13"
-  resolved "https://registry.yarnpkg.com/@babel/template/-/template-7.12.13.tgz#530265be8a2589dbb37523844c5bcb55947fb327"
-  integrity sha512-/7xxiGA57xMo/P2GVvdEumr8ONhFOhfgq2ihK3h1e6THqzTAkHbkXgB0xI9yeTfIUoH3+oAeHhqm/I43OTbbjA==
-  dependencies:
-    "@babel/code-frame" "^7.12.13"
-    "@babel/parser" "^7.12.13"
-    "@babel/types" "^7.12.13"
-
-"@babel/traverse@^7.1.0", "@babel/traverse@^7.14.0", "@babel/traverse@^7.14.2", "@babel/traverse@^7.7.2":
-  version "7.14.2"
-  resolved "https://registry.yarnpkg.com/@babel/traverse/-/traverse-7.14.2.tgz#9201a8d912723a831c2679c7ebbf2fe1416d765b"
-  integrity sha512-TsdRgvBFHMyHOOzcP9S6QU0QQtjxlRpEYOy3mcCO5RgmC305ki42aSAmfZEMSSYBla2oZ9BMqYlncBaKmD/7iA==
-  dependencies:
-    "@babel/code-frame" "^7.12.13"
-    "@babel/generator" "^7.14.2"
-    "@babel/helper-function-name" "^7.14.2"
-    "@babel/helper-split-export-declaration" "^7.12.13"
-    "@babel/parser" "^7.14.2"
-    "@babel/types" "^7.14.2"
+  version "7.14.5"
+  resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-typescript/-/plugin-syntax-typescript-7.14.5.tgz#b82c6ce471b165b5ce420cf92914d6fb46225716"
+  integrity sha512-u6OXzDaIXjEstBRRoBCQ/uKQKlbuaeE5in0RvWdA4pN6AhqxTIwUsnHPU1CFZA/amYObMsuWhYfRl3Ch90HD0Q==
+  dependencies:
+    "@babel/helper-plugin-utils" "^7.14.5"
+
+"@babel/template@^7.14.5", "@babel/template@^7.3.3":
+  version "7.14.5"
+  resolved "https://registry.yarnpkg.com/@babel/template/-/template-7.14.5.tgz#a9bc9d8b33354ff6e55a9c60d1109200a68974f4"
+  integrity sha512-6Z3Po85sfxRGachLULUhOmvAaOo7xCvqGQtxINai2mEGPFm6pQ4z5QInFnUrRpfoSV60BnjyF5F3c+15fxFV1g==
+  dependencies:
+    "@babel/code-frame" "^7.14.5"
+    "@babel/parser" "^7.14.5"
+    "@babel/types" "^7.14.5"
+
+"@babel/traverse@^7.1.0", "@babel/traverse@^7.14.5", "@babel/traverse@^7.14.8", "@babel/traverse@^7.7.2":
+  version "7.14.8"
+  resolved "https://registry.yarnpkg.com/@babel/traverse/-/traverse-7.14.8.tgz#c0253f02677c5de1a8ff9df6b0aacbec7da1a8ce"
+  integrity sha512-kexHhzCljJcFNn1KYAQ6A5wxMRzq9ebYpEDV4+WdNyr3i7O44tanbDOR/xjiG2F3sllan+LgwK+7OMk0EmydHg==
+  dependencies:
+    "@babel/code-frame" "^7.14.5"
+    "@babel/generator" "^7.14.8"
+    "@babel/helper-function-name" "^7.14.5"
+    "@babel/helper-hoist-variables" "^7.14.5"
+    "@babel/helper-split-export-declaration" "^7.14.5"
+    "@babel/parser" "^7.14.8"
+    "@babel/types" "^7.14.8"
     debug "^4.1.0"
     globals "^11.1.0"
 
-"@babel/types@^7.0.0", "@babel/types@^7.12.13", "@babel/types@^7.13.12", "@babel/types@^7.14.0", "@babel/types@^7.14.2", "@babel/types@^7.14.4", "@babel/types@^7.3.0", "@babel/types@^7.3.3":
-  version "7.14.4"
-  resolved "https://registry.yarnpkg.com/@babel/types/-/types-7.14.4.tgz#bfd6980108168593b38b3eb48a24aa026b919bc0"
-  integrity sha512-lCj4aIs0xUefJFQnwwQv2Bxg7Omd6bgquZ6LGC+gGMh6/s5qDVfjuCMlDmYQ15SLsWHd9n+X3E75lKIhl5Lkiw==
+"@babel/types@^7.0.0", "@babel/types@^7.14.5", "@babel/types@^7.14.8", "@babel/types@^7.3.0", "@babel/types@^7.3.3":
+  version "7.14.8"
+  resolved "https://registry.yarnpkg.com/@babel/types/-/types-7.14.8.tgz#38109de8fcadc06415fbd9b74df0065d4d41c728"
+  integrity sha512-iob4soQa7dZw8nodR/KlOQkPh9S4I8RwCxwRIFuiMRYjOzH/KJzdUfDgz6cGi5dDaclXF4P2PAhCdrBJNIg68Q==
   dependencies:
-    "@babel/helper-validator-identifier" "^7.14.0"
+    "@babel/helper-validator-identifier" "^7.14.8"
     to-fast-properties "^2.0.0"
 
 "@bcoe/v8-coverage@^0.2.3":
@@ -335,15 +343,15 @@
   resolved "https://registry.yarnpkg.com/@bcoe/v8-coverage/-/v8-coverage-0.2.3.tgz#75a2e8b51cb758a7553d6804a5932d7aace75c39"
   integrity sha512-0hYQ8SB4Db5zvZB4axdMHGwEaQjkZzFjQiN9LVYvIFB2nSUHW9tYpxWriPrWDASIxiaXax83REcLxuSdnGPZtw==
 
-"@eslint/eslintrc@^0.4.1":
-  version "0.4.1"
-  resolved "https://registry.yarnpkg.com/@eslint/eslintrc/-/eslintrc-0.4.1.tgz#442763b88cecbe3ee0ec7ca6d6dd6168550cbf14"
-  integrity sha512-5v7TDE9plVhvxQeWLXDTvFvJBdH6pEsdnl2g/dAptmuFEPedQ4Erq5rsDsX+mvAM610IhNaO2W5V1dOOnDKxkQ==
+"@eslint/eslintrc@^0.4.3":
+  version "0.4.3"
+  resolved "https://registry.yarnpkg.com/@eslint/eslintrc/-/eslintrc-0.4.3.tgz#9e42981ef035beb3dd49add17acb96e8ff6f394c"
+  integrity sha512-J6KFFz5QCYUJq3pf0mjEcCJVERbzv71PUIDczuh9JkwGEzced6CO5ADLHB1rbf/+oPBtoPfMYNOpGDzCANlbXw==
   dependencies:
     ajv "^6.12.4"
     debug "^4.1.1"
     espree "^7.3.0"
-    globals "^12.1.0"
+    globals "^13.9.0"
     ignore "^4.0.6"
     import-fresh "^3.2.1"
     js-yaml "^3.13.1"
@@ -369,6 +377,25 @@
     normalize-path "^2.0.1"
     through2 "^2.0.3"
 
+"@humanwhocodes/config-array@^0.5.0":
+  version "0.5.0"
+  resolved "https://registry.yarnpkg.com/@humanwhocodes/config-array/-/config-array-0.5.0.tgz#1407967d4c6eecd7388f83acf1eaf4d0c6e58ef9"
+  integrity sha512-FagtKFz74XrTl7y6HCzQpwDfXP0yhxe9lHLD1UZxjvZIcbyRz8zTFF/yYNfSfzU414eDwZ1SrO0Qvtyf+wFMQg==
+  dependencies:
+    "@humanwhocodes/object-schema" "^1.2.0"
+    debug "^4.1.1"
+    minimatch "^3.0.4"
+
+"@humanwhocodes/object-schema@^1.2.0":
+  version "1.2.0"
+  resolved "https://registry.yarnpkg.com/@humanwhocodes/object-schema/-/object-schema-1.2.0.tgz#87de7af9c231826fdd68ac7258f77c429e0e5fcf"
+  integrity sha512-wdppn25U8z/2yiaT6YGquE6X8sSv7hNMWSXYSSU1jGv/yd6XqjXgTDJ8KP4NgjTXfJ3GbRjeeb8RTV7a/VpM+w==
+
+"@hutson/parse-repository-url@^3.0.0":
+  version "3.0.2"
+  resolved "https://registry.yarnpkg.com/@hutson/parse-repository-url/-/parse-repository-url-3.0.2.tgz#98c23c950a3d9b6c8f0daed06da6c3af06981340"
+  integrity sha512-H9XAx3hc0BQHY6l+IFSWHDySypcXsvsuLhgYLUGywmJ5pswRVQJUHpOsobnLYp2ZUaUlKiKDrgWWhosOwAEM8Q==
+
 "@istanbuljs/load-nyc-config@^1.0.0":
   version "1.1.0"
   resolved "https://registry.yarnpkg.com/@istanbuljs/load-nyc-config/-/load-nyc-config-1.1.0.tgz#fd3db1d59ecf7cf121e80650bb86712f9b55eced"
@@ -554,17 +581,6 @@
     "@types/yargs" "^15.0.0"
     chalk "^4.0.0"
 
-"@jest/types@^27.0.2":
-  version "27.0.2"
-  resolved "https://registry.yarnpkg.com/@jest/types/-/types-27.0.2.tgz#e153d6c46bda0f2589f0702b071f9898c7bbd37e"
-  integrity sha512-XpjCtJ/99HB4PmyJ2vgmN7vT+JLP7RW1FBT9RgnMFS4Dt7cvIyBee8O3/j98aUZ34ZpenPZFqmaaObWSeL65dg==
-  dependencies:
-    "@types/istanbul-lib-coverage" "^2.0.0"
-    "@types/istanbul-reports" "^3.0.0"
-    "@types/node" "*"
-    "@types/yargs" "^16.0.0"
-    chalk "^4.0.0"
-
 "@jest/types@^27.0.6":
   version "27.0.6"
   resolved "https://registry.yarnpkg.com/@jest/types/-/types-27.0.6.tgz#9a992bc517e0c49f035938b8549719c2de40706b"
@@ -1247,10 +1263,10 @@
     npmlog "^4.1.2"
     write-file-atomic "^3.0.3"
 
-"@mattiasbuelens/web-streams-adapter@0.1.0-alpha.5":
-  version "0.1.0-alpha.5"
-  resolved "https://registry.yarnpkg.com/@mattiasbuelens/web-streams-adapter/-/web-streams-adapter-0.1.0-alpha.5.tgz#091a6256fdada3d53dc0a70501bcc6f3a46add05"
-  integrity sha512-OIfunNt/fTjIgDtUqXhBYOKtgaxm30ZWkMWegI9iS3xUHy2/A3AXki6/k+z40+BywNMi+spON/jSE0FF9WmUKA==
+"@mattiasbuelens/web-streams-adapter@~0.1.0":
+  version "0.1.0"
+  resolved "https://registry.yarnpkg.com/@mattiasbuelens/web-streams-adapter/-/web-streams-adapter-0.1.0.tgz#607b5a25682f4ae2741da7ba6df39302505336b3"
+  integrity sha512-oV4PyZfwJNtmFWhvlJLqYIX1Nn22ML8FZpS16ZUKv0hg7414xV1fjsGqxQzLT2dyK92TKxsJSwMOd7VNHAtPmA==
 
 "@mrmlnc/readdir-enhanced@^2.2.1":
   version "2.2.1"
@@ -1260,18 +1276,18 @@
     call-me-maybe "^1.0.1"
     glob-to-regexp "^0.3.0"
 
-"@nodelib/fs.scandir@2.1.4":
-  version "2.1.4"
-  resolved "https://registry.yarnpkg.com/@nodelib/fs.scandir/-/fs.scandir-2.1.4.tgz#d4b3549a5db5de2683e0c1071ab4f140904bbf69"
-  integrity sha512-33g3pMJk3bg5nXbL/+CY6I2eJDzZAni49PfJnL5fghPTggPvBd/pFNSgJsdAgWptuFu7qq/ERvOYFlhvsLTCKA==
+"@nodelib/fs.scandir@2.1.5":
+  version "2.1.5"
+  resolved "https://registry.yarnpkg.com/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz#7619c2eb21b25483f6d167548b4cfd5a7488c3d5"
+  integrity sha512-vq24Bq3ym5HEQm2NKCr3yXDwjc7vTsEThRDnkp2DK9p1uqLR+DHurm/NOTo0KG7HYHU7eppKZj3MyqYuMBf62g==
   dependencies:
-    "@nodelib/fs.stat" "2.0.4"
+    "@nodelib/fs.stat" "2.0.5"
     run-parallel "^1.1.9"
 
-"@nodelib/fs.stat@2.0.4", "@nodelib/fs.stat@^2.0.2":
-  version "2.0.4"
-  resolved "https://registry.yarnpkg.com/@nodelib/fs.stat/-/fs.stat-2.0.4.tgz#a3f2dd61bab43b8db8fa108a121cfffe4c676655"
-  integrity sha512-IYlHJA0clt2+Vg7bccq+TzRdJvv19c2INqBSsoOLp1je7xjtr7J26+WXR72MCdvU9q1qTzIWDfhMf+DRvQJK4Q==
+"@nodelib/fs.stat@2.0.5", "@nodelib/fs.stat@^2.0.2":
+  version "2.0.5"
+  resolved "https://registry.yarnpkg.com/@nodelib/fs.stat/-/fs.stat-2.0.5.tgz#5bd262af94e9d25bd1e71b05deed44876a222e8b"
+  integrity sha512-RkhPPp2zrqDAQA/2jNhnztcPAlv64XdhIp7a7454A5ovI7Bukxgt7MX7udwAu3zg1DcpPU0rz3VV1SeaqvY4+A==
 
 "@nodelib/fs.stat@^1.1.2":
   version "1.1.3"
@@ -1279,11 +1295,11 @@
   integrity sha512-shAmDyaQC4H92APFoIaVDHCx5bStIocgvbwQyxPRrbUY20V1EYTbSDchWbuwlMG3V17cprZhA6+78JfB+3DTPw==
 
 "@nodelib/fs.walk@^1.2.3":
-  version "1.2.6"
-  resolved "https://registry.yarnpkg.com/@nodelib/fs.walk/-/fs.walk-1.2.6.tgz#cce9396b30aa5afe9e3756608f5831adcb53d063"
-  integrity sha512-8Broas6vTtW4GIXTAHDoE32hnN2M5ykgCpWGbuXHQ15vEMqr23pB76e/GZcYsZCHALv50ktd24qhEyKr6wBtow==
+  version "1.2.8"
+  resolved "https://registry.yarnpkg.com/@nodelib/fs.walk/-/fs.walk-1.2.8.tgz#e95737e8bb6746ddedf69c556953494f196fe69a"
+  integrity sha512-oGB+UxlgWcgQkgwo8GcEGwemoTFt3FIO9ababBmaGwXIoBKZ+GTy0pP185beGg7Llih/NSHSV2XAs1lnznocSg==
   dependencies:
-    "@nodelib/fs.scandir" "2.1.4"
+    "@nodelib/fs.scandir" "2.1.5"
     fastq "^1.6.0"
 
 "@npmcli/ci-detect@^1.0.0":
@@ -1291,10 +1307,10 @@
   resolved "https://registry.yarnpkg.com/@npmcli/ci-detect/-/ci-detect-1.3.0.tgz#6c1d2c625fb6ef1b9dea85ad0a5afcbef85ef22a"
   integrity sha512-oN3y7FAROHhrAt7Rr7PnTSwrHrZVRTS2ZbyxeQwSSYD0ifwM3YNgQqbaRmjcWoPyq77MjchusjJDspbzMmip1Q==
 
-"@npmcli/git@^2.0.1":
-  version "2.0.9"
-  resolved "https://registry.yarnpkg.com/@npmcli/git/-/git-2.0.9.tgz#915bbfe66300e67b4da5ef765a4475ffb2ca5b6b"
-  integrity sha512-hTMbMryvOqGLwnmMBKs5usbPsJtyEsMsgXwJbmNrsEuQQh1LAIMDU77IoOrwkCg+NgQWl+ySlarJASwM3SutCA==
+"@npmcli/git@^2.1.0":
+  version "2.1.0"
+  resolved "https://registry.yarnpkg.com/@npmcli/git/-/git-2.1.0.tgz#2fbd77e147530247d37f325930d457b3ebe894f6"
+  integrity sha512-/hBFX/QG1b+N7PZBFs0bi+evgRZcK9nWBxQKZkGoXUT5hJSwl5c4d7y8/hm+NQZRPhQ67RzFaj5UM9YeyKoryw==
   dependencies:
     "@npmcli/promise-spawn" "^1.3.2"
     lru-cache "^6.0.0"
@@ -1351,41 +1367,41 @@
   dependencies:
     "@octokit/types" "^6.0.3"
 
-"@octokit/core@^3.2.3":
-  version "3.4.0"
-  resolved "https://registry.yarnpkg.com/@octokit/core/-/core-3.4.0.tgz#b48aa27d755b339fe7550548b340dcc2b513b742"
-  integrity sha512-6/vlKPP8NF17cgYXqucdshWqmMZGXkuvtcrWCgU5NOI0Pl2GjlmZyWgBMrU8zJ3v2MJlM6++CiB45VKYmhiWWg==
+"@octokit/core@^3.5.0":
+  version "3.5.1"
+  resolved "https://registry.yarnpkg.com/@octokit/core/-/core-3.5.1.tgz#8601ceeb1ec0e1b1b8217b960a413ed8e947809b"
+  integrity sha512-omncwpLVxMP+GLpLPgeGJBF6IWJFjXDS5flY5VbppePYX9XehevbDykRH9PdCdvqt9TS5AOTiDide7h0qrkHjw==
   dependencies:
     "@octokit/auth-token" "^2.4.4"
     "@octokit/graphql" "^4.5.8"
-    "@octokit/request" "^5.4.12"
+    "@octokit/request" "^5.6.0"
     "@octokit/request-error" "^2.0.5"
     "@octokit/types" "^6.0.3"
     before-after-hook "^2.2.0"
     universal-user-agent "^6.0.0"
 
 "@octokit/endpoint@^6.0.1":
-  version "6.0.11"
-  resolved "https://registry.yarnpkg.com/@octokit/endpoint/-/endpoint-6.0.11.tgz#082adc2aebca6dcefa1fb383f5efb3ed081949d1"
-  integrity sha512-fUIPpx+pZyoLW4GCs3yMnlj2LfoXTWDUVPTC4V3MUEKZm48W+XYpeWSZCv+vYF1ZABUm2CqnDVf1sFtIYrj7KQ==
+  version "6.0.12"
+  resolved "https://registry.yarnpkg.com/@octokit/endpoint/-/endpoint-6.0.12.tgz#3b4d47a4b0e79b1027fb8d75d4221928b2d05658"
+  integrity sha512-lF3puPwkQWGfkMClXb4k/eUT/nZKQfxinRWJrdZaJO85Dqwo/G0yOC434Jr2ojwafWJMYqFGFa5ms4jJUgujdA==
   dependencies:
     "@octokit/types" "^6.0.3"
     is-plain-object "^5.0.0"
     universal-user-agent "^6.0.0"
 
 "@octokit/graphql@^4.5.8":
-  version "4.6.2"
-  resolved "https://registry.yarnpkg.com/@octokit/graphql/-/graphql-4.6.2.tgz#ec44abdfa87f2b9233282136ae33e4ba446a04e7"
-  integrity sha512-WmsIR1OzOr/3IqfG9JIczI8gMJUMzzyx5j0XXQ4YihHtKlQc+u35VpVoOXhlKAlaBntvry1WpAzPl/a+s3n89Q==
+  version "4.6.4"
+  resolved "https://registry.yarnpkg.com/@octokit/graphql/-/graphql-4.6.4.tgz#0c3f5bed440822182e972317122acb65d311a5ed"
+  integrity sha512-SWTdXsVheRmlotWNjKzPOb6Js6tjSqA2a8z9+glDJng0Aqjzti8MEWOtuT8ZSu6wHnci7LZNuarE87+WJBG4vg==
   dependencies:
-    "@octokit/request" "^5.3.0"
+    "@octokit/request" "^5.6.0"
     "@octokit/types" "^6.0.3"
     universal-user-agent "^6.0.0"
 
-"@octokit/openapi-types@^7.2.3":
-  version "7.2.3"
-  resolved "https://registry.yarnpkg.com/@octokit/openapi-types/-/openapi-types-7.2.3.tgz#a7105796db9b85d25d3feba9a1785a124c7803e4"
-  integrity sha512-V1ycxkR19jqbIl3evf2RQiMRBvTNRi+Iy9h20G5OP5dPfEF6GJ1DPlUeiZRxo2HJxRr+UA4i0H1nn4btBDPFrw==
+"@octokit/openapi-types@^9.0.0":
+  version "9.0.0"
+  resolved "https://registry.yarnpkg.com/@octokit/openapi-types/-/openapi-types-9.0.0.tgz#05d33f999326785445c915d25167d68bd5eddb24"
+  integrity sha512-GSpv5VUFqarOXZl6uWPsDnjChkKCxnaMALmQhzvCWGiMxONQxX7ZwlomCMS+wB1KqxLPCA5n6gYt016oEMkHmQ==
 
 "@octokit/plugin-enterprise-rest@^6.0.1":
   version "6.0.1"
@@ -1393,70 +1409,70 @@
   integrity sha512-93uGjlhUD+iNg1iWhUENAtJata6w5nE+V4urXOAlIXdco6xNZtUSfYY8dzp3Udy74aqO/B5UZL80x/YMa5PKRw==
 
 "@octokit/plugin-paginate-rest@^2.6.2":
-  version "2.13.3"
-  resolved "https://registry.yarnpkg.com/@octokit/plugin-paginate-rest/-/plugin-paginate-rest-2.13.3.tgz#f0f1792230805108762d87906fb02d573b9e070a"
-  integrity sha512-46lptzM9lTeSmIBt/sVP/FLSTPGx6DCzAdSX3PfeJ3mTf4h9sGC26WpaQzMEq/Z44cOcmx8VsOhO+uEgE3cjYg==
+  version "2.14.0"
+  resolved "https://registry.yarnpkg.com/@octokit/plugin-paginate-rest/-/plugin-paginate-rest-2.14.0.tgz#f469cb4a908792fb44679c5973d8bba820c88b0f"
+  integrity sha512-S2uEu2uHeI7Vf+Lvj8tv3O5/5TCAa8GHS0dUQN7gdM7vKA6ZHAbR6HkAVm5yMb1mbedLEbxOuQ+Fa0SQ7tCDLA==
   dependencies:
-    "@octokit/types" "^6.11.0"
+    "@octokit/types" "^6.18.0"
 
 "@octokit/plugin-request-log@^1.0.2":
-  version "1.0.3"
-  resolved "https://registry.yarnpkg.com/@octokit/plugin-request-log/-/plugin-request-log-1.0.3.tgz#70a62be213e1edc04bb8897ee48c311482f9700d"
-  integrity sha512-4RFU4li238jMJAzLgAwkBAw+4Loile5haQMQr+uhFq27BmyJXcXSKvoQKqh0agsZEiUlW6iSv3FAgvmGkur7OQ==
+  version "1.0.4"
+  resolved "https://registry.yarnpkg.com/@octokit/plugin-request-log/-/plugin-request-log-1.0.4.tgz#5e50ed7083a613816b1e4a28aeec5fb7f1462e85"
+  integrity sha512-mLUsMkgP7K/cnFEw07kWqXGF5LKrOkD+lhCrKvPHXWDywAwuDUeDwWBpc69XK3pNX0uKiVt8g5z96PJ6z9xCFA==
 
-"@octokit/plugin-rest-endpoint-methods@5.3.1":
-  version "5.3.1"
-  resolved "https://registry.yarnpkg.com/@octokit/plugin-rest-endpoint-methods/-/plugin-rest-endpoint-methods-5.3.1.tgz#deddce769b4ec3179170709ab42e4e9e6195aaa9"
-  integrity sha512-3B2iguGmkh6bQQaVOtCsS0gixrz8Lg0v4JuXPqBcFqLKuJtxAUf3K88RxMEf/naDOI73spD+goJ/o7Ie7Cvdjg==
+"@octokit/plugin-rest-endpoint-methods@5.4.2":
+  version "5.4.2"
+  resolved "https://registry.yarnpkg.com/@octokit/plugin-rest-endpoint-methods/-/plugin-rest-endpoint-methods-5.4.2.tgz#d090e93ee68ec09985e1ff0a1d2d28581cc883a5"
+  integrity sha512-imNDDvUMy9YzECcP6zTcKNjwutSwqCYGMZjLPnBHF0kdb3V9URrHWmalD0ZvNEYjwbpm2zw8RPewj3ebCpMBRw==
   dependencies:
-    "@octokit/types" "^6.16.2"
+    "@octokit/types" "^6.19.1"
     deprecation "^2.3.1"
 
-"@octokit/request-error@^2.0.0", "@octokit/request-error@^2.0.5":
-  version "2.0.5"
-  resolved "https://registry.yarnpkg.com/@octokit/request-error/-/request-error-2.0.5.tgz#72cc91edc870281ad583a42619256b380c600143"
-  integrity sha512-T/2wcCFyM7SkXzNoyVNWjyVlUwBvW3igM3Btr/eKYiPmucXTtkxt2RBsf6gn3LTzaLSLTQtNmvg+dGsOxQrjZg==
+"@octokit/request-error@^2.0.5", "@octokit/request-error@^2.1.0":
+  version "2.1.0"
+  resolved "https://registry.yarnpkg.com/@octokit/request-error/-/request-error-2.1.0.tgz#9e150357831bfc788d13a4fd4b1913d60c74d677"
+  integrity sha512-1VIvgXxs9WHSjicsRwq8PlR2LR2x6DwsJAaFgzdi0JfJoGSO8mYI/cHJQ+9FbN21aa+DrgNLnwObmyeSC8Rmpg==
   dependencies:
     "@octokit/types" "^6.0.3"
     deprecation "^2.0.0"
     once "^1.4.0"
 
-"@octokit/request@^5.3.0", "@octokit/request@^5.4.12":
-  version "5.4.15"
-  resolved "https://registry.yarnpkg.com/@octokit/request/-/request-5.4.15.tgz#829da413dc7dd3aa5e2cdbb1c7d0ebe1f146a128"
-  integrity sha512-6UnZfZzLwNhdLRreOtTkT9n57ZwulCve8q3IT/Z477vThu6snfdkBuhxnChpOKNGxcQ71ow561Qoa6uqLdPtag==
+"@octokit/request@^5.6.0":
+  version "5.6.0"
+  resolved "https://registry.yarnpkg.com/@octokit/request/-/request-5.6.0.tgz#6084861b6e4fa21dc40c8e2a739ec5eff597e672"
+  integrity sha512-4cPp/N+NqmaGQwbh3vUsYqokQIzt7VjsgTYVXiwpUP2pxd5YiZB2XuTedbb0SPtv9XS7nzAKjAuQxmY8/aZkiA==
   dependencies:
     "@octokit/endpoint" "^6.0.1"
-    "@octokit/request-error" "^2.0.0"
-    "@octokit/types" "^6.7.1"
+    "@octokit/request-error" "^2.1.0"
+    "@octokit/types" "^6.16.1"
     is-plain-object "^5.0.0"
     node-fetch "^2.6.1"
     universal-user-agent "^6.0.0"
 
 "@octokit/rest@^18.1.0":
-  version "18.5.6"
-  resolved "https://registry.yarnpkg.com/@octokit/rest/-/rest-18.5.6.tgz#8c9a7c9329c7bbf478af20df78ddeab0d21f6d89"
-  integrity sha512-8HdG6ZjQdZytU6tCt8BQ2XLC7EJ5m4RrbyU/EARSkAM1/HP3ceOzMG/9atEfe17EDMer3IVdHWLedz2wDi73YQ==
+  version "18.6.8"
+  resolved "https://registry.yarnpkg.com/@octokit/rest/-/rest-18.6.8.tgz#f73ef3b59686df18206183551c2a835d1db1424f"
+  integrity sha512-n2aT0mJL9N/idCPmnBynCino1qNScfRHvr8OeskQdBNhUYAMc7cxoc8KLlv1DMWxlZUNhed+5kVdu7majVdVag==
   dependencies:
-    "@octokit/core" "^3.2.3"
+    "@octokit/core" "^3.5.0"
     "@octokit/plugin-paginate-rest" "^2.6.2"
     "@octokit/plugin-request-log" "^1.0.2"
-    "@octokit/plugin-rest-endpoint-methods" "5.3.1"
+    "@octokit/plugin-rest-endpoint-methods" "5.4.2"
 
-"@octokit/types@^6.0.3", "@octokit/types@^6.11.0", "@octokit/types@^6.16.2", "@octokit/types@^6.7.1":
-  version "6.16.2"
-  resolved "https://registry.yarnpkg.com/@octokit/types/-/types-6.16.2.tgz#62242e0565a3eb99ca2fd376283fe78b4ea057b4"
-  integrity sha512-wWPSynU4oLy3i4KGyk+J1BLwRKyoeW2TwRHgwbDz17WtVFzSK2GOErGliruIx8c+MaYtHSYTx36DSmLNoNbtgA==
+"@octokit/types@^6.0.3", "@octokit/types@^6.16.1", "@octokit/types@^6.18.0", "@octokit/types@^6.19.1":
+  version "6.19.1"
+  resolved "https://registry.yarnpkg.com/@octokit/types/-/types-6.19.1.tgz#6ea5f759d8d37e892e59c0a65f10892789b84a25"
+  integrity sha512-hMI2EokQzMG8ABWcnvcrabqQFuFHqUdN0HUOG4DPTaOtnf/jqhzhK1SHOGu5vDlI/x+hWJ60e28VxB7QhOP0CQ==
   dependencies:
-    "@octokit/openapi-types" "^7.2.3"
+    "@octokit/openapi-types" "^9.0.0"
 
-"@openpgp/web-stream-tools@0.0.5":
-  version "0.0.5"
-  resolved "https://registry.yarnpkg.com/@openpgp/web-stream-tools/-/web-stream-tools-0.0.5.tgz#f78d73400be010dca940ec09642aaf8c35edf56d"
-  integrity sha512-tdUCdiMi5ogmZlAbR4cQXZDbK34QB8iEnJ434m9bj4P7sxvKg2KKKbEiB4EQb2AWhj/SNKcoNUHhT9WxTqKimQ==
+"@openpgp/web-stream-tools@0.0.6":
+  version "0.0.6"
+  resolved "https://registry.yarnpkg.com/@openpgp/web-stream-tools/-/web-stream-tools-0.0.6.tgz#98ba42f09254149e6a431062f7eab3ebfc804cf7"
+  integrity sha512-U2Ujy4GUwz315W2QfleOWFnlvXTGz2Fjt4mg/nATedruT3EbIWjWzw4qfbaIvnBHjaVIijltsiESuNxIFRdHkw==
   dependencies:
-    "@mattiasbuelens/web-streams-adapter" "0.1.0-alpha.5"
-    web-streams-polyfill "~2.1.1"
+    "@mattiasbuelens/web-streams-adapter" "~0.1.0"
+    web-streams-polyfill "~3.0.3"
 
 "@sinonjs/commons@^1.7.0":
   version "1.8.3"
@@ -1478,29 +1494,29 @@
   integrity sha512-RbzJvlNzmRq5c3O09UipeuXno4tA1FE6ikOjxZK0tuxVv3412l64l5t1W5pj4+rJq9vpkm/kwiR07aZXnsKPxw==
 
 "@tsconfig/node10@^1.0.7":
-  version "1.0.7"
-  resolved "https://registry.yarnpkg.com/@tsconfig/node10/-/node10-1.0.7.tgz#1eb1de36c73478a2479cc661ef5af1c16d86d606"
-  integrity sha512-aBvUmXLQbayM4w3A8TrjwrXs4DZ8iduJnuJLLRGdkWlyakCf1q6uHZJBzXoRA/huAEknG5tcUyQxN3A+In5euQ==
+  version "1.0.8"
+  resolved "https://registry.yarnpkg.com/@tsconfig/node10/-/node10-1.0.8.tgz#c1e4e80d6f964fbecb3359c43bd48b40f7cadad9"
+  integrity sha512-6XFfSQmMgq0CFLY1MslA/CPUfhIL919M1rMsa5lP2P097N2Wd1sSX0tx1u4olM16fLNhtHZpRhedZJphNJqmZg==
 
 "@tsconfig/node12@^1.0.7":
-  version "1.0.7"
-  resolved "https://registry.yarnpkg.com/@tsconfig/node12/-/node12-1.0.7.tgz#677bd9117e8164dc319987dd6ff5fc1ba6fbf18b"
-  integrity sha512-dgasobK/Y0wVMswcipr3k0HpevxFJLijN03A8mYfEPvWvOs14v0ZlYTR4kIgMx8g4+fTyTFv8/jLCIfRqLDJ4A==
+  version "1.0.9"
+  resolved "https://registry.yarnpkg.com/@tsconfig/node12/-/node12-1.0.9.tgz#62c1f6dee2ebd9aead80dc3afa56810e58e1a04c"
+  integrity sha512-/yBMcem+fbvhSREH+s14YJi18sp7J9jpuhYByADT2rypfajMZZN4WQ6zBGgBKp53NKmqI36wFYDb3yaMPurITw==
 
 "@tsconfig/node14@^1.0.0":
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/@tsconfig/node14/-/node14-1.0.0.tgz#5bd046e508b1ee90bc091766758838741fdefd6e"
-  integrity sha512-RKkL8eTdPv6t5EHgFKIVQgsDapugbuOptNd9OOunN/HAkzmmTnZELx1kNCK0rSdUYGmiFMM3rRQMAWiyp023LQ==
+  version "1.0.1"
+  resolved "https://registry.yarnpkg.com/@tsconfig/node14/-/node14-1.0.1.tgz#95f2d167ffb9b8d2068b0b235302fafd4df711f2"
+  integrity sha512-509r2+yARFfHHE7T6Puu2jjkoycftovhXRqW328PDXTVGKihlb1P8Z9mMZH04ebyajfRY7dedfGynlrFHJUQCg==
 
 "@tsconfig/node16@^1.0.1":
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/@tsconfig/node16/-/node16-1.0.1.tgz#a6ca6a9a0ff366af433f42f5f0e124794ff6b8f1"
-  integrity sha512-FTgBI767POY/lKNDNbIzgAX6miIDBs6NTCbdlDb8TrWovHsSvaVIZDlTqym29C6UqhzwcJx4CYr+AlrMywA0cA==
+  version "1.0.2"
+  resolved "https://registry.yarnpkg.com/@tsconfig/node16/-/node16-1.0.2.tgz#423c77877d0569db20e1fc80885ac4118314010e"
+  integrity sha512-eZxlbI8GZscaGS7kkc/trHTT5xgrjH3/1n2JDwusC9iahPKWMRvRjJSAN5mCXviuTGQ/lHnhvv8Q1YTpnfz9gA==
 
 "@types/babel__core@^7.0.0", "@types/babel__core@^7.1.14":
-  version "7.1.14"
-  resolved "https://registry.yarnpkg.com/@types/babel__core/-/babel__core-7.1.14.tgz#faaeefc4185ec71c389f4501ee5ec84b170cc402"
-  integrity sha512-zGZJzzBUVDo/eV6KgbE0f0ZI7dInEYvo12Rb70uNQDshC3SkRMb67ja0GgRHZgAX3Za6rhaWlvbDO8rrGyAb1g==
+  version "7.1.15"
+  resolved "https://registry.yarnpkg.com/@types/babel__core/-/babel__core-7.1.15.tgz#2ccfb1ad55a02c83f8e0ad327cbc332f55eb1024"
+  integrity sha512-bxlMKPDbY8x5h6HBwVzEOk2C8fb6SLfYQ5Jw3uBYuYF1lfWk/kbLd81la82vrIkBb0l+JdmrZaDikPrNxpS/Ew==
   dependencies:
     "@babel/parser" "^7.1.0"
     "@babel/types" "^7.0.0"
@@ -1509,24 +1525,24 @@
     "@types/babel__traverse" "*"
 
 "@types/babel__generator@*":
-  version "7.6.2"
-  resolved "https://registry.yarnpkg.com/@types/babel__generator/-/babel__generator-7.6.2.tgz#f3d71178e187858f7c45e30380f8f1b7415a12d8"
-  integrity sha512-MdSJnBjl+bdwkLskZ3NGFp9YcXGx5ggLpQQPqtgakVhsWK0hTtNYhjpZLlWQTviGTvF8at+Bvli3jV7faPdgeQ==
+  version "7.6.3"
+  resolved "https://registry.yarnpkg.com/@types/babel__generator/-/babel__generator-7.6.3.tgz#f456b4b2ce79137f768aa130d2423d2f0ccfaba5"
+  integrity sha512-/GWCmzJWqV7diQW54smJZzWbSFf4QYtF71WCKhcx6Ru/tFyQIY2eiiITcCAeuPbNSvT9YCGkVMqqvSk2Z0mXiA==
   dependencies:
     "@babel/types" "^7.0.0"
 
 "@types/babel__template@*":
-  version "7.4.0"
-  resolved "https://registry.yarnpkg.com/@types/babel__template/-/babel__template-7.4.0.tgz#0c888dd70b3ee9eebb6e4f200e809da0076262be"
-  integrity sha512-NTPErx4/FiPCGScH7foPyr+/1Dkzkni+rHiYHHoTjvwou7AQzJkNeD60A9CXRy+ZEN2B1bggmkTMCDb+Mv5k+A==
+  version "7.4.1"
+  resolved "https://registry.yarnpkg.com/@types/babel__template/-/babel__template-7.4.1.tgz#3d1a48fd9d6c0edfd56f2ff578daed48f36c8969"
+  integrity sha512-azBFKemX6kMg5Io+/rdGT0dkGreboUVR0Cdm3fz9QJWpaQGJRQXl7C+6hOTCZcMll7KFyEQpgbYI2lHdsS4U7g==
   dependencies:
     "@babel/parser" "^7.1.0"
     "@babel/types" "^7.0.0"
 
 "@types/babel__traverse@*", "@types/babel__traverse@^7.0.4", "@types/babel__traverse@^7.0.6":
-  version "7.11.1"
-  resolved "https://registry.yarnpkg.com/@types/babel__traverse/-/babel__traverse-7.11.1.tgz#654f6c4f67568e24c23b367e947098c6206fa639"
-  integrity sha512-Vs0hm0vPahPMYi9tDjtP66llufgO3ST16WXaSTtDGEl9cewAl3AibmxWw6TINOqHPT9z0uABKAYjT9jNSg4npw==
+  version "7.14.2"
+  resolved "https://registry.yarnpkg.com/@types/babel__traverse/-/babel__traverse-7.14.2.tgz#ffcd470bbb3f8bf30481678fb5502278ca833a43"
+  integrity sha512-K2waXdXBi2302XUdcHcR1jCeU0LL4TD9HRs/gk0N2Xvrht+G/BfJa4QObBQZfhMdxiCpV3COl5Nfq4uKTeTnJA==
   dependencies:
     "@babel/types" "^7.3.0"
 
@@ -1535,10 +1551,10 @@
   resolved "https://registry.yarnpkg.com/@types/flatbuffers/-/flatbuffers-1.10.0.tgz#aa74e30ffdc86445f2f060e1808fc9d56b5603ba"
   integrity sha512-7btbphLrKvo5yl/5CC2OCxUSMx1wV1wvGT1qDXkSt7yi00/YW7E8k6qzXqJHsp+WU0eoG7r6MTQQXI9lIvd0qA==
 
-"@types/glob@7.1.3", "@types/glob@^7.1.1":
-  version "7.1.3"
-  resolved "https://registry.yarnpkg.com/@types/glob/-/glob-7.1.3.tgz#e6ba80f36b7daad2c685acd9266382e68985c183"
-  integrity sha512-SEYeGAIQIQX8NN6LDKprLjbrd5dARM5EXsd8GI/A5l0apYI1fGMWgPHSe4ZKL4eozlAyI+doUE9XbYS4xCkQ1w==
+"@types/glob@7.1.4", "@types/glob@^7.1.1":
+  version "7.1.4"
+  resolved "https://registry.yarnpkg.com/@types/glob/-/glob-7.1.4.tgz#ea59e21d2ee5c517914cb4bc8e4153b99e566672"
+  integrity sha512-w+LsMxKyYQm347Otw+IfBXOv9UWVjpHpCDdbBMt8Kz/xbvCYNjP+0qPh91Km3iKfSRLBB0P7fAMf0KHrPu+MyA==
   dependencies:
     "@types/minimatch" "*"
     "@types/node" "*"
@@ -1569,38 +1585,33 @@
   dependencies:
     "@types/istanbul-lib-report" "*"
 
-"@types/jest@26.0.23":
-  version "26.0.23"
-  resolved "https://registry.yarnpkg.com/@types/jest/-/jest-26.0.23.tgz#a1b7eab3c503b80451d019efb588ec63522ee4e7"
-  integrity sha512-ZHLmWMJ9jJ9PTiT58juykZpL7KjwJywFN3Rr2pTSkyQfydf/rk22yS7W8p5DaVUMQ2BQC7oYiU3FjbTM/mYrOA==
+"@types/jest@26.0.24":
+  version "26.0.24"
+  resolved "https://registry.yarnpkg.com/@types/jest/-/jest-26.0.24.tgz#943d11976b16739185913a1936e0de0c4a7d595a"
+  integrity sha512-E/X5Vib8BWqZNRlDxj9vYXhsDwPYbPINqKF9BsnSoon4RQ0D9moEuLD8txgyypFLH7J4+Lho9Nr/c8H0Fi+17w==
   dependencies:
     jest-diff "^26.0.0"
     pretty-format "^26.0.0"
 
-"@types/json-schema@^7.0.3", "@types/json-schema@^7.0.7":
-  version "7.0.7"
-  resolved "https://registry.yarnpkg.com/@types/json-schema/-/json-schema-7.0.7.tgz#98a993516c859eb0d5c4c8f098317a9ea68db9ad"
-  integrity sha512-cxWFQVseBm6O9Gbw1IWb8r6OS4OhSt3hPZLkFApLjM8TEXROBuQGLAH2i2gZpcXdLBIrpXuTDhH7Vbm1iXmNGA==
+"@types/json-schema@^7.0.7":
+  version "7.0.8"
+  resolved "https://registry.yarnpkg.com/@types/json-schema/-/json-schema-7.0.8.tgz#edf1bf1dbf4e04413ca8e5b17b3b7d7d54b59818"
+  integrity sha512-YSBPTLTVm2e2OoQIDYx8HaeWJ5tTToLH67kXR7zYNGupXMEHa2++G8k+DczX2cFVgalypqtyZIcU19AFcmOpmg==
 
 "@types/minimatch@*", "@types/minimatch@^3.0.3":
-  version "3.0.4"
-  resolved "https://registry.yarnpkg.com/@types/minimatch/-/minimatch-3.0.4.tgz#f0ec25dbf2f0e4b18647313ac031134ca5b24b21"
-  integrity sha512-1z8k4wzFnNjVK/tlxvrWuK5WMt6mydWWP7+zvH5eFep4oj+UkrfiJTRtjCeBXNpwaA/FYqqtb4/QS4ianFpIRA==
+  version "3.0.5"
+  resolved "https://registry.yarnpkg.com/@types/minimatch/-/minimatch-3.0.5.tgz#1001cc5e6a3704b83c236027e77f2f58ea010f40"
+  integrity sha512-Klz949h02Gz2uZCMGwDUSDS1YBlTdDDgbWHi+81l29tQALUtvz4rAYi5uoVhE5Lagoq6DeqAUlbrHvW/mXDgdQ==
 
-"@types/minimist@^1.2.0":
-  version "1.2.1"
-  resolved "https://registry.yarnpkg.com/@types/minimist/-/minimist-1.2.1.tgz#283f669ff76d7b8260df8ab7a4262cc83d988256"
-  integrity sha512-fZQQafSREFyuZcdWFAExYjBiCL7AUCdgsk80iO0q4yihYYdcIiH28CcuPTGFgLOCC8RlW49GSQxdHwZP+I7CNg==
-
-"@types/minimist@^1.2.1":
+"@types/minimist@^1.2.0", "@types/minimist@^1.2.2":
   version "1.2.2"
   resolved "https://registry.yarnpkg.com/@types/minimist/-/minimist-1.2.2.tgz#ee771e2ba4b3dc5b372935d549fd9617bf345b8c"
   integrity sha512-jhuKLIRrhvCPLqwPcx6INqmKeiA5EWrsCOPhrlFSrbrmU4ZMPjj5Ul/oLCMDO98XRUIwVm78xICz4EPCektzeQ==
 
-"@types/node@*", "@types/node@^15.6.1":
-  version "15.9.0"
-  resolved "https://registry.yarnpkg.com/@types/node/-/node-15.9.0.tgz#0b7f6c33ca5618fe329a9d832b478b4964d325a8"
-  integrity sha512-AR1Vq1Ei1GaA5FjKL5PBqblTZsL5M+monvGSZwe6sSIdGiuu7Xr/pNwWJY+0ZQuN8AapD/XMB5IzBAyYRFbocA==
+"@types/node@*", "@types/node@^16.4.0":
+  version "16.4.0"
+  resolved "https://registry.yarnpkg.com/@types/node/-/node-16.4.0.tgz#2c219eaa3b8d1e4d04f4dd6e40bc68c7467d5272"
+  integrity sha512-HrJuE7Mlqcjj+00JqMWpZ3tY8w7EUd+S0U3L1+PQSWiXZbOgyQDvi+ogoUxaHApPJq5diKxYBQwA3iIlNcPqOg==
 
 "@types/node@^13.7.4":
   version "13.13.52"
@@ -1608,9 +1619,9 @@
   integrity sha512-s3nugnZumCC//n4moGGe6tkNMyYEdaDBitVjwPxXmR5lnMG5dHePinH2EdxkG3Rh1ghFHHixAG4NJhpJW1rthQ==
 
 "@types/normalize-package-data@^2.4.0":
-  version "2.4.0"
-  resolved "https://registry.yarnpkg.com/@types/normalize-package-data/-/normalize-package-data-2.4.0.tgz#e486d0d97396d79beedd0a6e33f4534ff6b4973e"
-  integrity sha512-f5j5b/Gf71L+dbqxIpQ4Z2WlmI/mPJ0fOkGGmFgtb6sAu97EPczzbS3/tJKxmcYDj55OX6ssqwDAWOHIYDRDGA==
+  version "2.4.1"
+  resolved "https://registry.yarnpkg.com/@types/normalize-package-data/-/normalize-package-data-2.4.1.tgz#d3357479a0fdfdd5907fe67e17e0a85c906e1301"
+  integrity sha512-Gj7cI7z+98M282Tqmp2K5EIsoouUEzbBJhQQzDE3jSIRk6r9gsz0oUokqIUR4u1R3dMHo0pDHM7sNOHyhulypw==
 
 "@types/parse-json@^4.0.0":
   version "4.0.0"
@@ -1618,9 +1629,9 @@
   integrity sha512-//oorEZjL6sbPcKUaCdIGlIUeH26mgzimjBB77G6XRgnDl/L5wOnpyBGRe/Mmf5CVW3PwEBE1NjiMZ/ssFh4wA==
 
 "@types/prettier@^2.1.5":
-  version "2.2.3"
-  resolved "https://registry.yarnpkg.com/@types/prettier/-/prettier-2.2.3.tgz#ef65165aea2924c9359205bf748865b8881753c0"
-  integrity sha512-PijRCG/K3s3w1We6ynUKdxEc5AcuuH3NBmMDP8uvKVp6X43UY7NQlTzczakXP3DJR0F4dfNQIGjU2cUeRYs2AA==
+  version "2.3.2"
+  resolved "https://registry.yarnpkg.com/@types/prettier/-/prettier-2.3.2.tgz#fc8c2825e4ed2142473b4a81064e6e081463d1b3"
+  integrity sha512-eI5Yrz3Qv4KPUa/nSIAi0h+qX0XyewOliug5F2QAtuRg6Kjg6jfmxe1GIwoIRhZspD1A0RP8ANrPwvEXXtRFog==
 
 "@types/randomatic@3.1.2":
   version "3.1.2"
@@ -1628,143 +1639,96 @@
   integrity sha512-lLsR0U1lUTjJ8vy1r3VGWlgprGtB/QPVwxs+QVSe28b0MS/7sR5tUfvhDd9XLV/AWc50OmDADAhzdqujavdykg==
 
 "@types/stack-utils@^2.0.0":
-  version "2.0.0"
-  resolved "https://registry.yarnpkg.com/@types/stack-utils/-/stack-utils-2.0.0.tgz#7036640b4e21cc2f259ae826ce843d277dad8cff"
-  integrity sha512-RJJrrySY7A8havqpGObOB4W92QXKJo63/jFLLgpvOtsGUqbQZ9Sbgl35KMm1DjC6j7AvmmU2bIno+3IyEaemaw==
+  version "2.0.1"
+  resolved "https://registry.yarnpkg.com/@types/stack-utils/-/stack-utils-2.0.1.tgz#20f18294f797f2209b5f65c8e3b5c8e8261d127c"
+  integrity sha512-Hl219/BT5fLAaz6NDkSuhzasy49dwQS/DSdu4MdggFB8zcXv7vflBI3xp7FEmkmdDkBUI2bPUNeMttp2knYdxw==
 
 "@types/yargs-parser@*":
-  version "20.2.0"
-  resolved "https://registry.yarnpkg.com/@types/yargs-parser/-/yargs-parser-20.2.0.tgz#dd3e6699ba3237f0348cd085e4698780204842f9"
-  integrity sha512-37RSHht+gzzgYeobbG+KWryeAW8J33Nhr69cjTqSYymXVZEN9NbRYWoYlRtDhHKPVT1FyNKwaTPC1NynKZpzRA==
+  version "20.2.1"
+  resolved "https://registry.yarnpkg.com/@types/yargs-parser/-/yargs-parser-20.2.1.tgz#3b9ce2489919d9e4fea439b76916abc34b2df129"
+  integrity sha512-7tFImggNeNBVMsn0vLrpn1H1uPrUBdnARPTpZoitY37ZrdJREzf7I16tMrlK3hen349gr1NYh8CmZQa7CTG6Aw==
 
 "@types/yargs@^15.0.0":
-  version "15.0.13"
-  resolved "https://registry.yarnpkg.com/@types/yargs/-/yargs-15.0.13.tgz#34f7fec8b389d7f3c1fd08026a5763e072d3c6dc"
-  integrity sha512-kQ5JNTrbDv3Rp5X2n/iUu37IJBDU2gsZ5R/g1/KHOOEc5IKfUFjXT6DENPGduh08I/pamwtEq4oul7gUqKTQDQ==
+  version "15.0.14"
+  resolved "https://registry.yarnpkg.com/@types/yargs/-/yargs-15.0.14.tgz#26d821ddb89e70492160b66d10a0eb6df8f6fb06"
+  integrity sha512-yEJzHoxf6SyQGhBhIYGXQDSCkJjB6HohDShto7m8vaKg9Yp0Yn8+71J9eakh2bnPg6BfsH9PRMhiRTZnd4eXGQ==
   dependencies:
     "@types/yargs-parser" "*"
 
 "@types/yargs@^16.0.0":
-  version "16.0.3"
-  resolved "https://registry.yarnpkg.com/@types/yargs/-/yargs-16.0.3.tgz#4b6d35bb8e680510a7dc2308518a80ee1ef27e01"
-  integrity sha512-YlFfTGS+zqCgXuXNV26rOIeETOkXnGQXP/pjjL9P0gO/EP9jTmc7pUBhx+jVEIxpq41RX33GQ7N3DzOSfZoglQ==
+  version "16.0.4"
+  resolved "https://registry.yarnpkg.com/@types/yargs/-/yargs-16.0.4.tgz#26aad98dd2c2a38e421086ea9ad42b9e51642977"
+  integrity sha512-T8Yc9wt/5LbJyCaLiHPReJa0kApcIgJ7Bn735GjItUfh08Z1pJvu8QZqb9s+mMvKV6WUQRV7K2R46YbjMXTTJw==
   dependencies:
     "@types/yargs-parser" "*"
 
-"@typescript-eslint/eslint-plugin@4.25.0":
-  version "4.25.0"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/eslint-plugin/-/eslint-plugin-4.25.0.tgz#d82657b6ab4caa4c3f888ff923175fadc2f31f2a"
-  integrity sha512-Qfs3dWkTMKkKwt78xp2O/KZQB8MPS1UQ5D3YW2s6LQWBE1074BE+Rym+b1pXZIX3M3fSvPUDaCvZLKV2ylVYYQ==
+"@typescript-eslint/eslint-plugin@4.28.4":
+  version "4.28.4"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/eslint-plugin/-/eslint-plugin-4.28.4.tgz#e73c8cabbf3f08dee0e1bda65ed4e622ae8f8921"
+  integrity sha512-s1oY4RmYDlWMlcV0kKPBaADn46JirZzvvH7c2CtAqxCY96S538JRBAzt83RrfkDheV/+G/vWNK0zek+8TB3Gmw==
   dependencies:
-    "@typescript-eslint/experimental-utils" "4.25.0"
-    "@typescript-eslint/scope-manager" "4.25.0"
-    debug "^4.1.1"
+    "@typescript-eslint/experimental-utils" "4.28.4"
+    "@typescript-eslint/scope-manager" "4.28.4"
+    debug "^4.3.1"
     functional-red-black-tree "^1.0.1"
-    lodash "^4.17.15"
-    regexpp "^3.0.0"
-    semver "^7.3.2"
-    tsutils "^3.17.1"
-
-"@typescript-eslint/experimental-utils@4.25.0":
-  version "4.25.0"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/experimental-utils/-/experimental-utils-4.25.0.tgz#b2febcfa715d2c1806fd5f0335193a6cd270df54"
-  integrity sha512-f0doRE76vq7NEEU0tw+ajv6CrmPelw5wLoaghEHkA2dNLFb3T/zJQqGPQ0OYt5XlZaS13MtnN+GTPCuUVg338w==
-  dependencies:
-    "@types/json-schema" "^7.0.3"
-    "@typescript-eslint/scope-manager" "4.25.0"
-    "@typescript-eslint/types" "4.25.0"
-    "@typescript-eslint/typescript-estree" "4.25.0"
-    eslint-scope "^5.0.0"
-    eslint-utils "^2.0.0"
+    regexpp "^3.1.0"
+    semver "^7.3.5"
+    tsutils "^3.21.0"
 
-"@typescript-eslint/experimental-utils@^4.0.1":
-  version "4.26.0"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/experimental-utils/-/experimental-utils-4.26.0.tgz#ba7848b3f088659cdf71bce22454795fc55be99a"
-  integrity sha512-TH2FO2rdDm7AWfAVRB5RSlbUhWxGVuxPNzGT7W65zVfl8H/WeXTk1e69IrcEVsBslrQSTDKQSaJD89hwKrhdkw==
+"@typescript-eslint/experimental-utils@4.28.4", "@typescript-eslint/experimental-utils@^4.0.1":
+  version "4.28.4"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/experimental-utils/-/experimental-utils-4.28.4.tgz#9c70c35ebed087a5c70fb0ecd90979547b7fec96"
+  integrity sha512-OglKWOQRWTCoqMSy6pm/kpinEIgdcXYceIcH3EKWUl4S8xhFtN34GQRaAvTIZB9DD94rW7d/U7tUg3SYeDFNHA==
   dependencies:
     "@types/json-schema" "^7.0.7"
-    "@typescript-eslint/scope-manager" "4.26.0"
-    "@typescript-eslint/types" "4.26.0"
-    "@typescript-eslint/typescript-estree" "4.26.0"
+    "@typescript-eslint/scope-manager" "4.28.4"
+    "@typescript-eslint/types" "4.28.4"
+    "@typescript-eslint/typescript-estree" "4.28.4"
     eslint-scope "^5.1.1"
     eslint-utils "^3.0.0"
 
-"@typescript-eslint/parser@4.25.0":
-  version "4.25.0"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/parser/-/parser-4.25.0.tgz#6b2cb6285aa3d55bfb263c650739091b0f19aceb"
-  integrity sha512-OZFa1SKyEJpAhDx8FcbWyX+vLwh7OEtzoo2iQaeWwxucyfbi0mT4DijbOSsTgPKzGHr6GrF2V5p/CEpUH/VBxg==
-  dependencies:
-    "@typescript-eslint/scope-manager" "4.25.0"
-    "@typescript-eslint/types" "4.25.0"
-    "@typescript-eslint/typescript-estree" "4.25.0"
-    debug "^4.1.1"
-
-"@typescript-eslint/scope-manager@4.25.0":
-  version "4.25.0"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/scope-manager/-/scope-manager-4.25.0.tgz#9d86a5bcc46ef40acd03d85ad4e908e5aab8d4ca"
-  integrity sha512-2NElKxMb/0rya+NJG1U71BuNnp1TBd1JgzYsldsdA83h/20Tvnf/HrwhiSlNmuq6Vqa0EzidsvkTArwoq+tH6w==
+"@typescript-eslint/parser@4.28.4":
+  version "4.28.4"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/parser/-/parser-4.28.4.tgz#bc462dc2779afeefdcf49082516afdc3e7b96fab"
+  integrity sha512-4i0jq3C6n+og7/uCHiE6q5ssw87zVdpUj1k6VlVYMonE3ILdFApEzTWgppSRG4kVNB/5jxnH+gTeKLMNfUelQA==
   dependencies:
-    "@typescript-eslint/types" "4.25.0"
-    "@typescript-eslint/visitor-keys" "4.25.0"
+    "@typescript-eslint/scope-manager" "4.28.4"
+    "@typescript-eslint/types" "4.28.4"
+    "@typescript-eslint/typescript-estree" "4.28.4"
+    debug "^4.3.1"
 
-"@typescript-eslint/scope-manager@4.26.0":
-  version "4.26.0"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/scope-manager/-/scope-manager-4.26.0.tgz#60d1a71df162404e954b9d1c6343ff3bee496194"
-  integrity sha512-G6xB6mMo4xVxwMt5lEsNTz3x4qGDt0NSGmTBNBPJxNsrTXJSm21c6raeYroS2OwQsOyIXqKZv266L/Gln1BWqg==
+"@typescript-eslint/scope-manager@4.28.4":
+  version "4.28.4"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/scope-manager/-/scope-manager-4.28.4.tgz#bdbce9b6a644e34f767bd68bc17bb14353b9fe7f"
+  integrity sha512-ZJBNs4usViOmlyFMt9X9l+X0WAFcDH7EdSArGqpldXu7aeZxDAuAzHiMAeI+JpSefY2INHrXeqnha39FVqXb8w==
   dependencies:
-    "@typescript-eslint/types" "4.26.0"
-    "@typescript-eslint/visitor-keys" "4.26.0"
-
-"@typescript-eslint/types@4.25.0":
-  version "4.25.0"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/types/-/types-4.25.0.tgz#0e444a5c5e3c22d7ffa5e16e0e60510b3de5af87"
-  integrity sha512-+CNINNvl00OkW6wEsi32wU5MhHti2J25TJsJJqgQmJu3B3dYDBcmOxcE5w9cgoM13TrdE/5ND2HoEnBohasxRQ==
-
-"@typescript-eslint/types@4.26.0":
-  version "4.26.0"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/types/-/types-4.26.0.tgz#7c6732c0414f0a69595f4f846ebe12616243d546"
-  integrity sha512-rADNgXl1kS/EKnDr3G+m7fB9yeJNnR9kF7xMiXL6mSIWpr3Wg5MhxyfEXy/IlYthsqwBqHOr22boFbf/u6O88A==
+    "@typescript-eslint/types" "4.28.4"
+    "@typescript-eslint/visitor-keys" "4.28.4"
 
-"@typescript-eslint/typescript-estree@4.25.0":
-  version "4.25.0"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/typescript-estree/-/typescript-estree-4.25.0.tgz#942e4e25888736bff5b360d9b0b61e013d0cfa25"
-  integrity sha512-1B8U07TGNAFMxZbSpF6jqiDs1cVGO0izVkf18Q/SPcUAc9LhHxzvSowXDTvkHMWUVuPpagupaW63gB6ahTXVlg==
-  dependencies:
-    "@typescript-eslint/types" "4.25.0"
-    "@typescript-eslint/visitor-keys" "4.25.0"
-    debug "^4.1.1"
-    globby "^11.0.1"
-    is-glob "^4.0.1"
-    semver "^7.3.2"
-    tsutils "^3.17.1"
+"@typescript-eslint/types@4.28.4":
+  version "4.28.4"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/types/-/types-4.28.4.tgz#41acbd79b5816b7c0dd7530a43d97d020d3aeb42"
+  integrity sha512-3eap4QWxGqkYuEmVebUGULMskR6Cuoc/Wii0oSOddleP4EGx1tjLnZQ0ZP33YRoMDCs5O3j56RBV4g14T4jvww==
 
-"@typescript-eslint/typescript-estree@4.26.0":
-  version "4.26.0"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/typescript-estree/-/typescript-estree-4.26.0.tgz#aea17a40e62dc31c63d5b1bbe9a75783f2ce7109"
-  integrity sha512-GHUgahPcm9GfBuy3TzdsizCcPjKOAauG9xkz9TR8kOdssz2Iz9jRCSQm6+aVFa23d5NcSpo1GdHGSQKe0tlcbg==
+"@typescript-eslint/typescript-estree@4.28.4":
+  version "4.28.4"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/typescript-estree/-/typescript-estree-4.28.4.tgz#252e6863278dc0727244be9e371eb35241c46d00"
+  integrity sha512-z7d8HK8XvCRyN2SNp+OXC2iZaF+O2BTquGhEYLKLx5k6p0r05ureUtgEfo5f6anLkhCxdHtCf6rPM1p4efHYDQ==
   dependencies:
-    "@typescript-eslint/types" "4.26.0"
-    "@typescript-eslint/visitor-keys" "4.26.0"
+    "@typescript-eslint/types" "4.28.4"
+    "@typescript-eslint/visitor-keys" "4.28.4"
     debug "^4.3.1"
     globby "^11.0.3"
     is-glob "^4.0.1"
     semver "^7.3.5"
     tsutils "^3.21.0"
 
-"@typescript-eslint/visitor-keys@4.25.0":
-  version "4.25.0"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/visitor-keys/-/visitor-keys-4.25.0.tgz#863e7ed23da4287c5b469b13223255d0fde6aaa7"
-  integrity sha512-AmkqV9dDJVKP/TcZrbf6s6i1zYXt5Hl8qOLrRDTFfRNae4+LB8A4N3i+FLZPW85zIxRy39BgeWOfMS3HoH5ngg==
+"@typescript-eslint/visitor-keys@4.28.4":
+  version "4.28.4"
+  resolved "https://registry.yarnpkg.com/@typescript-eslint/visitor-keys/-/visitor-keys-4.28.4.tgz#92dacfefccd6751cbb0a964f06683bfd72d0c4d3"
+  integrity sha512-NIAXAdbz1XdOuzqkJHjNKXKj8QQ4cv5cxR/g0uQhCYf/6//XrmfpaYsM7PnBcNbfvTDLUkqQ5TPNm1sozDdTWg==
   dependencies:
-    "@typescript-eslint/types" "4.25.0"
-    eslint-visitor-keys "^2.0.0"
-
-"@typescript-eslint/visitor-keys@4.26.0":
-  version "4.26.0"
-  resolved "https://registry.yarnpkg.com/@typescript-eslint/visitor-keys/-/visitor-keys-4.26.0.tgz#26d2583169222815be4dcd1da4fe5459bc3bcc23"
-  integrity sha512-cw4j8lH38V1ycGBbF+aFiLUls9Z0Bw8QschP3mkth50BbWzgFS33ISIgBzUMuQ2IdahoEv/rXstr8Zhlz4B1Zg==
-  dependencies:
-    "@typescript-eslint/types" "4.26.0"
+    "@typescript-eslint/types" "4.28.4"
     eslint-visitor-keys "^2.0.0"
 
 JSONStream@^1.0.4:
@@ -1794,9 +1758,9 @@ acorn-globals@^6.0.0:
     acorn-walk "^7.1.1"
 
 acorn-jsx@^5.3.1:
-  version "5.3.1"
-  resolved "https://registry.yarnpkg.com/acorn-jsx/-/acorn-jsx-5.3.1.tgz#fc8661e11b7ac1539c47dbfea2e72b3af34d267b"
-  integrity sha512-K0Ptm/47OKfQRpNQ2J/oIN/3QYiK6FwW+eJbILhsdxh2WTLdl+30o8aGdTbm5JbffpFFAg/g+zi1E+jvJha5ng==
+  version "5.3.2"
+  resolved "https://registry.yarnpkg.com/acorn-jsx/-/acorn-jsx-5.3.2.tgz#7ed5bb55908b3b2f1bc55c6af1653bada7f07937"
+  integrity sha512-rq9s+JNhf0IChjtDXxllJ7g41oZk5SlXtp0LHwyA5cejwn7vKmKp4pPri6YEePv2PU65sAsegbXtIinmDFDXgQ==
 
 acorn-walk@^7.1.1:
   version "7.2.0"
@@ -1814,16 +1778,16 @@ acorn@^7.1.1, acorn@^7.4.0:
   integrity sha512-nQyp0o1/mNdbTO1PO6kHkwSrmgZ0MT/jCCpNiwbUjGoRN4dlBhqJtoQuCnEOKzgTVwg0ZWiCoQy6SxMebQVh8A==
 
 acorn@^8.2.4:
-  version "8.3.0"
-  resolved "https://registry.yarnpkg.com/acorn/-/acorn-8.3.0.tgz#1193f9b96c4e8232f00b11a9edff81b2c8b98b88"
-  integrity sha512-tqPKHZ5CaBJw0Xmy0ZZvLs1qTV+BNFSyvn77ASXkpBNfIRk8ev26fKrD9iLGwGA9zedPao52GSHzq8lyZG0NUw==
+  version "8.4.1"
+  resolved "https://registry.yarnpkg.com/acorn/-/acorn-8.4.1.tgz#56c36251fc7cabc7096adc18f05afe814321a28c"
+  integrity sha512-asabaBSkEKosYKMITunzX177CXxQ4Q8BSSzMTKD+FefUhipQC70gfW5SiUDhYQ3vk8G+81HqQk7Fv9OXwwn9KA==
 
 add-stream@^1.0.0:
   version "1.0.0"
   resolved "https://registry.yarnpkg.com/add-stream/-/add-stream-1.0.0.tgz#6a7990437ca736d5e1288db92bd3266d5f5cb2aa"
   integrity sha1-anmQQ3ynNtXhKI25K9MmbV9csqo=
 
-agent-base@6:
+agent-base@6, agent-base@^6.0.2:
   version "6.0.2"
   resolved "https://registry.yarnpkg.com/agent-base/-/agent-base-6.0.2.tgz#49fff58577cfee3f37176feab4c22e00f86d7f77"
   integrity sha512-RZNwNclF7+MS/8bDg70amg32dyeZGZxiDuQmZxKLAlQjr3jGyLx+4Kkk58UO7D2QdgFIQCovuSuZESne6RG6XQ==
@@ -1858,9 +1822,9 @@ ajv@^6.10.0, ajv@^6.12.3, ajv@^6.12.4:
     uri-js "^4.2.2"
 
 ajv@^8.0.1:
-  version "8.5.0"
-  resolved "https://registry.yarnpkg.com/ajv/-/ajv-8.5.0.tgz#695528274bcb5afc865446aa275484049a18ae4b"
-  integrity sha512-Y2l399Tt1AguU3BPRP9Fn4eN+Or+StUGWCUpbnFyXSo8NZ9S4uj+AG2pjs5apK+ZMOwYOz1+a+VKvKH7CudXgQ==
+  version "8.6.2"
+  resolved "https://registry.yarnpkg.com/ajv/-/ajv-8.6.2.tgz#2fb45e0e5fcbc0813326c1c3da535d1881bb0571"
+  integrity sha512-9807RlWAgT564wT+DjeyU5OFMPjmzxVobvDFmNAhY+5zD6A2ly3jDp6sgnfyDtlIQ+7H97oc/DGCzzfu9rjw9w==
   dependencies:
     fast-deep-equal "^3.1.1"
     json-schema-traverse "^1.0.0"
@@ -2024,7 +1988,7 @@ arr-union@^3.1.0:
   resolved "https://registry.yarnpkg.com/arr-union/-/arr-union-3.1.0.tgz#e39b09aea9def866a8f206e288af63919bae39c4"
   integrity sha1-45sJrqne+Gao8gbiiK9jkZuuOcQ=
 
-array-back@^3.0.1:
+array-back@^3.0.1, array-back@^3.1.0:
   version "3.1.0"
   resolved "https://registry.yarnpkg.com/array-back/-/array-back-3.1.0.tgz#b8859d7a508871c9a7b2cf42f99428f65e96bfb0"
   integrity sha512-TkuxA4UCOvxuDK6NZYXCalszEzj+TLszyASooky+i742l9TqsOdYCMJJupxRic61hwquNtppB3hgcuq9SVSH1Q==
@@ -2044,11 +2008,6 @@ array-each@^1.0.0, array-each@^1.0.1:
   resolved "https://registry.yarnpkg.com/array-each/-/array-each-1.0.1.tgz#a794af0c05ab1752846ee753a1f211a05ba0c44f"
   integrity sha1-p5SvDAWrF1KEbudTofIRoFugxE8=
 
-array-find-index@^1.0.1:
-  version "1.0.2"
-  resolved "https://registry.yarnpkg.com/array-find-index/-/array-find-index-1.0.2.tgz#df010aa1287e164bbda6f9723b0a96a1ec4187a1"
-  integrity sha1-3wEKoSh+Fku9pvlyOwqWoexBh6E=
-
 array-ify@^1.0.0:
   version "1.0.0"
   resolved "https://registry.yarnpkg.com/array-ify/-/array-ify-1.0.0.tgz#9e528762b4a9066ad163a6962a364418e9626ece"
@@ -2291,9 +2250,9 @@ bcrypt-pbkdf@^1.0.0:
     tweetnacl "^0.14.3"
 
 before-after-hook@^2.2.0:
-  version "2.2.1"
-  resolved "https://registry.yarnpkg.com/before-after-hook/-/before-after-hook-2.2.1.tgz#73540563558687586b52ed217dad6a802ab1549c"
-  integrity sha512-/6FKxSTWoJdbsLDF8tdIjaRiFXiE6UHsEHE3OPI/cwPURCVi1ukP0gmLn7XWEiFk5TcwQjjY5PWsU+j+tgXgmw==
+  version "2.2.2"
+  resolved "https://registry.yarnpkg.com/before-after-hook/-/before-after-hook-2.2.2.tgz#a6e8ca41028d90ee2c24222f201c90956091613e"
+  integrity sha512-3pZEU3NT5BFUo/AD5ERPWOgQOCZITni6iavr5AUw5AUwQjMlI0kzu5btnyD39AF0gUEsDPwJT+oY1ORBJijPjQ==
 
 benchmark@^2.1.4:
   version "2.1.4"
@@ -2473,14 +2432,6 @@ callsites@^3.0.0:
   resolved "https://registry.yarnpkg.com/callsites/-/callsites-3.1.0.tgz#b3630abd8943432f54b3f0519238e33cd7df2f73"
   integrity sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ==
 
-camelcase-keys@^2.0.0:
-  version "2.1.0"
-  resolved "https://registry.yarnpkg.com/camelcase-keys/-/camelcase-keys-2.1.0.tgz#308beeaffdf28119051efa1d932213c91b8f92e7"
-  integrity sha1-MIvur/3ygRkFHvodkyITyRuPkuc=
-  dependencies:
-    camelcase "^2.0.0"
-    map-obj "^1.0.0"
-
 camelcase-keys@^6.2.2:
   version "6.2.2"
   resolved "https://registry.yarnpkg.com/camelcase-keys/-/camelcase-keys-6.2.2.tgz#5e755d6ba51aa223ec7d3d52f25778210f9dc3c0"
@@ -2490,17 +2441,22 @@ camelcase-keys@^6.2.2:
     map-obj "^4.0.0"
     quick-lru "^4.0.1"
 
-camelcase@^2.0.0:
-  version "2.1.1"
-  resolved "https://registry.yarnpkg.com/camelcase/-/camelcase-2.1.1.tgz#7c1d16d679a1bbe59ca02cacecfb011e201f5a1f"
-  integrity sha1-fB0W1nmhu+WcoCys7PsBHiAfWh8=
+camelcase-keys@^7.0.0:
+  version "7.0.0"
+  resolved "https://registry.yarnpkg.com/camelcase-keys/-/camelcase-keys-7.0.0.tgz#40fcbe171f7432888369d0c871df7cfa5ce4f788"
+  integrity sha512-qlQlECgDl5Ev+gkvONaiD4X4TF2gyZKuLBvzx0zLo2UwAxmz3hJP/841aaMHTeH1T7v5HRwoRq91daulXoYWvg==
+  dependencies:
+    camelcase "^6.2.0"
+    map-obj "^4.1.0"
+    quick-lru "^5.1.1"
+    type-fest "^1.2.1"
 
 camelcase@^3.0.0:
   version "3.0.0"
   resolved "https://registry.yarnpkg.com/camelcase/-/camelcase-3.0.0.tgz#32fc4b9fcdaf845fcdf7e73bb97cac2261f0ab0a"
   integrity sha1-MvxLn82vhF/N9+c7uXysImHwqwo=
 
-camelcase@^5.3.1:
+camelcase@^5.0.0, camelcase@^5.3.1:
   version "5.3.1"
   resolved "https://registry.yarnpkg.com/camelcase/-/camelcase-5.3.1.tgz#e3c9b31569e106811df242f715725a1f4c494320"
   integrity sha512-L28STB170nwWS63UjtlEOE3dldQApaJXZkOI1uMFfzf3rRuPegHaHesyee+YxQ+W6SvRDQV6UrdOdRiR153wJg==
@@ -2511,9 +2467,9 @@ camelcase@^6.2.0:
   integrity sha512-c7wVvbw3f37nuobQNtgsgG9POC9qMbNuMQmTCqZv23b6MIz0fcYpBiOlv9gEN/hdLdnZTDQhg6e9Dq5M1vKvfg==
 
 caniuse-lite@^1.0.30001219:
-  version "1.0.30001233"
-  resolved "https://registry.yarnpkg.com/caniuse-lite/-/caniuse-lite-1.0.30001233.tgz#b7cb4a377a4b12ed240d2fa5c792951a06e5f2c4"
-  integrity sha512-BmkbxLfStqiPA7IEzQpIk0UFZFf3A4E6fzjPJ6OR+bFC2L8ES9J8zGA/asoi47p8XDVkev+WJo2I2Nc8c/34Yg==
+  version "1.0.30001246"
+  resolved "https://registry.yarnpkg.com/caniuse-lite/-/caniuse-lite-1.0.30001246.tgz#fe17d9919f87124d6bb416ef7b325356d69dc76c"
+  integrity sha512-Tc+ff0Co/nFNbLOrziBXmMVtpt9S2c2Y+Z9Nk9Khj09J+0zR9ejvIW5qkZAErCbOrVODCx/MN+GpB5FNBs5GFA==
 
 caseless@~0.12.0:
   version "0.12.0"
@@ -2587,9 +2543,9 @@ ci-info@^3.1.1:
   integrity sha512-dVqRX7fLUm8J6FgHJ418XuIgDLZDkYcDFTeL6TA2gt5WlIZUQrrH6EZrNClwT/H0FateUsZkGIOPRrLbP+PR9A==
 
 cjs-module-lexer@^1.0.0:
-  version "1.2.1"
-  resolved "https://registry.yarnpkg.com/cjs-module-lexer/-/cjs-module-lexer-1.2.1.tgz#2fd46d9906a126965aa541345c499aaa18e8cd73"
-  integrity sha512-jVamGdJPDeuQilKhvVn1h3knuMOZzr8QDnpk+M9aMlCaMkTDd6fBWPhiDqFvFZ07pL0liqabAiuy8SY4jGHeaw==
+  version "1.2.2"
+  resolved "https://registry.yarnpkg.com/cjs-module-lexer/-/cjs-module-lexer-1.2.2.tgz#9f84ba3244a512f3a54e5277e8eef4c489864e40"
+  integrity sha512-cOU9usZw8/dXIXKtwa8pM0OTJQuJkxMN6w30csNRUerHfeQ5R6U3kkU/FtJeIf3M202OHfY2U8ccInBG7/xogA==
 
 class-utils@^0.3.5:
   version "0.3.6"
@@ -2747,11 +2703,6 @@ colorette@^1.2.2:
   resolved "https://registry.yarnpkg.com/colorette/-/colorette-1.2.2.tgz#cbcc79d5e99caea2dbf10eb3a26fd8b3e6acfa94"
   integrity sha512-MKGMzyfeuutC/ZJ1cba9NqcNpfeqMUcYmyF1ZFY6/Cn7CNSAKx6a+s48sqLqyAiZuaP2TcqMhoo+dlwFnVxT9w==
 
-colors@^1.4.0:
-  version "1.4.0"
-  resolved "https://registry.yarnpkg.com/colors/-/colors-1.4.0.tgz#c50491479d4c1bdaed2c9ced32cf7c7dc2360f78"
-  integrity sha512-a+UqTh4kgZg/SlGvfbzDHpgRu7AAQOmmqRHJnxhRZICKFUT91brVhNNt58CMWU9PsBbv3PDCZUHbVxuDiH2mtA==
-
 columnify@^1.5.4:
   version "1.5.4"
   resolved "https://registry.yarnpkg.com/columnify/-/columnify-1.5.4.tgz#4737ddf1c7b69a8a7c340570782e947eec8e78bb"
@@ -2767,12 +2718,12 @@ combined-stream@^1.0.6, combined-stream@^1.0.8, combined-stream@~1.0.6:
   dependencies:
     delayed-stream "~1.0.0"
 
-command-line-args@5.1.1:
-  version "5.1.1"
-  resolved "https://registry.yarnpkg.com/command-line-args/-/command-line-args-5.1.1.tgz#88e793e5bb3ceb30754a86863f0401ac92fd369a"
-  integrity sha512-hL/eG8lrll1Qy1ezvkant+trihbGnaKaeEjj6Scyr3DN+RC7iQ5Rz84IeLERfAWDGo0HBSNAakczwgCilDXnWg==
+command-line-args@5.1.3:
+  version "5.1.3"
+  resolved "https://registry.yarnpkg.com/command-line-args/-/command-line-args-5.1.3.tgz#1e57d2816f28804073bb5e75cd24e02e2aa321e7"
+  integrity sha512-a5tF6mjqRSOBswBwdMkKY47JQ464Dkg9Pcwbxwo9wxRhKWZjtBktmBASllk3AMJ7qBuWgsAGtVa7b2/+EsymOQ==
   dependencies:
-    array-back "^3.0.1"
+    array-back "^3.1.0"
     find-replace "^3.0.0"
     lodash.camelcase "^4.3.0"
     typical "^4.0.0"
@@ -2831,9 +2782,9 @@ concat-stream@^2.0.0:
     typedarray "^0.0.6"
 
 config-chain@^1.1.12:
-  version "1.1.12"
-  resolved "https://registry.yarnpkg.com/config-chain/-/config-chain-1.1.12.tgz#0fde8d091200eb5e808caf25fe618c02f48e4efa"
-  integrity sha512-a1eOIcu8+7lUInge4Rpf/n4Krkf3Dd9lqhljRzII1/Zno/kRtUWnznPO3jOKBmTEktkt3fkxisUcivoj0ebzoA==
+  version "1.1.13"
+  resolved "https://registry.yarnpkg.com/config-chain/-/config-chain-1.1.13.tgz#fad0795aa6a6cdaff9ed1b68e9dff94372c232f4"
+  integrity sha512-qj+f8APARXHrM0hraqXYb2/bOVSV4PvJQlNZ/DVj0QrmNM2q2euizkeuVckQ57J+W0mRH6Hvi+k50M4Jul2VRQ==
   dependencies:
     ini "^1.3.4"
     proto-list "~1.2.1"
@@ -2852,15 +2803,15 @@ conventional-changelog-angular@^5.0.12:
     q "^1.5.1"
 
 conventional-changelog-core@^4.2.2:
-  version "4.2.2"
-  resolved "https://registry.yarnpkg.com/conventional-changelog-core/-/conventional-changelog-core-4.2.2.tgz#f0897df6d53b5d63dec36b9442bd45354f8b3ce5"
-  integrity sha512-7pDpRUiobQDNkwHyJG7k9f6maPo9tfPzkSWbRq97GGiZqisElhnvUZSvyQH20ogfOjntB5aadvv6NNcKL1sReg==
+  version "4.2.3"
+  resolved "https://registry.yarnpkg.com/conventional-changelog-core/-/conventional-changelog-core-4.2.3.tgz#ce44d4bbba4032e3dc14c00fcd5b53fc00b66433"
+  integrity sha512-MwnZjIoMRL3jtPH5GywVNqetGILC7g6RQFvdb8LRU/fA/338JbeWAku3PZ8yQ+mtVRViiISqJlb0sOz0htBZig==
   dependencies:
     add-stream "^1.0.0"
-    conventional-changelog-writer "^4.0.18"
+    conventional-changelog-writer "^5.0.0"
     conventional-commits-parser "^3.2.0"
     dateformat "^3.0.0"
-    get-pkg-repo "^1.0.0"
+    get-pkg-repo "^4.0.0"
     git-raw-commits "^2.0.8"
     git-remote-origin-url "^2.0.0"
     git-semver-tags "^4.1.1"
@@ -2869,7 +2820,6 @@ conventional-changelog-core@^4.2.2:
     q "^1.5.1"
     read-pkg "^3.0.0"
     read-pkg-up "^3.0.0"
-    shelljs "^0.8.3"
     through2 "^4.0.0"
 
 conventional-changelog-preset-loader@^2.3.4:
@@ -2877,12 +2827,11 @@ conventional-changelog-preset-loader@^2.3.4:
   resolved "https://registry.yarnpkg.com/conventional-changelog-preset-loader/-/conventional-changelog-preset-loader-2.3.4.tgz#14a855abbffd59027fd602581f1f34d9862ea44c"
   integrity sha512-GEKRWkrSAZeTq5+YjUZOYxdHq+ci4dNwHvpaBC3+ENalzFWuCWa9EZXSuZBpkr72sMdKB+1fyDV4takK1Lf58g==
 
-conventional-changelog-writer@^4.0.18:
-  version "4.1.0"
-  resolved "https://registry.yarnpkg.com/conventional-changelog-writer/-/conventional-changelog-writer-4.1.0.tgz#1ca7880b75aa28695ad33312a1f2366f4b12659f"
-  integrity sha512-WwKcUp7WyXYGQmkLsX4QmU42AZ1lqlvRW9mqoyiQzdD+rJWbTepdWoKJuwXTS+yq79XKnQNa93/roViPQrAQgw==
+conventional-changelog-writer@^5.0.0:
+  version "5.0.0"
+  resolved "https://registry.yarnpkg.com/conventional-changelog-writer/-/conventional-changelog-writer-5.0.0.tgz#c4042f3f1542f2f41d7d2e0d6cad23aba8df8eec"
+  integrity sha512-HnDh9QHLNWfL6E1uHz6krZEQOgm8hN7z/m7tT16xwd802fwgMN0Wqd7AQYVkhpsjDUx/99oo+nGgvKF657XP5g==
   dependencies:
-    compare-func "^2.0.0"
     conventional-commits-filter "^2.0.7"
     dateformat "^3.0.0"
     handlebars "^4.7.6"
@@ -2929,9 +2878,9 @@ conventional-recommended-bump@^6.1.0:
     q "^1.5.1"
 
 convert-source-map@^1.0.0, convert-source-map@^1.4.0, convert-source-map@^1.5.0, convert-source-map@^1.6.0, convert-source-map@^1.7.0:
-  version "1.7.0"
-  resolved "https://registry.yarnpkg.com/convert-source-map/-/convert-source-map-1.7.0.tgz#17a2cb882d7f77d3490585e2ce6c524424a3a442"
-  integrity sha512-4FJkXzKXEDB1snCFZlLP4gpC3JILicCpGbzG9f9G7tGqGCzETQ2hWPrcinA9oU4wtf2biUaEH5065UnMeR33oA==
+  version "1.8.0"
+  resolved "https://registry.yarnpkg.com/convert-source-map/-/convert-source-map-1.8.0.tgz#f3373c32d21b4d780dd8004514684fb791ca4369"
+  integrity sha512-+OQdjP49zViI/6i7nIJpA8rAl4sV/JdPfU9nZs3VqOwGIgizICvuN2ru6fMd+4llL0tar18UYJXfZ/TWtmhUjA==
   dependencies:
     safe-buffer "~5.1.1"
 
@@ -3047,13 +2996,6 @@ cssstyle@^2.3.0:
   dependencies:
     cssom "~0.3.6"
 
-currently-unhandled@^0.4.1:
-  version "0.4.1"
-  resolved "https://registry.yarnpkg.com/currently-unhandled/-/currently-unhandled-0.4.1.tgz#988df33feab191ef799a61369dd76c17adf957ea"
-  integrity sha1-mI3zP+qxke95mmE2nddsF635V+o=
-  dependencies:
-    array-find-index "^1.0.1"
-
 d@1, d@^1.0.1:
   version "1.0.1"
   resolved "https://registry.yarnpkg.com/d/-/d-1.0.1.tgz#8698095372d58dbee346ffd0c7093f99f8f9eb5a"
@@ -3105,9 +3047,9 @@ debug@3.X:
     ms "^2.1.1"
 
 debug@4, debug@^4.0.1, debug@^4.1.0, debug@^4.1.1, debug@^4.3.1:
-  version "4.3.1"
-  resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.1.tgz#f0d229c505e0c6d8c49ac553d1b13dc183f6b2ee"
-  integrity sha512-doEwdvm4PCeK4K3RQN2ZC2BYUBaxwLARCqZmMjtF8a51J2Rb0xpVloFRnCODwqjpwnAoao4pelN8l3RJdv3gRQ==
+  version "4.3.2"
+  resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.2.tgz#f0a49c18ac8779e31d4a0c6029dfb76873c7428b"
+  integrity sha512-mOp8wKcvj7XxC78zLgw/ZA+6TSgkoE2C/ienthhRD298T7UNwAg9diBpLRxC0mOezLl4B0xV7M0cCO6P/O0Xhw==
   dependencies:
     ms "2.1.2"
 
@@ -3131,7 +3073,7 @@ decamelize-keys@^1.1.0:
     decamelize "^1.1.0"
     map-obj "^1.0.0"
 
-decamelize@^1.1.0, decamelize@^1.1.1, decamelize@^1.1.2:
+decamelize@^1.1.0, decamelize@^1.1.1, decamelize@^1.2.0:
   version "1.2.0"
   resolved "https://registry.yarnpkg.com/decamelize/-/decamelize-1.2.0.tgz#f6534d15148269b20352e7bee26f501f9a191290"
   integrity sha1-9lNNFRSCabIDUue+4m9QH5oZEpA=
@@ -3142,9 +3084,9 @@ decamelize@^5.0.0:
   integrity sha512-U75DcT5hrio3KNtvdULAWnLiAPbFUC4191ldxMmj4FA/mRuBnmDwU0boNfPyFRhnan+Jm+haLeSn3P0afcBn4w==
 
 decimal.js@^10.2.1:
-  version "10.2.1"
-  resolved "https://registry.yarnpkg.com/decimal.js/-/decimal.js-10.2.1.tgz#238ae7b0f0c793d3e3cea410108b35a2c01426a3"
-  integrity sha512-KaL7+6Fw6i5A2XSnsbhm/6B+NuEA7TZ4vqxnd5tXz9sbKtrN9Srj8ab4vKVdK8YAqZO9P1kg45Y6YLoduPf+kw==
+  version "10.3.1"
+  resolved "https://registry.yarnpkg.com/decimal.js/-/decimal.js-10.3.1.tgz#d8c3a444a9c6774ba60ca6ad7261c3a94fd5e783"
+  integrity sha512-V0pfhfr8suzyPGOx3nmq4aHqabehUZn6Ch9kyFpV79TGDTWFmHqUqXdabR7QHqxzrYolF4+tVmJhUG4OURg5dQ==
 
 decode-uri-component@^0.2.0:
   version "0.2.0"
@@ -3219,13 +3161,13 @@ define-property@^2.0.2:
     is-descriptor "^1.0.2"
     isobject "^3.0.1"
 
-del-cli@4.0.0:
-  version "4.0.0"
-  resolved "https://registry.yarnpkg.com/del-cli/-/del-cli-4.0.0.tgz#a7b97c823705829ca22d993efcb21368119b55c0"
-  integrity sha512-G6FD38YZ28nkI34J+oxiYGbJg/t2hCkUgg9di9311gHZWWe9hY4CphewtU5l3RO1LTYxNMxla2D/we4CbBMHcA==
+del-cli@4.0.1:
+  version "4.0.1"
+  resolved "https://registry.yarnpkg.com/del-cli/-/del-cli-4.0.1.tgz#2303ccaa45708ee8c6211568344cf87336abf30a"
+  integrity sha512-KtR/6cBfZkGDAP2NA7z+bP4p1OMob3wjN9mq13+SWvExx6jT9gFWfLgXEeX8J2B47OKeNCq9yTONmtryQ+m+6g==
   dependencies:
     del "^6.0.0"
-    meow "^10.0.1"
+    meow "^10.1.0"
 
 del@^6.0.0:
   version "6.0.0"
@@ -3383,9 +3325,9 @@ ecc-jsbn@~0.1.1:
     safer-buffer "^2.1.0"
 
 electron-to-chromium@^1.3.723:
-  version "1.3.744"
-  resolved "https://registry.yarnpkg.com/electron-to-chromium/-/electron-to-chromium-1.3.744.tgz#34e0da7babb325e18b50d3a0214504b12045ca85"
-  integrity sha512-o/vep/PvSXg+7buwCbVJXHY3zbjYVmFPwnMMnchESXgAzrfcasvbX/hQZHCFGG7YdZgdtwt1KTMyK9CyBxPbLA==
+  version "1.3.782"
+  resolved "https://registry.yarnpkg.com/electron-to-chromium/-/electron-to-chromium-1.3.782.tgz#522740fe6b4b5255ca754c68d9c406a17b0998e2"
+  integrity sha512-6AI2se1NqWA1SBf/tlD6tQD/6ZOt+yAhqmrTlh4XZw4/g0Mt3p6JhTQPZxRPxPZiOg0o7ss1EBP/CpYejfnoIA==
 
 emittery@^0.8.1:
   version "0.8.1"
@@ -3539,14 +3481,14 @@ escodegen@^2.0.0:
   optionalDependencies:
     source-map "~0.6.1"
 
-eslint-plugin-jest@24.3.6:
-  version "24.3.6"
-  resolved "https://registry.yarnpkg.com/eslint-plugin-jest/-/eslint-plugin-jest-24.3.6.tgz#5f0ca019183c3188c5ad3af8e80b41de6c8e9173"
-  integrity sha512-WOVH4TIaBLIeCX576rLcOgjNXqP+jNlCiEmRgFTfQtJ52DpwnIQKAVGlGPAN7CZ33bW6eNfHD6s8ZbEUTQubJg==
+eslint-plugin-jest@24.3.7:
+  version "24.3.7"
+  resolved "https://registry.yarnpkg.com/eslint-plugin-jest/-/eslint-plugin-jest-24.3.7.tgz#a4deaa9e88182b92533a9c25cc4f3c369d7f33eb"
+  integrity sha512-pXED2NA4q2M/5mxlN6GyuUXAFJndT0uosOkQCHaUED9pqgBPd89ZzpcZEU6c5HtZNahC00M36FkwLdDHMDqaHw==
   dependencies:
     "@typescript-eslint/experimental-utils" "^4.0.1"
 
-eslint-scope@^5.0.0, eslint-scope@^5.1.1:
+eslint-scope@^5.1.1:
   version "5.1.1"
   resolved "https://registry.yarnpkg.com/eslint-scope/-/eslint-scope-5.1.1.tgz#e786e59a66cb92b3f6c1fb0d508aab174848f48c"
   integrity sha512-2NxwbF/hZ0KpepYN0cNbo+FN6XoK7GaHlQhgx/hIZl6Va0bF45RQOOwhLIy8lQDbuCiadSLCBnH2CFYquit5bw==
@@ -3554,7 +3496,7 @@ eslint-scope@^5.0.0, eslint-scope@^5.1.1:
     esrecurse "^4.3.0"
     estraverse "^4.1.1"
 
-eslint-utils@^2.0.0, eslint-utils@^2.1.0:
+eslint-utils@^2.1.0:
   version "2.1.0"
   resolved "https://registry.yarnpkg.com/eslint-utils/-/eslint-utils-2.1.0.tgz#d2de5e03424e707dc10c74068ddedae708741b27"
   integrity sha512-w94dQYoauyvlDc43XnGB8lU3Zt713vNChgt4EWwhXAP2XkBvndfxF0AgIqKOOasjPIPzj9JqgwkwbCYD0/V3Zg==
@@ -3578,13 +3520,14 @@ eslint-visitor-keys@^2.0.0:
   resolved "https://registry.yarnpkg.com/eslint-visitor-keys/-/eslint-visitor-keys-2.1.0.tgz#f65328259305927392c938ed44eb0a5c9b2bd303"
   integrity sha512-0rSmRBzXgDzIsD6mGdJgevzgezI534Cer5L/vyMX0kHzT/jiB43jRhd9YUlMGYLQy2zprNmoT8qasCGtY+QaKw==
 
-eslint@7.27.0:
-  version "7.27.0"
-  resolved "https://registry.yarnpkg.com/eslint/-/eslint-7.27.0.tgz#665a1506d8f95655c9274d84bd78f7166b07e9c7"
-  integrity sha512-JZuR6La2ZF0UD384lcbnd0Cgg6QJjiCwhMD6eU4h/VGPcVGwawNNzKU41tgokGXnfjOOyI6QIffthhJTPzzuRA==
+eslint@7.31.0:
+  version "7.31.0"
+  resolved "https://registry.yarnpkg.com/eslint/-/eslint-7.31.0.tgz#f972b539424bf2604907a970860732c5d99d3aca"
+  integrity sha512-vafgJpSh2ia8tnTkNUkwxGmnumgckLh5aAbLa1xRmIn9+owi8qBNGKL+B881kNKNTy7FFqTEkpNkUvmw0n6PkA==
   dependencies:
     "@babel/code-frame" "7.12.11"
-    "@eslint/eslintrc" "^0.4.1"
+    "@eslint/eslintrc" "^0.4.3"
+    "@humanwhocodes/config-array" "^0.5.0"
     ajv "^6.10.0"
     chalk "^4.0.0"
     cross-spawn "^7.0.2"
@@ -3601,7 +3544,7 @@ eslint@7.27.0:
     fast-deep-equal "^3.1.3"
     file-entry-cache "^6.0.1"
     functional-red-black-tree "^1.0.1"
-    glob-parent "^5.0.0"
+    glob-parent "^5.1.2"
     globals "^13.6.0"
     ignore "^4.0.6"
     import-fresh "^3.0.0"
@@ -3684,9 +3627,9 @@ eventemitter3@^4.0.4:
   integrity sha512-8guHBZCwKnFhYdHr2ysuRWErTwhoN2X8XELRlrRwpmfeY2jjuUN4taQMsULKUVo1K4DvZl+0pgfyoysHxvmvEw==
 
 execa@^5.0.0:
-  version "5.1.0"
-  resolved "https://registry.yarnpkg.com/execa/-/execa-5.1.0.tgz#3ea50ee863d226bfa323528cce1684e7481dfe46"
-  integrity sha512-CkdUB7s2y6S+d4y+OM/+ZtQcJCiKUCth4cNImGMqrt2zEVtW2rfHGspQBE1GDo6LjeNIQmTPKXqTCKjqFKyu3A==
+  version "5.1.1"
+  resolved "https://registry.yarnpkg.com/execa/-/execa-5.1.1.tgz#f80ad9cbf4298f7bd1d4c9555c21e93741c411dd"
+  integrity sha512-8uSpZZocAZRBAPIEINJj3Lo9HyGitllczc27Eh5YYojjMFMn8yHMDMaUHE2Jqfq05D/wucwI4JGURyXt1vchyg==
   dependencies:
     cross-spawn "^7.0.3"
     get-stream "^6.0.0"
@@ -3823,16 +3766,15 @@ fast-glob@^2.2.6:
     micromatch "^3.1.10"
 
 fast-glob@^3.1.1:
-  version "3.2.5"
-  resolved "https://registry.yarnpkg.com/fast-glob/-/fast-glob-3.2.5.tgz#7939af2a656de79a4f1901903ee8adcaa7cb9661"
-  integrity sha512-2DtFcgT68wiTTiwZ2hNdJfcHNke9XOfnwmBRWXhmeKM8rF0TGwmC/Qto3S7RoZKp5cilZbxzO5iTNTQsJ+EeDg==
+  version "3.2.7"
+  resolved "https://registry.yarnpkg.com/fast-glob/-/fast-glob-3.2.7.tgz#fd6cb7a2d7e9aa7a7846111e85a196d6b2f766a1"
+  integrity sha512-rYGMRwip6lUMvYD3BTScMwT1HtAs2d71SMv66Vrxs0IekGZEjhM0pcMfjQPnknBt2zeCwQMEupiN02ZP4DiT1Q==
   dependencies:
     "@nodelib/fs.stat" "^2.0.2"
     "@nodelib/fs.walk" "^1.2.3"
-    glob-parent "^5.1.0"
+    glob-parent "^5.1.2"
     merge2 "^1.3.0"
-    micromatch "^4.0.2"
-    picomatch "^2.2.1"
+    micromatch "^4.0.4"
 
 fast-json-stable-stringify@2.x, fast-json-stable-stringify@^2.0.0:
   version "2.1.0"
@@ -3850,9 +3792,9 @@ fast-levenshtein@^2.0.6, fast-levenshtein@~2.0.6:
   integrity sha1-PYpcZog6FqMMqGQ+hR8Zuqd5eRc=
 
 fastq@^1.6.0:
-  version "1.11.0"
-  resolved "https://registry.yarnpkg.com/fastq/-/fastq-1.11.0.tgz#bb9fb955a07130a918eb63c1f5161cc32a5d0858"
-  integrity sha512-7Eczs8gIPDrVzT+EksYBcupqMyxSHXXrHOLRRxU2/DicV8789MRBRR8+Hc2uWzUupOs4YS4JzBmBxjjCVBxD/g==
+  version "1.11.1"
+  resolved "https://registry.yarnpkg.com/fastq/-/fastq-1.11.1.tgz#5d8175aae17db61947f8b162cfc7f63264d22807"
+  integrity sha512-HOnr8Mc60eNYl1gzwp6r5RoUyAn5/glBolUzP/Ez6IFVPMPirxn/9phgL6zhOtaTy7ISwPvQ+wT+hfcRZh/bzw==
   dependencies:
     reusify "^1.0.4"
 
@@ -3992,9 +3934,9 @@ flatbuffers@1.12.0:
   integrity sha512-c7CZADjRcl6j0PlvFy0ZqXQ67qSEZfrVPynmnL+2zPc+NtMvrF8Y0QceMo7QqnSPc7+uWjUIAbvCQ5WIKlMVdQ==
 
 flatted@^3.1.0:
-  version "3.1.1"
-  resolved "https://registry.yarnpkg.com/flatted/-/flatted-3.1.1.tgz#c4b489e80096d9df1dfc97c79871aea7c617c469"
-  integrity sha512-zAoAQiudy+r5SvnSw3KJy5os/oRJYHzrzja/tBDqrZtNhUw8bt6y8OBzMWcjWr+8liV8Eb6yOhw8WZ7VFZ5ZzA==
+  version "3.2.1"
+  resolved "https://registry.yarnpkg.com/flatted/-/flatted-3.2.1.tgz#bbef080d95fca6709362c73044a1634f7c6e7d05"
+  integrity sha512-OMQjaErSFHmHqZe+PSidH5n8j3O0F2DdnVh8JB4j4eUQ2k6KvB0qGfrKIhapvez5JerBbmWkaLYUYWISaESoXg==
 
 flush-write-stream@^1.0.2:
   version "1.1.1"
@@ -4154,15 +4096,14 @@ get-package-type@^0.1.0:
   resolved "https://registry.yarnpkg.com/get-package-type/-/get-package-type-0.1.0.tgz#8de2d803cff44df3bc6c456e6668b36c3926e11a"
   integrity sha512-pjzuKtY64GYfWizNAJ0fr9VqttZkNiK2iS430LtIHzjBEr6bX8Am2zm4sW4Ro5wjWW5cAlRL1qAMTcXbjNAO2Q==
 
-get-pkg-repo@^1.0.0:
-  version "1.4.0"
-  resolved "https://registry.yarnpkg.com/get-pkg-repo/-/get-pkg-repo-1.4.0.tgz#c73b489c06d80cc5536c2c853f9e05232056972d"
-  integrity sha1-xztInAbYDMVTbCyFP54FIyBWly0=
+get-pkg-repo@^4.0.0:
+  version "4.1.2"
+  resolved "https://registry.yarnpkg.com/get-pkg-repo/-/get-pkg-repo-4.1.2.tgz#c4ffd60015cf091be666a0212753fc158f01a4c0"
+  integrity sha512-/FjamZL9cBYllEbReZkxF2IMh80d8TJoC4e3bmLNif8ibHw95aj0N/tzqK0kZz9eU/3w3dL6lF4fnnX/sDdW3A==
   dependencies:
-    hosted-git-info "^2.1.4"
-    meow "^3.3.0"
-    normalize-package-data "^2.3.0"
-    parse-github-repo-url "^1.3.0"
+    "@hutson/parse-repository-url" "^3.0.0"
+    hosted-git-info "^4.0.0"
+    meow "^7.0.0"
     through2 "^2.0.0"
 
 get-port@^5.1.1:
@@ -4170,11 +4111,6 @@ get-port@^5.1.1:
   resolved "https://registry.yarnpkg.com/get-port/-/get-port-5.1.1.tgz#0469ed07563479de6efb986baf053dcd7d4e3193"
   integrity sha512-g/Q1aTSDOxFpchXC4i8ZWvxA1lnPqx/JHqcpIw0/LX9T8x/GBbi6YnlN5nhaKIFkT8oFsscUKgDJYxfwfS6QsQ==
 
-get-stdin@^4.0.1:
-  version "4.0.1"
-  resolved "https://registry.yarnpkg.com/get-stdin/-/get-stdin-4.0.1.tgz#b968c6b0a04384324902e8bf1a5df32579a450fe"
-  integrity sha1-uWjGsKBDhDJJAui/Gl3zJXmkUP4=
-
 get-stream@^6.0.0:
   version "6.0.1"
   resolved "https://registry.yarnpkg.com/get-stream/-/get-stream-6.0.1.tgz#a262d8eef67aced57c2852ad6167526a43cbf7b7"
@@ -4220,17 +4156,17 @@ git-semver-tags@^4.1.1:
     semver "^6.0.0"
 
 git-up@^4.0.0:
-  version "4.0.2"
-  resolved "https://registry.yarnpkg.com/git-up/-/git-up-4.0.2.tgz#10c3d731051b366dc19d3df454bfca3f77913a7c"
-  integrity sha512-kbuvus1dWQB2sSW4cbfTeGpCMd8ge9jx9RKnhXhuJ7tnvT+NIrTVfYZxjtflZddQYcmdOTlkAcjmx7bor+15AQ==
+  version "4.0.5"
+  resolved "https://registry.yarnpkg.com/git-up/-/git-up-4.0.5.tgz#e7bb70981a37ea2fb8fe049669800a1f9a01d759"
+  integrity sha512-YUvVDg/vX3d0syBsk/CKUTib0srcQME0JyHkL5BaYdwLsiCslPWmDSi8PUMo9pXYjrryMcmsCoCgsTpSCJEQaA==
   dependencies:
     is-ssh "^1.3.0"
-    parse-url "^5.0.0"
+    parse-url "^6.0.0"
 
 git-url-parse@^11.4.4:
-  version "11.4.4"
-  resolved "https://registry.yarnpkg.com/git-url-parse/-/git-url-parse-11.4.4.tgz#5d747debc2469c17bc385719f7d0427802d83d77"
-  integrity sha512-Y4o9o7vQngQDIU9IjyCmRJBin5iYjI5u9ZITnddRZpD7dcCFQj2sL2XuMNbLRE4b4B/4ENPsp2Q8P44fjAZ0Pw==
+  version "11.5.0"
+  resolved "https://registry.yarnpkg.com/git-url-parse/-/git-url-parse-11.5.0.tgz#acaaf65239cb1536185b19165a24bbc754b3f764"
+  integrity sha512-TZYSMDeM37r71Lqg1mbnMlOqlHd7BSij9qN7XwTkRqSAYFMihGLGhfHwgqQob3GUhEneKnV4nskN9rbQw2KGxA==
   dependencies:
     git-up "^4.0.0"
 
@@ -4249,7 +4185,7 @@ glob-parent@^3.1.0:
     is-glob "^3.1.0"
     path-dirname "^1.0.0"
 
-glob-parent@^5.0.0, glob-parent@^5.1.0, glob-parent@^5.1.1:
+glob-parent@^5.1.1, glob-parent@^5.1.2:
   version "5.1.2"
   resolved "https://registry.yarnpkg.com/glob-parent/-/glob-parent-5.1.2.tgz#869832c58034fe68a4093c17dc15e8340d8401c4"
   integrity sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==
@@ -4290,7 +4226,7 @@ glob-watcher@^5.0.3:
     normalize-path "^3.0.0"
     object.defaults "^1.1.0"
 
-glob@7.1.7, glob@^7.0.0, glob@^7.1.1, glob@^7.1.2, glob@^7.1.3, glob@^7.1.4, glob@^7.1.6:
+glob@7.1.7, glob@^7.1.1, glob@^7.1.2, glob@^7.1.3, glob@^7.1.4, glob@^7.1.6, glob@^7.1.7:
   version "7.1.7"
   resolved "https://registry.yarnpkg.com/glob/-/glob-7.1.7.tgz#3b193e9233f01d42d0b3f78294bbeeb418f94a90"
   integrity sha512-OvD9ENzPLbegENnYP5UUfJIirTg4+XwMWGaQfQTY0JenxNvvIKP3U3/tAQSPIu/lHxXYSZmpXlUHeqAIdKzBLQ==
@@ -4327,24 +4263,17 @@ globals@^11.1.0:
   resolved "https://registry.yarnpkg.com/globals/-/globals-11.12.0.tgz#ab8795338868a0babd8525758018c2a7eb95c42e"
   integrity sha512-WOBp/EEGUiIsJSp7wcv/y6MO+lV9UoncWqxuFfm8eBwzWNgyfBd6Gz+IeKQ9jCmyhoH99g15M3T+QaVHFjizVA==
 
-globals@^12.1.0:
-  version "12.4.0"
-  resolved "https://registry.yarnpkg.com/globals/-/globals-12.4.0.tgz#a18813576a41b00a24a97e7f815918c2e19925f8"
-  integrity sha512-BWICuzzDvDoH54NHKCseDanAhE3CeDorgDL5MT6LMXXj2WCnd9UC2szdk4AWLfjdgNBCXLUanXYcpBBKOSWGwg==
-  dependencies:
-    type-fest "^0.8.1"
-
-globals@^13.6.0:
-  version "13.9.0"
-  resolved "https://registry.yarnpkg.com/globals/-/globals-13.9.0.tgz#4bf2bf635b334a173fb1daf7c5e6b218ecdc06cb"
-  integrity sha512-74/FduwI/JaIrr1H8e71UbDE+5x7pIPs1C2rrwC52SszOo043CsWOZEMW7o2Y58xwm9b+0RBKDxY5n2sUpEFxA==
+globals@^13.6.0, globals@^13.9.0:
+  version "13.10.0"
+  resolved "https://registry.yarnpkg.com/globals/-/globals-13.10.0.tgz#60ba56c3ac2ca845cfbf4faeca727ad9dd204676"
+  integrity sha512-piHC3blgLGFjvOuMmWZX60f+na1lXFDhQXBf1UYp2fXPXqvEUbOhNwi6BsQ0bQishwedgnjkwv1d9zKf+MWw3g==
   dependencies:
     type-fest "^0.20.2"
 
 globby@^11.0.1, globby@^11.0.2, globby@^11.0.3:
-  version "11.0.3"
-  resolved "https://registry.yarnpkg.com/globby/-/globby-11.0.3.tgz#9b1f0cb523e171dd1ad8c7b2a9fb4b644b9593cb"
-  integrity sha512-ffdmosjA807y7+lA1NM0jELARVmYul/715xiILEjo3hBLPTcirgQNnXECn5g3mtR8TOLCVbkfua1Hpen25/Xcg==
+  version "11.0.4"
+  resolved "https://registry.yarnpkg.com/globby/-/globby-11.0.4.tgz#2cbaff77c2f2a62e71e9b2813a67b97a3a3001a5"
+  integrity sha512-9O4MVG9ioZJ08ffbcyVYyLOJLk5JQ688pJ4eMGLpdWLHq/Wr1D9BlriLQyL0E+jbkuePVZXYFj47QM/v093wHg==
   dependencies:
     array-union "^2.1.0"
     dir-glob "^3.0.1"
@@ -4613,7 +4542,7 @@ hosted-git-info@^2.1.4:
   resolved "https://registry.yarnpkg.com/hosted-git-info/-/hosted-git-info-2.8.9.tgz#dffc0bf9a21c02209090f2aa69429e1414daf3f9"
   integrity sha512-mxIDAb9Lsm6DoOJ7xH+5+X4y1LU/4Hi50L9C5sIswK3JzULS4bwk1FvjdBgvYR4bzT4tuUQiC15FE2f5HbLvYw==
 
-hosted-git-info@^4.0.1:
+hosted-git-info@^4.0.0, hosted-git-info@^4.0.1:
   version "4.0.2"
   resolved "https://registry.yarnpkg.com/hosted-git-info/-/hosted-git-info-4.0.2.tgz#5e425507eede4fea846b7262f0838456c4209961"
   integrity sha512-c9OGXbZ3guC/xOlCg1Ci/VgWlwsqDv1yMQL1CWqXDL0hDjXuNcq0zuR4xqPSuasI3kqFDhqSyTjREz5gzq0fXg==
@@ -4727,13 +4656,6 @@ imurmurhash@^0.1.4:
   resolved "https://registry.yarnpkg.com/imurmurhash/-/imurmurhash-0.1.4.tgz#9218b9b2b928a238b13dc4fb6b6d576f231453ea"
   integrity sha1-khi5srkoojixPcT7a21XbyMUU+o=
 
-indent-string@^2.1.0:
-  version "2.1.0"
-  resolved "https://registry.yarnpkg.com/indent-string/-/indent-string-2.1.0.tgz#8e2d48348742121b4a8218b7a137e9a52049dc80"
-  integrity sha1-ji1INIdCEhtKghi3oTfppSBJ3IA=
-  dependencies:
-    repeating "^2.0.0"
-
 indent-string@^4.0.0:
   version "4.0.0"
   resolved "https://registry.yarnpkg.com/indent-string/-/indent-string-4.0.0.tgz#624f8f4497d619b2d9768531d58f4122854d7251"
@@ -4800,7 +4722,7 @@ inquirer@^7.3.3:
     strip-ansi "^6.0.0"
     through "^2.3.6"
 
-interpret@^1.0.0, interpret@^1.4.0:
+interpret@^1.4.0:
   version "1.4.0"
   resolved "https://registry.yarnpkg.com/interpret/-/interpret-1.4.0.tgz#665ab8bc4da27a774a40584e812e3e0fa45b1a1e"
   integrity sha512-agE4QfB2Lkp9uICn7BAqoscw4SZP9kTE2hxiFI3jBPmXJfdqiahTbUuKGsMoN2GtqL9AxhYioAcVvgsb1HvRbA==
@@ -4886,9 +4808,9 @@ is-ci@^3.0.0:
     ci-info "^3.1.1"
 
 is-core-module@^2.2.0:
-  version "2.4.0"
-  resolved "https://registry.yarnpkg.com/is-core-module/-/is-core-module-2.4.0.tgz#8e9fc8e15027b011418026e98f0e6f4d86305cc1"
-  integrity sha512-6A2fkfq1rfeQZjxrZJGerpLCTHRNEBiSgnu0+obeJpEPZRUooHgsizvzv0ZjJwOz3iWIHdJtVWJ/tmPr3D21/A==
+  version "2.5.0"
+  resolved "https://registry.yarnpkg.com/is-core-module/-/is-core-module-2.5.0.tgz#f754843617c70bfd29b7bd87327400cda5c18491"
+  integrity sha512-TXCMSDsEHMEEZ6eCA8rwRDbLu55MRGmrctljsBX/2v1d9/GzqHOxW5c5oPSgrUt2vBFXebu9rGqckXGPWOlYpg==
   dependencies:
     has "^1.0.3"
 
@@ -4946,11 +4868,6 @@ is-extglob@^2.1.0, is-extglob@^2.1.1:
   resolved "https://registry.yarnpkg.com/is-extglob/-/is-extglob-2.1.1.tgz#a88c02535791f02ed37c76a1b9ea9773c833f8c2"
   integrity sha1-qIwCU1eR8C7TfHahueqXc8gz+MI=
 
-is-finite@^1.0.0:
-  version "1.1.0"
-  resolved "https://registry.yarnpkg.com/is-finite/-/is-finite-1.1.0.tgz#904135c77fb42c0641d6aa1bcdbc4daa8da082f3"
-  integrity sha512-cdyMtqX/BOqqNBBiKlIVkytNHm49MtMlYyn1zxzvJKWmFMlGzm+ry5BBfYyeY9YmNKbRSo/o7OX9w9ale0wg3w==
-
 is-fullwidth-code-point@^1.0.0:
   version "1.0.0"
   resolved "https://registry.yarnpkg.com/is-fullwidth-code-point/-/is-fullwidth-code-point-1.0.0.tgz#ef9e31386f031a7f0d643af82fde50c457ef00cb"
@@ -5609,19 +5526,7 @@ jest-util@^26.0.0:
     is-ci "^2.0.0"
     micromatch "^4.0.2"
 
-jest-util@^27.0.0:
-  version "27.0.2"
-  resolved "https://registry.yarnpkg.com/jest-util/-/jest-util-27.0.2.tgz#fc2c7ace3c75ae561cf1e5fdb643bf685a5be7c7"
-  integrity sha512-1d9uH3a00OFGGWSibpNYr+jojZ6AckOMCXV2Z4K3YXDnzpkAaXQyIpY14FOJPiUmil7CD+A6Qs+lnnh6ctRbIA==
-  dependencies:
-    "@jest/types" "^27.0.2"
-    "@types/node" "*"
-    chalk "^4.0.0"
-    graceful-fs "^4.2.4"
-    is-ci "^3.0.0"
-    picomatch "^2.2.3"
-
-jest-util@^27.0.6:
+jest-util@^27.0.0, jest-util@^27.0.6:
   version "27.0.6"
   resolved "https://registry.yarnpkg.com/jest-util/-/jest-util-27.0.6.tgz#e8e04eec159de2f4d5f57f795df9cdc091e50297"
   integrity sha512-1JjlaIh+C65H/F7D11GNkGDDZtDfMEM8EBXsvd+l/cxtgQ6QhxuloOaiayt89DxUvDarbVhqI98HhgrM1yliFQ==
@@ -5781,7 +5686,7 @@ json2csv@^5.0.4:
     jsonparse "^1.3.1"
     lodash.get "^4.4.2"
 
-json5@2.x, json5@^2.1.2:
+json5@2.x, json5@^2.1.2, json5@^2.2.0:
   version "2.2.0"
   resolved "https://registry.yarnpkg.com/json5/-/json5-2.2.0.tgz#2dfefe720c6ba525d9ebd909950f0515316c89a3"
   integrity sha512-f+8cldu7X/y7RAJurMEJmdoKXGB/X550w2Nr3tTbezL6RwEE/iMcm+tZnXeoZtKuOq6ft8+CqzEkrIgx1fPoQA==
@@ -6073,7 +5978,7 @@ lodash.truncate@^4.4.2:
   resolved "https://registry.yarnpkg.com/lodash.truncate/-/lodash.truncate-4.4.2.tgz#5a350da0b1113b837ecfffd5812cbe58d6eae193"
   integrity sha1-WjUNoLERO4N+z//VgSy+WNbq4ZM=
 
-lodash@4.x, lodash@^4.17.15, lodash@^4.17.19, lodash@^4.17.21, lodash@^4.17.4, lodash@^4.7.0:
+lodash@4.x, lodash@^4.17.15, lodash@^4.17.19, lodash@^4.17.4, lodash@^4.7.0:
   version "4.17.21"
   resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.21.tgz#679591c564c3bffaae8454cf0b3df370c3d6911c"
   integrity sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==
@@ -6088,14 +5993,6 @@ log-update@^4.0.0:
     slice-ansi "^4.0.0"
     wrap-ansi "^6.2.0"
 
-loud-rejection@^1.0.0:
-  version "1.6.0"
-  resolved "https://registry.yarnpkg.com/loud-rejection/-/loud-rejection-1.6.0.tgz#5b46f80147edee578870f086d04821cf998e551f"
-  integrity sha1-W0b4AUft7leIcPCG0Eghz5mOVR8=
-  dependencies:
-    currently-unhandled "^0.4.1"
-    signal-exit "^3.0.0"
-
 lru-cache@^5.1.1:
   version "5.1.1"
   resolved "https://registry.yarnpkg.com/lru-cache/-/lru-cache-5.1.1.tgz#1da27e6710271947695daf6848e847f01d84b920"
@@ -6164,9 +6061,9 @@ make-fetch-happen@^8.0.9:
     ssri "^8.0.0"
 
 make-fetch-happen@^9.0.1:
-  version "9.0.1"
-  resolved "https://registry.yarnpkg.com/make-fetch-happen/-/make-fetch-happen-9.0.1.tgz#77d0e8b8ed7d387be7f137b76621fd904e4e10df"
-  integrity sha512-c2IxuRxsPKpW9ftCUnsbbAD3rBZNGsuRNwexAbWI8Eh9jlEVPrxZYK5ffgYRAVTQBegqrqR3DlWrsvvLhi4xQA==
+  version "9.0.4"
+  resolved "https://registry.yarnpkg.com/make-fetch-happen/-/make-fetch-happen-9.0.4.tgz#ceaa100e60e0ef9e8d1ede94614bb2ba83c8bb24"
+  integrity sha512-sQWNKMYqSmbAGXqJg2jZ+PmHh5JAybvwu0xM8mZR/bsTjGiTASj3ldXJV7KFHy1k/IJIBkjxQFoWIVsv9+PQMg==
   dependencies:
     agentkeepalive "^4.1.3"
     cacache "^15.2.0"
@@ -6204,12 +6101,12 @@ map-cache@^0.2.0, map-cache@^0.2.2:
   resolved "https://registry.yarnpkg.com/map-cache/-/map-cache-0.2.2.tgz#c32abd0bd6525d9b051645bb4f26ac5dc98a0dbf"
   integrity sha1-wyq9C9ZSXZsFFkW7TyasXcmKDb8=
 
-map-obj@^1.0.0, map-obj@^1.0.1:
+map-obj@^1.0.0:
   version "1.0.1"
   resolved "https://registry.yarnpkg.com/map-obj/-/map-obj-1.0.1.tgz#d933ceb9205d82bdcf4886f6742bdc2b4dea146d"
   integrity sha1-2TPOuSBdgr3PSIb2dCvcK03qFG0=
 
-map-obj@^4.0.0:
+map-obj@^4.0.0, map-obj@^4.1.0:
   version "4.2.1"
   resolved "https://registry.yarnpkg.com/map-obj/-/map-obj-4.2.1.tgz#e4ea399dbc979ae735c83c863dd31bdf364277b7"
   integrity sha512-+WA2/1sPmDj1dlvvJmB5G6JKfY9dpn7EVBUL06+y6PoljPkh+6V1QihwxNkbcGxCRjt2b0F9K0taiCuo7MbdFQ==
@@ -6221,10 +6118,10 @@ map-visit@^1.0.0:
   dependencies:
     object-visit "^1.0.0"
 
-marked@^2.0.3:
-  version "2.0.7"
-  resolved "https://registry.yarnpkg.com/marked/-/marked-2.0.7.tgz#bc5b857a09071b48ce82a1f7304913a993d4b7d1"
-  integrity sha512-BJXxkuIfJchcXOJWTT2DOL+yFWifFv2yGYOUzvXg8Qz610QKw+sHCvTMYwA+qWGhlA2uivBezChZ/pBy1tWdkQ==
+marked@^2.1.1:
+  version "2.1.3"
+  resolved "https://registry.yarnpkg.com/marked/-/marked-2.1.3.tgz#bd017cef6431724fd4b27e0657f5ceb14bff3753"
+  integrity sha512-/Q+7MGzaETqifOMWYEA7HVMaZb4XbcRfaOzcSsHZEith83KGlvaSG33u0SKu89Mj5h+T8V2hM+8O45Qc5XTgwA==
 
 matchdep@^2.0.0:
   version "2.0.0"
@@ -6267,13 +6164,13 @@ memorystream@^0.3.1:
   resolved "https://registry.yarnpkg.com/memorystream/-/memorystream-0.3.1.tgz#86d7090b30ce455d63fbae12dda51a47ddcaf9b2"
   integrity sha1-htcJCzDORV1j+64S3aUaR93K+bI=
 
-meow@^10.0.1:
-  version "10.0.1"
-  resolved "https://registry.yarnpkg.com/meow/-/meow-10.0.1.tgz#3252e728f4d8603ecae3a5b6460aaae4aea44ae0"
-  integrity sha512-65vCCdUI8wS5upK24fDFo25FcViNExdTGAR/vaWN4E6fXsWQ8fGdbkjCWp3nDTuJMlIYuEoAEMiB2/b81DBJjg==
+meow@^10.1.0:
+  version "10.1.0"
+  resolved "https://registry.yarnpkg.com/meow/-/meow-10.1.0.tgz#43edce35b3c5b7056d74bd9d63897220d3c190a6"
+  integrity sha512-bks/XR5OSTWcPZbJ/NsE2uCWQJ/ejqv8M9XOYxzhufBjreUMuz7S5ApDN5knzQce/4sLT5QoOQc6BbD5O0yP/w==
   dependencies:
-    "@types/minimist" "^1.2.1"
-    camelcase-keys "^6.2.2"
+    "@types/minimist" "^1.2.2"
+    camelcase-keys "^7.0.0"
     decamelize "^5.0.0"
     decamelize-keys "^1.1.0"
     hard-rejection "^2.1.0"
@@ -6281,25 +6178,26 @@ meow@^10.0.1:
     normalize-package-data "^3.0.2"
     read-pkg-up "^8.0.0"
     redent "^4.0.0"
-    trim-newlines "^4.0.1"
-    type-fest "^1.0.2"
-    yargs-parser "^20.2.7"
-
-meow@^3.3.0:
-  version "3.7.0"
-  resolved "https://registry.yarnpkg.com/meow/-/meow-3.7.0.tgz#72cb668b425228290abbfa856892587308a801fb"
-  integrity sha1-cstmi0JSKCkKu/qFaJJYcwioAfs=
-  dependencies:
-    camelcase-keys "^2.0.0"
-    decamelize "^1.1.2"
-    loud-rejection "^1.0.0"
-    map-obj "^1.0.1"
-    minimist "^1.1.3"
-    normalize-package-data "^2.3.4"
-    object-assign "^4.0.1"
-    read-pkg-up "^1.0.1"
-    redent "^1.0.0"
-    trim-newlines "^1.0.0"
+    trim-newlines "^4.0.2"
+    type-fest "^1.2.2"
+    yargs-parser "^20.2.9"
+
+meow@^7.0.0:
+  version "7.1.1"
+  resolved "https://registry.yarnpkg.com/meow/-/meow-7.1.1.tgz#7c01595e3d337fcb0ec4e8eed1666ea95903d306"
+  integrity sha512-GWHvA5QOcS412WCo8vwKDlTelGLsCGBVevQB5Kva961rmNfun0PCbv5+xta2kUMFJyR8/oWnn7ddeKdosbAPbA==
+  dependencies:
+    "@types/minimist" "^1.2.0"
+    camelcase-keys "^6.2.2"
+    decamelize-keys "^1.1.0"
+    hard-rejection "^2.1.0"
+    minimist-options "4.1.0"
+    normalize-package-data "^2.5.0"
+    read-pkg-up "^7.0.1"
+    redent "^3.0.0"
+    trim-newlines "^3.0.0"
+    type-fest "^0.13.1"
+    yargs-parser "^18.1.3"
 
 meow@^8.0.0:
   version "8.1.2"
@@ -6393,7 +6291,7 @@ minimist-options@4.1.0:
     is-plain-obj "^1.1.0"
     kind-of "^6.0.3"
 
-minimist@1.x, minimist@^1.1.3, minimist@^1.2.0, minimist@^1.2.5:
+minimist@1.x, minimist@^1.2.0, minimist@^1.2.5:
   version "1.2.5"
   resolved "https://registry.yarnpkg.com/minimist/-/minimist-1.2.5.tgz#67d66014b66a6a8aaa0c083c5fd58df4e4e97602"
   integrity sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==
@@ -6406,9 +6304,9 @@ minipass-collect@^1.0.2:
     minipass "^3.0.0"
 
 minipass-fetch@^1.3.0, minipass-fetch@^1.3.2:
-  version "1.3.3"
-  resolved "https://registry.yarnpkg.com/minipass-fetch/-/minipass-fetch-1.3.3.tgz#34c7cea038c817a8658461bf35174551dce17a0a"
-  integrity sha512-akCrLDWfbdAWkMLBxJEeWTdNsjML+dt5YgOI4gJ53vuO0vrmYQkUPxa6j6V65s9CcePIr2SSWqjT2EcrNseryQ==
+  version "1.3.4"
+  resolved "https://registry.yarnpkg.com/minipass-fetch/-/minipass-fetch-1.3.4.tgz#63f5af868a38746ca7b33b03393ddf8c291244fe"
+  integrity sha512-TielGogIzbUEtd1LsjZFs47RWuHHfhl6TiCx1InVxApBAmQ8bL0dL5ilkLGcRvuyW/A9nE+Lvn855Ewz8S0PnQ==
   dependencies:
     minipass "^3.1.0"
     minipass-sized "^1.0.3"
@@ -6537,7 +6435,7 @@ multimatch@^5.0.0:
 
 multistream@4.1.0:
   version "4.1.0"
-  resolved "https://registry.npmjs.org/multistream/-/multistream-4.1.0.tgz#7bf00dfd119556fbc153cff3de4c6d477909f5a8"
+  resolved "https://registry.yarnpkg.com/multistream/-/multistream-4.1.0.tgz#7bf00dfd119556fbc153cff3de4c6d477909f5a8"
   integrity sha512-J1XDiAmmNpRCBfIWJv+n0ymC4ABcf/Pl+5YvC5B/D2f/2+8PtHvCNxMPKiQcZyi922Hq69J2YOpb1pTywfifyw==
   dependencies:
     once "^1.4.0"
@@ -6659,9 +6557,9 @@ node-modules-regexp@^1.0.0:
   integrity sha1-jZ2+KJZKSsVxLpExZCEHxx6Q7EA=
 
 node-releases@^1.1.71:
-  version "1.1.72"
-  resolved "https://registry.yarnpkg.com/node-releases/-/node-releases-1.1.72.tgz#14802ab6b1039a79a0c7d662b610a5bbd76eacbe"
-  integrity sha512-LLUo+PpH3dU6XizX3iVoubUNheF/owjXCZZ5yACDxNnPtgFuludV1ZL3ayK1kVep42Rmm0+R9/Y60NQbZ2bifw==
+  version "1.1.73"
+  resolved "https://registry.yarnpkg.com/node-releases/-/node-releases-1.1.73.tgz#dd4e81ddd5277ff846b80b52bb40c49edf7a7b20"
+  integrity sha512-uW7fodD6pyW2FZNZnp/Z3hvWKeEW1Y8R1+1CnErE8cXFXzl5blBOoVB41CvMer6P6Q0S5FXDwcHgFd1Wj0U9zg==
 
 nopt@^4.0.1:
   version "4.0.3"
@@ -6678,7 +6576,7 @@ nopt@^5.0.0:
   dependencies:
     abbrev "1"
 
-normalize-package-data@^2.0.0, normalize-package-data@^2.3.0, normalize-package-data@^2.3.2, normalize-package-data@^2.3.4, normalize-package-data@^2.5.0:
+normalize-package-data@^2.0.0, normalize-package-data@^2.3.2, normalize-package-data@^2.5.0:
   version "2.5.0"
   resolved "https://registry.yarnpkg.com/normalize-package-data/-/normalize-package-data-2.5.0.tgz#e66db1838b200c1dfc233225d12cb36520e234a8"
   integrity sha512-/5CMN3T0R4XTj4DcGaexo+roZSdSFW/0AOOTROrjxzCG1wrWXEsGbRKevjlIL+ZDE4sZlJr5ED4YW0yqmkK+eA==
@@ -6710,10 +6608,10 @@ normalize-path@^3.0.0:
   resolved "https://registry.yarnpkg.com/normalize-path/-/normalize-path-3.0.0.tgz#0dcd69ff23a1c9b11fd0978316644a0388216a65"
   integrity sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==
 
-normalize-url@^6.0.1:
-  version "6.0.1"
-  resolved "https://registry.yarnpkg.com/normalize-url/-/normalize-url-6.0.1.tgz#a4f27f58cf8c7b287b440b8a8201f42d0b00d256"
-  integrity sha512-VU4pzAuh7Kip71XEmO9aNREYAdMHFGTVj/i+CaTImS8x0i1d3jUZkXhqluy/PRgjPLMgsLQulYY3PJ/aSbSjpQ==
+normalize-url@^6.1.0:
+  version "6.1.0"
+  resolved "https://registry.yarnpkg.com/normalize-url/-/normalize-url-6.1.0.tgz#40d0885b535deffe3f3147bec877d05fe4c5668a"
+  integrity sha512-DlL+XwOy3NxAQ8xuC0okPgK46iuVNAK01YN7RueYBqqFeGsBjV9XmCAzAdgt+667bCl5kPh9EqKKDwnaPG1I7A==
 
 now-and-later@^2.0.0:
   version "2.0.1"
@@ -6756,9 +6654,9 @@ npm-normalize-package-bin@^1.0.0, npm-normalize-package-bin@^1.0.1:
   integrity sha512-EPfafl6JL5/rU+ot6P3gRSCpPDW5VmIzX959Ob1+ySFUuuYHWHekXpwdUZcKP5C+DS4GEtdJluwBjnsNDl+fSA==
 
 npm-package-arg@^8.0.0, npm-package-arg@^8.0.1, npm-package-arg@^8.1.0, npm-package-arg@^8.1.2:
-  version "8.1.4"
-  resolved "https://registry.yarnpkg.com/npm-package-arg/-/npm-package-arg-8.1.4.tgz#8001cdbc4363997b8ef6c6cf7aaf543c5805879d"
-  integrity sha512-xLokoCFqj/rPdr3LvcdDL6Kj6ipXGEDHD/QGpzwU6/pibYUOXmp5DBmg76yukFyx4ZDbrXNOTn+BPyd8TD4Jlw==
+  version "8.1.5"
+  resolved "https://registry.yarnpkg.com/npm-package-arg/-/npm-package-arg-8.1.5.tgz#3369b2d5fe8fdc674baa7f1786514ddc15466e44"
+  integrity sha512-LhgZrg0n0VgvzVdSm1oiZworPbTxYHUJCgtsJW8mGvlDpxTM1vSJc3m5QZeUkhAHIzbz3VCHd/R4osi1L1Tg/Q==
   dependencies:
     hosted-git-info "^4.0.1"
     semver "^7.3.4"
@@ -6857,7 +6755,7 @@ oauth-sign@~0.9.0:
   resolved "https://registry.yarnpkg.com/oauth-sign/-/oauth-sign-0.9.0.tgz#47a7b016baa68b5fa0ecf3dee08a85c679ac6455"
   integrity sha512-fexhUFFPTGV8ybAtSIGbV6gOkSv8UtRbDBnAyLQw4QPKkgNlsH2ByPGtMUqdWkos6YCRmAqViwgZrJc/mRDzZQ==
 
-object-assign@4.X, object-assign@^4.0.1, object-assign@^4.1.0:
+object-assign@4.X, object-assign@^4.1.0:
   version "4.1.1"
   resolved "https://registry.yarnpkg.com/object-assign/-/object-assign-4.1.1.tgz#2109adc7965887cfc05cbbd442cac8bfbb360863"
   integrity sha1-IQmtx5ZYh8/AXLvUQsrIv7s2CGM=
@@ -6872,9 +6770,9 @@ object-copy@^0.1.0:
     kind-of "^3.0.3"
 
 object-inspect@^1.10.3, object-inspect@^1.9.0:
-  version "1.10.3"
-  resolved "https://registry.yarnpkg.com/object-inspect/-/object-inspect-1.10.3.tgz#c2aa7d2d09f50c99375704f7a0adf24c5782d369"
-  integrity sha512-e5mCJlSH7poANfC8z8S9s9S2IN5/4Zb3aZ33f5s8YqoazCFzNLloLU8r5VCG+G7WoqLvAAZoVMcy3tp/3X0Plw==
+  version "1.11.0"
+  resolved "https://registry.yarnpkg.com/object-inspect/-/object-inspect-1.11.0.tgz#9dceb146cedd4148a0d9e51ab88d34cf509922b1"
+  integrity sha512-jp7ikS6Sd3GxQfZJPyH3cjcbJF6GZPClgdV+EFygjFLQ5FmW/dRUnTd9PQ9k0JhoNDabWFbpF1yCdSWCC6gexg==
 
 object-keys@^1.0.12, object-keys@^1.1.1:
   version "1.1.1"
@@ -7157,11 +7055,11 @@ p-waterfall@^2.1.1:
     p-reduce "^2.0.0"
 
 pacote@^11.2.6:
-  version "11.3.4"
-  resolved "https://registry.yarnpkg.com/pacote/-/pacote-11.3.4.tgz#c290b790a5cee3082bb8fa223f3f3e2fdf3d0bfc"
-  integrity sha512-RfahPCunM9GI7ryJV/zY0bWQiokZyLqaSNHXtbNSoLb7bwTvBbJBEyCJ01KWs4j1Gj7GmX8crYXQ1sNX6P2VKA==
+  version "11.3.5"
+  resolved "https://registry.yarnpkg.com/pacote/-/pacote-11.3.5.tgz#73cf1fc3772b533f575e39efa96c50be8c3dc9d2"
+  integrity sha512-fT375Yczn4zi+6Hkk2TBe1x1sP8FgFsEIZ2/iWaXY2r/NkhDJfxbcn5paz1+RTFCyNf+dPnaoBDJoAxXSU8Bkg==
   dependencies:
-    "@npmcli/git" "^2.0.1"
+    "@npmcli/git" "^2.1.0"
     "@npmcli/installed-package-contents" "^1.0.6"
     "@npmcli/promise-spawn" "^1.2.0"
     "@npmcli/run-script" "^1.8.2"
@@ -7204,11 +7102,6 @@ parse-filepath@^1.0.1:
     map-cache "^0.2.0"
     path-root "^0.1.1"
 
-parse-github-repo-url@^1.3.0:
-  version "1.4.1"
-  resolved "https://registry.yarnpkg.com/parse-github-repo-url/-/parse-github-repo-url-1.4.1.tgz#9e7d8bb252a6cb6ba42595060b7bf6df3dbc1f50"
-  integrity sha1-nn2LslKmy2ukJZUGC3v23z28H1A=
-
 parse-json@^2.2.0:
   version "2.2.0"
   resolved "https://registry.yarnpkg.com/parse-json/-/parse-json-2.2.0.tgz#f480f40434ef80741f8469099f8dea18f55a4dc9"
@@ -7254,13 +7147,13 @@ parse-path@^4.0.0:
     qs "^6.9.4"
     query-string "^6.13.8"
 
-parse-url@^5.0.0:
-  version "5.0.3"
-  resolved "https://registry.yarnpkg.com/parse-url/-/parse-url-5.0.3.tgz#c158560f14cb1560917e0b7fd8b01adc1e9d3cab"
-  integrity sha512-nrLCVMJpqo12X8uUJT4GJPd5AFaTOrGx/QpJy3HNcVtq0AZSstVIsnxS5fqNPuoqMUs3MyfBoOP6Zvu2Arok5A==
+parse-url@^6.0.0:
+  version "6.0.0"
+  resolved "https://registry.yarnpkg.com/parse-url/-/parse-url-6.0.0.tgz#f5dd262a7de9ec00914939220410b66cff09107d"
+  integrity sha512-cYyojeX7yIIwuJzledIHeLUBVJ6COVLeT4eF+2P6aKVzwvgKQPndCBv3+yQ7pcWjqToYwaligxzSYNNmGoMAvw==
   dependencies:
     is-ssh "^1.3.0"
-    normalize-url "^6.0.1"
+    normalize-url "^6.1.0"
     parse-path "^4.0.0"
     protocols "^1.4.0"
 
@@ -7354,7 +7247,7 @@ performance-now@^2.1.0:
   resolved "https://registry.yarnpkg.com/performance-now/-/performance-now-2.1.0.tgz#6309f4e0e5fa913ec1c69307ae364b4b377c9e7b"
   integrity sha1-Ywn04OX6kT7BxpMHrjZLSzd8nns=
 
-picomatch@^2.0.4, picomatch@^2.2.1, picomatch@^2.2.3:
+picomatch@^2.0.4, picomatch@^2.2.3:
   version "2.3.0"
   resolved "https://registry.yarnpkg.com/picomatch/-/picomatch-2.3.0.tgz#f1f061de8f6a4bf022892e2d128234fb98302972"
   integrity sha512-lY1Q/PiJGC2zOv/z391WOTD+Z02bCgsFfvxoXXf6h7kv9o+WmsmzYqrAwY63sNgOxE4xEdq0WyUnXfKeBrSvYw==
@@ -7431,9 +7324,9 @@ posix-character-classes@^0.1.0:
   integrity sha1-AerA/jta9xoqbAL+q7jB/vfgDqs=
 
 postcss@^7.0.16:
-  version "7.0.35"
-  resolved "https://registry.yarnpkg.com/postcss/-/postcss-7.0.35.tgz#d2be00b998f7f211d8a276974079f2e92b970e24"
-  integrity sha512-3QT8bBJeX/S5zKTTjTCIjRF3If4avAT6kqxcASlTWEtAFCb9NH0OUxNDfgZSWdP5fJnBYCMEWkIFfWeugjzYMg==
+  version "7.0.36"
+  resolved "https://registry.yarnpkg.com/postcss/-/postcss-7.0.36.tgz#056f8cffa939662a8f5905950c07d5285644dfcb"
+  integrity sha512-BebJSIUMwJHRH0HAQoxN4u1CN86glsrwsW0q7T+/m44eXOUAxSNdHRkNZPYz5vVUbg17hFgOQDE7fZk7li3pZw==
   dependencies:
     chalk "^2.4.2"
     source-map "^0.6.1"
@@ -7450,9 +7343,9 @@ prelude-ls@~1.1.2:
   integrity sha1-IZMqVJ9eUv/ZqCf1cOBL5iqX2lQ=
 
 prettier@^2.1.2:
-  version "2.3.0"
-  resolved "https://registry.yarnpkg.com/prettier/-/prettier-2.3.0.tgz#b6a5bf1284026ae640f17f7ff5658a7567fc0d18"
-  integrity sha512-kXtO4s0Lz/DW/IJ9QdWhAf7/NmPWQXkFr/r/WkR3vyI+0v8amTDxiaQSLzs8NBlytfLWX/7uQUMIW677yLKl4w==
+  version "2.3.2"
+  resolved "https://registry.yarnpkg.com/prettier/-/prettier-2.3.2.tgz#ef280a05ec253712e486233db5c6f23441e7342d"
+  integrity sha512-lnJzDfJ66zkMy58OL5/NY5zp70S7Nz6KqcKkXYzn2tMVrNxvbqaBpg7H3qHaLxCJ5lNMsGuM8+ohS7cZrthdLQ==
 
 pretty-format@^26.0.0, pretty-format@^26.6.2:
   version "26.6.2"
@@ -7598,6 +7491,11 @@ quick-lru@^4.0.1:
   resolved "https://registry.yarnpkg.com/quick-lru/-/quick-lru-4.0.1.tgz#5b8878f113a58217848c6482026c73e1ba57727f"
   integrity sha512-ARhCpm70fzdcvNQfPoy49IaanKkTlRWF2JMzqhcJbhSFRZv7nPTvZJdcY7301IPmvW+/p0RgIWnQDLJxifsQ7g==
 
+quick-lru@^5.1.1:
+  version "5.1.1"
+  resolved "https://registry.yarnpkg.com/quick-lru/-/quick-lru-5.1.1.tgz#366493e6b3e42a3a6885e2e99d18f80fb7a8c932"
+  integrity sha512-WuyALRjWPDGtt/wzJiadO5AXY+8hZ80hVpe6MyivgraREW751X3SbhRvG3eLKOYN+8VEvqLcf3wdnt44Z4S4SA==
+
 randomatic@3.1.1:
   version "3.1.1"
   resolved "https://registry.yarnpkg.com/randomatic/-/randomatic-3.1.1.tgz#b776efc59375984e36c537b2f51a1f0aff0da1ed"
@@ -7781,14 +7679,6 @@ rechoir@^0.6.2:
   dependencies:
     resolve "^1.1.6"
 
-redent@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/redent/-/redent-1.0.0.tgz#cf916ab1fd5f1f16dfb20822dd6ec7f730c2afde"
-  integrity sha1-z5Fqsf1fHxbfsggi3W7H9zDCr94=
-  dependencies:
-    indent-string "^2.1.0"
-    strip-indent "^1.0.1"
-
 redent@^3.0.0:
   version "3.0.0"
   resolved "https://registry.yarnpkg.com/redent/-/redent-3.0.0.tgz#e557b7998316bb53c9f1f56fa626352c6963059f"
@@ -7818,10 +7708,10 @@ regex-not@^1.0.0, regex-not@^1.0.2:
     extend-shallow "^3.0.2"
     safe-regex "^1.1.0"
 
-regexpp@^3.0.0, regexpp@^3.1.0:
-  version "3.1.0"
-  resolved "https://registry.yarnpkg.com/regexpp/-/regexpp-3.1.0.tgz#206d0ad0a5648cffbdb8ae46438f3dc51c9f78e2"
-  integrity sha512-ZOIzd8yVsQQA7j8GCSlPGXwg5PfmA1mrq0JP4nGhh54LaKN3xdai/vHUDu74pKwV8OxseMS65u2NImosQcSD0Q==
+regexpp@^3.1.0:
+  version "3.2.0"
+  resolved "https://registry.yarnpkg.com/regexpp/-/regexpp-3.2.0.tgz#0425a2768d8f23bad70ca4b90461fa2f1213e1b2"
+  integrity sha512-pq2bWo9mVD43nbts2wGv17XLiNLya+GklZ8kaDLV2Z08gDCsGpnKn9BFMepvWuHCbyVvY7J5o5+BVvoQbmlJLg==
 
 remove-bom-buffer@^3.0.0:
   version "3.0.0"
@@ -7855,13 +7745,6 @@ repeat-string@^1.5.4, repeat-string@^1.6.1:
   resolved "https://registry.yarnpkg.com/repeat-string/-/repeat-string-1.6.1.tgz#8dcae470e1c88abc2d600fff4a776286da75e637"
   integrity sha1-jcrkcOHIirwtYA//Sndihtp15jc=
 
-repeating@^2.0.0:
-  version "2.0.1"
-  resolved "https://registry.yarnpkg.com/repeating/-/repeating-2.0.1.tgz#5214c53a926d3552707527fbab415dbc08d06dda"
-  integrity sha1-UhTFOpJtNVJwdSf7q0FdvAjQbdo=
-  dependencies:
-    is-finite "^1.0.0"
-
 replace-ext@^1.0.0:
   version "1.0.1"
   resolved "https://registry.yarnpkg.com/replace-ext/-/replace-ext-1.0.1.tgz#2d6d996d04a15855d967443631dd5f77825b016a"
@@ -8134,22 +8017,14 @@ shell-quote@^1.6.1:
   resolved "https://registry.yarnpkg.com/shell-quote/-/shell-quote-1.7.2.tgz#67a7d02c76c9da24f99d20808fcaded0e0e04be2"
   integrity sha512-mRz/m/JVscCrkMyPqHc/bczi3OQHkLTqXHEFu0zDhK/qfv3UcOA4SVmRCLmos4bhjr9ekVQubj/R7waKapmiQg==
 
-shelljs@^0.8.3, shelljs@^0.8.4:
-  version "0.8.4"
-  resolved "https://registry.yarnpkg.com/shelljs/-/shelljs-0.8.4.tgz#de7684feeb767f8716b326078a8a00875890e3c2"
-  integrity sha512-7gk3UZ9kOfPLIAbslLzyWeGiEqx9e3rxwZM0KE6EL8GlGwjym9Mrlx5/p33bWTu9YG6vcS4MBxYZDHYr5lr8BQ==
-  dependencies:
-    glob "^7.0.0"
-    interpret "^1.0.0"
-    rechoir "^0.6.2"
-
 shiki@^0.9.3:
-  version "0.9.3"
-  resolved "https://registry.yarnpkg.com/shiki/-/shiki-0.9.3.tgz#7bf7bcf3ed50ca525ec89cc09254abce4264d5ca"
-  integrity sha512-NEjg1mVbAUrzRv2eIcUt3TG7X9svX7l3n3F5/3OdFq+/BxUdmBOeKGiH4icZJBLHy354Shnj6sfBTemea2e7XA==
+  version "0.9.5"
+  resolved "https://registry.yarnpkg.com/shiki/-/shiki-0.9.5.tgz#c8da81a05fbfd1810729c6873901a729a72ec541"
+  integrity sha512-XFn+rl3wIowDjzdr5DlHoHgQphXefgUTs2bNp/bZu4WF9gTrTLnKwio3f28VjiFG6Jpip7yQn/p4mMj6OrjrtQ==
   dependencies:
+    json5 "^2.2.0"
     onigasm "^2.2.5"
-    vscode-textmate "^5.2.0"
+    vscode-textmate "5.2.0"
 
 side-channel@^1.0.4:
   version "1.0.4"
@@ -8230,11 +8105,11 @@ snapdragon@^0.8.1:
     use "^3.1.0"
 
 socks-proxy-agent@^5.0.0:
-  version "5.0.0"
-  resolved "https://registry.yarnpkg.com/socks-proxy-agent/-/socks-proxy-agent-5.0.0.tgz#7c0f364e7b1cf4a7a437e71253bed72e9004be60"
-  integrity sha512-lEpa1zsWCChxiynk+lCycKuC502RxDWLKJZoIhnxrWNjLSDGYRFflHA1/228VkRcnv9TIb8w98derGbpKxJRgA==
+  version "5.0.1"
+  resolved "https://registry.yarnpkg.com/socks-proxy-agent/-/socks-proxy-agent-5.0.1.tgz#032fb583048a29ebffec2e6a73fca0761f48177e"
+  integrity sha512-vZdmnjb9a2Tz6WEQVIurybSwElwPxMZaIc7PzqbJTrezcKNznv6giT7J7tZDZ1BojVaa1jvO/UiUdhDVB0ACoQ==
   dependencies:
-    agent-base "6"
+    agent-base "^6.0.2"
     debug "4"
     socks "^2.3.3"
 
@@ -8552,13 +8427,6 @@ strip-final-newline@^2.0.0:
   resolved "https://registry.yarnpkg.com/strip-final-newline/-/strip-final-newline-2.0.0.tgz#89b852fb2fcbe936f6f4b3187afb0a12c1ab58ad"
   integrity sha512-BrpvfNAE3dcvq7ll3xVumzjKjZQ5tI1sEUIKr3Uoks0XUl45St3FlatVqef9prk4jRDzhW6WZg+3bk93y6pLjA==
 
-strip-indent@^1.0.1:
-  version "1.0.1"
-  resolved "https://registry.yarnpkg.com/strip-indent/-/strip-indent-1.0.1.tgz#0c7962a6adefa7bbd4ac366460a638552ae1a0a2"
-  integrity sha1-DHlipq3vp7vUrDZkYKY4VSrhoKI=
-  dependencies:
-    get-stdin "^4.0.1"
-
 strip-indent@^3.0.0:
   version "3.0.0"
   resolved "https://registry.yarnpkg.com/strip-indent/-/strip-indent-3.0.0.tgz#c32e1cee940b6b3432c771bc2c54bcce73cd3001"
@@ -8861,24 +8729,19 @@ tough-cookie@~2.5.0:
     psl "^1.1.28"
     punycode "^2.1.1"
 
-tr46@^2.0.2:
+tr46@^2.1.0:
   version "2.1.0"
   resolved "https://registry.yarnpkg.com/tr46/-/tr46-2.1.0.tgz#fa87aa81ca5d5941da8cbf1f9b749dc969a4e240"
   integrity sha512-15Ih7phfcdP5YxqiB+iDtLoaTz4Nd35+IiAv0kQ5FNKHzXgdWqPoTIqEDDJmXceQt4JZk6lVPT8lnDlPpGDppw==
   dependencies:
     punycode "^2.1.1"
 
-trim-newlines@^1.0.0:
-  version "1.0.0"
-  resolved "https://registry.yarnpkg.com/trim-newlines/-/trim-newlines-1.0.0.tgz#5887966bb582a4503a41eb524f7d35011815a613"
-  integrity sha1-WIeWa7WCpFA6QetST301ARgVphM=
-
 trim-newlines@^3.0.0:
   version "3.0.1"
   resolved "https://registry.yarnpkg.com/trim-newlines/-/trim-newlines-3.0.1.tgz#260a5d962d8b752425b32f3a7db0dcacd176c144"
   integrity sha512-c1PTsA3tYrIsLGkJkzHF+w9F2EyxfXGo4UyJc4pFL++FMjnq0HJS69T3M7d//gKrFKwy429bouPescbjecU+Zw==
 
-trim-newlines@^4.0.1:
+trim-newlines@^4.0.2:
   version "4.0.2"
   resolved "https://registry.yarnpkg.com/trim-newlines/-/trim-newlines-4.0.2.tgz#d6aaaf6a0df1b4b536d183879a6b939489808c7c"
   integrity sha512-GJtWyq9InR/2HRiLZgpIKv+ufIKrVrvjQWEj7PxAXNc5dwbNJkqhAUoAGgzRmULAnoOM5EIpveYd3J2VeSAIew==
@@ -8890,7 +8753,7 @@ trim-off-newlines@^1.0.0:
 
 ts-jest@27.0.3:
   version "27.0.3"
-  resolved "https://registry.npmjs.org/ts-jest/-/ts-jest-27.0.3.tgz#808492f022296cde19390bb6ad627c8126bf93f8"
+  resolved "https://registry.yarnpkg.com/ts-jest/-/ts-jest-27.0.3.tgz#808492f022296cde19390bb6ad627c8126bf93f8"
   integrity sha512-U5rdMjnYam9Ucw+h0QvtNDbc5+88nxt7tbIvqaZUhFrfG4+SkWhMXjejCLVGcpILTPuV+H3W/GZDZrnZFpPeXw==
   dependencies:
     bs-logger "0.x"
@@ -8904,10 +8767,10 @@ ts-jest@27.0.3:
     semver "7.x"
     yargs-parser "20.x"
 
-ts-node@10.0.0:
-  version "10.0.0"
-  resolved "https://registry.yarnpkg.com/ts-node/-/ts-node-10.0.0.tgz#05f10b9a716b0b624129ad44f0ea05dac84ba3be"
-  integrity sha512-ROWeOIUvfFbPZkoDis0L/55Fk+6gFQNZwwKPLinacRl6tsxstTF1DbAcLKkovwnpKMVvOMHP1TIbnwXwtLg1gg==
+ts-node@10.1.0:
+  version "10.1.0"
+  resolved "https://registry.yarnpkg.com/ts-node/-/ts-node-10.1.0.tgz#e656d8ad3b61106938a867f69c39a8ba6efc966e"
+  integrity sha512-6szn3+J9WyG2hE+5W8e0ruZrzyk1uFLYye6IGMBadnOzDh8aP7t8CbFpsfCiEx2+wMixAhjFt7lOZC4+l+WbEA==
   dependencies:
     "@tsconfig/node10" "^1.0.7"
     "@tsconfig/node12" "^1.0.7"
@@ -8935,7 +8798,7 @@ tslib@~2.1.0:
   resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.1.0.tgz#da60860f1c2ecaa5703ab7d39bc05b6bf988b97a"
   integrity sha512-hcVC3wYEziELGGmEEXue7D75zbwIIVUMWAVbHItGPx0ziyXxrOMQx4rQEVEV45Ut/1IotuEvwqPopzIOkDMf0A==
 
-tsutils@^3.17.1, tsutils@^3.21.0:
+tsutils@^3.21.0:
   version "3.21.0"
   resolved "https://registry.yarnpkg.com/tsutils/-/tsutils-3.21.0.tgz#b48717d394cea6c1e096983eed58e9d61715b623"
   integrity sha512-mHKK3iUXL+3UF6xL5k0PEhKRUBKPBCv/+RkEOpjRWxxx27KKRBmmA60A9pgOUvMi8GKhRMPEmjBRPzs2W7O1OA==
@@ -8973,6 +8836,11 @@ type-detect@4.0.8:
   resolved "https://registry.yarnpkg.com/type-detect/-/type-detect-4.0.8.tgz#7646fb5f18871cfbb7749e69bd39a6388eb7450c"
   integrity sha512-0fr/mIH1dlO+x7TlcMy+bIDqKPsw/70tVyeHW787goQjhmqaZe10uwLujubK9q9Lg6Fiho1KUKDYz0Z7k7g5/g==
 
+type-fest@^0.13.1:
+  version "0.13.1"
+  resolved "https://registry.yarnpkg.com/type-fest/-/type-fest-0.13.1.tgz#0172cb5bce80b0bd542ea348db50c7e21834d934"
+  integrity sha512-34R7HTnG0XIJcBSn5XhDd7nNFPRcXYRZrBB2O2jdKqYODldSzBAqzsWoZYYvduky73toYS/ESqxPvkDf/F0XMg==
+
 type-fest@^0.18.0:
   version "0.18.1"
   resolved "https://registry.yarnpkg.com/type-fest/-/type-fest-0.18.1.tgz#db4bc151a4a2cf4eebf9add5db75508db6cc841f"
@@ -9003,7 +8871,7 @@ type-fest@^0.8.1:
   resolved "https://registry.yarnpkg.com/type-fest/-/type-fest-0.8.1.tgz#09e249ebde851d3b1e48d27c105444667f17b83d"
   integrity sha512-4dbzIzqvjtgiM5rw1k5rEHtBANKmdudhGyBEajN01fEyhaAIhsoKNy6y7+IN93IfpFtwY9iqi7kD+xwKhQsNJA==
 
-type-fest@^1.0.1, type-fest@^1.0.2:
+type-fest@^1.0.1, type-fest@^1.2.1, type-fest@^1.2.2:
   version "1.2.2"
   resolved "https://registry.yarnpkg.com/type-fest/-/type-fest-1.2.2.tgz#1930bc36b2064f7ab4aa307a6d1b65965199c698"
   integrity sha512-pfkPYCcuV0TJoo/jlsUeWNV8rk7uMU6ocnYNvca1Vu+pyKi8Rl8Zo2scPt9O72gCsXIm+dMxOOWuA3VFDSdzWA==
@@ -9035,20 +8903,17 @@ typedoc-default-themes@^0.12.10:
   resolved "https://registry.yarnpkg.com/typedoc-default-themes/-/typedoc-default-themes-0.12.10.tgz#614c4222fe642657f37693ea62cad4dafeddf843"
   integrity sha512-fIS001cAYHkyQPidWXmHuhs8usjP5XVJjWB8oZGqkTowZaz3v7g3KDZeeqE82FBrmkAnIBOY3jgy7lnPnqATbA==
 
-typedoc@0.20.36:
-  version "0.20.36"
-  resolved "https://registry.yarnpkg.com/typedoc/-/typedoc-0.20.36.tgz#ee5523c32f566ad8283fc732aa8ea322d1a45f6a"
-  integrity sha512-qFU+DWMV/hifQ9ZAlTjdFO9wbUIHuUBpNXzv68ZyURAP9pInjZiO4+jCPeAzHVcaBCHER9WL/+YzzTt6ZlN/Nw==
+typedoc@0.21.4:
+  version "0.21.4"
+  resolved "https://registry.yarnpkg.com/typedoc/-/typedoc-0.21.4.tgz#fced3cffdc30180db60a5dbfec9dbbb273cb5b31"
+  integrity sha512-slZQhvD9U0d9KacktYAyuNMMOXJRFNHy+Gd8xY2Qrqq3eTTTv3frv3N4au/cFnab9t3T5WA0Orb6QUjMc+1bDA==
   dependencies:
-    colors "^1.4.0"
-    fs-extra "^9.1.0"
+    glob "^7.1.7"
     handlebars "^4.7.7"
-    lodash "^4.17.21"
     lunr "^2.3.9"
-    marked "^2.0.3"
+    marked "^2.1.1"
     minimatch "^3.0.0"
     progress "^2.0.3"
-    shelljs "^0.8.4"
     shiki "^0.9.3"
     typedoc-default-themes "^0.12.10"
 
@@ -9068,9 +8933,9 @@ typical@^5.2.0:
   integrity sha512-dvdQgNDNJo+8B2uBQoqdb11eUCE1JQXhvjC/CZtgvZseVd5TYMXnq0+vuUemXbd/Se29cTaUuPX3YIc2xgbvIg==
 
 uglify-js@^3.1.4:
-  version "3.13.8"
-  resolved "https://registry.yarnpkg.com/uglify-js/-/uglify-js-3.13.8.tgz#7c2f9f2553f611f3ff592bdc19c6fb208dc60afb"
-  integrity sha512-PvFLMFIQHfIjFFlvAch69U2IvIxK9TNzNWt1SxZGp9JZ/v70yvqIQuiJeVPPtUMOzoNt+aNRDk4wgxb34wvEqA==
+  version "3.13.10"
+  resolved "https://registry.yarnpkg.com/uglify-js/-/uglify-js-3.13.10.tgz#a6bd0d28d38f592c3adb6b180ea6e07e1e540a8d"
+  integrity sha512-57H3ACYFXeo1IaZ1w02sfA71wI60MGco/IQFjOqK+WtKoprh7Go2/yvd2HPtoJILO2Or84ncLccI4xoHMTSbGg==
 
 uid-number@0.0.6:
   version "0.0.6"
@@ -9322,10 +9187,10 @@ vinyl@2.x, vinyl@^2.0.0, vinyl@^2.1.0:
     remove-trailing-separator "^1.0.1"
     replace-ext "^1.0.0"
 
-vscode-textmate@^5.2.0:
-  version "5.4.0"
-  resolved "https://registry.yarnpkg.com/vscode-textmate/-/vscode-textmate-5.4.0.tgz#4b25ffc1f14ac3a90faf9a388c67a01d24257cd7"
-  integrity sha512-c0Q4zYZkcLizeYJ3hNyaVUM2AA8KDhNCA3JvXY8CeZSJuBdAy3bAvSbv46RClC4P3dSO9BdwhnKEx2zOo6vP/w==
+vscode-textmate@5.2.0:
+  version "5.2.0"
+  resolved "https://registry.yarnpkg.com/vscode-textmate/-/vscode-textmate-5.2.0.tgz#01f01760a391e8222fe4f33fbccbd1ad71aed74e"
+  integrity sha512-Uw5ooOQxRASHgu6C7GVvUxisKXfSgW4oFlO+aa+PAkgmH89O3CXxEEzNRNtHSqtXFTl0nAC1uYj0GMSH27uwtQ==
 
 w3c-hr-time@^1.0.2:
   version "1.0.2"
@@ -9355,16 +9220,11 @@ wcwidth@^1.0.0:
   dependencies:
     defaults "^1.0.3"
 
-web-streams-polyfill@3.0.3:
+web-streams-polyfill@3.0.3, web-streams-polyfill@~3.0.3:
   version "3.0.3"
   resolved "https://registry.yarnpkg.com/web-streams-polyfill/-/web-streams-polyfill-3.0.3.tgz#f49e487eedeca47a207c1aee41ee5578f884b42f"
   integrity sha512-d2H/t0eqRNM4w2WvmTdoeIvzAUSpK7JmATB8Nr2lb7nQ9BTIJVjbQ/TRFVEh2gUH1HwclPdoPtfMoFfetXaZnA==
 
-web-streams-polyfill@~2.1.1:
-  version "2.1.1"
-  resolved "https://registry.yarnpkg.com/web-streams-polyfill/-/web-streams-polyfill-2.1.1.tgz#2c82b6193849ccb9efaa267772c28260ef68d6d2"
-  integrity sha512-dlNpL2aab3g8CKfGz6rl8FNmGaRWLLn2g/DtSc9IjB30mEdE6XxzPfPSig5BwGSzI+oLxHyETrQGKjrVVhbLCg==
-
 webidl-conversions@^5.0.0:
   version "5.0.0"
   resolved "https://registry.yarnpkg.com/webidl-conversions/-/webidl-conversions-5.0.0.tgz#ae59c8a00b121543a2acc65c0434f57b0fc11aff"
@@ -9388,12 +9248,12 @@ whatwg-mimetype@^2.3.0:
   integrity sha512-M4yMwr6mAnQz76TbJm914+gPpB/nCwvZbJU28cUD6dR004SAxDLOOSUaB1JDRqLtaOV/vi0IC5lEAGFgrjGv/g==
 
 whatwg-url@^8.0.0, whatwg-url@^8.4.0, whatwg-url@^8.5.0:
-  version "8.5.0"
-  resolved "https://registry.yarnpkg.com/whatwg-url/-/whatwg-url-8.5.0.tgz#7752b8464fc0903fec89aa9846fc9efe07351fd3"
-  integrity sha512-fy+R77xWv0AiqfLl4nuGUlQ3/6b5uNfQ4WAbGQVMYshCTCCPK9psC1nWh3XHuxGVCtlcDDQPQW1csmmIQo+fwg==
+  version "8.7.0"
+  resolved "https://registry.yarnpkg.com/whatwg-url/-/whatwg-url-8.7.0.tgz#656a78e510ff8f3937bc0bcbe9f5c0ac35941b77"
+  integrity sha512-gAojqb/m9Q8a5IV96E3fHJM70AzCkgt4uXYX2O7EmuyOnLrViCQlsEBmF9UQIu3/aeAIp2U17rtbpZWNntQqdg==
   dependencies:
     lodash "^4.7.0"
-    tr46 "^2.0.2"
+    tr46 "^2.1.0"
     webidl-conversions "^6.1.0"
 
 which-boxed-primitive@^1.0.2:
@@ -9535,9 +9395,9 @@ write-pkg@^4.0.0:
     write-json-file "^3.2.0"
 
 ws@^7.4.5:
-  version "7.4.6"
-  resolved "https://registry.yarnpkg.com/ws/-/ws-7.4.6.tgz#5654ca8ecdeee47c33a9a4bf6d28e2be2980377c"
-  integrity sha512-YmhHDO4MzaDLB+M9ym/mDA5z0naX8j7SIlT8f8z+I0VtzsRbekxEutHSme7NPS2qE8StCYQNUnfWdXta/Yu85A==
+  version "7.5.3"
+  resolved "https://registry.yarnpkg.com/ws/-/ws-7.5.3.tgz#160835b63c7d97bfab418fc1b8a9fced2ac01a74"
+  integrity sha512-kQ/dHIzuLrS6Je9+uv81ueZomEwH0qVYstcAQ4/Z93K8zeko9gtAbttJWzoC5ukqXY1PpoouV3+VSOqEAFt5wg==
 
 xml-name-validator@^3.0.0:
   version "3.0.0"
@@ -9597,16 +9457,19 @@ yargs-parser@20.2.4:
   resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-20.2.4.tgz#b42890f14566796f85ae8e3a25290d205f154a54"
   integrity sha512-WOkpgNhPTlE73h4VFAFsOnomJVaovO8VqLDzy5saChRBFQFBoMYirowyW+Q9HB4HFF4Z7VZTiG3iSzJJA29yRA==
 
-yargs-parser@20.x, yargs-parser@^20.2.2, yargs-parser@^20.2.3:
-  version "20.2.7"
-  resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-20.2.7.tgz#61df85c113edfb5a7a4e36eb8aa60ef423cbc90a"
-  integrity sha512-FiNkvbeHzB/syOjIUxFDCnhSfzAL8R5vs40MgLFBorXACCOAEaWu0gRZl14vG8MR9AOJIZbmkjhusqBYZ3HTHw==
-
-yargs-parser@^20.2.7:
+yargs-parser@20.x, yargs-parser@^20.2.2, yargs-parser@^20.2.3, yargs-parser@^20.2.9:
   version "20.2.9"
   resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-20.2.9.tgz#2eb7dc3b0289718fc295f362753845c41a0c94ee"
   integrity sha512-y11nGElTIV+CT3Zv9t7VKl+Q3hTQoT9a1Qzezhhl6Rp21gJ/IVTW7Z3y9EWXhuUBC2Shnf+DX0antecpAwSP8w==
 
+yargs-parser@^18.1.3:
+  version "18.1.3"
+  resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-18.1.3.tgz#be68c4975c6b2abf469236b0c870362fab09a7b0"
+  integrity sha512-o50j0JeToy/4K6OZcaQmW6lyXXKhq7csREXcDwk2omFPJEwUNOVtJKvmDr9EI1fAJZUyZcRF7kxGBWmRXudrCQ==
+  dependencies:
+    camelcase "^5.0.0"
+    decamelize "^1.2.0"
+
 yargs-parser@^5.0.1:
   version "5.0.1"
   resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-5.0.1.tgz#7ede329c1d8cdbbe209bd25cdb990e9b1ebbb394"

From 12dacee1673eab66e2b94786c11c9353b8ce3313 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Mon, 26 Jul 2021 11:16:22 +0200
Subject: [PATCH 637/719] ARROW-13445: [Java][Packaging] Fix artifact patterns
 for the Java jars
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #10781 from kszucs/java-jars-artifact-patterns

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 dev/tasks/tasks.yml | 76 ++++++++++++++++++++++-----------------------
 1 file changed, 38 insertions(+), 38 deletions(-)

diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index 93bce879f22..e466bee05f3 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -660,45 +660,45 @@ tasks:
     ci: github
     template: java-jars/github.yml
     artifacts:
-      - arrow-algorithm-{no_rc_version}-SNAPSHOT-tests.jar
-      - arrow-algorithm-{no_rc_version}-SNAPSHOT.jar
-      - arrow-avro-{no_rc_version}-SNAPSHOT-tests.jar
-      - arrow-avro-{no_rc_version}-SNAPSHOT.jar
-      - arrow-compression-{no_rc_version}-SNAPSHOT-tests.jar
-      - arrow-compression-{no_rc_version}-SNAPSHOT.jar
-      - arrow-dataset-{no_rc_version}-SNAPSHOT-tests.jar
-      - arrow-dataset-{no_rc_version}-SNAPSHOT.jar
-      - arrow-format-{no_rc_version}-SNAPSHOT-tests.jar
-      - arrow-format-{no_rc_version}-SNAPSHOT.jar
-      - arrow-gandiva-{no_rc_version}-SNAPSHOT-tests.jar
-      - arrow-gandiva-{no_rc_version}-SNAPSHOT.jar
-      - arrow-jdbc-{no_rc_version}-SNAPSHOT-tests.jar
-      - arrow-jdbc-{no_rc_version}-SNAPSHOT.jar
-      - arrow-memory-core-{no_rc_version}-SNAPSHOT-tests.jar
-      - arrow-memory-core-{no_rc_version}-SNAPSHOT.jar
-      - arrow-memory-netty-{no_rc_version}-SNAPSHOT-tests.jar
-      - arrow-memory-netty-{no_rc_version}-SNAPSHOT.jar
-      - arrow-memory-unsafe-{no_rc_version}-SNAPSHOT-tests.jar
-      - arrow-memory-unsafe-{no_rc_version}-SNAPSHOT.jar
-      - arrow-orc-{no_rc_version}-SNAPSHOT-tests.jar
-      - arrow-orc-{no_rc_version}-SNAPSHOT.jar
-      - arrow-performance-{no_rc_version}-SNAPSHOT-tests.jar
-      - arrow-performance-{no_rc_version}-SNAPSHOT.jar
-      - arrow-plasma-{no_rc_version}-SNAPSHOT-tests.jar
-      - arrow-plasma-{no_rc_version}-SNAPSHOT.jar
-      - arrow-tools-{no_rc_version}-SNAPSHOT-jar-with-dependencies.jar
-      - arrow-tools-{no_rc_version}-SNAPSHOT-tests.jar
-      - arrow-tools-{no_rc_version}-SNAPSHOT.jar
-      - arrow-vector-{no_rc_version}-SNAPSHOT-shade-format-flatbuffers.jar
-      - arrow-vector-{no_rc_version}-SNAPSHOT-tests.jar
-      - arrow-vector-{no_rc_version}-SNAPSHOT.jar
+      - arrow-algorithm-{no_rc_version}-tests.jar
+      - arrow-algorithm-{no_rc_version}.jar
+      - arrow-avro-{no_rc_version}-tests.jar
+      - arrow-avro-{no_rc_version}.jar
+      - arrow-compression-{no_rc_version}-tests.jar
+      - arrow-compression-{no_rc_version}.jar
+      - arrow-dataset-{no_rc_version}-tests.jar
+      - arrow-dataset-{no_rc_version}.jar
+      - arrow-format-{no_rc_version}-tests.jar
+      - arrow-format-{no_rc_version}.jar
+      - arrow-gandiva-{no_rc_version}-tests.jar
+      - arrow-gandiva-{no_rc_version}.jar
+      - arrow-jdbc-{no_rc_version}-tests.jar
+      - arrow-jdbc-{no_rc_version}.jar
+      - arrow-memory-core-{no_rc_version}-tests.jar
+      - arrow-memory-core-{no_rc_version}.jar
+      - arrow-memory-netty-{no_rc_version}-tests.jar
+      - arrow-memory-netty-{no_rc_version}.jar
+      - arrow-memory-unsafe-{no_rc_version}-tests.jar
+      - arrow-memory-unsafe-{no_rc_version}.jar
+      - arrow-orc-{no_rc_version}-tests.jar
+      - arrow-orc-{no_rc_version}.jar
+      - arrow-performance-{no_rc_version}-tests.jar
+      - arrow-performance-{no_rc_version}.jar
+      - arrow-plasma-{no_rc_version}-tests.jar
+      - arrow-plasma-{no_rc_version}.jar
+      - arrow-tools-{no_rc_version}-jar-with-dependencies.jar
+      - arrow-tools-{no_rc_version}-tests.jar
+      - arrow-tools-{no_rc_version}.jar
+      - arrow-vector-{no_rc_version}-shade-format-flatbuffers.jar
+      - arrow-vector-{no_rc_version}-tests.jar
+      - arrow-vector-{no_rc_version}.jar
       - benchmarks.jar
-      - flight-core-{no_rc_version}-SNAPSHOT-jar-with-dependencies.jar
-      - flight-core-{no_rc_version}-SNAPSHOT-shaded-ext.jar
-      - flight-core-{no_rc_version}-SNAPSHOT-shaded.jar
-      - flight-core-{no_rc_version}-SNAPSHOT-tests.jar
-      - flight-core-{no_rc_version}-SNAPSHOT.jar
-      - flight-grpc-{no_rc_version}-SNAPSHOT-tests.jar
+      - flight-core-{no_rc_version}-jar-with-dependencies.jar
+      - flight-core-{no_rc_version}-shaded-ext.jar
+      - flight-core-{no_rc_version}-shaded.jar
+      - flight-core-{no_rc_version}-tests.jar
+      - flight-core-{no_rc_version}.jar
+      - flight-grpc-{no_rc_version}-tests.jar
 
   ############################## NuGet packages ###############################
 

From bb30c08a62fe02ced90180c9aa73e3a83ee976ae Mon Sep 17 00:00:00 2001
From: Alessandro Molina <amol@turbogears.org>
Date: Mon, 26 Jul 2021 15:17:50 +0200
Subject: [PATCH 638/719] ARROW-13336: [Doc] Make clean in docs should clean
 generated docs

Closes #10755 from amol-/ARROW-13336

Authored-by: Alessandro Molina <amol@turbogears.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 docs/Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/Makefile b/docs/Makefile
index e38bc91731d..fdff066a39a 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -69,6 +69,7 @@ help:
 .PHONY: clean
 clean:
 	rm -rf $(BUILDDIR)/*
+	rm -rf source/python/generated/*
 
 .PHONY: html
 html:

From 9b8e4dbbc8a774a8cb1eecd3cb887109dd9c1086 Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Mon, 26 Jul 2021 10:05:41 -0400
Subject: [PATCH 639/719] ARROW-12876: [R] Fix build flags on Raspberry Pi

Checks if installing on Raspberry PI OS, and if so, adds an additional build flag so that the compiler can link to one of the necessary libraries.

Closes #10404 from thisisnic/ARROW-12876_raspberry_pi

Lead-authored-by: Nic Crane <thisisnic@gmail.com>
Co-authored-by: Nic <thisisnic@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 r/configure | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/r/configure b/r/configure
index 22a34bc519f..760c60e6a5c 100755
--- a/r/configure
+++ b/r/configure
@@ -186,6 +186,12 @@ else
   fi
 fi
 
+# If on Raspberry Pi, need to manually link against latomic
+# See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81358 for similar example
+if grep raspbian /etc/os-release >/dev/null 2>&1; then
+  PKG_CFLAGS="$PKG_CFLAGS -DARROW_CXXFLAGS=-latomic"
+fi
+
 # If libarrow uses the old GLIBCXX ABI, so we have to use it too
 if [ "$ARROW_USE_OLD_CXXABI" ]; then
   PKG_CFLAGS="$PKG_CFLAGS -D_GLIBCXX_USE_CXX11_ABI=0"

From 5f5b80334e520dd169a18e4d0c8217a2327374a4 Mon Sep 17 00:00:00 2001
From: liyafan82 <fan_li_ya@foxmail.com>
Date: Mon, 26 Jul 2021 10:32:52 -0400
Subject: [PATCH 640/719] ARROW-13443 [C++]: Fix the incorrect mapping from
 flatbuf::MetadataVersion to arrow::ipc::MetadataVersion

Please see https://issues.apache.org/jira/browse/ARROW-13443

Closes #10797 from liyafan82/fly_0726_mp

Lead-authored-by: liyafan82 <fan_li_ya@foxmail.com>
Co-authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 cpp/src/arrow/ipc/metadata_internal.cc |  2 +-
 cpp/src/arrow/ipc/metadata_internal.h  |  3 ++-
 cpp/src/arrow/ipc/read_write_test.cc   | 17 +++++++++++++++++
 3 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/cpp/src/arrow/ipc/metadata_internal.cc b/cpp/src/arrow/ipc/metadata_internal.cc
index 4b332bd9e1e..b1b9e56528e 100644
--- a/cpp/src/arrow/ipc/metadata_internal.cc
+++ b/cpp/src/arrow/ipc/metadata_internal.cc
@@ -74,7 +74,7 @@ MetadataVersion GetMetadataVersion(flatbuf::MetadataVersion version) {
       return MetadataVersion::V2;
     case flatbuf::MetadataVersion::V3:
       // Arrow 0.3 to 0.7.1
-      return MetadataVersion::V4;
+      return MetadataVersion::V3;
     case flatbuf::MetadataVersion::V4:
       // Arrow 0.8 to 0.17
       return MetadataVersion::V4;
diff --git a/cpp/src/arrow/ipc/metadata_internal.h b/cpp/src/arrow/ipc/metadata_internal.h
index 9cf489dd668..2afa95f6f83 100644
--- a/cpp/src/arrow/ipc/metadata_internal.h
+++ b/cpp/src/arrow/ipc/metadata_internal.h
@@ -68,9 +68,10 @@ static constexpr flatbuf::MetadataVersion kLatestMetadataVersion =
 static constexpr flatbuf::MetadataVersion kMinMetadataVersion =
     flatbuf::MetadataVersion::V4;
 
+// These functions are used in unit tests
+ARROW_EXPORT
 MetadataVersion GetMetadataVersion(flatbuf::MetadataVersion version);
 
-// This function is used in a unit test
 ARROW_EXPORT
 flatbuf::MetadataVersion MetadataVersionToFlatbuffer(MetadataVersion version);
 
diff --git a/cpp/src/arrow/ipc/read_write_test.cc b/cpp/src/arrow/ipc/read_write_test.cc
index 9f8d69d2537..245534b1d5c 100644
--- a/cpp/src/arrow/ipc/read_write_test.cc
+++ b/cpp/src/arrow/ipc/read_write_test.cc
@@ -546,6 +546,23 @@ TEST(TestReadMessage, CorruptedSmallInput) {
   ASSERT_EQ(nullptr, message);
 }
 
+TEST(TestMetadata, GetMetadataVersion) {
+  ASSERT_EQ(MetadataVersion::V1,
+            ipc::internal::GetMetadataVersion(flatbuf::MetadataVersion::V1));
+  ASSERT_EQ(MetadataVersion::V2,
+            ipc::internal::GetMetadataVersion(flatbuf::MetadataVersion::V2));
+  ASSERT_EQ(MetadataVersion::V3,
+            ipc::internal::GetMetadataVersion(flatbuf::MetadataVersion::V3));
+  ASSERT_EQ(MetadataVersion::V4,
+            ipc::internal::GetMetadataVersion(flatbuf::MetadataVersion::V4));
+  ASSERT_EQ(MetadataVersion::V5,
+            ipc::internal::GetMetadataVersion(flatbuf::MetadataVersion::V5));
+  ASSERT_EQ(MetadataVersion::V1,
+            ipc::internal::GetMetadataVersion(flatbuf::MetadataVersion::MIN));
+  ASSERT_EQ(MetadataVersion::V5,
+            ipc::internal::GetMetadataVersion(flatbuf::MetadataVersion::MAX));
+}
+
 TEST_P(TestIpcRoundTrip, SliceRoundTrip) {
   std::shared_ptr<RecordBatch> batch;
   ASSERT_OK((*GetParam())(&batch));  // NOLINT clang-tidy gtest issue

From 638338f0570d77a64b13dabbc2cca44536889d9d Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Mon, 26 Jul 2021 10:33:36 -0400
Subject: [PATCH 641/719] ARROW-13298: [C++] Implement any/all hash aggregate
 kernels

Closes #10791 from lidavidm/arrow-13298

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 .../arrow/compute/kernels/hash_aggregate.cc   | 139 ++++++++++++++++++
 .../compute/kernels/hash_aggregate_test.cc    |  49 ++++++
 2 files changed, 188 insertions(+)

diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate.cc b/cpp/src/arrow/compute/kernels/hash_aggregate.cc
index 79213b93b37..3e4b401bae9 100644
--- a/cpp/src/arrow/compute/kernels/hash_aggregate.cc
+++ b/cpp/src/arrow/compute/kernels/hash_aggregate.cc
@@ -1149,6 +1149,123 @@ struct GroupedMinMaxFactory {
   InputType argument_type;
 };
 
+// ----------------------------------------------------------------------
+// Any/All implementation
+
+struct GroupedAnyImpl : public GroupedAggregator {
+  Status Init(ExecContext* ctx, const FunctionOptions*) override {
+    seen_ = TypedBufferBuilder<bool>(ctx->memory_pool());
+    return Status::OK();
+  }
+
+  Status Resize(int64_t new_num_groups) override {
+    auto added_groups = new_num_groups - num_groups_;
+    num_groups_ = new_num_groups;
+    return seen_.Append(added_groups, false);
+  }
+
+  Status Merge(GroupedAggregator&& raw_other,
+               const ArrayData& group_id_mapping) override {
+    auto other = checked_cast<GroupedAnyImpl*>(&raw_other);
+
+    auto seen = seen_.mutable_data();
+    auto other_seen = other->seen_.data();
+
+    auto g = group_id_mapping.GetValues<uint32_t>(1);
+    for (int64_t other_g = 0; other_g < group_id_mapping.length; ++other_g, ++g) {
+      if (BitUtil::GetBit(other_seen, other_g)) BitUtil::SetBitTo(seen, *g, true);
+    }
+    return Status::OK();
+  }
+
+  Status Consume(const ExecBatch& batch) override {
+    auto seen = seen_.mutable_data();
+
+    const auto& input = *batch[0].array();
+
+    auto g = batch[1].array()->GetValues<uint32_t>(1);
+    arrow::internal::VisitTwoBitBlocksVoid(
+        input.buffers[0], input.offset, input.buffers[1], input.offset, input.length,
+        [&](int64_t) { BitUtil::SetBitTo(seen, *g++, true); }, [&]() { g++; });
+    return Status::OK();
+  }
+
+  Result<Datum> Finalize() override {
+    ARROW_ASSIGN_OR_RAISE(auto seen, seen_.Finish());
+    return std::make_shared<BooleanArray>(num_groups_, std::move(seen));
+  }
+
+  std::shared_ptr<DataType> out_type() const override { return boolean(); }
+
+  int64_t num_groups_ = 0;
+  ScalarAggregateOptions options_;
+  TypedBufferBuilder<bool> seen_;
+};
+
+struct GroupedAllImpl : public GroupedAggregator {
+  Status Init(ExecContext* ctx, const FunctionOptions*) override {
+    seen_ = TypedBufferBuilder<bool>(ctx->memory_pool());
+    return Status::OK();
+  }
+
+  Status Resize(int64_t new_num_groups) override {
+    auto added_groups = new_num_groups - num_groups_;
+    num_groups_ = new_num_groups;
+    return seen_.Append(added_groups, true);
+  }
+
+  Status Merge(GroupedAggregator&& raw_other,
+               const ArrayData& group_id_mapping) override {
+    auto other = checked_cast<GroupedAllImpl*>(&raw_other);
+
+    auto seen = seen_.mutable_data();
+    auto other_seen = other->seen_.data();
+
+    auto g = group_id_mapping.GetValues<uint32_t>(1);
+    for (int64_t other_g = 0; other_g < group_id_mapping.length; ++other_g, ++g) {
+      BitUtil::SetBitTo(
+          seen, *g, BitUtil::GetBit(seen, *g) && BitUtil::GetBit(other_seen, other_g));
+    }
+    return Status::OK();
+  }
+
+  Status Consume(const ExecBatch& batch) override {
+    auto seen = seen_.mutable_data();
+
+    const auto& input = *batch[0].array();
+
+    auto g = batch[1].array()->GetValues<uint32_t>(1);
+    if (input.MayHaveNulls()) {
+      const uint8_t* bitmap = input.buffers[1]->data();
+      arrow::internal::VisitBitBlocksVoid(
+          input.buffers[0], input.offset, input.length,
+          [&](int64_t position) {
+            BitUtil::SetBitTo(seen, *g,
+                              BitUtil::GetBit(seen, *g) &&
+                                  BitUtil::GetBit(bitmap, input.offset + position));
+            g++;
+          },
+          [&]() { g++; });
+    } else {
+      arrow::internal::VisitBitBlocksVoid(
+          input.buffers[1], input.offset, input.length, [&](int64_t) { g++; },
+          [&]() { BitUtil::SetBitTo(seen, *g++, false); });
+    }
+    return Status::OK();
+  }
+
+  Result<Datum> Finalize() override {
+    ARROW_ASSIGN_OR_RAISE(auto seen, seen_.Finish());
+    return std::make_shared<BooleanArray>(num_groups_, std::move(seen));
+  }
+
+  std::shared_ptr<DataType> out_type() const override { return boolean(); }
+
+  int64_t num_groups_ = 0;
+  ScalarAggregateOptions options_;
+  TypedBufferBuilder<bool> seen_;
+};
+
 }  // namespace
 
 Result<std::vector<const HashAggregateKernel*>> GetKernels(
@@ -1426,6 +1543,14 @@ const FunctionDoc hash_min_max_doc{
      "This can be changed through ScalarAggregateOptions."),
     {"array", "group_id_array"},
     "ScalarAggregateOptions"};
+
+const FunctionDoc hash_any_doc{"Test whether any element evaluates to true",
+                               ("Null values are ignored."),
+                               {"array", "group_id_array"}};
+
+const FunctionDoc hash_all_doc{"Test whether all elements evaluate to true",
+                               ("Null values are ignored."),
+                               {"array", "group_id_array"}};
 }  // namespace
 
 void RegisterHashAggregateBasic(FunctionRegistry* registry) {
@@ -1460,6 +1585,20 @@ void RegisterHashAggregateBasic(FunctionRegistry* registry) {
     DCHECK_OK(AddHashAggKernels(NumericTypes(), GroupedMinMaxFactory::Make, func.get()));
     DCHECK_OK(registry->AddFunction(std::move(func)));
   }
+
+  {
+    auto func = std::make_shared<HashAggregateFunction>("hash_any", Arity::Binary(),
+                                                        &hash_any_doc);
+    DCHECK_OK(func->AddKernel(MakeKernel(boolean(), HashAggregateInit<GroupedAnyImpl>)));
+    DCHECK_OK(registry->AddFunction(std::move(func)));
+  }
+
+  {
+    auto func = std::make_shared<HashAggregateFunction>("hash_all", Arity::Binary(),
+                                                        &hash_all_doc);
+    DCHECK_OK(func->AddKernel(MakeKernel(boolean(), HashAggregateInit<GroupedAllImpl>)));
+    DCHECK_OK(registry->AddFunction(std::move(func)));
+  }
 }
 
 }  // namespace internal
diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc b/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc
index b0327c7aa81..46c7716abce 100644
--- a/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc
+++ b/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc
@@ -705,6 +705,55 @@ TEST(GroupBy, MinMaxOnly) {
   }
 }
 
+TEST(GroupBy, AnyAndAll) {
+  for (bool use_threads : {true, false}) {
+    SCOPED_TRACE(use_threads ? "parallel/merged" : "serial");
+
+    auto table =
+        TableFromJSON(schema({field("argument", boolean()), field("key", int64())}), {R"([
+    [true,  1],
+    [null,  1]
+                        ])",
+                                                                                      R"([
+    [false, 2],
+    [null,  3],
+    [false, null],
+    [true,  1],
+    [true,  2]
+                        ])",
+                                                                                      R"([
+    [true,  2],
+    [false, null],
+    [null,  3]
+                        ])"});
+
+    ASSERT_OK_AND_ASSIGN(Datum aggregated_and_grouped,
+                         internal::GroupBy({table->GetColumnByName("argument"),
+                                            table->GetColumnByName("argument")},
+                                           {table->GetColumnByName("key")},
+                                           {
+                                               {"hash_any", nullptr},
+                                               {"hash_all", nullptr},
+                                           },
+                                           use_threads));
+    SortBy({"key_0"}, &aggregated_and_grouped);
+
+    AssertDatumsEqual(ArrayFromJSON(struct_({
+                                        field("hash_any", boolean()),
+                                        field("hash_all", boolean()),
+                                        field("key_0", int64()),
+                                    }),
+                                    R"([
+    [true,  true,  1],
+    [true,  false, 2],
+    [false, true, 3],
+    [false, false, null]
+  ])"),
+                      aggregated_and_grouped,
+                      /*verbose=*/true);
+  }
+}
+
 TEST(GroupBy, CountAndSum) {
   auto batch = RecordBatchFromJSON(
       schema({field("argument", float64()), field("key", int64())}), R"([

From 139ac15e157306ebdf48dad21154bae3203d2c95 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Mon, 26 Jul 2021 10:34:47 -0400
Subject: [PATCH 642/719] ARROW-13446: [Release] Fix verification on amazon
 linux
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Quick fix for installing amazon linux packages.

Closes #10786 from kszucs/amazon-linux-verification

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 dev/release/verify-yum.sh | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/dev/release/verify-yum.sh b/dev/release/verify-yum.sh
index ddc45a6c95f..26fb9c91bb1 100755
--- a/dev/release/verify-yum.sh
+++ b/dev/release/verify-yum.sh
@@ -34,13 +34,11 @@ TYPE="$2"
 
 local_prefix="/arrow/dev/tasks/linux-packages"
 
-artifactory_base_url="https://apache.jfrog.io/artifactory/arrow/centos"
-if [ "${TYPE}" = "rc" ]; then
-  artifactory_base_url+="-rc"
-fi
+artifactory_base_url="https://apache.jfrog.io/artifactory/arrow"
 
 distribution=$(. /etc/os-release && echo "${ID}")
 distribution_version=$(. /etc/os-release && echo "${VERSION_ID}")
+distribution_prefix="centos"
 
 cmake_package=cmake
 cmake_command=cmake
@@ -50,6 +48,7 @@ have_glib=yes
 have_parquet=yes
 have_python=yes
 install_command="dnf install -y --enablerepo=powertools"
+
 case "${distribution}-${distribution_version}" in
   amzn-2)
     cmake_package=cmake3
@@ -58,6 +57,8 @@ case "${distribution}-${distribution_version}" in
     have_gandiva=no
     have_python=no
     install_command="yum install -y"
+    distribution_prefix="amazon-linux"
+    amazon-linux-extras install epel -y
     ;;
   centos-7)
     cmake_package=cmake3
@@ -101,8 +102,11 @@ if [ "${TYPE}" = "local" ]; then
   ${install_command} "${release_path}"
 else
   package_version="${VERSION}"
+  if [ "${TYPE}" = "rc" ]; then
+    distribution_prefix+="-rc"
+  fi
   ${install_command} \
-    ${artifactory_base_url}/${distribution_version}/apache-arrow-release-latest.rpm
+    ${artifactory_base_url}/${distribution_prefix}/${distribution_version}/apache-arrow-release-latest.rpm
 fi
 
 if [ "${TYPE}" = "local" ]; then
@@ -119,6 +123,7 @@ else
     sed \
       -i"" \
       -e "s,/centos/,/centos-rc/,g" \
+      -e "s,/amazon-linux/,/amazon-linux-rc/,g" \
       /etc/yum.repos.d/Apache-Arrow.repo
   fi
 fi

From c2c3573b11430882bfe061e1bc7cb5d946e7bc89 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Mon, 26 Jul 2021 16:35:15 +0200
Subject: [PATCH 643/719] ARROW-13437: [C++] Relax FixedSizeList validation to
 allow excess child values

Previously slicing a FixedSizeList and validating it would fail.

We could also make slicing a FixedSizeList adjust the child length, but currently Slice is not virtual and this would be inconsistent with other arrays.

Closes #10783 from lidavidm/arrow-13437

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/array/array_list_test.cc | 19 +++++-
 cpp/src/arrow/array/array_test.cc      | 87 +++++++++++++++-----------
 cpp/src/arrow/array/validate.cc        |  4 +-
 3 files changed, 70 insertions(+), 40 deletions(-)

diff --git a/cpp/src/arrow/array/array_list_test.cc b/cpp/src/arrow/array/array_list_test.cc
index a50cbcc13cf..faeeaf56333 100644
--- a/cpp/src/arrow/array/array_list_test.cc
+++ b/cpp/src/arrow/array/array_list_test.cc
@@ -1036,7 +1036,7 @@ void ValidateBasicFixedSizeListArray(const FixedSizeListArray* result,
     ASSERT_EQ(is_valid[i] == 0, result->IsNull(i));
   }
 
-  ASSERT_EQ(result->length() * result->value_length(), result->values()->length());
+  ASSERT_LE(result->length() * result->value_length(), result->values()->length());
   auto varr = std::dynamic_pointer_cast<Int32Array>(result->values());
 
   for (size_t i = 0; i < values.size(); ++i) {
@@ -1084,7 +1084,7 @@ TEST_F(TestFixedSizeListArray, BulkAppend) {
   ValidateBasicFixedSizeListArray(result_.get(), values, is_valid);
 }
 
-TEST_F(TestFixedSizeListArray, BulkAppendInvalid) {
+TEST_F(TestFixedSizeListArray, BulkAppendExcess) {
   std::vector<int32_t> values = {0, 1, 2, 3, 4, 5};
   std::vector<uint8_t> is_valid = {1, 0, 1};
 
@@ -1099,7 +1099,8 @@ TEST_F(TestFixedSizeListArray, BulkAppendInvalid) {
   }
 
   Done();
-  ASSERT_RAISES(Invalid, result_->ValidateFull());
+  // We appended too many values to the child array, but that's OK
+  ValidateBasicFixedSizeListArray(result_.get(), values, is_valid);
 }
 
 TEST_F(TestFixedSizeListArray, TestZeroLength) {
@@ -1131,4 +1132,16 @@ TEST_F(TestFixedSizeListArray, NegativeLength) {
   ASSERT_RAISES(Invalid, result_->ValidateFull());
 }
 
+TEST_F(TestFixedSizeListArray, NotEnoughValues) {
+  type_ = fixed_size_list(value_type_, 2);
+  auto values = ArrayFromJSON(value_type_, "[]");
+  result_ = std::make_shared<FixedSizeListArray>(type_, 1, values);
+  ASSERT_RAISES(Invalid, result_->ValidateFull());
+
+  // ARROW-13437: too many values is OK though
+  values = ArrayFromJSON(value_type_, "[1, 2, 3, 4]");
+  result_ = std::make_shared<FixedSizeListArray>(type_, 1, values);
+  ASSERT_OK(result_->ValidateFull());
+}
+
 }  // namespace arrow
diff --git a/cpp/src/arrow/array/array_test.cc b/cpp/src/arrow/array/array_test.cc
index 15eca157a4d..7b10acb3b13 100644
--- a/cpp/src/arrow/array/array_test.cc
+++ b/cpp/src/arrow/array/array_test.cc
@@ -425,47 +425,50 @@ void AssertAppendScalar(MemoryPool* pool, const std::shared_ptr<Scalar>& scalar)
   }
 }
 
+static ScalarVector GetScalars() {
+  auto hello = Buffer::FromString("hello");
+  DayTimeIntervalType::DayMilliseconds daytime{1, 100};
+
+  return {std::make_shared<BooleanScalar>(false),
+          std::make_shared<Int8Scalar>(3),
+          std::make_shared<UInt16Scalar>(3),
+          std::make_shared<Int32Scalar>(3),
+          std::make_shared<UInt64Scalar>(3),
+          std::make_shared<DoubleScalar>(3.0),
+          std::make_shared<Date32Scalar>(10),
+          std::make_shared<Date64Scalar>(11),
+          std::make_shared<Time32Scalar>(1000, time32(TimeUnit::SECOND)),
+          std::make_shared<Time64Scalar>(1111, time64(TimeUnit::MICRO)),
+          std::make_shared<TimestampScalar>(1111, timestamp(TimeUnit::MILLI)),
+          std::make_shared<MonthIntervalScalar>(1),
+          std::make_shared<DayTimeIntervalScalar>(daytime),
+          std::make_shared<DurationScalar>(60, duration(TimeUnit::SECOND)),
+          std::make_shared<BinaryScalar>(hello),
+          std::make_shared<LargeBinaryScalar>(hello),
+          std::make_shared<FixedSizeBinaryScalar>(
+              hello, fixed_size_binary(static_cast<int32_t>(hello->size()))),
+          std::make_shared<Decimal128Scalar>(Decimal128(10), decimal(16, 4)),
+          std::make_shared<Decimal256Scalar>(Decimal256(10), decimal(76, 38)),
+          std::make_shared<StringScalar>(hello),
+          std::make_shared<LargeStringScalar>(hello),
+          std::make_shared<ListScalar>(ArrayFromJSON(int8(), "[1, 2, 3]")),
+          std::make_shared<LargeListScalar>(ArrayFromJSON(int8(), "[1, 1, 2, 2, 3, 3]")),
+          std::make_shared<FixedSizeListScalar>(ArrayFromJSON(int8(), "[1, 2, 3, 4]")),
+          std::make_shared<StructScalar>(
+              ScalarVector{
+                  std::make_shared<Int32Scalar>(2),
+                  std::make_shared<Int32Scalar>(6),
+              },
+              struct_({field("min", int32()), field("max", int32())}))};
+}
+
 TEST_F(TestArray, TestMakeArrayFromScalar) {
   ASSERT_OK_AND_ASSIGN(auto null_array, MakeArrayFromScalar(NullScalar(), 5));
   ASSERT_OK(null_array->ValidateFull());
   ASSERT_EQ(null_array->length(), 5);
   ASSERT_EQ(null_array->null_count(), 5);
 
-  auto hello = Buffer::FromString("hello");
-  DayTimeIntervalType::DayMilliseconds daytime{1, 100};
-
-  ScalarVector scalars{
-      std::make_shared<BooleanScalar>(false),
-      std::make_shared<Int8Scalar>(3),
-      std::make_shared<UInt16Scalar>(3),
-      std::make_shared<Int32Scalar>(3),
-      std::make_shared<UInt64Scalar>(3),
-      std::make_shared<DoubleScalar>(3.0),
-      std::make_shared<Date32Scalar>(10),
-      std::make_shared<Date64Scalar>(11),
-      std::make_shared<Time32Scalar>(1000, time32(TimeUnit::SECOND)),
-      std::make_shared<Time64Scalar>(1111, time64(TimeUnit::MICRO)),
-      std::make_shared<TimestampScalar>(1111, timestamp(TimeUnit::MILLI)),
-      std::make_shared<MonthIntervalScalar>(1),
-      std::make_shared<DayTimeIntervalScalar>(daytime),
-      std::make_shared<DurationScalar>(60, duration(TimeUnit::SECOND)),
-      std::make_shared<BinaryScalar>(hello),
-      std::make_shared<LargeBinaryScalar>(hello),
-      std::make_shared<FixedSizeBinaryScalar>(
-          hello, fixed_size_binary(static_cast<int32_t>(hello->size()))),
-      std::make_shared<Decimal128Scalar>(Decimal128(10), decimal(16, 4)),
-      std::make_shared<Decimal256Scalar>(Decimal256(10), decimal(76, 38)),
-      std::make_shared<StringScalar>(hello),
-      std::make_shared<LargeStringScalar>(hello),
-      std::make_shared<ListScalar>(ArrayFromJSON(int8(), "[1, 2, 3]")),
-      std::make_shared<LargeListScalar>(ArrayFromJSON(int8(), "[1, 1, 2, 2, 3, 3]")),
-      std::make_shared<FixedSizeListScalar>(ArrayFromJSON(int8(), "[1, 2, 3, 4]")),
-      std::make_shared<StructScalar>(
-          ScalarVector{
-              std::make_shared<Int32Scalar>(2),
-              std::make_shared<Int32Scalar>(6),
-          },
-          struct_({field("min", int32()), field("max", int32())}))};
+  auto scalars = GetScalars();
 
   for (int64_t length : {16}) {
     for (auto scalar : scalars) {
@@ -487,6 +490,20 @@ TEST_F(TestArray, TestMakeArrayFromScalar) {
   }
 }
 
+TEST_F(TestArray, TestMakeArrayFromScalarSliced) {
+  // Regression test for ARROW-13437
+  auto scalars = GetScalars();
+
+  for (auto scalar : scalars) {
+    SCOPED_TRACE(scalar->type->ToString());
+    ASSERT_OK_AND_ASSIGN(auto array, MakeArrayFromScalar(*scalar, 32));
+    auto sliced = array->Slice(1, 4);
+    ASSERT_EQ(sliced->length(), 4);
+    ASSERT_EQ(sliced->null_count(), 0);
+    ARROW_EXPECT_OK(sliced->ValidateFull());
+  }
+}
+
 TEST_F(TestArray, TestMakeArrayFromDictionaryScalar) {
   auto dictionary = ArrayFromJSON(utf8(), R"(["foo", "bar", "baz"])");
   auto type = std::make_shared<DictionaryType>(int8(), utf8());
diff --git a/cpp/src/arrow/array/validate.cc b/cpp/src/arrow/array/validate.cc
index 5cc3bacf282..0ffba4a5071 100644
--- a/cpp/src/arrow/array/validate.cc
+++ b/cpp/src/arrow/array/validate.cc
@@ -85,9 +85,9 @@ struct ValidateArrayImpl {
 
     int64_t expected_values_length = -1;
     if (MultiplyWithOverflow(data.length, list_size, &expected_values_length) ||
-        values.length != expected_values_length) {
+        values.length < expected_values_length) {
       return Status::Invalid("Values length (", values.length,
-                             ") is not equal to the length (", data.length,
+                             ") is less than the length (", data.length,
                              ") multiplied by the value size (", list_size, ")");
     }
 

From 2d333619b9993a26f10f0617314d5c1e729552a8 Mon Sep 17 00:00:00 2001
From: Alessandro Molina <amol@turbogears.org>
Date: Mon, 26 Jul 2021 17:44:34 +0200
Subject: [PATCH 644/719] ARROW-12650: [Doc][Python] Improve documentation
 regarding dealing with memory mapped files

![howitgets](https://user-images.githubusercontent.com/601423/117468287-d3c5c780-af54-11eb-869a-120ae75073b9.png)

Closes #10266 from amol-/ARROW-12650

Authored-by: Alessandro Molina <amol@turbogears.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 docs/source/python/ipc.rst     | 67 ++++++++++++++++++++++++++++++++++
 docs/source/python/parquet.rst | 23 ++++++++++++
 2 files changed, 90 insertions(+)

diff --git a/docs/source/python/ipc.rst b/docs/source/python/ipc.rst
index 6d90179b39e..249780a8dcd 100644
--- a/docs/source/python/ipc.rst
+++ b/docs/source/python/ipc.rst
@@ -154,6 +154,73 @@ DataFrame output:
    df = pa.ipc.open_file(buf).read_pandas()
    df[:5]
 
+Efficiently Writing and Reading Arrow Data
+------------------------------------------
+
+Being optimized for zero copy and memory mapped data, Arrow allows to easily
+read and write arrays consuming the minimum amount of resident memory.
+
+When writing and reading raw Arrow data, we can use the Arrow File Format
+or the Arrow Streaming Format.
+
+To dump an array to file, you can use the :meth:`~pyarrow.ipc.new_file`
+which will provide a new :class:`~pyarrow.ipc.RecordBatchFileWriter` instance
+that can be used to write batches of data to that file.
+
+For example to write an array of 10M integers, we could write it in 1000 chunks
+of 10000 entries:
+
+.. ipython:: python
+
+      BATCH_SIZE = 10000
+      NUM_BATCHES = 1000
+
+      schema = pa.schema([pa.field('nums', pa.int32())])
+
+      with pa.OSFile('bigfile.arrow', 'wb') as sink:
+         with pa.ipc.new_file(sink, schema) as writer:
+            for row in range(NUM_BATCHES):
+                  batch = pa.record_batch([pa.array(range(BATCH_SIZE), type=pa.int32())], schema)
+                  writer.write(batch)
+
+record batches support multiple columns, so in practice we always write the
+equivalent of a :class:`~pyarrow.Table`.
+
+Writing in batches is effective because we in theory need to keep in memory only
+the current batch we are writing. But when reading back, we can be even more effective
+by directly mapping the data from disk and avoid allocating any new memory on read.
+
+Under normal conditions, reading back our file will consume a few hundred megabytes
+of memory:
+
+.. ipython:: python
+
+      with pa.OSFile('bigfile.arrow', 'rb') as source:
+         loaded_array = pa.ipc.open_file(source).read_all()
+
+      print("LEN:", len(loaded_array))
+      print("RSS: {}MB".format(pa.total_allocated_bytes() >> 20))
+
+To more efficiently read big data from disk, we can memory map the file, so that
+Arrow can directly reference the data mapped from disk and avoid having to
+allocate its own memory.
+In such case the operating system will be able to page in the mapped memory
+lazily and page it out without any write back cost when under pressure,
+allowing to more easily read arrays bigger than the total memory.
+
+.. ipython:: python
+
+      with pa.memory_map('bigfile.arrow', 'rb') as source:
+         loaded_array = pa.ipc.open_file(source).read_all()
+      print("LEN:", len(loaded_array))
+      print("RSS: {}MB".format(pa.total_allocated_bytes() >> 20))
+
+.. note::
+
+   Other high level APIs like :meth:`~pyarrow.parquet.read_table` also provide a
+   ``memory_map`` option. But in those cases, the memory mapping can't help with
+   reducing resident memory consumption. See :ref:`parquet_mmap` for details.
+
 Arbitrary Object Serialization
 ------------------------------
 
diff --git a/docs/source/python/parquet.rst b/docs/source/python/parquet.rst
index 0db0df1bc4c..cab385b8b5d 100644
--- a/docs/source/python/parquet.rst
+++ b/docs/source/python/parquet.rst
@@ -112,6 +112,29 @@ In general, a Python file object will have the worst read performance, while a
 string file path or an instance of :class:`~.NativeFile` (especially memory
 maps) will perform the best.
 
+.. _parquet_mmap:
+
+Reading Parquet and Memory Mapping
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Because Parquet data needs to be decoded from the Parquet format 
+and compression, it can't be directly mapped from disk.
+Thus the ``memory_map`` option might perform better on some systems
+but won't help much with resident memory consumption.
+
+.. code-block:: python
+
+      >>> pq_array = pa.parquet.read_table("area1.parquet", memory_map=True)
+      >>> print("RSS: {}MB".format(pa.total_allocated_bytes() >> 20))
+      RSS: 4299MB
+
+      >>> pq_array = pa.parquet.read_table("area1.parquet", memory_map=False)
+      >>> print("RSS: {}MB".format(pa.total_allocated_bytes() >> 20))
+      RSS: 4299MB   
+
+If you need to deal with Parquet data bigger than memory, 
+the :ref:`dataset` and partitioning is probably what you are looking for.
+
 Parquet file writing options
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 

From 605b109da32ddca31f7fc0dd88ed64a48282c7eb Mon Sep 17 00:00:00 2001
From: Alessandro Molina <amol@turbogears.org>
Date: Mon, 26 Jul 2021 17:46:47 +0200
Subject: [PATCH 645/719] ARROW-13405: [Doc] Guide users to the documentation
 for their own platform

Closes #10767 from amol-/ARROW-13405

Authored-by: Alessandro Molina <amol@turbogears.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 docs/source/index.rst | 35 +++++++++++++++++++----------------
 1 file changed, 19 insertions(+), 16 deletions(-)

diff --git a/docs/source/index.rst b/docs/source/index.rst
index 615f9f8ab36..65aeb47ea9f 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -32,27 +32,15 @@ such topics as:
 * Reading and writing file formats (like CSV, Apache ORC, and Apache Parquet)
 * In-memory analytics and query processing
 
-.. _toc.columnar:
-
-.. toctree::
-   :maxdepth: 2
-   :caption: Specifications and Protocols
-
-   format/Versioning
-   format/Columnar
-   format/Flight
-   format/Integration
-   format/CDataInterface
-   format/CStreamInterface
-   format/Other
+**To learn how to use Arrow refer to the documentation specific to your
+target environment.**
 
 .. _toc.usage:
 
 .. toctree::
-   :maxdepth: 2
-   :caption: Libraries
+   :maxdepth: 1
+   :caption: Supported Environments
 
-   status
    C/GLib <https://arrow.apache.org/docs/c_glib/>
    C++ <cpp/index>
    C# <https://github.com/apache/arrow/blob/master/csharp/README.md>
@@ -65,6 +53,21 @@ such topics as:
    R <https://arrow.apache.org/docs/r/>
    Ruby <https://github.com/apache/arrow/blob/master/ruby/README.md>
    Rust <https://docs.rs/crate/arrow/>
+   status
+
+.. _toc.columnar:
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Specifications and Protocols
+
+   format/Versioning
+   format/Columnar
+   format/Flight
+   format/Integration
+   format/CDataInterface
+   format/CStreamInterface
+   format/Other   
 
 .. _toc.development:
 

From 2dba298b1bc8336187363c6473aa6af049228bbc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Mon, 26 Jul 2021 21:51:40 +0200
Subject: [PATCH 646/719] ARROW-13450: [Python][Packaging] Set deployment
 target to 10.13 for universal2 wheels
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Despite that having native wheels for Intel architectures the universal2 wheel is being built with newer macOS version, so pip will prefer the universal wheel rather than the `x86_64` one.

```
pyarrow-5.0.0-cp39-cp39-macosx_10_13_x86_64.whl
pyarrow-5.0.0-cp39-cp39-macosx_10_9_x86_64.whl
pyarrow-5.0.0-cp39-cp39-macosx_11_0_arm64.whl
pyarrow-5.0.0-cp39-cp39-macosx_11_0_universal2.whl
```

This PR changes the universal wheel to have an older platform tag:

```
pyarrow-5.0.0-cp39-cp39-macosx_10_13_universal2.whl
```

Closes #10801 from kszucs/universal2-10-13

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 ci/vcpkg/arm64-osx-static-debug.cmake        | 1 +
 ci/vcpkg/arm64-osx-static-release.cmake      | 1 +
 ci/vcpkg/universal2-osx-static-debug.cmake   | 1 +
 ci/vcpkg/universal2-osx-static-release.cmake | 1 +
 dev/tasks/tasks.yml                          | 4 ++--
 5 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/ci/vcpkg/arm64-osx-static-debug.cmake b/ci/vcpkg/arm64-osx-static-debug.cmake
index 6ed92b25b55..f511819a2ed 100644
--- a/ci/vcpkg/arm64-osx-static-debug.cmake
+++ b/ci/vcpkg/arm64-osx-static-debug.cmake
@@ -21,5 +21,6 @@ set(VCPKG_LIBRARY_LINKAGE static)
 
 set(VCPKG_CMAKE_SYSTEM_NAME Darwin)
 set(VCPKG_OSX_ARCHITECTURES arm64)
+set(VCPKG_OSX_DEPLOYMENT_TARGET "11.0")
 
 set(VCPKG_BUILD_TYPE debug)
diff --git a/ci/vcpkg/arm64-osx-static-release.cmake b/ci/vcpkg/arm64-osx-static-release.cmake
index 0aa78121602..43d65efb265 100644
--- a/ci/vcpkg/arm64-osx-static-release.cmake
+++ b/ci/vcpkg/arm64-osx-static-release.cmake
@@ -21,5 +21,6 @@ set(VCPKG_LIBRARY_LINKAGE static)
 
 set(VCPKG_CMAKE_SYSTEM_NAME Darwin)
 set(VCPKG_OSX_ARCHITECTURES arm64)
+set(VCPKG_OSX_DEPLOYMENT_TARGET "11.0")
 
 set(VCPKG_BUILD_TYPE release)
diff --git a/ci/vcpkg/universal2-osx-static-debug.cmake b/ci/vcpkg/universal2-osx-static-debug.cmake
index 7406ef3fe16..706ac47a72c 100644
--- a/ci/vcpkg/universal2-osx-static-debug.cmake
+++ b/ci/vcpkg/universal2-osx-static-debug.cmake
@@ -21,5 +21,6 @@ set(VCPKG_LIBRARY_LINKAGE static)
 
 set(VCPKG_CMAKE_SYSTEM_NAME Darwin)
 set(VCPKG_OSX_ARCHITECTURES "x86_64\;arm64")
+set(VCPKG_OSX_DEPLOYMENT_TARGET "10.13")
 
 set(VCPKG_BUILD_TYPE debug)
diff --git a/ci/vcpkg/universal2-osx-static-release.cmake b/ci/vcpkg/universal2-osx-static-release.cmake
index 0388ce78d0e..8670690171e 100644
--- a/ci/vcpkg/universal2-osx-static-release.cmake
+++ b/ci/vcpkg/universal2-osx-static-release.cmake
@@ -21,5 +21,6 @@ set(VCPKG_LIBRARY_LINKAGE static)
 
 set(VCPKG_CMAKE_SYSTEM_NAME Darwin)
 set(VCPKG_OSX_ARCHITECTURES "x86_64\;arm64")
+set(VCPKG_OSX_DEPLOYMENT_TARGET "10.13")
 
 set(VCPKG_BUILD_TYPE release)
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index e466bee05f3..d325c8a82f2 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -400,9 +400,9 @@ tasks:
       arch: universal2
       vcpkg_version: "2021.04.30"
       python_version: "3.9"
-      macos_deployment_target: "11.0"
+      macos_deployment_target: "10.13"
     artifacts:
-      - pyarrow-{no_rc_version}-cp39-cp39-macosx_11_0_universal2.whl
+      - pyarrow-{no_rc_version}-cp39-cp39-macosx_10_13_universal2.whl
 
 {############################ Python sdist ####################################}
 

From 1be837f721e6879ebd08aa4a3a0e8e484003821e Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Mon, 26 Jul 2021 15:25:26 -0500
Subject: [PATCH 647/719] ARROW-12688: [R] Use DuckDB to query an Arrow Dataset
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Am interface for using DuckDB + Arrow together.

I've added two methods:
  * The proposed `summarise(..., .engine = "duckdb")` method which is (probably) the method that people want to use
  * A lower-level method of specifying exactly when the transfer takes place. `to_duckdb()` which registers the data with DuckDB and returns a `tbl` that can be used in dplyr pipelines

For example,  the following two pipelines are equivalent and will group_by the `lgl` column and then use DuckDB to do group-aggregations

```
ds <- InMemoryDataset$create(example_data)

ds %>%
  group_by(lgl) %>%
  summarise(mean_int = mean(int, na.rm = TRUE), mean_dbl = mean(dbl, na.rm = TRUE), .engine = "duckdb")

ds %>%
  to_duckdb() %>%
  group_by(lgl) %>%
  summarise(mean_int = mean(int, na.rm = TRUE), mean_dbl = mean(dbl, na.rm = TRUE)) %>%
  collect()
```

And doing a large aggregation on our taxi dataset (TL;DR with duckdb completes in 27 sec on my MacPro):
```
> library(arrow)
> library(dplyr)
>
> ds <- open_dataset("~/repos/ab_store/data/taxi_parquet/", partitioning = c("year", "month"))
>
> system.time({
+   results <- ds %>%
+     group_by(passenger_count, payment_type) %>%
+     summarise(
+       fare_amount_mean = mean(fare_amount, na.rm = TRUE),
+       tip_amount_mean = mean(tip_amount, na.rm = TRUE),
+       trip_distance_mean = mean(trip_distance, na.rm = TRUE),
+       .engine = "duckdb"
+     ) %>%
+     collect()
+ })
   user  system elapsed
190.925  15.646  27.398
> results
# A tibble: 241 × 5
# Groups:   passenger_count [59]
   passenger_count payment_type fare_amount_mean tip_amount_mean
             <int> <chr>                   <dbl>           <dbl>
 1               5 Credit                  11.5       2.27
 2               2 CASH                     9.61      0.00000552
 3               6 CASH                     9.26      0
 4               1 CASH                     9.27      0.00000590
 5               3 CASH                     9.56      0.0000275
 6               5 CASH                     9.34      0.00000305
 7               2 Credit                  11.9       2.22
 8               1 Credit                  11.6       2.15
 9               3 Credit                  11.5       2.17
10               1 Cash                     8.92      0.00206
# … with 231 more rows, and 1 more variable: trip_distance_mean <dbl>
>
```

Using `to_duckdb()` takes longer since there is not (yet) filter pushdown, so all of the columns are being read then discarded. (TL;DR with duckdb completes in 72 sec on my MacPro):
```
> library(arrow)
> library(dplyr)
>
> ds <- open_dataset("~/repos/ab_store/data/taxi_parquet/", partitioning = c("year", "month"))
>
> system.time({
+   results <- ds %>%
+     select(-rate_code_id) %>% # this shouldn't be necessary, but duckdb hangs without it since it does not (yet) to filter pushdown
+     to_duckdb() %>%
+     group_by(passenger_count, payment_type) %>%
+     summarise(
+       fare_amount_mean = mean(fare_amount, na.rm = TRUE),
+       tip_amount_mean = mean(tip_amount, na.rm = TRUE),
+       trip_distance_mean = mean(trip_distance, na.rm = TRUE)
+     ) %>%
+     collect()
+ })
`summarise()` has grouped output by 'passenger_count'. You can override using the `.groups` argument.
   user  system elapsed
433.151  49.224  72.044
> results
# A tibble: 241 × 5
# Groups:   passenger_count [59]
   passenger_count payment_type fare_amount_mean tip_amount_mean
             <int> <chr>                   <dbl>           <dbl>
 1               1 Credit                  11.6       2.15
 2               1 Cash                     8.92      0.00206
 3               2 Cash                     9.94      0.00185
 4               3 Cash                     9.87      0.00142
 5               4 Cash                    10.1       0.00113
 6               1 CASH                     9.27      0.00000590
 7               1 No Charge                9.83      0.0103
 8               2 CASH                     9.61      0.00000552
 9               1 CREDIT                  12.1       2.26
10               2 Credit                  11.9       2.22
# … with 231 more rows, and 1 more variable: trip_distance_mean <dbl>
>
```

Finally with R+dplyr (TL;DR does not complete, even on a system with 128GB of memory and even if I only pull + aggregate one column):
```
> library(arrow)
> library(dplyr)
>
> ds <- open_dataset("~/repos/ab_store/data/taxi_parquet/", partitioning = c("year", "month"))
>
> system.time({
+   results <- ds %>%
+     select(fare_amount, tip_amount, trip_distance, passenger_count, payment_type) %>%
+     collect() %>%
+     group_by(passenger_count, payment_type) %>%
+     summarise(
+       fare_amount_mean = mean(fare_amount, na.rm = TRUE),
+       tip_amount_mean = mean(tip_amount, na.rm = TRUE),
+       trip_distance_mean = mean(trip_distance, na.rm = TRUE)
+     )
+ })
Error: Internal error in `dict_hash_with()`: Dictionary is full.
Run `rlang::last_error()` to see where the error occurred.
Timing stopped at: 190.2 151.7 115.9
>
> rlang::last_error()
<error/rlang_error>
Internal error in `dict_hash_with()`: Dictionary is full.
Backtrace:
 1. base::system.time(...)
 5. dplyr:::group_by.data.frame(., passenger_count, payment_type)
 6. dplyr::grouped_df(groups$data, groups$group_names, .drop)
 7. dplyr:::compute_groups(data, vars, drop = drop)
 8. dplyr:::vec_split_id_order(group_vars)
 9. vctrs::vec_group_loc(x)
Run `rlang::last_trace()` to see the full context.
```

Closes #10780 from jonkeane/ARROW-12688-duckdb-passoff

Authored-by: Jonathan Keane <jkeane@gmail.com>
Signed-off-by: Jonathan Keane <jkeane@gmail.com>
---
 r/DESCRIPTION                  |   4 +
 r/NAMESPACE                    |   1 +
 r/R/dplyr-summarize.R          |  17 +--
 r/R/duckdb.R                   | 115 ++++++++++++++++++++
 r/_pkgdown.yml                 |   3 +
 r/man/to_duckdb.Rd             |  60 +++++++++++
 r/tests/testthat/test-duckdb.R | 186 +++++++++++++++++++++++++++++++++
 7 files changed, 380 insertions(+), 6 deletions(-)
 create mode 100644 r/R/duckdb.R
 create mode 100644 r/man/to_duckdb.Rd
 create mode 100644 r/tests/testthat/test-duckdb.R

diff --git a/r/DESCRIPTION b/r/DESCRIPTION
index f710712ed50..969cf422b1b 100644
--- a/r/DESCRIPTION
+++ b/r/DESCRIPTION
@@ -40,9 +40,12 @@ Roxygen: list(markdown = TRUE, r6 = FALSE, load = "source")
 RoxygenNote: 7.1.1.9001
 VignetteBuilder: knitr
 Suggests:
+    DBI,
+    dbplyr,
     decor,
     distro,
     dplyr,
+    duckdb (> 0.2.7),
     hms,
     knitr,
     lubridate,
@@ -93,6 +96,7 @@ Collate:
     'record-batch.R'
     'table.R'
     'dplyr.R'
+    'duckdb.R'
     'feather.R'
     'field.R'
     'filesystem.R'
diff --git a/r/NAMESPACE b/r/NAMESPACE
index b0f4b0bba75..8bcc58653fb 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -267,6 +267,7 @@ export(struct)
 export(time32)
 export(time64)
 export(timestamp)
+export(to_duckdb)
 export(type)
 export(uint16)
 export(uint32)
diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R
index ecb459c982c..3042e30707e 100644
--- a/r/R/dplyr-summarize.R
+++ b/r/R/dplyr-summarize.R
@@ -18,12 +18,9 @@
 
 # The following S3 methods are registered on load if dplyr is present
 
-summarise.arrow_dplyr_query <- function(.data, ...) {
+summarise.arrow_dplyr_query <- function(.data, ..., .engine = c("arrow", "duckdb")) {
   call <- match.call()
   .data <- arrow_dplyr_query(.data)
-  if (query_on_dataset(.data)) {
-    not_implemented_for_dataset("summarize()")
-  }
   exprs <- quos(...)
   # Only retain the columns we need to do our aggregations
   vars_to_keep <- unique(c(
@@ -31,6 +28,14 @@ summarise.arrow_dplyr_query <- function(.data, ...) {
     dplyr::group_vars(.data)             # vars needed for grouping
   ))
   .data <- dplyr::select(.data, vars_to_keep)
-  dplyr::summarise(dplyr::collect(.data), ...)
+
+  if (match.arg(.engine) == "duckdb") {
+    dplyr::summarise(to_duckdb(.data), ...)
+  } else {
+    if (query_on_dataset(.data)) {
+      not_implemented_for_dataset("summarize()")
+    }
+    dplyr::summarise(dplyr::collect(.data), ...)
+  }
 }
-summarise.Dataset <- summarise.ArrowTabular <- summarise.arrow_dplyr_query
\ No newline at end of file
+summarise.Dataset <- summarise.ArrowTabular <- summarise.arrow_dplyr_query
diff --git a/r/R/duckdb.R b/r/R/duckdb.R
new file mode 100644
index 00000000000..adeda7fe5d5
--- /dev/null
+++ b/r/R/duckdb.R
@@ -0,0 +1,115 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' Create a (virtual) DuckDB table from an Arrow object
+#'
+#' This will do the necessary configuration to create a (virtual) table in DuckDB
+#' that is backed by the Arrow object given. No data is copied or modified until
+#' `collect()` or `compute()` are called or a query is run against the table.
+#'
+#' The result is a dbplyr-compatible object that can be used in d(b)plyr pipelines.
+#'
+#' Alternatively, one can pass the argument `.engine = "duckdb"` to `summarise()`
+#' that starts with an Arrow object to use DuckDB to calculate the summarization
+#' step. Internally, this calls `to_duckdb()` with all of the default argument
+#' values.
+#'
+#' @param .data the Arrow object (e.g. Dataset, Table) to use for the DuckDB table
+#' @param con a DuckDB connection to use (default will create one and store it
+#' in `options("arrow_duck_con")`)
+#' @param table_name a name to use in DuckDB for this object. The default is a
+#' unique string `"arrow_"` followed by numbers.
+#' @param auto_disconnect should the table be automatically cleaned up when the
+#' resulting object is removed (and garbage collected)? Default: `TRUE`
+#'
+#' @return A `tbl` of the new table in DuckDB
+#'
+#' @name to_duckdb
+#' @export
+#' @examplesIf arrow_with_dataset() && requireNamespace("duckdb", quietly = TRUE) && requireNamespace("dplyr", quietly = TRUE)
+#' library(dplyr)
+#'
+#' ds <- InMemoryDataset$create(mtcars)
+#'
+#' ds %>%
+#'   filter(mpg < 30) %>%
+#'   to_duckdb() %>%
+#'   group_by(cyl) %>%
+#'   summarize(mean_mpg = mean(mpg, na.rm = TRUE))
+#'
+#' # the same query can be simplified using .engine = "duckdb"
+#' ds %>%
+#'   filter(mpg < 30) %>%
+#'   group_by(cyl) %>%
+#'   summarize(mean_mpg = mean(mpg, na.rm = TRUE), .engine = "duckdb")
+#'
+to_duckdb <- function(.data,
+                      con = arrow_duck_connection(),
+                      table_name =  unique_arrow_tablename(),
+                      auto_disconnect = TRUE) {
+  .data <- arrow_dplyr_query(.data)
+  duckdb::duckdb_register_arrow(con, table_name, .data)
+
+  tbl <- tbl(con, table_name)
+  groups <- dplyr::groups(.data)
+  if (length(groups)) {
+    tbl <- dplyr::group_by(tbl, groups)
+  }
+
+  if (auto_disconnect) {
+    # this will add the correct connection disconnection when the tbl is gced.
+    # we should probably confirm that this use of src$disco is kosher.
+    tbl$src$disco <- duckdb_disconnector(con, table_name)
+  }
+
+  tbl
+}
+
+arrow_duck_connection <- function() {
+  con <- getOption("arrow_duck_con")
+  if (is.null(con) || !DBI::dbIsValid(con)) {
+    con <- DBI::dbConnect(duckdb::duckdb())
+    # Use the same CPU count that the arrow library is set to
+    DBI::dbExecute(con, paste0("PRAGMA threads=", cpu_count()))
+    options(arrow_duck_con = con)
+  }
+  con
+}
+
+# Adapted from dbplyr
+unique_arrow_tablename <- function() {
+  i <- getOption("arrow_table_name", 0) + 1
+  options(arrow_table_name = i)
+  sprintf("arrow_%03i", i)
+}
+
+# Creates an environment that disconnects the database when it's GC'd
+duckdb_disconnector <- function(con, tbl_name) {
+  reg.finalizer(environment(), function(...) {
+    # remote the table we ephemerally created (though only if the connection is
+    # still valid)
+    if (DBI::dbIsValid(con)) {
+      duckdb::duckdb_unregister_arrow(con, tbl_name)
+    }
+
+    # and there are no more tables, so we can safely shutdown
+    if (length(DBI::dbListTables(con)) == 0) {
+      DBI::dbDisconnect(con, shutdown=TRUE)
+    }
+  })
+  environment()
+}
diff --git a/r/_pkgdown.yml b/r/_pkgdown.yml
index 5bd8418db7b..90d900ddf28 100644
--- a/r/_pkgdown.yml
+++ b/r/_pkgdown.yml
@@ -164,6 +164,9 @@ reference:
       - match_arrow
       - value_counts
       - list_compute_functions
+  - title: Connections to other systems
+    contents:
+      - to_duckdb
   - title: Configuration
     contents:
       - arrow_info
diff --git a/r/man/to_duckdb.Rd b/r/man/to_duckdb.Rd
new file mode 100644
index 00000000000..7fc0a7b4073
--- /dev/null
+++ b/r/man/to_duckdb.Rd
@@ -0,0 +1,60 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/duckdb.R
+\name{to_duckdb}
+\alias{to_duckdb}
+\title{Create a (virtual) DuckDB table from an Arrow object}
+\usage{
+to_duckdb(
+  .data,
+  con = arrow_duck_connection(),
+  table_name = unique_arrow_tablename(),
+  auto_disconnect = TRUE
+)
+}
+\arguments{
+\item{.data}{the Arrow object (e.g. Dataset, Table) to use for the DuckDB table}
+
+\item{con}{a DuckDB connection to use (default will create one and store it
+in \code{options("arrow_duck_con")})}
+
+\item{table_name}{a name to use in DuckDB for this object. The default is a
+unique string \code{"arrow_"} followed by numbers.}
+
+\item{auto_disconnect}{should the table be automatically cleaned up when the
+resulting object is removed (and garbage collected)? Default: \code{TRUE}}
+}
+\value{
+A \code{tbl} of the new table in DuckDB
+}
+\description{
+This will do the necessary configuration to create a (virtual) table in DuckDB
+that is backed by the Arrow object given. No data is copied or modified until
+\code{collect()} or \code{compute()} are called or a query is run against the table.
+}
+\details{
+The result is a dbplyr-compatible object that can be used in d(b)plyr pipelines.
+
+Alternatively, one can pass the argument \code{.engine = "duckdb"} to \code{summarise()}
+that starts with an Arrow object to use DuckDB to calculate the summarization
+step. Internally, this calls \code{to_duckdb()} with all of the default argument
+values.
+}
+\examples{
+\dontshow{if (arrow_with_dataset() && requireNamespace("duckdb", quietly = TRUE) && requireNamespace("dplyr", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+library(dplyr)
+
+ds <- InMemoryDataset$create(mtcars)
+
+ds \%>\%
+  filter(mpg < 30) \%>\%
+  to_duckdb() \%>\%
+  group_by(cyl) \%>\%
+  summarize(mean_mpg = mean(mpg, na.rm = TRUE))
+
+# the same query can be simplified using .engine = "duckdb"
+ds \%>\%
+  filter(mpg < 30) \%>\%
+  group_by(cyl) \%>\%
+  summarize(mean_mpg = mean(mpg, na.rm = TRUE), .engine = "duckdb")
+\dontshow{\}) # examplesIf}
+}
diff --git a/r/tests/testthat/test-duckdb.R b/r/tests/testthat/test-duckdb.R
new file mode 100644
index 00000000000..e8ec079b6e0
--- /dev/null
+++ b/r/tests/testthat/test-duckdb.R
@@ -0,0 +1,186 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+skip_if_not_installed("duckdb", minimum_version = "0.2.8")
+skip_if_not_installed("dbplyr")
+library(duckdb)
+library(dplyr)
+
+test_that("to_duckdb", {
+  ds <- InMemoryDataset$create(example_data)
+
+  expect_identical(
+    ds %>%
+      to_duckdb() %>%
+      collect() %>%
+      # factors don't roundtrip
+      select(!fct),
+    select(example_data, !fct)
+  )
+
+  expect_identical(
+    ds %>%
+      select(int, lgl, dbl) %>%
+      to_duckdb() %>%
+      group_by(lgl) %>%
+      summarise(mean_int = mean(int, na.rm = TRUE), mean_dbl = mean(dbl, na.rm = TRUE)) %>%
+      collect(),
+    tibble::tibble(
+      lgl = c(TRUE, NA, FALSE),
+      mean_int = c(3, 6.25, 8.5),
+      mean_dbl = c(3.1, 6.35, 6.1)
+    )
+  )
+
+  # can group_by before the to_duckdb
+  expect_identical(
+    ds %>%
+      select(int, lgl, dbl) %>%
+      group_by(lgl) %>%
+      to_duckdb() %>%
+      summarise(mean_int = mean(int, na.rm = TRUE), mean_dbl = mean(dbl, na.rm = TRUE)) %>%
+      collect(),
+    tibble::tibble(
+      lgl = c(TRUE, NA, FALSE),
+      mean_int = c(3, 6.25, 8.5),
+      mean_dbl = c(3.1, 6.35, 6.1)
+    )
+  )
+})
+
+test_that("summarise(..., .engine)", {
+  ds <- InMemoryDataset$create(example_data)
+  expect_identical(
+    ds %>%
+      select(int, lgl, dbl) %>%
+      group_by(lgl) %>%
+      summarise(
+        mean_int = mean(int, na.rm = TRUE),
+        mean_dbl = mean(dbl, na.rm = TRUE),
+        .engine = "duckdb"
+      ) %>%
+      collect(),
+    tibble::tibble(
+      lgl = c(TRUE, NA, FALSE),
+      mean_int = c(3, 6.25, 8.5),
+      mean_dbl = c(3.1, 6.35, 6.1)
+    )
+  )
+})
+
+# The next set of tests use an already-extant connection to test features of
+# persistence and querying against the table without using the `tbl` itself, so
+# we need to create a connection separate from the ephemeral one that is made
+# with arrow_duck_connection()
+con <- dbConnect(duckdb::duckdb())
+dbExecute(con, "PRAGMA threads=2")
+on.exit(dbDisconnect(con, shutdown = TRUE), add = TRUE)
+
+# write one table to the connection so it is kept open
+DBI::dbWriteTable(con, "mtcars", mtcars)
+
+test_that("Joining, auto-cleanup", {
+  ds <- InMemoryDataset$create(example_data)
+
+  table_one_name <- "my_arrow_table_1"
+  table_one <- to_duckdb(ds, con = con, table_name = table_one_name)
+  table_two_name <- "my_arrow_table_2"
+  table_two <- to_duckdb(ds, con = con, table_name = table_two_name)
+
+  res <- dbGetQuery(
+    con,
+    paste0(
+      "SELECT * FROM ", table_one_name,
+      " INNER JOIN ", table_two_name,
+      " ON ", table_one_name, ".int = ", table_two_name, ".int"
+    )
+  )
+  expect_identical(dim(res), c(9L, 14L))
+
+  # clean up cleans up the tables
+  expect_true(all(c(table_one_name, table_two_name) %in% DBI::dbListTables(con)))
+  rm(table_one, table_two)
+  gc()
+  expect_false(any(c(table_one_name, table_two_name) %in% DBI::dbListTables(con)))
+})
+
+test_that("Joining, auto-cleanup disabling", {
+  ds <- InMemoryDataset$create(example_data)
+
+  table_three_name <- "my_arrow_table_3"
+  table_three <- to_duckdb(ds, con = con, table_name = table_three_name, auto_disconnect = FALSE)
+
+  # clean up does *not* clean these tables
+  expect_true(table_three_name %in% DBI::dbListTables(con))
+  rm(table_three)
+  gc()
+  # but because we aren't auto_disconnecting then we still have this table.
+  expect_true(table_three_name %in% DBI::dbListTables(con))
+})
+
+test_that("to_duckdb with a table", {
+  tab <- Table$create(example_data)
+
+  expect_identical(
+    tab %>%
+      to_duckdb() %>%
+      group_by(int > 4) %>%
+      summarise(
+        int_mean = mean(int, na.rm = TRUE),
+        dbl_mean = mean(dbl, na.rm = TRUE)
+      ) %>%
+    collect(),
+    tibble::tibble(
+      "int > 4" = c(FALSE, NA, TRUE),
+      int_mean = c(2, NA, 7.5),
+      dbl_mean = c(2.1, 4.1, 7.3)
+    )
+  )
+})
+
+test_that("to_duckdb passing a connection", {
+  ds <- InMemoryDataset$create(example_data)
+
+  con_separate <- dbConnect(duckdb::duckdb())
+  # we always want to test in parallel
+  dbExecute(con_separate, "PRAGMA threads=2")
+  on.exit(dbDisconnect(con_separate, shutdown = TRUE), add = TRUE)
+
+  # create a table to join to that we know is in our con_separate
+  new_df <- data.frame(
+    int = 1:10,
+    char = letters[26:17]
+  )
+  DBI::dbWriteTable(con_separate, "separate_join_table", new_df)
+
+  table_four <- ds %>%
+    select(int, lgl, dbl) %>%
+    to_duckdb(con = con_separate, auto_disconnect = FALSE)
+  table_four_name <- table_four$ops$x
+
+  result <- DBI::dbGetQuery(
+    con_separate,
+    paste0(
+      "SELECT * FROM ", table_four_name,
+      " INNER JOIN separate_join_table ",
+      "ON separate_join_table.int = ", table_four_name, ".int"
+    )
+  )
+
+  expect_identical(dim(result), c(9L, 5L))
+  expect_identical(result$char, new_df[new_df$int != 4, ]$char)
+})

From 23a2faab44b90252e79ec75894cb248877a90753 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Mon, 26 Jul 2021 16:36:14 -0400
Subject: [PATCH 648/719] ARROW-6072: [C++] Implement casting List <->
 LargeList

Implement casts between `List<T>` and `LargeList<U>` assuming T and U can be cast to each other.

Closes #10804 from pitrou/ARROW-6072-cast-list

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 .../compute/kernels/scalar_cast_internal.cc   |   4 +
 .../compute/kernels/scalar_cast_nested.cc     | 141 ++++++++++++------
 .../arrow/compute/kernels/scalar_cast_test.cc |  79 +++++-----
 python/pyarrow/tests/test_compute.py          |   5 +
 4 files changed, 144 insertions(+), 85 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc b/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc
index 198c82bd97e..8076c35a132 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc
@@ -29,6 +29,8 @@ namespace internal {
 
 // ----------------------------------------------------------------------
 
+namespace {
+
 template <typename OutT, typename InT>
 ARROW_DISABLE_UBSAN("float-cast-overflow")
 void DoStaticCast(const void* in_data, int64_t in_offset, int64_t length,
@@ -117,6 +119,8 @@ void CastNumberImpl(Type::type out_type, const Datum& input, Datum* out) {
   }
 }
 
+}  // namespace
+
 void CastNumberToNumberUnsafe(Type::type in_type, Type::type out_type, const Datum& input,
                               Datum* out) {
   switch (in_type) {
diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc b/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc
index ec92dbb5d60..ab583bbbe8c 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc
@@ -17,6 +17,7 @@
 
 // Implementation of casting to (or between) list types
 
+#include <limits>
 #include <utility>
 #include <vector>
 
@@ -26,6 +27,7 @@
 #include "arrow/compute/kernels/common.h"
 #include "arrow/compute/kernels/scalar_cast_internal.h"
 #include "arrow/util/bitmap_ops.h"
+#include "arrow/util/int_util.h"
 
 namespace arrow {
 
@@ -34,82 +36,135 @@ using internal::CopyBitmap;
 namespace compute {
 namespace internal {
 
-template <typename Type>
-Status CastListExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
-  using offset_type = typename Type::offset_type;
-  using ScalarType = typename TypeTraits<Type>::ScalarType;
+namespace {
 
-  const CastOptions& options = CastState::Get(ctx);
+// (Large)List<T> -> (Large)List<U>
 
-  auto child_type = checked_cast<const Type&>(*out->type()).value_type();
+template <typename SrcType, typename DestType>
+typename std::enable_if<SrcType::type_id == DestType::type_id, Status>::type
+CastListOffsets(KernelContext* ctx, const ArrayData& in_array, ArrayData* out_array) {
+  return Status::OK();
+}
+
+template <typename SrcType, typename DestType>
+typename std::enable_if<SrcType::type_id != DestType::type_id, Status>::type
+CastListOffsets(KernelContext* ctx, const ArrayData& in_array, ArrayData* out_array) {
+  using src_offset_type = typename SrcType::offset_type;
+  using dest_offset_type = typename DestType::offset_type;
+
+  ARROW_ASSIGN_OR_RAISE(out_array->buffers[1],
+                        ctx->Allocate(sizeof(dest_offset_type) * (in_array.length + 1)));
+  ::arrow::internal::CastInts(in_array.GetValues<src_offset_type>(1),
+                              out_array->GetMutableValues<dest_offset_type>(1),
+                              in_array.length + 1);
+  return Status::OK();
+}
+
+template <typename SrcType, typename DestType>
+struct CastList {
+  using src_offset_type = typename SrcType::offset_type;
+  using dest_offset_type = typename DestType::offset_type;
+
+  static constexpr bool is_upcast = sizeof(src_offset_type) < sizeof(dest_offset_type);
+  static constexpr bool is_downcast = sizeof(src_offset_type) > sizeof(dest_offset_type);
+
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    const CastOptions& options = CastState::Get(ctx);
+
+    auto child_type = checked_cast<const DestType&>(*out->type()).value_type();
 
-  if (out->kind() == Datum::SCALAR) {
-    const auto& in_scalar = checked_cast<const ScalarType&>(*batch[0].scalar());
-    auto out_scalar = checked_cast<ScalarType*>(out->scalar().get());
+    if (out->kind() == Datum::SCALAR) {
+      // The scalar case is simple, as only the underlying values must be cast
+      const auto& in_scalar = checked_cast<const BaseListScalar&>(*batch[0].scalar());
+      auto out_scalar = checked_cast<BaseListScalar*>(out->scalar().get());
 
-    DCHECK(!out_scalar->is_valid);
-    if (in_scalar.is_valid) {
-      ARROW_ASSIGN_OR_RAISE(out_scalar->value, Cast(*in_scalar.value, child_type, options,
-                                                    ctx->exec_context()));
+      DCHECK(!out_scalar->is_valid);
+      if (in_scalar.is_valid) {
+        ARROW_ASSIGN_OR_RAISE(out_scalar->value, Cast(*in_scalar.value, child_type,
+                                                      options, ctx->exec_context()));
 
-      out_scalar->is_valid = true;
+        out_scalar->is_valid = true;
+      }
+      return Status::OK();
     }
-    return Status::OK();
-  }
 
-  const ArrayData& in_array = *batch[0].array();
-  ArrayData* out_array = out->mutable_array();
+    const ArrayData& in_array = *batch[0].array();
+    auto offsets = in_array.GetValues<src_offset_type>(1);
+    Datum values = in_array.child_data[0];
 
-  // Copy from parent
-  out_array->buffers = in_array.buffers;
-  Datum values = in_array.child_data[0];
+    ArrayData* out_array = out->mutable_array();
+    out_array->buffers = in_array.buffers;
 
-  if (in_array.offset != 0) {
-    if (in_array.buffers[0]) {
+    // Shift bitmap in case the source offset is non-zero
+    if (in_array.offset != 0 && in_array.buffers[0]) {
       ARROW_ASSIGN_OR_RAISE(out_array->buffers[0],
                             CopyBitmap(ctx->memory_pool(), in_array.buffers[0]->data(),
                                        in_array.offset, in_array.length));
     }
-    ARROW_ASSIGN_OR_RAISE(out_array->buffers[1],
-                          ctx->Allocate(sizeof(offset_type) * (in_array.length + 1)));
 
-    auto offsets = in_array.GetValues<offset_type>(1);
-    auto shifted_offsets = out_array->GetMutableValues<offset_type>(1);
+    // Handle list offsets
+    // Several cases can arise:
+    // - the source offset is non-zero, in which case we slice the underlying values
+    //   and shift the list offsets (regardless of their respective types)
+    // - the source offset is zero but source and destination types have
+    //   different list offset types, in which case we cast the list offsets
+    // - otherwise, we simply keep the original list offsets
+    if (is_downcast) {
+      if (offsets[in_array.length] > std::numeric_limits<dest_offset_type>::max()) {
+        return Status::Invalid("Array of type ", in_array.type->ToString(),
+                               " too large to convert to ", out_array->type->ToString());
+      }
+    }
 
-    for (int64_t i = 0; i < in_array.length + 1; ++i) {
-      shifted_offsets[i] = offsets[i] - offsets[0];
+    if (in_array.offset != 0) {
+      ARROW_ASSIGN_OR_RAISE(
+          out_array->buffers[1],
+          ctx->Allocate(sizeof(dest_offset_type) * (in_array.length + 1)));
+
+      auto shifted_offsets = out_array->GetMutableValues<dest_offset_type>(1);
+      for (int64_t i = 0; i < in_array.length + 1; ++i) {
+        shifted_offsets[i] = static_cast<dest_offset_type>(offsets[i] - offsets[0]);
+      }
+      values = in_array.child_data[0]->Slice(offsets[0], offsets[in_array.length]);
+    } else {
+      RETURN_NOT_OK((CastListOffsets<SrcType, DestType>(ctx, in_array, out_array)));
     }
-    values = in_array.child_data[0]->Slice(offsets[0], offsets[in_array.length]);
-  }
 
-  ARROW_ASSIGN_OR_RAISE(Datum cast_values,
-                        Cast(values, child_type, options, ctx->exec_context()));
+    // Handle values
+    ARROW_ASSIGN_OR_RAISE(Datum cast_values,
+                          Cast(values, child_type, options, ctx->exec_context()));
 
-  DCHECK_EQ(Datum::ARRAY, cast_values.kind());
-  out_array->child_data.push_back(cast_values.array());
-  return Status::OK();
-}
+    DCHECK_EQ(Datum::ARRAY, cast_values.kind());
+    out_array->child_data.push_back(cast_values.array());
+    return Status::OK();
+  }
+};
 
-template <typename Type>
+template <typename SrcType, typename DestType>
 void AddListCast(CastFunction* func) {
   ScalarKernel kernel;
-  kernel.exec = CastListExec<Type>;
-  kernel.signature = KernelSignature::Make({InputType(Type::type_id)}, kOutputTargetType);
+  kernel.exec = CastList<SrcType, DestType>::Exec;
+  kernel.signature =
+      KernelSignature::Make({InputType(SrcType::type_id)}, kOutputTargetType);
   kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
-  DCHECK_OK(func->AddKernel(Type::type_id, std::move(kernel)));
+  DCHECK_OK(func->AddKernel(SrcType::type_id, std::move(kernel)));
 }
 
+}  // namespace
+
 std::vector<std::shared_ptr<CastFunction>> GetNestedCasts() {
   // We use the list<T> from the CastOptions when resolving the output type
 
   auto cast_list = std::make_shared<CastFunction>("cast_list", Type::LIST);
   AddCommonCasts(Type::LIST, kOutputTargetType, cast_list.get());
-  AddListCast<ListType>(cast_list.get());
+  AddListCast<ListType, ListType>(cast_list.get());
+  AddListCast<LargeListType, ListType>(cast_list.get());
 
   auto cast_large_list =
       std::make_shared<CastFunction>("cast_large_list", Type::LARGE_LIST);
   AddCommonCasts(Type::LARGE_LIST, kOutputTargetType, cast_large_list.get());
-  AddListCast<LargeListType>(cast_large_list.get());
+  AddListCast<ListType, LargeListType>(cast_large_list.get());
+  AddListCast<LargeListType, LargeListType>(cast_large_list.get());
 
   // FSL is a bit incomplete at the moment
   auto cast_fsl =
diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
index fce8518dd3b..9f537fecf55 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
@@ -1676,56 +1676,51 @@ TEST(Cast, ListToPrimitive) {
       Cast(*ArrayFromJSON(list(binary()), R"([["1", "2"], ["3", "4"]])"), utf8()));
 }
 
-TEST(Cast, ListToList) {
-  using make_list_t = std::shared_ptr<DataType>(const std::shared_ptr<DataType>&);
-  for (auto make_list : std::vector<make_list_t*>{&list, &large_list}) {
-    auto list_int32 =
-        ArrayFromJSON(make_list(int32()),
-                      "[[0], [1], null, [2, 3, 4], [5, 6], null, [], [7], [8, 9]]")
-            ->data();
-
-    auto list_int64 = list_int32->Copy();
-    list_int64->type = make_list(int64());
-    list_int64->child_data[0] = Cast(list_int32->child_data[0], int64())->array();
-    ValidateOutput(*list_int64);
-
-    auto list_float32 = list_int32->Copy();
-    list_float32->type = make_list(float32());
-    list_float32->child_data[0] = Cast(list_int32->child_data[0], float32())->array();
-    ValidateOutput(*list_float32);
-
-    CheckCast(MakeArray(list_int32), MakeArray(list_float32));
-    CheckCast(MakeArray(list_float32), MakeArray(list_int64));
-    CheckCast(MakeArray(list_int64), MakeArray(list_float32));
-
-    CheckCast(MakeArray(list_int32), MakeArray(list_int64));
-    CheckCast(MakeArray(list_float32), MakeArray(list_int32));
-    CheckCast(MakeArray(list_int64), MakeArray(list_int32));
+using make_list_t = std::shared_ptr<DataType>(const std::shared_ptr<DataType>&);
+
+static const auto list_factories = std::vector<make_list_t*>{&list, &large_list};
+
+static void CheckListToList(const std::vector<std::shared_ptr<DataType>>& value_types,
+                            const std::string& json_data) {
+  for (auto make_src_list : list_factories) {
+    for (auto make_dest_list : list_factories) {
+      for (const auto& src_value_type : value_types) {
+        for (const auto& dest_value_type : value_types) {
+          const auto src_type = make_src_list(src_value_type);
+          const auto dest_type = make_dest_list(dest_value_type);
+          ARROW_SCOPED_TRACE("src_type = ", src_type->ToString(),
+                             ", dest_type = ", dest_type->ToString());
+          CheckCast(ArrayFromJSON(src_type, json_data),
+                    ArrayFromJSON(dest_type, json_data));
+        }
+      }
+    }
   }
+}
 
-  // No nulls (ARROW-12568)
-  for (auto make_list : std::vector<make_list_t*>{&list, &large_list}) {
-    auto list_int32 = ArrayFromJSON(make_list(int32()),
-                                    "[[0], [1], [2, 3, 4], [5, 6], [], [7], [8, 9]]")
-                          ->data();
-    auto list_int64 = list_int32->Copy();
-    list_int64->type = make_list(int64());
-    list_int64->child_data[0] = Cast(list_int32->child_data[0], int64())->array();
-    ValidateOutput(*list_int64);
+TEST(Cast, ListToList) {
+  CheckListToList({int32(), float32(), int64()},
+                  "[[0], [1], null, [2, 3, 4], [5, 6], null, [], [7], [8, 9]]");
+}
 
-    CheckCast(MakeArray(list_int32), MakeArray(list_int64));
-    CheckCast(MakeArray(list_int64), MakeArray(list_int32));
-  }
+TEST(Cast, ListToListNoNulls) {
+  // ARROW-12568
+  CheckListToList({int32(), float32(), int64()},
+                  "[[0], [1], [2, 3, 4], [5, 6], [], [7], [8, 9]]");
 }
 
 TEST(Cast, ListToListOptionsPassthru) {
-  auto list_int32 = ArrayFromJSON(list(int32()), "[[87654321]]");
+  for (auto make_src_list : list_factories) {
+    for (auto make_dest_list : list_factories) {
+      auto list_int32 = ArrayFromJSON(make_src_list(int32()), "[[87654321]]");
 
-  auto options = CastOptions::Safe(list(int16()));
-  CheckCastFails(list_int32, options);
+      auto options = CastOptions::Safe(make_dest_list(int16()));
+      CheckCastFails(list_int32, options);
 
-  options.allow_int_overflow = true;
-  CheckCast(list_int32, ArrayFromJSON(list(int16()), "[[32689]]"), options);
+      options.allow_int_overflow = true;
+      CheckCast(list_int32, ArrayFromJSON(make_dest_list(int16()), "[[32689]]"), options);
+    }
+  }
 }
 
 TEST(Cast, IdentityCasts) {
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index 993742fe664..b0baa76e50a 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -1347,6 +1347,11 @@ def test_cast():
     expected = pa.array([1262304000000, 1420070400000], type='timestamp[ms]')
     assert pc.cast(arr, 'timestamp[ms]') == expected
 
+    arr = pa.array([[1, 2], [3, 4, 5]], type=pa.large_list(pa.int8()))
+    expected = pa.array([["1", "2"], ["3", "4", "5"]],
+                        type=pa.list_(pa.utf8()))
+    assert pc.cast(arr, expected.type) == expected
+
 
 def test_strptime():
     arr = pa.array(["5/1/2020", None, "12/13/1900"])

From 2a2c330b18e426143f6534649793f0dd2481e505 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Tue, 27 Jul 2021 11:55:07 +0200
Subject: [PATCH 649/719] ARROW-11243: [C++] Recognize time types in CSV files

* Allow reading CSV columns as time32 and time64
* Automatically infer "hh:mm" and "hh:mm:ss" as time32[s]

Closes #10782 from pitrou/ARROW-11243-csv-times

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/csv/column_builder_test.cc | 58 ++++++++++++++
 cpp/src/arrow/csv/converter.cc           | 17 +++--
 cpp/src/arrow/csv/converter_test.cc      | 62 +++++++++++++++
 cpp/src/arrow/csv/inference_internal.h   |  5 ++
 cpp/src/arrow/util/value_parsing.h       | 38 +++++++++-
 cpp/src/arrow/util/value_parsing_test.cc | 97 ++++++++++++++++++++++++
 docs/source/cpp/csv.rst                  |  2 +
 docs/source/python/csv.rst               |  3 +-
 python/pyarrow/tests/test_csv.py         | 27 +++++++
 9 files changed, 299 insertions(+), 10 deletions(-)

diff --git a/cpp/src/arrow/csv/column_builder_test.cc b/cpp/src/arrow/csv/column_builder_test.cc
index 9fa995350e2..7577c883e8c 100644
--- a/cpp/src/arrow/csv/column_builder_test.cc
+++ b/cpp/src/arrow/csv/column_builder_test.cc
@@ -400,6 +400,24 @@ TEST_F(InferringColumnBuilderTest, MultipleChunkDate) {
                  ArrayFromJSON(date32(), "[null]")});
 }
 
+TEST_F(InferringColumnBuilderTest, SingleChunkTime) {
+  auto options = ConvertOptions::Defaults();
+  auto tg = TaskGroup::MakeSerial();
+
+  CheckInferred(tg, {{"", "01:23:45", "NA"}}, options,
+                {ArrayFromJSON(time32(TimeUnit::SECOND), "[null, 5025, null]")});
+}
+
+TEST_F(InferringColumnBuilderTest, MultipleChunkTime) {
+  auto options = ConvertOptions::Defaults();
+  auto tg = TaskGroup::MakeSerial();
+  auto type = time32(TimeUnit::SECOND);
+
+  CheckInferred(tg, {{""}, {"01:23:45"}, {"NA"}}, options,
+                {ArrayFromJSON(type, "[null]"), ArrayFromJSON(type, "[5025]"),
+                 ArrayFromJSON(type, "[null]")});
+}
+
 TEST_F(InferringColumnBuilderTest, SingleChunkTimestamp) {
   auto options = ConvertOptions::Defaults();
   auto tg = TaskGroup::MakeSerial();
@@ -453,6 +471,46 @@ TEST_F(InferringColumnBuilderTest, MultipleChunkTimestampNS) {
                 options, expected);
 }
 
+TEST_F(InferringColumnBuilderTest, SingleChunkIntegerAndTime) {
+  // Fallback to utf-8
+  auto options = ConvertOptions::Defaults();
+  auto tg = TaskGroup::MakeSerial();
+
+  CheckInferred(tg, {{"", "99", "01:23:45", "NA"}}, options,
+                {ArrayFromJSON(utf8(), R"(["", "99", "01:23:45", "NA"])")});
+}
+
+TEST_F(InferringColumnBuilderTest, MultipleChunkIntegerAndTime) {
+  // Fallback to utf-8
+  auto options = ConvertOptions::Defaults();
+  auto tg = TaskGroup::MakeSerial();
+  auto type = utf8();
+
+  CheckInferred(tg, {{""}, {"99"}, {"01:23:45", "NA"}}, options,
+                {ArrayFromJSON(type, R"([""])"), ArrayFromJSON(type, R"(["99"])"),
+                 ArrayFromJSON(type, R"(["01:23:45", "NA"])")});
+}
+
+TEST_F(InferringColumnBuilderTest, SingleChunkDateAndTime) {
+  // Fallback to utf-8
+  auto options = ConvertOptions::Defaults();
+  auto tg = TaskGroup::MakeSerial();
+
+  CheckInferred(tg, {{"", "01:23:45", "1998-04-05"}}, options,
+                {ArrayFromJSON(utf8(), R"(["", "01:23:45", "1998-04-05"])")});
+}
+
+TEST_F(InferringColumnBuilderTest, MultipleChunkDateAndTime) {
+  // Fallback to utf-8
+  auto options = ConvertOptions::Defaults();
+  auto tg = TaskGroup::MakeSerial();
+  auto type = utf8();
+
+  CheckInferred(tg, {{""}, {"01:23:45"}, {"1998-04-05"}}, options,
+                {ArrayFromJSON(type, R"([""])"), ArrayFromJSON(type, R"(["01:23:45"])"),
+                 ArrayFromJSON(type, R"(["1998-04-05"])")});
+}
+
 TEST_F(InferringColumnBuilderTest, SingleChunkString) {
   auto options = ConvertOptions::Defaults();
   auto tg = TaskGroup::MakeSerial();
diff --git a/cpp/src/arrow/csv/converter.cc b/cpp/src/arrow/csv/converter.cc
index cb72b22b405..b1cde12a28e 100644
--- a/cpp/src/arrow/csv/converter.cc
+++ b/cpp/src/arrow/csv/converter.cc
@@ -130,7 +130,7 @@ struct ValueDecoder {
 
  protected:
   Trie null_trie_;
-  std::shared_ptr<DataType> type_;
+  const std::shared_ptr<DataType> type_;
   const ConvertOptions& options_;
 };
 
@@ -191,24 +191,29 @@ struct BinaryValueDecoder : public ValueDecoder {
 };
 
 //
-// Value decoder for integers and floats
+// Value decoder for integers, floats and temporals
 //
 
 template <typename T>
 struct NumericValueDecoder : public ValueDecoder {
   using value_type = typename T::c_type;
 
-  using ValueDecoder::ValueDecoder;
+  explicit NumericValueDecoder(const std::shared_ptr<DataType>& type,
+                               const ConvertOptions& options)
+      : ValueDecoder(type, options), concrete_type_(checked_cast<const T&>(*type)) {}
 
   Status Decode(const uint8_t* data, uint32_t size, bool quoted, value_type* out) {
     // XXX should quoted values be allowed at all?
     TrimWhiteSpace(&data, &size);
-    if (ARROW_PREDICT_FALSE(
-            !internal::ParseValue<T>(reinterpret_cast<const char*>(data), size, out))) {
+    if (ARROW_PREDICT_FALSE(!internal::ParseValue<T>(
+            concrete_type_, reinterpret_cast<const char*>(data), size, out))) {
       return GenericConversionError(type_, data, size);
     }
     return Status::OK();
   }
+
+ protected:
+  const T& concrete_type_;
 };
 
 //
@@ -569,6 +574,8 @@ Result<std::shared_ptr<Converter>> Converter::Make(const std::shared_ptr<DataTyp
     NUMERIC_CONVERTER_CASE(Type::DOUBLE, DoubleType)
     NUMERIC_CONVERTER_CASE(Type::DATE32, Date32Type)
     NUMERIC_CONVERTER_CASE(Type::DATE64, Date64Type)
+    NUMERIC_CONVERTER_CASE(Type::TIME32, Time32Type)
+    NUMERIC_CONVERTER_CASE(Type::TIME64, Time64Type)
     CONVERTER_CASE(Type::BOOL, (PrimitiveConverter<BooleanType, BooleanValueDecoder>))
     CONVERTER_CASE(Type::BINARY,
                    (PrimitiveConverter<BinaryType, BinaryValueDecoder<false>>))
diff --git a/cpp/src/arrow/csv/converter_test.cc b/cpp/src/arrow/csv/converter_test.cc
index 4bed649d558..f59d184f749 100644
--- a/cpp/src/arrow/csv/converter_test.cc
+++ b/cpp/src/arrow/csv/converter_test.cc
@@ -429,6 +429,11 @@ TEST(Date32Conversion, Nulls) {
                                         {{false, true}});
 }
 
+TEST(Date32Conversion, Errors) {
+  AssertConversionError(date32(), {"1945-06-31\n"}, {0});
+  AssertConversionError(date32(), {"2020-13-01\n"}, {0});
+}
+
 TEST(Date64Conversion, Basics) {
   AssertConversion<Date64Type, int64_t>(date64(), {"1945-05-08\n", "2020-03-15\n"},
                                         {{-777945600000LL, 1584230400000LL}});
@@ -439,6 +444,63 @@ TEST(Date64Conversion, Nulls) {
                                         {{0, 1584230400000LL}}, {{false, true}});
 }
 
+TEST(Date64Conversion, Errors) {
+  AssertConversionError(date64(), {"1945-06-31\n"}, {0});
+  AssertConversionError(date64(), {"2020-13-01\n"}, {0});
+}
+
+TEST(Time32Conversion, Seconds) {
+  const auto type = time32(TimeUnit::SECOND);
+
+  AssertConversion<Time32Type, int32_t>(type, {"00:00\n", "00:00:00\n"}, {{0, 0}});
+  AssertConversion<Time32Type, int32_t>(type, {"01:23:45\n", "23:45:43\n"},
+                                        {{5025, 85543}});
+  AssertConversion<Time32Type, int32_t>(type, {"N/A\n", "23:59:59\n"}, {{0, 86399}},
+                                        {{false, true}});
+
+  AssertConversionError(type, {"24:00\n"}, {0});
+  AssertConversionError(type, {"23:59:60\n"}, {0});
+}
+
+TEST(Time32Conversion, Millis) {
+  const auto type = time32(TimeUnit::MILLI);
+
+  AssertConversion<Time32Type, int32_t>(type, {"00:00\n", "00:00:00\n"}, {{0, 0}});
+  AssertConversion<Time32Type, int32_t>(type, {"01:23:45.1\n", "23:45:43.789\n"},
+                                        {{5025100, 85543789}});
+  AssertConversion<Time32Type, int32_t>(type, {"N/A\n", "23:59:59.999\n"},
+                                        {{0, 86399999}}, {{false, true}});
+
+  AssertConversionError(type, {"24:00\n"}, {0});
+  AssertConversionError(type, {"23:59:60\n"}, {0});
+}
+
+TEST(Time64Conversion, Micros) {
+  const auto type = time64(TimeUnit::MICRO);
+
+  AssertConversion<Time64Type, int64_t>(type, {"00:00\n", "00:00:00\n"}, {{0LL, 0LL}});
+  AssertConversion<Time64Type, int64_t>(type, {"01:23:45.1\n", "23:45:43.456789\n"},
+                                        {{5025100000LL, 85543456789LL}});
+  AssertConversion<Time64Type, int64_t>(type, {"N/A\n", "23:59:59.999999\n"},
+                                        {{0, 86399999999LL}}, {{false, true}});
+
+  AssertConversionError(type, {"24:00\n"}, {0});
+  AssertConversionError(type, {"23:59:60\n"}, {0});
+}
+
+TEST(Time64Conversion, Nanos) {
+  const auto type = time64(TimeUnit::NANO);
+
+  AssertConversion<Time64Type, int64_t>(type, {"00:00\n", "00:00:00\n"}, {{0LL, 0LL}});
+  AssertConversion<Time64Type, int64_t>(type, {"01:23:45.1\n", "23:45:43.123456789\n"},
+                                        {{5025100000000LL, 85543123456789LL}});
+  AssertConversion<Time64Type, int64_t>(type, {"N/A\n", "23:59:59.999999999\n"},
+                                        {{0, 86399999999999LL}}, {{false, true}});
+
+  AssertConversionError(type, {"24:00\n"}, {0});
+  AssertConversionError(type, {"23:59:60\n"}, {0});
+}
+
 TEST(TimestampConversion, Basics) {
   auto type = timestamp(TimeUnit::SECOND);
 
diff --git a/cpp/src/arrow/csv/inference_internal.h b/cpp/src/arrow/csv/inference_internal.h
index 42486a1ebaf..1fd6d41b5cc 100644
--- a/cpp/src/arrow/csv/inference_internal.h
+++ b/cpp/src/arrow/csv/inference_internal.h
@@ -32,6 +32,7 @@ enum class InferKind {
   Boolean,
   Real,
   Date,
+  Time,
   Timestamp,
   TimestampNS,
   TextDict,
@@ -60,6 +61,8 @@ class InferStatus {
       case InferKind::Boolean:
         return SetKind(InferKind::Date);
       case InferKind::Date:
+        return SetKind(InferKind::Time);
+      case InferKind::Time:
         return SetKind(InferKind::Timestamp);
       case InferKind::Timestamp:
         return SetKind(InferKind::TimestampNS);
@@ -114,6 +117,8 @@ class InferStatus {
         return make_converter(boolean());
       case InferKind::Date:
         return make_converter(date32());
+      case InferKind::Time:
+        return make_converter(time32(TimeUnit::SECOND));
       case InferKind::Timestamp:
         return make_converter(timestamp(TimeUnit::SECOND));
       case InferKind::TimestampNS:
diff --git a/cpp/src/arrow/util/value_parsing.h b/cpp/src/arrow/util/value_parsing.h
index 00295d1b51f..02e6fa42e01 100644
--- a/cpp/src/arrow/util/value_parsing.h
+++ b/cpp/src/arrow/util/value_parsing.h
@@ -719,7 +719,9 @@ struct StringConverter<DATE_TYPE, enable_if_date<DATE_TYPE>> {
 
   static bool Convert(const DATE_TYPE& type, const char* s, size_t length,
                       value_type* out) {
-    if (length != 10) return false;
+    if (ARROW_PREDICT_FALSE(length != 10)) {
+      return false;
+    }
 
     duration_type since_epoch;
     if (ARROW_PREDICT_FALSE(!detail::ParseYYYY_MM_DD(s, &since_epoch))) {
@@ -735,12 +737,36 @@ template <typename TIME_TYPE>
 struct StringConverter<TIME_TYPE, enable_if_time<TIME_TYPE>> {
   using value_type = typename TIME_TYPE::c_type;
 
+  // We allow the following formats for all units:
+  // - "hh:mm"
+  // - "hh:mm:ss"
+  //
+  // We allow the following formats for unit == MILLI, MICRO, or NANO:
+  // - "hh:mm:ss.s{1,3}"
+  //
+  // We allow the following formats for unit == MICRO, or NANO:
+  // - "hh:mm:ss.s{4,6}"
+  //
+  // We allow the following formats for unit == NANO:
+  // - "hh:mm:ss.s{7,9}"
+
   static bool Convert(const TIME_TYPE& type, const char* s, size_t length,
                       value_type* out) {
-    if (length < 8) return false;
-    auto unit = type.unit();
-
+    const auto unit = type.unit();
     std::chrono::seconds since_midnight;
+
+    if (length == 5) {
+      if (ARROW_PREDICT_FALSE(!detail::ParseHH_MM(s, &since_midnight))) {
+        return false;
+      }
+      *out =
+          static_cast<value_type>(util::CastSecondsToUnit(unit, since_midnight.count()));
+      return true;
+    }
+
+    if (ARROW_PREDICT_FALSE(length < 8)) {
+      return false;
+    }
     if (ARROW_PREDICT_FALSE(!detail::ParseHH_MM_SS(s, &since_midnight))) {
       return false;
     }
@@ -751,6 +777,10 @@ struct StringConverter<TIME_TYPE, enable_if_time<TIME_TYPE>> {
       return true;
     }
 
+    if (ARROW_PREDICT_FALSE(s[8] != '.')) {
+      return false;
+    }
+
     uint32_t subseconds_count = 0;
     if (ARROW_PREDICT_FALSE(
             !detail::ParseSubSeconds(s + 9, length - 9, unit, &subseconds_count))) {
diff --git a/cpp/src/arrow/util/value_parsing_test.cc b/cpp/src/arrow/util/value_parsing_test.cc
index e790a10acf1..b5dc5619ded 100644
--- a/cpp/src/arrow/util/value_parsing_test.cc
+++ b/cpp/src/arrow/util/value_parsing_test.cc
@@ -264,6 +264,103 @@ TEST(StringConversion, ToDate64) {
   AssertConversion<Date64Type>("0001-01-01", -62135596800000LL);
 }
 
+template <typename T>
+void AssertInvalidTimes(const T& type) {
+  // Invalid time format
+  AssertConversionFails(type, "");
+  AssertConversionFails(type, "00");
+  AssertConversionFails(type, "00:");
+  AssertConversionFails(type, "00:00:");
+  AssertConversionFails(type, "00:00:00:");
+  AssertConversionFails(type, "000000");
+  AssertConversionFails(type, "000000.000");
+
+  // Invalid time value
+  AssertConversionFails(type, "24:00:00");
+  AssertConversionFails(type, "00:60:00");
+  AssertConversionFails(type, "00:00:60");
+}
+
+TEST(StringConversion, ToTime32) {
+  {
+    Time32Type type{TimeUnit::SECOND};
+
+    AssertConversion(type, "00:00", 0);
+    AssertConversion(type, "01:23", 4980);
+    AssertConversion(type, "23:59", 86340);
+
+    AssertConversion(type, "00:00:00", 0);
+    AssertConversion(type, "01:23:45", 5025);
+    AssertConversion(type, "23:45:43", 85543);
+    AssertConversion(type, "23:59:59", 86399);
+
+    AssertInvalidTimes(type);
+    // No subseconds allowed
+    AssertConversionFails(type, "00:00:00.123");
+  }
+  {
+    Time32Type type{TimeUnit::MILLI};
+
+    AssertConversion(type, "00:00", 0);
+    AssertConversion(type, "01:23", 4980000);
+    AssertConversion(type, "23:59", 86340000);
+
+    AssertConversion(type, "00:00:00", 0);
+    AssertConversion(type, "01:23:45", 5025000);
+    AssertConversion(type, "23:45:43", 85543000);
+    AssertConversion(type, "23:59:59", 86399000);
+
+    AssertConversion(type, "00:00:00.123", 123);
+    AssertConversion(type, "01:23:45.000", 5025000);
+    AssertConversion(type, "01:23:45.1", 5025100);
+    AssertConversion(type, "01:23:45.123", 5025123);
+    AssertConversion(type, "01:23:45.999", 5025999);
+
+    AssertInvalidTimes(type);
+    // Invalid subseconds
+    AssertConversionFails(type, "00:00:00.1234");
+  }
+}
+
+TEST(StringConversion, ToTime64) {
+  {
+    Time64Type type{TimeUnit::MICRO};
+
+    AssertConversion(type, "00:00:00", 0LL);
+    AssertConversion(type, "01:23:45", 5025000000LL);
+    AssertConversion(type, "23:45:43", 85543000000LL);
+    AssertConversion(type, "23:59:59", 86399000000LL);
+
+    AssertConversion(type, "00:00:00.123456", 123456LL);
+    AssertConversion(type, "01:23:45.000000", 5025000000LL);
+    AssertConversion(type, "01:23:45.1", 5025100000LL);
+    AssertConversion(type, "01:23:45.123", 5025123000LL);
+    AssertConversion(type, "01:23:45.999999", 5025999999LL);
+
+    AssertInvalidTimes(type);
+    // Invalid subseconds
+    AssertConversionFails(type, "00:00:00.1234567");
+  }
+  {
+    Time64Type type{TimeUnit::NANO};
+
+    AssertConversion(type, "00:00:00", 0LL);
+    AssertConversion(type, "01:23:45", 5025000000000LL);
+    AssertConversion(type, "23:45:43", 85543000000000LL);
+    AssertConversion(type, "23:59:59", 86399000000000LL);
+
+    AssertConversion(type, "00:00:00.123456789", 123456789LL);
+    AssertConversion(type, "01:23:45.000000000", 5025000000000LL);
+    AssertConversion(type, "01:23:45.1", 5025100000000LL);
+    AssertConversion(type, "01:23:45.1234", 5025123400000LL);
+    AssertConversion(type, "01:23:45.999999999", 5025999999999LL);
+
+    AssertInvalidTimes(type);
+    // Invalid subseconds
+    AssertConversionFails(type, "00:00:00.1234567891");
+  }
+}
+
 TEST(StringConversion, ToTimestampDate_ISO8601) {
   {
     TimestampType type{TimeUnit::SECOND};
diff --git a/docs/source/cpp/csv.rst b/docs/source/cpp/csv.rst
index 2f92708e79f..5ca17a1653b 100644
--- a/docs/source/cpp/csv.rst
+++ b/docs/source/cpp/csv.rst
@@ -151,6 +151,7 @@ column.  Type inference considers the following data types, in order:
 * Int64
 * Boolean
 * Date32
+* Time32 (with seconds unit)
 * Timestamp (with seconds unit)
 * Timestamp (with nanoseconds unit)
 * Float64
@@ -169,6 +170,7 @@ can be chosen from the following list:
 * Decimal128
 * Boolean
 * Date32 and Date64
+* Time32 and Time64
 * Timestamp
 * Binary and Large Binary
 * String and Large String (with optional UTF8 input validation)
diff --git a/docs/source/python/csv.rst b/docs/source/python/csv.rst
index 9c00027b041..1724c63f417 100644
--- a/docs/source/python/csv.rst
+++ b/docs/source/python/csv.rst
@@ -29,7 +29,8 @@ The features currently offered are the following:
   such as ``my_data.csv.gz``)
 * fetching column names from the first row in the CSV file
 * column-wise type inference and conversion to one of ``null``, ``int64``,
-  ``float64``, ``date32``, ``timestamp[s]``, ``timestamp[ns]``, ``string`` or ``binary`` data
+  ``float64``, ``date32``, ``time32[s]``, ``timestamp[s]``, ``timestamp[ns]``,
+  ``string`` or ``binary`` data
 * opportunistic dictionary encoding of ``string`` and ``binary`` columns
   (disabled by default)
 * detecting various spellings of null values such as ``NaN`` or ``#N/A``
diff --git a/python/pyarrow/tests/test_csv.py b/python/pyarrow/tests/test_csv.py
index 0c05c28290e..2f3ef4fca9e 100644
--- a/python/pyarrow/tests/test_csv.py
+++ b/python/pyarrow/tests/test_csv.py
@@ -894,6 +894,33 @@ def test_dates(self):
             'b': [datetime(1970, 1, 2), datetime(1971, 1, 2)],
         }
 
+    def test_times(self):
+        # Times are inferred as time32[s] by default
+        from datetime import time
+
+        rows = b"a,b\n12:34:56,12:34:56.789\n23:59:59,23:59:59.999\n"
+        table = self.read_bytes(rows)
+        # Column 'b' has subseconds, so cannot be inferred as time32[s]
+        schema = pa.schema([('a', pa.time32('s')),
+                            ('b', pa.string())])
+        assert table.schema == schema
+        assert table.to_pydict() == {
+            'a': [time(12, 34, 56), time(23, 59, 59)],
+            'b': ["12:34:56.789", "23:59:59.999"],
+        }
+
+        # Can ask for time types explicitly
+        opts = ConvertOptions()
+        opts.column_types = {'a': pa.time64('us'), 'b': pa.time32('ms')}
+        table = self.read_bytes(rows, convert_options=opts)
+        schema = pa.schema([('a', pa.time64('us')),
+                            ('b', pa.time32('ms'))])
+        assert table.schema == schema
+        assert table.to_pydict() == {
+            'a': [time(12, 34, 56), time(23, 59, 59)],
+            'b': [time(12, 34, 56, 789000), time(23, 59, 59, 999000)],
+        }
+
     def test_auto_dict_encode(self):
         opts = ConvertOptions(auto_dict_encode=True)
         rows = "a,b\nab,1\ncdé,2\ncdé,3\nab,4".encode()

From ec91a50897efe163e7edcc0ec191b84a260c9f47 Mon Sep 17 00:00:00 2001
From: Alessandro Molina <amol@turbogears.org>
Date: Tue, 27 Jul 2021 07:44:58 -0400
Subject: [PATCH 650/719] ARROW-12837: [C++] Do not crash when printing invalid
 arrays

Closes #10713 from amol-/ARROW-12837

Authored-by: Alessandro Molina <amol@turbogears.org>
Signed-off-by: Benjamin Kietzman <bengilgit@gmail.com>
---
 cpp/src/arrow/array/array_test.cc | 8 ++++++++
 cpp/src/arrow/pretty_print.cc     | 6 ++++++
 2 files changed, 14 insertions(+)

diff --git a/cpp/src/arrow/array/array_test.cc b/cpp/src/arrow/array/array_test.cc
index 7b10acb3b13..baa1c54e46b 100644
--- a/cpp/src/arrow/array/array_test.cc
+++ b/cpp/src/arrow/array/array_test.cc
@@ -111,6 +111,14 @@ TEST_F(TestArray, TestLength) {
   ASSERT_EQ(arr->length(), 100);
 }
 
+TEST_F(TestArray, TestNullToString) {
+  // Invalid NULL buffer
+  auto data = std::make_shared<Buffer>(nullptr, 400);
+
+  std::unique_ptr<Int32Array> arr(new Int32Array(100, data));
+  ASSERT_EQ(arr->ToString(), "<InvalidArray: Missing values buffer in non-empty array>");
+}
+
 TEST_F(TestArray, TestSliceSafe) {
   std::vector<int32_t> original_data{1, 2, 3, 4, 5, 6, 7};
   auto arr = std::make_shared<Int32Array>(7, Buffer::Wrap(original_data));
diff --git a/cpp/src/arrow/pretty_print.cc b/cpp/src/arrow/pretty_print.cc
index 8d1c16e0ed6..60cdaf0d6af 100644
--- a/cpp/src/arrow/pretty_print.cc
+++ b/cpp/src/arrow/pretty_print.cc
@@ -325,6 +325,12 @@ class ArrayPrinter : public PrettyPrinter {
                   std::is_base_of<FixedSizeListArray, T>::value,
               Status>
   Visit(const T& array) {
+    Status st = array.Validate();
+    if (!st.ok()) {
+      (*sink_) << "<InvalidArray: " << st.message() << ">";
+      return Status::OK();
+    }
+
     OpenArray(array);
     if (array.length() > 0) {
       RETURN_NOT_OK(WriteDataValues(array));

From fe0c99b319443b936a15b9940cbaa57fbf935d90 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Tue, 27 Jul 2021 15:41:26 +0200
Subject: [PATCH 651/719] ARROW-13447: [Release] Verification script for arm64
 and universal2 macOS wheels
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- verification script for arm64 macos wheels
- verification tasks for arm64 macos wheels
- verification tasks for source release on arm64 macos

Closes #10799 from kszucs/apple-silicon-verification

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 ci/scripts/python_wheel_macos_build.sh        |  4 +
 ci/scripts/python_wheel_manylinux_test.sh     | 73 ----------------
 ...acos_test.sh => python_wheel_unix_test.sh} | 49 +++++++----
 ci/scripts/python_wheel_windows_test.bat      |  2 +-
 dev/release/verify-release-candidate.sh       | 85 +++++++++++--------
 dev/tasks/python-wheels/github.osx.amd64.yml  |  6 +-
 dev/tasks/python-wheels/github.osx.arm64.yml  |  6 +-
 dev/tasks/tasks.yml                           | 81 +++++++++++++++---
 ...ithub.linux.yml => github.linux.amd64.yml} |  2 +-
 ...ithub.macos.yml => github.macos.amd64.yml} |  2 +-
 dev/tasks/verify-rc/github.macos.arm64.yml    | 48 +++++++++++
 docker-compose.yml                            | 10 ++-
 12 files changed, 219 insertions(+), 149 deletions(-)
 delete mode 100755 ci/scripts/python_wheel_manylinux_test.sh
 rename ci/scripts/{python_wheel_macos_test.sh => python_wheel_unix_test.sh} (63%)
 rename dev/tasks/verify-rc/{github.linux.yml => github.linux.amd64.yml} (97%)
 rename dev/tasks/verify-rc/{github.macos.yml => github.macos.amd64.yml} (96%)
 create mode 100644 dev/tasks/verify-rc/github.macos.arm64.yml

diff --git a/ci/scripts/python_wheel_macos_build.sh b/ci/scripts/python_wheel_macos_build.sh
index f9fe8f98a3e..82e0339c9d0 100755
--- a/ci/scripts/python_wheel_macos_build.sh
+++ b/ci/scripts/python_wheel_macos_build.sh
@@ -159,3 +159,7 @@ if echo $deps | grep -v "^pyarrow/lib\(arrow\|gandiva\|parquet\|plasma\)"; then
   echo "There are non-bundled shared library dependencies."
   exit 1
 fi
+
+# Move the verified wheels
+mkdir -p ${source_dir}/python/repaired_wheels
+mv ${source_dir}/python/dist/*.whl ${source_dir}/python/repaired_wheels/
diff --git a/ci/scripts/python_wheel_manylinux_test.sh b/ci/scripts/python_wheel_manylinux_test.sh
deleted file mode 100755
index 21987748f73..00000000000
--- a/ci/scripts/python_wheel_manylinux_test.sh
+++ /dev/null
@@ -1,73 +0,0 @@
-#!/usr/bin/env bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-set -e
-set -x
-set -o pipefail
-
-case $# in
-  1) KIND="$1"
-     case $KIND in
-       imports|unittests) ;;
-       *) echo "Invalid argument: '${KIND}', valid options are 'imports', 'unittests'"
-          exit 1
-          ;;
-     esac
-     ;;
-  *) echo "Usage: $0 imports|unittests"
-     exit 1
-     ;;
-esac
-
-export PYARROW_TEST_CYTHON=OFF
-export PYARROW_TEST_DATASET=ON
-export PYARROW_TEST_GANDIVA=OFF
-export PYARROW_TEST_HDFS=ON
-export PYARROW_TEST_ORC=ON
-export PYARROW_TEST_PANDAS=ON
-export PYARROW_TEST_PARQUET=ON
-export PYARROW_TEST_PLASMA=ON
-export PYARROW_TEST_S3=ON
-export PYARROW_TEST_TENSORFLOW=ON
-export PYARROW_TEST_FLIGHT=ON
-
-export ARROW_TEST_DATA=/arrow/testing/data
-export PARQUET_TEST_DATA=/arrow/submodules/parquet-testing/data
-
-# Install the built wheels
-pip install /arrow/python/repaired_wheels/*.whl
-
-if [ "${KIND}" == "imports" ]; then
-  # Test that the modules are importable
-  python -c "
-import pyarrow
-import pyarrow._hdfs
-import pyarrow._s3fs
-import pyarrow.csv
-import pyarrow.dataset
-import pyarrow.flight
-import pyarrow.fs
-import pyarrow.json
-import pyarrow.orc
-import pyarrow.parquet
-import pyarrow.plasma"
-elif [ "${KIND}" == "unittests" ]; then
-  # Execute unittest, test dependencies must be installed
-  pytest -r s --pyargs pyarrow
-fi
diff --git a/ci/scripts/python_wheel_macos_test.sh b/ci/scripts/python_wheel_unix_test.sh
similarity index 63%
rename from ci/scripts/python_wheel_macos_test.sh
rename to ci/scripts/python_wheel_unix_test.sh
index 5dabf6e8c41..50d3ccb4f8e 100755
--- a/ci/scripts/python_wheel_macos_test.sh
+++ b/ci/scripts/python_wheel_unix_test.sh
@@ -17,12 +17,22 @@
 # specific language governing permissions and limitations
 # under the License.
 
-set -ex
+set -e
+set -x
+set -o pipefail
+
+if [ "$#" -ne 1 ]; then
+  echo "Usage: $0 <arrow-src-dir>"
+  exit 1
+fi
 
 source_dir=${1}
 
-: ${ARROW_S3:=ON}
 : ${ARROW_FLIGHT:=ON}
+: ${ARROW_S3:=ON}
+: ${CHECK_IMPORTS:=ON}
+: ${CHECK_UNITTESTS:=ON}
+: ${INSTALL_PYARROW:=ON}
 
 export PYARROW_TEST_CYTHON=OFF
 export PYARROW_TEST_DATASET=ON
@@ -39,11 +49,14 @@ export PYARROW_TEST_TENSORFLOW=ON
 export ARROW_TEST_DATA=${source_dir}/testing/data
 export PARQUET_TEST_DATA=${source_dir}/submodules/parquet-testing/data
 
-# Install the built wheels
-pip install ${source_dir}/python/dist/*.whl
+if [ "${INSTALL_PYARROW}" == "ON" ]; then
+  # Install the built wheels
+  pip install ${source_dir}/python/repaired_wheels/*.whl
+fi
 
-# Test that the modules are importable
-python -c "
+if [ "${CHECK_IMPORTS}" == "ON" ]; then
+  # Test that the modules are importable
+  python -c "
 import pyarrow
 import pyarrow._hdfs
 import pyarrow.csv
@@ -54,16 +67,18 @@ import pyarrow.orc
 import pyarrow.parquet
 import pyarrow.plasma
 "
-
-if [ "${PYARROW_TEST_S3}" == "ON" ]; then
-  python -c "import pyarrow._s3fs"
-fi
-if [ "${PYARROW_TEST_FLIGHT}" == "ON" ]; then
-  python -c "import pyarrow.flight"
+  if [ "${PYARROW_TEST_S3}" == "ON" ]; then
+    python -c "import pyarrow._s3fs"
+  fi
+  if [ "${PYARROW_TEST_FLIGHT}" == "ON" ]; then
+    python -c "import pyarrow.flight"
+  fi
 fi
 
-# Install testing dependencies
-pip install -r ${source_dir}/python/requirements-wheel-test.txt
-
-# Execute unittest
-pytest -r s --pyargs pyarrow
+if [ "${CHECK_UNITTESTS}" == "ON" ]; then
+  # Install testing dependencies
+  pip install -U -r ${source_dir}/python/requirements-wheel-test.txt
+  # Execute unittest, test dependencies must be installed
+  python -c 'import pyarrow; pyarrow.create_library_symlinks()'
+  pytest -r s --pyargs pyarrow
+fi
diff --git a/ci/scripts/python_wheel_windows_test.bat b/ci/scripts/python_wheel_windows_test.bat
index 8352e586226..586fd58f651 100755
--- a/ci/scripts/python_wheel_windows_test.bat
+++ b/ci/scripts/python_wheel_windows_test.bat
@@ -19,6 +19,7 @@
 
 set PYARROW_TEST_CYTHON=OFF
 set PYARROW_TEST_DATASET=ON
+set PYARROW_TEST_FLIGHT=ON
 set PYARROW_TEST_GANDIVA=OFF
 set PYARROW_TEST_HDFS=ON
 set PYARROW_TEST_ORC=OFF
@@ -27,7 +28,6 @@ set PYARROW_TEST_PARQUET=ON
 set PYARROW_TEST_PLASMA=OFF
 set PYARROW_TEST_S3=OFF
 set PYARROW_TEST_TENSORFLOW=ON
-set PYARROW_TEST_FLIGHT=ON
 
 set ARROW_TEST_DATA=C:\arrow\testing\data
 set PARQUET_TEST_DATA=C:\arrow\submodules\parquet-testing\data
diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh
index 5dc6d869147..bf8e51fe8fe 100755
--- a/dev/release/verify-release-candidate.sh
+++ b/dev/release/verify-release-candidate.sh
@@ -572,29 +572,6 @@ test_binary_distribution() {
   fi
 }
 
-check_python_imports() {
-   python << IMPORT_TESTS
-import platform
-
-import pyarrow
-import pyarrow.parquet
-import pyarrow.plasma
-import pyarrow.fs
-import pyarrow._hdfs
-import pyarrow.dataset
-import pyarrow.flight
-
-if platform.system() == "Darwin":
-    macos_version = tuple(map(int, platform.mac_ver()[0].split('.')))
-    check_s3fs = macos_version >= (10, 13)
-else:
-    check_s3fs = True
-
-if check_s3fs:
-    import pyarrow._s3fs
-IMPORT_TESTS
-}
-
 test_linux_wheels() {
   local py_arches="3.6m 3.7m 3.8 3.9"
   local manylinuxes="2010 2014"
@@ -608,12 +585,7 @@ test_linux_wheels() {
     for ml_spec in ${manylinuxes}; do
       # check the mandatory and optional imports
       pip install python-rc/${VERSION}-rc${RC_NUMBER}/pyarrow-${VERSION}-cp${py_arch//[mu.]/}-cp${py_arch//./}-manylinux${ml_spec}_x86_64.whl
-      check_python_imports
-
-      # install test requirements and execute the tests
-      pip install -r ${ARROW_DIR}/python/requirements-test.txt
-      python -c 'import pyarrow; pyarrow.create_library_symlinks()'
-      pytest --pyargs pyarrow
+      INSTALL_PYARROW=OFF ${ARROW_DIR}/ci/scripts/python_wheel_unix_test.sh ${ARROW_DIR}
     done
 
     conda deactivate
@@ -622,7 +594,23 @@ test_linux_wheels() {
 
 test_macos_wheels() {
   local py_arches="3.6m 3.7m 3.8 3.9"
+  local macos_version=$(sw_vers -productVersion)
+  local macos_short_version=${macos_version:0:5}
 
+  local check_s3=ON
+  local check_flight=ON
+
+  # macOS version <= 10.13
+  if [ $(echo "${macos_short_version}\n10.14" | sort -V | head -n1) == "${macos_short_version}" ]; then
+    local check_s3=OFF
+  fi
+  # apple silicon processor
+  if [ "$(uname -m)" = "arm64" ]; then
+    local py_arches="3.9"
+    local check_flight=OFF
+  fi
+
+  # verify arch-native wheels inside an arch-native conda environment
   for py_arch in ${py_arches}; do
     local env=_verify_wheel-${py_arch}
     conda create -yq -n ${env} python=${py_arch//m/}
@@ -631,15 +619,42 @@ test_macos_wheels() {
 
     # check the mandatory and optional imports
     pip install --find-links python-rc/${VERSION}-rc${RC_NUMBER} pyarrow==${VERSION}
-    check_python_imports
-
-    # install test requirements and execute the tests
-    pip install -r ${ARROW_DIR}/python/requirements-test.txt
-    python -c 'import pyarrow; pyarrow.create_library_symlinks()'
-    pytest --pyargs pyarrow
+    INSTALL_PYARROW=OFF ARROW_FLIGHT=${check_flight} ARROW_S3=${check_s3} \
+      ${ARROW_DIR}/ci/scripts/python_wheel_unix_test.sh ${ARROW_DIR}
 
     conda deactivate
   done
+
+  # verify arm64 and universal2 wheels using an universal2 python binary
+  # the interpreter should be installed from python.org:
+  #   https://www.python.org/ftp/python/3.9.6/python-3.9.6-macosx10.9.pkg
+  if [ "$(uname -m)" = "arm64" ]; then
+    for py_arch in ${py_arches}; do
+      local pyver=${py_arch//m/}
+      local python="/Library/Frameworks/Python.framework/Versions/${pyver}/bin/python${pyver}"
+
+      # create and activate a virtualenv for testing as arm64
+      for arch in "arm64" "x86_64"; do
+        local venv="${ARROW_TMPDIR}/test-${arch}-virtualenv"
+        $python -m virtualenv $venv
+        source $venv/bin/activate
+        pip install -U pip
+
+        # install pyarrow's universal2 wheel
+        pip install \
+            --find-links python-rc/${VERSION}-rc${RC_NUMBER} \
+            --target $(python -c 'import site; print(site.getsitepackages()[0])') \
+            --platform macosx_11_0_universal2 \
+            --only-binary=:all: \
+            pyarrow==${VERSION}
+        # check the imports and execute the unittests
+        INSTALL_PYARROW=OFF ARROW_FLIGHT=${check_flight} ARROW_S3=${check_s3} \
+          arch -${arch} ${ARROW_DIR}/ci/scripts/python_wheel_unix_test.sh ${ARROW_DIR}
+
+        deactivate
+      done
+    done
+  fi
 }
 
 test_wheels() {
diff --git a/dev/tasks/python-wheels/github.osx.amd64.yml b/dev/tasks/python-wheels/github.osx.amd64.yml
index 863bd7fa9c1..8078abfd56f 100644
--- a/dev/tasks/python-wheels/github.osx.amd64.yml
+++ b/dev/tasks/python-wheels/github.osx.amd64.yml
@@ -104,7 +104,7 @@ jobs:
           $PYTHON -m virtualenv test-env
           source test-env/bin/activate
           pip install --upgrade pip wheel
-          arrow/ci/scripts/python_wheel_macos_test.sh $(pwd)/arrow
+          arrow/ci/scripts/python_wheel_unix_test.sh $(pwd)/arrow
 
-      {{ macros.github_upload_releases("arrow/python/dist/*.whl")|indent }}
-      {{ macros.github_upload_gemfury("arrow/python/dist/*.whl")|indent }}
+      {{ macros.github_upload_releases("arrow/python/repaired_wheels/*.whl")|indent }}
+      {{ macros.github_upload_gemfury("arrow/python/repaired_wheels/*.whl")|indent }}
diff --git a/dev/tasks/python-wheels/github.osx.arm64.yml b/dev/tasks/python-wheels/github.osx.arm64.yml
index 4fa95bbefdc..516d921ac5f 100644
--- a/dev/tasks/python-wheels/github.osx.arm64.yml
+++ b/dev/tasks/python-wheels/github.osx.arm64.yml
@@ -122,7 +122,7 @@ jobs:
           $PYTHON -m virtualenv test-arm64-env
           source test-arm64-env/bin/activate
           pip install --upgrade pip wheel
-          arch -arm64 arrow/ci/scripts/python_wheel_macos_test.sh $(pwd)/arrow
+          arch -arm64 arrow/ci/scripts/python_wheel_unix_test.sh $(pwd)/arrow
 
       {% if arch == "universal2" %}
       - name: Test Wheel on AMD64
@@ -131,7 +131,7 @@ jobs:
           $PYTHON -m virtualenv test-amd64-env
           source test-amd64-env/bin/activate
           pip install --upgrade pip wheel
-          arch -x86_64 arrow/ci/scripts/python_wheel_macos_test.sh $(pwd)/arrow
+          arch -x86_64 arrow/ci/scripts/python_wheel_unix_test.sh $(pwd)/arrow
       {% endif %}
 
       - name: Upload artifacts
@@ -150,4 +150,4 @@ jobs:
         env:
           CROSSBOW_GITHUB_TOKEN: {{ "${{ secrets.CROSSBOW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}" }}
 
-      {{ macros.github_upload_gemfury("arrow/python/dist/*.whl")|indent }}
+      {{ macros.github_upload_gemfury("arrow/python/repaired_wheels/*.whl")|indent }}
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index d325c8a82f2..7dc4af8884a 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -113,10 +113,10 @@ groups:
     - verify-rc-source-*
 
   verify-rc-source-macos:
-    - verify-rc-source-macos-*
+    - verify-rc-source-*-macos-*
 
   verify-rc-source-linux:
-    - verify-rc-source-linux-*
+    - verify-rc-source-*-linux-*
 
 {######################## Tasks to run regularly #############################}
 
@@ -718,9 +718,9 @@ tasks:
   ########################### Release verification ############################
 
 {% for target in ["binary", "yum", "apt"] %}
-  verify-rc-binaries-{{ target }}:
+  verify-rc-binaries-{{ target }}-amd64:
     ci: github
-    template: verify-rc/github.linux.yml
+    template: verify-rc/github.linux.amd64.yml
     params:
       env:
         TEST_DEFAULT: 0
@@ -728,39 +728,94 @@ tasks:
       artifact: "binaries"
 {% endfor %}
 
-{% for platform in ["linux", "macos"] %}
+{% for platform, arch, runner in [("linux", "amd64", "ubuntu-20.04"),
+                                  ("macos", "amd64", "macos-10.15")] %}
+  {% for target in ["cpp",
+                    "csharp",
+                    "go",
+                    "integration",
+                    "java",
+                    "js",
+                    "python",
+                    "ruby"] %}
 
-  verify-rc-wheels-{{ platform }}:
+  verify-rc-source-{{ target }}-{{ platform }}-{{ arch }}:
     ci: github
-    template: verify-rc/github.{{ platform }}.yml
+    template: verify-rc/github.{{ platform }}.{{ arch }}.yml
     params:
       env:
+        INSTALL_NODE: 0
         TEST_DEFAULT: 0
-      artifact: "wheels"
+        TEST_{{ target|upper }}: 1
+      artifact: "source"
+      github_runner: {{ runner }}
+  {% endfor %}
+{% endfor %}
 
+{% for platform, arch, runner in [("macos", "arm64", "self-hosted")] %}
   {% for target in ["cpp",
                     "csharp",
                     "go",
                     "integration",
-                    "java",
                     "js",
                     "python",
                     "ruby"] %}
 
-  verify-rc-source-{{ platform }}-{{ target }}:
+  verify-rc-source-{{ target }}-{{ platform }}-{{ arch }}:
     ci: github
-    template: verify-rc/github.{{ platform }}.yml
+    template: verify-rc/github.{{ platform }}.{{ arch }}.yml
     params:
       env:
+        ARROW_FLIGHT: 0
+        ARROW_GANDIVA: 0
         INSTALL_NODE: 0
         TEST_DEFAULT: 0
+        TEST_INTEGRATION_JAVA: 0
         TEST_{{ target|upper }}: 1
       artifact: "source"
-
+      github_runner: {{ runner }}
   {% endfor %}
-
 {% endfor %}
 
+  verify-rc-wheels-linux-amd64:
+    ci: github
+    template: verify-rc/github.linux.amd64.yml
+    params:
+      env:
+        TEST_DEFAULT: 0
+      artifact: "wheels"
+
+  verify-rc-wheels-macos-10.15-amd64:
+    ci: github
+    template: verify-rc/github.macos.amd64.yml
+    params:
+      github_runner: "macos-10.15"
+      env:
+        TEST_DEFAULT: 0
+      artifact: "wheels"
+
+  # The github hosted macos-11 runners are in preview only, but should be switched once they are generally available:
+  #   https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners#supported-runners-and-hardware-resources
+  verify-rc-wheels-macos-11-amd64:
+    ci: github
+    template: verify-rc/github.macos.arm64.yml
+    params:
+      github_runner: "self-hosted"
+      arch_emulation: "x86_64"
+      env:
+        TEST_DEFAULT: 0
+      artifact: "wheels"
+
+  verify-rc-wheels-macos-11-arm64:
+    ci: github
+    template: verify-rc/github.macos.arm64.yml
+    params:
+      github_runner: "self-hosted"
+      arch_emulation: "arm64"
+      env:
+        TEST_DEFAULT: 0
+      artifact: "wheels"
+
   verify-rc-source-windows:
     ci: github
     template: verify-rc/github.win.yml
diff --git a/dev/tasks/verify-rc/github.linux.yml b/dev/tasks/verify-rc/github.linux.amd64.yml
similarity index 97%
rename from dev/tasks/verify-rc/github.linux.yml
rename to dev/tasks/verify-rc/github.linux.amd64.yml
index fdd9de668f0..8a4613a49f7 100644
--- a/dev/tasks/verify-rc/github.linux.yml
+++ b/dev/tasks/verify-rc/github.linux.amd64.yml
@@ -22,7 +22,7 @@
 jobs:
   verify:
     name: "Verify release candidate Ubuntu {{ artifact }}"
-    runs-on: ubuntu-20.04
+    runs-on: {{ github_runner|default("ubuntu-20.04") }}
     {% if env is defined %}
     env:
     {% for key, value in env.items() %}
diff --git a/dev/tasks/verify-rc/github.macos.yml b/dev/tasks/verify-rc/github.macos.amd64.yml
similarity index 96%
rename from dev/tasks/verify-rc/github.macos.yml
rename to dev/tasks/verify-rc/github.macos.amd64.yml
index ab0c6563bdc..d39cda38203 100644
--- a/dev/tasks/verify-rc/github.macos.yml
+++ b/dev/tasks/verify-rc/github.macos.amd64.yml
@@ -22,7 +22,7 @@
 jobs:
   verify:
     name: "Verify release candidate macOS {{ artifact }}"
-    runs-on: macos-latest
+    runs-on: {{ github_runner|default("macos-latest") }}
     {% if env is defined %}
     env:
     {% for key, value in env.items() %}
diff --git a/dev/tasks/verify-rc/github.macos.arm64.yml b/dev/tasks/verify-rc/github.macos.arm64.yml
new file mode 100644
index 00000000000..26139ed6026
--- /dev/null
+++ b/dev/tasks/verify-rc/github.macos.arm64.yml
@@ -0,0 +1,48 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+{% import 'macros.jinja' as macros with context %}
+
+{{ macros.github_header() }}
+
+jobs:
+  verify:
+    name: "Verify release candidate macOS {{ artifact }}"
+    runs-on: {{ github_runner }}
+    {% if env is defined %}
+    env:
+    {% for key, value in env.items() %}
+      {{ key }}: {{ value }}
+    {% endfor %}
+    {% endif %}
+
+    steps:
+      - name: Cleanup
+        shell: bash
+        run: rm -rf arrow
+
+      {{ macros.github_checkout_arrow()|indent }}
+
+      - name: Run verification
+        shell: bash
+        run: |
+          export PATH="$(brew --prefix node@14)/bin:$PATH"
+          export PATH="$(brew --prefix ruby)/bin:$PATH"
+          export PKG_CONFIG_PATH="$(brew --prefix ruby)/lib/pkgconfig"
+          arch -{{ arch_emulation|default("arm64") }} arrow/dev/release/verify-release-candidate.sh \
+            {{ artifact }} \
+            {{ release|default("1.0.0") }} {{ rc|default("0") }}
diff --git a/docker-compose.yml b/docker-compose.yml
index 84ce4dac1ba..4290578b9bd 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -792,7 +792,10 @@ services:
     shm_size: 2G
     volumes:
       - .:/arrow:delegated
-    command: /arrow/ci/scripts/python_wheel_manylinux_test.sh imports
+    environment:
+      CHECK_IMPORTS: "ON"
+      CHECK_UNITTESTS: "OFF"
+    command: /arrow/ci/scripts/python_wheel_unix_test.sh /arrow
 
   python-wheel-manylinux-test-unittests:
     image: ${REPO}:${ARCH}-python-${PYTHON}-wheel-manylinux-test
@@ -807,7 +810,10 @@ services:
     shm_size: 2G
     volumes:
       - .:/arrow:delegated
-    command: /arrow/ci/scripts/python_wheel_manylinux_test.sh unittests
+    environment:
+      CHECK_IMPORTS: "OFF"
+      CHECK_UNITTESTS: "ON"
+    command: /arrow/ci/scripts/python_wheel_unix_test.sh /arrow
 
   python-wheel-windows-vs2017:
     # The windows images must be built locally and pushed to a remote registry:

From af02cdc9ebb5397eb048d39c6c443edadbf32943 Mon Sep 17 00:00:00 2001
From: Andrey Klochkov <aklochkov@liftoff.io>
Date: Tue, 27 Jul 2021 16:35:52 +0200
Subject: [PATCH 652/719] ARROW-11518: [C++][Parquet] Fix buffer allocation
 when reading/skipping boolean columns

Parquet reader crashes while reading boolean columns due to incorrect calculation of buffer size in TypedColumnReaderImpl::Skip. This change fixes the buffer size calculation to accommodate levels data.

Closes #9432 from diggerk/arrow-11518

Lead-authored-by: Andrey Klochkov <aklochkov@liftoff.io>
Co-authored-by: Antoine Pitrou <antoine@python.org>
Co-authored-by: Andrey Klochkov <diggerk@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/parquet/column_reader.cc       | 3 ++-
 cpp/src/parquet/column_scanner_test.cc | 6 ------
 cpp/src/parquet/test_util.h            | 8 +++++++-
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc
index 047d99fed9a..d56f163b58a 100644
--- a/cpp/src/parquet/column_reader.cc
+++ b/cpp/src/parquet/column_reader.cc
@@ -1089,8 +1089,9 @@ int64_t TypedColumnReaderImpl<DType>::Skip(int64_t num_rows_to_skip) {
 
       // This will be enough scratch space to accommodate 16-bit levels or any
       // value type
+      int value_size = type_traits<DType::type_num>::value_byte_size;
       std::shared_ptr<ResizableBuffer> scratch = AllocateBuffer(
-          this->pool_, batch_size * type_traits<DType::type_num>::value_byte_size);
+          this->pool_, batch_size * std::max<int>(sizeof(int16_t), value_size));
 
       do {
         batch_size = std::min(batch_size, rows_to_skip);
diff --git a/cpp/src/parquet/column_scanner_test.cc b/cpp/src/parquet/column_scanner_test.cc
index ea54319babe..f6d162e3db7 100644
--- a/cpp/src/parquet/column_scanner_test.cc
+++ b/cpp/src/parquet/column_scanner_test.cc
@@ -39,12 +39,6 @@ using schema::NodePtr;
 
 namespace test {
 
-template <>
-void InitDictValues<bool>(int num_values, int dict_per_page, std::vector<bool>& values,
-                          std::vector<uint8_t>& buffer) {
-  // No op for bool
-}
-
 template <typename Type>
 class TestFlatScanner : public ::testing::Test {
  public:
diff --git a/cpp/src/parquet/test_util.h b/cpp/src/parquet/test_util.h
index b41230827c8..d4e6de8251c 100644
--- a/cpp/src/parquet/test_util.h
+++ b/cpp/src/parquet/test_util.h
@@ -179,6 +179,12 @@ static void InitDictValues(int num_values, int num_dicts, std::vector<T>& values
   }
 }
 
+template <>
+inline void InitDictValues<bool>(int num_values, int num_dicts, std::vector<bool>& values,
+                                 std::vector<uint8_t>& buffer) {
+  // No op for bool
+}
+
 class MockPageReader : public PageReader {
  public:
   explicit MockPageReader(const std::vector<std::shared_ptr<Page>>& pages)
@@ -563,7 +569,7 @@ template <>
 void inline InitValues<bool>(int num_values, std::vector<bool>& values,
                              std::vector<uint8_t>& buffer) {
   values = {};
-  ::arrow::random_is_valid(num_values, 1., &values,
+  ::arrow::random_is_valid(num_values, 0.5, &values,
                            static_cast<int>(::arrow::random_seed()));
 }
 

From 9e8b1cb8019b76c70a79c5dab5843fbbdcc6a04f Mon Sep 17 00:00:00 2001
From: rvernica <rvernica@gmail.com>
Date: Tue, 27 Jul 2021 16:45:15 +0200
Subject: [PATCH 653/719] ARROW-13458  [C++][Docs] Typo in RecordBatch::schema

Fix typo in docs for RecordBatch::schema

Closes #10811 from rvernica/patch-4

Authored-by: rvernica <rvernica@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/record_batch.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/cpp/src/arrow/record_batch.h b/cpp/src/arrow/record_batch.h
index 3dc1f54a083..96683bfade8 100644
--- a/cpp/src/arrow/record_batch.h
+++ b/cpp/src/arrow/record_batch.h
@@ -85,8 +85,7 @@ class ARROW_EXPORT RecordBatch {
   /// \brief Determine if two record batches are approximately equal
   bool ApproxEquals(const RecordBatch& other) const;
 
-  // \return the table's schema
-  /// \return true if batches are equal
+  /// \return the record batch's schema
   const std::shared_ptr<Schema>& schema() const { return schema_; }
 
   /// \brief Retrieve all columns at once

From 31b60f3a2319cc628fa8c692d441f94fb32322fe Mon Sep 17 00:00:00 2001
From: rvernica <rvernica@gmail.com>
Date: Tue, 27 Jul 2021 16:47:15 +0200
Subject: [PATCH 654/719] ARROW-13455  [C++][Docs] Typo in
 RecordBatch::SetColumn

Fix typo in SetColumn docs

Closes #10810 from rvernica/patch-3

Authored-by: rvernica <rvernica@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/record_batch.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/src/arrow/record_batch.h b/cpp/src/arrow/record_batch.h
index 96683bfade8..92ffa8b87fb 100644
--- a/cpp/src/arrow/record_batch.h
+++ b/cpp/src/arrow/record_batch.h
@@ -129,7 +129,7 @@ class ARROW_EXPORT RecordBatch {
   virtual Result<std::shared_ptr<RecordBatch>> AddColumn(
       int i, std::string field_name, const std::shared_ptr<Array>& column) const;
 
-  /// \brief Replace a column in the table, producing a new Table
+  /// \brief Replace a column in the record batch, producing a new RecordBatch
   virtual Result<std::shared_ptr<RecordBatch>> SetColumn(
       int i, const std::shared_ptr<Field>& field,
       const std::shared_ptr<Array>& column) const = 0;

From 5b56cb57ec22cd6c136959a5c85381031bcc3344 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Tue, 27 Jul 2021 17:56:18 +0200
Subject: [PATCH 655/719] ARROW-13461: [Python][Packaging] Build M1 wheels for
 python 3.8
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #10812 from kszucs/apple-silicon-py38

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 ci/scripts/install_python.sh                 |  2 +-
 dev/tasks/python-wheels/github.osx.arm64.yml |  6 ++---
 dev/tasks/tasks.yml                          | 28 +++++++++++++++-----
 3 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/ci/scripts/install_python.sh b/ci/scripts/install_python.sh
index bede67216a2..088da817676 100755
--- a/ci/scripts/install_python.sh
+++ b/ci/scripts/install_python.sh
@@ -27,7 +27,7 @@ platforms=([windows]=Windows
 declare -A versions
 versions=([3.6]=3.6.8
           [3.7]=3.7.9
-          [3.8]=3.8.9
+          [3.8]=3.8.10
           [3.9]=3.9.6)
 
 if [ "$#" -ne 2 ]; then
diff --git a/dev/tasks/python-wheels/github.osx.arm64.yml b/dev/tasks/python-wheels/github.osx.arm64.yml
index 516d921ac5f..1000510150f 100644
--- a/dev/tasks/python-wheels/github.osx.arm64.yml
+++ b/dev/tasks/python-wheels/github.osx.arm64.yml
@@ -139,14 +139,14 @@ jobs:
         run: |
           $PYTHON -m virtualenv crossbow-env
           source crossbow-env/bin/activate
-          arch -arm64 pip install -e arrow/dev/archery[crossbow-upload]
-          arch -arm64 archery crossbow \
+          arch -x86_64 pip install -e arrow/dev/archery[crossbow-upload]
+          arch -x86_64 archery crossbow \
           --queue-path $(pwd) \
           --queue-remote {{ queue_remote_url }} \
           upload-artifacts \
           --sha {{ task.branch }} \
           --tag {{ task.tag }} \
-          "arrow/python/dist/*.whl"
+          "arrow/python/repaired_wheels/*.whl"
         env:
           CROSSBOW_GITHUB_TOKEN: {{ "${{ secrets.CROSSBOW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}" }}
 
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index 7dc4af8884a..74aaff24be5 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -382,27 +382,43 @@ tasks:
 
 {############################## Wheel OSX M1 #################################}
 
-  wheel-macos-big-sur-cp39-arm64:
+  # The python 3.8 universal2 installer has been built with macos deployment
+  # target 11.0, so we cannot build binaries with earlier deployment target
+  # otherwise distutils will raise a deployment target version mismatch error.
+  wheel-macos-big-sur-py38-arm64:
     ci: github
     template: python-wheels/github.osx.arm64.yml
     params:
       arch: arm64
       vcpkg_version: "2021.04.30"
-      python_version: "3.9"
+      python_version: "3.8"
       macos_deployment_target: "11.0"
     artifacts:
-      - pyarrow-{no_rc_version}-cp39-cp39-macosx_11_0_arm64.whl
+      - pyarrow-{no_rc_version}-cp38-cp38-macosx_11_0_arm64.whl
 
-  wheel-macos-big-sur-cp39-universal2:
+{% for python_version, python_tag in [("3.9", "cp39")] %}
+  wheel-macos-big-sur-{{ python_tag }}-arm64:
+    ci: github
+    template: python-wheels/github.osx.arm64.yml
+    params:
+      arch: arm64
+      vcpkg_version: "2021.04.30"
+      python_version: "{{ python_version }}"
+      macos_deployment_target: "11.0"
+    artifacts:
+      - pyarrow-{no_rc_version}-{{ python_tag }}-{{ python_tag }}-macosx_11_0_arm64.whl
+
+  wheel-macos-big-sur-{{ python_tag }}-universal2:
     ci: github
     template: python-wheels/github.osx.arm64.yml
     params:
       arch: universal2
       vcpkg_version: "2021.04.30"
-      python_version: "3.9"
+      python_version: "{{ python_version }}"
       macos_deployment_target: "10.13"
     artifacts:
-      - pyarrow-{no_rc_version}-cp39-cp39-macosx_10_13_universal2.whl
+      - pyarrow-{no_rc_version}-{{ python_tag }}-{{ python_tag }}-macosx_10_13_universal2.whl
+{% endfor %}
 
 {############################ Python sdist ####################################}
 

From 1f1d94be8d5a83347985707070f4c6762a255204 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Tue, 27 Jul 2021 18:45:17 +0200
Subject: [PATCH 656/719] ARROW-13463: [Release][Python] Verify python 3.8
 macOS arm64 wheel
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #10815 from kszucs/verify-m1-cp38-arm64

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
---
 dev/release/verify-release-candidate.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh
index bf8e51fe8fe..3360b8d2fd5 100755
--- a/dev/release/verify-release-candidate.sh
+++ b/dev/release/verify-release-candidate.sh
@@ -606,7 +606,7 @@ test_macos_wheels() {
   fi
   # apple silicon processor
   if [ "$(uname -m)" = "arm64" ]; then
-    local py_arches="3.9"
+    local py_arches="3.8 3.9"
     local check_flight=OFF
   fi
 
@@ -629,7 +629,7 @@ test_macos_wheels() {
   # the interpreter should be installed from python.org:
   #   https://www.python.org/ftp/python/3.9.6/python-3.9.6-macosx10.9.pkg
   if [ "$(uname -m)" = "arm64" ]; then
-    for py_arch in ${py_arches}; do
+    for py_arch in "3.9"; do
       local pyver=${py_arch//m/}
       local python="/Library/Frameworks/Python.framework/Versions/${pyver}/bin/python${pyver}"
 

From 551c07cbf019f6b746051f2b4102e8150df54321 Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Tue, 27 Jul 2021 11:59:55 -0500
Subject: [PATCH 657/719] ARROW-13453: [R] DuckDB has not yet released 0.2.8

Closes #10807 from jonkeane/ARROW-13453-duckdb-version

Authored-by: Jonathan Keane <jkeane@gmail.com>
Signed-off-by: Jonathan Keane <jkeane@gmail.com>
---
 r/DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/r/DESCRIPTION b/r/DESCRIPTION
index 969cf422b1b..18ab0f99ed1 100644
--- a/r/DESCRIPTION
+++ b/r/DESCRIPTION
@@ -45,7 +45,7 @@ Suggests:
     decor,
     distro,
     dplyr,
-    duckdb (> 0.2.7),
+    duckdb,
     hms,
     knitr,
     lubridate,

From 4c02c6d7ffae6f8580aa9e7f42336cfa561d357e Mon Sep 17 00:00:00 2001
From: Benjamin Kietzman <bengilgit@gmail.com>
Date: Tue, 27 Jul 2021 15:57:45 -0400
Subject: [PATCH 658/719] ARROW-13444: [C++] Remove usage of deprecated
 std::result_of

Also clean up some UBSAN nullptr arithmetic warnings

Closes #10814 from bkietz/13444-C20-compatibility-by-upda

Authored-by: Benjamin Kietzman <bengilgit@gmail.com>
Signed-off-by: Benjamin Kietzman <bengilgit@gmail.com>
---
 cpp/src/arrow/buffer_builder.h          |  3 +--
 cpp/src/arrow/result.h                  |  7 ++++---
 cpp/src/arrow/testing/util.h            |  2 +-
 cpp/src/arrow/util/bit_block_counter.cc | 18 ++++++------------
 cpp/src/arrow/util/bit_run_reader.h     |  2 +-
 cpp/src/arrow/util/bitmap_generate.h    |  2 +-
 cpp/src/arrow/util/bitmap_reader.h      |  2 +-
 cpp/src/arrow/util/functional.h         |  2 +-
 cpp/src/arrow/util/future.h             | 12 +++++++++++-
 cpp/src/arrow/util/ubsan.h              |  6 +++---
 10 files changed, 30 insertions(+), 26 deletions(-)

diff --git a/cpp/src/arrow/buffer_builder.h b/cpp/src/arrow/buffer_builder.h
index c6250ae2b76..eb3f68affc0 100644
--- a/cpp/src/arrow/buffer_builder.h
+++ b/cpp/src/arrow/buffer_builder.h
@@ -45,8 +45,7 @@ class ARROW_EXPORT BufferBuilder {
   explicit BufferBuilder(MemoryPool* pool = default_memory_pool())
       : pool_(pool),
         data_(/*ensure never null to make ubsan happy and avoid check penalties below*/
-              &util::internal::non_null_filler),
-
+              util::MakeNonNull<uint8_t>()),
         capacity_(0),
         size_(0) {}
 
diff --git a/cpp/src/arrow/result.h b/cpp/src/arrow/result.h
index cb7437cd242..3ef4f8cc7f7 100644
--- a/cpp/src/arrow/result.h
+++ b/cpp/src/arrow/result.h
@@ -385,7 +385,8 @@ class ARROW_MUST_USE_TYPE Result : public util::EqualityComparable<Result<T>> {
   /// Apply a function to the internally stored value to produce a new result or propagate
   /// the stored error.
   template <typename M>
-  typename EnsureResult<typename std::result_of<M && (T)>::type>::type Map(M&& m) && {
+  typename EnsureResult<decltype(std::declval<M&&>()(std::declval<T&&>()))>::type Map(
+      M&& m) && {
     if (!ok()) {
       return status();
     }
@@ -395,8 +396,8 @@ class ARROW_MUST_USE_TYPE Result : public util::EqualityComparable<Result<T>> {
   /// Apply a function to the internally stored value to produce a new result or propagate
   /// the stored error.
   template <typename M>
-  typename EnsureResult<typename std::result_of<M && (const T&)>::type>::type Map(
-      M&& m) const& {
+  typename EnsureResult<decltype(std::declval<M&&>()(std::declval<const T&>()))>::type
+  Map(M&& m) const& {
     if (!ok()) {
       return status();
     }
diff --git a/cpp/src/arrow/testing/util.h b/cpp/src/arrow/testing/util.h
index 99b438db9c7..05fb8c68e3f 100644
--- a/cpp/src/arrow/testing/util.h
+++ b/cpp/src/arrow/testing/util.h
@@ -123,7 +123,7 @@ struct VisitBuilderImpl {
   template <typename T, typename BuilderType = typename TypeTraits<T>::BuilderType,
             // need to let SFINAE drop this Visit when it would result in
             // [](NullBuilder*){}(double_builder)
-            typename E = typename std::result_of<Fn(BuilderType*)>::type>
+            typename = decltype(std::declval<Fn>()(std::declval<BuilderType*>()))>
   Status Visit(const T&) {
     fn_(internal::checked_cast<BuilderType*>(builder_));
     return Status::OK();
diff --git a/cpp/src/arrow/util/bit_block_counter.cc b/cpp/src/arrow/util/bit_block_counter.cc
index c67cedc4a06..7b5590f1797 100644
--- a/cpp/src/arrow/util/bit_block_counter.cc
+++ b/cpp/src/arrow/util/bit_block_counter.cc
@@ -37,19 +37,12 @@ BitBlockCount BitBlockCounter::GetBlockSlow(int64_t block_size) noexcept {
   return {run_length, popcount};
 }
 
-// Prevent pointer arithmetic on nullptr, which is undefined behavior even if the pointer
-// is never dereferenced.
-inline const uint8_t* EnsureNotNull(const uint8_t* ptr) {
-  static const uint8_t byte{};
-  return ptr == nullptr ? &byte : ptr;
-}
-
 OptionalBitBlockCounter::OptionalBitBlockCounter(const uint8_t* validity_bitmap,
                                                  int64_t offset, int64_t length)
     : has_bitmap_(validity_bitmap != nullptr),
       position_(0),
       length_(length),
-      counter_(EnsureNotNull(validity_bitmap), offset, length) {}
+      counter_(util::MakeNonNull(validity_bitmap), offset, length) {}
 
 OptionalBitBlockCounter::OptionalBitBlockCounter(
     const std::shared_ptr<Buffer>& validity_bitmap, int64_t offset, int64_t length)
@@ -64,10 +57,11 @@ OptionalBinaryBitBlockCounter::OptionalBinaryBitBlockCounter(const uint8_t* left
     : has_bitmap_(HasBitmapFromBitmaps(left_bitmap != nullptr, right_bitmap != nullptr)),
       position_(0),
       length_(length),
-      unary_counter_(EnsureNotNull(left_bitmap != nullptr ? left_bitmap : right_bitmap),
-                     left_bitmap != nullptr ? left_offset : right_offset, length),
-      binary_counter_(EnsureNotNull(left_bitmap), left_offset,
-                      EnsureNotNull(right_bitmap), right_offset, length) {}
+      unary_counter_(
+          util::MakeNonNull(left_bitmap != nullptr ? left_bitmap : right_bitmap),
+          left_bitmap != nullptr ? left_offset : right_offset, length),
+      binary_counter_(util::MakeNonNull(left_bitmap), left_offset,
+                      util::MakeNonNull(right_bitmap), right_offset, length) {}
 
 OptionalBinaryBitBlockCounter::OptionalBinaryBitBlockCounter(
     const std::shared_ptr<Buffer>& left_bitmap, int64_t left_offset,
diff --git a/cpp/src/arrow/util/bit_run_reader.h b/cpp/src/arrow/util/bit_run_reader.h
index 3e196628477..ed9f4fa867a 100644
--- a/cpp/src/arrow/util/bit_run_reader.h
+++ b/cpp/src/arrow/util/bit_run_reader.h
@@ -197,7 +197,7 @@ class BaseSetBitRunReader {
   /// \param[in] length number of bits to copy
   ARROW_NOINLINE
   BaseSetBitRunReader(const uint8_t* bitmap, int64_t start_offset, int64_t length)
-      : bitmap_(bitmap),
+      : bitmap_(util::MakeNonNull(bitmap)),
         length_(length),
         remaining_(length_),
         current_word_(0),
diff --git a/cpp/src/arrow/util/bitmap_generate.h b/cpp/src/arrow/util/bitmap_generate.h
index 129fa913231..6b900f246fa 100644
--- a/cpp/src/arrow/util/bitmap_generate.h
+++ b/cpp/src/arrow/util/bitmap_generate.h
@@ -62,7 +62,7 @@ void GenerateBits(uint8_t* bitmap, int64_t start_offset, int64_t length, Generat
 template <class Generator>
 void GenerateBitsUnrolled(uint8_t* bitmap, int64_t start_offset, int64_t length,
                           Generator&& g) {
-  static_assert(std::is_same<typename std::result_of<Generator && ()>::type, bool>::value,
+  static_assert(std::is_same<decltype(std::declval<Generator>()()), bool>::value,
                 "Functor passed to GenerateBitsUnrolled must return bool");
 
   if (length == 0) {
diff --git a/cpp/src/arrow/util/bitmap_reader.h b/cpp/src/arrow/util/bitmap_reader.h
index 7c43747fafb..b05c7226ddb 100644
--- a/cpp/src/arrow/util/bitmap_reader.h
+++ b/cpp/src/arrow/util/bitmap_reader.h
@@ -76,7 +76,7 @@ class BitmapReader {
 class BitmapUInt64Reader {
  public:
   BitmapUInt64Reader(const uint8_t* bitmap, int64_t start_offset, int64_t length)
-      : bitmap_(bitmap + start_offset / 8),
+      : bitmap_(util::MakeNonNull(bitmap) + start_offset / 8),
         num_carry_bits_(8 - start_offset % 8),
         length_(length),
         remaining_length_(length_) {
diff --git a/cpp/src/arrow/util/functional.h b/cpp/src/arrow/util/functional.h
index 9da79046fec..41e268852fa 100644
--- a/cpp/src/arrow/util/functional.h
+++ b/cpp/src/arrow/util/functional.h
@@ -129,7 +129,7 @@ class FnOnce<R(A...)> {
 
   template <typename Fn,
             typename = typename std::enable_if<std::is_convertible<
-                typename std::result_of<Fn && (A...)>::type, R>::value>::type>
+                decltype(std::declval<Fn&&>()(std::declval<A>()...)), R>::value>::type>
   FnOnce(Fn fn) : impl_(new FnImpl<Fn>(std::move(fn))) {  // NOLINT runtime/explicit
   }
 
diff --git a/cpp/src/arrow/util/future.h b/cpp/src/arrow/util/future.h
index d9e0a939f25..6c194cab2ac 100644
--- a/cpp/src/arrow/util/future.h
+++ b/cpp/src/arrow/util/future.h
@@ -28,6 +28,7 @@
 #include "arrow/result.h"
 #include "arrow/status.h"
 #include "arrow/type_fwd.h"
+#include "arrow/type_traits.h"
 #include "arrow/util/functional.h"
 #include "arrow/util/macros.h"
 #include "arrow/util/optional.h"
@@ -47,8 +48,17 @@ struct is_future : std::false_type {};
 template <typename T>
 struct is_future<Future<T>> : std::true_type {};
 
+template <typename Signature, typename Enable = void>
+struct result_of;
+
+template <typename Fn, typename... A>
+struct result_of<Fn(A...),
+                 internal::void_t<decltype(std::declval<Fn>()(std::declval<A>()...))>> {
+  using type = decltype(std::declval<Fn>()(std::declval<A>()...));
+};
+
 template <typename Signature>
-using result_of_t = typename std::result_of<Signature>::type;
+using result_of_t = typename result_of<Signature>::type;
 
 // Helper to find the synchronous counterpart for a Future
 template <typename T>
diff --git a/cpp/src/arrow/util/ubsan.h b/cpp/src/arrow/util/ubsan.h
index 2d4b513894b..77c3cb8e5ac 100644
--- a/cpp/src/arrow/util/ubsan.h
+++ b/cpp/src/arrow/util/ubsan.h
@@ -30,7 +30,7 @@ namespace util {
 
 namespace internal {
 
-static uint8_t non_null_filler;
+constexpr uint8_t kNonNullFiller = 0;
 
 }  // namespace internal
 
@@ -44,12 +44,12 @@ static uint8_t non_null_filler;
 /// https://github.com/google/flatbuffers/pull/5355 is trying to resolve
 /// them.
 template <typename T>
-inline T* MakeNonNull(T* maybe_null) {
+inline T* MakeNonNull(T* maybe_null = NULLPTR) {
   if (ARROW_PREDICT_TRUE(maybe_null != NULLPTR)) {
     return maybe_null;
   }
 
-  return reinterpret_cast<T*>(&internal::non_null_filler);
+  return const_cast<T*>(reinterpret_cast<const T*>(&internal::kNonNullFiller));
 }
 
 template <typename T>

From e5d8ead78633c506bf53efe34290cfeab0f9efb4 Mon Sep 17 00:00:00 2001
From: "Jorge C. Leitao" <jorgecarleitao@gmail.com>
Date: Wed, 28 Jul 2021 05:29:06 +0900
Subject: [PATCH 659/719] ARROW-13468: [Release] Fix binary download/upload
 failures

These are two minor fixes to the release script.

1. Made git not fetch the 15k tags on crossbow and instead. This was causing timeouts
2. pass the `ARTIFACTORY_API_KEY` to the docker image so that it can use it in pushing artifacts

Closes #10366 from jorgecarleitao/fix_artifactory

Lead-authored-by: Jorge C. Leitao <jorgecarleitao@gmail.com>
Co-authored-by: Jorge Leitao <jorgecarleitao@gmail.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 dev/release/04-binary-download.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dev/release/04-binary-download.sh b/dev/release/04-binary-download.sh
index e2b97ac6a0c..64d137357f1 100755
--- a/dev/release/04-binary-download.sh
+++ b/dev/release/04-binary-download.sh
@@ -36,4 +36,4 @@ crossbow_job_prefix="release-${version_with_rc}"
 : ${CROSSBOW_JOB_NUMBER:="0"}
 : ${CROSSBOW_JOB_ID:="${crossbow_job_prefix}-${CROSSBOW_JOB_NUMBER}"}
 
-archery crossbow download-artifacts ${CROSSBOW_JOB_ID}
+archery crossbow download-artifacts ${CROSSBOW_JOB_ID} --no-fetch

From 1f481d91fc24151476ba5bec3c9d3594ae27defa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Wed, 28 Jul 2021 19:07:50 +0200
Subject: [PATCH 660/719] [Release][Minor] Bump development versions to
 6.0.0-SNAPSHOT (#10821)

* [Release] Update versions for 6.0.0-SNAPSHOT

* [Release] Update .deb package names for 6.0.0
---
 c_glib/meson.build                            |   2 +-
 ci/scripts/PKGBUILD                           |   2 +-
 cpp/CMakeLists.txt                            |   2 +-
 cpp/vcpkg.json                                |   2 +-
 csharp/Directory.Build.props                  |   2 +-
 dev/release/rat_exclude_files.txt             |  32 ++---
 dev/tasks/homebrew-formulae/apache-arrow.rb   |   2 +-
 .../autobrew/apache-arrow.rb                  |   2 +-
 .../apache-arrow/debian/control.in            | 110 +++++++++---------
 ....install => libarrow-cuda-glib600.install} |   0
 ...da500.install => libarrow-cuda600.install} |   0
 ...stall => libarrow-dataset-glib600.install} |   0
 ...00.install => libarrow-dataset600.install} |   0
 ...nstall => libarrow-flight-glib600.install} |   0
 ...500.install => libarrow-flight600.install} |   0
 ...ib500.install => libarrow-glib600.install} |   0
 ...tall => libarrow-python-flight600.install} |   0
 ...500.install => libarrow-python600.install} |   0
 ...ibarrow500.install => libarrow600.install} |   0
 ...500.install => libgandiva-glib600.install} |   0
 ...ndiva500.install => libgandiva600.install} |   0
 ...500.install => libparquet-glib600.install} |   0
 ...rquet500.install => libparquet600.install} |   0
 ...b500.install => libplasma-glib600.install} |   0
 ...plasma500.install => libplasma600.install} |   0
 dev/tasks/tasks.yml                           |  64 +++++-----
 java/adapter/avro/pom.xml                     |   2 +-
 java/adapter/jdbc/pom.xml                     |   2 +-
 java/adapter/orc/pom.xml                      |   2 +-
 java/algorithm/pom.xml                        |   2 +-
 java/compression/pom.xml                      |   2 +-
 java/dataset/pom.xml                          |   2 +-
 java/flight/flight-core/pom.xml               |   2 +-
 java/flight/flight-grpc/pom.xml               |   2 +-
 java/format/pom.xml                           |   2 +-
 java/gandiva/pom.xml                          |   2 +-
 java/memory/memory-core/pom.xml               |   2 +-
 java/memory/memory-netty/pom.xml              |   2 +-
 java/memory/memory-unsafe/pom.xml             |   2 +-
 java/memory/pom.xml                           |   2 +-
 java/performance/pom.xml                      |   4 +-
 java/plasma/pom.xml                           |   2 +-
 java/pom.xml                                  |   2 +-
 java/tools/pom.xml                            |   2 +-
 java/vector/pom.xml                           |   2 +-
 js/package.json                               |   2 +-
 matlab/CMakeLists.txt                         |   2 +-
 python/setup.py                               |   2 +-
 r/DESCRIPTION                                 |   2 +-
 r/NEWS.md                                     |   2 +
 ruby/red-arrow-cuda/lib/arrow-cuda/version.rb |   2 +-
 .../lib/arrow-dataset/version.rb              |   2 +-
 .../lib/arrow-flight/version.rb               |   2 +-
 ruby/red-arrow/lib/arrow/version.rb           |   2 +-
 ruby/red-gandiva/lib/gandiva/version.rb       |   2 +-
 ruby/red-parquet/lib/parquet/version.rb       |   2 +-
 ruby/red-plasma/lib/plasma/version.rb         |   2 +-
 57 files changed, 143 insertions(+), 141 deletions(-)
 rename dev/tasks/linux-packages/apache-arrow/debian/{libarrow-cuda-glib500.install => libarrow-cuda-glib600.install} (100%)
 rename dev/tasks/linux-packages/apache-arrow/debian/{libarrow-cuda500.install => libarrow-cuda600.install} (100%)
 rename dev/tasks/linux-packages/apache-arrow/debian/{libarrow-dataset-glib500.install => libarrow-dataset-glib600.install} (100%)
 rename dev/tasks/linux-packages/apache-arrow/debian/{libarrow-dataset500.install => libarrow-dataset600.install} (100%)
 rename dev/tasks/linux-packages/apache-arrow/debian/{libarrow-flight-glib500.install => libarrow-flight-glib600.install} (100%)
 rename dev/tasks/linux-packages/apache-arrow/debian/{libarrow-flight500.install => libarrow-flight600.install} (100%)
 rename dev/tasks/linux-packages/apache-arrow/debian/{libarrow-glib500.install => libarrow-glib600.install} (100%)
 rename dev/tasks/linux-packages/apache-arrow/debian/{libarrow-python-flight500.install => libarrow-python-flight600.install} (100%)
 rename dev/tasks/linux-packages/apache-arrow/debian/{libarrow-python500.install => libarrow-python600.install} (100%)
 rename dev/tasks/linux-packages/apache-arrow/debian/{libarrow500.install => libarrow600.install} (100%)
 rename dev/tasks/linux-packages/apache-arrow/debian/{libgandiva-glib500.install => libgandiva-glib600.install} (100%)
 rename dev/tasks/linux-packages/apache-arrow/debian/{libgandiva500.install => libgandiva600.install} (100%)
 rename dev/tasks/linux-packages/apache-arrow/debian/{libparquet-glib500.install => libparquet-glib600.install} (100%)
 rename dev/tasks/linux-packages/apache-arrow/debian/{libparquet500.install => libparquet600.install} (100%)
 rename dev/tasks/linux-packages/apache-arrow/debian/{libplasma-glib500.install => libplasma-glib600.install} (100%)
 rename dev/tasks/linux-packages/apache-arrow/debian/{libplasma500.install => libplasma600.install} (100%)

diff --git a/c_glib/meson.build b/c_glib/meson.build
index 7c453af9e33..0e090c97968 100644
--- a/c_glib/meson.build
+++ b/c_glib/meson.build
@@ -23,7 +23,7 @@ project('arrow-glib', 'c', 'cpp',
           'cpp_std=c++11',
         ])
 
-version = '5.0.0-SNAPSHOT'
+version = '6.0.0-SNAPSHOT'
 if version.endswith('-SNAPSHOT')
   version_numbers = version.split('-')[0].split('.')
   version_tag = version.split('-')[1]
diff --git a/ci/scripts/PKGBUILD b/ci/scripts/PKGBUILD
index f746c4a81a5..56d70d83daf 100644
--- a/ci/scripts/PKGBUILD
+++ b/ci/scripts/PKGBUILD
@@ -18,7 +18,7 @@
 _realname=arrow
 pkgbase=mingw-w64-${_realname}
 pkgname="${MINGW_PACKAGE_PREFIX}-${_realname}"
-pkgver=4.0.1.9000
+pkgver=5.0.0.9000
 pkgrel=8000
 pkgdesc="Apache Arrow is a cross-language development platform for in-memory data (mingw-w64)"
 arch=("any")
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 8a358db8b95..78b19ba5930 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -47,7 +47,7 @@ if(POLICY CMP0074)
   cmake_policy(SET CMP0074 NEW)
 endif()
 
-set(ARROW_VERSION "5.0.0-SNAPSHOT")
+set(ARROW_VERSION "6.0.0-SNAPSHOT")
 
 string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" ARROW_BASE_VERSION "${ARROW_VERSION}")
 
diff --git a/cpp/vcpkg.json b/cpp/vcpkg.json
index 5f92affa4eb..723f3a46e78 100644
--- a/cpp/vcpkg.json
+++ b/cpp/vcpkg.json
@@ -1,6 +1,6 @@
 {
   "name": "arrow",
-  "version-string": "5.0.0-SNAPSHOT",
+  "version-string": "6.0.0-SNAPSHOT",
   "dependencies": [
     "abseil",
     {
diff --git a/csharp/Directory.Build.props b/csharp/Directory.Build.props
index b610a768358..c42ff55a413 100644
--- a/csharp/Directory.Build.props
+++ b/csharp/Directory.Build.props
@@ -29,7 +29,7 @@
     <Product>Apache Arrow library</Product>
     <Copyright>Copyright 2016-2019 The Apache Software Foundation</Copyright>
     <Company>The Apache Software Foundation</Company>
-    <Version>5.0.0-SNAPSHOT</Version>
+    <Version>6.0.0-SNAPSHOT</Version>
   </PropertyGroup>
 
   <PropertyGroup>
diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt
index f8ec55f621e..e2aa6285ea5 100644
--- a/dev/release/rat_exclude_files.txt
+++ b/dev/release/rat_exclude_files.txt
@@ -69,51 +69,51 @@ dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-dev.install
 dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.doc-base
 dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.install
 dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.links
-dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib500.install
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib600.install
 dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-dev.install
 dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib-dev.install
-dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib500.install
-dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda500.install
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib600.install
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda600.install
 dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-dev.install
 dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-dev.install
 dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-doc.doc-base
 dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-doc.install
 dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-doc.links
-dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib500.install
-dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset500.install
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib600.install
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset600.install
 dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-dev.install
 dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-dev.install
 dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.doc-base
 dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.install
 dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.links
-dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib500.install
-dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight500.install
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib600.install
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight600.install
 dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-dev.install
 dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight-dev.install
-dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight500.install
-dev/tasks/linux-packages/apache-arrow/debian/libarrow-python500.install
-dev/tasks/linux-packages/apache-arrow/debian/libarrow500.install
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight600.install
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-python600.install
+dev/tasks/linux-packages/apache-arrow/debian/libarrow600.install
 dev/tasks/linux-packages/apache-arrow/debian/libgandiva-dev.install
 dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-dev.install
 dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-doc.doc-base
 dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-doc.install
 dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-doc.links
-dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib500.install
-dev/tasks/linux-packages/apache-arrow/debian/libgandiva500.install
+dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib600.install
+dev/tasks/linux-packages/apache-arrow/debian/libgandiva600.install
 dev/tasks/linux-packages/apache-arrow/debian/libparquet-dev.install
 dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-dev.install
 dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-doc.doc-base
 dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-doc.install
 dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-doc.links
-dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib500.install
-dev/tasks/linux-packages/apache-arrow/debian/libparquet500.install
+dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib600.install
+dev/tasks/linux-packages/apache-arrow/debian/libparquet600.install
 dev/tasks/linux-packages/apache-arrow/debian/libplasma-dev.install
 dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-dev.install
 dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-doc.doc-base
 dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-doc.install
 dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-doc.links
-dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib500.install
-dev/tasks/linux-packages/apache-arrow/debian/libplasma500.install
+dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib600.install
+dev/tasks/linux-packages/apache-arrow/debian/libplasma600.install
 dev/tasks/linux-packages/apache-arrow/debian/patches/series
 dev/tasks/linux-packages/apache-arrow/debian/plasma-store-server.install
 dev/tasks/linux-packages/apache-arrow/debian/rules
diff --git a/dev/tasks/homebrew-formulae/apache-arrow.rb b/dev/tasks/homebrew-formulae/apache-arrow.rb
index 62ae516fed6..ca3f83174ca 100644
--- a/dev/tasks/homebrew-formulae/apache-arrow.rb
+++ b/dev/tasks/homebrew-formulae/apache-arrow.rb
@@ -1,7 +1,7 @@
 class ApacheArrow < Formula
   desc "Columnar in-memory analytics layer designed to accelerate big data"
   homepage "https://arrow.apache.org/"
-  url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-5.0.0-SNAPSHOT/apache-arrow-5.0.0-SNAPSHOT.tar.gz"
+  url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-6.0.0-SNAPSHOT/apache-arrow-6.0.0-SNAPSHOT.tar.gz"
   sha256 "9948ddb6d4798b51552d0dca3252dd6e3a7d0f9702714fc6f5a1b59397ce1d28"
   license "Apache-2.0"
   head "https://github.com/apache/arrow.git"
diff --git a/dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb b/dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb
index e17ff8cfc82..1d257e6c778 100644
--- a/dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb
+++ b/dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb
@@ -19,7 +19,7 @@
 class ApacheArrow < Formula
   desc "Columnar in-memory analytics layer designed to accelerate big data"
   homepage "https://arrow.apache.org/"
-  url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-4.0.1.9000/apache-arrow-4.0.1.9000.tar.gz"
+  url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-5.0.0.9000/apache-arrow-5.0.0.9000.tar.gz"
   sha256 "9948ddb6d4798b51552d0dca3252dd6e3a7d0f9702714fc6f5a1b59397ce1d28"
   head "https://github.com/apache/arrow.git"
 
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/control.in b/dev/tasks/linux-packages/apache-arrow/debian/control.in
index 2d56463139f..b6d849719a1 100644
--- a/dev/tasks/linux-packages/apache-arrow/debian/control.in
+++ b/dev/tasks/linux-packages/apache-arrow/debian/control.in
@@ -38,7 +38,7 @@ Build-Depends-Indep: libglib2.0-doc
 Standards-Version: 3.9.8
 Homepage: https://arrow.apache.org/
 
-Package: libarrow500
+Package: libarrow600
 Section: libs
 Architecture: any
 Multi-Arch: same
@@ -50,7 +50,7 @@ Description: Apache Arrow is a data processing library for analysis
  .
  This package provides C++ library files.
 
-Package: libarrow-cuda500
+Package: libarrow-cuda600
 Section: libs
 Architecture: @CUDA_ARCHITECTURE@
 Multi-Arch: same
@@ -58,12 +58,12 @@ Pre-Depends: ${misc:Pre-Depends}
 Depends:
   ${misc:Depends},
   ${shlibs:Depends},
-  libarrow500 (= ${binary:Version})
+  libarrow600 (= ${binary:Version})
 Description: Apache Arrow is a data processing library for analysis
  .
  This package provides C++ library files for CUDA support.
 
-Package: libarrow-dataset500
+Package: libarrow-dataset600
 Section: libs
 Architecture: any
 Multi-Arch: same
@@ -71,13 +71,13 @@ Pre-Depends: ${misc:Pre-Depends}
 Depends:
   ${misc:Depends},
   ${shlibs:Depends},
-  libarrow500 (= ${binary:Version}),
-  libparquet500 (= ${binary:Version})
+  libarrow600 (= ${binary:Version}),
+  libparquet600 (= ${binary:Version})
 Description: Apache Arrow is a data processing library for analysis
  .
  This package provides C++ library files for Dataset module.
 
-Package: libarrow-flight500
+Package: libarrow-flight600
 Section: libs
 Architecture: any
 Multi-Arch: same
@@ -85,12 +85,12 @@ Pre-Depends: ${misc:Pre-Depends}
 Depends:
   ${misc:Depends},
   ${shlibs:Depends},
-  libarrow500 (= ${binary:Version})
+  libarrow600 (= ${binary:Version})
 Description: Apache Arrow is a data processing library for analysis
  .
  This package provides C++ library files for Flight RPC system.
 
-Package: libarrow-python500
+Package: libarrow-python600
 Section: libs
 Architecture: any
 Multi-Arch: same
@@ -98,14 +98,14 @@ Pre-Depends: ${misc:Pre-Depends}
 Depends:
   ${misc:Depends},
   ${shlibs:Depends},
-  libarrow500 (= ${binary:Version}),
+  libarrow600 (= ${binary:Version}),
   python3,
   python3-numpy
 Description: Apache Arrow is a data processing library for analysis
  .
  This package provides C++ library files for Python support.
 
-Package: libarrow-python-flight500
+Package: libarrow-python-flight600
 Section: libs
 Architecture: any
 Multi-Arch: same
@@ -113,8 +113,8 @@ Pre-Depends: ${misc:Pre-Depends}
 Depends:
   ${misc:Depends},
   ${shlibs:Depends},
-  libarrow-flight500 (= ${binary:Version}),
-  libarrow-python500 (= ${binary:Version})
+  libarrow-flight600 (= ${binary:Version}),
+  libarrow-python600 (= ${binary:Version})
 Description: Apache Arrow is a data processing library for analysis
  .
  This package provides C++ library files for Flight and Python support.
@@ -125,7 +125,7 @@ Architecture: any
 Multi-Arch: same
 Depends:
   ${misc:Depends},
-  libarrow500 (= ${binary:Version}),
+  libarrow600 (= ${binary:Version}),
   libbrotli-dev,
   libbz2-dev,
 @USE_SYSTEM_C_ARES@  libc-ares-dev,
@@ -149,7 +149,7 @@ Multi-Arch: same
 Depends:
   ${misc:Depends},
   libarrow-dev (= ${binary:Version}),
-  libarrow-cuda500 (= ${binary:Version})
+  libarrow-cuda600 (= ${binary:Version})
 Description: Apache Arrow is a data processing library for analysis
  .
  This package provides C++ header files for CUDA support.
@@ -161,7 +161,7 @@ Multi-Arch: same
 Depends:
   ${misc:Depends},
   libarrow-dev (= ${binary:Version}),
-  libarrow-dataset500 (= ${binary:Version}),
+  libarrow-dataset600 (= ${binary:Version}),
   libparquet-dev (= ${binary:Version})
 Description: Apache Arrow is a data processing library for analysis
  .
@@ -174,7 +174,7 @@ Multi-Arch: same
 Depends:
   ${misc:Depends},
   libarrow-dev (= ${binary:Version}),
-  libarrow-flight500 (= ${binary:Version})
+  libarrow-flight600 (= ${binary:Version})
 Description: Apache Arrow is a data processing library for analysis
  .
  This package provides C++ header files for Flight RPC system.
@@ -186,7 +186,7 @@ Multi-Arch: same
 Depends:
   ${misc:Depends},
   libarrow-dev (= ${binary:Version}),
-  libarrow-python500 (= ${binary:Version})
+  libarrow-python600 (= ${binary:Version})
 Description: Apache Arrow is a data processing library for analysis
  .
  This package provides C++ header files for Python support.
@@ -199,12 +199,12 @@ Depends:
   ${misc:Depends},
   libarrow-flight-dev (= ${binary:Version}),
   libarrow-python-dev (= ${binary:Version}),
-  libarrow-python-flight500 (= ${binary:Version})
+  libarrow-python-flight600 (= ${binary:Version})
 Description: Apache Arrow is a data processing library for analysis
  .
  This package provides C++ header files for Flight and Python support.
 
-Package: libgandiva500
+Package: libgandiva600
 Section: libs
 Architecture: any
 Multi-Arch: same
@@ -212,7 +212,7 @@ Pre-Depends: ${misc:Pre-Depends}
 Depends:
   ${misc:Depends},
   ${shlibs:Depends},
-  libarrow500 (= ${binary:Version})
+  libarrow600 (= ${binary:Version})
 Description: Gandiva is a toolset for compiling and evaluating expressions
  on Arrow Data.
  .
@@ -225,13 +225,13 @@ Multi-Arch: same
 Depends:
   ${misc:Depends},
   libarrow-dev (= ${binary:Version}),
-  libgandiva500 (= ${binary:Version})
+  libgandiva600 (= ${binary:Version})
 Description: Gandiva is a toolset for compiling and evaluating expressions
  on Arrow Data.
  .
  This package provides C++ header files.
 
-Package: libplasma500
+Package: libplasma600
 Section: libs
 Architecture: @CUDA_ARCHITECTURE@
 Multi-Arch: same
@@ -239,7 +239,7 @@ Pre-Depends: ${misc:Pre-Depends}
 Depends:
   ${misc:Depends},
   ${shlibs:Depends},
-  libarrow-cuda500 (= ${binary:Version})
+  libarrow-cuda600 (= ${binary:Version})
 Description: Plasma is an in-memory object store and cache for big data.
  .
  This package provides C++ library files to connect plasma-store-server.
@@ -251,7 +251,7 @@ Pre-Depends: ${misc:Pre-Depends}
 Depends:
   ${misc:Depends},
   ${shlibs:Depends},
-  libplasma500 (= ${binary:Version})
+  libplasma600 (= ${binary:Version})
 Description: Plasma is an in-memory object store and cache for big data.
  .
  This package provides plasma-store-server.
@@ -263,12 +263,12 @@ Multi-Arch: same
 Depends:
   ${misc:Depends},
   libarrow-cuda-dev (= ${binary:Version}),
-  libplasma500 (= ${binary:Version})
+  libplasma600 (= ${binary:Version})
 Description: Plasma is an in-memory object store and cache for big data.
  .
  This package provides C++ header files.
 
-Package: libparquet500
+Package: libparquet600
 Section: libs
 Architecture: any
 Multi-Arch: same
@@ -287,12 +287,12 @@ Multi-Arch: same
 Depends:
   ${misc:Depends},
   libarrow-dev (= ${binary:Version}),
-  libparquet500 (= ${binary:Version})
+  libparquet600 (= ${binary:Version})
 Description: Apache Parquet is a columnar storage format
  .
  This package provides C++ header files.
 
-Package: libarrow-glib500
+Package: libarrow-glib600
 Section: libs
 Architecture: any
 Multi-Arch: same
@@ -300,7 +300,7 @@ Pre-Depends: ${misc:Pre-Depends}
 Depends:
   ${misc:Depends},
   ${shlibs:Depends},
-  libarrow500 (= ${binary:Version})
+  libarrow600 (= ${binary:Version})
 Description: Apache Arrow is a data processing library for analysis
  .
  This package provides GLib based library files.
@@ -324,7 +324,7 @@ Depends:
   ${misc:Depends},
   libglib2.0-dev,
   libarrow-dev (= ${binary:Version}),
-  libarrow-glib500 (= ${binary:Version}),
+  libarrow-glib600 (= ${binary:Version}),
   gir1.2-arrow-1.0 (= ${binary:Version})
 Suggests: libarrow-glib-doc
 Description: Apache Arrow is a data processing library for analysis
@@ -342,7 +342,7 @@ Description: Apache Arrow is a data processing library for analysis
  .
  This package provides documentations.
 
-Package: libarrow-cuda-glib500
+Package: libarrow-cuda-glib600
 Section: libs
 Architecture: @CUDA_ARCHITECTURE@
 Multi-Arch: same
@@ -350,8 +350,8 @@ Pre-Depends: ${misc:Pre-Depends}
 Depends:
   ${misc:Depends},
   ${shlibs:Depends},
-  libarrow-glib500 (= ${binary:Version}),
-  libarrow-cuda500 (= ${binary:Version})
+  libarrow-glib600 (= ${binary:Version}),
+  libarrow-cuda600 (= ${binary:Version})
 Description: Apache Arrow is a data processing library for analysis
  .
  This package provides GLib based library files for CUDA support.
@@ -375,13 +375,13 @@ Depends:
   ${misc:Depends},
   libarrow-cuda-dev (= ${binary:Version}),
   libarrow-glib-dev (= ${binary:Version}),
-  libarrow-cuda-glib500 (= ${binary:Version}),
+  libarrow-cuda-glib600 (= ${binary:Version}),
   gir1.2-arrow-cuda-1.0 (= ${binary:Version})
 Description: Apache Arrow is a data processing library for analysis
  .
  This package provides GLib based header files for CUDA support.
 
-Package: libarrow-dataset-glib500
+Package: libarrow-dataset-glib600
 Section: libs
 Architecture: any
 Multi-Arch: same
@@ -389,8 +389,8 @@ Pre-Depends: ${misc:Pre-Depends}
 Depends:
   ${misc:Depends},
   ${shlibs:Depends},
-  libarrow-glib500 (= ${binary:Version}),
-  libarrow-dataset500 (= ${binary:Version})
+  libarrow-glib600 (= ${binary:Version}),
+  libarrow-dataset600 (= ${binary:Version})
 Description: Apache Arrow is a data processing library for analysis
  .
  This package provides GLib based library files for dataset module.
@@ -414,7 +414,7 @@ Depends:
   ${misc:Depends},
   libarrow-dataset-dev (= ${binary:Version}),
   libarrow-glib-dev (= ${binary:Version}),
-  libarrow-dataset-glib500 (= ${binary:Version}),
+  libarrow-dataset-glib600 (= ${binary:Version}),
   gir1.2-arrow-dataset-1.0 (= ${binary:Version})
 Description: Apache Arrow is a data processing library for analysis
  .
@@ -431,7 +431,7 @@ Description: Apache Arrow is a data processing library for analysis
  .
  This package provides documentations for dataset module.
 
-Package: libarrow-flight-glib500
+Package: libarrow-flight-glib600
 Section: libs
 Architecture: any
 Multi-Arch: same
@@ -439,8 +439,8 @@ Pre-Depends: ${misc:Pre-Depends}
 Depends:
   ${misc:Depends},
   ${shlibs:Depends},
-  libarrow-glib500 (= ${binary:Version}),
-  libarrow-flight500 (= ${binary:Version})
+  libarrow-glib600 (= ${binary:Version}),
+  libarrow-flight600 (= ${binary:Version})
 Description: Apache Arrow is a data processing library for analysis
  .
  This package provides GLib based library files for Apache Arrow Flight.
@@ -465,7 +465,7 @@ Depends:
   ${misc:Depends},
   libarrow-flight-dev (= ${binary:Version}),
   libarrow-glib-dev (= ${binary:Version}),
-  libarrow-flight-glib500 (= ${binary:Version}),
+  libarrow-flight-glib600 (= ${binary:Version}),
   gir1.2-arrow-flight-1.0 (= ${binary:Version})
 Description: Apache Arrow is a data processing library for analysis
  .
@@ -482,7 +482,7 @@ Description: Apache Arrow is a data processing library for analysis
  .
  This package provides documentations for Apache Arrow Flight.
 
-Package: libgandiva-glib500
+Package: libgandiva-glib600
 Section: libs
 Architecture: any
 Multi-Arch: same
@@ -490,8 +490,8 @@ Pre-Depends: ${misc:Pre-Depends}
 Depends:
   ${misc:Depends},
   ${shlibs:Depends},
-  libarrow-glib500 (= ${binary:Version}),
-  libgandiva500 (= ${binary:Version})
+  libarrow-glib600 (= ${binary:Version}),
+  libgandiva600 (= ${binary:Version})
 Description: Gandiva is a toolset for compiling and evaluating expressions
  on Arrow Data.
  .
@@ -517,7 +517,7 @@ Depends:
   ${misc:Depends},
   libgandiva-dev (= ${binary:Version}),
   libarrow-glib-dev (= ${binary:Version}),
-  libgandiva-glib500 (= ${binary:Version}),
+  libgandiva-glib600 (= ${binary:Version}),
   gir1.2-gandiva-1.0 (= ${binary:Version})
 Description: Gandiva is a toolset for compiling and evaluating expressions
  on Arrow Data.
@@ -536,7 +536,7 @@ Description: Gandiva is a toolset for compiling and evaluating expressions
  .
  This package provides documentations.
 
-Package: libplasma-glib500
+Package: libplasma-glib600
 Section: libs
 Architecture: @CUDA_ARCHITECTURE@
 Multi-Arch: same
@@ -544,8 +544,8 @@ Pre-Depends: ${misc:Pre-Depends}
 Depends:
   ${misc:Depends},
   ${shlibs:Depends},
-  libarrow-cuda-glib500 (= ${binary:Version}),
-  libplasma500 (= ${binary:Version})
+  libarrow-cuda-glib600 (= ${binary:Version}),
+  libplasma600 (= ${binary:Version})
 Description: Plasma is an in-memory object store and cache for big data.
  .
  This package provides GLib based library files to connect plasma-store-server.
@@ -569,7 +569,7 @@ Depends:
   ${misc:Depends},
   libplasma-dev (= ${binary:Version}),
   libarrow-cuda-glib-dev (= ${binary:Version}),
-  libplasma-glib500 (= ${binary:Version}),
+  libplasma-glib600 (= ${binary:Version}),
   gir1.2-plasma-1.0 (= ${binary:Version})
 Description: Plasma is an in-memory object store and cache for big data.
  .
@@ -586,7 +586,7 @@ Description: Plasma is an in-memory object store and cache for big data.
  .
  This package provides documentations.
 
-Package: libparquet-glib500
+Package: libparquet-glib600
 Section: libs
 Architecture: any
 Multi-Arch: same
@@ -594,8 +594,8 @@ Pre-Depends: ${misc:Pre-Depends}
 Depends:
   ${misc:Depends},
   ${shlibs:Depends},
-  libarrow-glib500 (= ${binary:Version}),
-  libparquet500 (= ${binary:Version})
+  libarrow-glib600 (= ${binary:Version}),
+  libparquet600 (= ${binary:Version})
 Description: Apache Parquet is a columnar storage format
  .
  This package provides GLib based library files.
@@ -619,7 +619,7 @@ Depends:
   ${misc:Depends},
   libarrow-glib-dev (= ${binary:Version}),
   libparquet-dev (= ${binary:Version}),
-  libparquet-glib500 (= ${binary:Version}),
+  libparquet-glib600 (= ${binary:Version}),
   gir1.2-parquet-1.0 (= ${binary:Version})
 Suggests: libparquet-glib-doc
 Description: Apache Parquet is a columnar storage format
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib500.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib600.install
similarity index 100%
rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib500.install
rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib600.install
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda500.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda600.install
similarity index 100%
rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda500.install
rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda600.install
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib500.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib600.install
similarity index 100%
rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib500.install
rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib600.install
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset500.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset600.install
similarity index 100%
rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset500.install
rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset600.install
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib500.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib600.install
similarity index 100%
rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib500.install
rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib600.install
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight500.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight600.install
similarity index 100%
rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight500.install
rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight600.install
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib500.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib600.install
similarity index 100%
rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib500.install
rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib600.install
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight500.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight600.install
similarity index 100%
rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight500.install
rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight600.install
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python500.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python600.install
similarity index 100%
rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-python500.install
rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-python600.install
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow500.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow600.install
similarity index 100%
rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow500.install
rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow600.install
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib500.install b/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib600.install
similarity index 100%
rename from dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib500.install
rename to dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib600.install
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libgandiva500.install b/dev/tasks/linux-packages/apache-arrow/debian/libgandiva600.install
similarity index 100%
rename from dev/tasks/linux-packages/apache-arrow/debian/libgandiva500.install
rename to dev/tasks/linux-packages/apache-arrow/debian/libgandiva600.install
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib500.install b/dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib600.install
similarity index 100%
rename from dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib500.install
rename to dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib600.install
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libparquet500.install b/dev/tasks/linux-packages/apache-arrow/debian/libparquet600.install
similarity index 100%
rename from dev/tasks/linux-packages/apache-arrow/debian/libparquet500.install
rename to dev/tasks/linux-packages/apache-arrow/debian/libparquet600.install
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib500.install b/dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib600.install
similarity index 100%
rename from dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib500.install
rename to dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib600.install
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libplasma500.install b/dev/tasks/linux-packages/apache-arrow/debian/libplasma600.install
similarity index 100%
rename from dev/tasks/linux-packages/apache-arrow/debian/libplasma500.install
rename to dev/tasks/linux-packages/apache-arrow/debian/libplasma600.install
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index 74aaff24be5..1246cea400a 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -476,60 +476,60 @@ tasks:
       - libarrow-dataset-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-dataset-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-dataset-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset-glib500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-dataset-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-dataset500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-dataset500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-dataset-glib600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libarrow-dataset-glib600_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-dataset600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libarrow-dataset600_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-flight-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-flight-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-flight-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-flight-glib500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-flight-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-flight500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-flight500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-flight-glib600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libarrow-flight-glib600_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-flight600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libarrow-flight600_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-glib500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-glib500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-glib600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libarrow-glib600_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-python-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-python-flight-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-python-flight500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-python-flight500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-python500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-python500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-python-flight600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libarrow-python-flight600_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-python600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libarrow-python600_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libarrow600_{no_rc_version}-1_[a-z0-9]+.deb
       - libgandiva-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libgandiva-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libgandiva-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva-glib500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libgandiva-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libgandiva500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libgandiva500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libgandiva-glib600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libgandiva-glib600_{no_rc_version}-1_[a-z0-9]+.deb
+      - libgandiva600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libgandiva600_{no_rc_version}-1_[a-z0-9]+.deb
       - libparquet-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libparquet-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libparquet-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet-glib500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libparquet-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libparquet500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libparquet500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libparquet-glib600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libparquet-glib600_{no_rc_version}-1_[a-z0-9]+.deb
+      - libparquet600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libparquet600_{no_rc_version}-1_[a-z0-9]+.deb
     {% if architecture == "amd64" %}
       - gir1.2-arrow-cuda-1.0_{no_rc_version}-1_[a-z0-9]+.deb
       - gir1.2-plasma-1.0_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-cuda-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libarrow-cuda-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-cuda-glib500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-cuda-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libarrow-cuda500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libarrow-cuda500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-cuda-glib600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libarrow-cuda-glib600_{no_rc_version}-1_[a-z0-9]+.deb
+      - libarrow-cuda600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libarrow-cuda600_{no_rc_version}-1_[a-z0-9]+.deb
       - libplasma-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libplasma-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb
       - libplasma-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb
-      - libplasma-glib500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libplasma-glib500_{no_rc_version}-1_[a-z0-9]+.deb
-      - libplasma500-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
-      - libplasma500_{no_rc_version}-1_[a-z0-9]+.deb
+      - libplasma-glib600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libplasma-glib600_{no_rc_version}-1_[a-z0-9]+.deb
+      - libplasma600-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
+      - libplasma600_{no_rc_version}-1_[a-z0-9]+.deb
       - plasma-store-server-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb
       - plasma-store-server_{no_rc_version}-1_[a-z0-9]+.deb
     {% endif %}
diff --git a/java/adapter/avro/pom.xml b/java/adapter/avro/pom.xml
index 9a424d01200..96952454e3b 100644
--- a/java/adapter/avro/pom.xml
+++ b/java/adapter/avro/pom.xml
@@ -16,7 +16,7 @@
   <parent>
     <groupId>org.apache.arrow</groupId>
     <artifactId>arrow-java-root</artifactId>
-    <version>5.0.0-SNAPSHOT</version>
+    <version>6.0.0-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 
diff --git a/java/adapter/jdbc/pom.xml b/java/adapter/jdbc/pom.xml
index d080aedd532..76a5f8fba8e 100644
--- a/java/adapter/jdbc/pom.xml
+++ b/java/adapter/jdbc/pom.xml
@@ -16,7 +16,7 @@
     <parent>
         <groupId>org.apache.arrow</groupId>
         <artifactId>arrow-java-root</artifactId>
-        <version>5.0.0-SNAPSHOT</version>
+        <version>6.0.0-SNAPSHOT</version>
         <relativePath>../../pom.xml</relativePath>
     </parent>
 
diff --git a/java/adapter/orc/pom.xml b/java/adapter/orc/pom.xml
index 6e30d3f0209..9d50107b6dd 100644
--- a/java/adapter/orc/pom.xml
+++ b/java/adapter/orc/pom.xml
@@ -87,7 +87,7 @@
     <parent>
         <groupId>org.apache.arrow</groupId>
         <artifactId>arrow-java-root</artifactId>
-        <version>5.0.0-SNAPSHOT</version>
+        <version>6.0.0-SNAPSHOT</version>
         <relativePath>../../pom.xml</relativePath>
     </parent>
 
diff --git a/java/algorithm/pom.xml b/java/algorithm/pom.xml
index 7d140d95e2e..cb504c73b6a 100644
--- a/java/algorithm/pom.xml
+++ b/java/algorithm/pom.xml
@@ -14,7 +14,7 @@
   <parent>
     <groupId>org.apache.arrow</groupId>
     <artifactId>arrow-java-root</artifactId>
-    <version>5.0.0-SNAPSHOT</version>
+    <version>6.0.0-SNAPSHOT</version>
   </parent>
   <artifactId>arrow-algorithm</artifactId>
   <name>Arrow Algorithms</name>
diff --git a/java/compression/pom.xml b/java/compression/pom.xml
index c8fc4efc9c4..652d9e692ec 100644
--- a/java/compression/pom.xml
+++ b/java/compression/pom.xml
@@ -14,7 +14,7 @@
   <parent>
     <groupId>org.apache.arrow</groupId>
     <artifactId>arrow-java-root</artifactId>
-    <version>5.0.0-SNAPSHOT</version>
+    <version>6.0.0-SNAPSHOT</version>
   </parent>
   <artifactId>arrow-compression</artifactId>
   <name>Arrow Compression</name>
diff --git a/java/dataset/pom.xml b/java/dataset/pom.xml
index d4fea9f0efe..a71b36210f6 100644
--- a/java/dataset/pom.xml
+++ b/java/dataset/pom.xml
@@ -15,7 +15,7 @@
     <parent>
         <artifactId>arrow-java-root</artifactId>
         <groupId>org.apache.arrow</groupId>
-        <version>5.0.0-SNAPSHOT</version>
+        <version>6.0.0-SNAPSHOT</version>
     </parent>
     <modelVersion>4.0.0</modelVersion>
 
diff --git a/java/flight/flight-core/pom.xml b/java/flight/flight-core/pom.xml
index 00ab5141f70..93232f99156 100644
--- a/java/flight/flight-core/pom.xml
+++ b/java/flight/flight-core/pom.xml
@@ -14,7 +14,7 @@
   <parent>
     <groupId>org.apache.arrow</groupId>
     <artifactId>arrow-java-root</artifactId>
-    <version>5.0.0-SNAPSHOT</version>
+    <version>6.0.0-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 
diff --git a/java/flight/flight-grpc/pom.xml b/java/flight/flight-grpc/pom.xml
index 1c9b66e021a..48ed5000d75 100644
--- a/java/flight/flight-grpc/pom.xml
+++ b/java/flight/flight-grpc/pom.xml
@@ -13,7 +13,7 @@
   <parent>
     <artifactId>arrow-java-root</artifactId>
     <groupId>org.apache.arrow</groupId>
-    <version>5.0.0-SNAPSHOT</version>
+    <version>6.0.0-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
   <modelVersion>4.0.0</modelVersion>
diff --git a/java/format/pom.xml b/java/format/pom.xml
index 5c9fc4217fe..aeb5d86dd20 100644
--- a/java/format/pom.xml
+++ b/java/format/pom.xml
@@ -15,7 +15,7 @@
 <parent>
   <artifactId>arrow-java-root</artifactId>
   <groupId>org.apache.arrow</groupId>
-  <version>5.0.0-SNAPSHOT</version>
+  <version>6.0.0-SNAPSHOT</version>
 </parent>
 
 <artifactId>arrow-format</artifactId>
diff --git a/java/gandiva/pom.xml b/java/gandiva/pom.xml
index 66869ccc18a..3b13f809829 100644
--- a/java/gandiva/pom.xml
+++ b/java/gandiva/pom.xml
@@ -14,7 +14,7 @@
     <parent>
       <groupId>org.apache.arrow</groupId>
       <artifactId>arrow-java-root</artifactId>
-      <version>5.0.0-SNAPSHOT</version>
+      <version>6.0.0-SNAPSHOT</version>
     </parent>
 
     <groupId>org.apache.arrow.gandiva</groupId>
diff --git a/java/memory/memory-core/pom.xml b/java/memory/memory-core/pom.xml
index dc4c2703306..65abe8e0152 100644
--- a/java/memory/memory-core/pom.xml
+++ b/java/memory/memory-core/pom.xml
@@ -13,7 +13,7 @@
   <parent>
     <artifactId>arrow-memory</artifactId>
     <groupId>org.apache.arrow</groupId>
-    <version>5.0.0-SNAPSHOT</version>
+    <version>6.0.0-SNAPSHOT</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
 
diff --git a/java/memory/memory-netty/pom.xml b/java/memory/memory-netty/pom.xml
index 0ea36359c8d..b5f256fb102 100644
--- a/java/memory/memory-netty/pom.xml
+++ b/java/memory/memory-netty/pom.xml
@@ -13,7 +13,7 @@
   <parent>
     <artifactId>arrow-memory</artifactId>
     <groupId>org.apache.arrow</groupId>
-    <version>5.0.0-SNAPSHOT</version>
+    <version>6.0.0-SNAPSHOT</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
 
diff --git a/java/memory/memory-unsafe/pom.xml b/java/memory/memory-unsafe/pom.xml
index 12d6d3a8ce2..d5ceb2cde57 100644
--- a/java/memory/memory-unsafe/pom.xml
+++ b/java/memory/memory-unsafe/pom.xml
@@ -13,7 +13,7 @@
   <parent>
     <artifactId>arrow-memory</artifactId>
     <groupId>org.apache.arrow</groupId>
-    <version>5.0.0-SNAPSHOT</version>
+    <version>6.0.0-SNAPSHOT</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
 
diff --git a/java/memory/pom.xml b/java/memory/pom.xml
index 3882805759f..a7520a76edb 100644
--- a/java/memory/pom.xml
+++ b/java/memory/pom.xml
@@ -14,7 +14,7 @@
   <parent>
     <groupId>org.apache.arrow</groupId>
     <artifactId>arrow-java-root</artifactId>
-    <version>5.0.0-SNAPSHOT</version>
+    <version>6.0.0-SNAPSHOT</version>
   </parent>
   <artifactId>arrow-memory</artifactId>
   <name>Arrow Memory</name>
diff --git a/java/performance/pom.xml b/java/performance/pom.xml
index d6a0b950d4a..d41df57876b 100644
--- a/java/performance/pom.xml
+++ b/java/performance/pom.xml
@@ -14,7 +14,7 @@
     <parent>
         <artifactId>arrow-java-root</artifactId>
         <groupId>org.apache.arrow</groupId>
-        <version>5.0.0-SNAPSHOT</version>
+        <version>6.0.0-SNAPSHOT</version>
     </parent>
     <artifactId>arrow-performance</artifactId>
     <packaging>jar</packaging>
@@ -86,7 +86,7 @@
         <dependency>
             <groupId>org.apache.arrow</groupId>
             <artifactId>arrow-algorithm</artifactId>
-            <version>5.0.0-SNAPSHOT</version>
+            <version>6.0.0-SNAPSHOT</version>
             <scope>test</scope>
         </dependency>
     </dependencies>
diff --git a/java/plasma/pom.xml b/java/plasma/pom.xml
index ab19e3ff7f9..57609fc70f2 100644
--- a/java/plasma/pom.xml
+++ b/java/plasma/pom.xml
@@ -14,7 +14,7 @@
     <parent>
         <groupId>org.apache.arrow</groupId>
         <artifactId>arrow-java-root</artifactId>
-        <version>5.0.0-SNAPSHOT</version>
+        <version>6.0.0-SNAPSHOT</version>
     </parent>
     <artifactId>arrow-plasma</artifactId>
     <name>Arrow Plasma Client</name>
diff --git a/java/pom.xml b/java/pom.xml
index c9fc2c331e6..8752abe1fc4 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -20,7 +20,7 @@
 
   <groupId>org.apache.arrow</groupId>
   <artifactId>arrow-java-root</artifactId>
-  <version>5.0.0-SNAPSHOT</version>
+  <version>6.0.0-SNAPSHOT</version>
   <packaging>pom</packaging>
 
   <name>Apache Arrow Java Root POM</name>
diff --git a/java/tools/pom.xml b/java/tools/pom.xml
index fafa6fa34e2..aed13379c50 100644
--- a/java/tools/pom.xml
+++ b/java/tools/pom.xml
@@ -14,7 +14,7 @@
     <parent>
         <groupId>org.apache.arrow</groupId>
         <artifactId>arrow-java-root</artifactId>
-        <version>5.0.0-SNAPSHOT</version>
+        <version>6.0.0-SNAPSHOT</version>
     </parent>
     <artifactId>arrow-tools</artifactId>
     <name>Arrow Tools</name>
diff --git a/java/vector/pom.xml b/java/vector/pom.xml
index 2a610d177a2..1336f0debe2 100644
--- a/java/vector/pom.xml
+++ b/java/vector/pom.xml
@@ -14,7 +14,7 @@
   <parent>
     <groupId>org.apache.arrow</groupId>
     <artifactId>arrow-java-root</artifactId>
-    <version>5.0.0-SNAPSHOT</version>
+    <version>6.0.0-SNAPSHOT</version>
   </parent>
   <artifactId>arrow-vector</artifactId>
   <name>Arrow Vectors</name>
diff --git a/js/package.json b/js/package.json
index 45e6487b0d8..efc5f0ff253 100644
--- a/js/package.json
+++ b/js/package.json
@@ -101,5 +101,5 @@
   "engines": {
     "node": ">=12.0"
   },
-  "version": "5.0.0-SNAPSHOT"
+  "version": "6.0.0-SNAPSHOT"
 }
diff --git a/matlab/CMakeLists.txt b/matlab/CMakeLists.txt
index 18c1237a491..3c3b873ef37 100644
--- a/matlab/CMakeLists.txt
+++ b/matlab/CMakeLists.txt
@@ -183,7 +183,7 @@ endmacro()
 
 set(CMAKE_CXX_STANDARD 11)
 
-set(MLARROW_VERSION "5.0.0-SNAPSHOT")
+set(MLARROW_VERSION "6.0.0-SNAPSHOT")
 string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" MLARROW_BASE_VERSION "${MLARROW_VERSION}")
 
 project(mlarrow VERSION "${MLARROW_BASE_VERSION}")
diff --git a/python/setup.py b/python/setup.py
index 80b6d70dd08..e0aff1aef2d 100755
--- a/python/setup.py
+++ b/python/setup.py
@@ -522,7 +522,7 @@ def _move_shared_libs_unix(build_prefix, build_lib, lib_name):
 
 # If the event of not running from a git clone (e.g. from a git archive
 # or a Python sdist), see if we can set the version number ourselves
-default_version = '5.0.0-SNAPSHOT'
+default_version = '6.0.0-SNAPSHOT'
 if (not os.path.exists('../.git') and
         not os.environ.get('SETUPTOOLS_SCM_PRETEND_VERSION')):
     os.environ['SETUPTOOLS_SCM_PRETEND_VERSION'] = \
diff --git a/r/DESCRIPTION b/r/DESCRIPTION
index 18ab0f99ed1..a0c4b61b7a0 100644
--- a/r/DESCRIPTION
+++ b/r/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: arrow
 Title: Integration to 'Apache' 'Arrow'
-Version: 4.0.1.9000
+Version: 5.0.0.9000
 Authors@R: c(
     person("Neal", "Richardson", email = "neal@ursalabs.org", role = c("aut", "cre")),
     person("Ian", "Cook", email = "ianmcook@gmail.com", role = c("aut")),
diff --git a/r/NEWS.md b/r/NEWS.md
index 5ec8492a9ca..a2b3f39c18a 100644
--- a/r/NEWS.md
+++ b/r/NEWS.md
@@ -17,6 +17,8 @@
   under the License.
 -->
 
+# arrow 5.0.0.9000
+
 # arrow 4.0.1.9000
 
 ## More dplyr
diff --git a/ruby/red-arrow-cuda/lib/arrow-cuda/version.rb b/ruby/red-arrow-cuda/lib/arrow-cuda/version.rb
index 1853aa7cf9b..dbaf09a029f 100644
--- a/ruby/red-arrow-cuda/lib/arrow-cuda/version.rb
+++ b/ruby/red-arrow-cuda/lib/arrow-cuda/version.rb
@@ -16,7 +16,7 @@
 # under the License.
 
 module ArrowCUDA
-  VERSION = "5.0.0-SNAPSHOT"
+  VERSION = "6.0.0-SNAPSHOT"
 
   module Version
     numbers, TAG = VERSION.split("-")
diff --git a/ruby/red-arrow-dataset/lib/arrow-dataset/version.rb b/ruby/red-arrow-dataset/lib/arrow-dataset/version.rb
index 390ce96fa2d..56e57651f96 100644
--- a/ruby/red-arrow-dataset/lib/arrow-dataset/version.rb
+++ b/ruby/red-arrow-dataset/lib/arrow-dataset/version.rb
@@ -16,7 +16,7 @@
 # under the License.
 
 module ArrowDataset
-  VERSION = "5.0.0-SNAPSHOT"
+  VERSION = "6.0.0-SNAPSHOT"
 
   module Version
     numbers, TAG = VERSION.split("-")
diff --git a/ruby/red-arrow-flight/lib/arrow-flight/version.rb b/ruby/red-arrow-flight/lib/arrow-flight/version.rb
index c18cde8aee4..75e8b2e522a 100644
--- a/ruby/red-arrow-flight/lib/arrow-flight/version.rb
+++ b/ruby/red-arrow-flight/lib/arrow-flight/version.rb
@@ -16,7 +16,7 @@
 # under the License.
 
 module ArrowFlight
-  VERSION = "5.0.0-SNAPSHOT"
+  VERSION = "6.0.0-SNAPSHOT"
 
   module Version
     numbers, TAG = VERSION.split("-")
diff --git a/ruby/red-arrow/lib/arrow/version.rb b/ruby/red-arrow/lib/arrow/version.rb
index 0c88191f662..6979bc80fc5 100644
--- a/ruby/red-arrow/lib/arrow/version.rb
+++ b/ruby/red-arrow/lib/arrow/version.rb
@@ -16,7 +16,7 @@
 # under the License.
 
 module Arrow
-  VERSION = "5.0.0-SNAPSHOT"
+  VERSION = "6.0.0-SNAPSHOT"
 
   module Version
     numbers, TAG = VERSION.split("-")
diff --git a/ruby/red-gandiva/lib/gandiva/version.rb b/ruby/red-gandiva/lib/gandiva/version.rb
index 1b45d165bb9..2b38fb777d0 100644
--- a/ruby/red-gandiva/lib/gandiva/version.rb
+++ b/ruby/red-gandiva/lib/gandiva/version.rb
@@ -16,7 +16,7 @@
 # under the License.
 
 module Gandiva
-  VERSION = "5.0.0-SNAPSHOT"
+  VERSION = "6.0.0-SNAPSHOT"
 
   module Version
     numbers, TAG = VERSION.split("-")
diff --git a/ruby/red-parquet/lib/parquet/version.rb b/ruby/red-parquet/lib/parquet/version.rb
index 8d9d134b33c..f803eb2be2e 100644
--- a/ruby/red-parquet/lib/parquet/version.rb
+++ b/ruby/red-parquet/lib/parquet/version.rb
@@ -16,7 +16,7 @@
 # under the License.
 
 module Parquet
-  VERSION = "5.0.0-SNAPSHOT"
+  VERSION = "6.0.0-SNAPSHOT"
 
   module Version
     numbers, TAG = VERSION.split("-")
diff --git a/ruby/red-plasma/lib/plasma/version.rb b/ruby/red-plasma/lib/plasma/version.rb
index 23a4babd91c..80324291755 100644
--- a/ruby/red-plasma/lib/plasma/version.rb
+++ b/ruby/red-plasma/lib/plasma/version.rb
@@ -16,7 +16,7 @@
 # under the License.
 
 module Plasma
-  VERSION = "5.0.0-SNAPSHOT"
+  VERSION = "6.0.0-SNAPSHOT"
 
   module Version
     numbers, TAG = VERSION.split("-")

From 52a902cd4759f3b633ab34bcdf64f98274dc2096 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Thu, 29 Jul 2021 05:42:54 +0900
Subject: [PATCH 661/719] ARROW-13477: [Release] Pass ARTIFACTORY_API_KEY to
 the upload script
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #10820 from kszucs/post-binary-upload

Lead-authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Co-authored-by: Sutou Kouhei <kou@cozmixng.org>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 dev/release/post-02-binary.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/dev/release/post-02-binary.sh b/dev/release/post-02-binary.sh
index e6aa08301df..dfbbdab85fd 100755
--- a/dev/release/post-02-binary.sh
+++ b/dev/release/post-02-binary.sh
@@ -83,6 +83,7 @@ docker_run \
     --trace \
     "${rake_tasks[@]}" \
     APT_TARGETS=$(IFS=,; echo "${apt_targets[*]}") \
+    ARTIFACTORY_API_KEY="${ARTIFACTORY_API_KEY}" \
     ARTIFACTS_DIR="${tmp_dir}/artifacts" \
     RC=${rc} \
     VERSION=${version} \

From 6a50634f4f1aff200bc937beb3d80dbd2ea2c3f0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Thu, 29 Jul 2021 06:00:06 +0900
Subject: [PATCH 662/719] ARROW-13475: [Release] Don't consider rust tarballs
 when cleaning up old releases
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

After uploading the source release to apache svn I realized that all of the earlier arrow releases were removed.
Since we ship rust tarballs separately the regex pattern filtered out all previous apache/arrow releases.

I had to revert the svn repository to the previous revision.

Closes #10819 from kszucs/release-upload

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 dev/release/post-01-upload.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dev/release/post-01-upload.sh b/dev/release/post-01-upload.sh
index 4f8053de8c9..56977e79f53 100755
--- a/dev/release/post-01-upload.sh
+++ b/dev/release/post-01-upload.sh
@@ -52,7 +52,7 @@ svn add ${tmp_dir}/release/${release_version}
 echo "Keep only the three most recent versions"
 old_releases=$(
   svn ls ${tmp_dir}/release/ | \
-  grep '^arrow-' | \
+  grep -E '^arrow-[0-9\.]+' | \
   sort --version-sort --reverse | \
   tail -n +4
 )

From 835e2124ff0b571e2d599b6b7ca659ff27ff9f31 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Thu, 29 Jul 2021 06:12:04 +0900
Subject: [PATCH 663/719] ARROW-13478: [Release] Unnecessary rc-number argument
 for the version bumping post-release script
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #10822 from kszucs/post-12-version

Authored-by: Krisztián Szűcs <szucs.krisztian@gmail.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 dev/release/01-prepare-test.rb                | 29 +++++++++++--------
 ...12-version.sh => post-12-bump-versions.sh} | 14 ++++-----
 2 files changed, 24 insertions(+), 19 deletions(-)
 rename dev/release/{post-12-version.sh => post-12-bump-versions.sh} (89%)

diff --git a/dev/release/01-prepare-test.rb b/dev/release/01-prepare-test.rb
index dd908ad8d77..1006ce17fb9 100644
--- a/dev/release/01-prepare-test.rb
+++ b/dev/release/01-prepare-test.rb
@@ -31,7 +31,6 @@ def setup
       Dir.chdir(@test_git_repository) do
         @tag_name = "apache-arrow-#{@release_version}"
         @release_branch = "testing-release-#{@release_version}-rc0"
-        @script = "dev/release/01-prepare.sh"
         git("checkout", "-b", @release_branch, @current_commit)
         yield
       end
@@ -43,7 +42,7 @@ def omit_on_release_branch
     omit("Not for release branch") if on_release_branch?
   end
 
-  def prepare(script, *targets)
+  def prepare(*targets)
     if targets.last.is_a?(Hash)
       additional_env = targets.pop
     else
@@ -54,7 +53,16 @@ def prepare(script, *targets)
       env["PREPARE_#{target}"] = "1"
     end
     env = env.merge(additional_env)
-    sh(env, script, @release_version, @next_version, "0")
+    sh(env, "dev/release/01-prepare.sh", @release_version, @next_version, "0")
+  end
+
+  def bump_versions(*targets)
+    env = { "BUMP_DEFAULT" => "0" }
+    targets.each do |target|
+      env["BUMP_#{target}"] = "1"
+    end
+    sh(env, "dev/release/post-12-bump-versions.sh", @release_version,
+       @next_version)
   end
 
   def parse_patch(patch)
@@ -82,10 +90,7 @@ def parse_patch(patch)
   def test_linux_packages
     user = "Arrow Developers"
     email = "dev@arrow.apache.org"
-    prepare("dev/release/01-prepare.sh",
-            "LINUX_PACKAGES",
-            "DEBFULLNAME" => user,
-            "DEBEMAIL" => email)
+    prepare("LINUX_PACKAGES", "DEBFULLNAME" => user, "DEBEMAIL" => email)
     changes = parse_patch(git("log", "-n", "1", "-p"))
     sampled_changes = changes.collect do |change|
       {
@@ -127,7 +132,7 @@ def test_linux_packages
 
   def test_version_pre_tag
     omit_on_release_branch
-    prepare("dev/release/01-prepare.sh", "VERSION_PRE_TAG")
+    prepare("VERSION_PRE_TAG")
     assert_equal([
                    {
                      path: "c_glib/meson.build",
@@ -337,10 +342,10 @@ def test_version_pre_tag
 
   def test_version_post_tag
     if on_release_branch?
-      prepare("dev/release/post-12-version.sh", "VERSION_POST_TAG")
+      bump_versions("VERSION_POST_TAG")
     else
-      prepare("dev/release/01-prepare.sh", "VERSION_PRE_TAG")
-      prepare("dev/release/post-12-version.sh", "VERSION_POST_TAG")
+      prepare("VERSION_PRE_TAG")
+      bump_versions("VERSION_POST_TAG")
     end
     assert_equal([
                    {
@@ -539,7 +544,7 @@ def test_version_post_tag
   end
 
   def test_deb_package_names
-    prepare("dev/release/post-12-version.sh", "DEB_PACKAGE_NAMES")
+    bump_versions("DEB_PACKAGE_NAMES")
     changes = parse_patch(git("log", "-n", "1", "-p"))
     sampled_changes = changes.collect do |change|
       first_hunk = change[:hunks][0]
diff --git a/dev/release/post-12-version.sh b/dev/release/post-12-bump-versions.sh
similarity index 89%
rename from dev/release/post-12-version.sh
rename to dev/release/post-12-bump-versions.sh
index 9ca008625d4..8f3bf4f98d3 100755
--- a/dev/release/post-12-version.sh
+++ b/dev/release/post-12-bump-versions.sh
@@ -21,14 +21,14 @@ set -ue
 
 SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
 
-if [ "$#" -ne 3 ]; then
-  echo "Usage: $0 <version> <next_version> <rc-num>"
+if [ "$#" -ne 2 ]; then
+  echo "Usage: $0 <version> <next_version>"
   exit 1
 fi
 
-: ${PREPARE_DEFAULT:=1}
-: ${PREPARE_VERSION_POST_TAG:=${PREPARE_DEFAULT}}
-: ${PREPARE_DEB_PACKAGE_NAMES:=${PREPARE_DEFAULT}}
+: ${BUMP_DEFAULT:=1}
+: ${BUMP_VERSION_POST_TAG:=${BUMP_DEFAULT}}
+: ${BUMP_DEB_PACKAGE_NAMES:=${BUMP_DEFAULT}}
 
 . $SOURCE_DIR/utils-prepare.sh
 
@@ -36,13 +36,13 @@ version=$1
 next_version=$2
 next_version_snapshot="${next_version}-SNAPSHOT"
 
-if [ ${PREPARE_VERSION_POST_TAG} -gt 0 ]; then
+if [ ${BUMP_VERSION_POST_TAG} -gt 0 ]; then
   echo "Updating versions for ${next_version_snapshot}"
   update_versions "${version}" "${next_version}" "snapshot"
   git commit -m "[Release] Update versions for ${next_version_snapshot}"
 fi
 
-if [ ${PREPARE_DEB_PACKAGE_NAMES} -gt 0 ]; then
+if [ ${BUMP_DEB_PACKAGE_NAMES} -gt 0 ]; then
   echo "Updating .deb package names for ${next_version}"
   so_version() {
     local version=$1

From bbcd7de5aaa516b69a7a50ab30c543226706c2a4 Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Thu, 29 Jul 2021 10:39:03 +0900
Subject: [PATCH 664/719] MINOR: [R] Bump 5.0.0 version in r/NEWS.md

FYI @kszucs, this was missed in #10821. There's slightly different version bumping logic when going from snapshot to release than when going from release to snapshot: https://github.com/apache/arrow/blob/master/dev/release/utils-prepare.sh#L124-L136

So now that we don't rebase master on the release, when bumping the snapshot version on master after the release, it would be good to run `update_versions` twice, first to the release version, then to the new snapshot version. Or else first cherry-pick the version bump commit from the release branch.

Closes #10824 from nealrichardson/bump-r-news-version

Authored-by: Neal Richardson <neal.p.richardson@gmail.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 r/NEWS.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/r/NEWS.md b/r/NEWS.md
index a2b3f39c18a..2a22681e457 100644
--- a/r/NEWS.md
+++ b/r/NEWS.md
@@ -19,7 +19,7 @@
 
 # arrow 5.0.0.9000
 
-# arrow 4.0.1.9000
+# arrow 5.0.0
 
 ## More dplyr
 

From 9b09c6c44bb0c5569fda3770aedac5b3d35029d2 Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Thu, 29 Jul 2021 10:44:10 +0900
Subject: [PATCH 665/719] ARROW-13484: [Release] Add support for uploading
 Amazon Linux 2 packages

Closes #10827 from kou/release-post-02-binary-amazon-linux

Authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 dev/release/post-02-binary.sh | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/dev/release/post-02-binary.sh b/dev/release/post-02-binary.sh
index dfbbdab85fd..ef09ecf50b3 100755
--- a/dev/release/post-02-binary.sh
+++ b/dev/release/post-02-binary.sh
@@ -45,6 +45,7 @@ fi
 # To deactivate one category, deactivate the category and all of its dependents.
 # To explicitly select one category, set DEPLOY_DEFAULT=0 DEPLOY_X=1.
 : ${DEPLOY_DEFAULT:=1}
+: ${DEPLOY_AMAZON_LINUX:=${DEPLOY_DEFAULT}}
 : ${DEPLOY_CENTOS:=${DEPLOY_DEFAULT}}
 : ${DEPLOY_DEBIAN:=${DEPLOY_DEFAULT}}
 : ${DEPLOY_NUGET:=${DEPLOY_DEFAULT}}
@@ -54,24 +55,28 @@ fi
 rake_tasks=()
 apt_targets=()
 yum_targets=()
-if [ ${DEPLOY_DEBIAN} -gt 0 ]; then
-  rake_tasks+=(apt:release)
-  apt_targets+=(debian)
-fi
-if [ ${DEPLOY_UBUNTU} -gt 0 ]; then
-  rake_tasks+=(apt:release)
-  apt_targets+=(ubuntu)
+if [ ${DEPLOY_AMAZON_LINUX} -gt 0 ]; then
+  rake_tasks+=(yum:release)
+  yum_targets+=(amazon-linux)
 fi
 if [ ${DEPLOY_CENTOS} -gt 0 ]; then
   rake_tasks+=(yum:release)
   yum_targets+=(centos)
 fi
+if [ ${DEPLOY_DEBIAN} -gt 0 ]; then
+  rake_tasks+=(apt:release)
+  apt_targets+=(debian)
+fi
 if [ ${DEPLOY_NUGET} -gt 0 ]; then
   rake_tasks+=(nuget:release)
 fi
 if [ ${DEPLOY_PYTHON} -gt 0 ]; then
   rake_tasks+=(python:release)
 fi
+if [ ${DEPLOY_UBUNTU} -gt 0 ]; then
+  rake_tasks+=(apt:release)
+  apt_targets+=(ubuntu)
+fi
 rake_tasks+=(summary:release)
 
 tmp_dir=binary/tmp

From 0559a87e45283204c8e6e795de2f9659ff5bbe0d Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Thu, 29 Jul 2021 15:01:25 -0400
Subject: [PATCH 666/719] ARROW-13490: [R] [CI] Need to gate duckdb examples on
 duckdb version

Closes #10831 from jonkeane/ARROW-13490-duckdb-gating

Authored-by: Jonathan Keane <jkeane@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 r/R/duckdb.R       | 2 +-
 r/man/to_duckdb.Rd | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/r/R/duckdb.R b/r/R/duckdb.R
index adeda7fe5d5..0a224617983 100644
--- a/r/R/duckdb.R
+++ b/r/R/duckdb.R
@@ -40,7 +40,7 @@
 #'
 #' @name to_duckdb
 #' @export
-#' @examplesIf arrow_with_dataset() && requireNamespace("duckdb", quietly = TRUE) && requireNamespace("dplyr", quietly = TRUE)
+#' @examplesIf arrow_with_dataset() && requireNamespace("duckdb", quietly = TRUE) && packageVersion("duckdb") > "0.2.7" && requireNamespace("dplyr", quietly = TRUE)
 #' library(dplyr)
 #'
 #' ds <- InMemoryDataset$create(mtcars)
diff --git a/r/man/to_duckdb.Rd b/r/man/to_duckdb.Rd
index 7fc0a7b4073..6be65b2b76b 100644
--- a/r/man/to_duckdb.Rd
+++ b/r/man/to_duckdb.Rd
@@ -40,7 +40,7 @@ step. Internally, this calls \code{to_duckdb()} with all of the default argument
 values.
 }
 \examples{
-\dontshow{if (arrow_with_dataset() && requireNamespace("duckdb", quietly = TRUE) && requireNamespace("dplyr", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (arrow_with_dataset() && requireNamespace("duckdb", quietly = TRUE) && packageVersion("duckdb") > "0.2.7" && requireNamespace("dplyr", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
 library(dplyr)
 
 ds <- InMemoryDataset$create(mtcars)

From e6e9e6ea52b7a8f2682ffc4160168c936ca1d3e6 Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Thu, 29 Jul 2021 19:49:24 -0400
Subject: [PATCH 667/719] ARROW-13492: [R] [CI] Move r tools 35 build back to
 per-commit/pre-PR

Closes #10832 from jonkeane/ARROW-13492-rtools35-back-to-PR-checks

Authored-by: Jonathan Keane <jkeane@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 .github/workflows/r.yml                 |  15 +++-
 dev/tasks/r/github.windows.rtools35.yml | 108 ------------------------
 dev/tasks/tasks.yml                     |   4 -
 3 files changed, 14 insertions(+), 113 deletions(-)
 delete mode 100644 dev/tasks/r/github.windows.rtools35.yml

diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml
index d7e3b67bc4b..10db0bc4249 100644
--- a/.github/workflows/r.yml
+++ b/.github/workflows/r.yml
@@ -178,7 +178,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        rtools: [40]
+        rtools: [35, 40]
     env:
       TEST_R_WITH_ARROW: "TRUE"
       ARROW_R_CXXFLAGS: "-Werror"
@@ -214,11 +214,19 @@ jobs:
           path: ccache
           key: r-${{ matrix.rtools }}-ccache-mingw-${{ hashFiles('cpp/**') }}
           restore-keys: r-${{ matrix.rtools }}-ccache-mingw-
+      # We use the makepkg-mingw setup that is included in rtools40 even when
+      # we use the rtools35 compilers, so we always install R 4.0/Rtools40
       - uses: r-lib/actions/setup-r@master
         with:
           rtools-version: 40
           r-version: "4.1"
           Ncpus: 2
+      - uses: r-lib/actions/setup-r@master
+        if: ${{ matrix.rtools == 35 }}
+        with:
+          rtools-version: 35
+          r-version: "3.6"
+          Ncpus: 2
       - name: Build Arrow C++
         shell: bash
         env:
@@ -252,3 +260,8 @@ jobs:
         shell: cmd
         run: cat check/arrow.Rcheck/00install.out
         if: always()
+      # We can remove this when we drop support for Rtools 3.5.
+      - name: Ensure using system tar in actions/cache
+        run: |
+          Write-Output "${Env:windir}\System32" | `
+            Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
diff --git a/dev/tasks/r/github.windows.rtools35.yml b/dev/tasks/r/github.windows.rtools35.yml
deleted file mode 100644
index 53b4200d2ce..00000000000
--- a/dev/tasks/r/github.windows.rtools35.yml
+++ /dev/null
@@ -1,108 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# NOTE: must set "Crossbow" as name to have the badge links working in the
-# github comment reports!
-
-
-
-# NOTE: must set "Crossbow" as name to have the badge links working in the
-# github comment reports!
-name: Crossbow
-
-on:
-  push:
-    branches:
-      - "*-github-*"
-
-jobs:
-  windows-rtools35:
-      name: "AMD64 Windows R 3.6 RTools 35"
-      runs-on: windows-latest
-      timeout-minutes: 60
-      strategy:
-        fail-fast: false
-      env:
-        TEST_R_WITH_ARROW: "TRUE"
-        ARROW_R_CXXFLAGS: "-Werror"
-        _R_CHECK_TESTS_NLINES_: 0
-      steps:
-        - run: git config --global core.autocrlf false
-        - name: Checkout Crossbow
-          uses: actions/checkout@v2
-          with:
-            fetch-depth: 0
-        - name: Make R tests verbose
-          # If you get a segfault/mysterious test Execution halted,
-          # make this `true` to see where it dies.
-          if: false
-          shell: cmd
-          run: |
-            cd arrow/r/tests
-            sed -i.bak -E -e 's/"arrow"/"arrow", reporter = "location"/' testthat.R
-            rm -f testthat.R.bak
-        - name: Checkout Arrow
-          run: |
-            git clone --no-checkout {{ arrow.remote }} arrow
-            git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
-            git -C arrow checkout FETCH_HEAD
-            git -C arrow submodule update --init --recursive
-        # We use the makepkg-mingw setup / pacman that is included in rtools40
-        # even when though use the rtools35 compilers
-        - uses: r-lib/actions/setup-r@master
-          with:
-            rtools-version: 40
-            r-version: "4.0"
-            Ncpus: 2
-        - uses: r-lib/actions/setup-r@master
-          with:
-            rtools-version: 35
-            r-version: "3.6"
-            Ncpus: 2
-        - name: Build Arrow C++
-          shell: bash
-          env:
-            RTOOLS_VERSION: 35
-          run: cd arrow && ci/scripts/r_windows_build.sh
-        - uses: actions/upload-artifact@v1
-          with:
-            name: Rtools 35 Arrow C++
-            path: arrow/libarrow.zip
-        - name: Install R package dependencies
-          shell: Rscript {0}
-          run: |
-            options(pkgType="win.binary")
-            install.packages(c("remotes", "rcmdcheck"))
-            remotes::install_deps("arrow/r", dependencies = TRUE)
-        - name: Check
-          shell: Rscript {0}
-          run: |
-            Sys.setenv(
-              RWINLIB_LOCAL = file.path(Sys.getenv("GITHUB_WORKSPACE"), "arrow", "libarrow.zip"),
-              MAKEFLAGS = paste0("-j", parallel::detectCores())
-            )
-            rcmdcheck::rcmdcheck("arrow/r",
-                                build_args = '--no-build-vignettes',
-                                args = c('--no-manual', '--as-cran', '--ignore-vignettes', '--run-donttest'),
-                                error_on = 'warning',
-                                check_dir = 'check',
-                                timeout = 3600
-            )
-        - name: Dump install logs
-          shell: cmd
-          run: cat check/arrow.Rcheck/00install.out
-          if: always()
\ No newline at end of file
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index 1246cea400a..c6c26a367ef 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -995,10 +995,6 @@ tasks:
     ci: github
     template: r/github.linux.version.compatibility.yml
 
-  test-r-rtools-35:
-    ci: github
-    template: r/github.windows.rtools35.yml
-
   test-r-versions:
     ci: github
     template: r/github.linux.versions.yml

From 5f8c0fddf4c160fb379da735e85bff07750d08fa Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Fri, 30 Jul 2021 11:32:48 -0400
Subject: [PATCH 668/719] ARROW-13510: [CI][R][C++] Add -Wall to
 fedora-clang-devel as-cran checks

This also includes a change that should cause builds to fail if R package dependencies don't install successfully. (They generally do fail already, just during the test run and not during the dependency installation step, so it's harder to diagnose.)

Closes #10839 from nealrichardson/fedora-clang-warnings

Authored-by: Neal Richardson <neal.p.richardson@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 ci/scripts/r_docker_configure.sh | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/ci/scripts/r_docker_configure.sh b/ci/scripts/r_docker_configure.sh
index 3e553fe9edd..2b9bc03bea0 100755
--- a/ci/scripts/r_docker_configure.sh
+++ b/ci/scripts/r_docker_configure.sh
@@ -37,6 +37,9 @@ if [ "$RHUB_PLATFORM" = "linux-x86_64-fedora-clang" ]; then
   dnf install -y libcxx-devel
   sed -i.bak -E -e 's/(CXX1?1? =.*)/\1 -stdlib=libc++/g' $(${R_BIN} RHOME)/etc/Makeconf
   rm -rf $(${R_BIN} RHOME)/etc/Makeconf.bak
+  
+  sed -i.bak -E -e 's/(CXXFLAGS = )(.*)/\1 -g -O3 -Wall -pedantic -frtti -fPIC/' $(${R_BIN} RHOME)/etc/Makeconf
+  rm -rf $(${R_BIN} RHOME)/etc/Makeconf.bak
 fi
 
 # Special hacking to try to reproduce quirks on centos using non-default build

From 97c82ba5c34dd62111748fceef332413bc49c7e5 Mon Sep 17 00:00:00 2001
From: Dominik Moritz <domoritz@gmail.com>
Date: Fri, 30 Jul 2021 09:58:04 -0700
Subject: [PATCH 669/719] MINOR: [JS] Correct main package description in
 readme

Closes #10825 from domoritz/packages-desc

Authored-by: Dominik Moritz <domoritz@gmail.com>
Signed-off-by: Dominik Moritz <domoritz@gmail.com>
---
 js/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/js/README.md b/js/README.md
index 8da6f933649..4ab99402652 100644
--- a/js/README.md
+++ b/js/README.md
@@ -181,7 +181,7 @@ The base `apache-arrow` package includes all the compilation targets for conveni
 The targets are also published under the `@apache-arrow` namespace:
 
 ```sh
-npm install apache-arrow # <-- combined es2015/UMD + esnext/CommonJS/ESModules/UMD
+npm install apache-arrow # <-- combined es2015/CommonJS/ESModules/UMD + esnext/UMD
 npm install @apache-arrow/ts # standalone TypeScript package
 npm install @apache-arrow/es5-cjs # standalone es5/CommonJS package
 npm install @apache-arrow/es5-esm # standalone es5/ESModules package

From 8d5e6e8ebbf585267b3d4442ec87523dfd534e6d Mon Sep 17 00:00:00 2001
From: Dominik Moritz <domoritz@gmail.com>
Date: Fri, 30 Jul 2021 09:59:04 -0700
Subject: [PATCH 670/719] ARROW-12636: [JS] ESM Tree-Shaking produces broken
 code

This pull request marks arrow as having side effects since we do have side effects in the code.

Closes #10826 from domoritz/sideEffects

Authored-by: Dominik Moritz <domoritz@gmail.com>
Signed-off-by: Dominik Moritz <domoritz@gmail.com>
---
 js/gulp/package-task.js | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/js/gulp/package-task.js b/js/gulp/package-task.js
index 94986dadbea..23edf0b8c6c 100644
--- a/js/gulp/package-task.js
+++ b/js/gulp/package-task.js
@@ -49,6 +49,9 @@ const packageTask = ((cache) => memoizeTask(cache, function bundle(target, forma
 module.exports = packageTask;
 module.exports.packageTask = packageTask;
 
+// FIXME: set this to false when we have no side effects
+const sideEffects = true;
+
 const createMainPackageJson = (target, format) => (orig) => ({
     ...createTypeScriptPackageJson(target, format)(orig),
     bin: orig.bin,
@@ -67,7 +70,7 @@ const createMainPackageJson = (target, format) => (orig) => ({
     types: `${mainExport}.node.d.ts`,
     unpkg: `${mainExport}.es2015.min.js`,
     jsdelivr: `${mainExport}.es2015.min.js`,
-    sideEffects: false,
+    sideEffects: sideEffects,
     esm: { mode: `all`, sourceMap: true }
 });
 
@@ -79,7 +82,7 @@ const createTypeScriptPackageJson = (target, format) => (orig) => ({
     types: `${mainExport}.node.ts`,
     browser: `${mainExport}.dom.ts`,
     type: "module",
-    sideEffects: false,
+    sideEffects: sideEffects,
     esm: { mode: `auto`, sourceMap: true },
     dependencies: {
         '@types/flatbuffers': '*',
@@ -108,7 +111,7 @@ const createScopedPackageJSON = (target, format) => (({ name, ...orig }) =>
             // set "module" if building scoped ESM target
             module:   format === 'esm' ? `${mainExport}.node.js` : undefined,
             // set "sideEffects" to false as a hint to Webpack that it's safe to tree-shake the ESM target
-            sideEffects: format === 'esm' ? false : undefined,
+            sideEffects: format === 'esm' ? sideEffects : undefined,
             // include "esm" settings for https://www.npmjs.com/package/esm if building scoped ESM target
             esm:      format === `esm` ? { mode: `auto`, sourceMap: true } : undefined,
             // set "types" (for TypeScript/VSCode)

From e7f005d989975fd8d118d38443d1ea4c5ec87755 Mon Sep 17 00:00:00 2001
From: Dongjoon Hyun <dongjoon@apache.org>
Date: Fri, 30 Jul 2021 10:16:08 -0700
Subject: [PATCH 671/719] ARROW-13506: [C++][Java] Upgrade ORC to 1.6.9

This PR aims to upgrade Apache ORC to 1.6.9 to bring the latest bug fixes and updates.

- 1.6.9: https://orc.apache.org/news/2021/07/02/ORC-1.6.9/

Currently, `C++` module is using Apache ORC 1.6.6 and `Java` module is using 1.5.5.
Apache ORC 1.5.5 is too old and we had better make it consistent at this time.
Apache ORC community highly recommends 1.6.9.

From Apache ORC 1.6.0+, the followings are added.
- ORC-14 Add column encryption.
- ORC-189 Add timestamp with local timezone
- ORC-203 Trim minimum and maximum string values
- ORC-363 Add zstd support in Java
- ORC-397 Support selectively disabling dictionaries
- ORC-522 Add type annotations

Closes #10838 from dongjoon-hyun/ARROW-13506

Authored-by: Dongjoon Hyun <dongjoon@apache.org>
Signed-off-by: Chao Sun <sunchao@apache.org>
---
 cpp/thirdparty/versions.txt | 2 +-
 java/adapter/orc/pom.xml    | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt
index 4905d7567c1..593611fcd4c 100644
--- a/cpp/thirdparty/versions.txt
+++ b/cpp/thirdparty/versions.txt
@@ -43,7 +43,7 @@ ARROW_LZ4_BUILD_VERSION=v1.9.3
 # mimalloc 1.6.7 didn't build on Visual Studio 2015
 # https://github.com/microsoft/mimalloc/issues/353
 ARROW_MIMALLOC_BUILD_VERSION=v1.7.2
-ARROW_ORC_BUILD_VERSION=1.6.6
+ARROW_ORC_BUILD_VERSION=1.6.9
 ARROW_PROTOBUF_BUILD_VERSION=v3.14.0
 # Because of https://github.com/Tencent/rapidjson/pull/1323, we require
 # a pre-release version of RapidJSON to build with GCC 8 without
diff --git a/java/adapter/orc/pom.xml b/java/adapter/orc/pom.xml
index 9d50107b6dd..7928d2ca272 100644
--- a/java/adapter/orc/pom.xml
+++ b/java/adapter/orc/pom.xml
@@ -35,7 +35,7 @@
 	<dependency>
             <groupId>org.apache.orc</groupId>
             <artifactId>orc-core</artifactId>
-            <version>1.5.5</version>
+            <version>1.6.9</version>
             <scope>test</scope>
             <exclusions>
                 <exclusion>
@@ -79,7 +79,7 @@
         <dependency>
             <groupId>org.apache.hive</groupId>
             <artifactId>hive-storage-api</artifactId>
-            <version>2.6.0</version>
+            <version>2.7.1</version>
             <scope>test</scope>
         </dependency>
     </dependencies>

From 2b34f6629ec3fd03a1d2f27f25093a3299b5a7eb Mon Sep 17 00:00:00 2001
From: ElenaHenderson <elenavhenderson@gmail.com>
Date: Fri, 30 Jul 2021 16:37:34 -0400
Subject: [PATCH 672/719] ARROW-11691: [Developer][CI] Provide a consolidated
 .env file for benchmark-relevant environment variables

This change will allow Arrow Contributors to add and update env vars used by benchmark builds that use conbench for running benchmarks using these steps:

1. Create `apache/arrow` PR
2. Update or add env var value in `benchmarks.env`
3. Add `@ursabot please benchmark` comment to PR
4. Once benchmark builds are done, benchmark results can be viewed via compare/runs links in the PR comment where
- baseline = PR base HEAD commit with default (master branch version) `/dev/conbench_envs/benchmarks.env`
- contender = PR branch HEAD commit with overridden `/dev/conbench_envs/benchmarks.env`

Closes #10667 from ElenaHenderson/ARROW-11691-benchmark-env

Lead-authored-by: ElenaHenderson <elenavhenderson@gmail.com>
Co-authored-by: Elena Henderson <elena@e.local>
Signed-off-by: Benjamin Kietzman <bengilgit@gmail.com>
---
 dev/conbench_envs/README.md      | 214 +++++++++++++++++++++++++++++++
 dev/conbench_envs/benchmarks.env |  50 ++++++++
 dev/conbench_envs/hooks.sh       |  91 +++++++++++++
 3 files changed, 355 insertions(+)
 create mode 100644 dev/conbench_envs/README.md
 create mode 100644 dev/conbench_envs/benchmarks.env
 create mode 100755 dev/conbench_envs/hooks.sh

diff --git a/dev/conbench_envs/README.md b/dev/conbench_envs/README.md
new file mode 100644
index 00000000000..5a4eb58b244
--- /dev/null
+++ b/dev/conbench_envs/README.md
@@ -0,0 +1,214 @@
+<!---
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+# Benchmark Builds Env and Hooks
+This directory contains: 
+- [benchmarks.env](benchmarks.env) - list of env vars used for building Arrow C++/Python/R/Java/JavaScript and running benchmarks using [conbench](https://ursalabs.org/blog/announcing-conbench/).
+- [hooks.sh](hooks.sh) - hooks used by <b>@ursabot</b> benchmark builds that are triggered by `@ursabot please benchmark` PR comments. 
+
+## How to add or update Arrow build and run env vars used by `@ursabot` benchmark builds
+1. Create `apache/arrow` PR
+2. Update or add env var value in [benchmarks.env](../../dev/conbench_envs/benchmarks.env)
+3. Add `@ursabot please benchmark` comment to PR
+4. Once benchmark builds are done, benchmark results can be viewed via compare/runs links in the PR comment where
+- baseline = PR base HEAD commit with unaltered `/dev/conbench_envs/benchmarks.env`
+- contender = PR branch HEAD commit with overridden `/dev/conbench_envs/benchmarks.env`
+
+## Why do`@ursabot` benchmark builds need `hooks.sh`?
+`@ursabot` benchmark builds are maintained in Ursa's private repo.
+Benchmark builds use `hooks.sh` functions as hooks to create conda env with Arrow dependencies and build Arrow C++/Python/R/Java/JavaScript from source for a specific Arrow repo's commit.
+
+Defining hooks in Arrow repo allows benchmark builds for a specific commit to be
+compatible with the files/scripts *in that commit* which are used for installing Arrow
+dependencies and building Arrow. This allows Arrow contributors to asses the perfomance
+implications of different build options, dependency versions, etc by updating
+`hooks.sh`.
+
+## Can other repos and services use `benchmarks.env` and `hooks.sh`?
+
+Yes, other repos and services are welcome to use `benchmarks.env` and `hooks.sh` as long as 
+- existing hooks are not removed or renamed.
+- function definitions for exiting hooks can only be updated in the Arrow commit where Arrow build scripts or files with dependencies have been renamed, moved or added.
+- benchmark builds are run using `@ursabot please benchmark` PR comment to confirm that function definition updates do not break benchmark builds.
+
+## How can other repos and services use `benchmarks.env` and `hooks.sh` to setup benchmark env?
+Here are steps how `@ursabot` benchmark builds use `benchmarks.env` and `hooks.sh` to setup benchmarking env on Ubuntu:
+
+### 1. Install Arrow dependencies
+    sudo su
+    apt-get update -y -q && \
+        apt-get install -y -q --no-install-recommends \
+            autoconf \
+            ca-certificates \
+            ccache \
+            cmake \
+            g++ \
+            gcc \
+            gdb \
+            git \
+            libbenchmark-dev \
+            libboost-filesystem-dev \
+            libboost-regex-dev \
+            libboost-system-dev \
+            libbrotli-dev \
+            libbz2-dev \
+            libgflags-dev \
+            libcurl4-openssl-dev \
+            libgoogle-glog-dev \
+            liblz4-dev \
+            libprotobuf-dev \
+            libprotoc-dev \
+            libre2-dev \
+            libsnappy-dev \
+            libssl-dev \
+            libthrift-dev \
+            libutf8proc-dev \
+            libzstd-dev \
+            make \
+            ninja-build \
+            pkg-config \
+            protobuf-compiler \
+            rapidjson-dev \
+            tzdata \
+            wget && \
+        apt-get clean && \
+        rm -rf /var/lib/apt/lists*
+
+    apt-get update -y -q && \
+        apt-get install -y -q \
+            python3 \
+            python3-pip \
+            python3-dev && \
+        apt-get clean && \
+        rm -rf /var/lib/apt/lists/*
+
+### 2. Install Arrow dependencies for Java
+    sudo su
+    apt-get install openjdk-8-jdk
+    apt-get install maven
+    
+Verify that you have at least these versions of `java`, `javac` and `maven`:
+    
+    # java -version
+    openjdk version "1.8.0_292"
+    ..
+    # javac -version
+    javac 1.8.0_292
+    ...
+    # mvn -version
+    Apache Maven 3.6.3
+    ...
+
+### 3. Install Arrow dependencies for Java Script
+    sudo apt update
+    sudo apt -y upgrade
+    sudo apt update
+    sudo apt -y install curl dirmngr apt-transport-https lsb-release ca-certificates
+    curl -fsSL https://deb.nodesource.com/setup_14.x | sudo -E bash -
+    sudo apt-get install -y nodejs
+    sudo apt -y install yarn
+    sudo apt -y install gcc g++ make
+
+Verify that you have at least these versions of `node` and `yarn`:
+
+    # node --version
+    v14.17.2
+    ...
+    # yarn --version
+    1.22.5
+    ...
+    
+### 4. Install Conda
+    sudo apt install curl
+    curl -LO https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
+    sudo bash Miniconda3-latest-Linux-x86_64.sh
+    
+### 5. Set env vars:
+    export ARROW_REPO=https://github.com/apache/arrow.git
+    export BENCHMARKABLE=e6e9e6ea52b7a8f2682ffc4160168c936ca1d3e6
+    export BENCHMARKABLE_TYPE=arrow-commit
+    export PYTHON_VERSION=3.8
+    export CONBENCH_EMAIL=...
+    export CONBENCH_URL="https://conbench.ursa.dev"
+    export CONBENCH_PASSWORD=...
+    export MACHINE=...
+
+### 6. Use `create_conda_env_with_arrow_python` hook to create conda env and build Arrow C++ and Arrow Python
+    git clone "${ARROW_REPO}"
+    pushd arrow
+    git fetch -v --prune -- origin "${BENCHMARKABLE}"
+    git checkout -f "${BENCHMARKABLE}"
+    source dev/conbench_envs/hooks.sh create_conda_env_with_arrow_python
+    popd
+    
+### 7. Install conbench
+    git clone https://github.com/ursacomputing/conbench.git
+    pushd conbench
+    pip install -r requirements-cli.txt
+    pip install -U PyYAML
+    python setup.py install
+    popd
+
+### 8. Setup benchmarks repo
+    git clone https://github.com/ursacomputing/benchmarks.git
+    pushd benchmarks
+    python setup.py develop
+    popd
+    
+### 9. Setup conbench credentials
+    pushd benchmarks
+    touch .conbench
+    echo "url: $CONBENCH_URL" >> .conbench
+    echo "email: $CONBENCH_EMAIL" >> .conbench
+    echo "password: $CONBENCH_PASSWORD" >> .conbench
+    echo "host_name: $MACHINE" >> .conbench
+    popd
+ 
+### 10. Run Python benchmarks
+    cd benchmarks
+    conbench file-read ALL --iterations=3 --all=true --drop-caches=true 
+
+### 11. Use `install_archery` hook to setup archery and run C++ benchmarks
+    pushd arrow
+    source dev/conbench_envs/hooks.sh install_archery
+    popd
+    cd benchmarks
+    conbench cpp-micro --iterations=1
+
+### 12. Use `build_arrow_r` hook to build Arrow R and run R benchmarks
+    pushd arrow
+    source dev/conbench_envs/hooks.sh build_arrow_r
+    popd
+    R -e "remotes::install_github('ursacomputing/arrowbench')"
+    cd benchmarks
+    conbench dataframe-to-table ALL --iterations=3 --drop-caches=true --language=R
+
+### 13. Use `build_arrow_java` and `install_archery` hooks to build Arrow Java and run Java benchmarks
+    pushd arrow
+    source dev/conbench_envs/hooks.sh build_arrow_java
+    source dev/conbench_envs/hooks.sh install_archery
+    popd
+    cd benchmarks
+    conbench java-micro --iterations=1
+
+### 14. Use `install_java_script_project_dependencies` hook to install Java Script dependencies and run Java Script benchmarks
+    pushd arrow
+    source dev/conbench_envs/hooks.sh install_java_script_project_dependencies
+    popd
+    cd benchmarks
+    conbench js-micro
diff --git a/dev/conbench_envs/benchmarks.env b/dev/conbench_envs/benchmarks.env
new file mode 100644
index 00000000000..6c151aa7c1f
--- /dev/null
+++ b/dev/conbench_envs/benchmarks.env
@@ -0,0 +1,50 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+ARROW_BUILD_TESTS=OFF
+ARROW_BUILD_TYPE=release
+ARROW_DEPENDENCY_SOURCE=AUTO
+ARROW_DATASET=ON
+ARROW_DEFAULT_MEMORY_POOL=mimalloc
+ARROW_ENABLE_UNSAFE_MEMORY_ACCESS=true
+ARROW_ENABLE_NULL_CHECK_FOR_GET=false
+ARROW_FLIGHT=OFF
+ARROW_GANDIVA=OFF
+ARROW_HDFS=ON
+ARROW_HOME=$CONDA_PREFIX
+ARROW_INSTALL_NAME_RPATH=OFF
+ARROW_MIMALLOC=ON
+ARROW_NO_DEPRECATED_API=ON
+ARROW_ORC=ON
+ARROW_PARQUET=ON
+ARROW_PLASMA=ON
+ARROW_PYTHON=ON
+ARROW_S3=ON
+ARROW_USE_ASAN=OFF
+ARROW_USE_CCACHE=ON
+ARROW_USE_UBSAN=OFF
+ARROW_WITH_BROTLI=ON
+ARROW_WITH_BZ2=ON
+ARROW_WITH_LZ4=ON
+ARROW_WITH_SNAPPY=ON
+ARROW_WITH_ZLIB=ON
+ARROW_WITH_ZSTD=ON
+GTest_SOURCE=BUNDLED
+ORC_SOURCE=BUNDLED
+PARQUET_BUILD_EXAMPLES=ON
+PARQUET_BUILD_EXECUTABLES=ON
+PYTHON=python
+LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH
\ No newline at end of file
diff --git a/dev/conbench_envs/hooks.sh b/dev/conbench_envs/hooks.sh
new file mode 100755
index 00000000000..665a7c10587
--- /dev/null
+++ b/dev/conbench_envs/hooks.sh
@@ -0,0 +1,91 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+## These hooks are used by benchmark builds
+# to create a conda env with Arrow dependencies and build Arrow C++, Python, etc
+create_conda_env_for_benchmark_build() {
+  conda create -y -n "${BENCHMARKABLE_TYPE}" -c conda-forge \
+  --file ci/conda_env_unix.txt \
+  --file ci/conda_env_cpp.txt \
+  --file ci/conda_env_python.txt \
+  --file ci/conda_env_gandiva.txt \
+  compilers \
+  python="${PYTHON_VERSION}" \
+  pandas \
+  aws-sdk-cpp \
+  r
+}
+
+activate_conda_env_for_benchmark_build() {
+  conda init bash
+  conda activate "${BENCHMARKABLE_TYPE}"
+}
+
+install_arrow_python_dependencies() {
+  pip install -r python/requirements-build.txt -r python/requirements-test.txt
+}
+
+set_arrow_build_and_run_env_vars() {
+  set -a
+  source dev/conbench_envs/benchmarks.env
+  set +a
+}
+
+build_arrow_cpp() {
+  # Ignore the error when a cache can't be created
+  if ! ci/scripts/cpp_build.sh $(pwd) $(pwd) 2> error.log; then
+      if ! grep -q -F "Can\'t create temporary cache file" error.log; then
+         cat error.log
+      fi
+  fi
+}
+
+build_arrow_python() {
+  ci/scripts/python_build.sh $(pwd) $(pwd)
+}
+
+build_arrow_r() {
+  cat ci/etc/rprofile >> $(R RHOME)/etc/Rprofile.site
+  ci/scripts/r_deps.sh $(pwd) $(pwd)
+  (cd r; R CMD INSTALL .;)
+}
+
+build_arrow_java() {
+  ci/scripts/java_build.sh $(pwd) $(pwd)
+}
+
+install_archery() {
+  pip install -e dev/archery
+}
+
+install_java_script_project_dependencies() {
+  (cd js; yarn;)
+}
+
+create_conda_env_with_arrow_python() {
+  create_conda_env_for_benchmark_build
+  activate_conda_env_for_benchmark_build
+  install_arrow_python_dependencies
+  set_arrow_build_and_run_env_vars
+  build_arrow_cpp
+  build_arrow_python
+}
+
+"$@"

From 1c5e5a4ed77eb4622f868718ad0eceaa40b378fb Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Sat, 31 Jul 2021 05:58:03 +0900
Subject: [PATCH 673/719] ARROW-13503: [GLib][Ruby][Flight] Add support for
 DoGet

Closes #10836 from kou/glib-flight-do-get

Authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 c_glib/Gemfile                                |   2 +-
 c_glib/arrow-dataset-glib/scanner.cpp         |   2 +-
 c_glib/arrow-flight-glib/client.cpp           |  86 ++++-
 c_glib/arrow-flight-glib/client.h             |  22 +-
 c_glib/arrow-flight-glib/client.hpp           |   3 +
 c_glib/arrow-flight-glib/common.cpp           | 281 ++++++++++++++-
 c_glib/arrow-flight-glib/common.h             |  42 +++
 c_glib/arrow-flight-glib/common.hpp           |  12 +
 c_glib/arrow-flight-glib/server.cpp           | 330 +++++++++++++++++-
 c_glib/arrow-flight-glib/server.h             |  48 +++
 c_glib/arrow-flight-glib/server.hpp           |   7 +
 c_glib/arrow-glib/array-builder.cpp           |   6 +-
 c_glib/arrow-glib/datum.cpp                   |   2 +-
 c_glib/arrow-glib/error.cpp                   |   2 +-
 c_glib/arrow-glib/reader.cpp                  |  75 +++-
 c_glib/arrow-glib/reader.h                    |  11 +-
 c_glib/arrow-glib/version.h.in                |  23 ++
 .../arrow-flight-glib-docs.xml                |   4 +
 c_glib/test/flight/test-client.rb             |  21 +-
 c_glib/test/flight/test-stream-reader.rb      |  69 ++++
 c_glib/test/helper/flight-info-generator.rb   |   7 +-
 c_glib/test/helper/flight-server.rb           |  10 +
 c_glib/test/test-record-batch-reader.rb       |  56 +++
 .../test/helper/info-generator.rb             |   6 +-
 ruby/red-arrow-flight/test/helper/server.rb   |   9 +
 ruby/red-arrow-flight/test/test-client.rb     |   8 +
 ruby/red-arrow/lib/arrow/loader.rb            |   1 +
 .../lib/arrow/record-batch-reader.rb          |  41 +++
 ruby/red-arrow/red-arrow.gemspec              |   2 +-
 .../test/test-record-batch-reader.rb          |  46 +++
 30 files changed, 1192 insertions(+), 42 deletions(-)
 create mode 100644 c_glib/test/flight/test-stream-reader.rb
 create mode 100644 c_glib/test/test-record-batch-reader.rb
 create mode 100644 ruby/red-arrow/lib/arrow/record-batch-reader.rb
 create mode 100644 ruby/red-arrow/test/test-record-batch-reader.rb

diff --git a/c_glib/Gemfile b/c_glib/Gemfile
index bd91b629c9e..6864cfd3244 100644
--- a/c_glib/Gemfile
+++ b/c_glib/Gemfile
@@ -20,4 +20,4 @@
 source "https://rubygems.org/"
 
 gem "test-unit"
-gem "gobject-introspection", ">= 3.4.5"
+gem "gobject-introspection", ">= 3.4.7"
diff --git a/c_glib/arrow-dataset-glib/scanner.cpp b/c_glib/arrow-dataset-glib/scanner.cpp
index 7f8d8be5fdb..efa2a5c3287 100644
--- a/c_glib/arrow-dataset-glib/scanner.cpp
+++ b/c_glib/arrow-dataset-glib/scanner.cpp
@@ -202,7 +202,7 @@ gadataset_scanner_builder_class_init(GADatasetScannerBuilderClass *klass)
 
 /**
  * gadataset_scanner_builder_new:
- * @dataset: A #GADatasetDatast to be scanned.
+ * @dataset: A #GADatasetDataset to be scanned.
  * @error: (nullable): Return location for a #GError or %NULL.
  *
  * Returns: (nullable): A newly created #GADatasetScannerBuilder on success,
diff --git a/c_glib/arrow-flight-glib/client.cpp b/c_glib/arrow-flight-glib/client.cpp
index c25b8d4b550..7610fc98570 100644
--- a/c_glib/arrow-flight-glib/client.cpp
+++ b/c_glib/arrow-flight-glib/client.cpp
@@ -17,7 +17,7 @@
  * under the License.
  */
 
-#include <arrow-glib/error.hpp>
+#include <arrow-glib/arrow-glib.hpp>
 
 #include <arrow-flight-glib/client.hpp>
 #include <arrow-flight-glib/common.hpp>
@@ -30,6 +30,9 @@ G_BEGIN_DECLS
  * @title: Client related classes
  * @include: arrow-flight-glib/arrow-flight-glib.h
  *
+ * #GAFlightStreamReader is a class for reading record batches from a
+ * server.
+ *
  * #GAFlightCallOptions is a class for options of each call.
  *
  * #GAFlightClientOptions is a class for options of each client.
@@ -39,6 +42,20 @@ G_BEGIN_DECLS
  * Since: 5.0.0
  */
 
+G_DEFINE_TYPE(GAFlightStreamReader,
+              gaflight_stream_reader,
+              GAFLIGHT_TYPE_RECORD_BATCH_READER)
+
+static void
+gaflight_stream_reader_init(GAFlightStreamReader *object)
+{
+}
+
+static void
+gaflight_stream_reader_class_init(GAFlightStreamReaderClass *klass)
+{
+}
+
 typedef struct GAFlightCallOptionsPrivate_ {
   arrow::flight::FlightCallOptions options;
 } GAFlightCallOptionsPrivate;
@@ -251,32 +268,32 @@ gaflight_client_new(GAFlightLocation *location,
 /**
  * gaflight_client_list_flights:
  * @client: A #GAFlightClient.
- * @options: (nullable): A #GAFlightCallOptions.
  * @criteria: (nullable): A #GAFlightCriteria.
+ * @options: (nullable): A #GAFlightCallOptions.
  * @error: (nullable): Return location for a #GError or %NULL.
  *
  * Returns: (nullable) (element-type GAFlightInfo) (transfer full):
- *   The returned list of #GAFlightInfo, %NULL on error.
+ *   The returned list of #GAFlightInfo on success, %NULL on error.
  *
  * Since: 5.0.0
  */
 GList *
 gaflight_client_list_flights(GAFlightClient *client,
-                             GAFlightCallOptions *options,
                              GAFlightCriteria *criteria,
+                             GAFlightCallOptions *options,
                              GError **error)
 {
   auto flight_client = gaflight_client_get_raw(client);
-  arrow::flight::FlightCallOptions flight_default_options;
-  const auto *flight_options = &flight_default_options;
-  if (options) {
-    flight_options = gaflight_call_options_get_raw(options);
-  }
   arrow::flight::Criteria flight_default_criteria;
-  const auto *flight_criteria = &flight_default_criteria;
+  auto flight_criteria = &flight_default_criteria;
   if (criteria) {
     flight_criteria = gaflight_criteria_get_raw(criteria);
   }
+  arrow::flight::FlightCallOptions flight_default_options;
+  auto flight_options = &flight_default_options;
+  if (options) {
+    flight_options = gaflight_call_options_get_raw(options);
+  }
   std::unique_ptr<arrow::flight::FlightListing> flight_listing;
   auto status = flight_client->ListFlights(*flight_options,
                                            *flight_criteria,
@@ -305,10 +322,59 @@ gaflight_client_list_flights(GAFlightClient *client,
   return g_list_reverse(listing);
 }
 
+/**
+ * gaflight_client_do_get:
+ * @client: A #GAFlightClient.
+ * @ticket: A #GAFlightTicket.
+ * @options: (nullable): A #GAFlightCallOptions.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (nullable) (transfer full):
+ *   The #GAFlightStreamReader to read record batched from the server
+ *   on success, %NULL on error.
+ *
+ * Since: 6.0.0
+ */
+GAFlightStreamReader *
+gaflight_client_do_get(GAFlightClient *client,
+                       GAFlightTicket *ticket,
+                       GAFlightCallOptions *options,
+                       GError **error)
+{
+  auto flight_client = gaflight_client_get_raw(client);
+  const auto flight_ticket = gaflight_ticket_get_raw(ticket);
+  arrow::flight::FlightCallOptions flight_default_options;
+  auto flight_options = &flight_default_options;
+  if (options) {
+    flight_options = gaflight_call_options_get_raw(options);
+  }
+  std::unique_ptr<arrow::flight::FlightStreamReader> flight_reader;
+  auto status = flight_client->DoGet(*flight_options,
+                                     *flight_ticket,
+                                     &flight_reader);
+  if (garrow::check(error,
+                    status,
+                    "[flight-client][do-get]")) {
+    return gaflight_stream_reader_new_raw(flight_reader.release());
+  } else {
+    return NULL;
+  }
+}
+
 
 G_END_DECLS
 
 
+GAFlightStreamReader *
+gaflight_stream_reader_new_raw(
+  arrow::flight::FlightStreamReader *flight_reader)
+{
+  return GAFLIGHT_STREAM_READER(
+    g_object_new(GAFLIGHT_TYPE_STREAM_READER,
+                 "reader", flight_reader,
+                 NULL));
+}
+
 arrow::flight::FlightCallOptions *
 gaflight_call_options_get_raw(GAFlightCallOptions *options)
 {
diff --git a/c_glib/arrow-flight-glib/client.h b/c_glib/arrow-flight-glib/client.h
index ca75ae24135..bc297116135 100644
--- a/c_glib/arrow-flight-glib/client.h
+++ b/c_glib/arrow-flight-glib/client.h
@@ -24,6 +24,19 @@
 G_BEGIN_DECLS
 
 
+#define GAFLIGHT_TYPE_STREAM_READER       \
+  (gaflight_stream_reader_get_type())
+G_DECLARE_DERIVABLE_TYPE(GAFlightStreamReader,
+                         gaflight_stream_reader,
+                         GAFLIGHT,
+                         STREAM_READER,
+                         GAFlightRecordBatchReader)
+struct _GAFlightStreamReaderClass
+{
+  GAFlightRecordBatchReaderClass parent_class;
+};
+
+
 #define GAFLIGHT_TYPE_CALL_OPTIONS (gaflight_call_options_get_type())
 G_DECLARE_DERIVABLE_TYPE(GAFlightCallOptions,
                          gaflight_call_options,
@@ -76,9 +89,16 @@ gaflight_client_new(GAFlightLocation *location,
 GARROW_AVAILABLE_IN_5_0
 GList *
 gaflight_client_list_flights(GAFlightClient *client,
-                             GAFlightCallOptions *options,
                              GAFlightCriteria *criteria,
+                             GAFlightCallOptions *options,
                              GError **error);
 
+GARROW_AVAILABLE_IN_6_0
+GAFlightStreamReader *
+gaflight_client_do_get(GAFlightClient *client,
+                       GAFlightTicket *ticket,
+                       GAFlightCallOptions *options,
+                       GError **error);
+
 
 G_END_DECLS
diff --git a/c_glib/arrow-flight-glib/client.hpp b/c_glib/arrow-flight-glib/client.hpp
index c3fe7b8c790..1e68761b7ee 100644
--- a/c_glib/arrow-flight-glib/client.hpp
+++ b/c_glib/arrow-flight-glib/client.hpp
@@ -24,6 +24,9 @@
 #include <arrow-flight-glib/client.h>
 
 
+GAFlightStreamReader *
+gaflight_stream_reader_new_raw(arrow::flight::FlightStreamReader *flight_reader);
+
 arrow::flight::FlightCallOptions *
 gaflight_call_options_get_raw(GAFlightCallOptions *options);
 
diff --git a/c_glib/arrow-flight-glib/common.cpp b/c_glib/arrow-flight-glib/common.cpp
index 9f643177968..81b00f7a369 100644
--- a/c_glib/arrow-flight-glib/common.cpp
+++ b/c_glib/arrow-flight-glib/common.cpp
@@ -17,9 +17,7 @@
  * under the License.
  */
 
-#include <arrow-glib/error.hpp>
-#include <arrow-glib/ipc-options.hpp>
-#include <arrow-glib/schema.hpp>
+#include <arrow-glib/arrow-glib.hpp>
 
 #include <arrow-flight-glib/common.hpp>
 
@@ -36,7 +34,7 @@ G_BEGIN_DECLS
  * #GAFlightLocation is a class for location.
  *
  * #GAFlightDescriptor is a base class for all descriptor classes such
- * as #GArrowFlightPathDescriptor.
+ * as #GAFlightPathDescriptor.
  *
  * #GAFlightPathDescriptor is a class for path descriptor.
  *
@@ -48,6 +46,10 @@ G_BEGIN_DECLS
  *
  * #GAFlightInfo is a class for flight information.
  *
+ * #GAFlightStreamChunk is a class for a chunk in stream.
+ *
+ * #GAFlightRecordBatchReader is a class for reading record batches.
+ *
  * Since: 5.0.0
  */
 
@@ -1094,10 +1096,247 @@ gaflight_info_get_total_bytes(GAFlightInfo *info)
   return flight_info->total_bytes();
 }
 
+typedef struct GAFlightStreamChunkPrivate_ {
+  arrow::flight::FlightStreamChunk chunk;
+} GAFlightStreamChunkPrivate;
+
+enum {
+  PROP_CHUNK = 1,
+};
+
+G_DEFINE_TYPE_WITH_PRIVATE(GAFlightStreamChunk,
+                           gaflight_stream_chunk,
+                           G_TYPE_OBJECT)
+
+#define GAFLIGHT_STREAM_CHUNK_GET_PRIVATE(obj)            \
+  static_cast<GAFlightStreamChunkPrivate *>(             \
+    gaflight_stream_chunk_get_instance_private(           \
+      GAFLIGHT_STREAM_CHUNK(obj)))
+
+static void
+gaflight_stream_chunk_finalize(GObject *object)
+{
+  auto priv = GAFLIGHT_STREAM_CHUNK_GET_PRIVATE(object);
+
+  priv->chunk.~FlightStreamChunk();
+
+  G_OBJECT_CLASS(gaflight_info_parent_class)->finalize(object);
+}
+
+static void
+gaflight_stream_chunk_set_property(GObject *object,
+                                   guint prop_id,
+                                   const GValue *value,
+                                   GParamSpec *pspec)
+{
+  auto priv = GAFLIGHT_STREAM_CHUNK_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_CHUNK:
+    priv->chunk =
+      *static_cast<arrow::flight::FlightStreamChunk *>(
+        g_value_get_pointer(value));
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+gaflight_stream_chunk_init(GAFlightStreamChunk *object)
+{
+}
+
+static void
+gaflight_stream_chunk_class_init(GAFlightStreamChunkClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->finalize = gaflight_stream_chunk_finalize;
+  gobject_class->set_property = gaflight_stream_chunk_set_property;
+
+  GParamSpec *spec;
+  spec = g_param_spec_pointer("chunk",
+                              "Stream chunk",
+                              "The raw arrow::flight::FlightStreamChunk *",
+                              static_cast<GParamFlags>(G_PARAM_WRITABLE |
+                                                       G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_CHUNK, spec);
+}
+
+/**
+ * gaflight_stream_chunk_get_data:
+ * @chunk: A #GAFlightStreamChunk.
+ *
+ * Returns: (transfer full): The data of the chunk.
+ *
+ * Since: 6.0.0
+ */
+GArrowRecordBatch *
+gaflight_stream_chunk_get_data(GAFlightStreamChunk *chunk)
+{
+  auto flight_chunk = gaflight_stream_chunk_get_raw(chunk);
+  return garrow_record_batch_new_raw(&(flight_chunk->data));
+}
+
+/**
+ * gaflight_stream_chunk_get_metadata:
+ * @chunk: A #GAFlightStreamChunk.
+ *
+ * Returns: (nullable) (transfer full): The metadata of the chunk.
+ *
+ *   The metadata may be NULL.
+ *
+ * Since: 6.0.0
+ */
+GArrowBuffer *
+gaflight_stream_chunk_get_metadata(GAFlightStreamChunk *chunk)
+{
+  auto flight_chunk = gaflight_stream_chunk_get_raw(chunk);
+  if (flight_chunk->app_metadata) {
+    return garrow_buffer_new_raw(&(flight_chunk->app_metadata));
+  } else {
+    return NULL;
+  }
+}
+
+
+typedef struct GAFlightRecordBatchReaderPrivate_ {
+  arrow::flight::MetadataRecordBatchReader *reader;
+} GAFlightRecordBatchReaderPrivate;
+
+enum {
+  PROP_READER = 1,
+};
+
+G_DEFINE_TYPE_WITH_PRIVATE(GAFlightRecordBatchReader,
+                           gaflight_record_batch_reader,
+                           G_TYPE_OBJECT)
+
+#define GAFLIGHT_RECORD_BATCH_READER_GET_PRIVATE(obj)            \
+  static_cast<GAFlightRecordBatchReaderPrivate *>(               \
+    gaflight_record_batch_reader_get_instance_private(           \
+      GAFLIGHT_RECORD_BATCH_READER(obj)))
+
+static void
+gaflight_record_batch_reader_finalize(GObject *object)
+{
+  auto priv = GAFLIGHT_RECORD_BATCH_READER_GET_PRIVATE(object);
+
+  delete priv->reader;
+
+  G_OBJECT_CLASS(gaflight_info_parent_class)->finalize(object);
+}
+
+static void
+gaflight_record_batch_reader_set_property(GObject *object,
+                                          guint prop_id,
+                                          const GValue *value,
+                                          GParamSpec *pspec)
+{
+  auto priv = GAFLIGHT_RECORD_BATCH_READER_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_READER:
+    priv->reader =
+      static_cast<arrow::flight::MetadataRecordBatchReader *>(
+        g_value_get_pointer(value));
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+gaflight_record_batch_reader_init(GAFlightRecordBatchReader *object)
+{
+}
+
+static void
+gaflight_record_batch_reader_class_init(GAFlightRecordBatchReaderClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->finalize = gaflight_record_batch_reader_finalize;
+  gobject_class->set_property = gaflight_record_batch_reader_set_property;
+
+  GParamSpec *spec;
+  spec = g_param_spec_pointer("reader",
+                              "Reader",
+                              "The raw arrow::flight::MetadataRecordBatchReader *",
+                              static_cast<GParamFlags>(G_PARAM_WRITABLE |
+                                                       G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_READER, spec);
+}
+
+/**
+ * gaflight_record_batch_reader_read_next:
+ * @reader: A #GAFlightRecordBatchReader.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (transfer full): The next chunk on success, %NULL on end
+ *   of stream, %NULL on error.
+ *
+ * Since: 6.0.0
+ */
+GAFlightStreamChunk *
+gaflight_record_batch_reader_read_next(GAFlightRecordBatchReader *reader,
+                                       GError **error)
+{
+  auto flight_reader = gaflight_record_batch_reader_get_raw(reader);
+  arrow::flight::FlightStreamChunk flight_chunk;
+  auto status = flight_reader->Next(&flight_chunk);
+  if (garrow::check(error, status, "[flight-record-batch-reader][read-next]")) {
+    if (flight_chunk.data) {
+      return gaflight_stream_chunk_new_raw(&flight_chunk);
+    } else {
+      return NULL;
+    }
+  } else {
+    return NULL;
+  }
+}
+
+/**
+ * gaflight_record_batch_reader_read_all:
+ * @reader: A #GAFlightRecordBatchReader.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (transfer full): The all data on success, %NULL on error.
+ *
+ * Since: 6.0.0
+ */
+GArrowTable *
+gaflight_record_batch_reader_read_all(GAFlightRecordBatchReader *reader,
+                                      GError **error)
+{
+  auto flight_reader = gaflight_record_batch_reader_get_raw(reader);
+  std::shared_ptr<arrow::Table> arrow_table;
+  auto status = flight_reader->ReadAll(&arrow_table);
+  if (garrow::check(error, status, "[flight-record-batch-reader][read-all]")) {
+    return garrow_table_new_raw(&arrow_table);
+  } else {
+    return NULL;
+  }
+}
+
 
 G_END_DECLS
 
 
+GAFlightCriteria *
+gaflight_criteria_new_raw(const arrow::flight::Criteria *flight_criteria)
+{
+  auto criteria = g_object_new(GAFLIGHT_TYPE_CRITERIA, NULL);
+  auto priv = GAFLIGHT_CRITERIA_GET_PRIVATE(criteria);
+  priv->criteria = *flight_criteria;
+  priv->expression = g_bytes_new(priv->criteria.expression.data(),
+                                 priv->criteria.expression.size());
+  return GAFLIGHT_CRITERIA(criteria);
+}
+
 arrow::flight::Criteria *
 gaflight_criteria_get_raw(GAFlightCriteria *criteria)
 {
@@ -1139,6 +1378,17 @@ gaflight_descriptor_get_raw(GAFlightDescriptor *descriptor)
   return &(priv->descriptor);
 }
 
+GAFlightTicket *
+gaflight_ticket_new_raw(const arrow::flight::Ticket *flight_ticket)
+{
+  auto ticket = g_object_new(GAFLIGHT_TYPE_TICKET, NULL);
+  auto priv = GAFLIGHT_TICKET_GET_PRIVATE(ticket);
+  priv->ticket = *flight_ticket;
+  priv->data = g_bytes_new(priv->ticket.ticket.data(),
+                           priv->ticket.ticket.size());
+  return GAFLIGHT_TICKET(ticket);
+}
+
 arrow::flight::Ticket *
 gaflight_ticket_get_raw(GAFlightTicket *ticket)
 {
@@ -1192,3 +1442,26 @@ gaflight_info_get_raw(GAFlightInfo *info)
   auto priv = GAFLIGHT_INFO_GET_PRIVATE(info);
   return &(priv->info);
 }
+
+GAFlightStreamChunk *
+gaflight_stream_chunk_new_raw(arrow::flight::FlightStreamChunk *flight_chunk)
+{
+  return GAFLIGHT_STREAM_CHUNK(
+    g_object_new(GAFLIGHT_TYPE_STREAM_CHUNK,
+                 "chunk", flight_chunk,
+                 NULL));
+}
+
+arrow::flight::FlightStreamChunk *
+gaflight_stream_chunk_get_raw(GAFlightStreamChunk *chunk)
+{
+  auto priv = GAFLIGHT_STREAM_CHUNK_GET_PRIVATE(chunk);
+  return &(priv->chunk);
+}
+
+arrow::flight::MetadataRecordBatchReader *
+gaflight_record_batch_reader_get_raw(GAFlightRecordBatchReader *reader)
+{
+  auto priv = GAFLIGHT_RECORD_BATCH_READER_GET_PRIVATE(reader);
+  return priv->reader;
+}
diff --git a/c_glib/arrow-flight-glib/common.h b/c_glib/arrow-flight-glib/common.h
index 77f64e06123..368fb665b47 100644
--- a/c_glib/arrow-flight-glib/common.h
+++ b/c_glib/arrow-flight-glib/common.h
@@ -223,4 +223,46 @@ gint64
 gaflight_info_get_total_bytes(GAFlightInfo *info);
 
 
+#define GAFLIGHT_TYPE_STREAM_CHUNK (gaflight_stream_chunk_get_type())
+G_DECLARE_DERIVABLE_TYPE(GAFlightStreamChunk,
+                         gaflight_stream_chunk,
+                         GAFLIGHT,
+                         STREAM_CHUNK,
+                         GObject)
+struct _GAFlightStreamChunkClass
+{
+  GObjectClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_6_0
+GArrowRecordBatch *
+gaflight_stream_chunk_get_data(GAFlightStreamChunk *chunk);
+GARROW_AVAILABLE_IN_6_0
+GArrowBuffer *
+gaflight_stream_chunk_get_metadata(GAFlightStreamChunk *chunk);
+
+
+#define GAFLIGHT_TYPE_RECORD_BATCH_READER       \
+  (gaflight_record_batch_reader_get_type())
+G_DECLARE_DERIVABLE_TYPE(GAFlightRecordBatchReader,
+                         gaflight_record_batch_reader,
+                         GAFLIGHT,
+                         RECORD_BATCH_READER,
+                         GObject)
+struct _GAFlightRecordBatchReaderClass
+{
+  GObjectClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_6_0
+GAFlightStreamChunk *
+gaflight_record_batch_reader_read_next(GAFlightRecordBatchReader *reader,
+                                       GError **error);
+
+GARROW_AVAILABLE_IN_6_0
+GArrowTable *
+gaflight_record_batch_reader_read_all(GAFlightRecordBatchReader *reader,
+                                      GError **error);
+
+
 G_END_DECLS
diff --git a/c_glib/arrow-flight-glib/common.hpp b/c_glib/arrow-flight-glib/common.hpp
index c84bfe7d15c..d23f7c8867f 100644
--- a/c_glib/arrow-flight-glib/common.hpp
+++ b/c_glib/arrow-flight-glib/common.hpp
@@ -24,6 +24,8 @@
 #include <arrow-flight-glib/common.h>
 
 
+GAFlightCriteria *
+gaflight_criteria_new_raw(const arrow::flight::Criteria *flight_criteria);
 arrow::flight::Criteria *
 gaflight_criteria_get_raw(GAFlightCriteria *criteria);
 
@@ -36,6 +38,8 @@ gaflight_descriptor_new_raw(
 arrow::flight::FlightDescriptor *
 gaflight_descriptor_get_raw(GAFlightDescriptor *descriptor);
 
+GAFlightTicket *
+gaflight_ticket_new_raw(const arrow::flight::Ticket *flight_ticket);
 arrow::flight::Ticket *
 gaflight_ticket_get_raw(GAFlightTicket *ticket);
 
@@ -49,3 +53,11 @@ GAFlightInfo *
 gaflight_info_new_raw(arrow::flight::FlightInfo *flight_info);
 arrow::flight::FlightInfo *
 gaflight_info_get_raw(GAFlightInfo *info);
+
+GAFlightStreamChunk *
+gaflight_stream_chunk_new_raw(arrow::flight::FlightStreamChunk *flight_chunk);
+arrow::flight::FlightStreamChunk *
+gaflight_stream_chunk_get_raw(GAFlightStreamChunk *chunk);
+
+arrow::flight::MetadataRecordBatchReader *
+gaflight_record_batch_reader_get_raw(GAFlightRecordBatchReader *reader);
diff --git a/c_glib/arrow-flight-glib/server.cpp b/c_glib/arrow-flight-glib/server.cpp
index 8fad34926b4..e283b6d2688 100644
--- a/c_glib/arrow-flight-glib/server.cpp
+++ b/c_glib/arrow-flight-glib/server.cpp
@@ -19,7 +19,7 @@
 
 #include <arrow/util/make_unique.h>
 
-#include <arrow-glib/error.hpp>
+#include <arrow-glib/arrow-glib.hpp>
 
 #include <arrow-flight-glib/common.hpp>
 #include <arrow-flight-glib/server.hpp>
@@ -32,14 +32,223 @@ G_BEGIN_DECLS
  * @title: Server related classes
  * @include: arrow-flight-glib/arrow-flight-glib.h
  *
+ * #GAFlightDataStream is a class for producing a sequence of IPC
+ * payloads to be sent in `FlightData` protobuf messages. Generally,
+ * this is not used directly. Generally, #GAFlightRecordBatchStream is
+ * used instead.
+ *
+ * #GAFlightRecordBatchStream is a class for producing a sequence of
+ * IPC payloads to be sent in `FlightData` protobuf messages by
+ * #GArrowREcordBatchReader`.
+ *
  * #GAFlightServerOptions is a class for options of each server.
  *
+ * #GAFlightServerCallContext is a class for context of each server call.
+ *
  * #GAFlightServer is a class to develop an Apache Arrow Flight server.
  *
  * Since: 5.0.0
  */
 
 
+typedef struct GAFlightDataStreamPrivate_ {
+  arrow::flight::FlightDataStream *stream;
+} GAFlightDataStreamPrivate;
+
+enum {
+  PROP_STREAM = 1,
+};
+
+G_DEFINE_TYPE_WITH_PRIVATE(GAFlightDataStream,
+                           gaflight_data_stream,
+                           G_TYPE_OBJECT)
+
+#define GAFLIGHT_DATA_STREAM_GET_PRIVATE(obj)        \
+  static_cast<GAFlightDataStreamPrivate *>(          \
+    gaflight_data_stream_get_instance_private(       \
+      GAFLIGHT_DATA_STREAM(obj)))
+
+static void
+gaflight_data_stream_finalize(GObject *object)
+{
+  auto priv = GAFLIGHT_DATA_STREAM_GET_PRIVATE(object);
+
+  delete priv->stream;
+
+  G_OBJECT_CLASS(gaflight_data_stream_parent_class)->finalize(object);
+}
+
+static void
+gaflight_data_stream_set_property(GObject *object,
+                                  guint prop_id,
+                                  const GValue *value,
+                                  GParamSpec *pspec)
+{
+  auto priv = GAFLIGHT_DATA_STREAM_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_STREAM:
+    priv->stream = static_cast<arrow::flight::FlightDataStream *>(
+      g_value_get_pointer(value));
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+gaflight_data_stream_init(GAFlightDataStream *object)
+{
+}
+
+static void
+gaflight_data_stream_class_init(GAFlightDataStreamClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->finalize = gaflight_data_stream_finalize;
+  gobject_class->set_property = gaflight_data_stream_set_property;
+
+  GParamSpec *spec;
+  spec = g_param_spec_pointer("stream",
+                              "Stream",
+                              "The raw arrow::flight::FlightDataStream *",
+                              static_cast<GParamFlags>(G_PARAM_WRITABLE |
+                                                       G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_STREAM, spec);
+}
+
+
+typedef struct GAFlightRecordBatchStreamPrivate_ {
+  GArrowRecordBatchReader *reader;
+} GAFlightRecordBatchStreamPrivate;
+
+enum {
+  PROP_READER = 1,
+};
+
+G_DEFINE_TYPE_WITH_PRIVATE(GAFlightRecordBatchStream,
+                           gaflight_record_batch_stream,
+                           GAFLIGHT_TYPE_DATA_STREAM)
+
+#define GAFLIGHT_RECORD_BATCH_STREAM_GET_PRIVATE(obj)        \
+  static_cast<GAFlightRecordBatchStreamPrivate *>(           \
+    gaflight_record_batch_stream_get_instance_private(       \
+      GAFLIGHT_RECORD_BATCH_STREAM(obj)))
+
+static void
+gaflight_record_batch_stream_dispose(GObject *object)
+{
+  auto priv = GAFLIGHT_RECORD_BATCH_STREAM_GET_PRIVATE(object);
+
+  if (priv->reader) {
+    g_object_unref(priv->reader);
+    priv->reader = NULL;
+  }
+
+  G_OBJECT_CLASS(gaflight_record_batch_stream_parent_class)->dispose(object);
+}
+
+static void
+gaflight_record_batch_stream_set_property(GObject *object,
+                                          guint prop_id,
+                                          const GValue *value,
+                                          GParamSpec *pspec)
+{
+  auto priv = GAFLIGHT_RECORD_BATCH_STREAM_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_READER:
+    priv->reader = GARROW_RECORD_BATCH_READER(g_value_dup_object(value));
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+gaflight_record_batch_stream_get_property(GObject *object,
+                                          guint prop_id,
+                                          GValue *value,
+                                          GParamSpec *pspec)
+{
+  auto priv = GAFLIGHT_RECORD_BATCH_STREAM_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_READER:
+    g_value_set_object(value, priv->reader);
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+gaflight_record_batch_stream_init(GAFlightRecordBatchStream *object)
+{
+}
+
+static void
+gaflight_record_batch_stream_class_init(GAFlightRecordBatchStreamClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->dispose = gaflight_record_batch_stream_dispose;
+  gobject_class->set_property = gaflight_record_batch_stream_set_property;
+  gobject_class->get_property = gaflight_record_batch_stream_get_property;
+
+  GParamSpec *spec;
+  /**
+   * GAFlightRecordBatchStream:reader:
+   *
+   * The reader that produces record batches.
+   *
+   * Since: 6.0.0
+   */
+  spec = g_param_spec_object("reader",
+                             "Reader",
+                             "The reader that produces record batches",
+                             GARROW_TYPE_RECORD_BATCH_READER,
+                             static_cast<GParamFlags>(G_PARAM_READWRITE |
+                                                      G_PARAM_CONSTRUCT_ONLY));
+  g_object_class_install_property(gobject_class, PROP_READER, spec);
+}
+
+/**
+ * gaflight_record_batch_stream_new:
+ * @reader: A #GArrowRecordBatchReader to be read.
+ * @options: (nullable): A #GArrowWriteOptions for writing record batches to
+ *   a client.
+ *
+ * Returns: The newly created #GAFlightRecordBatchStream.
+ *
+ * Since: 6.0.0
+ */
+GAFlightRecordBatchStream *
+gaflight_record_batch_stream_new(GArrowRecordBatchReader *reader,
+                                 GArrowWriteOptions *options)
+{
+  auto arrow_reader = garrow_record_batch_reader_get_raw(reader);
+  auto arrow_options_default = arrow::ipc::IpcWriteOptions::Defaults();
+  arrow::ipc::IpcWriteOptions *arrow_options = NULL;
+  if (options) {
+    arrow_options = garrow_write_options_get_raw(options);
+  } else {
+    arrow_options = &arrow_options_default;
+  }
+  auto stream = arrow::internal::make_unique<
+    arrow::flight::RecordBatchStream>(arrow_reader, *arrow_options);
+  return static_cast<GAFlightRecordBatchStream *>(
+    g_object_new(GAFLIGHT_TYPE_RECORD_BATCH_STREAM,
+                 "stream", stream.release(),
+                 "reader", reader,
+                 NULL));
+}
+
+
 typedef struct GAFlightServerOptionsPrivate_ {
   arrow::flight::FlightServerOptions options;
   GAFlightLocation *location;
@@ -225,20 +434,61 @@ gaflight_server_call_context_class_init(GAFlightServerCallContextClass *klass)
 
 G_END_DECLS
 namespace gaflight {
+  class DataStream : public arrow::flight::FlightDataStream {
+  public:
+    DataStream(GAFlightDataStream *gastream) :
+      arrow::flight::FlightDataStream(),
+      gastream_(gastream) {
+    }
+
+    ~DataStream() override {
+      g_object_unref(gastream_);
+    }
+
+    std::shared_ptr<arrow::Schema> schema() override {
+      auto stream = gaflight_data_stream_get_raw(gastream_);
+      return stream->schema();
+    }
+
+    arrow::Status GetSchemaPayload(
+      arrow::flight::FlightPayload *payload) override {
+      auto stream = gaflight_data_stream_get_raw(gastream_);
+      return stream->GetSchemaPayload(payload);
+    }
+
+    arrow::Status Next(arrow::flight::FlightPayload *payload) override {
+      auto stream = gaflight_data_stream_get_raw(gastream_);
+      return stream->Next(payload);
+    }
+
+  private:
+    GAFlightDataStream *gastream_;
+  };
+
   class Server : public arrow::flight::FlightServerBase {
   public:
     Server(GAFlightServer *gaserver) : gaserver_(gaserver) {
     }
 
-    arrow::Status ListFlights(
+    arrow::Status
+    ListFlights(
       const arrow::flight::ServerCallContext &context,
       const arrow::flight::Criteria *criteria,
       std::unique_ptr<arrow::flight::FlightListing> *listing) override {
+      auto gacontext = gaflight_server_call_context_new_raw(&context);
+      GAFlightCriteria *gacriteria = NULL;
+      if (criteria) {
+        gacriteria = gaflight_criteria_new_raw(criteria);
+      }
       GError *gerror = NULL;
       auto gaflights = gaflight_server_list_flights(gaserver_,
-                                                    NULL,
-                                                    NULL,
+                                                    gacontext,
+                                                    gacriteria,
                                                     &gerror);
+      if (gacriteria) {
+        g_object_unref(gacriteria);
+      }
+      g_object_unref(gacontext);
       if (gerror) {
         return garrow_error_to_status(gerror,
                                       arrow::StatusCode::UnknownError,
@@ -256,6 +506,28 @@ namespace gaflight {
       return arrow::Status::OK();
     }
 
+    arrow::Status DoGet(
+      const arrow::flight::ServerCallContext &context,
+      const arrow::flight::Ticket &ticket,
+      std::unique_ptr<arrow::flight::FlightDataStream> *stream) override {
+      auto gacontext = gaflight_server_call_context_new_raw(&context);
+      auto gaticket = gaflight_ticket_new_raw(&ticket);
+      GError *gerror = NULL;
+      auto gastream = gaflight_server_do_get(gaserver_,
+                                             gacontext,
+                                             gaticket,
+                                             &gerror);
+      g_object_unref(gaticket);
+      g_object_unref(gacontext);
+      if (gerror) {
+        return garrow_error_to_status(gerror,
+                                      arrow::StatusCode::UnknownError,
+                                      "[flight-server][do-get]");
+      }
+      *stream = arrow::internal::make_unique<DataStream>(gastream);
+      return arrow::Status::OK();
+    }
+
   private:
     GAFlightServer *gaserver_;
   };
@@ -343,7 +615,7 @@ gaflight_server_get_port(GAFlightServer *server)
  * @error: (nullable): Return location for a #GError or %NULL.
  *
  * Shuts down the serve. This function can be called from signal
- * handler or another thread while gaflight_server_serve() blocks.
+ * handler or another thread.
  *
  * Returns: %TRUE on success, %FALSE on error.
  *
@@ -366,9 +638,6 @@ gaflight_server_shutdown(GAFlightServer *server,
  * @criteria: (nullable): A #GAFlightCriteria.
  * @error: (nullable): Return location for a #GError or %NULL.
  *
- * Shuts down the serve. This function can be called from signal
- * handler or another thread while gaflight_server_serve() blocks.
- *
  * Returns: (element-type GAFlightInfo) (transfer full):
  *   #GList of #GAFlightInfo on success, %NULL on error.
  *
@@ -391,10 +660,45 @@ gaflight_server_list_flights(GAFlightServer *server,
   return (*(klass->list_flights))(server, context, criteria, error);
 }
 
+/**
+ * gaflight_server_do_get:
+ * @server: A #GAFlightServer.
+ * @context: A #GAFlightServerCallContext.
+ * @ticket: A #GAFlightTicket.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (transfer full): #GAFlightDataStream on success, %NULL on error.
+ *
+ * Since: 6.0.0
+ */
+GAFlightDataStream *
+gaflight_server_do_get(GAFlightServer *server,
+                       GAFlightServerCallContext *context,
+                       GAFlightTicket *ticket,
+                       GError **error)
+{
+  auto klass = GAFLIGHT_SERVER_GET_CLASS(server);
+  if (!(klass && klass->do_get)) {
+    g_set_error(error,
+                GARROW_ERROR,
+                GARROW_ERROR_NOT_IMPLEMENTED,
+                "not implemented");
+    return NULL;
+  }
+  return (*(klass->do_get))(server, context, ticket, error);
+}
+
 
 G_END_DECLS
 
 
+arrow::flight::FlightDataStream *
+gaflight_data_stream_get_raw(GAFlightDataStream *stream)
+{
+  auto priv = GAFLIGHT_DATA_STREAM_GET_PRIVATE(stream);
+  return priv->stream;
+}
+
 arrow::flight::FlightServerOptions *
 gaflight_server_options_get_raw(GAFlightServerOptions *options)
 {
@@ -402,6 +706,16 @@ gaflight_server_options_get_raw(GAFlightServerOptions *options)
   return &(priv->options);
 }
 
+GAFlightServerCallContext *
+gaflight_server_call_context_new_raw(
+  const arrow::flight::ServerCallContext *call_context)
+{
+  return GAFLIGHT_SERVER_CALL_CONTEXT(
+    g_object_new(GAFLIGHT_TYPE_SERVER_CALL_CONTEXT,
+                 "call-context", call_context,
+                 NULL));
+}
+
 arrow::flight::FlightServerBase *
 gaflight_server_get_raw(GAFlightServer *server)
 {
diff --git a/c_glib/arrow-flight-glib/server.h b/c_glib/arrow-flight-glib/server.h
index f7431e8da68..107fe44bf77 100644
--- a/c_glib/arrow-flight-glib/server.h
+++ b/c_glib/arrow-flight-glib/server.h
@@ -24,6 +24,37 @@
 G_BEGIN_DECLS
 
 
+#define GAFLIGHT_TYPE_DATA_STREAM       \
+  (gaflight_data_stream_get_type())
+G_DECLARE_DERIVABLE_TYPE(GAFlightDataStream,
+                         gaflight_data_stream,
+                         GAFLIGHT,
+                         DATA_STREAM,
+                         GObject)
+struct _GAFlightDataStreamClass
+{
+  GObjectClass parent_class;
+};
+
+
+#define GAFLIGHT_TYPE_RECORD_BATCH_STREAM       \
+  (gaflight_record_batch_stream_get_type())
+G_DECLARE_DERIVABLE_TYPE(GAFlightRecordBatchStream,
+                         gaflight_record_batch_stream,
+                         GAFLIGHT,
+                         RECORD_BATCH_STREAM,
+                         GAFlightDataStream)
+struct _GAFlightRecordBatchStreamClass
+{
+  GAFlightDataStreamClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_6_0
+GAFlightRecordBatchStream *
+gaflight_record_batch_stream_new(GArrowRecordBatchReader *reader,
+                                 GArrowWriteOptions *options);
+
+
 #define GAFLIGHT_TYPE_SERVER_OPTIONS (gaflight_server_options_get_type())
 G_DECLARE_DERIVABLE_TYPE(GAFlightServerOptions,
                          gaflight_server_options,
@@ -59,6 +90,13 @@ G_DECLARE_DERIVABLE_TYPE(GAFlightServer,
                          GAFLIGHT,
                          SERVER,
                          GObject)
+/**
+ * GAFlightServerClass:
+ * @list_flights: A virtual function to implement `ListFlights` API.
+ * @do_get: A virtual function to implement `DoGet` API.
+ *
+ * Since: 5.0.0
+ */
 struct _GAFlightServerClass
 {
   GObjectClass parent_class;
@@ -67,6 +105,10 @@ struct _GAFlightServerClass
                          GAFlightServerCallContext *context,
                          GAFlightCriteria *criteria,
                          GError **error);
+  GAFlightDataStream *(*do_get)(GAFlightServer *server,
+                                GAFlightServerCallContext *context,
+                                GAFlightTicket *ticket,
+                                GError **error);
 };
 
 GARROW_AVAILABLE_IN_5_0
@@ -92,5 +134,11 @@ gaflight_server_list_flights(GAFlightServer *server,
                              GAFlightServerCallContext *context,
                              GAFlightCriteria *criteria,
                              GError **error);
+GARROW_AVAILABLE_IN_6_0
+GAFlightDataStream *
+gaflight_server_do_get(GAFlightServer *server,
+                       GAFlightServerCallContext *context,
+                       GAFlightTicket *ticket,
+                       GError **error);
 
 G_END_DECLS
diff --git a/c_glib/arrow-flight-glib/server.hpp b/c_glib/arrow-flight-glib/server.hpp
index 0103d14996d..f7f2a7aba1b 100644
--- a/c_glib/arrow-flight-glib/server.hpp
+++ b/c_glib/arrow-flight-glib/server.hpp
@@ -24,8 +24,15 @@
 #include <arrow-flight-glib/server.h>
 
 
+arrow::flight::FlightDataStream *
+gaflight_data_stream_get_raw(GAFlightDataStream *stream);
+
 arrow::flight::FlightServerOptions *
 gaflight_server_options_get_raw(GAFlightServerOptions *options);
 
+GAFlightServerCallContext *
+gaflight_server_call_context_new_raw(
+  const arrow::flight::ServerCallContext *flight_context);
+
 arrow::flight::FlightServerBase *
 gaflight_server_get_raw(GAFlightServer *server);
diff --git a/c_glib/arrow-glib/array-builder.cpp b/c_glib/arrow-glib/array-builder.cpp
index c9ac8f5755c..c5ae035a7bb 100644
--- a/c_glib/arrow-glib/array-builder.cpp
+++ b/c_glib/arrow-glib/array-builder.cpp
@@ -6142,9 +6142,9 @@ garrow_array_builder_new_raw(arrow::ArrayBuilder *arrow_builder,
       break;
     case arrow::Type::type::DICTIONARY:
       {
-        const auto& dict_type =
-          arrow::internal::checked_cast<arrow::DictionaryType&>(*arrow_builder->type());
-        switch (dict_type.value_type()->id()) {
+        auto dict_type =
+          std::static_pointer_cast<arrow::DictionaryType>(arrow_builder->type());
+        switch (dict_type->value_type()->id()) {
           case arrow::Type::type::BINARY:
             type = GARROW_TYPE_BINARY_DICTIONARY_ARRAY_BUILDER;
             break;
diff --git a/c_glib/arrow-glib/datum.cpp b/c_glib/arrow-glib/datum.cpp
index 8f37719a3a9..66993d6c229 100644
--- a/c_glib/arrow-glib/datum.cpp
+++ b/c_glib/arrow-glib/datum.cpp
@@ -163,7 +163,7 @@ garrow_datum_is_scalar(GArrowDatum *datum)
  * garrow_datum_is_value:
  * @datum: A #GArrowDatum.
  *
- * Returns: %TRUE if the datum holds a #GArrowArray, #GChunkedArray or
+ * Returns: %TRUE if the datum holds a #GArrowArray, #GArrowChunkedArray or
  *   #GArrowScalar, %FALSE otherwise.
  *
  * Since: 5.0.0
diff --git a/c_glib/arrow-glib/error.cpp b/c_glib/arrow-glib/error.cpp
index 9502d114e88..ac61ddc499a 100644
--- a/c_glib/arrow-glib/error.cpp
+++ b/c_glib/arrow-glib/error.cpp
@@ -135,8 +135,8 @@ garrow_error_to_status(GError *error,
   message << context << ": " << g_quark_to_string(error->domain);
   message << "(" << error->code << "): ";
   message << error->message;
-  g_error_free(error);
   auto code = garrow_error_to_status_code(error, default_code);
+  g_error_free(error);
   return arrow::Status(code, message.str());
 }
 
diff --git a/c_glib/arrow-glib/reader.cpp b/c_glib/arrow-glib/reader.cpp
index 762d0c30faf..ca580e8dcf3 100644
--- a/c_glib/arrow-glib/reader.cpp
+++ b/c_glib/arrow-glib/reader.cpp
@@ -144,6 +144,42 @@ garrow_record_batch_reader_class_init(GArrowRecordBatchReaderClass *klass)
   g_object_class_install_property(gobject_class, PROP_RECORD_BATCH_READER, spec);
 }
 
+/**
+ * garrow_record_batch_reader_new:
+ * @record_batches: (element-type GArrowRecordBatch):
+ *   A list of #GArrowRecordBatch.
+ * @schema: (nullable): A #GArrowSchema to confirm to.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: The schema in the stream on success, %NULL on error.
+ *
+ * Since: 6.0.0
+ */
+GArrowRecordBatchReader *
+garrow_record_batch_reader_new(GList *record_batches,
+                               GArrowSchema *schema,
+                               GError **error)
+{
+  std::vector<std::shared_ptr<arrow::RecordBatch>> arrow_record_batches;
+  for (auto node = record_batches; node; node = node->next) {
+    auto record_batch = GARROW_RECORD_BATCH(node->data);
+    arrow_record_batches.push_back(garrow_record_batch_get_raw(record_batch));
+  }
+  std::shared_ptr<arrow::Schema> arrow_schema;
+  if (schema) {
+    arrow_schema = garrow_schema_get_raw(schema);
+  }
+  auto arrow_reader_result =
+    arrow::RecordBatchReader::Make(arrow_record_batches, arrow_schema);
+  if (garrow::check(error,
+                    arrow_reader_result,
+                    "[record-batch-stream-reader][new]")) {
+    return garrow_record_batch_reader_new_raw(&*arrow_reader_result);
+  } else {
+    return NULL;
+  }
+}
+
 /**
  * garrow_record_batch_reader_get_schema:
  * @reader: A #GArrowRecordBatchReader.
@@ -231,6 +267,33 @@ garrow_record_batch_reader_read_next(GArrowRecordBatchReader *reader,
   }
 }
 
+/**
+ * garrow_record_batch_reader_read_all:
+ * @reader: A #GArrowRecordBatchReader.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (nullable) (transfer full):
+ *   The all record batches in the stream as #GArrowTable.
+ *
+ * Since: 6.0.0
+ */
+GArrowTable *
+garrow_record_batch_reader_read_all(GArrowRecordBatchReader *reader,
+                                    GError **error)
+{
+  auto arrow_reader = garrow_record_batch_reader_get_raw(reader);
+  std::shared_ptr<arrow::Table> arrow_table;
+  auto status = arrow_reader->ReadAll(&arrow_table);
+
+  if (garrow::check(error,
+                    status,
+                    "[record-batch-reader][read-all]")) {
+    return garrow_table_new_raw(&arrow_table);
+  } else {
+    return NULL;
+  }
+}
+
 
 G_DEFINE_TYPE(GArrowTableBatchReader,
               garrow_table_batch_reader,
@@ -2077,13 +2140,13 @@ garrow_json_reader_read(GArrowJSONReader *reader,
 G_END_DECLS
 
 GArrowRecordBatchReader *
-garrow_record_batch_reader_new_raw(std::shared_ptr<arrow::ipc::RecordBatchReader> *arrow_reader)
+garrow_record_batch_reader_new_raw(
+  std::shared_ptr<arrow::RecordBatchReader> *arrow_reader)
 {
-  auto reader =
-    GARROW_RECORD_BATCH_READER(g_object_new(GARROW_TYPE_RECORD_BATCH_READER,
-                                            "record-batch-reader", arrow_reader,
-                                            NULL));
-  return reader;
+  return GARROW_RECORD_BATCH_READER(
+    g_object_new(GARROW_TYPE_RECORD_BATCH_READER,
+                 "record-batch-reader", arrow_reader,
+                 NULL));
 }
 
 std::shared_ptr<arrow::ipc::RecordBatchReader>
diff --git a/c_glib/arrow-glib/reader.h b/c_glib/arrow-glib/reader.h
index 2628a7292ee..563b0cf227a 100644
--- a/c_glib/arrow-glib/reader.h
+++ b/c_glib/arrow-glib/reader.h
@@ -41,6 +41,12 @@ struct _GArrowRecordBatchReaderClass
   GObjectClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_6_0
+GArrowRecordBatchReader *
+garrow_record_batch_reader_new(GList *record_batches,
+                               GArrowSchema *schema,
+                               GError **error);
+
 GArrowSchema *garrow_record_batch_reader_get_schema(
   GArrowRecordBatchReader *reader);
 #ifndef GARROW_DISABLE_DEPRECATED
@@ -58,7 +64,10 @@ GArrowRecordBatch *garrow_record_batch_reader_read_next_record_batch(
 GArrowRecordBatch *garrow_record_batch_reader_read_next(
   GArrowRecordBatchReader *reader,
   GError **error);
-
+GARROW_AVAILABLE_IN_6_0
+GArrowTable *
+garrow_record_batch_reader_read_all(GArrowRecordBatchReader *reader,
+                                    GError **error);
 
 #define GARROW_TYPE_TABLE_BATCH_READER (garrow_table_batch_reader_get_type())
 G_DECLARE_DERIVABLE_TYPE(GArrowTableBatchReader,
diff --git a/c_glib/arrow-glib/version.h.in b/c_glib/arrow-glib/version.h.in
index 3dc2f702c5e..987764f556a 100644
--- a/c_glib/arrow-glib/version.h.in
+++ b/c_glib/arrow-glib/version.h.in
@@ -110,6 +110,15 @@
 #  define GARROW_UNAVAILABLE(major, minor) G_UNAVAILABLE(major, minor)
 #endif
 
+/**
+ * GARROW_VERSION_6_0:
+ *
+ * You can use this macro value for compile time API version check.
+ *
+ * Since: 6.0.0
+ */
+#define GARROW_VERSION_6_0 G_ENCODE_VERSION(6 , 0)
+
 /**
  * GARROW_VERSION_5_0:
  *
@@ -265,6 +274,20 @@
 
 #define GARROW_AVAILABLE_IN_ALL
 
+#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_6_0
+#  define GARROW_DEPRECATED_IN_6_0                GARROW_DEPRECATED
+#  define GARROW_DEPRECATED_IN_6_0_FOR(function)  GARROW_DEPRECATED_FOR(function)
+#else
+#  define GARROW_DEPRECATED_IN_6_0
+#  define GARROW_DEPRECATED_IN_6_0_FOR(function)
+#endif
+
+#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_6_0
+#  define GARROW_AVAILABLE_IN_6_0 GARROW_UNAVAILABLE(6, 0)
+#else
+#  define GARROW_AVAILABLE_IN_6_0
+#endif
+
 #if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_5_0
 #  define GARROW_DEPRECATED_IN_5_0                GARROW_DEPRECATED
 #  define GARROW_DEPRECATED_IN_5_0_FOR(function)  GARROW_DEPRECATED_FOR(function)
diff --git a/c_glib/doc/arrow-flight-glib/arrow-flight-glib-docs.xml b/c_glib/doc/arrow-flight-glib/arrow-flight-glib-docs.xml
index d1850185692..397a8bec0d0 100644
--- a/c_glib/doc/arrow-flight-glib/arrow-flight-glib-docs.xml
+++ b/c_glib/doc/arrow-flight-glib/arrow-flight-glib-docs.xml
@@ -55,6 +55,10 @@
     <title>Index of deprecated API</title>
     <xi:include href="xml/api-index-deprecated.xml"><xi:fallback /></xi:include>
   </index>
+  <index id="api-index-6-0-0" role="6.0.0">
+    <title>Index of new symbols in 6.0.0</title>
+    <xi:include href="xml/api-index-6.0.0.xml"><xi:fallback /></xi:include>
+  </index>
   <index id="api-index-5-0-0" role="5.0.0">
     <title>Index of new symbols in 5.0.0</title>
     <xi:include href="xml/api-index-5.0.0.xml"><xi:fallback /></xi:include>
diff --git a/c_glib/test/flight/test-client.rb b/c_glib/test/flight/test-client.rb
index f3fca0116de..f6660a4ca49 100644
--- a/c_glib/test/flight/test-client.rb
+++ b/c_glib/test/flight/test-client.rb
@@ -22,7 +22,7 @@ def setup
     @server = nil
     omit("Arrow Flight is required") unless defined?(ArrowFlight)
     omit("Unstable on Windows") if Gem.win_platform?
-    require_gi_bindings(3, 4, 5)
+    require_gi_bindings(3, 4, 7)
     @server = Helper::FlightServer.new
     host = "127.0.0.1"
     location = ArrowFlight::Location.new("grpc://#{host}:0")
@@ -42,4 +42,23 @@ def test_list_flights
     assert_equal([generator.page_view],
                  client.list_flights)
   end
+
+  sub_test_case("#do_get") do
+    def test_success
+      client = ArrowFlight::Client.new(@location)
+      info = client.list_flights.first
+      endpoint = info.endpoints.first
+      generator = Helper::FlightInfoGenerator.new
+      reader = client.do_get(endpoint.ticket)
+      assert_equal(generator.page_view_table,
+                   reader.read_all)
+    end
+
+    def test_error
+      client = ArrowFlight::Client.new(@location)
+      assert_raise(Arrow::Error::Invalid) do
+        client.do_get(ArrowFlight::Ticket.new("invalid"))
+      end
+    end
+  end
 end
diff --git a/c_glib/test/flight/test-stream-reader.rb b/c_glib/test/flight/test-stream-reader.rb
new file mode 100644
index 00000000000..f2e6229b0b3
--- /dev/null
+++ b/c_glib/test/flight/test-stream-reader.rb
@@ -0,0 +1,69 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestFlightStreamReader < Test::Unit::TestCase
+  include Helper::Omittable
+
+  def setup
+    @server = nil
+    omit("Arrow Flight is required") unless defined?(ArrowFlight)
+    omit("Unstable on Windows") if Gem.win_platform?
+    require_gi_bindings(3, 4, 5)
+    @server = Helper::FlightServer.new
+    host = "127.0.0.1"
+    location = ArrowFlight::Location.new("grpc://#{host}:0")
+    options = ArrowFlight::ServerOptions.new(location)
+    @server.listen(options)
+    location = ArrowFlight::Location.new("grpc://#{host}:#{@server.port}")
+    client = ArrowFlight::Client.new(location)
+    @generator = Helper::FlightInfoGenerator.new
+    @reader = client.do_get(@generator.page_view_ticket)
+  end
+
+  def teardown
+    return if @server.nil?
+    @server.shutdown
+  end
+
+  def test_read_next
+    chunks = []
+    loop do
+      chunk = @reader.read_next
+      break if chunk.nil?
+      chunks << chunk
+    end
+    chunks_content = chunks.collect do |chunk|
+      [
+        chunk.data,
+        chunk.metadata&.data&.to_s,
+      ]
+    end
+    table_batch_reader = Arrow::TableBatchReader.new(@generator.page_view_table)
+    assert_equal([
+                   [
+                     table_batch_reader.read_next,
+                     nil,
+                   ],
+                 ],
+                 chunks_content)
+  end
+
+  def test_read_all
+    assert_equal(@generator.page_view_table,
+                 @reader.read_all)
+  end
+end
diff --git a/c_glib/test/helper/flight-info-generator.rb b/c_glib/test/helper/flight-info-generator.rb
index ad48eda2b6f..c57530879cb 100644
--- a/c_glib/test/helper/flight-info-generator.rb
+++ b/c_glib/test/helper/flight-info-generator.rb
@@ -34,14 +34,17 @@ def page_view_descriptor
       ArrowFlight::PathDescriptor.new(["page-view"])
     end
 
+    def page_view_ticket
+      ArrowFlight::Ticket.new("page-view")
+    end
+
     def page_view_endpoints
       locations = [
         ArrowFlight::Location.new("grpc+tcp://127.0.0.1:10000"),
         ArrowFlight::Location.new("grpc+tcp://127.0.0.1:10001"),
       ]
       [
-        ArrowFlight::Endpoint.new(ArrowFlight::Ticket.new("page-view"),
-                                  locations),
+        ArrowFlight::Endpoint.new(page_view_ticket, locations),
       ]
     end
 
diff --git a/c_glib/test/helper/flight-server.rb b/c_glib/test/helper/flight-server.rb
index a6bcd9ec233..89fd13b4211 100644
--- a/c_glib/test/helper/flight-server.rb
+++ b/c_glib/test/helper/flight-server.rb
@@ -26,5 +26,15 @@ def virtual_do_list_flights(context, criteria)
       generator = FlightInfoGenerator.new
       [generator.page_view]
     end
+
+    def virtual_do_do_get(context, ticket)
+      generator = FlightInfoGenerator.new
+      unless ticket == generator.page_view_ticket
+        raise Arrow::Error::Invalid.new("invalid ticket")
+      end
+      table = generator.page_view_table
+      reader = Arrow::TableBatchReader.new(table)
+      ArrowFlight::RecordBatchStream.new(reader)
+    end
   end
 end
diff --git a/c_glib/test/test-record-batch-reader.rb b/c_glib/test/test-record-batch-reader.rb
new file mode 100644
index 00000000000..a41da65fd76
--- /dev/null
+++ b/c_glib/test/test-record-batch-reader.rb
@@ -0,0 +1,56 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestRecordBatchReader <Test::Unit::TestCase
+  include Helper::Buildable
+
+  def setup
+    fields = [
+      Arrow::Field.new("visible", Arrow::BooleanDataType.new),
+      Arrow::Field.new("point", Arrow::Int32DataType.new),
+    ]
+    @schema = Arrow::Schema.new(fields)
+    @record_batches = [
+      [
+        build_boolean_array([true, false, true]),
+        build_int32_array([1, 2, 3]),
+      ],
+      [
+        build_boolean_array([false, true, false, true]),
+        build_int32_array([-1, -2, -3, -4]),
+      ]
+    ].collect do |columns|
+      Arrow::RecordBatch.new(@schema, columns[0].length, columns)
+    end
+    @reader = Arrow::RecordBatchReader.new(@record_batches, @schema)
+  end
+
+  def test_schema
+    assert_equal(@schema, @reader.schema)
+  end
+
+  def test_read_next
+    assert_equal(@record_batches[0], @reader.read_next)
+    assert_equal(@record_batches[1], @reader.read_next)
+    assert_nil(@reader.read_next)
+  end
+
+  def test_read_all
+    assert_equal(Arrow::Table.new(@schema, @record_batches),
+                 @reader.read_all)
+  end
+end
diff --git a/ruby/red-arrow-flight/test/helper/info-generator.rb b/ruby/red-arrow-flight/test/helper/info-generator.rb
index ef931ebbab4..e5430fd6b5e 100644
--- a/ruby/red-arrow-flight/test/helper/info-generator.rb
+++ b/ruby/red-arrow-flight/test/helper/info-generator.rb
@@ -27,13 +27,17 @@ def page_view_descriptor
       ArrowFlight::PathDescriptor.new(["page-view"])
     end
 
+    def page_view_ticket
+      "page-view"
+    end
+
     def page_view_endpoints
       locations = [
         ArrowFlight::Location.new("grpc+tcp://127.0.0.1:10000"),
         ArrowFlight::Location.new("grpc+tcp://127.0.0.1:10001"),
       ]
       [
-        ArrowFlight::Endpoint.new("page-view", locations),
+        ArrowFlight::Endpoint.new(page_view_ticket, locations),
       ]
     end
 
diff --git a/ruby/red-arrow-flight/test/helper/server.rb b/ruby/red-arrow-flight/test/helper/server.rb
index a884a527b48..269bb5f3d78 100644
--- a/ruby/red-arrow-flight/test/helper/server.rb
+++ b/ruby/red-arrow-flight/test/helper/server.rb
@@ -26,5 +26,14 @@ def virtual_do_list_flights(context, criteria)
       generator = InfoGenerator.new
       [generator.page_view]
     end
+
+    def virtual_do_do_get(context, ticket)
+      generator = InfoGenerator.new
+      if ticket.data.to_s != generator.page_view_ticket
+        raise Arrow::Error::Invalid.new("invalid ticket")
+      end
+      table = generator.page_view_table
+      ArrowFlight::RecordBatchStream.new(table)
+    end
   end
 end
diff --git a/ruby/red-arrow-flight/test/test-client.rb b/ruby/red-arrow-flight/test/test-client.rb
index 50d5d57a63f..850d6f45790 100644
--- a/ruby/red-arrow-flight/test/test-client.rb
+++ b/ruby/red-arrow-flight/test/test-client.rb
@@ -35,4 +35,12 @@ def test_list_flights
     assert_equal([generator.page_view],
                  client.list_flights)
   end
+
+  def test_do_get
+    client = ArrowFlight::Client.new(@location)
+    generator = Helper::InfoGenerator.new
+    reader = client.do_get(generator.page_view_ticket)
+    assert_equal(generator.page_view_table,
+                 reader.read_all)
+  end
 end
diff --git a/ruby/red-arrow/lib/arrow/loader.rb b/ruby/red-arrow/lib/arrow/loader.rb
index 7af00ffa441..be0aee7b5dc 100644
--- a/ruby/red-arrow/lib/arrow/loader.rb
+++ b/ruby/red-arrow/lib/arrow/loader.rb
@@ -81,6 +81,7 @@ def require_libraries
       require "arrow/record-batch-builder"
       require "arrow/record-batch-file-reader"
       require "arrow/record-batch-iterator"
+      require "arrow/record-batch-reader"
       require "arrow/record-batch-stream-reader"
       require "arrow/rolling-window"
       require "arrow/scalar"
diff --git a/ruby/red-arrow/lib/arrow/record-batch-reader.rb b/ruby/red-arrow/lib/arrow/record-batch-reader.rb
new file mode 100644
index 00000000000..e030e4f3ba8
--- /dev/null
+++ b/ruby/red-arrow/lib/arrow/record-batch-reader.rb
@@ -0,0 +1,41 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+  class RecordBatchReader
+    class << self
+      # @api private
+      def try_convert(value)
+        case value
+        when ::Array
+          return nil if value.empty?
+          if value.all? {|v| v.is_a?(RecordBatch)}
+            new(value)
+          else
+            nil
+          end
+        when RecordBatch
+          new([value])
+        when Table
+          TableBatchReader.new(value)
+        else
+          nil
+        end
+      end
+    end
+  end
+end
diff --git a/ruby/red-arrow/red-arrow.gemspec b/ruby/red-arrow/red-arrow.gemspec
index 562e474cda3..f75a51d8653 100644
--- a/ruby/red-arrow/red-arrow.gemspec
+++ b/ruby/red-arrow/red-arrow.gemspec
@@ -48,7 +48,7 @@ Gem::Specification.new do |spec|
 
   spec.add_runtime_dependency("bigdecimal", ">= 2.0.3")
   spec.add_runtime_dependency("extpp", ">= 0.0.7")
-  spec.add_runtime_dependency("gio2", ">= 3.4.5")
+  spec.add_runtime_dependency("gio2", ">= 3.4.7")
   spec.add_runtime_dependency("native-package-installer")
   spec.add_runtime_dependency("pkg-config")
 
diff --git a/ruby/red-arrow/test/test-record-batch-reader.rb b/ruby/red-arrow/test/test-record-batch-reader.rb
new file mode 100644
index 00000000000..1becdf5b63b
--- /dev/null
+++ b/ruby/red-arrow/test/test-record-batch-reader.rb
@@ -0,0 +1,46 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestRecordBatchReader < Test::Unit::TestCase
+  sub_test_case(".try_convert") do
+    test("Arrow::RecordBatch") do
+      record_batch =
+        Arrow::RecordBatch.new("count" => [1, 2, 3],
+                               "private" => [true, false, true])
+      reader = Arrow::RecordBatchReader.try_convert(record_batch)
+      assert_equal(record_batch,
+                   reader.read_next)
+    end
+
+    test("[Arrow::RecordBatch]") do
+      record_batch =
+        Arrow::RecordBatch.new("count" => [1, 2, 3],
+                               "private" => [true, false, true])
+      reader = Arrow::RecordBatchReader.try_convert([record_batch])
+      assert_equal(record_batch,
+                   reader.read_next)
+    end
+
+    test("Arrow::Table") do
+      table = Arrow::Table.new("count" => [1, 2, 3],
+                               "private" => [true, false, true])
+      reader = Arrow::RecordBatchReader.try_convert(table)
+      assert_equal(table,
+                   reader.read_all)
+    end
+  end
+end

From 0c39d027bbaf96ae08ee89e53b59118035bbf674 Mon Sep 17 00:00:00 2001
From: Rommel Quintanilla <rommel.quintanilla.c@gmail.com>
Date: Fri, 30 Jul 2021 20:30:01 -0400
Subject: [PATCH 674/719] ARROW-4700: [C++] Added support for decimal128 and
 decimal256 json converted

Changes:
- Added support for decimal128 and decimal256 json converted.
- Added unit tests for converting decimal128 and decimal256. (Note: This is not providing inferring support)

Closes #10843 from rommelDB/4700-add-decimal-support-for-json-reading

Authored-by: Rommel Quintanilla <rommel.quintanilla.c@gmail.com>
Signed-off-by: Benjamin Kietzman <bengilgit@gmail.com>
---
 cpp/src/arrow/json/converter.cc      |  39 +++++
 cpp/src/arrow/json/converter_test.cc | 214 ++++++++++++++++++++-------
 cpp/src/arrow/json/parser.cc         |   2 +
 cpp/src/arrow/json/test_common.h     |  19 ++-
 4 files changed, 218 insertions(+), 56 deletions(-)

diff --git a/cpp/src/arrow/json/converter.cc b/cpp/src/arrow/json/converter.cc
index fe9500d40ca..a2f584c0b7f 100644
--- a/cpp/src/arrow/json/converter.cc
+++ b/cpp/src/arrow/json/converter.cc
@@ -22,11 +22,13 @@
 
 #include "arrow/array.h"
 #include "arrow/array/builder_binary.h"
+#include "arrow/array/builder_decimal.h"
 #include "arrow/array/builder_primitive.h"
 #include "arrow/array/builder_time.h"
 #include "arrow/json/parser.h"
 #include "arrow/type.h"
 #include "arrow/util/checked_cast.h"
+#include "arrow/util/decimal.h"
 #include "arrow/util/logging.h"
 #include "arrow/util/string_view.h"
 #include "arrow/util/value_parsing.h"
@@ -147,6 +149,41 @@ class NumericConverter : public PrimitiveConverter {
   const T& numeric_type_;
 };
 
+template <typename T>
+class DecimalConverter : public PrimitiveConverter {
+ public:
+  using value_type = typename TypeTraits<T>::BuilderType::ValueType;
+
+  DecimalConverter(MemoryPool* pool, const std::shared_ptr<DataType>& type)
+      : PrimitiveConverter(pool, type) {}
+
+  Status Convert(const std::shared_ptr<Array>& in, std::shared_ptr<Array>* out) override {
+    if (in->type_id() == Type::NA) {
+      return MakeArrayOfNull(out_type_, in->length(), pool_).Value(out);
+    }
+    const auto& dict_array = GetDictionaryArray(in);
+
+    using Builder = typename TypeTraits<T>::BuilderType;
+    Builder builder(out_type_, pool_);
+    RETURN_NOT_OK(builder.Resize(dict_array.indices()->length()));
+
+    auto visit_valid = [&builder](string_view repr) {
+      ARROW_ASSIGN_OR_RAISE(value_type value,
+                            TypeTraits<T>::BuilderType::ValueType::FromString(repr));
+      builder.UnsafeAppend(value);
+      return Status::OK();
+    };
+
+    auto visit_null = [&builder]() {
+      builder.UnsafeAppendNull();
+      return Status::OK();
+    };
+
+    RETURN_NOT_OK(VisitDictionaryEntries(dict_array, visit_valid, visit_null));
+    return builder.Finish(out);
+  }
+};
+
 template <typename DateTimeType>
 class DateTimeConverter : public PrimitiveConverter {
  public:
@@ -250,6 +287,8 @@ Status MakeConverter(const std::shared_ptr<DataType>& out_type, MemoryPool* pool
     CONVERTER_CASE(Type::STRING, BinaryConverter<StringType>);
     CONVERTER_CASE(Type::LARGE_BINARY, BinaryConverter<LargeBinaryType>);
     CONVERTER_CASE(Type::LARGE_STRING, BinaryConverter<LargeStringType>);
+    CONVERTER_CASE(Type::DECIMAL128, DecimalConverter<Decimal128Type>);
+    CONVERTER_CASE(Type::DECIMAL256, DecimalConverter<Decimal256Type>);
     default:
       return Status::NotImplemented("JSON conversion to ", *out_type,
                                     " is not supported");
diff --git a/cpp/src/arrow/json/converter_test.cc b/cpp/src/arrow/json/converter_test.cc
index 6d787db0bbd..030f2a7bc13 100644
--- a/cpp/src/arrow/json/converter_test.cc
+++ b/cpp/src/arrow/json/converter_test.cc
@@ -17,83 +17,197 @@
 
 #include "arrow/json/converter.h"
 
-#include <string>
-
 #include <gtest/gtest.h>
 
+#include <string>
+
 #include "arrow/json/options.h"
 #include "arrow/json/test_common.h"
 
 namespace arrow {
 namespace json {
 
-using util::string_view;
-
-void AssertConvert(const std::shared_ptr<DataType>& expected_type,
-                   const std::string& expected_json,
-                   const std::string& unconverted_json) {
-  // make an unconverted array
-  auto scalar_values = ArrayFromJSON(utf8(), unconverted_json);
-  Int32Builder indices_builder;
-  ASSERT_OK(indices_builder.Resize(scalar_values->length()));
-  for (int i = 0; i < scalar_values->length(); ++i) {
-    if (scalar_values->IsNull(i)) {
-      indices_builder.UnsafeAppendNull();
-    } else {
-      indices_builder.UnsafeAppend(i);
-    }
-  }
-  std::shared_ptr<Array> indices, unconverted, converted;
-  ASSERT_OK(indices_builder.Finish(&indices));
-
-  auto unconverted_type = dictionary(int32(), scalar_values->type());
-  unconverted =
-      std::make_shared<DictionaryArray>(unconverted_type, indices, scalar_values);
-
+Result<std::shared_ptr<Array>> Convert(std::shared_ptr<DataType> type,
+                                       std::shared_ptr<Array> unconverted) {
+  std::shared_ptr<Array> converted;
   // convert the array
   std::shared_ptr<Converter> converter;
-  ASSERT_OK(MakeConverter(expected_type, default_memory_pool(), &converter));
-  ASSERT_OK(converter->Convert(unconverted, &converted));
-  ASSERT_OK(converted->ValidateFull());
-
-  // assert equality
-  auto expected = ArrayFromJSON(expected_type, expected_json);
-  AssertArraysEqual(*expected, *converted);
+  RETURN_NOT_OK(MakeConverter(type, default_memory_pool(), &converter));
+  RETURN_NOT_OK(converter->Convert(unconverted, &converted));
+  RETURN_NOT_OK(converted->ValidateFull());
+  return converted;
 }
 
 // bool, null are trivial pass throughs
 
 TEST(ConverterTest, Integers) {
-  for (auto expected_type : {uint8(), uint16(), uint32(), uint64()}) {
-    AssertConvert(expected_type, "[0, null, 1, 32, 45, 12, 64, 124]",
-                  R"(["0", null, "1", "32", "45", "12", "64", "124"])");
+  for (auto int_type : {int8(), int16(), int32(), int64()}) {
+    ParseOptions options;
+    options.explicit_schema = schema({field("", int_type)});
+
+    std::string json_source = R"(
+    {"" : -0}
+    {"" : null}
+    {"" : -1}
+    {"" : 32}
+    {"" : -45}
+    {"" : 12}
+    {"" : -64}
+    {"" : 124}
+  )";
+
+    std::shared_ptr<StructArray> parse_array;
+    ASSERT_OK(ParseFromString(options, json_source, &parse_array));
+
+    // call to convert
+    ASSERT_OK_AND_ASSIGN(auto converted,
+                         Convert(int_type, parse_array->GetFieldByName("")));
+
+    // assert equality
+    auto expected = ArrayFromJSON(int_type, R"([
+          -0, null, -1, 32, -45, 12, -64, 124])");
+
+    AssertArraysEqual(*expected, *converted);
   }
-  for (auto expected_type : {int8(), int16(), int32(), int64()}) {
-    AssertConvert(expected_type, "[0, null, -1, 32, -45, 12, -64, 124]",
-                  R"(["-0", null, "-1", "32", "-45", "12", "-64", "124"])");
+}
+
+TEST(ConverterTest, UnsignedIntegers) {
+  for (auto uint_type : {uint8(), uint16(), uint32(), uint64()}) {
+    ParseOptions options;
+    options.explicit_schema = schema({field("", uint_type)});
+
+    std::string json_source = R"(
+    {"" : 0}
+    {"" : null}
+    {"" : 1}
+    {"" : 32}
+    {"" : 45}
+    {"" : 12}
+    {"" : 64}
+    {"" : 124}
+  )";
+
+    std::shared_ptr<StructArray> parse_array;
+    ASSERT_OK(ParseFromString(options, json_source, &parse_array));
+
+    // call to convert
+    ASSERT_OK_AND_ASSIGN(auto converted,
+                         Convert(uint_type, parse_array->GetFieldByName("")));
+
+    // assert equality
+    auto expected = ArrayFromJSON(uint_type, R"([
+          0, null, 1, 32, 45, 12, 64, 124])");
+
+    AssertArraysEqual(*expected, *converted);
   }
 }
 
 TEST(ConverterTest, Floats) {
-  for (auto expected_type : {float32(), float64()}) {
-    AssertConvert(expected_type, "[0, -0.0, null, 32.0, 1e5]",
-                  R"(["0", "-0.0", null, "32.0", "1e5"])");
+  for (auto float_type : {float32(), float64()}) {
+    ParseOptions options;
+    options.explicit_schema = schema({field("", float_type)});
+
+    std::string json_source = R"(
+    {"" : 0}
+    {"" : -0.0}
+    {"" : null}
+    {"" : 32.0}
+    {"" : 1e5}
+  )";
+
+    std::shared_ptr<StructArray> parse_array;
+    ASSERT_OK(ParseFromString(options, json_source, &parse_array));
+
+    // call to convert
+    ASSERT_OK_AND_ASSIGN(auto converted,
+                         Convert(float_type, parse_array->GetFieldByName("")));
+
+    // assert equality
+    auto expected = ArrayFromJSON(float_type, R"([
+          0, -0.0, null, 32.0, 1e5])");
+
+    AssertArraysEqual(*expected, *converted);
   }
 }
 
-TEST(ConverterTest, String) {
-  std::string src = R"(["a", "b c", null, "d e f", "g"])";
-  AssertConvert(utf8(), src, src);
-}
+TEST(ConverterTest, StringAndLargeString) {
+  for (auto string_type : {utf8(), large_utf8()}) {
+    ParseOptions options;
+    options.explicit_schema = schema({field("", string_type)});
+
+    std::string json_source = R"(
+    {"" : "a"}
+    {"" : "b c"}
+    {"" : null}
+    {"" : "d e f"}
+    {"" : "g"}
+  )";
+
+    std::shared_ptr<StructArray> parse_array;
+    ASSERT_OK(ParseFromString(options, json_source, &parse_array));
+
+    // call to convert
+    ASSERT_OK_AND_ASSIGN(auto converted,
+                         Convert(string_type, parse_array->GetFieldByName("")));
+
+    // assert equality
+    auto expected = ArrayFromJSON(string_type, R"([
+          "a", "b c", null, "d e f", "g"])");
 
-TEST(ConverterTest, LargeString) {
-  std::string src = R"(["a", "b c", null, "d e f", "g"])";
-  AssertConvert(large_utf8(), src, src);
+    AssertArraysEqual(*expected, *converted);
+  }
 }
 
 TEST(ConverterTest, Timestamp) {
-  std::string src = R"([null, "1970-01-01", "2018-11-13 17:11:10"])";
-  AssertConvert(timestamp(TimeUnit::SECOND), src, src);
+  auto timestamp_type = timestamp(TimeUnit::SECOND);
+
+  ParseOptions options;
+  options.explicit_schema = schema({field("", timestamp_type)});
+
+  std::string json_source = R"(
+    {"" : null}
+    {"" : "1970-01-01"}
+    {"" : "2018-11-13 17:11:10"}
+  )";
+
+  std::shared_ptr<StructArray> parse_array;
+  ASSERT_OK(ParseFromString(options, json_source, &parse_array));
+
+  // call to convert
+  ASSERT_OK_AND_ASSIGN(auto converted,
+                       Convert(timestamp_type, parse_array->GetFieldByName("")));
+
+  // assert equality
+  auto expected = ArrayFromJSON(timestamp_type, R"([
+          null, "1970-01-01", "2018-11-13 17:11:10"])");
+
+  AssertArraysEqual(*expected, *converted);
+}
+
+TEST(ConverterTest, Decimal128And256) {
+  for (auto decimal_type : {decimal128(38, 10), decimal256(38, 10)}) {
+    ParseOptions options;
+    options.explicit_schema = schema({field("", decimal_type)});
+
+    std::string json_source = R"(
+    {"" : "02.0000000000"}
+    {"" : "30.0000000000"}
+  )";
+
+    std::shared_ptr<StructArray> parse_array;
+    ASSERT_OK(ParseFromString(options, json_source, &parse_array));
+
+    // call to convert
+    ASSERT_OK_AND_ASSIGN(auto converted,
+                         Convert(decimal_type, parse_array->GetFieldByName("")));
+
+    // assert equality
+    auto expected = ArrayFromJSON(decimal_type, R"([
+          "02.0000000000",
+          "30.0000000000"])");
+
+    AssertArraysEqual(*expected, *converted);
+  }
 }
 
 }  // namespace json
diff --git a/cpp/src/arrow/json/parser.cc b/cpp/src/arrow/json/parser.cc
index 05f155645a6..62e1cd7fc41 100644
--- a/cpp/src/arrow/json/parser.cc
+++ b/cpp/src/arrow/json/parser.cc
@@ -102,6 +102,8 @@ Status Kind::ForType(const DataType& type, Kind::type* kind) {
     Status Visit(const TimeType&) { return SetKind(Kind::kNumber); }
     Status Visit(const DateType&) { return SetKind(Kind::kNumber); }
     Status Visit(const BinaryType&) { return SetKind(Kind::kString); }
+    Status Visit(const LargeBinaryType&) { return SetKind(Kind::kString); }
+    Status Visit(const TimestampType&) { return SetKind(Kind::kString); }
     Status Visit(const FixedSizeBinaryType&) { return SetKind(Kind::kString); }
     Status Visit(const DictionaryType& dict_type) {
       return Kind::ForType(*dict_type.value_type(), kind_);
diff --git a/cpp/src/arrow/json/test_common.h b/cpp/src/arrow/json/test_common.h
index 618b16ae424..1a1a3bd85d2 100644
--- a/cpp/src/arrow/json/test_common.h
+++ b/cpp/src/arrow/json/test_common.h
@@ -24,21 +24,20 @@
 #include <utility>
 #include <vector>
 
-#include "arrow/json/rapidjson_defs.h"
-#include "rapidjson/document.h"
-#include "rapidjson/prettywriter.h"
-#include "rapidjson/reader.h"
-#include "rapidjson/writer.h"
-
 #include "arrow/io/memory.h"
 #include "arrow/json/converter.h"
 #include "arrow/json/options.h"
 #include "arrow/json/parser.h"
+#include "arrow/json/rapidjson_defs.h"
 #include "arrow/testing/gtest_util.h"
 #include "arrow/type.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/string_view.h"
 #include "arrow/visitor_inline.h"
+#include "rapidjson/document.h"
+#include "rapidjson/prettywriter.h"
+#include "rapidjson/reader.h"
+#include "rapidjson/writer.h"
 
 namespace arrow {
 
@@ -192,6 +191,14 @@ inline static Status ParseFromString(ParseOptions options, string_view src_str,
   return parser->Finish(parsed);
 }
 
+inline static Status ParseFromString(ParseOptions options, string_view src_str,
+                                     std::shared_ptr<StructArray>* parsed) {
+  std::shared_ptr<Array> parsed_non_struct;
+  RETURN_NOT_OK(ParseFromString(options, src_str, &parsed_non_struct));
+  *parsed = internal::checked_pointer_cast<StructArray>(parsed_non_struct);
+  return Status::OK();
+}
+
 static inline std::string PrettyPrint(string_view one_line) {
   rj::Document document;
 

From aca6ec979ca1a79ef8009bcbdf7e178be315a10a Mon Sep 17 00:00:00 2001
From: niranda perera <niranda.perera@gmail.com>
Date: Fri, 30 Jul 2021 21:20:32 -0400
Subject: [PATCH 675/719] ARROW-13493  [C++] Anonymous structs in an anonymous
 union are a GNU extension

Closes #10833 from nirandaperera/ARROW-13493

Authored-by: niranda perera <niranda.perera@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 cpp/src/arrow/util/bitmap_reader.h | 20 ++++++++++----------
 cpp/src/arrow/util/bitmap_writer.h | 24 ++++++++++++------------
 2 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/cpp/src/arrow/util/bitmap_reader.h b/cpp/src/arrow/util/bitmap_reader.h
index b05c7226ddb..55d92d15ce0 100644
--- a/cpp/src/arrow/util/bitmap_reader.h
+++ b/cpp/src/arrow/util/bitmap_reader.h
@@ -163,16 +163,16 @@ class BitmapWordReader {
     trailing_bytes_ = static_cast<int>(BitUtil::BytesForBits(trailing_bits_));
 
     if (nwords_ > 0) {
-      current_word_ = load<Word>(bitmap_);
+      current_data.word_ = load<Word>(bitmap_);
     } else if (length > 0) {
-      current_byte_ = load<uint8_t>(bitmap_);
+      current_data.epi.byte_ = load<uint8_t>(bitmap_);
     }
   }
 
   Word NextWord() {
     bitmap_ += sizeof(Word);
     const Word next_word = load<Word>(bitmap_);
-    Word word = current_word_;
+    Word word = current_data.word_;
     if (may_have_byte_offset && offset_) {
       // combine two adjacent words into one word
       // |<------ next ----->|<---- current ---->|
@@ -188,7 +188,7 @@ class BitmapWordReader {
       word >>= offset_;
       word |= next_word << (sizeof(Word) * 8 - offset_);
     }
-    current_word_ = next_word;
+    current_data.word_ = next_word;
     return word;
   }
 
@@ -213,12 +213,12 @@ class BitmapWordReader {
     } else {
       ++bitmap_;
       const uint8_t next_byte = load<uint8_t>(bitmap_);
-      byte = current_byte_;
+      byte = current_data.epi.byte_;
       if (may_have_byte_offset && offset_) {
         byte >>= offset_;
         byte |= next_byte << (8 - offset_);
       }
-      current_byte_ = next_byte;
+      current_data.epi.byte_ = next_byte;
       trailing_bits_ -= 8;
       trailing_bytes_--;
       valid_bits = 8;
@@ -238,14 +238,14 @@ class BitmapWordReader {
   int trailing_bits_;
   int trailing_bytes_;
   union {
-    Word current_word_;
+    Word word_;
     struct {
 #if ARROW_LITTLE_ENDIAN == 0
       uint8_t padding_bytes_[sizeof(Word) - 1];
 #endif
-      uint8_t current_byte_;
-    };
-  };
+      uint8_t byte_;
+    } epi;
+  } current_data;
 
   template <typename DType>
   DType load(const uint8_t* bitmap) {
diff --git a/cpp/src/arrow/util/bitmap_writer.h b/cpp/src/arrow/util/bitmap_writer.h
index d5c6d909df0..1df1baa0f2e 100644
--- a/cpp/src/arrow/util/bitmap_writer.h
+++ b/cpp/src/arrow/util/bitmap_writer.h
@@ -191,9 +191,9 @@ class BitmapWordWriter {
         mask_((1U << offset_) - 1) {
     if (offset_) {
       if (length >= static_cast<int>(sizeof(Word) * 8)) {
-        current_word_ = load<Word>(bitmap_);
+        current_data.word_ = load<Word>(bitmap_);
       } else if (length > 0) {
-        current_byte_ = load<uint8_t>(bitmap_);
+        current_data.epi.byte_ = load<uint8_t>(bitmap_);
       }
     }
   }
@@ -213,11 +213,11 @@ class BitmapWordWriter {
       // |<------ next ----->|<---- current ---->|
       word = (word << offset_) | (word >> (sizeof(Word) * 8 - offset_));
       Word next_word = load<Word>(bitmap_ + sizeof(Word));
-      current_word_ = (current_word_ & mask_) | (word & ~mask_);
+      current_data.word_ = (current_data.word_ & mask_) | (word & ~mask_);
       next_word = (next_word & ~mask_) | (word & mask_);
-      store<Word>(bitmap_, current_word_);
+      store<Word>(bitmap_, current_data.word_);
       store<Word>(bitmap_ + sizeof(Word), next_word);
-      current_word_ = next_word;
+      current_data.word_ = next_word;
     } else {
       store<Word>(bitmap_, word);
     }
@@ -229,11 +229,11 @@ class BitmapWordWriter {
       if (may_have_byte_offset && offset_) {
         byte = (byte << offset_) | (byte >> (8 - offset_));
         uint8_t next_byte = load<uint8_t>(bitmap_ + 1);
-        current_byte_ = (current_byte_ & mask_) | (byte & ~mask_);
+        current_data.epi.byte_ = (current_data.epi.byte_ & mask_) | (byte & ~mask_);
         next_byte = (next_byte & ~mask_) | (byte & mask_);
-        store<uint8_t>(bitmap_, current_byte_);
+        store<uint8_t>(bitmap_, current_data.epi.byte_);
         store<uint8_t>(bitmap_ + 1, next_byte);
-        current_byte_ = next_byte;
+        current_data.epi.byte_ = next_byte;
       } else {
         store<uint8_t>(bitmap_, byte);
       }
@@ -259,14 +259,14 @@ class BitmapWordWriter {
   const uint8_t* bitmap_end_;
   uint64_t mask_;
   union {
-    Word current_word_;
+    Word word_;
     struct {
 #if ARROW_LITTLE_ENDIAN == 0
       uint8_t padding_bytes_[sizeof(Word) - 1];
 #endif
-      uint8_t current_byte_;
-    };
-  };
+      uint8_t byte_;
+    } epi;
+  } current_data;
 
   template <typename DType>
   DType load(const uint8_t* bitmap) {

From c51e4a179379628578a69f536ffca80a844efcd2 Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Sun, 1 Aug 2021 12:14:25 -0400
Subject: [PATCH 676/719] ARROW-13496: [CI][R] Repair r-sanitizer job

Closes #10840 from nealrichardson/fix-r-sanitizer

Authored-by: Neal Richardson <neal.p.richardson@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 ci/scripts/r_deps.sh         |  9 +++++++++
 ci/scripts/r_sanitize.sh     | 15 +++++++++++++--
 r/inst/build_arrow_static.sh |  2 +-
 3 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/ci/scripts/r_deps.sh b/ci/scripts/r_deps.sh
index 7e9d2eac7a9..379ee945559 100755
--- a/ci/scripts/r_deps.sh
+++ b/ci/scripts/r_deps.sh
@@ -26,6 +26,15 @@ pushd ${source_dir}
 
 # Install R package dependencies
 ${R_BIN} -e "install.packages('remotes'); remotes::install_cran(c('glue', 'rcmdcheck', 'sys'))"
+
+if [ ${R_BIN} = "RDsan" ]; then
+  # To prevent the build from timing out, let's prune some optional deps
+  ${R_BIN} -e 'd <- read.dcf("DESCRIPTION")
+  to_prune <- c("duckdb", "DBI", "dbplyr", "decor", "knitr", "rmarkdown", "pkgload", "reticulate")
+  pattern <- paste0("\\n?", to_prune, ",?", collapse = "|")
+  d[,"Suggests"] <- gsub(pattern, "", d[,"Suggests"])
+  write.dcf(d, "DESCRIPTION")'
+fi
 ${R_BIN} -e "remotes::install_deps(dependencies = TRUE)"
 
 popd
diff --git a/ci/scripts/r_sanitize.sh b/ci/scripts/r_sanitize.sh
index 89963eb2dd8..61d0755878f 100755
--- a/ci/scripts/r_sanitize.sh
+++ b/ci/scripts/r_sanitize.sh
@@ -22,12 +22,23 @@ set -ex
 
 source_dir=${1}/r
 
-${R_BIN} CMD INSTALL ${source_dir}
-pushd ${source_dir}/tests
+pushd ${source_dir}
+
+# Unity builds were causing the CI job to run out of memory
+export CMAKE_UNITY_BUILD=OFF
+# Make installation verbose so that the CI job doesn't time out due to silence
+export ARROW_R_DEV=TRUE
+${R_BIN} CMD INSTALL .
+# But unset the env var so that it doesn't cause us to run extra dev tests
+unset ARROW_R_DEV
 
 export TEST_R_WITH_ARROW=TRUE
 export UBSAN_OPTIONS="print_stacktrace=1,suppressions=/arrow/r/tools/ubsan.supp"
+
+pushd tests
 ${R_BIN} < testthat.R > testthat.out 2>&1 || { cat testthat.out; exit 1; }
+popd
+${R_BIN} -e 'library(arrow); testthat::test_examples(".")' >> testthat.out 2>&1 || { cat testthat.out; exit 1; }
 
 cat testthat.out
 if grep -q "runtime error" testthat.out; then
diff --git a/r/inst/build_arrow_static.sh b/r/inst/build_arrow_static.sh
index cac0619ee61..fdd99ca52ef 100755
--- a/r/inst/build_arrow_static.sh
+++ b/r/inst/build_arrow_static.sh
@@ -74,7 +74,7 @@ ${CMAKE} -DARROW_BOOST_USE_SHARED=OFF \
     -DCMAKE_INSTALL_PREFIX=${DEST_DIR} \
     -DCMAKE_EXPORT_NO_PACKAGE_REGISTRY=ON \
     -DCMAKE_FIND_PACKAGE_NO_PACKAGE_REGISTRY=ON \
-    -DCMAKE_UNITY_BUILD=ON \
+    -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD:-ON} \
     ${EXTRA_CMAKE_FLAGS} \
     -G ${CMAKE_GENERATOR:-"Unix Makefiles"} \
     ${SOURCE_DIR}

From 9680d5b666861428a0a8c784ff08f4ca6d1a36b8 Mon Sep 17 00:00:00 2001
From: "darion.yaphet" <darion.yaphet@gmail.com>
Date: Mon, 2 Aug 2021 15:38:04 +0200
Subject: [PATCH 677/719] ARROW-13524: [C++] Fix description for
 ApplicationVersion::VersionEq

fix the comment about metadata

Closes #10850 from darionyaphet/fix-meta

Authored-by: darion.yaphet <darion.yaphet@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/parquet/metadata.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/src/parquet/metadata.h b/cpp/src/parquet/metadata.h
index 1865115e423..e8d20bcb9eb 100644
--- a/cpp/src/parquet/metadata.h
+++ b/cpp/src/parquet/metadata.h
@@ -84,7 +84,7 @@ class PARQUET_EXPORT ApplicationVersion {
   // Returns true if version is strictly less than other_version
   bool VersionLt(const ApplicationVersion& other_version) const;
 
-  // Returns true if version is strictly less than other_version
+  // Returns true if version is strictly equal with other_version
   bool VersionEq(const ApplicationVersion& other_version) const;
 
   // Checks if the Version has the correct statistics for a given column

From e5b1df171f6a219ffbb4459218511e2d85c6b707 Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Mon, 2 Aug 2021 15:40:44 +0200
Subject: [PATCH 678/719] ARROW-13516: [C++] Detect --version-script flag
 availability

Mingw-w64 + Clang (lld) doesn't support it.

See also: https://github.com/msys2/MINGW-packages/pull/9255

Closes #10848 from kou/cpp-version-script-mingw64-clang

Authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/CMakeLists.txt                 | 20 ++++++++++++++++++++
 cpp/src/arrow/CMakeLists.txt       |  5 +----
 cpp/src/gandiva/CMakeLists.txt     |  5 +----
 cpp/src/gandiva/jni/CMakeLists.txt |  2 +-
 cpp/src/parquet/CMakeLists.txt     |  5 +----
 cpp/src/plasma/CMakeLists.txt      |  5 +----
 6 files changed, 25 insertions(+), 17 deletions(-)

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 78b19ba5930..2bcdc0de179 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -453,6 +453,26 @@ endif()
 
 include(SetupCxxFlags)
 
+#
+# Linker flags
+#
+
+# Localize thirdparty symbols using a linker version script. This hides them
+# from the client application. The OS X linker does not support the
+# version-script option.
+if(CMAKE_VERSION VERSION_LESS 3.18)
+  if(APPLE OR WIN32)
+    set(CXX_LINKER_SUPPORTS_VERSION_SCRIPT FALSE)
+  else()
+    set(CXX_LINKER_SUPPORTS_VERSION_SCRIPT TRUE)
+  endif()
+else()
+  include(CheckLinkerFlag)
+  check_linker_flag(CXX
+                    "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/src/arrow/symbols.map"
+                    CXX_LINKER_SUPPORTS_VERSION_SCRIPT)
+endif()
+
 #
 # Build output directory
 #
diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index cb6e91bd40e..f13e5b1ef75 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -483,10 +483,7 @@ if(ARROW_ORC)
   list(APPEND ARROW_SRCS adapters/orc/adapter.cc adapters/orc/adapter_util.cc)
 endif()
 
-if(NOT APPLE AND NOT MSVC_TOOLCHAIN)
-  # Localize thirdparty symbols using a linker version script. This hides them
-  # from the client application. The OS X linker does not support the
-  # version-script option.
+if(CXX_LINKER_SUPPORTS_VERSION_SCRIPT)
   set(ARROW_VERSION_SCRIPT_FLAGS
       "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/symbols.map")
   set(ARROW_SHARED_LINK_FLAGS ${ARROW_VERSION_SCRIPT_FLAGS})
diff --git a/cpp/src/gandiva/CMakeLists.txt b/cpp/src/gandiva/CMakeLists.txt
index 08f31ad9554..2da8bb68092 100644
--- a/cpp/src/gandiva/CMakeLists.txt
+++ b/cpp/src/gandiva/CMakeLists.txt
@@ -111,10 +111,7 @@ endif()
 #     set(GANDIVA_SHARED_LINK_FLAGS "${GANDIVA_SHARED_LINK_FLAGS} /EXPORT:${SYMBOL}")
 #   endforeach()
 # endif()
-if(NOT APPLE AND NOT MSVC_TOOLCHAIN)
-  # Localize thirdparty symbols using a linker version script. This hides them
-  # from the client application. The OS X linker does not support the
-  # version-script option.
+if(CXX_LINKER_SUPPORTS_VERSION_SCRIPT)
   set(GANDIVA_VERSION_SCRIPT_FLAGS
       "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/symbols.map")
   set(GANDIVA_SHARED_LINK_FLAGS
diff --git a/cpp/src/gandiva/jni/CMakeLists.txt b/cpp/src/gandiva/jni/CMakeLists.txt
index 04fd22eec9a..046934141f6 100644
--- a/cpp/src/gandiva/jni/CMakeLists.txt
+++ b/cpp/src/gandiva/jni/CMakeLists.txt
@@ -98,7 +98,7 @@ add_dependencies(gandiva ${GANDIVA_JNI_LIBRARIES})
 if(ARROW_BUILD_SHARED)
   # filter out everything that is not needed for the jni bridge
   # statically linked stdc++ has conflicts with stdc++ loaded by other libraries.
-  if(NOT APPLE)
+  if(CXX_LINKER_SUPPORTS_VERSION_SCRIPT)
     set_target_properties(gandiva_jni_shared
                           PROPERTIES LINK_FLAGS
                                      "-Wl,--version-script=${CMAKE_SOURCE_DIR}/src/gandiva/jni/symbols.map"
diff --git a/cpp/src/parquet/CMakeLists.txt b/cpp/src/parquet/CMakeLists.txt
index a487760a03e..ce459132547 100644
--- a/cpp/src/parquet/CMakeLists.txt
+++ b/cpp/src/parquet/CMakeLists.txt
@@ -232,10 +232,7 @@ if(NOT PARQUET_MINIMAL_DEPENDENCY)
 
 endif(NOT PARQUET_MINIMAL_DEPENDENCY)
 
-if(NOT APPLE AND NOT MSVC_TOOLCHAIN)
-  # Localize thirdparty symbols using a linker version script. This hides them
-  # from the client application. The OS X linker does not support the
-  # version-script option.
+if(CXX_LINKER_SUPPORTS_VERSION_SCRIPT)
   set(PARQUET_SHARED_LINK_FLAGS
       "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/symbols.map")
 endif()
diff --git a/cpp/src/plasma/CMakeLists.txt b/cpp/src/plasma/CMakeLists.txt
index a4e6e72b41b..46603d6f85c 100644
--- a/cpp/src/plasma/CMakeLists.txt
+++ b/cpp/src/plasma/CMakeLists.txt
@@ -61,10 +61,7 @@ if(ARROW_CUDA)
   add_definitions(-DPLASMA_CUDA)
 endif()
 
-if(NOT APPLE AND NOT MSVC_TOOLCHAIN)
-  # Localize thirdparty symbols using a linker version script. This hides them
-  # from the client application. The OS X linker does not support the
-  # version-script option.
+if(CXX_LINKER_SUPPORTS_VERSION_SCRIPT)
   set(PLASMA_SHARED_LINK_FLAGS
       "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/symbols.map")
 endif()

From 8125a8b86a1d175a90050aa9be25b6b883cebadf Mon Sep 17 00:00:00 2001
From: Dmitry Kalinkin <dmitry.kalinkin@gmail.com>
Date: Mon, 2 Aug 2021 15:45:58 +0200
Subject: [PATCH 679/719] ARROW-13504: [Python] Move marks from fixtures to
 individual tests/params

https://issues.apache.org/jira/browse/ARROW-13504

The problem is that tests should be skippable with

  pytest pyarrow/tests -m "(not s3)"

but that doesn't deselect all the s3 tests currently.

The issue is that applying marks to fixtures is not currently supported:
https://github.com/pytest-dev/pytest/issues/1368

This removes marks from fixtures to avoid confusion that this dead code may cause.

Closes #10837 from veprbl/pr/ARROW-13504

Authored-by: Dmitry Kalinkin <dmitry.kalinkin@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 python/pyarrow/tests/conftest.py                    | 2 --
 python/pyarrow/tests/parquet/test_dataset.py        | 5 +++++
 python/pyarrow/tests/parquet/test_parquet_writer.py | 1 +
 python/pyarrow/tests/test_fs.py                     | 9 ++++++---
 4 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/python/pyarrow/tests/conftest.py b/python/pyarrow/tests/conftest.py
index 8fb98d4a6e7..40836867f5f 100644
--- a/python/pyarrow/tests/conftest.py
+++ b/python/pyarrow/tests/conftest.py
@@ -257,7 +257,6 @@ def disable_aws_metadata(monkeypatch):
 # TODO(kszucs): move the following fixtures to test_fs.py once the previous
 # parquet dataset implementation and hdfs implementation are removed.
 
-@pytest.mark.hdfs
 @pytest.fixture(scope='session')
 def hdfs_connection():
     host = os.environ.get('ARROW_HDFS_TEST_HOST', 'default')
@@ -266,7 +265,6 @@ def hdfs_connection():
     return host, port, user
 
 
-@pytest.mark.s3
 @pytest.fixture(scope='session')
 def s3_connection():
     host, port = 'localhost', find_free_port()
diff --git a/python/pyarrow/tests/parquet/test_dataset.py b/python/pyarrow/tests/parquet/test_dataset.py
index 81e3cdd7468..e90a24121e1 100644
--- a/python/pyarrow/tests/parquet/test_dataset.py
+++ b/python/pyarrow/tests/parquet/test_dataset.py
@@ -601,6 +601,7 @@ def test_partition_keys_with_underscores(tempdir, use_legacy_dataset):
     assert result.column("year_week").to_pylist() == string_keys
 
 
+@pytest.mark.s3
 @parametrize_legacy_dataset
 def test_read_s3fs(s3_example_s3fs, use_legacy_dataset):
     fs, path = s3_example_s3fs
@@ -614,6 +615,7 @@ def test_read_s3fs(s3_example_s3fs, use_legacy_dataset):
     assert result.equals(table)
 
 
+@pytest.mark.s3
 @parametrize_legacy_dataset
 def test_read_directory_s3fs(s3_example_s3fs, use_legacy_dataset):
     fs, directory = s3_example_s3fs
@@ -653,6 +655,7 @@ def test_read_partitioned_directory_s3fs_wrapper(
 
 
 @pytest.mark.pandas
+@pytest.mark.s3
 @parametrize_legacy_dataset
 def test_read_partitioned_directory_s3fs(s3_example_s3fs, use_legacy_dataset):
     fs, path = s3_example_s3fs
@@ -1352,6 +1355,7 @@ def test_write_to_dataset_pathlib_nonlocal(
 
 
 @pytest.mark.pandas
+@pytest.mark.s3
 @parametrize_legacy_dataset
 def test_write_to_dataset_with_partitions_s3fs(
     s3_example_s3fs, use_legacy_dataset
@@ -1363,6 +1367,7 @@ def test_write_to_dataset_with_partitions_s3fs(
 
 
 @pytest.mark.pandas
+@pytest.mark.s3
 @parametrize_legacy_dataset
 def test_write_to_dataset_no_partitions_s3fs(
     s3_example_s3fs, use_legacy_dataset
diff --git a/python/pyarrow/tests/parquet/test_parquet_writer.py b/python/pyarrow/tests/parquet/test_parquet_writer.py
index 4218e83cead..d766d276cc6 100644
--- a/python/pyarrow/tests/parquet/test_parquet_writer.py
+++ b/python/pyarrow/tests/parquet/test_parquet_writer.py
@@ -206,6 +206,7 @@ def test_parquet_writer_filesystem_s3_uri(s3_example_fs):
 
 
 @pytest.mark.pandas
+@pytest.mark.s3
 def test_parquet_writer_filesystem_s3fs(s3_example_s3fs):
     df = _test_dataframe(100)
     table = pa.Table.from_pandas(df, preserve_index=False)
diff --git a/python/pyarrow/tests/test_fs.py b/python/pyarrow/tests/test_fs.py
index 0e049e21778..365b7defe32 100644
--- a/python/pyarrow/tests/test_fs.py
+++ b/python/pyarrow/tests/test_fs.py
@@ -384,11 +384,13 @@ def py_fsspec_s3fs(request, s3_connection, s3_server):
     ),
     pytest.param(
         pytest.lazy_fixture('s3fs'),
-        id='S3FileSystem'
+        id='S3FileSystem',
+        marks=pytest.mark.s3
     ),
     pytest.param(
         pytest.lazy_fixture('hdfs'),
-        id='HadoopFileSystem'
+        id='HadoopFileSystem',
+        marks=pytest.mark.hdfs
     ),
     pytest.param(
         pytest.lazy_fixture('mockfs'),
@@ -412,7 +414,8 @@ def py_fsspec_s3fs(request, s3_connection, s3_server):
     ),
     pytest.param(
         pytest.lazy_fixture('py_fsspec_s3fs'),
-        id='PyFileSystem(FSSpecHandler(s3fs.S3FileSystem()))'
+        id='PyFileSystem(FSSpecHandler(s3fs.S3FileSystem()))',
+        marks=pytest.mark.s3
     ),
 ])
 def filesystem_config(request):

From ca81973cf72cd694c3a60ade9726558335b64a70 Mon Sep 17 00:00:00 2001
From: "darion.yaphet" <darion.yaphet@gmail.com>
Date: Mon, 2 Aug 2021 16:09:16 +0200
Subject: [PATCH 680/719] ARROW-13523: [C++] Normalize test executable name

Use hyphens, not underscores

Closes #10818 from darionyaphet/reader_test

Authored-by: darion.yaphet <darion.yaphet@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/parquet/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/src/parquet/CMakeLists.txt b/cpp/src/parquet/CMakeLists.txt
index ce459132547..39e3d0541d5 100644
--- a/cpp/src/parquet/CMakeLists.txt
+++ b/cpp/src/parquet/CMakeLists.txt
@@ -324,7 +324,7 @@ add_parquet_test(internals-test
 set_source_files_properties(public_api_test.cc PROPERTIES SKIP_PRECOMPILE_HEADERS ON
                                                           SKIP_UNITY_BUILD_INCLUSION ON)
 
-add_parquet_test(reader_test
+add_parquet_test(reader-test
                  SOURCES
                  column_reader_test.cc
                  level_conversion_test.cc

From c02a40f2a4316ae27b023a541dfb2ebddaf3f63c Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Mon, 2 Aug 2021 16:16:41 +0200
Subject: [PATCH 681/719] ARROW-13295: [C++] add hash_mean, hash_variance,
 hash_stddev kernels

Note these don't use pairwise summation and so may be prone to precision issues.

Closes #10792 from lidavidm/arrow-13295

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 .../compute/kernels/aggregate_internal.h      |   4 +
 .../compute/kernels/aggregate_var_std.cc      |  31 +-
 .../kernels/aggregate_var_std_internal.h      |  68 ++++
 .../arrow/compute/kernels/hash_aggregate.cc   | 363 ++++++++++++++++++
 .../compute/kernels/hash_aggregate_test.cc    | 161 ++++++++
 5 files changed, 604 insertions(+), 23 deletions(-)
 create mode 100644 cpp/src/arrow/compute/kernels/aggregate_var_std_internal.h

diff --git a/cpp/src/arrow/compute/kernels/aggregate_internal.h b/cpp/src/arrow/compute/kernels/aggregate_internal.h
index ed29f26f2c3..3f5ba39d30e 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_internal.h
+++ b/cpp/src/arrow/compute/kernels/aggregate_internal.h
@@ -55,6 +55,10 @@ struct ScalarAggregator : public KernelState {
   virtual Status Finalize(KernelContext* ctx, Datum* out) = 0;
 };
 
+// Helper to differentiate between var/std calculation so we can fold
+// kernel implementations together
+enum class VarOrStd : bool { Var, Std };
+
 void AddAggKernel(std::shared_ptr<KernelSignature> sig, KernelInit init,
                   ScalarAggregateFunction* func,
                   SimdLevel::type simd_level = SimdLevel::NONE);
diff --git a/cpp/src/arrow/compute/kernels/aggregate_var_std.cc b/cpp/src/arrow/compute/kernels/aggregate_var_std.cc
index d6965fed4a3..6fa49d03d76 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_var_std.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_var_std.cc
@@ -19,6 +19,7 @@
 
 #include "arrow/compute/api_aggregate.h"
 #include "arrow/compute/kernels/aggregate_internal.h"
+#include "arrow/compute/kernels/aggregate_var_std_internal.h"
 #include "arrow/compute/kernels/common.h"
 #include "arrow/util/bit_run_reader.h"
 #include "arrow/util/int128_internal.h"
@@ -85,32 +86,22 @@ struct VarStdState {
       valid_count -= count;
 
       if (count > 0) {
-        int64_t sum = 0;
-        int128_t square_sum = 0;
+        IntegerVarStd<ArrowType> var_std;
         const ArrayData& data = *slice->data();
         const CType* values = data.GetValues<CType>(1);
         VisitSetBitRunsVoid(data.buffers[0], data.offset, data.length,
                             [&](int64_t pos, int64_t len) {
                               for (int64_t i = 0; i < len; ++i) {
                                 const auto value = values[pos + i];
-                                sum += value;
-                                square_sum += static_cast<uint64_t>(value) * value;
+                                var_std.ConsumeOne(value);
                               }
                             });
 
-        const double mean = static_cast<double>(sum) / count;
-        // calculate m2 = square_sum - sum * sum / count
-        // decompose `sum * sum / count` into integers and fractions
-        const int128_t sum_square = static_cast<int128_t>(sum) * sum;
-        const int128_t integers = sum_square / count;
-        const double fractions = static_cast<double>(sum_square % count) / count;
-        const double m2 = static_cast<double>(square_sum - integers) - fractions;
-
         // merge variance
         ThisType state;
-        state.count = count;
-        state.mean = mean;
-        state.m2 = m2;
+        state.count = var_std.count;
+        state.mean = var_std.mean();
+        state.m2 = var_std.m2();
         this->MergeFrom(state);
       }
     }
@@ -128,12 +119,8 @@ struct VarStdState {
       this->m2 = state.m2;
       return;
     }
-    double mean = (this->mean * this->count + state.mean * state.count) /
-                  (this->count + state.count);
-    this->m2 += state.m2 + this->count * (this->mean - mean) * (this->mean - mean) +
-                state.count * (state.mean - mean) * (state.mean - mean);
-    this->count += state.count;
-    this->mean = mean;
+    MergeVarStd(this->count, this->mean, state.count, state.mean, state.m2, &this->count,
+                &this->mean, &this->m2);
   }
 
   int64_t count = 0;
@@ -141,8 +128,6 @@ struct VarStdState {
   double m2 = 0;  // m2 = count*s2 = sum((X-mean)^2)
 };
 
-enum class VarOrStd : bool { Var, Std };
-
 template <typename ArrowType>
 struct VarStdImpl : public ScalarAggregator {
   using ThisType = VarStdImpl<ArrowType>;
diff --git a/cpp/src/arrow/compute/kernels/aggregate_var_std_internal.h b/cpp/src/arrow/compute/kernels/aggregate_var_std_internal.h
new file mode 100644
index 00000000000..675ebfd91d3
--- /dev/null
+++ b/cpp/src/arrow/compute/kernels/aggregate_var_std_internal.h
@@ -0,0 +1,68 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "arrow/util/int128_internal.h"
+
+namespace arrow {
+namespace compute {
+namespace internal {
+
+using arrow::internal::int128_t;
+
+// Accumulate sum/squared sum (using naive summation)
+// Shared implementation between scalar/hash aggregate variance/stddev kernels
+template <typename ArrowType>
+struct IntegerVarStd {
+  using c_type = typename ArrowType::c_type;
+
+  int64_t count = 0;
+  int64_t sum = 0;
+  int128_t square_sum = 0;
+
+  void ConsumeOne(const c_type value) {
+    sum += value;
+    square_sum += static_cast<uint64_t>(value) * value;
+    count++;
+  }
+
+  double mean() const { return static_cast<double>(sum) / count; }
+
+  double m2() const {
+    // calculate m2 = square_sum - sum * sum / count
+    // decompose `sum * sum / count` into integers and fractions
+    const int128_t sum_square = static_cast<int128_t>(sum) * sum;
+    const int128_t integers = sum_square / count;
+    const double fractions = static_cast<double>(sum_square % count) / count;
+    return static_cast<double>(square_sum - integers) - fractions;
+  }
+};
+
+static inline void MergeVarStd(int64_t count1, double mean1, int64_t count2, double mean2,
+                               double m22, int64_t* out_count, double* out_mean,
+                               double* out_m2) {
+  double mean = (mean1 * count1 + mean2 * count2) / (count1 + count2);
+  *out_m2 += m22 + count1 * (mean1 - mean) * (mean1 - mean) +
+             count2 * (mean2 - mean) * (mean2 - mean);
+  *out_count += count2;
+  *out_mean = mean;
+}
+
+}  // namespace internal
+}  // namespace compute
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate.cc b/cpp/src/arrow/compute/kernels/hash_aggregate.cc
index 3e4b401bae9..472ae956388 100644
--- a/cpp/src/arrow/compute/kernels/hash_aggregate.cc
+++ b/cpp/src/arrow/compute/kernels/hash_aggregate.cc
@@ -15,6 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include <cmath>
 #include <functional>
 #include <memory>
 #include <mutex>
@@ -34,12 +35,14 @@
 #include "arrow/compute/exec_internal.h"
 #include "arrow/compute/kernel.h"
 #include "arrow/compute/kernels/aggregate_internal.h"
+#include "arrow/compute/kernels/aggregate_var_std_internal.h"
 #include "arrow/compute/kernels/common.h"
 #include "arrow/util/bit_run_reader.h"
 #include "arrow/util/bitmap_ops.h"
 #include "arrow/util/bitmap_writer.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/cpu_info.h"
+#include "arrow/util/int128_internal.h"
 #include "arrow/util/make_unique.h"
 #include "arrow/util/task_group.h"
 #include "arrow/util/thread_pool.h"
@@ -917,6 +920,8 @@ struct GroupedSumImpl : public GroupedAggregator {
     auto sums = reinterpret_cast<SumType*>(sums_.mutable_data());
     auto counts = reinterpret_cast<int64_t*>(counts_.mutable_data());
 
+    // XXX this uses naive summation; we should switch to pairwise summation as was
+    // done for the scalar aggregate kernel in ARROW-11758
     auto g = batch[1].array()->GetValues<uint32_t>(1);
     VisitArrayDataInline<Type>(
         *batch[0].array(),
@@ -1005,6 +1010,307 @@ struct GroupedSumFactory {
   InputType argument_type;
 };
 
+// ----------------------------------------------------------------------
+// Mean implementation
+
+template <typename Type>
+struct GroupedMeanImpl : public GroupedSumImpl<Type> {
+  Result<Datum> Finalize() override {
+    using SumType = typename GroupedSumImpl<Type>::SumType;
+    std::shared_ptr<Buffer> null_bitmap;
+    ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Buffer> values,
+                          AllocateBuffer(num_groups_ * sizeof(double), pool_));
+    int64_t null_count = 0;
+
+    const int64_t* counts = reinterpret_cast<const int64_t*>(counts_.data());
+    const auto* sums = reinterpret_cast<const SumType*>(sums_.data());
+    double* means = reinterpret_cast<double*>(values->mutable_data());
+    for (int64_t i = 0; i < num_groups_; ++i) {
+      if (counts[i] > 0) {
+        means[i] = static_cast<double>(sums[i]) / counts[i];
+        continue;
+      }
+      means[i] = 0;
+
+      if (null_bitmap == nullptr) {
+        ARROW_ASSIGN_OR_RAISE(null_bitmap, AllocateBitmap(num_groups_, pool_));
+        BitUtil::SetBitsTo(null_bitmap->mutable_data(), 0, num_groups_, true);
+      }
+
+      null_count += 1;
+      BitUtil::SetBitTo(null_bitmap->mutable_data(), i, false);
+    }
+
+    return ArrayData::Make(float64(), num_groups_,
+                           {std::move(null_bitmap), std::move(values)}, null_count);
+  }
+
+  std::shared_ptr<DataType> out_type() const override { return float64(); }
+
+  using GroupedSumImpl<Type>::num_groups_;
+  using GroupedSumImpl<Type>::pool_;
+  using GroupedSumImpl<Type>::counts_;
+  using GroupedSumImpl<Type>::sums_;
+};
+
+struct GroupedMeanFactory {
+  template <typename T, typename AccType = typename FindAccumulatorType<T>::Type>
+  Status Visit(const T&) {
+    kernel = MakeKernel(std::move(argument_type), HashAggregateInit<GroupedMeanImpl<T>>);
+    return Status::OK();
+  }
+
+  Status Visit(const HalfFloatType& type) {
+    return Status::NotImplemented("Computing mean of type ", type);
+  }
+
+  Status Visit(const DataType& type) {
+    return Status::NotImplemented("Computing mean of type ", type);
+  }
+
+  static Result<HashAggregateKernel> Make(const std::shared_ptr<DataType>& type) {
+    GroupedMeanFactory factory;
+    factory.argument_type = InputType::Array(type);
+    RETURN_NOT_OK(VisitTypeInline(*type, &factory));
+    return std::move(factory.kernel);
+  }
+
+  HashAggregateKernel kernel;
+  InputType argument_type;
+};
+
+// Variance/Stdev implementation
+
+using arrow::internal::int128_t;
+
+template <typename Type>
+struct GroupedVarStdImpl : public GroupedAggregator {
+  using CType = typename Type::c_type;
+
+  Status Init(ExecContext* ctx, const FunctionOptions* options) override {
+    options_ = *checked_cast<const VarianceOptions*>(options);
+    ctx_ = ctx;
+    pool_ = ctx->memory_pool();
+    counts_ = BufferBuilder(pool_);
+    means_ = BufferBuilder(pool_);
+    m2s_ = BufferBuilder(pool_);
+    return Status::OK();
+  }
+
+  Status Resize(int64_t new_num_groups) override {
+    auto added_groups = new_num_groups - num_groups_;
+    num_groups_ = new_num_groups;
+    RETURN_NOT_OK(counts_.Append(added_groups * sizeof(int64_t), 0));
+    RETURN_NOT_OK(means_.Append(added_groups * sizeof(double), 0));
+    RETURN_NOT_OK(m2s_.Append(added_groups * sizeof(double), 0));
+    return Status::OK();
+  }
+
+  Status Consume(const ExecBatch& batch) override { return ConsumeImpl(batch); }
+
+  // float/double/int64: calculate `m2` (sum((X-mean)^2)) with `two pass algorithm`
+  // (see aggregate_var_std.cc)
+  template <typename T = Type>
+  enable_if_t<is_floating_type<T>::value || (sizeof(CType) > 4), Status> ConsumeImpl(
+      const ExecBatch& batch) {
+    using SumType =
+        typename std::conditional<is_floating_type<T>::value, double, int128_t>::type;
+
+    int64_t* counts = reinterpret_cast<int64_t*>(counts_.mutable_data());
+    double* means = reinterpret_cast<double*>(means_.mutable_data());
+    double* m2s = reinterpret_cast<double*>(m2s_.mutable_data());
+
+    // XXX this uses naive summation; we should switch to pairwise summation as was
+    // done for the scalar aggregate kernel in ARROW-11567
+    std::vector<SumType> sums(num_groups_);
+    auto g = batch[1].array()->GetValues<uint32_t>(1);
+    VisitArrayDataInline<Type>(
+        *batch[0].array(),
+        [&](typename TypeTraits<Type>::CType value) {
+          sums[*g] += value;
+          counts[*g] += 1;
+          ++g;
+        },
+        [&] { ++g; });
+
+    for (int64_t i = 0; i < num_groups_; i++) {
+      means[i] = static_cast<double>(sums[i]) / counts[i];
+    }
+
+    g = batch[1].array()->GetValues<uint32_t>(1);
+    VisitArrayDataInline<Type>(
+        *batch[0].array(),
+        [&](typename TypeTraits<Type>::CType value) {
+          const double v = static_cast<double>(value);
+          m2s[*g] += (v - means[*g]) * (v - means[*g]);
+          ++g;
+        },
+        [&] { ++g; });
+
+    return Status::OK();
+  }
+
+  // int32/16/8: textbook one pass algorithm with integer arithmetic (see
+  // aggregate_var_std.cc)
+  template <typename T = Type>
+  enable_if_t<is_integer_type<T>::value && (sizeof(CType) <= 4), Status> ConsumeImpl(
+      const ExecBatch& batch) {
+    // max number of elements that sum will not overflow int64 (2Gi int32 elements)
+    // for uint32:    0 <= sum < 2^63 (int64 >= 0)
+    // for int32: -2^62 <= sum < 2^62
+    constexpr int64_t max_length = 1ULL << (63 - sizeof(CType) * 8);
+
+    const auto& array = *batch[0].array();
+    const auto g = batch[1].array()->GetValues<uint32_t>(1);
+
+    std::vector<IntegerVarStd<Type>> var_std(num_groups_);
+
+    ARROW_ASSIGN_OR_RAISE(auto mapping,
+                          AllocateBuffer(num_groups_ * sizeof(uint32_t), pool_));
+    for (uint32_t i = 0; static_cast<int64_t>(i) < num_groups_; i++) {
+      reinterpret_cast<uint32_t*>(mapping->mutable_data())[i] = i;
+    }
+    ArrayData group_id_mapping(uint32(), num_groups_, {nullptr, std::move(mapping)},
+                               /*null_count=*/0);
+
+    const CType* values = array.GetValues<CType>(1);
+
+    for (int64_t start_index = 0; start_index < batch.length; start_index += max_length) {
+      // process in chunks that overflow will never happen
+
+      // reset state
+      var_std.clear();
+      var_std.resize(num_groups_);
+      GroupedVarStdImpl<Type> state;
+      RETURN_NOT_OK(state.Init(ctx_, &options_));
+      RETURN_NOT_OK(state.Resize(num_groups_));
+      int64_t* other_counts = reinterpret_cast<int64_t*>(state.counts_.mutable_data());
+      double* other_means = reinterpret_cast<double*>(state.means_.mutable_data());
+      double* other_m2s = reinterpret_cast<double*>(state.m2s_.mutable_data());
+
+      arrow::internal::VisitSetBitRunsVoid(
+          array.buffers[0], array.offset + start_index,
+          std::min(max_length, batch.length - start_index),
+          [&](int64_t pos, int64_t len) {
+            for (int64_t i = 0; i < len; ++i) {
+              const int64_t index = start_index + pos + i;
+              const auto value = values[index];
+              var_std[g[index]].ConsumeOne(value);
+            }
+          });
+
+      for (int64_t i = 0; i < num_groups_; i++) {
+        if (var_std[i].count == 0) continue;
+
+        other_counts[i] = var_std[i].count;
+        other_means[i] = var_std[i].mean();
+        other_m2s[i] = var_std[i].m2();
+      }
+      RETURN_NOT_OK(this->Merge(std::move(state), group_id_mapping));
+    }
+    return Status::OK();
+  }
+
+  Status Merge(GroupedAggregator&& raw_other,
+               const ArrayData& group_id_mapping) override {
+    // Combine m2 from two chunks (see aggregate_var_std.cc)
+    auto other = checked_cast<GroupedVarStdImpl*>(&raw_other);
+
+    auto counts = reinterpret_cast<int64_t*>(counts_.mutable_data());
+    auto means = reinterpret_cast<double*>(means_.mutable_data());
+    auto m2s = reinterpret_cast<double*>(m2s_.mutable_data());
+
+    const auto* other_counts = reinterpret_cast<const int64_t*>(other->counts_.data());
+    const auto* other_means = reinterpret_cast<const double*>(other->means_.data());
+    const auto* other_m2s = reinterpret_cast<const double*>(other->m2s_.data());
+
+    auto g = group_id_mapping.GetValues<uint32_t>(1);
+    for (int64_t other_g = 0; other_g < group_id_mapping.length; ++other_g, ++g) {
+      if (other_counts[other_g] == 0) continue;
+      MergeVarStd(counts[*g], means[*g], other_counts[other_g], other_means[other_g],
+                  other_m2s[other_g], &counts[*g], &means[*g], &m2s[*g]);
+    }
+    return Status::OK();
+  }
+
+  Result<Datum> Finalize() override {
+    std::shared_ptr<Buffer> null_bitmap;
+    ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Buffer> values,
+                          AllocateBuffer(num_groups_ * sizeof(double), pool_));
+    int64_t null_count = 0;
+
+    double* results = reinterpret_cast<double*>(values->mutable_data());
+    const int64_t* counts = reinterpret_cast<const int64_t*>(counts_.data());
+    const double* m2s = reinterpret_cast<const double*>(m2s_.data());
+    for (int64_t i = 0; i < num_groups_; ++i) {
+      if (counts[i] > options_.ddof) {
+        const double variance = m2s[i] / (counts[i] - options_.ddof);
+        results[i] = result_type_ == VarOrStd::Var ? variance : std::sqrt(variance);
+        continue;
+      }
+
+      results[i] = 0;
+      if (null_bitmap == nullptr) {
+        ARROW_ASSIGN_OR_RAISE(null_bitmap, AllocateBitmap(num_groups_, pool_));
+        BitUtil::SetBitsTo(null_bitmap->mutable_data(), 0, num_groups_, true);
+      }
+
+      null_count += 1;
+      BitUtil::SetBitTo(null_bitmap->mutable_data(), i, false);
+    }
+
+    return ArrayData::Make(float64(), num_groups_,
+                           {std::move(null_bitmap), std::move(values)}, null_count);
+  }
+
+  std::shared_ptr<DataType> out_type() const override { return float64(); }
+
+  VarOrStd result_type_;
+  VarianceOptions options_;
+  int64_t num_groups_ = 0;
+  // m2 = count * s2 = sum((X-mean)^2)
+  BufferBuilder counts_, means_, m2s_;
+  ExecContext* ctx_;
+  MemoryPool* pool_;
+};
+
+template <typename T, VarOrStd result_type>
+Result<std::unique_ptr<KernelState>> VarStdInit(KernelContext* ctx,
+                                                const KernelInitArgs& args) {
+  auto impl = ::arrow::internal::make_unique<GroupedVarStdImpl<T>>();
+  impl->result_type_ = result_type;
+  RETURN_NOT_OK(impl->Init(ctx->exec_context(), args.options));
+  return std::move(impl);
+}
+
+template <VarOrStd result_type>
+struct GroupedVarStdFactory {
+  template <typename T, typename Enable = enable_if_t<is_integer_type<T>::value ||
+                                                      is_floating_type<T>::value>>
+  Status Visit(const T&) {
+    kernel = MakeKernel(std::move(argument_type), VarStdInit<T, result_type>);
+    return Status::OK();
+  }
+
+  Status Visit(const HalfFloatType& type) {
+    return Status::NotImplemented("Computing variance/stddev of data of type ", type);
+  }
+
+  Status Visit(const DataType& type) {
+    return Status::NotImplemented("Computing variance/stddev of data of type ", type);
+  }
+
+  static Result<HashAggregateKernel> Make(const std::shared_ptr<DataType>& type) {
+    GroupedVarStdFactory factory;
+    factory.argument_type = InputType::Array(type);
+    RETURN_NOT_OK(VisitTypeInline(*type, &factory));
+    return std::move(factory.kernel);
+  }
+
+  HashAggregateKernel kernel;
+  InputType argument_type;
+};
+
 // ----------------------------------------------------------------------
 // MinMax implementation
 
@@ -1537,6 +1843,26 @@ const FunctionDoc hash_sum_doc{"Sum values of a numeric array",
                                ("Null values are ignored."),
                                {"array", "group_id_array"}};
 
+const FunctionDoc hash_mean_doc{"Average values of a numeric array",
+                                ("Null values are ignored."),
+                                {"array", "group_id_array"}};
+
+const FunctionDoc hash_stddev_doc{
+    "Calculate the standard deviation of a numeric array",
+    ("The number of degrees of freedom can be controlled using VarianceOptions.\n"
+     "By default (`ddof` = 0), the population standard deviation is calculated.\n"
+     "Nulls are ignored.  If there are not enough non-null values in the array\n"
+     "to satisfy `ddof`, null is returned."),
+    {"array", "group_id_array"}};
+
+const FunctionDoc hash_variance_doc{
+    "Calculate the variance of a numeric array",
+    ("The number of degrees of freedom can be controlled using VarianceOptions.\n"
+     "By default (`ddof` = 0), the population variance is calculated.\n"
+     "Nulls are ignored.  If there are not enough non-null values in the array\n"
+     "to satisfy `ddof`, null is returned."),
+    {"array", "group_id_array"}};
+
 const FunctionDoc hash_min_max_doc{
     "Compute the minimum and maximum values of a numeric array",
     ("Null values are ignored by default.\n"
@@ -1576,6 +1902,43 @@ void RegisterHashAggregateBasic(FunctionRegistry* registry) {
     DCHECK_OK(registry->AddFunction(std::move(func)));
   }
 
+  {
+    auto func = std::make_shared<HashAggregateFunction>("hash_mean", Arity::Binary(),
+                                                        &hash_mean_doc);
+    DCHECK_OK(AddHashAggKernels({boolean()}, GroupedMeanFactory::Make, func.get()));
+    DCHECK_OK(AddHashAggKernels(SignedIntTypes(), GroupedMeanFactory::Make, func.get()));
+    DCHECK_OK(
+        AddHashAggKernels(UnsignedIntTypes(), GroupedMeanFactory::Make, func.get()));
+    DCHECK_OK(
+        AddHashAggKernels(FloatingPointTypes(), GroupedMeanFactory::Make, func.get()));
+    DCHECK_OK(registry->AddFunction(std::move(func)));
+  }
+
+  static auto default_variance_options = VarianceOptions::Defaults();
+  {
+    auto func = std::make_shared<HashAggregateFunction>(
+        "hash_stddev", Arity::Binary(), &hash_stddev_doc, &default_variance_options);
+    DCHECK_OK(AddHashAggKernels(SignedIntTypes(),
+                                GroupedVarStdFactory<VarOrStd::Std>::Make, func.get()));
+    DCHECK_OK(AddHashAggKernels(UnsignedIntTypes(),
+                                GroupedVarStdFactory<VarOrStd::Std>::Make, func.get()));
+    DCHECK_OK(AddHashAggKernels(FloatingPointTypes(),
+                                GroupedVarStdFactory<VarOrStd::Std>::Make, func.get()));
+    DCHECK_OK(registry->AddFunction(std::move(func)));
+  }
+
+  {
+    auto func = std::make_shared<HashAggregateFunction>(
+        "hash_variance", Arity::Binary(), &hash_variance_doc, &default_variance_options);
+    DCHECK_OK(AddHashAggKernels(SignedIntTypes(),
+                                GroupedVarStdFactory<VarOrStd::Var>::Make, func.get()));
+    DCHECK_OK(AddHashAggKernels(UnsignedIntTypes(),
+                                GroupedVarStdFactory<VarOrStd::Var>::Make, func.get()));
+    DCHECK_OK(AddHashAggKernels(FloatingPointTypes(),
+                                GroupedVarStdFactory<VarOrStd::Var>::Make, func.get()));
+    DCHECK_OK(registry->AddFunction(std::move(func)));
+  }
+
   {
     static auto default_scalar_aggregate_options = ScalarAggregateOptions::Defaults();
     auto func = std::make_shared<HashAggregateFunction>(
diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc b/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc
index 46c7716abce..f4df6aa18a3 100644
--- a/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc
+++ b/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc
@@ -653,6 +653,167 @@ TEST(GroupBy, SumOnly) {
   }
 }
 
+TEST(GroupBy, MeanOnly) {
+  for (bool use_threads : {true, false}) {
+    SCOPED_TRACE(use_threads ? "parallel/merged" : "serial");
+
+    auto table =
+        TableFromJSON(schema({field("argument", float64()), field("key", int64())}), {R"([
+    [1.0,   1],
+    [null,  1]
+                        ])",
+                                                                                      R"([
+    [0.0,   2],
+    [null,  3],
+    [4.0,   null],
+    [3.25,  1],
+    [0.125, 2]
+                        ])",
+                                                                                      R"([
+    [-0.25, 2],
+    [0.75,  null],
+    [null,  3]
+                        ])"});
+
+    ASSERT_OK_AND_ASSIGN(Datum aggregated_and_grouped,
+                         internal::GroupBy({table->GetColumnByName("argument")},
+                                           {table->GetColumnByName("key")},
+                                           {
+                                               {"hash_mean", nullptr},
+                                           },
+                                           use_threads));
+    SortBy({"key_0"}, &aggregated_and_grouped);
+
+    AssertDatumsApproxEqual(ArrayFromJSON(struct_({
+                                              field("hash_mean", float64()),
+                                              field("key_0", int64()),
+                                          }),
+                                          R"([
+    [2.125,   1],
+    [-0.041666666666666664, 2],
+    [null,   3],
+    [2.375,   null]
+  ])"),
+                            aggregated_and_grouped,
+                            /*verbose=*/true);
+  }
+}
+
+TEST(GroupBy, VarianceAndStddev) {
+  auto batch = RecordBatchFromJSON(
+      schema({field("argument", int32()), field("key", int64())}), R"([
+    [1,   1],
+    [null,  1],
+    [0,   2],
+    [null,  3],
+    [4,   null],
+    [3,  1],
+    [0, 2],
+    [-1, 2],
+    [1,  null],
+    [null,  3]
+  ])");
+
+  ASSERT_OK_AND_ASSIGN(Datum aggregated_and_grouped,
+                       internal::GroupBy(
+                           {
+                               batch->GetColumnByName("argument"),
+                               batch->GetColumnByName("argument"),
+                           },
+                           {
+                               batch->GetColumnByName("key"),
+                           },
+                           {
+                               {"hash_variance", nullptr},
+                               {"hash_stddev", nullptr},
+                           }));
+
+  AssertDatumsApproxEqual(ArrayFromJSON(struct_({
+                                            field("hash_variance", float64()),
+                                            field("hash_stddev", float64()),
+                                            field("key_0", int64()),
+                                        }),
+                                        R"([
+    [1.0,                 1.0,                1],
+    [0.22222222222222224, 0.4714045207910317, 2],
+    [null,                null,               3],
+    [2.25,                1.5,                null]
+  ])"),
+                          aggregated_and_grouped,
+                          /*verbose=*/true);
+
+  batch = RecordBatchFromJSON(
+      schema({field("argument", float64()), field("key", int64())}), R"([
+    [1.0,   1],
+    [null,  1],
+    [0.0,   2],
+    [null,  3],
+    [4.0,   null],
+    [3.0,  1],
+    [0.0, 2],
+    [-1.0, 2],
+    [1.0,  null],
+    [null,  3]
+  ])");
+
+  ASSERT_OK_AND_ASSIGN(aggregated_and_grouped, internal::GroupBy(
+                                                   {
+                                                       batch->GetColumnByName("argument"),
+                                                       batch->GetColumnByName("argument"),
+                                                   },
+                                                   {
+                                                       batch->GetColumnByName("key"),
+                                                   },
+                                                   {
+                                                       {"hash_variance", nullptr},
+                                                       {"hash_stddev", nullptr},
+                                                   }));
+
+  AssertDatumsApproxEqual(ArrayFromJSON(struct_({
+                                            field("hash_variance", float64()),
+                                            field("hash_stddev", float64()),
+                                            field("key_0", int64()),
+                                        }),
+                                        R"([
+    [1.0,                 1.0,                1],
+    [0.22222222222222224, 0.4714045207910317, 2],
+    [null,                null,               3],
+    [2.25,                1.5,                null]
+  ])"),
+                          aggregated_and_grouped,
+                          /*verbose=*/true);
+
+  // Test ddof
+  VarianceOptions variance_options(/*ddof=*/2);
+  ASSERT_OK_AND_ASSIGN(aggregated_and_grouped,
+                       internal::GroupBy(
+                           {
+                               batch->GetColumnByName("argument"),
+                               batch->GetColumnByName("argument"),
+                           },
+                           {
+                               batch->GetColumnByName("key"),
+                           },
+                           {
+                               {"hash_variance", &variance_options},
+                               {"hash_stddev", &variance_options},
+                           }));
+
+  AssertDatumsApproxEqual(ArrayFromJSON(struct_({
+                                            field("hash_variance", float64()),
+                                            field("hash_stddev", float64()),
+                                            field("key_0", int64()),
+                                        }),
+                                        R"([
+    [null,                null,               1],
+    [0.6666666666666667,  0.816496580927726,  2],
+    [null,                null,               3],
+    [null,                null,               null]
+  ])"),
+                          aggregated_and_grouped,
+                          /*verbose=*/true);
+}
+
 TEST(GroupBy, MinMaxOnly) {
   for (bool use_exec_plan : {false, true}) {
     for (bool use_threads : {true, false}) {

From 29c8c4d643fd99178dc426f60428c0c08cebf58a Mon Sep 17 00:00:00 2001
From: Keith Kraus <3665167+kkraus14@users.noreply.github.com>
Date: Mon, 2 Aug 2021 16:26:04 +0200
Subject: [PATCH 682/719] ARROW-13500 [C++] Fix using
 '-Wno-unknown-warning-option' with GCC

The `-Wno-unknown-warning-option` is implemented in Clang but not GCC.

Alternatively I could change this to pass `-Wno-unknown-warning` which seems to have similar behavior according to https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html

Closes #10834 from kkraus14/fix_gcc_unknown_warning

Authored-by: Keith Kraus <3665167+kkraus14@users.noreply.github.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/cmake_modules/SetupCxxFlags.cmake | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake
index aa8e5becab0..86c6e9706e0 100644
--- a/cpp/cmake_modules/SetupCxxFlags.cmake
+++ b/cpp/cmake_modules/SetupCxxFlags.cmake
@@ -343,9 +343,6 @@ if(MSVC)
   set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} /wd4065")
 
 elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
-  # Avoid error when an unknown warning flag is passed
-  set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-unknown-warning-option")
-
   if(CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL "7.0" OR CMAKE_CXX_COMPILER_VERSION
                                                        VERSION_GREATER "7.0")
     # Without this, gcc >= 7 warns related to changes in C++17

From 1ea8d975afa5e61fa209f0eb31ae8df88037a14c Mon Sep 17 00:00:00 2001
From: michalursa <michal@ursacomputing.com>
Date: Mon, 2 Aug 2021 10:36:54 -0400
Subject: [PATCH 683/719] ARROW-13495: [C++][Compute] Fixing unaligned memory
 access in GrouperFastImpl

Temp vectors used in grouper implementation were allocated using stack implementation.
The buffer addresses returned by the stack were not aligned to the size of requested vector element.
Changing this to have returned buffers aligned to 8 byte boundaries.

Closes #10835 from michalursa/ARROW-13495-fix-alignment-for-temp-vectors

Authored-by: michalursa <michal@ursacomputing.com>
Signed-off-by: Benjamin Kietzman <bengilgit@gmail.com>
---
 cpp/src/arrow/compute/exec/util.h | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/cpp/src/arrow/compute/exec/util.h b/cpp/src/arrow/compute/exec/util.h
index 471cc332220..d8248ceacab 100644
--- a/cpp/src/arrow/compute/exec/util.h
+++ b/cpp/src/arrow/compute/exec/util.h
@@ -24,6 +24,7 @@
 #include "arrow/memory_pool.h"
 #include "arrow/result.h"
 #include "arrow/status.h"
+#include "arrow/util/bit_util.h"
 #include "arrow/util/cpu_info.h"
 #include "arrow/util/logging.h"
 
@@ -66,9 +67,19 @@ class TempVectorStack {
   }
 
  private:
+  int64_t PaddedAllocationSize(int64_t num_bytes) {
+    // Round up allocation size to multiple of 8 bytes
+    // to avoid returning temp vectors with unaligned address.
+    //
+    // Also add padding at the end to facilitate loads and stores
+    // using SIMD when number of vector elements is not divisible
+    // by the number of SIMD lanes.
+    //
+    return ::arrow::BitUtil::RoundUp(num_bytes, sizeof(int64_t)) + padding;
+  }
   void alloc(uint32_t num_bytes, uint8_t** data, int* id) {
     int64_t old_top = top_;
-    top_ += num_bytes + padding;
+    top_ += PaddedAllocationSize(num_bytes);
     // Stack overflow check
     ARROW_DCHECK(top_ <= buffer_size_);
     *data = buffer_->mutable_data() + old_top;
@@ -76,7 +87,7 @@ class TempVectorStack {
   }
   void release(int id, uint32_t num_bytes) {
     ARROW_DCHECK(num_vectors_ == id + 1);
-    int64_t size = num_bytes + padding;
+    int64_t size = PaddedAllocationSize(num_bytes);
     ARROW_DCHECK(top_ >= size);
     top_ -= size;
     --num_vectors_;

From 122985841dcedd13bee5388cf5cb887f75cb4527 Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Mon, 2 Aug 2021 16:39:50 +0200
Subject: [PATCH 684/719] ARROW-13379: [Dev][Docs] Improvements to archery docs

Closes #10830 from jonkeane/ARROW-13379-archery-docs

Lead-authored-by: Jonathan Keane <jkeane@gmail.com>
Co-authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 dev/archery/README.md               | 49 +++++++++++++++++++++++++++++
 docs/source/developers/archery.rst  |  4 ++-
 docs/source/developers/crossbow.rst |  9 ++++--
 3 files changed, 59 insertions(+), 3 deletions(-)
 create mode 100644 dev/archery/README.md

diff --git a/dev/archery/README.md b/dev/archery/README.md
new file mode 100644
index 00000000000..eff65441661
--- /dev/null
+++ b/dev/archery/README.md
@@ -0,0 +1,49 @@
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one
+  ~ or more contributor license agreements.  See the NOTICE file
+  ~ distributed with this work for additional information
+  ~ regarding copyright ownership.  The ASF licenses this file
+  ~ to you under the Apache License, Version 2.0 (the
+  ~ "License"); you may not use this file except in compliance
+  ~ with the License.  You may obtain a copy of the License at
+  ~
+  ~   http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing,
+  ~ software distributed under the License is distributed on an
+  ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  ~ KIND, either express or implied.  See the License for the
+  ~ specific language governing permissions and limitations
+  ~ under the License.
+  -->
+
+# Developing with Archery
+
+Archery is documented on the Arrow website:
+
+* [Daily development using Archery](https://arrow.apache.org/docs/developers/archery.html)
+* [Using Archery and Crossbow](https://arrow.apache.org/docs/developers/crossbow.html)
+* [Using Archer and Docker](https://arrow.apache.org/docs/developers/docker.html)
+
+# Installing Archery
+
+See the pages linked aboved for more details. As a general overview, Archery
+comes in a number of subpackages, each needing to be installed if you want
+to use the functionality of it:
+
+* lint – lint (and in some cases auto-format) code in the Arrow repo
+  To install: `pip install -e "arrow/dev/archery[lint]"`
+* benchmark – to run Arrow benchmarks using Archery
+  To install: `pip install -e "arrow/dev/archery[benchmark]"`
+* docker – to run docker-compose based tasks more easily
+  To install: `pip install -e "arrow/dev/archery[docker]"`
+* release – release related helpers
+  To install: `pip install -e "arrow/dev/archery[release]"`
+* crossbow – to trigger + interact with the crossbow build system
+  To install: `pip install -e "arrow/dev/archery[crossbow]"`
+* crossbow-upload
+  To install: `pip install -e "arrow/dev/archery[crossbow-upload]"`
+
+Additionally, if you would prefer to install everything at once,
+`pip install -e "arrow/dev/archery[all]"` is an alias for all of
+the above subpackages.
\ No newline at end of file
diff --git a/docs/source/developers/archery.rst b/docs/source/developers/archery.rst
index c5a508d6fd0..f929bb228a7 100644
--- a/docs/source/developers/archery.rst
+++ b/docs/source/developers/archery.rst
@@ -57,10 +57,13 @@ You can inspect Archery usage by passing the ``--help`` flag:
    Commands:
      benchmark    Arrow benchmarking.
      build        Initialize an Arrow C++ build
+     crossbow     Schedule packaging tasks or nightly builds on CI services.
      docker       Interact with docker-compose based builds.
      integration  Execute protocol and Flight integration tests
+     linking      Quick and dirty utilities for checking library linkage.
      lint         Check Arrow source tree for errors
      numpydoc     Lint python docstring with NumpyDoc
+     release      Release releated commands.
      trigger-bot
 
 Archery exposes independent subcommands, each of which provides dedicated
@@ -81,4 +84,3 @@ help output, for example:
      images  List the available docker-compose images.
      push    Push the generated docker-compose image.
      run     Execute docker-compose builds.
-
diff --git a/docs/source/developers/crossbow.rst b/docs/source/developers/crossbow.rst
index 05830323adc..cb49a244614 100644
--- a/docs/source/developers/crossbow.rst
+++ b/docs/source/developers/crossbow.rst
@@ -71,6 +71,10 @@ Install
 The following guide depends on GitHub, but theoretically any git
 server can be used.
 
+If you are not using the `ursacomputing/crossbow <https://github.com/ursacomputing/crossbow>`_
+repository, you will need to complete the first two steps, otherwise procede
+to step 3:
+
 1. `Create the queue repository`_
 
 2. Enable `TravisCI`_, `Appveyor`_, `Azure Pipelines`_ and `CircleCI`_
@@ -78,7 +82,8 @@ server can be used.
 
    -  turn off Travis’ `auto cancellation`_ feature on branches
 
-3. Clone the newly created repository next to the arrow repository:
+3. Clone either ursacomputing/crossbow if you are using that, or the newly
+   created repository next to the arrow repository:
 
    By default the scripts looks for ``crossbow`` next to arrow repository, but
    this can configured through command line arguments.
@@ -127,7 +132,7 @@ server can be used.
 
    .. code:: bash
 
-      pip install -e arrow/dev/archery[crossbow]
+      pip install -e "arrow/dev/archery[crossbow]"
 
 9. Try running it:
 

From 7e5f6189bea57da05c62dc758a576e0513c3d5a7 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Mon, 2 Aug 2021 17:34:28 +0200
Subject: [PATCH 685/719] ARROW-9434: [C++] Store type code in UnionScalar

Also:
- implement MakeArrayFromScalar for unions
- implement ArrayBuilder::AppendScalar for unions

Closes #10817 from pitrou/ARROW-9434-union-scalar-type-code

Lead-authored-by: Antoine Pitrou <antoine@python.org>
Co-authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 c_glib/arrow-glib/scalar.cpp              |  29 +++-
 c_glib/arrow-glib/scalar.h                |   5 +
 c_glib/arrow-glib/version.h.in            |   2 +-
 c_glib/doc/arrow-glib/arrow-glib-docs.xml |   4 +
 c_glib/test/test-dense-union-scalar.rb    |  10 +-
 c_glib/test/test-sparse-union-scalar.rb   |  10 +-
 cpp/src/arrow/array/array_base.cc         |  12 +-
 cpp/src/arrow/array/array_nested.h        |   3 +
 cpp/src/arrow/array/array_test.cc         |  85 ++++++----
 cpp/src/arrow/array/builder_base.cc       |  49 ++++++
 cpp/src/arrow/array/util.cc               |  76 ++++++++-
 cpp/src/arrow/scalar.cc                   |  32 +++-
 cpp/src/arrow/scalar.h                    |   9 +-
 cpp/src/arrow/scalar_test.cc              | 191 ++++++++++++----------
 python/pyarrow/includes/libarrow.pxd      |   1 +
 python/pyarrow/scalar.pxi                 |   8 +
 python/pyarrow/tests/test_scalars.py      |   6 +
 17 files changed, 389 insertions(+), 143 deletions(-)

diff --git a/c_glib/arrow-glib/scalar.cpp b/c_glib/arrow-glib/scalar.cpp
index 98202a9e281..847b48620bd 100644
--- a/c_glib/arrow-glib/scalar.cpp
+++ b/c_glib/arrow-glib/scalar.cpp
@@ -2120,13 +2120,15 @@ G_END_DECLS
 template<typename ArrowUnionScalarType>
 GArrowScalar *
 garrow_union_scalar_new(GArrowDataType *data_type,
+                        gint8 type_code,
                         GArrowScalar *value)
 {
   auto arrow_data_type = garrow_data_type_get_raw(data_type);
   auto arrow_value = garrow_scalar_get_raw(value);
   auto arrow_scalar =
     std::static_pointer_cast<arrow::Scalar>(
-      std::make_shared<ArrowUnionScalarType>(arrow_value, arrow_data_type));
+      std::make_shared<ArrowUnionScalarType>(arrow_value, type_code,
+                                             arrow_data_type));
   auto scalar = garrow_scalar_new_raw(&arrow_scalar,
                                       "scalar", &arrow_scalar,
                                       "data-type", data_type,
@@ -2136,6 +2138,23 @@ garrow_union_scalar_new(GArrowDataType *data_type,
 }
 G_BEGIN_DECLS
 
+/**
+ * garrow_union_scalar_get_type_code:
+ * @scalar: A #GArrowUnionScalar.
+ *
+ * Returns: The type code of this scalar.
+ *
+ * Since: 6.0.0
+ */
+gint8
+garrow_union_scalar_get_type_code(GArrowUnionScalar *scalar)
+{
+  const auto &arrow_scalar =
+    std::static_pointer_cast<arrow::UnionScalar>(
+      garrow_scalar_get_raw(GARROW_SCALAR(scalar)));
+  return arrow_scalar->type_code;
+}
+
 /**
  * garrow_union_scalar_get_value:
  * @scalar: A #GArrowUnionScalar.
@@ -2169,6 +2188,7 @@ garrow_sparse_union_scalar_class_init(GArrowSparseUnionScalarClass *klass)
 /**
  * garrow_sparse_union_scalar_new:
  * @data_type: A #GArrowSparseUnionDataType for this scalar.
+ * @type_code: The type code of this scalar.
  * @value: The value of this scalar.
  *
  * Returns: A newly created #GArrowSparseUnionScalar.
@@ -2177,11 +2197,12 @@ garrow_sparse_union_scalar_class_init(GArrowSparseUnionScalarClass *klass)
  */
 GArrowSparseUnionScalar *
 garrow_sparse_union_scalar_new(GArrowSparseUnionDataType *data_type,
+                               gint8 type_code,
                                GArrowScalar *value)
 {
   return GARROW_SPARSE_UNION_SCALAR(
     garrow_union_scalar_new<arrow::SparseUnionScalar>(
-      GARROW_DATA_TYPE(data_type), value));
+      GARROW_DATA_TYPE(data_type), type_code, value));
 }
 
 
@@ -2202,6 +2223,7 @@ garrow_dense_union_scalar_class_init(GArrowDenseUnionScalarClass *klass)
 /**
  * garrow_dense_union_scalar_new:
  * @data_type: A #GArrowDenseUnionDataType for this scalar.
+ * @type_code: The type code of this scalar.
  * @value: The value of this scalar.
  *
  * Returns: A newly created #GArrowDenseUnionScalar.
@@ -2210,11 +2232,12 @@ garrow_dense_union_scalar_class_init(GArrowDenseUnionScalarClass *klass)
  */
 GArrowDenseUnionScalar *
 garrow_dense_union_scalar_new(GArrowDenseUnionDataType *data_type,
+                              gint8 type_code,
                               GArrowScalar *value)
 {
   return GARROW_DENSE_UNION_SCALAR(
     garrow_union_scalar_new<arrow::DenseUnionScalar>(
-      GARROW_DATA_TYPE(data_type), value));
+      GARROW_DATA_TYPE(data_type), type_code, value));
 }
 
 
diff --git a/c_glib/arrow-glib/scalar.h b/c_glib/arrow-glib/scalar.h
index 007f1cd29b3..a110d1c5ef6 100644
--- a/c_glib/arrow-glib/scalar.h
+++ b/c_glib/arrow-glib/scalar.h
@@ -625,6 +625,9 @@ struct _GArrowUnionScalarClass
   GArrowScalarClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_6_0
+gint8
+garrow_union_scalar_get_type_code(GArrowUnionScalar *scalar);
 GARROW_AVAILABLE_IN_5_0
 GArrowScalar *
 garrow_union_scalar_get_value(GArrowUnionScalar *scalar);
@@ -644,6 +647,7 @@ struct _GArrowSparseUnionScalarClass
 GARROW_AVAILABLE_IN_5_0
 GArrowSparseUnionScalar *
 garrow_sparse_union_scalar_new(GArrowSparseUnionDataType *data_type,
+                               gint8 type_code,
                                GArrowScalar *value);
 
 
@@ -661,6 +665,7 @@ struct _GArrowDenseUnionScalarClass
 GARROW_AVAILABLE_IN_5_0
 GArrowDenseUnionScalar *
 garrow_dense_union_scalar_new(GArrowDenseUnionDataType *data_type,
+                              gint8 type_code,
                               GArrowScalar *value);
 
 
diff --git a/c_glib/arrow-glib/version.h.in b/c_glib/arrow-glib/version.h.in
index 987764f556a..193853602ff 100644
--- a/c_glib/arrow-glib/version.h.in
+++ b/c_glib/arrow-glib/version.h.in
@@ -117,7 +117,7 @@
  *
  * Since: 6.0.0
  */
-#define GARROW_VERSION_6_0 G_ENCODE_VERSION(6 , 0)
+#define GARROW_VERSION_6_0 G_ENCODE_VERSION(6, 0)
 
 /**
  * GARROW_VERSION_5_0:
diff --git a/c_glib/doc/arrow-glib/arrow-glib-docs.xml b/c_glib/doc/arrow-glib/arrow-glib-docs.xml
index f7706f9c96e..4c061c06c40 100644
--- a/c_glib/doc/arrow-glib/arrow-glib-docs.xml
+++ b/c_glib/doc/arrow-glib/arrow-glib-docs.xml
@@ -183,6 +183,10 @@
     <title>Index of deprecated API</title>
     <xi:include href="xml/api-index-deprecated.xml"><xi:fallback /></xi:include>
   </index>
+  <index id="api-index-6-0-0" role="6.0.0">
+    <title>Index of new symbols in 6.0.0</title>
+    <xi:include href="xml/api-index-6.0.0.xml"><xi:fallback /></xi:include>
+  </index>
   <index id="api-index-5-0-0" role="5.0.0">
     <title>Index of new symbols in 5.0.0</title>
     <xi:include href="xml/api-index-5.0.0.xml"><xi:fallback /></xi:include>
diff --git a/c_glib/test/test-dense-union-scalar.rb b/c_glib/test/test-dense-union-scalar.rb
index 16d9458a9fa..ec2053b3fe9 100644
--- a/c_glib/test/test-dense-union-scalar.rb
+++ b/c_glib/test/test-dense-union-scalar.rb
@@ -22,8 +22,14 @@ def setup
       Arrow::Field.new("text", Arrow::StringDataType.new),
     ]
     @data_type = Arrow::DenseUnionDataType.new(fields, [2, 9])
+    @type_code = 2
     @value = Arrow::Int8Scalar.new(-29)
-    @scalar = Arrow::DenseUnionScalar.new(@data_type, @value)
+    @scalar = Arrow::DenseUnionScalar.new(@data_type, @type_code, @value)
+  end
+
+  def test_type_code
+    assert_equal(@type_code,
+                 @scalar.type_code)
   end
 
   def test_data_type
@@ -38,7 +44,7 @@ def test_valid?
   end
 
   def test_equal
-    assert_equal(Arrow::DenseUnionScalar.new(@data_type, @value),
+    assert_equal(Arrow::DenseUnionScalar.new(@data_type, @type_code, @value),
                  @scalar)
   end
 
diff --git a/c_glib/test/test-sparse-union-scalar.rb b/c_glib/test/test-sparse-union-scalar.rb
index 9ca7d62a08d..acb8531560b 100644
--- a/c_glib/test/test-sparse-union-scalar.rb
+++ b/c_glib/test/test-sparse-union-scalar.rb
@@ -22,8 +22,14 @@ def setup
       Arrow::Field.new("text", Arrow::StringDataType.new),
     ]
     @data_type = Arrow::SparseUnionDataType.new(fields, [2, 9])
+    @type_code = 2
     @value = Arrow::Int8Scalar.new(-29)
-    @scalar = Arrow::SparseUnionScalar.new(@data_type, @value)
+    @scalar = Arrow::SparseUnionScalar.new(@data_type, @type_code, @value)
+  end
+
+  def test_type_code
+    assert_equal(@type_code,
+                 @scalar.type_code)
   end
 
   def test_data_type
@@ -38,7 +44,7 @@ def test_valid?
   end
 
   def test_equal
-    assert_equal(Arrow::SparseUnionScalar.new(@data_type, @value),
+    assert_equal(Arrow::SparseUnionScalar.new(@data_type, @type_code, @value),
                  @scalar)
   end
 
diff --git a/cpp/src/arrow/array/array_base.cc b/cpp/src/arrow/array/array_base.cc
index 67c5ca84e1f..dad689d3ca7 100644
--- a/cpp/src/arrow/array/array_base.cc
+++ b/cpp/src/arrow/array/array_base.cc
@@ -103,28 +103,30 @@ struct ScalarFromArraySlotImpl {
   }
 
   Status Visit(const SparseUnionArray& a) {
+    const auto type_code = a.type_code(index_);
     // child array which stores the actual value
-    auto arr = a.field(a.child_id(index_));
+    const auto arr = a.field(a.child_id(index_));
     // no need to adjust the index
     ARROW_ASSIGN_OR_RAISE(auto value, arr->GetScalar(index_));
     if (value->is_valid) {
-      out_ = std::shared_ptr<Scalar>(new SparseUnionScalar(value, a.type()));
+      out_ = std::shared_ptr<Scalar>(new SparseUnionScalar(value, type_code, a.type()));
     } else {
-      out_ = MakeNullScalar(a.type());
+      out_ = std::shared_ptr<Scalar>(new SparseUnionScalar(type_code, a.type()));
     }
     return Status::OK();
   }
 
   Status Visit(const DenseUnionArray& a) {
+    const auto type_code = a.type_code(index_);
     // child array which stores the actual value
     auto arr = a.field(a.child_id(index_));
     // need to look up the value based on offsets
     auto offset = a.value_offset(index_);
     ARROW_ASSIGN_OR_RAISE(auto value, arr->GetScalar(offset));
     if (value->is_valid) {
-      out_ = std::shared_ptr<Scalar>(new DenseUnionScalar(value, a.type()));
+      out_ = std::shared_ptr<Scalar>(new DenseUnionScalar(value, type_code, a.type()));
     } else {
-      out_ = MakeNullScalar(a.type());
+      out_ = std::shared_ptr<Scalar>(new DenseUnionScalar(type_code, a.type()));
     }
     return Status::OK();
   }
diff --git a/cpp/src/arrow/array/array_nested.h b/cpp/src/arrow/array/array_nested.h
index d39f33f4702..b0edb9591c5 100644
--- a/cpp/src/arrow/array/array_nested.h
+++ b/cpp/src/arrow/array/array_nested.h
@@ -378,6 +378,9 @@ class ARROW_EXPORT UnionArray : public Array {
 
   const type_code_t* raw_type_codes() const { return raw_type_codes_ + data_->offset; }
 
+  /// The logical type code of the value at index.
+  type_code_t type_code(int64_t i) const { return raw_type_codes_[i + data_->offset]; }
+
   /// The physical child id containing value at index.
   int child_id(int64_t i) const {
     return union_type_->child_ids()[raw_type_codes_[i + data_->offset]];
diff --git a/cpp/src/arrow/array/array_test.cc b/cpp/src/arrow/array/array_test.cc
index baa1c54e46b..5cee0a2691f 100644
--- a/cpp/src/arrow/array/array_test.cc
+++ b/cpp/src/arrow/array/array_test.cc
@@ -421,15 +421,23 @@ void AssertAppendScalar(MemoryPool* pool, const std::shared_ptr<Scalar>& scalar)
   std::shared_ptr<Array> out;
   FinishAndCheckPadding(builder.get(), &out);
   ASSERT_OK(out->ValidateFull());
+  AssertTypeEqual(scalar->type, out->type());
   ASSERT_EQ(out->length(), 9);
-  ASSERT_EQ(out->null_count(), 4);
+
+  const bool can_check_nulls = internal::HasValidityBitmap(out->type()->id());
+
+  if (can_check_nulls) {
+    ASSERT_EQ(out->null_count(), 4);
+  }
   for (const auto index : {0, 1, 3, 5, 6}) {
     ASSERT_FALSE(out->IsNull(index));
     ASSERT_OK_AND_ASSIGN(auto scalar_i, out->GetScalar(index));
     AssertScalarsEqual(*scalar, *scalar_i, /*verbose=*/true);
   }
   for (const auto index : {2, 4, 7, 8}) {
-    ASSERT_TRUE(out->IsNull(index));
+    ASSERT_EQ(out->IsNull(index), can_check_nulls);
+    ASSERT_OK_AND_ASSIGN(auto scalar_i, out->GetScalar(index));
+    AssertScalarsEqual(*null_scalar, *scalar_i, /*verbose=*/true);
   }
 }
 
@@ -437,37 +445,48 @@ static ScalarVector GetScalars() {
   auto hello = Buffer::FromString("hello");
   DayTimeIntervalType::DayMilliseconds daytime{1, 100};
 
-  return {std::make_shared<BooleanScalar>(false),
-          std::make_shared<Int8Scalar>(3),
-          std::make_shared<UInt16Scalar>(3),
-          std::make_shared<Int32Scalar>(3),
-          std::make_shared<UInt64Scalar>(3),
-          std::make_shared<DoubleScalar>(3.0),
-          std::make_shared<Date32Scalar>(10),
-          std::make_shared<Date64Scalar>(11),
-          std::make_shared<Time32Scalar>(1000, time32(TimeUnit::SECOND)),
-          std::make_shared<Time64Scalar>(1111, time64(TimeUnit::MICRO)),
-          std::make_shared<TimestampScalar>(1111, timestamp(TimeUnit::MILLI)),
-          std::make_shared<MonthIntervalScalar>(1),
-          std::make_shared<DayTimeIntervalScalar>(daytime),
-          std::make_shared<DurationScalar>(60, duration(TimeUnit::SECOND)),
-          std::make_shared<BinaryScalar>(hello),
-          std::make_shared<LargeBinaryScalar>(hello),
-          std::make_shared<FixedSizeBinaryScalar>(
-              hello, fixed_size_binary(static_cast<int32_t>(hello->size()))),
-          std::make_shared<Decimal128Scalar>(Decimal128(10), decimal(16, 4)),
-          std::make_shared<Decimal256Scalar>(Decimal256(10), decimal(76, 38)),
-          std::make_shared<StringScalar>(hello),
-          std::make_shared<LargeStringScalar>(hello),
-          std::make_shared<ListScalar>(ArrayFromJSON(int8(), "[1, 2, 3]")),
-          std::make_shared<LargeListScalar>(ArrayFromJSON(int8(), "[1, 1, 2, 2, 3, 3]")),
-          std::make_shared<FixedSizeListScalar>(ArrayFromJSON(int8(), "[1, 2, 3, 4]")),
-          std::make_shared<StructScalar>(
-              ScalarVector{
-                  std::make_shared<Int32Scalar>(2),
-                  std::make_shared<Int32Scalar>(6),
-              },
-              struct_({field("min", int32()), field("max", int32())}))};
+  FieldVector union_fields{field("string", utf8()), field("number", int32()),
+                           field("other_number", int32())};
+  std::vector<int8_t> union_type_codes{5, 6, 42};
+
+  const auto sparse_union_ty = ::arrow::sparse_union(union_fields, union_type_codes);
+  const auto dense_union_ty = ::arrow::dense_union(union_fields, union_type_codes);
+
+  return {
+      std::make_shared<BooleanScalar>(false), std::make_shared<Int8Scalar>(3),
+      std::make_shared<UInt16Scalar>(3), std::make_shared<Int32Scalar>(3),
+      std::make_shared<UInt64Scalar>(3), std::make_shared<DoubleScalar>(3.0),
+      std::make_shared<Date32Scalar>(10), std::make_shared<Date64Scalar>(11),
+      std::make_shared<Time32Scalar>(1000, time32(TimeUnit::SECOND)),
+      std::make_shared<Time64Scalar>(1111, time64(TimeUnit::MICRO)),
+      std::make_shared<TimestampScalar>(1111, timestamp(TimeUnit::MILLI)),
+      std::make_shared<MonthIntervalScalar>(1),
+      std::make_shared<DayTimeIntervalScalar>(daytime),
+      std::make_shared<DurationScalar>(60, duration(TimeUnit::SECOND)),
+      std::make_shared<BinaryScalar>(hello), std::make_shared<LargeBinaryScalar>(hello),
+      std::make_shared<FixedSizeBinaryScalar>(
+          hello, fixed_size_binary(static_cast<int32_t>(hello->size()))),
+      std::make_shared<Decimal128Scalar>(Decimal128(10), decimal(16, 4)),
+      std::make_shared<Decimal256Scalar>(Decimal256(10), decimal(76, 38)),
+      std::make_shared<StringScalar>(hello), std::make_shared<LargeStringScalar>(hello),
+      std::make_shared<ListScalar>(ArrayFromJSON(int8(), "[1, 2, 3]")),
+      std::make_shared<LargeListScalar>(ArrayFromJSON(int8(), "[1, 1, 2, 2, 3, 3]")),
+      std::make_shared<FixedSizeListScalar>(ArrayFromJSON(int8(), "[1, 2, 3, 4]")),
+      std::make_shared<StructScalar>(
+          ScalarVector{
+              std::make_shared<Int32Scalar>(2),
+              std::make_shared<Int32Scalar>(6),
+          },
+          struct_({field("min", int32()), field("max", int32())})),
+      // Same values, different union type codes
+      std::make_shared<SparseUnionScalar>(std::make_shared<Int32Scalar>(100), 6,
+                                          sparse_union_ty),
+      std::make_shared<SparseUnionScalar>(std::make_shared<Int32Scalar>(100), 42,
+                                          sparse_union_ty),
+      std::make_shared<DenseUnionScalar>(std::make_shared<Int32Scalar>(101), 6,
+                                         dense_union_ty),
+      std::make_shared<DenseUnionScalar>(std::make_shared<Int32Scalar>(101), 42,
+                                         dense_union_ty)};
 }
 
 TEST_F(TestArray, TestMakeArrayFromScalar) {
diff --git a/cpp/src/arrow/array/builder_base.cc b/cpp/src/arrow/array/builder_base.cc
index c892e3d664b..2f4e63b546d 100644
--- a/cpp/src/arrow/array/builder_base.cc
+++ b/cpp/src/arrow/array/builder_base.cc
@@ -18,6 +18,7 @@
 #include "arrow/array/builder_base.h"
 
 #include <cstdint>
+#include <type_traits>
 #include <vector>
 
 #include "arrow/array/array_base.h"
@@ -96,6 +97,7 @@ Status ArrayBuilder::Advance(int64_t elements) {
 }
 
 namespace {
+
 struct AppendScalarImpl {
   template <typename T>
   enable_if_t<has_c_type<T>::value || is_decimal_type<T>::value ||
@@ -206,6 +208,52 @@ struct AppendScalarImpl {
     return Status::OK();
   }
 
+  Status Visit(const SparseUnionType& type) { return MakeUnionArray(type); }
+
+  Status Visit(const DenseUnionType& type) { return MakeUnionArray(type); }
+
+  template <typename T>
+  Status MakeUnionArray(const T& type) {
+    using BuilderType = typename TypeTraits<T>::BuilderType;
+    constexpr bool is_dense = std::is_same<T, DenseUnionType>::value;
+
+    auto* builder = internal::checked_cast<BuilderType*>(builder_);
+    const auto count = n_repeats_ * (scalars_end_ - scalars_begin_);
+
+    RETURN_NOT_OK(builder->Reserve(count));
+
+    DCHECK_EQ(type.num_fields(), builder->num_children());
+    for (int field_index = 0; field_index < type.num_fields(); ++field_index) {
+      RETURN_NOT_OK(builder->child_builder(field_index)->Reserve(count));
+    }
+
+    for (int64_t i = 0; i < n_repeats_; i++) {
+      for (const std::shared_ptr<Scalar>* s = scalars_begin_; s != scalars_end_; s++) {
+        // For each scalar,
+        //  1. append the type code,
+        //  2. append the value to the corresponding child,
+        //  3. if the union is sparse, append null to the other children.
+        const auto& scalar = internal::checked_cast<const UnionScalar&>(**s);
+        const auto scalar_field_index = type.child_ids()[scalar.type_code];
+        RETURN_NOT_OK(builder->Append(scalar.type_code));
+
+        for (int field_index = 0; field_index < type.num_fields(); ++field_index) {
+          auto* child_builder = builder->child_builder(field_index).get();
+          if (field_index == scalar_field_index) {
+            if (scalar.is_valid) {
+              RETURN_NOT_OK(child_builder->AppendScalar(*scalar.value));
+            } else {
+              RETURN_NOT_OK(child_builder->AppendNull());
+            }
+          } else if (!is_dense) {
+            RETURN_NOT_OK(child_builder->AppendNull());
+          }
+        }
+      }
+    }
+    return Status::OK();
+  }
+
   Status Visit(const DataType& type) {
     return Status::NotImplemented("AppendScalar for type ", type);
   }
@@ -217,6 +265,7 @@ struct AppendScalarImpl {
   int64_t n_repeats_;
   ArrayBuilder* builder_;
 };
+
 }  // namespace
 
 Status ArrayBuilder::AppendScalar(const Scalar& scalar) {
diff --git a/cpp/src/arrow/array/util.cc b/cpp/src/arrow/array/util.cc
index ed26ecff4e0..fae379e51f4 100644
--- a/cpp/src/arrow/array/util.cc
+++ b/cpp/src/arrow/array/util.cc
@@ -34,6 +34,7 @@
 #include "arrow/buffer.h"
 #include "arrow/buffer_builder.h"
 #include "arrow/extension_type.h"
+#include "arrow/result.h"
 #include "arrow/scalar.h"
 #include "arrow/status.h"
 #include "arrow/type.h"
@@ -613,18 +614,85 @@ class RepeatedArrayFactory {
     return Status::OK();
   }
 
-  Status Visit(const ExtensionType& type) {
-    return Status::NotImplemented("construction from scalar of type ", *scalar_.type);
+  Status Visit(const SparseUnionType& type) {
+    const auto& union_scalar = checked_cast<const UnionScalar&>(scalar_);
+    const auto& union_type = checked_cast<const UnionType&>(*scalar_.type);
+    const auto scalar_type_code = union_scalar.type_code;
+    const auto scalar_child_id = union_type.child_ids()[scalar_type_code];
+
+    // Create child arrays: most of them are all-null, except for the child array
+    // for the given type code (if the scalar is valid).
+    ArrayVector fields;
+    for (int i = 0; i < type.num_fields(); ++i) {
+      fields.emplace_back();
+      if (i == scalar_child_id && scalar_.is_valid) {
+        ARROW_ASSIGN_OR_RAISE(fields.back(),
+                              MakeArrayFromScalar(*union_scalar.value, length_, pool_));
+      } else {
+        ARROW_ASSIGN_OR_RAISE(
+            fields.back(), MakeArrayOfNull(union_type.field(i)->type(), length_, pool_));
+      }
+    }
+
+    ARROW_ASSIGN_OR_RAISE(auto type_codes_buffer, CreateUnionTypeCodes(scalar_type_code));
+
+    out_ = std::make_shared<SparseUnionArray>(scalar_.type, length_, std::move(fields),
+                                              std::move(type_codes_buffer));
+    return Status::OK();
   }
 
   Status Visit(const DenseUnionType& type) {
-    return Status::NotImplemented("construction from scalar of type ", *scalar_.type);
+    const auto& union_scalar = checked_cast<const UnionScalar&>(scalar_);
+    const auto& union_type = checked_cast<const UnionType&>(*scalar_.type);
+    const auto scalar_type_code = union_scalar.type_code;
+    const auto scalar_child_id = union_type.child_ids()[scalar_type_code];
+
+    // Create child arrays: all of them are empty, except for the child array
+    // for the given type code (if length > 0).
+    ArrayVector fields;
+    for (int i = 0; i < type.num_fields(); ++i) {
+      fields.emplace_back();
+      if (i == scalar_child_id && length_ > 0) {
+        if (scalar_.is_valid) {
+          // One valid element (will be referenced by multiple offsets)
+          ARROW_ASSIGN_OR_RAISE(fields.back(),
+                                MakeArrayFromScalar(*union_scalar.value, 1, pool_));
+        } else {
+          // One null element (will be referenced by multiple offsets)
+          ARROW_ASSIGN_OR_RAISE(fields.back(),
+                                MakeArrayOfNull(union_type.field(i)->type(), 1, pool_));
+        }
+      } else {
+        // Zero element (will not be referenced by any offset)
+        ARROW_ASSIGN_OR_RAISE(fields.back(),
+                              MakeArrayOfNull(union_type.field(i)->type(), 0, pool_));
+      }
+    }
+
+    // Create an offsets buffer with all offsets equal to 0
+    ARROW_ASSIGN_OR_RAISE(auto offsets_buffer,
+                          AllocateBuffer(length_ * sizeof(int32_t), pool_));
+    memset(offsets_buffer->mutable_data(), 0, offsets_buffer->size());
+
+    ARROW_ASSIGN_OR_RAISE(auto type_codes_buffer, CreateUnionTypeCodes(scalar_type_code));
+
+    out_ = std::make_shared<DenseUnionArray>(scalar_.type, length_, std::move(fields),
+                                             std::move(type_codes_buffer),
+                                             std::move(offsets_buffer));
+    return Status::OK();
   }
 
-  Status Visit(const SparseUnionType& type) {
+  Status Visit(const ExtensionType& type) {
     return Status::NotImplemented("construction from scalar of type ", *scalar_.type);
   }
 
+  Result<std::shared_ptr<Buffer>> CreateUnionTypeCodes(int8_t type_code) {
+    TypedBufferBuilder<int8_t> builder(pool_);
+    RETURN_NOT_OK(builder.Resize(length_));
+    builder.UnsafeAppend(length_, type_code);
+    return builder.Finish();
+  }
+
   template <typename OffsetType>
   Status CreateOffsetsBuffer(OffsetType value_length, std::shared_ptr<Buffer>* out) {
     TypedBufferBuilder<OffsetType> builder(pool_);
diff --git a/cpp/src/arrow/scalar.cc b/cpp/src/arrow/scalar.cc
index a118a00938b..314894bd04e 100644
--- a/cpp/src/arrow/scalar.cc
+++ b/cpp/src/arrow/scalar.cc
@@ -49,9 +49,9 @@ bool Scalar::ApproxEquals(const Scalar& other, const EqualOptions& options) cons
   return ScalarApproxEquals(*this, other, options);
 }
 
-struct ScalarHashImpl {
-  static std::hash<std::string> string_hash;
+namespace {
 
+struct ScalarHashImpl {
   Status Visit(const NullScalar& s) { return Status::OK(); }
 
   template <typename T>
@@ -97,8 +97,13 @@ struct ScalarHashImpl {
     return Status::OK();
   }
 
-  // TODO(bkietz) implement less wimpy hashing when these have ValueType
-  Status Visit(const UnionScalar& s) { return Status::OK(); }
+  Status Visit(const UnionScalar& s) {
+    // type_code is ignored when comparing for equality, so do not hash it either
+    AccumulateHashFrom(*s.value);
+    return Status::OK();
+  }
+
+  // TODO(bkietz) implement less wimpy hashing when this has ValueType
   Status Visit(const ExtensionScalar& s) { return Status::OK(); }
 
   template <typename T>
@@ -147,6 +152,8 @@ struct ScalarHashImpl {
   size_t hash_;
 };
 
+}  // namespace
+
 size_t Scalar::hash() const { return ScalarHashImpl(*this).hash_; }
 
 StringScalar::StringScalar(std::string s)
@@ -285,6 +292,8 @@ std::shared_ptr<DictionaryScalar> DictionaryScalar::Make(std::shared_ptr<Scalar>
                                             std::move(type));
 }
 
+namespace {
+
 template <typename T>
 using scalar_constructor_has_arrow_type =
     std::is_constructible<typename TypeTraits<T>::ScalarType, std::shared_ptr<DataType>>;
@@ -310,6 +319,19 @@ struct MakeNullImpl {
     return Status::OK();
   }
 
+  Status Visit(const SparseUnionType& type) { return MakeUnionScalar(type); }
+
+  Status Visit(const DenseUnionType& type) { return MakeUnionScalar(type); }
+
+  template <typename T, typename ScalarType = typename TypeTraits<T>::ScalarType>
+  Status MakeUnionScalar(const T& type) {
+    if (type.num_fields() == 0) {
+      return Status::Invalid("Cannot make scalar of empty union type");
+    }
+    out_ = std::make_shared<ScalarType>(type.type_codes()[0], type_);
+    return Status::OK();
+  }
+
   std::shared_ptr<Scalar> Finish() && {
     // Should not fail.
     DCHECK_OK(VisitTypeInline(*type_, this));
@@ -320,6 +342,8 @@ struct MakeNullImpl {
   std::shared_ptr<Scalar> out_;
 };
 
+}  // namespace
+
 std::shared_ptr<Scalar> MakeNullScalar(std::shared_ptr<DataType> type) {
   return MakeNullImpl{std::move(type), nullptr}.Finish();
 }
diff --git a/cpp/src/arrow/scalar.h b/cpp/src/arrow/scalar.h
index 24744859686..68d8c245b1d 100644
--- a/cpp/src/arrow/scalar.h
+++ b/cpp/src/arrow/scalar.h
@@ -420,10 +420,15 @@ struct ARROW_EXPORT StructScalar : public Scalar {
 struct ARROW_EXPORT UnionScalar : public Scalar {
   using Scalar::Scalar;
   using ValueType = std::shared_ptr<Scalar>;
+
   ValueType value;
+  int8_t type_code;
 
-  UnionScalar(ValueType value, std::shared_ptr<DataType> type)
-      : Scalar(std::move(type), true), value(std::move(value)) {}
+  UnionScalar(int8_t type_code, std::shared_ptr<DataType> type)
+      : Scalar(std::move(type), false), type_code(type_code) {}
+
+  UnionScalar(ValueType value, int8_t type_code, std::shared_ptr<DataType> type)
+      : Scalar(std::move(type), true), value(std::move(value)), type_code(type_code) {}
 };
 
 struct ARROW_EXPORT SparseUnionScalar : public UnionScalar {
diff --git a/cpp/src/arrow/scalar_test.cc b/cpp/src/arrow/scalar_test.cc
index d99debb2ba9..eeb98a2fb72 100644
--- a/cpp/src/arrow/scalar_test.cc
+++ b/cpp/src/arrow/scalar_test.cc
@@ -930,117 +930,134 @@ TEST(TestDictionaryScalar, Cast) {
   }
 }
 
-TEST(TestSparseUnionScalar, Basics) {
-  auto ty = sparse_union({field("string", utf8()), field("number", uint64())});
+void CheckGetValidUnionScalar(const Array& arr, int64_t index, const Scalar& expected,
+                              const Scalar& expected_value) {
+  ASSERT_OK_AND_ASSIGN(auto scalar, arr.GetScalar(index));
+  ASSERT_TRUE(scalar->Equals(expected));
+
+  const auto& as_union = checked_cast<const UnionScalar&>(*scalar);
+  ASSERT_TRUE(as_union.is_valid);
+  ASSERT_TRUE(as_union.value->Equals(expected_value));
+}
 
-  auto alpha = MakeScalar("alpha");
-  auto beta = MakeScalar("beta");
-  ASSERT_OK_AND_ASSIGN(auto two, MakeScalar(uint64(), 2));
+void CheckGetNullUnionScalar(const Array& arr, int64_t index) {
+  ASSERT_OK_AND_ASSIGN(auto scalar, arr.GetScalar(index));
+  ASSERT_TRUE(scalar->Equals(MakeNullScalar(arr.type())));
 
-  auto scalar_alpha = SparseUnionScalar(alpha, ty);
-  auto scalar_beta = SparseUnionScalar(beta, ty);
-  auto scalar_two = SparseUnionScalar(two, ty);
+  const auto& as_union = checked_cast<const UnionScalar&>(*scalar);
+  ASSERT_FALSE(as_union.is_valid);
+  // XXX in reality, the union array doesn't have a validity bitmap.
+  // Validity is inferred from the underlying child value, which should maybe
+  // be reflected here...
+  ASSERT_EQ(as_union.value, nullptr);
+}
 
-  // test Array.GetScalar
-  std::vector<std::shared_ptr<Array>> children{
-      ArrayFromJSON(utf8(), R"(["alpha", "", "beta", null, "gamma"])"),
-      ArrayFromJSON(uint64(), "[1, 2, 11, 22, null]")};
+template <typename Type>
+class TestUnionScalar : public ::testing::Test {
+ public:
+  using UnionType = Type;
+  using ScalarType = typename TypeTraits<UnionType>::ScalarType;
 
-  auto type_ids = ArrayFromJSON(int8(), "[0, 1, 0, 0, 1]");
-  SparseUnionArray arr(ty, 5, children, type_ids->data()->buffers[1]);
-  ASSERT_OK(arr.ValidateFull());
+  void SetUp() {
+    type_.reset(new UnionType({field("string", utf8()), field("number", uint64()),
+                               field("other_number", uint64())},
+                              /*type_codes=*/{3, 42, 43}));
+    alpha_ = MakeScalar("alpha");
+    beta_ = MakeScalar("beta");
+    ASSERT_OK_AND_ASSIGN(two_, MakeScalar(uint64(), 2));
+    ASSERT_OK_AND_ASSIGN(three_, MakeScalar(uint64(), 3));
+
+    union_alpha_ = std::make_shared<ScalarType>(alpha_, 3, type_);
+    union_beta_ = std::make_shared<ScalarType>(beta_, 3, type_);
+    union_two_ = std::make_shared<ScalarType>(two_, 42, type_);
+    union_other_two_ = std::make_shared<ScalarType>(two_, 43, type_);
+    union_three_ = std::make_shared<ScalarType>(three_, 42, type_);
+    union_string_null_ = MakeSpecificNullScalar(3);
+    union_number_null_ = MakeSpecificNullScalar(42);
+  }
 
-  ASSERT_OK_AND_ASSIGN(auto first, arr.GetScalar(0));
-  ASSERT_TRUE(first->Equals(scalar_alpha));
+  void TestEquals() {
+    // Differing values
+    ASSERT_FALSE(union_alpha_->Equals(union_beta_));
+    ASSERT_FALSE(union_two_->Equals(union_three_));
+    // Differing validities
+    ASSERT_FALSE(union_alpha_->Equals(union_string_null_));
+    // Differing types
+    ASSERT_FALSE(union_alpha_->Equals(union_two_));
+    ASSERT_FALSE(union_alpha_->Equals(union_other_two_));
+    // Type codes don't count when comparing union scalars: the underlying values
+    // are identical even though their provenance is different.
+    ASSERT_TRUE(union_two_->Equals(union_other_two_));
+    ASSERT_TRUE(union_string_null_->Equals(union_number_null_));
+  }
 
-  const auto& first_as_union = checked_cast<const SparseUnionScalar&>(*first);
-  ASSERT_TRUE(first_as_union.is_valid);
-  ASSERT_TRUE(first_as_union.value->Equals(alpha));
+  void TestMakeNullScalar() {
+    const auto scalar = MakeNullScalar(type_);
+    const auto& as_union = checked_cast<const UnionScalar&>(*scalar);
+    AssertTypeEqual(type_, as_union.type);
+    ASSERT_FALSE(as_union.is_valid);
+    ASSERT_EQ(as_union.value, nullptr);
+    // Abstractly, the type code must be valid.
+    // Concretely, the first child field is chosen.
+    ASSERT_EQ(as_union.type_code, 3);
+  }
 
-  ASSERT_OK_AND_ASSIGN(auto second, arr.GetScalar(1));
-  ASSERT_TRUE(second->Equals(scalar_two));
+ protected:
+  std::shared_ptr<Scalar> MakeSpecificNullScalar(int8_t type_code) {
+    auto scal = MakeNullScalar(type_);
+    checked_cast<UnionScalar*>(scal.get())->type_code = type_code;
+    return scal;
+  }
 
-  const auto& second_as_union = checked_cast<const SparseUnionScalar&>(*second);
-  ASSERT_TRUE(second_as_union.is_valid);
-  ASSERT_TRUE(second_as_union.value->Equals(two));
+  std::shared_ptr<DataType> type_;
+  std::shared_ptr<Scalar> alpha_, beta_, two_, three_;
+  std::shared_ptr<Scalar> union_alpha_, union_beta_, union_two_, union_three_,
+      union_other_two_, union_string_null_, union_number_null_;
+};
 
-  ASSERT_OK_AND_ASSIGN(auto third, arr.GetScalar(2));
-  ASSERT_TRUE(third->Equals(scalar_beta));
+class TestSparseUnionScalar : public TestUnionScalar<SparseUnionType> {};
 
-  const auto& third_as_union = checked_cast<const SparseUnionScalar&>(*third);
-  ASSERT_TRUE(third_as_union.is_valid);
-  ASSERT_TRUE(third_as_union.value->Equals(beta));
+TEST_F(TestSparseUnionScalar, Equals) { this->TestEquals(); }
 
-  ASSERT_OK_AND_ASSIGN(auto fourth, arr.GetScalar(3));
-  ASSERT_TRUE(fourth->Equals(MakeNullScalar(ty)));
+TEST_F(TestSparseUnionScalar, MakeNullScalar) { this->TestMakeNullScalar(); }
 
-  const auto& fourth_as_union = checked_cast<const SparseUnionScalar&>(*fourth);
-  ASSERT_FALSE(fourth_as_union.is_valid);
+TEST_F(TestSparseUnionScalar, GetScalar) {
+  ArrayVector children{ArrayFromJSON(utf8(), R"(["alpha", "", "beta", null, "gamma"])"),
+                       ArrayFromJSON(uint64(), "[1, 2, 11, 22, null]"),
+                       ArrayFromJSON(uint64(), "[100, 101, 102, 103, 104]")};
 
-  ASSERT_OK_AND_ASSIGN(auto fifth, arr.GetScalar(4));
-  ASSERT_TRUE(fifth->Equals(MakeNullScalar(ty)));
+  auto type_ids = ArrayFromJSON(int8(), "[3, 42, 3, 3, 42]");
+  SparseUnionArray arr(type_, 5, children, type_ids->data()->buffers[1]);
+  ASSERT_OK(arr.ValidateFull());
 
-  const auto& fifth_as_union = checked_cast<const SparseUnionScalar&>(*fifth);
-  ASSERT_FALSE(fifth_as_union.is_valid);
+  CheckGetValidUnionScalar(arr, 0, *union_alpha_, *alpha_);
+  CheckGetValidUnionScalar(arr, 1, *union_two_, *two_);
+  CheckGetValidUnionScalar(arr, 2, *union_beta_, *beta_);
+  CheckGetNullUnionScalar(arr, 3);
+  CheckGetNullUnionScalar(arr, 4);
 }
 
-TEST(TestDenseUnionScalar, Basics) {
-  auto ty = dense_union({field("string", utf8()), field("number", uint64())});
+class TestDenseUnionScalar : public TestUnionScalar<DenseUnionType> {};
 
-  auto alpha = MakeScalar("alpha");
-  auto beta = MakeScalar("beta");
-  ASSERT_OK_AND_ASSIGN(auto two, MakeScalar(uint64(), 2));
-  ASSERT_OK_AND_ASSIGN(auto three, MakeScalar(uint64(), 3));
+TEST_F(TestDenseUnionScalar, Equals) { this->TestEquals(); }
 
-  auto scalar_alpha = DenseUnionScalar(alpha, ty);
-  auto scalar_beta = DenseUnionScalar(beta, ty);
-  auto scalar_two = DenseUnionScalar(two, ty);
-  auto scalar_three = DenseUnionScalar(three, ty);
+TEST_F(TestDenseUnionScalar, MakeNullScalar) { this->TestMakeNullScalar(); }
 
-  // test Array.GetScalar
-  std::vector<std::shared_ptr<Array>> children = {
-      ArrayFromJSON(utf8(), R"(["alpha", "beta", null])"),
-      ArrayFromJSON(uint64(), "[2, 3]")};
+TEST_F(TestDenseUnionScalar, GetScalar) {
+  ArrayVector children{ArrayFromJSON(utf8(), R"(["alpha", "beta", null])"),
+                       ArrayFromJSON(uint64(), "[2, 3]"), ArrayFromJSON(uint64(), "[]")};
 
-  auto type_ids = ArrayFromJSON(int8(), "[0, 1, 0, 0, 1]");
+  auto type_ids = ArrayFromJSON(int8(), "[3, 42, 3, 3, 42]");
   auto offsets = ArrayFromJSON(int32(), "[0, 0, 1, 2, 1]");
-  DenseUnionArray arr(ty, 5, children, type_ids->data()->buffers[1],
+  DenseUnionArray arr(type_, 5, children, type_ids->data()->buffers[1],
                       offsets->data()->buffers[1]);
   ASSERT_OK(arr.ValidateFull());
 
-  ASSERT_OK_AND_ASSIGN(auto first, arr.GetScalar(0));
-  ASSERT_TRUE(first->Equals(scalar_alpha));
-
-  const auto& first_as_union = checked_cast<const DenseUnionScalar&>(*first);
-  ASSERT_TRUE(first_as_union.value->Equals(alpha));
-  ASSERT_TRUE(first_as_union.is_valid);
-
-  ASSERT_OK_AND_ASSIGN(auto second, arr.GetScalar(1));
-  ASSERT_TRUE(second->Equals(scalar_two));
-
-  const auto& second_as_union = checked_cast<const DenseUnionScalar&>(*second);
-  ASSERT_TRUE(second_as_union.value->Equals(two));
-  ASSERT_TRUE(second_as_union.is_valid);
-
-  ASSERT_OK_AND_ASSIGN(auto third, arr.GetScalar(2));
-  ASSERT_TRUE(third->Equals(scalar_beta));
-
-  const auto& third_as_union = checked_cast<const DenseUnionScalar&>(*third);
-  ASSERT_TRUE(third_as_union.value->Equals(beta));
-  ASSERT_TRUE(third_as_union.is_valid);
-
-  ASSERT_OK_AND_ASSIGN(auto fourth, arr.GetScalar(3));
-  ASSERT_TRUE(fourth->Equals(MakeNullScalar(ty)));
-
-  const auto& fourth_as_union = checked_cast<const DenseUnionScalar&>(*fourth);
-  ASSERT_FALSE(fourth_as_union.is_valid);
-
-  ASSERT_OK_AND_ASSIGN(auto fifth, arr.GetScalar(4));
-  ASSERT_TRUE(fifth->Equals(scalar_three));
-
-  const auto& fifth_as_union = checked_cast<const DenseUnionScalar&>(*fifth);
-  ASSERT_TRUE(fifth_as_union.value->Equals(three));
-  ASSERT_TRUE(fifth_as_union.is_valid);
+  CheckGetValidUnionScalar(arr, 0, *union_alpha_, *alpha_);
+  CheckGetValidUnionScalar(arr, 1, *union_two_, *two_);
+  CheckGetValidUnionScalar(arr, 2, *union_beta_, *beta_);
+  CheckGetNullUnionScalar(arr, 3);
+  CheckGetValidUnionScalar(arr, 4, *union_three_, *three_);
 }
 
 }  // namespace arrow
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 171b3ede217..0b5ceb7a0ea 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -1020,6 +1020,7 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
 
     cdef cppclass CUnionScalar" arrow::UnionScalar"(CScalar):
         shared_ptr[CScalar] value
+        int8_t type_code
 
     shared_ptr[CScalar] MakeScalar[Value](Value value)
 
diff --git a/python/pyarrow/scalar.pxi b/python/pyarrow/scalar.pxi
index 5c0d3ca1157..7953bd93621 100644
--- a/python/pyarrow/scalar.pxi
+++ b/python/pyarrow/scalar.pxi
@@ -829,6 +829,14 @@ cdef class UnionScalar(Scalar):
         value = self.value
         return None if value is None else value.as_py()
 
+    @property
+    def type_code(self):
+        """
+        Return the union type code for this scalar.
+        """
+        cdef CUnionScalar* sp = <CUnionScalar*> self.wrapped.get()
+        return sp.type_code
+
 
 cdef dict _scalar_classes = {
     _Type_BOOL: BooleanScalar,
diff --git a/python/pyarrow/tests/test_scalars.py b/python/pyarrow/tests/test_scalars.py
index d20226135d7..86dfe949cb4 100644
--- a/python/pyarrow/tests/test_scalars.py
+++ b/python/pyarrow/tests/test_scalars.py
@@ -645,9 +645,13 @@ def test_union():
         with pytest.raises(pa.ArrowNotImplementedError):
             pickle.loads(pickle.dumps(s))
 
+    assert arr[0].type_code == 0
     assert arr[0].as_py() == "a"
+    assert arr[1].type_code == 0
     assert arr[1].as_py() == "b"
+    assert arr[2].type_code == 1
     assert arr[2].as_py() == 3
+    assert arr[3].type_code == 1
     assert arr[3].as_py() == 4
 
     # dense
@@ -666,5 +670,7 @@ def test_union():
         with pytest.raises(pa.ArrowNotImplementedError):
             pickle.loads(pickle.dumps(s))
 
+    assert arr[0].type_code == 0
     assert arr[0].as_py() == b'a'
+    assert arr[5].type_code == 1
     assert arr[5].as_py() == 3

From 3fa47f24190ad9a52658c5706bc11105782f8080 Mon Sep 17 00:00:00 2001
From: Wes McKinney <wesm@apache.org>
Date: Mon, 2 Aug 2021 19:06:23 +0200
Subject: [PATCH 686/719] ARROW-8928: [C++] Add microbenchmarks to help measure
 ExecBatchIterator overhead
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

These are only preliminary benchmarks but may help in examining microperformance overhead related to `ExecBatch` and its implementation (as a `vector<Datum>`).

It may be desirable to devise an "array reference" data structure with few or no heap-allocated data structures and no `shared_ptr` interactions required to obtain memory addresses and other array information.

On my test machine (macOS i9-9880H 2.3ghz), I see about 472 CPU cycles per field overhead for each ExecBatch produced. These benchmarks take a record batch with 1M rows and 10 columns/fields and iterates through the rows in smaller ExecBatches of the indicated sizes

```
BM_ExecBatchIterator/256      8207877 ns      8204914 ns           81 items_per_second=121.878/s
BM_ExecBatchIterator/512      4421049 ns      4419958 ns          166 items_per_second=226.247/s
BM_ExecBatchIterator/1024     2056636 ns      2055369 ns          333 items_per_second=486.531/s
BM_ExecBatchIterator/2048     1056415 ns      1056264 ns          682 items_per_second=946.733/s
BM_ExecBatchIterator/4096      514276 ns       514136 ns         1246 items_per_second=1.94501k/s
BM_ExecBatchIterator/8192      262539 ns       262391 ns         2736 items_per_second=3.81111k/s
BM_ExecBatchIterator/16384     128995 ns       128974 ns         5398 items_per_second=7.75351k/s
BM_ExecBatchIterator/32768      64987 ns        64970 ns        10811 items_per_second=15.3917k/s
```

So for the 1024 case, it takes 2,055,369 ns to iterate through all 1024 batches. That seems a bit expensive to me (?) — I suspect we can do better while also improving compilation times and reducing generated code size by using simpler data structures in our compute internals.

Closes #9280 from wesm/cpp-compute-microbenchmarks

Lead-authored-by: Wes McKinney <wesm@apache.org>
Co-authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/compute/function_benchmark.cc | 34 +++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/cpp/src/arrow/compute/function_benchmark.cc b/cpp/src/arrow/compute/function_benchmark.cc
index daf03754984..a29a766be79 100644
--- a/cpp/src/arrow/compute/function_benchmark.cc
+++ b/cpp/src/arrow/compute/function_benchmark.cc
@@ -19,6 +19,7 @@
 
 #include "arrow/array/array_base.h"
 #include "arrow/compute/api.h"
+#include "arrow/compute/exec_internal.h"
 #include "arrow/memory_pool.h"
 #include "arrow/scalar.h"
 #include "arrow/testing/gtest_util.h"
@@ -174,11 +175,44 @@ void BM_ExecuteScalarKernelOnScalar(benchmark::State& state) {
   state.SetItemsProcessed(state.iterations() * N);
 }
 
+void BM_ExecBatchIterator(benchmark::State& state) {
+  // Measure overhead related to splitting ExecBatch into smaller ExecBatches
+  // for parallelism or more optimal CPU cache affinity
+  random::RandomArrayGenerator rag(kSeed);
+
+  const int64_t length = 1 << 20;
+  const int num_fields = 32;
+
+  std::vector<Datum> args(num_fields);
+  for (int i = 0; i < num_fields; ++i) {
+    args[i] = rag.Int64(length, 0, 100)->data();
+  }
+
+  const int64_t blocksize = state.range(0);
+  for (auto _ : state) {
+    std::unique_ptr<detail::ExecBatchIterator> it =
+        *detail::ExecBatchIterator::Make(args, blocksize);
+    ExecBatch batch;
+    while (it->Next(&batch)) {
+      for (int i = 0; i < num_fields; ++i) {
+        auto data = batch.values[i].array()->buffers[1]->data();
+        benchmark::DoNotOptimize(data);
+      }
+    }
+    benchmark::DoNotOptimize(batch);
+  }
+  // Provides comparability across blocksizes by looking at the iterations per
+  // second. So 1000 iterations/second means that input splitting associated
+  // with ExecBatchIterator takes up 1ms every time.
+  state.SetItemsProcessed(state.iterations());
+}
+
 BENCHMARK(BM_CastDispatch);
 BENCHMARK(BM_CastDispatchBaseline);
 BENCHMARK(BM_AddDispatch);
 BENCHMARK(BM_ExecuteScalarFunctionOnScalar);
 BENCHMARK(BM_ExecuteScalarKernelOnScalar);
+BENCHMARK(BM_ExecBatchIterator)->RangeMultiplier(4)->Range(1024, 64 * 1024);
 
 }  // namespace compute
 }  // namespace arrow

From 8628fb1287cebf0d7e7dbeb5ff6b8c4f07a9524f Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Mon, 2 Aug 2021 20:26:54 +0200
Subject: [PATCH 687/719] ARROW-13425: [Archery] Avoid importing PyArrow
 indirectly

Pandas will try to import PyArrow if seemingly available.
However, the currently installed PyArrow may not be compatible with the last compiled Arrow C++
(e.g. when using `archery benchmark diff ...`).

Closes #10769 from pitrou/ARROW-13425-archery-import-pyarrow

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 dev/archery/archery/cli.py    | 3 ++-
 dev/archery/archery/compat.py | 8 ++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py
index 7fef9edb4b9..582a4288492 100644
--- a/dev/archery/archery/cli.py
+++ b/dev/archery/archery/cli.py
@@ -28,6 +28,7 @@
 from .benchmark.codec import JsonEncoder
 from .benchmark.compare import RunnerComparator, DEFAULT_THRESHOLD
 from .benchmark.runner import CppBenchmarkRunner, JavaBenchmarkRunner
+from .compat import _import_pandas
 from .lang.cpp import CppCMakeDefinition, CppConfiguration
 from .utils.cli import ArrowBool, validate_arrow_sources, add_optional_command
 from .utils.lint import linter, python_numpydoc, LintValidationException
@@ -647,7 +648,7 @@ def _get_comparisons_as_json(comparisons):
 
 def _format_comparisons_with_pandas(comparisons_json, no_counters,
                                     ren_counters):
-    import pandas as pd
+    pd = _import_pandas()
     df = pd.read_json(StringIO(comparisons_json), lines=True)
     # parse change % so we can sort by it
     df['change %'] = df.pop('change').str[:-1].map(float)
diff --git a/dev/archery/archery/compat.py b/dev/archery/archery/compat.py
index 22cb9fc7957..bb0b1542832 100644
--- a/dev/archery/archery/compat.py
+++ b/dev/archery/archery/compat.py
@@ -16,6 +16,7 @@
 # under the License.
 
 import pathlib
+import sys
 
 
 def _is_path_like(path):
@@ -49,3 +50,10 @@ def _stringify_path(path):
             return str(path)
 
     raise TypeError("not a path-like object")
+
+
+def _import_pandas():
+    # ARROW-13425: avoid importing PyArrow from Pandas
+    sys.modules['pyarrow'] = None
+    import pandas as pd
+    return pd

From de7cc1e4820e96531aae3b7f3fc9105f7b3a5f3a Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Tue, 3 Aug 2021 10:00:21 +0200
Subject: [PATCH 688/719] ARROW-13522: [C++] Fix regression in UTF8 trim
 functions

Closes #10853 from pitrou/ARROW-13522-utf8-trim-regression

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 .../arrow/compute/kernels/scalar_string.cc    |  6 +++--
 .../compute/kernels/scalar_string_test.cc     | 26 ++++++++++++-------
 2 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/scalar_string.cc b/cpp/src/arrow/compute/kernels/scalar_string.cc
index ab0a490eeb3..6ef08a7d2bb 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string.cc
@@ -3022,8 +3022,9 @@ struct UTF8TrimTransform : public StringTransformBase {
     const uint8_t* end = input + input_string_ncodeunits;
     const uint8_t* end_trimmed = end;
     const uint8_t* begin_trimmed = begin;
+    const auto& codepoints = state_.codepoints_;
 
-    auto predicate = [&](uint32_t c) { return !state_.codepoints_[c]; };
+    auto predicate = [&](uint32_t c) { return c >= codepoints.size() || !codepoints[c]; };
     if (TrimLeft && !ARROW_PREDICT_TRUE(
                         arrow::util::UTF8FindIf(begin, end, predicate, &begin_trimmed))) {
       return kTransformError;
@@ -3111,8 +3112,9 @@ struct AsciiTrimTransform : public StringTransformBase {
     const uint8_t* end = input + input_string_ncodeunits;
     const uint8_t* end_trimmed = end;
     const uint8_t* begin_trimmed = begin;
+    const auto& characters = state_.characters_;
 
-    auto predicate = [&](uint8_t c) { return !state_.characters_[c]; };
+    auto predicate = [&](uint8_t c) { return !characters[c]; };
     if (TrimLeft) {
       begin_trimmed = std::find_if(begin, end, predicate);
     }
diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
index 67f1e02558b..785c82ca044 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
@@ -1269,15 +1269,23 @@ TYPED_TEST(TestStringKernels, TrimWhitespaceUTF8) {
 }
 
 TYPED_TEST(TestStringKernels, TrimUTF8) {
-  TrimOptions options{"ȺA"};
-  this->CheckUnary("utf8_trim", "[\"ȺȺfooȺAȺ\", null, \"barȺAȺ\", \"ȺAȺfooȺAȺbarA\"]",
-                   this->type(), "[\"foo\", null, \"bar\", \"fooȺAȺbar\"]", &options);
-  this->CheckUnary("utf8_ltrim", "[\"ȺȺfooȺAȺ\", null, \"barȺAȺ\", \"ȺAȺfooȺAȺbarA\"]",
-                   this->type(), "[\"fooȺAȺ\", null, \"barȺAȺ\", \"fooȺAȺbarA\"]",
-                   &options);
-  this->CheckUnary("utf8_rtrim", "[\"ȺȺfooȺAȺ\", null, \"barȺAȺ\", \"ȺAȺfooȺAȺbarA\"]",
-                   this->type(), "[\"ȺȺfoo\", null, \"bar\", \"ȺAȺfooȺAȺbar\"]",
-                   &options);
+  auto options = TrimOptions{"ab"};
+  this->CheckUnary("utf8_trim", "[\"azȺz矢ba\", null, \"bab\", \"zȺz\"]", this->type(),
+                   "[\"zȺz矢\", null, \"\", \"zȺz\"]", &options);
+  this->CheckUnary("utf8_ltrim", "[\"azȺz矢ba\", null, \"bab\", \"zȺz\"]", this->type(),
+                   "[\"zȺz矢ba\", null, \"\", \"zȺz\"]", &options);
+  this->CheckUnary("utf8_rtrim", "[\"azȺz矢ba\", null, \"bab\", \"zȺz\"]", this->type(),
+                   "[\"azȺz矢\", null, \"\", \"zȺz\"]", &options);
+
+  options = TrimOptions{"ȺA"};
+  this->CheckUnary("utf8_trim", "[\"ȺȺfoo矢ȺAȺ\", null, \"barȺAȺ\", \"ȺAȺfooȺAȺ矢barA\"]",
+                   this->type(), "[\"foo矢\", null, \"bar\", \"fooȺAȺ矢bar\"]", &options);
+  this->CheckUnary(
+      "utf8_ltrim", "[\"ȺȺfoo矢ȺAȺ\", null, \"barȺAȺ\", \"ȺAȺfooȺAȺ矢barA\"]",
+      this->type(), "[\"foo矢ȺAȺ\", null, \"barȺAȺ\", \"fooȺAȺ矢barA\"]", &options);
+  this->CheckUnary(
+      "utf8_rtrim", "[\"ȺȺfoo矢ȺAȺ\", null, \"barȺAȺ\", \"ȺAȺfooȺAȺ矢barA\"]",
+      this->type(), "[\"ȺȺfoo矢\", null, \"bar\", \"ȺAȺfooȺAȺ矢bar\"]", &options);
 
   TrimOptions options_invalid{"ɑa\xFFɑ"};
   auto input = ArrayFromJSON(this->type(), "[\"foo\"]");

From dee62082423a6cbec28d47d58038a2ba4068896f Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Tue, 3 Aug 2021 12:47:41 +0200
Subject: [PATCH 689/719] ARROW-9948: [C++] Fix scale handling in Decimal{128,
 256}::FromString

* The wrong scale could be inferred if the input both had fractional digits and an exponent
* An out-of-bounds access could be provoked when correcting an excessive negative scale (for example "1e39")
* Make testing more generic and more thorough

Closes #10823 from pitrou/ARROW-9948-decimal-from-string

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 c_glib/arrow-glib/decimal.cpp         |   2 +-
 cpp/src/arrow/util/basic_decimal.cc   |   4 +-
 cpp/src/arrow/util/basic_decimal.h    |  90 +++-
 cpp/src/arrow/util/decimal.cc         |  84 +---
 cpp/src/arrow/util/decimal_test.cc    | 661 ++++++++++++++++++--------
 cpp/src/gandiva/tests/decimal_test.cc |   2 +-
 6 files changed, 575 insertions(+), 268 deletions(-)

diff --git a/c_glib/arrow-glib/decimal.cpp b/c_glib/arrow-glib/decimal.cpp
index cf0a08a3d7c..497d76fcfaa 100644
--- a/c_glib/arrow-glib/decimal.cpp
+++ b/c_glib/arrow-glib/decimal.cpp
@@ -177,7 +177,7 @@ garrow_decimal_to_bytes(typename DecimalConverter<Decimal>::GArrowType *decimal)
 {
   DecimalConverter<Decimal> converter;
   const auto arrow_decimal = converter.get_raw(decimal);
-  uint8_t data[DecimalConverter<Decimal>::ArrowType::bit_width / 8];
+  uint8_t data[DecimalConverter<Decimal>::ArrowType::kBitWidth / 8];
   arrow_decimal->ToBytes(data);
   return g_bytes_new(data, sizeof(data));
 }
diff --git a/cpp/src/arrow/util/basic_decimal.cc b/cpp/src/arrow/util/basic_decimal.cc
index 9d87cc94e2c..edc25e25db8 100644
--- a/cpp/src/arrow/util/basic_decimal.cc
+++ b/cpp/src/arrow/util/basic_decimal.cc
@@ -830,7 +830,7 @@ static inline DecimalStatus SingleDivide(const uint32_t* dividend,
                                          bool divisor_was_negative,
                                          DecimalClass* result) {
   uint64_t r = 0;
-  constexpr int64_t kDecimalArrayLength = DecimalClass::bit_width / sizeof(uint32_t) + 1;
+  constexpr int64_t kDecimalArrayLength = DecimalClass::kBitWidth / sizeof(uint32_t) + 1;
   uint32_t result_array[kDecimalArrayLength];
   for (int64_t j = 0; j < dividend_length; j++) {
     r <<= 32;
@@ -853,7 +853,7 @@ template <class DecimalClass>
 static inline DecimalStatus DecimalDivide(const DecimalClass& dividend,
                                           const DecimalClass& divisor,
                                           DecimalClass* result, DecimalClass* remainder) {
-  constexpr int64_t kDecimalArrayLength = DecimalClass::bit_width / sizeof(uint32_t);
+  constexpr int64_t kDecimalArrayLength = DecimalClass::kBitWidth / sizeof(uint32_t);
   // Split the dividend and divisor into integer pieces so that we can
   // work on them.
   uint32_t dividend_array[kDecimalArrayLength + 1];
diff --git a/cpp/src/arrow/util/basic_decimal.h b/cpp/src/arrow/util/basic_decimal.h
index a808396090a..745d8ac4602 100644
--- a/cpp/src/arrow/util/basic_decimal.h
+++ b/cpp/src/arrow/util/basic_decimal.h
@@ -42,8 +42,15 @@ enum class DecimalStatus {
 /// This class is also compiled into LLVM IR - so, it should not have cpp references like
 /// streams and boost.
 class ARROW_EXPORT BasicDecimal128 {
+  struct LittleEndianArrayTag {};
+
  public:
-  static constexpr int bit_width = 128;
+  static constexpr int kBitWidth = 128;
+  static constexpr int kMaxPrecision = 38;
+  static constexpr int kMaxScale = 38;
+
+  // A constructor tag to introduce a little-endian encoded array
+  static constexpr LittleEndianArrayTag LittleEndianArray{};
 
   /// \brief Create a BasicDecimal128 from the two's complement representation.
 #if ARROW_LITTLE_ENDIAN
@@ -54,6 +61,23 @@ class ARROW_EXPORT BasicDecimal128 {
       : high_bits_(high), low_bits_(low) {}
 #endif
 
+  /// \brief Create a BasicDecimal256 from the two's complement representation.
+  ///
+  /// Input array is assumed to be in native endianness.
+#if ARROW_LITTLE_ENDIAN
+  constexpr BasicDecimal128(const std::array<uint64_t, 2>& array) noexcept
+      : low_bits_(array[0]), high_bits_(static_cast<int64_t>(array[1])) {}
+#else
+  constexpr BasicDecimal128(const std::array<uint64_t, 2>& array) noexcept
+      : high_bits_(static_cast<int64_t>(array[0])), low_bits_(array[1]) {}
+#endif
+
+  /// \brief Create a BasicDecimal256 from the two's complement representation.
+  ///
+  /// Input array is assumed to be in little endianness, with native endian elements.
+  BasicDecimal128(LittleEndianArrayTag, const std::array<uint64_t, 2>& array) noexcept
+      : BasicDecimal128(BitUtil::LittleEndianArray::ToNative(array)) {}
+
   /// \brief Empty constructor creates a BasicDecimal128 with a value of 0.
   constexpr BasicDecimal128() noexcept : BasicDecimal128(0, 0) {}
 
@@ -122,6 +146,30 @@ class ARROW_EXPORT BasicDecimal128 {
   /// \brief Get the low bits of the two's complement representation of the number.
   inline constexpr uint64_t low_bits() const { return low_bits_; }
 
+  /// \brief Get the bits of the two's complement representation of the number.
+  ///
+  /// The 2 elements are in native endian order. The bits within each uint64_t element
+  /// are in native endian order. For example, on a little endian machine,
+  /// BasicDecimal128(123).native_endian_array() = {123, 0};
+  /// but on a big endian machine,
+  /// BasicDecimal128(123).native_endian_array() = {0, 123};
+  inline std::array<uint64_t, 2> native_endian_array() const {
+#if ARROW_LITTLE_ENDIAN
+    return {low_bits_, static_cast<uint64_t>(high_bits_)};
+#else
+    return {static_cast<uint64_t>(high_bits_), low_bits_};
+#endif
+  }
+
+  /// \brief Get the bits of the two's complement representation of the number.
+  ///
+  /// The 2 elements are in little endian order. However, the bits within each
+  /// uint64_t element are in native endian order.
+  /// For example, BasicDecimal128(123).little_endian_array() = {123, 0};
+  inline std::array<uint64_t, 2> little_endian_array() const {
+    return {low_bits_, static_cast<uint64_t>(high_bits_)};
+  }
+
   /// \brief Return the raw bytes of the value in native-endian byte order.
   std::array<uint8_t, 16> ToBytes() const;
   void ToBytes(uint8_t* out) const;
@@ -200,14 +248,28 @@ class ARROW_EXPORT BasicDecimal256 {
     return low_bits >= T() ? uint64_t{0} : ~uint64_t{0};
   }
 
+  struct LittleEndianArrayTag {};
+
  public:
-  static constexpr int bit_width = 256;
+  static constexpr int kBitWidth = 256;
+  static constexpr int kMaxPrecision = 76;
+  static constexpr int kMaxScale = 76;
+
+  // A constructor tag to denote a little-endian encoded array
+  static constexpr LittleEndianArrayTag LittleEndianArray{};
 
   /// \brief Create a BasicDecimal256 from the two's complement representation.
+  ///
   /// Input array is assumed to be in native endianness.
   constexpr BasicDecimal256(const std::array<uint64_t, 4>& array) noexcept
       : array_(array) {}
 
+  /// \brief Create a BasicDecimal256 from the two's complement representation.
+  ///
+  /// Input array is assumed to be in little endianness, with native endian elements.
+  BasicDecimal256(LittleEndianArrayTag, const std::array<uint64_t, 4>& array) noexcept
+      : BasicDecimal256(BitUtil::LittleEndianArray::ToNative(array)) {}
+
   /// \brief Empty constructor creates a BasicDecimal256 with a value of 0.
   constexpr BasicDecimal256() noexcept : array_({0, 0, 0, 0}) {}
 
@@ -244,14 +306,28 @@ class ARROW_EXPORT BasicDecimal256 {
   /// \brief Subtract a number from this one. The result is truncated to 256 bits.
   BasicDecimal256& operator-=(const BasicDecimal256& right);
 
-  /// \brief Get the bits of the two's complement representation of the number. The 4
-  /// elements are in native endian order. The bits within each uint64_t element are in
-  /// native endian order. For example, on a little endian machine,
-  /// BasicDecimal256(123).native_endian_array() = {123, 0, 0, 0};
-  /// BasicDecimal256(-2).native_endian_array() = {0xFF...FE, 0xFF...FF, 0xFF...FF,
+  /// \brief Get the bits of the two's complement representation of the number.
+  ///
+  /// The 4 elements are in native endian order. The bits within each uint64_t element
+  /// are in native endian order. For example, on a little endian machine,
+  ///   BasicDecimal256(123).native_endian_array() = {123, 0, 0, 0};
+  ///   BasicDecimal256(-2).native_endian_array() = {0xFF...FE, 0xFF...FF, 0xFF...FF,
   /// 0xFF...FF}.
+  /// while on a big endian machine,
+  ///   BasicDecimal256(123).native_endian_array() = {0, 0, 0, 123};
+  ///   BasicDecimal256(-2).native_endian_array() = {0xFF...FF, 0xFF...FF, 0xFF...FF,
+  /// 0xFF...FE}.
   inline const std::array<uint64_t, 4>& native_endian_array() const { return array_; }
 
+  /// \brief Get the bits of the two's complement representation of the number.
+  ///
+  /// The 4 elements are in little endian order. However, the bits within each
+  /// uint64_t element are in native endian order.
+  /// For example, BasicDecimal256(123).little_endian_array() = {123, 0};
+  inline const std::array<uint64_t, 4> little_endian_array() const {
+    return BitUtil::LittleEndianArray::FromNative(array_);
+  }
+
   /// \brief Get the lowest bits of the two's complement representation of the number.
   inline uint64_t low_bits() const { return BitUtil::LittleEndianArray::Make(array_)[0]; }
 
diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc
index 66deb97cc96..5e3e5e4ab43 100644
--- a/cpp/src/arrow/util/decimal.cc
+++ b/cpp/src/arrow/util/decimal.cc
@@ -504,17 +504,16 @@ inline Status ToArrowStatus(DecimalStatus dstatus, int num_bits) {
   return Status::OK();
 }
 
-}  // namespace
-
-Status Decimal128::FromString(const util::string_view& s, Decimal128* out,
-                              int32_t* precision, int32_t* scale) {
+template <typename Decimal>
+Status DecimalFromString(const char* type_name, const util::string_view& s, Decimal* out,
+                         int32_t* precision, int32_t* scale) {
   if (s.empty()) {
-    return Status::Invalid("Empty string cannot be converted to decimal");
+    return Status::Invalid("Empty string cannot be converted to ", type_name);
   }
 
   DecimalComponents dec;
   if (!ParseDecimalComponents(s.data(), s.size(), &dec)) {
-    return Status::Invalid("The string '", s, "' is not a valid decimal number");
+    return Status::Invalid("The string '", s, "' is not a valid ", type_name, " number");
   }
 
   // Count number of significant digits (without leading zeros)
@@ -528,29 +527,33 @@ Status Decimal128::FromString(const util::string_view& s, Decimal128* out,
   int32_t parsed_scale = 0;
   if (dec.has_exponent) {
     auto adjusted_exponent = dec.exponent;
-    auto len = static_cast<int32_t>(significant_digits);
-    parsed_scale = -adjusted_exponent + len - 1;
+    parsed_scale =
+        -adjusted_exponent + static_cast<int32_t>(dec.fractional_digits.size());
   } else {
     parsed_scale = static_cast<int32_t>(dec.fractional_digits.size());
   }
 
   if (out != nullptr) {
-    std::array<uint64_t, 2> little_endian_array = {0, 0};
+    static_assert(Decimal::kBitWidth % 64 == 0, "decimal bit-width not a multiple of 64");
+    std::array<uint64_t, Decimal::kBitWidth / 64> little_endian_array{};
     ShiftAndAdd(dec.whole_digits, little_endian_array.data(), little_endian_array.size());
     ShiftAndAdd(dec.fractional_digits, little_endian_array.data(),
                 little_endian_array.size());
-    *out =
-        Decimal128(static_cast<int64_t>(little_endian_array[1]), little_endian_array[0]);
-    if (parsed_scale < 0) {
-      *out *= GetScaleMultiplier(-parsed_scale);
-    }
-
+    *out = Decimal(BitUtil::LittleEndianArray::ToNative(little_endian_array));
     if (dec.sign == '-') {
       out->Negate();
     }
   }
 
   if (parsed_scale < 0) {
+    // Force the scale to zero, to avoid negative scales (due to compatibility issues
+    // with external systems such as databases)
+    if (-parsed_scale > Decimal::kMaxScale) {
+      return Status::Invalid("The string '", s, "' cannot be represented as ", type_name);
+    }
+    if (out != nullptr) {
+      *out *= Decimal::GetScaleMultiplier(-parsed_scale);
+    }
     parsed_precision -= parsed_scale;
     parsed_scale = 0;
   }
@@ -565,6 +568,13 @@ Status Decimal128::FromString(const util::string_view& s, Decimal128* out,
   return Status::OK();
 }
 
+}  // namespace
+
+Status Decimal128::FromString(const util::string_view& s, Decimal128* out,
+                              int32_t* precision, int32_t* scale) {
+  return DecimalFromString("decimal128", s, out, precision, scale);
+}
+
 Status Decimal128::FromString(const std::string& s, Decimal128* out, int32_t* precision,
                               int32_t* scale) {
   return FromString(util::string_view(s), out, precision, scale);
@@ -692,49 +702,7 @@ std::string Decimal256::ToString(int32_t scale) const {
 
 Status Decimal256::FromString(const util::string_view& s, Decimal256* out,
                               int32_t* precision, int32_t* scale) {
-  if (s.empty()) {
-    return Status::Invalid("Empty string cannot be converted to decimal");
-  }
-
-  DecimalComponents dec;
-  if (!ParseDecimalComponents(s.data(), s.size(), &dec)) {
-    return Status::Invalid("The string '", s, "' is not a valid decimal number");
-  }
-
-  // Count number of significant digits (without leading zeros)
-  size_t first_non_zero = dec.whole_digits.find_first_not_of('0');
-  size_t significant_digits = dec.fractional_digits.size();
-  if (first_non_zero != std::string::npos) {
-    significant_digits += dec.whole_digits.size() - first_non_zero;
-  }
-
-  if (precision != nullptr) {
-    *precision = static_cast<int32_t>(significant_digits);
-  }
-
-  if (scale != nullptr) {
-    if (dec.has_exponent) {
-      auto adjusted_exponent = dec.exponent;
-      auto len = static_cast<int32_t>(significant_digits);
-      *scale = -adjusted_exponent + len - 1;
-    } else {
-      *scale = static_cast<int32_t>(dec.fractional_digits.size());
-    }
-  }
-
-  if (out != nullptr) {
-    std::array<uint64_t, 4> little_endian_array = {0, 0, 0, 0};
-    ShiftAndAdd(dec.whole_digits, little_endian_array.data(), little_endian_array.size());
-    ShiftAndAdd(dec.fractional_digits, little_endian_array.data(),
-                little_endian_array.size());
-    *out = Decimal256(BitUtil::LittleEndianArray::ToNative(little_endian_array));
-
-    if (dec.sign == '-') {
-      out->Negate();
-    }
-  }
-
-  return Status::OK();
+  return DecimalFromString("decimal256", s, out, precision, scale);
 }
 
 Status Decimal256::FromString(const std::string& s, Decimal256* out, int32_t* precision,
diff --git a/cpp/src/arrow/util/decimal_test.cc b/cpp/src/arrow/util/decimal_test.cc
index 29964e0b9e2..75716f943c6 100644
--- a/cpp/src/arrow/util/decimal_test.cc
+++ b/cpp/src/arrow/util/decimal_test.cc
@@ -20,7 +20,6 @@
 #include <cmath>
 #include <cstdint>
 #include <ostream>
-#include <sstream>
 #include <string>
 #include <tuple>
 #include <utility>
@@ -29,9 +28,13 @@
 #include <gtest/gtest.h>
 #include <boost/multiprecision/cpp_int.hpp>
 
+#include "arrow/array.h"
+#include "arrow/scalar.h"
 #include "arrow/status.h"
 #include "arrow/testing/gtest_util.h"
 #include "arrow/testing/random.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/checked_cast.h"
 #include "arrow/util/decimal.h"
 #include "arrow/util/endian.h"
 #include "arrow/util/int128_internal.h"
@@ -39,53 +42,200 @@
 
 namespace arrow {
 
+using internal::checked_cast;
 using internal::int128_t;
 using internal::uint128_t;
 
+using DecimalTypes = ::testing::Types<Decimal128, Decimal256>;
+
 static const int128_t kInt128Max =
     (static_cast<int128_t>(INT64_MAX) << 64) + static_cast<int128_t>(UINT64_MAX);
 
-class DecimalTestFixture : public ::testing::Test {
+template <typename DecimalType>
+void AssertDecimalFromString(const std::string& s, const DecimalType& expected,
+                             int32_t expected_precision, int32_t expected_scale) {
+  ARROW_SCOPED_TRACE("s = '", s, "'");
+  DecimalType d;
+  int32_t precision, scale;
+  ASSERT_OK(DecimalType::FromString(s, &d, &precision, &scale));
+  EXPECT_EQ(expected, d);
+  EXPECT_EQ(expected_precision, precision);
+  EXPECT_EQ(expected_scale, scale);
+}
+
+// Assert that the low bits of an array of integers are equal to `expected_low`,
+// and that all other bits are equal to `expected_high`.
+template <typename T, size_t N, typename U, typename V>
+void AssertArrayBits(const std::array<T, N>& a, U expected_low, V expected_high) {
+  EXPECT_EQ(a[0], expected_low);
+  for (size_t i = 1; i < N; ++i) {
+    EXPECT_EQ(a[i], expected_high);
+  }
+}
+
+Decimal128 Decimal128FromLE(const std::array<uint64_t, 2>& a) {
+  return Decimal128(Decimal128::LittleEndianArray, a);
+}
+
+Decimal256 Decimal256FromLE(const std::array<uint64_t, 4>& a) {
+  return Decimal256(Decimal256::LittleEndianArray, a);
+}
+
+template <typename DecimalType>
+struct DecimalTraits {};
+
+template <>
+struct DecimalTraits<Decimal128> {
+  using ArrowType = Decimal128Type;
+};
+
+template <>
+struct DecimalTraits<Decimal256> {
+  using ArrowType = Decimal256Type;
+};
+
+template <typename DecimalType>
+class DecimalFromStringTest : public ::testing::Test {
  public:
-  DecimalTestFixture() : integer_value_(23423445), string_value_("234.23445") {}
-  Decimal128 integer_value_;
-  std::string string_value_;
+  using ArrowType = typename DecimalTraits<DecimalType>::ArrowType;
+  using ScalarType = typename TypeTraits<ArrowType>::ScalarType;
+
+  void TestBasics() { AssertDecimalFromString("234.23445", DecimalType(23423445), 8, 5); }
+
+  void TestStringStartingWithPlus() {
+    AssertDecimalFromString("+234.567", DecimalType(234567), 6, 3);
+    AssertDecimalFromString("+2342394230592.232349023094",
+                            DecimalType("2342394230592232349023094"), 25, 12);
+  }
+
+  void TestInvalidInput() {
+    for (const std::string invalid_value :
+         {"-", "0.0.0", "0-13-32", "a", "-23092.235-", "-+23092.235", "+-23092.235",
+          "00a", "1e1a", "0.00123D/3", "1.23eA8", "1.23E+3A", "-1.23E--5",
+          "1.2345E+++07"}) {
+      ARROW_SCOPED_TRACE("invalid_value = '", invalid_value, "'");
+      ASSERT_RAISES(Invalid, Decimal128::FromString(invalid_value));
+    }
+  }
+
+  void TestLeadingZerosNoDecimalPoint() {
+    AssertDecimalFromString("0000000", DecimalType(0), 0, 0);
+  }
+
+  void TestLeadingZerosDecimalPoint() {
+    AssertDecimalFromString("000.0000", DecimalType(0), 4, 4);
+  }
+
+  void TestNoLeadingZerosDecimalPoint() {
+    AssertDecimalFromString(".00000", DecimalType(0), 5, 5);
+  }
+
+  void TestNoDecimalPointExponent() {
+    AssertDecimalFromString("1E1", DecimalType(10), 2, 0);
+  }
+
+  void TestWithExponentAndNullptrScale() {
+    const DecimalType expected_value(123);
+    ASSERT_OK_AND_EQ(expected_value, DecimalType::FromString("1.23E-8"));
+  }
+
+  void TestSmallValues() {
+    struct TestValue {
+      std::string s;
+      int64_t expected;
+      int32_t expected_precision;
+      int32_t expected_scale;
+    };
+    for (const auto& tv : std::vector<TestValue>{{"12.3", 123LL, 3, 1},
+                                                 {"0.00123", 123LL, 5, 5},
+                                                 {"1.23E-8", 123LL, 3, 10},
+                                                 {"-1.23E-8", -123LL, 3, 10},
+                                                 {"1.23E+3", 1230LL, 4, 0},
+                                                 {"-1.23E+3", -1230LL, 4, 0},
+                                                 {"1.23E+5", 123000LL, 6, 0},
+                                                 {"1.2345E+7", 12345000LL, 8, 0},
+                                                 {"1.23e-8", 123LL, 3, 10},
+                                                 {"-1.23e-8", -123LL, 3, 10},
+                                                 {"1.23e+3", 1230LL, 4, 0},
+                                                 {"-1.23e+3", -1230LL, 4, 0},
+                                                 {"1.23e+5", 123000LL, 6, 0},
+                                                 {"1.2345e+7", 12345000LL, 8, 0}}) {
+      ARROW_SCOPED_TRACE("s = '", tv.s, "'");
+      AssertDecimalFromString(tv.s, DecimalType(tv.expected), tv.expected_precision,
+                              tv.expected_scale);
+    }
+  }
+
+  void CheckRandomValuesRoundTrip(int32_t precision, int32_t scale) {
+    auto rnd = random::RandomArrayGenerator(42);
+    const auto ty = std::make_shared<ArrowType>(precision, scale);
+    const auto array = rnd.ArrayOf(ty, 100, /*null_probability=*/0.0);
+    for (int64_t i = 0; i < array->length(); ++i) {
+      ASSERT_OK_AND_ASSIGN(auto scalar, array->GetScalar(i));
+      const DecimalType& dec_value = checked_cast<const ScalarType&>(*scalar).value;
+      const auto s = dec_value.ToString(scale);
+      ASSERT_OK_AND_ASSIGN(auto round_tripped, DecimalType::FromString(s));
+      ASSERT_EQ(dec_value, round_tripped);
+    }
+  }
+
+  void TestRandomSmallValuesRoundTrip() {
+    for (int32_t scale : {0, 2, 9}) {
+      ARROW_SCOPED_TRACE("scale = ", scale);
+      CheckRandomValuesRoundTrip(9, scale);
+    }
+  }
+
+  void TestRandomValuesRoundTrip() {
+    const auto max_scale = DecimalType::kMaxScale;
+    for (int32_t scale : {0, 3, max_scale / 2, max_scale}) {
+      ARROW_SCOPED_TRACE("scale = ", scale);
+      CheckRandomValuesRoundTrip(DecimalType::kMaxPrecision, scale);
+    }
+  }
 };
 
-TEST_F(DecimalTestFixture, TestFromString) {
-  Decimal128 expected(this->integer_value_);
-  Decimal128 result;
-  int32_t precision, scale;
-  ASSERT_OK(Decimal128::FromString(this->string_value_, &result, &precision, &scale));
-  ASSERT_EQ(result, expected);
-  ASSERT_EQ(precision, 8);
-  ASSERT_EQ(scale, 5);
+TYPED_TEST_SUITE(DecimalFromStringTest, DecimalTypes);
+
+TYPED_TEST(DecimalFromStringTest, Basics) { this->TestBasics(); }
+
+TYPED_TEST(DecimalFromStringTest, StringStartingWithPlus) {
+  this->TestStringStartingWithPlus();
 }
 
-TEST_F(DecimalTestFixture, TestStringStartingWithPlus) {
-  std::string plus_value("+234.234");
-  Decimal128 out;
-  int32_t scale;
-  int32_t precision;
-  ASSERT_OK(Decimal128::FromString(plus_value, &out, &precision, &scale));
-  ASSERT_EQ(234234, out);
-  ASSERT_EQ(6, precision);
-  ASSERT_EQ(3, scale);
+TYPED_TEST(DecimalFromStringTest, InvalidInput) { this->TestInvalidInput(); }
+
+TYPED_TEST(DecimalFromStringTest, LeadingZerosDecimalPoint) {
+  this->TestLeadingZerosDecimalPoint();
 }
 
-TEST_F(DecimalTestFixture, TestStringStartingWithPlus128) {
-  std::string plus_value("+2342394230592.232349023094");
-  Decimal128 expected_value("2342394230592232349023094");
-  Decimal128 out;
-  int32_t scale;
-  int32_t precision;
-  ASSERT_OK(Decimal128::FromString(plus_value, &out, &precision, &scale));
-  ASSERT_EQ(expected_value, out);
-  ASSERT_EQ(25, precision);
-  ASSERT_EQ(12, scale);
+TYPED_TEST(DecimalFromStringTest, LeadingZerosNoDecimalPoint) {
+  this->TestLeadingZerosNoDecimalPoint();
+}
+
+TYPED_TEST(DecimalFromStringTest, NoLeadingZerosDecimalPoint) {
+  this->TestNoLeadingZerosDecimalPoint();
+}
+
+TYPED_TEST(DecimalFromStringTest, NoDecimalPointExponent) {
+  this->TestNoDecimalPointExponent();
+}
+
+TYPED_TEST(DecimalFromStringTest, WithExponentAndNullptrScale) {
+  this->TestWithExponentAndNullptrScale();
+}
+
+TYPED_TEST(DecimalFromStringTest, SmallValues) { this->TestSmallValues(); }
+
+TYPED_TEST(DecimalFromStringTest, RandomSmallValuesRoundTrip) {
+  this->TestRandomSmallValuesRoundTrip();
+}
+
+TYPED_TEST(DecimalFromStringTest, RandomValuesRoundTrip) {
+  this->TestRandomValuesRoundTrip();
 }
 
-TEST(DecimalTest, TestFromStringDecimal128) {
+TEST(Decimal128Test, TestFromStringDecimal128) {
   std::string string_value("-23049223942343532412");
   Decimal128 result(string_value);
   Decimal128 expected(static_cast<int64_t>(-230492239423435324));
@@ -95,7 +245,7 @@ TEST(DecimalTest, TestFromStringDecimal128) {
   ASSERT_NE(result.high_bits(), 0);
 }
 
-TEST(DecimalTest, TestFromDecimalString128) {
+TEST(Decimal128Test, TestFromDecimalString128) {
   std::string string_value("-23049223942343.532412");
   Decimal128 result;
   ASSERT_OK_AND_ASSIGN(result, Decimal128::FromString(string_value));
@@ -106,7 +256,7 @@ TEST(DecimalTest, TestFromDecimalString128) {
   ASSERT_NE(result.high_bits(), 0);
 }
 
-TEST(DecimalTest, TestStringRoundTrip) {
+TEST(Decimal128Test, TestStringRoundTrip) {
   static constexpr uint64_t kTestBits[] = {
       0,
       1,
@@ -135,7 +285,7 @@ TEST(DecimalTest, TestStringRoundTrip) {
   }
 }
 
-TEST(DecimalTest, TestDecimal32SignedRoundTrip) {
+TEST(Decimal128Test, TestDecimal32SignedRoundTrip) {
   Decimal128 expected("-3402692");
 
   auto bytes = expected.ToBytes();
@@ -143,7 +293,7 @@ TEST(DecimalTest, TestDecimal32SignedRoundTrip) {
   ASSERT_EQ(expected, result);
 }
 
-TEST(DecimalTest, TestDecimal64SignedRoundTrip) {
+TEST(Decimal128Test, TestDecimal64SignedRoundTrip) {
   Decimal128 expected;
   std::string string_value("-34034293045.921");
   ASSERT_OK_AND_ASSIGN(expected, Decimal128::FromString(string_value));
@@ -154,7 +304,7 @@ TEST(DecimalTest, TestDecimal64SignedRoundTrip) {
   ASSERT_EQ(expected, result);
 }
 
-TEST(DecimalTest, TestDecimalStringAndBytesRoundTrip) {
+TEST(Decimal128Test, TestDecimalStringAndBytesRoundTrip) {
   Decimal128 expected;
   std::string string_value("-340282366920938463463374607431.711455");
   ASSERT_OK_AND_ASSIGN(expected, Decimal128::FromString(string_value));
@@ -171,117 +321,289 @@ TEST(DecimalTest, TestDecimalStringAndBytesRoundTrip) {
   ASSERT_EQ(expected, result);
 }
 
-TEST(DecimalTest, TestInvalidInputMinus) {
-  std::string invalid_value("-");
-  ASSERT_RAISES(Invalid, Decimal128::FromString(invalid_value));
-}
-
-TEST(DecimalTest, TestInvalidInputDot) {
-  std::string invalid_value("0.0.0");
-  ASSERT_RAISES(Invalid, Decimal128::FromString(invalid_value));
-}
-
-TEST(DecimalTest, TestInvalidInputEmbeddedMinus) {
-  std::string invalid_value("0-13-32");
-  ASSERT_RAISES(Invalid, Decimal128::FromString(invalid_value));
-}
-
-TEST(DecimalTest, TestInvalidInputSingleChar) {
-  std::string invalid_value("a");
-  ASSERT_RAISES(Invalid, Decimal128::FromString(invalid_value));
-}
-
-TEST(DecimalTest, TestInvalidInputWithValidSubstring) {
-  std::string invalid_value("-23092.235-");
-  ASSERT_RAISES(Invalid, Decimal128::FromString(invalid_value));
-}
-
-TEST(DecimalTest, TestInvalidInputWithMinusPlus) {
-  std::string invalid_value("-+23092.235");
-  ASSERT_RAISES(Invalid, Decimal128::FromString(invalid_value));
-}
-
-TEST(DecimalTest, TestInvalidInputWithPlusMinus) {
-  std::string invalid_value("+-23092.235");
-  ASSERT_RAISES(Invalid, Decimal128::FromString(invalid_value));
+/*
+  Note: generating a number of 64-bit decimal digits from a bigint:
+
+  >>> def dec(x, n):
+  ...:     sign = x < 0
+  ...:     if sign:
+  ...:         x = 2**(64*n) + x
+  ...:     a = []
+  ...:     for i in range(n-1):
+  ...:         x, r = divmod(x, 2**64)
+  ...:         a.append(r)
+  ...:     assert x < 2**64
+  ...:     a.append(x)
+  ...:     return a
+  ...:
+  >>> dec(10**37, 2)
+  [68739955140067328, 542101086242752217]
+  >>> dec(-10**37, 2)
+  [18378004118569484288, 17904642987466799398]
+  >>> dec(10**75, 4)
+  [0, 10084168908774762496, 12965995782233477362, 159309191113245227]
+  >>> dec(-10**75, 4)
+  [0, 8362575164934789120, 5480748291476074253, 18287434882596306388]
+*/
+
+TEST(Decimal128Test, FromStringLimits) {
+  // Positive / zero exponent
+  AssertDecimalFromString(
+      "1e37", Decimal128FromLE({68739955140067328ULL, 542101086242752217ULL}), 38, 0);
+  AssertDecimalFromString(
+      "-1e37", Decimal128FromLE({18378004118569484288ULL, 17904642987466799398ULL}), 38,
+      0);
+  AssertDecimalFromString(
+      "9.87e37", Decimal128FromLE({15251391175463010304ULL, 5350537721215964381ULL}), 38,
+      0);
+  AssertDecimalFromString(
+      "-9.87e37", Decimal128FromLE({3195352898246541312ULL, 13096206352493587234ULL}), 38,
+      0);
+  AssertDecimalFromString(
+      "12345678901234567890123456789012345678",
+      Decimal128FromLE({14143994781733811022ULL, 669260594276348691ULL}), 38, 0);
+  AssertDecimalFromString(
+      "-12345678901234567890123456789012345678",
+      Decimal128FromLE({4302749291975740594ULL, 17777483479433202924ULL}), 38, 0);
+
+  // "9..9" (38 times)
+  const auto dec38times9pos =
+      Decimal128FromLE({687399551400673279ULL, 5421010862427522170ULL});
+  // "-9..9" (38 times)
+  const auto dec38times9neg =
+      Decimal128FromLE({17759344522308878337ULL, 13025733211282029445ULL});
+
+  AssertDecimalFromString("99999999999999999999999999999999999999", dec38times9pos, 38,
+                          0);
+  AssertDecimalFromString("-99999999999999999999999999999999999999", dec38times9neg, 38,
+                          0);
+  AssertDecimalFromString("9.9999999999999999999999999999999999999e37", dec38times9pos,
+                          38, 0);
+  AssertDecimalFromString("-9.9999999999999999999999999999999999999e37", dec38times9neg,
+                          38, 0);
+
+  // Positive / zero exponent, precision too large for a non-negative scale
+  ASSERT_RAISES(Invalid, Decimal128::FromString("1e39"));
+  ASSERT_RAISES(Invalid, Decimal128::FromString("-1e39"));
+  ASSERT_RAISES(Invalid, Decimal128::FromString("9e39"));
+  ASSERT_RAISES(Invalid, Decimal128::FromString("-9e39"));
+  ASSERT_RAISES(Invalid, Decimal128::FromString("9.9e40"));
+  ASSERT_RAISES(Invalid, Decimal128::FromString("-9.9e40"));
+  // XXX conversion overflows are currently not detected
+  //   ASSERT_RAISES(Invalid, Decimal128::FromString("99e38"));
+  //   ASSERT_RAISES(Invalid, Decimal128::FromString("-99e38"));
+  //   ASSERT_RAISES(Invalid,
+  //   Decimal128::FromString("999999999999999999999999999999999999999e1"));
+  //   ASSERT_RAISES(Invalid,
+  //   Decimal128::FromString("-999999999999999999999999999999999999999e1"));
+  //   ASSERT_RAISES(Invalid,
+  //   Decimal128::FromString("999999999999999999999999999999999999999"));
+
+  // No exponent, many fractional digits
+  AssertDecimalFromString("9.9999999999999999999999999999999999999", dec38times9pos, 38,
+                          37);
+  AssertDecimalFromString("-9.9999999999999999999999999999999999999", dec38times9neg, 38,
+                          37);
+  AssertDecimalFromString("0.99999999999999999999999999999999999999", dec38times9pos, 38,
+                          38);
+  AssertDecimalFromString("-0.99999999999999999999999999999999999999", dec38times9neg, 38,
+                          38);
+
+  // Negative exponent
+  AssertDecimalFromString("1e-38", Decimal128FromLE({1, 0}), 1, 38);
+  AssertDecimalFromString(
+      "-1e-38", Decimal128FromLE({18446744073709551615ULL, 18446744073709551615ULL}), 1,
+      38);
+  AssertDecimalFromString("9.99e-36", Decimal128FromLE({999, 0}), 3, 38);
+  AssertDecimalFromString(
+      "-9.99e-36", Decimal128FromLE({18446744073709550617ULL, 18446744073709551615ULL}),
+      3, 38);
+  AssertDecimalFromString("987e-38", Decimal128FromLE({987, 0}), 3, 38);
+  AssertDecimalFromString(
+      "-987e-38", Decimal128FromLE({18446744073709550629ULL, 18446744073709551615ULL}), 3,
+      38);
+  AssertDecimalFromString("99999999999999999999999999999999999999e-37", dec38times9pos,
+                          38, 37);
+  AssertDecimalFromString("-99999999999999999999999999999999999999e-37", dec38times9neg,
+                          38, 37);
+  AssertDecimalFromString("99999999999999999999999999999999999999e-38", dec38times9pos,
+                          38, 38);
+  AssertDecimalFromString("-99999999999999999999999999999999999999e-38", dec38times9neg,
+                          38, 38);
+}
+
+TEST(Decimal256Test, FromStringLimits) {
+  // Positive / zero exponent
+  AssertDecimalFromString(
+      "1e75",
+      Decimal256FromLE(
+          {0, 10084168908774762496ULL, 12965995782233477362ULL, 159309191113245227ULL}),
+      76, 0);
+  AssertDecimalFromString(
+      "-1e75",
+      Decimal256FromLE(
+          {0, 8362575164934789120ULL, 5480748291476074253ULL, 18287434882596306388ULL}),
+      76, 0);
+  AssertDecimalFromString(
+      "9.87e75",
+      Decimal256FromLE(
+          {0, 3238743064843046400ULL, 7886074450795240548ULL, 1572381716287730397ULL}),
+      76, 0);
+  AssertDecimalFromString(
+      "-9.87e75",
+      Decimal256FromLE(
+          {0, 15208001008866505216ULL, 10560669622914311067ULL, 16874362357421821218ULL}),
+      76, 0);
+
+  AssertDecimalFromString(
+      "1234567890123456789012345678901234567890123456789012345678901234567890123456",
+      Decimal256FromLE({17877984925544397504ULL, 5352188884907840935ULL,
+                        234631617561833724ULL, 196678011949953713ULL}),
+      76, 0);
+  AssertDecimalFromString(
+      "-1234567890123456789012345678901234567890123456789012345678901234567890123456",
+      Decimal256FromLE({568759148165154112ULL, 13094555188801710680ULL,
+                        18212112456147717891ULL, 18250066061759597902ULL}),
+      76, 0);
+
+  // "9..9" (76 times)
+  const auto dec76times9pos =
+      Decimal256FromLE({18446744073709551615ULL, 8607968719199866879ULL,
+                        532749306367912313ULL, 1593091911132452277ULL});
+  // "-9..9" (76 times)
+  const auto dec76times9neg = Decimal256FromLE(
+      {1, 9838775354509684736ULL, 17913994767341639302ULL, 16853652162577099338ULL});
+
+  AssertDecimalFromString(
+      "9999999999999999999999999999999999999999999999999999999999999999999999999999",
+      dec76times9pos, 76, 0);
+  AssertDecimalFromString(
+      "-9999999999999999999999999999999999999999999999999999999999999999999999999999",
+      dec76times9neg, 76, 0);
+  AssertDecimalFromString(
+      "9.999999999999999999999999999999999999999999999999999999999999999999999999999e75",
+      dec76times9pos, 76, 0);
+  AssertDecimalFromString(
+      "-9.999999999999999999999999999999999999999999999999999999999999999999999999999e75",
+      dec76times9neg, 76, 0);
+
+  // Positive / zero exponent, precision too large for a non-negative scale
+  ASSERT_RAISES(Invalid, Decimal256::FromString("1e77"));
+  ASSERT_RAISES(Invalid, Decimal256::FromString("-1e77"));
+  ASSERT_RAISES(Invalid, Decimal256::FromString("9e77"));
+  ASSERT_RAISES(Invalid, Decimal256::FromString("-9e77"));
+  ASSERT_RAISES(Invalid, Decimal256::FromString("9.9e78"));
+  ASSERT_RAISES(Invalid, Decimal256::FromString("-9.9e78"));
+
+  // XXX conversion overflows are currently not detected
+  //   ASSERT_RAISES(Invalid, Decimal256::FromString("99e76"));
+  //   ASSERT_RAISES(Invalid, Decimal256::FromString("-99e76"));
+  //   ASSERT_RAISES(Invalid,
+  //     Decimal256::FromString("9999999999999999999999999999999999999999999999999999999999999999999999999999e1"));
+  //   ASSERT_RAISES(Invalid,
+  //     Decimal256::FromString("-9999999999999999999999999999999999999999999999999999999999999999999999999999e1"));
+  //   ASSERT_RAISES(Invalid,
+  //     Decimal256::FromString("99999999999999999999999999999999999999999999999999999999999999999999999999999"));
+
+  // No exponent, many fractional digits
+  AssertDecimalFromString(
+      "9.999999999999999999999999999999999999999999999999999999999999999999999999999",
+      dec76times9pos, 76, 75);
+  AssertDecimalFromString(
+      "-9.999999999999999999999999999999999999999999999999999999999999999999999999999",
+      dec76times9neg, 76, 75);
+  AssertDecimalFromString(
+      "0.9999999999999999999999999999999999999999999999999999999999999999999999999999",
+      dec76times9pos, 76, 76);
+  AssertDecimalFromString(
+      "-0.9999999999999999999999999999999999999999999999999999999999999999999999999999",
+      dec76times9neg, 76, 76);
+
+  // Negative exponent
+  AssertDecimalFromString("1e-76", Decimal256FromLE({1, 0, 0, 0}), 1, 76);
+  AssertDecimalFromString(
+      "-1e-76",
+      Decimal256FromLE({18446744073709551615ULL, 18446744073709551615ULL,
+                        18446744073709551615ULL, 18446744073709551615ULL}),
+      1, 76);
+  AssertDecimalFromString("9.99e-74", Decimal256FromLE({999, 0, 0, 0}), 3, 76);
+  AssertDecimalFromString(
+      "-9.99e-74",
+      Decimal256FromLE({18446744073709550617ULL, 18446744073709551615ULL,
+                        18446744073709551615ULL, 18446744073709551615ULL}),
+      3, 76);
+  AssertDecimalFromString("987e-76", Decimal256FromLE({987, 0, 0, 0}), 3, 76);
+  AssertDecimalFromString(
+      "-987e-76",
+      Decimal256FromLE({18446744073709550629ULL, 18446744073709551615ULL,
+                        18446744073709551615ULL, 18446744073709551615ULL}),
+      3, 76);
+  AssertDecimalFromString(
+      "9999999999999999999999999999999999999999999999999999999999999999999999999999e-75",
+      dec76times9pos, 76, 75);
+  AssertDecimalFromString(
+      "-9999999999999999999999999999999999999999999999999999999999999999999999999999e-75",
+      dec76times9neg, 76, 75);
+  AssertDecimalFromString(
+      "9999999999999999999999999999999999999999999999999999999999999999999999999999e-76",
+      dec76times9pos, 76, 76);
+  AssertDecimalFromString(
+      "-9999999999999999999999999999999999999999999999999999999999999999999999999999e-76",
+      dec76times9neg, 76, 76);
 }
 
-TEST(DecimalTest, TestInvalidInputWithLeadingZeros) {
-  std::string invalid_value("00a");
-  ASSERT_RAISES(Invalid, Decimal128::FromString(invalid_value));
-}
-
-TEST(DecimalZerosTest, LeadingZerosNoDecimalPoint) {
-  std::string string_value("0000000");
-  Decimal128 d;
-  int32_t precision;
-  int32_t scale;
-  ASSERT_OK(Decimal128::FromString(string_value, &d, &precision, &scale));
-  ASSERT_EQ(0, precision);
-  ASSERT_EQ(0, scale);
-  ASSERT_EQ(0, d);
-}
-
-TEST(DecimalZerosTest, LeadingZerosDecimalPoint) {
-  std::string string_value("000.0000");
-  Decimal128 d;
-  int32_t precision;
-  int32_t scale;
-  ASSERT_OK(Decimal128::FromString(string_value, &d, &precision, &scale));
-  ASSERT_EQ(4, precision);
-  ASSERT_EQ(4, scale);
-  ASSERT_EQ(0, d);
-}
-
-TEST(DecimalZerosTest, NoLeadingZerosDecimalPoint) {
-  std::string string_value(".00000");
-  Decimal128 d;
-  int32_t precision;
-  int32_t scale;
-  ASSERT_OK(Decimal128::FromString(string_value, &d, &precision, &scale));
-  ASSERT_EQ(5, precision);
-  ASSERT_EQ(5, scale);
-  ASSERT_EQ(0, d);
-}
-
-template <typename T>
-class Decimal128Test : public ::testing::Test {
+template <typename DecimalType>
+class DecimalFromIntegerTest : public ::testing::Test {
  public:
-  Decimal128Test() {}
-};
-
-using Decimal128Types =
-    ::testing::Types<char, unsigned char, short, unsigned short,  // NOLINT
-                     int, unsigned int, long, unsigned long,      // NOLINT
-                     long long, unsigned long long                // NOLINT
-                     >;
-
-TYPED_TEST_SUITE(Decimal128Test, Decimal128Types);
+  template <typename IntegerType>
+  void CheckConstructFrom() {
+    DecimalType value(IntegerType{42});
+    AssertArrayBits(value.little_endian_array(), 42, 0);
+
+    DecimalType max_value(std::numeric_limits<IntegerType>::max());
+    AssertArrayBits(max_value.little_endian_array(),
+                    std::numeric_limits<IntegerType>::max(), 0);
+
+    DecimalType min_value(std::numeric_limits<IntegerType>::min());
+    AssertArrayBits(min_value.little_endian_array(),
+                    std::numeric_limits<IntegerType>::min(),
+                    (std::is_signed<IntegerType>::value ? -1 : 0));
+  }
 
-TYPED_TEST(Decimal128Test, ConstructibleFromAnyIntegerType) {
-  Decimal128 value(TypeParam{42});
-  EXPECT_EQ(42, value.low_bits());
-  EXPECT_EQ(0, value.high_bits());
+  void TestConstructibleFromAnyIntegerType() {
+    CheckConstructFrom<char>();                // NOLINT
+    CheckConstructFrom<signed char>();         // NOLINT
+    CheckConstructFrom<unsigned char>();       // NOLINT
+    CheckConstructFrom<short>();               // NOLINT
+    CheckConstructFrom<unsigned short>();      // NOLINT
+    CheckConstructFrom<int>();                 // NOLINT
+    CheckConstructFrom<unsigned int>();        // NOLINT
+    CheckConstructFrom<long>();                // NOLINT
+    CheckConstructFrom<unsigned long>();       // NOLINT
+    CheckConstructFrom<long long>();           // NOLINT
+    CheckConstructFrom<unsigned long long>();  // NOLINT
+  }
 
-  Decimal128 max_value(std::numeric_limits<TypeParam>::max());
-  EXPECT_EQ(std::numeric_limits<TypeParam>::max(), max_value.low_bits());
-  EXPECT_EQ(0, max_value.high_bits());
+  void TestConstructibleFromBool() {
+    {
+      DecimalType value(true);
+      AssertArrayBits(value.little_endian_array(), 1, 0);
+    }
+    {
+      DecimalType value(false);
+      AssertArrayBits(value.little_endian_array(), 0, 0);
+    }
+  }
+};
 
-  Decimal128 min_value(std::numeric_limits<TypeParam>::min());
-  EXPECT_EQ(std::numeric_limits<TypeParam>::min(), min_value.low_bits());
-  EXPECT_EQ((std::is_signed<TypeParam>::value ? -1 : 0), min_value.high_bits());
-}
+TYPED_TEST_SUITE(DecimalFromIntegerTest, DecimalTypes);
 
-TEST(Decimal128TestTrue, ConstructibleFromBool) {
-  Decimal128 value(true);
-  EXPECT_EQ(1, value.low_bits());
-  EXPECT_EQ(0, value.high_bits());
+TYPED_TEST(DecimalFromIntegerTest, ConstructibleFromAnyIntegerType) {
+  this->TestConstructibleFromAnyIntegerType();
 }
 
-TEST(Decimal128TestFalse, ConstructibleFromBool) {
-  Decimal128 value(false);
-  EXPECT_EQ(0, value.low_bits());
-  EXPECT_EQ(0, value.high_bits());
+TYPED_TEST(DecimalFromIntegerTest, ConstructibleFromBool) {
+  this->TestConstructibleFromBool();
 }
 
 TEST(Decimal128Test, Division) {
@@ -408,53 +730,6 @@ TEST_P(Decimal128ToStringTest, ToString) {
 INSTANTIATE_TEST_SUITE_P(Decimal128ToStringTest, Decimal128ToStringTest,
                          ::testing::ValuesIn(kToStringTestData));
 
-class Decimal128ParsingTest
-    : public ::testing::TestWithParam<std::tuple<std::string, uint64_t, int32_t>> {};
-
-TEST_P(Decimal128ParsingTest, Parse) {
-  std::string test_string;
-  uint64_t expected_low_bits;
-  int32_t expected_scale;
-  std::tie(test_string, expected_low_bits, expected_scale) = GetParam();
-  Decimal128 value;
-  int32_t scale;
-  ASSERT_OK(Decimal128::FromString(test_string, &value, nullptr, &scale));
-  ASSERT_EQ(value.low_bits(), expected_low_bits);
-  ASSERT_EQ(expected_scale, scale);
-}
-
-INSTANTIATE_TEST_SUITE_P(Decimal128ParsingTest, Decimal128ParsingTest,
-                         ::testing::Values(std::make_tuple("12.3", 123ULL, 1),
-                                           std::make_tuple("0.00123", 123ULL, 5),
-                                           std::make_tuple("1.23E-8", 123ULL, 10),
-                                           std::make_tuple("-1.23E-8", -123LL, 10),
-                                           std::make_tuple("1.23E+3", 1230ULL, 0),
-                                           std::make_tuple("-1.23E+3", -1230LL, 0),
-                                           std::make_tuple("1.23E+5", 123000ULL, 0),
-                                           std::make_tuple("1.2345E+7", 12345000ULL, 0),
-                                           std::make_tuple("1.23e-8", 123ULL, 10),
-                                           std::make_tuple("-1.23e-8", -123LL, 10),
-                                           std::make_tuple("1.23e+3", 1230ULL, 0),
-                                           std::make_tuple("-1.23e+3", -1230LL, 0),
-                                           std::make_tuple("1.23e+5", 123000ULL, 0),
-                                           std::make_tuple("1.2345e+7", 12345000ULL, 0)));
-
-class Decimal128ParsingTestInvalid : public ::testing::TestWithParam<std::string> {};
-
-TEST_P(Decimal128ParsingTestInvalid, Parse) {
-  std::string test_string = GetParam();
-  ASSERT_RAISES(Invalid, Decimal128::FromString(test_string));
-}
-
-INSTANTIATE_TEST_SUITE_P(Decimal128ParsingTestInvalid, Decimal128ParsingTestInvalid,
-                         ::testing::Values("0.00123D/3", "1.23eA8", "1.23E+3A",
-                                           "-1.23E--5", "1.2345E+++07"));
-
-TEST(Decimal128ParseTest, WithExponentAndNullptrScale) {
-  const Decimal128 expected_value(123);
-  ASSERT_OK_AND_EQ(expected_value, Decimal128::FromString("1.23E-8"));
-}
-
 template <typename Decimal, typename Real>
 void CheckDecimalFromReal(Real real, int32_t precision, int32_t scale,
                           const std::string& expected) {
@@ -559,8 +834,6 @@ TYPED_TEST(TestDecimalFromReal, TestSuccess) { this->TestSuccess(); }
 
 TYPED_TEST(TestDecimalFromReal, TestErrors) { this->TestErrors(); }
 
-using DecimalTypes = ::testing::Types<Decimal128, Decimal256>;
-
 // Tests for Decimal128::FromReal(float, ...) and Decimal256::FromReal(float, ...)
 template <typename T>
 class TestDecimalFromRealFloat : public ::testing::Test {
@@ -862,16 +1135,6 @@ TYPED_TEST(TestDecimalToRealDouble, Precision) {
 
 #endif  // __MINGW32__
 
-TEST(Decimal128Test, TestNoDecimalPointExponential) {
-  Decimal128 value;
-  int32_t precision;
-  int32_t scale;
-  ASSERT_OK(Decimal128::FromString("1E1", &value, &precision, &scale));
-  ASSERT_EQ(10, value.low_bits());
-  ASSERT_EQ(2, precision);
-  ASSERT_EQ(0, scale);
-}
-
 TEST(Decimal128Test, TestFromBigEndian) {
   // We test out a variety of scenarios:
   //
diff --git a/cpp/src/gandiva/tests/decimal_test.cc b/cpp/src/gandiva/tests/decimal_test.cc
index 51e9dcb3c87..31f2dedf5c8 100644
--- a/cpp/src/gandiva/tests/decimal_test.cc
+++ b/cpp/src/gandiva/tests/decimal_test.cc
@@ -1012,7 +1012,7 @@ TEST_F(TestDecimal, TestCastDecimalVarCharInvalidInput) {
   arrow::ArrayVector outputs_1;
   status = projector->Evaluate(*in_batch_1, pool_, &outputs_1);
   EXPECT_FALSE(status.ok()) << status.message();
-  EXPECT_TRUE(status.message().find("not a valid decimal number") != std::string::npos);
+  EXPECT_NE(status.message().find("not a valid decimal128 number"), std::string::npos);
 }
 
 TEST_F(TestDecimal, TestVarCharDecimalNestedCast) {

From e5b0957b2c591f383a0de546727a08176687d29d Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Tue, 3 Aug 2021 13:41:13 +0200
Subject: [PATCH 690/719] ARROW-13421: [C++][Python] Add CSV convert option to
 change decimal point

Closes #10852 from pitrou/ARROW-13421-csv-decimal-point

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/csv/converter.cc       | 95 ++++++++++++++++++++++++++--
 cpp/src/arrow/csv/converter_test.cc  | 38 +++++++++++
 cpp/src/arrow/csv/options.h          |  3 +
 python/pyarrow/_csv.pyx              | 38 ++++++++---
 python/pyarrow/includes/libarrow.pxd |  1 +
 python/pyarrow/tests/test_csv.py     | 30 +++++++++
 6 files changed, 189 insertions(+), 16 deletions(-)

diff --git a/cpp/src/arrow/csv/converter.cc b/cpp/src/arrow/csv/converter.cc
index b1cde12a28e..5381e733914 100644
--- a/cpp/src/arrow/csv/converter.cc
+++ b/cpp/src/arrow/csv/converter.cc
@@ -17,6 +17,7 @@
 
 #include "arrow/csv/converter.h"
 
+#include <array>
 #include <cstring>
 #include <limits>
 #include <sstream>
@@ -290,6 +291,55 @@ struct DecimalValueDecoder : public ValueDecoder {
   const int32_t type_scale_;
 };
 
+//
+// Value decoder wrapper for floating-point and decimals
+// with a non-default decimal point
+//
+
+template <typename WrappedDecoder>
+struct CustomDecimalPointValueDecoder : public ValueDecoder {
+  using value_type = typename WrappedDecoder::value_type;
+
+  explicit CustomDecimalPointValueDecoder(const std::shared_ptr<DataType>& type,
+                                          const ConvertOptions& options)
+      : ValueDecoder(type, options), wrapped_decoder_(type, options) {}
+
+  Status Initialize() {
+    RETURN_NOT_OK(wrapped_decoder_.Initialize());
+    for (int i = 0; i < 256; ++i) {
+      mapping_[i] = i;
+    }
+    mapping_[options_.decimal_point] = '.';
+    mapping_['.'] = options_.decimal_point;  // error out on standard decimal point
+    temp_.resize(30);
+    return Status::OK();
+  }
+
+  Status Decode(const uint8_t* data, uint32_t size, bool quoted, value_type* out) {
+    if (ARROW_PREDICT_FALSE(size > temp_.size())) {
+      temp_.resize(size);
+    }
+    uint8_t* temp_data = temp_.data();
+    for (uint32_t i = 0; i < size; ++i) {
+      temp_data[i] = mapping_[data[i]];
+    }
+    if (ARROW_PREDICT_FALSE(
+            !wrapped_decoder_.Decode(temp_data, size, quoted, out).ok())) {
+      return GenericConversionError(type_, data, size);
+    }
+    return Status::OK();
+  }
+
+  bool IsNull(const uint8_t* data, uint32_t size, bool quoted) {
+    return wrapped_decoder_.IsNull(data, size, quoted);
+  }
+
+ protected:
+  WrappedDecoder wrapped_decoder_;
+  std::array<uint8_t, 256> mapping_;
+  std::vector<uint8_t> temp_;
+};
+
 //
 // Value decoders for timestamps
 //
@@ -532,6 +582,24 @@ std::shared_ptr<Converter> MakeTimestampConverter(const std::shared_ptr<DataType
   }
 }
 
+//
+// Concrete Converter factory for reals
+//
+
+template <typename ConverterType, template <typename...> class ConcreteConverterType,
+          typename Type, typename DecoderType>
+std::shared_ptr<ConverterType> MakeRealConverter(const std::shared_ptr<DataType>& type,
+                                                 const ConvertOptions& options,
+                                                 MemoryPool* pool) {
+  if (options.decimal_point == '.') {
+    return std::make_shared<ConcreteConverterType<Type, DecoderType>>(type, options,
+                                                                      pool);
+  }
+  return std::make_shared<
+      ConcreteConverterType<Type, CustomDecimalPointValueDecoder<DecoderType>>>(
+      type, options, pool);
+}
+
 }  // namespace
 
 /////////////////////////////////////////////////////////////////////////
@@ -561,6 +629,12 @@ Result<std::shared_ptr<Converter>> Converter::Make(const std::shared_ptr<DataTyp
   CONVERTER_CASE(TYPE_ID,                           \
                  (PrimitiveConverter<TYPE_CLASS, NumericValueDecoder<TYPE_CLASS>>))
 
+#define REAL_CONVERTER_CASE(TYPE_ID, TYPE_CLASS, DECODER)                        \
+  case TYPE_ID:                                                                  \
+    ptr = MakeRealConverter<Converter, PrimitiveConverter, TYPE_CLASS, DECODER>( \
+        type, options, pool);                                                    \
+    break;
+
     CONVERTER_CASE(Type::NA, NullConverter)
     NUMERIC_CONVERTER_CASE(Type::INT8, Int8Type)
     NUMERIC_CONVERTER_CASE(Type::INT16, Int16Type)
@@ -570,8 +644,9 @@ Result<std::shared_ptr<Converter>> Converter::Make(const std::shared_ptr<DataTyp
     NUMERIC_CONVERTER_CASE(Type::UINT16, UInt16Type)
     NUMERIC_CONVERTER_CASE(Type::UINT32, UInt32Type)
     NUMERIC_CONVERTER_CASE(Type::UINT64, UInt64Type)
-    NUMERIC_CONVERTER_CASE(Type::FLOAT, FloatType)
-    NUMERIC_CONVERTER_CASE(Type::DOUBLE, DoubleType)
+    REAL_CONVERTER_CASE(Type::FLOAT, FloatType, NumericValueDecoder<FloatType>)
+    REAL_CONVERTER_CASE(Type::DOUBLE, DoubleType, NumericValueDecoder<DoubleType>)
+    REAL_CONVERTER_CASE(Type::DECIMAL, Decimal128Type, DecimalValueDecoder)
     NUMERIC_CONVERTER_CASE(Type::DATE32, Date32Type)
     NUMERIC_CONVERTER_CASE(Type::DATE64, Date64Type)
     NUMERIC_CONVERTER_CASE(Type::TIME32, Time32Type)
@@ -583,8 +658,6 @@ Result<std::shared_ptr<Converter>> Converter::Make(const std::shared_ptr<DataTyp
                    (PrimitiveConverter<LargeBinaryType, BinaryValueDecoder<false>>))
     CONVERTER_CASE(Type::FIXED_SIZE_BINARY,
                    (PrimitiveConverter<FixedSizeBinaryType, FixedSizeBinaryValueDecoder>))
-    CONVERTER_CASE(Type::DECIMAL,
-                   (PrimitiveConverter<Decimal128Type, DecimalValueDecoder>))
 
     case Type::TIMESTAMP:
       ptr = MakeTimestampConverter<PrimitiveConverter>(type, options, pool);
@@ -630,6 +703,7 @@ Result<std::shared_ptr<Converter>> Converter::Make(const std::shared_ptr<DataTyp
 
 #undef CONVERTER_CASE
 #undef NUMERIC_CONVERTER_CASE
+#undef REAL_CONVERTER_CASE
   }
   RETURN_NOT_OK(ptr->Initialize());
   return ptr;
@@ -647,14 +721,20 @@ Result<std::shared_ptr<DictionaryConverter>> DictionaryConverter::Make(
         new TypedDictionaryConverter<TYPE, VALUE_DECODER_TYPE>(type, options, pool)); \
     break;
 
+#define REAL_CONVERTER_CASE(TYPE_ID, TYPE_CLASS, DECODER)                              \
+  case TYPE_ID:                                                                        \
+    ptr = MakeRealConverter<DictionaryConverter, TypedDictionaryConverter, TYPE_CLASS, \
+                            DECODER>(type, options, pool);                             \
+    break;
+
     // XXX Are 32-bit types useful?
     CONVERTER_CASE(Type::INT32, Int32Type, NumericValueDecoder<Int32Type>)
     CONVERTER_CASE(Type::INT64, Int64Type, NumericValueDecoder<Int64Type>)
     CONVERTER_CASE(Type::UINT32, UInt32Type, NumericValueDecoder<UInt32Type>)
     CONVERTER_CASE(Type::UINT64, UInt64Type, NumericValueDecoder<UInt64Type>)
-    CONVERTER_CASE(Type::FLOAT, FloatType, NumericValueDecoder<FloatType>)
-    CONVERTER_CASE(Type::DOUBLE, DoubleType, NumericValueDecoder<DoubleType>)
-    CONVERTER_CASE(Type::DECIMAL, Decimal128Type, DecimalValueDecoder)
+    REAL_CONVERTER_CASE(Type::FLOAT, FloatType, NumericValueDecoder<FloatType>)
+    REAL_CONVERTER_CASE(Type::DOUBLE, DoubleType, NumericValueDecoder<DoubleType>)
+    REAL_CONVERTER_CASE(Type::DECIMAL, Decimal128Type, DecimalValueDecoder)
     CONVERTER_CASE(Type::FIXED_SIZE_BINARY, FixedSizeBinaryType,
                    FixedSizeBinaryValueDecoder)
     CONVERTER_CASE(Type::BINARY, BinaryType, BinaryValueDecoder<false>)
@@ -690,6 +770,7 @@ Result<std::shared_ptr<DictionaryConverter>> DictionaryConverter::Make(
     }
 
 #undef CONVERTER_CASE
+#undef REAL_CONVERTER_CASE
   }
   RETURN_NOT_OK(ptr->Initialize());
   return ptr;
diff --git a/cpp/src/arrow/csv/converter_test.cc b/cpp/src/arrow/csv/converter_test.cc
index f59d184f749..0407de91f67 100644
--- a/cpp/src/arrow/csv/converter_test.cc
+++ b/cpp/src/arrow/csv/converter_test.cc
@@ -398,6 +398,18 @@ TEST(FloatingPointConversion, Whitespace) {
                                        {{12., 0.}, {34.5, -1e100}});
 }
 
+TEST(FloatingPointConversion, CustomDecimalPoint) {
+  auto options = ConvertOptions::Defaults();
+  options.decimal_point = '/';
+
+  AssertConversion<FloatType, float>(float32(), {"1/5\n", "-1e10\n", "N/A\n"},
+                                     {{1.5, -1e10f, 0.}}, {{true, true, false}}, options);
+  AssertConversion<DoubleType, double>(float64(), {"1/5\n", "-1e10\n", "N/A\n"},
+                                       {{1.5, -1e10, 0.}}, {{true, true, false}},
+                                       options);
+  AssertConversionError(float32(), {"1.5\n"}, {0}, options);
+}
+
 TEST(BooleanConversion, Basics) {
   // XXX we may want to accept more bool-like values
   AssertConversion<BooleanType, bool>(boolean(), {"true,false\n", "1,0\n"},
@@ -582,6 +594,17 @@ TEST(DecimalConversion, CustomNulls) {
       {{true, false}, {false, true}}, options);
 }
 
+TEST(DecimalConversion, CustomDecimalPoint) {
+  auto options = ConvertOptions::Defaults();
+  options.decimal_point = '/';
+
+  AssertConversion<Decimal128Type, Decimal128>(
+      decimal(14, 3), {"1/5,0/\n", ",-1e3\n"},
+      {{Dec128("1.500"), Decimal128()}, {Decimal128(), Dec128("-1000.000")}},
+      {{true, false}, {true, true}}, options);
+  AssertConversionError(decimal128(14, 3), {"1.5\n"}, {0}, options);
+}
+
 TEST(DecimalConversion, Whitespace) {
   AssertConversion<Decimal128Type, Decimal128>(
       decimal(5, 1), {" 12.00,34.5\n", " 0 ,-1e2 \n"},
@@ -725,6 +748,21 @@ TEST(TestDecimalDictConverter, Basics) {
                        expected_dict);
 }
 
+TEST(TestDecimalDictConverter, CustomDecimalPoint) {
+  auto value_type = decimal(9, 3);
+
+  auto options = ConvertOptions::Defaults();
+  options.decimal_point = '\'';
+
+  auto expected_dict = ArrayFromJSON(value_type, R"(["1.234", "456.789"])");
+  auto expected_indices = ArrayFromJSON(int32(), "[0, 1, null, 1]");
+
+  AssertDictConversion("1'234\n456'789\nN/A\n4'56789e2\n", expected_indices,
+                       expected_dict, -1, options);
+
+  ASSERT_RAISES(Invalid, DictConversion(value_type, "1.234\n", -1, options));
+}
+
 TEST(TestDecimalDictConverter, Errors) {
   auto value_type = decimal(9, 3);
 
diff --git a/cpp/src/arrow/csv/options.h b/cpp/src/arrow/csv/options.h
index 5face6f32d8..efa95c5b3a2 100644
--- a/cpp/src/arrow/csv/options.h
+++ b/cpp/src/arrow/csv/options.h
@@ -101,6 +101,9 @@ struct ARROW_EXPORT ConvertOptions {
   bool auto_dict_encode = false;
   int32_t auto_dict_max_cardinality = 50;
 
+  /// Decimal point character for floating-point and decimal data
+  char decimal_point = '.';
+
   // XXX Should we have a separate FilterOptions?
 
   /// If non-empty, indicates the names of columns from the CSV file that should
diff --git a/python/pyarrow/_csv.pyx b/python/pyarrow/_csv.pyx
index 2b7a835b10a..295c64246fe 100644
--- a/python/pyarrow/_csv.pyx
+++ b/python/pyarrow/_csv.pyx
@@ -437,6 +437,9 @@ cdef class ConvertOptions(_Weakrefable):
     false_values: list, optional
         A sequence of strings that denote false booleans in the data
         (defaults are appropriate in most cases).
+    decimal_point: 1-character string, optional (default '.')
+        The character used as decimal point in floating-point and decimal
+        data.
     timestamp_parsers: list, optional
         A sequence of strptime()-compatible format strings, tried in order
         when attempting to infer or convert timestamp values (the special
@@ -483,7 +486,7 @@ cdef class ConvertOptions(_Weakrefable):
             new CCSVConvertOptions(CCSVConvertOptions.Defaults()))
 
     def __init__(self, *, check_utf8=None, column_types=None, null_values=None,
-                 true_values=None, false_values=None,
+                 true_values=None, false_values=None, decimal_point=None,
                  strings_can_be_null=None, quoted_strings_can_be_null=None,
                  include_columns=None, include_missing_columns=None,
                  auto_dict_encode=None, auto_dict_max_cardinality=None,
@@ -498,6 +501,8 @@ cdef class ConvertOptions(_Weakrefable):
             self.true_values = true_values
         if false_values is not None:
             self.false_values = false_values
+        if decimal_point is not None:
+            self.decimal_point = decimal_point
         if strings_can_be_null is not None:
             self.strings_can_be_null = strings_can_be_null
         if quoted_strings_can_be_null is not None:
@@ -607,6 +612,18 @@ cdef class ConvertOptions(_Weakrefable):
     def false_values(self, value):
         deref(self.options).false_values = [tobytes(x) for x in value]
 
+    @property
+    def decimal_point(self):
+        """
+        The character used as decimal point in floating-point and decimal
+        data.
+        """
+        return chr(deref(self.options).decimal_point)
+
+    @decimal_point.setter
+    def decimal_point(self, value):
+        deref(self.options).decimal_point = _single_char(value)
+
     @property
     def auto_dict_encode(self):
         """
@@ -717,6 +734,7 @@ cdef class ConvertOptions(_Weakrefable):
             self.null_values == other.null_values and
             self.true_values == other.true_values and
             self.false_values == other.false_values and
+            self.decimal_point == other.decimal_point and
             self.timestamp_parsers == other.timestamp_parsers and
             self.strings_can_be_null == other.strings_can_be_null and
             self.quoted_strings_can_be_null ==
@@ -730,17 +748,19 @@ cdef class ConvertOptions(_Weakrefable):
 
     def __getstate__(self):
         return (self.check_utf8, self.column_types, self.null_values,
-                self.true_values, self.false_values, self.timestamp_parsers,
-                self.strings_can_be_null, self.quoted_strings_can_be_null,
-                self.auto_dict_encode, self.auto_dict_max_cardinality,
-                self.include_columns, self.include_missing_columns)
+                self.true_values, self.false_values, self.decimal_point,
+                self.timestamp_parsers, self.strings_can_be_null,
+                self.quoted_strings_can_be_null, self.auto_dict_encode,
+                self.auto_dict_max_cardinality, self.include_columns,
+                self.include_missing_columns)
 
     def __setstate__(self, state):
         (self.check_utf8, self.column_types, self.null_values,
-         self.true_values, self.false_values, self.timestamp_parsers,
-         self.strings_can_be_null, self.quoted_strings_can_be_null,
-         self.auto_dict_encode, self.auto_dict_max_cardinality,
-         self.include_columns, self.include_missing_columns) = state
+         self.true_values, self.false_values, self.decimal_point,
+         self.timestamp_parsers, self.strings_can_be_null,
+         self.quoted_strings_can_be_null, self.auto_dict_encode,
+         self.auto_dict_max_cardinality, self.include_columns,
+         self.include_missing_columns) = state
 
     def __eq__(self, other):
         try:
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 0b5ceb7a0ea..34d81bce04b 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -1607,6 +1607,7 @@ cdef extern from "arrow/csv/api.h" namespace "arrow::csv" nogil:
 
         c_bool auto_dict_encode
         int32_t auto_dict_max_cardinality
+        unsigned char decimal_point
 
         vector[c_string] include_columns
         c_bool include_missing_columns
diff --git a/python/pyarrow/tests/test_csv.py b/python/pyarrow/tests/test_csv.py
index 2f3ef4fca9e..b70072c8107 100644
--- a/python/pyarrow/tests/test_csv.py
+++ b/python/pyarrow/tests/test_csv.py
@@ -226,6 +226,7 @@ def test_convert_options():
         cls, check_utf8=[True, False],
         strings_can_be_null=[False, True],
         quoted_strings_can_be_null=[True, False],
+        decimal_point=['.', ','],
         include_columns=[[], ['def', 'abc']],
         include_missing_columns=[False, True],
         auto_dict_encode=[False, True],
@@ -235,11 +236,15 @@ def test_convert_options():
         cls, check_utf8=False,
         strings_can_be_null=True,
         quoted_strings_can_be_null=False,
+        decimal_point=',',
         include_columns=['def', 'abc'],
         include_missing_columns=False,
         auto_dict_encode=True,
         timestamp_parsers=[ISO8601, '%y-%m'])
 
+    with pytest.raises(ValueError):
+        opts.decimal_point = '..'
+
     assert opts.auto_dict_max_cardinality > 0
     opts.auto_dict_max_cardinality = 99999
     assert opts.auto_dict_max_cardinality == 99999
@@ -807,6 +812,31 @@ def test_simple_nulls(self):
             'f': [None, True, False],
         }
 
+    def test_decimal_point(self):
+        # Infer floats with a custom decimal point
+        parse_options = ParseOptions(delimiter=';')
+        rows = b"a;b\n1.25;2,5\nNA;-3\n-4;NA"
+
+        table = self.read_bytes(rows, parse_options=parse_options)
+        schema = pa.schema([('a', pa.float64()),
+                            ('b', pa.string())])
+        assert table.schema == schema
+        assert table.to_pydict() == {
+            'a': [1.25, None, -4.0],
+            'b': ["2,5", "-3", "NA"],
+        }
+
+        convert_options = ConvertOptions(decimal_point=',')
+        table = self.read_bytes(rows, parse_options=parse_options,
+                                convert_options=convert_options)
+        schema = pa.schema([('a', pa.string()),
+                            ('b', pa.float64())])
+        assert table.schema == schema
+        assert table.to_pydict() == {
+            'a': ["1.25", "NA", "-4"],
+            'b': [2.5, -3.0, None],
+        }
+
     def test_simple_timestamps(self):
         # Infer a timestamp column
         rows = (b"a,b,c\n"

From 6cacfff48098b97354a83e0879c156b13e7a0277 Mon Sep 17 00:00:00 2001
From: Yibo Cai <yibo.cai@arm.com>
Date: Tue, 3 Aug 2021 13:43:08 +0200
Subject: [PATCH 691/719] ARROW-13534: [C++] Improve csv chunker

Add hints to help compiler deleting dead code per template augments.
Tested with clang-10, gcc-9.3, on x86 and Arm, observed ~20% uplift for
quoting=false benchmark. Probably due to better code locality.

Closes #10859 from cyb70289/13534-csv-chunker

Authored-by: Yibo Cai <yibo.cai@arm.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/csv/chunker.cc | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/cpp/src/arrow/csv/chunker.cc b/cpp/src/arrow/csv/chunker.cc
index b3a0dead593..6ae9f492f62 100644
--- a/cpp/src/arrow/csv/chunker.cc
+++ b/cpp/src/arrow/csv/chunker.cc
@@ -63,12 +63,18 @@ class Lexer {
       case IN_FIELD:
         goto InField;
       case AT_ESCAPE:
+        // will never reach here if escaping = false
+        // just to hint the compiler to remove dead code
+        if (!escaping) return nullptr;
         goto AtEscape;
       case IN_QUOTED_FIELD:
+        if (!quoting) return nullptr;
         goto InQuotedField;
       case AT_QUOTED_QUOTE:
+        if (!quoting) return nullptr;
         goto AtQuotedQuote;
       case AT_QUOTED_ESCAPE:
+        if (!quoting) return nullptr;
         goto AtQuotedEscape;
     }
 

From a52050a8617a3d1164ea3e960b0a0260023d98bd Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Tue, 3 Aug 2021 08:58:39 -0500
Subject: [PATCH 692/719] ARROW-13326: [R] [Archery] Add linting to dev CI
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This does three things:

  * Adds a linting step to the dev workflow that uses {lintr} to check that the R package lints correctly
  * Adds an autotune step that uses {styler} to autostyle R code such that it should then lint fine
  * Makes the slew of changes needed for the package to lint cleanly — there's a huge delta here, but it is almost all whitespace changes

Given the wide-reaching nature of these changes, it does already conflict with changes we've merged in. I am happy to go through, rebase, fix all that needs to be fixed and then merge quickly so we don't continue conflicting once we decide this is a good way to go. Though it would be helpful to do this once and only after we all agree this is a good step forward.

Closes #10805 from jonkeane/ARROW-13326-r-linting

Authored-by: Jonathan Keane <jkeane@gmail.com>
Signed-off-by: Jonathan Keane <jkeane@gmail.com>
---
 .github/workflows/comment_bot.yml             |   16 +-
 .github/workflows/dev.yml                     |    2 +-
 ci/docker/linux-apt-lint.dockerfile           |   39 +
 r/.Rbuildignore                               |    1 +
 r/.lintr                                      |   31 +
 r/R/array.R                                   |   20 +-
 r/R/arrow-datum.R                             |   12 +-
 r/R/arrow-package.R                           |   43 +-
 r/R/arrow-tabular.R                           |    5 +-
 r/R/arrowExports.R                            | 1751 ++++++++---------
 r/R/buffer.R                                  |    6 +-
 r/R/chunked-array.R                           |    3 +-
 r/R/compression.R                             |    5 +-
 r/R/compute.R                                 |   34 +-
 r/R/csv.R                                     |   77 +-
 r/R/dataset-factory.R                         |    3 +-
 r/R/dataset-format.R                          |   76 +-
 r/R/dataset-partition.R                       |    5 +-
 r/R/dataset-scan.R                            |   13 +-
 r/R/dataset-write.R                           |    6 +-
 r/R/dataset.R                                 |   43 +-
 r/R/dplyr-arrange.R                           |    2 +-
 r/R/dplyr-collect.R                           |    2 +-
 r/R/dplyr-eval.R                              |   10 +-
 r/R/dplyr-filter.R                            |    4 +-
 r/R/dplyr-functions.R                         |   20 +-
 r/R/dplyr-group-by.R                          |    4 +-
 r/R/dplyr-mutate.R                            |    6 +-
 r/R/dplyr-select.R                            |    2 +-
 r/R/dplyr-summarize.R                         |    4 +-
 r/R/dplyr.R                                   |    2 +-
 r/R/duckdb.R                                  |    8 +-
 r/R/expression.R                              |    8 +-
 r/R/feather.R                                 |    7 +-
 r/R/field.R                                   |    4 +-
 r/R/filesystem.R                              |   35 +-
 r/R/flight.R                                  |    3 +-
 r/R/io.R                                      |   25 +-
 r/R/ipc_stream.R                              |    8 +-
 r/R/json.R                                    |   14 +-
 r/R/message.R                                 |    6 +-
 r/R/metadata.R                                |  109 +-
 r/R/parquet.R                                 |   43 +-
 r/R/python.R                                  |    3 +-
 r/R/record-batch-reader.R                     |    9 +-
 r/R/record-batch-writer.R                     |   10 +-
 r/R/record-batch.R                            |    4 +-
 r/R/scalar.R                                  |    8 +-
 r/R/schema.R                                  |    8 +-
 r/R/table.R                                   |   10 +-
 r/R/type.R                                    |    3 +-
 r/R/util.R                                    |   49 +-
 r/lint.sh                                     |    4 +
 r/man/ChunkedArray.Rd                         |   22 -
 r/man/Field.Rd                                |    5 -
 r/man/FileFormat.Rd                           |   15 -
 r/man/ParquetFileReader.Rd                    |   15 +-
 r/man/RecordBatch.Rd                          |   11 -
 r/man/RecordBatchReader.Rd                    |   37 -
 r/man/RecordBatchWriter.Rd                    |   37 -
 r/man/Scalar.Rd                               |   17 -
 r/man/Schema.Rd                               |    9 -
 r/man/Table.Rd                                |   11 -
 r/man/array.Rd                                |   23 -
 r/man/buffer.Rd                               |    9 -
 r/man/call_function.Rd                        |   13 +-
 r/man/codec_is_available.Rd                   |    5 -
 r/man/copy_files.Rd                           |   10 -
 r/man/data-type.Rd                            |    8 -
 r/man/hive_partition.Rd                       |    5 -
 r/man/list_compute_functions.Rd               |    7 -
 r/man/load_flight_server.Rd                   |    5 -
 r/man/match_arrow.Rd                          |   25 -
 r/man/open_dataset.Rd                         |   49 -
 r/man/read_delim_arrow.Rd                     |   11 -
 r/man/read_feather.Rd                         |   11 -
 r/man/read_json_arrow.Rd                      |   12 -
 r/man/read_parquet.Rd                         |    9 -
 r/man/s3_bucket.Rd                            |    5 -
 r/man/to_duckdb.Rd                            |   19 -
 r/man/type.Rd                                 |   10 -
 r/man/unify_schemas.Rd                        |    7 -
 r/man/value_counts.Rd                         |    6 -
 r/man/write_csv_arrow.Rd                      |    7 -
 r/man/write_feather.Rd                        |    7 -
 r/man/write_ipc_stream.Rd                     |    7 -
 r/man/write_parquet.Rd                        |   12 -
 r/man/write_to_raw.Rd                         |    7 -
 r/tests/testthat/helper-data.R                |   17 +-
 r/tests/testthat/helper-expectation.R         |   24 +-
 r/tests/testthat/helper-skip.R                |    2 +-
 r/tests/testthat/test-Array.R                 |  121 +-
 r/tests/testthat/test-RecordBatch.R           |   58 +-
 r/tests/testthat/test-Table.R                 |   50 +-
 r/tests/testthat/test-arrow-info.R            |    2 +-
 r/tests/testthat/test-arrow.R                 |    9 +-
 .../testthat/test-backwards-compatibility.R   |   20 +-
 r/tests/testthat/test-buffer-reader.R         |    4 +-
 r/tests/testthat/test-buffer.R                |    2 +-
 r/tests/testthat/test-chunked-array.R         |   25 +-
 r/tests/testthat/test-compute-aggregate.R     |   14 +-
 r/tests/testthat/test-compute-arith.R         |   24 +-
 r/tests/testthat/test-compute-vector.R        |    4 +-
 r/tests/testthat/test-csv.R                   |   56 +-
 r/tests/testthat/test-data-type.R             |   11 +-
 r/tests/testthat/test-dataset.R               |  123 +-
 r/tests/testthat/test-dplyr-arrange.R         |    2 +-
 r/tests/testthat/test-dplyr-filter.R          |   15 +-
 r/tests/testthat/test-dplyr-lubridate.R       |    9 +-
 r/tests/testthat/test-dplyr-mutate.R          |   27 +-
 r/tests/testthat/test-dplyr.R                 |  269 +--
 r/tests/testthat/test-duckdb.R                |    2 +-
 r/tests/testthat/test-expression.R            |    7 +-
 r/tests/testthat/test-feather.R               |    8 +-
 r/tests/testthat/test-filesystem.R            |    2 +-
 r/tests/testthat/test-install-arrow.R         |    2 +-
 r/tests/testthat/test-json.R                  |   46 +-
 r/tests/testthat/test-message.R               |    4 +-
 r/tests/testthat/test-metadata.R              |    6 +-
 r/tests/testthat/test-na-omit.R               |    7 +-
 r/tests/testthat/test-parquet.R               |   13 +-
 r/tests/testthat/test-python-flight.R         |    1 -
 r/tests/testthat/test-read-write.R            |    6 +-
 r/tests/testthat/test-s3-minio.R              |    2 -
 r/tests/testthat/test-s3.R                    |   23 +-
 r/tests/testthat/test-scalar.R                |   11 +-
 r/tests/testthat/test-schema.R                |    4 +-
 r/tests/testthat/test-type.R                  |    3 +-
 r/vignettes/developing.Rmd                    |   13 +
 129 files changed, 2003 insertions(+), 2138 deletions(-)
 create mode 100644 r/.lintr

diff --git a/.github/workflows/comment_bot.yml b/.github/workflows/comment_bot.yml
index 5847974ae9f..5709ceaffd6 100644
--- a/.github/workflows/comment_bot.yml
+++ b/.github/workflows/comment_bot.yml
@@ -68,6 +68,7 @@ jobs:
           }
           if changed '^r/.*\.R$'; then
             echo "R_DOCS=true" >> $GITHUB_ENV
+            echo "R_CODE=true" >> $GITHUB_ENV
           fi
           if changed 'cmake' || changed 'CMake'; then
             echo "CMAKE_FORMAT=true" >> $GITHUB_ENV
@@ -113,7 +114,7 @@ jobs:
               --exclude_glob=cpp/build-support/lint_exclusions.txt \
               --source_dir=r/src --quiet --fix
       - uses: r-lib/actions/setup-r@v1
-        if: env.R_DOCS == 'true' || endsWith(github.event.comment.body, 'everything')
+        if: env.R_DOCS == 'true' || env.R_CODE == 'true' || endsWith(github.event.comment.body, 'everything')
       - name: Update R docs
         if: env.R_DOCS == 'true' || endsWith(github.event.comment.body, 'everything')
         shell: Rscript {0}
@@ -124,6 +125,19 @@ jobs:
           remotes::install_github("r-lib/roxygen2")
           remotes::install_deps("r")
           roxygen2::roxygenize("r")
+      - name: Style R code
+        if: env.R_CODE == 'true' || endsWith(github.event.comment.body, 'everything')
+        shell: Rscript {0}
+        run: |
+          changed_files <- system("git diff --name-only HEAD..upstream/master 2>&1", intern = TRUE)
+          # only grab the .R files under r/
+          changed_files <- grep('^r/.*\\.R$', changed_files, value = TRUE)
+          # remove latin1 which is unstylable due to encoding and codegen.R which is unique
+          changed_files <- changed_files[!changed_files %in% c("r/tests/testthat/latin1.R", "r/data-raw/codegen.R")]
+          source("ci/etc/rprofile")
+          install.packages(c("remotes", "styler"))
+          remotes::install_deps("r")
+          styler::style_file(changed_files)
       - name: Commit results
         run: |
           git config user.name "$(git log -1 --pretty=format:%an)"
diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml
index 03851a0cc88..9ef46c31fa3 100644
--- a/.github/workflows/dev.yml
+++ b/.github/workflows/dev.yml
@@ -55,7 +55,7 @@ jobs:
         run: |
           sudo sysctl -w kernel.core_pattern="core.%e.%p"
           ulimit -c unlimited
-          archery docker run ubuntu-lint
+          archery docker run -e GITHUB_ACTIONS=true ubuntu-lint
       - name: Docker Push
         if: success() && github.event_name == 'push' && github.repository == 'apache/arrow'
         continue-on-error: true
diff --git a/ci/docker/linux-apt-lint.dockerfile b/ci/docker/linux-apt-lint.dockerfile
index 2f4bf0010bc..04646585322 100644
--- a/ci/docker/linux-apt-lint.dockerfile
+++ b/ci/docker/linux-apt-lint.dockerfile
@@ -35,9 +35,48 @@ RUN apt-get update && \
         python3-dev \
         python3-pip \
         ruby \
+        apt-transport-https \
+        software-properties-common \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists/*
 
+ARG r=4.1
+RUN apt-key adv \
+        --keyserver keyserver.ubuntu.com \
+        --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 && \
+    # NOTE: R 3.5 and 3.6 are available in the repos with -cran35 suffix
+    # for trusty, xenial, bionic, and eoan (as of May 2020)
+    # -cran40 has 4.0 versions for bionic and focal
+    # R 3.2, 3.3, 3.4 are available without the suffix but only for trusty and xenial
+    # TODO: make sure OS version and R version are valid together and conditionally set repo suffix
+    # This is a hack to turn 3.6 into 35, and 4.0/4.1 into 40:
+    add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu '$(lsb_release -cs)'-cran'$(echo "${r}" | tr -d . | tr 6 5 | tr 1 0)'/' && \
+    apt-get install -y \
+        r-base=${r}* \
+        r-recommended=${r}* \
+        libxml2-dev
+
+# Ensure parallel R package installation, set CRAN repo mirror,
+# and use pre-built binaries where possible
+COPY ci/etc/rprofile /arrow/ci/etc/
+RUN cat /arrow/ci/etc/rprofile >> $(R RHOME)/etc/Rprofile.site
+# Also ensure parallel compilation of C/C++ code
+RUN echo "MAKEFLAGS=-j$(R -s -e 'cat(parallel::detectCores())')" >> $(R RHOME)/etc/Makeconf
+
+
+COPY ci/scripts/r_deps.sh /arrow/ci/scripts/
+COPY r/DESCRIPTION /arrow/r/
+# We need to install Arrow's dependencies in order for lintr's namespace searching to work.
+# This could be removed if lintr no longer loads the dependency namespaces (see issues/PRs below)
+RUN /arrow/ci/scripts/r_deps.sh /arrow
+# This fork has a number of changes that have PRs and Issues to resolve upstream:
+#   https://github.com/jimhester/lintr/pull/843
+#   https://github.com/jimhester/lintr/pull/841
+#   https://github.com/jimhester/lintr/pull/845
+#   https://github.com/jimhester/lintr/issues/842
+#   https://github.com/jimhester/lintr/issues/846
+RUN R -e "remotes::install_github('jonkeane/lintr@arrow-branch')"
+
 # Docker linter
 COPY --from=hadolint /bin/hadolint /usr/bin/hadolint
 
diff --git a/r/.Rbuildignore b/r/.Rbuildignore
index cf4b7ce31ba..2f4cea9a34d 100644
--- a/r/.Rbuildignore
+++ b/r/.Rbuildignore
@@ -24,3 +24,4 @@ clang_format.sh
 ^apache-arrow.rb$
 ^.*\.Rhistory$
 ^extra-tests
+^.lintr
diff --git a/r/.lintr b/r/.lintr
new file mode 100644
index 00000000000..fb9ca8f87c7
--- /dev/null
+++ b/r/.lintr
@@ -0,0 +1,31 @@
+license:  #  Licensed to the Apache Software Foundation (ASF) under one
+  #  or more contributor license agreements.  See the NOTICE file
+  #  distributed with this work for additional information
+  #  regarding copyright ownership.  The ASF licenses this file
+  #  to you under the Apache License, Version 2.0 (the
+  #  "License"); you may not use this file except in compliance
+  #  with the License.  You may obtain a copy of the License at
+  #
+  #   http://www.apache.org/licenses/LICENSE-2.0
+  #
+  #  Unless required by applicable law or agreed to in writing,
+  #  software distributed under the License is distributed on an
+  #  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  #  KIND, either express or implied.  See the License for the
+  #  specific language governing permissions and limitations
+  #  under the License.
+linters: with_defaults(
+  line_length_linter = line_length_linter(120),
+  object_name_linter = NULL,
+  # Even with a liberal definition of name styles, some of our names cause issues due to `.`s for s3 classes or NA in the name
+  # TODO: figure out if we con contribute to lintr to make these work
+  # object_name_linter = object_name_linter(styles = c("snake_case", "camelCase", "CamelCase", "symbols", "dotted.case", "UPPERCASE", "SNAKE_CASE")),
+  object_length_linter = object_length_linter(40),
+  object_usage_linter = NULL, # R6 methods are flagged,
+  cyclocomp_linter = cyclocomp_linter(26) # TODO: reduce to default of 15
+  )
+exclusions: list(
+  "tests/testthat/latin1.R",
+  "R/arrowExports.R",
+  "data-raw/codegen.R"
+  )
diff --git a/r/R/array.R b/r/R/array.R
index 93d148ec29b..46acc14ff0e 100644
--- a/r/R/array.R
+++ b/r/R/array.R
@@ -101,10 +101,9 @@
 #' new_array$offset
 #'
 #' # Compare 2 arrays
-#' na_array2 = na_array
+#' na_array2 <- na_array
 #' na_array2 == na_array # element-wise comparison
 #' na_array2$Equals(na_array) # overall comparison
-#'
 #' @export
 Array <- R6Class("Array",
   inherit = ArrowDatum,
@@ -196,7 +195,8 @@ Array$import_from_c <- ImportArray
 #' @usage NULL
 #' @format NULL
 #' @export
-DictionaryArray <- R6Class("DictionaryArray", inherit = Array,
+DictionaryArray <- R6Class("DictionaryArray",
+  inherit = Array,
   public = list(
     indices = function() DictionaryArray__indices(self),
     dictionary = function() DictionaryArray__dictionary(self)
@@ -227,7 +227,8 @@ DictionaryArray$create <- function(x, dict = NULL) {
 #' @usage NULL
 #' @format NULL
 #' @export
-StructArray <- R6Class("StructArray", inherit = Array,
+StructArray <- R6Class("StructArray",
+  inherit = Array,
   public = list(
     field = function(i) StructArray__field(self, i),
     GetFieldByName = function(name) StructArray__GetFieldByName(self, name),
@@ -272,7 +273,8 @@ as.data.frame.StructArray <- function(x, row.names = NULL, optional = FALSE, ...
 #' @usage NULL
 #' @format NULL
 #' @export
-ListArray <- R6Class("ListArray", inherit = Array,
+ListArray <- R6Class("ListArray",
+  inherit = Array,
   public = list(
     values = function() ListArray__values(self),
     value_length = function(i) ListArray__value_length(self, i),
@@ -288,7 +290,8 @@ ListArray <- R6Class("ListArray", inherit = Array,
 #' @usage NULL
 #' @format NULL
 #' @export
-LargeListArray <- R6Class("LargeListArray", inherit = Array,
+LargeListArray <- R6Class("LargeListArray",
+  inherit = Array,
   public = list(
     values = function() LargeListArray__values(self),
     value_length = function(i) LargeListArray__value_length(self, i),
@@ -304,7 +307,8 @@ LargeListArray <- R6Class("LargeListArray", inherit = Array,
 #' @usage NULL
 #' @format NULL
 #' @export
-FixedSizeListArray <- R6Class("FixedSizeListArray", inherit = Array,
+FixedSizeListArray <- R6Class("FixedSizeListArray",
+  inherit = Array,
   public = list(
     values = function() FixedSizeListArray__values(self),
     value_length = function(i) FixedSizeListArray__value_length(self, i),
@@ -316,7 +320,7 @@ FixedSizeListArray <- R6Class("FixedSizeListArray", inherit = Array,
   )
 )
 
-is.Array <- function(x, type = NULL) {
+is.Array <- function(x, type = NULL) { # nolint
   is_it <- inherits(x, c("Array", "ChunkedArray"))
   if (is_it && !is.null(type)) {
     is_it <- x$type$ToString() %in% type
diff --git a/r/R/arrow-datum.R b/r/R/arrow-datum.R
index 4734d44c7ea..b3635f239c4 100644
--- a/r/R/arrow-datum.R
+++ b/r/R/arrow-datum.R
@@ -19,7 +19,8 @@
 
 # Base class for Array, ChunkedArray, and Scalar, for S3 method dispatch only.
 # Does not exist in C++ class hierarchy
-ArrowDatum <- R6Class("ArrowDatum", inherit = ArrowObject,
+ArrowDatum <- R6Class("ArrowDatum",
+  inherit = ArrowObject,
   public = list(
     cast = function(target_type, safe = TRUE, ...) {
       opts <- cast_options(safe, ...)
@@ -107,15 +108,14 @@ eval_array_expression <- function(FUN,
   # integer inputs and floating-point division on floats
   if (FUN == "/") {
     # TODO: omg so many ways it's wrong to assume these types
-    args <- map(args, ~.$cast(float64()))
+    args <- map(args, ~ .$cast(float64()))
   } else if (FUN == "%/%") {
     # In R, integer division works like floor(float division)
     out <- eval_array_expression("/", args = args, options = options)
     return(out$cast(int32(), allow_float_truncate = TRUE))
   } else if (FUN == "%%") {
-    # {e1 - e2 * ( e1 %/% e2 )}
-    # ^^^ form doesn't work because Ops.Array evaluates eagerly,
-    # but we can build that up
+    # We can't simply do {e1 - e2 * ( e1 %/% e2 )} since Ops.Array evaluates
+    # eagerly, but we can build that up
     quotient <- eval_array_expression("%/%", args = args)
     base <- eval_array_expression("*", quotient, args[[2]])
     # this cast is to ensure that the result of this and e1 are the same
@@ -193,7 +193,7 @@ filter_rows <- function(x, i, keep_na = TRUE, ...) {
     if (is.Array(i)) {
       stop("Cannot extract rows with an Array of type ", i$type$ToString(), call. = FALSE)
     }
-    stop("Cannot extract rows with an object of class ", class(i), call.=FALSE)
+    stop("Cannot extract rows with an object of class ", class(i), call. = FALSE)
   }
 }
 
diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R
index 3ebd68776bb..10c14a00af4 100644
--- a/r/R/arrow-package.R
+++ b/r/R/arrow-package.R
@@ -19,7 +19,10 @@
 #' @importFrom R6 R6Class
 #' @importFrom purrr as_mapper map map2 map_chr map2_chr map_dfr map_int map_lgl keep imap_chr
 #' @importFrom assertthat assert_that is.string
-#' @importFrom rlang list2 %||% is_false abort dots_n warn enquo quo_is_null enquos is_integerish quos eval_tidy new_data_mask syms env new_environment env_bind as_label set_names exec is_bare_character quo_get_expr quo_set_expr .data seq2 is_quosure enexpr enexprs expr caller_env is_character quo_name
+#' @importFrom rlang list2 %||% is_false abort dots_n warn enquo quo_is_null enquos is_integerish quos
+#' @importFrom rlang eval_tidy new_data_mask syms env new_environment env_bind as_label set_names exec
+#' @importFrom rlang is_bare_character quo_get_expr quo_set_expr .data seq2 is_quosure enexpr enexprs
+#' @importFrom rlang expr caller_env is_character quo_name
 #' @importFrom tidyselect vars_pull vars_rename vars_select eval_select
 #' @useDynLib arrow, .registration = TRUE
 #' @keywords internal
@@ -42,8 +45,10 @@
   }
   s3_register("dplyr::tbl_vars", "arrow_dplyr_query")
 
-  for (cl in c("Array", "RecordBatch", "ChunkedArray", "Table", "Schema",
-               "Field", "DataType", "RecordBatchReader")) {
+  for (cl in c(
+    "Array", "RecordBatch", "ChunkedArray", "Table", "Schema",
+    "Field", "DataType", "RecordBatchReader"
+  )) {
     s3_register("reticulate::py_to_r", paste0("pyarrow.lib.", cl))
     s3_register("reticulate::r_to_py", cl)
   }
@@ -111,25 +116,33 @@
 #' `vignette("install", package = "arrow")` for guidance on reinstalling the
 #' package.
 arrow_available <- function() {
-  tryCatch(.Call(`_arrow_available`), error = function(e) return(FALSE))
+  tryCatch(.Call(`_arrow_available`), error = function(e) {
+    return(FALSE)
+  })
 }
 
 #' @rdname arrow_available
 #' @export
 arrow_with_dataset <- function() {
-  tryCatch(.Call(`_dataset_available`), error = function(e) return(FALSE))
+  tryCatch(.Call(`_dataset_available`), error = function(e) {
+    return(FALSE)
+  })
 }
 
 #' @rdname arrow_available
 #' @export
 arrow_with_parquet <- function() {
-  tryCatch(.Call(`_parquet_available`), error = function(e) return(FALSE))
+  tryCatch(.Call(`_parquet_available`), error = function(e) {
+    return(FALSE)
+  })
 }
 
 #' @rdname arrow_available
 #' @export
 arrow_with_s3 <- function() {
-  tryCatch(.Call(`_s3_available`), error = function(e) return(FALSE))
+  tryCatch(.Call(`_s3_available`), error = function(e) {
+    return(FALSE)
+  })
 }
 
 option_use_threads <- function() {
@@ -218,7 +231,10 @@ print.arrow_info <- function(x, ...) {
     ))
     if (some_features_are_off(x$capabilities) && identical(tolower(Sys.info()[["sysname"]]), "linux")) {
       # Only on linux because (e.g.) we disable certain features on purpose on rtools35 and solaris
-      cat("To reinstall with more optional capabilities enabled, see\n  https://arrow.apache.org/docs/r/articles/install.html\n\n")
+      cat(
+        "To reinstall with more optional capabilities enabled, see\n",
+        "  https://arrow.apache.org/docs/r/articles/install.html\n\n"
+      )
     }
 
     if (length(x$options)) {
@@ -245,7 +261,10 @@ print.arrow_info <- function(x, ...) {
       `Git ID` = x$build_info$git_id
     ))
   } else {
-    cat("Arrow C++ library not available. See https://arrow.apache.org/docs/r/articles/install.html for troubleshooting.\n")
+    cat(
+      "Arrow C++ library not available. See https://arrow.apache.org/docs/r/articles/install.html ",
+      "for troubleshooting.\n"
+    )
   }
   invisible(x)
 }
@@ -258,7 +277,6 @@ option_compress_metadata <- function() {
 ArrowObject <- R6Class("ArrowObject",
   public = list(
     initialize = function(xp) self$set_pointer(xp),
-
     pointer = function() get(".:xp:.", envir = self),
     `.:xp:.` = NULL,
     set_pointer = function(xp) {
@@ -284,7 +302,6 @@ ArrowObject <- R6Class("ArrowObject",
       }
       invisible(self)
     },
-
     invalidate = function() {
       assign(".:xp:.", NULL, envir = self)
     }
@@ -292,10 +309,10 @@ ArrowObject <- R6Class("ArrowObject",
 )
 
 #' @export
-`!=.ArrowObject` <- function(lhs, rhs) !(lhs == rhs)
+`!=.ArrowObject` <- function(lhs, rhs) !(lhs == rhs) # nolint
 
 #' @export
-`==.ArrowObject` <- function(x, y) {
+`==.ArrowObject` <- function(x, y) { # nolint
   x$Equals(y)
 }
 
diff --git a/r/R/arrow-tabular.R b/r/R/arrow-tabular.R
index 440dcea5994..250f4f90b39 100644
--- a/r/R/arrow-tabular.R
+++ b/r/R/arrow-tabular.R
@@ -19,7 +19,8 @@
 
 # Base class for RecordBatch and Table for S3 method dispatch only.
 # Does not exist in C++ class hierarchy
-ArrowTabular <- R6Class("ArrowTabular", inherit = ArrowObject,
+ArrowTabular <- R6Class("ArrowTabular",
+  inherit = ArrowObject,
   public = list(
     ToString = function() ToString_tabular(self),
     Take = function(i) {
@@ -223,7 +224,7 @@ na.fail.ArrowTabular <- function(object, ...) {
 
 #' @export
 na.omit.ArrowTabular <- function(object, ...) {
-  not_na <- map(object$columns, ~call_function("is_valid", .x))
+  not_na <- map(object$columns, ~ call_function("is_valid", .x))
   not_na_agg <- Reduce("&", not_na)
   object$Filter(not_na_agg)
 }
diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R
index a5187b497ce..e54f88e9d4e 100644
--- a/r/R/arrowExports.R
+++ b/r/R/arrowExports.R
@@ -1,1752 +1,1749 @@
 # Generated by using data-raw/codegen.R -> do not edit by hand
 
-is_altrep_int_nonull <- function(x){
-    .Call(`_arrow_is_altrep_int_nonull`, x)
+is_altrep_int_nonull <- function(x) {
+  .Call(`_arrow_is_altrep_int_nonull`, x)
 }
 
-is_altrep_dbl_nonull <- function(x){
-    .Call(`_arrow_is_altrep_dbl_nonull`, x)
+is_altrep_dbl_nonull <- function(x) {
+  .Call(`_arrow_is_altrep_dbl_nonull`, x)
 }
 
-Array__Slice1 <- function(array, offset){
-    .Call(`_arrow_Array__Slice1`, array, offset)
+Array__Slice1 <- function(array, offset) {
+  .Call(`_arrow_Array__Slice1`, array, offset)
 }
 
-Array__Slice2 <- function(array, offset, length){
-    .Call(`_arrow_Array__Slice2`, array, offset, length)
+Array__Slice2 <- function(array, offset, length) {
+  .Call(`_arrow_Array__Slice2`, array, offset, length)
 }
 
-Array__IsNull <- function(x, i){
-    .Call(`_arrow_Array__IsNull`, x, i)
+Array__IsNull <- function(x, i) {
+  .Call(`_arrow_Array__IsNull`, x, i)
 }
 
-Array__IsValid <- function(x, i){
-    .Call(`_arrow_Array__IsValid`, x, i)
+Array__IsValid <- function(x, i) {
+  .Call(`_arrow_Array__IsValid`, x, i)
 }
 
-Array__length <- function(x){
-    .Call(`_arrow_Array__length`, x)
+Array__length <- function(x) {
+  .Call(`_arrow_Array__length`, x)
 }
 
-Array__offset <- function(x){
-    .Call(`_arrow_Array__offset`, x)
+Array__offset <- function(x) {
+  .Call(`_arrow_Array__offset`, x)
 }
 
-Array__null_count <- function(x){
-    .Call(`_arrow_Array__null_count`, x)
+Array__null_count <- function(x) {
+  .Call(`_arrow_Array__null_count`, x)
 }
 
-Array__type <- function(x){
-    .Call(`_arrow_Array__type`, x)
+Array__type <- function(x) {
+  .Call(`_arrow_Array__type`, x)
 }
 
-Array__ToString <- function(x){
-    .Call(`_arrow_Array__ToString`, x)
+Array__ToString <- function(x) {
+  .Call(`_arrow_Array__ToString`, x)
 }
 
-Array__type_id <- function(x){
-    .Call(`_arrow_Array__type_id`, x)
+Array__type_id <- function(x) {
+  .Call(`_arrow_Array__type_id`, x)
 }
 
-Array__Equals <- function(lhs, rhs){
-    .Call(`_arrow_Array__Equals`, lhs, rhs)
+Array__Equals <- function(lhs, rhs) {
+  .Call(`_arrow_Array__Equals`, lhs, rhs)
 }
 
-Array__ApproxEquals <- function(lhs, rhs){
-    .Call(`_arrow_Array__ApproxEquals`, lhs, rhs)
+Array__ApproxEquals <- function(lhs, rhs) {
+  .Call(`_arrow_Array__ApproxEquals`, lhs, rhs)
 }
 
-Array__Diff <- function(lhs, rhs){
-    .Call(`_arrow_Array__Diff`, lhs, rhs)
+Array__Diff <- function(lhs, rhs) {
+  .Call(`_arrow_Array__Diff`, lhs, rhs)
 }
 
-Array__data <- function(array){
-    .Call(`_arrow_Array__data`, array)
+Array__data <- function(array) {
+  .Call(`_arrow_Array__data`, array)
 }
 
-Array__RangeEquals <- function(self, other, start_idx, end_idx, other_start_idx){
-    .Call(`_arrow_Array__RangeEquals`, self, other, start_idx, end_idx, other_start_idx)
+Array__RangeEquals <- function(self, other, start_idx, end_idx, other_start_idx) {
+  .Call(`_arrow_Array__RangeEquals`, self, other, start_idx, end_idx, other_start_idx)
 }
 
-Array__View <- function(array, type){
-    .Call(`_arrow_Array__View`, array, type)
+Array__View <- function(array, type) {
+  .Call(`_arrow_Array__View`, array, type)
 }
 
-Array__Validate <- function(array){
-    invisible(.Call(`_arrow_Array__Validate`, array))
+Array__Validate <- function(array) {
+  invisible(.Call(`_arrow_Array__Validate`, array))
 }
 
-DictionaryArray__indices <- function(array){
-    .Call(`_arrow_DictionaryArray__indices`, array)
+DictionaryArray__indices <- function(array) {
+  .Call(`_arrow_DictionaryArray__indices`, array)
 }
 
-DictionaryArray__dictionary <- function(array){
-    .Call(`_arrow_DictionaryArray__dictionary`, array)
+DictionaryArray__dictionary <- function(array) {
+  .Call(`_arrow_DictionaryArray__dictionary`, array)
 }
 
-StructArray__field <- function(array, i){
-    .Call(`_arrow_StructArray__field`, array, i)
+StructArray__field <- function(array, i) {
+  .Call(`_arrow_StructArray__field`, array, i)
 }
 
-StructArray__GetFieldByName <- function(array, name){
-    .Call(`_arrow_StructArray__GetFieldByName`, array, name)
+StructArray__GetFieldByName <- function(array, name) {
+  .Call(`_arrow_StructArray__GetFieldByName`, array, name)
 }
 
-StructArray__Flatten <- function(array){
-    .Call(`_arrow_StructArray__Flatten`, array)
+StructArray__Flatten <- function(array) {
+  .Call(`_arrow_StructArray__Flatten`, array)
 }
 
-ListArray__value_type <- function(array){
-    .Call(`_arrow_ListArray__value_type`, array)
+ListArray__value_type <- function(array) {
+  .Call(`_arrow_ListArray__value_type`, array)
 }
 
-LargeListArray__value_type <- function(array){
-    .Call(`_arrow_LargeListArray__value_type`, array)
+LargeListArray__value_type <- function(array) {
+  .Call(`_arrow_LargeListArray__value_type`, array)
 }
 
-ListArray__values <- function(array){
-    .Call(`_arrow_ListArray__values`, array)
+ListArray__values <- function(array) {
+  .Call(`_arrow_ListArray__values`, array)
 }
 
-LargeListArray__values <- function(array){
-    .Call(`_arrow_LargeListArray__values`, array)
+LargeListArray__values <- function(array) {
+  .Call(`_arrow_LargeListArray__values`, array)
 }
 
-ListArray__value_length <- function(array, i){
-    .Call(`_arrow_ListArray__value_length`, array, i)
+ListArray__value_length <- function(array, i) {
+  .Call(`_arrow_ListArray__value_length`, array, i)
 }
 
-LargeListArray__value_length <- function(array, i){
-    .Call(`_arrow_LargeListArray__value_length`, array, i)
+LargeListArray__value_length <- function(array, i) {
+  .Call(`_arrow_LargeListArray__value_length`, array, i)
 }
 
-FixedSizeListArray__value_length <- function(array, i){
-    .Call(`_arrow_FixedSizeListArray__value_length`, array, i)
+FixedSizeListArray__value_length <- function(array, i) {
+  .Call(`_arrow_FixedSizeListArray__value_length`, array, i)
 }
 
-ListArray__value_offset <- function(array, i){
-    .Call(`_arrow_ListArray__value_offset`, array, i)
+ListArray__value_offset <- function(array, i) {
+  .Call(`_arrow_ListArray__value_offset`, array, i)
 }
 
-LargeListArray__value_offset <- function(array, i){
-    .Call(`_arrow_LargeListArray__value_offset`, array, i)
+LargeListArray__value_offset <- function(array, i) {
+  .Call(`_arrow_LargeListArray__value_offset`, array, i)
 }
 
-FixedSizeListArray__value_offset <- function(array, i){
-    .Call(`_arrow_FixedSizeListArray__value_offset`, array, i)
+FixedSizeListArray__value_offset <- function(array, i) {
+  .Call(`_arrow_FixedSizeListArray__value_offset`, array, i)
 }
 
-ListArray__raw_value_offsets <- function(array){
-    .Call(`_arrow_ListArray__raw_value_offsets`, array)
+ListArray__raw_value_offsets <- function(array) {
+  .Call(`_arrow_ListArray__raw_value_offsets`, array)
 }
 
-LargeListArray__raw_value_offsets <- function(array){
-    .Call(`_arrow_LargeListArray__raw_value_offsets`, array)
+LargeListArray__raw_value_offsets <- function(array) {
+  .Call(`_arrow_LargeListArray__raw_value_offsets`, array)
 }
 
-Array__as_vector <- function(array){
-    .Call(`_arrow_Array__as_vector`, array)
+Array__as_vector <- function(array) {
+  .Call(`_arrow_Array__as_vector`, array)
 }
 
-ChunkedArray__as_vector <- function(chunked_array, use_threads){
-    .Call(`_arrow_ChunkedArray__as_vector`, chunked_array, use_threads)
+ChunkedArray__as_vector <- function(chunked_array, use_threads) {
+  .Call(`_arrow_ChunkedArray__as_vector`, chunked_array, use_threads)
 }
 
-RecordBatch__to_dataframe <- function(batch, use_threads){
-    .Call(`_arrow_RecordBatch__to_dataframe`, batch, use_threads)
+RecordBatch__to_dataframe <- function(batch, use_threads) {
+  .Call(`_arrow_RecordBatch__to_dataframe`, batch, use_threads)
 }
 
-Table__to_dataframe <- function(table, use_threads){
-    .Call(`_arrow_Table__to_dataframe`, table, use_threads)
+Table__to_dataframe <- function(table, use_threads) {
+  .Call(`_arrow_Table__to_dataframe`, table, use_threads)
 }
 
-ArrayData__get_type <- function(x){
-    .Call(`_arrow_ArrayData__get_type`, x)
+ArrayData__get_type <- function(x) {
+  .Call(`_arrow_ArrayData__get_type`, x)
 }
 
-ArrayData__get_length <- function(x){
-    .Call(`_arrow_ArrayData__get_length`, x)
+ArrayData__get_length <- function(x) {
+  .Call(`_arrow_ArrayData__get_length`, x)
 }
 
-ArrayData__get_null_count <- function(x){
-    .Call(`_arrow_ArrayData__get_null_count`, x)
+ArrayData__get_null_count <- function(x) {
+  .Call(`_arrow_ArrayData__get_null_count`, x)
 }
 
-ArrayData__get_offset <- function(x){
-    .Call(`_arrow_ArrayData__get_offset`, x)
+ArrayData__get_offset <- function(x) {
+  .Call(`_arrow_ArrayData__get_offset`, x)
 }
 
-ArrayData__buffers <- function(x){
-    .Call(`_arrow_ArrayData__buffers`, x)
+ArrayData__buffers <- function(x) {
+  .Call(`_arrow_ArrayData__buffers`, x)
 }
 
-Buffer__is_mutable <- function(buffer){
-    .Call(`_arrow_Buffer__is_mutable`, buffer)
+Buffer__is_mutable <- function(buffer) {
+  .Call(`_arrow_Buffer__is_mutable`, buffer)
 }
 
-Buffer__ZeroPadding <- function(buffer){
-    invisible(.Call(`_arrow_Buffer__ZeroPadding`, buffer))
+Buffer__ZeroPadding <- function(buffer) {
+  invisible(.Call(`_arrow_Buffer__ZeroPadding`, buffer))
 }
 
-Buffer__capacity <- function(buffer){
-    .Call(`_arrow_Buffer__capacity`, buffer)
+Buffer__capacity <- function(buffer) {
+  .Call(`_arrow_Buffer__capacity`, buffer)
 }
 
-Buffer__size <- function(buffer){
-    .Call(`_arrow_Buffer__size`, buffer)
+Buffer__size <- function(buffer) {
+  .Call(`_arrow_Buffer__size`, buffer)
 }
 
-r___RBuffer__initialize <- function(x){
-    .Call(`_arrow_r___RBuffer__initialize`, x)
+r___RBuffer__initialize <- function(x) {
+  .Call(`_arrow_r___RBuffer__initialize`, x)
 }
 
-Buffer__data <- function(buffer){
-    .Call(`_arrow_Buffer__data`, buffer)
+Buffer__data <- function(buffer) {
+  .Call(`_arrow_Buffer__data`, buffer)
 }
 
-Buffer__Equals <- function(x, y){
-    .Call(`_arrow_Buffer__Equals`, x, y)
+Buffer__Equals <- function(x, y) {
+  .Call(`_arrow_Buffer__Equals`, x, y)
 }
 
-ChunkedArray__length <- function(chunked_array){
-    .Call(`_arrow_ChunkedArray__length`, chunked_array)
+ChunkedArray__length <- function(chunked_array) {
+  .Call(`_arrow_ChunkedArray__length`, chunked_array)
 }
 
-ChunkedArray__null_count <- function(chunked_array){
-    .Call(`_arrow_ChunkedArray__null_count`, chunked_array)
+ChunkedArray__null_count <- function(chunked_array) {
+  .Call(`_arrow_ChunkedArray__null_count`, chunked_array)
 }
 
-ChunkedArray__num_chunks <- function(chunked_array){
-    .Call(`_arrow_ChunkedArray__num_chunks`, chunked_array)
+ChunkedArray__num_chunks <- function(chunked_array) {
+  .Call(`_arrow_ChunkedArray__num_chunks`, chunked_array)
 }
 
-ChunkedArray__chunk <- function(chunked_array, i){
-    .Call(`_arrow_ChunkedArray__chunk`, chunked_array, i)
+ChunkedArray__chunk <- function(chunked_array, i) {
+  .Call(`_arrow_ChunkedArray__chunk`, chunked_array, i)
 }
 
-ChunkedArray__chunks <- function(chunked_array){
-    .Call(`_arrow_ChunkedArray__chunks`, chunked_array)
+ChunkedArray__chunks <- function(chunked_array) {
+  .Call(`_arrow_ChunkedArray__chunks`, chunked_array)
 }
 
-ChunkedArray__type <- function(chunked_array){
-    .Call(`_arrow_ChunkedArray__type`, chunked_array)
+ChunkedArray__type <- function(chunked_array) {
+  .Call(`_arrow_ChunkedArray__type`, chunked_array)
 }
 
-ChunkedArray__Slice1 <- function(chunked_array, offset){
-    .Call(`_arrow_ChunkedArray__Slice1`, chunked_array, offset)
+ChunkedArray__Slice1 <- function(chunked_array, offset) {
+  .Call(`_arrow_ChunkedArray__Slice1`, chunked_array, offset)
 }
 
-ChunkedArray__Slice2 <- function(chunked_array, offset, length){
-    .Call(`_arrow_ChunkedArray__Slice2`, chunked_array, offset, length)
+ChunkedArray__Slice2 <- function(chunked_array, offset, length) {
+  .Call(`_arrow_ChunkedArray__Slice2`, chunked_array, offset, length)
 }
 
-ChunkedArray__View <- function(array, type){
-    .Call(`_arrow_ChunkedArray__View`, array, type)
+ChunkedArray__View <- function(array, type) {
+  .Call(`_arrow_ChunkedArray__View`, array, type)
 }
 
-ChunkedArray__Validate <- function(chunked_array){
-    invisible(.Call(`_arrow_ChunkedArray__Validate`, chunked_array))
+ChunkedArray__Validate <- function(chunked_array) {
+  invisible(.Call(`_arrow_ChunkedArray__Validate`, chunked_array))
 }
 
-ChunkedArray__Equals <- function(x, y){
-    .Call(`_arrow_ChunkedArray__Equals`, x, y)
+ChunkedArray__Equals <- function(x, y) {
+  .Call(`_arrow_ChunkedArray__Equals`, x, y)
 }
 
-ChunkedArray__ToString <- function(x){
-    .Call(`_arrow_ChunkedArray__ToString`, x)
+ChunkedArray__ToString <- function(x) {
+  .Call(`_arrow_ChunkedArray__ToString`, x)
 }
 
-ChunkedArray__from_list <- function(chunks, s_type){
-    .Call(`_arrow_ChunkedArray__from_list`, chunks, s_type)
+ChunkedArray__from_list <- function(chunks, s_type) {
+  .Call(`_arrow_ChunkedArray__from_list`, chunks, s_type)
 }
 
-util___Codec__Create <- function(codec, compression_level){
-    .Call(`_arrow_util___Codec__Create`, codec, compression_level)
+util___Codec__Create <- function(codec, compression_level) {
+  .Call(`_arrow_util___Codec__Create`, codec, compression_level)
 }
 
-util___Codec__name <- function(codec){
-    .Call(`_arrow_util___Codec__name`, codec)
+util___Codec__name <- function(codec) {
+  .Call(`_arrow_util___Codec__name`, codec)
 }
 
-util___Codec__IsAvailable <- function(codec){
-    .Call(`_arrow_util___Codec__IsAvailable`, codec)
+util___Codec__IsAvailable <- function(codec) {
+  .Call(`_arrow_util___Codec__IsAvailable`, codec)
 }
 
-io___CompressedOutputStream__Make <- function(codec, raw){
-    .Call(`_arrow_io___CompressedOutputStream__Make`, codec, raw)
+io___CompressedOutputStream__Make <- function(codec, raw) {
+  .Call(`_arrow_io___CompressedOutputStream__Make`, codec, raw)
 }
 
-io___CompressedInputStream__Make <- function(codec, raw){
-    .Call(`_arrow_io___CompressedInputStream__Make`, codec, raw)
+io___CompressedInputStream__Make <- function(codec, raw) {
+  .Call(`_arrow_io___CompressedInputStream__Make`, codec, raw)
 }
 
-RecordBatch__cast <- function(batch, schema, options){
-    .Call(`_arrow_RecordBatch__cast`, batch, schema, options)
+RecordBatch__cast <- function(batch, schema, options) {
+  .Call(`_arrow_RecordBatch__cast`, batch, schema, options)
 }
 
-Table__cast <- function(table, schema, options){
-    .Call(`_arrow_Table__cast`, table, schema, options)
+Table__cast <- function(table, schema, options) {
+  .Call(`_arrow_Table__cast`, table, schema, options)
 }
 
-compute__CallFunction <- function(func_name, args, options){
-    .Call(`_arrow_compute__CallFunction`, func_name, args, options)
+compute__CallFunction <- function(func_name, args, options) {
+  .Call(`_arrow_compute__CallFunction`, func_name, args, options)
 }
 
-compute__GroupBy <- function(arguments, keys, options){
-    .Call(`_arrow_compute__GroupBy`, arguments, keys, options)
+compute__GroupBy <- function(arguments, keys, options) {
+  .Call(`_arrow_compute__GroupBy`, arguments, keys, options)
 }
 
-compute__GetFunctionNames <- function(){
-    .Call(`_arrow_compute__GetFunctionNames`)
+compute__GetFunctionNames <- function() {
+  .Call(`_arrow_compute__GetFunctionNames`)
 }
 
-build_info <- function(){
-    .Call(`_arrow_build_info`)
+build_info <- function() {
+  .Call(`_arrow_build_info`)
 }
 
-runtime_info <- function(){
-    .Call(`_arrow_runtime_info`)
+runtime_info <- function() {
+  .Call(`_arrow_runtime_info`)
 }
 
-csv___WriteOptions__initialize <- function(options){
-    .Call(`_arrow_csv___WriteOptions__initialize`, options)
+csv___WriteOptions__initialize <- function(options) {
+  .Call(`_arrow_csv___WriteOptions__initialize`, options)
 }
 
-csv___ReadOptions__initialize <- function(options){
-    .Call(`_arrow_csv___ReadOptions__initialize`, options)
+csv___ReadOptions__initialize <- function(options) {
+  .Call(`_arrow_csv___ReadOptions__initialize`, options)
 }
 
-csv___ParseOptions__initialize <- function(options){
-    .Call(`_arrow_csv___ParseOptions__initialize`, options)
+csv___ParseOptions__initialize <- function(options) {
+  .Call(`_arrow_csv___ParseOptions__initialize`, options)
 }
 
-csv___ReadOptions__column_names <- function(options){
-    .Call(`_arrow_csv___ReadOptions__column_names`, options)
+csv___ReadOptions__column_names <- function(options) {
+  .Call(`_arrow_csv___ReadOptions__column_names`, options)
 }
 
-csv___ConvertOptions__initialize <- function(options){
-    .Call(`_arrow_csv___ConvertOptions__initialize`, options)
+csv___ConvertOptions__initialize <- function(options) {
+  .Call(`_arrow_csv___ConvertOptions__initialize`, options)
 }
 
-csv___TableReader__Make <- function(input, read_options, parse_options, convert_options){
-    .Call(`_arrow_csv___TableReader__Make`, input, read_options, parse_options, convert_options)
+csv___TableReader__Make <- function(input, read_options, parse_options, convert_options) {
+  .Call(`_arrow_csv___TableReader__Make`, input, read_options, parse_options, convert_options)
 }
 
-csv___TableReader__Read <- function(table_reader){
-    .Call(`_arrow_csv___TableReader__Read`, table_reader)
+csv___TableReader__Read <- function(table_reader) {
+  .Call(`_arrow_csv___TableReader__Read`, table_reader)
 }
 
-TimestampParser__kind <- function(parser){
-    .Call(`_arrow_TimestampParser__kind`, parser)
+TimestampParser__kind <- function(parser) {
+  .Call(`_arrow_TimestampParser__kind`, parser)
 }
 
-TimestampParser__format <- function(parser){
-    .Call(`_arrow_TimestampParser__format`, parser)
+TimestampParser__format <- function(parser) {
+  .Call(`_arrow_TimestampParser__format`, parser)
 }
 
-TimestampParser__MakeStrptime <- function(format){
-    .Call(`_arrow_TimestampParser__MakeStrptime`, format)
+TimestampParser__MakeStrptime <- function(format) {
+  .Call(`_arrow_TimestampParser__MakeStrptime`, format)
 }
 
-TimestampParser__MakeISO8601 <- function(){
-    .Call(`_arrow_TimestampParser__MakeISO8601`)
+TimestampParser__MakeISO8601 <- function() {
+  .Call(`_arrow_TimestampParser__MakeISO8601`)
 }
 
-csv___WriteCSV__Table <- function(table, write_options, stream){
-    invisible(.Call(`_arrow_csv___WriteCSV__Table`, table, write_options, stream))
+csv___WriteCSV__Table <- function(table, write_options, stream) {
+  invisible(.Call(`_arrow_csv___WriteCSV__Table`, table, write_options, stream))
 }
 
-csv___WriteCSV__RecordBatch <- function(record_batch, write_options, stream){
-    invisible(.Call(`_arrow_csv___WriteCSV__RecordBatch`, record_batch, write_options, stream))
+csv___WriteCSV__RecordBatch <- function(record_batch, write_options, stream) {
+  invisible(.Call(`_arrow_csv___WriteCSV__RecordBatch`, record_batch, write_options, stream))
 }
 
-dataset___Dataset__NewScan <- function(ds){
-    .Call(`_arrow_dataset___Dataset__NewScan`, ds)
+dataset___Dataset__NewScan <- function(ds) {
+  .Call(`_arrow_dataset___Dataset__NewScan`, ds)
 }
 
-dataset___Dataset__schema <- function(dataset){
-    .Call(`_arrow_dataset___Dataset__schema`, dataset)
+dataset___Dataset__schema <- function(dataset) {
+  .Call(`_arrow_dataset___Dataset__schema`, dataset)
 }
 
-dataset___Dataset__type_name <- function(dataset){
-    .Call(`_arrow_dataset___Dataset__type_name`, dataset)
+dataset___Dataset__type_name <- function(dataset) {
+  .Call(`_arrow_dataset___Dataset__type_name`, dataset)
 }
 
-dataset___Dataset__ReplaceSchema <- function(dataset, schm){
-    .Call(`_arrow_dataset___Dataset__ReplaceSchema`, dataset, schm)
+dataset___Dataset__ReplaceSchema <- function(dataset, schm) {
+  .Call(`_arrow_dataset___Dataset__ReplaceSchema`, dataset, schm)
 }
 
-dataset___UnionDataset__create <- function(datasets, schm){
-    .Call(`_arrow_dataset___UnionDataset__create`, datasets, schm)
+dataset___UnionDataset__create <- function(datasets, schm) {
+  .Call(`_arrow_dataset___UnionDataset__create`, datasets, schm)
 }
 
-dataset___InMemoryDataset__create <- function(table){
-    .Call(`_arrow_dataset___InMemoryDataset__create`, table)
+dataset___InMemoryDataset__create <- function(table) {
+  .Call(`_arrow_dataset___InMemoryDataset__create`, table)
 }
 
-dataset___UnionDataset__children <- function(ds){
-    .Call(`_arrow_dataset___UnionDataset__children`, ds)
+dataset___UnionDataset__children <- function(ds) {
+  .Call(`_arrow_dataset___UnionDataset__children`, ds)
 }
 
-dataset___FileSystemDataset__format <- function(dataset){
-    .Call(`_arrow_dataset___FileSystemDataset__format`, dataset)
+dataset___FileSystemDataset__format <- function(dataset) {
+  .Call(`_arrow_dataset___FileSystemDataset__format`, dataset)
 }
 
-dataset___FileSystemDataset__filesystem <- function(dataset){
-    .Call(`_arrow_dataset___FileSystemDataset__filesystem`, dataset)
+dataset___FileSystemDataset__filesystem <- function(dataset) {
+  .Call(`_arrow_dataset___FileSystemDataset__filesystem`, dataset)
 }
 
-dataset___FileSystemDataset__files <- function(dataset){
-    .Call(`_arrow_dataset___FileSystemDataset__files`, dataset)
+dataset___FileSystemDataset__files <- function(dataset) {
+  .Call(`_arrow_dataset___FileSystemDataset__files`, dataset)
 }
 
-dataset___DatasetFactory__Finish1 <- function(factory, unify_schemas){
-    .Call(`_arrow_dataset___DatasetFactory__Finish1`, factory, unify_schemas)
+dataset___DatasetFactory__Finish1 <- function(factory, unify_schemas) {
+  .Call(`_arrow_dataset___DatasetFactory__Finish1`, factory, unify_schemas)
 }
 
-dataset___DatasetFactory__Finish2 <- function(factory, schema){
-    .Call(`_arrow_dataset___DatasetFactory__Finish2`, factory, schema)
+dataset___DatasetFactory__Finish2 <- function(factory, schema) {
+  .Call(`_arrow_dataset___DatasetFactory__Finish2`, factory, schema)
 }
 
-dataset___DatasetFactory__Inspect <- function(factory, unify_schemas){
-    .Call(`_arrow_dataset___DatasetFactory__Inspect`, factory, unify_schemas)
+dataset___DatasetFactory__Inspect <- function(factory, unify_schemas) {
+  .Call(`_arrow_dataset___DatasetFactory__Inspect`, factory, unify_schemas)
 }
 
-dataset___UnionDatasetFactory__Make <- function(children){
-    .Call(`_arrow_dataset___UnionDatasetFactory__Make`, children)
+dataset___UnionDatasetFactory__Make <- function(children) {
+  .Call(`_arrow_dataset___UnionDatasetFactory__Make`, children)
 }
 
-dataset___FileSystemDatasetFactory__Make0 <- function(fs, paths, format){
-    .Call(`_arrow_dataset___FileSystemDatasetFactory__Make0`, fs, paths, format)
+dataset___FileSystemDatasetFactory__Make0 <- function(fs, paths, format) {
+  .Call(`_arrow_dataset___FileSystemDatasetFactory__Make0`, fs, paths, format)
 }
 
-dataset___FileSystemDatasetFactory__Make2 <- function(fs, selector, format, partitioning){
-    .Call(`_arrow_dataset___FileSystemDatasetFactory__Make2`, fs, selector, format, partitioning)
+dataset___FileSystemDatasetFactory__Make2 <- function(fs, selector, format, partitioning) {
+  .Call(`_arrow_dataset___FileSystemDatasetFactory__Make2`, fs, selector, format, partitioning)
 }
 
-dataset___FileSystemDatasetFactory__Make1 <- function(fs, selector, format){
-    .Call(`_arrow_dataset___FileSystemDatasetFactory__Make1`, fs, selector, format)
+dataset___FileSystemDatasetFactory__Make1 <- function(fs, selector, format) {
+  .Call(`_arrow_dataset___FileSystemDatasetFactory__Make1`, fs, selector, format)
 }
 
-dataset___FileSystemDatasetFactory__Make3 <- function(fs, selector, format, factory){
-    .Call(`_arrow_dataset___FileSystemDatasetFactory__Make3`, fs, selector, format, factory)
+dataset___FileSystemDatasetFactory__Make3 <- function(fs, selector, format, factory) {
+  .Call(`_arrow_dataset___FileSystemDatasetFactory__Make3`, fs, selector, format, factory)
 }
 
-dataset___FileFormat__type_name <- function(format){
-    .Call(`_arrow_dataset___FileFormat__type_name`, format)
+dataset___FileFormat__type_name <- function(format) {
+  .Call(`_arrow_dataset___FileFormat__type_name`, format)
 }
 
-dataset___FileFormat__DefaultWriteOptions <- function(fmt){
-    .Call(`_arrow_dataset___FileFormat__DefaultWriteOptions`, fmt)
+dataset___FileFormat__DefaultWriteOptions <- function(fmt) {
+  .Call(`_arrow_dataset___FileFormat__DefaultWriteOptions`, fmt)
 }
 
-dataset___ParquetFileFormat__Make <- function(options, dict_columns){
-    .Call(`_arrow_dataset___ParquetFileFormat__Make`, options, dict_columns)
+dataset___ParquetFileFormat__Make <- function(options, dict_columns) {
+  .Call(`_arrow_dataset___ParquetFileFormat__Make`, options, dict_columns)
 }
 
-dataset___FileWriteOptions__type_name <- function(options){
-    .Call(`_arrow_dataset___FileWriteOptions__type_name`, options)
+dataset___FileWriteOptions__type_name <- function(options) {
+  .Call(`_arrow_dataset___FileWriteOptions__type_name`, options)
 }
 
-dataset___ParquetFileWriteOptions__update <- function(options, writer_props, arrow_writer_props){
-    invisible(.Call(`_arrow_dataset___ParquetFileWriteOptions__update`, options, writer_props, arrow_writer_props))
+dataset___ParquetFileWriteOptions__update <- function(options, writer_props, arrow_writer_props) {
+  invisible(.Call(`_arrow_dataset___ParquetFileWriteOptions__update`, options, writer_props, arrow_writer_props))
 }
 
-dataset___IpcFileWriteOptions__update2 <- function(ipc_options, use_legacy_format, codec, metadata_version){
-    invisible(.Call(`_arrow_dataset___IpcFileWriteOptions__update2`, ipc_options, use_legacy_format, codec, metadata_version))
+dataset___IpcFileWriteOptions__update2 <- function(ipc_options, use_legacy_format, codec, metadata_version) {
+  invisible(.Call(`_arrow_dataset___IpcFileWriteOptions__update2`, ipc_options, use_legacy_format, codec, metadata_version))
 }
 
-dataset___IpcFileWriteOptions__update1 <- function(ipc_options, use_legacy_format, metadata_version){
-    invisible(.Call(`_arrow_dataset___IpcFileWriteOptions__update1`, ipc_options, use_legacy_format, metadata_version))
+dataset___IpcFileWriteOptions__update1 <- function(ipc_options, use_legacy_format, metadata_version) {
+  invisible(.Call(`_arrow_dataset___IpcFileWriteOptions__update1`, ipc_options, use_legacy_format, metadata_version))
 }
 
-dataset___CsvFileWriteOptions__update <- function(csv_options, write_options){
-    invisible(.Call(`_arrow_dataset___CsvFileWriteOptions__update`, csv_options, write_options))
+dataset___CsvFileWriteOptions__update <- function(csv_options, write_options) {
+  invisible(.Call(`_arrow_dataset___CsvFileWriteOptions__update`, csv_options, write_options))
 }
 
-dataset___IpcFileFormat__Make <- function(){
-    .Call(`_arrow_dataset___IpcFileFormat__Make`)
+dataset___IpcFileFormat__Make <- function() {
+  .Call(`_arrow_dataset___IpcFileFormat__Make`)
 }
 
-dataset___CsvFileFormat__Make <- function(parse_options, convert_options, read_options){
-    .Call(`_arrow_dataset___CsvFileFormat__Make`, parse_options, convert_options, read_options)
+dataset___CsvFileFormat__Make <- function(parse_options, convert_options, read_options) {
+  .Call(`_arrow_dataset___CsvFileFormat__Make`, parse_options, convert_options, read_options)
 }
 
-dataset___FragmentScanOptions__type_name <- function(fragment_scan_options){
-    .Call(`_arrow_dataset___FragmentScanOptions__type_name`, fragment_scan_options)
+dataset___FragmentScanOptions__type_name <- function(fragment_scan_options) {
+  .Call(`_arrow_dataset___FragmentScanOptions__type_name`, fragment_scan_options)
 }
 
-dataset___CsvFragmentScanOptions__Make <- function(convert_options, read_options){
-    .Call(`_arrow_dataset___CsvFragmentScanOptions__Make`, convert_options, read_options)
+dataset___CsvFragmentScanOptions__Make <- function(convert_options, read_options) {
+  .Call(`_arrow_dataset___CsvFragmentScanOptions__Make`, convert_options, read_options)
 }
 
-dataset___ParquetFragmentScanOptions__Make <- function(use_buffered_stream, buffer_size, pre_buffer){
-    .Call(`_arrow_dataset___ParquetFragmentScanOptions__Make`, use_buffered_stream, buffer_size, pre_buffer)
+dataset___ParquetFragmentScanOptions__Make <- function(use_buffered_stream, buffer_size, pre_buffer) {
+  .Call(`_arrow_dataset___ParquetFragmentScanOptions__Make`, use_buffered_stream, buffer_size, pre_buffer)
 }
 
-dataset___DirectoryPartitioning <- function(schm, segment_encoding){
-    .Call(`_arrow_dataset___DirectoryPartitioning`, schm, segment_encoding)
+dataset___DirectoryPartitioning <- function(schm, segment_encoding) {
+  .Call(`_arrow_dataset___DirectoryPartitioning`, schm, segment_encoding)
 }
 
-dataset___DirectoryPartitioning__MakeFactory <- function(field_names, segment_encoding){
-    .Call(`_arrow_dataset___DirectoryPartitioning__MakeFactory`, field_names, segment_encoding)
+dataset___DirectoryPartitioning__MakeFactory <- function(field_names, segment_encoding) {
+  .Call(`_arrow_dataset___DirectoryPartitioning__MakeFactory`, field_names, segment_encoding)
 }
 
-dataset___HivePartitioning <- function(schm, null_fallback, segment_encoding){
-    .Call(`_arrow_dataset___HivePartitioning`, schm, null_fallback, segment_encoding)
+dataset___HivePartitioning <- function(schm, null_fallback, segment_encoding) {
+  .Call(`_arrow_dataset___HivePartitioning`, schm, null_fallback, segment_encoding)
 }
 
-dataset___HivePartitioning__MakeFactory <- function(null_fallback, segment_encoding){
-    .Call(`_arrow_dataset___HivePartitioning__MakeFactory`, null_fallback, segment_encoding)
+dataset___HivePartitioning__MakeFactory <- function(null_fallback, segment_encoding) {
+  .Call(`_arrow_dataset___HivePartitioning__MakeFactory`, null_fallback, segment_encoding)
 }
 
-dataset___ScannerBuilder__ProjectNames <- function(sb, cols){
-    invisible(.Call(`_arrow_dataset___ScannerBuilder__ProjectNames`, sb, cols))
+dataset___ScannerBuilder__ProjectNames <- function(sb, cols) {
+  invisible(.Call(`_arrow_dataset___ScannerBuilder__ProjectNames`, sb, cols))
 }
 
-dataset___ScannerBuilder__ProjectExprs <- function(sb, exprs, names){
-    invisible(.Call(`_arrow_dataset___ScannerBuilder__ProjectExprs`, sb, exprs, names))
+dataset___ScannerBuilder__ProjectExprs <- function(sb, exprs, names) {
+  invisible(.Call(`_arrow_dataset___ScannerBuilder__ProjectExprs`, sb, exprs, names))
 }
 
-dataset___ScannerBuilder__Filter <- function(sb, expr){
-    invisible(.Call(`_arrow_dataset___ScannerBuilder__Filter`, sb, expr))
+dataset___ScannerBuilder__Filter <- function(sb, expr) {
+  invisible(.Call(`_arrow_dataset___ScannerBuilder__Filter`, sb, expr))
 }
 
-dataset___ScannerBuilder__UseThreads <- function(sb, threads){
-    invisible(.Call(`_arrow_dataset___ScannerBuilder__UseThreads`, sb, threads))
+dataset___ScannerBuilder__UseThreads <- function(sb, threads) {
+  invisible(.Call(`_arrow_dataset___ScannerBuilder__UseThreads`, sb, threads))
 }
 
-dataset___ScannerBuilder__UseAsync <- function(sb, use_async){
-    invisible(.Call(`_arrow_dataset___ScannerBuilder__UseAsync`, sb, use_async))
+dataset___ScannerBuilder__UseAsync <- function(sb, use_async) {
+  invisible(.Call(`_arrow_dataset___ScannerBuilder__UseAsync`, sb, use_async))
 }
 
-dataset___ScannerBuilder__BatchSize <- function(sb, batch_size){
-    invisible(.Call(`_arrow_dataset___ScannerBuilder__BatchSize`, sb, batch_size))
+dataset___ScannerBuilder__BatchSize <- function(sb, batch_size) {
+  invisible(.Call(`_arrow_dataset___ScannerBuilder__BatchSize`, sb, batch_size))
 }
 
-dataset___ScannerBuilder__FragmentScanOptions <- function(sb, options){
-    invisible(.Call(`_arrow_dataset___ScannerBuilder__FragmentScanOptions`, sb, options))
+dataset___ScannerBuilder__FragmentScanOptions <- function(sb, options) {
+  invisible(.Call(`_arrow_dataset___ScannerBuilder__FragmentScanOptions`, sb, options))
 }
 
-dataset___ScannerBuilder__schema <- function(sb){
-    .Call(`_arrow_dataset___ScannerBuilder__schema`, sb)
+dataset___ScannerBuilder__schema <- function(sb) {
+  .Call(`_arrow_dataset___ScannerBuilder__schema`, sb)
 }
 
-dataset___ScannerBuilder__Finish <- function(sb){
-    .Call(`_arrow_dataset___ScannerBuilder__Finish`, sb)
+dataset___ScannerBuilder__Finish <- function(sb) {
+  .Call(`_arrow_dataset___ScannerBuilder__Finish`, sb)
 }
 
-dataset___Scanner__ToTable <- function(scanner){
-    .Call(`_arrow_dataset___Scanner__ToTable`, scanner)
+dataset___Scanner__ToTable <- function(scanner) {
+  .Call(`_arrow_dataset___Scanner__ToTable`, scanner)
 }
 
-dataset___Scanner__ScanBatches <- function(scanner){
-    .Call(`_arrow_dataset___Scanner__ScanBatches`, scanner)
+dataset___Scanner__ScanBatches <- function(scanner) {
+  .Call(`_arrow_dataset___Scanner__ScanBatches`, scanner)
 }
 
-dataset___Scanner__ToRecordBatchReader <- function(scanner){
-    .Call(`_arrow_dataset___Scanner__ToRecordBatchReader`, scanner)
+dataset___Scanner__ToRecordBatchReader <- function(scanner) {
+  .Call(`_arrow_dataset___Scanner__ToRecordBatchReader`, scanner)
 }
 
-dataset___Scanner__head <- function(scanner, n){
-    .Call(`_arrow_dataset___Scanner__head`, scanner, n)
+dataset___Scanner__head <- function(scanner, n) {
+  .Call(`_arrow_dataset___Scanner__head`, scanner, n)
 }
 
-dataset___Scanner__schema <- function(sc){
-    .Call(`_arrow_dataset___Scanner__schema`, sc)
+dataset___Scanner__schema <- function(sc) {
+  .Call(`_arrow_dataset___Scanner__schema`, sc)
 }
 
-dataset___ScanTask__get_batches <- function(scan_task){
-    .Call(`_arrow_dataset___ScanTask__get_batches`, scan_task)
+dataset___ScanTask__get_batches <- function(scan_task) {
+  .Call(`_arrow_dataset___ScanTask__get_batches`, scan_task)
 }
 
-dataset___Dataset__Write <- function(file_write_options, filesystem, base_dir, partitioning, basename_template, scanner){
-    invisible(.Call(`_arrow_dataset___Dataset__Write`, file_write_options, filesystem, base_dir, partitioning, basename_template, scanner))
+dataset___Dataset__Write <- function(file_write_options, filesystem, base_dir, partitioning, basename_template, scanner) {
+  invisible(.Call(`_arrow_dataset___Dataset__Write`, file_write_options, filesystem, base_dir, partitioning, basename_template, scanner))
 }
 
-dataset___Scanner__TakeRows <- function(scanner, indices){
-    .Call(`_arrow_dataset___Scanner__TakeRows`, scanner, indices)
+dataset___Scanner__TakeRows <- function(scanner, indices) {
+  .Call(`_arrow_dataset___Scanner__TakeRows`, scanner, indices)
 }
 
-dataset___Scanner__CountRows <- function(scanner){
-    .Call(`_arrow_dataset___Scanner__CountRows`, scanner)
+dataset___Scanner__CountRows <- function(scanner) {
+  .Call(`_arrow_dataset___Scanner__CountRows`, scanner)
 }
 
-Int8__initialize <- function(){
-    .Call(`_arrow_Int8__initialize`)
+Int8__initialize <- function() {
+  .Call(`_arrow_Int8__initialize`)
 }
 
-Int16__initialize <- function(){
-    .Call(`_arrow_Int16__initialize`)
+Int16__initialize <- function() {
+  .Call(`_arrow_Int16__initialize`)
 }
 
-Int32__initialize <- function(){
-    .Call(`_arrow_Int32__initialize`)
+Int32__initialize <- function() {
+  .Call(`_arrow_Int32__initialize`)
 }
 
-Int64__initialize <- function(){
-    .Call(`_arrow_Int64__initialize`)
+Int64__initialize <- function() {
+  .Call(`_arrow_Int64__initialize`)
 }
 
-UInt8__initialize <- function(){
-    .Call(`_arrow_UInt8__initialize`)
+UInt8__initialize <- function() {
+  .Call(`_arrow_UInt8__initialize`)
 }
 
-UInt16__initialize <- function(){
-    .Call(`_arrow_UInt16__initialize`)
+UInt16__initialize <- function() {
+  .Call(`_arrow_UInt16__initialize`)
 }
 
-UInt32__initialize <- function(){
-    .Call(`_arrow_UInt32__initialize`)
+UInt32__initialize <- function() {
+  .Call(`_arrow_UInt32__initialize`)
 }
 
-UInt64__initialize <- function(){
-    .Call(`_arrow_UInt64__initialize`)
+UInt64__initialize <- function() {
+  .Call(`_arrow_UInt64__initialize`)
 }
 
-Float16__initialize <- function(){
-    .Call(`_arrow_Float16__initialize`)
+Float16__initialize <- function() {
+  .Call(`_arrow_Float16__initialize`)
 }
 
-Float32__initialize <- function(){
-    .Call(`_arrow_Float32__initialize`)
+Float32__initialize <- function() {
+  .Call(`_arrow_Float32__initialize`)
 }
 
-Float64__initialize <- function(){
-    .Call(`_arrow_Float64__initialize`)
+Float64__initialize <- function() {
+  .Call(`_arrow_Float64__initialize`)
 }
 
-Boolean__initialize <- function(){
-    .Call(`_arrow_Boolean__initialize`)
+Boolean__initialize <- function() {
+  .Call(`_arrow_Boolean__initialize`)
 }
 
-Utf8__initialize <- function(){
-    .Call(`_arrow_Utf8__initialize`)
+Utf8__initialize <- function() {
+  .Call(`_arrow_Utf8__initialize`)
 }
 
-LargeUtf8__initialize <- function(){
-    .Call(`_arrow_LargeUtf8__initialize`)
+LargeUtf8__initialize <- function() {
+  .Call(`_arrow_LargeUtf8__initialize`)
 }
 
-Binary__initialize <- function(){
-    .Call(`_arrow_Binary__initialize`)
+Binary__initialize <- function() {
+  .Call(`_arrow_Binary__initialize`)
 }
 
-LargeBinary__initialize <- function(){
-    .Call(`_arrow_LargeBinary__initialize`)
+LargeBinary__initialize <- function() {
+  .Call(`_arrow_LargeBinary__initialize`)
 }
 
-Date32__initialize <- function(){
-    .Call(`_arrow_Date32__initialize`)
+Date32__initialize <- function() {
+  .Call(`_arrow_Date32__initialize`)
 }
 
-Date64__initialize <- function(){
-    .Call(`_arrow_Date64__initialize`)
+Date64__initialize <- function() {
+  .Call(`_arrow_Date64__initialize`)
 }
 
-Null__initialize <- function(){
-    .Call(`_arrow_Null__initialize`)
+Null__initialize <- function() {
+  .Call(`_arrow_Null__initialize`)
 }
 
-Decimal128Type__initialize <- function(precision, scale){
-    .Call(`_arrow_Decimal128Type__initialize`, precision, scale)
+Decimal128Type__initialize <- function(precision, scale) {
+  .Call(`_arrow_Decimal128Type__initialize`, precision, scale)
 }
 
-FixedSizeBinary__initialize <- function(byte_width){
-    .Call(`_arrow_FixedSizeBinary__initialize`, byte_width)
+FixedSizeBinary__initialize <- function(byte_width) {
+  .Call(`_arrow_FixedSizeBinary__initialize`, byte_width)
 }
 
-Timestamp__initialize <- function(unit, timezone){
-    .Call(`_arrow_Timestamp__initialize`, unit, timezone)
+Timestamp__initialize <- function(unit, timezone) {
+  .Call(`_arrow_Timestamp__initialize`, unit, timezone)
 }
 
-Time32__initialize <- function(unit){
-    .Call(`_arrow_Time32__initialize`, unit)
+Time32__initialize <- function(unit) {
+  .Call(`_arrow_Time32__initialize`, unit)
 }
 
-Time64__initialize <- function(unit){
-    .Call(`_arrow_Time64__initialize`, unit)
+Time64__initialize <- function(unit) {
+  .Call(`_arrow_Time64__initialize`, unit)
 }
 
-list__ <- function(x){
-    .Call(`_arrow_list__`, x)
+list__ <- function(x) {
+  .Call(`_arrow_list__`, x)
 }
 
-large_list__ <- function(x){
-    .Call(`_arrow_large_list__`, x)
+large_list__ <- function(x) {
+  .Call(`_arrow_large_list__`, x)
 }
 
-fixed_size_list__ <- function(x, list_size){
-    .Call(`_arrow_fixed_size_list__`, x, list_size)
+fixed_size_list__ <- function(x, list_size) {
+  .Call(`_arrow_fixed_size_list__`, x, list_size)
 }
 
-struct__ <- function(fields){
-    .Call(`_arrow_struct__`, fields)
+struct__ <- function(fields) {
+  .Call(`_arrow_struct__`, fields)
 }
 
-DataType__ToString <- function(type){
-    .Call(`_arrow_DataType__ToString`, type)
+DataType__ToString <- function(type) {
+  .Call(`_arrow_DataType__ToString`, type)
 }
 
-DataType__name <- function(type){
-    .Call(`_arrow_DataType__name`, type)
+DataType__name <- function(type) {
+  .Call(`_arrow_DataType__name`, type)
 }
 
-DataType__Equals <- function(lhs, rhs){
-    .Call(`_arrow_DataType__Equals`, lhs, rhs)
+DataType__Equals <- function(lhs, rhs) {
+  .Call(`_arrow_DataType__Equals`, lhs, rhs)
 }
 
-DataType__num_fields <- function(type){
-    .Call(`_arrow_DataType__num_fields`, type)
+DataType__num_fields <- function(type) {
+  .Call(`_arrow_DataType__num_fields`, type)
 }
 
-DataType__fields <- function(type){
-    .Call(`_arrow_DataType__fields`, type)
+DataType__fields <- function(type) {
+  .Call(`_arrow_DataType__fields`, type)
 }
 
-DataType__id <- function(type){
-    .Call(`_arrow_DataType__id`, type)
+DataType__id <- function(type) {
+  .Call(`_arrow_DataType__id`, type)
 }
 
-ListType__ToString <- function(type){
-    .Call(`_arrow_ListType__ToString`, type)
+ListType__ToString <- function(type) {
+  .Call(`_arrow_ListType__ToString`, type)
 }
 
-FixedWidthType__bit_width <- function(type){
-    .Call(`_arrow_FixedWidthType__bit_width`, type)
+FixedWidthType__bit_width <- function(type) {
+  .Call(`_arrow_FixedWidthType__bit_width`, type)
 }
 
-DateType__unit <- function(type){
-    .Call(`_arrow_DateType__unit`, type)
+DateType__unit <- function(type) {
+  .Call(`_arrow_DateType__unit`, type)
 }
 
-TimeType__unit <- function(type){
-    .Call(`_arrow_TimeType__unit`, type)
+TimeType__unit <- function(type) {
+  .Call(`_arrow_TimeType__unit`, type)
 }
 
-DecimalType__precision <- function(type){
-    .Call(`_arrow_DecimalType__precision`, type)
+DecimalType__precision <- function(type) {
+  .Call(`_arrow_DecimalType__precision`, type)
 }
 
-DecimalType__scale <- function(type){
-    .Call(`_arrow_DecimalType__scale`, type)
+DecimalType__scale <- function(type) {
+  .Call(`_arrow_DecimalType__scale`, type)
 }
 
-TimestampType__timezone <- function(type){
-    .Call(`_arrow_TimestampType__timezone`, type)
+TimestampType__timezone <- function(type) {
+  .Call(`_arrow_TimestampType__timezone`, type)
 }
 
-TimestampType__unit <- function(type){
-    .Call(`_arrow_TimestampType__unit`, type)
+TimestampType__unit <- function(type) {
+  .Call(`_arrow_TimestampType__unit`, type)
 }
 
-DictionaryType__initialize <- function(index_type, value_type, ordered){
-    .Call(`_arrow_DictionaryType__initialize`, index_type, value_type, ordered)
+DictionaryType__initialize <- function(index_type, value_type, ordered) {
+  .Call(`_arrow_DictionaryType__initialize`, index_type, value_type, ordered)
 }
 
-DictionaryType__index_type <- function(type){
-    .Call(`_arrow_DictionaryType__index_type`, type)
+DictionaryType__index_type <- function(type) {
+  .Call(`_arrow_DictionaryType__index_type`, type)
 }
 
-DictionaryType__value_type <- function(type){
-    .Call(`_arrow_DictionaryType__value_type`, type)
+DictionaryType__value_type <- function(type) {
+  .Call(`_arrow_DictionaryType__value_type`, type)
 }
 
-DictionaryType__name <- function(type){
-    .Call(`_arrow_DictionaryType__name`, type)
+DictionaryType__name <- function(type) {
+  .Call(`_arrow_DictionaryType__name`, type)
 }
 
-DictionaryType__ordered <- function(type){
-    .Call(`_arrow_DictionaryType__ordered`, type)
+DictionaryType__ordered <- function(type) {
+  .Call(`_arrow_DictionaryType__ordered`, type)
 }
 
-StructType__GetFieldByName <- function(type, name){
-    .Call(`_arrow_StructType__GetFieldByName`, type, name)
+StructType__GetFieldByName <- function(type, name) {
+  .Call(`_arrow_StructType__GetFieldByName`, type, name)
 }
 
-StructType__GetFieldIndex <- function(type, name){
-    .Call(`_arrow_StructType__GetFieldIndex`, type, name)
+StructType__GetFieldIndex <- function(type, name) {
+  .Call(`_arrow_StructType__GetFieldIndex`, type, name)
 }
 
-StructType__field_names <- function(type){
-    .Call(`_arrow_StructType__field_names`, type)
+StructType__field_names <- function(type) {
+  .Call(`_arrow_StructType__field_names`, type)
 }
 
-ListType__value_field <- function(type){
-    .Call(`_arrow_ListType__value_field`, type)
+ListType__value_field <- function(type) {
+  .Call(`_arrow_ListType__value_field`, type)
 }
 
-ListType__value_type <- function(type){
-    .Call(`_arrow_ListType__value_type`, type)
+ListType__value_type <- function(type) {
+  .Call(`_arrow_ListType__value_type`, type)
 }
 
-LargeListType__value_field <- function(type){
-    .Call(`_arrow_LargeListType__value_field`, type)
+LargeListType__value_field <- function(type) {
+  .Call(`_arrow_LargeListType__value_field`, type)
 }
 
-LargeListType__value_type <- function(type){
-    .Call(`_arrow_LargeListType__value_type`, type)
+LargeListType__value_type <- function(type) {
+  .Call(`_arrow_LargeListType__value_type`, type)
 }
 
-FixedSizeListType__value_field <- function(type){
-    .Call(`_arrow_FixedSizeListType__value_field`, type)
+FixedSizeListType__value_field <- function(type) {
+  .Call(`_arrow_FixedSizeListType__value_field`, type)
 }
 
-FixedSizeListType__value_type <- function(type){
-    .Call(`_arrow_FixedSizeListType__value_type`, type)
+FixedSizeListType__value_type <- function(type) {
+  .Call(`_arrow_FixedSizeListType__value_type`, type)
 }
 
-FixedSizeListType__list_size <- function(type){
-    .Call(`_arrow_FixedSizeListType__list_size`, type)
+FixedSizeListType__list_size <- function(type) {
+  .Call(`_arrow_FixedSizeListType__list_size`, type)
 }
 
-compute___expr__call <- function(func_name, argument_list, options){
-    .Call(`_arrow_compute___expr__call`, func_name, argument_list, options)
+compute___expr__call <- function(func_name, argument_list, options) {
+  .Call(`_arrow_compute___expr__call`, func_name, argument_list, options)
 }
 
-compute___expr__field_ref <- function(name){
-    .Call(`_arrow_compute___expr__field_ref`, name)
+compute___expr__field_ref <- function(name) {
+  .Call(`_arrow_compute___expr__field_ref`, name)
 }
 
-compute___expr__get_field_ref_name <- function(x){
-    .Call(`_arrow_compute___expr__get_field_ref_name`, x)
+compute___expr__get_field_ref_name <- function(x) {
+  .Call(`_arrow_compute___expr__get_field_ref_name`, x)
 }
 
-compute___expr__scalar <- function(x){
-    .Call(`_arrow_compute___expr__scalar`, x)
+compute___expr__scalar <- function(x) {
+  .Call(`_arrow_compute___expr__scalar`, x)
 }
 
-compute___expr__ToString <- function(x){
-    .Call(`_arrow_compute___expr__ToString`, x)
+compute___expr__ToString <- function(x) {
+  .Call(`_arrow_compute___expr__ToString`, x)
 }
 
-compute___expr__type <- function(x, schema){
-    .Call(`_arrow_compute___expr__type`, x, schema)
+compute___expr__type <- function(x, schema) {
+  .Call(`_arrow_compute___expr__type`, x, schema)
 }
 
-compute___expr__type_id <- function(x, schema){
-    .Call(`_arrow_compute___expr__type_id`, x, schema)
+compute___expr__type_id <- function(x, schema) {
+  .Call(`_arrow_compute___expr__type_id`, x, schema)
 }
 
-ipc___WriteFeather__Table <- function(stream, table, version, chunk_size, compression, compression_level){
-    invisible(.Call(`_arrow_ipc___WriteFeather__Table`, stream, table, version, chunk_size, compression, compression_level))
+ipc___WriteFeather__Table <- function(stream, table, version, chunk_size, compression, compression_level) {
+  invisible(.Call(`_arrow_ipc___WriteFeather__Table`, stream, table, version, chunk_size, compression, compression_level))
 }
 
-ipc___feather___Reader__version <- function(reader){
-    .Call(`_arrow_ipc___feather___Reader__version`, reader)
+ipc___feather___Reader__version <- function(reader) {
+  .Call(`_arrow_ipc___feather___Reader__version`, reader)
 }
 
-ipc___feather___Reader__Read <- function(reader, columns){
-    .Call(`_arrow_ipc___feather___Reader__Read`, reader, columns)
+ipc___feather___Reader__Read <- function(reader, columns) {
+  .Call(`_arrow_ipc___feather___Reader__Read`, reader, columns)
 }
 
-ipc___feather___Reader__Open <- function(stream){
-    .Call(`_arrow_ipc___feather___Reader__Open`, stream)
+ipc___feather___Reader__Open <- function(stream) {
+  .Call(`_arrow_ipc___feather___Reader__Open`, stream)
 }
 
-ipc___feather___Reader__schema <- function(reader){
-    .Call(`_arrow_ipc___feather___Reader__schema`, reader)
+ipc___feather___Reader__schema <- function(reader) {
+  .Call(`_arrow_ipc___feather___Reader__schema`, reader)
 }
 
-Field__initialize <- function(name, field, nullable){
-    .Call(`_arrow_Field__initialize`, name, field, nullable)
+Field__initialize <- function(name, field, nullable) {
+  .Call(`_arrow_Field__initialize`, name, field, nullable)
 }
 
-Field__ToString <- function(field){
-    .Call(`_arrow_Field__ToString`, field)
+Field__ToString <- function(field) {
+  .Call(`_arrow_Field__ToString`, field)
 }
 
-Field__name <- function(field){
-    .Call(`_arrow_Field__name`, field)
+Field__name <- function(field) {
+  .Call(`_arrow_Field__name`, field)
 }
 
-Field__Equals <- function(field, other){
-    .Call(`_arrow_Field__Equals`, field, other)
+Field__Equals <- function(field, other) {
+  .Call(`_arrow_Field__Equals`, field, other)
 }
 
-Field__nullable <- function(field){
-    .Call(`_arrow_Field__nullable`, field)
+Field__nullable <- function(field) {
+  .Call(`_arrow_Field__nullable`, field)
 }
 
-Field__type <- function(field){
-    .Call(`_arrow_Field__type`, field)
+Field__type <- function(field) {
+  .Call(`_arrow_Field__type`, field)
 }
 
-fs___FileInfo__type <- function(x){
-    .Call(`_arrow_fs___FileInfo__type`, x)
+fs___FileInfo__type <- function(x) {
+  .Call(`_arrow_fs___FileInfo__type`, x)
 }
 
-fs___FileInfo__set_type <- function(x, type){
-    invisible(.Call(`_arrow_fs___FileInfo__set_type`, x, type))
+fs___FileInfo__set_type <- function(x, type) {
+  invisible(.Call(`_arrow_fs___FileInfo__set_type`, x, type))
 }
 
-fs___FileInfo__path <- function(x){
-    .Call(`_arrow_fs___FileInfo__path`, x)
+fs___FileInfo__path <- function(x) {
+  .Call(`_arrow_fs___FileInfo__path`, x)
 }
 
-fs___FileInfo__set_path <- function(x, path){
-    invisible(.Call(`_arrow_fs___FileInfo__set_path`, x, path))
+fs___FileInfo__set_path <- function(x, path) {
+  invisible(.Call(`_arrow_fs___FileInfo__set_path`, x, path))
 }
 
-fs___FileInfo__size <- function(x){
-    .Call(`_arrow_fs___FileInfo__size`, x)
+fs___FileInfo__size <- function(x) {
+  .Call(`_arrow_fs___FileInfo__size`, x)
 }
 
-fs___FileInfo__set_size <- function(x, size){
-    invisible(.Call(`_arrow_fs___FileInfo__set_size`, x, size))
+fs___FileInfo__set_size <- function(x, size) {
+  invisible(.Call(`_arrow_fs___FileInfo__set_size`, x, size))
 }
 
-fs___FileInfo__base_name <- function(x){
-    .Call(`_arrow_fs___FileInfo__base_name`, x)
+fs___FileInfo__base_name <- function(x) {
+  .Call(`_arrow_fs___FileInfo__base_name`, x)
 }
 
-fs___FileInfo__extension <- function(x){
-    .Call(`_arrow_fs___FileInfo__extension`, x)
+fs___FileInfo__extension <- function(x) {
+  .Call(`_arrow_fs___FileInfo__extension`, x)
 }
 
-fs___FileInfo__mtime <- function(x){
-    .Call(`_arrow_fs___FileInfo__mtime`, x)
+fs___FileInfo__mtime <- function(x) {
+  .Call(`_arrow_fs___FileInfo__mtime`, x)
 }
 
-fs___FileInfo__set_mtime <- function(x, time){
-    invisible(.Call(`_arrow_fs___FileInfo__set_mtime`, x, time))
+fs___FileInfo__set_mtime <- function(x, time) {
+  invisible(.Call(`_arrow_fs___FileInfo__set_mtime`, x, time))
 }
 
-fs___FileSelector__base_dir <- function(selector){
-    .Call(`_arrow_fs___FileSelector__base_dir`, selector)
+fs___FileSelector__base_dir <- function(selector) {
+  .Call(`_arrow_fs___FileSelector__base_dir`, selector)
 }
 
-fs___FileSelector__allow_not_found <- function(selector){
-    .Call(`_arrow_fs___FileSelector__allow_not_found`, selector)
+fs___FileSelector__allow_not_found <- function(selector) {
+  .Call(`_arrow_fs___FileSelector__allow_not_found`, selector)
 }
 
-fs___FileSelector__recursive <- function(selector){
-    .Call(`_arrow_fs___FileSelector__recursive`, selector)
+fs___FileSelector__recursive <- function(selector) {
+  .Call(`_arrow_fs___FileSelector__recursive`, selector)
 }
 
-fs___FileSelector__create <- function(base_dir, allow_not_found, recursive){
-    .Call(`_arrow_fs___FileSelector__create`, base_dir, allow_not_found, recursive)
+fs___FileSelector__create <- function(base_dir, allow_not_found, recursive) {
+  .Call(`_arrow_fs___FileSelector__create`, base_dir, allow_not_found, recursive)
 }
 
-fs___FileSystem__GetTargetInfos_Paths <- function(file_system, paths){
-    .Call(`_arrow_fs___FileSystem__GetTargetInfos_Paths`, file_system, paths)
+fs___FileSystem__GetTargetInfos_Paths <- function(file_system, paths) {
+  .Call(`_arrow_fs___FileSystem__GetTargetInfos_Paths`, file_system, paths)
 }
 
-fs___FileSystem__GetTargetInfos_FileSelector <- function(file_system, selector){
-    .Call(`_arrow_fs___FileSystem__GetTargetInfos_FileSelector`, file_system, selector)
+fs___FileSystem__GetTargetInfos_FileSelector <- function(file_system, selector) {
+  .Call(`_arrow_fs___FileSystem__GetTargetInfos_FileSelector`, file_system, selector)
 }
 
-fs___FileSystem__CreateDir <- function(file_system, path, recursive){
-    invisible(.Call(`_arrow_fs___FileSystem__CreateDir`, file_system, path, recursive))
+fs___FileSystem__CreateDir <- function(file_system, path, recursive) {
+  invisible(.Call(`_arrow_fs___FileSystem__CreateDir`, file_system, path, recursive))
 }
 
-fs___FileSystem__DeleteDir <- function(file_system, path){
-    invisible(.Call(`_arrow_fs___FileSystem__DeleteDir`, file_system, path))
+fs___FileSystem__DeleteDir <- function(file_system, path) {
+  invisible(.Call(`_arrow_fs___FileSystem__DeleteDir`, file_system, path))
 }
 
-fs___FileSystem__DeleteDirContents <- function(file_system, path){
-    invisible(.Call(`_arrow_fs___FileSystem__DeleteDirContents`, file_system, path))
+fs___FileSystem__DeleteDirContents <- function(file_system, path) {
+  invisible(.Call(`_arrow_fs___FileSystem__DeleteDirContents`, file_system, path))
 }
 
-fs___FileSystem__DeleteFile <- function(file_system, path){
-    invisible(.Call(`_arrow_fs___FileSystem__DeleteFile`, file_system, path))
+fs___FileSystem__DeleteFile <- function(file_system, path) {
+  invisible(.Call(`_arrow_fs___FileSystem__DeleteFile`, file_system, path))
 }
 
-fs___FileSystem__DeleteFiles <- function(file_system, paths){
-    invisible(.Call(`_arrow_fs___FileSystem__DeleteFiles`, file_system, paths))
+fs___FileSystem__DeleteFiles <- function(file_system, paths) {
+  invisible(.Call(`_arrow_fs___FileSystem__DeleteFiles`, file_system, paths))
 }
 
-fs___FileSystem__Move <- function(file_system, src, dest){
-    invisible(.Call(`_arrow_fs___FileSystem__Move`, file_system, src, dest))
+fs___FileSystem__Move <- function(file_system, src, dest) {
+  invisible(.Call(`_arrow_fs___FileSystem__Move`, file_system, src, dest))
 }
 
-fs___FileSystem__CopyFile <- function(file_system, src, dest){
-    invisible(.Call(`_arrow_fs___FileSystem__CopyFile`, file_system, src, dest))
+fs___FileSystem__CopyFile <- function(file_system, src, dest) {
+  invisible(.Call(`_arrow_fs___FileSystem__CopyFile`, file_system, src, dest))
 }
 
-fs___FileSystem__OpenInputStream <- function(file_system, path){
-    .Call(`_arrow_fs___FileSystem__OpenInputStream`, file_system, path)
+fs___FileSystem__OpenInputStream <- function(file_system, path) {
+  .Call(`_arrow_fs___FileSystem__OpenInputStream`, file_system, path)
 }
 
-fs___FileSystem__OpenInputFile <- function(file_system, path){
-    .Call(`_arrow_fs___FileSystem__OpenInputFile`, file_system, path)
+fs___FileSystem__OpenInputFile <- function(file_system, path) {
+  .Call(`_arrow_fs___FileSystem__OpenInputFile`, file_system, path)
 }
 
-fs___FileSystem__OpenOutputStream <- function(file_system, path){
-    .Call(`_arrow_fs___FileSystem__OpenOutputStream`, file_system, path)
+fs___FileSystem__OpenOutputStream <- function(file_system, path) {
+  .Call(`_arrow_fs___FileSystem__OpenOutputStream`, file_system, path)
 }
 
-fs___FileSystem__OpenAppendStream <- function(file_system, path){
-    .Call(`_arrow_fs___FileSystem__OpenAppendStream`, file_system, path)
+fs___FileSystem__OpenAppendStream <- function(file_system, path) {
+  .Call(`_arrow_fs___FileSystem__OpenAppendStream`, file_system, path)
 }
 
-fs___FileSystem__type_name <- function(file_system){
-    .Call(`_arrow_fs___FileSystem__type_name`, file_system)
+fs___FileSystem__type_name <- function(file_system) {
+  .Call(`_arrow_fs___FileSystem__type_name`, file_system)
 }
 
-fs___LocalFileSystem__create <- function(){
-    .Call(`_arrow_fs___LocalFileSystem__create`)
+fs___LocalFileSystem__create <- function() {
+  .Call(`_arrow_fs___LocalFileSystem__create`)
 }
 
-fs___SubTreeFileSystem__create <- function(base_path, base_fs){
-    .Call(`_arrow_fs___SubTreeFileSystem__create`, base_path, base_fs)
+fs___SubTreeFileSystem__create <- function(base_path, base_fs) {
+  .Call(`_arrow_fs___SubTreeFileSystem__create`, base_path, base_fs)
 }
 
-fs___SubTreeFileSystem__base_fs <- function(file_system){
-    .Call(`_arrow_fs___SubTreeFileSystem__base_fs`, file_system)
+fs___SubTreeFileSystem__base_fs <- function(file_system) {
+  .Call(`_arrow_fs___SubTreeFileSystem__base_fs`, file_system)
 }
 
-fs___SubTreeFileSystem__base_path <- function(file_system){
-    .Call(`_arrow_fs___SubTreeFileSystem__base_path`, file_system)
+fs___SubTreeFileSystem__base_path <- function(file_system) {
+  .Call(`_arrow_fs___SubTreeFileSystem__base_path`, file_system)
 }
 
-fs___FileSystemFromUri <- function(path){
-    .Call(`_arrow_fs___FileSystemFromUri`, path)
+fs___FileSystemFromUri <- function(path) {
+  .Call(`_arrow_fs___FileSystemFromUri`, path)
 }
 
-fs___CopyFiles <- function(source_fs, source_sel, destination_fs, destination_base_dir, chunk_size, use_threads){
-    invisible(.Call(`_arrow_fs___CopyFiles`, source_fs, source_sel, destination_fs, destination_base_dir, chunk_size, use_threads))
+fs___CopyFiles <- function(source_fs, source_sel, destination_fs, destination_base_dir, chunk_size, use_threads) {
+  invisible(.Call(`_arrow_fs___CopyFiles`, source_fs, source_sel, destination_fs, destination_base_dir, chunk_size, use_threads))
 }
 
-fs___S3FileSystem__create <- function(anonymous, access_key, secret_key, session_token, role_arn, session_name, external_id, load_frequency, region, endpoint_override, scheme, background_writes){
-    .Call(`_arrow_fs___S3FileSystem__create`, anonymous, access_key, secret_key, session_token, role_arn, session_name, external_id, load_frequency, region, endpoint_override, scheme, background_writes)
+fs___S3FileSystem__create <- function(anonymous, access_key, secret_key, session_token, role_arn, session_name, external_id, load_frequency, region, endpoint_override, scheme, background_writes) {
+  .Call(`_arrow_fs___S3FileSystem__create`, anonymous, access_key, secret_key, session_token, role_arn, session_name, external_id, load_frequency, region, endpoint_override, scheme, background_writes)
 }
 
-fs___S3FileSystem__region <- function(fs){
-    .Call(`_arrow_fs___S3FileSystem__region`, fs)
+fs___S3FileSystem__region <- function(fs) {
+  .Call(`_arrow_fs___S3FileSystem__region`, fs)
 }
 
-io___Readable__Read <- function(x, nbytes){
-    .Call(`_arrow_io___Readable__Read`, x, nbytes)
+io___Readable__Read <- function(x, nbytes) {
+  .Call(`_arrow_io___Readable__Read`, x, nbytes)
 }
 
-io___InputStream__Close <- function(x){
-    invisible(.Call(`_arrow_io___InputStream__Close`, x))
+io___InputStream__Close <- function(x) {
+  invisible(.Call(`_arrow_io___InputStream__Close`, x))
 }
 
-io___OutputStream__Close <- function(x){
-    invisible(.Call(`_arrow_io___OutputStream__Close`, x))
+io___OutputStream__Close <- function(x) {
+  invisible(.Call(`_arrow_io___OutputStream__Close`, x))
 }
 
-io___RandomAccessFile__GetSize <- function(x){
-    .Call(`_arrow_io___RandomAccessFile__GetSize`, x)
+io___RandomAccessFile__GetSize <- function(x) {
+  .Call(`_arrow_io___RandomAccessFile__GetSize`, x)
 }
 
-io___RandomAccessFile__supports_zero_copy <- function(x){
-    .Call(`_arrow_io___RandomAccessFile__supports_zero_copy`, x)
+io___RandomAccessFile__supports_zero_copy <- function(x) {
+  .Call(`_arrow_io___RandomAccessFile__supports_zero_copy`, x)
 }
 
-io___RandomAccessFile__Seek <- function(x, position){
-    invisible(.Call(`_arrow_io___RandomAccessFile__Seek`, x, position))
+io___RandomAccessFile__Seek <- function(x, position) {
+  invisible(.Call(`_arrow_io___RandomAccessFile__Seek`, x, position))
 }
 
-io___RandomAccessFile__Tell <- function(x){
-    .Call(`_arrow_io___RandomAccessFile__Tell`, x)
+io___RandomAccessFile__Tell <- function(x) {
+  .Call(`_arrow_io___RandomAccessFile__Tell`, x)
 }
 
-io___RandomAccessFile__Read0 <- function(x){
-    .Call(`_arrow_io___RandomAccessFile__Read0`, x)
+io___RandomAccessFile__Read0 <- function(x) {
+  .Call(`_arrow_io___RandomAccessFile__Read0`, x)
 }
 
-io___RandomAccessFile__ReadAt <- function(x, position, nbytes){
-    .Call(`_arrow_io___RandomAccessFile__ReadAt`, x, position, nbytes)
+io___RandomAccessFile__ReadAt <- function(x, position, nbytes) {
+  .Call(`_arrow_io___RandomAccessFile__ReadAt`, x, position, nbytes)
 }
 
-io___MemoryMappedFile__Create <- function(path, size){
-    .Call(`_arrow_io___MemoryMappedFile__Create`, path, size)
+io___MemoryMappedFile__Create <- function(path, size) {
+  .Call(`_arrow_io___MemoryMappedFile__Create`, path, size)
 }
 
-io___MemoryMappedFile__Open <- function(path, mode){
-    .Call(`_arrow_io___MemoryMappedFile__Open`, path, mode)
+io___MemoryMappedFile__Open <- function(path, mode) {
+  .Call(`_arrow_io___MemoryMappedFile__Open`, path, mode)
 }
 
-io___MemoryMappedFile__Resize <- function(x, size){
-    invisible(.Call(`_arrow_io___MemoryMappedFile__Resize`, x, size))
+io___MemoryMappedFile__Resize <- function(x, size) {
+  invisible(.Call(`_arrow_io___MemoryMappedFile__Resize`, x, size))
 }
 
-io___ReadableFile__Open <- function(path){
-    .Call(`_arrow_io___ReadableFile__Open`, path)
+io___ReadableFile__Open <- function(path) {
+  .Call(`_arrow_io___ReadableFile__Open`, path)
 }
 
-io___BufferReader__initialize <- function(buffer){
-    .Call(`_arrow_io___BufferReader__initialize`, buffer)
+io___BufferReader__initialize <- function(buffer) {
+  .Call(`_arrow_io___BufferReader__initialize`, buffer)
 }
 
-io___Writable__write <- function(stream, buf){
-    invisible(.Call(`_arrow_io___Writable__write`, stream, buf))
+io___Writable__write <- function(stream, buf) {
+  invisible(.Call(`_arrow_io___Writable__write`, stream, buf))
 }
 
-io___OutputStream__Tell <- function(stream){
-    .Call(`_arrow_io___OutputStream__Tell`, stream)
+io___OutputStream__Tell <- function(stream) {
+  .Call(`_arrow_io___OutputStream__Tell`, stream)
 }
 
-io___FileOutputStream__Open <- function(path){
-    .Call(`_arrow_io___FileOutputStream__Open`, path)
+io___FileOutputStream__Open <- function(path) {
+  .Call(`_arrow_io___FileOutputStream__Open`, path)
 }
 
-io___BufferOutputStream__Create <- function(initial_capacity){
-    .Call(`_arrow_io___BufferOutputStream__Create`, initial_capacity)
+io___BufferOutputStream__Create <- function(initial_capacity) {
+  .Call(`_arrow_io___BufferOutputStream__Create`, initial_capacity)
 }
 
-io___BufferOutputStream__capacity <- function(stream){
-    .Call(`_arrow_io___BufferOutputStream__capacity`, stream)
+io___BufferOutputStream__capacity <- function(stream) {
+  .Call(`_arrow_io___BufferOutputStream__capacity`, stream)
 }
 
-io___BufferOutputStream__Finish <- function(stream){
-    .Call(`_arrow_io___BufferOutputStream__Finish`, stream)
+io___BufferOutputStream__Finish <- function(stream) {
+  .Call(`_arrow_io___BufferOutputStream__Finish`, stream)
 }
 
-io___BufferOutputStream__Tell <- function(stream){
-    .Call(`_arrow_io___BufferOutputStream__Tell`, stream)
+io___BufferOutputStream__Tell <- function(stream) {
+  .Call(`_arrow_io___BufferOutputStream__Tell`, stream)
 }
 
-io___BufferOutputStream__Write <- function(stream, bytes){
-    invisible(.Call(`_arrow_io___BufferOutputStream__Write`, stream, bytes))
+io___BufferOutputStream__Write <- function(stream, bytes) {
+  invisible(.Call(`_arrow_io___BufferOutputStream__Write`, stream, bytes))
 }
 
-json___ReadOptions__initialize <- function(use_threads, block_size){
-    .Call(`_arrow_json___ReadOptions__initialize`, use_threads, block_size)
+json___ReadOptions__initialize <- function(use_threads, block_size) {
+  .Call(`_arrow_json___ReadOptions__initialize`, use_threads, block_size)
 }
 
-json___ParseOptions__initialize1 <- function(newlines_in_values){
-    .Call(`_arrow_json___ParseOptions__initialize1`, newlines_in_values)
+json___ParseOptions__initialize1 <- function(newlines_in_values) {
+  .Call(`_arrow_json___ParseOptions__initialize1`, newlines_in_values)
 }
 
-json___ParseOptions__initialize2 <- function(newlines_in_values, explicit_schema){
-    .Call(`_arrow_json___ParseOptions__initialize2`, newlines_in_values, explicit_schema)
+json___ParseOptions__initialize2 <- function(newlines_in_values, explicit_schema) {
+  .Call(`_arrow_json___ParseOptions__initialize2`, newlines_in_values, explicit_schema)
 }
 
-json___TableReader__Make <- function(input, read_options, parse_options){
-    .Call(`_arrow_json___TableReader__Make`, input, read_options, parse_options)
+json___TableReader__Make <- function(input, read_options, parse_options) {
+  .Call(`_arrow_json___TableReader__Make`, input, read_options, parse_options)
 }
 
-json___TableReader__Read <- function(table_reader){
-    .Call(`_arrow_json___TableReader__Read`, table_reader)
+json___TableReader__Read <- function(table_reader) {
+  .Call(`_arrow_json___TableReader__Read`, table_reader)
 }
 
-MemoryPool__default <- function(){
-    .Call(`_arrow_MemoryPool__default`)
+MemoryPool__default <- function() {
+  .Call(`_arrow_MemoryPool__default`)
 }
 
-MemoryPool__bytes_allocated <- function(pool){
-    .Call(`_arrow_MemoryPool__bytes_allocated`, pool)
+MemoryPool__bytes_allocated <- function(pool) {
+  .Call(`_arrow_MemoryPool__bytes_allocated`, pool)
 }
 
-MemoryPool__max_memory <- function(pool){
-    .Call(`_arrow_MemoryPool__max_memory`, pool)
+MemoryPool__max_memory <- function(pool) {
+  .Call(`_arrow_MemoryPool__max_memory`, pool)
 }
 
-MemoryPool__backend_name <- function(pool){
-    .Call(`_arrow_MemoryPool__backend_name`, pool)
+MemoryPool__backend_name <- function(pool) {
+  .Call(`_arrow_MemoryPool__backend_name`, pool)
 }
 
-supported_memory_backends <- function(){
-    .Call(`_arrow_supported_memory_backends`)
+supported_memory_backends <- function() {
+  .Call(`_arrow_supported_memory_backends`)
 }
 
-ipc___Message__body_length <- function(message){
-    .Call(`_arrow_ipc___Message__body_length`, message)
+ipc___Message__body_length <- function(message) {
+  .Call(`_arrow_ipc___Message__body_length`, message)
 }
 
-ipc___Message__metadata <- function(message){
-    .Call(`_arrow_ipc___Message__metadata`, message)
+ipc___Message__metadata <- function(message) {
+  .Call(`_arrow_ipc___Message__metadata`, message)
 }
 
-ipc___Message__body <- function(message){
-    .Call(`_arrow_ipc___Message__body`, message)
+ipc___Message__body <- function(message) {
+  .Call(`_arrow_ipc___Message__body`, message)
 }
 
-ipc___Message__Verify <- function(message){
-    .Call(`_arrow_ipc___Message__Verify`, message)
+ipc___Message__Verify <- function(message) {
+  .Call(`_arrow_ipc___Message__Verify`, message)
 }
 
-ipc___Message__type <- function(message){
-    .Call(`_arrow_ipc___Message__type`, message)
+ipc___Message__type <- function(message) {
+  .Call(`_arrow_ipc___Message__type`, message)
 }
 
-ipc___Message__Equals <- function(x, y){
-    .Call(`_arrow_ipc___Message__Equals`, x, y)
+ipc___Message__Equals <- function(x, y) {
+  .Call(`_arrow_ipc___Message__Equals`, x, y)
 }
 
-ipc___ReadRecordBatch__Message__Schema <- function(message, schema){
-    .Call(`_arrow_ipc___ReadRecordBatch__Message__Schema`, message, schema)
+ipc___ReadRecordBatch__Message__Schema <- function(message, schema) {
+  .Call(`_arrow_ipc___ReadRecordBatch__Message__Schema`, message, schema)
 }
 
-ipc___ReadSchema_InputStream <- function(stream){
-    .Call(`_arrow_ipc___ReadSchema_InputStream`, stream)
+ipc___ReadSchema_InputStream <- function(stream) {
+  .Call(`_arrow_ipc___ReadSchema_InputStream`, stream)
 }
 
-ipc___ReadSchema_Message <- function(message){
-    .Call(`_arrow_ipc___ReadSchema_Message`, message)
+ipc___ReadSchema_Message <- function(message) {
+  .Call(`_arrow_ipc___ReadSchema_Message`, message)
 }
 
-ipc___MessageReader__Open <- function(stream){
-    .Call(`_arrow_ipc___MessageReader__Open`, stream)
+ipc___MessageReader__Open <- function(stream) {
+  .Call(`_arrow_ipc___MessageReader__Open`, stream)
 }
 
-ipc___MessageReader__ReadNextMessage <- function(reader){
-    .Call(`_arrow_ipc___MessageReader__ReadNextMessage`, reader)
+ipc___MessageReader__ReadNextMessage <- function(reader) {
+  .Call(`_arrow_ipc___MessageReader__ReadNextMessage`, reader)
 }
 
-ipc___ReadMessage <- function(stream){
-    .Call(`_arrow_ipc___ReadMessage`, stream)
+ipc___ReadMessage <- function(stream) {
+  .Call(`_arrow_ipc___ReadMessage`, stream)
 }
 
-parquet___arrow___ArrowReaderProperties__Make <- function(use_threads){
-    .Call(`_arrow_parquet___arrow___ArrowReaderProperties__Make`, use_threads)
+parquet___arrow___ArrowReaderProperties__Make <- function(use_threads) {
+  .Call(`_arrow_parquet___arrow___ArrowReaderProperties__Make`, use_threads)
 }
 
-parquet___arrow___ArrowReaderProperties__set_use_threads <- function(properties, use_threads){
-    invisible(.Call(`_arrow_parquet___arrow___ArrowReaderProperties__set_use_threads`, properties, use_threads))
+parquet___arrow___ArrowReaderProperties__set_use_threads <- function(properties, use_threads) {
+  invisible(.Call(`_arrow_parquet___arrow___ArrowReaderProperties__set_use_threads`, properties, use_threads))
 }
 
-parquet___arrow___ArrowReaderProperties__get_use_threads <- function(properties, use_threads){
-    .Call(`_arrow_parquet___arrow___ArrowReaderProperties__get_use_threads`, properties, use_threads)
+parquet___arrow___ArrowReaderProperties__get_use_threads <- function(properties, use_threads) {
+  .Call(`_arrow_parquet___arrow___ArrowReaderProperties__get_use_threads`, properties, use_threads)
 }
 
-parquet___arrow___ArrowReaderProperties__get_read_dictionary <- function(properties, column_index){
-    .Call(`_arrow_parquet___arrow___ArrowReaderProperties__get_read_dictionary`, properties, column_index)
+parquet___arrow___ArrowReaderProperties__get_read_dictionary <- function(properties, column_index) {
+  .Call(`_arrow_parquet___arrow___ArrowReaderProperties__get_read_dictionary`, properties, column_index)
 }
 
-parquet___arrow___ArrowReaderProperties__set_read_dictionary <- function(properties, column_index, read_dict){
-    invisible(.Call(`_arrow_parquet___arrow___ArrowReaderProperties__set_read_dictionary`, properties, column_index, read_dict))
+parquet___arrow___ArrowReaderProperties__set_read_dictionary <- function(properties, column_index, read_dict) {
+  invisible(.Call(`_arrow_parquet___arrow___ArrowReaderProperties__set_read_dictionary`, properties, column_index, read_dict))
 }
 
-parquet___arrow___FileReader__OpenFile <- function(file, props){
-    .Call(`_arrow_parquet___arrow___FileReader__OpenFile`, file, props)
+parquet___arrow___FileReader__OpenFile <- function(file, props) {
+  .Call(`_arrow_parquet___arrow___FileReader__OpenFile`, file, props)
 }
 
-parquet___arrow___FileReader__ReadTable1 <- function(reader){
-    .Call(`_arrow_parquet___arrow___FileReader__ReadTable1`, reader)
+parquet___arrow___FileReader__ReadTable1 <- function(reader) {
+  .Call(`_arrow_parquet___arrow___FileReader__ReadTable1`, reader)
 }
 
-parquet___arrow___FileReader__ReadTable2 <- function(reader, column_indices){
-    .Call(`_arrow_parquet___arrow___FileReader__ReadTable2`, reader, column_indices)
+parquet___arrow___FileReader__ReadTable2 <- function(reader, column_indices) {
+  .Call(`_arrow_parquet___arrow___FileReader__ReadTable2`, reader, column_indices)
 }
 
-parquet___arrow___FileReader__ReadRowGroup1 <- function(reader, i){
-    .Call(`_arrow_parquet___arrow___FileReader__ReadRowGroup1`, reader, i)
+parquet___arrow___FileReader__ReadRowGroup1 <- function(reader, i) {
+  .Call(`_arrow_parquet___arrow___FileReader__ReadRowGroup1`, reader, i)
 }
 
-parquet___arrow___FileReader__ReadRowGroup2 <- function(reader, i, column_indices){
-    .Call(`_arrow_parquet___arrow___FileReader__ReadRowGroup2`, reader, i, column_indices)
+parquet___arrow___FileReader__ReadRowGroup2 <- function(reader, i, column_indices) {
+  .Call(`_arrow_parquet___arrow___FileReader__ReadRowGroup2`, reader, i, column_indices)
 }
 
-parquet___arrow___FileReader__ReadRowGroups1 <- function(reader, row_groups){
-    .Call(`_arrow_parquet___arrow___FileReader__ReadRowGroups1`, reader, row_groups)
+parquet___arrow___FileReader__ReadRowGroups1 <- function(reader, row_groups) {
+  .Call(`_arrow_parquet___arrow___FileReader__ReadRowGroups1`, reader, row_groups)
 }
 
-parquet___arrow___FileReader__ReadRowGroups2 <- function(reader, row_groups, column_indices){
-    .Call(`_arrow_parquet___arrow___FileReader__ReadRowGroups2`, reader, row_groups, column_indices)
+parquet___arrow___FileReader__ReadRowGroups2 <- function(reader, row_groups, column_indices) {
+  .Call(`_arrow_parquet___arrow___FileReader__ReadRowGroups2`, reader, row_groups, column_indices)
 }
 
-parquet___arrow___FileReader__num_rows <- function(reader){
-    .Call(`_arrow_parquet___arrow___FileReader__num_rows`, reader)
+parquet___arrow___FileReader__num_rows <- function(reader) {
+  .Call(`_arrow_parquet___arrow___FileReader__num_rows`, reader)
 }
 
-parquet___arrow___FileReader__num_columns <- function(reader){
-    .Call(`_arrow_parquet___arrow___FileReader__num_columns`, reader)
+parquet___arrow___FileReader__num_columns <- function(reader) {
+  .Call(`_arrow_parquet___arrow___FileReader__num_columns`, reader)
 }
 
-parquet___arrow___FileReader__num_row_groups <- function(reader){
-    .Call(`_arrow_parquet___arrow___FileReader__num_row_groups`, reader)
+parquet___arrow___FileReader__num_row_groups <- function(reader) {
+  .Call(`_arrow_parquet___arrow___FileReader__num_row_groups`, reader)
 }
 
-parquet___arrow___FileReader__ReadColumn <- function(reader, i){
-    .Call(`_arrow_parquet___arrow___FileReader__ReadColumn`, reader, i)
+parquet___arrow___FileReader__ReadColumn <- function(reader, i) {
+  .Call(`_arrow_parquet___arrow___FileReader__ReadColumn`, reader, i)
 }
 
-parquet___ArrowWriterProperties___create <- function(allow_truncated_timestamps, use_deprecated_int96_timestamps, timestamp_unit){
-    .Call(`_arrow_parquet___ArrowWriterProperties___create`, allow_truncated_timestamps, use_deprecated_int96_timestamps, timestamp_unit)
+parquet___ArrowWriterProperties___create <- function(allow_truncated_timestamps, use_deprecated_int96_timestamps, timestamp_unit) {
+  .Call(`_arrow_parquet___ArrowWriterProperties___create`, allow_truncated_timestamps, use_deprecated_int96_timestamps, timestamp_unit)
 }
 
-parquet___WriterProperties___Builder__create <- function(){
-    .Call(`_arrow_parquet___WriterProperties___Builder__create`)
+parquet___WriterProperties___Builder__create <- function() {
+  .Call(`_arrow_parquet___WriterProperties___Builder__create`)
 }
 
-parquet___WriterProperties___Builder__version <- function(builder, version){
-    invisible(.Call(`_arrow_parquet___WriterProperties___Builder__version`, builder, version))
+parquet___WriterProperties___Builder__version <- function(builder, version) {
+  invisible(.Call(`_arrow_parquet___WriterProperties___Builder__version`, builder, version))
 }
 
-parquet___ArrowWriterProperties___Builder__set_compressions <- function(builder, paths, types){
-    invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__set_compressions`, builder, paths, types))
+parquet___ArrowWriterProperties___Builder__set_compressions <- function(builder, paths, types) {
+  invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__set_compressions`, builder, paths, types))
 }
 
-parquet___ArrowWriterProperties___Builder__set_compression_levels <- function(builder, paths, levels){
-    invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__set_compression_levels`, builder, paths, levels))
+parquet___ArrowWriterProperties___Builder__set_compression_levels <- function(builder, paths, levels) {
+  invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__set_compression_levels`, builder, paths, levels))
 }
 
-parquet___ArrowWriterProperties___Builder__set_use_dictionary <- function(builder, paths, use_dictionary){
-    invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__set_use_dictionary`, builder, paths, use_dictionary))
+parquet___ArrowWriterProperties___Builder__set_use_dictionary <- function(builder, paths, use_dictionary) {
+  invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__set_use_dictionary`, builder, paths, use_dictionary))
 }
 
-parquet___ArrowWriterProperties___Builder__set_write_statistics <- function(builder, paths, write_statistics){
-    invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__set_write_statistics`, builder, paths, write_statistics))
+parquet___ArrowWriterProperties___Builder__set_write_statistics <- function(builder, paths, write_statistics) {
+  invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__set_write_statistics`, builder, paths, write_statistics))
 }
 
-parquet___ArrowWriterProperties___Builder__data_page_size <- function(builder, data_page_size){
-    invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__data_page_size`, builder, data_page_size))
+parquet___ArrowWriterProperties___Builder__data_page_size <- function(builder, data_page_size) {
+  invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__data_page_size`, builder, data_page_size))
 }
 
-parquet___WriterProperties___Builder__build <- function(builder){
-    .Call(`_arrow_parquet___WriterProperties___Builder__build`, builder)
+parquet___WriterProperties___Builder__build <- function(builder) {
+  .Call(`_arrow_parquet___WriterProperties___Builder__build`, builder)
 }
 
-parquet___arrow___ParquetFileWriter__Open <- function(schema, sink, properties, arrow_properties){
-    .Call(`_arrow_parquet___arrow___ParquetFileWriter__Open`, schema, sink, properties, arrow_properties)
+parquet___arrow___ParquetFileWriter__Open <- function(schema, sink, properties, arrow_properties) {
+  .Call(`_arrow_parquet___arrow___ParquetFileWriter__Open`, schema, sink, properties, arrow_properties)
 }
 
-parquet___arrow___FileWriter__WriteTable <- function(writer, table, chunk_size){
-    invisible(.Call(`_arrow_parquet___arrow___FileWriter__WriteTable`, writer, table, chunk_size))
+parquet___arrow___FileWriter__WriteTable <- function(writer, table, chunk_size) {
+  invisible(.Call(`_arrow_parquet___arrow___FileWriter__WriteTable`, writer, table, chunk_size))
 }
 
-parquet___arrow___FileWriter__Close <- function(writer){
-    invisible(.Call(`_arrow_parquet___arrow___FileWriter__Close`, writer))
+parquet___arrow___FileWriter__Close <- function(writer) {
+  invisible(.Call(`_arrow_parquet___arrow___FileWriter__Close`, writer))
 }
 
-parquet___arrow___WriteTable <- function(table, sink, properties, arrow_properties){
-    invisible(.Call(`_arrow_parquet___arrow___WriteTable`, table, sink, properties, arrow_properties))
+parquet___arrow___WriteTable <- function(table, sink, properties, arrow_properties) {
+  invisible(.Call(`_arrow_parquet___arrow___WriteTable`, table, sink, properties, arrow_properties))
 }
 
-parquet___arrow___FileReader__GetSchema <- function(reader){
-    .Call(`_arrow_parquet___arrow___FileReader__GetSchema`, reader)
+parquet___arrow___FileReader__GetSchema <- function(reader) {
+  .Call(`_arrow_parquet___arrow___FileReader__GetSchema`, reader)
 }
 
-allocate_arrow_schema <- function(){
-    .Call(`_arrow_allocate_arrow_schema`)
+allocate_arrow_schema <- function() {
+  .Call(`_arrow_allocate_arrow_schema`)
 }
 
-delete_arrow_schema <- function(ptr){
-    invisible(.Call(`_arrow_delete_arrow_schema`, ptr))
+delete_arrow_schema <- function(ptr) {
+  invisible(.Call(`_arrow_delete_arrow_schema`, ptr))
 }
 
-allocate_arrow_array <- function(){
-    .Call(`_arrow_allocate_arrow_array`)
+allocate_arrow_array <- function() {
+  .Call(`_arrow_allocate_arrow_array`)
 }
 
-delete_arrow_array <- function(ptr){
-    invisible(.Call(`_arrow_delete_arrow_array`, ptr))
+delete_arrow_array <- function(ptr) {
+  invisible(.Call(`_arrow_delete_arrow_array`, ptr))
 }
 
-allocate_arrow_array_stream <- function(){
-    .Call(`_arrow_allocate_arrow_array_stream`)
+allocate_arrow_array_stream <- function() {
+  .Call(`_arrow_allocate_arrow_array_stream`)
 }
 
-delete_arrow_array_stream <- function(ptr){
-    invisible(.Call(`_arrow_delete_arrow_array_stream`, ptr))
+delete_arrow_array_stream <- function(ptr) {
+  invisible(.Call(`_arrow_delete_arrow_array_stream`, ptr))
 }
 
-ImportArray <- function(array, schema){
-    .Call(`_arrow_ImportArray`, array, schema)
+ImportArray <- function(array, schema) {
+  .Call(`_arrow_ImportArray`, array, schema)
 }
 
-ImportRecordBatch <- function(array, schema){
-    .Call(`_arrow_ImportRecordBatch`, array, schema)
+ImportRecordBatch <- function(array, schema) {
+  .Call(`_arrow_ImportRecordBatch`, array, schema)
 }
 
-ImportSchema <- function(schema){
-    .Call(`_arrow_ImportSchema`, schema)
+ImportSchema <- function(schema) {
+  .Call(`_arrow_ImportSchema`, schema)
 }
 
-ImportField <- function(field){
-    .Call(`_arrow_ImportField`, field)
+ImportField <- function(field) {
+  .Call(`_arrow_ImportField`, field)
 }
 
-ImportType <- function(type){
-    .Call(`_arrow_ImportType`, type)
+ImportType <- function(type) {
+  .Call(`_arrow_ImportType`, type)
 }
 
-ImportRecordBatchReader <- function(stream){
-    .Call(`_arrow_ImportRecordBatchReader`, stream)
+ImportRecordBatchReader <- function(stream) {
+  .Call(`_arrow_ImportRecordBatchReader`, stream)
 }
 
-ExportType <- function(type, ptr){
-    invisible(.Call(`_arrow_ExportType`, type, ptr))
+ExportType <- function(type, ptr) {
+  invisible(.Call(`_arrow_ExportType`, type, ptr))
 }
 
-ExportField <- function(field, ptr){
-    invisible(.Call(`_arrow_ExportField`, field, ptr))
+ExportField <- function(field, ptr) {
+  invisible(.Call(`_arrow_ExportField`, field, ptr))
 }
 
-ExportSchema <- function(schema, ptr){
-    invisible(.Call(`_arrow_ExportSchema`, schema, ptr))
+ExportSchema <- function(schema, ptr) {
+  invisible(.Call(`_arrow_ExportSchema`, schema, ptr))
 }
 
-ExportArray <- function(array, array_ptr, schema_ptr){
-    invisible(.Call(`_arrow_ExportArray`, array, array_ptr, schema_ptr))
+ExportArray <- function(array, array_ptr, schema_ptr) {
+  invisible(.Call(`_arrow_ExportArray`, array, array_ptr, schema_ptr))
 }
 
-ExportRecordBatch <- function(batch, array_ptr, schema_ptr){
-    invisible(.Call(`_arrow_ExportRecordBatch`, batch, array_ptr, schema_ptr))
+ExportRecordBatch <- function(batch, array_ptr, schema_ptr) {
+  invisible(.Call(`_arrow_ExportRecordBatch`, batch, array_ptr, schema_ptr))
 }
 
-ExportRecordBatchReader <- function(reader, stream_ptr){
-    invisible(.Call(`_arrow_ExportRecordBatchReader`, reader, stream_ptr))
+ExportRecordBatchReader <- function(reader, stream_ptr) {
+  invisible(.Call(`_arrow_ExportRecordBatchReader`, reader, stream_ptr))
 }
 
-Table__from_dots <- function(lst, schema_sxp, use_threads){
-    .Call(`_arrow_Table__from_dots`, lst, schema_sxp, use_threads)
+Table__from_dots <- function(lst, schema_sxp, use_threads) {
+  .Call(`_arrow_Table__from_dots`, lst, schema_sxp, use_threads)
 }
 
-vec_to_arrow <- function(x, s_type){
-    .Call(`_arrow_vec_to_arrow`, x, s_type)
+vec_to_arrow <- function(x, s_type) {
+  .Call(`_arrow_vec_to_arrow`, x, s_type)
 }
 
-DictionaryArray__FromArrays <- function(type, indices, dict){
-    .Call(`_arrow_DictionaryArray__FromArrays`, type, indices, dict)
+DictionaryArray__FromArrays <- function(type, indices, dict) {
+  .Call(`_arrow_DictionaryArray__FromArrays`, type, indices, dict)
 }
 
-RecordBatch__num_columns <- function(x){
-    .Call(`_arrow_RecordBatch__num_columns`, x)
+RecordBatch__num_columns <- function(x) {
+  .Call(`_arrow_RecordBatch__num_columns`, x)
 }
 
-RecordBatch__num_rows <- function(x){
-    .Call(`_arrow_RecordBatch__num_rows`, x)
+RecordBatch__num_rows <- function(x) {
+  .Call(`_arrow_RecordBatch__num_rows`, x)
 }
 
-RecordBatch__schema <- function(x){
-    .Call(`_arrow_RecordBatch__schema`, x)
+RecordBatch__schema <- function(x) {
+  .Call(`_arrow_RecordBatch__schema`, x)
 }
 
-RecordBatch__RenameColumns <- function(batch, names){
-    .Call(`_arrow_RecordBatch__RenameColumns`, batch, names)
+RecordBatch__RenameColumns <- function(batch, names) {
+  .Call(`_arrow_RecordBatch__RenameColumns`, batch, names)
 }
 
-RecordBatch__ReplaceSchemaMetadata <- function(x, metadata){
-    .Call(`_arrow_RecordBatch__ReplaceSchemaMetadata`, x, metadata)
+RecordBatch__ReplaceSchemaMetadata <- function(x, metadata) {
+  .Call(`_arrow_RecordBatch__ReplaceSchemaMetadata`, x, metadata)
 }
 
-RecordBatch__columns <- function(batch){
-    .Call(`_arrow_RecordBatch__columns`, batch)
+RecordBatch__columns <- function(batch) {
+  .Call(`_arrow_RecordBatch__columns`, batch)
 }
 
-RecordBatch__column <- function(batch, i){
-    .Call(`_arrow_RecordBatch__column`, batch, i)
+RecordBatch__column <- function(batch, i) {
+  .Call(`_arrow_RecordBatch__column`, batch, i)
 }
 
-RecordBatch__GetColumnByName <- function(batch, name){
-    .Call(`_arrow_RecordBatch__GetColumnByName`, batch, name)
+RecordBatch__GetColumnByName <- function(batch, name) {
+  .Call(`_arrow_RecordBatch__GetColumnByName`, batch, name)
 }
 
-RecordBatch__SelectColumns <- function(batch, indices){
-    .Call(`_arrow_RecordBatch__SelectColumns`, batch, indices)
+RecordBatch__SelectColumns <- function(batch, indices) {
+  .Call(`_arrow_RecordBatch__SelectColumns`, batch, indices)
 }
 
-RecordBatch__Equals <- function(self, other, check_metadata){
-    .Call(`_arrow_RecordBatch__Equals`, self, other, check_metadata)
+RecordBatch__Equals <- function(self, other, check_metadata) {
+  .Call(`_arrow_RecordBatch__Equals`, self, other, check_metadata)
 }
 
-RecordBatch__AddColumn <- function(batch, i, field, column){
-    .Call(`_arrow_RecordBatch__AddColumn`, batch, i, field, column)
+RecordBatch__AddColumn <- function(batch, i, field, column) {
+  .Call(`_arrow_RecordBatch__AddColumn`, batch, i, field, column)
 }
 
-RecordBatch__SetColumn <- function(batch, i, field, column){
-    .Call(`_arrow_RecordBatch__SetColumn`, batch, i, field, column)
+RecordBatch__SetColumn <- function(batch, i, field, column) {
+  .Call(`_arrow_RecordBatch__SetColumn`, batch, i, field, column)
 }
 
-RecordBatch__RemoveColumn <- function(batch, i){
-    .Call(`_arrow_RecordBatch__RemoveColumn`, batch, i)
+RecordBatch__RemoveColumn <- function(batch, i) {
+  .Call(`_arrow_RecordBatch__RemoveColumn`, batch, i)
 }
 
-RecordBatch__column_name <- function(batch, i){
-    .Call(`_arrow_RecordBatch__column_name`, batch, i)
+RecordBatch__column_name <- function(batch, i) {
+  .Call(`_arrow_RecordBatch__column_name`, batch, i)
 }
 
-RecordBatch__names <- function(batch){
-    .Call(`_arrow_RecordBatch__names`, batch)
+RecordBatch__names <- function(batch) {
+  .Call(`_arrow_RecordBatch__names`, batch)
 }
 
-RecordBatch__Slice1 <- function(self, offset){
-    .Call(`_arrow_RecordBatch__Slice1`, self, offset)
+RecordBatch__Slice1 <- function(self, offset) {
+  .Call(`_arrow_RecordBatch__Slice1`, self, offset)
 }
 
-RecordBatch__Slice2 <- function(self, offset, length){
-    .Call(`_arrow_RecordBatch__Slice2`, self, offset, length)
+RecordBatch__Slice2 <- function(self, offset, length) {
+  .Call(`_arrow_RecordBatch__Slice2`, self, offset, length)
 }
 
-ipc___SerializeRecordBatch__Raw <- function(batch){
-    .Call(`_arrow_ipc___SerializeRecordBatch__Raw`, batch)
+ipc___SerializeRecordBatch__Raw <- function(batch) {
+  .Call(`_arrow_ipc___SerializeRecordBatch__Raw`, batch)
 }
 
-ipc___ReadRecordBatch__InputStream__Schema <- function(stream, schema){
-    .Call(`_arrow_ipc___ReadRecordBatch__InputStream__Schema`, stream, schema)
+ipc___ReadRecordBatch__InputStream__Schema <- function(stream, schema) {
+  .Call(`_arrow_ipc___ReadRecordBatch__InputStream__Schema`, stream, schema)
 }
 
-RecordBatch__from_arrays <- function(schema_sxp, lst){
-    .Call(`_arrow_RecordBatch__from_arrays`, schema_sxp, lst)
+RecordBatch__from_arrays <- function(schema_sxp, lst) {
+  .Call(`_arrow_RecordBatch__from_arrays`, schema_sxp, lst)
 }
 
-RecordBatchReader__schema <- function(reader){
-    .Call(`_arrow_RecordBatchReader__schema`, reader)
+RecordBatchReader__schema <- function(reader) {
+  .Call(`_arrow_RecordBatchReader__schema`, reader)
 }
 
-RecordBatchReader__ReadNext <- function(reader){
-    .Call(`_arrow_RecordBatchReader__ReadNext`, reader)
+RecordBatchReader__ReadNext <- function(reader) {
+  .Call(`_arrow_RecordBatchReader__ReadNext`, reader)
 }
 
-RecordBatchReader__batches <- function(reader){
-    .Call(`_arrow_RecordBatchReader__batches`, reader)
+RecordBatchReader__batches <- function(reader) {
+  .Call(`_arrow_RecordBatchReader__batches`, reader)
 }
 
-Table__from_RecordBatchReader <- function(reader){
-    .Call(`_arrow_Table__from_RecordBatchReader`, reader)
+Table__from_RecordBatchReader <- function(reader) {
+  .Call(`_arrow_Table__from_RecordBatchReader`, reader)
 }
 
-ipc___RecordBatchStreamReader__Open <- function(stream){
-    .Call(`_arrow_ipc___RecordBatchStreamReader__Open`, stream)
+ipc___RecordBatchStreamReader__Open <- function(stream) {
+  .Call(`_arrow_ipc___RecordBatchStreamReader__Open`, stream)
 }
 
-ipc___RecordBatchFileReader__schema <- function(reader){
-    .Call(`_arrow_ipc___RecordBatchFileReader__schema`, reader)
+ipc___RecordBatchFileReader__schema <- function(reader) {
+  .Call(`_arrow_ipc___RecordBatchFileReader__schema`, reader)
 }
 
-ipc___RecordBatchFileReader__num_record_batches <- function(reader){
-    .Call(`_arrow_ipc___RecordBatchFileReader__num_record_batches`, reader)
+ipc___RecordBatchFileReader__num_record_batches <- function(reader) {
+  .Call(`_arrow_ipc___RecordBatchFileReader__num_record_batches`, reader)
 }
 
-ipc___RecordBatchFileReader__ReadRecordBatch <- function(reader, i){
-    .Call(`_arrow_ipc___RecordBatchFileReader__ReadRecordBatch`, reader, i)
+ipc___RecordBatchFileReader__ReadRecordBatch <- function(reader, i) {
+  .Call(`_arrow_ipc___RecordBatchFileReader__ReadRecordBatch`, reader, i)
 }
 
-ipc___RecordBatchFileReader__Open <- function(file){
-    .Call(`_arrow_ipc___RecordBatchFileReader__Open`, file)
+ipc___RecordBatchFileReader__Open <- function(file) {
+  .Call(`_arrow_ipc___RecordBatchFileReader__Open`, file)
 }
 
-Table__from_RecordBatchFileReader <- function(reader){
-    .Call(`_arrow_Table__from_RecordBatchFileReader`, reader)
+Table__from_RecordBatchFileReader <- function(reader) {
+  .Call(`_arrow_Table__from_RecordBatchFileReader`, reader)
 }
 
-ipc___RecordBatchFileReader__batches <- function(reader){
-    .Call(`_arrow_ipc___RecordBatchFileReader__batches`, reader)
+ipc___RecordBatchFileReader__batches <- function(reader) {
+  .Call(`_arrow_ipc___RecordBatchFileReader__batches`, reader)
 }
 
-ipc___RecordBatchWriter__WriteRecordBatch <- function(batch_writer, batch){
-    invisible(.Call(`_arrow_ipc___RecordBatchWriter__WriteRecordBatch`, batch_writer, batch))
+ipc___RecordBatchWriter__WriteRecordBatch <- function(batch_writer, batch) {
+  invisible(.Call(`_arrow_ipc___RecordBatchWriter__WriteRecordBatch`, batch_writer, batch))
 }
 
-ipc___RecordBatchWriter__WriteTable <- function(batch_writer, table){
-    invisible(.Call(`_arrow_ipc___RecordBatchWriter__WriteTable`, batch_writer, table))
+ipc___RecordBatchWriter__WriteTable <- function(batch_writer, table) {
+  invisible(.Call(`_arrow_ipc___RecordBatchWriter__WriteTable`, batch_writer, table))
 }
 
-ipc___RecordBatchWriter__Close <- function(batch_writer){
-    invisible(.Call(`_arrow_ipc___RecordBatchWriter__Close`, batch_writer))
+ipc___RecordBatchWriter__Close <- function(batch_writer) {
+  invisible(.Call(`_arrow_ipc___RecordBatchWriter__Close`, batch_writer))
 }
 
-ipc___RecordBatchFileWriter__Open <- function(stream, schema, use_legacy_format, metadata_version){
-    .Call(`_arrow_ipc___RecordBatchFileWriter__Open`, stream, schema, use_legacy_format, metadata_version)
+ipc___RecordBatchFileWriter__Open <- function(stream, schema, use_legacy_format, metadata_version) {
+  .Call(`_arrow_ipc___RecordBatchFileWriter__Open`, stream, schema, use_legacy_format, metadata_version)
 }
 
-ipc___RecordBatchStreamWriter__Open <- function(stream, schema, use_legacy_format, metadata_version){
-    .Call(`_arrow_ipc___RecordBatchStreamWriter__Open`, stream, schema, use_legacy_format, metadata_version)
+ipc___RecordBatchStreamWriter__Open <- function(stream, schema, use_legacy_format, metadata_version) {
+  .Call(`_arrow_ipc___RecordBatchStreamWriter__Open`, stream, schema, use_legacy_format, metadata_version)
 }
 
-Array__GetScalar <- function(x, i){
-    .Call(`_arrow_Array__GetScalar`, x, i)
+Array__GetScalar <- function(x, i) {
+  .Call(`_arrow_Array__GetScalar`, x, i)
 }
 
-Scalar__ToString <- function(s){
-    .Call(`_arrow_Scalar__ToString`, s)
+Scalar__ToString <- function(s) {
+  .Call(`_arrow_Scalar__ToString`, s)
 }
 
-StructScalar__field <- function(s, i){
-    .Call(`_arrow_StructScalar__field`, s, i)
+StructScalar__field <- function(s, i) {
+  .Call(`_arrow_StructScalar__field`, s, i)
 }
 
-StructScalar__GetFieldByName <- function(s, name){
-    .Call(`_arrow_StructScalar__GetFieldByName`, s, name)
+StructScalar__GetFieldByName <- function(s, name) {
+  .Call(`_arrow_StructScalar__GetFieldByName`, s, name)
 }
 
-Scalar__as_vector <- function(scalar){
-    .Call(`_arrow_Scalar__as_vector`, scalar)
+Scalar__as_vector <- function(scalar) {
+  .Call(`_arrow_Scalar__as_vector`, scalar)
 }
 
-MakeArrayFromScalar <- function(scalar, n){
-    .Call(`_arrow_MakeArrayFromScalar`, scalar, n)
+MakeArrayFromScalar <- function(scalar, n) {
+  .Call(`_arrow_MakeArrayFromScalar`, scalar, n)
 }
 
-Scalar__is_valid <- function(s){
-    .Call(`_arrow_Scalar__is_valid`, s)
+Scalar__is_valid <- function(s) {
+  .Call(`_arrow_Scalar__is_valid`, s)
 }
 
-Scalar__type <- function(s){
-    .Call(`_arrow_Scalar__type`, s)
+Scalar__type <- function(s) {
+  .Call(`_arrow_Scalar__type`, s)
 }
 
-Scalar__Equals <- function(lhs, rhs){
-    .Call(`_arrow_Scalar__Equals`, lhs, rhs)
+Scalar__Equals <- function(lhs, rhs) {
+  .Call(`_arrow_Scalar__Equals`, lhs, rhs)
 }
 
-Scalar__ApproxEquals <- function(lhs, rhs){
-    .Call(`_arrow_Scalar__ApproxEquals`, lhs, rhs)
+Scalar__ApproxEquals <- function(lhs, rhs) {
+  .Call(`_arrow_Scalar__ApproxEquals`, lhs, rhs)
 }
 
-schema_ <- function(fields){
-    .Call(`_arrow_schema_`, fields)
+schema_ <- function(fields) {
+  .Call(`_arrow_schema_`, fields)
 }
 
-Schema__ToString <- function(s){
-    .Call(`_arrow_Schema__ToString`, s)
+Schema__ToString <- function(s) {
+  .Call(`_arrow_Schema__ToString`, s)
 }
 
-Schema__num_fields <- function(s){
-    .Call(`_arrow_Schema__num_fields`, s)
+Schema__num_fields <- function(s) {
+  .Call(`_arrow_Schema__num_fields`, s)
 }
 
-Schema__field <- function(s, i){
-    .Call(`_arrow_Schema__field`, s, i)
+Schema__field <- function(s, i) {
+  .Call(`_arrow_Schema__field`, s, i)
 }
 
-Schema__AddField <- function(s, i, field){
-    .Call(`_arrow_Schema__AddField`, s, i, field)
+Schema__AddField <- function(s, i, field) {
+  .Call(`_arrow_Schema__AddField`, s, i, field)
 }
 
-Schema__SetField <- function(s, i, field){
-    .Call(`_arrow_Schema__SetField`, s, i, field)
+Schema__SetField <- function(s, i, field) {
+  .Call(`_arrow_Schema__SetField`, s, i, field)
 }
 
-Schema__RemoveField <- function(s, i){
-    .Call(`_arrow_Schema__RemoveField`, s, i)
+Schema__RemoveField <- function(s, i) {
+  .Call(`_arrow_Schema__RemoveField`, s, i)
 }
 
-Schema__GetFieldByName <- function(s, x){
-    .Call(`_arrow_Schema__GetFieldByName`, s, x)
+Schema__GetFieldByName <- function(s, x) {
+  .Call(`_arrow_Schema__GetFieldByName`, s, x)
 }
 
-Schema__fields <- function(schema){
-    .Call(`_arrow_Schema__fields`, schema)
+Schema__fields <- function(schema) {
+  .Call(`_arrow_Schema__fields`, schema)
 }
 
-Schema__field_names <- function(schema){
-    .Call(`_arrow_Schema__field_names`, schema)
+Schema__field_names <- function(schema) {
+  .Call(`_arrow_Schema__field_names`, schema)
 }
 
-Schema__HasMetadata <- function(schema){
-    .Call(`_arrow_Schema__HasMetadata`, schema)
+Schema__HasMetadata <- function(schema) {
+  .Call(`_arrow_Schema__HasMetadata`, schema)
 }
 
-Schema__metadata <- function(schema){
-    .Call(`_arrow_Schema__metadata`, schema)
+Schema__metadata <- function(schema) {
+  .Call(`_arrow_Schema__metadata`, schema)
 }
 
-Schema__WithMetadata <- function(schema, metadata){
-    .Call(`_arrow_Schema__WithMetadata`, schema, metadata)
+Schema__WithMetadata <- function(schema, metadata) {
+  .Call(`_arrow_Schema__WithMetadata`, schema, metadata)
 }
 
-Schema__serialize <- function(schema){
-    .Call(`_arrow_Schema__serialize`, schema)
+Schema__serialize <- function(schema) {
+  .Call(`_arrow_Schema__serialize`, schema)
 }
 
-Schema__Equals <- function(schema, other, check_metadata){
-    .Call(`_arrow_Schema__Equals`, schema, other, check_metadata)
+Schema__Equals <- function(schema, other, check_metadata) {
+  .Call(`_arrow_Schema__Equals`, schema, other, check_metadata)
 }
 
-arrow__UnifySchemas <- function(schemas){
-    .Call(`_arrow_arrow__UnifySchemas`, schemas)
+arrow__UnifySchemas <- function(schemas) {
+  .Call(`_arrow_arrow__UnifySchemas`, schemas)
 }
 
-Table__num_columns <- function(x){
-    .Call(`_arrow_Table__num_columns`, x)
+Table__num_columns <- function(x) {
+  .Call(`_arrow_Table__num_columns`, x)
 }
 
-Table__num_rows <- function(x){
-    .Call(`_arrow_Table__num_rows`, x)
+Table__num_rows <- function(x) {
+  .Call(`_arrow_Table__num_rows`, x)
 }
 
-Table__schema <- function(x){
-    .Call(`_arrow_Table__schema`, x)
+Table__schema <- function(x) {
+  .Call(`_arrow_Table__schema`, x)
 }
 
-Table__ReplaceSchemaMetadata <- function(x, metadata){
-    .Call(`_arrow_Table__ReplaceSchemaMetadata`, x, metadata)
+Table__ReplaceSchemaMetadata <- function(x, metadata) {
+  .Call(`_arrow_Table__ReplaceSchemaMetadata`, x, metadata)
 }
 
-Table__column <- function(table, i){
-    .Call(`_arrow_Table__column`, table, i)
+Table__column <- function(table, i) {
+  .Call(`_arrow_Table__column`, table, i)
 }
 
-Table__field <- function(table, i){
-    .Call(`_arrow_Table__field`, table, i)
+Table__field <- function(table, i) {
+  .Call(`_arrow_Table__field`, table, i)
 }
 
-Table__columns <- function(table){
-    .Call(`_arrow_Table__columns`, table)
+Table__columns <- function(table) {
+  .Call(`_arrow_Table__columns`, table)
 }
 
-Table__ColumnNames <- function(table){
-    .Call(`_arrow_Table__ColumnNames`, table)
+Table__ColumnNames <- function(table) {
+  .Call(`_arrow_Table__ColumnNames`, table)
 }
 
-Table__RenameColumns <- function(table, names){
-    .Call(`_arrow_Table__RenameColumns`, table, names)
+Table__RenameColumns <- function(table, names) {
+  .Call(`_arrow_Table__RenameColumns`, table, names)
 }
 
-Table__Slice1 <- function(table, offset){
-    .Call(`_arrow_Table__Slice1`, table, offset)
+Table__Slice1 <- function(table, offset) {
+  .Call(`_arrow_Table__Slice1`, table, offset)
 }
 
-Table__Slice2 <- function(table, offset, length){
-    .Call(`_arrow_Table__Slice2`, table, offset, length)
+Table__Slice2 <- function(table, offset, length) {
+  .Call(`_arrow_Table__Slice2`, table, offset, length)
 }
 
-Table__Equals <- function(lhs, rhs, check_metadata){
-    .Call(`_arrow_Table__Equals`, lhs, rhs, check_metadata)
+Table__Equals <- function(lhs, rhs, check_metadata) {
+  .Call(`_arrow_Table__Equals`, lhs, rhs, check_metadata)
 }
 
-Table__Validate <- function(table){
-    .Call(`_arrow_Table__Validate`, table)
+Table__Validate <- function(table) {
+  .Call(`_arrow_Table__Validate`, table)
 }
 
-Table__ValidateFull <- function(table){
-    .Call(`_arrow_Table__ValidateFull`, table)
+Table__ValidateFull <- function(table) {
+  .Call(`_arrow_Table__ValidateFull`, table)
 }
 
-Table__GetColumnByName <- function(table, name){
-    .Call(`_arrow_Table__GetColumnByName`, table, name)
+Table__GetColumnByName <- function(table, name) {
+  .Call(`_arrow_Table__GetColumnByName`, table, name)
 }
 
-Table__RemoveColumn <- function(table, i){
-    .Call(`_arrow_Table__RemoveColumn`, table, i)
+Table__RemoveColumn <- function(table, i) {
+  .Call(`_arrow_Table__RemoveColumn`, table, i)
 }
 
-Table__AddColumn <- function(table, i, field, column){
-    .Call(`_arrow_Table__AddColumn`, table, i, field, column)
+Table__AddColumn <- function(table, i, field, column) {
+  .Call(`_arrow_Table__AddColumn`, table, i, field, column)
 }
 
-Table__SetColumn <- function(table, i, field, column){
-    .Call(`_arrow_Table__SetColumn`, table, i, field, column)
+Table__SetColumn <- function(table, i, field, column) {
+  .Call(`_arrow_Table__SetColumn`, table, i, field, column)
 }
 
-Table__SelectColumns <- function(table, indices){
-    .Call(`_arrow_Table__SelectColumns`, table, indices)
+Table__SelectColumns <- function(table, indices) {
+  .Call(`_arrow_Table__SelectColumns`, table, indices)
 }
 
-all_record_batches <- function(lst){
-    .Call(`_arrow_all_record_batches`, lst)
+all_record_batches <- function(lst) {
+  .Call(`_arrow_all_record_batches`, lst)
 }
 
-Table__from_record_batches <- function(batches, schema_sxp){
-    .Call(`_arrow_Table__from_record_batches`, batches, schema_sxp)
+Table__from_record_batches <- function(batches, schema_sxp) {
+  .Call(`_arrow_Table__from_record_batches`, batches, schema_sxp)
 }
 
-GetCpuThreadPoolCapacity <- function(){
-    .Call(`_arrow_GetCpuThreadPoolCapacity`)
+GetCpuThreadPoolCapacity <- function() {
+  .Call(`_arrow_GetCpuThreadPoolCapacity`)
 }
 
-SetCpuThreadPoolCapacity <- function(threads){
-    invisible(.Call(`_arrow_SetCpuThreadPoolCapacity`, threads))
+SetCpuThreadPoolCapacity <- function(threads) {
+  invisible(.Call(`_arrow_SetCpuThreadPoolCapacity`, threads))
 }
 
-GetIOThreadPoolCapacity <- function(){
-    .Call(`_arrow_GetIOThreadPoolCapacity`)
+GetIOThreadPoolCapacity <- function() {
+  .Call(`_arrow_GetIOThreadPoolCapacity`)
 }
 
-SetIOThreadPoolCapacity <- function(threads){
-    invisible(.Call(`_arrow_SetIOThreadPoolCapacity`, threads))
+SetIOThreadPoolCapacity <- function(threads) {
+  invisible(.Call(`_arrow_SetIOThreadPoolCapacity`, threads))
 }
 
-Array__infer_type <- function(x){
-    .Call(`_arrow_Array__infer_type`, x)
+Array__infer_type <- function(x) {
+  .Call(`_arrow_Array__infer_type`, x)
 }
-
-
-
diff --git a/r/R/buffer.R b/r/R/buffer.R
index 8ea0d74cdae..a9424fd0da1 100644
--- a/r/R/buffer.R
+++ b/r/R/buffer.R
@@ -28,7 +28,7 @@
 #' - `$is_mutable` : is this buffer mutable?
 #' - `$ZeroPadding()` : zero bytes in padding, i.e. bytes between size and capacity
 #' - `$size` : size in memory, in bytes
-#' - `$capacity`: possible capacity, in bytes 
+#' - `$capacity`: possible capacity, in bytes
 #'
 #' @rdname buffer
 #' @name buffer
@@ -41,7 +41,8 @@
 #' @export
 #' @include arrow-package.R
 #' @include enums.R
-Buffer <- R6Class("Buffer", inherit = ArrowObject,
+Buffer <- R6Class("Buffer",
+  inherit = ArrowObject,
   public = list(
     ZeroPadding = function() Buffer__ZeroPadding(self),
     data = function() Buffer__data(self),
@@ -49,7 +50,6 @@ Buffer <- R6Class("Buffer", inherit = ArrowObject,
       inherits(other, "Buffer") && Buffer__Equals(self, other)
     }
   ),
-
   active = list(
     is_mutable = function() Buffer__is_mutable(self),
     size = function() Buffer__size(self),
diff --git a/r/R/chunked-array.R b/r/R/chunked-array.R
index 9465147a8ce..597180ea738 100644
--- a/r/R/chunked-array.R
+++ b/r/R/chunked-array.R
@@ -77,7 +77,8 @@
 #' doubles <- chunked_array(c(1, 2, 3), c(5L, 6L, 7L))
 #' doubles$type
 #' @export
-ChunkedArray <- R6Class("ChunkedArray", inherit = ArrowDatum,
+ChunkedArray <- R6Class("ChunkedArray",
+  inherit = ArrowDatum,
   public = list(
     length = function() ChunkedArray__length(self),
     type_id = function() ChunkedArray__type(self)$id,
diff --git a/r/R/compression.R b/r/R/compression.R
index 499a75c83e1..7107012d031 100644
--- a/r/R/compression.R
+++ b/r/R/compression.R
@@ -38,7 +38,8 @@
 #' @rdname Codec
 #' @name Codec
 #' @export
-Codec <- R6Class("Codec", inherit = ArrowObject,
+Codec <- R6Class("Codec",
+  inherit = ArrowObject,
   active = list(
     name = function() util___Codec__name(self),
     level = function() abort("Codec$level() not yet implemented")
@@ -70,7 +71,7 @@ codec_is_available <- function(type) {
 }
 
 compression_from_name <- function(name) {
-  map_int(name, ~CompressionType[[match.arg(toupper(.x), names(CompressionType))]])
+  map_int(name, ~ CompressionType[[match.arg(toupper(.x), names(CompressionType))]])
 }
 
 #' @title Compressed stream classes
diff --git a/r/R/compute.R b/r/R/compute.R
index 2544471aaf6..5c27e206d7e 100644
--- a/r/R/compute.R
+++ b/r/R/compute.R
@@ -30,7 +30,8 @@
 #' @details When passing indices in `...`, `args`, or `options`, express them as
 #' 0-based integers (consistent with C++).
 #' @return An `Array`, `ChunkedArray`, `Scalar`, `RecordBatch`, or `Table`, whatever the compute function results in.
-#' @seealso [Arrow C++ documentation](https://arrow.apache.org/docs/cpp/compute.html) for the functions and their respective options.
+#' @seealso [Arrow C++ documentation](https://arrow.apache.org/docs/cpp/compute.html) for
+#'   the functions and their respective options.
 #' @examplesIf arrow_available()
 #' a <- Array$create(c(1L, 2L, 3L, NA, 5L))
 #' s <- Scalar$create(4L)
@@ -47,11 +48,15 @@ call_function <- function(function_name, ..., args = list(...), options = empty_
   assert_that(is.list(options), !is.null(names(options)))
 
   datum_classes <- c("Array", "ChunkedArray", "RecordBatch", "Table", "Scalar")
-  valid_args <- map_lgl(args, ~inherits(., datum_classes))
+  valid_args <- map_lgl(args, ~ inherits(., datum_classes))
   if (!all(valid_args)) {
     # Lame, just pick one to report
     first_bad <- min(which(!valid_args))
-    stop("Argument ", first_bad, " is of class ", head(class(args[[first_bad]]), 1), " but it must be one of ", oxford_paste(datum_classes, "or"), call. = FALSE)
+    stop(
+      "Argument ", first_bad, " is of class ", head(class(args[[first_bad]]), 1),
+      " but it must be one of ", oxford_paste(datum_classes, "or"),
+      call. = FALSE
+    )
   }
 
   compute__CallFunction(function_name, args, options)
@@ -84,7 +89,7 @@ call_function <- function(function_name, ..., args = list(...), options = empty_
 #' @param ... Additional parameters passed to `grep()`
 #' @return A character vector of available Arrow C++ function names
 #' @examplesIf arrow_available()
-#' list_compute_functions() 
+#' list_compute_functions()
 #' list_compute_functions(pattern = "^UTF8", ignore.case = TRUE)
 #' list_compute_functions(pattern = "^is", invert = TRUE)
 #' @export
@@ -119,8 +124,8 @@ max.ArrowDatum <- function(..., na.rm = FALSE) {
 scalar_aggregate <- function(FUN, ..., na.rm = FALSE, na.min_count = 0) {
   a <- collect_arrays_from_dots(list(...))
   if (!na.rm) {
-    # When not removing null values, we require all values to be not null and 
-    # return null otherwise. We do that by setting minimum count of non-null 
+    # When not removing null values, we require all values to be not null and
+    # return null otherwise. We do that by setting minimum count of non-null
     # option values to the full array length.
     na.min_count <- length(a)
   }
@@ -230,28 +235,28 @@ all.ArrowDatum <- function(..., na.rm = FALSE) {
 #' match_arrow(Scalar$create("Mazda RX4 Wag"), cars_tbl$name)
 #'
 #' is_in(Array$create("Mazda RX4 Wag"), cars_tbl$name)
-#' 
-#' # Although there are multiple matches, you are returned the index of the first 
+#'
+#' # Although there are multiple matches, you are returned the index of the first
 #' # match, as with the base R equivalent
 #' match(4, mtcars$cyl) # 1-indexed
 #' match_arrow(Scalar$create(4), cars_tbl$cyl) # 0-indexed
-#' 
-#' # If `x` contains multiple values, you are returned the indices of the first 
+#'
+#' # If `x` contains multiple values, you are returned the indices of the first
 #' # match for each value.
 #' match(c(4, 6, 8), mtcars$cyl)
 #' match_arrow(Array$create(c(4, 6, 8)), cars_tbl$cyl)
-#' 
+#'
 #' # Return type matches type of `x`
 #' is_in(c(4, 6, 8), mtcars$cyl) # returns vector
 #' is_in(Scalar$create(4), mtcars$cyl) # returns Scalar
 #' is_in(Array$create(c(4, 6, 8)), cars_tbl$cyl) # returns Array
 #' is_in(ChunkedArray$create(c(4, 6), 8), cars_tbl$cyl) # returns ChunkedArray
 #' @export
-match_arrow <- function(x, table, ...)  {
+match_arrow <- function(x, table, ...) {
   if (!inherits(x, "ArrowDatum")) {
     x <- Array$create(x)
   }
-  
+
   if (!inherits(table, c("Array", "ChunkedArray"))) {
     table <- Array$create(table)
   }
@@ -261,11 +266,10 @@ match_arrow <- function(x, table, ...)  {
 #' @rdname match_arrow
 #' @export
 is_in <- function(x, table, ...) {
-  
   if (!inherits(x, "ArrowDatum")) {
     x <- Array$create(x)
   }
-  
+
   if (!inherits(table, c("Array", "DictionaryArray", "ChunkedArray"))) {
     table <- Array$create(table)
   }
diff --git a/r/R/csv.R b/r/R/csv.R
index 1312a2676ae..9206a04ff5d 100644
--- a/r/R/csv.R
+++ b/r/R/csv.R
@@ -130,13 +130,13 @@
 #' @return A `data.frame`, or a Table if `as_data_frame = FALSE`.
 #' @export
 #' @examplesIf arrow_available()
-#'   tf <- tempfile()
-#'   on.exit(unlink(tf))
-#'   write.csv(mtcars, file = tf)
-#'   df <- read_csv_arrow(tf)
-#'   dim(df)
-#'   # Can select columns
-#'   df <- read_csv_arrow(tf, col_select = starts_with("d"))
+#' tf <- tempfile()
+#' on.exit(unlink(tf))
+#' write.csv(mtcars, file = tf)
+#' df <- read_csv_arrow(tf)
+#' dim(df)
+#' # Can select columns
+#' df <- read_csv_arrow(tf, col_select = starts_with("d"))
 read_delim_arrow <- function(file,
                              delim = ",",
                              quote = '"',
@@ -226,7 +226,6 @@ read_csv_arrow <- function(file,
                            read_options = NULL,
                            as_data_frame = TRUE,
                            timestamp_parsers = NULL) {
-
   mc <- match.call()
   mc$delim <- ","
   mc[[1]] <- get("read_delim_arrow", envir = asNamespace("arrow"))
@@ -252,7 +251,6 @@ read_tsv_arrow <- function(file,
                            read_options = NULL,
                            as_data_frame = TRUE,
                            timestamp_parsers = NULL) {
-
   mc <- match.call()
   mc$delim <- "\t"
   mc[[1]] <- get("read_delim_arrow", envir = asNamespace("arrow"))
@@ -285,7 +283,8 @@ read_tsv_arrow <- function(file,
 #'
 #' @include arrow-package.R
 #' @export
-CsvTableReader <- R6Class("CsvTableReader", inherit = ArrowObject,
+CsvTableReader <- R6Class("CsvTableReader",
+  inherit = ArrowObject,
   public = list(
     Read = function() csv___TableReader__Read(self)
   )
@@ -379,11 +378,11 @@ CsvTableReader$create <- function(file,
 #' `TimestampParser$create()` takes an optional `format` string argument.
 #' See [`strptime()`][base::strptime()] for example syntax.
 #' The default is to use an ISO-8601 format parser.
-#' 
+#'
 #' The `CsvWriteOptions$create()` factory method takes the following arguments:
 #' - `include_header` Whether to write an initial header line with column names
 #' - `batch_size` Maximum number of rows processed at a time. Default is 1024.
-#' 
+#'
 #' @section Active bindings:
 #'
 #' - `column_names`: from `CsvReadOptions`
@@ -447,10 +446,9 @@ CsvParseOptions$create <- function(delimiter = ",",
                                    quote_char = '"',
                                    double_quote = TRUE,
                                    escaping = FALSE,
-                                   escape_char = '\\',
+                                   escape_char = "\\",
                                    newlines_in_values = FALSE,
                                    ignore_empty_lines = TRUE) {
-
   csv___ParseOptions__initialize(
     list(
       delimiter = delimiter,
@@ -478,7 +476,7 @@ readr_to_csv_parse_options <- function(delim = ",",
     quote_char = quote,
     double_quote = escape_double,
     escaping = escape_backslash,
-    escape_char = '\\',
+    escape_char = "\\",
     newlines_in_values = escape_backslash,
     ignore_empty_lines = skip_empty_rows
   )
@@ -489,7 +487,8 @@ readr_to_csv_parse_options <- function(delim = ",",
 #' @format NULL
 #' @docType class
 #' @export
-TimestampParser <- R6Class("TimestampParser", inherit = ArrowObject,
+TimestampParser <- R6Class("TimestampParser",
+  inherit = ArrowObject,
   public = list(
     kind = function() TimestampParser__kind(self),
     format = function() TimestampParser__format(self)
@@ -512,7 +511,7 @@ CsvConvertOptions <- R6Class("CsvConvertOptions", inherit = ArrowObject)
 CsvConvertOptions$create <- function(check_utf8 = TRUE,
                                      null_values = c("", "NA"),
                                      true_values = c("T", "true", "TRUE"),
-                                     false_values= c("F", "false", "FALSE"),
+                                     false_values = c("F", "false", "FALSE"),
                                      strings_can_be_null = FALSE,
                                      col_types = NULL,
                                      auto_dict_encode = FALSE,
@@ -520,7 +519,6 @@ CsvConvertOptions$create <- function(check_utf8 = TRUE,
                                      include_columns = character(),
                                      include_missing_columns = FALSE,
                                      timestamp_parsers = NULL) {
-
   if (!is.null(col_types) && !inherits(col_types, "Schema")) {
     abort(c(
       "Unsupported `col_types` specification.",
@@ -562,25 +560,25 @@ readr_to_csv_convert_options <- function(na,
       abort("Compact specification for `col_types` requires `col_names`")
     }
 
-    col_types <- set_names(nm = col_names, map2(specs, col_names, ~{
+    col_types <- set_names(nm = col_names, map2(specs, col_names, ~ {
       switch(.x,
-             "c" = utf8(),
-             "i" = int32(),
-             "n" = float64(),
-             "d" = float64(),
-             "l" = bool(),
-             "f" = dictionary(),
-             "D" = date32(),
-             "T" = time32(),
-             "t" = timestamp(),
-             "_" = null(),
-             "-" = null(),
-             "?" = NULL,
-             abort("Unsupported compact specification: '", .x,"' for column '", .y, "'")
+        "c" = utf8(),
+        "i" = int32(),
+        "n" = float64(),
+        "d" = float64(),
+        "l" = bool(),
+        "f" = dictionary(),
+        "D" = date32(),
+        "T" = time32(),
+        "t" = timestamp(),
+        "_" = null(),
+        "-" = null(),
+        "?" = NULL,
+        abort("Unsupported compact specification: '", .x, "' for column '", .y, "'")
       )
     }))
     # To "guess" types, omit them from col_types
-    col_types <- keep(col_types, ~!is.null(.x))
+    col_types <- keep(col_types, ~ !is.null(.x))
     col_types <- schema(!!!col_types)
   }
 
@@ -588,7 +586,7 @@ readr_to_csv_convert_options <- function(na,
     assert_is(col_types, "Schema")
     # If any columns are null(), drop them
     # (by specifying the other columns in include_columns)
-    nulls <- map_lgl(col_types$fields, ~.$type$Equals(null()))
+    nulls <- map_lgl(col_types$fields, ~ .$type$Equals(null()))
     if (any(nulls)) {
       include_columns <- setdiff(col_names, names(col_types)[nulls])
     }
@@ -622,26 +620,25 @@ write_csv_arrow <- function(x,
                             sink,
                             include_header = TRUE,
                             batch_size = 1024L) {
-  
   write_options <- CsvWriteOptions$create(include_header, batch_size)
-  
+
   x_out <- x
   if (is.data.frame(x)) {
     x <- Table$create(x)
   }
-  
+
   assert_that(is_writable_table(x))
-  
+
   if (!inherits(sink, "OutputStream")) {
     sink <- make_output_stream(sink)
     on.exit(sink$close())
   }
-  
+
   if (inherits(x, "RecordBatch")) {
     csv___WriteCSV__RecordBatch(x, write_options, sink)
   } else if (inherits(x, "Table")) {
     csv___WriteCSV__Table(x, write_options, sink)
   }
-  
+
   invisible(x_out)
 }
diff --git a/r/R/dataset-factory.R b/r/R/dataset-factory.R
index 0e029cb74bd..c56a6b18106 100644
--- a/r/R/dataset-factory.R
+++ b/r/R/dataset-factory.R
@@ -21,7 +21,8 @@
 #' @format NULL
 #' @rdname Dataset
 #' @export
-DatasetFactory <- R6Class("DatasetFactory", inherit = ArrowObject,
+DatasetFactory <- R6Class("DatasetFactory",
+  inherit = ArrowObject,
   public = list(
     Finish = function(schema = NULL, unify_schemas = FALSE) {
       if (is.null(schema)) {
diff --git a/r/R/dataset-format.R b/r/R/dataset-format.R
index 569033ff7f8..983b6f614a7 100644
--- a/r/R/dataset-format.R
+++ b/r/R/dataset-format.R
@@ -60,13 +60,14 @@
 #' dir.create(tf)
 #' on.exit(unlink(tf))
 #' write.table(mtcars, file.path(tf, "file1.txt"), sep = ";", row.names = FALSE)
-#' 
+#'
 #' # Create FileFormat object
 #' format <- FileFormat$create(format = "text", delimiter = ";")
-#' 
+#'
 #' open_dataset(tf, format = format)
 #' @export
-FileFormat <- R6Class("FileFormat", inherit = ArrowObject,
+FileFormat <- R6Class("FileFormat",
+  inherit = ArrowObject,
   active = list(
     # @description
     # Return the `FileFormat`'s type
@@ -102,8 +103,8 @@ as.character.FileFormat <- function(x, ...) {
 ParquetFileFormat <- R6Class("ParquetFileFormat", inherit = FileFormat)
 ParquetFileFormat$create <- function(...,
                                      dict_columns = character(0)) {
- options <- ParquetFragmentScanOptions$create(...)
- dataset___ParquetFileFormat__Make(options, dict_columns)
+  options <- ParquetFragmentScanOptions$create(...)
+  dataset___ParquetFileFormat__Make(options, dict_columns)
 }
 
 #' @usage NULL
@@ -118,8 +119,8 @@ IpcFileFormat <- R6Class("IpcFileFormat", inherit = FileFormat)
 #' @export
 CsvFileFormat <- R6Class("CsvFileFormat", inherit = FileFormat)
 CsvFileFormat$create <- function(..., opts = csv_file_format_parse_options(...),
-                                 convert_options = csv_file_format_convert_options(...),
-                                 read_options = csv_file_format_read_options(...)) {
+                                 convert_options = csv_file_format_convert_opts(...),
+                                 read_options = csv_file_format_read_opts(...)) {
   dataset___CsvFileFormat__Make(opts, convert_options, read_options)
 }
 
@@ -174,18 +175,20 @@ csv_file_format_parse_options <- function(...) {
   ambig_opts <- opt_names[is_ambig_opt]
   if (length(ambig_opts)) {
     stop("Ambiguous ",
-         ngettext(length(ambig_opts), "option", "options"),
-         ": ",
-         oxford_paste(ambig_opts),
-         ". Use full argument names",
-         call. = FALSE)
+      ngettext(length(ambig_opts), "option", "options"),
+      ": ",
+      oxford_paste(ambig_opts),
+      ". Use full argument names",
+      call. = FALSE
+    )
   }
   if (any(is_readr_opt)) {
     # Catch cases when the user specifies a mix of Arrow C++ options and
     # readr-style options
     if (!all(is_readr_opt)) {
       stop("Use either Arrow parse options or readr parse options, not both",
-           call. = FALSE)
+        call. = FALSE
+      )
     }
     do.call(readr_to_csv_parse_options, opts) # all options have readr-style names
   } else {
@@ -193,7 +196,7 @@ csv_file_format_parse_options <- function(...) {
   }
 }
 
-csv_file_format_convert_options <- function(...) {
+csv_file_format_convert_opts <- function(...) {
   opts <- list(...)
   # Filter out arguments meant for CsvParseOptions/CsvReadOptions
   arrow_opts <- names(formals(CsvParseOptions$create))
@@ -205,7 +208,7 @@ csv_file_format_convert_options <- function(...) {
   do.call(CsvConvertOptions$create, opts)
 }
 
-csv_file_format_read_options <- function(...) {
+csv_file_format_read_opts <- function(...) {
   opts <- list(...)
   # Filter out arguments meant for CsvParseOptions/CsvConvertOptions
   arrow_opts <- names(formals(CsvParseOptions$create))
@@ -247,7 +250,8 @@ csv_file_format_read_options <- function(...) {
 #' @rdname FragmentScanOptions
 #' @name FragmentScanOptions
 #' @export
-FragmentScanOptions <- R6Class("FragmentScanOptions", inherit = ArrowObject,
+FragmentScanOptions <- R6Class("FragmentScanOptions",
+  inherit = ArrowObject,
   active = list(
     # @description
     # Return the `FragmentScanOptions`'s type
@@ -255,7 +259,6 @@ FragmentScanOptions <- R6Class("FragmentScanOptions", inherit = ArrowObject,
   )
 )
 FragmentScanOptions$create <- function(format, ...) {
-  opt_names <- names(list(...))
   if (format %in% c("csv", "text", "tsv")) {
     CsvFragmentScanOptions$create(...)
   } else if (format == "parquet") {
@@ -276,8 +279,8 @@ as.character.FragmentScanOptions <- function(x, ...) {
 #' @export
 CsvFragmentScanOptions <- R6Class("CsvFragmentScanOptions", inherit = FragmentScanOptions)
 CsvFragmentScanOptions$create <- function(...,
-                                          convert_opts = csv_file_format_convert_options(...),
-                                          read_opts = csv_file_format_read_options(...)) {
+                                          convert_opts = csv_file_format_convert_opts(...),
+                                          read_opts = csv_file_format_read_opts(...)) {
   dataset___CsvFragmentScanOptions__Make(convert_opts, read_opts)
 }
 
@@ -296,28 +299,37 @@ ParquetFragmentScanOptions$create <- function(use_buffered_stream = FALSE,
 #'
 #' @description
 #' A `FileWriteOptions` holds write options specific to a `FileFormat`.
-FileWriteOptions <- R6Class("FileWriteOptions", inherit = ArrowObject,
+FileWriteOptions <- R6Class("FileWriteOptions",
+  inherit = ArrowObject,
   public = list(
     update = function(table, ...) {
       if (self$type == "parquet") {
-        dataset___ParquetFileWriteOptions__update(self,
-            ParquetWriterProperties$create(table, ...),
-            ParquetArrowWriterProperties$create(...))
+        dataset___ParquetFileWriteOptions__update(
+          self,
+          ParquetWriterProperties$create(table, ...),
+          ParquetArrowWriterProperties$create(...)
+        )
       } else if (self$type == "ipc") {
         args <- list(...)
         if (is.null(args$codec)) {
-          dataset___IpcFileWriteOptions__update1(self,
-              get_ipc_use_legacy_format(args$use_legacy_format),
-              get_ipc_metadata_version(args$metadata_version))
+          dataset___IpcFileWriteOptions__update1(
+            self,
+            get_ipc_use_legacy_format(args$use_legacy_format),
+            get_ipc_metadata_version(args$metadata_version)
+          )
         } else {
-          dataset___IpcFileWriteOptions__update2(self,
-              get_ipc_use_legacy_format(args$use_legacy_format),
-              args$codec,
-              get_ipc_metadata_version(args$metadata_version))
+          dataset___IpcFileWriteOptions__update2(
+            self,
+            get_ipc_use_legacy_format(args$use_legacy_format),
+            args$codec,
+            get_ipc_metadata_version(args$metadata_version)
+          )
         }
       } else if (self$type == "csv") {
-          dataset___CsvFileWriteOptions__update(self,
-              CsvWriteOptions$create(...))
+        dataset___CsvFileWriteOptions__update(
+          self,
+          CsvWriteOptions$create(...)
+        )
       }
       invisible(self)
     }
diff --git a/r/R/dataset-partition.R b/r/R/dataset-partition.R
index 6e29e4ea31c..35d5bc00cd1 100644
--- a/r/R/dataset-partition.R
+++ b/r/R/dataset-partition.R
@@ -75,8 +75,9 @@ DirectoryPartitioning$create <- function(schm, segment_encoding = "uri") {
 HivePartitioning <- R6Class("HivePartitioning", inherit = Partitioning)
 HivePartitioning$create <- function(schm, null_fallback = NULL, segment_encoding = "uri") {
   dataset___HivePartitioning(schm,
-                             null_fallback = null_fallback_or_default(null_fallback),
-                             segment_encoding = segment_encoding)
+    null_fallback = null_fallback_or_default(null_fallback),
+    segment_encoding = segment_encoding
+  )
 }
 
 #' Construct Hive partitioning
diff --git a/r/R/dataset-scan.R b/r/R/dataset-scan.R
index 4fc73485e3a..66ecfa7a429 100644
--- a/r/R/dataset-scan.R
+++ b/r/R/dataset-scan.R
@@ -56,7 +56,8 @@
 #' @rdname Scanner
 #' @name Scanner
 #' @export
-Scanner <- R6Class("Scanner", inherit = ArrowObject,
+Scanner <- R6Class("Scanner",
+  inherit = ArrowObject,
   public = list(
     ToTable = function() dataset___Scanner__ToTable(self),
     ScanBatches = function() dataset___Scanner__ScanBatches(self),
@@ -76,7 +77,7 @@ Scanner$create <- function(dataset,
                            fragment_scan_options = NULL,
                            ...) {
   if (is.null(use_async)) {
-    use_async = getOption("arrow.use_async", FALSE)
+    use_async <- getOption("arrow.use_async", FALSE)
   }
 
   if (inherits(dataset, "arrow_dplyr_query")) {
@@ -125,7 +126,8 @@ Scanner$create <- function(dataset,
 #' @export
 names.Scanner <- function(x) names(x$schema)
 
-ScanTask <- R6Class("ScanTask", inherit = ArrowObject,
+ScanTask <- R6Class("ScanTask",
+  inherit = ArrowObject,
   public = list(
     Execute = function() dataset___ScanTask__get_batches(self)
   )
@@ -155,9 +157,7 @@ map_batches <- function(X, FUN, ..., .data.frame = TRUE) {
   }
   scanner <- Scanner$create(ensure_group_vars(X))
   FUN <- as_mapper(FUN)
-  # message("Making ScanTasks")
   lapply(scanner$ScanBatches(), function(batch) {
-    # message("Processing Batch")
     # TODO: wrap batch in arrow_dplyr_query with X$selected_columns,
     # X$temp_columns, and X$group_by_vars
     # if X is arrow_dplyr_query, if some other arg (.dplyr?) == TRUE
@@ -169,7 +169,8 @@ map_batches <- function(X, FUN, ..., .data.frame = TRUE) {
 #' @format NULL
 #' @rdname Scanner
 #' @export
-ScannerBuilder <- R6Class("ScannerBuilder", inherit = ArrowObject,
+ScannerBuilder <- R6Class("ScannerBuilder",
+  inherit = ArrowObject,
   public = list(
     Project = function(cols) {
       # cols is either a character vector or a named list of Expressions
diff --git a/r/R/dataset-write.R b/r/R/dataset-write.R
index 2cc7201b839..9deb0233d58 100644
--- a/r/R/dataset-write.R
+++ b/r/R/dataset-write.R
@@ -86,6 +86,8 @@ write_dataset <- function(dataset,
   path_and_fs <- get_path_and_filesystem(path)
   options <- FileWriteOptions$create(format, table = scanner, ...)
 
-  dataset___Dataset__Write(options, path_and_fs$fs, path_and_fs$path,
-                           partitioning, basename_template, scanner)
+  dataset___Dataset__Write(
+    options, path_and_fs$fs, path_and_fs$path,
+    partitioning, basename_template, scanner
+  )
 }
diff --git a/r/R/dataset.R b/r/R/dataset.R
index 6706b48ecc4..eb7cfaf4f44 100644
--- a/r/R/dataset.R
+++ b/r/R/dataset.R
@@ -81,21 +81,21 @@
 #' @export
 #' @seealso `vignette("dataset", package = "arrow")`
 #' @include arrow-package.R
-#' @examplesIf arrow_with_dataset() & arrow_with_parquet() 
+#' @examplesIf arrow_with_dataset() & arrow_with_parquet()
 #' # Set up directory for examples
 #' tf <- tempfile()
 #' dir.create(tf)
 #' on.exit(unlink(tf))
-#' 
+#'
 #' data <- dplyr::group_by(mtcars, cyl)
 #' write_dataset(data, tf)
-#' 
+#'
 #' # You can specify a directory containing the files for your dataset and
 #' # open_dataset will scan all files in your directory.
 #' open_dataset(tf)
-#' 
+#'
 #' # You can also supply a vector of paths
-#' open_dataset(c(file.path(tf, "cyl=4/part-1.parquet"), file.path(tf,"cyl=8/part-2.parquet")))
+#' open_dataset(c(file.path(tf, "cyl=4/part-1.parquet"), file.path(tf, "cyl=8/part-2.parquet")))
 #'
 #' ## You must specify the file format if using a format other than parquet.
 #' tf2 <- tempfile()
@@ -103,28 +103,30 @@
 #' on.exit(unlink(tf2))
 #' write_dataset(data, tf2, format = "ipc")
 #' # This line will results in errors when you try to work with the data
-#' \dontrun{open_dataset(tf2)}
+#' \dontrun{
+#' open_dataset(tf2)
+#' }
 #' # This line will work
-#' open_dataset(tf2, format = "ipc") 
-#' 
+#' open_dataset(tf2, format = "ipc")
+#'
 #' ## You can specify file partitioning to include it as a field in your dataset
 #' # Create a temporary directory and write example dataset
 #' tf3 <- tempfile()
 #' dir.create(tf3)
 #' on.exit(unlink(tf3))
 #' write_dataset(airquality, tf3, partitioning = c("Month", "Day"), hive_style = FALSE)
-#' 
-#' # View files - you can see the partitioning means that files have been written 
+#'
+#' # View files - you can see the partitioning means that files have been written
 #' # to folders based on Month/Day values
 #' list.files(tf3, recursive = TRUE)
-#' 
+#'
 #' # With no partitioning specified, dataset contains all files but doesn't include
 #' # directory names as field names
 #' open_dataset(tf3)
-#' 
+#'
 #' # Now that partitioning has been specified, your dataset contains columns for Month and Day
 #' open_dataset(tf3, partitioning = c("Month", "Day"))
-#' 
+#'
 #' # If you want to specify the data types for your fields, you can pass in a Schema
 #' open_dataset(tf3, partitioning = schema(Month = int8(), Day = int8()))
 open_dataset <- function(sources,
@@ -137,7 +139,7 @@ open_dataset <- function(sources,
     if (is.null(schema)) {
       if (is.null(unify_schemas) || isTRUE(unify_schemas)) {
         # Default is to unify schemas here
-        schema <- unify_schemas(schemas = map(sources, ~.$schema))
+        schema <- unify_schemas(schemas = map(sources, ~ .$schema))
       } else {
         # Take the first one.
         schema <- sources[[1]]$schema
@@ -151,12 +153,12 @@ open_dataset <- function(sources,
     })
     return(dataset___UnionDataset__create(sources, schema))
   }
-  
+
   factory <- DatasetFactory$create(sources, partitioning = partitioning, format = format, ...)
   tryCatch(
     # Default is _not_ to inspect/unify schemas
     factory$Finish(schema, isTRUE(unify_schemas)),
-    error = function(e){
+    error = function(e) {
       handle_parquet_io_error(e, format)
     }
   )
@@ -220,7 +222,8 @@ open_dataset <- function(sources,
 #'
 #' @export
 #' @seealso [open_dataset()] for a simple interface to creating a `Dataset`
-Dataset <- R6Class("Dataset", inherit = ArrowObject,
+Dataset <- R6Class("Dataset",
+  inherit = ArrowObject,
   public = list(
     # @description
     # Start a new scan of the data
@@ -250,7 +253,8 @@ Dataset$create <- open_dataset
 #' @name FileSystemDataset
 #' @rdname Dataset
 #' @export
-FileSystemDataset <- R6Class("FileSystemDataset", inherit = Dataset,
+FileSystemDataset <- R6Class("FileSystemDataset",
+  inherit = Dataset,
   public = list(
     .class_title = function() {
       nfiles <- length(self$files)
@@ -289,7 +293,8 @@ FileSystemDataset <- R6Class("FileSystemDataset", inherit = Dataset,
 #' @name UnionDataset
 #' @rdname Dataset
 #' @export
-UnionDataset <- R6Class("UnionDataset", inherit = Dataset,
+UnionDataset <- R6Class("UnionDataset",
+  inherit = Dataset,
   active = list(
     # @description
     # Return the UnionDataset's child `Dataset`s
diff --git a/r/R/dplyr-arrange.R b/r/R/dplyr-arrange.R
index 5ab60abbada..345fc183295 100644
--- a/r/R/dplyr-arrange.R
+++ b/r/R/dplyr-arrange.R
@@ -41,7 +41,7 @@ arrange.arrow_dplyr_query <- function(.data, ..., .by_group = FALSE) {
     exprs[[i]] <- x[["quos"]]
     sorts[[i]] <- arrow_eval(exprs[[i]], mask)
     if (inherits(sorts[[i]], "try-error")) {
-      msg <- paste('Expression', as_label(exprs[[i]]), 'not supported in Arrow')
+      msg <- paste("Expression", as_label(exprs[[i]]), "not supported in Arrow")
       return(abandon_ship(call, .data, msg))
     }
     names(sorts)[i] <- as_label(exprs[[i]])
diff --git a/r/R/dplyr-collect.R b/r/R/dplyr-collect.R
index 55716291dcb..cec56ab9110 100644
--- a/r/R/dplyr-collect.R
+++ b/r/R/dplyr-collect.R
@@ -59,4 +59,4 @@ pull.arrow_dplyr_query <- function(.data, var = -1) {
   .data$selected_columns <- set_names(.data$selected_columns[var], var)
   dplyr::collect(.data)[[1]]
 }
-pull.Dataset <- pull.ArrowTabular <- pull.arrow_dplyr_query
\ No newline at end of file
+pull.Dataset <- pull.ArrowTabular <- pull.arrow_dplyr_query
diff --git a/r/R/dplyr-eval.R b/r/R/dplyr-eval.R
index de68d2f2c4d..57497e41cd2 100644
--- a/r/R/dplyr-eval.R
+++ b/r/R/dplyr-eval.R
@@ -15,7 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-arrow_eval <- function (expr, mask) {
+arrow_eval <- function(expr, mask) {
   # filter(), mutate(), etc. work by evaluating the quoted `exprs` to generate Expressions
   # with references to Arrays (if .data is Table/RecordBatch) or Fields (if
   # .data is a Dataset).
@@ -51,10 +51,10 @@ handle_arrow_not_supported <- function(err, lab) {
   # Look for informative message from the Arrow function version (see above)
   if (inherits(err, "arrow-try-error")) {
     # Include it if found
-    paste0('In ', lab, ', ', as.character(err))
+    paste0("In ", lab, ", ", as.character(err))
   } else {
     # Otherwise be opaque (the original error is probably not useful)
-    paste('Expression', lab, 'not supported in Arrow')
+    paste("Expression", lab, "not supported in Arrow")
   }
 }
 
@@ -65,7 +65,7 @@ i18ize_error_messages <- function() {
     obj = tryCatch(eval(parse(text = "X_____X")), error = function(e) conditionMessage(e)),
     fun = tryCatch(eval(parse(text = "X_____X()")), error = function(e) conditionMessage(e))
   )
-  paste(map(out, ~sub("X_____X", ".*", .)), collapse = "|")
+  paste(map(out, ~ sub("X_____X", ".*", .)), collapse = "|")
 }
 
 # Helper to raise a common error
@@ -87,7 +87,7 @@ arrow_mask <- function(.data) {
   }
 
   # Assign the schema to the expressions
-  map(.data$selected_columns, ~(.$schema <- .data$.data$schema))
+  map(.data$selected_columns, ~ (.$schema <- .data$.data$schema))
 
   # Add the column references and make the mask
   out <- new_data_mask(
diff --git a/r/R/dplyr-filter.R b/r/R/dplyr-filter.R
index 3cbc34511a4..c9aa96fd5a7 100644
--- a/r/R/dplyr-filter.R
+++ b/r/R/dplyr-filter.R
@@ -29,7 +29,7 @@ filter.arrow_dplyr_query <- function(.data, ..., .preserve = FALSE) {
   .data <- arrow_dplyr_query(.data)
   # tidy-eval the filter expressions inside an Arrow data_mask
   filters <- lapply(filts, arrow_eval, arrow_mask(.data))
-  bad_filters <- map_lgl(filters, ~inherits(., "try-error"))
+  bad_filters <- map_lgl(filters, ~ inherits(., "try-error"))
   if (any(bad_filters)) {
     # This is similar to abandon_ship() except that the filter eval is
     # vectorized, and we apply filters that _did_ work before abandoning ship
@@ -81,4 +81,4 @@ set_filters <- function(.data, expressions) {
     }
   }
   .data
-}
\ No newline at end of file
+}
diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R
index 8406de1ba8f..40e4cd4776b 100644
--- a/r/R/dplyr-functions.R
+++ b/r/R/dplyr-functions.R
@@ -99,7 +99,7 @@ nse_funcs$is.na <- function(x) {
   # TODO: if an option is added to the is_null kernel to treat NaN as NA,
   # use that to simplify the code here (ARROW-13367)
   if (is.double(x) || (inherits(x, "Expression") &&
-      x$type_id() %in% TYPES_WITH_NAN)) {
+    x$type_id() %in% TYPES_WITH_NAN)) {
     build_expr("is_nan", x) | build_expr("is_null", x)
   } else {
     build_expr("is_null", x)
@@ -108,7 +108,7 @@ nse_funcs$is.na <- function(x) {
 
 nse_funcs$is.nan <- function(x) {
   if (is.double(x) || (inherits(x, "Expression") &&
-      x$type_id() %in% TYPES_WITH_NAN)) {
+    x$type_id() %in% TYPES_WITH_NAN)) {
     # TODO: if an option is added to the is_nan kernel to treat NA as NaN,
     # use that to simplify the code here (ARROW-13366)
     build_expr("is_nan", x) & build_expr("is_valid", x)
@@ -379,7 +379,7 @@ nse_funcs$substr <- function(x, start, stop) {
   )
 }
 
-nse_funcs$substring <- function(text, first, last){
+nse_funcs$substring <- function(text, first, last) {
   nse_funcs$substr(x = text, start = first, stop = last)
 }
 
@@ -496,9 +496,8 @@ nse_funcs$strsplit <- function(x,
   if (fixed && perl) {
     warning("Argument 'perl = TRUE' will be ignored", call. = FALSE)
   }
-  # since split is not a regex, proceed without any warnings or errors
-  # regardless of the value of perl, for consistency with the behavior of
-  # base::strsplit()
+  # since split is not a regex, proceed without any warnings or errors regardless
+  # of the value of perl, for consistency with the behavior of base::strsplit()
   Expression$create(
     arrow_fun,
     x,
@@ -702,7 +701,6 @@ nse_funcs$wday <- function(x, label = FALSE, abbr = TRUE, week_start = getOption
 }
 
 nse_funcs$log <- nse_funcs$logb <- function(x, base = exp(1)) {
-
   if (base == exp(1)) {
     return(Expression$create("ln_checked", x))
   }
@@ -718,7 +716,7 @@ nse_funcs$log <- nse_funcs$logb <- function(x, base = exp(1)) {
   stop("`base` values other than exp(1), 2 and 10 not supported in Arrow", call. = FALSE)
 }
 
-nse_funcs$if_else <- function(condition, true, false, missing = NULL){
+nse_funcs$if_else <- function(condition, true, false, missing = NULL) {
   if (!is.null(missing)) {
     return(nse_funcs$if_else(
       nse_funcs$is.na(condition),
@@ -731,7 +729,11 @@ nse_funcs$if_else <- function(condition, true, false, missing = NULL){
   # TODO: remove this after ARROW-13358 is merged
   warn_types <- nse_funcs$is.factor(true) | nse_funcs$is.factor(false)
   if (warn_types) {
-    warning("Dictionaries (in R: factors) are currently converted to strings (characters) in if_else and ifelse", call. = FALSE)
+    warning(
+      "Dictionaries (in R: factors) are currently converted to strings (characters) ",
+      "in if_else and ifelse",
+      call. = FALSE
+    )
   }
 
   build_expr("if_else", condition, true, false)
diff --git a/r/R/dplyr-group-by.R b/r/R/dplyr-group-by.R
index c426a66b218..42cca039022 100644
--- a/r/R/dplyr-group-by.R
+++ b/r/R/dplyr-group-by.R
@@ -31,7 +31,7 @@ group_by.arrow_dplyr_query <- function(.data,
   #   * expressions (named or otherwise)
   #   * variables that have new names
   # All others (i.e. simple references to variables) should not be (re)-added
-  new_group_ind <- map_lgl(new_groups, ~!(quo_name(.x) %in% names(.data)))
+  new_group_ind <- map_lgl(new_groups, ~ !(quo_name(.x) %in% names(.data)))
   named_group_ind <- map_lgl(names(new_groups), nzchar)
   new_groups <- new_groups[new_group_ind | named_group_ind]
   if (length(new_groups)) {
@@ -42,7 +42,7 @@ group_by.arrow_dplyr_query <- function(.data,
     .data <- dplyr::mutate(.data, !!!new_groups)
   }
   if (".add" %in% names(formals(dplyr::group_by))) {
-    # dplyr >= 1.0
+    # For compatibility with dplyr >= 1.0
     gv <- dplyr::group_by_prepare(.data, ..., .add = .add)$group_names
   } else {
     gv <- dplyr::group_by_prepare(.data, ..., add = add)$group_names
diff --git a/r/R/dplyr-mutate.R b/r/R/dplyr-mutate.R
index a961fbf056c..f19505c1958 100644
--- a/r/R/dplyr-mutate.R
+++ b/r/R/dplyr-mutate.R
@@ -42,7 +42,7 @@ mutate.arrow_dplyr_query <- function(.data,
     # mutate() on a grouped dataset does calculations within groups
     # This doesn't matter on scalar ops (arithmetic etc.) but it does
     # for things with aggregations (e.g. subtracting the mean)
-    return(abandon_ship(call, .data, 'mutate() on grouped data not supported in Arrow'))
+    return(abandon_ship(call, .data, "mutate() on grouped data not supported in Arrow"))
   }
 
   # Check for unnamed expressions and fix if any
@@ -64,10 +64,10 @@ mutate.arrow_dplyr_query <- function(.data,
       )
       return(abandon_ship(call, .data, msg))
     } else if (!inherits(results[[new_var]], "Expression") &&
-               !is.null(results[[new_var]])) {
+      !is.null(results[[new_var]])) {
       # We need some wrapping to handle literal values
       if (length(results[[new_var]]) != 1) {
-        msg <- paste0('In ', new_var, " = ", as_label(exprs[[i]]), ", only values of size one are recycled")
+        msg <- paste0("In ", new_var, " = ", as_label(exprs[[i]]), ", only values of size one are recycled")
         return(abandon_ship(call, .data, msg))
       }
       results[[new_var]] <- Expression$scalar(results[[new_var]])
diff --git a/r/R/dplyr-select.R b/r/R/dplyr-select.R
index 686965a4197..ee740db4cfb 100644
--- a/r/R/dplyr-select.R
+++ b/r/R/dplyr-select.R
@@ -63,7 +63,7 @@ relocate.arrow_dplyr_query <- function(.data, ..., .before = NULL, .after = NULL
   .data <- arrow_dplyr_query(.data)
 
   # Assign the schema to the expressions
-  map(.data$selected_columns, ~(.$schema <- .data$.data$schema))
+  map(.data$selected_columns, ~ (.$schema <- .data$.data$schema))
 
   # Create a mask for evaluating expressions in tidyselect helpers
   mask <- new_environment(.cache$functions, parent = caller_env())
diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R
index 3042e30707e..26db190099f 100644
--- a/r/R/dplyr-summarize.R
+++ b/r/R/dplyr-summarize.R
@@ -24,8 +24,8 @@ summarise.arrow_dplyr_query <- function(.data, ..., .engine = c("arrow", "duckdb
   exprs <- quos(...)
   # Only retain the columns we need to do our aggregations
   vars_to_keep <- unique(c(
-    unlist(lapply(exprs, all.vars)),     # vars referenced in summarise
-    dplyr::group_vars(.data)             # vars needed for grouping
+    unlist(lapply(exprs, all.vars)), # vars referenced in summarise
+    dplyr::group_vars(.data) # vars needed for grouping
   ))
   .data <- dplyr::select(.data, vars_to_keep)
 
diff --git a/r/R/dplyr.R b/r/R/dplyr.R
index b77b0cf6575..88accac24e9 100644
--- a/r/R/dplyr.R
+++ b/r/R/dplyr.R
@@ -42,7 +42,7 @@ arrow_dplyr_query <- function(.data) {
       )
     ))
   }
-  
+
   structure(
     list(
       .data = if (inherits(.data, "Dataset")) {
diff --git a/r/R/duckdb.R b/r/R/duckdb.R
index 0a224617983..6ed1df3d826 100644
--- a/r/R/duckdb.R
+++ b/r/R/duckdb.R
@@ -40,7 +40,8 @@
 #'
 #' @name to_duckdb
 #' @export
-#' @examplesIf arrow_with_dataset() && requireNamespace("duckdb", quietly = TRUE) && packageVersion("duckdb") > "0.2.7" && requireNamespace("dplyr", quietly = TRUE)
+#' @examplesIf { arrow_with_dataset() && requireNamespace("duckdb", quietly = TRUE) &&
+#'   packageVersion("duckdb") > "0.2.7" && requireNamespace("dplyr", quietly = TRUE) }
 #' library(dplyr)
 #'
 #' ds <- InMemoryDataset$create(mtcars)
@@ -56,10 +57,9 @@
 #'   filter(mpg < 30) %>%
 #'   group_by(cyl) %>%
 #'   summarize(mean_mpg = mean(mpg, na.rm = TRUE), .engine = "duckdb")
-#'
 to_duckdb <- function(.data,
                       con = arrow_duck_connection(),
-                      table_name =  unique_arrow_tablename(),
+                      table_name = unique_arrow_tablename(),
                       auto_disconnect = TRUE) {
   .data <- arrow_dplyr_query(.data)
   duckdb::duckdb_register_arrow(con, table_name, .data)
@@ -108,7 +108,7 @@ duckdb_disconnector <- function(con, tbl_name) {
 
     # and there are no more tables, so we can safely shutdown
     if (length(DBI::dbListTables(con)) == 0) {
-      DBI::dbDisconnect(con, shutdown=TRUE)
+      DBI::dbDisconnect(con, shutdown = TRUE)
     }
   })
   environment()
diff --git a/r/R/expression.R b/r/R/expression.R
index c4ce38b3ca1..0526eb73bc9 100644
--- a/r/R/expression.R
+++ b/r/R/expression.R
@@ -81,7 +81,6 @@
   # arguments. Most map *directly* to an Arrow C++ compute kernel and require no
   # non-default options, but some are modified by build_expr(). More complex R
   # function/operator mappings are defined in dplyr-functions.R.
-
   "==" = "equal",
   "!=" = "not_equal",
   ">" = "greater",
@@ -96,7 +95,7 @@
   "/" = "divide_checked",
   "%/%" = "divide_checked",
   # we don't actually use divide_checked with `%%`, rather it is rewritten to
-  # use %/% above.
+  # use `%/%` above.
   "%%" = "divide_checked",
   "^" = "power_checked",
   "%in%" = "is_in_meta_binary"
@@ -121,7 +120,8 @@
 #' @name Expression
 #' @rdname Expression
 #' @export
-Expression <- R6Class("Expression", inherit = ArrowObject,
+Expression <- R6Class("Expression",
+  inherit = ArrowObject,
   public = list(
     ToString = function() compute___expr__ToString(self),
     # TODO: Implement type determination without storing
@@ -212,7 +212,7 @@ build_expr <- function(FUN,
       out <- build_expr("/", args = args)
       return(out$cast(int32(), allow_float_truncate = TRUE))
     } else if (FUN == "%%") {
-      return(args[[1]] - args[[2]] * ( args[[1]] %/% args[[2]] ))
+      return(args[[1]] - args[[2]] * (args[[1]] %/% args[[2]]))
     }
 
     expr <- Expression$create(.array_function_map[[FUN]] %||% FUN, args = args, options = options)
diff --git a/r/R/feather.R b/r/R/feather.R
index 187a5e06279..70a270bbe02 100644
--- a/r/R/feather.R
+++ b/r/R/feather.R
@@ -94,7 +94,7 @@ write_feather <- function(x,
 
   # "lz4" is the convenience
   if (compression == "lz4") {
-     compression <- "lz4_frame"
+    compression <- "lz4_frame"
   }
 
   compression <- compression_from_name(compression)
@@ -103,7 +103,7 @@ write_feather <- function(x,
   if (is.data.frame(x) || inherits(x, "RecordBatch")) {
     x <- Table$create(x)
   }
-  
+
   assert_that(is_writable_table(x))
 
   if (!inherits(sink, "OutputStream")) {
@@ -190,7 +190,8 @@ read_feather <- function(file, col_select = NULL, as_data_frame = TRUE, ...) {
 #'
 #' @export
 #' @include arrow-package.R
-FeatherReader <- R6Class("FeatherReader", inherit = ArrowObject,
+FeatherReader <- R6Class("FeatherReader",
+  inherit = ArrowObject,
   public = list(
     Read = function(columns) {
       ipc___feather___Reader__Read(self, columns)
diff --git a/r/R/field.R b/r/R/field.R
index 60d8ffde22b..e5c938bf5ee 100644
--- a/r/R/field.R
+++ b/r/R/field.R
@@ -31,7 +31,8 @@
 #' @rdname Field
 #' @name Field
 #' @export
-Field <- R6Class("Field", inherit = ArrowObject,
+Field <- R6Class("Field",
+  inherit = ArrowObject,
   public = list(
     ToString = function() {
       prettier_dictionary_type(Field__ToString(self))
@@ -41,7 +42,6 @@ Field <- R6Class("Field", inherit = ArrowObject,
     },
     export_to_c = function(ptr) ExportField(self, ptr)
   ),
-
   active = list(
     name = function() {
       Field__name(self)
diff --git a/r/R/filesystem.R b/r/R/filesystem.R
index 283fbbb0ae5..a09d0a51d7b 100644
--- a/r/R/filesystem.R
+++ b/r/R/filesystem.R
@@ -57,7 +57,6 @@ FileInfo <- R6Class("FileInfo",
         invisible(fs___FileInfo__set_path(self))
       }
     },
-
     size = function(size) {
       if (missing(size)) {
         fs___FileInfo__size(self)
@@ -65,7 +64,6 @@ FileInfo <- R6Class("FileInfo",
         invisible(fs___FileInfo__set_size(self, size))
       }
     },
-
     mtime = function(time) {
       if (missing(time)) {
         fs___FileInfo__mtime(self)
@@ -198,7 +196,8 @@ FileSelector$create <- function(base_dir, allow_not_found = FALSE, recursive = F
 #' @rdname FileSystem
 #' @name FileSystem
 #' @export
-FileSystem <- R6Class("FileSystem", inherit = ArrowObject,
+FileSystem <- R6Class("FileSystem",
+  inherit = ArrowObject,
   public = list(
     GetFileInfo = function(x) {
       if (inherits(x, "FileSelector")) {
@@ -209,35 +208,27 @@ FileSystem <- R6Class("FileSystem", inherit = ArrowObject,
         abort("incompatible type for FileSystem$GetFileInfo()")
       }
     },
-
     CreateDir = function(path, recursive = TRUE) {
       fs___FileSystem__CreateDir(self, clean_path_rel(path), isTRUE(recursive))
     },
-
     DeleteDir = function(path) {
       fs___FileSystem__DeleteDir(self, clean_path_rel(path))
     },
-
     DeleteDirContents = function(path) {
       fs___FileSystem__DeleteDirContents(self, clean_path_rel(path))
     },
-
     DeleteFile = function(path) {
       fs___FileSystem__DeleteFile(self, clean_path_rel(path))
     },
-
     DeleteFiles = function(paths) {
       fs___FileSystem__DeleteFiles(self, clean_path_rel(paths))
     },
-
     Move = function(src, dest) {
       fs___FileSystem__Move(self, clean_path_rel(src), clean_path_rel(dest))
     },
-
     CopyFile = function(src, dest) {
       fs___FileSystem__CopyFile(self, clean_path_rel(src), clean_path_rel(dest))
     },
-
     OpenInputStream = function(path) {
       fs___FileSystem__OpenInputStream(self, clean_path_rel(path))
     },
@@ -257,7 +248,7 @@ FileSystem <- R6Class("FileSystem", inherit = ArrowObject,
     ls = function(path = "", ...) {
       selector <- FileSelector$create(path, ...) # ... for recursive = TRUE
       infos <- self$GetFileInfo(selector)
-      map_chr(infos, ~.$path)
+      map_chr(infos, ~ .$path)
       # TODO: add full.names argument like base::dir() (default right now is TRUE)
       # TODO: see fs package for glob/regexp filtering
       # TODO: verbose method that shows other attributes as df
@@ -289,14 +280,14 @@ get_paths_and_filesystem <- function(x, filesystem = NULL) {
       # Stop? Can't have URL (which yields a fs) and another fs
     }
     x <- lapply(x, FileSystem$from_uri)
-    if (length(unique(map(x, ~class(.$fs)))) > 1) {
+    if (length(unique(map(x, ~ class(.$fs)))) > 1) {
       stop(
         "Vectors of URIs for different file systems are not supported",
         call. = FALSE
       )
     }
-    fs  <- x[[1]]$fs
-    path <- map_chr(x, ~.$path) # singular name "path" used for compatibility
+    fs <- x[[1]]$fs
+    path <- map_chr(x, ~ .$path) # singular name "path" used for compatibility
   } else {
     fs <- filesystem %||% LocalFileSystem$create()
     if (inherits(fs, "LocalFileSystem")) {
@@ -335,7 +326,8 @@ LocalFileSystem$create <- function() {
 #' @rdname FileSystem
 #' @importFrom utils modifyList
 #' @export
-S3FileSystem <- R6Class("S3FileSystem", inherit = FileSystem,
+S3FileSystem <- R6Class("S3FileSystem",
+  inherit = FileSystem,
   active = list(
     region = function() fs___S3FileSystem__region(self)
   )
@@ -343,7 +335,13 @@ S3FileSystem <- R6Class("S3FileSystem", inherit = FileSystem,
 S3FileSystem$create <- function(anonymous = FALSE, ...) {
   args <- list2(...)
   if (anonymous) {
-    invalid_args <- intersect(c("access_key", "secret_key", "session_token", "role_arn", "session_name", "external_id", "load_frequency"), names(args))
+    invalid_args <- intersect(
+      c(
+        "access_key", "secret_key", "session_token", "role_arn", "session_name",
+        "external_id", "load_frequency"
+      ),
+      names(args)
+    )
     if (length(invalid_args)) {
       stop("Cannot specify ", oxford_paste(invalid_args), " when anonymous = TRUE", call. = FALSE)
     }
@@ -425,7 +423,8 @@ s3_bucket <- function(bucket, ...) {
 #' @format NULL
 #' @rdname FileSystem
 #' @export
-SubTreeFileSystem <- R6Class("SubTreeFileSystem", inherit = FileSystem,
+SubTreeFileSystem <- R6Class("SubTreeFileSystem",
+  inherit = FileSystem,
   public = list(
     print = function(...) {
       if (inherits(self$base_fs, "LocalFileSystem")) {
diff --git a/r/R/flight.R b/r/R/flight.R
index 0143dc5b9ef..cde29785324 100644
--- a/r/R/flight.R
+++ b/r/R/flight.R
@@ -107,7 +107,8 @@ list_flights <- function(client) {
 #' @rdname list_flights
 #' @export
 flight_path_exists <- function(client, path) {
-  it_exists <- tryCatch({
+  it_exists <- tryCatch(
+    expr = {
       client$get_flight_info(descriptor_for_path(path))
       TRUE
     },
diff --git a/r/R/io.R b/r/R/io.R
index 5f015ce3b06..898b306a3dd 100644
--- a/r/R/io.R
+++ b/r/R/io.R
@@ -21,7 +21,8 @@
 
 # OutputStream ------------------------------------------------------------
 
-Writable <- R6Class("Writable", inherit = ArrowObject,
+Writable <- R6Class("Writable",
+  inherit = ArrowObject,
   public = list(
     write = function(x) io___Writable__write(self, buffer(x))
   )
@@ -55,7 +56,8 @@ Writable <- R6Class("Writable", inherit = ArrowObject,
 #'
 #' @rdname OutputStream
 #' @name OutputStream
-OutputStream <- R6Class("OutputStream", inherit = Writable,
+OutputStream <- R6Class("OutputStream",
+  inherit = Writable,
   public = list(
     close = function() io___OutputStream__Close(self),
     tell = function() io___OutputStream__Tell(self)
@@ -75,7 +77,8 @@ FileOutputStream$create <- function(path) {
 #' @format NULL
 #' @rdname OutputStream
 #' @export
-BufferOutputStream <- R6Class("BufferOutputStream", inherit = OutputStream,
+BufferOutputStream <- R6Class("BufferOutputStream",
+  inherit = OutputStream,
   public = list(
     capacity = function() io___BufferOutputStream__capacity(self),
     finish = function() io___BufferOutputStream__Finish(self),
@@ -90,7 +93,8 @@ BufferOutputStream$create <- function(initial_capacity = 0L) {
 # InputStream -------------------------------------------------------------
 
 
-Readable <- R6Class("Readable", inherit = ArrowObject,
+Readable <- R6Class("Readable",
+  inherit = ArrowObject,
   public = list(
     Read = function(nbytes) io___Readable__Read(self, nbytes)
   )
@@ -129,7 +133,8 @@ Readable <- R6Class("Readable", inherit = ArrowObject,
 #'
 #' @rdname InputStream
 #' @name InputStream
-InputStream <- R6Class("InputStream", inherit = Readable,
+InputStream <- R6Class("InputStream",
+  inherit = Readable,
   public = list(
     close = function() io___InputStream__Close(self)
   )
@@ -139,13 +144,13 @@ InputStream <- R6Class("InputStream", inherit = Readable,
 #' @format NULL
 #' @rdname InputStream
 #' @export
-RandomAccessFile <- R6Class("RandomAccessFile", inherit = InputStream,
+RandomAccessFile <- R6Class("RandomAccessFile",
+  inherit = InputStream,
   public = list(
     GetSize = function() io___RandomAccessFile__GetSize(self),
     supports_zero_copy = function() io___RandomAccessFile__supports_zero_copy(self),
     seek = function(position) io___RandomAccessFile__Seek(self, position),
     tell = function() io___RandomAccessFile__Tell(self),
-
     Read = function(nbytes = NULL) {
       if (is.null(nbytes)) {
         io___RandomAccessFile__Read0(self)
@@ -153,7 +158,6 @@ RandomAccessFile <- R6Class("RandomAccessFile", inherit = InputStream,
         io___Readable__Read(self, nbytes)
       }
     },
-
     ReadAt = function(position, nbytes = NULL) {
       if (is.null(nbytes)) {
         nbytes <- self$GetSize() - position
@@ -167,7 +171,8 @@ RandomAccessFile <- R6Class("RandomAccessFile", inherit = InputStream,
 #' @format NULL
 #' @rdname InputStream
 #' @export
-MemoryMappedFile <- R6Class("MemoryMappedFile", inherit = RandomAccessFile,
+MemoryMappedFile <- R6Class("MemoryMappedFile",
+  inherit = RandomAccessFile,
   public = list(
     Resize = function(size) io___MemoryMappedFile__Resize(self, size)
   )
@@ -267,7 +272,7 @@ make_output_stream <- function(x, filesystem = NULL) {
     x <- x$base_path
   } else if (is_url(x)) {
     fs_and_path <- FileSystem$from_uri(x)
-    filesystem = fs_and_path$fs
+    filesystem <- fs_and_path$fs
     x <- fs_and_path$path
   }
   assert_that(is.string(x))
diff --git a/r/R/ipc_stream.R b/r/R/ipc_stream.R
index 2a489c03cae..1e101d84e3c 100644
--- a/r/R/ipc_stream.R
+++ b/r/R/ipc_stream.R
@@ -18,7 +18,8 @@
 #' Write Arrow IPC stream format
 #'
 #' Apache Arrow defines two formats for [serializing data for interprocess
-#' communication (IPC)](https://arrow.apache.org/docs/format/Columnar.html#serialization-and-interprocess-communication-ipc):
+#' communication
+#' (IPC)](https://arrow.apache.org/docs/format/Columnar.html#serialization-and-interprocess-communication-ipc):
 #' a "stream" format and a "file" format, known as Feather. `write_ipc_stream()`
 #' and [write_feather()] write those formats, respectively.
 #'
@@ -35,7 +36,7 @@
 #' serialize data to a buffer.
 #' [RecordBatchWriter] for a lower-level interface.
 #' @export
-#' @examplesIf arrow_available() 
+#' @examplesIf arrow_available()
 #' tf <- tempfile()
 #' on.exit(unlink(tf))
 #' write_ipc_stream(mtcars, tf)
@@ -83,7 +84,8 @@ write_to_raw <- function(x, format = c("stream", "file")) {
 #' Read Arrow IPC stream format
 #'
 #' Apache Arrow defines two formats for [serializing data for interprocess
-#' communication (IPC)](https://arrow.apache.org/docs/format/Columnar.html#serialization-and-interprocess-communication-ipc):
+#' communication
+#' (IPC)](https://arrow.apache.org/docs/format/Columnar.html#serialization-and-interprocess-communication-ipc):
 #' a "stream" format and a "file" format, known as Feather. `read_ipc_stream()`
 #' and [read_feather()] read those formats, respectively.
 #'
diff --git a/r/R/json.R b/r/R/json.R
index cc4866512cd..6560a07fe06 100644
--- a/r/R/json.R
+++ b/r/R/json.R
@@ -26,14 +26,14 @@
 #' @return A `data.frame`, or a Table if `as_data_frame = FALSE`.
 #' @export
 #' @examplesIf arrow_available()
-#'   tf <- tempfile()
-#'   on.exit(unlink(tf))
-#'   writeLines('
+#' tf <- tempfile()
+#' on.exit(unlink(tf))
+#' writeLines('
 #'     { "hello": 3.5, "world": false, "yo": "thing" }
 #'     { "hello": 3.25, "world": null }
 #'     { "hello": 0.0, "world": true, "yo": null }
-#'   ', tf, useBytes=TRUE)
-#'   df <- read_json_arrow(tf)
+#'   ', tf, useBytes = TRUE)
+#' df <- read_json_arrow(tf)
 read_json_arrow <- function(file,
                             col_select = NULL,
                             as_data_frame = TRUE,
@@ -62,7 +62,8 @@ read_json_arrow <- function(file,
 #' @format NULL
 #' @docType class
 #' @export
-JsonTableReader <- R6Class("JsonTableReader", inherit = ArrowObject,
+JsonTableReader <- R6Class("JsonTableReader",
+  inherit = ArrowObject,
   public = list(
     Read = function() json___TableReader__Read(self)
   )
@@ -98,5 +99,4 @@ JsonParseOptions$create <- function(newlines_in_values = FALSE, schema = NULL) {
   } else {
     json___ParseOptions__initialize2(newlines_in_values, schema)
   }
-  
 }
diff --git a/r/R/message.R b/r/R/message.R
index 6a374a2b24f..ef33f1623d2 100644
--- a/r/R/message.R
+++ b/r/R/message.R
@@ -29,7 +29,8 @@
 #'
 #' @rdname Message
 #' @name Message
-Message <- R6Class("Message", inherit = ArrowObject,
+Message <- R6Class("Message",
+  inherit = ArrowObject,
   public = list(
     Equals = function(other, ...) {
       inherits(other, "Message") && ipc___Message__Equals(self, other)
@@ -57,7 +58,8 @@ Message <- R6Class("Message", inherit = ArrowObject,
 #' @rdname MessageReader
 #' @name MessageReader
 #' @export
-MessageReader <- R6Class("MessageReader", inherit = ArrowObject,
+MessageReader <- R6Class("MessageReader",
+  inherit = ArrowObject,
   public = list(
     ReadNextMessage = function() ipc___MessageReader__ReadNextMessage(self)
   )
diff --git a/r/R/metadata.R b/r/R/metadata.R
index 505d0653b4a..104e5408764 100644
--- a/r/R/metadata.R
+++ b/r/R/metadata.R
@@ -36,69 +36,74 @@
 }
 
 .unserialize_arrow_r_metadata <- function(x) {
-  tryCatch({
-    out <- unserialize(charToRaw(x))
+  tryCatch(
+    expr = {
+      out <- unserialize(charToRaw(x))
 
-    # if this is still raw, try decompressing
-    if (is.raw(out)) {
-      out <- unserialize(memDecompress(out, type = "gzip"))
+      # if this is still raw, try decompressing
+      if (is.raw(out)) {
+        out <- unserialize(memDecompress(out, type = "gzip"))
+      }
+      out
+    },
+    error = function(e) {
+      warning("Invalid metadata$r", call. = FALSE)
+      NULL
     }
-    out
-  }, error = function(e) {
-    warning("Invalid metadata$r", call. = FALSE)
-    NULL
-  })
+  )
 }
 
 #' @importFrom rlang trace_back
 apply_arrow_r_metadata <- function(x, r_metadata) {
-  tryCatch({
-    columns_metadata <- r_metadata$columns
-    if (is.data.frame(x)) {
-      if (length(names(x)) && !is.null(columns_metadata)) {
-        for (name in intersect(names(columns_metadata), names(x))) {
-          x[[name]] <- apply_arrow_r_metadata(x[[name]], columns_metadata[[name]])
+  tryCatch(
+    expr = {
+      columns_metadata <- r_metadata$columns
+      if (is.data.frame(x)) {
+        if (length(names(x)) && !is.null(columns_metadata)) {
+          for (name in intersect(names(columns_metadata), names(x))) {
+            x[[name]] <- apply_arrow_r_metadata(x[[name]], columns_metadata[[name]])
+          }
         }
+      } else if (is.list(x) && !inherits(x, "POSIXlt") && !is.null(columns_metadata)) {
+        # If we have a list and "columns_metadata" this applies row-level metadata
+        # inside of a column in a dataframe.
+
+        # However, if we are inside of a dplyr collection (including all datasets),
+        # we cannot apply this row-level metadata, since the order of the rows is
+        # not guaranteed to be the same, so don't even try, but warn what's going on
+        trace <- trace_back()
+        in_dplyr_collect <- any(map_lgl(trace$calls, function(x) {
+          grepl("collect.arrow_dplyr_query", x, fixed = TRUE)[[1]]
+        }))
+        if (in_dplyr_collect) {
+          warning(
+            "Row-level metadata is not compatible with this operation and has ",
+            "been ignored",
+            call. = FALSE
+          )
+        } else {
+          x <- map2(x, columns_metadata, function(.x, .y) {
+            apply_arrow_r_metadata(.x, .y)
+          })
+        }
+        x
       }
-    } else if (is.list(x) && !inherits(x, "POSIXlt") && !is.null(columns_metadata)) {
-      # If we have a list and "columns_metadata" this applies row-level metadata
-      # inside of a column in a dataframe.
-
-      # However, if we are inside of a dplyr collection (including all datasets),
-      # we cannot apply this row-level metadata, since the order of the rows is
-      # not guaranteed to be the same, so don't even try, but warn what's going on
-      trace <- trace_back()
-      in_dplyr_collect <- any(map_lgl(trace$calls, function(x) {
-        grepl("collect.arrow_dplyr_query", x, fixed = TRUE)[[1]]
-      }))
-      if (in_dplyr_collect) {
-        warning(
-          "Row-level metadata is not compatible with this operation and has ",
-          "been ignored",
-          call. = FALSE
-        )
-      } else {
-        x <- map2(x, columns_metadata, function(.x, .y) {
-          apply_arrow_r_metadata(.x, .y)
-        })
-      }
-      x
-    }
 
-    if (!is.null(r_metadata$attributes)) {
-      attributes(x)[names(r_metadata$attributes)] <- r_metadata$attributes
-      if (inherits(x, "POSIXlt")) {
-        # We store POSIXlt as a StructArray, which is translated back to R
-        # as a data.frame, but while data frames have a row.names = c(NA, nrow(x))
-        # attribute, POSIXlt does not, so since this is now no longer an object
-        # of class data.frame, remove the extraneous attribute
-        attr(x, "row.names") <- NULL
+      if (!is.null(r_metadata$attributes)) {
+        attributes(x)[names(r_metadata$attributes)] <- r_metadata$attributes
+        if (inherits(x, "POSIXlt")) {
+          # We store POSIXlt as a StructArray, which is translated back to R
+          # as a data.frame, but while data frames have a row.names = c(NA, nrow(x))
+          # attribute, POSIXlt does not, so since this is now no longer an object
+          # of class data.frame, remove the extraneous attribute
+          attr(x, "row.names") <- NULL
+        }
       }
+    },
+    error = function(e) {
+      warning("Invalid metadata$r", call. = FALSE)
     }
-
-  }, error = function(e) {
-    warning("Invalid metadata$r", call. = FALSE)
-  })
+  )
   x
 }
 
diff --git a/r/R/parquet.R b/r/R/parquet.R
index 3006fcbbe50..ee2ed57de24 100644
--- a/r/R/parquet.R
+++ b/r/R/parquet.R
@@ -152,11 +152,11 @@ write_parquet <- function(x,
                           properties = NULL,
                           arrow_properties = NULL) {
   x_out <- x
-  
+
   if (is.data.frame(x) || inherits(x, "RecordBatch")) {
     x <- Table$create(x)
   }
-  
+
   assert_that(is_writable_table(x))
 
   if (!inherits(sink, "OutputStream")) {
@@ -166,10 +166,16 @@ write_parquet <- function(x,
 
   # Deprecation warnings
   if (!is.null(properties)) {
-    warning("Providing 'properties' is deprecated. If you need to assemble properties outside this function, use ParquetFileWriter instead.")
+    warning(
+      "Providing 'properties' is deprecated. If you need to assemble properties outside ",
+      "this function, use ParquetFileWriter instead."
+    )
   }
   if (!is.null(arrow_properties)) {
-    warning("Providing 'arrow_properties' is deprecated. If you need to assemble arrow_properties outside this function, use ParquetFileWriter instead.")
+    warning(
+      "Providing 'arrow_properties' is deprecated. If you need to assemble arrow_properties ",
+      "outside this function, use ParquetFileWriter instead."
+    )
   }
 
   writer <- ParquetFileWriter$create(
@@ -213,7 +219,8 @@ ParquetArrowWriterProperties$create <- function(use_deprecated_int96_timestamps
   if (is.null(coerce_timestamps)) {
     timestamp_unit <- -1L # null sentinel value
   } else {
-    timestamp_unit <- make_valid_time_unit(coerce_timestamps,
+    timestamp_unit <- make_valid_time_unit(
+      coerce_timestamps,
       c("ms" = TimeUnit$MILLI, "us" = TimeUnit$MICRO)
     )
   }
@@ -236,7 +243,7 @@ make_valid_version <- function(version, valid_versions = valid_parquet_version)
   tryCatch(
     valid_versions[[match.arg(version, choices = names(valid_versions))]],
     error = function(cond) {
-      stop('"version" should be one of ', oxford_paste(names(valid_versions), "or"), call.=FALSE)
+      stop('"version" should be one of ', oxford_paste(names(valid_versions), "or"), call. = FALSE)
     }
   )
 }
@@ -284,7 +291,8 @@ make_valid_version <- function(version, valid_versions = valid_parquet_version)
 #'
 #' @export
 ParquetWriterProperties <- R6Class("ParquetWriterProperties", inherit = ArrowObject)
-ParquetWriterPropertiesBuilder <- R6Class("ParquetWriterPropertiesBuilder", inherit = ArrowObject,
+ParquetWriterPropertiesBuilder <- R6Class("ParquetWriterPropertiesBuilder",
+  inherit = ArrowObject,
   public = list(
     set_version = function(version) {
       parquet___WriterProperties___Builder__version(self, make_valid_version(version))
@@ -292,26 +300,30 @@ ParquetWriterPropertiesBuilder <- R6Class("ParquetWriterPropertiesBuilder", inhe
     set_compression = function(table, compression) {
       compression <- compression_from_name(compression)
       assert_that(is.integer(compression))
-      private$.set(table, compression,
+      private$.set(
+        table, compression,
         parquet___ArrowWriterProperties___Builder__set_compressions
       )
     },
     set_compression_level = function(table, compression_level) {
       # cast to integer but keep names
       compression_level <- set_names(as.integer(compression_level), names(compression_level))
-      private$.set(table, compression_level,
+      private$.set(
+        table, compression_level,
         parquet___ArrowWriterProperties___Builder__set_compression_levels
       )
     },
     set_dictionary = function(table, use_dictionary) {
       assert_that(is.logical(use_dictionary))
-      private$.set(table, use_dictionary,
+      private$.set(
+        table, use_dictionary,
         parquet___ArrowWriterProperties___Builder__set_use_dictionary
       )
     },
     set_write_statistics = function(table, write_statistics) {
       assert_that(is.logical(write_statistics))
-      private$.set(table, write_statistics,
+      private$.set(
+        table, write_statistics,
         parquet___ArrowWriterProperties___Builder__set_write_statistics
       )
     },
@@ -319,7 +331,6 @@ ParquetWriterPropertiesBuilder <- R6Class("ParquetWriterPropertiesBuilder", inhe
       parquet___ArrowWriterProperties___Builder__data_page_size(self, data_page_size)
     }
   ),
-
   private = list(
     .set = function(table, value, FUN) {
       msg <- paste0("unsupported ", substitute(value), "= specification")
@@ -399,7 +410,8 @@ ParquetWriterProperties$create <- function(table,
 #'
 #' @export
 #' @include arrow-package.R
-ParquetFileWriter <- R6Class("ParquetFileWriter", inherit = ArrowObject,
+ParquetFileWriter <- R6Class("ParquetFileWriter",
+  inherit = ArrowObject,
   public = list(
     WriteTable = function(table, chunk_size) {
       parquet___arrow___FileWriter__WriteTable(self, table, chunk_size)
@@ -441,7 +453,8 @@ ParquetFileWriter$create <- function(schema,
 #'    `column_indices=` argument is a 0-based integer vector indicating which columns to retain.
 #' - `$ReadRowGroup(i, column_indices)`: get an `arrow::Table` by reading the `i`th row group (0-based).
 #'    The optional `column_indices=` argument is a 0-based integer vector indicating which columns to retain.
-#' - `$ReadRowGroups(row_groups, column_indices)`: get an `arrow::Table` by reading several row groups (0-based integers).
+#' - `$ReadRowGroups(row_groups, column_indices)`: get an `arrow::Table` by reading several row
+#'    groups (0-based integers).
 #'    The optional `column_indices=` argument is a 0-based integer vector indicating which columns to retain.
 #' - `$GetSchema()`: get the `arrow::Schema` of the data in the file
 #' - `$ReadColumn(i)`: read the `i`th column (0-based) as a [ChunkedArray].
@@ -454,7 +467,7 @@ ParquetFileWriter$create <- function(schema,
 #'
 #' @export
 #' @examplesIf arrow_with_parquet()
-#' f <- system.file("v0.7.1.parquet", package="arrow")
+#' f <- system.file("v0.7.1.parquet", package = "arrow")
 #' pq <- ParquetFileReader$create(f)
 #' pq$GetSchema()
 #' if (codec_is_available("snappy")) {
diff --git a/r/R/python.R b/r/R/python.R
index 0a0afcb06c1..07cd4456b1a 100644
--- a/r/R/python.R
+++ b/r/R/python.R
@@ -212,7 +212,8 @@ maybe_py_to_r <- function(x) {
 #' @export
 install_pyarrow <- function(envname = NULL, nightly = FALSE, ...) {
   if (nightly) {
-    reticulate::py_install("pyarrow", envname = envname, ...,
+    reticulate::py_install("pyarrow",
+      envname = envname, ...,
       # Nightly for pip
       pip_options = "--extra-index-url https://repo.fury.io/arrow-nightlies/ --pre --upgrade",
       # Nightly for conda
diff --git a/r/R/record-batch-reader.R b/r/R/record-batch-reader.R
index 9fffea7da37..53f8f26291a 100644
--- a/r/R/record-batch-reader.R
+++ b/r/R/record-batch-reader.R
@@ -18,7 +18,8 @@
 
 #' @title RecordBatchReader classes
 #' @description Apache Arrow defines two formats for [serializing data for interprocess
-#' communication (IPC)](https://arrow.apache.org/docs/format/Columnar.html#serialization-and-interprocess-communication-ipc):
+#' communication
+#' (IPC)](https://arrow.apache.org/docs/format/Columnar.html#serialization-and-interprocess-communication-ipc):
 #' a "stream" format and a "file" format, known as Feather.
 #' `RecordBatchStreamReader` and `RecordBatchFileReader` are
 #' interfaces for accessing record batches from input sources in those formats,
@@ -90,7 +91,8 @@
 #' # Unlike the Writers, we don't have to close RecordBatchReaders,
 #' # but we do still need to close the file connection
 #' read_file_obj$close()
-RecordBatchReader <- R6Class("RecordBatchReader", inherit = ArrowObject,
+RecordBatchReader <- R6Class("RecordBatchReader",
+  inherit = ArrowObject,
   public = list(
     read_next_batch = function() RecordBatchReader__ReadNext(self),
     batches = function() RecordBatchReader__batches(self),
@@ -123,7 +125,8 @@ RecordBatchReader$import_from_c <- RecordBatchStreamReader$import_from_c <- Impo
 #' @usage NULL
 #' @format NULL
 #' @export
-RecordBatchFileReader <- R6Class("RecordBatchFileReader", inherit = ArrowObject,
+RecordBatchFileReader <- R6Class("RecordBatchFileReader",
+  inherit = ArrowObject,
   # Why doesn't this inherit from RecordBatchReader in C++?
   # Origin: https://github.com/apache/arrow/pull/679
   public = list(
diff --git a/r/R/record-batch-writer.R b/r/R/record-batch-writer.R
index 64c1cf0cec8..8675e785a41 100644
--- a/r/R/record-batch-writer.R
+++ b/r/R/record-batch-writer.R
@@ -18,7 +18,8 @@
 
 #' @title RecordBatchWriter classes
 #' @description Apache Arrow defines two formats for [serializing data for interprocess
-#' communication (IPC)](https://arrow.apache.org/docs/format/Columnar.html#serialization-and-interprocess-communication-ipc):
+#' communication
+#' (IPC)](https://arrow.apache.org/docs/format/Columnar.html#serialization-and-interprocess-communication-ipc):
 #' a "stream" format and a "file" format, known as Feather.
 #' `RecordBatchStreamWriter` and `RecordBatchFileWriter` are
 #' interfaces for writing record batches to those formats, respectively.
@@ -93,11 +94,11 @@
 #' # Unlike the Writers, we don't have to close RecordBatchReaders,
 #' # but we do still need to close the file connection
 #' read_file_obj$close()
-RecordBatchWriter <- R6Class("RecordBatchWriter", inherit = ArrowObject,
+RecordBatchWriter <- R6Class("RecordBatchWriter",
+  inherit = ArrowObject,
   public = list(
     write_batch = function(batch) ipc___RecordBatchWriter__WriteRecordBatch(self, batch),
     write_table = function(table) ipc___RecordBatchWriter__WriteTable(self, table),
-
     write = function(x) {
       if (inherits(x, "RecordBatch")) {
         self$write_batch(x)
@@ -107,7 +108,6 @@ RecordBatchWriter <- R6Class("RecordBatchWriter", inherit = ArrowObject,
         self$write_table(Table$create(x))
       }
     },
-
     close = function() ipc___RecordBatchWriter__Close(self)
   )
 )
@@ -173,7 +173,7 @@ get_ipc_metadata_version <- function(x) {
     x <- paste0("V", x)
   } else if (is.null(x)) {
     if (identical(Sys.getenv("ARROW_PRE_1_0_METADATA_VERSION"), "1") ||
-        identical(Sys.getenv("ARROW_PRE_0_15_IPC_FORMAT"), "1")) {
+      identical(Sys.getenv("ARROW_PRE_0_15_IPC_FORMAT"), "1")) {
       # PRE_1_0 is specific for this;
       # if you already set PRE_0_15, PRE_1_0 should be implied
       x <- "V4"
diff --git a/r/R/record-batch.R b/r/R/record-batch.R
index c42834762ef..e1c5251b254 100644
--- a/r/R/record-batch.R
+++ b/r/R/record-batch.R
@@ -77,7 +77,8 @@
 #' @rdname RecordBatch
 #' @name RecordBatch
 #' @export
-RecordBatch <- R6Class("RecordBatch", inherit = ArrowTabular,
+RecordBatch <- R6Class("RecordBatch",
+  inherit = ArrowTabular,
   public = list(
     column = function(i) RecordBatch__column(self, i),
     column_name = function(i) RecordBatch__column_name(self, i),
@@ -123,7 +124,6 @@ RecordBatch <- R6Class("RecordBatch", inherit = ArrowTabular,
       ExportRecordBatch(self, array_ptr, schema_ptr)
     }
   ),
-
   active = list(
     num_columns = function() RecordBatch__num_columns(self),
     num_rows = function() RecordBatch__num_rows(self),
diff --git a/r/R/scalar.R b/r/R/scalar.R
index 6e5e63cee3e..4dedc6c1232 100644
--- a/r/R/scalar.R
+++ b/r/R/scalar.R
@@ -23,7 +23,7 @@
 #' @docType class
 #'
 #' @description A `Scalar` holds a single value of an Arrow type.
-#' 
+#'
 #' @section Methods:
 #'   `$ToString()`: convert to a string
 #'   `$as_vector()`: convert to an R vector
@@ -31,7 +31,7 @@
 #'   `$Equals(other)`: is this Scalar equal to `other`
 #'   `$ApproxEquals(other)`: is this Scalar approximately equal to `other`
 #'   `$is_valid`: is this Scalar valid
-#'   `$null_count`: number of invalid values - 1 or 0 
+#'   `$null_count`: number of invalid values - 1 or 0
 #'   `$type`: Scalar type
 #'
 #' @name Scalar
@@ -41,14 +41,14 @@
 #' Scalar$create(404)
 #' # If you pass a vector into Scalar$create, you get a list containing your items
 #' Scalar$create(c(1, 2, 3))
-#' 
+#'
 #' # Comparisons
 #' my_scalar <- Scalar$create(99)
 #' my_scalar$ApproxEquals(Scalar$create(99.00001)) # FALSE
 #' my_scalar$ApproxEquals(Scalar$create(99.000009)) # TRUE
 #' my_scalar$Equals(Scalar$create(99.000009)) # FALSE
 #' my_scalar$Equals(Scalar$create(99L)) # FALSE (types don't match)
-#' 
+#'
 #' my_scalar$ToString()
 #' @export
 Scalar <- R6Class("Scalar",
diff --git a/r/R/schema.R b/r/R/schema.R
index 3adebe259aa..60223c95665 100644
--- a/r/R/schema.R
+++ b/r/R/schema.R
@@ -158,7 +158,7 @@ prepare_key_value_metadata <- function(metadata) {
 
 print_schema_fields <- function(s) {
   # Alternative to Schema__ToString that doesn't print metadata
-  paste(map_chr(s$fields, ~.$ToString()), collapse = "\n")
+  paste(map_chr(s$fields, ~ .$ToString()), collapse = "\n")
 }
 
 #' @param ... named list of [data types][data-type]
@@ -236,7 +236,7 @@ length.Schema <- function(x) x$num_fields
       i <- setdiff(seq_len(length(x)), -1 * i)
     }
   }
-  fields <- map(i, ~x[[.]])
+  fields <- map(i, ~ x[[.]])
   invalid <- map_lgl(fields, is.null)
   if (any(invalid)) {
     stop(
@@ -291,7 +291,9 @@ read_schema <- function(stream, ...) {
 #' z <- schema(b = double(), k = utf8())
 #' unify_schemas(a, z)
 unify_schemas <- function(..., schemas = list(...)) {
-  if (any(vapply(schemas, is.null, TRUE))) return(NULL)
+  if (any(vapply(schemas, is.null, TRUE))) {
+    return(NULL)
+  }
   arrow__UnifySchemas(schemas)
 }
 
diff --git a/r/R/table.R b/r/R/table.R
index 3e5c52d9624..5aae067f0fc 100644
--- a/r/R/table.R
+++ b/r/R/table.R
@@ -94,7 +94,8 @@
 #' tab[["cyl"]]
 #' as.data.frame(tab[4:8, c("gear", "hp", "wt")])
 #' @export
-Table <- R6Class("Table", inherit = ArrowTabular,
+Table <- R6Class("Table",
+  inherit = ArrowTabular,
   public = list(
     column = function(i) Table__column(self, i),
     ColumnNames = function() Table__ColumnNames(self),
@@ -136,7 +137,6 @@ Table <- R6Class("Table", inherit = ArrowTabular,
       super$invalidate()
     }
   ),
-
   active = list(
     num_columns = function() Table__num_columns(self),
     num_rows = function() Table__num_rows(self),
@@ -166,16 +166,16 @@ Table$create <- function(..., schema = NULL) {
     names(dots) <- rep_len("", length(dots))
   }
   stopifnot(length(dots) > 0)
-  
+
   if (all_record_batches(dots)) {
     return(Table__from_record_batches(dots, schema))
   }
 
-  # If any arrays are length 1, recycle them  
+  # If any arrays are length 1, recycle them
   dots <- recycle_scalars(dots)
 
   out <- Table__from_dots(dots, schema, option_use_threads())
-  
+
   # Preserve any grouping
   if (length(dots) == 1 && inherits(dots[[1]], "grouped_df")) {
     out <- dplyr::group_by(out, !!!dplyr::groups(dots[[1]]))
diff --git a/r/R/type.R b/r/R/type.R
index c96f43bbb46..4ef7cefb56e 100644
--- a/r/R/type.R
+++ b/r/R/type.R
@@ -42,7 +42,6 @@ DataType <- R6Class("DataType",
     },
     export_to_c = function(ptr) ExportType(self, ptr)
   ),
-
   active = list(
     id = function() DataType__id(self),
     name = function() DataType__name(self),
@@ -139,7 +138,7 @@ Null <- R6Class("Null", inherit = DataType)
 Timestamp <- R6Class("Timestamp",
   inherit = FixedWidthType,
   public = list(
-    timezone = function()  TimestampType__timezone(self),
+    timezone = function() TimestampType__timezone(self),
     unit = function() TimestampType__unit(self)
   )
 )
diff --git a/r/R/util.R b/r/R/util.R
index 884c346e503..5958b0b3111 100644
--- a/r/R/util.R
+++ b/r/R/util.R
@@ -17,7 +17,7 @@
 
 # for compatibility with R versions earlier than 4.0.0
 if (!exists("deparse1")) {
-  deparse1 <- function (expr, collapse = " ", width.cutoff = 500L, ...) {
+  deparse1 <- function(expr, collapse = " ", width.cutoff = 500L, ...) {
     paste(deparse(expr, width.cutoff, ...), collapse = collapse)
   }
 }
@@ -48,7 +48,7 @@ assert_is_list_of <- function(object, class) {
 }
 
 is_list_of <- function(object, class) {
-  is.list(object) && all(map_lgl(object, ~inherits(., class)))
+  is.list(object) && all(map_lgl(object, ~ inherits(., class)))
 }
 
 empty_named_list <- function() structure(list(), .Names = character(0))
@@ -72,7 +72,7 @@ is_function <- function(expr, name) {
 
 all_funs <- function(expr) {
   names <- all_names(expr)
-  names[vapply(names, function(name) {is_function(expr, name)}, TRUE)]
+  names[vapply(names, function(name) is_function(expr, name), TRUE)]
 }
 
 all_vars <- function(expr) {
@@ -117,7 +117,7 @@ handle_parquet_io_error <- function(e, format) {
     # If length(format) > 1, that means it is (almost certainly) the default/not specified value
     # so let the user know that they should specify the actual (not parquet) format
     abort(c(
-      msg, 
+      msg,
       i = "Did you mean to specify a 'format' other than the default (parquet)?"
     ))
   }
@@ -128,42 +128,41 @@ is_writable_table <- function(x) {
   inherits(x, c("data.frame", "ArrowTabular"))
 }
 
-# This attribute is used when is_writable is passed into assert_that, and allows 
+# This attribute is used when is_writable is passed into assert_that, and allows
 # the call to form part of the error message when is_writable is FALSE
-attr(is_writable_table, "fail") <- function(call, env){
+attr(is_writable_table, "fail") <- function(call, env) {
   paste0(
     deparse(call$x),
     " must be an object of class 'data.frame', 'RecordBatch', or 'Table', not '",
-    class(env[[deparse(call$x)]])[[1]], 
+    class(env[[deparse(call$x)]])[[1]],
     "'."
   )
 }
 
 #' Recycle scalar values in a list of arrays
-#' 
+#'
 #' @param arrays List of arrays
-#' @return List of arrays with any vector/Scalar/Array/ChunkedArray values of length 1 recycled 
+#' @return List of arrays with any vector/Scalar/Array/ChunkedArray values of length 1 recycled
 #' @keywords internal
-recycle_scalars <- function(arrays){
+recycle_scalars <- function(arrays) {
   # Get lengths of items in arrays
   arr_lens <- map_int(arrays, NROW)
-  
+
   is_scalar <- arr_lens == 1
-  
+
   if (length(arrays) > 1 && any(is_scalar) && !all(is_scalar)) {
-    
+
     # Recycling not supported for tibbles and data.frames
-    if (all(map_lgl(arrays, ~inherits(.x, "data.frame")))) {
-      
+    if (all(map_lgl(arrays, ~ inherits(.x, "data.frame")))) {
       abort(c(
-          "All input tibbles or data.frames must have the same number of rows",
-          x = paste(
-            "Number of rows in longest and shortest inputs:",
-            oxford_paste(c(max(arr_lens), min(arr_lens)))
-          )
+        "All input tibbles or data.frames must have the same number of rows",
+        x = paste(
+          "Number of rows in longest and shortest inputs:",
+          oxford_paste(c(max(arr_lens), min(arr_lens)))
+        )
       ))
     }
-    
+
     max_array_len <- max(arr_lens)
     arrays[is_scalar] <- lapply(arrays[is_scalar], repeat_value_as_array, max_array_len)
   }
@@ -171,16 +170,16 @@ recycle_scalars <- function(arrays){
 }
 
 #' Take an object of length 1 and repeat it.
-#' 
+#'
 #' @param object Object of length 1 to be repeated - vector, `Scalar`, `Array`, or `ChunkedArray`
 #' @param n Number of repetitions
-#' 
+#'
 #' @return `Array` of length `n`
-#' 
+#'
 #' @keywords internal
 repeat_value_as_array <- function(object, n) {
   if (inherits(object, "ChunkedArray")) {
     return(Scalar$create(object$chunks[[1]])$as_array(n))
   }
   return(Scalar$create(object)$as_array(n))
-}
\ No newline at end of file
+}
diff --git a/r/lint.sh b/r/lint.sh
index 58c26d9f731..243444e0d1e 100755
--- a/r/lint.sh
+++ b/r/lint.sh
@@ -39,3 +39,7 @@ $CPP_BUILD_SUPPORT/run_cpplint.py \
     --cpplint_binary=$CPPLINT \
     --exclude_glob=$CPP_BUILD_SUPPORT/lint_exclusions.txt \
     --source_dir=$SOURCE_DIR/src --quiet
+
+# Run lintr
+R -e "if(!requireNamespace('lintr', quietly=TRUE)){stop('lintr is not installed, please install it with R -e \"install.packages(\'lintr\')\"')}"
+NOT_CRAN=true R -e "lintr::lint_package('${SOURCE_DIR}', path_prefix = 'r')"
diff --git a/r/man/ChunkedArray.Rd b/r/man/ChunkedArray.Rd
index 3a504f01466..486b6222af7 100644
--- a/r/man/ChunkedArray.Rd
+++ b/r/man/ChunkedArray.Rd
@@ -53,28 +53,6 @@ within the array's internal data. This can be an expensive check, potentially \c
 }
 }
 
-\examples{
-\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
-# Pass items into chunked_array as separate objects to create chunks
-class_scores <- chunked_array(c(87, 88, 89), c(94, 93, 92), c(71, 72, 73))
-class_scores$num_chunks
-
-# When taking a Slice from a chunked_array, chunks are preserved
-class_scores$Slice(2, length = 5)
-
-# You can combine Take and SortIndices to return a ChunkedArray with 1 chunk
-# containing all values, ordered.
-class_scores$Take(class_scores$SortIndices(descending = TRUE))
-
-# If you pass a list into chunked_array, you get a list of length 1
-list_scores <- chunked_array(list(c(9.9, 9.6, 9.5), c(8.2, 8.3, 8.4), c(10.0, 9.9, 9.8)))
-list_scores$num_chunks
-
-# When constructing a ChunkedArray, the first chunk is used to infer type.
-doubles <- chunked_array(c(1, 2, 3), c(5L, 6L, 7L))
-doubles$type
-\dontshow{\}) # examplesIf}
-}
 \seealso{
 \link{Array}
 }
diff --git a/r/man/Field.Rd b/r/man/Field.Rd
index 77d31fa637a..03dffd11ca9 100644
--- a/r/man/Field.Rd
+++ b/r/man/Field.Rd
@@ -28,8 +28,3 @@ field(name, type, metadata)
 }
 }
 
-\examples{
-\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
-field("x", int32())
-\dontshow{\}) # examplesIf}
-}
diff --git a/r/man/FileFormat.Rd b/r/man/FileFormat.Rd
index cabacc93755..b8d4dc01bad 100644
--- a/r/man/FileFormat.Rd
+++ b/r/man/FileFormat.Rd
@@ -51,18 +51,3 @@ From \link{CsvFragmentScanOptions} (these values can be overridden at scan time)
 It returns the appropriate subclass of \code{FileFormat} (e.g. \code{ParquetFileFormat})
 }
 
-\examples{
-\dontshow{if (arrow_with_dataset() && tolower(Sys.info()[["sysname"]]) != "windows") (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
-## Semi-colon delimited files
-# Set up directory for examples
-tf <- tempfile()
-dir.create(tf)
-on.exit(unlink(tf))
-write.table(mtcars, file.path(tf, "file1.txt"), sep = ";", row.names = FALSE)
-
-# Create FileFormat object
-format <- FileFormat$create(format = "text", delimiter = ";")
-
-open_dataset(tf, format = format)
-\dontshow{\}) # examplesIf}
-}
diff --git a/r/man/ParquetFileReader.Rd b/r/man/ParquetFileReader.Rd
index 39146919768..0b49df79d6b 100644
--- a/r/man/ParquetFileReader.Rd
+++ b/r/man/ParquetFileReader.Rd
@@ -28,7 +28,8 @@ takes the following arguments:
 \verb{column_indices=} argument is a 0-based integer vector indicating which columns to retain.
 \item \verb{$ReadRowGroup(i, column_indices)}: get an \code{arrow::Table} by reading the \code{i}th row group (0-based).
 The optional \verb{column_indices=} argument is a 0-based integer vector indicating which columns to retain.
-\item \verb{$ReadRowGroups(row_groups, column_indices)}: get an \code{arrow::Table} by reading several row groups (0-based integers).
+\item \verb{$ReadRowGroups(row_groups, column_indices)}: get an \code{arrow::Table} by reading several row
+groups (0-based integers).
 The optional \verb{column_indices=} argument is a 0-based integer vector indicating which columns to retain.
 \item \verb{$GetSchema()}: get the \code{arrow::Schema} of the data in the file
 \item \verb{$ReadColumn(i)}: read the \code{i}th column (0-based) as a \link{ChunkedArray}.
@@ -44,15 +45,3 @@ The optional \verb{column_indices=} argument is a 0-based integer vector indicat
 }
 }
 
-\examples{
-\dontshow{if (arrow_with_parquet()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
-f <- system.file("v0.7.1.parquet", package="arrow")
-pq <- ParquetFileReader$create(f)
-pq$GetSchema()
-if (codec_is_available("snappy")) {
-  # This file has compressed data columns
-  tab <- pq$ReadTable()
-  tab$schema
-}
-\dontshow{\}) # examplesIf}
-}
diff --git a/r/man/RecordBatch.Rd b/r/man/RecordBatch.Rd
index ff08c215853..e3024b91b7a 100644
--- a/r/man/RecordBatch.Rd
+++ b/r/man/RecordBatch.Rd
@@ -79,14 +79,3 @@ All list elements are coerced to string. See \code{schema()} for more informatio
 }
 }
 
-\examples{
-\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
-batch <- record_batch(name = rownames(mtcars), mtcars)
-dim(batch)
-dim(head(batch))
-names(batch)
-batch$mpg
-batch[["cyl"]]
-as.data.frame(batch[4:8, c("gear", "hp", "wt")])
-\dontshow{\}) # examplesIf}
-}
diff --git a/r/man/RecordBatchReader.Rd b/r/man/RecordBatchReader.Rd
index 90c796a6693..a206c30c8fb 100644
--- a/r/man/RecordBatchReader.Rd
+++ b/r/man/RecordBatchReader.Rd
@@ -43,43 +43,6 @@ are in the file.
 }
 }
 
-\examples{
-\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
-tf <- tempfile()
-on.exit(unlink(tf))
-
-batch <- record_batch(chickwts)
-
-# This opens a connection to the file in Arrow
-file_obj <- FileOutputStream$create(tf)
-# Pass that to a RecordBatchWriter to write data conforming to a schema
-writer <- RecordBatchFileWriter$create(file_obj, batch$schema)
-writer$write(batch)
-# You may write additional batches to the stream, provided that they have
-# the same schema.
-# Call "close" on the writer to indicate end-of-file/stream
-writer$close()
-# Then, close the connection--closing the IPC message does not close the file
-file_obj$close()
-
-# Now, we have a file we can read from. Same pattern: open file connection,
-# then pass it to a RecordBatchReader
-read_file_obj <- ReadableFile$create(tf)
-reader <- RecordBatchFileReader$create(read_file_obj)
-# RecordBatchFileReader knows how many batches it has (StreamReader does not)
-reader$num_record_batches
-# We could consume the Reader by calling $read_next_batch() until all are,
-# consumed, or we can call $read_table() to pull them all into a Table
-tab <- reader$read_table()
-# Call as.data.frame to turn that Table into an R data.frame
-df <- as.data.frame(tab)
-# This should be the same data we sent
-all.equal(df, chickwts, check.attributes = FALSE)
-# Unlike the Writers, we don't have to close RecordBatchReaders,
-# but we do still need to close the file connection
-read_file_obj$close()
-\dontshow{\}) # examplesIf}
-}
 \seealso{
 \code{\link[=read_ipc_stream]{read_ipc_stream()}} and \code{\link[=read_feather]{read_feather()}} provide a much simpler interface
 for reading data from these formats and are sufficient for many use cases.
diff --git a/r/man/RecordBatchWriter.Rd b/r/man/RecordBatchWriter.Rd
index 219c150e6a4..cc6d2feb3ac 100644
--- a/r/man/RecordBatchWriter.Rd
+++ b/r/man/RecordBatchWriter.Rd
@@ -45,43 +45,6 @@ to be closed separately.
 }
 }
 
-\examples{
-\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
-tf <- tempfile()
-on.exit(unlink(tf))
-
-batch <- record_batch(chickwts)
-
-# This opens a connection to the file in Arrow
-file_obj <- FileOutputStream$create(tf)
-# Pass that to a RecordBatchWriter to write data conforming to a schema
-writer <- RecordBatchFileWriter$create(file_obj, batch$schema)
-writer$write(batch)
-# You may write additional batches to the stream, provided that they have
-# the same schema.
-# Call "close" on the writer to indicate end-of-file/stream
-writer$close()
-# Then, close the connection--closing the IPC message does not close the file
-file_obj$close()
-
-# Now, we have a file we can read from. Same pattern: open file connection,
-# then pass it to a RecordBatchReader
-read_file_obj <- ReadableFile$create(tf)
-reader <- RecordBatchFileReader$create(read_file_obj)
-# RecordBatchFileReader knows how many batches it has (StreamReader does not)
-reader$num_record_batches
-# We could consume the Reader by calling $read_next_batch() until all are,
-# consumed, or we can call $read_table() to pull them all into a Table
-tab <- reader$read_table()
-# Call as.data.frame to turn that Table into an R data.frame
-df <- as.data.frame(tab)
-# This should be the same data we sent
-all.equal(df, chickwts, check.attributes = FALSE)
-# Unlike the Writers, we don't have to close RecordBatchReaders,
-# but we do still need to close the file connection
-read_file_obj$close()
-\dontshow{\}) # examplesIf}
-}
 \seealso{
 \code{\link[=write_ipc_stream]{write_ipc_stream()}} and \code{\link[=write_feather]{write_feather()}} provide a much simpler
 interface for writing data to these formats and are sufficient for many use
diff --git a/r/man/Scalar.Rd b/r/man/Scalar.Rd
index 21e04c12e08..9128988d11c 100644
--- a/r/man/Scalar.Rd
+++ b/r/man/Scalar.Rd
@@ -19,20 +19,3 @@ A \code{Scalar} holds a single value of an Arrow type.
 \verb{$type}: Scalar type
 }
 
-\examples{
-\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
-Scalar$create(pi)
-Scalar$create(404)
-# If you pass a vector into Scalar$create, you get a list containing your items
-Scalar$create(c(1, 2, 3))
-
-# Comparisons
-my_scalar <- Scalar$create(99)
-my_scalar$ApproxEquals(Scalar$create(99.00001)) # FALSE
-my_scalar$ApproxEquals(Scalar$create(99.000009)) # TRUE
-my_scalar$Equals(Scalar$create(99.000009)) # FALSE
-my_scalar$Equals(Scalar$create(99L)) # FALSE (types don't match)
-
-my_scalar$ToString()
-\dontshow{\}) # examplesIf}
-}
diff --git a/r/man/Schema.Rd b/r/man/Schema.Rd
index 6e385bb804e..0c66e5c2a42 100644
--- a/r/man/Schema.Rd
+++ b/r/man/Schema.Rd
@@ -74,12 +74,3 @@ Files with compressed metadata are readable by older versions of arrow, but
 the metadata is dropped.
 }
 
-\examples{
-\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
-df <- data.frame(col1 = 2:4, col2 = c(0.1, 0.3, 0.5))
-tab1 <- Table$create(df)
-tab1$schema
-tab2 <- Table$create(df, schema = schema(col1 = int8(), col2 = float32()))
-tab2$schema
-\dontshow{\}) # examplesIf}
-}
diff --git a/r/man/Table.Rd b/r/man/Table.Rd
index 2675943e572..d955b0f5a29 100644
--- a/r/man/Table.Rd
+++ b/r/man/Table.Rd
@@ -79,14 +79,3 @@ All list elements are coerced to string. See \code{schema()} for more informatio
 }
 }
 
-\examples{
-\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
-tab <- Table$create(name = rownames(mtcars), mtcars)
-dim(tab)
-dim(head(tab))
-names(tab)
-tab$mpg
-tab[["cyl"]]
-as.data.frame(tab[4:8, c("gear", "hp", "wt")])
-\dontshow{\}) # examplesIf}
-}
diff --git a/r/man/array.Rd b/r/man/array.Rd
index 71957aff90c..ed25a2b0a34 100644
--- a/r/man/array.Rd
+++ b/r/man/array.Rd
@@ -82,26 +82,3 @@ within the array's internal data. This can be an expensive check, potentially \c
 }
 }
 
-\examples{
-\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
-my_array <- Array$create(1:10)
-my_array$type
-my_array$cast(int8())
-
-# Check if value is null; zero-indexed
-na_array <- Array$create(c(1:5, NA))
-na_array$IsNull(0)
-na_array$IsNull(5)
-na_array$IsValid(5)
-na_array$null_count
-
-# zero-copy slicing; the offset of the new Array will be the same as the index passed to $Slice
-new_array <- na_array$Slice(5)
-new_array$offset
-
-# Compare 2 arrays
-na_array2 = na_array
-na_array2 == na_array # element-wise comparison
-na_array2$Equals(na_array) # overall comparison
-\dontshow{\}) # examplesIf}
-}
diff --git a/r/man/buffer.Rd b/r/man/buffer.Rd
index a3ca1fc2fcb..99b636da3c7 100644
--- a/r/man/buffer.Rd
+++ b/r/man/buffer.Rd
@@ -33,12 +33,3 @@ contiguous memory with a particular size.
 }
 }
 
-\examples{
-\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
-my_buffer <- buffer(c(1, 2, 3, 4))
-my_buffer$is_mutable
-my_buffer$ZeroPadding()
-my_buffer$size
-my_buffer$capacity
-\dontshow{\}) # examplesIf}
-}
diff --git a/r/man/call_function.Rd b/r/man/call_function.Rd
index f63038442dc..7e9b7e50ea0 100644
--- a/r/man/call_function.Rd
+++ b/r/man/call_function.Rd
@@ -35,16 +35,7 @@ are callable with an \code{arrow_} prefix.
 When passing indices in \code{...}, \code{args}, or \code{options}, express them as
 0-based integers (consistent with C++).
 }
-\examples{
-\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
-a <- Array$create(c(1L, 2L, 3L, NA, 5L))
-s <- Scalar$create(4L)
-call_function("fill_null", a, s)
-
-a <- Array$create(rnorm(10000))
-call_function("quantile", a, options = list(q = seq(0, 1, 0.25)))
-\dontshow{\}) # examplesIf}
-}
 \seealso{
-\href{https://arrow.apache.org/docs/cpp/compute.html}{Arrow C++ documentation} for the functions and their respective options.
+\href{https://arrow.apache.org/docs/cpp/compute.html}{Arrow C++ documentation} for
+the functions and their respective options.
 }
diff --git a/r/man/codec_is_available.Rd b/r/man/codec_is_available.Rd
index b3238ff1dca..1b5e8278fa9 100644
--- a/r/man/codec_is_available.Rd
+++ b/r/man/codec_is_available.Rd
@@ -18,8 +18,3 @@ Support for compression libraries depends on the build-time settings of
 the Arrow C++ library. This function lets you know which are available for
 use.
 }
-\examples{
-\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
-codec_is_available("gzip")
-\dontshow{\}) # examplesIf}
-}
diff --git a/r/man/copy_files.Rd b/r/man/copy_files.Rd
index 1b83703f19f..75cc4405d8a 100644
--- a/r/man/copy_files.Rd
+++ b/r/man/copy_files.Rd
@@ -23,13 +23,3 @@ Nothing: called for side effects in the file system
 \description{
 Copy files between FileSystems
 }
-\examples{
-\dontshow{if (FALSE) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
-# Copy an S3 bucket's files to a local directory:
-copy_files("s3://your-bucket-name", "local-directory")
-# Using a FileSystem object
-copy_files(s3_bucket("your-bucket-name"), "local-directory")
-# Or go the other way, from local to S3
-copy_files("local-directory", s3_bucket("your-bucket-name"))
-\dontshow{\}) # examplesIf}
-}
diff --git a/r/man/data-type.Rd b/r/man/data-type.Rd
index a0631897573..101702a2fb2 100644
--- a/r/man/data-type.Rd
+++ b/r/man/data-type.Rd
@@ -150,14 +150,6 @@ are translated to R objects, \code{uint32} and \code{uint64} are converted to \c
 types, this conversion can be disabled (so that \code{int64} always yields a
 \code{bit64::integer64} object) by setting \code{options(arrow.int64_downcast = FALSE)}.
 }
-\examples{
-\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
-bool()
-struct(a = int32(), b = double())
-timestamp("ms", timezone = "CEST")
-time64("ns")
-\dontshow{\}) # examplesIf}
-}
 \seealso{
 \code{\link[=dictionary]{dictionary()}} for creating a dictionary (factor-like) type.
 }
diff --git a/r/man/hive_partition.Rd b/r/man/hive_partition.Rd
index eef9f9157ea..39d5d8d0ae2 100644
--- a/r/man/hive_partition.Rd
+++ b/r/man/hive_partition.Rd
@@ -28,8 +28,3 @@ Hive partitioning embeds field names and values in path segments, such as
 Because fields are named in the path segments, order of fields passed to
 \code{hive_partition()} does not matter.
 }
-\examples{
-\dontshow{if (arrow_with_dataset()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
-hive_partition(year = int16(), month = int8())
-\dontshow{\}) # examplesIf}
-}
diff --git a/r/man/list_compute_functions.Rd b/r/man/list_compute_functions.Rd
index 668e090c0ca..ba17688d833 100644
--- a/r/man/list_compute_functions.Rd
+++ b/r/man/list_compute_functions.Rd
@@ -37,10 +37,3 @@ The package includes Arrow methods for many base R functions that can
 be called directly on Arrow objects, as well as some tidyverse-flavored versions
 available inside \code{dplyr} verbs.
 }
-\examples{
-\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
-list_compute_functions() 
-list_compute_functions(pattern = "^UTF8", ignore.case = TRUE)
-list_compute_functions(pattern = "^is", invert = TRUE)
-\dontshow{\}) # examplesIf}
-}
diff --git a/r/man/load_flight_server.Rd b/r/man/load_flight_server.Rd
index 66d30f39147..7e2000a9ca2 100644
--- a/r/man/load_flight_server.Rd
+++ b/r/man/load_flight_server.Rd
@@ -15,8 +15,3 @@ to look in the \verb{inst/} directory for included modules.}
 \description{
 Load a Python Flight server
 }
-\examples{
-\dontshow{if (FALSE) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
-load_flight_server("demo_flight_server")
-\dontshow{\}) # examplesIf}
-}
diff --git a/r/man/match_arrow.Rd b/r/man/match_arrow.Rd
index d63ef3eed87..21481af4c6b 100644
--- a/r/man/match_arrow.Rd
+++ b/r/man/match_arrow.Rd
@@ -26,28 +26,3 @@ per element of \code{x} it it is present in \code{table}.
 \code{base::match()} is not a generic, so we can't just define Arrow methods for
 it. This function exposes the analogous functions in the Arrow C++ library.
 }
-\examples{
-\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
-# note that the returned value is 0-indexed
-cars_tbl <- Table$create(name = rownames(mtcars), mtcars)
-match_arrow(Scalar$create("Mazda RX4 Wag"), cars_tbl$name)
-
-is_in(Array$create("Mazda RX4 Wag"), cars_tbl$name)
-
-# Although there are multiple matches, you are returned the index of the first 
-# match, as with the base R equivalent
-match(4, mtcars$cyl) # 1-indexed
-match_arrow(Scalar$create(4), cars_tbl$cyl) # 0-indexed
-
-# If `x` contains multiple values, you are returned the indices of the first 
-# match for each value.
-match(c(4, 6, 8), mtcars$cyl)
-match_arrow(Array$create(c(4, 6, 8)), cars_tbl$cyl)
-
-# Return type matches type of `x`
-is_in(c(4, 6, 8), mtcars$cyl) # returns vector
-is_in(Scalar$create(4), mtcars$cyl) # returns Scalar
-is_in(Array$create(c(4, 6, 8)), cars_tbl$cyl) # returns Array
-is_in(ChunkedArray$create(c(4, 6), 8), cars_tbl$cyl) # returns ChunkedArray
-\dontshow{\}) # examplesIf}
-}
diff --git a/r/man/open_dataset.Rd b/r/man/open_dataset.Rd
index 1ca3d661880..974d4286f59 100644
--- a/r/man/open_dataset.Rd
+++ b/r/man/open_dataset.Rd
@@ -90,55 +90,6 @@ can accelerate queries that only touch some partitions (files). Call
 \code{open_dataset()} to point to a directory of data files and return a
 \code{Dataset}, then use \code{dplyr} methods to query it.
 }
-\examples{
-\dontshow{if (arrow_with_dataset() & arrow_with_parquet() ) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
-# Set up directory for examples
-tf <- tempfile()
-dir.create(tf)
-on.exit(unlink(tf))
-
-data <- dplyr::group_by(mtcars, cyl)
-write_dataset(data, tf)
-
-# You can specify a directory containing the files for your dataset and
-# open_dataset will scan all files in your directory.
-open_dataset(tf)
-
-# You can also supply a vector of paths
-open_dataset(c(file.path(tf, "cyl=4/part-1.parquet"), file.path(tf,"cyl=8/part-2.parquet")))
-
-## You must specify the file format if using a format other than parquet.
-tf2 <- tempfile()
-dir.create(tf2)
-on.exit(unlink(tf2))
-write_dataset(data, tf2, format = "ipc")
-# This line will results in errors when you try to work with the data
-\dontrun{open_dataset(tf2)}
-# This line will work
-open_dataset(tf2, format = "ipc") 
-
-## You can specify file partitioning to include it as a field in your dataset
-# Create a temporary directory and write example dataset
-tf3 <- tempfile()
-dir.create(tf3)
-on.exit(unlink(tf3))
-write_dataset(airquality, tf3, partitioning = c("Month", "Day"), hive_style = FALSE)
-
-# View files - you can see the partitioning means that files have been written 
-# to folders based on Month/Day values
-list.files(tf3, recursive = TRUE)
-
-# With no partitioning specified, dataset contains all files but doesn't include
-# directory names as field names
-open_dataset(tf3)
-
-# Now that partitioning has been specified, your dataset contains columns for Month and Day
-open_dataset(tf3, partitioning = c("Month", "Day"))
-
-# If you want to specify the data types for your fields, you can pass in a Schema
-open_dataset(tf3, partitioning = schema(Month = int8(), Day = int8()))
-\dontshow{\}) # examplesIf}
-}
 \seealso{
 \code{vignette("dataset", package = "arrow")}
 }
diff --git a/r/man/read_delim_arrow.Rd b/r/man/read_delim_arrow.Rd
index 71394e547c9..d9c80306931 100644
--- a/r/man/read_delim_arrow.Rd
+++ b/r/man/read_delim_arrow.Rd
@@ -205,14 +205,3 @@ Note that if you are specifying column names, whether by \code{schema} or
 to idenfity column names, you'll need to add \code{skip = 1} to skip that row.
 }
 
-\examples{
-\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
-  tf <- tempfile()
-  on.exit(unlink(tf))
-  write.csv(mtcars, file = tf)
-  df <- read_csv_arrow(tf)
-  dim(df)
-  # Can select columns
-  df <- read_csv_arrow(tf, col_select = starts_with("d"))
-\dontshow{\}) # examplesIf}
-}
diff --git a/r/man/read_feather.Rd b/r/man/read_feather.Rd
index 95f4d1d12c6..fa18e3f7844 100644
--- a/r/man/read_feather.Rd
+++ b/r/man/read_feather.Rd
@@ -34,17 +34,6 @@ and to make sharing data across data analysis languages easy.
 This function reads both the original, limited specification of the format
 and the version 2 specification, which is the Apache Arrow IPC file format.
 }
-\examples{
-\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
-tf <- tempfile()
-on.exit(unlink(tf))
-write_feather(mtcars, tf)
-df <- read_feather(tf)
-dim(df)
-# Can select columns
-df <- read_feather(tf, col_select = starts_with("d"))
-\dontshow{\}) # examplesIf}
-}
 \seealso{
 \link{FeatherReader} and \link{RecordBatchReader} for lower-level access to reading Arrow IPC data.
 }
diff --git a/r/man/read_json_arrow.Rd b/r/man/read_json_arrow.Rd
index 4806b4ad1f0..476c99fe4de 100644
--- a/r/man/read_json_arrow.Rd
+++ b/r/man/read_json_arrow.Rd
@@ -38,15 +38,3 @@ A \code{data.frame}, or a Table if \code{as_data_frame = FALSE}.
 \description{
 Using \link{JsonTableReader}
 }
-\examples{
-\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
-  tf <- tempfile()
-  on.exit(unlink(tf))
-  writeLines('
-    { "hello": 3.5, "world": false, "yo": "thing" }
-    { "hello": 3.25, "world": null }
-    { "hello": 0.0, "world": true, "yo": null }
-  ', tf, useBytes=TRUE)
-  df <- read_json_arrow(tf)
-\dontshow{\}) # examplesIf}
-}
diff --git a/r/man/read_parquet.Rd b/r/man/read_parquet.Rd
index 056e8644747..ffb2cf7109f 100644
--- a/r/man/read_parquet.Rd
+++ b/r/man/read_parquet.Rd
@@ -39,12 +39,3 @@ A \link[=Table]{arrow::Table}, or a \code{data.frame} if \code{as_data_frame} is
 '\href{https://parquet.apache.org/}{Parquet}' is a columnar storage file format.
 This function enables you to read Parquet files into R.
 }
-\examples{
-\dontshow{if (arrow_with_parquet()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
-tf <- tempfile()
-on.exit(unlink(tf))
-write_parquet(mtcars, tf)
-df <- read_parquet(tf, col_select = starts_with("d"))
-head(df)
-\dontshow{\}) # examplesIf}
-}
diff --git a/r/man/s3_bucket.Rd b/r/man/s3_bucket.Rd
index 95a086deae5..78d527a56c4 100644
--- a/r/man/s3_bucket.Rd
+++ b/r/man/s3_bucket.Rd
@@ -21,8 +21,3 @@ are authorized to access the bucket's contents.
 that automatically detects the bucket's AWS region and holding onto the its
 relative path.
 }
-\examples{
-\dontshow{if (arrow_with_s3()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
-bucket <- s3_bucket("ursa-labs-taxi-data")
-\dontshow{\}) # examplesIf}
-}
diff --git a/r/man/to_duckdb.Rd b/r/man/to_duckdb.Rd
index 6be65b2b76b..c273a7520d5 100644
--- a/r/man/to_duckdb.Rd
+++ b/r/man/to_duckdb.Rd
@@ -39,22 +39,3 @@ that starts with an Arrow object to use DuckDB to calculate the summarization
 step. Internally, this calls \code{to_duckdb()} with all of the default argument
 values.
 }
-\examples{
-\dontshow{if (arrow_with_dataset() && requireNamespace("duckdb", quietly = TRUE) && packageVersion("duckdb") > "0.2.7" && requireNamespace("dplyr", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
-library(dplyr)
-
-ds <- InMemoryDataset$create(mtcars)
-
-ds \%>\%
-  filter(mpg < 30) \%>\%
-  to_duckdb() \%>\%
-  group_by(cyl) \%>\%
-  summarize(mean_mpg = mean(mpg, na.rm = TRUE))
-
-# the same query can be simplified using .engine = "duckdb"
-ds \%>\%
-  filter(mpg < 30) \%>\%
-  group_by(cyl) \%>\%
-  summarize(mean_mpg = mean(mpg, na.rm = TRUE), .engine = "duckdb")
-\dontshow{\}) # examplesIf}
-}
diff --git a/r/man/type.Rd b/r/man/type.Rd
index d55bbe24bd5..2f85e4a6ac6 100644
--- a/r/man/type.Rd
+++ b/r/man/type.Rd
@@ -15,13 +15,3 @@ an arrow logical type
 \description{
 infer the arrow Array type from an R vector
 }
-\examples{
-\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
-type(1:10)
-type(1L:10L)
-type(c(1, 1.5, 2))
-type(c("A", "B", "C"))
-type(mtcars)
-type(Sys.Date())
-\dontshow{\}) # examplesIf}
-}
diff --git a/r/man/unify_schemas.Rd b/r/man/unify_schemas.Rd
index 50c80c2dda9..709e33a5e74 100644
--- a/r/man/unify_schemas.Rd
+++ b/r/man/unify_schemas.Rd
@@ -18,10 +18,3 @@ A \code{Schema} with the union of fields contained in the inputs, or
 \description{
 Combine and harmonize schemas
 }
-\examples{
-\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
-a <- schema(b = double(), c = bool())
-z <- schema(b = double(), k = utf8())
-unify_schemas(a, z)
-\dontshow{\}) # examplesIf}
-}
diff --git a/r/man/value_counts.Rd b/r/man/value_counts.Rd
index 6ef77cd4727..139af8edc63 100644
--- a/r/man/value_counts.Rd
+++ b/r/man/value_counts.Rd
@@ -16,9 +16,3 @@ A \code{StructArray} containing "values" (same type as \code{x}) and "counts"
 \description{
 This function tabulates the values in the array and returns a table of counts.
 }
-\examples{
-\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
-cyl_vals <- Array$create(mtcars$cyl)
-value_counts(cyl_vals)
-\dontshow{\}) # examplesIf}
-}
diff --git a/r/man/write_csv_arrow.Rd b/r/man/write_csv_arrow.Rd
index 55a239ca998..d6df2bcd08e 100644
--- a/r/man/write_csv_arrow.Rd
+++ b/r/man/write_csv_arrow.Rd
@@ -23,10 +23,3 @@ the stream will be left open.
 \description{
 Write CSV file to disk
 }
-\examples{
-\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
-tf <- tempfile()
-on.exit(unlink(tf))
-write_csv_arrow(mtcars, tf)
-\dontshow{\}) # examplesIf}
-}
diff --git a/r/man/write_feather.Rd b/r/man/write_feather.Rd
index c6273b61be8..0cc8c591369 100644
--- a/r/man/write_feather.Rd
+++ b/r/man/write_feather.Rd
@@ -47,13 +47,6 @@ and to make sharing data across data analysis languages easy.
 This function writes both the original, limited specification of the format
 and the version 2 specification, which is the Apache Arrow IPC file format.
 }
-\examples{
-\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
-tf <- tempfile()
-on.exit(unlink(tf))
-write_feather(mtcars, tf)
-\dontshow{\}) # examplesIf}
-}
 \seealso{
 \link{RecordBatchWriter} for lower-level access to writing Arrow IPC data.
 
diff --git a/r/man/write_ipc_stream.Rd b/r/man/write_ipc_stream.Rd
index 888d947eb99..4f742ce9178 100644
--- a/r/man/write_ipc_stream.Rd
+++ b/r/man/write_ipc_stream.Rd
@@ -31,13 +31,6 @@ with some nonstandard behavior, is deprecated. You should explicitly choose
 the function that will write the desired IPC format (stream or file) since
 either can be written to a file or \code{OutputStream}.
 }
-\examples{
-\dontshow{if (arrow_available() ) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
-tf <- tempfile()
-on.exit(unlink(tf))
-write_ipc_stream(mtcars, tf)
-\dontshow{\}) # examplesIf}
-}
 \seealso{
 \code{\link[=write_feather]{write_feather()}} for writing IPC files. \code{\link[=write_to_raw]{write_to_raw()}} to
 serialize data to a buffer.
diff --git a/r/man/write_parquet.Rd b/r/man/write_parquet.Rd
index d7147f7e8e6..823a6038e84 100644
--- a/r/man/write_parquet.Rd
+++ b/r/man/write_parquet.Rd
@@ -94,15 +94,3 @@ The default "snappy" is used if available, otherwise "uncompressed". To
 disable compression, set \code{compression = "uncompressed"}.
 Note that "uncompressed" columns may still have dictionary encoding.
 }
-\examples{
-\dontshow{if (arrow_with_parquet()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
-tf1 <- tempfile(fileext = ".parquet")
-write_parquet(data.frame(x = 1:5), tf1)
-
-# using compression
-if (codec_is_available("gzip")) {
-  tf2 <- tempfile(fileext = ".gz.parquet")
-  write_parquet(data.frame(x = 1:5), tf2, compression = "gzip", compression_level = 5)
-}
-\dontshow{\}) # examplesIf}
-}
diff --git a/r/man/write_to_raw.Rd b/r/man/write_to_raw.Rd
index 1f507e384c3..46af09a96e8 100644
--- a/r/man/write_to_raw.Rd
+++ b/r/man/write_to_raw.Rd
@@ -20,10 +20,3 @@ the data (\code{data.frame}, \code{RecordBatch}, or \code{Table}) they were give
 This function wraps those so that you can serialize data to a buffer and
 access that buffer as a \code{raw} vector in R.
 }
-\examples{
-\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
-# The default format is "stream"
-write_to_raw(mtcars)
-write_to_raw(mtcars, format = "file")
-\dontshow{\}) # examplesIf}
-}
diff --git a/r/tests/testthat/helper-data.R b/r/tests/testthat/helper-data.R
index b4b1bac4d7b..e3d1d9ab162 100644
--- a/r/tests/testthat/helper-data.R
+++ b/r/tests/testthat/helper-data.R
@@ -109,15 +109,15 @@ verses <- list(
 
 make_big_string <- function() {
   # This creates a character vector that would exceed the capacity of BinaryArray
-  rep(purrr::map_chr(2047:2050, ~paste(sample(letters, ., replace = TRUE), collapse = "")), 2^18)
+  rep(purrr::map_chr(2047:2050, ~ paste(sample(letters, ., replace = TRUE), collapse = "")), 2^18)
 }
 
 make_random_string_of_size <- function(size = 1) {
-  purrr::map_chr(1000*size, ~paste(sample(letters, ., replace = TRUE), collapse = ""))
+  purrr::map_chr(1000 * size, ~ paste(sample(letters, ., replace = TRUE), collapse = ""))
 }
 
 make_string_of_size <- function(size = 1) {
-  paste(rep(letters, length = 1000*size), collapse = "")
+  paste(rep(letters, length = 1000 * size), collapse = "")
 }
 
 example_with_extra_metadata <- example_with_metadata
@@ -140,17 +140,18 @@ example_with_logical_factors <- tibble::tibble(
 # sort order. The Arrow C++ library orders strings lexicographically as byte
 # strings. The order of a string array sorted by Arrow will not match the order
 # of an equivalent character vector sorted by R unless you set the R collation
-# locale to "C" by running:
-#   Sys.setlocale("LC_COLLATE", "C")
+# locale to "C" by running:   Sys.setlocale("LC_COLLATE", "C")
 # These test scripts set that, but if you are running individual tests you might
 # need to set it manually. When finished, you can restore the default
-# collation locale by running:
-#   Sys.setlocale("LC_COLLATE")
+# collation locale by running:   Sys.setlocale("LC_COLLATE")
 # In the future, the string collation locale used by the Arrow C++ library might
 # be configurable (ARROW-12046).
 example_data_for_sorting <- tibble::tibble(
   int = c(-.Machine$integer.max, -101L, -100L, 0L, 0L, 1L, 100L, 1000L, .Machine$integer.max, NA_integer_),
-  dbl = c(-Inf, -.Machine$double.xmax, -.Machine$double.xmin, 0, .Machine$double.xmin, pi, .Machine$double.xmax, Inf, NaN, NA_real_),
+  dbl = c(
+    -Inf, -.Machine$double.xmax, -.Machine$double.xmin, 0, .Machine$double.xmin,
+    pi, .Machine$double.xmax, Inf, NaN, NA_real_
+  ),
   chr = c("", "", "\"", "&", "ABC", "NULL", "a", "abc", "zzz", NA_character_),
   lgl = c(rep(FALSE, 4L), rep(TRUE, 5L), NA),
   dttm = lubridate::ymd_hms(c(
diff --git a/r/tests/testthat/helper-expectation.R b/r/tests/testthat/helper-expectation.R
index c4dab9ace45..72f07f32c96 100644
--- a/r/tests/testthat/helper-expectation.R
+++ b/r/tests/testthat/helper-expectation.R
@@ -36,7 +36,7 @@ expect_r6_class <- function(object, class) {
 expect_equivalent <- function(object, expected, ...) {
   # HACK: dplyr includes an all.equal.tbl_df method that is causing failures.
   # They look spurious, like:
-  # `Can't join on 'b' x 'b' because of incompatible types (tbl_df/tbl/data.frame / tbl_df/tbl/data.frame)`
+  # `Can't join on 'b' x 'b' because of incompatible types (tbl_df/tbl/data.frame / tbl_df/tbl/data.frame)` # nolint
   if (tibble::is_tibble(object)) {
     class(object) <- "data.frame"
   }
@@ -57,7 +57,7 @@ expect_type_equal <- function(object, expected, ...) {
   expect_equal(object, expected, ..., label = object$ToString(), expected.label = expected$ToString())
 }
 
-expect_match_arg_error <- function(object, values=c()) {
+expect_match_arg_error <- function(object, values = c()) {
   expect_error(object, paste0("'arg' .*", paste(dQuote(values), collapse = ", ")))
 }
 
@@ -129,7 +129,7 @@ expect_dplyr_equal <- function(expr,
 }
 
 expect_dplyr_error <- function(expr, # A dplyr pipeline with `input` as its start
-                               tbl,  # A tbl/df as reference, will make RB/Table with
+                               tbl, # A tbl/df as reference, will make RB/Table with
                                ...) {
   # ensure we have supplied tbl
   force(tbl)
@@ -137,7 +137,7 @@ expect_dplyr_error <- function(expr, # A dplyr pipeline with `input` as its star
   expr <- rlang::enquo(expr)
   msg <- tryCatch(
     rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(input = tbl))),
-    error = function (e) {
+    error = function(e) {
       msg <- conditionMessage(e)
 
       # The error here is of the form:
@@ -180,11 +180,11 @@ expect_dplyr_error <- function(expr, # A dplyr pipeline with `input` as its star
 }
 
 expect_vector_equal <- function(expr, # A vectorized R expression containing `input` as its input
-                               vec,  # A vector as reference, will make Array/ChunkedArray with
-                               skip_array = NULL, # Msg, if should skip Array test
-                               skip_chunked_array = NULL, # Msg, if should skip ChunkedArray test
-                               ignore_attr = FALSE, # ignore attributes?
-                               ...) {
+                                vec, # A vector as reference, will make Array/ChunkedArray with
+                                skip_array = NULL, # Msg, if should skip Array test
+                                skip_chunked_array = NULL, # Msg, if should skip ChunkedArray test
+                                ignore_attr = FALSE, # ignore attributes?
+                                ...) {
   expr <- rlang::enquo(expr)
   expected <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(input = vec)))
   skip_msg <- NULL
@@ -218,16 +218,15 @@ expect_vector_equal <- function(expr, # A vectorized R expression containing `in
 }
 
 expect_vector_error <- function(expr, # A vectorized R expression containing `input` as its input
-                                vec,  # A vector as reference, will make Array/ChunkedArray with
+                                vec, # A vector as reference, will make Array/ChunkedArray with
                                 skip_array = NULL, # Msg, if should skip Array test
                                 skip_chunked_array = NULL, # Msg, if should skip ChunkedArray test
                                 ...) {
-
   expr <- rlang::enquo(expr)
 
   msg <- tryCatch(
     rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(input = vec))),
-    error = function (e) {
+    error = function(e) {
       msg <- conditionMessage(e)
 
       pattern <- i18ize_error_messages()
@@ -244,7 +243,6 @@ expect_vector_error <- function(expr, # A vectorized R expression containing `in
   skip_msg <- NULL
 
   if (is.null(skip_array)) {
-
     expect_error(
       rlang::eval_tidy(
         expr,
diff --git a/r/tests/testthat/helper-skip.R b/r/tests/testthat/helper-skip.R
index b1c7d66bec8..906963e38d1 100644
--- a/r/tests/testthat/helper-skip.R
+++ b/r/tests/testthat/helper-skip.R
@@ -58,7 +58,7 @@ skip_if_not_running_large_memory_tests <- function() {
 
 skip_on_valgrind <- function() {
   # This does not actually skip on valgrind because we can't exactly detect it.
-  # Instead, it skips on CRAN when the OS is linux + and the R version is development 
+  # Instead, it skips on CRAN when the OS is linux + and the R version is development
   # (which is where valgrind is run as of this code)
   linux_dev <- identical(tolower(Sys.info()[["sysname"]]), "linux") &&
     grepl("devel", R.version.string)
diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R
index 305f5a34634..a2fd7bfec86 100644
--- a/r/tests/testthat/test-Array.R
+++ b/r/tests/testthat/test-Array.R
@@ -132,10 +132,11 @@ test_that("Slice() and RangeEquals()", {
   expect_error(x$RangeEquals(y, 10, NA), "'end_idx' cannot be NA")
   expect_error(x$RangeEquals(y, 10, 24, NA), "'other_start_idx' cannot be NA")
   expect_error(x$RangeEquals(y, "ten", 24))
-  # TODO (if anyone uses RangeEquals)
-  # expect_error(x$RangeEquals(y, 10, 2400, 0)) # does not error
-  # expect_error(x$RangeEquals(y, 1000, 24, 0)) # does not error
-  # expect_error(x$RangeEquals(y, 10, 24, 1000)) # does not error
+
+  skip("TODO: (if anyone uses RangeEquals)")
+  expect_error(x$RangeEquals(y, 10, 2400, 0)) # does not error
+  expect_error(x$RangeEquals(y, 1000, 24, 0)) # does not error
+  expect_error(x$RangeEquals(y, 10, 24, 1000)) # does not error
 })
 
 test_that("Double Array", {
@@ -264,7 +265,7 @@ test_that("array supports POSIXct (ARROW-3340)", {
 test_that("array supports POSIXct without timezone", {
   # Make sure timezone is not set
   withr::with_envvar(c(TZ = ""), {
-    times <- strptime("2019-02-03 12:34:56", format="%Y-%m-%d %H:%M:%S") + 1:10
+    times <- strptime("2019-02-03 12:34:56", format = "%Y-%m-%d %H:%M:%S") + 1:10
     expect_array_roundtrip(times, timestamp("us", ""))
 
     # Also test the INTSXP code path
@@ -475,7 +476,10 @@ test_that("Array$create() handles data frame -> struct arrays (ARROW-3811)", {
   expect_type_equal(a$type, struct(x = int32(), y = float64(), z = utf8()))
   expect_equivalent(as.vector(a), df)
 
-  df <- structure(list(col = structure(list(structure(list(list(structure(1))), class = "inner")), class = "outer")), class = "data.frame", row.names = c(NA, -1L))
+  df <- structure(
+    list(col = structure(list(structure(list(list(structure(1))), class = "inner")), class = "outer")),
+    class = "data.frame", row.names = c(NA, -1L)
+  )
   a <- Array$create(df)
   expect_type_equal(a$type, struct(col = list_of(list_of(list_of(float64())))))
   expect_equivalent(as.vector(a), df)
@@ -498,10 +502,16 @@ test_that("Array$create() can handle data frame with custom struct type (not inf
   expect_type_equal(a$type, type)
 
   type <- struct(x = float64(), y = int16(), z = int32())
-  expect_error(Array$create(df, type = type), regexp = "Number of fields in struct.* incompatible with number of columns in the data frame")
+  expect_error(
+    Array$create(df, type = type),
+    regexp = "Number of fields in struct.* incompatible with number of columns in the data frame"
+  )
 
   type <- struct(y = int16(), x = float64())
-  expect_error(Array$create(df, type = type), regexp = "Field name in position.*does not match the name of the column of the data frame")
+  expect_error(
+    Array$create(df, type = type),
+    regexp = "Field name in position.*does not match the name of the column of the data frame"
+  )
 
   type <- struct(x = float64(), y = utf8())
   expect_error(Array$create(df, type = type), regexp = "Invalid")
@@ -566,29 +576,57 @@ test_that("Array$create() handles vector -> large list arrays", {
   expect_array_roundtrip(list(NA), large_list_of(bool()), as = large_list_of(bool()))
   expect_array_roundtrip(list(logical(0)), large_list_of(bool()), as = large_list_of(bool()))
   expect_array_roundtrip(list(c(TRUE), c(FALSE), c(FALSE, TRUE)), large_list_of(bool()), as = large_list_of(bool()))
-  expect_array_roundtrip(list(c(TRUE), c(FALSE), NA, logical(0), c(FALSE, NA, TRUE)), large_list_of(bool()), as = large_list_of(bool()))
+  expect_array_roundtrip(
+    list(c(TRUE), c(FALSE), NA, logical(0), c(FALSE, NA, TRUE)),
+    large_list_of(bool()),
+    as = large_list_of(bool())
+  )
 
   # integer
   expect_array_roundtrip(list(NA_integer_), large_list_of(int32()), as = large_list_of(int32()))
   expect_array_roundtrip(list(integer(0)), large_list_of(int32()), as = large_list_of(int32()))
   expect_array_roundtrip(list(1:2, 3:4, 12:18), large_list_of(int32()), as = large_list_of(int32()))
-  expect_array_roundtrip(list(c(1:2), NA_integer_, integer(0), c(12:18, NA_integer_)), large_list_of(int32()), as = large_list_of(int32()))
+  expect_array_roundtrip(
+    list(c(1:2), NA_integer_, integer(0), c(12:18, NA_integer_)),
+    large_list_of(int32()),
+    as = large_list_of(int32())
+  )
 
   # numeric
   expect_array_roundtrip(list(NA_real_), large_list_of(float64()), as = large_list_of(float64()))
   expect_array_roundtrip(list(numeric(0)), large_list_of(float64()), as = large_list_of(float64()))
   expect_array_roundtrip(list(1, c(2, 3), 4), large_list_of(float64()), as = large_list_of(float64()))
-  expect_array_roundtrip(list(1, numeric(0), c(2, 3, NA_real_), 4), large_list_of(float64()), as = large_list_of(float64()))
+  expect_array_roundtrip(
+    list(1, numeric(0), c(2, 3, NA_real_), 4),
+    large_list_of(float64()),
+    as = large_list_of(float64())
+  )
 
   # character
   expect_array_roundtrip(list(NA_character_), large_list_of(utf8()), as = large_list_of(utf8()))
   expect_array_roundtrip(list(character(0)), large_list_of(utf8()), as = large_list_of(utf8()))
-  expect_array_roundtrip(list("itsy", c("bitsy", "spider"), c("is")), large_list_of(utf8()), as = large_list_of(utf8()))
-  expect_array_roundtrip(list("itsy", character(0), c("bitsy", "spider", NA_character_), c("is")), large_list_of(utf8()), as = large_list_of(utf8()))
+  expect_array_roundtrip(
+    list("itsy", c("bitsy", "spider"), c("is")),
+    large_list_of(utf8()),
+    as = large_list_of(utf8())
+  )
+  expect_array_roundtrip(
+    list("itsy", character(0), c("bitsy", "spider", NA_character_), c("is")),
+    large_list_of(utf8()),
+    as = large_list_of(utf8())
+  )
 
   # factor
-  expect_array_roundtrip(list(factor(c("b", "a"), levels = c("a", "b"))), large_list_of(dictionary(int8(), utf8())), as = large_list_of(dictionary(int8(), utf8())))
-  expect_array_roundtrip(list(factor(NA, levels = c("a", "b"))), large_list_of(dictionary(int8(), utf8())), as = large_list_of(dictionary(int8(), utf8())))
+  expect_array_roundtrip(
+    list(factor(c("b", "a"), levels = c("a", "b"))),
+    large_list_of(dictionary(int8(), utf8())),
+    as = large_list_of(dictionary(int8(), utf8()))
+  )
+  expect_array_roundtrip(
+    list(factor(NA, levels = c("a", "b"))),
+    large_list_of(dictionary(int8(), utf8())),
+    as = large_list_of(dictionary(int8(), utf8()))
+  )
 
   # struct
   expect_array_roundtrip(
@@ -609,25 +647,53 @@ test_that("Array$create() handles vector -> fixed size list arrays", {
 
   # logical
   expect_array_roundtrip(list(NA), fixed_size_list_of(bool(), 1L), as = fixed_size_list_of(bool(), 1L))
-  expect_array_roundtrip(list(c(TRUE, FALSE), c(FALSE, TRUE)), fixed_size_list_of(bool(), 2L), as = fixed_size_list_of(bool(), 2L))
-  expect_array_roundtrip(list(c(TRUE), c(FALSE), NA), fixed_size_list_of(bool(), 1L), as = fixed_size_list_of(bool(), 1L))
+  expect_array_roundtrip(
+    list(c(TRUE, FALSE), c(FALSE, TRUE)),
+    fixed_size_list_of(bool(), 2L),
+    as = fixed_size_list_of(bool(), 2L)
+  )
+  expect_array_roundtrip(
+    list(c(TRUE), c(FALSE), NA),
+    fixed_size_list_of(bool(), 1L),
+    as = fixed_size_list_of(bool(), 1L)
+  )
 
   # integer
   expect_array_roundtrip(list(NA_integer_), fixed_size_list_of(int32(), 1L), as = fixed_size_list_of(int32(), 1L))
   expect_array_roundtrip(list(1:2, 3:4, 11:12), fixed_size_list_of(int32(), 2L), as = fixed_size_list_of(int32(), 2L))
-  expect_array_roundtrip(list(c(1:2), c(NA_integer_, 3L)), fixed_size_list_of(int32(), 2L), as = fixed_size_list_of(int32(), 2L))
+  expect_array_roundtrip(
+    list(c(1:2), c(NA_integer_, 3L)),
+    fixed_size_list_of(int32(), 2L),
+    as = fixed_size_list_of(int32(), 2L)
+  )
 
   # numeric
   expect_array_roundtrip(list(NA_real_), fixed_size_list_of(float64(), 1L), as = fixed_size_list_of(float64(), 1L))
-  expect_array_roundtrip(list(c(1,2), c(2, 3)), fixed_size_list_of(float64(), 2L), as = fixed_size_list_of(float64(), 2L))
-  expect_array_roundtrip(list(c(1,2), c(NA_real_, 4)), fixed_size_list_of(float64(), 2L), as = fixed_size_list_of(float64(), 2L))
+  expect_array_roundtrip(
+    list(c(1, 2), c(2, 3)),
+    fixed_size_list_of(float64(), 2L),
+    as = fixed_size_list_of(float64(), 2L)
+  )
+  expect_array_roundtrip(
+    list(c(1, 2), c(NA_real_, 4)),
+    fixed_size_list_of(float64(), 2L),
+    as = fixed_size_list_of(float64(), 2L)
+  )
 
   # character
   expect_array_roundtrip(list(NA_character_), fixed_size_list_of(utf8(), 1L), as = fixed_size_list_of(utf8(), 1L))
-  expect_array_roundtrip(list(c("itsy", "bitsy"), c("spider", "is"), c(NA_character_, NA_character_), c("", "")), fixed_size_list_of(utf8(), 2L), as = fixed_size_list_of(utf8(), 2L))
+  expect_array_roundtrip(
+    list(c("itsy", "bitsy"), c("spider", "is"), c(NA_character_, NA_character_), c("", "")),
+    fixed_size_list_of(utf8(), 2L),
+    as = fixed_size_list_of(utf8(), 2L)
+  )
 
   # factor
-  expect_array_roundtrip(list(factor(c("b", "a"), levels = c("a", "b"))), fixed_size_list_of(dictionary(int8(), utf8()), 2L), as = fixed_size_list_of(dictionary(int8(), utf8()), 2L))
+  expect_array_roundtrip(
+    list(factor(c("b", "a"), levels = c("a", "b"))),
+    fixed_size_list_of(dictionary(int8(), utf8()), 2L),
+    as = fixed_size_list_of(dictionary(int8(), utf8()), 2L)
+  )
 
   # struct
   expect_array_roundtrip(
@@ -654,8 +720,10 @@ test_that("Handling string data with embedded nuls", {
     as.raw(c(0x6d, 0x61, 0x00, 0x6e)), # <-- there's your nul, 0x00
     as.raw(c(0x66, 0x00, 0x00, 0x61, 0x00, 0x6e)), # multiple nuls
     as.raw(c(0x63, 0x61, 0x6d, 0x65, 0x72, 0x61)),
-    as.raw(c(0x74, 0x76))),
-    class = c("arrow_binary", "vctrs_vctr", "list"))
+    as.raw(c(0x74, 0x76))
+  ),
+  class = c("arrow_binary", "vctrs_vctr", "list")
+  )
   expect_error(
     rawToChar(raws[[3]]),
     "embedded nul in string: 'ma\\0n'", # See?
@@ -664,7 +732,10 @@ test_that("Handling string data with embedded nuls", {
   array_with_nul <- Array$create(raws)$cast(utf8())
   expect_error(
     as.vector(array_with_nul),
-    "embedded nul in string: 'ma\\0n'; to strip nuls when converting from Arrow to R, set options(arrow.skip_nul = TRUE)",
+    paste0(
+      "embedded nul in string: 'ma\\0n'; to strip nuls when converting from Arrow ",
+      "to R, set options(arrow.skip_nul = TRUE)"
+    ),
     fixed = TRUE
   )
 
diff --git a/r/tests/testthat/test-RecordBatch.R b/r/tests/testthat/test-RecordBatch.R
index 681406caf64..dc327c07981 100644
--- a/r/tests/testthat/test-RecordBatch.R
+++ b/r/tests/testthat/test-RecordBatch.R
@@ -52,27 +52,27 @@ test_that("RecordBatch", {
   expect_error(batch$column_name("one"))
 
   col_int <- batch$column(0)
-  expect_true(inherits(col_int, 'Array'))
+  expect_true(inherits(col_int, "Array"))
   expect_equal(col_int$as_vector(), tbl$int)
   expect_equal(col_int$type, int32())
 
   col_dbl <- batch$column(1)
-  expect_true(inherits(col_dbl, 'Array'))
+  expect_true(inherits(col_dbl, "Array"))
   expect_equal(col_dbl$as_vector(), tbl$dbl)
   expect_equal(col_dbl$type, float64())
 
   col_lgl <- batch$column(2)
-  expect_true(inherits(col_dbl, 'Array'))
+  expect_true(inherits(col_dbl, "Array"))
   expect_equal(col_lgl$as_vector(), tbl$lgl)
   expect_equal(col_lgl$type, boolean())
 
   col_chr <- batch$column(3)
-  expect_true(inherits(col_chr, 'Array'))
+  expect_true(inherits(col_chr, "Array"))
   expect_equal(col_chr$as_vector(), tbl$chr)
   expect_equal(col_chr$type, utf8())
 
   col_fct <- batch$column(4)
-  expect_true(inherits(col_fct, 'Array'))
+  expect_true(inherits(col_fct, "Array"))
   expect_equal(col_fct$as_vector(), tbl$fct)
   expect_equal(col_fct$type, dictionary(int8(), utf8()))
 
@@ -89,7 +89,7 @@ test_that("RecordBatch", {
     schema(dbl = float64(), lgl = boolean(), chr = utf8(), fct = dictionary(int8(), utf8()))
   )
   expect_equal(batch2$column(0), batch$column(1))
-  expect_data_frame(batch2, tbl[,-1])
+  expect_data_frame(batch2, tbl[, -1])
 
   # input validation
   expect_error(batch$RemoveColumn(NA), "'i' cannot be NA")
@@ -109,10 +109,10 @@ test_that("RecordBatch S3 methods", {
 
 test_that("RecordBatch$Slice", {
   batch3 <- batch$Slice(5)
-  expect_data_frame(batch3, tbl[6:10,])
+  expect_data_frame(batch3, tbl[6:10, ])
 
   batch4 <- batch$Slice(5, 2)
-  expect_data_frame(batch4, tbl[6:7,])
+  expect_data_frame(batch4, tbl[6:7, ])
 
   # Input validation
   expect_error(batch$Slice("ten"))
@@ -131,25 +131,25 @@ test_that("RecordBatch$Slice", {
 })
 
 test_that("[ on RecordBatch", {
-  expect_data_frame(batch[6:7,], tbl[6:7,])
-  expect_data_frame(batch[c(6, 7),], tbl[6:7,])
+  expect_data_frame(batch[6:7, ], tbl[6:7, ])
+  expect_data_frame(batch[c(6, 7), ], tbl[6:7, ])
   expect_data_frame(batch[6:7, 2:4], tbl[6:7, 2:4])
   expect_data_frame(batch[, c("dbl", "fct")], tbl[, c(2, 5)])
   expect_identical(as.vector(batch[, "chr", drop = TRUE]), tbl$chr)
   expect_data_frame(batch[c(7, 3, 5), 2:4], tbl[c(7, 3, 5), 2:4])
   expect_data_frame(
-    batch[rep(c(FALSE, TRUE), 5),],
-    tbl[c(2, 4, 6, 8, 10),]
+    batch[rep(c(FALSE, TRUE), 5), ],
+    tbl[c(2, 4, 6, 8, 10), ]
   )
   # bool Array
-  expect_data_frame(batch[batch$lgl,], tbl[tbl$lgl,])
+  expect_data_frame(batch[batch$lgl, ], tbl[tbl$lgl, ])
   # int Array
   expect_data_frame(batch[Array$create(5:6), 2:4], tbl[6:7, 2:4])
 
   # input validation
   expect_error(batch[, c("dbl", "NOTACOLUMN")], 'Column not found: "NOTACOLUMN"')
-  expect_error(batch[, c(6, NA)], 'Column indices cannot be NA')
-  expect_error(batch[, c(2, -2)], 'Invalid column index')
+  expect_error(batch[, c(6, NA)], "Column indices cannot be NA")
+  expect_error(batch[, c(2, -2)], "Invalid column index")
 })
 
 test_that("[[ and $ on RecordBatch", {
@@ -161,7 +161,7 @@ test_that("[[ and $ on RecordBatch", {
   expect_error(batch[[c(4, 3)]])
   expect_error(batch[[NA]], "'i' must be character or numeric, not logical")
   expect_error(batch[[NULL]], "'i' must be character or numeric, not NULL")
-  expect_error(batch[[c("asdf", "jkl;")]], 'name is not a string', fixed = TRUE)
+  expect_error(batch[[c("asdf", "jkl;")]], "name is not a string", fixed = TRUE)
 })
 
 test_that("[[<- assignment", {
@@ -325,7 +325,7 @@ test_that("record_batch(schema=) does some basic consistency checking of the sch
 })
 
 test_that("RecordBatch dim() and nrow() (ARROW-3816)", {
-  batch <- record_batch(x = 1:10, y  = 1:10)
+  batch <- record_batch(x = 1:10, y = 1:10)
   expect_equal(dim(batch), c(10L, 2L))
   expect_equal(nrow(batch), 10L)
 })
@@ -411,7 +411,7 @@ test_that("record_batch() only auto splice data frames", {
 
 test_that("record_batch() handles null type (ARROW-7064)", {
   batch <- record_batch(a = 1:10, n = vctrs::unspecified(10))
-  expect_equivalent(batch$schema,  schema(a = int32(), n = null()))
+  expect_equivalent(batch$schema, schema(a = int32(), n = null()))
 })
 
 test_that("record_batch() scalar recycling with vectors", {
@@ -422,7 +422,6 @@ test_that("record_batch() scalar recycling with vectors", {
 })
 
 test_that("record_batch() scalar recycling with Scalars, Arrays, and ChunkedArrays", {
-
   expect_data_frame(
     record_batch(a = Array$create(1:10), b = Scalar$create(5)),
     tibble::tibble(a = 1:10, b = 5)
@@ -437,7 +436,6 @@ test_that("record_batch() scalar recycling with Scalars, Arrays, and ChunkedArra
     record_batch(a = Array$create(1:10), b = ChunkedArray$create(5)),
     tibble::tibble(a = 1:10, b = 5)
   )
-
 })
 
 test_that("record_batch() no recycling with tibbles", {
@@ -482,8 +480,8 @@ test_that("RecordBatch$Equals(check_metadata)", {
   expect_true(rb1$Equals(rb2))
   expect_false(rb1$Equals(rb2, check_metadata = TRUE))
 
-  expect_failure(expect_equal(rb1, rb2))  # expect_equal has check_metadata=TRUE
-  expect_equivalent(rb1, rb2)  # expect_equivalent has check_metadata=FALSE
+  expect_failure(expect_equal(rb1, rb2)) # expect_equal has check_metadata=TRUE
+  expect_equivalent(rb1, rb2) # expect_equivalent has check_metadata=FALSE
 
   expect_false(rb1$Equals(24)) # Not a RecordBatch
 })
@@ -511,13 +509,18 @@ test_that("Handling string data with embedded nuls", {
     as.raw(c(0x77, 0x6f, 0x6d, 0x61, 0x6e)),
     as.raw(c(0x6d, 0x61, 0x00, 0x6e)), # <-- there's your nul, 0x00
     as.raw(c(0x63, 0x61, 0x6d, 0x65, 0x72, 0x61)),
-    as.raw(c(0x74, 0x76))),
-    class = c("arrow_binary", "vctrs_vctr", "list"))
+    as.raw(c(0x74, 0x76))
+  ),
+  class = c("arrow_binary", "vctrs_vctr", "list")
+  )
   batch_with_nul <- record_batch(a = 1:5, b = raws)
   batch_with_nul$b <- batch_with_nul$b$cast(utf8())
   expect_error(
     as.data.frame(batch_with_nul),
-    "embedded nul in string: 'ma\\0n'; to strip nuls when converting from Arrow to R, set options(arrow.skip_nul = TRUE)",
+    paste0(
+      "embedded nul in string: 'ma\\0n'; to strip nuls when converting from Arrow to R, ",
+      "set options(arrow.skip_nul = TRUE)"
+    ),
     fixed = TRUE
   )
 
@@ -549,11 +552,9 @@ test_that("ARROW-11769 - grouping preserved in record batch creation", {
       dplyr::group_vars(),
     c("fct", "fct2")
   )
-
 })
 
 test_that("ARROW-12729 - length returns number of columns in RecordBatch", {
-
   tbl <- tibble::tibble(
     int = 1:10,
     fct = factor(rep(c("A", "B"), 5)),
@@ -563,12 +564,11 @@ test_that("ARROW-12729 - length returns number of columns in RecordBatch", {
   rb <- record_batch(!!!tbl)
 
   expect_identical(length(rb), 3L)
-
 })
 
 test_that("RecordBatchReader to C-interface", {
   skip_if_not_available("dataset")
-  
+
   tab <- Table$create(example_data)
 
   # export the RecordBatchReader via the C-interface
diff --git a/r/tests/testthat/test-Table.R b/r/tests/testthat/test-Table.R
index 9a40e40edf4..00ba4036164 100644
--- a/r/tests/testthat/test-Table.R
+++ b/r/tests/testthat/test-Table.R
@@ -70,7 +70,7 @@ test_that("Table S3 methods", {
 })
 
 test_that("Table $column and $field", {
-  tab <- Table$create(x = 1:10, y  = 1:10)
+  tab <- Table$create(x = 1:10, y = 1:10)
 
   expect_equal(tab$field(0), field("x", int32()))
 
@@ -100,26 +100,26 @@ test_that("[, [[, $ for Table", {
 
   expect_identical(names(tab), names(tbl))
 
-  expect_data_frame(tab[6:7,], tbl[6:7,])
+  expect_data_frame(tab[6:7, ], tbl[6:7, ])
   expect_data_frame(tab[6:7, 2:4], tbl[6:7, 2:4])
   expect_data_frame(tab[, c("dbl", "fct")], tbl[, c(2, 5)])
   expect_as_vector(tab[, "chr", drop = TRUE], tbl$chr)
   # Take within a single chunk
   expect_data_frame(tab[c(7, 3, 5), 2:4], tbl[c(7, 3, 5), 2:4])
-  expect_data_frame(tab[rep(c(FALSE, TRUE), 5),], tbl[c(2, 4, 6, 8, 10),])
+  expect_data_frame(tab[rep(c(FALSE, TRUE), 5), ], tbl[c(2, 4, 6, 8, 10), ])
   # bool ChunkedArray (with one chunk)
-  expect_data_frame(tab[tab$lgl,], tbl[tbl$lgl,])
+  expect_data_frame(tab[tab$lgl, ], tbl[tbl$lgl, ])
   # ChunkedArray with multiple chunks
   c1 <- c(TRUE, FALSE, TRUE, TRUE, FALSE)
   c2 <- c(FALSE, FALSE, TRUE, TRUE, FALSE)
   ca <- ChunkedArray$create(c1, c2)
-  expect_data_frame(tab[ca,], tbl[c(1, 3, 4, 8, 9),])
+  expect_data_frame(tab[ca, ], tbl[c(1, 3, 4, 8, 9), ])
   # int Array
   expect_data_frame(tab[Array$create(5:6), 2:4], tbl[6:7, 2:4])
   # ChunkedArray
   expect_data_frame(tab[ChunkedArray$create(5L, 6L), 2:4], tbl[6:7, 2:4])
   # Expression
-  expect_data_frame(tab[tab$int > 6,], tbl[tbl$int > 6,])
+  expect_data_frame(tab[tab$int > 6, ], tbl[tbl$int > 6, ])
 
   expect_as_vector(tab[["int"]], tbl$int)
   expect_as_vector(tab$int, tbl$int)
@@ -134,14 +134,14 @@ test_that("[, [[, $ for Table", {
   expect_error(tab[[c(4, 3)]])
   expect_error(tab[[NA]], "'i' must be character or numeric, not logical")
   expect_error(tab[[NULL]], "'i' must be character or numeric, not NULL")
-  expect_error(tab[[c("asdf", "jkl;")]], 'length(name) not equal to 1', fixed = TRUE)
+  expect_error(tab[[c("asdf", "jkl;")]], "length(name) not equal to 1", fixed = TRUE)
   expect_error(tab[-3:3], "Invalid column index")
-  expect_error(tab[1000],  "Invalid column index")
+  expect_error(tab[1000], "Invalid column index")
   expect_error(tab[1:1000], "Invalid column index")
 
   # input validation
   expect_error(tab[, c("dbl", "NOTACOLUMN")], 'Column not found: "NOTACOLUMN"')
-  expect_error(tab[, c(6, NA)], 'Column indices cannot be NA')
+  expect_error(tab[, c(6, NA)], "Column indices cannot be NA")
 
   skip("Table with 0 cols doesn't know how many rows it should have")
   expect_data_frame(tab[0], tbl[0])
@@ -226,10 +226,10 @@ test_that("Table$Slice", {
   )
   tab <- Table$create(tbl)
   tab2 <- tab$Slice(5)
-  expect_data_frame(tab2, tbl[6:10,])
+  expect_data_frame(tab2, tbl[6:10, ])
 
   tab3 <- tab$Slice(5, 2)
-  expect_data_frame(tab3, tbl[6:7,])
+  expect_data_frame(tab3, tbl[6:7, ])
 
   # Input validation
   expect_error(tab$Slice("ten"))
@@ -334,7 +334,8 @@ test_that("table() handles ... of arrays, chunked arrays, vectors", {
   )
   res <- as.data.frame(tab)
   expect_equal(names(res), c("a", "b", "c", "x", "y"))
-  expect_equal(res,
+  expect_equal(
+    res,
     tibble::tibble(a = 1:10, b = 1:10, c = v, x = 1:10, y = letters[1:10])
   )
 })
@@ -395,8 +396,10 @@ test_that("==.Table", {
 
 test_that("Table$Equals(check_metadata)", {
   tab1 <- Table$create(x = 1:2, y = c("a", "b"))
-  tab2 <- Table$create(x = 1:2, y = c("a", "b"),
-                       schema = tab1$schema$WithMetadata(list(some="metadata")))
+  tab2 <- Table$create(
+    x = 1:2, y = c("a", "b"),
+    schema = tab1$schema$WithMetadata(list(some = "metadata"))
+  )
 
   expect_r6_class(tab1, "Table")
   expect_r6_class(tab2, "Table")
@@ -408,8 +411,8 @@ test_that("Table$Equals(check_metadata)", {
   expect_true(tab1$Equals(tab2))
   expect_false(tab1$Equals(tab2, check_metadata = TRUE))
 
-  expect_failure(expect_equal(tab1, tab2))  # expect_equal has check_metadata=TRUE
-  expect_equivalent(tab1, tab2)  # expect_equivalent has check_metadata=FALSE
+  expect_failure(expect_equal(tab1, tab2)) # expect_equal has check_metadata=TRUE
+  expect_equivalent(tab1, tab2) # expect_equivalent has check_metadata=FALSE
 
   expect_false(tab1$Equals(24)) # Not a Table
 })
@@ -420,10 +423,10 @@ test_that("Table handles null type (ARROW-7064)", {
 })
 
 test_that("Can create table with specific dictionary types", {
-  fact <- example_data[,"fct"]
+  fact <- example_data[, "fct"]
   int_types <- c(int8(), int16(), int32(), int64())
   # TODO: test uint types when format allows
-  # uint_types <- c(uint8(), uint16(), uint32(), uint64())
+  # uint_types <- c(uint8(), uint16(), uint32(), uint64()) # nolint
   for (i in int_types) {
     sch <- schema(fct = dictionary(i, utf8()))
     tab <- Table$create(fact, schema = sch)
@@ -481,22 +484,20 @@ test_that("Table$create() scalar recycling with vectors", {
 })
 
 test_that("Table$create() scalar recycling with Scalars, Arrays, and ChunkedArrays", {
-  
   expect_data_frame(
     Table$create(a = Array$create(1:10), b = Scalar$create(5)),
     tibble::tibble(a = 1:10, b = 5)
   )
-  
+
   expect_data_frame(
     Table$create(a = Array$create(1:10), b = Array$create(5)),
     tibble::tibble(a = 1:10, b = 5)
   )
-  
+
   expect_data_frame(
     Table$create(a = Array$create(1:10), b = ChunkedArray$create(5)),
     tibble::tibble(a = 1:10, b = 5)
   )
-  
 })
 
 test_that("Table$create() no recycling with tibbles", {
@@ -507,7 +508,7 @@ test_that("Table$create() no recycling with tibbles", {
     ),
     regexp = "All input tibbles or data.frames must have the same number of rows"
   )
-  
+
   expect_error(
     Table$create(
       tibble::tibble(a = 1:10, b = 5),
@@ -533,11 +534,9 @@ test_that("ARROW-11769 - grouping preserved in table creation", {
       dplyr::group_vars(),
     c("fct", "fct2")
   )
-
 })
 
 test_that("ARROW-12729 - length returns number of columns in Table", {
-
   tbl <- tibble::tibble(
     int = 1:10,
     fct = factor(rep(c("A", "B"), 5)),
@@ -547,5 +546,4 @@ test_that("ARROW-12729 - length returns number of columns in Table", {
   tab <- Table$create(!!!tbl)
 
   expect_identical(length(tab), 3L)
-
 })
diff --git a/r/tests/testthat/test-arrow-info.R b/r/tests/testthat/test-arrow-info.R
index 3fac3f422e8..9eac6081486 100644
--- a/r/tests/testthat/test-arrow-info.R
+++ b/r/tests/testthat/test-arrow-info.R
@@ -18,6 +18,6 @@
 test_that("arrow_info()", {
   expect_s3_class(arrow_info(), "arrow_info")
   expect_output(print(arrow_info()), "Arrow package version")
-  options(arrow.foo=FALSE)
+  options(arrow.foo = FALSE)
   expect_output(print(arrow_info()), "arrow.foo")
 })
diff --git a/r/tests/testthat/test-arrow.R b/r/tests/testthat/test-arrow.R
index 2ab127e4800..2259da54bab 100644
--- a/r/tests/testthat/test-arrow.R
+++ b/r/tests/testthat/test-arrow.R
@@ -50,7 +50,8 @@ r_only({
 
 test_that("arrow gracefully fails to load objects from other sessions (ARROW-10071)", {
   a <- Array$create(1:10)
-  tf <- tempfile(); on.exit(unlink(tf))
+  tf <- tempfile()
+  on.exit(unlink(tf))
   saveRDS(a, tf)
 
   b <- readRDS(tf)
@@ -68,11 +69,11 @@ test_that("MemoryPool calls gc() to free memory when allocation fails (ARROW-100
 
   env <- new.env()
   trace(gc, print = FALSE, tracer = function() {
-          env$gc_was_called <- TRUE
-        })
+    env$gc_was_called <- TRUE
+  })
   on.exit(untrace(gc))
   # We expect this should fail because we don't have this much memory,
   # but it should gc() and retry (and fail again)
-  expect_error(BufferOutputStream$create(2 ** 60))
+  expect_error(BufferOutputStream$create(2**60))
   expect_true(env$gc_was_called)
 })
diff --git a/r/tests/testthat/test-backwards-compatibility.R b/r/tests/testthat/test-backwards-compatibility.R
index 3cf5d91ee5f..145a21de7ff 100644
--- a/r/tests/testthat/test-backwards-compatibility.R
+++ b/r/tests/testthat/test-backwards-compatibility.R
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+# nolint start
 # To write a new version of a test file for a current version:
 # write_parquet(example_with_metadata, test_path("golden-files/data-arrow_2.0.0.parquet"))
 
@@ -30,17 +31,18 @@
 # # get example data into the global env
 # write_parquet(example_with_metadata, "arrow/r/tests/testthat/golden-files/data-arrow_1.0.1.parquet")
 # quit()/exit
+# nolint end
 
 skip_if(getRversion() < "3.5.0", "The serialization format changed in 3.5")
 
 expect_identical_with_metadata <- function(object, expected, ..., top_level = TRUE) {
   attrs_to_keep <- c("names", "class", "row.names")
   if (!top_level) {
-      # remove not-tbl and not-data.frame attributes
-      for (attribute in names(attributes(expected))) {
-        if (attribute %in% attrs_to_keep) next
-        attributes(expected)[[attribute]] <- NULL
-      }
+    # remove not-tbl and not-data.frame attributes
+    for (attribute in names(attributes(expected))) {
+      if (attribute %in% attrs_to_keep) next
+      attributes(expected)[[attribute]] <- NULL
+    }
   }
   expect_identical(object, expected, ...)
 }
@@ -76,13 +78,15 @@ test_that("reading a known Parquet file to dataframe with 1.0.1", {
 })
 
 for (comp in c("lz4", "uncompressed", "zstd")) {
+  # nolint start
   # write_feather(example_with_metadata, test_path("golden-files/data-arrow_2.0.0_lz4.feather"), compression = "lz4")
   # write_feather(example_with_metadata, test_path("golden-files/data-arrow_2.0.0_uncompressed.feather"), compression = "uncompressed")
   # write_feather(example_with_metadata, test_path("golden-files/data-arrow_2.0.0_zstd.feather"), compression = "zstd")
+  # nolint end
   test_that("reading a known Feather file to dataframe with 2.0.0", {
     skip_if_not_available("parquet")
     skip_if_not_available(comp)
-    feather_file <- test_path(paste0("golden-files/data-arrow_2.0.0_", comp,".feather"))
+    feather_file <- test_path(paste0("golden-files/data-arrow_2.0.0_", comp, ".feather"))
 
     df <- read_feather(feather_file)
     expect_identical_with_metadata(df, example_with_metadata)
@@ -91,7 +95,7 @@ for (comp in c("lz4", "uncompressed", "zstd")) {
   test_that("reading a known Feather file to dataframe with 1.0.1", {
     skip_if_not_available("parquet")
     skip_if_not_available(comp)
-    feather_file <- test_path(paste0("golden-files/data-arrow_1.0.1_", comp,".feather"))
+    feather_file <- test_path(paste0("golden-files/data-arrow_1.0.1_", comp, ".feather"))
 
     df <- read_feather(feather_file)
     # 1.0.1 didn't save top-level metadata, so we need to remove it.
@@ -101,7 +105,7 @@ for (comp in c("lz4", "uncompressed", "zstd")) {
   test_that("reading a known Feather file to dataframe with 0.17.0", {
     skip_if_not_available("parquet")
     skip_if_not_available(comp)
-    feather_file <- test_path(paste0("golden-files/data-arrow_0.17.0_", comp,".feather"))
+    feather_file <- test_path(paste0("golden-files/data-arrow_0.17.0_", comp, ".feather"))
 
     df <- read_feather(feather_file)
     # the metadata from 0.17.0 doesn't have the top level, the special class is
diff --git a/r/tests/testthat/test-buffer-reader.R b/r/tests/testthat/test-buffer-reader.R
index 3236a3a477d..865ee7d4e4b 100644
--- a/r/tests/testthat/test-buffer-reader.R
+++ b/r/tests/testthat/test-buffer-reader.R
@@ -26,8 +26,8 @@ test_that("BufferReader can be created from R objects", {
   expect_r6_class(int, "BufferReader")
   expect_r6_class(raw, "BufferReader")
 
-  expect_equal(num$GetSize(), 13*8)
-  expect_equal(int$GetSize(), 13*4)
+  expect_equal(num$GetSize(), 13 * 8)
+  expect_equal(int$GetSize(), 13 * 4)
   expect_equal(raw$GetSize(), 16)
 })
 
diff --git a/r/tests/testthat/test-buffer.R b/r/tests/testthat/test-buffer.R
index 1b3ea09cb92..0d24ab02537 100644
--- a/r/tests/testthat/test-buffer.R
+++ b/r/tests/testthat/test-buffer.R
@@ -51,7 +51,7 @@ test_that("buffer buffer buffers buffers", {
 
 test_that("Other types can't be converted to Buffers", {
   expect_error(
-    buffer(data.frame(a="asdf")),
+    buffer(data.frame(a = "asdf")),
     "Cannot convert object of class data.frame to arrow::Buffer"
   )
 })
diff --git a/r/tests/testthat/test-chunked-array.R b/r/tests/testthat/test-chunked-array.R
index f9b102c6819..3964abcb65a 100644
--- a/r/tests/testthat/test-chunked-array.R
+++ b/r/tests/testthat/test-chunked-array.R
@@ -61,7 +61,7 @@ test_that("ChunkedArray", {
   expect_equal(z$length(), 5L)
   expect_equal(z$as_vector(), c(9:10, 1:3))
 
-  expect_chunked_roundtrip(list(c(1,2,3), c(4,5,6)), float64())
+  expect_chunked_roundtrip(list(c(1, 2, 3), c(4, 5, 6)), float64())
 
   # input validation
   expect_error(x$chunk(14), "subscript out of bounds")
@@ -94,8 +94,8 @@ test_that("ChunkedArray", {
 
 test_that("print ChunkedArray", {
   verify_output(test_path("test-chunked-array.txt"), {
-    chunked_array(c(1,2,3), c(4,5,6))
-    chunked_array(1:30, c(4,5,6))
+    chunked_array(c(1, 2, 3), c(4, 5, 6))
+    chunked_array(1:30, c(4, 5, 6))
     chunked_array(1:30)
     chunked_array(factor(c("a", "b")), factor(c("c", "d")))
   })
@@ -273,14 +273,14 @@ test_that("chunked_array() uses the first ... to infer type", {
 })
 
 test_that("chunked_array() handles downcasting", {
-   a <- chunked_array(10L, 10)
-   expect_type_equal(a$type, int32())
-   expect_equal(as.vector(a), c(10L, 10L))
+  a <- chunked_array(10L, 10)
+  expect_type_equal(a$type, int32())
+  expect_equal(as.vector(a), c(10L, 10L))
 })
 
 test_that("chunked_array() makes chunks of the same type", {
   a <- chunked_array(10L, bit64::as.integer64(13), type = int64())
-  for(chunk in a$chunks) {
+  for (chunk in a$chunks) {
     expect_type_equal(chunk$type, int64())
   }
 })
@@ -407,12 +407,17 @@ test_that("Handling string data with embedded nuls", {
     as.raw(c(0x6d, 0x61, 0x00, 0x6e)), # <-- there's your nul, 0x00
     as.raw(c(0x66, 0x00, 0x00, 0x61, 0x00, 0x6e)), # multiple nuls
     as.raw(c(0x63, 0x61, 0x6d, 0x65, 0x72, 0x61)),
-    as.raw(c(0x74, 0x76))),
-    class = c("arrow_binary", "vctrs_vctr", "list"))
+    as.raw(c(0x74, 0x76))
+  ),
+  class = c("arrow_binary", "vctrs_vctr", "list")
+  )
   chunked_array_with_nul <- ChunkedArray$create(raws)$cast(utf8())
   expect_error(
     as.vector(chunked_array_with_nul),
-    "embedded nul in string: 'ma\\0n'; to strip nuls when converting from Arrow to R, set options(arrow.skip_nul = TRUE)",
+    paste0(
+      "embedded nul in string: 'ma\\0n'; to strip nuls when converting from Arrow to R, ",
+      "set options(arrow.skip_nul = TRUE)"
+    ),
     fixed = TRUE
   )
 
diff --git a/r/tests/testthat/test-compute-aggregate.R b/r/tests/testthat/test-compute-aggregate.R
index 4dd929df0bf..eb1282e6ffb 100644
--- a/r/tests/testthat/test-compute-aggregate.R
+++ b/r/tests/testthat/test-compute-aggregate.R
@@ -99,7 +99,7 @@ test_that("mean.ChunkedArray", {
   a <- ChunkedArray$create(1:4, c(1:4, NA), 1:5)
   expect_r6_class(mean(a), "Scalar")
   expect_true(is.na(as.vector(mean(a))))
-  expect_identical(as.vector(mean(a, na.rm = TRUE)), 35/13)
+  expect_identical(as.vector(mean(a, na.rm = TRUE)), 35 / 13)
 })
 
 test_that("mean.Scalar", {
@@ -223,7 +223,7 @@ test_that("quantile.Array and quantile.ChunkedArray", {
   a <- Array$create(c(0, 1, 2, 3))
   ca <- ChunkedArray$create(c(0, 1), c(2, 3))
   probs <- c(0.49, 0.51)
-  for(ad in list(a, ca)) {
+  for (ad in list(a, ca)) {
     for (type in c(int32(), uint64(), float64())) {
       expect_equal(
         quantile(ad$cast(type), probs = probs, interpolation = "linear"),
@@ -351,9 +351,8 @@ test_that("match_arrow", {
   sc <- Scalar$create(3)
   expect_equal(match_arrow(sc, tab), Scalar$create(1L))
 
-  vec <-  c(1,2)
+  vec <- c(1, 2)
   expect_equal(match_arrow(vec, tab), Array$create(c(3L, 2L)))
-
 })
 
 test_that("is_in", {
@@ -367,9 +366,8 @@ test_that("is_in", {
   sc <- Scalar$create(3)
   expect_equal(is_in(sc, tab), Scalar$create(TRUE))
 
-  vec <-  c(1,9)
+  vec <- c(1, 9)
   expect_equal(is_in(vec, tab), Array$create(c(TRUE, FALSE)))
-
 })
 
 test_that("value_counts", {
@@ -388,7 +386,6 @@ test_that("value_counts", {
 })
 
 test_that("any.Array and any.ChunkedArray", {
-
   data <- c(1:10, NA, NA)
 
   expect_vector_equal(any(input > 5), data)
@@ -401,11 +398,9 @@ test_that("any.Array and any.ChunkedArray", {
   expect_vector_equal(any(input), data_logical)
   expect_vector_equal(any(input, na.rm = FALSE), data_logical)
   expect_vector_equal(any(input, na.rm = TRUE), data_logical)
-
 })
 
 test_that("all.Array and all.ChunkedArray", {
-
   data <- c(1:10, NA, NA)
 
   expect_vector_equal(all(input > 5), data)
@@ -418,7 +413,6 @@ test_that("all.Array and all.ChunkedArray", {
 
   expect_vector_equal(all(input), data_logical)
   expect_vector_equal(all(input, na.rm = TRUE), data_logical)
-
 })
 
 test_that("variance", {
diff --git a/r/tests/testthat/test-compute-arith.R b/r/tests/testthat/test-compute-arith.R
index 2586ba865b3..40a5d3c4ace 100644
--- a/r/tests/testthat/test-compute-arith.R
+++ b/r/tests/testthat/test-compute-arith.R
@@ -45,16 +45,20 @@ test_that("Subtraction", {
   a <- Array$create(c(1:4, NA_integer_))
   expect_equal(a - 3L, Array$create(c(-2:1, NA_integer_)))
 
-  expect_equal(Array$create(c(5.1, 6.1, 7.1, 8.1, NA_real_)) - a,
-               Array$create(c(4.1, 4.1, 4.1, 4.1, NA_real_)))
+  expect_equal(
+    Array$create(c(5.1, 6.1, 7.1, 8.1, NA_real_)) - a,
+    Array$create(c(4.1, 4.1, 4.1, 4.1, NA_real_))
+  )
 })
 
 test_that("Multiplication", {
   a <- Array$create(c(1:4, NA_integer_))
   expect_equal(a * 2L, Array$create(c(1:4 * 2L, NA_integer_)))
 
-  expect_equal((a * 0.5) * 3L,
-               Array$create(c(1.5, 3, 4.5, 6, NA_real_)))
+  expect_equal(
+    (a * 0.5) * 3L,
+    Array$create(c(1.5, 3, 4.5, 6, NA_real_))
+  )
 })
 
 test_that("Division", {
@@ -71,9 +75,11 @@ test_that("Division", {
   # the behavior of %/% matches R's (i.e. the integer of the quotient, not
   # simply dividing two integers)
   expect_equal(b / 2.2, Array$create(c(1:4 / 2.2, NA_real_)))
+  # nolint start
   # c(1:4) %/% 2.2 != c(1:4) %/% as.integer(2.2)
   # c(1:4) %/% 2.2             == c(0L, 0L, 1L, 1L)
   # c(1:4) %/% as.integer(2.2) == c(0L, 1L, 1L, 2L)
+  # nolint end
   expect_equal(b %/% 2.2, Array$create(c(0L, 0L, 1L, 1L, NA_integer_)))
 
   expect_equal(a %% 2, Array$create(c(1L, 0L, 1L, 0L, NA_integer_)))
@@ -89,22 +95,22 @@ test_that("Power", {
 
   expect_equal(a^0, Array$create(c(1, 1, 1, 1, NA_real_)))
   expect_equal(a^2, Array$create(c(1, 4, 9, 16, NA_real_)))
-  expect_equal(a^(-1), Array$create(c(1, 1/2, 1/3, 1/4, NA_real_)))
+  expect_equal(a^(-1), Array$create(c(1, 1 / 2, 1 / 3, 1 / 4, NA_real_)))
   expect_equal(a^(.5), Array$create(c(1, sqrt(2), sqrt(3), sqrt(4), NA_real_)))
 
   expect_equal(b^0, Array$create(c(1, 1, 1, 1, NA_real_)))
   expect_equal(b^2, Array$create(c(1, 4, 9, 16, NA_real_)))
-  expect_equal(b^(-1), Array$create(c(1, 1/2, 1/3, 1/4, NA_real_)))
+  expect_equal(b^(-1), Array$create(c(1, 1 / 2, 1 / 3, 1 / 4, NA_real_)))
   expect_equal(b^(.5), Array$create(c(1, sqrt(2), sqrt(3), sqrt(4), NA_real_)))
 
   expect_equal(c^0, Array$create(c(1, 1, 1, 1, NA_real_)))
   expect_equal(c^2, Array$create(c(1, 4, 9, 16, NA_real_)))
-  expect_equal(c^(-1), Array$create(c(1, 1/2, 1/3, 1/4, NA_real_)))
+  expect_equal(c^(-1), Array$create(c(1, 1 / 2, 1 / 3, 1 / 4, NA_real_)))
   expect_equal(c^(.5), Array$create(c(1, sqrt(2), sqrt(3), sqrt(4), NA_real_)))
 
   expect_equal(d^0, Array$create(c(1, 1, 1, 1, NA_real_)))
   expect_equal(d^2, Array$create(c(1, 4, 9, 16, NA_real_)))
-  expect_equal(d^(-1), Array$create(c(1, 1/2, 1/3, 1/4, NA_real_)))
+  expect_equal(d^(-1), Array$create(c(1, 1 / 2, 1 / 3, 1 / 4, NA_real_)))
   expect_equal(d^(.5), Array$create(c(1, sqrt(2), sqrt(3), sqrt(4), NA_real_)))
 })
 
@@ -113,5 +119,5 @@ test_that("Dates casting", {
 
   skip("ARROW-11090 (date/datetime arithmetic)")
   # Error: NotImplemented: Function add_checked has no kernel matching input types (array[date32[day]], scalar[double])
-  expect_equal(a + 2, Array$create(c((Sys.Date() + 1:4 ) + 2), NA_integer_))
+  expect_equal(a + 2, Array$create(c((Sys.Date() + 1:4) + 2), NA_integer_))
 })
diff --git a/r/tests/testthat/test-compute-vector.R b/r/tests/testthat/test-compute-vector.R
index 95e93634934..345da5656bf 100644
--- a/r/tests/testthat/test-compute-vector.R
+++ b/r/tests/testthat/test-compute-vector.R
@@ -43,7 +43,7 @@ test_that("compare ops with Array", {
   expect_array_compares(Array$create(c(NA, 1:5)), 4)
   expect_array_compares(Array$create(as.numeric(c(NA, 1:5))), 4)
   expect_array_compares(Array$create(c(NA, 1:5)), Array$create(rev(c(NA, 1:5))))
-  expect_array_compares(Array$create(c(NA, 1:5)), Array$create(rev(c(NA, 1:5)), type=double()))
+  expect_array_compares(Array$create(c(NA, 1:5)), Array$create(rev(c(NA, 1:5)), type = double()))
 })
 
 test_that("compare ops with ChunkedArray", {
@@ -108,7 +108,7 @@ test_that("call_function validation", {
   )
   expect_error(
     call_function("filter", Array$create(1:4), 3),
-    'Argument 2 is of class numeric'
+    "Argument 2 is of class numeric"
   )
   expect_error(
     call_function("filter",
diff --git a/r/tests/testthat/test-csv.R b/r/tests/testthat/test-csv.R
index 2a62b8c4e34..db8bb30585b 100644
--- a/r/tests/testthat/test-csv.R
+++ b/r/tests/testthat/test-csv.R
@@ -62,8 +62,8 @@ test_that("read_delim_arrow parsing options: quote", {
   tf <- tempfile()
   on.exit(unlink(tf))
 
-  df <- data.frame(a=c(1, 2), b=c("'abc'", "'def'"))
-  write.table(df, sep=";", tf, row.names = FALSE, quote = FALSE)
+  df <- data.frame(a = c(1, 2), b = c("'abc'", "'def'"))
+  write.table(df, sep = ";", tf, row.names = FALSE, quote = FALSE)
   tab1 <- read_delim_arrow(tf, delim = ";", quote = "'")
 
   # Is this a problem?
@@ -71,7 +71,7 @@ test_that("read_delim_arrow parsing options: quote", {
   tab1$a <- as.numeric(tab1$a)
   expect_equivalent(
     tab1,
-    data.frame(a=c(1, 2), b=c("abc", "def"), stringsAsFactors = FALSE)
+    data.frame(a = c(1, 2), b = c("abc", "def"), stringsAsFactors = FALSE)
   )
 })
 
@@ -138,7 +138,7 @@ test_that("read_csv_arrow parsing options: na strings", {
     b = c(NA, "B", "C", NA),
     stringsAsFactors = FALSE
   )
-  write.csv(df, tf, row.names=FALSE)
+  write.csv(df, tf, row.names = FALSE)
   expect_equal(grep("NA", readLines(tf)), 2:5)
 
   tab1 <- read_csv_arrow(tf)
@@ -147,7 +147,7 @@ test_that("read_csv_arrow parsing options: na strings", {
 
   unlink(tf) # Delete and write to the same file name again
 
-  write.csv(df, tf, row.names=FALSE, na = "asdf")
+  write.csv(df, tf, row.names = FALSE, na = "asdf")
   expect_equal(grep("asdf", readLines(tf)), 2:5)
 
   tab2 <- read_csv_arrow(tf, na = "asdf")
@@ -180,7 +180,8 @@ test_that("read_csv_arrow() can detect compression from file name", {
 
 test_that("read_csv_arrow(schema=)", {
   tbl <- example_data[, "int"]
-  tf <- tempfile(); on.exit(unlink(tf))
+  tf <- tempfile()
+  on.exit(unlink(tf))
   write.csv(tbl, tf, row.names = FALSE)
 
   df <- read_csv_arrow(tf, schema = schema(int = float64()), skip = 1)
@@ -189,7 +190,8 @@ test_that("read_csv_arrow(schema=)", {
 
 test_that("read_csv_arrow(col_types = <Schema>)", {
   tbl <- example_data[, "int"]
-  tf <- tempfile(); on.exit(unlink(tf))
+  tf <- tempfile()
+  on.exit(unlink(tf))
   write.csv(tbl, tf, row.names = FALSE)
 
   df <- read_csv_arrow(tf, col_types = schema(int = float64()))
@@ -198,7 +200,8 @@ test_that("read_csv_arrow(col_types = <Schema>)", {
 
 test_that("read_csv_arrow(col_types=string, col_names)", {
   tbl <- example_data[, "int"]
-  tf <- tempfile(); on.exit(unlink(tf))
+  tf <- tempfile()
+  on.exit(unlink(tf))
   write.csv(tbl, tf, row.names = FALSE)
 
   df <- read_csv_arrow(tf, col_names = "int", col_types = "d", skip = 1)
@@ -212,7 +215,8 @@ test_that("read_csv_arrow(col_types=string, col_names)", {
 
 test_that("read_csv_arrow() can read timestamps", {
   tbl <- tibble::tibble(time = as.POSIXct("2020-07-20 16:20", tz = "UTC"))
-  tf <- tempfile(); on.exit(unlink(tf))
+  tf <- tempfile()
+  on.exit(unlink(tf))
   write.csv(tbl, tf, row.names = FALSE)
 
   df <- read_csv_arrow(tf, col_types = schema(time = timestamp(timezone = "UTC")))
@@ -223,7 +227,8 @@ test_that("read_csv_arrow() can read timestamps", {
 })
 
 test_that("read_csv_arrow(timestamp_parsers=)", {
-  tf <- tempfile(); on.exit(unlink(tf))
+  tf <- tempfile()
+  on.exit(unlink(tf))
   tbl <- tibble::tibble(time = "23/09/2020")
   write.csv(tbl, tf, row.names = FALSE)
 
@@ -236,7 +241,8 @@ test_that("read_csv_arrow(timestamp_parsers=)", {
 })
 
 test_that("Skipping columns with null()", {
-  tf <- tempfile(); on.exit(unlink(tf))
+  tf <- tempfile()
+  on.exit(unlink(tf))
   cols <- c("dbl", "lgl", "false", "chr")
   tbl <- example_data[, cols]
   write.csv(tbl, tf, row.names = FALSE)
@@ -246,7 +252,8 @@ test_that("Skipping columns with null()", {
 })
 
 test_that("Mix of guessing and declaring types", {
-  tf <- tempfile(); on.exit(unlink(tf))
+  tf <- tempfile()
+  on.exit(unlink(tf))
   cols <- c("dbl", "lgl", "false", "chr")
   tbl <- example_data[, cols]
   write.csv(tbl, tf, row.names = FALSE)
@@ -263,69 +270,64 @@ test_that("Write a CSV file with header", {
   tbl_out <- write_csv_arrow(tbl_no_dates, csv_file)
   expect_true(file.exists(csv_file))
   expect_identical(tbl_out, tbl_no_dates)
-  
+
   tbl_in <- read_csv_arrow(csv_file)
   expect_identical(tbl_in, tbl_no_dates)
-  
+
   skip("Doesn't yet work with date columns due to ARROW-12540")
-  
+
   tbl_out <- write_csv_arrow(tbl, csv_file)
   expect_true(file.exists(csv_file))
   expect_identical(tbl_out, tbl)
-  
+
   tbl_in <- read_csv_arrow(csv_file)
   expect_identical(tbl_in, tbl)
 })
 
 
 test_that("Write a CSV file with no header", {
-  
   tbl_out <- write_csv_arrow(tbl_no_dates, csv_file, include_header = FALSE)
   expect_true(file.exists(csv_file))
   expect_identical(tbl_out, tbl_no_dates)
   tbl_in <- read_csv_arrow(csv_file, col_names = FALSE)
-  
+
   tbl_expected <- tbl_no_dates
   names(tbl_expected) <- c("f0", "f1", "f2", "f3")
-  
+
   expect_identical(tbl_in, tbl_expected)
-  
 })
 
 test_that("Write a CSV file with different batch sizes", {
-  
   tbl_out1 <- write_csv_arrow(tbl_no_dates, csv_file, batch_size = 1)
   expect_true(file.exists(csv_file))
   expect_identical(tbl_out1, tbl_no_dates)
   tbl_in1 <- read_csv_arrow(csv_file)
   expect_identical(tbl_in1, tbl_no_dates)
-  
+
   tbl_out2 <- write_csv_arrow(tbl_no_dates, csv_file, batch_size = 2)
   expect_true(file.exists(csv_file))
   expect_identical(tbl_out2, tbl_no_dates)
   tbl_in2 <- read_csv_arrow(csv_file)
   expect_identical(tbl_in2, tbl_no_dates)
-  
+
   tbl_out3 <- write_csv_arrow(tbl_no_dates, csv_file, batch_size = 12)
   expect_true(file.exists(csv_file))
   expect_identical(tbl_out3, tbl_no_dates)
   tbl_in3 <- read_csv_arrow(csv_file)
   expect_identical(tbl_in3, tbl_no_dates)
-  
 })
 
 test_that("Write a CSV file with invalid input type", {
-  
   bad_input <- Array$create(1:5)
   expect_error(
     write_csv_arrow(bad_input, csv_file),
     regexp = "x must be an object of class 'data.frame', 'RecordBatch', or 'Table', not 'Array'."
-    )
+  )
 })
 
 test_that("Write a CSV file with invalid batch size", {
   expect_error(
     write_csv_arrow(tbl_no_dates, csv_file, batch_size = -1),
-    regexp = 'batch_size not greater than 0'
+    regexp = "batch_size not greater than 0"
   )
 })
diff --git a/r/tests/testthat/test-data-type.R b/r/tests/testthat/test-data-type.R
index 84c75451eaa..51d73b589c8 100644
--- a/r/tests/testthat/test-data-type.R
+++ b/r/tests/testthat/test-data-type.R
@@ -17,7 +17,7 @@
 
 context("DataType")
 
-test_that("null type works as expected",{
+test_that("null type works as expected", {
   x <- null()
   expect_equal(x$id, 0L)
   expect_equal(x$name, "null")
@@ -28,7 +28,7 @@ test_that("null type works as expected",{
   expect_equal(x$fields(), list())
 })
 
-test_that("boolean type work as expected",{
+test_that("boolean type work as expected", {
   x <- boolean()
   expect_equal(x$id, Type$BOOL)
   expect_equal(x$name, "bool")
@@ -40,7 +40,7 @@ test_that("boolean type work as expected",{
   expect_equal(x$bit_width, 1L)
 })
 
-test_that("int types works as expected",{
+test_that("int types works as expected", {
   x <- uint8()
   expect_equal(x$id, Type$UINT8)
   expect_equal(x$name, "uint8")
@@ -122,7 +122,7 @@ test_that("int types works as expected",{
   expect_equal(x$bit_width, 64L)
 })
 
-test_that("float types work as expected",{
+test_that("float types work as expected", {
   x <- float16()
   expect_equal(x$id, Type$HALF_FLOAT)
   expect_equal(x$name, "halffloat")
@@ -154,7 +154,7 @@ test_that("float types work as expected",{
   expect_equal(x$bit_width, 64L)
 })
 
-test_that("utf8 type works as expected",{
+test_that("utf8 type works as expected", {
   x <- utf8()
   expect_equal(x$id, Type$STRING)
   expect_equal(x$name, "utf8")
@@ -393,7 +393,6 @@ test_that("decimal type and validation", {
   expect_error(decimal(4, NA), '"scale" must be an integer')
 
   expect_r6_class(decimal(4, 2), "Decimal128Type")
-
 })
 
 test_that("Binary", {
diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R
index ba41bf2d921..4711cacfcd0 100644
--- a/r/tests/testthat/test-dataset.R
+++ b/r/tests/testthat/test-dataset.R
@@ -151,7 +151,7 @@ test_that("dim() correctly determine numbers of rows and columns on arrow_dplyr_
 
   expect_identical(
     ds %>%
-      filter(chr == 'a') %>%
+      filter(chr == "a") %>%
       dim(),
     c(2L, 7L)
   )
@@ -164,7 +164,7 @@ test_that("dim() correctly determine numbers of rows and columns on arrow_dplyr_
   expect_identical(
     ds %>%
       select(chr, fct, int) %>%
-      filter(chr == 'a') %>%
+      filter(chr == "a") %>%
       dim(),
     c(2L, 3L)
   )
@@ -268,8 +268,10 @@ test_that("open_dataset errors on mixed paths and URIs", {
 
 test_that("Simple interface for datasets (custom ParquetFileFormat)", {
   skip_if_not_available("parquet")
-  ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8()),
-                     format = FileFormat$create("parquet", dict_columns = c("chr")))
+  ds <- open_dataset(dataset_dir,
+    partitioning = schema(part = uint8()),
+    format = FileFormat$create("parquet", dict_columns = c("chr"))
+  )
   expect_type_equal(ds$schema$GetFieldByName("chr")$type, dictionary())
 })
 
@@ -384,8 +386,9 @@ test_that("CSV scan options", {
   options <- FragmentScanOptions$create("text")
   expect_equal(options$type, "csv")
   options <- FragmentScanOptions$create("csv",
-                                        null_values = c("mynull"),
-                                        strings_can_be_null = TRUE)
+    null_values = c("mynull"),
+    strings_can_be_null = TRUE
+  )
   expect_equal(options$type, "csv")
 
   dst_dir <- make_temp_dir()
@@ -403,20 +406,26 @@ test_that("CSV scan options", {
   expect_equivalent(as.data.frame(tab), tibble(chr = c("foo", NA)))
 
   # Set default convert options in CsvFileFormat
-  csv_format <- CsvFileFormat$create(null_values = c("mynull"),
-                                     strings_can_be_null = TRUE)
+  csv_format <- CsvFileFormat$create(
+    null_values = c("mynull"),
+    strings_can_be_null = TRUE
+  )
   ds <- open_dataset(dst_dir, format = csv_format)
   expect_equivalent(ds %>% collect(), tibble(chr = c("foo", NA)))
 
   # Set both parse and convert options
   df <- tibble(chr = c("foo", "mynull"), chr2 = c("bar", "baz"))
   write.table(df, dst_file, row.names = FALSE, quote = FALSE, sep = "\t")
-  ds <- open_dataset(dst_dir, format = "csv",
-                     delimiter="\t",
-                     null_values = c("mynull"),
-                     strings_can_be_null = TRUE)
-  expect_equivalent(ds %>% collect(), tibble(chr = c("foo", NA),
-                                             chr2 = c("bar", "baz")))
+  ds <- open_dataset(dst_dir,
+    format = "csv",
+    delimiter = "\t",
+    null_values = c("mynull"),
+    strings_can_be_null = TRUE
+  )
+  expect_equivalent(ds %>% collect(), tibble(
+    chr = c("foo", NA),
+    chr2 = c("bar", "baz")
+  ))
 })
 
 test_that("compressed CSV dataset", {
@@ -457,7 +466,7 @@ test_that("CSV dataset options", {
     ds %>%
       select(string = a) %>%
       collect(),
-    df1[-1,] %>%
+    df1[-1, ] %>%
       select(string = chr)
   )
 
@@ -467,7 +476,7 @@ test_that("CSV dataset options", {
     ds %>%
       select(string = foo) %>%
       collect(),
-    tibble(foo = c(c('chr'), letters[1:10]))
+    tibble(foo = c(c("chr"), letters[1:10]))
   )
 })
 
@@ -633,7 +642,7 @@ test_that("map_batches", {
     ds %>%
       filter(int > 5) %>%
       select(int, lgl) %>%
-      map_batches(~summarize(., min_int = min(int))),
+      map_batches(~ summarize(., min_int = min(int))),
     tibble(min_int = c(6L, 101L))
   )
 })
@@ -746,7 +755,7 @@ test_that("mutate()", {
     mutate(twice = int * 2)
   expect_output(
     print(mutated),
-"FileSystemDataset (query)
+    "FileSystemDataset (query)
 chr: string
 dbl: double
 int: int32
@@ -858,9 +867,9 @@ test_that("compute()/collect(as_data_frame=FALSE)", {
   tab2 <- ds %>% collect(as_data_frame = FALSE)
   expect_is(tab2, "Table")
 
-  tab3 <-  ds %>%
+  tab3 <- ds %>%
     mutate(negint = -int) %>%
-    filter(negint > - 100) %>%
+    filter(negint > -100) %>%
     arrange(chr) %>%
     select(negint) %>%
     compute()
@@ -872,9 +881,9 @@ test_that("compute()/collect(as_data_frame=FALSE)", {
     tibble(negint = -1:-10)
   )
 
-  tab4 <-  ds %>%
+  tab4 <- ds %>%
     mutate(negint = -int) %>%
-    filter(negint > - 100) %>%
+    filter(negint > -100) %>%
     arrange(chr) %>%
     select(negint) %>%
     collect(as_data_frame = FALSE)
@@ -898,7 +907,6 @@ test_that("compute()/collect(as_data_frame=FALSE)", {
   expect_r6_class(tab5$.data, "InMemoryDataset")
   # ... and the mutate() was evaluated
   expect_true("negint" %in% names(tab5$.data))
-
 })
 
 test_that("head/tail", {
@@ -907,27 +915,27 @@ test_that("head/tail", {
   expect_equal(as.data.frame(head(ds)), head(df1))
   expect_equal(
     as.data.frame(head(ds, 12)),
-    rbind(df1, df2[1:2,])
+    rbind(df1, df2[1:2, ])
   )
   expect_equal(
     ds %>%
       filter(int > 6) %>%
       head() %>%
       as.data.frame(),
-    rbind(df1[7:10,], df2[1:2,])
+    rbind(df1[7:10, ], df2[1:2, ])
   )
 
   expect_equal(as.data.frame(tail(ds)), tail(df2))
   expect_equal(
     as.data.frame(tail(ds, 12)),
-    rbind(df1[9:10,], df2)
+    rbind(df1[9:10, ], df2)
   )
   expect_equal(
     ds %>%
       filter(int < 105) %>%
       tail() %>%
       as.data.frame(),
-    rbind(df1[9:10,], df2[1:4,])
+    rbind(df1[9:10, ], df2[1:4, ])
   )
 })
 
@@ -1128,25 +1136,33 @@ test_that("URI-decoding with directory partitioning", {
   write_feather(df1, file.path(dir1, "data.feather"))
 
   partitioning <- DirectoryPartitioning$create(
-    schema(date = timestamp(unit = "s"), string = utf8()))
+    schema(date = timestamp(unit = "s"), string = utf8())
+  )
   factory <- FileSystemDatasetFactory$create(
-    fs, selector, NULL, fmt, partitioning = partitioning)
+    fs, selector, NULL, fmt,
+    partitioning = partitioning
+  )
   schm <- factory$Inspect()
   ds <- factory$Finish(schm)
   expect_scan_result(ds, schm)
 
   partitioning <- DirectoryPartitioning$create(
     schema(date = timestamp(unit = "s"), string = utf8()),
-    segment_encoding = "none")
+    segment_encoding = "none"
+  )
   factory <- FileSystemDatasetFactory$create(
-    fs, selector, NULL, fmt, partitioning = partitioning)
+    fs, selector, NULL, fmt,
+    partitioning = partitioning
+  )
   schm <- factory$Inspect()
   expect_error(factory$Finish(schm), "Invalid: error parsing")
 
   partitioning_factory <- DirectoryPartitioningFactory$create(
-    c("date", "string"))
+    c("date", "string")
+  )
   factory <- FileSystemDatasetFactory$create(
-    fs, selector, NULL, fmt, partitioning_factory)
+    fs, selector, NULL, fmt, partitioning_factory
+  )
   schm <- factory$Inspect()
   ds <- factory$Finish(schm)
   # Can't directly inspect partition expressions, so do it implicitly via scan
@@ -1159,9 +1175,12 @@ test_that("URI-decoding with directory partitioning", {
   )
 
   partitioning_factory <- DirectoryPartitioningFactory$create(
-    c("date", "string"), segment_encoding = "none")
+    c("date", "string"),
+    segment_encoding = "none"
+  )
   factory <- FileSystemDatasetFactory$create(
-    fs, selector, NULL, fmt, partitioning_factory)
+    fs, selector, NULL, fmt, partitioning_factory
+  )
   schm <- factory$Inspect()
   ds <- factory$Finish(schm)
   expect_equal(
@@ -1183,21 +1202,28 @@ test_that("URI-decoding with hive partitioning", {
   write_feather(df1, file.path(dir1, "data.feather"))
 
   partitioning <- hive_partition(
-    date = timestamp(unit = "s"), string = utf8())
+    date = timestamp(unit = "s"), string = utf8()
+  )
   factory <- FileSystemDatasetFactory$create(
-    fs, selector, NULL, fmt, partitioning = partitioning)
+    fs, selector, NULL, fmt,
+    partitioning = partitioning
+  )
   ds <- factory$Finish(schm)
   expect_scan_result(ds, schm)
 
   partitioning <- hive_partition(
-    date = timestamp(unit = "s"), string = utf8(), segment_encoding = "none")
+    date = timestamp(unit = "s"), string = utf8(), segment_encoding = "none"
+  )
   factory <- FileSystemDatasetFactory$create(
-    fs, selector, NULL, fmt, partitioning = partitioning)
+    fs, selector, NULL, fmt,
+    partitioning = partitioning
+  )
   expect_error(factory$Finish(schm), "Invalid: error parsing")
 
   partitioning_factory <- hive_partition()
   factory <- FileSystemDatasetFactory$create(
-    fs, selector, NULL, fmt, partitioning_factory)
+    fs, selector, NULL, fmt, partitioning_factory
+  )
   schm <- factory$Inspect()
   ds <- factory$Finish(schm)
   # Can't directly inspect partition expressions, so do it implicitly via scan
@@ -1211,7 +1237,8 @@ test_that("URI-decoding with hive partitioning", {
 
   partitioning_factory <- hive_partition(segment_encoding = "none")
   factory <- FileSystemDatasetFactory$create(
-    fs, selector, NULL, fmt, partitioning_factory)
+    fs, selector, NULL, fmt, partitioning_factory
+  )
   schm <- factory$Inspect()
   ds <- factory$Finish(schm)
   expect_equal(
@@ -1243,7 +1270,7 @@ test_that("Assembling multiple DatasetFactories with DatasetFactory", {
   expect_r6_class(ds, "UnionDataset")
   expect_r6_class(ds$schema, "Schema")
   expect_equal(names(schm), names(ds$schema))
-  expect_equivalent(map(ds$children, ~.$files), files)
+  expect_equivalent(map(ds$children, ~ .$files), files)
 
   expect_scan_result(ds, schm)
 })
@@ -1445,18 +1472,18 @@ test_that("Dataset writing: partition on null", {
   ds <- open_dataset(hive_dir)
 
   dst_dir <- tempfile()
-  partitioning = hive_partition(lgl = boolean())
+  partitioning <- hive_partition(lgl = boolean())
   write_dataset(ds, dst_dir, partitioning = partitioning)
   expect_true(dir.exists(dst_dir))
   expect_identical(dir(dst_dir), c("lgl=__HIVE_DEFAULT_PARTITION__", "lgl=false", "lgl=true"))
 
   dst_dir <- tempfile()
-  partitioning = hive_partition(lgl = boolean(), null_fallback="xyz")
+  partitioning <- hive_partition(lgl = boolean(), null_fallback = "xyz")
   write_dataset(ds, dst_dir, partitioning = partitioning)
   expect_true(dir.exists(dst_dir))
   expect_identical(dir(dst_dir), c("lgl=false", "lgl=true", "lgl=xyz"))
 
-  ds_readback <- open_dataset(dst_dir, partitioning = hive_partition(lgl = boolean(), null_fallback="xyz"))
+  ds_readback <- open_dataset(dst_dir, partitioning = hive_partition(lgl = boolean(), null_fallback = "xyz"))
 
   expect_identical(
     ds %>%
@@ -1608,8 +1635,10 @@ test_that("Writing a dataset: CSV format options", {
   dst_dir <- make_temp_dir()
   write_dataset(df, dst_dir, format = "csv", include_header = FALSE)
   expect_true(dir.exists(dst_dir))
-  new_ds <- open_dataset(dst_dir, format = "csv",
-                         column_names = c("int", "dbl", "lgl", "chr"))
+  new_ds <- open_dataset(dst_dir,
+    format = "csv",
+    column_names = c("int", "dbl", "lgl", "chr")
+  )
   expect_equivalent(new_ds %>% collect(), df)
 })
 
diff --git a/r/tests/testthat/test-dplyr-arrange.R b/r/tests/testthat/test-dplyr-arrange.R
index 5131653146b..fc24df58ca7 100644
--- a/r/tests/testthat/test-dplyr-arrange.R
+++ b/r/tests/testthat/test-dplyr-arrange.R
@@ -56,7 +56,7 @@ test_that("arrange() on integer, double, and character columns", {
   )
   expect_dplyr_equal(
     input %>%
-      mutate(zzz = int + dbl,) %>%
+      mutate(zzz = int + dbl, ) %>%
       arrange(zzz, chr) %>%
       collect(),
     tbl
diff --git a/r/tests/testthat/test-dplyr-filter.R b/r/tests/testthat/test-dplyr-filter.R
index f070a0150e9..e56ee4be462 100644
--- a/r/tests/testthat/test-dplyr-filter.R
+++ b/r/tests/testthat/test-dplyr-filter.R
@@ -25,8 +25,8 @@ tbl <- example_data
 tbl$verses <- verses[[1]]
 # c(" a ", "  b  ", "   c   ", ...) increasing padding
 # nchar =   3  5  7  9 11 13 15 17 19 21
-tbl$padded_strings <- stringr::str_pad(letters[1:10], width = 2*(1:10)+1, side = "both")
-tbl$some_negative <- tbl$int * (-1)^(1:nrow(tbl))
+tbl$padded_strings <- stringr::str_pad(letters[1:10], width = 2 * (1:10) + 1, side = "both")
+tbl$some_negative <- tbl$int * (-1)^(1:nrow(tbl)) # nolint
 
 test_that("filter() on is.na()", {
   expect_dplyr_equal(
@@ -186,7 +186,7 @@ test_that("Negative scalar values", {
       filter(some_negative %in% -1) %>%
       collect(),
     tbl
-    )
+  )
   expect_dplyr_equal(
     input %>%
       filter(int == -some_negative) %>%
@@ -217,7 +217,7 @@ test_that("filter() with between()", {
       collect(),
     tbl %>%
       filter(dbl >= int, dbl <= dbl2)
-    )
+  )
 
   expect_error(
     tbl %>%
@@ -239,7 +239,6 @@ test_that("filter() with between()", {
       filter(between(chr, 1, 2)) %>%
       collect()
   )
-
 })
 
 test_that("filter() with string ops", {
@@ -320,14 +319,14 @@ test_that("Filtering with unsupported functions", {
       filter(int > 2, pnorm(dbl) > .99) %>%
       collect(),
     tbl,
-    warning = 'Expression pnorm\\(dbl\\) > 0.99 not supported in Arrow; pulling data into R'
+    warning = "Expression pnorm\\(dbl\\) > 0.99 not supported in Arrow; pulling data into R"
   )
   expect_dplyr_equal(
     input %>%
       filter(
         nchar(chr, type = "bytes", allowNA = TRUE) == 1, # bad, Arrow msg
-        int > 2,                                         # good
-        pnorm(dbl) > .99                                 # bad, opaque
+        int > 2, # good
+        pnorm(dbl) > .99 # bad, opaque
       ) %>%
       collect(),
     tbl,
diff --git a/r/tests/testthat/test-dplyr-lubridate.R b/r/tests/testthat/test-dplyr-lubridate.R
index 2d9cfe6ea52..cedbe064ba1 100644
--- a/r/tests/testthat/test-dplyr-lubridate.R
+++ b/r/tests/testthat/test-dplyr-lubridate.R
@@ -31,7 +31,6 @@ test_df <- tibble::tibble(date = test_date)
 
 # We can support this feature after ARROW-12980 is merged
 test_that("timezone aware timestamps are not supported", {
-
   tz_aware_date <- as.POSIXct("2017-01-01 00:00:12.3456789", tz = "Pacific/Marquesas")
   tz_aware_df <- tibble::tibble(date = tz_aware_date)
 
@@ -44,8 +43,7 @@ test_that("timezone aware timestamps are not supported", {
 })
 
 # We can support this feature when ARROW-13138 is resolved
-test_that("date32 objects are not supported",{
-
+test_that("date32 objects are not supported", {
   date <- ymd("2017-01-01")
   df <- tibble::tibble(date = date)
 
@@ -114,7 +112,7 @@ test_that("extract day from date", {
 })
 
 test_that("extract wday from date", {
- expect_dplyr_equal(
+  expect_dplyr_equal(
     input %>%
       mutate(x = wday(date)) %>%
       collect(),
@@ -142,7 +140,6 @@ test_that("extract wday from date", {
     nse_funcs$wday(x, label = TRUE),
     "Label argument not supported by Arrow"
   )
-
 })
 
 test_that("extract yday from date", {
@@ -164,7 +161,7 @@ test_that("extract hour from date", {
 })
 
 test_that("extract minute from date", {
-   expect_dplyr_equal(
+  expect_dplyr_equal(
     input %>%
       mutate(x = minute(date)) %>%
       collect(),
diff --git a/r/tests/testthat/test-dplyr-mutate.R b/r/tests/testthat/test-dplyr-mutate.R
index 3e64891cec5..44127839108 100644
--- a/r/tests/testthat/test-dplyr-mutate.R
+++ b/r/tests/testthat/test-dplyr-mutate.R
@@ -25,7 +25,7 @@ tbl <- example_data
 tbl$verses <- verses[[1]]
 # c(" a ", "  b  ", "   c   ", ...) increasing padding
 # nchar =   3  5  7  9 11 13 15 17 19 21
-tbl$padded_strings <- stringr::str_pad(letters[1:10], width = 2*(1:10)+1, side = "both")
+tbl$padded_strings <- stringr::str_pad(letters[1:10], width = 2 * (1:10) + 1, side = "both")
 
 test_that("mutate() is lazy", {
   expect_s3_class(
@@ -164,7 +164,10 @@ test_that("nchar() arguments", {
       filter(line_lengths > 15) %>%
       collect(),
     tbl,
-    warning = 'In nchar\\(verses, type = "bytes", allowNA = TRUE\\), allowNA = TRUE not supported by Arrow; pulling data into R'
+    warning = paste0(
+      "In nchar\\(verses, type = \"bytes\", allowNA = TRUE\\), ",
+      "allowNA = TRUE not supported by Arrow; pulling data into R"
+    )
   )
 })
 
@@ -187,8 +190,8 @@ test_that("mutate with unnamed expressions", {
     input %>%
       select(int, padded_strings) %>%
       mutate(
-        int,                   # bare column name
-        nchar(padded_strings)  # expression
+        int, # bare column name
+        nchar(padded_strings) # expression
       ) %>%
       filter(int > 5) %>%
       collect(),
@@ -249,7 +252,7 @@ test_that("dplyr::mutate's examples", {
   # Examples we don't support should succeed
   # but warn that they're pulling data into R to do so
 
-  # across + autosplicing: ARROW-11699
+  # across and autosplicing: ARROW-11699
   expect_dplyr_equal(
     input %>%
       select(name, homeworld, species) %>%
@@ -337,7 +340,7 @@ test_that("dplyr::mutate's examples", {
   # The mutate operation may yield different results on grouped
   # tibbles because the expressions are computed within groups.
   # The following normalises `mass` by the global average:
-  # TODO(ARROW-11702)
+  # TODO: ARROW-11702
   expect_dplyr_equal(
     input %>%
       select(name, mass, species) %>%
@@ -385,11 +388,11 @@ test_that("print a mutated table", {
       select(int) %>%
       mutate(twice = int * 2) %>%
       print(),
-'InMemoryDataset (query)
+    "InMemoryDataset (query)
 int: int32
 twice: double (multiply_checked(int, 2))
 
-See $.data for the source Arrow object',
+See $.data for the source Arrow object",
     fixed = TRUE
   )
 })
@@ -457,9 +460,9 @@ test_that("mutate and pmin/pmax", {
     input %>%
       mutate(
         max_val_1 = pmax(val1, val2, val3),
-        max_val_2 = pmax(val1, val2, val3, na.rm = T),
+        max_val_2 = pmax(val1, val2, val3, na.rm = TRUE),
         min_val_1 = pmin(val1, val2, val3),
-        min_val_2 = pmin(val1, val2, val3, na.rm = T)
+        min_val_2 = pmin(val1, val2, val3, na.rm = TRUE)
       ) %>%
       collect(),
     df
@@ -468,8 +471,8 @@ test_that("mutate and pmin/pmax", {
   expect_dplyr_equal(
     input %>%
       mutate(
-        max_val_1 = pmax(val1 - 100, 200, val1 * 100, na.rm = T),
-        min_val_1 = pmin(val1 - 100, 100, val1 * 100, na.rm = T),
+        max_val_1 = pmax(val1 - 100, 200, val1 * 100, na.rm = TRUE),
+        min_val_1 = pmin(val1 - 100, 100, val1 * 100, na.rm = TRUE),
       ) %>%
       collect(),
     df
diff --git a/r/tests/testthat/test-dplyr.R b/r/tests/testthat/test-dplyr.R
index 0a7ea8da89b..da21ccd9ed1 100644
--- a/r/tests/testthat/test-dplyr.R
+++ b/r/tests/testthat/test-dplyr.R
@@ -25,7 +25,7 @@ tbl <- example_data
 tbl$verses <- verses[[1]]
 # c(" a ", "  b  ", "   c   ", ...) increasing padding
 # nchar =   3  5  7  9 11 13 15 17 19 21
-tbl$padded_strings <- stringr::str_pad(letters[1:10], width = 2*(1:10) + 1, side = "both")
+tbl$padded_strings <- stringr::str_pad(letters[1:10], width = 2 * (1:10) + 1, side = "both")
 
 test_that("basic select/filter/collect", {
   batch <- record_batch(tbl)
@@ -59,15 +59,14 @@ test_that("Print method", {
       filter(int < 5) %>%
       select(int, chr) %>%
       print(),
-'InMemoryDataset (query)
+    'InMemoryDataset (query)
 int: int32
 chr: string
 
 * Filter: (((dbl > 2) and ((chr == "d") or (chr == "f"))) and (int < 5))
 See $.data for the source Arrow object',
-  fixed = TRUE
+    fixed = TRUE
   )
-
 })
 
 test_that("summarize", {
@@ -213,7 +212,7 @@ test_that("collect(as_data_frame=FALSE)", {
     expected %>%
       rename(strng = chr) %>%
       group_by(int)
-    )
+  )
 })
 
 test_that("compute()", {
@@ -283,7 +282,7 @@ test_that("head", {
     expected %>%
       rename(strng = chr) %>%
       group_by(int)
-    )
+  )
 })
 
 test_that("tail", {
@@ -316,7 +315,7 @@ test_that("tail", {
     expected %>%
       rename(strng = chr) %>%
       group_by(int)
-    )
+  )
 })
 
 test_that("relocate", {
@@ -389,7 +388,7 @@ test_that("explicit type conversions with cast()", {
 
   for (type in types) {
     expect_type_equal(
-      {
+      object = {
         t1 <- Table$create(x = num_int32) %>%
           transmute(x = cast(x, type)) %>%
           compute()
@@ -398,7 +397,7 @@ test_that("explicit type conversions with cast()", {
       as_type(type)
     )
     expect_type_equal(
-      {
+      object = {
         t1 <- Table$create(x = num_int64) %>%
           transmute(x = cast(x, type)) %>%
           compute()
@@ -411,7 +410,7 @@ test_that("explicit type conversions with cast()", {
   # Arrow errors when truncating floats...
   expect_error(
     expect_type_equal(
-      {
+      object = {
         t1 <- Table$create(pi = pi) %>%
           transmute(three = cast(pi, int32())) %>%
           compute()
@@ -424,7 +423,7 @@ test_that("explicit type conversions with cast()", {
 
   # ... unless safe = FALSE (or allow_float_truncate = TRUE)
   expect_type_equal(
-    {
+    object = {
       t1 <- Table$create(pi = pi) %>%
         transmute(three = cast(pi, int32(), safe = FALSE)) %>%
         compute()
@@ -514,14 +513,17 @@ test_that("explicit type conversions with as.*()", {
 })
 
 test_that("is.finite(), is.infinite(), is.nan()", {
-  df <- tibble(x =c(-4.94065645841246544e-324, 1.79769313486231570e+308, 0,
-                    NA_real_, NaN, Inf, -Inf))
+  df <- tibble(x = c(
+    -4.94065645841246544e-324, 1.79769313486231570e+308, 0,
+    NA_real_, NaN, Inf, -Inf
+  ))
   expect_dplyr_equal(
     input %>%
       transmute(
         is_fin = is.finite(x),
         is_inf = is.infinite(x)
-      ) %>% collect(),
+      ) %>%
+      collect(),
     df
   )
   # is.nan() evaluates to FALSE on NA_real_ (ARROW-12850)
@@ -529,7 +531,8 @@ test_that("is.finite(), is.infinite(), is.nan()", {
     input %>%
       transmute(
         is_nan = is.nan(x)
-      ) %>% collect(),
+      ) %>%
+      collect(),
     df
   )
 })
@@ -540,7 +543,8 @@ test_that("is.na() evaluates to TRUE on NaN (ARROW-12055)", {
     input %>%
       transmute(
         is_na = is.na(x)
-      ) %>% collect(),
+      ) %>%
+      collect(),
     df
   )
 })
@@ -549,91 +553,101 @@ test_that("type checks with is() giving Arrow types", {
   # with class2=DataType
   expect_equal(
     Table$create(
-        i32 = Array$create(1, int32()),
-        dec = Array$create(pi)$cast(decimal(3, 2)),
-        f64 = Array$create(1.1, float64()),
-        str = Array$create("a", arrow::string())
-      ) %>% transmute(
-        i32_is_i32 = is(i32, int32()),
-        i32_is_dec = is(i32, decimal(3, 2)),
-        i32_is_i64 = is(i32, float64()),
-        i32_is_str = is(i32, arrow::string()),
-        dec_is_i32 = is(dec, int32()),
-        dec_is_dec = is(dec, decimal(3, 2)),
-        dec_is_i64 = is(dec, float64()),
-        dec_is_str = is(dec, arrow::string()),
-        f64_is_i32 = is(f64, int32()),
-        f64_is_dec = is(f64, decimal(3, 2)),
-        f64_is_i64 = is(f64, float64()),
-        f64_is_str = is(f64, arrow::string()),
-        str_is_i32 = is(str, int32()),
-        str_is_dec = is(str, decimal(3, 2)),
-        str_is_i64 = is(str, float64()),
-        str_is_str = is(str, arrow::string())
-      ) %>%
-      collect() %>% t() %>% as.vector(),
-    c(TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE,
-      FALSE, FALSE, FALSE, FALSE, TRUE)
+      i32 = Array$create(1, int32()),
+      dec = Array$create(pi)$cast(decimal(3, 2)),
+      f64 = Array$create(1.1, float64()),
+      str = Array$create("a", arrow::string())
+    ) %>% transmute(
+      i32_is_i32 = is(i32, int32()),
+      i32_is_dec = is(i32, decimal(3, 2)),
+      i32_is_i64 = is(i32, float64()),
+      i32_is_str = is(i32, arrow::string()),
+      dec_is_i32 = is(dec, int32()),
+      dec_is_dec = is(dec, decimal(3, 2)),
+      dec_is_i64 = is(dec, float64()),
+      dec_is_str = is(dec, arrow::string()),
+      f64_is_i32 = is(f64, int32()),
+      f64_is_dec = is(f64, decimal(3, 2)),
+      f64_is_i64 = is(f64, float64()),
+      f64_is_str = is(f64, arrow::string()),
+      str_is_i32 = is(str, int32()),
+      str_is_dec = is(str, decimal(3, 2)),
+      str_is_i64 = is(str, float64()),
+      str_is_str = is(str, arrow::string())
+    ) %>%
+      collect() %>%
+      t() %>%
+      as.vector(),
+    c(
+      TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE,
+      FALSE, FALSE, FALSE, FALSE, TRUE
+    )
   )
   # with class2=string
   expect_equal(
     Table$create(
-        i32 = Array$create(1, int32()),
-        f64 = Array$create(1.1, float64()),
-        str = Array$create("a", arrow::string())
-      ) %>% transmute(
-        i32_is_i32 = is(i32, "int32"),
-        i32_is_i64 = is(i32, "double"),
-        i32_is_str = is(i32, "string"),
-        f64_is_i32 = is(f64, "int32"),
-        f64_is_i64 = is(f64, "double"),
-        f64_is_str = is(f64, "string"),
-        str_is_i32 = is(str, "int32"),
-        str_is_i64 = is(str, "double"),
-        str_is_str = is(str, "string")
-      ) %>%
-      collect() %>% t() %>% as.vector(),
+      i32 = Array$create(1, int32()),
+      f64 = Array$create(1.1, float64()),
+      str = Array$create("a", arrow::string())
+    ) %>% transmute(
+      i32_is_i32 = is(i32, "int32"),
+      i32_is_i64 = is(i32, "double"),
+      i32_is_str = is(i32, "string"),
+      f64_is_i32 = is(f64, "int32"),
+      f64_is_i64 = is(f64, "double"),
+      f64_is_str = is(f64, "string"),
+      str_is_i32 = is(str, "int32"),
+      str_is_i64 = is(str, "double"),
+      str_is_str = is(str, "string")
+    ) %>%
+      collect() %>%
+      t() %>%
+      as.vector(),
     c(TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE)
   )
   # with class2=string alias
   expect_equal(
     Table$create(
-        f16 = Array$create(NA_real_, halffloat()),
-        f32 = Array$create(1.1, float()),
-        f64 = Array$create(2.2, float64()),
-        lgl = Array$create(TRUE, bool()),
-        str = Array$create("a", arrow::string())
-      ) %>% transmute(
-        f16_is_f16 = is(f16, "float16"),
-        f16_is_f32 = is(f16, "float32"),
-        f16_is_f64 = is(f16, "float64"),
-        f16_is_lgl = is(f16, "boolean"),
-        f16_is_str = is(f16, "utf8"),
-        f32_is_f16 = is(f32, "float16"),
-        f32_is_f32 = is(f32, "float32"),
-        f32_is_f64 = is(f32, "float64"),
-        f32_is_lgl = is(f32, "boolean"),
-        f32_is_str = is(f32, "utf8"),
-        f64_is_f16 = is(f64, "float16"),
-        f64_is_f32 = is(f64, "float32"),
-        f64_is_f64 = is(f64, "float64"),
-        f64_is_lgl = is(f64, "boolean"),
-        f64_is_str = is(f64, "utf8"),
-        lgl_is_f16 = is(lgl, "float16"),
-        lgl_is_f32 = is(lgl, "float32"),
-        lgl_is_f64 = is(lgl, "float64"),
-        lgl_is_lgl = is(lgl, "boolean"),
-        lgl_is_str = is(lgl, "utf8"),
-        str_is_f16 = is(str, "float16"),
-        str_is_f32 = is(str, "float32"),
-        str_is_f64 = is(str, "float64"),
-        str_is_lgl = is(str, "boolean"),
-        str_is_str = is(str, "utf8")
-      ) %>%
-      collect() %>% t() %>% as.vector(),
-    c(TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE,
+      f16 = Array$create(NA_real_, halffloat()),
+      f32 = Array$create(1.1, float()),
+      f64 = Array$create(2.2, float64()),
+      lgl = Array$create(TRUE, bool()),
+      str = Array$create("a", arrow::string())
+    ) %>% transmute(
+      f16_is_f16 = is(f16, "float16"),
+      f16_is_f32 = is(f16, "float32"),
+      f16_is_f64 = is(f16, "float64"),
+      f16_is_lgl = is(f16, "boolean"),
+      f16_is_str = is(f16, "utf8"),
+      f32_is_f16 = is(f32, "float16"),
+      f32_is_f32 = is(f32, "float32"),
+      f32_is_f64 = is(f32, "float64"),
+      f32_is_lgl = is(f32, "boolean"),
+      f32_is_str = is(f32, "utf8"),
+      f64_is_f16 = is(f64, "float16"),
+      f64_is_f32 = is(f64, "float32"),
+      f64_is_f64 = is(f64, "float64"),
+      f64_is_lgl = is(f64, "boolean"),
+      f64_is_str = is(f64, "utf8"),
+      lgl_is_f16 = is(lgl, "float16"),
+      lgl_is_f32 = is(lgl, "float32"),
+      lgl_is_f64 = is(lgl, "float64"),
+      lgl_is_lgl = is(lgl, "boolean"),
+      lgl_is_str = is(lgl, "utf8"),
+      str_is_f16 = is(str, "float16"),
+      str_is_f32 = is(str, "float32"),
+      str_is_f64 = is(str, "float64"),
+      str_is_lgl = is(str, "boolean"),
+      str_is_str = is(str, "utf8")
+    ) %>%
+      collect() %>%
+      t() %>%
+      as.vector(),
+    c(
+      TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE,
       FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE,
-      FALSE, FALSE, TRUE)
+      FALSE, FALSE, TRUE
+    )
   )
 })
 
@@ -687,12 +701,12 @@ test_that("type checks with is() giving R types", {
         i64_is_chr = is(i64, "character"),
         i64_is_fct = is(i64, "factor"),
         # we want Arrow to return TRUE, but bit64 returns FALSE
-        #i64_is_int = is(i64, "integer"),
+        # i64_is_int = is(i64, "integer"),
         i64_is_i64 = is(i64, "integer64"),
         i64_is_lst = is(i64, "list"),
         i64_is_lgl = is(i64, "logical"),
         # we want Arrow to return TRUE, but bit64 returns FALSE
-        #i64_is_num = is(i64, "numeric"),
+        # i64_is_num = is(i64, "numeric"),
         lst_is_chr = is(lst, "character"),
         lst_is_fct = is(lst, "factor"),
         lst_is_int = is(lst, "integer"),
@@ -763,10 +777,10 @@ test_that("type checks with is.*()", {
       transmute(
         i64_is_chr = is.character(i64),
         # TODO: investigate why this is not matching when testthat runs it
-        #i64_is_dbl = is.double(i64),
+        # i64_is_dbl = is.double(i64),
         i64_is_fct = is.factor(i64),
         # we want Arrow to return TRUE, but bit64 returns FALSE
-        #i64_is_int = is.integer(i64),
+        # i64_is_int = is.integer(i64),
         i64_is_i64 = is.integer64(i64),
         i64_is_lst = is.list(i64),
         i64_is_lgl = is.logical(i64),
@@ -872,7 +886,7 @@ test_that("type checks on R scalar literals", {
 })
 
 test_that("as.factor()/dictionary_encode()", {
-  skip("ARROW-12632: ExecuteScalarExpression cannot Execute non-scalar expression {x=dictionary_encode(x, {NON-REPRESENTABLE OPTIONS})}")
+  skip("ARROW-12632: ExecuteScalarExpression cannot Execute non-scalar expression")
   df1 <- tibble(x = c("C", "D", "B", NA, "D", "B", "S", "A", "B", "Z", "B"))
   df2 <- tibble(x = c(5, 5, 5, NA, 2, 3, 6, 8))
 
@@ -896,7 +910,7 @@ test_that("as.factor()/dictionary_encode()", {
   # dictionary values with default null encoding behavior ("mask") omits
   # nulls from the dictionary values
   expect_equal(
-    {
+    object = {
       rb1 <- df1 %>%
         record_batch() %>%
         transmute(x = dictionary_encode(x)) %>%
@@ -910,7 +924,7 @@ test_that("as.factor()/dictionary_encode()", {
   # dictionary values with "encode" null encoding behavior includes nulls in
   # the dictionary values
   expect_equal(
-    {
+    object = {
       rb1 <- df1 %>%
         record_batch() %>%
         transmute(x = dictionary_encode(x, null_encoding_behavior = "encode")) %>%
@@ -920,7 +934,6 @@ test_that("as.factor()/dictionary_encode()", {
     },
     sort(unique(df1$x), na.last = TRUE)
   )
-
 })
 
 test_that("bad explicit type conversions with as.*()", {
@@ -931,8 +944,7 @@ test_that("bad explicit type conversions with as.*()", {
       input %>%
         transmute(lgl2chr = as.character(lgl)) %>%
         collect(),
-      tibble(lgl = c(TRUE, FALSE, NA)
-      )
+      tibble(lgl = c(TRUE, FALSE, NA))
     )
   )
 
@@ -959,19 +971,21 @@ test_that("bad explicit type conversions with as.*()", {
       tibble(chr = c("TRU", "FAX", ""))
     )
   )
-
 })
 
 test_that("No duplicate field names are allowed in an arrow_dplyr_query", {
   expect_error(
     Table$create(tbl, tbl) %>%
       filter(int > 0),
-    regexp = 'The following field names were found more than once in the data: "int", "dbl", "dbl2", "lgl", "false", "chr", "fct", "verses", and "padded_strings"'
+    regexp = paste0(
+      'The following field names were found more than once in the data: "int", "dbl", ',
+      '"dbl2", "lgl", "false", "chr", "fct", "verses", and "padded_strings"'
+    )
   )
 })
 
 test_that("abs()", {
-  df <- tibble(x = c(-127, -10, -1, -0 , 0, 1, 10, 127, NA))
+  df <- tibble(x = c(-127, -10, -1, -0, 0, 1, 10, 127, NA))
 
   expect_dplyr_equal(
     input %>%
@@ -982,7 +996,7 @@ test_that("abs()", {
 })
 
 test_that("sign()", {
-  df <- tibble(x = c(-127, -10, -1, -0 , 0, 1, 10, 127, NA))
+  df <- tibble(x = c(-127, -10, -1, -0, 0, 1, 10, 127, NA))
 
   expect_dplyr_equal(
     input %>%
@@ -1008,7 +1022,6 @@ test_that("ceiling(), floor(), trunc()", {
 })
 
 test_that("log functions", {
-
   df <- tibble(x = c(1:10, NA, NA))
 
   expect_dplyr_equal(
@@ -1072,11 +1085,9 @@ test_that("log functions", {
       collect(),
     df
   )
-
 })
 
 test_that("trig functions", {
-
   df <- tibble(x = c(seq(from = 0, to = 1, by = 0.1), NA))
 
   expect_dplyr_equal(
@@ -1113,7 +1124,6 @@ test_that("trig functions", {
       collect(),
     df
   )
-
 })
 
 test_that("if_else and ifelse", {
@@ -1124,7 +1134,8 @@ test_that("if_else and ifelse", {
     input %>%
       mutate(
         y = if_else(int > 5, 1, 0)
-      ) %>% collect(),
+      ) %>%
+      collect(),
     tbl
   )
 
@@ -1132,7 +1143,8 @@ test_that("if_else and ifelse", {
     input %>%
       mutate(
         y = if_else(int > 5, int, 0L)
-      ) %>% collect(),
+      ) %>%
+      collect(),
     tbl
   )
 
@@ -1140,15 +1152,17 @@ test_that("if_else and ifelse", {
     Table$create(tbl) %>%
       mutate(
         y = if_else(int > 5, 1, FALSE)
-      ) %>% collect(),
-    'NotImplemented: Function if_else has no kernel matching input types'
+      ) %>%
+      collect(),
+    "NotImplemented: Function if_else has no kernel matching input types"
   )
 
   expect_dplyr_equal(
     input %>%
       mutate(
         y = if_else(int > 5, 1, NA_real_)
-      ) %>% collect(),
+      ) %>%
+      collect(),
     tbl
   )
 
@@ -1156,7 +1170,8 @@ test_that("if_else and ifelse", {
     input %>%
       mutate(
         y = ifelse(int > 5, 1, 0)
-      ) %>% collect(),
+      ) %>%
+      collect(),
     tbl
   )
 
@@ -1164,7 +1179,8 @@ test_that("if_else and ifelse", {
     input %>%
       mutate(
         y = if_else(dbl > 5, TRUE, FALSE)
-      ) %>% collect(),
+      ) %>%
+      collect(),
     tbl
   )
 
@@ -1172,7 +1188,8 @@ test_that("if_else and ifelse", {
     input %>%
       mutate(
         y = if_else(chr %in% letters[1:3], 1L, 3L)
-      ) %>% collect(),
+      ) %>%
+      collect(),
     tbl
   )
 
@@ -1180,7 +1197,8 @@ test_that("if_else and ifelse", {
     input %>%
       mutate(
         y = if_else(int > 5, "one", "zero")
-      ) %>% collect(),
+      ) %>%
+      collect(),
     tbl
   )
 
@@ -1188,7 +1206,8 @@ test_that("if_else and ifelse", {
     input %>%
       mutate(
         y = if_else(int > 5, chr, another_chr)
-      ) %>% collect(),
+      ) %>%
+      collect(),
     tbl
   )
 
@@ -1196,7 +1215,8 @@ test_that("if_else and ifelse", {
     input %>%
       mutate(
         y = if_else(int > 5, "true", chr, missing = "MISSING")
-      ) %>% collect(),
+      ) %>%
+      collect(),
     tbl
   )
 
@@ -1206,7 +1226,8 @@ test_that("if_else and ifelse", {
     input %>%
       mutate(
         y = if_else(int > 5, fct, factor("a"))
-      ) %>% collect() %>%
+      ) %>%
+      collect() %>%
       # This is a no-op on the Arrow side, but necessary to make the results equal
       mutate(y = as.character(y)),
     tbl,
@@ -1218,7 +1239,8 @@ test_that("if_else and ifelse", {
     input %>%
       mutate(
         y = if_else(is.na(dbl), chr, "false", missing = "MISSING")
-      ) %>% collect(),
+      ) %>%
+      collect(),
     example_data_for_sorting
   )
 
@@ -1228,7 +1250,8 @@ test_that("if_else and ifelse", {
     input %>%
       mutate(
         y = if_else(dbl > 5, chr, another_chr, missing = "MISSING")
-      ) %>% collect(),
+      ) %>%
+      collect(),
     example_data_for_sorting
   )
 
diff --git a/r/tests/testthat/test-duckdb.R b/r/tests/testthat/test-duckdb.R
index e8ec079b6e0..ff468a799ec 100644
--- a/r/tests/testthat/test-duckdb.R
+++ b/r/tests/testthat/test-duckdb.R
@@ -143,7 +143,7 @@ test_that("to_duckdb with a table", {
         int_mean = mean(int, na.rm = TRUE),
         dbl_mean = mean(dbl, na.rm = TRUE)
       ) %>%
-    collect(),
+      collect(),
     tibble::tibble(
       "int > 4" = c(FALSE, NA, TRUE),
       int_mean = c(2, NA, 7.5),
diff --git a/r/tests/testthat/test-expression.R b/r/tests/testthat/test-expression.R
index d8c26db0143..034c4049a34 100644
--- a/r/tests/testthat/test-expression.R
+++ b/r/tests/testthat/test-expression.R
@@ -35,7 +35,7 @@ test_that("C++ expressions", {
   expect_r6_class(f == i64, "Expression")
   expect_r6_class(f == time, "Expression")
   # can't seem to make this work right now because of R Ops.method dispatch
-  # expect_r6_class(f == as.Date("2020-01-15"), "Expression")
+  # expect_r6_class(f == as.Date("2020-01-15"), "Expression") # nolint
   expect_r6_class(f == ts, "Expression")
   expect_r6_class(f <= 2L, "Expression")
   expect_r6_class(f != FALSE, "Expression")
@@ -45,7 +45,7 @@ test_that("C++ expressions", {
   expect_r6_class(!(f < 4), "Expression")
   expect_output(
     print(f > 4),
-    'Expression\n(f > 4)',
+    "Expression\n(f > 4)",
     fixed = TRUE
   )
   expect_type_equal(
@@ -58,12 +58,11 @@ test_that("C++ expressions", {
   )
   # Interprets that as a list type
   expect_r6_class(f == c(1L, 2L), "Expression")
-  
+
   expect_error(
     Expression$create("add", 1, 2),
     "Expression arguments must be Expression objects"
   )
-  
 })
 
 test_that("Field reference expression schemas and types", {
diff --git a/r/tests/testthat/test-feather.R b/r/tests/testthat/test-feather.R
index 5a537c7a9bc..80757b04f05 100644
--- a/r/tests/testthat/test-feather.R
+++ b/r/tests/testthat/test-feather.R
@@ -232,7 +232,13 @@ unlink(feather_file)
 ft_file <- test_path("golden-files/data-arrow_2.0.0_lz4.feather")
 
 test_that("Error messages are shown when the compression algorithm lz4 is not found", {
-  msg <- "NotImplemented: Support for codec 'lz4' not built\nIn order to read this file, you will need to reinstall arrow with additional features enabled.\nSet one of these environment variables before installing:\n\n * LIBARROW_MINIMAL=false (for all optional features, including 'lz4')\n * ARROW_WITH_LZ4=ON (for just 'lz4')\n\nSee https://arrow.apache.org/docs/r/articles/install.html for details"
+  msg <- paste0(
+    "NotImplemented: Support for codec 'lz4' not built\nIn order to read this file, ",
+    "you will need to reinstall arrow with additional features enabled.\nSet one of ",
+    "these environment variables before installing:\n\n * LIBARROW_MINIMAL=false ",
+    "(for all optional features, including 'lz4')\n * ARROW_WITH_LZ4=ON (for just 'lz4')",
+    "\n\nSee https://arrow.apache.org/docs/r/articles/install.html for details"
+  )
 
   if (codec_is_available("lz4")) {
     d <- read_feather(ft_file)
diff --git a/r/tests/testthat/test-filesystem.R b/r/tests/testthat/test-filesystem.R
index df084f35a49..38b6f61269e 100644
--- a/r/tests/testthat/test-filesystem.R
+++ b/r/tests/testthat/test-filesystem.R
@@ -90,7 +90,7 @@ test_that("SubTreeFilesystem", {
   )
 
   # FIXME windows has a trailing slash for one but not the other
-  # expect_identical(normalizePath(st_fs$base_path), normalizePath(td))
+  # expect_identical(normalizePath(st_fs$base_path), normalizePath(td)) # nolint
 
   st_fs$CreateDir("test")
   st_fs$CopyFile("DESCRIPTION", "DESC.txt")
diff --git a/r/tests/testthat/test-install-arrow.R b/r/tests/testthat/test-install-arrow.R
index d9d1cc74b02..c53ee829829 100644
--- a/r/tests/testthat/test-install-arrow.R
+++ b/r/tests/testthat/test-install-arrow.R
@@ -24,7 +24,7 @@ r_only({
     other <- "https://cran.fiocruz.br/"
 
     opts <- list(
-      repos=c(CRAN = "@CRAN@"),  # Restore defaul
+      repos = c(CRAN = "@CRAN@"), # Restore defaul
       arrow.dev_repo = ours
     )
     withr::with_options(opts, {
diff --git a/r/tests/testthat/test-json.R b/r/tests/testthat/test-json.R
index ad5ff8a1260..668b040d868 100644
--- a/r/tests/testthat/test-json.R
+++ b/r/tests/testthat/test-json.R
@@ -25,7 +25,7 @@ test_that("Can read json file with scalars columns (ARROW-5503)", {
     { "hello": 3.25, "world": null }
     { "hello": 3.125, "world": null, "yo": "\u5fcd" }
     { "hello": 0.0, "world": true, "yo": null }
-  ', tf, useBytes=TRUE)
+  ', tf, useBytes = TRUE)
 
   tab1 <- read_json_arrow(tf, as_data_frame = FALSE)
   tab2 <- read_json_arrow(mmap_open(tf), as_data_frame = FALSE)
@@ -52,7 +52,7 @@ test_that("read_json_arrow() converts to tibble", {
     { "hello": 3.25, "world": null }
     { "hello": 3.125, "world": null, "yo": "\u5fcd" }
     { "hello": 0.0, "world": true, "yo": null }
-  ', tf, useBytes=TRUE)
+  ', tf, useBytes = TRUE)
 
   tab1 <- read_json_arrow(tf)
   tab2 <- read_json_arrow(mmap_open(tf))
@@ -94,16 +94,16 @@ test_that("read_json_arrow(schema=) with empty schema", {
     { "hello": 3.125, "world": 8, "third_col": 97 }
     { "hello": 0.0, "world": 10, "third_col": 96}
   ', tf)
-  
+
   tab1 <- read_json_arrow(tf, schema = schema())
-  
+
   expect_identical(
-    tab1, 
+    tab1,
     tibble::tibble(
       hello = c(3.5, 3.25, 3.125, 0),
       world = c(2L, 5L, 8L, 10L),
-      third_col = c(99L,98L,97L,96L)
-    )               
+      third_col = c(99L, 98L, 97L, 96L)
+    )
   )
 })
 
@@ -115,34 +115,34 @@ test_that("read_json_arrow(schema=) with partial schema", {
     { "hello": 3.125, "world": 8, "third_col": 97 }
     { "hello": 0.0, "world": 10, "third_col": 96}
   ', tf)
-  
+
   tab1 <- read_json_arrow(tf, schema = schema(third_col = float64(), world = float64()))
-  
+
   expect_identical(
-    tab1, 
+    tab1,
     tibble::tibble(
-      third_col = c(99,98,97,96),
+      third_col = c(99, 98, 97, 96),
       world = c(2, 5, 8, 10),
       hello = c(3.5, 3.25, 3.125, 0)
-    )               
+    )
   )
-  
+
   tf2 <- tempfile()
   writeLines('
     { "hello": 3.5, "world": 2, "third_col": "99"}
     { "hello": 3.25, "world": 5, "third_col": "98"}
     { "hello": 3.125, "world": 8, "third_col": "97"}
   ', tf2)
-  
+
   tab2 <- read_json_arrow(tf2, schema = schema(third_col = string(), world = float64()))
-  
+
   expect_identical(
-    tab2, 
+    tab2,
     tibble::tibble(
-      third_col = c("99","98","97"),
+      third_col = c("99", "98", "97"),
       world = c(2, 5, 8),
       hello = c(3.5, 3.25, 3.125)
-    )               
+    )
   )
 })
 
@@ -154,7 +154,7 @@ test_that("read_json_arrow(schema=) with full schema", {
     { "hello": 3.125, "world": 8, "third_col": 97}
     { "hello": 0.0, "world": 10, "third_col": 96}
   ', tf)
-  
+
   tab1 <- read_json_arrow(
     tf,
     schema = schema(
@@ -163,14 +163,14 @@ test_that("read_json_arrow(schema=) with full schema", {
       world = float64()
     )
   )
-  
+
   expect_identical(
-    tab1, 
+    tab1,
     tibble::tibble(
       hello = c(3.5, 3.25, 3.125, 0),
-      third_col = c(99,98,97,96),
+      third_col = c(99, 98, 97, 96),
       world = c(2, 5, 8, 10)
-    )               
+    )
   )
 })
 
diff --git a/r/tests/testthat/test-message.R b/r/tests/testthat/test-message.R
index b9fb3a162a7..3fbb038272c 100644
--- a/r/tests/testthat/test-message.R
+++ b/r/tests/testthat/test-message.R
@@ -33,7 +33,7 @@ test_that("read_message can read from input stream", {
 })
 
 test_that("read_message() can read Schema messages", {
-  bytes <- schema(x=int32())$serialize()
+  bytes <- schema(x = int32())$serialize()
   stream <- BufferReader$create(bytes)
   message <- read_message(stream)
 
@@ -55,7 +55,7 @@ test_that("read_message() can handle raw vectors", {
   message_raw <- read_message(bytes)
   expect_equal(message_stream, message_raw)
 
-  bytes <- schema(x=int32())$serialize()
+  bytes <- schema(x = int32())$serialize()
   stream <- BufferReader$create(bytes)
   message_stream <- read_message(stream)
   message_raw <- read_message(bytes)
diff --git a/r/tests/testthat/test-metadata.R b/r/tests/testthat/test-metadata.R
index de3542b1c60..bc6d285b333 100644
--- a/r/tests/testthat/test-metadata.R
+++ b/r/tests/testthat/test-metadata.R
@@ -113,7 +113,8 @@ test_that("Metadata serialization compression", {
   )
 
   # But we can disable compression
-  op <- options(arrow.compress_metadata = FALSE); on.exit(options(op))
+  op <- options(arrow.compress_metadata = FALSE)
+  on.exit(options(op))
 
   large_strings <- as.list(rep(make_string_of_size(1), 100))
   large <- .serialize_arrow_r_metadata(large_strings)
@@ -218,7 +219,8 @@ test_that("metadata of list elements (ARROW-10386)", {
       structure(1, my_value_as_attr = 1),
       structure(2, my_value_as_attr = 2),
       structure(3, my_value_as_attr = 3),
-      structure(4, my_value_as_attr = 3)),
+      structure(4, my_value_as_attr = 3)
+    ),
     int = 1L:4L,
     part = c(1, 3, 2, 1)
   )
diff --git a/r/tests/testthat/test-na-omit.R b/r/tests/testthat/test-na-omit.R
index 834ccb013ec..3cd56cca64f 100644
--- a/r/tests/testthat/test-na-omit.R
+++ b/r/tests/testthat/test-na-omit.R
@@ -27,16 +27,16 @@ test_that("na.fail on Scalar", {
 
 test_that("na.omit on Array and ChunkedArray", {
   expect_vector_equal(na.omit(input), data_no_na)
-  expect_vector_equal(na.omit(input), data_na, ignore_attr=TRUE)
+  expect_vector_equal(na.omit(input), data_na, ignore_attr = TRUE)
 })
 
 test_that("na.exclude on Array and ChunkedArray", {
   expect_vector_equal(na.exclude(input), data_no_na)
-  expect_vector_equal(na.exclude(input), data_na, ignore_attr=TRUE)
+  expect_vector_equal(na.exclude(input), data_na, ignore_attr = TRUE)
 })
 
 test_that("na.fail on Array and ChunkedArray", {
-  expect_vector_equal(na.fail(input), data_no_na, ignore_attr=TRUE)
+  expect_vector_equal(na.fail(input), data_no_na, ignore_attr = TRUE)
   expect_vector_error(na.fail(input), data_na)
 })
 
@@ -75,4 +75,3 @@ test_that("na.fail on RecordBatch", {
   batch <- record_batch(example_data)
   expect_error(na.fail(batch), "missing values in object")
 })
-
diff --git a/r/tests/testthat/test-parquet.R b/r/tests/testthat/test-parquet.R
index 2e282a4b5fc..41dcfe38c94 100644
--- a/r/tests/testthat/test-parquet.R
+++ b/r/tests/testthat/test-parquet.R
@@ -209,7 +209,7 @@ test_that("write_parquet() handles version argument", {
 })
 
 test_that("ParquetFileWriter raises an error for non-OutputStream sink", {
-  sch = schema(a = float32())
+  sch <- schema(a = float32())
   # ARROW-9946
   expect_error(
     ParquetFileWriter$create(schema = sch, sink = tempfile()),
@@ -219,7 +219,8 @@ test_that("ParquetFileWriter raises an error for non-OutputStream sink", {
 
 test_that("ParquetFileReader $ReadRowGroup(s) methods", {
   tab <- Table$create(x = 1:100)
-  tf <- tempfile(); on.exit(unlink(tf))
+  tf <- tempfile()
+  on.exit(unlink(tf))
   write_parquet(tab, tf, chunk_size = 10)
 
   reader <- ParquetFileReader$create(tf)
@@ -244,7 +245,13 @@ test_that("ParquetFileReader $ReadRowGroup(s) methods", {
 })
 
 test_that("Error messages are shown when the compression algorithm snappy is not found", {
-  msg <- "NotImplemented: Support for codec 'snappy' not built\nIn order to read this file, you will need to reinstall arrow with additional features enabled.\nSet one of these environment variables before installing:\n\n * LIBARROW_MINIMAL=false (for all optional features, including 'snappy')\n * ARROW_WITH_SNAPPY=ON (for just 'snappy')\n\nSee https://arrow.apache.org/docs/r/articles/install.html for details"
+  msg <- paste0(
+    "NotImplemented: Support for codec 'snappy' not built\nIn order to read this file, ",
+    "you will need to reinstall arrow with additional features enabled.\nSet one of these ",
+    "environment variables before installing:\n\n * LIBARROW_MINIMAL=false (for all optional ",
+    "features, including 'snappy')\n * ARROW_WITH_SNAPPY=ON (for just 'snappy')\n\n",
+    "See https://arrow.apache.org/docs/r/articles/install.html for details"
+  )
 
   if (codec_is_available("snappy")) {
     d <- read_parquet(pq_file)
diff --git a/r/tests/testthat/test-python-flight.R b/r/tests/testthat/test-python-flight.R
index dbd2ba9a8b2..c87f3a562ac 100644
--- a/r/tests/testthat/test-python-flight.R
+++ b/r/tests/testthat/test-python-flight.R
@@ -53,7 +53,6 @@ if (process_is_running("demo_flight_server")) {
     flight_put(client, example_with_times, path = flight_obj)
     expect_identical(as.data.frame(flight_get(client, flight_obj)), example_with_times)
   })
-
 } else {
   # Kinda hacky, let's put a skipped test here, just so we note that the tests
   # didn't run
diff --git a/r/tests/testthat/test-read-write.R b/r/tests/testthat/test-read-write.R
index ea3aa34a424..3b4205443cd 100644
--- a/r/tests/testthat/test-read-write.R
+++ b/r/tests/testthat/test-read-write.R
@@ -38,7 +38,7 @@ test_that("table round trip", {
   chunks_int <- chunked_array_int$chunks
   expect_equal(length(chunks_int), chunked_array_int$num_chunks)
   for (i in seq_along(chunks_int)) {
-    expect_equal(chunked_array_int$chunk(i-1L), chunks_int[[i]])
+    expect_equal(chunked_array_int$chunk(i - 1L), chunks_int[[i]])
   }
 
   # ChunkedArray
@@ -51,7 +51,7 @@ test_that("table round trip", {
   chunks_dbl <- chunked_array_dbl$chunks
   expect_equal(length(chunks_dbl), chunked_array_dbl$num_chunks)
   for (i in seq_along(chunks_dbl)) {
-    expect_equal(chunked_array_dbl$chunk(i-1L), chunks_dbl[[i]])
+    expect_equal(chunked_array_dbl$chunk(i - 1L), chunks_dbl[[i]])
   }
 
   # ChunkedArray
@@ -64,7 +64,7 @@ test_that("table round trip", {
   chunks_raw <- chunked_array_raw$chunks
   expect_equal(length(chunks_raw), chunked_array_raw$num_chunks)
   for (i in seq_along(chunks_raw)) {
-    expect_equal(chunked_array_raw$chunk(i-1L), chunks_raw[[i]])
+    expect_equal(chunked_array_raw$chunk(i - 1L), chunks_raw[[i]])
   }
   tf <- tempfile()
   write_feather(tbl, tf)
diff --git a/r/tests/testthat/test-s3-minio.R b/r/tests/testthat/test-s3-minio.R
index 8cb0dafdfe4..94451e5351a 100644
--- a/r/tests/testthat/test-s3-minio.R
+++ b/r/tests/testthat/test-s3-minio.R
@@ -75,7 +75,6 @@ if (arrow_with_s3() && process_is_running("minio server")) {
   })
 
   if (arrow_with_dataset()) {
-
     library(dplyr)
 
     make_temp_dir <- function() {
@@ -183,7 +182,6 @@ if (arrow_with_s3() && process_is_running("minio server")) {
         rbind(df1[, c("dbl", "lgl")], df2[, c("dbl", "lgl")])
       )
     })
-
   }
 
   test_that("S3FileSystem input validation", {
diff --git a/r/tests/testthat/test-s3.R b/r/tests/testthat/test-s3.R
index 938e0c6fdb2..995730a7977 100644
--- a/r/tests/testthat/test-s3.R
+++ b/r/tests/testthat/test-s3.R
@@ -17,19 +17,22 @@
 
 context("S3 integration tests")
 
-run_these <- tryCatch({
-  if (arrow_with_s3() &&
+run_these <- tryCatch(
+  expr = {
+    if (arrow_with_s3() &&
       identical(tolower(Sys.getenv("ARROW_R_DEV")), "true") &&
       !identical(Sys.getenv("AWS_ACCESS_KEY_ID"), "") &&
       !identical(Sys.getenv("AWS_SECRET_ACCESS_KEY"), "")) {
-    # See if we have access to the test bucket
-    bucket <- s3_bucket("ursa-labs-r-test")
-    bucket$GetFileInfo("")
-    TRUE
-  } else {
-    FALSE
-  }
-}, error = function(e) FALSE)
+      # See if we have access to the test bucket
+      bucket <- s3_bucket("ursa-labs-r-test")
+      bucket$GetFileInfo("")
+      TRUE
+    } else {
+      FALSE
+    }
+  },
+  error = function(e) FALSE
+)
 
 bucket_uri <- function(..., bucket = "s3://ursa-labs-r-test/%s?region=us-west-2") {
   segments <- paste(..., sep = "/")
diff --git a/r/tests/testthat/test-scalar.R b/r/tests/testthat/test-scalar.R
index d0b13423463..566228cbcaa 100644
--- a/r/tests/testthat/test-scalar.R
+++ b/r/tests/testthat/test-scalar.R
@@ -24,7 +24,7 @@ expect_scalar_roundtrip <- function(x, type) {
   expect_identical(length(s), 1L)
   if (inherits(type, "NestedType")) {
     # Should this be? Missing if all elements are missing?
-    # expect_identical(is.na(s), all(is.na(x)))
+    # expect_identical(is.na(s), all(is.na(x))) # nolint
   } else {
     expect_identical(as.vector(is.na(s)), is.na(x))
     # MakeArrayFromScalar not implemented for list types
@@ -37,7 +37,7 @@ test_that("Scalar object roundtrip", {
   expect_scalar_roundtrip(2L, int32())
   expect_scalar_roundtrip(c(2, 4), list_of(float64()))
   expect_scalar_roundtrip(c(NA, NA), list_of(bool()))
-  expect_scalar_roundtrip(data.frame(a=2, b=4L), struct(a = double(), b = int32()))
+  expect_scalar_roundtrip(data.frame(a = 2, b = 4L), struct(a = double(), b = int32()))
 })
 
 test_that("Scalar print", {
@@ -87,7 +87,10 @@ test_that("Handling string data with embedded nuls", {
   scalar_with_nul <- Scalar$create(raws, binary())$cast(utf8())
   expect_error(
     as.vector(scalar_with_nul),
-    "embedded nul in string: 'ma\\0n'; to strip nuls when converting from Arrow to R, set options(arrow.skip_nul = TRUE)",
+    paste0(
+      "embedded nul in string: 'ma\\0n'; to strip nuls when converting from Arrow to R, ",
+      "set options(arrow.skip_nul = TRUE)"
+    ),
     fixed = TRUE
   )
 
@@ -101,4 +104,4 @@ test_that("Handling string data with embedded nuls", {
       fixed = TRUE
     )
   })
-})
\ No newline at end of file
+})
diff --git a/r/tests/testthat/test-schema.R b/r/tests/testthat/test-schema.R
index 0de6ccae7a6..933ba4785a8 100644
--- a/r/tests/testthat/test-schema.R
+++ b/r/tests/testthat/test-schema.R
@@ -42,7 +42,7 @@ test_that("Schema $GetFieldByName", {
   schm <- schema(b = double(), c = string())
   expect_equal(schm$GetFieldByName("b"), field("b", double()))
   expect_null(schm$GetFieldByName("f"))
-  # TODO: schema(b = double(), b = string())$GetFieldByName("b")
+  # TODO: schema(b = double(), b = string())$GetFieldByName("b") # nolint
   # also returns NULL and probably should error bc duplicated names
 })
 
@@ -153,7 +153,7 @@ test_that("Input validation when creating a table with a schema", {
 
 test_that("Schema$Equals", {
   a <- schema(b = double(), c = bool())
-  b <- a$WithMetadata(list(some="metadata"))
+  b <- a$WithMetadata(list(some = "metadata"))
 
   # different metadata
   expect_failure(expect_equal(a, b))
diff --git a/r/tests/testthat/test-type.R b/r/tests/testthat/test-type.R
index 1f13466c9dd..d17b811974d 100644
--- a/r/tests/testthat/test-type.R
+++ b/r/tests/testthat/test-type.R
@@ -104,7 +104,6 @@ test_that("Masked data type functions still work", {
     arrow::string()
   )
   rm(type)
-
 })
 
 test_that("Type strings are correctly canonicalized", {
@@ -160,7 +159,7 @@ test_that("Type strings are correctly canonicalized", {
   )
   expect_equal(
     canonical_type_str("decimal"),
-    sub("^([^([<]+).*$", "\\1", decimal(3,2)$ToString())
+    sub("^([^([<]+).*$", "\\1", decimal(3, 2)$ToString())
   )
   expect_equal(
     canonical_type_str("struct"),
diff --git a/r/vignettes/developing.Rmd b/r/vignettes/developing.Rmd
index d6e31392056..ddb1ace6ccc 100644
--- a/r/vignettes/developing.Rmd
+++ b/r/vignettes/developing.Rmd
@@ -420,6 +420,19 @@ There are a number of scripts that are triggered when `R CMD INSTALL .`. For Arr
   * `*** Proceed without C++` dependencies (this is an error and the package will not work, but if you see this message you know the previous steps have not succeeded/were not enabled)
 * `inst/build_arrow_static.sh` this script builds Arrow for a bundled, static build. It is called by `tools/nixlibs.R` when the Arrow library is being built. (If you're looking at this script, and you've gotten this far, it should look _incredibly_ familiar: it's basically the contents of this guide in script form — with a few important changes)
 
+## Styling and linting of the R code in the R package
+
+The R code in the package follows [the tidyverse style](https://style.tidyverse.org/). On PR submission (and on pushes) our CI will run linting and will flag possible errors on the pull request with annotations.
+
+To run the [lintr](https://github.com/jimhester/lintr) locally, install the lintr package (note, we currently use a fork that includes fixes not yet accepted upstream, see how lintr is being installed in the file `ci/docker/linux-apt-lint.dockerfile` for the current status) and then run `lintr::lint_package("arrow/r")`.
+
+One can automatically change the formatting of the code in the package using the [styler](https://styler.r-lib.org/) package. There are two ways to do this:
+
+1. Use the comment bot to do this automatically with the command `@github-actions autotune` on a PR and commit it back to the branch.
+2. Locally, with the command `styler::style_pkg(exclude_files = c("tests/testthat/latin1.R", "data-raw/codegen.R"))` note the two excluded files which should not be styled.
+
+The styler package will fix many styling errors, thought not all lintr errors are automatically fixable with styler.
+
 ## Editing C++ code in the R package
 
 The `arrow` package uses some customized tools on top of `cpp11` to prepare its

From 6d4ed4fcaee693a1b3c23ed6d1565bf019230c89 Mon Sep 17 00:00:00 2001
From: Fernando Rodriguez <diegodfrf@gmail.com>
Date: Tue, 3 Aug 2021 15:15:12 -0400
Subject: [PATCH 693/719] ARROW-13469: [C++] Suppress
 -Wmissing-field-initializers in DayMilliseconds arrow/type.h

Set initial values to DayMilliseconds with 0

Closes #10842 from diegodfrf/ARROW-13469-Suppress-Wmissing-field-initializers-in-

Lead-authored-by: Fernando Rodriguez <diegodfrf@gmail.com>
Co-authored-by: Fernando Rodriguez <fernando@blazingdb.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 cpp/src/arrow/type.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index b933da66089..005b4458b91 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -1295,8 +1295,11 @@ class ARROW_EXPORT MonthIntervalType : public IntervalType {
 class ARROW_EXPORT DayTimeIntervalType : public IntervalType {
  public:
   struct DayMilliseconds {
-    int32_t days;
-    int32_t milliseconds;
+    int32_t days = 0;
+    int32_t milliseconds = 0;
+    DayMilliseconds() = default;
+    DayMilliseconds(int32_t days, int32_t milliseconds)
+        : days(days), milliseconds(milliseconds) {}
     bool operator==(DayMilliseconds other) const {
       return this->days == other.days && this->milliseconds == other.milliseconds;
     }

From 0c7e8b05f3b9d318500ec6d0aaf063bfb3e8e471 Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Wed, 4 Aug 2021 11:57:31 +0900
Subject: [PATCH 694/719] ARROW-13485: [Release] Replace
 ${PREVIOUS_RELEASE}.9000 in r/NEWS.md by post-12-bump-versions.sh

We need this because we changed to use release branch style.

Closes #10828 from kou/release-update-version-r

Authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 dev/release/01-prepare-test.rb | 91 ++++++++++++++++------------------
 dev/release/utils-prepare.sh   | 11 ++--
 2 files changed, 48 insertions(+), 54 deletions(-)

diff --git a/dev/release/01-prepare-test.rb b/dev/release/01-prepare-test.rb
index 1006ce17fb9..098e7f47d69 100644
--- a/dev/release/01-prepare-test.rb
+++ b/dev/release/01-prepare-test.rb
@@ -341,201 +341,198 @@ def test_version_pre_tag
   end
 
   def test_version_post_tag
-    if on_release_branch?
-      bump_versions("VERSION_POST_TAG")
-    else
-      prepare("VERSION_PRE_TAG")
-      bump_versions("VERSION_POST_TAG")
-    end
+    omit_on_release_branch
+    bump_versions("VERSION_POST_TAG")
     assert_equal([
                    {
                      path: "c_glib/meson.build",
                      hunks: [
-                       ["-version = '#{@release_version}'",
+                       ["-version = '#{@snapshot_version}'",
                         "+version = '#{@next_snapshot_version}'"],
                      ],
                    },
                    {
                      path: "ci/scripts/PKGBUILD",
                      hunks: [
-                       ["-pkgver=#{@release_version}",
+                       ["-pkgver=#{@previous_version}.9000",
                         "+pkgver=#{@release_version}.9000"],
                      ],
                    },
                    {
                      path: "cpp/CMakeLists.txt",
                      hunks: [
-                       ["-set(ARROW_VERSION \"#{@release_version}\")",
+                       ["-set(ARROW_VERSION \"#{@snapshot_version}\")",
                         "+set(ARROW_VERSION \"#{@next_snapshot_version}\")"],
                      ],
                    },
                    {
                      path: "cpp/vcpkg.json",
                      hunks: [
-                       ["-  \"version-string\": \"#{@release_version}\",",
+                       ["-  \"version-string\": \"#{@snapshot_version}\",",
                         "+  \"version-string\": \"#{@next_snapshot_version}\","],
                      ],
                    },
                    {
                      path: "csharp/Directory.Build.props",
                      hunks: [
-                       ["-    <Version>#{@release_version}</Version>",
+                       ["-    <Version>#{@snapshot_version}</Version>",
                         "+    <Version>#{@next_snapshot_version}</Version>"],
                      ],
                    },
                    {
                      path: "dev/tasks/homebrew-formulae/apache-arrow.rb",
                      hunks: [
-                       ["-  url \"https://www.apache.org/dyn/closer.lua?path=arrow/arrow-#{@release_version}/apache-arrow-#{@release_version}.tar.gz\"",
+                       ["-  url \"https://www.apache.org/dyn/closer.lua?path=arrow/arrow-#{@snapshot_version}/apache-arrow-#{@snapshot_version}.tar.gz\"",
                         "+  url \"https://www.apache.org/dyn/closer.lua?path=arrow/arrow-#{@next_snapshot_version}/apache-arrow-#{@next_snapshot_version}.tar.gz\""],
                      ],
                    },
                    {
                      path: "dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb",
                      hunks: [
-                       ["-  url \"https://www.apache.org/dyn/closer.lua?path=arrow/arrow-#{@release_version}/apache-arrow-#{@release_version}.tar.gz\"",
+                       ["-  url \"https://www.apache.org/dyn/closer.lua?path=arrow/arrow-#{@previous_version}.9000/apache-arrow-#{@previous_version}.9000.tar.gz\"",
                         "+  url \"https://www.apache.org/dyn/closer.lua?path=arrow/arrow-#{@release_version}.9000/apache-arrow-#{@release_version}.9000.tar.gz\""],
                      ],
                    },
                    { path: "java/adapter/avro/pom.xml",
-                     hunks: [["-    <version>#{@release_version}</version>",
+                     hunks: [["-    <version>#{@snapshot_version}</version>",
                              "+    <version>#{@next_snapshot_version}</version>"]] },
-                   { hunks: [["-        <version>#{@release_version}</version>",
+                   { hunks: [["-        <version>#{@snapshot_version}</version>",
                               "+        <version>#{@next_snapshot_version}</version>"]],
                      path: "java/adapter/jdbc/pom.xml" },
-                   { hunks: [["-        <version>#{@release_version}</version>",
+                   { hunks: [["-        <version>#{@snapshot_version}</version>",
                               "+        <version>#{@next_snapshot_version}</version>"]],
                      path: "java/adapter/orc/pom.xml" },
-                   { hunks: [["-    <version>#{@release_version}</version>",
+                   { hunks: [["-    <version>#{@snapshot_version}</version>",
                               "+    <version>#{@next_snapshot_version}</version>"]],
                      path: "java/algorithm/pom.xml" },
-                   { hunks: [["-    <version>#{@release_version}</version>",
+                   { hunks: [["-    <version>#{@snapshot_version}</version>",
                               "+    <version>#{@next_snapshot_version}</version>"]],
                      path: "java/compression/pom.xml" },
-                   { hunks: [["-        <version>#{@release_version}</version>",
+                   { hunks: [["-        <version>#{@snapshot_version}</version>",
                               "+        <version>#{@next_snapshot_version}</version>"]],
                      path: "java/dataset/pom.xml" },
-                   { hunks: [["-    <version>#{@release_version}</version>",
+                   { hunks: [["-    <version>#{@snapshot_version}</version>",
                               "+    <version>#{@next_snapshot_version}</version>"]],
                      path: "java/flight/flight-core/pom.xml" },
-                   { hunks: [["-    <version>#{@release_version}</version>",
+                   { hunks: [["-    <version>#{@snapshot_version}</version>",
                               "+    <version>#{@next_snapshot_version}</version>"]],
                      path: "java/flight/flight-grpc/pom.xml" },
-                   { hunks: [["-  <version>#{@release_version}</version>", "+  <version>#{@next_snapshot_version}</version>"]],
+                   { hunks: [["-  <version>#{@snapshot_version}</version>", "+  <version>#{@next_snapshot_version}</version>"]],
                      path: "java/format/pom.xml" },
-                   { hunks: [["-      <version>#{@release_version}</version>",
+                   { hunks: [["-      <version>#{@snapshot_version}</version>",
                               "+      <version>#{@next_snapshot_version}</version>"]],
                      path: "java/gandiva/pom.xml" },
-                   { hunks: [["-    <version>#{@release_version}</version>",
+                   { hunks: [["-    <version>#{@snapshot_version}</version>",
                               "+    <version>#{@next_snapshot_version}</version>"]],
                      path: "java/memory/memory-core/pom.xml" },
-                   { hunks: [["-    <version>#{@release_version}</version>",
+                   { hunks: [["-    <version>#{@snapshot_version}</version>",
                               "+    <version>#{@next_snapshot_version}</version>"]],
                      path: "java/memory/memory-netty/pom.xml" },
-                   { hunks: [["-    <version>#{@release_version}</version>",
+                   { hunks: [["-    <version>#{@snapshot_version}</version>",
                               "+    <version>#{@next_snapshot_version}</version>"]],
                      path: "java/memory/memory-unsafe/pom.xml" },
-                   { hunks: [["-    <version>#{@release_version}</version>",
+                   { hunks: [["-    <version>#{@snapshot_version}</version>",
                               "+    <version>#{@next_snapshot_version}</version>"]],
                      path: "java/memory/pom.xml" },
-                   { hunks: [["-        <version>#{@release_version}</version>",
+                   { hunks: [["-        <version>#{@snapshot_version}</version>",
                               "+        <version>#{@next_snapshot_version}</version>"],
-                             ["-            <version>#{@release_version}</version>",
+                             ["-            <version>#{@snapshot_version}</version>",
                               "+            <version>#{@next_snapshot_version}</version>"]],
                      path: "java/performance/pom.xml" },
-                   { hunks: [["-        <version>#{@release_version}</version>",
+                   { hunks: [["-        <version>#{@snapshot_version}</version>",
                               "+        <version>#{@next_snapshot_version}</version>"]],
                      path: "java/plasma/pom.xml" },
-                   { hunks: [["-  <version>#{@release_version}</version>", "+  <version>#{@next_snapshot_version}</version>"]],
+                   { hunks: [["-  <version>#{@snapshot_version}</version>", "+  <version>#{@next_snapshot_version}</version>"]],
                      path: "java/pom.xml" },
-                   { hunks: [["-        <version>#{@release_version}</version>",
+                   { hunks: [["-        <version>#{@snapshot_version}</version>",
                               "+        <version>#{@next_snapshot_version}</version>"]],
                      path: "java/tools/pom.xml" },
-                   { hunks: [["-    <version>#{@release_version}</version>",
+                   { hunks: [["-    <version>#{@snapshot_version}</version>",
                               "+    <version>#{@next_snapshot_version}</version>"]],
                      path: "java/vector/pom.xml" },
                    {
                      path: "js/package.json",
                      hunks: [
-                       ["-  \"version\": \"#{@release_version}\"",
+                       ["-  \"version\": \"#{@snapshot_version}\"",
                         "+  \"version\": \"#{@next_snapshot_version}\""],
                      ],
                    },
                    {
                      path: "matlab/CMakeLists.txt",
                      hunks: [
-                       ["-set(MLARROW_VERSION \"#{@release_version}\")",
+                       ["-set(MLARROW_VERSION \"#{@snapshot_version}\")",
                         "+set(MLARROW_VERSION \"#{@next_snapshot_version}\")"],
                      ],
                    },
                    {
                      path: "python/setup.py",
                      hunks: [
-                       ["-default_version = '#{@release_version}'",
+                       ["-default_version = '#{@snapshot_version}'",
                         "+default_version = '#{@next_snapshot_version}'"],
                      ],
                    },
                    {
                      path: "r/DESCRIPTION",
                      hunks: [
-                       ["-Version: #{@release_version}",
+                       ["-Version: #{@previous_version}.9000",
                         "+Version: #{@release_version}.9000"],
                      ],
                    },
                    {
                      path: "r/NEWS.md",
-                     # Note that these are additions only, no replacement
                      hunks: [
-                       ["+# arrow #{@release_version}.9000",
-                        "+"],
+                       ["-# arrow #{@previous_version}.9000",
+                        "+# arrow #{@release_version}.9000",
+                        "+",
+                        "+# arrow #{@release_version}",],
                      ],
                    },
                    {
                      path: "ruby/red-arrow-cuda/lib/arrow-cuda/version.rb",
                      hunks: [
-                       ["-  VERSION = \"#{@release_version}\"",
+                       ["-  VERSION = \"#{@snapshot_version}\"",
                         "+  VERSION = \"#{@next_snapshot_version}\""],
                      ],
                    },
                    {
                      path: "ruby/red-arrow-dataset/lib/arrow-dataset/version.rb",
                      hunks: [
-                       ["-  VERSION = \"#{@release_version}\"",
+                       ["-  VERSION = \"#{@snapshot_version}\"",
                         "+  VERSION = \"#{@next_snapshot_version}\""],
                      ],
                    },
                    {
                      path: "ruby/red-arrow-flight/lib/arrow-flight/version.rb",
                      hunks: [
-                       ["-  VERSION = \"#{@release_version}\"",
+                       ["-  VERSION = \"#{@snapshot_version}\"",
                         "+  VERSION = \"#{@next_snapshot_version}\""],
                      ],
                    },
                    {
                      path: "ruby/red-arrow/lib/arrow/version.rb",
                      hunks: [
-                       ["-  VERSION = \"#{@release_version}\"",
+                       ["-  VERSION = \"#{@snapshot_version}\"",
                         "+  VERSION = \"#{@next_snapshot_version}\""],
                      ],
                    },
                    {
                      path: "ruby/red-gandiva/lib/gandiva/version.rb",
                      hunks: [
-                       ["-  VERSION = \"#{@release_version}\"",
+                       ["-  VERSION = \"#{@snapshot_version}\"",
                         "+  VERSION = \"#{@next_snapshot_version}\""],
                      ],
                    },
                    {
                      path: "ruby/red-parquet/lib/parquet/version.rb",
                      hunks: [
-                       ["-  VERSION = \"#{@release_version}\"",
+                       ["-  VERSION = \"#{@snapshot_version}\"",
                         "+  VERSION = \"#{@next_snapshot_version}\""],
                      ],
                    },
                    {
                      path: "ruby/red-plasma/lib/plasma/version.rb",
                      hunks: [
-                       ["-  VERSION = \"#{@release_version}\"",
+                       ["-  VERSION = \"#{@snapshot_version}\"",
                         "+  VERSION = \"#{@next_snapshot_version}\""],
                      ],
                    },
diff --git a/dev/release/utils-prepare.sh b/dev/release/utils-prepare.sh
index 93ddb18b77c..7ba786a754c 100644
--- a/dev/release/utils-prepare.sh
+++ b/dev/release/utils-prepare.sh
@@ -121,18 +121,15 @@ update_versions() {
     DESCRIPTION
   rm -f DESCRIPTION.bak
   git add DESCRIPTION
+  # Replace dev version with release version
+  sed -i.bak -E -e \
+    "0,/^# arrow /s/^# arrow .+/# arrow ${base_version}/" \
+    NEWS.md
   if [ ${type} = "snapshot" ]; then
     # Add a news entry for the new dev version
-    echo "dev"
     sed -i.bak -E -e \
       "0,/^# arrow /s/^(# arrow .+)/# arrow ${r_version}\n\n\1/" \
       NEWS.md
-  else
-    # Replace dev version with release version
-    echo "release"
-    sed -i.bak -E -e \
-      "0,/^# arrow /s/^# arrow .+/# arrow ${r_version}/" \
-      NEWS.md
   fi
   rm -f NEWS.md.bak
   git add NEWS.md

From 7ee8edb68ce6ce25944ff69f333027fdeaaf93e7 Mon Sep 17 00:00:00 2001
From: Projjal Chanda <iam@pchanda.com>
Date: Wed, 4 Aug 2021 15:56:07 +0530
Subject: [PATCH 695/719] ARROW-13429: [C++][Gandiva] Fix Gandiva codegen for
 if-else expression with binary type

Closes #10775 from projjal/fixvarbinary and squashes the following commits:

060b1425b <Projjal Chanda> Fixed codegen in if-else with binary type

Authored-by: Projjal Chanda <iam@pchanda.com>
Signed-off-by: Praveen <praveen@dremio.com>
---
 cpp/src/gandiva/llvm_generator.cc    |  3 +-
 cpp/src/gandiva/tests/binary_test.cc | 49 +++++++++++++++++++++++++++-
 2 files changed, 50 insertions(+), 2 deletions(-)

diff --git a/cpp/src/gandiva/llvm_generator.cc b/cpp/src/gandiva/llvm_generator.cc
index 77feb99eb29..d84a0374e6b 100644
--- a/cpp/src/gandiva/llvm_generator.cc
+++ b/cpp/src/gandiva/llvm_generator.cc
@@ -1139,7 +1139,8 @@ LValuePtr LLVMGenerator::Visitor::BuildIfElse(llvm::Value* condition,
 
   LValuePtr ret;
   switch (result_type->id()) {
-    case arrow::Type::STRING: {
+    case arrow::Type::STRING:
+    case arrow::Type::BINARY: {
       llvm::PHINode* result_length;
       result_length = builder->CreatePHI(types->i32_type(), 2, "res_length");
       result_length->addIncoming(then_lvalue->length(), then_bb);
diff --git a/cpp/src/gandiva/tests/binary_test.cc b/cpp/src/gandiva/tests/binary_test.cc
index 6ac3c515519..591c5befcbd 100644
--- a/cpp/src/gandiva/tests/binary_test.cc
+++ b/cpp/src/gandiva/tests/binary_test.cc
@@ -16,9 +16,10 @@
 // under the License.
 
 #include <gtest/gtest.h>
+
 #include "arrow/memory_pool.h"
 #include "arrow/status.h"
-
+#include "gandiva/node.h"
 #include "gandiva/projector.h"
 #include "gandiva/tests/test_util.h"
 #include "gandiva/tree_expr_builder.h"
@@ -86,4 +87,50 @@ TEST_F(TestBinary, TestSimple) {
   EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
 }
 
+TEST_F(TestBinary, TestIfElse) {
+  // schema for input fields
+  auto field0 = field("f0", arrow::binary());
+  auto field1 = field("f1", arrow::binary());
+
+  auto schema = arrow::schema({field0, field1});
+
+  auto f0 = TreeExprBuilder::MakeField(field0);
+  auto f1 = TreeExprBuilder::MakeField(field1);
+
+  // output fields
+  auto field_result = field("out", arrow::binary());
+
+  // Build expression
+  auto cond = TreeExprBuilder::MakeFunction("isnotnull", {f0}, arrow::boolean());
+  auto ifexpr = TreeExprBuilder::MakeIf(cond, f0, f1, arrow::binary());
+  auto expr = TreeExprBuilder::MakeExpression(ifexpr, field_result);
+
+  // Build a projector for the expressions.
+  std::shared_ptr<Projector> projector;
+  auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
+  EXPECT_TRUE(status.ok());
+
+  // Create a row-batch with some sample data
+  int num_records = 4;
+  auto array_f0 =
+      MakeArrowArrayBinary({"foo", "hello", "hi", "bye"}, {true, true, true, false});
+  auto array_f1 =
+      MakeArrowArrayBinary({"fe", "fi", "fo", "fum"}, {true, true, true, true});
+
+  // expected output
+  auto exp =
+      MakeArrowArrayBinary({"foo", "hello", "hi", "fum"}, {true, true, true, true});
+
+  // prepare input record batch
+  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_f0, array_f1});
+
+  // Evaluate expression
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  EXPECT_TRUE(status.ok());
+
+  // Validate results
+  EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
+}
+
 }  // namespace gandiva

From c4e53e09fe6964f404fe0fa89a5d3eb8a2607442 Mon Sep 17 00:00:00 2001
From: frank400 <j.victorhuguenin2018@gmail.com>
Date: Wed, 4 Aug 2021 15:58:54 +0530
Subject: [PATCH 696/719] ARROW-12388: [C++][Gandiva] Implement cast numbers
 from varbinary functions in gandiva

Closes #10033 from jvictorhuguenin/feature/add-cast-numbers-from-varbinary and squashes the following commits:

63d96359f <frank400> Fix projector bad construction
52bf306ca <frank400> Fix checkstyle
6641e1e84 <frank400> Remove miss placed tests
4bef9c305 <frank400> Fix checkstyle
07c75dd70 <frank400> Fix unnecessary functions
9a0a32cb1 <frank400> Add tests with hex strings
7fb41bd7f <frank400> Add initial support for castFLOAT4 and castFLOAT8 for varbinary
71a3265de <frank400> Restructures the castINT and castBIGINT functions implementation
578aac9fd <frank400> Fix checkstyle
a61388f93 <frank400> Unify macros used to cast numbers from strings and varbinary
4bf6a532d <frank400> Add java tests
41a147c07 <frank400> Implement cast varbinary to number types

Authored-by: frank400 <j.victorhuguenin2018@gmail.com>
Signed-off-by: Praveen <praveen@dremio.com>
---
 cpp/src/gandiva/function_registry_string.cc   |  16 ++
 cpp/src/gandiva/gdv_function_stubs.cc         |  50 +++++-
 cpp/src/gandiva/gdv_function_stubs.h          |  12 ++
 cpp/src/gandiva/gdv_function_stubs_test.cc    | 143 ++++++++++++++++++
 cpp/src/gandiva/precompiled/string_ops.cc     |  63 ++++++++
 .../gandiva/precompiled/string_ops_test.cc    |   1 +
 cpp/src/gandiva/tests/projector_test.cc       |  56 +++++++
 .../gandiva/evaluator/ProjectorTest.java      |  53 +++++++
 8 files changed, 391 insertions(+), 3 deletions(-)

diff --git a/cpp/src/gandiva/function_registry_string.cc b/cpp/src/gandiva/function_registry_string.cc
index 8f979b7f17e..270417c1bf3 100644
--- a/cpp/src/gandiva/function_registry_string.cc
+++ b/cpp/src/gandiva/function_registry_string.cc
@@ -102,6 +102,22 @@ std::vector<NativeFunction> GetStringFunctionRegistry() {
                      kResultNullIfNull, "gdv_fn_castFLOAT8_utf8",
                      NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors),
 
+      NativeFunction("castINT", {}, DataTypeVector{binary()}, int32(), kResultNullIfNull,
+                     "gdv_fn_castINT_varbinary",
+                     NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors),
+
+      NativeFunction("castBIGINT", {}, DataTypeVector{binary()}, int64(),
+                     kResultNullIfNull, "gdv_fn_castBIGINT_varbinary",
+                     NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors),
+
+      NativeFunction("castFLOAT4", {}, DataTypeVector{binary()}, float32(),
+                     kResultNullIfNull, "gdv_fn_castFLOAT4_varbinary",
+                     NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors),
+
+      NativeFunction("castFLOAT8", {}, DataTypeVector{binary()}, float64(),
+                     kResultNullIfNull, "gdv_fn_castFLOAT8_varbinary",
+                     NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors),
+
       NativeFunction("castVARCHAR", {}, DataTypeVector{boolean(), int64()}, utf8(),
                      kResultNullIfNull, "castVARCHAR_bool_int64",
                      NativeFunction::kNeedsContext),
diff --git a/cpp/src/gandiva/gdv_function_stubs.cc b/cpp/src/gandiva/gdv_function_stubs.cc
index 5bf8da7e718..f0142741959 100644
--- a/cpp/src/gandiva/gdv_function_stubs.cc
+++ b/cpp/src/gandiva/gdv_function_stubs.cc
@@ -23,6 +23,7 @@
 #include <vector>
 
 #include "arrow/util/base64.h"
+#include "arrow/util/double_conversion.h"
 #include "arrow/util/formatting.h"
 #include "arrow/util/utf8.h"
 #include "arrow/util/value_parsing.h"
@@ -364,10 +365,10 @@ const char* gdv_fn_base64_decode_utf8(int64_t context, const char* in, int32_t i
   return ret;
 }
 
-#define CAST_NUMERIC_FROM_STRING(OUT_TYPE, ARROW_TYPE, TYPE_NAME)                    \
+#define CAST_NUMERIC_FROM_VARLEN_TYPES(OUT_TYPE, ARROW_TYPE, TYPE_NAME, INNER_TYPE)  \
   GANDIVA_EXPORT                                                                     \
-  OUT_TYPE gdv_fn_cast##TYPE_NAME##_utf8(int64_t context, const char* data,          \
-                                         int32_t len) {                              \
+  OUT_TYPE gdv_fn_cast##TYPE_NAME##_##INNER_TYPE(int64_t context, const char* data,  \
+                                                 int32_t len) {                      \
     OUT_TYPE val = 0;                                                                \
     /* trim leading and trailing spaces */                                           \
     int32_t trimmed_len;                                                             \
@@ -388,6 +389,9 @@ const char* gdv_fn_base64_decode_utf8(int64_t context, const char* in, int32_t i
     return val;                                                                      \
   }
 
+#define CAST_NUMERIC_FROM_STRING(OUT_TYPE, ARROW_TYPE, TYPE_NAME) \
+  CAST_NUMERIC_FROM_VARLEN_TYPES(OUT_TYPE, ARROW_TYPE, TYPE_NAME, utf8)
+
 CAST_NUMERIC_FROM_STRING(int32_t, arrow::Int32Type, INT)
 CAST_NUMERIC_FROM_STRING(int64_t, arrow::Int64Type, BIGINT)
 CAST_NUMERIC_FROM_STRING(float, arrow::FloatType, FLOAT4)
@@ -395,6 +399,16 @@ CAST_NUMERIC_FROM_STRING(double, arrow::DoubleType, FLOAT8)
 
 #undef CAST_NUMERIC_FROM_STRING
 
+#define CAST_NUMERIC_FROM_VARBINARY(OUT_TYPE, ARROW_TYPE, TYPE_NAME) \
+  CAST_NUMERIC_FROM_VARLEN_TYPES(OUT_TYPE, ARROW_TYPE, TYPE_NAME, varbinary)
+
+CAST_NUMERIC_FROM_VARBINARY(int32_t, arrow::Int32Type, INT)
+CAST_NUMERIC_FROM_VARBINARY(int64_t, arrow::Int64Type, BIGINT)
+CAST_NUMERIC_FROM_VARBINARY(float, arrow::FloatType, FLOAT4)
+CAST_NUMERIC_FROM_VARBINARY(double, arrow::DoubleType, FLOAT8)
+
+#undef CAST_NUMERIC_STRING
+
 #define GDV_FN_CAST_VARLEN_TYPE_FROM_INTEGER(IN_TYPE, CAST_NAME, ARROW_TYPE)      \
   GANDIVA_EXPORT                                                                  \
   const char* gdv_fn_cast##CAST_NAME##_##IN_TYPE##_int64(                         \
@@ -1056,6 +1070,36 @@ void ExportedStubFunctions::AddMappings(Engine* engine) const {
       "gdv_fn_castVARCHAR_float64_int64", types->i8_ptr_type() /*return_type*/, args,
       reinterpret_cast<void*>(gdv_fn_castVARCHAR_float64_int64));
 
+  args = {types->i64_type(),     // int64_t context_ptr
+          types->i8_ptr_type(),  // const char* data
+          types->i32_type()};    // int32_t lenr
+
+  engine->AddGlobalMappingForFunc("gdv_fn_castINT_varbinary", types->i32_type(), args,
+                                  reinterpret_cast<void*>(gdv_fn_castINT_varbinary));
+
+  args = {types->i64_type(),     // int64_t context_ptr
+          types->i8_ptr_type(),  // const char* data
+          types->i32_type()};    // int32_t lenr
+
+  engine->AddGlobalMappingForFunc("gdv_fn_castBIGINT_varbinary", types->i64_type(), args,
+                                  reinterpret_cast<void*>(gdv_fn_castBIGINT_varbinary));
+
+  args = {types->i64_type(),     // int64_t context_ptr
+          types->i8_ptr_type(),  // const char* data
+          types->i32_type()};    // int32_t lenr
+
+  engine->AddGlobalMappingForFunc("gdv_fn_castFLOAT4_varbinary", types->float_type(),
+                                  args,
+                                  reinterpret_cast<void*>(gdv_fn_castFLOAT4_varbinary));
+
+  args = {types->i64_type(),     // int64_t context_ptr
+          types->i8_ptr_type(),  // const char* data
+          types->i32_type()};    // int32_t lenr
+
+  engine->AddGlobalMappingForFunc("gdv_fn_castFLOAT8_varbinary", types->double_type(),
+                                  args,
+                                  reinterpret_cast<void*>(gdv_fn_castFLOAT8_varbinary));
+
   // gdv_fn_sha1_int8
   args = {
       types->i64_type(),     // context
diff --git a/cpp/src/gandiva/gdv_function_stubs.h b/cpp/src/gandiva/gdv_function_stubs.h
index 1d95c82e3ca..7736b975089 100644
--- a/cpp/src/gandiva/gdv_function_stubs.h
+++ b/cpp/src/gandiva/gdv_function_stubs.h
@@ -157,4 +157,16 @@ const char* gdv_fn_lower_utf8(int64_t context, const char* data, int32_t data_le
 GANDIVA_EXPORT
 const char* gdv_fn_initcap_utf8(int64_t context, const char* data, int32_t data_len,
                                 int32_t* out_len);
+
+GANDIVA_EXPORT
+int32_t gdv_fn_castINT_varbinary(gdv_int64 context, const char* in, int32_t in_len);
+
+GANDIVA_EXPORT
+int64_t gdv_fn_castBIGINT_varbinary(gdv_int64 context, const char* in, int32_t in_len);
+
+GANDIVA_EXPORT
+float gdv_fn_castFLOAT4_varbinary(gdv_int64 context, const char* in, int32_t in_len);
+
+GANDIVA_EXPORT
+double gdv_fn_castFLOAT8_varbinary(gdv_int64 context, const char* in, int32_t in_len);
 }
diff --git a/cpp/src/gandiva/gdv_function_stubs_test.cc b/cpp/src/gandiva/gdv_function_stubs_test.cc
index 80e6379edab..f7c21981cbc 100644
--- a/cpp/src/gandiva/gdv_function_stubs_test.cc
+++ b/cpp/src/gandiva/gdv_function_stubs_test.cc
@@ -623,4 +623,147 @@ TEST(TestGdvFnStubs, TestInitCap) {
                   "unexpected byte \\e0 encountered while decoding utf8 string"));
   ctx.Reset();
 }
+
+TEST(TestGdvFnStubs, TestCastVarbinaryINT) {
+  gandiva::ExecutionContext ctx;
+
+  int64_t ctx_ptr = reinterpret_cast<int64_t>(&ctx);
+
+  EXPECT_EQ(gdv_fn_castINT_varbinary(ctx_ptr, "-45", 3), -45);
+  EXPECT_EQ(gdv_fn_castINT_varbinary(ctx_ptr, "0", 1), 0);
+  EXPECT_EQ(gdv_fn_castINT_varbinary(ctx_ptr, "2147483647", 10), 2147483647);
+  EXPECT_EQ(gdv_fn_castINT_varbinary(ctx_ptr, "\x32\x33", 2), 23);
+  EXPECT_EQ(gdv_fn_castINT_varbinary(ctx_ptr, "02147483647", 11), 2147483647);
+  EXPECT_EQ(gdv_fn_castINT_varbinary(ctx_ptr, "-2147483648", 11), -2147483648LL);
+  EXPECT_EQ(gdv_fn_castINT_varbinary(ctx_ptr, "-02147483648", 12), -2147483648LL);
+  EXPECT_EQ(gdv_fn_castINT_varbinary(ctx_ptr, " 12 ", 4), 12);
+
+  gdv_fn_castINT_varbinary(ctx_ptr, "2147483648", 10);
+  EXPECT_THAT(ctx.get_error(),
+              ::testing::HasSubstr("Failed to cast the string 2147483648 to int32"));
+  ctx.Reset();
+
+  gdv_fn_castINT_varbinary(ctx_ptr, "-2147483649", 11);
+  EXPECT_THAT(ctx.get_error(),
+              ::testing::HasSubstr("Failed to cast the string -2147483649 to int32"));
+  ctx.Reset();
+
+  gdv_fn_castINT_varbinary(ctx_ptr, "12.34", 5);
+  EXPECT_THAT(ctx.get_error(),
+              ::testing::HasSubstr("Failed to cast the string 12.34 to int32"));
+  ctx.Reset();
+
+  gdv_fn_castINT_varbinary(ctx_ptr, "abc", 3);
+  EXPECT_THAT(ctx.get_error(),
+              ::testing::HasSubstr("Failed to cast the string abc to int32"));
+  ctx.Reset();
+
+  gdv_fn_castINT_varbinary(ctx_ptr, "", 0);
+  EXPECT_THAT(ctx.get_error(),
+              ::testing::HasSubstr("Failed to cast the string  to int32"));
+  ctx.Reset();
+
+  gdv_fn_castINT_varbinary(ctx_ptr, "-", 1);
+  EXPECT_THAT(ctx.get_error(),
+              ::testing::HasSubstr("Failed to cast the string - to int32"));
+  ctx.Reset();
+}
+
+TEST(TestGdvFnStubs, TestCastVarbinaryBIGINT) {
+  gandiva::ExecutionContext ctx;
+
+  int64_t ctx_ptr = reinterpret_cast<int64_t>(&ctx);
+
+  EXPECT_EQ(gdv_fn_castBIGINT_varbinary(ctx_ptr, "-45", 3), -45);
+  EXPECT_EQ(gdv_fn_castBIGINT_varbinary(ctx_ptr, "0", 1), 0);
+  EXPECT_EQ(gdv_fn_castBIGINT_varbinary(ctx_ptr, "9223372036854775807", 19),
+            9223372036854775807LL);
+  EXPECT_EQ(gdv_fn_castBIGINT_varbinary(ctx_ptr, "09223372036854775807", 20),
+            9223372036854775807LL);
+  EXPECT_EQ(gdv_fn_castBIGINT_varbinary(ctx_ptr, "-9223372036854775808", 20),
+            -9223372036854775807LL - 1);
+  EXPECT_EQ(gdv_fn_castBIGINT_varbinary(ctx_ptr, "-009223372036854775808", 22),
+            -9223372036854775807LL - 1);
+  EXPECT_EQ(gdv_fn_castBIGINT_varbinary(ctx_ptr, " 12 ", 4), 12);
+
+  EXPECT_EQ(gdv_fn_castBIGINT_varbinary(ctx_ptr,
+                                        "\x39\x39\x39\x39\x39\x39\x39\x39\x39\x39", 10),
+            9999999999LL);
+
+  gdv_fn_castBIGINT_varbinary(ctx_ptr, "9223372036854775808", 19);
+  EXPECT_THAT(
+      ctx.get_error(),
+      ::testing::HasSubstr("Failed to cast the string 9223372036854775808 to int64"));
+  ctx.Reset();
+
+  gdv_fn_castBIGINT_varbinary(ctx_ptr, "-9223372036854775809", 20);
+  EXPECT_THAT(
+      ctx.get_error(),
+      ::testing::HasSubstr("Failed to cast the string -9223372036854775809 to int64"));
+  ctx.Reset();
+
+  gdv_fn_castBIGINT_varbinary(ctx_ptr, "12.34", 5);
+  EXPECT_THAT(ctx.get_error(),
+              ::testing::HasSubstr("Failed to cast the string 12.34 to int64"));
+  ctx.Reset();
+
+  gdv_fn_castBIGINT_varbinary(ctx_ptr, "abc", 3);
+  EXPECT_THAT(ctx.get_error(),
+              ::testing::HasSubstr("Failed to cast the string abc to int64"));
+  ctx.Reset();
+
+  gdv_fn_castBIGINT_varbinary(ctx_ptr, "", 0);
+  EXPECT_THAT(ctx.get_error(),
+              ::testing::HasSubstr("Failed to cast the string  to int64"));
+  ctx.Reset();
+
+  gdv_fn_castBIGINT_varbinary(ctx_ptr, "-", 1);
+  EXPECT_THAT(ctx.get_error(),
+              ::testing::HasSubstr("Failed to cast the string - to int64"));
+  ctx.Reset();
+}
+
+TEST(TestGdvFnStubs, TestCastVarbinaryFloat4) {
+  gandiva::ExecutionContext ctx;
+
+  int64_t ctx_ptr = reinterpret_cast<int64_t>(&ctx);
+
+  EXPECT_EQ(gdv_fn_castFLOAT4_varbinary(ctx_ptr, "-45.34", 6), -45.34f);
+  EXPECT_EQ(gdv_fn_castFLOAT4_varbinary(ctx_ptr, "0", 1), 0.0f);
+  EXPECT_EQ(gdv_fn_castFLOAT4_varbinary(ctx_ptr, "5", 1), 5.0f);
+  EXPECT_EQ(gdv_fn_castFLOAT4_varbinary(ctx_ptr, " 3.4 ", 5), 3.4f);
+  EXPECT_EQ(gdv_fn_castFLOAT4_varbinary(ctx_ptr, " \x33\x2E\x34 ", 5), 3.4f);
+
+  gdv_fn_castFLOAT4_varbinary(ctx_ptr, "", 0);
+  EXPECT_THAT(ctx.get_error(),
+              ::testing::HasSubstr("Failed to cast the string  to float"));
+  ctx.Reset();
+
+  gdv_fn_castFLOAT4_varbinary(ctx_ptr, "e", 1);
+  EXPECT_THAT(ctx.get_error(),
+              ::testing::HasSubstr("Failed to cast the string e to float"));
+  ctx.Reset();
+}
+
+TEST(TestGdvFnStubs, TestCastVarbinaryFloat8) {
+  gandiva::ExecutionContext ctx;
+
+  int64_t ctx_ptr = reinterpret_cast<int64_t>(&ctx);
+
+  EXPECT_EQ(gdv_fn_castFLOAT8_varbinary(ctx_ptr, "-45.34", 6), -45.34);
+  EXPECT_EQ(gdv_fn_castFLOAT8_varbinary(ctx_ptr, "0", 1), 0.0);
+  EXPECT_EQ(gdv_fn_castFLOAT8_varbinary(ctx_ptr, "5", 1), 5.0);
+  EXPECT_EQ(gdv_fn_castFLOAT8_varbinary(ctx_ptr, " \x33\x2E\x34 ", 5), 3.4);
+
+  gdv_fn_castFLOAT8_varbinary(ctx_ptr, "", 0);
+  EXPECT_THAT(ctx.get_error(),
+              ::testing::HasSubstr("Failed to cast the string  to double"));
+  ctx.Reset();
+
+  gdv_fn_castFLOAT8_varbinary(ctx_ptr, "e", 1);
+  EXPECT_THAT(ctx.get_error(),
+              ::testing::HasSubstr("Failed to cast the string e to double"));
+  ctx.Reset();
+}
+
 }  // namespace gandiva
diff --git a/cpp/src/gandiva/precompiled/string_ops.cc b/cpp/src/gandiva/precompiled/string_ops.cc
index 751b29d49c6..24064fa2e06 100644
--- a/cpp/src/gandiva/precompiled/string_ops.cc
+++ b/cpp/src/gandiva/precompiled/string_ops.cc
@@ -2052,6 +2052,69 @@ const char* binary_string(gdv_int64 context, const char* text, gdv_int32 text_le
   return ret;
 }
 
+#define CAST_INT_BIGINT_VARBINARY(OUT_TYPE, TYPE_NAME)                                 \
+  FORCE_INLINE                                                                         \
+  OUT_TYPE                                                                             \
+  cast##TYPE_NAME##_varbinary(gdv_int64 context, const char* in, int32_t in_len) {     \
+    if (in_len == 0) {                                                                 \
+      gdv_fn_context_set_error_msg(context, "Can't cast an empty string.");            \
+      return -1;                                                                       \
+    }                                                                                  \
+    char sign = in[0];                                                                 \
+                                                                                       \
+    bool negative = false;                                                             \
+    if (sign == '-') {                                                                 \
+      negative = true;                                                                 \
+      /* Ignores the sign char in the hexadecimal string */                            \
+      in++;                                                                            \
+      in_len--;                                                                        \
+    }                                                                                  \
+                                                                                       \
+    if (negative && in_len == 0) {                                                     \
+      gdv_fn_context_set_error_msg(context,                                            \
+                                   "Can't cast hexadecimal with only a minus sign.");  \
+      return -1;                                                                       \
+    }                                                                                  \
+                                                                                       \
+    OUT_TYPE result = 0;                                                               \
+    int digit;                                                                         \
+                                                                                       \
+    int read_index = 0;                                                                \
+    while (read_index < in_len) {                                                      \
+      char c1 = in[read_index];                                                        \
+      if (isxdigit(c1)) {                                                              \
+        digit = to_binary_from_hex(c1);                                                \
+                                                                                       \
+        OUT_TYPE next = result * 16 - digit;                                           \
+                                                                                       \
+        if (next > result) {                                                           \
+          gdv_fn_context_set_error_msg(context, "Integer overflow.");                  \
+          return -1;                                                                   \
+        }                                                                              \
+        result = next;                                                                 \
+        read_index++;                                                                  \
+      } else {                                                                         \
+        gdv_fn_context_set_error_msg(context,                                          \
+                                     "The hexadecimal given has invalid characters."); \
+        return -1;                                                                     \
+      }                                                                                \
+    }                                                                                  \
+    if (!negative) {                                                                   \
+      result *= -1;                                                                    \
+                                                                                       \
+      if (result < 0) {                                                                \
+        gdv_fn_context_set_error_msg(context, "Integer overflow.");                    \
+        return -1;                                                                     \
+      }                                                                                \
+    }                                                                                  \
+    return result;                                                                     \
+  }
+
+CAST_INT_BIGINT_VARBINARY(int32_t, INT)
+CAST_INT_BIGINT_VARBINARY(int64_t, BIGINT)
+
+#undef CAST_INT_BIGINT_VARBINARY
+
 // Produces the binary representation of a string y characters long derived by starting
 // at offset 'x' and considering the defined length 'y'. Notice that the offset index
 // may be a negative number (starting from the end of the string), or a positive number
diff --git a/cpp/src/gandiva/precompiled/string_ops_test.cc b/cpp/src/gandiva/precompiled/string_ops_test.cc
index 020e380b584..a21b2671f96 100644
--- a/cpp/src/gandiva/precompiled/string_ops_test.cc
+++ b/cpp/src/gandiva/precompiled/string_ops_test.cc
@@ -1683,4 +1683,5 @@ TEST(TestStringOps, TestConvertToBigEndian) {
   }
 #endif
 }
+
 }  // namespace gandiva
diff --git a/cpp/src/gandiva/tests/projector_test.cc b/cpp/src/gandiva/tests/projector_test.cc
index 572fb3103ec..33f83a44c4c 100644
--- a/cpp/src/gandiva/tests/projector_test.cc
+++ b/cpp/src/gandiva/tests/projector_test.cc
@@ -1130,6 +1130,62 @@ TEST_F(TestProjector, TestCastBitFunction) {
   EXPECT_ARROW_ARRAY_EQUALS(out, outputs.at(0));
 }
 
+// Test to ensure behaviour of cast functions when the validity is false for an input. The
+// function should not run for that input.
+TEST_F(TestProjector, TestCastVarbinaryFunction) {
+  auto field0 = field("f0", arrow::binary());
+  auto schema = arrow::schema({field0});
+
+  // output fields
+  auto res_int4 = field("res_int4", arrow::int32());
+  auto res_int8 = field("res_int8", arrow::int64());
+  auto res_float4 = field("res_float4", arrow::float32());
+  auto res_float8 = field("res_float8", arrow::float64());
+
+  // Build expression
+  auto cast_expr_int4 = TreeExprBuilder::MakeExpression("castINT", {field0}, res_int4);
+  auto cast_expr_int8 = TreeExprBuilder::MakeExpression("castBIGINT", {field0}, res_int8);
+  auto cast_expr_float4 =
+      TreeExprBuilder::MakeExpression("castFLOAT4", {field0}, res_float4);
+  auto cast_expr_float8 =
+      TreeExprBuilder::MakeExpression("castFLOAT8", {field0}, res_float8);
+
+  std::shared_ptr<Projector> projector;
+
+  //  {cast_expr_float4, cast_expr_float8, cast_expr_int4, cast_expr_int8}
+  auto status = Projector::Make(
+      schema, {cast_expr_int4, cast_expr_int8, cast_expr_float4, cast_expr_float8},
+      TestConfiguration(), &projector);
+  EXPECT_TRUE(status.ok());
+
+  // Create a row-batch with some sample data
+  int num_records = 4;
+
+  // Last validity is false and the cast functions throw error when input is empty. Should
+  // not be evaluated due to addition of NativeFunction::kCanReturnErrors
+  auto array0 =
+      MakeArrowArrayBinary({"37", "-99999", "99999", "4"}, {true, true, true, false});
+  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0});
+
+  auto out_int4 = MakeArrowArrayInt32({37, -99999, 99999, 0}, {true, true, true, false});
+  auto out_int8 = MakeArrowArrayInt64({37, -99999, 99999, 0}, {true, true, true, false});
+  auto out_float4 =
+      MakeArrowArrayFloat32({37, -99999, 99999, 0}, {true, true, true, false});
+  auto out_float8 =
+      MakeArrowArrayFloat64({37, -99999, 99999, 0}, {true, true, true, false});
+
+  arrow::ArrayVector outputs;
+
+  // Evaluate expression
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  EXPECT_TRUE(status.ok());
+
+  EXPECT_ARROW_ARRAY_EQUALS(out_int4, outputs.at(0));
+  EXPECT_ARROW_ARRAY_EQUALS(out_int8, outputs.at(1));
+  EXPECT_ARROW_ARRAY_EQUALS(out_float4, outputs.at(2));
+  EXPECT_ARROW_ARRAY_EQUALS(out_float8, outputs.at(3));
+}
+
 TEST_F(TestProjector, TestToDate) {
   // schema for input fields
   auto field0 = field("f0", arrow::utf8());
diff --git a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java
index 7fa10a8fabd..c4a6cd1e135 100644
--- a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java
+++ b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java
@@ -2006,6 +2006,59 @@ public void testCastFloat() throws Exception {
     releaseValueVectors(output);
   }
 
+  @Test
+  public void testCastFloatVarbinary() throws Exception {
+    Field inField = Field.nullable("input", new ArrowType.Binary());
+    TreeNode inNode = TreeBuilder.makeField(inField);
+    TreeNode castFLOAT8Fn = TreeBuilder.makeFunction("castFLOAT8", Lists.newArrayList(inNode),
+            float64);
+    Field resultField = Field.nullable("result", float64);
+    List<ExpressionTree> exprs =
+            Lists.newArrayList(
+                    TreeBuilder.makeExpression(castFLOAT8Fn, resultField));
+    Schema schema = new Schema(Lists.newArrayList(inField));
+    Projector eval = Projector.make(schema, exprs);
+    int numRows = 5;
+    byte[] validity = new byte[] {(byte) 255};
+    String[] values =
+        new String[] {
+            "2.3",
+            "-11.11",
+            "0",
+            "111",
+            "12345.67"
+        };
+    double[] expValues =
+        new double[] {
+            2.3, -11.11, 0, 111, 12345.67
+        };
+    ArrowBuf bufValidity = buf(validity);
+    List<ArrowBuf> bufData = stringBufs(values);
+    ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0);
+    ArrowRecordBatch batch =
+            new ArrowRecordBatch(
+                    numRows,
+                    Lists.newArrayList(fieldNode),
+                    Lists.newArrayList(bufValidity, bufData.get(0), bufData.get(1)));
+    List<ValueVector> output = new ArrayList<>();
+    for (int i = 0; i < exprs.size(); i++) {
+      Float8Vector float8Vector = new Float8Vector(EMPTY_SCHEMA_PATH, allocator);
+      float8Vector.allocateNew(numRows);
+      output.add(float8Vector);
+    }
+    eval.evaluate(batch, output);
+    eval.close();
+    for (ValueVector valueVector : output) {
+      Float8Vector float8Vector = (Float8Vector) valueVector;
+      for (int j = 0; j < numRows; j++) {
+        assertFalse(float8Vector.isNull(j));
+        assertTrue(expValues[j] == float8Vector.get(j));
+      }
+    }
+    releaseRecordBatch(batch);
+    releaseValueVectors(output);
+  }
+
   @Test(expected = GandivaException.class)
   public void testCastFloatInvalidValue() throws Exception {
     Field inField = Field.nullable("input", new ArrowType.Utf8());

From 87e0252abc14cd6925857d613b8865a1eebf9ba3 Mon Sep 17 00:00:00 2001
From: frank400 <j.victorhuguenin2018@gmail.com>
Date: Wed, 4 Aug 2021 16:00:10 +0530
Subject: [PATCH 697/719] ARROW-12479: [C++][Gandiva] Implement castBigInt,
 castInt, castIntervalDay and castIntervalYear extra functions

Closes #10112 from jvictorhuguenin/feature/implement-castint-cast-bigint-for-various-types and squashes the following commits:

9a3e8ad1f <frank400> Remove castNULLABLE unnecessary implementations
d029d128e <frank400> Change macro parameter from NAME to TYPE
33dad3a9f <frank400> Change alias for year_interval to month_interval
44bf1ad43 <frank400> Fix implicit cast
5a42c3ce5 <frank400> Implement round method
9fc9324c8 <frank400> Fix unsigned minus sign
5e7a33570 <frank400> Fix build problems
f52655fe6 <frank400> Fix checkstyle
6f1f87d4f <frank400> Add alias for nullable functions
813d9cc58 <frank400> Add projector test for nullable functions
8afc3eba6 <frank400> Add Cast nullable functions
c284b1ef3 <frank400> Fix year Interval type
89a2daf21 <frank400> Add test for implemented castINT functions
1f3e44b60 <frank400> Fix tests implementation
e177ebbcb <frank400> Fix duplicated function
5c6a7f308 <frank400> Add castINT and castBIGINT from dayinterval and yearinterval
d98af88f9 <frank400> Add castINT and castBIGINT for floats and doubles

Authored-by: frank400 <j.victorhuguenin2018@gmail.com>
Signed-off-by: Praveen <praveen@dremio.com>
---
 .../gandiva/function_registry_arithmetic.cc   |  14 +-
 cpp/src/gandiva/function_registry_common.h    |   1 +
 cpp/src/gandiva/function_registry_datetime.cc |  26 +++
 cpp/src/gandiva/gdv_function_stubs.h          |   1 +
 cpp/src/gandiva/precompiled/arithmetic_ops.cc |  15 ++
 .../precompiled/arithmetic_ops_test.cc        |  40 +++++
 cpp/src/gandiva/precompiled/time.cc           |  31 ++++
 cpp/src/gandiva/precompiled/time_test.cc      |  44 +++++
 cpp/src/gandiva/precompiled/types.h           |  22 +++
 cpp/src/gandiva/tests/projector_test.cc       | 159 ++++++++++++++++++
 10 files changed, 351 insertions(+), 2 deletions(-)

diff --git a/cpp/src/gandiva/function_registry_arithmetic.cc b/cpp/src/gandiva/function_registry_arithmetic.cc
index 6613c1f12c6..f34289f372e 100644
--- a/cpp/src/gandiva/function_registry_arithmetic.cc
+++ b/cpp/src/gandiva/function_registry_arithmetic.cc
@@ -29,9 +29,13 @@ namespace gandiva {
 #define BINARY_RELATIONAL_BOOL_DATE_FN(name, ALIASES) \
   NUMERIC_DATE_TYPES(BINARY_RELATIONAL_SAFE_NULL_IF_NULL, name, ALIASES)
 
-#define UNARY_CAST_TO_FLOAT64(name) UNARY_SAFE_NULL_IF_NULL(castFLOAT8, {}, name, float64)
+#define UNARY_CAST_TO_FLOAT64(type) UNARY_SAFE_NULL_IF_NULL(castFLOAT8, {}, type, float64)
 
-#define UNARY_CAST_TO_FLOAT32(name) UNARY_SAFE_NULL_IF_NULL(castFLOAT4, {}, name, float32)
+#define UNARY_CAST_TO_FLOAT32(type) UNARY_SAFE_NULL_IF_NULL(castFLOAT4, {}, type, float32)
+
+#define UNARY_CAST_TO_INT32(type) UNARY_SAFE_NULL_IF_NULL(castINT, {}, type, int32)
+
+#define UNARY_CAST_TO_INT64(type) UNARY_SAFE_NULL_IF_NULL(castBIGINT, {}, type, int64)
 
 std::vector<NativeFunction> GetArithmeticFunctionRegistry() {
   static std::vector<NativeFunction> arithmetic_fn_registry_ = {
@@ -44,6 +48,12 @@ std::vector<NativeFunction> GetArithmeticFunctionRegistry() {
       UNARY_CAST_TO_FLOAT32(int32), UNARY_CAST_TO_FLOAT32(int64),
       UNARY_CAST_TO_FLOAT32(float64),
 
+      // cast to int32
+      UNARY_CAST_TO_INT32(float32), UNARY_CAST_TO_INT32(float64),
+
+      // cast to int64
+      UNARY_CAST_TO_INT64(float32), UNARY_CAST_TO_INT64(float64),
+
       // cast to float64
       UNARY_CAST_TO_FLOAT64(int32), UNARY_CAST_TO_FLOAT64(int64),
       UNARY_CAST_TO_FLOAT64(float32), UNARY_CAST_TO_FLOAT64(decimal128),
diff --git a/cpp/src/gandiva/function_registry_common.h b/cpp/src/gandiva/function_registry_common.h
index 40efc1fe1a9..66f94515089 100644
--- a/cpp/src/gandiva/function_registry_common.h
+++ b/cpp/src/gandiva/function_registry_common.h
@@ -43,6 +43,7 @@ using arrow::int16;
 using arrow::int32;
 using arrow::int64;
 using arrow::int8;
+using arrow::month_interval;
 using arrow::uint16;
 using arrow::uint32;
 using arrow::uint64;
diff --git a/cpp/src/gandiva/function_registry_datetime.cc b/cpp/src/gandiva/function_registry_datetime.cc
index 6e7a703aa61..b8d2e7b6c7d 100644
--- a/cpp/src/gandiva/function_registry_datetime.cc
+++ b/cpp/src/gandiva/function_registry_datetime.cc
@@ -93,6 +93,32 @@ std::vector<NativeFunction> GetDateTimeFunctionRegistry() {
       NativeFunction("castBIGINT", {}, DataTypeVector{day_time_interval()}, int64(),
                      kResultNullIfNull, "castBIGINT_daytimeinterval"),
 
+      NativeFunction("castINT", {"castNULLABLEINT"}, DataTypeVector{month_interval()},
+                     int32(), kResultNullIfNull, "castINT_year_interval",
+                     NativeFunction::kCanReturnErrors),
+
+      NativeFunction("castBIGINT", {"castNULLABLEBIGINT"},
+                     DataTypeVector{month_interval()}, int64(), kResultNullIfNull,
+                     "castBIGINT_year_interval", NativeFunction::kCanReturnErrors),
+
+      NativeFunction("castNULLABLEINTERVALYEAR", {"castINTERVALYEAR"},
+                     DataTypeVector{int32()}, month_interval(), kResultNullIfNull,
+                     "castNULLABLEINTERVALYEAR_int32",
+                     NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors),
+
+      NativeFunction("castNULLABLEINTERVALYEAR", {"castINTERVALYEAR"},
+                     DataTypeVector{int64()}, month_interval(), kResultNullIfNull,
+                     "castNULLABLEINTERVALYEAR_int64",
+                     NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors),
+
+      NativeFunction("castNULLABLEINTERVALDAY", {"castINTERVALDAY"},
+                     DataTypeVector{int32()}, day_time_interval(), kResultNullIfNull,
+                     "castNULLABLEINTERVALDAY_int32"),
+
+      NativeFunction("castNULLABLEINTERVALDAY", {"castINTERVALDAY"},
+                     DataTypeVector{int64()}, day_time_interval(), kResultNullIfNull,
+                     "castNULLABLEINTERVALDAY_int64"),
+
       NativeFunction("extractDay", {}, DataTypeVector{day_time_interval()}, int64(),
                      kResultNullIfNull, "extractDay_daytimeinterval"),
 
diff --git a/cpp/src/gandiva/gdv_function_stubs.h b/cpp/src/gandiva/gdv_function_stubs.h
index 7736b975089..670ac94df1b 100644
--- a/cpp/src/gandiva/gdv_function_stubs.h
+++ b/cpp/src/gandiva/gdv_function_stubs.h
@@ -42,6 +42,7 @@ using gdv_timestamp = int64_t;
 using gdv_utf8 = char*;
 using gdv_binary = char*;
 using gdv_day_time_interval = int64_t;
+using gdv_month_interval = int32_t;
 
 #ifdef GANDIVA_UNIT_TEST
 // unit tests may be compiled without O2, so inlining may not happen.
diff --git a/cpp/src/gandiva/precompiled/arithmetic_ops.cc b/cpp/src/gandiva/precompiled/arithmetic_ops.cc
index a173a60d6d0..c736c38d32c 100644
--- a/cpp/src/gandiva/precompiled/arithmetic_ops.cc
+++ b/cpp/src/gandiva/precompiled/arithmetic_ops.cc
@@ -122,6 +122,21 @@ CAST_UNARY(castFLOAT4, float64, float32)
 
 #undef CAST_UNARY
 
+// cast float types to int types.
+#define CAST_INT_FLOAT(NAME, IN_TYPE, OUT_TYPE)                  \
+  FORCE_INLINE                                                   \
+  gdv_##OUT_TYPE NAME##_##IN_TYPE(gdv_##IN_TYPE in) {            \
+    gdv_##OUT_TYPE out = static_cast<gdv_##OUT_TYPE>(round(in)); \
+    return out;                                                  \
+  }
+
+CAST_INT_FLOAT(castBIGINT, float32, int64)
+CAST_INT_FLOAT(castBIGINT, float64, int64)
+CAST_INT_FLOAT(castINT, float32, int32)
+CAST_INT_FLOAT(castINT, float64, int32)
+
+#undef CAST_INT_FLOAT
+
 // simple nullable functions, result value = fn(input validity)
 #define VALIDITY_OP(NAME, TYPE, OP) \
   FORCE_INLINE                      \
diff --git a/cpp/src/gandiva/precompiled/arithmetic_ops_test.cc b/cpp/src/gandiva/precompiled/arithmetic_ops_test.cc
index b3359ac7d6c..36b50bcfdae 100644
--- a/cpp/src/gandiva/precompiled/arithmetic_ops_test.cc
+++ b/cpp/src/gandiva/precompiled/arithmetic_ops_test.cc
@@ -137,4 +137,44 @@ TEST(TestArithmeticOps, TestBitwiseOps) {
   EXPECT_EQ(bitwise_not_int64(0x0000000000000000), 0xFFFFFFFFFFFFFFFF);
 }
 
+TEST(TestArithmeticOps, TestIntCastFloatDouble) {
+  // castINT from floats
+  EXPECT_EQ(castINT_float32(6.6f), 7);
+  EXPECT_EQ(castINT_float32(-6.6f), -7);
+  EXPECT_EQ(castINT_float32(-6.3f), -6);
+  EXPECT_EQ(castINT_float32(0.0f), 0);
+  EXPECT_EQ(castINT_float32(-0), 0);
+
+  // castINT from doubles
+  EXPECT_EQ(castINT_float64(6.6), 7);
+  EXPECT_EQ(castINT_float64(-6.6), -7);
+  EXPECT_EQ(castINT_float64(-6.3), -6);
+  EXPECT_EQ(castINT_float64(0.0), 0);
+  EXPECT_EQ(castINT_float64(-0), 0);
+  EXPECT_EQ(castINT_float64(999999.99999999999999999999999), 1000000);
+  EXPECT_EQ(castINT_float64(-999999.99999999999999999999999), -1000000);
+  EXPECT_EQ(castINT_float64(INT32_MAX), 2147483647);
+  EXPECT_EQ(castINT_float64(-2147483647), -2147483647);
+}
+
+TEST(TestArithmeticOps, TestBigIntCastFloatDouble) {
+  // castINT from floats
+  EXPECT_EQ(castBIGINT_float32(6.6f), 7);
+  EXPECT_EQ(castBIGINT_float32(-6.6f), -7);
+  EXPECT_EQ(castBIGINT_float32(-6.3f), -6);
+  EXPECT_EQ(castBIGINT_float32(0.0f), 0);
+  EXPECT_EQ(castBIGINT_float32(-0), 0);
+
+  // castINT from doubles
+  EXPECT_EQ(castBIGINT_float64(6.6), 7);
+  EXPECT_EQ(castBIGINT_float64(-6.6), -7);
+  EXPECT_EQ(castBIGINT_float64(-6.3), -6);
+  EXPECT_EQ(castBIGINT_float64(0.0), 0);
+  EXPECT_EQ(castBIGINT_float64(-0), 0);
+  EXPECT_EQ(castBIGINT_float64(999999.99999999999999999999999), 1000000);
+  EXPECT_EQ(castBIGINT_float64(-999999.99999999999999999999999), -1000000);
+  EXPECT_EQ(castBIGINT_float64(INT32_MAX), 2147483647);
+  EXPECT_EQ(castBIGINT_float64(-2147483647), -2147483647);
+}
+
 }  // namespace gandiva
diff --git a/cpp/src/gandiva/precompiled/time.cc b/cpp/src/gandiva/precompiled/time.cc
index e5cdd9de64f..336f692267d 100644
--- a/cpp/src/gandiva/precompiled/time.cc
+++ b/cpp/src/gandiva/precompiled/time.cc
@@ -860,4 +860,35 @@ NUMERIC_TYPES(TO_TIMESTAMP)
 
 NUMERIC_TYPES(TO_TIME)
 
+#define CAST_INT_YEAR_INTERVAL(TYPE, OUT_TYPE)                 \
+  FORCE_INLINE                                                 \
+  gdv_##OUT_TYPE TYPE##_year_interval(gdv_month_interval in) { \
+    return static_cast<gdv_##OUT_TYPE>(in / 12.0);             \
+  }
+
+CAST_INT_YEAR_INTERVAL(castBIGINT, int64)
+CAST_INT_YEAR_INTERVAL(castINT, int32)
+
+#define CAST_NULLABLE_INTERVAL_DAY(TYPE)                                \
+  FORCE_INLINE                                                          \
+  gdv_day_time_interval castNULLABLEINTERVALDAY_##TYPE(gdv_##TYPE in) { \
+    return static_cast<gdv_day_time_interval>(in);                      \
+  }
+
+CAST_NULLABLE_INTERVAL_DAY(int32)
+CAST_NULLABLE_INTERVAL_DAY(int64)
+
+#define CAST_NULLABLE_INTERVAL_YEAR(TYPE)                                              \
+  FORCE_INLINE                                                                         \
+  gdv_month_interval castNULLABLEINTERVALYEAR_##TYPE(int64_t context, gdv_##TYPE in) { \
+    gdv_month_interval value = static_cast<gdv_month_interval>(in);                    \
+    if (value != in) {                                                                 \
+      gdv_fn_context_set_error_msg(context, "Integer overflow");                       \
+    }                                                                                  \
+    return value;                                                                      \
+  }
+
+CAST_NULLABLE_INTERVAL_YEAR(int32)
+CAST_NULLABLE_INTERVAL_YEAR(int64)
+
 }  // extern "C"
diff --git a/cpp/src/gandiva/precompiled/time_test.cc b/cpp/src/gandiva/precompiled/time_test.cc
index 8d3cdccd6ff..cec3cf747c2 100644
--- a/cpp/src/gandiva/precompiled/time_test.cc
+++ b/cpp/src/gandiva/precompiled/time_test.cc
@@ -869,4 +869,48 @@ TEST(TestTime, TestToTimeNumeric) {
   EXPECT_EQ(expected_output, to_time_float64(3601.500));
 }
 
+TEST(TestTime, TestCastIntDayInterval) {
+  EXPECT_EQ(castBIGINT_daytimeinterval(10), 864000000);
+  EXPECT_EQ(castBIGINT_daytimeinterval(-100), -8640000001);
+  EXPECT_EQ(castBIGINT_daytimeinterval(-0), 0);
+}
+
+TEST(TestTime, TestCastIntYearInterval) {
+  EXPECT_EQ(castINT_year_interval(24), 2);
+  EXPECT_EQ(castINT_year_interval(-24), -2);
+  EXPECT_EQ(castINT_year_interval(-23), -1);
+
+  EXPECT_EQ(castBIGINT_year_interval(24), 2);
+  EXPECT_EQ(castBIGINT_year_interval(-24), -2);
+  EXPECT_EQ(castBIGINT_year_interval(-23), -1);
+}
+
+TEST(TestTime, TestCastNullableInterval) {
+  ExecutionContext context;
+  auto context_ptr = reinterpret_cast<int64_t>(&context);
+  // Test castNULLABLEINTERVALDAY for int and bigint
+  EXPECT_EQ(castNULLABLEINTERVALDAY_int32(1), 1);
+  EXPECT_EQ(castNULLABLEINTERVALDAY_int32(12), 12);
+  EXPECT_EQ(castNULLABLEINTERVALDAY_int32(-55), -55);
+  EXPECT_EQ(castNULLABLEINTERVALDAY_int32(-1201), -1201);
+  EXPECT_EQ(castNULLABLEINTERVALDAY_int64(1), 1);
+  EXPECT_EQ(castNULLABLEINTERVALDAY_int64(12), 12);
+  EXPECT_EQ(castNULLABLEINTERVALDAY_int64(-55), -55);
+  EXPECT_EQ(castNULLABLEINTERVALDAY_int64(-1201), -1201);
+
+  // Test castNULLABLEINTERVALYEAR for int and bigint
+  EXPECT_EQ(castNULLABLEINTERVALYEAR_int32(context_ptr, 1), 1);
+  EXPECT_EQ(castNULLABLEINTERVALYEAR_int32(context_ptr, 12), 12);
+  EXPECT_EQ(castNULLABLEINTERVALYEAR_int32(context_ptr, 55), 55);
+  EXPECT_EQ(castNULLABLEINTERVALYEAR_int32(context_ptr, 1201), 1201);
+  EXPECT_EQ(castNULLABLEINTERVALYEAR_int64(context_ptr, 1), 1);
+  EXPECT_EQ(castNULLABLEINTERVALYEAR_int64(context_ptr, 12), 12);
+  EXPECT_EQ(castNULLABLEINTERVALYEAR_int64(context_ptr, 55), 55);
+  EXPECT_EQ(castNULLABLEINTERVALYEAR_int64(context_ptr, 1201), 1201);
+  // validate overflow error when using bigint as input
+  castNULLABLEINTERVALYEAR_int64(context_ptr, INT64_MAX);
+  EXPECT_EQ(context.get_error(), "Integer overflow");
+  context.Reset();
+}
+
 }  // namespace gandiva
diff --git a/cpp/src/gandiva/precompiled/types.h b/cpp/src/gandiva/precompiled/types.h
index 4e913aaac67..7032f459974 100644
--- a/cpp/src/gandiva/precompiled/types.h
+++ b/cpp/src/gandiva/precompiled/types.h
@@ -543,4 +543,26 @@ float castFLOAT4_utf8(int64_t context, const char* data, int32_t len);
 
 double castFLOAT8_utf8(int64_t context, const char* data, int32_t len);
 
+int32_t castINT_float32(gdv_float32 value);
+
+int32_t castINT_float64(gdv_float64 value);
+
+int64_t castBIGINT_float32(gdv_float32 value);
+
+int64_t castBIGINT_float64(gdv_float64 value);
+
+int64_t castBIGINT_daytimeinterval(gdv_day_time_interval in);
+
+int32_t castINT_year_interval(gdv_month_interval in);
+
+int64_t castBIGINT_year_interval(gdv_month_interval in);
+
+gdv_day_time_interval castNULLABLEINTERVALDAY_int32(gdv_int32 in);
+
+gdv_day_time_interval castNULLABLEINTERVALDAY_int64(gdv_int64 in);
+
+gdv_month_interval castNULLABLEINTERVALYEAR_int32(int64_t context, gdv_int32 in);
+
+gdv_month_interval castNULLABLEINTERVALYEAR_int64(int64_t context, gdv_int64 in);
+
 }  // extern "C"
diff --git a/cpp/src/gandiva/tests/projector_test.cc b/cpp/src/gandiva/tests/projector_test.cc
index 33f83a44c4c..2ce52befdc1 100644
--- a/cpp/src/gandiva/tests/projector_test.cc
+++ b/cpp/src/gandiva/tests/projector_test.cc
@@ -1409,4 +1409,163 @@ TEST_F(TestProjector, TestBinRepresentation) {
   EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
 }
 
+TEST_F(TestProjector, TestBigIntCastFunction) {
+  // input fields
+  auto field0 = field("f0", arrow::float32());
+  auto field1 = field("f1", arrow::float64());
+  auto field2 = field("f2", arrow::day_time_interval());
+  auto field3 = field("f3", arrow::month_interval());
+  auto schema = arrow::schema({field0, field1, field2, field3});
+
+  // output fields
+  auto res_int64 = field("res", arrow::int64());
+
+  // Build expression
+  auto cast_expr_float4 =
+      TreeExprBuilder::MakeExpression("castBIGINT", {field0}, res_int64);
+  auto cast_expr_float8 =
+      TreeExprBuilder::MakeExpression("castBIGINT", {field1}, res_int64);
+  auto cast_expr_day_interval =
+      TreeExprBuilder::MakeExpression("castBIGINT", {field2}, res_int64);
+  auto cast_expr_year_interval =
+      TreeExprBuilder::MakeExpression("castBIGINT", {field3}, res_int64);
+
+  std::shared_ptr<Projector> projector;
+
+  //  {cast_expr_float4, cast_expr_float8, cast_expr_day_interval,
+  //  cast_expr_year_interval}
+  auto status = Projector::Make(schema,
+                                {cast_expr_float4, cast_expr_float8,
+                                 cast_expr_day_interval, cast_expr_year_interval},
+                                TestConfiguration(), &projector);
+  EXPECT_TRUE(status.ok());
+
+  // Create a row-batch with some sample data
+  int num_records = 4;
+
+  // Last validity is false and the cast functions throw error when input is empty. Should
+  // not be evaluated due to addition of NativeFunction::kCanReturnErrors
+  auto array0 =
+      MakeArrowArrayFloat32({6.6f, -6.6f, 9.999999f, 0}, {true, true, true, false});
+  auto array1 =
+      MakeArrowArrayFloat64({6.6, -6.6, 9.99999999999, 0}, {true, true, true, false});
+  auto array2 = MakeArrowArrayInt64({100, 25, -0, 0}, {true, true, true, false});
+  auto array3 = MakeArrowArrayInt32({25, -25, -0, 0}, {true, true, true, false});
+  auto in_batch =
+      arrow::RecordBatch::Make(schema, num_records, {array0, array1, array2, array3});
+
+  auto out_float4 = MakeArrowArrayInt64({7, -7, 10, 0}, {true, true, true, false});
+  auto out_float8 = MakeArrowArrayInt64({7, -7, 10, 0}, {true, true, true, false});
+  auto out_days_interval =
+      MakeArrowArrayInt64({8640000000, 2160000000, 0, 0}, {true, true, true, false});
+  auto out_year_interval = MakeArrowArrayInt64({2, -2, 0, 0}, {true, true, true, false});
+
+  arrow::ArrayVector outputs;
+
+  // Evaluate expression
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  EXPECT_TRUE(status.ok());
+
+  EXPECT_ARROW_ARRAY_EQUALS(out_float4, outputs.at(0));
+  EXPECT_ARROW_ARRAY_EQUALS(out_float8, outputs.at(1));
+  EXPECT_ARROW_ARRAY_EQUALS(out_days_interval, outputs.at(2));
+  EXPECT_ARROW_ARRAY_EQUALS(out_year_interval, outputs.at(3));
+}
+
+TEST_F(TestProjector, TestIntCastFunction) {
+  // input fields
+  auto field0 = field("f0", arrow::float32());
+  auto field1 = field("f1", arrow::float64());
+  auto field2 = field("f2", arrow::month_interval());
+  auto schema = arrow::schema({field0, field1, field2});
+
+  // output fields
+  auto res_int32 = field("res", arrow::int32());
+
+  // Build expression
+  auto cast_expr_float4 = TreeExprBuilder::MakeExpression("castINT", {field0}, res_int32);
+  auto cast_expr_float8 = TreeExprBuilder::MakeExpression("castINT", {field1}, res_int32);
+  auto cast_expr_year_interval =
+      TreeExprBuilder::MakeExpression("castINT", {field2}, res_int32);
+
+  std::shared_ptr<Projector> projector;
+
+  //  {cast_expr_float4, cast_expr_float8, cast_expr_day_interval,
+  //  cast_expr_year_interval}
+  auto status = Projector::Make(
+      schema, {cast_expr_float4, cast_expr_float8, cast_expr_year_interval},
+      TestConfiguration(), &projector);
+  EXPECT_TRUE(status.ok());
+
+  // Create a row-batch with some sample data
+  int num_records = 4;
+
+  // Last validity is false and the cast functions throw error when input is empty. Should
+  // not be evaluated due to addition of NativeFunction::kCanReturnErrors
+  auto array0 =
+      MakeArrowArrayFloat32({6.6f, -6.6f, 9.999999f, 0}, {true, true, true, false});
+  auto array1 =
+      MakeArrowArrayFloat64({6.6, -6.6, 9.99999999999, 0}, {true, true, true, false});
+  auto array2 = MakeArrowArrayInt32({25, -25, -0, 0}, {true, true, true, false});
+  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1, array2});
+
+  auto out_float4 = MakeArrowArrayInt32({7, -7, 10, 0}, {true, true, true, false});
+  auto out_float8 = MakeArrowArrayInt32({7, -7, 10, 0}, {true, true, true, false});
+  auto out_year_interval = MakeArrowArrayInt32({2, -2, 0, 0}, {true, true, true, false});
+
+  arrow::ArrayVector outputs;
+
+  // Evaluate expression
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  EXPECT_TRUE(status.ok());
+
+  EXPECT_ARROW_ARRAY_EQUALS(out_float4, outputs.at(0));
+  EXPECT_ARROW_ARRAY_EQUALS(out_float8, outputs.at(1));
+  EXPECT_ARROW_ARRAY_EQUALS(out_year_interval, outputs.at(2));
+}
+
+TEST_F(TestProjector, TestCastNullableIntYearInterval) {
+  // input fields
+  auto field1 = field("f1", arrow::month_interval());
+  auto schema = arrow::schema({field1});
+
+  // output fields
+  auto res_int32 = field("res", arrow::int32());
+  auto res_int64 = field("res", arrow::int64());
+
+  // Build expression
+  auto cast_expr_int32 =
+      TreeExprBuilder::MakeExpression("castNULLABLEINT", {field1}, res_int32);
+  auto cast_expr_int64 =
+      TreeExprBuilder::MakeExpression("castNULLABLEBIGINT", {field1}, res_int64);
+
+  std::shared_ptr<Projector> projector;
+
+  //  {cast_expr_int32, cast_expr_int64, cast_expr_day_interval,
+  //  cast_expr_year_interval}
+  auto status = Projector::Make(schema, {cast_expr_int32, cast_expr_int64},
+                                TestConfiguration(), &projector);
+  EXPECT_TRUE(status.ok());
+
+  // Create a row-batch with some sample data
+  int num_records = 4;
+
+  // Last validity is false and the cast functions throw error when input is empty. Should
+  // not be evaluated due to addition of NativeFunction::kCanReturnErrors
+  auto array0 = MakeArrowArrayInt32({12, -24, -0, 0}, {true, true, true, false});
+  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0});
+
+  auto out_int32 = MakeArrowArrayInt32({1, -2, -0, 0}, {true, true, true, false});
+  auto out_int64 = MakeArrowArrayInt64({1, -2, -0, 0}, {true, true, true, false});
+
+  arrow::ArrayVector outputs;
+
+  // Evaluate expression
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  EXPECT_TRUE(status.ok());
+
+  EXPECT_ARROW_ARRAY_EQUALS(out_int32, outputs.at(0));
+  EXPECT_ARROW_ARRAY_EQUALS(out_int64, outputs.at(1));
+}
+
 }  // namespace gandiva

From 5f0641b29f170ee4faac058f6d26d72d7747bcc3 Mon Sep 17 00:00:00 2001
From: rodrigojdebem <rodrigodebem1@gmail.com>
Date: Wed, 4 Aug 2021 16:01:35 +0530
Subject: [PATCH 698/719] ARROW-12410: [C++][Gandiva] Implement regexp_replace
 function on Gandiva

Closes #10059 from rodrigojdebem/feature/implement-regexp-replace and squashes the following commits:

baf27780b <rodrigojdebem> Add implementation for REGEXP_REPLACE

Authored-by: rodrigojdebem <rodrigodebem1@gmail.com>
Signed-off-by: Praveen <praveen@dremio.com>
---
 cpp/src/gandiva/CMakeLists.txt                |   2 +
 cpp/src/gandiva/function_holder_registry.h    |   2 +
 cpp/src/gandiva/function_registry_string.cc   |   6 +
 cpp/src/gandiva/gdv_function_stubs.cc         |  28 ++++
 .../gandiva/precompiled/string_ops_test.cc    |  10 +-
 cpp/src/gandiva/replace_holder.cc             |  65 +++++++++
 cpp/src/gandiva/replace_holder.h              |  97 +++++++++++++
 cpp/src/gandiva/replace_holder_test.cc        | 129 ++++++++++++++++++
 .../gandiva/evaluator/ProjectorTest.java      |  60 ++++++++
 9 files changed, 394 insertions(+), 5 deletions(-)
 create mode 100644 cpp/src/gandiva/replace_holder.cc
 create mode 100644 cpp/src/gandiva/replace_holder.h
 create mode 100644 cpp/src/gandiva/replace_holder_test.cc

diff --git a/cpp/src/gandiva/CMakeLists.txt b/cpp/src/gandiva/CMakeLists.txt
index 2da8bb68092..654a4a40be1 100644
--- a/cpp/src/gandiva/CMakeLists.txt
+++ b/cpp/src/gandiva/CMakeLists.txt
@@ -86,6 +86,7 @@ set(SRC_FILES
     literal_holder.cc
     projector.cc
     regex_util.cc
+    replace_holder.cc
     selection_vector.cc
     tree_expr_builder.cc
     to_date_holder.cc
@@ -230,6 +231,7 @@ add_gandiva_test(internals-test
                  to_date_holder_test.cc
                  simple_arena_test.cc
                  like_holder_test.cc
+                 replace_holder_test.cc
                  decimal_type_util_test.cc
                  random_generator_holder_test.cc
                  hash_utils_test.cc
diff --git a/cpp/src/gandiva/function_holder_registry.h b/cpp/src/gandiva/function_holder_registry.h
index 225c73207fc..ced1538915d 100644
--- a/cpp/src/gandiva/function_holder_registry.h
+++ b/cpp/src/gandiva/function_holder_registry.h
@@ -28,6 +28,7 @@
 #include "gandiva/like_holder.h"
 #include "gandiva/node.h"
 #include "gandiva/random_generator_holder.h"
+#include "gandiva/replace_holder.h"
 #include "gandiva/to_date_holder.h"
 
 namespace gandiva {
@@ -66,6 +67,7 @@ class FunctionHolderRegistry {
         {"to_date", LAMBDA_MAKER(ToDateHolder)},
         {"random", LAMBDA_MAKER(RandomGeneratorHolder)},
         {"rand", LAMBDA_MAKER(RandomGeneratorHolder)},
+        {"regexp_replace", LAMBDA_MAKER(ReplaceHolder)},
     };
     return maker_map;
   }
diff --git a/cpp/src/gandiva/function_registry_string.cc b/cpp/src/gandiva/function_registry_string.cc
index 270417c1bf3..efb0b5ff3a3 100644
--- a/cpp/src/gandiva/function_registry_string.cc
+++ b/cpp/src/gandiva/function_registry_string.cc
@@ -194,6 +194,12 @@ std::vector<NativeFunction> GetStringFunctionRegistry() {
       NativeFunction("rpad", {}, DataTypeVector{utf8(), int32()}, utf8(),
                      kResultNullIfNull, "rpad_utf8_int32", NativeFunction::kNeedsContext),
 
+      NativeFunction("regexp_replace", {}, DataTypeVector{utf8(), utf8(), utf8()}, utf8(),
+                     kResultNullIfNull, "gdv_fn_regexp_replace_utf8_utf8",
+                     NativeFunction::kNeedsContext |
+                         NativeFunction::kNeedsFunctionHolder |
+                         NativeFunction::kCanReturnErrors),
+
       NativeFunction("concatOperator", {}, DataTypeVector{utf8(), utf8()}, utf8(),
                      kResultNullIfNull, "concatOperator_utf8_utf8",
                      NativeFunction::kNeedsContext),
diff --git a/cpp/src/gandiva/gdv_function_stubs.cc b/cpp/src/gandiva/gdv_function_stubs.cc
index f0142741959..2cac036abd5 100644
--- a/cpp/src/gandiva/gdv_function_stubs.cc
+++ b/cpp/src/gandiva/gdv_function_stubs.cc
@@ -35,6 +35,7 @@
 #include "gandiva/like_holder.h"
 #include "gandiva/precompiled/types.h"
 #include "gandiva/random_generator_holder.h"
+#include "gandiva/replace_holder.h"
 #include "gandiva/to_date_holder.h"
 
 /// Stub functions that can be accessed from LLVM or the pre-compiled library.
@@ -60,6 +61,18 @@ bool gdv_fn_ilike_utf8_utf8(int64_t ptr, const char* data, int data_len,
   return (*holder)(std::string(data, data_len));
 }
 
+const char* gdv_fn_regexp_replace_utf8_utf8(
+    int64_t ptr, int64_t holder_ptr, const char* data, int32_t data_len,
+    const char* /*pattern*/, int32_t /*pattern_len*/, const char* replace_string,
+    int32_t replace_string_len, int32_t* out_length) {
+  gandiva::ExecutionContext* context = reinterpret_cast<gandiva::ExecutionContext*>(ptr);
+
+  gandiva::ReplaceHolder* holder = reinterpret_cast<gandiva::ReplaceHolder*>(holder_ptr);
+
+  return (*holder)(context, data, data_len, replace_string, replace_string_len,
+                   out_length);
+}
+
 double gdv_fn_random(int64_t ptr) {
   gandiva::RandomGeneratorHolder* holder =
       reinterpret_cast<gandiva::RandomGeneratorHolder*>(ptr);
@@ -898,6 +911,21 @@ void ExportedStubFunctions::AddMappings(Engine* engine) const {
                                   types->i1_type() /*return_type*/, args,
                                   reinterpret_cast<void*>(gdv_fn_ilike_utf8_utf8));
 
+  // gdv_fn_regexp_replace_utf8_utf8
+  args = {types->i64_type(),       // int64_t ptr
+          types->i64_type(),       // int64_t holder_ptr
+          types->i8_ptr_type(),    // const char* data
+          types->i32_type(),       // int data_len
+          types->i8_ptr_type(),    // const char* pattern
+          types->i32_type(),       // int pattern_len
+          types->i8_ptr_type(),    // const char* replace_string
+          types->i32_type(),       // int32_t replace_string_len
+          types->i32_ptr_type()};  // int32_t* out_length
+
+  engine->AddGlobalMappingForFunc(
+      "gdv_fn_regexp_replace_utf8_utf8", types->i8_ptr_type() /*return_type*/, args,
+      reinterpret_cast<void*>(gdv_fn_regexp_replace_utf8_utf8));
+
   // gdv_fn_to_date_utf8_utf8
   args = {types->i64_type(),                   // int64_t execution_context
           types->i64_type(),                   // int64_t holder_ptr
diff --git a/cpp/src/gandiva/precompiled/string_ops_test.cc b/cpp/src/gandiva/precompiled/string_ops_test.cc
index a21b2671f96..b8ddc187f38 100644
--- a/cpp/src/gandiva/precompiled/string_ops_test.cc
+++ b/cpp/src/gandiva/precompiled/string_ops_test.cc
@@ -256,7 +256,7 @@ TEST(TestStringOps, TestCastBoolToVarchar) {
   EXPECT_EQ(std::string(out_str, out_len), "false");
   EXPECT_FALSE(ctx.has_error());
 
-  out_str = castVARCHAR_bool_int64(ctx_ptr, true, -3, &out_len);
+  castVARCHAR_bool_int64(ctx_ptr, true, -3, &out_len);
   EXPECT_THAT(ctx.get_error(),
               ::testing::HasSubstr("Output buffer length can't be negative"));
   ctx.Reset();
@@ -1441,13 +1441,13 @@ TEST(TestStringOps, TestReplace) {
   EXPECT_EQ(std::string(out_str, out_len), "TestString");
   EXPECT_FALSE(ctx.has_error());
 
-  out_str = replace_with_max_len_utf8_utf8_utf8(ctx_ptr, "Hell", 4, "ell", 3, "ollow", 5,
-                                                5, &out_len);
+  replace_with_max_len_utf8_utf8_utf8(ctx_ptr, "Hell", 4, "ell", 3, "ollow", 5, 5,
+                                      &out_len);
   EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Buffer overflow for output string"));
   ctx.Reset();
 
-  out_str = replace_with_max_len_utf8_utf8_utf8(ctx_ptr, "eeee", 4, "e", 1, "aaaa", 4, 14,
-                                                &out_len);
+  replace_with_max_len_utf8_utf8_utf8(ctx_ptr, "eeee", 4, "e", 1, "aaaa", 4, 14,
+                                      &out_len);
   EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Buffer overflow for output string"));
   ctx.Reset();
 }
diff --git a/cpp/src/gandiva/replace_holder.cc b/cpp/src/gandiva/replace_holder.cc
new file mode 100644
index 00000000000..8b42b585f9c
--- /dev/null
+++ b/cpp/src/gandiva/replace_holder.cc
@@ -0,0 +1,65 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "gandiva/replace_holder.h"
+
+#include "gandiva/node.h"
+#include "gandiva/regex_util.h"
+
+namespace gandiva {
+
+static bool IsArrowStringLiteral(arrow::Type::type type) {
+  return type == arrow::Type::STRING || type == arrow::Type::BINARY;
+}
+
+Status ReplaceHolder::Make(const FunctionNode& node,
+                           std::shared_ptr<ReplaceHolder>* holder) {
+  ARROW_RETURN_IF(node.children().size() != 3,
+                  Status::Invalid("'replace' function requires three parameters"));
+
+  auto literal = dynamic_cast<LiteralNode*>(node.children().at(1).get());
+  ARROW_RETURN_IF(
+      literal == nullptr,
+      Status::Invalid("'replace' function requires a literal as the second parameter"));
+
+  auto literal_type = literal->return_type()->id();
+  ARROW_RETURN_IF(
+      !IsArrowStringLiteral(literal_type),
+      Status::Invalid(
+          "'replace' function requires a string literal as the second parameter"));
+
+  return Make(arrow::util::get<std::string>(literal->holder()), holder);
+}
+
+Status ReplaceHolder::Make(const std::string& sql_pattern,
+                           std::shared_ptr<ReplaceHolder>* holder) {
+  auto lholder = std::shared_ptr<ReplaceHolder>(new ReplaceHolder(sql_pattern));
+  ARROW_RETURN_IF(!lholder->regex_.ok(),
+                  Status::Invalid("Building RE2 pattern '", sql_pattern, "' failed"));
+
+  *holder = lholder;
+  return Status::OK();
+}
+
+void ReplaceHolder::return_error(ExecutionContext* context, std::string& data,
+                                 std::string& replace_string) {
+  std::string err_msg = "Error replacing '" + replace_string + "' on the given string '" +
+                        data + "' for the given pattern: " + pattern_;
+  context->set_error_msg(err_msg.c_str());
+}
+
+}  // namespace gandiva
diff --git a/cpp/src/gandiva/replace_holder.h b/cpp/src/gandiva/replace_holder.h
new file mode 100644
index 00000000000..79150d7aa4d
--- /dev/null
+++ b/cpp/src/gandiva/replace_holder.h
@@ -0,0 +1,97 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <re2/re2.h>
+
+#include <memory>
+#include <string>
+
+#include "arrow/status.h"
+#include "gandiva/execution_context.h"
+#include "gandiva/function_holder.h"
+#include "gandiva/node.h"
+#include "gandiva/visibility.h"
+
+namespace gandiva {
+
+/// Function Holder for 'replace'
+class GANDIVA_EXPORT ReplaceHolder : public FunctionHolder {
+ public:
+  ~ReplaceHolder() override = default;
+
+  static Status Make(const FunctionNode& node, std::shared_ptr<ReplaceHolder>* holder);
+
+  static Status Make(const std::string& sql_pattern,
+                     std::shared_ptr<ReplaceHolder>* holder);
+
+  /// Return a new string with the pattern that matched the regex replaced for
+  /// the replace_input parameter.
+  const char* operator()(ExecutionContext* ctx, const char* user_input,
+                         int32_t user_input_len, const char* replace_input,
+                         int32_t replace_input_len, int32_t* out_length) {
+    std::string user_input_as_str(user_input, user_input_len);
+    std::string replace_input_as_str(replace_input, replace_input_len);
+
+    int32_t total_replaces =
+        RE2::GlobalReplace(&user_input_as_str, regex_, replace_input_as_str);
+
+    if (total_replaces < 0) {
+      return_error(ctx, user_input_as_str, replace_input_as_str);
+      *out_length = 0;
+      return "";
+    }
+
+    if (total_replaces == 0) {
+      *out_length = user_input_len;
+      return user_input;
+    }
+
+    *out_length = static_cast<int32_t>(user_input_as_str.size());
+
+    // This condition treats the case where the whole string is replaced by an empty
+    // string
+    if (*out_length == 0) {
+      return "";
+    }
+
+    char* result_buffer = reinterpret_cast<char*>(ctx->arena()->Allocate(*out_length));
+
+    if (result_buffer == NULLPTR) {
+      ctx->set_error_msg("Could not allocate memory for result");
+      *out_length = 0;
+      return "";
+    }
+
+    memcpy(result_buffer, user_input_as_str.data(), *out_length);
+
+    return result_buffer;
+  }
+
+ private:
+  explicit ReplaceHolder(const std::string& pattern)
+      : pattern_(pattern), regex_(pattern) {}
+
+  void return_error(ExecutionContext* context, std::string& data,
+                    std::string& replace_string);
+
+  std::string pattern_;  // posix pattern string, to help debugging
+  RE2 regex_;            // compiled regex for the pattern
+};
+
+}  // namespace gandiva
diff --git a/cpp/src/gandiva/replace_holder_test.cc b/cpp/src/gandiva/replace_holder_test.cc
new file mode 100644
index 00000000000..b0830d4f004
--- /dev/null
+++ b/cpp/src/gandiva/replace_holder_test.cc
@@ -0,0 +1,129 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "gandiva/replace_holder.h"
+
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <vector>
+
+namespace gandiva {
+
+class TestReplaceHolder : public ::testing::Test {
+ protected:
+  ExecutionContext execution_context_;
+};
+
+TEST_F(TestReplaceHolder, TestMultipleReplace) {
+  std::shared_ptr<ReplaceHolder> replace_holder;
+
+  auto status = ReplaceHolder::Make("ana", &replace_holder);
+  EXPECT_EQ(status.ok(), true) << status.message();
+
+  std::string input_string = "banana";
+  std::string replace_string;
+  int32_t out_length = 0;
+
+  auto& replace = *replace_holder;
+  const char* ret =
+      replace(&execution_context_, input_string.c_str(),
+              static_cast<int32_t>(input_string.length()), replace_string.c_str(),
+              static_cast<int32_t>(replace_string.length()), &out_length);
+  std::string ret_as_str(ret, out_length);
+  EXPECT_EQ(out_length, 3);
+  EXPECT_EQ(ret_as_str, "bna");
+
+  input_string = "bananaana";
+
+  ret = replace(&execution_context_, input_string.c_str(),
+                static_cast<int32_t>(input_string.length()), replace_string.c_str(),
+                static_cast<int32_t>(replace_string.length()), &out_length);
+  ret_as_str = std::string(ret, out_length);
+  EXPECT_EQ(out_length, 3);
+  EXPECT_EQ(ret_as_str, "bna");
+
+  input_string = "bananana";
+
+  ret = replace(&execution_context_, input_string.c_str(),
+                static_cast<int32_t>(input_string.length()), replace_string.c_str(),
+                static_cast<int32_t>(replace_string.length()), &out_length);
+  ret_as_str = std::string(ret, out_length);
+  EXPECT_EQ(out_length, 2);
+  EXPECT_EQ(ret_as_str, "bn");
+
+  input_string = "anaana";
+
+  ret = replace(&execution_context_, input_string.c_str(),
+                static_cast<int32_t>(input_string.length()), replace_string.c_str(),
+                static_cast<int32_t>(replace_string.length()), &out_length);
+  ret_as_str = std::string(ret, out_length);
+  EXPECT_EQ(out_length, 0);
+  EXPECT_FALSE(execution_context_.has_error());
+  EXPECT_EQ(ret_as_str, "");
+}
+
+TEST_F(TestReplaceHolder, TestNoMatchPattern) {
+  std::shared_ptr<ReplaceHolder> replace_holder;
+
+  auto status = ReplaceHolder::Make("ana", &replace_holder);
+  EXPECT_EQ(status.ok(), true) << status.message();
+
+  std::string input_string = "apple";
+  std::string replace_string;
+  int32_t out_length = 0;
+
+  auto& replace = *replace_holder;
+  const char* ret =
+      replace(&execution_context_, input_string.c_str(),
+              static_cast<int32_t>(input_string.length()), replace_string.c_str(),
+              static_cast<int32_t>(replace_string.length()), &out_length);
+  std::string ret_as_string(ret, out_length);
+  EXPECT_EQ(out_length, 5);
+  EXPECT_EQ(ret_as_string, "apple");
+}
+
+TEST_F(TestReplaceHolder, TestReplaceSameSize) {
+  std::shared_ptr<ReplaceHolder> replace_holder;
+
+  auto status = ReplaceHolder::Make("a", &replace_holder);
+  EXPECT_EQ(status.ok(), true) << status.message();
+
+  std::string input_string = "ananindeua";
+  std::string replace_string = "b";
+  int32_t out_length = 0;
+
+  auto& replace = *replace_holder;
+  const char* ret =
+      replace(&execution_context_, input_string.c_str(),
+              static_cast<int32_t>(input_string.length()), replace_string.c_str(),
+              static_cast<int32_t>(replace_string.length()), &out_length);
+  std::string ret_as_string(ret, out_length);
+  EXPECT_EQ(out_length, 10);
+  EXPECT_EQ(ret_as_string, "bnbnindeub");
+}
+
+TEST_F(TestReplaceHolder, TestReplaceInvalidPattern) {
+  std::shared_ptr<ReplaceHolder> replace_holder;
+
+  auto status = ReplaceHolder::Make("+", &replace_holder);
+  EXPECT_EQ(status.ok(), false) << status.message();
+
+  execution_context_.Reset();
+}
+
+}  // namespace gandiva
diff --git a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java
index c4a6cd1e135..03c9377b0e7 100644
--- a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java
+++ b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java
@@ -654,6 +654,66 @@ public void testRegex() throws GandivaException {
     eval.close();
   }
 
+  @Test
+  public void testRegexpReplace() throws GandivaException {
+
+    Field x = Field.nullable("x", new ArrowType.Utf8());
+    Field replaceString = Field.nullable("replaceString", new ArrowType.Utf8());
+
+    Field retType = Field.nullable("c", new ArrowType.Utf8());
+
+    TreeNode cond =
+            TreeBuilder.makeFunction(
+                    "regexp_replace",
+                    Lists.newArrayList(TreeBuilder.makeField(x), TreeBuilder.makeStringLiteral("ana"),
+                            TreeBuilder.makeField(replaceString)),
+                    new ArrowType.Utf8());
+    ExpressionTree expr = TreeBuilder.makeExpression(cond, retType);
+    Schema schema = new Schema(Lists.newArrayList(x, replaceString));
+    Projector eval = Projector.make(schema, Lists.newArrayList(expr));
+
+    int numRows = 5;
+    byte[] validity = new byte[]{(byte) 15, 0};
+    String[] valuesX = new String[]{"banana", "bananaana", "bananana", "anaana", "anaana"};
+    String[] valuesReplace = new String[]{"ue", "", "", "c", ""};
+    String[] expected = new String[]{"buena", "bna", "bn", "cc", null};
+
+    ArrowBuf validityX = buf(validity);
+    ArrowBuf validityReplace = buf(validity);
+    List<ArrowBuf> dataBufsX = stringBufs(valuesX);
+    List<ArrowBuf> dataBufsReplace = stringBufs(valuesReplace);
+
+    ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0);
+
+    ArrowRecordBatch batch =
+            new ArrowRecordBatch(
+                    numRows,
+                    Lists.newArrayList(fieldNode, fieldNode),
+                    Lists.newArrayList(validityX, dataBufsX.get(0), dataBufsX.get(1), validityReplace,
+                            dataBufsReplace.get(0), dataBufsReplace.get(1)));
+
+    // allocate data for output vector.
+    VarCharVector outVector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator);
+    outVector.allocateNew(numRows * 15, numRows);
+
+    // evaluate expression
+    List<ValueVector> output = new ArrayList<>();
+    output.add(outVector);
+    eval.evaluate(batch, output);
+    eval.close();
+
+    // match expected output.
+    for (int i = 0; i < numRows - 1; i++) {
+      assertFalse("Expect none value equals null", outVector.isNull(i));
+      assertEquals(expected[i], new String(outVector.get(i)));
+    }
+
+    assertTrue("Last value must be null", outVector.isNull(numRows - 1));
+
+    releaseRecordBatch(batch);
+    releaseValueVectors(output);
+  }
+
   @Test
   public void testRand() throws GandivaException {
 

From bc175f9f4e6575bfbaaabe84aecde13244330c1e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Pedro?= <joaop@simbioseventures.com>
Date: Wed, 4 Aug 2021 16:06:39 +0530
Subject: [PATCH 699/719] ARROW-12866: [C++][Gandiva] Implement STRPOS function
 on Gandiva
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Search for a string within another string.
Same as "LOCATE(substr, str)", except for the reverse order of the arguments STRPOS(str, substr).

Closes #10396 from jpedroantunes/feature/add-str-pos and squashes the following commits:

a407ac1aa <João Pedro> Fix linter errors
0f3a8ec84 <João Pedro> Add strpos function definition and registry

Authored-by: João Pedro <joaop@simbioseventures.com>
Signed-off-by: Praveen <praveen@dremio.com>
---
 cpp/src/gandiva/function_registry_string.cc   |  1 +
 cpp/src/gandiva/precompiled/string_ops.cc     |  8 ++++
 .../gandiva/precompiled/string_ops_test.cc    | 44 +++++++++++++++++++
 cpp/src/gandiva/precompiled/types.h           |  3 ++
 4 files changed, 56 insertions(+)

diff --git a/cpp/src/gandiva/function_registry_string.cc b/cpp/src/gandiva/function_registry_string.cc
index efb0b5ff3a3..089277a9e46 100644
--- a/cpp/src/gandiva/function_registry_string.cc
+++ b/cpp/src/gandiva/function_registry_string.cc
@@ -48,6 +48,7 @@ std::vector<NativeFunction> GetStringFunctionRegistry() {
       BINARY_RELATIONAL_SAFE_NULL_IF_NULL_UTF8_FN(is_substr, {}),
 
       BINARY_UNSAFE_NULL_IF_NULL(locate, {"position"}, utf8, int32),
+      BINARY_UNSAFE_NULL_IF_NULL(strpos, {}, utf8, int32),
 
       UNARY_OCTET_LEN_FN(octet_length, {}), UNARY_OCTET_LEN_FN(bit_length, {}),
 
diff --git a/cpp/src/gandiva/precompiled/string_ops.cc b/cpp/src/gandiva/precompiled/string_ops.cc
index 24064fa2e06..e3cac7c26bc 100644
--- a/cpp/src/gandiva/precompiled/string_ops.cc
+++ b/cpp/src/gandiva/precompiled/string_ops.cc
@@ -1615,6 +1615,14 @@ const char* convert_toUTF8(int64_t context, const char* value, int32_t value_len
   return value;
 }
 
+// Search for a string within another string
+// Same as "locate(substr, str)", except for the reverse order of the arguments.
+FORCE_INLINE
+gdv_int32 strpos_utf8_utf8(gdv_int64 context, const char* str, gdv_int32 str_len,
+                           const char* sub_str, gdv_int32 sub_str_len) {
+  return locate_utf8_utf8_int32(context, sub_str, sub_str_len, str, str_len, 1);
+}
+
 // Search for a string within another string
 FORCE_INLINE
 gdv_int32 locate_utf8_utf8(gdv_int64 context, const char* sub_str, gdv_int32 sub_str_len,
diff --git a/cpp/src/gandiva/precompiled/string_ops_test.cc b/cpp/src/gandiva/precompiled/string_ops_test.cc
index b8ddc187f38..ed8d7b66110 100644
--- a/cpp/src/gandiva/precompiled/string_ops_test.cc
+++ b/cpp/src/gandiva/precompiled/string_ops_test.cc
@@ -1399,6 +1399,50 @@ TEST(TestStringOps, TestByteSubstr) {
   EXPECT_FALSE(ctx.has_error());
 }
 
+TEST(TestStringOps, TestStrPos) {
+  gandiva::ExecutionContext ctx;
+  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
+
+  int pos;
+
+  pos = strpos_utf8_utf8(ctx_ptr, "TestString", 10, "String", 6);
+  EXPECT_EQ(pos, 5);
+  EXPECT_FALSE(ctx.has_error());
+
+  pos = strpos_utf8_utf8(ctx_ptr, "TestString", 10, "String", 6);
+  EXPECT_EQ(pos, 5);
+  EXPECT_FALSE(ctx.has_error());
+
+  pos = strpos_utf8_utf8(ctx_ptr, "abcabc", 6, "abc", 3);
+  EXPECT_EQ(pos, 1);
+  EXPECT_FALSE(ctx.has_error());
+
+  pos = strpos_utf8_utf8(ctx_ptr, "s†å†emçåå†d", 21, "çåå", 6);
+  EXPECT_EQ(pos, 7);
+  EXPECT_FALSE(ctx.has_error());
+
+  pos = strpos_utf8_utf8(ctx_ptr, "†barbar", 9, "bar", 3);
+  EXPECT_EQ(pos, 2);
+  EXPECT_FALSE(ctx.has_error());
+
+  pos = strpos_utf8_utf8(ctx_ptr, "", 0, "sub", 3);
+  EXPECT_EQ(pos, 0);
+  EXPECT_FALSE(ctx.has_error());
+
+  pos = strpos_utf8_utf8(ctx_ptr, "str", 3, "", 0);
+  EXPECT_EQ(pos, 0);
+  EXPECT_FALSE(ctx.has_error());
+
+  std::string d(
+      "a\xff"
+      "c");
+  pos = strpos_utf8_utf8(ctx_ptr, d.data(), static_cast<int>(d.length()), "c", 1);
+  EXPECT_THAT(ctx.get_error(),
+              ::testing::HasSubstr(
+                  "unexpected byte \\ff encountered while decoding utf8 string"));
+  ctx.Reset();
+}
+
 TEST(TestStringOps, TestReplace) {
   gandiva::ExecutionContext ctx;
   uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
diff --git a/cpp/src/gandiva/precompiled/types.h b/cpp/src/gandiva/precompiled/types.h
index 7032f459974..0d0ea8a348e 100644
--- a/cpp/src/gandiva/precompiled/types.h
+++ b/cpp/src/gandiva/precompiled/types.h
@@ -434,6 +434,9 @@ gdv_int32 ascii_utf8(const char* data, gdv_int32 data_len);
 gdv_int32 locate_utf8_utf8(gdv_int64 context, const char* sub_str, gdv_int32 sub_str_len,
                            const char* str, gdv_int32 str_len);
 
+gdv_int32 strpos_utf8_utf8(gdv_int64 context, const char* str, gdv_int32 str_len,
+                           const char* sub_str, gdv_int32 sub_str_len);
+
 gdv_int32 locate_utf8_utf8_int32(gdv_int64 context, const char* sub_str,
                                  gdv_int32 sub_str_len, const char* str,
                                  gdv_int32 str_len, gdv_int32 start_pos);

From 1f8c62ef6a7dfa460408ab14a2825ef6b359f2e3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Pedro?= <joaop@simbioseventures.com>
Date: Wed, 4 Aug 2021 16:08:16 +0530
Subject: [PATCH 700/719] ARROW-13163: [C++][Gandiva] Implement REPEAT function
 on Gandiva
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

 Implement REPEAT function on Gandiva which concatenate a string "n" times.
- REPEAT(str, int)

Closes #10595 from jpedroantunes/feature/add-repeat-function and squashes the following commits:

f279a7455 <João Pedro> Fix lint errors
57506c799 <João Pedro> Add implementation for repeat function

Authored-by: João Pedro <joaop@simbioseventures.com>
Signed-off-by: Praveen <praveen@dremio.com>
---
 cpp/src/gandiva/function_registry_string.cc   |  4 ++
 cpp/src/gandiva/precompiled/string_ops.cc     | 27 +++++++++++++
 .../gandiva/precompiled/string_ops_test.cc    | 27 +++++++++++++
 cpp/src/gandiva/precompiled/types.h           |  3 ++
 cpp/src/gandiva/tests/projector_test.cc       | 38 +++++++++++++++++++
 5 files changed, 99 insertions(+)

diff --git a/cpp/src/gandiva/function_registry_string.cc b/cpp/src/gandiva/function_registry_string.cc
index 089277a9e46..3ea426c85f4 100644
--- a/cpp/src/gandiva/function_registry_string.cc
+++ b/cpp/src/gandiva/function_registry_string.cc
@@ -74,6 +74,10 @@ std::vector<NativeFunction> GetStringFunctionRegistry() {
       NativeFunction("unbase64", {}, DataTypeVector{utf8()}, binary(), kResultNullIfNull,
                      "gdv_fn_base64_decode_utf8", NativeFunction::kNeedsContext),
 
+      NativeFunction("repeat", {}, DataTypeVector{utf8(), int32()}, utf8(),
+                     kResultNullIfNull, "repeat_utf8_int32",
+                     NativeFunction::kNeedsContext),
+
       NativeFunction("upper", {}, DataTypeVector{utf8()}, utf8(), kResultNullIfNull,
                      "gdv_fn_upper_utf8", NativeFunction::kNeedsContext),
 
diff --git a/cpp/src/gandiva/precompiled/string_ops.cc b/cpp/src/gandiva/precompiled/string_ops.cc
index e3cac7c26bc..48c24b862b8 100644
--- a/cpp/src/gandiva/precompiled/string_ops.cc
+++ b/cpp/src/gandiva/precompiled/string_ops.cc
@@ -808,6 +808,33 @@ const char* substr_utf8_int64(gdv_int64 context, const char* input, gdv_int32 in
   return substr_utf8_int64_int64(context, input, in_len, offset64, in_len, out_len);
 }
 
+FORCE_INLINE
+const char* repeat_utf8_int32(gdv_int64 context, const char* in, gdv_int32 in_len,
+                              gdv_int32 repeat_number, gdv_int32* out_len) {
+  // if the repeat number is zero, then return empty string
+  if (repeat_number == 0 || in_len <= 0) {
+    *out_len = 0;
+    return "";
+  }
+  // if the repeat number is a negative number, an error is set on context
+  if (repeat_number < 0) {
+    gdv_fn_context_set_error_msg(context, "Repeat number can't be negative");
+    *out_len = 0;
+    return "";
+  }
+  *out_len = repeat_number * in_len;
+  char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
+  if (ret == nullptr) {
+    gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
+    *out_len = 0;
+    return "";
+  }
+  for (int i = 0; i < repeat_number; ++i) {
+    memcpy(ret + (i * in_len), in, in_len);
+  }
+  return ret;
+}
+
 FORCE_INLINE
 const char* concat_utf8_utf8(gdv_int64 context, const char* left, gdv_int32 left_len,
                              bool left_validity, const char* right, gdv_int32 right_len,
diff --git a/cpp/src/gandiva/precompiled/string_ops_test.cc b/cpp/src/gandiva/precompiled/string_ops_test.cc
index ed8d7b66110..6221dffb302 100644
--- a/cpp/src/gandiva/precompiled/string_ops_test.cc
+++ b/cpp/src/gandiva/precompiled/string_ops_test.cc
@@ -235,6 +235,33 @@ TEST(TestStringOps, TestConvertReplaceInvalidUtf8Char) {
   ctx.Reset();
 }
 
+TEST(TestStringOps, TestRepeat) {
+  gandiva::ExecutionContext ctx;
+  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
+  gdv_int32 out_len = 0;
+
+  const char* out_str = repeat_utf8_int32(ctx_ptr, "abc", 3, 2, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "abcabc");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = repeat_utf8_int32(ctx_ptr, "a", 1, 5, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "aaaaa");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = repeat_utf8_int32(ctx_ptr, "", 0, 10, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = repeat_utf8_int32(ctx_ptr, "", -20, 10, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = repeat_utf8_int32(ctx_ptr, "a", 1, -10, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+  EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Repeat number can't be negative"));
+  ctx.Reset();
+}
+
 TEST(TestStringOps, TestCastBoolToVarchar) {
   gandiva::ExecutionContext ctx;
   uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
diff --git a/cpp/src/gandiva/precompiled/types.h b/cpp/src/gandiva/precompiled/types.h
index 0d0ea8a348e..16d1550b46b 100644
--- a/cpp/src/gandiva/precompiled/types.h
+++ b/cpp/src/gandiva/precompiled/types.h
@@ -275,6 +275,9 @@ gdv_date64 last_day_from_timestamp(gdv_date64 millis);
 
 gdv_int64 truncate_int64_int32(gdv_int64 in, gdv_int32 out_scale);
 
+const char* repeat_utf8_int32(gdv_int64 context, const char* in, gdv_int32 in_len,
+                              gdv_int32 repeat_times, gdv_int32* out_len);
+
 const char* substr_utf8_int64_int64(gdv_int64 context, const char* input,
                                     gdv_int32 in_len, gdv_int64 offset64,
                                     gdv_int64 length, gdv_int32* out_len);
diff --git a/cpp/src/gandiva/tests/projector_test.cc b/cpp/src/gandiva/tests/projector_test.cc
index 2ce52befdc1..12020777309 100644
--- a/cpp/src/gandiva/tests/projector_test.cc
+++ b/cpp/src/gandiva/tests/projector_test.cc
@@ -1288,6 +1288,44 @@ TEST_F(TestProjector, TestIfElseOpt) {
   EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
 }
 
+TEST_F(TestProjector, TestRepeat) {
+  // schema for input fields
+  auto field0 = field("f0", arrow::utf8());
+  auto field1 = field("f1", arrow::int32());
+  auto schema = arrow::schema({field0, field1});
+
+  // output fields
+  auto field_repeat = field("repeat", arrow::utf8());
+
+  // Build expression
+  auto repeat_expr =
+      TreeExprBuilder::MakeExpression("repeat", {field0, field1}, field_repeat);
+
+  std::shared_ptr<Projector> projector;
+  auto status = Projector::Make(schema, {repeat_expr}, TestConfiguration(), &projector);
+  EXPECT_TRUE(status.ok()) << status.message();
+
+  // Create a row-batch with some sample data
+  int num_records = 5;
+  auto array0 =
+      MakeArrowArrayUtf8({"ab", "a", "car", "valid", ""}, {true, true, true, true, true});
+  auto array1 = MakeArrowArrayInt32({2, 1, 3, 2, 10}, {true, true, true, true, true});
+  // expected output
+  auto exp_repeat = MakeArrowArrayUtf8({"abab", "a", "carcarcar", "validvalid", ""},
+                                       {true, true, true, true, true});
+
+  // prepare input record batch
+  auto in = arrow::RecordBatch::Make(schema, num_records, {array0, array1});
+
+  // Evaluate expression
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in, pool_, &outputs);
+  EXPECT_TRUE(status.ok()) << status.message();
+
+  // Validate results
+  EXPECT_ARROW_ARRAY_EQUALS(exp_repeat, outputs.at(0));
+}
+
 TEST_F(TestProjector, TestLpad) {
   // schema for input fields
   auto field0 = field("f0", arrow::utf8());

From e4ba2f28f79fd5bcc4bf466c4b0ee75a0bf2c375 Mon Sep 17 00:00:00 2001
From: christian <ccce91@gmail.com>
Date: Wed, 4 Aug 2021 14:02:34 +0200
Subject: [PATCH 701/719] ARROW-12946: [C++] String swap case kernel

This PR adds `swapcase` compute kernel for string.  It is similar to  `Python str.swapcase()`

Closes #10855 from Christian8491/ARROW-12946-String-swap-case-kernel

Authored-by: christian <ccce91@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 .../arrow/compute/kernels/scalar_string.cc    | 349 +++++++++++-------
 .../compute/kernels/scalar_string_test.cc     |  28 ++
 docs/source/cpp/compute.rst                   |   4 +
 docs/source/python/api/compute.rst            |   2 +
 4 files changed, 240 insertions(+), 143 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/scalar_string.cc b/cpp/src/arrow/compute/kernels/scalar_string.cc
index 6ef08a7d2bb..5359567fc12 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string.cc
@@ -81,6 +81,51 @@ static inline uint8_t ascii_toupper(uint8_t utf8_code_unit) {
                                                               : utf8_code_unit;
 }
 
+static inline bool IsLowerCaseCharacterAscii(uint8_t ascii_character) {
+  return (ascii_character >= 'a') && (ascii_character <= 'z');
+}
+
+static inline bool IsUpperCaseCharacterAscii(uint8_t ascii_character) {
+  return (ascii_character >= 'A') && (ascii_character <= 'Z');
+}
+
+static inline bool IsCasedCharacterAscii(uint8_t ascii_character) {
+  return IsLowerCaseCharacterAscii(ascii_character) ||
+         IsUpperCaseCharacterAscii(ascii_character);
+}
+
+static inline bool IsAlphaCharacterAscii(uint8_t ascii_character) {
+  return IsCasedCharacterAscii(ascii_character);  // same
+}
+
+static inline bool IsAlphaNumericCharacterAscii(uint8_t ascii_character) {
+  return ((ascii_character >= '0') && (ascii_character <= '9')) ||
+         ((ascii_character >= 'a') && (ascii_character <= 'z')) ||
+         ((ascii_character >= 'A') && (ascii_character <= 'Z'));
+}
+
+static inline bool IsDecimalCharacterAscii(uint8_t ascii_character) {
+  return ((ascii_character >= '0') && (ascii_character <= '9'));
+}
+
+static inline bool IsSpaceCharacterAscii(uint8_t ascii_character) {
+  return ((ascii_character >= 0x09) && (ascii_character <= 0x0D)) ||
+         (ascii_character == ' ');
+}
+
+static inline bool IsPrintableCharacterAscii(uint8_t ascii_character) {
+  return ((ascii_character >= ' ') && (ascii_character <= '~'));
+}
+
+static inline uint8_t ascii_swapcase(uint8_t utf8_code_unit) {
+  if (IsLowerCaseCharacterAscii(utf8_code_unit)) {
+    utf8_code_unit -= 32;
+  } else if (IsUpperCaseCharacterAscii(utf8_code_unit)) {
+    utf8_code_unit += 32;
+  }
+  return utf8_code_unit;
+}
+
 template <typename T>
 static inline bool IsAsciiCharacter(T character) {
   return character < 128;
@@ -109,17 +154,130 @@ constexpr uint32_t kMaxCodepointLookup =
     0xffff;  // up to this codepoint is in a lookup table
 std::vector<uint32_t> lut_upper_codepoint;
 std::vector<uint32_t> lut_lower_codepoint;
+std::vector<uint32_t> lut_swapcase_codepoint;
 std::vector<utf8proc_category_t> lut_category;
 std::once_flag flag_case_luts;
 
+// IsAlpha/Digit etc
+
+static inline bool HasAnyUnicodeGeneralCategory(uint32_t codepoint, uint32_t mask) {
+  utf8proc_category_t general_category = codepoint <= kMaxCodepointLookup
+                                             ? lut_category[codepoint]
+                                             : utf8proc_category(codepoint);
+  uint32_t general_category_bit = 1 << general_category;
+  // for e.g. undefined (but valid) codepoints, general_category == 0 ==
+  // UTF8PROC_CATEGORY_CN
+  return (general_category != UTF8PROC_CATEGORY_CN) &&
+         ((general_category_bit & mask) != 0);
+}
+
+template <typename... Categories>
+static inline bool HasAnyUnicodeGeneralCategory(uint32_t codepoint, uint32_t mask,
+                                                utf8proc_category_t category,
+                                                Categories... categories) {
+  return HasAnyUnicodeGeneralCategory(codepoint, mask | (1 << category), categories...);
+}
+
+template <typename... Categories>
+static inline bool HasAnyUnicodeGeneralCategory(uint32_t codepoint,
+                                                utf8proc_category_t category,
+                                                Categories... categories) {
+  return HasAnyUnicodeGeneralCategory(codepoint, static_cast<uint32_t>(1u << category),
+                                      categories...);
+}
+
+static inline bool IsCasedCharacterUnicode(uint32_t codepoint) {
+  return HasAnyUnicodeGeneralCategory(codepoint, UTF8PROC_CATEGORY_LU,
+                                      UTF8PROC_CATEGORY_LL, UTF8PROC_CATEGORY_LT) ||
+         ((static_cast<uint32_t>(utf8proc_toupper(codepoint)) != codepoint) ||
+          (static_cast<uint32_t>(utf8proc_tolower(codepoint)) != codepoint));
+}
+
+static inline bool IsLowerCaseCharacterUnicode(uint32_t codepoint) {
+  // although this trick seems to work for upper case, this is not enough for lower case
+  // testing, see https://github.com/JuliaStrings/utf8proc/issues/195 . But currently the
+  // best we can do
+  return (HasAnyUnicodeGeneralCategory(codepoint, UTF8PROC_CATEGORY_LL) ||
+          ((static_cast<uint32_t>(utf8proc_toupper(codepoint)) != codepoint) &&
+           (static_cast<uint32_t>(utf8proc_tolower(codepoint)) == codepoint))) &&
+         !HasAnyUnicodeGeneralCategory(codepoint, UTF8PROC_CATEGORY_LT);
+}
+
+static inline bool IsUpperCaseCharacterUnicode(uint32_t codepoint) {
+  // this seems to be a good workaround for utf8proc not having case information
+  // https://github.com/JuliaStrings/utf8proc/issues/195
+  return (HasAnyUnicodeGeneralCategory(codepoint, UTF8PROC_CATEGORY_LU) ||
+          ((static_cast<uint32_t>(utf8proc_toupper(codepoint)) == codepoint) &&
+           (static_cast<uint32_t>(utf8proc_tolower(codepoint)) != codepoint))) &&
+         !HasAnyUnicodeGeneralCategory(codepoint, UTF8PROC_CATEGORY_LT);
+}
+
+static inline bool IsAlphaNumericCharacterUnicode(uint32_t codepoint) {
+  return HasAnyUnicodeGeneralCategory(
+      codepoint, UTF8PROC_CATEGORY_LU, UTF8PROC_CATEGORY_LL, UTF8PROC_CATEGORY_LT,
+      UTF8PROC_CATEGORY_LM, UTF8PROC_CATEGORY_LO, UTF8PROC_CATEGORY_ND,
+      UTF8PROC_CATEGORY_NL, UTF8PROC_CATEGORY_NO);
+}
+
+static inline bool IsAlphaCharacterUnicode(uint32_t codepoint) {
+  return HasAnyUnicodeGeneralCategory(codepoint, UTF8PROC_CATEGORY_LU,
+                                      UTF8PROC_CATEGORY_LL, UTF8PROC_CATEGORY_LT,
+                                      UTF8PROC_CATEGORY_LM, UTF8PROC_CATEGORY_LO);
+}
+
+static inline bool IsDecimalCharacterUnicode(uint32_t codepoint) {
+  return HasAnyUnicodeGeneralCategory(codepoint, UTF8PROC_CATEGORY_ND);
+}
+
+static inline bool IsDigitCharacterUnicode(uint32_t codepoint) {
+  // Python defines this as Numeric_Type=Digit or Numeric_Type=Decimal.
+  // utf8proc has no support for this, this is the best we can do:
+  return HasAnyUnicodeGeneralCategory(codepoint, UTF8PROC_CATEGORY_ND);
+}
+
+static inline bool IsNumericCharacterUnicode(uint32_t codepoint) {
+  // Formally this is not correct, but utf8proc does not allow us to query for Numerical
+  // properties, e.g. Numeric_Value and Numeric_Type
+  // Python defines Numeric as Numeric_Type=Digit, Numeric_Type=Decimal or
+  // Numeric_Type=Numeric.
+  return HasAnyUnicodeGeneralCategory(codepoint, UTF8PROC_CATEGORY_ND,
+                                      UTF8PROC_CATEGORY_NL, UTF8PROC_CATEGORY_NO);
+}
+
+static inline bool IsSpaceCharacterUnicode(uint32_t codepoint) {
+  auto property = utf8proc_get_property(codepoint);
+  return HasAnyUnicodeGeneralCategory(codepoint, UTF8PROC_CATEGORY_ZS) ||
+         property->bidi_class == UTF8PROC_BIDI_CLASS_WS ||
+         property->bidi_class == UTF8PROC_BIDI_CLASS_B ||
+         property->bidi_class == UTF8PROC_BIDI_CLASS_S;
+}
+
+static inline bool IsPrintableCharacterUnicode(uint32_t codepoint) {
+  uint32_t general_category = utf8proc_category(codepoint);
+  return (general_category != UTF8PROC_CATEGORY_CN) &&
+         !HasAnyUnicodeGeneralCategory(codepoint, UTF8PROC_CATEGORY_CC,
+                                       UTF8PROC_CATEGORY_CF, UTF8PROC_CATEGORY_CS,
+                                       UTF8PROC_CATEGORY_CO, UTF8PROC_CATEGORY_ZS,
+                                       UTF8PROC_CATEGORY_ZL, UTF8PROC_CATEGORY_ZP);
+}
+
 void EnsureLookupTablesFilled() {
   std::call_once(flag_case_luts, []() {
     lut_upper_codepoint.reserve(kMaxCodepointLookup + 1);
     lut_lower_codepoint.reserve(kMaxCodepointLookup + 1);
+    lut_swapcase_codepoint.reserve(kMaxCodepointLookup + 1);
     for (uint32_t i = 0; i <= kMaxCodepointLookup; i++) {
       lut_upper_codepoint.push_back(utf8proc_toupper(i));
       lut_lower_codepoint.push_back(utf8proc_tolower(i));
       lut_category.push_back(utf8proc_category(i));
+
+      if (IsLowerCaseCharacterUnicode(i)) {
+        lut_swapcase_codepoint.push_back(utf8proc_toupper(i));
+      } else if (IsUpperCaseCharacterUnicode(i)) {
+        lut_swapcase_codepoint.push_back(utf8proc_tolower(i));
+      } else {
+        lut_swapcase_codepoint.push_back(i);
+      }
     }
   });
 }
@@ -318,6 +476,26 @@ struct UTF8LowerTransform : public CaseMappingTransform {
 template <typename Type>
 using UTF8Lower = StringTransformExec<Type, StringTransformCodepoint<UTF8LowerTransform>>;
 
+struct UTF8SwapCaseTransform : public CaseMappingTransform {
+  static uint32_t TransformCodepoint(uint32_t codepoint) {
+    if (codepoint <= kMaxCodepointLookup) {
+      return lut_swapcase_codepoint[codepoint];
+    } else {
+      if (IsLowerCaseCharacterUnicode(codepoint)) {
+        return utf8proc_toupper(codepoint);
+      } else if (IsUpperCaseCharacterUnicode(codepoint)) {
+        return utf8proc_tolower(codepoint);
+      }
+    }
+
+    return codepoint;
+  }
+};
+
+template <typename Type>
+using UTF8SwapCase =
+    StringTransformExec<Type, StringTransformCodepoint<UTF8SwapCaseTransform>>;
+
 #endif  // ARROW_WITH_UTF8PROC
 
 struct AsciiReverseTransform : public StringTransformBase {
@@ -443,6 +621,17 @@ struct AsciiLower {
   }
 };
 
+void TransformAsciiSwapCase(const uint8_t* input, int64_t length, uint8_t* output) {
+  std::transform(input, input + length, output, ascii_swapcase);
+}
+
+template <typename Type>
+struct AsciiSwapCase {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    return StringDataTransform<Type>(ctx, batch, TransformAsciiSwapCase, out);
+  }
+};
+
 // ----------------------------------------------------------------------
 // exact pattern detection
 
@@ -1351,149 +1540,6 @@ void AddSlice(FunctionRegistry* registry) {
   DCHECK_OK(registry->AddFunction(std::move(func)));
 }
 
-// IsAlpha/Digit etc
-
-#ifdef ARROW_WITH_UTF8PROC
-
-static inline bool HasAnyUnicodeGeneralCategory(uint32_t codepoint, uint32_t mask) {
-  utf8proc_category_t general_category = codepoint <= kMaxCodepointLookup
-                                             ? lut_category[codepoint]
-                                             : utf8proc_category(codepoint);
-  uint32_t general_category_bit = 1 << general_category;
-  // for e.g. undefined (but valid) codepoints, general_category == 0 ==
-  // UTF8PROC_CATEGORY_CN
-  return (general_category != UTF8PROC_CATEGORY_CN) &&
-         ((general_category_bit & mask) != 0);
-}
-
-template <typename... Categories>
-static inline bool HasAnyUnicodeGeneralCategory(uint32_t codepoint, uint32_t mask,
-                                                utf8proc_category_t category,
-                                                Categories... categories) {
-  return HasAnyUnicodeGeneralCategory(codepoint, mask | (1 << category), categories...);
-}
-
-template <typename... Categories>
-static inline bool HasAnyUnicodeGeneralCategory(uint32_t codepoint,
-                                                utf8proc_category_t category,
-                                                Categories... categories) {
-  return HasAnyUnicodeGeneralCategory(codepoint, static_cast<uint32_t>(1u << category),
-                                      categories...);
-}
-
-static inline bool IsCasedCharacterUnicode(uint32_t codepoint) {
-  return HasAnyUnicodeGeneralCategory(codepoint, UTF8PROC_CATEGORY_LU,
-                                      UTF8PROC_CATEGORY_LL, UTF8PROC_CATEGORY_LT) ||
-         ((static_cast<uint32_t>(utf8proc_toupper(codepoint)) != codepoint) ||
-          (static_cast<uint32_t>(utf8proc_tolower(codepoint)) != codepoint));
-}
-
-static inline bool IsLowerCaseCharacterUnicode(uint32_t codepoint) {
-  // although this trick seems to work for upper case, this is not enough for lower case
-  // testing, see https://github.com/JuliaStrings/utf8proc/issues/195 . But currently the
-  // best we can do
-  return (HasAnyUnicodeGeneralCategory(codepoint, UTF8PROC_CATEGORY_LL) ||
-          ((static_cast<uint32_t>(utf8proc_toupper(codepoint)) != codepoint) &&
-           (static_cast<uint32_t>(utf8proc_tolower(codepoint)) == codepoint))) &&
-         !HasAnyUnicodeGeneralCategory(codepoint, UTF8PROC_CATEGORY_LT);
-}
-
-static inline bool IsUpperCaseCharacterUnicode(uint32_t codepoint) {
-  // this seems to be a good workaround for utf8proc not having case information
-  // https://github.com/JuliaStrings/utf8proc/issues/195
-  return (HasAnyUnicodeGeneralCategory(codepoint, UTF8PROC_CATEGORY_LU) ||
-          ((static_cast<uint32_t>(utf8proc_toupper(codepoint)) == codepoint) &&
-           (static_cast<uint32_t>(utf8proc_tolower(codepoint)) != codepoint))) &&
-         !HasAnyUnicodeGeneralCategory(codepoint, UTF8PROC_CATEGORY_LT);
-}
-
-static inline bool IsAlphaNumericCharacterUnicode(uint32_t codepoint) {
-  return HasAnyUnicodeGeneralCategory(
-      codepoint, UTF8PROC_CATEGORY_LU, UTF8PROC_CATEGORY_LL, UTF8PROC_CATEGORY_LT,
-      UTF8PROC_CATEGORY_LM, UTF8PROC_CATEGORY_LO, UTF8PROC_CATEGORY_ND,
-      UTF8PROC_CATEGORY_NL, UTF8PROC_CATEGORY_NO);
-}
-
-static inline bool IsAlphaCharacterUnicode(uint32_t codepoint) {
-  return HasAnyUnicodeGeneralCategory(codepoint, UTF8PROC_CATEGORY_LU,
-                                      UTF8PROC_CATEGORY_LL, UTF8PROC_CATEGORY_LT,
-                                      UTF8PROC_CATEGORY_LM, UTF8PROC_CATEGORY_LO);
-}
-
-static inline bool IsDecimalCharacterUnicode(uint32_t codepoint) {
-  return HasAnyUnicodeGeneralCategory(codepoint, UTF8PROC_CATEGORY_ND);
-}
-
-static inline bool IsDigitCharacterUnicode(uint32_t codepoint) {
-  // Python defines this as Numeric_Type=Digit or Numeric_Type=Decimal.
-  // utf8proc has no support for this, this is the best we can do:
-  return HasAnyUnicodeGeneralCategory(codepoint, UTF8PROC_CATEGORY_ND);
-}
-
-static inline bool IsNumericCharacterUnicode(uint32_t codepoint) {
-  // Formally this is not correct, but utf8proc does not allow us to query for Numerical
-  // properties, e.g. Numeric_Value and Numeric_Type
-  // Python defines Numeric as Numeric_Type=Digit, Numeric_Type=Decimal or
-  // Numeric_Type=Numeric.
-  return HasAnyUnicodeGeneralCategory(codepoint, UTF8PROC_CATEGORY_ND,
-                                      UTF8PROC_CATEGORY_NL, UTF8PROC_CATEGORY_NO);
-}
-
-static inline bool IsSpaceCharacterUnicode(uint32_t codepoint) {
-  auto property = utf8proc_get_property(codepoint);
-  return HasAnyUnicodeGeneralCategory(codepoint, UTF8PROC_CATEGORY_ZS) ||
-         property->bidi_class == UTF8PROC_BIDI_CLASS_WS ||
-         property->bidi_class == UTF8PROC_BIDI_CLASS_B ||
-         property->bidi_class == UTF8PROC_BIDI_CLASS_S;
-}
-
-static inline bool IsPrintableCharacterUnicode(uint32_t codepoint) {
-  uint32_t general_category = utf8proc_category(codepoint);
-  return (general_category != UTF8PROC_CATEGORY_CN) &&
-         !HasAnyUnicodeGeneralCategory(codepoint, UTF8PROC_CATEGORY_CC,
-                                       UTF8PROC_CATEGORY_CF, UTF8PROC_CATEGORY_CS,
-                                       UTF8PROC_CATEGORY_CO, UTF8PROC_CATEGORY_ZS,
-                                       UTF8PROC_CATEGORY_ZL, UTF8PROC_CATEGORY_ZP);
-}
-
-#endif
-
-static inline bool IsLowerCaseCharacterAscii(uint8_t ascii_character) {
-  return (ascii_character >= 'a') && (ascii_character <= 'z');
-}
-
-static inline bool IsUpperCaseCharacterAscii(uint8_t ascii_character) {
-  return (ascii_character >= 'A') && (ascii_character <= 'Z');
-}
-
-static inline bool IsCasedCharacterAscii(uint8_t ascii_character) {
-  return IsLowerCaseCharacterAscii(ascii_character) ||
-         IsUpperCaseCharacterAscii(ascii_character);
-}
-
-static inline bool IsAlphaCharacterAscii(uint8_t ascii_character) {
-  return IsCasedCharacterAscii(ascii_character);  // same
-}
-
-static inline bool IsAlphaNumericCharacterAscii(uint8_t ascii_character) {
-  return ((ascii_character >= '0') && (ascii_character <= '9')) ||
-         ((ascii_character >= 'a') && (ascii_character <= 'z')) ||
-         ((ascii_character >= 'A') && (ascii_character <= 'Z'));
-}
-
-static inline bool IsDecimalCharacterAscii(uint8_t ascii_character) {
-  return ((ascii_character >= '0') && (ascii_character <= '9'));
-}
-
-static inline bool IsSpaceCharacterAscii(uint8_t ascii_character) {
-  return ((ascii_character >= 0x09) && (ascii_character <= 0x0D)) ||
-         (ascii_character == ' ');
-}
-
-static inline bool IsPrintableCharacterAscii(uint8_t ascii_character) {
-  return ((ascii_character >= ' ') && (ascii_character <= '~'));
-}
-
 template <typename Derived, bool allow_empty = false>
 struct CharacterPredicateUnicode {
   static bool Call(KernelContext*, const uint8_t* input, size_t input_string_ncodeunits,
@@ -4020,6 +4066,14 @@ const FunctionDoc ascii_lower_doc(
      "non-ASCII characters, use \"utf8_lower\" instead."),
     {"strings"});
 
+const FunctionDoc ascii_swapcase_doc(
+    "Transform ASCII input lowercase characters to uppercase and uppercase characters to "
+    "lowercase",
+    ("For each string in `strings`, return a string with opposite casing.\n\n"
+     "This function assumes the input is fully ASCII.  If it may contain\n"
+     "non-ASCII characters, use \"utf8_swapcase\" instead."),
+    {"strings"});
+
 const FunctionDoc utf8_upper_doc(
     "Transform input to uppercase",
     ("For each string in `strings`, return an uppercase version."), {"strings"});
@@ -4028,6 +4082,11 @@ const FunctionDoc utf8_lower_doc(
     "Transform input to lowercase",
     ("For each string in `strings`, return a lowercase version."), {"strings"});
 
+const FunctionDoc utf8_swapcase_doc(
+    "Transform input lowercase characters to uppercase and uppercase characters to "
+    "lowercase",
+    ("For each string in `strings`, return an opposite case version."), {"strings"});
+
 const FunctionDoc ascii_reverse_doc(
     "Reverse ASCII input",
     ("For each ASCII string in `strings`, return a reversed version.\n\n"
@@ -4052,6 +4111,8 @@ void RegisterScalarStringAscii(FunctionRegistry* registry) {
                                          MemAllocation::NO_PREALLOCATE);
   MakeUnaryStringBatchKernel<AsciiLower>("ascii_lower", registry, &ascii_lower_doc,
                                          MemAllocation::NO_PREALLOCATE);
+  MakeUnaryStringBatchKernel<AsciiSwapCase>(
+      "ascii_swapcase", registry, &ascii_swapcase_doc, MemAllocation::NO_PREALLOCATE);
   MakeUnaryStringBatchKernel<AsciiTrimWhitespace>("ascii_trim_whitespace", registry,
                                                   &ascii_trim_whitespace_doc);
   MakeUnaryStringBatchKernel<AsciiLTrimWhitespace>("ascii_ltrim_whitespace", registry,
@@ -4095,6 +4156,8 @@ void RegisterScalarStringAscii(FunctionRegistry* registry) {
 #ifdef ARROW_WITH_UTF8PROC
   MakeUnaryStringUTF8TransformKernel<UTF8Upper>("utf8_upper", registry, &utf8_upper_doc);
   MakeUnaryStringUTF8TransformKernel<UTF8Lower>("utf8_lower", registry, &utf8_lower_doc);
+  MakeUnaryStringUTF8TransformKernel<UTF8SwapCase>("utf8_swapcase", registry,
+                                                   &utf8_swapcase_doc);
   MakeUnaryStringBatchKernel<UTF8TrimWhitespace>("utf8_trim_whitespace", registry,
                                                  &utf8_trim_whitespace_doc);
   MakeUnaryStringBatchKernel<UTF8LTrimWhitespace>("utf8_ltrim_whitespace", registry,
diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
index 785c82ca044..3aa6f5368d2 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
@@ -395,6 +395,14 @@ TYPED_TEST(TestStringKernels, AsciiLower) {
                    "[\"aaazzæÆ&\", null, \"\", \"bbb\"]");
 }
 
+TYPED_TEST(TestStringKernels, AsciiSwapCase) {
+  this->CheckUnary("ascii_swapcase", "[]", this->type(), "[]");
+  this->CheckUnary("ascii_swapcase", "[\"aAazZæÆ&\", null, \"\", \"BbB\"]", this->type(),
+                   "[\"AaAZzæÆ&\", null, \"\", \"bBb\"]");
+  this->CheckUnary("ascii_swapcase", "[\"hEllO, WoRld!\", \"$. A35?\"]", this->type(),
+                   "[\"HeLLo, wOrLD!\", \"$. a35?\"]");
+}
+
 TYPED_TEST(TestStringKernels, AsciiReverse) {
   this->CheckUnary("ascii_reverse", "[]", this->type(), "[]");
   this->CheckUnary("ascii_reverse", R"(["abcd", null, "", "bbb"])", this->type(),
@@ -493,6 +501,26 @@ TYPED_TEST(TestStringKernels, Utf8Lower) {
                                   CallFunction("utf8_lower", {invalid_input}));
 }
 
+TYPED_TEST(TestStringKernels, Utf8SwapCase) {
+  this->CheckUnary("utf8_swapcase", "[\"aAazZæÆ&\", null, \"\", \"b\"]", this->type(),
+                   "[\"AaAZzÆæ&\", null, \"\", \"B\"]");
+
+  // test varying encoding lengths and thus changing indices/offsets
+  this->CheckUnary("utf8_swapcase", "[\"ⱭɽⱤoW\", null, \"ıI\", \"B\"]", this->type(),
+                   "[\"ɑⱤɽOw\", null, \"Ii\", \"b\"]");
+
+  // test maximum buffer growth
+  this->CheckUnary("utf8_swapcase", "[\"ȺȺȺȺ\"]", this->type(), "[\"ⱥⱥⱥⱥ\"]");
+
+  this->CheckUnary("ascii_swapcase", "[\"hEllO, WoRld!\", \"$. A35?\"]", this->type(),
+                   "[\"HeLLo, wOrLD!\", \"$. a35?\"]");
+
+  // Test invalid data
+  auto invalid_input = ArrayFromJSON(this->type(), "[\"Ⱥa\xFFⱭ\", \"Ɽ\xe1\xbdⱤaA\"]");
+  EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, testing::HasSubstr("Invalid UTF8 sequence"),
+                                  CallFunction("utf8_swapcase", {invalid_input}));
+}
+
 TYPED_TEST(TestStringKernels, IsAlphaNumericUnicode) {
   // U+08BE (utf8: 	\xE0\xA2\xBE) is undefined, but utf8proc things it is
   // UTF8PROC_CATEGORY_LO
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index b389b43c02e..01dc1d92e17 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -591,6 +591,8 @@ String transforms
 +-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+
 | ascii_reverse           | Unary | String-like            | String-like            |                                   | \(2)  |
 +-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+
+| ascii_swapcase          | Unary | String-like            | String-like            |                                   | \(1)  |
++-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+
 | ascii_upper             | Unary | String-like            | String-like            |                                   | \(1)  |
 +-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+
 | binary_length           | Unary | Binary- or String-like | Int32 or Int64         |                                   | \(3)  |
@@ -609,6 +611,8 @@ String transforms
 +-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+
 | utf8_reverse            | Unary | String-like            | String-like            |                                   | \(9)  |
 +-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+
+| utf8_swapcase           | Unary | String-like            | String-like            |                                   | \(8)  |
++-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+
 | utf8_upper              | Unary | String-like            | String-like            |                                   | \(8)  |
 +-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+
 
diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst
index 2fd0bad07e7..c503cba319c 100644
--- a/docs/source/python/api/compute.rst
+++ b/docs/source/python/api/compute.rst
@@ -259,6 +259,7 @@ String Transforms
    ascii_rpad
    ascii_rtrim
    ascii_rtrim_whitespace
+   ascii_swapcase
    ascii_trim
    ascii_upper
    binary_length
@@ -276,6 +277,7 @@ String Transforms
    utf8_rpad
    utf8_rtrim
    utf8_rtrim_whitespace
+   utf8_swapcase
    utf8_trim
    utf8_upper
 

From decfe5c73d3a87592ae2e7dcaeda1aee68c37e49 Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Wed, 4 Aug 2021 10:41:57 -0400
Subject: [PATCH 702/719] ARROW-13520: [C++] Implement hash_aggregate tdigest
 kernel

Closes #10860 from lidavidm/arrow-13520

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 .../arrow/compute/kernels/hash_aggregate.cc   | 141 ++++++++++++++++++
 .../compute/kernels/hash_aggregate_test.cc    |  51 +++++++
 2 files changed, 192 insertions(+)

diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate.cc b/cpp/src/arrow/compute/kernels/hash_aggregate.cc
index 472ae956388..ba5c90f15de 100644
--- a/cpp/src/arrow/compute/kernels/hash_aggregate.cc
+++ b/cpp/src/arrow/compute/kernels/hash_aggregate.cc
@@ -45,6 +45,7 @@
 #include "arrow/util/int128_internal.h"
 #include "arrow/util/make_unique.h"
 #include "arrow/util/task_group.h"
+#include "arrow/util/tdigest.h"
 #include "arrow/util/thread_pool.h"
 #include "arrow/visitor_inline.h"
 
@@ -1311,6 +1312,126 @@ struct GroupedVarStdFactory {
   InputType argument_type;
 };
 
+// ----------------------------------------------------------------------
+// TDigest implementation
+
+using arrow::internal::TDigest;
+
+template <typename Type>
+struct GroupedTDigestImpl : public GroupedAggregator {
+  using CType = typename Type::c_type;
+
+  Status Init(ExecContext* ctx, const FunctionOptions* options) override {
+    options_ = *checked_cast<const TDigestOptions*>(options);
+    ctx_ = ctx;
+    pool_ = ctx->memory_pool();
+    return Status::OK();
+  }
+
+  Status Resize(int64_t new_num_groups) override {
+    const int64_t added_groups = new_num_groups - tdigests_.size();
+    tdigests_.reserve(new_num_groups);
+    for (int64_t i = 0; i < added_groups; i++) {
+      tdigests_.emplace_back(options_.delta, options_.buffer_size);
+    }
+    return Status::OK();
+  }
+
+  Status Consume(const ExecBatch& batch) override {
+    auto g = batch[1].array()->GetValues<uint32_t>(1);
+    VisitArrayDataInline<Type>(
+        *batch[0].array(),
+        [&](typename TypeTraits<Type>::CType value) {
+          this->tdigests_[*g].NanAdd(value);
+          ++g;
+        },
+        [&] { ++g; });
+    return Status::OK();
+  }
+
+  Status Merge(GroupedAggregator&& raw_other,
+               const ArrayData& group_id_mapping) override {
+    auto other = checked_cast<GroupedTDigestImpl*>(&raw_other);
+
+    auto g = group_id_mapping.GetValues<uint32_t>(1);
+    std::vector<TDigest> other_tdigest(1);
+    for (int64_t other_g = 0; other_g < group_id_mapping.length; ++other_g, ++g) {
+      other_tdigest[0] = std::move(other->tdigests_[other_g]);
+      tdigests_[*g].Merge(&other_tdigest);
+    }
+
+    return Status::OK();
+  }
+
+  Result<Datum> Finalize() override {
+    std::shared_ptr<Buffer> null_bitmap;
+    ARROW_ASSIGN_OR_RAISE(
+        std::shared_ptr<Buffer> values,
+        AllocateBuffer(tdigests_.size() * options_.q.size() * sizeof(double), pool_));
+    int64_t null_count = 0;
+    const int64_t slot_length = options_.q.size();
+
+    double* results = reinterpret_cast<double*>(values->mutable_data());
+    for (int64_t i = 0; static_cast<size_t>(i) < tdigests_.size(); ++i) {
+      if (!tdigests_[i].is_empty()) {
+        for (int64_t j = 0; j < slot_length; j++) {
+          results[i * slot_length + j] = tdigests_[i].Quantile(options_.q[j]);
+        }
+        continue;
+      }
+
+      if (!null_bitmap) {
+        ARROW_ASSIGN_OR_RAISE(null_bitmap, AllocateBitmap(tdigests_.size(), pool_));
+        BitUtil::SetBitsTo(null_bitmap->mutable_data(), 0, tdigests_.size(), true);
+      }
+      null_count++;
+      BitUtil::SetBitTo(null_bitmap->mutable_data(), i, false);
+      std::fill(&results[i * slot_length], &results[(i + 1) * slot_length], 0.0);
+    }
+
+    auto child = ArrayData::Make(float64(), tdigests_.size() * options_.q.size(),
+                                 {nullptr, std::move(values)}, /*null_count=*/0);
+    return ArrayData::Make(out_type(), tdigests_.size(), {std::move(null_bitmap)},
+                           {std::move(child)}, null_count);
+  }
+
+  std::shared_ptr<DataType> out_type() const override {
+    return fixed_size_list(float64(), static_cast<int32_t>(options_.q.size()));
+  }
+
+  TDigestOptions options_;
+  std::vector<TDigest> tdigests_;
+  ExecContext* ctx_;
+  MemoryPool* pool_;
+};
+
+struct GroupedTDigestFactory {
+  template <typename T>
+  enable_if_number<T, Status> Visit(const T&) {
+    kernel =
+        MakeKernel(std::move(argument_type), HashAggregateInit<GroupedTDigestImpl<T>>);
+    return Status::OK();
+  }
+
+  Status Visit(const HalfFloatType& type) {
+    return Status::NotImplemented("Computing t-digest of data of type ", type);
+  }
+
+  Status Visit(const DataType& type) {
+    return Status::NotImplemented("Computing t-digest of data of type ", type);
+  }
+
+  static Result<HashAggregateKernel> Make(const std::shared_ptr<DataType>& type) {
+    GroupedTDigestFactory factory;
+    factory.argument_type = InputType::Array(type);
+    RETURN_NOT_OK(VisitTypeInline(*type, &factory));
+    return std::move(factory.kernel);
+  }
+
+  HashAggregateKernel kernel;
+  InputType argument_type;
+};
+
 // ----------------------------------------------------------------------
 // MinMax implementation
 
@@ -1863,6 +1984,13 @@ const FunctionDoc hash_variance_doc{
      "to satisfy `ddof`, null is returned."),
     {"array", "group_id_array"}};
 
+const FunctionDoc hash_tdigest_doc{
+    "Calculate approximate quantiles of a numeric array with the T-Digest algorithm",
+    ("By default, the 0.5 quantile (median) is returned.\n"
+     "Nulls and NaNs are ignored.\n"
+     "A null array is returned if there are no valid data points."),
+    {"array", "group_id_array"}};
+
 const FunctionDoc hash_min_max_doc{
     "Compute the minimum and maximum values of a numeric array",
     ("Null values are ignored by default.\n"
@@ -1939,6 +2067,19 @@ void RegisterHashAggregateBasic(FunctionRegistry* registry) {
     DCHECK_OK(registry->AddFunction(std::move(func)));
   }
 
+  static auto default_tdigest_options = TDigestOptions::Defaults();
+  {
+    auto func = std::make_shared<HashAggregateFunction>(
+        "hash_tdigest", Arity::Binary(), &hash_tdigest_doc, &default_tdigest_options);
+    DCHECK_OK(
+        AddHashAggKernels(SignedIntTypes(), GroupedTDigestFactory::Make, func.get()));
+    DCHECK_OK(
+        AddHashAggKernels(UnsignedIntTypes(), GroupedTDigestFactory::Make, func.get()));
+    DCHECK_OK(
+        AddHashAggKernels(FloatingPointTypes(), GroupedTDigestFactory::Make, func.get()));
+    DCHECK_OK(registry->AddFunction(std::move(func)));
+  }
+
   {
     static auto default_scalar_aggregate_options = ScalarAggregateOptions::Defaults();
     auto func = std::make_shared<HashAggregateFunction>(
diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc b/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc
index f4df6aa18a3..d37d8f32ac8 100644
--- a/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc
+++ b/cpp/src/arrow/compute/kernels/hash_aggregate_test.cc
@@ -814,6 +814,57 @@ TEST(GroupBy, VarianceAndStddev) {
                           /*verbose=*/true);
 }
 
+TEST(GroupBy, TDigest) {
+  auto batch = RecordBatchFromJSON(
+      schema({field("argument", float64()), field("key", int64())}), R"([
+    [1,   1],
+    [null,  1],
+    [0,   2],
+    [null,  3],
+    [4,   null],
+    [3,  1],
+    [0, 2],
+    [-1, 2],
+    [1,  null],
+    [NaN,  3]
+  ])");
+
+  TDigestOptions options1(std::vector<double>{0.5, 0.9, 0.99});
+  TDigestOptions options2(std::vector<double>{0.5, 0.9, 0.99}, /*delta=*/50,
+                          /*buffer_size=*/1024);
+  ASSERT_OK_AND_ASSIGN(Datum aggregated_and_grouped,
+                       internal::GroupBy(
+                           {
+                               batch->GetColumnByName("argument"),
+                               batch->GetColumnByName("argument"),
+                               batch->GetColumnByName("argument"),
+                           },
+                           {
+                               batch->GetColumnByName("key"),
+                           },
+                           {
+                               {"hash_tdigest", nullptr},
+                               {"hash_tdigest", &options1},
+                               {"hash_tdigest", &options2},
+                           }));
+
+  AssertDatumsApproxEqual(
+      ArrayFromJSON(struct_({
+                        field("hash_tdigest", fixed_size_list(float64(), 1)),
+                        field("hash_tdigest", fixed_size_list(float64(), 3)),
+                        field("hash_tdigest", fixed_size_list(float64(), 3)),
+                        field("key_0", int64()),
+                    }),
+                    R"([
+    [[1.0], [1.0, 3.0, 3.0], [1.0, 3.0, 3.0], 1],
+    [[0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0], 2],
+    [null,  null,            null,            3],
+    [[1.0], [1.0, 4.0, 4.0], [1.0, 4.0, 4.0], null]
+  ])"),
+      aggregated_and_grouped,
+      /*verbose=*/true);
+}
+
 TEST(GroupBy, MinMaxOnly) {
   for (bool use_exec_plan : {false, true}) {
     for (bool use_threads : {true, false}) {

From b04d59ee2bff6b8fb845195fc4ba9222de9d0534 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Wed, 4 Aug 2021 17:02:12 +0200
Subject: [PATCH 703/719] ARROW-9719: [Python] Improve HadoopFileSystem
 docstring

Document missing parameters.

Closes #10867 from pitrou/ARROW-9719-hdfs-docstring

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 python/pyarrow/_hdfs.pyx | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/python/pyarrow/_hdfs.pyx b/python/pyarrow/_hdfs.pyx
index 5ede8f5159d..6d1a85e18e5 100644
--- a/python/pyarrow/_hdfs.pyx
+++ b/python/pyarrow/_hdfs.pyx
@@ -37,6 +37,8 @@ cdef class HadoopFileSystem(FileSystem):
         HDFS host to connect to.
     port : int, default 8020
         HDFS port to connect to.
+    user : str, default None
+        Username when connecting to HDFS; None implies login user.
     replication : int, default 3
         Number of copies each block will have.
     buffer_size : int, default 0
@@ -47,6 +49,9 @@ cdef class HadoopFileSystem(FileSystem):
         128 MB.
     kerb_ticket : string or path, default None
         If not None, the path to the Kerberos ticket cache.
+    extra_conf : dict, default None
+        Extra key/value pairs for configuration; will override any
+        hdfs-site.xml properties.
     """
 
     cdef:

From 99c9231ffdf74dade81216a7f9e76e985b9f89e8 Mon Sep 17 00:00:00 2001
From: Eduardo Ponce <edponce00@gmail.com>
Date: Wed, 4 Aug 2021 17:05:23 +0200
Subject: [PATCH 704/719] ARROW-12944: [C++] String capitalize kernel

This PR adds scalar compute functions for string capitalization, namely "ascii_capitalize" and "utf8_capitalize".

Closes #10857 from edponce/ARROW-12944-String-capitalize-kernel

Authored-by: Eduardo Ponce <edponce00@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 .../arrow/compute/kernels/scalar_string.cc    | 82 ++++++++++++++++---
 .../compute/kernels/scalar_string_test.cc     | 22 ++++-
 docs/source/cpp/compute.rst                   |  4 +
 docs/source/python/api/compute.rst            |  2 +
 4 files changed, 99 insertions(+), 11 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/scalar_string.cc b/cpp/src/arrow/compute/kernels/scalar_string.cc
index 5359567fc12..8d815274479 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string.cc
@@ -446,10 +446,10 @@ struct StringTransformCodepoint : public StringTransformBase {
 // struct CaseMappingMixin {
 struct CaseMappingTransform {
   static int64_t MaxCodeunits(int64_t ninputs, int64_t input_ncodeunits) {
-    // Section 5.18 of the Unicode spec claim that the number of codepoints for case
+    // Section 5.18 of the Unicode spec claims that the number of codepoints for case
     // mapping can grow by a factor of 3. This means grow by a factor of 3 in bytes
     // However, since we don't support all casings (SpecialCasing.txt) the growth
-    // in bytes iss actually only at max 3/2 (as covered by the unittest).
+    // in bytes is actually only at max 3/2 (as covered by the unittest).
     // Note that rounding down the 3/2 is ok, since only codepoints encoded by
     // two code units (even) can grow to 3 code units.
     return static_cast<int64_t>(input_ncodeunits) * 3 / 2;
@@ -496,6 +496,37 @@ template <typename Type>
 using UTF8SwapCase =
     StringTransformExec<Type, StringTransformCodepoint<UTF8SwapCaseTransform>>;
 
+struct Utf8CapitalizeTransform : public StringTransformBase {
+  int64_t Transform(const uint8_t* input, int64_t input_string_ncodeunits,
+                    uint8_t* output) {
+    uint8_t* output_start = output;
+    if (input_string_ncodeunits > 0) {
+      // Get number of code units in first code point
+      uint32_t codepoint = 0;
+      const uint8_t* i = input;
+      if (ARROW_PREDICT_FALSE(!util::UTF8Decode(&i, &codepoint))) {
+        return kTransformError;
+      }
+      int64_t codepoint_ncodeunits =
+          std::min(static_cast<int64_t>(i - input), input_string_ncodeunits);
+      if (ARROW_PREDICT_FALSE(
+              !util::UTF8Transform(input, input + codepoint_ncodeunits, &output,
+                                   UTF8UpperTransform::TransformCodepoint))) {
+        return kTransformError;
+      }
+      if (ARROW_PREDICT_FALSE(!util::UTF8Transform(
+              input + codepoint_ncodeunits, input + input_string_ncodeunits, &output,
+              UTF8LowerTransform::TransformCodepoint))) {
+        return kTransformError;
+      }
+    }
+    return output - output_start;
+  }
+};
+
+template <typename Type>
+using Utf8Capitalize = StringTransformExec<Type, Utf8CapitalizeTransform>;
+
 #endif  // ARROW_WITH_UTF8PROC
 
 struct AsciiReverseTransform : public StringTransformBase {
@@ -632,6 +663,20 @@ struct AsciiSwapCase {
   }
 };
 
+struct AsciiCapitalizeTransform : public StringTransformBase {
+  int64_t Transform(const uint8_t* input, int64_t input_string_ncodeunits,
+                    uint8_t* output) {
+    if (input_string_ncodeunits > 0) {
+      *output = ascii_toupper(*input);
+      TransformAsciiLower(input + 1, input_string_ncodeunits - 1, output + 1);
+    }
+    return input_string_ncodeunits;
+  }
+};
+
+template <typename Type>
+using AsciiCapitalize = StringTransformExec<Type, AsciiCapitalizeTransform>;
+
 // ----------------------------------------------------------------------
 // exact pattern detection
 
@@ -4074,6 +4119,20 @@ const FunctionDoc ascii_swapcase_doc(
      "non-ASCII characters, use \"utf8_swapcase\" instead."),
     {"strings"});
 
+const FunctionDoc ascii_capitalize_doc(
+    "Capitalize the first character of ASCII input",
+    ("For each string in `strings`, return a capitalized version.\n\n"
+     "This function assumes the input is fully ASCII.  If it may contain\n"
+     "non-ASCII characters, use \"utf8_capitalize\" instead."),
+    {"strings"});
+
+const FunctionDoc ascii_reverse_doc(
+    "Reverse ASCII input",
+    ("For each ASCII string in `strings`, return a reversed version.\n\n"
+     "This function assumes the input is fully ASCII.  If it may contain\n"
+     "non-ASCII characters, use \"utf8_reverse\" instead."),
+    {"strings"});
+
 const FunctionDoc utf8_upper_doc(
     "Transform input to uppercase",
     ("For each string in `strings`, return an uppercase version."), {"strings"});
@@ -4087,17 +4146,16 @@ const FunctionDoc utf8_swapcase_doc(
     "lowercase",
     ("For each string in `strings`, return an opposite case version."), {"strings"});
 
-const FunctionDoc ascii_reverse_doc(
-    "Reverse ASCII input",
-    ("For each ASCII string in `strings`, return a reversed version.\n\n"
-     "This function assumes the input is fully ASCII.  If it may contain\n"
-     "non-ASCII characters, use \"utf8_reverse\" instead."),
+const FunctionDoc utf8_capitalize_doc(
+    "Capitalize the first character of input",
+    ("For each string in `strings`, return a capitalized version,\n"
+     "with the first character uppercased and the others lowercased."),
     {"strings"});
 
 const FunctionDoc utf8_reverse_doc(
-    "Reverse utf8 input",
-    ("For each utf8 string in `strings`, return a reversed version.\n\n"
-     "This function operates on codepoints/UTF-8 code units, not grapheme\n"
+    "Reverse input",
+    ("For each string in `strings`, return a reversed version.\n\n"
+     "This function operates on Unicode codepoints, not grapheme\n"
      "clusters. Hence, it will not correctly reverse grapheme clusters\n"
      "composed of multiple codepoints."),
     {"strings"});
@@ -4113,6 +4171,8 @@ void RegisterScalarStringAscii(FunctionRegistry* registry) {
                                          MemAllocation::NO_PREALLOCATE);
   MakeUnaryStringBatchKernel<AsciiSwapCase>(
       "ascii_swapcase", registry, &ascii_swapcase_doc, MemAllocation::NO_PREALLOCATE);
+  MakeUnaryStringBatchKernel<AsciiCapitalize>("ascii_capitalize", registry,
+                                              &ascii_capitalize_doc);
   MakeUnaryStringBatchKernel<AsciiTrimWhitespace>("ascii_trim_whitespace", registry,
                                                   &ascii_trim_whitespace_doc);
   MakeUnaryStringBatchKernel<AsciiLTrimWhitespace>("ascii_ltrim_whitespace", registry,
@@ -4158,6 +4218,8 @@ void RegisterScalarStringAscii(FunctionRegistry* registry) {
   MakeUnaryStringUTF8TransformKernel<UTF8Lower>("utf8_lower", registry, &utf8_lower_doc);
   MakeUnaryStringUTF8TransformKernel<UTF8SwapCase>("utf8_swapcase", registry,
                                                    &utf8_swapcase_doc);
+  MakeUnaryStringBatchKernel<Utf8Capitalize>("utf8_capitalize", registry,
+                                             &utf8_capitalize_doc);
   MakeUnaryStringBatchKernel<UTF8TrimWhitespace>("utf8_trim_whitespace", registry,
                                                  &utf8_trim_whitespace_doc);
   MakeUnaryStringBatchKernel<UTF8LTrimWhitespace>("utf8_ltrim_whitespace", registry,
diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
index 3aa6f5368d2..920197ca3c3 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
@@ -403,6 +403,16 @@ TYPED_TEST(TestStringKernels, AsciiSwapCase) {
                    "[\"HeLLo, wOrLD!\", \"$. a35?\"]");
 }
 
+TYPED_TEST(TestStringKernels, AsciiCapitalize) {
+  this->CheckUnary("ascii_capitalize", "[]", this->type(), "[]");
+  this->CheckUnary("ascii_capitalize",
+                   "[\"aAazZæÆ&\", null, \"\", \"bBB\", \"hEllO, WoRld!\", \"$. A3\", "
+                   "\"!hELlo, wORLd!\"]",
+                   this->type(),
+                   "[\"AaazzæÆ&\", null, \"\", \"Bbb\", \"Hello, world!\", \"$. a3\", "
+                   "\"!hello, world!\"]");
+}
+
 TYPED_TEST(TestStringKernels, AsciiReverse) {
   this->CheckUnary("ascii_reverse", "[]", this->type(), "[]");
   this->CheckUnary("ascii_reverse", R"(["abcd", null, "", "bbb"])", this->type(),
@@ -462,7 +472,7 @@ TYPED_TEST(TestStringKernels, Utf8Upper) {
   this->CheckUnary("utf8_upper", "[\"aAazZæÆ&\", null, \"\", \"b\"]", this->type(),
                    "[\"AAAZZÆÆ&\", null, \"\", \"B\"]");
 
-  // test varying encoding lenghts and thus changing indices/offsets
+  // test varying encoding lengths and thus changing indices/offsets
   this->CheckUnary("utf8_upper", "[\"ɑɽⱤoW\", null, \"ıI\", \"b\"]", this->type(),
                    "[\"ⱭⱤⱤOW\", null, \"II\", \"B\"]");
 
@@ -521,6 +531,16 @@ TYPED_TEST(TestStringKernels, Utf8SwapCase) {
                                   CallFunction("utf8_swapcase", {invalid_input}));
 }
 
+TYPED_TEST(TestStringKernels, Utf8Capitalize) {
+  this->CheckUnary("ascii_capitalize", "[]", this->type(), "[]");
+  this->CheckUnary("utf8_capitalize",
+                   "[\"aAazZæÆ&\", null, \"\", \"b\", \"ɑɽⱤoW\", \"ıI\", \"ⱥⱥⱥȺ\", "
+                   "\"hEllO, WoRld!\", \"$. A3\", \"!ɑⱤⱤow\"]",
+                   this->type(),
+                   "[\"Aaazzææ&\", null, \"\", \"B\", \"Ɑɽɽow\", \"Ii\", \"Ⱥⱥⱥⱥ\", "
+                   "\"Hello, world!\", \"$. a3\", \"!ɑɽɽow\"]");
+}
+
 TYPED_TEST(TestStringKernels, IsAlphaNumericUnicode) {
   // U+08BE (utf8: 	\xE0\xA2\xBE) is undefined, but utf8proc things it is
   // UTF8PROC_CATEGORY_LO
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index 01dc1d92e17..b12d0f2efde 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -587,6 +587,8 @@ String transforms
 +-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+
 | Function name           | Arity | Input types            | Output type            | Options class                     | Notes |
 +=========================+=======+========================+========================+===================================+=======+
+| ascii_capitalize        | Unary | String-like            | String-like            |                                   |       |
++-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+
 | ascii_lower             | Unary | String-like            | String-like            |                                   | \(1)  |
 +-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+
 | ascii_reverse           | Unary | String-like            | String-like            |                                   | \(2)  |
@@ -603,6 +605,8 @@ String transforms
 +-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+
 | replace_substring_regex | Unary | String-like            | String-like            | :struct:`ReplaceSubstringOptions` | \(6)  |
 +-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+
+| utf8_capitalize         | Unary | String-like            | String-like            |                                   |       |
++-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+
 | utf8_length             | Unary | String-like            | Int32 or Int64         |                                   | \(7)  |
 +-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+
 | utf8_lower              | Unary | String-like            | String-like            |                                   | \(8)  |
diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst
index c503cba319c..b3ab086899a 100644
--- a/docs/source/python/api/compute.rst
+++ b/docs/source/python/api/compute.rst
@@ -250,6 +250,7 @@ String Transforms
 .. autosummary::
    :toctree: ../generated/
 
+   ascii_capitalize
    ascii_center
    ascii_lpad
    ascii_ltrim
@@ -266,6 +267,7 @@ String Transforms
    binary_replace_slice
    replace_substring
    replace_substring_regex
+   utf8_capitalize
    utf8_center
    utf8_length
    utf8_lower

From af7588344c3fe59a69e02b6f5203e7b7ae06addf Mon Sep 17 00:00:00 2001
From: David Li <li.davidm96@gmail.com>
Date: Wed, 4 Aug 2021 19:03:43 +0200
Subject: [PATCH 705/719] ARROW-13220: [C++] Implement 'choose' function

Also makes two changes:
- MakeArrayFromScalar now works with Decimal256 and FixedSizeBinaryType
- VarArgs(min_args=N) for N>1 now works (before different assertions/runtime checks would trigger). Though, might it make sense to update KernelSignature to reflect min_args?

Closes #10642 from lidavidm/arrow-13220

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 .../arrow/compute/kernels/codegen_internal.cc |   9 +
 .../arrow/compute/kernels/codegen_internal.h  |   3 +
 .../arrow/compute/kernels/scalar_if_else.cc   | 278 +++++++++++++++++-
 .../kernels/scalar_if_else_benchmark.cc       |  76 +++--
 .../compute/kernels/scalar_if_else_test.cc    | 200 +++++++++++++
 .../arrow/compute/kernels/vector_replace.cc   |   5 +-
 docs/source/cpp/compute.rst                   |  50 ++--
 docs/source/python/api/compute.rst            |   1 +
 8 files changed, 574 insertions(+), 48 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/codegen_internal.cc b/cpp/src/arrow/compute/kernels/codegen_internal.cc
index bab8e7000cd..f8b90085010 100644
--- a/cpp/src/arrow/compute/kernels/codegen_internal.cc
+++ b/cpp/src/arrow/compute/kernels/codegen_internal.cc
@@ -47,6 +47,7 @@ std::vector<std::shared_ptr<DataType>> g_floating_types;
 std::vector<std::shared_ptr<DataType>> g_numeric_types;
 std::vector<std::shared_ptr<DataType>> g_base_binary_types;
 std::vector<std::shared_ptr<DataType>> g_temporal_types;
+std::vector<std::shared_ptr<DataType>> g_interval_types;
 std::vector<std::shared_ptr<DataType>> g_primitive_types;
 std::vector<Type::type> g_decimal_type_ids;
 static std::once_flag codegen_static_initialized;
@@ -91,6 +92,9 @@ static void InitStaticData() {
                       timestamp(TimeUnit::MICRO),
                       timestamp(TimeUnit::NANO)};
 
+  // Interval types
+  g_interval_types = {day_time_interval(), month_interval()};
+
   // Base binary types (without FixedSizeBinary)
   g_base_binary_types = {binary(), utf8(), large_binary(), large_utf8()};
 
@@ -157,6 +161,11 @@ const std::vector<std::shared_ptr<DataType>>& TemporalTypes() {
   return g_temporal_types;
 }
 
+const std::vector<std::shared_ptr<DataType>>& IntervalTypes() {
+  std::call_once(codegen_static_initialized, InitStaticData);
+  return g_interval_types;
+}
+
 const std::vector<std::shared_ptr<DataType>>& PrimitiveTypes() {
   std::call_once(codegen_static_initialized, InitStaticData);
   return g_primitive_types;
diff --git a/cpp/src/arrow/compute/kernels/codegen_internal.h b/cpp/src/arrow/compute/kernels/codegen_internal.h
index f432c93daac..9c8b2cef198 100644
--- a/cpp/src/arrow/compute/kernels/codegen_internal.h
+++ b/cpp/src/arrow/compute/kernels/codegen_internal.h
@@ -442,6 +442,9 @@ const std::vector<std::shared_ptr<DataType>>& NumericTypes();
 // Temporal types including time and timestamps for each unit
 const std::vector<std::shared_ptr<DataType>>& TemporalTypes();
 
+// Interval types
+const std::vector<std::shared_ptr<DataType>>& IntervalTypes();
+
 // Integer, floating point, base binary, and temporal
 const std::vector<std::shared_ptr<DataType>>& PrimitiveTypes();
 
diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else.cc b/cpp/src/arrow/compute/kernels/scalar_if_else.cc
index ff308a673a3..cb261ec59a7 100644
--- a/cpp/src/arrow/compute/kernels/scalar_if_else.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_if_else.cc
@@ -1182,6 +1182,28 @@ void CopyOneArrayValue(const DataType& type, const uint8_t* in_valid,
                                   out_offset);
 }
 
+template <typename Type>
+void CopyOneScalarValue(const Scalar& scalar, uint8_t* out_valid, uint8_t* out_values,
+                        const int64_t out_offset) {
+  if (out_valid) {
+    BitUtil::SetBitTo(out_valid, out_offset, scalar.is_valid);
+  }
+  CopyFixedWidth<Type>::CopyScalar(scalar, /*length=*/1, out_values, out_offset);
+}
+
+template <typename Type>
+void CopyOneValue(const Datum& in_values, const int64_t in_offset, uint8_t* out_valid,
+                  uint8_t* out_values, const int64_t out_offset) {
+  if (in_values.is_array()) {
+    const ArrayData& array = *in_values.array();
+    CopyOneArrayValue<Type>(*array.type, array.GetValues<uint8_t>(0, 0),
+                            array.GetValues<uint8_t>(1, 0), array.offset + in_offset,
+                            out_valid, out_values, out_offset);
+  } else {
+    CopyOneScalarValue<Type>(*in_values.scalar(), out_valid, out_values, out_offset);
+  }
+}
+
 struct CaseWhenFunction : ScalarFunction {
   using ScalarFunction::ScalarFunction;
 
@@ -1606,6 +1628,206 @@ struct CoalesceFunctor<Type, enable_if_base_binary<Type>> {
   }
 };
 
+template <typename Type>
+Status ExecScalarChoose(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  const auto& index_scalar = *batch[0].scalar();
+  if (!index_scalar.is_valid) {
+    if (out->is_array()) {
+      auto source = MakeNullScalar(out->type());
+      ArrayData* output = out->mutable_array();
+      CopyValues<Type>(source, /*row=*/0, batch.length,
+                       output->GetMutableValues<uint8_t>(0, /*absolute_offset=*/0),
+                       output->GetMutableValues<uint8_t>(1, /*absolute_offset=*/0),
+                       output->offset);
+    }
+    return Status::OK();
+  }
+  auto index = UnboxScalar<Int64Type>::Unbox(index_scalar);
+  if (index < 0 || static_cast<size_t>(index + 1) >= batch.values.size()) {
+    return Status::IndexError("choose: index ", index, " out of range");
+  }
+  auto source = batch.values[index + 1];
+  if (out->is_scalar()) {
+    *out = source;
+  } else {
+    ArrayData* output = out->mutable_array();
+    CopyValues<Type>(source, /*row=*/0, batch.length,
+                     output->GetMutableValues<uint8_t>(0, /*absolute_offset=*/0),
+                     output->GetMutableValues<uint8_t>(1, /*absolute_offset=*/0),
+                     output->offset);
+  }
+  return Status::OK();
+}
+
+template <typename Type>
+Status ExecArrayChoose(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  ArrayData* output = out->mutable_array();
+  const int64_t out_offset = output->offset;
+  // Need a null bitmap if any input has nulls
+  uint8_t* out_valid = nullptr;
+  if (std::any_of(batch.values.begin(), batch.values.end(),
+                  [](const Datum& d) { return d.null_count() > 0; })) {
+    out_valid = output->buffers[0]->mutable_data();
+  } else {
+    BitUtil::SetBitsTo(output->buffers[0]->mutable_data(), out_offset, batch.length,
+                       true);
+  }
+  uint8_t* out_values = output->buffers[1]->mutable_data();
+  int64_t row = 0;
+  return VisitArrayValuesInline<Int64Type>(
+      *batch[0].array(),
+      [&](int64_t index) {
+        if (index < 0 || static_cast<size_t>(index + 1) >= batch.values.size()) {
+          return Status::IndexError("choose: index ", index, " out of range");
+        }
+        const auto& source = batch.values[index + 1];
+        CopyOneValue<Type>(source, row, out_valid, out_values, out_offset + row);
+        row++;
+        return Status::OK();
+      },
+      [&]() {
+        // Index is null, but we should still initialize the output with some value
+        const auto& source = batch.values[1];
+        CopyOneValue<Type>(source, row, out_valid, out_values, out_offset + row);
+        BitUtil::ClearBit(out_valid, out_offset + row);
+        row++;
+        return Status::OK();
+      });
+}
+
+template <typename Type, typename Enable = void>
+struct ChooseFunctor {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    if (batch.values[0].is_scalar()) {
+      return ExecScalarChoose<Type>(ctx, batch, out);
+    }
+    return ExecArrayChoose<Type>(ctx, batch, out);
+  }
+};
+
+template <>
+struct ChooseFunctor<NullType> {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    return Status::OK();
+  }
+};
+
+template <typename Type>
+struct ChooseFunctor<Type, enable_if_base_binary<Type>> {
+  using offset_type = typename Type::offset_type;
+  using BuilderType = typename TypeTraits<Type>::BuilderType;
+
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    if (batch.values[0].is_scalar()) {
+      const auto& index_scalar = *batch[0].scalar();
+      if (!index_scalar.is_valid) {
+        if (out->is_array()) {
+          ARROW_ASSIGN_OR_RAISE(
+              auto temp_array,
+              MakeArrayOfNull(out->type(), batch.length, ctx->memory_pool()));
+          *out->mutable_array() = *temp_array->data();
+        }
+        return Status::OK();
+      }
+      auto index = UnboxScalar<Int64Type>::Unbox(index_scalar);
+      if (index < 0 || static_cast<size_t>(index + 1) >= batch.values.size()) {
+        return Status::IndexError("choose: index ", index, " out of range");
+      }
+      auto source = batch.values[index + 1];
+      if (source.is_scalar() && out->is_array()) {
+        ARROW_ASSIGN_OR_RAISE(
+            auto temp_array,
+            MakeArrayFromScalar(*source.scalar(), batch.length, ctx->memory_pool()));
+        *out->mutable_array() = *temp_array->data();
+      } else {
+        *out = source;
+      }
+      return Status::OK();
+    }
+
+    // Row-wise implementation
+    BuilderType builder(out->type(), ctx->memory_pool());
+    RETURN_NOT_OK(builder.Reserve(batch.length));
+    int64_t reserve_data = 0;
+    for (const auto& value : batch.values) {
+      if (value.is_scalar()) {
+        if (!value.scalar()->is_valid) continue;
+        const auto row_length =
+            checked_cast<const BaseBinaryScalar&>(*value.scalar()).value->size();
+        reserve_data = std::max<int64_t>(reserve_data, batch.length * row_length);
+        continue;
+      }
+      const ArrayData& arr = *value.array();
+      const offset_type* offsets = arr.GetValues<offset_type>(1);
+      const offset_type values_length = offsets[arr.length] - offsets[0];
+      reserve_data = std::max<int64_t>(reserve_data, values_length);
+    }
+    RETURN_NOT_OK(builder.ReserveData(reserve_data));
+    int64_t row = 0;
+    RETURN_NOT_OK(VisitArrayValuesInline<Int64Type>(
+        *batch[0].array(),
+        [&](int64_t index) {
+          if (index < 0 || static_cast<size_t>(index + 1) >= batch.values.size()) {
+            return Status::IndexError("choose: index ", index, " out of range");
+          }
+          const auto& source = batch.values[index + 1];
+          return CopyValue(source, &builder, row++);
+        },
+        [&]() {
+          row++;
+          return builder.AppendNull();
+        }));
+    auto actual_type = out->type();
+    std::shared_ptr<Array> temp_output;
+    RETURN_NOT_OK(builder.Finish(&temp_output));
+    ArrayData* output = out->mutable_array();
+    *output = *temp_output->data();
+    // Builder type != logical type due to GenerateTypeAgnosticVarBinaryBase
+    output->type = std::move(actual_type);
+    return Status::OK();
+  }
+
+  static Status CopyValue(const Datum& datum, BuilderType* builder, int64_t row) {
+    if (datum.is_scalar()) {
+      const auto& scalar = checked_cast<const BaseBinaryScalar&>(*datum.scalar());
+      if (!scalar.value) return builder->AppendNull();
+      return builder->Append(scalar.value->data(),
+                             static_cast<offset_type>(scalar.value->size()));
+    }
+    const ArrayData& source = *datum.array();
+    if (!source.MayHaveNulls() ||
+        BitUtil::GetBit(source.buffers[0]->data(), source.offset + row)) {
+      const uint8_t* data = source.buffers[2]->data();
+      const offset_type* offsets = source.GetValues<offset_type>(1);
+      const offset_type offset0 = offsets[row];
+      const offset_type offset1 = offsets[row + 1];
+      return builder->Append(data + offset0, offset1 - offset0);
+    }
+    return builder->AppendNull();
+  }
+};
+
+struct ChooseFunction : ScalarFunction {
+  using ScalarFunction::ScalarFunction;
+
+  Result<const Kernel*> DispatchBest(std::vector<ValueDescr>* values) const override {
+    // The first argument is always int64 or promoted to it. The kernel is dispatched
+    // based on the type of the rest of the arguments.
+    RETURN_NOT_OK(CheckArity(*values));
+    EnsureDictionaryDecoded(values);
+    if (values->front().type->id() != Type::INT64) {
+      values->front().type = int64();
+    }
+    if (auto type = CommonNumeric(values->data() + 1, values->size() - 1)) {
+      for (auto it = values->begin() + 1; it != values->end(); it++) {
+        it->type = type;
+      }
+    }
+    if (auto kernel = DispatchExactImpl(this, {values->back()})) return kernel;
+    return arrow::compute::detail::NoMatchingKernel(this, *values);
+  }
+};
+
 Result<ValueDescr> LastType(KernelContext*, const std::vector<ValueDescr>& descrs) {
   ValueDescr result = descrs.back();
   result.shape = GetBroadcastShape(descrs);
@@ -1652,6 +1874,26 @@ void AddPrimitiveCoalesceKernels(const std::shared_ptr<ScalarFunction>& scalar_f
   }
 }
 
+void AddChooseKernel(const std::shared_ptr<ScalarFunction>& scalar_function,
+                     detail::GetTypeId get_id, ArrayKernelExec exec) {
+  ScalarKernel kernel(
+      KernelSignature::Make({Type::INT64, InputType(get_id.id)}, OutputType(LastType),
+                            /*is_varargs=*/true),
+      exec);
+  kernel.null_handling = NullHandling::COMPUTED_PREALLOCATE;
+  kernel.mem_allocation = MemAllocation::PREALLOCATE;
+  kernel.can_write_into_slices = is_fixed_width(get_id.id);
+  DCHECK_OK(scalar_function->AddKernel(std::move(kernel)));
+}
+
+void AddPrimitiveChooseKernels(const std::shared_ptr<ScalarFunction>& scalar_function,
+                               const std::vector<std::shared_ptr<DataType>>& types) {
+  for (auto&& type : types) {
+    auto exec = GenerateTypeAgnosticPrimitive<ChooseFunctor>(*type);
+    AddChooseKernel(scalar_function, type, std::move(exec));
+  }
+}
+
 const FunctionDoc if_else_doc{"Choose values based on a condition",
                               ("`cond` must be a Boolean scalar/ array. \n`left` or "
                                "`right` must be of the same type scalar/ array.\n"
@@ -1679,6 +1921,15 @@ const FunctionDoc coalesce_doc{
      "for which the value is not null. If all inputs are null in a row, the output "
      "will be null."),
     {"*values"}};
+
+const FunctionDoc choose_doc{
+    "Given indices and arrays, choose the value from the corresponding array for each "
+    "index",
+    ("For each row, the value of the first argument is used as a 0-based index into the "
+     "rest of the arguments (i.e. index 0 selects the second argument). The output value "
+     "is the corresponding value of the selected argument.\n"
+     "If an index is null, the output will be null."),
+    {"indices", "*values"}};
 }  // namespace
 
 void RegisterScalarIfElse(FunctionRegistry* registry) {
@@ -1688,7 +1939,8 @@ void RegisterScalarIfElse(FunctionRegistry* registry) {
 
     AddPrimitiveIfElseKernels(func, NumericTypes());
     AddPrimitiveIfElseKernels(func, TemporalTypes());
-    AddPrimitiveIfElseKernels(func, {boolean(), day_time_interval(), month_interval()});
+    AddPrimitiveIfElseKernels(func, IntervalTypes());
+    AddPrimitiveIfElseKernels(func, {boolean()});
     AddNullIfElseKernel(func);
     AddBinaryIfElseKernels(func, BaseBinaryTypes());
     AddFSBinaryIfElseKernel(func);
@@ -1699,8 +1951,8 @@ void RegisterScalarIfElse(FunctionRegistry* registry) {
         "case_when", Arity::VarArgs(/*min_args=*/1), &case_when_doc);
     AddPrimitiveCaseWhenKernels(func, NumericTypes());
     AddPrimitiveCaseWhenKernels(func, TemporalTypes());
-    AddPrimitiveCaseWhenKernels(
-        func, {boolean(), null(), day_time_interval(), month_interval()});
+    AddPrimitiveCaseWhenKernels(func, IntervalTypes());
+    AddPrimitiveCaseWhenKernels(func, {boolean(), null()});
     AddCaseWhenKernel(func, Type::FIXED_SIZE_BINARY,
                       CaseWhenFunctor<FixedSizeBinaryType>::Exec);
     AddCaseWhenKernel(func, Type::DECIMAL128, CaseWhenFunctor<Decimal128Type>::Exec);
@@ -1712,8 +1964,8 @@ void RegisterScalarIfElse(FunctionRegistry* registry) {
         "coalesce", Arity::VarArgs(/*min_args=*/1), &coalesce_doc);
     AddPrimitiveCoalesceKernels(func, NumericTypes());
     AddPrimitiveCoalesceKernels(func, TemporalTypes());
-    AddPrimitiveCoalesceKernels(
-        func, {boolean(), null(), day_time_interval(), month_interval()});
+    AddPrimitiveCoalesceKernels(func, IntervalTypes());
+    AddPrimitiveCoalesceKernels(func, {boolean(), null()});
     AddCoalesceKernel(func, Type::FIXED_SIZE_BINARY,
                       CoalesceFunctor<FixedSizeBinaryType>::Exec);
     AddCoalesceKernel(func, Type::DECIMAL128, CoalesceFunctor<Decimal128Type>::Exec);
@@ -1723,6 +1975,22 @@ void RegisterScalarIfElse(FunctionRegistry* registry) {
     }
     DCHECK_OK(registry->AddFunction(std::move(func)));
   }
+  {
+    auto func = std::make_shared<ChooseFunction>("choose", Arity::VarArgs(/*min_args=*/2),
+                                                 &choose_doc);
+    AddPrimitiveChooseKernels(func, NumericTypes());
+    AddPrimitiveChooseKernels(func, TemporalTypes());
+    AddPrimitiveChooseKernels(func, IntervalTypes());
+    AddPrimitiveChooseKernels(func, {boolean(), null()});
+    AddChooseKernel(func, Type::FIXED_SIZE_BINARY,
+                    ChooseFunctor<FixedSizeBinaryType>::Exec);
+    AddChooseKernel(func, Type::DECIMAL128, ChooseFunctor<Decimal128Type>::Exec);
+    AddChooseKernel(func, Type::DECIMAL256, ChooseFunctor<Decimal256Type>::Exec);
+    for (const auto& ty : BaseBinaryTypes()) {
+      AddChooseKernel(func, ty, GenerateTypeAgnosticVarBinaryBase<ChooseFunctor>(ty));
+    }
+    DCHECK_OK(registry->AddFunction(std::move(func)));
+  }
 }
 
 }  // namespace internal
diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc
index a63492987eb..9b59d54c3da 100644
--- a/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc
@@ -26,7 +26,7 @@
 namespace arrow {
 namespace compute {
 
-const int64_t elems = 1024 * 1024;
+const int64_t kNumItems = 1024 * 1024;
 
 template <typename Type, typename Enable = void>
 struct SetBytesProcessed {};
@@ -282,35 +282,69 @@ static void CoalesceNonNullBench64(benchmark::State& state) {
   return CoalesceBench<Int64Type>(state);
 }
 
-BENCHMARK(IfElseBench32)->Args({elems, 0});
-BENCHMARK(IfElseBench64)->Args({elems, 0});
+template <typename Type>
+static void ChooseBench(benchmark::State& state) {
+  constexpr int kNumChoices = 5;
+  using CType = typename Type::c_type;
+  auto type = TypeTraits<Type>::type_singleton();
+
+  int64_t len = state.range(0);
+  int64_t offset = state.range(1);
+
+  random::RandomArrayGenerator rand(/*seed=*/0);
+
+  std::vector<Datum> arguments;
+  arguments.emplace_back(
+      rand.Int64(len, /*min=*/0, /*max=*/kNumChoices - 1, /*null_probability=*/0.1)
+          ->Slice(offset));
+  for (int i = 0; i < kNumChoices; i++) {
+    arguments.emplace_back(
+        rand.ArrayOf(type, len, /*null_probability=*/0.25)->Slice(offset));
+  }
+
+  for (auto _ : state) {
+    ABORT_NOT_OK(CallFunction("choose", arguments));
+  }
+
+  state.SetBytesProcessed(state.iterations() * (len - offset) * sizeof(CType));
+}
+
+static void ChooseBench64(benchmark::State& state) {
+  return ChooseBench<Int64Type>(state);
+}
+
+BENCHMARK(IfElseBench32)->Args({kNumItems, 0});
+BENCHMARK(IfElseBench64)->Args({kNumItems, 0});
+
+BENCHMARK(IfElseBench32)->Args({kNumItems, 99});
+BENCHMARK(IfElseBench64)->Args({kNumItems, 99});
 
-BENCHMARK(IfElseBench32)->Args({elems, 99});
-BENCHMARK(IfElseBench64)->Args({elems, 99});
+BENCHMARK(IfElseBench32Contiguous)->Args({kNumItems, 0});
+BENCHMARK(IfElseBench64Contiguous)->Args({kNumItems, 0});
 
-BENCHMARK(IfElseBench32Contiguous)->Args({elems, 0});
-BENCHMARK(IfElseBench64Contiguous)->Args({elems, 0});
+BENCHMARK(IfElseBench32Contiguous)->Args({kNumItems, 99});
+BENCHMARK(IfElseBench64Contiguous)->Args({kNumItems, 99});
 
-BENCHMARK(IfElseBench32Contiguous)->Args({elems, 99});
-BENCHMARK(IfElseBench64Contiguous)->Args({elems, 99});
+BENCHMARK(IfElseBenchString32)->Args({kNumItems, 0});
+BENCHMARK(IfElseBenchString64)->Args({kNumItems, 0});
 
-BENCHMARK(IfElseBenchString32)->Args({elems, 0});
-BENCHMARK(IfElseBenchString64)->Args({elems, 0});
+BENCHMARK(IfElseBenchString32Contiguous)->Args({kNumItems, 99});
+BENCHMARK(IfElseBenchString64Contiguous)->Args({kNumItems, 99});
 
-BENCHMARK(IfElseBenchString32Contiguous)->Args({elems, 99});
-BENCHMARK(IfElseBenchString64Contiguous)->Args({elems, 99});
+BENCHMARK(CaseWhenBench64)->Args({kNumItems, 0});
+BENCHMARK(CaseWhenBench64)->Args({kNumItems, 99});
 
-BENCHMARK(CaseWhenBench64)->Args({elems, 0});
-BENCHMARK(CaseWhenBench64)->Args({elems, 99});
+BENCHMARK(CaseWhenBench64Contiguous)->Args({kNumItems, 0});
+BENCHMARK(CaseWhenBench64Contiguous)->Args({kNumItems, 99});
 
-BENCHMARK(CaseWhenBench64Contiguous)->Args({elems, 0});
-BENCHMARK(CaseWhenBench64Contiguous)->Args({elems, 99});
+BENCHMARK(CoalesceBench64)->Args({kNumItems, 0});
+BENCHMARK(CoalesceBench64)->Args({kNumItems, 99});
 
-BENCHMARK(CoalesceBench64)->Args({elems, 0});
-BENCHMARK(CoalesceBench64)->Args({elems, 99});
+BENCHMARK(CoalesceNonNullBench64)->Args({kNumItems, 0});
+BENCHMARK(CoalesceNonNullBench64)->Args({kNumItems, 99});
 
-BENCHMARK(CoalesceNonNullBench64)->Args({elems, 0});
-BENCHMARK(CoalesceNonNullBench64)->Args({elems, 99});
+BENCHMARK(ChooseBench64)->Args({kNumItems, 0});
+BENCHMARK(ChooseBench64)->Args({kNumItems, 99});
 
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
index 48b0cdb457d..f06a6822a0f 100644
--- a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
@@ -1040,5 +1040,205 @@ TEST(TestCoalesce, FixedSizeBinary) {
               ArrayFromJSON(type, R"(["abc", "abc", "abc", "abc"])"));
 }
 
+template <typename Type>
+class TestChooseNumeric : public ::testing::Test {};
+template <typename Type>
+class TestChooseBinary : public ::testing::Test {};
+
+TYPED_TEST_SUITE(TestChooseNumeric, NumericBasedTypes);
+TYPED_TEST_SUITE(TestChooseBinary, BinaryTypes);
+
+TYPED_TEST(TestChooseNumeric, FixedSize) {
+  auto type = default_type_instance<TypeParam>();
+  auto indices1 = ArrayFromJSON(int64(), "[0, 1, 0, 1, null]");
+  auto values1 = ArrayFromJSON(type, "[10, 11, null, null, 14]");
+  auto values2 = ArrayFromJSON(type, "[20, 21, null, null, 24]");
+  auto nulls = ArrayFromJSON(type, "[null, null, null, null, null]");
+  CheckScalar("choose", {indices1, values1, values2},
+              ArrayFromJSON(type, "[10, 21, null, null, null]"));
+  CheckScalar("choose", {indices1, ScalarFromJSON(type, "1"), values1},
+              ArrayFromJSON(type, "[1, 11, 1, null, null]"));
+  // Mixed scalar and array (note CheckScalar checks all-scalar cases for us)
+  CheckScalar("choose", {ScalarFromJSON(int64(), "0"), values1, values2}, values1);
+  CheckScalar("choose", {ScalarFromJSON(int64(), "1"), values1, values2}, values2);
+  CheckScalar("choose", {ScalarFromJSON(int64(), "null"), values1, values2}, nulls);
+  auto scalar1 = ScalarFromJSON(type, "42");
+  CheckScalar("choose", {ScalarFromJSON(int64(), "0"), scalar1, values2},
+              *MakeArrayFromScalar(*scalar1, 5));
+  CheckScalar("choose", {ScalarFromJSON(int64(), "1"), scalar1, values2}, values2);
+  CheckScalar("choose", {ScalarFromJSON(int64(), "null"), values1, values2}, nulls);
+  auto scalar_null = ScalarFromJSON(type, "null");
+  CheckScalar("choose", {ScalarFromJSON(int64(), "0"), scalar_null, values2},
+              *MakeArrayOfNull(type, 5));
+}
+
+TYPED_TEST(TestChooseBinary, Basics) {
+  auto type = default_type_instance<TypeParam>();
+  auto indices1 = ArrayFromJSON(int64(), "[0, 1, 0, 1, null]");
+  auto values1 = ArrayFromJSON(type, R"(["a", "bc", null, null, "def"])");
+  auto values2 = ArrayFromJSON(type, R"(["ghij", "klmno", null, null, "pqrstu"])");
+  auto nulls = ArrayFromJSON(type, "[null, null, null, null, null]");
+  CheckScalar("choose", {indices1, values1, values2},
+              ArrayFromJSON(type, R"(["a", "klmno", null, null, null])"));
+  CheckScalar("choose", {indices1, ScalarFromJSON(type, R"("foo")"), values1},
+              ArrayFromJSON(type, R"(["foo", "bc", "foo", null, null])"));
+  CheckScalar("choose", {ScalarFromJSON(int64(), "0"), values1, values2}, values1);
+  CheckScalar("choose", {ScalarFromJSON(int64(), "1"), values1, values2}, values2);
+  CheckScalar("choose", {ScalarFromJSON(int64(), "null"), values1, values2}, nulls);
+  auto scalar1 = ScalarFromJSON(type, R"("abcd")");
+  CheckScalar("choose", {ScalarFromJSON(int64(), "0"), scalar1, values2},
+              *MakeArrayFromScalar(*scalar1, 5));
+  CheckScalar("choose", {ScalarFromJSON(int64(), "1"), scalar1, values2}, values2);
+  CheckScalar("choose", {ScalarFromJSON(int64(), "null"), values1, values2}, nulls);
+  auto scalar_null = ScalarFromJSON(type, "null");
+  CheckScalar("choose", {ScalarFromJSON(int64(), "0"), scalar_null, values2},
+              *MakeArrayOfNull(type, 5));
+}
+
+TEST(TestChoose, Null) {
+  auto type = null();
+  auto indices1 = ArrayFromJSON(int64(), "[0, 1, 0, 1, null]");
+  auto nulls = *MakeArrayOfNull(type, 5);
+  CheckScalar("choose", {indices1, nulls, nulls}, nulls);
+  CheckScalar("choose", {indices1, MakeNullScalar(type), nulls}, nulls);
+  CheckScalar("choose", {ScalarFromJSON(int64(), "0"), nulls, nulls}, nulls);
+  CheckScalar("choose", {ScalarFromJSON(int64(), "1"), nulls, nulls}, nulls);
+  CheckScalar("choose", {ScalarFromJSON(int64(), "null"), nulls, nulls}, nulls);
+  auto scalar_null = ScalarFromJSON(type, "null");
+  CheckScalar("choose", {ScalarFromJSON(int64(), "0"), scalar_null, nulls}, nulls);
+  CheckScalar("choose", {ScalarFromJSON(int64(), "1"), scalar_null, nulls}, nulls);
+  CheckScalar("choose", {ScalarFromJSON(int64(), "null"), nulls, nulls}, nulls);
+}
+
+TEST(TestChoose, Boolean) {
+  auto type = boolean();
+  auto indices1 = ArrayFromJSON(int64(), "[0, 1, 0, 1, null]");
+  auto values1 = ArrayFromJSON(type, "[true, true, null, null, true]");
+  auto values2 = ArrayFromJSON(type, "[false, false, null, null, false]");
+  auto nulls = ArrayFromJSON(type, "[null, null, null, null, null]");
+  CheckScalar("choose", {indices1, values1, values2},
+              ArrayFromJSON(type, "[true, false, null, null, null]"));
+  CheckScalar("choose", {indices1, ScalarFromJSON(type, "false"), values1},
+              ArrayFromJSON(type, "[false, true, false, null, null]"));
+  CheckScalar("choose", {ScalarFromJSON(int64(), "0"), values1, values2}, values1);
+  CheckScalar("choose", {ScalarFromJSON(int64(), "1"), values1, values2}, values2);
+  CheckScalar("choose", {ScalarFromJSON(int64(), "null"), values1, values2}, nulls);
+  auto scalar1 = ScalarFromJSON(type, "true");
+  CheckScalar("choose", {ScalarFromJSON(int64(), "0"), scalar1, values2},
+              *MakeArrayFromScalar(*scalar1, 5));
+  CheckScalar("choose", {ScalarFromJSON(int64(), "1"), scalar1, values2}, values2);
+  CheckScalar("choose", {ScalarFromJSON(int64(), "null"), values1, values2}, nulls);
+  auto scalar_null = ScalarFromJSON(type, "null");
+  CheckScalar("choose", {ScalarFromJSON(int64(), "0"), scalar_null, values2},
+              *MakeArrayOfNull(type, 5));
+}
+
+TEST(TestChoose, DayTimeInterval) {
+  auto type = day_time_interval();
+  auto indices1 = ArrayFromJSON(int64(), "[0, 1, 0, 1, null]");
+  auto values1 = ArrayFromJSON(type, "[[10, 1], [10, 1], null, null, [10, 1]]");
+  auto values2 = ArrayFromJSON(type, "[[2, 20], [2, 20], null, null, [2, 20]]");
+  auto nulls = ArrayFromJSON(type, "[null, null, null, null, null]");
+  CheckScalar("choose", {indices1, values1, values2},
+              ArrayFromJSON(type, "[[10, 1], [2, 20], null, null, null]"));
+  CheckScalar("choose", {indices1, ScalarFromJSON(type, "[1, 2]"), values1},
+              ArrayFromJSON(type, "[[1, 2], [10, 1], [1, 2], null, null]"));
+  CheckScalar("choose", {ScalarFromJSON(int64(), "0"), values1, values2}, values1);
+  CheckScalar("choose", {ScalarFromJSON(int64(), "1"), values1, values2}, values2);
+  CheckScalar("choose", {ScalarFromJSON(int64(), "null"), values1, values2}, nulls);
+  auto scalar1 = ScalarFromJSON(type, "[10, 1]");
+  CheckScalar("choose", {ScalarFromJSON(int64(), "0"), scalar1, values2},
+              *MakeArrayFromScalar(*scalar1, 5));
+  CheckScalar("choose", {ScalarFromJSON(int64(), "1"), scalar1, values2}, values2);
+  CheckScalar("choose", {ScalarFromJSON(int64(), "null"), values1, values2}, nulls);
+  auto scalar_null = ScalarFromJSON(type, "null");
+  CheckScalar("choose", {ScalarFromJSON(int64(), "0"), scalar_null, values2},
+              *MakeArrayOfNull(type, 5));
+}
+
+TEST(TestChoose, Decimal) {
+  for (const auto& type : {decimal128(3, 2), decimal256(3, 2)}) {
+    auto indices1 = ArrayFromJSON(int64(), "[0, 1, 0, 1, null]");
+    auto values1 = ArrayFromJSON(type, R"(["1.23", "1.24", null, null, "1.25"])");
+    auto values2 = ArrayFromJSON(type, R"(["4.56", "4.57", null, null, "4.58"])");
+    auto nulls = ArrayFromJSON(type, "[null, null, null, null, null]");
+    CheckScalar("choose", {indices1, values1, values2},
+                ArrayFromJSON(type, R"(["1.23", "4.57", null, null, null])"));
+    CheckScalar("choose", {indices1, ScalarFromJSON(type, R"("2.34")"), values1},
+                ArrayFromJSON(type, R"(["2.34", "1.24", "2.34", null, null])"));
+    CheckScalar("choose", {ScalarFromJSON(int64(), "0"), values1, values2}, values1);
+    CheckScalar("choose", {ScalarFromJSON(int64(), "1"), values1, values2}, values2);
+    CheckScalar("choose", {ScalarFromJSON(int64(), "null"), values1, values2}, nulls);
+    auto scalar1 = ScalarFromJSON(type, R"("1.23")");
+    CheckScalar("choose", {ScalarFromJSON(int64(), "0"), scalar1, values2},
+                *MakeArrayFromScalar(*scalar1, 5));
+    CheckScalar("choose", {ScalarFromJSON(int64(), "1"), scalar1, values2}, values2);
+    CheckScalar("choose", {ScalarFromJSON(int64(), "null"), values1, values2}, nulls);
+    auto scalar_null = ScalarFromJSON(type, "null");
+    CheckScalar("choose", {ScalarFromJSON(int64(), "0"), scalar_null, values2},
+                *MakeArrayOfNull(type, 5));
+  }
+}
+
+TEST(TestChoose, FixedSizeBinary) {
+  auto type = fixed_size_binary(3);
+  auto indices1 = ArrayFromJSON(int64(), "[0, 1, 0, 1, null]");
+  auto values1 = ArrayFromJSON(type, R"(["abc", "abd", null, null, "abe"])");
+  auto values2 = ArrayFromJSON(type, R"(["def", "deg", null, null, "deh"])");
+  auto nulls = ArrayFromJSON(type, "[null, null, null, null, null]");
+  CheckScalar("choose", {indices1, values1, values2},
+              ArrayFromJSON(type, R"(["abc", "deg", null, null, null])"));
+  CheckScalar("choose", {indices1, ScalarFromJSON(type, R"("xyz")"), values1},
+              ArrayFromJSON(type, R"(["xyz", "abd", "xyz", null, null])"));
+  CheckScalar("choose", {ScalarFromJSON(int64(), "0"), values1, values2}, values1);
+  CheckScalar("choose", {ScalarFromJSON(int64(), "1"), values1, values2}, values2);
+  CheckScalar("choose", {ScalarFromJSON(int64(), "null"), values1, values2}, nulls);
+  auto scalar1 = ScalarFromJSON(type, R"("abc")");
+  CheckScalar("choose", {ScalarFromJSON(int64(), "0"), scalar1, values2},
+              *MakeArrayFromScalar(*scalar1, 5));
+  CheckScalar("choose", {ScalarFromJSON(int64(), "1"), scalar1, values2}, values2);
+  CheckScalar("choose", {ScalarFromJSON(int64(), "null"), values1, values2}, nulls);
+  auto scalar_null = ScalarFromJSON(type, "null");
+  CheckScalar("choose", {ScalarFromJSON(int64(), "0"), scalar_null, values2},
+              *MakeArrayOfNull(type, 5));
+}
+
+TEST(TestChooseKernel, DispatchBest) {
+  ASSERT_OK_AND_ASSIGN(auto function, GetFunctionRegistry()->GetFunction("choose"));
+  auto Check = [&](std::vector<ValueDescr> original_values) {
+    auto values = original_values;
+    ARROW_EXPECT_OK(function->DispatchBest(&values));
+    return values;
+  };
+
+  // Since DispatchBest for this kernel pulls tricks, we can't compare it to DispatchExact
+  // as CheckDispatchBest does
+  for (auto ty :
+       {int8(), int16(), int32(), int64(), uint8(), uint16(), uint32(), uint64()}) {
+    // Index always promoted to int64
+    EXPECT_EQ((std::vector<ValueDescr>{int64(), ty}), Check({ty, ty}));
+    EXPECT_EQ((std::vector<ValueDescr>{int64(), int64(), int64()}),
+              Check({ty, ty, int64()}));
+  }
+  // Other arguments promoted separately from index
+  EXPECT_EQ((std::vector<ValueDescr>{int64(), int32(), int32()}),
+            Check({int8(), int32(), uint8()}));
+}
+
+TEST(TestChooseKernel, Errors) {
+  ASSERT_RAISES(Invalid, CallFunction("choose", {}));
+  ASSERT_RAISES(Invalid, CallFunction("choose", {ArrayFromJSON(int64(), "[]")}));
+  ASSERT_RAISES(Invalid, CallFunction("choose", {ArrayFromJSON(utf8(), "[\"a\"]"),
+                                                 ArrayFromJSON(int64(), "[0]")}));
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      IndexError, ::testing::HasSubstr("choose: index 1 out of range"),
+      CallFunction("choose",
+                   {ArrayFromJSON(int64(), "[1]"), ArrayFromJSON(int32(), "[0]")}));
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      IndexError, ::testing::HasSubstr("choose: index -1 out of range"),
+      CallFunction("choose",
+                   {ArrayFromJSON(int64(), "[-1]"), ArrayFromJSON(int32(), "[0]")}));
+}
+
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/vector_replace.cc b/cpp/src/arrow/compute/kernels/vector_replace.cc
index 644aec2a4e9..450f99d7826 100644
--- a/cpp/src/arrow/compute/kernels/vector_replace.cc
+++ b/cpp/src/arrow/compute/kernels/vector_replace.cc
@@ -520,10 +520,11 @@ void RegisterVectorReplace(FunctionRegistry* registry) {
   for (const auto& ty : TemporalTypes()) {
     add_primitive_kernel(ty);
   }
+  for (const auto& ty : IntervalTypes()) {
+    add_primitive_kernel(ty);
+  }
   add_primitive_kernel(null());
   add_primitive_kernel(boolean());
-  add_primitive_kernel(day_time_interval());
-  add_primitive_kernel(month_interval());
   add_kernel(Type::FIXED_SIZE_BINARY, ReplaceWithMaskFunctor<FixedSizeBinaryType>::Exec);
   add_kernel(Type::DECIMAL128, ReplaceWithMaskFunctor<Decimal128Type>::Exec);
   add_kernel(Type::DECIMAL256, ReplaceWithMaskFunctor<Decimal256Type>::Exec);
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index b12d0f2efde..0540f806522 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -895,25 +895,27 @@ Structural transforms
 +==========================+============+===================================================+=====================+=========+
 | case_when                | Varargs    | Struct of Boolean (Arg 0), Any fixed-width (rest) | Input type          | \(1)    |
 +--------------------------+------------+---------------------------------------------------+---------------------+---------+
-| coalesce                 | Varargs    | Any                                               | Input type          | \(2)    |
+| choose                   | Varargs    | Integral (Arg 0); Fixed-width/Binary-like (rest)  | Input type          | \(2)    |
 +--------------------------+------------+---------------------------------------------------+---------------------+---------+
-| fill_null                | Binary     | Boolean, Null, Numeric, Temporal, String-like     | Input type          | \(3)    |
+| coalesce                 | Varargs    | Any                                               | Input type          | \(3)    |
 +--------------------------+------------+---------------------------------------------------+---------------------+---------+
-| if_else                  | Ternary    | Boolean, Null, Numeric, Temporal                  | Input type          | \(4)    |
+| fill_null                | Binary     | Boolean, Null, Numeric, Temporal, String-like     | Input type          | \(4)    |
 +--------------------------+------------+---------------------------------------------------+---------------------+---------+
-| is_finite                | Unary      | Float, Double                                     | Boolean             | \(5)    |
+| if_else                  | Ternary    | Boolean, Null, Numeric, Temporal                  | Input type          | \(5)    |
 +--------------------------+------------+---------------------------------------------------+---------------------+---------+
-| is_inf                   | Unary      | Float, Double                                     | Boolean             | \(6)    |
+| is_finite                | Unary      | Float, Double                                     | Boolean             | \(6)    |
 +--------------------------+------------+---------------------------------------------------+---------------------+---------+
-| is_nan                   | Unary      | Float, Double                                     | Boolean             | \(7)    |
+| is_inf                   | Unary      | Float, Double                                     | Boolean             | \(7)    |
 +--------------------------+------------+---------------------------------------------------+---------------------+---------+
-| is_null                  | Unary      | Any                                               | Boolean             | \(8)    |
+| is_nan                   | Unary      | Float, Double                                     | Boolean             | \(8)    |
 +--------------------------+------------+---------------------------------------------------+---------------------+---------+
-| is_valid                 | Unary      | Any                                               | Boolean             | \(9)    |
+| is_null                  | Unary      | Any                                               | Boolean             | \(9)    |
 +--------------------------+------------+---------------------------------------------------+---------------------+---------+
-| list_value_length        | Unary      | List-like                                         | Int32 or Int64      | \(10)   |
+| is_valid                 | Unary      | Any                                               | Boolean             | \(10)   |
 +--------------------------+------------+---------------------------------------------------+---------------------+---------+
-| make_struct              | Varargs    | Any                                               | Struct              | \(11)   |
+| list_value_length        | Unary      | List-like                                         | Int32 or Int64      | \(11)   |
++--------------------------+------------+---------------------------------------------------+---------------------+---------+
+| make_struct              | Varargs    | Any                                               | Struct              | \(12)   |
 +--------------------------+------------+---------------------------------------------------+---------------------+---------+
 
 * \(1) This function acts like a SQL 'case when' statement or switch-case. The
@@ -925,14 +927,22 @@ Structural transforms
   the first value datum for which the corresponding Boolean is true, or the
   corresponding value from the 'default' input, or null otherwise.
 
-* \(2) Each row of the output will be the corresponding value of the first
+* \(2) The first input must be an integral type. The rest of the arguments can be
+  any type, but must all be the same type or promotable to a common type. Each
+  value of the first input (the 'index') is used as a zero-based index into the
+  remaining arguments (i.e. index 0 is the second argument, index 1 is the third
+  argument, etc.), and the value of the output for that row will be the
+  corresponding value of the selected input at that row. If the index is null,
+  then the output will also be null.
+
+* \(3) Each row of the output will be the corresponding value of the first
   input which is non-null for that row, otherwise null.
 
-* \(3) First input must be an array, second input a scalar of the same type.
+* \(4) First input must be an array, second input a scalar of the same type.
   Output is an array of the same type as the inputs, and with the same values
   as the first input, except for nulls replaced with the second input value.
 
-* \(4) First input must be a Boolean scalar or array. Second and third inputs
+* \(5) First input must be a Boolean scalar or array. Second and third inputs
   could be scalars or arrays and must be of the same type. Output is an array
   (or scalar if all inputs are scalar) of the same type as the second/ third
   input. If the nulls present on the first input, they will be promoted to the
@@ -940,21 +950,21 @@ Structural transforms
 
   Also see: :ref:`replace_with_mask <cpp-compute-vector-structural-transforms>`.
 
-* \(5) Output is true iff the corresponding input element is finite (not Infinity,
+* \(6) Output is true iff the corresponding input element is finite (not Infinity,
   -Infinity, or NaN).
 
-* \(6) Output is true iff the corresponding input element is Infinity/-Infinity.
+* \(7) Output is true iff the corresponding input element is Infinity/-Infinity.
 
-* \(7) Output is true iff the corresponding input element is NaN.
+* \(8) Output is true iff the corresponding input element is NaN.
 
-* \(8) Output is true iff the corresponding input element is null.
+* \(9) Output is true iff the corresponding input element is null.
 
-* \(9) Output is true iff the corresponding input element is non-null.
+* \(10) Output is true iff the corresponding input element is non-null.
 
-* \(10) Each output element is the length of the corresponding input element
+* \(11) Each output element is the length of the corresponding input element
   (null if input is null).  Output type is Int32 for List, Int64 for LargeList.
 
-* \(11) The output struct's field types are the types of its arguments. The
+* \(12) The output struct's field types are the types of its arguments. The
   field names are specified using an instance of :struct:`MakeStructOptions`.
   The output shape will be scalar if all inputs are scalar, otherwise any
   scalars will be broadcast to arrays.
diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst
index b3ab086899a..08d6e9da051 100644
--- a/docs/source/python/api/compute.rst
+++ b/docs/source/python/api/compute.rst
@@ -354,6 +354,7 @@ Structural Transforms
 
    binary_length
    case_when
+   choose
    coalesce
    fill_null
    if_else

From 71a57b156a077de29a879fd5627e366684c140ac Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Wed, 4 Aug 2021 14:26:57 -0400
Subject: [PATCH 706/719] ARROW-13552: [C++] Remove deprecated APIs

Remove APIs that have been deprecated for long enough.

Closes #10868 from pitrou/ARROW-13552-cpp-deprecated-apis

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Benjamin Kietzman <bengilgit@gmail.com>
---
 cpp/src/arrow/array/array_nested.cc |  2 -
 cpp/src/arrow/array/array_nested.h  |  6 ---
 cpp/src/arrow/array/concatenate.cc  |  5 ---
 cpp/src/arrow/array/concatenate.h   |  5 ---
 cpp/src/arrow/compute/api_vector.cc | 39 ----------------
 cpp/src/arrow/compute/api_vector.h  | 36 ---------------
 cpp/src/arrow/csv/reader.h          |  6 ++-
 cpp/src/arrow/ipc/writer.h          |  4 +-
 cpp/src/arrow/json/reader.cc        |  9 ----
 cpp/src/arrow/json/reader.h         |  8 ----
 cpp/src/arrow/python/pyarrow.cc     |  3 --
 cpp/src/arrow/python/pyarrow.h      | 13 +++---
 cpp/src/arrow/sparse_tensor.h       |  7 ---
 cpp/src/arrow/tensor.h              |  4 --
 cpp/src/arrow/type.h                | 15 ++-----
 cpp/src/arrow/type_fwd.h            | 69 ++---------------------------
 cpp/src/arrow/util/compiler_util.h  | 22 ---------
 cpp/src/parquet/schema.h            |  3 --
 18 files changed, 18 insertions(+), 238 deletions(-)
 delete mode 100644 cpp/src/arrow/util/compiler_util.h

diff --git a/cpp/src/arrow/array/array_nested.cc b/cpp/src/arrow/array/array_nested.cc
index f967127c5f1..102a82512e1 100644
--- a/cpp/src/arrow/array/array_nested.cc
+++ b/cpp/src/arrow/array/array_nested.cc
@@ -730,8 +730,6 @@ Result<std::shared_ptr<Array>> SparseUnionArray::Make(
   return std::make_shared<SparseUnionArray>(std::move(internal_data));
 }
 
-std::shared_ptr<Array> UnionArray::child(int i) const { return field(i); }
-
 std::shared_ptr<Array> UnionArray::field(int i) const {
   if (i < 0 ||
       static_cast<decltype(boxed_fields_)::size_type>(i) >= boxed_fields_.size()) {
diff --git a/cpp/src/arrow/array/array_nested.h b/cpp/src/arrow/array/array_nested.h
index b0edb9591c5..bd5abaa3a8f 100644
--- a/cpp/src/arrow/array/array_nested.h
+++ b/cpp/src/arrow/array/array_nested.h
@@ -390,12 +390,6 @@ class ARROW_EXPORT UnionArray : public Array {
 
   UnionMode::type mode() const { return union_type_->mode(); }
 
-  // Return the given field as an individual array.
-  // For sparse unions, the returned array has its offset, length and null
-  // count adjusted.
-  ARROW_DEPRECATED("Deprecated in 1.0.0. Use field(pos)")
-  std::shared_ptr<Array> child(int pos) const;
-
   /// \brief Return the given field as an individual array.
   ///
   /// For sparse unions, the returned array has its offset, length and null
diff --git a/cpp/src/arrow/array/concatenate.cc b/cpp/src/arrow/array/concatenate.cc
index 32478783394..e2a5898c209 100644
--- a/cpp/src/arrow/array/concatenate.cc
+++ b/cpp/src/arrow/array/concatenate.cc
@@ -482,9 +482,4 @@ Result<std::shared_ptr<Array>> Concatenate(const ArrayVector& arrays, MemoryPool
   return MakeArray(std::move(out_data));
 }
 
-Status Concatenate(const ArrayVector& arrays, MemoryPool* pool,
-                   std::shared_ptr<Array>* out) {
-  return Concatenate(arrays, pool).Value(out);
-}
-
 }  // namespace arrow
diff --git a/cpp/src/arrow/array/concatenate.h b/cpp/src/arrow/array/concatenate.h
index a6c1c3cf3c1..e7597aad812 100644
--- a/cpp/src/arrow/array/concatenate.h
+++ b/cpp/src/arrow/array/concatenate.h
@@ -34,9 +34,4 @@ ARROW_EXPORT
 Result<std::shared_ptr<Array>> Concatenate(const ArrayVector& arrays,
                                            MemoryPool* pool = default_memory_pool());
 
-ARROW_DEPRECATED("Use Result-returning version")
-ARROW_EXPORT
-Status Concatenate(const ArrayVector& arrays, MemoryPool* pool,
-                   std::shared_ptr<Array>* out);
-
 }  // namespace arrow
diff --git a/cpp/src/arrow/compute/api_vector.cc b/cpp/src/arrow/compute/api_vector.cc
index a68969b2ee5..9f3b3fa71b3 100644
--- a/cpp/src/arrow/compute/api_vector.cc
+++ b/cpp/src/arrow/compute/api_vector.cc
@@ -236,45 +236,6 @@ Result<std::shared_ptr<Array>> Take(const Array& values, const Array& indices,
 // ----------------------------------------------------------------------
 // Deprecated functions
 
-Result<std::shared_ptr<ChunkedArray>> Take(const ChunkedArray& values,
-                                           const Array& indices,
-                                           const TakeOptions& options, ExecContext* ctx) {
-  ARROW_ASSIGN_OR_RAISE(Datum result, Take(Datum(values), Datum(indices), options, ctx));
-  return result.chunked_array();
-}
-
-Result<std::shared_ptr<ChunkedArray>> Take(const ChunkedArray& values,
-                                           const ChunkedArray& indices,
-                                           const TakeOptions& options, ExecContext* ctx) {
-  ARROW_ASSIGN_OR_RAISE(Datum result, Take(Datum(values), Datum(indices), options, ctx));
-  return result.chunked_array();
-}
-
-Result<std::shared_ptr<ChunkedArray>> Take(const Array& values,
-                                           const ChunkedArray& indices,
-                                           const TakeOptions& options, ExecContext* ctx) {
-  ARROW_ASSIGN_OR_RAISE(Datum result, Take(Datum(values), Datum(indices), options, ctx));
-  return result.chunked_array();
-}
-
-Result<std::shared_ptr<RecordBatch>> Take(const RecordBatch& batch, const Array& indices,
-                                          const TakeOptions& options, ExecContext* ctx) {
-  ARROW_ASSIGN_OR_RAISE(Datum result, Take(Datum(batch), Datum(indices), options, ctx));
-  return result.record_batch();
-}
-
-Result<std::shared_ptr<Table>> Take(const Table& table, const Array& indices,
-                                    const TakeOptions& options, ExecContext* ctx) {
-  ARROW_ASSIGN_OR_RAISE(Datum result, Take(Datum(table), Datum(indices), options, ctx));
-  return result.table();
-}
-
-Result<std::shared_ptr<Table>> Take(const Table& table, const ChunkedArray& indices,
-                                    const TakeOptions& options, ExecContext* ctx) {
-  ARROW_ASSIGN_OR_RAISE(Datum result, Take(Datum(table), Datum(indices), options, ctx));
-  return result.table();
-}
-
 Result<std::shared_ptr<Array>> SortToIndices(const Array& values, ExecContext* ctx) {
   return SortIndices(values, SortOrder::Ascending, ctx);
 }
diff --git a/cpp/src/arrow/compute/api_vector.h b/cpp/src/arrow/compute/api_vector.h
index 32439980f54..2d9522b0732 100644
--- a/cpp/src/arrow/compute/api_vector.h
+++ b/cpp/src/arrow/compute/api_vector.h
@@ -366,42 +366,6 @@ Result<Datum> DictionaryEncode(
 // ----------------------------------------------------------------------
 // Deprecated functions
 
-ARROW_DEPRECATED("Deprecated in 1.0.0. Use Datum-based version")
-ARROW_EXPORT
-Result<std::shared_ptr<ChunkedArray>> Take(
-    const ChunkedArray& values, const Array& indices,
-    const TakeOptions& options = TakeOptions::Defaults(), ExecContext* context = NULLPTR);
-
-ARROW_DEPRECATED("Deprecated in 1.0.0. Use Datum-based version")
-ARROW_EXPORT
-Result<std::shared_ptr<ChunkedArray>> Take(
-    const ChunkedArray& values, const ChunkedArray& indices,
-    const TakeOptions& options = TakeOptions::Defaults(), ExecContext* context = NULLPTR);
-
-ARROW_DEPRECATED("Deprecated in 1.0.0. Use Datum-based version")
-ARROW_EXPORT
-Result<std::shared_ptr<ChunkedArray>> Take(
-    const Array& values, const ChunkedArray& indices,
-    const TakeOptions& options = TakeOptions::Defaults(), ExecContext* context = NULLPTR);
-
-ARROW_DEPRECATED("Deprecated in 1.0.0. Use Datum-based version")
-ARROW_EXPORT
-Result<std::shared_ptr<RecordBatch>> Take(
-    const RecordBatch& batch, const Array& indices,
-    const TakeOptions& options = TakeOptions::Defaults(), ExecContext* context = NULLPTR);
-
-ARROW_DEPRECATED("Deprecated in 1.0.0. Use Datum-based version")
-ARROW_EXPORT
-Result<std::shared_ptr<Table>> Take(const Table& table, const Array& indices,
-                                    const TakeOptions& options = TakeOptions::Defaults(),
-                                    ExecContext* context = NULLPTR);
-
-ARROW_DEPRECATED("Deprecated in 1.0.0. Use Datum-based version")
-ARROW_EXPORT
-Result<std::shared_ptr<Table>> Take(const Table& table, const ChunkedArray& indices,
-                                    const TakeOptions& options = TakeOptions::Defaults(),
-                                    ExecContext* context = NULLPTR);
-
 ARROW_DEPRECATED("Deprecated in 3.0.0. Use SortIndices()")
 ARROW_EXPORT
 Result<std::shared_ptr<Array>> SortToIndices(const Array& values,
diff --git a/cpp/src/arrow/csv/reader.h b/cpp/src/arrow/csv/reader.h
index 48f02882b10..253db689296 100644
--- a/cpp/src/arrow/csv/reader.h
+++ b/cpp/src/arrow/csv/reader.h
@@ -53,7 +53,9 @@ class ARROW_EXPORT TableReader {
                                                    const ParseOptions&,
                                                    const ConvertOptions&);
 
-  ARROW_DEPRECATED("Use MemoryPool-less variant (the IOContext holds a pool already)")
+  ARROW_DEPRECATED(
+      "Deprecated in 4.0.0. "
+      "Use MemoryPool-less variant (the IOContext holds a pool already)")
   static Result<std::shared_ptr<TableReader>> Make(
       MemoryPool* pool, io::IOContext io_context, std::shared_ptr<io::InputStream> input,
       const ReadOptions&, const ParseOptions&, const ConvertOptions&);
@@ -104,7 +106,7 @@ class ARROW_EXPORT StreamingReader : public RecordBatchReader {
       io::IOContext io_context, std::shared_ptr<io::InputStream> input,
       const ReadOptions&, const ParseOptions&, const ConvertOptions&);
 
-  ARROW_DEPRECATED("Use IOContext-based overload")
+  ARROW_DEPRECATED("Deprecated in 4.0.0. Use IOContext-based overload")
   static Result<std::shared_ptr<StreamingReader>> Make(
       MemoryPool* pool, std::shared_ptr<io::InputStream> input,
       const ReadOptions& read_options, const ParseOptions& parse_options,
diff --git a/cpp/src/arrow/ipc/writer.h b/cpp/src/arrow/ipc/writer.h
index 0ea83d7630a..e976b41a1c5 100644
--- a/cpp/src/arrow/ipc/writer.h
+++ b/cpp/src/arrow/ipc/writer.h
@@ -167,13 +167,13 @@ Result<std::shared_ptr<RecordBatchWriter>> MakeFileWriter(
 
 /// @}
 
-ARROW_DEPRECATED("Use MakeStreamWriter")
+ARROW_DEPRECATED("Deprecated in 3.0.0. Use MakeStreamWriter")
 ARROW_EXPORT
 Result<std::shared_ptr<RecordBatchWriter>> NewStreamWriter(
     io::OutputStream* sink, const std::shared_ptr<Schema>& schema,
     const IpcWriteOptions& options = IpcWriteOptions::Defaults());
 
-ARROW_DEPRECATED("Use MakeFileWriter")
+ARROW_DEPRECATED("Deprecated in 2.0.0. Use MakeFileWriter")
 ARROW_EXPORT
 Result<std::shared_ptr<RecordBatchWriter>> NewFileWriter(
     io::OutputStream* sink, const std::shared_ptr<Schema>& schema,
diff --git a/cpp/src/arrow/json/reader.cc b/cpp/src/arrow/json/reader.cc
index 51c77fa4df9..18aed0235ff 100644
--- a/cpp/src/arrow/json/reader.cc
+++ b/cpp/src/arrow/json/reader.cc
@@ -168,8 +168,6 @@ class TableReaderImpl : public TableReader,
   std::shared_ptr<ChunkedArrayBuilder> builder_;
 };
 
-Status TableReader::Read(std::shared_ptr<Table>* out) { return Read().Value(out); }
-
 Result<std::shared_ptr<TableReader>> TableReader::Make(
     MemoryPool* pool, std::shared_ptr<io::InputStream> input,
     const ReadOptions& read_options, const ParseOptions& parse_options) {
@@ -185,13 +183,6 @@ Result<std::shared_ptr<TableReader>> TableReader::Make(
   return ptr;
 }
 
-Status TableReader::Make(MemoryPool* pool, std::shared_ptr<io::InputStream> input,
-                         const ReadOptions& read_options,
-                         const ParseOptions& parse_options,
-                         std::shared_ptr<TableReader>* out) {
-  return TableReader::Make(pool, input, read_options, parse_options).Value(out);
-}
-
 Result<std::shared_ptr<RecordBatch>> ParseOne(ParseOptions options,
                                               std::shared_ptr<Buffer> json) {
   std::unique_ptr<BlockParser> parser;
diff --git a/cpp/src/arrow/json/reader.h b/cpp/src/arrow/json/reader.h
index c40338c1e1c..3374931a043 100644
--- a/cpp/src/arrow/json/reader.h
+++ b/cpp/src/arrow/json/reader.h
@@ -50,19 +50,11 @@ class ARROW_EXPORT TableReader {
   /// Read the entire JSON file and convert it to a Arrow Table
   virtual Result<std::shared_ptr<Table>> Read() = 0;
 
-  ARROW_DEPRECATED("Use Result-returning version")
-  Status Read(std::shared_ptr<Table>* out);
-
   /// Create a TableReader instance
   static Result<std::shared_ptr<TableReader>> Make(MemoryPool* pool,
                                                    std::shared_ptr<io::InputStream> input,
                                                    const ReadOptions&,
                                                    const ParseOptions&);
-
-  ARROW_DEPRECATED("Use Result-returning version")
-  static Status Make(MemoryPool* pool, std::shared_ptr<io::InputStream> input,
-                     const ReadOptions&, const ParseOptions&,
-                     std::shared_ptr<TableReader>* out);
 };
 
 ARROW_EXPORT Result<std::shared_ptr<RecordBatch>> ParseOne(ParseOptions options,
diff --git a/cpp/src/arrow/python/pyarrow.cc b/cpp/src/arrow/python/pyarrow.cc
index bea35ff3b61..c3244b74bf5 100644
--- a/cpp/src/arrow/python/pyarrow.cc
+++ b/cpp/src/arrow/python/pyarrow.cc
@@ -57,9 +57,6 @@ int import_pyarrow() {
     } else {                                                                            \
       return UnwrapError(obj, #TYPE_NAME);                                              \
     }                                                                                   \
-  }                                                                                     \
-  Status unwrap_##FUNC_SUFFIX(PyObject* obj, std::shared_ptr<TYPE_NAME>* out) {         \
-    return unwrap_##FUNC_SUFFIX(obj).Value(out);                                        \
   }
 
 DEFINE_WRAP_FUNCTIONS(buffer, Buffer)
diff --git a/cpp/src/arrow/python/pyarrow.h b/cpp/src/arrow/python/pyarrow.h
index 8056e700a0c..4c365081d70 100644
--- a/cpp/src/arrow/python/pyarrow.h
+++ b/cpp/src/arrow/python/pyarrow.h
@@ -45,14 +45,11 @@ namespace py {
 // Returns 0 on success, -1 on error.
 ARROW_PYTHON_EXPORT int import_pyarrow();
 
-#define DECLARE_WRAP_FUNCTIONS(FUNC_SUFFIX, TYPE_NAME)                                 \
-  ARROW_PYTHON_EXPORT bool is_##FUNC_SUFFIX(PyObject*);                                \
-  ARROW_PYTHON_EXPORT Result<std::shared_ptr<TYPE_NAME>> unwrap_##FUNC_SUFFIX(         \
-      PyObject*);                                                                      \
-  ARROW_PYTHON_EXPORT PyObject* wrap_##FUNC_SUFFIX(const std::shared_ptr<TYPE_NAME>&); \
-  ARROW_DEPRECATED("Use Result-returning version")                                     \
-  ARROW_PYTHON_EXPORT Status unwrap_##FUNC_SUFFIX(PyObject*,                           \
-                                                  std::shared_ptr<TYPE_NAME>* out);
+#define DECLARE_WRAP_FUNCTIONS(FUNC_SUFFIX, TYPE_NAME)                         \
+  ARROW_PYTHON_EXPORT bool is_##FUNC_SUFFIX(PyObject*);                        \
+  ARROW_PYTHON_EXPORT Result<std::shared_ptr<TYPE_NAME>> unwrap_##FUNC_SUFFIX( \
+      PyObject*);                                                              \
+  ARROW_PYTHON_EXPORT PyObject* wrap_##FUNC_SUFFIX(const std::shared_ptr<TYPE_NAME>&);
 
 DECLARE_WRAP_FUNCTIONS(buffer, Buffer)
 
diff --git a/cpp/src/arrow/sparse_tensor.h b/cpp/src/arrow/sparse_tensor.h
index 1f2f8c0d82e..4ec824dfa7d 100644
--- a/cpp/src/arrow/sparse_tensor.h
+++ b/cpp/src/arrow/sparse_tensor.h
@@ -508,13 +508,6 @@ class ARROW_EXPORT SparseTensor {
     return ToTensor(default_memory_pool());
   }
 
-  /// \brief Status-return version of ToTensor().
-  ARROW_DEPRECATED("Use Result-returning version")
-  Status ToTensor(std::shared_ptr<Tensor>* out) const { return ToTensor().Value(out); }
-  Status ToTensor(MemoryPool* pool, std::shared_ptr<Tensor>* out) const {
-    return ToTensor(pool).Value(out);
-  }
-
  protected:
   // Constructor with all attributes
   SparseTensor(const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data,
diff --git a/cpp/src/arrow/tensor.h b/cpp/src/arrow/tensor.h
index 91e9ad26066..ff6f3735f91 100644
--- a/cpp/src/arrow/tensor.h
+++ b/cpp/src/arrow/tensor.h
@@ -152,10 +152,6 @@ class ARROW_EXPORT Tensor {
   /// Compute the number of non-zero values in the tensor
   Result<int64_t> CountNonZero() const;
 
-  /// Compute the number of non-zero values in the tensor
-  ARROW_DEPRECATED("Use Result-returning version")
-  Status CountNonZero(int64_t* result) const { return CountNonZero().Value(result); }
-
   /// Return the offset of the given index on the given strides
   static int64_t CalculateValueOffset(const std::vector<int64_t>& strides,
                                       const std::vector<int64_t>& index) {
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index 005b4458b91..506fb785957 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -124,22 +124,13 @@ class ARROW_EXPORT DataType : public detail::Fingerprintable {
   /// \brief Return whether the types are equal
   bool Equals(const std::shared_ptr<DataType>& other) const;
 
-  ARROW_DEPRECATED("Use field(i)")
-  const std::shared_ptr<Field>& child(int i) const { return field(i); }
-
-  /// Returns the child-field at index i.
+  /// \brief Return the child field at index i.
   const std::shared_ptr<Field>& field(int i) const { return children_[i]; }
 
-  ARROW_DEPRECATED("Use fields()")
-  const std::vector<std::shared_ptr<Field>>& children() const { return fields(); }
-
-  /// \brief Returns the children fields associated with this type.
+  /// \brief Return the children fields associated with this type.
   const std::vector<std::shared_ptr<Field>>& fields() const { return children_; }
 
-  ARROW_DEPRECATED("Use num_fields()")
-  int num_children() const { return num_fields(); }
-
-  /// \brief Returns the number of children fields associated with this type.
+  /// \brief Return the number of children fields associated with this type.
   int num_fields() const { return static_cast<int>(children_.size()); }
 
   Status Accept(TypeVisitor* visitor) const;
diff --git a/cpp/src/arrow/type_fwd.h b/cpp/src/arrow/type_fwd.h
index 7e564106bbe..d77f519a3c5 100644
--- a/cpp/src/arrow/type_fwd.h
+++ b/cpp/src/arrow/type_fwd.h
@@ -536,80 +536,19 @@ struct_(const std::vector<std::shared_ptr<Field>>& fields);
 /// \brief Create a SparseUnionType instance
 std::shared_ptr<DataType> ARROW_EXPORT sparse_union(FieldVector child_fields,
                                                     std::vector<int8_t> type_codes = {});
-/// \brief Create a DenseUnionType instance
-std::shared_ptr<DataType> ARROW_EXPORT dense_union(FieldVector child_fields,
-                                                   std::vector<int8_t> type_codes = {});
-
 /// \brief Create a SparseUnionType instance
 std::shared_ptr<DataType> ARROW_EXPORT
 sparse_union(const ArrayVector& children, std::vector<std::string> field_names = {},
              std::vector<int8_t> type_codes = {});
+
+/// \brief Create a DenseUnionType instance
+std::shared_ptr<DataType> ARROW_EXPORT dense_union(FieldVector child_fields,
+                                                   std::vector<int8_t> type_codes = {});
 /// \brief Create a DenseUnionType instance
 std::shared_ptr<DataType> ARROW_EXPORT
 dense_union(const ArrayVector& children, std::vector<std::string> field_names = {},
             std::vector<int8_t> type_codes = {});
 
-/// \brief Create a UnionType instance
-ARROW_DEPRECATED("Deprecated in 1.0.0")
-inline std::shared_ptr<DataType> ARROW_EXPORT
-union_(const std::vector<std::shared_ptr<Field>>& child_fields,
-       const std::vector<int8_t>& type_codes, UnionMode::type mode = UnionMode::SPARSE) {
-  if (mode == UnionMode::SPARSE) {
-    return sparse_union(child_fields, type_codes);
-  } else {
-    return dense_union(child_fields, type_codes);
-  }
-}
-
-/// \brief Create a UnionType instance
-ARROW_DEPRECATED("Deprecated in 1.0.0")
-inline std::shared_ptr<DataType> ARROW_EXPORT
-union_(const std::vector<std::shared_ptr<Field>>& child_fields,
-       UnionMode::type mode = UnionMode::SPARSE) {
-  if (mode == UnionMode::SPARSE) {
-    return sparse_union(child_fields);
-  } else {
-    return dense_union(child_fields);
-  }
-}
-
-/// \brief Create a UnionType instance
-ARROW_DEPRECATED("Deprecated in 1.0.0")
-inline std::shared_ptr<DataType> ARROW_EXPORT
-union_(const std::vector<std::shared_ptr<Array>>& children,
-       const std::vector<std::string>& field_names, const std::vector<int8_t>& type_codes,
-       UnionMode::type mode = UnionMode::SPARSE) {
-  if (mode == UnionMode::SPARSE) {
-    return sparse_union(children, field_names, type_codes);
-  } else {
-    return dense_union(children, field_names, type_codes);
-  }
-}
-
-/// \brief Create a UnionType instance
-ARROW_DEPRECATED("Deprecated in 1.0.0")
-inline std::shared_ptr<DataType> ARROW_EXPORT
-union_(const std::vector<std::shared_ptr<Array>>& children,
-       const std::vector<std::string>& field_names,
-       UnionMode::type mode = UnionMode::SPARSE) {
-  if (mode == UnionMode::SPARSE) {
-    return sparse_union(children, field_names);
-  } else {
-    return dense_union(children, field_names);
-  }
-}
-
-/// \brief Create a UnionType instance
-ARROW_DEPRECATED("Deprecated in 1.0.0")
-inline std::shared_ptr<DataType> ARROW_EXPORT
-union_(const std::vector<std::shared_ptr<Array>>& children,
-       UnionMode::type mode = UnionMode::SPARSE) {
-  if (mode == UnionMode::SPARSE) {
-    return sparse_union(children);
-  } else {
-    return dense_union(children);
-  }
-}
 /// \brief Create a DictionaryType instance
 /// \param[in] index_type the type of the dictionary indices (must be
 /// a signed integer)
diff --git a/cpp/src/arrow/util/compiler_util.h b/cpp/src/arrow/util/compiler_util.h
deleted file mode 100644
index ac1745074a1..00000000000
--- a/cpp/src/arrow/util/compiler_util.h
+++ /dev/null
@@ -1,22 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// Deprecated header, here for backwards compatibility in parquet-cpp
-
-#pragma once
-
-#include "arrow/util/macros.h"
diff --git a/cpp/src/parquet/schema.h b/cpp/src/parquet/schema.h
index 7dcfa7d144e..83d0cf24f1e 100644
--- a/cpp/src/parquet/schema.h
+++ b/cpp/src/parquet/schema.h
@@ -127,9 +127,6 @@ class PARQUET_EXPORT Node {
   /// Thrift.
   int field_id() const { return field_id_; }
 
-  PARQUET_DEPRECATED("id() is deprecated. Use field_id() instead")
-  int id() const { return field_id_; }
-
   const Node* parent() const { return parent_; }
 
   const std::shared_ptr<ColumnPath> path() const;

From 72b52ef1b2681f927e8cfc2bc6a643648c542058 Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Wed, 4 Aug 2021 15:10:20 -0400
Subject: [PATCH 707/719] ARROW-13344: [R] Initial bindings for
 ExecPlan/ExecNode

This PR adds support for both scalar and group-by aggregation via dplyr::summarize(). Only the functions sum, any, and all are wired up. Followup issues (both bugs and features):

* [C++] Aggregation nodes seem not to respect FunctionOptions, or else I'm not passing them in correctly (ARROW-13497)
* [C++] ScanNode takes filter but doesn't filter (ARROW-13498)
* [R] Aggregation on expression doesn't NSE correctly (ARROW-13499)
* [R] Bindings for mean, var, sd aggregation (ARROW-13528)
* [R] Bindings for count aggregation (ARROW-13501)
* [R] Bindings for min/max aggregation (ARROW-13502)
* [R] Handle summarize() with 0 arguments or no aggregate functions (ARROW-13543)
* [R] Support .groups argument to summarize() (ARROW-13550)
* [C++] MakeScalarAggregateNode and MakeGroupByNode have quite different function signatures, which makes working with the API confusing; GroupBy doesn't let you specify the names of the output columns (ARROW-13482)
* [C++] Grouped aggregation functions all have to be invoked with a `hash_` prefix to the name, which seems unnecessary because you can't call a non-hash-aggregation function in GroupBy and you can't call a hash_ function in ScalarAggregate (ARROW-13451)

Closes #10722 from nealrichardson/scalar-aggregate-node

Lead-authored-by: Neal Richardson <neal.p.richardson@gmail.com>
Co-authored-by: Benjamin Kietzman <bengilgit@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 cpp/src/arrow/compute/exec.cc           |    2 +-
 cpp/src/arrow/compute/exec/exec_plan.cc |   35 +-
 cpp/src/arrow/compute/exec/exec_plan.h  |    4 +-
 cpp/src/arrow/compute/exec/plan_test.cc |   11 +-
 cpp/src/arrow/dataset/scanner.cc        |    9 +-
 cpp/src/arrow/dataset/scanner_test.cc   |   14 +-
 r/DESCRIPTION                           |    1 +
 r/R/arrowExports.R                      | 1783 ++++++++++++-----------
 r/R/dplyr-eval.R                        |    8 +-
 r/R/dplyr-functions.R                   |   41 +-
 r/R/dplyr-summarize.R                   |  102 +-
 r/R/dplyr.R                             |   22 +-
 r/R/duckdb.R                            |    7 +-
 r/R/query-engine.R                      |   75 +
 r/man/ChunkedArray.Rd                   |   22 +
 r/man/Field.Rd                          |    5 +
 r/man/FileFormat.Rd                     |   15 +
 r/man/ParquetFileReader.Rd              |   12 +
 r/man/RecordBatch.Rd                    |   11 +
 r/man/RecordBatchReader.Rd              |   37 +
 r/man/RecordBatchWriter.Rd              |   37 +
 r/man/Scalar.Rd                         |   17 +
 r/man/Schema.Rd                         |    9 +
 r/man/Table.Rd                          |   11 +
 r/man/array.Rd                          |   23 +
 r/man/buffer.Rd                         |    9 +
 r/man/call_function.Rd                  |   10 +
 r/man/codec_is_available.Rd             |    5 +
 r/man/copy_files.Rd                     |   10 +
 r/man/data-type.Rd                      |    8 +
 r/man/hive_partition.Rd                 |    5 +
 r/man/list_compute_functions.Rd         |    7 +
 r/man/load_flight_server.Rd             |    5 +
 r/man/match_arrow.Rd                    |   25 +
 r/man/open_dataset.Rd                   |   51 +
 r/man/read_delim_arrow.Rd               |   11 +
 r/man/read_feather.Rd                   |   11 +
 r/man/read_json_arrow.Rd                |   12 +
 r/man/read_parquet.Rd                   |    9 +
 r/man/s3_bucket.Rd                      |    5 +
 r/man/to_duckdb.Rd                      |   19 +
 r/man/type.Rd                           |   10 +
 r/man/unify_schemas.Rd                  |    7 +
 r/man/value_counts.Rd                   |    6 +
 r/man/write_csv_arrow.Rd                |    7 +
 r/man/write_feather.Rd                  |    7 +
 r/man/write_ipc_stream.Rd               |    7 +
 r/man/write_parquet.Rd                  |   12 +
 r/man/write_to_raw.Rd                   |    7 +
 r/src/arrowExports.cpp                  |  155 +-
 r/src/arrow_types.h                     |   10 +
 r/src/compute-exec.cpp                  |  177 +++
 r/src/expression.cpp                    |   15 +-
 r/tests/testthat/test-dataset.R         |   26 +-
 r/tests/testthat/test-dplyr-aggregate.R |  185 +++
 r/tests/testthat/test-dplyr-group-by.R  |    6 +-
 r/tests/testthat/test-dplyr.R           |   20 +-
 57 files changed, 2199 insertions(+), 973 deletions(-)
 create mode 100644 r/R/query-engine.R
 create mode 100644 r/src/compute-exec.cpp
 create mode 100644 r/tests/testthat/test-dplyr-aggregate.R

diff --git a/cpp/src/arrow/compute/exec.cc b/cpp/src/arrow/compute/exec.cc
index 2a32c96ed3b..7d6db9f58db 100644
--- a/cpp/src/arrow/compute/exec.cc
+++ b/cpp/src/arrow/compute/exec.cc
@@ -115,7 +115,7 @@ ExecBatch ExecBatch::Slice(int64_t offset, int64_t length) const {
     if (value.is_scalar()) continue;
     value = value.array()->Slice(offset, length);
   }
-  out.length = length;
+  out.length = std::min(length, this->length - offset);
   return out;
 }
 
diff --git a/cpp/src/arrow/compute/exec/exec_plan.cc b/cpp/src/arrow/compute/exec/exec_plan.cc
index 20c8c347cc1..4a4758c8471 100644
--- a/cpp/src/arrow/compute/exec/exec_plan.cc
+++ b/cpp/src/arrow/compute/exec/exec_plan.cc
@@ -719,11 +719,13 @@ struct ScalarAggregateNode : ExecNode {
   ScalarAggregateNode(ExecNode* input, std::string label,
                       std::shared_ptr<Schema> output_schema,
                       std::vector<const ScalarAggregateKernel*> kernels,
+                      std::vector<int> argument_indices,
                       std::vector<std::vector<std::unique_ptr<KernelState>>> states)
       : ExecNode(input->plan(), std::move(label), {input}, {"target"},
                  /*output_schema=*/std::move(output_schema),
                  /*num_outputs=*/1),
         kernels_(std::move(kernels)),
+        argument_indices_(std::move(argument_indices)),
         states_(std::move(states)) {}
 
   const char* kind_name() override { return "ScalarAggregateNode"; }
@@ -733,7 +735,7 @@ struct ScalarAggregateNode : ExecNode {
       KernelContext batch_ctx{plan()->exec_context()};
       batch_ctx.SetState(states_[i][thread_index].get());
 
-      ExecBatch single_column_batch{{batch.values[i]}, batch.length};
+      ExecBatch single_column_batch{{batch[argument_indices_[i]]}, batch.length};
       RETURN_NOT_OK(kernels_[i]->consume(&batch_ctx, single_column_batch));
     }
     return Status::OK();
@@ -807,7 +809,8 @@ struct ScalarAggregateNode : ExecNode {
   }
 
   Future<> finished_ = Future<>::MakeFinished();
-  std::vector<const ScalarAggregateKernel*> kernels_;
+  const std::vector<const ScalarAggregateKernel*> kernels_;
+  const std::vector<int> argument_indices_;
 
   std::vector<std::vector<std::unique_ptr<KernelState>>> states_;
 
@@ -816,11 +819,17 @@ struct ScalarAggregateNode : ExecNode {
 };
 
 Result<ExecNode*> MakeScalarAggregateNode(ExecNode* input, std::string label,
-                                          std::vector<internal::Aggregate> aggregates) {
-  if (input->output_schema()->num_fields() != static_cast<int>(aggregates.size())) {
-    return Status::Invalid("Provided ", aggregates.size(),
-                           " aggregates, expected one for each field of ",
-                           input->output_schema()->ToString());
+                                          std::vector<internal::Aggregate> aggregates,
+                                          std::vector<FieldRef> arguments,
+                                          std::vector<std::string> out_field_names) {
+  if (aggregates.size() != arguments.size()) {
+    return Status::Invalid("Provided ", aggregates.size(), " aggregates but ",
+                           arguments.size(), " arguments.");
+  }
+
+  if (aggregates.size() != out_field_names.size()) {
+    return Status::Invalid("Provided ", aggregates.size(), " aggregates but ",
+                           out_field_names.size(), " field names for the output.");
   }
 
   auto exec_ctx = input->plan()->exec_context();
@@ -828,8 +837,16 @@ Result<ExecNode*> MakeScalarAggregateNode(ExecNode* input, std::string label,
   std::vector<const ScalarAggregateKernel*> kernels(aggregates.size());
   std::vector<std::vector<std::unique_ptr<KernelState>>> states(kernels.size());
   FieldVector fields(kernels.size());
+  std::vector<int> argument_indices(kernels.size());
 
   for (size_t i = 0; i < kernels.size(); ++i) {
+    if (!arguments[i].IsName()) {
+      return Status::NotImplemented("Non name field refs");
+    }
+    ARROW_ASSIGN_OR_RAISE(auto match,
+                          arguments[i].FindOneOrNone(*input->output_schema()));
+    argument_indices[i] = match[0];
+
     ARROW_ASSIGN_OR_RAISE(auto function,
                           exec_ctx->func_registry()->GetFunction(aggregates[i].function));
 
@@ -862,12 +879,12 @@ Result<ExecNode*> MakeScalarAggregateNode(ExecNode* input, std::string label,
     ARROW_ASSIGN_OR_RAISE(
         auto descr, kernels[i]->signature->out_type().Resolve(&kernel_ctx, {in_type}));
 
-    fields[i] = field(aggregates[i].function, std::move(descr.type));
+    fields[i] = field(std::move(out_field_names[i]), std::move(descr.type));
   }
 
   return input->plan()->EmplaceNode<ScalarAggregateNode>(
       input, std::move(label), schema(std::move(fields)), std::move(kernels),
-      std::move(states));
+      std::move(argument_indices), std::move(states));
 }
 
 namespace internal {
diff --git a/cpp/src/arrow/compute/exec/exec_plan.h b/cpp/src/arrow/compute/exec/exec_plan.h
index 07bb365bbc7..fc3af92af4a 100644
--- a/cpp/src/arrow/compute/exec/exec_plan.h
+++ b/cpp/src/arrow/compute/exec/exec_plan.h
@@ -285,7 +285,9 @@ Result<ExecNode*> MakeProjectNode(ExecNode* input, std::string label,
 
 ARROW_EXPORT
 Result<ExecNode*> MakeScalarAggregateNode(ExecNode* input, std::string label,
-                                          std::vector<internal::Aggregate> aggregates);
+                                          std::vector<internal::Aggregate> aggregates,
+                                          std::vector<FieldRef> arguments,
+                                          std::vector<std::string> out_field_names);
 
 /// \brief Make a node which groups input rows based on key fields and computes
 /// aggregates for each group
diff --git a/cpp/src/arrow/compute/exec/plan_test.cc b/cpp/src/arrow/compute/exec/plan_test.cc
index aa807468bcb..101257f5de8 100644
--- a/cpp/src/arrow/compute/exec/plan_test.cc
+++ b/cpp/src/arrow/compute/exec/plan_test.cc
@@ -531,9 +531,11 @@ TEST(ExecPlanExecution, SourceScalarAggSink) {
                        MakeTestSourceNode(plan.get(), "source", basic_data,
                                           /*parallel=*/false, /*slow=*/false));
 
-  ASSERT_OK_AND_ASSIGN(auto scalar_agg,
-                       MakeScalarAggregateNode(source, "scalar_agg",
-                                               {{"sum", nullptr}, {"any", nullptr}}));
+  ASSERT_OK_AND_ASSIGN(
+      auto scalar_agg,
+      MakeScalarAggregateNode(source, "scalar_agg", {{"sum", nullptr}, {"any", nullptr}},
+                              /*targets=*/{"i32", "bool"},
+                              /*out_field_names=*/{"sum(i32)", "any(bool)"}));
 
   auto sink_gen = MakeSinkNode(scalar_agg, "sink");
 
@@ -565,7 +567,8 @@ TEST(ExecPlanExecution, ScalarSourceScalarAggSink) {
   ASSERT_OK_AND_ASSIGN(
       auto scalar_agg,
       MakeScalarAggregateNode(source, "scalar_agg",
-                              {{"count", nullptr}, {"sum", nullptr}, {"mean", nullptr}}));
+                              {{"count", nullptr}, {"sum", nullptr}, {"mean", nullptr}},
+                              {"a", "b", "c"}, {"sum a", "sum b", "sum c"}));
 
   auto sink_gen = MakeSinkNode(scalar_agg, "sink");
 
diff --git a/cpp/src/arrow/dataset/scanner.cc b/cpp/src/arrow/dataset/scanner.cc
index 192f84f46df..d81b9cd1c5c 100644
--- a/cpp/src/arrow/dataset/scanner.cc
+++ b/cpp/src/arrow/dataset/scanner.cc
@@ -816,14 +816,15 @@ Result<int64_t> AsyncScanner::CountRows() {
   ARROW_ASSIGN_OR_RAISE(auto scan,
                         MakeScanNode(plan.get(), std::move(fragment_gen), options));
 
-  ARROW_ASSIGN_OR_RAISE(
-      auto get_selection,
-      compute::MakeProjectNode(scan, "get_selection", {options->filter}));
+  ARROW_ASSIGN_OR_RAISE(auto get_selection,
+                        compute::MakeProjectNode(scan, "get_selection", {options->filter},
+                                                 {"selection_mask"}));
 
   ARROW_ASSIGN_OR_RAISE(
       auto sum_selection,
       compute::MakeScalarAggregateNode(get_selection, "sum_selection",
-                                       {compute::internal::Aggregate{"sum", nullptr}}));
+                                       {compute::internal::Aggregate{"sum", nullptr}},
+                                       {"selection_mask"}, {"sum"}));
 
   AsyncGenerator<util::optional<compute::ExecBatch>> sink_gen =
       compute::MakeSinkNode(sum_selection, "sink");
diff --git a/cpp/src/arrow/dataset/scanner_test.cc b/cpp/src/arrow/dataset/scanner_test.cc
index de7f780183a..34fa1486ef2 100644
--- a/cpp/src/arrow/dataset/scanner_test.cc
+++ b/cpp/src/arrow/dataset/scanner_test.cc
@@ -1471,14 +1471,16 @@ TEST(ScanNode, MinimalScalarAggEndToEnd) {
   ASSERT_OK_AND_ASSIGN(
       compute::ExecNode * sum,
       compute::MakeScalarAggregateNode(project, "scalar_agg",
-                                       {compute::internal::Aggregate{"sum", nullptr}}));
+                                       {compute::internal::Aggregate{"sum", nullptr}},
+                                       {a_times_2.ToString()}, {"a*2 sum"}));
 
   // finally, pipe the project node into a sink node
   auto sink_gen = compute::MakeSinkNode(sum, "sink");
 
   // translate sink_gen (async) to sink_reader (sync)
-  std::shared_ptr<RecordBatchReader> sink_reader = compute::MakeGeneratorReader(
-      schema({field("sum", int64())}), std::move(sink_gen), exec_context.memory_pool());
+  std::shared_ptr<RecordBatchReader> sink_reader =
+      compute::MakeGeneratorReader(schema({field("a*2 sum", int64())}),
+                                   std::move(sink_gen), exec_context.memory_pool());
 
   // start the ExecPlan
   ASSERT_OK(plan->StartProducing());
@@ -1489,9 +1491,9 @@ TEST(ScanNode, MinimalScalarAggEndToEnd) {
   // wait 1s for completion
   ASSERT_TRUE(plan->finished().Wait(/*seconds=*/1)) << "ExecPlan didn't finish within 1s";
 
-  auto expected = TableFromJSON(schema({field("sum", int64())}), {
-                                                                     R"([
-                                               {"sum": 4}
+  auto expected = TableFromJSON(schema({field("a*2 sum", int64())}), {
+                                                                         R"([
+                                               {"a*2 sum": 4}
                                           ])"});
   AssertTablesEqual(*expected, *collected, /*same_chunk_layout=*/false);
 }
diff --git a/r/DESCRIPTION b/r/DESCRIPTION
index a0c4b61b7a0..3d10aa4745e 100644
--- a/r/DESCRIPTION
+++ b/r/DESCRIPTION
@@ -109,6 +109,7 @@ Collate:
     'metadata.R'
     'parquet.R'
     'python.R'
+    'query-engine.R'
     'record-batch-reader.R'
     'record-batch-writer.R'
     'reexports-bit64.R'
diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R
index e54f88e9d4e..268a17ef4f4 100644
--- a/r/R/arrowExports.R
+++ b/r/R/arrowExports.R
@@ -1,1749 +1,1784 @@
 # Generated by using data-raw/codegen.R -> do not edit by hand
 
-is_altrep_int_nonull <- function(x) {
-  .Call(`_arrow_is_altrep_int_nonull`, x)
+is_altrep_int_nonull <- function(x){
+    .Call(`_arrow_is_altrep_int_nonull`, x)
 }
 
-is_altrep_dbl_nonull <- function(x) {
-  .Call(`_arrow_is_altrep_dbl_nonull`, x)
+is_altrep_dbl_nonull <- function(x){
+    .Call(`_arrow_is_altrep_dbl_nonull`, x)
 }
 
-Array__Slice1 <- function(array, offset) {
-  .Call(`_arrow_Array__Slice1`, array, offset)
+Array__Slice1 <- function(array, offset){
+    .Call(`_arrow_Array__Slice1`, array, offset)
 }
 
-Array__Slice2 <- function(array, offset, length) {
-  .Call(`_arrow_Array__Slice2`, array, offset, length)
+Array__Slice2 <- function(array, offset, length){
+    .Call(`_arrow_Array__Slice2`, array, offset, length)
 }
 
-Array__IsNull <- function(x, i) {
-  .Call(`_arrow_Array__IsNull`, x, i)
+Array__IsNull <- function(x, i){
+    .Call(`_arrow_Array__IsNull`, x, i)
 }
 
-Array__IsValid <- function(x, i) {
-  .Call(`_arrow_Array__IsValid`, x, i)
+Array__IsValid <- function(x, i){
+    .Call(`_arrow_Array__IsValid`, x, i)
 }
 
-Array__length <- function(x) {
-  .Call(`_arrow_Array__length`, x)
+Array__length <- function(x){
+    .Call(`_arrow_Array__length`, x)
 }
 
-Array__offset <- function(x) {
-  .Call(`_arrow_Array__offset`, x)
+Array__offset <- function(x){
+    .Call(`_arrow_Array__offset`, x)
 }
 
-Array__null_count <- function(x) {
-  .Call(`_arrow_Array__null_count`, x)
+Array__null_count <- function(x){
+    .Call(`_arrow_Array__null_count`, x)
 }
 
-Array__type <- function(x) {
-  .Call(`_arrow_Array__type`, x)
+Array__type <- function(x){
+    .Call(`_arrow_Array__type`, x)
 }
 
-Array__ToString <- function(x) {
-  .Call(`_arrow_Array__ToString`, x)
+Array__ToString <- function(x){
+    .Call(`_arrow_Array__ToString`, x)
 }
 
-Array__type_id <- function(x) {
-  .Call(`_arrow_Array__type_id`, x)
+Array__type_id <- function(x){
+    .Call(`_arrow_Array__type_id`, x)
 }
 
-Array__Equals <- function(lhs, rhs) {
-  .Call(`_arrow_Array__Equals`, lhs, rhs)
+Array__Equals <- function(lhs, rhs){
+    .Call(`_arrow_Array__Equals`, lhs, rhs)
 }
 
-Array__ApproxEquals <- function(lhs, rhs) {
-  .Call(`_arrow_Array__ApproxEquals`, lhs, rhs)
+Array__ApproxEquals <- function(lhs, rhs){
+    .Call(`_arrow_Array__ApproxEquals`, lhs, rhs)
 }
 
-Array__Diff <- function(lhs, rhs) {
-  .Call(`_arrow_Array__Diff`, lhs, rhs)
+Array__Diff <- function(lhs, rhs){
+    .Call(`_arrow_Array__Diff`, lhs, rhs)
 }
 
-Array__data <- function(array) {
-  .Call(`_arrow_Array__data`, array)
+Array__data <- function(array){
+    .Call(`_arrow_Array__data`, array)
 }
 
-Array__RangeEquals <- function(self, other, start_idx, end_idx, other_start_idx) {
-  .Call(`_arrow_Array__RangeEquals`, self, other, start_idx, end_idx, other_start_idx)
+Array__RangeEquals <- function(self, other, start_idx, end_idx, other_start_idx){
+    .Call(`_arrow_Array__RangeEquals`, self, other, start_idx, end_idx, other_start_idx)
 }
 
-Array__View <- function(array, type) {
-  .Call(`_arrow_Array__View`, array, type)
+Array__View <- function(array, type){
+    .Call(`_arrow_Array__View`, array, type)
 }
 
-Array__Validate <- function(array) {
-  invisible(.Call(`_arrow_Array__Validate`, array))
+Array__Validate <- function(array){
+    invisible(.Call(`_arrow_Array__Validate`, array))
 }
 
-DictionaryArray__indices <- function(array) {
-  .Call(`_arrow_DictionaryArray__indices`, array)
+DictionaryArray__indices <- function(array){
+    .Call(`_arrow_DictionaryArray__indices`, array)
 }
 
-DictionaryArray__dictionary <- function(array) {
-  .Call(`_arrow_DictionaryArray__dictionary`, array)
+DictionaryArray__dictionary <- function(array){
+    .Call(`_arrow_DictionaryArray__dictionary`, array)
 }
 
-StructArray__field <- function(array, i) {
-  .Call(`_arrow_StructArray__field`, array, i)
+StructArray__field <- function(array, i){
+    .Call(`_arrow_StructArray__field`, array, i)
 }
 
-StructArray__GetFieldByName <- function(array, name) {
-  .Call(`_arrow_StructArray__GetFieldByName`, array, name)
+StructArray__GetFieldByName <- function(array, name){
+    .Call(`_arrow_StructArray__GetFieldByName`, array, name)
 }
 
-StructArray__Flatten <- function(array) {
-  .Call(`_arrow_StructArray__Flatten`, array)
+StructArray__Flatten <- function(array){
+    .Call(`_arrow_StructArray__Flatten`, array)
 }
 
-ListArray__value_type <- function(array) {
-  .Call(`_arrow_ListArray__value_type`, array)
+ListArray__value_type <- function(array){
+    .Call(`_arrow_ListArray__value_type`, array)
 }
 
-LargeListArray__value_type <- function(array) {
-  .Call(`_arrow_LargeListArray__value_type`, array)
+LargeListArray__value_type <- function(array){
+    .Call(`_arrow_LargeListArray__value_type`, array)
 }
 
-ListArray__values <- function(array) {
-  .Call(`_arrow_ListArray__values`, array)
+ListArray__values <- function(array){
+    .Call(`_arrow_ListArray__values`, array)
 }
 
-LargeListArray__values <- function(array) {
-  .Call(`_arrow_LargeListArray__values`, array)
+LargeListArray__values <- function(array){
+    .Call(`_arrow_LargeListArray__values`, array)
 }
 
-ListArray__value_length <- function(array, i) {
-  .Call(`_arrow_ListArray__value_length`, array, i)
+ListArray__value_length <- function(array, i){
+    .Call(`_arrow_ListArray__value_length`, array, i)
 }
 
-LargeListArray__value_length <- function(array, i) {
-  .Call(`_arrow_LargeListArray__value_length`, array, i)
+LargeListArray__value_length <- function(array, i){
+    .Call(`_arrow_LargeListArray__value_length`, array, i)
 }
 
-FixedSizeListArray__value_length <- function(array, i) {
-  .Call(`_arrow_FixedSizeListArray__value_length`, array, i)
+FixedSizeListArray__value_length <- function(array, i){
+    .Call(`_arrow_FixedSizeListArray__value_length`, array, i)
 }
 
-ListArray__value_offset <- function(array, i) {
-  .Call(`_arrow_ListArray__value_offset`, array, i)
+ListArray__value_offset <- function(array, i){
+    .Call(`_arrow_ListArray__value_offset`, array, i)
 }
 
-LargeListArray__value_offset <- function(array, i) {
-  .Call(`_arrow_LargeListArray__value_offset`, array, i)
+LargeListArray__value_offset <- function(array, i){
+    .Call(`_arrow_LargeListArray__value_offset`, array, i)
 }
 
-FixedSizeListArray__value_offset <- function(array, i) {
-  .Call(`_arrow_FixedSizeListArray__value_offset`, array, i)
+FixedSizeListArray__value_offset <- function(array, i){
+    .Call(`_arrow_FixedSizeListArray__value_offset`, array, i)
 }
 
-ListArray__raw_value_offsets <- function(array) {
-  .Call(`_arrow_ListArray__raw_value_offsets`, array)
+ListArray__raw_value_offsets <- function(array){
+    .Call(`_arrow_ListArray__raw_value_offsets`, array)
 }
 
-LargeListArray__raw_value_offsets <- function(array) {
-  .Call(`_arrow_LargeListArray__raw_value_offsets`, array)
+LargeListArray__raw_value_offsets <- function(array){
+    .Call(`_arrow_LargeListArray__raw_value_offsets`, array)
 }
 
-Array__as_vector <- function(array) {
-  .Call(`_arrow_Array__as_vector`, array)
+Array__as_vector <- function(array){
+    .Call(`_arrow_Array__as_vector`, array)
 }
 
-ChunkedArray__as_vector <- function(chunked_array, use_threads) {
-  .Call(`_arrow_ChunkedArray__as_vector`, chunked_array, use_threads)
+ChunkedArray__as_vector <- function(chunked_array, use_threads){
+    .Call(`_arrow_ChunkedArray__as_vector`, chunked_array, use_threads)
 }
 
-RecordBatch__to_dataframe <- function(batch, use_threads) {
-  .Call(`_arrow_RecordBatch__to_dataframe`, batch, use_threads)
+RecordBatch__to_dataframe <- function(batch, use_threads){
+    .Call(`_arrow_RecordBatch__to_dataframe`, batch, use_threads)
 }
 
-Table__to_dataframe <- function(table, use_threads) {
-  .Call(`_arrow_Table__to_dataframe`, table, use_threads)
+Table__to_dataframe <- function(table, use_threads){
+    .Call(`_arrow_Table__to_dataframe`, table, use_threads)
 }
 
-ArrayData__get_type <- function(x) {
-  .Call(`_arrow_ArrayData__get_type`, x)
+ArrayData__get_type <- function(x){
+    .Call(`_arrow_ArrayData__get_type`, x)
 }
 
-ArrayData__get_length <- function(x) {
-  .Call(`_arrow_ArrayData__get_length`, x)
+ArrayData__get_length <- function(x){
+    .Call(`_arrow_ArrayData__get_length`, x)
 }
 
-ArrayData__get_null_count <- function(x) {
-  .Call(`_arrow_ArrayData__get_null_count`, x)
+ArrayData__get_null_count <- function(x){
+    .Call(`_arrow_ArrayData__get_null_count`, x)
 }
 
-ArrayData__get_offset <- function(x) {
-  .Call(`_arrow_ArrayData__get_offset`, x)
+ArrayData__get_offset <- function(x){
+    .Call(`_arrow_ArrayData__get_offset`, x)
 }
 
-ArrayData__buffers <- function(x) {
-  .Call(`_arrow_ArrayData__buffers`, x)
+ArrayData__buffers <- function(x){
+    .Call(`_arrow_ArrayData__buffers`, x)
 }
 
-Buffer__is_mutable <- function(buffer) {
-  .Call(`_arrow_Buffer__is_mutable`, buffer)
+Buffer__is_mutable <- function(buffer){
+    .Call(`_arrow_Buffer__is_mutable`, buffer)
 }
 
-Buffer__ZeroPadding <- function(buffer) {
-  invisible(.Call(`_arrow_Buffer__ZeroPadding`, buffer))
+Buffer__ZeroPadding <- function(buffer){
+    invisible(.Call(`_arrow_Buffer__ZeroPadding`, buffer))
 }
 
-Buffer__capacity <- function(buffer) {
-  .Call(`_arrow_Buffer__capacity`, buffer)
+Buffer__capacity <- function(buffer){
+    .Call(`_arrow_Buffer__capacity`, buffer)
 }
 
-Buffer__size <- function(buffer) {
-  .Call(`_arrow_Buffer__size`, buffer)
+Buffer__size <- function(buffer){
+    .Call(`_arrow_Buffer__size`, buffer)
 }
 
-r___RBuffer__initialize <- function(x) {
-  .Call(`_arrow_r___RBuffer__initialize`, x)
+r___RBuffer__initialize <- function(x){
+    .Call(`_arrow_r___RBuffer__initialize`, x)
 }
 
-Buffer__data <- function(buffer) {
-  .Call(`_arrow_Buffer__data`, buffer)
+Buffer__data <- function(buffer){
+    .Call(`_arrow_Buffer__data`, buffer)
 }
 
-Buffer__Equals <- function(x, y) {
-  .Call(`_arrow_Buffer__Equals`, x, y)
+Buffer__Equals <- function(x, y){
+    .Call(`_arrow_Buffer__Equals`, x, y)
 }
 
-ChunkedArray__length <- function(chunked_array) {
-  .Call(`_arrow_ChunkedArray__length`, chunked_array)
+ChunkedArray__length <- function(chunked_array){
+    .Call(`_arrow_ChunkedArray__length`, chunked_array)
 }
 
-ChunkedArray__null_count <- function(chunked_array) {
-  .Call(`_arrow_ChunkedArray__null_count`, chunked_array)
+ChunkedArray__null_count <- function(chunked_array){
+    .Call(`_arrow_ChunkedArray__null_count`, chunked_array)
 }
 
-ChunkedArray__num_chunks <- function(chunked_array) {
-  .Call(`_arrow_ChunkedArray__num_chunks`, chunked_array)
+ChunkedArray__num_chunks <- function(chunked_array){
+    .Call(`_arrow_ChunkedArray__num_chunks`, chunked_array)
 }
 
-ChunkedArray__chunk <- function(chunked_array, i) {
-  .Call(`_arrow_ChunkedArray__chunk`, chunked_array, i)
+ChunkedArray__chunk <- function(chunked_array, i){
+    .Call(`_arrow_ChunkedArray__chunk`, chunked_array, i)
 }
 
-ChunkedArray__chunks <- function(chunked_array) {
-  .Call(`_arrow_ChunkedArray__chunks`, chunked_array)
+ChunkedArray__chunks <- function(chunked_array){
+    .Call(`_arrow_ChunkedArray__chunks`, chunked_array)
 }
 
-ChunkedArray__type <- function(chunked_array) {
-  .Call(`_arrow_ChunkedArray__type`, chunked_array)
+ChunkedArray__type <- function(chunked_array){
+    .Call(`_arrow_ChunkedArray__type`, chunked_array)
 }
 
-ChunkedArray__Slice1 <- function(chunked_array, offset) {
-  .Call(`_arrow_ChunkedArray__Slice1`, chunked_array, offset)
+ChunkedArray__Slice1 <- function(chunked_array, offset){
+    .Call(`_arrow_ChunkedArray__Slice1`, chunked_array, offset)
 }
 
-ChunkedArray__Slice2 <- function(chunked_array, offset, length) {
-  .Call(`_arrow_ChunkedArray__Slice2`, chunked_array, offset, length)
+ChunkedArray__Slice2 <- function(chunked_array, offset, length){
+    .Call(`_arrow_ChunkedArray__Slice2`, chunked_array, offset, length)
 }
 
-ChunkedArray__View <- function(array, type) {
-  .Call(`_arrow_ChunkedArray__View`, array, type)
+ChunkedArray__View <- function(array, type){
+    .Call(`_arrow_ChunkedArray__View`, array, type)
 }
 
-ChunkedArray__Validate <- function(chunked_array) {
-  invisible(.Call(`_arrow_ChunkedArray__Validate`, chunked_array))
+ChunkedArray__Validate <- function(chunked_array){
+    invisible(.Call(`_arrow_ChunkedArray__Validate`, chunked_array))
 }
 
-ChunkedArray__Equals <- function(x, y) {
-  .Call(`_arrow_ChunkedArray__Equals`, x, y)
+ChunkedArray__Equals <- function(x, y){
+    .Call(`_arrow_ChunkedArray__Equals`, x, y)
 }
 
-ChunkedArray__ToString <- function(x) {
-  .Call(`_arrow_ChunkedArray__ToString`, x)
+ChunkedArray__ToString <- function(x){
+    .Call(`_arrow_ChunkedArray__ToString`, x)
 }
 
-ChunkedArray__from_list <- function(chunks, s_type) {
-  .Call(`_arrow_ChunkedArray__from_list`, chunks, s_type)
+ChunkedArray__from_list <- function(chunks, s_type){
+    .Call(`_arrow_ChunkedArray__from_list`, chunks, s_type)
 }
 
-util___Codec__Create <- function(codec, compression_level) {
-  .Call(`_arrow_util___Codec__Create`, codec, compression_level)
+util___Codec__Create <- function(codec, compression_level){
+    .Call(`_arrow_util___Codec__Create`, codec, compression_level)
 }
 
-util___Codec__name <- function(codec) {
-  .Call(`_arrow_util___Codec__name`, codec)
+util___Codec__name <- function(codec){
+    .Call(`_arrow_util___Codec__name`, codec)
 }
 
-util___Codec__IsAvailable <- function(codec) {
-  .Call(`_arrow_util___Codec__IsAvailable`, codec)
+util___Codec__IsAvailable <- function(codec){
+    .Call(`_arrow_util___Codec__IsAvailable`, codec)
 }
 
-io___CompressedOutputStream__Make <- function(codec, raw) {
-  .Call(`_arrow_io___CompressedOutputStream__Make`, codec, raw)
+io___CompressedOutputStream__Make <- function(codec, raw){
+    .Call(`_arrow_io___CompressedOutputStream__Make`, codec, raw)
 }
 
-io___CompressedInputStream__Make <- function(codec, raw) {
-  .Call(`_arrow_io___CompressedInputStream__Make`, codec, raw)
+io___CompressedInputStream__Make <- function(codec, raw){
+    .Call(`_arrow_io___CompressedInputStream__Make`, codec, raw)
 }
 
-RecordBatch__cast <- function(batch, schema, options) {
-  .Call(`_arrow_RecordBatch__cast`, batch, schema, options)
+ExecPlan_create <- function(use_threads){
+    .Call(`_arrow_ExecPlan_create`, use_threads)
 }
 
-Table__cast <- function(table, schema, options) {
-  .Call(`_arrow_Table__cast`, table, schema, options)
+ExecPlan_run <- function(plan, final_node){
+    .Call(`_arrow_ExecPlan_run`, plan, final_node)
 }
 
-compute__CallFunction <- function(func_name, args, options) {
-  .Call(`_arrow_compute__CallFunction`, func_name, args, options)
+ExecNode_Scan <- function(plan, dataset, filter, materialized_field_names){
+    .Call(`_arrow_ExecNode_Scan`, plan, dataset, filter, materialized_field_names)
 }
 
-compute__GroupBy <- function(arguments, keys, options) {
-  .Call(`_arrow_compute__GroupBy`, arguments, keys, options)
+ExecNode_Filter <- function(input, filter){
+    .Call(`_arrow_ExecNode_Filter`, input, filter)
 }
 
-compute__GetFunctionNames <- function() {
-  .Call(`_arrow_compute__GetFunctionNames`)
+ExecNode_Project <- function(input, exprs, names){
+    .Call(`_arrow_ExecNode_Project`, input, exprs, names)
 }
 
-build_info <- function() {
-  .Call(`_arrow_build_info`)
+ExecNode_ScalarAggregate <- function(input, options, target_names, out_field_names){
+    .Call(`_arrow_ExecNode_ScalarAggregate`, input, options, target_names, out_field_names)
 }
 
-runtime_info <- function() {
-  .Call(`_arrow_runtime_info`)
+ExecNode_GroupByAggregate <- function(input, group_vars, agg_srcs, aggregations){
+    .Call(`_arrow_ExecNode_GroupByAggregate`, input, group_vars, agg_srcs, aggregations)
 }
 
-csv___WriteOptions__initialize <- function(options) {
-  .Call(`_arrow_csv___WriteOptions__initialize`, options)
+RecordBatch__cast <- function(batch, schema, options){
+    .Call(`_arrow_RecordBatch__cast`, batch, schema, options)
 }
 
-csv___ReadOptions__initialize <- function(options) {
-  .Call(`_arrow_csv___ReadOptions__initialize`, options)
+Table__cast <- function(table, schema, options){
+    .Call(`_arrow_Table__cast`, table, schema, options)
 }
 
-csv___ParseOptions__initialize <- function(options) {
-  .Call(`_arrow_csv___ParseOptions__initialize`, options)
+compute__CallFunction <- function(func_name, args, options){
+    .Call(`_arrow_compute__CallFunction`, func_name, args, options)
 }
 
-csv___ReadOptions__column_names <- function(options) {
-  .Call(`_arrow_csv___ReadOptions__column_names`, options)
+compute__GroupBy <- function(arguments, keys, options){
+    .Call(`_arrow_compute__GroupBy`, arguments, keys, options)
 }
 
-csv___ConvertOptions__initialize <- function(options) {
-  .Call(`_arrow_csv___ConvertOptions__initialize`, options)
+compute__GetFunctionNames <- function(){
+    .Call(`_arrow_compute__GetFunctionNames`)
 }
 
-csv___TableReader__Make <- function(input, read_options, parse_options, convert_options) {
-  .Call(`_arrow_csv___TableReader__Make`, input, read_options, parse_options, convert_options)
+build_info <- function(){
+    .Call(`_arrow_build_info`)
 }
 
-csv___TableReader__Read <- function(table_reader) {
-  .Call(`_arrow_csv___TableReader__Read`, table_reader)
+runtime_info <- function(){
+    .Call(`_arrow_runtime_info`)
 }
 
-TimestampParser__kind <- function(parser) {
-  .Call(`_arrow_TimestampParser__kind`, parser)
+csv___WriteOptions__initialize <- function(options){
+    .Call(`_arrow_csv___WriteOptions__initialize`, options)
 }
 
-TimestampParser__format <- function(parser) {
-  .Call(`_arrow_TimestampParser__format`, parser)
+csv___ReadOptions__initialize <- function(options){
+    .Call(`_arrow_csv___ReadOptions__initialize`, options)
 }
 
-TimestampParser__MakeStrptime <- function(format) {
-  .Call(`_arrow_TimestampParser__MakeStrptime`, format)
+csv___ParseOptions__initialize <- function(options){
+    .Call(`_arrow_csv___ParseOptions__initialize`, options)
 }
 
-TimestampParser__MakeISO8601 <- function() {
-  .Call(`_arrow_TimestampParser__MakeISO8601`)
+csv___ReadOptions__column_names <- function(options){
+    .Call(`_arrow_csv___ReadOptions__column_names`, options)
 }
 
-csv___WriteCSV__Table <- function(table, write_options, stream) {
-  invisible(.Call(`_arrow_csv___WriteCSV__Table`, table, write_options, stream))
+csv___ConvertOptions__initialize <- function(options){
+    .Call(`_arrow_csv___ConvertOptions__initialize`, options)
 }
 
-csv___WriteCSV__RecordBatch <- function(record_batch, write_options, stream) {
-  invisible(.Call(`_arrow_csv___WriteCSV__RecordBatch`, record_batch, write_options, stream))
+csv___TableReader__Make <- function(input, read_options, parse_options, convert_options){
+    .Call(`_arrow_csv___TableReader__Make`, input, read_options, parse_options, convert_options)
 }
 
-dataset___Dataset__NewScan <- function(ds) {
-  .Call(`_arrow_dataset___Dataset__NewScan`, ds)
+csv___TableReader__Read <- function(table_reader){
+    .Call(`_arrow_csv___TableReader__Read`, table_reader)
 }
 
-dataset___Dataset__schema <- function(dataset) {
-  .Call(`_arrow_dataset___Dataset__schema`, dataset)
+TimestampParser__kind <- function(parser){
+    .Call(`_arrow_TimestampParser__kind`, parser)
 }
 
-dataset___Dataset__type_name <- function(dataset) {
-  .Call(`_arrow_dataset___Dataset__type_name`, dataset)
+TimestampParser__format <- function(parser){
+    .Call(`_arrow_TimestampParser__format`, parser)
 }
 
-dataset___Dataset__ReplaceSchema <- function(dataset, schm) {
-  .Call(`_arrow_dataset___Dataset__ReplaceSchema`, dataset, schm)
+TimestampParser__MakeStrptime <- function(format){
+    .Call(`_arrow_TimestampParser__MakeStrptime`, format)
 }
 
-dataset___UnionDataset__create <- function(datasets, schm) {
-  .Call(`_arrow_dataset___UnionDataset__create`, datasets, schm)
+TimestampParser__MakeISO8601 <- function(){
+    .Call(`_arrow_TimestampParser__MakeISO8601`)
 }
 
-dataset___InMemoryDataset__create <- function(table) {
-  .Call(`_arrow_dataset___InMemoryDataset__create`, table)
+csv___WriteCSV__Table <- function(table, write_options, stream){
+    invisible(.Call(`_arrow_csv___WriteCSV__Table`, table, write_options, stream))
 }
 
-dataset___UnionDataset__children <- function(ds) {
-  .Call(`_arrow_dataset___UnionDataset__children`, ds)
+csv___WriteCSV__RecordBatch <- function(record_batch, write_options, stream){
+    invisible(.Call(`_arrow_csv___WriteCSV__RecordBatch`, record_batch, write_options, stream))
 }
 
-dataset___FileSystemDataset__format <- function(dataset) {
-  .Call(`_arrow_dataset___FileSystemDataset__format`, dataset)
+dataset___Dataset__NewScan <- function(ds){
+    .Call(`_arrow_dataset___Dataset__NewScan`, ds)
 }
 
-dataset___FileSystemDataset__filesystem <- function(dataset) {
-  .Call(`_arrow_dataset___FileSystemDataset__filesystem`, dataset)
+dataset___Dataset__schema <- function(dataset){
+    .Call(`_arrow_dataset___Dataset__schema`, dataset)
 }
 
-dataset___FileSystemDataset__files <- function(dataset) {
-  .Call(`_arrow_dataset___FileSystemDataset__files`, dataset)
+dataset___Dataset__type_name <- function(dataset){
+    .Call(`_arrow_dataset___Dataset__type_name`, dataset)
 }
 
-dataset___DatasetFactory__Finish1 <- function(factory, unify_schemas) {
-  .Call(`_arrow_dataset___DatasetFactory__Finish1`, factory, unify_schemas)
+dataset___Dataset__ReplaceSchema <- function(dataset, schm){
+    .Call(`_arrow_dataset___Dataset__ReplaceSchema`, dataset, schm)
 }
 
-dataset___DatasetFactory__Finish2 <- function(factory, schema) {
-  .Call(`_arrow_dataset___DatasetFactory__Finish2`, factory, schema)
+dataset___UnionDataset__create <- function(datasets, schm){
+    .Call(`_arrow_dataset___UnionDataset__create`, datasets, schm)
 }
 
-dataset___DatasetFactory__Inspect <- function(factory, unify_schemas) {
-  .Call(`_arrow_dataset___DatasetFactory__Inspect`, factory, unify_schemas)
+dataset___InMemoryDataset__create <- function(table){
+    .Call(`_arrow_dataset___InMemoryDataset__create`, table)
 }
 
-dataset___UnionDatasetFactory__Make <- function(children) {
-  .Call(`_arrow_dataset___UnionDatasetFactory__Make`, children)
+dataset___UnionDataset__children <- function(ds){
+    .Call(`_arrow_dataset___UnionDataset__children`, ds)
 }
 
-dataset___FileSystemDatasetFactory__Make0 <- function(fs, paths, format) {
-  .Call(`_arrow_dataset___FileSystemDatasetFactory__Make0`, fs, paths, format)
+dataset___FileSystemDataset__format <- function(dataset){
+    .Call(`_arrow_dataset___FileSystemDataset__format`, dataset)
 }
 
-dataset___FileSystemDatasetFactory__Make2 <- function(fs, selector, format, partitioning) {
-  .Call(`_arrow_dataset___FileSystemDatasetFactory__Make2`, fs, selector, format, partitioning)
+dataset___FileSystemDataset__filesystem <- function(dataset){
+    .Call(`_arrow_dataset___FileSystemDataset__filesystem`, dataset)
 }
 
-dataset___FileSystemDatasetFactory__Make1 <- function(fs, selector, format) {
-  .Call(`_arrow_dataset___FileSystemDatasetFactory__Make1`, fs, selector, format)
+dataset___FileSystemDataset__files <- function(dataset){
+    .Call(`_arrow_dataset___FileSystemDataset__files`, dataset)
 }
 
-dataset___FileSystemDatasetFactory__Make3 <- function(fs, selector, format, factory) {
-  .Call(`_arrow_dataset___FileSystemDatasetFactory__Make3`, fs, selector, format, factory)
+dataset___DatasetFactory__Finish1 <- function(factory, unify_schemas){
+    .Call(`_arrow_dataset___DatasetFactory__Finish1`, factory, unify_schemas)
 }
 
-dataset___FileFormat__type_name <- function(format) {
-  .Call(`_arrow_dataset___FileFormat__type_name`, format)
+dataset___DatasetFactory__Finish2 <- function(factory, schema){
+    .Call(`_arrow_dataset___DatasetFactory__Finish2`, factory, schema)
 }
 
-dataset___FileFormat__DefaultWriteOptions <- function(fmt) {
-  .Call(`_arrow_dataset___FileFormat__DefaultWriteOptions`, fmt)
+dataset___DatasetFactory__Inspect <- function(factory, unify_schemas){
+    .Call(`_arrow_dataset___DatasetFactory__Inspect`, factory, unify_schemas)
 }
 
-dataset___ParquetFileFormat__Make <- function(options, dict_columns) {
-  .Call(`_arrow_dataset___ParquetFileFormat__Make`, options, dict_columns)
+dataset___UnionDatasetFactory__Make <- function(children){
+    .Call(`_arrow_dataset___UnionDatasetFactory__Make`, children)
 }
 
-dataset___FileWriteOptions__type_name <- function(options) {
-  .Call(`_arrow_dataset___FileWriteOptions__type_name`, options)
+dataset___FileSystemDatasetFactory__Make0 <- function(fs, paths, format){
+    .Call(`_arrow_dataset___FileSystemDatasetFactory__Make0`, fs, paths, format)
 }
 
-dataset___ParquetFileWriteOptions__update <- function(options, writer_props, arrow_writer_props) {
-  invisible(.Call(`_arrow_dataset___ParquetFileWriteOptions__update`, options, writer_props, arrow_writer_props))
+dataset___FileSystemDatasetFactory__Make2 <- function(fs, selector, format, partitioning){
+    .Call(`_arrow_dataset___FileSystemDatasetFactory__Make2`, fs, selector, format, partitioning)
 }
 
-dataset___IpcFileWriteOptions__update2 <- function(ipc_options, use_legacy_format, codec, metadata_version) {
-  invisible(.Call(`_arrow_dataset___IpcFileWriteOptions__update2`, ipc_options, use_legacy_format, codec, metadata_version))
+dataset___FileSystemDatasetFactory__Make1 <- function(fs, selector, format){
+    .Call(`_arrow_dataset___FileSystemDatasetFactory__Make1`, fs, selector, format)
 }
 
-dataset___IpcFileWriteOptions__update1 <- function(ipc_options, use_legacy_format, metadata_version) {
-  invisible(.Call(`_arrow_dataset___IpcFileWriteOptions__update1`, ipc_options, use_legacy_format, metadata_version))
+dataset___FileSystemDatasetFactory__Make3 <- function(fs, selector, format, factory){
+    .Call(`_arrow_dataset___FileSystemDatasetFactory__Make3`, fs, selector, format, factory)
 }
 
-dataset___CsvFileWriteOptions__update <- function(csv_options, write_options) {
-  invisible(.Call(`_arrow_dataset___CsvFileWriteOptions__update`, csv_options, write_options))
+dataset___FileFormat__type_name <- function(format){
+    .Call(`_arrow_dataset___FileFormat__type_name`, format)
 }
 
-dataset___IpcFileFormat__Make <- function() {
-  .Call(`_arrow_dataset___IpcFileFormat__Make`)
+dataset___FileFormat__DefaultWriteOptions <- function(fmt){
+    .Call(`_arrow_dataset___FileFormat__DefaultWriteOptions`, fmt)
 }
 
-dataset___CsvFileFormat__Make <- function(parse_options, convert_options, read_options) {
-  .Call(`_arrow_dataset___CsvFileFormat__Make`, parse_options, convert_options, read_options)
+dataset___ParquetFileFormat__Make <- function(options, dict_columns){
+    .Call(`_arrow_dataset___ParquetFileFormat__Make`, options, dict_columns)
 }
 
-dataset___FragmentScanOptions__type_name <- function(fragment_scan_options) {
-  .Call(`_arrow_dataset___FragmentScanOptions__type_name`, fragment_scan_options)
+dataset___FileWriteOptions__type_name <- function(options){
+    .Call(`_arrow_dataset___FileWriteOptions__type_name`, options)
 }
 
-dataset___CsvFragmentScanOptions__Make <- function(convert_options, read_options) {
-  .Call(`_arrow_dataset___CsvFragmentScanOptions__Make`, convert_options, read_options)
+dataset___ParquetFileWriteOptions__update <- function(options, writer_props, arrow_writer_props){
+    invisible(.Call(`_arrow_dataset___ParquetFileWriteOptions__update`, options, writer_props, arrow_writer_props))
 }
 
-dataset___ParquetFragmentScanOptions__Make <- function(use_buffered_stream, buffer_size, pre_buffer) {
-  .Call(`_arrow_dataset___ParquetFragmentScanOptions__Make`, use_buffered_stream, buffer_size, pre_buffer)
+dataset___IpcFileWriteOptions__update2 <- function(ipc_options, use_legacy_format, codec, metadata_version){
+    invisible(.Call(`_arrow_dataset___IpcFileWriteOptions__update2`, ipc_options, use_legacy_format, codec, metadata_version))
 }
 
-dataset___DirectoryPartitioning <- function(schm, segment_encoding) {
-  .Call(`_arrow_dataset___DirectoryPartitioning`, schm, segment_encoding)
+dataset___IpcFileWriteOptions__update1 <- function(ipc_options, use_legacy_format, metadata_version){
+    invisible(.Call(`_arrow_dataset___IpcFileWriteOptions__update1`, ipc_options, use_legacy_format, metadata_version))
 }
 
-dataset___DirectoryPartitioning__MakeFactory <- function(field_names, segment_encoding) {
-  .Call(`_arrow_dataset___DirectoryPartitioning__MakeFactory`, field_names, segment_encoding)
+dataset___CsvFileWriteOptions__update <- function(csv_options, write_options){
+    invisible(.Call(`_arrow_dataset___CsvFileWriteOptions__update`, csv_options, write_options))
 }
 
-dataset___HivePartitioning <- function(schm, null_fallback, segment_encoding) {
-  .Call(`_arrow_dataset___HivePartitioning`, schm, null_fallback, segment_encoding)
+dataset___IpcFileFormat__Make <- function(){
+    .Call(`_arrow_dataset___IpcFileFormat__Make`)
 }
 
-dataset___HivePartitioning__MakeFactory <- function(null_fallback, segment_encoding) {
-  .Call(`_arrow_dataset___HivePartitioning__MakeFactory`, null_fallback, segment_encoding)
+dataset___CsvFileFormat__Make <- function(parse_options, convert_options, read_options){
+    .Call(`_arrow_dataset___CsvFileFormat__Make`, parse_options, convert_options, read_options)
 }
 
-dataset___ScannerBuilder__ProjectNames <- function(sb, cols) {
-  invisible(.Call(`_arrow_dataset___ScannerBuilder__ProjectNames`, sb, cols))
+dataset___FragmentScanOptions__type_name <- function(fragment_scan_options){
+    .Call(`_arrow_dataset___FragmentScanOptions__type_name`, fragment_scan_options)
 }
 
-dataset___ScannerBuilder__ProjectExprs <- function(sb, exprs, names) {
-  invisible(.Call(`_arrow_dataset___ScannerBuilder__ProjectExprs`, sb, exprs, names))
+dataset___CsvFragmentScanOptions__Make <- function(convert_options, read_options){
+    .Call(`_arrow_dataset___CsvFragmentScanOptions__Make`, convert_options, read_options)
 }
 
-dataset___ScannerBuilder__Filter <- function(sb, expr) {
-  invisible(.Call(`_arrow_dataset___ScannerBuilder__Filter`, sb, expr))
+dataset___ParquetFragmentScanOptions__Make <- function(use_buffered_stream, buffer_size, pre_buffer){
+    .Call(`_arrow_dataset___ParquetFragmentScanOptions__Make`, use_buffered_stream, buffer_size, pre_buffer)
 }
 
-dataset___ScannerBuilder__UseThreads <- function(sb, threads) {
-  invisible(.Call(`_arrow_dataset___ScannerBuilder__UseThreads`, sb, threads))
+dataset___DirectoryPartitioning <- function(schm, segment_encoding){
+    .Call(`_arrow_dataset___DirectoryPartitioning`, schm, segment_encoding)
 }
 
-dataset___ScannerBuilder__UseAsync <- function(sb, use_async) {
-  invisible(.Call(`_arrow_dataset___ScannerBuilder__UseAsync`, sb, use_async))
+dataset___DirectoryPartitioning__MakeFactory <- function(field_names, segment_encoding){
+    .Call(`_arrow_dataset___DirectoryPartitioning__MakeFactory`, field_names, segment_encoding)
 }
 
-dataset___ScannerBuilder__BatchSize <- function(sb, batch_size) {
-  invisible(.Call(`_arrow_dataset___ScannerBuilder__BatchSize`, sb, batch_size))
+dataset___HivePartitioning <- function(schm, null_fallback, segment_encoding){
+    .Call(`_arrow_dataset___HivePartitioning`, schm, null_fallback, segment_encoding)
 }
 
-dataset___ScannerBuilder__FragmentScanOptions <- function(sb, options) {
-  invisible(.Call(`_arrow_dataset___ScannerBuilder__FragmentScanOptions`, sb, options))
+dataset___HivePartitioning__MakeFactory <- function(null_fallback, segment_encoding){
+    .Call(`_arrow_dataset___HivePartitioning__MakeFactory`, null_fallback, segment_encoding)
 }
 
-dataset___ScannerBuilder__schema <- function(sb) {
-  .Call(`_arrow_dataset___ScannerBuilder__schema`, sb)
+dataset___ScannerBuilder__ProjectNames <- function(sb, cols){
+    invisible(.Call(`_arrow_dataset___ScannerBuilder__ProjectNames`, sb, cols))
 }
 
-dataset___ScannerBuilder__Finish <- function(sb) {
-  .Call(`_arrow_dataset___ScannerBuilder__Finish`, sb)
+dataset___ScannerBuilder__ProjectExprs <- function(sb, exprs, names){
+    invisible(.Call(`_arrow_dataset___ScannerBuilder__ProjectExprs`, sb, exprs, names))
 }
 
-dataset___Scanner__ToTable <- function(scanner) {
-  .Call(`_arrow_dataset___Scanner__ToTable`, scanner)
+dataset___ScannerBuilder__Filter <- function(sb, expr){
+    invisible(.Call(`_arrow_dataset___ScannerBuilder__Filter`, sb, expr))
 }
 
-dataset___Scanner__ScanBatches <- function(scanner) {
-  .Call(`_arrow_dataset___Scanner__ScanBatches`, scanner)
+dataset___ScannerBuilder__UseThreads <- function(sb, threads){
+    invisible(.Call(`_arrow_dataset___ScannerBuilder__UseThreads`, sb, threads))
 }
 
-dataset___Scanner__ToRecordBatchReader <- function(scanner) {
-  .Call(`_arrow_dataset___Scanner__ToRecordBatchReader`, scanner)
+dataset___ScannerBuilder__UseAsync <- function(sb, use_async){
+    invisible(.Call(`_arrow_dataset___ScannerBuilder__UseAsync`, sb, use_async))
 }
 
-dataset___Scanner__head <- function(scanner, n) {
-  .Call(`_arrow_dataset___Scanner__head`, scanner, n)
+dataset___ScannerBuilder__BatchSize <- function(sb, batch_size){
+    invisible(.Call(`_arrow_dataset___ScannerBuilder__BatchSize`, sb, batch_size))
 }
 
-dataset___Scanner__schema <- function(sc) {
-  .Call(`_arrow_dataset___Scanner__schema`, sc)
+dataset___ScannerBuilder__FragmentScanOptions <- function(sb, options){
+    invisible(.Call(`_arrow_dataset___ScannerBuilder__FragmentScanOptions`, sb, options))
 }
 
-dataset___ScanTask__get_batches <- function(scan_task) {
-  .Call(`_arrow_dataset___ScanTask__get_batches`, scan_task)
+dataset___ScannerBuilder__schema <- function(sb){
+    .Call(`_arrow_dataset___ScannerBuilder__schema`, sb)
 }
 
-dataset___Dataset__Write <- function(file_write_options, filesystem, base_dir, partitioning, basename_template, scanner) {
-  invisible(.Call(`_arrow_dataset___Dataset__Write`, file_write_options, filesystem, base_dir, partitioning, basename_template, scanner))
+dataset___ScannerBuilder__Finish <- function(sb){
+    .Call(`_arrow_dataset___ScannerBuilder__Finish`, sb)
 }
 
-dataset___Scanner__TakeRows <- function(scanner, indices) {
-  .Call(`_arrow_dataset___Scanner__TakeRows`, scanner, indices)
+dataset___Scanner__ToTable <- function(scanner){
+    .Call(`_arrow_dataset___Scanner__ToTable`, scanner)
 }
 
-dataset___Scanner__CountRows <- function(scanner) {
-  .Call(`_arrow_dataset___Scanner__CountRows`, scanner)
+dataset___Scanner__ScanBatches <- function(scanner){
+    .Call(`_arrow_dataset___Scanner__ScanBatches`, scanner)
 }
 
-Int8__initialize <- function() {
-  .Call(`_arrow_Int8__initialize`)
+dataset___Scanner__ToRecordBatchReader <- function(scanner){
+    .Call(`_arrow_dataset___Scanner__ToRecordBatchReader`, scanner)
 }
 
-Int16__initialize <- function() {
-  .Call(`_arrow_Int16__initialize`)
+dataset___Scanner__head <- function(scanner, n){
+    .Call(`_arrow_dataset___Scanner__head`, scanner, n)
 }
 
-Int32__initialize <- function() {
-  .Call(`_arrow_Int32__initialize`)
+dataset___Scanner__schema <- function(sc){
+    .Call(`_arrow_dataset___Scanner__schema`, sc)
 }
 
-Int64__initialize <- function() {
-  .Call(`_arrow_Int64__initialize`)
+dataset___ScanTask__get_batches <- function(scan_task){
+    .Call(`_arrow_dataset___ScanTask__get_batches`, scan_task)
 }
 
-UInt8__initialize <- function() {
-  .Call(`_arrow_UInt8__initialize`)
+dataset___Dataset__Write <- function(file_write_options, filesystem, base_dir, partitioning, basename_template, scanner){
+    invisible(.Call(`_arrow_dataset___Dataset__Write`, file_write_options, filesystem, base_dir, partitioning, basename_template, scanner))
 }
 
-UInt16__initialize <- function() {
-  .Call(`_arrow_UInt16__initialize`)
+dataset___Scanner__TakeRows <- function(scanner, indices){
+    .Call(`_arrow_dataset___Scanner__TakeRows`, scanner, indices)
 }
 
-UInt32__initialize <- function() {
-  .Call(`_arrow_UInt32__initialize`)
+dataset___Scanner__CountRows <- function(scanner){
+    .Call(`_arrow_dataset___Scanner__CountRows`, scanner)
 }
 
-UInt64__initialize <- function() {
-  .Call(`_arrow_UInt64__initialize`)
+Int8__initialize <- function(){
+    .Call(`_arrow_Int8__initialize`)
 }
 
-Float16__initialize <- function() {
-  .Call(`_arrow_Float16__initialize`)
+Int16__initialize <- function(){
+    .Call(`_arrow_Int16__initialize`)
 }
 
-Float32__initialize <- function() {
-  .Call(`_arrow_Float32__initialize`)
+Int32__initialize <- function(){
+    .Call(`_arrow_Int32__initialize`)
 }
 
-Float64__initialize <- function() {
-  .Call(`_arrow_Float64__initialize`)
+Int64__initialize <- function(){
+    .Call(`_arrow_Int64__initialize`)
 }
 
-Boolean__initialize <- function() {
-  .Call(`_arrow_Boolean__initialize`)
+UInt8__initialize <- function(){
+    .Call(`_arrow_UInt8__initialize`)
 }
 
-Utf8__initialize <- function() {
-  .Call(`_arrow_Utf8__initialize`)
+UInt16__initialize <- function(){
+    .Call(`_arrow_UInt16__initialize`)
 }
 
-LargeUtf8__initialize <- function() {
-  .Call(`_arrow_LargeUtf8__initialize`)
+UInt32__initialize <- function(){
+    .Call(`_arrow_UInt32__initialize`)
 }
 
-Binary__initialize <- function() {
-  .Call(`_arrow_Binary__initialize`)
+UInt64__initialize <- function(){
+    .Call(`_arrow_UInt64__initialize`)
 }
 
-LargeBinary__initialize <- function() {
-  .Call(`_arrow_LargeBinary__initialize`)
+Float16__initialize <- function(){
+    .Call(`_arrow_Float16__initialize`)
 }
 
-Date32__initialize <- function() {
-  .Call(`_arrow_Date32__initialize`)
+Float32__initialize <- function(){
+    .Call(`_arrow_Float32__initialize`)
 }
 
-Date64__initialize <- function() {
-  .Call(`_arrow_Date64__initialize`)
+Float64__initialize <- function(){
+    .Call(`_arrow_Float64__initialize`)
 }
 
-Null__initialize <- function() {
-  .Call(`_arrow_Null__initialize`)
+Boolean__initialize <- function(){
+    .Call(`_arrow_Boolean__initialize`)
 }
 
-Decimal128Type__initialize <- function(precision, scale) {
-  .Call(`_arrow_Decimal128Type__initialize`, precision, scale)
+Utf8__initialize <- function(){
+    .Call(`_arrow_Utf8__initialize`)
 }
 
-FixedSizeBinary__initialize <- function(byte_width) {
-  .Call(`_arrow_FixedSizeBinary__initialize`, byte_width)
+LargeUtf8__initialize <- function(){
+    .Call(`_arrow_LargeUtf8__initialize`)
 }
 
-Timestamp__initialize <- function(unit, timezone) {
-  .Call(`_arrow_Timestamp__initialize`, unit, timezone)
+Binary__initialize <- function(){
+    .Call(`_arrow_Binary__initialize`)
 }
 
-Time32__initialize <- function(unit) {
-  .Call(`_arrow_Time32__initialize`, unit)
+LargeBinary__initialize <- function(){
+    .Call(`_arrow_LargeBinary__initialize`)
 }
 
-Time64__initialize <- function(unit) {
-  .Call(`_arrow_Time64__initialize`, unit)
+Date32__initialize <- function(){
+    .Call(`_arrow_Date32__initialize`)
 }
 
-list__ <- function(x) {
-  .Call(`_arrow_list__`, x)
+Date64__initialize <- function(){
+    .Call(`_arrow_Date64__initialize`)
 }
 
-large_list__ <- function(x) {
-  .Call(`_arrow_large_list__`, x)
+Null__initialize <- function(){
+    .Call(`_arrow_Null__initialize`)
 }
 
-fixed_size_list__ <- function(x, list_size) {
-  .Call(`_arrow_fixed_size_list__`, x, list_size)
+Decimal128Type__initialize <- function(precision, scale){
+    .Call(`_arrow_Decimal128Type__initialize`, precision, scale)
 }
 
-struct__ <- function(fields) {
-  .Call(`_arrow_struct__`, fields)
+FixedSizeBinary__initialize <- function(byte_width){
+    .Call(`_arrow_FixedSizeBinary__initialize`, byte_width)
 }
 
-DataType__ToString <- function(type) {
-  .Call(`_arrow_DataType__ToString`, type)
+Timestamp__initialize <- function(unit, timezone){
+    .Call(`_arrow_Timestamp__initialize`, unit, timezone)
 }
 
-DataType__name <- function(type) {
-  .Call(`_arrow_DataType__name`, type)
+Time32__initialize <- function(unit){
+    .Call(`_arrow_Time32__initialize`, unit)
 }
 
-DataType__Equals <- function(lhs, rhs) {
-  .Call(`_arrow_DataType__Equals`, lhs, rhs)
+Time64__initialize <- function(unit){
+    .Call(`_arrow_Time64__initialize`, unit)
 }
 
-DataType__num_fields <- function(type) {
-  .Call(`_arrow_DataType__num_fields`, type)
+list__ <- function(x){
+    .Call(`_arrow_list__`, x)
 }
 
-DataType__fields <- function(type) {
-  .Call(`_arrow_DataType__fields`, type)
+large_list__ <- function(x){
+    .Call(`_arrow_large_list__`, x)
 }
 
-DataType__id <- function(type) {
-  .Call(`_arrow_DataType__id`, type)
+fixed_size_list__ <- function(x, list_size){
+    .Call(`_arrow_fixed_size_list__`, x, list_size)
 }
 
-ListType__ToString <- function(type) {
-  .Call(`_arrow_ListType__ToString`, type)
+struct__ <- function(fields){
+    .Call(`_arrow_struct__`, fields)
 }
 
-FixedWidthType__bit_width <- function(type) {
-  .Call(`_arrow_FixedWidthType__bit_width`, type)
+DataType__ToString <- function(type){
+    .Call(`_arrow_DataType__ToString`, type)
 }
 
-DateType__unit <- function(type) {
-  .Call(`_arrow_DateType__unit`, type)
+DataType__name <- function(type){
+    .Call(`_arrow_DataType__name`, type)
 }
 
-TimeType__unit <- function(type) {
-  .Call(`_arrow_TimeType__unit`, type)
+DataType__Equals <- function(lhs, rhs){
+    .Call(`_arrow_DataType__Equals`, lhs, rhs)
 }
 
-DecimalType__precision <- function(type) {
-  .Call(`_arrow_DecimalType__precision`, type)
+DataType__num_fields <- function(type){
+    .Call(`_arrow_DataType__num_fields`, type)
 }
 
-DecimalType__scale <- function(type) {
-  .Call(`_arrow_DecimalType__scale`, type)
+DataType__fields <- function(type){
+    .Call(`_arrow_DataType__fields`, type)
 }
 
-TimestampType__timezone <- function(type) {
-  .Call(`_arrow_TimestampType__timezone`, type)
+DataType__id <- function(type){
+    .Call(`_arrow_DataType__id`, type)
 }
 
-TimestampType__unit <- function(type) {
-  .Call(`_arrow_TimestampType__unit`, type)
+ListType__ToString <- function(type){
+    .Call(`_arrow_ListType__ToString`, type)
 }
 
-DictionaryType__initialize <- function(index_type, value_type, ordered) {
-  .Call(`_arrow_DictionaryType__initialize`, index_type, value_type, ordered)
+FixedWidthType__bit_width <- function(type){
+    .Call(`_arrow_FixedWidthType__bit_width`, type)
 }
 
-DictionaryType__index_type <- function(type) {
-  .Call(`_arrow_DictionaryType__index_type`, type)
+DateType__unit <- function(type){
+    .Call(`_arrow_DateType__unit`, type)
 }
 
-DictionaryType__value_type <- function(type) {
-  .Call(`_arrow_DictionaryType__value_type`, type)
+TimeType__unit <- function(type){
+    .Call(`_arrow_TimeType__unit`, type)
 }
 
-DictionaryType__name <- function(type) {
-  .Call(`_arrow_DictionaryType__name`, type)
+DecimalType__precision <- function(type){
+    .Call(`_arrow_DecimalType__precision`, type)
 }
 
-DictionaryType__ordered <- function(type) {
-  .Call(`_arrow_DictionaryType__ordered`, type)
+DecimalType__scale <- function(type){
+    .Call(`_arrow_DecimalType__scale`, type)
 }
 
-StructType__GetFieldByName <- function(type, name) {
-  .Call(`_arrow_StructType__GetFieldByName`, type, name)
+TimestampType__timezone <- function(type){
+    .Call(`_arrow_TimestampType__timezone`, type)
 }
 
-StructType__GetFieldIndex <- function(type, name) {
-  .Call(`_arrow_StructType__GetFieldIndex`, type, name)
+TimestampType__unit <- function(type){
+    .Call(`_arrow_TimestampType__unit`, type)
 }
 
-StructType__field_names <- function(type) {
-  .Call(`_arrow_StructType__field_names`, type)
+DictionaryType__initialize <- function(index_type, value_type, ordered){
+    .Call(`_arrow_DictionaryType__initialize`, index_type, value_type, ordered)
 }
 
-ListType__value_field <- function(type) {
-  .Call(`_arrow_ListType__value_field`, type)
+DictionaryType__index_type <- function(type){
+    .Call(`_arrow_DictionaryType__index_type`, type)
 }
 
-ListType__value_type <- function(type) {
-  .Call(`_arrow_ListType__value_type`, type)
+DictionaryType__value_type <- function(type){
+    .Call(`_arrow_DictionaryType__value_type`, type)
 }
 
-LargeListType__value_field <- function(type) {
-  .Call(`_arrow_LargeListType__value_field`, type)
+DictionaryType__name <- function(type){
+    .Call(`_arrow_DictionaryType__name`, type)
 }
 
-LargeListType__value_type <- function(type) {
-  .Call(`_arrow_LargeListType__value_type`, type)
+DictionaryType__ordered <- function(type){
+    .Call(`_arrow_DictionaryType__ordered`, type)
 }
 
-FixedSizeListType__value_field <- function(type) {
-  .Call(`_arrow_FixedSizeListType__value_field`, type)
+StructType__GetFieldByName <- function(type, name){
+    .Call(`_arrow_StructType__GetFieldByName`, type, name)
 }
 
-FixedSizeListType__value_type <- function(type) {
-  .Call(`_arrow_FixedSizeListType__value_type`, type)
+StructType__GetFieldIndex <- function(type, name){
+    .Call(`_arrow_StructType__GetFieldIndex`, type, name)
 }
 
-FixedSizeListType__list_size <- function(type) {
-  .Call(`_arrow_FixedSizeListType__list_size`, type)
+StructType__field_names <- function(type){
+    .Call(`_arrow_StructType__field_names`, type)
 }
 
-compute___expr__call <- function(func_name, argument_list, options) {
-  .Call(`_arrow_compute___expr__call`, func_name, argument_list, options)
+ListType__value_field <- function(type){
+    .Call(`_arrow_ListType__value_field`, type)
 }
 
-compute___expr__field_ref <- function(name) {
-  .Call(`_arrow_compute___expr__field_ref`, name)
+ListType__value_type <- function(type){
+    .Call(`_arrow_ListType__value_type`, type)
 }
 
-compute___expr__get_field_ref_name <- function(x) {
-  .Call(`_arrow_compute___expr__get_field_ref_name`, x)
+LargeListType__value_field <- function(type){
+    .Call(`_arrow_LargeListType__value_field`, type)
 }
 
-compute___expr__scalar <- function(x) {
-  .Call(`_arrow_compute___expr__scalar`, x)
+LargeListType__value_type <- function(type){
+    .Call(`_arrow_LargeListType__value_type`, type)
 }
 
-compute___expr__ToString <- function(x) {
-  .Call(`_arrow_compute___expr__ToString`, x)
+FixedSizeListType__value_field <- function(type){
+    .Call(`_arrow_FixedSizeListType__value_field`, type)
 }
 
-compute___expr__type <- function(x, schema) {
-  .Call(`_arrow_compute___expr__type`, x, schema)
+FixedSizeListType__value_type <- function(type){
+    .Call(`_arrow_FixedSizeListType__value_type`, type)
 }
 
-compute___expr__type_id <- function(x, schema) {
-  .Call(`_arrow_compute___expr__type_id`, x, schema)
+FixedSizeListType__list_size <- function(type){
+    .Call(`_arrow_FixedSizeListType__list_size`, type)
 }
 
-ipc___WriteFeather__Table <- function(stream, table, version, chunk_size, compression, compression_level) {
-  invisible(.Call(`_arrow_ipc___WriteFeather__Table`, stream, table, version, chunk_size, compression, compression_level))
+compute___expr__call <- function(func_name, argument_list, options){
+    .Call(`_arrow_compute___expr__call`, func_name, argument_list, options)
 }
 
-ipc___feather___Reader__version <- function(reader) {
-  .Call(`_arrow_ipc___feather___Reader__version`, reader)
+field_names_in_expression <- function(x){
+    .Call(`_arrow_field_names_in_expression`, x)
 }
 
-ipc___feather___Reader__Read <- function(reader, columns) {
-  .Call(`_arrow_ipc___feather___Reader__Read`, reader, columns)
+compute___expr__get_field_ref_name <- function(x){
+    .Call(`_arrow_compute___expr__get_field_ref_name`, x)
 }
 
-ipc___feather___Reader__Open <- function(stream) {
-  .Call(`_arrow_ipc___feather___Reader__Open`, stream)
+compute___expr__field_ref <- function(name){
+    .Call(`_arrow_compute___expr__field_ref`, name)
 }
 
-ipc___feather___Reader__schema <- function(reader) {
-  .Call(`_arrow_ipc___feather___Reader__schema`, reader)
+compute___expr__scalar <- function(x){
+    .Call(`_arrow_compute___expr__scalar`, x)
 }
 
-Field__initialize <- function(name, field, nullable) {
-  .Call(`_arrow_Field__initialize`, name, field, nullable)
+compute___expr__ToString <- function(x){
+    .Call(`_arrow_compute___expr__ToString`, x)
 }
 
-Field__ToString <- function(field) {
-  .Call(`_arrow_Field__ToString`, field)
+compute___expr__type <- function(x, schema){
+    .Call(`_arrow_compute___expr__type`, x, schema)
 }
 
-Field__name <- function(field) {
-  .Call(`_arrow_Field__name`, field)
+compute___expr__type_id <- function(x, schema){
+    .Call(`_arrow_compute___expr__type_id`, x, schema)
 }
 
-Field__Equals <- function(field, other) {
-  .Call(`_arrow_Field__Equals`, field, other)
+ipc___WriteFeather__Table <- function(stream, table, version, chunk_size, compression, compression_level){
+    invisible(.Call(`_arrow_ipc___WriteFeather__Table`, stream, table, version, chunk_size, compression, compression_level))
 }
 
-Field__nullable <- function(field) {
-  .Call(`_arrow_Field__nullable`, field)
+ipc___feather___Reader__version <- function(reader){
+    .Call(`_arrow_ipc___feather___Reader__version`, reader)
 }
 
-Field__type <- function(field) {
-  .Call(`_arrow_Field__type`, field)
+ipc___feather___Reader__Read <- function(reader, columns){
+    .Call(`_arrow_ipc___feather___Reader__Read`, reader, columns)
 }
 
-fs___FileInfo__type <- function(x) {
-  .Call(`_arrow_fs___FileInfo__type`, x)
+ipc___feather___Reader__Open <- function(stream){
+    .Call(`_arrow_ipc___feather___Reader__Open`, stream)
 }
 
-fs___FileInfo__set_type <- function(x, type) {
-  invisible(.Call(`_arrow_fs___FileInfo__set_type`, x, type))
+ipc___feather___Reader__schema <- function(reader){
+    .Call(`_arrow_ipc___feather___Reader__schema`, reader)
 }
 
-fs___FileInfo__path <- function(x) {
-  .Call(`_arrow_fs___FileInfo__path`, x)
+Field__initialize <- function(name, field, nullable){
+    .Call(`_arrow_Field__initialize`, name, field, nullable)
 }
 
-fs___FileInfo__set_path <- function(x, path) {
-  invisible(.Call(`_arrow_fs___FileInfo__set_path`, x, path))
+Field__ToString <- function(field){
+    .Call(`_arrow_Field__ToString`, field)
 }
 
-fs___FileInfo__size <- function(x) {
-  .Call(`_arrow_fs___FileInfo__size`, x)
+Field__name <- function(field){
+    .Call(`_arrow_Field__name`, field)
 }
 
-fs___FileInfo__set_size <- function(x, size) {
-  invisible(.Call(`_arrow_fs___FileInfo__set_size`, x, size))
+Field__Equals <- function(field, other){
+    .Call(`_arrow_Field__Equals`, field, other)
 }
 
-fs___FileInfo__base_name <- function(x) {
-  .Call(`_arrow_fs___FileInfo__base_name`, x)
+Field__nullable <- function(field){
+    .Call(`_arrow_Field__nullable`, field)
 }
 
-fs___FileInfo__extension <- function(x) {
-  .Call(`_arrow_fs___FileInfo__extension`, x)
+Field__type <- function(field){
+    .Call(`_arrow_Field__type`, field)
 }
 
-fs___FileInfo__mtime <- function(x) {
-  .Call(`_arrow_fs___FileInfo__mtime`, x)
+fs___FileInfo__type <- function(x){
+    .Call(`_arrow_fs___FileInfo__type`, x)
 }
 
-fs___FileInfo__set_mtime <- function(x, time) {
-  invisible(.Call(`_arrow_fs___FileInfo__set_mtime`, x, time))
+fs___FileInfo__set_type <- function(x, type){
+    invisible(.Call(`_arrow_fs___FileInfo__set_type`, x, type))
 }
 
-fs___FileSelector__base_dir <- function(selector) {
-  .Call(`_arrow_fs___FileSelector__base_dir`, selector)
+fs___FileInfo__path <- function(x){
+    .Call(`_arrow_fs___FileInfo__path`, x)
 }
 
-fs___FileSelector__allow_not_found <- function(selector) {
-  .Call(`_arrow_fs___FileSelector__allow_not_found`, selector)
+fs___FileInfo__set_path <- function(x, path){
+    invisible(.Call(`_arrow_fs___FileInfo__set_path`, x, path))
 }
 
-fs___FileSelector__recursive <- function(selector) {
-  .Call(`_arrow_fs___FileSelector__recursive`, selector)
+fs___FileInfo__size <- function(x){
+    .Call(`_arrow_fs___FileInfo__size`, x)
 }
 
-fs___FileSelector__create <- function(base_dir, allow_not_found, recursive) {
-  .Call(`_arrow_fs___FileSelector__create`, base_dir, allow_not_found, recursive)
+fs___FileInfo__set_size <- function(x, size){
+    invisible(.Call(`_arrow_fs___FileInfo__set_size`, x, size))
 }
 
-fs___FileSystem__GetTargetInfos_Paths <- function(file_system, paths) {
-  .Call(`_arrow_fs___FileSystem__GetTargetInfos_Paths`, file_system, paths)
+fs___FileInfo__base_name <- function(x){
+    .Call(`_arrow_fs___FileInfo__base_name`, x)
 }
 
-fs___FileSystem__GetTargetInfos_FileSelector <- function(file_system, selector) {
-  .Call(`_arrow_fs___FileSystem__GetTargetInfos_FileSelector`, file_system, selector)
+fs___FileInfo__extension <- function(x){
+    .Call(`_arrow_fs___FileInfo__extension`, x)
 }
 
-fs___FileSystem__CreateDir <- function(file_system, path, recursive) {
-  invisible(.Call(`_arrow_fs___FileSystem__CreateDir`, file_system, path, recursive))
+fs___FileInfo__mtime <- function(x){
+    .Call(`_arrow_fs___FileInfo__mtime`, x)
 }
 
-fs___FileSystem__DeleteDir <- function(file_system, path) {
-  invisible(.Call(`_arrow_fs___FileSystem__DeleteDir`, file_system, path))
+fs___FileInfo__set_mtime <- function(x, time){
+    invisible(.Call(`_arrow_fs___FileInfo__set_mtime`, x, time))
 }
 
-fs___FileSystem__DeleteDirContents <- function(file_system, path) {
-  invisible(.Call(`_arrow_fs___FileSystem__DeleteDirContents`, file_system, path))
+fs___FileSelector__base_dir <- function(selector){
+    .Call(`_arrow_fs___FileSelector__base_dir`, selector)
 }
 
-fs___FileSystem__DeleteFile <- function(file_system, path) {
-  invisible(.Call(`_arrow_fs___FileSystem__DeleteFile`, file_system, path))
+fs___FileSelector__allow_not_found <- function(selector){
+    .Call(`_arrow_fs___FileSelector__allow_not_found`, selector)
 }
 
-fs___FileSystem__DeleteFiles <- function(file_system, paths) {
-  invisible(.Call(`_arrow_fs___FileSystem__DeleteFiles`, file_system, paths))
+fs___FileSelector__recursive <- function(selector){
+    .Call(`_arrow_fs___FileSelector__recursive`, selector)
 }
 
-fs___FileSystem__Move <- function(file_system, src, dest) {
-  invisible(.Call(`_arrow_fs___FileSystem__Move`, file_system, src, dest))
+fs___FileSelector__create <- function(base_dir, allow_not_found, recursive){
+    .Call(`_arrow_fs___FileSelector__create`, base_dir, allow_not_found, recursive)
 }
 
-fs___FileSystem__CopyFile <- function(file_system, src, dest) {
-  invisible(.Call(`_arrow_fs___FileSystem__CopyFile`, file_system, src, dest))
+fs___FileSystem__GetTargetInfos_Paths <- function(file_system, paths){
+    .Call(`_arrow_fs___FileSystem__GetTargetInfos_Paths`, file_system, paths)
 }
 
-fs___FileSystem__OpenInputStream <- function(file_system, path) {
-  .Call(`_arrow_fs___FileSystem__OpenInputStream`, file_system, path)
+fs___FileSystem__GetTargetInfos_FileSelector <- function(file_system, selector){
+    .Call(`_arrow_fs___FileSystem__GetTargetInfos_FileSelector`, file_system, selector)
 }
 
-fs___FileSystem__OpenInputFile <- function(file_system, path) {
-  .Call(`_arrow_fs___FileSystem__OpenInputFile`, file_system, path)
+fs___FileSystem__CreateDir <- function(file_system, path, recursive){
+    invisible(.Call(`_arrow_fs___FileSystem__CreateDir`, file_system, path, recursive))
 }
 
-fs___FileSystem__OpenOutputStream <- function(file_system, path) {
-  .Call(`_arrow_fs___FileSystem__OpenOutputStream`, file_system, path)
+fs___FileSystem__DeleteDir <- function(file_system, path){
+    invisible(.Call(`_arrow_fs___FileSystem__DeleteDir`, file_system, path))
 }
 
-fs___FileSystem__OpenAppendStream <- function(file_system, path) {
-  .Call(`_arrow_fs___FileSystem__OpenAppendStream`, file_system, path)
+fs___FileSystem__DeleteDirContents <- function(file_system, path){
+    invisible(.Call(`_arrow_fs___FileSystem__DeleteDirContents`, file_system, path))
 }
 
-fs___FileSystem__type_name <- function(file_system) {
-  .Call(`_arrow_fs___FileSystem__type_name`, file_system)
+fs___FileSystem__DeleteFile <- function(file_system, path){
+    invisible(.Call(`_arrow_fs___FileSystem__DeleteFile`, file_system, path))
 }
 
-fs___LocalFileSystem__create <- function() {
-  .Call(`_arrow_fs___LocalFileSystem__create`)
+fs___FileSystem__DeleteFiles <- function(file_system, paths){
+    invisible(.Call(`_arrow_fs___FileSystem__DeleteFiles`, file_system, paths))
 }
 
-fs___SubTreeFileSystem__create <- function(base_path, base_fs) {
-  .Call(`_arrow_fs___SubTreeFileSystem__create`, base_path, base_fs)
+fs___FileSystem__Move <- function(file_system, src, dest){
+    invisible(.Call(`_arrow_fs___FileSystem__Move`, file_system, src, dest))
 }
 
-fs___SubTreeFileSystem__base_fs <- function(file_system) {
-  .Call(`_arrow_fs___SubTreeFileSystem__base_fs`, file_system)
+fs___FileSystem__CopyFile <- function(file_system, src, dest){
+    invisible(.Call(`_arrow_fs___FileSystem__CopyFile`, file_system, src, dest))
 }
 
-fs___SubTreeFileSystem__base_path <- function(file_system) {
-  .Call(`_arrow_fs___SubTreeFileSystem__base_path`, file_system)
+fs___FileSystem__OpenInputStream <- function(file_system, path){
+    .Call(`_arrow_fs___FileSystem__OpenInputStream`, file_system, path)
 }
 
-fs___FileSystemFromUri <- function(path) {
-  .Call(`_arrow_fs___FileSystemFromUri`, path)
+fs___FileSystem__OpenInputFile <- function(file_system, path){
+    .Call(`_arrow_fs___FileSystem__OpenInputFile`, file_system, path)
 }
 
-fs___CopyFiles <- function(source_fs, source_sel, destination_fs, destination_base_dir, chunk_size, use_threads) {
-  invisible(.Call(`_arrow_fs___CopyFiles`, source_fs, source_sel, destination_fs, destination_base_dir, chunk_size, use_threads))
+fs___FileSystem__OpenOutputStream <- function(file_system, path){
+    .Call(`_arrow_fs___FileSystem__OpenOutputStream`, file_system, path)
 }
 
-fs___S3FileSystem__create <- function(anonymous, access_key, secret_key, session_token, role_arn, session_name, external_id, load_frequency, region, endpoint_override, scheme, background_writes) {
-  .Call(`_arrow_fs___S3FileSystem__create`, anonymous, access_key, secret_key, session_token, role_arn, session_name, external_id, load_frequency, region, endpoint_override, scheme, background_writes)
+fs___FileSystem__OpenAppendStream <- function(file_system, path){
+    .Call(`_arrow_fs___FileSystem__OpenAppendStream`, file_system, path)
 }
 
-fs___S3FileSystem__region <- function(fs) {
-  .Call(`_arrow_fs___S3FileSystem__region`, fs)
+fs___FileSystem__type_name <- function(file_system){
+    .Call(`_arrow_fs___FileSystem__type_name`, file_system)
 }
 
-io___Readable__Read <- function(x, nbytes) {
-  .Call(`_arrow_io___Readable__Read`, x, nbytes)
+fs___LocalFileSystem__create <- function(){
+    .Call(`_arrow_fs___LocalFileSystem__create`)
 }
 
-io___InputStream__Close <- function(x) {
-  invisible(.Call(`_arrow_io___InputStream__Close`, x))
+fs___SubTreeFileSystem__create <- function(base_path, base_fs){
+    .Call(`_arrow_fs___SubTreeFileSystem__create`, base_path, base_fs)
 }
 
-io___OutputStream__Close <- function(x) {
-  invisible(.Call(`_arrow_io___OutputStream__Close`, x))
+fs___SubTreeFileSystem__base_fs <- function(file_system){
+    .Call(`_arrow_fs___SubTreeFileSystem__base_fs`, file_system)
 }
 
-io___RandomAccessFile__GetSize <- function(x) {
-  .Call(`_arrow_io___RandomAccessFile__GetSize`, x)
+fs___SubTreeFileSystem__base_path <- function(file_system){
+    .Call(`_arrow_fs___SubTreeFileSystem__base_path`, file_system)
 }
 
-io___RandomAccessFile__supports_zero_copy <- function(x) {
-  .Call(`_arrow_io___RandomAccessFile__supports_zero_copy`, x)
+fs___FileSystemFromUri <- function(path){
+    .Call(`_arrow_fs___FileSystemFromUri`, path)
 }
 
-io___RandomAccessFile__Seek <- function(x, position) {
-  invisible(.Call(`_arrow_io___RandomAccessFile__Seek`, x, position))
+fs___CopyFiles <- function(source_fs, source_sel, destination_fs, destination_base_dir, chunk_size, use_threads){
+    invisible(.Call(`_arrow_fs___CopyFiles`, source_fs, source_sel, destination_fs, destination_base_dir, chunk_size, use_threads))
 }
 
-io___RandomAccessFile__Tell <- function(x) {
-  .Call(`_arrow_io___RandomAccessFile__Tell`, x)
+fs___S3FileSystem__create <- function(anonymous, access_key, secret_key, session_token, role_arn, session_name, external_id, load_frequency, region, endpoint_override, scheme, background_writes){
+    .Call(`_arrow_fs___S3FileSystem__create`, anonymous, access_key, secret_key, session_token, role_arn, session_name, external_id, load_frequency, region, endpoint_override, scheme, background_writes)
 }
 
-io___RandomAccessFile__Read0 <- function(x) {
-  .Call(`_arrow_io___RandomAccessFile__Read0`, x)
+fs___S3FileSystem__region <- function(fs){
+    .Call(`_arrow_fs___S3FileSystem__region`, fs)
 }
 
-io___RandomAccessFile__ReadAt <- function(x, position, nbytes) {
-  .Call(`_arrow_io___RandomAccessFile__ReadAt`, x, position, nbytes)
+io___Readable__Read <- function(x, nbytes){
+    .Call(`_arrow_io___Readable__Read`, x, nbytes)
 }
 
-io___MemoryMappedFile__Create <- function(path, size) {
-  .Call(`_arrow_io___MemoryMappedFile__Create`, path, size)
+io___InputStream__Close <- function(x){
+    invisible(.Call(`_arrow_io___InputStream__Close`, x))
 }
 
-io___MemoryMappedFile__Open <- function(path, mode) {
-  .Call(`_arrow_io___MemoryMappedFile__Open`, path, mode)
+io___OutputStream__Close <- function(x){
+    invisible(.Call(`_arrow_io___OutputStream__Close`, x))
 }
 
-io___MemoryMappedFile__Resize <- function(x, size) {
-  invisible(.Call(`_arrow_io___MemoryMappedFile__Resize`, x, size))
+io___RandomAccessFile__GetSize <- function(x){
+    .Call(`_arrow_io___RandomAccessFile__GetSize`, x)
 }
 
-io___ReadableFile__Open <- function(path) {
-  .Call(`_arrow_io___ReadableFile__Open`, path)
+io___RandomAccessFile__supports_zero_copy <- function(x){
+    .Call(`_arrow_io___RandomAccessFile__supports_zero_copy`, x)
 }
 
-io___BufferReader__initialize <- function(buffer) {
-  .Call(`_arrow_io___BufferReader__initialize`, buffer)
+io___RandomAccessFile__Seek <- function(x, position){
+    invisible(.Call(`_arrow_io___RandomAccessFile__Seek`, x, position))
 }
 
-io___Writable__write <- function(stream, buf) {
-  invisible(.Call(`_arrow_io___Writable__write`, stream, buf))
+io___RandomAccessFile__Tell <- function(x){
+    .Call(`_arrow_io___RandomAccessFile__Tell`, x)
 }
 
-io___OutputStream__Tell <- function(stream) {
-  .Call(`_arrow_io___OutputStream__Tell`, stream)
+io___RandomAccessFile__Read0 <- function(x){
+    .Call(`_arrow_io___RandomAccessFile__Read0`, x)
 }
 
-io___FileOutputStream__Open <- function(path) {
-  .Call(`_arrow_io___FileOutputStream__Open`, path)
+io___RandomAccessFile__ReadAt <- function(x, position, nbytes){
+    .Call(`_arrow_io___RandomAccessFile__ReadAt`, x, position, nbytes)
 }
 
-io___BufferOutputStream__Create <- function(initial_capacity) {
-  .Call(`_arrow_io___BufferOutputStream__Create`, initial_capacity)
+io___MemoryMappedFile__Create <- function(path, size){
+    .Call(`_arrow_io___MemoryMappedFile__Create`, path, size)
 }
 
-io___BufferOutputStream__capacity <- function(stream) {
-  .Call(`_arrow_io___BufferOutputStream__capacity`, stream)
+io___MemoryMappedFile__Open <- function(path, mode){
+    .Call(`_arrow_io___MemoryMappedFile__Open`, path, mode)
 }
 
-io___BufferOutputStream__Finish <- function(stream) {
-  .Call(`_arrow_io___BufferOutputStream__Finish`, stream)
+io___MemoryMappedFile__Resize <- function(x, size){
+    invisible(.Call(`_arrow_io___MemoryMappedFile__Resize`, x, size))
 }
 
-io___BufferOutputStream__Tell <- function(stream) {
-  .Call(`_arrow_io___BufferOutputStream__Tell`, stream)
+io___ReadableFile__Open <- function(path){
+    .Call(`_arrow_io___ReadableFile__Open`, path)
 }
 
-io___BufferOutputStream__Write <- function(stream, bytes) {
-  invisible(.Call(`_arrow_io___BufferOutputStream__Write`, stream, bytes))
+io___BufferReader__initialize <- function(buffer){
+    .Call(`_arrow_io___BufferReader__initialize`, buffer)
 }
 
-json___ReadOptions__initialize <- function(use_threads, block_size) {
-  .Call(`_arrow_json___ReadOptions__initialize`, use_threads, block_size)
+io___Writable__write <- function(stream, buf){
+    invisible(.Call(`_arrow_io___Writable__write`, stream, buf))
 }
 
-json___ParseOptions__initialize1 <- function(newlines_in_values) {
-  .Call(`_arrow_json___ParseOptions__initialize1`, newlines_in_values)
+io___OutputStream__Tell <- function(stream){
+    .Call(`_arrow_io___OutputStream__Tell`, stream)
 }
 
-json___ParseOptions__initialize2 <- function(newlines_in_values, explicit_schema) {
-  .Call(`_arrow_json___ParseOptions__initialize2`, newlines_in_values, explicit_schema)
+io___FileOutputStream__Open <- function(path){
+    .Call(`_arrow_io___FileOutputStream__Open`, path)
 }
 
-json___TableReader__Make <- function(input, read_options, parse_options) {
-  .Call(`_arrow_json___TableReader__Make`, input, read_options, parse_options)
+io___BufferOutputStream__Create <- function(initial_capacity){
+    .Call(`_arrow_io___BufferOutputStream__Create`, initial_capacity)
 }
 
-json___TableReader__Read <- function(table_reader) {
-  .Call(`_arrow_json___TableReader__Read`, table_reader)
+io___BufferOutputStream__capacity <- function(stream){
+    .Call(`_arrow_io___BufferOutputStream__capacity`, stream)
 }
 
-MemoryPool__default <- function() {
-  .Call(`_arrow_MemoryPool__default`)
+io___BufferOutputStream__Finish <- function(stream){
+    .Call(`_arrow_io___BufferOutputStream__Finish`, stream)
 }
 
-MemoryPool__bytes_allocated <- function(pool) {
-  .Call(`_arrow_MemoryPool__bytes_allocated`, pool)
+io___BufferOutputStream__Tell <- function(stream){
+    .Call(`_arrow_io___BufferOutputStream__Tell`, stream)
 }
 
-MemoryPool__max_memory <- function(pool) {
-  .Call(`_arrow_MemoryPool__max_memory`, pool)
+io___BufferOutputStream__Write <- function(stream, bytes){
+    invisible(.Call(`_arrow_io___BufferOutputStream__Write`, stream, bytes))
 }
 
-MemoryPool__backend_name <- function(pool) {
-  .Call(`_arrow_MemoryPool__backend_name`, pool)
+json___ReadOptions__initialize <- function(use_threads, block_size){
+    .Call(`_arrow_json___ReadOptions__initialize`, use_threads, block_size)
 }
 
-supported_memory_backends <- function() {
-  .Call(`_arrow_supported_memory_backends`)
+json___ParseOptions__initialize1 <- function(newlines_in_values){
+    .Call(`_arrow_json___ParseOptions__initialize1`, newlines_in_values)
 }
 
-ipc___Message__body_length <- function(message) {
-  .Call(`_arrow_ipc___Message__body_length`, message)
+json___ParseOptions__initialize2 <- function(newlines_in_values, explicit_schema){
+    .Call(`_arrow_json___ParseOptions__initialize2`, newlines_in_values, explicit_schema)
 }
 
-ipc___Message__metadata <- function(message) {
-  .Call(`_arrow_ipc___Message__metadata`, message)
+json___TableReader__Make <- function(input, read_options, parse_options){
+    .Call(`_arrow_json___TableReader__Make`, input, read_options, parse_options)
 }
 
-ipc___Message__body <- function(message) {
-  .Call(`_arrow_ipc___Message__body`, message)
+json___TableReader__Read <- function(table_reader){
+    .Call(`_arrow_json___TableReader__Read`, table_reader)
 }
 
-ipc___Message__Verify <- function(message) {
-  .Call(`_arrow_ipc___Message__Verify`, message)
+MemoryPool__default <- function(){
+    .Call(`_arrow_MemoryPool__default`)
 }
 
-ipc___Message__type <- function(message) {
-  .Call(`_arrow_ipc___Message__type`, message)
+MemoryPool__bytes_allocated <- function(pool){
+    .Call(`_arrow_MemoryPool__bytes_allocated`, pool)
 }
 
-ipc___Message__Equals <- function(x, y) {
-  .Call(`_arrow_ipc___Message__Equals`, x, y)
+MemoryPool__max_memory <- function(pool){
+    .Call(`_arrow_MemoryPool__max_memory`, pool)
 }
 
-ipc___ReadRecordBatch__Message__Schema <- function(message, schema) {
-  .Call(`_arrow_ipc___ReadRecordBatch__Message__Schema`, message, schema)
+MemoryPool__backend_name <- function(pool){
+    .Call(`_arrow_MemoryPool__backend_name`, pool)
 }
 
-ipc___ReadSchema_InputStream <- function(stream) {
-  .Call(`_arrow_ipc___ReadSchema_InputStream`, stream)
+supported_memory_backends <- function(){
+    .Call(`_arrow_supported_memory_backends`)
 }
 
-ipc___ReadSchema_Message <- function(message) {
-  .Call(`_arrow_ipc___ReadSchema_Message`, message)
+ipc___Message__body_length <- function(message){
+    .Call(`_arrow_ipc___Message__body_length`, message)
 }
 
-ipc___MessageReader__Open <- function(stream) {
-  .Call(`_arrow_ipc___MessageReader__Open`, stream)
+ipc___Message__metadata <- function(message){
+    .Call(`_arrow_ipc___Message__metadata`, message)
 }
 
-ipc___MessageReader__ReadNextMessage <- function(reader) {
-  .Call(`_arrow_ipc___MessageReader__ReadNextMessage`, reader)
+ipc___Message__body <- function(message){
+    .Call(`_arrow_ipc___Message__body`, message)
 }
 
-ipc___ReadMessage <- function(stream) {
-  .Call(`_arrow_ipc___ReadMessage`, stream)
+ipc___Message__Verify <- function(message){
+    .Call(`_arrow_ipc___Message__Verify`, message)
 }
 
-parquet___arrow___ArrowReaderProperties__Make <- function(use_threads) {
-  .Call(`_arrow_parquet___arrow___ArrowReaderProperties__Make`, use_threads)
+ipc___Message__type <- function(message){
+    .Call(`_arrow_ipc___Message__type`, message)
 }
 
-parquet___arrow___ArrowReaderProperties__set_use_threads <- function(properties, use_threads) {
-  invisible(.Call(`_arrow_parquet___arrow___ArrowReaderProperties__set_use_threads`, properties, use_threads))
+ipc___Message__Equals <- function(x, y){
+    .Call(`_arrow_ipc___Message__Equals`, x, y)
 }
 
-parquet___arrow___ArrowReaderProperties__get_use_threads <- function(properties, use_threads) {
-  .Call(`_arrow_parquet___arrow___ArrowReaderProperties__get_use_threads`, properties, use_threads)
+ipc___ReadRecordBatch__Message__Schema <- function(message, schema){
+    .Call(`_arrow_ipc___ReadRecordBatch__Message__Schema`, message, schema)
 }
 
-parquet___arrow___ArrowReaderProperties__get_read_dictionary <- function(properties, column_index) {
-  .Call(`_arrow_parquet___arrow___ArrowReaderProperties__get_read_dictionary`, properties, column_index)
+ipc___ReadSchema_InputStream <- function(stream){
+    .Call(`_arrow_ipc___ReadSchema_InputStream`, stream)
 }
 
-parquet___arrow___ArrowReaderProperties__set_read_dictionary <- function(properties, column_index, read_dict) {
-  invisible(.Call(`_arrow_parquet___arrow___ArrowReaderProperties__set_read_dictionary`, properties, column_index, read_dict))
+ipc___ReadSchema_Message <- function(message){
+    .Call(`_arrow_ipc___ReadSchema_Message`, message)
 }
 
-parquet___arrow___FileReader__OpenFile <- function(file, props) {
-  .Call(`_arrow_parquet___arrow___FileReader__OpenFile`, file, props)
+ipc___MessageReader__Open <- function(stream){
+    .Call(`_arrow_ipc___MessageReader__Open`, stream)
 }
 
-parquet___arrow___FileReader__ReadTable1 <- function(reader) {
-  .Call(`_arrow_parquet___arrow___FileReader__ReadTable1`, reader)
+ipc___MessageReader__ReadNextMessage <- function(reader){
+    .Call(`_arrow_ipc___MessageReader__ReadNextMessage`, reader)
 }
 
-parquet___arrow___FileReader__ReadTable2 <- function(reader, column_indices) {
-  .Call(`_arrow_parquet___arrow___FileReader__ReadTable2`, reader, column_indices)
+ipc___ReadMessage <- function(stream){
+    .Call(`_arrow_ipc___ReadMessage`, stream)
 }
 
-parquet___arrow___FileReader__ReadRowGroup1 <- function(reader, i) {
-  .Call(`_arrow_parquet___arrow___FileReader__ReadRowGroup1`, reader, i)
+parquet___arrow___ArrowReaderProperties__Make <- function(use_threads){
+    .Call(`_arrow_parquet___arrow___ArrowReaderProperties__Make`, use_threads)
 }
 
-parquet___arrow___FileReader__ReadRowGroup2 <- function(reader, i, column_indices) {
-  .Call(`_arrow_parquet___arrow___FileReader__ReadRowGroup2`, reader, i, column_indices)
+parquet___arrow___ArrowReaderProperties__set_use_threads <- function(properties, use_threads){
+    invisible(.Call(`_arrow_parquet___arrow___ArrowReaderProperties__set_use_threads`, properties, use_threads))
 }
 
-parquet___arrow___FileReader__ReadRowGroups1 <- function(reader, row_groups) {
-  .Call(`_arrow_parquet___arrow___FileReader__ReadRowGroups1`, reader, row_groups)
+parquet___arrow___ArrowReaderProperties__get_use_threads <- function(properties, use_threads){
+    .Call(`_arrow_parquet___arrow___ArrowReaderProperties__get_use_threads`, properties, use_threads)
 }
 
-parquet___arrow___FileReader__ReadRowGroups2 <- function(reader, row_groups, column_indices) {
-  .Call(`_arrow_parquet___arrow___FileReader__ReadRowGroups2`, reader, row_groups, column_indices)
+parquet___arrow___ArrowReaderProperties__get_read_dictionary <- function(properties, column_index){
+    .Call(`_arrow_parquet___arrow___ArrowReaderProperties__get_read_dictionary`, properties, column_index)
 }
 
-parquet___arrow___FileReader__num_rows <- function(reader) {
-  .Call(`_arrow_parquet___arrow___FileReader__num_rows`, reader)
+parquet___arrow___ArrowReaderProperties__set_read_dictionary <- function(properties, column_index, read_dict){
+    invisible(.Call(`_arrow_parquet___arrow___ArrowReaderProperties__set_read_dictionary`, properties, column_index, read_dict))
 }
 
-parquet___arrow___FileReader__num_columns <- function(reader) {
-  .Call(`_arrow_parquet___arrow___FileReader__num_columns`, reader)
+parquet___arrow___FileReader__OpenFile <- function(file, props){
+    .Call(`_arrow_parquet___arrow___FileReader__OpenFile`, file, props)
 }
 
-parquet___arrow___FileReader__num_row_groups <- function(reader) {
-  .Call(`_arrow_parquet___arrow___FileReader__num_row_groups`, reader)
+parquet___arrow___FileReader__ReadTable1 <- function(reader){
+    .Call(`_arrow_parquet___arrow___FileReader__ReadTable1`, reader)
 }
 
-parquet___arrow___FileReader__ReadColumn <- function(reader, i) {
-  .Call(`_arrow_parquet___arrow___FileReader__ReadColumn`, reader, i)
+parquet___arrow___FileReader__ReadTable2 <- function(reader, column_indices){
+    .Call(`_arrow_parquet___arrow___FileReader__ReadTable2`, reader, column_indices)
 }
 
-parquet___ArrowWriterProperties___create <- function(allow_truncated_timestamps, use_deprecated_int96_timestamps, timestamp_unit) {
-  .Call(`_arrow_parquet___ArrowWriterProperties___create`, allow_truncated_timestamps, use_deprecated_int96_timestamps, timestamp_unit)
+parquet___arrow___FileReader__ReadRowGroup1 <- function(reader, i){
+    .Call(`_arrow_parquet___arrow___FileReader__ReadRowGroup1`, reader, i)
 }
 
-parquet___WriterProperties___Builder__create <- function() {
-  .Call(`_arrow_parquet___WriterProperties___Builder__create`)
+parquet___arrow___FileReader__ReadRowGroup2 <- function(reader, i, column_indices){
+    .Call(`_arrow_parquet___arrow___FileReader__ReadRowGroup2`, reader, i, column_indices)
 }
 
-parquet___WriterProperties___Builder__version <- function(builder, version) {
-  invisible(.Call(`_arrow_parquet___WriterProperties___Builder__version`, builder, version))
+parquet___arrow___FileReader__ReadRowGroups1 <- function(reader, row_groups){
+    .Call(`_arrow_parquet___arrow___FileReader__ReadRowGroups1`, reader, row_groups)
 }
 
-parquet___ArrowWriterProperties___Builder__set_compressions <- function(builder, paths, types) {
-  invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__set_compressions`, builder, paths, types))
+parquet___arrow___FileReader__ReadRowGroups2 <- function(reader, row_groups, column_indices){
+    .Call(`_arrow_parquet___arrow___FileReader__ReadRowGroups2`, reader, row_groups, column_indices)
 }
 
-parquet___ArrowWriterProperties___Builder__set_compression_levels <- function(builder, paths, levels) {
-  invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__set_compression_levels`, builder, paths, levels))
+parquet___arrow___FileReader__num_rows <- function(reader){
+    .Call(`_arrow_parquet___arrow___FileReader__num_rows`, reader)
 }
 
-parquet___ArrowWriterProperties___Builder__set_use_dictionary <- function(builder, paths, use_dictionary) {
-  invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__set_use_dictionary`, builder, paths, use_dictionary))
+parquet___arrow___FileReader__num_columns <- function(reader){
+    .Call(`_arrow_parquet___arrow___FileReader__num_columns`, reader)
 }
 
-parquet___ArrowWriterProperties___Builder__set_write_statistics <- function(builder, paths, write_statistics) {
-  invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__set_write_statistics`, builder, paths, write_statistics))
+parquet___arrow___FileReader__num_row_groups <- function(reader){
+    .Call(`_arrow_parquet___arrow___FileReader__num_row_groups`, reader)
 }
 
-parquet___ArrowWriterProperties___Builder__data_page_size <- function(builder, data_page_size) {
-  invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__data_page_size`, builder, data_page_size))
+parquet___arrow___FileReader__ReadColumn <- function(reader, i){
+    .Call(`_arrow_parquet___arrow___FileReader__ReadColumn`, reader, i)
 }
 
-parquet___WriterProperties___Builder__build <- function(builder) {
-  .Call(`_arrow_parquet___WriterProperties___Builder__build`, builder)
+parquet___ArrowWriterProperties___create <- function(allow_truncated_timestamps, use_deprecated_int96_timestamps, timestamp_unit){
+    .Call(`_arrow_parquet___ArrowWriterProperties___create`, allow_truncated_timestamps, use_deprecated_int96_timestamps, timestamp_unit)
 }
 
-parquet___arrow___ParquetFileWriter__Open <- function(schema, sink, properties, arrow_properties) {
-  .Call(`_arrow_parquet___arrow___ParquetFileWriter__Open`, schema, sink, properties, arrow_properties)
+parquet___WriterProperties___Builder__create <- function(){
+    .Call(`_arrow_parquet___WriterProperties___Builder__create`)
 }
 
-parquet___arrow___FileWriter__WriteTable <- function(writer, table, chunk_size) {
-  invisible(.Call(`_arrow_parquet___arrow___FileWriter__WriteTable`, writer, table, chunk_size))
+parquet___WriterProperties___Builder__version <- function(builder, version){
+    invisible(.Call(`_arrow_parquet___WriterProperties___Builder__version`, builder, version))
 }
 
-parquet___arrow___FileWriter__Close <- function(writer) {
-  invisible(.Call(`_arrow_parquet___arrow___FileWriter__Close`, writer))
+parquet___ArrowWriterProperties___Builder__set_compressions <- function(builder, paths, types){
+    invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__set_compressions`, builder, paths, types))
 }
 
-parquet___arrow___WriteTable <- function(table, sink, properties, arrow_properties) {
-  invisible(.Call(`_arrow_parquet___arrow___WriteTable`, table, sink, properties, arrow_properties))
+parquet___ArrowWriterProperties___Builder__set_compression_levels <- function(builder, paths, levels){
+    invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__set_compression_levels`, builder, paths, levels))
 }
 
-parquet___arrow___FileReader__GetSchema <- function(reader) {
-  .Call(`_arrow_parquet___arrow___FileReader__GetSchema`, reader)
+parquet___ArrowWriterProperties___Builder__set_use_dictionary <- function(builder, paths, use_dictionary){
+    invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__set_use_dictionary`, builder, paths, use_dictionary))
 }
 
-allocate_arrow_schema <- function() {
-  .Call(`_arrow_allocate_arrow_schema`)
+parquet___ArrowWriterProperties___Builder__set_write_statistics <- function(builder, paths, write_statistics){
+    invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__set_write_statistics`, builder, paths, write_statistics))
 }
 
-delete_arrow_schema <- function(ptr) {
-  invisible(.Call(`_arrow_delete_arrow_schema`, ptr))
+parquet___ArrowWriterProperties___Builder__data_page_size <- function(builder, data_page_size){
+    invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__data_page_size`, builder, data_page_size))
 }
 
-allocate_arrow_array <- function() {
-  .Call(`_arrow_allocate_arrow_array`)
+parquet___WriterProperties___Builder__build <- function(builder){
+    .Call(`_arrow_parquet___WriterProperties___Builder__build`, builder)
 }
 
-delete_arrow_array <- function(ptr) {
-  invisible(.Call(`_arrow_delete_arrow_array`, ptr))
+parquet___arrow___ParquetFileWriter__Open <- function(schema, sink, properties, arrow_properties){
+    .Call(`_arrow_parquet___arrow___ParquetFileWriter__Open`, schema, sink, properties, arrow_properties)
 }
 
-allocate_arrow_array_stream <- function() {
-  .Call(`_arrow_allocate_arrow_array_stream`)
+parquet___arrow___FileWriter__WriteTable <- function(writer, table, chunk_size){
+    invisible(.Call(`_arrow_parquet___arrow___FileWriter__WriteTable`, writer, table, chunk_size))
 }
 
-delete_arrow_array_stream <- function(ptr) {
-  invisible(.Call(`_arrow_delete_arrow_array_stream`, ptr))
+parquet___arrow___FileWriter__Close <- function(writer){
+    invisible(.Call(`_arrow_parquet___arrow___FileWriter__Close`, writer))
 }
 
-ImportArray <- function(array, schema) {
-  .Call(`_arrow_ImportArray`, array, schema)
+parquet___arrow___WriteTable <- function(table, sink, properties, arrow_properties){
+    invisible(.Call(`_arrow_parquet___arrow___WriteTable`, table, sink, properties, arrow_properties))
 }
 
-ImportRecordBatch <- function(array, schema) {
-  .Call(`_arrow_ImportRecordBatch`, array, schema)
+parquet___arrow___FileReader__GetSchema <- function(reader){
+    .Call(`_arrow_parquet___arrow___FileReader__GetSchema`, reader)
 }
 
-ImportSchema <- function(schema) {
-  .Call(`_arrow_ImportSchema`, schema)
+allocate_arrow_schema <- function(){
+    .Call(`_arrow_allocate_arrow_schema`)
 }
 
-ImportField <- function(field) {
-  .Call(`_arrow_ImportField`, field)
+delete_arrow_schema <- function(ptr){
+    invisible(.Call(`_arrow_delete_arrow_schema`, ptr))
 }
 
-ImportType <- function(type) {
-  .Call(`_arrow_ImportType`, type)
+allocate_arrow_array <- function(){
+    .Call(`_arrow_allocate_arrow_array`)
 }
 
-ImportRecordBatchReader <- function(stream) {
-  .Call(`_arrow_ImportRecordBatchReader`, stream)
+delete_arrow_array <- function(ptr){
+    invisible(.Call(`_arrow_delete_arrow_array`, ptr))
 }
 
-ExportType <- function(type, ptr) {
-  invisible(.Call(`_arrow_ExportType`, type, ptr))
+allocate_arrow_array_stream <- function(){
+    .Call(`_arrow_allocate_arrow_array_stream`)
 }
 
-ExportField <- function(field, ptr) {
-  invisible(.Call(`_arrow_ExportField`, field, ptr))
+delete_arrow_array_stream <- function(ptr){
+    invisible(.Call(`_arrow_delete_arrow_array_stream`, ptr))
 }
 
-ExportSchema <- function(schema, ptr) {
-  invisible(.Call(`_arrow_ExportSchema`, schema, ptr))
+ImportArray <- function(array, schema){
+    .Call(`_arrow_ImportArray`, array, schema)
 }
 
-ExportArray <- function(array, array_ptr, schema_ptr) {
-  invisible(.Call(`_arrow_ExportArray`, array, array_ptr, schema_ptr))
+ImportRecordBatch <- function(array, schema){
+    .Call(`_arrow_ImportRecordBatch`, array, schema)
 }
 
-ExportRecordBatch <- function(batch, array_ptr, schema_ptr) {
-  invisible(.Call(`_arrow_ExportRecordBatch`, batch, array_ptr, schema_ptr))
+ImportSchema <- function(schema){
+    .Call(`_arrow_ImportSchema`, schema)
 }
 
-ExportRecordBatchReader <- function(reader, stream_ptr) {
-  invisible(.Call(`_arrow_ExportRecordBatchReader`, reader, stream_ptr))
+ImportField <- function(field){
+    .Call(`_arrow_ImportField`, field)
 }
 
-Table__from_dots <- function(lst, schema_sxp, use_threads) {
-  .Call(`_arrow_Table__from_dots`, lst, schema_sxp, use_threads)
+ImportType <- function(type){
+    .Call(`_arrow_ImportType`, type)
 }
 
-vec_to_arrow <- function(x, s_type) {
-  .Call(`_arrow_vec_to_arrow`, x, s_type)
+ImportRecordBatchReader <- function(stream){
+    .Call(`_arrow_ImportRecordBatchReader`, stream)
 }
 
-DictionaryArray__FromArrays <- function(type, indices, dict) {
-  .Call(`_arrow_DictionaryArray__FromArrays`, type, indices, dict)
+ExportType <- function(type, ptr){
+    invisible(.Call(`_arrow_ExportType`, type, ptr))
 }
 
-RecordBatch__num_columns <- function(x) {
-  .Call(`_arrow_RecordBatch__num_columns`, x)
+ExportField <- function(field, ptr){
+    invisible(.Call(`_arrow_ExportField`, field, ptr))
 }
 
-RecordBatch__num_rows <- function(x) {
-  .Call(`_arrow_RecordBatch__num_rows`, x)
+ExportSchema <- function(schema, ptr){
+    invisible(.Call(`_arrow_ExportSchema`, schema, ptr))
 }
 
-RecordBatch__schema <- function(x) {
-  .Call(`_arrow_RecordBatch__schema`, x)
+ExportArray <- function(array, array_ptr, schema_ptr){
+    invisible(.Call(`_arrow_ExportArray`, array, array_ptr, schema_ptr))
 }
 
-RecordBatch__RenameColumns <- function(batch, names) {
-  .Call(`_arrow_RecordBatch__RenameColumns`, batch, names)
+ExportRecordBatch <- function(batch, array_ptr, schema_ptr){
+    invisible(.Call(`_arrow_ExportRecordBatch`, batch, array_ptr, schema_ptr))
 }
 
-RecordBatch__ReplaceSchemaMetadata <- function(x, metadata) {
-  .Call(`_arrow_RecordBatch__ReplaceSchemaMetadata`, x, metadata)
+ExportRecordBatchReader <- function(reader, stream_ptr){
+    invisible(.Call(`_arrow_ExportRecordBatchReader`, reader, stream_ptr))
 }
 
-RecordBatch__columns <- function(batch) {
-  .Call(`_arrow_RecordBatch__columns`, batch)
+Table__from_dots <- function(lst, schema_sxp, use_threads){
+    .Call(`_arrow_Table__from_dots`, lst, schema_sxp, use_threads)
 }
 
-RecordBatch__column <- function(batch, i) {
-  .Call(`_arrow_RecordBatch__column`, batch, i)
+vec_to_arrow <- function(x, s_type){
+    .Call(`_arrow_vec_to_arrow`, x, s_type)
 }
 
-RecordBatch__GetColumnByName <- function(batch, name) {
-  .Call(`_arrow_RecordBatch__GetColumnByName`, batch, name)
+DictionaryArray__FromArrays <- function(type, indices, dict){
+    .Call(`_arrow_DictionaryArray__FromArrays`, type, indices, dict)
 }
 
-RecordBatch__SelectColumns <- function(batch, indices) {
-  .Call(`_arrow_RecordBatch__SelectColumns`, batch, indices)
+RecordBatch__num_columns <- function(x){
+    .Call(`_arrow_RecordBatch__num_columns`, x)
 }
 
-RecordBatch__Equals <- function(self, other, check_metadata) {
-  .Call(`_arrow_RecordBatch__Equals`, self, other, check_metadata)
+RecordBatch__num_rows <- function(x){
+    .Call(`_arrow_RecordBatch__num_rows`, x)
 }
 
-RecordBatch__AddColumn <- function(batch, i, field, column) {
-  .Call(`_arrow_RecordBatch__AddColumn`, batch, i, field, column)
+RecordBatch__schema <- function(x){
+    .Call(`_arrow_RecordBatch__schema`, x)
 }
 
-RecordBatch__SetColumn <- function(batch, i, field, column) {
-  .Call(`_arrow_RecordBatch__SetColumn`, batch, i, field, column)
+RecordBatch__RenameColumns <- function(batch, names){
+    .Call(`_arrow_RecordBatch__RenameColumns`, batch, names)
 }
 
-RecordBatch__RemoveColumn <- function(batch, i) {
-  .Call(`_arrow_RecordBatch__RemoveColumn`, batch, i)
+RecordBatch__ReplaceSchemaMetadata <- function(x, metadata){
+    .Call(`_arrow_RecordBatch__ReplaceSchemaMetadata`, x, metadata)
 }
 
-RecordBatch__column_name <- function(batch, i) {
-  .Call(`_arrow_RecordBatch__column_name`, batch, i)
+RecordBatch__columns <- function(batch){
+    .Call(`_arrow_RecordBatch__columns`, batch)
 }
 
-RecordBatch__names <- function(batch) {
-  .Call(`_arrow_RecordBatch__names`, batch)
+RecordBatch__column <- function(batch, i){
+    .Call(`_arrow_RecordBatch__column`, batch, i)
 }
 
-RecordBatch__Slice1 <- function(self, offset) {
-  .Call(`_arrow_RecordBatch__Slice1`, self, offset)
+RecordBatch__GetColumnByName <- function(batch, name){
+    .Call(`_arrow_RecordBatch__GetColumnByName`, batch, name)
 }
 
-RecordBatch__Slice2 <- function(self, offset, length) {
-  .Call(`_arrow_RecordBatch__Slice2`, self, offset, length)
+RecordBatch__SelectColumns <- function(batch, indices){
+    .Call(`_arrow_RecordBatch__SelectColumns`, batch, indices)
 }
 
-ipc___SerializeRecordBatch__Raw <- function(batch) {
-  .Call(`_arrow_ipc___SerializeRecordBatch__Raw`, batch)
+RecordBatch__Equals <- function(self, other, check_metadata){
+    .Call(`_arrow_RecordBatch__Equals`, self, other, check_metadata)
 }
 
-ipc___ReadRecordBatch__InputStream__Schema <- function(stream, schema) {
-  .Call(`_arrow_ipc___ReadRecordBatch__InputStream__Schema`, stream, schema)
+RecordBatch__AddColumn <- function(batch, i, field, column){
+    .Call(`_arrow_RecordBatch__AddColumn`, batch, i, field, column)
 }
 
-RecordBatch__from_arrays <- function(schema_sxp, lst) {
-  .Call(`_arrow_RecordBatch__from_arrays`, schema_sxp, lst)
+RecordBatch__SetColumn <- function(batch, i, field, column){
+    .Call(`_arrow_RecordBatch__SetColumn`, batch, i, field, column)
 }
 
-RecordBatchReader__schema <- function(reader) {
-  .Call(`_arrow_RecordBatchReader__schema`, reader)
+RecordBatch__RemoveColumn <- function(batch, i){
+    .Call(`_arrow_RecordBatch__RemoveColumn`, batch, i)
 }
 
-RecordBatchReader__ReadNext <- function(reader) {
-  .Call(`_arrow_RecordBatchReader__ReadNext`, reader)
+RecordBatch__column_name <- function(batch, i){
+    .Call(`_arrow_RecordBatch__column_name`, batch, i)
 }
 
-RecordBatchReader__batches <- function(reader) {
-  .Call(`_arrow_RecordBatchReader__batches`, reader)
+RecordBatch__names <- function(batch){
+    .Call(`_arrow_RecordBatch__names`, batch)
 }
 
-Table__from_RecordBatchReader <- function(reader) {
-  .Call(`_arrow_Table__from_RecordBatchReader`, reader)
+RecordBatch__Slice1 <- function(self, offset){
+    .Call(`_arrow_RecordBatch__Slice1`, self, offset)
 }
 
-ipc___RecordBatchStreamReader__Open <- function(stream) {
-  .Call(`_arrow_ipc___RecordBatchStreamReader__Open`, stream)
+RecordBatch__Slice2 <- function(self, offset, length){
+    .Call(`_arrow_RecordBatch__Slice2`, self, offset, length)
 }
 
-ipc___RecordBatchFileReader__schema <- function(reader) {
-  .Call(`_arrow_ipc___RecordBatchFileReader__schema`, reader)
+ipc___SerializeRecordBatch__Raw <- function(batch){
+    .Call(`_arrow_ipc___SerializeRecordBatch__Raw`, batch)
 }
 
-ipc___RecordBatchFileReader__num_record_batches <- function(reader) {
-  .Call(`_arrow_ipc___RecordBatchFileReader__num_record_batches`, reader)
+ipc___ReadRecordBatch__InputStream__Schema <- function(stream, schema){
+    .Call(`_arrow_ipc___ReadRecordBatch__InputStream__Schema`, stream, schema)
 }
 
-ipc___RecordBatchFileReader__ReadRecordBatch <- function(reader, i) {
-  .Call(`_arrow_ipc___RecordBatchFileReader__ReadRecordBatch`, reader, i)
+RecordBatch__from_arrays <- function(schema_sxp, lst){
+    .Call(`_arrow_RecordBatch__from_arrays`, schema_sxp, lst)
 }
 
-ipc___RecordBatchFileReader__Open <- function(file) {
-  .Call(`_arrow_ipc___RecordBatchFileReader__Open`, file)
+RecordBatchReader__schema <- function(reader){
+    .Call(`_arrow_RecordBatchReader__schema`, reader)
 }
 
-Table__from_RecordBatchFileReader <- function(reader) {
-  .Call(`_arrow_Table__from_RecordBatchFileReader`, reader)
+RecordBatchReader__ReadNext <- function(reader){
+    .Call(`_arrow_RecordBatchReader__ReadNext`, reader)
 }
 
-ipc___RecordBatchFileReader__batches <- function(reader) {
-  .Call(`_arrow_ipc___RecordBatchFileReader__batches`, reader)
+RecordBatchReader__batches <- function(reader){
+    .Call(`_arrow_RecordBatchReader__batches`, reader)
 }
 
-ipc___RecordBatchWriter__WriteRecordBatch <- function(batch_writer, batch) {
-  invisible(.Call(`_arrow_ipc___RecordBatchWriter__WriteRecordBatch`, batch_writer, batch))
+Table__from_RecordBatchReader <- function(reader){
+    .Call(`_arrow_Table__from_RecordBatchReader`, reader)
 }
 
-ipc___RecordBatchWriter__WriteTable <- function(batch_writer, table) {
-  invisible(.Call(`_arrow_ipc___RecordBatchWriter__WriteTable`, batch_writer, table))
+ipc___RecordBatchStreamReader__Open <- function(stream){
+    .Call(`_arrow_ipc___RecordBatchStreamReader__Open`, stream)
 }
 
-ipc___RecordBatchWriter__Close <- function(batch_writer) {
-  invisible(.Call(`_arrow_ipc___RecordBatchWriter__Close`, batch_writer))
+ipc___RecordBatchFileReader__schema <- function(reader){
+    .Call(`_arrow_ipc___RecordBatchFileReader__schema`, reader)
 }
 
-ipc___RecordBatchFileWriter__Open <- function(stream, schema, use_legacy_format, metadata_version) {
-  .Call(`_arrow_ipc___RecordBatchFileWriter__Open`, stream, schema, use_legacy_format, metadata_version)
+ipc___RecordBatchFileReader__num_record_batches <- function(reader){
+    .Call(`_arrow_ipc___RecordBatchFileReader__num_record_batches`, reader)
 }
 
-ipc___RecordBatchStreamWriter__Open <- function(stream, schema, use_legacy_format, metadata_version) {
-  .Call(`_arrow_ipc___RecordBatchStreamWriter__Open`, stream, schema, use_legacy_format, metadata_version)
+ipc___RecordBatchFileReader__ReadRecordBatch <- function(reader, i){
+    .Call(`_arrow_ipc___RecordBatchFileReader__ReadRecordBatch`, reader, i)
 }
 
-Array__GetScalar <- function(x, i) {
-  .Call(`_arrow_Array__GetScalar`, x, i)
+ipc___RecordBatchFileReader__Open <- function(file){
+    .Call(`_arrow_ipc___RecordBatchFileReader__Open`, file)
 }
 
-Scalar__ToString <- function(s) {
-  .Call(`_arrow_Scalar__ToString`, s)
+Table__from_RecordBatchFileReader <- function(reader){
+    .Call(`_arrow_Table__from_RecordBatchFileReader`, reader)
 }
 
-StructScalar__field <- function(s, i) {
-  .Call(`_arrow_StructScalar__field`, s, i)
+ipc___RecordBatchFileReader__batches <- function(reader){
+    .Call(`_arrow_ipc___RecordBatchFileReader__batches`, reader)
 }
 
-StructScalar__GetFieldByName <- function(s, name) {
-  .Call(`_arrow_StructScalar__GetFieldByName`, s, name)
+ipc___RecordBatchWriter__WriteRecordBatch <- function(batch_writer, batch){
+    invisible(.Call(`_arrow_ipc___RecordBatchWriter__WriteRecordBatch`, batch_writer, batch))
 }
 
-Scalar__as_vector <- function(scalar) {
-  .Call(`_arrow_Scalar__as_vector`, scalar)
+ipc___RecordBatchWriter__WriteTable <- function(batch_writer, table){
+    invisible(.Call(`_arrow_ipc___RecordBatchWriter__WriteTable`, batch_writer, table))
 }
 
-MakeArrayFromScalar <- function(scalar, n) {
-  .Call(`_arrow_MakeArrayFromScalar`, scalar, n)
+ipc___RecordBatchWriter__Close <- function(batch_writer){
+    invisible(.Call(`_arrow_ipc___RecordBatchWriter__Close`, batch_writer))
 }
 
-Scalar__is_valid <- function(s) {
-  .Call(`_arrow_Scalar__is_valid`, s)
+ipc___RecordBatchFileWriter__Open <- function(stream, schema, use_legacy_format, metadata_version){
+    .Call(`_arrow_ipc___RecordBatchFileWriter__Open`, stream, schema, use_legacy_format, metadata_version)
 }
 
-Scalar__type <- function(s) {
-  .Call(`_arrow_Scalar__type`, s)
+ipc___RecordBatchStreamWriter__Open <- function(stream, schema, use_legacy_format, metadata_version){
+    .Call(`_arrow_ipc___RecordBatchStreamWriter__Open`, stream, schema, use_legacy_format, metadata_version)
 }
 
-Scalar__Equals <- function(lhs, rhs) {
-  .Call(`_arrow_Scalar__Equals`, lhs, rhs)
+Array__GetScalar <- function(x, i){
+    .Call(`_arrow_Array__GetScalar`, x, i)
 }
 
-Scalar__ApproxEquals <- function(lhs, rhs) {
-  .Call(`_arrow_Scalar__ApproxEquals`, lhs, rhs)
+Scalar__ToString <- function(s){
+    .Call(`_arrow_Scalar__ToString`, s)
 }
 
-schema_ <- function(fields) {
-  .Call(`_arrow_schema_`, fields)
+StructScalar__field <- function(s, i){
+    .Call(`_arrow_StructScalar__field`, s, i)
 }
 
-Schema__ToString <- function(s) {
-  .Call(`_arrow_Schema__ToString`, s)
+StructScalar__GetFieldByName <- function(s, name){
+    .Call(`_arrow_StructScalar__GetFieldByName`, s, name)
 }
 
-Schema__num_fields <- function(s) {
-  .Call(`_arrow_Schema__num_fields`, s)
+Scalar__as_vector <- function(scalar){
+    .Call(`_arrow_Scalar__as_vector`, scalar)
 }
 
-Schema__field <- function(s, i) {
-  .Call(`_arrow_Schema__field`, s, i)
+MakeArrayFromScalar <- function(scalar, n){
+    .Call(`_arrow_MakeArrayFromScalar`, scalar, n)
 }
 
-Schema__AddField <- function(s, i, field) {
-  .Call(`_arrow_Schema__AddField`, s, i, field)
+Scalar__is_valid <- function(s){
+    .Call(`_arrow_Scalar__is_valid`, s)
 }
 
-Schema__SetField <- function(s, i, field) {
-  .Call(`_arrow_Schema__SetField`, s, i, field)
+Scalar__type <- function(s){
+    .Call(`_arrow_Scalar__type`, s)
 }
 
-Schema__RemoveField <- function(s, i) {
-  .Call(`_arrow_Schema__RemoveField`, s, i)
+Scalar__Equals <- function(lhs, rhs){
+    .Call(`_arrow_Scalar__Equals`, lhs, rhs)
 }
 
-Schema__GetFieldByName <- function(s, x) {
-  .Call(`_arrow_Schema__GetFieldByName`, s, x)
+Scalar__ApproxEquals <- function(lhs, rhs){
+    .Call(`_arrow_Scalar__ApproxEquals`, lhs, rhs)
 }
 
-Schema__fields <- function(schema) {
-  .Call(`_arrow_Schema__fields`, schema)
+schema_ <- function(fields){
+    .Call(`_arrow_schema_`, fields)
 }
 
-Schema__field_names <- function(schema) {
-  .Call(`_arrow_Schema__field_names`, schema)
+Schema__ToString <- function(s){
+    .Call(`_arrow_Schema__ToString`, s)
 }
 
-Schema__HasMetadata <- function(schema) {
-  .Call(`_arrow_Schema__HasMetadata`, schema)
+Schema__num_fields <- function(s){
+    .Call(`_arrow_Schema__num_fields`, s)
 }
 
-Schema__metadata <- function(schema) {
-  .Call(`_arrow_Schema__metadata`, schema)
+Schema__field <- function(s, i){
+    .Call(`_arrow_Schema__field`, s, i)
 }
 
-Schema__WithMetadata <- function(schema, metadata) {
-  .Call(`_arrow_Schema__WithMetadata`, schema, metadata)
+Schema__AddField <- function(s, i, field){
+    .Call(`_arrow_Schema__AddField`, s, i, field)
 }
 
-Schema__serialize <- function(schema) {
-  .Call(`_arrow_Schema__serialize`, schema)
+Schema__SetField <- function(s, i, field){
+    .Call(`_arrow_Schema__SetField`, s, i, field)
 }
 
-Schema__Equals <- function(schema, other, check_metadata) {
-  .Call(`_arrow_Schema__Equals`, schema, other, check_metadata)
+Schema__RemoveField <- function(s, i){
+    .Call(`_arrow_Schema__RemoveField`, s, i)
 }
 
-arrow__UnifySchemas <- function(schemas) {
-  .Call(`_arrow_arrow__UnifySchemas`, schemas)
+Schema__GetFieldByName <- function(s, x){
+    .Call(`_arrow_Schema__GetFieldByName`, s, x)
 }
 
-Table__num_columns <- function(x) {
-  .Call(`_arrow_Table__num_columns`, x)
+Schema__fields <- function(schema){
+    .Call(`_arrow_Schema__fields`, schema)
 }
 
-Table__num_rows <- function(x) {
-  .Call(`_arrow_Table__num_rows`, x)
+Schema__field_names <- function(schema){
+    .Call(`_arrow_Schema__field_names`, schema)
 }
 
-Table__schema <- function(x) {
-  .Call(`_arrow_Table__schema`, x)
+Schema__HasMetadata <- function(schema){
+    .Call(`_arrow_Schema__HasMetadata`, schema)
 }
 
-Table__ReplaceSchemaMetadata <- function(x, metadata) {
-  .Call(`_arrow_Table__ReplaceSchemaMetadata`, x, metadata)
+Schema__metadata <- function(schema){
+    .Call(`_arrow_Schema__metadata`, schema)
 }
 
-Table__column <- function(table, i) {
-  .Call(`_arrow_Table__column`, table, i)
+Schema__WithMetadata <- function(schema, metadata){
+    .Call(`_arrow_Schema__WithMetadata`, schema, metadata)
 }
 
-Table__field <- function(table, i) {
-  .Call(`_arrow_Table__field`, table, i)
+Schema__serialize <- function(schema){
+    .Call(`_arrow_Schema__serialize`, schema)
 }
 
-Table__columns <- function(table) {
-  .Call(`_arrow_Table__columns`, table)
+Schema__Equals <- function(schema, other, check_metadata){
+    .Call(`_arrow_Schema__Equals`, schema, other, check_metadata)
 }
 
-Table__ColumnNames <- function(table) {
-  .Call(`_arrow_Table__ColumnNames`, table)
+arrow__UnifySchemas <- function(schemas){
+    .Call(`_arrow_arrow__UnifySchemas`, schemas)
 }
 
-Table__RenameColumns <- function(table, names) {
-  .Call(`_arrow_Table__RenameColumns`, table, names)
+Table__num_columns <- function(x){
+    .Call(`_arrow_Table__num_columns`, x)
 }
 
-Table__Slice1 <- function(table, offset) {
-  .Call(`_arrow_Table__Slice1`, table, offset)
+Table__num_rows <- function(x){
+    .Call(`_arrow_Table__num_rows`, x)
 }
 
-Table__Slice2 <- function(table, offset, length) {
-  .Call(`_arrow_Table__Slice2`, table, offset, length)
+Table__schema <- function(x){
+    .Call(`_arrow_Table__schema`, x)
 }
 
-Table__Equals <- function(lhs, rhs, check_metadata) {
-  .Call(`_arrow_Table__Equals`, lhs, rhs, check_metadata)
+Table__ReplaceSchemaMetadata <- function(x, metadata){
+    .Call(`_arrow_Table__ReplaceSchemaMetadata`, x, metadata)
 }
 
-Table__Validate <- function(table) {
-  .Call(`_arrow_Table__Validate`, table)
+Table__column <- function(table, i){
+    .Call(`_arrow_Table__column`, table, i)
 }
 
-Table__ValidateFull <- function(table) {
-  .Call(`_arrow_Table__ValidateFull`, table)
+Table__field <- function(table, i){
+    .Call(`_arrow_Table__field`, table, i)
 }
 
-Table__GetColumnByName <- function(table, name) {
-  .Call(`_arrow_Table__GetColumnByName`, table, name)
+Table__columns <- function(table){
+    .Call(`_arrow_Table__columns`, table)
 }
 
-Table__RemoveColumn <- function(table, i) {
-  .Call(`_arrow_Table__RemoveColumn`, table, i)
+Table__ColumnNames <- function(table){
+    .Call(`_arrow_Table__ColumnNames`, table)
 }
 
-Table__AddColumn <- function(table, i, field, column) {
-  .Call(`_arrow_Table__AddColumn`, table, i, field, column)
+Table__RenameColumns <- function(table, names){
+    .Call(`_arrow_Table__RenameColumns`, table, names)
 }
 
-Table__SetColumn <- function(table, i, field, column) {
-  .Call(`_arrow_Table__SetColumn`, table, i, field, column)
+Table__Slice1 <- function(table, offset){
+    .Call(`_arrow_Table__Slice1`, table, offset)
 }
 
-Table__SelectColumns <- function(table, indices) {
-  .Call(`_arrow_Table__SelectColumns`, table, indices)
+Table__Slice2 <- function(table, offset, length){
+    .Call(`_arrow_Table__Slice2`, table, offset, length)
 }
 
-all_record_batches <- function(lst) {
-  .Call(`_arrow_all_record_batches`, lst)
+Table__Equals <- function(lhs, rhs, check_metadata){
+    .Call(`_arrow_Table__Equals`, lhs, rhs, check_metadata)
 }
 
-Table__from_record_batches <- function(batches, schema_sxp) {
-  .Call(`_arrow_Table__from_record_batches`, batches, schema_sxp)
+Table__Validate <- function(table){
+    .Call(`_arrow_Table__Validate`, table)
 }
 
-GetCpuThreadPoolCapacity <- function() {
-  .Call(`_arrow_GetCpuThreadPoolCapacity`)
+Table__ValidateFull <- function(table){
+    .Call(`_arrow_Table__ValidateFull`, table)
 }
 
-SetCpuThreadPoolCapacity <- function(threads) {
-  invisible(.Call(`_arrow_SetCpuThreadPoolCapacity`, threads))
+Table__GetColumnByName <- function(table, name){
+    .Call(`_arrow_Table__GetColumnByName`, table, name)
 }
 
-GetIOThreadPoolCapacity <- function() {
-  .Call(`_arrow_GetIOThreadPoolCapacity`)
+Table__RemoveColumn <- function(table, i){
+    .Call(`_arrow_Table__RemoveColumn`, table, i)
 }
 
-SetIOThreadPoolCapacity <- function(threads) {
-  invisible(.Call(`_arrow_SetIOThreadPoolCapacity`, threads))
+Table__AddColumn <- function(table, i, field, column){
+    .Call(`_arrow_Table__AddColumn`, table, i, field, column)
 }
 
-Array__infer_type <- function(x) {
-  .Call(`_arrow_Array__infer_type`, x)
+Table__SetColumn <- function(table, i, field, column){
+    .Call(`_arrow_Table__SetColumn`, table, i, field, column)
 }
+
+Table__SelectColumns <- function(table, indices){
+    .Call(`_arrow_Table__SelectColumns`, table, indices)
+}
+
+all_record_batches <- function(lst){
+    .Call(`_arrow_all_record_batches`, lst)
+}
+
+Table__from_record_batches <- function(batches, schema_sxp){
+    .Call(`_arrow_Table__from_record_batches`, batches, schema_sxp)
+}
+
+GetCpuThreadPoolCapacity <- function(){
+    .Call(`_arrow_GetCpuThreadPoolCapacity`)
+}
+
+SetCpuThreadPoolCapacity <- function(threads){
+    invisible(.Call(`_arrow_SetCpuThreadPoolCapacity`, threads))
+}
+
+GetIOThreadPoolCapacity <- function(){
+    .Call(`_arrow_GetIOThreadPoolCapacity`)
+}
+
+SetIOThreadPoolCapacity <- function(threads){
+    invisible(.Call(`_arrow_SetIOThreadPoolCapacity`, threads))
+}
+
+Array__infer_type <- function(x){
+    .Call(`_arrow_Array__infer_type`, x)
+}
+
+
+
diff --git a/r/R/dplyr-eval.R b/r/R/dplyr-eval.R
index 57497e41cd2..3a1261602a3 100644
--- a/r/R/dplyr-eval.R
+++ b/r/R/dplyr-eval.R
@@ -39,7 +39,7 @@ arrow_eval <- function(expr, mask) {
     }
 
     out <- structure(msg, class = "try-error", condition = e)
-    if (grepl("not supported.*Arrow", msg)) {
+    if (grepl("not supported.*Arrow", msg) || getOption("arrow.debug", FALSE)) {
       # One of ours. Mark it so that consumers can handle it differently
       class(out) <- c("arrow-try-error", class(out))
     }
@@ -75,7 +75,7 @@ arrow_not_supported <- function(msg) {
 }
 
 # Create a data mask for evaluating a dplyr expression
-arrow_mask <- function(.data) {
+arrow_mask <- function(.data, aggregation = FALSE) {
   f_env <- new_environment(.cache$functions)
 
   # Add functions that need to error hard and clear.
@@ -86,6 +86,10 @@ arrow_mask <- function(.data) {
     f_env[[f]] <- fail
   }
 
+  if (aggregation) {
+    f_env <- new_environment(agg_funcs, parent = f_env)
+  }
+
   # Assign the schema to the expressions
   map(.data$selected_columns, ~ (.$schema <- .data$.data$schema))
 
diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R
index 40e4cd4776b..607be82c36b 100644
--- a/r/R/dplyr-functions.R
+++ b/r/R/dplyr-functions.R
@@ -713,7 +713,7 @@ nse_funcs$log <- nse_funcs$logb <- function(x, base = exp(1)) {
     return(Expression$create("log10_checked", x))
   }
   # ARROW-13345
-  stop("`base` values other than exp(1), 2 and 10 not supported in Arrow", call. = FALSE)
+  arrow_not_supported("`base` values other than exp(1), 2 and 10")
 }
 
 nse_funcs$if_else <- function(condition, true, false, missing = NULL) {
@@ -777,3 +777,42 @@ nse_funcs$case_when <- function(...) {
     )
   )
 }
+
+# Aggregation functions
+# These all return a list of:
+# @param fun string function name
+# @param data Expression (these are all currently a single field)
+# @param options list of function options, as passed to call_function
+# For group-by aggregation, `hash_` gets prepended to the function name.
+# So to see a list of available hash aggregation functions, do
+# list_compute_functions("^hash_")
+agg_funcs <- list()
+agg_funcs$sum <- function(x, na.rm = FALSE) {
+  list(
+    fun = "sum",
+    data = x,
+    options = arrow_na_rm(na.rm = na.rm)
+  )
+}
+agg_funcs$any <- function(x, na.rm = FALSE) {
+  list(
+    fun = "any",
+    data = x,
+    options = arrow_na_rm(na.rm)
+  )
+}
+agg_funcs$all <- function(x, na.rm = FALSE) {
+  list(
+    fun = "all",
+    data = x,
+    options = arrow_na_rm(na.rm)
+  )
+}
+
+arrow_na_rm <- function(na.rm) {
+  if (!isTRUE(na.rm)) {
+    # TODO: ARROW-13497
+    arrow_not_supported(paste("na.rm =", na.rm))
+  }
+  list(na.rm = na.rm, na.min_count = 0L)
+}
diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R
index 26db190099f..5677afb904a 100644
--- a/r/R/dplyr-summarize.R
+++ b/r/R/dplyr-summarize.R
@@ -28,14 +28,108 @@ summarise.arrow_dplyr_query <- function(.data, ..., .engine = c("arrow", "duckdb
     dplyr::group_vars(.data) # vars needed for grouping
   ))
   .data <- dplyr::select(.data, vars_to_keep)
-
   if (match.arg(.engine) == "duckdb") {
     dplyr::summarise(to_duckdb(.data), ...)
   } else {
-    if (query_on_dataset(.data)) {
-      not_implemented_for_dataset("summarize()")
+    # Try stuff, if successful return()
+    out <- try(do_arrow_summarize(.data, ...), silent = TRUE)
+    if (inherits(out, "try-error")) {
+      return(abandon_ship(call, .data, format(out)))
+    } else {
+      return(out)
     }
-    dplyr::summarise(dplyr::collect(.data), ...)
   }
 }
 summarise.Dataset <- summarise.ArrowTabular <- summarise.arrow_dplyr_query
+
+do_arrow_summarize <- function(.data, ..., .groups = NULL) {
+  if (!is.null(.groups)) {
+    # ARROW-13550
+    abort("`summarize()` with `.groups` argument not supported in Arrow")
+  }
+  exprs <- quos(...)
+  # Check for unnamed expressions and fix if any
+  unnamed <- !nzchar(names(exprs))
+  # Deparse and take the first element in case they're long expressions
+  names(exprs)[unnamed] <- map_chr(exprs[unnamed], as_label)
+
+  mask <- arrow_mask(.data, aggregation = TRUE)
+
+  results <- list()
+  for (i in seq_along(exprs)) {
+    # Iterate over the indices and not the names because names may be repeated
+    # (which overwrites the previous name)
+    new_var <- names(exprs)[i]
+    results[[new_var]] <- arrow_eval(exprs[[i]], mask)
+    if (inherits(results[[new_var]], "try-error")) {
+      msg <- handle_arrow_not_supported(
+        results[[new_var]],
+        as_label(exprs[[i]])
+      )
+      stop(msg, call. = FALSE)
+    }
+    # Put it in the data mask too?
+    # mask[[new_var]] <- mask$.data[[new_var]] <- results[[new_var]]
+  }
+
+  # Now, from that, split out the data (expressions) and options
+  .data$aggregations <- lapply(results, function(x) x[c("fun", "options")])
+
+  inputs <- lapply(results, function(x) x$data)
+  # This is essentially a projection, and the column names don't matter
+  # (but must exist)
+  names(inputs) <- as.character(seq_along(inputs))
+  .data$selected_columns <- inputs
+
+  # Eventually, we will return .data here if (dataset) but do it eagerly now
+  do_exec_plan(.data, group_vars = dplyr::group_vars(.data))
+}
+
+do_exec_plan <- function(.data, group_vars = NULL) {
+  plan <- ExecPlan$create()
+
+  grouped <- length(group_vars) > 0
+
+  # Collect the target names first because we have to add back the group vars
+  target_names <- names(.data)
+
+  if (grouped) {
+    .data <- ensure_group_vars(.data)
+    # We also need to prefix all of the aggregation function names with "hash_"
+    .data$aggregations <- lapply(.data$aggregations, function(x) {
+      x[["fun"]] <- paste0("hash_", x[["fun"]])
+      x
+    })
+  }
+
+  start_node <- plan$Scan(.data)
+  # ARROW-13498: Even though Scan takes the filter, apparently we have to do it again
+  if (inherits(.data$filtered_rows, "Expression")) {
+    start_node <- start_node$Filter(.data$filtered_rows)
+  }
+  # If any columns are derived we need to Project (otherwise this may be no-op)
+  project_node <- start_node$Project(.data$selected_columns)
+
+  if (grouped) {
+    final_node <- project_node$GroupByAggregate(
+      group_vars,
+      target_names = target_names,
+      aggregations = .data$aggregations
+    )
+    out <- plan$Run(final_node)
+    # The result will have result columns first (named by their function)
+    # then the grouping cols. dplyr orders group cols first, and it accepts
+    # names for the result cols. Adapt the result to meet that expectation.
+    n_results <- length(.data$aggregations)
+    names(out)[seq_along(.data$aggregations)] <- names(.data$aggregations)
+    out <- out[c((n_results + 1):ncol(out), seq_along(.data$aggregations))]
+  } else {
+    final_node <- project_node$ScalarAggregate(
+      options = .data$aggregations,
+      target_names = target_names,
+      out_field_names = names(.data$aggregations)
+    )
+    out <- plan$Run(final_node)
+  }
+  out
+}
diff --git a/r/R/dplyr.R b/r/R/dplyr.R
index 88accac24e9..b2793bdb3c3 100644
--- a/r/R/dplyr.R
+++ b/r/R/dplyr.R
@@ -216,31 +216,17 @@ restore_dplyr_features <- function(df, query) {
 # Helper to handle unsupported dplyr features
 # * For Table/RecordBatch, we collect() and then call the dplyr method in R
 # * For Dataset, we just error
-abandon_ship <- function(call, .data, msg = NULL) {
+abandon_ship <- function(call, .data, msg) {
   dplyr_fun_name <- sub("^(.*?)\\..*", "\\1", as.character(call[[1]]))
   if (query_on_dataset(.data)) {
-    if (is.null(msg)) {
-      # Default message: function not implemented
-      not_implemented_for_dataset(paste0(dplyr_fun_name, "()"))
-    } else {
-      stop(msg, "\nCall collect() first to pull data into R.", call. = FALSE)
-    }
+    stop(msg, "\nCall collect() first to pull data into R.", call. = FALSE)
   }
   # else, collect and call dplyr method
-  if (!is.null(msg)) {
-    warning(msg, "; pulling data into R", immediate. = TRUE, call. = FALSE)
-  }
+  msg <- sub("\\n$", "", msg)
+  warning(msg, "; pulling data into R", immediate. = TRUE, call. = FALSE)
   call$.data <- dplyr::collect(.data)
   call[[1]] <- get(dplyr_fun_name, envir = asNamespace("dplyr"))
   eval.parent(call, 2)
 }
 
 query_on_dataset <- function(x) !inherits(x$.data, "InMemoryDataset")
-
-not_implemented_for_dataset <- function(method) {
-  stop(
-    method, " is not currently implemented for Arrow Datasets. ",
-    "Call collect() first to pull data into R.",
-    call. = FALSE
-  )
-}
diff --git a/r/R/duckdb.R b/r/R/duckdb.R
index 6ed1df3d826..bc003a6ea8f 100644
--- a/r/R/duckdb.R
+++ b/r/R/duckdb.R
@@ -40,8 +40,7 @@
 #'
 #' @name to_duckdb
 #' @export
-#' @examplesIf { arrow_with_dataset() && requireNamespace("duckdb", quietly = TRUE) &&
-#'   packageVersion("duckdb") > "0.2.7" && requireNamespace("dplyr", quietly = TRUE) }
+#' @examplesIf getFromNamespace("run_duckdb_examples", "arrow")()
 #' library(dplyr)
 #'
 #' ds <- InMemoryDataset$create(mtcars)
@@ -113,3 +112,7 @@ duckdb_disconnector <- function(con, tbl_name) {
   })
   environment()
 }
+
+run_duckdb_examples <- function() {
+  arrow_with_dataset() && requireNamespace("duckdb", quietly = TRUE) && packageVersion("duckdb") > "0.2.7" && requireNamespace("dplyr", quietly = TRUE)
+}
diff --git a/r/R/query-engine.R b/r/R/query-engine.R
new file mode 100644
index 00000000000..72c35c515db
--- /dev/null
+++ b/r/R/query-engine.R
@@ -0,0 +1,75 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ExecPlan <- R6Class("ExecPlan",
+  inherit = ArrowObject,
+  public = list(
+    Scan = function(dataset) {
+      # Handle arrow_dplyr_query
+      if (inherits(dataset, "arrow_dplyr_query")) {
+        filter <- dataset$filtered_rows
+        if (isTRUE(filter)) {
+          filter <- Expression$scalar(TRUE)
+        }
+        # Use FieldsInExpression to find all from dataset$selected_columns
+        colnames <- unique(unlist(map(
+          dataset$selected_columns,
+          field_names_in_expression
+        )))
+        dataset <- dataset$.data
+      } else {
+        if (inherits(dataset, "ArrowTabular")) {
+          dataset <- InMemoryDataset$create(dataset)
+        }
+        assert_is(dataset, "Dataset")
+        # Set some defaults
+        filter <- Expression$scalar(TRUE)
+        colnames <- names(dataset)
+      }
+      # ScanNode needs the filter to do predicate pushdown and skip partitions,
+      # and it needs to know which fields to materialize (and which are unnecessary)
+      ExecNode_Scan(self, dataset, filter, colnames)
+    },
+    Run = function(node) {
+      assert_is(node, "ExecNode")
+      ExecPlan_run(self, node)
+    }
+  )
+)
+ExecPlan$create <- function(use_threads = option_use_threads()) {
+  ExecPlan_create(use_threads)
+}
+
+ExecNode <- R6Class("ExecNode",
+  inherit = ArrowObject,
+  public = list(
+    Project = function(cols) {
+      assert_is_list_of(cols, "Expression")
+      ExecNode_Project(self, cols, names(cols))
+    },
+    Filter = function(expr) {
+      assert_is(expr, "Expression")
+      ExecNode_Filter(self, expr)
+    },
+    ScalarAggregate = function(options, target_names, out_field_names) {
+      ExecNode_ScalarAggregate(self, options, target_names, out_field_names)
+    },
+    GroupByAggregate = function(group_vars, target_names, aggregations) {
+      ExecNode_GroupByAggregate(self, group_vars, target_names, aggregations)
+    }
+  )
+)
diff --git a/r/man/ChunkedArray.Rd b/r/man/ChunkedArray.Rd
index 486b6222af7..3a504f01466 100644
--- a/r/man/ChunkedArray.Rd
+++ b/r/man/ChunkedArray.Rd
@@ -53,6 +53,28 @@ within the array's internal data. This can be an expensive check, potentially \c
 }
 }
 
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+# Pass items into chunked_array as separate objects to create chunks
+class_scores <- chunked_array(c(87, 88, 89), c(94, 93, 92), c(71, 72, 73))
+class_scores$num_chunks
+
+# When taking a Slice from a chunked_array, chunks are preserved
+class_scores$Slice(2, length = 5)
+
+# You can combine Take and SortIndices to return a ChunkedArray with 1 chunk
+# containing all values, ordered.
+class_scores$Take(class_scores$SortIndices(descending = TRUE))
+
+# If you pass a list into chunked_array, you get a list of length 1
+list_scores <- chunked_array(list(c(9.9, 9.6, 9.5), c(8.2, 8.3, 8.4), c(10.0, 9.9, 9.8)))
+list_scores$num_chunks
+
+# When constructing a ChunkedArray, the first chunk is used to infer type.
+doubles <- chunked_array(c(1, 2, 3), c(5L, 6L, 7L))
+doubles$type
+\dontshow{\}) # examplesIf}
+}
 \seealso{
 \link{Array}
 }
diff --git a/r/man/Field.Rd b/r/man/Field.Rd
index 03dffd11ca9..77d31fa637a 100644
--- a/r/man/Field.Rd
+++ b/r/man/Field.Rd
@@ -28,3 +28,8 @@ field(name, type, metadata)
 }
 }
 
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+field("x", int32())
+\dontshow{\}) # examplesIf}
+}
diff --git a/r/man/FileFormat.Rd b/r/man/FileFormat.Rd
index b8d4dc01bad..cabacc93755 100644
--- a/r/man/FileFormat.Rd
+++ b/r/man/FileFormat.Rd
@@ -51,3 +51,18 @@ From \link{CsvFragmentScanOptions} (these values can be overridden at scan time)
 It returns the appropriate subclass of \code{FileFormat} (e.g. \code{ParquetFileFormat})
 }
 
+\examples{
+\dontshow{if (arrow_with_dataset() && tolower(Sys.info()[["sysname"]]) != "windows") (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+## Semi-colon delimited files
+# Set up directory for examples
+tf <- tempfile()
+dir.create(tf)
+on.exit(unlink(tf))
+write.table(mtcars, file.path(tf, "file1.txt"), sep = ";", row.names = FALSE)
+
+# Create FileFormat object
+format <- FileFormat$create(format = "text", delimiter = ";")
+
+open_dataset(tf, format = format)
+\dontshow{\}) # examplesIf}
+}
diff --git a/r/man/ParquetFileReader.Rd b/r/man/ParquetFileReader.Rd
index 0b49df79d6b..30d0725a498 100644
--- a/r/man/ParquetFileReader.Rd
+++ b/r/man/ParquetFileReader.Rd
@@ -45,3 +45,15 @@ The optional \verb{column_indices=} argument is a 0-based integer vector indicat
 }
 }
 
+\examples{
+\dontshow{if (arrow_with_parquet()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+f <- system.file("v0.7.1.parquet", package = "arrow")
+pq <- ParquetFileReader$create(f)
+pq$GetSchema()
+if (codec_is_available("snappy")) {
+  # This file has compressed data columns
+  tab <- pq$ReadTable()
+  tab$schema
+}
+\dontshow{\}) # examplesIf}
+}
diff --git a/r/man/RecordBatch.Rd b/r/man/RecordBatch.Rd
index e3024b91b7a..ff08c215853 100644
--- a/r/man/RecordBatch.Rd
+++ b/r/man/RecordBatch.Rd
@@ -79,3 +79,14 @@ All list elements are coerced to string. See \code{schema()} for more informatio
 }
 }
 
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+batch <- record_batch(name = rownames(mtcars), mtcars)
+dim(batch)
+dim(head(batch))
+names(batch)
+batch$mpg
+batch[["cyl"]]
+as.data.frame(batch[4:8, c("gear", "hp", "wt")])
+\dontshow{\}) # examplesIf}
+}
diff --git a/r/man/RecordBatchReader.Rd b/r/man/RecordBatchReader.Rd
index a206c30c8fb..90c796a6693 100644
--- a/r/man/RecordBatchReader.Rd
+++ b/r/man/RecordBatchReader.Rd
@@ -43,6 +43,43 @@ are in the file.
 }
 }
 
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+tf <- tempfile()
+on.exit(unlink(tf))
+
+batch <- record_batch(chickwts)
+
+# This opens a connection to the file in Arrow
+file_obj <- FileOutputStream$create(tf)
+# Pass that to a RecordBatchWriter to write data conforming to a schema
+writer <- RecordBatchFileWriter$create(file_obj, batch$schema)
+writer$write(batch)
+# You may write additional batches to the stream, provided that they have
+# the same schema.
+# Call "close" on the writer to indicate end-of-file/stream
+writer$close()
+# Then, close the connection--closing the IPC message does not close the file
+file_obj$close()
+
+# Now, we have a file we can read from. Same pattern: open file connection,
+# then pass it to a RecordBatchReader
+read_file_obj <- ReadableFile$create(tf)
+reader <- RecordBatchFileReader$create(read_file_obj)
+# RecordBatchFileReader knows how many batches it has (StreamReader does not)
+reader$num_record_batches
+# We could consume the Reader by calling $read_next_batch() until all are,
+# consumed, or we can call $read_table() to pull them all into a Table
+tab <- reader$read_table()
+# Call as.data.frame to turn that Table into an R data.frame
+df <- as.data.frame(tab)
+# This should be the same data we sent
+all.equal(df, chickwts, check.attributes = FALSE)
+# Unlike the Writers, we don't have to close RecordBatchReaders,
+# but we do still need to close the file connection
+read_file_obj$close()
+\dontshow{\}) # examplesIf}
+}
 \seealso{
 \code{\link[=read_ipc_stream]{read_ipc_stream()}} and \code{\link[=read_feather]{read_feather()}} provide a much simpler interface
 for reading data from these formats and are sufficient for many use cases.
diff --git a/r/man/RecordBatchWriter.Rd b/r/man/RecordBatchWriter.Rd
index cc6d2feb3ac..219c150e6a4 100644
--- a/r/man/RecordBatchWriter.Rd
+++ b/r/man/RecordBatchWriter.Rd
@@ -45,6 +45,43 @@ to be closed separately.
 }
 }
 
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+tf <- tempfile()
+on.exit(unlink(tf))
+
+batch <- record_batch(chickwts)
+
+# This opens a connection to the file in Arrow
+file_obj <- FileOutputStream$create(tf)
+# Pass that to a RecordBatchWriter to write data conforming to a schema
+writer <- RecordBatchFileWriter$create(file_obj, batch$schema)
+writer$write(batch)
+# You may write additional batches to the stream, provided that they have
+# the same schema.
+# Call "close" on the writer to indicate end-of-file/stream
+writer$close()
+# Then, close the connection--closing the IPC message does not close the file
+file_obj$close()
+
+# Now, we have a file we can read from. Same pattern: open file connection,
+# then pass it to a RecordBatchReader
+read_file_obj <- ReadableFile$create(tf)
+reader <- RecordBatchFileReader$create(read_file_obj)
+# RecordBatchFileReader knows how many batches it has (StreamReader does not)
+reader$num_record_batches
+# We could consume the Reader by calling $read_next_batch() until all are,
+# consumed, or we can call $read_table() to pull them all into a Table
+tab <- reader$read_table()
+# Call as.data.frame to turn that Table into an R data.frame
+df <- as.data.frame(tab)
+# This should be the same data we sent
+all.equal(df, chickwts, check.attributes = FALSE)
+# Unlike the Writers, we don't have to close RecordBatchReaders,
+# but we do still need to close the file connection
+read_file_obj$close()
+\dontshow{\}) # examplesIf}
+}
 \seealso{
 \code{\link[=write_ipc_stream]{write_ipc_stream()}} and \code{\link[=write_feather]{write_feather()}} provide a much simpler
 interface for writing data to these formats and are sufficient for many use
diff --git a/r/man/Scalar.Rd b/r/man/Scalar.Rd
index 9128988d11c..21e04c12e08 100644
--- a/r/man/Scalar.Rd
+++ b/r/man/Scalar.Rd
@@ -19,3 +19,20 @@ A \code{Scalar} holds a single value of an Arrow type.
 \verb{$type}: Scalar type
 }
 
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+Scalar$create(pi)
+Scalar$create(404)
+# If you pass a vector into Scalar$create, you get a list containing your items
+Scalar$create(c(1, 2, 3))
+
+# Comparisons
+my_scalar <- Scalar$create(99)
+my_scalar$ApproxEquals(Scalar$create(99.00001)) # FALSE
+my_scalar$ApproxEquals(Scalar$create(99.000009)) # TRUE
+my_scalar$Equals(Scalar$create(99.000009)) # FALSE
+my_scalar$Equals(Scalar$create(99L)) # FALSE (types don't match)
+
+my_scalar$ToString()
+\dontshow{\}) # examplesIf}
+}
diff --git a/r/man/Schema.Rd b/r/man/Schema.Rd
index 0c66e5c2a42..6e385bb804e 100644
--- a/r/man/Schema.Rd
+++ b/r/man/Schema.Rd
@@ -74,3 +74,12 @@ Files with compressed metadata are readable by older versions of arrow, but
 the metadata is dropped.
 }
 
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+df <- data.frame(col1 = 2:4, col2 = c(0.1, 0.3, 0.5))
+tab1 <- Table$create(df)
+tab1$schema
+tab2 <- Table$create(df, schema = schema(col1 = int8(), col2 = float32()))
+tab2$schema
+\dontshow{\}) # examplesIf}
+}
diff --git a/r/man/Table.Rd b/r/man/Table.Rd
index d955b0f5a29..2675943e572 100644
--- a/r/man/Table.Rd
+++ b/r/man/Table.Rd
@@ -79,3 +79,14 @@ All list elements are coerced to string. See \code{schema()} for more informatio
 }
 }
 
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+tab <- Table$create(name = rownames(mtcars), mtcars)
+dim(tab)
+dim(head(tab))
+names(tab)
+tab$mpg
+tab[["cyl"]]
+as.data.frame(tab[4:8, c("gear", "hp", "wt")])
+\dontshow{\}) # examplesIf}
+}
diff --git a/r/man/array.Rd b/r/man/array.Rd
index ed25a2b0a34..78d3eaff6ea 100644
--- a/r/man/array.Rd
+++ b/r/man/array.Rd
@@ -82,3 +82,26 @@ within the array's internal data. This can be an expensive check, potentially \c
 }
 }
 
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+my_array <- Array$create(1:10)
+my_array$type
+my_array$cast(int8())
+
+# Check if value is null; zero-indexed
+na_array <- Array$create(c(1:5, NA))
+na_array$IsNull(0)
+na_array$IsNull(5)
+na_array$IsValid(5)
+na_array$null_count
+
+# zero-copy slicing; the offset of the new Array will be the same as the index passed to $Slice
+new_array <- na_array$Slice(5)
+new_array$offset
+
+# Compare 2 arrays
+na_array2 <- na_array
+na_array2 == na_array # element-wise comparison
+na_array2$Equals(na_array) # overall comparison
+\dontshow{\}) # examplesIf}
+}
diff --git a/r/man/buffer.Rd b/r/man/buffer.Rd
index 99b636da3c7..a3ca1fc2fcb 100644
--- a/r/man/buffer.Rd
+++ b/r/man/buffer.Rd
@@ -33,3 +33,12 @@ contiguous memory with a particular size.
 }
 }
 
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+my_buffer <- buffer(c(1, 2, 3, 4))
+my_buffer$is_mutable
+my_buffer$ZeroPadding()
+my_buffer$size
+my_buffer$capacity
+\dontshow{\}) # examplesIf}
+}
diff --git a/r/man/call_function.Rd b/r/man/call_function.Rd
index 7e9b7e50ea0..bef89f10b18 100644
--- a/r/man/call_function.Rd
+++ b/r/man/call_function.Rd
@@ -35,6 +35,16 @@ are callable with an \code{arrow_} prefix.
 When passing indices in \code{...}, \code{args}, or \code{options}, express them as
 0-based integers (consistent with C++).
 }
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+a <- Array$create(c(1L, 2L, 3L, NA, 5L))
+s <- Scalar$create(4L)
+call_function("fill_null", a, s)
+
+a <- Array$create(rnorm(10000))
+call_function("quantile", a, options = list(q = seq(0, 1, 0.25)))
+\dontshow{\}) # examplesIf}
+}
 \seealso{
 \href{https://arrow.apache.org/docs/cpp/compute.html}{Arrow C++ documentation} for
 the functions and their respective options.
diff --git a/r/man/codec_is_available.Rd b/r/man/codec_is_available.Rd
index 1b5e8278fa9..b3238ff1dca 100644
--- a/r/man/codec_is_available.Rd
+++ b/r/man/codec_is_available.Rd
@@ -18,3 +18,8 @@ Support for compression libraries depends on the build-time settings of
 the Arrow C++ library. This function lets you know which are available for
 use.
 }
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+codec_is_available("gzip")
+\dontshow{\}) # examplesIf}
+}
diff --git a/r/man/copy_files.Rd b/r/man/copy_files.Rd
index 75cc4405d8a..1b83703f19f 100644
--- a/r/man/copy_files.Rd
+++ b/r/man/copy_files.Rd
@@ -23,3 +23,13 @@ Nothing: called for side effects in the file system
 \description{
 Copy files between FileSystems
 }
+\examples{
+\dontshow{if (FALSE) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+# Copy an S3 bucket's files to a local directory:
+copy_files("s3://your-bucket-name", "local-directory")
+# Using a FileSystem object
+copy_files(s3_bucket("your-bucket-name"), "local-directory")
+# Or go the other way, from local to S3
+copy_files("local-directory", s3_bucket("your-bucket-name"))
+\dontshow{\}) # examplesIf}
+}
diff --git a/r/man/data-type.Rd b/r/man/data-type.Rd
index 101702a2fb2..a0631897573 100644
--- a/r/man/data-type.Rd
+++ b/r/man/data-type.Rd
@@ -150,6 +150,14 @@ are translated to R objects, \code{uint32} and \code{uint64} are converted to \c
 types, this conversion can be disabled (so that \code{int64} always yields a
 \code{bit64::integer64} object) by setting \code{options(arrow.int64_downcast = FALSE)}.
 }
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+bool()
+struct(a = int32(), b = double())
+timestamp("ms", timezone = "CEST")
+time64("ns")
+\dontshow{\}) # examplesIf}
+}
 \seealso{
 \code{\link[=dictionary]{dictionary()}} for creating a dictionary (factor-like) type.
 }
diff --git a/r/man/hive_partition.Rd b/r/man/hive_partition.Rd
index 39d5d8d0ae2..eef9f9157ea 100644
--- a/r/man/hive_partition.Rd
+++ b/r/man/hive_partition.Rd
@@ -28,3 +28,8 @@ Hive partitioning embeds field names and values in path segments, such as
 Because fields are named in the path segments, order of fields passed to
 \code{hive_partition()} does not matter.
 }
+\examples{
+\dontshow{if (arrow_with_dataset()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+hive_partition(year = int16(), month = int8())
+\dontshow{\}) # examplesIf}
+}
diff --git a/r/man/list_compute_functions.Rd b/r/man/list_compute_functions.Rd
index ba17688d833..4ca0e518f13 100644
--- a/r/man/list_compute_functions.Rd
+++ b/r/man/list_compute_functions.Rd
@@ -37,3 +37,10 @@ The package includes Arrow methods for many base R functions that can
 be called directly on Arrow objects, as well as some tidyverse-flavored versions
 available inside \code{dplyr} verbs.
 }
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+list_compute_functions()
+list_compute_functions(pattern = "^UTF8", ignore.case = TRUE)
+list_compute_functions(pattern = "^is", invert = TRUE)
+\dontshow{\}) # examplesIf}
+}
diff --git a/r/man/load_flight_server.Rd b/r/man/load_flight_server.Rd
index 7e2000a9ca2..66d30f39147 100644
--- a/r/man/load_flight_server.Rd
+++ b/r/man/load_flight_server.Rd
@@ -15,3 +15,8 @@ to look in the \verb{inst/} directory for included modules.}
 \description{
 Load a Python Flight server
 }
+\examples{
+\dontshow{if (FALSE) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+load_flight_server("demo_flight_server")
+\dontshow{\}) # examplesIf}
+}
diff --git a/r/man/match_arrow.Rd b/r/man/match_arrow.Rd
index 21481af4c6b..c2343361c6e 100644
--- a/r/man/match_arrow.Rd
+++ b/r/man/match_arrow.Rd
@@ -26,3 +26,28 @@ per element of \code{x} it it is present in \code{table}.
 \code{base::match()} is not a generic, so we can't just define Arrow methods for
 it. This function exposes the analogous functions in the Arrow C++ library.
 }
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+# note that the returned value is 0-indexed
+cars_tbl <- Table$create(name = rownames(mtcars), mtcars)
+match_arrow(Scalar$create("Mazda RX4 Wag"), cars_tbl$name)
+
+is_in(Array$create("Mazda RX4 Wag"), cars_tbl$name)
+
+# Although there are multiple matches, you are returned the index of the first
+# match, as with the base R equivalent
+match(4, mtcars$cyl) # 1-indexed
+match_arrow(Scalar$create(4), cars_tbl$cyl) # 0-indexed
+
+# If `x` contains multiple values, you are returned the indices of the first
+# match for each value.
+match(c(4, 6, 8), mtcars$cyl)
+match_arrow(Array$create(c(4, 6, 8)), cars_tbl$cyl)
+
+# Return type matches type of `x`
+is_in(c(4, 6, 8), mtcars$cyl) # returns vector
+is_in(Scalar$create(4), mtcars$cyl) # returns Scalar
+is_in(Array$create(c(4, 6, 8)), cars_tbl$cyl) # returns Array
+is_in(ChunkedArray$create(c(4, 6), 8), cars_tbl$cyl) # returns ChunkedArray
+\dontshow{\}) # examplesIf}
+}
diff --git a/r/man/open_dataset.Rd b/r/man/open_dataset.Rd
index 974d4286f59..53eade595be 100644
--- a/r/man/open_dataset.Rd
+++ b/r/man/open_dataset.Rd
@@ -90,6 +90,57 @@ can accelerate queries that only touch some partitions (files). Call
 \code{open_dataset()} to point to a directory of data files and return a
 \code{Dataset}, then use \code{dplyr} methods to query it.
 }
+\examples{
+\dontshow{if (arrow_with_dataset() & arrow_with_parquet()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+# Set up directory for examples
+tf <- tempfile()
+dir.create(tf)
+on.exit(unlink(tf))
+
+data <- dplyr::group_by(mtcars, cyl)
+write_dataset(data, tf)
+
+# You can specify a directory containing the files for your dataset and
+# open_dataset will scan all files in your directory.
+open_dataset(tf)
+
+# You can also supply a vector of paths
+open_dataset(c(file.path(tf, "cyl=4/part-1.parquet"), file.path(tf, "cyl=8/part-2.parquet")))
+
+## You must specify the file format if using a format other than parquet.
+tf2 <- tempfile()
+dir.create(tf2)
+on.exit(unlink(tf2))
+write_dataset(data, tf2, format = "ipc")
+# This line will results in errors when you try to work with the data
+\dontrun{
+open_dataset(tf2)
+}
+# This line will work
+open_dataset(tf2, format = "ipc")
+
+## You can specify file partitioning to include it as a field in your dataset
+# Create a temporary directory and write example dataset
+tf3 <- tempfile()
+dir.create(tf3)
+on.exit(unlink(tf3))
+write_dataset(airquality, tf3, partitioning = c("Month", "Day"), hive_style = FALSE)
+
+# View files - you can see the partitioning means that files have been written
+# to folders based on Month/Day values
+list.files(tf3, recursive = TRUE)
+
+# With no partitioning specified, dataset contains all files but doesn't include
+# directory names as field names
+open_dataset(tf3)
+
+# Now that partitioning has been specified, your dataset contains columns for Month and Day
+open_dataset(tf3, partitioning = c("Month", "Day"))
+
+# If you want to specify the data types for your fields, you can pass in a Schema
+open_dataset(tf3, partitioning = schema(Month = int8(), Day = int8()))
+\dontshow{\}) # examplesIf}
+}
 \seealso{
 \code{vignette("dataset", package = "arrow")}
 }
diff --git a/r/man/read_delim_arrow.Rd b/r/man/read_delim_arrow.Rd
index d9c80306931..30b146a4fee 100644
--- a/r/man/read_delim_arrow.Rd
+++ b/r/man/read_delim_arrow.Rd
@@ -205,3 +205,14 @@ Note that if you are specifying column names, whether by \code{schema} or
 to idenfity column names, you'll need to add \code{skip = 1} to skip that row.
 }
 
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+tf <- tempfile()
+on.exit(unlink(tf))
+write.csv(mtcars, file = tf)
+df <- read_csv_arrow(tf)
+dim(df)
+# Can select columns
+df <- read_csv_arrow(tf, col_select = starts_with("d"))
+\dontshow{\}) # examplesIf}
+}
diff --git a/r/man/read_feather.Rd b/r/man/read_feather.Rd
index fa18e3f7844..95f4d1d12c6 100644
--- a/r/man/read_feather.Rd
+++ b/r/man/read_feather.Rd
@@ -34,6 +34,17 @@ and to make sharing data across data analysis languages easy.
 This function reads both the original, limited specification of the format
 and the version 2 specification, which is the Apache Arrow IPC file format.
 }
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+tf <- tempfile()
+on.exit(unlink(tf))
+write_feather(mtcars, tf)
+df <- read_feather(tf)
+dim(df)
+# Can select columns
+df <- read_feather(tf, col_select = starts_with("d"))
+\dontshow{\}) # examplesIf}
+}
 \seealso{
 \link{FeatherReader} and \link{RecordBatchReader} for lower-level access to reading Arrow IPC data.
 }
diff --git a/r/man/read_json_arrow.Rd b/r/man/read_json_arrow.Rd
index 476c99fe4de..53d7107ae81 100644
--- a/r/man/read_json_arrow.Rd
+++ b/r/man/read_json_arrow.Rd
@@ -38,3 +38,15 @@ A \code{data.frame}, or a Table if \code{as_data_frame = FALSE}.
 \description{
 Using \link{JsonTableReader}
 }
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+tf <- tempfile()
+on.exit(unlink(tf))
+writeLines('
+    { "hello": 3.5, "world": false, "yo": "thing" }
+    { "hello": 3.25, "world": null }
+    { "hello": 0.0, "world": true, "yo": null }
+  ', tf, useBytes = TRUE)
+df <- read_json_arrow(tf)
+\dontshow{\}) # examplesIf}
+}
diff --git a/r/man/read_parquet.Rd b/r/man/read_parquet.Rd
index ffb2cf7109f..056e8644747 100644
--- a/r/man/read_parquet.Rd
+++ b/r/man/read_parquet.Rd
@@ -39,3 +39,12 @@ A \link[=Table]{arrow::Table}, or a \code{data.frame} if \code{as_data_frame} is
 '\href{https://parquet.apache.org/}{Parquet}' is a columnar storage file format.
 This function enables you to read Parquet files into R.
 }
+\examples{
+\dontshow{if (arrow_with_parquet()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+tf <- tempfile()
+on.exit(unlink(tf))
+write_parquet(mtcars, tf)
+df <- read_parquet(tf, col_select = starts_with("d"))
+head(df)
+\dontshow{\}) # examplesIf}
+}
diff --git a/r/man/s3_bucket.Rd b/r/man/s3_bucket.Rd
index 78d527a56c4..95a086deae5 100644
--- a/r/man/s3_bucket.Rd
+++ b/r/man/s3_bucket.Rd
@@ -21,3 +21,8 @@ are authorized to access the bucket's contents.
 that automatically detects the bucket's AWS region and holding onto the its
 relative path.
 }
+\examples{
+\dontshow{if (arrow_with_s3()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+bucket <- s3_bucket("ursa-labs-taxi-data")
+\dontshow{\}) # examplesIf}
+}
diff --git a/r/man/to_duckdb.Rd b/r/man/to_duckdb.Rd
index c273a7520d5..ffde91f14f2 100644
--- a/r/man/to_duckdb.Rd
+++ b/r/man/to_duckdb.Rd
@@ -39,3 +39,22 @@ that starts with an Arrow object to use DuckDB to calculate the summarization
 step. Internally, this calls \code{to_duckdb()} with all of the default argument
 values.
 }
+\examples{
+\dontshow{if (getFromNamespace("run_duckdb_examples", "arrow")()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+library(dplyr)
+
+ds <- InMemoryDataset$create(mtcars)
+
+ds \%>\%
+  filter(mpg < 30) \%>\%
+  to_duckdb() \%>\%
+  group_by(cyl) \%>\%
+  summarize(mean_mpg = mean(mpg, na.rm = TRUE))
+
+# the same query can be simplified using .engine = "duckdb"
+ds \%>\%
+  filter(mpg < 30) \%>\%
+  group_by(cyl) \%>\%
+  summarize(mean_mpg = mean(mpg, na.rm = TRUE), .engine = "duckdb")
+\dontshow{\}) # examplesIf}
+}
diff --git a/r/man/type.Rd b/r/man/type.Rd
index 2f85e4a6ac6..d55bbe24bd5 100644
--- a/r/man/type.Rd
+++ b/r/man/type.Rd
@@ -15,3 +15,13 @@ an arrow logical type
 \description{
 infer the arrow Array type from an R vector
 }
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+type(1:10)
+type(1L:10L)
+type(c(1, 1.5, 2))
+type(c("A", "B", "C"))
+type(mtcars)
+type(Sys.Date())
+\dontshow{\}) # examplesIf}
+}
diff --git a/r/man/unify_schemas.Rd b/r/man/unify_schemas.Rd
index 709e33a5e74..50c80c2dda9 100644
--- a/r/man/unify_schemas.Rd
+++ b/r/man/unify_schemas.Rd
@@ -18,3 +18,10 @@ A \code{Schema} with the union of fields contained in the inputs, or
 \description{
 Combine and harmonize schemas
 }
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+a <- schema(b = double(), c = bool())
+z <- schema(b = double(), k = utf8())
+unify_schemas(a, z)
+\dontshow{\}) # examplesIf}
+}
diff --git a/r/man/value_counts.Rd b/r/man/value_counts.Rd
index 139af8edc63..6ef77cd4727 100644
--- a/r/man/value_counts.Rd
+++ b/r/man/value_counts.Rd
@@ -16,3 +16,9 @@ A \code{StructArray} containing "values" (same type as \code{x}) and "counts"
 \description{
 This function tabulates the values in the array and returns a table of counts.
 }
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+cyl_vals <- Array$create(mtcars$cyl)
+value_counts(cyl_vals)
+\dontshow{\}) # examplesIf}
+}
diff --git a/r/man/write_csv_arrow.Rd b/r/man/write_csv_arrow.Rd
index d6df2bcd08e..55a239ca998 100644
--- a/r/man/write_csv_arrow.Rd
+++ b/r/man/write_csv_arrow.Rd
@@ -23,3 +23,10 @@ the stream will be left open.
 \description{
 Write CSV file to disk
 }
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+tf <- tempfile()
+on.exit(unlink(tf))
+write_csv_arrow(mtcars, tf)
+\dontshow{\}) # examplesIf}
+}
diff --git a/r/man/write_feather.Rd b/r/man/write_feather.Rd
index 0cc8c591369..c6273b61be8 100644
--- a/r/man/write_feather.Rd
+++ b/r/man/write_feather.Rd
@@ -47,6 +47,13 @@ and to make sharing data across data analysis languages easy.
 This function writes both the original, limited specification of the format
 and the version 2 specification, which is the Apache Arrow IPC file format.
 }
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+tf <- tempfile()
+on.exit(unlink(tf))
+write_feather(mtcars, tf)
+\dontshow{\}) # examplesIf}
+}
 \seealso{
 \link{RecordBatchWriter} for lower-level access to writing Arrow IPC data.
 
diff --git a/r/man/write_ipc_stream.Rd b/r/man/write_ipc_stream.Rd
index 4f742ce9178..2f215f25fd7 100644
--- a/r/man/write_ipc_stream.Rd
+++ b/r/man/write_ipc_stream.Rd
@@ -31,6 +31,13 @@ with some nonstandard behavior, is deprecated. You should explicitly choose
 the function that will write the desired IPC format (stream or file) since
 either can be written to a file or \code{OutputStream}.
 }
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+tf <- tempfile()
+on.exit(unlink(tf))
+write_ipc_stream(mtcars, tf)
+\dontshow{\}) # examplesIf}
+}
 \seealso{
 \code{\link[=write_feather]{write_feather()}} for writing IPC files. \code{\link[=write_to_raw]{write_to_raw()}} to
 serialize data to a buffer.
diff --git a/r/man/write_parquet.Rd b/r/man/write_parquet.Rd
index 823a6038e84..d7147f7e8e6 100644
--- a/r/man/write_parquet.Rd
+++ b/r/man/write_parquet.Rd
@@ -94,3 +94,15 @@ The default "snappy" is used if available, otherwise "uncompressed". To
 disable compression, set \code{compression = "uncompressed"}.
 Note that "uncompressed" columns may still have dictionary encoding.
 }
+\examples{
+\dontshow{if (arrow_with_parquet()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+tf1 <- tempfile(fileext = ".parquet")
+write_parquet(data.frame(x = 1:5), tf1)
+
+# using compression
+if (codec_is_available("gzip")) {
+  tf2 <- tempfile(fileext = ".gz.parquet")
+  write_parquet(data.frame(x = 1:5), tf2, compression = "gzip", compression_level = 5)
+}
+\dontshow{\}) # examplesIf}
+}
diff --git a/r/man/write_to_raw.Rd b/r/man/write_to_raw.Rd
index 46af09a96e8..1f507e384c3 100644
--- a/r/man/write_to_raw.Rd
+++ b/r/man/write_to_raw.Rd
@@ -20,3 +20,10 @@ the data (\code{data.frame}, \code{RecordBatch}, or \code{Table}) they were give
 This function wraps those so that you can serialize data to a buffer and
 access that buffer as a \code{raw} vector in R.
 }
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+# The default format is "stream"
+write_to_raw(mtcars)
+write_to_raw(mtcars, format = "file")
+\dontshow{\}) # examplesIf}
+}
diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp
index 19095a4cbde..92ddbae23fd 100644
--- a/r/src/arrowExports.cpp
+++ b/r/src/arrowExports.cpp
@@ -1092,6 +1092,124 @@ extern "C" SEXP _arrow_io___CompressedInputStream__Make(SEXP codec_sexp, SEXP ra
 }
 #endif
 
+// compute-exec.cpp
+#if defined(ARROW_R_WITH_ARROW)
+std::shared_ptr<compute::ExecPlan> ExecPlan_create(bool use_threads);
+extern "C" SEXP _arrow_ExecPlan_create(SEXP use_threads_sexp){
+BEGIN_CPP11
+	arrow::r::Input<bool>::type use_threads(use_threads_sexp);
+	return cpp11::as_sexp(ExecPlan_create(use_threads));
+END_CPP11
+}
+#else
+extern "C" SEXP _arrow_ExecPlan_create(SEXP use_threads_sexp){
+	Rf_error("Cannot call ExecPlan_create(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+}
+#endif
+
+// compute-exec.cpp
+#if defined(ARROW_R_WITH_ARROW)
+std::shared_ptr<arrow::Table> ExecPlan_run(const std::shared_ptr<compute::ExecPlan>& plan, const std::shared_ptr<compute::ExecNode>& final_node);
+extern "C" SEXP _arrow_ExecPlan_run(SEXP plan_sexp, SEXP final_node_sexp){
+BEGIN_CPP11
+	arrow::r::Input<const std::shared_ptr<compute::ExecPlan>&>::type plan(plan_sexp);
+	arrow::r::Input<const std::shared_ptr<compute::ExecNode>&>::type final_node(final_node_sexp);
+	return cpp11::as_sexp(ExecPlan_run(plan, final_node));
+END_CPP11
+}
+#else
+extern "C" SEXP _arrow_ExecPlan_run(SEXP plan_sexp, SEXP final_node_sexp){
+	Rf_error("Cannot call ExecPlan_run(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+}
+#endif
+
+// compute-exec.cpp
+#if defined(ARROW_R_WITH_DATASET)
+std::shared_ptr<compute::ExecNode> ExecNode_Scan(const std::shared_ptr<compute::ExecPlan>& plan, const std::shared_ptr<arrow::dataset::Dataset>& dataset, const std::shared_ptr<compute::Expression>& filter, std::vector<std::string> materialized_field_names);
+extern "C" SEXP _arrow_ExecNode_Scan(SEXP plan_sexp, SEXP dataset_sexp, SEXP filter_sexp, SEXP materialized_field_names_sexp){
+BEGIN_CPP11
+	arrow::r::Input<const std::shared_ptr<compute::ExecPlan>&>::type plan(plan_sexp);
+	arrow::r::Input<const std::shared_ptr<arrow::dataset::Dataset>&>::type dataset(dataset_sexp);
+	arrow::r::Input<const std::shared_ptr<compute::Expression>&>::type filter(filter_sexp);
+	arrow::r::Input<std::vector<std::string>>::type materialized_field_names(materialized_field_names_sexp);
+	return cpp11::as_sexp(ExecNode_Scan(plan, dataset, filter, materialized_field_names));
+END_CPP11
+}
+#else
+extern "C" SEXP _arrow_ExecNode_Scan(SEXP plan_sexp, SEXP dataset_sexp, SEXP filter_sexp, SEXP materialized_field_names_sexp){
+	Rf_error("Cannot call ExecNode_Scan(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+}
+#endif
+
+// compute-exec.cpp
+#if defined(ARROW_R_WITH_ARROW)
+std::shared_ptr<compute::ExecNode> ExecNode_Filter(const std::shared_ptr<compute::ExecNode>& input, const std::shared_ptr<compute::Expression>& filter);
+extern "C" SEXP _arrow_ExecNode_Filter(SEXP input_sexp, SEXP filter_sexp){
+BEGIN_CPP11
+	arrow::r::Input<const std::shared_ptr<compute::ExecNode>&>::type input(input_sexp);
+	arrow::r::Input<const std::shared_ptr<compute::Expression>&>::type filter(filter_sexp);
+	return cpp11::as_sexp(ExecNode_Filter(input, filter));
+END_CPP11
+}
+#else
+extern "C" SEXP _arrow_ExecNode_Filter(SEXP input_sexp, SEXP filter_sexp){
+	Rf_error("Cannot call ExecNode_Filter(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+}
+#endif
+
+// compute-exec.cpp
+#if defined(ARROW_R_WITH_ARROW)
+std::shared_ptr<compute::ExecNode> ExecNode_Project(const std::shared_ptr<compute::ExecNode>& input, const std::vector<std::shared_ptr<compute::Expression>>& exprs, std::vector<std::string> names);
+extern "C" SEXP _arrow_ExecNode_Project(SEXP input_sexp, SEXP exprs_sexp, SEXP names_sexp){
+BEGIN_CPP11
+	arrow::r::Input<const std::shared_ptr<compute::ExecNode>&>::type input(input_sexp);
+	arrow::r::Input<const std::vector<std::shared_ptr<compute::Expression>>&>::type exprs(exprs_sexp);
+	arrow::r::Input<std::vector<std::string>>::type names(names_sexp);
+	return cpp11::as_sexp(ExecNode_Project(input, exprs, names));
+END_CPP11
+}
+#else
+extern "C" SEXP _arrow_ExecNode_Project(SEXP input_sexp, SEXP exprs_sexp, SEXP names_sexp){
+	Rf_error("Cannot call ExecNode_Project(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+}
+#endif
+
+// compute-exec.cpp
+#if defined(ARROW_R_WITH_ARROW)
+std::shared_ptr<compute::ExecNode> ExecNode_ScalarAggregate(const std::shared_ptr<compute::ExecNode>& input, cpp11::list options, std::vector<std::string> target_names, std::vector<std::string> out_field_names);
+extern "C" SEXP _arrow_ExecNode_ScalarAggregate(SEXP input_sexp, SEXP options_sexp, SEXP target_names_sexp, SEXP out_field_names_sexp){
+BEGIN_CPP11
+	arrow::r::Input<const std::shared_ptr<compute::ExecNode>&>::type input(input_sexp);
+	arrow::r::Input<cpp11::list>::type options(options_sexp);
+	arrow::r::Input<std::vector<std::string>>::type target_names(target_names_sexp);
+	arrow::r::Input<std::vector<std::string>>::type out_field_names(out_field_names_sexp);
+	return cpp11::as_sexp(ExecNode_ScalarAggregate(input, options, target_names, out_field_names));
+END_CPP11
+}
+#else
+extern "C" SEXP _arrow_ExecNode_ScalarAggregate(SEXP input_sexp, SEXP options_sexp, SEXP target_names_sexp, SEXP out_field_names_sexp){
+	Rf_error("Cannot call ExecNode_ScalarAggregate(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+}
+#endif
+
+// compute-exec.cpp
+#if defined(ARROW_R_WITH_ARROW)
+std::shared_ptr<compute::ExecNode> ExecNode_GroupByAggregate(const std::shared_ptr<compute::ExecNode>& input, std::vector<std::string> group_vars, std::vector<std::string> agg_srcs, cpp11::list aggregations);
+extern "C" SEXP _arrow_ExecNode_GroupByAggregate(SEXP input_sexp, SEXP group_vars_sexp, SEXP agg_srcs_sexp, SEXP aggregations_sexp){
+BEGIN_CPP11
+	arrow::r::Input<const std::shared_ptr<compute::ExecNode>&>::type input(input_sexp);
+	arrow::r::Input<std::vector<std::string>>::type group_vars(group_vars_sexp);
+	arrow::r::Input<std::vector<std::string>>::type agg_srcs(agg_srcs_sexp);
+	arrow::r::Input<cpp11::list>::type aggregations(aggregations_sexp);
+	return cpp11::as_sexp(ExecNode_GroupByAggregate(input, group_vars, agg_srcs, aggregations));
+END_CPP11
+}
+#else
+extern "C" SEXP _arrow_ExecNode_GroupByAggregate(SEXP input_sexp, SEXP group_vars_sexp, SEXP agg_srcs_sexp, SEXP aggregations_sexp){
+	Rf_error("Cannot call ExecNode_GroupByAggregate(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+}
+#endif
+
 // compute.cpp
 #if defined(ARROW_R_WITH_ARROW)
 std::shared_ptr<arrow::RecordBatch> RecordBatch__cast(const std::shared_ptr<arrow::RecordBatch>& batch, const std::shared_ptr<arrow::Schema>& schema, cpp11::list options);
@@ -3123,16 +3241,16 @@ extern "C" SEXP _arrow_compute___expr__call(SEXP func_name_sexp, SEXP argument_l
 
 // expression.cpp
 #if defined(ARROW_R_WITH_ARROW)
-std::shared_ptr<compute::Expression> compute___expr__field_ref(std::string name);
-extern "C" SEXP _arrow_compute___expr__field_ref(SEXP name_sexp){
+std::vector<std::string> field_names_in_expression(const std::shared_ptr<compute::Expression>& x);
+extern "C" SEXP _arrow_field_names_in_expression(SEXP x_sexp){
 BEGIN_CPP11
-	arrow::r::Input<std::string>::type name(name_sexp);
-	return cpp11::as_sexp(compute___expr__field_ref(name));
+	arrow::r::Input<const std::shared_ptr<compute::Expression>&>::type x(x_sexp);
+	return cpp11::as_sexp(field_names_in_expression(x));
 END_CPP11
 }
 #else
-extern "C" SEXP _arrow_compute___expr__field_ref(SEXP name_sexp){
-	Rf_error("Cannot call compute___expr__field_ref(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+extern "C" SEXP _arrow_field_names_in_expression(SEXP x_sexp){
+	Rf_error("Cannot call field_names_in_expression(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
 }
 #endif
 
@@ -3151,6 +3269,21 @@ extern "C" SEXP _arrow_compute___expr__get_field_ref_name(SEXP x_sexp){
 }
 #endif
 
+// expression.cpp
+#if defined(ARROW_R_WITH_ARROW)
+std::shared_ptr<compute::Expression> compute___expr__field_ref(std::string name);
+extern "C" SEXP _arrow_compute___expr__field_ref(SEXP name_sexp){
+BEGIN_CPP11
+	arrow::r::Input<std::string>::type name(name_sexp);
+	return cpp11::as_sexp(compute___expr__field_ref(name));
+END_CPP11
+}
+#else
+extern "C" SEXP _arrow_compute___expr__field_ref(SEXP name_sexp){
+	Rf_error("Cannot call compute___expr__field_ref(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+}
+#endif
+
 // expression.cpp
 #if defined(ARROW_R_WITH_ARROW)
 std::shared_ptr<compute::Expression> compute___expr__scalar(const std::shared_ptr<arrow::Scalar>& x);
@@ -7011,6 +7144,13 @@ static const R_CallMethodDef CallEntries[] = {
 		{ "_arrow_util___Codec__IsAvailable", (DL_FUNC) &_arrow_util___Codec__IsAvailable, 1}, 
 		{ "_arrow_io___CompressedOutputStream__Make", (DL_FUNC) &_arrow_io___CompressedOutputStream__Make, 2}, 
 		{ "_arrow_io___CompressedInputStream__Make", (DL_FUNC) &_arrow_io___CompressedInputStream__Make, 2}, 
+		{ "_arrow_ExecPlan_create", (DL_FUNC) &_arrow_ExecPlan_create, 1}, 
+		{ "_arrow_ExecPlan_run", (DL_FUNC) &_arrow_ExecPlan_run, 2}, 
+		{ "_arrow_ExecNode_Scan", (DL_FUNC) &_arrow_ExecNode_Scan, 4}, 
+		{ "_arrow_ExecNode_Filter", (DL_FUNC) &_arrow_ExecNode_Filter, 2}, 
+		{ "_arrow_ExecNode_Project", (DL_FUNC) &_arrow_ExecNode_Project, 3}, 
+		{ "_arrow_ExecNode_ScalarAggregate", (DL_FUNC) &_arrow_ExecNode_ScalarAggregate, 4}, 
+		{ "_arrow_ExecNode_GroupByAggregate", (DL_FUNC) &_arrow_ExecNode_GroupByAggregate, 4}, 
 		{ "_arrow_RecordBatch__cast", (DL_FUNC) &_arrow_RecordBatch__cast, 3}, 
 		{ "_arrow_Table__cast", (DL_FUNC) &_arrow_Table__cast, 3}, 
 		{ "_arrow_compute__CallFunction", (DL_FUNC) &_arrow_compute__CallFunction, 3}, 
@@ -7142,8 +7282,9 @@ static const R_CallMethodDef CallEntries[] = {
 		{ "_arrow_FixedSizeListType__value_type", (DL_FUNC) &_arrow_FixedSizeListType__value_type, 1}, 
 		{ "_arrow_FixedSizeListType__list_size", (DL_FUNC) &_arrow_FixedSizeListType__list_size, 1}, 
 		{ "_arrow_compute___expr__call", (DL_FUNC) &_arrow_compute___expr__call, 3}, 
-		{ "_arrow_compute___expr__field_ref", (DL_FUNC) &_arrow_compute___expr__field_ref, 1}, 
+		{ "_arrow_field_names_in_expression", (DL_FUNC) &_arrow_field_names_in_expression, 1}, 
 		{ "_arrow_compute___expr__get_field_ref_name", (DL_FUNC) &_arrow_compute___expr__get_field_ref_name, 1}, 
+		{ "_arrow_compute___expr__field_ref", (DL_FUNC) &_arrow_compute___expr__field_ref, 1}, 
 		{ "_arrow_compute___expr__scalar", (DL_FUNC) &_arrow_compute___expr__scalar, 1}, 
 		{ "_arrow_compute___expr__ToString", (DL_FUNC) &_arrow_compute___expr__ToString, 1}, 
 		{ "_arrow_compute___expr__type", (DL_FUNC) &_arrow_compute___expr__type, 2}, 
diff --git a/r/src/arrow_types.h b/r/src/arrow_types.h
index b5a8914d432..4ecb99174b5 100644
--- a/r/src/arrow_types.h
+++ b/r/src/arrow_types.h
@@ -47,6 +47,15 @@
 #include <arrow/type_fwd.h>
 #include <arrow/util/type_fwd.h>
 
+namespace arrow {
+namespace compute {
+
+class ExecPlan;
+class ExecNode;
+
+}  // namespace compute
+}  // namespace arrow
+
 #if defined(ARROW_R_WITH_PARQUET)
 #include <parquet/type_fwd.h>
 #endif
@@ -60,6 +69,7 @@ namespace fs = ::arrow::fs;
 
 std::shared_ptr<arrow::RecordBatch> RecordBatch__from_arrays(SEXP, SEXP);
 arrow::MemoryPool* gc_memory_pool();
+arrow::compute::ExecContext* gc_context();
 
 #if (R_VERSION < R_Version(3, 5, 0))
 #define LOGICAL_RO(x) ((const int*)LOGICAL(x))
diff --git a/r/src/compute-exec.cpp b/r/src/compute-exec.cpp
new file mode 100644
index 00000000000..61a79bf462e
--- /dev/null
+++ b/r/src/compute-exec.cpp
@@ -0,0 +1,177 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "./arrow_types.h"
+
+#if defined(ARROW_R_WITH_ARROW)
+
+#include <arrow/compute/api.h>
+#include <arrow/compute/exec/exec_plan.h>
+#include <arrow/compute/exec/expression.h>
+#include <arrow/table.h>
+#include <arrow/util/future.h>
+#include <arrow/util/thread_pool.h>
+
+#include <iostream>
+
+namespace compute = ::arrow::compute;
+
+std::shared_ptr<compute::FunctionOptions> make_compute_options(std::string func_name,
+                                                               cpp11::list options);
+
+// [[arrow::export]]
+std::shared_ptr<compute::ExecPlan> ExecPlan_create(bool use_threads) {
+  static compute::ExecContext threaded_context{gc_memory_pool(),
+                                               arrow::internal::GetCpuThreadPool()};
+  auto plan = ValueOrStop(
+      compute::ExecPlan::Make(use_threads ? &threaded_context : gc_context()));
+  return plan;
+}
+
+// [[arrow::export]]
+std::shared_ptr<arrow::Table> ExecPlan_run(
+    const std::shared_ptr<compute::ExecPlan>& plan,
+    const std::shared_ptr<compute::ExecNode>& final_node) {
+  // For now, don't require R to construct SinkNodes.
+  // Instead, just pass the node we should collect as an argument.
+  auto sink_gen = compute::MakeSinkNode(final_node.get(), "sink");
+
+  StopIfNotOk(plan->Validate());
+  StopIfNotOk(plan->StartProducing());
+
+  std::shared_ptr<arrow::RecordBatchReader> sink_reader = compute::MakeGeneratorReader(
+      final_node->output_schema(), std::move(sink_gen), gc_memory_pool());
+
+  plan->finished().Wait();
+  return ValueOrStop(arrow::Table::FromRecordBatchReader(sink_reader.get()));
+}
+
+std::shared_ptr<compute::ExecNode> ExecNodeOrStop(
+    arrow::Result<compute::ExecNode*> maybe_node) {
+  return std::shared_ptr<compute::ExecNode>(ValueOrStop(maybe_node), [](...) {
+    // empty destructor: ExecNode lifetime is managed by an ExecPlan
+  });
+}
+
+#if defined(ARROW_R_WITH_DATASET)
+
+#include <arrow/dataset/scanner.h>
+
+// [[dataset::export]]
+std::shared_ptr<compute::ExecNode> ExecNode_Scan(
+    const std::shared_ptr<compute::ExecPlan>& plan,
+    const std::shared_ptr<arrow::dataset::Dataset>& dataset,
+    const std::shared_ptr<compute::Expression>& filter,
+    std::vector<std::string> materialized_field_names) {
+  // TODO: pass in FragmentScanOptions
+  auto options = std::make_shared<arrow::dataset::ScanOptions>();
+
+  options->use_async = true;
+
+  options->dataset_schema = dataset->schema();
+
+  // ScanNode needs the filter to do predicate pushdown and skip partitions
+  options->filter = ValueOrStop(filter->Bind(*dataset->schema()));
+
+  // ScanNode needs to know which fields to materialize (and which are unnecessary)
+  std::vector<compute::Expression> exprs;
+  for (const auto& name : materialized_field_names) {
+    exprs.push_back(compute::field_ref(name));
+  }
+
+  options->projection =
+      ValueOrStop(call("make_struct", std::move(exprs),
+                       compute::MakeStructOptions{std::move(materialized_field_names)})
+                      .Bind(*dataset->schema()));
+
+  return ExecNodeOrStop(arrow::dataset::MakeScanNode(plan.get(), dataset, options));
+}
+
+#endif
+
+// [[arrow::export]]
+std::shared_ptr<compute::ExecNode> ExecNode_Filter(
+    const std::shared_ptr<compute::ExecNode>& input,
+    const std::shared_ptr<compute::Expression>& filter) {
+  return ExecNodeOrStop(
+      compute::MakeFilterNode(input.get(), /*label=*/"filter", *filter));
+}
+
+// [[arrow::export]]
+std::shared_ptr<compute::ExecNode> ExecNode_Project(
+    const std::shared_ptr<compute::ExecNode>& input,
+    const std::vector<std::shared_ptr<compute::Expression>>& exprs,
+    std::vector<std::string> names) {
+  // We have shared_ptrs of expressions but need the Expressions
+  std::vector<compute::Expression> expressions;
+  for (auto expr : exprs) {
+    expressions.push_back(*expr);
+  }
+  return ExecNodeOrStop(compute::MakeProjectNode(
+      input.get(), /*label=*/"project", std::move(expressions), std::move(names)));
+}
+
+// [[arrow::export]]
+std::shared_ptr<compute::ExecNode> ExecNode_ScalarAggregate(
+    const std::shared_ptr<compute::ExecNode>& input, cpp11::list options,
+    std::vector<std::string> target_names, std::vector<std::string> out_field_names) {
+  std::vector<arrow::compute::internal::Aggregate> aggregates;
+  std::vector<std::shared_ptr<arrow::compute::FunctionOptions>> keep_alives;
+
+  for (cpp11::list name_opts : options) {
+    auto name = cpp11::as_cpp<std::string>(name_opts[0]);
+    auto opts = make_compute_options(name, name_opts[1]);
+
+    aggregates.push_back(
+        arrow::compute::internal::Aggregate{std::move(name), opts.get()});
+    keep_alives.push_back(std::move(opts));
+  }
+
+  std::vector<arrow::FieldRef> targets;
+  for (auto&& name : target_names) {
+    targets.emplace_back(std::move(name));
+  }
+  return ExecNodeOrStop(compute::MakeScalarAggregateNode(
+      input.get(), /*label=*/"scalar_agg", std::move(aggregates), std::move(targets),
+      std::move(out_field_names)));
+}
+
+// [[arrow::export]]
+std::shared_ptr<compute::ExecNode> ExecNode_GroupByAggregate(
+    const std::shared_ptr<compute::ExecNode>& input, std::vector<std::string> group_vars,
+    std::vector<std::string> agg_srcs, cpp11::list aggregations) {
+  std::vector<arrow::compute::internal::Aggregate> aggs;
+  std::vector<std::shared_ptr<arrow::compute::FunctionOptions>> keep_alives;
+
+  for (cpp11::list name_opts : aggregations) {
+    auto name = cpp11::as_cpp<std::string>(name_opts[0]);
+    auto opts = make_compute_options(name, name_opts[1]);
+
+    aggs.push_back(arrow::compute::internal::Aggregate{std::move(name), opts.get()});
+    keep_alives.push_back(std::move(opts));
+  }
+
+  return ExecNodeOrStop(compute::MakeGroupByNode(input.get(), /*label=*/"group_agg",
+                                                 /*keys=*/std::move(group_vars),
+                                                 std::move(agg_srcs), std::move(aggs)));
+}
+
+// Result<ExecNode*> MakeGroupByNode(ExecNode* input, std::string label,
+//                                   std::vector<std::string> keys,
+//                                   std::vector<std::string> agg_srcs,
+//                                   std::vector<internal::Aggregate> aggs);
+#endif
diff --git a/r/src/expression.cpp b/r/src/expression.cpp
index 4b671cb99dd..3fcba46e911 100644
--- a/r/src/expression.cpp
+++ b/r/src/expression.cpp
@@ -44,8 +44,14 @@ std::shared_ptr<compute::Expression> compute___expr__call(std::string func_name,
 }
 
 // [[arrow::export]]
-std::shared_ptr<compute::Expression> compute___expr__field_ref(std::string name) {
-  return std::make_shared<compute::Expression>(compute::field_ref(std::move(name)));
+std::vector<std::string> field_names_in_expression(
+    const std::shared_ptr<compute::Expression>& x) {
+  std::vector<std::string> out;
+  auto field_refs = FieldsInExpression(*x);
+  for (auto f : field_refs) {
+    out.push_back(*f.name());
+  }
+  return out;
 }
 
 // [[arrow::export]]
@@ -57,6 +63,11 @@ std::string compute___expr__get_field_ref_name(
   return "";
 }
 
+// [[arrow::export]]
+std::shared_ptr<compute::Expression> compute___expr__field_ref(std::string name) {
+  return std::make_shared<compute::Expression>(compute::field_ref(std::move(name)));
+}
+
 // [[arrow::export]]
 std::shared_ptr<compute::Expression> compute___expr__scalar(
     const std::shared_ptr<arrow::Scalar>& x) {
diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R
index 4711cacfcd0..1a71fea86c7 100644
--- a/r/tests/testthat/test-dataset.R
+++ b/r/tests/testthat/test-dataset.R
@@ -638,12 +638,15 @@ test_that("Creating UnionDataset", {
 test_that("map_batches", {
   skip_if_not_available("parquet")
   ds <- open_dataset(dataset_dir, partitioning = "part")
-  expect_equivalent(
-    ds %>%
-      filter(int > 5) %>%
-      select(int, lgl) %>%
-      map_batches(~ summarize(., min_int = min(int))),
-    tibble(min_int = c(6L, 101L))
+  expect_warning(
+    expect_equivalent(
+      ds %>%
+        filter(int > 5) %>%
+        select(int, lgl) %>%
+        map_batches(~ summarize(., min_int = min(int))),
+      tibble(min_int = c(6L, 101L))
+    ),
+    "pulling data into R" # ARROW-13502
   )
 })
 
@@ -986,17 +989,6 @@ test_that("dplyr method not implemented messages", {
     "Filter expression not supported for Arrow Datasets: dbl > max(dbl)\nCall collect() first to pull data into R.",
     fixed = TRUE
   )
-  # One explicit test of the full message
-  expect_error(
-    ds %>% summarize(mean(int)),
-    "summarize() is not currently implemented for Arrow Datasets. Call collect() first to pull data into R.",
-    fixed = TRUE
-  )
-  # Helper for everything else
-  expect_not_implemented <- function(x) {
-    expect_error(x, "is not currently implemented for Arrow Datasets")
-  }
-  expect_not_implemented(ds %>% filter(int == 1) %>% summarize(n()))
 })
 
 test_that("Dataset and query print methods", {
diff --git a/r/tests/testthat/test-dplyr-aggregate.R b/r/tests/testthat/test-dplyr-aggregate.R
new file mode 100644
index 00000000000..8235ef29948
--- /dev/null
+++ b/r/tests/testthat/test-dplyr-aggregate.R
@@ -0,0 +1,185 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+skip_if_not_available("dataset")
+
+library(dplyr)
+library(stringr)
+
+tbl <- example_data
+# Add some better string data
+tbl$verses <- verses[[1]]
+# c(" a ", "  b  ", "   c   ", ...) increasing padding
+# nchar =   3  5  7  9 11 13 15 17 19 21
+tbl$padded_strings <- stringr::str_pad(letters[1:10], width = 2 * (1:10) + 1, side = "both")
+tbl$some_grouping <- rep(c(1, 2), 5)
+
+test_that("summarize", {
+  expect_dplyr_equal(
+    input %>%
+      select(int, chr) %>%
+      filter(int > 5) %>%
+      summarize(min_int = min(int)),
+    tbl,
+    warning = TRUE
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      select(int, chr) %>%
+      filter(int > 5) %>%
+      summarize(min_int = min(int) / 2),
+    tbl,
+    warning = TRUE
+  )
+})
+
+test_that("Can aggregate in Arrow", {
+  expect_dplyr_equal(
+    input %>%
+      summarize(total = sum(int, na.rm = TRUE)) %>%
+      collect(),
+    tbl
+  )
+  expect_dplyr_equal(
+    input %>%
+      summarize(total = sum(int)) %>%
+      collect(),
+    tbl,
+    # ARROW-13497: This is failing because the default is na.rm = FALSE
+    warning = TRUE
+  )
+})
+
+test_that("Group by sum on dataset", {
+  expect_dplyr_equal(
+    input %>%
+      group_by(some_grouping) %>%
+      summarize(total = sum(int, na.rm = TRUE)) %>%
+      arrange(some_grouping) %>%
+      collect(),
+    tbl
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      group_by(some_grouping) %>%
+      summarize(total = sum(int * 4, na.rm = TRUE)) %>%
+      arrange(some_grouping) %>%
+      collect(),
+    tbl
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      group_by(some_grouping) %>%
+      summarize(total = sum(int)) %>%
+      arrange(some_grouping) %>%
+      collect(),
+    tbl,
+    # ARROW-13497: This is failing because the default is na.rm = FALSE
+    warning = TRUE
+  )
+})
+
+test_that("Group by any/all", {
+  withr::local_options(list(arrow.debug = TRUE))
+
+  expect_dplyr_equal(
+    input %>%
+      group_by(some_grouping) %>%
+      summarize(any(lgl, na.rm = TRUE)) %>%
+      arrange(some_grouping) %>%
+      collect(),
+    tbl
+  )
+  expect_dplyr_equal(
+    input %>%
+      group_by(some_grouping) %>%
+      summarize(all(lgl, na.rm = TRUE)) %>%
+      arrange(some_grouping) %>%
+      collect(),
+    tbl
+  )
+  # ARROW-13497: na.rm option also is not being passed/received to any/all
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(has_words = nchar(verses) < 0) %>%
+      group_by(some_grouping) %>%
+      summarize(any(has_words, na.rm = TRUE)) %>%
+      arrange(some_grouping) %>%
+      collect(),
+    tbl
+  )
+  expect_dplyr_equal(
+    input %>%
+      mutate(has_words = nchar(verses) < 0) %>%
+      group_by(some_grouping) %>%
+      summarize(all(has_words, na.rm = TRUE)) %>%
+      arrange(some_grouping) %>%
+      collect(),
+    tbl
+  )
+  skip("This seems to be calling base::nchar")
+  expect_dplyr_equal(
+    input %>%
+      group_by(some_grouping) %>%
+      summarize(has_words = all(nchar(verses) < 0)) %>%
+      arrange(some_grouping) %>%
+      collect(),
+    tbl
+  )
+})
+
+test_that("Filter and aggregate", {
+  expect_dplyr_equal(
+    input %>%
+      filter(some_grouping == 2) %>%
+      summarize(total = sum(int, na.rm = TRUE)) %>%
+      collect(),
+    tbl
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      filter(int > 5) %>%
+      summarize(total = sum(int, na.rm = TRUE)) %>%
+      collect(),
+    tbl
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      filter(some_grouping == 2) %>%
+      group_by(some_grouping) %>%
+      summarize(total = sum(int, na.rm = TRUE)) %>%
+      arrange(some_grouping) %>%
+      collect(),
+    tbl
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      filter(int > 5) %>%
+      group_by(some_grouping) %>%
+      summarize(total = sum(int, na.rm = TRUE)) %>%
+      arrange(some_grouping) %>%
+      collect(),
+    tbl
+  )
+})
diff --git a/r/tests/testthat/test-dplyr-group-by.R b/r/tests/testthat/test-dplyr-group-by.R
index fe0394bc636..18be2a9304a 100644
--- a/r/tests/testthat/test-dplyr-group-by.R
+++ b/r/tests/testthat/test-dplyr-group-by.R
@@ -29,7 +29,8 @@ test_that("group_by groupings are recorded", {
       select(int, chr) %>%
       filter(int > 5) %>%
       summarize(min_int = min(int)),
-    tbl
+    tbl,
+    warning = TRUE
   )
 })
 
@@ -62,7 +63,8 @@ test_that("ungroup", {
       ungroup() %>%
       filter(int > 5) %>%
       summarize(min_int = min(int)),
-    tbl
+    tbl,
+    warning = TRUE
   )
 })
 
diff --git a/r/tests/testthat/test-dplyr.R b/r/tests/testthat/test-dplyr.R
index da21ccd9ed1..ed03c58a884 100644
--- a/r/tests/testthat/test-dplyr.R
+++ b/r/tests/testthat/test-dplyr.R
@@ -69,24 +69,6 @@ See $.data for the source Arrow object',
   )
 })
 
-test_that("summarize", {
-  expect_dplyr_equal(
-    input %>%
-      select(int, chr) %>%
-      filter(int > 5) %>%
-      summarize(min_int = min(int)),
-    tbl
-  )
-
-  expect_dplyr_equal(
-    input %>%
-      select(int, chr) %>%
-      filter(int > 5) %>%
-      summarize(min_int = min(int) / 2),
-    tbl
-  )
-})
-
 test_that("Empty select returns no columns", {
   expect_dplyr_equal(
     input %>% select() %>% collect(),
@@ -1054,7 +1036,7 @@ test_that("log functions", {
 
   expect_error(
     nse_funcs$log(Expression$scalar(x), base = 5),
-    "`base` values other than exp(1), 2 and 10 not supported in Arrow",
+    "`base` values other than exp(1), 2 and 10 not supported by Arrow",
     fixed = TRUE
   )
 

From c9d20e494efc89a6dbe87197b73c356a97024980 Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Wed, 4 Aug 2021 15:35:44 -0400
Subject: [PATCH 708/719] ARROW-13507: [R] LTO job on CRAN fails

A test to see if we can (for now) build r-debug before using it

Closes #10849 from jonkeane/ARROW-13507-r-lto

Authored-by: Jonathan Keane <jkeane@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 dev/tasks/tasks.yml | 3 ++-
 r/DESCRIPTION       | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index c6c26a367ef..99d6082ca99 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -1014,7 +1014,8 @@ tasks:
                                  ("rstudio", "r-base", "3.6-bionic"),
                                  ("rstudio", "r-base", "3.6-centos8"),
                                  ("rstudio", "r-base", "3.6-opensuse15"),
-                                 ("rstudio", "r-base", "3.6-opensuse42")] %}
+                                 ("rstudio", "r-base", "3.6-opensuse42"),
+                                 ("rhub", "debian-gcc-devel-lto", "latest")] %}
   test-r-{{ r_org }}-{{ r_image }}-{{ r_tag }}:
     ci: azure
     template: r/azure.linux.yml
diff --git a/r/DESCRIPTION b/r/DESCRIPTION
index 3d10aa4745e..78eec3631a1 100644
--- a/r/DESCRIPTION
+++ b/r/DESCRIPTION
@@ -116,3 +116,4 @@ Collate:
     'reexports-tidyselect.R'
     'schema.R'
     'util.R'
+UseLTO: false

From 3a8506ddc024a8fabc6508b4069f2001263dc6b1 Mon Sep 17 00:00:00 2001
From: Nic <thisisnic@gmail.com>
Date: Wed, 4 Aug 2021 15:44:43 -0400
Subject: [PATCH 709/719] ARROW-13519: [R] Make doc examples less noisy

Closes #10851 from thisisnic/ARROW-13519_noisy_docs

Lead-authored-by: Nic <thisisnic@gmail.com>
Co-authored-by: Neal Richardson <neal.p.richardson@gmail.com>
Co-authored-by: Nic Crane <thisisnic@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 r/R/compute.R                   | 7 +++----
 r/R/dataset.R                   | 2 +-
 r/R/ipc_stream.R                | 3 +--
 r/man/list_compute_functions.Rd | 5 ++---
 r/man/open_dataset.Rd           | 2 +-
 r/man/value_counts.Rd           | 2 +-
 r/man/write_to_raw.Rd           | 3 +--
 7 files changed, 10 insertions(+), 14 deletions(-)

diff --git a/r/R/compute.R b/r/R/compute.R
index 5c27e206d7e..8cfaaf7b415 100644
--- a/r/R/compute.R
+++ b/r/R/compute.R
@@ -89,9 +89,8 @@ call_function <- function(function_name, ..., args = list(...), options = empty_
 #' @param ... Additional parameters passed to `grep()`
 #' @return A character vector of available Arrow C++ function names
 #' @examplesIf arrow_available()
-#' list_compute_functions()
-#' list_compute_functions(pattern = "^UTF8", ignore.case = TRUE)
-#' list_compute_functions(pattern = "^is", invert = TRUE)
+#' available_funcs <- list_compute_functions()
+#' utf8_funcs <- list_compute_functions(pattern = "^UTF8", ignore.case = TRUE)
 #' @export
 list_compute_functions <- function(pattern = NULL, ...) {
   funcs <- compute__GetFunctionNames()
@@ -284,7 +283,7 @@ is_in <- function(x, table, ...) {
 #' `Int64`.
 #' @examplesIf arrow_available()
 #' cyl_vals <- Array$create(mtcars$cyl)
-#' value_counts(cyl_vals)
+#' counts <- value_counts(cyl_vals)
 #' @export
 value_counts <- function(x) {
   call_function("value_counts", x)
diff --git a/r/R/dataset.R b/r/R/dataset.R
index eb7cfaf4f44..072a0f3ae96 100644
--- a/r/R/dataset.R
+++ b/r/R/dataset.R
@@ -118,7 +118,7 @@
 #'
 #' # View files - you can see the partitioning means that files have been written
 #' # to folders based on Month/Day values
-#' list.files(tf3, recursive = TRUE)
+#' tf3_files <- list.files(tf3, recursive = TRUE)
 #'
 #' # With no partitioning specified, dataset contains all files but doesn't include
 #' # directory names as field names
diff --git a/r/R/ipc_stream.R b/r/R/ipc_stream.R
index 1e101d84e3c..c45d1de6e33 100644
--- a/r/R/ipc_stream.R
+++ b/r/R/ipc_stream.R
@@ -68,8 +68,7 @@ write_ipc_stream <- function(x, sink, ...) {
 #' @return A `raw` vector containing the bytes of the IPC serialized data.
 #' @examplesIf arrow_available()
 #' # The default format is "stream"
-#' write_to_raw(mtcars)
-#' write_to_raw(mtcars, format = "file")
+#' mtcars_raw <- write_to_raw(mtcars)
 #' @export
 write_to_raw <- function(x, format = c("stream", "file")) {
   sink <- BufferOutputStream$create()
diff --git a/r/man/list_compute_functions.Rd b/r/man/list_compute_functions.Rd
index 4ca0e518f13..45e0338368e 100644
--- a/r/man/list_compute_functions.Rd
+++ b/r/man/list_compute_functions.Rd
@@ -39,8 +39,7 @@ available inside \code{dplyr} verbs.
 }
 \examples{
 \dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
-list_compute_functions()
-list_compute_functions(pattern = "^UTF8", ignore.case = TRUE)
-list_compute_functions(pattern = "^is", invert = TRUE)
+available_funcs <- list_compute_functions()
+utf8_funcs <- list_compute_functions(pattern = "^UTF8", ignore.case = TRUE)
 \dontshow{\}) # examplesIf}
 }
diff --git a/r/man/open_dataset.Rd b/r/man/open_dataset.Rd
index 53eade595be..59717863932 100644
--- a/r/man/open_dataset.Rd
+++ b/r/man/open_dataset.Rd
@@ -128,7 +128,7 @@ write_dataset(airquality, tf3, partitioning = c("Month", "Day"), hive_style = FA
 
 # View files - you can see the partitioning means that files have been written
 # to folders based on Month/Day values
-list.files(tf3, recursive = TRUE)
+tf3_files <- list.files(tf3, recursive = TRUE)
 
 # With no partitioning specified, dataset contains all files but doesn't include
 # directory names as field names
diff --git a/r/man/value_counts.Rd b/r/man/value_counts.Rd
index 6ef77cd4727..7e64d1550cf 100644
--- a/r/man/value_counts.Rd
+++ b/r/man/value_counts.Rd
@@ -19,6 +19,6 @@ This function tabulates the values in the array and returns a table of counts.
 \examples{
 \dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
 cyl_vals <- Array$create(mtcars$cyl)
-value_counts(cyl_vals)
+counts <- value_counts(cyl_vals)
 \dontshow{\}) # examplesIf}
 }
diff --git a/r/man/write_to_raw.Rd b/r/man/write_to_raw.Rd
index 1f507e384c3..a3c6e324b54 100644
--- a/r/man/write_to_raw.Rd
+++ b/r/man/write_to_raw.Rd
@@ -23,7 +23,6 @@ access that buffer as a \code{raw} vector in R.
 \examples{
 \dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
 # The default format is "stream"
-write_to_raw(mtcars)
-write_to_raw(mtcars, format = "file")
+mtcars_raw <- write_to_raw(mtcars)
 \dontshow{\}) # examplesIf}
 }

From fe2457e67c6346341bde575351bf27589e79a7d8 Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Wed, 4 Aug 2021 15:45:31 -0400
Subject: [PATCH 710/719] ARROW-13399: [R] Update dataset.Rmd vignette

Various updates to dataset.Rmd including:
* separating out dense text chunks
* rephrasing based on suggestions by Grammarly to simplify phrasing
* rephrasing "we" to "you"

Closes #10765 from thisisnic/ARROW_13399_dataset_vignette

Lead-authored-by: Nic Crane <thisisnic@gmail.com>
Co-authored-by: Nic <thisisnic@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
---
 r/.Rbuildignore         |   1 +
 r/STYLE.md              |  38 ++++++++
 r/vignettes/dataset.Rmd | 200 ++++++++++++++++++++++------------------
 3 files changed, 151 insertions(+), 88 deletions(-)
 create mode 100644 r/STYLE.md

diff --git a/r/.Rbuildignore b/r/.Rbuildignore
index 2f4cea9a34d..3f67ef7cf3c 100644
--- a/r/.Rbuildignore
+++ b/r/.Rbuildignore
@@ -24,4 +24,5 @@ clang_format.sh
 ^apache-arrow.rb$
 ^.*\.Rhistory$
 ^extra-tests
+STYLE.md
 ^.lintr
diff --git a/r/STYLE.md b/r/STYLE.md
new file mode 100644
index 00000000000..760084936a4
--- /dev/null
+++ b/r/STYLE.md
@@ -0,0 +1,38 @@
+<!---
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+# Style
+
+This is a style guide to writing documentation for arrow.
+
+## Coding style
+
+Please use the [tidyverse coding style](https://style.tidyverse.org/).
+
+## Referring to external packages
+
+When referring to external packages, include a link to the package at the first mention, and subsequently refer to it in plain text, e.g.
+
+* "The arrow R package provides a [dplyr](https://dplyr.tidyverse.org/) interface to Arrow Datasets.  This vignette introduces Datasets and shows how to use dplyr to analyze them."
+
+## Data frames
+
+When referring to the concept, use the phrase "data frame", whereas when referring to an object of that class or when the class is important, write `data.frame`, e.g.
+
+* "You can call `write_dataset()` on tabular data objects such as Arrow Tables or RecordBatches, or R data frames. If working with data frames you might want to use a `tibble` instead of a `data.frame` to take advantage of the default behaviour of partitioning data based on grouped variables."
diff --git a/r/vignettes/dataset.Rmd b/r/vignettes/dataset.Rmd
index b5e17578b29..3f33cbae47c 100644
--- a/r/vignettes/dataset.Rmd
+++ b/r/vignettes/dataset.Rmd
@@ -8,46 +8,46 @@ vignette: >
 ---
 
 Apache Arrow lets you work efficiently with large, multi-file datasets.
-The `arrow` R package provides a `dplyr` interface to Arrow Datasets,
-as well as other tools for interactive exploration of Arrow data.
+The arrow R package provides a [dplyr](https://dplyr.tidyverse.org/) interface to Arrow Datasets,
+and other tools for interactive exploration of Arrow data.
 
-This vignette introduces Datasets and shows how to use `dplyr` to analyze them.
-It describes both what is possible to do with Arrow now
-and what is on the immediate development roadmap.
+This vignette introduces Datasets and shows how to use dplyr to analyze them.
 
 ## Example: NYC taxi data
 
 The [New York City taxi trip record data](https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page)
 is widely used in big data exercises and competitions.
 For demonstration purposes, we have hosted a Parquet-formatted version
-of about 10 years of the trip data in a public Amazon S3 bucket.
+of about ten years of the trip data in a public Amazon S3 bucket.
 
 The total file size is around 37 gigabytes, even in the efficient Parquet file
-format. That's bigger than memory on most people's computers, so we can't just
+format. That's bigger than memory on most people's computers, so you can't just
 read it all in and stack it into a single data frame.
 
-In Windows and macOS binary packages, S3 support is included.
-On Linux when installing from source, S3 support is not enabled by default,
+In Windows (for R > 3.6) and macOS binary packages, S3 support is included.
+On Linux, when installing from source, S3 support is not enabled by default,
 and it has additional system requirements.
 See `vignette("install", package = "arrow")` for details.
-To see if your `arrow` installation has S3 support, run
+To see if your arrow installation has S3 support, run:
 
 ```{r}
 arrow::arrow_with_s3()
 ```
 
-Even with S3 support enabled network, speed will be a bottleneck unless your
+Even with S3 support enabled, network speed will be a bottleneck unless your
 machine is located in the same AWS region as the data. So, for this vignette,
-we assume that the NYC taxi dataset has been downloaded locally in a "nyc-taxi"
+we assume that the NYC taxi dataset has been downloaded locally in an "nyc-taxi"
 directory.
 
-If your `arrow` build has S3 support, you can sync the data locally with:
+### Retrieving data from a public Amazon S3 bucket
+
+If your arrow build has S3 support, you can sync the data locally with:
 
 ```{r, eval = FALSE}
 arrow::copy_files("s3://ursa-labs-taxi-data", "nyc-taxi")
 ```
 
-If your `arrow` build doesn't have S3 support, you can download the files
+If your arrow build doesn't have S3 support, you can download the files
 with some additional code:
 
 ```{r, eval = FALSE}
@@ -77,39 +77,51 @@ feel free to grab only a year or two of data.
 
 If you don't have the taxi data downloaded, the vignette will still run and will
 yield previously cached output for reference. To be explicit about which version
-is running, let's check whether we're running with live data:
+is running, let's check whether you're running with live data:
 
 ```{r}
 dir.exists("nyc-taxi")
 ```
 
-## Getting started
+## Opening the dataset
 
-Because `dplyr` is not necessary for many Arrow workflows,
+Because dplyr is not necessary for many Arrow workflows,
 it is an optional (`Suggests`) dependency. So, to work with Datasets,
-we need to load both `arrow` and `dplyr`.
+you need to load both arrow and dplyr.
 
 ```{r}
 library(arrow, warn.conflicts = FALSE)
 library(dplyr, warn.conflicts = FALSE)
 ```
 
-The first step is to create our Dataset object, pointing at the directory of data.
+The first step is to create a Dataset object, pointing at the directory of data.
 
 ```{r, eval = file.exists("nyc-taxi")}
 ds <- open_dataset("nyc-taxi", partitioning = c("year", "month"))
 ```
 
-The default file format for `open_dataset()` is Parquet; if we had a directory
-of Arrow format files, we could include `format = "arrow"` in the call.
-Other supported formats include: `"feather"` (an alias for `"arrow"`, as Feather
-v2 is the Arrow file format), `"csv"`, `"tsv"` (for tab-delimited), and `"text"`
-for generic text-delimited files. For text files, you can pass any parsing
-options (`delim`, `quote`, etc.) to `open_dataset()` that you would otherwise
-pass to `read_csv_arrow()`.
+The file format for `open_dataset()` is controlled by the `format` parameter, 
+which has a default value of `"parquet"`.  If you had a directory
+of Arrow format files, you could instead specify `format = "arrow"` in the call.
+
+Other supported formats include: 
+
+* `"feather"` or `"ipc"` (aliases for `"arrow"`, as Feather v2 is the Arrow file format)
+* `"csv"` (comma-delimited files) and `"tsv"` (tab-delimited files)
+* `"text"` (generic text-delimited files - use the `delimiter` argument to specify which to use)
+
+For text files, you can pass the following parsing options to `open_dataset()`:
 
-The `partitioning` argument lets us specify how the file paths provide information
-about how the dataset is chunked into different files. Our files in this example
+* `delim`
+* `quote`
+* `escape_double`
+* `escape_backslash`
+* `skip_empty_rows`
+
+For more information on the usage of these parameters, see `?read_delim_arrow()`.
+
+The `partitioning` argument lets you specify how the file paths provide information
+about how the dataset is chunked into different files. The files in this example
 have file paths like
 
 ```
@@ -118,13 +130,13 @@ have file paths like
 ...
 ```
 
-By providing a character vector to `partitioning`, we're saying that the first
-path segment gives the value for `year` and the second segment is `month`.
+By providing `c("year", "month")` to the `partitioning` argument, you're saying that the first
+path segment gives the value for `year`, and the second segment is `month`.
 Every row in `2009/01/data.parquet` has a value of 2009 for `year`
-and 1 for `month`, even though those columns may not actually be present in the file.
+and 1 for `month`, even though those columns may not be present in the file.
 
-Indeed, when we look at the dataset, we see that in addition to the columns present
-in every file, there are also columns `year` and `month`.
+Indeed, when you look at the dataset, you can see that in addition to the columns present
+in every file, there are also columns `year` and `month` even though they are not present in the files themselves.
 
 ```{r, eval = file.exists("nyc-taxi")}
 ds
@@ -159,7 +171,7 @@ See $metadata for additional Schema metadata
 
 The other form of partitioning currently supported is [Hive](https://hive.apache.org/)-style,
 in which the partition variable names are included in the path segments.
-If we had saved our files in paths like
+If you had saved your files in paths like:
 
 ```
 year=2009/month=01/data.parquet
@@ -167,29 +179,29 @@ year=2009/month=02/data.parquet
 ...
 ```
 
-we would not have had to provide the names in `partitioning`:
-we could have just called `ds <- open_dataset("nyc-taxi")` and the partitions
+you would not have had to provide the names in `partitioning`;
+you could have just called `ds <- open_dataset("nyc-taxi")` and the partitions
 would have been detected automatically.
 
 ## Querying the dataset
 
-Up to this point, we haven't loaded any data: we have walked directories to find
-files, we've parsed file paths to identify partitions, and we've read the
-headers of the Parquet files to inspect their schemas so that we can make sure
-they all line up.
+Up to this point, you haven't loaded any data. You've walked directories to find
+files, you've parsed file paths to identify partitions, and you've read the
+headers of the Parquet files to inspect their schemas so that you can make sure
+they all are as expected.
 
-In the current release, `arrow` supports the dplyr verbs `mutate()`, 
+In the current release, arrow supports the dplyr verbs `mutate()`, 
 `transmute()`, `select()`, `rename()`, `relocate()`, `filter()`, and 
 `arrange()`. Aggregation is not yet supported, so before you call `summarise()`
 or other verbs with aggregate functions, use `collect()` to pull the selected
 subset of the data into an in-memory R data frame.
 
-If you attempt to call unsupported `dplyr` verbs or unimplemented functions in
-your query on an Arrow Dataset, the `arrow` package raises an error. However,
-for `dplyr` queries on `Table` objects (which are typically smaller in size) the
-package automatically calls `collect()` before processing that `dplyr` verb.
+Suppose you attempt to call unsupported dplyr verbs or unimplemented functions
+in your query on an Arrow Dataset. In that case, the arrow package raises an error. However,
+for dplyr queries on Arrow Table objects (which are already in memory), the
+package automatically calls `collect()` before processing that dplyr verb.
 
-Here's an example. Suppose I was curious about tipping behavior among the
+Here's an example: suppose that you are curious about tipping behavior among the
 longest taxi rides. Let's find the median tip percentage for rides with
 fares greater than $100 in 2015, broken down by the number of passengers:
 
@@ -228,12 +240,11 @@ cat("
 ")
 ```
 
-We just selected a subset out of a dataset with around 2 billion rows, computed
-a new column, and aggregated on it in under 2 seconds on my laptop. How does
+You've just selected a subset out of a dataset with around 2 billion rows, computed
+a new column, and aggregated it in under 2 seconds on a modern laptop. How does
 this work?
 
-First, 
-`mutate()`/`transmute()`, `select()`/`rename()`/`relocate()`, `filter()`, 
+First, `mutate()`/`transmute()`, `select()`/`rename()`/`relocate()`, `filter()`, 
 `group_by()`, and `arrange()` record their actions but don't evaluate on the
 data until you run `collect()`.
 
@@ -259,47 +270,58 @@ See $.data for the source Arrow object
 ")
 ```
 
-This returns instantly and shows the manipulations you've made, without
+This code returns an output instantly and shows the manipulations you've made, without
 loading data from the files. Because the evaluation of these queries is deferred,
 you can build up a query that selects down to a small subset without generating
 intermediate datasets that would potentially be large.
 
 Second, all work is pushed down to the individual data files,
 and depending on the file format, chunks of data within the files. As a result,
-we can select a subset of data from a much larger dataset by collecting the
-smaller slices from each file--we don't have to load the whole dataset in memory
-in order to slice from it.
+you can select a subset of data from a much larger dataset by collecting the
+smaller slices from each file—you don't have to load the whole dataset in 
+memory to slice from it.
 
-Third, because of partitioning, we can ignore some files entirely.
+Third, because of partitioning, you can ignore some files entirely.
 In this example, by filtering `year == 2015`, all files corresponding to other years
-are immediately excluded: we don't have to load them in order to find that no
+are immediately excluded: you don't have to load them in order to find that no
 rows match the filter. Relatedly, since Parquet files contain row groups with
-statistics on the data within, there may be entire chunks of data we can
+statistics on the data within, there may be entire chunks of data you can
 avoid scanning because they have no rows where `total_amount > 100`.
 
 ## More dataset options
 
 There are a few ways you can control the Dataset creation to adapt to special use cases.
-For one, if you are working with a single file or a set of files that are not
-all in the same directory, you can provide a file path or a vector of multiple
-file paths to `open_dataset()`. This is useful if, for example, you have a
-single CSV file that is too big to read into memory. You could pass the file
-path to `open_dataset()`, use `group_by()` to partition the Dataset into
-manageable chunks, then use `write_dataset()` to write each chunk to a separate
-Parquet file---all without needing to read the full CSV file into R.
-
-You can specify a `schema` argument to `open_dataset()` to declare the columns
-and their data types. This is useful if you have data files that have different
-storage schema (for example, a column could be `int32` in one and `int8` in another)
-and you want to ensure that the resulting Dataset has a specific type.
-To be clear, it's not necessary to specify a schema, even in this example of
-mixed integer types, because the Dataset constructor will reconcile differences like these.
-The schema specification just lets you declare what you want the result to be.
+
+### Work with files in a directory
+
+If you are working with a single file or a set of files that are not all in the 
+same directory, you can provide a file path or a vector of multiple file paths 
+to `open_dataset()`. This is useful if, for example, you have a single CSV file 
+that is too big to read into memory. You could pass the file path to 
+`open_dataset()`, use `group_by()` to partition the Dataset into manageable chunks, 
+then use `write_dataset()` to write each chunk to a separate Parquet file—all 
+without needing to read the full CSV file into R.
+
+### Explicitly declare column names and data types
+
+You can specify the `schema` argument to `open_dataset()` to declare the columns 
+and their data types. This is useful if you have data files that have different 
+storage schema (for example, a column could be `int32` in one and `int8` in 
+another) and you want to ensure that the resulting Dataset has a specific type.
+
+To be clear, it's not necessary to specify a schema, even in this example of 
+mixed integer types, because the Dataset constructor will reconcile differences
+like these. The schema specification just lets you declare what you want the 
+result to be.
+
+### Explicitly declare partition format
 
 Similarly, you can provide a Schema in the `partitioning` argument of `open_dataset()`
 in order to declare the types of the virtual columns that define the partitions.
-This would be useful, in our taxi dataset example, if you wanted to keep
-`month` as a string instead of an integer for some reason.
+This would be useful, in the taxi dataset example, if you wanted to keep
+`month` as a string instead of an integer.
+
+### Work with multiple data sources
 
 Another feature of Datasets is that they can be composed of multiple data sources.
 That is, you may have a directory of partitioned Parquet files in one location,
@@ -313,27 +335,29 @@ instead of a file path, or simply concatenate them like `big_dataset <- c(ds1, d
 
 As you can see, querying a large dataset can be made quite fast by storage in an
 efficient binary columnar format like Parquet or Feather and partitioning based on
-columns commonly used for filtering. However, we don't always get our data delivered
-to us that way. Sometimes we start with one giant CSV. Our first step in analyzing data
+columns commonly used for filtering. However, data isn't always stored that way.
+Sometimes you might start with one giant CSV. The first step in analyzing data 
 is cleaning is up and reshaping it into a more usable form.
 
-The `write_dataset()` function allows you to take a Dataset or other tabular data object---an Arrow `Table` or `RecordBatch`, or an R `data.frame`---and write it to a different file format, partitioned into multiple files.
+The `write_dataset()` function allows you to take a Dataset or another tabular 
+data object—an Arrow Table or RecordBatch, or an R data frame—and write
+it to a different file format, partitioned into multiple files.
 
-Assume we have a version of the NYC Taxi data as CSV:
+Assume that you have a version of the NYC Taxi data as CSV:
 
 ```r
 ds <- open_dataset("nyc-taxi/csv/", format = "csv")
 ```
 
-We can write it to a new location and translate the files to the Feather format
+You can write it to a new location and translate the files to the Feather format
 by calling `write_dataset()` on it:
 
 ```r
 write_dataset(ds, "nyc-taxi/feather", format = "feather")
 ```
 
-Next, let's imagine that the `payment_type` column is something we often filter
-on, so we want to partition the data by that variable. By doing so we ensure
+Next, let's imagine that the `payment_type` column is something you often filter
+on, so you want to partition the data by that variable. By doing so you ensure
 that a filter like `payment_type == "Cash"` will touch only a subset of files
 where `payment_type` is always `"Cash"`.
 
@@ -367,14 +391,14 @@ system("tree nyc-taxi/feather")
 
 Note that the directory names are `payment_type=Cash` and similar:
 this is the Hive-style partitioning described above. This means that when
-we call `open_dataset()` on this directory, we don't have to declare what the
+you call `open_dataset()` on this directory, you don't have to declare what the
 partitions are because they can be read from the file paths.
 (To instead write bare values for partition segments, i.e. `Cash` rather than 
 `payment_type=Cash`, call `write_dataset()` with `hive_style = FALSE`.)
 
-Perhaps, though, `payment_type == "Cash"` is the only data we ever care about,
-and we just want to drop the rest and have a smaller working set.
-For this, we can `filter()` them out when writing:
+Perhaps, though, `payment_type == "Cash"` is the only data you ever care about,
+and you just want to drop the rest and have a smaller working set.
+For this, you can `filter()` them out when writing:
 
 ```r
 ds %>%
@@ -382,9 +406,9 @@ ds %>%
   write_dataset("nyc-taxi/feather", format = "feather")
 ```
 
-The other thing we can do when writing datasets is select a subset of and/or reorder
-columns. Suppose we never care about `vendor_id`, and being a string column,
-it can take up a lot of space when we read it in, so let's drop it:
+The other thing you can do when writing datasets is select a subset of columns 
+or reorder them. Suppose you never care about `vendor_id`, and being a string column,
+it can take up a lot of space when you read it in, so let's drop it:
 
 ```r
 ds %>%

From d6ae99cdeadcb14a32617843cf2088a3e6190a67 Mon Sep 17 00:00:00 2001
From: kharoc <kharoly.cs@gmail.com>
Date: Wed, 4 Aug 2021 16:50:51 -0400
Subject: [PATCH 711/719] ARROW-13089: [Python] Allow creating RecordBatch from
 Python dict

Create a from_pydict function in RecordBatch class.
Create unit test for from_pydict

Closes #10854 from kharoc/ARROW-13089

Authored-by: kharoc <kharoly.cs@gmail.com>
Signed-off-by: Benjamin Kietzman <bengilgit@gmail.com>
---
 python/pyarrow/table.pxi           | 104 +++++++++++++++++++++--------
 python/pyarrow/tests/test_table.py |  27 +++++---
 2 files changed, 94 insertions(+), 37 deletions(-)

diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index 65f1ba11dc9..d92bdb2efa3 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -616,6 +616,30 @@ cdef class RecordBatch(_PandasConvertible):
         self.sp_batch = batch
         self.batch = batch.get()
 
+    @staticmethod
+    def from_pydict(mapping, schema=None, metadata=None):
+        """
+        Construct a RecordBatch from Arrow arrays or columns.
+
+        Parameters
+        ----------
+        mapping : dict or Mapping
+            A mapping of strings to Arrays or Python lists.
+        schema : Schema, default None
+            If not passed, will be inferred from the Mapping values.
+        metadata : dict or Mapping, default None
+            Optional metadata for the schema (if inferred).
+
+        Returns
+        -------
+        RecordBatch
+        """
+
+        return _from_pydict(cls=RecordBatch,
+                            mapping=mapping,
+                            schema=schema,
+                            metadata=metadata)
+
     def __reduce__(self):
         return _reconstruct_record_batch, (self.columns, self.schema)
 
@@ -1631,33 +1655,11 @@ cdef class Table(_PandasConvertible):
         -------
         Table
         """
-        arrays = []
-        if schema is None:
-            names = []
-            for k, v in mapping.items():
-                names.append(k)
-                arrays.append(asarray(v))
-            return Table.from_arrays(arrays, names, metadata=metadata)
-        elif isinstance(schema, Schema):
-            for field in schema:
-                try:
-                    v = mapping[field.name]
-                except KeyError:
-                    try:
-                        v = mapping[tobytes(field.name)]
-                    except KeyError:
-                        present = mapping.keys()
-                        missing = [n for n in schema.names if n not in present]
-                        raise KeyError(
-                            "The passed mapping doesn't contain the "
-                            "following field(s) of the schema: {}".
-                            format(', '.join(missing))
-                        )
-                arrays.append(asarray(v, type=field.type))
-            # Will raise if metadata is not None
-            return Table.from_arrays(arrays, schema=schema, metadata=metadata)
-        else:
-            raise TypeError('Schema must be an instance of pyarrow.Schema')
+
+        return _from_pydict(cls=Table,
+                            mapping=mapping,
+                            schema=schema,
+                            metadata=metadata)
 
     @staticmethod
     def from_batches(batches, Schema schema=None):
@@ -2272,3 +2274,51 @@ def concat_tables(tables, c_bool promote=False, MemoryPool memory_pool=None):
             ConcatenateTables(c_tables, options, pool))
 
     return pyarrow_wrap_table(c_result_table)
+
+
+def _from_pydict(cls, mapping, schema, metadata):
+    """
+    Construct a Table/RecordBatch from Arrow arrays or columns.
+
+    Parameters
+    ----------
+    cls : Class Table/RecordBatch
+    mapping : dict or Mapping
+        A mapping of strings to Arrays or Python lists.
+    schema : Schema, default None
+        If not passed, will be inferred from the Mapping values.
+    metadata : dict or Mapping, default None
+        Optional metadata for the schema (if inferred).
+
+    Returns
+    -------
+    Table/RecordBatch
+    """
+
+    arrays = []
+    if schema is None:
+        names = []
+        for k, v in mapping.items():
+            names.append(k)
+            arrays.append(asarray(v))
+        return cls.from_arrays(arrays, names, metadata=metadata)
+    elif isinstance(schema, Schema):
+        for field in schema:
+            try:
+                v = mapping[field.name]
+            except KeyError:
+                try:
+                    v = mapping[tobytes(field.name)]
+                except KeyError:
+                    present = mapping.keys()
+                    missing = [n for n in schema.names if n not in present]
+                    raise KeyError(
+                        "The passed mapping doesn't contain the "
+                        "following field(s) of the schema: {}".
+                        format(', '.join(missing))
+                    )
+            arrays.append(asarray(v, type=field.type))
+        # Will raise if metadata is not None
+        return cls.from_arrays(arrays, schema=schema, metadata=metadata)
+    else:
+        raise TypeError('Schema must be an instance of pyarrow.Schema')
diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py
index 7ba844aa809..72bb8ef2d99 100644
--- a/python/pyarrow/tests/test_table.py
+++ b/python/pyarrow/tests/test_table.py
@@ -1339,8 +1339,15 @@ def test_from_arrays_schema(data, klass):
         pa.Table.from_arrays(data, schema=schema, metadata={b'foo': b'bar'})
 
 
-def test_table_from_pydict():
-    table = pa.Table.from_pydict({})
+@pytest.mark.parametrize(
+    ('cls'),
+    [
+        (pa.Table),
+        (pa.RecordBatch)
+    ]
+)
+def test_table_from_pydict(cls):
+    table = cls.from_pydict({})
     assert table.num_columns == 0
     assert table.num_rows == 0
     assert table.schema == pa.schema([])
@@ -1351,7 +1358,7 @@ def test_table_from_pydict():
     # With lists as values
     data = OrderedDict([('strs', ['', 'foo', 'bar']),
                         ('floats', [4.5, 5, None])])
-    table = pa.Table.from_pydict(data)
+    table = cls.from_pydict(data)
     assert table.num_columns == 2
     assert table.num_rows == 3
     assert table.schema == schema
@@ -1360,29 +1367,29 @@ def test_table_from_pydict():
     # With metadata and inferred schema
     metadata = {b'foo': b'bar'}
     schema = schema.with_metadata(metadata)
-    table = pa.Table.from_pydict(data, metadata=metadata)
+    table = cls.from_pydict(data, metadata=metadata)
     assert table.schema == schema
     assert table.schema.metadata == metadata
     assert table.to_pydict() == data
 
     # With explicit schema
-    table = pa.Table.from_pydict(data, schema=schema)
+    table = cls.from_pydict(data, schema=schema)
     assert table.schema == schema
     assert table.schema.metadata == metadata
     assert table.to_pydict() == data
 
     # Cannot pass both schema and metadata
     with pytest.raises(ValueError):
-        pa.Table.from_pydict(data, schema=schema, metadata=metadata)
+        cls.from_pydict(data, schema=schema, metadata=metadata)
 
     # Non-convertible values given schema
     with pytest.raises(TypeError):
-        pa.Table.from_pydict({'c0': [0, 1, 2]},
-                             schema=pa.schema([("c0", pa.string())]))
+        cls.from_pydict({'c0': [0, 1, 2]},
+                        schema=pa.schema([("c0", pa.string())]))
 
     # Missing schema fields from the passed mapping
     with pytest.raises(KeyError, match="doesn\'t contain.* c, d"):
-        pa.Table.from_pydict(
+        cls.from_pydict(
             {'a': [1, 2, 3], 'b': [3, 4, 5]},
             schema=pa.schema([
                 ('a', pa.int64()),
@@ -1393,7 +1400,7 @@ def test_table_from_pydict():
 
     # Passed wrong schema type
     with pytest.raises(TypeError):
-        pa.Table.from_pydict({'a': [1, 2, 3]}, schema={})
+        cls.from_pydict({'a': [1, 2, 3]}, schema={})
 
 
 @pytest.mark.parametrize('data, klass', [

From 9ec77d4d9975c7ad2ff85f01e1e93a0700594e20 Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Wed, 4 Aug 2021 16:12:52 -0500
Subject: [PATCH 712/719] ARROW-13538: [R] [CI] Don't test DuckDB in the
 minimal build

Also request the correct version of duckdb now that it's been released.

Closes #10861 from jonkeane/ARROW-13538-gate-duckdb-tests

Authored-by: Jonathan Keane <jkeane@gmail.com>
Signed-off-by: Jonathan Keane <jkeane@gmail.com>
---
 .github/workflows/r.yml        |  3 ++-
 ci/scripts/r_deps.sh           |  4 ++--
 r/DESCRIPTION                  |  2 +-
 r/R/duckdb.R                   | 10 ++++++++++
 r/tests/testthat/test-duckdb.R |  1 +
 5 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml
index 10db0bc4249..9a2fcf5daec 100644
--- a/.github/workflows/r.yml
+++ b/.github/workflows/r.yml
@@ -247,7 +247,8 @@ jobs:
         run: |
           Sys.setenv(
             RWINLIB_LOCAL = file.path(Sys.getenv("GITHUB_WORKSPACE"), "libarrow.zip"),
-            MAKEFLAGS = paste0("-j", parallel::detectCores())
+            MAKEFLAGS = paste0("-j", parallel::detectCores()),
+            "_R_CHECK_FORCE_SUGGESTS_" = FALSE
           )
           rcmdcheck::rcmdcheck("r",
             build_args = '--no-build-vignettes',
diff --git a/ci/scripts/r_deps.sh b/ci/scripts/r_deps.sh
index 379ee945559..243a7efc9cf 100755
--- a/ci/scripts/r_deps.sh
+++ b/ci/scripts/r_deps.sh
@@ -28,10 +28,10 @@ pushd ${source_dir}
 ${R_BIN} -e "install.packages('remotes'); remotes::install_cran(c('glue', 'rcmdcheck', 'sys'))"
 
 if [ ${R_BIN} = "RDsan" ]; then
-  # To prevent the build from timing out, let's prune some optional deps
+  # To prevent the build from timing out, let's prune some optional deps (and their possible version requirements)
   ${R_BIN} -e 'd <- read.dcf("DESCRIPTION")
   to_prune <- c("duckdb", "DBI", "dbplyr", "decor", "knitr", "rmarkdown", "pkgload", "reticulate")
-  pattern <- paste0("\\n?", to_prune, ",?", collapse = "|")
+  pattern <- paste0("\\n?", to_prune, " (\\\\(.*\\\\))?,?", collapse = "|")
   d[,"Suggests"] <- gsub(pattern, "", d[,"Suggests"])
   write.dcf(d, "DESCRIPTION")'
 fi
diff --git a/r/DESCRIPTION b/r/DESCRIPTION
index 78eec3631a1..b531f75643f 100644
--- a/r/DESCRIPTION
+++ b/r/DESCRIPTION
@@ -45,7 +45,7 @@ Suggests:
     decor,
     distro,
     dplyr,
-    duckdb,
+    duckdb (>= 0.2.8),
     hms,
     knitr,
     lubridate,
diff --git a/r/R/duckdb.R b/r/R/duckdb.R
index bc003a6ea8f..c57edf0df23 100644
--- a/r/R/duckdb.R
+++ b/r/R/duckdb.R
@@ -89,6 +89,16 @@ arrow_duck_connection <- function() {
   con
 }
 
+# helper function to determine if duckdb examples should run
+# see: https://github.com/r-lib/roxygen2/issues/1242
+run_duckdb_examples <- function() {
+  arrow_with_dataset() &&
+    requireNamespace("duckdb", quietly = TRUE) &&
+    packageVersion("duckdb") > "0.2.7" &&
+    requireNamespace("dplyr", quietly = TRUE) &&
+    requireNamespace("dbplyr", quietly = TRUE)
+}
+
 # Adapted from dbplyr
 unique_arrow_tablename <- function() {
   i <- getOption("arrow_table_name", 0) + 1
diff --git a/r/tests/testthat/test-duckdb.R b/r/tests/testthat/test-duckdb.R
index ff468a799ec..aec54d8eed4 100644
--- a/r/tests/testthat/test-duckdb.R
+++ b/r/tests/testthat/test-duckdb.R
@@ -17,6 +17,7 @@
 
 skip_if_not_installed("duckdb", minimum_version = "0.2.8")
 skip_if_not_installed("dbplyr")
+skip_if_not_available("dataset")
 library(duckdb)
 library(dplyr)
 

From fab0d70880f76bf0cea5d8681021b03eabacff21 Mon Sep 17 00:00:00 2001
From: Nate Clark <nate@neworld.us>
Date: Thu, 5 Aug 2021 06:35:46 +0900
Subject: [PATCH 713/719] ARROW-13556: [C++] Add protobuf to linking for flight

Closes #10873 from n3world/ARROW-13556_link_protobuf

Authored-by: Nate Clark <nate@neworld.us>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 cpp/src/arrow/flight/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/src/arrow/flight/CMakeLists.txt b/cpp/src/arrow/flight/CMakeLists.txt
index 4e46243b18d..5429a23672a 100644
--- a/cpp/src/arrow/flight/CMakeLists.txt
+++ b/cpp/src/arrow/flight/CMakeLists.txt
@@ -19,7 +19,7 @@ add_custom_target(arrow_flight)
 
 arrow_install_all_headers("arrow/flight")
 
-set(ARROW_FLIGHT_LINK_LIBS gRPC::grpc++)
+set(ARROW_FLIGHT_LINK_LIBS gRPC::grpc++ ${ARROW_PROTOBUF_LIBPROTOBUF})
 
 if(WIN32)
   list(APPEND ARROW_FLIGHT_LINK_LIBS ws2_32.lib)

From cbcf5cbc705c55469ad047cbe0dded083676d7b7 Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Wed, 4 Aug 2021 18:38:54 -0500
Subject: [PATCH 714/719] ARROW-13562: [R] Styler followups
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds styling tasks to the Makefile (for 🦖  like me; I found that the styling-on-save from vscode was not reliable). Also makes codegen.R generate styled R code.

Closes #10879 from nealrichardson/styler2

Lead-authored-by: Jonathan Keane <jkeane@gmail.com>
Co-authored-by: Neal Richardson <neal.p.richardson@gmail.com>
Signed-off-by: Jonathan Keane <jkeane@gmail.com>
---
 .github/workflows/comment_bot.yml |    2 +-
 r/.Rbuildignore                   |    1 +
 r/.styler_excludes.R              |   18 +
 r/Makefile                        |    8 +-
 r/R/arrowExports.R                | 1782 ++++++++++++++---------------
 r/R/dplyr-functions.R             |    4 +-
 r/R/dplyr-summarize.R             |    2 +-
 r/R/duckdb.R                      |    8 +-
 r/data-raw/codegen.R              |    7 +-
 r/extra-tests/write-files.R       |    1 -
 r/tests/testthat/test-duckdb.R    |    2 +
 r/tools/nixlibs.R                 |   12 +-
 r/vignettes/developing.Rmd        |   18 +-
 13 files changed, 944 insertions(+), 921 deletions(-)
 create mode 100644 r/.styler_excludes.R

diff --git a/.github/workflows/comment_bot.yml b/.github/workflows/comment_bot.yml
index 5709ceaffd6..35d889152fb 100644
--- a/.github/workflows/comment_bot.yml
+++ b/.github/workflows/comment_bot.yml
@@ -133,7 +133,7 @@ jobs:
           # only grab the .R files under r/
           changed_files <- grep('^r/.*\\.R$', changed_files, value = TRUE)
           # remove latin1 which is unstylable due to encoding and codegen.R which is unique
-          changed_files <- changed_files[!changed_files %in% c("r/tests/testthat/latin1.R", "r/data-raw/codegen.R")]
+          changed_files <- changed_files[!changed_files %in% file.path("r", source("r/.styler_excludes.R")$value)]
           source("ci/etc/rprofile")
           install.packages(c("remotes", "styler"))
           remotes::install_deps("r")
diff --git a/r/.Rbuildignore b/r/.Rbuildignore
index 3f67ef7cf3c..4bead75ea7e 100644
--- a/r/.Rbuildignore
+++ b/r/.Rbuildignore
@@ -26,3 +26,4 @@ clang_format.sh
 ^extra-tests
 STYLE.md
 ^.lintr
+^.styler_excludes.R
diff --git a/r/.styler_excludes.R b/r/.styler_excludes.R
new file mode 100644
index 00000000000..19cd1ffa550
--- /dev/null
+++ b/r/.styler_excludes.R
@@ -0,0 +1,18 @@
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing,
+#  software distributed under the License is distributed on an
+#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+#  KIND, either express or implied.  See the License for the
+#  specific language governing permissions and limitations
+#  under the License.
+
+c("tests/testthat/latin1.R", "data-raw/codegen.R")
\ No newline at end of file
diff --git a/r/Makefile b/r/Makefile
index efc55abf0ae..7a51cbd5188 100644
--- a/r/Makefile
+++ b/r/Makefile
@@ -19,7 +19,13 @@ VERSION=$(shell grep ^Version DESCRIPTION | sed s/Version:\ //)
 ARROW_R_DEV="TRUE"
 ARROW_LARGE_MEMORY_TESTS=$(ARROW_R_DEV)
 
-doc:
+style:
+	R -s -e 'setwd(".."); if (requireNamespace("styler")) styler::style_file(setdiff(system("git diff --name-only | grep r/.*R$$", intern = TRUE), file.path("r", source("r/.styler_excludes.R")$$value)))'
+
+style-all:
+	R -s -e 'styler::style_file(setdiff(dir(pattern = "R$$", recursive = TRUE), source(".styler_excludes.R")$$value))'
+
+doc: style
 	R -s -e 'roxygen2::roxygenize()'
 	-git add --all man/*.Rd
 
diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R
index 268a17ef4f4..73f3ba67de6 100644
--- a/r/R/arrowExports.R
+++ b/r/R/arrowExports.R
@@ -1,1784 +1,1782 @@
 # Generated by using data-raw/codegen.R -> do not edit by hand
 
-is_altrep_int_nonull <- function(x){
-    .Call(`_arrow_is_altrep_int_nonull`, x)
+is_altrep_int_nonull <- function(x) {
+  .Call(`_arrow_is_altrep_int_nonull`, x)
 }
 
-is_altrep_dbl_nonull <- function(x){
-    .Call(`_arrow_is_altrep_dbl_nonull`, x)
+is_altrep_dbl_nonull <- function(x) {
+  .Call(`_arrow_is_altrep_dbl_nonull`, x)
 }
 
-Array__Slice1 <- function(array, offset){
-    .Call(`_arrow_Array__Slice1`, array, offset)
+Array__Slice1 <- function(array, offset) {
+  .Call(`_arrow_Array__Slice1`, array, offset)
 }
 
-Array__Slice2 <- function(array, offset, length){
-    .Call(`_arrow_Array__Slice2`, array, offset, length)
+Array__Slice2 <- function(array, offset, length) {
+  .Call(`_arrow_Array__Slice2`, array, offset, length)
 }
 
-Array__IsNull <- function(x, i){
-    .Call(`_arrow_Array__IsNull`, x, i)
+Array__IsNull <- function(x, i) {
+  .Call(`_arrow_Array__IsNull`, x, i)
 }
 
-Array__IsValid <- function(x, i){
-    .Call(`_arrow_Array__IsValid`, x, i)
+Array__IsValid <- function(x, i) {
+  .Call(`_arrow_Array__IsValid`, x, i)
 }
 
-Array__length <- function(x){
-    .Call(`_arrow_Array__length`, x)
+Array__length <- function(x) {
+  .Call(`_arrow_Array__length`, x)
 }
 
-Array__offset <- function(x){
-    .Call(`_arrow_Array__offset`, x)
+Array__offset <- function(x) {
+  .Call(`_arrow_Array__offset`, x)
 }
 
-Array__null_count <- function(x){
-    .Call(`_arrow_Array__null_count`, x)
+Array__null_count <- function(x) {
+  .Call(`_arrow_Array__null_count`, x)
 }
 
-Array__type <- function(x){
-    .Call(`_arrow_Array__type`, x)
+Array__type <- function(x) {
+  .Call(`_arrow_Array__type`, x)
 }
 
-Array__ToString <- function(x){
-    .Call(`_arrow_Array__ToString`, x)
+Array__ToString <- function(x) {
+  .Call(`_arrow_Array__ToString`, x)
 }
 
-Array__type_id <- function(x){
-    .Call(`_arrow_Array__type_id`, x)
+Array__type_id <- function(x) {
+  .Call(`_arrow_Array__type_id`, x)
 }
 
-Array__Equals <- function(lhs, rhs){
-    .Call(`_arrow_Array__Equals`, lhs, rhs)
+Array__Equals <- function(lhs, rhs) {
+  .Call(`_arrow_Array__Equals`, lhs, rhs)
 }
 
-Array__ApproxEquals <- function(lhs, rhs){
-    .Call(`_arrow_Array__ApproxEquals`, lhs, rhs)
+Array__ApproxEquals <- function(lhs, rhs) {
+  .Call(`_arrow_Array__ApproxEquals`, lhs, rhs)
 }
 
-Array__Diff <- function(lhs, rhs){
-    .Call(`_arrow_Array__Diff`, lhs, rhs)
+Array__Diff <- function(lhs, rhs) {
+  .Call(`_arrow_Array__Diff`, lhs, rhs)
 }
 
-Array__data <- function(array){
-    .Call(`_arrow_Array__data`, array)
+Array__data <- function(array) {
+  .Call(`_arrow_Array__data`, array)
 }
 
-Array__RangeEquals <- function(self, other, start_idx, end_idx, other_start_idx){
-    .Call(`_arrow_Array__RangeEquals`, self, other, start_idx, end_idx, other_start_idx)
+Array__RangeEquals <- function(self, other, start_idx, end_idx, other_start_idx) {
+  .Call(`_arrow_Array__RangeEquals`, self, other, start_idx, end_idx, other_start_idx)
 }
 
-Array__View <- function(array, type){
-    .Call(`_arrow_Array__View`, array, type)
+Array__View <- function(array, type) {
+  .Call(`_arrow_Array__View`, array, type)
 }
 
-Array__Validate <- function(array){
-    invisible(.Call(`_arrow_Array__Validate`, array))
+Array__Validate <- function(array) {
+  invisible(.Call(`_arrow_Array__Validate`, array))
 }
 
-DictionaryArray__indices <- function(array){
-    .Call(`_arrow_DictionaryArray__indices`, array)
+DictionaryArray__indices <- function(array) {
+  .Call(`_arrow_DictionaryArray__indices`, array)
 }
 
-DictionaryArray__dictionary <- function(array){
-    .Call(`_arrow_DictionaryArray__dictionary`, array)
+DictionaryArray__dictionary <- function(array) {
+  .Call(`_arrow_DictionaryArray__dictionary`, array)
 }
 
-StructArray__field <- function(array, i){
-    .Call(`_arrow_StructArray__field`, array, i)
+StructArray__field <- function(array, i) {
+  .Call(`_arrow_StructArray__field`, array, i)
 }
 
-StructArray__GetFieldByName <- function(array, name){
-    .Call(`_arrow_StructArray__GetFieldByName`, array, name)
+StructArray__GetFieldByName <- function(array, name) {
+  .Call(`_arrow_StructArray__GetFieldByName`, array, name)
 }
 
-StructArray__Flatten <- function(array){
-    .Call(`_arrow_StructArray__Flatten`, array)
+StructArray__Flatten <- function(array) {
+  .Call(`_arrow_StructArray__Flatten`, array)
 }
 
-ListArray__value_type <- function(array){
-    .Call(`_arrow_ListArray__value_type`, array)
+ListArray__value_type <- function(array) {
+  .Call(`_arrow_ListArray__value_type`, array)
 }
 
-LargeListArray__value_type <- function(array){
-    .Call(`_arrow_LargeListArray__value_type`, array)
+LargeListArray__value_type <- function(array) {
+  .Call(`_arrow_LargeListArray__value_type`, array)
 }
 
-ListArray__values <- function(array){
-    .Call(`_arrow_ListArray__values`, array)
+ListArray__values <- function(array) {
+  .Call(`_arrow_ListArray__values`, array)
 }
 
-LargeListArray__values <- function(array){
-    .Call(`_arrow_LargeListArray__values`, array)
+LargeListArray__values <- function(array) {
+  .Call(`_arrow_LargeListArray__values`, array)
 }
 
-ListArray__value_length <- function(array, i){
-    .Call(`_arrow_ListArray__value_length`, array, i)
+ListArray__value_length <- function(array, i) {
+  .Call(`_arrow_ListArray__value_length`, array, i)
 }
 
-LargeListArray__value_length <- function(array, i){
-    .Call(`_arrow_LargeListArray__value_length`, array, i)
+LargeListArray__value_length <- function(array, i) {
+  .Call(`_arrow_LargeListArray__value_length`, array, i)
 }
 
-FixedSizeListArray__value_length <- function(array, i){
-    .Call(`_arrow_FixedSizeListArray__value_length`, array, i)
+FixedSizeListArray__value_length <- function(array, i) {
+  .Call(`_arrow_FixedSizeListArray__value_length`, array, i)
 }
 
-ListArray__value_offset <- function(array, i){
-    .Call(`_arrow_ListArray__value_offset`, array, i)
+ListArray__value_offset <- function(array, i) {
+  .Call(`_arrow_ListArray__value_offset`, array, i)
 }
 
-LargeListArray__value_offset <- function(array, i){
-    .Call(`_arrow_LargeListArray__value_offset`, array, i)
+LargeListArray__value_offset <- function(array, i) {
+  .Call(`_arrow_LargeListArray__value_offset`, array, i)
 }
 
-FixedSizeListArray__value_offset <- function(array, i){
-    .Call(`_arrow_FixedSizeListArray__value_offset`, array, i)
+FixedSizeListArray__value_offset <- function(array, i) {
+  .Call(`_arrow_FixedSizeListArray__value_offset`, array, i)
 }
 
-ListArray__raw_value_offsets <- function(array){
-    .Call(`_arrow_ListArray__raw_value_offsets`, array)
+ListArray__raw_value_offsets <- function(array) {
+  .Call(`_arrow_ListArray__raw_value_offsets`, array)
 }
 
-LargeListArray__raw_value_offsets <- function(array){
-    .Call(`_arrow_LargeListArray__raw_value_offsets`, array)
+LargeListArray__raw_value_offsets <- function(array) {
+  .Call(`_arrow_LargeListArray__raw_value_offsets`, array)
 }
 
-Array__as_vector <- function(array){
-    .Call(`_arrow_Array__as_vector`, array)
+Array__as_vector <- function(array) {
+  .Call(`_arrow_Array__as_vector`, array)
 }
 
-ChunkedArray__as_vector <- function(chunked_array, use_threads){
-    .Call(`_arrow_ChunkedArray__as_vector`, chunked_array, use_threads)
+ChunkedArray__as_vector <- function(chunked_array, use_threads) {
+  .Call(`_arrow_ChunkedArray__as_vector`, chunked_array, use_threads)
 }
 
-RecordBatch__to_dataframe <- function(batch, use_threads){
-    .Call(`_arrow_RecordBatch__to_dataframe`, batch, use_threads)
+RecordBatch__to_dataframe <- function(batch, use_threads) {
+  .Call(`_arrow_RecordBatch__to_dataframe`, batch, use_threads)
 }
 
-Table__to_dataframe <- function(table, use_threads){
-    .Call(`_arrow_Table__to_dataframe`, table, use_threads)
+Table__to_dataframe <- function(table, use_threads) {
+  .Call(`_arrow_Table__to_dataframe`, table, use_threads)
 }
 
-ArrayData__get_type <- function(x){
-    .Call(`_arrow_ArrayData__get_type`, x)
+ArrayData__get_type <- function(x) {
+  .Call(`_arrow_ArrayData__get_type`, x)
 }
 
-ArrayData__get_length <- function(x){
-    .Call(`_arrow_ArrayData__get_length`, x)
+ArrayData__get_length <- function(x) {
+  .Call(`_arrow_ArrayData__get_length`, x)
 }
 
-ArrayData__get_null_count <- function(x){
-    .Call(`_arrow_ArrayData__get_null_count`, x)
+ArrayData__get_null_count <- function(x) {
+  .Call(`_arrow_ArrayData__get_null_count`, x)
 }
 
-ArrayData__get_offset <- function(x){
-    .Call(`_arrow_ArrayData__get_offset`, x)
+ArrayData__get_offset <- function(x) {
+  .Call(`_arrow_ArrayData__get_offset`, x)
 }
 
-ArrayData__buffers <- function(x){
-    .Call(`_arrow_ArrayData__buffers`, x)
+ArrayData__buffers <- function(x) {
+  .Call(`_arrow_ArrayData__buffers`, x)
 }
 
-Buffer__is_mutable <- function(buffer){
-    .Call(`_arrow_Buffer__is_mutable`, buffer)
+Buffer__is_mutable <- function(buffer) {
+  .Call(`_arrow_Buffer__is_mutable`, buffer)
 }
 
-Buffer__ZeroPadding <- function(buffer){
-    invisible(.Call(`_arrow_Buffer__ZeroPadding`, buffer))
+Buffer__ZeroPadding <- function(buffer) {
+  invisible(.Call(`_arrow_Buffer__ZeroPadding`, buffer))
 }
 
-Buffer__capacity <- function(buffer){
-    .Call(`_arrow_Buffer__capacity`, buffer)
+Buffer__capacity <- function(buffer) {
+  .Call(`_arrow_Buffer__capacity`, buffer)
 }
 
-Buffer__size <- function(buffer){
-    .Call(`_arrow_Buffer__size`, buffer)
+Buffer__size <- function(buffer) {
+  .Call(`_arrow_Buffer__size`, buffer)
 }
 
-r___RBuffer__initialize <- function(x){
-    .Call(`_arrow_r___RBuffer__initialize`, x)
+r___RBuffer__initialize <- function(x) {
+  .Call(`_arrow_r___RBuffer__initialize`, x)
 }
 
-Buffer__data <- function(buffer){
-    .Call(`_arrow_Buffer__data`, buffer)
+Buffer__data <- function(buffer) {
+  .Call(`_arrow_Buffer__data`, buffer)
 }
 
-Buffer__Equals <- function(x, y){
-    .Call(`_arrow_Buffer__Equals`, x, y)
+Buffer__Equals <- function(x, y) {
+  .Call(`_arrow_Buffer__Equals`, x, y)
 }
 
-ChunkedArray__length <- function(chunked_array){
-    .Call(`_arrow_ChunkedArray__length`, chunked_array)
+ChunkedArray__length <- function(chunked_array) {
+  .Call(`_arrow_ChunkedArray__length`, chunked_array)
 }
 
-ChunkedArray__null_count <- function(chunked_array){
-    .Call(`_arrow_ChunkedArray__null_count`, chunked_array)
+ChunkedArray__null_count <- function(chunked_array) {
+  .Call(`_arrow_ChunkedArray__null_count`, chunked_array)
 }
 
-ChunkedArray__num_chunks <- function(chunked_array){
-    .Call(`_arrow_ChunkedArray__num_chunks`, chunked_array)
+ChunkedArray__num_chunks <- function(chunked_array) {
+  .Call(`_arrow_ChunkedArray__num_chunks`, chunked_array)
 }
 
-ChunkedArray__chunk <- function(chunked_array, i){
-    .Call(`_arrow_ChunkedArray__chunk`, chunked_array, i)
+ChunkedArray__chunk <- function(chunked_array, i) {
+  .Call(`_arrow_ChunkedArray__chunk`, chunked_array, i)
 }
 
-ChunkedArray__chunks <- function(chunked_array){
-    .Call(`_arrow_ChunkedArray__chunks`, chunked_array)
+ChunkedArray__chunks <- function(chunked_array) {
+  .Call(`_arrow_ChunkedArray__chunks`, chunked_array)
 }
 
-ChunkedArray__type <- function(chunked_array){
-    .Call(`_arrow_ChunkedArray__type`, chunked_array)
+ChunkedArray__type <- function(chunked_array) {
+  .Call(`_arrow_ChunkedArray__type`, chunked_array)
 }
 
-ChunkedArray__Slice1 <- function(chunked_array, offset){
-    .Call(`_arrow_ChunkedArray__Slice1`, chunked_array, offset)
+ChunkedArray__Slice1 <- function(chunked_array, offset) {
+  .Call(`_arrow_ChunkedArray__Slice1`, chunked_array, offset)
 }
 
-ChunkedArray__Slice2 <- function(chunked_array, offset, length){
-    .Call(`_arrow_ChunkedArray__Slice2`, chunked_array, offset, length)
+ChunkedArray__Slice2 <- function(chunked_array, offset, length) {
+  .Call(`_arrow_ChunkedArray__Slice2`, chunked_array, offset, length)
 }
 
-ChunkedArray__View <- function(array, type){
-    .Call(`_arrow_ChunkedArray__View`, array, type)
+ChunkedArray__View <- function(array, type) {
+  .Call(`_arrow_ChunkedArray__View`, array, type)
 }
 
-ChunkedArray__Validate <- function(chunked_array){
-    invisible(.Call(`_arrow_ChunkedArray__Validate`, chunked_array))
+ChunkedArray__Validate <- function(chunked_array) {
+  invisible(.Call(`_arrow_ChunkedArray__Validate`, chunked_array))
 }
 
-ChunkedArray__Equals <- function(x, y){
-    .Call(`_arrow_ChunkedArray__Equals`, x, y)
+ChunkedArray__Equals <- function(x, y) {
+  .Call(`_arrow_ChunkedArray__Equals`, x, y)
 }
 
-ChunkedArray__ToString <- function(x){
-    .Call(`_arrow_ChunkedArray__ToString`, x)
+ChunkedArray__ToString <- function(x) {
+  .Call(`_arrow_ChunkedArray__ToString`, x)
 }
 
-ChunkedArray__from_list <- function(chunks, s_type){
-    .Call(`_arrow_ChunkedArray__from_list`, chunks, s_type)
+ChunkedArray__from_list <- function(chunks, s_type) {
+  .Call(`_arrow_ChunkedArray__from_list`, chunks, s_type)
 }
 
-util___Codec__Create <- function(codec, compression_level){
-    .Call(`_arrow_util___Codec__Create`, codec, compression_level)
+util___Codec__Create <- function(codec, compression_level) {
+  .Call(`_arrow_util___Codec__Create`, codec, compression_level)
 }
 
-util___Codec__name <- function(codec){
-    .Call(`_arrow_util___Codec__name`, codec)
+util___Codec__name <- function(codec) {
+  .Call(`_arrow_util___Codec__name`, codec)
 }
 
-util___Codec__IsAvailable <- function(codec){
-    .Call(`_arrow_util___Codec__IsAvailable`, codec)
+util___Codec__IsAvailable <- function(codec) {
+  .Call(`_arrow_util___Codec__IsAvailable`, codec)
 }
 
-io___CompressedOutputStream__Make <- function(codec, raw){
-    .Call(`_arrow_io___CompressedOutputStream__Make`, codec, raw)
+io___CompressedOutputStream__Make <- function(codec, raw) {
+  .Call(`_arrow_io___CompressedOutputStream__Make`, codec, raw)
 }
 
-io___CompressedInputStream__Make <- function(codec, raw){
-    .Call(`_arrow_io___CompressedInputStream__Make`, codec, raw)
+io___CompressedInputStream__Make <- function(codec, raw) {
+  .Call(`_arrow_io___CompressedInputStream__Make`, codec, raw)
 }
 
-ExecPlan_create <- function(use_threads){
-    .Call(`_arrow_ExecPlan_create`, use_threads)
+ExecPlan_create <- function(use_threads) {
+  .Call(`_arrow_ExecPlan_create`, use_threads)
 }
 
-ExecPlan_run <- function(plan, final_node){
-    .Call(`_arrow_ExecPlan_run`, plan, final_node)
+ExecPlan_run <- function(plan, final_node) {
+  .Call(`_arrow_ExecPlan_run`, plan, final_node)
 }
 
-ExecNode_Scan <- function(plan, dataset, filter, materialized_field_names){
-    .Call(`_arrow_ExecNode_Scan`, plan, dataset, filter, materialized_field_names)
+ExecNode_Scan <- function(plan, dataset, filter, materialized_field_names) {
+  .Call(`_arrow_ExecNode_Scan`, plan, dataset, filter, materialized_field_names)
 }
 
-ExecNode_Filter <- function(input, filter){
-    .Call(`_arrow_ExecNode_Filter`, input, filter)
+ExecNode_Filter <- function(input, filter) {
+  .Call(`_arrow_ExecNode_Filter`, input, filter)
 }
 
-ExecNode_Project <- function(input, exprs, names){
-    .Call(`_arrow_ExecNode_Project`, input, exprs, names)
+ExecNode_Project <- function(input, exprs, names) {
+  .Call(`_arrow_ExecNode_Project`, input, exprs, names)
 }
 
-ExecNode_ScalarAggregate <- function(input, options, target_names, out_field_names){
-    .Call(`_arrow_ExecNode_ScalarAggregate`, input, options, target_names, out_field_names)
+ExecNode_ScalarAggregate <- function(input, options, target_names, out_field_names) {
+  .Call(`_arrow_ExecNode_ScalarAggregate`, input, options, target_names, out_field_names)
 }
 
-ExecNode_GroupByAggregate <- function(input, group_vars, agg_srcs, aggregations){
-    .Call(`_arrow_ExecNode_GroupByAggregate`, input, group_vars, agg_srcs, aggregations)
+ExecNode_GroupByAggregate <- function(input, group_vars, agg_srcs, aggregations) {
+  .Call(`_arrow_ExecNode_GroupByAggregate`, input, group_vars, agg_srcs, aggregations)
 }
 
-RecordBatch__cast <- function(batch, schema, options){
-    .Call(`_arrow_RecordBatch__cast`, batch, schema, options)
+RecordBatch__cast <- function(batch, schema, options) {
+  .Call(`_arrow_RecordBatch__cast`, batch, schema, options)
 }
 
-Table__cast <- function(table, schema, options){
-    .Call(`_arrow_Table__cast`, table, schema, options)
+Table__cast <- function(table, schema, options) {
+  .Call(`_arrow_Table__cast`, table, schema, options)
 }
 
-compute__CallFunction <- function(func_name, args, options){
-    .Call(`_arrow_compute__CallFunction`, func_name, args, options)
+compute__CallFunction <- function(func_name, args, options) {
+  .Call(`_arrow_compute__CallFunction`, func_name, args, options)
 }
 
-compute__GroupBy <- function(arguments, keys, options){
-    .Call(`_arrow_compute__GroupBy`, arguments, keys, options)
+compute__GroupBy <- function(arguments, keys, options) {
+  .Call(`_arrow_compute__GroupBy`, arguments, keys, options)
 }
 
-compute__GetFunctionNames <- function(){
-    .Call(`_arrow_compute__GetFunctionNames`)
+compute__GetFunctionNames <- function() {
+  .Call(`_arrow_compute__GetFunctionNames`)
 }
 
-build_info <- function(){
-    .Call(`_arrow_build_info`)
+build_info <- function() {
+  .Call(`_arrow_build_info`)
 }
 
-runtime_info <- function(){
-    .Call(`_arrow_runtime_info`)
+runtime_info <- function() {
+  .Call(`_arrow_runtime_info`)
 }
 
-csv___WriteOptions__initialize <- function(options){
-    .Call(`_arrow_csv___WriteOptions__initialize`, options)
+csv___WriteOptions__initialize <- function(options) {
+  .Call(`_arrow_csv___WriteOptions__initialize`, options)
 }
 
-csv___ReadOptions__initialize <- function(options){
-    .Call(`_arrow_csv___ReadOptions__initialize`, options)
+csv___ReadOptions__initialize <- function(options) {
+  .Call(`_arrow_csv___ReadOptions__initialize`, options)
 }
 
-csv___ParseOptions__initialize <- function(options){
-    .Call(`_arrow_csv___ParseOptions__initialize`, options)
+csv___ParseOptions__initialize <- function(options) {
+  .Call(`_arrow_csv___ParseOptions__initialize`, options)
 }
 
-csv___ReadOptions__column_names <- function(options){
-    .Call(`_arrow_csv___ReadOptions__column_names`, options)
+csv___ReadOptions__column_names <- function(options) {
+  .Call(`_arrow_csv___ReadOptions__column_names`, options)
 }
 
-csv___ConvertOptions__initialize <- function(options){
-    .Call(`_arrow_csv___ConvertOptions__initialize`, options)
+csv___ConvertOptions__initialize <- function(options) {
+  .Call(`_arrow_csv___ConvertOptions__initialize`, options)
 }
 
-csv___TableReader__Make <- function(input, read_options, parse_options, convert_options){
-    .Call(`_arrow_csv___TableReader__Make`, input, read_options, parse_options, convert_options)
+csv___TableReader__Make <- function(input, read_options, parse_options, convert_options) {
+  .Call(`_arrow_csv___TableReader__Make`, input, read_options, parse_options, convert_options)
 }
 
-csv___TableReader__Read <- function(table_reader){
-    .Call(`_arrow_csv___TableReader__Read`, table_reader)
+csv___TableReader__Read <- function(table_reader) {
+  .Call(`_arrow_csv___TableReader__Read`, table_reader)
 }
 
-TimestampParser__kind <- function(parser){
-    .Call(`_arrow_TimestampParser__kind`, parser)
+TimestampParser__kind <- function(parser) {
+  .Call(`_arrow_TimestampParser__kind`, parser)
 }
 
-TimestampParser__format <- function(parser){
-    .Call(`_arrow_TimestampParser__format`, parser)
+TimestampParser__format <- function(parser) {
+  .Call(`_arrow_TimestampParser__format`, parser)
 }
 
-TimestampParser__MakeStrptime <- function(format){
-    .Call(`_arrow_TimestampParser__MakeStrptime`, format)
+TimestampParser__MakeStrptime <- function(format) {
+  .Call(`_arrow_TimestampParser__MakeStrptime`, format)
 }
 
-TimestampParser__MakeISO8601 <- function(){
-    .Call(`_arrow_TimestampParser__MakeISO8601`)
+TimestampParser__MakeISO8601 <- function() {
+  .Call(`_arrow_TimestampParser__MakeISO8601`)
 }
 
-csv___WriteCSV__Table <- function(table, write_options, stream){
-    invisible(.Call(`_arrow_csv___WriteCSV__Table`, table, write_options, stream))
+csv___WriteCSV__Table <- function(table, write_options, stream) {
+  invisible(.Call(`_arrow_csv___WriteCSV__Table`, table, write_options, stream))
 }
 
-csv___WriteCSV__RecordBatch <- function(record_batch, write_options, stream){
-    invisible(.Call(`_arrow_csv___WriteCSV__RecordBatch`, record_batch, write_options, stream))
+csv___WriteCSV__RecordBatch <- function(record_batch, write_options, stream) {
+  invisible(.Call(`_arrow_csv___WriteCSV__RecordBatch`, record_batch, write_options, stream))
 }
 
-dataset___Dataset__NewScan <- function(ds){
-    .Call(`_arrow_dataset___Dataset__NewScan`, ds)
+dataset___Dataset__NewScan <- function(ds) {
+  .Call(`_arrow_dataset___Dataset__NewScan`, ds)
 }
 
-dataset___Dataset__schema <- function(dataset){
-    .Call(`_arrow_dataset___Dataset__schema`, dataset)
+dataset___Dataset__schema <- function(dataset) {
+  .Call(`_arrow_dataset___Dataset__schema`, dataset)
 }
 
-dataset___Dataset__type_name <- function(dataset){
-    .Call(`_arrow_dataset___Dataset__type_name`, dataset)
+dataset___Dataset__type_name <- function(dataset) {
+  .Call(`_arrow_dataset___Dataset__type_name`, dataset)
 }
 
-dataset___Dataset__ReplaceSchema <- function(dataset, schm){
-    .Call(`_arrow_dataset___Dataset__ReplaceSchema`, dataset, schm)
+dataset___Dataset__ReplaceSchema <- function(dataset, schm) {
+  .Call(`_arrow_dataset___Dataset__ReplaceSchema`, dataset, schm)
 }
 
-dataset___UnionDataset__create <- function(datasets, schm){
-    .Call(`_arrow_dataset___UnionDataset__create`, datasets, schm)
+dataset___UnionDataset__create <- function(datasets, schm) {
+  .Call(`_arrow_dataset___UnionDataset__create`, datasets, schm)
 }
 
-dataset___InMemoryDataset__create <- function(table){
-    .Call(`_arrow_dataset___InMemoryDataset__create`, table)
+dataset___InMemoryDataset__create <- function(table) {
+  .Call(`_arrow_dataset___InMemoryDataset__create`, table)
 }
 
-dataset___UnionDataset__children <- function(ds){
-    .Call(`_arrow_dataset___UnionDataset__children`, ds)
+dataset___UnionDataset__children <- function(ds) {
+  .Call(`_arrow_dataset___UnionDataset__children`, ds)
 }
 
-dataset___FileSystemDataset__format <- function(dataset){
-    .Call(`_arrow_dataset___FileSystemDataset__format`, dataset)
+dataset___FileSystemDataset__format <- function(dataset) {
+  .Call(`_arrow_dataset___FileSystemDataset__format`, dataset)
 }
 
-dataset___FileSystemDataset__filesystem <- function(dataset){
-    .Call(`_arrow_dataset___FileSystemDataset__filesystem`, dataset)
+dataset___FileSystemDataset__filesystem <- function(dataset) {
+  .Call(`_arrow_dataset___FileSystemDataset__filesystem`, dataset)
 }
 
-dataset___FileSystemDataset__files <- function(dataset){
-    .Call(`_arrow_dataset___FileSystemDataset__files`, dataset)
+dataset___FileSystemDataset__files <- function(dataset) {
+  .Call(`_arrow_dataset___FileSystemDataset__files`, dataset)
 }
 
-dataset___DatasetFactory__Finish1 <- function(factory, unify_schemas){
-    .Call(`_arrow_dataset___DatasetFactory__Finish1`, factory, unify_schemas)
+dataset___DatasetFactory__Finish1 <- function(factory, unify_schemas) {
+  .Call(`_arrow_dataset___DatasetFactory__Finish1`, factory, unify_schemas)
 }
 
-dataset___DatasetFactory__Finish2 <- function(factory, schema){
-    .Call(`_arrow_dataset___DatasetFactory__Finish2`, factory, schema)
+dataset___DatasetFactory__Finish2 <- function(factory, schema) {
+  .Call(`_arrow_dataset___DatasetFactory__Finish2`, factory, schema)
 }
 
-dataset___DatasetFactory__Inspect <- function(factory, unify_schemas){
-    .Call(`_arrow_dataset___DatasetFactory__Inspect`, factory, unify_schemas)
+dataset___DatasetFactory__Inspect <- function(factory, unify_schemas) {
+  .Call(`_arrow_dataset___DatasetFactory__Inspect`, factory, unify_schemas)
 }
 
-dataset___UnionDatasetFactory__Make <- function(children){
-    .Call(`_arrow_dataset___UnionDatasetFactory__Make`, children)
+dataset___UnionDatasetFactory__Make <- function(children) {
+  .Call(`_arrow_dataset___UnionDatasetFactory__Make`, children)
 }
 
-dataset___FileSystemDatasetFactory__Make0 <- function(fs, paths, format){
-    .Call(`_arrow_dataset___FileSystemDatasetFactory__Make0`, fs, paths, format)
+dataset___FileSystemDatasetFactory__Make0 <- function(fs, paths, format) {
+  .Call(`_arrow_dataset___FileSystemDatasetFactory__Make0`, fs, paths, format)
 }
 
-dataset___FileSystemDatasetFactory__Make2 <- function(fs, selector, format, partitioning){
-    .Call(`_arrow_dataset___FileSystemDatasetFactory__Make2`, fs, selector, format, partitioning)
+dataset___FileSystemDatasetFactory__Make2 <- function(fs, selector, format, partitioning) {
+  .Call(`_arrow_dataset___FileSystemDatasetFactory__Make2`, fs, selector, format, partitioning)
 }
 
-dataset___FileSystemDatasetFactory__Make1 <- function(fs, selector, format){
-    .Call(`_arrow_dataset___FileSystemDatasetFactory__Make1`, fs, selector, format)
+dataset___FileSystemDatasetFactory__Make1 <- function(fs, selector, format) {
+  .Call(`_arrow_dataset___FileSystemDatasetFactory__Make1`, fs, selector, format)
 }
 
-dataset___FileSystemDatasetFactory__Make3 <- function(fs, selector, format, factory){
-    .Call(`_arrow_dataset___FileSystemDatasetFactory__Make3`, fs, selector, format, factory)
+dataset___FileSystemDatasetFactory__Make3 <- function(fs, selector, format, factory) {
+  .Call(`_arrow_dataset___FileSystemDatasetFactory__Make3`, fs, selector, format, factory)
 }
 
-dataset___FileFormat__type_name <- function(format){
-    .Call(`_arrow_dataset___FileFormat__type_name`, format)
+dataset___FileFormat__type_name <- function(format) {
+  .Call(`_arrow_dataset___FileFormat__type_name`, format)
 }
 
-dataset___FileFormat__DefaultWriteOptions <- function(fmt){
-    .Call(`_arrow_dataset___FileFormat__DefaultWriteOptions`, fmt)
+dataset___FileFormat__DefaultWriteOptions <- function(fmt) {
+  .Call(`_arrow_dataset___FileFormat__DefaultWriteOptions`, fmt)
 }
 
-dataset___ParquetFileFormat__Make <- function(options, dict_columns){
-    .Call(`_arrow_dataset___ParquetFileFormat__Make`, options, dict_columns)
+dataset___ParquetFileFormat__Make <- function(options, dict_columns) {
+  .Call(`_arrow_dataset___ParquetFileFormat__Make`, options, dict_columns)
 }
 
-dataset___FileWriteOptions__type_name <- function(options){
-    .Call(`_arrow_dataset___FileWriteOptions__type_name`, options)
+dataset___FileWriteOptions__type_name <- function(options) {
+  .Call(`_arrow_dataset___FileWriteOptions__type_name`, options)
 }
 
-dataset___ParquetFileWriteOptions__update <- function(options, writer_props, arrow_writer_props){
-    invisible(.Call(`_arrow_dataset___ParquetFileWriteOptions__update`, options, writer_props, arrow_writer_props))
+dataset___ParquetFileWriteOptions__update <- function(options, writer_props, arrow_writer_props) {
+  invisible(.Call(`_arrow_dataset___ParquetFileWriteOptions__update`, options, writer_props, arrow_writer_props))
 }
 
-dataset___IpcFileWriteOptions__update2 <- function(ipc_options, use_legacy_format, codec, metadata_version){
-    invisible(.Call(`_arrow_dataset___IpcFileWriteOptions__update2`, ipc_options, use_legacy_format, codec, metadata_version))
+dataset___IpcFileWriteOptions__update2 <- function(ipc_options, use_legacy_format, codec, metadata_version) {
+  invisible(.Call(`_arrow_dataset___IpcFileWriteOptions__update2`, ipc_options, use_legacy_format, codec, metadata_version))
 }
 
-dataset___IpcFileWriteOptions__update1 <- function(ipc_options, use_legacy_format, metadata_version){
-    invisible(.Call(`_arrow_dataset___IpcFileWriteOptions__update1`, ipc_options, use_legacy_format, metadata_version))
+dataset___IpcFileWriteOptions__update1 <- function(ipc_options, use_legacy_format, metadata_version) {
+  invisible(.Call(`_arrow_dataset___IpcFileWriteOptions__update1`, ipc_options, use_legacy_format, metadata_version))
 }
 
-dataset___CsvFileWriteOptions__update <- function(csv_options, write_options){
-    invisible(.Call(`_arrow_dataset___CsvFileWriteOptions__update`, csv_options, write_options))
+dataset___CsvFileWriteOptions__update <- function(csv_options, write_options) {
+  invisible(.Call(`_arrow_dataset___CsvFileWriteOptions__update`, csv_options, write_options))
 }
 
-dataset___IpcFileFormat__Make <- function(){
-    .Call(`_arrow_dataset___IpcFileFormat__Make`)
+dataset___IpcFileFormat__Make <- function() {
+  .Call(`_arrow_dataset___IpcFileFormat__Make`)
 }
 
-dataset___CsvFileFormat__Make <- function(parse_options, convert_options, read_options){
-    .Call(`_arrow_dataset___CsvFileFormat__Make`, parse_options, convert_options, read_options)
+dataset___CsvFileFormat__Make <- function(parse_options, convert_options, read_options) {
+  .Call(`_arrow_dataset___CsvFileFormat__Make`, parse_options, convert_options, read_options)
 }
 
-dataset___FragmentScanOptions__type_name <- function(fragment_scan_options){
-    .Call(`_arrow_dataset___FragmentScanOptions__type_name`, fragment_scan_options)
+dataset___FragmentScanOptions__type_name <- function(fragment_scan_options) {
+  .Call(`_arrow_dataset___FragmentScanOptions__type_name`, fragment_scan_options)
 }
 
-dataset___CsvFragmentScanOptions__Make <- function(convert_options, read_options){
-    .Call(`_arrow_dataset___CsvFragmentScanOptions__Make`, convert_options, read_options)
+dataset___CsvFragmentScanOptions__Make <- function(convert_options, read_options) {
+  .Call(`_arrow_dataset___CsvFragmentScanOptions__Make`, convert_options, read_options)
 }
 
-dataset___ParquetFragmentScanOptions__Make <- function(use_buffered_stream, buffer_size, pre_buffer){
-    .Call(`_arrow_dataset___ParquetFragmentScanOptions__Make`, use_buffered_stream, buffer_size, pre_buffer)
+dataset___ParquetFragmentScanOptions__Make <- function(use_buffered_stream, buffer_size, pre_buffer) {
+  .Call(`_arrow_dataset___ParquetFragmentScanOptions__Make`, use_buffered_stream, buffer_size, pre_buffer)
 }
 
-dataset___DirectoryPartitioning <- function(schm, segment_encoding){
-    .Call(`_arrow_dataset___DirectoryPartitioning`, schm, segment_encoding)
+dataset___DirectoryPartitioning <- function(schm, segment_encoding) {
+  .Call(`_arrow_dataset___DirectoryPartitioning`, schm, segment_encoding)
 }
 
-dataset___DirectoryPartitioning__MakeFactory <- function(field_names, segment_encoding){
-    .Call(`_arrow_dataset___DirectoryPartitioning__MakeFactory`, field_names, segment_encoding)
+dataset___DirectoryPartitioning__MakeFactory <- function(field_names, segment_encoding) {
+  .Call(`_arrow_dataset___DirectoryPartitioning__MakeFactory`, field_names, segment_encoding)
 }
 
-dataset___HivePartitioning <- function(schm, null_fallback, segment_encoding){
-    .Call(`_arrow_dataset___HivePartitioning`, schm, null_fallback, segment_encoding)
+dataset___HivePartitioning <- function(schm, null_fallback, segment_encoding) {
+  .Call(`_arrow_dataset___HivePartitioning`, schm, null_fallback, segment_encoding)
 }
 
-dataset___HivePartitioning__MakeFactory <- function(null_fallback, segment_encoding){
-    .Call(`_arrow_dataset___HivePartitioning__MakeFactory`, null_fallback, segment_encoding)
+dataset___HivePartitioning__MakeFactory <- function(null_fallback, segment_encoding) {
+  .Call(`_arrow_dataset___HivePartitioning__MakeFactory`, null_fallback, segment_encoding)
 }
 
-dataset___ScannerBuilder__ProjectNames <- function(sb, cols){
-    invisible(.Call(`_arrow_dataset___ScannerBuilder__ProjectNames`, sb, cols))
+dataset___ScannerBuilder__ProjectNames <- function(sb, cols) {
+  invisible(.Call(`_arrow_dataset___ScannerBuilder__ProjectNames`, sb, cols))
 }
 
-dataset___ScannerBuilder__ProjectExprs <- function(sb, exprs, names){
-    invisible(.Call(`_arrow_dataset___ScannerBuilder__ProjectExprs`, sb, exprs, names))
+dataset___ScannerBuilder__ProjectExprs <- function(sb, exprs, names) {
+  invisible(.Call(`_arrow_dataset___ScannerBuilder__ProjectExprs`, sb, exprs, names))
 }
 
-dataset___ScannerBuilder__Filter <- function(sb, expr){
-    invisible(.Call(`_arrow_dataset___ScannerBuilder__Filter`, sb, expr))
+dataset___ScannerBuilder__Filter <- function(sb, expr) {
+  invisible(.Call(`_arrow_dataset___ScannerBuilder__Filter`, sb, expr))
 }
 
-dataset___ScannerBuilder__UseThreads <- function(sb, threads){
-    invisible(.Call(`_arrow_dataset___ScannerBuilder__UseThreads`, sb, threads))
+dataset___ScannerBuilder__UseThreads <- function(sb, threads) {
+  invisible(.Call(`_arrow_dataset___ScannerBuilder__UseThreads`, sb, threads))
 }
 
-dataset___ScannerBuilder__UseAsync <- function(sb, use_async){
-    invisible(.Call(`_arrow_dataset___ScannerBuilder__UseAsync`, sb, use_async))
+dataset___ScannerBuilder__UseAsync <- function(sb, use_async) {
+  invisible(.Call(`_arrow_dataset___ScannerBuilder__UseAsync`, sb, use_async))
 }
 
-dataset___ScannerBuilder__BatchSize <- function(sb, batch_size){
-    invisible(.Call(`_arrow_dataset___ScannerBuilder__BatchSize`, sb, batch_size))
+dataset___ScannerBuilder__BatchSize <- function(sb, batch_size) {
+  invisible(.Call(`_arrow_dataset___ScannerBuilder__BatchSize`, sb, batch_size))
 }
 
-dataset___ScannerBuilder__FragmentScanOptions <- function(sb, options){
-    invisible(.Call(`_arrow_dataset___ScannerBuilder__FragmentScanOptions`, sb, options))
+dataset___ScannerBuilder__FragmentScanOptions <- function(sb, options) {
+  invisible(.Call(`_arrow_dataset___ScannerBuilder__FragmentScanOptions`, sb, options))
 }
 
-dataset___ScannerBuilder__schema <- function(sb){
-    .Call(`_arrow_dataset___ScannerBuilder__schema`, sb)
+dataset___ScannerBuilder__schema <- function(sb) {
+  .Call(`_arrow_dataset___ScannerBuilder__schema`, sb)
 }
 
-dataset___ScannerBuilder__Finish <- function(sb){
-    .Call(`_arrow_dataset___ScannerBuilder__Finish`, sb)
+dataset___ScannerBuilder__Finish <- function(sb) {
+  .Call(`_arrow_dataset___ScannerBuilder__Finish`, sb)
 }
 
-dataset___Scanner__ToTable <- function(scanner){
-    .Call(`_arrow_dataset___Scanner__ToTable`, scanner)
+dataset___Scanner__ToTable <- function(scanner) {
+  .Call(`_arrow_dataset___Scanner__ToTable`, scanner)
 }
 
-dataset___Scanner__ScanBatches <- function(scanner){
-    .Call(`_arrow_dataset___Scanner__ScanBatches`, scanner)
+dataset___Scanner__ScanBatches <- function(scanner) {
+  .Call(`_arrow_dataset___Scanner__ScanBatches`, scanner)
 }
 
-dataset___Scanner__ToRecordBatchReader <- function(scanner){
-    .Call(`_arrow_dataset___Scanner__ToRecordBatchReader`, scanner)
+dataset___Scanner__ToRecordBatchReader <- function(scanner) {
+  .Call(`_arrow_dataset___Scanner__ToRecordBatchReader`, scanner)
 }
 
-dataset___Scanner__head <- function(scanner, n){
-    .Call(`_arrow_dataset___Scanner__head`, scanner, n)
+dataset___Scanner__head <- function(scanner, n) {
+  .Call(`_arrow_dataset___Scanner__head`, scanner, n)
 }
 
-dataset___Scanner__schema <- function(sc){
-    .Call(`_arrow_dataset___Scanner__schema`, sc)
+dataset___Scanner__schema <- function(sc) {
+  .Call(`_arrow_dataset___Scanner__schema`, sc)
 }
 
-dataset___ScanTask__get_batches <- function(scan_task){
-    .Call(`_arrow_dataset___ScanTask__get_batches`, scan_task)
+dataset___ScanTask__get_batches <- function(scan_task) {
+  .Call(`_arrow_dataset___ScanTask__get_batches`, scan_task)
 }
 
-dataset___Dataset__Write <- function(file_write_options, filesystem, base_dir, partitioning, basename_template, scanner){
-    invisible(.Call(`_arrow_dataset___Dataset__Write`, file_write_options, filesystem, base_dir, partitioning, basename_template, scanner))
+dataset___Dataset__Write <- function(file_write_options, filesystem, base_dir, partitioning, basename_template, scanner) {
+  invisible(.Call(`_arrow_dataset___Dataset__Write`, file_write_options, filesystem, base_dir, partitioning, basename_template, scanner))
 }
 
-dataset___Scanner__TakeRows <- function(scanner, indices){
-    .Call(`_arrow_dataset___Scanner__TakeRows`, scanner, indices)
+dataset___Scanner__TakeRows <- function(scanner, indices) {
+  .Call(`_arrow_dataset___Scanner__TakeRows`, scanner, indices)
 }
 
-dataset___Scanner__CountRows <- function(scanner){
-    .Call(`_arrow_dataset___Scanner__CountRows`, scanner)
+dataset___Scanner__CountRows <- function(scanner) {
+  .Call(`_arrow_dataset___Scanner__CountRows`, scanner)
 }
 
-Int8__initialize <- function(){
-    .Call(`_arrow_Int8__initialize`)
+Int8__initialize <- function() {
+  .Call(`_arrow_Int8__initialize`)
 }
 
-Int16__initialize <- function(){
-    .Call(`_arrow_Int16__initialize`)
+Int16__initialize <- function() {
+  .Call(`_arrow_Int16__initialize`)
 }
 
-Int32__initialize <- function(){
-    .Call(`_arrow_Int32__initialize`)
+Int32__initialize <- function() {
+  .Call(`_arrow_Int32__initialize`)
 }
 
-Int64__initialize <- function(){
-    .Call(`_arrow_Int64__initialize`)
+Int64__initialize <- function() {
+  .Call(`_arrow_Int64__initialize`)
 }
 
-UInt8__initialize <- function(){
-    .Call(`_arrow_UInt8__initialize`)
+UInt8__initialize <- function() {
+  .Call(`_arrow_UInt8__initialize`)
 }
 
-UInt16__initialize <- function(){
-    .Call(`_arrow_UInt16__initialize`)
+UInt16__initialize <- function() {
+  .Call(`_arrow_UInt16__initialize`)
 }
 
-UInt32__initialize <- function(){
-    .Call(`_arrow_UInt32__initialize`)
+UInt32__initialize <- function() {
+  .Call(`_arrow_UInt32__initialize`)
 }
 
-UInt64__initialize <- function(){
-    .Call(`_arrow_UInt64__initialize`)
+UInt64__initialize <- function() {
+  .Call(`_arrow_UInt64__initialize`)
 }
 
-Float16__initialize <- function(){
-    .Call(`_arrow_Float16__initialize`)
+Float16__initialize <- function() {
+  .Call(`_arrow_Float16__initialize`)
 }
 
-Float32__initialize <- function(){
-    .Call(`_arrow_Float32__initialize`)
+Float32__initialize <- function() {
+  .Call(`_arrow_Float32__initialize`)
 }
 
-Float64__initialize <- function(){
-    .Call(`_arrow_Float64__initialize`)
+Float64__initialize <- function() {
+  .Call(`_arrow_Float64__initialize`)
 }
 
-Boolean__initialize <- function(){
-    .Call(`_arrow_Boolean__initialize`)
+Boolean__initialize <- function() {
+  .Call(`_arrow_Boolean__initialize`)
 }
 
-Utf8__initialize <- function(){
-    .Call(`_arrow_Utf8__initialize`)
+Utf8__initialize <- function() {
+  .Call(`_arrow_Utf8__initialize`)
 }
 
-LargeUtf8__initialize <- function(){
-    .Call(`_arrow_LargeUtf8__initialize`)
+LargeUtf8__initialize <- function() {
+  .Call(`_arrow_LargeUtf8__initialize`)
 }
 
-Binary__initialize <- function(){
-    .Call(`_arrow_Binary__initialize`)
+Binary__initialize <- function() {
+  .Call(`_arrow_Binary__initialize`)
 }
 
-LargeBinary__initialize <- function(){
-    .Call(`_arrow_LargeBinary__initialize`)
+LargeBinary__initialize <- function() {
+  .Call(`_arrow_LargeBinary__initialize`)
 }
 
-Date32__initialize <- function(){
-    .Call(`_arrow_Date32__initialize`)
+Date32__initialize <- function() {
+  .Call(`_arrow_Date32__initialize`)
 }
 
-Date64__initialize <- function(){
-    .Call(`_arrow_Date64__initialize`)
+Date64__initialize <- function() {
+  .Call(`_arrow_Date64__initialize`)
 }
 
-Null__initialize <- function(){
-    .Call(`_arrow_Null__initialize`)
+Null__initialize <- function() {
+  .Call(`_arrow_Null__initialize`)
 }
 
-Decimal128Type__initialize <- function(precision, scale){
-    .Call(`_arrow_Decimal128Type__initialize`, precision, scale)
+Decimal128Type__initialize <- function(precision, scale) {
+  .Call(`_arrow_Decimal128Type__initialize`, precision, scale)
 }
 
-FixedSizeBinary__initialize <- function(byte_width){
-    .Call(`_arrow_FixedSizeBinary__initialize`, byte_width)
+FixedSizeBinary__initialize <- function(byte_width) {
+  .Call(`_arrow_FixedSizeBinary__initialize`, byte_width)
 }
 
-Timestamp__initialize <- function(unit, timezone){
-    .Call(`_arrow_Timestamp__initialize`, unit, timezone)
+Timestamp__initialize <- function(unit, timezone) {
+  .Call(`_arrow_Timestamp__initialize`, unit, timezone)
 }
 
-Time32__initialize <- function(unit){
-    .Call(`_arrow_Time32__initialize`, unit)
+Time32__initialize <- function(unit) {
+  .Call(`_arrow_Time32__initialize`, unit)
 }
 
-Time64__initialize <- function(unit){
-    .Call(`_arrow_Time64__initialize`, unit)
+Time64__initialize <- function(unit) {
+  .Call(`_arrow_Time64__initialize`, unit)
 }
 
-list__ <- function(x){
-    .Call(`_arrow_list__`, x)
+list__ <- function(x) {
+  .Call(`_arrow_list__`, x)
 }
 
-large_list__ <- function(x){
-    .Call(`_arrow_large_list__`, x)
+large_list__ <- function(x) {
+  .Call(`_arrow_large_list__`, x)
 }
 
-fixed_size_list__ <- function(x, list_size){
-    .Call(`_arrow_fixed_size_list__`, x, list_size)
+fixed_size_list__ <- function(x, list_size) {
+  .Call(`_arrow_fixed_size_list__`, x, list_size)
 }
 
-struct__ <- function(fields){
-    .Call(`_arrow_struct__`, fields)
+struct__ <- function(fields) {
+  .Call(`_arrow_struct__`, fields)
 }
 
-DataType__ToString <- function(type){
-    .Call(`_arrow_DataType__ToString`, type)
+DataType__ToString <- function(type) {
+  .Call(`_arrow_DataType__ToString`, type)
 }
 
-DataType__name <- function(type){
-    .Call(`_arrow_DataType__name`, type)
+DataType__name <- function(type) {
+  .Call(`_arrow_DataType__name`, type)
 }
 
-DataType__Equals <- function(lhs, rhs){
-    .Call(`_arrow_DataType__Equals`, lhs, rhs)
+DataType__Equals <- function(lhs, rhs) {
+  .Call(`_arrow_DataType__Equals`, lhs, rhs)
 }
 
-DataType__num_fields <- function(type){
-    .Call(`_arrow_DataType__num_fields`, type)
+DataType__num_fields <- function(type) {
+  .Call(`_arrow_DataType__num_fields`, type)
 }
 
-DataType__fields <- function(type){
-    .Call(`_arrow_DataType__fields`, type)
+DataType__fields <- function(type) {
+  .Call(`_arrow_DataType__fields`, type)
 }
 
-DataType__id <- function(type){
-    .Call(`_arrow_DataType__id`, type)
+DataType__id <- function(type) {
+  .Call(`_arrow_DataType__id`, type)
 }
 
-ListType__ToString <- function(type){
-    .Call(`_arrow_ListType__ToString`, type)
+ListType__ToString <- function(type) {
+  .Call(`_arrow_ListType__ToString`, type)
 }
 
-FixedWidthType__bit_width <- function(type){
-    .Call(`_arrow_FixedWidthType__bit_width`, type)
+FixedWidthType__bit_width <- function(type) {
+  .Call(`_arrow_FixedWidthType__bit_width`, type)
 }
 
-DateType__unit <- function(type){
-    .Call(`_arrow_DateType__unit`, type)
+DateType__unit <- function(type) {
+  .Call(`_arrow_DateType__unit`, type)
 }
 
-TimeType__unit <- function(type){
-    .Call(`_arrow_TimeType__unit`, type)
+TimeType__unit <- function(type) {
+  .Call(`_arrow_TimeType__unit`, type)
 }
 
-DecimalType__precision <- function(type){
-    .Call(`_arrow_DecimalType__precision`, type)
+DecimalType__precision <- function(type) {
+  .Call(`_arrow_DecimalType__precision`, type)
 }
 
-DecimalType__scale <- function(type){
-    .Call(`_arrow_DecimalType__scale`, type)
+DecimalType__scale <- function(type) {
+  .Call(`_arrow_DecimalType__scale`, type)
 }
 
-TimestampType__timezone <- function(type){
-    .Call(`_arrow_TimestampType__timezone`, type)
+TimestampType__timezone <- function(type) {
+  .Call(`_arrow_TimestampType__timezone`, type)
 }
 
-TimestampType__unit <- function(type){
-    .Call(`_arrow_TimestampType__unit`, type)
+TimestampType__unit <- function(type) {
+  .Call(`_arrow_TimestampType__unit`, type)
 }
 
-DictionaryType__initialize <- function(index_type, value_type, ordered){
-    .Call(`_arrow_DictionaryType__initialize`, index_type, value_type, ordered)
+DictionaryType__initialize <- function(index_type, value_type, ordered) {
+  .Call(`_arrow_DictionaryType__initialize`, index_type, value_type, ordered)
 }
 
-DictionaryType__index_type <- function(type){
-    .Call(`_arrow_DictionaryType__index_type`, type)
+DictionaryType__index_type <- function(type) {
+  .Call(`_arrow_DictionaryType__index_type`, type)
 }
 
-DictionaryType__value_type <- function(type){
-    .Call(`_arrow_DictionaryType__value_type`, type)
+DictionaryType__value_type <- function(type) {
+  .Call(`_arrow_DictionaryType__value_type`, type)
 }
 
-DictionaryType__name <- function(type){
-    .Call(`_arrow_DictionaryType__name`, type)
+DictionaryType__name <- function(type) {
+  .Call(`_arrow_DictionaryType__name`, type)
 }
 
-DictionaryType__ordered <- function(type){
-    .Call(`_arrow_DictionaryType__ordered`, type)
+DictionaryType__ordered <- function(type) {
+  .Call(`_arrow_DictionaryType__ordered`, type)
 }
 
-StructType__GetFieldByName <- function(type, name){
-    .Call(`_arrow_StructType__GetFieldByName`, type, name)
+StructType__GetFieldByName <- function(type, name) {
+  .Call(`_arrow_StructType__GetFieldByName`, type, name)
 }
 
-StructType__GetFieldIndex <- function(type, name){
-    .Call(`_arrow_StructType__GetFieldIndex`, type, name)
+StructType__GetFieldIndex <- function(type, name) {
+  .Call(`_arrow_StructType__GetFieldIndex`, type, name)
 }
 
-StructType__field_names <- function(type){
-    .Call(`_arrow_StructType__field_names`, type)
+StructType__field_names <- function(type) {
+  .Call(`_arrow_StructType__field_names`, type)
 }
 
-ListType__value_field <- function(type){
-    .Call(`_arrow_ListType__value_field`, type)
+ListType__value_field <- function(type) {
+  .Call(`_arrow_ListType__value_field`, type)
 }
 
-ListType__value_type <- function(type){
-    .Call(`_arrow_ListType__value_type`, type)
+ListType__value_type <- function(type) {
+  .Call(`_arrow_ListType__value_type`, type)
 }
 
-LargeListType__value_field <- function(type){
-    .Call(`_arrow_LargeListType__value_field`, type)
+LargeListType__value_field <- function(type) {
+  .Call(`_arrow_LargeListType__value_field`, type)
 }
 
-LargeListType__value_type <- function(type){
-    .Call(`_arrow_LargeListType__value_type`, type)
+LargeListType__value_type <- function(type) {
+  .Call(`_arrow_LargeListType__value_type`, type)
 }
 
-FixedSizeListType__value_field <- function(type){
-    .Call(`_arrow_FixedSizeListType__value_field`, type)
+FixedSizeListType__value_field <- function(type) {
+  .Call(`_arrow_FixedSizeListType__value_field`, type)
 }
 
-FixedSizeListType__value_type <- function(type){
-    .Call(`_arrow_FixedSizeListType__value_type`, type)
+FixedSizeListType__value_type <- function(type) {
+  .Call(`_arrow_FixedSizeListType__value_type`, type)
 }
 
-FixedSizeListType__list_size <- function(type){
-    .Call(`_arrow_FixedSizeListType__list_size`, type)
+FixedSizeListType__list_size <- function(type) {
+  .Call(`_arrow_FixedSizeListType__list_size`, type)
 }
 
-compute___expr__call <- function(func_name, argument_list, options){
-    .Call(`_arrow_compute___expr__call`, func_name, argument_list, options)
+compute___expr__call <- function(func_name, argument_list, options) {
+  .Call(`_arrow_compute___expr__call`, func_name, argument_list, options)
 }
 
-field_names_in_expression <- function(x){
-    .Call(`_arrow_field_names_in_expression`, x)
+field_names_in_expression <- function(x) {
+  .Call(`_arrow_field_names_in_expression`, x)
 }
 
-compute___expr__get_field_ref_name <- function(x){
-    .Call(`_arrow_compute___expr__get_field_ref_name`, x)
+compute___expr__get_field_ref_name <- function(x) {
+  .Call(`_arrow_compute___expr__get_field_ref_name`, x)
 }
 
-compute___expr__field_ref <- function(name){
-    .Call(`_arrow_compute___expr__field_ref`, name)
+compute___expr__field_ref <- function(name) {
+  .Call(`_arrow_compute___expr__field_ref`, name)
 }
 
-compute___expr__scalar <- function(x){
-    .Call(`_arrow_compute___expr__scalar`, x)
+compute___expr__scalar <- function(x) {
+  .Call(`_arrow_compute___expr__scalar`, x)
 }
 
-compute___expr__ToString <- function(x){
-    .Call(`_arrow_compute___expr__ToString`, x)
+compute___expr__ToString <- function(x) {
+  .Call(`_arrow_compute___expr__ToString`, x)
 }
 
-compute___expr__type <- function(x, schema){
-    .Call(`_arrow_compute___expr__type`, x, schema)
+compute___expr__type <- function(x, schema) {
+  .Call(`_arrow_compute___expr__type`, x, schema)
 }
 
-compute___expr__type_id <- function(x, schema){
-    .Call(`_arrow_compute___expr__type_id`, x, schema)
+compute___expr__type_id <- function(x, schema) {
+  .Call(`_arrow_compute___expr__type_id`, x, schema)
 }
 
-ipc___WriteFeather__Table <- function(stream, table, version, chunk_size, compression, compression_level){
-    invisible(.Call(`_arrow_ipc___WriteFeather__Table`, stream, table, version, chunk_size, compression, compression_level))
+ipc___WriteFeather__Table <- function(stream, table, version, chunk_size, compression, compression_level) {
+  invisible(.Call(`_arrow_ipc___WriteFeather__Table`, stream, table, version, chunk_size, compression, compression_level))
 }
 
-ipc___feather___Reader__version <- function(reader){
-    .Call(`_arrow_ipc___feather___Reader__version`, reader)
+ipc___feather___Reader__version <- function(reader) {
+  .Call(`_arrow_ipc___feather___Reader__version`, reader)
 }
 
-ipc___feather___Reader__Read <- function(reader, columns){
-    .Call(`_arrow_ipc___feather___Reader__Read`, reader, columns)
+ipc___feather___Reader__Read <- function(reader, columns) {
+  .Call(`_arrow_ipc___feather___Reader__Read`, reader, columns)
 }
 
-ipc___feather___Reader__Open <- function(stream){
-    .Call(`_arrow_ipc___feather___Reader__Open`, stream)
+ipc___feather___Reader__Open <- function(stream) {
+  .Call(`_arrow_ipc___feather___Reader__Open`, stream)
 }
 
-ipc___feather___Reader__schema <- function(reader){
-    .Call(`_arrow_ipc___feather___Reader__schema`, reader)
+ipc___feather___Reader__schema <- function(reader) {
+  .Call(`_arrow_ipc___feather___Reader__schema`, reader)
 }
 
-Field__initialize <- function(name, field, nullable){
-    .Call(`_arrow_Field__initialize`, name, field, nullable)
+Field__initialize <- function(name, field, nullable) {
+  .Call(`_arrow_Field__initialize`, name, field, nullable)
 }
 
-Field__ToString <- function(field){
-    .Call(`_arrow_Field__ToString`, field)
+Field__ToString <- function(field) {
+  .Call(`_arrow_Field__ToString`, field)
 }
 
-Field__name <- function(field){
-    .Call(`_arrow_Field__name`, field)
+Field__name <- function(field) {
+  .Call(`_arrow_Field__name`, field)
 }
 
-Field__Equals <- function(field, other){
-    .Call(`_arrow_Field__Equals`, field, other)
+Field__Equals <- function(field, other) {
+  .Call(`_arrow_Field__Equals`, field, other)
 }
 
-Field__nullable <- function(field){
-    .Call(`_arrow_Field__nullable`, field)
+Field__nullable <- function(field) {
+  .Call(`_arrow_Field__nullable`, field)
 }
 
-Field__type <- function(field){
-    .Call(`_arrow_Field__type`, field)
+Field__type <- function(field) {
+  .Call(`_arrow_Field__type`, field)
 }
 
-fs___FileInfo__type <- function(x){
-    .Call(`_arrow_fs___FileInfo__type`, x)
+fs___FileInfo__type <- function(x) {
+  .Call(`_arrow_fs___FileInfo__type`, x)
 }
 
-fs___FileInfo__set_type <- function(x, type){
-    invisible(.Call(`_arrow_fs___FileInfo__set_type`, x, type))
+fs___FileInfo__set_type <- function(x, type) {
+  invisible(.Call(`_arrow_fs___FileInfo__set_type`, x, type))
 }
 
-fs___FileInfo__path <- function(x){
-    .Call(`_arrow_fs___FileInfo__path`, x)
+fs___FileInfo__path <- function(x) {
+  .Call(`_arrow_fs___FileInfo__path`, x)
 }
 
-fs___FileInfo__set_path <- function(x, path){
-    invisible(.Call(`_arrow_fs___FileInfo__set_path`, x, path))
+fs___FileInfo__set_path <- function(x, path) {
+  invisible(.Call(`_arrow_fs___FileInfo__set_path`, x, path))
 }
 
-fs___FileInfo__size <- function(x){
-    .Call(`_arrow_fs___FileInfo__size`, x)
+fs___FileInfo__size <- function(x) {
+  .Call(`_arrow_fs___FileInfo__size`, x)
 }
 
-fs___FileInfo__set_size <- function(x, size){
-    invisible(.Call(`_arrow_fs___FileInfo__set_size`, x, size))
+fs___FileInfo__set_size <- function(x, size) {
+  invisible(.Call(`_arrow_fs___FileInfo__set_size`, x, size))
 }
 
-fs___FileInfo__base_name <- function(x){
-    .Call(`_arrow_fs___FileInfo__base_name`, x)
+fs___FileInfo__base_name <- function(x) {
+  .Call(`_arrow_fs___FileInfo__base_name`, x)
 }
 
-fs___FileInfo__extension <- function(x){
-    .Call(`_arrow_fs___FileInfo__extension`, x)
+fs___FileInfo__extension <- function(x) {
+  .Call(`_arrow_fs___FileInfo__extension`, x)
 }
 
-fs___FileInfo__mtime <- function(x){
-    .Call(`_arrow_fs___FileInfo__mtime`, x)
+fs___FileInfo__mtime <- function(x) {
+  .Call(`_arrow_fs___FileInfo__mtime`, x)
 }
 
-fs___FileInfo__set_mtime <- function(x, time){
-    invisible(.Call(`_arrow_fs___FileInfo__set_mtime`, x, time))
+fs___FileInfo__set_mtime <- function(x, time) {
+  invisible(.Call(`_arrow_fs___FileInfo__set_mtime`, x, time))
 }
 
-fs___FileSelector__base_dir <- function(selector){
-    .Call(`_arrow_fs___FileSelector__base_dir`, selector)
+fs___FileSelector__base_dir <- function(selector) {
+  .Call(`_arrow_fs___FileSelector__base_dir`, selector)
 }
 
-fs___FileSelector__allow_not_found <- function(selector){
-    .Call(`_arrow_fs___FileSelector__allow_not_found`, selector)
+fs___FileSelector__allow_not_found <- function(selector) {
+  .Call(`_arrow_fs___FileSelector__allow_not_found`, selector)
 }
 
-fs___FileSelector__recursive <- function(selector){
-    .Call(`_arrow_fs___FileSelector__recursive`, selector)
+fs___FileSelector__recursive <- function(selector) {
+  .Call(`_arrow_fs___FileSelector__recursive`, selector)
 }
 
-fs___FileSelector__create <- function(base_dir, allow_not_found, recursive){
-    .Call(`_arrow_fs___FileSelector__create`, base_dir, allow_not_found, recursive)
+fs___FileSelector__create <- function(base_dir, allow_not_found, recursive) {
+  .Call(`_arrow_fs___FileSelector__create`, base_dir, allow_not_found, recursive)
 }
 
-fs___FileSystem__GetTargetInfos_Paths <- function(file_system, paths){
-    .Call(`_arrow_fs___FileSystem__GetTargetInfos_Paths`, file_system, paths)
+fs___FileSystem__GetTargetInfos_Paths <- function(file_system, paths) {
+  .Call(`_arrow_fs___FileSystem__GetTargetInfos_Paths`, file_system, paths)
 }
 
-fs___FileSystem__GetTargetInfos_FileSelector <- function(file_system, selector){
-    .Call(`_arrow_fs___FileSystem__GetTargetInfos_FileSelector`, file_system, selector)
+fs___FileSystem__GetTargetInfos_FileSelector <- function(file_system, selector) {
+  .Call(`_arrow_fs___FileSystem__GetTargetInfos_FileSelector`, file_system, selector)
 }
 
-fs___FileSystem__CreateDir <- function(file_system, path, recursive){
-    invisible(.Call(`_arrow_fs___FileSystem__CreateDir`, file_system, path, recursive))
+fs___FileSystem__CreateDir <- function(file_system, path, recursive) {
+  invisible(.Call(`_arrow_fs___FileSystem__CreateDir`, file_system, path, recursive))
 }
 
-fs___FileSystem__DeleteDir <- function(file_system, path){
-    invisible(.Call(`_arrow_fs___FileSystem__DeleteDir`, file_system, path))
+fs___FileSystem__DeleteDir <- function(file_system, path) {
+  invisible(.Call(`_arrow_fs___FileSystem__DeleteDir`, file_system, path))
 }
 
-fs___FileSystem__DeleteDirContents <- function(file_system, path){
-    invisible(.Call(`_arrow_fs___FileSystem__DeleteDirContents`, file_system, path))
+fs___FileSystem__DeleteDirContents <- function(file_system, path) {
+  invisible(.Call(`_arrow_fs___FileSystem__DeleteDirContents`, file_system, path))
 }
 
-fs___FileSystem__DeleteFile <- function(file_system, path){
-    invisible(.Call(`_arrow_fs___FileSystem__DeleteFile`, file_system, path))
+fs___FileSystem__DeleteFile <- function(file_system, path) {
+  invisible(.Call(`_arrow_fs___FileSystem__DeleteFile`, file_system, path))
 }
 
-fs___FileSystem__DeleteFiles <- function(file_system, paths){
-    invisible(.Call(`_arrow_fs___FileSystem__DeleteFiles`, file_system, paths))
+fs___FileSystem__DeleteFiles <- function(file_system, paths) {
+  invisible(.Call(`_arrow_fs___FileSystem__DeleteFiles`, file_system, paths))
 }
 
-fs___FileSystem__Move <- function(file_system, src, dest){
-    invisible(.Call(`_arrow_fs___FileSystem__Move`, file_system, src, dest))
+fs___FileSystem__Move <- function(file_system, src, dest) {
+  invisible(.Call(`_arrow_fs___FileSystem__Move`, file_system, src, dest))
 }
 
-fs___FileSystem__CopyFile <- function(file_system, src, dest){
-    invisible(.Call(`_arrow_fs___FileSystem__CopyFile`, file_system, src, dest))
+fs___FileSystem__CopyFile <- function(file_system, src, dest) {
+  invisible(.Call(`_arrow_fs___FileSystem__CopyFile`, file_system, src, dest))
 }
 
-fs___FileSystem__OpenInputStream <- function(file_system, path){
-    .Call(`_arrow_fs___FileSystem__OpenInputStream`, file_system, path)
+fs___FileSystem__OpenInputStream <- function(file_system, path) {
+  .Call(`_arrow_fs___FileSystem__OpenInputStream`, file_system, path)
 }
 
-fs___FileSystem__OpenInputFile <- function(file_system, path){
-    .Call(`_arrow_fs___FileSystem__OpenInputFile`, file_system, path)
+fs___FileSystem__OpenInputFile <- function(file_system, path) {
+  .Call(`_arrow_fs___FileSystem__OpenInputFile`, file_system, path)
 }
 
-fs___FileSystem__OpenOutputStream <- function(file_system, path){
-    .Call(`_arrow_fs___FileSystem__OpenOutputStream`, file_system, path)
+fs___FileSystem__OpenOutputStream <- function(file_system, path) {
+  .Call(`_arrow_fs___FileSystem__OpenOutputStream`, file_system, path)
 }
 
-fs___FileSystem__OpenAppendStream <- function(file_system, path){
-    .Call(`_arrow_fs___FileSystem__OpenAppendStream`, file_system, path)
+fs___FileSystem__OpenAppendStream <- function(file_system, path) {
+  .Call(`_arrow_fs___FileSystem__OpenAppendStream`, file_system, path)
 }
 
-fs___FileSystem__type_name <- function(file_system){
-    .Call(`_arrow_fs___FileSystem__type_name`, file_system)
+fs___FileSystem__type_name <- function(file_system) {
+  .Call(`_arrow_fs___FileSystem__type_name`, file_system)
 }
 
-fs___LocalFileSystem__create <- function(){
-    .Call(`_arrow_fs___LocalFileSystem__create`)
+fs___LocalFileSystem__create <- function() {
+  .Call(`_arrow_fs___LocalFileSystem__create`)
 }
 
-fs___SubTreeFileSystem__create <- function(base_path, base_fs){
-    .Call(`_arrow_fs___SubTreeFileSystem__create`, base_path, base_fs)
+fs___SubTreeFileSystem__create <- function(base_path, base_fs) {
+  .Call(`_arrow_fs___SubTreeFileSystem__create`, base_path, base_fs)
 }
 
-fs___SubTreeFileSystem__base_fs <- function(file_system){
-    .Call(`_arrow_fs___SubTreeFileSystem__base_fs`, file_system)
+fs___SubTreeFileSystem__base_fs <- function(file_system) {
+  .Call(`_arrow_fs___SubTreeFileSystem__base_fs`, file_system)
 }
 
-fs___SubTreeFileSystem__base_path <- function(file_system){
-    .Call(`_arrow_fs___SubTreeFileSystem__base_path`, file_system)
+fs___SubTreeFileSystem__base_path <- function(file_system) {
+  .Call(`_arrow_fs___SubTreeFileSystem__base_path`, file_system)
 }
 
-fs___FileSystemFromUri <- function(path){
-    .Call(`_arrow_fs___FileSystemFromUri`, path)
+fs___FileSystemFromUri <- function(path) {
+  .Call(`_arrow_fs___FileSystemFromUri`, path)
 }
 
-fs___CopyFiles <- function(source_fs, source_sel, destination_fs, destination_base_dir, chunk_size, use_threads){
-    invisible(.Call(`_arrow_fs___CopyFiles`, source_fs, source_sel, destination_fs, destination_base_dir, chunk_size, use_threads))
+fs___CopyFiles <- function(source_fs, source_sel, destination_fs, destination_base_dir, chunk_size, use_threads) {
+  invisible(.Call(`_arrow_fs___CopyFiles`, source_fs, source_sel, destination_fs, destination_base_dir, chunk_size, use_threads))
 }
 
-fs___S3FileSystem__create <- function(anonymous, access_key, secret_key, session_token, role_arn, session_name, external_id, load_frequency, region, endpoint_override, scheme, background_writes){
-    .Call(`_arrow_fs___S3FileSystem__create`, anonymous, access_key, secret_key, session_token, role_arn, session_name, external_id, load_frequency, region, endpoint_override, scheme, background_writes)
+fs___S3FileSystem__create <- function(anonymous, access_key, secret_key, session_token, role_arn, session_name, external_id, load_frequency, region, endpoint_override, scheme, background_writes) {
+  .Call(`_arrow_fs___S3FileSystem__create`, anonymous, access_key, secret_key, session_token, role_arn, session_name, external_id, load_frequency, region, endpoint_override, scheme, background_writes)
 }
 
-fs___S3FileSystem__region <- function(fs){
-    .Call(`_arrow_fs___S3FileSystem__region`, fs)
+fs___S3FileSystem__region <- function(fs) {
+  .Call(`_arrow_fs___S3FileSystem__region`, fs)
 }
 
-io___Readable__Read <- function(x, nbytes){
-    .Call(`_arrow_io___Readable__Read`, x, nbytes)
+io___Readable__Read <- function(x, nbytes) {
+  .Call(`_arrow_io___Readable__Read`, x, nbytes)
 }
 
-io___InputStream__Close <- function(x){
-    invisible(.Call(`_arrow_io___InputStream__Close`, x))
+io___InputStream__Close <- function(x) {
+  invisible(.Call(`_arrow_io___InputStream__Close`, x))
 }
 
-io___OutputStream__Close <- function(x){
-    invisible(.Call(`_arrow_io___OutputStream__Close`, x))
+io___OutputStream__Close <- function(x) {
+  invisible(.Call(`_arrow_io___OutputStream__Close`, x))
 }
 
-io___RandomAccessFile__GetSize <- function(x){
-    .Call(`_arrow_io___RandomAccessFile__GetSize`, x)
+io___RandomAccessFile__GetSize <- function(x) {
+  .Call(`_arrow_io___RandomAccessFile__GetSize`, x)
 }
 
-io___RandomAccessFile__supports_zero_copy <- function(x){
-    .Call(`_arrow_io___RandomAccessFile__supports_zero_copy`, x)
+io___RandomAccessFile__supports_zero_copy <- function(x) {
+  .Call(`_arrow_io___RandomAccessFile__supports_zero_copy`, x)
 }
 
-io___RandomAccessFile__Seek <- function(x, position){
-    invisible(.Call(`_arrow_io___RandomAccessFile__Seek`, x, position))
+io___RandomAccessFile__Seek <- function(x, position) {
+  invisible(.Call(`_arrow_io___RandomAccessFile__Seek`, x, position))
 }
 
-io___RandomAccessFile__Tell <- function(x){
-    .Call(`_arrow_io___RandomAccessFile__Tell`, x)
+io___RandomAccessFile__Tell <- function(x) {
+  .Call(`_arrow_io___RandomAccessFile__Tell`, x)
 }
 
-io___RandomAccessFile__Read0 <- function(x){
-    .Call(`_arrow_io___RandomAccessFile__Read0`, x)
+io___RandomAccessFile__Read0 <- function(x) {
+  .Call(`_arrow_io___RandomAccessFile__Read0`, x)
 }
 
-io___RandomAccessFile__ReadAt <- function(x, position, nbytes){
-    .Call(`_arrow_io___RandomAccessFile__ReadAt`, x, position, nbytes)
+io___RandomAccessFile__ReadAt <- function(x, position, nbytes) {
+  .Call(`_arrow_io___RandomAccessFile__ReadAt`, x, position, nbytes)
 }
 
-io___MemoryMappedFile__Create <- function(path, size){
-    .Call(`_arrow_io___MemoryMappedFile__Create`, path, size)
+io___MemoryMappedFile__Create <- function(path, size) {
+  .Call(`_arrow_io___MemoryMappedFile__Create`, path, size)
 }
 
-io___MemoryMappedFile__Open <- function(path, mode){
-    .Call(`_arrow_io___MemoryMappedFile__Open`, path, mode)
+io___MemoryMappedFile__Open <- function(path, mode) {
+  .Call(`_arrow_io___MemoryMappedFile__Open`, path, mode)
 }
 
-io___MemoryMappedFile__Resize <- function(x, size){
-    invisible(.Call(`_arrow_io___MemoryMappedFile__Resize`, x, size))
+io___MemoryMappedFile__Resize <- function(x, size) {
+  invisible(.Call(`_arrow_io___MemoryMappedFile__Resize`, x, size))
 }
 
-io___ReadableFile__Open <- function(path){
-    .Call(`_arrow_io___ReadableFile__Open`, path)
+io___ReadableFile__Open <- function(path) {
+  .Call(`_arrow_io___ReadableFile__Open`, path)
 }
 
-io___BufferReader__initialize <- function(buffer){
-    .Call(`_arrow_io___BufferReader__initialize`, buffer)
+io___BufferReader__initialize <- function(buffer) {
+  .Call(`_arrow_io___BufferReader__initialize`, buffer)
 }
 
-io___Writable__write <- function(stream, buf){
-    invisible(.Call(`_arrow_io___Writable__write`, stream, buf))
+io___Writable__write <- function(stream, buf) {
+  invisible(.Call(`_arrow_io___Writable__write`, stream, buf))
 }
 
-io___OutputStream__Tell <- function(stream){
-    .Call(`_arrow_io___OutputStream__Tell`, stream)
+io___OutputStream__Tell <- function(stream) {
+  .Call(`_arrow_io___OutputStream__Tell`, stream)
 }
 
-io___FileOutputStream__Open <- function(path){
-    .Call(`_arrow_io___FileOutputStream__Open`, path)
+io___FileOutputStream__Open <- function(path) {
+  .Call(`_arrow_io___FileOutputStream__Open`, path)
 }
 
-io___BufferOutputStream__Create <- function(initial_capacity){
-    .Call(`_arrow_io___BufferOutputStream__Create`, initial_capacity)
+io___BufferOutputStream__Create <- function(initial_capacity) {
+  .Call(`_arrow_io___BufferOutputStream__Create`, initial_capacity)
 }
 
-io___BufferOutputStream__capacity <- function(stream){
-    .Call(`_arrow_io___BufferOutputStream__capacity`, stream)
+io___BufferOutputStream__capacity <- function(stream) {
+  .Call(`_arrow_io___BufferOutputStream__capacity`, stream)
 }
 
-io___BufferOutputStream__Finish <- function(stream){
-    .Call(`_arrow_io___BufferOutputStream__Finish`, stream)
+io___BufferOutputStream__Finish <- function(stream) {
+  .Call(`_arrow_io___BufferOutputStream__Finish`, stream)
 }
 
-io___BufferOutputStream__Tell <- function(stream){
-    .Call(`_arrow_io___BufferOutputStream__Tell`, stream)
+io___BufferOutputStream__Tell <- function(stream) {
+  .Call(`_arrow_io___BufferOutputStream__Tell`, stream)
 }
 
-io___BufferOutputStream__Write <- function(stream, bytes){
-    invisible(.Call(`_arrow_io___BufferOutputStream__Write`, stream, bytes))
+io___BufferOutputStream__Write <- function(stream, bytes) {
+  invisible(.Call(`_arrow_io___BufferOutputStream__Write`, stream, bytes))
 }
 
-json___ReadOptions__initialize <- function(use_threads, block_size){
-    .Call(`_arrow_json___ReadOptions__initialize`, use_threads, block_size)
+json___ReadOptions__initialize <- function(use_threads, block_size) {
+  .Call(`_arrow_json___ReadOptions__initialize`, use_threads, block_size)
 }
 
-json___ParseOptions__initialize1 <- function(newlines_in_values){
-    .Call(`_arrow_json___ParseOptions__initialize1`, newlines_in_values)
+json___ParseOptions__initialize1 <- function(newlines_in_values) {
+  .Call(`_arrow_json___ParseOptions__initialize1`, newlines_in_values)
 }
 
-json___ParseOptions__initialize2 <- function(newlines_in_values, explicit_schema){
-    .Call(`_arrow_json___ParseOptions__initialize2`, newlines_in_values, explicit_schema)
+json___ParseOptions__initialize2 <- function(newlines_in_values, explicit_schema) {
+  .Call(`_arrow_json___ParseOptions__initialize2`, newlines_in_values, explicit_schema)
 }
 
-json___TableReader__Make <- function(input, read_options, parse_options){
-    .Call(`_arrow_json___TableReader__Make`, input, read_options, parse_options)
+json___TableReader__Make <- function(input, read_options, parse_options) {
+  .Call(`_arrow_json___TableReader__Make`, input, read_options, parse_options)
 }
 
-json___TableReader__Read <- function(table_reader){
-    .Call(`_arrow_json___TableReader__Read`, table_reader)
+json___TableReader__Read <- function(table_reader) {
+  .Call(`_arrow_json___TableReader__Read`, table_reader)
 }
 
-MemoryPool__default <- function(){
-    .Call(`_arrow_MemoryPool__default`)
+MemoryPool__default <- function() {
+  .Call(`_arrow_MemoryPool__default`)
 }
 
-MemoryPool__bytes_allocated <- function(pool){
-    .Call(`_arrow_MemoryPool__bytes_allocated`, pool)
+MemoryPool__bytes_allocated <- function(pool) {
+  .Call(`_arrow_MemoryPool__bytes_allocated`, pool)
 }
 
-MemoryPool__max_memory <- function(pool){
-    .Call(`_arrow_MemoryPool__max_memory`, pool)
+MemoryPool__max_memory <- function(pool) {
+  .Call(`_arrow_MemoryPool__max_memory`, pool)
 }
 
-MemoryPool__backend_name <- function(pool){
-    .Call(`_arrow_MemoryPool__backend_name`, pool)
+MemoryPool__backend_name <- function(pool) {
+  .Call(`_arrow_MemoryPool__backend_name`, pool)
 }
 
-supported_memory_backends <- function(){
-    .Call(`_arrow_supported_memory_backends`)
+supported_memory_backends <- function() {
+  .Call(`_arrow_supported_memory_backends`)
 }
 
-ipc___Message__body_length <- function(message){
-    .Call(`_arrow_ipc___Message__body_length`, message)
+ipc___Message__body_length <- function(message) {
+  .Call(`_arrow_ipc___Message__body_length`, message)
 }
 
-ipc___Message__metadata <- function(message){
-    .Call(`_arrow_ipc___Message__metadata`, message)
+ipc___Message__metadata <- function(message) {
+  .Call(`_arrow_ipc___Message__metadata`, message)
 }
 
-ipc___Message__body <- function(message){
-    .Call(`_arrow_ipc___Message__body`, message)
+ipc___Message__body <- function(message) {
+  .Call(`_arrow_ipc___Message__body`, message)
 }
 
-ipc___Message__Verify <- function(message){
-    .Call(`_arrow_ipc___Message__Verify`, message)
+ipc___Message__Verify <- function(message) {
+  .Call(`_arrow_ipc___Message__Verify`, message)
 }
 
-ipc___Message__type <- function(message){
-    .Call(`_arrow_ipc___Message__type`, message)
+ipc___Message__type <- function(message) {
+  .Call(`_arrow_ipc___Message__type`, message)
 }
 
-ipc___Message__Equals <- function(x, y){
-    .Call(`_arrow_ipc___Message__Equals`, x, y)
+ipc___Message__Equals <- function(x, y) {
+  .Call(`_arrow_ipc___Message__Equals`, x, y)
 }
 
-ipc___ReadRecordBatch__Message__Schema <- function(message, schema){
-    .Call(`_arrow_ipc___ReadRecordBatch__Message__Schema`, message, schema)
+ipc___ReadRecordBatch__Message__Schema <- function(message, schema) {
+  .Call(`_arrow_ipc___ReadRecordBatch__Message__Schema`, message, schema)
 }
 
-ipc___ReadSchema_InputStream <- function(stream){
-    .Call(`_arrow_ipc___ReadSchema_InputStream`, stream)
+ipc___ReadSchema_InputStream <- function(stream) {
+  .Call(`_arrow_ipc___ReadSchema_InputStream`, stream)
 }
 
-ipc___ReadSchema_Message <- function(message){
-    .Call(`_arrow_ipc___ReadSchema_Message`, message)
+ipc___ReadSchema_Message <- function(message) {
+  .Call(`_arrow_ipc___ReadSchema_Message`, message)
 }
 
-ipc___MessageReader__Open <- function(stream){
-    .Call(`_arrow_ipc___MessageReader__Open`, stream)
+ipc___MessageReader__Open <- function(stream) {
+  .Call(`_arrow_ipc___MessageReader__Open`, stream)
 }
 
-ipc___MessageReader__ReadNextMessage <- function(reader){
-    .Call(`_arrow_ipc___MessageReader__ReadNextMessage`, reader)
+ipc___MessageReader__ReadNextMessage <- function(reader) {
+  .Call(`_arrow_ipc___MessageReader__ReadNextMessage`, reader)
 }
 
-ipc___ReadMessage <- function(stream){
-    .Call(`_arrow_ipc___ReadMessage`, stream)
+ipc___ReadMessage <- function(stream) {
+  .Call(`_arrow_ipc___ReadMessage`, stream)
 }
 
-parquet___arrow___ArrowReaderProperties__Make <- function(use_threads){
-    .Call(`_arrow_parquet___arrow___ArrowReaderProperties__Make`, use_threads)
+parquet___arrow___ArrowReaderProperties__Make <- function(use_threads) {
+  .Call(`_arrow_parquet___arrow___ArrowReaderProperties__Make`, use_threads)
 }
 
-parquet___arrow___ArrowReaderProperties__set_use_threads <- function(properties, use_threads){
-    invisible(.Call(`_arrow_parquet___arrow___ArrowReaderProperties__set_use_threads`, properties, use_threads))
+parquet___arrow___ArrowReaderProperties__set_use_threads <- function(properties, use_threads) {
+  invisible(.Call(`_arrow_parquet___arrow___ArrowReaderProperties__set_use_threads`, properties, use_threads))
 }
 
-parquet___arrow___ArrowReaderProperties__get_use_threads <- function(properties, use_threads){
-    .Call(`_arrow_parquet___arrow___ArrowReaderProperties__get_use_threads`, properties, use_threads)
+parquet___arrow___ArrowReaderProperties__get_use_threads <- function(properties, use_threads) {
+  .Call(`_arrow_parquet___arrow___ArrowReaderProperties__get_use_threads`, properties, use_threads)
 }
 
-parquet___arrow___ArrowReaderProperties__get_read_dictionary <- function(properties, column_index){
-    .Call(`_arrow_parquet___arrow___ArrowReaderProperties__get_read_dictionary`, properties, column_index)
+parquet___arrow___ArrowReaderProperties__get_read_dictionary <- function(properties, column_index) {
+  .Call(`_arrow_parquet___arrow___ArrowReaderProperties__get_read_dictionary`, properties, column_index)
 }
 
-parquet___arrow___ArrowReaderProperties__set_read_dictionary <- function(properties, column_index, read_dict){
-    invisible(.Call(`_arrow_parquet___arrow___ArrowReaderProperties__set_read_dictionary`, properties, column_index, read_dict))
+parquet___arrow___ArrowReaderProperties__set_read_dictionary <- function(properties, column_index, read_dict) {
+  invisible(.Call(`_arrow_parquet___arrow___ArrowReaderProperties__set_read_dictionary`, properties, column_index, read_dict))
 }
 
-parquet___arrow___FileReader__OpenFile <- function(file, props){
-    .Call(`_arrow_parquet___arrow___FileReader__OpenFile`, file, props)
+parquet___arrow___FileReader__OpenFile <- function(file, props) {
+  .Call(`_arrow_parquet___arrow___FileReader__OpenFile`, file, props)
 }
 
-parquet___arrow___FileReader__ReadTable1 <- function(reader){
-    .Call(`_arrow_parquet___arrow___FileReader__ReadTable1`, reader)
+parquet___arrow___FileReader__ReadTable1 <- function(reader) {
+  .Call(`_arrow_parquet___arrow___FileReader__ReadTable1`, reader)
 }
 
-parquet___arrow___FileReader__ReadTable2 <- function(reader, column_indices){
-    .Call(`_arrow_parquet___arrow___FileReader__ReadTable2`, reader, column_indices)
+parquet___arrow___FileReader__ReadTable2 <- function(reader, column_indices) {
+  .Call(`_arrow_parquet___arrow___FileReader__ReadTable2`, reader, column_indices)
 }
 
-parquet___arrow___FileReader__ReadRowGroup1 <- function(reader, i){
-    .Call(`_arrow_parquet___arrow___FileReader__ReadRowGroup1`, reader, i)
+parquet___arrow___FileReader__ReadRowGroup1 <- function(reader, i) {
+  .Call(`_arrow_parquet___arrow___FileReader__ReadRowGroup1`, reader, i)
 }
 
-parquet___arrow___FileReader__ReadRowGroup2 <- function(reader, i, column_indices){
-    .Call(`_arrow_parquet___arrow___FileReader__ReadRowGroup2`, reader, i, column_indices)
+parquet___arrow___FileReader__ReadRowGroup2 <- function(reader, i, column_indices) {
+  .Call(`_arrow_parquet___arrow___FileReader__ReadRowGroup2`, reader, i, column_indices)
 }
 
-parquet___arrow___FileReader__ReadRowGroups1 <- function(reader, row_groups){
-    .Call(`_arrow_parquet___arrow___FileReader__ReadRowGroups1`, reader, row_groups)
+parquet___arrow___FileReader__ReadRowGroups1 <- function(reader, row_groups) {
+  .Call(`_arrow_parquet___arrow___FileReader__ReadRowGroups1`, reader, row_groups)
 }
 
-parquet___arrow___FileReader__ReadRowGroups2 <- function(reader, row_groups, column_indices){
-    .Call(`_arrow_parquet___arrow___FileReader__ReadRowGroups2`, reader, row_groups, column_indices)
+parquet___arrow___FileReader__ReadRowGroups2 <- function(reader, row_groups, column_indices) {
+  .Call(`_arrow_parquet___arrow___FileReader__ReadRowGroups2`, reader, row_groups, column_indices)
 }
 
-parquet___arrow___FileReader__num_rows <- function(reader){
-    .Call(`_arrow_parquet___arrow___FileReader__num_rows`, reader)
+parquet___arrow___FileReader__num_rows <- function(reader) {
+  .Call(`_arrow_parquet___arrow___FileReader__num_rows`, reader)
 }
 
-parquet___arrow___FileReader__num_columns <- function(reader){
-    .Call(`_arrow_parquet___arrow___FileReader__num_columns`, reader)
+parquet___arrow___FileReader__num_columns <- function(reader) {
+  .Call(`_arrow_parquet___arrow___FileReader__num_columns`, reader)
 }
 
-parquet___arrow___FileReader__num_row_groups <- function(reader){
-    .Call(`_arrow_parquet___arrow___FileReader__num_row_groups`, reader)
+parquet___arrow___FileReader__num_row_groups <- function(reader) {
+  .Call(`_arrow_parquet___arrow___FileReader__num_row_groups`, reader)
 }
 
-parquet___arrow___FileReader__ReadColumn <- function(reader, i){
-    .Call(`_arrow_parquet___arrow___FileReader__ReadColumn`, reader, i)
+parquet___arrow___FileReader__ReadColumn <- function(reader, i) {
+  .Call(`_arrow_parquet___arrow___FileReader__ReadColumn`, reader, i)
 }
 
-parquet___ArrowWriterProperties___create <- function(allow_truncated_timestamps, use_deprecated_int96_timestamps, timestamp_unit){
-    .Call(`_arrow_parquet___ArrowWriterProperties___create`, allow_truncated_timestamps, use_deprecated_int96_timestamps, timestamp_unit)
+parquet___ArrowWriterProperties___create <- function(allow_truncated_timestamps, use_deprecated_int96_timestamps, timestamp_unit) {
+  .Call(`_arrow_parquet___ArrowWriterProperties___create`, allow_truncated_timestamps, use_deprecated_int96_timestamps, timestamp_unit)
 }
 
-parquet___WriterProperties___Builder__create <- function(){
-    .Call(`_arrow_parquet___WriterProperties___Builder__create`)
+parquet___WriterProperties___Builder__create <- function() {
+  .Call(`_arrow_parquet___WriterProperties___Builder__create`)
 }
 
-parquet___WriterProperties___Builder__version <- function(builder, version){
-    invisible(.Call(`_arrow_parquet___WriterProperties___Builder__version`, builder, version))
+parquet___WriterProperties___Builder__version <- function(builder, version) {
+  invisible(.Call(`_arrow_parquet___WriterProperties___Builder__version`, builder, version))
 }
 
-parquet___ArrowWriterProperties___Builder__set_compressions <- function(builder, paths, types){
-    invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__set_compressions`, builder, paths, types))
+parquet___ArrowWriterProperties___Builder__set_compressions <- function(builder, paths, types) {
+  invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__set_compressions`, builder, paths, types))
 }
 
-parquet___ArrowWriterProperties___Builder__set_compression_levels <- function(builder, paths, levels){
-    invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__set_compression_levels`, builder, paths, levels))
+parquet___ArrowWriterProperties___Builder__set_compression_levels <- function(builder, paths, levels) {
+  invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__set_compression_levels`, builder, paths, levels))
 }
 
-parquet___ArrowWriterProperties___Builder__set_use_dictionary <- function(builder, paths, use_dictionary){
-    invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__set_use_dictionary`, builder, paths, use_dictionary))
+parquet___ArrowWriterProperties___Builder__set_use_dictionary <- function(builder, paths, use_dictionary) {
+  invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__set_use_dictionary`, builder, paths, use_dictionary))
 }
 
-parquet___ArrowWriterProperties___Builder__set_write_statistics <- function(builder, paths, write_statistics){
-    invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__set_write_statistics`, builder, paths, write_statistics))
+parquet___ArrowWriterProperties___Builder__set_write_statistics <- function(builder, paths, write_statistics) {
+  invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__set_write_statistics`, builder, paths, write_statistics))
 }
 
-parquet___ArrowWriterProperties___Builder__data_page_size <- function(builder, data_page_size){
-    invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__data_page_size`, builder, data_page_size))
+parquet___ArrowWriterProperties___Builder__data_page_size <- function(builder, data_page_size) {
+  invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__data_page_size`, builder, data_page_size))
 }
 
-parquet___WriterProperties___Builder__build <- function(builder){
-    .Call(`_arrow_parquet___WriterProperties___Builder__build`, builder)
+parquet___WriterProperties___Builder__build <- function(builder) {
+  .Call(`_arrow_parquet___WriterProperties___Builder__build`, builder)
 }
 
-parquet___arrow___ParquetFileWriter__Open <- function(schema, sink, properties, arrow_properties){
-    .Call(`_arrow_parquet___arrow___ParquetFileWriter__Open`, schema, sink, properties, arrow_properties)
+parquet___arrow___ParquetFileWriter__Open <- function(schema, sink, properties, arrow_properties) {
+  .Call(`_arrow_parquet___arrow___ParquetFileWriter__Open`, schema, sink, properties, arrow_properties)
 }
 
-parquet___arrow___FileWriter__WriteTable <- function(writer, table, chunk_size){
-    invisible(.Call(`_arrow_parquet___arrow___FileWriter__WriteTable`, writer, table, chunk_size))
+parquet___arrow___FileWriter__WriteTable <- function(writer, table, chunk_size) {
+  invisible(.Call(`_arrow_parquet___arrow___FileWriter__WriteTable`, writer, table, chunk_size))
 }
 
-parquet___arrow___FileWriter__Close <- function(writer){
-    invisible(.Call(`_arrow_parquet___arrow___FileWriter__Close`, writer))
+parquet___arrow___FileWriter__Close <- function(writer) {
+  invisible(.Call(`_arrow_parquet___arrow___FileWriter__Close`, writer))
 }
 
-parquet___arrow___WriteTable <- function(table, sink, properties, arrow_properties){
-    invisible(.Call(`_arrow_parquet___arrow___WriteTable`, table, sink, properties, arrow_properties))
+parquet___arrow___WriteTable <- function(table, sink, properties, arrow_properties) {
+  invisible(.Call(`_arrow_parquet___arrow___WriteTable`, table, sink, properties, arrow_properties))
 }
 
-parquet___arrow___FileReader__GetSchema <- function(reader){
-    .Call(`_arrow_parquet___arrow___FileReader__GetSchema`, reader)
+parquet___arrow___FileReader__GetSchema <- function(reader) {
+  .Call(`_arrow_parquet___arrow___FileReader__GetSchema`, reader)
 }
 
-allocate_arrow_schema <- function(){
-    .Call(`_arrow_allocate_arrow_schema`)
+allocate_arrow_schema <- function() {
+  .Call(`_arrow_allocate_arrow_schema`)
 }
 
-delete_arrow_schema <- function(ptr){
-    invisible(.Call(`_arrow_delete_arrow_schema`, ptr))
+delete_arrow_schema <- function(ptr) {
+  invisible(.Call(`_arrow_delete_arrow_schema`, ptr))
 }
 
-allocate_arrow_array <- function(){
-    .Call(`_arrow_allocate_arrow_array`)
+allocate_arrow_array <- function() {
+  .Call(`_arrow_allocate_arrow_array`)
 }
 
-delete_arrow_array <- function(ptr){
-    invisible(.Call(`_arrow_delete_arrow_array`, ptr))
+delete_arrow_array <- function(ptr) {
+  invisible(.Call(`_arrow_delete_arrow_array`, ptr))
 }
 
-allocate_arrow_array_stream <- function(){
-    .Call(`_arrow_allocate_arrow_array_stream`)
+allocate_arrow_array_stream <- function() {
+  .Call(`_arrow_allocate_arrow_array_stream`)
 }
 
-delete_arrow_array_stream <- function(ptr){
-    invisible(.Call(`_arrow_delete_arrow_array_stream`, ptr))
+delete_arrow_array_stream <- function(ptr) {
+  invisible(.Call(`_arrow_delete_arrow_array_stream`, ptr))
 }
 
-ImportArray <- function(array, schema){
-    .Call(`_arrow_ImportArray`, array, schema)
+ImportArray <- function(array, schema) {
+  .Call(`_arrow_ImportArray`, array, schema)
 }
 
-ImportRecordBatch <- function(array, schema){
-    .Call(`_arrow_ImportRecordBatch`, array, schema)
+ImportRecordBatch <- function(array, schema) {
+  .Call(`_arrow_ImportRecordBatch`, array, schema)
 }
 
-ImportSchema <- function(schema){
-    .Call(`_arrow_ImportSchema`, schema)
+ImportSchema <- function(schema) {
+  .Call(`_arrow_ImportSchema`, schema)
 }
 
-ImportField <- function(field){
-    .Call(`_arrow_ImportField`, field)
+ImportField <- function(field) {
+  .Call(`_arrow_ImportField`, field)
 }
 
-ImportType <- function(type){
-    .Call(`_arrow_ImportType`, type)
+ImportType <- function(type) {
+  .Call(`_arrow_ImportType`, type)
 }
 
-ImportRecordBatchReader <- function(stream){
-    .Call(`_arrow_ImportRecordBatchReader`, stream)
+ImportRecordBatchReader <- function(stream) {
+  .Call(`_arrow_ImportRecordBatchReader`, stream)
 }
 
-ExportType <- function(type, ptr){
-    invisible(.Call(`_arrow_ExportType`, type, ptr))
+ExportType <- function(type, ptr) {
+  invisible(.Call(`_arrow_ExportType`, type, ptr))
 }
 
-ExportField <- function(field, ptr){
-    invisible(.Call(`_arrow_ExportField`, field, ptr))
+ExportField <- function(field, ptr) {
+  invisible(.Call(`_arrow_ExportField`, field, ptr))
 }
 
-ExportSchema <- function(schema, ptr){
-    invisible(.Call(`_arrow_ExportSchema`, schema, ptr))
+ExportSchema <- function(schema, ptr) {
+  invisible(.Call(`_arrow_ExportSchema`, schema, ptr))
 }
 
-ExportArray <- function(array, array_ptr, schema_ptr){
-    invisible(.Call(`_arrow_ExportArray`, array, array_ptr, schema_ptr))
+ExportArray <- function(array, array_ptr, schema_ptr) {
+  invisible(.Call(`_arrow_ExportArray`, array, array_ptr, schema_ptr))
 }
 
-ExportRecordBatch <- function(batch, array_ptr, schema_ptr){
-    invisible(.Call(`_arrow_ExportRecordBatch`, batch, array_ptr, schema_ptr))
+ExportRecordBatch <- function(batch, array_ptr, schema_ptr) {
+  invisible(.Call(`_arrow_ExportRecordBatch`, batch, array_ptr, schema_ptr))
 }
 
-ExportRecordBatchReader <- function(reader, stream_ptr){
-    invisible(.Call(`_arrow_ExportRecordBatchReader`, reader, stream_ptr))
+ExportRecordBatchReader <- function(reader, stream_ptr) {
+  invisible(.Call(`_arrow_ExportRecordBatchReader`, reader, stream_ptr))
 }
 
-Table__from_dots <- function(lst, schema_sxp, use_threads){
-    .Call(`_arrow_Table__from_dots`, lst, schema_sxp, use_threads)
+Table__from_dots <- function(lst, schema_sxp, use_threads) {
+  .Call(`_arrow_Table__from_dots`, lst, schema_sxp, use_threads)
 }
 
-vec_to_arrow <- function(x, s_type){
-    .Call(`_arrow_vec_to_arrow`, x, s_type)
+vec_to_arrow <- function(x, s_type) {
+  .Call(`_arrow_vec_to_arrow`, x, s_type)
 }
 
-DictionaryArray__FromArrays <- function(type, indices, dict){
-    .Call(`_arrow_DictionaryArray__FromArrays`, type, indices, dict)
+DictionaryArray__FromArrays <- function(type, indices, dict) {
+  .Call(`_arrow_DictionaryArray__FromArrays`, type, indices, dict)
 }
 
-RecordBatch__num_columns <- function(x){
-    .Call(`_arrow_RecordBatch__num_columns`, x)
+RecordBatch__num_columns <- function(x) {
+  .Call(`_arrow_RecordBatch__num_columns`, x)
 }
 
-RecordBatch__num_rows <- function(x){
-    .Call(`_arrow_RecordBatch__num_rows`, x)
+RecordBatch__num_rows <- function(x) {
+  .Call(`_arrow_RecordBatch__num_rows`, x)
 }
 
-RecordBatch__schema <- function(x){
-    .Call(`_arrow_RecordBatch__schema`, x)
+RecordBatch__schema <- function(x) {
+  .Call(`_arrow_RecordBatch__schema`, x)
 }
 
-RecordBatch__RenameColumns <- function(batch, names){
-    .Call(`_arrow_RecordBatch__RenameColumns`, batch, names)
+RecordBatch__RenameColumns <- function(batch, names) {
+  .Call(`_arrow_RecordBatch__RenameColumns`, batch, names)
 }
 
-RecordBatch__ReplaceSchemaMetadata <- function(x, metadata){
-    .Call(`_arrow_RecordBatch__ReplaceSchemaMetadata`, x, metadata)
+RecordBatch__ReplaceSchemaMetadata <- function(x, metadata) {
+  .Call(`_arrow_RecordBatch__ReplaceSchemaMetadata`, x, metadata)
 }
 
-RecordBatch__columns <- function(batch){
-    .Call(`_arrow_RecordBatch__columns`, batch)
+RecordBatch__columns <- function(batch) {
+  .Call(`_arrow_RecordBatch__columns`, batch)
 }
 
-RecordBatch__column <- function(batch, i){
-    .Call(`_arrow_RecordBatch__column`, batch, i)
+RecordBatch__column <- function(batch, i) {
+  .Call(`_arrow_RecordBatch__column`, batch, i)
 }
 
-RecordBatch__GetColumnByName <- function(batch, name){
-    .Call(`_arrow_RecordBatch__GetColumnByName`, batch, name)
+RecordBatch__GetColumnByName <- function(batch, name) {
+  .Call(`_arrow_RecordBatch__GetColumnByName`, batch, name)
 }
 
-RecordBatch__SelectColumns <- function(batch, indices){
-    .Call(`_arrow_RecordBatch__SelectColumns`, batch, indices)
+RecordBatch__SelectColumns <- function(batch, indices) {
+  .Call(`_arrow_RecordBatch__SelectColumns`, batch, indices)
 }
 
-RecordBatch__Equals <- function(self, other, check_metadata){
-    .Call(`_arrow_RecordBatch__Equals`, self, other, check_metadata)
+RecordBatch__Equals <- function(self, other, check_metadata) {
+  .Call(`_arrow_RecordBatch__Equals`, self, other, check_metadata)
 }
 
-RecordBatch__AddColumn <- function(batch, i, field, column){
-    .Call(`_arrow_RecordBatch__AddColumn`, batch, i, field, column)
+RecordBatch__AddColumn <- function(batch, i, field, column) {
+  .Call(`_arrow_RecordBatch__AddColumn`, batch, i, field, column)
 }
 
-RecordBatch__SetColumn <- function(batch, i, field, column){
-    .Call(`_arrow_RecordBatch__SetColumn`, batch, i, field, column)
+RecordBatch__SetColumn <- function(batch, i, field, column) {
+  .Call(`_arrow_RecordBatch__SetColumn`, batch, i, field, column)
 }
 
-RecordBatch__RemoveColumn <- function(batch, i){
-    .Call(`_arrow_RecordBatch__RemoveColumn`, batch, i)
+RecordBatch__RemoveColumn <- function(batch, i) {
+  .Call(`_arrow_RecordBatch__RemoveColumn`, batch, i)
 }
 
-RecordBatch__column_name <- function(batch, i){
-    .Call(`_arrow_RecordBatch__column_name`, batch, i)
+RecordBatch__column_name <- function(batch, i) {
+  .Call(`_arrow_RecordBatch__column_name`, batch, i)
 }
 
-RecordBatch__names <- function(batch){
-    .Call(`_arrow_RecordBatch__names`, batch)
+RecordBatch__names <- function(batch) {
+  .Call(`_arrow_RecordBatch__names`, batch)
 }
 
-RecordBatch__Slice1 <- function(self, offset){
-    .Call(`_arrow_RecordBatch__Slice1`, self, offset)
+RecordBatch__Slice1 <- function(self, offset) {
+  .Call(`_arrow_RecordBatch__Slice1`, self, offset)
 }
 
-RecordBatch__Slice2 <- function(self, offset, length){
-    .Call(`_arrow_RecordBatch__Slice2`, self, offset, length)
+RecordBatch__Slice2 <- function(self, offset, length) {
+  .Call(`_arrow_RecordBatch__Slice2`, self, offset, length)
 }
 
-ipc___SerializeRecordBatch__Raw <- function(batch){
-    .Call(`_arrow_ipc___SerializeRecordBatch__Raw`, batch)
+ipc___SerializeRecordBatch__Raw <- function(batch) {
+  .Call(`_arrow_ipc___SerializeRecordBatch__Raw`, batch)
 }
 
-ipc___ReadRecordBatch__InputStream__Schema <- function(stream, schema){
-    .Call(`_arrow_ipc___ReadRecordBatch__InputStream__Schema`, stream, schema)
+ipc___ReadRecordBatch__InputStream__Schema <- function(stream, schema) {
+  .Call(`_arrow_ipc___ReadRecordBatch__InputStream__Schema`, stream, schema)
 }
 
-RecordBatch__from_arrays <- function(schema_sxp, lst){
-    .Call(`_arrow_RecordBatch__from_arrays`, schema_sxp, lst)
+RecordBatch__from_arrays <- function(schema_sxp, lst) {
+  .Call(`_arrow_RecordBatch__from_arrays`, schema_sxp, lst)
 }
 
-RecordBatchReader__schema <- function(reader){
-    .Call(`_arrow_RecordBatchReader__schema`, reader)
+RecordBatchReader__schema <- function(reader) {
+  .Call(`_arrow_RecordBatchReader__schema`, reader)
 }
 
-RecordBatchReader__ReadNext <- function(reader){
-    .Call(`_arrow_RecordBatchReader__ReadNext`, reader)
+RecordBatchReader__ReadNext <- function(reader) {
+  .Call(`_arrow_RecordBatchReader__ReadNext`, reader)
 }
 
-RecordBatchReader__batches <- function(reader){
-    .Call(`_arrow_RecordBatchReader__batches`, reader)
+RecordBatchReader__batches <- function(reader) {
+  .Call(`_arrow_RecordBatchReader__batches`, reader)
 }
 
-Table__from_RecordBatchReader <- function(reader){
-    .Call(`_arrow_Table__from_RecordBatchReader`, reader)
+Table__from_RecordBatchReader <- function(reader) {
+  .Call(`_arrow_Table__from_RecordBatchReader`, reader)
 }
 
-ipc___RecordBatchStreamReader__Open <- function(stream){
-    .Call(`_arrow_ipc___RecordBatchStreamReader__Open`, stream)
+ipc___RecordBatchStreamReader__Open <- function(stream) {
+  .Call(`_arrow_ipc___RecordBatchStreamReader__Open`, stream)
 }
 
-ipc___RecordBatchFileReader__schema <- function(reader){
-    .Call(`_arrow_ipc___RecordBatchFileReader__schema`, reader)
+ipc___RecordBatchFileReader__schema <- function(reader) {
+  .Call(`_arrow_ipc___RecordBatchFileReader__schema`, reader)
 }
 
-ipc___RecordBatchFileReader__num_record_batches <- function(reader){
-    .Call(`_arrow_ipc___RecordBatchFileReader__num_record_batches`, reader)
+ipc___RecordBatchFileReader__num_record_batches <- function(reader) {
+  .Call(`_arrow_ipc___RecordBatchFileReader__num_record_batches`, reader)
 }
 
-ipc___RecordBatchFileReader__ReadRecordBatch <- function(reader, i){
-    .Call(`_arrow_ipc___RecordBatchFileReader__ReadRecordBatch`, reader, i)
+ipc___RecordBatchFileReader__ReadRecordBatch <- function(reader, i) {
+  .Call(`_arrow_ipc___RecordBatchFileReader__ReadRecordBatch`, reader, i)
 }
 
-ipc___RecordBatchFileReader__Open <- function(file){
-    .Call(`_arrow_ipc___RecordBatchFileReader__Open`, file)
+ipc___RecordBatchFileReader__Open <- function(file) {
+  .Call(`_arrow_ipc___RecordBatchFileReader__Open`, file)
 }
 
-Table__from_RecordBatchFileReader <- function(reader){
-    .Call(`_arrow_Table__from_RecordBatchFileReader`, reader)
+Table__from_RecordBatchFileReader <- function(reader) {
+  .Call(`_arrow_Table__from_RecordBatchFileReader`, reader)
 }
 
-ipc___RecordBatchFileReader__batches <- function(reader){
-    .Call(`_arrow_ipc___RecordBatchFileReader__batches`, reader)
+ipc___RecordBatchFileReader__batches <- function(reader) {
+  .Call(`_arrow_ipc___RecordBatchFileReader__batches`, reader)
 }
 
-ipc___RecordBatchWriter__WriteRecordBatch <- function(batch_writer, batch){
-    invisible(.Call(`_arrow_ipc___RecordBatchWriter__WriteRecordBatch`, batch_writer, batch))
+ipc___RecordBatchWriter__WriteRecordBatch <- function(batch_writer, batch) {
+  invisible(.Call(`_arrow_ipc___RecordBatchWriter__WriteRecordBatch`, batch_writer, batch))
 }
 
-ipc___RecordBatchWriter__WriteTable <- function(batch_writer, table){
-    invisible(.Call(`_arrow_ipc___RecordBatchWriter__WriteTable`, batch_writer, table))
+ipc___RecordBatchWriter__WriteTable <- function(batch_writer, table) {
+  invisible(.Call(`_arrow_ipc___RecordBatchWriter__WriteTable`, batch_writer, table))
 }
 
-ipc___RecordBatchWriter__Close <- function(batch_writer){
-    invisible(.Call(`_arrow_ipc___RecordBatchWriter__Close`, batch_writer))
+ipc___RecordBatchWriter__Close <- function(batch_writer) {
+  invisible(.Call(`_arrow_ipc___RecordBatchWriter__Close`, batch_writer))
 }
 
-ipc___RecordBatchFileWriter__Open <- function(stream, schema, use_legacy_format, metadata_version){
-    .Call(`_arrow_ipc___RecordBatchFileWriter__Open`, stream, schema, use_legacy_format, metadata_version)
+ipc___RecordBatchFileWriter__Open <- function(stream, schema, use_legacy_format, metadata_version) {
+  .Call(`_arrow_ipc___RecordBatchFileWriter__Open`, stream, schema, use_legacy_format, metadata_version)
 }
 
-ipc___RecordBatchStreamWriter__Open <- function(stream, schema, use_legacy_format, metadata_version){
-    .Call(`_arrow_ipc___RecordBatchStreamWriter__Open`, stream, schema, use_legacy_format, metadata_version)
+ipc___RecordBatchStreamWriter__Open <- function(stream, schema, use_legacy_format, metadata_version) {
+  .Call(`_arrow_ipc___RecordBatchStreamWriter__Open`, stream, schema, use_legacy_format, metadata_version)
 }
 
-Array__GetScalar <- function(x, i){
-    .Call(`_arrow_Array__GetScalar`, x, i)
+Array__GetScalar <- function(x, i) {
+  .Call(`_arrow_Array__GetScalar`, x, i)
 }
 
-Scalar__ToString <- function(s){
-    .Call(`_arrow_Scalar__ToString`, s)
+Scalar__ToString <- function(s) {
+  .Call(`_arrow_Scalar__ToString`, s)
 }
 
-StructScalar__field <- function(s, i){
-    .Call(`_arrow_StructScalar__field`, s, i)
+StructScalar__field <- function(s, i) {
+  .Call(`_arrow_StructScalar__field`, s, i)
 }
 
-StructScalar__GetFieldByName <- function(s, name){
-    .Call(`_arrow_StructScalar__GetFieldByName`, s, name)
+StructScalar__GetFieldByName <- function(s, name) {
+  .Call(`_arrow_StructScalar__GetFieldByName`, s, name)
 }
 
-Scalar__as_vector <- function(scalar){
-    .Call(`_arrow_Scalar__as_vector`, scalar)
+Scalar__as_vector <- function(scalar) {
+  .Call(`_arrow_Scalar__as_vector`, scalar)
 }
 
-MakeArrayFromScalar <- function(scalar, n){
-    .Call(`_arrow_MakeArrayFromScalar`, scalar, n)
+MakeArrayFromScalar <- function(scalar, n) {
+  .Call(`_arrow_MakeArrayFromScalar`, scalar, n)
 }
 
-Scalar__is_valid <- function(s){
-    .Call(`_arrow_Scalar__is_valid`, s)
+Scalar__is_valid <- function(s) {
+  .Call(`_arrow_Scalar__is_valid`, s)
 }
 
-Scalar__type <- function(s){
-    .Call(`_arrow_Scalar__type`, s)
+Scalar__type <- function(s) {
+  .Call(`_arrow_Scalar__type`, s)
 }
 
-Scalar__Equals <- function(lhs, rhs){
-    .Call(`_arrow_Scalar__Equals`, lhs, rhs)
+Scalar__Equals <- function(lhs, rhs) {
+  .Call(`_arrow_Scalar__Equals`, lhs, rhs)
 }
 
-Scalar__ApproxEquals <- function(lhs, rhs){
-    .Call(`_arrow_Scalar__ApproxEquals`, lhs, rhs)
+Scalar__ApproxEquals <- function(lhs, rhs) {
+  .Call(`_arrow_Scalar__ApproxEquals`, lhs, rhs)
 }
 
-schema_ <- function(fields){
-    .Call(`_arrow_schema_`, fields)
+schema_ <- function(fields) {
+  .Call(`_arrow_schema_`, fields)
 }
 
-Schema__ToString <- function(s){
-    .Call(`_arrow_Schema__ToString`, s)
+Schema__ToString <- function(s) {
+  .Call(`_arrow_Schema__ToString`, s)
 }
 
-Schema__num_fields <- function(s){
-    .Call(`_arrow_Schema__num_fields`, s)
+Schema__num_fields <- function(s) {
+  .Call(`_arrow_Schema__num_fields`, s)
 }
 
-Schema__field <- function(s, i){
-    .Call(`_arrow_Schema__field`, s, i)
+Schema__field <- function(s, i) {
+  .Call(`_arrow_Schema__field`, s, i)
 }
 
-Schema__AddField <- function(s, i, field){
-    .Call(`_arrow_Schema__AddField`, s, i, field)
+Schema__AddField <- function(s, i, field) {
+  .Call(`_arrow_Schema__AddField`, s, i, field)
 }
 
-Schema__SetField <- function(s, i, field){
-    .Call(`_arrow_Schema__SetField`, s, i, field)
+Schema__SetField <- function(s, i, field) {
+  .Call(`_arrow_Schema__SetField`, s, i, field)
 }
 
-Schema__RemoveField <- function(s, i){
-    .Call(`_arrow_Schema__RemoveField`, s, i)
+Schema__RemoveField <- function(s, i) {
+  .Call(`_arrow_Schema__RemoveField`, s, i)
 }
 
-Schema__GetFieldByName <- function(s, x){
-    .Call(`_arrow_Schema__GetFieldByName`, s, x)
+Schema__GetFieldByName <- function(s, x) {
+  .Call(`_arrow_Schema__GetFieldByName`, s, x)
 }
 
-Schema__fields <- function(schema){
-    .Call(`_arrow_Schema__fields`, schema)
+Schema__fields <- function(schema) {
+  .Call(`_arrow_Schema__fields`, schema)
 }
 
-Schema__field_names <- function(schema){
-    .Call(`_arrow_Schema__field_names`, schema)
+Schema__field_names <- function(schema) {
+  .Call(`_arrow_Schema__field_names`, schema)
 }
 
-Schema__HasMetadata <- function(schema){
-    .Call(`_arrow_Schema__HasMetadata`, schema)
+Schema__HasMetadata <- function(schema) {
+  .Call(`_arrow_Schema__HasMetadata`, schema)
 }
 
-Schema__metadata <- function(schema){
-    .Call(`_arrow_Schema__metadata`, schema)
+Schema__metadata <- function(schema) {
+  .Call(`_arrow_Schema__metadata`, schema)
 }
 
-Schema__WithMetadata <- function(schema, metadata){
-    .Call(`_arrow_Schema__WithMetadata`, schema, metadata)
+Schema__WithMetadata <- function(schema, metadata) {
+  .Call(`_arrow_Schema__WithMetadata`, schema, metadata)
 }
 
-Schema__serialize <- function(schema){
-    .Call(`_arrow_Schema__serialize`, schema)
+Schema__serialize <- function(schema) {
+  .Call(`_arrow_Schema__serialize`, schema)
 }
 
-Schema__Equals <- function(schema, other, check_metadata){
-    .Call(`_arrow_Schema__Equals`, schema, other, check_metadata)
+Schema__Equals <- function(schema, other, check_metadata) {
+  .Call(`_arrow_Schema__Equals`, schema, other, check_metadata)
 }
 
-arrow__UnifySchemas <- function(schemas){
-    .Call(`_arrow_arrow__UnifySchemas`, schemas)
+arrow__UnifySchemas <- function(schemas) {
+  .Call(`_arrow_arrow__UnifySchemas`, schemas)
 }
 
-Table__num_columns <- function(x){
-    .Call(`_arrow_Table__num_columns`, x)
+Table__num_columns <- function(x) {
+  .Call(`_arrow_Table__num_columns`, x)
 }
 
-Table__num_rows <- function(x){
-    .Call(`_arrow_Table__num_rows`, x)
+Table__num_rows <- function(x) {
+  .Call(`_arrow_Table__num_rows`, x)
 }
 
-Table__schema <- function(x){
-    .Call(`_arrow_Table__schema`, x)
+Table__schema <- function(x) {
+  .Call(`_arrow_Table__schema`, x)
 }
 
-Table__ReplaceSchemaMetadata <- function(x, metadata){
-    .Call(`_arrow_Table__ReplaceSchemaMetadata`, x, metadata)
+Table__ReplaceSchemaMetadata <- function(x, metadata) {
+  .Call(`_arrow_Table__ReplaceSchemaMetadata`, x, metadata)
 }
 
-Table__column <- function(table, i){
-    .Call(`_arrow_Table__column`, table, i)
+Table__column <- function(table, i) {
+  .Call(`_arrow_Table__column`, table, i)
 }
 
-Table__field <- function(table, i){
-    .Call(`_arrow_Table__field`, table, i)
+Table__field <- function(table, i) {
+  .Call(`_arrow_Table__field`, table, i)
 }
 
-Table__columns <- function(table){
-    .Call(`_arrow_Table__columns`, table)
+Table__columns <- function(table) {
+  .Call(`_arrow_Table__columns`, table)
 }
 
-Table__ColumnNames <- function(table){
-    .Call(`_arrow_Table__ColumnNames`, table)
+Table__ColumnNames <- function(table) {
+  .Call(`_arrow_Table__ColumnNames`, table)
 }
 
-Table__RenameColumns <- function(table, names){
-    .Call(`_arrow_Table__RenameColumns`, table, names)
+Table__RenameColumns <- function(table, names) {
+  .Call(`_arrow_Table__RenameColumns`, table, names)
 }
 
-Table__Slice1 <- function(table, offset){
-    .Call(`_arrow_Table__Slice1`, table, offset)
+Table__Slice1 <- function(table, offset) {
+  .Call(`_arrow_Table__Slice1`, table, offset)
 }
 
-Table__Slice2 <- function(table, offset, length){
-    .Call(`_arrow_Table__Slice2`, table, offset, length)
+Table__Slice2 <- function(table, offset, length) {
+  .Call(`_arrow_Table__Slice2`, table, offset, length)
 }
 
-Table__Equals <- function(lhs, rhs, check_metadata){
-    .Call(`_arrow_Table__Equals`, lhs, rhs, check_metadata)
+Table__Equals <- function(lhs, rhs, check_metadata) {
+  .Call(`_arrow_Table__Equals`, lhs, rhs, check_metadata)
 }
 
-Table__Validate <- function(table){
-    .Call(`_arrow_Table__Validate`, table)
+Table__Validate <- function(table) {
+  .Call(`_arrow_Table__Validate`, table)
 }
 
-Table__ValidateFull <- function(table){
-    .Call(`_arrow_Table__ValidateFull`, table)
+Table__ValidateFull <- function(table) {
+  .Call(`_arrow_Table__ValidateFull`, table)
 }
 
-Table__GetColumnByName <- function(table, name){
-    .Call(`_arrow_Table__GetColumnByName`, table, name)
+Table__GetColumnByName <- function(table, name) {
+  .Call(`_arrow_Table__GetColumnByName`, table, name)
 }
 
-Table__RemoveColumn <- function(table, i){
-    .Call(`_arrow_Table__RemoveColumn`, table, i)
+Table__RemoveColumn <- function(table, i) {
+  .Call(`_arrow_Table__RemoveColumn`, table, i)
 }
 
-Table__AddColumn <- function(table, i, field, column){
-    .Call(`_arrow_Table__AddColumn`, table, i, field, column)
+Table__AddColumn <- function(table, i, field, column) {
+  .Call(`_arrow_Table__AddColumn`, table, i, field, column)
 }
 
-Table__SetColumn <- function(table, i, field, column){
-    .Call(`_arrow_Table__SetColumn`, table, i, field, column)
+Table__SetColumn <- function(table, i, field, column) {
+  .Call(`_arrow_Table__SetColumn`, table, i, field, column)
 }
 
-Table__SelectColumns <- function(table, indices){
-    .Call(`_arrow_Table__SelectColumns`, table, indices)
+Table__SelectColumns <- function(table, indices) {
+  .Call(`_arrow_Table__SelectColumns`, table, indices)
 }
 
-all_record_batches <- function(lst){
-    .Call(`_arrow_all_record_batches`, lst)
+all_record_batches <- function(lst) {
+  .Call(`_arrow_all_record_batches`, lst)
 }
 
-Table__from_record_batches <- function(batches, schema_sxp){
-    .Call(`_arrow_Table__from_record_batches`, batches, schema_sxp)
+Table__from_record_batches <- function(batches, schema_sxp) {
+  .Call(`_arrow_Table__from_record_batches`, batches, schema_sxp)
 }
 
-GetCpuThreadPoolCapacity <- function(){
-    .Call(`_arrow_GetCpuThreadPoolCapacity`)
+GetCpuThreadPoolCapacity <- function() {
+  .Call(`_arrow_GetCpuThreadPoolCapacity`)
 }
 
-SetCpuThreadPoolCapacity <- function(threads){
-    invisible(.Call(`_arrow_SetCpuThreadPoolCapacity`, threads))
+SetCpuThreadPoolCapacity <- function(threads) {
+  invisible(.Call(`_arrow_SetCpuThreadPoolCapacity`, threads))
 }
 
-GetIOThreadPoolCapacity <- function(){
-    .Call(`_arrow_GetIOThreadPoolCapacity`)
+GetIOThreadPoolCapacity <- function() {
+  .Call(`_arrow_GetIOThreadPoolCapacity`)
 }
 
-SetIOThreadPoolCapacity <- function(threads){
-    invisible(.Call(`_arrow_SetIOThreadPoolCapacity`, threads))
+SetIOThreadPoolCapacity <- function(threads) {
+  invisible(.Call(`_arrow_SetIOThreadPoolCapacity`, threads))
 }
 
-Array__infer_type <- function(x){
-    .Call(`_arrow_Array__infer_type`, x)
+Array__infer_type <- function(x) {
+  .Call(`_arrow_Array__infer_type`, x)
 }
 
-
-
diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R
index 607be82c36b..9972d4796a8 100644
--- a/r/R/dplyr-functions.R
+++ b/r/R/dplyr-functions.R
@@ -784,8 +784,8 @@ nse_funcs$case_when <- function(...) {
 # @param data Expression (these are all currently a single field)
 # @param options list of function options, as passed to call_function
 # For group-by aggregation, `hash_` gets prepended to the function name.
-# So to see a list of available hash aggregation functions, do
-# list_compute_functions("^hash_")
+# So to see a list of available hash aggregation functions,
+# you can use list_compute_functions("^hash_")
 agg_funcs <- list()
 agg_funcs$sum <- function(x, na.rm = FALSE) {
   list(
diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R
index 5677afb904a..87e1157dfc7 100644
--- a/r/R/dplyr-summarize.R
+++ b/r/R/dplyr-summarize.R
@@ -69,7 +69,7 @@ do_arrow_summarize <- function(.data, ..., .groups = NULL) {
       stop(msg, call. = FALSE)
     }
     # Put it in the data mask too?
-    # mask[[new_var]] <- mask$.data[[new_var]] <- results[[new_var]]
+    # Should we: mask[[new_var]] <- mask$.data[[new_var]] <- results[[new_var]]
   }
 
   # Now, from that, split out the data (expressions) and options
diff --git a/r/R/duckdb.R b/r/R/duckdb.R
index c57edf0df23..edef5cdc143 100644
--- a/r/R/duckdb.R
+++ b/r/R/duckdb.R
@@ -96,7 +96,9 @@ run_duckdb_examples <- function() {
     requireNamespace("duckdb", quietly = TRUE) &&
     packageVersion("duckdb") > "0.2.7" &&
     requireNamespace("dplyr", quietly = TRUE) &&
-    requireNamespace("dbplyr", quietly = TRUE)
+    requireNamespace("dbplyr", quietly = TRUE) &&
+    # These examples are flaking: https://github.com/duckdb/duckdb/issues/2100
+    FALSE
 }
 
 # Adapted from dbplyr
@@ -122,7 +124,3 @@ duckdb_disconnector <- function(con, tbl_name) {
   })
   environment()
 }
-
-run_duckdb_examples <- function() {
-  arrow_with_dataset() && requireNamespace("duckdb", quietly = TRUE) && packageVersion("duckdb") > "0.2.7" && requireNamespace("dplyr", quietly = TRUE)
-}
diff --git a/r/data-raw/codegen.R b/r/data-raw/codegen.R
index 20ae3d42a0e..bb0e92eb640 100644
--- a/r/data-raw/codegen.R
+++ b/r/data-raw/codegen.R
@@ -238,8 +238,8 @@ r_functions <- arrow_exports %>%
     }
 
     glue::glue('
-    {name} <- function({list_params}){{
-        {call}
+    {name} <- function({list_params}) {{
+      {call}
     }}
 
     ',
@@ -253,7 +253,6 @@ arrow_exports_r <- glue::glue('
 # Generated by using data-raw/codegen.R -> do not edit by hand
 
 {r_functions}
-
-\n')
+')
 
 write_if_modified(arrow_exports_r, "R/arrowExports.R")
diff --git a/r/extra-tests/write-files.R b/r/extra-tests/write-files.R
index e11405d67bf..4495507f3b3 100644
--- a/r/extra-tests/write-files.R
+++ b/r/extra-tests/write-files.R
@@ -39,4 +39,3 @@ write_feather(example_with_metadata_v1, "extra-tests/files/ex_data_v1.feather",
 write_ipc_stream(example_with_metadata, "extra-tests/files/ex_data.stream")
 
 write_parquet(example_with_extra_metadata, "extra-tests/files/ex_data_extra_metadata.parquet")
-
diff --git a/r/tests/testthat/test-duckdb.R b/r/tests/testthat/test-duckdb.R
index aec54d8eed4..cdfcb62d02d 100644
--- a/r/tests/testthat/test-duckdb.R
+++ b/r/tests/testthat/test-duckdb.R
@@ -18,6 +18,8 @@
 skip_if_not_installed("duckdb", minimum_version = "0.2.8")
 skip_if_not_installed("dbplyr")
 skip_if_not_available("dataset")
+# when we remove this, we should also remove the FALSE in run_duckdb_examples
+skip("These tests are flaking: https://github.com/duckdb/duckdb/issues/2100")
 library(duckdb)
 library(dplyr)
 
diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R
index f638b7b8a9b..a321be8f42a 100644
--- a/r/tools/nixlibs.R
+++ b/r/tools/nixlibs.R
@@ -163,7 +163,7 @@ os_release <- function() {
       out$codename <- vals[["VERSION_CODENAME"]]
     } else {
       # This probably isn't right, maybe could extract codename from pretty name?
-      out$codename = vals[["PRETTY_NAME"]]
+      out$codename <- vals[["PRETTY_NAME"]]
     }
     out
   } else {
@@ -410,14 +410,16 @@ cmake_version <- function(cmd = "cmake") {
       which_line <- grep(pat, raw_version)
       package_version(sub(pat, "\\1", raw_version[which_line]))
     },
-    error = function(e) return(0)
+    error = function(e) {
+      return(0)
+    }
   )
 }
 
 with_s3_support <- function(env_vars) {
   arrow_s3 <- toupper(Sys.getenv("ARROW_S3")) == "ON" || tolower(Sys.getenv("LIBARROW_MINIMAL")) == "false"
   # but if ARROW_S3=OFF explicitly, we are definitely off, so override
-  if (toupper(Sys.getenv("ARROW_S3")) == "OFF" ) {
+  if (toupper(Sys.getenv("ARROW_S3")) == "OFF") {
     arrow_s3 <- FALSE
   }
   if (arrow_s3) {
@@ -441,7 +443,7 @@ with_s3_support <- function(env_vars) {
 with_mimalloc <- function(env_vars) {
   arrow_mimalloc <- toupper(Sys.getenv("ARROW_MIMALLOC")) == "ON" || tolower(Sys.getenv("LIBARROW_MINIMAL")) == "false"
   if (arrow_mimalloc) {
-  # User wants mimalloc. If they're using gcc, let's make sure the version is >= 4.9
+    # User wants mimalloc. If they're using gcc, let's make sure the version is >= 4.9
     if (isTRUE(cmake_gcc_version(env_vars) < "4.9")) {
       cat("**** mimalloc support not available for gcc < 4.9; building with ARROW_MIMALLOC=OFF\n")
       arrow_mimalloc <- FALSE
@@ -515,6 +517,6 @@ if (!file.exists(paste0(dst_dir, "/include/arrow/api.h"))) {
       cat("*** Proceeding without C++ dependencies\n")
     }
   } else {
-   cat("*** Proceeding without C++ dependencies\n")
+    cat("*** Proceeding without C++ dependencies\n")
   }
 }
diff --git a/r/vignettes/developing.Rmd b/r/vignettes/developing.Rmd
index ddb1ace6ccc..f5435c06797 100644
--- a/r/vignettes/developing.Rmd
+++ b/r/vignettes/developing.Rmd
@@ -105,7 +105,7 @@ sudo apt install -y cmake libcurl4-openssl-dev libssl-dev
 
 You can choose to build and then install the Arrow library into a user-defined directory or into a system-level directory. You only need to do one of these two options.
 
-It is recommended that you install the arrow library to a user-level directory to be used in development. This is so that the development version you are using doesn't overwrite a released version of Arrow you may have installed. You are also able to have more than one version of the Arrow library to link to with this approach (by using different `ARROW_HOME` directories for the different versions). This approach also matches the recommendations for other Arrow bindings like [Python](http://arrow.apache.org/docs/developers/python.html). 
+It is recommended that you install the arrow library to a user-level directory to be used in development. This is so that the development version you are using doesn't overwrite a released version of Arrow you may have installed. You are also able to have more than one version of the Arrow library to link to with this approach (by using different `ARROW_HOME` directories for the different versions). This approach also matches the recommendations for other Arrow bindings like [Python](http://arrow.apache.org/docs/developers/python.html).
 
 #### Configure for installing to a user directory
 
@@ -280,11 +280,11 @@ cmake \
   ..
 ```
 </p>
-</details>  
+</details>
 
 ### Documentation
 
-The documentation for the R package uses features of `roxygen2` that haven't yet been released on CRAN, such as conditional inclusion of examples via the `@examplesIf` tag.  If you are making changes which require updating the documentation, please install the development version of `roxygen2` from GitHub. 
+The documentation for the R package uses features of `roxygen2` that haven't yet been released on CRAN, such as conditional inclusion of examples via the `@examplesIf` tag.  If you are making changes which require updating the documentation, please install the development version of `roxygen2` from GitHub.
 
 ```{r}
 remotes::install_github("r-lib/roxygen2")
@@ -429,9 +429,9 @@ To run the [lintr](https://github.com/jimhester/lintr) locally, install the lint
 One can automatically change the formatting of the code in the package using the [styler](https://styler.r-lib.org/) package. There are two ways to do this:
 
 1. Use the comment bot to do this automatically with the command `@github-actions autotune` on a PR and commit it back to the branch.
-2. Locally, with the command `styler::style_pkg(exclude_files = c("tests/testthat/latin1.R", "data-raw/codegen.R"))` note the two excluded files which should not be styled.
+2. Locally, with the command `make style` (for only the files changed), `make style-all` (for all files), or use `styler::style_pkg(exclude_files = c("tests/testthat/latin1.R", "data-raw/codegen.R"))` note the two excluded files which should not be styled.
 
-The styler package will fix many styling errors, thought not all lintr errors are automatically fixable with styler.
+The styler package will fix many styling errors, thought not all lintr errors are automatically fixable with styler. The list of files we habitually do not style is in `r/.styler_excludes.R`.
 
 ## Editing C++ code in the R package
 
@@ -440,12 +440,12 @@ C++ code in `src/`. This is because we have some features that are only enabled
 and built conditionally during build time. If you change C++ code in the R
 package, you will need to set the `ARROW_R_DEV` environment variable to `true`
 (optionally, add it to your `~/.Renviron` file to persist across sessions) so
-that the `data-raw/codegen.R` file is used for code generation. The `Makefile` 
+that the `data-raw/codegen.R` file is used for code generation. The `Makefile`
 commands also handles this automatically.
 
 We use Google C++ style in our C++ code. The easiest way to accomplish this is
-use an editors/IDE that formats your code for you. Many popular editors/IDEs 
-have support for running `clang-format` on C++ files when you save them. 
+use an editors/IDE that formats your code for you. Many popular editors/IDEs
+have support for running `clang-format` on C++ files when you save them.
 Installing/enabling the appropriate plugin may save you much frustration.
 
 Check for style errors with
@@ -466,7 +466,7 @@ isn’t found, you can explicitly provide the path to it like
 this by installing LLVM via Homebrew and running the script as
 `CLANG_FORMAT=$(brew --prefix llvm@8)/bin/clang-format ./lint.sh`
 
-_Note_ that the lint script requires Python 3 and the Python dependencies 
+_Note_ that the lint script requires Python 3 and the Python dependencies
 (note that `cmake_format is pinned to a specific version):
 
 * autopep8

From a29d20a7d366469095261f65378b69d03b9b6fdb Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Thu, 5 Aug 2021 09:10:54 +0900
Subject: [PATCH 715/719] ARROW-13565: [Packaging][Ubuntu] Drop support for
 20.10

It reached EOL.

Closes #10881 from kou/linux-ubuntu-drop-20.10

Authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 dev/release/verify-release-candidate.sh       |  4 +-
 dev/tasks/linux-packages/Rakefile             |  4 +-
 .../apt/ubuntu-groovy/Dockerfile              | 41 ----------
 .../apt/ubuntu-xenial/Dockerfile              | 41 ----------
 .../apache-arrow/apt/ubuntu-groovy-arm64/from | 18 -----
 .../apache-arrow/apt/ubuntu-groovy/Dockerfile | 80 -------------------
 dev/tasks/linux-packages/package-task.rb      |  2 -
 dev/tasks/tasks.yml                           |  1 -
 8 files changed, 4 insertions(+), 187 deletions(-)
 delete mode 100644 dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-groovy/Dockerfile
 delete mode 100644 dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-xenial/Dockerfile
 delete mode 100644 dev/tasks/linux-packages/apache-arrow/apt/ubuntu-groovy-arm64/from
 delete mode 100644 dev/tasks/linux-packages/apache-arrow/apt/ubuntu-groovy/Dockerfile

diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh
index 3360b8d2fd5..d6d2140a8e6 100755
--- a/dev/release/verify-release-candidate.sh
+++ b/dev/release/verify-release-candidate.sh
@@ -143,8 +143,8 @@ test_apt() {
                 "arm64v8/ubuntu:bionic" \
                 "ubuntu:focal" \
                 "arm64v8/ubuntu:focal" \
-                "ubuntu:groovy" \
-                "arm64v8/ubuntu:groovy"; do \
+                "ubuntu:hirsute" \
+                "arm64v8/ubuntu:hirsute"; do \
     case "${target}" in
       arm64v8/debian:bullseye)
         # qemu-user-static in Ubuntu 20.04 has a crash bug:
diff --git a/dev/tasks/linux-packages/Rakefile b/dev/tasks/linux-packages/Rakefile
index 53a372cf5c2..13a15877a1e 100644
--- a/dev/tasks/linux-packages/Rakefile
+++ b/dev/tasks/linux-packages/Rakefile
@@ -165,8 +165,8 @@ class LocalBinaryTask < BinaryTask
       # "ubuntu-bionic-arm64",
       "ubuntu-focal",
       # "ubuntu-focal-arm64",
-      "ubuntu-groovy",
-      # "ubuntu-groovy-arm64",
+      "ubuntu-hirsute",
+      # "ubuntu-hirsute-arm64",
     ]
   end
 
diff --git a/dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-groovy/Dockerfile b/dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-groovy/Dockerfile
deleted file mode 100644
index 7efd5d1df32..00000000000
--- a/dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-groovy/Dockerfile
+++ /dev/null
@@ -1,41 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-FROM ubuntu:groovy
-
-RUN \
-  echo "debconf debconf/frontend select Noninteractive" | \
-    debconf-set-selections
-
-RUN \
-  echo 'APT::Install-Recommends "false";' > \
-    /etc/apt/apt.conf.d/disable-install-recommends
-
-ARG DEBUG
-
-RUN \
-  quiet=$([ "${DEBUG}" = "yes" ] || echo "-qq") && \
-  apt update ${quiet} && \
-  apt install -y -V ${quiet} \
-    build-essential \
-    debhelper \
-    devscripts \
-    fakeroot \
-    gnupg \
-    lsb-release && \
-  apt clean && \
-  rm -rf /var/lib/apt/lists/*
diff --git a/dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-xenial/Dockerfile b/dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-xenial/Dockerfile
deleted file mode 100644
index e05843081ee..00000000000
--- a/dev/tasks/linux-packages/apache-arrow-apt-source/apt/ubuntu-xenial/Dockerfile
+++ /dev/null
@@ -1,41 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-FROM ubuntu:xenial
-
-RUN \
-  echo "debconf debconf/frontend select Noninteractive" | \
-    debconf-set-selections
-
-RUN \
-  echo 'APT::Install-Recommends "false";' > \
-    /etc/apt/apt.conf.d/disable-install-recommends
-
-ARG DEBUG
-
-RUN \
-  quiet=$([ "${DEBUG}" = "yes" ] || echo "-qq") && \
-  apt update ${quiet} && \
-  apt install -y -V ${quiet} \
-    build-essential \
-    debhelper \
-    devscripts \
-    fakeroot \
-    gnupg \
-    lsb-release && \
-  apt clean && \
-  rm -rf /var/lib/apt/lists/*
diff --git a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-groovy-arm64/from b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-groovy-arm64/from
deleted file mode 100644
index d1f6aa9a854..00000000000
--- a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-groovy-arm64/from
+++ /dev/null
@@ -1,18 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-arm64v8/ubuntu:groovy
diff --git a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-groovy/Dockerfile b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-groovy/Dockerfile
deleted file mode 100644
index 4f57a47e24c..00000000000
--- a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-groovy/Dockerfile
+++ /dev/null
@@ -1,80 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-ARG FROM=ubuntu:groovy
-FROM ${FROM}
-
-RUN \
-  echo "debconf debconf/frontend select Noninteractive" | \
-    debconf-set-selections
-
-RUN \
-  echo 'APT::Install-Recommends "false";' > \
-    /etc/apt/apt.conf.d/disable-install-recommends
-
-ARG DEBUG
-RUN \
-  quiet=$([ "${DEBUG}" = "yes" ] || echo "-qq") && \
-  apt update ${quiet} && \
-  apt install -y -V ${quiet} \
-    build-essential \
-    ccache \
-    clang \
-    cmake \
-    debhelper \
-    devscripts \
-    git \
-    gtk-doc-tools \
-    libboost-filesystem-dev \
-    libboost-system-dev \
-    libbrotli-dev \
-    libbz2-dev \
-    libc-ares-dev \
-    libcurl4-openssl-dev \
-    libgirepository1.0-dev \
-    libglib2.0-doc \
-    libgmock-dev \
-    libgoogle-glog-dev \
-    libgrpc++-dev \
-    libgtest-dev \
-    liblz4-dev \
-    libprotoc-dev \
-    libprotobuf-dev \
-    libre2-dev \
-    libsnappy-dev \
-    libssl-dev \
-    libthrift-dev \
-    libutf8proc-dev \
-    libzstd-dev \
-    llvm-dev \
-    lsb-release \
-    ninja-build \
-    pkg-config \
-    protobuf-compiler-grpc \
-    python3-dev \
-    python3-numpy \
-    python3-pip \
-    python3-setuptools \
-    rapidjson-dev \
-    tzdata \
-    zlib1g-dev && \
-  ! apt list | grep -q '^libcuda1' || \
-    apt install -y -V ${quiet} nvidia-cuda-toolkit && \
-  apt clean && \
-  python3 -m pip install --no-use-pep517 meson && \
-  ln -s /usr/local/bin/meson /usr/bin/ && \
-  rm -rf /var/lib/apt/lists/*
diff --git a/dev/tasks/linux-packages/package-task.rb b/dev/tasks/linux-packages/package-task.rb
index 0d894a365bd..394c88fae13 100644
--- a/dev/tasks/linux-packages/package-task.rb
+++ b/dev/tasks/linux-packages/package-task.rb
@@ -266,8 +266,6 @@ def apt_targets_default
       # "ubuntu-bionic-arm64",
       "ubuntu-focal",
       # "ubuntu-focal-arm64",
-      "ubuntu-groovy",
-      # "ubuntu-groovy-arm64",
       "ubuntu-hirsute",
       # "ubuntu-hirsute-arm64",
     ]
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index 99d6082ca99..12fd8ba5496 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -434,7 +434,6 @@ tasks:
                   "debian-bullseye",
                   "ubuntu-bionic",
                   "ubuntu-focal",
-                  "ubuntu-groovy",
                   "ubuntu-hirsute"] %}
   {% for architecture in ["amd64", "arm64"] %}
   {{ target }}-{{ architecture }}:

From 4b6baceb2953dc13b09d7e4b22274319936ae24d Mon Sep 17 00:00:00 2001
From: Fernando Rodriguez <diegodfrf@gmail.com>
Date: Wed, 4 Aug 2021 20:21:15 -0400
Subject: [PATCH 716/719] ARROW-12953: [C++][Compute] Refactor CheckScalar* to
 take Datum arguments

Update shared_ptr<Scalar> and shared_ptr<Arrow> to Datum in CheckScalar* functions

Closes #10878 from diegodfrf/ARROW-12953-Refactor-CheckScalar-to-take-Datum-argum

Authored-by: Fernando Rodriguez <diegodfrf@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 .../compute/kernels/scalar_string_test.cc     |  4 +--
 cpp/src/arrow/compute/kernels/test_util.cc    | 36 ++++---------------
 cpp/src/arrow/compute/kernels/test_util.h     | 28 ++-------------
 3 files changed, 11 insertions(+), 57 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
index 920197ca3c3..1f1a05d9643 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
@@ -1234,9 +1234,9 @@ TYPED_TEST(TestStringKernels, BinaryJoin) {
   auto expected =
       ArrayFromJSON(this->type(), R"(["a--bb--ccc", "", null, "dd", null, "ff--"])");
   CheckScalarBinary("binary_join", ArrayFromJSON(list(this->type()), list_json),
-                    separator, expected);
+                    Datum(separator), expected);
   CheckScalarBinary("binary_join", ArrayFromJSON(large_list(this->type()), list_json),
-                    separator, expected);
+                    Datum(separator), expected);
 
   auto separator_null = MakeNullScalar(this->type());
   expected = ArrayFromJSON(this->type(), R"([null, null, null, null, null, null])");
diff --git a/cpp/src/arrow/compute/kernels/test_util.cc b/cpp/src/arrow/compute/kernels/test_util.cc
index ce8d42e34c2..ceea9cbc15c 100644
--- a/cpp/src/arrow/compute/kernels/test_util.cc
+++ b/cpp/src/arrow/compute/kernels/test_util.cc
@@ -174,10 +174,10 @@ void CheckScalar(std::string func_name, const DatumVector& inputs, Datum expecte
   }
 }
 
-void CheckScalarUnary(std::string func_name, std::shared_ptr<Array> input,
-                      std::shared_ptr<Array> expected, const FunctionOptions* options) {
-  ArrayVector input_vector = {input};
-  CheckScalar(std::move(func_name), GetDatums(input_vector), expected, options);
+void CheckScalarUnary(std::string func_name, Datum input, Datum expected,
+                      const FunctionOptions* options) {
+  std::vector<Datum> input_vector = {std::move(input)};
+  CheckScalar(std::move(func_name), input_vector, expected, options);
 }
 
 void CheckScalarUnary(std::string func_name, std::shared_ptr<DataType> in_ty,
@@ -187,11 +187,6 @@ void CheckScalarUnary(std::string func_name, std::shared_ptr<DataType> in_ty,
                    ArrayFromJSON(out_ty, json_expected), options);
 }
 
-void CheckScalarUnary(std::string func_name, std::shared_ptr<Scalar> input,
-                      std::shared_ptr<Scalar> expected, const FunctionOptions* options) {
-  CheckScalar(std::move(func_name), {input}, expected, options);
-}
-
 void CheckVectorUnary(std::string func_name, Datum input, std::shared_ptr<Array> expected,
                       const FunctionOptions* options) {
   ASSERT_OK_AND_ASSIGN(Datum out, CallFunction(func_name, {input}, options));
@@ -200,27 +195,8 @@ void CheckVectorUnary(std::string func_name, Datum input, std::shared_ptr<Array>
   AssertArraysEqual(*expected, *actual, /*verbose=*/true);
 }
 
-void CheckScalarBinary(std::string func_name, std::shared_ptr<Scalar> left_input,
-                       std::shared_ptr<Scalar> right_input,
-                       std::shared_ptr<Scalar> expected, const FunctionOptions* options) {
-  CheckScalar(std::move(func_name), {left_input, right_input}, expected, options);
-}
-
-void CheckScalarBinary(std::string func_name, std::shared_ptr<Array> left_input,
-                       std::shared_ptr<Array> right_input,
-                       std::shared_ptr<Array> expected, const FunctionOptions* options) {
-  CheckScalar(std::move(func_name), {left_input, right_input}, expected, options);
-}
-
-void CheckScalarBinary(std::string func_name, std::shared_ptr<Array> left_input,
-                       std::shared_ptr<Scalar> right_input,
-                       std::shared_ptr<Array> expected, const FunctionOptions* options) {
-  CheckScalar(std::move(func_name), {left_input, right_input}, expected, options);
-}
-
-void CheckScalarBinary(std::string func_name, std::shared_ptr<Scalar> left_input,
-                       std::shared_ptr<Array> right_input,
-                       std::shared_ptr<Array> expected, const FunctionOptions* options) {
+void CheckScalarBinary(std::string func_name, Datum left_input, Datum right_input,
+                       Datum expected, const FunctionOptions* options) {
   CheckScalar(std::move(func_name), {left_input, right_input}, expected, options);
 }
 
diff --git a/cpp/src/arrow/compute/kernels/test_util.h b/cpp/src/arrow/compute/kernels/test_util.h
index c366b99a71d..eecedb64317 100644
--- a/cpp/src/arrow/compute/kernels/test_util.h
+++ b/cpp/src/arrow/compute/kernels/test_util.h
@@ -78,33 +78,11 @@ void CheckScalarUnary(std::string func_name, std::shared_ptr<DataType> in_ty,
                       std::string json_expected,
                       const FunctionOptions* options = nullptr);
 
-void CheckScalarUnary(std::string func_name, std::shared_ptr<Array> input,
-                      std::shared_ptr<Array> expected,
+void CheckScalarUnary(std::string func_name, Datum input, Datum expected,
                       const FunctionOptions* options = nullptr);
 
-void CheckScalarUnary(std::string func_name, std::shared_ptr<Scalar> input,
-                      std::shared_ptr<Scalar> expected,
-                      const FunctionOptions* options = nullptr);
-
-void CheckScalarBinary(std::string func_name, std::shared_ptr<Scalar> left_input,
-                       std::shared_ptr<Scalar> right_input,
-                       std::shared_ptr<Scalar> expected,
-                       const FunctionOptions* options = nullptr);
-
-void CheckScalarBinary(std::string func_name, std::shared_ptr<Array> left_input,
-                       std::shared_ptr<Array> right_input,
-                       std::shared_ptr<Array> expected,
-                       const FunctionOptions* options = nullptr);
-
-void CheckScalarBinary(std::string func_name, std::shared_ptr<Array> left_input,
-                       std::shared_ptr<Scalar> right_input,
-                       std::shared_ptr<Array> expected,
-                       const FunctionOptions* options = nullptr);
-
-void CheckScalarBinary(std::string func_name, std::shared_ptr<Scalar> left_input,
-                       std::shared_ptr<Array> right_input,
-                       std::shared_ptr<Array> expected,
-                       const FunctionOptions* options = nullptr);
+void CheckScalarBinary(std::string func_name, Datum left_input, Datum right_input,
+                       Datum expected, const FunctionOptions* options = nullptr);
 
 void CheckVectorUnary(std::string func_name, Datum input, std::shared_ptr<Array> expected,
                       const FunctionOptions* options = nullptr);

From 48964bf9692eaf2036c2b601c3f18ae788fe026d Mon Sep 17 00:00:00 2001
From: ZMZ <zmz@yanhuangdata.com>
Date: Mon, 12 Apr 2021 15:11:27 +0800
Subject: [PATCH 717/719] support null data type in gdv

---
 cpp/src/gandiva/CMakeLists.txt             |   2 +
 cpp/src/gandiva/annotator.cc               |  18 ++-
 cpp/src/gandiva/dex.h                      |   8 ++
 cpp/src/gandiva/dex_visitor.h              |   3 +
 cpp/src/gandiva/exported_funcs.h           |   6 +
 cpp/src/gandiva/expr_decomposer.cc         |   7 ++
 cpp/src/gandiva/expr_decomposer.h          |   1 +
 cpp/src/gandiva/expr_validator.cc          |  22 +++-
 cpp/src/gandiva/expr_validator.h           |   1 +
 cpp/src/gandiva/function_registry.cc       |   4 +
 cpp/src/gandiva/function_registry_common.h |   1 +
 cpp/src/gandiva/function_registry_null.h   |  40 +++++++
 cpp/src/gandiva/llvm_generator.cc          |  28 ++++-
 cpp/src/gandiva/llvm_generator.h           |   1 +
 cpp/src/gandiva/node.h                     |  15 ++-
 cpp/src/gandiva/node_visitor.h             |   2 +
 cpp/src/gandiva/null_ops.cc                |  50 ++++++++
 cpp/src/gandiva/null_ops.h                 |  30 +++++
 cpp/src/gandiva/null_ops_test.cc           |  30 +++++
 cpp/src/gandiva/precompiled/types.h        |   1 +
 cpp/src/gandiva/projector.cc               |  13 ++-
 cpp/src/gandiva/tests/CMakeLists.txt       |   1 +
 cpp/src/gandiva/tests/null_test.cc         | 130 +++++++++++++++++++++
 cpp/src/gandiva/tree_expr_builder.cc       |   2 +
 24 files changed, 399 insertions(+), 17 deletions(-)
 create mode 100644 cpp/src/gandiva/function_registry_null.h
 create mode 100644 cpp/src/gandiva/null_ops.cc
 create mode 100644 cpp/src/gandiva/null_ops.h
 create mode 100644 cpp/src/gandiva/null_ops_test.cc
 create mode 100644 cpp/src/gandiva/tests/null_test.cc

diff --git a/cpp/src/gandiva/CMakeLists.txt b/cpp/src/gandiva/CMakeLists.txt
index 654a4a40be1..18932a9523c 100644
--- a/cpp/src/gandiva/CMakeLists.txt
+++ b/cpp/src/gandiva/CMakeLists.txt
@@ -69,6 +69,7 @@ set(SRC_FILES
     expression_registry.cc
     exported_funcs_registry.cc
     filter.cc
+    null_ops.cc
     function_ir_builder.cc
     function_registry.cc
     function_registry_arithmetic.cc
@@ -236,6 +237,7 @@ add_gandiva_test(internals-test
                  random_generator_holder_test.cc
                  hash_utils_test.cc
                  gdv_function_stubs_test.cc
+                 null_ops_test.cc
                  EXTRA_DEPENDENCIES
                  LLVM::LLVM_INTERFACE
                  ${GANDIVA_OPENSSL_LIBS}
diff --git a/cpp/src/gandiva/annotator.cc b/cpp/src/gandiva/annotator.cc
index f6acaff1804..8d0eb145e17 100644
--- a/cpp/src/gandiva/annotator.cc
+++ b/cpp/src/gandiva/annotator.cc
@@ -77,13 +77,21 @@ void Annotator::PrepareBuffersForField(const FieldDescriptor& desc,
     ++buffer_idx;
   }
 
-  uint8_t* data_buf = const_cast<uint8_t*>(array_data.buffers[buffer_idx]->data());
-  eval_batch->SetBuffer(desc.data_idx(), data_buf, array_data.offset);
+  if (array_data.type->id() == arrow::Type::NA) {
+    eval_batch->SetBuffer(desc.data_idx(), nullptr, array_data.offset);
+  } else {
+    uint8_t* data_buf = const_cast<uint8_t*>(array_data.buffers[buffer_idx]->data());
+    eval_batch->SetBuffer(desc.data_idx(), data_buf, array_data.offset);
+  }
   if (is_output) {
     // pass in the Buffer object for output data buffers. Can be used for resizing.
-    uint8_t* data_buf_ptr =
-        reinterpret_cast<uint8_t*>(array_data.buffers[buffer_idx].get());
-    eval_batch->SetBuffer(desc.data_buffer_ptr_idx(), data_buf_ptr, array_data.offset);
+    if (array_data.type->id() == arrow::Type::NA) {
+      eval_batch->SetBuffer(desc.data_buffer_ptr_idx(), nullptr, array_data.offset);
+    } else {
+      uint8_t* data_buf_ptr =
+          reinterpret_cast<uint8_t*>(array_data.buffers[buffer_idx].get());
+      eval_batch->SetBuffer(desc.data_buffer_ptr_idx(), data_buf_ptr, array_data.offset);
+    }
   }
 }
 
diff --git a/cpp/src/gandiva/dex.h b/cpp/src/gandiva/dex.h
index d1115c0516a..0b6cc22c197 100644
--- a/cpp/src/gandiva/dex.h
+++ b/cpp/src/gandiva/dex.h
@@ -205,6 +205,14 @@ class GANDIVA_EXPORT LiteralDex : public Dex {
   LiteralHolder holder_;
 };
 
+/// decomposed expression for a null literal.
+class GANDIVA_EXPORT NullLiteralDex : public Dex {
+ public:
+  NullLiteralDex() {}
+
+  void Accept(DexVisitor& visitor) override { visitor.Visit(*this); }
+};
+
 /// decomposed if-else expression.
 class GANDIVA_EXPORT IfDex : public Dex {
  public:
diff --git a/cpp/src/gandiva/dex_visitor.h b/cpp/src/gandiva/dex_visitor.h
index 5d160bb22ca..28378db0c19 100644
--- a/cpp/src/gandiva/dex_visitor.h
+++ b/cpp/src/gandiva/dex_visitor.h
@@ -31,6 +31,7 @@ class VectorReadFixedLenValueDex;
 class VectorReadVarLenValueDex;
 class LocalBitMapValidityDex;
 class LiteralDex;
+class NullLiteralDex;
 class TrueDex;
 class FalseDex;
 class NonNullableFuncDex;
@@ -54,6 +55,7 @@ class GANDIVA_EXPORT DexVisitor {
   virtual void Visit(const TrueDex& dex) = 0;
   virtual void Visit(const FalseDex& dex) = 0;
   virtual void Visit(const LiteralDex& dex) = 0;
+  virtual void Visit(const NullLiteralDex& dex) = 0;
   virtual void Visit(const NonNullableFuncDex& dex) = 0;
   virtual void Visit(const NullableNeverFuncDex& dex) = 0;
   virtual void Visit(const NullableInternalFuncDex& dex) = 0;
@@ -80,6 +82,7 @@ class GANDIVA_EXPORT DexDefaultVisitor : public DexVisitor {
   VISIT_DCHECK(TrueDex)
   VISIT_DCHECK(FalseDex)
   VISIT_DCHECK(LiteralDex)
+  VISIT_DCHECK(NullLiteralDex)
   VISIT_DCHECK(NonNullableFuncDex)
   VISIT_DCHECK(NullableNeverFuncDex)
   VISIT_DCHECK(NullableInternalFuncDex)
diff --git a/cpp/src/gandiva/exported_funcs.h b/cpp/src/gandiva/exported_funcs.h
index 58205266094..1dc1f57f770 100644
--- a/cpp/src/gandiva/exported_funcs.h
+++ b/cpp/src/gandiva/exported_funcs.h
@@ -32,6 +32,12 @@ class ExportedFuncsBase {
   virtual void AddMappings(Engine* engine) const = 0;
 };
 
+// Class for exporting Null functions
+class ExportedNullFunctions : public ExportedFuncsBase {
+  void AddMappings(Engine* engine) const override;
+};
+REGISTER_EXPORTED_FUNCS(ExportedNullFunctions);
+
 // Class for exporting Stub functions
 class ExportedStubFunctions : public ExportedFuncsBase {
   void AddMappings(Engine* engine) const override;
diff --git a/cpp/src/gandiva/expr_decomposer.cc b/cpp/src/gandiva/expr_decomposer.cc
index 1c09d28f5e0..02bb050724b 100644
--- a/cpp/src/gandiva/expr_decomposer.cc
+++ b/cpp/src/gandiva/expr_decomposer.cc
@@ -225,6 +225,13 @@ Status ExprDecomposer::Visit(const LiteralNode& node) {
   return Status::OK();
 }
 
+Status ExprDecomposer::Visit(const NullLiteralNode& node) {
+  auto value_dex = std::make_shared<NullLiteralDex>();
+  auto validity_dex = std::make_shared<FalseDex>();
+  result_ = std::make_shared<ValueValidityPair>(validity_dex, value_dex);
+  return Status::OK();
+}
+
 // The bolow functions use a stack to detect :
 // a. nested if-else expressions.
 //    In such cases,  the local bitmap can be re-used.
diff --git a/cpp/src/gandiva/expr_decomposer.h b/cpp/src/gandiva/expr_decomposer.h
index f68b8a8fc02..d5b3866ea35 100644
--- a/cpp/src/gandiva/expr_decomposer.h
+++ b/cpp/src/gandiva/expr_decomposer.h
@@ -64,6 +64,7 @@ class GANDIVA_EXPORT ExprDecomposer : public NodeVisitor {
   Status Visit(const FunctionNode& node) override;
   Status Visit(const IfNode& node) override;
   Status Visit(const LiteralNode& node) override;
+  Status Visit(const NullLiteralNode& node) override;
   Status Visit(const BooleanNode& node) override;
   Status Visit(const InExpressionNode<int32_t>& node) override;
   Status Visit(const InExpressionNode<int64_t>& node) override;
diff --git a/cpp/src/gandiva/expr_validator.cc b/cpp/src/gandiva/expr_validator.cc
index c3c784c9511..32aab53bf07 100644
--- a/cpp/src/gandiva/expr_validator.cc
+++ b/cpp/src/gandiva/expr_validator.cc
@@ -42,11 +42,14 @@ Status ExprValidator::Validate(const ExpressionPtr& expr) {
 }
 
 Status ExprValidator::Visit(const FieldNode& node) {
-  auto llvm_type = types_->IRType(node.return_type()->id());
-  ARROW_RETURN_IF(llvm_type == nullptr,
-                  Status::ExpressionValidationError("Field ", node.field()->name(),
-                                                    " has unsupported data type ",
-                                                    node.return_type()->name()));
+  auto return_type = node.return_type();
+  if (return_type->id() != arrow::Type::NA) {
+    auto llvm_type = types_->DataVecType(node.return_type());
+    ARROW_RETURN_IF(llvm_type == nullptr,
+                    Status::ExpressionValidationError("Field ", node.field()->name(),
+                                                      " has unsupported data type ",
+                                                      node.return_type()->name()));
+  }
 
   // Ensure that field is found in schema
   auto field_in_schema_entry = field_map_.find(node.field()->name());
@@ -120,6 +123,15 @@ Status ExprValidator::Visit(const LiteralNode& node) {
   return Status::OK();
 }
 
+Status ExprValidator::Visit(const NullLiteralNode& node) {
+  auto llvm_type = types_->DataVecType(node.return_type());
+  ARROW_RETURN_IF(llvm_type != nullptr,
+                  Status::ExpressionValidationError("Should be data type ",
+                                                    node.return_type()->name()));
+
+  return Status::OK();
+}
+
 Status ExprValidator::Visit(const BooleanNode& node) {
   ARROW_RETURN_IF(
       node.children().size() < 2,
diff --git a/cpp/src/gandiva/expr_validator.h b/cpp/src/gandiva/expr_validator.h
index daaf50897fc..08b3e422761 100644
--- a/cpp/src/gandiva/expr_validator.h
+++ b/cpp/src/gandiva/expr_validator.h
@@ -57,6 +57,7 @@ class ExprValidator : public NodeVisitor {
   Status Visit(const FunctionNode& node) override;
   Status Visit(const IfNode& node) override;
   Status Visit(const LiteralNode& node) override;
+  Status Visit(const NullLiteralNode& node) override;
   Status Visit(const BooleanNode& node) override;
   Status Visit(const InExpressionNode<int32_t>& node) override;
   Status Visit(const InExpressionNode<int64_t>& node) override;
diff --git a/cpp/src/gandiva/function_registry.cc b/cpp/src/gandiva/function_registry.cc
index d5d015c10b4..2d622124102 100644
--- a/cpp/src/gandiva/function_registry.cc
+++ b/cpp/src/gandiva/function_registry.cc
@@ -20,6 +20,7 @@
 #include "gandiva/function_registry_datetime.h"
 #include "gandiva/function_registry_hash.h"
 #include "gandiva/function_registry_math_ops.h"
+#include "gandiva/function_registry_null.h"
 #include "gandiva/function_registry_string.h"
 #include "gandiva/function_registry_timestamp_arithmetic.h"
 
@@ -65,6 +66,9 @@ SignatureMap FunctionRegistry::InitPCMap() {
   auto v6 = GetDateTimeArithmeticFunctionRegistry();
   pc_registry_.insert(std::end(pc_registry_), v6.begin(), v6.end());
 
+  auto v8 = GetNullFunctionRegistry();
+  pc_registry_.insert(std::end(pc_registry_), v8.begin(), v8.end());
+
   for (auto& elem : pc_registry_) {
     for (auto& func_signature : elem.signatures()) {
       map.insert(std::make_pair(&(func_signature), &elem));
diff --git a/cpp/src/gandiva/function_registry_common.h b/cpp/src/gandiva/function_registry_common.h
index 66f94515089..b95b8684d6c 100644
--- a/cpp/src/gandiva/function_registry_common.h
+++ b/cpp/src/gandiva/function_registry_common.h
@@ -44,6 +44,7 @@ using arrow::int32;
 using arrow::int64;
 using arrow::int8;
 using arrow::month_interval;
+using arrow::null;
 using arrow::uint16;
 using arrow::uint32;
 using arrow::uint64;
diff --git a/cpp/src/gandiva/function_registry_null.h b/cpp/src/gandiva/function_registry_null.h
new file mode 100644
index 00000000000..a01cbef6fc1
--- /dev/null
+++ b/cpp/src/gandiva/function_registry_null.h
@@ -0,0 +1,40 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <vector>
+
+#include "gandiva/native_function.h"
+
+namespace gandiva {
+
+std::vector<NativeFunction> GetNullFunctionRegistry() {
+  static std::vector<NativeFunction> null_fn_registry_ = {
+      NativeFunction("equal",
+                     {"not_equal", "less_than", "less_than_or_equal_to", "greater_than",
+                      "greater_than_or_equal_to"},
+                     DataTypeVector{null(), null()}, boolean(), kResultNullNever,
+                     "compare_null_null"),
+      NativeFunction("isnull", {}, DataTypeVector{null()}, boolean(), kResultNullNever,
+                     "isnull_null"),
+      NativeFunction("isnotnull", {}, DataTypeVector{null()}, boolean(), kResultNullNever,
+                     "isnotnull_null")};
+  return null_fn_registry_;
+}
+
+}  // namespace gandiva
diff --git a/cpp/src/gandiva/llvm_generator.cc b/cpp/src/gandiva/llvm_generator.cc
index d84a0374e6b..8d6a233623d 100644
--- a/cpp/src/gandiva/llvm_generator.cc
+++ b/cpp/src/gandiva/llvm_generator.cc
@@ -170,6 +170,9 @@ llvm::Value* LLVMGenerator::GetDataReference(llvm::Value* arg_addrs, int idx,
   llvm::Value* load = LoadVectorAtIndex(arg_addrs, idx, name);
   llvm::Type* base_type = types()->DataVecType(field->type());
   llvm::Value* ret;
+  if (base_type == nullptr) {
+    return nullptr;
+  }
   if (base_type->isPointerTy()) {
     ret = ir_builder()->CreateIntToPtr(load, base_type, name + "_darray");
   } else {
@@ -363,6 +366,8 @@ Status LLVMGenerator::CodeGenExprValue(DexPtr value_expr, int buffer_count,
     AddFunctionCall("gdv_fn_populate_varlen_vector", types()->i32_type(),
                     {arg_context_ptr, output_buffer_ptr_ref, output_offset_ref, loop_var,
                      output_value->data(), output_value->length()});
+  } else if (output_type_id == arrow::Type::NA) {
+    // Do nothing when data type is null
   } else {
     return Status::NotImplemented("output type ", output->Type()->ToString(),
                                   " not supported");
@@ -452,6 +457,10 @@ void LLVMGenerator::ComputeBitMapsForExpr(const CompiledExpr& compiled_expr,
   // Extract the destination bitmap address.
   int out_idx = compiled_expr.output()->validity_idx();
   uint8_t* dst_bitmap = eval_batch.GetBuffer(out_idx);
+  if (dst_bitmap == nullptr) {
+    // Return when dst_bitmap is null meaning data type is null
+    return;
+  }
   // Compute the destination bitmap.
   if (selection_vector == nullptr) {
     accumulator.ComputeResult(dst_bitmap);
@@ -556,6 +565,9 @@ void LLVMGenerator::Visitor::Visit(const VectorReadFixedLenValueDex& dex) {
       break;
     }
 
+    case arrow::Type::NA:
+      break;
+
     default: {
       auto slot_offset = builder->CreateGEP(slot_ref, slot_index);
       slot_value = builder->CreateLoad(slot_offset, dex.FieldName());
@@ -720,6 +732,13 @@ void LLVMGenerator::Visitor::Visit(const LiteralDex& dex) {
   result_.reset(new LValue(value, len));
 }
 
+void LLVMGenerator::Visitor::Visit(const NullLiteralDex& dex) {
+  llvm::Value* value = nullptr;
+  llvm::Value* len = nullptr;
+  ADD_VISITOR_TRACE("visit Literal null");
+  result_.reset(new LValue(value, len));
+}
+
 void LLVMGenerator::Visitor::Visit(const NonNullableFuncDex& dex) {
   const std::string& function_name = dex.func_descriptor()->name();
   ADD_VISITOR_TRACE("visit NonNullableFunc base function " + function_name);
@@ -1248,10 +1267,11 @@ std::vector<llvm::Value*> LLVMGenerator::Visitor::BuildParams(
     // build value.
     DexPtr value_expr = pair->value_expr();
     value_expr->Accept(*this);
-    LValue& result_ref = *result();
-
-    // append all the parameters corresponding to this LValue.
-    result_ref.AppendFunctionParams(&params);
+    if (auto result_ptr = result()) {
+      LValue& result_ref = *result_ptr;
+      // append all the parameters corresponding to this LValue.
+      result_ref.AppendFunctionParams(&params);
+    }
 
     // build validity.
     if (with_validity) {
diff --git a/cpp/src/gandiva/llvm_generator.h b/cpp/src/gandiva/llvm_generator.h
index ff6d846024c..d18a47a2735 100644
--- a/cpp/src/gandiva/llvm_generator.h
+++ b/cpp/src/gandiva/llvm_generator.h
@@ -100,6 +100,7 @@ class GANDIVA_EXPORT LLVMGenerator {
     void Visit(const TrueDex& dex) override;
     void Visit(const FalseDex& dex) override;
     void Visit(const LiteralDex& dex) override;
+    void Visit(const NullLiteralDex& dex) override;
     void Visit(const NonNullableFuncDex& dex) override;
     void Visit(const NullableNeverFuncDex& dex) override;
     void Visit(const NullableInternalFuncDex& dex) override;
diff --git a/cpp/src/gandiva/node.h b/cpp/src/gandiva/node.h
index 20807d4a0cb..6e4c22e93b1 100644
--- a/cpp/src/gandiva/node.h
+++ b/cpp/src/gandiva/node.h
@@ -23,7 +23,6 @@
 #include <vector>
 
 #include "arrow/status.h"
-
 #include "gandiva/arrow.h"
 #include "gandiva/func_descriptor.h"
 #include "gandiva/gandiva_aliases.h"
@@ -94,6 +93,20 @@ class GANDIVA_EXPORT LiteralNode : public Node {
   bool is_null_;
 };
 
+/// \brief Node in the expression tree, representing a NullLiteralNode.
+class GANDIVA_EXPORT NullLiteralNode : public Node {
+ public:
+  NullLiteralNode() : Node(arrow::null()) {}
+
+  Status Accept(NodeVisitor& visitor) const override { return visitor.Visit(*this); }
+
+  std::string ToString() const override {
+    std::stringstream ss;
+    ss << "(const " << return_type()->ToString() << ") null";
+    return ss.str();
+  }
+};
+
 /// \brief Node in the expression tree, representing an arrow field.
 class GANDIVA_EXPORT FieldNode : public Node {
  public:
diff --git a/cpp/src/gandiva/node_visitor.h b/cpp/src/gandiva/node_visitor.h
index 8f233f5b77c..a8f94fe8735 100644
--- a/cpp/src/gandiva/node_visitor.h
+++ b/cpp/src/gandiva/node_visitor.h
@@ -31,6 +31,7 @@ class FieldNode;
 class FunctionNode;
 class IfNode;
 class LiteralNode;
+class NullLiteralNode;
 class BooleanNode;
 template <typename Type>
 class InExpressionNode;
@@ -44,6 +45,7 @@ class GANDIVA_EXPORT NodeVisitor {
   virtual Status Visit(const FunctionNode& node) = 0;
   virtual Status Visit(const IfNode& node) = 0;
   virtual Status Visit(const LiteralNode& node) = 0;
+  virtual Status Visit(const NullLiteralNode& node) = 0;
   virtual Status Visit(const BooleanNode& node) = 0;
   virtual Status Visit(const InExpressionNode<int32_t>& node) = 0;
   virtual Status Visit(const InExpressionNode<int64_t>& node) = 0;
diff --git a/cpp/src/gandiva/null_ops.cc b/cpp/src/gandiva/null_ops.cc
new file mode 100644
index 00000000000..79d21ae6c9a
--- /dev/null
+++ b/cpp/src/gandiva/null_ops.cc
@@ -0,0 +1,50 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#include "gandiva/null_ops.h"
+
+#include "gandiva/engine.h"
+#include "gandiva/exported_funcs.h"
+#include "gandiva/gdv_function_stubs.h"
+
+/// Stub functions that can be accessed from LLVM or the pre-compiled library.
+
+extern "C" {
+bool compare_null_null() { return false; }
+
+bool isnull_null() { return true; }
+
+bool isnotnull_null() { return false; }
+}
+
+namespace gandiva {
+void ExportedNullFunctions::AddMappings(Engine* engine) const {
+  std::vector<llvm::Type*> args;
+  auto types = engine->types();
+
+  args = {types->i1_type(), types->i1_type()};
+  engine->AddGlobalMappingForFunc("compare_null_null", types->i1_type() /*return_type*/,
+                                  args, reinterpret_cast<void*>(compare_null_null));
+
+  args = {types->i1_type()};
+  engine->AddGlobalMappingForFunc("isnull_null", types->i1_type() /*return_type*/, args,
+                                  reinterpret_cast<void*>(isnull_null));
+
+  args = {types->i1_type()};
+  engine->AddGlobalMappingForFunc("isnotnull_null", types->i1_type() /*return_type*/,
+                                  args, reinterpret_cast<void*>(isnotnull_null));
+}
+}  // namespace gandiva
diff --git a/cpp/src/gandiva/null_ops.h b/cpp/src/gandiva/null_ops.h
new file mode 100644
index 00000000000..65bce6fe149
--- /dev/null
+++ b/cpp/src/gandiva/null_ops.h
@@ -0,0 +1,30 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+
+/// Stub functions that can be accessed from LLVM.
+extern "C" {
+
+bool compare_null_null();
+
+bool isnull_null();
+
+bool isnotnull_null();
+}
\ No newline at end of file
diff --git a/cpp/src/gandiva/null_ops_test.cc b/cpp/src/gandiva/null_ops_test.cc
new file mode 100644
index 00000000000..3ef351cb773
--- /dev/null
+++ b/cpp/src/gandiva/null_ops_test.cc
@@ -0,0 +1,30 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+#include "gandiva/precompiled/types.h"
+
+namespace gandiva {
+
+TEST(TestNullOps, Test) {
+  EXPECT_FALSE(compare_null_null());
+  EXPECT_TRUE(isnull_null());
+  EXPECT_FALSE(isnotnull_null());
+}
+}  // namespace gandiva
diff --git a/cpp/src/gandiva/precompiled/types.h b/cpp/src/gandiva/precompiled/types.h
index 16d1550b46b..f216cc98756 100644
--- a/cpp/src/gandiva/precompiled/types.h
+++ b/cpp/src/gandiva/precompiled/types.h
@@ -20,6 +20,7 @@
 #include <cstdint>
 
 #include "gandiva/gdv_function_stubs.h"
+#include "gandiva/null_ops.h"
 
 // Use the same names as in arrow data types. Makes it easy to write pre-processor macros.
 using gdv_boolean = bool;
diff --git a/cpp/src/gandiva/projector.cc b/cpp/src/gandiva/projector.cc
index ff167538f9c..dbdd746d671 100644
--- a/cpp/src/gandiva/projector.cc
+++ b/cpp/src/gandiva/projector.cc
@@ -24,7 +24,6 @@
 
 #include "arrow/util/hash_util.h"
 #include "arrow/util/logging.h"
-
 #include "gandiva/cache.h"
 #include "gandiva/expr_validator.h"
 #include "gandiva/llvm_generator.h"
@@ -296,6 +295,8 @@ Status Projector::AllocArrayData(const DataTypePtr& type, int64_t num_records,
   } else if (arrow::is_binary_like(type_id)) {
     // we don't know the expected size for varlen output vectors.
     data_len = 0;
+  } else if (type_id == arrow::Type::NA) {
+    data_len = 0;
   } else {
     return Status::Invalid("Unsupported output data type " + type->ToString());
   }
@@ -308,7 +309,11 @@ Status Projector::AllocArrayData(const DataTypePtr& type, int64_t num_records,
   }
   buffers.push_back(std::move(data_buffer));
 
-  *array_data = arrow::ArrayData::Make(type, num_records, std::move(buffers));
+  if (type_id == arrow::Type::NA) {
+    *array_data = arrow::ArrayData::Make(type, num_records, {nullptr});
+  } else {
+    *array_data = arrow::ArrayData::Make(type, num_records, std::move(buffers));
+  }
   return Status::OK();
 }
 
@@ -357,6 +362,10 @@ Status Projector::ValidateArrayDataCapacity(const arrow::ArrayData& array_data,
     int64_t data_len = array_data.buffers[1]->capacity();
     ARROW_RETURN_IF(data_len < min_data_len,
                     Status::Invalid("Data buffer too small for ", field.name()));
+  } else if (type_id == arrow::Type::NA) {
+    ARROW_RETURN_IF(array_data.buffers.size() == 1 && array_data.buffers[0] == nullptr,
+                    Status::Invalid("Data buffer should be nullptr for null typed field",
+                                    field.name()));
   } else {
     return Status::Invalid("Unsupported output data type " + field.type()->ToString());
   }
diff --git a/cpp/src/gandiva/tests/CMakeLists.txt b/cpp/src/gandiva/tests/CMakeLists.txt
index 5fa2da16c63..a57085c589e 100644
--- a/cpp/src/gandiva/tests/CMakeLists.txt
+++ b/cpp/src/gandiva/tests/CMakeLists.txt
@@ -25,6 +25,7 @@ add_gandiva_test(binary_test)
 add_gandiva_test(date_time_test)
 add_gandiva_test(to_string_test)
 add_gandiva_test(utf8_test)
+add_gandiva_test(null_test)
 add_gandiva_test(hash_test)
 add_gandiva_test(in_expr_test)
 add_gandiva_test(null_validity_test)
diff --git a/cpp/src/gandiva/tests/null_test.cc b/cpp/src/gandiva/tests/null_test.cc
new file mode 100644
index 00000000000..a3ff18baa32
--- /dev/null
+++ b/cpp/src/gandiva/tests/null_test.cc
@@ -0,0 +1,130 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include "arrow/memory_pool.h"
+#include "arrow/status.h"
+#include "gandiva/projector.h"
+#include "gandiva/tests/test_util.h"
+#include "gandiva/tree_expr_builder.h"
+
+namespace gandiva {
+
+using arrow::boolean;
+using arrow::null;
+
+class TestNull : public ::testing::Test {
+ public:
+  void SetUp() { pool_ = arrow::default_memory_pool(); }
+
+ protected:
+  arrow::MemoryPool* pool_;
+};
+
+TEST_F(TestNull, TestSimple) {
+  // schema for input fields
+  auto field_null = field("field_null", null());
+  auto schema = arrow::schema({field_null});
+
+  auto literal_null = TreeExprBuilder::MakeNull(arrow::null());
+  auto node_field_null = TreeExprBuilder::MakeField(field_null);
+
+  // output fields
+  auto res_1 = field("res1", null());
+  auto res_2 = field("res2", null());
+  auto expr_1 = TreeExprBuilder::MakeExpression(literal_null, res_1);
+  auto expr_2 = TreeExprBuilder::MakeExpression(node_field_null, res_2);
+
+  // Build a projector for the expressions.
+  std::shared_ptr<Projector> projector;
+  auto status =
+      Projector::Make(schema, {expr_1, expr_2}, TestConfiguration(), &projector);
+  EXPECT_TRUE(status.ok()) << status.message();
+
+  arrow::ArrayVector outputs;
+  auto nb = std::make_shared<arrow::NullBuilder>();
+  auto _ = nb->AppendNulls(4);
+  std::shared_ptr<arrow::NullArray> null_array;
+  _ = nb->Finish(&null_array);
+  auto in_batch = arrow::RecordBatch::Make(schema, 4, {null_array});
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  EXPECT_TRUE(status.ok());
+
+  // Validate results
+  EXPECT_ARROW_ARRAY_EQUALS(null_array, outputs.at(0));
+  EXPECT_ARROW_ARRAY_EQUALS(null_array, outputs.at(1));
+}
+
+TEST_F(TestNull, TestOps) {
+  // schema for input fields
+  auto field_null = field("field_null", null());
+  auto schema = arrow::schema({field_null});
+
+  // output fields
+  auto res_1 = field("res1", boolean());
+  auto res_2 = field("res2", boolean());
+  auto res_3 = field("res3", boolean());
+  auto res_4 = field("res4", boolean());
+  auto res_5 = field("res5", boolean());
+  auto res_6 = field("res6", boolean());
+  auto res_7 = field("res7", boolean());
+  auto res_8 = field("res8", boolean());
+  auto expr_1 = TreeExprBuilder::MakeExpression("equal", {field_null, field_null}, res_1);
+  auto expr_2 =
+      TreeExprBuilder::MakeExpression("not_equal", {field_null, field_null}, res_2);
+  auto expr_3 =
+      TreeExprBuilder::MakeExpression("less_than", {field_null, field_null}, res_3);
+  auto expr_4 = TreeExprBuilder::MakeExpression("less_than_or_equal_to",
+                                                {field_null, field_null}, res_4);
+  auto expr_5 =
+      TreeExprBuilder::MakeExpression("greater_than", {field_null, field_null}, res_5);
+  auto expr_6 = TreeExprBuilder::MakeExpression("greater_than_or_equal_to",
+                                                {field_null, field_null}, res_6);
+  auto expr_7 = TreeExprBuilder::MakeExpression("isnull", {field_null}, res_7);
+  auto expr_8 = TreeExprBuilder::MakeExpression("isnotnull", {field_null}, res_8);
+
+  // Build a projector for the expressions.
+  std::shared_ptr<Projector> projector;
+  auto status = Projector::Make(
+      schema, {expr_1, expr_2, expr_3, expr_4, expr_5, expr_6, expr_7, expr_8},
+      TestConfiguration(), &projector);
+  EXPECT_TRUE(status.ok()) << status.message();
+
+  arrow::ArrayVector outputs;
+  auto nb = std::make_shared<arrow::NullBuilder>();
+  auto _ = nb->AppendNulls(4);
+  std::shared_ptr<arrow::NullArray> null_array;
+  _ = nb->Finish(&null_array);
+  auto in_batch = arrow::RecordBatch::Make(schema, 4, {null_array});
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  EXPECT_TRUE(status.ok());
+
+  // Validate results
+  auto exp_true = MakeArrowArrayBool({true, true, true, true}, {true, true, true, true});
+  auto exp_false =
+      MakeArrowArrayBool({false, false, false, false}, {true, true, true, true});
+  for (int i = 0; i < 8; i++) {
+    if (i == 6) {
+      EXPECT_ARROW_ARRAY_EQUALS(exp_true, outputs.at(i));
+    } else {
+      EXPECT_ARROW_ARRAY_EQUALS(exp_false, outputs.at(i));
+    }
+  }
+}
+
+}  // namespace gandiva
diff --git a/cpp/src/gandiva/tree_expr_builder.cc b/cpp/src/gandiva/tree_expr_builder.cc
index de8e3445a12..c7869c147ec 100644
--- a/cpp/src/gandiva/tree_expr_builder.cc
+++ b/cpp/src/gandiva/tree_expr_builder.cc
@@ -105,6 +105,8 @@ NodePtr TreeExprBuilder::MakeNull(DataTypePtr data_type) {
       DecimalScalar128 literal(decimal_type->precision(), decimal_type->scale());
       return std::make_shared<LiteralNode>(data_type, LiteralHolder(literal), true);
     }
+    case arrow::Type::NA:
+      return std::make_shared<NullLiteralNode>();
     default:
       return nullptr;
   }

From b76e985d0f3237a5916c44e7083b6de00a55b811 Mon Sep 17 00:00:00 2001
From: ZMZ <zmz@yanhuangdata.com>
Date: Tue, 13 Apr 2021 09:39:09 +0800
Subject: [PATCH 718/719] update compare function return type

---
 cpp/src/gandiva/function_registry_null.h |  2 +-
 cpp/src/gandiva/llvm_generator.cc        |  2 +-
 cpp/src/gandiva/null_ops.cc              |  8 ++++----
 cpp/src/gandiva/null_ops.h               |  6 +++---
 cpp/src/gandiva/null_ops_test.cc         |  6 +++---
 cpp/src/gandiva/tests/null_test.cc       | 22 ++++++++++------------
 6 files changed, 22 insertions(+), 24 deletions(-)

diff --git a/cpp/src/gandiva/function_registry_null.h b/cpp/src/gandiva/function_registry_null.h
index a01cbef6fc1..ab45e6f4e41 100644
--- a/cpp/src/gandiva/function_registry_null.h
+++ b/cpp/src/gandiva/function_registry_null.h
@@ -28,7 +28,7 @@ std::vector<NativeFunction> GetNullFunctionRegistry() {
       NativeFunction("equal",
                      {"not_equal", "less_than", "less_than_or_equal_to", "greater_than",
                       "greater_than_or_equal_to"},
-                     DataTypeVector{null(), null()}, boolean(), kResultNullNever,
+                     DataTypeVector{null(), null()}, null(), kResultNullNever,
                      "compare_null_null"),
       NativeFunction("isnull", {}, DataTypeVector{null()}, boolean(), kResultNullNever,
                      "isnull_null"),
diff --git a/cpp/src/gandiva/llvm_generator.cc b/cpp/src/gandiva/llvm_generator.cc
index 8d6a233623d..b9d5d20a11d 100644
--- a/cpp/src/gandiva/llvm_generator.cc
+++ b/cpp/src/gandiva/llvm_generator.cc
@@ -500,7 +500,7 @@ llvm::Value* LLVMGenerator::AddFunctionCall(const std::string& full_name,
 
   // build a call to the llvm function.
   llvm::Value* value;
-  if (ret_type->isVoidTy()) {
+  if (ret_type == nullptr || ret_type->isVoidTy()) {
     // void functions can't have a name for the call.
     value = ir_builder()->CreateCall(fn, args);
   } else {
diff --git a/cpp/src/gandiva/null_ops.cc b/cpp/src/gandiva/null_ops.cc
index 79d21ae6c9a..b7179a8e8be 100644
--- a/cpp/src/gandiva/null_ops.cc
+++ b/cpp/src/gandiva/null_ops.cc
@@ -23,11 +23,11 @@
 /// Stub functions that can be accessed from LLVM or the pre-compiled library.
 
 extern "C" {
-bool compare_null_null() { return false; }
+void compare_null_null(bool in1_valid, bool in2_valid) {}
 
-bool isnull_null() { return true; }
+bool isnull_null(bool in_valid) { return true; }
 
-bool isnotnull_null() { return false; }
+bool isnotnull_null(bool in_valid) { return false; }
 }
 
 namespace gandiva {
@@ -36,7 +36,7 @@ void ExportedNullFunctions::AddMappings(Engine* engine) const {
   auto types = engine->types();
 
   args = {types->i1_type(), types->i1_type()};
-  engine->AddGlobalMappingForFunc("compare_null_null", types->i1_type() /*return_type*/,
+  engine->AddGlobalMappingForFunc("compare_null_null", types->void_type() /*return_type*/,
                                   args, reinterpret_cast<void*>(compare_null_null));
 
   args = {types->i1_type()};
diff --git a/cpp/src/gandiva/null_ops.h b/cpp/src/gandiva/null_ops.h
index 65bce6fe149..492eb6033cd 100644
--- a/cpp/src/gandiva/null_ops.h
+++ b/cpp/src/gandiva/null_ops.h
@@ -22,9 +22,9 @@
 /// Stub functions that can be accessed from LLVM.
 extern "C" {
 
-bool compare_null_null();
+void compare_null_null(bool in1_valid, bool in2_valid);
 
-bool isnull_null();
+bool isnull_null(bool in_valid);
 
-bool isnotnull_null();
+bool isnotnull_null(bool in_valid);
 }
\ No newline at end of file
diff --git a/cpp/src/gandiva/null_ops_test.cc b/cpp/src/gandiva/null_ops_test.cc
index 3ef351cb773..a979b82a771 100644
--- a/cpp/src/gandiva/null_ops_test.cc
+++ b/cpp/src/gandiva/null_ops_test.cc
@@ -23,8 +23,8 @@
 namespace gandiva {
 
 TEST(TestNullOps, Test) {
-  EXPECT_FALSE(compare_null_null());
-  EXPECT_TRUE(isnull_null());
-  EXPECT_FALSE(isnotnull_null());
+  compare_null_null(true, true);
+  EXPECT_TRUE(isnull_null(true));
+  EXPECT_FALSE(isnotnull_null(true));
 }
 }  // namespace gandiva
diff --git a/cpp/src/gandiva/tests/null_test.cc b/cpp/src/gandiva/tests/null_test.cc
index a3ff18baa32..e018ab0dff4 100644
--- a/cpp/src/gandiva/tests/null_test.cc
+++ b/cpp/src/gandiva/tests/null_test.cc
@@ -76,12 +76,12 @@ TEST_F(TestNull, TestOps) {
   auto schema = arrow::schema({field_null});
 
   // output fields
-  auto res_1 = field("res1", boolean());
-  auto res_2 = field("res2", boolean());
-  auto res_3 = field("res3", boolean());
-  auto res_4 = field("res4", boolean());
-  auto res_5 = field("res5", boolean());
-  auto res_6 = field("res6", boolean());
+  auto res_1 = field("res1", null());
+  auto res_2 = field("res2", null());
+  auto res_3 = field("res3", null());
+  auto res_4 = field("res4", null());
+  auto res_5 = field("res5", null());
+  auto res_6 = field("res6", null());
   auto res_7 = field("res7", boolean());
   auto res_8 = field("res8", boolean());
   auto expr_1 = TreeExprBuilder::MakeExpression("equal", {field_null, field_null}, res_1);
@@ -118,13 +118,11 @@ TEST_F(TestNull, TestOps) {
   auto exp_true = MakeArrowArrayBool({true, true, true, true}, {true, true, true, true});
   auto exp_false =
       MakeArrowArrayBool({false, false, false, false}, {true, true, true, true});
-  for (int i = 0; i < 8; i++) {
-    if (i == 6) {
-      EXPECT_ARROW_ARRAY_EQUALS(exp_true, outputs.at(i));
-    } else {
-      EXPECT_ARROW_ARRAY_EQUALS(exp_false, outputs.at(i));
-    }
+  for (int i = 0; i < 6; i++) {
+    EXPECT_EQ(outputs.at(i)->null_count(), 4);
   }
+  EXPECT_ARROW_ARRAY_EQUALS(exp_true, outputs.at(6));
+  EXPECT_ARROW_ARRAY_EQUALS(exp_false, outputs.at(7));
 }
 
 }  // namespace gandiva

From a5224552157c9c85e44cb1c5de548db2ad5fb728 Mon Sep 17 00:00:00 2001
From: ZMZ <zmz@yanhuangdata.com>
Date: Fri, 30 Apr 2021 16:26:33 +0800
Subject: [PATCH 719/719] support null in makeif

---
 cpp/src/gandiva/llvm_generator.cc  |  3 ++
 cpp/src/gandiva/tests/null_test.cc | 47 +++++++++++++++++++++++++-----
 2 files changed, 42 insertions(+), 8 deletions(-)

diff --git a/cpp/src/gandiva/llvm_generator.cc b/cpp/src/gandiva/llvm_generator.cc
index b9d5d20a11d..33345f19d2c 100644
--- a/cpp/src/gandiva/llvm_generator.cc
+++ b/cpp/src/gandiva/llvm_generator.cc
@@ -1152,6 +1152,9 @@ LValuePtr LLVMGenerator::Visitor::BuildIfElse(llvm::Value* condition,
   // Emit the merge block.
   builder->SetInsertPoint(merge_bb);
   auto llvm_type = types->IRType(result_type->id());
+  if (llvm_type == nullptr) {
+    return nullptr;
+  }
   llvm::PHINode* result_value = builder->CreatePHI(llvm_type, 2, "res_value");
   result_value->addIncoming(then_lvalue->data(), then_bb);
   result_value->addIncoming(else_lvalue->data(), else_bb);
diff --git a/cpp/src/gandiva/tests/null_test.cc b/cpp/src/gandiva/tests/null_test.cc
index e018ab0dff4..db67117c3ef 100644
--- a/cpp/src/gandiva/tests/null_test.cc
+++ b/cpp/src/gandiva/tests/null_test.cc
@@ -57,10 +57,7 @@ TEST_F(TestNull, TestSimple) {
   EXPECT_TRUE(status.ok()) << status.message();
 
   arrow::ArrayVector outputs;
-  auto nb = std::make_shared<arrow::NullBuilder>();
-  auto _ = nb->AppendNulls(4);
-  std::shared_ptr<arrow::NullArray> null_array;
-  _ = nb->Finish(&null_array);
+  auto null_array = std::make_shared<arrow::NullArray>(4);
   auto in_batch = arrow::RecordBatch::Make(schema, 4, {null_array});
   status = projector->Evaluate(*in_batch, pool_, &outputs);
   EXPECT_TRUE(status.ok());
@@ -106,10 +103,7 @@ TEST_F(TestNull, TestOps) {
   EXPECT_TRUE(status.ok()) << status.message();
 
   arrow::ArrayVector outputs;
-  auto nb = std::make_shared<arrow::NullBuilder>();
-  auto _ = nb->AppendNulls(4);
-  std::shared_ptr<arrow::NullArray> null_array;
-  _ = nb->Finish(&null_array);
+  auto null_array = std::make_shared<arrow::NullArray>(4);
   auto in_batch = arrow::RecordBatch::Make(schema, 4, {null_array});
   status = projector->Evaluate(*in_batch, pool_, &outputs);
   EXPECT_TRUE(status.ok());
@@ -125,4 +119,41 @@ TEST_F(TestNull, TestOps) {
   EXPECT_ARROW_ARRAY_EQUALS(exp_false, outputs.at(7));
 }
 
+TEST_F(TestNull, TestMakeIf) {
+  // schema for input fields
+  auto field_null = field("field_null", null());
+  auto schema = arrow::schema({field_null});
+
+  // output fields
+  auto res_1 = field("res1", null());
+  auto res_2 = field("res2", null());
+
+  auto null_node = TreeExprBuilder::MakeNull(null());
+  auto expr_1 = TreeExprBuilder::MakeExpression(
+      TreeExprBuilder::MakeIf(TreeExprBuilder::MakeLiteral(true), null_node, null_node,
+                              null()),
+      res_1);
+  auto expr_2 = TreeExprBuilder::MakeExpression(
+      TreeExprBuilder::MakeIf(TreeExprBuilder::MakeLiteral(false), null_node, null_node,
+                              null()),
+      res_2);
+
+  // Build a projector for the expressions.
+  std::shared_ptr<Projector> projector;
+  auto status =
+      Projector::Make(schema, {expr_1, expr_2}, TestConfiguration(), &projector);
+  EXPECT_TRUE(status.ok()) << status.message();
+
+  arrow::ArrayVector outputs;
+  auto null_array = std::make_shared<arrow::NullArray>(4);
+  auto in_batch = arrow::RecordBatch::Make(schema, 4, {null_array});
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  EXPECT_TRUE(status.ok());
+
+  // Validate results
+  for (auto& output : outputs) {
+    EXPECT_EQ(output->null_count(), 4);
+  }
+}
+
 }  // namespace gandiva